2016-04-30 9 views
0

Ich bin dabei, einen zusammenfassenden Datenrahmen für einen Bericht zu erstellen. Den gewünschten Datenrahmen konnte ich manuell erzeugen. Jetzt schreibe ich eine Funktion, um die Erstellung des Ergebnisses zu vereinfachen.

Extrahieren eines Vektors aus einer Liste für eine R-Funktion

Der manuelle Prozess sieht so aus:

# create the summary function 
    summaryStatistics <- function(x, levels) {
      # Summarise one vector of responses as a single named numeric vector:
      # first the frequency of every permitted answer in `levels` (followed by
      # the count of missing responses), then the descriptive statistics.
      #
      # x      : vector of responses, may contain NA
      # levels : permitted answer values, used as the factor levels for counting
      answerCounts <- table(factor(x, levels = levels), useNA = 'always', exclude = NULL)

      observed <- na.omit(x)               # responses that were actually given
      nResponses <- length(x)              # counts missing responses as well
      spread <- var(observed)
      sdev <- sqrt(spread)
      centred <- observed - mean(observed)

      # NOTE(review): skew and kurtosis are normalised by length(x), i.e. NA
      # responses are included in the denominator - confirm this is intended.
      c(answerCounts,
        sum = sum(observed),
        length = nResponses,
        mean = mean(observed),
        standard.deviation = sdev,
        var = spread,
        median = median(observed),
        min = min(observed),
        max = max(observed),
        quantile = quantile(observed),
        skew = sum(centred^3 / sdev^3) / nResponses,
        kurtosis = sum(centred^4 / sdev^4) / nResponses - 3
      )
    }

    # test data: ten respondents, NA marks an unanswered question
    Id     <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
    ClassA <- c(1, NA, 3, 1, 1, 2, 1, 4, 5, 3)
    ClassB <- c(2, 1, 1, 3, 3, 2, 1, 1, 3, 3)
    R      <- c(1, 2, 3, NA, 9, 2, 4, 5, 6, 7)
    S      <- c(3, 7, NA, 9, 5, 8, 7, NA, 7, 6)
    W      <- c(4, 5, 6, 7, 2, 4, 5, 6, 7, 8)

    df <- data.frame(Id = Id, ClassA = ClassA, ClassB = ClassB, R = R, S = S, W = W)

    # permitted answers per question (NA = no answer)
    ClassAAnswers <- c(1:5, NA)
    ClassBAnswers <- c(1:5, NA)

    RAnswers <- c(0:10, NA)
    SAnswers <- c(0:20, NA)
    WAnswers <- c(0:30, NA)
    answers.list <- list(RAnswers, SAnswers, WAnswers)

    RSW.df <- df[c('R', 'S', 'W')]

    # create the result by hand: one summary per question, each against its
    # own answer scale, stacked in the order R, S, W
    result <- local({
      # reshape the statistics of one question into answer/question/value rows
      asLongRows <- function(statistics) {
        setNames(
          as.data.frame(as.table(simplify2array(statistics))),
          c('answer', 'question', 'value')
        )
      }

      rRows <- asLongRows(lapply(df['R'], summaryStatistics, RAnswers))
      sRows <- asLongRows(lapply(df['S'], summaryStatistics, SAnswers))
      wRows <- asLongRows(lapply(df['W'], summaryStatistics, WAnswers))

      stacked <- rbind(rbind(rRows, sRows), wRows)

      # change the order to question, answer, value and add the filter
      cbind(filter = 'None', stacked[, c(2, 1, 3)])
    })

    # return the result
    result

Das liefert folgendes Ergebnis:

 filter question    answer  value 
    1  None  R     0 0.0000000 
    2  None  R     1 1.0000000 
    3  None  R     2 2.0000000 
    4  None  R     3 1.0000000 
    5  None  R     4 1.0000000 
    6  None  R     5 1.0000000 
    7  None  R     6 1.0000000 
    8  None  R     7 1.0000000 
    9  None  R     8 0.0000000 
    10 None  R     9 1.0000000 
    11 None  R     10 0.0000000 
    12 None  R    <NA> 1.0000000 
    13 None  R    sum 39.0000000 
    14 None  R    length 10.0000000 
    15 None  R    mean 4.3333333 
    16 None  R standard.deviation 2.6457513 
    17 None  R    var 7.0000000 
    18 None  R    median 4.0000000 
    19 None  R    min 1.0000000 
    20 None  R    max 9.0000000 
    21 None  R  quantile.0% 1.0000000 
    22 None  R  quantile.25% 2.0000000 
    23 None  R  quantile.50% 4.0000000 
    24 None  R  quantile.75% 6.0000000 
    25 None  R  quantile.100% 9.0000000 
    26 None  R    skew 0.3275692 
    27 None  R   kurtosis -1.5333333 
    28 None  S     0 0.0000000 
    29 None  S     1 0.0000000 
    30 None  S     2 0.0000000 
    31 None  S     3 1.0000000 
    32 None  S     4 0.0000000 
    33 None  S     5 1.0000000 
    34 None  S     6 1.0000000 
    35 None  S     7 3.0000000 
    36 None  S     8 1.0000000 
    37 None  S     9 1.0000000 
    38 None  S     10 0.0000000 
    39 None  S     11 0.0000000 
    40 None  S     12 0.0000000 
    41 None  S     13 0.0000000 
    42 None  S     14 0.0000000 
    43 None  S     15 0.0000000 
    44 None  S     16 0.0000000 
    45 None  S     17 0.0000000 
    46 None  S     18 0.0000000 
    47 None  S     19 0.0000000 
    48 None  S     20 0.0000000 
    49 None  S    <NA> 2.0000000 
    50 None  S    sum 52.0000000 
    51 None  S    length 10.0000000 
    52 None  S    mean 6.5000000 
    53 None  S standard.deviation 1.8516402 
    54 None  S    var 3.4285714 
    55 None  S    median 7.0000000 
    56 None  S    min 3.0000000 
    57 None  S    max 9.0000000 
    58 None  S  quantile.0% 3.0000000 
    59 None  S  quantile.25% 5.7500000 
    60 None  S  quantile.50% 7.0000000 
    61 None  S  quantile.75% 7.2500000 
    62 None  S  quantile.100% 9.0000000 
    63 None  S    skew -0.4252986 
    64 None  S   kurtosis -1.3028646 
    65 None  W     0 0.0000000 
    66 None  W     1 0.0000000 
    67 None  W     2 1.0000000 
    68 None  W     3 0.0000000 
    69 None  W     4 2.0000000 
    70 None  W     5 2.0000000 
    71 None  W     6 2.0000000 
    72 None  W     7 2.0000000 
    73 None  W     8 1.0000000 
    74 None  W     9 0.0000000 
    75 None  W     10 0.0000000 
    76 None  W     11 0.0000000 
    77 None  W     12 0.0000000 
    78 None  W     13 0.0000000 
    79 None  W     14 0.0000000 
    80 None  W     15 0.0000000 
    81 None  W     16 0.0000000 
    82 None  W     17 0.0000000 
    83 None  W     18 0.0000000 
    84 None  W     19 0.0000000 
    85 None  W     20 0.0000000 
    86 None  W     21 0.0000000 
    87 None  W     22 0.0000000 
    88 None  W     23 0.0000000 
    89 None  W     24 0.0000000 
    90 None  W     25 0.0000000 
    91 None  W     26 0.0000000 
    92 None  W     27 0.0000000 
    93 None  W     28 0.0000000 
    94 None  W     29 0.0000000 
    95 None  W     30 0.0000000 
    96 None  W    <NA> 0.0000000 
    97 None  W    sum 54.0000000 
    98 None  W    length 10.0000000 
    99 None  W    mean 5.4000000 
    100 None  W standard.deviation 1.7763883 
    101 None  W    var 3.1555556 
    102 None  W    median 5.5000000 
    103 None  W    min 2.0000000 
    104 None  W    max 8.0000000 
    105 None  W  quantile.0% 2.0000000 
    106 None  W  quantile.25% 4.2500000 
    107 None  W  quantile.50% 5.5000000 
    108 None  W  quantile.75% 6.7500000 
    109 None  W  quantile.100% 8.0000000 
    110 None  W    skew -0.3339582 
    111 None  W   kurtosis -0.9871315 

Das ist das Ergebnis, das ich suche.

Ich habe eine Funktion erstellt, die den Datenrahmen und die möglichen Antworten durchläuft. Wenn ich den Vektor fest im Code vorgebe (hard-coded), erhalte ich ein Ergebnis, das mit den obigen Ergebnissen übereinstimmt.

extractSummaryDataframe <- function(questions.dataframe, answers.list, filter) {
      # Summarise every column of `questions.dataframe` with summaryStatistics()
      # and stack the results as long-format rows (answer, question, value),
      # prefixed with a constant `filter` column.
      #
      # In this variant `answers.list` is accepted but not consulted: every
      # question is tabulated against the fixed levels c(0:10, NA).
      resultColumns <- c('answer', 'question', 'value')

      # long-format rows for a single question column
      summariseQuestion <- function(name) {
        perQuestion <- lapply(questions.dataframe[c(name)], summaryStatistics, c(0:10, NA))
        longForm <- as.data.frame(as.table(simplify2array(perQuestion)))
        setNames(longForm, resultColumns)
      }
      pieces <- lapply(names(questions.dataframe), summariseQuestion)

      # start from an empty, typed frame and append the pieces in column order
      emptyResult <- data.frame(answer = factor(), question = factor(), value = double())
      stacked <- Reduce(rbind, pieces, emptyResult)

      cbind(filter = filter, stacked)
    }

    # run the variant above on the R/S/W columns; answers.list is passed along,
    # but that variant tabulates every question against the fixed c(0:10, NA)
    extractSummaryDataframe(RSW.df, answers.list, 'None') 

Das liefert:

 filter    answer question  value 
    1 None     0  R 0.0000000 
    2 None     1  R 1.0000000 
    3 None     2  R 2.0000000 
    4 None     3  R 1.0000000 
    5 None     4  R 1.0000000 
    6 None     5  R 1.0000000 
    7 None     6  R 1.0000000 
    8 None     7  R 1.0000000 
    9 None     8  R 0.0000000 
    10 None     9  R 1.0000000 
    11 None     10  R 0.0000000 
    12 None    <NA>  R 1.0000000 
    13 None    sum  R 39.0000000 
    14 None    length  R 10.0000000 
    15 None    mean  R 4.3333333 
    16 None standard.deviation  R 2.6457513 
    17 None    var  R 7.0000000 
    18 None    median  R 4.0000000 
    19 None    min  R 1.0000000 
    20 None    max  R 9.0000000 
    21 None  quantile.0%  R 1.0000000 
    22 None  quantile.25%  R 2.0000000 
    23 None  quantile.50%  R 4.0000000 
    24 None  quantile.75%  R 6.0000000 
    25 None  quantile.100%  R 9.0000000 
    26 None    skew  R 0.3275692 
    27 None   kurtosis  R -1.5333333 
    28 None     0  S 0.0000000 
    29 None     1  S 0.0000000 
    30 None     2  S 0.0000000 
    31 None     3  S 1.0000000 
    32 None     4  S 0.0000000 
    33 None     5  S 1.0000000 
    34 None     6  S 1.0000000 
    35 None     7  S 3.0000000 
    36 None     8  S 1.0000000 
    37 None     9  S 1.0000000 
    38 None     10  S 0.0000000 
    39 None    <NA>  S 2.0000000 
    40 None    sum  S 52.0000000 
    41 None    length  S 10.0000000 
    42 None    mean  S 6.5000000 
    43 None standard.deviation  S 1.8516402 
    44 None    var  S 3.4285714 
    45 None    median  S 7.0000000 
    46 None    min  S 3.0000000 
    47 None    max  S 9.0000000 
    48 None  quantile.0%  S 3.0000000 
    49 None  quantile.25%  S 5.7500000 
    50 None  quantile.50%  S 7.0000000 
    51 None  quantile.75%  S 7.2500000 
    52 None  quantile.100%  S 9.0000000 
    53 None    skew  S -0.4252986 
    54 None   kurtosis  S -1.3028646 
    55 None     0  W 0.0000000 
    56 None     1  W 0.0000000 
    57 None     2  W 1.0000000 
    58 None     3  W 0.0000000 
    59 None     4  W 2.0000000 
    60 None     5  W 2.0000000 
    61 None     6  W 2.0000000 
    62 None     7  W 2.0000000 
    63 None     8  W 1.0000000 
    64 None     9  W 0.0000000 
    65 None     10  W 0.0000000 
    66 None    <NA>  W 0.0000000 
    67 None    sum  W 54.0000000 
    68 None    length  W 10.0000000 
    69 None    mean  W 5.4000000 
    70 None standard.deviation  W 1.7763883 
    71 None    var  W 3.1555556 
    72 None    median  W 5.5000000 
    73 None    min  W 2.0000000 
    74 None    max  W 8.0000000 
    75 None  quantile.0%  W 2.0000000 
    76 None  quantile.25%  W 4.2500000 
    77 None  quantile.50%  W 5.5000000 
    78 None  quantile.75%  W 6.7500000 
    79 None  quantile.100%  W 8.0000000 
    80 None    skew  W -0.3339582 
    81 None   kurtosis  W -0.9871315 

Wenn ich aber versuche, beim Durchlaufen der Namen den jeweiligen Listeneintrag zu verwenden, etwa so:

extractSummaryDataframe <- function(questions.dataframe, answers.list, filter) {
      # Summarise every column of `questions.dataframe` with summaryStatistics()
      # and stack the results as long-format rows (answer, question, value),
      # prefixed with a constant `filter` column.
      #
      # This is the variant the question is about: the levels for the i-th
      # column are taken as answers.list[i].
      # NOTE: single brackets keep the list wrapper, so summaryStatistics()
      # receives a one-element list instead of the vector of levels.
      # answers.list[[i]] would extract the vector itself. The single brackets
      # are kept here on purpose, because the output shown below comes from
      # exactly this call.
      resultColumns <- c('answer', 'question', 'value')
      questionNames <- names(questions.dataframe)

      # long-format rows for the question at `position`
      summariseQuestion <- function(position) {
        name <- questionNames[position]
        perQuestion <- lapply(questions.dataframe[c(name)], summaryStatistics, answers.list[position])
        longForm <- as.data.frame(as.table(simplify2array(perQuestion)))
        setNames(longForm, resultColumns)
      }
      pieces <- lapply(seq_along(questionNames), summariseQuestion)

      # start from an empty, typed frame and append the pieces in column order
      emptyResult <- data.frame(answer = factor(), question = factor(), value = double())
      stacked <- Reduce(rbind, pieces, emptyResult)

      cbind(filter = filter, stacked)
    }

    # run the single-bracket variant above on the R/S/W columns
    extractSummaryDataframe(RSW.df, answers.list, 'None') 

       filter 
    1 None 
    2 None 
    3 None 
    4 None 
    5 None 
    6 None 
    7 None 
    8 None 
    9 None 
    10 None 
    11 None 
    12 None 
    13 None 
    14 None 
    15 None 
    16 None 
    17 None 
    18 None 
    19 None 
    20 None 
    21 None 
    22 None 
    23 None 
    24 None 
    25 None 
    26 None 
    27 None 
    28 None 
    29 None 
    30 None 
    31 None 
    32 None 
    33 None 
    34 None 
    35 None 
    36 None 
    37 None 
    38 None 
    39 None 
    40 None 
    41 None 
    42 None 
    43 None 
    44 None 
    45 None 
    46 None 
    47 None 
    48 None 
    49 None 
    50 None 
    51 None 
                                 answer 
    1                     c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, NA) 
    2                              <NA> 
    3                              sum 
    4                             length 
    5                              mean 
    6                          standard.deviation 
    7                              var 
    8                             median 
    9                              min 
    10                              max 
    11                            quantile.0% 
    12                           quantile.25% 
    13                           quantile.50% 
    14                           quantile.75% 
    15                           quantile.100% 
    16                             skew 
    17                            kurtosis 
    18           c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, NA) 
    19                             <NA> 
    20                              sum 
    21                             length 
    22                             mean 
    23                          standard.deviation 
    24                              var 
    25                             median 
    26                              min 
    27                              max 
    28                            quantile.0% 
    29                           quantile.25% 
    30                           quantile.50% 
    31                           quantile.75% 
    32                           quantile.100% 
    33                             skew 
    34                            kurtosis 
    35 c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, NA) 
    36                             <NA> 
    37                              sum 
    38                             length 
    39                             mean 
    40                          standard.deviation 
    41                              var 
    42                             median 
    43                              min 
    44                              max 
    45                            quantile.0% 
    46                           quantile.25% 
    47                           quantile.50% 
    48                           quantile.75% 
    49                           quantile.100% 
    50                             skew 
    51                            kurtosis 
     question  value 
    1   R 0.0000000 
    2   R 10.0000000 
    3   R 39.0000000 
    4   R 10.0000000 
    5   R 4.3333333 
    6   R 2.6457513 
    7   R 7.0000000 
    8   R 4.0000000 
    9   R 1.0000000 
    10  R 9.0000000 
    11  R 1.0000000 
    12  R 2.0000000 
    13  R 4.0000000 
    14  R 6.0000000 
    15  R 9.0000000 
    16  R 0.3275692 
    17  R -1.5333333 
    18  S 0.0000000 
    19  S 10.0000000 
    20  S 52.0000000 
    21  S 10.0000000 
    22  S 6.5000000 
    23  S 1.8516402 
    24  S 3.4285714 
    25  S 7.0000000 
    26  S 3.0000000 
    27  S 9.0000000 
    28  S 3.0000000 
    29  S 5.7500000 
    30  S 7.0000000 
    31  S 7.2500000 
    32  S 9.0000000 
    33  S -0.4252986 
    34  S -1.3028646 
    35  W 0.0000000 
    36  W 10.0000000 
    37  W 54.0000000 
    38  W 10.0000000 
    39  W 5.4000000 
    40  W 1.7763883 
    41  W 3.1555556 
    42  W 5.5000000 
    43  W 2.0000000 
    44  W 8.0000000 
    45  W 2.0000000 
    46  W 4.2500000 
    47  W 5.5000000 
    48  W 6.7500000 
    49  W 8.0000000 
    50  W -0.3339582 
    51  W -0.9871315 

Das ist überhaupt nicht das Ergebnis, nach dem ich suche.

Mit welcher Syntax wird answers.list[1] als c(0:10, NA) erkannt?

Antwort

0

Mein Syntaxfehler war: answers.list[listIndex] hätte answers.list[[listIndex]] lauten müssen.

Ich habe das mit der Funktion class() herausgefunden.

class(answers.list) returned list: expected. 

class(answers.list[1]) returned list: unexpected. 

class(answers.list[[1]]) returned integer: which is what I was looking for. 

Der neue Code lautet:

# create the summary function 
    summaryStatistics <- function(x, levels) {
      # Summarise one vector of responses as a single named numeric vector:
      # first the frequency of every permitted answer in `levels` (followed by
      # the count of missing responses), then the descriptive statistics.
      #
      # x      : vector of responses, may contain NA
      # levels : permitted answer values, used as the factor levels for counting
      answerCounts <- table(factor(x, levels = levels), useNA = 'always', exclude = NULL)

      observed <- na.omit(x)               # responses that were actually given
      nResponses <- length(x)              # counts missing responses as well
      spread <- var(observed)
      sdev <- sqrt(spread)
      centred <- observed - mean(observed)

      # NOTE(review): skew and kurtosis are normalised by length(x), i.e. NA
      # responses are included in the denominator - confirm this is intended.
      c(answerCounts,
        sum = sum(observed),
        length = nResponses,
        mean = mean(observed),
        standard.deviation = sdev,
        var = spread,
        median = median(observed),
        min = min(observed),
        max = max(observed),
        quantile = quantile(observed),
        skew = sum(centred^3 / sdev^3) / nResponses,
        kurtosis = sum(centred^4 / sdev^4) / nResponses - 3
      )
    }

    # create the function that steps through the summary function 
    extractSummaryDataframe <- function(questions.dataframe, answers.list, filter) {
      # Summarise every column of `questions.dataframe` with summaryStatistics()
      # and return the stacked results in long format.
      #
      # questions.dataframe : one column per question
      # answers.list        : list whose i-th element holds the permitted answer
      #                       values (factor levels) for the i-th column
      # filter              : value written to the constant `filter` column
      #
      # Returns a data frame with the columns filter, question, answer, value.
      questionNames <- names(questions.dataframe)

      # fail with a clear message instead of "subscript out of bounds"
      if (length(answers.list) < length(questionNames)) {
        stop('answers.list must contain one vector of answer levels per question column',
             call. = FALSE)
      }

      # long-format rows for the question at `position`; `[[` extracts the
      # vector of levels itself (single brackets would pass a one-element list)
      summariseQuestion <- function(position) {
        name <- questionNames[position]
        perQuestion <- lapply(
          questions.dataframe[c(name)],
          summaryStatistics,
          answers.list[[position]]
        )
        setNames(
          as.data.frame(as.table(simplify2array(perQuestion))),
          c('answer', 'question', 'value')
        )
      }
      pieces <- lapply(seq_along(questionNames), summariseQuestion)

      # bind everything in one call instead of growing the frame in a loop; the
      # empty typed frame keeps the column layout when there are no questions,
      # and the list stays unnamed so the row names remain 1..n
      emptyResult <- data.frame(answer = factor(), question = factor(), value = double())
      stacked <- do.call(rbind, c(list(emptyResult), unname(pieces)))

      # column order: question, answer, value
      stacked <- stacked[, c(2, 1, 3)]
      cbind(filter = filter, stacked)
    }

    # test data: ten respondents, NA marks an unanswered question
    Id     <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
    ClassA <- c(1, NA, 3, 1, 1, 2, 1, 4, 5, 3)
    ClassB <- c(2, 1, 1, 3, 3, 2, 1, 1, 3, 3)
    R      <- c(1, 2, 3, NA, 9, 2, 4, 5, 6, 7)
    S      <- c(3, 7, NA, 9, 5, 8, 7, NA, 7, 6)
    W      <- c(4, 5, 6, 7, 2, 4, 5, 6, 7, 8)
    df <- data.frame(Id = Id, ClassA = ClassA, ClassB = ClassB, R = R, S = S, W = W)

    # permitted answers per question (NA = no answer)
    ClassAAnswers <- c(1:5, NA)
    ClassBAnswers <- c(1:5, NA)

    RAnswers <- c(0:10, NA)
    SAnswers <- c(0:20, NA)
    WAnswers <- c(0:30, NA)

    # the order of the answer scales must match the column order of RSW.df
    answers.list <- list(RAnswers, SAnswers, WAnswers)
    RSW.df <- df[c('R', 'S', 'W')]

    # create the result
    result <- extractSummaryDataframe(RSW.df, answers.list, 'None')

    # return the result
    result

Das liefert:

 filter question    answer  value 
    1  None  R     0 0.0000000 
    2  None  R     1 1.0000000 
    3  None  R     2 2.0000000 
    4  None  R     3 1.0000000 
    5  None  R     4 1.0000000 
    6  None  R     5 1.0000000 
    7  None  R     6 1.0000000 
    8  None  R     7 1.0000000 
    9  None  R     8 0.0000000 
    10 None  R     9 1.0000000 
    11 None  R     10 0.0000000 
    12 None  R    <NA> 1.0000000 
    13 None  R    sum 39.0000000 
    14 None  R    length 10.0000000 
    15 None  R    mean 4.3333333 
    16 None  R standard.deviation 2.6457513 
    17 None  R    var 7.0000000 
    18 None  R    median 4.0000000 
    19 None  R    min 1.0000000 
    20 None  R    max 9.0000000 
    21 None  R  quantile.0% 1.0000000 
    22 None  R  quantile.25% 2.0000000 
    23 None  R  quantile.50% 4.0000000 
    24 None  R  quantile.75% 6.0000000 
    25 None  R  quantile.100% 9.0000000 
    26 None  R    skew 0.3275692 
    27 None  R   kurtosis -1.5333333 
    28 None  S     0 0.0000000 
    29 None  S     1 0.0000000 
    30 None  S     2 0.0000000 
    31 None  S     3 1.0000000 
    32 None  S     4 0.0000000 
    33 None  S     5 1.0000000 
    34 None  S     6 1.0000000 
    35 None  S     7 3.0000000 
    36 None  S     8 1.0000000 
    37 None  S     9 1.0000000 
    38 None  S     10 0.0000000 
    39 None  S     11 0.0000000 
    40 None  S     12 0.0000000 
    41 None  S     13 0.0000000 
    42 None  S     14 0.0000000 
    43 None  S     15 0.0000000 
    44 None  S     16 0.0000000 
    45 None  S     17 0.0000000 
    46 None  S     18 0.0000000 
    47 None  S     19 0.0000000 
    48 None  S     20 0.0000000 
    49 None  S    <NA> 2.0000000 
    50 None  S    sum 52.0000000 
    51 None  S    length 10.0000000 
    52 None  S    mean 6.5000000 
    53 None  S standard.deviation 1.8516402 
    54 None  S    var 3.4285714 
    55 None  S    median 7.0000000 
    56 None  S    min 3.0000000 
    57 None  S    max 9.0000000 
    58 None  S  quantile.0% 3.0000000 
    59 None  S  quantile.25% 5.7500000 
    60 None  S  quantile.50% 7.0000000 
    61 None  S  quantile.75% 7.2500000 
    62 None  S  quantile.100% 9.0000000 
    63 None  S    skew -0.4252986 
    64 None  S   kurtosis -1.3028646 
    65 None  W     0 0.0000000 
    66 None  W     1 0.0000000 
    67 None  W     2 1.0000000 
    68 None  W     3 0.0000000 
    69 None  W     4 2.0000000 
    70 None  W     5 2.0000000 
    71 None  W     6 2.0000000 
    72 None  W     7 2.0000000 
    73 None  W     8 1.0000000 
    74 None  W     9 0.0000000 
    75 None  W     10 0.0000000 
    76 None  W     11 0.0000000 
    77 None  W     12 0.0000000 
    78 None  W     13 0.0000000 
    79 None  W     14 0.0000000 
    80 None  W     15 0.0000000 
    81 None  W     16 0.0000000 
    82 None  W     17 0.0000000 
    83 None  W     18 0.0000000 
    84 None  W     19 0.0000000 
    85 None  W     20 0.0000000 
    86 None  W     21 0.0000000 
    87 None  W     22 0.0000000 
    88 None  W     23 0.0000000 
    89 None  W     24 0.0000000 
    90 None  W     25 0.0000000 
    91 None  W     26 0.0000000 
    92 None  W     27 0.0000000 
    93 None  W     28 0.0000000 
    94 None  W     29 0.0000000 
    95 None  W     30 0.0000000 
    96 None  W    <NA> 0.0000000 
    97 None  W    sum 54.0000000 
    98 None  W    length 10.0000000 
    99 None  W    mean 5.4000000 
    100 None  W standard.deviation 1.7763883 
    101 None  W    var 3.1555556 
    102 None  W    median 5.5000000 
    103 None  W    min 2.0000000 
    104 None  W    max 8.0000000 
    105 None  W  quantile.0% 2.0000000 
    106 None  W  quantile.25% 4.2500000 
    107 None  W  quantile.50% 5.5000000 
    108 None  W  quantile.75% 6.7500000 
    109 None  W  quantile.100% 8.0000000 
    110 None  W    skew -0.3339582 
    111 None  W   kurtosis -0.9871315 

Das ist genau das, wonach ich suche :-)

Verwandte Themen