表格输出 - R中的for循环

时间:2016-12-29 18:42:00

标签: r for-loop statistics

我想得到一张表,其中包含for循环内计算出的所有值。我检查了d6到d10各自的长度,都是35,与“ou”的长度相同。我能够把这些值分别存储为向量。这里的“ou”是安哥拉、博茨瓦纳等国家名称。

IDictionary<string, object>

我的代码:

ou<- sort(unique(vldata$OperatingUnit))
for (i in ou) {
  d6 <- sum(vldata$FY2016APR[vldata$indicator=="TX_CURR" & vldata$OperatingUnit==i],na.rm = TRUE)
  d7 <- sum(vldata$FY2016Q2[vldata$indicator=="TX_CURR" & vldata$OperatingUnit==i],na.rm = TRUE)
  d8 <- sum(vldata$FY2016APR[vldata$indicator=="TX_VIRAL" & vldata$numeratorDenom=="D" & vldata$OperatingUnit==i], na.rm = TRUE)
  d9 <- sum(vldata$FY2016APR[vldata$indicator=="TX_VIRAL" & vldata$numeratorDenom=="N" & vldata$OperatingUnit==i], na.rm = TRUE )
  d10 <- sum(vldata$FY2016APR[vldata$indicator=="TX_VIRAL" & vldata$categoryOptionComboName=="Undetectable" & vldata$OperatingUnit==i], na.rm = TRUE)
  dash1.table <- table(vldata$OperatingUnit,d6, d7, d8, d9, d10)
}

错误:

Error in table(vldata$OperatingUnit, d6, d7, d8, d9, d10, : 
  all arguments must have the same length

运行循环后,我得到:

> str(ou)
List of 1
$ : Factor w/ 35 levels "Angola","Asia Regional Program",..: 1 1 1 1 1 1 1 1 1 1 ...

str(vldata$OperatingUnit)
Factor w/ 35 levels "Angola","Asia Regional Program",..: 1 1 1 1 1 1 1 1 1 1 ...

这是从我的数据(超过20万行)中抽取的30行样本。您可以把UIDAll当作操作单元(OperatingUnit)名称或国家/地区名称来使用。

str(i)
Factor w/ 35 levels "Angola","Asia Regional Program",..: 1 1 1 1 1 1 1 1 1 1 ...

此处structure(list(UIDAll = structure(c(4L, 19L, 30L, 21L, 12L, 1L, 24L, 20L, 9L, 22L, 13L, 8L, 3L, 6L, 5L, 29L, 7L, 18L, 25L, 15L, 2L, 23L, 11L, 14L, 17L, 16L, 10L, 27L, 26L, 28L), .Label = c("a7kj6yR3FRT", "a9b2mJ4Z5Kv", "bcYTjHaZcE4", "CVAHCZfm5MK", "d507u4dhfug", "dBBdk4e1xGM", "h54eHtqrMYc", "h5siIA7lQfx", "Hk6QKCvBV9B", "I3Pvb7ryppf", "IrB4tyQKF5E", "JgEjEEV5Nsy", "JKnoiKiTSgm", "loiaqD14rjG", "mtHydPyChpG", "nPh5JYQQyqa", "oWG1PMWA3N1", "QDkCAOGVng6", "QZwDp4lHXWp", "rV9MIblAWIF", "s6HYTxvqhzX", "sNi4VZ2HDoT", "WGkhTclMK4U", "wZsEzdJHKVJ", "XME74MPBxXl", "Y63LmT4BKqS", "ycP81nJei2m", "yhnSeJ5NaC2", "yqGgSdD5hP3", "Z4XPx6jaeFf"), class = "factor"), indicator = structure(c(2L, 2L, 1L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 2L, 2L, 3L, 2L, 3L, 2L, 2L, 1L, 3L, 1L, 3L, 3L, 2L, 3L, 2L, 3L, 2L, 2L, 3L, 2L), .Label = c("TX_CURR", "TX_UNDETECT", "TX_VIRAL"), class = "factor"), numeratorDenom = structure(c(2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("D", "N"), class = "factor"), categoryOptionComboName = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 5L, 3L, 1L, 5L, 1L, 1L, 1L, 1L, 5L, 1L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 5L, 3L, 1L, 3L, 1L, 2L, 3L), .Label = c("default", "Detectable", "Routine", "Targeted", "Undetectable"), class = "factor"), FY2016Q2 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 764L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), FY2016Q4 = c(71L, NA, NA, NA, 43L, NA, 844L, 2130L, NA, 0L, -9L, 0L, NA, NA, 0L, NA, NA, 643L, NA, NA, NA, NA, 48L, NA, 889L, NA, 89L, 651L, NA, 115L), FY2016APR = c(71L, NA, NA, NA, 43L, NA, 844L, 2130L, NA, 0L, -9L, 0L, NA, NA, 0L, NA, NA, 643L, NA, NA, NA, NA, 48L, NA, 889L, NA, 89L, 651L, NA, 115L)), .Names = c("UIDAll", "indicator", "numeratorDenom", "categoryOptionComboName", "FY2016Q2", "FY2016Q4", "FY2016APR" ), row.names = c(NA, -30L), class = "data.frame") ou

1 个答案:

答案 0 :(得分:0)

看起来问题出在您使用table()函数的方式上。

table()统计输入向量中每个水平(level)出现的频数。如果给它多个长度相等的向量,它会创建一个交叉表。但是,sum()的结果是标量(长度为1)。

在每次迭代中,您对table()的调用都把一个长向量和多个标量混在一起传入,参数长度不一致,所以会报错。

我认为这就是你想要的:

# Build dash1.table: one row per unit in `ou`, with five totals (d6-d10)
# computed from `vldata`.
#
# Requires `vldata` and `ou` to exist already.
# NOTE(review): `ou` is presumably unique(vldata$UIDAll) -- confirm against
# the caller; it is not defined in this snippet.

# Row filters that do not depend on the unit. They are built once here
# instead of being recomputed over all of `vldata` on every iteration.
is_curr <- vldata$indicator == "TX_CURR"
is_viral <- vldata$indicator == "TX_VIRAL"
viral_denominator <- is_viral & vldata$numeratorDenom == "D"
viral_numerator <- is_viral & vldata$numeratorDenom == "N"
viral_undetectable <- is_viral &
  vldata$categoryOptionComboName == "Undetectable"

# The d* columns start as NA placeholders and are filled in by the loop.
dash1.table <- data.frame(
  ou = ou, d6 = NA, d7 = NA, d8 = NA, d9 = NA, d10 = NA
)

for (i in ou) {
  in_unit <- vldata$UIDAll == i   # rows of vldata belonging to this unit
  target <- dash1.table$ou == i   # row(s) of the result table to fill

  # sum() returns a single value, so each total is written into the row(s)
  # of the current unit rather than being passed to table().
  dash1.table$d6[target] <-
    sum(vldata$FY2016APR[is_curr & in_unit], na.rm = TRUE)
  dash1.table$d7[target] <-
    sum(vldata$FY2016Q2[is_curr & in_unit], na.rm = TRUE)
  dash1.table$d8[target] <-
    sum(vldata$FY2016APR[viral_denominator & in_unit], na.rm = TRUE)
  dash1.table$d9[target] <-
    sum(vldata$FY2016APR[viral_numerator & in_unit], na.rm = TRUE)
  dash1.table$d10[target] <-
    sum(vldata$FY2016APR[viral_undetectable & in_unit], na.rm = TRUE)
}

dash1.table
            ou  d6  d7 d8  d9 d10
1  CVAHCZfm5MK   0   0  0   0   0
2  QZwDp4lHXWp   0   0  0   0   0
3  Z4XPx6jaeFf   0   0  0   0   0
4  s6HYTxvqhzX   0   0  0   0   0
5  JgEjEEV5Nsy   0   0 43   0   0
6  a7kj6yR3FRT   0   0  0   0   0
7  wZsEzdJHKVJ   0   0  0 844 844
8  rV9MIblAWIF   0   0  0   0   0
9  Hk6QKCvBV9B   0   0  0   0   0
10 sNi4VZ2HDoT   0   0  0   0   0
11 JKnoiKiTSgm   0   0  0   0   0
12 h5siIA7lQfx   0   0  0   0   0
13 bcYTjHaZcE4   0   0  0   0   0
14 dBBdk4e1xGM   0   0  0   0   0
15 d507u4dhfug   0   0  0   0   0
16 yqGgSdD5hP3   0   0  0   0   0
17 h54eHtqrMYc   0   0  0   0   0
18 QDkCAOGVng6 643 764  0   0   0
19 XME74MPBxXl   0   0  0   0   0
20 mtHydPyChpG   0   0  0   0   0
21 a9b2mJ4Z5Kv   0   0  0   0   0
22 WGkhTclMK4U   0   0  0   0   0
23 IrB4tyQKF5E   0   0  0   0   0
24 loiaqD14rjG   0   0  0   0   0
25 oWG1PMWA3N1   0   0  0   0   0
26 nPh5JYQQyqa   0   0  0   0   0
27 I3Pvb7ryppf   0   0  0   0   0
28 ycP81nJei2m   0   0  0   0   0
29 Y63LmT4BKqS   0   0  0   0   0
30 yhnSeJ5NaC2   0   0  0   0   0

请注意,由于这里用的只是一小部分样本数据,所以结果中有很多0。