R 数据框中的条件计数聚合

时间:2017-05-01 01:17:16

标签: r dataframe aggregate

我有一个如下所示的数据框:

SubjectID CoupleID PrePost hit1RT  hit2RT hit3RT ... hit26RT  miss1RT  miss2RT miss3RT ... miss26RT
1531      153    Post       5        5      NA   ...   3        NA      NA     2      ...     NA
1531      153     Pre       NA        5      2   ...   3        4      NA     NA     ...     NA  
1532      153    Post       2        NA      NA   ...   2        NA      5     2      ...     NA    

hit[i]RT 和 miss[i]RT 各有 26 列;每个 SubjectID 有两行,一行 PrePost == 'Pre',一行 PrePost == 'Post'。

我想创建一个新的数据框,同样是每个 SubjectID / PrePost 组合一行(即每个 SubjectID 两行),其中一列是该行中不为 NA 的 hit[i]RT 单元格总数,另一列是该行中不为 NA 的 miss[i]RT 单元格总数。

由于共有 26 个试验,每个试验要么命中、要么未命中,因此 hitcount 列 + misscount 列应该等于 26。

例如:

Subject ID    PrePost   hitcount    misscount
1531           Pre        3          23
1531           Post       5          21
1532           Pre        10         16
1532           Post       21         5

编辑:根据评论中的要求,添加 dput 输出

structure(list(SubjectID = c("1531", "1531", "1532", "1532", "5291", "5291"), CoupleID = c("153", "153", "153", "153", "529", "529"), PrePost = c("Post", "Pre", "Post", "Pre", "Post", "Pre" ), hit10RT = c(11.0550000000076, 11.0209999999934, 11.0889999999927, 11.0270000000019, 11.0499999999956, 11.0610000000015), hit11RT = c(15.5299999999988, 15.6460000000079, 15.5979999999981, 15.5310000000027, 15.8790000000008, 15.5410000000047), hit12RT = c(15.5329999999958, 15.5209999999934, 15.5350000000035, 15.5160000000033, 15.5840000000026, 15.5469999999987 ), hit13RT = c(8.03299999999581, 8.03600000000733, 8.03299999999581, 8.0509999999922, 8.05399999999645, 8.03899999999703), hit14RT = c(15.601999999999, 15.5269999999873, 15.625, 15.6340000000055, 15.5889999999999, 15.5449999999983), hit15RT = c(15.5280000000057, 15.5350000000035, 15.5280000000057, 16.0089999999909, 15.5450000000055, 15.6209999999992 ), hit16RT = c(11.0849999999919, 11.0200000000041, 11.0329999999958, 11.0370000000112, 11.0459999999948, 11.0440000000017), hit17RT = c(14.0370000000112, 14.0610000000015, 14.0890000000072, 14.1059999999998, 14.1180000000022, 14.0440000000017), hit18RT = c(6.51999999998952, 6.53800000000047, NA, 6.58799999998882, 6.5679999999993, 6.57600000000093), hit19RT = c(9.52200000001176, 9.54299999999057, 9.64699999999721, 9.50700000001234, 9.64899999999761, 9.62799999999697), hit1RT = c(NA, NA, NA, NA, NA, 0), hit20RT = c(15.5369999999966, 15.5210000000079, 15.525999999998, 15.5639999999985, 15.6130000000048, 15.6170000000056), hit21RT = c(14.0570000000007, 14.0439999999944, 14.0380000000005, 14.0219999999972, 14.0219999999972, 14.0479999999952 ), hit22RT = c(15.5829999999987, 15.5290000000095, 15.5219999999972, 15.5840000000026, 15.5970000000016, 15.5480000000025), hit23RT = c(12.6189999999915, 12.5779999999941, 12.5200000000041, 12.5369999999966, 12.5329999999958, 12.5319999999992), hit24RT = c(6.52100000000792, 6.52700000000186, 6.53800000000047, 6.55899999999383, 6.54100000000471, 
6.53800000000047 ), hit25RT = c(14.0580000000045, 14.0979999999981, 14.0359999999928, 14.1100000000006, 14.0999999999985, 14.1460000000006), hit26RT = c(15.525999999998, 15.5540000000037, 15.570000000007, 15.5890000000072, 15.5610000000015, 15.6259999999966), hit2RT = c(36.781999999992, 96.6390000000101, 35.6609999999928, 108.394, 54.0280000000057, NA), hit3RT = c(14.0270000000019, 14.0539999999892, 14.0369999999966, 14.0130000000063, 14.0360000000001, 14.0639999999985), hit4RT = c(15.5850000000064, 15.5080000000016, 15.5610000000015, 15.6109999999899, 15.5859999999957, 15.5490000000063 ), hit5RT = c(6.50699999999779, 6.53699999999662, 6.57000000000698, 6.52200000001176, 6.55800000000454, 6.64699999999721), hit6RT = c(15.5650000000023, 15.6280000000115, 15.5849999999919, 15.531999999992, 15.5349999999962, 15.6630000000005), hit7RT = c(12.5760000000009, 12.5190000000002, 12.5350000000035, 12.5200000000041, 12.5390000000043, NA), hit8RT = c(6.62699999999313, 6.5049999999901, 6.50599999999395, 6.50599999999395, 6.55099999999948, 6.65199999999459), hit9RT = c(8.00400000000081, 8.03600000000733, 8.03300000001036, 8.03800000000047, 8.12299999999959, NA), miss10RT = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), miss11RT = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), miss12RT = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), miss13RT = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), miss14RT = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), miss15RT = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), miss16RT = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), miss17RT = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), miss18RT = c(NA, NA, 6.60599999999977, NA, NA, NA), miss19RT = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), miss1RT = c(0, 0, 0, 0, 0, NA), miss20RT = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), miss21RT 
= c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), miss22RT = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), miss23RT = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), miss24RT = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), miss25RT = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), miss26RT = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), miss2RT = c(NA, NA, NA, NA, NA, 104.578000000001), miss3RT = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), miss4RT = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), miss5RT = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), miss6RT = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), miss7RT = c(NA, NA, NA, NA, NA, 12.6160000000018), miss8RT = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), miss9RT = c(NA, NA, NA, NA, NA, 8.03399999999965)), .Names = c("SubjectID", "CoupleID", "PrePost", "hit10RT", "hit11RT", "hit12RT", "hit13RT", "hit14RT", "hit15RT", "hit16RT", "hit17RT", "hit18RT", "hit19RT", "hit1RT", "hit20RT", "hit21RT", "hit22RT", "hit23RT", "hit24RT", "hit25RT", "hit26RT", "hit2RT", "hit3RT", "hit4RT", "hit5RT", "hit6RT", "hit7RT", "hit8RT", "hit9RT", "miss10RT", "miss11RT", "miss12RT", "miss13RT", "miss14RT", "miss15RT", "miss16RT", "miss17RT", "miss18RT", "miss19RT", "miss1RT", "miss20RT", "miss21RT", "miss22RT", "miss23RT", "miss24RT", "miss25RT", "miss26RT", "miss2RT", "miss3RT", "miss4RT", "miss5RT", "miss6RT", "miss7RT", "miss8RT", "miss9RT"), sorted = c("SubjectID", "CoupleID", "PrePost"), class = c("data.table", "data.frame"), row.names = c(NA, -6L), .internal.selfref = <pointer: 0x101820b78>)

1 个答案:

答案 0 :(得分:0)

您可以尝试以下方法:

# Build one summary row per input row of `df`, counting how many of the hit
# columns and how many of the miss columns hold a non-missing value.
#
# Columns are chosen by position: 4:29 are taken as the hit columns and
# 30:ncol(df) as the miss columns (three ID columns followed by 26 hit columns,
# then the miss columns). Reordering or adding columns requires adjusting these
# ranges.
#
# is.na() on a data frame returns a logical matrix, so the counts come from a
# single vectorised rowSums() call per group instead of a row-by-row apply().
# Indexing that matrix also behaves the same whether `df` is a plain data.frame
# or a data.table. local() keeps the intermediate matrix out of the workspace.
df.new <- local({
  observed <- !is.na(df)
  data.frame(
    SubjectID = df$SubjectID,
    PrePost = df$PrePost,
    # rowSums() returns doubles; as.integer() keeps the counts integer-typed.
    hitcount = as.integer(rowSums(observed[, 4:29, drop = FALSE])),
    misscount = as.integer(
      rowSums(observed[, 30:ncol(observed), drop = FALSE])
    )
  )
})

如果以后可能添加更多 “hit”(命中)或 “miss”(未命中)列,也可以用下面的写法使其更通用:

# Build one summary row per input row of `df`, counting how many of the
# "hit*" columns and how many of the "miss*" columns hold a non-missing value.
#
# Columns are chosen by name prefix, so adding further hit/miss columns needs
# no change here.
#
# The selection is done on the logical matrix returned by is.na(df) rather than
# with `df[, <expression>]`: when `df` is a data.table, an expression in `j`
# that refers to a variable is evaluated as code and yields the character
# vector of names instead of the columns, which makes apply() fail. Matrix
# indexing works identically for data.frame and data.table inputs, and
# rowSums() replaces the row-by-row apply(). local() keeps the intermediate
# objects out of the workspace.
df.new1 <- local({
  observed <- !is.na(df)
  col_names <- colnames(observed)
  hit_cols <- startsWith(col_names, "hit")
  miss_cols <- startsWith(col_names, "miss")
  data.frame(
    SubjectID = df$SubjectID,
    PrePost = df$PrePost,
    # rowSums() returns doubles; as.integer() keeps the counts integer-typed.
    hitcount = as.integer(rowSums(observed[, hit_cols, drop = FALSE])),
    misscount = as.integer(rowSums(observed[, miss_cols, drop = FALSE]))
  )
})