使用lapply,Reduce和union折叠data.table中的行

时间:2016-09-21 17:50:20

标签: r data.table

我有一个data.table示例(JACcar),它应该使用下面的代码基于ID折叠成一行。但是,我无法弄清楚为什么它不会崩溃少于2行。我还试图通过将列限制为仅包含NA以外的值的列来验证输出,因为原始数据中有123列。有人可以帮助解释为什么数据不会折叠成一行,原始数据中所有4行的ID都是相同的吗?

dput(JACcar)
structure(list(ID = c("64909", "64909", "64909", "64909"), totni = c(-341.31, 
-341.31, -341.31, -341.31), I1 = c(NA_integer_, NA_integer_, 
NA_integer_, NA_integer_), I10 = c(NA_integer_, NA_integer_, 
NA_integer_, NA_integer_), I11 = c(NA_integer_, NA_integer_, 
NA_integer_, NA_integer_), I12 = c(NA_integer_, NA_integer_, 
NA_integer_, NA_integer_), I14 = c(NA_integer_, NA_integer_, 
NA_integer_, NA_integer_), I15 = c(NA_integer_, NA_integer_, 
NA_integer_, NA_integer_), I2 = c(NA_integer_, NA_integer_, NA_integer_, 
NA_integer_), I20 = c(NA_integer_, NA_integer_, NA_integer_, 
NA_integer_), I21 = c(NA_integer_, NA_integer_, NA_integer_, 
NA_integer_), I22 = c(NA_integer_, NA_integer_, NA_integer_, 
NA_integer_), I3 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
), I30 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
), I31 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
), I32 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
), I33 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
), I34 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
), I35 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
), I36 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
), I37 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
), I38 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
), I4 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_), 
    I40 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), I41 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), I42 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), I43 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), I44 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), I45 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), I46 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), I47 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), I48 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), I5 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), I50 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), I51 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), I52 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), I54 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), I55 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), I56 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), I57 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), I58 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), I6 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), I60 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), I61 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), I62 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), I63 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), I64 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), I66 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), I7 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), I70 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), I71 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), I72 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), I73 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), I74 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), I75 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), I76 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), I77 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), I8 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), I9 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), L1 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), L10 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), L12 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), L19 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), L2 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), L21 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), L22 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), L23 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), L24 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), L25 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), L26 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), L28 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), L29 = c(NA, NA, NA, 4L), L3 = c(NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_), L32 = c(NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_), L35 = c(NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_), L36 = c(NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_), L37 = c(NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_), L4 = c(NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_), L40 = c(NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_), L41 = c(NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_), L42 = c(NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_), L5 = c(NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_), L50 = c(NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_), L51 = c(NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_), L54 = c(NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_), L55 = c(NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_), L56 = c(NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_), L57 = c(NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_), L58 = c(NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_), L6 = c(NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_), L61 = c(NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_), L63 = c(NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_), L64 = c(NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_), L66 = c(NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_), L7 = c(NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_), L70 = c(NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_), L71 = c(NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_), L77 = c(NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_), L8 = c(NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_), L80 = c(NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_), L81 = c(NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_), L82 = c(NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_), L83 = c(NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_), L84 = c(NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_), L85 = c(NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_), L86 = c(NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_), L87 = c(NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_), L9 = c(NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_), S14 = c(NA, NA, 3L, NA), S15 = c(NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_), S2 = c(1L, NA, NA, 
    NA), S22 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), S23 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), S3 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), S35 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), S4 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), S5 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), S66 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), S7 = c(NA, 2L, NA, NA), S70 = c(NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_), S97 = c(NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_), S98 = c(NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_), S99 = c(NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_)), .Names = c("ID", "totni", "I1", 
"I10", "I11", "I12", "I14", "I15", "I2", "I20", "I21", "I22", 
"I3", "I30", "I31", "I32", "I33", "I34", "I35", "I36", "I37", 
"I38", "I4", "I40", "I41", "I42", "I43", "I44", "I45", "I46", 
"I47", "I48", "I5", "I50", "I51", "I52", "I54", "I55", "I56", 
"I57", "I58", "I6", "I60", "I61", "I62", "I63", "I64", "I66", 
"I7", "I70", "I71", "I72", "I73", "I74", "I75", "I76", "I77", 
"I8", "I9", "L1", "L10", "L12", "L19", "L2", "L21", "L22", "L23", 
"L24", "L25", "L26", "L28", "L29", "L3", "L32", "L35", "L36", 
"L37", "L4", "L40", "L41", "L42", "L5", "L50", "L51", "L54", 
"L55", "L56", "L57", "L58", "L6", "L61", "L63", "L64", "L66", 
"L7", "L70", "L71", "L77", "L8", "L80", "L81", "L82", "L83", 
"L84", "L85", "L86", "L87", "L9", "S14", "S15", "S2", "S22", 
"S23", "S3", "S35", "S4", "S5", "S66", "S7", "S70", "S97", "S98", 
"S99"), class = c("tbl_dt", "tbl", "data.table", "data.frame"
), row.names = c(NA, -4L), .internal.selfref = <pointer: 0x00000000003e0788>)

jacCARtest <- JACcar[,lapply(.SD, function(x) 
     Reduce(union, x)),
     by = ID]
jacCARfull <- jacCARtest[, Filter(function(x) !all(is.na(x)), jacCARtest)]

1 个答案:

答案 0 :(得分:2)

原因是某些列具有NA和非NA元素。使用OP的代码,NA和非NA元素保持导致> 1行。如果我们只需要按&#39; ID&#39;分组的单行,请创建if/else条件以返回unique非NA元素或if all元素是NA,返回&#39; NA&#39;

JACcar[,lapply(.SD, function(x) if(all(is.na(x))) NA
                                else unique(x[!is.na(x)])) , by = ID]

或者我们可以使用which,获取第一个索引来对列中的元素进行子集化(因为每列中只有一个唯一的非NA元素)

JACcar[,lapply(.SD, function(x) x[which(!is.na(x))[1]]) , ID]