我有一个数据框,其中包含若干组变量。我需要提取每个组,并为每个组创建一个新变量,即该组各列的行总和。每个组可以通过列名中包含的 0 到 5 之间的数字来识别。
目前,我通过以下步骤分别提取和汇总每个组:
# Add three columns (or list elements) of `x` together element-wise.
#
# x:                a data frame or list.
# col1, col2, col3: positions or names passed to `[[` to pick each addend.
# Returns the element-wise sum, accumulated left to right.
sum_cols <- function(x, col1, col2, col3) {
  addends <- lapply(list(col1, col2, col3), function(idx) x[[idx]])
  Reduce(`+`, addends)
}
# Keep only the columns whose names contain "0", add the row-wise total of the
# first three of them as `p0`, and return that total as a plain vector.
# Wrapped in local() so the intermediate data frame does not leak into the
# calling environment.
local({
  zero_cols <- select(test, contains('0'))
  with_total <- mutate(
    zero_cols,
    p0 = sum_cols(x = zero_cols, col1 = 1, col2 = 2, col3 = 3)
  )
  pull(with_total, p0)
})
然后对其余每个组重复此步骤(把数字依次替换为 1、2、3、4、5),最后将每个新创建的向量组合成一个数据框。
我认为应该有更快/更好的方法来完成这一步。我尝试过 for 循环,但没能让它正常工作。或者,也许可以用 split() 拆分数据,或者使用 apply 系列的某个函数,但我不确定哪种方法最好。任何建议都将不胜感激。
谢谢
这里有一个数据样本:
# Sample data: 10 rows with a Time column followed by X/Y/Z columns for each
# of the six groups P0 to P5 (column names follow the pattern X.P<n>..<axis>).
test <- data.frame(Time = c(0, 0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008, 0.009),
X.P0..X = c(2.26, 1.722, 2.798, 1.722, 1.722, 2.798, 1.722, 1.722, 2.26, 2.26),
X.P0..Y = c(1.434, 1.434, 0.641, 0.641, 0.107, -0.411, -0.683, -0.423, -0.151, 0.369),
X.P0..Z = c(3.188, 3.188, 3.188, 3.726, 3.188, 2.65, 3.188, 3.188, 3.188, 3.188),
X.P1..X = c(15.656, 15.656, 15.656, 15.118, 15.118, 15.656, 15.656, 15.118, 15.656, 15.656),
X.P1..Y = c(268.808, 268.033, 269.322, 269.063, 269.318, 269.065, 269.065, 269.322, 268.813, 269.055),
X.P1..Z = c(14.096, 14.096, 14.096, 14.634, 14.634, 14.634, 14.096, 14.634, 14.634, 14.634),
X.P2..X = c(1.063, 1.063, 1.063, 1.063, 1.063, 1.063, 1.063, 1.063, 1.063, 1.063),
X.P2..Y = c(-0.292, -1.068, -1.07, -0.813, -1.329, -0.551, -1.07, -0.813, -1.074, -1.074),
X.P2..Z = c(-1.143, -1.681, -1.681, -1.143, -1.681, -1.143, -1.681, -1.681, -1.681, -1.143),
X.P3..X = c(-0.417, -0.955, -1.493, -1.493, -0.955, -0.955, -0.955, -0.955, -1.493, -0.955),
X.P3..Y = c(0.041, 0.297, 0.288, 0.554, 0.565, 0.291, 0.295, 0.298, 0.554, 0.554),
X.P3..Z = c(0.726, 1.802, 1.802, 1.264, 1.264, 1.264, 1.264, 0.726, 1.264, 0.726),
X.P4..X = c(-7.411, -6.873, -7.411, -7.411, -6.873, -7.411, -7.411, -7.411, -6.873, -7.411),
X.P4..Y = c(278.284, 278.28, 279.06, 278.8, 278.803, 279.323, 278.801, 278.284, 279.062, 278.547),
X.P4..Z = c(3.511, 4.587, 3.511, 4.587, 3.511, 4.587, 4.587, 4.587, 5.125, 4.587),
X.P5..X = c(-1.843, -1.843, -1.843, -1.843, -1.843, -1.843, -1.843, -1.843, -1.843, -1.843),
X.P5..Y = c(0.964, 0.964, 0.447, 0.701, 0.439, 0.708, 0.183, 0.701, 0.959, 0.701),
X.P5..Z = c(-3.631, -3.631, -3.631, -3.631, -4.169, -3.093, -3.093, -4.169, -4.169, -3.631)
)
答案 0 :(得分:4)
使用 map_dfc 遍历 paste0('P', 0:5),对当前的 P 组执行 select 和 transmute,然后按列将各个 P 组的结果组合在一起。
答案 1 :(得分:3)
我们还可以使用 base R 的 split.default,根据列名中的公共子字符串(“P”后跟数字)拆分列,然后使用 sapply(或 lapply)对每个组计算 rowSums。
# Drop the first column (Time) so only the grouped measurement columns remain.
df <- test[-1]
# Split the columns by the "P<digits>" tag embedded in each column name and
# take the row sums within every group.
# lapply() is used instead of sapply() because sapply() simplifies by shape:
# with a single-row input it collapses the result to a plain vector, and
# data.frame() would then produce one row per group instead of one column per
# group. lapply() always yields one column per group.
data.frame(
  lapply(
    split.default(df, sub(".*(P\\d+).*", "\\1", names(df))),
    rowSums
  )
)
# P0 P1 P2 P3 P4 P5
#1 6.882 298.560 -0.372 0.350 274.384 -4.510
#2 6.344 297.785 -1.686 1.144 275.994 -4.510
#3 6.627 299.074 -1.688 0.597 275.160 -5.027
#4 6.089 298.815 -0.893 0.325 275.976 -4.773
#5 5.017 299.070 -1.947 0.874 275.441 -5.573
#6 5.037 299.355 -0.631 0.600 276.499 -4.228
#7 4.227 298.817 -1.688 0.604 275.977 -4.753
#8 4.487 299.074 -1.431 0.069 275.460 -5.311
#9 5.297 299.103 -1.692 0.325 277.314 -5.053
#10 5.817 299.345 -1.154 0.325 275.723 -4.773
其中 sub 部分用于从列名中提取各组共有的部分:
# Capture the "P<digits>" tag in each column name and replace the whole name
# with just that tag, giving one group label per column.
sub(".*(P\\d+).*", "\\1", names(df))
# [1] "P0" "P0" "P0" "P1" "P1" "P1" "P2" "P2" "P2" "P3" "P3" "P3" "P4" "P4" "P4"
# "P5" "P5" "P5"