我想计算变量 'wt' 和 'wc' 在时间 1、时间 2 和时间 3 相对于时间 1 的百分比变化。在时间 1,该值为 0。在时间 2,'wt' 的百分比变化为 (t2 - t1) / t1 * 100;在时间 3,应为 (t3 - t1) / t1 * 100。然后,我想把结果作为新变量添加到现有的 Excel 数据表中。我找过其他例子,但没有一个与我的数据格式匹配。谢谢!
# Sample data: three subjects (code), each measured at time 1, 2 and 3.
# The list elements are already named, so no separate names attribute
# needs to be supplied to structure().
structure(
  list(
    code = rep(c(100, 101, 102), each = 3),
    treatment = rep(c(1, 2, 1), each = 3),
    time = rep(c(1, 2, 3), times = 3),
    wt = c(80, 78, 76, 75, 74, 74, 78, 74, 72),
    wc = c(90, 89, 87, 92, 91, 90, 89, 86, 84)
  ),
  row.names = c(NA, -9L),
  class = c("tbl_df", "tbl", "data.frame")
)
我尝试按照下面的建议操作,但收到了一个错误:
> data <- read.csv("All Data with BMI and other tweaks.csv", header = TRUE, na.strings = ".", stringsAsFactors = FALSE)
> names(data)
[1] "code" "treatment" "age" "sex"
[5] "time" "bicep" "tricep" "subscapular"
[9] "suprailiac" "weight" "pwc" "wc"
[13] "bia" "height" "bmi" "wthr"
[17] "density" "X.fat" "fm" "ffm"
[21] "dietary.recall" "reportingdate" "NumFoods" "NumCodes"
[25] "kcal" "prot" "tfat" "carb"
[29] "mois" "alc" "caff" "theo"
[33] "sugr" "fibe" "calc" "iron"
[37] "magn" "phos" "pota" "sodi"
[41] "zinc" "copp" "sele" "vc"
[45] "vb1" "vb2" "niac" "vb6"
[49] "fola" "fa" "ff" "fdfe"
[53] "vb12" "vara" "ret" "bcar"
[57] "acar" "cryp" "lyco" "lz"
[61] "atoc" "vk" "vitd" "choln"
[65] "chole" "sfat" "s040" "s060"
[69] "s080" "s100" "s120" "s140"
[73] "s160" "s180" "mfat" "m161"
[77] "m181" "m201" "m221" "pfat"
[81] "p182" "p183" "p184" "p204"
[85] "p205" "p225" "p226" "vite_add"
[89] "b12_add" "datacomp"
> library(dplyr)
> data <- data %>%
+ group_by(code) %>%
+ mutate(wt.pch = (data$weight - data$weight[1]) / data$weight * 100, wc.pch = (data$wc - data$wc[1]) / data$wc[1] * 100)
Error in mutate_impl(.data, dots) :
Column `wt.pch` must be length 3 (the group size) or one, not 114
答案 0 :(得分:0)
您可以尝试:
# Percent change of wt and wc relative to the previous row within each code
# group; the first row of every group has no predecessor and yields NA.
# lag() follows row order, so this assumes rows are already sorted by time
# within each code.
df <- df %>%
  group_by(code) %>%
  mutate(
    pct_change_wt = (wt - lag(wt)) / lag(wt) * 100,
    pct_change_wc = (wc - lag(wc)) / lag(wc) * 100
  ) %>%
  ungroup()
# The result is assigned back to df above; without that assignment print(df)
# would show the original data without the new columns.
print(df)
code treatment time wt wc pct_change_wt pct_change_wc
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 100 1.00 1.00 80.0 90.0 NA NA
2 100 1.00 2.00 78.0 89.0 - 2.50 - 1.11
3 100 1.00 3.00 76.0 87.0 - 2.56 - 2.25
4 101 2.00 1.00 75.0 92.0 NA NA
5 101 2.00 2.00 74.0 91.0 - 1.33 - 1.09
6 101 2.00 3.00 74.0 90.0 0 - 1.10
7 102 1.00 1.00 78.0 89.0 NA NA
8 102 1.00 2.00 74.0 86.0 - 5.13 - 3.37
9 102 1.00 3.00 72.0 84.0 - 2.70 - 2.33
说明:
1. 按 code 进行 group_by,确保我们对每个组分别计算百分比变化。
2. lag 函数取每组中的前一个值(因此这里得到的是相对于上一个时间点的变化,而不是相对于时间 1 的变化)。
答案 1 :(得分:0)
这是一种方法:
library(dplyr)
# Percent change of wt and wc relative to the first row of each code group;
# the first row itself evaluates to 0.
df %>%
  group_by(code) %>%
  mutate(
    wt.pch = (wt - first(wt)) / first(wt) * 100,
    wc.pch = (wc - first(wc)) / first(wc) * 100
  )
# A tibble: 9 x 7
# Groups: code [3]
# code treatment time wt wc wt.pch wc.pch
# <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 100 1.00 1.00 80.0 90.0 0 0
# 2 100 1.00 2.00 78.0 89.0 -2.50 -1.11
# 3 100 1.00 3.00 76.0 87.0 -5.00 -3.33
# 4 101 2.00 1.00 75.0 92.0 0 0
# 5 101 2.00 2.00 74.0 91.0 -1.33 -1.09
# 6 101 2.00 3.00 74.0 90.0 -1.33 -2.17
# 7 102 1.00 1.00 78.0 89.0 0 0
# 8 102 1.00 2.00 74.0 86.0 -5.13 -3.37
# 9 102 1.00 3.00 72.0 84.0 -7.69 -5.62