我想计算在冷、热两种处理(Treat)下,每个个体在时间T1和T9之间的面积变化百分比。
一些示例数据:
library(dplyr)
# Example data: six individuals, each with one row for T1 and one row for T9;
# the first three individuals are "hot", the last three are "cold".
Individual <- rep(c("a1.2", "bd3.d", "k20.d", "dfd.2", "d3.d", "df3.1"), each = 2)
Treat <- rep(c("hot", "cold"), each = 6)
Time <- rep(c("T1", "T9"), times = 6)
# Area values are character strings here, not numbers.
Area <- c(
  "0.1", "0.5", "0.1", "0.645", "0.1", "0.54",
  "0.1", "0.587", "0.1", "0.78", "0.23", "0.78"
)
df.Area <- data.frame(Individual, Treat, Time, Area)
head(df.Area, n = 20)
head(df.Area, n=20)
Individual Treat Time Area
1 a1.2 hot T1 0.1
2 a1.2 hot T9 0.5
3 bd3.d hot T1 0.1
4 bd3.d hot T9 0.645
5 k20.d hot T1 0.1
6 k20.d hot T9 0.54
7 dfd.2 cold T1 0.1
8 dfd.2 cold T9 0.587
9 d3.d cold T1 0.1
10 d3.d cold T9 0.78
11 df3.1 cold T1 0.23
12 df3.1 cold T9 0.78
例如:((T9 - T1) / T1) * 100
首先在第1行和第2行中找到同一个个体,例如a1.2,然后用它在T9和T1的面积进行计算:((0.5 - 0.1) / 0.1) * 100 = 400,即增加400%。
期望的输出为:
Individual Treat Ch.Area
1 a1.2 hot 400
2 bd3.d hot num.etc
3 k20.d hot num.etc
4 dfd.2 cold num.etc
5 d3.d cold num.etc ....
# Group the rows by treatment, time point and individual.
df1 <- group_by(df.Area, Treat, Time, Individual)
下面是我对代码结构的大胆猜测(并不能运行):
# NOTE: pseudo-code from the question, not valid R: the parentheses are
# unbalanced, and T1/T9 are values of the Time column, not columns of df1.
df2 <- df1 %>% summarise(Ch.Area = T9[!Individual == "??"] - T1[!Individual == "??"])/T9([!Individual == "??"])*100)
我希望dplyr能把名称相同的个体分到同一组来计算百分比,同时仍保留Treat这个分组变量。这可能吗?我也很乐意使用其他更好的包或方法。
任何帮助都会很棒!
答案 0 :(得分:1)
使用dplyr:
解决方案1:假设T9的Area变量值总是比T1高
# Sample data from the question: one T1 row followed by one T9 row for each of
# six individuals; three individuals are "hot" and three are "cold".
Individual <- rep(c("a1.2", "bd3.d", "k20.d", "dfd.2", "d3.d", "df3.1"), each = 2)
Treat <- rep(c("hot", "cold"), each = 6)
Time <- rep(c("T1", "T9"), times = 6)
# Area is given as character strings, so it must be converted before arithmetic.
Area <- c(
  "0.1", "0.5", "0.1", "0.645", "0.1", "0.54",
  "0.1", "0.587", "0.1", "0.78", "0.23", "0.78"
)
df <- data.frame(Individual, Treat, Time, Area)
# Percent change in Area from the first to the last row of each individual.
# first()/last() depend on row order: within each individual the T1 row must
# come before the T9 row.
df %>%
  # Area is stored as text (a factor when stringsAsFactors = TRUE), so convert
  # it once via as.character() instead of repeating the conversion per use.
  mutate(Area = as.numeric(as.character(Area))) %>%
  group_by(Individual) %>%
  summarise(
    Treat = last(Treat),
    Ch.Area = ((last(Area) - first(Area)) / first(Area)) * 100
  )
# A tibble: 6 x 3
Individual Treat Ch.Area
<fct> <fct> <dbl>
1 a1.2 hot 400.
2 bd3.d hot 545.
3 d3.d cold 680.
4 df3.1 cold 239.
5 dfd.2 cold 487.
6 k20.d hot 440.
解决方案2:不假设T9的Area变量值总是比T1高
# Percent change in Area from T1 to T9 for each individual. The two values are
# picked by their Time label, so the result does not depend on row order.
# Assumes exactly one T1 row and one T9 row per individual - TODO confirm for
# real data.
df %>%
  # Area is stored as text (a factor when stringsAsFactors = TRUE), so convert
  # it once via as.character() instead of repeating the conversion per use.
  mutate(Area = as.numeric(as.character(Area))) %>%
  group_by(Individual) %>%
  summarise(
    Treat = last(Treat),
    Ch.Area = ((Area[Time == "T9"] - Area[Time == "T1"]) /
      Area[Time == "T1"]) * 100
  )
# A tibble: 6 x 3
Individual Treat Ch.Area
<fct> <fct> <dbl>
1 a1.2 hot 400.
2 bd3.d hot 545.
3 d3.d cold 680.
4 df3.1 cold 239.
5 dfd.2 cold 487.
6 k20.d hot 440.
答案 1 :(得分:0)
这是一个tidyverse方案,其中用到了数据重塑(reshape):
library(tidyverse)
# Sample data from the question: six individuals, each with a T1 row followed
# by a T9 row; the first three are "hot", the last three are "cold".
Individual <- rep(c("a1.2", "bd3.d", "k20.d", "dfd.2", "d3.d", "df3.1"), each = 2)
Treat <- rep(c("hot", "cold"), each = 6)
Time <- rep(c("T1", "T9"), times = 6)
# Area values are character strings, not numbers.
Area <- c(
  "0.1", "0.5", "0.1", "0.645", "0.1", "0.54",
  "0.1", "0.587", "0.1", "0.78", "0.23", "0.78"
)
df.Area <- data.frame(Individual, Treat, Time, Area)
# Reshape to one row per individual with T1 and T9 as separate columns, then
# compute the percent change from T1 to T9.
df.Area %>%
  # convert = TRUE makes spread() type-convert the new T1/T9 columns, so the
  # character Area values become numeric. Spelled TRUE because T can be
  # reassigned.
  spread(Time, Area, convert = TRUE) %>%
  mutate(Ch.Area = 100 * (T9 / T1 - 1))
# Individual Treat T1 T9 Ch.Area
# 1 a1.2 hot 0.10 0.500 400.0000
# 2 bd3.d hot 0.10 0.645 545.0000
# 3 d3.d cold 0.10 0.780 680.0000
# 4 df3.1 cold 0.23 0.780 239.1304
# 5 dfd.2 cold 0.10 0.587 487.0000
# 6 k20.d hot 0.10 0.540 440.0000
答案 2 :(得分:0)
library(dplyr)
# Percent change in Area from T1 to T9 for each individual, keeping Treat as a
# grouping column in the result.
df.Area %>%
  # Go through as.character() first: as.numeric() applied directly to a factor
  # returns the internal level codes, not the printed values.
  mutate(Area = as.numeric(as.character(Area))) %>%
  group_by(Individual, Treat) %>%
  summarise(Ch.Area = (Area[Time == "T9"] / Area[Time == "T1"] - 1) * 100)
# A tibble: 6 x 3
# Groups: Individual [?]
Individual Treat Ch.Area
<chr> <chr> <dbl>
1 a1.2 hot 400
2 bd3.d hot 545
3 d3.d cold 680
4 df3.1 cold 239.
5 dfd.2 cold 487.
6 k20.d hot 440.