使用 dplyr 的 summarise 对许多不同个体的测量值应用自定义函数

时间:2018-08-28 09:40:14

标签: r dplyr

我想计算在冷、热两种处理(Treat)下,每个个体在时间T1和T9之间的面积变化百分比。

一些示例数据:

library(dplyr)

# Example data: six individuals, each with one T1 row followed by one T9 row.
Individual <- rep(
  c("a1.2", "bd3.d", "k20.d", "dfd.2", "d3.d", "df3.1"),
  each = 2
)
# The first three individuals are "hot", the last three are "cold".
Treat <- rep(c("hot", "cold"), each = 6)
# Time points alternate T1, T9 down the rows.
Time <- rep(c("T1", "T9"), times = 6)
# Area is stored as character strings, not as numbers.
Area <- c(
  "0.1", "0.5", "0.1", "0.645", "0.1", "0.54",
  "0.1", "0.587", "0.1", "0.78", "0.23", "0.78"
)
df.Area <- data.frame(Individual, Treat, Time, Area)
head(df.Area, n = 20)

head(df.Area, n=20)
   Individual Treat Time  Area
1        a1.2   hot   T1   0.1
2        a1.2   hot   T9   0.5
3       bd3.d   hot   T1   0.1
4       bd3.d   hot   T9 0.645
5       k20.d   hot   T1   0.1
6       k20.d   hot   T9  0.54
7       dfd.2  cold   T1   0.1
8       dfd.2  cold   T9 0.587
9        d3.d  cold   T1   0.1
10       d3.d  cold   T9  0.78
11      df3.1  cold   T1  0.23
12      df3.1  cold   T9  0.78

例如:((T9 - T1) / T1) * 100

首先在第1行和第2行中找到相同的个体,例如a1.2,然后用T9和T1的值进行计算:((0.5 - 0.1) / 0.1) * 100 = 400,即增加400%。

输出为:

       Individual Treat Ch.Area  
    1        a1.2   hot    400    
    2        bd3.d  hot     num.etc 
    3       k20.d   hot     num.etc 
    4       dfd.2   cold    num.etc
    5       d3.d    cold    num.etc ....

# Group the rows by treatment, time point and individual.
# NOTE(review): with Time among the grouping variables, an individual's T1 and
# T9 rows fall into different groups.
df1 <- df.Area %>% group_by(Treat, Time, Individual)

这是对结构的疯狂猜测:

# Asker's non-working sketch, kept verbatim: the parentheses are unbalanced and
# T1/T9 are values of the Time column, not columns of the data frame.
df2 <- df1 %>%  summarise(Ch.Area = T9[!Individual == "??"] - T1[!Individual == "??"])/T9([!Individual == "??"])*100)

我希望dplyr把Individual相同的行分到同一组来计算百分比,同时仍保留Treat这个分组变量。这可行吗?我也很乐意使用其他更合适的软件包或方法。

任何帮助都会很棒!

3 个答案:

答案 0 :(得分:1)

使用dplyr

解决方案1:假设每个个体的行按时间排序(T1在前、T9在后),因为first()/last()是按行的位置取值的

# Example data: six individuals, each with one T1 row followed by one T9 row.
Individual <- rep(
  c("a1.2", "bd3.d", "k20.d", "dfd.2", "d3.d", "df3.1"),
  each = 2
)
# The first three individuals are "hot", the last three are "cold".
Treat <- rep(c("hot", "cold"), each = 6)
# Time points alternate T1, T9 down the rows.
Time <- rep(c("T1", "T9"), times = 6)
# Area is stored as character strings, not as numbers.
Area <- c(
  "0.1", "0.5", "0.1", "0.645", "0.1", "0.54",
  "0.1", "0.587", "0.1", "0.78", "0.23", "0.78"
)

df <- data.frame(Individual, Treat, Time, Area)

# Percent change in Area per individual, from the first row of each group to
# the last row. Relies on each individual's rows being ordered T1 then T9.
df %>%
  # Area may be stored as a factor; go through character so the actual values
  # are parsed rather than the factor's integer level codes.
  mutate(Area = as.numeric(as.character(Area))) %>%
  group_by(Individual) %>%
  summarise(
    Treat = last(Treat),
    Ch.Area = ((last(Area) - first(Area)) / first(Area)) * 100
  )

# A tibble: 6 x 3
  Individual Treat Ch.Area
  <fct>      <fct>   <dbl>
1 a1.2       hot      400.
2 bd3.d      hot      545.
3 d3.d       cold     680.
4 df3.1      cold     239.
5 dfd.2      cold     487.
6 k20.d      hot      440.

解决方案2:不依赖行的顺序,而是按Time的取值显式选取T1和T9对应的Area

# Percent change in Area per individual, picking the T1 and T9 rows by their
# Time value so the result does not depend on row order.
df %>%
  # Area may be stored as a factor; go through character so the actual values
  # are parsed rather than the factor's integer level codes.
  mutate(Area = as.numeric(as.character(Area))) %>%
  group_by(Individual) %>%
  summarise(
    Treat = last(Treat),
    Ch.Area = ((Area[Time == "T9"] - Area[Time == "T1"]) /
      Area[Time == "T1"]) * 100
  )

# A tibble: 6 x 3
  Individual Treat Ch.Area
  <fct>      <fct>   <dbl>
1 a1.2       hot      400.
2 bd3.d      hot      545.
3 d3.d       cold     680.
4 df3.1      cold     239.
5 dfd.2      cold     487.
6 k20.d      hot      440.

答案 1 :(得分:0)

这是一个tidyverse选项,使用了一些重塑:

library(tidyverse)

# Example data: six individuals, each with one T1 row followed by one T9 row.
Individual <- rep(
  c("a1.2", "bd3.d", "k20.d", "dfd.2", "d3.d", "df3.1"),
  each = 2
)
# The first three individuals are "hot", the last three are "cold".
Treat <- rep(c("hot", "cold"), each = 6)
# Time points alternate T1, T9 down the rows.
Time <- rep(c("T1", "T9"), times = 6)
# Area is stored as character strings, not as numbers.
Area <- c(
  "0.1", "0.5", "0.1", "0.645", "0.1", "0.54",
  "0.1", "0.587", "0.1", "0.78", "0.23", "0.78"
)
df.Area <- data.frame(Individual, Treat, Time, Area)

# Reshape to one row per individual with T1 and T9 as separate columns, then
# compute the percent change from T1 to T9.
df.Area %>%
  # convert = TRUE type-converts the new T1/T9 columns, so the Area strings
  # become numeric. TRUE is spelled out because T can be reassigned.
  spread(Time, Area, convert = TRUE) %>%
  mutate(Ch.Area = 100 * (T9 / T1 - 1))

#   Individual Treat   T1    T9  Ch.Area
# 1       a1.2   hot 0.10 0.500 400.0000
# 2      bd3.d   hot 0.10 0.645 545.0000
# 3       d3.d  cold 0.10 0.780 680.0000
# 4      df3.1  cold 0.23 0.780 239.1304
# 5      dfd.2  cold 0.10 0.587 487.0000
# 6      k20.d   hot 0.10 0.540 440.0000

答案 2 :(得分:0)

library(dplyr)
# Percent change in Area from T1 to T9 for each Individual/Treat combination.
df.Area %>%
  # Convert via character: as.numeric() applied directly to a factor returns
  # its integer level codes, not the numbers the labels spell out.
  mutate(Area = as.numeric(as.character(Area))) %>%
  group_by(Individual, Treat) %>%
  summarise(Ch.Area = (Area[Time == "T9"] / Area[Time == "T1"] - 1) * 100)


# A tibble: 6 x 3
# Groups:   Individual [?]
Individual Treat Ch.Area
<chr>      <chr>   <dbl>
1 a1.2       hot      400 
2 bd3.d      hot      545 
3 d3.d       cold     680 
4 df3.1      cold     239.
5 dfd.2      cold     487.
6 k20.d      hot      440.