Question

我的数据格式如下：

site    location    treatment    response
1         1           1            20
1         1           2            30
1         1           3            30
1         2           1            80
1         2           2            30
1         2           3            50
1         3           1            10
1         3           2            15
1         3           3            100
1         4           1            25
1         4           2            20
1         4           3            90

每个站点（site）下有多个位置（location），共有 10 个站点。

我希望创建三个新变量 treat1、treat2 和 treat3，分别对应三种处理（treatment），其取值为每个站点/位置组合中相应处理的响应值。并且，我希望这三个变量在该组合的全部三条处理记录上都有取值。即得到如下数据框：

site    location    treatment    response     treat1    treat2     treat3
1         1           1            20           20        30         30
1         1           2            30           20        30         30
1         1           3            30           20        30         30
1         2           1            80           80        30         50
1         2           2            30           80        30         50
1         2           3            50           80        30         50
1         3           1            10           10        15        100
1         3           2            15           10        15        100
1         3           3            100          10        15        100
1         4           1            25           25        20         90  
1         4           2            20           25        20         90 
1         4           3            90           25        20         90

为了获得这个，我使用了一个相当冗长的解决方案（6行代码 - 请参见下文），但我想知道是否有人能指出更直接的方法：

对于treat1：

# Build the treat1 column: the treatment-1 response of each site/location
# group, repeated on every row of that group.
# NOTE: the final assignment is positional, so this assumes df is sorted by
# site, then location, with exactly three rows per site/location group.

# Keep the response only on treatment-1 rows; all other rows become NA.
df$trt1 <- ifelse(df$treatment == 1, df$response, NA)
# One value per location/site group. With na.rm = TRUE, max() returns -Inf
# (with a warning) for a group that has no treatment-1 row.
df2 <- aggregate(df$trt1, list(df$location, df$site), max, na.rm = TRUE)
# Triplicate the per-group values, then order by site (Group.2) and
# location (Group.1) so each group's value appears three times in a row.
df3 <- rbind(df2, df2, df2)
df4 <- df3[with(df3, order(Group.2, Group.1)), ]
# Turn the -Inf placeholder for empty groups back into NA.
df$x <- ifelse(df4$x == -Inf, NA, df4$x)
names(df)[names(df) == "x"] <- "treat1"

我怀疑tapply在这里可能有用，但我不确定如何在这种情况下使用它。

谢谢。

Answer 1

一种方式可能是：

# aggregate() with FUN = c gathers the responses of every site/location group
# into one matrix column; do.call(data.frame, ...) flattens that matrix into
# separate columns (response.1, response.2, ...), which are then joined back
# onto every original row of the matching group.
merge(
  x = DF,
  y = do.call(
    data.frame,
    aggregate(response ~ site + location, data = DF, FUN = c)
  ),
  by = c("site", "location")
)
#   site location treatment response response.1 response.2 response.3
#1     1        1         1       20         20         30         30
#2     1        1         2       30         20         30         30
#3     1        1         3       30         20         30         30
#4     1        2         1       80         80         30         50
#5     1        2         2       30         80         30         50
#6     1        2         3       50         80         30         50
#7     1        3         1       10         10         15        100
#8     1        3         2       15         10         15        100
#9     1        3         3      100         10         15        100
#10    1        4         1       25         25         20         90
#11    1        4         2       20         25         20         90
#12    1        4         3       90         25         20         90

其中 "DF" 为：

# Example data: a single site with 4 locations, each measured under
# treatments 1-3 (12 rows, all columns integer).
DF <- data.frame(
  site = rep(1L, 12),
  location = rep(1:4, each = 3),
  treatment = rep(1:3, times = 4),
  response = c(
    20L, 30L, 30L,
    80L, 30L, 50L,
    10L, 15L, 100L,
    25L, 20L, 90L
  )
)

Answer 2

您还可以使用标准的下标取子集操作，通过以下方式得到各处理对应的矩阵：

# Lay the responses out as one row per site/location group (one column per
# treatment), then repeat every group row three times so the result lines up
# row-by-row with df.
# Assumes df is sorted by group and has exactly 3 rows (treatments) per group.
# seq_len() keeps the index empty for an empty df, where 1:0 would give c(1, 0).
matrix(df$response, ncol = 3, byrow = TRUE)[
  rep(seq_len(nrow(df) / 3), each = 3),
]

Answer 3

1）dplyr

library(dplyr)

# Per site/location group, copy the 1st, 2nd and 3rd response onto every row
# of the group. The position within the group stands in for the treatment, so
# this presumes rows are ordered by treatment inside each group.
# The result stays grouped by site and location.
DF %>%
  group_by(site, location) %>%
  mutate(
    treat1 = response[1],
    treat2 = response[2],
    treat3 = response[3]
  )

结果为：

Source: local data frame [12 x 7]
Groups: site, location
   site location treatment response treat1 treat2 treat3
1     1        1         1       20     20     30     30
2     1        1         2       30     20     30     30
3     1        1         3       30     20     30     30
4     1        2         1       80     80     30     50
5     1        2         2       30     80     30     50
6     1        2         3       50     80     30     50
7     1        3         1       10     10     15    100
8     1        3         2       15     10     15    100
9     1        3         3      100     10     15    100
10    1        4         1       25     25     20     90
11    1        4         2       20     25     20     90
12    1        4         3       90     25     20     90

2）data.table

library(data.table)
DT <- data.table(DF)

# Names of the new columns, one per distinct treatment value in order of
# first appearance (here "treat1", "treat2", "treat3").
treats <- paste0("treat", unique(DF$treatment))
# Within each site/location group, spread the group's responses across the
# new columns by reference; each value is recycled over the group's rows.
# NOTE(review): presumes every group has one row per name in treats.
DT[, (treats) := as.list(response), by = .(site, location)]

结果为：

> DT
    site location treatment response treat1 treat2 treat3
 1:    1        1         1       20     20     30     30
 2:    1        1         2       30     20     30     30
 3:    1        1         3       30     20     30     30
 4:    1        2         1       80     80     30     50
 5:    1        2         2       30     80     30     50
 6:    1        2         3       50     80     30     50
 7:    1        3         1       10     10     15    100
 8:    1        3         2       15     10     15    100
 9:    1        3         3      100     10     15    100
10:    1        4         1       25     25     20     90
11:    1        4         2       20     25     20     90
12:    1        4         3       90     25     20     90

3）ave

# Return, for every row of DF, the i-th response of that row's site/location
# group. ave() applies the picker per group and recycles the single value
# over all rows of the group. Reads the global DF.
treat <- function(i) {
  pick_ith <- function(x) x[i]
  with(DF, ave(response, site, location, FUN = pick_ith))
}
# Append one column per treatment position to the original data.
cbind(DF, treat1 = treat(1), treat2 = treat(2), treat3 = treat(3))

结果为：

   site location treatment response treat1 treat2 treat3
1     1        1         1       20     20     30     30
2     1        1         2       30     20     30     30
3     1        1         3       30     20     30     30
4     1        2         1       80     80     30     50
5     1        2         2       30     80     30     50
6     1        2         3       50     80     30     50
7     1        3         1       10     10     15    100
8     1        3         2       15     10     15    100
9     1        3         3      100     10     15    100
10    1        4         1       25     25     20     90
11    1        4         2       20     25     20     90
12    1        4         3       90     25     20     90

添加了附加解决方案

R中的数据格式使用多个记录

3 个答案: