Question

我有一个类似于下面这样的数据框，其中包含许多不同的Strain和Day。

  Strain Day Parasite Rep1 Rep2 Rep3
1    KO1   1      Red    5    6    7
2    KO1   1    Green    6    7    8
3    KO1   1     Both    3    1    5
4    KO2   1      Red    5    6    7
5    KO2   1    Green    6    7    8
6    KO2   1     Both   10   10   10

有些寄生虫是红色的（Red），有些是绿色的（Green），有些则两种颜色兼有（Both）。我想创建一个新的数据框，其中包含新的Red <- Red+Both和新的Green <- Green+Both（适用于Rep1，Rep2和Rep3）。

具体而言，对于Parasite=="Red" | Parasite=="Green"的每一行，把同一Strain和Day中Parasite=="Both"那一行的Rep1值加到该行的Rep1上。对该行的Rep2和Rep3重复此操作，然后对所有其他Parasite=="Red" | Parasite=="Green"的行重复此操作。最终的数据框请不要包含Parasite=="Both"的行。

新数据框应如下所示。

  Strain Day Parasite Obs1 Obs2 Obs3
1    KO1   1      Red    8    7   12
2    KO1   1    Green    9    8   13
3    KO2   1      Red   15   16   17
4    KO2   1    Green   16   17   18

Answer 1

假设您的数据存储在变量data中

library(data.table)

# The grouped `[` calls below need a data.table. setDT() converts a plain
# data.frame by reference and leaves an existing data.table as it is.
setDT(data)

# Group directly by Strain and Day. This avoids gluing the two keys into one
# string and splitting them apart again, which would turn Day into character
# and break as soon as a Strain name contains the separator.

# Green + Both, summed per replicate within every Strain/Day combination.
# %in% is used instead of `==` so that an NA in Parasite is simply skipped.
res1 <- data[, list(Obs1 = sum(Rep1[Parasite %in% c("Green", "Both")]),
                    Obs2 = sum(Rep2[Parasite %in% c("Green", "Both")]),
                    Obs3 = sum(Rep3[Parasite %in% c("Green", "Both")]),
                    Parasite = "Green"),
             by = list(Strain, Day)]
# Red + Both, summed the same way.
res2 <- data[, list(Obs1 = sum(Rep1[Parasite %in% c("Red", "Both")]),
                    Obs2 = sum(Rep2[Parasite %in% c("Red", "Both")]),
                    Obs3 = sum(Rep3[Parasite %in% c("Red", "Both")]),
                    Parasite = "Red"),
             by = list(Strain, Day)]
# The rows are not in the same order as in the question (all Green rows come
# first, then all Red rows), but that is easy to change afterwards.
res <- rbind(res1, res2)

# A grouped result lists the grouping columns first; move them behind the
# sums so the column layout matches the output shown below.
setcolorder(res, c("Obs1", "Obs2", "Obs3", "Parasite", "Strain", "Day"))

res
#   Obs1 Obs2 Obs3 Parasite Strain Day
#1:    9    8   13    Green    KO1   1
#2:   16   17   18    Green    KO2   1
#3:    8    7   12      Red    KO1   1
#4:   15   16   17      Red    KO2   1

Answer 2

我们可以使用data.table。先用grep创建一个以'Rep'开头的列名向量（'nm1'）。将'data.frame'转换为'data.table'（setDT(df1)），按'Strain'、'Day'分组，并在每个分组内取出'nm1'中的列（.SD[, nm1, with=FALSE]）。这些列与'Parasite'列一起作为Map的输入。在Map中，根据'Parasite'的值是“Red”或“Both”、以及“Green”或“Both”，分别对.SD[, nm1, with=FALSE]中的每一列取子集，并对每个子集求和（sum）。然后，我们通过重复“Red”、“Green”字符串创建'Parasite'列，并根据需要更改列名（setnames(..)）。

 library(data.table)
 # Names of the replicate columns (every column whose name starts with "Rep").
 nm1 <- grep('^Rep', names(df1), value=TRUE)
 # Within each Strain/Day group, Map() pairs every replicate column (x) with
 # the group's Parasite labels (y) and returns two sums per column:
 # Red + Both first, Green + Both second. Each group therefore yields two rows.
 # The Parasite labels are repeated explicitly with rep(): `:=` does not
 # recycle a right-hand side of length > 1 in current data.table versions.
 res <- setDT(df1)[, Map(function(x,y) c(sum(x[y %in% c('Red', 'Both')]),
                                     sum(x[y %in% c('Green', 'Both')])),
              .SD[, nm1, with=FALSE], list(Parasite)), .(Strain, Day)
                  ][, Parasite := rep(c('Red', 'Green'), length.out = .N)][]
# Rename by column name rather than by position: positions 2:4 would hit
# Day, Rep1 and Rep2 instead of the three replicate columns.
setnames(res, nm1, paste0('Obs', seq_along(nm1)))
res
#   Strain Day Obs1 Obs2 Obs3 Parasite
#1:    KO1   1    8    7   12      Red
#2:    KO1   1    9    8   13    Green
#3:    KO2   1   15   16   17      Red
#4:    KO2   1   16   17   18    Green

str(res)
#Classes ‘data.table’ and 'data.frame':  4 obs. of  6 variables:
# $ Strain  : chr  "KO1" "KO1" "KO2" "KO2"
# $ Day     : int  1 1 1 1
# $ Obs1    : int  8 9 15 16
# $ Obs2    : int  7 8 16 17
# $ Obs3    : int  12 13 17 18
# $ Parasite: chr  "Red" "Green" "Red" "Green"

或者我们可以使用lapply

# lapply() alternative: inside every Strain/Day group, work out once which
# rows count towards the red total and which towards the green total, then
# produce both sums for each replicate column. The Parasite label column is
# placed in front of the sums.
res1 <- setDT(df1)[, {
  counts_as_red <- Parasite %in% c("Red", "Both")
  counts_as_green <- Parasite %in% c("Green", "Both")
  totals <- lapply(.SD, function(column) {
    c(sum(column[counts_as_red]), sum(column[counts_as_green]))
  })
  c(list(Parasite = c("Red", "Green")), totals)
}, by = .(Strain, Day), .SDcols = nm1]
# Replace the replicate column names (Rep*) with Obs1..Obs3.
setnames(res1, nm1, paste0("Obs", 1:3))

数据

# Example input: two strains observed on day 1, each with a Red, a Green and
# a Both row, and three integer replicate counts per row.
df1 <- data.frame(
  Strain = rep(c("KO1", "KO2"), each = 3L),
  Day = rep(1L, 6L),
  Parasite = rep(c("Red", "Green", "Both"), times = 2L),
  Rep1 = c(5L, 6L, 3L, 5L, 6L, 10L),
  Rep2 = c(6L, 7L, 1L, 6L, 7L, 10L),
  Rep3 = c(7L, 8L, 5L, 7L, 8L, 10L),
  row.names = as.character(1:6),
  stringsAsFactors = FALSE
)

如何根据R中的其他行执行条件运算？

2 个答案:

数据