如何将by()调用的结果添加到原始数据框?

时间:2014-12-18 16:51:04

标签: r

示例代码和数据:

# CO2 is available directly from R's datasets package. attach() is not needed
# because the by() call below always refers to CO2 explicitly, and attaching
# would only add a copy of its columns to the search path.
head(CO2)

  Plant   Type  Treatment conc uptake
1   Qn1 Quebec nonchilled   95   16.0
2   Qn1 Quebec nonchilled  175   30.4
3   Qn1 Quebec nonchilled  250   34.8
4   Qn1 Quebec nonchilled  350   37.2
5   Qn1 Quebec nonchilled  500   35.3
6   Qn1 Quebec nonchilled  675   39.2

# For every Plant/Type/Treatment combination, regress uptake on conc using only
# that group's rows and keep the fitted values. The columns are pulled out as
# plain vectors, so the fitted values are named 1..n within each group.
fcv <- by(
  CO2,
  CO2[, c("Plant", "Type", "Treatment")],
  function(m) {
    group_fit <- lm(m[["uptake"]] ~ m[["conc"]])
    fitted(group_fit)
  }
)

fcv
Plant: Qn1
Type: Quebec
Treatment: nonchilled
       1        2        3        4        5        6        7
26.94781 28.42563 29.81110 31.65838 34.42931 37.66205 43.66573
------------------------------------------------------------
Plant: Qn2
Type: Quebec
Treatment: nonchilled
       1        2        3        4        5        6        7
26.47555 28.51828 30.43334 32.98674 36.81686 41.28533 49.58391
------------------------------------------------------------
Plant: Qn3
Type: Quebec
Treatment: nonchilled
       1        2        3        4        5        6        7
29.71856 31.57638 33.31808 35.64035 39.12376 43.18774 50.73512
------------------------------------------------------------
Plant: Qc1
Type: Quebec
Treatment: nonchilled
NULL
------------------------------------------------------------

and so on.

问题:

我想在基础数据集中添加一个额外的列,或者创建一个带有新增列的新数据集,该列显示根据conc列预测得到的uptake值。类似这样:

  Plant   Type  Treatment conc uptake  predicted_uptake
1   Qn1 Quebec nonchilled   95   16.0  26.94781

谢谢, D_J

2 个答案:

答案 0 :(得分:1)

您可以使用dplyr

library(dplyr)
# Fit uptake ~ conc separately for each Plant/Type/Treatment group and store
# the fitted values next to the original rows. Inside a grouped mutate(),
# `uptake` and `conc` refer to the current group's rows only, so each lm()
# call sees one group at a time. ungroup() drops the grouping afterwards so
# later operations on `res` are not silently done per group.
res <- CO2 %>%
  group_by(Plant, Type, Treatment) %>%
  mutate(predicted_uptake = fitted(lm(uptake ~ conc))) %>%
  ungroup()
head(res)
#   Plant   Type  Treatment conc uptake predicted_uptake
#1   Qn1 Quebec nonchilled   95   16.0         26.94781
#2   Qn1 Quebec nonchilled  175   30.4         28.42563
#3   Qn1 Quebec nonchilled  250   34.8         29.81110
#4   Qn1 Quebec nonchilled  350   37.2         31.65838
#5   Qn1 Quebec nonchilled  500   35.3         34.42931
#6   Qn1 Quebec nonchilled  675   39.2         37.66205

或使用by

# Append the fitted values of uptake ~ conc for one group's rows `m`.
# Columns are referenced by name, so the fit does not depend on the position
# of `uptake` and `conc` in the data frame.
f1 <- function(m) {
  cbind(m, predicted_uptake = fitted(lm(uptake ~ conc, data = m)))
}
# by() applies f1 to each Plant/Type/Treatment combination; do.call(rbind, ...)
# stacks the per-group data frames into a single data frame.
res2 <- do.call(rbind, by(CO2, CO2[, c("Plant", "Type", "Treatment")], f1))

 head(res2)
 #    Plant   Type  Treatment conc uptake predicted_uptake
 #1   Qn1 Quebec nonchilled   95   16.0         26.94781
 #2   Qn1 Quebec nonchilled  175   30.4         28.42563
 #3   Qn1 Quebec nonchilled  250   34.8         29.81110
 #4   Qn1 Quebec nonchilled  350   37.2         31.65838
 #5   Qn1 Quebec nonchilled  500   35.3         34.42931
 #6   Qn1 Quebec nonchilled  675   39.2         37.66205

或使用data.table

 library(data.table)
 # as.data.table() works on a copy, so the built-in CO2 data frame keeps its
 # class and columns. setDT(CO2) would convert CO2 itself by reference, and the
 # := below would then add predicted_uptake to CO2 as well, changing how later
 # data-frame style code such as CO2[1:3] behaves.
 res3 <- as.data.table(CO2)[, predicted_uptake :=
            fitted(lm(uptake ~ conc, data = .SD)), by = list(Plant, Type, Treatment)]

 head(res3)
 #  Plant   Type  Treatment conc uptake predicted_uptake
 #1:   Qn1 Quebec nonchilled   95   16.0         26.94781
 #2:   Qn1 Quebec nonchilled  175   30.4         28.42563
 #3:   Qn1 Quebec nonchilled  250   34.8         29.81110
 #4:   Qn1 Quebec nonchilled  350   37.2         31.65838
 #5:   Qn1 Quebec nonchilled  500   35.3         34.42931
 #6:   Qn1 Quebec nonchilled  675   39.2         37.66205

答案 1 :(得分:1)

1)尝试ave

# Fit uptake ~ conc on the CO2 rows whose row numbers are in `ix` and return
# the fitted values. Returns NULL for an empty `ix`, which guards against
# ave() calling FUN on a Plant/Type/Treatment combination that has no rows.
Fit <- function(ix) {
  if (length(ix) > 0) {
    group_model <- lm(uptake ~ conc, data = CO2, subset = ix)
    fitted(group_model)
  }
}
# ave() passes each group's row numbers to Fit and puts the returned fitted
# values back in the original row order.
transform(
  CO2,
  fitted = ave(seq_len(nrow(CO2)), Plant, Type, Treatment, FUN = Fit)
)

2)by

# Return DF with an extra column `fit` holding the fitted values of
# uptake ~ conc estimated from DF's own rows.
addFit <- function(DF) {
  group_model <- lm(uptake ~ conc, data = DF)
  transform(DF, fit = fitted(group_model))
}
# Apply addFit to each Plant/Type/Treatment combination; the result is a
# "by" object holding one data frame per combination.
by(CO2, CO2[c("Plant", "Type", "Treatment")], FUN = addFit)

2a)或者可能用以下内容替换最后一行:

# Stack the per-group data frames produced by by() into one data frame.
do.call(
  rbind,
  by(CO2, CO2[c("Plant", "Type", "Treatment")], FUN = addFit)
)

3)或使用data.table

library(data.table)
# Work on a data.table copy of CO2 so the original data frame is left as is.
DT <- data.table(CO2)
# Add `fit` by reference: within each Plant/Type/Treatment group, .SD holds
# that group's remaining columns (conc, uptake), on which the model is fitted.
# Grouping uses DT's own columns rather than an external object that would
# have to stay row-aligned with DT.
DT[, fit := fitted(lm(uptake ~ conc, data = .SD)),
   by = c("Plant", "Type", "Treatment")]