使用“apply”系列函数处理data.frames列表

时间:2013-01-03 14:50:57

标签: r apply lapply sapply

我有一个数据框,并将其拆分成了三个(或任意数量的)数据框。

我要做的是自动处理每个数据框中的每一列,并添加现有变量的滞后版本。

例如,如果每个data.frame(V1,V2,V3)中有三个变量,我想自动(没有硬编码)添加V1.lag,V2.lag和V3.lag。

这是我到目前为止所做的,但我现在已经被困住了。

非常感谢任何帮助。

## Example data: 72 rows with three random normal variables and two grouping
## labels (dim1: A/B/C in runs of 24 rows, dim2: J/K in runs of 36 rows).
dd <- data.frame(
  matrix(rnorm(216), nrow = 72, ncol = 3),
  rep(c("A", "B", "C"), each = 24),
  rep(c("J", "K"), each = 36)
)
names(dd) <- c("v1", "v2", "v3", "dim1", "dim2")
dd
## One data frame per value of dim1.
dds <- split(dd, f = dd$dim1)
dds
# Missing step 1: Automatically create v1.lag, v2.lag, v3.lag, etc (if required)

最后,我想将三个数据框合并为一个大数据框,其中包括新创建的变量。

# Missing step 2: Merge data frames into single data frame

任何帮助都将受到高度赞赏。

编辑: 在评论区,我还询问了如何计算移动平均值(而不是滞后值)。解决方案如下:

## Trailing moving average of `x`.
##   x: numeric vector (or time series) to smooth.
##   f: filter weights; the default c(1, 1, 1) gives a 3-point moving average.
## Returns a plain numeric vector of the same length as `x`; the first
## length(f) - 1 entries are NA because sides = 1 only uses past values.
## stats::filter() is namespaced so that a masking filter() (e.g. the one
## from dplyr) cannot be picked up by mistake.
ma <- function(x, f = c(1, 1, 1)) {
  as.numeric(stats::filter(x, f, sides = 1) / length(f))
}
## Add a moving-average column for every numeric variable of one data frame.
##   df: a data frame.
##   f:  filter weights handed to ma(); length(f) also appears in the new names.
## Returns `df` with one extra column "<name>.ma.<length(f)>" per numeric
## column. A data frame without numeric columns is returned unchanged.
foo <- function(df, f = c(1, 1, 1)) {
  ## vapply() always yields a logical vector, even for a zero-column df
  nums <- vapply(df, is.numeric, logical(1))
  ## nothing to smooth: paste() would otherwise invent a bogus ".ma.<k>" name
  ## and the assignment below would fail
  if (!any(nums)) {
    return(df)
  }
  nams <- paste(names(df)[nums], "ma", length(f), sep = ".") ## new names foo.ma.<k>
  df[, nams] <- lapply(df[nums], ma, f = f) ## apply ma to each numeric variable
  df
}

2 个答案:

答案 0 :(得分:3)

这是一个选项:

## reuse @Andrie's clag() function as lag() is silly
## Shift `x` down by `n` positions, padding the front with NA.
##   x: vector to lag.
##   n: single non-negative number of positions to shift (default 1).
## Returns a vector of the same length as `x`. n = 0 gives `x` back and
## n >= length(x) gives all NA; the plain head(x, -n) form got both wrong
## (empty result for n = 0, a result of length n when n > length(x)).
clag <- function(x, n = 1) {
  stopifnot(length(n) == 1L, !is.na(n), n >= 0)
  len <- length(x)
  n <- min(n, len) ## never pad with more NAs than there are elements
  c(rep(NA, n), x[seq_len(len - n)])
}
## Wrapper that adds lagged copies of the numeric variables of a single DF.
##   df: a data frame.
##   n:  number of positions to lag by, handed to clag() (default 1).
## Returns `df` with one extra column "<name>.lag" per numeric column.
## A data frame without numeric columns is returned unchanged.
foo <- function(df, n = 1) {
  ## vapply() always yields a logical vector, even for a zero-column df
  nums <- vapply(df, is.numeric, logical(1))
  ## nothing to lag: paste() would otherwise invent a bogus ".lag" name
  ## and the assignment below would fail
  if (!any(nums)) {
    return(df)
  }
  nams <- paste(names(df)[nums], "lag", sep = ".") ## new names foo.lag
  df[, nams] <- lapply(df[nums], clag, n = n) ## apply clag to each numeric variable
  df
}

## apply foo() to each data frame in the list dds; the result is a list
lapply(dds, foo)

给出了:

> lapply(dds, foo)
$A
            v1          v2           v3 dim1 dim2      v1.lag      v2.lag       v3.lag
1  -1.15107343  1.47671548 -0.146501739    A    J          NA          NA           NA
2  -1.61068272 -0.85397093 -1.240187604    A    J -1.15107343  1.47671548 -0.146501739
3  -1.23470282 -0.26194027  1.938344030    A    J -1.61068272 -0.85397093 -1.240187604
4  -0.57874043 -0.44600138  0.326069423    A    J -1.23470282 -0.26194027  1.938344030
5   0.16139066 -1.95804742 -0.744678169    A    J -0.57874043 -0.44600138  0.326069423
6  -1.01497027  0.36850034  1.532640065    A    J  0.16139066 -1.95804742 -0.744678169
7   0.72288058 -0.40115543 -0.686450596    A    J -1.01497027  0.36850034  1.532640065
8  -0.51300447  0.19686310  0.441649595    A    J  0.72288058 -0.40115543 -0.686450596
9   0.95439966 -2.03513002 -0.897784897    A    J -0.51300447  0.19686310  0.441649595
10 -1.36736081 -0.41040962 -0.459403176    A    J  0.95439966 -2.03513002 -0.897784897
11  0.59503846  0.28925760 -0.003095389    A    J -1.36736081 -0.41040962 -0.459403176
12 -0.37951869  0.49551357  0.269412108    A    J  0.59503846  0.28925760 -0.003095389
13 -0.52953401 -0.28433351  1.125505917    A    J -0.37951869  0.49551357  0.269412108
14 -1.73466020  0.25442637 -1.094139749    A    J -0.52953401 -0.28433351  1.125505917
15  0.08479137 -0.11688894 -1.034378216    A    J -1.73466020  0.25442637 -1.094139749
16 -2.45854464  0.15806266 -2.275995527    A    J  0.08479137 -0.11688894 -1.034378216
17  1.10663502  1.28587230  0.070334868    A    J -2.45854464  0.15806266 -2.275995527
18 -0.01945585  1.63659116 -0.137040232    A    J  1.10663502  1.28587230  0.070334868
19  0.59026606 -1.95724134 -0.480014930    A    J -0.01945585  1.63659116 -0.137040232
20 -0.32245933  1.35372005  1.348717525    A    J  0.59026606 -1.95724134 -0.480014930
21 -0.42560327 -1.30145328  2.020609480    A    J -0.32245933  1.35372005  1.348717525
22  1.19550777  0.18417336  0.099232994    A    J -0.42560327 -1.30145328  2.020609480
23  1.20198621  0.05926023 -0.171505810    A    J  1.19550777  0.18417336  0.099232994
24 -1.00667141  1.32441782  0.056696824    A    J  1.20198621  0.05926023 -0.171505810

$B
           v1          v2          v3 dim1 dim2     v1.lag      v2.lag      v3.lag
25  0.7878614  0.10354576 -0.69308980    B    J         NA          NA          NA
26  0.5824551  0.42319616  0.42734938    B    J  0.7878614  0.10354576 -0.69308980
27 -0.2769730  1.51559382 -0.64106570    B    J  0.5824551  0.42319616  0.42734938
28 -0.5736416 -1.58745816 -1.13274631    B    J -0.2769730  1.51559382 -0.64106570
29 -1.9082145 -0.26148604 -0.04699411    B    J -0.5736416 -1.58745816 -1.13274631
30 -1.6254549  0.39390814 -1.79993619    B    J -1.9082145 -0.26148604 -0.04699411
31  0.3963274  1.79667985  0.92873142    B    J -1.6254549  0.39390814 -1.79993619
32 -0.5889415 -0.04690351  1.43394978    B    J  0.3963274  1.79667985  0.92873142
33  0.4683819 -1.34023029  0.18749782    B    J -0.5889415 -0.04690351  1.43394978
34  0.7373052 -0.93470320 -1.14528378    B    J  0.4683819 -1.34023029  0.18749782
35 -0.7751348 -1.26533917  0.11246728    B    J  0.7373052 -0.93470320 -1.14528378
36  1.7786627 -0.19757164  0.14150980    B    J -0.7751348 -1.26533917  0.11246728
37  1.8570412 -2.15174901  1.07751105    B    K  1.7786627 -0.19757164  0.14150980
38  0.5128697  0.40112948 -0.94826274    B    K  1.8570412 -2.15174901  1.07751105
39  0.8710264 -0.59978467  0.54462858    B    K  0.5128697  0.40112948 -0.94826274
40 -0.3711512 -0.15632337  0.15832543    B    K  0.8710264 -0.59978467  0.54462858
41  1.4505624  0.20915835  2.59369653    B    K -0.3711512 -0.15632337  0.15832543
42  0.0871329  0.25440471  0.30096063    B    K  1.4505624  0.20915835  2.59369653
43 -0.7398342 -1.72678544  0.45534941    B    K  0.0871329  0.25440471  0.30096063
44  0.1953264 -0.60560630 -0.36884626    B    K -0.7398342 -1.72678544  0.45534941
45 -0.2702493  0.50747209 -0.50699830    B    K  0.1953264 -0.60560630 -0.36884626
46  0.2987449  0.46347722  1.20725190    B    K -0.2702493  0.50747209 -0.50699830
47 -0.5682779 -0.71470625 -0.07865078    B    K  0.2987449  0.46347722  1.20725190
48 -1.5291983  1.80092050 -1.73317395    B    K -0.5682779 -0.71470625 -0.07865078

$C
            v1           v2           v3 dim1 dim2      v1.lag       v2.lag       v3.lag
49  0.06095825 -0.518263220  0.510999371    C    K          NA           NA           NA
50  0.40077713  0.477989115  0.855752036    C    K  0.06095825 -0.518263220  0.510999371
51  0.06763037  0.802110426 -0.102536186    C    K  0.40077713  0.477989115  0.855752036
52 -0.90530986 -0.005452101 -0.089703589    C    K  0.06763037  0.802110426 -0.102536186
53 -0.79360209  0.299844218 -0.765164525    C    K -0.90530986 -0.005452101 -0.089703589
54  1.34050298 -1.093705314 -0.955952912    C    K -0.79360209  0.299844218 -0.765164525
55  0.45377712  0.054978470  0.382874895    C    K  1.34050298 -1.093705314 -0.955952912
56  0.95283101 -0.564193352  1.458002944    C    K  0.45377712  0.054978470  0.382874895
57  1.09157807 -1.351894599 -1.366084414    C    K  0.95283101 -0.564193352  1.458002944
58  2.71993062 -1.126272793  1.374046159    C    K  1.09157807 -1.351894599 -1.366084414
59 -0.04685281  0.423085481 -0.455903151    C    K  2.71993062 -1.126272793  1.374046159
60 -0.31055449  0.818291875  0.400386018    C    K -0.04685281  0.423085481 -0.455903151
61 -0.54904545  1.542272313  0.648135340    C    K -0.31055449  0.818291875  0.400386018
62 -0.72914142  1.495482707 -0.212135011    C    K -0.54904545  1.542272313  0.648135340
63 -0.27374611 -1.309254707 -0.005125047    C    K -0.72914142  1.495482707 -0.212135011
64  0.87439910 -2.666588138  1.043778597    C    K -0.27374611 -1.309254707 -0.005125047
65  1.07142042  0.446233778 -0.286784683    C    K  0.87439910 -2.666588138  1.043778597
66 -0.10431808  0.510820156  0.405309569    C    K  1.07142042  0.446233778 -0.286784683
67 -1.04006019 -0.041327622  1.202855549    C    K -0.10431808  0.510820156  0.405309569
68  0.41084794 -0.376796559 -1.147032471    C    K -1.04006019 -0.041327622  1.202855549
69  0.88329788 -0.344611311  1.862998306    C    K  0.41084794 -0.376796559 -1.147032471
70 -0.67916248  1.396061431  0.697517685    C    K  0.88329788 -0.344611311  1.862998306
71  3.55359528 -0.207825480 -0.949834845    C    K -0.67916248  1.396061431  0.697517685
72  0.11329113  0.294747300 -0.955891419    C    K  3.55359528 -0.207825480 -0.949834845

对于最后一步,即合并步骤,先保存上面的结果:

## overwrite dds with the list returned by applying foo() to each element
dds <- lapply(dds, foo)

然后使用 do.call() 调用 rbind(),把各个数据框合并起来,如:

## call rbind() with the elements of dds as its arguments, stacking them
## into a single data frame
df2 <- do.call(rbind, dds)

给出:

> head(df2)
            v1         v2         v3 dim1 dim2     v1.lag     v2.lag     v3.lag
A.1 -1.1510734  1.4767155 -0.1465017    A    J         NA         NA         NA
A.2 -1.6106827 -0.8539709 -1.2401876    A    J -1.1510734  1.4767155 -0.1465017
A.3 -1.2347028 -0.2619403  1.9383440    A    J -1.6106827 -0.8539709 -1.2401876
A.4 -0.5787404 -0.4460014  0.3260694    A    J -1.2347028 -0.2619403  1.9383440
A.5  0.1613907 -1.9580474 -0.7446782    A    J -0.5787404 -0.4460014  0.3260694
A.6 -1.0149703  0.3685003  1.5326401    A    J  0.1613907 -1.9580474 -0.7446782

答案 1 :(得分:2)

使用 plyr 包可以一次完成上述所有步骤:

library(plyr)
## Shift `x` down by `n` positions, padding the front with NA.
##   x: vector to lag.
##   n: single non-negative number of positions to shift (default 1).
## Returns a vector of the same length as `x`. n = 0 gives `x` back and
## n >= length(x) gives all NA; the plain head(x, -n) form got both wrong
## (empty result for n = 0, a result of length n when n > length(x)).
clag <- function(x, n = 1) {
  stopifnot(length(n) == 1L, !is.na(n), n >= 0)
  len <- length(x)
  n <- min(n, len) ## never pad with more NAs than there are elements
  c(rep(NA, n), x[seq_len(len - n)])
}
## split dd by dim1 and call transform() on each piece, adding lagged copies
## of v1, v2 and v3 computed with clag() inside that piece
x <- ddply(dd, .(dim1), transform, 
               v1.lag=clag(v1), v2.lag=clag(v2), v3.lag=clag(v3))

## show the first rows of the combined result
head(x)
          v1         v2         v3 dim1 dim2     v1.lag     v2.lag     v3.lag
1  0.4465910 -0.2564334 -0.9122640    A    J         NA         NA         NA
2 -0.3748563 -0.9461061  0.1641274    A    J  0.4465910 -0.2564334 -0.9122640
3 -0.5010834 -0.4413026 -0.7509968    A    J -0.3748563 -0.9461061  0.1641274
4 -0.5278584 -0.6377017  0.5528831    A    J -0.5010834 -0.4413026 -0.7509968
5 -0.4290586  0.4687849  0.6885102    A    J -0.5278584 -0.6377017  0.5528831
6  0.1179935 -0.2742456 -0.1945482    A    J -0.4290586  0.4687849  0.6885102