Question

我的数据具有以下形式：

Individual Year    X2011Int    X2010Int    X2009Int   X2008Int   X2007Int
  1     2011    10.36703    18.07049    4789.338   51.3443    423.235
  2     2010    10.36703    18.07049    4789.338   51.3443    423.235
  3     2009    10.36703    18.07049    4789.338   51.3443    423.235
  4     2008    10.36703    18.07049    4789.338   51.3443    423.235

其中X2009Int，X2008Int，X2007Int列表示在指定时间段内特定事件的严重性。我的目标是创建一个函数，我可以循环遍历列以创建以下内容：

Individual   Year            T        T-1           T-2
1             2011     =X2011Int   =X2010Int    =X2009Int
2             2010     =X2010Int   =X2009Int    =X2008Int
3             2009     =X2009Int   =X2008Int    =X2007Int
4             2008     =X2008Int   =X2007Int    =X2006Int

以 2009 年的个体 3 为例进一步说明：在 T 列下我需要 X2009Int 中的事件强度值，T-1 列取 X2008Int 的值，T-2 列取 X2007Int 的值，T-3 列取 X2006Int 的值，依此类推（年份范围为 1980–2012）。

我卡在了下面这个循环上：

# For every distinct Year in d1, copy values from TOY2 into d1's T, T.1, T.2
# and T.3 columns. TOY2's value columns are selected by comparing column names
# with the year number.
# NOTE(review): in the T.1/T.2/T.3 lines both the row filter and the selected
# column are shifted by the lag, so rows of year i-k receive the column named
# i-k (their own year's value) rather than the value from k years earlier.
for(i in unique(d1$Year)){
print(i)  # show which year is being processed
d1$T[d1$Year == i]     <- TOY2[TOY2$Year == i,colnames(TOY2) == i] # rows of year i get the column named i
d1$T.1[d1$Year == (i-1)] <- TOY2[TOY2$Year == (i-1),colnames(TOY2) == (i-1)]  # rows of year i-1 get the column named i-1
d1$T.2[d1$Year == i-2] <- TOY2[TOY2$Year == i-2,colnames(TOY2) == i-2] # rows of year i-2 get the column named i-2
d1$T.3[d1$Year == i-3] <- TOY2[TOY2$Year == i-3,colnames(TOY2) == i-3] # rows of year i-3 get the column named i-3

}

第一列（T 列）的赋值工作正常，但之后的各列没有按预期执行（即原始数据框中错误的值被填进了这些列）。

第二个问题是：我能否使用 lapply 完成同样的任务？如果可以，应该怎么做？

非常感谢任何帮助！

编辑：> dput(head(TOY2))

structure(list(Individual = 1201:1206, Wave = c(6L, 6L, 6L, 6L, 
6L, 6L), Country = c(32L, 32L, 32L, 32L, 32L, 32L), Year = c(2011, 
2011, 2011, 2011, 2011, 2011), `2000` = c(45.19665424, 45.19665424, 
45.19665424, 45.19665424, 45.19665424, 45.19665424), `2001` = c(176.2932337, 
176.2932337, 176.2932337, 176.2932337, 176.2932337, 176.2932337
), `2002` = c(9.601447666, 9.601447666, 9.601447666, 9.601447666, 
9.601447666, 9.601447666), `2003` = c(259.2992188, 259.2992188, 
259.2992188, 259.2992188, 259.2992188, 259.2992188), `2004` = c(4.357976722, 
4.357976722, 4.357976722, 4.357976722, 4.357976722, 4.357976722
), `2005` = c(1.955436508, 1.955436508, 1.955436508, 1.955436508, 
1.955436508, 1.955436508), `2006` = c(1.865651073, 1.865651073, 
1.865651073, 1.865651073, 1.865651073, 1.865651073), `2007` = c(61.65472296, 
61.65472296, 61.65472296, 61.65472296, 61.65472296, 61.65472296
), `2008` = c(34.62974414, 34.62974414, 34.62974414, 34.62974414, 
34.62974414, 34.62974414), `2009` = c(32.96903414, 32.96903414, 
32.96903414, 32.96903414, 32.96903414, 32.96903414), `2010` = c(6.761739867, 
6.761739867, 6.761739867, 6.761739867, 6.761739867, 6.761739867
), `2011` = c(0, 0, 0, 0, 0, 0), `2012` = c(12.05299366, 12.05299366, 
12.05299366, 12.05299366, 12.05299366, 12.05299366)), .Names = c("Individual", 
"Wave", "Country", "Year", "2000", "2001", "2002", "2003", "2004", 
"2005", "2006", "2007", "2008", "2009", "2010", "2011", "2012"
 ), row.names = 1201:1206, class = "data.frame")

预期数据框d1的第一行应如下所示：

   Individual Wave Country  Year  T    T.1       T.2        T.3
      1201       6    32    2011  0  6.76174   32.96903   34.62974

T 值对应原始数据框中 TOY2$`2011` 的值，T-1 值对应 TOY2$`2010` 的值，T-2 值对应 TOY2$`2009` 的值，依此类推。

Answer 1

如果我理解了你的要求，下面这个取巧的办法（使用 tidyr 和 dplyr）应该有效。我更改了 d1 的示例输入，使每个 Year 使用不同的值。我认为这样能更清楚地展示你想要的结果（假设我对你的问题理解正确）。

注意：这为 OP 想要执行的操作提供了另一种方法，并假设不一定需要 lapply 解决方案。

lapply

可能有一种更简单的方法，但这似乎有效。

Answer 2

可以考虑先用 reshape2 的 melt 把数据从宽格式转换为长格式，再用 aggregate 汇总。下面使用您的 dput(head(TOY2))：

library(reshape2)

# Reshape wide -> long: one row per original row and per year column. melt()
# puts the former column names in `variable` and the cell contents in `value`.
mdf <- melt(TOY2, id.vars = c("Individual", "Wave", "Country", "Year"))
mdf$variable <- as.numeric(as.character(mdf$variable))  # CONVERT TO NUMERIC

# CREATE T COLUMNS
# Column "T-i" holds the value observed i years before the row's Year and NA
# everywhere else. NA (instead of 0) keeps genuine zeros and negative values
# distinguishable from "no observation at this lag".
for (i in 0:11) {
  mdf[[paste0("T-", i)]] <- ifelse(mdf$Year - mdf$variable == i,
                                   mdf$value, NA_real_)
}

mdf$variable <- NULL    # REMOVE MELT COLS
mdf$value <- NULL       # REMOVE MELT COLS

# Largest non-missing value of x, or NA when every element is missing
# (plain max(x, na.rm = TRUE) would warn and return -Inf in that case).
max_or_na <- function(x) {
  if (all(is.na(x))) NA_real_ else max(x, na.rm = TRUE)
}

# Collapse back to one row per Individual/Wave/Country/Year. na.pass is
# required because every long-format row is NA in all but one T column and
# the default na.action (na.omit) would drop all of them.
aggdf <- aggregate(. ~ Individual + Wave + Country + Year, mdf,
                   FUN = max_or_na, na.action = na.pass)

#   Individual Wave Country Year T-0     T-1      T-2      T-3      T-4      T-5
# 1       1201    6      32 2011   0 6.76174 32.96903 34.62974 61.65472 1.865651
# 2       1202    6      32 2011   0 6.76174 32.96903 34.62974 61.65472 1.865651
# 3       1203    6      32 2011   0 6.76174 32.96903 34.62974 61.65472 1.865651
# 4       1204    6      32 2011   0 6.76174 32.96903 34.62974 61.65472 1.865651
# 5       1205    6      32 2011   0 6.76174 32.96903 34.62974 61.65472 1.865651
# 6       1206    6      32 2011   0 6.76174 32.96903 34.62974 61.65472 1.865651
#        T-6      T-7      T-8      T-9     T-10     T-11
# 1 1.955437 4.357977 259.2992 9.601448 176.2932 45.19665
# 2 1.955437 4.357977 259.2992 9.601448 176.2932 45.19665
# 3 1.955437 4.357977 259.2992 9.601448 176.2932 45.19665
# 4 1.955437 4.357977 259.2992 9.601448 176.2932 45.19665
# 5 1.955437 4.357977 259.2992 9.601448 176.2932 45.19665
# 6 1.955437 4.357977 259.2992 9.601448 176.2932 45.19665

Answer 3

这是矩阵子集的一个很好的例子。

# Matrix-index only the year columns. Indexing the whole data frame with a
# two-column matrix first coerces it with as.matrix(), which yields formatted
# character strings as soon as one column is non-numeric (such as the factor
# `country` in the example data).
year_cols <- grep("^[0-9]+$", names(toy), value = TRUE)
year_values <- as.matrix(toy[year_cols])
# Position, within the year columns, of each row's own year (NA if absent).
col_index <- match(toy$year, as.numeric(year_cols))
toy$T_0 <- year_values[cbind(seq_len(nrow(toy)), col_index - 0)]

现在我们可以把它放进一个函数中：

# Look up, for every row of `toy`, the value recorded `ind` years before that
# row's `year`.
#
# ind: lag in years (0 = the row's own year).
# Returns a vector with one element per row of `toy`; the element is NA when
# `toy` has no column for `year - ind`.
#
# Reads the data frame `toy` from the enclosing environment. Year columns are
# recognised by an all-digit name (e.g. "2001").
val_find <- function(ind) {
  year_cols <- grep("^[0-9]+$", names(toy), value = TRUE)
  # Restrict to the year columns so the values keep their numeric type;
  # matrix-indexing the full data frame would coerce everything to character.
  year_values <- as.matrix(toy[year_cols])
  # Match by year value rather than by position offset, so a lag that falls
  # outside the available years gives NA instead of reading an id column.
  col_index <- match(toy$year - ind, as.numeric(year_cols))
  year_values[cbind(seq_len(nrow(toy)), col_index)]
}

# Call val_find for each lag 0..8 and store the results in columns T_0 ... T_8.
toy[,paste0("T_", 0:8)] <- sapply(0:8, val_find)

示例数据

# Toy data set with 12 rows: four identifier columns (id, wave, country, year)
# followed by one value column per calendar year 2001-2012.
id <- seq_len(12)
wave <- 6
country <- gl(n = 3, k = 4, labels = LETTERS[1:3])
year <- rep(c(2011, 2012, 2010), each = 4)
# 12 x 12 block of distinct values, filled column by column, named by year.
dates <- as.data.frame(matrix(1:144, nrow = 12, ncol = 12))
names(dates) <- as.character(2001:2012)
toy <- cbind(id, wave, country, year, dates)

# Try the function: add the lag columns T_0 ... T_8, then print the result.
toy[,paste0("T_", 0:8)] <- sapply(0:8, val_find)
toy
# id wave country year 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 T_0 T_1 T_2 T_3 T_4 T_5 T_6 T_7
#  1   1    6       A 2011    1   13   25   37   49   61   73   85   97  109  121  133 121 109  97  85  73  61  49  37
#  2   2    6       A 2011    2   14   26   38   50   62   74   86   98  110  122  134 122 110  98  86  74  62  50  38
#  3   3    6       A 2011    3   15   27   39   51   63   75   87   99  111  123  135 123 111  99  87  75  63  51  39
#  4   4    6       A 2011    4   16   28   40   52   64   76   88  100  112  124  136 124 112 100  88  76  64  52  40
#  5   5    6       B 2012    5   17   29   41   53   65   77   89  101  113  125  137 137 125 113 101  89  77  65  53
#  6   6    6       B 2012    6   18   30   42   54   66   78   90  102  114  126  138 138 126 114 102  90  78  66  54
#  7   7    6       B 2012    7   19   31   43   55   67   79   91  103  115  127  139 139 127 115 103  91  79  67  55
#  8   8    6       B 2012    8   20   32   44   56   68   80   92  104  116  128  140 140 128 116 104  92  80  68  56
#  9   9    6       C 2010    9   21   33   45   57   69   81   93  105  117  129  141 117 105  93  81  69  57  45  33
#  10 10    6       C 2010   10   22   34   46   58   70   82   94  106  118  130  142 118 106  94  82  70  58  46  34
#  11 11    6       C 2010   11   23   35   47   59   71   83   95  107  119  131  143 119 107  95  83  71  59  47  35
#  12 12    6       C 2010   12   24   36   48   60   72   84   96  108  120  132  144 120 108  96  84  72  60  48  36

从数据框中选择每行的前N列值

3 个答案:

示例数据