我的 big.data 中有一列(year.60),它按行给出所需值所在列的列名,如下所示:
# Minimal reproduction of the problem: each row's `year.60` holds the NAME of
# the column whose value should end up in `Y.60` for that row.
# `stringsAsFactors = FALSE` keeps `year.60` a character vector on every R
# version, so no as.character() fix-up is needed afterwards.
big.data <- data.frame(
  ID = c(1, 2),
  Y.1990 = c(100, 120),
  Y.1991 = c(NA, 125),
  Y.1992 = c(115, 130),
  year.60 = c("Y.1990", "Y.1991"),
  # Numeric placeholder, so the column already has the type it will hold.
  Y.60 = c(NA_real_, NA_real_),
  stringsAsFactors = FALSE
)
big.data
# ID Y.1990 Y.1991 Y.1992 year.60 Y.60
#1 1 100 NA 115 Y.1990 NA
#2 2 120 125 130 Y.1991 NA
如何让 Y.60 在第 1 行等于 big.data$Y.1990、在第 2 行等于 big.data$Y.1991,依此类推直到全部 10,000 行(即 Y.60 逐行取 year.60 所指向的那一列的值)?
# Failing attempt, kept verbatim: `year.60` is written as a bare symbol, so R
# looks for a standalone object of that name instead of the big.data column.
# The error printed right after this line ("object 'year.60' not found") is
# the result.
big.data$Y.60= big.data[[year.60]]
Error in (function(x, i, exact) if (is.matrix(i))
as.matrix(x)[[i]] else .subset2(x, :
object 'year.60' not found
我希望 skinny.data$Y.60 在 skinny.data$Y.60[1,] 处取 skinny.data$y.1970 的值,在 skinny.data$Y.60[2,] 处取 skinny.data$y.1953 的值,在 skinny.data$Y.60[3,] 处取 skinny.data$y.1963 的值,在 skinny.data$Y.60[4,] 处取 skinny.data$y.1993 的值,依此类推。以下是 str(skinny.data) 的输出:
'data.frame': 42001 obs. of 39 variables:
$ Y.60 : chr NA NA NA NA ...
$ year.60 : chr "y.1970" "y.1953" "y.1963" "y.1993" ...
$ y.1968 : num 10006 19467 19467 19467 19467 ...
$ y.1969 : num NA 18994 18994 18994 18994 ...
$ y.1970 : num NA 23150 23150 23150 23150 ...
$ y.1971 : num NA 15041 15041 15041 25773 ...
$ y.1972 : num NA 17183 17183 NA 17183 ...
$ y.1973 : num NA 14354 14354 NA 14354 ...
$ y.1974 : num NA 6829 6829 NA 6829 ...
$ y.1975 : num NA 9444 9444 NA 9444 ...
$ y.1976 : num NA 3717 3717 NA 9294 ...
$ y.1977 : num NA 0 0 NA 2636 ...
$ y.1978 : num NA 0 0 NA 4125 ...
$ y.1979 : num NA 3394 3394 NA 12577 ...
$ y.1980 : num NA 0 0 NA 4821 ...
$ y.1981 : num NA 0 0 NA 7257 ...
$ y.1982 : num NA 4778 4778 NA 8009 ...
# ...
# etc.
# Work on the first five rows only while debugging.
skinny.data.mini= skinny.data[1:5, ]
## mapply
# Attempt 1: transpose the frame so each original row becomes a column, then
# ask getElement() for the element named by that row's year.60 value.
# NOTE(review): the error below suggests the per-row vectors produced by
# data.frame(t(...)) carry no element names to look up -- confirm.
skinny.data.mini$Y.60 <-
mapply(getElement, name=skinny.data.mini$year.60,
data.frame(t(skinny.data.mini)))
# Error in `[[.default`(object, name, exact = TRUE) :
# subscript out of bounds
## quick vectorized approach
# Attempt 2: build a two-column (row, column) index matrix and extract one
# cell per row. The positions are computed from skinny.data.mini, but the
# extraction is done on the full skinny.data; both address the same cells
# only because the mini frame is the leading rows of skinny.data.
# NOTE(review): matrix-style indexing of a data frame works on a matrix
# version of it; since this frame has chr columns, the extracted values are
# presumably character -- that would explain the "not numeric" complaints
# from describe() below. Confirm with str(skinny.data.mini$Y.60).
skinny.data.mini$Y.60 <-
skinny.data[cbind(seq_len(nrow(skinny.data.mini)),
match(skinny.data.mini$year.60, names(skinny.data.mini)))]
# NOTE(review): describe() is not defined in this snippet; presumably it
# comes from an attached package -- confirm which one.
describe(skinny.data.mini$Y.60)
# Error in x - mx : non-numeric argument to binary operator
# In addition: Warning messages:
# 1: In mean.default(x, na.rm = na.rm) :
# argument is not numeric or logical: returning NA
# 2: In mean.default(x, na.rm = na.rm, trim = trim) :
# argument is not numeric or logical: returning NA
# 3: In mean.default(x) : argument is not numeric or logical: returning NA
## getval
# Attempt 3: for one row index `byrow`, find the column whose name equals that
# row's year.60 value, pull that whole column, then keep its byrow-th element.
# Reads skinny.data.mini from the enclosing environment.
getval <- function(byrow)
skinny.data.mini[, match(skinny.data.mini$year.60[byrow],
names(skinny.data.mini))][byrow]
# Apply the lookup to every row index of the mini frame.
skinny.data.mini$Y.60 <- sapply(1:nrow(skinny.data.mini), getval)
# NOTE(review): the message below names forty.fam.head.laby.year rather than
# year.60, so it appears to come from a run that used a different column
# name -- confirm. "undefined columns selected" presumably means match()
# found no column for at least one row.
# Show Traceback
# Rerun with Debug
# Error in `[.data.frame`(skinny.data.mini, ,
# match(skinny.data.mini$forty.fam.head.laby.year[byrow], :
# undefined columns selected
但即使我把数据限制为 5 行,上面这些方法也全部失败了。
答案 0 :(得分:1)
这对你有用吗?
# Look up, for a single row, the value stored in the column that this row's
# `year.60` entry names.
#
# byrow: a single row index into big.data.
# Returns the value at (byrow, <column named by year.60[byrow]>), or NA_real_
# when year.60 is NA or names no existing column. Without that guard a single
# bad row aborts the whole run with "undefined columns selected".
# Reads big.data from the enclosing environment.
getval <- function(byrow) {
  col <- match(big.data$year.60[byrow], names(big.data))
  if (is.na(col)) {
    return(NA_real_)
  }
  big.data[[col]][byrow]
}
# vapply() pins the result to one number per row (sapply() would silently
# change type on odd input); seq_len() is safe for a zero-row frame, where
# 1:nrow() would iterate over c(1, 0).
big.data$Y.60 <- vapply(seq_len(nrow(big.data)), getval, numeric(1))
或者,如果你真的想这么做:
# mapply() variant: walk row indices and year.60 names in parallel and pick
# the matching cell directly from the named column. This avoids
# data.frame(t(big.data)), which turns every value into text and relies on
# element names surviving the transpose for getElement() to find anything.
# Rows whose year.60 names no existing column (or is NA) yield NA.
big.data$Y.60 <- mapply(
  function(row, col) {
    if (col %in% names(big.data)) big.data[[col]][row] else NA_real_
  },
  seq_len(nrow(big.data)),
  big.data$year.60
)
答案 1 :(得分:1)
这是一种快速矢量化方法
# Vectorized lookup with a (row, column) index matrix.
# Indexing the whole data frame this way would first turn it into a matrix,
# and because year.60 is character that matrix is character too: Y.60 would
# come back as formatted strings. So build a matrix from only the columns
# that year.60 actually refers to; as long as those are numeric, the result
# stays numeric.
year.cols <- intersect(names(big.data), big.data$year.60)
year.values <- as.matrix(big.data[year.cols])
# Rows whose year.60 is NA or names no existing column get NA.
big.data$Y.60 <- year.values[cbind(seq_len(nrow(big.data)),
                                   match(big.data$year.60, year.cols))]
big.data
# ID Y.1990 Y.1991 Y.1992 year.60 Y.60
# 1 1 100 NA 115 Y.1990 100
# 2 2 120 125 130 Y.1991 125