以我的第一个问题为出发点: Split data frame into multiple data frames based on information in a xts object
现在我遇到了一个新问题。
假设 data.frame d1 和 xts 对象 d2 中存在重复条目——即 d1 中的字母“h”同时出现在 grp B 和 grp A 中,因此 d2 中也有两个名为“h”的时间序列。我们怎样解决这个问题?
# Toy lookup table: 11 rows pairing a randomly drawn group (A-D) with a
# series name. The name "h" is listed twice (rows 1 and 9) on purpose, so the
# same name can end up in more than one group.
# No seed is set, so the drawn groups differ from run to run.
d1 <- data.frame(
  grp = sample(c("A", "B", "C", "D"), size = 11, replace = TRUE),
  name = c("h", letters[1:10])
)
> d1
grp name
1 B h
2 D a
3 B b
4 D c
5 B d
6 C e
7 A f
8 A g
9 A h
10 B i
11 C j
# 5 x 11 matrix of uniform random numbers rounded to two decimals. The column
# names are the 11 series names; "h" labels two columns (1 and 9).
d2 <- matrix(
  round(runif(5 * 11), digits = 2),
  ncol = 11,
  dimnames = list(NULL, c("h", letters[1:10]))
)
library(xts)
# Turn the matrix into an xts time series with one row per calendar day,
# from 2011-01-01 to 2011-01-05.
d2 <- xts(
  d2,
  order.by = seq(as.Date("2011-01-01"), as.Date("2011-01-05"), by = "day")
)
> d2
h a b c d e f g h i j
2011-01-01 0.04 0.77 0.49 0.87 0.23 0.95 0.69 0.35 0.14 0.47 0.25
2011-01-02 0.73 0.46 0.28 0.86 0.75 0.08 0.00 0.89 0.50 0.12 0.54
2011-01-03 0.36 0.61 0.92 0.80 0.12 0.25 0.18 0.44 0.73 0.19 0.30
2011-01-04 0.18 0.65 0.68 0.44 0.54 0.84 0.13 0.64 0.54 0.81 0.73
2011-01-05 0.58 0.55 0.10 0.33 0.55 0.23 0.82 0.21 0.58 0.24 0.04
这不起作用:
# Attempt to split the columns of d2 by the group recorded in d1.
# NOTE: match() returns only the FIRST position of each name in d1$name, so
# every column called "h" is looked up in the same row of d1 and lands in the
# same group; the group of the other "h" row never receives its column.
out <- sapply(unique(d1$grp), function(g) {
  col_grp <- d1$grp[match(colnames(d2), d1$name)]
  d2[, which(col_grp == g)]
})
names(out) <- unique(d1$grp)
out
$B
h b d h i
2011-01-01 0.04 0.49 0.23 0.14 0.47
2011-01-02 0.73 0.28 0.75 0.50 0.12
2011-01-03 0.36 0.92 0.12 0.73 0.19
2011-01-04 0.18 0.68 0.54 0.54 0.81
2011-01-05 0.58 0.10 0.55 0.58 0.24
$D
a c
2011-01-01 0.77 0.87
2011-01-02 0.46 0.86
2011-01-03 0.61 0.80
2011-01-04 0.65 0.44
2011-01-05 0.55 0.33
$C
e j
2011-01-01 0.95 0.25
2011-01-02 0.08 0.54
2011-01-03 0.25 0.30
2011-01-04 0.84 0.73
2011-01-05 0.23 0.04
$A
f g
2011-01-01 0.69 0.35
2011-01-02 0.00 0.89
2011-01-03 0.18 0.44
2011-01-04 0.13 0.64
2011-01-05 0.82 0.21
预期输出应为:
out
$B
h b d i
2011-01-01 0.04 0.49 0.23 0.47
2011-01-02 0.73 0.28 0.75 0.12
2011-01-03 0.36 0.92 0.12 0.19
2011-01-04 0.18 0.68 0.54 0.81
2011-01-05 0.58 0.10 0.55 0.24
$D
a c
2011-01-01 0.77 0.87
2011-01-02 0.46 0.86
2011-01-03 0.61 0.80
2011-01-04 0.65 0.44
2011-01-05 0.55 0.33
$C
e j
2011-01-01 0.95 0.25
2011-01-02 0.08 0.54
2011-01-03 0.25 0.30
2011-01-04 0.84 0.73
2011-01-05 0.23 0.04
$A
f g h
2011-01-01 0.69 0.35 0.14
2011-01-02 0.00 0.89 0.50
2011-01-03 0.18 0.44 0.73
2011-01-04 0.13 0.64 0.54
2011-01-05 0.82 0.21 0.58
非常感谢帮助!
提前谢谢你......
答案 0 :(得分:0)
你可以这样做:
# Disambiguate repeated series names by appending "#k", where k counts the
# k-th occurrence of a name in d1 (h#1, a#1, ..., h#2, ...). The counter is
# computed over integer positions instead of over `name` itself, so it also
# works when d1$name is a factor (counters cannot be stored in a factor).
name1 <- with(
  d1,
  paste0(name, "#", ave(seq_along(name), name, FUN = seq_along))
)

# Relabel the columns of d2 with the unique names. This relies on the columns
# of d2 being in the same order as the rows of d1, as in the example.
colnames(d2) <- name1

# Build one object per group: pick the group's columns by their unique names,
# then restore the original labels.
res <- lapply(split(name1, d1$grp), function(x) {
  x1 <- d2[, x, drop = FALSE]
  # Remove only the trailing "#k" counter, so a name that itself contains
  # "#" is left intact.
  colnames(x1) <- sub("#[0-9]+$", "", colnames(x1))
  x1
})

# split() returns the groups in sorted/level order; reorder them to the order
# in which the groups first appear in d1.
res1 <- res[match(unique(d1$grp), names(res))]
res1
# $B
# h b d i
#2011-01-01 0.04 0.49 0.23 0.47
#2011-01-02 0.73 0.28 0.75 0.12
#2011-01-03 0.36 0.92 0.12 0.19
#2011-01-04 0.18 0.68 0.54 0.81
#2011-01-05 0.58 0.10 0.55 0.24
# $D
# a c
#2011-01-01 0.77 0.87
#2011-01-02 0.46 0.86
#2011-01-03 0.61 0.80
#2011-01-04 0.65 0.44
#2011-01-05 0.55 0.33
# $C
# e j
#2011-01-01 0.95 0.25
#2011-01-02 0.08 0.54
#2011-01-03 0.25 0.30
#2011-01-04 0.84 0.73
#2011-01-05 0.23 0.04
# $A
# f g h
#2011-01-01 0.69 0.35 0.14
#2011-01-02 0.00 0.89 0.50
#2011-01-03 0.18 0.44 0.73
#2011-01-04 0.13 0.64 0.54
#2011-01-05 0.82 0.21 0.58
# Reproducible copy of d1: the group label and series name of each of the
# 11 series. "h" occurs twice (rows 1 and 9), once in group B and once in A.
d1 <- data.frame(
  grp = c("B", "D", "B", "D", "B", "C", "A", "A", "A", "B", "C"),
  name = c("h", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j"),
  row.names = as.character(1:11),
  stringsAsFactors = FALSE
)
# Reproducible copy of d2, written out as a raw structure() call.
# - The 55 values fill a 5 x 11 matrix (.Dim) column by column.
# - Column names come from .Dimnames; "h" appears twice (columns 1 and 9).
# - `index` holds five numeric time stamps with tzone "UTC" and tclass "Date";
#   they correspond to the dates 2011-01-01 ... 2011-01-05 in the printed
#   output.
# - The class is c("xts", "zoo"), so the xts package must be loaded to print
#   or subset the object as a time series.
d2 <- structure(c(0.04, 0.73, 0.36, 0.18, 0.58, 0.77, 0.46, 0.61, 0.65,
0.55, 0.49, 0.28, 0.92, 0.68, 0.1, 0.87, 0.86, 0.8, 0.44, 0.33,
0.23, 0.75, 0.12, 0.54, 0.55, 0.95, 0.08, 0.25, 0.84, 0.23, 0.69,
0, 0.18, 0.13, 0.82, 0.35, 0.89, 0.44, 0.64, 0.21, 0.14, 0.5,
0.73, 0.54, 0.58, 0.47, 0.12, 0.19, 0.81, 0.24, 0.25, 0.54, 0.3,
0.73, 0.04), .Dim = c(5L, 11L), .Dimnames = list(NULL, c("h",
"a", "b", "c", "d", "e", "f", "g", "h", "i", "j")), index =
structure(c(1293840000, 1293926400, 1294012800, 1294099200, 1294185600),
tzone = "UTC", tclass = "Date"), .indexCLASS = "Date", tclass = "Date",
.indexTZ = "UTC", tzone = "UTC", class = c("xts", "zoo"))