根据xts对象中的信息将数据帧拆分为多个数据帧(第2部分)

时间:2014-11-12 13:45:58

标签: r split dataframe xts

以我的第一个问题为出发点: Split data frame into multiple data frames based on information in a xts object

现在我遇到了一个新问题。

假设您的数据框 d1 和 d2 中有重复条目——即在 d1 中,字母“h”同时出现在 grp B 和 grp A 中,因此 d2 中也有两条名为“h”的时间序列。我们怎样解决这个问题?

# Lookup table: one row per series, with a randomly drawn group (A-D).
# The series name "h" is listed twice on purpose (rows 1 and 9).
d1 <- data.frame(
  grp = sample(c("A", "B", "C", "D"), size = 11, replace = TRUE),
  name = c("h", letters[1:10])
)
> d1
grp name
1    B    h
2    D    a
3    B    b
4    D    c
5    B    d
6    C    e
7    A    f
8    A    g
9    A    h
10   B    i
11   C    j

library(xts)

# 5 x 11 matrix of uniform random values rounded to two decimals; the column
# names mirror d1$name, so "h" appears twice.
d2 <- matrix(round(runif(55), digits = 2), ncol = 11,
             dimnames = list(NULL, c("h", letters[1:10])))

# Index the rows by five consecutive days starting 2011-01-01.
d2 <- xts(d2, seq(as.Date("2011-01-01"), as.Date("2011-01-05"), by = 1))

> d2
              h    a    b    c    d    e    f    g    h    i    j
2011-01-01 0.04 0.77 0.49 0.87 0.23 0.95 0.69 0.35 0.14 0.47 0.25
2011-01-02 0.73 0.46 0.28 0.86 0.75 0.08 0.00 0.89 0.50 0.12 0.54
2011-01-03 0.36 0.61 0.92 0.80 0.12 0.25 0.18 0.44 0.73 0.19 0.30
2011-01-04 0.18 0.65 0.68 0.44 0.54 0.84 0.13 0.64 0.54 0.81 0.73
2011-01-05 0.58 0.55 0.10 0.33 0.55 0.23 0.82 0.21 0.58 0.24 0.04

这不起作用:

# Attempt to split the columns of d2 into one object per group in d1$grp.
# This does NOT handle duplicated series names: match() returns only the FIRST
# position of each column name in d1$name, so both "h" columns of d2 are looked
# up at the same row of d1 and therefore land in the same group.
out <- setNames(sapply(unique(d1$grp), function(x) {
  # Columns of d2 whose (first-matched) group equals x.
  d2[, which(d1$grp[match(colnames(d2), d1$name)] == x)]
}), unique(d1$grp))

out

$B
              h    b    d    h    i
2011-01-01 0.04 0.49 0.23 0.14 0.47
2011-01-02 0.73 0.28 0.75 0.50 0.12
2011-01-03 0.36 0.92 0.12 0.73 0.19
2011-01-04 0.18 0.68 0.54 0.54 0.81
2011-01-05 0.58 0.10 0.55 0.58 0.24

$D
              a    c
2011-01-01 0.77 0.87
2011-01-02 0.46 0.86
2011-01-03 0.61 0.80
2011-01-04 0.65 0.44
2011-01-05 0.55 0.33

$C
              e    j
2011-01-01 0.95 0.25
2011-01-02 0.08 0.54
2011-01-03 0.25 0.30
2011-01-04 0.84 0.73
2011-01-05 0.23 0.04

$A
              f    g
2011-01-01 0.69 0.35
2011-01-02 0.00 0.89
2011-01-03 0.18 0.44
2011-01-04 0.13 0.64
2011-01-05 0.82 0.21

预期输出应为:

out

$B
              h    b    d    i
2011-01-01 0.04 0.49 0.23 0.47
2011-01-02 0.73 0.28 0.75 0.12
2011-01-03 0.36 0.92 0.12 0.19
2011-01-04 0.18 0.68 0.54 0.81
2011-01-05 0.58 0.10 0.55 0.24

$D
              a    c
2011-01-01 0.77 0.87
2011-01-02 0.46 0.86
2011-01-03 0.61 0.80
2011-01-04 0.65 0.44
2011-01-05 0.55 0.33

$C
              e    j
2011-01-01 0.95 0.25
2011-01-02 0.08 0.54
2011-01-03 0.25 0.30
2011-01-04 0.84 0.73
2011-01-05 0.23 0.04

$A
              f    g    h
2011-01-01 0.69 0.35 0.14
2011-01-02 0.00 0.89 0.50
2011-01-03 0.18 0.44 0.73
2011-01-04 0.13 0.64 0.54
2011-01-05 0.82 0.21 0.58

非常感谢帮助!

提前谢谢你......

1 个答案:

答案 0 :(得分:0)

你可以这样做:

  # Tag every entry of d1$name with its occurrence number ("h#1", "h#2", ...)
  # so that duplicated series names become unique column keys.
  # ave() is applied to seq_along(name) rather than to name itself: writing
  # integer counters into a factor column would produce NA (with warnings).
  name1 <- with(d1, paste0(name, "#",
                           ave(seq_along(name), name, FUN = seq_along)))

  # The tagging is positional: column i of d2 must correspond to row i of d1
  # (same order as in the example).
  stopifnot(ncol(d2) == nrow(d1))
  colnames(d2) <- name1

  # One subset of d2 per group, selecting columns by their unique tagged name.
  # Only the trailing "#<n>" tag is stripped afterwards, so a series name that
  # itself contains "#" is restored intact.
  res <- lapply(split(name1, d1$grp), function(x) {
    x1 <- d2[, as.character(x), drop = FALSE]
    colnames(x1) <- sub("#[0-9]+$", "", colnames(x1))
    x1
  })

  # split() returns the groups in sorted/level order; put them back in the
  # order in which the groups first appear in d1$grp.
  res1 <- res[match(unique(d1$grp), names(res))]

  res1
  # $B
  #            h    b    d    i
  #2011-01-01 0.04 0.49 0.23 0.47
  #2011-01-02 0.73 0.28 0.75 0.12
  #2011-01-03 0.36 0.92 0.12 0.19
  #2011-01-04 0.18 0.68 0.54 0.81
  #2011-01-05 0.58 0.10 0.55 0.24

  # $D
  #            a    c
  #2011-01-01 0.77 0.87
  #2011-01-02 0.46 0.86
  #2011-01-03 0.61 0.80
  #2011-01-04 0.65 0.44
  #2011-01-05 0.55 0.33

  # $C
  #            e    j
  #2011-01-01 0.95 0.25
  #2011-01-02 0.08 0.54
  #2011-01-03 0.25 0.30
  #2011-01-04 0.84 0.73
  #2011-01-05 0.23 0.04

  # $A
  #            f    g    h
  #2011-01-01 0.69 0.35 0.14
  #2011-01-02 0.00 0.89 0.50
  #2011-01-03 0.18 0.44 0.73
  #2011-01-04 0.13 0.64 0.54
  #2011-01-05 0.82 0.21 0.58

数据

  # Fixed copy of the lookup table: 11 rows, character columns `grp` and `name`.
  # The name "h" occurs twice (row 1 in group "B", row 9 in group "A").
  d1  <- structure(list(grp = c("B", "D", "B", "D", "B", "C", "A", "A", 
  "A", "B", "C"), name = c("h", "a", "b", "c", "d", "e", "f", "g", 
  "h", "i", "j")), .Names = c("grp", "name"), class = "data.frame", row.names = 
  c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11"))


  # Fixed copy of the time-series data: a 5 x 11 numeric matrix with class
  # c("xts", "zoo"). Values are stored column by column; the column names repeat
  # "h" (columns 1 and 9). The index holds five timestamps in seconds since the
  # epoch, one day (86400 s) apart, tagged as class "Date" in time zone "UTC".
  d2 <- structure(c(0.04, 0.73, 0.36, 0.18, 0.58, 0.77, 0.46, 0.61, 0.65, 
  0.55, 0.49, 0.28, 0.92, 0.68, 0.1, 0.87, 0.86, 0.8, 0.44, 0.33, 
  0.23, 0.75, 0.12, 0.54, 0.55, 0.95, 0.08, 0.25, 0.84, 0.23, 0.69, 
  0, 0.18, 0.13, 0.82, 0.35, 0.89, 0.44, 0.64, 0.21, 0.14, 0.5, 
  0.73, 0.54, 0.58, 0.47, 0.12, 0.19, 0.81, 0.24, 0.25, 0.54, 0.3, 
  0.73, 0.04), .Dim = c(5L, 11L), .Dimnames = list(NULL, c("h", 
  "a", "b", "c", "d", "e", "f", "g", "h", "i", "j")), index = 
  structure(c(1293840000, 1293926400, 1294012800, 1294099200, 1294185600), 
  tzone = "UTC", tclass = "Date"), .indexCLASS = "Date", tclass = "Date",
   .indexTZ = "UTC", tzone = "UTC", class = c("xts", "zoo"))