我见过一些关于将csv文件合并到一个数据框中的questions。如果数据框已在工作区中,该怎么办?我有五个广角动物园,我作为数据框投射,然后融化。这是一个头:
> head(df.mon.ssf.ret)
date variable value
1 2009.000 AA1C NA
2 2009.083 AA1C NA
3 2009.167 AA1C NA
4 2009.250 AA1C NA
5 2009.333 AA1C NA
6 2009.417 AA1C NA
我可以将“date”和“variable”中的这些与一系列嵌套合并合并,但这看起来很笨拙。是否有更程序化的合并方式?
如果我确信所有动物园中的列的顺序相同,我是否可以确信融合维持该排序并使用cbind
?谢谢!
更新
我对熔化的使用理念缺少一些东西。这是当我合并为一个动物园并使用三个动物园融合成一个非常宽的数据框架时会发生的事情:
> temp <- merge(z.ssf.oi, z.ssf.oig, z.ssf.ret)
> class(temp)
[1] "zoo"
> temp2 <- cbind(index(temp), as.data.frame(temp))
> class(temp2)
[1] "data.frame"
> names(temp2)[1] <- "date"
> dim(temp2)
[1] 12 1204
> temp3 <- melt(temp2, id="date")
Error in data.frame(ids, variable, value) :
arguments imply differing number of rows: 12, 14436
> head(temp2)[, 1:5]
date AA1C.z.ssf.oi AAPL1C.z.ssf.oi ABT1C.z.ssf.oi ABX1C.z.ssf.oi
Jan 2009 Jan 2009 1895.800 49191.25 NA NA
Feb 2009 Feb 2009 1415.579 42650.26 NA 6267.96
Mar 2009 Mar 2009 1501.398 36712.20 NA 11581.65
Apr 2009 Apr 2009 1752.936 74376.27 NA 12168.29
May 2009 May 2009 1942.874 96307.30 NA 13490.60
Jun 2009 Jun 2009 NA 79170.70 NA 16337.21
更新2:感谢您的帮助!这是一个非常手动的解决方案
> A <- cbind(index(z.ssf.oi), as.data.frame(z.ssf.oi))
> names(A)[1] <- "date"
> B <- cbind(index(z.ssf.oig), as.data.frame(z.ssf.oig))
> names(B)[1] <- "date"
> C <- cbind(index(z.ssf.ret), as.data.frame(z.ssf.ret))
> names(C)[1] <- "date"
> A.melt <- melt(A, id="date")
> head(A.melt)
date variable value
1 Jan 2009 A1C NA
2 Feb 2009 A1C NA
3 Mar 2009 A1C NA
4 Apr 2009 A1C NA
5 May 2009 A1C NA
6 Jun 2009 A1C NA
> B.melt <- melt(B, id="date")
> C.melt <- melt(C, id="date")
> ans <- merge(merge(A.melt, B.melt, by=c("date", "variable")), C.melt, by=c("date", "variable"))
> names(ans)[3:5] <- c("oi", "oig", "ret")
> head(ans)
date variable oi oig ret
1 Apr 2009 A1C NA NA NA
2 Apr 2009 AA1C NA NA NA
3 Apr 2009 AAPL1C 59316.88 0.3375786 0.008600073
4 Apr 2009 ABB1C NA NA NA
5 Apr 2009 ABT1C NA NA NA
6 Apr 2009 ABX1C NA NA NA
(并且NA来自家中不完整的数据集,需要从我的数据库中过滤拨号)
更新3:这是一些dputs(我拿了每个宽动物园的[1:10,1:10]子集并转换为数据框)
> dput(A)
structure(list(group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), class = "factor", .Label = "oi"), date = structure(c(2009,
2009.08333333333, 2009.16666666667, 2009.25, 2009.33333333333,
2009.41666666667, 2009.5, 2009.58333333333, 2009.66666666667,
2009.75), class = "yearmon"), AA1C = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), AAPL1C = c(49226.391, 42662.1589473684, 35354.4254545455,
57161.6495238095, 84362.895, NA, NA, 47011.8519047619, 57852.2171428571,
33058.0090909091), ABT1C = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_),
ABX1C = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), ACE1C = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_), ACI1C = c(NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), ACS1C = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), ADBE1C = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), ADCT1C = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), ADI1C = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_)), .Names = c("group", "date",
"AA1C", "AAPL1C", "ABT1C", "ABX1C", "ACE1C", "ACI1C", "ACS1C",
"ADBE1C", "ADCT1C", "ADI1C"), row.names = c("Jan 2009", "Feb 2009",
"Mar 2009", "Apr 2009", "May 2009", "Jun 2009", "Jul 2009", "Aug 2009",
"Sep 2009", "Oct 2009"), class = "data.frame")
> dput(B)
structure(list(group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), class = "factor", .Label = "oig"), date = structure(c(2009.08333333333,
2009.16666666667, 2009.25, 2009.33333333333, 2009.41666666667,
2009.5, 2009.58333333333, 2009.66666666667, 2009.75, 2009.83333333333
), class = "yearmon"), AA1C = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), AAPL1C = c(-0.143117562125788, -0.187888745830302, 0.480459636485712,
0.389244461579155, NA, NA, NA, 0.207492040517069, -0.559627909130612,
NA), ABT1C = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), ABX1C = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), ACE1C = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), ACI1C = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), ACS1C = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), ADBE1C = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), ADCT1C = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), ADI1C = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_)), .Names = c("group", "date", "AA1C", "AAPL1C",
"ABT1C", "ABX1C", "ACE1C", "ACI1C", "ACS1C", "ADBE1C", "ADCT1C",
"ADI1C"), row.names = c("Feb 2009", "Mar 2009", "Apr 2009", "May 2009",
"Jun 2009", "Jul 2009", "Aug 2009", "Sep 2009", "Oct 2009", "Nov 2009"
), class = "data.frame")
> dput(C)
structure(list(group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), class = "factor", .Label = "ret"), date = structure(c(2009,
2009.08333333333, 2009.16666666667, 2009.25, 2009.33333333333,
2009.41666666667, 2009.5, 2009.58333333333, 2009.66666666667,
2009.75), class = "yearmon"), AA1C = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), AAPL1C = c(-0.143117562125788, -0.187888745830302, 0.480459636485712,
0.389244461579155, NA, NA, NA, 0.207492040517069, -0.559627909130612,
NA), ABT1C = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), ABX1C = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), ACE1C = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), ACI1C = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), ACS1C = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), ADBE1C = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), ADCT1C = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), ADI1C = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_)), .Names = c("group", "date", "AA1C", "AAPL1C",
"ABT1C", "ABX1C", "ACE1C", "ACI1C", "ACS1C", "ADBE1C", "ADCT1C",
"ADI1C"), row.names = c("Feb 2009", "Mar 2009", "Apr 2009", "May 2009",
"Jun 2009", "Jul 2009", "Aug 2009", "Sep 2009", "Oct 2009", "Nov 2009"
), class = "data.frame")
答案 0 :(得分:6)
你可以试试这个。未经测试,因为您的示例不可重复。如果您想要更好的答案,请为z.sfff.oi,z.sff.oig和z.sff.ret提供一些虚拟数据。您可以使用dput()为可重现的数据集生成代码。
A <- data.frame(Group = "oi", date = as.factor(index(z.ssf.oi),) as.data.frame(z.ssf.oi)))
B <- data.frame(Group = "oig", date = as.factor(index(z.ssf.oig)), as.data.frame(z.ssf.oig)))
C <- data.frame(Group = "ret", date = as.factor(index(z.ssf.ret)), as.data.frame(z.ssf.ret)))
Long <- melt(rbind(A, B, C), id.vars = c("Group", "date")))
cast(date ~ Group, data = Long)