我是 R 的新手。我想按如下方式对数据进行汇总:
Id Symbol
T1 MSFT
T2 MSFT
T3 AAPL
T4 GOOG
T5 AAPL
Date T1 T2 T3 T4 T5
12/3/2014 100 150 100 150 100
12/2/2014 200 200 300 250 330
12/1/2014 300 250 400 250 420
11/28/2014 200 650 150 500 150
我想要的输出是
Date MSFT AAPL GOOG
12/3/2014 100+150 100+100 150
12/2/2014 200+200 300+330 250
12/1/2014 300+250 400+420 250
11/28/2014 200+650 150+150 500
更新
# Updated sample data: one row per Date and one numeric column per Id
# (T2, T1, T3). Date is a factor whose levels are listed from the oldest date
# to the newest, while the rows themselves run from newest to oldest.
df2 <- structure(list(Date = structure(c(6L, 5L, 4L, 3L, 2L, 1L), .Label = c("11/26/2014",
"11/27/2014", "11/28/2014", "12/1/2014", "12/2/2014", "12/3/2014"
), class = "factor"), T2 = c(482544, 475968, 470304, 468000,
466752, 460656), T1 = c(457878.7879, 455656.5657, 452424.2424,
447171.7172, 445252.5253, 439595.9596), T3 = c(8155612.90284,
8066709.67752, 7982838.70956, 7926924.73092, 7901763.441, 7799440.86012
)), .Names = c("Date", "T2", "T1", "T3"), row.names = c(NA, 6L
), class = "data.frame")
Date T2 T1 T3
1 12/3/2014 482544 457878.8 8155613
2 12/2/2014 475968 455656.6 8066710
3 12/1/2014 470304 452424.2 7982839
4 11/28/2014 468000 447171.7 7926925
5 11/27/2014 466752 445252.5 7901763
6 11/26/2014 460656 439596.0 7799441
# Updated lookup table: one row per Id (T1-T3) with its Fund, Symbol and
# Quantity. Id, Fund and Symbol are factors, so Id must be converted with
# as.character() before it is used to select df2 columns by name.
df1 <- structure(list(Id = structure(1:3, .Label = c("T1", "T2",
"T3"), class = "factor"), Fund = structure(c(1L, 1L, 1L), .Label = "F1", class = "factor"),
Symbol = structure(c(3L, 1L, 2L), .Label = c("AAPL",
"GOOG", "MSFT"), class = "factor"), Quantity= c(5000L, 4800L,
7800L)), .Names = c("Id", "Fund", "Symbol",
"Quantity"), row.names = c(NA, 3L), class = "data.frame")
Id Fund Symbol Quantity
1 T1 F1 MSFT 5000
2 T2 F1 AAPL 4800
3 T3 F1 GOOG 7800
答案 0 :(得分:3)
你可以尝试
# Build the result starting from the Date column only; one summed column per
# Symbol is appended below.
df3 <- df2[1]
# Group the Ids by Symbol, then sum the matching df2 columns row by row.
# as.character() is required when df1$Id is a factor: a factor used as an
# index is treated as its integer codes, so df2[x] would pick columns by
# position (starting with Date) instead of by name.
lst <- lapply(
  split(as.character(df1$Id), df1$Symbol),
  function(ids) rowSums(df2[ids])
)
df3[names(lst)] <- lst
df3
# Date AAPL GOOG MSFT
#1 12/3/2014 200 150 250
#2 12/2/2014 630 250 400
#3 12/1/2014 820 250 550
#4 11/28/2014 300 500 850
或者
# Ids grouped by Symbol. as.character() keeps the column lookup name-based
# even when df1$Id is a factor (a factor index would be used as integer codes).
lst <- split(as.character(df1$Id), df1$Symbol)
# Map() pairs the single data frame with each Id group and sums the selected
# columns row-wise; the results are stored under the Symbol names.
df3[names(lst)] <- Map(function(x, y) rowSums(x[y]), list(df2), lst)
基于更新的数据集
# Start from the Date column; the per-Symbol sums are appended below.
df3 <- df2[1]
# Group the Ids by Symbol. The lookup column in df1 is `Id` (df1 has no
# `TaxlotId` column, so df1$TaxlotId would be NULL and every sum would be
# computed over zero columns). as.character() is needed because Id is a
# factor here and must select df2 columns by name, not by integer code.
lst <- split(as.character(df1$Id), df1$Symbol)
# Row-wise sum of the df2 columns belonging to each Symbol.
lst1 <- lapply(lst, function(ids) rowSums(df2[ids]))
df3[names(lst1)] <- lst1
df3
# Date AAPL GOOG MSFT
#1 12/3/2014 482544 8155613 457878.8
#2 12/2/2014 475968 8066710 455656.6
#3 12/1/2014 470304 7982839 452424.2
#4 11/28/2014 468000 7926925 447171.7
#5 11/27/2014 466752 7901763 445252.5
#6 11/26/2014 460656 7799441 439596.0
或者
# Same aggregation without lapply(): pair df2 with every Id group in `lst`
# and sum the selected columns row-wise, keeping the result as a list.
df3[names(lst)] <- mapply(
  function(data, ids) rowSums(data[ids]),
  list(df2),
  lst,
  SIMPLIFY = FALSE
)
另一个选项(适用于大数据集)是先用 gather 将数据转换为 long(长)格式,
再用 spread 重新转换回 wide(宽)格式
(思路与 @David Arenburg 的答案相同)
library(dplyr)
library(tidyr)
# Reshape df2 to long form (one row per Date/Id pair), attach the Symbol of
# each Id, sum the values within every Date/Symbol pair, and reshape back to
# wide form with one column per Symbol.
# The explicit sum is what makes this work when several Ids share a Symbol:
# without it spread() sees duplicate Date/Symbol rows and stops with an error.
gather(df2, Var, Val, -Date) %>%
  left_join(df1[, c("Id", "Symbol")], by = c("Var" = "Id")) %>%
  group_by(Date, Symbol) %>%
  summarise(Val = sum(Val)) %>%
  ungroup() %>%
  spread(Symbol, Val) %>%
  # Return a plain data.frame, as the pipeline did before the grouping step.
  as.data.frame()
# Date AAPL GOOG MSFT
#1 11/26/2014 460656 7799441 439596.0
#2 11/27/2014 466752 7901763 445252.5
#3 11/28/2014 468000 7926925 447171.7
#4 12/1/2014 470304 7982839 452424.2
#5 12/2/2014 475968 8066710 455656.6
#6 12/3/2014 482544 8155613 457878.8
# Sample data from the original question.
# df1 maps each Id (T1-T5) to its Symbol; both columns are plain character
# vectors.
df1 <- data.frame(
  Id = c("T1", "T2", "T3", "T4", "T5"),
  Symbol = c("MSFT", "MSFT", "AAPL", "GOOG", "AAPL"),
  stringsAsFactors = FALSE
)
# df2 holds one row per Date (character) and one integer column per Id.
df2 <- data.frame(
  Date = c("12/3/2014", "12/2/2014", "12/1/2014", "11/28/2014"),
  T1 = c(100L, 200L, 300L, 200L),
  T2 = c(150L, 200L, 250L, 650L),
  T3 = c(100L, 300L, 400L, 150L),
  T4 = c(150L, 250L, 250L, 500L),
  T5 = c(100L, 330L, 420L, 150L),
  stringsAsFactors = FALSE
)
答案 1 :(得分:2)
这是一个使用 data.table 包的选项,对数据集先进行“熔化”(melt)再进行“铸造”(cast)。
这种方法对于大型连接应该非常高效,因为它不使用循环
(df1 和 df2 与 @akrun 的答案中的相同)
library(data.table)
# Reshape df2 to long form with Date kept as the id column, then key the
# result on `variable` so it can be joined against df1.
# NOTE(review): setDT() appears to convert df2 to a data.table in place;
# confirm that no later code relies on df2 still being a plain data.frame.
df3 <- setkey(melt(setDT(df2), "Date"), variable)
# Join the long table to df1 (df3[df1]) and cast back to wide form: one row
# per Date, one column per Symbol, with `value` summed inside each cell.
dcast.data.table(df3[df1], Date ~ Symbol, sum, value.var = "value")
# Date AAPL GOOG MSFT
# 1: 11/26/2014 460656 7799441 439596.0
# 2: 11/27/2014 466752 7901763 445252.5
# 3: 11/28/2014 468000 7926925 447171.7
# 4: 12/1/2014 470304 7982839 452424.2
# 5: 12/2/2014 475968 8066710 455656.6
# 6: 12/3/2014 482544 8155613 457878.8