我有一个不同长度的数据框列表(df),按年份编制索引,以便数据的代理如下:
df
$df1
X..i..
1999 10
1998 13
1997 14
$df2
X..i..
1999 20
1998 11
$df3
X..i..
1999 17
1998 8
1997 9
1996 19
我想使用并保留索引/ rownames将这些数据帧组合到单个数据框
那样:
df_all
Index df1 df2 df3
1999 10 20 17
1998 13 11 8
1997 14 n/a 9
1996 n/a n/a 19
编辑:
smalldflist <- lapply(bai_df, function(i) head(i, 10))
dput(smalldflist)
产生以下输出:
结构(列表(IN_DonaldsonWoods_QUAL.txt =结构(列表(X..i .. = c)(4.5528243479162, 32.6474339976978,52.7116018957456,170.932582874866,227.0430440174, 191.462399206825,226.94053541991,274.854835798233,336.457600434571, 409.132933511232)),。Name =“X..i ..”,row.names = c(“1725”, “1726”,“1727”,“1728”,“1729”,“1730”,“1731”,“1732”,“1733”, “1734”),class =“data.frame”),IN_DonaldsonWoods_QURU.txt = structure(list(list) X..i .. = c(4.33729067152776,5.72878688080428,13.0247658962315, 22.0205798005054,25.9885943197615,18.9273551074104,43.5197887382031, 58.2775710248884,72.9225976242458,73.0466756114972)),。Name =“X..i ..”,row.names = c(“1827”, “1828”,“1829”,“1830”,“1831”,“1832”,“1833”,“1834”,“1835”, “1836”),class =“data.frame”),IN_DonaldsonWoods_QUVE.txt = structure(list( X..i .. = c(7.87253273859391,18.9481296742303,42.5055176960097, 62.9980951594496,88.906442207264,74.2523230533691,106.911242713809, 152.445167763284,192.399603839633,221.263660216113)),。Name =“X..i ..”,row.names = c(“1731”, “1732”,“1733”,“1734”,“1735”,“1736”,“1737”,“1738”,“1739”, “1740”),class =“data.frame”),IN_LillyDickey_QUAL.txt = structure(list( X..i .. = c(8.29576810088555,17.2934968058816,31.2091720401804, 33.8966066349882,47.6496887415004,32.9921546763907,82.2281435044324, 108.068226885475,103.894002151431,110.255812097949)),. Name =“X..i ..”,row.names = c(“1863”, “1864”,“1865”,“1866”,“1867”,“1868”,“1869”,“1870”,“1871”, “1872”),class =“data.frame”),IN_LillyDickey_QUMO.txt = structure(list( X..i .. = c(3.42413493048312,8.0847630303073,19.6833503197648, 13.791136218324,21.4638165402601,30.6707376168741,30.8789937938806, 26.8661212585221,24.0732956549621,29.7872997715364)),。Name =“X..i ..”,row.names = c(“1867”, “1868”,“1869”,“1870”,“1871”,“1872”,“1873”,“1874”,“1875”, “1876”),class =“data.frame”),IN_Pioneers_QUAL.txt = structure(list( X..i .. = c(9.14340435634345,23.5108626053757,33.8507393822465, 46.1027716604662,57.5247983011993,50.5892015892391,92.2448163706925, 225.832932372368,278.367628044195,193.931508820174)),。Name =“X..i ..”,row.names = c(“1817”, “1818”,“1819”,“1820”,“1821”,“1822”,“1823”,“1824”,“1825”, “1826”),class =“data.frame”),IN_Pioneers_QURU.txt = structure(list( X..i .. = c(122.443727611702,658.649900930018,830.471777578934, 843.357139228152,1725.6495913006,1244.38668477703,973.00892131628, 1294.7441782001,1717.18570086886,1676.63841798444)),。Name =“X..i ..”,row.names = c(“1861”, “1862”,“1863”,“1864”,“1865”,“1866”,“1867”,“1868”,“1869”, “1870”),class =“data.frame”),OH_JohnsonWoods_QUAL.txt = structure(list( X..i .. = c(1.9113449704439,3.39794661412248,5.32688450342693, 6.41921626908008,11.0307601252838,13.0825342873437,14.843680070585, 16.885746353779,20.1011664347289,19.853294774361)),。Name =“X..i ..”,row.names = c(“1626”, “1627”,“1628”,“1629”,“1630”,“1631”,“1632”,“1633”,“1634”, “1635”),class =“data.frame”)),. Name = c(“IN_DonaldsonWoods_QUAL.txt”, “IN_DonaldsonWoods_QURU.txt”,“IN_DonaldsonWoods_QUVE.txt”,“IN_LillyDickey_QUAL.txt”, “IN_LillyDickey_QUMO.txt”,“IN_Pioneers_QUAL.txt”,“IN_Pioneers_QURU.txt”, “OH_JohnsonWoods_QUAL.txt”))
答案 0 :(得分:1)
您可以使用Reduce
到merge
多个数据框。设置all = TRUE
,在没有匹配时添加NAs。注意df是您设置的数据框列表,by
表示用于合并的列。因此,在您的数据框列表中,&#34;索引&#34;应该是每个数据框中年份列的名称。
Reduce(function(...) merge(..., by="Index", all=TRUE), df)
感谢@jazzuro提供样本数据,以下是基础R中使用Reduce
的等效解决方案。在此示例中设置用于合并by="year"
的列:
df1 <- data.frame(year = c(1999, 1998, 1997),
value = c(10, 13, 14))
df2 <- data.frame(year = c(1999, 1998),
value = c(20, 11))
df3 <- data.frame(year = c(1999, 1998, 1997, 1996),
value = c(17, 8, 9, 19))
df <- list(df1=df1, df2=df2, df3=df3)
df_merge <- Reduce(function(...) merge(..., by="year", all=TRUE), df)
colnames(df_merge) <- c("Index", names(df))
# Index df1 df2 df3
# 1 1996 NA NA 19
# 2 1997 14 NA 9
# 3 1998 13 11 8
# 4 1999 10 20 17
答案 1 :(得分:1)
如果您拥有仅在全球环境中需要的数据,则可以尝试以下操作。首先,您在所有数据框中收集独特年份并创建主数据框,其中仅包含唯一年份。然后,将所有数据框放在一个列表中,并将每个数据框与master
合并。由于您在temp
中拥有主数据框,因此将其删除。最后,绑定所有数据帧并将长格式更改为宽格式。
library(tidyverse)
# Create a data frame with all unique years
master <- data.frame(year = mget(ls()) %>%
sapply(`[`, 1) %>%
as_vector %>%
unique)
# Merge each data frame with the master df
temp <- mget(ls()) %>%
lapply(function(x){full_join(x, master, by = "year")})
# Remove the master df in the list
temp[["master"]] <- NULL
# Bind all dfs and make it wide.
bind_rows(temp, .id = "data") %>%
spread(key = data, value = value)
# year df1 df2 df3
#1 1996 NA NA 19
#2 1997 14 NA 9
#3 1998 13 11 8
#4 1999 10 20 17
DATA
df1 <- data.frame(year = c(1999, 1998, 1997),
value = c(10, 13, 14))
df2 <- data.frame(year = c(1999, 1998),
value = c(20, 11))
df3 <- data.frame(year = c(1999, 1998, 1997, 1996),
value = c(17, 8, 9, 19))
答案 2 :(得分:1)
重新考虑@Djork显示的链合并,但请确保创建名为 Index 的实际列等于rownames()
。此外,根据 df#重命名X..1
列,这也可以避免在合并期间出现重复列警告。 dfs 下面等同于发布的 smalldflist :
dfs <- lapply(seq_along(dfs), function(i){
dfs[[i]]$Index = rownames(dfs[[i]]) # CREATE INDEX
colnames(dfs[[i]])[1] <- paste0("df", i) # RENAME X..1 COLUMN
return(dfs[[i]])
})
dfs[[1]]
# df1 Index
# 1725 4.552824 1725
# 1726 32.647434 1726
# 1727 52.711602 1727
# 1728 170.932583 1728
# 1729 227.043044 1729
# 1730 191.462399 1730
# 1731 226.940535 1731
# 1732 274.854836 1732
# 1733 336.457600 1733
# 1734 409.132934 1734
finaldf <- Reduce(function(...) merge(..., by="Index", all=TRUE), dfs)
finaldf
# Index df1 df2 df3 df4 df5 df6 df7 df8
# 1 1626 NA NA NA NA NA NA NA 1.911345
# 2 1627 NA NA NA NA NA NA NA 3.397947
# 3 1628 NA NA NA NA NA NA NA 5.326885
# 4 1629 NA NA NA NA NA NA NA 6.419216
# 5 1630 NA NA NA NA NA NA NA 11.030760
# ...