我正在寻找一种方法,将一个数据框列表(共约7000个数据框,每个要么只有一行,要么为空)按行合并(rbind)成一个数据框,同时又不丢掉列表中的空数据框。
## Example of data
# Example input: a named list of data frames in which one element (Name1)
# is completely empty (0 columns, 0 rows) and the others hold a single row.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned, whereas `FALSE` is a reserved word.
l <- list(
  Name1 = data.frame(),
  Name2 = data.frame(V1 = "A", V2 = "B", stringsAsFactors = FALSE),
  Name3 = data.frame(V1 = "B", V2 = NA, V3 = "C", stringsAsFactors = FALSE)
)
> l
$Name1
data frame with 0 columns and 0 rows
$Name2
V1 V2
1 A B
$Name3
V1 V2 V3
1 B NA C
于是我尝试了dplyr的bind_rows,但结果中没有保留空的数据框(这并不奇怪,因为空数据框没有任何行):
## rbind
library(dplyr)
# Stack every element of `l` row-wise; the list names are stored in the
# NAME column.
df <- l %>%
  bind_rows(.id = "NAME")
> df
NAME V1 V2 V3
1 Name2 A B <NA>
2 Name3 B <NA> C
是否有一种简单的方法可以得到如下结果(空数据框对应一行NA)?
> df
NAME V1 V2 V3
1 Name1 <NA> <NA> <NA>
2 Name2 A B <NA>
3 Name3 B <NA> C
在我的示例中,使用tidyr::complete似乎可以解决:
# Bind all list elements row-wise, then add one row (filled with NA) for
# every list name that contributed no rows, such as the empty Name1.
# complete() is namespace-qualified because only dplyr is attached above;
# a bare complete() call fails unless tidyr has been loaded as well.
df <- bind_rows(l, .id = "NAME") %>%
  tidyr::complete(NAME = names(l))
但它不适用于我的完整数据集。以下是我实际数据集的开头部分(head):
dput(head(GNPlaceName))
list(Seydikemer = structure(list(), class = "data.frame", row.names = integer(0)),
Fanna = structure(list(timezone.gmtOffset = "1", timezone.timeZoneId = "Europe/Rome",
timezone.dstOffset = "2", bbox.east = "12.7623022680034",
bbox.south = "46.1770686700141", bbox.north = "46.1904973822354",
bbox.west = "12.7415900814293", bbox.accuracyLevel = "10",
asciiName = "Fanna", astergdem = "258", countryId = "3175395",
fcl = "P", srtm3 = "263", score = "62.2252464294434",
adminId2 = "3170146", adminId3 = "6537931", countryCode = "IT",
adminCodes2.ISO3166_2 = "PN", adminCodes1.ISO3166_2 = "36",
adminId1 = "3176525", lat = "46.18455", fcode = "PPLA3",
continentCode = "EU", elevation = "274", adminCode2 = "PN",
adminCode3 = "093020", adminCode1 = "06", lng = "12.75161",
geonameId = "3177221", toponymName = "Fanna", population = "1500",
adminName5 = "", adminName4 = "", adminName3 = "Fanna",
alternateNames.name = "Fane", alternateNames.lang = "fur",
adminName2 = "Province of Pordenone", name = "Fanna",
fclName = "city, village,...", countryName = "Italy",
fcodeName = "seat of a third-order administrative division",
adminName1 = "Friuli Venezia Giulia"), row.names = c(NA,
-1L), class = "data.frame"), Warsaw = structure(list(timezone.gmtOffset = "1",
timezone.timeZoneId = "Europe/Warsaw", timezone.dstOffset = "2",
bbox.east = "21.2711512942955", bbox.south = "52.0978496125492",
bbox.north = "52.368153944595", bbox.west = "20.8516883368428",
bbox.accuracyLevel = "10", asciiName = "Warsaw", astergdem = "121",
countryId = "798544", fcl = "P", srtm3 = "113", score = "145.618896484375",
adminId2 = "6695624", adminId3 = "7531926", countryCode = "PL",
adminCodes1.ISO3166_2 = "14", adminId1 = "858787", lat = "52.22977",
fcode = "PPLC", continentCode = "EU", adminCode2 = "1465",
adminCode3 = "146501", adminCode1 = "78", lng = "21.01178",
geonameId = "756135", toponymName = "Warsaw", population = "1702139",
adminName5 = "", adminName4 = "", adminName3 = "Warsaw",
alternateNames.name = "Warskou", alternateNames.lang = "af",
alternateNames.isShortName = "TRUE", alternateNames.isPreferredName = "TRUE",
adminName2 = "Warszawa", name = "Warsaw", fclName = "city, village,...",
countryName = "Poland", fcodeName = "capital of a political entity",
adminName1 = "Mazovia"), row.names = c(NA, -1L), class = "data.frame"),
`Gaverina Terme` = structure(list(timezone.gmtOffset = "1",
timezone.timeZoneId = "Europe/Rome", timezone.dstOffset = "2",
bbox.east = "9.89570095710424", bbox.south = "45.7540309868418",
bbox.north = "45.7576290305937", bbox.west = "9.8836191498607",
bbox.accuracyLevel = "10", asciiName = "Gaverina Terme",
astergdem = "502", countryId = "3175395", fcl = "P",
srtm3 = "494", score = "22.0922546386719", adminId2 = "3182163",
adminId3 = "6542955", countryCode = "IT", adminCodes2.ISO3166_2 = "BG",
adminCodes1.ISO3166_2 = "25", adminId1 = "3174618", lat = "45.75578",
fcode = "PPLA3", continentCode = "EU", elevation = "509",
adminCode2 = "BG", adminCode3 = "016110", adminCode1 = "09",
lng = "9.88666", geonameId = "6534969", toponymName = "Gaverina Terme",
population = "358", adminName5 = "", adminName4 = "",
adminName3 = "Gaverina Terme", alternateNames.name = "ITGVR",
alternateNames.lang = "unlc", adminName2 = "Provincia di Bergamo",
name = "Gaverina Terme", fclName = "city, village,...",
countryName = "Italy", fcodeName = "seat of a third-order administrative division",
adminName1 = "Lombardy"), row.names = c(NA, -1L), class = "data.frame"),
`Cañaveral de León` = structure(list(timezone.gmtOffset = "1",
timezone.timeZoneId = "Europe/Madrid", timezone.dstOffset = "2",
bbox.east = "-6.49382969510649", bbox.south = "37.9986794024971",
bbox.north = "38.0346539975029", bbox.west = "-6.53950370489351",
bbox.accuracyLevel = "1", asciiName = "Canaveral de Leon",
astergdem = "524", countryId = "2510769", fcl = "P",
srtm3 = "528", score = "45.6841278076172", adminId2 = "2516547",
adminId3 = "6358196", countryCode = "ES", adminCodes2.ISO3166_2 = "H",
adminCodes1.ISO3166_2 = "AN", adminId1 = "2593109", lat = "38.01667",
fcode = "PPLA3", continentCode = "EU", adminCode2 = "H",
adminCode3 = "21020", adminCode1 = "51", lng = "-6.51667",
geonameId = "2520292", toponymName = "Cañaveral de León",
population = "0", adminName5 = "", adminName4 = "", adminName3 = "Cañaveral de León",
alternateNames.name = "https://en.wikipedia.org/wiki/Ca%C3%B1averal_de_Le%C3%B3n",
alternateNames.lang = "link", adminName2 = "Huelva",
name = "Cañaveral de León", fclName = "city, village,...",
countryName = "Spain", fcodeName = "seat of a third-order administrative division",
adminName1 = "Andalusia"), row.names = c(NA, -1L), class = "data.frame"),
Voranava = structure(list(timezone.gmtOffset = "3", timezone.timeZoneId = "Europe/Minsk",
timezone.dstOffset = "3", bbox.east = "25.3283727016391",
bbox.south = "54.139144795013", bbox.north = "54.159255204987",
bbox.west = "25.2940272983609", bbox.accuracyLevel = "2",
asciiName = "Voranava", astergdem = "170", countryId = "630336",
fcl = "P", srtm3 = "172", score = "62.4154739379883",
countryCode = "BY", adminCodes1.ISO3166_2 = "HR", adminId1 = "628035",
lat = "54.1492", fcode = "PPLA2", continentCode = "EU",
adminCode1 = "03", lng = "25.3112", geonameId = "619979",
toponymName = "Voranava", population = "6500", adminName5 = "",
adminName4 = "", adminName3 = "", alternateNames.name = "Воранава",
alternateNames.lang = "be", adminName2 = "", name = "Voranava",
fclName = "city, village,...", countryName = "Belarus",
fcodeName = "seat of a second-order administrative division",
adminName1 = "Grodnenskaya"), row.names = c(NA, -1L), class = "data.frame"))
非常感谢您
答案 0 :(得分:1)
一种结合dplyr和tidyr的方案是:
# Bind the list row-wise with the element names in `Names`, then complete
# `Names` with every list name so that empty elements show up as an NA row.
l %>%
  bind_rows(.id = "Names") %>%
  complete(Names = names(l))
Names V1 V2 V3
<chr> <chr> <chr> <chr>
1 Name1 <NA> <NA> <NA>
2 Name2 A B <NA>
3 Name3 B <NA> C
答案 1 :(得分:0)
您需要先在空的数据框中初始化一列(例如 V1 = NA),可以这样试一下:
# Same example list, but the formerly empty Name1 element now carries a
# single placeholder row (V1 = NA) so that it contributes a row when bound.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned, whereas `FALSE` is a reserved word.
l <- list(
  Name1 = data.frame(V1 = NA),
  Name2 = data.frame(V1 = "A", V2 = "B", stringsAsFactors = FALSE),
  Name3 = data.frame(V1 = "B", V2 = NA, V3 = "C", stringsAsFactors = FALSE)
)
# Stack the list row-wise, keeping each element's name in the NAME column.
df <- l %>%
  bind_rows(.id = "NAME")
答案 2 :(得分:0)
@tmfmnk提出的解决方案效果很好。再次谢谢你。
它起初之所以对我的全部数据无效,是因为我的列表中有一个元素(数据框)没有名称。纠正这个问题之后,它就可以正常工作了。