我有一些数据以难以使用的方式格式化,所以我试图将其弄平。 minimum reproducible example is here。
> str(sampleData)
List of 4
$ Events :'data.frame': 2 obs. of 3 variables:
..$ CateringOptions:List of 2
.. ..$ :'data.frame': 1 obs. of 3 variables:
.. .. ..$ Agreed : logi TRUE
.. .. ..$ Tnc :'data.frame': 1 obs. of 5 variables:
.. .. .. ..$ Identity : chr "SpicyOWing"
.. .. .. ..$ Schema : logi NA
.. .. .. ..$ ElementId : chr "105031"
.. .. .. ..$ ElementType : logi NA
.. .. .. ..$ ElementVersion: logi NA
.. .. ..$ Address: chr "New York"
.. ..$ :'data.frame': 1 obs. of 3 variables:
.. .. ..$ Agreed : logi TRUE
.. .. ..$ Tnc :'data.frame': 1 obs. of 5 variables:
.. .. .. ..$ Identity : chr "BaconEggs"
.. .. .. ..$ Schema : logi NA
.. .. .. ..$ ElementId : chr "105032"
.. .. .. ..$ ElementType : logi NA
.. .. .. ..$ ElementVersion: logi NA
.. .. ..$ Address: chr "Seattle"
..$ Action : num [1:2] 1 1
..$ Volume : num [1:2] 1000 2000
$ Host :List of 5
..$ Identity : chr "John"
..$ Schema : logi NA
..$ ElementId : chr "101505"
..$ ElementType : logi NA
..$ ElementVersion: logi NA
$ Sender :List of 5
..$ Identity : chr "Jane"
..$ Schema : logi NA
..$ ElementId : chr "101005"
..$ ElementType : logi NA
..$ ElementVersion: logi NA
$ CompletedDate: chr "/Date(1490112000000)/"
预期
> expectedOutcome
Events.CateringOptions.Agreed Events.CateringOptions.Tnc.Identity Events.CateringOptions.Tnc.Schema Events.CateringOptions.Tnc.ElementId
1 NA SpicyOWing TRUE 105031
2 NA BaconEggs TRUE 105032
Events.CateringOptions.Tnc.ElementType Events.CateringOptions.Tnc.ElementVersion Events.CateringOptions.Address Events.Action Events.Volume Host.Identity
1 NA NA New York 1 1000 John
2 NA NA Seattle 1 2000 John
Host.Schema Host.ElementId Host.ElementType Host.ElementVersion Sender.Identity Sender.Schema Sender.ElementId Sender.ElementType Sender.ElementVersion
1 NA 101505 NA NA Jane NA 101005 NA NA
2 NA 101505 NA NA Jane NA 101005 NA NA
CompletedDate
1 /Date(1490112000000)/
2 /Date(1490112000000)/
检查功能
check<-function(li){
areDF<-sapply(1:length(li), function(i) class(li[[i]]) == "data.frame")
areList<-sapply(1:length(li), function(i) class(li[[i]]) == "list")
tmp1 <- NULL
tmp2 <- NULL
if(any(areDF)){
for(j in which(areDF)){
columns <- jsonlite::flatten(li[[j]])
li[[j]] <- check(columns)
}
tmp1<-plyr::rbind.fill(li[areDF])
#return(tmp1)
}
if(any(areList)){
for(j in which(areList)){
li[[j]]<-check(li[[j]])
}
tmp2<-do.call(cbind,li)
#return(tmp2)
}
if(!is.null(tmp1) & !is.null(tmp2)){
return (cbind(tmp1,tmp2))
}
else if(!is.null(tmp1)){
return (tmp1)
}
else if(!is.null(tmp2)){
return (tmp2)
}
return(li)
}
结果
> str(check(sampleData))
'data.frame': 2 obs. of 29 variables:
$ CateringOptions.Agreed : logi TRUE TRUE
$ CateringOptions.Address : chr "New York" "Seattle"
$ CateringOptions.Tnc.Identity : chr "SpicyOWing" "BaconEggs"
$ CateringOptions.Tnc.Schema : logi NA NA
$ CateringOptions.Tnc.ElementId : chr "105031" "105032"
$ CateringOptions.Tnc.ElementType : logi NA NA
$ CateringOptions.Tnc.ElementVersion : logi NA NA
$ Action : num 1 1
$ Volume : num 1000 2000
$ Events.CateringOptions.Agreed : logi TRUE TRUE
$ Events.CateringOptions.Address : chr "New York" "Seattle"
$ Events.CateringOptions.Tnc.Identity : chr "SpicyOWing" "BaconEggs"
$ Events.CateringOptions.Tnc.Schema : logi NA NA
$ Events.CateringOptions.Tnc.ElementId : chr "105031" "105032"
$ Events.CateringOptions.Tnc.ElementType : logi NA NA
$ Events.CateringOptions.Tnc.ElementVersion: logi NA NA
$ Events.Action : num 1 1
$ Events.Volume : num 1000 2000
$ Host.Identity : Factor w/ 1 level "John": 1 1
$ Host.Schema : logi NA NA
$ Host.ElementId : Factor w/ 1 level "101505": 1 1
$ Host.ElementType : logi NA NA
$ Host.ElementVersion : logi NA NA
$ Sender.Identity : Factor w/ 1 level "Jane": 1 1
$ Sender.Schema : logi NA NA
$ Sender.ElementId : Factor w/ 1 level "101005": 1 1
$ Sender.ElementType : logi NA NA
$ Sender.ElementVersion : logi NA NA
$ CompletedDate : Factor w/ 1 level "/Date(1490112000000)/": 1 1
我几乎拥有它,但嵌套的数据框被欺骗了。此外,我的代码需要相当长的时间。有谁知道我怎么能搞平呢?
我最后在gist
中添加了我的解决方案答案 0 :(得分:1)
在purrr
的帮助下,我接受了这一点
这个想法与您的想法类似,只是使用不同的语法:flatten()
最嵌套的数据框,然后是rbind()
它们。
如果我正确地理解了你的代码,那么最终我的代码会略有不同,因为我会尝试使用更“jsonlite::flatten
友好”的结构将其再次应用到最终结果中:
library(jsonlite)
library(purrr)
res <-
sampleData %>%
modify_if(
is.list,
.f = ~ modify_if(
.x,
.p = function(x) all(sapply(x, is.data.frame)),
.f = ~ do.call("rbind", lapply(.x, jsonlite::flatten))
)
) %>%
as.data.frame() %>%
jsonlite::flatten()
str(res)
# 'data.frame': 2 obs. of 20 variables:
# $ Events.Action : num 1 1
# $ Events.Volume : num 1000 2000
# $ Host.Identity : chr "John" "John"
# $ Host.Schema : logi NA NA
# $ Host.ElementId : chr "101505" "101505"
# $ Host.ElementType : logi NA NA
# $ Host.ElementVersion : logi NA NA
# $ Sender.Identity : chr "Jane" "Jane"
# $ Sender.Schema : logi NA NA
# $ Sender.ElementId : chr "101005" "101005"
# $ Sender.ElementType : logi NA NA
# $ Sender.ElementVersion : logi NA NA
# $ CompletedDate : chr "/Date(1490112000000)/" "/Date(1490112000000)/"
# $ Events.CateringOptions.Agreed : logi TRUE TRUE
# $ Events.CateringOptions.Address : chr "New York" "Seattle"
# $ Events.CateringOptions.Tnc.Identity : chr "SpicyOWing" "BaconEggs"
# $ Events.CateringOptions.Tnc.Schema : logi NA NA
# $ Events.CateringOptions.Tnc.ElementId : chr "105031" "105032"
# $ Events.CateringOptions.Tnc.ElementType : logi NA NA
# $ Events.CateringOptions.Tnc.ElementVersion: logi NA NA
我与你的expectedOutcome
有一个不匹配但是如果可以的话,它可能就在你身边:
all.equal(expectedOutcome[sort(names(expectedOutcome))], res[sort(names(res))])
# [1] "Component “Events.CateringOptions.Agreed”: 'is.NA' value mismatch: 0 in current 2 in target"
答案 1 :(得分:0)
不确定这是否会过度简化您的问题,但是对于您分享的示例,它似乎有效。基本上,如果在执行data.frame(your_list)
时该列不是向量,则unlist
是数据并生成matrix
。
FLAT <- function(inlist) {
A <- data.frame(inlist)
out <- lapply(A, function(y) {
if (is.list(y)) {
y <- unlist(y)
m <- matrix(y, nrow(A), byrow = TRUE, dimnames = list(NULL, unique(names(y))))
y <- data.frame(m, stringsAsFactors = FALSE)
y[] <- lapply(y, type.convert)
}
y
})
do.call(cbind, out)
}
FLAT(sampleData)
以下是您的示例数据的str
:
str(FLAT(sampleData))
## 'data.frame': 2 obs. of 20 variables:
## $ Events.CateringOptions.Agreed : logi TRUE TRUE
## $ Events.CateringOptions.Tnc.Identity : Factor w/ 2 levels "BaconEggs","SpicyOWing": 2 1
## $ Events.CateringOptions.Tnc.Schema : logi NA NA
## $ Events.CateringOptions.Tnc.ElementId : int 105031 105032
## $ Events.CateringOptions.Tnc.ElementType : logi NA NA
## $ Events.CateringOptions.Tnc.ElementVersion: logi NA NA
## $ Events.CateringOptions.Address : Factor w/ 2 levels "New York","Seattle": 1 2
## $ Events.Action : num 1 1
## $ Events.Volume : num 1000 2000
## $ Host.Identity : Factor w/ 1 level "John": 1 1
## $ Host.Schema : logi NA NA
## $ Host.ElementId : Factor w/ 1 level "101505": 1 1
## $ Host.ElementType : logi NA NA
## $ Host.ElementVersion : logi NA NA
## $ Sender.Identity : Factor w/ 1 level "Jane": 1 1
## $ Sender.Schema : logi NA NA
## $ Sender.ElementId : Factor w/ 1 level "101005": 1 1
## $ Sender.ElementType : logi NA NA
## $ Sender.ElementVersion : logi NA NA
## $ CompletedDate : Factor w/ 1 level "/Date(1490112000000)/": 1 1