我有一个包含多层嵌套的列表,其打印结果如下:
[[12]]
[[12]][[1]]
estimateName insuredName priceList
"KING" "IDIF8X_MAY18"
laborEff claimNumber policyNumber
"Restoration/Service/Remodel" "000000-01" "0000000"
typeOfLoss roofDamage deprMat
"Hail" "0" "1"
deprNonMat deprOandP deprTaxes
"1" "1" "1"
onsite recipientsXNAddress carrierId
"1" "CO" "00000"
estimateType
"Mixed"
[[12]][[2]]
type lineNum
"I" "12"
cat sel
"SFG" "GUTA"
act actPrefix
"&" "R&R"
"1"
[[13]]
[[13]][[1]]
eName iName priceList
"KING" "MAY18"
laborEff claimNumber policyNumber
"Restoration/Service/Remodel" "00000-01" "000000000"
typeOfLoss roofDamage deprMat
"Hail" "0" "1"
deprNonMat deprOandP deprTaxes
"1" "1" "1"
onsite recipientsXNAddress carrierId
"1" "MRP.BRIGHTON.CO" "2570112"
estimateType
"Mixed"
[[13]][[2]]
type lineNum
"I" "13"
cat sel
"FEN" "VNLS6"
act actPrefix
"&" "R&R"
这只是一个示例。我想创建一个数据框,把各属性名作为列名,每一行则是对应的取值。我试过 unlist 和其他一些函数,但都没有得到正确的格式。
因此,期望的输出是两行数据,并带有列名。下面是示例数据(dput 格式):
list(list(structure(c("DARIAN_KING_&_CASSI1", "DARIAN KING & CASSIDY R KING",
"IDIF8X_MAY18", "Restoration/Service/Remodel", "037262569-01",
"H3726819012070", "Hail", "0", "1", "1", "1", "1", "1", "MRP.BRIGHTON.CO",
"2570112", "Mixed"), .Names = c("estimateName", "insuredName",
"priceList", "laborEff", "claimNumber", "policyNumber", "typeOfLoss",
"roofDamage", "deprMat", "deprNonMat", "deprOandP", "deprTaxes",
"onsite", "recipientsXNAddress", "carrierId", "estimateType")),
structure(c("I", "12", "SFG", "GUTA", "&", "R&R", "Gutter / downspout - aluminum - up to 5\"",
"12", "12", "LF", "Dwelling", "0.32", "4.23", "54.6", "36.33",
"2", "1", "9", "25", "18.27", "1.49", "37.28", "18.81", "56.09",
"1", "29.76", "10.08", "6.36", "13.32", "0.58", "24.84",
"1"), .Names = c("type", "lineNum", "cat", "sel", "act",
"actPrefix", "desc", "calc", "qty", "unit", "coverageName",
"remove", "replace", "total", "acv", "deprType", "recoverable",
"age", "lifeExp", "depr", "tax", "acvTotal", "deprTotal",
"rcvTotal", "isPartOfInitSettle", "laborTotal", "laborBase",
"laborBurden", "laborMarkup", "laborHours", "material", "containsBSCDontApply"
))), list(structure(c("DARIAN_KING_&_CASSI1", "DARIAN KING & CASSIDY R KING",
"IDIF8X_MAY18", "Restoration/Service/Remodel", "037262569-01",
"H3726819012070", "Hail", "0", "1", "1", "1", "1", "1", "MRP.BRIGHTON.CO",
"2570112", "Mixed"), .Names = c("estimateName", "insuredName",
"priceList", "laborEff", "claimNumber", "policyNumber", "typeOfLoss",
"roofDamage", "deprMat", "deprNonMat", "deprOandP", "deprTaxes",
"onsite", "recipientsXNAddress", "carrierId", "estimateType")),
structure(c("I", "13", "FEN", "VNLS6", "&", "R&R", "Vinyl (PVC) fence, 5'- 6' high - full slat",
"8*3", "24", "LF", "Other Structures", "4.01", "27.71", "761.28",
"721.38", "2", "9", "150", "39.9", "29.61", "749.21", "41.68",
"790.89", "1", "267.84", "141.36", "80.64", "45.84", "9.1",
"493.44", "1"), .Names = c("type", "lineNum", "cat", "sel",
"act", "actPrefix", "desc", "calc", "qty", "unit", "coverageName",
"remove", "replace", "total", "acv", "deprType", "age", "lifeExp",
"depr", "tax", "acvTotal", "deprTotal", "rcvTotal", "isPartOfInitSettle",
"laborTotal", "laborBase", "laborBurden", "laborMarkup",
"laborHours", "material", "containsBSCDontApply"))))
答案 0 :(得分:1)
先给出结论:
dplyr::bind_rows(lapply(datlst, function(dl) as.data.frame(as.list(unlist(dl)), stringsAsFactors=FALSE)))
# estimateName insuredName priceList laborEff claimNumber policyNumber typeOfLoss roofDamage deprMat deprNonMat deprOandP deprTaxes onsite
# 1 DARIAN_KING_&_CASSI1 DARIAN KING & CASSIDY R KING IDIF8X_MAY18 Restoration/Service/Remodel 037262569-01 H3726819012070 Hail 0 1 1 1 1 1
# 2 DARIAN_KING_&_CASSI1 DARIAN KING & CASSIDY R KING IDIF8X_MAY18 Restoration/Service/Remodel 037262569-01 H3726819012070 Hail 0 1 1 1 1 1
# recipientsXNAddress carrierId estimateType type lineNum cat sel act actPrefix desc calc qty unit coverageName remove replace total acv deprType
# 1 MRP.BRIGHTON.CO 2570112 Mixed I 12 SFG GUTA & R&R Gutter / downspout - aluminum - up to 5" 12 12 LF Dwelling 0.32 4.23 54.6 36.33 2
# 2 MRP.BRIGHTON.CO 2570112 Mixed I 13 FEN VNLS6 & R&R Vinyl (PVC) fence, 5'- 6' high - full slat 8*3 24 LF Other Structures 4.01 27.71 761.28 721.38 2
# recoverable age lifeExp depr tax acvTotal deprTotal rcvTotal isPartOfInitSettle laborTotal laborBase laborBurden laborMarkup laborHours material containsBSCDontApply
# 1 1 9 25 18.27 1.49 37.28 18.81 56.09 1 29.76 10.08 6.36 13.32 0.58 24.84 1
# 2 <NA> 9 150 39.9 29.61 749.21 41.68 790.89 1 267.84 141.36 80.64 45.84 9.1 493.44 1
让我们分解一下。每个顶层元素里都有两个结构很不相同的子列表,我猜它们需要放在同一行里。我们可以直接用 unlist(...) 把它们合并在一起。(我会在需要的地方使用 str 和省略标记来辅助演示。)
str(unlist(datlst[[1]]))
# Named chr [1:48] "DARIAN_KING_&_CASSI1" "DARIAN KING & CASSIDY R KING" "IDIF8X_MAY18" "Restoration/Service/Remodel" "037262569-01" "H3726819012070" "Hail" "0" "1" "1" "1" "1" "1" ...
# - attr(*, "names")= chr [1:48] "estimateName" "insuredName" "priceList" "laborEff" ...
我们知道命名的 list 可以很容易地转换为 data.frame,所以先把这个命名向量转换为命名列表:
str(as.list(unlist(datlst[[1]])))
# List of 48
# $ estimateName : chr "DARIAN_KING_&_CASSI1"
# $ insuredName : chr "DARIAN KING & CASSIDY R KING"
# $ priceList : chr "IDIF8X_MAY18"
# ...snip...
# $ laborHours : chr "0.58"
# $ material : chr "24.84"
# $ containsBSCDontApply: chr "1"
因此,我们可以使用 lapply 将其应用于 datlst 的每个元素。接下来,需要把所有结果用 rbind 合并起来。在基础 R 中,这通常用 do.call(rbind, lapply(...)) 完成,但我注意到有一个字段(recoverable)只出现在其中一个元素里,另一个元素中没有,这就凸显了 rbind 的一个限制:列名必须相同且顺序一致。不过,有两个(非基础 R 的)工具可以解决这个问题:
dplyr::bind_rows(...)
data.table::rbindlist(..., fill=TRUE)
我演示的是第一个,但第二个同样有效……用您已经安装的那个即可。
答案 1 :(得分:0)
> microbenchmark(
+ test1 <- datlst %>% ldply(., function(x) c(x, recursive=TRUE) %>% t %>% as_data_frame),
+ test2 <- dplyr::bind_rows(lapply(datlst, function(dl) as.data.frame(as.list(unlist(dl)), stringsAsFactors=FALSE))),
+ test3 <- datlst %>% llply(., function(x) c(x, recursive=TRUE) %>% t %>% as_data_frame) %>% bind_rows)
Unit: milliseconds
expr
test1 <- datlst %>% ldply(., function(x) c(x, recursive = TRUE) %>% t %>% as_data_frame)
test2 <- dplyr::bind_rows(lapply(datlst, function(dl) as.data.frame(as.list(unlist(dl)), stringsAsFactors = FALSE)))
test3 <- datlst %>% llply(., function(x) c(x, recursive = TRUE) %>% t %>% as_data_frame) %>% bind_rows
min lq mean median uq max neval cld
3.984349 4.463147 6.516226 4.674322 4.851113 35.71595 100 a
6.804798 7.246649 11.556857 7.455770 7.750850 49.91981 100 b
2.805568 3.040350 4.628043 3.153763 3.326192 36.70896 100 a