Question

我知道已经有很多相关的帖子，我保证都已经看过了，但我仍然没能解决这个问题。

下面是一个输入列表，它是lapply调用的输出。

我想要一个整洁、易读的数据框，包含 2 列：一列对应 TRUE，一列对应 FALSE；25 个列表项中的每一项各占一行。

尝试：

 # Attempt 1: tabulate, for each element of my_list, how many values fall in
 # the "empty-like" set, then try to row-bind the tables into a data frame.
 # NOTE(review): %in% matches exactly (no regex), so "\\?" only matches the
 # literal two-character string \? -- presumably "?" was intended; confirm.
 # Because `.` appears only inside the nested do.call(), magrittr also passes
 # the list as the first argument, i.e. data.frame(., do.call(rbind, .)).
 falsies <- lapply(my_list, function(x) table(tolower(x) %in% c("", "unknown", "\\?"))) %>% 
+   data.frame(do.call(rbind, .))

data.frame(., do.call(rbind, .)) 中的错误：参数隐含的行数不一致：2, 25

# Attempt 2: as.data.frame.matrix() is the matrix method, but here it receives
# the list returned by lapply() rather than a matrix.
falsies <- lapply(my_list, function(x) table(tolower(x) %in% c("", "unknown", "\\?"))) %>% 
  as.data.frame.matrix()

seq_len(ncols) 中的错误：参数必须可强制转换为非负整数。另外：警告信息：在 seq_len(ncols) 中：只使用了 'length.out' 参数的第一个元素

# Attempt 3: the pipe supplies the list as the first argument of as.vector(),
# so t(.) lands in its second positional parameter, which is `mode`.
falsies <- lapply(my_list, function(x) table(tolower(x) %in% c("", "unknown", "\\?"))) %>% as.vector(t(.)) %>% 
  as.data.frame(Field = names(.), Value = unlist(.))

as.vector(x, mode) 出错：'mode' 参数无效

如何将这个列表转换为一个只有 2 列的数据框？

# Reproducible input (dput output): a named list of 25 one-dimensional tables.
# Every element holds a single "FALSE" count of 31092, except SU_GENDER and
# PREFERRED_TOPUP_METHOD_DESC, which hold both a "FALSE" and a "TRUE" count.
my_list <- structure(list(ID = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
    "FALSE"), .Names = ""), class = "table"), Fiscal_Week_Date = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
    "FALSE"), .Names = ""), class = "table"), FISCAL_WEEK = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
    "FALSE"), .Names = ""), class = "table"), SU_CURRENT_RECORD_IND = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
    "FALSE"), .Names = ""), class = "table"), PROFIT_CENTRE = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
    "FALSE"), .Names = ""), class = "table"), ACTIVE_ON_BASE = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
    "FALSE"), .Names = ""), class = "table"), SU_STATUS_ID = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
    "FALSE"), .Names = ""), class = "table"), SU_BIRTH_DATE = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
    "FALSE"), .Names = ""), class = "table"), SU_GENDER = structure(c(17193L, 
13899L), .Dim = 2L, .Dimnames = structure(list(c("FALSE", "TRUE"
)), .Names = ""), class = "table"), AVERAGE_SPEND = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
    "FALSE"), .Names = ""), class = "table"), CU_PAPERLESS_BILL_IND = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
    "FALSE"), .Names = ""), class = "table"), SU_FIXED_MOBILE_IND = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
    "FALSE"), .Names = ""), class = "table"), MMS_INDICATOR = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
    "FALSE"), .Names = ""), class = "table"), INSURANCE_INDICATOR = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
    "FALSE"), .Names = ""), class = "table"), INSURANCE_AMOUNT = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
    "FALSE"), .Names = ""), class = "table"), PREFERRED_TOPUP_METHOD_DESC = structure(c(7672L, 
23420L), .Dim = 2L, .Dimnames = structure(list(c("FALSE", "TRUE"
)), .Names = ""), class = "table"), BROADBAND_IND = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
    "FALSE"), .Names = ""), class = "table"), ICT_IND = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
    "FALSE"), .Names = ""), class = "table"), TENURE_IN_MONTHS = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
    "FALSE"), .Names = ""), class = "table"), CONTRACT_TYPE = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
    "FALSE"), .Names = ""), class = "table"), HA_DEVICE_CAPABILITY = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
    "FALSE"), .Names = ""), class = "table"), Year = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
    "FALSE"), .Names = ""), class = "table"), Week = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
    "FALSE"), .Names = ""), class = "table"), Age = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
    "FALSE"), .Names = ""), class = "table"), Target_New_Card = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
    "FALSE"), .Names = ""), class = "table")), .Names = c("ID", 
"Fiscal_Week_Date", "FISCAL_WEEK", "SU_CURRENT_RECORD_IND", "PROFIT_CENTRE", 
"ACTIVE_ON_BASE", "SU_STATUS_ID", "SU_BIRTH_DATE", "SU_GENDER", 
"AVERAGE_SPEND", "CU_PAPERLESS_BILL_IND", "SU_FIXED_MOBILE_IND", 
"MMS_INDICATOR", "INSURANCE_INDICATOR", "INSURANCE_AMOUNT", "PREFERRED_TOPUP_METHOD_DESC", 
"BROADBAND_IND", "ICT_IND", "TENURE_IN_MONTHS", "CONTRACT_TYPE", 
"HA_DEVICE_CAPABILITY", "Year", "Week", "Age", "Target_New_Card"
))

Answer 1

有多种方法可以做到这一点，但要注意，您想要的输出并不符合“整洁数据”（tidy data）的形式，因此不算典型的或最佳实践的数据框。这里的主要难点在于：您的列表由表（table）组成，其中个别元素（SU_GENDER 和 PREFERRED_TOPUP_METHOD_DESC）是同时包含 FALSE 和 TRUE 的表，而其余元素都是只含 FALSE 的表。其实仅凭 FALSE 的计数就已经包含了全部信息，不过您当然可以采用任何适合自己的数据形式 :)

这里我们不假设 ID 的 FALSE 计数就等于总数，而是用 my_list 中一个同时包含 TRUE 和 FALSE 的元素来求出总数。然后把元素调整为兼容的形式，转换为 data.frame，再补上 TRUE 的计数，就大功告成了！

# Build a data frame with one row per element of my_list and two columns,
# "FALSE" and "TRUE", holding the respective counts.

# Total number of observations, taken from an element that has both a FALSE
# and a TRUE count rather than assuming a FALSE-only element covers everything.
total <- sum(my_list$PREFERRED_TOPUP_METHOD_DESC)

# Reduce EVERY table to its FALSE count (0 when it has no FALSE cell).
# Reducing only one element would leave SU_GENDER's TRUE count in the unlisted
# vector, producing an extra row whose TRUE count is misfiled as a FALSE count.
# my_list itself is left unmodified.
false_counts <- vapply(
  my_list,
  function(tab) {
    n <- as.vector(tab)[names(tab) == "FALSE"]
    if (length(n) == 0L) 0L else n
  },
  integer(1)
)

# Row names come from the names of my_list; check.names = FALSE keeps the
# literal column names "FALSE" and "TRUE".
DF <- data.frame(
  `FALSE` = false_counts,
  `TRUE` = total - false_counts,
  check.names = FALSE
)
head(DF)
#                       FALSE TRUE
# ID                    31092    0
# Fiscal_Week_Date      31092    0
# FISCAL_WEEK           31092    0
# SU_CURRENT_RECORD_IND 31092    0
# PROFIT_CENTRE         31092    0
# ACTIVE_ON_BASE        31092    0

# the two rows that actually have TRUE counts, to convince yourself this worked
DF[c("SU_GENDER", "PREFERRED_TOPUP_METHOD_DESC"), ]
#                             FALSE  TRUE
# SU_GENDER                   17193 13899
# PREFERRED_TOPUP_METHOD_DESC  7672 23420

将 lapply 的输出转换为数据框

1 个答案: