将lapply的输出转换为数据帧

时间:2018-03-23 03:33:24

标签: r

我知道已经有很多相关的帖子,我保证我都看过了。然而,我仍然没能解决这个问题。

下面是一个输入列表,它是lapply调用的输出。

我想要一个漂亮、易于阅读的数据框,包含2列,一列用于TRUE的计数,一列用于FALSE的计数,25个列表项各占一行。

尝试:

 # Attempt 1 (fails with the error quoted below). `.` appears only inside the
 # nested do.call(), so the pipe still passes the list as the first argument;
 # the call actually evaluated is data.frame(., do.call(rbind, .)), as the
 # error message shows.
 # NOTE(review): the leading `+` on the second line looks like a console
 # continuation prompt picked up by the paste, not part of the code.
 # NOTE(review): %in% compares whole strings, so "\\?" matches the literal
 # two-character text \? rather than a question mark -- confirm intent.
 falsies <- lapply(my_list, function(x) table(tolower(x) %in% c("", "unknown", "\\?"))) %>% 
+   data.frame(do.call(rbind, .))
  

data.frame(., do.call(rbind, .)) 中的错误:参数意味着不同的行数:2, 25

# Attempt 2 (fails with the error quoted below). lapply() returns a list, but
# as.data.frame.matrix() is the method meant for matrices, so it is handed an
# object of the wrong type.
falsies <- lapply(my_list, function(x) table(tolower(x) %in% c("", "unknown", "\\?"))) %>% 
  as.data.frame.matrix()
  

seq_len(ncols)中的错误:     参数必须是可强制的非负整数   另外:警告信息:   在seq_len(ncols)中:'length.out'参数使用的第一个元素

# Attempt 3 (fails with the error quoted below). The pipe inserts the list as
# the first argument, so the first step becomes as.vector(., t(.)); t(.) lands
# in as.vector()'s second parameter, `mode`, which is what the error message
# complains about. The as.data.frame() step is never reached.
falsies <- lapply(my_list, function(x) table(tolower(x) %in% c("", "unknown", "\\?"))) %>% as.vector(t(.)) %>% 
  as.data.frame(Field = names(.), Value = unlist(.))
  

as.vector(x,mode)出错:'mode'参数无效

如何将这个列表转换为只有2列宽的数据框?

# Sample data written as a structure() literal (looks like dput() output): a
# named list of 25 one-dimensional tables of counts. 23 of them have a single
# "FALSE" cell (31092); SU_GENDER and PREFERRED_TOPUP_METHOD_DESC have both a
# "FALSE" and a "TRUE" cell.
my_list <- structure(list(ID = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
    "FALSE"), .Names = ""), class = "table"), Fiscal_Week_Date = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
    "FALSE"), .Names = ""), class = "table"), FISCAL_WEEK = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
    "FALSE"), .Names = ""), class = "table"), SU_CURRENT_RECORD_IND = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
    "FALSE"), .Names = ""), class = "table"), PROFIT_CENTRE = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
    "FALSE"), .Names = ""), class = "table"), ACTIVE_ON_BASE = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
    "FALSE"), .Names = ""), class = "table"), SU_STATUS_ID = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
    "FALSE"), .Names = ""), class = "table"), SU_BIRTH_DATE = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
    "FALSE"), .Names = ""), class = "table"), SU_GENDER = structure(c(17193L, 
13899L), .Dim = 2L, .Dimnames = structure(list(c("FALSE", "TRUE"
)), .Names = ""), class = "table"), AVERAGE_SPEND = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
    "FALSE"), .Names = ""), class = "table"), CU_PAPERLESS_BILL_IND = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
    "FALSE"), .Names = ""), class = "table"), SU_FIXED_MOBILE_IND = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
    "FALSE"), .Names = ""), class = "table"), MMS_INDICATOR = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
    "FALSE"), .Names = ""), class = "table"), INSURANCE_INDICATOR = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
    "FALSE"), .Names = ""), class = "table"), INSURANCE_AMOUNT = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
    "FALSE"), .Names = ""), class = "table"), PREFERRED_TOPUP_METHOD_DESC = structure(c(7672L, 
23420L), .Dim = 2L, .Dimnames = structure(list(c("FALSE", "TRUE"
)), .Names = ""), class = "table"), BROADBAND_IND = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
    "FALSE"), .Names = ""), class = "table"), ICT_IND = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
    "FALSE"), .Names = ""), class = "table"), TENURE_IN_MONTHS = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
    "FALSE"), .Names = ""), class = "table"), CONTRACT_TYPE = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
    "FALSE"), .Names = ""), class = "table"), HA_DEVICE_CAPABILITY = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
    "FALSE"), .Names = ""), class = "table"), Year = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
    "FALSE"), .Names = ""), class = "table"), Week = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
    "FALSE"), .Names = ""), class = "table"), Age = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
    "FALSE"), .Names = ""), class = "table"), Target_New_Card = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
    "FALSE"), .Names = ""), class = "table")), .Names = c("ID", 
"Fiscal_Week_Date", "FISCAL_WEEK", "SU_CURRENT_RECORD_IND", "PROFIT_CENTRE", 
"ACTIVE_ON_BASE", "SU_STATUS_ID", "SU_BIRTH_DATE", "SU_GENDER", 
"AVERAGE_SPEND", "CU_PAPERLESS_BILL_IND", "SU_FIXED_MOBILE_IND", 
"MMS_INDICATOR", "INSURANCE_INDICATOR", "INSURANCE_AMOUNT", "PREFERRED_TOPUP_METHOD_DESC", 
"BROADBAND_IND", "ICT_IND", "TENURE_IN_MONTHS", "CONTRACT_TYPE", 
"HA_DEVICE_CAPABILITY", "Year", "Week", "Age", "Target_New_Card"
))

1 个答案:

答案 0 :(得分:0)

有多种方法可以做到这一点,但要认识到您请求的输出并不"整洁"(tidy),因此不是典型或最佳实践的数据框。这里的主要挑战是您的列表由表组成,其中个别元素(SU_GENDER 和 PREFERRED_TOPUP_METHOD_DESC)是同时包含 FALSE 和 TRUE 的表,而所有其他元素只是 FALSE 的表。仅凭 FALSE 值(加上总数)就包含了所有信息,但您可以使用适合您的任何形式的数据 :)

此处我们不假设 ID.FALSE 包含所有为假的 ID,而是使用 my_list 中同时包含 TRUE 和 FALSE 的一个元素来得到值的总数。然后我们把数据整理成兼容的形式,转换为 data.frame,再补上 TRUE 值,然后瞧!

# Build a 25 x 2 data frame of FALSE/TRUE counts, one row per element of
# my_list, without modifying my_list itself.

# One element that holds both levels gives the overall total (FALSE + TRUE).
# Assumes every table was built from columns of the same length, so all of
# them sum to this same total -- true for tables made by lapply() over the
# columns of one data set.
total <- sum(my_list[["PREFERRED_TOPUP_METHOD_DESC"]])

# Reduce each table to its FALSE count. Going through unlist() instead would
# keep the TRUE count of every two-level table (e.g. SU_GENDER) as an extra
# element, giving 26 rows and one bogus row. Taking exactly one number per
# element yields one row per list item, named after the item. A table with no
# "FALSE" cell counts as 0.
false_counts <- vapply(
  my_list,
  function(tab) {
    pos <- match("FALSE", names(tab))
    if (is.na(pos)) 0L else as.integer(tab[[pos]])
  },
  integer(1)
)

# TRUE is whatever remains of the total. check.names = FALSE keeps the
# literal "FALSE"/"TRUE" column names instead of mangling them.
DF <- data.frame(
  "FALSE" = false_counts,
  "TRUE" = total - false_counts,
  check.names = FALSE
)
head(DF)
#                       FALSE TRUE
# ID                    31092    0
# Fiscal_Week_Date      31092    0
# FISCAL_WEEK           31092    0
# SU_CURRENT_RECORD_IND 31092    0
# PROFIT_CENTRE         31092    0
# ACTIVE_ON_BASE        31092    0

# the two elements that have TRUE counts, to convince yourself this worked
DF[c("SU_GENDER", "PREFERRED_TOPUP_METHOD_DESC"), ]
#                             FALSE  TRUE
# SU_GENDER                   17193 13899
# PREFERRED_TOPUP_METHOD_DESC  7672 23420