我知道已经有很多相关的帖子,我保证我都看过了,但仍然没能解决这个问题。
下面是一个输入列表,它是lapply调用的输出。
我想要一个整洁、易于阅读的数据框,包含 2 列(一列对应 `TRUE`,一列对应 `FALSE`),并且 25 个列表项各占一行。
尝试:
# Attempt 1: row-bind the per-column tables and wrap the result in a data frame.
# NOTE(review): the reported error shows the call was evaluated as
# data.frame(., do.call(rbind, .)), i.e. the pipe also passed the list itself
# as the first argument, so the two arguments imply different row counts.
falsies <- lapply(my_list, function(x) table(tolower(x) %in% c("", "unknown", "\\?"))) %>%
+ data.frame(do.call(rbind, .))
data.frame(., do.call(rbind, .)) 中的错误: 参数意味着不同的行数:2, 25
# Attempt 2: pipe the list of tables straight into as.data.frame.matrix().
# NOTE(review): the piped value is a list, not a matrix — presumably why the
# matrix method fails inside seq_len(ncols); confirm against the error shown.
falsies <- lapply(my_list, function(x) table(tolower(x) %in% c("", "unknown", "\\?"))) %>%
as.data.frame.matrix()
seq_len(ncols)中的错误: 参数必须是可强制的非负整数 另外:警告信息: 在seq_len(ncols)中:'length.out'参数使用的第一个元素
# Attempt 3: flatten with as.vector(t(.)) and then build a Field/Value data frame.
# NOTE(review): with %>% the list is presumably also passed as the first
# argument, which would make t(.) the `mode` argument of as.vector() — this is
# consistent with the "invalid 'mode' argument" error reported.
falsies <- lapply(my_list, function(x) table(tolower(x) %in% c("", "unknown", "\\?"))) %>% as.vector(t(.)) %>%
as.data.frame(Field = names(.), Value = unlist(.))
as.vector(x,mode)出错:'mode'参数无效
如何将这个列表转换为只有 2 列(`FALSE` 和 `TRUE`)的数据框?
# Reproducible example input (dput output): a named list of 25 one-dimensional
# `table` objects, one per column. Most elements hold a single "FALSE" count of
# 31092; SU_GENDER and PREFERRED_TOPUP_METHOD_DESC hold both a "FALSE" and a
# "TRUE" count (each pair also sums to 31092).
my_list <- structure(list(ID = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
"FALSE"), .Names = ""), class = "table"), Fiscal_Week_Date = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
"FALSE"), .Names = ""), class = "table"), FISCAL_WEEK = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
"FALSE"), .Names = ""), class = "table"), SU_CURRENT_RECORD_IND = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
"FALSE"), .Names = ""), class = "table"), PROFIT_CENTRE = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
"FALSE"), .Names = ""), class = "table"), ACTIVE_ON_BASE = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
"FALSE"), .Names = ""), class = "table"), SU_STATUS_ID = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
"FALSE"), .Names = ""), class = "table"), SU_BIRTH_DATE = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
"FALSE"), .Names = ""), class = "table"), SU_GENDER = structure(c(17193L,
13899L), .Dim = 2L, .Dimnames = structure(list(c("FALSE", "TRUE"
)), .Names = ""), class = "table"), AVERAGE_SPEND = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
"FALSE"), .Names = ""), class = "table"), CU_PAPERLESS_BILL_IND = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
"FALSE"), .Names = ""), class = "table"), SU_FIXED_MOBILE_IND = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
"FALSE"), .Names = ""), class = "table"), MMS_INDICATOR = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
"FALSE"), .Names = ""), class = "table"), INSURANCE_INDICATOR = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
"FALSE"), .Names = ""), class = "table"), INSURANCE_AMOUNT = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
"FALSE"), .Names = ""), class = "table"), PREFERRED_TOPUP_METHOD_DESC = structure(c(7672L,
23420L), .Dim = 2L, .Dimnames = structure(list(c("FALSE", "TRUE"
)), .Names = ""), class = "table"), BROADBAND_IND = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
"FALSE"), .Names = ""), class = "table"), ICT_IND = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
"FALSE"), .Names = ""), class = "table"), TENURE_IN_MONTHS = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
"FALSE"), .Names = ""), class = "table"), CONTRACT_TYPE = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
"FALSE"), .Names = ""), class = "table"), HA_DEVICE_CAPABILITY = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
"FALSE"), .Names = ""), class = "table"), Year = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
"FALSE"), .Names = ""), class = "table"), Week = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
"FALSE"), .Names = ""), class = "table"), Age = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
"FALSE"), .Names = ""), class = "table"), Target_New_Card = structure(31092L, .Dim = 1L, .Dimnames = structure(list(
"FALSE"), .Names = ""), class = "table")), .Names = c("ID",
"Fiscal_Week_Date", "FISCAL_WEEK", "SU_CURRENT_RECORD_IND", "PROFIT_CENTRE",
"ACTIVE_ON_BASE", "SU_STATUS_ID", "SU_BIRTH_DATE", "SU_GENDER",
"AVERAGE_SPEND", "CU_PAPERLESS_BILL_IND", "SU_FIXED_MOBILE_IND",
"MMS_INDICATOR", "INSURANCE_INDICATOR", "INSURANCE_AMOUNT", "PREFERRED_TOPUP_METHOD_DESC",
"BROADBAND_IND", "ICT_IND", "TENURE_IN_MONTHS", "CONTRACT_TYPE",
"HA_DEVICE_CAPABILITY", "Year", "Week", "Age", "Target_New_Card"
))
答案 0 :(得分:0)
有多种方法可以做到这一点,但要注意,您想要的输出并不“整洁”(tidy),因此不是典型的或符合最佳实践的数据框。这里的主要挑战在于,您的列表由表(table)组成:其中个别元素(例如 `PREFERRED_TOPUP_METHOD_DESC`)是同时包含 `FALSE` 和 `TRUE` 的表,而其他所有元素都只是含 `FALSE` 的表。其实仅凭 `FALSE` 值就已包含全部信息,不过您可以使用任何适合自己的数据形式 :)
此处我们不假设 `ID` 的 `FALSE` 计数就等于全部记录数,而是使用 `my_list` 中同时包含 `TRUE` 和 `FALSE` 的一个元素来计算总数。然后我们更改该元素,使其处于兼容的形式,转换为 data.frame,再补上 `TRUE` 值,就大功告成了!
# Build a two-column (FALSE / TRUE) data frame with exactly one row per element
# of my_list. Each element of my_list is a one-dimensional table holding a
# "FALSE" count and, for some elements, a "TRUE" count as well.

# Total number of records, taken from an element that carries both counts.
# Assumes every table in my_list sums to this same total, as in the example
# data.
total <- sum(my_list$PREFERRED_TOPUP_METHOD_DESC)

# Extract the "FALSE" count from every table so all elements share one shape.
# Calling unlist() on the raw list would emit two rows for any element that
# also has a "TRUE" count (e.g. SU_GENDER), so take one value per element
# instead. An element without a "FALSE" level contributes 0.
false_counts <- vapply(
  my_list,
  function(tab) {
    counts <- setNames(as.vector(tab), names(tab))
    if ("FALSE" %in% names(counts)) counts[["FALSE"]] else 0
  },
  numeric(1)
)

# TRUE is whatever remains of the total. my_list itself is left unmodified.
DF <- data.frame(
  `FALSE` = false_counts,
  `TRUE` = total - false_counts,
  row.names = names(my_list),
  check.names = FALSE  # keep the literal column names "FALSE" and "TRUE"
)
head(DF)
#                       FALSE TRUE
# ID                    31092    0
# Fiscal_Week_Date      31092    0
# FISCAL_WEEK           31092    0
# SU_CURRENT_RECORD_IND 31092    0
# PROFIT_CENTRE         31092    0
# ACTIVE_ON_BASE        31092    0
# a helpful pair of rows to convince yourself this worked
DF[c("SU_GENDER", "PREFERRED_TOPUP_METHOD_DESC"), ]
#                             FALSE  TRUE
# SU_GENDER                   17193 13899
# PREFERRED_TOPUP_METHOD_DESC  7672 23420