我有以下数据框:
Offer ID Item ID
162840 56A340942123, 902G569800234
162841 96A3403718213, 872M569814535, M235481980234, 890TG56248191
162842 H7901230913592
Offer ID是字符向量,Item ID是列表。这里,Item ID有3个元素——每行一个。
是否可以将项目ID转换为列表中的向量,以便我可以分别引用同一行中的每个项目ID?
我是R的新手,我尝试过unlist,do.call和paste,它们都搞乱了我数据框的维度。
注意:我不想将它们拆分为子列表。
ETA:dput
-
# dput() of the sample data frame: 7 rows, where Col1 is an integer vector and
# Col2 is a named list column holding one character vector per row.
structure(list(Col1 = c(162840L, 162841L, 162842L, 162843L, 162845L,
162847L, 162849L), Col2 = structure(list(`1` = c("137089", "668552",
"346129"), `4` = c("442054", "479934", "58316"), `7` = c("149298",
"533977", "598069"), `10` = c("898134", "614982", "581007", "570515"
), `14` = c("93015", "252103", "639482", "226594", "64429"),
`19` = c("328971", "604454", "603078"), `22` = "93774"), .Names = c("1",
"4", "7", "10", "14", "19", "22"))), .Names = c("Col1", "Col2"
), row.names = c(NA, -7L), class = "data.frame")
预期产出:
Offer ID Item ID1 Item ID2 Item ID3 Item ID4
162840 137089 668552 346129
162841 442054 479934 58316
162842 149298 533977 598069
162843 898134 614982 581007 570515
答案 0 :(得分:0)
# Spread the list column `Col2` of `df1` into one column per element, padding
# shorter rows with NA, and keep `Col1` as the first column.
# NOTE: every value is converted to numeric below, so a non-numeric entry in
# `Col2` would become NA (with a coercion warning).

# get length of the largest vector
max_cols <- max(lengths(df1$Col2))
# fill with NA to make it equal length (`length<-` pads with NA)
a1 <- lapply(df1$Col2, function(x) {
  length(x) <- max_cols
  x
})
# combine column 1 with transformed data; binding an integer vector to a
# character matrix yields a character matrix
df1 <- cbind(df1$Col1, do.call(rbind, a1))
# apply column names and drop the row names inherited from the list names
dimnames(df1) <- list(NULL, paste0("Col", seq_len(max_cols + 1L)))
# convert to numeric data type in place: unlike apply(df1, 2, as.numeric),
# this keeps the matrix shape even when there is only a single row
storage.mode(df1) <- "double"
df1 <- as.data.frame(df1)
df1
#     Col1   Col2   Col3   Col4   Col5  Col6
# 1 162840 137089 668552 346129     NA    NA
# 2 162841 442054 479934  58316     NA    NA
# 3 162842 149298 533977 598069     NA    NA
# 4 162843 898134 614982 581007 570515    NA
# 5 162845  93015 252103 639482 226594 64429
# 6 162847 328971 604454 603078     NA    NA
# 7 162849  93774     NA     NA     NA    NA
数据:
# Sample data: `Col1` holds the integer IDs and `Col2` is a named list column
# with one character vector (of varying length) per row.
item_ids <- list(
  c("137089", "668552", "346129"),
  c("442054", "479934", "58316"),
  c("149298", "533977", "598069"),
  c("898134", "614982", "581007", "570515"),
  c("93015", "252103", "639482", "226594", "64429"),
  c("328971", "604454", "603078"),
  "93774"
)
names(item_ids) <- c("1", "4", "7", "10", "14", "19", "22")

df1 <- structure(
  list(
    Col1 = c(
      162840L, 162841L, 162842L, 162843L,
      162845L, 162847L, 162849L
    ),
    Col2 = item_ids
  ),
  row.names = c(NA, -7L),
  class = "data.frame"
)