我的数据与下面的示例类似,其中 `fruit` 是最高层级,`color` 是第二层级,`quality` 是层次结构的第三层级:
# Example input: one row per node of the hierarchy. `Level` names the tier
# (fruit > color > quality), `Tag` is the node label, `Value` is its number.
# NOTE(review): rows appear to be listed parent-first, each child following
# its parent -- confirm against the real data source.
data.frame(
Level = c("fruit", "color", "quality", "fruit", "color", "color", "fruit", "color", "quality", "quality", "fruit", "fruit"),
Tag = c("apple", "red", "normal", "pear", "yellow", "green", "peach", "orange", "normal", "abnormal", "banana", "avocado"),
Value = 1:12)
如何将该层次结构展平为一张表,使各层级从左到右依次成为列?对于上面的数据,期望的结果是:
# Desired flattened result: one row per path, one column per hierarchy level.
# Levels a path does not reach are empty strings here, and `Value` is NA
# unless the path reaches the `quality` level.
data.frame(
fruit=c("apple", "pear", "pear","peach", "peach","banana", "avocado"),
color=c("red","yellow", "green", "orange", "orange", "", ""),
quality=c("normal","","", "normal", "abnormal", "",""),
Value = c(3, NA, NA, 9, 10, NA, NA))
测试数据:
# Test data in dput() form: a 28-row data.frame with columns Level, Tag, Value.
# Here `Level` holds the numeric codes 0, 1 and 2 rather than names, `Tag` is
# a factor, and `Value` is a double.
# NOTE(review): 0 is presumably the top of the hierarchy -- confirm.
structure(list(Level = c(0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1,
2, 0, 1, 2, 1, 2, 1, 2, 2, 0, 1, 2, 1, 2, 1, 2), Tag = structure(c(5L,
7L, 6L, 16L, 6L, 13L, 7L, 14L, 17L, 7L, 88L, 21L, 88L, 19L, 7L,
18L, 21L, 18L, 23L, 18L, 46L, 20L, 7L, 18L, 22L, 18L, 23L, 18L
), .Label = c("A", "AA", "AAA", "ascot", "AUD", "Australia",
"availableCash", "B", "BB", "BBB", "Brazil", "C", "CAD", "Canada",
"CCC", "cfd", "CHF", "China", "CNH", "CNY", "commonShare", "convertibleBond",
"corporateBond", "corporateBondTotalReturnSwap", "corporateCds",
"corporateCdx", "defaultedCorporateBond", "Denmark", "discountNote",
"DKK", "dr", "equityFutureCfd", "equityOption", "equityTotalReturnSwap",
"equityVarianceSwap", "EUR", "European Union Member State", "exchangeTradedFund",
"frn", "fxForward", "fxOption", "GBP", "Germany", "governmentBond",
"HKD", "Hong Kong", "IDR", "ILS", "indexFuture", "indexOption",
"India", "Indonesia", "INR", "interestRateSwap", "Ireland", "Israel",
"Italy", "Jamaica", "Japan", "JPY", "Kazakhstan", "KRW", "Luxembourg",
"Malaysia", "Multinational", "mutualFund", "MYR", "New Zealand",
"NOK", "Norway", "Not Applicable", "Not Available", "Not Rated",
"NZD", "Philippines", "PHP", "PLN", "Poland", "Portugal", "preferredShare",
"SEK", "SGD", "Singapore", "South Africa", "South Korea", "sovereignCds",
"Sweden", "Switzerland", "Taiwan", "Thailand", "THB", "Total",
"TWD", "United Arab Emirates", "United Kingdom", "United States",
"USD", "ZAR"), class = "factor"), Value = c(-2427544.452063,
-2375655.354498, -2375655.354498, -51889.0975650001, -51889.0975650001,
3733.69460778798, 3733.69460778798, 3733.69460778798, -213296.649672231,
-73289.5744459406, -73289.5744459406, -140007.07522629, -140007.07522629,
205101731.9733, 2238511.58321064, 2238511.58321064, 104126150.859062,
104126150.859062, 98737069.5310267, 83011916.6708533, 15725152.8601734,
30968975.3531842, 11349121.3053863, 11349121.3053863, 14796021.271866,
14796021.271866, 4823832.77593178, 4823832.77593178)), .Names = c("Level",
"Tag", "Value"), class = "data.frame", row.names = c(NA, -28L
))
答案 0 :(得分:0)
# Example input from the question: one row per node of the hierarchy, with
# every parent listed before its children.
df <- data.frame(
  Level = c(
    "fruit", "color", "quality", "fruit", "color", "color",
    "fruit", "color", "quality", "quality", "fruit", "fruit"
  ),
  Tag = c(
    "apple", "red", "normal", "pear", "yellow", "green",
    "peach", "orange", "normal", "abnormal", "banana", "avocado"
  ),
  Value = 1:12
)

#' Flatten a parent-first hierarchy listing into one row per leaf path.
#'
#' @param df A data.frame with columns `Level`, `Tag` and `Value`. Rows must
#'   be ordered parent-first (each child somewhere below its parent). The
#'   hierarchy order of the levels is taken from their order of first
#'   appearance in `df$Level`, so the top level must appear first.
#' @return A data.frame with one character column per level (named after the
#'   level, left to right from top to bottom of the hierarchy) plus `Value`.
#'   There is one row per leaf node; levels a path does not reach are `NA`.
#'   `Value` keeps the type of `df$Value` and is `NA` for paths that stop
#'   above the deepest level.
flatten_hierarchy <- function(df) {
  level_names <- as.character(unique(df$Level))
  n_levels <- length(level_names)
  n_rows <- nrow(df)

  if (n_rows == 0L) {
    return(data.frame(Value = numeric(0)))
  }

  # Depth of every row (1 = top level). as.character() copes with factor or
  # numeric `Level` columns alike.
  depth <- match(as.character(df$Level), level_names)
  tags <- as.character(df$Tag)

  # A node is a leaf when the following row is not deeper than it; the last
  # row is always a leaf. This replaces the `index[i + 1]` look-ahead, which
  # was NA for the final group and silently dropped its children.
  is_leaf <- c(depth[-1L] > depth[-n_rows], FALSE) == FALSE

  paths <- matrix(NA_character_, nrow = sum(is_leaf), ncol = n_levels)
  current <- rep(NA_character_, n_levels)
  out_row <- 0L
  for (i in seq_len(n_rows)) {
    d <- depth[i]
    current[d] <- tags[i]
    # Entering a node invalidates whatever was remembered below it.
    if (d < n_levels) {
      current[(d + 1L):n_levels] <- NA_character_
    }
    if (is_leaf[i]) {
      out_row <- out_row + 1L
      paths[out_row, ] <- current
    }
  }

  # Only paths that reach the deepest level carry a value; keep the original
  # type instead of coercing through as.character().
  value <- df$Value[is_leaf]
  value[depth[is_leaf] < n_levels] <- NA

  out <- as.data.frame(paths, stringsAsFactors = FALSE)
  names(out) <- level_names
  out$Value <- value
  out
}

final <- flatten_hierarchy(df)