R:展平表格层次结构

时间:2015-09-28 14:39:56

标签: r

我的数据与此类似,其中fruit是最高级别,color是第二级,quality是层次结构的第三级:

# Example input: one row per node. `Level` names the tier of the node
# (fruit, color or quality), `Tag` is its label and `Value` is 1..12.
data.frame(
 Level = c("fruit", "color", "quality", "fruit", "color", "color", "fruit", "color", "quality", "quality", "fruit", "fruit"), 
 Tag = c("apple", "red", "normal", "pear", "yellow", "green", "peach", "orange", "normal", "abnormal", "banana", "avocado"), 
 Value = 1:12)

如何将这个层次结构展平成一张表,使各个层级从左到右依次成为列?对于上面的数据,结果应该是:

# Desired result: one column per level plus `Value`. Levels a row does not
# have are empty strings; `Value` is NA on rows whose `quality` is empty.
data.frame(
 fruit=c("apple", "pear", "pear","peach", "peach","banana", "avocado"),
 color=c("red","yellow", "green", "orange", "orange", "", ""),
 quality=c("normal","","", "normal", "abnormal", "",""), 
 Value = c(3, NA, NA, 9, 10, NA, NA))

测试数据

# Test data as a structure() literal: a 28-row data frame whose `Level` is
# numeric (0, 1 or 2), `Tag` is a factor and `Value` is numeric.
structure(list(Level = c(0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 
2, 0, 1, 2, 1, 2, 1, 2, 2, 0, 1, 2, 1, 2, 1, 2), Tag = structure(c(5L, 
7L, 6L, 16L, 6L, 13L, 7L, 14L, 17L, 7L, 88L, 21L, 88L, 19L, 7L, 
18L, 21L, 18L, 23L, 18L, 46L, 20L, 7L, 18L, 22L, 18L, 23L, 18L
), .Label = c("A", "AA", "AAA", "ascot", "AUD", "Australia", 
"availableCash", "B", "BB", "BBB", "Brazil", "C", "CAD", "Canada", 
"CCC", "cfd", "CHF", "China", "CNH", "CNY", "commonShare", "convertibleBond", 
"corporateBond", "corporateBondTotalReturnSwap", "corporateCds", 
"corporateCdx", "defaultedCorporateBond", "Denmark", "discountNote", 
"DKK", "dr", "equityFutureCfd", "equityOption", "equityTotalReturnSwap", 
"equityVarianceSwap", "EUR", "European Union Member State", "exchangeTradedFund", 
"frn", "fxForward", "fxOption", "GBP", "Germany", "governmentBond", 
"HKD", "Hong Kong", "IDR", "ILS", "indexFuture", "indexOption", 
"India", "Indonesia", "INR", "interestRateSwap", "Ireland", "Israel", 
"Italy", "Jamaica", "Japan", "JPY", "Kazakhstan", "KRW", "Luxembourg", 
"Malaysia", "Multinational", "mutualFund", "MYR", "New Zealand", 
"NOK", "Norway", "Not Applicable", "Not Available", "Not Rated", 
"NZD", "Philippines", "PHP", "PLN", "Poland", "Portugal", "preferredShare", 
"SEK", "SGD", "Singapore", "South Africa", "South Korea", "sovereignCds", 
"Sweden", "Switzerland", "Taiwan", "Thailand", "THB", "Total", 
"TWD", "United Arab Emirates", "United Kingdom", "United States", 
"USD", "ZAR"), class = "factor"), Value = c(-2427544.452063, 
-2375655.354498, -2375655.354498, -51889.0975650001, -51889.0975650001, 
3733.69460778798, 3733.69460778798, 3733.69460778798, -213296.649672231, 
-73289.5744459406, -73289.5744459406, -140007.07522629, -140007.07522629, 
205101731.9733, 2238511.58321064, 2238511.58321064, 104126150.859062, 
104126150.859062, 98737069.5310267, 83011916.6708533, 15725152.8601734, 
30968975.3531842, 11349121.3053863, 11349121.3053863, 14796021.271866, 
14796021.271866, 4823832.77593178, 4823832.77593178)), .Names = c("Level", 
"Tag", "Value"), class = "data.frame", row.names = c(NA, -28L
))

1 个答案:

答案 0 :(得分:0)

# Example input: one row per node of the hierarchy, parents listed before
# their children. `Level` names the tier, `Tag` the label, `Value` the value.
df <- data.frame(
  Level = c("fruit", "color", "quality", "fruit", "color", "color",
            "fruit", "color", "quality", "quality", "fruit", "fruit"),
  Tag = c("apple", "red", "normal", "pear", "yellow", "green",
          "peach", "orange", "normal", "abnormal", "banana", "avocado"),
  Value = 1:12)

# Flatten a parent-before-child hierarchy listing into one row per
# root-to-leaf path.
#
# data: data frame with columns `Level`, `Tag` and `Value`. The order of the
#       tiers is taken from the order in which they first appear in `Level`,
#       so the listing must start at the top tier and introduce deeper tiers
#       in order.
#
# Returns a data frame with one character column per tier (top tier first)
# followed by `Value`. Tiers that a path does not reach are NA. `Value` keeps
# its original type and is the leaf's value when the path reaches the deepest
# tier, NA otherwise.
flatten_hierarchy <- function(data) {
  stopifnot(
    is.data.frame(data),
    all(c("Level", "Tag", "Value") %in% names(data))
  )

  level_names <- unique(as.character(data$Level))
  n_levels <- length(level_names)
  n_rows <- nrow(data)
  depth <- match(as.character(data$Level), level_names)
  tags <- as.character(data$Tag)

  # A row ends a path when the following row is not deeper than it; the
  # trailing 0 makes the last row always end a path. This replaces the old
  # index[i + 1] lookup, which was NA for the last group and dropped its
  # children.
  next_depth <- c(depth[-1], 0L)[seq_len(n_rows)]
  is_leaf <- next_depth <= depth
  leaf_rows <- which(is_leaf)

  paths <- matrix(
    NA_character_,
    nrow = length(leaf_rows),
    ncol = n_levels,
    dimnames = list(NULL, level_names)
  )

  # Walk the listing once, remembering the tag currently open at each tier.
  current <- rep(NA_character_, n_levels)
  out_row <- 0L
  for (i in seq_len(n_rows)) {
    d <- depth[i]
    current[d] <- tags[i]
    # Entering a node invalidates whatever was open below it.
    if (d < n_levels) {
      current[(d + 1L):n_levels] <- NA_character_
    }
    if (is_leaf[i]) {
      out_row <- out_row + 1L
      paths[out_row, ] <- current
    }
  }

  flat <- as.data.frame(paths, stringsAsFactors = FALSE)

  # Only paths that reach the deepest tier carry a value; Value is no longer
  # converted to character.
  leaf_value <- data$Value[leaf_rows]
  leaf_value[depth[leaf_rows] < n_levels] <- NA
  flat$Value <- leaf_value
  flat
}

final <- flatten_hierarchy(df)