答案 0 :(得分:1)
如果可能，应避免在原始文件中使用合并单元格。如果您的 xls 文件不是太乱，可以按如下方式处理：
library(readxl)
library(reshape)
# Read the example workbook, convert the result to a plain data.frame,
# then print it to inspect how the merged header cells were imported
d <- read_excel("~/Bureau/Merged_Headers_Example.xls")
d <- as.data.frame(d)
d
## X__1 January X__2 X__3 February X__4 X__5 March X__6 X__7
## 1 SECTOR var1 var2 var3 var1 var2 var3 var1 var2 var3
## 2 AAAA 2 8 2 0 0 5 2 3 9
## 3 BBBB 9 3 8 4 2 6 8 6 8
## 4 CCCC 6 0 10 7 4 9 5 8 2
## 5 DDDD 0 1 2 2 5 7 4 3 2
## 6 EEEE 4 8 1 8 10 8 5 10 4
## 7 FFFF 1 0 10 2 1 4 1 3 3
# Reshape a sheet whose first header row holds merged "month" cells and whose
# second header row (imported as the first data row) holds the variable names.

# The column names carry the merged-cell labels; the blank cells of each merged
# range were given placeholder names on import.
# NOTE(review): "X__<n>" is what this example shows; "...<n>" is presumably
# what newer readxl versions generate -- confirm against your installed version.
top_header <- colnames(d)
is_placeholder <- grepl("^(X__|\\.\\.\\.)[0-9]+$", top_header)
month_levels <- unique(top_header[!is_placeholder])

# The first data row is the real sub-header (SECTOR, var1, var2, ...);
# keep it aside and drop it from the data
name_cols <- d[1, ]
sub_header <- vapply(name_cols, as.character, character(1), USE.NAMES = FALSE)
d <- d[-1, , drop = FALSE]

# Fill each month label forward over the columns of its merged range.
# Columns located before the first month label are identifier columns.
col_group <- cumsum(!is_placeholder)
is_id <- col_group == 0L
col_month <- c(NA_character_, top_header[!is_placeholder])[col_group + 1L]

# Rename the measure columns to unique "<month>.<variable>" labels before
# melting. With repeated names such as var1/var2/var3, selecting a column by
# name always returns the first match, so every month would silently receive
# the values of the first month.
unique_cols <- ifelse(is_id, sub_header, paste(col_month, sub_header, sep = "."))
stopifnot(
  any(is_id),
  any(!is_id),
  !anyNA(sub_header),
  anyDuplicated(unique_cols) == 0L
)
colnames(d) <- unique_cols

# Dataset in "long" / "tidy" form
d <- melt(d, id.vars = unique_cols[is_id])

# Recover month and variable for every row from the column it came from,
# instead of relying on the row order produced by melt()
col_pos <- match(as.character(d$variable), unique_cols)
header <- factor(col_month[col_pos], levels = month_levels)
d$month <- header
d$variable <- factor(sub_header[col_pos], levels = unique(sub_header[!is_id]))

# The sub-header row made every column import as text; restore the natural
# type (left unchanged if the values are not numeric)
d$value <- type.convert(as.character(d$value), as.is = TRUE)

# Now you can rearrange that as you want with cast
result <- cast(d, month + SECTOR ~ variable)
result
## Expected output for the example sheet:
##       month SECTOR var1 var2 var3
## 1   January   AAAA    2    8    2
## 2   January   BBBB    9    3    8
## 3   January   CCCC    6    0   10
## 4   January   DDDD    0    1    2
## 5   January   EEEE    4    8    1
## 6   January   FFFF    1    0   10
## 7  February   AAAA    0    0    5
## 8  February   BBBB    4    2    6
## 9  February   CCCC    7    4    9
## 10 February   DDDD    2    5    7
## 11 February   EEEE    8   10    8
## 12 February   FFFF    2    1    4
## 13    March   AAAA    2    3    9
## 14    March   BBBB    8    6    8
## 15    March   CCCC    5    8    2
## 16    March   DDDD    4    3    2
## 17    March   EEEE    5   10    4
## 18    March   FFFF    1    3    3

# Or, to be very close to the layout of the original sheet (usually not a
# good idea: the result has repeated column names)
result <- cast(d, SECTOR + month ~ variable)
result <- do.call(cbind.data.frame, split(result, result$month))
# Strip the "<month>." prefix that cbind adds to each column name
colnames(result) <- gsub("^.*\\.", "", colnames(result))
result
## Expected output for the example sheet:
##    SECTOR   month var1 var2 var3 SECTOR    month var1 var2 var3 SECTOR
## 1    AAAA January    2    8    2   AAAA February    0    0    5   AAAA
## 4    BBBB January    9    3    8   BBBB February    4    2    6   BBBB
## 7    CCCC January    6    0   10   CCCC February    7    4    9   CCCC
## 10   DDDD January    0    1    2   DDDD February    2    5    7   DDDD
## 13   EEEE January    4    8    1   EEEE February    8   10    8   EEEE
## 16   FFFF January    1    0   10   FFFF February    2    1    4   FFFF
##    month var1 var2 var3
## 1  March    2    3    9
## 4  March    8    6    8
## 7  March    5    8    2
## 10 March    4    3    2
## 13 March    5   10    4
## 16 March    1    3    3