我尝试了几种方法,但都没有成功。基本上,我有一个包含6列、8000多行的大型数据框。第一列是由2个字母组成的代码(例如AA,然后是AB,依此类推)。第二列是其余4列共用的数值(例如,AA为0至180,AB为0至170,依此类推)。其余各列是数值。
这是我的DataFrame的摘录:
# dput() excerpt of the data frame: 72 rows with columns X2code, DOY, WDrain,
# CumET, SoilAvW, Runoff and Transp. The value is not assigned to a name here.
structure(list(X2code = c("AA", "AA", "AA", "AA", "AA", "AA",
"AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA",
"AA", "AA", "AA", "AA", "AA", "AA", "AB", "AB", "AB", "AB", "AB",
"AB", "AB", "AB", "AB", "AB", "AB", "AB", "AB", "AB", "AB", "AB",
"AB", "AB", "AB", "AB", "AB", "AB", "AB", "AB", "AB", "AB", "AB",
"AB", "AB", "AB", "AB", "AB", "AB", "AC", "AC", "AC", "AC", "AC",
"AC", "AC", "AC", "AC", "AC", "AC", "AC", "AC", "AC", "AC", "AC"
), DOY = c(294L, 295L, 296L, 297L, 298L, 299L, 300L, 301L, 302L,
303L, 304L, 305L, 306L, 307L, 308L, 309L, 310L, 311L, 312L, 313L,
314L, 315L, 316L, 294L, 295L, 296L, 297L, 298L, 299L, 300L, 301L,
302L, 303L, 304L, 305L, 306L, 307L, 308L, 309L, 310L, 311L, 312L,
313L, 314L, 315L, 316L, 317L, 318L, 319L, 320L, 321L, 322L, 323L,
324L, 325L, 326L, 168L, 169L, 170L, 171L, 172L, 173L, 174L, 175L,
176L, 177L, 178L, 179L, 180L, 181L, 182L, 183L), WDrain = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, 0.13, 0.13, 0.13, 0.13, 0.13, 0.13, 0.13,
0.13, 0.13, 0.13, 0.13, 0.13, 0.13, 0.13, 0.13, 0.13, 0.13, 0.13,
0.13, 0.13, 0.13, 0.13, 0.13, 0.13, 0.13, 0.13, 0.13, 0.13, 0.13,
0.13, 0.13, 0.13, 0.13, 244.1, 244.1, 244.1, 244.1, 244.1, 244.1,
244.1, 244.1, 244.1, 244.1, 244.1, 244.1, 244.1, 244.1, 244.1,
244.1), CumET = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 3.34, 4.75, 5.33,
6.67, 7.41, 7.84, 8.78, 9.59, 10.47, 10.57, 11.17, 11.91, 12.07,
12.48, 12.63, 12.88, 13.8, 14.56, 15.11, 15.43, 15.86, 16.66,
17.27, 17.54, 18.21, 18.64, 18.75, 19.11, 19.2, 19.85, 20.48,
21.02, 21.32, 222, 226, 233, 241, 250, 258, 265, 269, 271, 276,
279, 281, 281, 283, 285, 288), SoilAvW = c(NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 246.51, 245.1, 249.52, 248.68, 248.04, 247.44, 246.5,
245.69, 244.81, 244.71, 244.11, 243.37, 243.3, 242.88, 242.83,
242.58, 241.66, 241, 243.65, 243.5, 243.36, 242.65, 249.03, 250.74,
253.05, 266.21, 270.28, 279.71, 287.9, 288.84, 288.69, 288.25,
295.13, 330.2, 326, 319.5, 311.2, 302.8, 294.4, 287.7, 287.2,
285, 280.4, 278.6, 276, 282.3, 286.5, 284.1, 281.5), Runoff = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L), Transp = c(NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.01, 0.01, 0.01,
0.01, 0.01, 0.02, 0.02, 0.02, 0.02, 0.03, 0.03, 0.03, 0.03, 0.04,
0.04, 0.04, 0.04, 0.04, 0.05, 0.05, 105.7, 109.1, 114.5, 121.5,
128.9, 136.2, 141.9, 145.4, 147.1, 150.7, 152.9, 154.8, 155,
155.8, 157.9, 159.9)), .Names = c("X2code", "DOY", "WDrain",
"CumET", "SoilAvW", "Runoff", "Transp"), class = "data.frame", row.names = c(NA,
-72L))
我想要做的是创建4个新的数据框,在每个数据框中,把长列按AA、AB等代码拆分成新的列。另外,在每个值列旁边,我希望放上该代码对应的第2列的值。例如,AA先有一列0到180的值,后面跟着它的值列;AB先有一列0到170的值,后面跟着它的值列。此外,我想在每一对列的顶部加上这个2个字母的代码。以下是我想要的数据框示例(例如,这是针对value1的)。
AA    AA    AB    AB    AC    AC
0     2     0     0.5   0     50
1     2.4   1     1.6   1     0.6
2     5     2     4.6   ..    ..
3     6.7   3     2     ..    ..
..    ..    ..    ..
..    ..    170   70
180   10
以下是我的部分代码:
# Set the working directory (the path is elided in the original post).
setwd("C:\\.....")
# Read the table with a header row; "na", "n/a", "NA" and "" are treated as
# missing values, and strings are kept as character rather than factors.
my.data <- read.table("my.data.txt", header=T, na.strings = c("na" , "n/a" , "NA" , "") , stringsAsFactors = F)
# NOTE(review): `my.data$2.code` does not parse in R, because a name after `$`
# cannot start with a digit unless it is backquoted. The dput() excerpt names
# this column `X2code` - confirm which name the real data uses.
# NOTE(review): iterating over the column itself visits a code once per row,
# not once per distinct code.
for(id in my.data$2.code)
{
# Keep columns 2 to 3 of the rows whose code equals `id`; the first column is
# the ID that selects which rows each iteration works on.
data.code <- my.data[my.data$2.code == id, 2:3]
# Unresolved step: the asker tried DOBY (presumably the doBy package) and other
# operations but could not produce the desired layout.
# The first argument of write.table() below is a placeholder, not real code.
write.table(....., "trials.txt", quote=F, col.names=FALSE, row.names= F)
}
答案 0 :(得分:2)
我建议保留你的data.frame原样,因为这是在R中进一步处理时最合理的格式。不过,下面有几种可能的做法,它们不能完全得到你想要的输出(那并不容易做到),但可能仍然有帮助。
# Wide layout: cast DF so that DOY identifies the rows, each X2code value
# becomes a column, and the cells are filled from WDrain.
library(reshape2)
WDrain.wide <- dcast(DF, DOY ~ X2code, value.var = "WDrain")

# List layout: split DF by X2code and keep columns 2 to 3 of each piece.
library(plyr)
WDrain.strange <- dlply(DF, .(X2code), function(piece) piece[, 2:3])
或者按照@SimonO101的建议:
# Split DF (minus its first column) into a list of data frames, grouped by the
# values of the first column.
split(x = DF[, -1], f = DF[, 1])