我在R中的示例数据框看起来如下:
NAME ID SURVEY_YEAR REFERENCE_YEAR CUMULATIVE_SUM CUMULATIVE_SUM_REFYEAR
1 NAME1 47 1960 1959 -6 0
2 NAME1 47 1961 1960 -10 -6
3 NAME1 47 1963 1961 NA NA
4 NAME1 47 1965 1963 -23 -10
5 NAME2 259 2007 2004 -9 0
6 NAME2 259 2009 2007 NA NA
7 NAME2 259 2010 2009 NA NA
8 NAME2 259 2011 2010 NA NA
9 NAME2 259 2014 2011 -40 -9
我要做的是按以下方式重新排列数据框。
NAME ID time YEAR CUMULATIVE_SUM id
NAME1 47 REFERENCE_YEAR 1959 0 1
NAME1 47 SURVEY_YEAR 1960 -6 1
NAME1 47 REFERENCE_YEAR 1960 -6 2
NAME1 47 SURVEY_YEAR 1961 -10 2
NAME1 47 REFERENCE_YEAR 1961 NA 3
NAME1 47 SURVEY_YEAR 1963 NA 3
NAME1 47 REFERENCE_YEAR 1963 -10 4
NAME1 47 SURVEY_YEAR 1965 -23 4
NAME2 259 REFERENCE_YEAR 2004 0 5
NAME2 259 SURVEY_YEAR 2007 -9 5
NAME2 259 REFERENCE_YEAR 2007 NA 6
NAME2 259 SURVEY_YEAR 2009 NA 6
NAME2 259 REFERENCE_YEAR 2009 NA 7
NAME2 259 SURVEY_YEAR 2010 NA 7
NAME2 259 REFERENCE_YEAR 2010 NA 8
NAME2 259 SURVEY_YEAR 2011 NA 8
NAME2 259 REFERENCE_YEAR 2011 -9 9
NAME2 259 SURVEY_YEAR 2014 -40 9
我尝试用下面的代码来实现,但得到的结果并不是我想要的。
# Example input from the question: nine rows, four for NAME1 and five for NAME2.
data <- data.frame(
  NAME = rep(c("NAME1", "NAME2"), times = c(4, 5)),
  ID = rep(c(47, 259), times = c(4, 5)),
  SURVEY_YEAR = c(1960, 1961, 1963, 1965, 2007, 2009, 2010, 2011, 2014),
  REFERENCE_YEAR = c(1959, 1960, 1961, 1963, 2004, 2007, 2009, 2010, 2011),
  CUMULATIVE_SUM = c(-6, -10, NA, -23, -9, NA, NA, NA, -40),
  CUMULATIVE_SUM_REFYEAR = c(0, -6, NA, -10, 0, NA, NA, NA, -9)
)

# Wide -> long. The two year columns are stacked into YEAR and the two sum
# columns into CUMULATIVE_SUM; `time` records which source column each row
# came from. The rows are NOT yet in the interleaved order the question asks for.
dat3 <- reshape(
  data,
  direction = "long",
  varying = list(
    c("SURVEY_YEAR", "REFERENCE_YEAR"),
    c("CUMULATIVE_SUM", "CUMULATIVE_SUM_REFYEAR")
  ),
  v.names = c("YEAR", "CUMULATIVE_SUM"),
  times = c("SURVEY_YEAR", "REFERENCE_YEAR")
)
有没有人知道如何正确地重塑数据框,使其与上面期望的结果一致?谢谢。
答案 0 :(得分:1)
只需重新排序您已有的数据
# Sort the reshaped rows by NAME, ID and YEAR, using id as the final
# tie-breaker when two rows share the same year.
dat3 <- dat3[
  order(dat3$NAME, dat3$ID, dat3$YEAR, dat3$id),
]
或者如果你想要高性能
library(data.table)
# Turn dat3 into a data.table, then key it on the same four columns used for
# the base-R ordering; keying sorts the rows by those columns.
setDT(dat3)
setkeyv(dat3, c("NAME", "ID", "YEAR", "id"))
答案 1 :(得分:0)
这样的事情应该有效
# Number the source rows; both long-format rows derived from a source row
# share this id. Derived from nrow() so it works for any number of rows.
data["id"] <- seq_len(nrow(data))

# Survey-year half: REFERENCE_YEAR is zeroed (not dropped) so that the sum
# computed at the end yields the survey year for these rows.
sursub <- data
sursub["time"] <- "SURVEY_YEAR"
sursub["REFERENCE_YEAR"] <- 0

# Reference-year half: SURVEY_YEAR is zeroed for the same reason, and the
# cumulative sum is taken from CUMULATIVE_SUM_REFYEAR, which is the value that
# belongs to the reference year.
refsub <- data
refsub["time"] <- "REFERENCE_YEAR"
refsub["SURVEY_YEAR"] <- 0
refsub["CUMULATIVE_SUM"] <- refsub[["CUMULATIVE_SUM_REFYEAR"]]

# Stack the reference rows first: order() is stable, so sorting on id alone
# keeps each reference row ahead of its matching survey row.
merge <- rbind(refsub, sursub)
sorted <- merge[order(merge[["id"]]), ]

# Exactly one of the two year columns is non-zero in every row, so their sum
# is the observation year of that row.
sorted["year"] <- sorted[["SURVEY_YEAR"]] + sorted[["REFERENCE_YEAR"]]
之后只需删除不需要的列即可。
答案 2 :(得分:0)
或者您可以使用dplyr
library(dplyr)
library(tidyr)

# Stack the two year columns into time/YEAR, pick the cumulative sum that
# belongs to each kind of row, then sort so that rows sharing a YEAR keep
# their stacked order. Columns are referenced by name rather than position so
# the pipeline still selects the right ones if `data` gains or reorders columns.
# NOTE: tidyr documents gather() as superseded by pivot_longer(); it is kept
# here so the result stays a plain data frame that prints as shown below.
data %>%
  gather(time, YEAR, SURVEY_YEAR, REFERENCE_YEAR) %>%
  mutate(
    CUMULATIVE_SUM = ifelse(
      time == "SURVEY_YEAR",
      CUMULATIVE_SUM,
      CUMULATIVE_SUM_REFYEAR
    )
  ) %>%
  arrange(NAME, ID, YEAR, row_number()) %>%
  select(NAME, ID, time, YEAR, CUMULATIVE_SUM) %>%
  head(4)
# NAME ID time YEAR CUMULATIVE_SUM
#1 NAME1 47 REFERENCE_YEAR 1959 0
#2 NAME1 47 SURVEY_YEAR 1960 -6
#3 NAME1 47 REFERENCE_YEAR 1960 -6
#4 NAME1 47 SURVEY_YEAR 1961 -10
答案 3 :(得分:0)
尝试:
# Build the long table in one vectorised step instead of appending rows in a
# loop. Every source row yields two consecutive output rows: the reference-year
# row first, then the survey-year row.
#
# Assembling each row with c(...) mixed strings and numbers, which coerced
# every column to character (hence the <NA> in the old printout). Building the
# columns separately keeps ID, YEAR, CUMULATIVE_SUM and id numeric.
n_rows <- nrow(data)
ddf2 <- data.frame(
  NAME = rep(as.character(data[["NAME"]]), each = 2L),
  ID = rep(data[["ID"]], each = 2L),
  time = rep(c("REFERENCE_YEAR", "SURVEY_YEAR"), times = n_rows),
  # rbind() puts the two vectors in a 2-row matrix; flattening it column by
  # column interleaves them as ref, survey, ref, survey, ...
  YEAR = as.vector(
    rbind(data[["REFERENCE_YEAR"]], data[["SURVEY_YEAR"]])
  ),
  CUMULATIVE_SUM = as.vector(
    rbind(data[["CUMULATIVE_SUM_REFYEAR"]], data[["CUMULATIVE_SUM"]])
  ),
  # Row position in `data`; equal to the row names when they are the default
  # 1..n used by the example data.
  id = rep(seq_len(n_rows), each = 2L),
  stringsAsFactors = FALSE
)
ddf2
#     NAME  ID           time YEAR CUMULATIVE_SUM id
# 1  NAME1  47 REFERENCE_YEAR 1959              0  1
# 2  NAME1  47    SURVEY_YEAR 1960             -6  1
# 3  NAME1  47 REFERENCE_YEAR 1960             -6  2
# 4  NAME1  47    SURVEY_YEAR 1961            -10  2
# 5  NAME1  47 REFERENCE_YEAR 1961             NA  3
# 6  NAME1  47    SURVEY_YEAR 1963             NA  3
# 7  NAME1  47 REFERENCE_YEAR 1963            -10  4
# 8  NAME1  47    SURVEY_YEAR 1965            -23  4
# 9  NAME2 259 REFERENCE_YEAR 2004              0  5
# 10 NAME2 259    SURVEY_YEAR 2007             -9  5
# 11 NAME2 259 REFERENCE_YEAR 2007             NA  6
# 12 NAME2 259    SURVEY_YEAR 2009             NA  6
# 13 NAME2 259 REFERENCE_YEAR 2009             NA  7
# 14 NAME2 259    SURVEY_YEAR 2010             NA  7
# 15 NAME2 259 REFERENCE_YEAR 2010             NA  8
# 16 NAME2 259    SURVEY_YEAR 2011             NA  8
# 17 NAME2 259 REFERENCE_YEAR 2011             -9  9
# 18 NAME2 259    SURVEY_YEAR 2014            -40  9