我很抱歉这个神秘的标题,但我不知道如何充分总结我的问题。所以这是我的问题。我有一个数据框,其中包含日期和几个实体的名称:
df <- data.frame(
time=rep(as.Date(seq(as.Date("2004/1/1"), as.Date("2005/12/1"), by = "1 month ")),2),
name=c(rep("a",24),rep("b",24))
)
str(df)
'data.frame': 48 obs. of 2 variables:
$ time: Date, format: "2004-01-01" "2004-02-01" ...
$ name: Factor w/ 2 levels "a","b": 1 1 1 1 1 1 1 1 1 1 ...
我还有另一个dataframe
有几个不均匀的事件:
events <- data.frame(
time = c("2004-12-1", "2005-8-1", "2005-6-1", "2004-4-1"),
event = c("normal", "extraordinary", "normal", "extraordinary"),
name = c("a", "a", "b", "b")
)
我希望合并这两个数据框的方式是从数据集的开头到事件分配event
或从< / em>直到下一个事件或数据集结束的最后一个事件。这看起来像是:
date name event
2004-01-01 a normal
2004-01-02 a normal
...
2004-12-01 a extraordinary
2005-01-01 a extraordinary
在R
中我是否有一种简单的方法可以做到这一点,我看不到或者手动合并这些内容?非常感谢你的帮助!
答案 0 :(得分:1)
我不知道有什么功能可以做到这一点,但这里有一些R代码可以自己做:
# Needed type coercions (Date for comparisons, characters to avoid 'factor' problems)
events$time <- as.Date(events$time)
events$event <- as.character(events$event)
events$name <- as.character(events$name)
df$name <- as.character(df$name)
# Events ordering (needed to detect previous events as non NA)
events <- events[ order(events$time) ,]
# Updates
df$event = NA
for(i in 1:nrow(events)) {
# Update where time is lesser than the limit, if names correspond and if an event was not already assigned to the row
df[ df$time <= events[i,"time"] & df$name == events[i,"name"] & is.na(df$event) , "event" ] = events[i,"event"]
}
答案 1 :(得分:1)
这是一个完成你想要的功能:
event.aligning <- function(time.dataframe, events){
if(!class(events[["time"]]) == 'Date'){
events[["time"]] <- as.Date(events[["time"]])
}
## lets sort on time
events <- events[order(events[["time"]]),]
## setup event column
time.dataframe$event <- NA
time.dataframe$event <- as.factor(time.dataframe$event)
levels(time.dataframe$event) <- event.types
rownames.tdf <- rownames(time.dataframe)
res.time.dataframe <- NULL
for( i in 1:length(levels(events$name))){
i.name <- levels(events$name)[i]
i.name.events <- subset(events, name == i.name)
first.time <- time.dataframe$time[time.dataframe$name == i.name][1]
first.event <- i.name.events$time[1]
## assume 2 events
first.event.type <- i.name.events$event[1]
second.event.type <- unique(i.name.events$event[i.name.events$event != first.event.type])
event.types <- levels(i.name.events$event)
sub.time.df <- time.dataframe[time.dataframe$name == i.name,]
rownames(sub.time.df) <- 1:length(sub.time.df[,1])
sub.time.df[1:(as.numeric(rownames(sub.time.df[sub.time.df$time == first.event,])) - 1),]$event <- second.event.type
cur.event <- first.event
for( j in 2:length(i.name.events[,1])){
next.event <- i.name.events$time[j]
sub.time.df[rownames( sub.time.df[ sub.time.df[["time"]] == cur.event,]) :
(as.numeric(rownames( sub.time.df[sub.time.df[["time"]] == next.event,])) - 1),]$event <- i.name.events$event[j-1]
cur.event <- next.event
next.event.type = i.name.events$event[j]
}
last.time <- sub.time.df$time[length(sub.time.df$time)]
last.event <- i.name.events$time[length(i.name.events$time)]
sub.time.df[rownames( sub.time.df[sub.time.df$time == last.event,]):length(sub.time.df$time),]$event <- next.event.type
res.time.dataframe <- rbind(res.time.dataframe, sub.time.df)
}
rownames(res.time.dataframe) <- rownames.tdf
return(res.time.dataframe)
}
df2 <- event.aligning(df, events)