将数据分配给具有不均匀间隔事件的日期向量

时间:2011-12-23 11:26:44

标签: r

我很抱歉这个神秘的标题,但我不知道如何充分总结我的问题。所以这是我的问题。我有一个数据框,其中包含日期和几个实体的名称:

 df <- data.frame(
       time=rep(as.Date(seq(as.Date("2004/1/1"), as.Date("2005/12/1"), by = "1 month ")),2),
       name=c(rep("a",24),rep("b",24))
  )
  str(df)

  'data.frame':   48 obs. of  2 variables:
   $ time: Date, format: "2004-01-01" "2004-02-01" ...
   $ name: Factor w/ 2 levels "a","b": 1 1 1 1 1 1 1 1 1 1 ...

我还有另一个dataframe有几个不均匀的事件:

   events <- data.frame(
          time = c("2004-12-1", "2005-8-1", "2005-6-1", "2004-4-1"),
          event = c("normal", "extraordinary", "normal", "extraordinary"),
          name = c("a", "a", "b", "b")
   )

我希望合并这两个数据框的方式是从数据集的开头到事件分配event从< / em>直到下一个事件或数据集结束的最后一个事件。这看起来像是:

    date      name  event
   2004-01-01  a     normal
   2004-01-02  a     normal 
      ...
   2004-12-01  a     extraordinary
   2005-01-01  a     extraordinary

R中我是否有一种简单的方法可以做到这一点,我看不到或者手动合并这些内容?非常感谢你的帮助!

2 个答案:

答案 0 :(得分:1)

我不知道有什么功能可以做到这一点,但这里有一些R代码可以自己做:

# Needed type coercions (Date for comparisons, characters to avoid 'factor' problems)
events$time <- as.Date(events$time)
events$event <- as.character(events$event)
events$name <- as.character(events$name)
df$name <- as.character(df$name)

# Events ordering (needed to detect previous events as non NA)
events <- events[ order(events$time) ,]

# Updates
df$event = NA
for(i in 1:nrow(events)) {
    # Update where time is lesser than the limit, if names correspond and if an event was not already assigned to the row
    df[ df$time <= events[i,"time"] & df$name == events[i,"name"] & is.na(df$event) , "event" ] = events[i,"event"]
}

答案 1 :(得分:1)

这是一个完成你想要的功能:

event.aligning <- function(time.dataframe, events){
  if(!class(events[["time"]]) == 'Date'){
    events[["time"]] <- as.Date(events[["time"]])
  }
  ## lets sort on time
  events <- events[order(events[["time"]]),]

  ## setup event column
  time.dataframe$event <- NA 
  time.dataframe$event <- as.factor(time.dataframe$event)
  levels(time.dataframe$event) <- event.types
  rownames.tdf <- rownames(time.dataframe)

  res.time.dataframe <- NULL
  for( i in 1:length(levels(events$name))){
    i.name <- levels(events$name)[i]

    i.name.events <- subset(events, name == i.name)

    first.time <- time.dataframe$time[time.dataframe$name == i.name][1]
    first.event <- i.name.events$time[1]

    ## assume 2 events
    first.event.type <- i.name.events$event[1]
    second.event.type <- unique(i.name.events$event[i.name.events$event !=     first.event.type])

    event.types <- levels(i.name.events$event)

    sub.time.df <- time.dataframe[time.dataframe$name == i.name,]
    rownames(sub.time.df) <- 1:length(sub.time.df[,1])

    sub.time.df[1:(as.numeric(rownames(sub.time.df[sub.time.df$time == first.event,])) - 1),]$event <- second.event.type

    cur.event <- first.event
    for( j in 2:length(i.name.events[,1])){
      next.event <- i.name.events$time[j]
      sub.time.df[rownames( sub.time.df[ sub.time.df[["time"]] == cur.event,]) : 
        (as.numeric(rownames( sub.time.df[sub.time.df[["time"]] == next.event,])) - 1),]$event <- i.name.events$event[j-1]
      cur.event <- next.event
      next.event.type = i.name.events$event[j]
    }    
    last.time <- sub.time.df$time[length(sub.time.df$time)]
    last.event <- i.name.events$time[length(i.name.events$time)]   
    sub.time.df[rownames( sub.time.df[sub.time.df$time == last.event,]):length(sub.time.df$time),]$event <- next.event.type
    res.time.dataframe <- rbind(res.time.dataframe, sub.time.df)
  }
  rownames(res.time.dataframe) <- rownames.tdf
  return(res.time.dataframe)
}


df2 <- event.aligning(df, events)