Question

我的数据大致如下（car 列是汽车 ID，因此每一行代表一辆汽车，以及该汽车处于活动状态的时间段）：

# Sample input, one row per car: `car` is the car ID, `location` is a factor,
# and `start` / `end` are Date values bounding the period in which the car is
# active (written here as the numeric day counts that back the Date class).
# NOTE(review): `.Label` lists "A" and "C" twice, so the factor carries
# duplicated levels -- presumably an artifact of anonymising the data; confirm
# the intended locations before reusing this literal.
# NOTE(review): the value is not assigned to a name here; the answer's code
# refers to the data set as `dat`.
structure(list(car = c(100987, 100995, 104641, 120350, 123444, 128248, 128253, 129537, 129612, 130189), 
  location = structure(c(1L, 1L, 1L, 2L, 3L, 4L, 3L, 3L, 3L, 3L), .Label = c("A", "A", "B", "D", "C", "C"), class = "factor"), 
  start = structure(c(12784, 12784, 365, 15027, 15951, -1096, 7305, 365, 365, -731), class = "Date"), 
  end = structure(c(16070, 16070, 16070, 16070, 16070, 13725, 16070, 16070, 16070, 16070), class = "Date")), 
 .Names = c("car", "location", "start", "end"), row.names = c(NA, 10L), class = "data.frame")

我想获得如下数据框：

# Shape of the desired result: one row per (location, year) pair with the
# number of cars counted for it. The n_cars values look illustrative rather
# than derived from the sample data above.
output <- data.frame(
  location = rep(c("A", "B", "C"), each = 2),
  year = rep(2000:2001, times = 3),
  n_cars = 10:15
)

所以我想按位置统计各年份（比如 2000:2013）的汽车数量。一辆汽车只有在当年 12 月 31 日仍处于活动状态时，才计入该年。我曾想为每条记录生成该汽车活动期间的年份列表，然后统计包含某一年份的列表项数。但我的数据框太大，这种做法效率太低。怎样做最好？这看起来很简单，但我想不出办法。

Answer 1

像这样可以吗？

library(lubridate)
library(plyr)
library(gdata)

# dat is the name of your original data set structure

# Count, per location and calendar year, the cars present on 31 December.
#
# Input:  `dat`, a data frame with columns `car`, `location`, `start`, `end`
#         (`start` / `end` of class Date, without missing values).
# Output: `yeardf`, one row per (car, year) in which the car is counted, and
#         `output`, with columns `location`, `year` (integer) and `n_cars`,
#         ordered by location and then year.
#
# A car is counted in every year from its start year up to the last year in
# which it is still active on 31 December. The expansion is vectorised:
# growing a data frame with rbind() inside a loop re-copies the accumulated
# rows on every iteration, which does not scale to large inputs.
# Only base R is used, so this block does not need the library() calls above.

stopifnot(
  is.data.frame(dat),
  !anyNA(dat$start),
  !anyNA(dat$end)
)

start_year <- as.integer(format(dat$start, "%Y"))
end_year <- as.integer(format(dat$end, "%Y"))

# Compare month and day instead of the day-of-year: 30 December of a leap
# year is day 365 and would pass a "> 364" test although the car is no longer
# there on the 31st.
ends_on_dec31 <- format(dat$end, "%m-%d") == "12-31"
last_year <- end_year - ifelse(ends_on_dec31, 0L, 1L)

# A car whose whole period lies within one year and ends before 31 December
# has last_year < start_year. It must contribute no rows; seq() would instead
# count backwards and yield two years.
n_years <- pmax(last_year - start_year + 1L, 0L)

# Repeat each input row once per counted year, then number the years.
row_id <- rep(seq_len(nrow(dat)), times = n_years)
yeardf <- data.frame(
  car = as.character(dat$car[row_id]),
  location = as.character(dat$location[row_id]),
  year = start_year[row_id] + sequence(n_years) - 1L,
  stringsAsFactors = FALSE
)

if (nrow(yeardf) > 0) {
  output <- aggregate(car ~ location + year, data = yeardf, FUN = length)
  names(output)[names(output) == "car"] <- "n_cars"
  output <- output[order(output$location, output$year), ]
  rownames(output) <- NULL
} else {
  # aggregate() refuses empty input, so build the empty result explicitly.
  output <- data.frame(
    location = character(0),
    year = integer(0),
    n_cars = integer(0),
    stringsAsFactors = FALSE
  )
}

计算多个时期之间一年的时间

1 个答案: