时间上的差异取决于列

时间:2015-03-02 17:36:30

标签: r

对于一个包含“Time”列(格式为 %Y-%m-%d %H:%M:%S)的数据框,我想在行满足某些列条件时,计算相邻行之间相差的分钟数。仅当数据来自同一站点、同一相机且属于同一物种时才计算 difftime。每一行是一次观察,包含以下列:SpeciesID、Site、Plot、Camera、Time。

我试过了:

# Minutes elapsed between consecutive observations of the same species at the
# same site and camera.
#
# Expects a data frame `data2` with (at least) the columns SpeciesID, Site,
# Camera and Time. Produces `foo`: the rows of `data2`, grouped and sorted by
# time, with an extra numeric column `diff.min`.
site.list <- unique(data2$Site)
species.list <- unique(data2$SpeciesID)

# POSIXct rather than POSIXlt so the timestamps can live in a data frame
# column. The default parser accepts "%Y-%m-%d %H:%M:%S".
data2$Time <- as.POSIXct(data2$Time)

# One piece per Site/Camera/SpeciesID combination that actually occurs, so
# no camera or species names need to be hard-coded. Rows with NA in a
# grouping column are dropped by split().
groups <- split(
  data2,
  list(data2$Site, data2$Camera, data2$SpeciesID),
  drop = TRUE
)

foo <- do.call(rbind, lapply(groups, function(small.data) {
  small.data <- small.data[order(small.data$Time), , drop = FALSE]
  # The first row of a group has no predecessor, hence the leading NA.
  # as.numeric(..., units = "mins") pins the unit; otherwise diff() picks
  # secs/mins/hours/days depending on the size of the gaps.
  small.data$diff.min <- c(
    NA_real_,
    as.numeric(diff(small.data$Time), units = "mins")
  )
  small.data
}))
rownames(foo) <- NULL

1 个答案:

答案 0 :(得分:0)

如果您还没有data.table

# Install data.table only when it is missing. The mirror URL has to be passed
# as `repos`: the second positional argument of install.packages() is `lib`
# (the library directory), not the repository.
if (!requireNamespace("data.table", quietly = TRUE)) {
  install.packages("data.table", repos = "http://cran.us.r-project.org")
}
library(data.table)

我以这种方式导入了数据:     dt <- fread("pathToData.csv", key = "Camera,Site,SpeciesID")

不过我在这里(SO)贴出了其中 10 条观察数据。如果你希望我删除它们,请直接告诉我。

# Ten-row sample of the observation data, embedded as a literal (this looks
# like dput() output) so the example runs without the CSV file.
# The code further down groups by Camera, Site and SpeciesID and works on Time.
# Note: every Time string ends in ":00"; the seconds are only carried by the
# separate Second column.
df <- structure(list(Individual = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L), Date = c(15543L, 15543L, 15543L, 15554L, 15554L, 15554L, 
15554L, 15554L, 15543L, 15543L), Image1 = c("544.1.P5_", "544.1.P5_", 
"544.1.P5_", "544.7.I1_2", "544.7.I1_2", "544.7.I1_2", "544.7.I1_2", 
"544.7.I1_2", "544.1.P5_", "544.1.P5_"), Site = c("544", "544", 
"544", "544", "544", "544", "544", "544", "544", "544"), Camera = c(1L, 
1L, 1L, 7L, 7L, 7L, 7L, 7L, 1L, 1L), Plot = c(1L, 1L, 1L, 5L, 
5L, 5L, 5L, 5L, 1L, 1L), Plot_Type = c("OnTrail", "OnTrail", 
"OnTrail", "OffTrail", "OffTrail", "OffTrail", "OffTrail", "OffTrail", 
"OnTrail", "OnTrail"), CameraID = c("P5", "P5", "P5", "I1", "I1", 
"I1", "I1", "I1", "P5", "P5"), Time = c("2012/07/22 00:31:00", 
"2012/07/22 00:31:00", "2012/07/22 00:31:00", "2012/08/02 09:09:00", 
"2012/08/02 09:09:00", "2012/08/02 09:09:00", "2012/08/02 09:09:00", 
"2012/08/02 09:09:00", "2012/07/22 00:31:00", "2012/07/22 00:31:00"
), Hour = c(0L, 0L, 0L, 9L, 9L, 9L, 9L, 9L, 0L, 0L), Minute = c(31L, 
31L, 31L, 9L, 9L, 9L, 9L, 9L, 31L, 31L), Second = c(17L, 18L, 
23L, 18L, 20L, 22L, 24L, 26L, 34L, 36L), SpeciesID = c(2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), Common = c("Mule deer", "Mule deer", 
"Mule deer", "Mule deer", "Mule deer", "Mule deer", "Mule deer", 
"Mule deer", "Mule deer", "Mule deer"), Scientific = c("Odocoile", 
"Odocoile", "Odocoile", "Odocoile", "Odocoile", "Odocoile", "Odocoile", 
"Odocoile", "Odocoile", "Odocoile"), SpeciesID.1 = c(2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L)), .Names = c("Individual", "Date", 
"Image1", "Site", "Camera", "Plot", "Plot_Type", "CameraID", 
"Time", "Hour", "Minute", "Second", "SpeciesID", "Common", "Scientific", 
"SpeciesID.1"), row.names = c(NA, 10L), class = "data.frame")

# Keyed copy of the sample data; the key sorts rows by Camera, Site, SpeciesID.
dt <- data.table(df, key = "Camera,Site,SpeciesID")

# 1) Count the observations in each (SpeciesID, Camera, Site) category.
# 2) Parse Time into POSIXct (the explicit format is optional here).
# 3) For every category with at least 2 observations, compute the time
#    elapsed since the previous observation, in seconds. The first
#    observation of each category has no predecessor and stays NA, as do
#    categories with a single observation.

dt[, n := .N, by = .(SpeciesID, Camera, Site)][
  , Time := as.POSIXct(Time, format = "%Y/%m/%d %H:%M:%S")][
  n >= 2,
  diffSec := {
    # Difference the rows in chronological order, then write the results
    # back to the rows' current positions so the table is not reordered.
    ord <- order(Time)
    secs <- as.numeric(Time)[ord]
    out <- rep(NA_real_, .N)
    # c(1, -1) with sides = 1 yields x[i] - x[i - 1]. The stats:: prefix
    # matters because dplyr::filter masks filter() when dplyr is attached.
    out[ord] <- as.numeric(stats::filter(secs, c(1, -1), sides = 1))
    out
  },
  by = .(Camera, Site, SpeciesID)]

其他解决方案

 # Alternative without stats::filter(): difference the timestamps directly.
 # Adds `n` (observations per SpeciesID/Camera/Site category) and `diffSec`
 # (seconds since the previous observation in the same category; NA for the
 # first observation and for categories with a single observation).
 dt[, n := .N, by = .(SpeciesID, Camera, Site)][
   , Time := as.POSIXct(Time, format = "%Y/%m/%d %H:%M:%S")][
   n >= 2,
   diffSec := {
     # Work in chronological order, then map results back to row positions.
     ord <- order(Time)
     out <- rep(NA_real_, .N)
     # Differencing the numeric seconds pins the unit. diff() on POSIXct
     # chooses secs/mins/hours/days per call, and c(NA, ...) would then
     # silently drop that unit, mixing units across groups.
     out[ord] <- c(NA_real_, diff(as.numeric(Time)[ord]))
     out
   },
   by = .(Camera, Site, SpeciesID)]

您可能还需要检查变量 Time 的格式。它确实是时间类型吗?我注意到其中没有包含秒。