我有一个非常大的数据集(65400行!),需要计算其中每个行为回合(bout)的长度。收集数据时,我们在每个5分钟的观察时段内,逐分钟、逐秒地记录了焦点动物正在做的事情。我的样本数据为
# Sample data (looks like dput() output): an 11-row data.frame with columns
# date, minute, sec, activity and day_time. Every row shares the date
# "02/04/2015", minute 1 and day_time "14:45"; sec runs 1..11 and activity is
# a factor with levels N, S, U. The value is not assigned to a name here.
structure(list(date = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = "02/04/2015", class = "factor"), minute = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), sec = 1:11, activity = structure(c(2L,
2L, 2L, 1L, 3L, 3L, 3L, 2L, 2L, 2L, 2L), .Label = c("N", "S",
"U"), class = "factor"), day_time = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "14:45", class = "factor")), .Names = c("date",
"minute", "sec", "activity", "day_time"), class = "data.frame", row.names = c(NA,
-11L))
df
date minute sec activity day_time
1 02/04/2015 1 1 S 14:45
2 02/04/2015 1 2 S 14:45
3 02/04/2015 1 3 S 14:45
4 02/04/2015 1 4 N 14:45
5 02/04/2015 1 5 U 14:45
6 02/04/2015 1 6 U 14:45
7 02/04/2015 1 7 U 14:45
8 02/04/2015 1 8 S 14:45
9 02/04/2015 1 9 S 14:45
10 02/04/2015 1 10 S 14:45
11 02/04/2015 1 11 S 14:45
我们需要计算的是每个回合的长度(以秒为单位),并且不同的5分钟会话要分开计算。在这种情况下,我们所需的输出将是
# Desired result (looks like dput() output): a 4-row data.frame with one row
# per run of identical consecutive activity values from the sample, in the
# order S, N, U, S, with bout_length 3, 1, 3, 4. The sec column is absent;
# date, minute and day_time are carried along.
structure(list(date = structure(c(1L, 1L, 1L, 1L), .Label = "02/04/2015", class = "factor"),
minute = c(1L, 1L, 1L, 1L), activity = structure(c(2L, 1L,
3L, 2L), .Label = c("N", "S", "U"), class = "factor"), day_time = structure(c(1L,
1L, 1L, 1L), .Label = "14:45", class = "factor"), bout_length = c(3L,
1L, 3L, 4L)), .Names = c("date", "minute", "activity", "day_time",
"bout_length"), class = "data.frame", row.names = c(NA, -4L))
desired output
date minute activity day_time bout_length
1 02/04/2015 1 S 14:45 3
2 02/04/2015 1 N 14:45 1
3 02/04/2015 1 U 14:45 3
4 02/04/2015 1 S 14:45 4
我试过 rle(),但没有成功,因为我还必须区分不同的分钟和会话。谢谢你的帮助!
答案 0 :(得分:0)
我最终用比较繁琐的办法解决了这个问题。如果有更简短的方法就更好了。
# Compute the length of every bout (run of identical consecutive `activity`
# values) without letting a bout span two sessions. The result is `new.bouts`:
# the closing row of each bout plus a `bout.len` column.
# NOTE(review): this reads a `session` column from `df`, which the sample data
# shown in the question does not contain -- confirm the real data has it.

# Convert every column to character; presumably so that a plain "empty"
# separator row can be appended without running into factor levels.
bouts1 <- as.data.frame(lapply(df, as.character), stringsAsFactors = FALSE)

# Append an "empty" row after each session, stack the sessions back together
# and drop the separator that trails the last session.
bouts1 <- head(do.call(rbind, by(bouts1, df$session, rbind, "empty")), -1)
rownames(bouts1) <- seq_len(nrow(bouts1))

# diffs[i] is TRUE when row i + 1 has a different activity than row i,
# i.e. row i is the last row of a bout.
diffs <- bouts1$activity[-1L] != bouts1$activity[-nrow(bouts1)]

# Row index of the last row of every bout; the final row always closes one.
idx <- c(which(diffs), nrow(bouts1))

# Gap between consecutive bout ends = bout length in rows.
bout.len <- diff(c(0, idx))

# Take the closing row of each bout straight from `idx`. This replaces the
# row-name lookup plus the hard-coded `bouts[65400, ]`, so the code no longer
# depends on the data having exactly 65400 rows or on a separate `bouts` object.
new.bouts <- bouts1[idx, ]
new.bouts$bout.len <- bout.len

# Drop the separator rows. They hold "empty" in every column, so testing
# `activity` is enough and does not rely on a column named `Date`.
new.bouts <- new.bouts[!(new.bouts$activity %in% "empty"), ]