我有鱼标签信号的数据集,我想根据游泳速度计算不同行为的持续时间,例如:静态,巡航,爆发,所以我可以计算行为状态频率。我使用for
循环完成了此操作,但在我的大型数据集上它非常慢。我确信这可以使用R的apply
功能之一来完成,但我无法弄清楚如何做到这一点。
这就是我的数据:
Period PEN SEC BLSEC BS BScount CountTF BSdur
380 7045 7 7 0.204 cruise 2 FALSE NA
381 7045 7 7 0.694 cruise 3 FALSE NA
382 7045 7 7 0.325 cruise 4 TRUE 21
383 7045 7 7 0.000 static 1 TRUE 7
384 7045 7 7 0.197 cruise 1 FALSE NA
385 7045 7 7 0.312 cruise 2 FALSE NA
386 7045 7 7 0.242 cruise 3 TRUE 21
387 7045 7 7 0.096 static 1 TRUE 7
388 7045 7 7 0.274 cruise 1 FALSE NA
389 7045 7 7 0.268 cruise 2 FALSE NA
390 7045 7 7 0.312 cruise 3 FALSE NA
391 7045 7 7 0.694 cruise 4 FALSE NA
392 7045 7 7 0.268 cruise 5 FALSE NA
SEC是标签ping之间的秒数(它并不总是7!),BLSEC是每秒的体长(即标签ping之间的鱼的标准化距离游泳)。我通过以下方式计算了BS,BScount和CountTF:
static = 0.1
cruise = 1
bsffile$BS <- ifelse(bsffile$BLSEC <= static, 'static', ifelse(bsffile$BLSEC > static & bsffile$BLSEC <= cruise, 'cruise', 'burst'))
bsffile$BScount <- sequence(rle(bsffile$BS)$lengths)
bsffile$CountTF <- c(ifelse(diff(bsffile$BScount, 1, 1) < 1, T, F), F)
BSdur是连续行为状态的SEC的总和。我用以下方法计算了它:
bssum <- 0
for (i in 1:nrow(bsffile)){
bssum <- bssum + bsffile[i, 'SEC']
if(bsffile[i, 'CountTF'] == T & is.na(bsffile[i, 'SEC']) == F){
bsffile[i,'BSdur'] <- bssum
bssum <- 0
} else {
bsffile[i,'BSdur'] <- NA
}
}
在我的数据集上运行大约需要五分钟。有什么建议我可以更快地加快速度,例如使用apply
函数之一吗?
以下是一些dput
:
structure(list(Period = c(7045, 7045, 7045, 7045, 7045, 7045,
7045, 7045, 7045, 7045, 7045, 7045, 7045, 7045, 7045, 7045, 7045,
7045, 7045, 7045, 7045), PEN = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = c("7", "8"), class = "factor"), SEC = c(7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 7, 7, 7, 7, 7), BLSEC = c(0.204,
0.694, 0.325, 0, 0.197, 0.312, 0.242, 0.096, 0.274, 0.268, 0.312,
0.694, 0.268, 0.541, 0.796, 0.306, 0.089, 0.93, 0.389, 0.452,
0.917), BS = c("cruise", "cruise", "cruise", "static", "cruise",
"cruise", "cruise", "static", "cruise", "cruise", "cruise", "cruise",
"cruise", "cruise", "cruise", "cruise", "static", "cruise", "cruise",
"cruise", "cruise"), BScount = c(2L, 3L, 4L, 1L, 1L, 2L, 3L,
1L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 1L, 2L, 3L, 4L), CountTF = c(FALSE,
FALSE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE,
TRUE), BSdur = c(NA, NA, 21, 7, NA, NA, 21, 7, NA, NA, NA, NA,
NA, NA, NA, 57, 7, NA, NA, NA, 28)), row.names = 380:400, .Names = c("Period",
"PEN", "SEC", "BLSEC", "BS", "BScount", "CountTF", "BSdur"
), class = "data.frame")
答案 0 :(得分:2)
轻松data.table
library(data.table)
setDT(bsffile)
bsffile[,BSdur:=ifelse(CountTF==T,sum(SEC),0),by=.(rleid(BS))]
答案 1 :(得分:0)
我们可以使用ave
base R
执行此操作
df1$BSdur <- with(df1, ave(SEC, cumsum(c(TRUE, BS[-1]!= BS[-nrow(df1)])), FUN = sum)*CountTF)
df1$BSdur
#[1] 0 0 21 7 0 0 21 7 0 0 0 0 0 0 0 57 7 0 0 0 28