我有一个数据框,Phys。它包含一列时间和两列其他变量,如下所示: Data frame "Phys"
在某个时间点,两个变量都会超过各自的阈值(例如 etagt > 0.5,etco2 > 2.5)。我需要报告这样一个起始时间:两个值从该时刻起超过阈值,并且在随后的至少 9 个元素(持续 90 秒)内始终保持在阈值之上。我正在寻找最高效的方法来检验"随后的 9 个元素"是否都满足该条件。
我目前有以下代码:
# Find the first time at which BOTH signals are above their thresholds and
# remain above them for the 9 rows that follow (10 consecutive rows in total).
#
# Inputs : Phys, a data frame with columns time, etagt and etco2.
# Outputs: algEval - row indices at which a qualifying run starts
#          algTime - earliest time among those rows, or NA if there is none
#
# NOTE: the check counts rows, not elapsed time, so "9 rows = 90 s" only holds
# where the samples are evenly spaced 10 s apart.

# TRUE where both thresholds are exceeded in the same row; a missing value
# cannot count as "above threshold", so treat NA as FALSE.
above <- Phys$etagt > 0.5 & Phys$etco2 > 2.5
above[is.na(above)] <- FALSE

# Run-length encode the mask and recover the first row index of every run.
runs <- rle(above)
run_end <- cumsum(runs$lengths)
run_start <- run_end - runs$lengths + 1L

# Keep runs that are above threshold and at least 10 rows long
# (the onset row plus the 9 rows after it).
algEval <- run_start[runs$values & runs$lengths >= 10L]

if (length(algEval) > 0) {
  algTime <- min(Phys$time[algEval], na.rm = TRUE)
} else {
  algTime <- NA
}
提前谢谢。
编辑:最小工作数据集
# Minimal example data as produced by dput(): a data.frame with 31 rows and
# the columns time, etagt and etco2. The result is not assigned here; assign
# it to a variable before running code against it.
# Note the jump in time from 1130 to 1160 (row names 115 and 116 are absent).
structure(
list(
time = c(
1070,
1080,
1090,
1100,
1110,
1120,
1130,
1160,
1170,
1180,
1190,
1200,
1210,
1220,
1230,
1240,
1250,
1260,
1270,
1280,
1290,
1300,
1310,
1320,
1330,
1340,
1350,
1360,
1370,
1380,
1390
),
etagt = c(
0,
0,
0,
0,
0,
0,
0,
2.92,
2.33379310344828,
1.74758620689655,
1.21689655172414,
1.18586206896552,
1.1548275862069,
1.11965517241379,
1.06793103448276,
1.01620689655172,
0.997586206896552,
1.05620689655172,
1.1148275862069,
1.16241379310345,
1.19344827586207,
1.22448275862069,
1.23655172413793,
1.22965517241379,
1.22275862068966,
1.74965517241379,
2.63241379310345,
3.5151724137931,
3.59655172413793,
3.33448275862069,
3.07241379310345
),
etco2 = c(
0,
0.871379310344828,
2.11620689655172,
3.36103448275862,
2.61413793103448,
1.36931034482759,
0.124482758620689,
0,
1.5448275862069,
3.08965517241379,
4.49379310344828,
4.63172413793103,
4.76965517241379,
4.92620689655172,
5.15724137931034,
5.38827586206897,
5.53551724137931,
5.48724137931034,
5.43896551724138,
5.37551724137931,
5.28931034482759,
5.20310344827586,
5.16,
5.16,
5.16,
4.15034482758621,
2.46758620689655,
0.784827586206896,
1.56896551724138,
3.41034482758621,
5.25172413793103
)
),
.Names = c("time",
"etagt", "etco2"),
row.names = c(
108L,
109L,
110L,
111L,
112L,
113L,
114L,
117L,
118L,
119L,
120L,
121L,
122L,
123L,
124L,
125L,
126L,
127L,
128L,
129L,
130L,
131L,
132L,
133L,
134L,
135L,
136L,
137L, 138L, 139L, 140L), class = "data.frame")
答案 0 :(得分:0)
您可以按照以下方式执行此操作:
# Find the first row of every run in which both signals stay above their
# thresholds for at least 10 consecutive rows.
# NOTE(review): dat is assumed to hold the question's data (columns time,
# etagt, etco2); it is not defined in this snippet.
library(data.table)
setDT(dat)
# tr: TRUE where both thresholds are exceeded in the same row
dat[, tr := etagt > 0.5 & etco2 > 2.5]
# Label consecutive runs of identical tr values (see ?rleid)
dat[, run := rleid(tr)]
# Keep only runs that are above threshold (tr is TRUE) and long enough:
# 10 rows = the onset row plus the 9 rows that follow it.
# Counting only tr rows matters: filtering on length alone would also
# report long runs of below-threshold rows.
ind <- dat[tr == TRUE, .N, by = run][N >= 10]
# Get the first row (and therefore the first time) of each qualifying run
dat[ind, on = "run", mult = "first"]
这给了你:
time etagt etco2 tr run N
1: 1180 1.747586 3.089655 TRUE 2 17
要了解其中每一步的作用,请分别查看 dat、dat[, .N, run] 和 ind。