我想按如下方式使用一个可增长的窗口遍历数据框 Out:
在每个增量处,应在窗口上运行以下规则:
if (mean(Speed_Out) <= 0.152682)
Behaviour <- Lying
else if (Movement_Out == “left”) <= 20.8 && (mean(Speed_Out) >=
0.200921)
Behaviour <- Grazing
如果没有满足任何规则,那么窗口应该一次增长一个增量,直到满足规则。
一旦满足某条规则,当前窗口内的所有增量都应标记为该规则对应的 Behaviour。
然后,下一个窗口应从最后一个窗口终止后的下一个元素开始。
初始窗口大小应该是可调整的(开始时和每个终止窗口后的窗口大小)。
备注:
表达式 (Movement_Out == “left”) <= 20.8 表示 "left" 在窗口中所占的比例不超过 20.8%。
示例:
以下是基于下面提供的数据、我期望得到的输出的简短示例,其中起始窗口大小设置为 4:
Speed_Out Movement_Out Behaviour
1 0.220 left Lying
2 0.155 left Lying
3 0.120 forward Lying
4 0.090 non-moving Lying <== window terminates here
5 0.125 non-moving Grazing <== new window starts here
6 0.125 non-moving Grazing
7 0.155 non-moving Grazing
8 0.340 forward Grazing
9 0.370 forward Grazing <== window terminates here
10 0.185 forward Grazing <== new window starts here
11 0.155 right Grazing
12 0.220 non-moving Grazing
13 0.220 non-moving Grazing
14 0.280 non-moving Grazing <== window terminates here
15 0.215 non-moving Grazing <== new window starts here
16 0.060 right Grazing
17 0.340 non-moving Grazing
18 0.555 forward Grazing <== window terminates here
19 0.275 right And so on..
20 0.215 forward
可供您使用的数据框:
# Sample data frame `Out` for the question (looks like dput() output).
# It has 283 rows and two columns:
#   Speed_Out    - numeric values (units are not stated here)
#   Movement_Out - factor with levels "forward", "left", "non-moving", "right",
#                  stored as integer codes 1L-4L in that order
Out <- structure(list(Speed_Out = c(0.22, 0.155, 0.12, 0.09, 0.125,
0.125, 0.155, 0.34, 0.37, 0.185, 0.155, 0.22, 0.22, 0.28, 0.215,
0.06, 0.34, 0.555, 0.275, 0.215, 0.185, 0.06, 0.245, 0.31, 0.345,
0.375, 0.375, 0.87, 1.025, 0.405, 0, 0.185, 0.31, 0.155, 0.125,
0.22, 0.375, 0.345, 0.345, 0.405, 0.31, 0.34, 0.245, 0.155, 0.19,
0.22, 0.185, 0.12, 0.185, 0.155, 0.245, 0.31, 0.155, 0.155, 0.25,
0.215, 0.09, 0.06, 0.245, 0.495, 0.495, 0.34, 0.28, 0.31, 0.28,
0.25, 0.25, 0.185, 0.155, 0.25, 0.28, 0.28, 0.34, 0.215, 0.125,
0.155, 0.34, 0.34, 0.09, 0.59, 1.71, 1.18, 0.185, 0.215, 0.185,
0.185, 0.155, 0.19, 0.19, 0.19, 0.87, 2.045, 2.73, 1.585, 0.22,
0.25, 0.435, 0.405, 0.405, 0.405, 0.715, 0.62, 0.37, 0.4, 0.185,
0.375, 0.59, 0.525, 0.245, 0.495, 0.495, 0.68, 0.775, 0.25, 0.31,
0.34, 0.28, 0.28, 0.25, 1.55, 2.695, 1.705, 1.21, 0.87, 0.25,
1.52, 1.52, 0.405, 0.81, 2.08, 2.915, 1.705, 0.435, 0.22, 0.78,
1.215, 0.84, 0.495, 0.495, 0.56, 0.375, 0.28, 0.715, 1.025, 0.495,
0.65, 1.18, 1.09, 0.995, 0.87, 0.435, 0.125, 0.435, 0.555, 0.775,
1.12, 1.555, 1.15, 0.25, 0.87, 0.93, 0.28, 0.31, 0.31, 0.375,
0.78, 0.655, 0.53, 0.62, 0.525, 0.37, 0.555, 1.025, 0.655, 1.12,
1.585, 0.715, 0.155, 0.28, 1.12, 2.11, 1.645, 0.715, 0.465, 0.84,
0.81, 0.655, 0.84, 0.435, 0.28, 0.215, 0.93, 1.335, 0.65, 0.185,
0.155, 0.34, 0.4, 0.37, 0.435, 0.405, 0.28, 0.28, 0.25, 0.25,
0.745, 1.24, 0.805, 1.055, 1.085, 0.465, 0.375, 0.5, 0.59, 0.37,
0.185, 0.34, 0.37, 0.435, 0.405, 0.06, 0.125, 0.25, 0.31, 0.405,
0.78, 0.56, 0.215, 0.495, 0.87, 1.025, 0.62, 0.405, 0.405, 0.405,
0.31, 0.215, 0.465, 0.435, 0.34, 0.275, 0.215, 0.25, 0.22, 0.22,
0.125, 0.245, 0.34, 0.31, 0.37, 0.31, 0.31, 0.245, 0.185, 0.25,
0.22, 0.22, 0.31, 0.28, 0.22, 0.28, 0.53, 0.655, 0.375, 0.19,
0.405, 0.435, 0.28, 0.215, 0.77, 0.96, 1.865, 1.83, 0.495, 0.655,
1.615, 1.395, 0.31, 0.31, 0.25, 0.28, 0.34, 0.34), Movement_Out = structure(c(2L,
2L, 1L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 4L, 3L, 3L, 3L, 3L, 4L, 3L,
1L, 4L, 1L, 1L, 2L, 2L, 3L, 4L, 3L, 2L, 4L, 1L, 2L, 1L, 3L, 3L,
1L, 3L, 2L, 4L, 3L, 1L, 3L, 1L, 1L, 1L, 4L, 3L, 3L, 3L, 3L, 1L,
3L, 3L, 3L, 2L, 4L, 3L, 3L, 4L, 2L, 3L, 1L, 1L, 2L, 4L, 1L, 2L,
4L, 3L, 3L, 4L, 3L, 3L, 2L, 4L, 2L, 1L, 2L, 4L, 4L, 2L, 4L, 2L,
1L, 2L, 3L, 1L, 2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 2L, 1L, 3L, 3L,
2L, 2L, 3L, 1L, 2L, 4L, 3L, 4L, 2L, 3L, 1L, 4L, 4L, 3L, 1L, 2L,
1L, 1L, 4L, 1L, 2L, 4L, 2L, 1L, 1L, 2L, 4L, 2L, 2L, 4L, 1L, 1L,
2L, 4L, 2L, 4L, 2L, 1L, 2L, 2L, 4L, 2L, 4L, 2L, 4L, 3L, 1L, 4L,
2L, 1L, 1L, 2L, 4L, 2L, 4L, 2L, 4L, 4L, 2L, 4L, 1L, 1L, 4L, 2L,
4L, 4L, 3L, 4L, 4L, 2L, 1L, 1L, 1L, 4L, 1L, 1L, 4L, 4L, 2L, 2L,
4L, 1L, 2L, 2L, 4L, 4L, 4L, 2L, 2L, 1L, 4L, 4L, 2L, 3L, 1L, 2L,
2L, 4L, 4L, 1L, 2L, 4L, 4L, 2L, 2L, 4L, 2L, 4L, 2L, 4L, 1L, 1L,
2L, 1L, 4L, 4L, 3L, 4L, 2L, 4L, 3L, 1L, 1L, 2L, 1L, 1L, 4L, 2L,
4L, 2L, 4L, 3L, 1L, 4L, 1L, 1L, 2L, 4L, 2L, 1L, 4L, 1L, 4L, 3L,
2L, 3L, 2L, 4L, 3L, 3L, 2L, 1L, 3L, 1L, 1L, 3L, 2L, 3L, 3L, 3L,
1L, 2L, 4L, 2L, 3L, 2L, 1L, 4L, 3L, 2L, 4L, 4L, 2L, 4L, 1L, 1L,
2L, 2L, 4L, 1L, 2L, 4L, 2L, 4L, 3L, 4L), .Label = c("forward",
"left", "non-moving", "right"), class = "factor")), .Names = c("Speed_Out",
"Movement_Out"), row.names = c(NA, 283L), class = "data.frame")
答案 0 :(得分:1)
好的,我不得不说这比我想象的要复杂得多。我的答案很丑陋,很可能不是最优的,但似乎有效。
似乎有些位置即使把其余的数据都纳入窗口,也不满足任何条件,因此这些行的 Behaviour 保持为 NA。
library(dplyr)
# Label consecutive windows of rows in `data` with a behaviour.
#
# Starting at row `i`, a window of `window_size` rows is tested against the
# rules below. If no rule matches, the window grows by one row at a time until
# one does; every row of the matching window then receives that behaviour and
# the next window starts on the row after it. If the window reaches the last
# row without matching, row `i` stays NA and the search restarts at row
# `i + 1` with a fresh window. A fresh window that would run past the last row
# is treated as "no rule met".
#
# Rules, checked in this order:
#   "Lying"   : mean(Speed_Out) <= 0.152682
#   "Grazing" : share of "left" in Movement_Out <= 0.208 and
#               mean(Speed_Out) >= 0.200921
#
# Args:
#   data:        data frame with columns Speed_Out (numeric) and Movement_Out.
#   window_size: number of rows in every fresh window (at the start and after
#                each terminated window).
#   verbose:     if TRUE, print the start row and window size of every test.
#
# Returns:
#   `data` with two added columns: Behaviour (character, NA where no rule was
#   ever met) and window_nr (running number of the matching window, NA
#   otherwise).
label_behaviour <- function(data, window_size = 4, verbose = TRUE) {
  stopifnot(
    all(c("Speed_Out", "Movement_Out") %in% names(data)),
    is.numeric(window_size),
    length(window_size) == 1L,
    window_size >= 1
  )

  n_rows <- nrow(data)
  speed <- data$Speed_Out
  is_left <- data$Movement_Out == "left"

  # Results are preallocated and filled in place instead of growing a data
  # frame with rbind() on every matching window.
  behaviour <- rep(NA_character_, n_rows)
  window_nr <- rep(NA_real_, n_rows)

  i <- 1
  w <- window_size
  window_cnt <- 1

  while (i <= n_rows) {
    if (verbose) {
      print(paste0("Row: ", i, ", Window Size: ", w))
    }

    last <- i + w - 1
    label <- NA_character_

    # Only evaluate windows that lie completely inside the data.
    if (last <= n_rows) {
      rows <- i:last
      mean_sp <- mean(speed[rows])
      left_share <- sum(is_left[rows]) / w

      # isTRUE() keeps an NA statistic from matching a rule.
      if (isTRUE(mean_sp <= 0.152682)) {
        label <- "Lying"
      } else if (isTRUE(left_share <= 0.208 && mean_sp >= 0.200921)) {
        label <- "Grazing"
      }
    }

    if (!is.na(label)) {
      # Rule met: label the whole window and start a fresh one after it.
      behaviour[rows] <- label
      window_nr[rows] <- window_cnt
      window_cnt <- window_cnt + 1
      i <- last + 1
      w <- window_size
    } else if (w <= n_rows - i) {
      # No rule met and there is still room: grow the window by one row.
      w <- w + 1
    } else {
      # Window cannot grow any further: leave row i unlabelled and move on.
      w <- window_size
      i <- i + 1
    }
  }

  data$Behaviour <- behaviour
  data$window_nr <- window_nr
  data
}

# Initial window size (also used again after every terminated window)
window_size <- 4

# Add the Behaviour and window_nr columns to the original data frame
Out <- label_behaviour(Out, window_size = window_size)

# Clean up workspace
rm(window_size)
前20个输出
Speed_Out Movement_Out Behaviour window_nr
1 0.220 left Lying 1
2 0.155 left Lying 1
3 0.120 forward Lying 1
4 0.090 non-moving Lying 1
5 0.125 non-moving Grazing 2
6 0.125 non-moving Grazing 2
7 0.155 non-moving Grazing 2
8 0.340 forward Grazing 2
9 0.370 forward Grazing 2
10 0.185 forward Grazing 3
11 0.155 right Grazing 3
12 0.220 non-moving Grazing 3
13 0.220 non-moving Grazing 3
14 0.280 non-moving Grazing 3
15 0.215 non-moving Grazing 4
16 0.060 right Grazing 4
17 0.340 non-moving Grazing 4
18 0.555 forward Grazing 4
19 0.275 right Grazing 5
20 0.215 forward Grazing 5
我知道代码很乱,所以如果需要补充注释或说明,请告诉我。