我想从传感器数据中识别出零件,并给每个零件分配一个ID。为此,我想按 sensor 列对下面的数据集分组,并检查 values 列是否从0切换为1。第一次发生这种切换时,caseid 应变为1(如手工填写的 caseid 列所示);只要 values 保持为1,caseid 就保持为1;当 values 变回0时,caseid 也应变回0。下一次从0切换到1时,caseid 应变为2,因为传感器识别到了第二个零件,依此类推。
# Sample sensor log: nine consecutive seconds, three sensors read per second.
# Every timestamp therefore appears three times in a row, while the sensor
# IDs cycle 10001, 10002, 10003 within each second.
time <- rep(sprintf("07:00:%02d", 1:9), each = 3)
sensor <- rep(c(10001, 10002, 10003), times = 9)

# Raw sensor signal (1 = a part is in front of the sensor, 0 = nothing seen).
values <- c(
  0, 0, 0, 1, 0, 0, 1, 0, 0,
  0, 1, 0, 0, 1, 0, 0, 1, 0,
  0, 0, 0, 1, 0, 1, 1, 0, 1
)

# Hand-labelled target column: per-sensor running part number, 0 when idle.
caseid <- c(
  0, 0, 0, 1, 0, 0, 1, 0, 0,
  0, 1, 0, 0, 1, 0, 0, 1, 0,
  0, 0, 0, 2, 0, 1, 2, 0, 1
)

data <- data.frame(time, sensor, values, caseid)
(所以 data$caseid 就是我想要得到的结果)
我认为可以通过分组(group by)以某种方式实现这一目标,但我没能做出来,因此我选择了另一种(比较粗糙的)方法。下面是我目前写出的代码:
# Number the parts seen by a single sensor (10002) and store the result in
# the CaseID2 column of `sensor_data_temp`.
#
# A part starts at every 0 -> 1 transition of `values`. Rows that belong to
# the n-th part get CaseID2 == n; rows where the sensor reads 0 get 0.
#
# The column is named `sensor` (lower case). Plain base subsetting is used so
# the snippet does not depend on dplyr having been attached beforehand.
sensor_data_temp <- data[data$sensor == 10002, ]
rownames(sensor_data_temp) <- NULL

readings <- sensor_data_temp$values

# Prepending 0 treats the series as starting "off", so the very first row is
# classified as well: a leading 1 is the start of part 1, a leading 0 is idle.
is_rising_edge <- diff(c(0, readings)) > 0

# Running count of part starts = part number of the current row; multiplying
# by the on/off mask resets idle rows to 0.
part_number <- cumsum(is_rising_edge)
sensor_data_temp$CaseID2 <- as.numeric(part_number * (readings != 0))

# 1 + number of 1 -> 0 transitions, i.e. one more than the number of parts
# that have completely passed the sensor.
case_id <- 1 + sum(diff(readings) < 0)
我认为这样可以得到单个传感器的 caseid。但是我不知道如何对每个传感器都这样做,并把结果放进同一个数据框(如上面的 data)。
我确信有一种更加优雅的方式来获得我想要的东西。
我希望有人能帮助我。在此先感谢! :)
答案 0 :(得分:3)
这是一种方法:
library(dplyr)
# Order the rows by sensor and time, then number the parts within each sensor.
# diff(c(0, values)) > 0 marks every 0 -> 1 transition (the leading 0 makes a
# series that starts at 1 count as a new part); cumsum() turns those marks
# into a running part number. Rows where the sensor reads 0 get caseID 0.
data %>%
  arrange(sensor, time) %>%
  group_by(sensor) %>%
  mutate(
    caseID = case_when(
      values != 0 ~ cumsum(diff(c(0, values)) > 0),
      TRUE ~ 0L
    )
  )
答案 1 :(得分:1)
这是使用 data.table 的方法:
library("data.table")
# Sample sensor log as a data.table: nine consecutive seconds with three
# sensors read per second, so each timestamp repeats three times in a row and
# the sensor IDs cycle 10001, 10002, 10003.
data <- data.table(
  time = rep(sprintf("07:00:%02d", 1:9), each = 3),
  sensor = rep(c(10001, 10002, 10003), times = 9),
  # Raw signal: 1 while a part is in front of the sensor, 0 otherwise.
  values = c(
    0, 0, 0, 1, 0, 0, 1, 0, 0,
    0, 1, 0, 0, 1, 0, 0, 1, 0,
    0, 0, 0, 1, 0, 1, 1, 0, 1
  ),
  # Hand-labelled expected result, kept for comparison with caseID.
  caseid = c(
    0, 0, 0, 1, 0, 0, 1, 0, 0,
    0, 1, 0, 0, 1, 0, 0, 1, 0,
    0, 0, 0, 2, 0, 1, 2, 0, 1
  )
)
# Add caseID by reference, computed separately for every sensor: the running
# count of 0 -> 1 steps is the part number, and rows reading 0 are set to 0.
# The trailing [] prints the updated table.
data[
  ,
  caseID := {
    starts_part <- diff(c(0, values)) == 1
    ifelse(values == 0, 0, cumsum(starts_part))
  },
  by = sensor
][]
不使用 ifelse() 的写法:
# Same per-sensor numbering without ifelse(): start from an all-zero vector
# and fill in the running part number only on rows where the sensor reads 1.
# The trailing [] prints the updated table.
data[
  ,
  caseID := {
    is_on <- values == 1
    part_number <- cumsum(diff(c(0, values)) == 1)
    ids <- rep(0, .N)
    ids[is_on] <- part_number[is_on]
    ids
  },
  by = sensor
][]