在R的dplyr中将shift与mutate和case_when一起使用:未按预期工作

时间:2018-09-16 11:24:58

标签: r dataframe dplyr mutate

我的数据记录了瞳孔大小的变化。当值为-1时,表示正在眨眼(眼睛闭合)。我已经编写了一些代码来检测眨眼的开始(onset)和结束(offset),但在使用shift函数时遇到了一些问题。

我的数据示例:

library(dplyr)
DataFrame<-structure(list(Pupil_Avg = c(7.174, 6.6910005, 6.518, 2.461, 
                                    2.182, 1.942, 1.942, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
                                    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
                                    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
                                    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
                                    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
                                    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
                                    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
                                    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
                                    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
                                    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
                                    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
                                    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
                                    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
                                    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
                                    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
                                    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
                                    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
                                    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
                                    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
                                    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
                                    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
                                    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
                                    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
                                    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
                                    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
                                    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
                                    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
                                    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
                                    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
                                    -1, -1, -1, -1, -1, -1, -1, 1.487, -1, -1, -1, -1, 2.202, 2.202, 
                                    2.281, 2.344)), row.names = c(NA, -481L), class = c("tbl_df", 
                                                                                        "tbl", "data.frame"))

我对该数据执行的操作:

# Label every row of DataFrame with a blink state derived from Pupil_Avg.
# Start with an empty-string label in every row.
DataFrame$BLINK_IDENTIFICATION <- ""

# Arbitrary threshold: how far Pupil_Avg must fall below the previous row's
# value for the row to count as a blink onset.
Pupil_Constriction = 3

DataFrame<-DataFrame %>%
  # The grouping below was removed as it is not necessary to work on the problem.
  #group_by(StimulusName, Name, StimuliBlock) %>%
  # NOTE(review): shift() is not a dplyr function; presumably this is
  # data.table::shift(), which the visible code never loads - confirm.
  # case_when() uses the first rule that matches; there is no fallback rule,
  # so rows matching none of the rules below end up as NA.
  # Rule 1: the current value is at least Pupil_Constriction below the
  # previous row's value -> "Blink Onset".
  mutate(BLINK_IDENTIFICATION = case_when((DataFrame$Pupil_Avg <= (shift(DataFrame$Pupil_Avg, 1L, type="lag")-Pupil_Constriction)) ~ "Blink Onset",
                                          # Rule 2 (intended): if the previous row is labelled "Blink Onset" and the
                                          # current Pupil_Avg is not -1, label the current row "Blink Onset" as well.
                                          # NOTE(review): DataFrame$BLINK_IDENTIFICATION reads the column as it was
                                          # before this mutate() call (every value is "", assigned above), not the
                                          # labels rule 1 produces here, so this comparison cannot be TRUE.
                                          ( (shift(DataFrame$BLINK_IDENTIFICATION, 1L, type="lag")=="Blink Onset") & (DataFrame$Pupil_Avg != -1) ) ~ "Blink Onset",
# Rule 3: write "Blink Offset" if the previous row was -1, the current row is
# greater than -1, and the next row is not -1.
                                          ( (shift(DataFrame$Pupil_Avg, 1L, type="lag")==-1) & (DataFrame$Pupil_Avg >-1) & (shift(DataFrame$Pupil_Avg, 1L, type="lead")!=-1)) ~ "Blink Offset",
# Rule 4: write "Eye Closed" if the current row equals -1.
                                          (DataFrame$Pupil_Avg==-1) ~ "Eye Closed"))

我试图根据Pupil_Avg中值的变化来检测眨眼的开始(onset)和结束(offset)。我的主要问题出在这一行代码:( (shift(DataFrame$BLINK_IDENTIFICATION, 1L, type="lag")=="Blink Onset") & (DataFrame$Pupil_Avg != -1) ) ~ "Blink Onset",

该行应检查BLINK_IDENTIFICATION上一行的值:如果它等于"Blink Onset",并且Pupil_Avg的当前值不等于-1,则在当前行的BLINK_IDENTIFICATION中写入"Blink Onset"。

运行代码时,您会发现这不起作用。我想我不了解如何正确使用shift函数,因为我认为逻辑是合理的。当然,我可能会弄错。

谢谢您的时间。

1 个答案:

答案 0 :(得分:2)

您的case语句中的3个条件可以通过dplyr完成:

# Label each sample from Pupil_Avg alone. case_when() keeps the first rule
# that matches, so the closed-eye value is tested before the other rules.
DataFrame <- mutate(
  DataFrame,
  BLINK_IDENTIFICATION = case_when(
    # -1 is the value recorded while the eye is closed.
    Pupil_Avg == -1 ~ "Eye Closed",
    # The pupil shrank by at least Pupil_Constriction since the previous row.
    Pupil_Avg <= lag(Pupil_Avg) - Pupil_Constriction ~ "Blink Onset",
    # First open-eye sample after a closed one, and the next sample is not -1.
    Pupil_Avg > -1 & lag(Pupil_Avg) == -1 & lead(Pupil_Avg) != -1 ~
      "Blink Offset",
    # Every remaining row gets an empty label.
    TRUE ~ ""
  )
)

# A tibble: 481 x 2
   Pupil_Avg BLINK_IDENTIFICATION
       <dbl> <chr>               
 1      7.17 ""                  
 2      6.69 ""                  
 3      6.52 ""                  
 4      2.46 Blink Onset         
 5      2.18 ""                  
 6      1.94 ""                  
 7      1.94 ""                  
 8     -1    Eye Closed          
 9     -1    Eye Closed          
10     -1    Eye Closed          
# ... with 471 more rows

但是条件lag(BLINK_IDENTIFICATION) == "Blink Onset" & Pupil_Avg != -1递归依赖于先前的值(请参阅第5、6、7行)。为此,您需要一个循环。

# Carry "Blink Onset" forward row by row: a row becomes "Blink Onset" when the
# row before it is "Blink Onset" and its own Pupil_Avg is not -1. Each step
# depends on the label just written to the previous row, so this has to run
# sequentially rather than as a vectorised expression.
# seq_len(n)[-1] is empty for frames with fewer than two rows, whereas 2:n
# would count backwards (2, 1, ...) and index outside the data.
for (i in seq_len(nrow(DataFrame))[-1]) {
  # isTRUE() keeps the existing label when the comparison is NA
  # (for example a missing Pupil_Avg value).
  if (isTRUE(DataFrame$BLINK_IDENTIFICATION[i - 1] == "Blink Onset" &&
               DataFrame$Pupil_Avg[i] != -1)) {
    DataFrame$BLINK_IDENTIFICATION[i] <- "Blink Onset"
  }
}

DataFrame
# A tibble: 481 x 2
   Pupil_Avg BLINK_IDENTIFICATION
       <dbl> <chr>               
 1      7.17 ""                  
 2      6.69 ""                  
 3      6.52 ""                  
 4      2.46 Blink Onset         
 5      2.18 Blink Onset         
 6      1.94 Blink Onset         
 7      1.94 Blink Onset         
 8     -1    Eye Closed          
 9     -1    Eye Closed          
10     -1    Eye Closed     

tail(DataFrame, 10)
# A tibble: 10 x 2
   Pupil_Avg BLINK_IDENTIFICATION
       <dbl> <chr>               
 1     -1    Eye Closed          
 2      1.49 ""                  
 3     -1    Eye Closed          
 4     -1    Eye Closed          
 5     -1    Eye Closed          
 6     -1    Eye Closed          
 7      2.20 Blink Offset        
 8      2.20 ""                  
 9      2.28 ""                  
10      2.34 ""       

但是您也可以把所有操作都放在for循环中完成。如您所见,数据末尾仍有未填充的空白值。您需要自行决定如何处理它们:保持原样,或者填上相应的值。