我正在处理一些时间序列(瞳孔扩张)数据,并希望根据因子变量(SOA)的不同水平,过滤不同的时间范围(Time)。
示例数据:
library(dplyr)
# Example data in dput() form: a 32-row data.frame with four columns.
#   Subject    - factor with 20 declared levels; only codes 1L and 2L
#                ("12" and "14") appear in these rows.
#   SOA        - factor with levels "Long SOA" (1L) and "Short SOA" (2L).
#   Time       - numeric, from -66.68 to 166.7 in steps of 33.34; each value
#                occurs four times (2 subjects x 2 SOA levels).
#   Pcent_Chng - numeric response value, one per row.
Data <- structure(list(Subject = structure(c(1L, 1L, 2L, 2L, 1L, 1L,
2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L,
2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L), .Label = c("12", "14",
"15", "16", "18", "20", "21", "22", "23", "28", "29", "30", "33",
"36", "37", "38", "40", "42", "43", "44"), class = "factor"),
SOA = structure(c(2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L), .Label = c("Long SOA", "Short SOA"
), class = "factor"), Time = c(-66.68, -66.68, -66.68, -66.68,
-33.34, -33.34, -33.34, -33.34, 0, 0, 0, 0, 33.34, 33.34,
33.34, 33.34, 66.68, 66.68, 66.68, 66.68, 100.02, 100.02,
100.02, 100.02, 133.36, 133.36, 133.36, 133.36, 166.7, 166.7,
166.7, 166.7), Pcent_Chng = c(0.14391, 0.076759, -0.022377,
0.038111, 0.21093, 0.11448, -0.0047064, 0.078232, 0.27924,
0.1527, -0.0085276, 0.12385, 0.38328, 0.21299, 0.01988, 0.15626,
0.47471, 0.25357, 0.050318, 0.20517, 0.58012, 0.2888, 0.080629,
0.20616, 0.65861, 0.33622, 0.12892, 0.20832, 0.75277, 0.38181,
0.17921, 0.21789)), class = "data.frame", row.names = c(NA,
-32L), .Names = c("Subject", "SOA", "Time", "Pcent_Chng"))
我想对 Time 的不同区间求 Pcent_Chng 的平均值:SOA = "Short" 时取平均的时间窗口要比 SOA = "Long" 时的短。
针对 Type = "Word" 的数据,我已经尝试了以下写法(分别在 filter 之后和之前使用 group_by):
# Attempt 1 (does NOT work): filter first, then group and average.
# NOTE: inside `[ ]`, `SOA = "Short SOA"` uses `=` rather than the comparison
# operator `==`, so it does not select the rows of that SOA level. In addition,
# filter() combines its comma-separated conditions with AND, so a row would
# have to satisfy the "Short SOA" and the "Long SOA" condition at once.
# As reported in the question, the result is an empty data frame.
Data %>%
filter(Time[SOA = "Short SOA"] >= 0 & Time[SOA = "Short SOA"] <= 100,
Time[SOA = "Long SOA"] >= 0 & Time[SOA = "Long SOA"] <= 150) %>%
group_by(Subject,SOA) %>%
summarize(Word_Avg_Pcent = mean(Pcent_Chng,na.rm=TRUE))
和
# Attempt 2 (does NOT work): group first, then filter and average.
# NOTE: same problem as the other attempt - `SOA = "..."` inside `[ ]` is not
# the comparison `SOA == "..."`, and the two comma-separated filter()
# conditions are combined with AND. Moving group_by() before filter() does not
# change that; as reported in the question, the result is still empty.
Data %>%
group_by(Subject,SOA) %>%
filter(Time[SOA = "Short SOA"] >= 0 & Time[SOA = "Short SOA"] <= 100,
Time[SOA = "Long SOA"] >= 0 & Time[SOA = "Long SOA"] <= 150) %>%
summarize(Word_Avg_Pcent = mean(Pcent_Chng,na.rm=TRUE))
两者都会产生空数据帧;列在那里,但没有数据。如果我不使用第二个过滤器,我会得到一个完整的数据框。
有没有办法在dplyr链中使用管道和过滤器来实现我想要的东西?
答案 0 :(得分:2)
如评论中所述,您需要用 OR(|)把您想要的两个 AND(&)条件连接起来。
您的过滤器是:
# Quoted (faulty) filter: `SOA = "..."` uses `=`, not the comparison `==`.
filter(Time[SOA = "Short SOA"] >= 0 & Time[SOA = "Short SOA"] <= 1200,
Time[SOA = "Long SOA"] >= 0 & Time[SOA = "Long SOA"] <= 3000)
其中的表达式(例如 SOA = "Short SOA")并不能求值为逻辑值。您需要把条件写得更明确。
您希望筛选出这样的行:SOA 等于 "Short SOA" 且 Time 介于 0 到 1200 之间,或者 SOA 等于 "Long SOA" 且 Time 介于 0 到 3000 之间。
(SOA == "Short SOA" and 0 <= Time <= 1200) OR (SOA == "Long SOA" and 0 <= Time <= 3000)
您可以使用 dplyr 中的 between 来表达时间条件。
实现如下:
library(tidyverse)
# NOTE(review): this evaluates R code fetched from a remote URL. Only run it
# if you trust the source; ideally download and inspect the file first.
Data <- eval(parse(file("http://pastebin.com/raw.php?i=VTWCVgCA")))

# Reshape the wide sample columns (X.8 ... X100) to long form, derive Time and
# the percent change, then average the "Word" rows over an SOA-specific time
# window for every Subject / NsCond / Close / SOA combination.
Data %>%
  # pivot_longer() is the current replacement for the superseded gather().
  pivot_longer(X.8:X100, names_to = "Sample", values_to = "Prop_Chng") %>%
  mutate(
    # Column names encode the sample index: "X.8" -> "-8", "X100" -> "100".
    Sample = as.numeric(gsub("X", "", gsub("[.]", "-", Sample))),
    Time = Sample * 33.34,
    Pcent_Chng = Prop_Chng * 100
  ) %>%
  filter(
    Type == "Word",
    # Each SOA level gets its own window: `&` ties a level to its time range,
    # and `|` keeps rows that match either level's window.
    (SOA == "Short SOA" & between(Time, 0, 1200)) |
      (SOA == "Long SOA" & between(Time, 0, 3000))
  ) %>%
  group_by(Subject, NsCond, Close, SOA) %>%
  summarize(Word_Avg_Pcent = mean(Pcent_Chng, na.rm = TRUE))