我正在尝试使用 dplyr 按组过滤数据框:找到字符串 'ReadingOnset' 首次出现的那一行,并将该行及其之后的所有行保存到一个新的数据框中。
# Example data for the question: a data frame with four character columns
# (Name, StimulusName, StimuliBlock, Reading_Onset). Reading_Onset is ""
# everywhere except for one "ReadingOnset" entry near the end. The object is
# given class c("grouped_df", "tbl_df", "tbl", "data.frame"), row.names that
# declare 101 rows, and grouping attributes (vars = Name, StimulusName,
# StimuliBlock; a single group covering indices 0:100).
# NOTE(review): the `labels` attribute lists Name = "Innocent Subject 15"
# while the Name column holds "Sub1" -- presumably the column was anonymised
# after the object was dumped; confirm.
# NOTE(review): the vars/indices/labels attributes look like the grouped_df
# layout of older dplyr releases; a newer dplyr may need group_by() to be
# re-applied before use -- confirm.
Text_Stimuli <- structure(list(Name = c("Sub1", "Sub1", "Sub1", "Sub1", "Sub1",
"Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1",
"Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1",
"Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1",
"Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1",
"Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1",
"Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1",
"Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1",
"Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1",
"Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1",
"Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1",
"Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1",
"Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1"
), StimulusName = c("GenLie20", "GenLie20", "GenLie20", "GenLie20",
"GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20",
"GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20",
"GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20",
"GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20",
"GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20",
"GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20",
"GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20",
"GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20",
"GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20",
"GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20",
"GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20",
"GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20",
"GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20",
"GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20",
"GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20",
"GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20",
"GenLie20"), StimuliBlock = c("Block_4", "Block_4", "Block_4",
"Block_4", "Block_4", "Block_4", "Block_4", "Block_4", "Block_4",
"Block_4", "Block_4", "Block_4", "Block_4", "Block_4", "Block_4",
"Block_4", "Block_4", "Block_4", "Block_4", "Block_4", "Block_4",
"Block_4", "Block_4", "Block_4", "Block_4", "Block_4", "Block_4",
"Block_4", "Block_4", "Block_4", "Block_4", "Block_4", "Block_4",
"Block_4", "Block_4", "Block_4", "Block_4", "Block_4", "Block_4",
"Block_4", "Block_4", "Block_4", "Block_4", "Block_4", "Block_4",
"Block_4", "Block_4", "Block_4", "Block_4", "Block_4", "Block_4",
"Block_4", "Block_4", "Block_4", "Block_4", "Block_4", "Block_4",
"Block_4", "Block_4", "Block_4", "Block_4", "Block_4", "Block_4",
"Block_4", "Block_4", "Block_4", "Block_4", "Block_4", "Block_4",
"Block_4", "Block_4", "Block_4", "Block_4", "Block_4", "Block_4",
"Block_4", "Block_4", "Block_4", "Block_4", "Block_4", "Block_4",
"Block_4", "Block_4", "Block_4", "Block_4", "Block_4", "Block_4",
"Block_4", "Block_4", "Block_4", "Block_4", "Block_4", "Block_4",
"Block_4", "Block_4", "Block_4", "Block_4", "Block_4", "Block_4",
"Block_4", "Block_4"), Reading_Onset = c("", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "ReadingOnset", "", "", "", "",
"", "", "")), row.names = c(NA, -101L), vars = c("Name", "StimulusName",
"StimuliBlock"), drop = TRUE, indices = list(0:100), group_sizes = 101L, biggest_group_size = 101L, labels = structure(list(
Name = "Innocent Subject 15", StimulusName = "GenLie20",
StimuliBlock = "Block_4"), row.names = c(NA, -1L), class = "data.frame", vars = c("Name",
"StimulusName", "StimuliBlock"), drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
下面是我一直在尝试的解决方案的一个例子,但没有成功。
# Attempt from the question. Within each Name / StimulusName / StimuliBlock
# group: cumsum() counts the "ReadingOnset" matches seen so far, lead() shifts
# that counter one row earlier (the last row is padded with 0), and filter()
# keeps the rows whose shifted counter is still zero -- i.e. rows whose *next*
# row has not reached a match yet, plus the last row of the group.
Test <- Text_Stimuli %>%
  group_by(Name, StimulusName, StimuliBlock) %>%
  filter(
    lead(cumsum(grepl("ReadingOnset", Reading_Onset)), default = 0) == 0
  )
如您所见,我正在尝试按 Name、StimulusName 和 StimuliBlock 进行分组。然后,我试图在列 Reading_Onset 中找到“ReadingOnset”的第一个匹配项,并返回从该行开始的所有行(包括带有“ReadingOnset”的那一行)。
我一直在尝试使该解决方案适应问题的反面:https://stackoverflow.com/a/37922522/2653210
答案 0 :(得分:2)
您可以尝试使用数据集进行此操作:
library(dplyr)
library(stringr)
library(zoo)

# Keep the first "ReadingOnset" row and every row after it.
#   * str_detect() flags the matching rows;
#   * ifelse() turns that into TRUE on a match and NA everywhere else;
#   * na.locf(na.rm = FALSE) carries the TRUE forward and leaves the leading
#     NAs in place;
#   * filter() keeps only rows whose condition is TRUE, so the NA rows before
#     the first match are dropped.
# The pipeline starts from Text_Stimuli (the data from the question) instead
# of `df`: unless the user has defined their own `df`, that name resolves to
# the stats::df() function and the pipeline fails.
Text_Stimuli %>%
  filter(
    ifelse(str_detect(Reading_Onset, "ReadingOnset"), TRUE, NA) %>%
      na.locf(na.rm = FALSE)
  )
## A tibble: 8 x 4
## Groups: Name, StimulusName, StimuliBlock [1]
# Name StimulusName StimuliBlock Reading_Onset
# <chr> <chr> <chr> <chr>
#1 Innocent Subject 15 GenLie20 Block_4 ReadingOnset
#2 Innocent Subject 15 GenLie20 Block_4 ""
#3 Innocent Subject 15 GenLie20 Block_4 ""
#4 Innocent Subject 15 GenLie20 Block_4 ""
#5 Innocent Subject 15 GenLie20 Block_4 ""
#6 Innocent Subject 15 GenLie20 Block_4 ""
#7 Innocent Subject 15 GenLie20 Block_4 ""
#8 Innocent Subject 15 GenLie20 Block_4 ""
答案 1 :(得分:1)
我无法确定您想要的是 ReadingOnset 及其之前的所有行,还是 ReadingOnset 及其之后的所有行,所以两种情况我都给出。
ReadingOnset 及其之前的所有行:
library(dplyr)

# Keep every row up to and including the first "ReadingOnset" row.
# match(TRUE, ...) yields the position of the first match only, so the
# comparison stays scalar even when the string occurs more than once;
# which() would return every matching position and the comparison would be
# recycled across rows. When nothing matches, match() returns NA, the
# condition is NA for every row, and filter() drops those rows instead of
# failing on a zero-length condition.
Text_Stimuli %>%
  filter(row_number() <= match(TRUE, grepl("ReadingOnset", Reading_Onset)))
ReadingOnset 及其之后的所有行:
# Keep the first "ReadingOnset" row and every row after it.
# match(TRUE, ...) yields the position of the first match only, so the
# comparison stays scalar even when the string occurs more than once;
# which() would return every matching position and the comparison would be
# recycled across rows. When nothing matches, match() returns NA, the
# condition is NA for every row, and filter() drops those rows instead of
# failing on a zero-length condition.
Text_Stimuli %>%
  filter(row_number() >= match(TRUE, grepl("ReadingOnset", Reading_Onset)))
我们所做的只是根据找到“ReadingOnset”的那一行的行号进行过滤。