我有这个数据框:
# Example data: 33 rows for a single stimulus. Label is NA everywhere except
# for two "Onset"/"Offset" marker pairs (rows 6/12 and rows 20/29).
df <- structure(
  list(
    StimulusName = rep("Alpha5", 33L),
    Label = c(
      rep(NA, 5L), "Onset", rep(NA, 5L), "Offset",
      rep(NA, 7L), "Onset", rep(NA, 8L), "Offset",
      rep(NA, 4L)
    )
  ),
  row.names = c(NA, -33L),
  class = c("tbl_df", "tbl", "data.frame")
)
我想在Onset
和Offset
每次出现之间的各行的Label
列中填入单词'Onset'。最终的数据框如下所示:
StimulusName Label
1 Alpha5 NA
2 Alpha5 NA
3 Alpha5 NA
4 Alpha5 NA
5 Alpha5 NA
6 Alpha5 Onset
7 Alpha5 Onset
8 Alpha5 Onset
9 Alpha5 Onset
10 Alpha5 Onset
11 Alpha5 Onset
12 Alpha5 Offset
13 Alpha5 NA
14 Alpha5 NA
15 Alpha5 NA
16 Alpha5 NA
17 Alpha5 NA
18 Alpha5 NA
19 Alpha5 NA
20 Alpha5 Onset
21 Alpha5 Onset
22 Alpha5 Onset
23 Alpha5 Onset
24 Alpha5 Onset
25 Alpha5 Onset
26 Alpha5 Onset
27 Alpha5 Onset
28 Alpha5 Onset
29 Alpha5 Offset
30 Alpha5 NA
31 Alpha5 NA
32 Alpha5 NA
33 Alpha5 NA
可以在dplyr
中使用fill()
来实现吗?
此代码显然无法正常工作,因为它会填充第一个非NA
值之后的所有内容:
# Asker's attempt, quoted as the version that does NOT give the desired result:
# pipes df into fill() and passes df$Label as the column to fill.
# NOTE(review): fill() normally takes bare column names, e.g. fill(Label) -
# confirm how df$Label is interpreted here.
Test <- df%>%
fill(df$Label)
我愿意接受任何解决方案,但希望使用dplyr
并尽可能避免循环。
答案 0 :(得分:4)
我们可以使用基础 R(base R)的 mapply
# Row positions of the "Onset" markers
onset_ind <- which(df$Label == "Onset")
# Row positions of the "Offset" markers
offset_ind <- which(df$Label == "Offset")
# mapply() recycles the shorter index vector, which would overwrite the wrong
# rows, so require exactly one "Offset" per "Onset" before filling.
stopifnot(length(onset_ind) == length(offset_ind))
# Each "Onset" must precede its "Offset"; otherwise x:y would count backwards.
stopifnot(all(onset_ind < offset_ind))
# Build the row range from each "Onset" up to the row just before its "Offset".
# SIMPLIFY = FALSE keeps the result a list even when all ranges have the same
# length, so unlist() always yields a plain index vector.
fill_rows <- mapply(
  function(x, y) x:y,
  onset_ind,
  offset_ind - 1L,
  SIMPLIFY = FALSE
)
df$Label[unlist(fill_rows)] <- "Onset"
df
# StimulusName Label
#1 Alpha5 <NA>
#2 Alpha5 <NA>
#3 Alpha5 <NA>
#4 Alpha5 <NA>
#5 Alpha5 <NA>
#6 Alpha5 Onset
#7 Alpha5 Onset
#8 Alpha5 Onset
#9 Alpha5 Onset
#10 Alpha5 Onset
#11 Alpha5 Onset
#12 Alpha5 Offset
#13 Alpha5 <NA>
#14 Alpha5 <NA>
#.....
此处假定"Onset"和"Offset"出现的次数相同。
如果要避免不必要的变量创建(onset_ind
和offset_ind
),可以单行执行
# Label every row from each "Onset" up to the row before the corresponding
# "Offset", without storing the marker positions in intermediate variables.
df$Label[unlist(Map(
  function(from, to) from:to,
  which(df$Label == "Onset"),
  which(df$Label == "Offset") - 1
))] <- "Onset"
答案 1 :(得分:2)
这是一种dplyr
的方式,
library(tidyverse)
# Start a new run at every non-NA Label, so each run begins with a marker
# (or, for the rows before the first marker, consists of NAs only).
df %>%
  mutate(run_id = cumsum(!is.na(Label))) %>%
  group_by(run_id) %>%
  # Within a run, the single TRUE/FALSE/NA test on the first Label is recycled
  # over the whole run: runs opened by "Onset" are overwritten with "Onset",
  # all other runs are left untouched.
  mutate(Label = replace(Label, first(Label) == "Onset", "Onset")) %>%
  ungroup() %>%
  # Drop the helper column again.
  select(-run_id)
答案 2 :(得分:1)
library(zoo) # provides na.locf()
df2 <- as.data.frame(df) # work on a plain data.frame copy of df
# Carry the last non-NA label forward once. na.rm = FALSE keeps the leading
# NAs, so the result stays aligned row-for-row with df2$Label.
filled <- na.locf(df2$Label, na.rm = FALSE)
# NAs that follow an "Offset" must stay NA; every other row takes the
# carried-forward label (so the NAs after an "Onset" become "Onset").
keep_na <- is.na(df2$Label) & filled %in% "Offset"
df2$Label[!keep_na] <- filled[!keep_na]
df2
StimulusName Label
1 Alpha5 <NA>
2 Alpha5 <NA>
3 Alpha5 <NA>
4 Alpha5 <NA>
5 Alpha5 <NA>
6 Alpha5 Onset
7 Alpha5 Onset
8 Alpha5 Onset
9 Alpha5 Onset
10 Alpha5 Onset
11 Alpha5 Onset
12 Alpha5 Offset
13 Alpha5 <NA>
14 Alpha5 <NA>
15 Alpha5 <NA>
16 Alpha5 <NA>
17 Alpha5 <NA>
18 Alpha5 <NA>
19 Alpha5 <NA>
20 Alpha5 Onset
21 Alpha5 Onset
22 Alpha5 Onset
23 Alpha5 Onset
24 Alpha5 Onset
25 Alpha5 Onset
26 Alpha5 Onset
27 Alpha5 Onset
28 Alpha5 Onset
29 Alpha5 Offset
30 Alpha5 <NA>
31 Alpha5 <NA>
32 Alpha5 <NA>
33 Alpha5 <NA>
答案 3 :(得分:0)
也可以自己写一段循环代码来实现:
# Carry "Onset" forward one row at a time: whenever row i is "Onset" and the
# next row is NA, label the next row "Onset" too. The loop runs top to bottom,
# so the label keeps propagating until it reaches a non-NA value ("Offset").
# Stop at the second-to-last row so that i + 1 never runs past the end of the
# column; max(..., 0L) keeps seq_len() valid for an empty column.
for (i in seq_len(max(length(df$Label) - 1L, 0L))) {
  # %in% never returns NA, so the test is safe on rows whose Label is NA.
  if (df$Label[i] %in% "Onset" && is.na(df$Label[i + 1])) {
    df$Label[i + 1] <- "Onset"
  }
}