我正在尝试创建一个执行以下操作的for循环:
for (i in 2:length(Exampledata$Levels)) {
if(is.na(Exampledata$Levels[i]) == "TRUE" {
find the last instance where
is.na(Exampledata$Levels) == "FALSE"
for that same ID, and input
the day from that row into last_entry[i]
}
}
示例数据:
# Example data: one element per row; an empty string in Levels marks a row
# for which no level was entered.
ID <- c("QYZ", "MMM", "QYZ", "bb2", "gm6", "gm6", "YOU", "LLL", "LLL", "LLL")
day <- as.numeric(1:10)
values <- c(1, 2, 4, 5, 5, 6, 8, 9, 6, 4)
Levels <- c("A", "", "A", "C", "D", "D", "C", "y", "", "")
last_entry <- numeric(length(ID))
当前数据如下:
ID values Levels day last_entry
1 QYZ 1 A 1 0
2 MMM 2 2 0
3 QYZ 4 A 3 0
4 bb2 5 C 4 0
5 gm6 5 D 5 0
6 gm6 6 D 6 0
7 YOU 8 C 7 0
8 LLL 9 y 8 0
9 LLL 6 9 0
10 LLL 4 10 0
我希望它看起来像什么:
ID values Levels day last_entry
1 QYZ 1 A 1 0
2 MMM 2 2 0
3 QYZ 4 A 3 0
4 bb2 5 C 4 0
5 gm6 5 D 5 0
6 gm6 6 D 6 0
7 YOU 8 C 7 0
8 LLL 9 y 8 0
9 LLL 6 9 8
10 LLL 4 10 8
我看过很多代码,它们查找最后一个非零元素或最后一个is.na = FALSE,但是没有一个代码可以通过ID来完成,并从该行中提取值。我还需要忽略没有该ID条目的情况。
基本上,我想知道为该ID输入级别的最后一天。
答案 0 :(得分:1)
这是使用tidyr::fill的一种方法。我们先把last_entry列中Levels为空的行替换为NA,然后使用fill用最近的非NA值填充这些NA,最后将所有Levels非空的行的last_entry值设为0。
我们也可以
library(dplyr)

# Mark rows that have a level with their own day, carry that day down to the
# following empty rows of the same ID, then zero every row that either has a
# level itself or has no earlier level to inherit from.
df %>%
  mutate(last_entry = ifelse(Levels != "", day, NA)) %>%
  group_by(ID) %>%
  tidyr::fill(last_entry) %>%
  # is.na() covers every row with no earlier level for its ID, including IDs
  # that never have a level at all.
  mutate(
    last_entry = replace(last_entry, Levels != "" | is.na(last_entry), 0)
  ) %>%
  # Drop the grouping so later verbs do not silently operate per ID.
  ungroup()
# ID day values Levels last_entry
# <fct> <dbl> <dbl> <fct> <dbl>
# 1 QYZ 1 1 A 0
# 2 MMM 2 2 "" 0
# 3 QYZ 3 4 A 0
# 4 bb2 4 5 C 0
# 5 gm6 5 5 D 0
# 6 gm6 6 6 D 0
# 7 YOU 7 8 C 0
# 8 LLL 8 9 y 0
# 9 LLL 9 6 "" 8
#10 LLL 10 4 "" 8
数据
# Row-by-row alternative: within each ID, an empty row gets the day of the
# closest earlier row that has a level; every other row gets 0.
df %>%
  group_by(ID) %>%
  mutate(last_entry = purrr::map_dbl(row_number(), function(i) {
    # Positions, up to and including row i, where a level was entered.
    filled <- which(Levels[seq_len(i)] != "")
    # Rows with a level, or with no earlier level for this ID, stay 0. The
    # length check avoids max() on an empty set (-Inf plus a warning).
    if (Levels[i] != "" || length(filled) == 0) {
      0
    } else {
      day[max(filled)]
    }
  })) %>%
  # Drop the grouping so later verbs do not silently operate per ID.
  ungroup()
答案 1 :(得分:1)
这是使用data.table的解决方案:
library(data.table)

# The example data as a data.table; rows without a level carry NA in Levels.
dt <- data.table(
  ID = c("QYZ", "MMM", "QYZ", "bb2", "gm6", "gm6", "YOU", "LLL", "LLL", "LLL"),
  Day = as.numeric(1:10),
  values = c(1, 2, 4, 5, 5, 6, 8, 9, 6, 4),
  Levels = c("A", NA, "A", "C", "D", "D", "C", "y", NA, NA),
  last_entry = rep(0, 10)
)
# Return the day of the last non-missing level in a group, or 0 when the
# group has nothing to fill (no missing levels) or nothing to fill from
# (all levels missing).
#
# days:   vector of days for one ID, in row order.
# levels: vector of levels for the same rows; NA marks a missing level.
func <- function(days, levels) {
  missing_level <- is.na(levels)
  # The condition is a single TRUE/FALSE, so use the short-circuiting ||.
  if (!any(missing_level) || all(missing_level)) {
    return(0)
  }
  # tail() is base R, so the helper works without an attached last().
  tail(days[!missing_level], 1)
}
# Per ID: rows with a missing level receive the value func() returns for the
# group; rows that have a level receive 0.
dt[, last_entry := {
  group_value <- func(Day, Levels)
  ifelse(is.na(Levels), group_value, 0)
}, by = ID]
但是如果您坚持使用for循环:
# Base-R loop version: for every ID, copy the day of its last recorded level
# into the rows of that ID whose level is missing.
ID <- c("QYZ", "MMM", "QYZ", "bb2", "gm6", "gm6", "YOU", "LLL", "LLL", "LLL")
Day <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
Levels <- c("A", NA, "A", "C", "D", "D", "C", "y", NA, NA)
last_entry <- c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
# Row positions whose level is missing.
i.na <- which(is.na(Levels))
for (id in unique(ID)) {
  i.id <- which(ID == id)
  # Skip IDs with nothing to fill from (all levels missing) or nothing to
  # fill (no level missing). The condition is scalar, so use ||.
  if (all(is.na(Levels[i.id])) || !any(is.na(Levels[i.id]))) next
  # tail(x, 1) is base R; last() only exists once data.table or dplyr is
  # attached.
  day <- tail(Day[i.id[!(i.id %in% i.na)]], 1)
  last_entry[i.na[i.na %in% i.id]] <- day
}
答案 2 :(得分:1)
如果您想正确地处理这个问题,最好事先将“空”单元格编码为NA。
# Recode every empty-string cell of the data frame as NA.
is.na(Exampledata) <- Exampledata == ""
然后,您可以使用base R中的by,在按"ID"拆分的数据中查找"Levels"最后一个!is.na条目所对应的"day"。
# Split the data by ID; within each piece, rows with a missing level receive
# the day of the last row that has a level, all other rows receive 0. The
# pieces are then stacked back into one data frame.
res <- do.call(rbind, by(Exampledata, Exampledata$ID, function(grp) {
  missing_level <- is.na(grp$Levels)
  last_filled <- tail(which(!missing_level), 1)
  grp$last_entry <- ifelse(missing_level, grp$day[last_filled], 0)
  grp
}))
由于rbind的结果是按"ID"的字母顺序排列的,因此我们可以按"day"重新排序。
# Put the rows back in ascending day order, then print the result.
res <- res[with(res, order(day)), ]
res
# ID day values Levels last_entry
# QYZ.1 QYZ 1 1 A 0
# MMM MMM 2 2 <NA> NA
# QYZ.3 QYZ 3 4 A 0
# bb2 bb2 4 5 C 0
# gm6.5 gm6 5 5 D 0
# gm6.6 gm6 6 6 D 0
# YOU YOU 7 8 C 0
# LLL.8 LLL 8 9 y 0
# LLL.9 LLL 9 6 <NA> 8
# LLL.10 LLL 10 4 <NA> 8
现在,"LLL"的各行有了所需的最后一个条目,而"MMM"在逻辑上应该是NA,因为它的"Levels"是NA,并且没有最后一个条目。