我有一个数据集,其中一列(temp,温度)存在缺失值(NA)。我想用同一“treatment”(处理)内、相同日期的另一个“sensor”(传感器)的“temp”值,或者这些传感器的平均值,来填补这些缺失值。我正在尝试使用 tidyverse / lubridate。
date treatment sensor temp
1/01/2019 1 A 30
2/01/2019 1 A 29.1
3/01/2019 1 A 21.2
4/01/2019 1 A NA
1/01/2019 1 B 20.5
2/01/2019 1 B 19.8
3/01/2019 1 B 35.1
4/01/2019 1 B 23.5
1/01/2019 2 C 31.2
2/01/2019 2 C 32.1
3/01/2019 2 C 28.1
4/01/2019 2 C 31.2
1/01/2019 2 D NA
2/01/2019 2 D 26.5
3/01/2019 2 D 27.9
4/01/2019 2 D 28
这是我所期望的:
date treatment sensor temp
1/01/2019 1 A 30
2/01/2019 1 A 29.1
3/01/2019 1 A 21.2
4/01/2019 1 A 23.5
1/01/2019 1 B 20.5
2/01/2019 1 B 19.8
3/01/2019 1 B 35.1
4/01/2019 1 B 23.5
1/01/2019 2 C 31.2
2/01/2019 2 C 32.1
3/01/2019 2 C 28.1
4/01/2019 2 C 31.2
1/01/2019 2 D 31.2
2/01/2019 2 D 26.5
3/01/2019 2 D 27.9
4/01/2019 2 D 28
非常感谢您的帮助。
答案 0 :(得分:1)
另一个选项是使用 zoo 包中的 na.aggregate:
library(dplyr)
library(zoo)
# Fill each missing temp with the mean of the readings that are present for
# the same date and treatment (na.aggregate() uses the mean by default).
# ungroup() drops the date/treatment grouping afterwards so that later verbs
# applied to the result do not silently operate per group.
df %>%
  group_by(date, treatment) %>%
  mutate(temp = na.aggregate(temp)) %>%
  ungroup()
# A tibble: 16 x 4
# date treatment sensor temp
# <fct> <int> <fct> <dbl>
# 1 1/01/2019 1 A 30
# 2 2/01/2019 1 A 29.1
# 3 3/01/2019 1 A 21.2
# 4 4/01/2019 1 A 23.5
# 5 1/01/2019 1 B 20.5
# 6 2/01/2019 1 B 19.8
# 7 3/01/2019 1 B 35.1
# 8 4/01/2019 1 B 23.5
# 9 1/01/2019 2 C 31.2
#10 2/01/2019 2 C 32.1
#11 3/01/2019 2 C 28.1
#12 4/01/2019 2 C 31.2
#13 1/01/2019 2 D 31.2
#14 2/01/2019 2 D 26.5
#15 3/01/2019 2 D 27.9
#16 4/01/2019 2 D 28
# Example data: four daily readings for each of four sensors (A-D); sensors
# A and B belong to treatment 1, C and D to treatment 2. Two temp values are
# missing (sensor A on 4/01/2019 and sensor D on 1/01/2019).
dates <- c("1/01/2019", "2/01/2019", "3/01/2019", "4/01/2019")
sensors <- c("A", "B", "C", "D")
df <- data.frame(
  date = factor(rep(dates, times = 4L), levels = dates),
  treatment = rep(1:2, each = 8L),
  sensor = factor(rep(sensors, each = 4L), levels = sensors),
  temp = c(
    30, 29.1, 21.2, NA,
    20.5, 19.8, 35.1, 23.5,
    31.2, 32.1, 28.1, 31.2,
    NA, 26.5, 27.9, 28
  )
)
rm(dates, sensors)
答案 1 :(得分:0)
这样做如何?按 date 和 treatment 分组,用组内平均值填补缺失值:
# For every date/treatment pair, add two columns:
#   fill  - mean of the temp readings that are present in that pair
#   temp2 - temp with its missing values replaced by fill
# The original temp column is left untouched.
df <- df %>%
  group_by(date, treatment) %>%
  mutate(
    # mean() of an all-missing group would be NaN; keep such gaps as NA.
    fill = if (all(is.na(temp))) NA_real_ else mean(temp, na.rm = TRUE),
    temp2 = coalesce(temp, fill)
  ) %>%
  # Drop the grouping so later operations on df are not done per group.
  ungroup()
答案 2 :(得分:0)
这是使用 purrr 包中 map2_dbl 的一个选项。我们按 treatment 分组(group_by),并将值为 NA 的 temp 替换为同一组中相同 date 的第一个非 NA 的 temp。
library(dplyr)
library(purrr)
# Within each treatment, replace a missing temp with the first non-missing
# temp recorded on the same date by another sensor of that treatment.
df %>%
  group_by(treatment) %>%
  mutate(temp = map2_dbl(temp, date, function(value, day) {
    if (is.na(value)) {
      # match(TRUE, ...) gives NA when no sensor has a reading for this date,
      # so the gap stays NA. which.max() would return 1 in that case and
      # silently copy the group's first row, which belongs to another date.
      temp[match(TRUE, date == day & !is.na(temp))]
    } else {
      value
    }
  })) %>%
  # Drop the grouping so later operations on the result are not per group.
  ungroup()
# date treatment sensor temp
# <fct> <int> <fct> <dbl>
# 1 1/01/2019 1 A 30
# 2 2/01/2019 1 A 29.1
# 3 3/01/2019 1 A 21.2
# 4 4/01/2019 1 A 23.5
# 5 1/01/2019 1 B 20.5
# 6 2/01/2019 1 B 19.8
# 7 3/01/2019 1 B 35.1
# 8 4/01/2019 1 B 23.5
# 9 1/01/2019 2 C 31.2
#10 2/01/2019 2 C 32.1
#11 3/01/2019 2 C 28.1
#12 4/01/2019 2 C 31.2
#13 1/01/2019 2 D 31.2
#14 2/01/2019 2 D 26.5
#15 3/01/2019 2 D 27.9
#16 4/01/2019 2 D 28
数据
# Example data: four daily readings for each of four sensors (A-D); sensors
# A and B belong to treatment 1, C and D to treatment 2. Two temp values are
# missing (sensor A on 4/01/2019 and sensor D on 1/01/2019).
dates <- c("1/01/2019", "2/01/2019", "3/01/2019", "4/01/2019")
sensors <- c("A", "B", "C", "D")
df <- data.frame(
  date = factor(rep(dates, times = 4L), levels = dates),
  treatment = rep(1:2, each = 8L),
  sensor = factor(rep(sensors, each = 4L), levels = sensors),
  temp = c(
    30, 29.1, 21.2, NA,
    20.5, 19.8, 35.1, 23.5,
    31.2, 32.1, 28.1, 31.2,
    NA, 26.5, 27.9, 28
  )
)
rm(dates, sensors)