我有一个如下所示的数据..有一个时间和2个数据列
time = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24)
data1 = c(10, 8, NA, 3, 2, NA, 6, 8, 9, 7, NA, 3, 1, NA, 5, 7, 11, 10, NA, 5, 3, 5, NA, 8)
data2 = c(25, 20, NA, 7.5, NA,10, 15, NA, 22.5, NA, 15, 7.5, NA, 10, 12.5, 17.5, NA, 25, 17.5,NA, 7.5, 12.5, NA, 20)
我使用下面的代码为data1拟合了正弦曲线。
Data <- data.frame(time,data1,data2)
HR <- Data$data1
Time <- Data$time
xc <- cos(2*pi*Time/9)
xs <- sin(2*pi*Time/9)
fit.lm <- lm(HR ~ xc+xs)
pred <- predict(fit.lm, newdata=data.frame(Time=Time))
plot(HR ~ time, data=Data)
lines(Time, pred, col="blue")
现在我想使用正弦曲线填充{{1}}中的NA值。
我还要重复Data$data1
..
如何使用此曲线填充NA值? 我做了些蠢事吗?还有其他方便吗?
答案 0 :(得分:5)
我冒昧地清理代码并绘制更平滑的拟合,以证明估算的值(红色)与之相关。
Data <- data.frame(time = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24) ,
data1 = c(10, 8, NA, 3, 2, NA, 6, 8, 9, 7, NA, 3, 1, NA, 5, 7, 11, 10, NA, 5, 3, 5, NA, 8),
data2 = c(25, 20, NA, 7.5, NA,10, 15, NA, 22.5, NA, 15, 7.5, NA, 10, 12.5, 17.5, NA, 25, 17.5,NA, 7.5, 12.5, NA, 20))
Data$xc <- cos(2*pi*Data$time/9)
Data$xs <- sin(2*pi*Data$time/9)
fit.lm <- lm(data1 ~ xc + xs, data = Data)
# provide only the non-NA values to find predicted (fitted) values and write it to the result
Data$pred[!is.na(Data$data1)] <- predict(fit.lm, newdata = Data[!is.na(Data$data1), ])
plot(data1 ~ time, data = Data)
### smooth fitted values ###
smoothP <- data.frame(time = seq(from = min(Data$time),
to = max(Data$time),
by = 0.1))
smoothP$xc <- cos(2*pi*smoothP$time/9)
smoothP$xs <- sin(2*pi*smoothP$time/9)
smoothP$fitted <- predict(fit.lm, newdata = smoothP)
lines(fitted ~ time, data = smoothP, col = "blue")
### end smooth fitted values ###
# predicting NAs by the same analogy as above, only this time only for NAs
Data$pred[is.na(Data$data1)] <- predict(fit.lm, newdata = Data[is.na(Data$data1), ])
points(pred ~ time, data = Data[is.na(Data$data1),], col = "red", pch = 16)
答案 1 :(得分:3)
这应有助于了解如何使用一个变量(data1
)
time = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24)
data1 = c(10, 8, NA, 3, 2, NA, 6, 8, 9, 7, NA, 3, 1, NA, 5, 7, 11, 10, NA, 5, 3, 5, NA, 8)
data2 = c(25, 20, NA, 7.5, NA,10, 15, NA, 22.5, NA, 15, 7.5, NA, 10, 12.5, 17.5, NA, 25, 17.5,NA, 7.5, 12.5, NA, 20)
Data <- data.frame(time,data1,data2)
HR <- Data$data1
Time <- Data$time
xc <- cos(2*pi*Time/9)
xs <- sin(2*pi*Time/9)
fit.lm <- lm(HR ~ xc+xs)
pred <- predict(fit.lm, newdata=data.frame(Time=Time))
# update HR using prediction values only when HR has NA values
HR[which(is.na(HR))] = pred[which(is.na(HR))]
# plot again (to visualise that NAs are correctly replaced)
plot(HR ~ time, data=Data)
lines(Time, pred, col="blue")
您可以以类似的方式处理多个变量(例如循环变量),但我强烈推荐这样的tidyverse
方法:
time = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24)
data1 = c(10, 8, NA, 3, 2, NA, 6, 8, 9, 7, NA, 3, 1, NA, 5, 7, 11, 10, NA, 5, 3, 5, NA, 8)
data2 = c(25, 20, NA, 7.5, NA,10, 15, NA, 22.5, NA, 15, 7.5, NA, 10, 12.5, 17.5, NA, 25, 17.5,NA, 7.5, 12.5, NA, 20)
Data <- data.frame(time,data1,data2)
library(tidyverse)
# function to build your model
fm = function(Time, HR) {
xc <- cos(2*pi*Time/9)
xs <- sin(2*pi*Time/9)
fit.lm <- lm(HR ~ xc+xs)
fit.lm
}
Data %>%
gather(d,HR,-time) %>% # reshape dataset
group_by(d) %>% # for each variable name
nest() %>% # create a dataset of all other columns
mutate(mdl = map(data, ~ fm(.$time,.$HR))) %>% # create a model using your function for each subset
mutate(pred = map2(mdl, data, ~ predict(.x, newdata=.y))) %>% # predict using the corresponding model
unnest(data, pred) %>% # unnest columns of interest
mutate(HR_upd = ifelse(is.na(HR), pred, HR)) # replace NAs with predictions
# # A tibble: 48 x 5
# d pred time HR HR_upd
# <chr> <dbl> <dbl> <dbl> <dbl>
# 1 data1 8.314433 1 10 10.000000
# 2 data1 6.024161 2 8 8.000000
# 3 data1 3.814390 3 NA 3.814390
# 4 data1 2.719097 4 3 3.000000
# 5 data1 3.250781 5 2 2.000000
# 6 data1 5.160662 6 NA 5.160662
# 7 data1 7.555084 7 6 6.000000
# 8 data1 9.313672 8 8 8.000000
# 9 data1 9.613562 9 9 9.000000
# 10 data1 8.314433 10 7 7.000000
# # ... with 38 more rows
您可以在一个重新整形的数据集中看到预测(pred
),原始HR
值和更新后的HR_upd
,其中您的初始变量名为data1
和{{1现在是一个变量data2
的值。
请注意,您可以在d
之前停止pipped过程,以便创建包含通过命令获取的所有信息的数据集:
unnest
如果您希望使用
,现在可以看到您的模型Data %>%
gather(d,HR,-time) %>%
group_by(d) %>%
nest() %>%
mutate(mdl = map(data, ~ fm(.$time,.$HR))) %>%
mutate(pred = map2(mdl, data, ~ predict(.x, newdata=.y))) -> dt_full
dt_full
# # A tibble: 2 x 4
# d data mdl pred
# <chr> <list> <list> <list>
# 1 data1 <tibble [24 x 2]> <S3: lm> <dbl [24]>
# 2 data2 <tibble [24 x 2]> <S3: lm> <dbl [24]>
第一个模型对应dt_full$mdl
# [[1]]
#
# Call:
# lm(formula = HR ~ xc + xs)
#
# Coefficients:
# (Intercept) xc xs
# 6.1962 3.4174 -0.7773
#
#
# [[2]]
#
# Call:
# lm(formula = HR ~ xc + xs)
#
# Coefficients:
# (Intercept) xc xs
# 15.879 7.443 -1.234
,第二个模型对应data1
。