NA填充使用正弦曲线拟合

时间:2017-12-15 13:14:34

标签: r curve-fitting na

我有一个如下所示的数据..有一个时间和2个数据列

time = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24)  
data1 = c(10, 8, NA, 3, 2, NA, 6, 8, 9, 7, NA, 3, 1, NA, 5, 7, 11, 10, NA, 5, 3, 5, NA, 8)
data2 = c(25, 20, NA, 7.5, NA,10, 15, NA, 22.5, NA, 15, 7.5, NA, 10, 12.5, 17.5, NA, 25, 17.5,NA, 7.5, 12.5, NA, 20)

我使用下面的代码为data1拟合了正弦曲线。

Data <- data.frame(time,data1,data2)  
HR <- Data$data1  
Time <- Data$time  
xc <- cos(2*pi*Time/9)  
xs <- sin(2*pi*Time/9)  
fit.lm <- lm(HR ~ xc+xs)  
pred <- predict(fit.lm, newdata=data.frame(Time=Time))  
plot(HR ~ time, data=Data)  
lines(Time, pred, col="blue") 

The sinusoidal curve with my data1

现在我想使用正弦曲线填充{{​​1}}中的NA值。 我还要重复Data$data1 ..

如何使用此曲线填充NA值? 我做了些蠢事吗?还有其他方便吗?

2 个答案:

答案 0 :(得分:5)

我冒昧地清理代码并绘制更平滑的拟合,以证明估算的值(红色)与之相关。

Data <- data.frame(time = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24) ,
                   data1 = c(10, 8, NA, 3, 2, NA, 6, 8, 9, 7, NA, 3, 1, NA, 5, 7, 11, 10, NA, 5, 3, 5, NA, 8),
                   data2 = c(25, 20, NA, 7.5, NA,10, 15, NA, 22.5, NA, 15, 7.5, NA, 10, 12.5, 17.5, NA, 25, 17.5,NA, 7.5, 12.5, NA, 20))

Data$xc <- cos(2*pi*Data$time/9)
Data$xs <- sin(2*pi*Data$time/9)

fit.lm <- lm(data1 ~ xc + xs, data = Data)  

# provide only the non-NA values to find predicted (fitted) values and write it to the result
Data$pred[!is.na(Data$data1)] <- predict(fit.lm, newdata = Data[!is.na(Data$data1), ])  
plot(data1 ~ time, data = Data)

### smooth fitted values ###
smoothP <- data.frame(time = seq(from = min(Data$time),
                                 to = max(Data$time),
                                 by = 0.1))
smoothP$xc <- cos(2*pi*smoothP$time/9)
smoothP$xs <- sin(2*pi*smoothP$time/9)

smoothP$fitted <- predict(fit.lm, newdata = smoothP)
lines(fitted ~ time, data = smoothP, col = "blue")
### end smooth fitted values ###

# predicting NAs by the same analogy as above, only this time only for NAs
Data$pred[is.na(Data$data1)] <- predict(fit.lm, newdata = Data[is.na(Data$data1), ])

points(pred ~ time, data = Data[is.na(Data$data1),], col = "red", pch = 16)

enter image description here

答案 1 :(得分:3)

这应有助于了解如何使用一个变量(data1

time = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24)  
data1 = c(10, 8, NA, 3, 2, NA, 6, 8, 9, 7, NA, 3, 1, NA, 5, 7, 11, 10, NA, 5, 3, 5, NA, 8)
data2 = c(25, 20, NA, 7.5, NA,10, 15, NA, 22.5, NA, 15, 7.5, NA, 10, 12.5, 17.5, NA, 25, 17.5,NA, 7.5, 12.5, NA, 20)

Data <- data.frame(time,data1,data2)  
HR <- Data$data1  
Time <- Data$time  
xc <- cos(2*pi*Time/9)  
xs <- sin(2*pi*Time/9)  
fit.lm <- lm(HR ~ xc+xs)  
pred <- predict(fit.lm, newdata=data.frame(Time=Time))   

# update HR using prediction values only when HR has NA values
HR[which(is.na(HR))] = pred[which(is.na(HR))]

# plot again (to visualise that NAs are correctly replaced)
plot(HR ~ time, data=Data)
lines(Time, pred, col="blue")

enter image description here

您可以以类似的方式处理多个变量(例如循环变量),但我强烈推荐这样的tidyverse方法:

time = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24)  
data1 = c(10, 8, NA, 3, 2, NA, 6, 8, 9, 7, NA, 3, 1, NA, 5, 7, 11, 10, NA, 5, 3, 5, NA, 8)
data2 = c(25, 20, NA, 7.5, NA,10, 15, NA, 22.5, NA, 15, 7.5, NA, 10, 12.5, 17.5, NA, 25, 17.5,NA, 7.5, 12.5, NA, 20)

Data <- data.frame(time,data1,data2) 

library(tidyverse)

# function to build your model
fm = function(Time, HR) {

  xc <- cos(2*pi*Time/9)  
  xs <- sin(2*pi*Time/9)  
  fit.lm <- lm(HR ~ xc+xs)
  fit.lm

}


Data %>%
  gather(d,HR,-time) %>%     # reshape dataset
  group_by(d) %>%            # for each variable name
  nest() %>%                 # create a dataset of all other columns
  mutate(mdl = map(data, ~ fm(.$time,.$HR))) %>%     # create a model using your function for each subset
  mutate(pred = map2(mdl, data, ~ predict(.x, newdata=.y))) %>%   # predict using the corresponding model
  unnest(data, pred) %>%                                          # unnest columns of interest
  mutate(HR_upd = ifelse(is.na(HR), pred, HR))                    # replace NAs with predictions

# # A tibble: 48 x 5
#       d     pred  time    HR    HR_upd
#   <chr>    <dbl> <dbl> <dbl>     <dbl>
# 1 data1 8.314433     1    10 10.000000
# 2 data1 6.024161     2     8  8.000000
# 3 data1 3.814390     3    NA  3.814390
# 4 data1 2.719097     4     3  3.000000
# 5 data1 3.250781     5     2  2.000000
# 6 data1 5.160662     6    NA  5.160662
# 7 data1 7.555084     7     6  6.000000
# 8 data1 9.313672     8     8  8.000000
# 9 data1 9.613562     9     9  9.000000
# 10 data1 8.314433    10     7  7.000000
# # ... with 38 more rows

您可以在一个重新整形的数据集中看到预测(pred),原始HR值和更新后的HR_upd,其中您的初始变量名为data1和{{1现在是一个变量data2的值。

请注意,您可以在d之前停止pipped过程,以便创建包含通过命令获取的所有信息的数据集:

unnest

如果您希望使用

,现在可以看到您的模型
Data %>%
  gather(d,HR,-time) %>%   
  group_by(d) %>%         
  nest() %>%              
  mutate(mdl = map(data, ~ fm(.$time,.$HR))) %>%    
  mutate(pred = map2(mdl, data, ~ predict(.x, newdata=.y))) -> dt_full

dt_full

# # A tibble: 2 x 4
#         d              data      mdl       pred
#     <chr>            <list>   <list>     <list>
#   1 data1 <tibble [24 x 2]> <S3: lm> <dbl [24]>
#   2 data2 <tibble [24 x 2]> <S3: lm> <dbl [24]>

第一个模型对应dt_full$mdl # [[1]] # # Call: # lm(formula = HR ~ xc + xs) # # Coefficients: # (Intercept) xc xs # 6.1962 3.4174 -0.7773 # # # [[2]] # # Call: # lm(formula = HR ~ xc + xs) # # Coefficients: # (Intercept) xc xs # 15.879 7.443 -1.234 ,第二个模型对应data1