R na.approx错误:需要至少两个非NA值来插值

时间:2018-05-04 03:47:24

标签: r interpolation

样本数据

1/1/2000    NA  NA  NA  29.71   NA
1/2/2000    NA  NA  NA  NA  NA
1/3/2000    NA  NA  NA  NA  NA
1/4/2000    NA  NA  NA  29.25   NA
1/5/2000    NA  NA  NA  30.28   NA
1/6/2000    NA  NA  NA  27.66   NA
1/7/2000    NA  NA  NA  27.22   NA
1/8/2000    NA  NA  NA  27.27   NA
1/9/2000    170 4.1 NA  5.24    NA
1/10/2000   NA  NA  NA  NA  NA
1/11/2000   NA  NA  NA  27.65   NA
1/12/2000   NA  NA  NA  28.28   100.57
1/13/2000   NA  NA  NA  27.52   NA

我正在尝试对大量NA值进行插值。

我有唯一的日期列(键),但大多数[其他]数据列(combined_data_z[,a])以NULL/NA值开头或结尾。我想按日期对这些[其他]列中的空值进行插值,但在尝试时遇到了以下错误:
Error in approx(x[!na], y[!na], xout, ...) :    need at least two
  non-NA values to interpolate
library(zoo)

# Column 1 holds the date, so the first data column is column 2.
a <- 2
for (i in parsedList) {
  # x positions (column 1) and the data column currently being processed.
  dates <- combined_data_z[, 1]
  test1 <- combined_data_z[, a]
  test1_z <- zoo(test1)

  # Interpolate the series against the dates, then pass the result through
  # na.fill(..., "extend"). The result is overwritten on every pass.
  test1_z_approx <- na.fill(
    na.approx(test1_z, x = dates, rule = 2, na.rm = FALSE),
    "extend"
  )
  # print(test1_z_approx)

  # Move on to the next data column.
  a <- a + 1
}

更新:问题显然与for循环有关。当我去掉循环、用print语句测试并在此基础上逐步构建时,我发现代码不放在循环的花括号里就可以正常运行(但我确实需要循环)。

# Same steps as the loop body, run once for a single column (column 4)
# outside of any loop.
dates <- combined_data_z[, 1]
test1 <- combined_data_z[, 4]
test1_z <- zoo(test1)

# Interpolate against the dates, then apply na.fill(..., "extend").
test1_z_approx <- na.fill(
  na.approx(test1_z, x = dates, rule = 2, na.rm = FALSE),
  "extend"
)
print(test1_z_approx)

2 个答案:

答案 0 :(得分:2)

对于您在评论中提供的数据集,以下代码可以正常运行:

library(zoo)
combined_data_z <- read.csv(file="http://thistleknot.sytes.net/wordpress/wp-content/uploads/2018/04/output_NoNA.csv")

# Interpolate every value column of combined_data_z against column 1.
# In this file the value columns start at column 3. The result matrix
# test1_z_approx has one column per value column, in the same order.
first_value_col <- 3
value_cols <- seq_len(ncol(combined_data_z))
# Empty when the data has fewer than three columns, so the loop is skipped
# instead of counting backwards as 3:ncol(...) would.
value_cols <- value_cols[value_cols >= first_value_col]

# x positions for the interpolation; the same for every column.
dates <- combined_data_z[, 1]

test1_z_approx <- matrix(
  NA,
  ncol = length(value_cols),
  nrow = nrow(combined_data_z)
)
for (i in value_cols) {
  out_col <- i - first_value_col + 1
  test1 <- combined_data_z[, i]
  observed <- which(!is.na(test1))

  if (length(observed) >= 2) {
    test1_z <- zoo(test1)
    filled <- na.fill(
      na.approx(test1_z, x = dates, rule = 2, na.rm = FALSE),
      "extend"
    )
    test1_z_approx[, out_col] <- as.matrix(filled)[, 1]
  } else if (length(observed) == 1) {
    # approx() needs at least two non-NA values to interpolate, so a column
    # with a single observation is filled with that one value instead.
    test1_z_approx[, out_col] <- test1[observed]
  }
  # Columns without any observation are left as NA.
}

如果您的数据集以“日期”列开头,则代码将如下所示:

head(combined_data_z)
#          date CPIAUCSL UNRATE MEHOINUSA672N INTDSRUSM193N CIVPART
#    1 1/1/2000    169.3      4         58544             5    67.3
#    2 1/2/2000       NA     NA            NA            NA      NA
#    3 1/3/2000       NA     NA            NA            NA      NA
#    4 1/4/2000       NA     NA            NA            NA      NA
#    5 1/5/2000       NA     NA            NA            NA      NA
#    6 1/6/2000       NA     NA            NA            NA      NA

# Interpolate every value column against the date column (column 1).
# The value columns start at column 2. The result matrix test1_z_approx
# has one column per value column, in the same order.
first_value_col <- 2
value_cols <- seq_len(ncol(combined_data_z))
# Empty when there is no value column, so the loop is skipped instead of
# counting backwards as 2:ncol(...) would.
value_cols <- value_cols[value_cols >= first_value_col]

# x positions for the interpolation; the same for every column.
# NOTE(review): `dates` is passed to na.approx() as read from the file. The
# sample output below jumps from 169.3 to 224.0 between adjacent rows, which
# suggests the date strings may not be treated as chronological x positions.
# If so, parse them first, e.g. as.Date(dates, format = "%m/%d/%Y") -- confirm
# the date format before changing this.
dates <- combined_data_z[, 1]

test1_z_approx <- matrix(
  NA,
  ncol = length(value_cols),
  nrow = nrow(combined_data_z)
)
for (i in value_cols) {
  out_col <- i - first_value_col + 1
  test1 <- combined_data_z[, i]
  observed <- which(!is.na(test1))

  if (length(observed) >= 2) {
    test1_z <- zoo(test1)
    filled <- na.fill(
      na.approx(test1_z, x = dates, rule = 2, na.rm = FALSE),
      "extend"
    )
    test1_z_approx[, out_col] <- as.matrix(filled)[, 1]
  } else if (length(observed) == 1) {
    # approx() needs at least two non-NA values to interpolate, so a column
    # with a single observation is filled with that one value instead.
    test1_z_approx[, out_col] <- test1[observed]
  }
  # Columns without any observation are left as NA.
}

head(test1_z_approx)
#         [,1]     [,2]  [,3]     [,4]     [,5]
#[1,] 169.3000 4.000000 58544 5.000000 67.30000
#[2,] 224.0420 4.033100 59039 2.844406 64.07145
#[3,] 196.4639 3.959895 59039 4.579983 65.57215
#[4,] 188.9426 3.939930 59039 5.053322 65.98144
#[5,] 186.4355 3.933275 59039 5.211101 66.11786
#[6,] 183.9284 3.926620 59039 5.368881 66.25429

答案 1 :(得分:0)

感谢Katia的帮助(关键在于我的x和y需要放在不同的数据框中)

combined_data_z <- df3
# Approach based on https://stackoverflow.com/a/50173660/1731972

# Keep x (the dates) and y (the value columns) in separate objects.
# `[1]` keeps a one-column data frame. Column 1 is assumed to hold the
# dates -- TODO confirm against how df3 was built.
dates <- combined_data_z[1]
print(dates)

# Important: the data handed to na.approx() must start at column 2, i.e.
# without the date column, otherwise na.approx() will not work. Copy the
# whole data frame and drop its first column.
test1 <- combined_data_z
test1[1] <- NULL
print(test1)

# The author found the explicit data.frame() wrapper necessary here.
test1_z <- zoo(data.frame(test1))
print(test1_z)

# Interpolate all value columns at once, using the date column as x.
# `[[1]]` extracts the column as a vector without depending on it being
# named "date".
test1_z_approx <- na.fill(
  na.approx(test1_z, dates[[1]], rule = 2, na.rm = FALSE),
  "extend"
)
print(test1_z_approx)

# Recombine the dates with the interpolated values. c() on two data frames
# gives a list of columns, which write.csv() coerces to a data frame.
new <- c(data.frame(dates), data.frame(test1_z_approx))
print(new)

write.csv(new, file = "output_test.csv")