样本数据
1/1/2000 NA NA NA 29.71 NA
1/2/2000 NA NA NA NA NA
1/3/2000 NA NA NA NA NA
1/4/2000 NA NA NA 29.25 NA
1/5/2000 NA NA NA 30.28 NA
1/6/2000 NA NA NA 27.66 NA
1/7/2000 NA NA NA 27.22 NA
1/8/2000 NA NA NA 27.27 NA
1/9/2000 170 4.1 NA 5.24 NA
1/10/2000 NA NA NA NA NA
1/11/2000 NA NA NA 27.65 NA
1/12/2000 NA NA NA 28.28 100.57
1/13/2000 NA NA NA 27.52 NA
我正在尝试对大量 NA 值进行插值。
我有唯一的日期(键),但大多数[其他]数据列(combined_data_z[, a])以 NULL / NA 值开头或结尾。我想按日期对这些[其他]列中的空值进行插值,但在尝试时
遇到了以下错误:Error in approx(x[!na], y[!na], xout, ...) : need at least two non-NA values to interpolate
library(zoo)

# Fill the NA gaps of every data column in `combined_data_z` by linear
# interpolation along the date axis. One result column is kept per input
# column in the matrix `test1_z_approx` (the original loop overwrote its
# result on every iteration, so only the last column survived).

# Column 1 is the date key. The sample rows use month/day/year text
# (e.g. "1/13/2000"); parse it so the x-axis follows the calendar instead of
# factor codes or NA produced by coercing the raw text to numeric.
dates <- combined_data_z[, 1]
if (is.factor(dates) || is.character(dates)) {
  dates <- as.Date(as.character(dates), format = "%m/%d/%Y")
}
if (anyNA(dates)) {
  stop("date column contains values that could not be parsed", call. = FALSE)
}

# Start with 2 because the 1st column is the date; one data column per entry
# of `parsedList`, exactly as the original counter `a` walked them.
data_cols <- seq_along(parsedList) + 1L

test1_z_approx <- matrix(
  NA_real_,
  nrow = nrow(combined_data_z),
  ncol = length(data_cols),
  dimnames = list(NULL, colnames(combined_data_z)[data_cols])
)

for (k in seq_along(data_cols)) {
  test1 <- combined_data_z[, data_cols[k]]
  observed <- !is.na(test1)

  if (sum(observed) >= 2L) {
    # rule = 2 carries the first/last observed value outward over leading and
    # trailing NAs; na.rm = FALSE keeps the series at full length.
    test1_z <- zoo(test1)
    test1_z_approx[, k] <- as.numeric(coredata(
      na.fill(
        na.approx(test1_z, x = dates, rule = 2, na.rm = FALSE),
        "extend"
      )
    ))
  } else if (any(observed)) {
    # approx() needs at least two points ("need at least two non-NA values to
    # interpolate"). With a single observation, extending it is the only
    # consistent reading of rule = 2 / "extend".
    test1_z_approx[, k] <- test1[observed]
  }
  # Columns with no observation at all stay NA.
}
更新:显然问题与 for 循环有关。当我删除循环、使用 print 语句测试并从那里逐步构建时,我发现它没有用括号括起来(但我需要循环)。
# Loop-free check: interpolate only column 4 of `combined_data_z` and print
# the result.

# Column 1 is the date key. The sample rows use month/day/year text
# (e.g. "1/13/2000"); parse it so the x-axis follows the calendar instead of
# factor codes or NA produced by coercing the raw text to numeric.
dates <- combined_data_z[, 1]
if (is.factor(dates) || is.character(dates)) {
  dates <- as.Date(as.character(dates), format = "%m/%d/%Y")
}
if (anyNA(dates)) {
  stop("date column contains values that could not be parsed", call. = FALSE)
}

test1 <- combined_data_z[, 4]
test1_z <- zoo(test1)

# rule = 2 carries the first/last observed value outward over leading and
# trailing NAs; na.rm = FALSE keeps the series at full length.
test1_z_approx <- na.fill(
  na.approx(test1_z, x = dates, rule = 2, na.rm = FALSE),
  "extend"
)
print(test1_z_approx)
答案 0 :(得分:2)
对于您在评论中提供的以下数据集,可以使用:
library(zoo)
combined_data_z <- read.csv(file="http://thistleknot.sytes.net/wordpress/wp-content/uploads/2018/04/output_NoNA.csv")

# Interpolate every column from the 3rd onward and collect the results, one
# column per input column, in the numeric matrix `test1_z_approx`.

# NOTE(review): column 1 is used as the interpolation x-axis and column 2 is
# skipped, so presumably column 1 is a row index and column 2 the date text.
# Confirm against the CSV; if column 1 holds date strings they must be parsed
# with as.Date() first.
dates <- combined_data_z[, 1]

# Safe for frames with fewer than 3 columns (yields an empty sequence instead
# of the descending 3:2 that `3:ncol()` would produce).
data_cols <- seq_len(ncol(combined_data_z))[-(1:2)]

test1_z_approx <- matrix(
  NA_real_,
  nrow = nrow(combined_data_z),
  ncol = length(data_cols),
  dimnames = list(NULL, colnames(combined_data_z)[data_cols])
)

for (k in seq_along(data_cols)) {
  test1 <- combined_data_z[, data_cols[k]]
  observed <- !is.na(test1)

  if (sum(observed) >= 2L) {
    # rule = 2 carries the first/last observed value outward over leading and
    # trailing NAs; na.rm = FALSE keeps the series at full length.
    test1_z <- zoo(test1)
    test1_z_approx[, k] <- as.numeric(coredata(
      na.fill(
        na.approx(test1_z, x = dates, rule = 2, na.rm = FALSE),
        "extend"
      )
    ))
  } else if (any(observed)) {
    # approx() needs at least two points; a single observation is extended
    # over the whole column, matching the rule = 2 / "extend" intent.
    test1_z_approx[, k] <- test1[observed]
  }
  # Columns with no observation at all stay NA.
}
如果您的数据集以“日期”列开头,则代码将如下所示:
head(combined_data_z)
# date CPIAUCSL UNRATE MEHOINUSA672N INTDSRUSM193N CIVPART
# 1 1/1/2000 169.3 4 58544 5 67.3
# 2 1/2/2000 NA NA NA NA NA
# 3 1/3/2000 NA NA NA NA NA
# 4 1/4/2000 NA NA NA NA NA
# 5 1/5/2000 NA NA NA NA NA
# 6 1/6/2000 NA NA NA NA NA

# Interpolate every column after the leading date column and collect the
# results, one column per input column, in the numeric matrix
# `test1_z_approx`.

# The date column holds month/day/year text (see the rows above). It has to be
# parsed: used as-is it is coerced to factor codes (alphabetical order, so
# "1/10/2000" sorts before "1/2/2000") or to NA, and the interpolation runs
# over a scrambled x-axis.
dates <- combined_data_z[, 1]
if (is.factor(dates) || is.character(dates)) {
  dates <- as.Date(as.character(dates), format = "%m/%d/%Y")
}
if (anyNA(dates)) {
  stop("date column contains values that could not be parsed", call. = FALSE)
}

# Every column except the date; empty (not 2:1) when there is only one column.
data_cols <- seq_len(ncol(combined_data_z))[-1]

test1_z_approx <- matrix(
  NA_real_,
  nrow = nrow(combined_data_z),
  ncol = length(data_cols),
  dimnames = list(NULL, colnames(combined_data_z)[data_cols])
)

for (k in seq_along(data_cols)) {
  test1 <- combined_data_z[, data_cols[k]]
  observed <- !is.na(test1)

  if (sum(observed) >= 2L) {
    # rule = 2 carries the first/last observed value outward over leading and
    # trailing NAs; na.rm = FALSE keeps the series at full length.
    test1_z <- zoo(test1)
    test1_z_approx[, k] <- as.numeric(coredata(
      na.fill(
        na.approx(test1_z, x = dates, rule = 2, na.rm = FALSE),
        "extend"
      )
    ))
  } else if (any(observed)) {
    # approx() needs at least two points; a single observation is extended
    # over the whole column, matching the rule = 2 / "extend" intent.
    test1_z_approx[, k] <- test1[observed]
  }
  # Columns with no observation at all stay NA.
}

head(test1_z_approx)
# Row 1 keeps the observed values (169.3, 4, 58544, 5, 67.3); the rows after it
# are interpolated along the calendar axis.
# The output previously shown here (e.g. 224.0420 directly after 169.3000 in
# the first column) came from interpolating over the unparsed date column and
# is not what this code prints.
答案 1 :(得分:0)
感谢Katia的帮助(特别是我的x和y需要在不同的数据帧中)
# Interpolate all data columns of `df3` along its date column and write the
# dates plus the filled columns to "output_test.csv".
combined_data_z <- df3
#https://stackoverflow.com/a/50173660/1731972

# Keep the key column as a one-column data frame so it can be re-attached
# unchanged to the output.
dates <- combined_data_z[1]
print(dates)

# Important: the data passed to na.approx must not contain the date column.
# Copy the whole frame and drop its first column.
test1 <- combined_data_z
test1[1] <- NULL
print(test1)

# zoo() is given a data.frame explicitly; the original author found that the
# conversion was required.
test1_z <- zoo(data.frame(test1))
print(test1_z)

# Build the interpolation x-axis from the key column. Date text is parsed as
# month/day/year (the format of the sample rows, e.g. "1/13/2000") so the axis
# follows the calendar instead of factor codes or NA; other column types are
# passed through unchanged.
date_index <- dates[[1]]
if (is.factor(date_index) || is.character(date_index)) {
  date_index <- as.Date(as.character(date_index), format = "%m/%d/%Y")
}
if (anyNA(date_index)) {
  stop("date column contains values that could not be parsed", call. = FALSE)
}

# rule = 2 carries the first/last observed value outward over leading and
# trailing NAs; na.rm = FALSE keeps every row.
test1_z_approx <- na.fill(
  na.approx(test1_z, date_index, rule = 2, na.rm = FALSE),
  "extend"
)
print(test1_z_approx)

# Re-attach the original (unparsed) date column so the CSV keeps the input's
# date text. Built as a data frame rather than a c() list; the original also
# printed `new` before assigning it, which printed the `new` function instead.
new <- data.frame(dates, data.frame(test1_z_approx))
print(new)
write.csv(new, file = "output_test.csv")