我正在尝试在时间序列栅格中估算NA值。这是我的数据的可复制示例:
library(raster)
library(rgdal)
library(doParallel)
library(foreach)
r1 <- r2 <- r3 <- r4 <- r5 <- raster(nrow=100, ncol=100)
values(r1) <- runif(ncell(r1))
values(r2) <- runif(ncell(r2))
values(r3) <- runif(ncell(r3))
values(r4) <- runif(ncell(r4))
values(r5) <- runif(ncell(r5))
s <- stack(r1, r2, r3, r4, r5)
time_series <- brick(s)
time_series[1, 30][2] <- NA
time_series[3, 20][3] <- NA
time_series[5, 10][5] <- NA
time_series[8, 40][4] <- NA
有一些程序,例如gapfill,但是我发现它们对于我的任务来说太慢了。我在答案中找到了另一种方法: https://gis.stackexchange.com/questions/279354/ndvi-time-series-with-missing-values
作者:https://gis.stackexchange.com/users/8520/jeffrey-evans
我想将for循环转换为foreach,以便可以为较大的图像进行计算。这是带有for循环的代码:
impute.loess <- function(y, x.length = NULL, s = 0.80,
smooth.data = FALSE, ...) {
if(is.null(x.length)) { x.length = length(y) }
options(warn = -1)
x <- 1:x.length
if (all(is.na(y))) {
return(y)
} else {
p <- loess(y ~ x, span = s, data.frame(x = x, y = y))
if(smooth.data == TRUE) {
y <- predict(p, x)
} else {
na.idx <- which( is.na(y) )
if( length(na.idx) > 1 ) {
y[na.idx] <- predict(p, data.frame(x = na.idx))
}
}
return(y)
}
}
time_series_new <- time_series
time_series_new[] <- NA
for (rl in 1:nrow(time_series)) {
v <- getValues(time_series, rl, 1)
time_series_new[rl,] <- as.matrix(t(apply(v, MARGIN=1, FUN=impute.loess)))
}
我尝试过的Foreach替代方法是这样:
time_series_new2 <- time_series
time_series_new2[] <- NA
cl <- parallel::makeCluster(detectCores()-1)
doParallel::registerDoParallel(cl)
time_series_new2 <- foreach (rl = 1:nrow(time_series),
.packages = "raster",
.combine = 'rbind') %dopar% {
v <- getValues(time_series, rl, 1)
time_series_new[rl,] <- as.matrix(t(apply(v,
MARGIN=1, FUN=impute.loess)))
}
parallel::stopCluster(cl)
但是,这是区别:
> class(time_series_new)
[1] "RasterBrick"
attr(,"package")
[1] "raster"
> class(time_series_new2)
[1] "matrix"
如果我不将foreach循环分配给对象,则仅导出结果。我希望最后有一个更新的栅格对象,但是找不到解决我问题的方法。
我找不到如何设置矩阵值栅格对象的方法-设置值无效,也许是因为尺寸不同,例如:
> dim(time_series_new)
[1] 100 100 5
> dim(time_series_new2)
[1] 10000 5
我知道foreach循环的工作原理有所不同。有没有一种方法可以在foreach循环中更新time_series_new2对象,以便最后可以有一个更新的栅格对象?
编辑:
setValues()实际上有效!为:
time_series_new3 <- time_series
time_series_new3[] <- NA #empty raster object
time_series_new3 <- setValues(time_series_new3, time_series_new2) #filled with matrix rendered from foreach loop
> time_series_new3
class : RasterBrick
dimensions : 100, 100, 10000, 5 (nrow, ncol, ncell, nlayers)
resolution : 3.6, 1.8 (x, y)
extent : -180, 180, -90, 90 (xmin, xmax, ymin, ymax)
crs : +proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0
source : memory
names : layer.1, layer.2, layer.3, layer.4, layer.5
min values : 1.468023e-04, 3.525158e-04, 9.689084e-05, 5.349121e-05, 4.214607e-05
max values : 0.9999564, 0.9999854, 0.9997795, 0.9999780, 0.9997880
> time_series_new2
class : RasterBrick
dimensions : 100, 100, 10000, 5 (nrow, ncol, ncell, nlayers)
resolution : 3.6, 1.8 (x, y)
extent : -180, 180, -90, 90 (xmin, xmax, ymin, ymax)
crs : +proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0
source : memory
names : layer.1, layer.2, layer.3, layer.4, layer.5
min values : 1.468023e-04, 3.525158e-04, 9.689084e-05, 5.349121e-05, 4.214607e-05
max values : 0.9999564, 0.9999854, 0.9997795, 0.9999780, 0.9997880
> all.equal(time_series_new2, time_series_new3)
[1] TRUE
仍然,我想了解有关foreach中的更新。
答案 0 :(得分:1)
在foreach
循环中,您没有更新rasterBrick time_series_new
的副作用。也就是说,time_series_new
知道它是什么-raster
类型的对象。 rbind
组合会将非数据帧强制转换为matrices
。这就是100 x 100 x 5变成10000 x 5的方式。
由于parallel
循环的缓慢性,我假设您要进行for
计算。如果是这种情况,我建议您以其他方式处理该问题,尤其是在没有很多缺失值的情况下。
我们首先可以看到实际上有多少行缺少数据:
missing_dat_rows <- which(is.na(getValues(time_series)) == T, arr.ind = T)[, 1]
missing_dat_rows <- unique(missing_dat_rows)
missing_dat_rows
#[1] 30 220 740 410
因此,我们现在不需遍历10,000个结果,而可以专注于这4个结果。
time_series3 <- time_series
for (mis_row in missing_dat_rows) {
values(time_series3)[mis_row, ] <- impute.loess(getValues(time_series3)[mis_row, ])
}
不幸的是,我无法获得impute.loess()
函数来为我返回值。我做了几个小小的大改变,如果您想继续循环方法,这也可能会有所帮助:
impute.loess <- function(y, x.length = NULL, s = 0.80,
smooth.data = FALSE, ...) {
if(is.null(x.length)) { x.length = length(y) }
options(warn = -1)
x <- 1:x.length
if (all(is.na(y))| all(!is.na(y))) { #added the or statement - I don't think we want to do this if there are no missing values.
return(y)
} else {
p <- loess(y ~ x, span = s, data.frame(x = x, y = y))
if(smooth.data == TRUE) {
y <- predict(p, x)
} else {
na.idx <- which( is.na(y) )
# if( length(na.idx) > 1 ) { #commented out - I feel as though we should be replacing all NAs
y[na.idx] <- predict(p, data.frame(x = na.idx))
# }
}
return(y)
}
}