Question

我有一个由数值和零组成的矩阵，其中零表示缺失值（NA）。已知的数值零散地分布在矩阵中，我想为所有NA位置插值。数据如下：

我尝试用矩阵中所有已知值来估计这些缺失值：把每个已知值乘以一个随距离衰减的权重（使得点越远，其影响越小）。插值结果如下：

如您所见，这种方法效果不佳：它只对最接近已知值的NA有明显影响，稍远一些的位置很快就收敛到平均值。我认为这是因为它使用了整个范围内（起伏很大）的所有已知点，而不仅仅是离该位置最近的点。

显然，矩阵运算不是我的专长......我需要改变什么才能正确地进行线性插值？

以下是代码：

library(dplyr)
library(plotly)

# Sample surface: a 19 x 30 numeric matrix in which 0 stands in for "missing".
# Only columns 10, 20 and 30 carry measured values (19 readings each); every
# other cell is a zero placeholder that the code below tries to fill in.
Cont <- matrix(0, nrow = 19L, ncol = 30L)

Cont[, 10] <- c(1816, 2320, 1406, 2028, 1760, 1932, 1630, 1835, 1873, 1474,
                1671, 2073, 1347, 2131, 2038, 1969, 2036, 1602, 1986)

Cont[, 20] <- c(2311, 1947, 2094, 1947, 2441, 1775, 1461, 1260, 1494, 2022,
                1863, 1587, 2082, 1567, 1770, 2065, 1404, 1809, 1972)

Cont[, 30] <- c(2314, 1595, 2065, 1870, 2178, 1410, 1994, 1979, 2111, 1531,
                1917, 1559, 2109, 1921, 1606, 1469, 1601, 1771, 1771)

  ## Fill the zero ("missing") cells of Cont from the known cells using
  ## inverse-distance weighting (Shepard's method) and plot the result.
  ##
  ## Reads:    Cont (numeric matrix, 0 = missing, > 0 = known value)
  ## Creates:  ControlValues (row, col, V of the known cells)
  ##           toFill        (row, col, V of the estimated cells)
  ##           Both          (all cells, ordered by row then col)
  ##           NewCont       (matrix shaped like Cont with the gaps filled)

  ## Known control points. which(arr.ind = TRUE) names the index columns
  ## `row` and `col`, which the code below relies on.
  idx <- which(Cont > 0, arr.ind = TRUE)
  V <- Cont[idx]
  ControlValues <- data.frame(idx, V)
  stopifnot(nrow(ControlValues) > 0)

  ## Cells that need an estimate.
  toFill <- which(Cont == 0, arr.ind = TRUE) %>% as.data.frame

  ## Weight every control point by 1 / distance^idwPower, where distance is
  ## the Euclidean distance in (row, col) index space. The previous version
  ## built separate per-axis weights 1/|d_row| and 1/|d_col|, replaced Inf by
  ## 0 and averaged the two; a control point sharing the row (or column) of
  ## the target therefore got *zero* weight on that axis, and far-away points
  ## dominated, which pulled every estimate towards the global mean.
  ## A target cell can never coincide with a control cell (one is == 0, the
  ## other > 0), so the distance is strictly positive and no Inf can occur.
  idwPower <- 2
  dRow <- outer(toFill$row, ControlValues$row, "-")
  dCol <- outer(toFill$col, ControlValues$col, "-")
  W <- (dRow^2 + dCol^2)^(-idwPower / 2)

  ## Normalised weighted mean of the control values, one per target cell.
  toFill$V <- as.vector(W %*% ControlValues$V) / rowSums(W)

  ## Add back in the controls and reorder so the values run row by row.
  Both <- bind_rows(ControlValues, toFill) %>%
    arrange(row, col)

  ## Every cell must be present exactly once; otherwise matrix() would
  ## silently recycle (e.g. if Cont contained negative or NA cells).
  stopifnot(nrow(Both) == length(Cont))

  ## And plot the new surface.
  NewCont <- matrix(Both$V, nrow = nrow(Cont), ncol = ncol(Cont), byrow = TRUE)
  plot_ly(z = NewCont, type = "surface", showscale = FALSE)

Answer 1

在R中对数据进行插值和外推的一种方法是使用akima包。下面的代码以数据框ControlValues中的已知数据点作为输入，先执行双线性插值，再进行外推，以填充Cont中的零。

library(akima)
library(plotly)

# Known samples: columns 1-3 of ControlValues hold the row index, the column
# index and the measured value of every known cell.
knownX <- ControlValues[, 1]
knownY <- ControlValues[, 2]
knownZ <- ControlValues[, 3]

# Output grid: one node per cell of Cont.
gridX <- seq_len(nrow(Cont))
gridY <- seq_len(ncol(Cont))

# Pass 1: linear interpolation over the whole grid. Grid nodes that fall
# outside the convex hull of the known samples come back as NA.
firstPass <- akima::interp(x = knownX, y = knownY, z = knownZ,
                           xo = gridX, yo = gridY, linear = TRUE)
NewCont <- firstPass$z

# Pass 2: the first nine columns lie outside the hull, so overwrite them with
# extrapolated values from interp.old (ncp = 2, extrap = TRUE).
edgeCols <- 1:9
secondPass <- akima::interp.old(x = knownX, y = knownY, z = knownZ,
                                xo = gridX, yo = edgeCols,
                                ncp = 2, extrap = TRUE)
NewCont[, edgeCols] <- secondPass$z

plot_ly(z = NewCont, type = "surface", showscale = FALSE)

注意：

第一次调用akima::interp执行双线性插值。有关用法和详细信息，请参阅帮助页面?akima::interp。
- 关键点是，已知数据点的输入x，y和z无需位于x-y网格上。在这种情况下，这些是ControlValues的列。
- akima::interp的输出是一个列表，其z分量是网格上的内插值矩阵，该网格的x和y坐标分别由输入xo和yo给出。在这种情况下，它们就是Cont的行索引和列索引。
- 如帮助页面所述：
凸包外部点的z值将返回NA。

在这种情况下，输出中与yo=1:9对应的前九列将全部为NA。
第二次调用akima::interp（实际为akima::interp.old）执行数据外推，以填充第一次调用留下的NA。有关此用法的详细信息，请参阅相关的SO问题/回答。

上述方法给出了以下结果

执行双线性插值的另一种方法是使用fields包中的interp.surface函数。之所以提到这种方法，是因为它的实现是一个R脚本，在R命令行键入函数名interp.surface即可列出其源码。

library(fields)

loc <- make.surface.grid(list(x=1:nrow(Cont), y=1:ncol(Cont)))

NewCont2 <- matrix(interp.surface(list(x=sort(unique(ControlValues[,1])),
                                       y=sort(unique(ControlValues[,2])),
                                       z=matrix(ControlValues[,3],
                                                nrow=length(unique(ControlValues[,1])),
                                                ncol=length(unique(ControlValues[,2])))),
                                  loc),
                   nrow=nrow(Cont), ncol=ncol(Cont))
NewCont2[,1:9] <- akima::interp.old(x=ControlValues[,1], y=ControlValues[,2],
                                    z=ControlValues[,3], xo=1:nrow(Cont),
                                    yo=1:9, ncp=2, extrap=TRUE)$z

此处的要求与akima::interp的要求相反。具体而言，已知数据点必须位于x-y网格上。但是，待插值的坐标不需要位于网格上，而是以矩阵形式给出：矩阵的两列分别是x坐标和y坐标的列向量，其中每个元组(x[i],y[i])是一个待插值的坐标。由于ControlValues中的数据点位于网格上，因此本例也满足这些要求。有关用法和详细信息，请参阅帮助页面?interp.surface。

注意：

sort(unique(ControlValues[,1]))和sort(unique(ControlValues[,2]))只是为已知数据点的网格提供x和y坐标
列表中的z组件只是已知数据点的z值，这些值已被重新整形为已知数据点网格上的矩阵
待插值的坐标矩阵由make.surface.grid生成，使用Cont的行索引和列索引作为x和y
对位于已知点网格之外的坐标进行插值，将得到内插值NA
interp.surface返回与待插值坐标对应的z值向量。然后将其重新整形为待插值坐标网格上的矩阵，其维度为nrow(Cont)×ncol(Cont)

最后，很容易验证这两种方法是否给出了相同的结果

all.equal(NewCont, NewCont2)

R：为什么这个矩阵3d线性插值不能正常工作？

1 个答案: