我有一个图表,其中外推与初始插值不匹配。我希望热图能够填满整个图像。
首先,插值代码:
library(akima)
library(reshape2)
xmin <- signif(min(CBLo2$MD1))
xmax <- signif(max(CBLo2$MD1))
ymin <- signif(min(CBLo2$MD2))
ymax <- signif(max(CBLo2$MD2))
gridint <- 100
fld <- with(CBLo2, interp(x = MD1, y = MD2, z = Abundance,
xo=seq(xmin, xmax, length=gridint), yo=seq(ymin, ymax, length=gridint) ))
df <- melt(fld$z, na.rm = TRUE)
names(df) <- c("MD1", "MD2", "Abundance")
df$MD1 <- fld$x[df$MD1]
df$MD2 <- fld$y[df$MD2]
contour(fld) # test plot
我不会发布整个ggplot代码(用于下图),只需要生成热图:
ggplot() +
geom_tile(inherit.aes=FALSE,data = df, aes(x = MD1, y = MD2,fill = Abundance)) +
scale_fill_continuous(name = "Rain (mm)", low = "yellow", high = "green")
然而,当我尝试推断数据时(以下其他帖子中的示例),我得到以下情节,它与第一口井完全不匹配:
fld <- with(CBLo2, interp(x = MD1, y = MD2, z = Abundance, extrap=TRUE, linear=FALSE,
xo=seq(xmin, xmax, length=gridint), yo=seq(ymin, ymax, length=gridint) ))
以下是数据:
Abundance MD1 MD2
9 -0.59042 0.76793119
42 -0.48544284 -0.09465043
13 0.51250586 -0.24599322
84 -0.30857525 -0.21529624
2 0.90449257 0.679926
16 0.24536209 0.24016424
52 -0.43144002 -0.75474149
4 1.23830339 -0.11985391
37 -1.10235817 0.33886773
79 0.01757236 -0.59635386
我做错了什么?如何使推断更准确?
答案 0 :(得分:2)
TLDR解决方案:
将linear = FALSE
添加到所有interp()
代码以保持一致性,并在scale_fill_continuous()
中指定相同的限制。
<强>解释强>:
这里有两个问题。
问题1 :用于生成第一个fld
的代码不包含参数linear = FALSE
,而用于第二个的代码确实
让我们比较插值:
library(dplyr)
fld1 <- with(CBLo2,
interp(x = MD1, y = MD2, z = Abundance,
xo=seq(xmin, xmax, length=gridint),
yo=seq(ymin, ymax, length=gridint) ))
df1 <- melt(fld1$z, na.rm = TRUE) # 6426 obs
fld2 <- with(CBLo2,
interp(x = MD1, y = MD2, z = Abundance,
extrap = TRUE, linear = FALSE,
xo=seq(xmin, xmax, length=gridint),
yo=seq(ymin, ymax, length=gridint) ))
df2 <- melt(fld2$z, na.rm = TRUE) #1000 obs
df.combined <- left_join(df2, df1, by = c("Var1", "Var2"))
df.combined %>%
filter(!is.na(value.y)) %>% # compare for the overlapping range
mutate(diff = value.x - value.y) %>%
select(diff) %>%
summary()
diff
Min. :-303.360
1st Qu.: -42.399
Median : 8.763
Mean : -7.552
3rd Qu.: 36.132
Max. : 238.647
现在将linear = FALSE
添加到第一个fld
:
fld3 <- with(CBLo2,
interp(x = MD1, y = MD2, z = Abundance,
linear = FALSE,
xo=seq(xmin, xmax, length=gridint),
yo=seq(ymin, ymax, length=gridint) ))
df3 <- melt(fld3$z, na.rm = TRUE) # 6426 obs
df.combined <- left_join(df2, df3, by = c("Var1", "Var2"))
df.combined %>%
filter(!is.na(value.y)) %>%
mutate(diff = value.x - value.y) %>%
select(diff) %>%
summary()
diff
Min. :0
1st Qu.:0
Median :0
Mean :0
3rd Qu.:0
Max. :0
第2期 :插值的范围非常不同。
# define column names
names(df2) <- c("MD1", "MD2", "Abundance")
names(df3) <- c("MD1", "MD2", "Abundance")
> range(df2$Abundance)
[1] -1136.341 420.369
> range(df3$Abundance)
[1] -297.9161 241.6618
我们可以看到,即使值在相同的MD1 / MD2坐标处匹配,扩展的df2中的值范围也远远超过df3的范围。为了确保丰度值和颜色之间的相同映射,我们必须根据两者的组合范围指定填充限制。
我将使用一个丑陋但视觉上不同的渐变来说明这一点:
library(gridExtra)
p <- ggplot() +
scale_fill_gradientn(name = "Rain (mm)", colours = rainbow(15),
limits = range(c(df2$Abundance, df3$Abundance)))
grid.arrange(p + geom_tile(data = df3, aes(x = MD1, y = MD2, fill = Abundance)),
p + geom_tile(data = df2, aes(x = MD1, y = MD2, fill = Abundance)),
nrow = 1)
如果我们覆盖这些图,它们会完全重叠(调整透明度以显示df3的边缘):
p +
geom_tile(data = df3, aes(x = MD1, y = MD2, fill = Abundance), alpha = 0.5) +
geom_tile(data = df2, aes(x = MD1, y = MD2, fill = Abundance), alpha = 0.5)