在R中创建LISA空间聚类地图

时间:2016-06-06 18:45:17

标签: r plot ggplot2 geospatial spdep

我想创建一个显示现象的局部空间聚类的地图,最好使用Local Moran(LISA)。

在下面的可重复示例中,我使用spdep计算局部Moran指数,但我想知道是否有简单的方法把这些聚类绘制成地图,最好使用ggplot2。有人能帮忙吗?

library(UScensus2000tract)
library(ggplot2)
library(spdep)

# Load the Oregon census-tract polygons
data("oregon.tract")

# Draw the census-tract boundaries
plot(oregon.tract)

# Build the Queen-contiguity neighbour list from the tract polygons
spatmatrix <- poly2nb(oregon.tract)

# Local Moran's I for the black population count of each tract,
# using spatial weights derived from the neighbour list
lmoran <- localmoran(
  oregon.tract$black,
  nb2listw(spatmatrix)
)

现在为了使这个例子与我的真实数据集更相似,我的形状文件中有一些NA值,它们代表多边形中的孔,因此这些区域不应该用于计算。

# Blank out rows 3 to 5 of the variable to mimic polygons with no data
oregon.tract@data[3:5, "black"] <- NA

2 个答案:

答案 0 :(得分:2)

这是一个策略:

library(UScensus2000tract)
library(spdep)
library(ggplot2)
library(dplyr)

# Load the Oregon census-tract polygons
data("oregon.tract")
# Plot the census-tract map
plot(oregon.tract)

# Build the Queen-contiguity neighbour list from the tract polygons
spatmatrix <- poly2nb(oregon.tract)

# Turn the neighbour list into a spatial weights list
listw <- nb2listw(spatmatrix)

# Local Moran's I of the white population count
lmoran <- localmoran(oregon.tract$white, listw)
summary(lmoran)

# Standardize the variable and store it in a new column;
# as.vector() drops the matrix shape returned by scale()
oregon.tract$s_white <- scale(oregon.tract$white) %>% as.vector()

# Spatially lagged version of the standardized variable, stored in a new column
oregon.tract$lag_s_white <- lag.listw(listw, oregon.tract$s_white)

# Summaries of both variables, to inform the analysis
summary(oregon.tract$s_white)
summary(oregon.tract$lag_s_white)

# Moran scatterplot in base graphics
# (with identification of influential observations)
x <- oregon.tract$s_white
y <- oregon.tract$lag_s_white %>% as.vector()
xx <- data.frame(x, y)

moran.plot(x, listw)

# Moran scatterplot in ggplot2
# (without identification of influential observations, which is possible
# but requires more effort)
ggplot(xx, aes(x, y)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  geom_vline(xintercept = 0, linetype = "dashed")

# Classify every tract by its Moran-scatterplot quadrant, labelling the
# non-significant ones separately.
#
# The p-values are read from the 5th column of the localmoran() result.
p_value <- lmoran[, 5]
signif_level <- 0.05

# "High" means at or above the mean of the standardized variable (>= 0),
# "low" means strictly below it, so the four quadrants are mutually exclusive
# and the result does not depend on the order of the assignments below.
value_high <- oregon.tract$s_white >= 0
lag_high <- oregon.tract$lag_s_white >= 0

# which() drops NA entries of the masks, so tracts with a missing value,
# lag or p-value simply stay NA instead of raising a subscript error.
quadrant <- rep(NA_character_, length(p_value))
quadrant[which(value_high & lag_high)] <- "high-high"
quadrant[which(!value_high & !lag_high)] <- "low-low"
quadrant[which(value_high & !lag_high)] <- "high-low"
quadrant[which(!value_high & lag_high)] <- "low-high"
# Non-significant observations override the quadrant label
quadrant[which(p_value > signif_level)] <- "not signif."

oregon.tract$quad_sig <- factor(quadrant)

# Key used to join the polygon vertices back to the attribute table
oregon.tract@data$id <- rownames(oregon.tract@data)

# Plotting the map: flatten the polygons to a data frame of vertices,
# attach the attributes by the explicit "id" key, then fill by quadrant label
df <- fortify(oregon.tract, region = "id")
df <- left_join(df, oregon.tract@data, by = "id")
df %>%
  ggplot(aes(long, lat, group = group, fill = quad_sig)) +
  geom_polygon(color = "white", size = .05) +
  coord_equal() +
  theme_void() +
  scale_fill_brewer(palette = "Set1")

此答案基于Eli Knaap在Twitter上推荐的这个页面(this page),并且还借鉴了@timelyportfolio对此问题的回答。

我使用变量white代替black,因为black的结果不太明确。

关于NAs,localmoran()包含参数na.action,文档说明了这一点:

  

na.action是一个函数(默认为na.fail),也可以是na.omit或na.exclude - 在这些情况下,权重列表将被子集化以删除数据中的NA。可能需要将zero.policy设置为TRUE,因为此子集化可能会产生没有邻居的观测值。请注意,只有在创建权重列表时未向nb2listw传入glist参数,该权重列表才可以被子集化。如果使用na.pass,则在计算空间滞后时用零代替NA值。

我试过了:

# Blank out three observations to mimic polygons with no data
oregon.tract@data[3:5, "white"] <- NA
# Recompute local Moran's I, asking localmoran() to exclude the NA observations
lmoran <- localmoran(
  oregon.tract@data$white,
  listw,
  zero.policy = TRUE,
  na.action = na.exclude
)

但在lag.listw中遇到了问题,而我没有时间深入研究。抱歉。

答案 1 :(得分:0)

我认为这个答案不值得获得悬赏,但也许它会让你更接近答案。由于我对 localmoran 的计算方式一无所知,所以我只能靠猜测。

localmoran