我想创建一个显示现象的局部空间聚类的地图,最好使用Local Moran(LISA)。
在下面的可重现示例中,我使用 spdep 计算 Local Moran 指数,但我想知道是否有简单的方法把这些聚类绘制成地图,最好使用 ggplot2。有什么建议吗?
library(UScensus2000tract)
library(ggplot2)
library(spdep)
# Oregon census tract polygons from the UScensus2000tract package
data("oregon.tract")

# Quick look at the tract boundaries
plot(oregon.tract)

# Neighbour list based on queen contiguity (shared edge or vertex)
spatmatrix <- poly2nb(oregon.tract, queen = TRUE)

# Local Moran's I of the black population, using weights derived from the
# neighbour list
lmoran <- localmoran(
  x = oregon.tract@data$black,
  listw = nb2listw(spatmatrix)
)
为了让这个示例更接近我的真实数据集:我的 shapefile 中有一些 NA 值,它们代表多边形中的孔洞,因此这些区域不应参与计算。
# Mark tracts 3 to 5 as missing to mimic the holes in the real data set
is.na(oregon.tract@data$black) <- 3:5
答案 0 :(得分:2)
这是一个策略:
library(UScensus2000tract)
library(spdep)
library(ggplot2)
library(dplyr)
# Load the Oregon census tract polygons from UScensus2000tract
data("oregon.tract")

# Plot the census tract map
plot(oregon.tract)

# Build the neighbour list (queen contiguity)
spatmatrix <- poly2nb(oregon.tract)

# Turn the neighbour list into a spatial weights list
listw <- nb2listw(spatmatrix)

# Local Moran's I of the white population
lmoran <- localmoran(oregon.tract$white, listw)
summary(lmoran)

# Standardize the variable and store it in a new column
# (as.vector() drops the matrix shape and attributes that scale() returns)
oregon.tract$s_white <- scale(oregon.tract$white) %>% as.vector()

# Spatial lag of the standardized variable, stored in a new column
oregon.tract$lag_s_white <- lag.listw(listw, oregon.tract$s_white)

# Summaries of both variables, to inform the analysis
summary(oregon.tract$s_white)
summary(oregon.tract$lag_s_white)

# Moran scatterplot in base graphics
# (with identification of influential observations)
x <- oregon.tract$s_white
y <- oregon.tract$lag_s_white %>% as.vector()
xx <- data.frame(x, y)
moran.plot(x, listw)

# Moran scatterplot in ggplot2
# (without identification of influential observations, which is possible
# but requires more effort). The dashed lines at zero split the plot into
# the four quadrants.
ggplot(xx, aes(x, y)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  geom_vline(xintercept = 0, linetype = "dashed")
# Label every tract with its Moran scatterplot quadrant, dismissing the
# tracts whose local Moran statistic is not significant at the 5% level.
#
# All labels are assigned in one case_when() call so that:
#   * the quadrants are mutually exclusive (a value of exactly zero counts
#     as "high" on either axis) instead of depending on assignment order;
#   * the attribute table is always written the same way;
#   * missing values yield NA instead of failing on an NA logical subscript.

# Column 5 of the localmoran() result is used as the p-value
p_value <- unname(lmoran[, 5])

oregon.tract$quad_sig <- case_when(
  is.na(p_value) ~ NA_character_,
  p_value > 0.05 ~ "not signif.",
  oregon.tract$s_white >= 0 & oregon.tract$lag_s_white >= 0 ~ "high-high",
  oregon.tract$s_white < 0 & oregon.tract$lag_s_white < 0 ~ "low-low",
  oregon.tract$s_white >= 0 & oregon.tract$lag_s_white < 0 ~ "high-low",
  oregon.tract$s_white < 0 & oregon.tract$lag_s_white >= 0 ~ "low-high"
)
oregon.tract$quad_sig <- as.factor(oregon.tract$quad_sig)

# Row names become an explicit key column, used later to join the attributes
# back onto the fortified polygons
oregon.tract@data$id <- rownames(oregon.tract@data)
# Plot the cluster map

# Flatten the polygons into a data frame of vertices keyed by `id`
df <- fortify(oregon.tract, region = "id")

# Attach the tract attributes (including quad_sig) to every vertex. The key is
# spelled out so the join cannot silently pick up any other column that
# happens to share a name in both tables.
df <- left_join(df, oregon.tract@data, by = "id")

df %>%
  ggplot(aes(long, lat, group = group, fill = quad_sig)) +
  geom_polygon(color = "white", size = .05) +
  coord_equal() +
  theme_void() +
  scale_fill_brewer(palette = "Set1")
此答案基于 Eli Knaap 在 Twitter 上推荐的 this page,并借鉴了 @timelyportfolio 对此问题的回答。
我使用变量 white 代替 black,因为 black 的结果不太明显。
关于 NA 值,localmoran() 有一个参数 na.action,文档对它的说明如下:
na.action 是一个函数(默认为 na.fail),也可以是 na.omit 或 na.exclude——在这些情况下,权重列表会被取子集,以删除数据中为 NA 的观测。可能需要将 zero.policy 设置为 TRUE,因为取子集后可能出现没有邻居的观测。请注意,只有在调用 nb2listw 时未使用 glist 参数创建的权重列表才可以取子集。如果使用 na.pass,则在计算空间滞后时用零代替 NA 值。
我试过了:
# Blank out three tracts, then let localmoran() handle the missing values
# through na.action
is.na(oregon.tract@data$white) <- 3:5
lmoran <- localmoran(
  x = oregon.tract@data$white,
  listw = listw,
  na.action = na.exclude,
  zero.policy = TRUE
)
但随后在 lag.listw 中遇到了问题,我没有时间深入研究。抱歉。
答案 1 :(得分:0)
我认为这个答案不值得赏心悦目,但也许它会让你更接近答案。由于我对IF OBJECT_ID('tempdb.dbo.#Results') IS NOT NULL
-- When the check above finds an existing #Results temp table, drop it so the
-- CREATE TABLE below starts from scratch
BEGIN
DROP TABLE #Results;
END
-- #Results: one row per ID together with the Father1ID computed for it
CREATE TABLE #Results (ID INT NOT NULL PRIMARY KEY, Father1ID INT);
-- Recursive CTE over #PPL: starts from the rows whose Father never appears as
-- a Son and follows Father -> Son links, building a '/'-delimited path of the
-- IDs visited (Node) for every row reached
WITH CteRec
AS (
-- Anchor member: rows whose Father is not the Son of any row in #PPL
SELECT l1.ID, l1.Son, l1.Father, CONVERT(VARCHAR(900), '/'+LTRIM(l1.ID)+'/') AS Node -- FamilyTree
FROM #PPL AS l1 -- First level
WHERE NOT EXISTS(SELECT * FROM #PPL p WHERE p.Son = l1.Father)
UNION ALL
-- Recursive member: rows whose Father is the Son of a row already reached;
-- the row's own ID is appended to the parent's Node path
SELECT ln.ID, ln.Son, ln.Father, CONVERT(VARCHAR(900), prt.Node+LTRIM(ln.ID)+'/') AS Node -- FamilyTree
FROM #PPL AS ln -- Next level
JOIN CteRec AS prt ON prt.Son = ln.Father
)
-- Store, for every row reached, the ID at the start of its Node path:
-- GetAncestor(GetLevel()-1) climbs back to the first path element, and
-- removing the '/' separators leaves the bare ID, which is converted to INT
INSERT #Results (ID, Father1ID)
SELECT ID,
Father1ID = CONVERT(INT,REPLACE(CONVERT(HIERARCHYID, Node).GetAncestor(CONVERT(HIERARCHYID, Node).GetLevel()-1).ToString(),'/',''))
FROM CteRec;
-- Final result: every #PPL row with its Father1ID, plus the Father value of the
-- #PPL row that Father1ID points to (returned as Father1Name)
SELECT p.*, r.Father1ID, rp.Father AS Father1Name
FROM #PPL p
INNER JOIN #Results r ON p.ID = r.ID
INNER JOIN #PPL rp ON r.Father1ID = rp.ID
-- You can also use #Results with an UPDATE statement, but I would store these values in a new column Father1
一无所知,所以我猜错了。
localmoran