通过网格方格汇总样本

时间:2013-11-28 14:51:59

标签: r intersect r-raster

我已经分配了一个我想在R中尝试的特定任务。要导入的shapefile(SpatialPoints df)包含多个属性,但最重要的是,特定点坐标的商业捕获权重(lat / lon) 。

我需要一个脚本:

1)创建一个网格(可以修改大小和单位) 2)与导入的文件相交,以便通过网格方格汇总样本(平均值,标准差,范围等)。

我可以通过ArcGIS这样做,但我对通过R修改网格大小和重复使用算法的方便感兴趣。下面是一个使用数据的简短示例。

有人知道怎么做吗?

   ENT_LATITU ENT_LONGIT   CSK
     415300     654400  195.954
     430100     622200   21.228
     442300     631000  232.423
     424700     642300   77.837
     442800     630600  154.586
     424600     642900    9.253

3 个答案:

答案 0 :(得分:3)

我建议使用raster&用于汇总网格单元格中的点的sp个包。下面的代码可以帮助您入门。这允许您设置行数和列数,如果您想要设置单元格的大小,则不难修改它。

library(sp)
library(raster)

#recreate a sample of your data
dF <- data.frame(ENT_LATITU=c(415300,430100,442300,424700),ENT_LONGIT=c(654400,622200,631000,642300), CSK=c(195,21,232,77))

nameLon <- "ENT_LATITU"
nameLat <- "ENT_LONGIT"

#put points into a 'SpatialPointsDataFrame' 'sp' object
coords <- cbind(x=dF[[nameLon]],y=dF[[nameLat]])
sPDF <- SpatialPointsDataFrame(coords,data=dF)

#set number of rows & columns in the grid
nrows <- 3
ncols <- 3

#setting extents from the data
xmn <- min(dF[[nameLon]]) 
ymn <- min(dF[[nameLat]]) 
xmx <- max(dF[[nameLon]]) 
ymx <- max(dF[[nameLat]]) 

#create a grid
blankRaster <- raster(nrows=nrows, ncols=ncols, xmn=xmn, xmx=xmx, ymn=ymn, ymx=ymx)
#adding data into raster to avoid 'no data' error
blankRaster[] <- 1:ncell(blankRaster)

#calc mean (or other function) of points per cell 
rasterMeanPoints <- rasterize(x=sPDF, y=blankRaster, field='CSK', fun=mean)

#plot to get an idea whether it's doing the right thing
plot(rasterMeanPoints)
text(sPDF,sPDF$CSK)

答案 1 :(得分:1)

我认为你应该使用ggplot2&#39; s geom_raster()。这是使用一些合成数据的示例。我首先创建了一个30x30网格,然后展示了如何将其减少到任何x / y聚合。

require(ggplot2)
require(plyr)

## CREATE REASONABLE SIZE GRID 30x30
dfe<-expand.grid(ENT_LATITU=seq(415000,418000,100),
            ENT_LONGIT=seq(630000,633000,100),
            CSK=0)
## FILL WITH RANDOM DATA
dfe$CSK=round(rnorm(nrow(dfe),200,50),0)

#######################################################
#####  VALUES TO CHANGE IN THIS BLOCK             #####
#######################################################
## TRIM ORIGINAL DATASET
lat.max<-Inf       # change items to trim data
lat.min<-0       
long.max<-Inf    
long.min<-631000      
dfe.trim<-dfe[findInterval(dfe$ENT_LATITU,c(lat.min,lat.max))*findInterval(dfe$ENT_LONGIT,c(long.min,long.max))==1,]
## SUMMARIZE TO NEW X/Y GRID
xblocks<-6
yblocks<-8

## GRAPH COLOR AND TEXT CONTROLS
showText<-TRUE
txtSize<-3
heatmap.low<-"lightgreen"
heatmap.high<-"orangered"
#######################################################
#####                                             #####
#######################################################

## BASIC PLOT (ALL DATA POINTS)
ggplot(dfe) +
  geom_raster(aes(ENT_LATITU,ENT_LONGIT,fill=CSK)) + theme_bw() +
  scale_fill_gradient(low=heatmap.low, high=heatmap.high) +
  geom_text(aes(ENT_LATITU,ENT_LONGIT,label=CSK,fontface="bold"),
            color="black",
            size=2.5) 

基本情节:

enter image description here

然后汇总的情节:

## CALL ddply to roll-up the data and calculate summary means, SDs,ec
dfe.plot<-ddply(dfe.trim,
      .(lat=cut(dfe.trim$ENT_LATITU,xblocks),
        long=cut(dfe.trim$ENT_LONGIT,yblocks)),
      summarize,
      mean=mean(CSK),
      sd=sd(CSK),
      sum=sum(CSK),
      range=paste(min(CSK),max(CSK),sep="-"))

## BUILD THE SUMMARY CHART
g<-ggplot(dfe.plot) +
  geom_raster(aes(lat,long,fill=sum),alpha=0.75) +
  scale_fill_gradient(low=heatmap.low, high=heatmap.high) +
  theme_bw() + theme(axis.text.x=element_text(angle=-90)) +
  ggtitle(paste(xblocks,
                " X ",
                yblocks,
                " grid of Catch Data\nbetween ( ",
                min(dfe.trim$ENT_LATITU),
                " : ",
                min(dfe.trim$ENT_LONGIT),
                " ) and ( ",
                max(dfe.trim$ENT_LATITU),
                " : ",
                max(dfe.trim$ENT_LONGIT),
                " )\n\n",
                sep=""))

## ADD THE LABELS IF NEEDED
if(showText)g<-g+geom_text(aes(lat,long,label=paste("SUM=",round(sum,0),
                                            "\nMEAN=",round(mean,1),
                                            "\nSD=",round(sd,1),
                                            "\nRNG=",range,sep=""),
                                  fontface=c("italic")),
                                  color="black",size=txtSize)

## FUDGE THE LABELS TO MAKE MORE READABLE
## REPLACE "," with newline and "]" with ")"
g$data[,1:2]<-gsub("[,]",replacement=" to\n",x=as.matrix(g$data[,1:2]))
g$data[,1:2]<-gsub("]",replacement=")",x=as.matrix(g$data[,1:2]))

## PLOT THE CHART
g + labs(x="\nLatitude", y="Longitude\n", fill="Sum\nBlock\n")

## SHOW HEADER OF data.plot
head(dfe.plot)

enter image description here

答案 2 :(得分:0)

如果你有一个名为'dat'的数据对象,你希望在通过在它们的中点分割这两组坐标而形成的范围内应用函数mean

# dput(dat)
dat <- 
structure(list(ENT_LATITU = c(415300L, 430100L, 442300L, 424700L, 
442800L, 424600L), ENT_LONGIT = c(654400L, 622200L, 631000L, 
642300L, 630600L, 642900L), CSK = c(195.954, 21.228, 232.423, 
77.837, 154.586, 9.253)), .Names = c("ENT_LATITU", "ENT_LONGIT", 
"CSK"), class = "data.frame", row.names = c(NA, -6L))

with(dat, tapply(CSK, list(lat.cut=cut(ENT_LATITU, 2), 
                           lon.cut=cut( ENT_LONGIT ,2)), 
                      mean))
#--------------------------------
                     lon.cut
lat.cut               (6.22e+05,6.38e+05] (6.38e+05,6.54e+05]
  (4.15e+05,4.29e+05]                  NA              94.348
  (4.29e+05,4.43e+05]             136.079                  NA

这为您提供了一个表对象(继承自matrix-class)。