我有两个数据集:一个是芝加哥的犯罪记录(带有经度和纬度坐标),另一个是芝加哥的人口普查区块边界。基于这两个文件,能否在 R 中按人口普查区块汇总犯罪数量?目的是能够按人口普查区块绘制犯罪地图。
下载芝加哥人口普查区数据的位置: https://data.cityofchicago.org/Facilities-Geographic-Boundaries/Boundaries-Census-Blocks-2000/uktd-fzhd
下载犯罪数据的位置: https://data.cityofchicago.org/Public-Safety/Crimes-2001-to-present/ijzp-q8t2
我从另一个项目中摘取了一些代码。运行完成后,会得到一个包含人口普查区信息的空间对象,以及一个包含犯罪数据(含经度/纬度坐标)的数据框:
library(rgdal)
library(dplyr)
# Helper function to reduce crime data to a single year and limit it to the
# variables of interest.
#
# Arguments:
#   rawData  data frame of crime records; the columns Year, Community.Area,
#            Primary.Type, Arrest, Latitude and Longitude are read.
#   year     year to keep; compared against rawData$Year with `==`.
#
# Returns a data frame with the columns year, community (coerced to numeric),
# type, arrest, latitude and longitude, one row per record of that year.
yearReduce <- function(rawData=NULL,year=NULL) {
  # which() discards comparisons that evaluate to NA, so records with a
  # missing Year are dropped instead of coming back as all-NA rows.
  dat <- rawData[which(rawData$Year == year), ]
  data.frame(
    year = dat$Year,
    community = as.numeric(dat$Community.Area),
    type = dat$Primary.Type,
    arrest = dat$Arrest,
    latitude = dat$Latitude,
    longitude = dat$Longitude
  )
}
# Load crime data
# Empty and single-space fields are read as NA; strings are kept as character
# vectors (FALSE is spelled out because the shorthand `F` can be reassigned).
crimedata <- '~/Documents/data/Crimes_-_2001_to_present.csv'
mydata_crime <- read.csv(crimedata, na.strings = c("", " ", "NA"), stringsAsFactors = FALSE)
# Lower-case the crime type labels
mydata_crime$Primary.Type <- tolower(mydata_crime$Primary.Type)
# Set cwd to location of the census tract shape file
# NOTE(review): setwd() changes the working directory for the rest of the
# session; any relative path used afterwards resolves against this folder.
setwd('~/Documents/data/Boundaries_-_Census_Blocks_-_2010/')
# Create spatial vector object from the "CensusBlockTIGER2010" layer in the
# current directory and transform its projection to longitude/latitude (WGS84)
tract <- readOGR(".", "CensusBlockTIGER2010") %>%
  spTransform(CRS("+proj=longlat +datum=WGS84"))
# Narrow the crime records to a single year (2010)
crime2010 <- yearReduce(rawData = mydata_crime, year = '2010')
# Keep only the selected crime type(s); deliberately limited to homicides for
# testing purposes. which() skips rows whose type is NA.
violent_crimes <- crime2010[which(crime2010$type == "homicide"), ]
# Light clean-up: drop every row that has a missing value in any column
violent_crimes <- violent_crimes[complete.cases(violent_crimes), ]
感谢您提供的任何帮助。
帕特里克
答案 0 :(得分:1)
#Load libraries
library(rgdal)
library(sp)
library(raster)
# Set my wd
setwd('~/Dropbox/rstats/r_blog_home/stack_o/')
# Load crime data (the relative path resolves against the directory set above);
# strings are kept as character vectors
my_crime <- read.csv(file = 'spat_aggreg/Crimes_2001_to_present.csv', stringsAsFactors = FALSE)
# Lower-case the crime type labels
my_crime$Primary.Type <- tolower(my_crime$Primary.Type)
# Select variables of interest and subset by year and type of crime
# Note, yearReduce() not necessary at all: check R documentation before creating own functions
my_crime <- data.frame(year = my_crime$Year, community = my_crime$Community.Area,
                       type = my_crime$Primary.Type, arrest = my_crime$Arrest,
                       latitude = my_crime$Latitude, longitude = my_crime$Longitude)
# Both conditions have to be combined with `&` inside the row filter: the
# third positional argument of subset() is `select` (column selection), not a
# second filter condition.
vc <- subset(my_crime, year == 2010 & type == "homicide")
# Keep only complete cases
vc <- vc[complete.cases(vc), ]
# Load census tract data
# Note: `shapefile()` is a neater alternative to `readOGR()`
# Note: `@` is used to access the attribute data table associated with a
# spatial object in R
# Read the shapefile and reproject it to longitude/latitude (WGS84) in one go
tract <- spTransform(
  shapefile("spat_aggreg/census_blocks_2000/Census_Blocks.shp"),
  CRS("+proj=longlat +datum=WGS84")
)
# Lower-case the attribute column names
colnames(tract@data) <- tolower(colnames(tract@data))
# Turn the crime records into a spatial points object: longitude/latitude
# become the coordinates, the remaining variables the attribute table
vc <- SpatialPointsDataFrame(
  coords = vc[c("longitude", "latitude")],
  data = vc[c("year", "community", "type", "arrest")],
  proj4string = CRS("+proj=longlat +datum=WGS84")
)
# Every row is a single homicide, so give each one a count of 1
vc$count <- 1
# Spatial overlay: look up the census polygon in which each crime point falls.
# `vc_tract` is a data frame holding the tract attributes for each point
vc_tract <- over(vc, tract)
# Append the tract attributes to the attribute table of the crime points
vc@data <- data.frame(vc@data, vc_tract)
# Aggregate homicides by tract (assuming the column "census_tra" is the unique ID for census tracts)
# The formula is passed positionally: the first argument of aggregate()'s
# formula method is named `x` since R 4.2.0 (`formula` before that), so
# naming it is not portable across R versions.
hom_tract <- aggregate(count ~ census_tra, data = vc@data, FUN = length)
# Add number of homicides to tracts object
m <- match(x = tract@data$census_tra, table = hom_tract$census_tra)
tract@data$homs_2010 <- hom_tract$count[m]
# Polygons whose ID has no entry in hom_tract get NA from match(); they had
# no homicide assigned to them, so record a count of 0 instead.
tract@data$homs_2010[is.na(tract@data$homs_2010)] <- 0L
现在,您的人口普查区域(存放在名为 tract 的 SpatialPolygonsDataFrame 对象中)包含一个名为 homs_2010 的列,其中记录了每个区域的凶杀案数量。在此基础上,绘制地图应该轻而易举。