我正在分析一些与犯罪有关的数据,其中包含犯罪类型、温度、季节、纬度、经度等特征。我的最终目标是预测未来哪些地方会成为犯罪热点。
这样的记录大约有 50 万条。我的想法是先对纬度和经度进行分箱(binning),然后在每个分箱内预测各类犯罪发生的可能性。
我尝试了以下方法:在 R 中以 5000 米为半径进行分箱。如果您能告诉我对经纬度(lat-long)进行分箱的更好方法(R 或 Python 均可),我将不胜感激。
数据框 mydata 包含各起犯罪发生地点的经纬度;数据框 villages 包含两个地区下的所有村庄。
在这里,我试图找出每个村庄周围约 5 公里范围内的所有犯罪记录。之后可以再根据距离进行分类(具体方法我还在探索)。不过,如果有人能提供一种更好、更简单的方法,我将不胜感激。
# Build a buffer around the first crime record and split the villages into
# those falling inside the buffer and those falling outside it.
# `mydata`, `villages`, `id_village` and `radius` are defined outside this
# section.

# Start from empty result frames.
df=data.frame()
df1=data.frame()
# Single-point SpatialPoints object for row 1 of `mydata`, in EPSG:4326.
# NOTE(review): this reads `mydata`, while the distance step further down reads
# `up100[1, ]` -- confirm both refer to the same record.
point_A <- data.frame("long"=mydata$longitude[1],"lat"=mydata$latitude[1]) %>% SpatialPoints(proj4string = CRS("+init=epsg:4326"))
# All villages as a SpatialPointsDataFrame sharing point_A's CRS; the attribute
# table keeps `id`, `long` and `lat`.
# NOTE(review): nothing here depends on the current record, so if this runs
# once per record it could presumably be built once up front -- confirm.
point_B <- data.frame("id"=id_village,"long"=villages$Longitude , "lat" = villages$Latitude)
point_B <- SpatialPointsDataFrame(coords=point_B[,c("long","lat")],data=point_B,proj4string = CRS(proj4string(point_A)))
# Reproject to EPSG:3228 before buffering so that `width` is applied in that
# CRS's units rather than in degrees.
# NOTE(review): confirm EPSG:3228 suits the study area and that `radius` is
# expressed in its units.
point_A <- spTransform(point_A,CRS("+init=epsg:3228"))
Point_A_buffer <- gBuffer(point_A,width = radius,quadsegs = 100)
# Bring both the point and its buffer back to EPSG:4326 to match point_B.
point_A <- spTransform(point_A,CRS("+init=epsg:4326"))
Point_A_buffer <- spTransform(Point_A_buffer,CRS("+init=epsg:4326"))
# Overlay villages on the buffer and pair the result (`test`) with village ids;
# the two lines below treat NA in `test` as "outside the buffer".
point_in_zone <- point_B %over% Point_A_buffer %>% data.frame("id"=point_B@data[,"id"],"test"=.)
# Villages inside / outside the buffer, keeping only the `id` attribute column.
point_B_possibly_in_distance <- point_B[!is.na(point_in_zone$test),"id"]
point_B_possibly_not_in_distance <- point_B[is.na(point_in_zone$test),"id"]
# Find the village nearest to the current record.
# Candidates are the villages inside the buffer when there are any; otherwise
# every village outside the buffer is considered instead.
if (length(point_B_possibly_in_distance) > 0) {
  candidates <- data.frame(point_B_possibly_in_distance)
} else {
  candidates <- data.frame(point_B_possibly_not_in_distance)
}
# Haversine distance from the current record (row 1, columns 10:11 of `up100`)
# to each candidate. r = 6371 is the Earth radius in km, so distances are in km.
# NOTE(review): distHaversine expects (longitude, latitude) column order, and
# columns 3:2 of `candidates` are presumably (lat, long) -- confirm the order
# of both arguments.
candidates$distance <- distHaversine(up100[1, 10:11], candidates[, 3:2], r = 6371)
# Sort by the full column name instead of relying on `$dist` partial matching.
candidates <- candidates[order(candidates$distance), ]
# One-row frame holding the distance to the nearest candidate.
df <- data.frame(candidates[1, "distance"])
colnames(df) <- "distance"
# Attributes of the nearest village, minus columns 4 and 5; the candidate's
# `id` is used as a row index into `villages`.
df1 <- villages[candidates[1, ]$id, -c(4, 5)]
# Append the current record (row 1 of `up100`) together with its nearest-village
# distance (`df`) and village attributes (`df1`) to the accumulated result.
up100_new<-rbind(up100_new,bind_cols(up100[1,],bind_cols(df,df1)))
# Drop rows from `up100` using a mask comparing each event to the first event.
# NOTE(review): the mask is computed from `mydata` but applied to `up100`; it
# only lines up if both have identical rows, and `mydata` is never shrunk in
# this section. `up100$event` was probably intended on both sides -- confirm.
up100<-up100[!mydata$event == mydata$event[1],]
# Progress report, printed only when the counter is one of the values in
# `records` (defined outside this section).
if (i %in% records){
msg<-paste0("Processed: ",i)
msg<-paste(msg," records...")
print(msg)
print('')
print(paste0("Total Records Left: ",nrow(up100)))
}
# Advance the record counter.
i=i+1
# Closes an enclosing block whose opening line is not visible in this section.
}