如何根据纬度和经度对事件进行分组/分类

时间:2018-06-26 09:40:13

标签: python r cluster-analysis geospatial binning

我正在分析一些与犯罪有关的数据。我具有犯罪类型,温度,季节,纬度,经度等特征。我的最终目标是预测哪些是未来犯罪的热点。

这些事件记录大约有50万条。我原本打算先对纬度和经度进行分箱,然后在每个分箱中预测每种犯罪发生的可能性。

我尝试了以下方法:使用R,以5000米为半径进行分箱。如果您能告诉我对经纬度(lat-long)进行分箱的更好方法(R 或 Python 均可),我将不胜感激。

mydata 数据框包含犯罪发生地点的经纬度;villages 数据框包含两个地区下的所有村庄。

在这里,我试图找出距离每个村庄约5公里以内的所有犯罪事件。之后,我可以利用这个距离进行分类(具体做法仍有待探索)。但是,如果有人能提供一种更好、更简单的方法,我将不胜感激。

  # Body of one loop iteration: find the village nearest to one crime record
  # and append that record, the distance and the village row to `up100_new`.
  # NOTE(review): the loop header and the definitions of `id_village`,
  # `radius`, `up100`, `up100_new`, `records` and `i` are not part of this
  # snippet; they must exist before this code runs.

  # Placeholders; both are overwritten in whichever branch of the if/else runs.
  df=data.frame()
  df1=data.frame()

  # Crime location: first row of `mydata`, as a SpatialPoints object in
  # EPSG:4326 (longitude/latitude).
  point_A <- data.frame("long"=mydata$longitude[1],"lat"=mydata$latitude[1]) %>% SpatialPoints(proj4string = CRS("+init=epsg:4326"))

  # All villages as a SpatialPointsDataFrame sharing point_A's CRS.
  point_B <- data.frame("id"=id_village,"long"=villages$Longitude , "lat" = villages$Latitude)
  point_B <- SpatialPointsDataFrame(coords=point_B[,c("long","lat")],data=point_B,proj4string = CRS(proj4string(point_A)))


  # Reproject to EPSG:3228 to build a buffer of width `radius` around the
  # crime point, then transform both the point and the buffer back to EPSG:4326.
  # NOTE(review): `radius` is presumably in the units of EPSG:3228 - confirm.
  point_A <- spTransform(point_A,CRS("+init=epsg:3228")) 
  Point_A_buffer <- gBuffer(point_A,width = radius,quadsegs = 100)
  point_A <- spTransform(point_A,CRS("+init=epsg:4326")) 
  Point_A_buffer <- spTransform(Point_A_buffer,CRS("+init=epsg:4326")) 

  # Overlay villages on the buffer; `test` is NA for villages outside it
  # (the NA test below relies on this).
  point_in_zone <- point_B %over% Point_A_buffer %>% data.frame("id"=point_B@data[,"id"],"test"=.)

  # Split villages into those inside and those outside the buffer.
  point_B_possibly_in_distance <- point_B[!is.na(point_in_zone$test),"id"]
  point_B_possibly_not_in_distance <- point_B[is.na(point_in_zone$test),"id"]

  if (length(point_B_possibly_in_distance) > 0){

    # At least one village lies inside the buffer: rank only those villages.
    in_zone<-data.frame(point_B_possibly_in_distance)
    # Haversine distance from the first row of `up100` (columns 10:11) to each
    # village (columns 3 and 2 of `in_zone`, in that order).
    # NOTE(review): distHaversine expects (longitude, latitude) pairs - verify
    # the column order of both arguments. With r=6371 the result is presumably
    # in kilometres - confirm.
    in_zone$distance<-distHaversine(up100[1,10:11],(in_zone[,3:2]),r=6371)
    # `$dist` resolves to the `distance` column through partial matching of `$`.
    in_zone<-in_zone[order(in_zone$dist),]

    # Keep the smallest distance and the matching `villages` row
    # (selected by id, with columns 4 and 5 dropped).
    df=data.frame(in_zone[1,"distance"])
    colnames(df)<-"distance"
    df1<-villages[in_zone[1,]$id,-c(4,5)]

  }else{

    # No village inside the buffer: apply the same ranking to the villages
    # outside it.
    out_zone<-data.frame(point_B_possibly_not_in_distance)
    out_zone$distance<-distHaversine(up100[1,10:11],(out_zone[,3:2]),r=6371)
    # `$dist` resolves to the `distance` column through partial matching of `$`.
    out_zone<-out_zone[order(out_zone$dist),]

    df=data.frame(out_zone[1,"distance"])
    colnames(df)<-"distance"
    df1<-villages[out_zone[1,]$id,-c(4,5)]

  }

  # Append the processed record plus its distance and village columns to the
  # accumulated result.
  up100_new<-rbind(up100_new,bind_cols(up100[1,],bind_cols(df,df1)))
  # Drop rows from `up100` using a mask computed on `mydata`.
  # NOTE(review): the mask is built from `mydata$event` but indexes `up100`,
  # and point_A above is taken from `mydata[1]` while the distance uses
  # `up100[1,]`. Confirm both tables stay row-aligned across iterations.
  up100<-up100[!mydata$event == mydata$event[1],]

  # Progress report whenever the counter `i` is one of the values in `records`.
  if (i %in% records){
    msg<-paste0("Processed: ",i)
    msg<-paste(msg," records...")
    print(msg)

    print('')
    print(paste0("Total Records Left: ",nrow(up100)))

  }
  i=i+1
# Closes the enclosing loop, whose opening line is not shown in this snippet.
}

0 个答案:

没有答案