我有两个文件:一个包含点及其id,另一个包含多边形及其id和其他若干列。我想得到每个点所在多边形的id。我有近170万个点和50000个多边形。点在多边形内(point-in-polygon)的检查在IPython笔记本中一直运行不完。有没有办法优化我的代码,或者能否使用multiprocessing模块进行并行处理,以加快点在多边形内的检查?
首先读取多边形文件。
import shapefile
import shapely
from shapely.geometry import Polygon

# Open the polygon layer and pull every geometry into memory.
polygons_sf = shapefile.Reader("ploygonshapefile.shp")
polygon_shapes = polygons_sf.shapes()

# Raw vertex lists (one per shape) and the shapely polygons built from them.
# NOTE(review): each polygon is built from the shape's flat vertex list;
# shapes with several parts (holes, multi-polygons) would presumably need
# per-part handling -- confirm against the data.
polygon_points = [shp.points for shp in polygon_shapes]
polygons = list(map(Polygon, polygon_points))

# Attribute rows of the polygon layer; used later to get the polygon id.
polygons_sf_records = polygons_sf.records()
然后为多边形创建rtree索引。
from rtree import index
# Build an R-tree over the polygon bounding boxes so that each point only
# has to be tested against the few polygons whose bbox contains it.
idx = index.Index()
for count, (shp, record) in enumerate(zip(polygon_shapes, polygons_sf_records)):
    # The entry id is the polygon's position in the list, so an id returned
    # by the index can be used directly to index `polygons`.
    # `obj` stores record field 1 (the polygon id) alongside the entry.
    idx.insert(count, shp.bbox, obj=record[1])
然后加载点文件。
from shapely.geometry import Point

# Load the point layer.
points_sf = shapefile.Reader("pointshapefile.shp")

# iterShapes() is a one-shot iterator: it can be walked only once, so the
# coordinates are extracted in a single pass and everything else is derived
# from that list rather than from the (now exhausted) iterator.
point_shapes = points_sf.iterShapes()
point_coords = [q.points[0] for q in point_shapes]

# Build the shapely points from the stored coordinates; iterating
# point_shapes a second time would yield nothing and leave this list empty.
points = [Point(xy) for xy in point_coords]

# Attribute rows of the point layer; used later to get the point id.
points_sf_records = points_sf.records()
然后尝试找出每个点的多边形id。
# For every point, find the id of the polygon that contains it.
# Each entry of `matches` is {'Point_id': ..., 'Polygon_id': ...}, where
# 'Polygon_id' is the first containing polygon's id, or None if there is none.
matches = []
for i, point in enumerate(points):
    polygon_id = None  # stays None when no polygon contains this point
    # The R-tree yields only polygons whose bounding box covers the point.
    for j in idx.intersection(point_coords[i]):
        # A bbox hit is only a candidate: verify against the real geometry.
        if point.within(polygons[j]):
            # Record the id at the moment of the match; reading `j` after the
            # loop would pick up a non-matching candidate when nothing matched.
            polygon_id = polygons_sf_records[j][1]
            break
    matches.append({'Point_id': points_sf_records[i][0], 'Polygon_id': polygon_id})