Python:使用多处理模块提高我的功能速度。有些困惑

时间:2013-01-09 16:47:11

标签: python performance optimization multiprocessing

这是第二篇文章,是我previous question的一部分。

我在Python 2.7中编写了一个函数(在Window OS 64bit上),以便计算ESRI shapefile中参考多边形(Ref)和一个或多个分段(Seg)多边形的交集区域的平均值格式。代码非常慢,因为我有超过2000个参考多边形,并且对于每个Ref_polygon,函数每次运行所有Seg多边形(超过7000)。对不起,但功能是原型。

遵循David Robinson的建议

from multiprocessing import Pool
p = Pool()  # run multiple processes
for l in p.imap_unordered(process_reference_object, range(ref_layer.GetFeatureCount())):
    file_out.write(l)

TimothyAWiseman,我希望在优化方式中使用多处理,以提高我的功能速度。

我有以下问题:

  1. 在哪里找到p = Pool()....的最佳位置 第二个功能(例如:segmentation_accuracy)还是最后?
  2. 我试图在这里插入(在segmentation_accuracy的末尾)

    p = Pool()
    for l in p.imap_unordered(object_accuracy, range(ref_layer.GetFeatureCount())):
        file_out.write(l)
    file_out.close()
    

    但我的电脑正在冻结

    1. 我可以改进(我想是的)我的代码以及如何编写?

    2. from shapely.geometry import Polygon
      import math
      import numpy as np
      import osgeo.gdal
      import ogr
      import numpy
      import os
      from multiprocessing import Pool
      
      def shapefile_NameFilter(inFile):
          if inFile.endswith(".shp"):
              return inFile
          else:
              raise ValueError('"%s" is not an ESRI shapefile' % inFile)
      
      def object_accuracy(ref,seg, index,threshold=10.0,noData=-9999):
          """
          segmetation accuracy metrics
          """
          ref_layer = ref
          seg_layer = seg
      
          # convert in a shapely polygon
          ref_feature = ref_layer.GetFeature(index)
          # get FID (=Feature ID)
          FID = str(ref_feature.GetFID())
          ref_geometry = ref_feature.GetGeometryRef()
          #  exterior boundaries
          pts = ref_geometry.GetGeometryRef(0)
          points = []
          for p in xrange(pts.GetPointCount()):
              points.append((pts.GetX(p), pts.GetY(p)))
          # convert in a shapely polygon
          ref_polygon_exterior = Polygon(points)
          nHole = ref_geometry.GetGeometryCount()
          if nHole != 1:
          for h in range(1, nHole):
              #  interior boundaries or "holes" of the feature
              pts = ref_geometry.GetGeometryRef(h)
              points = []
              for p in range(pts.GetPointCount()):
                  points.append((pts.GetX(p), pts.GetY(p)))
              ref_polygon_interior = Polygon(points)
              ref_polygon_exterior = ref_polygon_exterior.difference(ref_polygon_interior)
          # Net Reference Polygon
          ref_polygon = ref_polygon_exterior
          # get the area
          ref_Area = ref_polygon.area
          # get centroid of the reference object-i
          geom, xy = ref_polygon.centroid.wkt.split(None, 1)
          xy = xy.strip('()').split()
          xcr, ycr = (float(i) for i in xy)
          # create empty lists
          nObject = 0
          Area_seg, Area_intersect = ([] for _ in range(2))
          RAor, RAos = ([] for _ in range(2))
          OverSeg, UnderSeg, OverMerg, UnderMerg = ([] for _ in range(4))
          qr, SimSize, SegError, Dsr = ([] for _ in range(4))
          # For each segmented objects-j
          for segment in xrange(seg_layer.GetFeatureCount()):
             seg_feature = seg_layer.GetFeature(segment)
             seg_geometry = seg_feature.GetGeometryRef()
             pts = seg_geometry.GetGeometryRef(0)
             points = []
             for p in xrange(pts.GetPointCount()):
                 points.append((pts.GetX(p), pts.GetY(p)))
             seg_polygon_exterior = Polygon(points)
             nHole = seg_geometry.GetGeometryCount()
             if nHole != 1:
             for h in range(1, nHole):
                  #  interior boundaries or "holes" of the feature
                  pts = seg_geometry.GetGeometryRef(h)
                  points = []
                  for p in range(pts.GetPointCount()):
                      points.append((pts.GetX(p), pts.GetY(p)))
                  seg_polygon_interior = Polygon(points)
                  seg_polygon_exterior = seg_polygon_exterior.difference(seg_polygon_interior)
             # Net Segemted Polygon
             seg_polygon = seg_polygon_exterior
             seg_Area = seg_polygon.area
             # get centroid of the segemented object-j
             geom, xy = seg_polygon.centroid.wkt.split(None, 1)
             xy = xy.strip('()').split()
             xcs, ycs = (float(i) for i in xy)
             # intersection (overlap) of reference object with the segmented object
             intersect_polygon = ref_polygon.intersection(seg_polygon)
             # area of intersection (= 0, No intersection)
             intersect_Area = intersect_polygon.area
             # Refinement in order to eliminate spurious effects
             if intersect_Area > (ref_Area*(float(threshold)/100)):
                # Union
                union_polygon = ref_polygon.union(seg_polygon)
                # area of union
                union_Area = union_polygon.area
                # Number of segmented objects
                nObject += 1
                # segmented object area
                Area_seg.append(seg_Area)
                # intersected (=overlapped) region area
                Area_intersect.append(intersect_Area)
                # Area-Based Measures
                # Relative Area of a reference object (RAor)
                RAor.append(intersect_Area/ref_Area)
                # Relative Area of a segmented object (RAos)
                RAos.append(intersect_Area/seg_Area)
                # OverSegmentation (OverSeg)
                OverSeg.append(1-(intersect_Area/ref_Area))
                # UnderSegmentation (UnderSeg)
                UnderSeg.append(1-(intersect_Area/seg_Area))
                # OverMerging (OverMerg)
                OverMerg.append((ref_Area - intersect_Area)/ref_Area)
                # UnderMerging (UnderMerg)
                UnderMerg.append((seg_Area -intersect_Area)/ref_Area)
                # Quality rate (qr)
                qr.append(1-(intersect_Area/(union_Area)))
                # SimSize
                SimSize.append(min(ref_Area,seg_Area)/max(ref_Area,seg_Area))
                # Mean Absolute Segmentation Error (SegError)
                SegError.append(abs(ref_Area - seg_Area)/(ref_Area + seg_Area))
                # Location-based Measures
                # Position discrepancy of segmented object to a reference object
                # Euclidean distance in the xy plane
                Eucdist_sr = math.sqrt(math.pow((xcs-xcr),2)+math.pow((ycs-ycr),2))
                Dsr.append(Eucdist_sr)
             # No segmented objects of intrest
             if nObject == 0:
                  AREAs_average, SDs, seg_AreaMax = (noData for _ in range(3))
                  AREAo_average, SDo, intersect_AreaMax = (noData for _ in range(3))
                  ORrs,RAor_average,RAos_average = (noData for _ in range(3))
                  OverSeg_average,UnderSeg_average  = (noData for _ in range(2))
                  OverMerg_average,UnderMerg_average = (noData for _ in range(2))
                  qr_average,SimSize_average,SegError_average, AFI = (noData for _ in range(4))
                  Dsr_avarage,RPsr_average,dmax,D = (noData for _ in range(4))
             else:
                  ORrs = (1.0/nObject)*100
                  AREAs_average = numpy.average(Area_seg)
                  SDs = numpy.std(Area_seg)
                  seg_AreaMax = numpy.max(Area_seg)
                  AREAo_average = numpy.average(Area_intersect)
                  SDo = numpy.std(Area_intersect)
                  intersect_AreaMax = numpy.max(Area_intersect)
                  # Avarage for all segmented objects
                  RAor_average = numpy.average(RAor)*100
                  RAos_average = numpy.average(RAos)*100
                  OverSeg_average = numpy.average(OverSeg)
                  UnderSeg_average = numpy.average(UnderSeg)
                  OverMerg_average = numpy.average(OverMerg)
                  UnderMerg_average = numpy.average(UnderMerg)
                  qr_average = numpy.average(qr)
                  SimSize_average = numpy.average(SimSize)
                  SegError_average = numpy.average(SegError)
                  # Area Fit Index
                  AFI = (ref_Area-seg_AreaMax)/ref_Area
                  Dsr_avarage = numpy.average(Dsr)
                  # Maximum Distance
                  dmax = numpy.max(Dsr)
                  # Avarage Realative Position (RPsr)
                  RPsr_average = numpy.average(numpy.array(Dsr)/numpy.max(Dsr))
                  # D index
                  D = math.sqrt((math.pow(OverSeg_average,2)+math.pow(UnderSeg_average,2))/2)
             return(" ".join(["%s" %i for i in [FID, ref_Area, nObject, ORrs,\
                  AREAs_average, SDs, seg_AreaMax, AREAo_average, SDo, intersect_AreaMax,\
                  RAor_average, RAos_average, OverSeg_average, UnderSeg_average,\
                  OverMerg_average, UnderMerg_average,qr_average, SimSize_average,\
                  SegError_average, AFI, Dsr_avarage, RPsr_average, dmax, D]])+ "\n")
      
      
      def segmentation_accuracy(reference,segmented,outFile,threshold=10.0,noData=-9999):
          """
          Segmentation accuracy
      
          """
          # check if reference and segmented are ESRI shapefile format
          reference = shapefile_NameFilter(reference)
          segmented = shapefile_NameFilter(segmented)
          # open shapefile
          ref = osgeo.ogr.Open(reference)
          if ref is None:
              raise SystemExit('Unable to open %s' % reference)
          seg = osgeo.ogr.Open(segmented)
          if seg is None:
              raise SystemExit('Unable to open %s' % segmented)
          ref_layer = ref.GetLayer()
          seg_layer = seg.GetLayer()
          # create outfile
          if not os.path.split(outFile)[0]:
              file_path, file_name_ext = os.path.split(os.path.abspath(reference))
              outFile_filename = os.path.splitext(os.path.basename(outFile))[0]
              file_out = open(os.path.abspath("{0}\\{1}.txt".format(file_path, outFile_filename)), "w")
          else:
              file_path_name, file_ext = os.path.splitext(outFile)
              file_out = open(os.path.abspath("{0}.txt".format(file_path_name)), "w")
          # Header
          file_out.write(" ".join(["%s" %i for i in ["ReferenceFID","AREAr",\
          "nObject","ORrs","AREAs","SDs","AREAsMAX","AREAo","SDo","AREAoMAX",\
          "RAor","RAos","OverSeg","UnderSeg","OverMerg","UnderMerg","qr","SimSize",\
          "SegError","AFI","Dsr","RPro","dmax","D"]])+ "\n")
          for index in xrange(ref_layer.GetFeatureCount()):
              file_out.write(object_accuracy(index))
          file_out.close()
      

0 个答案:

没有答案