Python multiprocessing an arcgis shapefile with PP or async stalls on big files

Date: 2013-05-02 07:26:57

Tags: python multiprocessing arcpy pp-python-parallel

I'm new to this and trying to implement Parallel Python (PP) or async to multiprocess arcgis shapefile clipping. I've had success with both pool_async and PP; however, they stall forever on big files (and yes, I tried making Python large-address aware). Here is my code using PP; please offer any solution you have :-) (For reference, a sketch of the pool variant is included after the PP code below.)

import os
import sys
import time

import arcpy
import pp


def ClipDo(F, M, O, OW=""):
    # Clip the base shapefile F against every clip shapefile in M,
    # writing each result into its own subdirectory of O.
    print "\n" + "PID:%s" % (os.getpid())

    # Overwrite existing output only when the caller asks for it.
    arcpy.env.overwriteOutput = (OW != "")

    FPath = os.path.dirname(F)
    F = os.path.basename(F)
    ClipList = []
    # pattern = '*.shp'  # unused in the original

    for filename in M:
        ClipList.append(filename)
        clipN = str(os.path.splitext(os.path.basename(filename))[0])
        if not os.path.isdir(os.path.join(O, clipN)):
            os.makedirs(os.path.join(O, clipN))

    for clip in ClipList:
        clipN = str(os.path.splitext(os.path.basename(clip))[0])
        OutShp = clipN + "_" + F

        try:
            print "Clipping, Base File: %s Clip File: %s Output: %s" % (
                F, clip, os.path.join(O, clipN, OutShp))
            # M already holds the paths to the clip shapefiles, so pass
            # "clip" directly; the original os.path.join(M, clip) handed
            # the list M to os.path.join, which fails.
            arcpy.Clip_analysis(os.path.join(FPath, F), clip,
                                os.path.join(O, clipN, OutShp))
            print "Clipping SUCCESS"

        except Exception as e:
            print "Clipping FAILED " + F + ": %s" % e


def PP(F, M, O, OW):
    print F
    # Tuple of all parallel python servers to connect with.
    ncpus = 6
    ppservers = ("localhost",)
    #ppservers = ("10.0.0.1",)

    if len(sys.argv) > 1:
        # A worker count supplied on the command line overrides the default.
        ncpus = int(sys.argv[1])

    # Creates the job server with ncpus workers. (The original else-branch
    # comment said "automatically detected", but it passed ncpus anyway;
    # omit the argument entirely to let pp autodetect.)
    job_server = pp.Server(ncpus, ppservers=ppservers)

    print "Starting pp with", job_server.get_ncpus(), "workers"

    jobs = []
    start_time = time.time()

    for f in F:
        # ClipDo also needs the "os" module inside the worker process;
        # the original modules tuple listed only arcpy and NullGeomFilter.
        job = job_server.submit(ClipDo, (f, M, O, OW), (),
                                ("os", "arcpy", "NullGeomFilter"))
        jobs.append(job)

    for job in jobs:
        result = job()
        print result
        # ClipDo returns None, so this break never fires unless ClipDo
        # is changed to return a value on failure.
        if result:
            break

    job_server.destroy()

    print "\n" + "PID:%s" % (os.getpid())

    print "Time elapsed: ", time.time() - start_time, "s"

1 Answer:

Answer 0 (score: 0)

Could it be that your big chunks are simply too big for arcpy, and that the parallelization isn't the problem?

As a test, it might be good to run one of the arg lists through your function interactively/locally with the big data and see if it works correctly. If it does, then you can move on to logging and debugging the parallel version.
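
For example, that local test could look like this (a sketch; the paths are placeholders, not taken from the question):

import traceback

# Call the function directly, with no job server involved, so any
# arcpy error or hang on the big file can be observed immediately.
try:
    ClipDo("C:/data/big_base.shp",
           ["C:/clips/zone1.shp", "C:/clips/zone2.shp"],
           "C:/output")
except Exception:
    traceback.print_exc()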