Incomplete GDAL/NumPy operations when using the multiprocessing package in Python

Date: 2018-08-09 14:08:45

Tags: python numpy parallel-processing multiprocessing

This is my Python code: it reads in a raster stack, runs NumPy operations on each band, and writes the bands back out as images to another folder.

from osgeo import gdal 
import numpy as np
from os import listdir
import os
from scipy.stats import skew
import pandas as pd
import pysal as ps

def skewness(x):
    # Biased sample skewness m3 / m2**1.5, computed over the non-NaN values.
    x = x[~np.isnan(x)]
    n = x.shape[0]
    numer = np.sum((x - np.mean(x))**3) / n
    denom = np.var(x)**1.5
    return numer / denom

filnam = listdir("/media/root/Seagate Backup Plus Drive/movement data/python_crop/")
pathname = "/media/root/Seagate Backup Plus Drive/movement data/python_crop/"
csvpath = "/media/root/Seagate Backup Plus Drive/movement data/python_csv/"

def par_ews(j):

    filename = pathname + filnam[j]
    writedir = "/media/root/Seagate Backup Plus Drive/movement data/python_crop1/" + filnam[j][:-4].strip() + "/"
    os.makedirs(writedir, exist_ok=True)  # don't fail if the folder already exists
    filehandle = gdal.Open(filename)

    geotransform = filehandle.GetGeoTransform()
    geoproj = filehandle.GetProjection()
    Z = filehandle.ReadAsArray().astype(np.float32)  # cast to float so NaN can be assigned
    Z[Z < 0] = np.nan    # treat negative values as nodata
    Z = Z / 10000.0
    format = "GTiff"
    driver = gdal.GetDriverByName(format)
    (xres,yres) = [Z.shape[1], Z.shape[2]]
    dst_datatype = gdal.GDT_Float32

    var = np.zeros(Z.shape[0])
    skw = np.zeros(Z.shape[0])     # 'skw' so the scipy.stats.skew import is not shadowed
    moran = np.zeros(Z.shape[0])

    csvfile = csvpath + filnam[j][:-4].strip() + '.csv'

    for i in range(Z.shape[0]):

        # Write band i out as a single-band GeoTIFF.
        writefile = writedir + 'band' + str(i+1) + '.tif'
        print(writefile)
        dst_ds = driver.Create(writefile, ncols, nrows, 1, dst_datatype)
        print('driver created')
        dst_ds.SetGeoTransform(geotransform)
        dst_ds.SetProjection(geoproj)
        dst_ds.GetRasterBand(1).WriteArray(Z[i])
        dst_ds.FlushCache()

        x = Z[i]
        x1 = np.reshape(x, x.shape[0] * x.shape[1])
        num = np.count_nonzero(~np.isnan(x1)) / x1.shape[0]  # fraction of valid pixels

        print(i)

        # Only compute statistics for bands with more than 90% valid pixels.
        if num > 0.9:
            x2 = x1[~np.isnan(x1)]
            var[i] = np.var(x2)
            print("Variance")
            skw[i] = skewness(x2)
            print("skewness")
            # The lattice weights are the same for every band, so this could
            # also be built once outside the loop.
            w = ps.lat2W(Z.shape[1], Z.shape[2], rook=False, id_type="int")
            print("weights")
            lm = ps.Moran(x1, w)  # one observation per lattice cell, hence the flat array
            print("moran")
            moran[i] = lm.I

    dat = np.column_stack((var, skw, moran))
    df = pd.DataFrame({'variance': dat[:, 0], 'skew': dat[:, 1], 'Moran': dat[:, 2]})
    df.to_csv(csvfile, sep='\t', encoding='utf-8')
    print(j)
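
As a quick aside, the hand-rolled skewness above is the biased sample skewness m3 / m2**1.5, which is exactly what scipy.stats.skew (imported above but otherwise unused) computes by default. A minimal check on synthetic data, not part of the pipeline:

import numpy as np
from scipy.stats import skew

np.random.seed(0)
sample = np.random.gamma(shape=2.0, scale=1.0, size=10000)
print(skewness(sample))   # hand-rolled estimate
print(skew(sample))       # scipy's default (bias=True) gives the same value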

I have parallelized this using multiprocessing:

import multiprocessing as mp
pool = mp.Pool(processes=4)
[pool.apply_async(par_ews, args=(j,)) for j in range(len(filnam))]
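
Note that apply_async only queues the work and returns an AsyncResult immediately; nothing in the snippet above waits for the workers. For reference, a minimal sketch of the same dispatch that blocks until every task has finished and surfaces any worker exception (assuming the par_ews and filnam defined above) would be:

import multiprocessing as mp

if __name__ == "__main__":
    pool = mp.Pool(processes=4)
    results = [pool.apply_async(par_ews, args=(j,)) for j in range(len(filnam))]
    pool.close()   # no further tasks will be submitted
    pool.join()    # block until all workers have finished
    for r in results:
        r.get()    # re-raises any exception raised inside a worker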

Each call should write 345 files into its folder, but the number of files actually written varies between 2 and 345, so the task does not finish on every iteration. Processing also slows down over time. Is there any way to improve this code?

0 Answers:

No answers yet.