这是我的python代码,它在其中读取栅格堆栈并进行Numpy操作并将图像重新写入另一个文件夹。
from osgeo import gdal
import numpy as np
from os import listdir
import os
from scipy.stats import skew
import pandas as pd
import pysal as ps
def skewness(x):
x = x1
x = x[~np.isnan(x)]
n = x.shape[0]
numer = sum((x - np.mean(x))**3)*(n**-1)
denom = (np.var(x))**1.5
return(numer/denom)
filnam = listdir("/media/root/Seagate Backup Plus Drive/movement data/python_crop/")
pathname = "/media/root/Seagate Backup Plus Drive/movement data/python_crop/"
csvpath = "/media/root/Seagate Backup Plus Drive/movement data/python_csv/"
def par_ews(j):
filename = pathname + filnam[j]
writedir = "/media/root/Seagate Backup Plus Drive/movement data/python_crop1/" + filnam[j][:-4].strip() + "/"
os.makedirs(writedir)
filehandle = gdal.Open(filename)
geotransform = filehandle.GetGeoTransform()
geoproj = filehandle.GetProjection()
Z = filehandle.ReadAsArray()
Z = np.array(Z)
Z[Z<0]= np.nan
Z = Z/10000
format = "GTiff"
driver = gdal.GetDriverByName(format)
(xres,yres) = [Z.shape[1], Z.shape[2]]
dst_datatype = gdal.GDT_Float32
var=np.zeros(Z.shape[0])
skew=np.zeros(Z.shape[0])
moran=np.zeros(Z.shape[0])
csvfile = csvpath + filnam[j][:-4].strip() + '.csv'
for i in range(Z.shape[0]):
writefile = writedir+ 'band' + str(i+1) +'.tif'
print(writefile)
dst_ds = driver.Create(writefile,yres,xres,1,dst_datatype)
print('driver created')
dst_ds.SetGeoTransform(geotransform)
dst_ds.SetProjection(geoproj)
dst_ds.GetRasterBand(1).WriteArray(Z[i])
dst_ds.FlushCache()
x = Z[i]
x1 = np.reshape(x, x.shape[0]*x.shape[1])
num = np.count_nonzero(~np.isnan(x1))/x1.shape[0]
print(i)
if(num > 0.9):
x2 = x1[~np.isnan(x1)]
var[i]=np.var(x2)
print("Variance")
skew[i]=skewness(x2)
print("skewness")
w = ps.lat2W(Z.shape[1],Z.shape[2], rook=False, id_type="int")
print("weights")
lm = ps.Moran(Z[i], w)
print("moran")
moran[i]=lm.I
dat = np.column_stack((var,skew,moran))
df = pd.DataFrame({'variance':dat[:,0],'skew':dat[:,1], 'Moran':dat[:,2]})
df.to_csv(csvfile, sep='\t', encoding='utf-8')
print(j)
我已经使用多重处理对此进行了并行化处理:
import multiprocessing as mp
pool = mp.Pool(processes=4)
[pool.apply_async(par_ews, args=(j,)) for j in range(len(filnam))]
每次迭代时,一个文件夹中应写入345个文件;但是写入文件夹的文件数量在2-345之间变化。因此,它不会在每次迭代下完成任务。随着时间的流逝,处理速度也会下降。有什么方法可以即兴编写此代码?