我正在尝试使用python从文件夹中的多个tiff文件中提取值。 我已经使用glob-glob遍历文件并根据纬度和经度提取每个位置的值。我已经编写了代码,并且起初运行良好,但最终却不断消耗我的内存,并且系统挂起。我有将近5000个此类文件和200多个位置。代码如下。
import math,os,sys,glob
import gdal
from openpyxl import load_workbook
f1=('C:\Users\APPLIED\Desktop\SPIE Analysis\Data\Data1.xlsx')
wt = load_workbook(f1, read_only=False)#, keep_vba=True)
sheet1 = wt['Sheet1']
#---------------------------------------------------------------------------
lat = []
file_in = open("C:\Users\APPLIED\Desktop\SPIE Analysis\Data\lat.txt", "r")
for y in file_in.read().split("\n"):
lat.append(y)
file_in.close
lon = []
file_in = open("C:\Users\APPLIED\Desktop\SPIE Analysis\Data\lon.txt", "r")
for y in file_in.read().split("\n"):
lon.append(y)
file_in.close
#---------------------------------------------------------------------------
lp=0
y1=2002
print y1
os.chdir("F:\\wget\\Renamed")
for file in glob.glob("*.tif"):
b=[]
b=list(file)
c = file
a = [b[0],b[1],b[2],b[3]]
d = [b[4],b[5],b[6]]
year = int(''.join(a))
day = int(''.join(d))
dif=year-2002
if year!=y1:
print ""
print year
if (year-1)%4==0:
lp=lp+1
y1=year
else:
sys.stdout.write(">")
r=day+dif*365+lp+3
for i in range(0,274):
u0 = float(os.popen('gdallocationinfo -valonly -wgs84 %s %s %s' % (c, (lon[i]), lat[i])).read())
if u0<0:
if u0==-9999:
sheet1.cell(row=r,column=i+2).value = u0
else:
sheet1.cell(row=r,column=i+2).value = 0
else:
sheet1.cell(row=r,column=i+2).value = u0/1000
wt.save(f1)
答案 0 :(得分:0)
我写了这个小代码段,希望它能起作用。我建议您使用pandas
库而不是openpyxl
,因为它更容易操作。我还对代码进行了一些更改和优化,使其更符合当今的标准和python 3。
import math,os,sys,glob
import gdal
import pandas as pd
import numpy as np
#---------------------------------------------------------------------------
# newer version of doing things
with open("C:\Users\APPLIED\Desktop\SPIE Analysis\Data\lat.txt", "r") as file_in:
lat = [y for y in file_in.read().split("\n")]
with open("C:\Users\APPLIED\Desktop\SPIE Analysis\Data\lon.txt", "r") as file_in:
lon = [y for y in file_in.read().split("\n")]
#---------------------------------------------------------------------------
lp = 0
y1 = 2002
print(y1)
os.chdir("F:\\wget\\Renamed")
data_dict = {}
for file in glob.glob("*.tif"):
year = int(file[:4]) # you can slice strings and convert them to int like this
day = int(file[4:7]) # string is a vector of chars so you can just slice it
dif = year - 2002
if year != y1:
print("")
print(year)
# In python if something == 0 and if something is False (not True) is equivalent
if not (year - 1) % 4:
lp = lp + 1
y1 = year
else:
print(">")
r = day + dif * 365 + lp + 3
arr = np.zeros((1, 274))
for i in range(274):
u0 = float(os.popen('gdallocationinfo -valonly -wgs84 %s %s %s' % (file, (lon[i]), lat[i])).read())
if u0 < 0:
if u0 == -9999:
val_to_write = u0
else:
val_to_write = 0
else:
val_to_write = u0/1000
arr[i] = val_to_write
data_dict[r] = arr
# we construct a pandas dataframe out of the dictionary
df = pd.DataFrame.from_dict(data_dict, orient='index')
# I'm not sure whether all r indices will be consecutive or will some values be missing
# In the latter case we will reindex the data frame, missing rows will be added and filled with NaN
# We're creating a new Index from 0 to whatever is the maximum key value in the dictionary, +1
df = df.reindex(pd.Index(np.arange(0, max(data_dict, key=int) + 1))) # only if indices are missing, e.g. 0,2,8,9
# Finally we're saving the dataframe to xlsx file
writer = pd.ExcelWriter('C:\Users\APPLIED\Desktop\SPIE Analysis\Data\Data1.xlsx')
df.to_excel(writer, 'Sheet1', index=False, header=False, startcol=2)
writer.save()