我有两个程序,第一个程序将.h5文件转换为.tiff文件,第二个程序使用arima模型预测温度。
这是第一个将.h5文件转换为.tiff文件和min和max .tiff的程序,它也调用geo tiff。
from osgeo import gdal
import numpy as np
import os
import h5py
from collections import defaultdict
from osgeo import osr
import datetime
in_dir = r'/Users/sunnybhargav/Desktop/jan'
out_dir = r'/Users/sunnybhargav/Desktop/new_output'
#in_dir = input('Enter input directory path where hdf files are stored: ')
#out_dir = input('Enter output directory path where geotiff files are to be stored: ')
def arrayToTif(array,tifFilePath,proj,transform,nodatavalue):
with open(tifFilePath,'a') as file:
pass
# write raster
out_ds = gdal.GetDriverByName('GTiff').Create(tifFilePath,
array.shape[1],
array.shape[0],
1, #Number of bands
gdal.GDT_Float32)
out_ds.GetRasterBand(1).WriteArray(array)
out_ds.GetRasterBand(1).SetNoDataValue(nodatavalue)
# close tif to write into disk (free tif file)
out_ds = None
dates_dict = defaultdict(list)
for root,directories,filenames in os.walk(in_dir):
for filename in filenames:
if (filename.endswith('.h5')):
hdffileDate = filename[6:15]
hdfdate = (int(hdffileDate[0:2]))
dates_dict[hdfdate].append(filename)
print(filename)
for key in dates_dict.keys():
file_list = dates_dict[key]
min_lst = 1000*np.ones((2816,2805))
max_lst = -1000*np.ones((2816,2805))
for v in file_list:
hdf_ds = h5py.File(os.path.join(in_dir,v))
lst = np.array(hdf_ds['LST'])[0,:,:]
hdf_ds = gdal.Open(os.path.join(in_dir,v))
metadata = hdf_ds.GetMetadata_Dict()
lst = lst.astype('Float32')
max_lst = np.maximum(max_lst,lst)
lst[lst==-999] = 999
min_lst = np.minimum(min_lst,lst)
min_lst[min_lst==999] = -999
transform = (0,1,0,0,0,-1)
proj = None
nodatavalue = -999
tiffileDate = v[6:15]
MinName = 'MIN' +v[0:2]+str.lower(tiffileDate) + '.tif'
MaxName = 'MAX' +v[0:2]+str.lower(tiffileDate) + '.tif'
arrayToTif
(max_lst,os.path.join(out_dir,MaxName),proj,transform,nodatavalue)
arrayToTif
(min_lst,os.path.join(out_dir,MinName),proj,transform,nodatavalue)
del lst
del min_lst
del max_lst
第二个程序
在第二个程序中是tiff获取ndarray然后输出特定的所有最大温度并预测接下来的5天预测。
import pandas as pd
import seaborn as sns
import matplotlib
import numpy as np
from sklearn import metrics
from sklearn.cross_validation import train_test_split
from sklearn.linear_model import LinearRegression
from numpy import genfromtxt
import csv
import datetime
from datetime import datetime
import time
from matplotlib import pyplot
from pandas import Series
from statsmodels.tsa.arima_model import ARIMA
import numpy
from statsmodels.tsa.stattools import adfuller
import matplotlib.pylab as plt
from statsmodels.tsa.stattools import acf, pacf
from sklearn.metrics import mean_squared_error
import numpy as np
import subprocess
import gdal,osr
from gdalconst import *
import os
import numpy as np
from PIL import Image
import scipy.misc
from datetime import datetime
# import timeseries as ts
count = 1
max_temp = []
min_temp = []
filename = []
filenamer = []
max_temp_points = []
min_temp_points = []
source = r'/Volumes/bhargav 1/data/NEW_MAX'
for root, dirs, filenames in os.walk(source):
print(filenames)
for f in filenames:
print (f)
dataset = gdal.Open( source + '//' + f ,gdal.GA_ReadOnly)
#print(dataset)
geotransform = dataset.GetGeoTransform()
band = dataset.GetRasterBand(1)
data = band.ReadAsArray(0,0,dataset.RasterXSize,dataset.RasterYSize).astype(np.float64)
#print(np.histogram(data,bins=500))
print(np.shape(data))
max_temp_point = data[793][1160]
max_temp_point = max_temp_point - 273
print(max_temp_point)
print("Count:",count)
max_temp_points.append(max_temp_point)
count = count + 1
print(np.shape(max_temp_points))
print(np.mean(max_temp_points))
count = 1
np.save("Max_temp_points_1",max_temp_points)
X = max_temp_points
model = ARIMA(X, order=(5,0,4))
model_fit = model.fit(disp=-1)
# print summary of fit model
print(model_fit.summary())
forecast = model_fit.predict()
print (forecast)
# plot
start_index = len(X)
end_index = start_index + 6
predict_val = model_fit.predict(start=start_index, end=end_index)
print('Prediction:',predict_val)
pyplot.plot(X)
pyplot.plot(forecast, color='red')
pyplot.show()