将 Python 程序转换为 Hadoop 的 mapper 和 reducer 程序

时间:2018-03-30 03:09:14

标签: python hadoop

我有两个程序,第一个程序将.h5文件转换为.tiff文件,第二个程序使用arima模型预测温度。

这是第一个程序:它将 .h5 文件转换为 .tiff 文件,并生成最小值(min)和最大值(max)的 .tiff 文件,这种文件也称为 GeoTIFF。

from osgeo import gdal
import numpy as np
import os
import h5py
from collections import defaultdict
from osgeo import osr
import datetime

# Directory that is walked for the input .h5 files.
in_dir = r'/Users/sunnybhargav/Desktop/jan'
# Directory onto which the output .tif file names are joined.
out_dir = r'/Users/sunnybhargav/Desktop/new_output'
# Interactive alternative to the hard-coded paths above (currently disabled):
#in_dir = input('Enter input directory path where hdf files are stored: ')
#out_dir = input('Enter output directory path where geotiff files are to be stored: ')



def arrayToTif(array,tifFilePath,proj,transform,nodatavalue):
    """Write a 2-D array to a single-band Float32 GeoTIFF.

    Args:
        array: 2-D array; ``shape[0]`` is used as the raster height and
            ``shape[1]`` as the raster width.
        tifFilePath: Destination path of the GeoTIFF. GDAL creates (or
            overwrites) the file itself.
        proj: Projection to store on the dataset, or ``None``/empty to leave
            it unset.  NOTE(review): assumed to be a WKT string -- confirm
            against callers.
        transform: Six-element GDAL geotransform, or ``None`` to leave the
            dataset without one.
        nodatavalue: Value recorded as the band's no-data marker.

    Raises:
        RuntimeError: If GDAL cannot create the output dataset.
    """
    height, width = array.shape[0], array.shape[1]

    out_ds = gdal.GetDriverByName('GTiff').Create(tifFilePath,
                                                  width,
                                                  height,
                                                  1,  # Number of bands
                                                  gdal.GDT_Float32)
    if out_ds is None:
        raise RuntimeError('GDAL could not create ' + str(tifFilePath))

    # Georeferencing is optional: only apply what the caller supplied.
    if transform is not None:
        out_ds.SetGeoTransform(transform)
    if proj:
        out_ds.SetProjection(proj)

    band = out_ds.GetRasterBand(1)
    band.WriteArray(array)
    band.SetNoDataValue(nodatavalue)
    band.FlushCache()

    # Drop the band before the dataset so GDAL closes the file and
    # writes everything to disk.
    band = None
    out_ds = None



# Per-day minimum / maximum LST composites.
#
# Every .h5 file below in_dir is grouped by the date token taken from its
# file name, the 'LST' dataset of each file in a group is folded into a
# running per-pixel minimum and maximum, and both composites are written to
# out_dir as MIN*.tif / MAX*.tif.

# Sentinel used for missing pixels, both in the inputs and in the outputs.
nodatavalue = -999
# Placeholder georeferencing handed to arrayToTif.
transform = (0,1,0,0,0,-1)
proj = None

dates_dict = defaultdict(list)

for root,directories,filenames in os.walk(in_dir):
    for filename in filenames:
        if filename.endswith('.h5'):
            # NOTE(review): characters 6..14 of the name are assumed to be
            # the acquisition date -- confirm against the
            # actual file naming scheme.  Grouping on the whole token keeps
            # equal day numbers from different months apart.
            hdffileDate = filename[6:15]
            # Keep the full path: os.walk also descends into sub-directories.
            dates_dict[hdffileDate].append(os.path.join(root,filename))
            print(filename)

os.makedirs(out_dir, exist_ok=True)

for key in sorted(dates_dict):

    file_list = dates_dict[key]
    min_lst = None
    max_lst = None

    for v in file_list:

        with h5py.File(v,'r') as hdf_ds:
            lst = np.array(hdf_ds['LST'])[0,:,:].astype(np.float32)

        if min_lst is None:
            # Size the accumulators from the first raster of the day instead
            # of assuming one fixed grid size.
            min_lst = np.full(lst.shape, 1000.0)
            max_lst = np.full(lst.shape, -1000.0)

        # -999 never wins against a real value here, so a pixel that is
        # missing in every file ends up as -999 (the no-data marker).
        max_lst = np.maximum(max_lst,lst)

        # For the minimum the marker must lose instead, so it is flipped to
        # +999 while folding and restored once the day is complete.
        lst[lst==nodatavalue] = 999
        min_lst = np.minimum(min_lst,lst)

    min_lst[min_lst==999] = nodatavalue

    # Output names reuse the two-character prefix and the date token of the
    # last input file of the group.
    last_name = os.path.basename(file_list[-1])
    tiffileDate = last_name[6:15]
    MinName = 'MIN' +last_name[0:2]+str.lower(tiffileDate) + '.tif'
    MaxName = 'MAX' +last_name[0:2]+str.lower(tiffileDate) + '.tif'

    arrayToTif(max_lst,os.path.join(out_dir,MaxName),proj,transform,nodatavalue)
    arrayToTif(min_lst,os.path.join(out_dir,MinName),proj,transform,nodatavalue)

    # Release the large arrays before the next day is processed.
    del lst
    del min_lst
    del max_lst

第二个程序

第二个程序将 tiff 文件读取为 ndarray,提取指定像素点的所有最高温度,并预测接下来 5 天的温度。

import pandas as pd
import seaborn as sns
import matplotlib 
import numpy as np
from sklearn import metrics
from sklearn.cross_validation import train_test_split
from sklearn.linear_model import LinearRegression
from numpy import genfromtxt
import csv
import datetime
from datetime import datetime 
import time
from matplotlib import pyplot
from pandas import Series
from statsmodels.tsa.arima_model import ARIMA
import numpy
from statsmodels.tsa.stattools import adfuller
import matplotlib.pylab as plt
from statsmodels.tsa.stattools import acf, pacf
from sklearn.metrics import mean_squared_error
import numpy as np 
import subprocess
import gdal,osr
from gdalconst import *
import os
import numpy as np
from PIL import Image
import scipy.misc
from datetime import datetime

# import timeseries as ts 
# Build a temperature series for one pixel from the rasters below `source`,
# fit an ARIMA(5, 0, 4) model to it and plot the series with the model's
# in-sample prediction.

# Raster cell (row, column) whose value is sampled from every file.
PIXEL_ROW = 793
PIXEL_COL = 1160
# Number of out-of-sample values requested after the end of the series.
# NOTE(review): the accompanying description mentions a 5-day forecast, while
# the index range used here yields 7 values -- confirm which is intended.
FORECAST_STEPS = 7

max_temp_points = []
source = r'/Volumes/bhargav 1/data/NEW_MAX'

for root, dirs, filenames in os.walk(source):
    # os.walk yields names in arbitrary order; a time series needs a stable
    # one.  NOTE(review): lexicographic order is assumed to be chronological
    # for these file names -- confirm.
    dirs.sort()
    filenames = sorted(filenames)
    print(filenames)
    for f in filenames:
        # Skip anything that is not a raster (e.g. hidden system files).
        if not f.lower().endswith(('.tif', '.tiff')):
            continue

        print (f)
        dataset = gdal.Open(os.path.join(root, f), gdal.GA_ReadOnly)
        if dataset is None:
            print('Skipping unreadable file:', f)
            continue

        band = dataset.GetRasterBand(1)
        # Read only the 1x1 window at the sampled cell (xoff is the column,
        # yoff the row) instead of loading the whole raster.
        pixel = band.ReadAsArray(PIXEL_COL, PIXEL_ROW, 1, 1)
        band = None
        dataset = None
        if pixel is None:
            print('Skipping file without the sampled pixel:', f)
            continue

        # NOTE(review): presumably Kelvin -> Celsius (the exact offset would
        # be 273.15) -- confirm the unit of the input rasters.
        max_temp_point = float(pixel[0, 0]) - 273
        print(max_temp_point)
        max_temp_points.append(max_temp_point)
        print("Count:",len(max_temp_points))

if not max_temp_points:
    raise SystemExit('No readable .tif rasters found under ' + source)

print(np.shape(max_temp_points))
print(np.mean(max_temp_points))
np.save("Max_temp_points_1",max_temp_points)

X = np.asarray(max_temp_points, dtype=np.float64)

model = ARIMA(X, order=(5,0,4))
model_fit = model.fit(disp=-1)
# print summary of fit model
print(model_fit.summary())
# In-sample prediction over the observed series.
forecast = model_fit.predict()
print (forecast)


# Out-of-sample prediction; `end` is inclusive, hence the "- 1".
start_index = len(X)
end_index = start_index + FORECAST_STEPS - 1
predict_val = model_fit.predict(start=start_index, end=end_index)
print('Prediction:',predict_val)


# plot
pyplot.plot(X)

pyplot.plot(forecast, color='red')
pyplot.show()

0 个答案:

没有答案