我试图通过建立随机森林模型来估算天然气的浓度,并使用 cPickle 对模型进行序列化(pickle)。以下是“regression_rf_dump_model.py”文件中的代码:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from math import sqrt
import pandas as pd
import cPickle
import os
#import numpy as np
print "code started"
train = pd.read_csv("D:/FH KOELN/Courses/Semester 3/Prof.Gaida_SVM/TDMR/Test/final_combined_data_random_train.csv") #Reading the file
new_train=train.drop(train.columns[[0,1,2,3,4,5,6,7,8,118]], axis=1)
colnames=list(new_train)
len_column = len(new_train.columns)
b = len(train)
len_iteration=len_column-1
j=3000
i=0
new_col=pd.DataFrame(index=range(0,b),columns=['temp'])
while i < len_iteration:
if int(colnames[i])== j:
i=i+1
j=j+5;
else:
for m in range(0,b):
new_col.iloc[m]=(new_train.iloc[m,i-1]+new_train.iloc[m,i+1])/2
new_train.insert(i,str(j),new_col)
colnames=list(new_train)
j=j+5
i=i+1
len_iteration=len_iteration+1;
trainRes = train['Methane'] #Response column
trainArr = new_train.as_matrix(colnames) #Convert dataframe into array matrix representation
print "building model"
#For RF
rf = RandomForestRegressor(n_estimators=678,max_features=12,min_samples_split=1)
rf.fit(trainArr, trainRes) #Fit the random forest model
os.chdir("D:/")
with open('methane_forest1.pickle', 'wb') as f:
cPickle.dump(rf, f,-1)
f.close()
使用这段代码,我能够生成名为 'methane_forest1.pickle' 的 pickle 模型文件,大小约 40 MB。
为了加载该 pickle 文件,我创建了另一个名为 'RF_Classification_Pkl_Loading.py' 的 Python 文件,代码如下:
import pandas as pd
from operator import truediv
import cPickle
import xml.etree.cElementTree as ET
import os
from sklearn.ensemble import RandomForestRegressor
with open('/home/pi/2015-06-09_21-19-50_scan_data_22.txt','r') as f:
data=[x.strip().split('\t') for x in f]
rowno=len(data)
print "No of rows",rowno
colno=len(data[1])
print "No of columns",colno
print data[rowno-1][0]
rn=rowno-1
a=[];
for col in range(2,colno):
sum =0
for row in range(rowno-10,rowno):
sum=sum+float(data[row][col])
avg=sum/10
a.append(avg)
with open('/home/pi/BG File.txt','r') as f:
ref=[x.strip().split('\t') for x in f]
rownof=len(ref)
print "No of rows",rownof
colnof=len(ref[1])
print "No of columns",colnof
b=[];
for col1 in range(2,colnof):
sum =0
for row1 in range(rownof-25,rownof):
sum=sum+float(ref[row1][col1])
avg1=sum/25
b.append(avg1)
arr1 = reversed(b)
arr2=[];
meas=[]
meas=map(truediv,a,b)
print len(meas)
meas.pop(140)
meas.pop(139)
meas.pop(138)
meas.pop(137)
print meas
with open('/home/pi/rf_classification_pkl.pickle', 'rb') as f:
forest2 = cPickle.load(f)
h2s=forest2.predict(meas)
print h2s
timedata=data[rownof-1][0]
date=data[rownof-1][1]
timeinfo=timedata +" "+ date
print timeinfo
Meth=10
Measurement = ET.Element("Measurement")
ET.SubElement(Measurement, "Time").text = timeinfo
ET.SubElement(Measurement, "Methane").text = str(Meth)
ET.SubElement(Measurement, "H2S").text = str(h2s)
tree = ET.ElementTree(Measurement)
os.chdir("/home/pi/")
tree.write("Classification_Result.xml")
我将 pickle 文件 'methane_forest1.pickle' 和 'RF_Classification_Pkl_Loading.py' 文件复制到了 Raspberry Pi 上,但运行时收到如下错误:
AttributeError : 'module' object has no attribute 'Tree'
相同的代码可以在我的电脑上执行,但在 Raspberry Pi 上无法执行。请帮忙。
答案 0 :(得分:0)
我找到了问题的答案。正如我在问题中所说,我是在装有 Windows 7 的 PC 上创建的 pickle 文件,然后试图在 Raspberry Pi 上加载它。但我的 PC 和 Raspberry Pi 上安装的 sklearn 版本不同,由于版本冲突,pickle 文件无法加载。在 Raspberry Pi 上重新创建 pickle 文件后,我就能正常加载它,不再出现任何错误。