我正在使用bluemix对象存储容器,我想做什么,我想存储我的" RandomForestRegressor"使用joblib进入pkl文件。但是当我使用Swift客户端运行代码时,我收到错误。
TypeError: object of type 'DecisionTreeRegressor' has no len()
这是我的代码,请帮助。
import os
from flask import Flask,render_template, request,json
from flask.ext.cors import CORS
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
import random
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error
import os
from sklearn.externals import joblib
import pickle
import sys
import json
import csv
import swiftclient
app = Flask(__name__)
CORS(app)
cloudant_service = json.loads(os.environ['VCAP_SERVICES'])['Object-Storage'][0]
objectstorage_creds = cloudant_service['credentials']
if objectstorage_creds:
auth_url = objectstorage_creds['auth_url'] + '/v3' #authorization URL
password = objectstorage_creds['password'] #password
project_id = objectstorage_creds['projectId'] #project id
user_id = objectstorage_creds['userId'] #user id
region_name = objectstorage_creds['region'] #region name
def predict_joblib():
conn = swiftclient.Connection(key=password,
authurl=auth_url,
auth_version='3',
os_options={"project_id": project_id,
"user_id": user_id,
"region_name": region_name})
container_name = 'my-container'
# File name for testing
file_name = 'example_file.txt'
# Create a new container
conn.put_container(container_name)
print ("nContainer %s created successfully." % container_name)
# List your containers
print ("nContainer List:")
for container in conn.get_account()[1]:
print (container['name'])
# List objects in a container, and prints out each object name, the file size, and last modified date
print ("nObject List:")
for container in conn.get_account()[1]:
for data in conn.get_container(container['name'])[1]:
print ('object: {0}t size: {1}t date: {2}'.format(data['name'], data['bytes'], data['last_modified']))
print ("-----------LEARN-----------\n")
with open('training_set.json') as json_data:
df_train= pd.read_json(json_data)
train_X = df_train.drop('Price', 1)
train_y = df_train['Price']
print ("Training...")
rfreg = RandomForestRegressor(n_estimators=100, n_jobs=-1)
rfreg.fit(train_X, train_y)
print("\nPerformance on training set:")
print('R^2: %f' % rfreg.score(train_X, train_y))
# print('MSE: %f' % mean_squared_error(rfreg.predict(train_X), train_y))
# print('ABS: %f' % mean_absolute_error(rfreg.predict(train_X), train_y))
importances = rfreg.feature_importances_
std = np.std([tree.feature_importances_ for tree in rfreg.estimators_], axis=0)
indices = np.argsort(importances)[::-1]
# Print the feature ranking
print("\nFeature ranking:")
for f in range(len(importances)):
print("%d. feature %d %s (%f)" % (f + 1, indices[f], df_train.columns[indices[f]], importances[indices[f]]))
# SERIALIZE MODEL USING joblib
print ("Serializing models using joblib...")
conn.put_object(container_name,'v3.pkl', contents= rfreg)
print ("Serializing vectorizers using joblib...")
for feature in ['Fluorescence', 'Culet']:
conn.put_object(container_name,feature+'_v3.pkl', contents= vectorizers[feature])
return rfreg, vectorizers
@app.route('/')
def hello():
predict_joblib()
return 'Welcome to Python Flask!'
@app.route('/signUp')
def signUp():
return 'signUp'
port = os.getenv('PORT', '5000')
if __name__ == "__main__":
app.debug = True
app.run(host='0.0.0.0', port=int(port))