使用DecisionTreeRegressor的Swift客户端

时间:2016-05-17 12:07:11

标签: python containers ibm-cloud random-forest object-storage

我正在使用bluemix对​​象存储容器,我想做什么,我想存储我的" RandomForestRegressor"使用joblib进入pkl文件。但是当我使用Swift客户端运行代码时,我收到错误。

TypeError: object of type 'DecisionTreeRegressor' has no len()

这是我的代码,请帮助。

import os
from flask import Flask,render_template, request,json
from flask.ext.cors import CORS
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
import random
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error
import os
from sklearn.externals import joblib
import pickle
import sys
import json
import csv
import swiftclient

app = Flask(__name__)
CORS(app)



cloudant_service = json.loads(os.environ['VCAP_SERVICES'])['Object-Storage'][0]
objectstorage_creds = cloudant_service['credentials']

if objectstorage_creds:
   auth_url = objectstorage_creds['auth_url'] + '/v3' #authorization URL
   password = objectstorage_creds['password'] #password
   project_id = objectstorage_creds['projectId'] #project id
   user_id = objectstorage_creds['userId'] #user id 
   region_name = objectstorage_creds['region'] #region name 

def predict_joblib():
   conn = swiftclient.Connection(key=password,
   authurl=auth_url,
   auth_version='3',
   os_options={"project_id": project_id,
   "user_id": user_id,
   "region_name": region_name})

   container_name = 'my-container'

   # File name for testing
   file_name = 'example_file.txt'

   # Create a new container
   conn.put_container(container_name)
   print ("nContainer %s created successfully." % container_name)

   # List your containers
   print ("nContainer List:")
   for container in conn.get_account()[1]:
      print (container['name'])


   # List objects in a container, and prints out each object name, the file size, and last modified date
   print ("nObject List:")
   for container in conn.get_account()[1]:
      for data in conn.get_container(container['name'])[1]:
         print ('object: {0}t size: {1}t date: {2}'.format(data['name'], data['bytes'], data['last_modified']))



   print ("-----------LEARN-----------\n")
   with open('training_set.json') as json_data:
      df_train= pd.read_json(json_data)
   train_X = df_train.drop('Price', 1)
   train_y = df_train['Price']

   print ("Training...")
   rfreg = RandomForestRegressor(n_estimators=100, n_jobs=-1)
   rfreg.fit(train_X, train_y)
   print("\nPerformance on training set:")
   print('R^2: %f' % rfreg.score(train_X, train_y))
    # print('MSE: %f' % mean_squared_error(rfreg.predict(train_X), train_y))
    # print('ABS: %f' % mean_absolute_error(rfreg.predict(train_X), train_y))
   importances = rfreg.feature_importances_
   std = np.std([tree.feature_importances_ for tree in rfreg.estimators_], axis=0)
   indices = np.argsort(importances)[::-1]

   # Print the feature ranking
   print("\nFeature ranking:")
   for f in range(len(importances)):
       print("%d. feature %d %s (%f)" % (f + 1, indices[f], df_train.columns[indices[f]], importances[indices[f]]))

   # SERIALIZE MODEL USING joblib
   print ("Serializing models using joblib...")
   conn.put_object(container_name,'v3.pkl', contents= rfreg)
   print ("Serializing vectorizers using joblib...")
   for feature in ['Fluorescence', 'Culet']:
      conn.put_object(container_name,feature+'_v3.pkl', contents= vectorizers[feature])
   return rfreg, vectorizers    



@app.route('/')
def hello():
    predict_joblib()
    return 'Welcome to Python Flask!'

@app.route('/signUp')
def signUp():
    return 'signUp'


port = os.getenv('PORT', '5000')
if __name__ == "__main__":
    app.debug = True
    app.run(host='0.0.0.0', port=int(port))

0 个答案:

没有答案