我开始使用 Django 开发一个预测销售情况的 Web 应用程序。我使用的是线性回归,而数据中有一些字符串类型的变量,所以为了训练模型,我用 handle_non_numerical_data(df) 方法把所有字符串变量都转换成了 int。但是,应用程序的用户输入这些字段时,输入的仍然是字符串。
方法
def handle_non_numerical_data(df):
    """Replace every non-numerical column of *df* with integer codes.

    Columns whose dtype is ``int64`` or ``float64`` are left untouched.
    In every other column each distinct value is replaced by an integer
    code, numbered from 0 in order of first appearance.

    The frame is modified in place and the same object is returned.

    NOTE: the value -> code mapping is local to this function and is
    discarded when it returns, so raw text supplied later (for example at
    prediction time) cannot be encoded with the same codes unless the
    caller rebuilds or stores the mapping separately.
    """
    for column in df.columns.values:
        if df[column].dtype == np.int64 or df[column].dtype == np.float64:
            continue

        # Read the values once so the mapping and the lookup below work on
        # exactly the same objects.
        column_contents = df[column].values.tolist()

        # dict.fromkeys keeps first-appearance order, so the codes are the
        # same on every run.  Iterating a set of strings is not: its order
        # depends on per-process hash randomization.
        text_digit_vals = {
            value: code
            for code, value in enumerate(dict.fromkeys(column_contents))
        }
        df[column] = [text_digit_vals[value] for value in column_contents]
    return df
我的模型
# Libraries
import numpy as np
import pandas as pd
import pickle
from matplotlib import pyplot as plt
from sklearn import metrics
from sklearn import model_selection
#from sklearn import preprocessing
from sklearn.cluster import KMeans
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
#from sklearn.linear_model import Ridge
from sklearn.externals import joblib
# Load the dataset and replace every missing value with 0
data = pd.read_csv('ml_code/ml_process/test.csv').fillna(0)
def handle_non_numerical_data(df):
    """Replace every non-numerical column of *df* with integer codes.

    Columns whose dtype is ``int64`` or ``float64`` are left untouched.
    In every other column each distinct value is replaced by an integer
    code, numbered from 0 in order of first appearance.

    The frame is modified in place and the same object is returned.

    NOTE: the value -> code mapping is local to this function and is
    discarded when it returns, so raw text supplied later (for example at
    prediction time) cannot be encoded with the same codes unless the
    caller rebuilds or stores the mapping separately.
    """
    for column in df.columns.values:
        if df[column].dtype == np.int64 or df[column].dtype == np.float64:
            continue

        # Read the values once so the mapping and the lookup below work on
        # exactly the same objects.
        column_contents = df[column].values.tolist()

        # dict.fromkeys keeps first-appearance order, so the codes are the
        # same on every run.  Iterating a set of strings is not: its order
        # depends on per-process hash randomization.
        text_digit_vals = {
            value: code
            for code, value in enumerate(dict.fromkeys(column_contents))
        }
        df[column] = [text_digit_vals[value] for value in column_contents]
    return df
# Encode the non-numerical columns as integers so the regression can use them.
data = handle_non_numerical_data(data)
# DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in 1.0;
# .values yields the same NumPy array and exists in every pandas version.
data = data.values
# X: matrix of explanatory variables (first nine columns)
X = data[:, 0:9]
# y: vector of the variable to predict (tenth column)
y = data[:, 9]
X2_train, X2_test, y2_train, y2_test = train_test_split(X, y, test_size=0.3, random_state=0)
lreg = LinearRegression()
lreg.fit(X2_train, y2_train)
# NOTE: for a regressor, score() is the coefficient of determination (R^2),
# not a classification accuracy; the printed labels are kept as they were.
print('Accuracy of linear regression on training set: {:.2f}'.format(lreg.score(X2_train, y2_train)))
print('Accuracy of linear regression on test set: {:.2f}'.format(lreg.score(X2_test, y2_test)))
# Serialize the fitted linear regression model to a bytes object
linear_regression_model = pickle.dumps(lreg)
# Save the pickled bytes to a file with joblib.  The prediction code reads the
# file back with joblib.load() followed by pickle.loads(), so this two-step
# format has to stay as it is.
joblib.dump(linear_regression_model, 'ml_code/linear_regression_model.pkl')
预测
import pickle
from sklearn.externals import joblib
# Read back the object stored by the training script: the file contains a
# joblib-dumped bytes object, which is the pickle of the fitted estimator.
linear_regression_model = joblib.load('ml_code/linear_regression_model.pkl')
# NOTE(review): pickle.loads can execute arbitrary code from its payload;
# only load model files that this project produced itself.
lreg = pickle.loads(linear_regression_model)
def get_prediction(magasin, numero_article, designation_article, moyen_de_ventes_par_jour, vente_2013,
                   vente_2014, ventes_2015, ventes_2016, ventes_2017):
    """Return the model's prediction for a single observation.

    The nine arguments are passed to ``lreg.predict`` as one row, in the
    order in which they are declared, and the result of that call is
    returned unchanged.

    NOTE: the values are forwarded as-is.  A text value such as ``'tea'``
    makes the call fail with ``ValueError: could not convert string to
    float``, so any text field has to be converted to a number before it
    is passed in.
    """
    feature_row = [
        magasin,
        numero_article,
        designation_article,
        moyen_de_ventes_par_jour,
        vente_2013,
        vente_2014,
        ventes_2015,
        ventes_2016,
        ventes_2017,
    ]
    return lreg.predict([feature_row])
但我收到此错误
ValueError at /
could not convert string to float: 'tea'
Request Method: POST
Request URL: http://127.0.0.1:8000/
Django Version: 2.2
Exception Type: ValueError
Exception Value:
could not convert string to float: 'tea'
Exception Location: C:\Users\hp\AppData\Local\Programs\Python\Python36\dj\f\lib\site-packages\sklearn\utils\validation.py in check_array, line 448
Python Executable: C:\Users\hp\AppData\Local\Programs\Python\Python36\dj\f\Scripts\python.exe
Python Version: 3.6.5
Python Path:
['C:\\Users\\hp\\AppData\\Local\\Programs\\Python\\Python36\\dj\\appweb pred',
'C:\\Users\\hp\\AppData\\Local\\Programs\\Python\\Python36\\dj\\f\\Scripts\\python36.zip',
'C:\\Users\\hp\\AppData\\Local\\Programs\\Python\\Python36\\dj\\f\\DLLs',
'C:\\Users\\hp\\AppData\\Local\\Programs\\Python\\Python36\\dj\\f\\lib',
'C:\\Users\\hp\\AppData\\Local\\Programs\\Python\\Python36\\dj\\f\\Scripts',
'c:\\users\\hp\\appdata\\local\\programs\\python\\python36\\Lib',
'c:\\users\\hp\\appdata\\local\\programs\\python\\python36\\DLLs',
'C:\\Users\\hp\\AppData\\Local\\Programs\\Python\\Python36\\dj\\f',
'C:\\Users\\hp\\AppData\\Local\\Programs\\Python\\Python36\\dj\\f\\lib\\site-packages']
Server time: Sat, 27 Apr 2019 03:32:44 +0000
因为我在训练模型时已经把所有字符串变量都转换成了整数。