%pylab inline
import numpy as np
import pandas as pd
import random
import time
import scipy
import sklearn.feature_extraction
import pickle
from sklearn.cross_validation import StratifiedKFold
from sklearn.svm import LinearSVC
from sklearn.externals import joblib
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import confusion_matrix
# Label encoding for the target, plus the inverse map used later to turn
# numeric class ids back into their names.
bedsizes = {'None': 0,
            'Rest All': 1}
invbedsizes = {v: k for k, v in bedsizes.items()}

# Previously trained classifier, loaded from disk.
model = joblib.load('model_bed_size.pkl')

# NOTE(review): unpickling executes arbitrary code; only load a trusted file.
# The context manager closes the file handle (the original left it open).
with open('dictionary', 'rb') as vocab_file:
    vocab = pickle.load(vocab_file)

# 'Variable' lists the dtest columns to vectorize, in order.
var = pd.read_csv('Train_variables.csv')
dtest = pd.read_csv(
    '/home/ubuntu/test_null_new.csv',
    usecols=(
        "Bed_size",
        "title",
        "short_description",
        "long_description",
        "primary_shelf.all_paths_str",
        "attributes.all_shelves.0",
        "attributes.all_shelves.1",
        "attributes.all_shelves.2",
        "attributes.all_shelves.3",
        "attributes.all_shelves.4",
        "attributes.type.0",
        "attributes.type.1",
        "attributes.type.2",
        "item_id",
        "last_updated_at",
    ),
    encoding='ISO-8859-1',
)
lentest = len(dtest)

# presumably the pickled object is a pandas table with a "Vocabulary" column
# holding one vocabulary per variable -- TODO confirm against the training code
vocab = vocab["Vocabulary"].to_dict()

# One bag-of-words matrix per text column, each built with that column's fixed
# vocabulary. Vocabulary keys are looked up starting at 1, as the original
# hand-maintained counter did.
Xall = []
for i, col in enumerate(var['Variable'], start=1):
    vectorizer = CountVectorizer(min_df=1, vocabulary=(vocab[i]), token_pattern='\\b\\w+\\b')
    Xall.append(vectorizer.transform(dtest[col].astype(str)))
    print(col, 'Done', Xall[-1].shape)

# Concatenate the per-column matrices side by side and convert to CSR.
Xspall = scipy.sparse.hstack(Xall)
X_test_final = scipy.sparse.csr_matrix(Xspall)
print(X_test_final.shape)
# Score every test row, pick the predicted class and derive a confidence value.
ypred = model.decision_function(X_test_final)

if ypred.ndim == 1:
    # Two-class model: decision_function returns ONE signed score per sample
    # (positive -> classes_[1], otherwise classes_[0]). Taking argmax over
    # axis 0 of that vector yields a single sample index, which is what raised
    # "index ... is out of bounds for axis 0 with size 2".
    ypredc = model.classes_[(ypred > 0).astype(int)]
    top_score = np.abs(ypred)
else:
    # Multi-class model: one column of scores per class, so pick the best
    # class per row (axis 1), not per column.
    ypredc = model.classes_[np.argmax(ypred, axis=1)]
    top_score = np.max(ypred, axis=1)

# Same linear rescaling as before, (score + 2) / 8, clamped to [0, 1].
ypredcon = np.clip((top_score + 2.) / 8., 0., 1.)

dfinal = pd.DataFrame()
# NOTE(review): the key 'item_id ' has a trailing space; it is kept unchanged
# in case downstream consumers rely on it -- confirm whether it is intentional.
dfinal['item_id '] = dtest['item_id']
dfinal['Predictions'] = ypredc
# Assign the result back instead of using inplace=True on a column selection,
# which may operate on a temporary copy and leave dfinal unchanged.
dfinal['Predictions'] = dfinal['Predictions'].replace(invbedsizes)
dfinal['confidence_score'] = ypredcon
上面的代码抛出了索引错误(IndexError),提示索引 14328 超出了轴 0(大小为 2)的范围。
错误出在这一行:
ypredc = model.classes_ [np.argmax(ypred,axis = 0)]
任何人都可以帮我吗?
答案 0 :(得分:0)
在不了解代码中的变量的情况下,错误表明在
ypred = model.decision_function(X_test_final)
ypredc = model.classes_[np.argmax(ypred, axis = 0)]
error: index 14328 is out of bounds for axis 0 and size 2
model.classes_
是一维或多维数组,其第一维的大小为 2,换句话说有 2 行(即 2 个类别),
可能还有很多列。
ypred
可能非常大,np.argmax(ypred...)
返回的是其最大值(沿轴 0)所在位置的索引,即 14328。
也许正确的用法是 model.classes_[:, np.argmax...]。
您需要查看 `ypred` 和
`model.classes_` 的形状,以及这部分代码中可能涉及的其他变量。