Keras GridSearchCV on sklearn One-Hot Encoded Data

时间:2018-02-04 09:30:35

标签: scikit-learn neural-network keras grid-search one-hot-encoding

这段代码的问题在于:我提供给分类器的是独热编码(one-hot encoded)的数据,也就是说 X_train、X_test、y_train、y_test 都是独热编码的。但是分类器预测的输出 y_pred_test、y_pred_train 却是数字(类别标签)形式(我认为这也是不正确的)。有人能帮忙吗? 这是一个虚拟的例子,所以不用担心精度低,我只是想知道为什么它以非独热编码的形式预测输出。 谢谢!

# -*- coding: utf-8 -*-
import numpy as np
import pandas as  pd


# Toy feature table: two integer columns, each holding the values 1..9.
x = pd.DataFrame({'names': np.arange(1, 10), 'Age': np.arange(1, 10)})

# Toy target table: a single integer column holding the values 1..9.
y = pd.DataFrame({'target': np.arange(1, 10)})


from sklearn.preprocessing import OneHotEncoder, Normalizer

# One-hot encode the feature frame and the target frame with separate
# encoders, converting each encoded result to a dense array via toarray().
ohX = OneHotEncoder()
x_enc = ohX.fit(x).transform(x).toarray()
ohY = OneHotEncoder()
y_enc = ohY.fit(y).transform(y).toarray()

# Show both encoded matrices, separated by a divider line.
for shown in (x_enc, "____", y_enc):
    print(shown)

import keras
from keras import regularizers
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.models import load_model
from keras.layers.advanced_activations import LeakyReLU
marker="-------"

from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV

from sklearn.model_selection import train_test_split


def create_model(learn_rate=0.001):
    """Build and compile the small feed-forward classifier used by the grid search.

    The network takes 18 input features, has one hidden layer of 15 tanh
    units, and ends in a 9-way softmax trained with categorical
    cross-entropy.

    Args:
        learn_rate: Learning rate handed to the Adam optimizer. Previously
            this argument was accepted but ignored (the optimizer was the
            string "adam"), so searching over it had no effect.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    # Local import so this function brings its own optimizer class into scope.
    from keras.optimizers import Adam

    model = Sequential()
    model.add(Dense(units=15, input_dim=18, kernel_initializer='normal', activation="tanh"))
    model.add(Dense(units=9, activation="softmax"))
    # Build the optimizer explicitly so `learn_rate` actually takes effect.
    # NOTE(review): `lr` is the keyword used by the Keras generation this file
    # targets (keras.wrappers.scikit_learn); newer releases call it
    # `learning_rate` — confirm against the installed version.
    model.compile(loss="categorical_crossentropy", optimizer=Adam(lr=learn_rate), metrics=['accuracy'])
    return model

def labels_to_one_hot(labels, n_classes):
    """Convert integer class labels into one-hot rows.

    Args:
        labels: Array-like of integer class indices; any shape is accepted
            and flattened to one dimension.
        n_classes: Width of the one-hot rows (number of classes).

    Returns:
        A float ndarray of shape ``(len(labels), n_classes)`` with a single
        1.0 per row at the column given by the label.
    """
    indices = np.asarray(labels, dtype=int).ravel()
    return np.eye(n_classes)[indices]


if __name__ == "__main__":
    X_train, X_test, y_train, y_test = train_test_split(x_enc, y_enc, test_size=0.33, random_state=42)
    print ("\n\n",marker*5," Classification\nX_train shape is: ",X_train.shape,"\tX_test shape is:",X_test.shape)
    print ("\ny_train shape is: ",y_train.shape,"\t    y_test shape is:",y_test.shape,"\n\n")

    # Apply the same Normalizer to the train and test features.
    norm = Normalizer()
    X_train = norm.fit_transform(X_train)
    X_test = norm.transform(X_test)

    # Model wrapped for scikit-learn, plus an early-stopping callback.
    # NOTE(review): the callback monitors 'val_loss', but no validation data
    # is passed to fit() below — confirm whether it can ever trigger.
    earlyStopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=0, verbose=0, mode='auto')
    model = KerasClassifier(build_fn=create_model, verbose=0)

    # Grid of hyper-parameters to search over.
    epochs = [2, 5]
    learn_rate = [0.1, 0.001]
    param_grid = dict(epochs=epochs, learn_rate=learn_rate)
    grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=1)

    # Fit the grid search on the training split.
    print (np.shape(X_train), np.shape(y_train))
    # y_train is already 2-D (it comes from OneHotEncoder(...).toarray()), so
    # this reshape keeps its shape; it is retained for the diagnostic print.
    y_train = np.reshape(y_train, (-1, np.shape(y_train)[1]))
    print ("y_train shape after reshaping", np.shape(y_train))
    grid_result = grid.fit(X_train, y_train, callbacks=[earlyStopping])
    print ("grid score using params: ", grid_result.best_score_, "   ",grid_result.best_params_)

    # Score the best estimator on the held-out split.
    print("SCORES")
    print (grid_result.score(X_test,y_test))
    # Optional per-candidate summary:
    #print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
    #means = grid_result.cv_results_['mean_test_score']
    #stds = grid_result.cv_results_['std_test_score']
    #params = grid_result.cv_results_['params']
    #for mean, stdev, param in zip(means, stds, params):
    #    print("%f (%f) with: %r" % (mean, stdev, param))
    print("\n\n")
    print("y_test is",y_test)

    # predict() on the wrapped classifier yields class labels, not one-hot
    # rows, so the labels are printed first and then converted back to the
    # one-hot layout of y_test for a like-for-like comparison.
    # NOTE(review): assumes each predicted label equals its column index in
    # the one-hot target — confirm against the KerasClassifier wrapper docs.
    y_hat_test = grid.predict(X_test)
    y_hat_train = grid.predict(X_train)
    print("y_hat_test is ", y_hat_test)
    n_classes = y_test.shape[1]
    y_hat_test_one_hot = labels_to_one_hot(y_hat_test, n_classes)
    y_hat_train_one_hot = labels_to_one_hot(y_hat_train, n_classes)
    print("y_hat_test (one-hot) is", y_hat_test_one_hot)
    print("y_hat_train (one-hot) is", y_hat_train_one_hot)

0 个答案:

没有答案