我已经训练了一个模型,现在想让它标记未标记的数据并为预测列填充1和0。但是,当我尝试将预测追加到数组中时,我收到的数组除了0之外什么都没有。
我不确定是什么原因引起的
import pandas as pd
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Dense
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, classification_report
import sklearn.metrics as metrics
import seaborn as sns
import warnings
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
import tensorflow as tf
warnings.filterwarnings("ignore")
heart_data = pd.read_csv('data1.csv')
heart_data.head()
y = heart_data.target.values
x_data = heart_data.drop(['target'], axis = 1)
x = (x_data - np.min(x_data)) / (np.max(x_data) - np.min(x_data)).values
n_cols = x.shape[1]
#Splitting Data
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.8)
def regression_model():
# create model
model = Sequential()
#inputs
model.add(Dense(50, activation='sigmoid', input_shape=(n_cols,)))
model.add(Dense(50, activation='sigmoid')) # activation function
model.add(Dense(1))
# compile model
model.compile(optimizer='adam', loss='mean_squared_error')
#loss measures the results and figures out how bad it did. Optimizer generates next guess.
return model
# build the model
model = regression_model()
print (model)
# fit the model
history=model.fit(x_train, y_train, validation_data=(x_test,y_test), epochs=100, batch_size=10)
# summarize history for loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()
#K Nearest Neighbor
neigh = KNeighborsClassifier(n_neighbors=6)
neigh.fit(x_test, y_test)
y_test_pred = neigh.predict(x_test)
print("Test Accuracy of KNN Algorithm: {:.2f}%".format(neigh.score(x_test,y_test)*100))
print('KNN Classification report \n',classification_report(y_test, y_test_pred))
#Support Vector Machine
svm = SVC(random_state = 1)
svm.fit(x_test, y_test)
y_test_pred = svm.predict(x_test)
print("Test Accuracy of SVM Alg orithm: {:.2f}%".format(svm.score(x_test,y_test)*100))
print('SVM Classification report \n',classification_report(y_test, y_test_pred))
#Random Forest
rf = RandomForestClassifier(n_estimators = 1000, random_state = 1)
rf.fit(x_train, y_train)
y_test_pred = rf.predict(x_test)
print("Random Forest Algorithm Accuracy Score : {:.2f}%".format(rf.score(x_test,y_test)*100))
print('Random Forest Classification report \n',classification_report(y_test, y_test_pred))
#Naive Bayes
nb = GaussianNB()
nb.fit(x_train, y_train)
y_test_pred = nb.predict(x_test)
print("Naive Bayes Algorithm Accuracy Score : {:.2f}%".format(nb.score(x_test,y_test)*100))
print('Naive Bayes Classification report \n',classification_report(y_test, y_test_pred))
#Save Predictions into Array
predictions = rf.predict(x_test)
predictions2=[]
for i in range(len(x_test)):
predictions2.append(np.argmax(predictions[i]))
p=np.array(predictions2)
print (p)
预期输出应该是一个由1和0组成的大数组。