我尝试结合PCA分析来实现一个神经网络。开始训练网络几个小时后,出现了如下错误:
ConnectionResetError: [WinError 10054] 远程主机强行关闭了现有连接
请问是哪里出了问题?
我的代码如下:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn import preprocessing
from sklearn.neural_network import MLPClassifier
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import sklearn.metrics as met
def _print_report(title, y_true, y_pred):
    """Print the classification report and confusion matrix for one data split."""
    print(title)
    print(met.classification_report(y_true, y_pred))
    print()
    cnf_matrix = met.confusion_matrix(y_true, y_pred)
    print("Matrica konfuzije", cnf_matrix, sep="\n")
    print("\n")


def classification(message, x_arg, y_arg):
    """Grid-search an MLP classifier and print its train/test performance.

    The data is split 70/30 into training and test sets, an ``MLPClassifier``
    is tuned with ``GridSearchCV`` (10-fold cross-validation over
    3 x 5 x 3 = 45 parameter combinations), and the best parameters,
    classification reports and confusion matrices are printed for both splits.

    When ``message == 'PCA'`` the test-set predictions are additionally shown
    as a scatter plot; in that case ``x_arg`` must contain the columns
    ``'pca1'`` and ``'pca2'``.

    Parameters:
        message: Label printed before the results; the value ``'PCA'`` also
            enables the scatter plot.
        x_arg: Feature table (indexed by column name when plotting).
        y_arg: Target class labels aligned with ``x_arg``.
    """
    print(message, '\n')

    # Split into training and test sets.
    x_train, x_test, y_train, y_test = train_test_split(x_arg, y_arg, test_size=0.3)

    # Parameter grid for cross-validation.
    parameters = [{
        'hidden_layer_sizes': [(2,), (6,), (10,)],
        'alpha': [1e-5, 3e-5, 1e-4, 3e-4, 1e-3],
        'activation': ['logistic', 'tanh', 'relu'],
    }]
    clf = GridSearchCV(MLPClassifier(), parameters, cv=10)
    clf.fit(x_train, y_train)

    print("Najbolji parametri:")
    print(clf.best_params_)
    print()

    _print_report("Izvestaj za trening skup:", y_train, clf.predict(x_train))

    y_pred = clf.predict(x_test)
    _print_report("Izvestaj za test skup:", y_test, y_pred)

    if message == 'PCA':
        colors = ['red', 'blue', 'gold', 'm', 'plum', 'orange', 'black']

        # Work on an explicit copy: the old ``is_copy = False`` trick relied on
        # a pandas attribute that has since been removed.
        plot_data = x_test.copy()
        plot_data['predicted'] = y_pred

        for i, class_value in enumerate(plot_data['predicted'].unique()):
            class_samples = plot_data[plot_data['predicted'] == class_value]
            # Cycle through the palette so more than len(colors) classes
            # cannot raise an IndexError.
            plt.scatter(class_samples['pca1'], class_samples['pca2'],
                        color=colors[i % len(colors)],
                        s=10, marker='o', label="class %s" % class_value)

        plt.title('Classification with PCA')
        plt.legend(loc='upper right')
        plt.show()
# Load the data set.
df = pd.read_csv("Desktop/Podaci/test.csv")

# Show the column names and the first 5 instances.
print('Prvih 5 instanci', df.head(), sep='\n')
print('\n\n')
print('Opis podataka', df.describe(), sep='\n')
print('\n\n')
# Pass the counts directly: a nested print() would emit them first and then
# make the outer call print "None".
print('Klase:', df["class"].value_counts(), sep='\n')
print('\n\n')

# Columns 1..4 are the features, "class" is the target.
features = df.columns[1:5]
x = df[features]
y = df["class"]
num_features = x.shape[1]

# Standardize the data.
scaler = preprocessing.StandardScaler().fit(x)
x = pd.DataFrame(scaler.transform(x))
x.columns = features

# Apply PCA, keeping the first two principal components.
pca = PCA(n_components=2)
pca.fit(x)
x_pca = pd.DataFrame(pca.transform(x))

# Rename the columns of the PCA-transformed set to pca1, pca2, ...
pca_columns = ['pca%d' % i for i in range(1, pca.n_components_ + 1)]
x_pca.columns = pca_columns

# Each component is written as a signed linear combination of the features;
# the explicit sign and space keep adjacent terms from running together.
print('components_ ')
for i, component in enumerate(pca.components_, start=1):
    terms = " ".join("%+.2f*%s" % (value, name)
                     for value, name in zip(component, features))
    print("pca%d=" % i + terms)
print()

# There is one variance entry per retained component, so enumerate the
# array itself instead of pairing it with a feature-count range.
print('explained_variance_ ')
for i, ev in enumerate(pca.explained_variance_, start=1):
    print("pca%d: %.10f" % (i, ev))
print()
print()

print('explained_variance_ratio_ ')
for i, evr in enumerate(pca.explained_variance_ratio_, start=1):
    print("pca%d: %.10f" % (i, evr))
print()

print('mean_ ', pca.mean_, sep='\n')
print()
print('n_components_ ', pca.n_components_, sep='\n')
print()
print('noise_variance_ ', pca.noise_variance_, sep='\n')
print()

# Train and evaluate on the standardized features, then on the PCA projection.
classification('Original', x, y)
classification('PCA', x_pca, y)
在少量数据上运行正常,但在使用较大的数据集进行训练时就会出错。