我有一个文本分类问题:想在 Keras 中用 CNN 把文本分成 25 个类别,但遇到了问题。
使用 categorical_crossentropy 作为损失函数时,准确率只有 45%;
而换成 binary_crossentropy 时,准确率达到 96%、损失为 18%
(我知道多分类问题不应该使用 binary_crossentropy)。
请问在使用 categorical_crossentropy 时,
我应该怎样降低损失值并提高准确率?
# --- Feature (x) encoding ---
# Label-encode every column of x_new, then min-max scale the result.
# NOTE(review): DataFrame.apply fits a fresh transform per column, so the
# single `le` instance only retains the fit of the last column processed.
le = preprocessing.LabelEncoder()
x_new = pd.DataFrame(x_new, dtype=str)
# BUG FIX: the original called `x_new.apply(le.fit_transform)` twice; the
# first call discarded its result (a pure no-op) and has been removed.
# The trailing no-op slice `[:, :]` has also been dropped.
x_new = x_new.apply(le.fit_transform).values
# Scale features before splitting into train and test.
# NOTE(review): feature_range=(0, 100) yields floats that are later fed to
# pad_sequences/Embedding as token indices — confirm this is intended.
scaler = MinMaxScaler(feature_range=(0, 100))
x_new = scaler.fit_transform(x_new)
# --- Target (y) encoding ---
# Label-encode the class labels, then one-hot encode them so they match the
# 25-unit softmax output used with categorical_crossentropy.
le = preprocessing.LabelEncoder()
data_y = pd.DataFrame(data_y, dtype=str)
# BUG FIX: removed the duplicated no-op `data_y.apply(le.fit_transform)`
# whose result was discarded, and the no-op `[:, :]` slice.
data_y = data_y.apply(le.fit_transform).values
one_hot_encoder = OneHotEncoder(categories='auto')
# BUG FIX: the original built the reshaped array `y` and then never used it,
# fitting the encoder on `data_y` instead; use `y` consistently.
y = np.array(data_y).reshape(-1, 1)
input_labels = one_hot_encoder.fit_transform(y).toarray()
#train test split
# Split 75/25 with a fixed random_state for reproducibility; input_labels is
# the one-hot target produced above.
x_train,x_test,y_train,y_test=train_test_split(x_new,input_labels,random_state=42,test_size=0.25)
# Pad/truncate every sample to length 100 to match the Embedding input_length.
# NOTE(review): pad_sequences casts to int32 by default, which truncates the
# MinMax-scaled float features produced earlier — confirm this is intended.
# NOTE(review): if x_new already has a fixed number of columns, this padding
# changes nothing except the dtype cast.
x_train = sequence.pad_sequences(x_train, maxlen=100)
x_test = sequence.pad_sequences(x_test, maxlen=100)
# --- CNN model for 25-class text classification ---
# BUG FIX: the original line put two statements on one physical line
# (`model_CNN=Sequential() model_CNN.add(...)`), which is a SyntaxError;
# they are split onto separate lines here.
model_CNN = Sequential()
# input_dim must exceed the largest token index; features were scaled to
# [0, 100], so indices 0..100 give a vocabulary size of 101.
model_CNN.add(Embedding(input_dim=101, output_dim=128, input_length=100))
model_CNN.add(Conv1D(128, kernel_size=4, activation='relu', padding='same'))
model_CNN.add(MaxPool1D(pool_size=3))
model_CNN.add(Dropout(0.2))
model_CNN.add(Conv1D(128, kernel_size=4, activation='relu', padding='same'))
model_CNN.add(MaxPool1D(pool_size=3))
model_CNN.add(Dropout(0.2))
model_CNN.add(Conv1D(64, kernel_size=4, activation='relu', padding='same'))
# Collapse the time dimension to one vector per sample.
model_CNN.add(GlobalMaxPooling1D())
model_CNN.add(Dropout(0.2))
model_CNN.add(Dense(128, activation='relu'))
model_CNN.add(Dropout(0.2))
# 25-way softmax output, paired with categorical_crossentropy below.
model_CNN.add(Dense(25, activation='softmax'))
model_CNN.summary()
# NOTE(review): `lr` is the legacy Keras argument name; newer Keras versions
# require `learning_rate` — verify against the installed version.
opt = SGD(lr=0.01)
model_CNN.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
# NOTE(review): 300 epochs with no validation data invites overfitting;
# consider validation_split plus EarlyStopping.
DNN_CNN = model_CNN.fit(x_train, y_train, batch_size=64, epochs=300)