我正在尝试使用 CNN 构建文本分类模型,但运行时出现错误。我也尝试过将 input_dim 设为 (2198,),但同样不起作用。因此,应该如何确定正确的输入尺寸?是否需要先对文本数据进行某种预处理,例如“规范化”?
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from keras.layers import Dense
from keras.models import Sequential
from keras.utils import to_categorical
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
# Load the labelled-sentence files into a single DataFrame.
# Each file is read as tab-separated with two columns: 'sentence' and 'label'.
paths = dict(
    amazon='D:\\NLP\\sentiment labelled sentences\\amazon_cells_labelled.txt',
    imdb='D:\\NLP\\sentiment labelled sentences\\imdb_labelled.txt',
    yelp='D:\\NLP\\sentiment labelled sentences\\yelp_labelled.txt',
)
# One frame per file, each tagged with a 'source' column holding the
# dictionary key it was loaded under (amazon, imdb, yelp).
data_fame = [
    pd.read_csv(location, names=['sentence', 'label'], sep='\t').assign(source=origin)
    for origin, location in paths.items()
]
# Stack the per-source frames row-wise; the original row indices are kept.
df = pd.concat(data_fame)
# Split the data frame into features (raw sentences) and labels.
# Whole columns are pulled out at once instead of indexing row by row.
X = df['sentence'].tolist()
y = df['label'].tolist()
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# A Dense layer only accepts numeric input, so the sentences are converted to
# bag-of-words count vectors. The vocabulary is learned from the training
# sentences only and then reused for the test sentences, so both matrices have
# the same number of columns and no test-set words leak into training.
# CountVectorizer lowercases and tokenizes the text itself; no further
# normalization of the strings is needed before this step.
vectorizer = CountVectorizer()
x_train = vectorizer.fit_transform(x_train).toarray().astype('float32')
x_test = vectorizer.transform(x_test).toarray().astype('float32')

# The network ends in a single sigmoid unit trained with binary cross-entropy,
# so the targets stay a flat vector with one value per sentence (not one-hot).
y_train = np.asarray(y_train, dtype='float32')
y_test = np.asarray(y_test, dtype='float32')

# Define the model: a small fully connected (dense) network, not a CNN.
# input_dim is the number of features per sample, i.e. the vocabulary size
# (axis 1 of the feature matrix); axis 0 is the number of samples.
model = Sequential()
model.add(Dense(10, activation='relu', input_dim=x_train.shape[1]))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train, using the held-out split for validation after every epoch, then
# write the trained model to disk.
history = model.fit(x_train, y_train, epochs=90, batch_size=16, validation_data=(x_test, y_test))
model.save('model.h5')