In my network, the validation metrics are stuck at 0.0000e+00 from the very first epoch.
I looked around and found a few people with the same problem, but following their advice did not fix it for me.
The rows are shuffled and the labels are already converted to float32 — those are the suggestions I found on similar questions. Can you tell me where I went wrong?
import string
import numpy as np
import pandas as pd
from nltk.corpus import stopwords
from gensim.models import word2vec
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from tensorflow.keras.layers import (Activation, BatchNormalization, Dense,
                                     Dropout, Embedding, Input, LSTM)
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

# data here https://www.kaggle.com/crowdflower/twitter-airline-sentiment
dtf_data = pd.read_csv(str_path + "Tweets.csv")
def clean_data(str_text):
    lst_tokens = str_text.split()
    # drop mentions and hashtags
    lst_tokens = [w for w in lst_tokens if not any(True for e in ["@", "#"] if e in w)]
    table = str.maketrans('', '', string.punctuation)
    lst_tokens = [c.translate(table) for c in lst_tokens]
    lst_tokens = [str_word for str_word in lst_tokens if str_word.isalpha()]  # keep only alphabetic words
    lst_stop_words = set(stopwords.words('english'))
    lst_tokens = [str_word for str_word in lst_tokens if str_word not in lst_stop_words and len(str_word) > 1]
    return " ".join(lst_tokens)
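As an illustration, on a made-up tweet the cleaner behaves like this (mentions, hashtags, punctuation, stopwords and one-letter tokens are dropped):

print(clean_data("@united I love #flying, great service!"))  # -> "love great service"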
dtf_data["text_cleaned"] = dtf_data["text"].apply(lambda x: clean_data(x))
dtf_data["y"] = (dtf_data["airline_sentiment"] == "positive").astype(int)
X_train, X_test, y_train, y_test = train_test_split(dtf_data["text_cleaned"], dtf_data["y"], test_size=.25, random_state=0)
y_train = np.array(y_train, dtype = 'float32')
y_test = np.array(y_test, dtype = 'float32')
def build_corpus(dtf_in, str_col):
    corpus = []
    for sentence in dtf_in[str_col].iteritems():
        word_list = sentence[1].split()
        corpus.append(word_list)
    return corpus
corpus = build_corpus(dtf_data, 'text_cleaned')
model2vec = word2vec.Word2Vec(corpus, size=50)
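As a quick sanity check of the embeddings (this assumes 'flight' made it into the Word2Vec vocabulary, which is likely for airline tweets; the neighbours vary from run to run):

print(model2vec.wv.most_similar('flight', topn=3))  # expect airline-related neighbours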
tokenizer = Tokenizer()
tokenizer.fit_on_texts(corpus)
corpus_train, corpus_test = train_test_split(corpus, test_size=.25, random_state=0)
sequences_train = tokenizer.texts_to_sequences(corpus_train)
sequences_test = tokenizer.texts_to_sequences(corpus_test)
word_index = tokenizer.word_index
print('Found %s unique tokens' % len(word_index))
data_train = pad_sequences(sequences_train, maxlen=20)
data_test = pad_sequences(sequences_test, maxlen=20)
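For reference, here is what one row looks like after tokenizing and padding (the indices are illustrative — they depend on the fitted vocabulary — and this assumes both words occur in the tweets):

demo = pad_sequences(tokenizer.texts_to_sequences([["great", "service"]]), maxlen=20)
print(demo)  # one row of length 20: 18 leading zeros, then the two word indices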
print('Shape of train data tensor:', data_train.shape)
print('Shape of test data tensor:', data_test.shape)
nb_words = min(200000, len(word_index))+1
embedding_matrix = np.zeros((nb_words, 50))
# copy the pretrained vectors into the embedding matrix;
# words missing from the Word2Vec vocabulary keep an all-zero row
for word, i in word_index.items():
    if word in model2vec.wv.vocab:
        embedding_matrix[i] = model2vec.wv[word]
data_train, y_train = shuffle(data_train, y_train, random_state=0)
data_test, y_test = shuffle(data_test, y_test, random_state=0)
model_input = Input(shape=(20,))
model = Embedding(nb_words, 50, weights=[embedding_matrix],
                  input_length=20, trainable=False)(model_input)
model = Dropout(0.25)(model)
model = LSTM(20, dropout=0.35, recurrent_dropout=0.35)(model)
model = BatchNormalization()(model)
model = Dropout(0.25)(model)
model = Dense(32)(model)
model = Activation('relu')(model)
model = BatchNormalization()(model)
model = Dense(1)(model)
out = Activation('sigmoid')(model)
opt = SGD(lr=0.1, momentum=0.9, nesterov=True)
model = Model(inputs=model_input, outputs=out)
model.compile(loss='binary_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])
model.fit(data_train, y_train,
          batch_size=128,
          epochs=5,
          verbose=1,
          validation_data=[data_test, y_test])
Epoch 1/5
68/68 [==============================] - 1s 22ms/step - loss: 0.5084 - accuracy: 0.7975 - val_loss: 0.0000e+00 - val_accuracy: 0.0000e+00
Epoch 2/5
68/68 [==============================] - 1s 19ms/step - loss: 0.5109 - accuracy: 0.7976 - val_loss: 0.0000e+00 - val_accuracy: 0.0000e+00
Epoch 3/5
68/68 [==============================] - 1s 19ms/step - loss: 0.5095 - accuracy: 0.7976 - val_loss: 0.0000e+00 - val_accuracy: 0.0000e+00
Epoch 4/5
68/68 [==============================] - 1s 19ms/step - loss: 0.5084 - accuracy: 0.7975 - val_loss: 0.0000e+00 - val_accuracy: 0.0000e+00
Epoch 5/5
68/68 [==============================] - 1s 20ms/step - loss: 0.5083 - accuracy: 0.7973 - val_loss: 0.0000e+00 - val_accuracy: 0.0000e+00
Moreover, I found that the metrics on the validation set are actually different when I compute them by hand:
accuracy_score(y_test, [c[0]>.3 for c in model.predict(data_test)])
0.7661122661122661
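For comparison, the same numbers can also be cross-checked with model.evaluate (a minimal sketch using the test tensors above):

print(model.evaluate(data_test, y_test, verbose=0))  # returns [loss, accuracy] computed outside fit()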
I really don't understand why this is happening. Any ideas? Thanks.
Answer 0 (score: 0)
Fixed. I just downgraded my TensorFlow to 2.0: pip install tensorflow==2.0