情感分析代码无法正确预测其输入值,接近0 shd的值为负,接近1 shd的值为正

时间:2018-09-09 12:21:56

标签: python tensorflow keras sentiment-analysis

我给出一个肯定和否定的句子作为预测模型的输入,但是该模型的输出不正确。对于否定句,它接近于零;对于否定句,它接近于1。请帮助更改代码以获得正确的预测值。

#download the data
import keras
from keras.datasets import imdb 
top_words = 5000 
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words)

#one hot encode your documents
from numpy import array
from keras.preprocessing.text import one_hot
docs = ['Gut gemacht',
        'Gute arbeit',
        'Super idee',
        'Perfekt erledigt',
        'exzellent',
        'naja',
        'Schwache arbeit.',
        'Nicht gut',
        'Miese arbeit.',
        'Hätte es besser machen können.']
# integer encode the documents
vocab_size = 50
encoded_docs = [one_hot(d, vocab_size) for d in docs]
print(encoded_docs)

# Truncate and pad the review sequences 
from keras.preprocessing import sequence 
max_review_length = 500 
X_train = sequence.pad_sequences(X_train, maxlen=max_review_length) 
X_test = sequence.pad_sequences(X_test, maxlen=max_review_length)

# Build the model 
from keras.layers import Activation, Dense,LSTM,Flatten
from keras.layers import Embedding
from keras.models import Sequential

embedding_vector_length = 32 
model = Sequential() 
model.add(Embedding(top_words, embedding_vector_length, input_length=max_review_length)) 
model.add(Flatten())
model.add(Dense(250, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())

#Train the model
model.fit(X_train, y_train, validation_data=(X_test, y_test), nb_epoch=5, batch_size=64) 

#Evaluate the model
scores = model.evaluate(X_test, y_test, verbose=0) 
print("Accuracy: %.2f%%" % (scores[1]*100))

#predict sentiment from reviews
bad = "this movie was terrible and bad"
good = "i really liked the movie and had fun"
for review in [good,bad]:
    tmp = []
    for word in review.split(" "):
        tmp.append(word_to_id[word])
    tmp_padded = sequence.pad_sequences([tmp], maxlen=max_review_length) 
    print("%s. Sentiment: %s" % (review,model.predict(array([tmp_padded][0]))[0][0]))

0 个答案:

没有答案