使用tflearn

时间:2017-04-14 17:35:16

标签: python pandas numpy tflearn

我正在使用这个IGN数据集来分析游戏的正面和负面评价。数据基本上只有两列:sentiment包含0或1(差或好),title是游戏标题,例如'失去了世界'。

核心部分我主要使用tflearn来完成。训练过程一切顺利,唯一的问题出在对新样本进行预测时。

# Load the IGN reviews dataset from disk.
dataframe = pd.read_csv('ign.csv')

# Derive a binary `sentiment` column from score_phrase:
# 0 when the phrase is one of the negative phrases below, 1 otherwise.
bad_phrases = ['Bad', 'Awful', 'Painful', 'Unbearable', 'Disaster']
is_bad = dataframe.score_phrase.isin(bad_phrases)
dataframe['sentiment'] = is_bad.map({True: 0, False: 1})

# Discard the columns that are not needed for training (score_phrase is
# dropped too, now that sentiment has been derived from it); title and
# sentiment are the columns used further down.
unused_columns = [
    "score_phrase",
    "Unnamed: 0",
    "url",
    "platform",
    "score",
    "genre",
    "editors_choice",
    "release_year",
    "release_month",
    "release_day",
]
dataframe = dataframe.drop(unused_columns, axis=1)

# Replace any missing values with empty strings.
dataframe.fillna(value='', inplace=True)

# Preprocessing
# Encode each title as a fixed-length sequence of 100 vocabulary ids.
word_processor = VocabularyProcessor(100)
# Fit the vocabulary on all titles and use the encoded titles as the input features X.
trainX = np.array(list(word_processor.fit_transform(dataframe["title"])))
# One-hot encode the 0/1 sentiment labels into two columns so they match the
# two-unit softmax output below. `.values` replaces the deprecated
# DataFrame.as_matrix(), which was removed in pandas 1.0.
from tflearn.data_utils import to_categorical
trainY = to_categorical(dataframe["sentiment"].values, 2)


# Network building
def build_model():
    """Build the sentiment classifier and return it as a tflearn.DNN.

    The default TensorFlow graph is reset first, so every call starts from
    fresh parameters and variables. The network takes rows of 100 word ids,
    passes them through two fully connected ReLU layers of 200 units each
    and ends in a 2-unit softmax (one unit per sentiment class), trained
    with SGD (learning rate 0.01) on categorical cross-entropy.

    Returns:
        tflearn.DNN: the untrained model wrapping the network.
    """
    # This resets all parameters and variables.
    tf.reset_default_graph()
    net = tflearn.input_data([None, 100])                           # Input

    net = tflearn.fully_connected(net, 200, activation='ReLU')      # Hidden
    net = tflearn.fully_connected(net, 200, activation='ReLU')

    # Two output units, one per class. Softmax normalises across the units of
    # a row, so a single-unit softmax would output exactly 1.0 for every
    # input and the network could never learn anything.
    net = tflearn.fully_connected(net, 2, activation='softmax')     # Output
    net = tflearn.regression(net, optimizer='sgd', learning_rate=0.01, loss='categorical_crossentropy')

    return tflearn.DNN(net)


model = build_model()
# Training: hold out 10% of the data for validation and report the metric per step.
model.fit(trainX, trainY, validation_set=0.1, show_metric=True, batch_size=128, n_epoch=10)

但是一到预测这一步,就全乱套了。

# Title of a new, unseen example to classify.
example = 'Little Big Planet'
# NOTE(review): fit() is called here on the raw string instead of encoding it;
# presumably fit() returns the processor itself rather than an array of word
# ids, so `text` would not be numeric data — confirm against the
# VocabularyProcessor documentation.
text = np.array(word_processor.fit(example))
# This is the call that raises "ValueError: setting an array element with a sequence."
pred_class = np.argmax(model.predict([text]))

我得到的错误是

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-59-14b2aecc0f49> in <module>()
      1 example = 'Little Big Planet'
      2 text = np.array(word_processor.fit(example))
----> 3 pred_class = np.argmax(model.predict([text]))
      4 pred_class

/anaconda/envs/MLHardCore/lib/python3.5/site-packages/tflearn/models/dnn.py in predict(self, X)
    229         """
    230         feed_dict = feed_dict_builder(X, None, self.inputs, None)
--> 231         return self.predictor.predict(feed_dict)
    232 
    233     def predict_label(self, X):

/anaconda/envs/MLHardCore/lib/python3.5/site-packages/tflearn/helpers/evaluator.py in predict(self, feed_dict)
     67             prediction = []
     68             for output in self.tensors:
---> 69                 o_pred = self.session.run(output, feed_dict=feed_dict).tolist()
     70                 for i, val in enumerate(o_pred): # Reshape pred per sample
     71                     if len(self.tensors) > 1:

/anaconda/envs/MLHardCore/lib/python3.5/site-packages/tensorflow/python/client/session.py in run(self, fetches, feed_dict, options, run_metadata)
    765     try:
    766       result = self._run(None, fetches, feed_dict, options_ptr,
--> 767                          run_metadata_ptr)
    768       if run_metadata:
    769         proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

/anaconda/envs/MLHardCore/lib/python3.5/site-packages/tensorflow/python/client/session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
    936                 ' to a larger type (e.g. int64).')
    937 
--> 938           np_val = np.asarray(subfeed_val, dtype=subfeed_dtype)
    939 
    940           if not subfeed_t.get_shape().is_compatible_with(np_val.shape):

/anaconda/envs/MLHardCore/lib/python3.5/site-packages/numpy/core/numeric.py in asarray(a, dtype, order)
    529 
    530     """
--> 531     return array(a, dtype, copy=False, order=order)
    532 
    533 

ValueError: setting an array element with a sequence.

1 个答案:

答案 0 :(得分:0)

我不知道这是否会有所帮助,但我能够做出预测。

这些是我的导入(上面的代码中缺少):

import pandas as pd
from tflearn.data_utils import VocabularyProcessor
import numpy as np
import tensorflow as tf
import tflearn

按原样运行model.predict([text])可以重现该错误。

潜在的解决方案:

example = 'Little Big Planet'
# Encode the title with the already-fitted vocabulary (transform, not fit).
# transform() takes an iterable of documents, so the single title is wrapped
# in a list and the one resulting row is taken back out.
encoded_rows = list(word_processor.transform([example]))
# Reshape the row to (1, 100): a batch containing a single sample.
text = encoded_rows[0].reshape(1, 100)
model.predict(text)

输出:

[[1.0]]

修改

要查看单词的映射:

>>> vocab_dict = word_processor.vocabulary_._mapping
>>> vocab_dict['Little']
1
>>> vocab_dict['Big']
2
>>> vocab_dict['Planet']
3
>>> vocab_dict['World']
75

要查看这些数字是否有意义,请查看:

>>> example = 'Little Big Planet'
>>> text = list(word_processor.transform([example]))[0].reshape(1, 100)
>>> text
array([[1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=int64)
>>> example = 'Little Big World'
>>> text = list(word_processor.transform([example]))[0].reshape(1, 100)
>>> text
array([[ 1,  2, 75,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0]], dtype=int64)