I've asked about this kind of situation before, but the previous answers seemed to involve checking that the underlying elements are actually scalars, which appears to be the case here - although my relative inexperience may be stopping me from understanding them properly. Anyway, here is the code, hacked together from earlier fragments - note that the error occurs specifically at data_dict, and tells me 'ValueError: setting an array element with a sequence.'
Thanks in advance for taking a look; I've been staring at this for an embarrassingly long time.
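For reference, here's a minimal standalone snippet (my own, not from the project below) that reproduces the same error - as far as I understand, numpy raises it whenever it's asked to build a rectangular float array out of rows of unequal length, which is what feeding a placeholder does to the supplied value:

import numpy as np

# Two "rows" of different lengths: no rectangular float array can hold
# them, so numpy raises
# "ValueError: setting an array element with a sequence."
ragged = [np.array([1.0, 2.0, 3.0]), np.array([4.0, 5.0])]
np.array(ragged, dtype=np.float32)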
from __future__ import print_function

import functools

import numpy as np
import pandas
import tensorflow as tf
from tensorflow import nn
from sklearn.model_selection import train_test_split
from tqdm import tqdm

from CorpusManager import CorpusManager
from ArticleManager import ArticleManager
from SentimentAnalyser import SentimentAnalyser
def lazy_property(function):
    """Memoise a property: compute it on first access, then cache the result."""
    attribute = '_' + function.__name__

    @property
    @functools.wraps(function)
    def wrapper(self):
        if not hasattr(self, attribute):
            setattr(self, attribute, function(self))
        return getattr(self, attribute)
    return wrapper
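# (For anyone skimming: lazy_property memoises the wrapped method - the first
# access computes the value and caches it under '_<name>', later accesses
# return the cached attribute. So each graph node below is built exactly once;
# a hypothetical
#     class Foo:
#         @lazy_property
#         def thing(self):
#             return expensive_build()
# calls expensive_build() on the first Foo().thing and then reuses the result.)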
"""
RNN needs to read titles of articles and input each word with its associated sentiment in sequence, the aim is to then reuturn whether one should long, short
or ignore (need to somehow include risk/uncertainty into the loss function?)
"""
####################### Change to LSTM
class Variable_sequence_classifier():

    def __init__(self, num_hidden=200, num_layers=2,
                 X_train=None, X_test=None, Y_train=None, Y_test=None):
        self._num_hidden = num_hidden
        self._num_layers = num_layers
        self.max_length = 0
        self.X_train = X_train
        self.X_test = X_test
        self.Y_train = Y_train
        self.Y_test = Y_test
        # self.load_test_train_data(corpus)
        self.data = None
        self.target = None
        self.input_data = self.load_data()
        _ = self.prepare_test_and_train()
        self.num_classes = 2
        self.row_size = 101
        self.batchSize = 10
        # Placeholders: a batch of padded sequences and their one-hot targets.
        self.data = tf.placeholder(
            tf.float32, [self.batchSize, self.max_length, self.row_size])
        self.target = tf.placeholder(
            tf.float32, [self.batchSize, self.num_classes])
        # Touch the lazy properties so the whole graph is built up front.
        self.prediction
        self.error
        self.optimize
    def load_data(self):
        data = pandas.read_pickle("./RnnTest.pkl")
        return data
    def prepare_test_and_train(self):
        data = self.input_data
        X = data['Representation']
        Y = data['Category']
        # Find the longest title so everything can be padded to that length.
        for j, row in X.items():
            if len(row) > self.max_length:
                self.max_length = len(row)
        print(self.max_length)
        # Pad each shorter sequence with all-zero word vectors up to max_length.
        with tqdm(total=len(X)) as pbar:
            for j, row in X.items():
                len1 = len(row[0])  # width of a single word vector
                if len(row) < self.max_length:
                    padding = [np.zeros(len1)] * (self.max_length - len(row))
                    X.loc[j] = np.array(list(row) + padding)
                pbar.update(1)
        self.X_train, self.X_test, self.Y_train, self.Y_test = train_test_split(
            X, Y, test_size=0.33, random_state=42)
        return data
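    # (A sketch of an alternative I'm considering, assuming each element of X
    #  is a 2-D array of shape (n_words, 101): build the dense batch in one
    #  go instead of padding row by row,
    #      padded = np.zeros((len(X), self.max_length, self.row_size), np.float32)
    #      for k, seq in enumerate(X):
    #          padded[k, :len(seq)] = seq
    #  which guarantees a rectangular float32 array for the placeholder.)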
    def load_test_train_data(self, corpus_manager):
        """
        Retrieve a set of training and testing data from the corpus manager
        in preparation for training or validation.

        Parameters
        ----------
        corpus_manager: a CorpusManager instance already fitted with processed articles

        Returns
        -------
        X_train, y_train: training feature-space and target-space data points
        X_test, y_test: test feature-space and target-space data points, disjoint from the training set
        """
        article_manager = ArticleManager(
            input_database='ModelCreation/slimmed_article_database_fin.csv',
            verbosity=0)
        self.X, self.y = corpus_manager.get_wordvector_sentiment_data_alt_plus_word2vec(
            self.sent_model, self.sent_data, article_manager)
        self.log('Read ', len(self.X), ' wordvectors and ', len(self.y),
                 ' returns from the corpus.')
        # Separate the articles into test and train - we use a fixed random
        # state so that our results are the same between runs.
        X_train, X_test, y_train, y_test = train_test_split(
            self.X, self.y, test_size=0.33, random_state=42)
        self.log('Split into ', len(X_train), ' training and ', len(X_test),
                 ' testing events.')
        # Store the training and test data on this class so they can be accessed later.
        self.X_train, self.X_test, self.y_train, self.y_test = X_train, X_test, y_train, y_test
        return X_train, X_test, y_train, y_test
    @lazy_property
    def length(self):
        # Infer sequence lengths from the zero padding: a timestep counts as
        # used if any feature in its word vector is non-zero.
        used = tf.sign(tf.reduce_max(tf.abs(self.data), reduction_indices=2))
        length = tf.reduce_sum(used, reduction_indices=1)
        length = tf.cast(length, tf.int32)
        return length
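    # (Worked example of the padding trick above: for one padded sequence
    #  [[0.2, 0.0], [0.0, 0.0]], reduce_max(abs(...)) over the feature axis
    #  gives [0.2, 0.0], sign gives [1.0, 0.0], and the sum over time gives a
    #  length of 1 - all-zero pad vectors never count as real timesteps.)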
    @lazy_property
    def prediction(self):
        # Recurrent network.
        output, _ = nn.dynamic_rnn(
            nn.rnn_cell.GRUCell(self._num_hidden),
            self.data,
            dtype=tf.float32,
            sequence_length=self.length,
        )
        last = self._last_relevant(output, self.length)
        # Softmax layer.
        weight, bias = self._weight_and_bias(
            self._num_hidden, int(self.target.get_shape()[1]))
        prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)
        return prediction
    @lazy_property
    def cost(self):
        cross_entropy = -tf.reduce_sum(self.target * tf.log(self.prediction))
        return cross_entropy

    @lazy_property
    def optimize(self):
        learning_rate = 0.003
        optimizer = tf.train.RMSPropOptimizer(learning_rate)
        return optimizer.minimize(self.cost)

    @lazy_property
    def error(self):
        mistakes = tf.not_equal(
            tf.argmax(self.target, 1), tf.argmax(self.prediction, 1))
        return tf.reduce_mean(tf.cast(mistakes, tf.float32))
    @staticmethod
    def _weight_and_bias(in_size, out_size):
        weight = tf.truncated_normal([in_size, out_size], stddev=0.01)
        bias = tf.constant(0.1, shape=[out_size])
        return tf.Variable(weight), tf.Variable(bias)

    @staticmethod
    def _last_relevant(output, length):
        batch_size = tf.shape(output)[0]
        max_length = int(output.get_shape()[1])
        output_size = int(output.get_shape()[2])
        index = tf.range(0, batch_size) * max_length + (length - 1)
        flat = tf.reshape(output, [-1, output_size])
        relevant = tf.gather(flat, index)
        return relevant
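As an aside, this is how I understand the flat-indexing trick in _last_relevant - a small numpy analogue I wrote to convince myself (not part of the project):

import numpy as np

output = np.arange(12, dtype=np.float32).reshape(2, 3, 2)  # batch 2, 3 steps, 2 features
length = np.array([2, 3])                                  # real (unpadded) lengths
index = np.arange(2) * 3 + (length - 1)                    # flat row of each last real step
flat = output.reshape(-1, 2)
print(flat[index])  # [[2. 3.] [10. 11.]] - last relevant output per sequence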
The part with the error is in here - data_dict:
if __name__ == '__main__':
    corp = CorpusManager()
    sentAnal = SentimentAnalyser()
    sentAnal.load_model(r'C:\Users\Richard\Sumeria\SentimentBasedPredictor\out\svm_fully_trained.pkl')
    # model = Variable_sequence_classifier(corp, sentAnal.model, sentAnal.data)
    # model.load_test_train_data(corp)
    # num_classes = len(list(set(model.y)))
    model = Variable_sequence_classifier()
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    for epoch in range(10):
        for _ in range(100):
            batchX = model.X_train[epoch:epoch + 10]
            batchY = model.Y_train[epoch:epoch + 10]
            # Debug checks on the innermost element types.
            print(type(list(batchX)[0][0][0]))
            print(type(list(batchY)[0]))
            # This is the line that raises
            # "ValueError: setting an array element with a sequence."
            data_dict = {model.data: list(batchX), model.target: list(batchY)}
            sess.run(model.optimize, data_dict)
        error = sess.run(model.error,
                         {model.data: model.X_test, model.target: list(model.Y_test)})
        print('Epoch {:2d} error {:3.1f}%'.format(epoch + 1, 100 * error))
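For what it's worth, a check like the following might show whether the padding actually produced rectangular data (a sketch, assuming batchX is a pandas Series of 2-D arrays) - np.stack only succeeds once every padded sequence really has the same shape, so it fails in the same place the feed does:

import numpy as np

batch = np.stack(list(batchX)).astype(np.float32)  # raises if still ragged
print(batch.shape, batch.dtype)                    # expect (10, max_length, 101) float32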