I've asked about this kind of situation before, but the previous answers seemed to involve checking that the underlying elements are actually scalars, which appears to be the case here - although my relative inexperience may be stopping me from understanding them properly. Anyway, here is the code, hacked together from earlier fragments - note that the error occurs specifically at data_dict, and tells me 'ValueError: setting an array element with a sequence.'
Thanks in advance for taking a look; I've been staring at this for an embarrassingly long time.
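For reference, here's a minimal standalone snippet (my own, not from the project below) that reproduces the same error - as far as I understand, numpy raises it whenever it's asked to build a rectangular float array out of rows of unequal length, which is what feeding a placeholder does to the supplied value:

import numpy as np

# Two "rows" of different lengths: no rectangular float array can hold
# them, so numpy raises
# "ValueError: setting an array element with a sequence."
ragged = [np.array([1.0, 2.0, 3.0]), np.array([4.0, 5.0])]
np.array(ragged, dtype=np.float32)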
from __future__ import print_function

import functools

import numpy as np
import pandas
import tensorflow as tf
from tensorflow import nn
from sklearn.model_selection import train_test_split
from tqdm import tqdm

from CorpusManager import CorpusManager
from ArticleManager import ArticleManager
from SentimentAnalyser import SentimentAnalyser
def lazy_property(function):
    """Memoise a property: compute it on first access, then cache the result."""
    attribute = '_' + function.__name__

    @property
    @functools.wraps(function)
    def wrapper(self):
        if not hasattr(self, attribute):
            setattr(self, attribute, function(self))
        return getattr(self, attribute)
    return wrapper
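# (For anyone skimming: lazy_property memoises the wrapped method - the first
# access computes the value and caches it under '_<name>', later accesses
# return the cached attribute. So each graph node below is built exactly once;
# a hypothetical
#     class Foo:
#         @lazy_property
#         def thing(self):
#             return expensive_build()
# calls expensive_build() on the first Foo().thing and then reuses the result.)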
"""
RNN needs to read titles of articles and input each word with its associated sentiment in sequence, the aim is to then reuturn whether one should long, short
or ignore (need to somehow include risk/uncertainty into the loss function?)
"""
####################### Change to LSTM
class Variable_sequence_classifier():

    def __init__(self, num_hidden=200, num_layers=2,
                 X_train=None, X_test=None, Y_train=None, Y_test=None):
        self._num_hidden = num_hidden
        self._num_layers = num_layers
        self.max_length = 0
        self.X_train = X_train
        self.X_test = X_test
        self.Y_train = Y_train
        self.Y_test = Y_test
        # self.load_test_train_data(corpus)
        self.data = None
        self.target = None
        self.input_data = self.load_data()
        _ = self.prepare_test_and_train()
        self.num_classes = 2
        self.row_size = 101
        self.batchSize = 10
        # Placeholders: a batch of padded sequences and their one-hot targets.
        self.data = tf.placeholder(
            tf.float32, [self.batchSize, self.max_length, self.row_size])
        self.target = tf.placeholder(
            tf.float32, [self.batchSize, self.num_classes])
        # Touch the lazy properties so the whole graph is built up front.
        self.prediction
        self.error
        self.optimize
    def load_data(self):
        data = pandas.read_pickle("./RnnTest.pkl")
        return data
    def prepare_test_and_train(self):
        data = self.input_data
        X = data['Representation']
        Y = data['Category']
        # Find the longest title so everything can be padded to that length.
        for j, row in X.items():
            if len(row) > self.max_length:
                self.max_length = len(row)
        print(self.max_length)
        # Pad each shorter sequence with all-zero word vectors up to max_length.
        with tqdm(total=len(X)) as pbar:
            for j, row in X.items():
                len1 = len(row[0])  # width of a single word vector
                if len(row) < self.max_length:
                    padding = [np.zeros(len1)] * (self.max_length - len(row))
                    X.loc[j] = np.array(list(row) + padding)
                pbar.update(1)
        self.X_train, self.X_test, self.Y_train, self.Y_test = train_test_split(
            X, Y, test_size=0.33, random_state=42)
        return data
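    # (A sketch of an alternative I'm considering, assuming each element of X
    #  is a 2-D array of shape (n_words, 101): build the dense batch in one
    #  go instead of padding row by row,
    #      padded = np.zeros((len(X), self.max_length, self.row_size), np.float32)
    #      for k, seq in enumerate(X):
    #          padded[k, :len(seq)] = seq
    #  which guarantees a rectangular float32 array for the placeholder.)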
    def load_test_train_data(self, corpus_manager):
        """
        Retrieve a set of training and testing data from the corpus manager
        in preparation for training or validation.

        Parameters
        ----------
        corpus_manager: a CorpusManager instance already fitted with processed articles

        Returns
        -------
        X_train, y_train: training feature-space and target-space data points
        X_test, y_test: test feature-space and target-space data points, disjoint from the training set
        """
        article_manager = ArticleManager(
            input_database='ModelCreation/slimmed_article_database_fin.csv',
            verbosity=0)
        self.X, self.y = corpus_manager.get_wordvector_sentiment_data_alt_plus_word2vec(
            self.sent_model, self.sent_data, article_manager)
        self.log('Read ', len(self.X), ' wordvectors and ', len(self.y),
                 ' returns from the corpus.')
        # Separate the articles into test and train - we use a fixed random
        # state so that our results are the same between runs.
        X_train, X_test, y_train, y_test = train_test_split(
            self.X, self.y, test_size=0.33, random_state=42)
        self.log('Split into ', len(X_train), ' training and ', len(X_test),
                 ' testing events.')
        # Store the training and test data on this class so they can be accessed later.
        self.X_train, self.X_test, self.y_train, self.y_test = X_train, X_test, y_train, y_test
        return X_train, X_test, y_train, y_test
    @lazy_property
    def length(self):
        # Infer sequence lengths from the zero padding: a timestep counts as
        # used if any feature in its word vector is non-zero.
        used = tf.sign(tf.reduce_max(tf.abs(self.data), reduction_indices=2))
        length = tf.reduce_sum(used, reduction_indices=1)
        length = tf.cast(length, tf.int32)
        return length
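    # (Worked example of the padding trick above: for one padded sequence
    #  [[0.2, 0.0], [0.0, 0.0]], reduce_max(abs(...)) over the feature axis
    #  gives [0.2, 0.0], sign gives [1.0, 0.0], and the sum over time gives a
    #  length of 1 - all-zero pad vectors never count as real timesteps.)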
    @lazy_property
    def prediction(self):
        # Recurrent network.
        output, _ = nn.dynamic_rnn(
            nn.rnn_cell.GRUCell(self._num_hidden),
            self.data,
            dtype=tf.float32,
            sequence_length=self.length,
        )
        last = self._last_relevant(output, self.length)
        # Softmax layer.
        weight, bias = self._weight_and_bias(
            self._num_hidden, int(self.target.get_shape()[1]))
        prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)
        return prediction
    @lazy_property
    def cost(self):
        cross_entropy = -tf.reduce_sum(self.target * tf.log(self.prediction))
        return cross_entropy

    @lazy_property
    def optimize(self):
        learning_rate = 0.003
        optimizer = tf.train.RMSPropOptimizer(learning_rate)
        return optimizer.minimize(self.cost)

    @lazy_property
    def error(self):
        mistakes = tf.not_equal(
            tf.argmax(self.target, 1), tf.argmax(self.prediction, 1))
        return tf.reduce_mean(tf.cast(mistakes, tf.float32))
    @staticmethod
    def _weight_and_bias(in_size, out_size):
        weight = tf.truncated_normal([in_size, out_size], stddev=0.01)
        bias = tf.constant(0.1, shape=[out_size])
        return tf.Variable(weight), tf.Variable(bias)

    @staticmethod
    def _last_relevant(output, length):
        batch_size = tf.shape(output)[0]
        max_length = int(output.get_shape()[1])
        output_size = int(output.get_shape()[2])
        index = tf.range(0, batch_size) * max_length + (length - 1)
        flat = tf.reshape(output, [-1, output_size])
        relevant = tf.gather(flat, index)
        return relevant
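As an aside, this is how I understand the flat-indexing trick in _last_relevant - a small numpy analogue I wrote to convince myself (not part of the project):

import numpy as np

output = np.arange(12, dtype=np.float32).reshape(2, 3, 2)  # batch 2, 3 steps, 2 features
length = np.array([2, 3])                                  # real (unpadded) lengths
index = np.arange(2) * 3 + (length - 1)                    # flat row of each last real step
flat = output.reshape(-1, 2)
print(flat[index])  # [[2. 3.] [10. 11.]] - last relevant output per sequence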
The part with the error is in here - data_dict:
if __name__ == '__main__':
    corp = CorpusManager()
    sentAnal = SentimentAnalyser()
    sentAnal.load_model(r'C:\Users\Richard\Sumeria\SentimentBasedPredictor\out\svm_fully_trained.pkl')
    # model = Variable_sequence_classifier(corp, sentAnal.model, sentAnal.data)
    # model.load_test_train_data(corp)
    # num_classes = len(list(set(model.y)))
    model = Variable_sequence_classifier()
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    for epoch in range(10):
        for _ in range(100):
            batchX = model.X_train[epoch:epoch + 10]
            batchY = model.Y_train[epoch:epoch + 10]
            # Debug checks on the innermost element types.
            print(type(list(batchX)[0][0][0]))
            print(type(list(batchY)[0]))
            # This is the line that raises
            # "ValueError: setting an array element with a sequence."
            data_dict = {model.data: list(batchX), model.target: list(batchY)}
            sess.run(model.optimize, data_dict)
        error = sess.run(model.error,
                         {model.data: model.X_test, model.target: list(model.Y_test)})
        print('Epoch {:2d} error {:3.1f}%'.format(epoch + 1, 100 * error))
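For what it's worth, a check like the following might show whether the padding actually produced rectangular data (a sketch, assuming batchX is a pandas Series of 2-D arrays) - np.stack only succeeds once every padded sequence really has the same shape, so it fails in the same place the feed does:

import numpy as np

batch = np.stack(list(batchX)).astype(np.float32)  # raises if still ragged
print(batch.shape, batch.dtype)                    # expect (10, max_length, 101) float32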