我是python的初学者。我尝试进行情绪分析和RNN。但是,我得到AttributeError: 'str' object has no attribute 'shape'".
的评论,有关此问题的所有已发布的解决方案,但我不能解决此问题。我尝试使用相同的代码编写另一个数据文件,并且可以正常工作。但不适用于我的原始数据文件。
这是我的代码:
import numpy as np
import pandas as pd
from tensorflow.python.keras.models import Sequential`
from tensorflow.python.keras.layers import Dense, GRU, Embedding, CuDNNGRU
from tensorflow.python.keras.optimizers import Adam
from tensorflow.python.keras.preprocessing.text import Tokenizer
from tensorflow.python.keras.preprocessing.sequence import pad_sequences
dataset = pd.read_csv(r'C:\Users\Administrator\Desktop\tümveri8.csv', encoding='latin1')
target = dataset['duygu'].values.tolist()
data = dataset['yorum'].values.tolist()
cutoff = int(len(data) * 0.80)
x_train, x_test = data[:cutoff], data[cutoff:]
y_train, y_test = target[:cutoff], target[cutoff:]
num_words = 10000
tokenizer = Tokenizer(num_words=num_words)
tokenizer.fit_on_texts(data)
x_train_tokens = tokenizer.texts_to_sequences(x_train)
x_test_tokens = tokenizer.texts_to_sequences(x_test)
num_tokens = [len(tokens) for tokens in x_train_tokens + x_test_tokens]
num_tokens = np.array(num_tokens)
max_tokens = np.mean(num_tokens) + 2 * np.std(num_tokens)
max_tokens = int(max_tokens)
max_tokens
np.sum(num_tokens < max_tokens) / len(num_tokens)
x_train_pad = pad_sequences(x_train_tokens, maxlen=max_tokens)
x_test_pad = pad_sequences(x_test_tokens, maxlen=max_tokens)
idx = tokenizer.word_index
inverse_map = dict(zip(idx.values(), idx.keys()))
def tokens_to_string(tokens):
words = [inverse_map[token] for token in tokens if token!=0]
text = ' '.join(words)
return text
model = Sequential()
embedding_size = 50
model.add(Embedding(input_dim=num_words,
output_dim=embedding_size,
input_length=max_tokens,
name='embedding_layer'))
model.add(GRU(units=16, return_sequences=True))
model.add(GRU(units=8, return_sequences=True))
model.add(GRU(units=4))
model.add(Dense(1, activation='sigmoid'))
optimizer = Adam(lr=1e-3)
model.compile(loss='binary_crossentropy',
optimizer=optimizer,
metrics=['accuracy'])
model.summary()
这是错误代码:
model.fit(x_train_pad, y_train, epochs=5, batch_size=256)
model.fit(x_train_pad, y_train, epochs=5, batch_size=256)
AttributeError Traceback (most recent call
last)
<ipython-input-79-631bbf0ac3a7> in <module>
----> 1 model.fit(x_train_pad, y_train, epochs=5, batch_size=256)
~\Anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py
in fit(self, x, y, batch_size, epochs, verbose, callbacks,
validation_split, validation_data, shuffle, class_weight, sample_weight,
initial_epoch, steps_per_epoch, validation_steps, validation_freq,
max_queue_size, workers, use_multiprocessing, **kwargs)
707 steps=steps_per_epoch,
708 validation_split=validation_split,
--> 709 shuffle=shuffle)
710
711 # Prepare validation data.
~\Anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py
in _standardize_user_data(self, x, y, sample_weight, class_weight,
batch_size, check_steps, steps_name, steps, validation_split, shuffle,
extract_tensors_from_dataset)
2671 shapes=None,
2672 check_batch_axis=False, # Don't enforce the batch size.
-> 2673 exception_prefix='target')
2674
2675 # Generate sample-wise weight values given the `sample_weight`
and
~\Anaconda3\lib\site-
packages\tensorflow\python\keras\engine\training_utils.py in
standardize_input_data(data, names, shapes, check_batch_axis,
exception_prefix)
335 ]
336 else:
--> 337 data = [standardize_single_array(x) for x in data]
338
339 if len(data) != len(names):
~\Anaconda3\lib\site-
packages\tensorflow\python\keras\engine\training_utils.py in <listcomp> (.0)
335 ]
336 else:
--> 337 data = [standardize_single_array(x) for x in data]
338
339 if len(data) != len(names):
~\Anaconda3\lib\site-
packages\tensorflow\python\keras\engine\training_utils.py in
standardize_single_array(x, expected_shape)
263 return None
264
--> 265 if (x.shape is not None and len(x.shape) == 1 and
266 (expected_shape is None or len(expected_shape) != 1)):
267 if tensor_util.is_tensor(x):
AttributeError: 'str' object has no attribute 'shape'
答案 0 :(得分:0)
此行有错误:
Hello World!
This is para1.
This is para2.
基本上model.fit(x_train_pad, y_train, epochs=5, batch_size=256)
是一个str(字符串),它应该是一个numpy数组
答案 1 :(得分:0)
您可能遇到的两个可能的问题:
就像Pietro Marsella一样,建议将x_train_pad定义为字符串类型,或者在代码中的一个纪元后重新定义它。
在输入单词的单词嵌入之后,您应该有一个numpy,其shape =(N,K),应检查您的工作嵌入是否对每个单词都有效。