I want to build an LSTM that can learn from text I will provide. First, I created a function that reads in the training data.
import numpy as np

def read_data(filename):
    with open(filename) as f:
        content = f.readlines()
    content = [x.strip() for x in content]
    # flatten the list of lines into a flat list of words
    content = [word for line in content for word in line.split()]
    content = np.array(content)
    return content
training_data = read_data(filename)
print("Loaded training data...")
After that, I have a function that assigns a number to every word.
import collections

def build_dataset(words):
    # most_common() orders words by frequency, so frequent words get low ids
    count = collections.Counter(words).most_common()
    dictionary = dict()
    for word, _ in count:
        dictionary[word] = len(dictionary)
    reverse_dictionary = dict(zip(dictionary.values(), dictionary.keys()))
    return dictionary, reverse_dictionary
dictionary, reverse_dictionary = build_dataset(training_data)
vocab_size = len(dictionary)
An example of the dictionary variable is 'the': 0, 'and': 1, 'to': 2, and so on.
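To illustrate, using the example mapping above, the two dictionaries translate in both directions:

# word -> id and id -> word lookups
print(dictionary['the'])      # -> 0
print(reverse_dictionary[2])  # -> 'to'
print([dictionary[w] for w in ['the', 'and', 'to']])  # -> [0, 1, 2]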
Then I found some sample LSTM code:
import numpy
from keras.models import Sequential
from keras.layers import LSTM, Dropout, Dense
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils

# reshape X to be [samples, time steps, features]
X = numpy.reshape(dataX, (n_patterns, seq_length, 1))
# normalize
X = X / float(n_vocab)
# one hot encode the output variable
y = np_utils.to_categorical(dataY)
# define the LSTM model
model = Sequential()
model.add(LSTM(256, input_shape=(X.shape[1], X.shape[2]), return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(256))
model.add(Dropout(0.2))
model.add(Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam')
# define the checkpoint
filepath="weights-improvement-{epoch:02d}-{loss:.4f}-bigger.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
callbacks_list = [checkpoint]
# fit the model
model.fit(X, y, epochs=50, batch_size=64, callbacks=callbacks_list)
I don't understand what I need to do for the reshape. My seq length is 3, but how do I determine the number of patterns, since as I understand it dataX should be a vector of words? When I run X = np.reshape(dictionary, (n_patterns, seq_length, 1)), it outputs cannot reshape array of size 1 into shape (775,100,1). Could you help me with this?
Answer (score: 0)
The problem here is that dataX from the sample code should not be replaced by dictionary, but by a list of n_patterns samples taken from your data, where each sample is a subsequence of seq_length words, each encoded as a one-hot vector of length vocab_size.
Such a dataset is typically built along these lines (tune seq_length, and experiment with the third argument of range, which sets the stride between consecutive samples):
import keras

seq_length = 50
dataX = []
dataY = []
# slide a window over the text; the step of 3 controls how densely it samples
for i in range(0, len(training_data) - seq_length - 1, 3):
    # X: seq_length one-hot encoded words; y: the word that follows the window
    dataX.append([keras.utils.to_categorical(dictionary[word], num_classes=vocab_size)
                  for word in training_data[i:i + seq_length]])
    dataY.append(keras.utils.to_categorical(dictionary[training_data[i + seq_length]],
                                            num_classes=vocab_size))
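As a quick sanity check (assuming a modern Keras, where to_categorical on a scalar returns a flat vector), the stacked arrays should already have the shapes the model expects:

print(np.array(dataX).shape)  # -> (n_patterns, seq_length, vocab_size)
print(np.array(dataY).shape)  # -> (n_patterns, vocab_size)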
You might also consider using a set instead of the Counter in build_dataset, which leads to this function:
def build_dataset(words):
    dictionary = dict()
    for word in set(words):
        dictionary[word] = len(dictionary)
    reverse_dictionary = dict(zip(dictionary.values(), dictionary.keys()))
    return dictionary, reverse_dictionary

dictionary, reverse_dictionary = build_dataset(training_data)
vocab_size = len(dictionary)
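One caveat: iterating over a set has no guaranteed order across runs (Python randomizes string hashing), so the word-to-id mapping can change every time you start the script. If you need a reproducible mapping, for example so saved checkpoints stay valid, a sorted set is a simple fix (a sketch, not part of the original answer):

def build_dataset(words):
    dictionary = dict()
    # sorted() fixes the iteration order, so ids are stable across runs
    for word in sorted(set(words)):
        dictionary[word] = len(dictionary)
    reverse_dictionary = dict(zip(dictionary.values(), dictionary.keys()))
    return dictionary, reverse_dictionary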
So, putting everything together, your final code could look like this (with a few adjustments to make it fit the LSTM):
import numpy as np
import keras
from keras.layers import LSTM, Dropout, Dense
from keras.callbacks import ModelCheckpoint

def read_data(filename):
    with open(filename) as f:
        content = f.readlines()
    content = [x.strip() for x in content]
    content = [word for line in content for word in line.split()]
    content = np.array(content)
    return content

training_data = read_data(filename)
print("Loaded training data...")

def build_dataset(words):
    dictionary = dict()
    for word in set(words):
        dictionary[word] = len(dictionary)
    reverse_dictionary = dict(zip(dictionary.values(), dictionary.keys()))
    return dictionary, reverse_dictionary

dictionary, reverse_dictionary = build_dataset(training_data)
vocab_size = len(dictionary)

# build the training samples: windows of seq_length one-hot words (X)
# and the one-hot word that follows each window (y)
seq_length = 50
dataX = []
dataY = []
for i in range(0, len(training_data) - seq_length - 1, 3):
    dataX.append([keras.utils.to_categorical(dictionary[word], num_classes=vocab_size)
                  for word in training_data[i:i + seq_length]])
    dataY.append(keras.utils.to_categorical(dictionary[training_data[i + seq_length]],
                                            num_classes=vocab_size))
n_patterns = len(dataX)

# reshape X to be [samples, time steps, features]
X = np.reshape(dataX, (n_patterns, seq_length, vocab_size))
# reshape y to be [samples, features]
y = np.reshape(dataY, (n_patterns, vocab_size))

# define the LSTM model
model = keras.Sequential()
model.add(LSTM(256, input_shape=(seq_length, vocab_size), return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(256))
model.add(Dropout(0.2))
model.add(Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam')

# define the checkpoint
filepath = "weights-improvement-{epoch:02d}-{loss:.4f}-bigger.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
callbacks_list = [checkpoint]

# fit the model
model.fit(X, y, epochs=50, batch_size=64, callbacks=callbacks_list)
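Once training finishes, you can use the model to predict the word that follows a seed sequence. A minimal sketch, assuming a seed taken from the start of the training data and greedy argmax decoding (both assumptions, not part of the original answer):

# one-hot encode a seed of seq_length words and predict the next word
seed = list(training_data[:seq_length])
x = np.array([[keras.utils.to_categorical(dictionary[word], num_classes=vocab_size)
               for word in seed]])  # shape: (1, seq_length, vocab_size)
probs = model.predict(x)[0]        # probability distribution over the vocabulary
print(reverse_dictionary[int(np.argmax(probs))])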