我正在尝试用Keras实现CNN + RNN + LSTM结构(1)。
我找到了一个相关的Keras sample code。
如何正确地将model.fit转换为model.fit_generator?
原始代码:
from keras.models import Sequential
from keras.layers import Activation, MaxPooling2D, Dropout, LSTM, Flatten, Merge, TimeDistributed
import numpy as np
from keras.layers import Concatenate
from keras.layers.convolutional import Conv2D
# Generate fake data
# Assumed to be 1730 grayscale video frames
x_data = np.random.random((1730, 1, 8, 10))
sequence_lengths = None
Izda=Sequential()
Izda.add(TimeDistributed(Conv2D(40,(3,3),padding='same'), input_shape=(sequence_lengths, 1,8,10)))
Izda.add(Activation('relu'))
Izda.add(TimeDistributed(MaxPooling2D(data_format="channels_first", pool_size=(2, 2))))
Izda.add(Dropout(0.2))
Dcha=Sequential()
Dcha.add(TimeDistributed(Conv2D(40,(3,3),padding='same'), input_shape=(sequence_lengths, 1,8,10)))
Dcha.add(Activation('relu'))
Dcha.add(TimeDistributed(MaxPooling2D(data_format="channels_first", pool_size=(2, 2))))
Dcha.add(Dropout(0.2))
Frt=Sequential()
Frt.add(TimeDistributed(Conv2D(40,(3,3),padding='same'), input_shape=(sequence_lengths, 1,8,10)))
Frt.add(Activation('relu'))
Frt.add(TimeDistributed(MaxPooling2D(data_format="channels_first", pool_size=(2, 2))))
Frt.add(Dropout(0.2))
merged=Merge([Izda, Dcha,Frt], mode='concat', concat_axis=2)
#merged=Concatenate()([Izda, Dcha, Frt], axis=2)
# Output from merge is (batch_size, sequence_length, 120, 4, 5)
# We want to get this down to (batch_size, sequence_length, 120*4*5)
model=Sequential()
model.add(merged)
model.add(TimeDistributed(Flatten()))
model.add(LSTM(240, return_sequences=True))
model.compile(loss='mse', optimizer='adam')
model.summary()
我修改后:
from keras.models import Sequential
from keras.layers import Activation, MaxPooling2D, Dropout, LSTM, Flatten, Merge, TimeDistributed
import numpy as np
from keras.layers import Concatenate
from keras.layers.convolutional import Conv2D
# Generate fake data
# Assumed to be 1730 grayscale video frames
x_data = np.random.random((1730, 1, 8, 10))
sequence_lengths = None
def defModel():
model=Sequential()
model.add(TimeDistributed(Conv2D(40,(3,3),padding='same'), input_shape=(sequence_lengths, 1,8,10)))
model.add(Activation('relu'))
model.add(TimeDistributed(MaxPooling2D(data_format="channels_first", pool_size=(2, 2))))
model.add(Dropout(0.2))
model.add(TimeDistributed(Flatten()))
model.add(LSTM(240, return_sequences=True))
model.compile(loss='mse', optimizer='adam')
model.summary()
return model
def gen():
for i in range(1730):
x_train = np.random.random((1, 8, 10))
y_train = np.ones((15, 240))
yield (x_train, y_train)
def main():
model = defModel()
# Slice our long, single sequence up into shorter sequeunces of images
# Let's make 50 examples of 15 frame videos
x_train = []
seq_len = 15
for i in range(50):
x_train.append(x_data[i*5:i*5+seq_len, :, :, :])
x_train = np.asarray(x_train, dtype='float32')
print(x_train.shape)
# >> (50, 15, 1, 8, 10)
model.fit_generator(
generator = gen(),
steps_per_epoch = 1,
epochs = 2)
if __name__ == "__main__":
main()
如何通过修改解决此错误产生的问题?
ValueError:检查输入时出错:expect time_distributed_1_input有5个维度,但得到了数组 形状(1,8,10)
(1)Wang,S.,Clark,R.,Wen,H。,& Trigoni,N。(2017年)。 DeepVO:采用深度递归卷积神经网络实现端到端视觉测距。会议录 - IEEE机器人与自动化国际会议,2043-2050。
model.add(TimeDistributed(Conv2D(16, (7, 7),padding='same'),input_shape=(None, 540, 960, 1)))
model.add(Activation('relu'))
model.add(TimeDistributed(Conv2D(32, (5, 5),padding='same'))) model.add(Activation('relu'))
model.add(TimeDistributed(Flatten()))
model.add(LSTM(num_classes, return_sequences=True))
ValueError:检查目标时出错:预期lstm_1有3个维度,但得到的数组有形状(4,3)
目标是通过CNN提取图像特征,然后将3个图像中的3个特征组合到LSTM中。
#Input image
(540, 960, 1) ==> (x,y,ch) ==> CNN ==> (m,n,k)┐
(540, 960, 1) ==> (x,y,ch) ==> CNN ==> (m,n,k)---> (3, m,n,k) --flatten--> (3, mnk)
(540, 960, 1) ==> (x,y,ch) ==> CNN ==> (m,n,k)」
(3, mnk) => LSTM => predict three regression value
model = Sequential()
model.add(TimeDistributed(Conv2D(16, (7, 7), padding='same'),input_shape=(None, 540, 960, 1)))
model.add(Activation('relu'))
model.add(TimeDistributed(Conv2D(32, (5, 5), padding='same')))
model.add(Activation('relu'))
model.add(TimeDistributed(Flatten()))
model.add(LSTM(num_classes, return_sequences=True))
model.compile(loss='mean_squared_error', optimizer='adam')
a = readIMG(filenames[start]) # (540, 960, 1)
b = readIMG(filenames[start + 1]) # (540, 960, 1)
c = readIMG(filenames[start + 2]) # (540, 960, 1)
x_train = np.array([[a, b, c]]) # (1, 3, 540, 960, 1)
然后我仍然得到错误:
ValueError:检查目标时出错:预期lstm_1有3个维度,但得到的数组有形状(1,3)
答案 0 :(得分:2)
问题是一个简单的形状不匹配问题。
您定义了input_shape=(sequence_lengths, 1,8,10)
,因此您的模型需要五个维度作为输入:(batch_size, sequence_lengths, 1, 8, 10)
您只需要使您的发电机输出5个尺寸的正确形状。
def gen():
x_data = np.random.random((numberOfVideos, videoLength, 1, 8, 10))
y_data = np.ones((numberOfVideos, videoLength, 240))
for video in range(numberOfVideos):
x_train = x_data[video:video+1]
y_train = y_data[video:video+1]
yield (x_train, y_train)
答案 1 :(得分:0)
以下是使用生成器的CNNLSTM的工作示例:https://gist.github.com/HTLife/25c0cd362faa91477b8f28f6033adb45