下面的代码适用于输入和标签的形状,但是当我使用它们进行估算时,它不起作用(请参见下文)。
x1 = pickle.load(open(data_folder+'train_sen1.p', 'rb'))
x2 = pickle.load(open(data_folder+'train_sen2.p', 'rb'))
y = pickle.load(open(data_folder+'y_train.p', 'rb'))
x_train = np.concatenate([np.expand_dims(train_sen1, axis=1),np.expand_dims(train_sen2, axis=1)], axis=1)
print(x_train.shape)
print(y.shape)
#(11118, 2, 50)
#(11118,)
def build_model(vectors, max_length, num_classes, projected_dim,
num_hidden=200, dropout_rate1=0.2, dropout_rate2=0.2,
dropout_rate3=0.2, learn_rate=0.0001, optimizer='nadam'):
K.clear_session()
#input1 = layers.Input(shape=(max_length,), dtype='int32', name='words1')
#input2 = layers.Input(shape=(max_length,), dtype='int32', name='words2')
# modified model to take single input instead of two in order to work with hyperas specs.
model_input = layers.Input(shape=(2, max_length), dtype='int32')
input1 = layers.Lambda(get_input_slice(1))(model_input)
input2 = layers.Lambda(get_input_slice(2))(model_input)
# embeddings (projected)
embed = create_embedding(vectors, max_length, projected_dim)
a = embed(input1)
b = embed(input2)
# step 1: attend
F = create_feedforward(num_hidden, dropout_rate=dropout_rate1)
att_weights = layers.dot([F(a), F(b)], axes=-1, normalize=True)
G = create_feedforward(num_hidden, dropout_rate=dropout_rate2)
norm_weights_a = layers.Lambda(normalizer(1))(att_weights)
norm_weights_b = layers.Lambda(normalizer(2))(att_weights)
alpha = layers.dot([norm_weights_a, a], axes=1)
beta = layers.dot([norm_weights_b, b], axes=1)
# step 2: compare
comp1 = layers.concatenate([a, beta])
comp2 = layers.concatenate([b, alpha])
v1 = layers.TimeDistributed(G)(comp1)
v2 = layers.TimeDistributed(G)(comp2)
# step 3: aggregate
v1_sum = layers.Lambda(sum_word)(v1)
v2_sum = layers.Lambda(sum_word)(v2)
concat = layers.concatenate([v1_sum, v2_sum])
H = create_feedforward(num_hidden, dropout_rate=dropout_rate3)
out = H(concat)
out = layers.Dense(num_classes, activation='sigmoid', use_bias=True)(out)
model = Model(model_input, out)
if optimizer == 'sgd':
opt = SGD(lr=learn_rate)
elif optimizer == 'adam':
opt = Adam(lr=learn_rate)
elif optimizer == 'rmsprop':
opt = RMSprop(lr=learn_rate)
else:
opt = Nadam(lr=learn_rate)
model.compile(optimizer=opt,
#model.compile(optimizer=optimizer,
loss='binary_crossentropy',
metrics=["accuracy",threshold_acc,precision_threshold(0.8),recall_threshold(0.8)])
return model
model = build_model(vectors=embedding_matrix, max_length=50, projected_dim=200, num_classes=1,
num_hidden=200, dropout_rate1=0.1,
dropout_rate2=0.3, dropout_rate3=0.3,
optimizer="adam",
learn_rate=0.001)
result=model.fit(x_train, y_train, batch_size= 50, epochs=1)
此代码适用于输入和标签的形状。
但是,如果我想做一个估算器:
train_input_fn = tf.estimator.inputs.numpy_input_fn(
x={model.input_names[0]: x_train}, # input_names[0] would be 'main_input'
y=y,
batch_size=100,
num_epochs=None,
shuffle=True)
estimator = tf.keras.estimator.model_to_estimator(model)
estimator.train(input_fn=train_input_fn, steps=1)
形状错误:
ValueError: logits and labels must have the same shape ((100, 1) vs (100,))
我可以将标签的形状调整为:
np.asarray(y_train).astype('float32').reshape((-1,1))
但是我不知道为什么这对于估计量是必需的?
tf.keras与估算器的形状差异从何而来?