When I use tf.feature_column.input_layer, it seems to return a tensor of shape [number of features, batch size] when it should return the opposite, [batch size, number of features]. The code is:
## Generate the input functions
def create_train_input_fn(x_train, y_train):
    return tf.estimator.inputs.pandas_input_fn(
        x=x_train,
        y=y_train,
        batch_size=32,
        num_epochs=500,
        shuffle=True)
p = create_train_input_fn(df, df.reward_next)
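To illustrate what this input function produces, here is a minimal sketch with throwaway data (my own toy example, assuming TF 1.x): pandas_input_fn yields a dict mapping each DataFrame column to a tensor of shape (batch_size,), plus a labels tensor of shape (batch_size,).

import pandas as pd
import tensorflow as tf

# Toy data purely to inspect the shapes pandas_input_fn produces
toy_x = pd.DataFrame({'cpc': [0.1, 0.2], 'clicks': [3.0, 5.0]})
toy_y = pd.Series([1.0, 0.0], name='reward_next')

toy_fn = tf.estimator.inputs.pandas_input_fn(
    x=toy_x, y=toy_y, batch_size=2, num_epochs=1, shuffle=False)
toy_features, toy_labels = toy_fn()
print({k: v.get_shape() for k, v in toy_features.items()})  # each (2,)
print(toy_labels.get_shape())                               # (2,)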
## Create the custom estimator
deep_q = tf.estimator.Estimator(
    model_fn=deep_q_model_test,
    params={
        'feature_columns_current_state': feature_columns_results['current'],
        'feature_columns_next_state': feature_columns_results['next'],
        'gamma': GAMMA,
        'n_classes': ACTION_DIM,
        'hidden_units': [256, 256],
        'batch_size': 32
    })
# Train the Model.
deep_q.train(p)
The documentation explicitly states that its shape is (batch_size, first_layer_dimension).
I am using my own custom model function (deep_q_model_test in the code above), where my first lines are:
net = tf.feature_column.input_layer(features, feature_columns)
print('shape of input to forward pass: ' + str(net.get_shape()))
The shape shown by my print statements (and after checking TensorBoard) is:
shape of input to forward pass: (?, 32)
shape of hidden layer: (?, 256)
shape of hidden layer: (?, 256)
I am feeding it with the pre-built pandas input function, tf.estimator.inputs.pandas_input_fn. The feature columns are built as:
[_NumericColumn(key='cpc', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 _NumericColumn(key='comp_win', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 _NumericColumn(key='impressions', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 _NumericColumn(key='clicks', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 _NumericColumn(key='cost', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 _NumericColumn(key='transactions', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 _NumericColumn(key='cpo', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 _NumericColumn(key='reward', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 _IndicatorColumn(categorical_column=_IdentityCategoricalColumn(key='hrs', num_buckets=24, default_value=None))]
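For completeness, here is a standalone sketch (with dummy constants I made up, assuming TF 1.x) that rebuilds these columns and probes the width input_layer produces from them, since 8 one-element numeric columns plus a 24-bucket indicator column should concatenate to 32 along axis 1:

import tensorflow as tf

numeric_keys = ['cpc', 'comp_win', 'impressions', 'clicks',
                'cost', 'transactions', 'cpo', 'reward']
cols = [tf.feature_column.numeric_column(k) for k in numeric_keys]
cols.append(tf.feature_column.indicator_column(
    tf.feature_column.categorical_column_with_identity('hrs', num_buckets=24)))

# A dummy single-row batch purely to probe the output shape
dummy = {k: tf.constant([[0.0]]) for k in numeric_keys}
dummy['hrs'] = tf.constant([[0]], dtype=tf.int64)

probe = tf.feature_column.input_layer(dummy, cols)
print(probe.get_shape())  # expect (1, 32): 8 numeric dims + 24 one-hot dims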
Furthermore, the network does actually train, and checking TensorBoard confirms that the shape appears flipped. The problem is that when I run with a different batch size, say for prediction where I want to predict only a single observation, it does not work.
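Concretely, the kind of prediction call that breaks (a sketch, with df and deep_q as defined above) looks like:

predict_input_fn = tf.estimator.inputs.pandas_input_fn(
    x=df.head(1), batch_size=1, num_epochs=1, shuffle=False)
predictions = list(deep_q.predict(input_fn=predict_input_fn))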
EDITED: adding the actual code of the model
def deep_q_model(features, labels, mode, params):
    """deep q learning model"""
    ## Create the models
    def nn1_forward(features, feature_columns, hidden_units, n_classes):
        net = tf.feature_column.input_layer(features, feature_columns)
        print('shape of input to forward pass: ' + str(net.get_shape()))
        # Hidden layers (batch size, hidden_nodes_dim)
        layer_number = 0
        for units in hidden_units:
            net = tf.layers.dense(net, units=units,
                                  activation=tf.nn.relu,
                                  name='layer' + str(layer_number),
                                  reuse=tf.AUTO_REUSE)
            print('shape of hidden layer: ' + str(net.get_shape()))
            layer_number += 1
        # Logits layer with no activation (batch size, output dims)
        logits = tf.layers.dense(net, n_classes,
                                 activation=None,
                                 name='layerOutput',
                                 reuse=tf.AUTO_REUSE)
        print('shape of output layer: ' + str(logits.get_shape()))
        return logits

    ## Current state forward pass
    logits_current = nn1_forward(features,
                                 params['feature_columns_current_state'],
                                 params['hidden_units'],
                                 params['n_classes'])
    tf.summary.histogram('logits_current', logits_current)
    # Reshape the action tensor
    action_num = tf.reshape(tf.cast(features['action_num'], tf.int32),
                            [params['batch_size'], 1])
    # Generate a counter
    counter = tf.reshape(tf.range(params['batch_size']), [params['batch_size'], 1])
    actions = tf.concat([counter, action_num], axis=1)
    print('shape of actions ' + str(actions.get_shape()))
    # Get the Q values based on the actual action taken for each observation - flatten to (batch size, 1)
    predicted_q_values = tf.transpose(tf.gather_nd(logits_current, [actions]))
    print('shape of predicted q values: ' + str(predicted_q_values.get_shape()))

    ## next state forward pass
    logits_next = nn1_forward(features,
                              params['feature_columns_next_state'],
                              params['hidden_units'],
                              params['n_classes'])
    tf.summary.histogram('logits_next', logits_next)
    # Get the maximum Q value possible for each observation - flatten to (batch size, 1)
    max_next_q_values = tf.reshape(tf.reduce_max(logits_next, axis=1),
                                   [params['batch_size'], 1])
    print('shape of max q values: ' + str(max_next_q_values.get_shape()))

    ### Bellman equation
    # Rewards is what was fed in as "labels"
    rewards = tf.cast(tf.reshape(labels, [params['batch_size'], 1]), tf.float64)
    expected_q_values = tf.add(rewards,
                               tf.multiply(tf.cast(params['gamma'], tf.float64),
                                           tf.cast(max_next_q_values, tf.float64)))
    tf.summary.histogram('expected_q_values', expected_q_values)
    print('shape of expected q value: ' + str(expected_q_values.get_shape()))

    ## Compute predictions - predictions will calculate the Q values without activation
    predicted_classes = tf.argmax(logits_current, 1)
    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            'class_ids': predicted_classes[:, tf.newaxis],
            'probabilities': tf.nn.softmax(logits_current),
            'logits': logits_current,
        }
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    # Compute loss
    loss = tf.losses.mean_squared_error(labels=tf.cast(expected_q_values, tf.float32),
                                        predictions=tf.cast(predicted_q_values, tf.float32))

    # Compute evaluation metrics.
    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(
            mode, loss=loss, eval_metric_ops=loss)

    # Create training op.
    assert mode == tf.estimator.ModeKeys.TRAIN
    optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
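As an aside, a common way to avoid hard-coding the batch size in the reshapes above (a sketch of an alternative, not what the code above does) is to derive it from the tensors at runtime:

# Sketch: derive the batch size from the graph instead of params['batch_size']
dynamic_batch = tf.shape(logits_current)[0]             # scalar int32 tensor
action_num = tf.reshape(tf.cast(features['action_num'], tf.int32), [-1, 1])
counter = tf.reshape(tf.range(dynamic_batch), [-1, 1])
actions = tf.concat([counter, action_num], axis=1)      # (batch, 2) index pairs
# gather_nd with (batch, 2) indices returns a (batch,) vector of Q values
predicted_q_values = tf.reshape(tf.gather_nd(logits_current, actions), [-1, 1])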
Answer 0 (score: 0)