I'm getting the error:
"tensorflow.python.framework.errors_impl.InvalidArgumentError: Dense float feature must be a matrix." when training with the estimator tensorflow.contrib.boosted_trees.estimator_batch.estimator.GradientBoostedDecisionTreeClassifier. I'm using TensorFlow version 1.4.0. The same code works fine if I switch the estimator to tf.contrib.learn.DNNClassifier. In the code, the features dictionary is passed through the train_input_fn of the tf.contrib.learn.Experiment.
Has anyone run into a similar error before?
#'tensorflow==1.4.0'
import tensorflow as tf
import argparse
import sys
import os
from tensorflow.contrib.boosted_trees.estimator_batch.estimator import GradientBoostedDecisionTreeClassifier
from tensorflow.contrib.boosted_trees.proto import learner_pb2
from tensorflow_transform.tf_metadata import metadata_io
from tensorflow_transform.saved import input_fn_maker
from tensorflow.contrib.learn.python.learn import learn_runner
RAW_METADATA_DIR="raw_metadata"
CONTRACTED_METADATA_DIR="contracted_metadata"
TRANSFORMED_METADATA_DIR="transformed_metadata"
TRANSFORMED_TRAIN_DATA_FILE_PREFIX="train"
TRANSFORMED_EVAL_DATA_FILE_PREFIX="eval"
DATA_FILE_SUFFIX=".tfrecord.gz"
TRANSFORM_FN_DIR="transform_fn"
TARGET_FEATURE_COLUMN='target_field'
FEATURE_NUMERICAL_COLUMN_NAMES = [
    'feature1',
    'feature2',
    'feature3',
    'feature4',
    'feature5'
]
FEATURE_INTEGER_COLUMN_NAMES = [  # comment out fields that are not features
    'feature6',
    'feature7',
    'feature8',
    'feature9',
    'feature10'
]

def _parse_arguments(argv):
    """Parses command line arguments."""
    parser = argparse.ArgumentParser(
        description="Runs training on data.")
    parser.add_argument(
        "--model_dir", required=True, type=str,
        help="The directory where model outputs will be written")
    parser.add_argument(
        "--input_dir", required=True, type=str,
        help="GCS or local directory containing tensorflow-transform outputs.")
    parser.add_argument(
        "--batch_size", default=30, required=False, type=int,
        help="Batch size to use during training.")
    parser.add_argument(
        "--num_epochs", default=100, required=False, type=int,
        help="Number of epochs through the training set")
    args, _ = parser.parse_known_args(args=argv[1:])
    return args

def get_eval_metrics():
    return {
        "accuracy":
            tf.contrib.learn.MetricSpec(
                metric_fn=tf.contrib.metrics.streaming_accuracy,
                prediction_key=tf.contrib.learn.PredictionKey.CLASSES),
        "precision":
            tf.contrib.learn.MetricSpec(
                metric_fn=tf.contrib.metrics.streaming_precision,
                prediction_key=tf.contrib.learn.PredictionKey.CLASSES),
        "recall":
            tf.contrib.learn.MetricSpec(
                metric_fn=tf.contrib.metrics.streaming_recall,
                prediction_key=tf.contrib.learn.PredictionKey.CLASSES)
    }

def read_and_decode_single_record(input_dir, num_epochs,
                                  mode=tf.contrib.learn.ModeKeys.TRAIN):
    if mode == tf.contrib.learn.ModeKeys.TRAIN:
        num_epochs = num_epochs
        file_prefix = TRANSFORMED_TRAIN_DATA_FILE_PREFIX
    else:
        num_epochs = 1
        file_prefix = TRANSFORMED_EVAL_DATA_FILE_PREFIX
    transformed_metadata = metadata_io.read_metadata(
        os.path.join(input_dir, TRANSFORMED_METADATA_DIR))
    input_file_names = tf.train.match_filenames_once(
        os.path.join(input_dir, '{}*{}'.format(file_prefix, DATA_FILE_SUFFIX)))
    filename_queue = tf.train.string_input_producer(
        input_file_names, num_epochs=num_epochs, shuffle=True)
    reader = tf.TFRecordReader(options=tf.python_io.TFRecordOptions(
        tf.python_io.TFRecordCompressionType.GZIP))
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized=serialized_example,
        features=transformed_metadata.schema.as_feature_spec())
    return features

def read_dataset(input_dir, num_epochs, batch_size,
                 mode=tf.contrib.learn.ModeKeys.TRAIN):
    def _input_fn():
        min_after_dequeue = 10000
        features = read_and_decode_single_record(input_dir, num_epochs, mode)
        features = tf.train.shuffle_batch(
            tensors=features,
            batch_size=batch_size,
            min_after_dequeue=min_after_dequeue,
            capacity=(min_after_dequeue + 3) * batch_size)
        target = features.pop(TARGET_FEATURE_COLUMN)
        return features, target
    return _input_fn

def specify_feature_columns():
    feature_columns = [
        tf.contrib.layers.real_valued_column(column_name=column_name)
        for column_name in FEATURE_NUMERICAL_COLUMN_NAMES]
    feature_columns.extend([
        tf.contrib.layers.real_valued_column(column_name=column_name)
        for column_name in FEATURE_INTEGER_COLUMN_NAMES])
    return feature_columns

def build_estimator(model_dir, config, params):
    print "Using gradient boosted decision trees estimator \n"
    learner_config = learner_pb2.LearnerConfig()
    learner_config.learning_rate_tuner.fixed.learning_rate = 0.1
    learner_config.regularization.l1 = 0.0
    learner_config.regularization.l2 = 4.0 / params.batch_size
    learner_config.constraints.max_tree_depth = 4
    learner_config.growing_mode = learner_pb2.LearnerConfig.WHOLE_TREE
    return GradientBoostedDecisionTreeClassifier(
        learner_config=learner_config,
        examples_per_layer=params.batch_size,
        num_trees=100,
        center_bias=False,
        feature_columns=specify_feature_columns()
        # feature_engineering_fn=feature_engineering_fn
    )

def get_experiment_fn(args):
    config = tf.contrib.learn.RunConfig(save_checkpoints_steps=1000)
    def experiment_fn(output_dir):
        return tf.contrib.learn.Experiment(
            estimator=build_estimator(model_dir=output_dir,
                                      config=config,
                                      params=args),
            train_input_fn=read_dataset(args.input_dir,
                                        args.num_epochs, args.batch_size,
                                        mode=tf.contrib.learn.ModeKeys.TRAIN),
            eval_input_fn=read_dataset(args.input_dir,
                                       args.num_epochs, args.batch_size,
                                       mode=tf.contrib.learn.ModeKeys.EVAL),
            eval_metrics=get_eval_metrics())
    return experiment_fn

def run(args):
    learn_runner.run(get_experiment_fn(args), args.model_dir)

if __name__ == '__main__':
    args = _parse_arguments(sys.argv)
    run(args)
Full error trace:
WARNING:tensorflow:Using temporary folder as model directory: /var/folders/mg/sd4_qlyj4_lbh5ggfn6frvcr00fk8_/T/tmpPFhins
WARNING:tensorflow:From /Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/monitors.py:267: __init__ (from tensorflow.contrib.learn.python.learn.monitors) is deprecated and will be removed after 2016-12-05.
Instructions for updating:
Monitors are deprecated. Please use tf.train.SessionRunHook.
WARNING:tensorflow:Casting <dtype: 'int64'> labels to bool.
WARNING:tensorflow:Casting <dtype: 'int64'> labels to bool.
WARNING:tensorflow:Error encountered when serializing resources.
Type is unsupported, or the types of the items don't match field type in CollectionDef.
'_Resource' object has no attribute 'name'
2017-11-16 13:38:39.919664: I tensorflow/core/platform/cpu_feature_guard.cc:137] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2 FMA
WARNING:tensorflow:Error encountered when serializing resources.
Type is unsupported, or the types of the items don't match field type in CollectionDef.
'_Resource' object has no attribute 'name'
2017-11-16 13:38:48.810825: W tensorflow/core/framework/op_kernel.cc:1192] Invalid argument: Dense float feature must be a matrix.
2017-11-16 13:38:48.810825: W tensorflow/core/framework/op_kernel.cc:1192] Invalid argument: Dense float feature must be a matrix.
Traceback (most recent call last):
File "./trainer/task.py", line 162, in <module>
run(args)
File "./trainer/task.py", line 157, in run
learn_runner.run(get_experiment_fn(args), args.model_dir)
File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/learn_runner.py", line 218, in run
return _execute_schedule(experiment, schedule)
File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/learn_runner.py", line 46, in _execute_schedule
return task()
File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/experiment.py", line 625, in train_and_evaluate
self.train(delay_secs=0)
File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/experiment.py", line 367, in train
hooks=self._train_monitors + extra_hooks)
File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/experiment.py", line 812, in _call_train
monitors=hooks)
File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/python/util/deprecation.py", line 316, in new_func
return func(*args, **kwargs)
File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 480, in fit
loss = self._train_model(input_fn=input_fn, hooks=hooks)
File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 1040, in _train_model
_, loss = mon_sess.run([model_fn_ops.train_op, model_fn_ops.loss])
File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.py", line 521, in run
run_metadata=run_metadata)
File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.py", line 892, in run
run_metadata=run_metadata)
File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.py", line 967, in run
raise six.reraise(*original_exc_info)
File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.py", line 952, in run
return self._sess.run(*args, **kwargs)
File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.py", line 1024, in run
run_metadata=run_metadata)
File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.py", line 827, in run
return self._sess.run(*args, **kwargs)
File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 889, in run
run_metadata_ptr)
File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1120, in _run
feed_dict_tensor, options, run_metadata)
File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1317, in _do_run
options, run_metadata)
File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1336, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Dense float feature must be a matrix.
[[Node: gbdt_1/GradientTreesPartitionExamples = GradientTreesPartitionExamples[num_dense_float_features=10, num_sparse_float_features=0, num_sparse_int_features=0, use_locking=true, _device="/job:localhost/replica:0/task:0/device:CPU:0"](ensemble_model, shuffle_batch:16, shuffle_batch:18, shuffle_batch:20, shuffle_batch:21, shuffle_batch:22, shuffle_batch:23, shuffle_batch:24, shuffle_batch:25, shuffle_batch:26, shuffle_batch:27, ^gbdt_1/TreeEnsembleStats)]]
Caused by op u'gbdt_1/GradientTreesPartitionExamples', defined at:
File "./trainer/task.py", line 162, in <module>
run(args)
File "./trainer/task.py", line 157, in run
learn_runner.run(get_experiment_fn(args), args.model_dir)
File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/learn_runner.py", line 218, in run
return _execute_schedule(experiment, schedule)
File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/learn_runner.py", line 46, in _execute_schedule
return task()
File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/experiment.py", line 625, in train_and_evaluate
self.train(delay_secs=0)
File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/experiment.py", line 367, in train
hooks=self._train_monitors + extra_hooks)
File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/experiment.py", line 812, in _call_train
monitors=hooks)
File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/python/util/deprecation.py", line 316, in new_func
return func(*args, **kwargs)
File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 480, in fit
loss = self._train_model(input_fn=input_fn, hooks=hooks)
File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 986, in _train_model
model_fn_ops = self._get_train_ops(features, labels)
File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 1202, in _get_train_ops
return self._call_model_fn(features, labels, model_fn_lib.ModeKeys.TRAIN)
File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 1166, in _call_model_fn
model_fn_results = self._model_fn(features, labels, **kwargs)
File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/contrib/boosted_trees/estimator_batch/model.py", line 98, in model_builder
predictions_dict = gbdt_model.predict(mode)
File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py", line 463, in predict
ensemble_stamp, mode)
File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py", line 392, in _predict_and_return_dict
use_locking=True)
File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/contrib/boosted_trees/python/ops/gen_prediction_ops.py", line 117, in gradient_trees_partition_examples
use_locking=use_locking, name=name)
File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2956, in create_op
op_def=op_def)
File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1470, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
InvalidArgumentError (see above for traceback): Dense float feature must be a matrix.
[[Node: gbdt_1/GradientTreesPartitionExamples = GradientTreesPartitionExamples[num_dense_float_features=10, num_sparse_float_features=0, num_sparse_int_features=0, use_locking=true, _device="/job:localhost/replica:0/task:0/device:CPU:0"](ensemble_model, shuffle_batch:16, shuffle_batch:18, shuffle_batch:20, shuffle_batch:21, shuffle_batch:22, shuffle_batch:23, shuffle_batch:24, shuffle_batch:25, shuffle_batch:26, shuffle_batch:27, ^gbdt_1/TreeEnsembleStats)]]
Answer (score: 3)
My guess is that the parsing spec created by tf.transform differs from the one we usually get. Could you share the output of transformed_metadata.schema.as_feature_spec()?
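(For reference, one untested way to dump that spec, reusing the metadata_io import and directory constants from the code above:)

# Hypothetical inspection snippet, reusing metadata_io and the constants
# from the question; prints the parsing spec produced by tf.transform.
transformed_metadata = metadata_io.read_metadata(
    os.path.join(args.input_dir, TRANSFORMED_METADATA_DIR))
print(transformed_metadata.schema.as_feature_spec())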
As a workaround, try adding this line to the input_fn right after features = tf.train.shuffle_batch(...):
features = {feature_name: tf.reshape(feature_value, [-1, 1])
            for feature_name, feature_value in features.items()}
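Put together, the _input_fn from the question would then look roughly like this (an untested sketch reusing the question's names; note that, as written in the workaround, the reshape also touches the target column before it is popped):

def _input_fn():
    min_after_dequeue = 10000
    features = read_and_decode_single_record(input_dir, num_epochs, mode)
    features = tf.train.shuffle_batch(
        tensors=features,
        batch_size=batch_size,
        min_after_dequeue=min_after_dequeue,
        capacity=(min_after_dequeue + 3) * batch_size)
    # Workaround: the error message says each dense float feature must be a
    # matrix, i.e. a rank-2 [batch_size, 1] tensor, while scalar features come
    # out of shuffle_batch as rank-1 [batch_size] tensors, hence the reshape.
    features = {feature_name: tf.reshape(feature_value, [-1, 1])
                for feature_name, feature_value in features.items()}
    target = features.pop(TARGET_FEATURE_COLUMN)
    return features, target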