I am currently running this tutorial with my own data to expand my understanding of how to use Dataflow and ML Engine on Google Cloud Platform. I am working from the preproc_tft tutorial because it is similar to what I plan to do with my own data. When I execute this code from the tutorial, I get this error:
UnimplementedError: Cast string to float is not supported
[[Node: head/ToFloat = Cast[DstT=DT_FLOAT, SrcT=DT_STRING, _device="/job:localhost/replica:0/task:0/device:CPU:0"](head/labels)]]
In my preprocessing step the CSV file is split into many smaller CSVs, so they need to be combined together in order to create the dataset.
My code:
# In[1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import shutil
import numpy as np
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.INFO)
#excluded for Stack question
BUCKET = '<my bucket>'
PROJECT = '<my project>'
REGION = '<my region>'
import os
os.environ['BUCKET'] = BUCKET
os.environ['PROJECT'] = PROJECT
os.environ['REGION'] = REGION
get_ipython().run_cell_magic('bash', '', 'if ! gsutil ls | grep -q gs://${BUCKET}/; then\n gsutil mb -l ${REGION} gs://${BUCKET}\nfi')
get_ipython().run_cell_magic('bash', '', '#gsutil ls gs://${BUCKET}/logs2/preproc_tft/*-00000*\ngsutil ls gs://${BUCKET}/logs2/preproc/*-00000*')
CSV_COLUMNS ='end_time,device,device_os,device_os_version,latency,megacycles,cost,Status,device_brand,device_family,browser_version,app,ua_parse,key'.split(',')
LABEL_COLUMN = 'Status'
KEY_COLUMN = 'key'
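# Note: tf.decode_csv infers each column's dtype from DEFAULTS below, so the
# ['null'] default makes Status (the LABEL_COLUMN) parse as a tf.string tensor.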
DEFAULTS = [['null'], ['null'],['null'],['null'], [0.0],[0.0],[0.0], ['null'], ['null'],['null'],['null'],['null'],['null'],['null'],['nokey']]
TRAIN_STEPS = 1000
EVAL_STEPS = None
BATCH_SIZE = 512
NEMBEDS = 3
NNSIZE = [64, 16, 4]
def read_dataset(filename, mode, batch_size=512):
    def _input_fn():
        def decode_csv(value_column):
            columns = tf.decode_csv(value_column, record_defaults=DEFAULTS)
            features = dict(zip(CSV_COLUMNS, columns))
            label = features.pop(LABEL_COLUMN)
            return features, label

        # Create list of files that match pattern
        file_list = tf.gfile.Glob(filename)

        # Create dataset from file list
        filenames = tf.data.Dataset.from_tensor_slices(tf.constant(file_list, dtype=tf.string))
        dataset = filenames.flat_map(lambda fn: tf.data.TextLineDataset(fn).skip(1))
        dataset = dataset.map(decode_csv)
        #dataset = (tf.data.TextLineDataset(file_list)  # Read text file
        #           .map(decode_csv))  # Transform each elem by applying decode_csv fn

        if mode == tf.estimator.ModeKeys.TRAIN:
            num_epochs = None  # indefinitely
            dataset = dataset.shuffle(buffer_size=10 * batch_size)
        else:
            num_epochs = 1  # end-of-input after this
        dataset = dataset.repeat(num_epochs).batch(batch_size)
        return dataset.make_one_shot_iterator().get_next()
    return _input_fn
# Define feature columns
def get_wide_deep():
    # Define column types
    feature_columns = []
    end_time = tf.feature_column.embedding_column(tf.feature_column.categorical_column_with_hash_bucket('end_time', 1000), 10)
    feature_columns.append(end_time)
    device = tf.feature_column.embedding_column(tf.feature_column.categorical_column_with_hash_bucket('device', 1000), 10)
    feature_columns.append(device)
    device_os = tf.feature_column.embedding_column(tf.feature_column.categorical_column_with_hash_bucket('device_os', 1000), 10)
    feature_columns.append(device_os)
    device_os_version = tf.feature_column.embedding_column(tf.feature_column.categorical_column_with_hash_bucket('device_os_version', 1000), 10)
    feature_columns.append(device_os_version)
    latency = tf.feature_column.bucketized_column(
        tf.feature_column.numeric_column('latency'),
        boundaries=[.000000, .000010, .000100, .001000, .010000, .100000])
    feature_columns.append(latency)
    megacycles = tf.feature_column.bucketized_column(
        tf.feature_column.numeric_column('megacycles'),
        boundaries=[0, 50, 100, 200, 300])
    feature_columns.append(megacycles)
    cost = tf.feature_column.bucketized_column(
        tf.feature_column.numeric_column('cost'),
        boundaries=[0.000001e-08, 1.000000e-08, 5.000000e-08, 10.000000e-08, 15.000000e-08])
    feature_columns.append(cost)
    device_brand = tf.feature_column.embedding_column(tf.feature_column.categorical_column_with_hash_bucket('device_brand', 1000), 10)
    feature_columns.append(device_brand)
    device_family = tf.feature_column.embedding_column(tf.feature_column.categorical_column_with_hash_bucket('device_family', 1000), 10)
    feature_columns.append(device_family)
    browser_version = tf.feature_column.embedding_column(tf.feature_column.categorical_column_with_hash_bucket('browser_version', 1000), 10)
    feature_columns.append(browser_version)
    app = tf.feature_column.embedding_column(tf.feature_column.categorical_column_with_hash_bucket('app', 1000), 10)
    feature_columns.append(app)
    ua_parse = tf.feature_column.embedding_column(tf.feature_column.categorical_column_with_hash_bucket('ua_parse', 1000), 10)
    feature_columns.append(ua_parse)

    # Sparse columns are wide, have a linear relationship with the output
    wide = [end_time,
            device,
            device_os,
            device_os_version,
            latency,
            megacycles,
            cost,
            device_brand,
            device_family,
            browser_version,
            app,
            ua_parse]

    # Feature cross all the wide columns and embed into a lower dimension
    #crossed = tf.feature_column.crossed_column(wide, hash_bucket_size=20000)
    #embed = tf.feature_column.embedding_column(crossed, 3)

    # Continuous columns are deep, have a complex relationship with the output
    deep = [latency,
            megacycles,
            cost]
            #embed]
    return wide, deep
# Create serving input function to be able to serve predictions later using provided inputs
def serving_input_fn():
    feature_placeholders = {
        'end_time': tf.placeholder(tf.string, [None]),
        'device': tf.placeholder(tf.string, [None]),
        'device_os': tf.placeholder(tf.string, [None]),
        'device_os_version': tf.placeholder(tf.string, [None]),
        'latency': tf.placeholder(tf.float32, [None]),
        'megacycles': tf.placeholder(tf.float32, [None]),
        'cost': tf.placeholder(tf.float32, [None]),
        'device_brand': tf.placeholder(tf.string, [None]),
        'device_family': tf.placeholder(tf.string, [None]),
        'browser_version': tf.placeholder(tf.string, [None]),
        'app': tf.placeholder(tf.string, [None]),
        'ua_parse': tf.placeholder(tf.string, [None]),
    }
    features = {
        key: tf.expand_dims(tensor, -1)
        for key, tensor in feature_placeholders.items()
    }
    return tf.estimator.export.ServingInputReceiver(features, feature_placeholders)
# create metric for hyperparameter tuning
def my_rmse(labels, predictions):
    pred_values = predictions['predictions']
    return {'rmse': tf.metrics.root_mean_squared_error(labels, pred_values)}
# forward to key-column to export
def forward_key_to_export(estimator):
    estimator = tf.contrib.estimator.forward_features(estimator, KEY_COLUMN)
    # return estimator
    ## This shouldn't be necessary (I've filed CL/187793590 to update extenders.py with this code)
    config = estimator.config
    def model_fn2(features, labels, mode):
        estimatorSpec = estimator._call_model_fn(features, labels, mode, config=config)
        if estimatorSpec.export_outputs:
            for ekey in ['predict', 'serving_default']:
                if (ekey in estimatorSpec.export_outputs and
                        isinstance(estimatorSpec.export_outputs[ekey],
                                   tf.estimator.export.PredictOutput)):
                    estimatorSpec.export_outputs[ekey] = tf.estimator.export.PredictOutput(estimatorSpec.predictions)
        return estimatorSpec
    return tf.estimator.Estimator(model_fn=model_fn2, config=config)
    ##
# Create estimator to train and evaluate
def train_and_evaluate(output_dir):
    wide, deep = get_wide_deep()
    estimator = tf.estimator.DNNLinearCombinedRegressor(
        model_dir = output_dir,
        linear_feature_columns = wide,
        dnn_feature_columns = deep,
        dnn_hidden_units = [64, 32])
    train_spec = tf.estimator.TrainSpec(
        input_fn = read_dataset('gs://nosh_ml_models/logs2/preproc/train.*', mode = tf.estimator.ModeKeys.TRAIN),
        max_steps = TRAIN_STEPS)
    exporter = tf.estimator.LatestExporter('exporter', serving_input_fn)
    eval_spec = tf.estimator.EvalSpec(
        input_fn = read_dataset('gs://nosh_ml_models/logs2/preproc/eval.*', mode = tf.estimator.ModeKeys.EVAL),
        steps = None,
        start_delay_secs = 60,  # start evaluating after N seconds
        throttle_secs = 300,    # evaluate every N seconds
        exporters = exporter)
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
# Run the model
shutil.rmtree('logs_trained', ignore_errors = True) # start fresh each time
train_and_evaluate('logs_trained')
Is there a way to skip header rows in TensorFlow, or how can I modify preproc_tft so that it does not process the header rows but I can still define the CSV columns in the tensor?
Edit: With mrry's help I updated _input_fn to:
def read_dataset(filename, mode, batch_size=512):
    def _input_fn():
        def decode_csv(value_column):
            columns = tf.decode_csv(value_column, record_defaults=DEFAULTS)
            features = dict(zip(CSV_COLUMNS, columns))
            label = features.pop(LABEL_COLUMN)
            return features, label

        # Create list of files that match pattern
        file_list = tf.gfile.Glob(filename)

        # Create dataset from file list
        filenames = tf.data.Dataset.from_tensor_slices(file_list)
        dataset = filenames.flat_map(lambda fn: tf.data.TextLineDataset(fn).skip(1))
        dataset = dataset.map(decode_csv)
        #dataset = (tf.data.TextLineDataset(file_list)  # Read text file
        #           .map(decode_csv))  # Transform each elem by applying decode_csv fn

        if mode == tf.estimator.ModeKeys.TRAIN:
            num_epochs = None  # indefinitely
            dataset = dataset.shuffle(buffer_size=10 * batch_size)
        else:
            num_epochs = 1  # end-of-input after this
        dataset = dataset.repeat(num_epochs).batch(batch_size)
        return dataset.make_one_shot_iterator().get_next()
    return _input_fn
Now I am getting this error:
<ipython-input-8-17576dd9a3da> in <lambda>(fn)
12 # Create dataset from file list
13 filenames = tf.data.Dataset.from_tensor_slices(file_list)
---> 14 dataset = filenames.flat_map(lambda fn: tf.data.TextLineDataset(fn).skip(1))
15 dataset = dataset.map(decode_csv)
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/data/ops/readers.pyc in __init__(self, filenames, compression_type, buffer_size)
46 super(TextLineDataset, self).__init__()
47 self._filenames = ops.convert_to_tensor(
---> 48 filenames, dtype=dtypes.string, name="filenames")
49 self._compression_type = convert.optional_param_to_tensor(
50 "compression_type",
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/framework/ops.pyc in convert_to_tensor(value, dtype, name, preferred_dtype)
930 name=name,
931 preferred_dtype=preferred_dtype,
--> 932 as_ref=False)
933
934
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/framework/ops.pyc in internal_convert_to_tensor(value, dtype, name, as_ref, preferred_dtype, ctx)
1020
1021 if ret is None:
-> 1022 ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
1023
1024 if ret is NotImplemented:
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/framework/ops.pyc in _TensorTensorConversionFunction(t, dtype, name, as_ref)
864 raise ValueError(
865 "Tensor conversion requested dtype %s for Tensor with dtype %s: %r" %
--> 866 (dtype.name, t.dtype.name, str(t)))
867 return t
868
ValueError: Tensor conversion requested dtype string for Tensor with dtype float32: 'Tensor("arg0:0", shape=(), dtype=float32)'
We adjusted the read_dataset function to force the array to be strings:
def read_dataset(filename, mode, batch_size=512):
    def _input_fn():
        def decode_csv(value_column):
            columns = tf.decode_csv(value_column, record_defaults=DEFAULTS)
            features = dict(zip(CSV_COLUMNS, columns))
            label = features.pop(LABEL_COLUMN)
            return features, label

        # Create list of files that match pattern
        file_list = tf.gfile.Glob(filename)

        # Create dataset from file list
        filenames = tf.data.Dataset.from_tensor_slices(tf.constant(file_list, dtype=tf.string))
        dataset = filenames.flat_map(lambda fn: tf.data.TextLineDataset(fn).skip(1))
        dataset = dataset.map(decode_csv)
        #dataset = (tf.data.TextLineDataset(file_list)  # Read text file
        #           .map(decode_csv))  # Transform each elem by applying decode_csv fn

        if mode == tf.estimator.ModeKeys.TRAIN:
            num_epochs = None  # indefinitely
            dataset = dataset.shuffle(buffer_size=10 * batch_size)
        else:
            num_epochs = 1  # end-of-input after this
        dataset = dataset.repeat(num_epochs).batch(batch_size)
        return dataset.make_one_shot_iterator().get_next()
    return _input_fn
Now I am receiving this error:
INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_task_type': 'worker', '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f5127451fd0>, '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_service': None, '_num_ps_replicas': 0, '_tf_random_seed': None, '_master': '', '_num_worker_replicas': 1, '_task_id': 0, '_log_step_count_steps': 100, '_model_dir': 'logs_trained', '_save_summary_steps': 100}
INFO:tensorflow:Running training and evaluation locally (non-distributed).
INFO:tensorflow:Start train and evaluate loop. The evaluate will happen after 300 secs (eval_spec.throttle_secs) or training is finished.
INFO:tensorflow:Create CheckpointSaverHook.
UnimplementedErrorTraceback (most recent call last)
<ipython-input-13-9982390b7e4a> in <module>()
1 # Run the model
2 shutil.rmtree('logs_trained', ignore_errors = True) # start fresh each time
----> 3 train_and_evaluate('logs_trained')
<ipython-input-12-b456e07a6c7d> in train_and_evaluate(output_dir)
17 throttle_secs = 300, # evaluate every N seconds
18 exporters = exporter)
---> 19 tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/estimator/training.pyc in train_and_evaluate(estimator, train_spec, eval_spec)
430 config.task_type != run_config_lib.TaskType.EVALUATOR):
431 logging.info('Running training and evaluation locally (non-distributed).')
--> 432 executor.run_local()
433 return
434
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/estimator/training.pyc in run_local(self)
609 input_fn=self._train_spec.input_fn,
610 max_steps=self._train_spec.max_steps,
--> 611 hooks=train_hooks)
612
613 # Final export signal: For any eval result with global_step >= train
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/estimator/estimator.pyc in train(self, input_fn, hooks, steps, max_steps, saving_listeners)
312
313 saving_listeners = _check_listeners_type(saving_listeners)
--> 314 loss = self._train_model(input_fn, hooks, saving_listeners)
315 logging.info('Loss for final step: %s.', loss)
316 return self
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/estimator/estimator.pyc in _train_model(self, input_fn, hooks, saving_listeners)
813 loss = None
814 while not mon_sess.should_stop():
--> 815 _, loss = mon_sess.run([estimator_spec.train_op, estimator_spec.loss])
816 return loss
817
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.pyc in run(self, fetches, feed_dict, options, run_metadata)
537 feed_dict=feed_dict,
538 options=options,
--> 539 run_metadata=run_metadata)
540
541 def run_step_fn(self, step_fn):
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.pyc in run(self, fetches, feed_dict, options, run_metadata)
1011 feed_dict=feed_dict,
1012 options=options,
-> 1013 run_metadata=run_metadata)
1014 except _PREEMPTION_ERRORS as e:
1015 logging.info('An error was raised. This may be due to a preemption in '
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.pyc in run(self, *args, **kwargs)
1102 raise six.reraise(*original_exc_info)
1103 else:
-> 1104 raise six.reraise(*original_exc_info)
1105
1106
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.pyc in run(self, *args, **kwargs)
1087 def run(self, *args, **kwargs):
1088 try:
-> 1089 return self._sess.run(*args, **kwargs)
1090 except _PREEMPTION_ERRORS:
1091 raise
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.pyc in run(self, fetches, feed_dict, options, run_metadata)
1159 feed_dict=feed_dict,
1160 options=options,
-> 1161 run_metadata=run_metadata)
1162
1163 for hook in self._hooks:
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.pyc in run(self, *args, **kwargs)
939
940 def run(self, *args, **kwargs):
--> 941 return self._sess.run(*args, **kwargs)
942
943 def run_step_fn(self, step_fn, raw_session, run_with_hooks):
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in run(self, fetches, feed_dict, options, run_metadata)
893 try:
894 result = self._run(None, fetches, feed_dict, options_ptr,
--> 895 run_metadata_ptr)
896 if run_metadata:
897 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _run(self, handle, fetches, feed_dict, options, run_metadata)
1126 if final_fetches or final_targets or (handle and feed_dict_tensor):
1127 results = self._do_run(handle, final_targets, final_fetches,
-> 1128 feed_dict_tensor, options, run_metadata)
1129 else:
1130 results = []
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
1342 if handle is None:
1343 return self._do_call(_run_fn, self._session, feeds, fetches, targets,
-> 1344 options, run_metadata)
1345 else:
1346 return self._do_call(_prun_fn, self._session, handle, feeds, fetches)
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _do_call(self, fn, *args)
1361 except KeyError:
1362 pass
-> 1363 raise type(e)(node_def, op, message)
1364
1365 def _extend_graph(self):
UnimplementedError: Cast string to float is not supported
[[Node: head/ToFloat = Cast[DstT=DT_FLOAT, SrcT=DT_STRING, _device="/job:localhost/replica:0/task:0/device:CPU:0"](head/labels)]]
Caused by op u'head/ToFloat', defined at:
File "/usr/local/envs/py2env/lib/python2.7/runpy.py", line 174, in _run_module_as_main
"__main__", fname, loader, pkg_name)
File "/usr/local/envs/py2env/lib/python2.7/runpy.py", line 72, in _run_code
exec code in run_globals
File "/usr/local/envs/py2env/lib/python2.7/site-packages/ipykernel/__main__.py", line 3, in <module>
app.launch_new_instance()
File "/usr/local/envs/py2env/lib/python2.7/site-packages/traitlets/config/application.py", line 658, in launch_instance
app.start()
File "/usr/local/envs/py2env/lib/python2.7/site-packages/ipykernel/kernelapp.py", line 474, in start
ioloop.IOLoop.instance().start()
File "/usr/local/envs/py2env/lib/python2.7/site-packages/zmq/eventloop/ioloop.py", line 177, in start
super(ZMQIOLoop, self).start()
File "/usr/local/envs/py2env/lib/python2.7/site-packages/tornado/ioloop.py", line 887, in start
handler_func(fd_obj, events)
File "/usr/local/envs/py2env/lib/python2.7/site-packages/tornado/stack_context.py", line 275, in null_wrapper
return fn(*args, **kwargs)
File "/usr/local/envs/py2env/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
self._handle_recv()
File "/usr/local/envs/py2env/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
self._run_callback(callback, msg)
File "/usr/local/envs/py2env/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
callback(*args, **kwargs)
File "/usr/local/envs/py2env/lib/python2.7/site-packages/tornado/stack_context.py", line 275, in null_wrapper
return fn(*args, **kwargs)
File "/usr/local/envs/py2env/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 276, in dispatcher
return self.dispatch_shell(stream, msg)
File "/usr/local/envs/py2env/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 228, in dispatch_shell
handler(stream, idents, msg)
File "/usr/local/envs/py2env/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 390, in execute_request
user_expressions, allow_stdin)
File "/usr/local/envs/py2env/lib/python2.7/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "/usr/local/envs/py2env/lib/python2.7/site-packages/ipykernel/zmqshell.py", line 501, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "/usr/local/envs/py2env/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2718, in run_cell
interactivity=interactivity, compiler=compiler, result=result)
File "/usr/local/envs/py2env/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2828, in run_ast_nodes
if self.run_code(code, result):
File "/usr/local/envs/py2env/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2882, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-13-9982390b7e4a>", line 3, in <module>
train_and_evaluate('logs_trained')
File "<ipython-input-12-b456e07a6c7d>", line 19, in train_and_evaluate
tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
File "/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/estimator/training.py", line 432, in train_and_evaluate
executor.run_local()
File "/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/estimator/training.py", line 611, in run_local
hooks=train_hooks)
File "/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/estimator/estimator.py", line 314, in train
loss = self._train_model(input_fn, hooks, saving_listeners)
File "/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/estimator/estimator.py", line 743, in _train_model
features, labels, model_fn_lib.ModeKeys.TRAIN, self.config)
File "/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/estimator/estimator.py", line 725, in _call_model_fn
model_fn_results = self._model_fn(features=features, **kwargs)
File "/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/estimator/canned/dnn_linear_combined.py", line 528, in _model_fn
config=config)
File "/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/estimator/canned/dnn_linear_combined.py", line 216, in _dnn_linear_combined_model_fn
logits=logits)
File "/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/estimator/canned/head.py", line 1078, in create_estimator_spec
features=features, mode=mode, logits=logits, labels=labels)
File "/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/estimator/canned/head.py", line 1026, in create_loss
labels = math_ops.to_float(labels)
File "/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/ops/math_ops.py", line 807, in to_float
return cast(x, dtypes.float32, name=name)
File "/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/ops/math_ops.py", line 758, in cast
return gen_math_ops.cast(x, base_type, name=name)
File "/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/ops/gen_math_ops.py", line 919, in cast
"Cast", x=x, DstT=DstT, name=name)
File "/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 3160, in create_op
op_def=op_def)
File "/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1625, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
UnimplementedError (see above for traceback): Cast string to float is not supported
[[Node: head/ToFloat = Cast[DstT=DT_FLOAT, SrcT=DT_STRING, _device="/job:localhost/replica:0/task:0/device:CPU:0"](head/labels)]]
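Note that both tracebacks point at head/ToFloat casting head/labels: DNNLinearCombinedRegressor expects a numeric label, but Status is parsed as a string because its entry in DEFAULTS is ['null']. A minimal sketch of one way to handle this inside decode_csv, assuming Status actually holds numeric text such as "0"/"1" (that assumption is mine, not something stated in the question):

def decode_csv(value_column):
    columns = tf.decode_csv(value_column, record_defaults=DEFAULTS)
    features = dict(zip(CSV_COLUMNS, columns))
    label = features.pop(LABEL_COLUMN)
    # Assumption: Status contains numeric text (e.g. "0"/"1"), so it can be
    # converted to the float label the regressor head expects. If Status is
    # categorical text instead, map it to class ids (e.g. with tf.contrib.lookup)
    # and use a classifier rather than a regressor.
    label = tf.string_to_number(label, out_type=tf.float32)
    return features, label

Alternatively, if the files really contain numbers in that column, giving Status a numeric default (e.g. [0.0]) in DEFAULTS would make tf.decode_csv parse it as a float directly.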
Answer (score: 4)
You can use Dataset.skip(1) to skip elements of a dataset. However, this poses a slight problem with tf.data.TextLineDataset(file_list), because it would only skip the first line of the first file. Fortunately, you can use Dataset.flat_map() to loop over the filenames and skip the first line of each file, as follows:
# Start by making a dataset of filenames.
filenames = tf.data.Dataset.from_tensor_slices(
    tf.constant(file_list, dtype=tf.string))
# For each filename, create a TextLineDataset and skip the first line.
# The resulting dataset contains all the non-header lines of all files in
# `file_list`.
dataset = filenames.flat_map(lambda fn: tf.data.TextLineDataset(fn).skip(1))
# Then continue to preprocess the data as needed.
dataset = dataset.map(decode_csv)
Incidentally, TensorFlow 1.8 (currently a release candidate) introduces a utility for reading CSV data called tf.contrib.data.make_csv_dataset(), which could be useful for simplifying CSV-related code.
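As a rough sketch of how that utility might replace the hand-written input function (untested; the column names, label, and argument choices are assumptions based on the question and the TF 1.8 API, so check them against the documentation):

def read_dataset(filename, mode, batch_size=512):
    def _input_fn():
        # make_csv_dataset globs the file pattern, skips each file's header line
        # (header=True by default), and yields (features_dict, label) batches
        # when label_name is set.
        dataset = tf.contrib.data.make_csv_dataset(
            filename,
            batch_size=batch_size,
            column_names=CSV_COLUMNS,
            label_name=LABEL_COLUMN,
            num_epochs=None if mode == tf.estimator.ModeKeys.TRAIN else 1,
            shuffle=(mode == tf.estimator.ModeKeys.TRAIN))
        return dataset.make_one_shot_iterator().get_next()
    return _input_fn

The label produced this way still has to be numeric for DNNLinearCombinedRegressor, so the Status dtype issue noted above applies here as well.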