TensorFlow Error: `UnimplementedError: Cast string to float is not supported`

Asked: 2018-04-16 00:34:08

Tags: tensorflow google-cloud-dataflow google-cloud-ml tensorflow-datasets

I am currently running this tutorial with my own data to extend my understanding of how to use Dataflow and ML Engine on Google Cloud Platform. I am using the preproc_tft tutorial because it is similar to what I plan to do with my own data. When I execute this code from the tutorial, I get this error:

UnimplementedError: Cast string to float is not supported [[Node: head/ToFloat = Cast[DstT=DT_FLOAT, SrcT=DT_STRING, _device="/job:localhost/replica:0/task:0/device:CPU:0"](head/labels)]]

In my preprocessing step, the CSV file is split into many smaller CSVs, so they need to be combined in order to create the dataset.

My code:

# In[1]:


from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import shutil
import numpy as np
import tensorflow as tf

tf.logging.set_verbosity(tf.logging.INFO)

#excluded for Stack question
BUCKET = '<my bucket>'
PROJECT = '<my project>'
REGION = '<my region>'


import os
os.environ['BUCKET'] = BUCKET
os.environ['PROJECT'] = PROJECT
os.environ['REGION'] = REGION


get_ipython().run_cell_magic('bash', '', 'if ! gsutil ls | grep -q gs://${BUCKET}/; then\n  gsutil mb -l ${REGION} gs://${BUCKET}\nfi')


get_ipython().run_cell_magic('bash', '', '#gsutil ls gs://${BUCKET}/logs2/preproc_tft/*-00000*\ngsutil ls gs://${BUCKET}/logs2/preproc/*-00000*')


CSV_COLUMNS ='end_time,device,device_os,device_os_version,latency,megacycles,cost,Status,device_brand,device_family,browser_version,app,ua_parse,key'.split(',')
LABEL_COLUMN = 'Status'
KEY_COLUMN = 'key'

DEFAULTS = [['null'], ['null'],['null'],['null'], [0.0],[0.0],[0.0], ['null'], ['null'],['null'],['null'],['null'],['null'],['null'],['nokey']]

TRAIN_STEPS = 1000
EVAL_STEPS = None
BATCH_SIZE = 512
NEMBEDS = 3
NNSIZE = [64, 16, 4]

def read_dataset(filename, mode, batch_size=512):
    def _input_fn():
        def decode_csv(value_column):
            columns = tf.decode_csv(value_column, record_defaults=DEFAULTS)
            features = dict(zip(CSV_COLUMNS, columns))
            label = features.pop(LABEL_COLUMN)
            return features, label

        # Create list of files that match pattern
        file_list = tf.gfile.Glob(filename)

        # Create dataset from file list
        filenames = tf.data.Dataset.from_tensor_slices(tf.constant(file_list, dtype=tf.string))
        dataset = filenames.flat_map(lambda fn: tf.data.TextLineDataset(fn).skip(1))
        dataset = dataset.map(decode_csv)
        #dataset = (tf.data.TextLineDataset(file_list)  # Read text file
        #           .map(decode_csv))  # Transform each elem by applying decode_csv fn

        if mode == tf.estimator.ModeKeys.TRAIN:
            num_epochs = None  # indefinitely
            dataset = dataset.shuffle(buffer_size=10 * batch_size)
        else:
            num_epochs = 1  # end-of-input after this

        dataset = dataset.repeat(num_epochs).batch(batch_size)
        return dataset.make_one_shot_iterator().get_next()

    return _input_fn


# Define feature columns
def get_wide_deep():
  # Define column types
  feature_columns = []
  end_time = tf.feature_column.embedding_column(tf.feature_column.categorical_column_with_hash_bucket('end_time', 1000), 10)
  feature_columns.append(end_time)
  device = tf.feature_column.embedding_column(tf.feature_column.categorical_column_with_hash_bucket('device', 1000), 10)
  feature_columns.append(device)
  device_os = tf.feature_column.embedding_column(tf.feature_column.categorical_column_with_hash_bucket('device_os', 1000), 10)
  feature_columns.append(device_os)
  device_os_version = tf.feature_column.embedding_column(tf.feature_column.categorical_column_with_hash_bucket('device_os_version', 1000), 10)
  feature_columns.append(device_os_version)
  latency = tf.feature_column.bucketized_column(
      tf.feature_column.numeric_column('latency'), 
      boundaries=[.000000, .000010, .000100, .001000, .010000, .100000])
  feature_columns.append(latency)
  megacycles = tf.feature_column.bucketized_column(
      tf.feature_column.numeric_column('megacycles'), 
      boundaries=[0, 50, 100, 200, 300])
  feature_columns.append(megacycles)
  cost = tf.feature_column.bucketized_column(
      tf.feature_column.numeric_column('cost'), 
      boundaries=[0.000001e-08, 1.000000e-08, 5.000000e-08, 10.000000e-08, 15.000000e-08 ])
  feature_columns.append(cost)
  device_brand = tf.feature_column.embedding_column(tf.feature_column.categorical_column_with_hash_bucket('device_brand', 1000), 10)
  feature_columns.append(device_brand)
  device_family = tf.feature_column.embedding_column(tf.feature_column.categorical_column_with_hash_bucket('device_family', 1000), 10)
  feature_columns.append(device_family)
  browser_version = tf.feature_column.embedding_column(tf.feature_column.categorical_column_with_hash_bucket('browser_version', 1000), 10)
  feature_columns.append(browser_version)
  app = tf.feature_column.embedding_column(tf.feature_column.categorical_column_with_hash_bucket('app', 1000), 10)
  feature_columns.append(app)
  ua_parse = tf.feature_column.embedding_column(tf.feature_column.categorical_column_with_hash_bucket('ua_parse', 1000), 10)
  feature_columns.append(ua_parse)

  # Sparse columns are wide, have a linear relationship with the output
  wide = [end_time,
          device,
          device_os,
          device_os_version,
          latency,
          megacycles,
          cost,
          device_brand,
          device_family,
          browser_version,
          app,
          ua_parse]

  # Feature cross all the wide columns and embed into a lower dimension
  #crossed = tf.feature_column.crossed_column(wide, hash_bucket_size=20000)
  #embed = tf.feature_column.embedding_column(crossed, 3)

  # Continuous columns are deep, have a complex relationship with the output
  deep = [latency,
          megacycles,
          cost]
          #embed]
  return wide, deep


# Create serving input function to be able to serve predictions later using provided inputs
def serving_input_fn():
    feature_placeholders = {
        'end_time': tf.placeholder(tf.string, [None]),
        'device': tf.placeholder(tf.string, [None]),
        'device_os': tf.placeholder(tf.string, [None]),
        'device_os_version': tf.placeholder(tf.string, [None]),
        'latency': tf.placeholder(tf.float32, [None]),
        'megacycles': tf.placeholder(tf.float32, [None]),
        'cost': tf.placeholder(tf.float32, [None]),
        'device_brand': tf.placeholder(tf.string, [None]),
        'device_family': tf.placeholder(tf.string, [None]),
        'browser_version': tf.placeholder(tf.string, [None]),
        'app': tf.placeholder(tf.string, [None]),
        'ua_parse': tf.placeholder(tf.string, [None]),

    }
    features = {
        key: tf.expand_dims(tensor, -1)
        for key, tensor in feature_placeholders.items()
    }
    return tf.estimator.export.ServingInputReceiver(features, feature_placeholders)


# create metric for hyperparameter tuning
def my_rmse(labels, predictions):
    pred_values = predictions['predictions']
    return {'rmse': tf.metrics.root_mean_squared_error(labels, pred_values)}


# forward to key-column to export
def forward_key_to_export(estimator):
    estimator = tf.contrib.estimator.forward_features(estimator, KEY_COLUMN)
    # return estimator

    ## This shouldn't be necessary (I've filed CL/187793590 to update extenders.py with this code)
    config = estimator.config
    def model_fn2(features, labels, mode):
      estimatorSpec = estimator._call_model_fn(features, labels, mode, config=config)
      if estimatorSpec.export_outputs:
        for ekey in ['predict', 'serving_default']:
          if (ekey in estimatorSpec.export_outputs and
              isinstance(estimatorSpec.export_outputs[ekey],
                         tf.estimator.export.PredictOutput)):
               estimatorSpec.export_outputs[ekey] = \
                 tf.estimator.export.PredictOutput(estimatorSpec.predictions)
      return estimatorSpec
    return tf.estimator.Estimator(model_fn=model_fn2, config=config)
    ##


# Create estimator to train and evaluate
def train_and_evaluate(output_dir):
  wide, deep = get_wide_deep()
  estimator = tf.estimator.DNNLinearCombinedRegressor(
                       model_dir = output_dir,
                       linear_feature_columns = wide,
                       dnn_feature_columns = deep,
                       dnn_hidden_units = [64, 32])
  train_spec = tf.estimator.TrainSpec(
                       input_fn = read_dataset('gs://nosh_ml_models/logs2/preproc/train.*', mode = tf.estimator.ModeKeys.TRAIN),
                       max_steps = TRAIN_STEPS)
  exporter = tf.estimator.LatestExporter('exporter', serving_input_fn)
  eval_spec = tf.estimator.EvalSpec(
                       input_fn = read_dataset('gs://nosh_ml_models/logs2/preproc/eval.*', mode = tf.estimator.ModeKeys.EVAL),
                       steps = None,
                       start_delay_secs = 60, # start evaluating after N seconds
                       throttle_secs = 300,  # evaluate every N seconds
                       exporters = exporter)
  tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)


# Run the model
shutil.rmtree('logs_trained', ignore_errors = True) # start fresh each time
train_and_evaluate('logs_trained')

Is there a way to skip the header row in TensorFlow, or how can I modify preproc_tft so that it does not process the header row but I can still define the CSV columns in the tensor?

Edit: With mrry's help, I updated _input_fn to:

def read_dataset(filename, mode, batch_size=512):
    def _input_fn():
        def decode_csv(value_column):
            columns = tf.decode_csv(value_column, record_defaults=DEFAULTS)
            features = dict(zip(CSV_COLUMNS, columns))
            label = features.pop(LABEL_COLUMN)
            return features, label

        # Create list of files that match pattern
        file_list = tf.gfile.Glob(filename)

        # Create dataset from file list
        filenames = tf.data.Dataset.from_tensor_slices(file_list)
        dataset = filenames.flat_map(lambda fn: tf.data.TextLineDataset(fn).skip(1))
        dataset = dataset.map(decode_csv)
        #dataset = (tf.data.TextLineDataset(file_list)  # Read text file
                   #.map(decode_csv))  # Transform each elem by applying decode_csv fn

        if mode == tf.estimator.ModeKeys.TRAIN:
            num_epochs = None  # indefinitely
            dataset = dataset.shuffle(buffer_size=10 * batch_size)
        else:
            num_epochs = 1  # end-of-input after this

        dataset = dataset.repeat(num_epochs).batch(batch_size)
        return dataset.make_one_shot_iterator().get_next()

    return _input_fn

Now I get this error:

    <ipython-input-8-17576dd9a3da> in <lambda>(fn)
     12         # Create dataset from file list
     13         filenames = tf.data.Dataset.from_tensor_slices(file_list)
---> 14         dataset = filenames.flat_map(lambda fn: tf.data.TextLineDataset(fn).skip(1))
     15         dataset = dataset.map(decode_csv)
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/data/ops/readers.pyc in __init__(self, filenames, compression_type, buffer_size)
         46     super(TextLineDataset, self).__init__()
         47     self._filenames = ops.convert_to_tensor(
    ---> 48         filenames, dtype=dtypes.string, name="filenames")
         49     self._compression_type = convert.optional_param_to_tensor(
         50         "compression_type",

    /usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/framework/ops.pyc in convert_to_tensor(value, dtype, name, preferred_dtype)
        930       name=name,
        931       preferred_dtype=preferred_dtype,
    --> 932       as_ref=False)
        933 
        934 

    /usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/framework/ops.pyc in internal_convert_to_tensor(value, dtype, name, as_ref, preferred_dtype, ctx)
       1020 
       1021     if ret is None:
    -> 1022       ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
       1023 
       1024     if ret is NotImplemented:

    /usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/framework/ops.pyc in _TensorTensorConversionFunction(t, dtype, name, as_ref)
        864     raise ValueError(
        865         "Tensor conversion requested dtype %s for Tensor with dtype %s: %r" %
    --> 866         (dtype.name, t.dtype.name, str(t)))
        867   return t
        868 

    ValueError: Tensor conversion requested dtype string for Tensor with dtype float32: 'Tensor("arg0:0", shape=(), dtype=float32)'

We adjusted the read_dataset function to force the list of filenames to be strings, by wrapping it in tf.constant with dtype=tf.string:

def read_dataset(filename, mode, batch_size=512):
    def _input_fn():
        def decode_csv(value_column):
            columns = tf.decode_csv(value_column, record_defaults=DEFAULTS)
            features = dict(zip(CSV_COLUMNS, columns))
            label = features.pop(LABEL_COLUMN)
            return features, label

        # Create list of files that match pattern
        file_list = tf.gfile.Glob(filename)

        # Create dataset from file list
        filenames = tf.data.Dataset.from_tensor_slices(tf.constant(file_list, dtype=tf.string))
        dataset = filenames.flat_map(lambda fn: tf.data.TextLineDataset(fn).skip(1))
        dataset = dataset.map(decode_csv)
        #dataset = (tf.data.TextLineDataset(file_list)  # Read text file
        #           .map(decode_csv))  # Transform each elem by applying decode_csv fn

        if mode == tf.estimator.ModeKeys.TRAIN:
            num_epochs = None  # indefinitely
            dataset = dataset.shuffle(buffer_size=10 * batch_size)
        else:
            num_epochs = 1  # end-of-input after this

        dataset = dataset.repeat(num_epochs).batch(batch_size)
        return dataset.make_one_shot_iterator().get_next()

    return _input_fn

Now I get this error:

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_task_type': 'worker', '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f5127451fd0>, '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_service': None, '_num_ps_replicas': 0, '_tf_random_seed': None, '_master': '', '_num_worker_replicas': 1, '_task_id': 0, '_log_step_count_steps': 100, '_model_dir': 'logs_trained', '_save_summary_steps': 100}
INFO:tensorflow:Running training and evaluation locally (non-distributed).
INFO:tensorflow:Start train and evaluate loop. The evaluate will happen after 300 secs (eval_spec.throttle_secs) or training is finished.
INFO:tensorflow:Create CheckpointSaverHook.

UnimplementedErrorTraceback (most recent call last)
<ipython-input-13-9982390b7e4a> in <module>()
      1 # Run the model
      2 shutil.rmtree('logs_trained', ignore_errors = True) # start fresh each time
----> 3 train_and_evaluate('logs_trained')

<ipython-input-12-b456e07a6c7d> in train_and_evaluate(output_dir)
     17                        throttle_secs = 300,  # evaluate every N seconds
     18                        exporters = exporter)
---> 19   tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/estimator/training.pyc in train_and_evaluate(estimator, train_spec, eval_spec)
    430       config.task_type != run_config_lib.TaskType.EVALUATOR):
    431     logging.info('Running training and evaluation locally (non-distributed).')
--> 432     executor.run_local()
    433     return
    434 

/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/estimator/training.pyc in run_local(self)
    609           input_fn=self._train_spec.input_fn,
    610           max_steps=self._train_spec.max_steps,
--> 611           hooks=train_hooks)
    612 
    613       # Final export signal: For any eval result with global_step >= train

/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/estimator/estimator.pyc in train(self, input_fn, hooks, steps, max_steps, saving_listeners)
    312 
    313     saving_listeners = _check_listeners_type(saving_listeners)
--> 314     loss = self._train_model(input_fn, hooks, saving_listeners)
    315     logging.info('Loss for final step: %s.', loss)
    316     return self

/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/estimator/estimator.pyc in _train_model(self, input_fn, hooks, saving_listeners)
    813         loss = None
    814         while not mon_sess.should_stop():
--> 815           _, loss = mon_sess.run([estimator_spec.train_op, estimator_spec.loss])
    816       return loss
    817 

/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.pyc in run(self, fetches, feed_dict, options, run_metadata)
    537                           feed_dict=feed_dict,
    538                           options=options,
--> 539                           run_metadata=run_metadata)
    540 
    541   def run_step_fn(self, step_fn):

/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.pyc in run(self, fetches, feed_dict, options, run_metadata)
   1011                               feed_dict=feed_dict,
   1012                               options=options,
-> 1013                               run_metadata=run_metadata)
   1014       except _PREEMPTION_ERRORS as e:
   1015         logging.info('An error was raised. This may be due to a preemption in '

/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.pyc in run(self, *args, **kwargs)
   1102         raise six.reraise(*original_exc_info)
   1103       else:
-> 1104         raise six.reraise(*original_exc_info)
   1105 
   1106 

/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.pyc in run(self, *args, **kwargs)
   1087   def run(self, *args, **kwargs):
   1088     try:
-> 1089       return self._sess.run(*args, **kwargs)
   1090     except _PREEMPTION_ERRORS:
   1091       raise

/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.pyc in run(self, fetches, feed_dict, options, run_metadata)
   1159                                   feed_dict=feed_dict,
   1160                                   options=options,
-> 1161                                   run_metadata=run_metadata)
   1162 
   1163     for hook in self._hooks:

/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.pyc in run(self, *args, **kwargs)
    939 
    940   def run(self, *args, **kwargs):
--> 941     return self._sess.run(*args, **kwargs)
    942 
    943   def run_step_fn(self, step_fn, raw_session, run_with_hooks):

/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in run(self, fetches, feed_dict, options, run_metadata)
    893     try:
    894       result = self._run(None, fetches, feed_dict, options_ptr,
--> 895                          run_metadata_ptr)
    896       if run_metadata:
    897         proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _run(self, handle, fetches, feed_dict, options, run_metadata)
   1126     if final_fetches or final_targets or (handle and feed_dict_tensor):
   1127       results = self._do_run(handle, final_targets, final_fetches,
-> 1128                              feed_dict_tensor, options, run_metadata)
   1129     else:
   1130       results = []

/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
   1342     if handle is None:
   1343       return self._do_call(_run_fn, self._session, feeds, fetches, targets,
-> 1344                            options, run_metadata)
   1345     else:
   1346       return self._do_call(_prun_fn, self._session, handle, feeds, fetches)

/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _do_call(self, fn, *args)
   1361         except KeyError:
   1362           pass
-> 1363       raise type(e)(node_def, op, message)
   1364 
   1365   def _extend_graph(self):

UnimplementedError: Cast string to float is not supported
     [[Node: head/ToFloat = Cast[DstT=DT_FLOAT, SrcT=DT_STRING, _device="/job:localhost/replica:0/task:0/device:CPU:0"](head/labels)]]

Caused by op u'head/ToFloat', defined at:
  File "/usr/local/envs/py2env/lib/python2.7/runpy.py", line 174, in _run_module_as_main
    "__main__", fname, loader, pkg_name)
  File "/usr/local/envs/py2env/lib/python2.7/runpy.py", line 72, in _run_code
    exec code in run_globals
  File "/usr/local/envs/py2env/lib/python2.7/site-packages/ipykernel/__main__.py", line 3, in <module>
    app.launch_new_instance()
  File "/usr/local/envs/py2env/lib/python2.7/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/usr/local/envs/py2env/lib/python2.7/site-packages/ipykernel/kernelapp.py", line 474, in start
    ioloop.IOLoop.instance().start()
  File "/usr/local/envs/py2env/lib/python2.7/site-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/usr/local/envs/py2env/lib/python2.7/site-packages/tornado/ioloop.py", line 887, in start
    handler_func(fd_obj, events)
  File "/usr/local/envs/py2env/lib/python2.7/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/usr/local/envs/py2env/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/usr/local/envs/py2env/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/usr/local/envs/py2env/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/usr/local/envs/py2env/lib/python2.7/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/usr/local/envs/py2env/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 276, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/usr/local/envs/py2env/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 228, in dispatch_shell
    handler(stream, idents, msg)
  File "/usr/local/envs/py2env/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 390, in execute_request
    user_expressions, allow_stdin)
  File "/usr/local/envs/py2env/lib/python2.7/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/usr/local/envs/py2env/lib/python2.7/site-packages/ipykernel/zmqshell.py", line 501, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/usr/local/envs/py2env/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2718, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/usr/local/envs/py2env/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2828, in run_ast_nodes
    if self.run_code(code, result):
  File "/usr/local/envs/py2env/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2882, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-13-9982390b7e4a>", line 3, in <module>
    train_and_evaluate('logs_trained')
  File "<ipython-input-12-b456e07a6c7d>", line 19, in train_and_evaluate
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
  File "/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/estimator/training.py", line 432, in train_and_evaluate
    executor.run_local()
  File "/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/estimator/training.py", line 611, in run_local
    hooks=train_hooks)
  File "/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/estimator/estimator.py", line 314, in train
    loss = self._train_model(input_fn, hooks, saving_listeners)
  File "/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/estimator/estimator.py", line 743, in _train_model
    features, labels, model_fn_lib.ModeKeys.TRAIN, self.config)
  File "/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/estimator/estimator.py", line 725, in _call_model_fn
    model_fn_results = self._model_fn(features=features, **kwargs)
  File "/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/estimator/canned/dnn_linear_combined.py", line 528, in _model_fn
    config=config)
  File "/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/estimator/canned/dnn_linear_combined.py", line 216, in _dnn_linear_combined_model_fn
    logits=logits)
  File "/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/estimator/canned/head.py", line 1078, in create_estimator_spec
    features=features, mode=mode, logits=logits, labels=labels)
  File "/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/estimator/canned/head.py", line 1026, in create_loss
    labels = math_ops.to_float(labels)
  File "/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/ops/math_ops.py", line 807, in to_float
    return cast(x, dtypes.float32, name=name)
  File "/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/ops/math_ops.py", line 758, in cast
    return gen_math_ops.cast(x, base_type, name=name)
  File "/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/ops/gen_math_ops.py", line 919, in cast
    "Cast", x=x, DstT=DstT, name=name)
  File "/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 3160, in create_op
    op_def=op_def)
  File "/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1625, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

UnimplementedError (see above for traceback): Cast string to float is not supported
     [[Node: head/ToFloat = Cast[DstT=DT_FLOAT, SrcT=DT_STRING, _device="/job:localhost/replica:0/task:0/device:CPU:0"](head/labels)]]
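
For reference, the traceback shows the cast failing in head.py at math_ops.to_float(labels): the regressor head tries to convert the Status label to a float, but decode_csv yields it as a string because its entry in DEFAULTS is ['null']. A minimal, purely illustrative sketch of converting the label inside decode_csv, assuming Status actually contains numeric text, would be:

def decode_csv(value_column):
    columns = tf.decode_csv(value_column, record_defaults=DEFAULTS)
    features = dict(zip(CSV_COLUMNS, columns))
    label = features.pop(LABEL_COLUMN)
    # Status is decoded as a string; convert it to a float before it reaches
    # the regressor head (assumes the column holds numeric text like "200").
    label = tf.string_to_number(label, out_type=tf.float32)
    return features, label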

How can I read a CSV laid out like the table in the image below?

[image: sample rows of the CSV table]

1 Answer:

Answer 0 (score: 4):

You can use Dataset.skip(1) to skip an element of a dataset. However, this causes a slight problem with tf.data.TextLineDataset(file_list), because it would only skip the first line of the first file. Fortunately, you can use Dataset.flat_map() to loop over the filenames and skip the first line of each file, as follows:

# Start by making a dataset of filenames.
filenames = tf.data.Dataset.from_tensor_slices(
    tf.constant(file_list, dtype=tf.string))

# For each filename, create a TextLineDataset and skip the first line.
# The resulting dataset contains all the non-header lines of all files in
# `file_list`.
dataset = filenames.flat_map(lambda fn: tf.data.TextLineDataset(fn).skip(1))

# Then continue to preprocess the data as needed.
dataset = dataset.map(decode_csv)

Incidentally, TensorFlow 1.8 (currently a release candidate) introduces a utility for reading CSV data called tf.contrib.data.make_csv_dataset(), which should be useful for simplifying CSV-related code.
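
A rough sketch of how the input function might look with that utility, assuming the 1.8 contrib signature (file_pattern, batch_size, column_names, label_name, num_epochs, shuffle) and reusing the CSV_COLUMNS/LABEL_COLUMN constants from the question:

def make_csv_input_fn(file_pattern, mode, batch_size=512):
    def _input_fn():
        # make_csv_dataset globs the file pattern, skips each file's header
        # row, and returns batches of (features_dict, label).
        dataset = tf.contrib.data.make_csv_dataset(
            file_pattern,
            batch_size=batch_size,
            column_names=CSV_COLUMNS,
            label_name=LABEL_COLUMN,
            num_epochs=None if mode == tf.estimator.ModeKeys.TRAIN else 1,
            shuffle=(mode == tf.estimator.ModeKeys.TRAIN))
        return dataset.make_one_shot_iterator().get_next()
    return _input_fn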