我正在使用cloudml-samples中的花卉教程代码,尝试在一组餐厅照片上实现多标签分类。
我有dict.txt并相应地更新了输入,这里是示例行。
good_for_lunch
good_for_dinner
takes_reservations
outdoor_seating
restaurant_is_expensive
has_alcohol
has_table_service
ambience_is_classy
good_for_kids
...
gs://yelp_restaurant_photo_classification/train_photos/312753.jpg,good_for_dinner,takes_reservations,has_alcohol,has_table_service,good_for_kids
gs://yelp_restaurant_photo_classification/train_photos/342651.jpg,good_for_lunch,good_for_dinner,outdoor_seating,good_for_kids
gs://yelp_restaurant_photo_classification/train_photos/217079.jpg,takes_reservations,has_table_service
...
预处理作业开始运行正常,然后我看到这个特定错误不断出现,直到作业失败。
python trainer/preprocess.py \
--input_dict "$DICT_FILE" \
--input_path "gs://yelp_restaurant_photo_classification/labels/eval_set.csv" \
--output_path "${GCS_PATH}/preproc/eval" \
--cloud
(d8285fa55cb6ab07): Traceback (most recent call last):
File "/usr/local/lib/python2.7/dist-packages/dataflow_worker/batchworker.py", line 514, in do_work
work_executor.execute()
File "dataflow_worker/executor.py", line 894, in dataflow_worker.executor.MapTaskExecutor.execute (dataflow_worker/executor.c:24204)
op.start()
File "dataflow_worker/executor.py", line 197, in dataflow_worker.executor.ReadOperation.start (dataflow_worker/executor.c:7039)
def start(self):
File "dataflow_worker/executor.py", line 202, in dataflow_worker.executor.ReadOperation.start (dataflow_worker/executor.c:6946)
with self.spec.source.reader() as reader:
File "dataflow_worker/executor.py", line 212, in dataflow_worker.executor.ReadOperation.start (dataflow_worker/executor.c:6891)
self.output(windowed_value)
File "dataflow_worker/executor.py", line 142, in dataflow_worker.executor.Operation.output (dataflow_worker/executor.c:5249)
cython.cast(Receiver, self.receivers[output_index]).receive(windowed_value)
File "dataflow_worker/executor.py", line 89, in dataflow_worker.executor.ConsumerSet.receive (dataflow_worker/executor.c:3487)
cython.cast(Operation, consumer).process(windowed_value)
File "dataflow_worker/executor.py", line 500, in dataflow_worker.executor.DoOperation.process (dataflow_worker/executor.c:14239)
self.dofn_receiver.receive(o)
File "apache_beam/runners/common.py", line 134, in apache_beam.runners.common.DoFnRunner.receive (apache_beam/runners/common.c:4172)
self.process(windowed_value)
File "apache_beam/runners/common.py", line 168, in apache_beam.runners.common.DoFnRunner.process (apache_beam/runners/common.c:5282)
self.reraise_augmented(exn)
File "apache_beam/runners/common.py", line 181, in apache_beam.runners.common.DoFnRunner.reraise_augmented (apache_beam/runners/common.c:5665)
raise
File "apache_beam/runners/common.py", line 166, in apache_beam.runners.common.DoFnRunner.process (apache_beam/runners/common.c:5218)
self._process_outputs(element, self.dofn_process(self.context))
File "apache_beam/runners/common.py", line 222, in apache_beam.runners.common.DoFnRunner._process_outputs (apache_beam/runners/common.c:6400)
self.main_receivers.receive(windowed_value)
File "dataflow_worker/executor.py", line 89, in dataflow_worker.executor.ConsumerSet.receive (dataflow_worker/executor.c:3487)
cython.cast(Operation, consumer).process(windowed_value)
File "dataflow_worker/executor.py", line 500, in dataflow_worker.executor.DoOperation.process (dataflow_worker/executor.c:14239)
self.dofn_receiver.receive(o)
File "apache_beam/runners/common.py", line 134, in apache_beam.runners.common.DoFnRunner.receive (apache_beam/runners/common.c:4172)
self.process(windowed_value)
File "apache_beam/runners/common.py", line 168, in apache_beam.runners.common.DoFnRunner.process (apache_beam/runners/common.c:5282)
self.reraise_augmented(exn)
File "apache_beam/runners/common.py", line 179, in apache_beam.runners.common.DoFnRunner.reraise_augmented (apache_beam/runners/common.c:5646)
raise type(exn), args, sys.exc_info()[2]
File "apache_beam/runners/common.py", line 166, in apache_beam.runners.common.DoFnRunner.process (apache_beam/runners/common.c:5218)
self._process_outputs(element, self.dofn_process(self.context))
File "apache_beam/runners/common.py", line 191, in apache_beam.runners.common.DoFnRunner._process_outputs (apache_beam/runners/common.c:5838)
for result in results:
File "trainer/preprocess.py", line 130, in process
KeyError: u"FALSE [while running 'Extract label ids']"
(f3c7c09c0b6a453c): Workflow failed. Causes: (688819c5d32d79c8): S06:Read input+Parse input+Extract label ids+Read and convert to JPEG+Embed and make TFExample+Save to disk/Write to gs:__yelp_restaurant_photo_classification_yelp_restaurant_photo_classification_preproc_eval/Write/WriteImpl/write_bundles+Save to disk/Write to gs:__yelp_restaurant_photo_classification_yelp_restaurant_photo_classification_preproc_eval/Write/WriteImpl/pair+Save to disk/Write to gs:__yelp_restaurant_photo_classification_yelp_restaurant_photo_classification_preproc_eval/Write/WriteImpl/WindowInto+Save to disk/Write to gs:__yelp_restaurant_photo_classification_yelp_restaurant_photo_classification_preproc_eval/Write/WriteImpl/GroupByKey/Reify+Save to disk/Write to gs:__yelp_restaurant_photo_classification_yelp_restaurant_photo_classification_preproc_eval/Write/WriteImpl/GroupByKey/Write failed.
答案 0 :(得分:1)
您的输入CSV文件中可能有一行标签为“FALSE'”但是“FALSE'不在' dict.txt'。