Segmentation fault (core dumped) error with the TensorFlow cifar10 example

Time: 2016-05-31 02:05:31

Tags: python image gpu tensorflow deep-learning

I am trying to adapt the example cifar10 code, and when I run the adjusted cifar10_eval.py I get a Segmentation fault (core dumped) error and I am not sure why. The code actually seems to run on a Mac, and I don't know why it doesn't work on Linux.

Thanks for your help.

----------------------- Code -----------------------

# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Evaluation for CIFAR-10
Accuracy:
cifar10_train.py achieves 83.0% accuracy after 100K steps (256 epochs
of data) as judged by cifar10_eval.py.
Speed:
On a single Tesla K40, cifar10_train.py processes a single batch of 128 images
in 0.25-0.35 sec (i.e. 350 - 600 images /sec). The model reaches ~86%
accuracy after 100K steps in 8 hours of training time.
Usage:
Please see the tutorial and website for how to download the CIFAR-10
data set, compile the program and train the model.
http://tensorflow.org/tutorials/deep_cnn/
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from datetime import datetime
import math
import time

import numpy as np
import tensorflow as tf
import os
import StringIO
import cv
import cv2
import urllib


from PIL import Image

import matplotlib

import glob

import cifar10

cur_dir = os.getcwd()

FLAGS = tf.app.flags.FLAGS

tf.app.flags.DEFINE_string('eval_dir', '/tmp/cifar10_eval',
                          """Directory where to write event logs.""")
tf.app.flags.DEFINE_string('eval_data', 'test',
                           """Either 'test' or 'train_eval'.""")
tf.app.flags.DEFINE_string('checkpoint_dir', '/tmp/cifar10_train',
                          """Directory where to read model checkpoints.""")
tf.app.flags.DEFINE_integer('eval_interval_secs', 60 * 5,
                           """How often to run the eval.""")
tf.app.flags.DEFINE_integer('num_examples', 128,
                           """Number of examples to run.""")
tf.app.flags.DEFINE_boolean('run_once', False,
                        """Whether to run eval only once.""")


def eval_once(saver, summary_writer, top_k_op, summary_op, images, labels, logits):
 """Run Eval once.
 Args:
   saver: Saver.
   summary_writer: Summary writer.
   top_k_op: Top K op.
   summary_op: Summary op.
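   images: Images tensor, as returned by cifar10.inputs().
   labels: Labels tensor, as returned by cifar10.inputs().
   logits: Logits tensor, as returned by cifar10.inference().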
 """
 with tf.Session() as sess:
   ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
   if ckpt and ckpt.model_checkpoint_path:
     # Restores from checkpoint
     saver.restore(sess, ckpt.model_checkpoint_path)
     # Assuming model_checkpoint_path looks something like:
     #   /my-favorite-path/cifar10_train/model.ckpt-0,
     # extract global_step from it.
     global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
   else:
     print('No checkpoint file found')
     return

   # Start the queue runners.
   coord = tf.train.Coordinator()
   try:
     threads = []
     for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS):
       threads.extend(qr.create_threads(sess, coord=coord, daemon=True,
                                        start=True))

     num_iter = int(math.ceil(FLAGS.num_examples / FLAGS.batch_size))
     true_count = 0  # Counts the number of correct predictions.
     total_sample_count = num_iter * FLAGS.batch_size
     step = 0

     while step < num_iter and not coord.should_stop():
       predictions = sess.run([top_k_op])
       true_count += np.sum(predictions)
       step += 1
     # Compute precision @ 1.
       precision = true_count / total_sample_count
       print('%s: precision @ 1 = %.3f' % (datetime.now(), precision))
       e = tf.nn.softmax(logits)
       log = sess.run(e)
       #print(log)
       predict = np.zeros([FLAGS.batch_size])
       max_logi = np.zeros([FLAGS.batch_size])

       for i in xrange(FLAGS.batch_size):
         predict[i] = np.argmax(log[i, :])
         max_logi[i] = log[i, :].max()
       lab = sess.run(labels)
       top = sess.run([top_k_op])
       predictions = sess.run([top_k_op])
       true_count = 0
       true_count += np.sum(predictions)
       # chk = sess.run(images)
       #print(top)
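       # Write each image to disk under result/Correct or result/Incorrect,
       # in a sub-folder named after its predicted class index.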
       for i in xrange(FLAGS.batch_size):
         #    tf.cast(images, tf.uint8)
         img = sess.run(images)
         save_img = img[i, :]

         save_img = ((save_img - save_img.min()) / (save_img.max() - save_img.min()) * 255)

         #      save_img2 = Image.fromarray(save_img, "RGB")

         path = cur_dir + "/result/"

         if not os.path.exists(path):
           os.mkdir(path, 0755)
         if predictions[0][i]==True:
           path = path + "Correct/"
         else:
           path = path + "Incorrect/"

         if not os.path.exists(path):
           os.mkdir(path, 0755)
         class_fold = path + str(predict[i]) + "/"
         # class_fold = path + str(max_logi[i]) + "/
         if not os.path.exists(path + str(predict[i]) + "/"):
           os.mkdir(class_fold, 0755)

         cv2.imwrite(os.path.join(class_fold, str(i) + ".jpeg"), save_img)

     summary = tf.Summary()
     summary.ParseFromString(sess.run(summary_op))
     summary.value.add(tag='Precision @ 1', simple_value=precision)
     summary_writer.add_summary(summary, global_step)
   except Exception as e:  # pylint: disable=broad-except
     coord.request_stop(e)

   coord.request_stop()
   coord.join(threads, stop_grace_period_secs=10)


def evaluate():
 """Eval CIFAR-10 for a number of steps."""
 with tf.Graph().as_default() as g:
   # Get images and labels for CIFAR-10.
   eval_data = FLAGS.eval_data == 'test'
   images, labels = cifar10.inputs(eval_data=eval_data)

   # Build a Graph that computes the logits predictions from the
   # inference model.
   logits = cifar10.inference(images)
   true_count = 0
   # Calculate predictions.
   top_k_op = tf.nn.in_top_k(logits, labels, 1)

   # Restore the moving average version of the learned variables for eval.
   variable_averages = tf.train.ExponentialMovingAverage(
       cifar10.MOVING_AVERAGE_DECAY)
   variables_to_restore = variable_averages.variables_to_restore()
   saver = tf.train.Saver(variables_to_restore)

   # Build the summary operation based on the TF collection of Summaries.
   summary_op = tf.merge_all_summaries()

   summary_writer = tf.train.SummaryWriter(FLAGS.eval_dir, g)

   #while True:
   eval_once(saver, summary_writer, top_k_op, summary_op, images, labels, logits)
   #  if False:
   #    break
   #  time.sleep(FLAGS.eval_interval_secs)


def main(argv=None):  # pylint: disable=unused-argument
 cifar10.maybe_download_and_extract()
 if tf.gfile.Exists(FLAGS.eval_dir):
   tf.gfile.DeleteRecursively(FLAGS.eval_dir)
 tf.gfile.MakeDirs(FLAGS.eval_dir)
 evaluate()


if __name__ == '__main__':
 tf.app.run()

1 Answer:

Answer 0 (score: 3):

This looks like a recurring issue, in which the TensorFlow Python module conflicts with code in the OpenCV and/or PIL libraries. The root cause is usually an incompatibility between the versions of libjpeg and/or libpng that are bundled with those libraries.
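If you want to check whether this is happening on your machine, here is a minimal diagnostic sketch (assuming a Linux host with /proc available, as in the question) that lists the libjpeg/libpng shared objects the Python process has actually mapped after the imports:

import os

# Diagnostic sketch (assumption: Linux with /proc, matching the question's setup).
# Run this after importing cv2/PIL/tensorflow; seeing two different libjpeg or
# libpng builds listed would point to the version clash described above.
with open('/proc/%d/maps' % os.getpid()) as maps:
    libs = sorted({line.split()[-1] for line in maps
                   if 'libjpeg' in line or 'libpng' in line})
for path in libs:
    print(path)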

This should be fixed in the latest TensorFlow nightly builds. As an alternative workaround, you can try moving the line:

import tensorflow as tf

...so that it comes below the import statements for cv, cv2, and PIL.
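For reference, a minimal sketch of how the reordered imports at the top of the script could look under that workaround (only the import order changes; everything else in the file stays the same):

# Workaround sketch: load the image libraries first, so their bundled
# libjpeg/libpng symbols are resolved before TensorFlow's copies are loaded.
import cv
import cv2
from PIL import Image

import tensorflow as tf  # now imported after cv/cv2/PIL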