Question

我正在使用tensorflow对象检测api来做一些半实时对象检测任务。图像将由相机以2幅图像/秒的速度拍摄。每张图像都会被裁剪成4张小图像，因此我需要处理8张图像/秒。

我的检测模型已导出到冻结图形（.pb文件）中并加载到GPU内存中。然后我将图像加载到numpy数组中以将它们提供给我的模型。

检测本身仅需约0.1秒/图像，但加载每张图像大约需要0.45秒。

我正在使用的脚本是从对象检测api（link）提供的代码示例修改而来的，它读取每张图像并将其转换为numpy数组，然后输入到检测模型中。整个过程中最耗时的部分是load_image_into_numpy_array，每张图像需要将近0.45秒。

脚本如下：

import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
import timeit
import scipy.misc


from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image


from utils import label_map_util

from utils import visualization_utils as vis_util

# Frozen inference graph (.pb file) of the detection model that is run below.
PATH_TO_CKPT = 'animal_detection.pb'

# Label map file that is turned into the category index further down.
PATH_TO_LABELS = os.path.join('data', 'animal_label_map.pbtxt')

NUM_CLASSES = 1


# Read and parse the serialized GraphDef first, then import it into a
# dedicated graph (with an empty name prefix so tensor names stay unchanged).
with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
    serialized_graph = fid.read()
od_graph_def = tf.GraphDef()
od_graph_def.ParseFromString(serialized_graph)

detection_graph = tf.Graph()
with detection_graph.as_default():
    tf.import_graph_def(od_graph_def, name='')

# Build the category index from the label map, limited to NUM_CLASSES entries.
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(
    label_map,
    max_num_classes=NUM_CLASSES,
    use_display_name=True)
category_index = label_map_util.create_category_index(categories)

def load_image_into_numpy_array(image):
    """Convert a PIL image into an ``(height, width, 3)`` uint8 numpy array.

    The pixel buffer is copied in one step through the image's array
    interface. The previous ``np.array(image.getdata())`` approach created one
    Python object per pixel, which is what made loading an image take far
    longer than running the detection itself.

    Args:
        image: A ``PIL.Image.Image``. Images that are not already in RGB mode
            (for example RGBA, palette or grayscale PNGs) are converted to
            RGB first, so the result always has exactly three channels.

    Returns:
        A writable ``np.uint8`` array of shape ``(im_height, im_width, 3)``.
    """
    if image.mode != 'RGB':
        image = image.convert('RGB')
    # np.array (rather than np.asarray) returns an independent, writable copy
    # that callers may modify freely.
    return np.array(image, dtype=np.uint8)

# The script runs on nine images, image1.png ... image9.png, located in
# PATH_TO_TEST_IMAGES_DIR. To test the code with other images, add their
# paths to TEST_IMAGE_PATHS.
PATH_TO_TEST_IMAGES_DIR = 'test'
TEST_IMAGE_PATHS = [
    os.path.join(PATH_TO_TEST_IMAGES_DIR, file_name)
    for file_name in map('image{}.png'.format, range(1, 10))
]

# Size, in inches, of the output images.
IMAGE_SIZE = (12, 8)
# Session configuration: enable XLA JIT compilation (level ON_1).
config = tf.ConfigProto()
config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
with detection_graph.as_default():
    with tf.Session(graph=detection_graph, config=config) as sess:
        # The graph's input/output tensors do not change between images, so
        # they are looked up once instead of on every loop iteration.
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        # Each box represents a part of the image where a particular object
        # was detected.
        boxes_tensor = detection_graph.get_tensor_by_name('detection_boxes:0')
        # Each score represents the level of confidence for each of the
        # objects. Score is shown on the result image, together with the
        # class label.
        scores_tensor = detection_graph.get_tensor_by_name('detection_scores:0')
        classes_tensor = detection_graph.get_tensor_by_name('detection_classes:0')
        num_detections_tensor = detection_graph.get_tensor_by_name('num_detections:0')

        for image_path in TEST_IMAGE_PATHS:
            # First timing: loading the image and preparing the model input.
            start = timeit.default_timer()
            # The context manager closes the underlying file once the pixel
            # data has been copied into the numpy array.
            with Image.open(image_path) as image:
                # The array based representation of the image is used later
                # to prepare the result image with boxes and labels on it.
                image_np = load_image_into_numpy_array(image)
            # Expand dimensions since the model expects images to have
            # shape: [1, None, None, 3]
            image_np_expanded = np.expand_dims(image_np, axis=0)
            end = timeit.default_timer()
            print(end - start)

            # Second timing: the actual detection.
            start = timeit.default_timer()
            (boxes, scores, classes, num_detections) = sess.run(
                [boxes_tensor, scores_tensor, classes_tensor,
                 num_detections_tensor],
                feed_dict={image_tensor: image_np_expanded})
            stop = timeit.default_timer()
            print(stop - start)

        # Visualization of the results of a detection. As in the original
        # script this runs once, after the loop, so only the last processed
        # image is annotated.
        vis_util.visualize_boxes_and_labels_on_image_array(
            image_np,
            np.squeeze(boxes),
            np.squeeze(classes).astype(np.int32),
            np.squeeze(scores),
            category_index,
            use_normalized_coordinates=True,
            line_thickness=2)

我正在考虑用一种更高效的方式来加载相机生成的图像。首先想到的是避免使用numpy数组，尝试用tensorflow原生的方式来加载图像，但我不知道从哪里开始，因为我是tensorflow新手。

如果我能找到用tensorflow加载图像的方法，也许我可以把4张图像组成1个批次输入模型，这样速度可能会有所提升。

一个不成熟的想法是：把从1张原始图像裁剪出的4张小图像保存到一个tf_record文件中，再把该tf_record文件作为一个批次加载并输入模型，但我不知道如何实现。

任何帮助将不胜感激。

Answer 1

我找到了一种可以将图像加载从0.4秒减少到0.01秒的解决方案。如果有人也遇到同样的问题，我会在这里发帖回答。我们可以在opencv中使用imread，而不是使用PIL.Image和numpy。我还设法批量处理图像，以便我们可以实现更好的加速。

该脚本如下：

import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tensorflow as tf
import timeit
import cv2


from collections import defaultdict

from utils import label_map_util

from utils import visualization_utils as vis_util

# Command line arguments: model directory, image directory, batch size.
MODEL_PATH, IMAGE_PATH = sys.argv[1], sys.argv[2]
BATCH_SIZE = int(sys.argv[3])
# Frozen inference graph (.pb file) of the detection model, located inside
# MODEL_PATH.
PATH_TO_CKPT = os.path.join(MODEL_PATH, 'frozen_inference_graph.pb')

# Label map file that is turned into the category index further down.
PATH_TO_LABELS = os.path.join('data', 'animal_label_map.pbtxt')

NUM_CLASSES = 1

# Read and parse the serialized GraphDef first, then import it into a
# dedicated graph (with an empty name prefix so tensor names stay unchanged).
with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
    serialized_graph = fid.read()
od_graph_def = tf.GraphDef()
od_graph_def.ParseFromString(serialized_graph)

detection_graph = tf.Graph()
with detection_graph.as_default():
    tf.import_graph_def(od_graph_def, name='')

# Build the category index from the label map, limited to NUM_CLASSES entries.
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(
    label_map,
    max_num_classes=NUM_CLASSES,
    use_display_name=True)
category_index = label_map_util.create_category_index(categories)

# Input images: image1.png ... image128.png inside the directory given on the
# command line.
PATH_TO_TEST_IMAGES_DIR = IMAGE_PATH
TEST_IMAGE_PATHS = [
    os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.png'.format(i))
    for i in range(1, 129)]

# Session configuration: enable XLA JIT compilation (level ON_1).
config = tf.ConfigProto()
config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
with detection_graph.as_default():
    with tf.Session(graph=detection_graph, config=config) as sess:
        # The graph's input/output tensors do not change between batches, so
        # they are looked up once instead of on every loop iteration.
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        # Each box represents a part of the image where a particular object
        # was detected.
        boxes_tensor = detection_graph.get_tensor_by_name('detection_boxes:0')
        # Each score represents the level of confidence for each of the
        # objects.
        scores_tensor = detection_graph.get_tensor_by_name('detection_scores:0')
        classes_tensor = detection_graph.get_tensor_by_name('detection_classes:0')
        num_detections_tensor = detection_graph.get_tensor_by_name('num_detections:0')

        for i in range(0, len(TEST_IMAGE_PATHS), BATCH_SIZE):
            # The timing covers loading the batch plus running the detection.
            start = timeit.default_timer()
            images = []
            # Slicing (instead of indexing i + j) lets the last batch be
            # shorter when the image count is not a multiple of BATCH_SIZE.
            for image_path in TEST_IMAGE_PATHS[i:i + BATCH_SIZE]:
                image = cv2.imread(image_path)
                # cv2.imread signals failure by returning None, not raising.
                if image is None:
                    raise IOError(
                        'Could not read image: {}'.format(image_path))
                # cv2.imread yields BGR channel order; convert to RGB so the
                # input matches what a PIL-based loader would feed to
                # image_tensor.
                images.append(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            # Build the [batch, height, width, 3] input once per batch; all
            # images in a batch must share the same size.
            image_np_expanded = np.stack(images, axis=0)
            # Actual detection.
            (boxes, scores, classes, num_detections) = sess.run(
                [boxes_tensor, scores_tensor, classes_tensor,
                 num_detections_tensor],
                feed_dict={image_tensor: image_np_expanded})
            stop = timeit.default_timer()
            print(stop - start)

更有效的加载图像以进行检测的方法

1 个答案: