Python and TensorFlow for real-time video frame classification

Date: 2017-09-25 00:27:52

Tags: python opencv video tensorflow

I am trying to do on-the-fly object recognition with Python + OpenCV + TensorFlow, as shown below. The session runs, but it stops working as soon as it reaches the (now commented-out) part of the run_inference() function that converts the predictions into a human-readable format.

Online documentation on various sites mentions that for video frames (as opposed to static JPEG images) one should use Mul:0 instead of softmax. But the two are not interchangeable, and I am surely missing some steps in between.

Thanks for any hints on getting this to work.
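For reference, my current understanding of the pattern those posts describe is the minimal sketch below: feed the preprocessed (1, 299, 299, 3) float array directly into the graph's 'Mul:0' tensor via feed_dict and fetch 'softmax:0' for the class probabilities. This sketch assumes the Inception graph from classify_image_graph_def.pb has already been imported (as in tensor_init() in my full script further down) and that cap and NodeLookup are defined as in that script; the normalization is a guess on my part, and I have not been able to confirm any of this is right, which is part of what I am asking.

# Sketch only -- assumes classify_image_graph_def.pb has already been imported
# into the default graph, and that cap / NodeLookup come from the full script below.
with tf.Session() as sess:
    softmax_tensor = sess.graph.get_tensor_by_name('softmax:0')  # class probabilities
    ret_val, frame = cap.read()
    frame = cv2.resize(frame, (299, 299), interpolation=cv2.INTER_CUBIC)
    # Whether 'Mul:0' expects values in [-0.5, 0.5], [-1, 1], or something else is a guess here.
    arr = np.expand_dims(frame.astype(np.float32) / 255.0 - 0.5, axis=0)
    # Feed the preprocessed frame into 'Mul:0' (bypassing the JPEG-decode nodes)
    # and fetch 'softmax:0' instead of 'Mul:0'.
    predictions = np.squeeze(sess.run(softmax_tensor, feed_dict={'Mul:0': arr}))
    node_lookup = NodeLookup()
    for node_id in predictions.argsort()[-5:][::-1]:
        print('%s (score = %.5f)' % (node_lookup.id_to_string(node_id), predictions[node_id]))

My full script is below.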

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import os.path
import re
import sys
import tarfile
import numpy as np
from six.moves import urllib
import cv2
import tensorflow as tf

FLAGS = None

# Open the camera through a GStreamer pipeline (nvcamerasrc, 1280x720 @ 30 fps)
cap = cv2.VideoCapture("nvcamerasrc ! video/x-raw(memory:NVMM), width=(int)1280, height=(int)720,format=(string)I420, framerate=(fraction)30/1 ! nvvidconv flip-method=0 ! video/x-raw, format=(string)BGRx ! videoconvert ! video/x-raw, format=(string)BGR ! appsink")

windowName="Hotel"


#currentFrame=tf.placeholder(tf.string)
#currentFrameFloat=tf.to_float(currentFrame)
#currentFrame4D=tf.expand_dims(currentFrameFloat,axis=0)

# Stand-alone placeholder for the current frame (not wired into the imported graph)
currentFrame = tf.placeholder(tf.float32, shape=(1, 299, 299, 3))


class NodeLookup(object):
  def __init__(self,
               label_lookup_path=None,
               uid_lookup_path=None):
      label_lookup_path ='./imagenet_2012_challenge_label_map_proto.pbtxt'
      uid_lookup_path = './imagenet_synset_to_human_label_map.txt'
      self.node_lookup = self.load(label_lookup_path, uid_lookup_path)

  def load(self, label_lookup_path, uid_lookup_path):
    proto_as_ascii_lines = tf.gfile.GFile(uid_lookup_path).readlines()
    uid_to_human = {}
    p = re.compile(r'[n\d]*[ \S,]*')
    for line in proto_as_ascii_lines:
      parsed_items = p.findall(line)
      uid = parsed_items[0]
      human_string = parsed_items[2]
      uid_to_human[uid] = human_string

    node_id_to_uid = {}
    proto_as_ascii = tf.gfile.GFile(label_lookup_path).readlines()
    for line in proto_as_ascii:
      if line.startswith('  target_class:'):
        target_class = int(line.split(': ')[1])
      if line.startswith('  target_class_string:'):
        target_class_string = line.split(': ')[1]
        node_id_to_uid[target_class] = target_class_string[1:-2]

    node_id_to_name = {}
    for key, val in node_id_to_uid.items():
      if val not in uid_to_human:
        tf.logging.fatal('Failed to locate: %s', val)
      name = uid_to_human[val]
      node_id_to_name[key] = name

    return node_id_to_name

  def id_to_string(self, node_id):
    if node_id not in self.node_lookup:
      return ''
    return self.node_lookup[node_id]

def tensor_init():
  # Load the pre-trained Inception graph (classify_image_graph_def.pb) into the default graph
  with tf.gfile.FastGFile('./classify_image_graph_def.pb', 'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())
    _ = tf.import_graph_def(graph_def, name='')

def cam_init():
    # Open a preview window for the camera stream
    if cap.isOpened():
        cv2.namedWindow(windowName, cv2.WINDOW_NORMAL)
        cv2.resizeWindow(windowName, 1280, 720)
        cv2.moveWindow(windowName, 0, 0)
        cv2.setWindowTitle(windowName, "Recognition")
        showHelp = True
        font = cv2.FONT_HERSHEY_PLAIN
        helpText = "Esc to Quit"
        edgeThreshold = 40
        showFullScreen = False

def run_inference():
  with tf.Session() as sess:
    # 'Mul:0' is the preprocessed-image tensor of the imported Inception graph
    mul_tensor = sess.graph.get_tensor_by_name('Mul:0')
    while True:
      # Grab a frame from the camera and display it
      ret_val, frame = cap.read()
      cv2.imshow(windowName, frame)
      #np_image_data=tf.image.encode_jpeg(frame)
      # Resize and rescale the frame to the (1, 299, 299, 3) shape the network expects
      frame = cv2.resize(frame, (299, 299), interpolation=cv2.INTER_CUBIC)
      np_image_data = np.asarray(frame)
      np_image_data = cv2.normalize(np_image_data.astype('float'), None, -0.5, .5, cv2.NORM_MINMAX)
      np_image_data = np.expand_dims(np_image_data, 0)
      # Run the graph (I suspect the feed/fetch combination here is where I am missing steps)
      predictions = sess.run(mul_tensor, feed_dict={currentFrame: np_image_data})
      predictions = np.squeeze(predictions)
      node_lookup = NodeLookup()

      #top_k = predictions.argsort()[-5:][::-1]
      #for node_id in top_k:
      #  human_string = node_lookup.id_to_string(node_id)
      #  score = predictions[node_id]
      #  print('%s (score = %.5f)' % (human_string, score))

      if cv2.waitKey(10) == 27:
        break


def main(_):
  tensor_init()
  cam_init()
  run_inference()


if __name__ == '__main__':
  tf.app.run(main=main, argv=[sys.argv[0]])

0 Answers:

No answers yet.