我试图使用Python + OpenCV + TensorFlow进行on_the_fly对象识别,如下所示。会话运行但是一旦它到达(现已注释掉)部分,用于在名为run_inference()的函数中将结果转换为人类可读格式,它就会停止工作。
各个网站的在线文档提到,在视频帧(不是静态jpeg图像)的情况下,应使用Mul:0而不是softmax。但那些不可互换,我肯定会错过这两者之间的一些步骤。
感谢您提供任何有关此工作的提示。
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import os.path
import re
import sys
import tarfile
import numpy as np
from six.moves import urllib
import cv2
import tensorflow as tf
FLAGS = None
cap = cv2.VideoCapture("nvcamerasrc ! video/x-raw(memory:NVMM), width=(int)1280, height=(int)720,format=(string)I420, framerate=(fraction)30/1 ! nvvidconv flip-method=0 ! video/x-raw, format=(string)BGRx ! videoconvert ! video/x-raw, format=(string)BGR ! appsink")
windowName="Hotel"
#currentFrame=tf.placeholder(tf.string)
#currentFrameFloat=tf.to_float(currentFrame)
#currentFrame4D=tf.expand_dims(currentFrameFloat,axis=0)
currentFrame=tf.placeholder(tf.float32,shape=(1,299,299,3))
class NodeLookup(object):
def __init__(self,
label_lookup_path=None,
uid_lookup_path=None):
label_lookup_path ='./imagenet_2012_challenge_label_map_proto.pbtxt'
uid_lookup_path = './imagenet_synset_to_human_label_map.txt'
self.node_lookup = self.load(label_lookup_path, uid_lookup_path)
def load(self, label_lookup_path, uid_lookup_path):
proto_as_ascii_lines = tf.gfile.GFile(uid_lookup_path).readlines()
uid_to_human = {}
p = re.compile(r'[n\d]*[ \S,]*')
for line in proto_as_ascii_lines:
parsed_items = p.findall(line)
uid = parsed_items[0]
human_string = parsed_items[2]
uid_to_human[uid] = human_string
node_id_to_uid = {}
proto_as_ascii = tf.gfile.GFile(label_lookup_path).readlines()
for line in proto_as_ascii:
if line.startswith(' target_class:'):
target_class = int(line.split(': ')[1])
if line.startswith(' target_class_string:'):
target_class_string = line.split(': ')[1]
node_id_to_uid[target_class] = target_class_string[1:-2]
node_id_to_name = {}
for key, val in node_id_to_uid.items():
if val not in uid_to_human:
tf.logging.fatal('Failed to locate: %s', val)
name = uid_to_human[val]
node_id_to_name[key] = name
return node_id_to_name
def id_to_string(self, node_id):
if node_id not in self.node_lookup:
return ''
return self.node_lookup[node_id]
def tensor_init():
with tf.gfile.FastGFile('./classify_image_graph_def.pb', 'rb') as f:
graph_def = tf.GraphDef()
graph_def.ParseFromString(f.read())
_ = tf.import_graph_def(graph_def, name='')
def cam_init():
if cap.isOpened():
cv2.namedWindow(windowName, cv2.WINDOW_NORMAL)
cv2.resizeWindow(windowName, 1280,720)
cv2.moveWindow(windowName,0,0)
cv2.setWindowTitle(windowName,"Recognition")
showHelp = True
font = cv2.FONT_HERSHEY_PLAIN
helpText="Esc to Quit"
edgeThreshold=40
showFullScreen = False
def run_inference():
with tf.Session() as sess:
mul_tensor = sess.graph.get_tensor_by_name('Mul:0')
while(True):
ret_val,frame = cap.read();
cv2.imshow(windowName,frame)
#np_image_data=tf.image.encode_jpeg(frame)
frame=cv2.resize(frame,(299,299),interpolation=cv2.INTER_CUBIC)
np_image_data=np.asarray(frame)
np_image_data=cv2.normalize(np_image_data.astype('float'),None,-0.5, .5,cv2.NORM_MINMAX)
np_image_data=np.expand_dims(np_image_data,0)
predictions = sess.run(mul_tensor,feed_dict={currentFrame:np_image_data})
predictions = np.squeeze(predictions)
node_lookup = NodeLookup()
#top_k = predictions.argsort()[-5:][::-1]
#for node_id in top_k:
#human_string = node_lookup.id_to_string(node_id)
#score = predictions[node_id]
#print('%s (score = %.5f)' % (human_string, score))
if cv2.waitKey(10) == 27:
break
def main(_):
tensor_init()
cam_init()
run_inference()
if __name__ == '__main__':
tf.app.run(main=main, argv=[sys.argv[0]])