I am using this tutorial to get started with TensorFlow - TensorFlow for Poets.
After training the model with the retrain.py script, I want to use retrained_graph.pb to classify a video and see the results in real time while the video plays.
What I am doing is reading the video frame by frame with OpenCV: read a frame, save it to disk, open it again, classify it, and display it on screen together with the classification result using cv2.imshow().
It works, but the resulting video lags because each frame is written to and read back from disk.
Can I classify the video using the graph obtained from training without having to save and re-read every frame?
Here is the code I am using -
with tf.Session(graph=graph) as sess:
    video_capture = cv2.VideoCapture(video_path)
    i = 0
    while True:
        frame = video_capture.read()[1]  # get current frame
        frameId = video_capture.get(1)   # current frame number
        i = i + 1
        # write frame image to file
        cv2.imwrite(filename="C:\\video_images\\" + str(i) + ".jpg", img=frame)
        image_data = "C:\\video_images\\" + str(i) + ".jpg"
        t = read_tensor_from_image_file(image_data,
                                        input_height=input_height,
                                        input_width=input_width,
                                        input_mean=input_mean,
                                        input_std=input_std)
        predictions = sess.run(output_operation.outputs[0],
                               {input_operation.outputs[0]: t})
        top_k = predictions[0].argsort()[-len(predictions[0]):][::-1]
        scores = []
        for node_id in top_k:
            human_string = label_lines[node_id]
            score = predictions[0][node_id]
            scores.append([score, human_string])
            #print('%s (score = %.5f)' % (human_string, score))
            #print("\n\n")
        font = cv2.FONT_HERSHEY_SIMPLEX
        cv2.putText(frame, scores[0][1] + " - " + repr(round(scores[0][0], 2)),
                    (10, 50), font, 1, (0, 0, 255), 2, cv2.LINE_AA)
        cv2.putText(frame, scores[1][1] + " - " + repr(round(scores[1][0], 2)),
                    (10, 100), font, 1, (0, 0, 255), 2, cv2.LINE_AA)
        cv2.imshow("image", frame)
        cv2.waitKey(1)
        os.remove("C:\\video_images\\" + str(i) + ".jpg")
    video_capture.release()
    cv2.destroyAllWindows()
Thanks.
Answer 0 (score: 1)
frame = video_capture.read()[1]  # get current frame
float_caster = frame.astype(np.float32)
# resize the HxWx3 frame first (cv2.resize cannot handle a 4-D array),
# then add the batch dimension the graph expects
resized = cv2.resize(float_caster, (int(input_width), int(input_height)))
dims_expander = np.expand_dims(resized, axis=0)
normalized = (dims_expander - input_mean) / input_std
predictions = sess.run(output_operation.outputs[0],
                       {input_operation.outputs[0]: normalized})
Take the frame itself instead of calling imwrite and then read_tensor_from_image_file. Resize and normalize it, then pass normalized into the session. This way you get rid of the unnecessary disk write/read operations.
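A minimal sketch of how the whole loop could look with this in-memory approach, assuming graph, input_operation, output_operation, label_lines and the preprocessing parameters are set up the same way as in the tutorial's label_image.py code, and assuming the retrained graph expects RGB input (OpenCV frames are BGR):

import cv2
import numpy as np
import tensorflow as tf

with tf.Session(graph=graph) as sess:
    video_capture = cv2.VideoCapture(video_path)
    while True:
        ret, frame = video_capture.read()
        if not ret:
            break
        # preprocess in memory: BGR -> RGB, resize, normalize, add batch dim
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB).astype(np.float32)
        resized = cv2.resize(rgb, (int(input_width), int(input_height)))
        normalized = (resized - input_mean) / input_std
        batch = np.expand_dims(normalized, axis=0)
        predictions = sess.run(output_operation.outputs[0],
                               {input_operation.outputs[0]: batch})
        # draw the two highest-scoring labels on the frame
        top = predictions[0].argsort()[::-1][:2]
        font = cv2.FONT_HERSHEY_SIMPLEX
        for rank, node_id in enumerate(top):
            text = "%s - %.2f" % (label_lines[node_id], predictions[0][node_id])
            cv2.putText(frame, text, (10, 50 + 50 * rank), font, 1,
                        (0, 0, 255), 2, cv2.LINE_AA)
        cv2.imshow("image", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    video_capture.release()
    cv2.destroyAllWindows()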
Answer 1 (score: 0)
Managed to solve it.
I edited read_tensor_from_image_file to the following, and fed it the frame itself instead of image_data.
def read_tensor_from_image_file(file_name,
                                input_height=299,
                                input_width=299,
                                input_mean=0,
                                input_std=255):
    input_name = "file_reader"
    output_name = "normalized"
    if type(file_name) is str:
        # original path: decode the image file with TensorFlow ops
        file_reader = tf.read_file(file_name, input_name)
        if file_name.endswith(".png"):
            image_reader = tf.image.decode_png(file_reader, channels=3,
                                               name='png_reader')
        elif file_name.endswith(".gif"):
            image_reader = tf.squeeze(tf.image.decode_gif(file_reader,
                                                          name='gif_reader'))
        elif file_name.endswith(".bmp"):
            image_reader = tf.image.decode_bmp(file_reader, name='bmp_reader')
        else:
            image_reader = tf.image.decode_jpeg(file_reader, channels=3,
                                                name='jpeg_reader')
        float_caster = tf.cast(image_reader, tf.float32)
        dims_expander = tf.expand_dims(float_caster, 0)
        resized = tf.image.resize_bilinear(dims_expander,
                                           [input_height, input_width])
        normalized = tf.divide(tf.subtract(resized, [input_mean]),
                               [input_std])
        sess = tf.Session()
        result = sess.run(normalized)
    elif type(file_name) is np.ndarray:
        # the frame is already a decoded image: resize and normalize with OpenCV/NumPy
        resized = cv2.resize(file_name, (input_width, input_height),
                             interpolation=cv2.INTER_LINEAR)
        normalized = (resized - input_mean) / input_std
        result = normalized
    result = np.asarray(result, dtype=np.float32).reshape(1, input_height,
                                                          input_width, 3)
    return result
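For completeness, a short usage sketch of calling the modified function directly with a frame inside the capture loop, assuming the same sess, input_operation, output_operation and preprocessing parameters as in the question:

ret, frame = video_capture.read()
if ret:
    # pass the frame (a NumPy array) directly; no imwrite/os.remove needed
    # note: OpenCV frames are BGR, so converting with
    # cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) first may improve accuracy
    t = read_tensor_from_image_file(frame,
                                    input_height=input_height,
                                    input_width=input_width,
                                    input_mean=input_mean,
                                    input_std=input_std)
    predictions = sess.run(output_operation.outputs[0],
                           {input_operation.outputs[0]: t})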