There are many examples showing how to use the tf.contrib.slim library to classify a single image downloaded from the web; in fact, the TensorFlow GitHub repo provides exactly that. What I am struggling to understand is the best way to do this in a loop. Any application that uses TensorFlow for classification will have to classify more than one batch of images. The inference process involves building a graph and loading weights from a checkpoint. When iterating, it seems wasteful to repeat those steps over and over. Indeed, when I try this rudimentary approach, I can see that the memory allocated to Python keeps growing with every iteration. Can someone suggest how to modify a basic example to achieve repeated/iterative inference? Here is the approach I am using at the moment, which is clearly wasteful of memory resources (this code crashes machines with limited memory; new images are periodically dumped into the global "frame"):
def classification():
    global frame
    global count
    slim = tf.contrib.slim
    image_size = inception_v4.inception_v4.default_image_size
    names = imagenet.create_readable_names_for_imagenet_labels()
    checkpoints_dir = '../../checkpoints'
    # Don't classify the first few frames
    while count < 5:
        pass
    while True:
        start = count
        with tf.Graph().as_default():
            image = tf.convert_to_tensor(frame, dtype=tf.float32)
            processed_image = inception_preprocessing.preprocess_image(image, image_size, image_size, is_training=False)
            processed_images = tf.expand_dims(processed_image, 0)
            # processed_images will be a 1x299x299x3 tensor of float32
            # Create the model, use the default arg scope to configure the batch norm parameters.
            with slim.arg_scope(inception_v4.inception_v4_arg_scope()):
                logits, _ = inception_v4.inception_v4(processed_images, num_classes=1001, is_training=False)
                probabilities = tf.nn.softmax(logits)
            init_fn = slim.assign_from_checkpoint_fn(
                os.path.join(checkpoints_dir, 'inception_v4.ckpt'),
                slim.get_model_variables('InceptionV4'))
            with tf.Session() as sess:
                init_fn(sess)
                np_image, probabilities = sess.run([image, probabilities])
                probabilities = probabilities[0, 0:]
                sorted_inds = [i[0] for i in sorted(enumerate(-probabilities), key=lambda x: x[1])]
                for i in range(5):
                    index = sorted_inds[i]
                    print('Probability %0.2f%% => [%s]' % (probabilities[index] * 100, names[index]))
        end = count
        print("Classification latency = %d frames" % (end - start))
Answer 0 (score: 1)
I got this working, though I would still appreciate some wisdom from others. My solution was to build the graph with a placeholder as the input; the video frames can then be fed to the session's run method via feed_dict. This lets me put the while loop around the call to session run. Latency with this approach is a tenth of the original version I shared, and the memory footprint is stable. Here is the full code I use to classify video frames from a webcam. Note that it has problems: there is no mechanism for exiting the threads cleanly, so Ctrl+C will not kill the script. Also note that to run this you would need to clone the github tensorflow models repo, and download and unzip the pretrained weights into ../../checkpoints.
import cv2
import os
import time
import numpy as np
from threading import Thread
import tensorflow as tf
from datasets import imagenet
from nets import inception_v4
from preprocessing import inception_preprocessing
######################################################
# Global Variables Shared by threads
frame = None
count = 0
######################################################
def capture():
######################################################
    global frame
    global count
    video_capture = cv2.VideoCapture(0)
    while True:
        # Capture frame-by-frame
        ret, frame_bgr = video_capture.read()
        # Display the resulting frame
        cv2.imshow('Video', frame_bgr)
        # Convert to RGB format (Inception expects RGB not BGR color channels)
        frame = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
        # Increment frame counter (Used only to calculate latency)
        count += 1
        # Kill loop when user hits q
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    # When everything is done, release the capture
    video_capture.release()
    cv2.destroyAllWindows()
######################################################
######################################################
def classification():
######################################################
    global frame
    global count
    slim = tf.contrib.slim
    image_size = inception_v4.inception_v4.default_image_size
    names = imagenet.create_readable_names_for_imagenet_labels()
    checkpoints_dir = '../../checkpoints'
    # Don't classify the None Object
    time.sleep(5)
    with tf.Graph().as_default():
        image = tf.placeholder(tf.uint8, [480, 640, 3])
        processed_image = inception_preprocessing.preprocess_image(image,
                                                                   image_size, image_size, is_training=False)
        processed_images = tf.expand_dims(processed_image, 0)
        # processed_images will be a 1x299x299x3 tensor of float32
        # Create the model, use the default arg scope to configure the batch norm parameters.
        with slim.arg_scope(inception_v4.inception_v4_arg_scope()):
            logits, _ = inception_v4.inception_v4(processed_images, num_classes=1001, is_training=False)
            probs = tf.nn.softmax(logits)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'inception_v4.ckpt'),
            slim.get_model_variables('InceptionV4'))
        with tf.Session() as sess:
            init_fn(sess)
            while True:
                start = count
                probabilities = sess.run(probs, feed_dict={image: frame})
                probabilities = probabilities[0, 0:]
                sorted_inds = [i[0] for i in sorted(enumerate(-probabilities), key=lambda x: x[1])]
                for i in range(5):
                    index = sorted_inds[i]
                    print('Probability %0.2f%% => [%s]' % (probabilities[index] * 100, names[index]))
                end = count
                print("Classification latency = %d frames" % (end - start))
                # How to end this thread cleanly?
######################################################
# Start the threads
capture_thread = Thread(target=capture)
classify_thread = Thread(target=classification)
capture_thread.start()
classify_thread.start()
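Regarding the clean-exit question left open above: one minimal sketch, assuming the same global structure as my code, is to share a threading.Event between the two threads and test it instead of looping forever. The stop_event name and the join timeout below are my own choices, not part of the original code:

from threading import Event

stop_event = Event()  # shared stop flag, visible to both threads

# In capture(), set the flag when the user quits so the classifier stops too:
#     if cv2.waitKey(1) & 0xFF == ord('q'):
#         stop_event.set()
#         break

# In classification(), loop on the flag instead of forever:
#     while not stop_event.is_set():
#         ...

# Then, after starting both threads, wait in the main thread so that
# Ctrl+C (KeyboardInterrupt) is caught there and propagated via the flag:
try:
    while capture_thread.is_alive():
        capture_thread.join(timeout=0.5)  # timed join keeps the main thread interruptible
except KeyboardInterrupt:
    pass
stop_event.set()
classify_thread.join()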
Answer 1 (score: 1)
One option that can solve the problem is to define a class and load the model in the __init__ method. Also, add a method called classify. You instantiate the class first; then, for each frame, you call the classify method. Below you can find how I modified your code:
import os
import cv2
import matplotlib.pyplot as plt
import tensorflow as tf
from datasets import imagenet
from nets import inception_v4
from preprocessing import inception_preprocessing
def show_image(img_path):
    img = cv2.imread(img_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img_plot = plt.imshow(img)
    # Set up the plot and hide axes
    plt.title('test')
    img_plot.axes.get_yaxis().set_ticks([])
    img_plot.axes.get_xaxis().set_ticks([])
    plt.show()

def load_image(img_path):
    img = cv2.imread(img_path)
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
class ImageClassifier():
    def __init__(self):
        self.slim = tf.contrib.slim
        self.image_size = inception_v4.inception_v4.default_image_size
        self.checkpoints_dir = 'checkpoints'
        self.names = imagenet.create_readable_names_for_imagenet_labels()
        self.arg_scope = inception_v4.inception_v4_arg_scope()
        self.image = tf.placeholder(tf.uint8, [480, 640, 3])
        self.processed_image = inception_preprocessing.preprocess_image(self.image,
                                                                        self.image_size, self.image_size,
                                                                        is_training=False)
        self.processed_images = tf.expand_dims(self.processed_image, 0)
        # processed_images will be a 1x299x299x3 tensor of float32
        # Create the model, use the default arg scope to configure the batch norm parameters.
        with self.slim.arg_scope(self.arg_scope):
            self.logits, self.end_points = inception_v4.inception_v4(self.processed_images, num_classes=1001,
                                                                     is_training=False)
        self.probs = tf.nn.softmax(self.logits)
        self.init_fn = self.slim.assign_from_checkpoint_fn(
            os.path.join(self.checkpoints_dir, 'inception_v4.ckpt'),
            self.slim.get_model_variables('InceptionV4'))
        self.session = tf.Session()
        self.init_fn(self.session)

    def classify(self, img):
        height, width = img.shape[:2]
        feed_dict = {self.image: img}
        probabilities = self.session.run(self.probs, feed_dict=feed_dict)
        probabilities = probabilities[0, 0:]
        sorted_inds = [i[0] for i in sorted(enumerate(-probabilities), key=lambda x: x[1])]
        for i in range(5):
            index = sorted_inds[i]
            print('Probability %0.2f%% => [%s]' % (probabilities[index] * 100, self.names[index]))
def main():
    imgs_dir = "./imgs/wep"
    image_classifier = ImageClassifier()
    for img_name in os.listdir(imgs_dir):
        img = load_image(os.path.join(imgs_dir, img_name))
        img = cv2.resize(img, (640, 480))
        print(img_name)
        image_classifier.classify(img)

if __name__ == '__main__':
    main()
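Since the question stresses classifying many batches of images, here is a minimal sketch of a batched variant of this class. It is my own assumption, not part of the answer above: the placeholder gets a None batch dimension, tf.map_fn applies the per-image preprocessing, and a hypothetical classify_batch method feeds a whole uint8 array at once:

import numpy as np

# In __init__, replace the single-image placeholder and preprocessing with:
#     self.images = tf.placeholder(tf.uint8, [None, 480, 640, 3])
#     self.processed_images = tf.map_fn(
#         lambda img: inception_preprocessing.preprocess_image(
#             img, self.image_size, self.image_size, is_training=False),
#         self.images, dtype=tf.float32)

def classify_batch(self, imgs):
    # imgs: uint8 numpy array of shape [batch, 480, 640, 3]
    probabilities = self.session.run(self.probs, feed_dict={self.images: imgs})
    for probs in probabilities:
        top5 = np.argsort(-probs)[:5]  # indices of the five highest-probability classes
        for index in top5:
            print('Probability %0.2f%% => [%s]' % (probs[index] * 100, self.names[index]))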