Question

我按照 TensorFlow for Poets 教程，基于 Inception 模型用 TensorFlow 重新训练了自己的模型。一次预测需要 0.4 秒，排序需要 2 秒。由于耗时较长，视频画面会卡顿，并且在预测时画面会变得混乱。虽然预测本身需要时间，但有没有办法让画面保持流畅？以下是我的代码……

# Open the capture device with index 0.
camera = cv2.VideoCapture(0)

# Load the label file, stripping trailing whitespace (newline / carriage
# return included) from every line. The `with` block closes the file handle
# instead of leaving it open for the lifetime of the script.
with tf.gfile.GFile('retrained_labels.txt') as label_file:
    label_lines = [line.rstrip() for line in label_file]

def grabVideoFeed():
    """Read one frame from the module-level ``camera``.

    Returns:
        The frame returned by ``camera.read()``, or ``None`` when the read
        reports failure.
    """
    ok, image = camera.read()
    if not ok:
        return None
    return image

def initialSetup():
    """Import the retrained graph from ``retrained_graph.pb`` and report the time taken.

    Sets ``TF_CPP_MIN_LOG_LEVEL`` to ``'2'``, parses the serialized ``GraphDef``
    and imports it (with an empty name prefix) via ``tf.import_graph_def``.
    Prints the elapsed time; returns nothing.
    """
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    began = timeit.default_timer()

    # Unpersist the graph from file -- this takes 2-5 seconds to run.
    with tf.gfile.FastGFile('retrained_graph.pb', 'rb') as graph_file:
        serialized = graph_file.read()

    graph_def = tf.GraphDef()
    graph_def.ParseFromString(serialized)
    tf.import_graph_def(graph_def, name='')

    elapsed = timeit.default_timer() - began
    print('Took {} seconds to unpersist the graph'.format(elapsed))

initialSetup()

# The try/finally guarantees the camera is released and the preview window is
# destroyed even when the loop ends through an exception (for example the
# SystemError raised on a failed frame grab).
try:
    with tf.Session() as sess:
        start_time = timeit.default_timer()

        # Look up the classifier's output tensor once, before the loop.
        softmax_tensor = sess.graph.get_tensor_by_name('final_result:0')

        print('Took {} seconds to feed data to graph'.format(timeit.default_timer() - start_time))

        while True:
            frame = grabVideoFeed()

            if frame is None:
                raise SystemError('Issue grabbing the frame')

            frame = cv2.resize(frame, (299, 299), interpolation=cv2.INTER_CUBIC)

            cv2.imshow('Main', frame)

            # Adhere to the TF graph input structure: float values min-max
            # scaled into [-0.5, 0.5], with a leading axis added at position 0.
            numpy_frame = np.asarray(frame)
            numpy_frame = cv2.normalize(numpy_frame.astype('float'), None, -0.5, .5, cv2.NORM_MINMAX)
            numpy_final = np.expand_dims(numpy_frame, axis=0)

            start_time = timeit.default_timer()

            # This takes 2-5 seconds as well. It runs in the same loop as
            # imshow/waitKey, so the preview cannot refresh while it runs.
            predictions = sess.run(softmax_tensor, {'Mul:0': numpy_final})

            print('Took {} seconds to perform prediction'.format(timeit.default_timer() - start_time))

            start_time = timeit.default_timer()

            # Class indices of the first prediction, highest score first.
            top_k = predictions[0].argsort()[::-1]

            print('Took {} seconds to sort the predictions'.format(timeit.default_timer() - start_time))

            for node_id in top_k:
                human_string = label_lines[node_id]
                score = predictions[0][node_id]
                print('%s (score = %.5f)' % (human_string, score))

            print('********* Session Ended *********')

            if cv2.waitKey(1) & 0xFF == ord('q'):
                # Leaving the `with` block closes the session.
                break
finally:
    camera.release()
    cv2.destroyAllWindows()

Answer 1

@dat-tran 说得对：Faster R-CNN 虽然速度较快，但仍会有一些延迟。如果希望没有延迟，可以使用 YOLO 或 SSD 模型；我用过 YOLO，效果很好。

关于队列和多进程，您可以参考以下代码。

from utils import FPS, WebcamVideoStream
from multiprocessing import Process, Queue, Pool

def worker(input_q, output_q):
    """Classify frames taken from ``input_q`` and forward them to ``output_q``.

    Loads ``retrained_graph.pb`` into a ``tf.Graph`` private to this function,
    then loops: take a frame, run the classifier, print every label with its
    score (highest first) and put the original frame on ``output_q``. The loop
    only ends when an exception propagates; the session is closed and the FPS
    counter stopped in that case.

    Relies on the module-level ``label_lines`` list for the class names.

    Args:
        input_q: queue supplying the frames to classify.
        output_q: queue receiving each frame after it has been classified.
    """
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

    # Unpersists graph from file. This takes 2-5 seconds to run.
    graph = tf.Graph()
    with graph.as_default():
        graph_def = tf.GraphDef()
        with tf.gfile.FastGFile('retrained_graph.pb', 'rb') as f:
            graph_def.ParseFromString(f.read())
        tf.import_graph_def(graph_def, name='')

    sess = tf.Session(graph=graph)
    fps = FPS().start()
    try:
        # Output tensor of the classifier, looked up once before the loop.
        softmax_tensor = graph.get_tensor_by_name('final_result:0')

        while True:
            fps.update()
            frame = input_q.get()

            # Same preprocessing as the single-process loop in this file:
            # resize to 299x299, min-max scale into [-0.5, 0.5], add a leading
            # axis. Only the copy fed to the model is resized; `frame` itself
            # is forwarded untouched.
            resized = cv2.resize(frame, (299, 299), interpolation=cv2.INTER_CUBIC)
            numpy_frame = np.asarray(resized)
            numpy_frame = cv2.normalize(numpy_frame.astype('float'), None, -0.5, .5, cv2.NORM_MINMAX)
            numpy_final = np.expand_dims(numpy_frame, axis=0)

            start_time = timeit.default_timer()

            # This takes 2-5 seconds as well
            predictions = sess.run(softmax_tensor, {'Mul:0': numpy_final})

            print('Took {} seconds to perform prediction'.format(timeit.default_timer() - start_time))

            start_time = timeit.default_timer()

            # Class indices of the first prediction, highest score first.
            top_k = predictions[0].argsort()[::-1]

            print('Took {} seconds to sort the predictions'.format(timeit.default_timer() - start_time))

            for node_id in top_k:
                human_string = label_lines[node_id]
                score = predictions[0][node_id]
                print('%s (score = %.5f)' % (human_string, score))

            output_q.put(frame)
    finally:
        fps.stop()
        sess.close()

if __name__ == '__main__':
    # Bounded queues: put() blocks once 10 frames are waiting.
    input_q = Queue(maxsize=10)
    output_q = Queue(maxsize=10)

    # One-process pool whose initializer is `worker`; since `worker` loops
    # forever, the pool process spends its whole life classifying frames.
    pool = Pool(1, worker, (input_q, output_q))

    # NOTE(review): `args` is not defined in this snippet -- presumably an
    # argparse namespace providing `width` and `height`; confirm before use.
    video_capture = WebcamVideoStream(src=0,
                                      width=args.width,
                                      height=args.height).start()

    # Create and size the display window once instead of on every frame.
    cv2.namedWindow('Image', cv2.WINDOW_NORMAL)
    cv2.resizeWindow('Image', 600, 600)

    fps = FPS().start()
    try:
        while (video_capture.isOpened()):
            _, frame = video_capture.read()
            input_q.put(frame)
            # Blocks until the worker has finished with a frame.
            cv2.imshow('Image', output_q.get())
            fps.update()
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        fps.stop()
        # The worker never returns, so close()/join() would hang; terminate it.
        pool.terminate()
        # NOTE(review): the capture stream is not stopped here because its
        # shutdown API is not visible in this snippet -- confirm and add.
        cv2.destroyAllWindows()

Answer 2

画面之所以如此卡顿，是由您使用的模型造成的。这些模型并不是为低延迟而设计的。让画面更流畅的一种方法是使用 MobileNets 或 Faster R-CNN 等模型，这些模型速度更快，但精度较低。如果您感兴趣，我在 Medium 上写过一篇相关的博客文章。

如果您仍想使用自己的模型，另一个选择是使用队列和多进程。您可以设置一个用于载入图像的队列，以及一个只负责执行预测、再把结果放入另一个队列的处理环节。最后，这两个队列需要保持同步。

OpenCV 视频流太卡顿了

2 个答案: