OpenCV Python object tracker error: (-1) The model is not initialized

Asked: 2017-07-12 07:12:44

Tags: python opencv computer-vision object-detection video-tracking

I am doing object detection and tracking in OpenCV Python. For each object detected in every 10th frame, I create and initialize a tracker object, and I update these trackers on the subsequent frames. However, after processing around 582 frames, I get the following error:

OpenCV Error: Backtrace (The model is not initialized) in init, file /home/rainer/libraries/opencv_contrib/modules/tracking/src/tracker.cpp, line 81
Traceback (most recent call last):
  File "evaluate.py", line 209, in <module>
    ret = tracker.init(frame, ped)
cv2.error: /home/rainer/libraries/opencv_contrib/modules/tracking/src/tracker.cpp:81: error: (-1) The model is not initialized in function init

A high-level representation of my code is given below:

frame_no = 0
detections, trackers, trackings = [], [], []
while video.isOpened():
    ret, frame = video.read()
    if not ret: break
    frame_no = frame_no + 1

    if frame_no % 10 == 1:   # every 10th frame: run the detector
        detections = detect_objects(frame)
    else:                    # in-between frames: initialize and update trackers
        for det in detections:
            tracker = cv2.Tracker_create('MIL')
            ret = tracker.init(frame, det)   # <-- the error is raised here
            trackers.append(tracker)
        for trk in trackers:
            ret, bbox = trk.update(frame)
            trackings.append(bbox)
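
For reference, in the OpenCV 3.x sources this error is raised inside Tracker::init when the tracker's initImpl fails to build its internal model; for MIL/KCF that typically happens when the bounding box is degenerate (zero width or height) or lies partly outside the frame. Note also that cv2's Tracker.init expects an (x, y, w, h) box, not (xmin, ymin, xmax, ymax) corners. A minimal sketch of a guard along those lines — clamp_bbox and make_tracker are hypothetical helpers, not part of the original code:

import cv2

def clamp_bbox(bbox, frame_w, frame_h):
    # Clip an (x, y, w, h) box to the frame; reject it if it degenerates.
    x, y, w, h = bbox
    x, y = max(0.0, x), max(0.0, y)
    w = min(w, frame_w - x)
    h = min(h, frame_h - y)
    if w <= 1 or h <= 1:
        return None
    return (x, y, w, h)

def make_tracker(frame, det):
    # If detections are corner boxes, convert them first:
    # (xmin, ymin, xmax, ymax) -> (xmin, ymin, xmax - xmin, ymax - ymin)
    bbox = clamp_bbox(det, frame.shape[1], frame.shape[0])
    if bbox is None:
        return None
    tracker = cv2.Tracker_create('MIL')
    try:
        ok = tracker.init(frame, bbox)
    except cv2.error:
        return None  # init still failed; skip this detection
    return tracker if ok else None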

[Edit] The full code is given below:

import os
os.environ['GLOG_minloglevel'] = '2'

import caffe
caffe.set_mode_gpu()
caffe.set_device(0)

import sys
import time
import dlib
import argparse
import cv2 as cv
import numpy as np

DETECTION_INTERVAL = 10
PEDESTRIANS = []

class Pedestrian():
    def __init__(self, pid):
        self._pid = pid
        self._bboxes = []
        self._tracker = cv.Tracker_create('KCF')
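        # NB: the tracker is created here but Tracker.init() is never called
        # on it, so update_tracker() below will always get ret == False;
        # this class is also never instantiated in __main__.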

    def get_pid(self):
        return self._pid

    def get_bboxes(self):
        return self._bboxes

    def add_bbox(self, bbox):
        self._bboxes.append(bbox)

    def update_tracker(self, frame):
        ret, bbox = self._tracker.update(frame)
        if not ret:
            return None

        bbox = [bbox[0], bbox[1], bbox[0]+bbox[2], bbox[1]+bbox[3]]
        self._bboxes.append(bbox)
        return bbox

def get_args():
    parser = argparse.ArgumentParser()

    parser.add_argument('-m', '--model_file', default=-1)
    parser.add_argument('-v', '--video_file', default=-1)
    parser.add_argument('-w', '--weight_file', default=-1)

    args = parser.parse_args()
    return args

def forward_pass(net, transformer, img):
    img = transformer.preprocess('data', img)
    out = net.forward_all(data=np.asarray(img, dtype=np.float32))

    return out

def iou(box1, box2):
    tb = min(box1[0] + 0.5*box1[2], box2[0] + 0.5*box2[2]) - max(box1[0] - 0.5*box1[2], box2[0] - 0.5*box2[2])
    lr = min(box1[1] + 0.5*box1[3], box2[1] + 0.5*box2[3]) - max(box1[1] - 0.5*box1[3], box2[1] - 0.5*box2[3])
    if tb < 0 or lr < 0:
        intersection = 0
    else:
        intersection =  tb * lr

    return intersection / (box1[2]*box1[3] + box2[2]*box2[3] - intersection)

def detect_regions(output, img_width, img_height):
    num_boxes = 2
    grid_size = 7
    threshold = 0.3
    iou_thresh = 0.5
    num_classes = 20

    classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", \
            "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", \
            "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]

    probs = np.zeros((7, 7, 2, 20))
    class_probs = np.reshape(output[0:980], (7, 7, 20))
    scales = np.reshape(output[980:1078], (7, 7, 2))
    boxes = np.reshape(output[1078:], (7, 7, 2, 4))
    offset = np.transpose(np.reshape(np.array([np.arange(7)] * 14), (2, 7, 7)), (1, 2, 0))

    boxes[:, :, :, 0] += offset
    boxes[:, :, :, 1] += np.transpose(offset, (1, 0, 2))
    boxes[:, :, :, 0:2] = boxes[:, :, :, 0:2] / 7.0
    boxes[:, :, :, 2] = np.multiply(boxes[:, :, :, 2], boxes[:, :, :, 2])
    boxes[:, :, :, 3] = np.multiply(boxes[:, :, :, 3], boxes[:, :, :, 3])

    boxes[:, :, :, 0] *= img_width
    boxes[:, :, :, 1] *= img_height
    boxes[:, :, :, 2] *= img_width
    boxes[:, :, :, 3] *= img_height

    for i in range(2):
        for j in range(20):
            probs[:, :, i, j] = np.multiply(class_probs[:, :, j], scales[:, :, i])

    filter_mat_probs = np.array(probs>=threshold, dtype='bool')
    filter_mat_boxes = np.nonzero(filter_mat_probs)
    boxes_filtered = boxes[filter_mat_boxes[0], filter_mat_boxes[1], filter_mat_boxes[2]]
    probs_filtered = probs[filter_mat_probs]
    classes_num_filtered = np.argmax(probs, axis=3)[filter_mat_boxes[0], filter_mat_boxes[1], filter_mat_boxes[2]]

    argsort = np.array(np.argsort(probs_filtered))[::-1]
    boxes_filtered = boxes_filtered[argsort]
    probs_filtered = probs_filtered[argsort]
    classes_num_filtered = classes_num_filtered[argsort]

    for i in range(len(boxes_filtered)):
        if probs_filtered[i] == 0:
            continue
        for j in range(i+1, len(boxes_filtered)):
            if iou(boxes_filtered[i], boxes_filtered[j]) > iou_thresh:
                probs_filtered[j] = 0.0

    filter_iou = np.array(probs_filtered>0.0, dtype='bool')
    boxes_filtered = boxes_filtered[filter_iou]
    probs_filtered = probs_filtered[filter_iou]
    classes_num_filtered = classes_num_filtered[filter_iou]

    result = []
    for i in range(len(boxes_filtered)):
        result.append([classes[classes_num_filtered[i]], boxes_filtered[i][0], boxes_filtered[i][1], boxes_filtered[i][2], boxes_filtered[i][3], probs_filtered[i]])

    return result

def box_overlap(bbox1, bbox2):
    l1 = min(bbox1[2], bbox2[2]) - max(bbox1[0], bbox2[0])
    l2 = min(bbox1[3], bbox2[3]) - max(bbox1[1], bbox2[1])

    if l1 < 0 or l2 < 0:
        return 0.0
    else:
        intersection = float(l1 * l2)

    union = float((bbox1[2]-bbox1[0])*(bbox1[3]-bbox1[1]) + (bbox2[2]-bbox2[0])*(bbox2[3]-bbox2[1]) - intersection)
    return intersection / union

def proc_detections(detections, img_width, img_height):
    pedestrians = []

    for i in range(len(detections)):
        if detections[i][0] == 'person':
            x = int(detections[i][1])
            y = int(detections[i][2])
            w = int(detections[i][3]) // 2
            h = int(detections[i][4]) // 2

            xmin = 0 if x-w < 0 else x-w
            xmax = img_width if x+w > img_width else x+w
            ymin = 0 if y-h < 0 else y-h
            ymax = img_height if y+h > img_height else y+h

            pedestrians.append((xmin, ymin, xmax, ymax))

    return pedestrians

def proc_detections_trackings(detections, trackers, trackings):
    if len(detections) > len(trackings):
        pass
    elif len(detections) < len(trackings):
        pass
    elif len(detections) == len(trackings):
        trackers, trackings = [], []

    return detections, trackers, trackings

def create_output(img, boxes, color):
    for box in boxes:
        cv.rectangle(img, (box[0], box[1]), (box[2], box[3]), color, 2)

    return img

if __name__ == '__main__':
    loading_start_time = time.time()

    args = get_args()
    model_filename = args.model_file
    weight_filename = args.weight_file
    video_filename = args.video_file

    video_ip = cv.VideoCapture(video_filename)
    video_w = int(video_ip.get(cv.CAP_PROP_FRAME_WIDTH))
    video_h = int(video_ip.get(cv.CAP_PROP_FRAME_HEIGHT))
    video_op = cv.VideoWriter('output.avi', cv.VideoWriter_fourcc(*'XVID'), 30.0, (video_w, video_h))

    net = caffe.Net(model_filename, weight_filename, caffe.TEST)
    transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
    transformer.set_transpose('data', (2, 0, 1))
    transformer.set_raw_scale('data', 1.0/255.0)

    loading_end_time = time.time()
    print '\nLoading Time = %.3fs\n' %(loading_end_time - loading_start_time)

    num_frames = 0
    avg_time = 0.0
    detections, trackers, trackings = [], [], []
    while video_ip.isOpened():
        ret, frame = video_ip.read()
        if not ret: break
        num_frames = num_frames + 1

        start_time = time.time()

        if num_frames % DETECTION_INTERVAL == 1:
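            # frames 1, 11, 21, ...: rerun the detector and discard the
            # trackers built during the previous interval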
            trackers = []
            print 'Frame: %d, Performing detection' %(num_frames)

            output = forward_pass(net, transformer, frame)
            regions = detect_regions(output['result'][0], frame.shape[1], frame.shape[0])
            detections = proc_detections(regions, frame.shape[1], frame.shape[0])

            frame = create_output(frame, detections, (0, 255, 0))
        else:
            if not detections:
                print 'Frame: %d, No detections found, Performing detection' %(num_frames)

                output = forward_pass(net, transformer, frame)
                regions = detect_regions(output['result'][0], frame.shape[1], frame.shape[0])
                detections = proc_detections(regions, frame.shape[1], frame.shape[0])

                frame = create_output(frame, detections, (0, 255, 0))
            else:
                if not trackers:
                    print 'Frame: %d, No trackers found, Initializing trackers' %(num_frames)

                    for det in detections:
                        tracker = dlib.correlation_tracker()
                        bbox = dlib.rectangle(det[0], det[1], det[2], det[3])
                        tracker.start_track(frame, bbox)
                        trackers.append(tracker)
                else:
                    trackings = []
                    print 'Frame: %d, Updating trackers' %(num_frames)

                    for trk in trackers:
                        trk.update(frame)
                        bbox = trk.get_position()
                        trackings.append((int(bbox.left()), int(bbox.top()), int(bbox.right()), int(bbox.bottom())))

                frame = create_output(frame, trackings, (0, 0, 255))

        end_time = time.time()
        avg_time = avg_time + (end_time - start_time)

        cv.imwrite('images/frame_' + str(num_frames) + '.jpg', frame)
        video_op.write(frame)

    avg_time = avg_time / num_frames
    print '\nAverage Processing Time = %.3fs\n' %(avg_time)

0 Answers

No answers yet.