我在OpenCV Python中执行对象检测和跟踪。我每隔10帧运行一次检测;对每个检测到的对象,我创建并初始化一个跟踪器对象,并在后续帧中更新这些跟踪器。但是,在处理了大约582帧后,我收到以下错误 -
OpenCV Error: Backtrace (The model is not initialized) in init, file /home/rainer/libraries/opencv_contrib/modules/tracking/src/tracker.cpp, line 81
Traceback (most recent call last):
File "evaluate.py", line 209, in <module>
ret = tracker.init(frame, ped)
cv2.error: /home/rainer/libraries/opencv_contrib/modules/tracking/src/tracker.cpp:81: error: (-1) The model is not initialized in function init
我的代码的高级表示如下 -
# Detect on every 10th frame (1, 11, 21, ...); re-seed the trackers once
# right after each detection pass, then only *update* them on the frames
# in between.  The original created and init'ed a brand-new MIL tracker for
# every detection on every non-detection frame, so `trackers` grew without
# bound and `trackings` was never cleared; `frame_no` was also never
# initialized, and `if frame_no % 10:` ran detection on 9 of every 10
# frames, inverting the stated intent.
detections, trackers, trackings = [], [], []
frame_no = 0
while video.isOpened():
    ret, frame = video.read()
    if not ret: break
    frame_no = frame_no + 1
    if frame_no % 10 == 1:
        detections = detect_objects(frame)
        trackers = []  # stale trackers are re-seeded on the next frame
    else:
        if not trackers:
            # Seed exactly one tracker per detection, once per interval.
            for det in detections:
                tracker = cv2.Tracker_create('MIL')
                ret = tracker.init(frame, det)
                trackers.append(tracker)
        trackings = []
        for trk in trackers:
            ret, bbox = trk.update(frame)
            trackings.append(bbox)
[编辑]完整的代码如下 -
import os
os.environ['GLOG_minloglevel'] = '2'
import caffe
caffe.set_mode_gpu()
caffe.set_device(0)
import sys
import time
import dlib
import argparse
import cv2 as cv
import numpy as np
DETECTION_INTERVAL = 10
PEDESTRIANS = []
class Pedestrian():
    """One tracked pedestrian: an id, a history of boxes, and a KCF tracker."""
    def __init__(self, pid):
        self._pid = pid
        self._bboxes = []
        self._tracker = cv.Tracker_create('KCF')
    def get_pid(self):
        """Return this pedestrian's identifier."""
        return self._pid
    def get_bboxes(self):
        """Return the accumulated list of recorded boxes."""
        return self._bboxes
    def add_bbox(self, bbox):
        """Append an externally supplied box to the history."""
        self._bboxes.append(bbox)
    def update_tracker(self, frame):
        """Advance the tracker on *frame*.

        Converts the tracker's (x, y, w, h) result to [xmin, ymin, xmax, ymax],
        records it, and returns it; returns None when the tracker fails.
        """
        ok, raw = self._tracker.update(frame)
        if not ok:
            return None
        x, y, w, h = raw[0], raw[1], raw[2], raw[3]
        box = [x, y, x + w, y + h]
        self._bboxes.append(box)
        return box
def get_args(argv=None):
    """Parse command-line options.

    Args:
        argv: optional list of argument strings; defaults to sys.argv[1:]
            (backward-compatible — existing no-argument calls are unchanged).
    Returns:
        argparse.Namespace with model_file, video_file and weight_file.
    """
    parser = argparse.ArgumentParser()
    # default=None instead of the original int -1 sentinel: a path option
    # should never default to an integer; None fails fast and unambiguously
    # in the downstream caffe.Net / VideoCapture calls.
    parser.add_argument('-m', '--model_file', default=None)
    parser.add_argument('-v', '--video_file', default=None)
    parser.add_argument('-w', '--weight_file', default=None)
    args = parser.parse_args(argv)
    return args
def forward_pass(net, transformer, img):
    """Preprocess *img* with *transformer* and run one forward pass on *net*.

    Returns whatever net.forward_all produces for the preprocessed blob.
    """
    blob = transformer.preprocess('data', img)
    return net.forward_all(data=np.asarray(blob, dtype=np.float32))
def iou(box1, box2):
    """Intersection-over-union of two center-format boxes (cx, cy, w, h).

    Returns 0.0 for disjoint boxes and — unlike the original, which could
    raise ZeroDivisionError — also 0.0 when the union area is zero
    (both boxes degenerate).
    """
    tb = min(box1[0] + 0.5*box1[2], box2[0] + 0.5*box2[2]) - max(box1[0] - 0.5*box1[2], box2[0] - 0.5*box2[2])
    lr = min(box1[1] + 0.5*box1[3], box2[1] + 0.5*box2[3]) - max(box1[1] - 0.5*box1[3], box2[1] - 0.5*box2[3])
    if tb < 0 or lr < 0:
        intersection = 0
    else:
        intersection = tb * lr
    union = box1[2]*box1[3] + box2[2]*box2[3] - intersection
    if union <= 0:
        return 0.0
    # float() keeps the division true division under Python 2 as well.
    return float(intersection) / union
def detect_regions(output, img_width, img_height):
    """Decode one YOLOv1 output vector into a list of detections.

    Args:
        output: flat network output laid out as S*S*C class probabilities,
            then S*S*B box confidences, then S*S*B*4 box parameters
            (1470 values for S=7, B=2, C=20).  NOTE: the box slice is a
            view into `output` and is modified in place, as in the original.
        img_width: source image width in pixels.
        img_height: source image height in pixels.
    Returns:
        list of [class_name, cx, cy, w, h, confidence] entries with
        center-format boxes in pixels, after confidence thresholding and
        greedy IoU suppression.
    """
    num_boxes = 2      # B: boxes predicted per grid cell
    grid_size = 7      # S: grid cells per image side
    threshold = 0.3    # minimum class-conditional confidence to keep a box
    iou_thresh = 0.5   # overlap above which the lower-scored box is dropped
    num_classes = 20   # C: PASCAL VOC class count
    classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", \
        "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", \
        "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
    # Section boundaries in the flat vector (980 and 1078 for 7/2/20) are
    # now derived from the constants instead of hard-coded literals.
    class_end = grid_size * grid_size * num_classes
    conf_end = class_end + grid_size * grid_size * num_boxes
    probs = np.zeros((grid_size, grid_size, num_boxes, num_classes))
    class_probs = np.reshape(output[0:class_end], (grid_size, grid_size, num_classes))
    scales = np.reshape(output[class_end:conf_end], (grid_size, grid_size, num_boxes))
    boxes = np.reshape(output[conf_end:], (grid_size, grid_size, num_boxes, 4))
    # offset[i, j, k] == j: adding it (and its transpose for y) converts the
    # per-cell x/y predictions into grid coordinates.
    offset = np.transpose(np.reshape(np.array([np.arange(grid_size)] * (grid_size * num_boxes)),
                                     (num_boxes, grid_size, grid_size)), (1, 2, 0))
    boxes[:, :, :, 0] += offset
    boxes[:, :, :, 1] += np.transpose(offset, (1, 0, 2))
    boxes[:, :, :, 0:2] = boxes[:, :, :, 0:2] / float(grid_size)
    # Widths/heights are predicted as square roots; square them back.
    boxes[:, :, :, 2] = np.multiply(boxes[:, :, :, 2], boxes[:, :, :, 2])
    boxes[:, :, :, 3] = np.multiply(boxes[:, :, :, 3], boxes[:, :, :, 3])
    # Scale normalized coordinates to pixels.
    boxes[:, :, :, 0] *= img_width
    boxes[:, :, :, 1] *= img_height
    boxes[:, :, :, 2] *= img_width
    boxes[:, :, :, 3] *= img_height
    # Class-conditional confidence = class probability * box confidence.
    for i in range(num_boxes):
        for j in range(num_classes):
            probs[:, :, i, j] = np.multiply(class_probs[:, :, j], scales[:, :, i])
    filter_mat_probs = np.array(probs >= threshold, dtype='bool')
    filter_mat_boxes = np.nonzero(filter_mat_probs)
    boxes_filtered = boxes[filter_mat_boxes[0], filter_mat_boxes[1], filter_mat_boxes[2]]
    probs_filtered = probs[filter_mat_probs]
    classes_num_filtered = np.argmax(probs, axis=3)[filter_mat_boxes[0], filter_mat_boxes[1], filter_mat_boxes[2]]
    # Sort surviving boxes by confidence, highest first.
    argsort = np.array(np.argsort(probs_filtered))[::-1]
    boxes_filtered = boxes_filtered[argsort]
    probs_filtered = probs_filtered[argsort]
    classes_num_filtered = classes_num_filtered[argsort]
    # Greedy non-maximum suppression: zero the confidence of any box that
    # overlaps a higher-confidence box by more than iou_thresh.
    for i in range(len(boxes_filtered)):
        if probs_filtered[i] == 0:
            continue
        for j in range(i+1, len(boxes_filtered)):
            if iou(boxes_filtered[i], boxes_filtered[j]) > iou_thresh:
                probs_filtered[j] = 0.0
    filter_iou = np.array(probs_filtered > 0.0, dtype='bool')
    boxes_filtered = boxes_filtered[filter_iou]
    probs_filtered = probs_filtered[filter_iou]
    classes_num_filtered = classes_num_filtered[filter_iou]
    result = []
    for i in range(len(boxes_filtered)):
        result.append([classes[classes_num_filtered[i]], boxes_filtered[i][0],
                       boxes_filtered[i][1], boxes_filtered[i][2],
                       boxes_filtered[i][3], probs_filtered[i]])
    return result
def box_overlap(bbox1, bbox2):
    """Intersection-over-union of two corner-format boxes (xmin, ymin, xmax, ymax).

    Fixes a NameError in the original: the body referred to box1/box2 while
    the parameters are named bbox1/bbox2, so any overlapping pair crashed.
    Returns 0.0 for disjoint boxes or a zero-area union.
    """
    l1 = min(bbox1[2], bbox2[2]) - max(bbox1[0], bbox2[0])
    l2 = min(bbox1[3], bbox2[3]) - max(bbox1[1], bbox2[1])
    if l1 < 0 or l2 < 0:
        return 0.0
    intersection = float(l1 * l2)
    union = float((bbox1[2]-bbox1[0])*(bbox1[3]-bbox1[1]) + (bbox2[2]-bbox2[0])*(bbox2[3]-bbox2[1]) - intersection)
    if union <= 0:
        return 0.0
    return intersection / union
def proc_detections(detections, img_width, img_height):
    """Convert raw detections into clamped corner-format pedestrian boxes.

    Keeps only 'person' entries; each [class, cx, cy, w, h, prob] detection
    becomes an int (xmin, ymin, xmax, ymax) tuple clipped to the image.
    Replaces the index-based loop and manual clamp conditionals with
    direct iteration and max/min.
    """
    pedestrians = []
    for det in detections:
        if det[0] != 'person':
            continue
        x = int(det[1])
        y = int(det[2])
        w = int(det[3]) // 2
        h = int(det[4]) // 2
        xmin = max(0, x - w)
        ymin = max(0, y - h)
        xmax = min(img_width, x + w)
        ymax = min(img_height, y + h)
        pedestrians.append((xmin, ymin, xmax, ymax))
    return pedestrians
def proc_detections_trackings(detections, trackers, trackings):
    """Reconcile detector output with tracker output.

    Only the equal-count case is implemented: the tracker state is discarded
    so it can be re-seeded from the fresh detections.  The more/fewer
    detections branches are placeholders and leave everything unchanged.
    """
    if len(detections) == len(trackings):
        # Same number of boxes from both sources: drop tracker state.
        trackers, trackings = [], []
    # len(detections) > or < len(trackings): not implemented yet.
    return detections, trackers, trackings
def create_output(img, boxes, color):
    """Draw each (xmin, ymin, xmax, ymax) box on *img* in *color*; return *img*."""
    for xmin, ymin, xmax, ymax in boxes:
        cv.rectangle(img, (xmin, ymin), (xmax, ymax), color, 2)
    return img
if __name__ == '__main__':
    # --- Setup: parse args, open video I/O, load the Caffe detector. ---
    loading_start_time = time.time()
    args = get_args()
    model_filename = args.model_file
    weight_filename = args.weight_file
    video_filename = args.video_file
    video_ip = cv.VideoCapture(video_filename)
    # Properties 3 and 4 are CAP_PROP_FRAME_WIDTH / CAP_PROP_FRAME_HEIGHT.
    video_w = int(video_ip.get(3))
    video_h = int(video_ip.get(4))
    video_op = cv.VideoWriter('output.avi', cv.VideoWriter_fourcc(*'XVID'), 30.0, (video_w, video_h))
    net = caffe.Net(model_filename, weight_filename, caffe.TEST)
    transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
    transformer.set_transpose('data', (2, 0, 1))  # HWC -> CHW for Caffe
    transformer.set_raw_scale('data', 1.0/255.0)  # scale pixels to [0, 1]
    loading_end_time = time.time()
    print '\nLoading Time = %.3fs\n' %(loading_end_time - loading_start_time)
    # --- Per-frame loop: detect every DETECTION_INTERVAL frames, track between. ---
    num_frames = 0
    avg_time = 0.0
    detections, trackers, trackings = [], [], []
    while video_ip.isOpened():
        ret, frame = video_ip.read()
        if not ret: break
        num_frames = num_frames + 1
        start_time = time.time()
        # Detection runs on frames 1, 11, 21, ... (num_frames is 1-based).
        if num_frames % DETECTION_INTERVAL == 1:
            trackers = []  # discard stale trackers; re-seeded below
            print 'Frame: %d, Performing detection' %(num_frames)
            output = forward_pass(net, transformer, frame)
            regions = detect_regions(output['result'][0], frame.shape[1], frame.shape[0])
            detections = proc_detections(regions, frame.shape[1], frame.shape[0])
            frame = create_output(frame, detections, (0, 255, 0))
        else:
            if not detections:
                # Nothing to track yet: fall back to detection on this frame.
                print 'Frame: %d, No detections found, Performing detection' %(num_frames)
                output = forward_pass(net, transformer, frame)
                regions = detect_regions(output['result'][0], frame.shape[1], frame.shape[0])
                detections = proc_detections(regions, frame.shape[1], frame.shape[0])
                frame = create_output(frame, detections, (0, 255, 0))
            else:
                if not trackers:
                    # First frame after a detection pass: seed one dlib
                    # correlation tracker per detected pedestrian box.
                    print 'Frame: %d, No trackers found, Initializing trackers' %(num_frames)
                    for det in detections:
                        tracker = dlib.correlation_tracker()
                        bbox = dlib.rectangle(det[0], det[1], det[2], det[3])
                        tracker.start_track(frame, bbox)
                        trackers.append(tracker)
                else:
                    # Subsequent frames: advance every tracker and collect
                    # its position as an int (xmin, ymin, xmax, ymax) tuple.
                    trackings = []
                    print 'Frame: %d, Updating trackers' %(num_frames)
                    for trk in trackers:
                        trk.update(frame)
                        bbox = trk.get_position()
                        trackings.append((int(bbox.left()), int(bbox.top()), int(bbox.right()), int(bbox.bottom())))
                    frame = create_output(frame, trackings, (0, 0, 255))
        end_time = time.time()
        avg_time = avg_time + (end_time - start_time)
        cv.imwrite('images/frame_' + str(num_frames) + '.jpg', frame)
        video_op.write(frame)
    # NOTE(review): divides by num_frames -- raises ZeroDivisionError if the
    # video yields no frames; consider guarding before dividing.
    avg_time = avg_time / num_frames
    print '\nAverage Processing Time = %.3fs\n' %(avg_time)