I have the following code for recognizing an object/symbol. My question is: how can I improve my code so that the object is still recognized when it is closer to or farther from the camera? Say I load a symbol; I need to recognize it at different distances.
import cv2
import numpy as np

# Camera
cap = cv2.VideoCapture(0)

# load the symbol (template) in grayscale
symbool = cv2.imread('klaver.jpg', 0)
w, h = symbool.shape[::-1]

while(1):
    res, frame = cap.read()
    img_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    res = cv2.matchTemplate(img_gray, symbool, cv2.TM_CCOEFF_NORMED)
    threshold = 0.9
    loc = np.where(res >= threshold)
    for pt in zip(*loc[::-1]):
        # print "hallo"
        cv2.rectangle(img_gray, pt, (pt[0] + w, pt[1] + h), (0, 255, 255), 1)
    cv2.imshow('Resultaat', img_gray)
    k = cv2.waitKey(5) & 0xFF
    if k == 27:
        break

cap.release()
cv2.destroyAllWindows()
Updated:
I have tried the tutorial below and came up with the following. The problem is that, when recognizing the object, this approach draws rectangles at random locations instead of locking onto the actual object/symbol.
import cv2
import numpy as np
import imutils

# Camera
cap = cv2.VideoCapture(0)

# load the symbol (template) in grayscale
symbool = cv2.imread('klaver.jpg', 0)
w, h = symbool.shape[::-1]

while(1):
    res, frame = cap.read()
    img_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    found = None
    #res = cv2.matchTemplate(img_gray, symbool, cv2.TM_CCOEFF_NORMED)
    for scale in np.linspace(0.2, 1.0, 20)[::-1]:
        resized = imutils.resize(img_gray, width=int(img_gray.shape[1] * scale))
        r = img_gray.shape[1] / float(resized.shape[1])
        if resized.shape[0] < h or resized.shape[1] < w:
            break
        edged = cv2.Canny(resized, 50, 200)
        result = cv2.matchTemplate(edged, symbool, cv2.TM_CCOEFF_NORMED)
        (_, maxVal, _, maxLoc) = cv2.minMaxLoc(result)
        clone = np.dstack([edged, edged, edged])
        cv2.rectangle(clone, (maxLoc[0], maxLoc[1]),
                      (maxLoc[0] + w, maxLoc[1] + h), (0, 0, 255), 2)
        if found is None or maxVal > found[0]:
            found = (maxVal, maxLoc, r)
    (_, maxLoc, r) = found
    (startX, startY) = (int(maxLoc[0] * r), int(maxLoc[1] * r))
    (endX, endY) = (int((maxLoc[0] + w) * r), int((maxLoc[1] + h) * r))
    threshold = 0.9
    loc = np.where(result >= threshold)
    for pt in zip(*loc[::-1]):
        # print "hallo"
        # cv2.rectangle(img_gray, pt, (pt[0] + w, pt[1] + h), (0,255,255), 1)
        cv2.rectangle(img_gray, (startX, startY), (endX, endY), (0, 255, 255), 1)
    cv2.imshow('Resultaat', img_gray)
    k = cv2.waitKey(5) & 0xFF
    if k == 27:
        break

cap.release()
cv2.destroyAllWindows()
Answer 0 (score: 1):
The simplest way to handle this within your current framework is to implement a pyramid approach: just load the image (or the template) at multiple resolutions and loop over them. Here is a good guide with Python + OpenCV code: http://www.pyimagesearch.com/2015/01/26/multi-scale-template-matching-using-python-opencv/
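As a rough illustration of that pyramid idea (a minimal sketch under my own assumptions, not the tutorial's code), you can also resize the template rather than the frame and keep the best score across scales. The scale range and the 0.8 threshold below are assumed values you would need to tune:

import cv2
import numpy as np

def match_template_multiscale(img_gray, template, scales=np.linspace(0.5, 1.5, 11)):
    # Try the template at several sizes and return the best (score, bounding box)
    best = None
    for scale in scales:
        tmpl = cv2.resize(template, None, fx=scale, fy=scale)
        th, tw = tmpl.shape[:2]
        if th > img_gray.shape[0] or tw > img_gray.shape[1]:
            continue  # skip scales where the template no longer fits in the frame
        result = cv2.matchTemplate(img_gray, tmpl, cv2.TM_CCOEFF_NORMED)
        _, maxVal, _, maxLoc = cv2.minMaxLoc(result)
        if best is None or maxVal > best[0]:
            best = (maxVal, (maxLoc[0], maxLoc[1], maxLoc[0] + tw, maxLoc[1] + th))
    return best

# Inside the existing capture loop, something like:
# best = match_template_multiscale(img_gray, symbool)
# if best is not None and best[0] >= 0.8:
#     x1, y1, x2, y2 = best[1]
#     cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 255), 2)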
This gives you scale-invariant template matching. Something more robust is feature detection (keypoints), which gives you both scale and rotation invariance with respect to the template. See http://robocv.blogspot.com/2012/02/real-time-object-detection-in-opencv.html for an example that uses SURF.
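Note that the SURF example in that link requires the non-free opencv-contrib modules. As a rough sketch of the same keypoint idea using the freely available ORB detector instead (my substitution, not the tutorial's code), something like this could replace the template-matching loop:

import cv2

# Load the template in grayscale and compute ORB keypoints/descriptors once
symbool = cv2.imread('klaver.jpg', 0)
orb = cv2.ORB_create(nfeatures=500)
kp_t, des_t = orb.detectAndCompute(symbool, None)
# Hamming distance is the appropriate metric for ORB's binary descriptors
bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)

cap = cv2.VideoCapture(0)
while True:
    ok, frame = cap.read()
    if not ok:
        break
    img_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    kp_f, des_f = orb.detectAndCompute(img_gray, None)
    if des_t is not None and des_f is not None and len(des_f) > 0:
        matches = sorted(bf.match(des_t, des_f), key=lambda m: m.distance)
        # Many low-distance matches suggest the symbol is present at some scale/rotation
        vis = cv2.drawMatches(symbool, kp_t, frame, kp_f, matches[:20], None, flags=2)
        cv2.imshow('Resultaat', vis)
    else:
        cv2.imshow('Resultaat', frame)
    if cv2.waitKey(5) & 0xFF == 27:
        break
cap.release()
cv2.destroyAllWindows()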
Edit in response to the OP's comment below!
To address your second code snippet: there are a few problems, but you are closer than you think! You are still thresholding the raw result with loc = np.where(result >= threshold), but you have already stored the maximum match value and its corresponding location in found. All you need to do is draw the rectangle when maxVal >= threshold.
import cv2
import numpy as np

# Camera
cap = cv2.VideoCapture(0)

# load the symbol (template) in grayscale
symbool = cv2.imread('klaver.jpg', 0)
w, h = symbool.shape[::-1]

while(1):
    res, frame = cap.read()
    img_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    found = None
    #res = cv2.matchTemplate(img_gray, symbool, cv2.TM_CCOEFF_NORMED)
    for scale in np.linspace(0.2, 1.0, 20)[::-1]:
        resized = cv2.resize(img_gray, None, fx=scale, fy=scale)
        r = img_gray.shape[1] / float(resized.shape[1])
        if resized.shape[0] < h or resized.shape[1] < w:
            break
        result = cv2.matchTemplate(resized, symbool, cv2.TM_CCOEFF_NORMED)
        (_, maxVal, _, maxLoc) = cv2.minMaxLoc(result)
        if found is None or maxVal > found[0]:
            found = (maxVal, maxLoc, r)
    (maxVal, maxLoc, r) = found
    (startX, startY) = (int(maxLoc[0] * r), int(maxLoc[1] * r))
    (endX, endY) = (int((maxLoc[0] + w) * r), int((maxLoc[1] + h) * r))
    threshold = 0.5
    if maxVal >= threshold:
        cv2.rectangle(img_gray, (startX, startY), (endX, endY), (0, 255, 255), 1)
    cv2.imshow('Resultaat', img_gray)
    k = cv2.waitKey(5) & 0xFF
    if k == 27:
        print(maxVal)
        break

cap.release()
cv2.destroyAllWindows()