我试图在图像中获取目标区域时出现问题。
现在我只想提取出图像中的纸张部分。
实际上,我最终通过以下代码得到了如下结果:
import numpy as np
import argparse
import cv2
import math
# --- CLI setup and image preprocessing ---
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True, help="path to input image")
args = vars(ap.parse_args())
# Load twice: `origin` in color (for drawing overlays), `image` as grayscale.
origin = cv2.imread(args["image"])
image = cv2.imread(args["image"], 0)
# Keep an untouched grayscale copy for the final perspective warp.
image2 = image.copy()
# shape is (rows, cols); the reversed slice yields (width, height).
w, h = image.shape[::-1]
# Two successive blurs (box filter, then Gaussian) to suppress texture
# noise before edge detection.
# NOTE(review): double-blurring with 9x9 kernels may wash out the paper's
# outer edge — this is the line the author later commented out; consider a
# single GaussianBlur and confirm per input image.
image = cv2.blur(image, (9, 9), 0)
img = cv2.GaussianBlur(image, (9, 9), 0)
# Canny edge map with hysteresis thresholds 50/150.
canny = cv2.Canny(img, 50, 150)
cv2.imshow('canny', canny)
cv2.imwrite('./tmp_canny.png', canny)
cv2.waitKey(0)
# --- Probabilistic Hough line detection on the edge map ---
# Each entry: [slope k, intercept b, x1, y1, x2, y2] for one segment.
line_arrays = []
# Resolution 1 px / 1 degree, 80 accumulator votes, segments at least
# 150 px long, gaps up to 10 px bridged.
lines_p = cv2.HoughLinesP(canny, 1, np.pi / 180, 80, minLineLength=150, maxLineGap=10)
# HoughLinesP returns shape (N, 1, 4); drop the singleton middle axis.
lines1_p = lines_p[:, 0, :]
for x1, y1, x2, y2 in lines1_p[:]:
    cv2.line(img, (x1, y1), (x2, y2), (255, 0, 0), 1)
    # Skip vertical segments (infinite slope); others stored as y = k*x + b.
    if float(x1 - x2) != 0.0:
        k = float(y1 - y2) / float(x1 - x2)
        b = float(y1) - float(x1) * float(y1 - y2) / float(x1 - x2)
        line_arrays.append([k, b, x1, y1, x2, y2])
print '*************************line_arrays: %s**************' % len(line_arrays)
print line_arrays
cv2.imshow('img-withline', img)
cv2.imwrite('./img-withline.png', img)
cv2.waitKey(0)
# --- Pairwise intersections of the detected lines ---
intersections = []
for index1, obj in enumerate(line_arrays):
    cv2.line(origin, (obj[2], obj[3]), (obj[4], obj[5]), (255, 0, 0), 3)
    for index2, obj_1 in enumerate(line_arrays):
        # index2 > index1 visits each unordered pair exactly once.
        if index2 > index1:
            cv2.line(origin, (obj_1[2], obj_1[3]), (obj_1[4], obj_1[5]), (0, 0, 255), 3)
            # cv2.imshow('calc intersections...', origin)
            # cv2.imwrite('./tmp_calc_intersections.png', origin)
            # cv2.waitKey(0)
            # Only pairs whose slopes differ and have opposite signs (or one
            # is zero) are intersected.
            # NOTE(review): the k1*k2 <= 0 test also skips pairs whose slopes
            # share a sign, which can drop valid corners of a tilted sheet —
            # confirm this filter is intended.
            if float(obj[0]) * float(obj_1[0]) <= 0 and float(obj_1[0]) != float(obj[0]):
                # Solve k1*x + b1 == k2*x + b2 for the crossing point.
                x = (float(obj[1]) - float(obj_1[1])) / (float(obj_1[0]) - float(obj[0]))
                y = float(obj[0]) * x + float(obj[1])
                # Keep only intersections strictly inside the image bounds.
                if x > 0 and y > 0 and x < w and y < h:
                    intersections.append([x, y])
print '********************intersections: %s********************' % len(intersections)
print intersections
# Mark every surviving intersection in green on the color image.
for j in intersections:
    cv2.circle(origin, (int(j[0]), int(j[1])), 2, (0, 255, 0), 3)  # img to origin
# cv2.imshow('img-withdot-loop', origin)
# cv2.imwrite('./img-withdot-loop.png', origin)
# cv2.waitKey(0)
cv2.imshow('img-withdot', origin)
cv2.imwrite('./img-withdot.png', origin)
cv2.waitKey(0)
# --- Classify intersections into the four paper corners by quadrant ---
# NOTE(review): later points in the same quadrant silently overwrite
# earlier ones, and a quadrant with no point leaves its variable
# undefined (NameError below) — confirm the input always yields exactly
# one point per quadrant.
for jd in intersections:
    if jd[0] < w // 2 and jd[1] < h // 2:
        left_top = jd
    elif jd[0] > w // 2 and jd[1] < h // 2:
        right_top = jd
    elif jd[0] < w // 2 and jd[1] > h // 2:
        left_bottom = jd
    elif jd[0] > w // 2 and jd[1] > h // 2:
        right_bottom = jd
    else:
        # Points exactly on a midline fall through unused.
        pass
# Euclidean lengths of the top edge (width) and left edge (height)
# of the detected quadrilateral.
text_width = math.sqrt((left_top[0] - right_top[0]) * (left_top[0] - right_top[0]) + (left_top[1] - right_top[1]) * (
    left_top[1] - right_top[1]))
text_hight = math.sqrt(
    (left_top[0] - left_bottom[0]) * (left_top[0] - left_bottom[0]) + (left_top[1] - left_bottom[1]) * (
        left_top[1] - left_bottom[1]))
# Build source/destination quads for the perspective transform. When the
# quad is wider than tall, the source corner order is rotated so the
# rectified output ends up in the other orientation, and width/height are
# swapped afterwards so the final crop dimensions still match.
if text_width < text_hight:
    pts3 = np.float32([left_top, right_top,
                       left_bottom, right_bottom])
    pts4 = np.float32([[0, 0], [text_width, 0], [0, text_hight], [text_width, text_hight]])
else:
    pts3 = np.float32([left_bottom, left_top,
                       right_bottom, right_top])
    pts4 = np.float32([[0, 0], [text_hight, 0], [0, text_width], [text_hight, text_width]])
    # Swap so the crop below indexes the rotated output correctly.
    text_width,text_hight = text_hight,text_width
M_perspective = cv2.getPerspectiveTransform(pts3, pts4)
# NOTE(review): dsize=(0, 0) is unusual for warpPerspective — the output
# size is normally given explicitly, e.g. (int(text_width), int(text_hight));
# verify this actually yields a usable image on this OpenCV version.
img_perspective = cv2.warpPerspective(image2, M_perspective, (0, 0))
cv2.imshow('img-perspective', img_perspective)
cv2.waitKey(0)
# Crop the rectified paper region from the warped image and save it.
crop_img = img_perspective[0:int(text_hight), 0:int(text_width)]
cv2.imshow('crop_img', crop_img)
cv2.imwrite('crop_img.png', crop_img)
cv2.waitKey(0)
cv2.destroyAllWindows()
所以,它没有得到纸张的外边框。
后来我怀疑这与模糊处理有关,于是注释掉了这一行:
image = cv2.blur(image, (9, 9), 0)
结果只得到了纸张的一部分。
我认为这是因为检测到了错误的线条和交点,但我还没有尝试优化代码(稍后我会尝试优化代码,以便筛选出正确的线条或交点)。
所以,这让我很困惑是否或何时使用模糊。
如何在每个输入图像上动态执行此操作?
还是说有更好的通用方法来获取纸张区域?
ENV:
opencv 3.2.0
Python 2.7.10
任何建议都非常感谢!
感谢。
韦斯利
[EDIT1]
根据Alex的评论,代码片段在这里:
# --- Alternative approach: contour-based paper detection (EDIT1) ---
# OpenCV 3.x findContours returns (image, contours, hierarchy); only the
# outermost contours are kept (RETR_EXTERNAL) with compressed points.
tmpimage, contours, h = cv2.findContours(canny, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# Sort ascending by area so largest_areas[-1] is the biggest contour.
largest_areas = sorted(contours, key=cv2.contourArea)
print [cv2.contourArea(cnt) for cnt in largest_areas]
# Draw the largest contour in red.
cv2.drawContours(origin, [largest_areas[-1]], 0, (0, 0, 255), 3)
cv2.imshow('largestareacnt', origin)
cv2.imwrite('largestareacnt.png', origin)
cv2.waitKey(0)
# Axis-aligned bounding box of the largest contour, drawn in blue.
# NOTE(review): this rebinds w and h, which earlier held the full image
# size — confirm nothing after this still relies on the old values.
x, y, w, h = cv2.boundingRect(largest_areas[-1])
cv2.rectangle(origin, (x, y), (x + w, y + h), (255, 0, 0), 2)
# Minimum-area (rotated) rectangle around the same contour, drawn in green.
rect = cv2.minAreaRect(largest_areas[-1])
box = cv2.boxPoints(rect)
box = np.int0(box)
cv2.drawContours(origin, [box], 0, (0, 255, 0), 2)
因此,它将红色部分视为具有最大面积的部分。
我认为这是因为检测到的轮廓不是封闭的。
但我们无法确保检测到的每个轮廓都是封闭的。