我正在尝试识别图像中的文本,主要目的是从图像中提取关键信息。例如,对于产品标签,我想识别出其中的配料表和营养成分。问题在于这些图像的尺寸并不统一。我附上了两个示例:sample 1 和 sample 2。我尝试过多种方法,主要是 Canny 边缘检测、Roberts cross、Laplacian 和 Gaussian 滤波,但每张图像得到的结果都不一致。我可能遗漏了什么。任何帮助都将不胜感激。
def channel(image):
    """Build a combined Canny edge mask from every channel of *image*.

    Each channel is blurred with a 7x7 Gaussian kernel and passed to
    ``cv2.Canny`` with thresholds derived from that channel's mean intensity
    (0.66 * mean and 1.33 * mean, measured before blurring). The per-channel
    edge maps are then merged into a single mask.

    Args:
        image: Array accepted by ``cv2.split``; its first two dimensions
            define the size of the returned mask.

    Returns:
        A tuple ``(edges, min_value, max_value)``: ``edges`` is a uint8 mask
        with the same height and width as ``image``; ``min_value`` and
        ``max_value`` are the Canny thresholds of the LAST channel processed.
    """
    accum_edged = np.zeros(image.shape[:2], dtype="uint8")
    for chan in cv2.split(image):
        # Thresholds are scaled around the channel mean so they adapt to the
        # overall brightness of each image.
        mean = np.mean(chan)
        min_value = 0.66 * mean
        max_value = 1.33 * mean
        blurred = cv2.GaussianBlur(chan, (7, 7), 0)
        edged = cv2.Canny(blurred, min_value, max_value)
        # Union of the per-channel maps. The previous XOR erased every edge
        # pixel that was detected in exactly two channels, which made the
        # resulting mask depend on how many channels happened to agree.
        accum_edged = cv2.bitwise_or(accum_edged, edged)
    return accum_edged, min_value, max_value
# One [contour_area, bounding_box_area] pair is appended for every contour
# whose area exceeds the threshold, across all processed images.
temp = []
# Minimum area applied to both the contour and its bounding rectangle.
threshold_area = 10000
# 5 wide x 35 tall rectangle used for the morphological opening; it does not
# depend on the image, so it is built once.
rect_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 35))

for item in os.listdir(image_dir):
    # Read from the directory that was actually listed instead of a second,
    # hard-coded copy of the path.
    img = os.path.join(image_dir, item)
    image = cv2.imread(img)
    if image is None:
        # cv2.imread returns None for anything it cannot decode, including
        # the crop sub-directories this script creates inside image_dir.
        continue
    # `image` stays pristine for cropping; `base_image` receives the boxes.
    base_image = image.copy()

    edge, _, _ = channel(image)

    # findContours returns (contours, hierarchy) on OpenCV 2.x/4.x and
    # (image, contours, hierarchy) on 3.x; the contours are always the
    # second-to-last element. list() guarantees a mutable sequence.
    cnts = list(cv2.findContours(edge.copy(), cv2.RETR_EXTERNAL,
                                 cv2.CHAIN_APPROX_SIMPLE)[-2])
    threshed = cv2.morphologyEx(edge.copy(), cv2.MORPH_OPEN, rect_kernel)
    cnts_alt = cv2.findContours(threshed, cv2.RETR_EXTERNAL,
                                cv2.CHAIN_APPROX_NONE)[-2]
    cnts.extend(cnts_alt)

    # splitext keeps names such as "sample.1.jpg" and "sample.2.jpg" apart,
    # whereas split('.')[0] mapped both to "sample".
    stem = os.path.splitext(item)[0]
    crop_dir = os.path.join(image_dir, stem)

    idx = 0
    for c in cnts:
        area1 = cv2.contourArea(c)
        if area1 <= threshold_area:
            continue
        hull = cv2.convexHull(c)
        x, y, w, h = cv2.boundingRect(hull)
        area = w * h
        temp.append([area1, area])
        if area <= threshold_area:
            continue
        idx += 1
        # cv2.imwrite does not create missing directories; it just fails.
        if not os.path.isdir(crop_dir):
            os.makedirs(crop_dir)
        # Crop from the untouched image so that rectangles drawn for earlier
        # contours never leak into later crops.
        new_img_1 = image[y:y + h, x:x + w]
        out_path = os.path.join(crop_dir, str(idx) + '.png')
        cv2.imwrite(out_path, new_img_1)
        cv2.rectangle(base_image, (x, y), (x + w, y + h), (0, 0, 255), 5)

    # NOTE(review): for an input that is already "<stem>.png" this overwrites
    # the source file with the annotated version, as the original code did.
    path = os.path.join(image_dir, stem + '.png')
    cv2.imwrite(path, base_image)