我正在使用OCR进行文本检测,并且在我的程序中,如果一张照片包含多个文本,它将创建多个边界框。我想知道是否有一种方法可以组合所有框并制作新的裁剪后的文本输出图像。 PS:我正在使用EAST深度学习课本。问题是它检测到图像中的文本,但是如果文本之间的距离稍远,它会基于该原始图像创建2或3个图像,我正在尝试寻找一种方法来将这2-3种作物合并为1。
(newW, newH) = (args["width"], args["height"])
rW = W / float(newW)
rH = H / float(newH)
image = cv2.resize(image, (newW, newH))
(H, W) = image.shape[:2]
layerNames = [
"feature_fusion/Conv_7/Sigmoid",
"feature_fusion/concat_3"]
print("[INFO] loading EAST text detector...")
net = cv2.dnn.readNet(args["east"])
blob = cv2.dnn.blobFromImage(image, 1.0, (W, H),
(123.68, 116.78, 103.94), swapRB=True,
crop=False)
start = time.time()
net.setInput(blob)
(scores, geometry) = net.forward(layerNames)
end = time.time()
print("[INFO] text detection took {:.6f} seconds".format(end - start))
(numRows, numCols) = scores.shape[2:4]
rects = []
confidences = []
for y in range(0, numRows):
scoresData = scores[0, 0, y]
xData0 = geometry[0, 0, y]
xData1 = geometry[0, 1, y]
xData2 = geometry[0, 2, y]
xData3 = geometry[0, 3, y]
anglesData = geometry[0, 4, y]
for x in range(0, numCols):
if scoresData[x] < args["min_confidence"]:
continue
(offsetX, offsetY) = (x * 4.0, y * 4.0)
angle = anglesData[x]
cos = np.cos(angle)
sin = np.sin(angle)
h = xData0[x] + xData2[x]
w = xData1[x] + xData3[x]
endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
startX = int(endX - w)
startY = int(endY - h)
rects.append((startX, startY, endX, endY))
confidences.append(scoresData[x])
boxes = non_max_suppression(np.array(rects), probs=confidences)
for number, (startX, startY, endX, endY) in enumerate(boxes):
startX = int(startX * rW)
startY = int(startY * rH)
endX = int(endX * rW)
endY = int(endY * rH)
Final = orig[startY:endY, startX:endX]
cv2.imshow("Text Detection", Final)
cv2.waitKey(0)
cv2.imwrite("crop{}.jpg".format(number), Final)