我正在使用Google Vision API。
我想获取google vision在其中找到单词块的矩形的顶点((x,y)位置)。到目前为止,我正在从Google客户端获取文本。
credentials = service_account.Credentials.from_service_account_file("/api-key.json")
client = vision.ImageAnnotatorClient(credentials=credentials)
#open file
with io.open(path, 'rb') as image_file:
content = image_file.read()
#call api
image = types.Image(content=content)
response = client.document_text_detection(image=image)
document = response.full_text_annotation
我想要获得document.text
中每个单词块的顶点。
答案 0 :(得分:0)
在Google documentation中,您可以找到API响应的结构(块,段落,...)以及如何检索相应的顶点。
特别是此功能:
def get_document_bounds(image_file, feature):
"""Returns document bounds given an image."""
client = vision.ImageAnnotatorClient()
bounds = []
with io.open(image_file, 'rb') as image_file:
content = image_file.read()
image = types.Image(content=content)
response = client.document_text_detection(image=image)
document = response.full_text_annotation
# Collect specified feature bounds by enumerating all document features
for page in document.pages:
for block in page.blocks:
for paragraph in block.paragraphs:
for word in paragraph.words:
for symbol in word.symbols:
if (feature == FeatureType.SYMBOL):
bounds.append(symbol.bounding_box)
if (feature == FeatureType.WORD):
bounds.append(word.bounding_box)
if (feature == FeatureType.PARA):
bounds.append(paragraph.bounding_box)
if (feature == FeatureType.BLOCK):
bounds.append(block.bounding_box)
if (feature == FeatureType.PAGE):
bounds.append(block.bounding_box)
# The list `bounds` contains the coordinates of the bounding boxes.
return bounds