我有一个带有 Adobe Acrobat 注释的 PDF 文件。我已经使用 Python 的 fitz 软件包(PyMuPDF)提取了注释文本和注释框的坐标;但是,有些注释框带有箭头,我还需要提取这些箭头的坐标。
import fitz

# Walk every page of the PDF and collect, for each annotation that carries a
# non-empty comment, its text, its rectangle and its colors.
pdf = fitz.open('/home/dfoundry/Downloads/blankcrf.pdf')
try:
    n = len(pdf)  # page count; the loop bound was previously undefined
    for page_index in range(n):
        page = pdf[page_index]
        # NOTE: these lists are rebuilt for every page, so they must be
        # consumed inside this loop; after the loop they only hold the
        # results for the last page.
        content = []      # annotation comment text
        coordinates = []  # annotation rectangles (annot.rect)
        color = []        # annotation colors (annot.colors)

        # Annotations form a linked list: start at the first one and follow
        # `.next` until it yields None (also covers pages with no annotations).
        annot = page.firstAnnot
        while annot is not None:
            # Only annotations with a comment are recorded; the three lists
            # are appended together so they stay index-aligned.
            if annot.info['content'] != "":
                content.append(annot.info['content'])
                coordinates.append(annot.rect)
                color.append(annot.colors)
                # NOTE(review): for arrow / callout line coordinates, PyMuPDF
                # documents `annot.vertices` - confirm against the installed
                # version before relying on it.
            annot = annot.next
finally:
    # Always release the document, even if reading an annotation fails.
    pdf.close()