我需要从图像中提取URL和错误消息。图像的DPI为300,尺寸为宽1536像素、高1068像素。
# OCR helper script: scans every JPEG in a fixed directory, runs Tesseract on
# each sufficiently large image, and prints the first URL found plus any lines
# that look like error messages.
import glob
import os
import re

# Groups: scheme, separator, host, optional path.  The separator alternative
# "H" is there because the OCR step can misread "//" as a capital H.
URL_PATTERN = re.compile(
    r'(http|ftp|https):(//|H)([\w_-]+(?:(?:\.[\w_-]+)+))'
    r'([\w.,@?^=%&:/~+#-]*[\w@?^=%&/~+#-])?'
)

# Lower-case substrings that mark an OCR line as an error message.
ERROR_KEYWORDS = ('error', 'cannot', 'warning', 'problem', "can't", 'unable',
                  'could not', 'invalid')

# Images with fewer rows/columns than this are reported and not OCR'd.
MIN_HEIGHT = 715
MIN_WIDTH = 1020

TESSERACT_CONFIG = '-c tessedit_char_whitelist=0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ:.// -psm 3'


def extract_url(text):
    """Return the first URL found in OCR *text*.

    The scheme separator is always emitted as "://", whether the OCR read it
    as "//" or as "H".  Returns 'No url found in image' when nothing matches.
    """
    matches = URL_PATTERN.findall(text)
    if not matches:
        return 'No url found in image'
    scheme, _separator, host, path = matches[0]
    # OCR fix-ups carried over from the original script; presumably 'I' is a
    # misread '/' and 'BBBB' a misread port '8888'.
    # NOTE(review): these also rewrite legitimate capital I's in the URL.
    rest = (host + path).replace('::', ':').replace('I', '/').replace('BBBB', '8888')
    return scheme + '://' + rest


def extract_error_message(text):
    """Return the lines of *text* containing an error keyword, newline-joined.

    Matching is case-insensitive.  Returns 'No error message found' when no
    line contains any of ERROR_KEYWORDS.
    """
    # Positional separator: str.split() rejects keyword arguments on Python 2.
    hits = []
    for line in text.split('\n'):
        lowered = line.lower()
        if any(keyword in lowered for keyword in ERROR_KEYWORDS):
            hits.append(line)
    if not hits:
        return 'No error message found'
    return '\n'.join(hits)


list_f = glob.glob('/home/agnihotri/img/*.jpg')
if list_f:
    # Loaded only when there is work to do, so the text helpers above remain
    # importable on machines without OpenCV/Tesseract.
    import cv2
    import pytesseract

    res_final = []  # never populated below; kept as a module-level name
    if not os.path.exists('processed/images'):
        os.makedirs('processed/images')

    for f in list_f:
        image = cv2.imread(f)
        if image is None:
            # cv2.imread signals an unreadable file by returning None.
            print('Could not read image: ' + f)
            continue
        # NOTE(review): the original indentation was lost; this assumes
        # low-resolution images are skipped rather than OCR'd unscaled.
        if image.shape[0] < MIN_HEIGHT or image.shape[1] < MIN_WIDTH:
            print('Resolution too low')
            continue
        image = cv2.resize(image, None, fx=10, fy=10)
        text = pytesseract.image_to_string(image, config=TESSERACT_CONFIG, nice=2)
        print(extract_url(text))
        print(extract_error_message(text))
但是,执行上述代码后,我收到了以下错误消息:
No url found in image
Traceback (most recent call last): File "./oc_script.py", line 37, in <module>
TypeError: split() takes no keyword arguments
输入图片的链接是https://drive.google.com/open?id=1drdqJjR06EIkt_YbBcVLDZW1s8FRj1J6
有谁知道为什么会这样?
答案 0 :(得分:0)
在Python 2中,str.split()不接受关键字参数。请将
for l in text.split(sep='\n'):
更改为
for l in text.split('\n'):