我需要对文档图像进行预处理,以便后续识别。我国有一条严格的规定:文件正文必须使用黑色或灰色字体书写,而签名和印章必须是蓝色的。因此,在当前步骤中,我需要通过去除非灰度的彩色元素来去掉印章和签名。这是文档片段的示例: 我已经编写了代码,将灰度分量和非灰度分量分别提取为灰度图像:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import numpy as np
from skimage import io
from skimage.color import rgba2rgb, rgb2grey, colorconv
import matplotlib.pyplot as plt
def _to_unit_float(im):
    """Return ``im`` as a float array without touching private skimage APIs.

    Floating-point input is returned unchanged. Unsigned-integer input is
    divided by the maximum value of its dtype, which maps it onto [0, 1].
    Any other dtype is handed to scikit-image's public ``img_as_float``.
    """
    im = np.asarray(im)
    kind = im.dtype.kind
    if kind == 'f':
        return im
    if kind == 'u':
        return im / float(np.iinfo(im.dtype).max)
    # Rare dtypes (signed integers, booleans): defer to the library converter.
    from skimage.util import img_as_float
    return img_as_float(im)


def separate_color_regions(im):
    """Split an RGB(A) image into non-gray and gray filter responses.

    Each pixel is inverted (so white becomes the zero vector) and then
    decomposed into its projection on the gray axis (1, 1, 1) and the
    remainder orthogonal to that axis.

    Parameters
    ----------
    im : ndarray
        Image whose last axis holds 3 (RGB) or 4 (RGBA) channels.

    Returns
    -------
    (nongray, gray) : tuple of ndarray
        Two arrays with the channel axis removed.
        ``nongray`` is ``1 - |orthogonal part| / |inverted color|``.
        ``gray`` is ``1 - sqrt(3) * mean(inverted color) / |inverted color|``.
        Pixels whose inverted color is the zero vector (pure white) yield
        1.0 in both outputs.

    Raises
    ------
    ValueError
        If the last axis does not hold 3 channels after alpha removal.
    """
    if im.shape[-1] == 4:
        im = rgba2rgb(im)
    if im.shape[-1] != 3:
        raise ValueError(
            "expected an image with 3 or 4 channels on the last axis, "
            "got shape %r" % (im.shape,)
        )

    # Decompose relative to black ink on white paper: invert so that white
    # becomes the zero vector instead of black.
    inverted = 1. - _to_unit_float(im)

    # Mean of the channels; sqrt(3) * gray_level is the length of the
    # projection on the gray axis, because |(1, 1, 1)| = sqrt(3).
    gray_level = (inverted[..., 0] + inverted[..., 1] + inverted[..., 2]) / 3
    # What remains after removing the gray projection from every channel.
    chroma = inverted - gray_level[..., np.newaxis]

    color_len = np.sqrt(np.sum(inverted * inverted, axis=-1))
    chroma_len = np.sqrt(np.sum(chroma * chroma, axis=-1))

    # White pixels have zero length, so both ratios are 0/0 there. The NaNs
    # are replaced with 0 below, so silence the expected warning.
    with np.errstate(invalid='ignore'):
        chroma_ratio = chroma_len / color_len
        gray_ratio = (3. ** 0.5) * gray_level / color_len
    chroma_ratio[np.isnan(chroma_ratio)] = 0.
    gray_ratio[np.isnan(gray_ratio)] = 0.

    return 1. - chroma_ratio, 1. - gray_ratio
# Demo: show the source scan next to both filter responses.
im = io.imread('stamps.png')
fig, (ax1, ax2, ax3) = plt.subplots(nrows=3)
nongray, gray = separate_color_regions(im)

# The filter outputs are single-channel, so draw them with a gray colormap
# and without interpolation.
filter_style = dict(cmap=plt.cm.gray, aspect='auto', interpolation='none')
panels = (
    (ax1, im, 'Original image', {}),
    (ax2, nongray, 'Non-grey_components filter', filter_style),
    (ax3, gray, 'Grey component filter', filter_style),
)
for axis, picture, caption, style in panels:
    axis.imshow(picture, **style)
    axis.axis('off')
    axis.set_title(caption)

fig.tight_layout()
plt.show()
以下是它的运行结果: 您会发现,它只能突出显示非灰度对象,却无法把灰度对象单独分离出来,而且给图像带来了许多噪点。如果针对我的情况有更好的解决方案,请告诉我。
答案 0 :(得分:0)
通过手动设置阈值解决了这个问题:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import numpy as np
from skimage import io
from skimage.color import rgba2rgb, rgb2grey, colorconv
import matplotlib.pyplot as plt
def separate_color_regions(im):
if im.shape[-1]==4:
im=rgba2rgb(im)
#representin each color chanel as float in range [0, 1.]
im=colorconv._prepare_colorarray(im)
#because we wont decompose black color instead of white
im=1.-im
#calculate gray component using the formula: grey=cos_a*|color|*(1, 1, 1)/sqrt(3),
#according to scalar multiplication formula cos_a*|color|=color*(1, 1, 1)/sqrt(3)
#here sqrt(3) added since |(1, 1, 1)|=sqrt(3)
gray=(im[..., 0]+im[..., 1]+im[..., 2])/3
proj=im.copy()
for i in range(3):
proj[..., i]-=gray
def norm(inp):
"""
convert each color vector to its l2 norm
"""
t=inp*inp
return np.sqrt(np.sum(t, -1))
def mask_nan(inp):
where_are_NaNs = np.isnan(inp)
inp[where_are_NaNs] = 0.
return inp
return 1.-mask_nan(norm(proj)/norm(im)), 1.-mask_nan((3.**0.5)*gray/norm(im))
# Demo: show the source scan, the non-gray filter response, and the gray
# content that survives after colored regions are thresholded away.
im = io.imread('stamps.png')
fig, (ax1, ax2, ax3) = plt.subplots(nrows=3)

ax1.imshow(im)
ax1.axis('off')
ax1.set_title('Original image')

nongray, gray = separate_color_regions(im)
ax2.imshow(nongray, cmap=plt.cm.gray, aspect='auto', interpolation='none')
ax2.axis('off')
ax2.set_title('Non-grey_components filter')

# Manually chosen cut-off: a pixel whose colored share (1 - nongray) exceeds
# this value is treated as colored and blanked out.
COLOR_THRESHOLD = 0.1
# Vectorised threshold: 0 where the pixel is colored, 1 where it is kept.
keep_mask = np.where(1. - nongray > COLOR_THRESHOLD, 0., 1.)

# NOTE(review): `rgb2grey` is the older spelling of `rgb2gray`; it appears to
# have been removed in newer scikit-image releases - confirm against the
# installed version and update the import if needed.
# Multiply the inverted gray image by the mask so blanked pixels become white.
gray_only = 1. - (1. - rgb2grey(im)) * keep_mask
ax3.imshow(gray_only, cmap=plt.cm.gray, aspect='auto', interpolation='none')
ax3.axis('off')
ax3.set_title('Grey component filter')

fig.tight_layout()
plt.show()