我的透明背景图片带有一些不透明的文字。
我想找到文本中每个单词的所有边界框。
以下是有关创建透明图像和绘制一些文本的代码(例如,#34; Hello World"),然后进行仿射变换和缩略图。
from PIL import Image, ImageFont, ImageDraw, ImageOps
import numpy as np
fontcolor = (255,255,255)
fontsize = 180
# padding rate for setting the image size of font
fimg_padding = 1.1
# check code bbox padding rate
bbox_gap = fontsize * 0.05
# Rrotation +- N degree
# Choice a font type for output---
font = ImageFont.truetype('Fonts/Bebas.TTF', fontsize)
# the text is "Hello World"
code = "Hello world"
# Get the related info of font---
code_w, code_h = font.getsize(code)
# Setting the image size of font---
img_size = int((code_w) * fimg_padding)
# Create a RGBA image with transparent background
img = Image.new("RGBA", (img_size,img_size),(255,255,255,0))
d = ImageDraw.Draw(img)
# draw white text
code_x = (img_size-code_w)/2
code_y = (img_size-code_h)/2
d.text( ( code_x, code_y ), code, fontcolor, font=font)
# img.save('initial.png')
# Transform the image---
img = img_transform(img)
# crop image to the size equal to the bounding box of whole text
alpha = img.split()[-1]
img = img.crop(alpha.getbbox())
# resize the image
img.thumbnail((512,512), Image.ANTIALIAS)
# img.save('myimage.png')
# what I want is to find all the bounding box of each individual word
boxes=find_all_bbx(img)
以下是关于仿射变换的代码(此处为想要进行实验的人提供)
def find_coeffs(pa, pb):
matrix = []
for p1, p2 in zip(pa, pb):
matrix.append([p1[0], p1[1], 1, 0, 0, 0, -p2[0]*p1[0], -p2[0]*p1[1]])
matrix.append([0, 0, 0, p1[0], p1[1], 1, -p2[1]*p1[0], -p2[1]*p1[1]])
A = np.matrix(matrix, dtype=np.float)
B = np.array(pb).reshape(8)
res = np.dot(np.linalg.inv(A.T * A) * A.T, B)
return np.array(res).reshape(8)
def rand_degree(st,en,gap):
return (np.fix(np.random.random()* (en-st) * gap )+st)
def img_transform(img):
width, height = img.size
print img.size
m = -0.5
xshift = abs(m) * width
new_width = width + int(round(xshift))
img = img.transform((new_width, height), Image.AFFINE,
(1, m, -xshift if m > 0 else 0, 0, 1, 0), Image.BICUBIC)
range_n = width*0.2
gap_n = 1
x1 = rand_degree(0,range_n,gap_n)
y1 = rand_degree(0,range_n,gap_n)
x2 = rand_degree(width-range_n,width,gap_n)
y2 = rand_degree(0,range_n,gap_n)
x3 = rand_degree(width-range_n,width,gap_n)
y3 = rand_degree(height-range_n,height,gap_n)
x4 = rand_degree(0,range_n,gap_n)
y4 = rand_degree(height-range_n,height,gap_n)
coeffs = find_coeffs(
[(x1, y1), (x2, y2), (x3, y3), (x4, y4)],
[(0, 0), (width, 0), (new_width, height), (xshift, height)])
img = img.transform((width, height), Image.PERSPECTIVE, coeffs, Image.BICUBIC)
return img
如何实现find_all_bbx
来查找每个单词的边界框?
例如,其中一个框可以在' H' (您可以下载图像以查看部分结果。)
答案 0 :(得分:0)
对于您想要做的事情,您需要标记单个单词,然后使用相同的标签计算每个对象的边界框。
这里最直接的方法是获取构成该单词的像素的最小和最大位置。
标签有点困难。例如,您可以使用形态学操作来组合单词的字母(形态开放,see PIL documentation),然后使用ImageDraw.floodfill
。或者您可以尝试从第一次绘制文本的位置预测单词的位置
code_x
和code_y
以及字母和间距的选择字体和大小(我认为这会比较棘手)。