我想为任何形状的图像创建一个去噪自动编码器。那里的大多数解决方案的图像形状都不大于(500,500),而我拥有的图像是形状为(3000,2000)的文档扫描。我试图重塑图像并建立模型,但是预测不正确。有人可以帮我吗?
我尝试使用此处的https://github.com/mrdragonbear/Autoencoders/blob/master/Autoencoder-Tutorial.ipynb代码构建模型,围绕图像形状运行,但预测失败。
答案 0 :(得分:0)
我已经有文档去噪器。 无需为大型形状提供模型,您可以将其拆分,然后将其输入模型,然后将预测的块再次合并在一起。 我的模型接受形状为512x512的图像,因此我必须将图像按512x512块进行拆分。 图片必须大于或等于512x512。 如果图像较小,则只需调整其大小或使其适合512x512形状即可。
def split_page(page):
chunk_size = (512, 512)
main_size = page.shape[:2]
chunks=[]
chunk_grid = tuple(np.array(main_size)//np.array(chunk_size))
extra_chunk = tuple(np.array(main_size)%np.array(chunk_size))
for yc in range(chunk_grid[0]):
row = []
for xc in range(chunk_grid[1]):
chunk = page[yc*chunk_size[0]:yc*chunk_size[0]+chunk_size[0], xc*chunk_size[1]: xc*chunk_size[1]+chunk_size[1]]
row.append(chunk)
if extra_chunk[1]:
chunk = page[yc*chunk_size[0]:yc*chunk_size[0]+chunk_size[0], page.shape[1]-chunk_size[1]:page.shape[1]]
row.append(chunk)
chunks.append(row)
if extra_chunk[0]:
row = []
for xc in range(chunk_grid[1]):
chunk = page[page.shape[0]-chunk_size[0]:page.shape[0], xc*chunk_size[1]: xc*chunk_size[1]+chunk_size[1]]
row.append(chunk)
if extra_chunk[1]:
chunk = page[page.shape[0]-chunk_size[0]:page.shape[0], page.shape[1]-chunk_size[1]:page.shape[1]]
row.append(chunk)
chunks.append(row)
return chunks, page.shape[:2]
def merge_chunks(chunks, osize):
extra = np.array(osize)%512
page = np.ones(osize)
for i, row in enumerate(chunks[:-1]):
for j, chunk in enumerate(row[:-1]):
page[i*512:i*512+512,j*512:j*512+512]=chunk
page[i*512:i*512+512,osize[1]-512:osize[1]]=chunks[i,-1]
if extra[0]:
for j, chunk in enumerate(chunks[-1][:-1]):
page[osize[0]-512:osize[0],j*512:j*512+512]=chunk
page[osize[0]-512:osize[0],osize[1]-512:osize[1]]=chunks[-1,-1]
else:
for j, chunk in enumerate(chunks[-1][:-1]):
page[osize[0]-512:osize[0],j*512:j*512+512]=chunk
page[osize[0]-512:osize[0],osize[1]-512:osize[1]]=chunks[-1,-1]
return page
def denoise(chunk):
chunk = chunk.reshape(1,512,512,1)/255.
denoised = model.predict(chunk).reshape(512,512)*255.
return denoised
def denoise_page(page):
chunks, osize= split_page(page)
chunks = np.array(chunks)
denoised_chunks = np.ones(chunks.shape)
for i, row in enumerate(chunks):
for j, chunk in enumerate(row):
denoised = denoise(chunk)
denoised_chunks[i][j]=denoised
denoised_page = merge_chunks(denoised_chunks, osize)
return denoised_page