Question

我有一段代码，对输入图像执行如下掩码（mask）操作：

file_names = glob(os.path.join(IMAGE_DIR, "*.jpg"))  # every .jpg path under IMAGE_DIR
masks_prediction = np.zeros((2000, 2000, len(file_names)))  # one fixed 2000x2000 slot per file
for i in range(len(file_names)):
    print(i)  # progress output
    image = skimage.io.imread(file_names[i])
    predictions = model.detect([image],  verbose=1)  # a single image is passed per call
    p = predictions[0]  # only the first result is used
    masks = p['masks']  # indexed below as [:, :, j], one 2-D slice per j
    merged_mask = np.zeros((masks.shape[0], masks.shape[1]))  # same height/width as masks, not 2000x2000
    for j in range(masks.shape[2]):
        merged_mask[masks[:,:,j]==True] = True  # union of all mask slices (True is stored as 1.0)
        masks_prediction[:,:,i] = merged_mask  # ValueError when merged_mask cannot broadcast to (2000, 2000)
print(masks_prediction.shape)

因此，基本上，它从目录中读取所有图像，为每个图像创建一个掩码并运行检测。

但是，由于各图像的大小不同，这段代码无法正常工作，并报出如下错误：

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-10-764e6229811a> in <module>()
     10     for j in range(masks.shape[2]):
     11         merged_mask[masks[:,:,j]==True] = True
---> 12         masks_prediction[:,:,i] = merged_mask
     13 print(masks_prediction.shape)

ValueError: could not broadcast input array from shape (1518,1077) into shape (2000,2000)

我想找到一种方法，在执行掩码操作之前（即错误消息中的第 12 行之前）获知每个图像的大小，从而为掩码操作传入确切的图像形状。

在Python中这有可能吗？

编辑：看来大家并没有理解我想要实现的目标——尽管我确信自己已经写得非常简单明了。不管怎样，下面是这段代码所在的完整代码（复制自 IPython 笔记本）：

import os
import sys
import random
import math
import re
import time
import numpy as np
import tensorflow as tf
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import skimage.draw

# Root directory of the project
ROOT_DIR = os.path.abspath("../../")

# Import Mask RCNN
sys.path.append(ROOT_DIR)  # To find local version of the library
from mrcnn import utils
from mrcnn import visualize
from mrcnn.visualize import display_images
import mrcnn.model as modellib
from mrcnn.model import log
from glob import glob


import components

%matplotlib inline 

# Directories to be referred
MODEL_DIR = os.path.join(ROOT_DIR, "logs")
IMAGE_DIR = os.path.join(ROOT_DIR, "datasets/components/back/predict")
ANNOTATION_DIR = os.path.join(ROOT_DIR, "datasets/components/front/")
WEIGHTS_PATH = os.path.join(ROOT_DIR, "logs/back/mask_rcnn_components_0100.h5")

config = components.ComponentsConfig()


# Override the training configurations with a few
# changes for inferencing.
class InferenceConfig(config.__class__):
    # Run detection on one image at a time
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1


# Rebind `config` to the inference variant and print its settings.
config = InferenceConfig()
config.display()

# NOTE(review): DEVICE is not defined anywhere in this snippet; it must be
# assigned (e.g. a TensorFlow device string such as "/cpu:0" or "/gpu:0")
# before this point, otherwise the next line raises NameError.
# Create model in inference mode
with tf.device(DEVICE):
    model = modellib.MaskRCNN(mode="inference", model_dir=MODEL_DIR,
                              config=config)
# Load weights
print("Loading weights ", WEIGHTS_PATH)
model.load_weights(WEIGHTS_PATH, by_name=True)

# Run detection on every .jpg in IMAGE_DIR and store one merged mask per image
# in masks_prediction[:, :, i].
file_names = glob(os.path.join(IMAGE_DIR, "*.jpg"))
# Every image gets a fixed 2000x2000 slot, regardless of its real size.
masks_prediction = np.zeros((2000, 2000, len(file_names)))
for i in range(len(file_names)):
    print(i)
    image = skimage.io.imread(file_names[i])
    predictions = model.detect([image],  verbose=1)
    # Only one image was passed in, so only the first result is used.
    p = predictions[0]
    masks = p['masks']
    # Accumulator with the same height/width as `masks` (not 2000x2000).
    merged_mask = np.zeros((masks.shape[0], masks.shape[1]))
    for j in range(masks.shape[2]):
        # Union: mark every pixel that is set in the j-th mask slice.
        merged_mask[masks[:,:,j]==True] = True
        # This runs once per j (it sits inside the inner loop) and raises
        # ValueError when merged_mask cannot be broadcast to (2000, 2000).
        masks_prediction[:,:,i] = merged_mask
print(masks_prediction.shape)

# Compare each predicted merged mask with the merged annotated mask and
# accumulate pixel accuracy and precision over the annotated dataset.
dataset = components.ComponentsDataset()
dataset.load_components(ANNOTATION_DIR, "predict")

accuracy = 0
precision = 0
for image_id in range(len(dataset.image_info)):
    name = dataset.image_info[image_id]['id']
    file_name = os.path.join(IMAGE_DIR, name)
    # list.index raises ValueError if this path was not returned by glob().
    image_id_pred = file_names.index(file_name)
    # 2000x2000 slice, because masks_prediction was allocated with that size.
    merged_mask = masks_prediction[:, :, image_id_pred]

    # Only the first element returned by load_mask is used.
    annotated_mask = dataset.load_mask(image_id)[0]
    # NOTE(review): three different sizes appear in this loop -- 2000x2000
    # (merged_mask), 510x510 (here) and 1200*1600 (accuracy denominator).
    # The comparisons below only work element-wise if all of them agree with
    # the real image size; confirm which size is intended.
    merged_annotated_mask = np.zeros((510, 510))
    for i in range(annotated_mask.shape[2]):
        # Union of all annotated mask slices.
        merged_annotated_mask[annotated_mask[:,:,i]==True] = True
    # Fraction of pixels on which prediction and annotation agree.
    accuracy  += np.sum(merged_mask==merged_annotated_mask) / (1200 * 1600)
    # Annotated-positive pixels among the predicted-positive pixels.
    all_correct = np.sum(merged_annotated_mask[merged_mask == 1])
    # NOTE(review): divides by zero when the predicted mask is empty.
    precision += all_correct / (np.sum(merged_mask))
# NOTE(review): the sums run over dataset.image_info but are averaged over
# len(file_names); verify that both collections have the same length.
print('accuracy:{}'.format(accuracy / len(file_names)))
print('precision:{}'.format(precision / len(file_names)))

# Pick one random image from IMAGE_DIR, run detection on it and display the
# detected instances.
file_names = glob(os.path.join(IMAGE_DIR, "*.jpg"))
class_names = ['BG', 'screw', 'lid']
test_image = skimage.io.imread(file_names[random.randint(0,len(file_names)-1)])
predictions = model.detect([test_image], verbose=1) # A single image is passed; the list is not padded with copies
p = predictions[0]
visualize.display_instances(test_image, p['rois'], p['masks'], p['class_ids'], 
                            class_names, p['scores'])

Answer 1

图像只是一个 numpy 数组。因此，对于您的问题“是否有可能知道每个图像的大小”，答案是：可以，只需读取图像的 shape 属性（例如 image.shape）。

如果您要处理许多不同尺寸的图像，则可能需要将它们调整为统一的分辨率。skimage 提供了内置函数 skimage.transform.resize，详见其官方文档。

如果使用 resize，应确保不会在图像中引入伪影。在调用 resize 之前，请先检查图像的尺寸。

skimage 的 resize 相当慢。如果需要更高的性能，可以使用 OpenCV。它有一个很棒的 Python API，并且由于提供了 conda 软件包，安装也非常容易。

# Load every .jpg in IMAGE_DIR and resize it to a common shape so that all
# images (and therefore all predicted masks) have identical dimensions.
import skimage.io
from skimage.transform import resize  # provides the bare name `resize` used below

resized_images = []
file_names = glob(os.path.join(IMAGE_DIR, "*.jpg"))
for i, file_name in enumerate(file_names):
    print("Resizing: " + str(i))
    image = skimage.io.imread(file_name)
    # output_shape is given as (rows, cols), i.e. 1200 high by 800 wide.
    # NOTE(review): by default resize returns a floating-point image converted
    # per img_as_float; pass preserve_range=True if downstream code expects
    # the original value range -- confirm against the model's input handling.
    image_resized = resize(image, (1200, 800), anti_aliasing=True)
    resized_images.append(image_resized)

Python：如何根据图像输入重塑数组？

1 个答案: