Question

我有以下一段代码，它加载一个数据集，将图像调整为1200 * 800，加载它们的注释，然后报告准确性和预测：

# resize images
resized_images = []
file_names = glob(os.path.join(IMAGE_DIR, "*.jpg")) 
for i in range(len(file_names)):
    print("Resizing: " + str(i))
    image = skimage.io.imread(file_names[i])
    image_resized = resize(image, (1200, 800),anti_aliasing=True)
    resized_images.append(image_resized)



masks_prediction = np.zeros((1200, 800, len(file_names)))
for i in range(len(resized_images)):
    print(i)
    image = resized_images[i]
    predictions = model.detect([image],  verbose=1)
    p = predictions[0]
    masks = p['masks']
    merged_mask = np.zeros((masks.shape[0], masks.shape[1]))
    for j in range(masks.shape[2]):
        merged_mask[masks[:,:,j]==True] = True
        masks_prediction[:,:,i] = merged_mask
print(masks_prediction.shape)

#load annotations
dataset = components.ComponentsDataset()
dataset.load_components(ANNOTATION_DIR, "predict")
resized_images = []
file_names = glob(os.path.join(IMAGE_DIR, "*.jpg")) 
for i in range(len(file_names)):
    print("Resizing: " + str(i))
    image = skimage.io.imread(file_names[i])
    image_resized = resize(image, (1200, 800),anti_aliasing=True)
    resized_images.append(image_resized)

# report the accuracy and prediction
accuracy = 0
precision = 0
for image_id in range(len(dataset.image_info)):
    name = dataset.image_info[image_id]['id']
    file_name = os.path.join(IMAGE_DIR, name)
    image_id_pred = file_names.index(file_name)
    merged_mask = masks_prediction[:, :, image_id_pred]

    annotated_mask = dataset.load_mask(image_id)[0]
    merged_annotated_mask = np.zeros((1200, 800))
    for i in range(annotated_mask.shape[2]):
        merged_annotated_mask[annotated_mask[:,:,i]==True] = True
    accuracy  += np.sum(merged_mask==merged_annotated_mask) / (1200 * 800)
    all_correct = np.sum(merged_annotated_mask[merged_mask == 1])
    precision += all_correct / (np.sum(merged_mask))
print('accuracy:{}'.format(accuracy / len(file_names)))
print('precision:{}'.format(precision / len(file_names)))

但是我收到以下错误，这使我认为尺寸完全不对：

---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-38-a652e79112fb> in <module>()
     10     merged_annotated_mask = np.zeros((1200, 800))
     11     for i in range(annotated_mask.shape[2]):
---> 12         merged_annotated_mask[annotated_mask[:,:,i]==True] = True
     13     accuracy  += np.sum(merged_mask==merged_annotated_mask) / (1200 * 800)
     14     all_correct = np.sum(merged_annotated_mask[merged_mask == 1])

IndexError: boolean index did not match indexed array along dimension 0; dimension is 1200 but corresponding boolean dimension is 1572

在将尺寸调整为1200 * 800之前，我没有收到此错误，但是随后出现另一个错误，告诉我尺寸不匹配。在这一点上，我将非常感谢能够以某种方式一次解决此大小问题的解决方案。我时不时在Python中遇到这个问题，而且越来越令人沮丧。

编辑：包括数据集类。

############################################################
#  Dataset
############################################################

class ComponentsDataset(utils.Dataset):

    def load_components(self, dataset_dir, subset):
        """Load a subset of the Components dataset.
        dataset_dir: Root directory of the dataset.
        subset: Subset to load: train or val
        """
        # Add classes.
        self.add_class("components", 1, "screw")
        self.add_class("components", 2, "lid")

        # which dataset?
        assert subset in ["train", "val", "predict"]
        dataset_dir = os.path.join(dataset_dir, subset)

        # We mostly care about the x and y coordinates of each region
        annotations = json.load(open(os.path.join(dataset_dir, "via_region_data.json")))
        annotations = list(annotations.values())  # don't need the dict keys

        # The VIA tool saves images in the JSON even if they don't have any
        # annotations. Skip unannotated images.
        annotations = [a for a in annotations if a['regions']]

        # Add images
        for a in annotations:
            # Get the x, y coordinaets of points of the polygons that make up
            # the outline of each object instance. There are stored in the
            # shape_attributes (see json format above)
            polygons = [r['shape_attributes'] for r in a['regions']]
            names = [r['region_attributes'] for r in a['regions']]
            # load_mask() needs the image size to convert polygons to masks.
            # Unfortunately, VIA doesn't include it in JSON, so we must read
            # the image. This is only managable since the dataset is tiny.
            image_path = os.path.join(dataset_dir, a['filename'])
            image = skimage.io.imread(image_path)
            height, width = image.shape[:2]

            self.add_image(
                "components",
                image_id=a['filename'],  # use file name as a unique image id
                path=image_path,
                width=width, height=height,
                polygons=polygons,
                names=names)

    def load_mask(self, image_id):
        """Generate instance masks for an image.
       Returns:
        masks: A bool array of shape [height, width, instance count] with
            one mask per instance.
        class_ids: a 1D array of class IDs of the instance masks.
        """
        # If not a balloon dataset image, delegate to parent class.
        image_info = self.image_info[image_id]
        if image_info["source"] != "components":
            return super(self.__class__, self).load_mask(image_id)

        # Convert polygons to a bitmap mask of shape
        # [height, width, instance_count]
        info = self.image_info[image_id]
        class_names = info["names"]
        mask = np.zeros([info["height"], info["width"], len(info["polygons"])],
                        dtype=np.uint8)
        for i, p in enumerate(info["polygons"]):
            # Get indexes of pixels inside the polygon and set them to 1
            rr, cc = skimage.draw.polygon(p['all_points_y'], p['all_points_x'])
            mask[rr, cc, i] = 1

        # Assign class_ids by reading class_names
        class_ids = np.zeros([len(info["polygons"])])
        # In the components dataset, pictures are labeled with name 'screw' and 'lid' representing arm and ring.
        for i, p in enumerate(class_names):
            # "name" is the attributes name decided when labeling, etc. 'region_attributes': {name:'a'}
            if p['name'] == 'screw':
                class_ids[i] = 1
            elif p['name'] == 'lid':
                class_ids[i] = 2
                # assert code here to extend to other labels
        class_ids = class_ids.astype(int)
        # Return mask, and array of class IDs of each instance. Since we have
        # one class ID only, we return an array of 1s
        return mask.astype(np.bool), class_ids

    def image_reference(self, image_id):
        """Return the path of the image."""
        info = self.image_info[image_id]
        if info["source"] == "components":
            return info["path"]
        else:
            super(self.__class__, self).image_reference(image_id)

    def load_mask_hc(self, image_id):
        """Generate instance masks for an image.
       Returns:
        masks: A bool array of shape [height, width, instance count] with
            one mask per instance.
        class_ids: a 1D array of class IDs of the instance masks.
        """
        # If not a components dataset image, delegate to parent class.
        image_info = self.image_info[image_id]
        if image_info["source"] != "components":
            return super(self.__class__, self).load_mask(image_id)

        # Convert polygons to a bitmap mask of shape
        # [height, width, instance_count]
        info = self.image_info[image_id]
        # "name" is the attributes name decided when labeling, etc. 'region_attributes': {name:'a'}
        class_names = info["names"]
        mask = np.zeros([info["height"], info["width"], len(info["polygons"])],
                        dtype=np.uint8)
        for i, p in enumerate(info["polygons"]):
            # Get indexes of pixels inside the polygon and set them to 1
            rr, cc = skimage.draw.polygon(p['all_points_y'], p['all_points_x'])
            mask[rr, cc, i] = 1
        # Assign class_ids by reading class_names
        class_ids = np.zeros([len(info["polygons"])])
        # In the components dataset, pictures are labeled with name 'screw' and 'lid' representing arm and ring.
        for i, p in enumerate(class_names):
            if p['name'] == 'lid':
                class_ids[i] = 14
            elif p['name'] == 'error':
                pass
            else:
                class_ids[i] = int(p['name'])
                # assert code here to extend to other labels
        class_ids = class_ids.astype(int)
        # Return mask, and array of class IDs of each instance. Since we have
        # one class ID only, we return an array of 1s
        return mask.astype(np.bool), class_ids

Answer 1

列表resized_images中的图像均已正确调整为（1200，800）。但是带注释的蒙版是从数据集中加载的，不会即时调整大小：

annotated_mask = dataset.load_mask(image_id)[0]

方法load_mask生成图像的原始height和width的遮罩图像，而不是调整大小的图像。遮罩的大小和图像的大小需要匹配。

您可以在加载遮罩图像后调整其大小（类似于调整输入图像的大小），以使此方法有效。

另一种选择是进行批处理-将所有图像调整为相同的大小，再次将其保存为.jpg并将其用作新输入，而在此程序中根本不进行任何调整。但是随后，您还必须仔细调整其他数据（例如多边形）以匹配新坐标。

Python图像大小不匹配导致IndexError：布尔索引与维度0上的索引数组不匹配

1 个答案: