YoloV3 bounding boxes are too big

Time: 2019-04-11 18:40:56

Tags: python object-detection yolo

I am training a yolo network to detect footballs, in order to learn more and better understand this architecture. In most cases I can detect the ball. My problem is that the resulting bounding boxes are too big, covering many more pixels than the actual ball.

I know that the anchor box parameters affect the bounding boxes. I have tried some configuration values and also tried code to generate the anchors. The anchors generated by the code are so small that often we can't even see the box, only the label text. I also suspect this is not the best approach for a single-class problem, since we use k-means to generate the anchors. When I try to change the values the covered area is completely wrong, but the boxes are really small, covering less than the original anchors do.
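
To make explicit why the priors matter so much: as far as I understand the YOLOv3 decoding, the final box width and height are the anchor dimensions scaled by exp() of the raw network outputs, so oversized priors push every prediction towards oversized boxes. A rough sketch of that decoding (decode_box is just my illustration, not darknet code):

import numpy as np

def decode_box(tx, ty, tw, th, cx, cy, anchor_w, anchor_h, stride = 32):
    # YOLOv3-style decoding: the centre is the cell index plus a sigmoid of
    # the raw output, the size is the anchor prior scaled by exp() of it.
    sigmoid = lambda v: 1.0 / (1.0 + np.exp(-v))
    bx = (sigmoid(tx) + cx) * stride   # centre x in net-input pixels
    by = (sigmoid(ty) + cy) * stride   # centre y in net-input pixels
    bw = anchor_w * np.exp(tw)         # width  = prior width  * exp(tw)
    bh = anchor_h * np.exp(th)         # height = prior height * exp(th)
    return bx, by, bw, bh

# With zero offsets the predicted box is exactly the prior, so the priors
# should already be close to the typical ball size in the training images.
print(decode_box(0.0, 0.0, 0.0, 0.0, cx = 6, cy = 6, anchor_w = 16, anchor_h = 16))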

Here is my code for generating the anchors. I removed some methods to keep the code cleaner.


import os
import random

import cv2
import numpy as np


class AnchorGenerator:
    def __init__(self, dir_images, dir_labels, width = 416, height = 416, anchors = 9):
        # dir_images: Directory that contains all images that will be used or
        # have already been used to train the neural network
        # Example: /home/enacom/Desktop/darknet_football_resized_images/

        # dir_labels: Directory that contains all labels that will be used or
        # have already been used to train the neural network
        # Example: /home/enacom/Desktop/BBoxLabelTool/Images/labels6/

        # anchors: the number of anchors that you want
        # the default is 9 in yoloV3, but you can add more if you want to detect more objects
        self.dir_images = dir_images
        self.dir_labels = dir_labels
        self.grid_w = width/32
        self.grid_h = height/32
        self.anchors = anchors
        self.annotation_dims = []

        file_list = os.listdir(self.dir_images)
        boxes = self.__read_labels()

        for file in file_list:
            actual_dir = self.dir_images + file
            try:
                image = cv2.imread(actual_dir)
                height, width = image.shape[:2]  # shape is (rows, cols), i.e. (height, width)
                cell_w = width / self.grid_w
                cell_h = height / self.grid_h
                box = boxes[file.replace("jpg", "txt")]
                relative_w = (float(box[0][2]) - float(box[0][0])) # / cell_w
                relative_h = (float(box[0][3]) - float(box[0][1])) # / cell_h
                self.annotation_dims.append(tuple(map(float, (relative_w, relative_h))))

            except AttributeError:
                print("Failure when reading image")

        self.annotation_dims = np.array(self.annotation_dims)
        print(self.annotation_dims / 32)

    def IOU(self, ann, centroids):
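        # IoU between one labelled (w, h) and every centroid, computed as if
        # the two boxes shared a corner, so only widths and heights matter.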
        w, h = ann
        similarities = []

        for centroid in centroids:
            c_w, c_h = centroid

            if c_w >= w and c_h >= h:
                similarity = w * h / (c_w * c_h)
            elif c_w >= w and c_h <= h:
                similarity = w * c_h / (w * h + (c_w - w) * c_h)
            elif c_w <= w and c_h >= h:
                similarity = c_w * h / (w * h + c_w * (c_h - h))
            else:  # means both w,h are bigger than c_w and c_h respectively
                similarity = (c_w * c_h) / (w * h)
            similarities.append(similarity)  # will become (k,) shape

        return np.array(similarities)

    def run_kmeans(self):
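        # Plain k-means over the (w, h) pairs, using 1 - IoU as the distance.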
        ann_num = self.annotation_dims.shape[0]
        prev_assignments = np.ones(ann_num) * (-1)
        iteration = 0
        old_distances = np.zeros((ann_num, self.anchors))

        indices = [random.randrange(self.annotation_dims.shape[0]) for i in range(self.anchors)]
        centroids = self.annotation_dims[indices]
        anchor_dim = self.annotation_dims.shape[1]

        while True:
            distances = []
            iteration += 1
            for i in range(ann_num):
                d = 1 - self.IOU(self.annotation_dims[i], centroids)
                distances.append(d)
            distances = np.array(distances)  # distances.shape = (ann_num, anchor_num)

            print("iteration {}: dists = {}".format(iteration, np.sum(np.abs(old_distances - distances))))

            # assign samples to centroids
            assignments = np.argmin(distances, axis=1)

            if (assignments == prev_assignments).all():
                return centroids

            # calculate new centroids
            centroid_sums = np.zeros((self.anchors, anchor_dim), dtype = float)
            for i in range(ann_num):
                centroid_sums[assignments[i]] += self.annotation_dims[i]
            for j in range(self.anchors):
                centroids[j] = centroid_sums[j] / (np.sum(assignments == j) + 1e-6)

            prev_assignments = assignments.copy()

            old_distances = distances.copy()


a = AnchorGenerator("/home/myDir/Desktop/BBoxLabelTool/Images/5_images_clean/",
                    "/home/myDir/Desktop/BBoxLabelTool/Labels/005/",
                    anchors = 9)

centroids = a.run_kmeans()
a.print_anchors(centroids)
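
One scale issue I would double-check (this is an assumption about the label format, not something verified here): the YOLOv3 cfg anchors are, as far as I know, expressed in pixels on the network input (416x416 here), while relative_w / relative_h above are taken straight from the label coordinates of the original image, which may be larger. A minimal sketch of rescaling the box sizes to the net input before clustering; box_dims_on_net_input is a hypothetical helper and it assumes __read_labels() maps "name.txt" to lists of [x1, y1, x2, y2] values in original-image pixels:

import os
import cv2
import numpy as np

def box_dims_on_net_input(dir_images, boxes, net_w = 416, net_h = 416):
    # Collect (w, h) of every labelled box rescaled to the network input
    # resolution, so the k-means centroids can go into the cfg directly.
    dims = []
    for file in os.listdir(dir_images):
        image = cv2.imread(os.path.join(dir_images, file))
        if image is None:
            continue
        img_h, img_w = image.shape[:2]   # shape is (rows, cols)
        for box in boxes.get(file.replace("jpg", "txt"), []):
            w = (float(box[2]) - float(box[0])) * net_w / img_w
            h = (float(box[3]) - float(box[1])) * net_h / img_h
            dims.append((w, h))
    return np.array(dims)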

Result using the original anchors: https://imgur.com/a/5OhiTfl
Result using the anchors I created: https://imgur.com/a/YCW2aam

With the hand-made anchors it is almost right, but I don't know how to make it more precise. These are the anchors I used to generate this result:

anchors = 3,1, 7,4 5,5, 12,12, 15,15, 16,16, 18,17
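
To compare anchor sets numerically instead of only looking at the drawn boxes, one option is the mean best IoU between every labelled box and its closest anchor, reusing the IOU() method above. This only makes sense if a.annotation_dims and the candidate anchors are expressed on the same scale; the higher the mean, the better the priors fit the data:

import numpy as np

# Candidate anchors copied from the cfg line above, as (w, h) pairs.
candidate = np.array([[3, 1], [7, 4], [5, 5], [12, 12],
                      [15, 15], [16, 16], [18, 17]], dtype = float)

# Best IoU of each labelled box against its closest candidate anchor.
best_ious = [np.max(a.IOU(dim, candidate)) for dim in a.annotation_dims]
print("mean best IoU:", np.mean(best_ious))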

0 Answers:

No answers