我正在训练一个 YOLO 网络来检测足球,以便进一步学习并更好地理解这种架构。在大多数情况下,我都能检测到球。我的问题是生成的边界框太大,覆盖的像素比实际的球要多。
我知道锚框(anchors)参数会影响边界框。我已经尝试了一些配置值,也尝试用代码来生成锚框。由代码生成的锚框太小,通常我们甚至看不到方框,只能看到类别标签。我还认为这不是处理单类别问题的最佳方法,因为我们使用 k 均值来生成锚框。当我尝试手动更改这些值时,覆盖区域完全错误,不过方框确实很小,覆盖的范围小于使用原始锚框时的结果。
下面是我用来生成锚框的代码。为了让代码更简洁,我删除了一些方法。
def __init__(self, dir_images, dir_labels, width=416, height=416, anchors=9):
    """Collect the pixel size of the first labelled box of every readable image.

    The sizes are stored in ``self.annotation_dims`` as an ``(n, 2)`` float
    array of ``(box_width, box_height)`` pairs, the input of ``run_kmeans``.

    Args:
        dir_images: Directory that contains all images that were, or will
            be, used to train the network.
            Example: /home/enacom/Desktop/darknet_football_resized_images/
        dir_labels: Directory that contains the label files for those images.
            Example: /home/enacom/Desktop/BBoxLabelTool/Images/labels6/
        width: Network input width; only used to derive ``grid_w = width / 32``.
        height: Network input height; only used to derive ``grid_h = height / 32``.
        anchors: Number of anchors to generate. 9 is the YOLOv3 default,
            but more can be requested.
    """
    self.dir_images = dir_images
    self.dir_labels = dir_labels
    self.grid_w = width / 32
    self.grid_h = height / 32
    self.anchors = anchors
    self.annotation_dims = []

    file_list = os.listdir(self.dir_images)
    boxes = self.__read_labels()
    for file_name in file_list:
        # os.path.join works whether or not dir_images ends with a separator.
        image = cv2.imread(os.path.join(self.dir_images, file_name))
        if image is None:
            # cv2.imread signals an unreadable file by returning None.
            print("Failure when reading image")
            continue

        # Swap only the extension: str.replace("jpg", "txt") would also
        # rewrite "jpg" inside the base name and ignore other extensions.
        label_name = os.path.splitext(file_name)[0] + ".txt"
        try:
            box = boxes[label_name]
        except KeyError:
            print("No label found for image {}".format(file_name))
            continue

        # Only the first box of each label file is used.
        # NOTE(review): the (x_min, y_min, x_max, y_max) layout is presumed
        # from the subtraction order - confirm against __read_labels.
        # NOTE(review): sizes are in source-image pixels and are not rescaled
        # to the network input size - confirm the images are already resized.
        box_w = float(box[0][2]) - float(box[0][0])
        box_h = float(box[0][3]) - float(box[0][1])
        self.annotation_dims.append((box_w, box_h))

    self.annotation_dims = np.array(self.annotation_dims)
    # Debug output: box sizes divided by 32.
    print(self.annotation_dims / 32)
def IOU(self, ann, centroids):
    """Return the IoU of one box against every centroid.

    Only sizes are compared: each pair of boxes is treated as if it shared
    a corner, so the overlap depends on widths and heights alone.

    Args:
        ann: ``(w, h)`` of the annotated box.
        centroids: Iterable of ``(c_w, c_h)`` centroid sizes.

    Returns:
        A 1-D numpy array with one similarity per centroid, in order.
    """
    box_w, box_h = ann

    def overlap(cen_w, cen_h):
        # Each branch is intersection / union for one relative ordering.
        if cen_w >= box_w:
            if cen_h >= box_h:
                # The centroid contains the box.
                return box_w * box_h / (cen_w * cen_h)
            # Centroid is wider but not taller.
            return box_w * cen_h / (box_w * box_h + (cen_w - box_w) * cen_h)
        if cen_h >= box_h:
            # Centroid is taller but narrower.
            return cen_w * box_h / (box_w * box_h + cen_w * (cen_h - box_h))
        # The box contains the centroid.
        return (cen_w * cen_h) / (box_w * box_h)

    return np.array([overlap(cen_w, cen_h) for cen_w, cen_h in centroids])
def run_kmeans(self):
    """Cluster ``self.annotation_dims`` into ``self.anchors`` box sizes.

    Runs k-means with ``1 - IOU`` as the distance, starting from randomly
    picked annotations, until the cluster assignments stop changing.

    An empty cluster keeps its previous centroid; averaging it would
    collapse it to a degenerate (0, 0) anchor.

    Returns:
        A float array of shape ``(self.anchors, 2)`` with the final
        ``(width, height)`` centroids.

    Raises:
        ValueError: If there are no annotations to cluster.
    """
    ann_num = self.annotation_dims.shape[0]
    if ann_num == 0:
        raise ValueError("cannot run k-means without any annotation")

    # Start from randomly picked annotations (duplicates are possible).
    indices = [random.randrange(ann_num) for _ in range(self.anchors)]
    # astype(float) gives an independent float copy, so the updates below
    # neither touch the input data nor get truncated by an integer dtype.
    centroids = self.annotation_dims[indices].astype(float)

    prev_assignments = np.full(ann_num, -1)
    old_distances = np.zeros((ann_num, self.anchors))
    iteration = 0
    while True:
        iteration += 1
        # distances.shape = (ann_num, anchor_num)
        distances = np.array(
            [1 - self.IOU(ann, centroids) for ann in self.annotation_dims]
        )
        print("iteration {}: dists = {}".format(iteration, np.sum(np.abs(old_distances - distances))))

        # Assign every sample to its closest centroid.
        assignments = np.argmin(distances, axis=1)
        if (assignments == prev_assignments).all():
            return centroids

        # Move each centroid to the mean of the samples assigned to it.
        for j in range(self.anchors):
            members = self.annotation_dims[assignments == j]
            if len(members):
                centroids[j] = members.mean(axis=0)

        prev_assignments = assignments.copy()
        old_distances = distances.copy()
# Build the generator from the image and label directories, cluster the
# box sizes into 9 anchors, and print the result.
a = AnchorGenerator(
    dir_images="/home/myDir/Desktop/BBoxLabelTool/Images/5_images_clean/",
    dir_labels="/home/myDir/Desktop/BBoxLabelTool/Labels/005/",
    anchors=9,
)
centroids = a.run_kmeans()
a.print_anchors(centroids)
使用原始锚框的结果:https://imgur.com/a/5OhiTfl 使用我自己创建的锚框的结果:https://imgur.com/a/YCW2aam 使用手工设置的锚框时,结果几乎是正确的,但我不知道如何让它更精确。下面是我用来生成此结果的锚框。
anchors = 3,1, 7,4 5,5, 12,12, 15,15, 16,16, 18,17