I need to detect hands in images. I am using Darkflow: https://github.com/thtrieu/darkflow.git
I use the tiny-yolo configuration with these weights: https://github.com/digitalbrain79/pyyolo/raw/master/tiny-yolo.weights
I copied tiny-yolo.cfg to tempCfg.cfg and changed the parameters as described in the README.
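For context, my understanding of what the README asks for in a single-class setup (this is a sketch of the idea, not an excerpt from my actual tempCfg.cfg): set classes=1 in the [region] layer, set filters in the [convolutional] layer right above it to num * (classes + 5) = 5 * (1 + 5) = 30, and put only the class name in labels.txt:

[convolutional]
size=1
stride=1
pad=1
# num * (classes + 5) = 5 * (1 + 5)
filters=30
activation=linear

[region]
# other [region] keys left unchanged
classes=1
num=5

and labels.txt contains a single line:

hand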
I use this dataset: http://www.robots.ox.ac.uk/~vgg/data/hands/
Here is the script I use to convert the annotation files:
import os
import xml.etree.cElementTree as ET

import cv2
import numpy as np
import scipy.io
from lxml import etree

annotDir = '/content/hand_dataset/training_dataset/training_data/annotations'
xmlDir = annotDir.replace('annotations', 'annotationsXML')
os.makedirs(xmlDir, exist_ok=True)  # make sure the output directory exists

for wrongAnnotation in os.scandir(annotDir):
    if not wrongAnnotation.path.endswith('.mat'):
        continue
    # derive the output XML path and the matching image path from the .mat path
    path = wrongAnnotation.path
    path = path.replace('.mat', '.xml')
    path = path.replace('annotations', 'annotationsXML')
    imgPath = path.replace('annotationsXML', 'images')
    imgPath = imgPath.replace('.xml', '.jpg')

    # read the image only to get its dimensions for the <size> element
    image = cv2.imread(imgPath)
    height, width, depth = image.shape

    annotation = ET.Element('annotation')
    ET.SubElement(annotation, 'folder').text = xmlDir
    ET.SubElement(annotation, 'filename').text = os.path.basename(imgPath)
    ET.SubElement(annotation, 'segmented').text = '0'
    size = ET.SubElement(annotation, 'size')
    ET.SubElement(size, 'width').text = str(width)
    ET.SubElement(size, 'height').text = str(height)
    ET.SubElement(size, 'depth').text = str(depth)

    data = scipy.io.loadmat(wrongAnnotation.path)
    for x in data.get('boxes')[0]:
        # each box is a (1, 1) structured array; its first four fields
        # (a, b, c, d) are the corner points, each stored as [[y, x]]
        corners = x[0][0].item()[:4]
        yMin = np.min([p[0][0] for p in corners])
        xMin = np.min([p[0][1] for p in corners])
        xMax = np.max([p[0][1] for p in corners])
        yMax = np.max([p[0][0] for p in corners])

        ob = ET.SubElement(annotation, 'object')
        ET.SubElement(ob, 'name').text = 'hand'
        ET.SubElement(ob, 'pose').text = 'Unspecified'
        ET.SubElement(ob, 'truncated').text = '0'
        ET.SubElement(ob, 'difficult').text = '0'
        bbox = ET.SubElement(ob, 'bndbox')
        ET.SubElement(bbox, 'xmin').text = str(xMin)
        ET.SubElement(bbox, 'ymin').text = str(yMin)
        ET.SubElement(bbox, 'xmax').text = str(xMax)
        ET.SubElement(bbox, 'ymax').text = str(yMax)

    # round-trip through lxml only to get pretty-printed output
    xml_str = ET.tostring(annotation)
    root = etree.fromstring(xml_str)
    xml_str = etree.tostring(root, pretty_print=True)
    with open(path, 'wb') as newXML:
        newXML.write(xml_str)
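To sanity-check the conversion, I can draw the converted boxes back onto an image with a quick script like this (a rough sketch; the file name is just an example):

import xml.etree.cElementTree as ET
import cv2

# example paths -- point these at any converted file
xmlPath = '/content/hand_dataset/training_dataset/training_data/annotationsXML/VOC2007_100.xml'
imgPath = xmlPath.replace('annotationsXML', 'images').replace('.xml', '.jpg')

image = cv2.imread(imgPath)
root = ET.parse(xmlPath).getroot()
for ob in root.iter('object'):
    bbox = ob.find('bndbox')
    xMin = int(float(bbox.find('xmin').text))
    yMin = int(float(bbox.find('ymin').text))
    xMax = int(float(bbox.find('xmax').text))
    yMax = int(float(bbox.find('ymax').text))
    cv2.rectangle(image, (xMin, yMin), (xMax, yMax), (0, 255, 0), 2)
cv2.imwrite('check.jpg', image)  # the boxes should sit on the hands

The boxes land on the hands, so I assume the conversion itself is correct.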
It converts annotations from this form (each corner point a-d is stored as [[y, x]], i.e. row first, column second, which is why the script reads index 0 as y and index 1 as x):
{'__header__': b'MATLAB 5.0 MAT-file, Platform: MACI, Created on: Mon Jul 18 17:53:23 2011', '__version__': '1.0', '__globals__': [], 'boxes': array([[array([[(array([[215.63338254, 412.85084076]]), array([[223.47936255, 421.93052125]]), array([[250.44014687, 398.63303021]]), array([[242.59416686, 389.55334972]]), array(['L'], dtype='<U1'), array([], shape=(0, 0), dtype=uint8))]],
dtype=[('a', 'O'), ('b', 'O'), ('c', 'O'), ('d', 'O'), ('handtype', 'O'), ('truncated', 'O')]),
array([[(array([[194.93452622, 273.71437979]]), array([[178.00174829, 275.22468831]]), array([[179.83998359, 295.8340073 ]]), array([[196.77276151, 294.32369879]]), array(['R'], dtype='<U1'), array([], shape=(0, 0), dtype=uint8))]],
dtype=[('a', 'O'), ('b', 'O'), ('c', 'O'), ('d', 'O'), ('handtype', 'O'), ('truncated', 'O')]),
array([[(array([[174.40487672, 310.17948749]]), array([[183.82551544, 317.61262721]]), array([[193.01669191, 305.96388169]]), array([[183.59605319, 298.53074197]]), array(['L'], dtype='<U1'), array([], shape=(0, 0), dtype=uint8))]],
dtype=[('a', 'O'), ('b', 'O'), ('c', 'O'), ('d', 'O'), ('handtype', 'O'), ('truncated', 'O')])]],
dtype=object)}
to this:
<annotation>
<folder>/content/hand_dataset/training_dataset/training_data/annotationsXML</folder>
<filename>VOC2007_100.jpg</filename>
<segmented>0</segmented>
<size>
<width>500</width>
<height>375</height>
<depth>3</depth>
</size>
<object>
<name>hand</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>389.5533497217231</xmin>
<ymin>215.6333825448044</ymin>
<xmax>421.9305212460187</xmax>
<ymax>250.44014686696025</ymax>
</bndbox>
</object>
<object>
<name>hand</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>273.7143797917917</xmin>
<ymin>178.00174829348282</ymin>
<xmax>295.8340073049824</xmax>
<ymax>196.77276151043867</ymax>
</bndbox>
</object>
<object>
<name>hand</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>298.53074196966594</xmin>
<ymin>174.40487671681234</ymin>
<xmax>317.6126272059612</xmax>
<ymax>193.01669191063854</ymax>
</bndbox>
</object>
</annotation>
After that, I start training with this command:
flow --model cfg/tempCfg.cfg --load bin/tiny-yolo.weights --train --annotation /content/hand_dataset/training_dataset/training_data/annotationsXML --dataset /content/hand_dataset/training_dataset/training_data/images --gpu 1.0 --epoch 30 --batch 10 --verbalise false
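As an aside: as far as I understand the darkflow README, training writes checkpoints into the ckpt/ directory, and --load -1 loads the most recent one, e.g.:

flow --model cfg/tempCfg.cfg --load -1 --train --annotation /content/hand_dataset/training_dataset/training_data/annotationsXML --dataset /content/hand_dataset/training_dataset/training_data/images --gpu 1.0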
Then I try:
flow --imgdir /content/hand_dataset/test_dataset/test_data/demoImages --model cfg/tempCfg.cfg --load bin/tiny-yolo.weights --gpu 1.0
The results are completely random: it detects lots of empty areas and misses the hands. Is this because I trained for only 30 epochs, or am I doing something wrong? Training already takes several hours on the GPU, so I expected the output to be at least roughly sensible by that point, but all I get is noise.