我只需要检测“人”(person)这一个类别,并忽略其余所有物体。
应该如何按类别过滤检测结果?
我已经尝试了很长时间,但很遗憾一直没有成功。
我从这里(原文链接已缺失)
下载了 mscoco_label_map.pbtxt,
并从这里(原文链接已缺失)下载了 frozen_inference_graph.pb。
这是我的代码:
#!/usr/bin/env python
import os
import cv2
import logging
import numpy as np
import tensorflow as tf
from tqdm import tqdm
from abc import ABCMeta, abstractmethod
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util
from imutils.video import WebcamVideoStream
class ObjectDetector(object):
    """
    Base class for object detectors used by the package.

    Subclasses implement :meth:`from_image`. This class sets up the shared
    ``dodo_detector`` logger and provides the loops that pull frames from a
    camera or a video file, run :meth:`from_image` on each one and display the
    annotated result in an OpenCV window.
    """
    # Python 2 spelling of "this class has an abstract-base metaclass".
    __metaclass__ = ABCMeta

    def __init__(self):
        # logging.getLogger returns the same logger object for the same name,
        # so every detector instance shares this logger
        self._logger = logging.getLogger('dodo_detector')
        self._logger.setLevel(logging.DEBUG)
        self._formatter = logging.Formatter('[%(asctime)s - %(name)s]: %(levelname)s: %(message)s')

        # file handler which logs even debug messages; reuse the one a previous
        # instance attached so each record is not written once per instance
        self._fh = self._existing_handler(logging.FileHandler)
        if self._fh is None:
            self._fh = logging.FileHandler('/tmp/dodo_detector.log')
            self._logger.addHandler(self._fh)
        self._fh.setLevel(logging.DEBUG)
        self._fh.setFormatter(self._formatter)

        # console handler; FileHandler is a StreamHandler subclass, so it is
        # excluded explicitly when looking for an existing console handler
        self._ch = self._existing_handler(logging.StreamHandler, logging.FileHandler)
        if self._ch is None:
            self._ch = logging.StreamHandler()
            self._logger.addHandler(self._ch)
        self._ch.setLevel(logging.DEBUG)
        self._ch.setFormatter(self._formatter)

    def _existing_handler(self, wanted, excluded=()):
        """
        Finds a handler already attached to the shared logger

        :param wanted: handler class (or tuple of classes) to look for
        :param excluded: handler class (or tuple of classes) that must not match
        :return: the first matching handler, or None if there is none
        """
        for handler in self._logger.handlers:
            if isinstance(handler, wanted) and not isinstance(handler, excluded):
                return handler
        return None

    @abstractmethod
    def from_image(self, frame):
        """
        Detects objects in an image

        :param frame: a numpy.ndarray containing the image where objects will be detected
        :return: a tuple containing the image, with objects marked by rectangles,
                 and a dictionary listing objects and their locations as `(ymin, xmin, ymax, xmax)`
        """
        pass

    def _detect_from_stream(self, get_frame, stream):
        """
        This internal method detects objects from images retrieved from a stream,
        given a method that extracts frames from this stream. Each frame is passed
        to `from_image`, the detected objects are printed and the marked frame is
        shown in a window named "image" until the stream ends or ESC is pressed.

        :param get_frame: a method that extracts frames from the stream; it is called
                          as `get_frame(stream)` and must return `(ret, frame)`, with a
                          falsy `ret` meaning there are no more frames
        :param stream: an object representing a stream of images
        """
        try:
            ret, frame = get_frame(stream)
            while ret:
                marked_frame, objects = self.from_image(frame)
                print(objects)
                cv2.imshow("image", marked_frame)
                # mask to the low byte so the key code compares equal to 27 even
                # if the return value carries extra high bits
                if cv2.waitKey(1) & 0xFF == 27:
                    break  # ESC to quit
                ret, frame = get_frame(stream)
        finally:
            # close the preview window even when detection raises
            cv2.destroyAllWindows()

    def from_camera(self, camera_id=0):
        """
        Detects objects in frames from a camera feed

        :param camera_id: the ID of the camera in the system
        """
        def get_frame(stream):
            # this stream's read() returns only a frame, without a success flag,
            # so a missing frame (None) is treated as the end of the stream
            # instead of being handed to from_image
            frame = stream.read()
            return frame is not None, frame

        stream = WebcamVideoStream(src=camera_id)
        stream.start()
        try:
            self._detect_from_stream(get_frame, stream)
        finally:
            # always stop the stream, even if detection fails
            stream.stop()

    def from_video(self, filepath):
        """
        Detects objects in frames from a video file

        :param filepath: the path to the video file
        """
        def get_frame(stream):
            # cv2.VideoCapture.read() already returns (ret, frame)
            return stream.read()

        stream = cv2.VideoCapture()
        stream.open(filename=filepath)
        try:
            self._detect_from_stream(get_frame, stream)
        finally:
            # release the capture so the file handle is not leaked
            stream.release()
class SingleShotDetector(ObjectDetector):
    """
    Object detector powered by the TensorFlow Object Detection API.

    :param path_to_frozen_graph: path to the frozen inference graph file, a file with a `.pb` extension.
    :param path_to_labels: path to the label map, a text file with the `.pbtxt` extension.
    :param num_classes: number of object classes that will be detected; it is passed to the label map
                        utilities as `max_num_classes`. If None, the highest id found in the label map is used.
    :param confidence: a value between 0 and 1 representing the confidence level the network has in the
                       detection to consider it an actual detection.
    :raises ValueError: if `confidence` is not in the range (0, 1].
    """

    def __init__(self, path_to_frozen_graph, path_to_labels, num_classes=None, confidence=.8):
        # super() must name this class, not its parent; naming the parent skips
        # ObjectDetector.__init__ and leaves the logger unconfigured
        super(SingleShotDetector, self).__init__()

        self._confidence = self._checked_confidence(confidence)

        # load (frozen) tensorflow model into memory
        self._detection_graph = tf.Graph()
        with self._detection_graph.as_default():
            od_graph_def = tf.GraphDef()
            with tf.gfile.GFile(path_to_frozen_graph, 'rb') as fid:
                od_graph_def.ParseFromString(fid.read())
            tf.import_graph_def(od_graph_def, name='')

        # Label maps map indices to category names, so that when the network
        # predicts an index we know which category name it corresponds to.
        label_map = label_map_util.load_labelmap(path_to_labels)

        # Derive the class limit from the ids in the parsed label map. Counting
        # 'name:' in the raw text also matches 'display_name:' and undercounts
        # when the ids are not contiguous.
        if num_classes is None:
            label_ids = [item.id for item in label_map.item]
            num_classes = max(label_ids) if label_ids else 0

        categories = label_map_util.convert_label_map_to_categories(
            label_map, max_num_classes=num_classes, use_display_name=True
        )
        self._category_index = label_map_util.create_category_index(categories)
        print(self._category_index)
        print(('categories', categories))

        # id -> name lookup used by from_image, and the public list of names
        self._categories = {int(category['id']): category['name'] for category in categories}
        self._categories_public = [category['name'] for category in categories]

        # look the graph tensors up once instead of on every frame
        graph = self._detection_graph
        self._image_tensor = graph.get_tensor_by_name('image_tensor:0')
        self._output_tensors = [
            # each box is a part of the image where an object was detected
            graph.get_tensor_by_name('detection_boxes:0'),
            # each score is the level of confidence for the matching box
            graph.get_tensor_by_name('detection_scores:0'),
            graph.get_tensor_by_name('detection_classes:0'),
        ]

        # create a session that will be used for the lifetime of the detector
        self._session = tf.Session(graph=self._detection_graph)

    @staticmethod
    def _checked_confidence(value):
        """
        Validates a confidence threshold

        :param value: the candidate threshold
        :return: `value`, unchanged
        :raises ValueError: if `value` is not in the range (0, 1]
        """
        if not 0 < value <= 1:
            raise ValueError("confidence must be between 0 and 1")
        return value

    @property
    def confidence(self):
        """Minimum score a detection needs in order to be reported."""
        return self._confidence

    @confidence.setter
    def confidence(self, value):
        # apply the same range check as the constructor
        self._confidence = self._checked_confidence(value)

    @property
    def categories(self):
        """List of category names loaded from the label map."""
        return self._categories_public

    def from_image(self, frame):
        """
        Detects objects in an image

        :param frame: a numpy.ndarray containing the image where objects will be detected
        :return: a tuple containing the image, with objects marked by rectangles,
                 and a dictionary mapping each detected class name to a list of
                 box locations in pixels as `(ymin, xmin, ymax, xmax)`
        """
        height, width = frame.shape[:2]

        # the model takes a batch of images, so wrap the frame in a batch of one
        boxes, scores, classes = self._session.run(
            self._output_tensors,
            feed_dict={self._image_tensor: np.expand_dims(frame, axis=0)}
        )

        # take the single batch entry by index; np.squeeze would also collapse
        # the detection axis when there is exactly one detection
        boxes = boxes[0]
        scores = scores[0]
        classes = classes[0].astype(np.int32)

        detected_objects = {}
        for box, score, class_id in zip(boxes, scores, classes):
            # test every score against the threshold rather than assuming the
            # detections arrive sorted by score
            if not score >= self._confidence:
                continue

            # a class id missing from the label map gets a placeholder name
            # instead of raising KeyError
            class_name = self._categories.get(int(class_id), 'N/A')

            # positions of the box are between 0 and 1, relative to the size of the image;
            # multiply them by the size of the image to get the box location in pixels
            ymin, xmin, ymax, xmax = box
            detected_objects.setdefault(class_name, []).append(
                (int(ymin * height), int(xmin * width), int(ymax * height), int(xmax * width))
            )

        # Visualization of the results of a detection (draws on `frame` in place).
        vis_util.visualize_boxes_and_labels_on_image_array(
            frame,
            boxes,
            classes,
            scores,
            self._category_index,
            use_normalized_coordinates=True,
            line_thickness=8,
            min_score_thresh=self._confidence
        )

        return frame, detected_objects
def main():
    """
    Builds a SingleShotDetector from the graph and label map files in the
    current working directory and runs it on camera 0.
    """
    frozen_graph_path = 'frozen_inference_graph.pb'
    label_map_path = 'mscoco_label_map.pbtxt'

    detector = SingleShotDetector(frozen_graph_path, label_map_path)
    detector.from_camera(0)


# run the detector only when this file is executed as a script
if __name__ == '__main__':
    main()
请帮帮我,任何建议都非常欢迎。
先谢谢您!