我想从Tensorflow对象检测api获取标签并将它们放入数组而不是在视频中显示它们
这是detect_object函数
def detect_objects(image_np, sess, detection_graph):
# Expand dimensions since the model expects images to have shape: [1, None, None, 3]
image_np_expanded = np.expand_dims(image_np, axis=0)
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
# Each box represents a part of the image where a particular object was detected.
boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
# Each score represent how level of confidence for each of the objects.
# Score is shown on the result image, together with the class label.
scores = detection_graph.get_tensor_by_name('detection_scores:0')
classes = detection_graph.get_tensor_by_name('detection_classes:0')
num_detections = detection_graph.get_tensor_by_name('num_detections:0')
# Actual detection.
(boxes, scores, classes, num_detections) = sess.run(
[boxes, scores, classes, num_detections],
feed_dict={image_tensor: image_np_expanded})
# Visualization of the results of a detection.
vis_util.visualize_boxes_and_labels_on_image_array(
image_np,
np.squeeze(boxes),
np.squeeze(classes).astype(np.int32),
np.squeeze(scores),
category_index,
use_normalized_coordinates=True,
line_thickness=8)
return image_np
答案 0 :(得分:2)
经过研究,这是我想出的
final_score = np.squeeze(scores)
count = 0
for i in range(100):
if scores is None or final_score[i] > 0.5:
count = count + 1
print('cpunt',count)
printcount =0;
for i in classes[0]:
printcount = printcount +1
print(category_index[i]['name'])
if(printcount == count):
break
这将打印所有检测到的对象,如果要返回它,可以将其添加到某个变量中并返回。
如果只想打印检测到的对象,请在util文件夹内的visualization_utils.py文件中添加print(class_name)
if not agnostic_mode:
if classes[i] in category_index.keys():
class_name = category_index[classes[i]]['name']
**print(class_name)** --> this line
else:
答案 1 :(得分:0)
classes=output_dict['detection_classes']
boxes =output_dict['detection_boxes']
scores = output_dict['detection_scores']
for i in range(min(max_boxes_to_draw, boxes.shape[0])):
if scores is None or scores[i] > min_score_thresh :
if classes[i] in category_index.keys():
class_name = category_index[classes[i]]['name']
print(class_name)
当我给max_boxes_to_draw = 20 min_score_thresh = 0.5(这些是默认值)时,它起作用了。
您可以在visualization_utils.py文件中找到这段代码。
答案 2 :(得分:0)
对于按照上述 2 个答案未获得确切标签的任何人
好吧,我尝试了上述两个答案中的每一个,但没有给出确切的标签。所以我所做的是从 https://github.com/tensorflow/models/blob/master/research/object_detection/utils/visualization_utils.py
复制并粘贴整个代码将所有代码复制到我的 jupyter notebook 后,我更改了原始代码的某些部分。它是将检测到的标签与 image
函数内的 visualize_boxes_and_labels_on_image_array()
数组一起返回
因此,visualize_boxes_and_labels_on_image_array()
函数的部分代码应如下所示:
def visualize_boxes_and_labels_on_image_array(
image,
boxes,
classes,
scores,
category_index,
instance_masks=None,
instance_boundaries=None,
keypoints=None,
keypoint_scores=None,
keypoint_edges=None,
track_ids=None,
use_normalized_coordinates=False,
max_boxes_to_draw=20,
min_score_thresh=.5,
agnostic_mode=False,
line_thickness=4,
mask_alpha=.4,
groundtruth_box_visualization_color='black',
skip_boxes=False,
skip_scores=False,
skip_labels=False,
skip_track_ids=False):
"""Overlay labeled boxes on an image with formatted scores and label names.
This function groups boxes that correspond to the same location
and creates a display string for each detection and overlays these
on the image. Note that this function modifies the image in place, and returns
that same image.
Args:
image: uint8 numpy array with shape (img_height, img_width, 3)
boxes: a numpy array of shape [N, 4]
classes: a numpy array of shape [N]. Note that class indices are 1-based,
and match the keys in the label map.
scores: a numpy array of shape [N] or None. If scores=None, then
this function assumes that the boxes to be plotted are groundtruth
boxes and plot all boxes as black with no classes or scores.
category_index: a dict containing category dictionaries (each holding
category index `id` and category name `name`) keyed by category indices.
instance_masks: a uint8 numpy array of shape [N, image_height, image_width],
can be None.
instance_boundaries: a numpy array of shape [N, image_height, image_width]
with values ranging between 0 and 1, can be None.
keypoints: a numpy array of shape [N, num_keypoints, 2], can
be None.
keypoint_scores: a numpy array of shape [N, num_keypoints], can be None.
keypoint_edges: A list of tuples with keypoint indices that specify which
keypoints should be connected by an edge, e.g. [(0, 1), (2, 4)] draws
edges from keypoint 0 to 1 and from keypoint 2 to 4.
track_ids: a numpy array of shape [N] with unique track ids. If provided,
color-coding of boxes will be determined by these ids, and not the class
indices.
use_normalized_coordinates: whether boxes is to be interpreted as
normalized coordinates or not.
max_boxes_to_draw: maximum number of boxes to visualize. If None, draw
all boxes.
min_score_thresh: minimum score threshold for a box or keypoint to be
visualized.
agnostic_mode: boolean (default: False) controlling whether to evaluate in
class-agnostic mode or not. This mode will display scores but ignore
classes.
line_thickness: integer (default: 4) controlling line width of the boxes.
mask_alpha: transparency value between 0 and 1 (default: 0.4).
groundtruth_box_visualization_color: box color for visualizing groundtruth
boxes
skip_boxes: whether to skip the drawing of bounding boxes.
skip_scores: whether to skip score when drawing a single detection
skip_labels: whether to skip label when drawing a single detection
skip_track_ids: whether to skip track id when drawing a single detection
Returns:
uint8 numpy array with shape (img_height, img_width, 3) with overlaid boxes.
"""
# Create a display string (and color) for every box location, group any boxes
# that correspond to the same location.
box_to_display_str_map = collections.defaultdict(list)
box_to_color_map = collections.defaultdict(str)
box_to_instance_masks_map = {}
box_to_instance_boundaries_map = {}
box_to_keypoints_map = collections.defaultdict(list)
box_to_keypoint_scores_map = collections.defaultdict(list)
box_to_track_ids_map = {}
if not max_boxes_to_draw:
max_boxes_to_draw = boxes.shape[0]
for i in range(boxes.shape[0]):
if max_boxes_to_draw == len(box_to_color_map):
break
if scores is None or scores[i] > min_score_thresh:
box = tuple(boxes[i].tolist())
if instance_masks is not None:
box_to_instance_masks_map[box] = instance_masks[i]
if instance_boundaries is not None:
box_to_instance_boundaries_map[box] = instance_boundaries[i]
if keypoints is not None:
box_to_keypoints_map[box].extend(keypoints[i])
if keypoint_scores is not None:
box_to_keypoint_scores_map[box].extend(keypoint_scores[i])
if track_ids is not None:
box_to_track_ids_map[box] = track_ids[i]
if scores is None:
box_to_color_map[box] = groundtruth_box_visualization_color
else:
display_str = ''
if not skip_labels:
if not agnostic_mode:
if classes[i] in six.viewkeys(category_index):
class_name = category_index[classes[i]]['name']
else:
class_name = 'N/A'
display_str = str(class_name)
final_label = display_str
if not skip_scores:
if not display_str:
display_str = '{}%'.format(round(100*scores[i]))
final_label = display_str
else:
display_str = '{}: {}%'.format(display_str, round(100*scores[i]))
if not skip_track_ids and track_ids is not None:
if not display_str:
display_str = 'ID {}'.format(track_ids[i])
final_label = track_ids[i]
else:
display_str = '{}: ID {}'.format(display_str, track_ids[i])
box_to_display_str_map[box].append(display_str)
if agnostic_mode:
box_to_color_map[box] = 'DarkOrange'
elif track_ids is not None:
prime_multipler = _get_multiplier_for_color_randomness()
box_to_color_map[box] = STANDARD_COLORS[
(prime_multipler * track_ids[i]) % len(STANDARD_COLORS)]
else:
box_to_color_map[box] = STANDARD_COLORS[
classes[i] % len(STANDARD_COLORS)]
# Draw all boxes onto image.
for box, color in box_to_color_map.items():
ymin, xmin, ymax, xmax = box
if instance_masks is not None:
draw_mask_on_image_array(
image,
box_to_instance_masks_map[box],
color=color,
alpha=mask_alpha
)
if instance_boundaries is not None:
draw_mask_on_image_array(
image,
box_to_instance_boundaries_map[box],
color='red',
alpha=1.0
)
draw_bounding_box_on_image_array(
image,
ymin,
xmin,
ymax,
xmax,
color=color,
thickness=0 if skip_boxes else line_thickness,
display_str_list=box_to_display_str_map[box],
use_normalized_coordinates=use_normalized_coordinates)
if keypoints is not None:
keypoint_scores_for_box = None
if box_to_keypoint_scores_map:
keypoint_scores_for_box = box_to_keypoint_scores_map[box]
draw_keypoints_on_image_array(
image,
box_to_keypoints_map[box],
keypoint_scores_for_box,
min_score_thresh=min_score_thresh,
color=color,
radius=line_thickness / 2,
use_normalized_coordinates=use_normalized_coordinates,
keypoint_edges=keypoint_edges,
keypoint_edge_color=color,
keypoint_edge_width=line_thickness // 2)
return final_label, image
在这里,我刚刚创建了一个名为 final_label
的变量,它是检测到的边界框的类名。
在调用这个函数时,你必须使用这个代码:
label, _ = visualize_boxes_and_labels_on_image_array(
image_np_with_detections, #change this according to your image array name
detections['detection_boxes'],
detections['detection_classes']+label_id_offset,
detections['detection_scores'],
category_index,
use_normalized_coordinates=True,
max_boxes_to_draw=8,
min_score_thresh=.50,
agnostic_mode=False)
print(label)
它肯定会给出检测到的确切标签:)