我的目标是将网络摄像头与 Google Colab 上的对象检测模型集成。 我有 JavaScript 代码来访问网络摄像头和 python 代码以与 Google Colab 上的模型集成。相机正在打开,几秒钟后会话崩溃。如何成功地将模型与网络摄像头集成?下面是代码片段:
# start streaming video from webcam
video_stream()
# label for video
label_html = 'Capturing...'
# initialze bounding box to empty
bbox = ''
count = 0
while True:
js_reply = video_frame(label_html, bbox)
if not js_reply:
break
# convert JS response to OpenCV Image
frame = js_to_image(js_reply["img"])
h, w = frame.shape[:2]
# create transparent overlay for bounding box
#bbox_array = np.zeros([480,640,4], dtype=np.uint8)
# call our darknet helper on video frame
blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True, crop=False)
network.setInput(blob)
output_from_network = network.forward(layers_names_output)
bounding_boxes = []
confidences = []
class_numbers = []
# Going through all output layers after feed forward pass
for result in output_from_network:
# Going through all detections from current output layer
for detected_objects in result:
# Getting 80 classes' probabilities for current detected object
scores = detected_objects[5:]
# Getting index of the class with the maximum value of probability
class_current = np.argmax(scores)
# Getting value of probability for defined class
confidence_current = scores[class_current]
# # Check point
# # Every 'detected_objects' numpy array has first 4 numbers with
# # bounding box coordinates and rest 80 with probabilities
# # for every class
# print(detected_objects.shape) # (85,)
# Eliminating weak predictions with minimum probability
if confidence_current > probability_minimum:
# Scaling bounding box coordinates to the initial frame size
# YOLO data format keeps coordinates for center of bounding box
# and its current width and height
# That is why we can just multiply them elementwise
# to the width and height
# of the original frame and in this way get coordinates for center
# of bounding box, its width and height for original frame
box_current = detected_objects[0:4] * np.array([w, h, w, h])
# Now, from YOLO data format, we can get top left corner coordinates
# that are x_min and y_min
x_center, y_center, box_width, box_height = box_current
x_min = int(x_center - (box_width / 2))
y_min = int(y_center - (box_height / 2))
# Adding results into prepared lists
bounding_boxes.append([x_min, y_min,
int(box_width), int(box_height)])
confidences.append(float(confidence_current))
class_numbers.append(class_current)
results = cv2.dnn.NMSBoxes(bounding_boxes, confidences, probability_minimum, threshold)
if len(results) > 0:
# Going through indexes of results
for i in results.flatten():
# Getting current bounding box coordinates,
# its width and height
x_min, y_min = bounding_boxes[i][0], bounding_boxes[i][1]
box_width, box_height = bounding_boxes[i][2], bounding_boxes[i][3]
# Preparing colour for current bounding box
# and converting from numpy array to list
colour_box_current = colours[class_numbers[i]].tolist()
# # # Check point
# print(type(colour_box_current)) # <class 'list'>
# print(colour_box_current) # [172 , 10, 127]
# Drawing bounding box on the original current frame
cv2.rectangle(frame, (x_min, y_min),
(x_min + box_width, y_min + box_height),
colour_box_current, 2)
# Preparing text with label and confidence for current bounding box
text_box_current = '{}: {:.4f}'.format(labels[int(class_numbers[i])],
confidences[i])
# Putting text with label and confidence on the original image
cv2.putText(frame, text_box_current, (x_min, y_min - 5),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, colour_box_current, 2)
cv2.namedWindow('YOLO v3 Real Time Detections', cv2.WINDOW_NORMAL)
# Pay attention! 'cv2.imshow' takes images in BGR format
cv2.imshow('YOLO v3 Real Time Detections', frame)
# Breaking the loop if 'q' is pressed
if cv2.waitKey(1) & 0xFF == ord('q'):
break