I'm running object detection with a pre-trained TensorFlow model on Google Colab with a GPU. However, the inference time is significantly higher than the speed reported in the TensorFlow model zoo.
Can anyone tell me why that is, and how to make it run faster?
Also, I'm trying to process very large images, so I'm using a sliding window to tile the image and run detection on each tile separately. Is there a better way to do this?
Thanks a lot!
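For context, a minimal sanity check along these lines (standard TF2 calls, shown as a sketch) tells you whether TensorFlow actually sees the Colab GPU; an empty list would mean everything runs on the CPU:

import tensorflow as tf

# list the GPUs visible to TensorFlow; an empty list means inference falls back to the CPU
print(tf.config.list_physical_devices('GPU'))

My main code is below.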
import numpy as np
import cv2
import time
from PIL import Image
from IPython.display import display

# NOTE: detection_model, category_index, run_inference_for_single_image and
# vis_util come from the TensorFlow Object Detection API setup cells (not shown),
# and sliding_window / detect are the supporting functions defined further down.

image_path = "/content/drive/My Drive/datasets/20190206142141.jpg"
img = np.array(Image.open(image_path))
img = cv2.resize(img, (3000, 2000))

(winW, winH) = (500, 500)
i = 0
imgs = []
dics = []

for (x, y, window) in sliding_window(img, stepSize=500, windowSize=(winW, winH)):
    # skip partial windows at the image border
    if window.shape[0] != winH or window.shape[1] != winW:
        continue
    print(x, x + winW, y, y + winH)
    crop_img = img[y:y + winH, x:x + winW]

    start_time = time.time()
    img_i, dic_i = detect(detection_model, crop_img)
    print("--- %s seconds ---" % (time.time() - start_time))

    imgs.append(img_i)
    dics.append(dic_i)
    display(Image.fromarray(imgs[i]))
    i = i + 1

# stitch the 24 annotated 500x500 tiles (6 columns x 4 rows) back into one image
half1 = cv2.hconcat([imgs[0], imgs[1], imgs[2], imgs[3], imgs[4], imgs[5]])
half2 = cv2.hconcat([imgs[6], imgs[7], imgs[8], imgs[9], imgs[10], imgs[11]])
half3 = cv2.hconcat([imgs[12], imgs[13], imgs[14], imgs[15], imgs[16], imgs[17]])
half4 = cv2.hconcat([imgs[18], imgs[19], imgs[20], imgs[21], imgs[22], imgs[23]])
full_img = cv2.vconcat([half1, half2, half3, half4])
full_img = cv2.resize(full_img, (1500, 1000))
display(Image.fromarray(full_img))
# supporting functions
def sliding_window(image, stepSize, windowSize):
    # slide a window across the image and yield each tile with its top-left corner
    for y in range(0, image.shape[0], stepSize):
        for x in range(0, image.shape[1], stepSize):
            # yield the current window
            yield (x, y, image[y:y + windowSize[1], x:x + windowSize[0]])
def detect(model, image_np):
    # run detection on a single tile and draw boxes and labels directly on it
    start_time = time.time()
    output_dict = run_inference_for_single_image(model, image_np)
    print("--- %s seconds (detection) ---" % (time.time() - start_time))

    # visualization of the detection results (modifies image_np in place)
    vis_util.visualize_boxes_and_labels_on_image_array(
        image_np,
        output_dict['detection_boxes'],
        output_dict['detection_classes'],
        output_dict['detection_scores'],
        category_index,
        instance_masks=output_dict.get('detection_masks_reframed', None),
        use_normalized_coordinates=True,
        line_thickness=2)
    print(output_dict['detection_boxes'])
    # display(Image.fromarray(image_np))
    return image_np, output_dict
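For completeness, run_inference_for_single_image is essentially the helper from the TensorFlow Object Detection API tutorial notebook; roughly the following (a sketch, assuming tensorflow is imported as tf and numpy as np, as above):

def run_inference_for_single_image(model, image):
    # convert the image to a batched tensor of shape [1, H, W, 3]
    input_tensor = tf.convert_to_tensor(np.asarray(image))
    input_tensor = input_tensor[tf.newaxis, ...]

    # run the detection model on the single-image batch
    output_dict = model(input_tensor)

    # strip the batch dimension and convert the outputs to plain numpy arrays
    num_detections = int(output_dict.pop('num_detections'))
    output_dict = {key: value[0, :num_detections].numpy()
                   for key, value in output_dict.items()}
    output_dict['num_detections'] = num_detections
    output_dict['detection_classes'] = output_dict['detection_classes'].astype(np.int64)
    return output_dict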