我一直试图提取绘制在图像(这里是视频帧)上的边界框。我正在对带有边界框的无人机视频做稳定处理,然后需要从稳定后的帧中提取这些边界框的坐标。处理流程如下: 1.加载当前帧。 2.在帧上绘制边界框。 3.稳定该帧。 4.从稳定后的帧中提取边界框。 5.保存边界框的坐标和帧。
以下是数据集的链接:https://engineering.purdue.edu/~bouman/UAV_Dataset/
问题是,我似乎无法可靠地找到这些边界框。
我尝试过利用稳定过程产生的变换值,通过cv2.transform()和cv2.perspectiveTransform()将其应用于边界框坐标,但用这种方法得到的边界框完全不正确,这就是我改用上述方式的原因。
我尝试使用cv2.inRange查找遮罩,但似乎无法找到边界框。我也尝试过手动搜索图像中的像素值以找到边框颜色值,但似乎根本找不到它。我也尝试过cv2.findContours,但是由于这需要灰度图像,因此对我来说是无用的。
我使用vidstab库进行视频稳定。
from vidstab.VidStab import VidStab
from vidstab.vidstab_utils import build_transformation_matrix
from glob import glob
import cv2
import argparse
import os
import numpy as np
from tqdm import tqdm
def _str_to_bool(value):
    """Convert a command-line token such as "False" or "0" into a real bool.

    argparse's ``type=bool`` calls ``bool()`` on the raw string, so every
    non-empty value (including "False") would be parsed as True.

    Raises:
        argparse.ArgumentTypeError: if the token is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    token = value.strip().lower()
    if token in ("true", "t", "yes", "y", "1"):
        return True
    # The empty string is kept falsy, as it was under ``type=bool``.
    if token in ("false", "f", "no", "n", "0", ""):
        return False
    raise argparse.ArgumentTypeError(
        "expected a boolean value, got {!r}".format(value)
    )


# Command-line interface of the script.
parser = argparse.ArgumentParser()
# The path is mandatory: the rest of the script passes it to os.path.isdir(),
# which raises TypeError when given None.
parser.add_argument("-videos", type=str, required=True, help="path to videos to stablize or single video")
parser.add_argument("-smooth", type=int, default=30, help="smoothing window size")
parser.add_argument("-border", type=str, default="black", help="border type")
parser.add_argument("-plot", type=_str_to_bool, default=True, help="True or False for plotting transforms on graph")
options = parser.parse_args()
def get_coords(raw_anno):
    """Get the coordinate groups from one line of the raw annotation file.

    The third ``:``-separated field of ``raw_anno`` is split on ``)``; each
    piece has its ``(`` characters, surrounding commas and surrounding
    whitespace removed.

    Args:
        raw_anno: one annotation line as a string.

    Returns:
        A list of strings, one per parenthesised group, e.g.
        ``["1, 2, 3, 4", "5, 6, 7, 8"]``. Pieces that are empty after
        cleaning are dropped.

    Raises:
        IndexError: if ``raw_anno`` has fewer than three ``:``-separated fields.
    """
    payload = raw_anno.split(":")[2].strip().strip(",")
    cleaned = (
        piece.strip().replace("(", "").strip(",").strip()
        for piece in payload.split(")")
    )
    # Filter after cleaning so whitespace-only or comma-only fragments cannot
    # survive as "" entries.
    return [entry for entry in cleaned if entry]
# Collect the clips to process: every .mov file inside a directory, or the
# single path that was passed on the command line.
if os.path.isdir(options.videos):
    # os.path.join keeps the glob pattern valid whether or not the directory
    # argument ends with a path separator.
    video_paths = sorted(glob(os.path.join(options.videos, "*.mov")))
else:
    video_paths = [options.videos]
frame_save_path = "{}_{}_frames/"

# Boxes are drawn in pure BGR green (0, 255, 0). The search below uses a
# tolerance window instead of the exact value.
# NOTE(review): presumably warping blends the 1px lines with the background,
# which would explain why an exact-colour match finds nothing; these
# thresholds are a starting point and should be tuned on real footage.
_BOX_LOWER_BGR = np.array([0, 180, 0], dtype=np.uint8)
_BOX_UPPER_BGR = np.array([100, 255, 100], dtype=np.uint8)


def _extract_boxes(stabilized_frame, min_size=3):
    """Locate the green rectangles in a stabilized BGR frame.

    A colour mask is built with cv2.inRange and the bounding rectangle of
    every external contour in that mask is returned. The mask is single
    channel, so cv2.findContours can run on it directly.

    Args:
        stabilized_frame: BGR image returned by the stabilizer.
        min_size: components narrower or shorter than this many pixels are
            dropped.

    Returns:
        A sorted list of ``(y, x, y2, x2)`` tuples with inclusive corners,
        mirroring the ordering the raw annotations appear to use.

    NOTE(review): overlapping boxes merge into one component, and a line
    broken up by interpolation yields fragments; verify the output visually.
    """
    mask = cv2.inRange(stabilized_frame, _BOX_LOWER_BGR, _BOX_UPPER_BGR)
    # findContours returns 3 values on OpenCV 3 and 2 on OpenCV 4; the
    # contour list is the second-to-last element in both.
    contours = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
    boxes = []
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        if w < min_size or h < min_size:
            continue
        boxes.append((int(y), int(x), int(y + h - 1), int(x + w - 1)))
    return sorted(boxes)


print("Processing {} videos.".format(len(video_paths)))
for i, video in enumerate(video_paths):
    # splitext only removes the extension; split(".")[0] returned "" for
    # paths such as "./clip.mov".
    clip_path = os.path.splitext(video)[0]
    save_path = frame_save_path.format(clip_path, options.border)
    # Two independent stabilizers: one is fed frames with boxes drawn on them,
    # the other is fed clean copies of the same frames.
    stabilizer = VidStab(kp_method='SURF')
    stabilizer_save = VidStab(kp_method='SURF')
    vidcap = cv2.VideoCapture(video)
    frame_count = vidcap.get(cv2.CAP_PROP_FRAME_COUNT)
    counter = 0
    os.makedirs(save_path, exist_ok=True)
    os.makedirs(clip_path + "_ground_truth", exist_ok=True)
    print("Processing video {}: {}".format(i+1, video))
    pbar = tqdm(desc="stabilizing {}".format(video), total=frame_count, unit="frame")
    with open(clip_path + "_gt.txt", "r") as f:
        raw_annos = f.readlines()
    # The last annotation line is discarded, as before; the guard only avoids
    # an IndexError on an empty file.
    if raw_annos:
        del raw_annos[-1]
    transformed_annos = []
    try:
        while True:
            grabbed_frame, frame = vidcap.read()
            # Once the capture is exhausted frame is None. The clean copy must
            # then be None too: np.copy(None) is a non-None object array, and
            # the stabilizer needs a real None to flush its buffered frames.
            no_gt_frame = None if frame is None else frame.copy()
            # draw bounding box on original frame
            if frame is not None and counter < frame_count - 1 and counter < len(raw_annos):
                for c in get_coords(raw_annos[counter]):
                    c = [int(int_c) for int_c in c.split(",")]
                    # reorder the raw values into (x, y, x2, y2) for cv2.rectangle
                    c = [c[1], c[0], c[3], c[2]]
                    cv2.rectangle(frame, (c[0], c[1]), (c[2], c[3]), (0, 255, 0), 1)
            # stabilize the frame
            stabilized_frame = stabilizer.stabilize_frame(input_frame=frame, border_type=options.border, smoothing_window=options.smooth)
            no_gt_stabilized_frame = stabilizer_save.stabilize_frame(input_frame=no_gt_frame, border_type=options.border, smoothing_window=options.smooth)
            if stabilized_frame is None:
                # There are no more frames available to stabilize
                break
            # save frames and extract boxes once the smoothing window is reached
            if counter > options.smooth:
                # NOTE(review): this stores the current input frame under the
                # delayed index counter - smooth; confirm that stabilized_frame
                # was not the intended image.
                if frame is not None:
                    cv2.imwrite(clip_path + "_ground_truth/{}.jpg".format(counter - options.smooth), frame)
                if no_gt_stabilized_frame is not None:
                    cv2.imwrite(save_path + "{}.jpg".format(counter - options.smooth), no_gt_stabilized_frame)
                current_t_anno = _extract_boxes(stabilized_frame)
                if not current_t_anno:
                    transformed_annos.append("None")
                else:
                    transformed_annos.append("{}".format(current_t_anno).strip("[").strip("]"))
            pbar.update()
            counter += 1
    finally:
        # Free the capture handle and the progress bar even if a frame fails.
        vidcap.release()
        pbar.close()
    # save annos:
    with open(clip_path + "_gt_transformed.txt", "w") as f:
        for line in transformed_annos:
            f.write(line + "\n")
在图像稳定之后,我需要能够提取绘制在图像上的所有边界框。我花了一个星期尝试将变换值直接应用于边界框坐标,但没有成功,所以只能采用这种办法。非常感谢任何帮助!