Question

我一直试图提取绘制在图像（此处为视频帧）上的边界框。我正在对带有边界框的无人机视频做稳定处理，然后需要从稳定后的帧中提取这些边界框的坐标。处理流程如下： 1. 加载当前帧。 2. 在帧上绘制边界框。 3. 稳定该帧。 4. 从稳定后的帧中提取边界框。 5. 保存边界框的坐标和帧。

以下是数据集的链接：https://engineering.purdue.edu/~bouman/UAV_Dataset/

问题是，我似乎无法可靠地找到这些边界框。

我尝试过使用稳定过程产生的变换值，并通过 cv2.transform() 和 cv2.perspectiveTransform() 将其应用于边界框坐标，但用这种方法得到的边界框完全不正确，这就是我改用上述方式的原因。

我尝试使用cv2.inRange查找遮罩，但似乎无法找到边界框。我也尝试过手动搜索图像中的像素值以找到边框颜色值，但似乎根本找不到它。我也尝试过cv2.findContours，但是由于这需要灰度图像，因此对我来说是无用的。

我使用vidstab库进行视频稳定。

from vidstab.VidStab import VidStab
from vidstab.vidstab_utils import build_transformation_matrix
from glob import glob
import cv2
import argparse
import os
import numpy as np
from tqdm import tqdm

def _str_to_bool(value):
    """Convert a command-line string such as "True" or "no" to a bool.

    ``type=bool`` cannot be used with argparse for this purpose: ``bool`` of
    any non-empty string, including ``"False"``, is ``True``.

    Raises:
        argparse.ArgumentTypeError: if the text is not a recognised boolean.
    """
    normalized = value.strip().lower()
    if normalized in ("true", "t", "yes", "y", "1"):
        return True
    # The empty string is kept as False, which is what bool("") returned.
    if normalized in ("false", "f", "no", "n", "0", ""):
        return False
    raise argparse.ArgumentTypeError(
        "expected a boolean value, got {!r}".format(value))


# Command-line interface of the stabilization script.
parser = argparse.ArgumentParser()
parser.add_argument("-videos", type=str, help="path to videos to stablize or single video")
parser.add_argument("-smooth", type=int, default=30, help="smoothing window size")
parser.add_argument("-border", type=str, default="black", help="border type")
parser.add_argument("-plot", type=_str_to_bool, default=True, help="True or False for plotting transforms on graph")
options = parser.parse_args()

# get coordinates from raw annotation file
def get_coords(raw_anno):
    """Return the box entries found in one raw annotation line.

    The third ``:``-separated field of ``raw_anno`` is split on ``)``; each
    piece has its ``(``, surrounding commas and surrounding whitespace
    removed.

    Args:
        raw_anno: one line of the annotation file.

    Returns:
        A list of strings, one per box, each holding the text that was
        between a pair of parentheses (e.g. ``"1,2,3,4"``).  Empty when the
        line carries no boxes.

    Raises:
        IndexError: if the line has fewer than three ``:``-separated fields.
    """
    boxes_field = raw_anno.split(":")[2].strip().strip(",")
    cleaned = (
        piece.strip().replace("(", "").strip(",").strip()
        for piece in boxes_field.split(")")
    )
    # Filter after cleaning: a piece holding only whitespace or commas would
    # otherwise survive as "" and make the caller's int() conversion fail.
    return [entry for entry in cleaned if entry]

# Accept either a directory of .mov clips or the path of a single video.
if os.path.isdir(options.videos):
    # os.path.join keeps the pattern inside the directory even when the
    # argument was given without a trailing path separator.
    video_paths = sorted(glob(os.path.join(options.videos, "*.mov")))
else:
    video_paths = [options.videos]

# Template of the per-clip output directory: (clip path, border type).
frame_save_path = "{}_{}_frames/"

def _extract_drawn_boxes(image, tolerance=60):
    """Locate the green rectangles drawn on ``image``.

    The boxes are drawn in pure green (0, 255, 0).  After the frame has been
    warped the outline pixels are presumably blended with their neighbours,
    so an exact colour match is too strict; a tolerance band around pure
    green is used instead.  This is a heuristic: naturally green pixels in
    the same band, or borders that mirror image content, would be reported
    as well.

    Args:
        image: BGR image (numpy array) to search.
        tolerance: allowed per-channel deviation from pure green.

    Returns:
        A list of ``(y, x, y2, x2)`` tuples, one per connected green outline.
    """
    lower = np.array([0, 255 - tolerance, 0], dtype=np.uint8)
    upper = np.array([tolerance, 255, tolerance], dtype=np.uint8)
    mask = cv2.inRange(image, lower, upper)
    # findContours returns 3 values in OpenCV 3 and 2 in OpenCV 4; the
    # contour list is the second-to-last element in both.
    contours = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
    boxes = []
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        if w < 2 or h < 2:
            # isolated pixels or slivers are not a box outline
            continue
        boxes.append((y, x, y + h - 1, x + w - 1))
    return boxes


# The total is the same for every clip, so report it once.
print("Processing {} videos.".format(len(video_paths)))

for i, video in enumerate(video_paths):
    # splitext only removes the extension; splitting on the first "." would
    # truncate paths such as "./clip.mov" to an empty string.
    clip_path = os.path.splitext(video)[0]
    save_path = frame_save_path.format(clip_path, options.border)
    # One stabilizer sees the frames with boxes drawn, the other the clean
    # frames, so both outputs can be produced.
    stabilizer = VidStab(kp_method='SURF')
    stabilizer_save = VidStab(kp_method='SURF')
    vidcap = cv2.VideoCapture(video)
    frame_count = vidcap.get(cv2.CAP_PROP_FRAME_COUNT)
    counter = 0
    os.makedirs(save_path, exist_ok=True)
    os.makedirs(clip_path + "_ground_truth", exist_ok=True)
    print("Processing video {}: {}".format(i+1, video))
    pbar = tqdm(desc="stabilizing {}".format(video), total=frame_count, unit="frame")

    with open(clip_path + "_gt.txt", "r") as f:
        raw_annos = f.readlines()

    # The last line of the annotation file is dropped (slicing is safe on an
    # empty file, unlike ``del raw_annos[-1]``).
    raw_annos = raw_annos[:-1]
    transformed_annos = []

    while True:
        grabbed_frame, frame = vidcap.read()
        # np.copy(None) yields a 0-d object array rather than None, so only
        # real frames are copied; None is handed to the stabilizer as-is.
        no_gt_frame = None if frame is None else frame.copy()
        # draw bounding box on original frame
        if frame is not None and counter < min(frame_count - 1, len(raw_annos)):
            for c in get_coords(raw_annos[counter]):
                c = [int(int_c) for int_c in c.split(",")]
                # annotation order is swapped into the (x, y) order cv2 expects
                c = [c[1], c[0], c[3], c[2]]
                cv2.rectangle(frame, (c[0], c[1]), (c[2], c[3]), (0, 255, 0), 1)

        # stabilize the frame
        # NOTE(review): a None frame is passed on purpose once the video is
        # exhausted -- per the vidstab README this flushes buffered frames.
        stabilized_frame = stabilizer.stabilize_frame(input_frame=frame, border_type=options.border, smoothing_window=options.smooth)
        no_gt_stabilized_frame = stabilizer_save.stabilize_frame(input_frame=no_gt_frame, border_type=options.border, smoothing_window=options.smooth)
        if stabilized_frame is None:
            # There are no more frames available to stabilize
            break

        # save frames and extract boxes once the smoothing window is reached
        if counter > options.smooth:
            out_index = counter - options.smooth
            # NOTE(review): this writes the raw current frame, not the
            # stabilized one, under the delayed index -- confirm the intent.
            # While flushing there is no current frame, so nothing is written.
            if frame is not None:
                cv2.imwrite(clip_path + "_ground_truth/{}.jpg".format(out_index), frame)
            cv2.imwrite(save_path + "{}.jpg".format(out_index), no_gt_stabilized_frame)

            current_t_anno = _extract_drawn_boxes(stabilized_frame)
            if not current_t_anno:
                transformed_annos.append("None")
            else:
                transformed_annos.append("{}".format(current_t_anno).strip("[").strip("]"))
            pbar.update()

        counter += 1

    vidcap.release()
    pbar.close()

    # save annos:
    with open(clip_path + "_gt_transformed.txt", "w") as f:
        f.writelines(line + "\n" for line in transformed_annos)

在图像稳定后，我需要能够提取绘制在图像上的所有边界框。我花了一个星期尝试将变换值直接应用于边界框坐标，但都失败了，所以只能采用这种方案。非常感谢任何帮助！

如何使用OpenCV从图像中提取边界框坐标？

0 个答案: