I am trying to rewrite a script so that it runs on higher-dimensional tensors (batches of data).
The following function works, although it does not support batched centers/scales:
import numpy as np
import cv2

def get_affine_transform(center,
                         scale,
                         rot,
                         output_size,
                         shift=np.array([0, 0], dtype=np.float32),
                         inv=0):
    if not isinstance(scale, np.ndarray) and not isinstance(scale, list):
        print(scale)
        scale = np.array([scale, scale])

    scale_tmp = scale * 200.0
    src_w = scale_tmp[0]
    dst_w = output_size[0]
    dst_h = output_size[1]

    rot_rad = np.pi * rot / 180
    src_dir = get_dir([0, src_w * -0.5], rot_rad)
    dst_dir = np.array([0, dst_w * -0.5], np.float32)

    # Three corresponding point pairs define the affine transform.
    src = np.zeros((3, 2), dtype=np.float32)
    dst = np.zeros((3, 2), dtype=np.float32)
    src[0, :] = center + scale_tmp * shift
    src[1, :] = center + src_dir + scale_tmp * shift
    dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
    dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir
    src[2:, :] = get_3rd_point(src[0, :], src[1, :])
    dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :])

    if inv:
        trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
    else:
        trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))

    return trans
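The helpers get_dir and get_3rd_point are not shown above. In pose-estimation codebases they are usually defined as below; this is an assumption included only so the snippets are self-contained, not part of my code:

def get_dir(src_point, rot_rad):
    # Rotate a 2D point by rot_rad radians.
    sn, cs = np.sin(rot_rad), np.cos(rot_rad)
    return [src_point[0] * cs - src_point[1] * sn,
            src_point[0] * sn + src_point[1] * cs]

def get_3rd_point(a, b):
    # Third point completing a right angle at b, so the three
    # points passed to cv2.getAffineTransform are never collinear.
    direct = a - b
    return b + np.array([-direct[1], direct[0]], dtype=np.float32)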
This new version of the function produces output equivalent to the one above (e.g., for a batch of two bboxes it returns the same result as running the function above on each bbox separately):
def get_affine_transforms(
        num_boxes,
        center,
        scale,
        rot,
        output_size,
        shift=np.array([0, 0], dtype=np.float32),
        inv=0
):
    if not isinstance(scale, np.ndarray) and not isinstance(scale, list):
        print(scale)
        scale = np.array([scale, scale])

    scale_tmp = scale * 200.0
    src_w = scale_tmp[:, 0]
    dst_w = output_size[0]
    dst_h = output_size[1]

    rot_rad = np.pi * rot / 180
    src_dir, dst_dir = [], []
    for i in range(num_boxes):
        src_dir.append(get_dir([0, src_w[i] * -0.5], rot_rad))
        dst_dir.append(np.array([0, dst_w * -0.5], np.float32))
    src_dir = np.array(src_dir, dtype=np.float32)
    dst_dir = np.array(dst_dir, dtype=np.float32)

    # One (3, 2) point set per box.
    src = np.zeros((num_boxes, 3, 2), dtype=np.float32)
    dst = np.zeros((num_boxes, 3, 2), dtype=np.float32)
    src[:, 0, :] = center + scale_tmp * shift
    src[:, 1, :] = center + src_dir + scale_tmp * shift
    dst[:, 0, :] = [dst_w * 0.5, dst_h * 0.5]
    dst[:, 1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir
    src[:, 2:, :] = get_3rd_points(src[:, 0, :], src[:, 1, :])
    dst[:, 2:, :] = get_3rd_points(dst[:, 0, :], dst[:, 1, :])

    # cv2.getAffineTransform only accepts a single (3, 2) point set,
    # so the 2x3 matrices are computed one box at a time and stacked.
    trans = []
    for src_, dst_ in zip(src, dst):
        if inv:
            trans.append(cv2.getAffineTransform(np.float32(dst_), np.float32(src_)))
        else:
            trans.append(cv2.getAffineTransform(np.float32(src_), np.float32(dst_)))
    return np.array(trans)
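get_3rd_points is likewise not shown. A batched variant consistent with the assignment into src[:, 2:, :] might look like the sketch below; with it, the per-box equivalence described above can be checked. Both the helper and the example values are assumptions, not taken from my actual code:

def get_3rd_points(a, b):
    # Batched get_3rd_point: a and b have shape (num_boxes, 2).
    direct = a - b
    third = b + np.stack([-direct[:, 1], direct[:, 0]], axis=1)
    return third[:, None, :]  # (num_boxes, 1, 2), matching src[:, 2:, :]

# Quick equivalence check for a batch of two boxes (illustrative values):
centers = np.array([[120., 160.], [300., 240.]], dtype=np.float32)
scales = np.array([[1.2, 1.2], [0.8, 0.8]], dtype=np.float32)
batched = get_affine_transforms(2, centers, scales, rot=0, output_size=(192, 256))
for i in range(2):
    single = get_affine_transform(centers[i], scales[i], 0, (192, 256))
    assert np.allclose(batched[i], single)
print(batched.shape)  # (2, 2, 3): a stack of 2x3 matrices, not a single matrix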
However, later in the program the following snippet fails inside cv2.warpAffine (data_numpy is the image):
trans = get_affine_transforms(len(boxes), c, s, r, config.MODEL.IMAGE_SIZE)
input = cv2.warpAffine(data_numpy,
                       trans,
                       (int(config.MODEL.IMAGE_SIZE[0]),
                        int(config.MODEL.IMAGE_SIZE[1])),
                       flags=cv2.INTER_LINEAR)
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])
input = transform(input).unsqueeze(0)
The error is:
OpenCV(3.4.1) Error: Assertion failed ((M0.type() == 5 || M0.type() == 6) && M0.rows == 2 && M0.cols == 3) in warpAffine, file /io/opencv/modules/imgproc/src/imgwarp.cpp, line 2700
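The assertion says that the matrix passed to warpAffine must be a single 2x3 float32/float64 matrix, whereas get_affine_transforms returns an array of shape (num_boxes, 2, 3). Since warpAffine has no batched mode, one possible workaround is to warp each box separately and stack the crops afterwards. This is only a sketch, assuming torch is imported and reusing the transform defined above:

trans = get_affine_transforms(len(boxes), c, s, r, config.MODEL.IMAGE_SIZE)
crops = []
for t in trans:  # each t has shape (2, 3), which warpAffine accepts
    crops.append(cv2.warpAffine(
        data_numpy,
        t,
        (int(config.MODEL.IMAGE_SIZE[0]), int(config.MODEL.IMAGE_SIZE[1])),
        flags=cv2.INTER_LINEAR))
input = torch.stack([transform(crop) for crop in crops])  # (num_boxes, 3, H, W)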
How should I call cv2.warpAffine so that I can build a higher-dimensional input (which will eventually be fed to a neural network)?