我通过ctypes构建c ++ dll的python包装器。前面提到的库广泛使用OpenCV2.2(使用旧的C Api)。
我想将OpenCV的IplImage结构的char * imageData字段转换为numpy数组。我已经搜索了SO和网络几天,但似乎没有任何解决方案可以解决我的问题。
这就是问题所在。我用2张图片测试了我的实现:一张尺寸为600x599(并且一切都很好),另一张尺寸为602x600(这是问题所在)。两者都是彩色图像(3通道图像)。我用几张尺寸为602x600的图像测试了实现,并且总是让图像失真。
我猜测OpenCV添加到图像中的填充可能会有一些奇怪的事情(尽管我认为在我的实现中已经处理了它),但我无法完全理解它
事情是,第二张图像显示在"处理后#34;在c ++ dll中执行(暂时没有),我只能认为我做错了将IplImage数据(imageData)转换回numpy数组。
这是C ++源代码:
char* do_something(IplImage *img, int* image_size)
{
// returning the image data
return get_data_from_iplimage
(
img, // doing zero processing for now
image_size
);
}
char* get_data_from_iplimage(IplImage* img, int* image_size)
{
// getting the image total size
*image_size = img->imageSize;
// copying data
char* image_data = new char[img->imageSize];
memcpy(image_data, img->imageData, img->imageSize);
// releasing the Iplimage*
cvReleaseImage(&img);
// returning the image data
return image_data;
}
这是Python源代码:
# Image type (IplImage)
IPL_DEPTH_SIGN = 0x80000000
IPL_DEPTH_1U = 1
IPL_DEPTH_8U = 8
IPL_DEPTH_16U = 16
IPL_DEPTH_32F = 32
IPL_DEPTH_64F = 64
IPL_DEPTH_8S = IPL_DEPTH_SIGN + IPL_DEPTH_8U
IPL_DEPTH_16S = IPL_DEPTH_SIGN + IPL_DEPTH_16U
IPL_DEPTH_32S = IPL_DEPTH_SIGN + 32
def depth2dtype(depth):
if depth is IPL_DEPTH_8U:
return np.dtype('uint8')
elif depth is IPL_DEPTH_8S:
return np.dtype('int8')
elif depth is IPL_DEPTH_16U:
return np.dtype('uint16')
elif depth is IPL_DEPTH_16S:
return np.dtype('int16')
elif depth is IPL_DEPTH_32S:
return np.dtype('int32')
elif depth is IPL_DEPTH_32F:
return np.dtype('float32')
elif depth is IPL_DEPTH_64F:
return np.dtype('float64')
else:
# This is probably a good default
return np.dtype('uint8')
def get_iplimage_ptr(img):
# None is considered as the NULL pointer
if img is None:
return None # the same thing as 'return img'
# getting image dimensions and data
height, width, n_channels = get_ndarray_dimensions(img)
img_data = img.tostring()
# creating the image header
cv_img = cv2.cv.CreateImageHeader((width, height), cv2.cv.IPL_DEPTH_8U, n_channels)
width_step = img.dtype.itemsize * n_channels * width # creating the famous 'width_step' parameter
cv2.cv.SetData(cv_img, None, width_step)
# setting the data (img is a numpy array)
ipl = iplimage_t.from_address(id(cv_img))
ipl_img_ptr = ipl.ipl_ptr.contents
ipl_img_ptr.imageData = img_data
# returning the OpenCV2.2 compatible image (IplImage*)
return ipl_img_ptr
def get_ndarray_dimensions(img):
# getting image shape information
img_shape = img.shape
img_shape_length = len(img_shape)
# validating parameters
if img_shape_length <= 1 or img_shape_length > 3:
raise ArgumentError('Invalid image information. We support images with 1, 2 or 3 channels only.')
# getting the amount of channels
nc = 1 if img_shape_length == 2 else img_shape[2]
# building the processed image
h, w = img_shape[0], img_shape[1]
# returning the height, width and nChannels
return h, w, nc
def build_ndarray_from_data(str_data, img_shape):
# getting image dimensions
height, width, n_channels = img_shape
# getting the ndarray datatype
dtype = depth2dtype(IPL_DEPTH_8U)
# building a numpy ndarray from the string data
ndarray = np.fromstring(str_data, dtype)
# reshaping to 'height' rows
# width_step = len(str_data) / height
ndarray = ndarray.reshape(height, -1)
# removing the padding added by opencv to each row
cols = dtype.itemsize * width * n_channels
ndarray = ndarray[:, :cols]
# reshaping to the final ndarray dimensions
ndarray = ndarray.reshape((height, width, n_channels))
# returning the numpy array that represents the image
return ndarray
# python wrapper function to the c++ function
def do_something(img):
# getting the IplImage*
iplimage_ptr = get_iplimage_ptr(img)
# calling the c++ function
image_size = c_int(0)
byte_data = __c_do_something(iplimage_ptr, byref(image_size))
str_data = string_at(byte_data, image_size.value)
# getting the image dimensions
img_shape = get_ndarray_dimensions(img)
# building the processed image
proc_img = build_ndarray_from_data(str_data, img_shape)
# returning the processed image
return proc_img
# does something ('pointer' to the c function)
__c_do_something = c_func(
'do_something', _cdll, c_byte_p,
('img', POINTER(IplImage), 1), # IplImage *img
('image_size', c_int_p, 1), # int* image_size
)
我为源代码的长度道歉(尽管缺少一些定义),但我想&#34;显式优于隐式&#34;,jeje。
任何帮助都将不胜感激。
PD:如果它有助于我使用Python 2.7,Numpy 1.7.1,OpenCV2.2(预编译),Visual Studio 2013(Visual C ++)和Windows 8.1。
答案 0 :(得分:1)
我可能错了,但是...对我来说,你可以将IPlImage转换为Mat,然后将其转换为python numpy数组。当然你也可以用另一种方式做到这一点 - numpy数组到Mat和Mat到IPlImage。 Here有一些代码适用于OpenCV 2.x(在Opencv 2.4.10上测试,但也适用于2.2)。如果它不适用于您的版本,它至少应该是一个很好的提示,并帮助您为您的版本编写boost python转换器。
如果 - 由于某种原因 - 您无法将IplImage转换为Mat,请告诉我们为什么我们可以尝试制定其他解决方案。
答案 1 :(得分:0)
经过几天坚持这个问题,我想我终于达成了解决方案。我决定用ctypes在Python中复制OpenCV IplImage结构,而不是传递imageData(char *)。然后,从收到的IplImage指针构建numpy数组。
顺便说一句,我还是不知道之前发生了什么,但我想在char * imageData转换为Python中的字符串时会发生一些疯狂的事情(0值转换为空字符 - 等等。)。
C ++片段现在有点简单,因为我不需要提取&#34;提取&#34;来自图像的imageData。在这里:
IplImage* do_something(IplImage *img)
{
// doing nothing
return img;
}
在Python方面,代码有点类似于旧代码。但是有一些关键方面:
以下是代码:
# Image type (IplImage)
IPL_DEPTH_SIGN = 0x80000000
IPL_DEPTH_1U = 1
IPL_DEPTH_8U = 8
IPL_DEPTH_16U = 16
IPL_DEPTH_32F = 32
IPL_DEPTH_64F = 64
IPL_DEPTH_8S = IPL_DEPTH_SIGN + IPL_DEPTH_8U
IPL_DEPTH_16S = IPL_DEPTH_SIGN + IPL_DEPTH_16U
IPL_DEPTH_32S = IPL_DEPTH_SIGN + 32
# subclassing the ctypes.Structure class to add new features
class _Structure(Structure):
def __repr__(self):
"""
Print the fields
"""
res = []
for field in self._fields_:
res.append('%s=%s' % (field[0], repr(getattr(self, field[0]))))
return self.__class__.__name__ + '(' + ','.join(res) + ')'
class IplTileInfo(_Structure):
_fields_ = []
class IplROI(_Structure):
_fields_ = \
[
# 0 - no COI (all channels are selected)
# 1 - 0th channel is selected ...
('coi', c_int),
('xOffset', c_int),
('yOffset', c_int),
('width', c_int),
('height', c_int),
]
# ipl image header
class IplImage(_Structure):
def __repr__(self):
"""
Print the fields
"""
res = []
for field in self._fields_:
if field[0] in ['imageData', 'imageDataOrigin']:
continue
res.append('%s=%s' % (field[0], repr(getattr(self, field[0]))))
return self.__class__.__name__ + '(' + ','.join(res) + ')'
IplImage._fields_ = [
("nSize", c_int),
("ID", c_int),
("nChannels", c_int),
("alphaChannel", c_int),
("depth", c_int),
("colorModel", c_char * 4),
("channelSeq", c_char * 4),
("dataOrder", c_int),
("origin", c_int),
("align", c_int),
("width", c_int),
("height", c_int),
("roi", POINTER(IplROI)),
("maskROI", POINTER(IplImage)),
("imageID", c_void_p),
("tileInfo", POINTER(IplTileInfo)),
("imageSize", c_int),
("imageData", c_byte_p),
("widthStep", c_int),
("BorderMode", c_int * 4),
("BorderConst", c_int * 4),
("imageDataOrigin", c_char_p)]
class iplimage_t(_Structure):
_fields_ = \
[
('ob_refcnt', c_ssize_t),
('ob_type', py_object),
('ipl_ptr', POINTER(IplImage)),
('data', py_object),
('offset', c_size_t)
]
# gets the dimensions of a numpy ndarray
def get_ndarray_dimensions(img):
# getting image shape information
img_shape = img.shape
img_shape_length = len(img_shape)
# validating parameters
if img_shape_length <= 1 or img_shape_length > 3:
raise ArgumentError('Invalid image information. We support images with 1, 2 or 3 channels only.')
# getting the amount of channels
nc = 1 if img_shape_length == 2 else img_shape[2]
# building the processed image
h, w = img_shape[0], img_shape[1]
# returning the height, width and nChannels
return h, w, nc
def build_ndarray_from_data(iplimage_ptr, img_shape):
# getting image dimensions
height, width, n_channels = img_shape
# getting the IplImage*
iplimage = iplimage_ptr.contents
# creating the image header
cv_img = cv2.cv.CreateImageHeader((width, height), IPL_DEPTH_8U, n_channels)
# getting the char* from byte data
str_data = string_at(iplimage.imageData, iplimage.imageSize)
# setting the image data
cv2.cv.SetData(cv_img, str_data, iplimage.widthStep)
# building a CvMat image
cv_mat = cv_img[:, :]
# building the ndarray from the CvMat image
ndarray = np.asarray(cv_mat)
# returing the built ndarray
return ndarray
# python wrapper function to the c++ function
def do_something(img):
# getting the IplImage*
iplimage_ptr = get_iplimage_ptr(img)
# calling the c++ function
ipl_ptr = __c_do_something(iplimage_ptr)
# getting the image dimensions
img_shape = get_ndarray_dimensions(img)
# building the processed image
proc_img = build_ndarray_from_data(ipl_ptr, img_shape)
# returning the processed image
return proc_img
# does something ('pointer' to the c function)
__c_do_something = c_func(
'do_something', _cdll, POINTER(IplImage),
('img', POINTER(IplImage), 1), # IplImage *img
)
希望它有所帮助;)。
PS:我为代码的长度道歉,但我试图提供最接近的工作示例。使用ctypes加载已编译的C ++ .dll取决于您(:。
答案 2 :(得分:0)
你不需要Python IplImage,只需这样做
C档案:
void *return_frame;
extern "C" void* get_rgb_frame(){
return return_frame;
}
#define FRAME_BUFSIZE (1920 * 1080 * 3 + 1)
return_frame = malloc(FRAME_BUFSIZE);
memset(return_frame, 0x00, FRAME_BUFSIZE + 1);
IplImage* pImg = cvLoadImage("test.jpg",-1);
memcpy(return_frame, 1920 * 1080 * 3);
cvReleaseImage(&pImg);
Python文件:
dll.get_rgb_frame.restype = c_void_p
yv12_img = dll.get_rgb_frame()
imagedata = string_at(yv12_img, 1920 * 1080 * 3)
cv_img = cv2.cv.CreateImageHeader((1920, 1080), cv2.cv.IPL_DEPTH_8U, 3)
cv2.cv.SetData(cv_img, imagedata, 3 * 1920)
cv_mat = cv_img[:]
array = np.asarray(cv_mat)
cv2.imshow('jinfeng', array)
你可以在Python中显示图像