我正在尝试释放为 CString 分配的内存,该字符串已通过 ctypes 传递给 Python。但是,Python 因 malloc 错误而崩溃:
python(30068,0x7fff73f79000) malloc: *** error for object 0x103be2490: pointer being freed was not allocated
以下是我用来将指针传递给ctypes的Rust函数:
// C-compatible (pointer, length) pair used to pass a coordinate buffer across
// the FFI boundary. `#[repr(C)]` keeps the two fields in declaration order so
// the struct can be mirrored by the caller (data pointer first, then length).
#[repr(C)]
pub struct Array {
// Untyped pointer to the first element; the From<Array> impls in this file
// reinterpret it as a pointer to [f64; 2].
pub data: *const c_void,
// Number of elements (not bytes) reachable through `data`.
pub len: libc::size_t,
}
// Reinterpret an Array as a mutable slice of [f64; 2] coordinate pairs.
// The slice only borrows the buffer behind `data`; it does not take ownership.
impl<'a> From<Array> for &'a mut [[f64; 2]] {
    fn from(arr: Array) -> Self {
        let first = arr.data as *mut [f64; 2];
        let count = arr.len;
        // SAFETY: nothing is checked here -- the caller has to guarantee that
        // `data` points to `len` valid, properly aligned [f64; 2] values that
        // stay alive and unaliased for the whole of 'a.
        unsafe { slice::from_raw_parts_mut(first, count) }
    }
}
// Build an Array from a Vec, so it can be leaked across the FFI boundary.
//
// The Vec is first turned into a boxed slice so that the leaked allocation
// holds exactly `len` elements. Rebuilding it later with
// `Vec::from_raw_parts(ptr, len, len)` is only valid when the stated capacity
// is the real capacity of the allocation, which simply forgetting the Vec does
// not guarantee (a Vec may carry spare capacity).
impl<T> From<Vec<T>> for Array {
    fn from(vec: Vec<T>) -> Self {
        // Drops any excess capacity: the allocation is now exactly `len` long.
        let boxed: Box<[T]> = vec.into_boxed_slice();
        let len = boxed.len() as libc::size_t;
        // `Box::into_raw` gives up ownership without running the destructor,
        // so the buffer stays alive until someone rebuilds and drops it.
        let data = Box::into_raw(boxed) as *mut T as *const libc::c_void;
        Array {
            data: data,
            len: len,
        }
    }
}
// Reassemble an owning Vec from an Array, so that dropping it releases the buffer.
// The Vec takes ownership of `data` and frees it when it goes out of scope, so
// this is only sound for a buffer that was allocated by Rust's own Vec/String
// machinery with a capacity equal to `len`.
impl From<Array> for Vec<[f64; 2]> {
    fn from(arr: Array) -> Self {
        let ptr = arr.data as *mut [f64; 2];
        let (length, capacity) = (arr.len, arr.len);
        // SAFETY: unchecked -- relies entirely on the caller upholding the
        // allocation requirements described above.
        unsafe { Vec::from_raw_parts(ptr, length, capacity) }
    }
}
// Encode the coordinates held in an Array as a Polyline string
impl From<Array> for String {
    fn from(incoming: Array) -> String {
        // Success and failure both carry a String, so an error message is
        // handed back unchanged in place of the encoded polyline.
        encode_coordinates(&incoming.into(), 5).unwrap_or_else(|message| message)
    }
}
// FFI entry point: encode `coords` and return the result as a heap-allocated,
// NUL-terminated C string. `into_raw` transfers ownership of the string to the
// caller, so the pointer has to be given back to Rust to be freed.
// Panics if the encoded text contains an interior NUL byte.
#[no_mangle]
pub extern "C" fn encode_coordinates_ffi(coords: Array) -> *mut c_char {
    let encoded = String::from(coords);
    let c_string = CString::new(encoded).unwrap();
    c_string.into_raw()
}
当 Python 把指针传回来时,我用下面的函数释放它:
// Free a C string that was previously handed out through `CString::into_raw`.
//
// `p` must be such a pointer and must not have been freed already; any other
// non-null pointer is undefined behaviour. A null pointer is accepted and
// ignored. `#[no_mangle]` keeps the symbol name stable so that foreign callers
// can look the function up as `drop_cstring`.
#[no_mangle]
pub extern "C" fn drop_cstring(p: *mut c_char) {
    // `CString::from_raw` reads through the pointer to find the string length,
    // so a null pointer has to be filtered out before it gets there.
    if p.is_null() {
        return;
    }
    // SAFETY: per the contract above, `p` came from `CString::into_raw`.
    // Rebuilding the CString and dropping it releases the allocation.
    drop(unsafe { CString::from_raw(p) });
}
我在 Python 中用下面的函数把指针转换为 str:
def char_array_to_string(res, _func, _args):
    """ Copy the C string behind ``res`` out, then free the pointer.

    The foreign function's restype is c_void_p so that ``res`` is still the
    raw address; an automatic conversion to str would lose access to it.
    """
    as_char_p = cast(res, c_char_p)
    # Read the value first: the pointer is no longer usable once it is dropped.
    text = as_char_p.value
    drop_cstring(as_char_p)
    return text
我用下面的 Python 类生成要传给 Rust 的 Array 结构:
class _FFIArray(Structure):
"""
Convert sequence of float lists to a C-compatible void array
example: [[1.0, 2.0], [3.0, 4.0]]
"""
_fields_ = [("data", c_void_p),
("len", c_size_t)]
@classmethod
def from_param(cls, seq):
""" Allow implicit conversions """
return seq if isinstance(seq, cls) else cls(seq)
def __init__(self, seq, data_type = c_double):
arr = ((c_double * 2) * len(seq))()
for i, member in enumerate(seq):
arr[i][0] = member[0]
arr[i][1] = member[1]
self.data = cast(arr, c_void_p)
self.len = len(seq)
argtypes 和 restype 的定义:
# Bind the encoder exported by the library: it takes an _FFIArray by value.
encode_coordinates = lib.encode_coordinates_ffi
encode_coordinates.argtypes = (_FFIArray,)
# c_void_p keeps the raw address available, so the pointer can be freed later.
encode_coordinates.restype = c_void_p
# Every result goes through char_array_to_string, which reads the string and
# then passes the pointer to drop_cstring.
encode_coordinates.errcheck = char_array_to_string
# Bind the matching deallocator from the same library; it returns nothing.
drop_cstring = lib.drop_cstring
drop_cstring.argtypes = (c_char_p,)
drop_cstring.restype = None
我倾向于认为问题不在 Rust 函数上,因为 dylib 崩溃会导致段错误(而且 Rust 端的 FFI 测试都能通过)。调用 FFI 函数之后,我仍然可以在 Python 中继续执行其他操作——malloc 错误是在进程退出时才出现的。
答案 0 :(得分:2)
我认为是 Rust 端的代码取得了数据的所有权,并在进程退出时尝试释放这些数据,因此问题不在 Python 代码。作为佐证,下面这段调用 encode_coordinates_ffi 和 drop_cstring 的 C 代码同样会导致段错误。
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
/* One coordinate: two consecutive doubles. */
typedef struct {
double longitude;
double latitude;
} coord_t;
/* Pointer-plus-count pair that is passed by value to encode_coordinates_ffi. */
typedef struct {
coord_t * data; /* first element of the coordinate buffer */
size_t count; /* number of coord_t elements in data */
} points_t;
/* Prototypes for the two functions called by main() below; they are not
   defined in this C file. */
char * encode_coordinates_ffi(points_t points);
void drop_cstring(void * str);
/* Stand-alone reproduction: encode two points held in a malloc'd buffer,
   print the result, then release both the buffer and the returned string. */
int main(void)
{
points_t data;
coord_t * points;
char * res;
/* The buffer comes from the C allocator. */
data.data = malloc(sizeof(coord_t) * 2);
data.count = 2;
points = (coord_t *)data.data;
points[0].latitude = 1.0;
points[0].longitude = 2.0;
points[1].latitude = 3.0;
points[1].longitude = 4.0;
res = encode_coordinates_ffi(data);
printf("%s\n", res);
/* valgrind reports this call as an invalid free: the block was already
   released inside encode_coordinates_ffi. */
free(data.data);
drop_cstring(res);
return 0;
}
valgrind -v 给出以下信息:
Invalid free() / delete / delete[] / realloc()
at 0x4C2CDFB: free (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
by 0x4007EB: main (in /tmp/rusti/a.out)
Address 0x5ea8040 is 0 bytes inside a block of size 32 free'd
at 0x4C2CDFB: free (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
by 0x4EE7FB4: alloc::heap::deallocate::h74ff05db8ae4652e (heap.rs:113)
by 0x4EE7F52: _$LT$alloc..raw_vec..RawVec$LT$T$GT$$u20$as$u20$std..ops..Drop$GT$::drop::ha72c57f32dae0328 (raw_vec.rs:567)
by 0x4EE7E5D: alloc..raw_vec..RawVec$LT$$u5b$f64$u3b$$u20$2$u5d$$GT$::drop.6367::h05166e3a96ef1f41 (in /tmp/rusti/polyline_ffi/target/debug/libpolyline_ffi.so)
by 0x4EE7E45: std..vec..Vec$LT$$u5b$f64$u3b$$u20$2$u5d$$GT$::drop_contents.6364::h68f73d9e22af548c (in /tmp/rusti/polyline_ffi/target/debug/libpolyline_ffi.so)
by 0x4EE7C69: std..vec..Vec$LT$$u5b$f64$u3b$$u20$2$u5d$$GT$::drop.6314::h68f73d9e22af548c (in /tmp/rusti/polyline_ffi/target/debug/libpolyline_ffi.so)
by 0x4EE7B9B: polyline_ffi::_$LT$impl$u20$std..convert..From$LT$Array$GT$$u20$for$u20$std..string..String$GT$::from::h3b597d62ca6eb863 (lib.rs:46)
by 0x4EE84D9: _$LT$T$u20$as$u20$std..convert..Into$LT$U$GT$$GT$::into::h996bdd9d6ba87f7b (convert.rs:209)
by 0x4EE83EB: encode_coordinates_ffi (lib.rs:57)
by 0x4007D8: main (in /tmp/rusti/a.out)
Block was alloc'd at
at 0x4C2BBCF: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
by 0x400795: main (in /tmp/rusti/a.out)
如果去掉 free(data.data),程序就能正常结束而不会发生段错误,valgrind 也没有发现任何内存泄漏。
我会尝试把接口改成下面这种形式:
typedef struct {
double longitude;
double latitude;
} coord_t;
int coordinates_ffi(char * dst, size_t n, coord_t * points, size_t npoints);
其中 dst 用于存放编码后的字符串(长度上限 n 可根据坐标数量 npoints 估算),这样调用者就不需要释放 Rust 字符串了。
答案 1 :(得分:1)
得益于 J.J. Hakala 的回答所做的工作,我得以用纯 Rust 写出一个 MCVE(最小、完整、可验证的示例):
extern crate libc;
use std::ffi::CString;
use libc::c_void;
// Stand-in for the real encoder: renders the coordinates with their Debug
// formatting behind a fixed prefix.
fn encode_coordinates(coordinates: &Vec<[f64; 2]>) -> String {
    std::fmt::format(format_args!("Encoded coordinates {:?}", coordinates))
}
// Raw (pointer, length) pair describing a buffer of [f64; 2] values.
struct Array {
// Untyped pointer to the first element.
data: *const c_void,
// Number of elements reachable through `data`.
len: libc::size_t,
}
// Rebuilds an owning Vec around `arr.data`, using `arr.len` as both length and
// capacity. The Vec takes ownership of the pointer and frees it when dropped.
impl From<Array> for Vec<[f64; 2]> {
fn from(arr: Array) -> Self {
unsafe { Vec::from_raw_parts(arr.data as *mut [f64; 2], arr.len, arr.len) }
}
}
// Encode the coordinates an Array points at, by first rebuilding them as a Vec.
impl From<Array> for String {
    fn from(incoming: Array) -> String {
        let coordinates: Vec<[f64; 2]> = incoming.into();
        encode_coordinates(&coordinates)
    }
}
// Encode `coords` and wrap the resulting text in a CString.
// Panics if the encoded text contains an interior NUL byte.
fn encode_coordinates_ffi(coords: Array) -> CString {
    let encoded: String = coords.into();
    CString::new(encoded).unwrap()
}
// Reproduces the problem in pure Rust: each iteration passes the buffer of a
// Vec it still owns to `encode_coordinates_ffi` through a raw pointer.
fn main() {
for _ in 0..10 {
let i_own_this = vec![[1.0, 2.0], [3.0, 4.0]];
// `array` only copies the pointer and the length; `i_own_this` stays the owner.
let array = Array {
data: i_own_this.as_ptr() as *const _,
len: i_own_this.len(),
};
// The conversion behind this call rebuilds a Vec from the same pointer and
// drops it; `i_own_this` is then dropped as well when the iteration ends.
println!("{:?}", encode_coordinates_ffi(array))
}
}
打印:
"Encoded coordinates [[1, 2], [3, 4]]"
"Encoded coordinates [[1, 2], [3, 4]]"
"Encoded coordinates [[0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000012169663452665325, 213780573330512200000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000], [3.0000002417770535, 4]]"
"Encoded coordinates [[0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000012169663452665325, 213780573330512200000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000], [3.0000002417770535, 4]]"
"Encoded coordinates [[0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000012169663452665325, 213780573330512200000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000], [3.0000002417770535, 4]]"
"Encoded coordinates [[0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000012169663452665325, 213780573330512200000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000], [3.0000002417770535, 4]]"
"Encoded coordinates [[0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000012169663452665325, 213780573330512200000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000], [3.0000002417770535, 4]]"
"Encoded coordinates [[0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000012169663452665325, 213780573330512200000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000], [3.0000002417770535, 4]]"
"Encoded coordinates [[0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000012169663452665325, 213780573330512200000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000], [3.0000002417770535, 4]]"
"Encoded coordinates [[0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000012169663452665325, 213780573330512200000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000], [3.0000002417770535, 4]]"
主要问题在于:
// Takes ownership of `arr.data`: the returned Vec deallocates that memory when
// it goes out of scope, whoever allocated it.
impl From<Array> for Vec<[f64; 2]> {
fn from(arr: Array) -> Self {
unsafe { Vec::from_raw_parts(arr.data as *mut [f64; 2], arr.len, arr.len) }
}
}
让我们看看 Vec::from_raw_parts 的文档:
由于有许多未经检查的不变量,这个函数非常不安全:
- ptr 必须是之前通过 String/Vec<T> 分配的(至少,如果不是,则极有可能出错)。
- length 必须小于或等于 capacity。
- capacity 必须是该指针分配时的容量。
违反这些条件可能会导致破坏分配器内部数据结构等问题。
但是,问题中的原始代码违反了第一条——指针是由 malloc 分配的。
为什么这很重要?当您调用 Vec::from_raw_parts 时,它会取得该指针的所有权。当 Vec 超出作用域时,指针指向的内存会被释放。这意味着该指针会被多次释放。
由于该函数的安全性取决于传入的内容,整个函数都应该标记为 unsafe。但在这里,这样做会违反 trait 的接口,因此您需要把它移到别处。
更合理的做法是把 Array 转换为切片。这仍然不安全,因为它依赖于传入的指针,但切片并不拥有底层内存。然后,您可以把切片转换成 Vec,这会分配新内存并复制内容。
由于 encode_coordinates 由您控制,您还应该修改它的签名。在 99.99% 的情况下,&Vec<T> 没有用处,而且实际上效率可能更低:它需要两次指针解引用,而不是一次。应改为接受 &[T],这样就可以传入更多种类型,包括数组和 Vec。