我试图使用 ctypes 从 Python(3)程序中调用 iconv(3)。`iconv` 的 C 函数原型是:
/* Prototype of iconv(3): both buffer pointers and both byte counts are
 * passed by address, so the callee can update all four of them. */
size_t iconv(iconv_t cd,
char **inptr, size_t *inbytesleft,
char **outptr, size_t *outbytesleft);
你应该像这样调用它:
char *inp = "abcdef";
char outbuf[16];
char *outp = outbuf;
size_t ibytes = strlen(inbuf);
size_t obytes = sizeof outbuf;
size_t rv = iconv(cd, &inp, &ibytes, &outp, &obytes);
显然,它会写入 `outbuf`,并且还会修改 `inp`、`outp`、`ibytes` 和 `obytes` 这四个变量,以表明在遇到问题(如果有的话)之前转换进行到了哪里。尽管参数没有声明为 `const`,它保证不会写入输入字符串。
现在,按最直接的想法,你会在 `ctypes` 中这样对应地声明它:
# Naive ctypes mirror of the C prototype: each `char **` is declared as
# POINTER(c_char_p) and each `size_t *` as POINTER(c_size_t).
iconv = libc.iconv
iconv.restype = ctypes.c_size_t
iconv.argtypes = [ctypes.c_void_p,
ctypes.POINTER(ctypes.c_char_p),
ctypes.POINTER(ctypes.c_size_t),
ctypes.POINTER(ctypes.c_char_p),
ctypes.POINTER(ctypes.c_size_t)]
(在我用来测试的 C 库中,`iconv_t` 是 `void *` 的 typedef。)但是当我尝试调用它时,却得到一个错误:
>>> obuf = ctypes.create_string_buffer(16)
>>> obuflen = ctypes.c_size_t(16)
>>> iconv(utf8_to_utf16,
... ctypes.byref(ctypes.c_char_p(b"abcdef")),
... ctypes.byref(ctypes.c_size_t(6)),
... ctypes.byref(obuf),
... ctypes.byref(obuflen))
ArgumentError: argument 4: <class 'TypeError'>: expected LP_c_char_p
instance instead of pointer to c_char_Array_16
尝试将 `obuf` 显式转换为 `c_char_p` 也不起作用:
>>> optr = ctypes.c_char_p(obuf)
TypeError: bytes or integer address expected instead of c_char_Array_16 instance
错误消息中使用的这些类型名称在手册中并没有出现,我被难住了。正确的做法是什么?
(如果你想知道我为什么要这样做而不是使用 Python 内置的编码转换器,简短的回答是:Python 的转换器所支持的编码集合与 [GNU] iconv 不同,也没有 `//TRANSLIT` 功能。)
答案 0 :(得分:0)
下面是我用来查找哪些字符会被 translit 转换为 '"' 的代码。抱歉,它不是很优雅,但我希望它对某些人仍然有用。:)
我需要在 Python 2.6 下运行它,所以我尽量把它写得通用一些。(2.6 有一些陷阱,所以如果只针对 Python 3,代码可以写得更好、更简单。)
from __future__ import print_function
import sys
import ctypes
# Handle to the C library; iconv_open, iconv and __errno_location are
# looked up on this object further down.
libc = ctypes.cdll.LoadLibrary("libc.so.6")
if sys.version_info[0] > 2:
    def unichr(a):
        """Return the one-character string for code point *a*.

        Python 3 has no ``unichr`` builtin; this shim forwards to ``chr`` so
        the rest of the script can use one name on both major versions.
        """
        return chr(a)
# C `char **`: pointer to a C string pointer.
LP_c_char2 = ctypes.POINTER(ctypes.c_char_p)
# Pointer to a 16-byte char array, the type produced by
# ctypes.create_string_buffer(16).  Spelled as `c_char * 16` directly
# instead of allocating a throwaway buffer just to read its class.
LP_c_char = ctypes.POINTER(ctypes.c_char * 16)
# __errno_location() is declared to return `int *`; element [0] of the
# result is read as the errno value.
get_errno_loc = libc.__errno_location
get_errno_loc.restype = ctypes.POINTER(ctypes.c_int)
class MyError(OSError):
    """Raised when an iconv call fails; carries the C ``errno`` value."""

    def __init__(self, e):
        """Build the error from the integer errno value *e*."""
        # Local import: the script's import block does not pull in ``os``.
        import os
        # The explicit two-argument super() form works on Python 2 and 3
        # alike, so no version switch is needed.  Passing the message too
        # populates ``errno`` and ``strerror`` instead of leaving only a
        # bare number in ``args``.
        super(MyError, self).__init__(e, os.strerror(e))
# iconv(3) reports failure as (size_t)-1.  Derive that value from the
# platform's size_t width instead of assuming a 64-bit size_t.
_ICONV_FAILURE = ctypes.c_size_t(-1).value


def errcheck(ret, func, args):
    """ctypes ``errcheck`` hook that turns a failed call into ``MyError``.

    Args:
        ret: Value returned by the foreign function.
        func: The foreign function object (unused).
        args: The arguments the function was called with (unused).

    Returns:
        *ret* unchanged when it is not the failure sentinel.

    Raises:
        MyError: If *ret* is -1 or (size_t)-1; carries the errno value read
            through ``get_errno_loc``.
    """
    if ret == -1 or ret == _ICONV_FAILURE:
        e = get_errno_loc()[0]
        raise MyError(e)
    return ret
# Open a conversion descriptor from UTF-8 to ISO8859-2; the //TRANSLIT
# suffix is the GNU iconv transliteration feature this script explores.
iconv_open = libc.iconv_open
iconv_open.restype = ctypes.c_void_p
iconv_open.argtypes = [ctypes.c_char_p, ctypes.c_char_p]
# iconv_open(3) signals failure with (iconv_t)-1.  Check it here so the
# error is raised with iconv_open's own errno rather than surfacing later
# as a misleading failure of the first iconv call.
iconv_open.errcheck = errcheck
ret = iconv_open(
    ctypes.c_char_p(b"ISO8859-2//TRANSLIT"),
    ctypes.c_char_p(b"UTF-8"))

iconv = libc.iconv
iconv.errcheck = errcheck
iconv.restype = ctypes.c_size_t
iconv.argtypes = [
    ctypes.c_void_p,
    LP_c_char2,
    ctypes.POINTER(ctypes.c_size_t),
    ctypes.POINTER(LP_c_char),
    ctypes.POINTER(ctypes.c_size_t),
]

# First call: convert a single U+201C (UTF-8 bytes E2 80 9C) and print what
# landed in the output buffer.
obuf = ctypes.create_string_buffer(16)
obuflen = ctypes.c_size_t(16)
optr = LP_c_char(obuf)
inp = b'\xe2\x80\x9c'
r = iconv(ret, LP_c_char2(ctypes.c_char_p(inp)), ctypes.byref(ctypes.c_size_t(len(inp))), ctypes.byref(optr), ctypes.byref(obuflen))
print(obuf.value)
def func(inp=b"bbb"):
    """Convert *inp* with the module-level iconv descriptor ``ret``.

    Args:
        inp: Input bytes; must be shorter than the 16-byte output buffer.

    Returns:
        A tuple ``(obuf, obuflen, r, converted)``: the output buffer, the
        ``c_size_t`` holding the bytes left unused in it, the value returned
        by iconv, and the bytes iconv actually wrote.

    Raises:
        ValueError: If *inp* is 16 bytes or longer.
        MyError: Raised by the iconv errcheck hook when the call fails.
    """
    # The size is fixed: LP_c_char is a pointer to a 16-byte char array.
    bufsize = 16
    # Explicit check instead of ``assert``, which is stripped under -O.
    if len(inp) >= bufsize:
        raise ValueError("too big input")
    obuf = ctypes.create_string_buffer(bufsize)
    obuflen = ctypes.c_size_t(bufsize)
    optr = LP_c_char(obuf)
    inlen = ctypes.c_size_t(len(inp))
    r = iconv(ret, LP_c_char2(ctypes.c_char_p(inp)), ctypes.byref(inlen),
              ctypes.byref(optr), ctypes.byref(obuflen))
    # iconv decrements obuflen by the number of bytes it wrote.  Slice .raw
    # rather than .value: .value stops at the first NUL byte and would drop
    # output that legitimately contains one.
    written = bufsize - obuflen.value
    return obuf, obuflen, r, obuf.raw[:written]
# Demo: convert two consecutive U+201C characters and print the raw buffer,
# the remaining-space counter, iconv's return value and the converted bytes.
oo, uu, r, vys = func(b'\xe2\x80\x9c\xe2\x80\x9c')
print(oo.raw, uu, r, vys)
# Scan every code point and report the ones iconv converts to '"'.
# The bound is maxunicode + 1 so the last code point is not skipped.
for i in range(sys.maxunicode + 1):
    try:
        oo, uu, r, vys = func(unichr(i).encode('utf-8'))
        if vys == b'"':
            # Printing stays inside the try: it can itself raise
            # UnicodeEncodeError when stdout cannot represent the character.
            print(i, unichr(i))
    except UnicodeEncodeError:
        # Code point that cannot be encoded (or printed); skip it.
        pass
    except MyError:
        # iconv reported an error for this character; skip it.
        pass