我正在尝试把一个 Python 模块转换为 Cython,这个模块需要做大量的序列化和反序列化工作。
目前我只能这样写:
import struct
from libc.stdint cimport (
int32_t,
int64_t,
)
cpdef bytes write_int(int32_t i):
    """Pack ``i`` with the struct format ``"!i"`` (network byte order)."""
    cdef bytes packed = struct.pack("!i", i)
    return packed
cpdef bytes write_long(int64_t i):
    """Pack ``i`` with the struct format ``"!q"`` (network byte order)."""
    cdef bytes packed = struct.pack("!q", i)
    return packed
cdef bytes write_double(double val):
    """Pack ``val`` with the struct format ``"!d"`` (network byte order)."""
    cdef bytes packed = struct.pack("!d", val)
    return packed
cdef bytes write_string(bytes val):
    """Return ``val`` preceded by its length packed with the ``"i"`` code.

    The whole record is produced by one ``struct.pack`` call in network
    byte order: the length first, then the payload bytes unchanged.
    """
    # The length is narrowed to int32_t because it is written with "i".
    cdef int32_t length = len(val)
    return struct.pack("!i%ds" % length, length, val)
C 库中是否有与 struct.pack 和 struct.unpack 等价的函数?在 Cython 中做这类事情的最佳方式是什么?
答案 0 :(得分:8)
我查看了相关模块的源码(这里和这里),只是把代码翻译成 Cython,并去掉了与 PyObject 有关的部分。
理论上这应该可行,但其中有些部分(比如 float 部分)我没办法严格测试:
部分导入:
from cpython.array cimport array, clone
from libc.string cimport memcmp, memcpy
from libc.math cimport frexp, ldexp
from libc.stdint cimport int32_t, int64_t
使用融合类型(fused type)可以少写一些代码。从技术上讲它还不是一个稳定的特性,但在我这里运行得毫无问题:
# Fused type over the two integer widths serialised in this module, so that
# _write_integer can be written once and specialised for each width.
ctypedef fused integer:
    int32_t
    int64_t
这部分用于检测机器的字节序。它在我这里可以正常工作,但我并没有做完整的测试。不过话说回来,它看起来是正确的:
# In-memory layouts of a C double that the module-level probe can report:
# IEEE big-endian, IEEE little-endian, or anything it does not recognise.
cdef enum float_format_type:
    unknown_format,
    ieee_big_endian_format,
    ieee_little_endian_format
# Set-up: an empty unsigned-byte array used as the template for clone(), and a
# one-off probe of how this machine stores a C double.
cdef array stringtemplate = array('B')
cdef float_format_type double_format = unknown_format
cdef double x = 9006104071832581.0

# Compare the probe value's in-memory bytes against the two expected IEEE byte
# orders; anything else (including a non-8-byte double) stays "unknown".
if sizeof(double) == 8:
    if memcmp(&x, b"\x43\x3f\xff\x01\x02\x03\x04\x05", 8) == 0:
        double_format = ieee_big_endian_format
    elif memcmp(&x, b"\x05\x04\x03\x02\x01\xff\x3f\x43", 8) == 0:
        double_format = ieee_little_endian_format
(stringtemplate 用于快速创建 bytes 对象。)
这部分很简单:
cdef void _write_integer(integer x, char* output):
    """Store ``x`` in ``output`` most-significant byte first.

    ``output`` must have room for ``sizeof(integer)`` bytes.
    """
    cdef int pos = <int>sizeof(integer)
    # Fill from the last byte backwards, peeling off the low 8 bits each time.
    while pos > 0:
        pos -= 1
        output[pos] = <char>x
        x >>= 8
cpdef bytes write_int(int32_t i):
    """Return ``i`` serialised by ``_write_integer`` as ``sizeof(int32_t)`` bytes."""
    cdef Py_ssize_t width = sizeof(int32_t)
    # clone() with zero=False gives a fresh buffer of ``width`` bytes that the
    # array object owns; every byte is overwritten below.
    cdef array buf = clone(stringtemplate, width, False)
    _write_integer(i, buf.data.as_chars)
    return buf.data.as_chars[:width]
cpdef bytes write_long(int64_t i):
    """Return ``i`` serialised by ``_write_integer`` as ``sizeof(int64_t)`` bytes."""
    cdef Py_ssize_t width = sizeof(int64_t)
    # clone() with zero=False gives a fresh buffer of ``width`` bytes that the
    # array object owns; every byte is overwritten below.
    cdef array buf = clone(stringtemplate, width, False)
    _write_integer(i, buf.data.as_chars)
    return buf.data.as_chars[:width]
array 与 malloc 类似,但它由垃圾回收机制管理 :)。
这部分我基本上没有把握。我的“测试”通过了,但这多半只是抱着希望:
cdef void _write_double(double x, char* output) except *:
    """Write ``x`` into ``output`` as 8 bytes, most-significant byte first.

    If the module-level probe recognised the native double layout, the bytes
    of ``x`` are copied directly (reversed on little-endian machines).
    Otherwise the value is split with ``frexp`` and the sign, biased exponent
    and 52-bit fraction are assembled by hand.

    ``output`` must have room for 8 bytes.

    Raises (manual path only):
        SystemError: ``frexp`` returned a mantissa that is neither zero nor
            in ``[0.5, 1.0)``.
        OverflowError: the exponent does not fit the format.
        AssertionError: an internal range check on the fraction failed.

    The ``except *`` clause lets these exceptions reach the caller; a plain
    ``cdef void`` function has no way to report them in Cython versions that
    do not propagate exceptions from such functions by default.
    """
    cdef:
        unsigned char sign
        int e
        double f
        unsigned int fhi, flo, i
        char *s

    if double_format != unknown_format:
        # Native layout is known, so the bit pattern is already correct and
        # only the byte order may need flipping. (Previously this branch was
        # disabled by a stray "or True" on the condition.)
        s = <char*>&x
        if double_format == ieee_little_endian_format:
            for i in range(8):
                output[i] = s[7 - i]
        else:
            for i in range(8):
                output[i] = s[i]
        return

    # Manual path: build the encoding arithmetically.
    if x < 0:
        sign = 1
        x = -x
    else:
        sign = 0

    f = frexp(x, &e)

    # Normalize f to be in the range [1.0, 2.0)
    if 0.5 <= f < 1.0:
        f *= 2.0
        e -= 1
    elif f == 0.0:
        e = 0
    else:
        raise SystemError("frexp() result out of range")

    if e >= 1024:
        raise OverflowError("float too large to pack with d format")
    elif e < -1022:
        # Gradual underflow
        f = ldexp(f, 1022 + e)
        e = 0
    elif not (e == 0 and f == 0.0):
        e += 1023
        f -= 1.0  # Get rid of leading 1

    # fhi receives the high 28 bits; flo the low 24 bits (== 52 bits)
    f *= 2.0 ** 28
    fhi = <unsigned int>f  # Truncate
    assert fhi < 268435456

    f -= <double>fhi
    f *= 2.0 ** 24
    flo = <unsigned int>(f + 0.5)  # Round
    assert flo <= 16777216

    if flo >> 24:
        # The carry propagated out of a string of 24 1 bits.
        flo = 0
        fhi += 1
        if fhi >> 28:
            # And it also propagated out of the next 28 bits.
            fhi = 0
            e += 1
            if e >= 2047:
                raise OverflowError("float too large to pack with d format")

    # Byte 0: sign bit + top 7 exponent bits; byte 1: low 4 exponent bits +
    # top 4 fraction bits; the remaining six bytes carry the rest of fhi/flo.
    output[0] = (sign << 7) | (e >> 4)
    output[1] = <unsigned char> (((e & 0xF) << 4) | (fhi >> 24))
    output[2] = 0xFF & (fhi >> 16)
    output[3] = 0xFF & (fhi >> 8)
    output[4] = 0xFF & fhi
    output[5] = 0xFF & (flo >> 16)
    output[6] = 0xFF & (flo >> 8)
    output[7] = 0xFF & flo
如果您能理解它的工作原理,请务必亲自检查。
然后我们像以前一样包装它:
cdef bytes write_double(double x):
    """Return the ``sizeof(double)`` bytes that ``_write_double`` produces for ``x``."""
    cdef Py_ssize_t width = sizeof(double)
    # Scratch buffer owned by the array object; _write_double fills it.
    cdef array buf = clone(stringtemplate, width, False)
    _write_double(x, buf.data.as_chars)
    return buf.data.as_chars[:width]
字符串的版本其实非常简单,这也解释了我为什么要像上面那样组织代码:
cdef bytes write_string(bytes val):
    """Return ``val`` preceded by its length written via ``_write_integer``.

    The result is ``sizeof(int32_t)`` length bytes followed by the payload
    bytes copied unchanged.
    """
    cdef:
        int32_t prefix_size = sizeof(int32_t)
        int32_t payload_size = len(val)
        int32_t total_size = prefix_size + payload_size
        array buf = clone(stringtemplate, total_size, True)
        char* dest = buf.data.as_chars
    # Length prefix first, then the raw payload immediately after it.
    _write_integer(payload_size, dest)
    memcpy(dest + prefix_size, <char*>val, payload_size)
    return dest[:total_size]
答案 1 :(得分:0)
如果每条命令只打包一种类型的数据(例如先是一组 int,然后是一组 float 等),可以使用 array.array(),
无论在 Python 还是 Cython 中都能获得更快的结果。