我想用 Python 实现下面这段 C++ 代码的逻辑。
/// Function object that folds the bytes of a string into a 32-bit value.
///
/// Every byte is read as an unsigned char and multiplied by one entry of a
/// fixed 16-entry weight table, selected by the byte's position modulo 16.
/// The products are accumulated in a uint32_t, so the total wraps modulo 2^32.
struct hash_string
{
    hash_string() {}

    /// @param text  String whose bytes are hashed.
    /// @return      The wrapped 32-bit weighted byte sum; 0 for an empty string.
    uint32_t operator ()(const std::string &text) const
    {
        static const uint32_t weights[16] =
        {
            0x01EE5DB9, 0x491408C3, 0x0465FB69, 0x421F0141,
            0x2E7D036B, 0x2D41C7B9, 0x58C0EF0D, 0x7B15A53B,
            0x7C9D3761, 0x5ABB9B0B, 0x24109367, 0x5A5B741F,
            0x6B9F12E9, 0x71BA7809, 0x081F69CD, 0x4D9B740B,
        };

        const size_t length = text.size();
        uint32_t total = 0;
        for (size_t pos = 0; pos < length; ++pos) {
            // Go through unsigned char first so bytes >= 0x80 are not
            // sign-extended where plain char is signed.
            const uint32_t byte_value = static_cast<unsigned char>(text[pos]);
            total += weights[pos % 16] * byte_value;
        }
        return total;
    }
};
下面是我的 Python 版本,还没有写完,因为我还没找到把文本转换成 unsigned char 的方法。请帮帮忙!
# -*- coding: utf-8 -*-
text = u'连衣裙女韩范'
primes = [0x01EE5DB9, 0x491408C3, 0x0465FB69, 0x421F0141,
0x2E7D036B, 0x2D41C7B9, 0x58C0EF0D, 0x7B15A53B,
0x7C9D3761, 0x5ABB9B0B, 0x24109367, 0x5A5B741F,
0x6B9F12E9, 0x71BA7809, 0x081F69CD, 0x4D9B740B]
//*text[i] does not work (of course), but how to mimic the logic above
rand = [primes[i & 15]***text[i]** for i in range(len(text))]
print rand
sum_agg = sum(rand)
print sum_agg
以 text = u'连衣裙女韩范' 为例,C++ 版本中 text.size() 返回 18,sum 为 2422173716;而在 Python 中,我不知道怎样才能让长度也等于 18。
让文本长度保持一致至关重要,至少这是第一步。
答案 0 :(得分:2)
因为你使用的是 unicode,要精确重现 C++ 的结果,需要先把 text 转换成字节序列
(也就是 C++ 中的 char)。
bytes_ = text.encode("utf8")
# when iterated over this will yield ints (in python 3)
# or single character strings in python 2
你应该用更 Pythonic 的写法来同时迭代这两个序列
pairs = zip(bytes_, primes)
如果 bytes_ 比 primes 更长,
该怎么办?使用 itertools.cycle
from itertools import cycle
pairs = zip(bytes_, cycle(primes))
完整代码如下:
from itertools import cycle

# Sample input; it is hashed over its UTF-8 bytes, not its code points.
text = u'连衣裙女韩范'
# 16-entry multiplier table; reused cyclically when the input is longer.
primes = [0x01EE5DB9, 0x491408C3, 0x0465FB69, 0x421F0141,
0x2E7D036B, 0x2D41C7B9, 0x58C0EF0D, 0x7B15A53B,
0x7C9D3761, 0x5ABB9B0B, 0x24109367, 0x5A5B741F,
0x6B9F12E9, 0x71BA7809, 0x081F69CD, 0x4D9B740B]
# bytearray yields ints in the range 0..255 on both Python 2 and Python 3,
# so no version-specific branch (ord() vs. plain iteration) is needed.
utf8_bytes = bytearray(text.encode("utf8"))
# One product per byte: the i-th byte is paired with primes[i % 16].
rand = [byte * prime for byte, prime in zip(utf8_bytes, cycle(primes))]
# Python ints are unbounded; mask to 32 bits to get the same wraparound
# as a sum accumulated in an unsigned 32-bit integer.
hash_ = sum(rand) & 0xFFFFFFFF