我正在练习把字符串哈希成整数，方法包括按长度、按首字符、按余数等。目前我在实现“余数哈希”部分。按要求，经过余数哈希函数之后得到的概率应当小于 1（p < 1），但我的余数函数得到的结果却是 p = 1。题目说明我必须在计算总和的循环中模拟取模运算。哈希表的总大小为 65536，取模时必须使用小于表大小的模数 65413。请给出建议。
#include <iostream>
#include <cstdint>
#include <vector>
#include <fstream>
#include <string>
#include <boost/math/distributions/chi_squared.hpp>
using namespace std;
// Shared state used by hash_by_remainder().

// One slot per bucket (65536 buckets), all starting at 0; each slot is
// overwritten with a 16-bit value derived from the word hashed into it.
std::vector<int> hashes(65536, 0);

// Number of words that landed in each of the 65536 buckets.
std::vector<int> entry_size(65536, 0);

// Input stream of whitespace-separated words, opened at program start-up.
std::ifstream file{"words.txt"};

// Scratch buffer that receives the word currently being read from `file`.
std::string line{};

// Chi-squared distribution with 65535 degrees of freedom
// (one fewer than the number of buckets).
boost::math::chi_squared c2d(65535.0);
void hash_by_remainder(){
int counter = 0;
while(file >> line){
uint16_t hashed = 0;
for ( int i=0; i < line.size(); i++){
uint16_t s_value = line.at(i);
hashed = hashed + s_value;
hashed = hashed % 65413;
}
uint16_t hashed_value = hash<string>{}(line);
hashes.at(hashed) = hashed_value;
entry_size.at(hashed) += 1;
counter++;
}
float expected = counter/65536;
float c2 = 0;
for ( int i = 0; i < 65536; i++){
c2 = c2 + (expected - entry_size[i])*(expected - entry_size[i])/expected;
}
float p = boost::math::cdf(c2d,c2);
cout << c2 << endl;
cout << p << endl; //checking the probability
}