Question

我创建了一个模板类，它用几种不同的方式把字符串散列为 0 到 999 之间的整数。我想比较散列值和索引值，当二者不同时把计数加一，最后返回这个计数，从而得到冲突的总数。我的问题是：我这样做是否正确？因为把这些代码拼到一起的时候，我基本上是在猜。

代码：

#include <cstddef>
#include <iostream>
#include <list>
#include <stdexcept>
#include <string>

// Fixed-capacity hash table that maps std::string keys to values of type T.
//
// The table holds SIZE slots in a plain array. A key's home slot is chosen by
// hash(); when that slot is taken by a different key, probe() is consulted for
// an alternative slot. T must be default-constructible, and print() further
// requires T to be streamable to std::ostream.
template<typename T>
class Hash
{
protected:

    // Capacity of the hash table; every hash function reduces modulo SIZE
    static const size_t SIZE = 1000;

    // Defines an entry (one slot) in the hash table
    class Entry
    {
    public:
        std::string key;  // Key stored in this slot (empty until the slot is used)
        T value;          // Value associated with key
        bool used;        // True once a key has been stored in this slot

        // Creates an empty slot. The value is value-initialized directly in
        // the initializer list, so T is never left indeterminate and does not
        // need to be assignable just to build the table.
        Entry() : key(), value(), used(false)
        {
        }
    };

    // The hash table entries
    Entry entries[SIZE];

    // Hash function #1
    size_t hash1(const std::string& k) const;

    // Hash function #2
    size_t hash2(const std::string& k) const;

    // Hash function #3
    size_t hash3(const std::string& k) const;

    // Calculate the hash of a given key
    //  TODO: change this to use the desired hash function
    size_t hash(const std::string& k) const
    {
        return hash1(k);
    }

    // Perform linear probing on the given key and index to get index
    size_t probe(const std::string& k, size_t i) const;
public:
    // Access data item in hash for the given key
    T& operator[](const std::string& k);

    // Writes one line per slot to std::cout in the form
    // "<index>-<used>-<key>-<value>", covering all SIZE slots.
    // Lines end with '\n' and the stream is flushed once at the end rather
    // than after every line.
    void print() const
    {
        for (size_t i = 0; i < SIZE; i++)
            std::cout << i << "-" << entries[i].used << "-" << entries[i].key
                    << "-" << entries[i].value << '\n';
        std::cout << std::flush;
    }

    // Collision count for the table; defined outside the class body.
    size_t collision(const std::string& k) const;
};

// Hash function #1: additive hash.
//
// Sums the byte values of k and reduces the sum modulo SIZE, so the result is
// always in [0, SIZE). An empty key hashes to 0. Because addition is
// commutative, keys that are permutations of each other hash alike.
//
// Bytes are read as unsigned char and accumulated in size_t: the result then
// does not depend on whether plain char is signed on the target platform, and
// a very long key cannot trigger signed-integer overflow. Results for keys
// made of 7-bit ASCII characters are unchanged.
template<typename T>
size_t Hash<T>::hash1(const std::string& k) const
{
    size_t index = 0;
    for (size_t i = 0; i < k.size(); i++)
        index += static_cast<unsigned char>(k[i]);
    return index % SIZE;
}

// Hash function #2: weighted additive hash.
//
// Each byte c of k contributes c + 27*c + 729*c (i.e. 757*c) to the sum, which
// is then reduced modulo SIZE, giving a result in [0, SIZE). An empty key
// hashes to 0. Every position uses the same weight, so the result does not
// depend on character order.
//
// Bytes are read as unsigned char and accumulated in size_t so the result is
// independent of plain-char signedness and long keys cannot cause
// signed-integer overflow. Results for short 7-bit ASCII keys are unchanged.
template<typename T>
size_t Hash<T>::hash2(const std::string& k) const
{
    size_t index = 0;
    for (size_t i = 0; i < k.size(); i++)
    {
        const size_t c = static_cast<unsigned char>(k[i]);
        index += (c + 27 * c + 729 * c);
    }
    return index % SIZE;
}

// Hash function #3: polynomial-style rolling hash.
//
// For each byte c of k the running value becomes index + 37*index + c, and the
// final value is reduced modulo SIZE, giving a result in [0, SIZE). An empty
// key hashes to 0. Unlike hash1/hash2 the result depends on character order.
//
// The running value grows geometrically, so a signed int accumulator overflows
// (undefined behavior) after only a handful of characters. It is therefore
// kept in size_t, where wrap-around is well defined, and bytes are read as
// unsigned char so the result does not depend on plain-char signedness.
//
// NOTE(review): `index += 37 * index + c` multiplies the running value by 38,
// not 37; if `index = 37 * index + c` was intended, confirm and adjust.
template<typename T>
size_t Hash<T>::hash3(const std::string& k) const
{
    size_t index = 0;
    for (size_t i = 0; i < k.size(); i++)
    {
        index += 37 * index + static_cast<unsigned char>(k[i]);
    }
    return index % SIZE;
}

// Linear probing: starting at slot i, walk forward (wrapping at SIZE) until a
// slot is found that is either unused or already holds key k.
//
// Parameters:
//   k - key being looked up or inserted
//   i - slot index to start probing from (reduced modulo SIZE before use)
// Returns:
//   index of the first slot, in probe order, that is free or holds k
// Throws:
//   std::length_error if all SIZE slots are occupied by other keys. The probe
//   counter is advanced on every step so a full table ends the search instead
//   of looping forever.
template<typename T>
size_t Hash<T>::probe(const std::string& k, size_t i) const
{
    size_t index = i % SIZE;
    for (size_t count = 0; count < SIZE; count++)
    {
        if (!entries[index].used || entries[index].key == k)
            return index;
        index = (index + 1) % SIZE;
    }
    // Every slot was inspected once and none is free or owned by k.
    throw std::length_error("Hash table is full");
}

// Returns a reference to the value stored under key k.
//
// The lookup starts at the slot given by hash(k). If that slot is already
// occupied by a different key, probe() supplies the slot to use instead.
// When the chosen slot is still unused, k is recorded there and the slot is
// marked used, so the caller receives the slot's initial value.
template<typename T>
T& Hash<T>::operator[](const std::string& k)
{
    size_t slot = hash(k);

    // Home slot belongs to another key: ask probe() for the slot to use.
    const bool takenByOtherKey = entries[slot].used && entries[slot].key != k;
    if (takenByOtherKey)
        slot = probe(k, slot);

    Entry& entry = entries[slot];
    if (entry.used)
        return entry.value;

    // First use of this slot: claim it for k.
    entry.key = k;
    entry.used = true;
    return entry.value;
}

// Asker's attempt at counting collisions in the table.
//
// As written, this walks all SIZE slots and counts the slots whose stored
// value compares equal to the slot index j. It does not call hash(), does not
// read entries[j].key or entries[j].used, and never uses the parameter k.
// NOTE(review): comparing the index with the stored value (type T) is not a
// comparison with the key's hash; a collision count would need
// hash(entries[j].key) checked against j for used slots only.
template<typename T>
size_t Hash<T>::collision(const std::string& k) const
{
    int count = 0; // signed counter, converted to size_t on return
    for (int j = 0; j < SIZE; j++) // signed j is compared against the size_t SIZE
    {
        if (j == entries[j].value) // index vs. stored VALUE, not vs. hash of the key
        {
            count++;
        }
    }
    return count;
}

由于列表的大小是常数 1000，索引值总是在 0 到 999 之间。知道这一点之后，如果我把索引与散列值（entries[j].value）进行比较，而二者相同，那么我就知道值是相同的，也就是发生了冲突，于是可以把计数加一。

我这样做是因为我的教授说：“你可以通过检查哈希表中的每个条目，并将所存储键的哈希值与它在表中的索引进行比较来确定这一点。”说实话，我不确定他是什么意思。我先在这里道个歉，因为我强烈地感觉我的做法是完全错误的。

Answer 1

键的哈希值告诉你该值应当存放在表中的哪个位置。如果某个值不在其键的哈希值所指示的位置上，就说明发生了冲突。

你的代码在统计冲突时犯了两个错误：1. 它没有考虑到表中的某个位置可能未被使用。2. 它没有计算并检验键的哈希值。

// Counts the stored keys that are not sitting in their home slot, i.e. the
// slot that hash() assigns to them. Each such displaced key is reported as
// one collision.
template<typename T>
size_t Hash<T>::collision() const // no key parameter: the whole table is scanned
{
    size_t displaced = 0; // unsigned like the return type; a count is never negative
    for (size_t slot = 0; slot != SIZE; ++slot) // size_t index, same type as SIZE
    {
        if (!entries[slot].used)
            continue; // an empty slot cannot hold a collided key

        // A key whose hash differs from its slot index was moved off its home slot.
        if (hash(entries[slot].key) != slot)
            ++displaced;
    }
    return displaced;
}

统计模板类中的冲突次数（C++）

1 个答案: