在c ++中重新实现hash,rehash()方法中的问题

时间:2011-08-10 17:50:25

标签: c++ hash

我被要求在c ++中重新实现哈希。哈希将用于集合。我有所有的代码。我只需要rehash功能。 rehash函数的目标是根据数据增加哈希表的大小;因此,如果数据超过表的一半大小,则应增加表大小(基于某些素数值),并在其中重新分配元素。我无法更改公共界面,但我可以更改私有部分。

#ifndef HASHSET_H
#define HASHSET_H

//
//      Hash table implementation of set
//         (open addressing, quadratic hashing)


#include <algorithm>
#include <functional>
#include <iterator>
#include <vector>
#include <iostream>
#include <cassert>


enum HashStatus { Occupied, Empty, Deleted };

template <class T>
struct HashEntry 
{
  T data;
  unsigned hash;
  HashStatus info;

  HashEntry(): info(Empty)  {}
  HashEntry(const T& v, unsigned h, HashStatus status)
    : data(v), hash(h), info(status) {}

private:
  // Forbid copying of HashEntries
  HashEntry(const HashEntry<T>&) {}
  void operator=(const HashEntry<T>&) {}
  // (If you think you need to copy these, you should consider
  // copying pointers to them instead.)
};


template <class T, class HashFun, class CompareEQ=std::equal_to<T> >
class hash_set  
{
public:
  typedef ptrdiff_t size_type;
  typedef T key_type;
  typedef T value_type;


  hash_set ();
  hash_set (const hash_set<T,HashFun,CompareEQ>&);
  ~hash_set ();

  const hash_set<T,HashFun,CompareEQ>&
    operator= (const hash_set<T,HashFun,CompareEQ>&);

  bool empty() const {return theSize == 0;}
  size_type size() const  {return theSize;}

  void insert (const T& element);

  unsigned count (const T& element) const;

  void erase (const T& element);

  void clear();


  class const_iterator
  {
  public:
    typedef std::bidirectional_iterator_tag iterator_category;
    typedef T                          value_type;
    typedef ptrdiff_t                  difference_type;
    typedef const T*                   pointer;
    typedef const T&                   reference;


    const_iterator& operator ++()
      {++current; advance(); return *this;}
    const_iterator operator ++(int)
      {
    const_iterator clone = this;
    ++current;
    advance();
    return clone;
      }

    const_iterator & operator --()
      {--current; regress(); return *this;}
    const_iterator operator --(int)
      {const_iterator clone = this;
       --current;
       regress();
       return clone;}

    reference operator * () { return (*current)->data; }
    pointer operator -> () { return &((*current)->data); }

    bool operator == (const const_iterator & rhs) const
      { return current == rhs.current; }
    bool operator != (const const_iterator & rhs) const
      { return current != rhs.current; }

  protected:
    typename std::vector<HashEntry<T>*>::const_iterator current;
    const std::vector<HashEntry<T>*>* v;

    const_iterator (typename std::vector<HashEntry<T>*>::const_iterator p,
          const std::vector<HashEntry<T>*>& theVector)
      : current(p), v(&theVector) {}

    void advance()
      {
    while (current != v->end()
           && ((*current) == 0
           || (*current)->info == Deleted
           || (*current)->info == Empty))
      ++current;
      }

    void regress()
      {
    while (current != v->begin()
           && ((*current) == 0
           || (*current)->info == Deleted
           || (*current)->info == Empty))
      --current;
      }

    friend class hash_set<T,HashFun,CompareEQ>;
  };

  typedef const_iterator iterator;

  const_iterator begin() const
    {
      const_iterator i = const_iterator(table.begin(), table);
      i.advance();
      return i;
    }
  const_iterator end() const {return const_iterator(table.end(), table);}

  iterator begin()
    {iterator i = iterator(table.begin(), table);
     i.advance();
     return i;
    }
  iterator end() {return iterator(table.end(), table);}

  const_iterator find (const T& element) const;
  iterator find (const T& element);

  void printStats(std::ostream& out) const
  {
    using namespace std;
    out << "size: " << theSize << "   hSize: " << hSize << endl;
  }    
private:
  static unsigned primes[13];


  unsigned find (const T& element, unsigned h0) const;
  void rehash();

  std::vector<HashEntry<T>*> table;
  int theSize;
  unsigned hSize;
  unsigned hSize_index;
  HashFun hash;
  CompareEQ compare;
};

template <class T, class Hash1, class Hash2, class Equals1, class Equals2>
bool operator== (const hash_set<T,Hash1,Equals1>& left,
         const hash_set<T,Hash2,Equals2>& right)
{
  if (left.size() == right.size())
    {
      for (typename hash_set<T,Hash1,Equals1>::const_iterator p = left.begin();
       p != left.end(); ++p)
    {
      if (right.count(*p) == 0)
        return false;
    }
      return true;
    }
  else
    return false;
}



// Table of prime numbers:  Use these for table sizes so that
// quadratic probing will succeed.
template <class T, class HashFun, class CompareEQ>
unsigned hash_set<T,HashFun,CompareEQ>::primes[13] =
{5, 13, 31, 61, 127, 521, 1279, 2281, 3217,
 9689, 19937, 44497, 86243};





template <class T, class HashFun, class CompareEQ>
hash_set<T,HashFun,CompareEQ>::
hash_set (): theSize(0), hSize_index(0)  
{
  // hash table size is the hsize_Index_th prime number in the primes table
  hSize = primes[hSize_index];
  table.resize(hSize);

  // Fill table with null pointers to strt with
  fill (table.begin(), table.end(), (HashEntry<T>*)0);
}

template <class T, class HashFun, class CompareEQ>
hash_set<T,HashFun,CompareEQ>::
  hash_set (const hash_set<T,HashFun,CompareEQ>& hs)
    : hSize(hs.hSize), hSize_index(hs.hSize_index), theSize(0) 
{
  table.resize(hSize);
  for (unsigned i = 0; i < hSize; ++i)
    if ((hs.table[i] != 0) && (hs.table[i]->info == Occupied))
      insert (hs.table[i]->data);
}

template <class T, class HashFun, class CompareEQ>
hash_set<T,HashFun,CompareEQ>::
  ~hash_set ()
{
  clear();
}


template <class T, class HashFun, class CompareEQ>
const hash_set<T,HashFun,CompareEQ>&
hash_set<T,HashFun,CompareEQ>::
  operator= (const hash_set<T,HashFun,CompareEQ>& hs)
{
  if (this != &hs)
    {
      clear();
      for (unsigned i = 0; i < hs.hSize; ++i)
        if ((hs.table[i] != 0) && (hs.table[i]->info == Occupied))
          insert (hs.table[i]->data);
    }
  return *this;
}


template <class T, class HashFun, class CompareEQ>
void  hash_set<T,HashFun,CompareEQ>::
insert (const T& element)
{
  unsigned h0 = hash(element);

  // Is this item already in the table?
  unsigned h = find(element, h0);
  if (h == hSize)
    {
      // No.  Find an empty or deleted slot to put it into.
      unsigned count = 0;
      h = h0 % hSize;
      while (table[h] != 0
         && table[h]->info == Occupied 
         && count < hSize)
    {
      ++count;
      h = (h0 + count*count) % hSize;
    }

      assert (count < hSize);  // could not insert (table filled?)

      delete table[h];
      table[h] = new HashEntry<T>(element, h0, Occupied);
      ++theSize;
    }
  else
    { // Yes.  Replace the data field.
      table[h]->data = element;
    }

  // Is the table half full or more?  If so, increase the table size
  // and redistribute the data.
  if (2*theSize >= hSize)
    rehash();
}


template <class T, class HashFun, class CompareEQ>
void  hash_set<T,HashFun,CompareEQ>::
rehash ()
{
  //***  It should increase the size of
  //*** the hash table to primes[hSize_index+1], redistributing the
  //*** entries already in the table to their new positions.
}


template <class T, class HashFun, class CompareEQ>
unsigned hash_set<T,HashFun,CompareEQ>::
count (const T& element) const
{
  unsigned h0 = hash(element);
  unsigned h = find(element, h0);
  return  (h != hSize) ? 1 : 0;
}




template <class T, class HashFun, class CompareEQ>
void hash_set<T,HashFun,CompareEQ>::
erase (const T& element)
{
  unsigned h0 = hash(element);
  unsigned h = find(element, h0);
  if (h != hSize)
    {
      table[h]->info = Deleted;
      --theSize;
    }
}


template <class T, class HashFun, class CompareEQ>
void hash_set<T,HashFun,CompareEQ>::
clear()
{
  for (unsigned i = 0; i < hSize; ++i)
    {
      delete table[i];
      table[i] = 0;
    }
  theSize = 0;
}



template <class T, class HashFun, class CompareEQ>
unsigned hash_set<T,HashFun,CompareEQ>::
find (const T& element, unsigned h0) const
{
  unsigned h = h0 % hSize;
  unsigned count = 0;

  // Search for element, stopping at any empty slot
  // or when we've searched the whole table 
  while (table[h] != 0 &&
     (table[h]->info == Deleted ||
      (table[h]->info == Occupied 
       && (!compare(table[h]->data, element))))
     && count < hSize)
    {
      ++count;
      h = (h0 + count*count) % hSize;
    }

  if (count >= hSize 
      || table[h] == 0
      || table[h]->info == Empty)
    return hSize;  // didn't find element
  else 
    return h;      // found it
}


template <class T, class HashFun, class CompareEQ>
typename hash_set<T,HashFun,CompareEQ>::const_iterator
 hash_set<T,HashFun,CompareEQ>::
find (const T& element) const
{
  unsigned h0 = hash(element);
  unsigned h = find(element, h0);
  if (h == hSize)
    return end();
  else
    return const_iterator(table.begin()+h, table);
}


template <class T, class HashFun, class CompareEQ>
typename hash_set<T,HashFun,CompareEQ>::iterator
 hash_set<T,HashFun,CompareEQ>::
find (const T& element)
{
  unsigned h0 = hash(element);
  unsigned h = find(element, h0);
  if (h == hSize)
    return end();
  else
    return iterator(table.begin()+h, table);
}





#endif

1 个答案:

答案 0 :(得分:0)

您创建了一个所需大小的新向量,然后使用当前table的迭代器将HashEntry从后者插入到前者的相应索引中。 我注意到类保留了成员HashEntry::hash中的完整哈希码,因此不需要重新计算对象哈希值。 然后,您需要使用新向量替换table

请记住使用std::vector::swap以避免在std::vector内部进行不必要的幕后分配和复制。

除了hSizehSize_index上的一些内务管理外,我认为就是这样。

有一种方法可以重新分配现有table中的条目,我想您可能会对此感到疑惑。我不确定这样做有什么好处。