Question

我有一个很大的查找表，目前每个条目需要 12 位。是否有标准类能提供一个内存高效的容器，用来存储这种非常规位宽的数据？表中大约有 10 亿个条目，所以相比速度，我更关心内存效率。

我需要能够获取底层数据并将其读/写到文件中。

Answer 1

你看过 boost::dynamic_bitset 吗？我并不是说它能一劳永逸地解决你的所有问题，但它可以帮你实现你所描述的部分特性。它与标准库的 bitset 非常相似，只是多了可调整大小的能力。

我大概不会单独用它来解决你的问题。相反，我可能会把它与另一个容器类结合起来，并配合某种映射方案使用。具体采用哪种映射我说不准，因为这取决于数据本身以及访问周期的频率。不过，再仔细想想，也可以考虑这样：

// One 12-bit bitset per table entry, held in a growable vector.
// NOTE(review): std::bitset<12> is presumably padded to at least one machine
// word by the implementation, so this likely costs more than 12 bits per
// entry -- confirm with sizeof(std::bitset<12>) before relying on it for
// memory efficiency.
std::vector<std::bitset<12> > oneBillionDollars; //Austin Powers, my hero!

Answer 2

这个怎么样：

#include <stdio.h>

#include <new>

// Short aliases for the unsigned integer types used by the packed table.
typedef unsigned char byte;
typedef unsigned short word;
typedef unsigned int uint;
typedef unsigned long long int qword;

// Geometry of the packed table. Cells are stored back to back as one
// little-endian bit stream: cell i occupies bits [i*bits_per_cell,
// (i+1)*bits_per_cell) of the buffer, bit 0 being the LSB of byte 0.
enum {
  bits_per_cell = 12,                       // width of one cell in bits (put/get support up to 25)
  cellmask = (1<<bits_per_cell)-1,          // low bits_per_cell bits set
  N_cells = 1000000,                        // number of cells in the table
  // Total bytes, rounded up. The product is formed in 64 bits so that
  // raising N_cells towards 10^9 does not overflow int.
  bufsize = (qword(N_cells)*bits_per_cell+7)/8,
};

// Backing storage for the table; exactly bufsize bytes once Alloc() succeeds.
// The raw bytes can be written to / read from a file as-is.
byte* buf;

// Allocates and zero-fills the table storage.
// Returns the buffer (also stored in the global `buf`), or a null pointer if
// the allocation fails. The non-throwing form of new is used so that callers
// can actually test the result against 0.
byte* Alloc( void ) {
  buf = new (std::nothrow) byte[bufsize]();
  return buf;
}

// Stores the low bits_per_cell bits of `c` into cell `i`.
// Works byte by byte, so it is independent of host endianness and alignment,
// and it touches only the bytes the cell actually occupies -- it never reads
// or writes past buf[bufsize-1] for i < N_cells.
// Precondition: Alloc() succeeded and i < N_cells.
void put( uint i, uint c ) {
  const qword bitpos = qword(i)*bits_per_cell;      // absolute bit offset of the cell
  const uint  shift  = uint(bitpos&7);              // bit offset inside the first byte
  const qword first  = bitpos>>3;                   // index of the first byte touched
  const uint  nbytes = (shift+bits_per_cell+7)/8;   // bytes spanned by the cell

  // Gather the spanned bytes into a little-endian window.
  uint window = 0;
  for( uint k=0; k<nbytes; k++ ) window |= uint(buf[first+k])<<(8*k);

  // Replace only this cell's bits; neighbouring cells in the window are kept.
  window = (window & ~(uint(cellmask)<<shift)) | ((c&cellmask)<<shift);

  for( uint k=0; k<nbytes; k++ ) buf[first+k] = byte(window>>(8*k));
}

// Returns the value stored in cell `i` (always in [0, cellmask]).
// Same addressing scheme and bounds guarantee as put().
// Precondition: Alloc() succeeded and i < N_cells.
uint get( uint i ) {
  const qword bitpos = qword(i)*bits_per_cell;
  const uint  shift  = uint(bitpos&7);
  const qword first  = bitpos>>3;
  const uint  nbytes = (shift+bits_per_cell+7)/8;

  uint window = 0;
  for( uint k=0; k<nbytes; k++ ) window |= uint(buf[first+k])<<(8*k);

  return (window>>shift)&cellmask;
}

/* 

// bigendian/universal
void put( uint i, uint c ) {
  qword x = qword(i)*bits_per_cell;
  uint y = x&7, z = (x>>3);
  uint a = buf[z] + (buf[z+1]<<8) + (buf[z+2]<<16);
  uint mask = ~(cellmask<<y);
  a = a & mask | ((c&cellmask)<<y);
  buf[z] = byte(a); buf[z+1]=byte(a>>8); buf[z+2]=byte(a>>16);
}

uint get( uint i ) {
  qword x = qword(i)*bits_per_cell;
  uint  y = x&7, z = (x>>3);
  uint a = buf[z] + (buf[z+1]<<8) + (buf[z+2]<<16);
  return (a>>y)&cellmask;
}
*/

int main( void ) {

  if( Alloc()==0 ) return 1;

  uint i;

  for( i=0; i<N_cells; i++ ) put( i^1, i );

  for( i=0; i<N_cells; i++ ) {
    uint j = i^1, c, d; 
    c = get(j); d = i & cellmask;
    if( c!=d ) printf( "x[%08X]=%04X, not %04X\n", j,c,d );
  }

}

Answer 3

你遇到的是一个打包（packing）问题。我能想到的唯一办法，是找出 N 与某个 2 的幂的最小公倍数。这并不那么容易，但绝对可行。

// General structure
template <size_t N>
class Pack
{
public:
  static size_t const Number = N;
  static size_t const Density = 0; // number of sets of N bits

  typedef char UnpackedType; // some integral

  UnpackedType Get(size_t i) const; // i in [0..Density)
  void Set(size_t i, UnpackedType t); // i in [0..Density)

  // arbitrary representation
};

// Example, for 12 bits
// Note: I assume that all is set, you'll have to pad...
//       but for a million one or two more should not be too much of an issue I guess
//       if it is, the table shall need one more data member, which is reasonable
class Pack12
{
public:
  typedef uint16_t UnpackedType;
  static size_t const Number = 12;
  static size_t const Density = 4;

  UnpackedType get(size_t i) const;
  void set(size_t i, UnpackedType t);

private:
  uint16_t data[3];
};

此外，你无法直接操作这种非常规位宽的数据，因此需要把它们打包进更大的整数类型中。表里保存的是打包后的数据，而“访问器”（accessor）返回的是解包后的值。

/// Table of fixed-width values stored in groups ("packs").
///
/// `Pack` must provide:
///   - `UnpackedType`           : integral type handed to and from callers
///   - `Number`                 : bits per value
///   - `Density`                : values held by one pack (must be > 0 to
///                                use the sizing constructor, get or set)
///   - `UnpackedType get(size_t) const` and `void set(size_t, UnpackedType)`
///     addressing a value inside the pack by an index in [0, Density)
template <typename Pack>
class Table
{
public:
  typedef typename Pack::UnpackedType UnpackedType;

  /// Creates an empty table.
  Table() {}

  /// Creates a table with room for at least `count` values. The capacity is
  /// rounded up to a whole number of packs, so size() may exceed `count`.
  /// Packs are value-initialised.
  explicit Table(size_t count);

  /// True when the table holds no packs at all.
  bool empty() const;

  /// Number of addressable values (packs * Density).
  size_t size() const;

  /// Returns value `i`. Throws std::out_of_range when `i` lies beyond the
  /// last pack (the check is done by std::deque::at).
  UnpackedType get(size_t i) const;

  /// Stores `t` as value `i`. Throws std::out_of_range when `i` lies beyond
  /// the last pack.
  void set(size_t i, UnpackedType t);

private:
  static size_t const NumberBits = Pack::Number;
  static size_t const Density = Pack::Density;

  std::deque<Pack> data;
};

template <typename Pack>
Table<Pack>::Table(size_t count)
  // Ceiling division written without `count + Density - 1`, which could wrap.
  : data(count / Density + (count % Density != 0 ? 1 : 0))
{
}

template <typename Pack>
bool Table<Pack>::empty() const { return data.empty(); }

template <typename Pack>
size_t Table<Pack>::size() const { return data.size() * Density; }

template <typename Pack>
typename Table<Pack>::UnpackedType Table<Pack>::get(size_t i) const
{
  // i / Density selects the pack, i % Density the slot inside it.
  Pack const& pack = data.at(i / Density);
  return pack.get(i % Density);
}

template <typename Pack>
void Table<Pack>::set(size_t i, UnpackedType t)
{
  // Same addressing as get(), but through a mutable pack.
  Pack& pack = data.at(i / Density);
  pack.set(i % Density, t);
}

现在我们可以在此基础上构建一个适用于任何包的通用表：

Pack<N>

更聪明的做法是让 Pack<N> 能够自行推断出 getter 和内部表示……但这似乎不值得费力，因为 Pack 的接口已经非常精简，而 Table 可以在不对 Pack 提出更多要求的前提下提供更丰富的接口。

C ++任意宽度容器

3 个答案: