Question

我尝试实现一个自定义 allocator，以便把内存映射文件的内容存放到 std::vector 中。文件映射由 boost::iostreams::mapped_file 完成。

文件内存映射的分配器类型：

// Minimal allocator that hands its container the memory-mapped bytes of one
// file (mapped through boost::iostreams::mapped_file) as element storage.
template <typename T>
class mmap_allocator
{
public:
  using value_type = T;

  // Maps `filename`; only the file name is passed to mapped_file.
  mmap_allocator(const std::string& filename)
    : _mmfile(filename)
  {
  }

  // Returns the start of the mapping reinterpreted as T*.
  // The requested element count is not consulted.
  T* allocate(size_t /*n*/)
  {
    auto* const mapped_bytes = _mmfile.data();
    return reinterpret_cast<T*>(mapped_bytes);
  }

  // Closes the mapping; neither argument is consulted.
  void deallocate(T* /*p*/, size_t /*n*/)
  {
    _mmfile.close();
  }

private:
  boost::iostreams::mapped_file _mmfile;  // handle to the file mapping
};

内存映射文件的容器，基于std::vector：

// Returns the size of `filename` in bytes, or -1 when the file cannot be
// opened or its length cannot be determined.
long GetFileSize(std::string filename)
{
    FILE *p_file = fopen(filename.c_str(), "rb");
    if (p_file == nullptr)
        return -1;  // without this check a missing file reaches fseek as a null stream

    // Kept as long: ftell returns long, and storing it in an int would
    // truncate the size of large files.
    long size = -1;
    if (fseek(p_file, 0, SEEK_END) == 0)
        size = ftell(p_file);  // ftell itself reports failure as -1
    fclose(p_file);
    return size;
}

// std::vector subclass whose allocator is a mmap_allocator bound to a file.
template <typename T>
class mm_vector : public std::vector<T, mmap_allocator<T> >
{
public:
  using allocator_type = mmap_allocator<T>;
  using b_vector = std::vector<T, allocator_type>;

  // Constructs the base vector with GetFileSize(filename) / sizeof(T)
  // elements, then reserves that same element count.
  mm_vector(const std::string filename)
    : b_vector(GetFileSize(filename) / sizeof(T), allocator_type(filename))
  {
    const auto element_count = GetFileSize(filename) / sizeof(T);
    this->reserve(element_count);
  }
};

测试代码：

int main()
{
  mm_vector<int> v("test.f");//test.f - binary file contain several integers
  for(auto x : v) std::cout<<x<<"  ";
}

这段代码无法正常工作——输出始终为零。文件内容是正确的（包含几个整数）。而下面这段代码却可以正常工作：

boost::iostreams::mapped_file _mmfile("test.f");
int* p = (int*)(_mmfile.data());
std::cout<<p[0];

我做错了什么？

Answer 1

问题出在零初始化上：调用同时接收大小和分配器的那个构造函数时，vector 会把所有元素初始化为元素类型的默认值（在本例中为 0）。这是标准规定的行为。

23.3.7.2 vector 的构造函数、复制和赋值 [vector.cons] § 23.3.7.2 789

explicit vector(size_type n, const Allocator& = Allocator());

- 效果（Effects）：使用指定的分配器构造一个包含 n 个默认插入（default-inserted）元素的 vector。
- 要求（Requires）：T 应当对 *this 满足 DefaultInsertable。
- 复杂度（Complexity）：与 n 呈线性关系。

在我的测试中（GCC 4.9.0），所用的文件本身也被写满了 0。这是合乎逻辑的：mapped_file 的默认 mapmode 是 readwrite，因此 vector 对元素做的零初始化会直接写回到文件中。

在下面的示例代码中，我在分配发生时（自定义分配器内）和 vector 的构造函数中各添加了一次对映射内存内容的打印，并保留了 main 中原有的打印。第一次打印输出的是文件中的正确数据，第二次输出的则是被清零后的内容。

#include <chrono>
#include <cstdio>
#include <iomanip>
#include <iostream>
#include <string>
#include <vector>

#include <boost/iostreams/device/mapped_file.hpp>

// Diagnostic variant of the file-backed allocator: allocate() dumps the
// mapped contents before handing the mapping to the container.
template <typename T>
class mmap_allocator {
public:
    typedef T value_type;

    // Maps `filename`; only the file name is passed to mapped_file.
    mmap_allocator(const std::string& filename) : _mmfile(filename) {}

    // Prints every T stored in the mapping under the "OUTPUT 1:" header and
    // returns the start of the mapping as T*. The requested element count is
    // not consulted.
    T* allocate(size_t /*n*/) {
        std::cout << "OUTPUT 1:" << std::endl;
        T* const elements = reinterpret_cast<T*>(_mmfile.data());
        // The element count must be derived from sizeof(T); a hard-coded
        // sizeof(int) mis-sizes the dump for any other element type.
        const size_t count = _mmfile.size() / sizeof(T);
        for (size_t idx = 0; idx < count; ++idx)
            std::cout << elements[idx] << " ";
        return elements;
    }

    // Closes the mapping; neither argument is consulted.
    void deallocate(T* /*p*/, size_t /*n*/) {
        _mmfile.close();
    }

private:
    boost::iostreams::mapped_file _mmfile;  // handle to the file mapping
};

// Returns the size of `filename` in bytes, or -1 when the file cannot be
// opened or its length cannot be determined.
long GetFileSize(std::string filename) {
    FILE* p_file = fopen(filename.c_str(), "rb");
    if (p_file == nullptr)
        return -1;  // without this check a missing file reaches fseek as a null stream

    // Kept as long: ftell returns long, and storing it in an int would
    // truncate the size of large files.
    long size = -1;
    if (fseek(p_file, 0, SEEK_END) == 0)
        size = ftell(p_file);  // ftell itself reports failure as -1
    fclose(p_file);
    return size;
}

// std::vector subclass backed by mmap_allocator. The constructor prints the
// freshly constructed elements under the "OUTPUT 2:" header.
template <typename T>
class mm_vector : public std::vector<T, mmap_allocator<T>> {
public:
    using allocator_type = mmap_allocator<T>;
    using b_vector = std::vector<T, allocator_type>;

    // Constructs the base vector with GetFileSize(filename) / sizeof(T)
    // elements, prints them, then reserves that same element count.
    mm_vector(const std::string filename)
        : b_vector(GetFileSize(filename) / sizeof(T),
                   allocator_type(filename)) {
        std::cout << std::endl << std::endl << "OUTPUT 2:" << std::endl;
        for (auto it = this->begin(); it != this->end(); ++it) {
            auto value = *it;
            std::cout << value << "  ";
        }
        const auto element_count = GetFileSize(filename) / sizeof(T);
        this->reserve(element_count);
    }
};

// Builds the file-backed vector, prints its elements under the "OUTPUT 2:"
// header, then reports how long the run took.
int main(int argc, char* argv[]) {
    (void)argc;  // command-line arguments are not used
    (void)argv;

    // steady_clock is monotonic, so the measured interval cannot be skewed by
    // wall-clock adjustments the way a system_clock difference can.
    const std::chrono::steady_clock::time_point begin_time =
        std::chrono::steady_clock::now();

    mm_vector<int> v("H:\\save.txt");
    std::cout << std::endl << std::endl << "OUTPUT 2:" << std::endl;
    for (auto x : v)
        std::cout << x << "  ";

    const std::chrono::steady_clock::time_point end_time =
        std::chrono::steady_clock::now();
    const long long elapsed_miliseconds =
        std::chrono::duration_cast<std::chrono::milliseconds>(
            end_time - begin_time).count();

    // setfill is sticky, so it is set once; setw applies to the next field only.
    std::cout << "Duration (min:seg:mili): " << std::setfill('0')
              << std::setw(2) << (elapsed_miliseconds / 60000) << ":"
              << std::setw(2) << ((elapsed_miliseconds / 1000) % 60) << ":"
              // the millisecond field ranges over 000-999, so it needs three digits
              << std::setw(3) << (elapsed_miliseconds % 1000) << std::endl;
    std::cout << "Total milliseconds: " << elapsed_miliseconds << std::endl;

    return 0;
}

Answer 2

您可以试一试这个库：

https://github.com/johannesthoma/mmap_allocator

它使用 mmap 映射的文件内容作为 vector 的后备存储，并且采用 LGPL 许可，因此您应该可以在自己的项目中使用它。请注意，目前它要求使用 gcc，但可以很容易地扩展到其他编译器。

Answer 3

为了把 NetVipeC 回答中的思路具体展示出来（并借助 Johannes Thoma 推荐的 mmap_allocator 库），如果您使用的是 GNU 标准 C++ 库，那么用下面的代码替换 mm_vector 类，就可以防止内存映射向量的内容被初始化为零（同时也不再需要 GetFileSize 函数）：

// Replacement mm_vector intended to adopt the allocator's mapped storage
// without the vector initialising (and thereby overwriting) the elements.
// NOTE(review): uses the implementation-internal members
// _M_get_Tp_allocator and _M_impl (GNU libstdc++ per the surrounding text),
// so it presumably will not build against other standard libraries - confirm.
template <typename T>
class mm_vector : public std::vector<T, mmap_allocator<T>> {
public:
    typedef mmap_allocator<T> allocator_type;
    typedef std::vector<T, allocator_type> b_vector;

    // Builds the base vector from the allocator alone (no element count), then
    // patches the vector's internal pointers so that it spans n elements.
    mm_vector(const std::string filename)
        : b_vector(allocator_type(filename)) {

        // Fetch the allocator instance held by the base vector.
        allocator_type * a = &b_vector::_M_get_Tp_allocator();
        // NOTE(review): assumes the allocator exposes size() returning the
        // mapped length in bytes; the mmap_allocator definitions shown in this
        // document have no such member - confirm against the library used.
        size_t n = a->size() / sizeof(T);
        // Request capacity for n elements while the vector is still empty.
        b_vector::reserve(n);
        // _M_set_finish(n);
        // Point both the end-of-elements and end-of-capacity markers n elements
        // past the start of the storage.
        this->_M_impl._M_finish = this->_M_impl._M_end_of_storage = this->_M_impl._M_start + n;
    }
};

我们先让向量以默认大小 0 完成初始化，从而避免其内容被清零，然后再直接修改它的内部成员来调整大小。这多半还不是一个完整的解决方案——例如，我还没有检查那些会改变向量大小的操作能否正常工作。

将内存映射文件存储在 STL vector 中

3 个答案: