Cython:堆栈分配对象的赋值运算符导致段错误

时间:2016-01-25 17:12:05

标签: python c++ cython

我是Cython的新手,但我对它承诺的C / C ++和Python之间的流畅接口非常感兴趣。我遇到了一个我自己无法解决的问题。

执行摘要

当我尝试使用另一个的值分配在Cython中包装的堆栈分配的C ++对象时,我得到一个段错误。我不明白为什么会这样。

详情

我正在使用带有类的代码库来读取和写入某些数据的二进制序列化的自定义格式。下面是从二进制文件中读取的C ++代码的最小重新实现。 Datastore类是二进制数据文件的主要接口。它使用类似于生成器的对象(实现为子类Datastore<T>::generator)来读取包含二进制数据的文件,直到文件耗尽为止。单个记录被输入为内部类Datastore<T>::record

// datastore.hpp:  a minimal implementation of the custom binary reader

#include <fstream>
#include <string>
#include <memory>
#include <stdexcept>

template <typename T>
class Datastore
{

public:
    class generator;
    class record;
    class end_of_datastore_error;

    Datastore(
        std::string  _datastoreFileName
    ):
        datastoreFileName(_datastoreFileName)
    {
    }

    typename Datastore<T>::generator begin()
    {
        return generator(datastoreFileName, false);
    }

    typename Datastore<T>::generator end()
    {
        return generator(datastoreFileName, true);
    }

private:
    std::string  datastoreFileName;
};


template <typename T>
class Datastore<T>::record
{
public:
    record():
        data("")
    {
    }

    record(
        const std::string& _data
    ):
        data(_data)
    {
    }

    ~record() { }

    std::string getData() const { return data; }

private:
    std::string  data;
};


template <typename T>
class Datastore<T>::generator
{
public:
    generator():
        datastoreFileName(""),
        currentRecord(record()),
        done(false),
        data_stream(std::unique_ptr<std::ifstream>(nullptr))
    {
    }

    generator(std::string _datastoreFileName,
              bool        _done);

    ~generator() { if (data_stream->is_open()) data_stream->close(); }

    generator(const generator& rhs) = delete;
    generator& operator=(const generator& rhs) = delete;

    generator(generator&& rhs) = default;
    generator& operator=(generator&& rhs) = default;

    void next();

    bool operator==(const generator& rhs) {
      // equality holds if both generators are not done and point to the same record
      // or if they are both done
      return ((not done and not rhs.done
               and &currentRecord == &rhs.currentRecord)
              or (done and rhs.done));
    }

    bool operator!=(const generator& rhs) {
      return not ((not done and not rhs.done
                   and &currentRecord == &rhs.currentRecord)
                  or (done and rhs.done));
    }

    generator&& operator++();

    Datastore<T>::record& operator*() { return currentRecord; }

    Datastore<T>::record* operator->() { return &currentRecord; }

    std::string             datastoreFileName;
    Datastore<T>::record    currentRecord;
    bool                    done;

private:
    std::unique_ptr<std::ifstream>  data_stream;
};


template <typename T>
class Datastore<T>::end_of_datastore_error:
  virtual public std::ifstream::failure
{
public:
  end_of_datastore_error(const std::string& _msg):
    std::ifstream::failure(_msg)
  {}
};

//------------------------  Implementation  ----------------------------------

template <typename T>
Datastore<T>::generator::generator(
    std::string  _datastoreFileName,
    bool         _done
):
    datastoreFileName(_datastoreFileName),
    currentRecord(Datastore<T>::record()),
    done(_done),
    data_stream(std::unique_ptr<std::ifstream>(
            new std::ifstream(datastoreFileName,
                  std::ios::in | std::ios::binary)))
{
    data_stream->exceptions(
        std::ifstream::failbit | std::ifstream::badbit | std::ios::eofbit);

    next();
}

template <typename T>
typename Datastore<T>::generator&& Datastore<T>::generator::operator++()
{
    try
    {
        next();
    }
    catch (Datastore<T>::end_of_datastore_error& e)
    {
    }

    return std::move(*this);
}

template <typename T>
void Datastore<T>::generator::next()
{
    try
    {
      std::string line;
      std::getline(*data_stream, line);
      currentRecord = record(line);
    }
    catch (std::ifstream::failure e)
    {
        if (data_stream->eof())
        {
            done = true;  // flag used to compare to Datastore<T>::end()
            throw Datastore<T>::end_of_datastore_error("Done reading datastore");
        }
        else
        {
            throw e;
        }
    }
}

我想将这个库(用C ++编写)公开给主要使用Python的人。在Python中使用生成器构造似乎很自然,因此可以在Python中使用以下接口:

In [1]: from datastore import Datastore as D                                   
In [2]: d = D("example_data/data.dat")                                         
In [3]: for x in d.iteritems():
            some_func(x)  # for example

在我的iteritems方法的实现中(见下文),一切正常,除非我尝试将值赋给生成器类型的堆栈分配变量。这是我的Cython描述和实现文件。

#cdatastore.pxd
from libcpp.string cimport string

cdef extern from "cpp/datastore.hpp":
    cdef cppclass Datastore[T]:
        cppclass record:
            record()
            string getData()
        cppclass generator:
            generator()
            generator(generator&&)
            generator& operator=(generator&&)
            Datastore[T].record& operator*()
            generator&& operator++()

        Datastore(string)
        generator begin()
        generator end()


######################################
#datastore.pyx
cimport cdatastore

from cython.operator cimport dereference as deref
from cython.operator cimport preincrement as inc

cdef class Datastore:
    cdef cdatastore.Datastore[int]* _c_datastore
    cdef cppclass generator
    cdef cppclass record

    def __cinit__(self, data_filename):
        self._c_datastore = new cdatastore.Datastore[int](data_filename)
        if self._c_datastore is NULL:
            raise MemoryError()

    def __dealloc__(self):
       if self._c_datastore is not NULL:
           del self._c_datastore

    def iteritems(self):
        print deref(self._c_datastore.begin()).getData()
        print deref(inc(self._c_datastore.begin())).getData()

        # to show
        print "a"
        cdef cdatastore.Datastore[int].generator g
        print "b"
        print deref(g).getData()
        print "c"
        self._c_datastore.begin()
        print "d"
        deref(self._c_datastore.begin())
        print "e"
        g = self._c_datastore.begin()
        print "f"

使用虚拟文件,内容为“数据行1 \ ndata line2(等)”,我真正看到的是:

In [1]: from datastore import Datastore as D                                   
In [2]: d = D("example_data/data.dat")                                         
In [3]: d.iteritems()
data line 1
data line 2
a
b

c
d
e
Segmentation fault: 11 (core dumped)

问题行是g = self._c_datastore.begin(),我认为这意味着我的问题来自generator& operator=(generator&&)上的包装,但我无法解析确切的原因并找到解决方案。任何帮助将不胜感激!

1 个答案:

答案 0 :(得分:1)

我认为这是>>> from itertools import zip_longest >>> >>> f = ['foo', 'btextr', 'cool', 'monsttex'] >>> >>> ['_'.join(i) for i in zip_longest(f, f[1:], fillvalue='NA')] ['foo_btextr', 'btextr_cool', 'cool_monsttex', 'monsttex_NA'] 中的一行:

~generator()

你在空指针上调用函数(因为你在{ if (data_stream->is_open()) data_stream->close(); } 构造函数中初始化了data_streamnullptr。)