Python扩展类型:在不同线程上执行循环GC

时间:2018-12-03 06:38:04

标签: python c multithreading

我正在开发一个Python扩展模块,它添加了多种扩展类型。其中一些类型之间存在循环引用,例如:

class Child:
    """Object that keeps a reference back to the parent passed to it."""
    def __init__(self, parent):
        # Back-reference; together with Parent.child this forms a reference cycle.
        self.parent = parent

class Parent:
    """Object that creates and owns a Child pointing back at itself."""
    def __init__(self):
        # The new Child stores `self`, so parent and child reference each other.
        self.child = Child(self)

这本身并不是问题,因为python具有循环垃圾收集功能。

所用的第三方库使用线程本地存储(thread-local storage)。其中一种扩展类型的tp_new方法最终会调用malloc,并将返回的指针保存在线程本地存储中;这块内存由该类型的tp_dealloc方法释放。在单线程Python代码中使用此模块时,一切正常;但在多线程Python代码中使用此模块时,tp_dealloc最终会在与tp_new不同的线程上被调用,因而无法清理已分配的内存。下面是一个最小复现示例:

#include <Python.h>
#include "structmember.h"
#include <pthread.h>

/*
 * Debug trace: writes the enclosing function name, the source line, the
 * calling thread's id and the reference count of `self` to stderr.
 *
 * The expansion refers to a variable named `self`, so it can only be used
 * where such a variable is in scope (and `self` must not be NULL, because
 * Py_REFCNT() reads through it).  The expansion already ends in a semicolon,
 * which is why call sites write a bare `log`.
 *
 * NOTE(review): pthread_t is printed with %p and the reference count with
 * %ld; this presumably only matches the author's platform -- confirm before
 * porting.
 * NOTE(review): from this line on, the macro replaces every use of the
 * identifier `log`, including the C library function of that name.
 */
#define log fprintf(stderr, "\t%s:%d - [thread_id='%p', refcnt='%ld']\n", __FUNCTION__, __LINE__, pthread_self(), Py_REFCNT(self));

/* [forward declarations] --------------------------------------------------*/
/* Short names for the two instance structs, which point at each other. */
typedef struct _ChildObject_s ChildObject;
typedef struct _ParentObject_s ParentObject;
/* C-level constructor for a Child; needed by Parent_init before its definition. */
ChildObject *Child_new(ParentObject *);
/* Declared early so Child_new can name the type; initialised further down. */
static PyTypeObject ChildType;

/* [parent type definition] ------------------------------------------------*/
/* Instance layout of custom.Parent. */
struct _ParentObject_s {
    PyObject_HEAD
    PyObject *childObject;  /* Child created by Parent_init; released by Parent_clear */
    pthread_t threadId;     /* thread that ran Parent_new; compared in Parent_dealloc */
};

/*
 * tp_traverse for custom.Parent: logs the call and reports the child
 * reference held by the instance to the visitor.
 *
 * Returns 0, unless Py_VISIT returns early with the visitor's result.
 */
static int
Parent_traverse(ParentObject *self, visitproc visit, void *arg)
{
    log
    /* Py_VISIT relies on the parameters being named exactly `visit` and `arg`. */
    Py_VISIT(self->childObject);
    return 0;
}

/*
 * tp_clear for custom.Parent: logs the call and drops the reference to the
 * child.  Also called directly from Parent_dealloc.
 *
 * Always returns 0.
 */
static int
Parent_clear(ParentObject *self)
{
    log
    Py_CLEAR(self->childObject);
    return 0;
}

/*
 * tp_dealloc for custom.Parent.
 *
 * Logs the call, then aborts the process if the current thread is not the
 * one recorded by Parent_new.  Otherwise the object is removed from GC
 * tracking, its child reference is released and its memory is handed back
 * through the type's tp_free slot.
 */
static void
Parent_dealloc(ParentObject *self)
{
    log

    /* pthread_equal() yields zero when the two ids name different threads. */
    if (!pthread_equal(self->threadId, pthread_self())) {
        fputs("delloc called from different thread than new\n", stderr);
        abort();
    }

    /* Stop GC tracking before any member is torn down. */
    PyObject_GC_UnTrack(self);
    (void) Parent_clear(self);

    freefunc release = Py_TYPE(self)->tp_free;
    release((PyObject *) self);
}

/*
 * tp_new for custom.Parent.
 *
 * Allocates the instance through the type's tp_alloc slot and records the
 * id of the calling thread so Parent_dealloc can compare against it later.
 * `args` and `kwds` are not used.
 *
 * Returns the new object, or NULL when allocation fails.
 */
static PyObject *
Parent_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
    ParentObject *self = (ParentObject *) type->tp_alloc(type, 0);

    if (self == NULL) {
        /* Bail out before `log`: the macro reads Py_REFCNT(self) and would
         * dereference the NULL pointer. */
        return NULL;
    }

    self->threadId = pthread_self();

    log

    return (PyObject *) self;
}

/*
 * tp_init for custom.Parent.
 *
 * Creates a Child that points back at `self` and stores it in
 * self->childObject.  `args` and `kwds` are not used.
 *
 * Returns 0 on success, or -1 when the child could not be created.
 */
static int
Parent_init(ParentObject *self, PyObject *args, PyObject *kwds)
{
    PyObject *child;
    PyObject *previous;

    log
    child = (PyObject *) Child_new(self);
    if (child == NULL) {
        /* Report the failure instead of leaving a half-initialised object. */
        return -1;
    }

    /* __init__ may run more than once on the same object: install the new
     * child first, then release the one it replaces so it is not leaked. */
    previous = self->childObject;
    self->childObject = child;
    Py_XDECREF(previous);

    log
    return 0;
}

/*
 * Type object for custom.Parent: a subclassable, GC-enabled type whose
 * slots point at the Parent_* functions defined above.
 */
static PyTypeObject ParentType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    .tp_name = "custom.Parent",
    .tp_doc = "Parent objects",
    .tp_basicsize = sizeof(ParentObject),
    .tp_itemsize = 0,
    /* HAVE_GC goes together with the tp_traverse / tp_clear slots below. */
    .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
    .tp_new = Parent_new,
    /* The casts adapt the ParentObject* signatures to the generic slot types. */
    .tp_init = (initproc) Parent_init,
    .tp_dealloc = (destructor) Parent_dealloc,
    .tp_traverse = (traverseproc) Parent_traverse,
    .tp_clear = (inquiry) Parent_clear,
};

/* [child type definition] -------------------------------------------------*/
/* Instance layout of custom.Child. */
struct _ChildObject_s{
    PyObject_HEAD
    ParentObject *parentObject;  /* reference to the parent, taken in Child_new; released by Child_clear */
};

/*
 * tp_traverse for custom.Child: logs the call and reports the parent
 * reference held by the instance to the visitor.
 *
 * Returns 0, unless Py_VISIT returns early with the visitor's result.
 */
static int
Child_traverse(ChildObject *self, visitproc visit, void *arg)
{
    log
    /* Py_VISIT relies on the parameters being named exactly `visit` and `arg`. */
    Py_VISIT((PyObject *) self->parentObject);
    return 0;
}

/*
 * tp_clear for custom.Child: logs the call and drops the reference to the
 * parent.  Also called directly from Child_dealloc.
 *
 * Always returns 0.
 */
static int
Child_clear(ChildObject *self)
{
    log
    Py_CLEAR(self->parentObject);
    return 0;
}

/*
 * tp_dealloc for custom.Child.
 *
 * Logs the call, removes the object from GC tracking, releases the parent
 * reference and hands the memory back through the type's tp_free slot.
 */
static void
Child_dealloc(ChildObject *self)
{
    log

    /* Stop GC tracking before any member is torn down. */
    PyObject_GC_UnTrack(self);
    (void) Child_clear(self);

    freefunc release = Py_TYPE(self)->tp_free;
    release((PyObject *) self);
}

/*
 * C-level constructor for custom.Child.
 *
 * Allocates a GC-enabled Child, stores a new reference to `parentObject`
 * in it and starts GC tracking once the member is set.
 *
 * parentObject: the parent to point back at; must not be NULL, because a
 *               reference to it is taken unconditionally.
 *
 * Returns the new child, or NULL when allocation fails.
 */
ChildObject *
Child_new(ParentObject *parentObject)
{
    ChildObject *self = (ChildObject *) PyObject_GC_New(ChildObject, &ChildType);

    if (self == NULL) {
        /* Bail out before `log`: the macro reads Py_REFCNT(self) and would
         * dereference the NULL pointer. */
        return NULL;
    }

    Py_INCREF(parentObject);
    self->parentObject = parentObject;
    /* Track only after the member is valid, so a traversal never sees garbage. */
    PyObject_GC_Track(self);

    log

    return self;
}

/*
 * Type object for custom.Child: a subclassable, GC-enabled type whose
 * slots point at the Child_* functions defined above.
 */
static PyTypeObject ChildType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    .tp_name = "custom.Child",
    .tp_doc = "Child objects",
    .tp_basicsize = sizeof(ChildObject),
    .tp_itemsize = 0,
    /* HAVE_GC goes together with the tp_traverse / tp_clear slots below. */
    .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
    /* Generic constructor, not Child_new.
     * NOTE(review): a Child built this way presumably has no parent set -- confirm. */
    .tp_new = PyType_GenericNew,
    /* The casts adapt the ChildObject* signatures to the generic slot types. */
    .tp_dealloc = (destructor) Child_dealloc,
    .tp_traverse = (traverseproc) Child_traverse,
    .tp_clear = (inquiry) Child_clear,
};

/* [module definition] -----------------------------------------------------*/

/* Definition of the `custom` module; it declares no module-level functions. */
static PyModuleDef custommodule = {
    PyModuleDef_HEAD_INIT,
    .m_name = "custom",
    .m_doc = "Example module that creates an extension type.",
    .m_size = -1,
};

/*
 * Module initialisation function for `custom`.
 *
 * Readies both extension types, creates the module and publishes the types
 * as custom.Parent and custom.Child.
 *
 * Returns the new module, or NULL when any step fails.
 */
PyMODINIT_FUNC
PyInit_custom(void)
{
    PyObject *m;

    if (PyType_Ready(&ParentType) < 0)
        return NULL;

    if (PyType_Ready(&ChildType) < 0)
        return NULL;

    m = PyModule_Create(&custommodule);
    if (m == NULL)
        return NULL;

    /* PyModule_AddObject() steals the reference only on success, so on
     * failure both the extra type reference and the module are released. */
    Py_INCREF(&ParentType);
    if (PyModule_AddObject(m, "Parent", (PyObject *) &ParentType) < 0) {
        Py_DECREF(&ParentType);
        Py_DECREF(m);
        return NULL;
    }

    Py_INCREF(&ChildType);
    if (PyModule_AddObject(m, "Child", (PyObject *) &ChildType) < 0) {
        Py_DECREF(&ChildType);
        Py_DECREF(m);
        return NULL;
    }

    return m;
}

使用以下测试脚本运行该模块时:

import custom
import threading


def use_custom():
    """Create a custom.Parent and let it go out of scope when the function returns."""
    print("beginning of use_custom")
    # The only reference is the local `c`; nothing deletes or collects it explicitly.
    c = custom.Parent()
    print("end of use_custom")

# Run use_custom() on a separate thread and wait for it to finish.
thread = threading.Thread(target=use_custom)
thread.start()
thread.join()

我得到以下输出:

beginning of use_custom
    Parent_new:58 - [thread_id='0x7f785e1abb10', refcnt='1']
    Parent_new:59 - [thread_id='0x7f785e1abb10', refcnt='1']
    Parent_init:67 - [thread_id='0x7f785e1abb10', refcnt='1']
    Child_new:129 - [thread_id='0x7f785e1abb10', refcnt='1']
    Parent_init:69 - [thread_id='0x7f785e1abb10', refcnt='2']
end of use_custom
    Parent_traverse:23 - [thread_id='0x7f785e750b88', refcnt='1']
    Child_traverse:96 - [thread_id='0x7f785e750b88', refcnt='1']
    Parent_traverse:23 - [thread_id='0x7f785e750b88', refcnt='1']
    Child_traverse:96 - [thread_id='0x7f785e750b88', refcnt='1']
    Parent_clear:31 - [thread_id='0x7f785e750b88', refcnt='2']
    Child_dealloc:112 - [thread_id='0x7f785e750b88', refcnt='0']
    Child_clear:104 - [thread_id='0x7f785e750b88', refcnt='0']
    Parent_dealloc:39 - [thread_id='0x7f785e750b88', refcnt='0']
delloc called from different thread than new
Aborted

在use_custom()函数的末尾加上 del c; gc.collect() 即可消除该错误。

  1. 是否有办法确保在创建对象的同一线程上对对象进行垃圾回收?
  2. 问题的根源似乎是子对象持有对父对象的引用,使父对象的引用计数保持非零。我知道扩展类型可以包含弱引用列表。

    1. 对于弱引用,这是否是合适的用例?
    2. 如何从C代码添加/清除弱引用?文档中写道:

      > 如果该字段不是NULL,tp_dealloc需要(通过调用PyObject_ClearWeakRefs())清除所有弱引用

      但没有说明一开始如何向该列表中添加弱引用。

0 个答案:

没有答案