我正在为计算密集型算法开发Python C扩展,出于优化目的,我使用pthreads来并行执行C代码。在每个线程中,我经过一些处理后更新了一个字典。字典在线程之间共享。
问题是当我使用1个线程运行时,它运行正常,但是当我使用多个线程时,它会给我一个奇怪的输出,如下所示:
{0: 5, 1: 3, 249: 1, 253: 4, 254: 3, 255: 4, 256: 4, <refcnt -7 at 0x104036e10>: 1, <refcnt -7 at 0x104036e10>: 5, <refcnt -8 at 0x104036e10>: 4, <refcnt -7 at 0x104036e10>: 8, <refcnt -7 at 0x104036e10>: 5, <refcnt -7 at 0x104036e10>: 4, <refcnt -7 at 0x104036e10>: 6, <refcnt -7 at 0x104036e10>: 3, <refcnt -7 at 0x104036e10>: 2, <refcnt -7 at 0x104036e10>: 2, <refcnt -7 at 0x104036e10>: 3, <refcnt -7 at 0x104036e10>: 3, <refcnt -8 at 0x104036e10>: 1, <refcnt -7 at 0x104036e10>: 3, <refcnt -7 at 0x104036e10>: 5, <refcnt -7 at 0x104036e10>: 5, <refcnt 0 at 0x104036e10>: 7, <refcnt 0 at 0x104036e10>: 3, <refcnt 0 at 0x104036e10>: 5, <refcnt 0 at 0x104036e10>: 7, <refcnt 0 at 0x104036e10>: 2, <refcnt 0 at 0x104036e10>: 2, <refcnt 0 at 0x104036e10>: 2, <refcnt 0 at 0x104036e10>: 5, <refcnt 0 at 0x104036e10>: 2, <refcnt 0 at 0x104036e10>: 1, <refcnt 0 at 0x104036e10>: 5, <refcnt 0 at 0x104036e10>: 1, <refcnt 0 at 0x104036e10>: 4, <refcnt 0 at 0x104036e10>: 5, <refcnt 0 at 0x104036e10>: 2, <refcnt 0 at 0x104036e10>: 1, <refcnt 0 at 0x104036e10>: 2, <refcnt 0 at 0x104036e10>: 5, <refcnt 0 at 0x104036e10>: 6, -5: 1, -4: 1, -3: 5, -1: 3}
有没有人知道,为什么使用多个线程时会出现这种输出,而只使用1个线程时却运行正常?
注意:我已经使用了互斥锁并经过测试,因此不太可能是竞争条件造成的。
这是每个线程运行的函数。
static PyObject*
threaded_delt_freq(void *args)
{
PyDictObject * n_dict = NULL;
struct ThreadData * recvd_data = (struct ThreadData *)args;
if(recvd_data != NULL)
{
n_dict = glob_large_dict;
PyObject *matched_dic = PyDict_GetItemString(n_dict, "Matched Dict");
PyObject *strky_time_dict = PyDict_GetItemString(n_dict, "strky TIme Dict");
PyListObject *py_big_list = PyDict_GetItemString(n_dict, "big_lst");
int st = recvd_data->start;
int stp = recvd_data->stop;
if(py_big_list != NULL)
{
PyListObject * matched_dic_keys = PyDict_Keys(matched_dic);
int client_num_strky = PyList_GET_SIZE(matched_dic_keys);
int client_strky_iter = 0;
for(client_strky_iter =st ; client_strky_iter < stp; client_strky_iter+=1)
{
if(client_strky_iter < client_num_strky)
{
PyObject *curr_client_strky = PyList_GetItem(matched_dic_keys, client_strky_iter);
PyListObject *curr_strky_t_list = PyDict_GetItem(strky_time_dict, curr_client_strky);
int t_list_size = PyList_GET_SIZE(curr_strky_t_list);
int curr_t_list_iter = 0;
PyListObject *zipped_list = PyDict_GetItem(matched_dic, curr_client_strky);
int zipped_list_iter = 0;
int zipped_list_size = PyList_GET_SIZE(zipped_list);
for(curr_t_list_iter = 0; curr_t_list_iter < t_list_size; curr_t_list_iter+=1)
{
PyIntObject * curr_t_obj = PyList_GetItem(curr_strky_t_list, curr_t_list_iter);
long curr_t_val = PyInt_AsLong(curr_t_obj);
for(zipped_list_iter=0; zipped_list_iter < zipped_list_size; zipped_list_iter+=1)
{
PyTupleObject *loc_obj_tuple = PyList_GetItem(zipped_list, zipped_list_iter);
PyObject * t_db_loc_obj = PyTuple_GetItem(loc_obj_tuple, 0);
char *t_str = PyString_AsString(t_db_loc_obj);
char *s_str = PyString_AsString(obj_id_obj);
long t_db_loc = (long)atoi(t_str);
long obj_id = (long)atoi(s_str);
if((obj_id-1) < PyList_GET_SIZE(py_big_list))
{
pthread_mutex_lock(&mutexes[obj_id-1]);
PyIntObject * diff_val_check = PyInt_FromLong((long)(t_db_loc - curr_t_val));
PyIntObject * delt_val = PyDict_GetItem(obj_time_dict, diff_val_check);
Py_DECREF(diff_val_check);
if(delt_val != NULL)
{
PyIntObject * delt_new_val = PyInt_FromLong(delt_long_val);
int rslt = PyDict_SetItem(obj_time_dict, delt_tmp_key , delt_new_val );
Py_DECREF(delt_tmp_key);
}
else{
PyIntObject * delt_tmp_key = PyInt_FromLong((long)(t_db_loc - curr_t_val));
int rslt = PyDict_SetItem(obj_time_dict, delt_tmp_key , PyInt_FromLong((long)1));
Py_DECREF(delt_tmp_key);
}
pthread_mutex_unlock(&mutexes[obj_id-1]);
}
}
}
}
}
}
}
return n_dict;
}
其中:
strky_time_dict:是一个字典,带有字符串键和整数值列表
matched_dic:是一个字典,键为字符串,值为经过 zip 的(元组)列表
py_big_list:是字典列表,(这些字典正在更新)
mutexes:是pthread_mutex_t的数组,在程序开始时通过调用pthread_mutex_init()初始化。