Question

所以我试图加速我的python脚本从文件加载数据并将其存储在数组中。但是我意识到如果我在1020次左右重复这个程序，不要问我为什么会出现分段错误。加载数据的代码是：

import os,sys
import numpy as np
import pandas as pd
import ctypes as ct

VERSION = 0.1

try:
    path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "_vec")
except NameError:
    path = "./_im7"

if not(sys.platform in ('win32', 'cygwin')):
    path += '.so.'+str(VERSION)
    libfunctions = ct.cdll.LoadLibrary(path)
else:
    libfunctions = ct.cdll.LoadLibrary(path)

libfunctions.readvec.restype = ct.c_void_p
libfunctions.readvec.argtypes = [ct.c_char_p, np.ctypeslib.ndpointer(ct.c_float), \
    ct.c_int,ct.c_int,ct.c_int]


def readvecCT(filename,nx,ny,nz):
    # we are supposing for the moment that the naming scheme PIV__vxy.case PIV__vxy.geo not changes should that
    # not be the case appropriate changes have to be made to the corresponding file

    # ctypes
    data_temp = np.zeros((2*ny*nx,1),dtype=np.dtype('f4'))
    libfunctions.readvec(ct.c_char_p(filename),data_temp,ct.c_int(nx), ct.c_int(ny),ct.c_int(3))

    # W value
    if (nz)>1:
        return data_temp[:nx*ny].reshape(ny, nx), data_temp[nx*ny:2*nx*ny].reshape(ny, nx), data_temp[2*nx*ny:].reshape(ny, nx)
    else:    
        return data_temp[:nx*ny].reshape(ny, nx), data_temp[nx*ny:].reshape(ny, nx)

底层C函数：

#include <stdio.h>


void readvec(const char *fname, float *data,  int nx, int ny,int skiprows) {
    //void cfun(const double * indata, int rowcount, int colcount, double * outdata) {
    int i,j,check;
    size_t length;
    FILE *file;
    char buffer[1024];
    char *buffer_ptr = &buffer[0];
    //puts("Here we go!");

    file = fopen(fname, "r");
    //printf("Nx: %d Ny: %d skiprows: %d \n",nx,ny,skiprows);
    //printf("Filename %s \n",fname);
    for (i=0;i<=skiprows;i++){
    check=getline(&buffer_ptr,&length,file);
    //printf("buffer: %s \n",buffer);
    if (check==-1){
        puts("ERROR");
    }
    }

    for (i = 0; i < ny; i++) {
    for (j=0;j<nx;j++){
        check=fscanf(file,"%f",&data[i*nx+j]);
        //printf("Data %s\n",buffer);
        if (check==-1){
        puts("ERROR");
        }
    }
    }

    for (i = 0; i < ny; i++) {
    for (j=0;j<nx;j++){
        check=fscanf(file,"%f",&data[ny*nx+i*nx+j]);
        if (check==-1){
        puts("ERROR");
        }
    }
    }

    //fclose(file);
    //puts("Done!");
}

和产生错误的简单测试：

import time
import numpy as np
import libvec.libvec as vec
import matplotlib.pyplot as plt


tmp_geo = '/mnt/shared/projects/MORPHING/Users/jschelle/raw_treated_201509/RES_u_8-ms-dp-4-mmce_sma_0-mm-mcf_0-hz/u_8-ms-dp-4-mmce_sma_0-mm-mcf_0-hz_20150824_seq_01/ParaView/PIV__vxy.geo'
tmp_file = '/mnt/shared/projects/MORPHING/Users/jschelle/raw_treated_201509/RES_u_8-ms-dp-4-mmce_sma_0-mm-mcf_0-hz/u_8-ms-dp-4-mmce_sma_0-mm-mcf_0-hz_20150824_seq_01/ParaView/PIV__vxy_01019.vec'
tmp_case = '/mnt/shared/projects/MORPHING/Users/jschelle/raw_treated_201509/RES_u_8-ms-dp-4-mmce_sma_0-mm-mcf_0-hz/u_8-ms-dp-4-mmce_sma_0-mm-mcf_0-hz_20150824_seq_01/ParaView/PIV__vxy.case'

x,y,z = vec.readgeo(tmp_geo)
nx = len(x)
ny = len(y)
nz = len(z)
iterations = 1100

start_time = time.time()

for i in range(iterations):
    U,V = vec.readvecCT(tmp_file,nx,ny,nz);

print("Ctypes --- %f seconds ---" % ((time.time() - start_time)/iterations))

现在，当迭代次数为1000时，没有问题发生，但在1100，我得到了分段错误。我想它与内存管理有关但是我不知道如何解决它甚至从哪里开始！任何帮助将不胜感激。

提前很多

J

Answer 1

原来：

fclose(file)

被评论，所以这不会直接导致错误，但同时它会在n次迭代后导致段错误

非常感谢大家！

Ctypes分段错误

1 个答案: