所以我试图加速我的python脚本从文件加载数据并将其存储在数组中。但是我意识到如果我在1020次左右重复这个程序,不要问我为什么会出现分段错误。加载数据的代码是:
import os,sys
import numpy as np
import pandas as pd
import ctypes as ct
VERSION = 0.1
try:
path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "_vec")
except NameError:
path = "./_im7"
if not(sys.platform in ('win32', 'cygwin')):
path += '.so.'+str(VERSION)
libfunctions = ct.cdll.LoadLibrary(path)
else:
libfunctions = ct.cdll.LoadLibrary(path)
libfunctions.readvec.restype = ct.c_void_p
libfunctions.readvec.argtypes = [ct.c_char_p, np.ctypeslib.ndpointer(ct.c_float), \
ct.c_int,ct.c_int,ct.c_int]
def readvecCT(filename,nx,ny,nz):
# we are supposing for the moment that the naming scheme PIV__vxy.case PIV__vxy.geo not changes should that
# not be the case appropriate changes have to be made to the corresponding file
# ctypes
data_temp = np.zeros((2*ny*nx,1),dtype=np.dtype('f4'))
libfunctions.readvec(ct.c_char_p(filename),data_temp,ct.c_int(nx), ct.c_int(ny),ct.c_int(3))
# W value
if (nz)>1:
return data_temp[:nx*ny].reshape(ny, nx), data_temp[nx*ny:2*nx*ny].reshape(ny, nx), data_temp[2*nx*ny:].reshape(ny, nx)
else:
return data_temp[:nx*ny].reshape(ny, nx), data_temp[nx*ny:].reshape(ny, nx)
底层C函数:
#include <stdio.h>
void readvec(const char *fname, float *data, int nx, int ny,int skiprows) {
//void cfun(const double * indata, int rowcount, int colcount, double * outdata) {
int i,j,check;
size_t length;
FILE *file;
char buffer[1024];
char *buffer_ptr = &buffer[0];
//puts("Here we go!");
file = fopen(fname, "r");
//printf("Nx: %d Ny: %d skiprows: %d \n",nx,ny,skiprows);
//printf("Filename %s \n",fname);
for (i=0;i<=skiprows;i++){
check=getline(&buffer_ptr,&length,file);
//printf("buffer: %s \n",buffer);
if (check==-1){
puts("ERROR");
}
}
for (i = 0; i < ny; i++) {
for (j=0;j<nx;j++){
check=fscanf(file,"%f",&data[i*nx+j]);
//printf("Data %s\n",buffer);
if (check==-1){
puts("ERROR");
}
}
}
for (i = 0; i < ny; i++) {
for (j=0;j<nx;j++){
check=fscanf(file,"%f",&data[ny*nx+i*nx+j]);
if (check==-1){
puts("ERROR");
}
}
}
//fclose(file);
//puts("Done!");
}
和产生错误的简单测试:
import time
import numpy as np
import libvec.libvec as vec
import matplotlib.pyplot as plt
tmp_geo = '/mnt/shared/projects/MORPHING/Users/jschelle/raw_treated_201509/RES_u_8-ms-dp-4-mmce_sma_0-mm-mcf_0-hz/u_8-ms-dp-4-mmce_sma_0-mm-mcf_0-hz_20150824_seq_01/ParaView/PIV__vxy.geo'
tmp_file = '/mnt/shared/projects/MORPHING/Users/jschelle/raw_treated_201509/RES_u_8-ms-dp-4-mmce_sma_0-mm-mcf_0-hz/u_8-ms-dp-4-mmce_sma_0-mm-mcf_0-hz_20150824_seq_01/ParaView/PIV__vxy_01019.vec'
tmp_case = '/mnt/shared/projects/MORPHING/Users/jschelle/raw_treated_201509/RES_u_8-ms-dp-4-mmce_sma_0-mm-mcf_0-hz/u_8-ms-dp-4-mmce_sma_0-mm-mcf_0-hz_20150824_seq_01/ParaView/PIV__vxy.case'
x,y,z = vec.readgeo(tmp_geo)
nx = len(x)
ny = len(y)
nz = len(z)
iterations = 1100
start_time = time.time()
for i in range(iterations):
U,V = vec.readvecCT(tmp_file,nx,ny,nz);
print("Ctypes --- %f seconds ---" % ((time.time() - start_time)/iterations))
现在,当迭代次数为1000时,没有问题发生,但在1100,我得到了分段错误。我想它与内存管理有关但是我不知道如何解决它甚至从哪里开始!任何帮助将不胜感激。
提前很多
J
答案 0 :(得分:1)
原来:
fclose(file)
被评论,所以这不会直接导致错误,但同时它会在n次迭代后导致段错误
非常感谢大家!