Is my program causing my out-of-memory (Killed) error?

Asked: 2019-06-11 07:14:56

Tags: c++ tensorflow memory out-of-memory ram

I have written my own code using TensorFlow's C API to run inference (i.e. to evaluate a trained artificial neural network) inside a C++ fluid dynamics simulation program. At some point, however, the computation stops and gives me this error:

mpirun noticed that process rank 10 with PID 0 on node node134 exited on signal 9 (Killed).

I also noticed that this probably happens because there is no memory left: at the moment the computation stops, both RAM and swap (Swp) are completely full.

I don't understand why this happens, because the only thing I have changed since the program last ran without errors is the code I added to it.
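To see whether memory really does grow across the time steps, one can print the process's peak resident set size once per step; a minimal diagnostic sketch (Linux-specific, not part of the solver code):

#include <sys/resource.h>
#include <iostream>

// Diagnostic sketch: print the peak resident set size of the process.
// On Linux, ru_maxrss is reported in kilobytes.
void printPeakRSS()
{
    struct rusage usage;
    getrusage(RUSAGE_SELF, &usage);
    std::cout << "peak RSS: " << usage.ru_maxrss << " kB" << std::endl;
}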

In the fluid dynamics software, I programmed this:

auto t_start_0 = std::chrono::high_resolution_clock::now();
const char* frozenGraphName = "/home/elias/Lr75-57_FPVANN_premix/data/FPV_ANN_tabulated_Standard_500.pb";
const char* inputOperationName = "input_1";
const char* outputOperationName = "dense_2/BiasAdd";
int no_of_inputs = in_mean.size();
int no_of_outputs = out_mean.size();
int cellsAndPatches = (input_f_zeta_PVNorm.size())/no_of_inputs;
std::vector<int64_t> input_dimensions = {cellsAndPatches,no_of_inputs};
std::vector<int64_t> output_dimensions = {cellsAndPatches,no_of_outputs};

Inference* inf = new Inference(frozenGraphName,inputOperationName,outputOperationName,no_of_inputs,no_of_outputs,input_dimensions,output_dimensions,cellsAndPatches);
output_real = inf->doInference(input_f_zeta_PVNorm);
delete inf;
auto t_end_0 = std::chrono::high_resolution_clock::now();
auto total_0 = std::chrono::duration<float, std::milli>(t_end_0 - t_start_0).count();
std::cout << "TOTAL INFERENCE TIME C API: " << total_0 << std::endl;

The constructor of my Inference class looks like this:

Inference::Inference(const char* fgn, const char* iname, const char* oname, int nIn, int nOut, std::vector<int64_t> dimIn,std::vector<int64_t> dimOut, int CP):no_input_sizes(nIn),no_output_sizes(nOut),noCellsPatches(CP)
{
    TF_Buffer* graph_def = read_file(fgn);
    graph = TF_NewGraph();
    status = TF_NewStatus();
    TF_ImportGraphDefOptions* graph_opts = TF_NewImportGraphDefOptions();
    TF_GraphImportGraphDef(graph, graph_def, graph_opts, status);
    if(TF_GetCode(status)!=TF_OK)   
    {
        std::cout << "ERROR: Unable to import graph " << TF_Message(status) << std::endl;
    }

    num_bytes_in = noCellsPatches*no_input_sizes*sizeof(float);
    num_bytes_out = noCellsPatches*no_output_sizes*sizeof(float);
    in_dims = dimIn;
    out_dims = dimOut;
    in_name =  strdup(iname);
    out_name = strdup(oname);

    TF_DeleteImportGraphDefOptions(graph_opts);
    TF_DeleteBuffer(graph_def);
}
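read_file and the class destructor are not shown in the post. For completeness, a typical read_file for the TF C API and a destructor that releases what this constructor acquires could look like this (a sketch under those assumptions, not the actual code):

#include <cstdio>
#include <cstdlib>

// Sketch only: deallocator for the TF_Buffer's malloc'd data.
static void free_buffer(void* data, size_t length)
{
    free(data);
}

// Read the frozen graph file into a TF_Buffer (common TF C API pattern).
TF_Buffer* read_file(const char* file)
{
    FILE* f = fopen(file, "rb");
    fseek(f, 0, SEEK_END);
    long fsize = ftell(f);
    fseek(f, 0, SEEK_SET);
    void* data = malloc(fsize);
    fread(data, fsize, 1, f);
    fclose(f);
    TF_Buffer* buf = TF_NewBuffer();
    buf->data = data;
    buf->length = fsize;
    buf->data_deallocator = free_buffer;
    return buf;
}

// Release everything the constructor acquired.
Inference::~Inference()
{
    TF_DeleteGraph(graph);
    TF_DeleteStatus(status);
    free((void*)in_name);   // strdup allocates with malloc
    free((void*)out_name);
}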

The doInference method looks like this:

std::vector<float> Inference::doInference(std::vector<float> inVals)
{   
    assert((inVals.size()%no_input_sizes)==0);
    std::cout << "EFFECTIVE BATCH SIZE: " << inVals.size() << std::endl;

    float **normalizedInputs = new float* [noCellsPatches]; // allocate pointers
    normalizedInputs[0] = new float [noCellsPatches*no_input_sizes]; // allocate data

    // set pointers
    for (int i = 1; i < noCellsPatches; ++i) {
        normalizedInputs[i] = &normalizedInputs[i-1][no_input_sizes];
    }


    for(int i=0;i<noCellsPatches;i++)
    {
        for(int j=0;j<no_input_sizes;j++)
        {
            normalizedInputs[i][j]=inVals.at(no_input_sizes*i+j);
        }
    }

    const char* iname = in_name;
    TF_Operation* input_op = TF_GraphOperationByName(graph,iname);     // ensure the string value is correct by viewing the frozen graph in TensorBoard
    TF_Output input = {input_op,0};
    inputs = &input;
    assert(inputs!=0);

    const char* oname = out_name;
    TF_Operation* output_op = TF_GraphOperationByName(graph,oname);   // ensure the string value is correct by viewing the frozen graph in TensorBoard
    TF_Output output = {output_op,0};
    outputs = &output;
    assert(outputs!=0);

    int64_t in_dims_arr[] = {noCellsPatches,no_input_sizes};

    TF_Tensor* input_value = TF_NewTensor(TF_FLOAT,in_dims_arr,2,&normalizedInputs[0][0],num_bytes_in,&Deallocator, 0); // normalizedInputs at Arg 4 before
    TF_Tensor* const input_value_const = input_value; // const pointer to TF_Tensor
    TF_Tensor* const* input_values = &input_value_const; // pointer to const pointer to TF_Tensor
    assert(input_values!=0);

    int64_t out_dims_arr[] = {noCellsPatches,no_output_sizes};

    TF_Tensor* output_value = TF_AllocateTensor(TF_FLOAT, out_dims_arr, 2, num_bytes_out); // pointer to TF_Tensor  //Arg2!
    TF_Tensor** output_values = &output_value;  // pointer to pointer to TF_Tensor
    assert(output_values!=0);

    std::cout << "Running session..." << std::endl;
    TF_SessionOptions* sess_opts = TF_NewSessionOptions();

    int limitCPUThreads = 1; // if you want to limit the inference to a number of CPU Threads you can do that here
    int limitNumberOfCPUs = 0;

    if((limitCPUThreads!=0)&&(limitNumberOfCPUs!=0))
    {
        std::cout << "ERROR! You cannnot limit both number of CPUs and number of threads!" << std::endl;
    }
    if((limitCPUThreads!=0)&&(limitNumberOfCPUs==0))
    {
        std::cout << "WARNING! You are limiting CPU inference to " << limitCPUThreads << " CPU Thread(s) / Core(s)!" << std::endl;

        uint8_t intra_op_parallelism_threads = limitCPUThreads; // for operations that can be parallelized internally, such as matrix multiplication
        uint8_t inter_op_parallelism_threads = limitCPUThreads; // for operations that are independent in the TensorFlow graph because there is no directed path between them in the dataflow graph
        uint8_t config[]={0x10,intra_op_parallelism_threads,0x28,inter_op_parallelism_threads};
        TF_SetConfig(sess_opts,config,sizeof(config),status);
        if (TF_GetCode(status) != TF_OK)
        {
            printf("ERROR: %s\n", TF_Message(status));
        }

    }
    if((limitCPUThreads==0)&&(limitNumberOfCPUs!=0))  // SOMETHING HERE STILL SEEMS TO BE WRONG!
    {
        std::cout << "WARNING! You are limiting CPU inference to " << limitNumberOfCPUs << " CPU(s)!" << std::endl;
        uint8_t numberOfCPUs = limitNumberOfCPUs;
        uint8_t config[] = {0xa, 0x7, 0xa, 0x3, 0x43, 0x50, 0x55, 0x10, 0x01};
        std::cout << config << std::endl;
        TF_SetConfig(sess_opts,config,sizeof(config),status);
        if (TF_GetCode(status) != TF_OK)
        {
            printf("ERROR: %s\n", TF_Message(status));
        }
    }

    TF_Session* session = TF_NewSession(graph, sess_opts, status);
    assert(TF_GetCode(status)==TF_OK);
    auto t_start = std::chrono::high_resolution_clock::now();
    TF_SessionRun(session,nullptr,inputs,input_values,1,outputs,output_values,1,nullptr,0,nullptr,status);
    auto t_end = std::chrono::high_resolution_clock::now();
    auto total = std::chrono::duration<float, std::milli>(t_end - t_start).count();
    std::cout << "time required for inference: " << total << std::endl;
    float* out_vals = static_cast<float*>(TF_TensorData(*output_values));

    std::vector<float> results(no_output_sizes*noCellsPatches,0);
    for(int i=0;i<noCellsPatches;i++)
    {
        for(int j=0;j<no_output_sizes;j++)
        {
            results.at(i*no_output_sizes+j) = *out_vals;
            out_vals++;
        }
    }

    std::cout << "Successfully ran session!" << std::endl;

    TF_CloseSession(session,status);
    TF_DeleteSession(session,status);
    TF_DeleteSessionOptions(sess_opts);

    delete [] normalizedInputs[0];
    delete [] normalizedInputs;
    return results;
}
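The Deallocator passed to TF_NewTensor is not shown either. Note that, per the ownership rules documented in the C API header, the caller keeps ownership of the input tensor, and TF_SessionRun stores a newly allocated tensor into output_values (the tensor pre-allocated with TF_AllocateTensor is replaced rather than filled in). A sketch of the matching cleanup, assuming a no-op Deallocator as in the common TF C API examples:

// Sketch only: a no-op deallocator, because the input buffer is owned by
// normalizedInputs and released with delete[] at the end of doInference.
void Deallocator(void* data, size_t length, void* arg)
{
    // intentionally empty
}

// Cleanup that would pair with each TF_SessionRun call, after the results
// have been copied out:
//     TF_DeleteTensor(input_value);     // caller retains ownership of inputs
//     TF_DeleteTensor(*output_values);  // TF_SessionRun allocates the output;
//                                       // the caller must delete it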

Is there some kind of memory leak that I am not seeing? Or what else could be the reason that it runs for several hundred time steps and only then crashes?

Thanks!

0 Answers:

No answers