在递归函数上使用unordered_map unordered_set进行分段错误

时间:2016-05-01 16:21:30

标签: c++ pointers memory recursion unordered-map

我是比较新的编程c ++。我正在使用unorderd_map为树数据结构的实现实现一个类似于索引的树,以存储子节点。当我使用树状结构时,构造搜索方法是递归的,我也存储节点的指针,所以我怀疑我可能有一种处理不当的内存问题。我遇到了分段错误。接下来是我的代码及其输出。

#include <memory>
#include <sstream>
#include <unordered_map>
#include <iostream>
#include <string>
#include <sqlite3.h>
#include "aux_functions.cpp"
#include <math.h>

using namespace std;


class TreeLikeIndex
{
  public:
    TreeLikeIndex(string attribute, string indices, int indices_count, short int is_leaf, unordered_map<string, TreeLikeIndex*> children);
    TreeLikeIndex(string indices, int indices_count);
    TreeLikeIndex();
    string search(unordered_map<string, string> *);
  private:
     string indices;
     int indices_count;
     short int is_leaf;
     string attribute;
     unordered_map<string, TreeLikeIndex*> children;

};


string TreeLikeIndex::search(unordered_map<string, string> * _tuple)
{
  if((*_tuple).empty() || this->is_leaf ) return this->indices;
  string att_val = (*_tuple)[this->attribute];  
  (*_tuple).erase(this->attribute);
  TreeLikeIndex * child_with_that_value = this->children[att_val];
  return (*child_with_that_value).search(_tuple);
}



class DecisionTreeLikeIndexer
{

 public:
    DecisionTreeLikeIndexer(string, string, string);
    int rebuild_index();   
    TreeLikeIndex * get_index();

 private:
    TreeLikeIndex * build_index(unordered_set<string> attributes_list, int depth, string comma_separated_ids, int ids_list_count);
    TreeLikeIndex * index;
    string source_db_address;
    string dest_folder_address;
    time_t time_of_last_build;
    unordered_set<string> columns_names;
    string source_table_name;
    unordered_set<string> temp_tables_names;
    string id_column_name;
    sqlite3 * source_db_connection;
    int table_count;
};

int DecisionTreeLikeIndexer::rebuild_index()
{
  this->index = this->build_index(this->columns_names, 0, "", 0);
  this->time_of_last_build = time(NULL);
  return 0;
}

TreeLikeIndex *  DecisionTreeLikeIndexer::get_index()
{
  return this->index; 
}

DecisionTreeLikeIndexer::DecisionTreeLikeIndexer(string source_db_address, string table_name, string dest_folder_address)
{
  this->source_db_address = source_db_address;
  this->dest_folder_address = dest_folder_address;
  this->columns_names = Aux::get_column_names(source_db_address, table_name); 
  this->source_table_name = table_name;
  this->id_column_name = "rowid";
  this->source_db_connection = Aux::get_db_connection(this->source_db_address);

  // Getting count of this table

  sqlite3_stmt* statement;
  string query = "SELECT count(*) FROM " + this->source_table_name + ";";
  if(sqlite3_prepare(this->source_db_connection, query.c_str(), -1, &statement, 0) == SQLITE_OK)
   {
    int res = sqlite3_step(statement);
    const unsigned char * count_char = sqlite3_column_text(statement,0);
    if(res == SQLITE_ROW)
     {
      stringstream _temp;
      _temp << count_char;
      _temp >> this->table_count;      
     }
     sqlite3_finalize(statement);     
   }
  else
   {
    cout <<  "Error initializating Indexer (Getting initial table count): " << sqlite3_errmsg(this->source_db_connection) << endl;
   }

}

TreeLikeIndex *  DecisionTreeLikeIndexer::build_index(unordered_set<string> attributes_list, int depth, string comma_separated_ids, int ids_list_count)
{

  if( attributes_list.size() <=1 || (depth > 0 && ids_list_count <= 1))
   {
     Aux::tabs(depth);
     cout << "Leaf at depth: " << depth << " Ids are: " << comma_separated_ids << " Ids count: " << ids_list_count << endl; 
     static TreeLikeIndex * node = new TreeLikeIndex((string)comma_separated_ids, (int)ids_list_count);
     return node;
   }

  string source_table = this->source_table_name;
  int count = this->table_count;

  if(depth > 0)
  {
    while(1)
    {
     source_table = *Aux::get_random_list_of_strings(1).begin();
     if(this->temp_tables_names.insert(source_table).second) break;
    }

    const string create_temp_table_stmnt = "CREATE TEMP TABLE " + source_table + " AS SELECT * FROM " + this->source_table_name + " WHERE " + this->id_column_name + " IN(" + comma_separated_ids + ")";  
    sqlite3_exec(this->source_db_connection, create_temp_table_stmnt.c_str(),Aux::sqlt_callback,0,NULL);
    count = ids_list_count;
    Aux::tabs(depth);
    cout << "Not root node" << endl; 
  }

  Aux::tabs(depth);
  cout << "Source table is: " << source_table << " Table count is: " << count << endl;
  Aux::tabs(depth);
  cout << "Attributes list is: "; for_each(attributes_list.begin(), attributes_list.end(),[](string v){cout << v << " ";}); 
  cout << endl; 
  const double E = log2(count) ;
  Aux::tabs(depth);
  cout << "Entropy of node: " << E << endl;
  string best_attribute;
  double best_gain;
  unordered_set<string> best_attribute_values;

  for(string attr: attributes_list)
  { 
    Aux::tabs(depth+1);
cout << "Analysing attribute: " << attr << endl;
const string get_at_count_values_query = "SELECT " + attr + ", count(" + attr + ") FROM " + source_table + " GROUP BY " + attr + ";";
    sqlite3_stmt * stmnt;
    double weighted_entropy = 0;
    unordered_set<string> this_att_values;
    if(sqlite3_prepare(this->source_db_connection, get_at_count_values_query.c_str(), -1, &stmnt, 0) == SQLITE_OK)
    {
      for(;;)
      {


       int res = sqlite3_step(stmnt);

       if(res == SQLITE_DONE || res==SQLITE_ERROR)
       {
        double gAti = E - weighted_entropy;
        Aux::tabs(depth+1);
        cout << "Finish computing WE for att: " << attr << " Gain is: " << gAti << endl;
        if(gAti > best_gain)
         {
          Aux::tabs(depth+1);
          cout << "Found attribute with better gain." << endl;
          best_gain = gAti;
          best_attribute = attr;
          best_attribute_values.clear();

         Aux::tabs(depth+1);
         for(string v:this_att_values)
          {
           best_attribute_values.insert(v);  
          }
          cout << endl;

          this_att_values.clear();
         }
       sqlite3_finalize(stmnt);
       //delete &res; 
       break;
       }
       if(res == SQLITE_ROW)
       {

        string val = std::string(reinterpret_cast<const char*>(sqlite3_column_text(stmnt,0)));
        int vSize = sqlite3_column_int(stmnt,1);
        Aux::tabs(depth+2);
        this_att_values.insert(val);
        double ratio = double(vSize) / double(count);
        weighted_entropy += double(ratio) * double(log2(vSize));
        Aux::tabs(depth+2);
        cout << "Processing value: " << val << " With vSize: " << vSize << " Current WE is: " << weighted_entropy << endl;
      }
    }
  }
 }

  Aux::tabs(depth);
  cout << "Finish processing attributes list. Best attribute is: " <<     best_attribute << " Best gain is: " << best_gain << endl;
  Aux::tabs(depth);
  cout << "Best attribute values are: ";     for_each(best_attribute_values.begin(), best_attribute_values.end(), [](string v){cout << v << ",";}); cout << endl; 
  unordered_map<string, TreeLikeIndex *> children;
  for(string val: best_attribute_values)
  {

    const string get_ids_of_bestatt_val = "SELECT rowid FROM " + source_table + " WHERE " + best_attribute + " = " + val + ";";
    int ids_count = 0;
    sqlite3_stmt * stmnt;
    string ids = "";
    bool first = 1;
    int next_depth = depth + 1;
    unordered_set<string> next_attributes_set;

    for(string attr: attributes_list) if(attr != best_attribute) next_attributes_set.insert(attr);

    if(sqlite3_prepare(this->source_db_connection, get_ids_of_bestatt_val.c_str(), -1, &stmnt,0) == SQLITE_OK)
     {
       for(;;)
        { 
         int res = sqlite3_step(stmnt);

         if(res == SQLITE_ROW)
          {
            string id = std::string(reinterpret_cast<const char*>(sqlite3_column_text(stmnt,0)));
            if(!first) ids += "," + id;
            else ids += id;
            ids_count++;

          }

         if(res == SQLITE_DONE || res == SQLITE_ERROR)
          {
           Aux::tabs(depth+1);
           cout << "Adding branch for val: " << val << endl;
           Aux::tabs(depth+1);
           cout << " Next attributes are: ";     for_each(next_attributes_set.begin(), next_attributes_set.end(), [](string v){cout << v << ",";});
           cout << " Depth is: " << next_depth << " Ids are: " << ids << " Ids count: " << ids_count << endl;
           sqlite3_finalize(stmnt);
           static TreeLikeIndex * temp_child =  this->build_index(next_attributes_set, next_depth, ids, ids_count);
           pair<string, TreeLikeIndex*> child (val, temp_child);
       children.insert(child);
          }
        }
     }
  }

  Aux::tabs(depth);
  cout << "Finish processing node, will return." << endl;
  static TreeLikeIndex * no_leaf_node = new TreeLikeIndex(best_attribute, "all", count, 0, children);
  return no_leaf_node;

}
}

TreeLikeIndex::TreeLikeIndex(std::string attribute, std::string indices, int indices_count, short int is_leaf, unordered_map<std::string, TreeLikeIndex*> children) 
{
  this->attribute = attribute;
  this->indices = indices;
  this->is_leaf = is_leaf;
  this->children = children; 
  this->children.clear();
  for(pair<string, TreeLikeIndex*> p: children) this->children.insert(p);
  this->indices_count = indices_count; 
}

TreeLikeIndex::TreeLikeIndex(string indices, int indices_count)
{
   this->indices = indices;
   this->indices_count = indices_count; 
   this->is_leaf = 1;
}

TreeLikeIndex::TreeLikeIndex()
{
   this->indices = "";
   this->indices_count = 0; 
   this->is_leaf = 1;
}




int main()
{

string source_db_address = "my_table";
string table_name = "b";
string dest_folder_address = ".";

DecisionTreeLikeIndexer indexer(source_db_address, table_name, dest_folder_address);
indexer.rebuild_index();
}

输出是:

Source table is: b Table count is: 9
Attributes list is: cant_n_dec cant_n_des cant_n_control 
Entropy of node: 3.16993
    Analysing attribute: cant_n_dec
                    Processing value: 1 With vSize: 1 Current WE is: 0
                    Processing value: 2 With vSize: 4 Current WE is: 0.888889
                    Processing value: 3 With vSize: 2 Current WE is: 1.11111
                    Processing value: 4 With vSize: 1 Current WE is: 1.11111
                    Processing value: 5 With vSize: 1 Current WE is: 1.11111
    Finish computing WE for att: cant_n_dec Gain is: 2.05881
    Found attribute with better gain.

    Analysing attribute: cant_n_des
                    Processing value: 1 With vSize: 2 Current WE is: 0.222222
                    Processing value: 2 With vSize: 4 Current WE is: 1.11111
                    Processing value: 3 With vSize: 2 Current WE is: 1.33333
                    Processing value: 5 With vSize: 1 Current WE is: 1.33333
    Finish computing WE for att: cant_n_des Gain is: 1.83659
    Analysing attribute: cant_n_control
                    Processing value: 1 With vSize: 2 Current WE is: 0.222222
                    Processing value: 2 With vSize: 3 Current WE is: 0.750543
                    Processing value: 3 With vSize: 3 Current WE is: 1.27886
                    Processing value: 5 With vSize: 1 Current WE is: 1.27886
    Finish computing WE for att: cant_n_control Gain is: 1.89106
Finish processing attributes list. Best attribute is: cant_n_dec Best gain is: 2.05881
Best attribute values are: 1,2,3,4,5,
    Adding branch for val: 1
     Next attributes are: cant_n_control,cant_n_des, Depth is: 1 Ids are: 3 Ids count: 1
    Leaf at depth: 1 Ids are: 3 Ids count: 1
Segmentation fault

1 个答案:

答案 0 :(得分:1)

我不是,但是......

我认为问题可能出在以下周期

   for(;;)
    { 
     int res = sqlite3_step(stmnt);

     if(res == SQLITE_ROW)
      {
        string id = std::string(reinterpret_cast<const char*>(sqlite3_column_text(stmnt,0)));
        if(!first) ids += "," + id;
        else ids += id;
        ids_count++;

      }

     if(res == SQLITE_DONE || res == SQLITE_ERROR)
      {
       Aux::tabs(depth+1);
       cout << "Adding branch for val: " << val << endl;
       Aux::tabs(depth+1);
       cout << " Next attributes are: ";     for_each(next_attributes_set.begin(), next_attributes_set.end(), [](string v){cout << v << ",";});
       cout << " Depth is: " << next_depth << " Ids are: " << ids << " Ids count: " << ids_count << endl;
       sqlite3_finalize(stmnt);
       static TreeLikeIndex * temp_child =  this->build_index(next_attributes_set, next_depth, ids, ids_count);
       pair<string, TreeLikeIndex*> child (val, temp_child);
   children.insert(child);
      }
    }

我不知道何时终止(for(;;)中没有退出条件,没有return&没有break&#39} )。

我怀疑che分段错误是由以下指令引起的

 int res = sqlite3_step(stmnt);

SQLITE_DONESQLITE_ERROR案例之后(通过调用

 sqlite3_finalize(stmnt);

),循环再次迭代,stmnt无效。

以下可以解决方案吗?

if ( sqlite3_prepare(this->source_db_connection, get_ids_of_bestatt_val.c_str(), -1, &stmnt,0) == SQLITE_OK)
 {
   while ( sqlite3_step(stmnt) == SQLITE_ROW )
    {
      ids += ( first ? "" : "," )
         + std::string(reinterpret_cast<const char*>(sqlite3_column_text(stmnt,0)));
      ids_count++;
    }

   Aux::tabs(depth+1);
   cout << "Adding branch for val: " << val << endl;
   Aux::tabs(depth+1);
   cout << " Next attributes are: ";
   for_each(next_attributes_set.begin(), next_attributes_set.end(), [](string v){cout << v << ",";});
   cout << " Depth is: " << next_depth << " Ids are: " << ids << " Ids count: " << ids_count << endl;
   sqlite3_finalize(stmnt);
   static TreeLikeIndex * temp_child =  this->build_index(next_attributes_set, next_depth, ids, ids_count);
   pair<string, TreeLikeIndex*> child (val, temp_child);
   children.insert(child);
 }