使用fstream从文件中读取缺失值“NA”

时间:2016-02-23 03:05:30

标签: c++ fstream

我正在使用下面的代码从 txt 文件中读取数据。如果数据全部是数字,代码可以正常工作;不幸的是,文件中的缺失值目前用“NA”表示。请问有什么方法/代码可以处理这种情况,并且不会影响后续的计算?谢谢。

#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>


using namespace std;

// Column-oriented storage for the records of a whitespace-delimited text file.
// Each vector holds one column (in the order the columns are read from the
// file); element i of every vector belongs to record i.
struct Data {
    vector<double> cow_id;
    vector<double> age_obs;
    vector<double> dim_obs;
    vector<double> my_obs;
    vector<double> mcf_obs;
    vector<double> mcp_obs;
    vector<double> mcl_obs;
    vector<double> bw_obs;
    vector<double> bcs_obs;

    // Reads records from the file at `filepath`, appends them to the column
    // vectors, then prints every stored record to standard output as one
    // tab-separated line.
    //
    // - If the file cannot be opened, "Failed to open" is printed and nothing
    //   is appended.
    // - The first line of the file is discarded without being parsed.
    // - Every following record must consist of nine values readable as double.
    //   Reading stops at the first record that cannot be read completely, so a
    //   non-numeric token such as "NA" ends the input at that record.
    void read_input (const string filepath)
    {
        ifstream source(filepath.c_str());
        if (source)
        {
            // Skip the first line.
            string header;
            getline(source, header);

            const int kColumns = 9;
            double row[kColumns] = {};
            for (;;)
            {
                // Once one extraction fails the stream stays in a failed
                // state, so the remaining extractions of this record are
                // no-ops and the check below rejects the whole record.
                for (int col = 0; col < kColumns; ++col)
                    source >> row[col];
                if (!source)
                    break;

                cow_id.push_back(row[0]);
                age_obs.push_back(row[1]);
                dim_obs.push_back(row[2]);
                my_obs.push_back(row[3]);
                mcf_obs.push_back(row[4]);
                mcp_obs.push_back(row[5]);
                mcl_obs.push_back(row[6]);
                bw_obs.push_back(row[7]);
                bcs_obs.push_back(row[8]);
            }
            source.close();
        }
        else
        {
            cout << "Failed to open" << endl;
        }

        // Echo everything currently stored, one record per line.
        const size_t rows = age_obs.size();
        for (size_t r = 0; r != rows; ++r)
        {
            cout << cow_id[r]  << '\t'
                 << age_obs[r] << '\t'
                 << dim_obs[r] << '\t'
                 << my_obs[r]  << '\t'
                 << mcf_obs[r] << '\t'
                 << mcp_obs[r] << '\t'
                 << mcl_obs[r] << '\t'
                 << bw_obs[r]  << '\t'
                 << bcs_obs[r] << endl;
        }
    }

};

// Program entry point: loads the data file into a Data object, which also
// prints the records it holds.
int main()
{
    const string data_path = "C:\\Data\\C++\\learncpp\\data.txt";

    Data input;
    input.read_input(data_path);

    return 0;
}

1 个答案:

答案 0 :(得分:1)

您需要先把文件中的每个字段作为字符串标记(token)读入;如果某个标记不等于“NA”,再把它转换为数字(double)。

// Helper function to get a double from a token.
// If the string is "NA", decide what's an appropriate value to return.
double get_number(string const& str)
{
   if ( str == "NA" )
   {
      return 0.0; // Maybe?
   }
   else
   {
      return std::stod(str);
   }
}

// Reads records from the file at `filepath`, appends them to the column
// vectors, then prints every stored record to standard output as one
// tab-separated line.
//
// Fields are read as text tokens and converted with get_number(), so the
// conversion of a token such as "NA" is decided there rather than by the
// stream. If the file cannot be opened, "Failed to open" is printed and
// nothing is appended. The first line of the file is discarded unparsed.
void read_input (const string filepath)
{
   ifstream source (filepath.c_str());
   if (source)
   {
      // Skip the first line.
      string header;
      getline(source, header);

      const int kColumns = 9;
      string token[kColumns];
      for (;;)
      {
         // A record counts only if all nine tokens could be read; after the
         // first failed extraction the remaining ones are no-ops.
         for (int col = 0; col < kColumns; ++col)
            source >> token[col];
         if (!source)
            break;

         // Convert and store each field, in column order.
         cow_id.push_back(get_number(token[0]));
         age_obs.push_back(get_number(token[1]));
         dim_obs.push_back(get_number(token[2]));
         my_obs.push_back(get_number(token[3]));
         mcf_obs.push_back(get_number(token[4]));
         mcp_obs.push_back(get_number(token[5]));
         mcl_obs.push_back(get_number(token[6]));
         bw_obs.push_back(get_number(token[7]));
         bcs_obs.push_back(get_number(token[8]));
      }
      source.close();
   }
   else
   {
      cout << "Failed to open" << endl;
   }

   // Echo everything currently stored, one record per line.
   const size_t rows = age_obs.size();
   for (size_t r = 0; r != rows; ++r)
   {
      cout << cow_id[r]  << '\t'
           << age_obs[r] << '\t'
           << dim_obs[r] << '\t'
           << my_obs[r]  << '\t'
           << mcf_obs[r] << '\t'
           << mcp_obs[r] << '\t'
           << mcl_obs[r] << '\t'
           << bw_obs[r]  << '\t'
           << bcs_obs[r] << endl;
   }
}

对提问者(OP)有效的 get_number 实现:

// Converts one text token to a double using a string stream instead of
// std::stod.
// The missing-value marker "NA" is mapped to 0.0, a placeholder that should
// be replaced by whatever value suits the later calculations. A token that
// cannot be read as a number also yields 0.0; no exception is thrown.
double get_number(string const& str)
{
   if ( str == "NA" )
   {
      return 0.0; // Maybe?
   }

   std::istringstream iss(str);
   // Initialised because the extraction below leaves `val` untouched when the
   // stream cannot even start reading (for example, an empty token).
   double val = 0.0;
   iss >> val;
   return val;
}