C++读取元数据文件,并处理由空格和冒号(:)分隔的字段

时间:2013-12-03 19:29:57

标签: c++ string getline stringstream

我知道有很多帖子讨论如何在C++中读取文件,但我仍然无法弄清楚如何将其用于我的特定目的。

我有一个如下所示的文件:前两行各指定数量可变的区域,从第3行开始是一些文件路径。因此,我想读取前两行,并把每个字段保存到两个整数数组 from[] 和 to[] 中。

例如,在这种情况下,我想填充两个整数数组:from[0] = 48、from[1] = 68、from[2] = 93,以及 to[0] = 49、to[1] = 70、to[2] = 100。第二行也以相同的方式处理;从第三行开始,每一行字符串应存入一个字符串数组。

48:49 68:70 93:100
22:33 34:47 50:67 71:92
tr429a.frank/tr429a.reg1.0.pdb
tr429a.frank/tr429a.reg1.1.pdb
tr429a.frank/tr429a.reg1.2.pdb
tr429a.frank/tr429a.reg1.3.pdb
tr429a.frank/tr429a.reg1.4.pdb
tr429a.frank/tr429a.reg1.5.pdb
tr429a.frank/tr429a.reg1.6.pdb
tr429a.frank/tr429a.reg1.7.pdb

非常感谢任何帮助!

3 个答案:

答案 0 :(得分:1)

把较复杂的解析拆成几个步骤来做要容易得多。抱歉代码风格有些混杂:

// Reads two lines from standard input and prints every space-separated
// piece of each line on its own output line.
//
// Each line is read into a fixed 100-byte buffer and cut in place: every
// space found is overwritten with a terminator, so the piece before it can be
// printed as a C string. Two adjacent spaces therefore yield an empty piece
// (an empty output line); a trailing space yields nothing extra.
void parserange()
{
  char line[100];

  for (int remaining = 2; remaining > 0; --remaining)
  {
    std::cin.getline(line, sizeof(line));

    char* token = line;
    while (token != nullptr && *token != '\0')
    {
      // Locate the end of the current piece; a null result means this is
      // the last piece on the line.
      char* next = strchr(token, ' ');
      if (next != nullptr)
      {
        *next = '\0';  // terminate the current piece in place
        ++next;        // the following piece starts right after the space
      }

      std::cout << token << "\n";
      token = next;
    }
  }
}

也就是说,先把一行读入s[100],然后查找空格,把字符串切分成若干部分。在我调用cout的位置,你还需要对每个部分做进一步解析(例如再按“:”拆分)。

parserange()的输出:

48:49
68:70
93:100
22:33
34:47
50:67
71:92

答案 1 :(得分:1)

如果这些是可变数量的“区域”和可变数量的文件名字符串,那么使用向量而不是数组可以让您的生活更轻松。

总的来说,我不会用C++来做这样的事情,而会用Python、Ruby等语言。C++真的不是为这类任务而设计的……无论如何,下面是一些能完成这项工作的快速而粗糙的代码。你可以通过重构把它整理得更干净……

#include <iostream>
#include <string>
#include <fstream>
#include <sstream>
#include <vector>
#include <numeric>
#include <algorithm>

using std::vector;
using std::cout;
using std::cin;
using std::endl;
using std::string;
using std::ifstream;
using std::istringstream;

// Splits `s` at every occurrence of `sep` and appends the pieces to `elems`.
//
// `elems` is appended to, never cleared, so repeated calls accumulate pieces.
// A piece that is followed by a separator is always appended, even when it is
// empty (e.g. between two adjacent separators). The text after the last
// separator is appended only when it is non-empty, so a trailing separator
// and an empty input contribute nothing.
void split(const string &s, vector<string> &elems, char sep=' ') {
  string::size_type start = 0;

  // Walk from separator to separator, copying each piece in one go instead of
  // growing a temporary string one character at a time.
  for (string::size_type pos = s.find(sep); pos != string::npos;
       pos = s.find(sep, start)) {
    elems.push_back(s.substr(start, pos - start));
    start = pos + 1;
  }

  // Remainder after the last separator (or the whole string if none found).
  if (start < s.size())
    elems.push_back(s.substr(start));
} // end split()


// Parses one header line of space-separated "from:to" regions and appends the
// two numbers of every region to `from` and `to` respectively.
//
// Empty tokens (produced by consecutive spaces) are skipped. Returns false as
// soon as a region does not consist of exactly two colon-separated integers;
// values appended before the failure are left in place.
static bool append_regions(const string &line, vector<int> &from, vector<int> &to) {
  vector<string> regions;
  split(line, regions, ' ');

  for (const string &region : regions) {
    if (region.empty())
      continue;

    vector<string> bounds;
    split(region, bounds, ':');
    // Guard against tokens without a ':' (or with too many): indexing a
    // missing second bound would read past the end of the vector.
    if (bounds.size() != 2)
      return false;

    int first = 0;
    int last = 0;
    if (!(istringstream(bounds[0]) >> first) ||
        !(istringstream(bounds[1]) >> last))
      return false;

    from.push_back(first);
    to.push_back(last);
  }
  return true;
}


// Reads "input.txt": the first two lines hold "from:to" regions, every
// following line is stored as a filename. Prints the filenames and the
// collected from/to numbers. Returns 1 if the file cannot be opened or a
// header line is missing or malformed, 0 otherwise.
int main() {
  ifstream in_file("input.txt");
  if (!in_file) {
    std::cerr << "error: cannot open input.txt\n";
    return 1;
  }

  vector<int> from;
  vector<int> to;
  string line;

  // The regions of both header lines are collected into the same from/to
  // vectors, first line first.
  for (int header = 1; header <= 2; ++header) {
    if (!getline(in_file, line) || !append_regions(line, from, to)) {
      std::cerr << "error: missing or malformed region line " << header
                << " in input.txt\n";
      return 1;
    }
  }

  // append the rest of the input file (i.e., the filenames)
  vector<string> filenames;
  while (getline(in_file, line)) {
    filenames.push_back(line);
  }

  // print the contents to verify that everything works

  cout << "FILENAMES:\n";
  for (const auto &ele : filenames)
    cout << ele << ",";

  cout << "\nFROM numbers:\n";
  for (auto ele : from)
    cout << ele << ",";

  cout << "\nTO numbers:\n";
  for (auto ele : to)
    cout << ele << ",";
  cout << endl;


/*
FILENAMES:
tr429a.frank/tr429a.reg1.0.pdb
,tr429a.frank/tr429a.reg1.1.pdb
,tr429a.frank/tr429a.reg1.2.pdb
,tr429a.frank/tr429a.reg1.3.pdb
,tr429a.frank/tr429a.reg1.4.pdb
,tr429a.frank/tr429a.reg1.5.pdb
,tr429a.frank/tr429a.reg1.6.pdb
,tr429a.frank/tr429a.reg1.7.pdb,
FROM numbers:
48,68,93,22,34,50,71,
TO numbers:
49,70,100,33,47,67,92,
*/

  return 0;
}

答案 2 :(得分:0)

尝试使用Boost Spirit:

#include <cstdint>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/qi_match.hpp>

// One "from:to" pair: two 32-bit integers, in the order they appear.
struct region
{
    int32_t from;
    int32_t to;
};

// Adapt `region` as a Boost.Fusion sequence over its members (from, to), in
// that order. NOTE(review): presumably this is what lets the Spirit rule with
// attribute `region()` in main fill the struct from its two `int_` parsers.
BOOST_FUSION_ADAPT_STRUCT(region, (int32_t,from)(int32_t,to))

// Parses "input.txt" with a single Spirit Qi grammar: two lines of "from:to"
// regions followed by a list of lines kept as filenames, then prints what was
// parsed. Returns 1 if the file cannot be opened or the grammar does not
// match, 0 otherwise.
int main()
{
    using namespace boost::spirit::qi;

    // A region is two integers separated by ':'; the rule's attribute is the
    // Fusion-adapted `region` struct.
    rule<boost::spirit::istream_iterator, region(), blank_type> region_ = int_ >> ':' >> int_ ;

    std::ifstream ifs("input.txt");
    if (!ifs)
    {
        std::cerr << "Cannot open input.txt\n";
        return 1;
    }

    std::vector<region> line1, line2;
    std::vector<std::string> filenames;

    // noskipws keeps the stream from discarding whitespace itself, so the
    // grammar sees the line ends it matches with `eol`; `blank` is passed as
    // the skipper. Grammar: one or more regions, end of line, one or more
    // regions, end of line, then non-empty lines separated by line ends.
    if (!(ifs >> std::noskipws >> phrase_match(
                +region_ >> eol >> +region_ >> eol >>
                lexeme[+(char_ - eol)] % eol,
                blank, line1, line2, filenames)))
    {
        // Report the failure instead of exiting silently with status 0.
        std::cerr << "Parse failed\n";
        return 1;
    }

    std::cout << "Parse success\n";
    for (const auto& r : line1) std::cout << "line1: from " << r.from << " to " << r.to << "\n";
    for (const auto& r : line2) std::cout << "line2: from " << r.from << " to " << r.to << "\n";
    for (const auto& fn : filenames) std::cout << "filename '" << fn << "'\n";

    return 0;
}

Coliru目前无法访问,但下面是在我的机器上用该输入文件运行得到的输出:

Parse success
line1: from 48 to 49
line1: from 68 to 70
line1: from 93 to 100
line2: from 22 to 33
line2: from 34 to 47
line2: from 50 to 67
line2: from 71 to 92
filename 'tr429a.frank/tr429a.reg1.0.pdb'
filename 'tr429a.frank/tr429a.reg1.1.pdb'
filename 'tr429a.frank/tr429a.reg1.2.pdb'
filename 'tr429a.frank/tr429a.reg1.3.pdb'
filename 'tr429a.frank/tr429a.reg1.4.pdb'
filename 'tr429a.frank/tr429a.reg1.5.pdb'
filename 'tr429a.frank/tr429a.reg1.6.pdb'
filename 'tr429a.frank/tr429a.reg1.7.pdb'