如何在 C++ 中拆分字符串并得到我想要的结果?

时间:2014-11-21 06:37:32

标签: c++ bioinformatics

有一个这样的字符串:M90I4D7

我需要将其推入这种结构:

//! A single CIGAR operation: an operation type character plus its length.
struct CigarOp {

    char     Type;   //!< CIGAR operation type (MIDNSHPX=)
    uint32_t Length; //!< CIGAR operation length (number of bases)

    //! Constructor.
    //! \param type   operation type character; defaults to '\0'
    //! \param length operation length; defaults to 0
    //!
    //! Both arguments are taken by value: they are small scalars, so binding
    //! a const reference (as was done for the length) only adds indirection.
    CigarOp(const char type = '\0',
            const uint32_t length = 0)
        : Type(type)
        , Length(length)
    { }
};

这意味着我需要将它拆分成 3 组,每组对应一个 CigarOp:('M', 90)、('I', 4)、('D', 7)。

2 个答案:

答案 0 :(得分:2)

假设字符串的格式为 ([A-Z][0-9]+)*,您可以简单地执行以下操作:

#include <sstream>

...

// Stream over the CIGAR text; each operation is one type character followed
// by an unsigned decimal length.
std::istringstream parser("M90I4D7");
std::vector<CigarOp> cigars;

// Scratch targets for one (type, length) pair.
char opType;
std::uint32_t opLength;

// Keep extracting pairs until the stream fails, i.e. at end of input or at
// the first pair that cannot be read as <char><unsigned>.
while (parser >> opType >> opLength) {
  cigars.emplace_back(opType, opLength);
}

请注意,此代码不进行任何类型的验证。如果需要验证,实现它的一种方法是使用Boost.Spirit(在http://boost.org上找到):

#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted/struct.hpp>

#include <cstdint>
#include <iostream>

// One CIGAR operation as a plain aggregate: it declares no constructors and
// holds only the two fields below.
struct CigarOp {
  char          Type;    // operation type character
  std::uint32_t Length;  // operation length
};

// Adapt CigarOp as a Boost.Fusion sequence of (char, std::uint32_t). The
// members are listed in the same order as they are declared in the struct.
BOOST_FUSION_ADAPT_STRUCT(CigarOp, (char, Type) (std::uint32_t, Length))

int main() {
  using boost::spirit::qi::phrase_parse;
  using boost::spirit::qi::char_;
  using boost::spirit::qi::uint_;
  using boost::spirit::qi::standard::space;

  std::vector<CigarOp> cigars;

  std::string s = "M90I4D7";
  std::string::const_iterator first = s.begin(), last = s.end();

  bool r = phrase_parse(first, last, *(char_ >> uint_), space, cigars);

  if(r && first == last) {
    // string was well-formed
    for(auto const &cigar : cigars) {
      std::cout << cigar.Type << ", " << cigar.Length << '\n';
    }
  }
}

答案 1 :(得分:-2)

怎么样:

#include <cctype>
#include <cerrno>
#include <climits>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <vector>
using namespace std;

//! One CIGAR operation (type character plus positive length) together with a
//! parser for strings of the form <letter><digits> repeated, e.g. "M90I4D7".
struct CigarOp {
    char op;   //!< CIGAR operation type (MIDNSHPX=)
    int size;  //!< CIGAR operation length (number of bases)

    /**
     * Parses the whole of `s` and appends one CigarOp per operation to `v`.
     *
     * Each operation must be a single alphabetic character immediately
     * followed by a decimal length in the range [1, INT_MAX].
     * NOTE(review): '=' is listed as an operation type on `op` but is not
     * alphabetic, so it is rejected here -- confirm whether that is intended.
     *
     * @param s NUL-terminated input text; a null pointer is treated as invalid.
     * @param v output vector; operations parsed before an error remain in it.
     * @return 0 on success (including the empty string), -1 on malformed input.
     */
    static int parse(const char* s, std::vector<CigarOp>& v)
    {
        if (s == nullptr) return -1;

        const char* p = s;
        while (*p != '\0')
        {
            CigarOp c;
            c.op = *p;
            // <cctype> classifiers need a value representable as unsigned
            // char; passing a negative plain char is undefined behaviour.
            if (!std::isalpha(static_cast<unsigned char>(c.op))) return -1;
            ++p;
            if (!std::isdigit(static_cast<unsigned char>(*p))) return -1;

            char* endptr = 0;
            errno = 0;
            const long length = std::strtol(p, &endptr, 10);
            // Reject overflow of long (ERANGE) and values that do not fit in
            // int, instead of silently truncating them into a bogus length.
            if (errno == ERANGE || length <= 0 || length > INT_MAX) return -1;

            c.size = static_cast<int>(length);
            v.push_back(c);
            p = endptr;
        }
        return 0;
    }
};

int main(int argc,char** argv)
    {
    vector<CigarOp> cigar;
    if(CigarOp::parse("M90I4D7",cigar)!=0) return -1;
    for(size_t i=0;i< cigar.size();++i)
        {
        cout << cigar[i].op << ":" << cigar[i].size << endl;
        }
    return 0;
    }
不过,对于生物信息学方面的问题,你应该到 biostars.org 上提问。