为什么我的词法分析器无法识别引号“”

时间:2014-10-18 20:37:33

标签: c++ quotes analyzer lexical

我希望有人可以帮我解决这个问题。我正在用 C++ 编写一个 HTML 词法分析器。按照老师的要求，我应该有 3 个文件：一个头文件和两个 .cpp 源文件，并且程序应该能够读取文件。这是我的文件 try.txt：

<<<<<Hello there <H1 style=”BOLD”>header!!</H1> 
<<
<< =

这是我的头文件：

#ifndef tokens_h
#define tokens_h
#include <string>
#include <iostream>



        // Token kinds produced by the lexer. The explicitly numbered entries use
        // the character code of the symbol they stand for; entries without an
        // initializer take the previous enumerator's value plus one.
        enum tokens {
            TEXT,            // 0
            LANGLE  = 60,    // '<'
            RANGLE  = 62,    // '>'
            SLASH   = 47,    // '/'
            ID,              // 48 (implicit: SLASH + 1)
            EQ      = 61,    // '='
            QSTRING = 34,    // '"'
            OTHER,           // 35 (implicit: QSTRING + 1)
            END              // 36 (implicit: OTHER + 1)
        };
            // Scans the text of one input line, passed in `a`, for token characters.
            // `br` is the stream the line was read from. The definition in this
            // project always returns 0.
            int getToken(std::istream *br, std::string a);

#endif

这是我的main.cpp

#include <iostream>
#include <fstream>
#include <vector>
#include "tokens.h"


using namespace std;

// Entry point of the lexer driver.
//
// With no command-line argument, lines are read from standard input. With
// exactly one argument, that argument is the path of the file to read. Every
// line that is successfully read is handed to getToken().
//
// Returns 0 on success, and 1 when the argument count is wrong or the file
// cannot be opened.
int main(int argc, char *argv[])
{
    std::string line;

    if (argc == 1) {
        // Loop on the result of getline itself. Testing good()/eof() *before*
        // the read lets one extra iteration through after the last line, which
        // would pass an empty string to getToken().
        while (std::getline(std::cin, line)) {
            getToken(&std::cin, line);
        }
        return 0;
    }

    if (argc != 2) {
        return 1;
    }

    std::ifstream infile(argv[1]);
    if (!infile.is_open()) {
        std::cout << argv[1] << "Can't Be Opened" << std::endl;
        return 1;
    }

    while (std::getline(infile, line)) {
        getToken(&infile, line);
    }
    return 0;
}

这是我的 tokens.cpp，我在其中打印结果：

#include <iostream>
#include <stdio.h>
#include <string>
#include <vector>
#include <algorithm>
#include <numeric>
#include <map>
#include <utility>
#include "tokens.h"



using namespace std;

// Records one occurrence of the token whose code is `ch` and prints the
// running total for that token kind to standard output as "NAME: <count> ".
//
// Recognized codes are 60 ('<'), 62 ('>'), 47 ('/'), 61 ('=') and 34 ('"').
// Any other value is ignored and nothing is printed. The totals are kept in
// function-local statics, so they accumulate across calls.
void compar(int ch)
{
    static int countlangle = 0;
    static int countrangle = 0;
    static int countslash = 0;
    static int counteq = 0;
    static int countqstring = 0;

    switch (ch) {
        case 60:    // '<'
            ++countlangle;
            std::cout << "LANGLE: " << countlangle << " ";
            break;

        case 62:    // '>'
            ++countrangle;
            std::cout << "RANGLE: " << countrangle << " ";
            break;

        case 47:    // '/'
            ++countslash;
            std::cout << "SLASH: " << countslash << " ";
            break;

        case 61:    // '='
            ++counteq;
            std::cout << "EQ: " << counteq << " ";
            break;

        case 34:    // '"'
            ++countqstring;
            std::cout << "QSTRING: " << countqstring << " ";
            break;

        default:    // not a token this function counts
            break;
    }
}
// Scans one line of text and reports each markup character it contains.
//
// Every '<' is reported via compar(LANGLE). Once a '<' has been seen on the
// line, each later '>', '/', '=' and '"' is reported with its matching token
// code. All other characters are skipped.
//
// Parameters:
//   br - stream the line came from; not used by this function.
//   a  - the text of the line to scan.
// Returns: always 0.
int getToken(std::istream *br, std::string a)
{
    (void)br;   // the caller has already read the line into `a`

    // Becomes true at the first '<' and stays true for the rest of the line.
    // NOTE(review): it is never cleared at '>', so '/', '=' and '"' in text
    // after a tag are still counted - confirm whether that is intended.
    bool seenLangle = false;

    for (std::string::size_type i = 0; i < a.length(); ++i) {
        const char ch = a[i];

        if (ch == '<') {
            seenLangle = true;
            compar(LANGLE);
        }
        else if (!seenLangle) {
            continue;   // nothing else counts before the first '<'
        }
        else if (ch == '>') {
            compar(RANGLE);
        }
        else if (ch == '/') {
            compar(SLASH);
        }
        else if (ch == '=') {
            compar(EQ);
        }
        else if (ch == '"') {
            // Test the current character, not the position of the first quote
            // on the line. Only the ASCII double quote matches here;
            // typographic (curly) quotes are different characters.
            compar(QSTRING);
        }
    }
    return 0;
}

该程序读取 <、>、= 没有问题，但是当我尝试用 cout << a[27]; 读取 '\"' 时，得到的是：? 如果我打印 cout << a; 则得到 <<<<<Hello there <H1 style=”BOLD”>header!!</H1> //这是我想要读取的字符串

当我使用 found = a.find('\"'); 时，它返回 -1。我的问题是：为什么我的程序无法识别引号？是我读取文件的方式有问题吗？

提前致谢

1 个答案:

答案 0 :(得分:4)

您的文件包含的是弯引号：

    ”

而你的词法分析器查找的是 ASCII 直引号：

    "

这两个是截然不同的字符。