Question

我需要从键盘（stdin）读取以下文本。请注意，用户只会以这种格式从键盘输入。

 #the total size of physical memory (units are B, KB, MB, GB)
    512MB   2       #the following are memory allocations
    {
            abc = alloc(1KB);
             {
                y_ = alloc(128MB);
                x1= alloc(128MB);
                y_ = alloc(32MB);
               for (i = 0; i < 256; i++) abc[i] =alloc(512kB);
                  x1 = alloc(32MB); x2 = alloc(32MB); x3 = alloc(32MB);
               x1.next = x2, x2.next = x3, x3.next = x1;
    }
    abc = alloc(256MB);
    }

下面我来逐项说明。以 # 符号开头的行被视为注释，将被忽略。最前面的两个值分别是物理内存大小和代（generation）数。接着会有一个全局的左花括号。它后面可能跟着如下形式的行：

abc = alloc(1KB);

其中 abc 是对象名，1KB 是分配的内存大小。在 x1.next = x2 中，x1 指向 x2。

for (i = 0; i < 256; i++) abc[i] =alloc(512kB);

for 循环以这种格式输入；循环体可以是写在同一行上的语句，也可以包含嵌套的 for 循环。

我有以下代码来解决这个问题。我想知道如何改进它，请帮忙。

我的代码就是这样：

#include <stdio.h>

#include <algorithm>
#include <cctype>
#include <cstdlib>
#include <iomanip>
#include <iostream>
#include <limits>
#include <sstream>
#include <string>



using namespace std;
using std::stringstream;

/* ---- Text state shared by main() and the parsing helpers ---- */
string pMem;       /* physical-memory token from the header, e.g. "512MB" */
string sGen;       /* generation-count token from the header */
string comment;    /* receives the discarded remainder of a comment line */
string val;        /* unit suffix produced by extract() */
string input;      /* current line of the main statement loop */
string input_for;  /* current line while reading the lines after a for statement */
string id_size;    /* size literal of the last parsed alloc(...) */
string id;         /* identifier on the left of the last parsed allocation */
string init_str1;  /* loop variable named in the for initialiser */
string init_str2;  /* loop variable named in the for condition */
string inc_str;    /* increment expression of the for header */
string id_dummy;   /* array name with its "[...]" subscript removed */
string s_out;
string sss;        /* scratch text used while decoding the loop step */
string id_dummy1;

/* ---- Counters and parsed numbers ---- */
int gen = 0;          /* number of generations */
int pMem_int = 0;     /* numeric part of the physical-memory token */
int i = 0;
int gBrckt = 0;       /* set to 1 once the global '{' has been read */
int cBrckt = 0;       /* count of "}" lines seen by the main loop */
int oBrckt = 0;       /* count of "{" lines seen by the main loop */
int id_size_int;      /* numeric part of the last allocation size */
int v1;               /* start value from the for header */
int v2;               /* end bound from the for header */
int for_oBrckt = 0;   /* '{' count while reading for-loop lines */
int for_cBrckt = 0;   /* '}' count while reading for-loop lines */
int y = 0;            /* loop counter used when expanding an array allocation */
int y1 = 0;           /* loop step used when expanding an array allocation */
int g = 0;

/* ---- Sizes in bytes ---- */
unsigned long pMem_ulong = 0;  /* result slot written by toByte() */
unsigned long id_size_ulong;   /* size of the last allocation */

/* ---- Fixed-size sscanf targets ---- */
char t[20];      /* identifier */
char m[256];     /* size literal */
char init1[10];  /* for-header initialiser variable */
char init2[10];  /* for-header condition variable */
char inc[10];    /* for-header increment expression */

/* ---- String positions ---- */
unsigned pos_start;
unsigned pos;
unsigned pos_strt = 0;
unsigned pos_end = 0;

/* ---- Helpers defined after main() ---- */
string extract(string pMem_extract);
unsigned long toByte(int pMem_int_func, string val);
void commentIgnore(string& input);
void func_insert();
void func_insert_for();
stringstream out;
void commentIgnore_for(string& input_for);

int main() 
{
  /* Reading the input main memory and num of generations */
  /* Ignoring comment line */
  cin >> pMem;
  if(pMem == "#") {
    cin.clear();
    pMem.clear();
    getline(cin,comment);
    cin >> pMem;
  }
  if(pMem == "#") {
    cin.clear();
    pMem.clear();
    getline(cin,comment);
    cin >> pMem;
  }
  if(pMem == "#") {
    cin.clear();
    pMem.clear();
    getline(cin,comment);
    cin >> pMem;
    }
  /* Reading input generations */
  cin>> sGen;
  if(sGen == "#") {
    cin.clear();
    sGen.clear();
    getline(cin,comment);
    cin >> sGen;
  }
  if(sGen == "#") {
    cin.clear();
    sGen.clear();
    getline(cin,comment);
    cin >> sGen;
  }
  if(sGen == "#") {
    cin.clear();
    sGen.clear();
    getline(cin,comment);
    cin >> sGen;
  }
  /* Convert sGen and physical memory to int and report error if not a number */
  gen = atoi(sGen.c_str());
  if(gen ==0) {
    cerr << "Generation must be a number"<<endl;
      exit(0);
  }
  pMem_int = atoi(pMem.c_str());
  //  cout<< gen<<" "<<pMem_int<<endl;

  /* Now that the number from pMem is removed, get its unit B,MB,KB */
  extract(pMem); /* returns val(string) */

  /* convert the given physical memory to Byte. input: pMem_int*/
  toByte(pMem_int, val); /*  return(pMem_ulong)*/
  // move pMem_ulond to another location to keep address intact
  /* read rest of the inputs  */
  /* Ignore comment lines before the global bracket */
   cin >> input;
  if(input == "#"){
    cin.clear();
    input.clear();
    getline(cin,comment);
    cin >> input;
  }
  if(input == "#"){
    cin.clear();
    input.clear();
    getline(cin,comment);
    cin >> input;
  }
  if(input == "#"){
    cin.clear();
    input.clear();
    getline(cin,comment);
    cin >> input;
  }

  if(input.compare("{") ==0)
    gBrckt=1;

  else {
    cerr<< "Syntax error\n";
    exit(0);
  }

  /* Clearing the input stream for next input */
  cin.ignore(numeric_limits<streamsize>::max(), '\n');
  cin.clear();
  input.clear();
  //cout<<"input: "<<input<<endl;
  while( getline(cin,input)) {

    if(input == "CTRL-D")
      break;

    commentIgnore(input);
    //cout<<"inputloop: "<<input<<endl;

    /* If input = '{' or '}'*/
    if(input.compare("{") ==0)
      oBrckt = oBrckt + 1;

     if (input.compare("}") ==0)
      cBrckt = cBrckt + 1;

     if (((input.find("alloc"))!= string::npos) && (input.find("alloc") < input.find("for"))) {
       func_insert();
       //call the allocate function here with name: id, size: id_size_ulong
     }

     if ((input.find("for")) != string::npos) {
        sscanf(input.c_str(), "for (%s = %d; %s < %d; %[^)])", init1, &v1, init2, &v2, inc);
    init_str1 = init1, init_str2 = init2, inc_str = inc;

    cout<<init1<<" ="<< v1<<" "<<init_str1<<" < " << v2<< " "<< inc_str<<endl;
    cout << input <<endl;

    if(init_str1 != init_str2) {
      cerr << "Error!\n";
      exit(0);
    }

    if ((input.find("alloc"))!= string::npos) {
      // unsigned pos = (input.find("alloc"));

      if((input.find(";")) != string::npos) {

         pos_start = (input.find(")")+1);
        string alloc_substr  = input.substr(pos_start);
        cout<<"Substring alloc: "<< alloc_substr<<endl;

        func_insert();
        //call the allocate function here with name: id, size: id_size_ulong
      }
      else {
        cerr << "ERROR: SYNTAX\n";
        exit(0);
      }


    }
    //  cin.ignore();
    while(getline(cin,input_for)) {
      commentIgnore_for(input_for);

      if ((input_for.find("{") != string::npos)) {
        pos = input_for.find("{");
        for_oBrckt = for_oBrckt+1;
        string for_brckt = input_for.substr(pos,pos);
        cout<< "Found: " << for_oBrckt<<endl;
      }


      if ((input_for.find("}") != string::npos)) {
         pos = input_for.find("}");
        for_cBrckt = for_cBrckt+1;
        string for_brckt = input_for.substr(pos,pos);
        cout<< "Found: " << for_cBrckt<<endl;
      }

      if (((input_for.find("alloc"))!= string::npos) && (input_for.find("alloc") < input_for.find("for"))) {
        func_insert_for();
        //call the allocate function here with name: id, size: id_size_ulong
      }




      if(for_oBrckt == for_cBrckt)
        break;


    }
    cout<<"out of break"<<endl;
     }

      if (((input.find(".next"))!= string::npos) && (input.find(".next") < input.find("for"))) {
       func_insert();
       //call the allocate function here with name: id, size: id_size_ulong
     }

      if(((cBrckt-oBrckt)) == gBrckt)
       break;
  }

}

/*---------------------- Function definitions --------------------------------*/
/* Function to extract the string part of physical memory */
/* Splits a size literal such as "512MB" and yields its unit part.
 *
 * pMem_extract: text made of leading decimal digits followed by a unit.
 * Returns everything from the first non-digit character onward ("MB" for
 * "512MB"); the result is empty when the text is empty or consists only of
 * digits. The same value is stored in the global `val`, which the callers in
 * this file read right after the call.
 */
string extract(string pMem_extract) {
  /* A local search replaces the old loop that used the shared global `i`
     as its counter and had no return statement after the loop. */
  const string::size_type unit_start = pMem_extract.find_first_not_of("0123456789");

  if (unit_start == string::npos)
    val.clear();
  else
    val = pMem_extract.substr(unit_start);

  return val;
}

/* Convert the physical memory to bytes. return(pMem_ulong);*/
unsigned long toByte(int pMem_int_func, string val)
{
  if (val == "KB")
    pMem_ulong =  (unsigned long) pMem_int_func * 1024;
  else if (val == "B")
    pMem_ulong = (unsigned long) pMem_int_func;
  else if (val == "GB")
    pMem_ulong = (unsigned long) pMem_int_func * 1073741824;
  else if (val == "MB")
    pMem_ulong = (unsigned long) pMem_int_func * 1048576;
  else {
    cerr<<"Missing the value in memory, B, KB, MB, GB\n";
    exit(0);
  }

  return(pMem_ulong);
}


/*Ignoring comment line*/
/* Strips a trailing '#' comment from a line, in place.
 *
 * input: the line to clean. Everything from the first '#' to the end of the
 *        line is erased; a line without '#' is left unchanged.
 */
void commentIgnore(std::string& input)
{
  /* Keep the position in the string's own size_type: stored in a plain
     `unsigned` it no longer compares equal to npos where size_t is wider,
     and erase() then throws std::out_of_range for lines without '#'. */
  const std::string::size_type found = input.find('#');

  if (found != std::string::npos)
    input.erase(found);
}


void func_insert() {
 sscanf(input.c_str(), "%s = alloc(%[^)]);", t, m);
       id =t;
       id_size =m;
       cout<<"Tag: "<<id <<"  Memory: "<<id_size<<endl;
       extract(id_size); /* Separates B,MB,KB and GB of input, returns val*/
       id_size_int = atoi(id_size.c_str());
       /* Convert object size to B */
       toByte(id_size_int, val); /* return(pMem_ulong) */
       id_size_ulong = pMem_ulong;

}

void func_insert_for() {
  sscanf(input_for.c_str(), "%s = alloc(%[^)]);", t, m);
  id =t;
  id_size =m;
  if(!((id.find("[")) && (id.find("]")) != string::npos)) {
    cout<<"Tag: "<<id <<"  Memory: "<<id_size<<endl;
    extract(id_size); /* Separates B,MB,KB and GB of input, returns val*/
    id_size_int = atoi(id_size.c_str());
    /* Convert object size to B */
    toByte(id_size_int, val); /* return(pMem_ulong) */
    id_size_ulong = pMem_ulong;
    // allocate here
    return;
  }
  else {
    if(inc_str.find("++"))
      y1 =1;
    if(inc_str.find("="))
      {
    sss = inc_str.substr(inc_str.find("+") +1);
    y1 = atoi(sss.c_str());
    cout<<"y1:"<<y1<<endl;

      }
    pos_strt = id.find("[");
    pos_end = id.find("]") -1;
    cout<<"Positions start and ebd: " << pos_strt<<pos_end<<endl;
    id_dummy = id.substr(0,pos_strt);
    id = id_dummy;
    cout<<"Tag: "<<id_dummy <<"  Memory: "<<id_size<<endl;
    extract(id_size); /* Separates B,MB,KB and GB of input, returns val*/
    id_size_int = atoi(id_size.c_str());
    /* Convert object size to B */
    toByte(id_size_int, val); /* return(pMem_ulong) */
    id_size_ulong = pMem_ulong;
    //allocate here
    cout<<"v1: " << v1 << " " << v2<<endl;
    // g = 0;
    for(y = v1; y < v2; y= y+y1) {
      // allocate here
    }
  }
  return;
}

/* Strips a trailing '#' comment from a line read inside a for block, in place.
 *
 * input_for: the line to clean. Everything from the first '#' to the end of
 *            the line is erased; a line without '#' is left unchanged.
 */
void commentIgnore_for(std::string& input_for)
{
  /* Keep the position in the string's own size_type: stored in a plain
     `unsigned` it no longer compares equal to npos where size_t is wider,
     and erase() then throws std::out_of_range for lines without '#'. */
  const std::string::size_type found = input_for.find('#');

  if (found != std::string::npos)
    input_for.erase(found);
}

此外，我需要让它能够容忍任意的空白。也就是说，输入也可能写在同一行上，例如一行中包含两个分配语句。这一点我还没能处理好，需要帮助。

Answer 1

我的建议是编写一个合适的词法分析器（tokenizer）——一段能够判断哪些字符应当归为一体（例如“单词”）、以及应当在何处切分（例如“(”、“)”、“{”、“}”）的代码。tokenizer 会返回一个枚举值，如下所示：

// Kinds of token the tokenizer can return.
enum Token {
    Token_Unknown      = 0,  // Error indication.
    Token_LeftParen    = 1,
    Token_RightParen   = 2,
    Token_LeftBracket  = 3,
    Token_RightBracket = 4,
    Token_Comma        = 5,
    Token_Semicolon    = 6,
    Token_Equal        = 7,
    Token_Word         = 8   // Sequence of alphanumerics
};

获得标记（token）后，您需要判断该标记是什么。维护一张“已知”标记（即“关键字”）表是有意义的，例如“mem”、“alloc”、“for”等。如果某个标记不是关键字，那么它就是符号的名称，例如变量名。您把这些符号存储在表中，以便稍后可以引用它们。

您还需要使用某种栈，这样在处理完之后才能回到之前所在的位置。

编写通用解析器并不是非常困难，并且您最有可能在执行此操作时使用比当前代码少得多的代码。

当然，你可以摆脱所有：

  // Skips one comment line: when the token just read is exactly "#", the rest
  // of that line is read into `comment` and the next token is read into `input`.
  if(input == "#"){
    cin.clear();
    input.clear();
    getline(cin,comment);
    cin >> input;
  }

让解析器检查输入中是否出现 '#'，如果出现就跳到该行的末尾（如果当前正在读取一个标记，就先结束该标记；如果没有，直接继续即可）。

Answer 2

IIUC（如果我理解正确的话），输入不是面向行的，所以通常使用 std::getline 的规则可能不适用。我自己的做法是：

插入一个过滤用的 streambuf 来删除注释。
使用某种基于正则表达式的词法分析器（例如 flex）把输入分解为标记。
定义语法；做到这一步之后，我会用 bison 来生成解析该语法的代码，不过一个简单的递归下降解析器也不会太难写。

由于您没有说明解析完这些信息之后要用它做什么，所以很难给出更精确的建议。

从键盘读取输入

2 个答案: