Question

我出于爱好，正在为自己设计的编程语言编写解释器。我遇到的问题是：非 ASCII 字符在 Windows CMD 中显示不正确。我读取的源文件以 UTF-8 保存，我认为是不带 BOM 的 UTF-8。例如，当我的源文件内容如下时：

print "á"

在我的 Mac 上，我得到了预期的输出，即字母 á；但在我的 PC 上得到的却是 ├í。我起初以为这是代码页问题，但我使用的代码页中包含字母 á。随后我尝试换了一种字体，使用 Lucida Grande 时可以正常显示。然而在 Python 解释器中，字母 á 用默认字体就能正确显示。

我在 StackOverflow 上提问，有人说我的程序本身可能是用错误的编码编译的。所以我的问题是：如何指定或更改编译 C++ 文件时使用的编码？我使用 TDM-GCC 作为编译器，也用过 MinGW，遇到了同样的问题。

感谢您的帮助

--- --- EDIT

以下是我的整个源文件。你可以像这样编译它：

c++ myfile.cc -o myprogram -std=c++11

每当我运行 "myprogram.exe somefile.mylang"，且 somefile.mylang 的内容为：

print "Hello á"

我在Windows CMD上得到了这个输出：

"Hello ├í"

我不知道Python，Lua，Ruby等...如何使用默认的控制台字体并输出正确的字符。

#include <cstdlib>
#include <fstream>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

using namespace std;

/* Global Variables */
/* Reserved words of the language. Not all of these are actual "keywords" that
   can be used in programs: some are reserved by the grammar, others by the
   interpreter for internal use. The lexer refers to entries by index when it
   builds tokens (0 = print, 1 = string, 2 = sc, 7 = num, 8 = expr), so the
   order of the entries must not change. */
string keywords[9] = { "print", "string", "sc", "variable", "eq", "undefined", "nl", "num", "expr" };
/* Tokens produced by lex() and handed to parse() by exec_program(). A vector
   is used because the number of tokens is only known at runtime. */
vector<string> tokens;

/* Our "symbol table" is a vector too, because how large it must be can only
   be determined at runtime. Nothing in this file reads or writes it yet. */
vector<string> variables;

/* Function Declarations */
/* Forward declarations of every function in the interpreter, collected here
   so the whole interface can be read at a glance. Each prototype repeats the
   signature of its definition exactly; a prototype with a different parameter
   list would silently declare an unrelated overload instead. */
void exec_program(std::string filename);
std::string load_program(std::string filename);
void lex(std::string prog);
void parse(std::vector<std::string> tokens);
std::string evalExpression(std::string expr);

void doPRINT(std::string toPrint);
/* Declared for future use; no definition exists in this file yet. */
void doASSIGN();
void goGETVAR();

/* Definitions */
/* Prefixes that label a diagnostic message with the kind of error being
   reported. They are string-literal macros, so they can be streamed directly
   in front of the message text; load_program() prints IO_ERROR this way when
   the program file cannot be opened. */
#define IO_ERROR "[IO ERROR] "
#define SYNTAX_ERROR "[SYNTAX ERROR] "
#define ASSIGN_ERROR "[ASSIGN ERROR] "

/* Loads the program into the interpreter by reading the whole file.

   filename: path of the program file to read.
   Returns the file contents with a "\n" appended after every line read, so
   the text handed to the lexer always ends with a newline.

   If the file cannot be opened, an IO_ERROR message is printed and the
   process exits with a failure status. A failed open does not tell us whether
   the file exists, because permissions can prevent opening it as well. */
std::string load_program(std::string filename) {
    std::ifstream rdfile(filename);
    if (!rdfile) {
        std::cout << IO_ERROR << "Unable to open the file \"" << filename << "\"." << std::endl;
        /* Exit with a non-zero status so the calling shell or script can see
           that the run failed. */
        std::exit(EXIT_FAILURE);
    }

    /* Grab each line of the file and store it, newline-terminated, in filedata. */
    std::string filedata;
    for (std::string line; std::getline(rdfile, line); ) {
        filedata += line;
        filedata += '\n';
    }

    /* The stream closes itself when it goes out of scope. */
    return filedata;
}

void lex(string prog) {
    int i = 0;
    string toks = "";
  string n = "";
  string expr = "";
    bool state = 0;
  bool exprStarted = 0;
  bool isexpr = 0;
    string s = "";

    for(i = 0; i < prog.size(); ++i) {
        toks += prog[i];
        if (toks == " " and state == 0) {
        toks = "";
        if (n != "") {
          //isexpr = 1;
          //tokens.push_back(keywords[7] + ":" + n);
        }
        n = "";
      } else if (toks == ";" and state == 0) {
        toks = "";
        if (expr != "" and isexpr == 1) {
          tokens.push_back(keywords[8] + ":[" + expr + "]");
        } else if (n != "" and isexpr == 0) {
          tokens.push_back(keywords[7] + ":" + expr);
        }
        if (tokens.back() != "sc") {
          tokens.push_back(keywords[2]); 
        }
        n = "";
        expr = "";
        isexpr = 0;
      } else if (toks == "\n" and state == 0) {
            toks = "";
        if (expr != "" and isexpr == 1) {
          tokens.push_back(keywords[8] + ":[" + expr + "]");
        } else if (n != "" and isexpr == 0) {
          tokens.push_back(keywords[7] + ":" + expr);
        }
        if (tokens.back() != "sc") {
          tokens.push_back(keywords[2]); 
        }
        n = "";
        expr = "";
        isexpr = 0;
        } else if (toks == "0" or toks == "1" or toks == "2" or toks == "3" or toks == "4" or toks == "5" 
        or toks == "6" or toks == "7" or toks == "8" or toks == "9") {
        if (state == 0) {
          n += toks;
          expr += toks;
        } else {
          s += toks;
        }
        toks = "";
      } else if (toks == "+" or toks == "-" or toks == "*" or toks == "/") {
        expr += toks;
        isexpr = 1;
        toks = "";
        n = "";
      } else if (toks == keywords[0]) {
            tokens.push_back(keywords[0]);
            toks = "";
        } else if (toks == "\"") {
            if (state == 0) {
                state = 1;
            } else if (state == 1) {
                state = 0;
                tokens.push_back(keywords[1] + ":" + s + "\"");
                s = "";
                toks = "";
            }
        } else if (state == 1) {
            s += toks;
            toks = "";
        }
    }
    int ii = 0;
    while (ii < tokens.size()) {
        //cout << tokens[ii] << endl;
        ii++;
    }
}

/* Evaluates a flat integer expression and returns the result as decimal text.

   expr: digits and the operators + - * /, optionally ended by ']' (the form
         doPRINT passes after stripping the "expr:[" prefix). Evaluation stops
         at the first ']'. Every other character, parentheses included, is
         ignored.

   Operators are applied strictly left to right; there is no precedence, so
   "2+3*4]" yields "20". A leading '-' negates the first number. An operator
   that is not followed by a number has no effect. An expression without any
   number yields "0".

   Throws std::domain_error on division by zero. std::stoi may throw
   std::out_of_range for a number that does not fit in an int. */
std::string evalExpression(std::string expr) {
    int result = 0;            /* value of everything evaluated so far */
    bool haveOperand = false;  /* true once the first number was consumed */
    char pendingOp = '\0';     /* operator waiting for its right-hand number */
    std::string digits;        /* digits of the number currently being read */

    /* Folds the number collected in `digits`, if any, into `result`. */
    auto applyOperand = [&]() {
        if (digits.empty()) {
            return;
        }
        const int value = std::stoi(digits);
        digits.clear();

        if (!haveOperand) {
            /* First number: an operator in front of it can only be a sign. */
            result = (pendingOp == '-') ? -value : value;
            haveOperand = true;
        } else if (pendingOp == '+') {
            result += value;
        } else if (pendingOp == '-') {
            result -= value;
        } else if (pendingOp == '*') {
            result *= value;
        } else if (pendingOp == '/') {
            if (value == 0) {
                throw std::domain_error("division by zero");
            }
            result /= value;
        }
        pendingOp = '\0';
    };

    for (const char c : expr) {
        if (c >= '0' && c <= '9') {
            digits += c;
        } else if (c == '+' || c == '-' || c == '*' || c == '/') {
            applyOperand();
            pendingOp = c;
        } else if (c == ']') {
            break;
        }
    }
    /* Consume the last number, whether or not a ']' ended the expression. */
    applyOperand();

    return std::to_string(result);
}

/* Prints the value carried by a token, followed by a newline.

   toPrint: a token as built by lex().
     - starts with "string": the 7-character prefix and the surrounding quotes
       are removed and the remaining text is printed;
     - starts with "num": everything after the 4-character prefix is printed;
     - starts with "expr": everything after the 6-character prefix is passed
       to evalExpression() and the result is printed;
     - anything else is printed unchanged. */
void doPRINT(std::string toPrint) {
  std::string text = toPrint;

  /* rfind(tag, 0) == 0 holds exactly when the token begins with tag. */
  if (toPrint.rfind("string", 0) == 0) {
    const std::string quoted = toPrint.substr(7);
    text = quoted.substr(1, quoted.size() - 2);
  } else if (toPrint.rfind("num", 0) == 0) {
    text = toPrint.substr(4);
  } else if (toPrint.rfind("expr", 0) == 0) {
    text = evalExpression(toPrint.substr(6));
  }

  std::cout << text << std::endl;
}

/* Executes the token stream one statement at a time.

   tokens: the tokens produced by lex().

   The only statement form is three tokens long: "print", then a token that
   starts with "string", "num" or "expr", then the terminator "sc". Each match
   is handed to doPRINT().

   Anything else, including a statement cut short by the end of the token
   stream, is reported as a SYNTAX_ERROR and ends the process with a failure
   status. Without that check the loop would read past the end of the vector
   or never advance. */
void parse(std::vector<std::string> tokens) {
    std::size_t i = 0;
    while (i < tokens.size()) {
        /* Only look at tokens[i + 1] and tokens[i + 2] when they exist. */
        const bool complete = tokens.size() - i >= 3;

        if (complete && tokens[i] == "print" && tokens[i + 2] == "sc") {
            const std::string& operand = tokens[i + 1];
            const bool printable = operand.rfind("string", 0) == 0 ||
                                   operand.rfind("num", 0) == 0 ||
                                   operand.rfind("expr", 0) == 0;
            if (printable) {
                doPRINT(operand);
                i += 3;
                continue;
            }
        }

        std::cout << SYNTAX_ERROR << "Unexpected token \"" << tokens[i] << "\"." << std::endl;
        std::exit(EXIT_FAILURE);
    }
}

/* Runs one program file from start to finish: the file is read, its text is
   tokenised into the global `tokens` vector, and those tokens are executed. */
void exec_program(std::string filename) {
    const std::string source = load_program(filename);
    lex(source);
    parse(tokens);
}

/* The main function, we have to start somewhere.

   argv[1] is the path of the program file to run. Without it a usage line is
   printed and the process ends with a failure status.

   NOTE(review): program output is written to the console byte for byte as it
   was read from the source file. How non-ASCII bytes are displayed presumably
   depends on the console's code page rather than on how this file is
   compiled - confirm on Windows. */
int main(int argc, char* argv[]) {
    /* Test argc rather than argv[1]: argv[1] is only guaranteed to be
       readable when argc is at least 1. */
    if (argc < 2) {
        std::cout << "Usage: reedoo <filename> [args]" << std::endl;
        return EXIT_FAILURE;
    }

    exec_program(argv[1]);
    return 0;
}

Answer 1

问题不在于你如何编译 myprogram.exe，而在于 myprogram.exe 如何处理 somefile.mylang。

作为语言的开发者，你有责任规定“用 mylang 编写的程序源文件应为 UTF-8 编码”，或者在源文件中提供可识别的代码页标记。你还应该规定“mylang 语言中的字符串以 UTF-foo 编码”（因为这会影响 "hello".charAt(3) 之类的操作，或你的语言中的等效方法）。

然后，编译器/解释器（myprogram.exe）的职责是使用正确的编码打开源文件（somefile.mylang），并将其转换为 UTF-foo 作为内部表示。

如何使用指定的编码编译 C++？

1 个答案: