当我读入一个文件时,最先读到的是三个并不存在于文件中的字符。
它们是:“ï»¿”
该文件包含:
<root>
<tag>
<anothertag>Text</anothertag>
</tag>
<tag>
<anothertag>Text</anothertag>
</tag>
</root>
我的代码:
//scanner.h
// Kinds of token the scanner produces. The values are stored in Token::type
// as plain ints, so the enumerator order (0, 1, 2) is part of the contract.
enum TOKEN_TYPE {
    OPEN,   // opening tag, e.g. <tag>
    CLOSE,  // closing tag, e.g. </tag>
    TEXT    // character data found between tags
};
// A single lexical unit produced by the scanner.
struct Token {
    // One of the TOKEN_TYPE enumerators, stored as an int.
    int type;

    // For tags: the text between the angle brackets (without '/', '<' or '>').
    // For text tokens: the characters read up to the next '<'.
    std::string value;
};
// Splits a tag-based text file into OPEN / CLOSE / TEXT tokens.
class Scanner {
public:
    // Creates the scanner and immediately scans the file at `path`.
    Scanner(std::string path);

    // Tokenizes the file at `path`. If the file opens, the stored tokens are
    // replaced and a report is written to "output.txt"; if it does not open,
    // nothing is changed.
    void scan(std::string path);

private:
    // Extracts the next token from `in`.
    Token readToken(std::ifstream& in);

    // Tokens collected by the most recent successful scan().
    std::vector<Token> tokens;
};
// Reads characters from `in` until `delim` is extracted or the stream stops
// being good, and returns the characters read (the delimiter is consumed but
// not included in the result).
std::string read_to(std::istream& in, char delim);
//scanner.cpp
// Constructs the scanner and tokenizes the file at `path` right away by
// delegating to scan().
Scanner::Scanner(std::string path) {
    this->scan(path);
}
void Scanner::scan(std::string path) {
std::ifstream in(path, std::ios_base::beg);
if(!in.is_open()) {
return;
}
if(!tokens.empty()) {
tokens.erase(tokens.begin(), tokens.end());
}
std::ofstream out("output.txt");
while(!in.eof()) {
tokens.push_back(readToken(in));
if(tokens.back().value.empty()) {
tokens.pop_back();
continue;
}
out << tokens.size() << "\t" << tokens.back().value << "\t" << tokens.back().value.length() << std::endl;
}
in.close();
out << "\n";
for(int i = 0; i < tokens.size(); ++i) {
if(tokens[i].type == TOKEN_TYPE::TEXT) {
out << tokens[i].value << std::endl;
}
}
out.close();
}
// Extracts the next token from `in`.
//
// Leading whitespace (newline, carriage return, tab, space) is skipped. Then:
//   - "</name>" yields a CLOSE token whose value is "name";
//   - "<name>"  yields an OPEN token whose value is "name";
//   - anything else yields a TEXT token holding the characters up to, but not
//     including, the next '<' (which is left in the stream).
//
// At end of input a TEXT token with an empty value is returned and the
// stream's eofbit is left set, so callers can detect the end either by the
// empty value or by the stream state.
Token Scanner::readToken(std::ifstream& in) {
    Token token;

    // Keep the result of get() as int so EOF stays distinguishable from
    // every real byte value.
    int c = in.get();
    while (c == '\n' || c == '\r' || c == '\t' || c == ' ') {
        c = in.get();
    }

    if (c == std::ifstream::traits_type::eof()) {
        // Do not unget() here: since C++11 it clears eofbit, which would hide
        // the end of input from the caller.
        token.type = TOKEN_TYPE::TEXT;
        return token;
    }

    if (c == '<') {
        if (in.peek() == '/') {
            in.get();  // drop the '/'
            token.type = TOKEN_TYPE::CLOSE;
        } else {
            token.type = TOKEN_TYPE::OPEN;
        }
        token.value = read_to(in, '>');
    } else {
        in.unget();  // the character just read belongs to the text
        token.type = TOKEN_TYPE::TEXT;
        token.value = read_to(in, '<');
        // Put the '<' back only if it was really read; when the text ran into
        // the end of input there is nothing to return to the stream.
        if (in.good()) {
            in.unget();
        }
    }
    return token;
}
// Collects characters from `in` until `delim` is extracted or the stream is
// no longer good after a read. The delimiter is consumed but not returned.
// When the input ends first, the stream is left in the state produced by the
// failed get() (eofbit and failbit set).
inline std::string read_to(std::istream& in, char delim) {
    std::string collected;
    for (;;) {
        const char ch = in.get();
        if (ch == delim || !in.good()) {
            break;
        }
        collected.push_back(ch);
    }
    return collected;
}
// Entry point: constructing the Scanner scans "test.xml" as a side effect.
// The command-line arguments are not used.
int main(int argc, char** argv) {
    static_cast<void>(argc);
    static_cast<void>(argv);
    Scanner xmlScanner("test.xml");
    return 0;
}
为什么流会读到这些实际上并不存在的字符?