我想从文本文件中逐个单词地读取内容。这是我的 C++ 代码:
// Prints every whitespace-delimited token of ./wordCount.txt on its own line.
int main(int argc, const char * argv[]) {
    ifstream file("./wordCount.txt");
    // Extraction fails at end of file (or if the file could not be opened),
    // which ends the loop.
    for (string word; file >> word; ) {
        cout << word << endl;
    }
    return 0;
}
文本文件包含以下句子:
I don't have power, but he has power.
这是我得到的结果:
I
don\241\257t
have
power,
but
he
has
power.
您能告诉我如何获得如下格式的结果:
I
don't
have
power
but
he
has
power
感谢。
答案 0 :(得分:4)
据我理解,您是想去掉标点符号。
不幸的是,从流中提取字符串时只会把空白字符视为分隔符。因此 "don't" 或 "Hello,world" 会被读成一个单词,而 "don' t" 或 "Hello, world" 会被读成两个单词。
另一种方法是逐行阅读文本,并使用string::find_first_of()
从分隔符跳转到分隔符:
// Splits every line read from standard input into words and prints one word
// per line. Any character listed in `separator` ends a word; runs of
// separators are skipped, so no empty words are produced.
const string separator{" \t\r\n,.!?;:"};
string line;
while (getline(cin, line)) {                        // read line by line
    // s = offset of the next word, e = offset of the separator that ends it
    size_t s = line.find_first_not_of(separator);   // skip leading separators
    while (s != string::npos) {                     // stop if no word is left
        const size_t e = line.find_first_of(separator, s);
        // When e is npos the count is oversized and substr() clamps it to
        // the end of the line.
        cout << line.substr(s, e - s) << endl;
        if (e == string::npos)                      // last word of the line
            break;
        s = line.find_first_not_of(separator, e + 1);
    }
}
答案 1 :(得分:1)
下面的代码摆脱了标点符号,除了撇号:
#include <iostream>
#include <fstream>
#include <string>
#include <algorithm>
using namespace std;
int main(int argc, const char * argv[]) {
ifstream file("wordCount.txt");
string word;
while(file >> word) {
for (auto c : word)
if (ispunct(c) && c != '`')
word.erase(word.find_first_of(c));
cout << word << endl;
}
return 0;
}
应该产生所需的输出:
Georgioss-MacBook-Pro:~ gsamaras$ g++ -Wall -std=c++0x main.cpp
Georgioss-MacBook-Pro:~ gsamaras$ ./a.out
I
don`t
have
power
but
he
has
power
至于某些字符显示异常的问题,我建议您检查一下文件的编码,可以试试下面的命令(按照此处链接所述):
file -I wordCount.txt
wordCount.txt: text/plain; charset=us-ascii
这对我有用。或者只需打开文本编辑器并确保字符有效。
答案 2 :(得分:0)
为了简化调试,我用 std::istringstream 代替了文件输入。
我还添加了一个bool(类数据属性)来简化其他诊断信息的启用/禁用。 (m_dbg)
#include <algorithm>
#include <chrono>
// 'Compressed' chrono access: short aliases for the std::chrono types.
using HRClk_t = std::chrono::high_resolution_clock; // high-resolution clock
using Time_t  = HRClk_t::time_point;                // time point of that clock
using MS_t    = std::chrono::milliseconds;          // milliseconds
using US_t    = std::chrono::microseconds;          // microseconds
using NS_t    = std::chrono::nanoseconds;           // nanoseconds
using namespace std::chrono_literals; // support suffixes like 100ms, 2s, 30us
#include <array>
#include <cctype>
#include <cstddef>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
/// Splits text into whitespace-separated words after removing ',' and '.'
/// characters. Every word is echoed to std::cout (one per line, preceded by
/// a single space) and appended to an internal word list.
class T496_t
{
   // m_keep[b] is true when the byte value b survives filtering.
   std::array<bool, 256>    m_keep;
   std::vector<std::string> m_wordVec;     // every extracted word, in order
   bool                     m_dbg = false; // true: print extra diagnostics

public:
   /// Builds the filter table: every byte is kept except NUL, ',' and '.'.
   T496_t()
   {
      m_keep.fill(true);
      m_keep[0] = false;   // NUL bytes are dropped
      m_keep[static_cast<unsigned char>(',')] = false;
      m_keep[static_cast<unsigned char>('.')] = false;
   }

   /// Runs the word extraction on the built-in sample text.
   /// @return always 0
   int exec()
   {
      std::istringstream file(
         "Hello\n"
         "I don't have power, but he has power.\n"
         "I don't have power , but he has power.\n"
         ); //ifstream file("./wordCount.txt");
      return exec(file);
   }

   /// Reads `in` line by line and extracts the words of every line.
   /// A newline is written to std::cout before each line's words. The last
   /// line is processed even when it is not terminated by a newline.
   /// @param in  stream to read the text from
   /// @return always 0
   int exec(std::istream& in)
   {
      std::size_t lineCount = 1;
      std::string line;
      while (std::getline(in, line))
      {
         if (m_dbg)
            std::cout << "\n line " << lineCount++ << " : '"
                      << line << "'\n " << std::setfill('-')
                      << std::setw(static_cast<int>(line.size()) + 12)
                      << "-" << std::setfill(' ');
         std::cout << '\n';

         if (line.empty())
            continue;                // ignore empty lines
         extractWordsFrom(line);     // extract words
      }
      return 0;
   }

private: // methods

   /// isspace() wrapper that is safe for negative char values, which bytes
   /// of non-ASCII text produce when char is signed.
   static bool isSpace(char c)
   {
      return std::isspace(static_cast<unsigned char>(c)) != 0;
   }

   /// Filters `unfiltered`, then prints and stores each word it contains.
   /// Words are separated by any run of whitespace, so consecutive, leading
   /// or trailing whitespace never yields an empty word.
   void extractWordsFrom(const std::string& unfiltered)
   {
      const std::string line = filter(unfiltered);
      if (line.empty()) {
         if (m_dbg) std::cout << " empty line" << std::endl;
         return;
      }

      auto wordBegin = std::find_if_not(line.begin(), line.end(), isSpace);
      while (wordBegin != line.end())
      {
         const auto wordEnd = std::find_if(wordBegin, line.end(), isSpace);
         m_wordVec.emplace_back(wordBegin, wordEnd);

         if (m_dbg) {
            std::cout << " word(" << std::setw(3) << (wordBegin - line.begin());
            if (wordEnd == line.end())
               std::cout << ", eoln): ";
            else
               std::cout << "," << std::setw(3) << (wordEnd - line.begin())
                         << "): ";
         }
         std::cout << " " << m_wordVec.back() << '\n';

         wordBegin = std::find_if_not(wordEnd, line.end(), isSpace);
      }
   }

   /// Returns `text` without leading whitespace and without any byte whose
   /// m_keep entry is false.
   std::string filter(std::string text) const
   {
      ltrim(text);   // remove leading blanks
      // Index the table through unsigned char so bytes >= 0x80 stay in range.
      text.erase(std::remove_if(text.begin(), text.end(),
                                [this](char c) {
                                   return !m_keep[static_cast<unsigned char>(c)];
                                }),
                 text.end());
      return text;
   }

   // trim from start
   static void ltrim(std::string& s)
   {
      s.erase(s.begin(), std::find_if_not(s.begin(), s.end(), isSpace));
   }

   // trim from end
   static void rtrim(std::string& s)
   {
      s.erase(std::find_if_not(s.rbegin(), s.rend(), isSpace).base(), s.end());
   }

   // trim from both ends
   static void lrtrim(std::string& s) { rtrim(s); ltrim(s); }

}; // class T496_t
int main(int /*argc*/, char** /*argv[]*/)
{
setlocale(LC_ALL, "");
std::ios::sync_with_stdio(false);
Time_t start_us = HRClk_t::now();
int retVal = -1;
{
T496_t t496;
retVal = t496.exec();
}
auto duration_us = std::chrono::duration_cast<US_t>(HRClk_t::now() - start_us);
std::cout << "\n\n FINI " << duration_us.count() << " us" << std::endl;
return(retVal);
}
// desired output:
// I
// don't
// have
// power
// but
// he
// has
// power
此代码的输出:
Hello
I
don't
have
power
but
he
has
power
I
don't
have
power
but
he
has
power
m_dbg = true 时的输出:
line 1 : 'Hello'
-----------------
word( 0, eoln): Hello
line 2 : 'I don't have power, but he has power.'
-------------------------------------------------
word( 0, 1): I
word( 2, 7): don't
word( 8, 12): have
word( 13, 18): power
word( 19, 22): but
word( 23, 25): he
word( 26, 29): has
word( 30, eoln): power
line 3 : 'I don't have power , but he has power.'
---------------------------------------------------
word( 0, 1): I
word( 2, 7): don't
word( 9, 13): have
word( 14, 19): power
word( 21, 24): but
word( 25, 27): he
word( 28, 31): has
word( 32, eoln): power
FINI 215 us
答案 3 :(得分:0)
一种简单的方法是先对字符串进行过滤:把除撇号(即 ')以外的所有标点符号替换为空格,以便后续处理(即可以利用一些内置函数)。
#include <iostream>
#include <fstream>
#include <string>
#include <algorithm>
#include <sstream>
#include <iterator>
using namespace std;
// Predicate for replace_if: returns true when `c` is a punctuation character
// that should be replaced, i.e. any punctuation except the apostrophe.
// Returns false for the apostrophe and for every non-punctuation character.
bool isOk(char c)
{
    if (c == '\'')
        return false;
    // Convert through unsigned char: ispunct() has undefined behaviour for
    // negative values, which bytes of non-ASCII text produce when char is
    // signed.
    return ispunct(static_cast<unsigned char>(c)) != 0;
}
int main()
{
ifstream file("data.txt");
string word;
while(file >> word){
std::replace_if(word.begin(), word.end(), isOk, ' ');
istringstream ss(word);
copy(istream_iterator<string>(ss), istream_iterator<string>(), ostream_iterator<string>(cout, "\n"));
}
return 0;
}
输出
I
don't
have
power
but
he
has
power