Question

我一直在寻找如何读取文件并统计一个句子中有多少个单词的示例。我的理解是应该统计空白字符（whitespace）的数量，这是正确的思路吗？我还有其他问题，但我想逐一解决。如果可以的话，我不想把逻辑写在 main 函数里，而是想创建一个自己的函数，类似于我已经写好的那些函数，只不过是用于统计单词。

#include<iostream>
#include<cctype>
#include<cstring>
#include<stdio.h>
#include<fstream>
#include<stdlib.h>
#include<iomanip>
#include<string.h>

using namespace std;

//Per-line statistics record. Each field is written by the counting
//function of the same name and read by printRprt.
//Every counter starts at zero so a Counts object is safe to read even
//before any counting function has been called on it.
struct Counts
{
  int countChars = 0;        //all characters on the line
  int countNW = 0;           //characters that are not white space
  int countAlpha = 0;        //alphabetic characters
  int countDigits = 0;       //decimal digits
  int countPunctuation = 0;  //punctuation characters
};

//Forward declarations.
//Each count* function takes the shared Counts record by reference plus the
//line to examine; printRprt takes the Counts record to print.
int countChars(Counts&, char[]);
int countNW(Counts&, char[]);
int countAlpha(Counts&, char[]);
int countDigits(Counts&, char[]);
int countPunctuation(Counts&, char[]);
void printRprt(Counts&);  

/*
 * Function name: main
 * Output: 0 on success; exits with EXIT_FAILURE if the input file cannot
 *         be opened.
 * Description: Reads "inp1.txt" piece by piece (at most 79 characters per
 *              read), echoes each piece, runs every counting function on
 *              it, and prints the report for that piece.
*/
int main()
{
  //open input file (C-style)
  FILE *fp = fopen("inp1.txt", "r");
  if(fp == NULL)
  {
    printf("Could not open input file\n");
    //a non-zero status tells the caller that the run failed
    exit(EXIT_FAILURE);
  }

  Counts counts;
  char line[80];

  //fgets stores at most sizeof(line)-1 characters and stops after a '\n',
  //so an input line longer than that is delivered in several pieces
  while(fgets(line, sizeof(line), fp))
  {
    //echo with fputs: file data must never be used as a printf format
    //string, because a '%' in the input would be read as a conversion
    fputs(line, stdout);
    countChars(counts, line);
    countNW(counts, line);
    countAlpha(counts, line);
    countDigits(counts, line);
    countPunctuation(counts, line);
    printRprt(counts);
  }

  fclose(fp);
  return 0;
}

/*
 * Function name: countChars
 * Input: counts - record whose countChars field is overwritten
 *        line   - NUL-terminated character array to examine
 * Output: An integer, the same value that is stored in counts.countChars
 * Description: Counts every character before the terminating '\0'
 *              (a trailing '\n' in the buffer is counted too).
*/
int countChars(Counts& counts, char line[])
{
  //strlen performs exactly the "walk until '\0'" count
  counts.countChars = static_cast<int>(strlen(line));
  //the function is declared int, so it must return a value
  return counts.countChars;
}
/*
 * Function name: countNW
 * Input: counts - record whose countNW field is overwritten
 *        line   - NUL-terminated character array to examine
 * Output: An integer, the same value that is stored in counts.countNW
 * Description: Counts how many characters are not white space
 *              (as classified by isspace).
*/
int countNW(Counts& counts, char line[])
{
  counts.countNW = 0;
  for (int i = 0; line[i] != '\0'; ++i)
  {
    //the <cctype> classifiers require a value representable as
    //unsigned char; a plain char may be negative for non-ASCII bytes
    if (!isspace(static_cast<unsigned char>(line[i])))
    {
      counts.countNW++;
    }
  }
  //the function is declared int, so it must return a value
  return counts.countNW;
}

/*
 * Function name: countAlpha
 * Input: counts - record whose countAlpha field is overwritten
 *        line   - NUL-terminated character array to examine
 * Output: An integer, the same value that is stored in counts.countAlpha
 * Description: Counts how many characters are alphabetic letters
 *              (as classified by isalpha).
*/
int countAlpha(Counts& counts, char line[])
{
  counts.countAlpha = 0;
  for (int i = 0; line[i] != '\0'; ++i)
  {
    //the <cctype> classifiers require a value representable as
    //unsigned char; a plain char may be negative for non-ASCII bytes
    if (isalpha(static_cast<unsigned char>(line[i])))
    {
      counts.countAlpha++;
    }
  }
  //the function is declared int, so it must return a value
  return counts.countAlpha;
}

/*
 * Function name: countDigits
 * Input: counts - record whose countDigits field is overwritten
 *        line   - NUL-terminated character array to examine
 * Output: An integer, the same value that is stored in counts.countDigits
 * Description: Counts how many characters are numeric digits
 *              (as classified by isdigit).
*/
int countDigits(Counts& counts, char line[])
{
  counts.countDigits = 0;
  for (int i = 0; line[i] != '\0'; ++i)
  {
    //the <cctype> classifiers require a value representable as
    //unsigned char; a plain char may be negative for non-ASCII bytes
    if (isdigit(static_cast<unsigned char>(line[i])))
    {
      counts.countDigits++;
    }
  }
  //the function is declared int, so it must return a value
  return counts.countDigits;
}

/*
 * Function name: countPunctuation
 * Input: counts - record whose countPunctuation field is overwritten
 *        line   - NUL-terminated character array to examine
 * Output: An integer, the same value that is stored in
 *         counts.countPunctuation
 * Description: Counts how many characters are punctuation as classified
 *              by ispunct. White space is not counted, so this is not
 *              simply "everything that is neither a letter nor a digit".
*/
int countPunctuation(Counts& counts, char line[])
{
  counts.countPunctuation = 0;
  for (int i = 0; line[i] != '\0'; ++i)
  {
    //the <cctype> classifiers require a value representable as
    //unsigned char; a plain char may be negative for non-ASCII bytes
    if (ispunct(static_cast<unsigned char>(line[i])))
    {
      counts.countPunctuation++;
    }
  }
  //the function is declared int, so it must return a value
  return counts.countPunctuation;
}

/*
 * Function name: printRprt
 * Input: counts - record holding the five counters to display
 * Output: Nothing is returned; the report is written to cout.
 * Description: Prints one labelled counter per line, in the order
 *              total, non-white space, alphabetic, digits, punctuation,
 *              and finishes with endl.
*/
void printRprt(Counts& counts)
{
  cout << "Total characters: " << counts.countChars;
  cout << "\nNon-white space: " << counts.countNW;
  cout << "\nAlphabetic: " << counts.countAlpha;
  cout << "\nDigits: " << counts.countDigits;
  cout << "\nPunctuation: " << counts.countPunctuation;
  cout << endl;
}

据我的理解，我似乎可以借用统计非空白字符的那个函数，但我想知道应该修改哪些地方，才能让它统计单词，而不只是逐个统计字符。如果有不清楚的地方，请在投反对票之前告诉我。谢谢！

Answer 1

抱歉，我第一次发帖时没有附上解释，当时我被叫走了。

如果使用 C++，我会使用标准库中的正则表达式类。下面的代码只是一个临时拼凑的快速方案（quick hack）：它统计整个文件中各类感兴趣的项目，并把文件拆分成一个个句子分别统计。这里的“句子”是指以一个或多个标点符号 '.'、'!' 或 '?' 再加上若干空白字符结尾的一段文本。

这样做的好处是它相当灵活，维护的代码非常少。

代码还可以改进。main() 应该检查命令行参数，而不是盲目地假设一定有一个参数；如果无法打开 ifstream，应该报错退出；还应该提供一条用法说明（usage）。按句子处理的那部分代码也可以改进，例如在调用 Countem() 之前，先用 regex_replace() 去掉句子中的换行符。这些都很容易添加。

学习如何在标准库中使用正则表达式工具的努力将为文本处理活动提供相当快的回报。它们非常强大。

#include <algorithm>
#include <fstream>
#include <iostream>
#include <regex>
#include <string>

using namespace std;

// Returns how many non-overlapping matches of the regular expression `re`
// occur in `s`, scanning from left to right.
size_t Countem(const string & s, const string & re)
{
  const regex pattern(re);
  size_t matches = 0;
  // A default-constructed sregex_iterator is the end-of-sequence marker.
  for (sregex_iterator it(s.begin(), s.end(), pattern), last; it != last; ++it) {
    ++matches;
  }
  return matches;
}

// Splits `s` into sentences and prints the word, letter, digit and
// punctuation counts of each one to cout.
// A sentence here is a run of characters other than '!', '.' and '?',
// followed by one or more of those marks and then one or more
// space/tab/newline characters.
void PerSentence(const string & s)
{
  const regex sentence("[^\\!\\.\\?]+[\\!\\.\\?]+[ \t\n]+");
  const sregex_iterator last;
  for (sregex_iterator it(s.begin(), s.end(), sentence); it != last; ++it) {
    // Extract the matched text once and reuse it for every count.
    const string text = it->str();
    cout << "Sentence: " << text << '\n';
    cout << "    Words: " << Countem(text, "[^ \t\n]+") << '\n';
    cout << "    Alphas: " << Countem(text, "[[:alpha:]]") << '\n';
    cout << "    Digits: " << Countem(text, "[[:digit:]]") << '\n';
    cout << "    Punctuation: " << Countem(text, "[[:punct:]]") << '\n';
  }
}

// Entry point: expects the path of a text file as the first command-line
// argument. Prints whole-file counts (words, letters, digits, punctuation,
// sentences) and then the per-sentence breakdown.
// Returns 0 on success and 1 when the argument is missing or the file
// cannot be opened.
int main(int argc, char *argv[])
{
  // Without this check argv[1] would be read even when no path was given.
  if (argc < 2) {
    cerr << "Usage: " << (argc > 0 ? argv[0] : "program") << " <input-file>\n";
    return 1;
  }

  ifstream  is(argv[1]);
  // A stream that failed to open would silently yield an empty string.
  if (!is) {
    cerr << "Could not open input file: " << argv[1] << '\n';
    return 1;
  }

  // Read the whole file into memory in one go.
  string    s((istreambuf_iterator<char>(is)),
              istreambuf_iterator<char>());
  cout << "File:\n"
       << "    Words: " << Countem(s, "[^ \t\n]+") << '\n'
       << "    Alphas: " << Countem(s, "[[:alpha:]]") << '\n'
       << "    Digits: " << Countem(s, "[[:digit:]]") << '\n'
       << "    Punctuation: " << Countem(s, "[[:punct:]]") << '\n'
       << "    Sentences: " << Countem(s, "[\\!\\.\\?][ \t\n]+") << "\n\n";

  PerSentence(s);
  return 0;
}

如何使用函数

1 个答案: