Question

我正在做C++课程的一个实验作业。我已经写出了一个非常基础的可运行版本，但它的行为并不符合作业要求。

作业：

编写一个程序，一次读取一个单词的文本文件。首次遇到时，将单词存储到动态创建的数组中。创建一个并行整数数组，以保存每个特定单词在文本文件中出现的次数。如果单词出现在文本文件中多次，请不要将其添加到动态数组中，但请确保在并行整数数组中增加相应的单词频率计数器。在进行任何比较之前，请从所有单词中删除任何尾随标点符号。

创建并使用包含以下 Bill Cosby 名言的文本文件来测试您的程序。

I don't know the key to success, but the key to failure is trying to please everybody.

在程序结束时，生成一个报告，以类似于以下的格式打印两个数组的内容：

词频分析

Word        Frequency
I           1
don't       1
know        1
the         2
key         2
...

我可以弄清楚一个单词是否在数组中重复多次，但我无法弄清楚如何不向数组中添加/删除重复的单词。例如，单词“to”出现三次，但它应该只出现在输出中一次（意味着它在数组中的一个位置）。

我的代码：

using namespace std;

// Question's attempt: reads "Quote.txt" twice (once to count the words, once
// to process them) and prints one "word<TAB><TAB>count" row per word read.
// NOTE(review): a row is printed for every word position i, not one row per
// distinct word -- this is the behaviour the question asks about.
int main()
{
    ifstream file;
    file.open("Quote.txt");
    if (!file)
    {
        cout << "Error: Failed to open the file.";
    }

else
{
    string stringContents;
    int stringSize = 0;

    // find the number of words in the file
    while (file >> stringContents)
    {
        stringSize++;
    }

    // close and open the file to start from the beginning of the file
    file.close();
    file.open("Quote.txt");

    // create dynamic string arrays to hold the contents of the file
    // these will be used to compare with each other the frequency
    // of the words in the file
    // NOTE(review): neither array is released with delete[] in this function.
    string *mainContents = new string[stringSize];
    string *compareContents = new string[stringSize];

    // holds the frequency of each word found in the file
    // NOTE(review): stringSize is a run-time value, so this is a
    // variable-length array -- a compiler extension, not standard C++.
    int frequency[stringSize];

    // initialize frequency array
    for (int i = 0; i < stringSize; i++)
    {
        frequency[i] = 0;
    }

    stringContents = "";

    cout << "Word\t\tFrequency\n";
    for (int i = 0; i < stringSize; i++)
    {
        // if at the beginning of the iteration
        // don't check for the reoccurence of the same string in the array
        if (i == 0)
        {
            file >> stringContents;

            // convert the current word to a c-string
            // so we can remove any trailing punctuation
            // NOTE(review): this buffer is allocated on every iteration and
            // never released with delete[].
            int wordLength = stringContents.length() + 1;
            char *word = new char[wordLength];
            strcpy(word, stringContents.c_str());

            // set this to no value so that if the word has punctuation
            // needed to remove, we can modify this string
            stringContents = "";

            // remove punctuation except for apostrophes
            // NOTE(review): j also visits the terminator index (wordLength is
            // length() + 1), and every '\0' -- the terminator and each
            // overwritten punctuation mark -- is appended to stringContents
            // as a real character, so the rebuilt string contains embedded
            // NULs. Punctuation anywhere in the word is affected, not only
            // trailing punctuation.
            for (int j = 0; j < wordLength; j++)
            {
                if (ispunct(word[j]) && word[j] != '\'')
                {
                    word[j] = '\0';
                }

                stringContents += word[j];
            }

            mainContents[i] = stringContents;
            compareContents[i] = stringContents;
            frequency[i] += 1;
        }

        else
        {
            // Same read-and-clean sequence as the i == 0 branch above.
            file >> stringContents;
            int wordLength = stringContents.length() + 1;
            char *word = new char[wordLength];
            strcpy(word, stringContents.c_str());

            // set this to no value so that if the word has punctuation
            // needed to remove, we can modify this string
            stringContents = "";

            for (int j = 0; j < wordLength; j++)
            {
                if (ispunct(word[j]) && word[j] != '\'')
                {
                    word[j] = '\0';
                }

                stringContents += word[j];
            }

            // stringContents = "dont";
            //mainContents[i] = stringContents;
            compareContents[i] = stringContents;

            // search for reoccurence of the word in the array
            // if the array already contains the word
            // don't add the word to our main array
            // this is where I am having difficulty
            // NOTE(review): slot i is compared against every slot, including
            // j == i itself (always equal) and slots after i that are still
            // default-constructed empty strings. The count is stored at the
            // word's own position i rather than at the position of its first
            // occurrence, and mainContents[i] is assigned as soon as any
            // single slot differs.
            for (int j = 0; j < stringSize; j++)
            {
                if (compareContents[i].compare(compareContents[j]) == 0)
                {
                    frequency[i] += 1;
                }

                else
                {
                    mainContents[i] = stringContents;
                }
            }
        }

        // One output row per word position i.
        cout << mainContents[i] << "\t\t" << frequency[i];
        cout << "\n";
    }

}

file.close();

return 0;

}

如果代码难以理解/遵循，我道歉。任何反馈表示赞赏：]

Answer 1

如果使用stl，可以轻松解决整个问题，编码较少。

#include <algorithm>
#include <cctype>
#include <fstream>
#include <iostream>
#include <string>
#include <unordered_map>

using namespace std;

// Counts how often each distinct word occurs in "Quote.txt" and prints a
// frequency table. Words are whitespace-separated tokens with any trailing
// punctuation removed. Returns 1 if the file cannot be opened, 0 otherwise.
int main()
{
    ifstream file("Quote.txt");
    if (!file) {
        cout << "Error: Failed to open the file.";
        return 1;
    }

    unordered_map<string,int> wordFreq;
    string aword;
    while (file >> aword) {
        // Strip trailing punctuation only, as the assignment requires, so that
        // "everybody." becomes "everybody" while "don't" keeps its apostrophe.
        // The cast matters: passing a negative char to ispunct is undefined.
        while (!aword.empty() && std::ispunct(static_cast<unsigned char>(aword.back())))
            aword.pop_back();

        // A token made only of punctuation (e.g. "--") is not a word.
        if (aword.empty())
            continue;

        // operator[] inserts a zero counter the first time a word is seen,
        // so a single lookup covers both the "new" and the "seen" case.
        ++wordFreq[aword];
    }
    file.close();

    cout << "\tWord Frequency Analyser\n"<<endl;
    cout << "     Frequency\t    Unique Words"<<endl;
    // unordered_map iteration order is unspecified; rows are not sorted.
    for (const auto& entry : wordFreq)
        cout << "\t" << entry.second << "\t\t" << entry.first << '\n';

    return 0;
}

Answer 2

对于这样一个简单的任务，您使用的算法非常复杂。这就是你要做的事情：

首先通读一遍文件，以确定数组所需的最大长度；
然后第二次读取时，逐词决定如何处理：如果该字符串已经在表中，只需增加其频率，否则将其添加到表中；
最后输出表格。

您的代码的else块将如下所示：

    string stringContents;
    int stringSize = 0;

    // find the number of words in the file
    while (file >> stringContents)
        stringSize++;

    // close and open the file to start from the beginning of the file
    file.close();
    file.open("Quote.txt");

    string *mainContents = new string[stringSize];   // dynamic array for strings found
    int *frequency = new int[stringSize];           // dynamic array for frequency
    int uniqueFound = 0;                            // no unique string found

    for (int i = 0; i < stringSize && (file >> stringContents); i++)
    {
        //remove trailing punctuations 
        while (stringContents.size() && ispunct(stringContents.back()))
            stringContents.pop_back();

        // process string found 
        bool found = false;
        for (int j = 0; j < uniqueFound; j++)
            if (mainContents[j] == stringContents) {  // if string already exist
                frequency[j] ++;     // increment frequency 
                found = true;
            }
        if (!found) {   // if string not found, add it !  
            mainContents[uniqueFound] = stringContents;
            frequency[uniqueFound++] = 1;   // and increment number of found
        }
    }
    // display results
    cout << "Word\t\tFrequency\n";
    for (int i=0; i<uniqueFound; i++)
        cout << mainContents[i] << "\t\t" << frequency[i] <<endl;
}

好的，这是一项作业，所以你必须使用数组。以后你可以把这段代码简化为：

    string stringContents;
    map<string, int> frequency;   // word -> number of occurrences, kept sorted by word

    while (file >> stringContents) {
        // Remove trailing punctuation. The cast matters: passing a negative
        // char to ispunct is undefined behaviour.
        while (!stringContents.empty() && ispunct(static_cast<unsigned char>(stringContents.back())))
            stringContents.pop_back();

        // Skip tokens that were only punctuation; operator[] creates a zero
        // counter the first time a word is seen.
        if (!stringContents.empty())
            ++frequency[stringContents];
    }
    cout << "Word\t\tFrequency\n";
    // Bind by const reference so each pair is not copied per iteration.
    for (const auto& w : frequency)
        cout << w.first << "\t\t" << w.second << '\n';

甚至按字母顺序排序。

Answer 3

根据您的作业是否必须使用“数组”本身，您可以考虑使用 std::vector，甚至是 C++/CLI 中的 System::Collections::Generic::List。

使用向量，您的代码可能如下所示：

#include <vector>
#include <string>
#include <fstream>
#include <iostream>
using namespace std;

int wordIndex(string); //Protoype a function to check if the vector contains the word
void processWord(string); //Prototype a function to handle each word found

vector<string> wordList; //The dynamic word list
vector<int> wordCount; //The dynamic word count

void main()
{
    ifstream file("Quote.txt");
    if (!file)
    {
        cout << "Error: Failed to read file" << endl;
    }
    else
    {
        //Read each word into the 'word' variable
        string word;
        while (!file.eof())
        {
            file >> word;
            //Algorithm to remove punctuation here
            processWord(word);
        }
    }

    //Write the output to the console
    for (int i = 0, j = wordList.size(); i < j; i++)
    {
        cout << wordList[i] << ": " << wordCount[i] << endl;
    }
    system("pause");
    return;
}

void processWord(string word)
{
    int index = wordIndex(word); //Get the index of the word in the vector - if the word isn't in the vector yet, the function returns -1.
    //This serves a double purpose: Check if the word exsists in the vector, and if it does, what it's index is.
    if (index > -1)
    {
        wordCount[index]++; //If the word exists, increment it's word count in the parallel vector.
    }
    else
    {
        wordList.push_back(word); //If not, add a new entry
        wordCount.push_back(1); //in both vectors.
    }
}

int wordIndex(string word)
{
    //Iterate through the word list vector
    for (int i = 0, j = wordList.size(); i < j; i++)
    {
        if (wordList[i] == word)
        {
            return i; //The word has been found. return it's index.
        }
    }
    return -1; //The word is not in the vector. Return -1 to tell the program that the word hasn't been added yet.
}

我试图用注释来注释任何新代码/概念，以便于理解，所以希望你能发现它很有用。

作为旁注，您可能会注意到我已将许多重复代码移出 main 函数并转移到其他函数中。这样可以实现更高效和可读的编码，因为您可以将每个问题划分为易于管理的小问题。

希望这可以有所帮助。

动态数组和字符串出现的麻烦（C ++）

3 个答案: