无法统计单词出现的频率(C++)

时间:2015-12-27 10:32:34

标签: c++

我试图显示每个单词在文件中出现的次数。我不允许使用第三方库(C++ STL、Boost 等),这正是我遇到困难的原因。以下是我目前完成的部分:

我创建了一个数组列表来存储文件中的单词,并去掉了标点符号和数字。现在我需要打印所有单词及其出现频率,并按频率排序,例如:

Words:         Frequency:
their          13
how            10
apple          9
is             5

arrayList.h

#include <iostream>
#include <string>
using namespace std;

// Fixed-capacity list of strings stored in a heap-allocated array.
// The capacity is chosen at construction time and never grows.
class arrayList
{
public:
    bool isEmpty() const;    // true when no items are stored
    bool isFull() const;     // true when the number of items equals the capacity
    int listSize() const;    // number of items currently stored
    void print() const;      // writes the stored items to standard output

    void insertAt(int location, const string& insertItem);  // store insertItem at index location
    void removeAt(int location);                             // drop the item at index location
    string retrieveAt(int location) const;                   // copy of the item at index location
    bool seqSearch(const string& item) const;                // linear search: is item present?
    void insert(const string& insertItem);                   // append insertItem after the last item
    void remove(const string& removeItem);                   // remove an item equal to removeItem

    arrayList(int);          // argument is the capacity
    ~arrayList();

    // The object owns `list` through a raw pointer and releases it in the
    // destructor. A compiler-generated copy would leave two objects deleting
    // the same array, so copying is disabled.
    arrayList(const arrayList&) = delete;
    arrayList& operator=(const arrayList&) = delete;

private:
    string *list;    // backing array, allocated by the constructor
    int length;      // number of slots currently in use
    int maxSize;     // capacity of the backing array
};

arrayList.cpp

 #include "arrayList.h"


// Reports whether the list currently holds no items.
bool arrayList::isEmpty() const
{
    return length == 0;
}

// Reports whether every slot of the backing array is in use.
bool arrayList::isFull() const
{
    return maxSize == length;
}

// Returns the number of items currently stored (not the capacity).
int arrayList::listSize() const
{
    const int itemCount = length;
    return itemCount;
}

// Writes every stored item to standard output in index order.
// No separator is emitted between items.
void arrayList::print() const
{
    int idx = 0;
    while (idx < length)
    {
        cout << list[idx];
        ++idx;
    }
}

void arrayList::insertAt(int location, const string& insertItem)
{
    list[location] = insertItem;
    length++;
}

// Removes the item at index `location`, shifting the items after it one slot
// to the left.
//
// location - index of the item to drop; valid values are 0..length-1.
//
// An out-of-range location leaves the list unchanged and reports the problem
// on standard error (previously it still decremented the length).
void arrayList::removeAt(int location)
{
    if (location < 0 || location >= length)
    {
        cerr << "removeAt: position " << location << " is out of range" << endl;
        return;
    }

    for (int i = location; i < length - 1; i++)
        list[i] = list[i + 1];

    length--;
}

string arrayList::retrieveAt(int location) const
{
        return list[location];
}


// Linear search over the stored items.
// Returns true as soon as an item equal to `item` is found, false otherwise.
// Only presence is reported; the matching index is not returned.
bool arrayList::seqSearch(const string& item) const
{
    for (int idx = 0; idx < length; ++idx)
    {
        if (list[idx] == item)
            return true;
    }
    return false;
}

void arrayList::insert(const string& insertItem)
{
    list[length++] = insertItem;
}

void arrayList::remove(const string& removeItem)
{
    int loc;
    loc = seqSearch(removeItem);

    removeAt(loc);
}

// Creates an empty list able to hold up to `size` items.
//
// size - requested capacity. A negative value is not a valid array length
//        for new[], so it is treated as 0 (a list that is always full).
arrayList::arrayList(int size)
{
    maxSize = (size > 0) ? size : 0;
    length = 0;

    list = new string[maxSize];
}


// Releases the backing array allocated by the constructor.
arrayList::~arrayList()
{
    delete[] list;
}

Source.cpp

#include <iostream>
#include <fstream>
#include <string>
#include "arrayList.h"
#include <cctype>

using namespace std;


// Counts the whitespace-separated tokens in `file`, then rewinds the stream
// so the caller can read it again from the beginning.
//
// file - input stream to scan; it is read to the end by this call.
// Returns the number of tokens extracted.
int wordCount(ifstream &file)
{
    int total = 0;
    for (string token; file >> token; )
        ++total;

    // Reading to the end leaves eof/fail set; clear the state before seeking.
    file.clear();
    file.seekg(0, ios::beg);
    return total;
}

// Returns a copy of `word` in which each of the punctuation characters
// . , ; : ? - [ ] ( ) ! " ' _ is replaced by a single space.
// All other characters are left as they are; the length does not change.
string removePunct(string word)
{
    for (string::size_type pos = 0; pos < word.length(); ++pos)
    {
        switch (word[pos])
        {
            case '.': case ',': case ';': case ':': case '?':
            case '-': case '[': case ']': case '(': case ')':
            case '!': case '"': case '\'': case '_':
                word[pos] = ' ';
                break;
            default:
                break;
        }
    }
    return word;
}

// Returns a copy of `word` with every uppercase letter converted to
// lowercase; all other characters are left unchanged.
string makelower (string word)
{
    for (string::size_type i = 0; i < word.length(); i++)
    {
        // The <cctype> functions require a value representable as unsigned
        // char (or EOF). A plain char may be negative for non-ASCII bytes,
        // so convert before the call.
        const unsigned char ch = static_cast<unsigned char>(word[i]);
        word[i] = static_cast<char>(tolower(ch));
    }
    return word;
}

// Prompts for a file name, reads the file word by word and stores the
// lower-cased, punctuation-stripped words in an arrayList. Tokens that start
// with a digit are replaced by a single " " entry.
// Returns 0 on success, 1 when the file cannot be opened.
int main ()
{
    string fileName;
    ifstream file;

    cout << "Please enter the file name: ";
    getline(cin,fileName);
    file.open(fileName);
    if (!file)
    {
        cerr << "Could not open file: " << fileName << endl;
        return 1;
    }

    // Each word can add up to two entries (the word and a " " separator),
    // hence twice the word count.
    int listSize = wordCount(file);
    arrayList list1(listSize*2);

    string word, newWord;
    while (file >> word)
    {
        // operator>> never produces an empty token, so word[0] is valid.
        // The first character is tested for every word (the previous code
        // indexed with a counter that kept growing across words) and '0'
        // now counts as a digit too.
        if (word[0] >= '0' && word[0] <= '9')
        {
            list1.insert(" ");
        }
        else
        {
            newWord = makelower(word);
            list1.insert(removePunct(newWord));
            list1.insert(" ");
        }
    }
    // NOTE: the disabled attempt below cannot work as written: `counter` is
    // never initialised, and the inner while loop never ends once an item is
    // found because nothing inside the loop changes what seqSearch returns.
    /*int *counter = new int [listSize*2]; //I tried this but don't think its working
    string item;

    for (int i = 0; i < list1.listSize(); i++)
    {
        if( list1.retrieveAt(i) != " " )
        {
            string item = list1.retrieveAt(i);
            while ( list1.seqSearch(item) )
                counter[i]++;
        }
    }*/



    // Windows-only: keeps the console window open until a key is pressed.
    system("pause");
    return 0;
}

感谢任何帮助。

1 个答案:

答案 0 :(得分:0)

你的代码中有一些奇怪的地方,但我认为它们并不影响你真正要解决的问题。

解决方案是这样的:

// Fixed-capacity table of distinct words and how many times each was
// inserted. words[i] and freqs[i] describe the same entry.
class freqList
{
public:
    // size - maximum number of distinct words; negative values are treated
    //        as 0.
    freqList(int size)
    {
        capacity = (size > 0) ? size : 0;
        words = new string[capacity];
        freqs = new int[capacity]();   // () value-initialises every count to 0
        length = 0;
    }

    ~freqList()
    {
        delete [] words;
        delete [] freqs;
    }

    // Both arrays are owned through raw pointers, so copying is disabled to
    // avoid two objects deleting the same memory.
    freqList(const freqList&) = delete;
    freqList& operator=(const freqList&) = delete;

    // Returns the index of `item` in the word list, or -1 if it is absent.
    int seqSearch(const string& item) const
    {
        for (int i = 0; i < length; i++)
        {
            if (words[i] == item)
                return i;
        }
        return -1;
    }

    // Records one occurrence of `word`: bumps its count when it is already
    // known, otherwise appends it with a count of 1. A new word is ignored
    // when the table is full.
    void insertWord(string word)
    {
        int idx = seqSearch(word);
        if(idx >= 0)
        {//it already exists in the words list
            freqs[idx]++;
        }
        else if(length < capacity)
        {// new word, add to the end of the list
            words[length] = word;
            freqs[length] = 1;
            length++;
        }
    }

    // Number of distinct words stored.
    int listSize() const { return length; }

    // Word / count at `index`; index must be in 0..listSize()-1.
    const string& wordAt(int index) const { return words[index]; }
    int freqAt(int index) const { return freqs[index]; }

    // Reorders the entries so the highest count comes first. Insertion sort;
    // entries with equal counts keep their relative order.
    void sortByFrequency()
    {
        for (int i = 1; i < length; i++)
        {
            string pendingWord = words[i];
            int pendingFreq = freqs[i];
            int j = i - 1;
            while (j >= 0 && freqs[j] < pendingFreq)
            {
                words[j + 1] = words[j];
                freqs[j + 1] = freqs[j];
                j--;
            }
            words[j + 1] = pendingWord;
            freqs[j + 1] = pendingFreq;
        }
    }

private:
    string *words;
    int *freqs; //same size of words
    int length;    // number of distinct words stored
    int capacity;  // allocated size of both arrays
};


int wordCount(ifstream &file)
{
     string word;;
     int count=0;
     while (file >> word)
     {
         word = removePunct(word);
         word = wordToLower(word);
         if(isWord(word))
             count++;
     }
     file.clear();
     file.seekg(0, ios::beg);
     return count;
}

// Decides whether a token should be counted as a word.
// Default rule: the token must contain at least one letter and no digit.
// Adjust this to whatever constraints the task actually requires.
bool isWord(const string& word)
{
    bool hasLetter = false;
    for (string::size_type i = 0; i < word.length(); i++)
    {
        // <cctype> functions need a value representable as unsigned char.
        const unsigned char ch = static_cast<unsigned char>(word[i]);
        if (isdigit(ch))
            return false;
        if (isalpha(ch))
            hasLetter = true;
    }
    return hasLetter;
}

现在,main 函数应该是这样的:

 // Size the table by the number of words, then feed every word into it.
 int listSize = wordCount(file);
 freqList freqs(listSize);
 string word;
 while (file >> word)
 {
    // Normalise first (strip punctuation, then lower-case) so that the same
    // word always maps to the same entry.
    word = removePunct(word);
    word = makelower(word);
    // ... apply any other constraints here
    if (isWord(word))
    {
        freqs.insertWord(word);
    }
 }

while 循环结束后,freqs 列表中就保存了每个单词及其对应的出现频率。