在 C 语言中计算文本文件中某个单词的出现次数与单词总数之比的问题

时间:2011-05-11 06:51:19

标签: c

#include<conio.h>
#include<stdio.h>
#include<iostream.h>
#define NULL 0



/*
 * Reads "abc.txt" one character at a time with getc(), counting words in nw
 * and trying to count occurrences of the word stored in s in t, then prints
 * both counters and waits for a key press with getch().
 */
int main()
{
    // name is never assigned (the gets() call below is commented out), yet it
    // is printed by the error message further down.
    // NOTE(review): getc() returns int; keeping the result in a char can make
    // the comparison with EOF unreliable - confirm and consider using int.
    char name[20],c;
    int nw=0;   // number of words counted so far
    int j=0;    // index of the next free slot in p
    int t=0;    // number of matches of s found so far
    char s[] = "newas";  // find the frequency of this word in abc.txt
    // Collects up to 5 characters; there is no room for a terminating '\0'.
    char p[5];
    FILE *fpt;
    //printf("Enter the name of file to be checked:- ");
    //gets(name);
    fpt=fopen("abc.txt","r");
    if (fpt==NULL)
    {
        printf("ERROR - can/'t open file %s",name);
        getch();
        exit(0);
    }
    else
    {
        // Consume the file until getc() reports EOF.
        while ((c=getc(fpt))!=EOF)
        {
            // switch(1) with a single case 1 always runs the body below.
            switch(1)
            {
                case 1:
                    // A space is treated as the end of the previous word.
                    if (c==' ')
                    {
// Target of the goto further down; it jumps into the body of this if.
point:
                        // Skip any further spaces; afterwards c holds the
                        // first non-space value read (a character or EOF).
                        while((c=getc(fpt))==' ');

                        // Always true here, because the loop above only ends
                        // when c is not ' ', so nw grows once per run of spaces.
                        if (c!=' ')
                            nw=nw+1;
                        // if(c==' ')
                            // nw--;
                        // Store the character that follows the spaces while
                        // p still has room.
                        if(j < 5)
                            p[j++] = c;
                        // j was already incremented, so this prints the slot
                        // after the one just written; when j is 5 that is
                        // p[5], which lies outside the array.
                        printf("\n %c ",p[j]);
                        if(j == 5)
                        {
                            // p and s are two different arrays, so == compares
                            // their addresses and is never true; t is therefore
                            // never incremented and j is never reset here.
                            if(p == s)
                            {
                                t++;
                                j = 0;    
                            }
                        }
                    }

                    // c cannot be ' ' at this point: either the if above was
                    // skipped because c was not ' ', or the skip loop left c
                    // different from ' '. This branch appears unreachable.
                    if(c==' ')
                    {
                        j = 0;
                        goto point;
                    }
            }
        }
    }
    printf("\n The no. of words is %d. ",nw);
    printf("\n Freq of words %s is %d. ",s,t);
    getch();
}

上面的代码能给出正确的单词总数,但无法给出特定单词(即代码中的 s)的出现频率。请指出问题所在,并说明如何统计文本文件中某个特定单词的出现频率。

4 个答案:

答案 0 :(得分:2)

既然你包含了 iostream.h,我猜这应该是某种形式的 C++,而不是 C。如果是这样,可以这样统计词频:

#include <iostream>
#include <map>
#include <string>
#include <fstream>

typedef std::map<std::string, int> FreqMap;

/**
 * Prints every distinct whitespace-separated word of "words.txt" together
 * with the number of times it occurs, one "word count" pair per line.
 * The pairs come out ordered by word because std::map keeps its keys sorted.
 *
 * @return 0 on success, 1 if the file cannot be opened.
 */
int main() {

    std::ifstream ifs( "words.txt" );
    if ( !ifs ) {
        // Without this check a missing file silently produces no output.
        std::cerr << "ERROR - can't open file words.txt\n";
        return 1;
    }

    FreqMap frequencies;
    std::string word;

    // operator>> skips leading whitespace and stops at the next whitespace,
    // so each extraction yields exactly one word.
    while ( ifs >> word ) {
        // operator[] value-initialises a missing entry to 0 before the increment.
        ++frequencies[ word ];
    }

    for ( FreqMap::const_iterator it = frequencies.begin();
            it != frequencies.end(); ++it ) {
        std::cout << it->first << " " << it->second << "\n";
    }

    return 0;
}

答案 1 :(得分:2)

这段用来查找目标单词的代码:

                        // Quoted from the question: p and s are arrays, so ==
                        // compares their addresses rather than the characters
                        // they hold, and the condition is never true.
                        if(p == s)
                        {
                            t++;
                            j = 0;    
                        }

是错误的。在 C 中不能用 == 比较字符串,这样只会比较指针值,而不是所指向的字符(即字符串的内容)。

假设代码的其余部分已正确设置,使 p 确实指向一个合法的字符串,你可以这样写:

// strcmp() (declared in <string.h>) compares the characters of both strings
// and returns 0 when they are equal; p and s must both be NUL-terminated.
if(strcmp(p, s) == 0)
{
  t++;
  j = 0;
}

这要求 p 指向一个以 0 结尾的完整字符串;如果它指向某一行中间的某个字符,上述比较将不起作用。

答案 2 :(得分:1)

我并没有完全回答这个问题,但这是一些可能会帮助你的反馈......

#include<conio.h>
#include<stdio.h>
#include<iostream.h>
#define NULL 0



/*
 * Review-annotated copy of the question's program.
 *
 * The statements are deliberately left exactly as they were posted; every
 * block comment below points out a problem in the code that follows it.
 * The program opens "abc.txt", reads it character by character, and prints
 * a word count (nw) and a match count (t) for the word stored in s.
 */
int main()
{
/*
 * GIVE YOUR VARIABLES NAMES THAT MAKE SENSE
 * j, t, c, s, nw are meaningless to anybody picking up the code
 */
    char name[20],c;
    int nw=0;
    int j=0;
    int t=0;
    char s[] = "newas";  // find the frequency of this word in abc.txt
/*
 * Personally, I'd tend to have p as an array of 6, so that it's the same size as
 * s and I'd initialize it to "", so that it's got a null terminator.
 */
    char p[5];
    FILE *fpt;

    fpt=fopen("abc.txt","r");
    if (fpt==NULL)
    {
        printf("ERROR - can/'t open file %s",name);
        getch();
        exit(0);
    }

/*
 * you don't need an else here... the other flow has already terminated
 */

    else
    {
        while ((c=getc(fpt))!=EOF)
        {
/*
 * What is the point of this switch statement?  It may as well say if(true)
 */
            switch(1)
            {
                case 1:
                    if (c==' ')
                    {
/*
 * If you start using goto's in your code, it's usually a good sign that there's
 * something wrong
 */
point:
/*
 * It's hard to follow what you're doing because your variables don't have names
 * and your code has no clear intent.  If the while loop was in a function
 * 'SkipToNextWord', the intent would be clearer, which would make it easier to find
 * issues.  What happens if there is a space at the end of your file?
 */
                        while((c=getc(fpt))==' ');

/*
 * 'c' is never going to equal ' ', if it did, you'd still be in the while loop
 */
                        if (c!=' ')
                            nw=nw+1;
                        // if(c==' ')
                            // nw--;
                        if(j < 5)
                            p[j++] = c;
                        printf("\n %c ",p[j]);
/*
 * This as written, could be a compound if statement...
 *     if(j == 5 && p == s)
 */
                        if(j == 5)

                        {
/*
 * However, it looks like you're trying to do a string comparison?
 *     if(strncmp(p, s, sizeof(s)-1)==0)
 */
                            if(p == s)
                            {
                                t++;
/*
 * This 'j=0' should be outside of the inner if, otherwise if there isn't a match
 * you don't reset j to 0
 */
                                j = 0;
                            }
                        }
                    }

/*
 * If you have a six letter word in your file, j is never reset to
 * 0 and next time round the loop, you're not going to collect the
 * letters correctly
 */
                    if(c==' ')
                    {
                        j = 0;
                        goto point;
                    }
            }
        }
    }
    printf("\n The no. of words is %d. ",nw);
    printf("\n Freq of words %s is %d. ",s,t);
    getch();
}

答案 3 :(得分:0)

我认为以下代码将回答您的问题:

#include <stdio.h>
#include <conio.h>

/*
 * Returns nonzero when c is a character that separates words: a space, tab,
 * newline, carriage return, form feed or vertical tab.
 */
static int is_word_separator(int c)
{
    return c == ' '  || c == '\t' || c == '\n' ||
           c == '\r' || c == '\f' || c == '\v';
}

/*
 * Counts the whitespace-separated words in "abc.txt" and how many of them are
 * exactly equal to "newas", prints both numbers, then waits for a key press.
 *
 * Returns 0 on success and 1 when the file cannot be opened.
 */
int main(int argc, char* argv[])
{
    /* String literals must not be modified, so bind them to const pointers. */
    const char* name = "abc.txt";
    const char* word = "newas";
    /* "r" is the portable text-read mode; "rt" is a compiler extension. */
    FILE*       fpt = fopen(name, "r");
    int         c;          /* int, so that EOF stays distinct from any char */
    int         nw = 0;     /* total number of words                        */
    int         t = 0;      /* number of words equal to `word`              */
    int         i;

    (void)argc;             /* command-line arguments are not used */
    (void)argv;

    if (fpt == NULL)
    {
        printf("ERROR - can't open file %s\n", name);
        getch();
        return 1;           /* report the failure to the caller */
    }

    while ((c = getc(fpt)) != EOF)
    {
        /* Skip separators between words (spaces, tabs, line breaks) */
        if (is_word_separator(c))
            continue;

        /* c is the first character of a new word */
        nw++;

        /* Consume characters for as long as they keep matching `word` */
        i = 0;
        while ((c != EOF) && !is_word_separator(c) &&
               (word[i] != '\0') && ((char)c == word[i]))
        {
            c = getc(fpt);
            i++;
        }

        /* A match needs all of `word` consumed AND the file word ending here */
        if ((word[i] == '\0') && ((c == EOF) || is_word_separator(c)))
            t++;

        /* Discard whatever is left of the current word */
        while ((c != EOF) && !is_word_separator(c))
            c = getc(fpt);
    }

    fclose(fpt);

    printf("\n The no. of words is %d.\n", nw);
    printf("\n Freq of words %s is %d.\n", word, t);
    getch();

    return 0;
}