Question

我需要为大学里要提交的一份练习创建一个哈希表。该程序将打开若干文件，将每个文件的内容分解为<<words>>（令牌），并将每个<<word>>连同其出现频率保存在哈希表中。

如果单词已经在哈希表中，程序将增加单词的频率。

最后，程序将相应地打印单词及其频率。此外，频率应从最高字频率打印到最低频率。 <<words>>的比较将忽略大写和小写字母。

例如，如果文件包含：one two three four Two Three Four THREE FOUR FoUr 它应该打印：

  four 4
  three 3
  two 2
  one 1

教授给了我们一个模板，我们应该完成，但我真的很困惑如何处理insert_ht()和clear_ht()函数以及比较函数。

以下是代码：

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>

#define HTABLE_SIZ 1001
#define MAX_LINE_SIZ 1024

/* Hash Table */
typedef struct node* link;
struct node { char *token; int freq; link next; };

link htable[HTABLE_SIZ] = { NULL }; /* Table of lists (#buckets) */
int size = 0; /* Size (number of elements) of hash table */

unsigned int hash (char *tok );
void insert_ht (char *data);
void clear_ht ( );
void print_ht ( );

void Process(FILE *fp);


/*
 * Program entry point.
 *
 * Feeds every file named on the command line to Process(), then calls
 * print_ht() and clear_ht(). A file that cannot be opened is reported on
 * stderr and skipped. Always returns 0.
 */
int main(int argc, char *argv[])
{
    int arg = 1;

    while (arg < argc)
    {
        const char *path = argv[arg++];
        FILE *input = fopen(path, "r");

        if (input != NULL)
        {
            Process(input);
            fclose(input);
        }
        else
        {
            fprintf(stderr, "Problem opening file: %s\n", path);
        }
    }

    print_ht();
    clear_ht();
    return 0;
}


/*
 * Reads fp with fgets() into a MAX_LINE_SIZ buffer, splits each chunk into
 * tokens on the delimiter set below and passes every token to insert_ht().
 * The tokens handed to insert_ht() point into the local line buffer.
 */
void Process(FILE *fp)
{
    static const char delimiters[] = " ?!'\";,.:+-*&%(){}[]<>\\\t\n";
    char buffer[MAX_LINE_SIZ];

    while (fgets(buffer, MAX_LINE_SIZ, fp) != NULL)
    {
        char *word = strtok(buffer, delimiters);

        while (word != NULL)
        {
            insert_ht(word);
            word = strtok(NULL, delimiters);
        }
    }
}

/* Hash Function */
/*
 * Computes the bucket index for tok.
 *
 * Each character is upper-cased and folded into an unsigned accumulator
 * (shift left by 4, OR in the character); the result is reduced modulo
 * HTABLE_SIZ. Because of the upper-casing, words that differ only in letter
 * case produce the same index.
 */
unsigned int hash(char *tok)
{
    unsigned int hv = 0;

    for (; *tok != '\0'; tok++)
    {
        /* <ctype.h> functions need a value representable as unsigned char;
           a plain char can be negative for non-ASCII bytes, which would be
           undefined behaviour and would sign-extend into the hash. */
        hv = (hv << 4) | (unsigned int)toupper((unsigned char)*tok);
    }
    return hv % HTABLE_SIZ;
}


/*
 * Template stub: the dotted line below is the assignment's placeholder, not
 * code, and is left for the student to fill in.
 * Per the assignment text, this should save the word in the hash table with
 * its frequency, or increase the frequency when the word is already present,
 * comparing words without regard to upper/lower case.
 */
void insert_ht(char *token)
{
……………………………………………
}
/*
 * Template stub: the dotted line below is the assignment's placeholder, not
 * code. main() calls this once, after print_ht().
 * NOTE(review): the assignment text does not spell out its job; presumably it
 * releases the nodes stored in htable -- confirm with the assignment.
 */
void clear_ht()
{
……………………………………………
}
/*
 * Template stub: the dotted line below is the assignment's placeholder, not
 * code. print_ht() passes this function to qsort() to order an array whose
 * elements are `link` values; the assignment asks for output from the highest
 * frequency to the lowest.
 */
int compare(const void *elem1, const void *elem2)
{
……………………………………………
}
/*
 * Prints every stored word with its frequency, in the order defined by
 * compare().
 *
 * Collects a pointer to each node of every bucket into a temporary array,
 * sorts the array with qsort() and prints one "token<TAB>freq" line per node.
 * Prints nothing when the table is empty; an allocation failure is reported
 * on stderr and nothing is printed.
 */
void print_ht()
{
    int i, j = 0;
    link l, *vector;

    if (size <= 0)
        return;     /* empty table: avoid malloc(0) and qsort() on NULL */

    vector = malloc(sizeof *vector * (size_t)size);
    if (vector == NULL)
    {
        fprintf(stderr, "print_ht: out of memory\n");
        return;
    }

    for (i = 0; i < HTABLE_SIZ; i++)
    {
        /* j < size keeps the write inside the array even if size is stale. */
        for (l = htable[i]; l != NULL && j < size; l = l->next)
            vector[j++] = l;
    }

    /* Sort and print only the entries actually collected. */
    qsort(vector, (size_t)j, sizeof *vector, compare);
    for (i = 0; i < j; i++)
        printf("%-50s\t%7d\n", vector[i]->token, vector[i]->freq);

    free(vector);
}

Answer 1

第二次也是最后一次编辑：我找到了解决方案。显然，由于某些原因，我的compare函数是错误的。我仍然没有弄清楚原因，但下面是正确的版本，希望这篇文章对其他人有用！

int compare(const void *elem1, const void *elem2)
{
    
     return (*(link*)elem2)->freq - (*(link*)elem1)->freq;
}

编辑：删除旧答案。找到了正确的方法，但我现在有另一个问题。 compare功能无法正常运行。我的printf很好，但它并没有根据频率对它们进行排序。我希望它们从最高到最低排序。

在此示例中，文件包含：one two three four Two Three Four THREE FOUR FoUr，我得到：two 2 one 1 four 4 three 3

而我应该得到：four 4 three 3 two 2 one 1

这是代码。随意帮忙！

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>

#define HTABLE_SIZ 1001
#define MAX_LINE_SIZ 1024

/* Hash Table */
typedef struct node* link;
struct node { char *token; int freq; link next; };

link htable[HTABLE_SIZ] = { NULL }; /* Table of lists (#buckets) */
int size = 0; /* Size (number of elements) of hash table */

unsigned int hash (char *tok );
void insert_ht (char *data);
void clear_ht ( );
void print_ht ( );

void Process(FILE *fp);


/*
 * Program entry point (debug-trace version).
 *
 * Feeds every file named on the command line to Process() and then calls
 * print_ht(), emitting a trace line before each step. A file that cannot be
 * opened is reported on stderr and skipped. Always returns 0.
 */
int main(int argc, char *argv[])
{
    int arg;

    printf("prin tin for \n");
    for (arg = 1; arg < argc; ++arg)
    {
        FILE *input;

        printf("prin tin fopen \n");
        input = fopen(argv[arg], "r");
        if (input != NULL)
        {
            printf("prin tin process \n");
            Process(input);
            fclose(input);
        }
        else
        {
            fprintf(stderr, "Problem opening file: %s\n", argv[arg]);
        }
    }

    print_ht();
    /* The clear_ht() call is disabled in this version. */
    return 0;
}


/*
 * Reads fp with fgets() into a MAX_LINE_SIZ buffer, splits each chunk into
 * tokens on the delimiter set below, prints a trace line for each token and
 * passes it to insert_ht(). The tokens point into the local line buffer.
 */
void Process(FILE *fp)
{
    static const char delimiters[] = " ?!'\";,.:+-*&%(){}[]<>\\\t\n";
    char buffer[MAX_LINE_SIZ];

    while (fgets(buffer, MAX_LINE_SIZ, fp) != NULL)
    {
        char *word = strtok(buffer, delimiters);

        while (word != NULL)
        {
            printf("prin tin insert %s \n", word);
            insert_ht(word);
            word = strtok(NULL, delimiters);
        }
    }
}
    
/* Hash Function */
/*
 * Computes the bucket index for tok (debug-trace version).
 *
 * Each character is upper-cased and folded into an unsigned accumulator
 * (shift left by 4, OR in the character); the raw value is printed and then
 * reduced modulo HTABLE_SIZ. Because of the upper-casing, words that differ
 * only in letter case produce the same index.
 */
unsigned int hash(char *tok)
{
    unsigned int hv = 0;

    printf("bike stin hash \n");
    for (; *tok != '\0'; tok++)
    {
        /* <ctype.h> functions need a value representable as unsigned char;
           a plain char can be negative for non-ASCII bytes. */
        hv = (hv << 4) | (unsigned int)toupper((unsigned char)*tok);
    }
    /* hv is unsigned, so it must be printed with %u rather than %d. */
    printf("VGAINEIIIIIIIIIIIIII %u \n", hv);
    return hv % HTABLE_SIZ;
}



void insert_ht(char *token)
{
    printf("bike stin insert %s \n",token);
    unsigned int hashval = hash(token);

    if (htable[hashval]==NULL){
        printf("mesa stin prwti if %u %s \n",hashval,token);
        //token = strdup(token);
        htable[hashval] = malloc(sizeof(token));
        htable[hashval]->token = token ;
        htable[hashval]->freq = 1;
        size++;
        
    }else {
        htable[hashval]->freq++;
    }
    printf("ta evale epitixws \n");
    
}



int compare(const void *elem1, const void *elem2)
{
    const struct node *p1 = elem1;    
    const struct node *p2 = elem2;
    
    if ( p1->freq < p2->freq)
      return -1;

   else if (p1->freq > p2->freq)
      return 1;

   else
      return 0;
}
/*
 * Prints every stored word with its frequency, in the order defined by
 * compare().
 *
 * Collects a pointer to each node of every bucket into a temporary array,
 * sorts the array with qsort() and prints one "token<TAB>freq" line per node.
 * Prints nothing when the table is empty; an allocation failure is reported
 * on stderr and nothing is printed.
 */
void print_ht()
{
    int i, j = 0;
    link l, *vector;

    if (size <= 0)
        return;     /* empty table: avoid malloc(0) and qsort() on NULL */

    vector = malloc(sizeof *vector * (size_t)size);
    if (vector == NULL)
    {
        fprintf(stderr, "print_ht: out of memory\n");
        return;
    }

    for (i = 0; i < HTABLE_SIZ; i++)
    {
        /* j < size keeps the write inside the array even if size is stale. */
        for (l = htable[i]; l != NULL && j < size; l = l->next)
            vector[j++] = l;
    }

    /* Sort and print only the entries actually collected. */
    qsort(vector, (size_t)j, sizeof *vector, compare);
    for (i = 0; i < j; i++)
        printf("%-50s\t%7d\n", vector[i]->token, vector[i]->freq);

    free(vector);
}

Answer 2

我会在新帖子中回答你，因为在评论中很难详尽无遗。

1. malloc

为什么我需要使用malloc呢？我不应该直接写到htable吗？（在insert_ht（）函数上）

您需要使用 malloc ，因为您在struct（char *token）中声明了一个char指针。问题是你从来没有初始化指向任何东西的指针，并且你不知道令牌的大小，你需要malloc每个令牌。

但是，当你使用strdup(token)时，你不需要malloc令牌，因为strdup会这样做。所以不要忘记释放每个令牌以避免内存泄漏。

2。段错误

我无法测试您的代码，但似乎以下行会导致分段错误：

list = htable[hashval]->token

实际上，当htable[hashval]为NULL时，您尝试访问令牌，并将char *分配给 link 类型（列表）。

你需要循环：

for(list = htable[hashval]; list != NULL; list = list->next) { ... }

3。备注

if (x=1)应为if(x==1)。
如果您不需要，请不要选择malloc new_list 。
因为new_list是在htable[hashval]为NULL时使用的，所以new_list->next = htable[hashval];会把new_list->next设置为NULL。
您应该在gcc中使用 -Wall 选项（用于开启警告），还可以使用 valgrind 来定位分段错误。在这种情况下，请用带调试信息的选项（ -g ）编译。

Answer 3

抱歉我的英语不好。

我认为：

insert(char *token)接受文件的一个字并放入哈希表。简而言之，如果哈希表中存在单词，则只需增加其频率即可。否则，您需要创建另一个节点并将频率设置为1，然后将其添加到阵列。最后，每个唯一单词都有一个条目。
compare(const void *elem1, const void *elem2)。如果elem1 = elem2则返回0，如果elem1 < elem2则返回负数，如果elem1 > elem2则返回大于0的数。通过将compare传递给qsort，您可以让qsort根据您自己的标准对数组进行排序。
clear_ht()可能会将数组的所有值设置为NULL，以便重新启动另一个计数？

C中的哈希表（找到每个单词的频率）

3 个答案:

1。的malloc

2。段错误

3。备注