我是C的新手,我遇到了为HashTable实现插入功能的问题。
以下是我的结构:
/* One entry in a hash-slot chain: a URL plus a link to the following entry. */
typedef struct HashTableNode HashTableNode;

struct HashTableNode {
    char *url;            /* URL string recorded for this entry */
    HashTableNode *next;  /* following entry in the same chain, or NULL at the end */
};
/* Fixed-size hash table: an array of MAX_HASH_SLOT chain heads. */
typedef struct HashTable HashTable;

struct HashTable {
    HashTableNode *table[MAX_HASH_SLOT];  /* head of the node chain for each slot */
};
以下是我如何创建表:
/*
 * Allocate a HashTable and mark every slot as empty (NULL).
 *
 * Returns a pointer to the newly allocated table, or NULL when the
 * allocation fails. The table itself comes from malloc(), so the caller
 * is the one holding the only pointer to it.
 */
HashTable *initTable(void){
    HashTable *d = malloc(sizeof *d);
    if (d == NULL) {
        /* Out of memory: report it to the caller instead of writing through NULL. */
        return NULL;
    }

    /* malloc() leaves the memory uninitialized, so clear every slot explicitly. */
    for (int i = 0; i < MAX_HASH_SLOT; i++) {
        d->table[i] = NULL;
    }
    return d;
}
这是我的插入功能:
/*
 * Insert `url` into `table` (the version under discussion in the question).
 *
 * The slot is chosen with JenkinsHash(url, MAX_HASH_SLOT). The node stores the
 * caller's `url` pointer as-is; the string is not copied.
 *
 * Returns 1 only on the "Repeated Node" path (slot holds exactly one node whose
 * url equals `url`); every other path returns 0.
 *
 * NOTE(review): this function is kept exactly as posted. The comments below
 * point out the problems that are visible in the code itself.
 */
int HashTableInsert(HashTable *table, char *url){
// NOTE(review): presumably JenkinsHash returns an index in [0, MAX_HASH_SLOT) - confirm.
long int hashindex = JenkinsHash(url, MAX_HASH_SLOT);
int uniqueBool = 2; // 0 for true, 1 for false, 2 for init
// The node is allocated before we know whether it is needed, and the malloc
// result is not checked. theNode->next is NOT initialized here.
HashTableNode* theNode = (HashTableNode*)malloc(sizeof(HashTableNode));
theNode->url = url; // stores the caller's pointer, no copy is made
if (table->table[hashindex] != NULL) { // if we have a collision
// This allocation is overwritten by the very next line, so the block is leaked.
HashTableNode* currentNode = (HashTableNode*)malloc(sizeof(HashTableNode));
currentNode = table->table[hashindex]->next; // the next node in the list
if (currentNode == NULL) { // only one node currently in list
if (strcmp(table->table[hashindex]->url, theNode->url) != 0) { // unique node
// theNode is linked as the new tail, but theNode->next was never set, so the
// chain now ends in an indeterminate pointer instead of NULL.
table->table[hashindex]->next = theNode;
return 0;
}
else{
// Duplicate found: theNode is neither linked nor freed on this path.
printf("Repeated Node\n");
return 1;
}
}
else { // multiple nodes in this slot
printf("There was more than one element in this slot to start with. \n");
// The walk starts at the second node and follows ->next until NULL. A node
// linked earlier with an unset ->next makes this loop follow a garbage pointer.
while (currentNode != NULL)
{
// SEGFAULT when accessing currentNode->url HERE
// Each node is compared with the slot head's url, not with the url being
// inserted, and uniqueBool is overwritten every iteration, so only the last
// node's comparison survives the loop.
if (strcmp(currentNode->url, table->table[hashindex]->url) == 0 ){ // same URL
uniqueBool = 1;
}
else{
uniqueBool = 0;
}
currentNode = currentNode->next;
}
}
if (uniqueBool == 0) {
printf("Unique URL\n");
// This is the only path that assigns theNode->next: insert right after the head.
theNode->next = table->table[hashindex]->next; // splice current node in
table->table[hashindex]->next = theNode; // needs to be a node for each slot
return 0;
}
// If uniqueBool is 1 here, control falls through to the final `return 0`
// without linking or freeing theNode.
}
else{
printf("simple placement into an empty slot\n");
// theNode becomes the slot head with theNode->next still unset, so the next
// insert into this slot reads an indeterminate ->next above.
table->table[hashindex] = theNode;
}
return 0;
}
每当我尝试访问currentNode->url(给定插槽的链表中的下一个节点)时,都会出现段错误(SegFault)。既然该节点本身不是NULL,它就应该带有一个字符串。
我知道这段代码有点冒险,所以提前感谢任何人接受挑战。
芯片
更新:
这是调用所有哈希表(ht)函数的函数。我在hash table.c的main()中用普通字符串做了测试,由此得出结论:segfault是由下面这段代码中的某些原因引起的:
/*
 * Walk every URL that GetNextURL() reports for page->html.
 *
 * A URL for which HashTableLookup() returns 1 (treated as "not in the table")
 * is inserted via hti(), wrapped in a zero-initialized WebPage and appended to
 * URLList. Any other lookup result only prints a "skipping" message.
 * After the scan, the last URL pointer and `page` itself are freed.
 */
void crawlPage(WebPage * page){
    char *found_url = NULL;
    int cursor = 0;

    for (;;) {
        /* GetNextURL() returning -1 ends the scan. */
        cursor = GetNextURL(page->html, cursor, URL_PREFIX, &found_url);
        if (cursor == -1) {
            break;
        }

        if (HashTableLookup(URLsVisited, found_url) != 1) {
            printf("skipping url cuz it is already in table\n");
        } else {
            printf("url is not in table......\n");
            hti(URLsVisited, found_url);

            /* The new page shares the found_url pointer with the table entry. */
            WebPage *queued = calloc(1, sizeof *queued);
            queued->url = found_url;
            printf("Adding to LIST...\n");
            /* Author's note: passing &URLList (not URLList) avoided a segfault. */
            add(&URLList, queued);
        }

        /* Reset before the next GetNextURL() call writes a fresh pointer. */
        found_url = NULL;
    }

    printf("freeing\n");
    free(found_url);  /* cleanup */
    free(page);       /* free current page */
}
答案 0 :(得分:1)
您的哈希表插入逻辑违反了一些相当基本的规则。
例如:为currentNode分配的内存被明显地泄漏了;url指针的所有权语义也很可疑。除此之外,这个算法写得过于复杂了。
只有在遍历冲突链直到末尾仍未找到相同URL的情况下,才需要真正分配一个新节点,并将其链接到现有的冲突链上。采用指向指针的指针(pointer-to-pointer)的方法后,这些大多变得非常简单,我将在下面演示:
/*
 * Insert a copy of `url` into `table` unless an equal string is already stored.
 *
 * table: hash table to update.
 * url:   NUL-terminated string to insert. The string is duplicated with
 *        strdup(), so the node owns its own copy and the caller keeps
 *        ownership of the buffer it passed in.
 *
 * Returns 0 when a new node was inserted, 1 when the url was already
 * present, and -1 when memory for the node or its string copy could not be
 * allocated (the table is left unchanged in that case).
 */
int HashTableInsert(HashTable *table, const char *url)
{
    // find collision list starting point
    // NOTE(review): presumably JenkinsHash returns an index in [0, MAX_HASH_SLOT) - confirm.
    long int hashindex = JenkinsHash(url, MAX_HASH_SLOT);
    HashTableNode **pp = &table->table[hashindex];

    // Walk the collision list looking for a match. pp always addresses the
    // link we are inspecting (the slot itself, then each node's `next`), so
    // when the loop stops it points either at the matching node's link or at
    // the terminating NULL link where a new node can be stored.
    while (*pp != NULL && strcmp(url, (*pp)->url) != 0)
        pp = &(*pp)->next;

    if (*pp != NULL)
    {   // url already in the table
        printf("url \"%s\" already present\n", url);
        return 1;
    }

    // no matching node found. insert a new one.
    HashTableNode *pNew = malloc(sizeof *pNew);
    if (pNew == NULL)
        return -1;

    pNew->url = strdup(url);
    if (pNew->url == NULL)
    {
        // do not leave a half-built node behind
        free(pNew);
        return -1;
    }

    pNew->next = NULL;
    *pp = pNew;   // the list is only modified once the node is fully built
    return 0;
}
真的就是这一切。
我之前提到的url所有权问题,在上面的代码中通过使用strdup()复制字符串得到了解决。strdup()虽然不是标准C库函数,但它符合POSIX标准,而且我在过去二十年里见过的每一个像样的实现都提供了它。如果你的环境没有提供,那么(a)我很想知道你用的是什么环境,(b)用strlen和malloc自己实现它也非常简单。无论如何,在删除某个值或清空整个表而释放节点时,请确保先free节点的url,再free节点本身。
祝你好运。