我正在编写一个遍历目录树的程序:为找到的每个文件计算哈希值,如果发现其他文件具有相同的哈希键(即重复文件),就把它们归为一组。我打算用嵌套链表来实现:顶层链表保存哈希键,每个键所在的节点再挂一个内层链表,保存具有该键的重复文件的路径。目前,我在初始化顶层链表时遇到了困难。第一个键值可以成功传入链表并创建 headList。但是,在第一次传入之后继续遍历时,键的值丢失了,我不确定原因。我用 gdb 跟踪后发现,返回到 searchDirects 函数时,这个值在某处被破坏了。
这是我的代码:
#define _GNU_SOURCE // for asprintf(), if needed
#include <unistd.h>
#include <stdio.h>
#include <dirent.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <stdlib.h>
#include <openssl/md5.h>
//stat -c "%s %n" filename
//The above will display filename and size of file
//gcc -g -Wall -o file file.c -lssl -lcrypto
//the above is needed to link libraries so the encryption will work
#define table_size 20
#define BUFFER_SIZE 4096 //max path buffer size
#define X (37)
void directoryCheck(char *fileName, char *dirValue);
// Node of the inner (per-key) list: holds one path string and a link to the
// next node. The hash keys themselves are stored in struct List, not here.
// NOTE(review): nothing in the visible code allocates or fills a LinkNode yet.
struct LinkNode {
char pathValue[BUFFER_SIZE]; // path text, at most BUFFER_SIZE bytes including the terminator
struct LinkNode *nextNode; // next node in the same inner list
};
// Node of the top-level list: one entry per distinct hash key.
// Also declares the global list head, headList.
struct List {
struct LinkNode *headNode; // Embedded linked list
unsigned char *key; // Hash key value; only a pointer, so the bytes must outlive the node (printAllThatShit reads 16 of them)
struct List *nextList; // next top-level node, NULL at the tail
//char *value;
}*headList;
/*
 * Debug helper: walks the top-level list starting at *headList and prints
 * the first 16 bytes of every node's key as lowercase hex, one key per line,
 * each preceded by a banner line.
 */
void printAllThatShit(struct List **headList)
{
    struct List *node;

    for (node = *headList; node != NULL; node = node->nextList)
    {
        int byteIndex = 0;

        printf("Here is the key when sent to print function:\n");
        while (byteIndex < 16)
        {
            printf("%02x", node->key[byteIndex]);
            byteIndex++;
        }
        printf("\n");
    }
}
//Function to add to List
void addToList(unsigned char *key, char* dirValue)
{
//head is Null, so we will put first key in
if(headList == NULL)
{
printf("Adding to head of list!\n");
headList = malloc(sizeof(struct List));
headList->nextList = NULL;
headList->key = key;
/*printf("Here is the key: %s\n", headList->key);
for(int i = 0; i < 16; i++)
printf("%02x", headList->key[i]);
printf(" %s\n", dirValue);*/
}
else
{
currentList = headList;
if(currentList != 0)
{
while(currentList->nextList != 0)
{
if(currentList->key == key)
{
printf("Found the same key!\n");
return;
}
currentList = currentList->nextList;
}
currentList->nextList = malloc(sizeof(struct List));
currentList = currentList->nextList;
currentList->key = key;
currentList->nextList = NULL;
}
}
printAllThatShit(&headList); //After this initial print
//with headList, the key value is junked
}
/*
 * deleteList - free every node of the top-level list and reset the head.
 *
 * headList: address of the list head pointer; set to NULL on return.
 *           A NULL argument is ignored.
 *
 * Each node's key is printed as 16 hex bytes before the node is freed. The
 * key is raw binary without a terminator, so it must not be printed with %s.
 * Only the nodes themselves are freed; key and headNode are not freed
 * separately.
 */
void deleteList(struct List **headList)
{
    struct List *current, *next;
    int i;

    if (headList == NULL)
        return;

    current = *headList;
    while (current != NULL)
    {
        printf("Here is the current value: ");
        if (current->key != NULL)
        {
            for (i = 0; i < 16; i++)
                printf("%02x", current->key[i]);
        }
        printf("\n");

        /* Grab the successor before the node is released. */
        next = current->nextList;
        free(current);
        current = next;
    }
    *headList = NULL;
}
/*
 * md5Hash - compute the MD5 digest of the file at 'path', print it, and hand
 * it to addToList together with the path.
 *
 * The file is read in fixed-size chunks, so memory use does not depend on
 * the file size and empty files are handled (the read loop simply does not
 * run). If the file cannot be opened or a read error occurs, a message is
 * written to stderr and nothing is added to the list.
 *
 * 'key' is a local array: it is only valid for the duration of the
 * addToList call, so addToList must not keep the pointer after returning.
 */
void md5Hash(char *path)
{
    enum { CHUNK_SIZE = 4096 };
    unsigned char key[MD5_DIGEST_LENGTH]; /* 16 bytes for the output */
    unsigned char data[CHUNK_SIZE];
    MD5_CTX mdx;
    FILE *inFile;
    size_t bytes;
    int i;

    inFile = fopen(path, "rb");
    if (inFile == NULL)
    {
        fprintf(stderr, "Unable to open %s", path);
        return;
    }

    MD5_Init(&mdx);
    /* fread returns the number of bytes actually read; 0 means EOF or error. */
    while ((bytes = fread(data, 1, sizeof data, inFile)) != 0)
        MD5_Update(&mdx, data, bytes);

    if (ferror(inFile))
    {
        /* A partial digest would look like a valid key, so drop it. */
        fprintf(stderr, "Error while reading %s\n", path);
        fclose(inFile);
        return;
    }

    MD5_Final(key, &mdx); /* Place the final 16 byte output in key */
    for (i = 0; i < MD5_DIGEST_LENGTH; i++)
        printf("%02x", key[i]);
    printf(" %s\n", path);
    fclose(inFile);

    printf("Here is that file path while in the hashing function: %s\n", path);
    addToList(key, path);
}
/*
 * newFile - build "<dirValue>/<fileName>" and send that path to md5Hash.
 *
 * fileName: entry name inside the directory.
 * dirValue: path of the directory that contains the entry.
 *
 * The joined path is heap-allocated by asprintf and released here once
 * md5Hash returns. If the allocation fails, an error is reported and the
 * file is skipped.
 * NOTE(review): freeing is only safe while nothing downstream keeps the
 * path pointer; the visible addToList ignores its path argument.
 */
void newFile(char *fileName, char *dirValue)
{
    char *appendPath = NULL;

    printf("Made it to add a new file!\n");
    if (asprintf(&appendPath, "%s/%s", dirValue, fileName) < 0)
    {
        /* On failure the contents of appendPath are undefined; do not use it. */
        fprintf(stderr, "Unable to build a path for %s\n", fileName);
        return;
    }
    printf("Here is that file you are now sending to be Hashed: %s\n", fileName);
    md5Hash(appendPath);
    free(appendPath);
}
/*
 * is_regular_file - tell whether 'path' names a regular file.
 *
 * Returns non-zero if stat() succeeds and the mode is a regular file.
 * Returns 0 for any other file type and also when stat() fails (missing
 * path, no permission, ...), so the stat buffer is never read uninitialized.
 */
int is_regular_file(const char *path)
{
    struct stat path_stat;

    if (stat(path, &path_stat) != 0)
        return 0;
    return S_ISREG(path_stat.st_mode);
}
/*
 * searchDirects - walk the directory at 'path' recursively.
 *
 * path:  directory to scan; if it cannot be opened the call does nothing.
 * depth: accepted for the callers' sake but not used.
 *
 * For each entry:
 *   - "." and ".." are skipped;
 *   - regular files are passed to newFile() to be hashed;
 *   - directories are descended into through directoryCheck(). Entries whose
 *     type the filesystem does not report (DT_UNKNOWN) are tried as
 *     directories too; opendir simply fails if they are not;
 *   - everything else (symlinks, sockets, devices, ...) is not followed, so
 *     a symlink pointing back up the tree cannot cause repeated descent.
 */
void searchDirects(char *path, int depth)
{
    DIR *dp;               /* directory stream */
    struct dirent *entry;  /* current directory entry */

    (void)depth;

    dp = opendir(path);
    if (dp == NULL)
        return;

    while ((entry = readdir(dp)) != NULL)
    {
        if (strcmp(".", entry->d_name) == 0 || strcmp("..", entry->d_name) == 0)
        {
            printf("Found files with . or ..!\n");
            continue;
        }

        if (entry->d_type == DT_REG)
        {
            printf("Found a file in the directory!\n");
            newFile(entry->d_name, path); /* send the file name and directory to be added */
        }
        else if (entry->d_type == DT_DIR || entry->d_type == DT_UNKNOWN)
        {
            printf("Attempting to check a directory\n");
            directoryCheck(entry->d_name, path);
        }
        printf("[%s]\n", entry->d_name);
    }
    closedir(dp);
}
/*
 * directoryCheck - build "<dirValue>/<fileName>" and scan it with
 * searchDirects().
 *
 * fileName: entry name inside dirValue.
 * dirValue: path of the parent directory.
 *
 * The joined path is allocated by asprintf and freed before returning. If
 * the allocation fails the entry is skipped, because the pointer is not
 * usable in that case.
 */
void directoryCheck(char *fileName, char *dirValue)
{
    char *appendPath = NULL;

    if (asprintf(&appendPath, "%s/%s", dirValue, fileName) < 0)
    {
        fprintf(stderr, "Unable to build a path for %s\n", fileName);
        return;
    }
    searchDirects(appendPath, 1);
    free(appendPath);
}
/*
 * Entry point. Every command-line argument is treated as a path: regular
 * files are hashed directly, anything else is scanned as a directory.
 * Afterwards the collected keys are printed and the list is freed.
 * With no arguments the program exits immediately with status 0.
 */
int main(int argc, char * argv[])
{
    int argIndex;

    headList = NULL;

    if (argc <= 1)
        return 0;

    argIndex = 1;
    while (argIndex < argc)
    {
        char *target = argv[argIndex];

        if (!is_regular_file(target))
        {
            searchDirects(target, 1);
        }
        else
        {
            printf("Put function to handle file\n");
            md5Hash(target);
        }
        argIndex++;
    }

    printAllThatShit(&headList);

    printf("Going to delete the list now!\n");
    deleteList(&headList);

    printf("Scan of current directory:\n");
    printf("Scan of current directory: %s\n", argv[1]);
    printf("done.\n");

    exit(0);
}
我知道代码里还有很多其他错误,而且很可能存在内存泄漏。但是,我只想弄清楚为什么在第一次传入之后 headList->key 中的键值丢失了,并且之后每次添加新节点时键也会丢失。我原以为问题出在创建哈希键并传递它的方式上,但用 gdb 调试时发现,正如上面所说,是在返回到 searchDirects 的 while() 循环时键才丢失的。感谢任何帮助或见解。
答案 0 :(得分:2)
这个问题是由一个常见错误引起的。在 addToList 中:
headList->key = key;
这一行只是保存了一个指向 key 缓冲区的指针,并没有复制缓冲区的内容。而 md5Hash 函数中是这样调用 addToList 的:
unsigned char key[MD5_DIGEST_LENGTH];
addToList(key, path);
在这段代码中,key 是一个局部变量。md5Hash 函数返回时,它的生命周期就结束了,因此链表节点的 key 字段变成了悬空(无效)指针。此后任何情况都可能发生,因为通过无效指针访问内存属于未定义行为(Undefined Behavior)。
一种修复方法是用 memcpy 把键值复制到链表节点中。为此,先将 key 定义为数组而不是指针:
// Revised top-level node: the digest bytes are stored inside the node itself,
// so the node no longer depends on the lifetime of a caller's buffer.
struct List {
struct LinkNode *headNode; // Embedded linked list
unsigned char key[MD5_DIGEST_LENGTH]; // Hash key value, held by value (MD5_DIGEST_LENGTH bytes)
struct List *nextList; // next top-level node
//char *value;
}*headList;
在 addToList 中:
// REMOVE THIS LINE:
// headList->key = key;
// REPLACE WITH THIS:
memcpy(headList->key, key, sizeof(headList->key));