我正在逐行读取一个小文件,并将第一列作为键存储,并将后续列存储为不同哈希值中的各种值。我正在使用How to delete element from hsearch使用POSIX哈希函数得到的建议。
但是,如果没有内存错误,我无法将哈希值读入内存。
我正在阅读的数据是以制表符分隔的表格:
sample detailed_category primary disease or tissue _primary_site _sample_type _gender _study
TCGA-V4-A9EE-01 Uveal Melanoma Uveal Melanoma Eye Primary Tumor Male TCGA
TCGA-VD-AA8N-01 Uveal Melanoma Uveal Melanoma Eye Primary Tumor Male TCGA
TCGA-V4-A9EI-01 Uveal Melanoma Uveal Melanoma Eye Primary Tumor Male TCGA
TCGA-VD-AA8O-01 Uveal Melanoma Uveal Melanoma Eye Primary Tumor Male TCGA
我的C程序如下:
#include <stdio.h>
#include <stdlib.h>
#define GNU_SOURCE
#define __USE_GNU
#include <search.h>//hcreate_r, h*_r
#include <string.h>//strok_r
#define NIL (-1L)
//https://stackoverflow.com/questions/25971505/how-to-delete-element-from-hsearch
void hadd_char(struct hsearch_data *restrict tab, char *restrict key, const char *restrict value) {
ENTRY item = {key, (char *restrict ) value};
ENTRY *pitem = &item;
if (hsearch_r(item, ENTER, &pitem, tab)) {
pitem->data = (char *restrict ) value;
}
}
char * hfind(struct hsearch_data *restrict tab, char *restrict key) {
ENTRY item = {key};
ENTRY *pitem = &item;
if (hsearch_r(item, FIND, &pitem, tab)) {
return (char *) pitem->data;
}
return NULL;
}
int main(void) {
const char PHENOTYPE_FILENAME[] = "head_TcgaTargetGTEX_phenotype.txt";
FILE *restrict phenotype_fh = fopen(PHENOTYPE_FILENAME, "r");
if (phenotype_fh == NULL) {
printf("failed to open %s\n", PHENOTYPE_FILENAME);
perror("");
exit(EXIT_FAILURE);
}
char *line = NULL;//necessary for reading file
size_t len = 0;//necessary for reading file
ssize_t read = 0;//necessary for reading file
_Bool header = 0;//necessary for reading file
struct hsearch_data patient_disease = {0};//the hash
hcreate_r(15, &patient_disease);//bigger than it needs to be
//start reading file
while ((read = getline ( &line, &len, phenotype_fh)) != -1) {
if (header == 0) {//skip header
header = 1;
continue;
}
char *restrict tmp_string = NULL;
char * tmp_pointer = NULL;
tmp_string = strtok_r(line, "\t", &tmp_pointer);
char *restrict patient = strdup(tmp_string);
unsigned short int column = 1;
while (tmp_string != NULL) {//read each field in line
tmp_string = strtok_r(NULL, "\t", &tmp_pointer);
if (tmp_string == NULL) {
break;
}
column++;
if (column == 2) {
printf("patient %s = %s\n", patient, tmp_string);
hadd_char(&patient_disease, patient, tmp_string);
}
}
free(patient); patient = NULL;
}
free(line); line = NULL;
fclose(phenotype_fh);
//try to get a value back from the hash
char *restrict x = hfind(&patient_disease, "TCGA-V4-A9EI-01");
puts(x);
free(x); x = NULL;
hdestroy_r(&patient_disease);
return 0;
}
然而,这个段错误会从valgrind中获取错误:
=17700== HEAP SUMMARY:
==17700== in use at exit: 0 bytes in 0 blocks
==17700== total heap usage: 9 allocs, 9 frees, 6,144 bytes allocated
==17700==
==17700== All heap blocks were freed -- no leaks are possible
==17700==
==17700== ERROR SUMMARY: 24 errors from 2 contexts (suppressed: 0 from 0)
==17700==
==17700== 2 errors in context 1 of 2:
==17700== Invalid read of size 1
==17700== at 0x4C33DA3: strcmp (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==17700== by 0x4F4BDCA: hsearch_r (hsearch_r.c:171)
==17700== by 0x108A93: hadd_char (graeme.c:15)
==17700== by 0x108A93: main (graeme.c:65)
==17700== Address 0x521d510 is 0 bytes inside a block of size 16 free'd
==17700== at 0x4C30D3B: free (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==17700== by 0x108AB8: main (graeme.c:68)
==17700== Block was alloc'd at
==17700== at 0x4C2FB0F: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==17700== by 0x4ECFC99: strdup (strdup.c:42)
==17700== by 0x108A0B: main (graeme.c:55)
==17700==
==17700==
==17700== 22 errors in context 2 of 2:
==17700== Invalid read of size 1
==17700== at 0x4C33DC7: strcmp (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==17700== by 0x4F4BDCA: hsearch_r (hsearch_r.c:171)
==17700== by 0x108A93: hadd_char (graeme.c:15)
==17700== by 0x108A93: main (graeme.c:65)
==17700== Address 0x521d511 is 1 bytes inside a block of size 16 free'd
==17700== at 0x4C30D3B: free (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==17700== by 0x108AB8: main (graeme.c:68)
==17700== Block was alloc'd at
==17700== at 0x4C2FB0F: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==17700== by 0x4ECFC99: strdup (strdup.c:42)
==17700== by 0x108A0B: main (graeme.c:55)
==17700==
==17700== ERROR SUMMARY: 24 errors from 2 contexts (suppressed: 0 from 0)
我真的很喜欢链接帖子中的作者如何添加功能,但我无法复制他的成功。
最后,应测试输出,以便每个键都匹配正确的值。
如何从这个哈希中成功读取和写入?
答案 0 :(得分:0)
Valgrind表示您在第55行使用strdup
,此处:
char *restrict patient = strdup(tmp_string);
并在第68行释放此字符串:
free(patient); patient = NULL;
但你把它(第65行)作为hadd_char()
参数传递给key
:
void hadd_char(struct hsearch_data *restrict tab, char *restrict key, const char *restrict value) {
ENTRY item = {key, (char *restrict ) value};
ENTRY *pitem = &item;
if (hsearch_r(item, ENTER, &pitem, tab)) {
pitem->data = (char *restrict ) value;
}
}
指针被复制到ITEM
,hsearch_r(..., ENTER, ...)
复制ITEM
,包括指针,并将其放入哈希表中。所以当你free(patient)
时,你会在哈希表中留下一个悬空指针。
您需要安排在将新数据插入哈希表时,只要它存在就保持活动(这在C ++中更容易;在C中,管理对象生命周期是最困难的问题之一)