我试图在BST中搜索字符串键。
node和keydata_t的结构如下。
struct node {
char *key;
char *details;
BST* left;
BST* right;
};
typedef struct{
char *name;
char *data;
}datatype_t;
typedef struct{
char *name;
}keydata_t;
我的搜索功能代码:
struct node*search(BST *root, keydata_t *t){
BST *r = root;
if (r != NULL){
printf("here\n");
if(strcmp(r->key, t->name) == 0){
printf("found\n");
}
else if(strcmp(r->key,t->name) < 0){
printf("left\n");
return search(r->left, t);
}
else {
printf("right\n");
return search(r->right, t);
}
}
插入功能:
struct node*insert(struct node*r, datatype_t *d){
if(r == NULL)
{
//printf("empty\n");
r = (struct node*)malloc(sizeof(struct node));
r->key = d->name;
r->details = d->data;
r->left = r->right = NULL;
return r;
}
else if(strcmp(r->key, d->name ) <= 0){
r->left = insert(r->left ,d);
}
else if(strcmp(r->key, d->name ) > 0){
r->right = insert(r->right,d);
}
return r;
}
阅读功能:
FILE* safe_open_file(const char* file_name, const char* mode)
{
FILE* csv = fopen(file_name,mode);
if(csv==NULL){
printf("%s %c ad\n",file_name,*mode);
exit(EXIT_FAILURE);
return NULL;
}
return csv;
}
void printfile(FILE* fp, datatype_t* d)
{
char name[64+1];
char data[1465+1];
fprintf(fp, "%s--> %s", d->name, d->data);
}
datatype_t*read(FILE* fp)
{
char name[66];
char data[1466];
if (fscanf(fp, "%[^,] %[^\n]", name, data) == 2) {
datatype_t *d = (datatype_t*)malloc(sizeof(datatype_t));
d->name = strdup(name);
d->data = strdup(data);
return d;
}
return NULL;
}
keydata_t*read_key(FILE* fp){
char buffer[66];
if (fgets(buffer,sizeof buffer, fp) != NULL ) {
keydata_t *k = (keydata_t*)malloc(sizeof(keydata_t));
size_t len = strlen(buffer);
if(buffer > 0 && buffer[len-1] == '\n'){
buffer[--len] = '\0';
}
k->name = strdup(buffer);
return k;
}
return NULL;
}
主要功能:
int main(int argc, char** argv)
{
char* dataset = NULL;
char* dataset1 = NULL;
char* key = NULL;
char* read_mode="r";
char* write_mode="w+";
struct node* root = NULL;
if(argv[1]!=NULL && argv[2] != NULL && argv[3] != NULL)
{
dataset=argv[1];
dataset1 = argv[2];
key = argv[3];
}
else
{
return EXIT_FAILURE;
}
FILE* Data_input = safe_open_file(dataset,read_mode);
FILE* Data_output = safe_open_file(dataset1, write_mode);
FILE* keyfile = safe_open_file(key, read_mode);
datatype_t **ptr = (datatype_t**)malloc(MAX_NUM_LINE*sizeof(datatype_t *));
datatype_t *ptr_one;
keydata_t *keyt;
int index = 0;
while((ptr_one = read(Data_input)) != NULL)
{
root = insert(root, ptr_one);
}
while((keyt = read_key(keyfile))!= NULL){
search(root, keyt);
}
fclose(Data_input);
fclose(Data_output);
fclose(keyfile);
}
由于某些原因,它不会在任何时候打印出来。
这些是我使用fgets从txt文件获得的密钥。
尼尔先生
Verizon公司
keylor
所以从我的Bst开始它应该返回找到的第一个键但不是其余的。我的输出不打印任何东西,它只是执行。
答案 0 :(得分:0)
您的代码中存在很多问题
您的search()
函数是递归的,但在root == NULL
时它不会返回,并且会调用未定义的行为。
您的代码中有datatype_t
类型未在任何地方定义,而且BST
也没有定义,所以我怀疑这是真正的代码,因为它没有& #39; t compile。
您从[{1}} is not only unecessary but can be a problem返回void *
。但是你从不检查它是否返回malloc()
,我认为这不是一个问题但是很好的做法。
你永远不会NULL
free()
内存,这也不是一个问题,但总的来说很糟糕。特别是,如果你的程序应该保留很长时间。
答案 1 :(得分:0)
一些缓冲区(66,1466)的大小提醒我,我帮助另一张海报遇到了类似问题。他们得到了一个大的Yelp文件,按摩成90MB的CSV文件,其中名称(包括空格)作为键,其余作为数据。当然,使用名称作为键意味着该键不是唯一的,我提出了一个链接列表来处理数据(这是我能想到的最简单的)。
另一个问题:CSV文件已经过排序,因此简单的BST会非常偏斜,需要进行平衡。
由于非常类似于OP的问题,虽然不是实际的问题,但我冒昧地在这里发布。愿它有用。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
// make a string out of a number macro
#define MAKE_STRING(x) #x
#define CONVERT_TO_STRING(x) MAKE_STRING(x)
#define MAX(x,y) (((x) > (y)) ? (x) : (y))
// For search-key buffer
#define MAXNAMELENGTH 512
#define MAXSCANF CONVERT_TO_STRING(MAXNAMELENGTH)
// linked-list node to hold the actual data
// TODO: extend to a skip-list/another tree
struct list {
char *data;
struct list *next;
};
typedef struct list list;
// AVL tree
struct node {
// should have been named "key", I s'ppose?
char *name;
// Data goes in a linked list
list *ll;
struct node *left;
struct node *right;
// the single extra information needed for balancing: the level
int height;
};
typedef struct node node;
node *search(node ** tree, char *key);
int height(node * tree);
int diff_height(node * tree);
node *rot_left(node * tree);
node *rot_right(node * tree);
int insertion(node ** r, char *key, char *value);
void bt_print(node * leaf);
void deltree(node * tree);
//void trim(char *source);
// getline() used for clarity of this example.
// Uses a version from the Stackoverflow documentation
// if not available natively. See getline() at the end
// of this code for a link
#if !(defined _POSIX_C_SOURCE) || _POSIX_C_SOURCE < 200809L
# include <sys/types.h>
ssize_t getline(char **pline_buf, size_t * pn, FILE * fin);
#endif
#ifdef SORTED_LIST
int ll_sorted_insert(list ** head, char *value);
#else
int ll_insert(list ** head, char *value);
#endif
void ll_free(list * knot);
void ll_print(list * knot);
void ll_fprint(FILE * fp, list * knot);
// int ll_search(list * knot, char *value);
int main(int argc, char **argv)
{
node *root;
FILE *fp;
int res;
size_t bufsize = MAXNAMELENGTH + 2;
char *buffer;
char *keyin = NULL;
char *valuein = NULL;
char *comma;
char *point;
char *line_buf = NULL;
size_t line_buf_size = 0;
ssize_t line_size;
int exitvalue = EXIT_SUCCESS;
int processed = 0;
root = NULL;
// must have at least one argument
if (argc < 2) {
fprintf(stderr, "Usage: %s datafile [< key-list]\n", argv[0]);
exit(EXIT_FAILURE);
}
// tried to keep everything on the heap.
buffer = malloc(bufsize);
if (buffer == NULL) {
fprintf(stderr, "Malloc failed\n");
exit(EXIT_FAILURE);
}
// the datafile
fp = fopen(argv[1], "r");
if (fp == NULL) {
fprintf(stderr, "Opening \"%s\" for reading failed\n", argv[1]);
exit(EXIT_FAILURE);
}
// takes search keys from stdin only. You might see a need to change that
// main difference to fgets(): getline() takes care of the memory
while ((line_size = getline(&line_buf, &line_buf_size, fp)) > 0) {
if (ferror(fp)) {
// TODO: check errno for the exact kind of error
fprintf(stderr, "An eror occured during reading \"%s\"\n", argv[1]);
exitvalue = EXIT_FAILURE;
goto _END;
} else if (feof(fp)) {
break;
}
// assuming strict KEY","DATA"\n" and without whitespace
//TODO: check if line_size < bufsize!
// valuein points to the comma before DATA
valuein = strchr(line_buf, ',');
// skip comma
valuein++;
// valuein points to DATA now
// might have no new line at the end
if ((point = strchr(valuein, '\n')) != NULL) {
*point = '\0';
}
//printf("%s",valuein);
// comma points to the comma before DATA
comma = strchr(line_buf, ',');
// make *comma NUL, that is: replace ',' with '\0'
*comma = '\0';
keyin = line_buf;
// keyin points to KEY now
if (!insertion(&root, keyin, valuein)) {
fprintf(stderr, "An eror occured during inserting \"%s\"\n", keyin);
exitvalue = EXIT_FAILURE;
goto _END;
}
if (++processed % 1000 == 0) {
printf("Processing line %d\n", processed);
}
}
// bt_print(root);
free(line_buf);
// search-keys come from either stdin or get typed in.
// things typed in are also stdin
puts("Searching...");
while ((res = scanf("%" MAXSCANF "s", buffer)) == 1) {
if (strcmp(buffer, "exit") == 0) {
break;
}
// ignoring return for now
(void) search(&root, buffer);
}
_END:
// clean up
deltree(root);
free(buffer);
fclose(fp);
exit(exitvalue);
}
// Not used here, code is already overly cluttered
/*
#include <ctype.h>
// trim whitespace, fore and aft
// look into your textbook, you might find it there, too
char *trim_both(char *s)
{
char *end;
while (isspace(*s)) {
s++;
}
if (*s == '\0') {
return s;
}
end = s + stringlength(s) - 1;
while (end > s && isspace(*end)) {
end--;
}
*(end + 1) = '\0';
return s;
}
*/
// traverse tree (defauls to preorder) and print the linked lists at the nodes
void bt_print(node * leaf)
{
if (leaf) {
#ifdef PRINT_POSTORDER
bt_print(leaf->left);
bt_print(leaf->right);
puts("LEAF_START");
printf("%s\n", leaf->name);
ll_print(leaf->ll);
puts("LEAF_END\n");
#elif (defined PRINT_INORDER)
bt_print(leaf->left);
puts("LEAF_START");
printf("%s\n", leaf->name);
ll_print(leaf->ll);
puts("LEAF_END\n");
#else
puts("LEAF_START");
printf("%s\n", leaf->name);
ll_print(leaf->ll);
puts("LEAF_END\n");
bt_print(leaf->left);
bt_print(leaf->right);
#endif
}
}
// (simple) implementation of an AVL tree
// or use a macro if "inline" is not possible
// get height
inline int height(node * tree)
{
return (tree == NULL) ? 0 : tree->height;
}
inline int diff_height(node * tree)
{
return (tree == NULL) ? 0 : height(tree->left) - height(tree->right);
}
// rotation of branch for balancing
node *rot_left(node * tree)
{
node *right = tree->right;
// temporary variable for the swap, pardon, rotation
node *left;
// rotate
left = right->left;
right->left = tree;
tree->right = left;
// increment heights
tree->height = MAX(height(tree->left), height(tree->right)) + 1;
right->height = MAX(height(right->left), height(right->right)) + 1;
return right;
}
// rotation of branch for balancing
node *rot_right(node * tree)
{
node *left = tree->left;
node *right;
right = left->right;
left->right = tree;
tree->left = right;
tree->height = MAX(height(tree->left), height(tree->right)) + 1;
left->height = MAX(height(left->left), height(left->right)) + 1;
return left;
}
// insert a key/value pair into the tree
int insertion(node ** r, char *key, char *value)
{
int heightdiff = 0;
if (*r == NULL) {
// allocate memory for a new node
*r = malloc(sizeof(node));
if (r == NULL) {
return 0;
}
// the caller gave us nothing but pointers to the actual content
// so allocate memory...
(*r)->name = malloc(strlen(key) + 1);
if ((*r)->name == NULL) {
return 0;
}
// and make a copy
strcpy((*r)->name, key);
// Must be NULL for ll_sorted_insert() to work
(*r)->ll = NULL;
// insert value into a linked list either sorted or
// or as it comes
#ifdef SORTED_LIST
ll_sorted_insert(&(*r)->ll, value);
#else
ll_insert(&(*r)->ll, value);
#endif
// terminate node
(*r)->left = NULL;
(*r)->right = NULL;
// set height, used for the AVL-tree balancing, to one
(*r)->height = 1;
}
// search a place for the key
// Checks of returns omitted for clarity
else if (strcmp(key, (*r)->name) < 0) {
insertion(&(*r)->left, key, value);
} else if (strcmp(key, (*r)->name) > 0) {
insertion(&(*r)->right, key, value);
} else {
// insert the data for the key into the linked list
#ifdef SORTED_LIST
ll_sorted_insert(&(*r)->ll, value);
#else
ll_insert(&(*r)->ll, value);
#endif
return 1;
}
// And now balance the tree
// see your textbook or even wikipedia for an explanation
// (they have colored pictures!)
(*r)->height = MAX(height((*r)->left), height((*r)->right)) + 1;
heightdiff = diff_height(*r);
// left-left
if (heightdiff > 1 && strcmp(key, (*r)->left->name) < 0) {
*r = rot_right(*r);
return 1;
}
// right-right
if (heightdiff < -1 && strcmp(key, (*r)->right->name) > 0) {
*r = rot_left(*r);
return 1;
}
// left-right
if (heightdiff > 1 && strcmp(key, (*r)->left->name) > 0) {
*r = rot_right(*r);
return 1;
}
// right-left
if (heightdiff < -1 && strcmp(key, (*r)->right->name) < 0) {
*r = rot_left(*r);
return 1;
}
return 1;
}
// insert data into a linked list. Sorted here...
int ll_sorted_insert(list ** head, char *value)
{
list *current;
list *llnode;
llnode = malloc(sizeof(list));
if (llnode == NULL) {
return 0;
}
// as with the key, we only got a pointer and need to take care of the
// memory here
llnode->data = malloc(strlen(value) + 1);
if (llnode->data == NULL) {
return 0;
}
strcpy(llnode->data, value);
if (*head == NULL || strcmp((*head)->data, value) >= 0) {
llnode->next = *head;
*head = llnode;
} else {
// put before
current = *head;
while (current->next != NULL &&
strcmp(current->next->data, llnode->data) < 0) {
current = current->next;
}
llnode->next = current->next;
current->next = llnode;
}
return 1;
}
// ... and here as it comes in
int ll_insert(list ** head, char *value)
{
list *llnode;
llnode = malloc(sizeof(list));
if (llnode == NULL) {
return 0;
}
llnode->data = malloc(strlen(value) + 1);
if (llnode->data == NULL) {
return 0;
}
// put in front
strcpy(llnode->data, value);
llnode->next = *head;
*head = llnode;
return 1;
}
// traverse the tree and delete everything
void deltree(node * tree)
{
if (tree) {
deltree(tree->left);
deltree(tree->right);
ll_free(tree->ll);
free(tree->name);
free(tree);
tree = NULL;
}
}
node *search(node ** tree, char *key)
{
node *tmp;
if (!(*tree)) {
// search exhausted (or no tree at all)
printf("NOT FOUND %s\n", key);
return NULL;
} else {
if (strcmp(key, (*tree)->name) < 0) {
tmp = search(&(*tree)->left, key);
return tmp;
} else if (strcmp(key, (*tree)->name) > 0) {
tmp = search(&(*tree)->right, key);
return tmp;
} else {
// found, print the linked list
printf("FOUND %s\n", key);
ll_print((*tree)->ll);
return *tree;
}
}
}
// Helpers for the linked list
// free the linked list
void ll_free(list * knot)
{
list *cur;
while (knot != NULL) {
cur = knot;
free(knot->data);
knot = knot->next;
free(cur);
}
}
// print the linked list to stdout
void ll_print(list * knot)
{
while (knot != NULL) {
printf("\t%s\n", knot->data);
knot = knot->next;
}
}
// search list, not used here
/*
int ll_search(list * knot, char *value)
{
while (knot != NULL) {
if(strcmp(knot->data, value) == 0){
return 1;
}
knot = knot->next;
}
return 0;
}
*/
// getline() from http://stackoverflow.com/documentation/c/507/files-and-i-o-streams/8274/get-lines-from-a-file-using-getline
#include <errno.h>
#include <stdint.h>
#if !(defined _POSIX_C_SOURCE)
typedef long int ssize_t;
#endif
/* Only include our version of getline() if the POSIX version isn't available. */
#if !(defined _POSIX_C_SOURCE) || _POSIX_C_SOURCE < 200809L
# if !(defined SSIZE_MAX)
# define SSIZE_MAX (SIZE_MAX >> 1)
# endif
ssize_t getline(char **pline_buf, size_t * pn, FILE * fin)
{
const size_t INITALLOC = 16;
const size_t ALLOCSTEP = 16;
size_t num_read = 0;
if ((NULL == pline_buf) || (NULL == pn) || (NULL == fin)) {
errno = EINVAL;
return -1;
}
if (NULL == *pline_buf) {
*pline_buf = malloc(INITALLOC);
if (NULL == *pline_buf) {
return -1;
} else {
*pn = INITALLOC;
}
}
{
int c;
while (EOF != (c = getc(fin))) {
num_read++;
if (num_read >= *pn) {
size_t n_realloc = *pn + ALLOCSTEP;
char *tmp = realloc(*pline_buf, n_realloc + 1);
if (NULL != tmp) {
*pline_buf = tmp;
*pn = n_realloc;
} else {
return -1;
}
if (SSIZE_MAX < *pn) {
errno = ERANGE;
return -1;
}
}
(*pline_buf)[num_read - 1] = (char) c;
if (c == '\n') {
break;
}
}
if (EOF == c) {
errno = 0;
return -1;
}
}
(*pline_buf)[num_read] = '\0';
return (ssize_t) num_read;
}
#endif
为简洁起见,省略了一些必要的检查!
$ gcc-4.9 -O3 -g3 -W -Wall -Wextra -std=c11 balancbst.c -o balancbst
$ wc < yelp_academic_dataset_user_alternative.csv
552275 17307225 93975270
$ valgrind --leak-check=full --show-leak-kinds=all --track-origins=yes ./balancbst yelp_academic_dataset_user_alternative.csv
==9100== Memcheck, a memory error detector
==9100== Copyright (C) 2002-2013, and GNU GPL'd, by Julian Seward et al.
==9100== Using Valgrind-3.10.1 and LibVEX; rerun with -h for copyright info
==9100== Command: ./Yelp1M yelp_academic_dataset_user_alternative.csv
==9100==
Processing line 1000
Processing line 2000
...
Processing line 550000
Processing line 551000
Processing line 552000
Searching...
aron
FOUND aron
yelping_since: 2008 01 || votes: funny 17 useful 19 cool 15 || name: aron || type: user || compliments: funny 2 plain 4 writer 3 note 4 hot 2 cool 1 || fans: 2 || average_stars: 4 28 || review_count: 45 ||
Aron
FOUND Aron
yelping_since: 2011 01 || votes: funny 2 useful 14 cool 4 || name: Aron || type: user || compliments: note 1 || fans: 0 || average_stars: 3 91 || review_count: 35 ||
yelping_since: 2009 07 || votes: funny 0 useful 5 cool 2 || name: Aron || type: user || fans: 0 || average_stars: 3 6 || review_count: 5 ||
yelping_since: 2012 07 || votes: funny 0 useful 4 cool 0 || name: Aron || type: user || fans: 0 || average_stars: 5 0 || review_count: 5 ||
yelping_since: 2011 03 || votes: funny 1 useful 16 cool 3 || name: Aron || type: user || fans: 0 || average_stars: 3 75 || review_count: 8 ||
yelping_since: 2012 10 || votes: funny 0 useful 2 cool 1 || name: Aron || type: user || fans: 0 || average_stars: 5 0 || review_count: 1 ||
yelping_since: 2013 07 || votes: funny 0 useful 0 cool 0 || name: Aron || type: user || fans: 0 || average_stars: 5 0 || review_count: 1 ||
yelping_since: 2010 01 || votes: funny 0 useful 0 cool 0 || name: Aron || type: user || fans: 0 || average_stars: 5 0 || review_count: 2 ||
yelping_since: 2012 11 || votes: funny 0 useful 5 cool 1 || name: Aron || type: user || fans: 0 || average_stars: 4 29 || review_count: 14 ||
yelping_since: 2011 11 || votes: funny 8 useful 9 cool 2 || name: Aron || type: user || fans: 0 || average_stars: 2 57 || review_count: 21 ||
yelping_since: 2012 03 || votes: funny 0 useful 9 cool 1 || name: Aron || type: user || compliments: plain 1 || fans: 2 || average_stars: 4 92 || review_count: 21 ||
yelping_since: 2008 03 || votes: funny 11 useful 44 cool 13 || name: Aron || type: user || compliments: profile 1 writer 1 note 2 hot 2 cool 1 more 1 || fans: 0 || average_stars: 3 7 || review_count: 19 ||
yelping_since: 2009 05 || votes: funny 0 useful 8 cool 1 || name: Aron || type: user || fans: 0 || average_stars: 3 83 || review_count: 6 ||
yelping_since: 2015 02 || votes: funny 0 useful 0 cool 0 || name: Aron || type: user || fans: 0 || average_stars: 5 0 || review_count: 2 ||
yelping_since: 2010 11 || votes: funny 9 useful 20 cool 16 || name: Aron || type: user || compliments: writer 1 || fans: 0 || average_stars: 3 71 || review_count: 6 ||
yelping_since: 2009 10 || votes: funny 12 useful 83 cool 12 || name: Aron || type: user || compliments: funny 1 || fans: 0 || average_stars: 3 8 || review_count: 42 ||
yelping_since: 2014 10 || votes: funny 17 useful 57 cool 47 || name: Aron || elite: 2015 || type: user || compliments: funny 3 cute 2 plain 17 writer 9 list 1 note 3 photos 5 hot 3 more 1 cool 15 || fans: 6 || average_stars: 4 39 || review_count: 32 ||
yelping_since: 2012 04 || votes: funny 0 useful 0 cool 0 || name: Aron || type: user || fans: 0 || average_stars: 5 0 || review_count: 2 ||
yelping_since: 2012 11 || votes: funny 3 useful 4 cool 1 || name: Aron || type: user || fans: 0 || average_stars: 4 57 || review_count: 7 ||
yelping_since: 2014 08 || votes: funny 2 useful 6 cool 3 || name: Aron || type: user || fans: 0 || average_stars: 3 5 || review_count: 4 ||
yelping_since: 2010 03 || votes: funny 121 useful 465 cool 157 || name: Aron || elite: 2012 2013 2014 2015 || type: user || compliments: funny 2 plain 12 writer 9 note 2 photos 1 hot 7 more 1 cool 5 || fans: 10 || average_stars: 3 78 || review_count: 233 ||
yelping_since: 2014 08 || votes: funny 3 useful 37 cool 5 || name: Aron || type: user || fans: 1 || average_stars: 3 93 || review_count: 14 ||
yelping_since: 2012 04 || votes: funny 0 useful 5 cool 0 || name: Aron || type: user || fans: 0 || average_stars: 1 0 || review_count: 1 ||
yelping_since: 2011 01 || votes: funny 123 useful 492 cool 213 || name: Aron || elite: 2012 2013 2014 2015 || type: user || compliments: profile 3 cute 3 funny 3 plain 27 writer 13 list 1 note 8 hot 16 more 7 cool 27 || fans: 12 || average_stars: 4 24 || review_count: 378 ||
qqqqqqqqqqqqqqqqq
NOT FOUND qqqqqqqqqqqqqqqqq
exi==9100==
==9100== HEAP SUMMARY:
==9100== in use at exit: 0 bytes in 0 blocks
==9100== total heap usage: 1,212,396 allocs, 1,212,396 frees, 101,900,376 bytes allocated
==9100==
==9100== All heap blocks were freed -- no leaks are possible
==9100==
==9100== For counts of detected and suppressed errors, rerun with: -v
==9100== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)t
$ time echo "exit" | ./balancbst yelp_academic_dataset_user_alternative.csv
Processing line 1000
Processing line 2000
...
Processing line 551000
Processing line 552000
Searching...
real 0m1.391s
user 0m1.279s
sys 0m0.097s