解析到链表中的分段错误

时间:2015-05-21 02:19:33

标签: c parsing segmentation-fault

我有这个程序解析并将RSS提供到链表中。

#include"util.h"
#include<stdio.h>
#include<stdlib.h>
#include<sys/stat.h>
#include<string.h>

void parse_tc(){

    struct node *head = NULL;
    char *bytes = 0;

    struct stat st;
    stat("techcrunch.txt", &st);

    int size = st.st_size;
    FILE *f = fopen("techcrunch.txt", "rb");

    bytes = (char*)malloc(size + 1);
    size_t nread = fread(bytes,1,size,f);
    bytes[nread] = 0;
    fclose(f);


    struct node *temp = (struct node*)malloc(sizeof(struct node));
    temp->position = 1;
    printf("%d. ", temp->position);
    char *a = title_parser_tc(bytes,temp);
    head = temp;

    for(int i = 2; i<21; i++){
            temp = (struct node*)malloc(sizeof(struct node));
            temp->position = i;
            printf("%d. ", temp->position);
            a = title_parser_tc(a, temp);
            struct node* temp1 = head;

            while(temp1->link != NULL)
            {
                    temp1 = temp1->link;
            }
            temp1->link = temp;

    }

    free(bytes);
    int holder = 0;
    int check = 0;
    do {
            printf("Enter a number: ");
            scanf("%d", &holder);
            if(holder<1 || holder > 20){
                    puts("Invalid input");
                    check = 1;
            }
            else{
                    check = 0;
            }
    } while(check);

    get_feed(holder, head);
}

char* title_parser_tc(char *bytes, struct node *temp){


    char *ptr = strstr(bytes, "<title>");

    if (ptr) {

            ptr += 7;
            char *ptr2 = strstr(ptr, "</title>");

            if (ptr2) {

                    char* output = malloc(ptr2 - ptr + 1);
                    memcpy(output, ptr, ptr2 - ptr);
                    output[ptr2 - ptr] = 0;

                    if(strcmp(output,"TechCrunch")!=0){

                            temp->title = output;
                            puts(temp->title);
                            temp->link = NULL;free(output);

                            char *load = pubdate_parser_tc(ptr2, temp);
                            return load;
                    }
                    else{

                            char *load = title_parser_tc(ptr2, temp);
                            free(output);
                            return load;
                    }
            }
    }
    return NULL;
}

char* pubdate_parser_tc(char *bytes, struct node *temp){

    char *ptr = strstr(bytes, "<pubDate>");

    if (ptr) {

            ptr += 9;
            char *ptr2 = strstr(ptr, "</pubDate>");

            if (ptr2) {
                    char* output = malloc(ptr2 - ptr + 1);
                    memcpy(output, ptr, ptr2 - ptr);
                    output[ptr2 - ptr] = 0;
                    temp->pubdate = output;
                    free(output);
                    char *load = description_parser_tc(ptr2, temp);
                    return load;

            }
    }
    return NULL;
}

char* description_parser_tc(char *bytes, struct node *temp){

    char *ptr = strstr(bytes, "<description>");

    if (ptr) {

            ptr += 13;
            char *ptr2 = strstr(ptr, "</description>");
            if (ptr2){
                    char* output = malloc(ptr2 - ptr + 1);
                    memcpy(output, ptr, ptr2 - ptr);

                    output[ptr2 - ptr] = 0;
                    description_cleaner_tc(output, temp);
                    free(output);

                    char *load = url_parser_tc(ptr2, temp);
                    return load;
            }
    }
    return NULL;
}

void description_cleaner_tc(char *bytes, struct node *temp){

    char *ptr = strstr(bytes, "&amp;nbsp;");

    if (ptr) {

            ptr += 10;
            char *ptr2 = strstr(ptr, "&lt;a ");
            if (ptr2) {
                    char* output = malloc(ptr2 - ptr + 1);
                    memcpy(output, ptr, ptr2 - ptr);
                    output[ptr2 - ptr] = 0;
                    temp->description = output;
                    puts(temp->description);
                    free(output);
            }
    }
}

char* url_parser_tc(char *bytes, struct node *temp){
    char *ptr = strstr(bytes, "href");

    if (ptr) {

            ptr += 6;
            char *ptr2 = strstr(ptr, "&gt;");

            if (ptr2) {
                    char* output = (char*)malloc(ptr2 - ptr);
                    memcpy(output, ptr, ptr2 - ptr - 1);
                    output[ptr2 - ptr - 1] = 0;
                    temp->url = output;
                    puts(temp->pubdate);
                    puts(temp->url);
                    puts("");
                    free(output);
                    return ptr2;
            }
    }
    return NULL;
}

我的问题是,对于这个文件textcrunch.txt,我的程序在parse_tc()的第10个循环附近发生了分段错误。该程序适用于另一个文件,但此文件给我一个错误。任何解决方案?

对于要解析的不同字符串,代码基本上是相同的函数。

1 个答案:

答案 0 :(得分:0)

分段错误通常意味着解除引用空指针(或指向未初始化内存的指针)。如果您正在使用GCC或Clang,则可以使用string postOrderExample = @"<node value='11'> <node value='4'> <node value='1'/> <node value='2'/> <node value='3'/> </node> <node value='10'> <node value='8'> <node value='5'/> <node value='7'> <node value='6'/> </node> </node> <node value='9'/> </node> </node>"; var postOrderDoc = XDocument.Parse(postOrderExample); query = PostOrder(new[] { postOrderDoc.Root }, node => node.Elements()); foreach (var node in query) Console.WriteLine(node.Attribute("value").Value); 标志重新编译并通过-g运行生成的程序:

gdb

第一行启动gdb,提供您通常运行它的命令。 gdb --args .... r bt 开始运行,r从程序因分段错误停止的点开始回溯。至少这会为您提供代码中出现问题的位置。您可以使用print语句进行进一步的调试,或者在那里添加一些防御性编码。