检索sizeof(buff)时c程序崩溃

时间:2018-02-10 22:40:04

标签: c pthreads

我在C中创建一个程序,它将一个大文本文件分成10个段,然后创建10个线程,每个线程为每个段生成一个字数。我从此代码中获取了word_count函数:https://github.com/prateek-khatri/seaOfC/blob/master/frequencyMultiThread.c。该程序对我来说很好,但是当我尝试在我自己的程序中使用word_count时,它会在尝试获取缓冲区大小时崩溃。

函数getCurrentSegmentWordcount似乎一切正常,但当该函数调用word_count时,它会在行printf("sizeof Buff: %d", sizeof(buff));处崩溃(分段错误)。

#define _GNU_SOURCE
#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#define NUMBER_OF_THREADS 10

/*
 * Word-frequency table (layout taken from the reference program):
 * wordlist[i] holds the text of entry i and count[i] the number stored
 * for that same entry.
 */
struct return_val {
    char wordlist[100][100]; /* up to 100 entries of at most 100 bytes each */
    int count[100];          /* count[i] belongs to wordlist[i] */
};

/*
 * Tables indexed by (line number - 1). This is only a pointer: it starts
 * out NULL and must be pointed at real storage before it is indexed.
 */
struct return_val *arr;

/*
 * Thread entry point: print a greeting containing the thread's identifier,
 * then terminate the calling thread.
 *
 * tid: the identifier, carried in the pointer value itself (the pointer is
 *      never dereferenced) - the same encoding main() uses for the thread
 *      arguments it passes to pthread_create().
 *
 * Does not return normally; pthread_exit() ends the thread with a NULL
 * result.
 */
void *print_hello_world(void * tid)
{
    /* %d needs an int argument: handing it the raw pointer is undefined
       behaviour, so convert the value back through intptr_t first. */
    printf("Hello World. Greetings from thread %d\n", (int)(intptr_t)tid);
    pthread_exit(NULL);
}

void *word_count(void* num)
{ 

    int *ln = num;
    unsigned int line_number = *ln;
    //line_number++;

    printf("Thread %d\n",line_number);

    char cmd_p1[9] = "sed -n '\0";
    char cmd_p2[2];
    sprintf(cmd_p2,"%d",line_number); //stores string in buffer
    char cmd_p3[21] = "p' 'maintainers.txt'\0";
    char command[100];
    command[0] = '\0';

    //char * strcat ( char * destination, const char * source );
    //appends a copy of source to destination
    strcat(command,cmd_p1);
    strcat(command,cmd_p2);
    strcat(command,cmd_p3);
    usleep(line_number);

    char cmd[100] = " | tr [:space:] '\\n' | grep -v '^\\s*$' | sort | uniq -c | sort\0";
    strcat(command,cmd);
    printf("Command: %s\n",command);
    //fflush(stdout);



    FILE *in;
    in= popen(command, "r"); //read command and pipe into the shell
    rewind(in); //set file position to beginning of 'in'
    char buff[50];
    int counter = 0;


    //char * fgets ( char * str, int num, FILE * stream );
    //reads chars from stream and stores them as string into buff until all of buffer has been read
    printf("before\n");
    bool testBool = fgets(buff,sizeof(buff),in);
    printf("testBool: %d\n", testBool);


    //CRASH HAPPENS HERE:
    //buff 
    printf("sizeof Buff: %d", sizeof(buff));


    while(fgets(buff,sizeof(buff),in))
    {
        printf("fire 0.5");
        char c=' ';
        int i = 0;
        int cnt = atoi(buff); //converts string to int.. buff == # of chars in file?
        arr[line_number-1].count[counter] = cnt; //at this point line_number == 1
        printf("fire1\n");

        while(c!='\0')
        {
            c=buff[i];
            buff[i]=buff[i+6];
            i++;
        }


        int cnnt = 0;
        while(c!=' ')
        {
            c = buff[cnnt];
            cnnt++;
        }
        i=0;
        while(c!='\0')
        {
            c=buff[i];
            buff[i]=buff[i+cnnt];
            i++;
        }
        sprintf(arr[line_number-1].wordlist[counter],"%s",buff);
        printf("%d %s",arr[line_number-1].count[counter],arr[line_number-1].wordlist[counter]);
        counter++;
    }
    printf("final count: %d", counter);
    arr[line_number-1].count[counter] = -1;


    fclose(in);



    //pthread_exit(NULL); //didn't help to move here from getCurrentSegment...()
    return NULL;
}



/*
 * Thread entry point: report one '~'-separated segment of words.txt.
 *
 * tid: zero-based thread index carried in the pointer value itself (the
 *      pointer is never dereferenced); the segment handled is tid + 1.
 *
 * The whole file is read into memory. For segments 1 to 4 the matching
 * segment is printed to stdout (or a note goes to stderr when the file has
 * fewer segments); segment 1 additionally runs word_count(). Higher
 * segments only read the file.
 *
 * If the file cannot be opened, measured, buffered or read, a message is
 * written to stderr and the whole process exits with status 1.
 *
 * Returns NULL.
 */
void *getCurrentSegmentWordcount(void * tid) {
    enum { LAST_PRINTED_SEGMENT = 4 };

    int segment = (int)(intptr_t)tid + 1;
    printf("segment/thread: %d \n", segment);

    FILE *fp = fopen("words.txt", "r");
    if (fp == NULL) {
        perror("words.txt");
        exit(1);
    }

    /* Measure the file so it can be loaded in a single read. */
    long lSize = -1;
    if (fseek(fp, 0L, SEEK_END) == 0) {
        lSize = ftell(fp);
    }
    if (lSize < 0) {
        perror("words.txt");
        fclose(fp);
        exit(1);
    }
    rewind(fp);

    /* One extra zeroed byte keeps the contents NUL-terminated. */
    char *buffer = calloc(1, (size_t)lSize + 1);
    if (buffer == NULL) {
        fclose(fp);
        fputs("memory alloc fails", stderr);
        exit(1);
    }

    /* Counting bytes (not one lSize-sized item) lets an empty file succeed. */
    if (fread(buffer, 1, (size_t)lSize, fp) != (size_t)lSize) {
        fclose(fp);
        free(buffer);
        fputs("entire read fails", stderr);
        exit(1);
    }
    fclose(fp);

    if (segment >= 1 && segment <= LAST_PRINTED_SEGMENT) {
        /*
         * strtok() keeps hidden static state that every thread would share,
         * so the segment is located with strspn()/strcspn() on this
         * thread's private copy of the file instead. As with strtok(),
         * runs of '~' count as a single separator.
         */
        char *token = NULL;
        char *cursor = buffer;
        for (int n = 0; n < segment; n++) {
            cursor += strspn(cursor, "~");
            if (*cursor == '\0') {
                token = NULL;
                break;
            }
            token = cursor;
            cursor += strcspn(cursor, "~");
        }

        if (token != NULL) {
            *cursor = '\0'; /* cut the buffer at the end of this segment */
            printf("segment %d: %s", segment, token);
        } else {
            /* Passing NULL to %s is undefined behaviour; say so explicitly. */
            fprintf(stderr, "segment %d: not present in words.txt\n", segment);
        }

        if (segment == 1) {
            word_count(&segment);
        }
    }

    free(buffer);
    return NULL;
}

int main(int argc, char *argv[])
{
    //The main program creates x threads and then exits.
    pthread_t threads[NUMBER_OF_THREADS];
    int status, i;

    for(i=0; i < NUMBER_OF_THREADS; i++) {
        printf("Main here. Creating thread %d\n", i+1);
        status = pthread_create(&threads[i], NULL, getCurrentSegmentWordcount, (void * )i);
        if (status != 0) {
            printf("Oops. pthread create returned error code %d\n", status);
            exit(-1);
        }
    }
    sleep(8);
    exit(NULL);
}

输出:

Main here. Creating thread 1
Main here. Creating thread 2
segment/thread: 1 
Main here. Creating thread 3
segment 1: test(segment 1, handled my thread 1)
Thread 1
Main here. Creating thread 4
Command: sed -n '1p' 'maintainers.txt' | tr [:space:] '\n' | grep -v '^\s*$' | sort | uniq -c | sort
Main here. Creating thread 5
segment/thread: 2 
before
segment/thread: 4 
Main here. Creating thread 6
segment 4: 
test test test test (segment 4, handled by thread 4)
Main here. Creating thread 7
segment 2: 
test test (segment 2, handled by thread 2)
Main here. Creating thread 8
Main here. Creating thread 9
Main here. Creating thread 10
segment/thread: 3 
segment 3: 
test test test (segment 3, handled by thread 3)
segment/thread: 10 
segment/thread: 9 
segment/thread: 8 
segment/thread: 5 
segment/thread: 6 
segment/thread: 7 
testBool: 1
Makefile:20: recipe for target 'all' failed
make: *** [all] Segmentation fault (core dumped)

1 个答案:

答案 0 :(得分:1)

此代码存在许多问题,其中一些 user3629249 已经提到过,所以我会尝试把这些错误总结一下。

把(void * )i作为线程参数传递相当丑陋。这样做当然可以,但在我看来是草率的编程方式。我会声明一个int数组,用id值填充它,然后把指向相应元素的指针传给线程。

int ids[NUMBER_OF_THREADS];

for(i=0; i < NUMBER_OF_THREADS; i++) {
    ids[i] = i+1;
    status = pthread_create(&threads[i], NULL, getCurrentSegmentWordcount, ids + i);
    ...
}

然后在线程函数中:

void *getCurrentSegmentWordcount(void * tid) { //declaring file pointer (value?)
    int segment = *((int*) tid);
    // segment = segment + 1; not needed anymore
    ...
}

这样的代码更干净,更易于理解,对代码审阅者也更友好,不需要丑陋且不必要的强制类型转换,而且可移植性更好。

下面这段代码也有同样的问题:
void *print_hello_world(void *tid)
{
    //This function prints the thread’s identifier and then exits.
    printf("Hello World. Greetings from thread %d\n", tid);
    pthread_exit(NULL);
}

这很丑陋:你试图把指针当作int传给printf,而指针的大小可能与int的大小不同。请采用同样的方式,传递指向int的指针(就像getCurrentSegmentWordcount那样):

void *print_hello_world(void *tid)
{
    //This function prints the thread’s identifier and then exits.
    printf("Hello World. Greetings from thread %d\n", *((int*) tid));
    pthread_exit(NULL);
}

请将错误消息写入stderr。这个FILE流正是为此而设的,这也是人们对一个程序的预期。执行程序时,你可以这样做:

$ program 2>/tmp/error.log

or this

$ program 2>/dev/null | some_other_tool

这样您就可以将正常输出与错误输出分开。

当系统函数失败时,errno变量会被设置为相应的错误代码。你可以用perror输出标准错误消息;如果想要自定义错误消息,请使用strerror:

pid_t p = fork();

if(p < 0)
{
    perror("fork failed");
    // or
    fprintf(stderr, "Error while executing fork: %s\n", strerror(errno));
    return; // or exit or whatever
}

如果你要参加C代码混淆大赛,可以把代码都写在一行里,否则不要这样做。这样的代码你自己难读,代码审阅者、同事、上级也难读,你从中得不到任何好处。

而不是

if( !buffer ) fclose(fp),fputs("memory alloc fails",stderr),exit(1);

应该写成

if(buffer == NULL)
{
    fclose(fp);
    fputs("memory alloc fails", stderr);
    exit(EXIT_FAILURE); // or exit(your_exit_status)
}

每个人都更容易阅读。

您应该始终检查返回指针的函数的返回值,例如malloc、calloc、realloc、strtok等的返回值。

if(segment == 2) {
    token = strtok(NULL,"~");
    printf("segment 2: %s", token);
}

如果strtok返回NULL,则printf行会产生未定义的行为。 见3.5.3.3 comment 2

  

3.5.3.3

     

概要

      #define __STDC_WANT_LIB_EXT1__ 1
      #include <stdio.h>
      int printf_s(const char * restrict format, ...);
     

[...]

     

2 格式不应为空指针。%n说明符(无论是否带有标志、字段宽度或精度修饰)不应出现在格式所指向的字符串中。与%s说明符对应的printf_s的任何参数都不应为空指针。

     

[...]

     

4 printf_s函数等效于printf函数,但上面列出的显式运行时约束除外。

某些libc实现可能会容忍把NULL传给printf的%s,并打印出(null),但这不可移植,而且属于未定义行为。因此,只有在token不是NULL时才能调用printf。

word_count函数有点可怕,特别是你构造命令的方式。

char cmd_p1[9] = "sed -n '\0";

可以改写为

char cmd_p1[] = "sed -n '";

这将创建一个字节数恰好正确的char数组,并用一个有效的、以0结尾的字符串对它进行初始化,无需自己添加'\0'。

命令的其他部分也一样:那些不需要变量值的部分可以存放在char[]中,甚至存放在const char*中。然后用snprintf或sprintf一次性构造出整条命令,代码行更少,错误也更少:

void *word_count(void* num)
{
    ...
    const char *pipe_cmd = "| tr [:space:] '\\n' | grep -v '^\\s*$' | sort | uniq -c | sort";
    const char *format = "sed -n '%dp' 'maintainers.txt' %s";

    int cmd_size = snprintf(NULL, 0, format, line_number, pipe_cmd);

    char *command = malloc(cmd_size + 1);
    if(command == NULL)
        return NULL;

    sprintf(command, format, line_number, pipe_cmd);

    ...

    FILE *in;
    in= popen(command, "r");
    free(command);
    ...
}

另请注意

char cmd_p2[2];
sprintf(cmd_p2,"%d",line_number); //stores string in buffer
如果行号大于9,将溢出缓冲区。

bool testBool = fgets(buff,sizeof(buff),in);
printf("testBool: %d\n", testBool);

fgets返回指向char的指针,而不是bool。printf会把指针的值当作整数打印出来。指针的大小不一定与int的大小相同,实际上在我的系统上,指针长8个字节,int长4个字节。你应该这样做:

if(fgets(buff, sizeof(buff), in))
    puts("fgets success");
//CRASH HAPPENS HERE:
//buff 
printf("sizeof Buff: %d", sizeof(buff));
  1. 崩溃并不是sizeof造成的。sizeof在编译时求值,而不是在运行时。
  2. sizeof运算符返回size_t。
  3. %d不是size_t的正确说明符,%zu才是,所以应该写成

    printf("sizeof buff: %zu\n", sizeof buff);
    
  4. 由于此前的种种未定义行为,程序很可能恰好在这一点崩溃。

  5. arr[line_number-1].count[counter] = cnt;
    

    在整个代码中,arr从未被初始化,因此您是在通过一个未初始化的指针访问内存。这种未定义行为可能导致段错误。

    我想在这里引用user3629249

      

    user3629249写了

         

    main()函数启动多个线程之后立即退出,而进程退出的同时也会终止这些线程。建议:在main()中为每个线程调用pthread_join();在线程函数的末尾调用pthread_exit()。

    请不要忽略编译器警告,它们不是为了惹恼你,而是为了帮助你。它们提示你:你所写的可能并不是你真正想要的。未定义行为、段错误等往往就是由此引起的。所以请留意编译器的警告,看到警告时,检查你的代码,试着理解并修复它。如果你看不懂某个警告,可以来这里提问。但是放任成千上万的警告不管,只会带来令人头痛的问题,并浪费你我双方的大量时间。

    所以,请修复所有这些警告和细节,仔细查看编译器给出的警告信息,之后代码也许就能正常运行了。