我在C中创建一个程序,它将一个大文本文件分成10个段,然后创建10个线程,每个线程为每个段生成一个字数。我从此代码中获取了word_count
函数:https://github.com/prateek-khatri/seaOfC/blob/master/frequencyMultiThread.c。该程序对我来说很好,但是当我尝试在我自己的程序中使用word_count
时,它会在尝试获取缓冲区大小时崩溃。
函数getCurrentSegmentWordcount
似乎一切正常,但当该函数调用word_count
时,它会在行printf("sizeof Buff: %d", sizeof(buff));
处崩溃(分段错误)。
#define _GNU_SOURCE
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <unistd.h>
#define NUMBER_OF_THREADS 10
//struct taken from reference:
struct return_val{
char wordlist[100][100]; //[chars][lines]
int count[100];
} *arr; //array of words
void *print_hello_world(void * tid)
{
//This function prints the thread’s identifier and then exits.
printf("Hello World. Greetings from thread %d\n", tid);
pthread_exit(NULL);
}
void *word_count(void* num)
{
int *ln = num;
unsigned int line_number = *ln;
//line_number++;
printf("Thread %d\n",line_number);
char cmd_p1[9] = "sed -n '\0";
char cmd_p2[2];
sprintf(cmd_p2,"%d",line_number); //stores string in buffer
char cmd_p3[21] = "p' 'maintainers.txt'\0";
char command[100];
command[0] = '\0';
//char * strcat ( char * destination, const char * source );
//appends a copy of source to destination
strcat(command,cmd_p1);
strcat(command,cmd_p2);
strcat(command,cmd_p3);
usleep(line_number);
char cmd[100] = " | tr [:space:] '\\n' | grep -v '^\\s*$' | sort | uniq -c | sort\0";
strcat(command,cmd);
printf("Command: %s\n",command);
//fflush(stdout);
FILE *in;
in= popen(command, "r"); //read command and pipe into the shell
rewind(in); //set file position to beginning of 'in'
char buff[50];
int counter = 0;
//char * fgets ( char * str, int num, FILE * stream );
//reads chars from stream and stores them as string into buff until all of buffer has been read
printf("before\n");
bool testBool = fgets(buff,sizeof(buff),in);
printf("testBool: %d\n", testBool);
//CRASH HAPPENS HERE:
//buff
printf("sizeof Buff: %d", sizeof(buff));
while(fgets(buff,sizeof(buff),in))
{
printf("fire 0.5");
char c=' ';
int i = 0;
int cnt = atoi(buff); //converts string to int.. buff == # of chars in file?
arr[line_number-1].count[counter] = cnt; //at this point line_number == 1
printf("fire1\n");
while(c!='\0')
{
c=buff[i];
buff[i]=buff[i+6];
i++;
}
int cnnt = 0;
while(c!=' ')
{
c = buff[cnnt];
cnnt++;
}
i=0;
while(c!='\0')
{
c=buff[i];
buff[i]=buff[i+cnnt];
i++;
}
sprintf(arr[line_number-1].wordlist[counter],"%s",buff);
printf("%d %s",arr[line_number-1].count[counter],arr[line_number-1].wordlist[counter]);
counter++;
}
printf("final count: %d", counter);
arr[line_number-1].count[counter] = -1;
fclose(in);
//pthread_exit(NULL); //didn't help to move here from getCurrentSegment...()
return NULL;
}
void *getCurrentSegmentWordcount(void * tid) { //declaring file pointer (value?)
int segment = tid;
segment = segment + 1; //converts to int
printf("segment/thread: %d \n", segment);
char text[1000];
//char buffer[150];
FILE *fp = fopen("words.txt", "r");
if(fp == NULL) {
printf("null file");
}
int i = 0;
long lSize;
char *buffer;
if( !fp ) perror("words.txt"),exit(1);
fseek( fp , 0L , SEEK_END);
lSize = ftell( fp );
rewind( fp );
buffer = calloc( 1, lSize+1 );
if( !buffer ) fclose(fp),fputs("memory alloc fails",stderr),exit(1);
if( 1!=fread( buffer , lSize, 1 , fp) )
fclose(fp),free(buffer),fputs("entire read fails",stderr),exit(1);
//printf(buffer);
char *token = strtok(buffer, "~");
if(segment == 1) {
printf("segment 1: %s", token);
word_count(&segment);
}
if(segment == 2) {
token = strtok(NULL,"~");
printf("segment 2: %s", token);
}
if(segment == 3) {
token = strtok(NULL,"~");
token = strtok(NULL,"~");
printf("segment 3: %s", token);
}
if(segment == 4) {
token = strtok(NULL,"~");
token = strtok(NULL,"~");
token = strtok(NULL,"~");
printf("segment 4: %s", token);
}
fclose(fp);
free(buffer);
//pthread_exit(NULL);//moving to end of word_count()
}
int main(int argc, char *argv[])
{
//The main program creates x threads and then exits.
pthread_t threads[NUMBER_OF_THREADS];
int status, i;
for(i=0; i < NUMBER_OF_THREADS; i++) {
printf("Main here. Creating thread %d\n", i+1);
status = pthread_create(&threads[i], NULL, getCurrentSegmentWordcount, (void * )i);
if (status != 0) {
printf("Oops. pthread create returned error code %d\n", status);
exit(-1);
}
}
sleep(8);
exit(NULL);
}
输出:
Main here. Creating thread 1
Main here. Creating thread 2
segment/thread: 1
Main here. Creating thread 3
segment 1: test(segment 1, handled my thread 1)
Thread 1
Main here. Creating thread 4
Command: sed -n '1p' 'maintainers.txt' | tr [:space:] '\n' | grep -v '^\s*$' | sort | uniq -c | sort
Main here. Creating thread 5
segment/thread: 2
before
segment/thread: 4
Main here. Creating thread 6
segment 4:
test test test test (segment 4, handled by thread 4)
Main here. Creating thread 7
segment 2:
test test (segment 2, handled by thread 2)
Main here. Creating thread 8
Main here. Creating thread 9
Main here. Creating thread 10
segment/thread: 3
segment 3:
test test test (segment 3, handled by thread 3)
segment/thread: 10
segment/thread: 9
segment/thread: 8
segment/thread: 5
segment/thread: 6
segment/thread: 7
testBool: 1
Makefile:20: recipe for target 'all' failed
make: *** [all] Segmentation fault (core dumped)
答案 0 :(得分:1)
此代码存在许多问题,其中一些已经提到过 user3629249,所以我会尝试总结这些错误。
为线程的参数传递(void * )i
是相当丑陋的。当然可以
但这对我来说是草率的编程,我会声明一个int
数组并填充
它带有id值并传递一个指向位置的指针。
int ids[NUMBER_OF_THREADS];
for(i=0; i < NUMBER_OF_THREADS; i++) {
ids[i] = i+1;
status = pthread_create(&threads[i], NULL, getCurrentSegmentWordcount, ids + i);
...
}
然后在线程函数中:
void *getCurrentSegmentWordcount(void * tid) { //declaring file pointer (value?)
int segment = *((int*) tid);
// segment = segment + 1; not needed anymore
...
}
此代码更干净,更易于理解,也适合代码审阅者, 不会转发丑陋的不必要的演员阵容,而且更便携。
与
相同void *print_hello_world(void *tid)
{
//This function prints the thread’s identifier and then exits.
printf("Hello World. Greetings from thread %d\n", tid);
pthread_exit(NULL);
}
这是uggly,你试图将指针作为int
传递。一个大小
指针可能与int
的大小不同。使用相同的方式
将指针传递给int(例如getCurrentSegmentWordcount
):
void *print_hello_world(void *tid)
{
//This function prints the thread’s identifier and then exits.
printf("Hello World. Greetings from thread %d\n", *((int*) tid));
pthread_exit(NULL);
}
将错误消息写入stderr
。因此,打开此FILE
缓冲区,
这是人们对计划的期望。当你执行一个程序时,你
可以做到这一点:
$ program 2>/tmp/error.log
or this
$ program 2>/dev/null | some_other_tool
这样您就可以将正常输出与错误输出分开。
当系统功能失败时,errno
变量将设置为错误代码。
您可以使用perror
作为标准错误消息,或者如果您想要自定义错误消息,
使用strerror
:
pid_t p = fork();
if(p < 0)
{
perror("fork failed");
// or
fprintf(stderr, "Error while executing fork: %s\n", strerror(errno));
return; // or exit or whatever
}
如果要进入C混淆比赛,可以在一行中编写代码, 否则不要这样做。这对你来说很难读,很难读懂 代码审查员/同事/上级。你从中获得了一切。
而不是
if( !buffer ) fclose(fp),fputs("memory alloc fails",stderr),exit(1);
DO
if(buffer == NULL)
{
fclose(fp);
fputs("memory alloc fails", stderr);
exit(EXIT_FAILURE); // or exit(your_exit_status)
}
每个人都更容易阅读。
您应该始终检查返回指针的函数的返回值。
检查malloc
,calloc
,realloc
,strtok
等的返回值。
if(segment == 2) {
token = strtok(NULL,"~");
printf("segment 2: %s", token);
}
如果strtok
返回NULL
,则printf
行会产生未定义的行为。
见3.5.3.3 comment 2:
概要
#define __STDC_WANT_LIB_EXT1__ 1 #include <stdio.h> int printf_s(const char * restrict format, ...);
[...]
2 格式不应为空指针。
%n
说明符(由标志,字段宽度或精度修改或不修改)不应出现在格式指向的字符串中。 与printf_s
说明符对应的%s
的任何参数都不应为空指针。[...]
4
printf_s
函数等效于printf
函数,但上面列出的显式运行时约束除外。
某些libc实现可能会原谅您将NULL
传递给printf
%s
并打印(null)
,但这不是可移植的,并且是未定义的行为。那么你
如果printf
不是token
,则只能执行NULL
。
word_count
函数有点可怕,特别是你的构造方式
命令。
char cmd_p1[9] = "sed -n '\0";
可以改写为
char cmd_p1[] = "sed -n '";
这将创建一个具有正确字节数的char
数组并初始化
它使用有效的以0结尾的字符串,无需添加&#39; \0
&#39;自己。
相同的命令,意味着他们不需要来自a的值
变量可以存储在char[]
中,甚至存储在const char*
中。然后构建
整个事情snprintf
和sprintf
,线条少,错误少:
void *word_count(void* num)
{
...
const char *pipe_cmd = "| tr [:space:] '\\n' | grep -v '^\\s*$' | sort | uniq -c | sort";
const char *format = "sed -n '%dp' 'maintainers.txt' %s";
int cmd_size = snprintf(NULL, 0, format, line_number, pipe_cmd);
char *command = malloc(cmd_size + 1);
if(command == NULL)
return NULL;
sprintf(command, format, line_number, pipe_cmd);
...
FILE *in;
in= popen(command, "r");
free(command);
...
}
另请注意
char cmd_p2[2];
sprintf(cmd_p2,"%d",line_number); //stores string in buffer
如果行号大于9,将溢出缓冲区。
bool testBool = fgets(buff,sizeof(buff),in);
printf("testBool: %d\n", testBool);
fgets
返回指向char
的指针,而不是bool
。 printf
将打印出来
指针的值为整数。指针大小不一定相同
大小int
,实际上在我的系统上,指针长度为8个字节,int
为4个字节
长。你应该这样做:
if(fgets(buff, sizeof(buff), in))
puts("fgets success");
//CRASH HAPPENS HERE:
//buff
printf("sizeof Buff: %d", sizeof(buff));
sizeof
,它不会崩溃。 sizeof
在编译时进行评估,
不是在运行时间。sizeof
- 运算符返回size_t
。 %d
不是size_t
的正确说明符,%lu
是,它应该是
printf("sizeof buff: %lu\n", sizeof buff);
由于之前的所有未定义行为,它很可能会崩溃 这一点。
arr[line_number-1].count[counter] = cnt;
在整个代码中,arr
未初始化,因此您正在访问某个值
通过未初始化的指针。这种未定义的行为可能导致一个
段错误。
我想在这里引用user3629249:
user3629249写了:
main()
函数正在启动多个线程,然后立即退出。退出过程还消除了线程Suggest:在main()
中为每个线程调用pthread_join()
。在线程中,最后调用pthread_exit()
请不要忽略编译器警告,他们不会惹恼你,他们是 有帮助你。他们暗示你所做的事可能不是你的 真的很想要。未定义的行为,段错误等通常是由此引起的 那。所以注意编译器的警告,当你看到它时,看看你的代码, 试着理解它并修复它。如果你不理解警告,你可以 来这里问一个问题。但有成千上万的警告和 忽视它们会导致令人头痛的问题,并且浪费了很多时间 你和我们的一方。
所以,请修复所有这些警告和细节,查看警告信息 编译器和代码可能没有问题。