出于某种原因,在单步调试我开发的守护进程时,GDB 停止了运行,而我找不出问题所在。问题是在我修改了一些代码之后出现的,程序报出 Segmentation fault 错误。为了追踪这个错误,我像以往遇到这种情况时一样,在 gdb 中运行守护进程,但这一次,我有问题的代码似乎让 GDB 本身崩溃了。这种情况下可能是哪里出了问题,应该如何处理?下面附上被修改的函数的执行跟踪。在第 826 行我通过函数指针动态调用了一个函数,也许与此有关?先谢谢了(TIA)。
Breakpoint 1, serve_outlinks_stage1 (conn_id=31, job_idx=241) at dependency.c:320
320 job_data=jobs[job_idx].data;
(gdb) step
322 fl_iocbs_top--;
(gdb)
323 if (fl_iocbs_top==0) {
(gdb)
327 iocb_idx=fl_iocbs[fl_iocbs_top];
(gdb)
328 memset(&iocbs[iocb_idx],0,sizeof(struct iocb));
(gdb)
329 iocb_ptrs[num_iocb_submits]=&iocbs[iocb_idx];
(gdb)
330 num_iocb_submits++;
(gdb)
331 io_prep_pread(&iocbs[iocb_idx],company_infos[cliconns[conn_id].company_idx].fd_dependencies,&jobs[job_idx].aux.outlinks_id,sizeof(uint),sizeof(t_dependency_t)*job_data->dep_id+offsetof(t_dependency_t,tail_outlinks_id));
(gdb)
io_prep_pread (iocb=0x620020 <iocbs+1216>, fd=20, buf=0x68f300 <jobs+13536>, count=4, offset=274) at /usr/include/libaio.h:173
173 memset(iocb, 0, sizeof(*iocb));
(gdb)
174 iocb->aio_fildes = fd;
(gdb)
175 iocb->aio_lio_opcode = IO_CMD_PREAD;
(gdb)
176 iocb->aio_reqprio = 0;
(gdb)
177 iocb->u.c.buf = buf;
(gdb)
178 iocb->u.c.nbytes = count;
(gdb)
179 iocb->u.c.offset = offset;
(gdb)
180 }
(gdb)
serve_outlinks_stage1 (conn_id=31, job_idx=241) at dependency.c:332
332 callback=iocb_idx;
(gdb)
333 io_set_callback(&iocbs[iocb_idx], (io_callback_t) callback);
(gdb)
io_set_callback (iocb=0x620020 <iocbs+1216>, cb=0x13) at /usr/include/libaio.h:168
168 iocb->data = (void *)cb;
(gdb)
169 }
(gdb)
serve_outlinks_stage1 (conn_id=31, job_idx=241) at dependency.c:334
334 aio_infos[iocb_idx].job_idx=job_idx;
(gdb)
335 aio_infos[iocb_idx].conn_id=conn_id;
(gdb)
336 aio_infos[iocb_idx].op_code=AIO_OP_READ_DEPENDENCY_OUTLINKS_ID;
(gdb)
337 jobs[job_idx].pending_ops++;
(gdb)
338 }
(gdb)
process_command (conn_id=31, job_idx=241) at depserv.c:493
493 break;
(gdb)
565 }
(gdb)
main_event_loop () at depserv.c:1087
1087 _assign_job(job_idx);
(gdb)
jobs_top=240, job_idx[jobs_top]=240
1088 memset(&jobs[job_idx],0,sizeof(jobs[job_idx]));
(gdb)
1089 jobs[job_idx].conn_id=conn_id;
(gdb)
1090 jobs[job_idx].company_idx=cliconns[conn_id].company_idx;
(gdb)
1075 while(count>0) {
(gdb)
1078 count = read (infd, &jobs[job_idx].dscmd, sizeof(dscmd_t));
(gdb)
1079 printf("count=%zd\n",count);
(gdb)
count=-1
1080 if (count>0) {
(gdb)
1098 if (count==0) {
(gdb)
1101 break;
(gdb)
1049 for (i = 0; i < n; i++) {
(gdb)
1108 } // while
(gdb)
982 usleep(2000); // for debugging because we need pending_aio_submits to be valid, should be removed for production
(gdb)
983 check_aio();
(gdb)
check_aio () at depserv.c:807
807 num_events = io_getevents(io_ctx, 0, MAX_IO_EVENTS, aio_events, NULL);
(gdb)
808 if (num_events<0) {
(gdb)
811 for(i=0;i<num_events;i++) {
(gdb)
843 if (num_iocb_submits>0) {
(gdb)
845 write_cycle();
(gdb)
write_cycle () at depserv.c:225
225 for(i=0,j=0;i<num_iocb_submits;i++) {
(gdb)
226 callback=(long) iocb_ptrs[i]->data;
(gdb)
227 iocb_idx=callback;
(gdb)
228 switch(aio_infos[iocb_idx].op_code) {
(gdb)
225 for(i=0,j=0;i<num_iocb_submits;i++) {
(gdb)
245 if (!j) return;
(gdb)
269 }
(gdb)
check_aio () at depserv.c:846
846 for(i=0;i<num_iocb_submits;i++) {
(gdb)
847 ret = io_submit(io_ctx, 1, &iocb_ptrs[i]);
(gdb)
848 if (ret<0) printf("bad iosubmit ret=%d\n",ret);
(gdb)
846 for(i=0;i<num_iocb_submits;i++) {
(gdb)
850 pending_aio_submits=pending_aio_submits+1;
(gdb)
856 num_iocb_submits=0;
(gdb)
858 }
(gdb)
main_event_loop () at depserv.c:985
985 n=MAX_IOCBS-fl_iocbs_top;
(gdb)
986 if (pending_aio_submits>n) {
(gdb)
990 if (recvfrom(identityd_socket, udp_buf, MAX_UDP_PACKET_SIZE, 0, (struct sockaddr*) &remote_addr, &slen)==-1) {
(gdb)
995 if (!announced) continue;
(gdb)
997 n = epoll_wait(listening_efd, listening_events, MAX_EPOLL_EVENTS_LISTEN, 1);
(gdb)
1000 for (i = 0; i < n; i++) {
(gdb)
1048 n = epoll_wait(accepted_efd, accepted_events, MAX_EPOLL_EVENTS_ACCEPTED, 1);
(gdb)
1049 for (i = 0; i < n; i++) {
(gdb)
1108 } // while
(gdb)
982 usleep(2000); // for debugging because we need pending_aio_submits to be valid, should be removed for production
(gdb)
983 check_aio();
(gdb)
check_aio () at depserv.c:807
807 num_events = io_getevents(io_ctx, 0, MAX_IO_EVENTS, aio_events, NULL);
(gdb)
808 if (num_events<0) {
(gdb)
811 for(i=0;i<num_events;i++) {
(gdb)
812 pending_aio_submits--;
(gdb)
813 iocb_ptr=aio_events[i].obj;
(gdb)
814 callback=(long) iocb_ptr->data;
(gdb)
815 iocb_idx=callback;
(gdb)
816 op_code=aio_infos[iocb_idx].op_code;
(gdb)
817 job_idx=aio_infos[iocb_idx].job_idx;
(gdb)
818 if (job_idx!=MAX_JOBS) {
(gdb)
819 jobs[job_idx].pending_ops--;
(gdb)
821 func=aio_op_funcs[op_code];
(gdb)
822 if (func==0) {
(gdb)
826 func(iocb_ptr,aio_events[i].res);
(gdb)
serve_outlinks_stage2 (iocb_ptr=0x620020 <iocbs+1216>, res=4) at dependency.c:346
346 if (res!=iocb_ptr->u.c.nbytes) { /// error
(gdb)
350 callback=(long) iocb_ptr->data;
(gdb)
351 iocb_idx=callback;
(gdb)
352 job_idx=aio_infos[iocb_idx].job_idx;
(gdb)
353 conn_id=aio_infos[iocb_idx].conn_id;
(gdb)
355 outlinks_id=jobs[job_idx].aux.outlinks_id;
(gdb)
356 job_data=(job_read_outlinks_t*) malloc(sizeof(job_read_outlinks_t));
(gdb)
357 if (!job_data) {
(gdb)
361 memset(&job_data,0,sizeof(job_read_outlinks_t));
(gdb) print outlinks_id
$1 = 2
(gdb) step
362 jobs[job_idx].data=job_data;
(gdb)
364 fl_iocbs_top--;
(gdb)
365 if (fl_iocbs_top==0) {
(gdb)
369 iocb_idx=fl_iocbs[fl_iocbs_top];
(gdb)
370 memset(&iocbs[iocb_idx],0,sizeof(struct iocb));
(gdb)
371 iocb_ptrs[num_iocb_submits]=&iocbs[iocb_idx];
(gdb)
372 num_iocb_submits++;
(gdb)
373 io_prep_pread(&iocbs[iocb_idx],company_infos[cliconns[conn_id].company_idx].fd_outlinks,&job_data->r_outlinks,sizeof(t_outlinks_t),sizeof(t_outlinks_t)*outlinks_id);
(gdb)
io_prep_pread (iocb=0x61ffe0 <iocbs+1152>, fd=0, buf=0x10, count=32, offset=0) at /usr/include/libaio.h:173
173 memset(iocb, 0, sizeof(*iocb));
(gdb)
174 iocb->aio_fildes = fd;
(gdb)
175 iocb->aio_lio_opcode = IO_CMD_PREAD;
(gdb)
176 iocb->aio_reqprio = 0;
(gdb)
177 iocb->u.c.buf = buf;
(gdb)
178 iocb->u.c.nbytes = count;
(gdb)
179 iocb->u.c.offset = offset;
(gdb)
180 }
(gdb)
serve_outlinks_stage2 (iocb_ptr=0x620020 <iocbs+1216>, res=4) at dependency.c:374
374 callback=iocb_idx;
(gdb)
375 io_set_callback(&iocbs[iocb_idx], (io_callback_t) callback);
(gdb)
io_set_callback (iocb=0x61ffe0 <iocbs+1152>, cb=0x12) at /usr/include/libaio.h:168
168 iocb->data = (void *)cb;
(gdb)
169 }
(gdb)
serve_outlinks_stage2 (iocb_ptr=0x620020 <iocbs+1216>, res=4) at dependency.c:376
376 aio_infos[iocb_idx].job_idx=job_idx;
(gdb)
377 aio_infos[iocb_idx].conn_id=conn_id;
(gdb)
378 aio_infos[iocb_idx].op_code=AIO_OP_READ_OUTLINKS;
(gdb)
379 jobs[job_idx].pending_ops++;
(gdb)
380 return(0);
(gdb)
381 }
(gdb)
Warning:
Cannot insert breakpoint 0.
Cannot access memory at address 0x0
0x0000000000000000 in ?? ()
(gdb)
Cannot find bounds of current function
(gdb)
答案 0 :(得分:2)
我有问题的代码似乎让 GDB 本身崩溃了。
不,并没有。下面这段输出:
381 }
(gdb)
Warning:
Cannot insert breakpoint 0.
Cannot access memory at address 0x0
0x0000000000000000 in ?? ()
(gdb)
Cannot find bounds of current function
通常意味着您的程序已经跳转到了地址 0,而 GDB 无法在该地址为 step 命令设置内部断点。
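这种“返回到地址 0”最可能的原因是栈被破坏:函数的返回地址被 0 覆盖了。在问题的跟踪中,dependency.c 第 361 行的 memset(&job_data,0,sizeof(job_read_outlinks_t)) 传入的是局部指针变量 job_data 自身的地址,而不是它所指向的内存,因此很可能正是这条语句把栈上的内容(包括返回地址)清零了;随后 io_prep_pread 收到的 buf=0x10 也表明 job_data 已经变成了 NULL。正确的写法应为 memset(job_data,0,sizeof(job_read_outlinks_t))。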
您可以直接用 run 运行程序(而不是单步执行)来验证这一点。如果 run 也像下面这样终止:
Program received signal SIGSEGV, Segmentation fault.
0x0000000000000000 in ?? ()
那么我的猜测就得到了证实。接下来,该怎样抓住这个错误呢?
让我们举个例子:
#include <string.h>
/* Demo function: deliberately corrupts its own stack frame so the resulting GDB behaviour can be studied. */
int foo()
{
char buf[1]; /* only one byte of storage */
memset(buf, 0, 1024); /* intentional overflow: zeroes 1024 bytes starting at buf, running past the array and over the saved return address */
} /* no return statement, although main() uses the result */
/* Entry point of the demo: its only job is to call foo(). */
int main()
{
return foo(); /* foo's saved return address points back into main until the overflow in foo zeroes it */
}
首先我们在 foo 处设置断点并停下:
(gdb) b foo
Breakpoint 1 at 0x400535: file t.c, line 6.
(gdb) r
Starting program: /tmp/a.out
Breakpoint 1, foo () at t.c:6
6 memset(buf, 0, 1024);
接下来我们确认我们的(返回)堆栈仍然完好无损:
(gdb) bt
#0 foo () at t.c:6
#1 0x000000000040055b in main () at t.c:11
现在我们需要在堆栈上找到存储返回地址的位置:
(gdb) disas
Dump of assembler code for function foo:
0x000000000040052d <+0>: push %rbp
0x000000000040052e <+1>: mov %rsp,%rbp
0x0000000000400531 <+4>: sub $0x10,%rsp
=> 0x0000000000400535 <+8>: lea -0x1(%rbp),%rax
0x0000000000400539 <+12>: mov $0x400,%edx
0x000000000040053e <+17>: mov $0x0,%esi
0x0000000000400543 <+22>: mov %rax,%rdi
0x0000000000400546 <+25>: callq 0x400410 <memset@plt>
0x000000000040054b <+30>: leaveq
0x000000000040054c <+31>: retq
End of assembler dump.
这告诉我们返回地址位于 $rbp+8,我们确实在那里找到了它:
(gdb) x/a $rbp+8
0x7fffffffe2b8: 0x40055b <main+14>
然后我们在地址 0x7fffffffe2b8 上设置一个观察点,这样当该位置被覆盖时 GDB 就会停下:
(gdb) watch *(int**)0x7fffffffe2b8
Hardware watchpoint 2: *(int**)0x7fffffffe2b8
接着让程序继续运行:
(gdb) c
Continuing.
Hardware watchpoint 2: *(int**)0x7fffffffe2b8
Old value = (int *) 0x40055b <main+14>
New value = (int *) 0x0
memset () at ../sysdeps/x86_64/memset.S:79
79 ../sysdeps/x86_64/memset.S: No such file or directory.
现在我们正好停在栈缓冲区溢出覆盖返回地址的那个位置。使用 bt 可以确认栈现已被破坏(main 不再出现在回溯中):
(gdb) bt
#0 memset () at ../sysdeps/x86_64/memset.S:79
#1 0x000000000040054b in foo () at t.c:6
#2 0x0000000000000000 in ?? ()
最后,让我们看看对这个程序单步执行(step)是否会产生与原问题相同的结果。
(gdb) r
Starting program: /tmp/a.out
Breakpoint 1, foo () at t.c:6
6 memset(buf, 0, 1024);
(gdb) n
7 }
(gdb) s
Warning:
Cannot insert breakpoint 0.
Cannot access memory at address 0x0
0x0000000000000000 in ?? ()
是的,确实如此。 QED。