I am trying to sort an array of random numbers using odd-even transposition sort, but when I run the code I get a segmentation fault:
[islb:48966] *** Process received signal ***
[islb:48966] Signal: Segmentation fault (11)
[islb:48966] Signal code: Address not mapped (1)
[islb:48966] Failing at address: 0x28
[islb:48966] [ 0] /lib64/libpthread.so.0(+0xf810)[0x7fc3da4cb810]
[islb:48966] [ 1] /lib64/libc.so.6(memcpy+0xa3)[0x7fc3da1c7cf3]
[islb:48966] [ 2] /usr/local/lib/libopen-pal.so.6(opal_convertor_unpack+0x10b)[0x7fc3d9c372db]
[islb:48966] [ 3] /usr/local/lib/openmpi/mca_pml_ob1.so(mca_pml_ob1_recv_request_progress_match+0x138)[0x7fc3d58507a8]
[islb:48966] [ 4] /usr/local/lib/openmpi/mca_pml_ob1.so(mca_pml_ob1_recv_req_start+0x1b1)[0x7fc3d5850d11]
[islb:48966] [ 5] /usr/local/lib/openmpi/mca_pml_ob1.so(mca_pml_ob1_recv+0x139)[0x7fc3d5849489]
[islb:48966] [ 6] /usr/local/lib/libmpi.so.1(MPI_Recv+0xc0)[0x7fc3da742f40]
[islb:48966] [ 7] oddEven[0x40115a]
[islb:48966] [ 8] /lib64/libc.so.6(__libc_start_main+0xe6)[0x7fc3da161c36]
[islb:48966] [ 9] oddEven[0x400c19]
[islb:48966] *** End of error message ***
--------------------------------------------------------------------------
mpirun noticed that process rank 1 with PID 48966 on node islb exited on signal 11 (Segmentation fault).
--------------------------------------------------------------------------
The program gets as far as allocating the arrays; the error seems to occur around the scatter, because the print statement directly after the scatter call only prints for process 0 and then the error message appears.
Here is my code:
#include <stdio.h>
#include <math.h>
#include <malloc.h>
#include <time.h>
#include <string.h>
#include "mpi.h"

const int MAX = 10000;
int myid, numprocs;
int i, n, j, k, arrayChunk, minindex;
int A, B;
int temp;

int swap(int *x, int *y) {
    temp = *x;
    *x = *y;
    *y = temp;
    return 0;
}

int main(int argc, char **argv) {
    int* arr = NULL;
    int* value = NULL;
    MPI_Status status;
    //int arr[] = {16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1};
    srand(time(0));
    time_t t1, t2;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);

    if (myid == 0) {
        printf("Enter the number of elements you would like in the array \n");
        scanf("%d", &n);
        arrayChunk = n/numprocs;
        //printf("cpus: %d, #s per cpu: %d\n", numprocs, arrayChunk);

        //Allocate memory for the array
        arr = malloc(n * sizeof(int));
        value = malloc(n * sizeof(int));

        // Generate an array of size n random numbers and prints them
        printf("Elements in the array: ");
        for (i = 0; i < n; i++) {
            arr[i] = (rand() % 100) + 1;
            printf("%d ", arr[i]);
        }
        printf("\n");
        time(&t1);
    }

    if ((n % numprocs) != 0) {
        if (myid == 0)
            printf("Number of Elements are not divisible by numprocs \n");
        MPI_Finalize();
        return(0);
    }

    // Broadcast the size of each chunk
    MPI_Bcast(&arrayChunk, 1, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Scatter(&arr, arrayChunk, MPI_INT, &value, arrayChunk, MPI_INT, 0, MPI_COMM_WORLD);
    printf("Processor %d receives %d\n", myid, value[0]);

    for (i = 0; i < numprocs; i++) {
        if (i % 2 == 0) {
            if (myid%2 == 0) {
                MPI_Send(&value[0], arrayChunk, MPI_INT, myid + 1, 0, MPI_COMM_WORLD);
                MPI_Recv(&value[arrayChunk], arrayChunk, MPI_INT, myid + 1, 0, MPI_COMM_WORLD, &status);
                for (j = 0; j < (arrayChunk * 2 - 1); j++) {
                    minindex = j;
                    for (k = j + 1; k < arrayChunk * 2; k++) {
                        if (value[k] < value[minindex]) {
                            minindex = k;
                        }
                    }
                    if (minindex > j) {
                        swap(&value[j], &value[minindex]);
                    }
                }
                //printf("myid %d i: %d, %d\n", myid, i, value[0]);
            } else {
                MPI_Recv(&value[arrayChunk], arrayChunk, MPI_INT, myid - 1, 0, MPI_COMM_WORLD, &status);
                MPI_Send(&value[0], arrayChunk, MPI_INT, myid - 1, 0, MPI_COMM_WORLD);
                for (j = 0; j < (arrayChunk * 2 - 1); j++) {
                    minindex = j;
                    for (k = j + 1; k < arrayChunk * 2; k++) {
                        if (value[k] < value[minindex]) {
                            minindex = k;
                        }
                    }
                    if (minindex > j) {
                        swap(&value[j], &value[minindex]);
                    }
                }
                for (j = 0; j < arrayChunk; j++) {
                    swap(&value[j], &value[j + arrayChunk]);
                }
                //printf("myid %d i: %d, %d\n", myid, i, value[0]);
            }
        } else {
            if ((myid%2 == 1) && (myid != (numprocs-1))) {
                MPI_Send(&value[0], arrayChunk, MPI_INT, myid + 1, 0, MPI_COMM_WORLD);
                MPI_Recv(&value[arrayChunk], arrayChunk, MPI_INT, myid + 1, 0, MPI_COMM_WORLD, &status);
                for (j = 0; j < (arrayChunk * 2 - 1); j++) {
                    minindex = j;
                    for (k = j + 1; k < arrayChunk * 2; k++) {
                        if (value[k] < value[minindex]) {
                            minindex = k;
                        }
                    }
                    if (minindex > j) {
                        swap(&value[j], &value[minindex]);
                    }
                }
                //printf("myid %d i: %d, %d\n", myid, i, value[0]);
            } else if (myid != 0 && myid != (numprocs-1)) {
                MPI_Recv(&value[arrayChunk], arrayChunk, MPI_INT, myid - 1, 0, MPI_COMM_WORLD, &status);
                MPI_Send(&value[0], 1, MPI_INT, myid - 1, 0, MPI_COMM_WORLD);
                for (j = 0; j < (arrayChunk * 2 - 1); j++) {
                    minindex = j;
                    for (k = j + 1; k < arrayChunk * 2; k++) {
                        if (value[k] < value[minindex]) {
                            minindex = k;
                        }
                    }
                    if (minindex > j) {
                        swap(&value[j], &value[minindex]);
                    }
                }
                for (j = 0; j < arrayChunk; j++) {
                    swap(&value[j], &value[j + arrayChunk]);
                }
                //printf("myid %d i: %d, %d\n", myid, i, value[0]);
            }
        }
    }

    MPI_Gather(&value[0], arrayChunk, MPI_INT, &arr[0], arrayChunk, MPI_INT, 0, MPI_COMM_WORLD);

    if (myid == 0) {
        time(&t2);
        printf("Sorted array: ");
        for (i = 0; i < n; i++) {
            printf("%d ", arr[i]);
        }
        printf("\n");
        printf("Time in sec. %f\n", difftime(t2, t1));
    }

    // Free allocated memory
    if (arr != NULL) {
        free(arr);
        arr = NULL;
        free(value);
        value = NULL;
    }

    MPI_Finalize();
    return 0;
}
I'm not very familiar with C, and it's likely that I'm using malloc and/or addresses and pointers incorrectly, so it's probably something simple.
Sorry for the amount of code, but I thought it was better to provide all of it so it can be debugged properly.
Answer 0 (score: 0)
I would build the program with debugging information (most likely the -g compile flag), try to get a core dump, and then use the gdb debugger to find the bug. A core file is created when a process crashes; it holds an image of the process's memory at the moment of the crash.

If no core dump file is created after the program crashes, you need to figure out how to enable core dumps on your system. You can write a simple buggy program (for example one containing a=x/0; or a similar error) and experiment a little. The core dump may be named core, PID.core (where PID is the process ID of the crashed process), or something similar. Sometimes it is enough to set the core file size to unlimited with ulimit. Also have a look at the kernel.core_* sysctls on Linux.

Once you have a core dump, you can load it into gdb or a similar debugger (such as ddd):
gdb executable_file core
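Applied to this program, the workflow could look roughly like the following. This is only a sketch: the source file name oddEven.c and the process count are assumptions, and how core dumps are enabled varies between systems.

mpicc -g -O0 oddEven.c -o oddEven    # build with debugging information
ulimit -c unlimited                  # allow core files to be written in this shell
mpirun -np 4 ./oddEven               # reproduce the crash
gdb ./oddEven core                   # then e.g. 'bt' prints the backtrace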
Answer 1 (score: 0)
The problem is in your MPI_Scatter call. You try to scatter the information and store it in value, but if you look at the code above it, only rank 0 has allocated any memory for value. When any and all of the other ranks try to store data into value, you get a segmentation fault (and indeed you do). Instead, remove the value = malloc(...); line from the if block and put value = malloc(arrayChunk * sizeof(int)); after the MPI_Bcast. I haven't looked through the rest of the code to see whether there are problems elsewhere as well, but that is probably what causes the initial seg-fault.
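To make that concrete, below is a minimal, stripped-down sketch of the allocation pattern described above; it is not the asker's full program. The array size n is hard-coded purely for illustration, every rank allocates its own value buffer after the chunk size has been broadcast, and the buffers themselves (arr, value) are passed to MPI_Scatter/MPI_Gather.

#include <stdio.h>
#include <stdlib.h>
#include "mpi.h"

int main(int argc, char **argv) {
    int myid, numprocs, i;
    int n = 16;            /* made-up size for illustration; assumed divisible by numprocs */
    int arrayChunk = 0;
    int *arr = NULL, *value = NULL;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);

    if (myid == 0) {
        arrayChunk = n / numprocs;
        arr = malloc(n * sizeof(int));   /* the full array lives only on rank 0 */
        for (i = 0; i < n; i++)
            arr[i] = (rand() % 100) + 1;
    }

    /* Every rank learns the chunk size ... */
    MPI_Bcast(&arrayChunk, 1, MPI_INT, 0, MPI_COMM_WORLD);
    /* ... and every rank allocates its own chunk buffer before the scatter. */
    value = malloc(arrayChunk * sizeof(int));

    /* The send buffer (arr) is only significant on the root, so other ranks may leave it NULL. */
    MPI_Scatter(arr, arrayChunk, MPI_INT, value, arrayChunk, MPI_INT, 0, MPI_COMM_WORLD);
    printf("Processor %d receives %d\n", myid, value[0]);

    MPI_Gather(value, arrayChunk, MPI_INT, arr, arrayChunk, MPI_INT, 0, MPI_COMM_WORLD);

    free(value);
    if (myid == 0)
        free(arr);
    MPI_Finalize();
    return 0;
}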