我编写了一个并行版本的 Unix grep 程序。该程序在作为命令行参数给出的各个文件中搜索指定的模式。
在根进程中,我计算每个进程需要处理的文件数量,以及每个进程的第一个文件 ID。计算完成后,我把这两个值发送给每个非根进程。
在非根进程中,我接收文件数量和第一个文件 ID,并在 analyzeFile 函数中分析每个文件。在该函数中,我把包含所有匹配该模式的行的字符串发送回根进程。根进程收集完所有非根进程的数据后,打印出最终结果。
问题出现在我用超过 3 个进程运行该程序时:程序最终会发生段错误(segmentation fault)。
我毫无头绪。Valgrind 在基本检查中没有报告任何错误。
PS。我知道代码需要重构,但首先我要摆脱这个bug。
感谢您的帮助。
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include <string.h>
#include <stdbool.h>
#include <stdlib.h>
#include <mpi.h>
#define YEL "\x1B[33m"
#define BLU "\x1B[34m"
#define MAG "\x1B[35m"
#define RESET "\x1B[0m"
#define send_data_tag 1000
#define return_data_tag 1001
/*
 * A pair of line numbers together with a pair of positions.
 * NOTE(review): presumably describes a span of lines and the matching
 * character offsets in a text buffer; nothing in the visible code uses
 * this struct, so confirm the intended meaning before relying on it.
 */
struct linesRange
{
    int lineFrom, lineTo;   /* line-number bounds */
    int posFrom, posTo;     /* position bounds */
};
/*
 * Count the '\n' characters in a NUL-terminated string.
 *
 * str: NUL-terminated text to scan.
 * Returns the number of newline characters found before the terminator.
 */
int countNumOfLines(char *str)
{
    int newlines = 0;

    for (const char *cursor = str; *cursor != '\0'; ++cursor) {
        if (*cursor == '\n') {
            newlines++;
        }
    }
    return newlines;
}
/*
 * Locate the lineNumToFind-th newline in a NUL-terminated string.
 *
 * str:           NUL-terminated text to scan.
 * lineNumToFind: 1-based ordinal of the newline to find; 0 means "start of
 *                text".
 * Returns the index of that newline character, or 0 when lineNumToFind is 0
 * or the string holds fewer newlines than requested.
 */
long int findLinePos(char *str, int lineNumToFind)
{
    if (lineNumToFind == 0) {
        return 0;
    }

    long int newlinesSeen = 0;
    for (long int idx = 0; str[idx] != '\0'; ++idx) {
        if (str[idx] != '\n') {
            continue;
        }
        if (++newlinesSeen == lineNumToFind) {
            return idx;
        }
    }
    return 0;
}
/*
 * Find the first newline at or after a given offset.
 *
 * str:      NUL-terminated text to scan.
 * startPos: index at which the scan begins; must lie within the string.
 * Returns the index of the first '\n' found from startPos onwards, or 0 when
 * the rest of the string contains no newline.
 */
long int findNextLinePos(char *str, int startPos)
{
    const char *newline = strchr(str + startPos, '\n');

    if (newline == NULL) {
        return 0;
    }
    return (long int)(newline - str);
}
/*
 * Report whether `pattern` occurs entirely inside str[posStart, posEnd).
 *
 * str:      text to search; indices posStart..posEnd-1 must be readable.
 * pattern:  NUL-terminated string to look for.
 * posStart: first index examined.
 * posEnd:   one past the last index examined.
 * Returns true as soon as every character of the pattern has been matched
 * consecutively within the range, false otherwise.
 */
bool strstrn(char *str, char *pattern, long int posStart, long int posEnd)
{
    const int needleLen = strlen(pattern);
    long int matched = 0;
    long int cursor = posStart;

    while (cursor < posEnd) {
        if (str[cursor] != pattern[matched]) {
            /* Rewind to the start of the failed attempt; the increment
             * below then resumes one character after it. */
            cursor -= matched;
            matched = 0;
        } else if (++matched == needleLen) {
            return true;
        }
        ++cursor;
    }
    return false;
}
/*
 * Format one report entry for a matching line into dst (at most cap bytes,
 * including the terminator). Called with (NULL, 0) it only measures.
 * Returns what snprintf returns: the length the full entry needs, or a
 * negative value on error.
 */
static int formatMatchEntry(char *dst, size_t cap, const char *fileName,
                            int proc_id, int lineNum, const char *line)
{
    return snprintf(dst, cap,
                    BLU "Ścieżka:%s\tProces:%d\n" MAG "Numer linii:%d\t" RESET "%s\n",
                    fileName, proc_id, lineNum, line);
}

/*
 * Build a report of every line of `text` that contains `inputPattern`.
 *
 * text:         NUL-terminated, WRITABLE buffer. Each newline is temporarily
 *               replaced by '\0' while its line is examined and restored
 *               afterwards, so the buffer is unchanged on return.
 * inputPattern: NUL-terminated substring to look for (plain strstr match).
 * fileName:     name printed in each report entry.
 * proc_id:      process number printed in each report entry.
 *
 * Returns a heap-allocated, NUL-terminated string (empty when nothing
 * matched) that the caller must free(). Returns NULL only if the initial
 * allocation fails; if growing the buffer fails later, the entries collected
 * so far are returned.
 */
char * searchPatternInText(char *text, char *inputPattern, char *fileName, int proc_id)
{
    size_t resultLen = 0;   /* tracked so we never re-scan resultBuf */
    char *resultBuf = malloc(1);
    if (resultBuf == NULL) {
        return NULL;
    }
    resultBuf[0] = '\0';

    int lineNum = 0;
    bool outOfMemory = false;
    char *curLine = text;
    while (curLine && !outOfMemory) {
        ++lineNum;
        char *nextLine = strchr(curLine, '\n');
        if (nextLine) {
            *nextLine = '\0';   /* isolate the current line for strstr */
        }

        if (strstr(curLine, inputPattern)) {
            /* First pass measures, second pass writes straight into the
             * result buffer; no temporary per-match allocation is needed. */
            int entryLen = formatMatchEntry(NULL, 0, fileName, proc_id, lineNum, curLine);
            if (entryLen > 0) {
                char *grown = realloc(resultBuf, resultLen + (size_t)entryLen + 1);
                if (grown == NULL) {
                    outOfMemory = true;   /* keep what we have; resultBuf is still valid */
                } else {
                    resultBuf = grown;
                    formatMatchEntry(resultBuf + resultLen, (size_t)entryLen + 1,
                                     fileName, proc_id, lineNum, curLine);
                    resultLen += (size_t)entryLen;
                }
            }
        }

        if (nextLine) {
            *nextLine = '\n';   /* restore the caller's buffer */
        }
        curLine = nextLine ? (nextLine + 1) : NULL;
    }
    return resultBuf;
}
/*
 * Search one already-opened regular file for `pattern` and deliver the
 * report.
 *
 * sb:       fstat() result for fd; sb.st_size gives the number of bytes read.
 * fd:       open, readable descriptor; this function always closes it.
 * fileName: name printed in the report entries.
 * pattern:  NUL-terminated substring to look for.
 * proc_id:  rank of the calling process. Rank 0 writes the report to stdout;
 *           every other rank sends it to rank 0 with return_data_tag.
 *
 * A non-zero rank sends exactly one message per call, even when the file is
 * empty or an error occurs (an empty message in that case), because the
 * root counts on one reply per file and would otherwise block forever.
 */
void analyzeFile(struct stat sb, int fd, char *fileName, char *pattern, int proc_id)
{
    char *result = NULL;
    size_t fileSize = (size_t)sb.st_size;

    /* mmap() rejects a zero length, so an empty file simply yields no result. */
    if (fileSize > 0) {
        char *text = mmap (0, fileSize, PROT_READ, MAP_SHARED, fd, 0);
        if (text == MAP_FAILED) {
            perror ("mmap");
        } else {
            /* The mapping is not NUL-terminated (and reading past it can
             * fault when the size is a multiple of the page size), so copy
             * exactly st_size bytes and terminate the private copy. The copy
             * is also needed because the search temporarily writes to it. */
            char *tempTxt = malloc(fileSize + 1);
            if (tempTxt == NULL) {
                perror ("malloc");
            } else {
                memcpy(tempTxt, text, fileSize);
                tempTxt[fileSize] = '\0';
                result = searchPatternInText(tempTxt, pattern, fileName, proc_id);
                free(tempTxt);
            }
            if (munmap(text, fileSize) == -1) {
                perror ("munmap");
            }
        }
    }

    if (close (fd) == -1) {
        perror ("close");
    }

    if (proc_id != 0) {
        char empty[] = "";
        char *payload = result ? result : empty;
        /* NOTE(review): the count is an int; a report above INT_MAX bytes
         * would need to be split. */
        MPI_Send(payload, (int)strlen(payload), MPI_CHAR,
                 0, return_data_tag, MPI_COMM_WORLD);
    } else if (result) {
        /* fputs, not printf: file content must never be used as a format. */
        fputs(result, stdout);
    }
    free(result);
}
int main (int argc, char *argv[])
{
int patternLen = 10;
char pattern [patternLen + 1];
float elapsed;
int numOfFiles = argc - 2;
int proc_id, num_procs, procIterator, filesIterator, root = 0;
int *numOfFilesPerProcess, *firstFileIdPerProcess;
if (argc < 3) {
fprintf (stderr, "usage: %s <file>\n", argv[0]);
return 1;
}
else
{
strncpy(pattern, argv[1], patternLen);
pattern[patternLen] = '\0';
}
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &proc_id);
MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
MPI_Barrier(MPI_COMM_WORLD);
double startTime = MPI_Wtime();
if(proc_id == root)
{
MPI_Status statusMaster;
numOfFilesPerProcess = (int*)malloc(sizeof(int) * num_procs);
firstFileIdPerProcess = (int*)malloc(sizeof(int) * num_procs);
for(int i = 0; i < num_procs; ++i)
{
numOfFilesPerProcess[i] = 0;
firstFileIdPerProcess[i] = 0;
}
filesIterator = 0;
procIterator = 0;
while(filesIterator < numOfFiles)
{
if(procIterator == num_procs)
{
procIterator = 0;
}
++numOfFilesPerProcess[procIterator];
++procIterator;
++filesIterator;
}
firstFileIdPerProcess[0] = 2;
//pierwszy numer argumentu argv[] per proces
for(int jj = 1; jj < num_procs; ++jj)
{
firstFileIdPerProcess[jj] = firstFileIdPerProcess[jj - 1] + numOfFilesPerProcess[jj - 1];
}
for(int j = 1; j < num_procs; ++j)
{
MPI_Send(&firstFileIdPerProcess[j], 1, MPI_UNSIGNED,
j, send_data_tag, MPI_COMM_WORLD);
MPI_Send(&numOfFilesPerProcess[j], 1, MPI_UNSIGNED,
j, send_data_tag, MPI_COMM_WORLD);
}
int firstFileIdForProcess = firstFileIdPerProcess[0];
int numOfFilesForProcess = numOfFilesPerProcess[0];
int fd;
struct stat sb;
for(int i = 0; i < numOfFilesForProcess; ++i)
{
fd = open (argv[firstFileIdForProcess + i], O_RDONLY);
if ( fd == -1
|| fstat (fd, &sb) == -1
|| !S_ISREG (sb.st_mode))
{
perror("file");
}
else
{
analyzeFile(sb, fd, argv[firstFileIdForProcess + i], pattern, proc_id);
}
}
for(int ii = numOfFilesPerProcess[0]; ii < numOfFiles; ++ii)
{
int resultLen;
char *result;
MPI_Probe(MPI_ANY_SOURCE, return_data_tag, MPI_COMM_WORLD, &statusMaster);
MPI_Get_count(&statusMaster, MPI_CHAR, &resultLen);
result = (char *)malloc(sizeof(char) * resultLen + 1);
MPI_Recv( result, resultLen, MPI_CHAR,
MPI_ANY_SOURCE, return_data_tag, MPI_COMM_WORLD, &statusMaster);
result[resultLen] = '\0';
printf(result);
free(result);
}
free(numOfFilesPerProcess);
free(firstFileIdPerProcess);
}
else
{
MPI_Status statusSlave;
int firstFileIdForProcess = 0;
int numOfFilesForProcess = 0;
int fd;
struct stat sb;
MPI_Recv( &firstFileIdForProcess, 1, MPI_UNSIGNED,
root, send_data_tag, MPI_COMM_WORLD, &statusSlave);
MPI_Recv( &numOfFilesForProcess, 1, MPI_UNSIGNED,
root, send_data_tag, MPI_COMM_WORLD, &statusSlave);
for(int i = 0; i < numOfFilesForProcess; ++i)
{
fd = open (argv[firstFileIdForProcess + i], O_RDONLY);
if ( fd == -1
|| fstat (fd, &sb) == -1
|| !S_ISREG (sb.st_mode))
{
perror("file");
}
else
{
analyzeFile(sb, fd, argv[firstFileIdForProcess + i], pattern, proc_id);
}
}
}
MPI_Barrier(MPI_COMM_WORLD);
double endTime = MPI_Wtime();
if(proc_id == root)
{
printf("Czas wykonania: %fs\n", endTime - startTime);
}
MPI_Finalize();
return 0;
}