所以我正在编写一个程序来遍历目录中的每个文件/文件夹,对于它遇到的每个CSV文件,fork()然后对其进行排序并输出CSV文件。
排序发生在beginSort()中,它在CSV上运行mergesort然后输出。
这是问题...当我在默认主父进程上运行beginSort()时,它完全按预期工作。但是,当它从forked()子进程运行时,代码在此行170之后立即失败:
fgets(titleRow.rowValue, 999, fp);
我无法理解正在发生的事情。当我在这一行之前设置一个断点时... GDB仍然只是通过它运行,我无法获得有意义的信息。这是完整的sorter.c文件:
#include "Sorter.h"
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include <unistd.h>
#include <dirent.h>
int isString(char* string) {
int i = 0;
int decimal = 0;
// empty string
if(string == NULL || string == ""){
return 1;
}
// goes character by character to check if its a number or string
while(i < strlen(string)){
if( !isdigit(string[i])){
if(string[i] == '.' && !decimal){
if(i == strlen(string) - 1 || !isdigit(string[i + 1])){
return 1;
}
decimal = 1;
i++;
continue;
}
return 1;
}
i++;
}
return 0;
}
// trims trailing and leading blank spaces in a string
char* removeWhitespace(char *string, int i) {
char *final;
while(isspace((unsigned char)*string)){
string++;
}
if(*string == 0) {
return string;
}
final = string + i;
while(final > string && isspace((unsigned char)*final)) {
final--;
}
*(final+1) = 0;
return string;
}
// splits row by commas and places them into the structs
char** customStrTok(char* line, int sortedColumnNum) {
int i = 0;
int j = 0;
int k = 0;
// stores resulting fields
char** result = (char**)malloc(sizeof(char*) * (sortedColumnNum + 1));
char* container = (char*)malloc(500);
// checks for quotation marks in string
int boolIsQuote = 0;
//go through each character
while(i < strlen(line)){
if(line[i] == '"' && boolIsQuote == 0){
boolIsQuote = 1;
}
else if(line[i] == '"' && boolIsQuote == 1){
//store value in result
result[k] = (char*) malloc((j + 1) * sizeof(char));
container = removeWhitespace(container, j - 1);
strcpy(result[k], container);
memset(&container[0], 0, strlen(container));
boolIsQuote = 0;
j = 0;
k++;
i++;
}
//splits row by columns
else if((line[i] == ',' || i == strlen(line) - 1) && boolIsQuote != 1){
//if there is no character; (eg: ,,)
if(!container){
container[0] = '\0';
}
if(i == strlen(line) - 1 && line[i] != '\n'){
container[j] = line[i];
j++;
}
// copy into result array
result[k] = (char*)malloc((j+1) * sizeof(char));
container = removeWhitespace(container, j - 1);
strcpy(result[k], container);
memset(&container[0], 0, strlen(container));
j = 0;
k++;
// if comma is at the end
if(line[i] == ',' && i == strlen(line) - 2){
container[0] = '\0';
result[k] = (char*)malloc((j+1) * sizeof(char));
strcpy(result[k], container);
memset(&container[0], 0, strlen(container));
}
} else{
//copy into container
if(j == 0){
if(line[i] == ' '){
i++;
continue;
}
}
container[j] = line[i];
j++;
}
i++;
}
i = 0;
return result;
}
void beginSort(char* selectedColumn, char* fileName){
printf("%s\n", "Begin sort called");
FILE* fp;
fp = fopen(fileName, "r");
row titleRow;
int sortedColumnNum = 1;
char *token;
// sets up the row of column titles
titleRow.rowValue = (char*) malloc (sizeof(char) * 1000);
printf("%s\n", "Program stops here when on forked process");
printf("I failed. This is my id, %d, and this is my parents id %d\n", getpid(), getppid());
fgets(titleRow.rowValue, 999, fp);
printf("%s\n", "Got passed fgets");
titleRow.rowLength = strlen(titleRow.rowValue);
titleRow.fields = (char**) malloc(sizeof(char *) * titleRow.rowLength);
token = strtok(titleRow.rowValue, ",");
titleRow.fields[0] = token;
//Beginning splitting the tokens and check if the column name entered exists
int selectedColumnExist = 0;
while((token = strtok(NULL, ","))){
titleRow.fields[sortedColumnNum] = token;
//This removes the last whitespace value (\n) because for the last column in the CSV, the check would fail
titleRow.fields[sortedColumnNum] = removeWhitespace(titleRow.fields[sortedColumnNum], strlen(titleRow.fields[sortedColumnNum]) - 1);
if (strcmp(titleRow.fields[sortedColumnNum], selectedColumn) == 0){
//the column exists
selectedColumnExist = 1;
}
sortedColumnNum++;
}
if (selectedColumnExist != 1){
printf("Sorry, the column you entered doesn't exist in the csv\n");
return;
}
titleRow.sortedColumnNum = sortedColumnNum;
int length = strlen(titleRow.fields[sortedColumnNum - 1]);
if(titleRow.fields[sortedColumnNum - 1][length - 1] == '\n'){
titleRow.fields[sortedColumnNum - 1][length - 2] = '\0';
}
// trim column titles
int i = 0;
while(i < sortedColumnNum){
titleRow.fields[i] = removeWhitespace(titleRow.fields[i], strlen(titleRow.fields[i]) - 1);
i++;
}
row *data;
int numberOfRows;
data = (row*) malloc (sizeof(row) * 15000); //size matters
// non title rows, aka all the other ones
row regularRow;
regularRow.rowValue = (char*) malloc (sizeof(char) * 1000);
int currentRow = 0;
while(fgets(regularRow.rowValue, 999, fp) != NULL){
regularRow.rowLength = strlen(regularRow.rowValue);
regularRow.fields = (char**) malloc(sizeof(char *) * (sortedColumnNum+1));
regularRow.fields = customStrTok(regularRow.rowValue, sortedColumnNum);
data[currentRow++] = regularRow;
}
numberOfRows = currentRow;
int columnToSort = 0;
while(columnToSort < titleRow.sortedColumnNum){
if(strcmp(titleRow.fields[columnToSort], selectedColumn) == 0){
break;
}
columnToSort++;
}
//Call mergesort
mergeSort(data, columnToSort, numberOfRows);
//Export to a new file
FILE *fp2;
char* filename2;
//make this work
//filename2= strcat(fileName, "-sorted-.csv");
filename2="result.csv";
printf("%s\n", "Begin export");
printf("%s\n", filename2);
fp2=fopen(filename2,"w+");
int vv,zz;
vv = 0;
while(vv < sortedColumnNum){
fprintf(fp2, titleRow.fields[vv]);
if(vv != sortedColumnNum - 1){
fprintf(fp2, ",");
}else{
fprintf(fp2, "\n");
}
vv++;
}
vv = 0;
zz = 0;
while(vv < numberOfRows){
while(zz < sortedColumnNum){
fprintf(fp2, data[vv].fields[zz]);
if(zz != sortedColumnNum - 1){
fprintf(fp2, ",");
}else{
fprintf(fp2, "\n");
}
zz++;
}
vv++;
zz = 0;
}
fclose(fp2);
}
void traverseDirectory(char* dirName, char* selectedColumn){
DIR *dir;
struct dirent *ent;
if ((dir = opendir (dirName)) != NULL) {
/* print all the files and directories within directory */
while ((ent = readdir (dir)) != NULL) {
char* itemName = ent->d_name;
int length = strlen(itemName);
int pid;
//CSV FILE FOUND
if (length > 0 && itemName[length - 1] == 'v'
&& itemName[length - 2] == 's'
&& itemName[length - 3] == 'c'
&& itemName[length - 4] == '.' )
{
//confirm if valid csv file (opens correctly and has valid headers)
pid = fork();
printf("%d\n", pid);
printf("%s\n", "CSV found");
}
switch(pid){
case 0:
beginSort(itemName, selectedColumn);
break;
case -1:
printf("%s\n", "Error creating fork");
default:
continue;
}
return;
// printf("%d\n", strlen(itemName));
//printf ("%s\n", itemName);
}
closedir (dir);
} else {
/* could not open directory */
perror ("");
return;
}
//CODE TO OPEN DIRECTORY GIVEN GOES HERE
/*
* DIR* test = opendir(dirName);
* dirent* newfile = readdir(test);
*
*/
/*for (loop to iterate through everything in given dirName){
*
if (DIRECTORY){
pid = fork();
switch(pid){
case 0:
traverseDirectory(directoryName);
case -1:
error
default:
continue;
}
if (FILE){
if (currentFile == CSV && currentFile == validCSV file){
pid == fork();
}
switch(pid){
case 0:
beginSort(currentfile, selectedColumn)
break;
case -1:
error
default:
continue;
}
}
return;
}*/
}
int main (int argc, char* argv[]){
// column to sort by
char* selectedColumn;
if (strcmp(argv[1],"-c") != 0){
printf("Sorry, you must use the -c flag to declare a column\n");
return 1;
}
if (argc != 3 && argc != 5 && argc != 7){
printf("Invalid argument size\n");
return 1;
}
selectedColumn = argv[2];
//beginSort(selectedColumn, "movie_metadata.csv");
if (argc == 3){
traverseDirectory("./", selectedColumn);
}
if (argc == 5){
//check if argv[3] == -d
//{ do something }
//check if arv[3] == -o
//{ do something }
}
if (argc == 7){
//check if argv[3] == -d && argv[5] == -o
//
}
return 0;
}
任何人都有任何想法? GDB似乎在程序实际失败的行之前报告了seg故障方式,所以我在这里被撕裂了。
答案 0 :(得分:1)
所以你的代码看起来有点像这样......我已经删除了与问题无关的行。
fp = fopen(fileName, "r");
// ....
fgets(titleRow.rowValue, 999, fp);
如果filename
不存在,您的代码会发生什么?您应该始终检查函数的返回值,以确保它们有效。
您正在假设因为traverseDirectory
找到了该文件,您可以将其打开。但是您忘记了itemName
与dirName
的关系。你需要将两者合并,以获得完整的文件名,以便能够打开它。
如何识别CSV文件也存在问题。
if (length > 0 && itemName[length - 1] == 'v'
&& itemName[length - 2] == 's'
&& itemName[length - 3] == 'c'
&& itemName[length - 4] == '.' )
如果length
为3(例如),您最终会访问itemName[-1]
,这是不对的。你想确保长度至少为5(除非一个名为&#34; .csv&#34;的文件有效吗?)你可以使用strcmp
来更容易看到你的内容这样做。
if (length > 4 && strcmp(itemName+length-4,".csv")==0)