分叉()子进程

时间:2017-10-27 13:48:53

标签: c csv sorting fork parent-child

所以我正在编写一个程序来遍历目录中的每个文件/文件夹,对于它遇到的每个CSV文件,fork()然后对其进行排序并输出CSV文件。

排序发生在beginSort()中,它在CSV上运行mergesort然后输出。

这是问题...当我在默认主父进程上运行beginSort()时,它完全按预期工作。但是,当它从forked()子进程运行时,代码在此行170之后立即失败:

fgets(titleRow.rowValue, 999, fp);

我无法理解正在发生的事情。当我在这一行之前设置一个断点时... GDB仍然只是通过它运行,我无法获得有意义的信息。这是完整的sorter.c文件:

#include "Sorter.h"
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include <unistd.h>
#include <dirent.h>


int isString(char* string) {

    int i = 0;
    int decimal = 0;

    // empty string
    if(string == NULL || string == ""){
        return 1;
    }

    // goes character by character to check if its a number or string
    while(i < strlen(string)){

        if( !isdigit(string[i])){
            if(string[i] == '.' && !decimal){
                if(i == strlen(string) - 1 || !isdigit(string[i + 1])){
                    return 1;
                }
                decimal = 1;
                i++;
                continue;
            }
            return 1;
        }
        i++;
    }
    return 0;
}

// trims trailing and leading blank spaces in a string
char* removeWhitespace(char *string, int i) {

  char *final;

  while(isspace((unsigned char)*string)){
    string++;
  }

  if(*string == 0) {
    return string;
  }
  final = string + i;
  while(final > string && isspace((unsigned char)*final)) {
      final--;
  }
  *(final+1) = 0;
  return string;
}

// splits row by commas and places them into the structs
char** customStrTok(char* line, int sortedColumnNum) {

    int i = 0;
    int j = 0;
    int k = 0;

    // stores resulting fields
    char** result = (char**)malloc(sizeof(char*) * (sortedColumnNum + 1));

    char* container = (char*)malloc(500);

    // checks for quotation marks in string
    int boolIsQuote = 0;

    //go through each character
    while(i < strlen(line)){

        if(line[i] == '"' && boolIsQuote == 0){
            boolIsQuote = 1;
        }

        else if(line[i] == '"' && boolIsQuote == 1){
            //store value in result
            result[k] = (char*) malloc((j + 1) * sizeof(char));
            container = removeWhitespace(container, j - 1);
            strcpy(result[k], container);
            memset(&container[0], 0, strlen(container));
            boolIsQuote = 0;
            j = 0;
            k++;
            i++;
        }

        //splits row by columns
        else if((line[i] == ',' || i == strlen(line) - 1) && boolIsQuote != 1){
            //if there is no character; (eg: ,,)
            if(!container){
                container[0] = '\0';
            }
            if(i == strlen(line) - 1 && line[i] != '\n'){
                container[j] = line[i];
                j++;
            }
            // copy into result array
            result[k] = (char*)malloc((j+1) * sizeof(char));
            container = removeWhitespace(container, j - 1);

            strcpy(result[k], container);

            memset(&container[0], 0, strlen(container));

            j = 0;
            k++;

            // if comma is at the end
            if(line[i] == ',' && i == strlen(line) - 2){

                container[0] = '\0';

                result[k] = (char*)malloc((j+1) * sizeof(char));

                strcpy(result[k], container);
                memset(&container[0], 0, strlen(container));
            }

        } else{

            //copy into container
            if(j == 0){
                if(line[i] == ' '){
                    i++;
                    continue;
                }
            }
            container[j] = line[i];
            j++;
        }
        i++;
    }
    i = 0;

    return result;
}

void beginSort(char* selectedColumn, char* fileName){

    printf("%s\n", "Begin sort called");

    FILE* fp;

    fp = fopen(fileName, "r");

        row titleRow;
        int sortedColumnNum = 1;
        char *token;

        // sets up the row of column titles

        titleRow.rowValue = (char*) malloc (sizeof(char) * 1000);



        printf("%s\n", "Program stops here when on forked process");
        printf("I failed. This is my id, %d, and this is my parents id %d\n", getpid(), getppid());
        fgets(titleRow.rowValue, 999, fp);
        printf("%s\n", "Got passed fgets");


        titleRow.rowLength = strlen(titleRow.rowValue);
        titleRow.fields = (char**) malloc(sizeof(char *) * titleRow.rowLength);



        token = strtok(titleRow.rowValue, ",");
        titleRow.fields[0] = token;

        //Beginning splitting the tokens and check if the column name entered exists
        int selectedColumnExist = 0;



        while((token = strtok(NULL, ","))){



            titleRow.fields[sortedColumnNum] = token;

            //This removes the last whitespace value (\n) because for the last column in the CSV, the check would fail
            titleRow.fields[sortedColumnNum] = removeWhitespace(titleRow.fields[sortedColumnNum], strlen(titleRow.fields[sortedColumnNum]) - 1);


            if (strcmp(titleRow.fields[sortedColumnNum], selectedColumn) == 0){
                //the column exists
                selectedColumnExist = 1;

            }

            sortedColumnNum++;
        }

        if (selectedColumnExist != 1){
            printf("Sorry, the column you entered doesn't exist in the csv\n");
            return;
        }

        titleRow.sortedColumnNum = sortedColumnNum;

        int length = strlen(titleRow.fields[sortedColumnNum - 1]);
        if(titleRow.fields[sortedColumnNum - 1][length - 1] == '\n'){
            titleRow.fields[sortedColumnNum - 1][length - 2] = '\0';
        }

        // trim column titles
        int i = 0;
        while(i < sortedColumnNum){
            titleRow.fields[i] = removeWhitespace(titleRow.fields[i], strlen(titleRow.fields[i]) - 1);
            i++;
        }

        row *data;
        int numberOfRows;
        data = (row*) malloc (sizeof(row) * 15000); //size matters


        // non title rows, aka all the other ones
        row regularRow;
        regularRow.rowValue = (char*) malloc (sizeof(char) * 1000);
        int currentRow = 0;

        while(fgets(regularRow.rowValue, 999, fp) != NULL){
            regularRow.rowLength = strlen(regularRow.rowValue);
            regularRow.fields = (char**) malloc(sizeof(char *) * (sortedColumnNum+1));
            regularRow.fields = customStrTok(regularRow.rowValue, sortedColumnNum);
            data[currentRow++] = regularRow;
        }

        numberOfRows = currentRow;

        int columnToSort = 0;

        while(columnToSort < titleRow.sortedColumnNum){
            if(strcmp(titleRow.fields[columnToSort], selectedColumn) == 0){
                break;
            }
            columnToSort++;
        }

        //Call mergesort

        mergeSort(data, columnToSort, numberOfRows);



        //Export to a new file

        FILE *fp2;
        char* filename2;

        //make this work
        //filename2= strcat(fileName, "-sorted-.csv");
filename2="result.csv";

        printf("%s\n", "Begin export");
        printf("%s\n", filename2);


        fp2=fopen(filename2,"w+");


        int vv,zz;
        vv = 0;

            while(vv < sortedColumnNum){

                fprintf(fp2, titleRow.fields[vv]);

                if(vv != sortedColumnNum - 1){
                    fprintf(fp2, ",");
                }else{
                    fprintf(fp2, "\n");
                }

                vv++;
            }

            vv = 0;
            zz = 0;

            while(vv < numberOfRows){

                while(zz < sortedColumnNum){

                    fprintf(fp2, data[vv].fields[zz]);

                    if(zz != sortedColumnNum - 1){
                        fprintf(fp2, ",");
                    }else{
                        fprintf(fp2, "\n");
                    }

                    zz++;
                }
                vv++;
                zz = 0;
            }

            fclose(fp2);


}

void traverseDirectory(char* dirName, char* selectedColumn){

    DIR *dir;
    struct dirent *ent;
    if ((dir = opendir (dirName)) != NULL) {
      /* print all the files and directories within directory */
      while ((ent = readdir (dir)) != NULL) {
          char* itemName = ent->d_name;
          int length = strlen(itemName);
         int pid;


         //CSV FILE FOUND
          if (length > 0 && itemName[length - 1] == 'v'
                  && itemName[length - 2] == 's'
                && itemName[length - 3] == 'c'
                && itemName[length - 4] == '.' )
{
              //confirm if valid csv file (opens correctly and has valid headers)
                            pid = fork();
                            printf("%d\n", pid);
                            printf("%s\n", "CSV found");


}


                        switch(pid){
                        case 0:

                        beginSort(itemName, selectedColumn);
                            break;


                        case -1:
                            printf("%s\n", "Error creating fork");

                        default:
                            continue;
                        }





                        return;



         // printf("%d\n", strlen(itemName));



        //printf ("%s\n", itemName);
      }
      closedir (dir);
    } else {
      /* could not open directory */
      perror ("");
      return;
    }

    //CODE TO OPEN DIRECTORY GIVEN GOES HERE
    /*
     * DIR* test = opendir(dirName);
     * dirent* newfile = readdir(test);
     *
     */


    /*for (loop to iterate through everything in given dirName){
         *

         if (DIRECTORY){

             pid = fork();

             switch(pid){
             case 0:
                traverseDirectory(directoryName);


             case -1:
             error

             default:
             continue;

         }

        if (FILE){
            if (currentFile == CSV && currentFile == validCSV file){

                pid == fork();

            }

            switch(pid){
            case 0:
            beginSort(currentfile, selectedColumn)
                break;


            case -1:
                error

            default:
                continue;
            }



            }

            return;
        }*/


}

int main (int argc, char* argv[]){

    // column to sort by
    char* selectedColumn;

    if (strcmp(argv[1],"-c") != 0){
                printf("Sorry, you must use the -c flag to declare a column\n");
                return 1;
        }

    if (argc != 3 && argc != 5 && argc != 7){
        printf("Invalid argument size\n");
                    return 1;
    }

    selectedColumn = argv[2];


    //beginSort(selectedColumn, "movie_metadata.csv");

    if (argc == 3){
        traverseDirectory("./", selectedColumn);
    }



    if (argc == 5){
        //check if argv[3] == -d
        //{ do something }

        //check if arv[3] == -o
        //{ do something }
    }

    if (argc == 7){
        //check if argv[3] == -d && argv[5] == -o
        //
    }



    return 0;
}

任何人都有任何想法? GDB似乎在程序实际失败的行之前报告了seg故障方式,所以我在这里被撕裂了。

1 个答案:

答案 0 :(得分:1)

所以你的代码看起来有点像这样......我已经删除了与问题无关的行。

fp = fopen(fileName, "r");
// ....
fgets(titleRow.rowValue, 999, fp);

如果filename不存在,您的代码会发生什么?您应该始终检查函数的返回值,以确保它们有效。

您正在假设因为traverseDirectory找到了该文件,您可以将其打开。但是您忘记了itemNamedirName的关系。你需要将两者合并,以获得完整的文件名,以便能够打开它。

如何识别CSV文件也存在问题。

if (length > 0 && itemName[length - 1] == 'v'
    && itemName[length - 2] == 's'
    && itemName[length - 3] == 'c'
    && itemName[length - 4] == '.' )

如果length为3(例如),您最终会访问itemName[-1],这是不对的。你想确保长度至少为5(除非一个名为&#34; .csv&#34;的文件有效吗?)你可以使用strcmp来更容易看到你的内容这样做。

if (length > 4 && strcmp(itemName+length-4,".csv")==0)