C程序将文本文件转换为CSV文件

时间:2019-03-25 22:29:37

标签: c csv text

问题是使用C编程将文本文件转换为CSV文件。输入文本文件的格式如下: JACK Maria Stephan Nora 20 34 45 28 London NewYork Toronto Berlin

输出的CSV文件应如下所示:

Jack,20,London
Maria,34,NewYork
Stephan,45,Toronto
Nora,28,Berlin

以下代码是到目前为止我尝试过的:

/*
 * Asker's attempt: copies characters from `filename` into "output.csv",
 * emitting an extra ",\n" after every space or newline it sees.
 * It does not transpose columns into rows.
 */
void  load_and_convert(const char* filename){
    FILE *fp1, *fp2;
    /* NOTE(review): fgetc() returns int; keeping the result in a char means
       EOF cannot be reliably told apart from a data byte. */
    char ch;

    /* NOTE(review): neither fopen() result is checked before it is used. */
    fp1=fopen(filename,"r");
    fp2=fopen("output.csv","w");

    /* Fixed 1000 iterations, independent of how long the input really is. */
    for(int i=0;i<1000;i++){
         ch=fgetc(fp1);
         /* Every character read is written to the output unchanged... */
         fprintf(fp2,"%c",ch);    
         /* ...and a space or newline is then written a second time,
            followed by a comma and a line break. */
         if(ch==' '|| ch=='\n')
              fprintf(fp2,"%c,\n",ch);
}
    fclose(fp1);
    fclose(fp2);

}

我的代码输出如下:

Jack,
Maria,
Stephan,
Nora,
20,
34,
45,
28,
London,
NewYork,
Toronto,
Berlin,

我应该如何修改我的代码以使其正常工作?

解决这个问题的思路是什么?

2 个答案:

答案 0 :(得分:0)

由于我正好有一些空闲时间,这里为您提供一个可行的解决方案(我已尽力使它尽可能优雅):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_STRING_LENGTH 50      /* bytes per text field, including the NUL */
#define MAX_NUMBER_OF_PEOPLE 50   /* capacity of the Person array the parsers fill */

/* One output record: a column of the input file (name, age, city). */
typedef struct
{
  char name[MAX_STRING_LENGTH];   /* always NUL-terminated; long input is truncated */
  int age;                        /* 0 when the input token was not a valid age */
  char city[MAX_STRING_LENGTH];   /* always NUL-terminated; long input is truncated */
} Person;

/*
 * Split src on the characters in delim and store the i-th token in
 * people[i].name.
 *
 * src    - writable, NUL-terminated line; it is modified by strtok().
 * delim  - set of delimiter characters.
 * people - array with room for at least MAX_NUMBER_OF_PEOPLE records.
 *
 * Tokens beyond MAX_NUMBER_OF_PEOPLE are ignored and tokens longer than
 * MAX_STRING_LENGTH - 1 characters are truncated, so the array is never
 * overrun and every stored string is terminated. A NULL argument is a no-op.
 */
void getName(char *src, char *delim, Person *people) {
  if (src == NULL || delim == NULL || people == NULL)
    return;

  char *ptr = strtok(src, delim);
  for (int i = 0; ptr != NULL && i < MAX_NUMBER_OF_PEOPLE; i++) {
    /* snprintf always terminates, unlike strncpy with a full-size count */
    snprintf(people[i].name, sizeof people[i].name, "%s", ptr);
    ptr = strtok(NULL, delim);
  }
}

/*
 * Split src on the characters in delim and store the i-th token, parsed as
 * a decimal integer, in people[i].age.
 *
 * A token that is not entirely a non-negative decimal number in the range
 * 0..32767 (the range every conforming int can hold) is stored as 0.
 * Tokens beyond MAX_NUMBER_OF_PEOPLE are ignored. A NULL argument is a no-op.
 */
void getAge(char *src, char *delim, Person *people) {
  if (src == NULL || delim == NULL || people == NULL)
    return;

  char *ptr = strtok(src, delim);
  for (int i = 0; ptr != NULL && i < MAX_NUMBER_OF_PEOPLE; i++) {
    char *end = NULL;
    long value = strtol(ptr, &end, 10);
    int valid = end != ptr && *end == '\0' && value >= 0 && value <= 32767;

    people[i].age = valid ? (int)value : 0;
    ptr = strtok(NULL, delim);
  }
}

/*
 * Split src on the characters in delim and store the i-th token in
 * people[i].city. Same limits and truncation rules as getName().
 */
void getCity(char *src, char *delim, Person *people) {
  if (src == NULL || delim == NULL || people == NULL)
    return;

  char *ptr = strtok(src, delim);
  for (int i = 0; ptr != NULL && i < MAX_NUMBER_OF_PEOPLE; i++) {
    snprintf(people[i].city, sizeof people[i].city, "%s", ptr);
    ptr = strtok(NULL, delim);
  }
}

/*
 * Read ./test.txt, whose first three non-blank lines hold the names, the
 * ages and the cities (space separated), and print one "name,age,city"
 * CSV record per person on stdout.
 *
 * Returns 0 on success, -1 if the input file cannot be opened.
 */
int main(void)
{
  /* Zero-initialised so unused slots have an empty name (end-of-data marker). */
  Person somebody[MAX_NUMBER_OF_PEOPLE] = {0};
  FILE *fp;
  char *line = NULL;   /* buffer owned by getline(); never reassigned here */
  size_t len = 0;
  int ln = 0;          /* number of non-blank lines consumed so far */

  fp = fopen("./test.txt", "r");
  if (fp == NULL) {
      perror("./test.txt");
      return -1;
  }

  // Read every line: first line is names, second is ages, third is cities.
  while (getline(&line, &len, fp) != -1) {
    // Cut the line ending in place; the getline() buffer pointer must stay
    // intact so it can be reused and freed.
    line[strcspn(line, "\r\n")] = '\0';

    // Skip lines with no fields so they do not shift the row meaning.
    if (line[strspn(line, " ")] == '\0')
      continue;

    if (ln == 0) {
      getName(line, " ", somebody);
    } else if (ln == 1) {
      getAge(line, " ", somebody);
    } else if (ln == 2) {
      getCity(line, " ", somebody);
    }
    // Any further lines are ignored rather than overwriting the cities.
    ln++;
  }

  for (int j = 0; j < MAX_NUMBER_OF_PEOPLE; j++) {
      if (somebody[j].name[0] == '\0')
        break;
      // No spaces after the commas: the requested output is "Jack,20,London".
      printf("%s,%d,%s\n", somebody[j].name, somebody[j].age, somebody[j].city);
  }

  fclose(fp);
  free(line);

  return 0;
}

答案 1 :(得分:0)

如果您想在将所有值都保留在内存中的前提下解决这个问题,那么需要做的事情并不简单,因为您需要把每行 4 个字段的 3 行数据转换为每行 3 个字段的 4 行数据。因此,当您的数据文件包含:

示例输入文件

$ cat dat/col2csv3x4.txt
JACK Maria Stephan Nora
20 34 45 28
London NewYork Toronto Berlin

您想阅读三行中的每一行,然后将列转置为行,以输出.csv。这意味着您最终将获得4行3个csv字段,例如

程序预期输出

$ ./bin/transpose2csv < dat/col2csv3x4.txt
JACK,20,London
Maria,34,NewYork
Stephan,45,Toronto
Nora,28,Berlin

执行此操作并不困难,但需要特别注意对象的内存存储以及分配/重新分配,以处理从 3 行 4 字段的数据到 4 行 3 字段的数据的转换。

一种方法是将所有原始行读入典型的 char 指针到指针(`char **`)结构中,然后将列转换/转置为行。由于可以想象下次可能会有 100 行、每行 500 个字段,因此您需要使用索引和计数器来跟踪分配和重新分配的需求,使最终的代码能够把任意数量的行和字段转换为:行数等于原始字段数、每行的值的个数等于原始行数的输出。

您可以将代码设计为由两个基本函数来完成转换。第一个函数读取并存储各行(例如 `getlines`),第二个函数再将这些行转置到一个新的 char 指针到指针结构中,以便将其输出为逗号分隔的值。

实现这两个函数的一种方法类似于下面的代码,它把文件名作为第一个参数来读取(如果没有给出参数,则默认从 stdin 读取)。这段代码并不简单,但也不难。只需跟踪所有分配,并保留指向每个分配起始位置的指针,以便在不再需要时可以释放内存,例如:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define NPTR 2      /* initial number of line pointers to allocate */
#define NWRD 128    /* size of a temporary word buffer */
#define MAXC 1024   /* size of a temporary line buffer */

/** getlines reads every line of fp into freshly allocated storage.
 *  The stream is rewound first. The pointer table starts at NPTR entries
 *  and doubles whenever it is full; each line is stored without its line
 *  ending in a block sized exactly for it. Before returning, the table is
 *  shrunk to the number of lines actually read (if that shrink fails the
 *  larger table is returned instead).
 *
 *  On return *n holds the number of lines stored. Returns NULL when the
 *  initial allocation fails or no line could be stored; otherwise the
 *  caller owns the table and every line in it.
 */
char **getlines (size_t *n, FILE *fp)
{
    char linebuf[MAXC];             /* scratch space for one input line */
    size_t capacity = NPTR;         /* pointers currently allocated */
    char **table = calloc (capacity, sizeof *table);

    if (table == NULL) {
        perror ("calloc-lines");
        return NULL;
    }

    *n = 0;
    rewind (fp);                    /* also clears the stream error state */

    while (fgets (linebuf, MAXC, fp) != NULL) {
        size_t length;
        char *copy;

        if (*n == capacity) {       /* table full: double it */
            char **grown = realloc (table, 2 * capacity * sizeof *table);
            if (grown == NULL) {
                perror ("realloc-tmp");
                break;              /* keep what has been read so far */
            }
            table = grown;
            memset (table + capacity, 0, capacity * sizeof *table);
            capacity *= 2;
        }

        length = strcspn (linebuf, "\r\n");     /* length without line ending */
        linebuf[length] = 0;

        copy = malloc (length + 1);
        table[*n] = copy;
        if (copy == NULL) {
            perror ("malloc-lines[*n]");
            break;
        }
        memcpy (copy, linebuf, length + 1);     /* includes the terminator */
        *n += 1;
    }

    if (*n == 0) {                  /* nothing stored: release the table */
        free (table);
        return NULL;
    }

    /* trim the table to exactly *n pointers; harmless if it fails */
    char **exact = realloc (table, *n * sizeof *table);
    if (exact == NULL) {
        perror ("final-realloc");
        return table;
    }

    return exact;
}

/** release a pointer-to-pointer structure: the first n blocks the table
 *  points to are freed, then the table itself.
 */
void freep2p (void *p2p, size_t n)
{
    char **table = p2p;
    size_t k = 0;

    while (k < n)
        free (table[k++]);

    free (table);
}

/** locate the next whitespace-delimited field in s.
 *  returns a pointer to the first character of the field and stores the
 *  field length in *len; *len is 0 when nothing but whitespace remains.
 */
static const char *next_field (const char *s, size_t *len)
{
    /* the characters sscanf "%s" treats as separators in the "C" locale */
    static const char ws[] = " \t\n\v\f\r";

    s += strspn (s, ws);
    *len = strcspn (s, ws);

    return s;
}

/** transpose the lines of fp into csv rows.
 *  all lines are read with getlines(); csv row k is then built from the
 *  k-th whitespace-delimited field of every line, joined with ','.
 *  transposition stops at the first column for which some line has no
 *  field left, so only complete rows are produced.
 *
 *  each row is measured before it is allocated, so fields and rows may be
 *  of any length.
 *
 *  on success returns an allocated pointer-to-pointer structure and sets
 *  *n to the number of rows in it (possibly 0); the caller releases it
 *  with freep2p (t, *n). on any failure everything is released and NULL
 *  is returned (with *n set to 0 when the failure occurred after the
 *  lines were read).
 */
char **transpose2csv (size_t *n, FILE *fp)
{
    char **l = NULL,        /* input lines */
         **t = NULL;        /* csv rows (result) */
    char *row, *w;
    const char *p;
    size_t  nlines = 0,     /* number of input lines */
            csvl = 0,       /* csv rows completed */
            ncsv = 0,       /* csv row pointers allocated */
            need, len, i,
            *offset = NULL; /* per-line read offset of the next field */

    if (!(l = getlines (n, fp))) {  /* read all lines to l */
        fputs ("error: getlines failed.\n", stderr);
        return NULL;
    }
    nlines = *n;
#ifdef DEBUG
    for (i = 0; i < nlines; i++)
        puts (l[i]);
#endif

    if (!(offset = calloc (nlines, sizeof *offset))) {
        perror ("calloc-offsets");
        goto fail;
    }

    ncsv = nlines;                  /* initial guess, grown on demand */
    if (!(t = malloc (ncsv * sizeof *t))) {
        perror ("malloc-t");
        goto fail;
    }

    for (;;) {  /* one iteration per csv row (input column) */
        /* pass 1: check every line still has a field and size the row */
        need = 0;
        for (i = 0; i < nlines; i++) {
            next_field (l[i] + offset[i], &len);
            if (!len)
                break;
            need += len + 1;        /* field + ',' (or final '\0') */
        }
        if (i < nlines)             /* some line ran out of fields */
            break;

        if (csvl == ncsv) {         /* grow the row pointer table */
            void *tmp = realloc (t, 2 * ncsv * sizeof *t);
            if (!tmp) {
                perror ("realloc-t");
                goto fail;
            }
            t = tmp;
            ncsv *= 2;
        }

        if (!(row = malloc (need))) {
            perror ("malloc-row");
            goto fail;
        }

        /* pass 2: copy the fields and advance each line's offset */
        w = row;
        for (i = 0; i < nlines; i++) {
            p = next_field (l[i] + offset[i], &len);
            if (i)
                *w++ = ',';
            memcpy (w, p, len);
            w += len;
            offset[i] = (size_t)(p - l[i]) + len;
        }
        *w = 0;

        t[csvl++] = row;
    }

    free (offset);
    freep2p (l, nlines);

    *n = csvl;      /* exactly the rows t[0..csvl-1] are allocated */

    return t;

fail:
    freep2p (t, csvl);      /* t may be NULL here, then csvl is 0 */
    free (offset);
    freep2p (l, nlines);
    *n = 0;

    return NULL;
}

/** read the file named by the first argument (stdin when none is given),
 *  transpose it with transpose2csv() and print each csv row on stdout.
 *  returns 0 on success, 1 if the file cannot be opened or the
 *  transposition fails.
 */
int main (int argc, char **argv) {

    size_t nrows = 0;
    char **rows;
    FILE *in = stdin;               /* default input */

    if (argc > 1)                   /* filename given as 1st argument */
        in = fopen (argv[1], "r");

    if (in == NULL) {               /* validate file open for reading */
        perror ("file open failed");
        return 1;
    }

    rows = transpose2csv (&nrows, in);
    if (rows == NULL) {
        fputs ("error: transpose2csv failed.\n", stderr);
        return 1;
    }

    if (in != stdin)                /* close file if not stdin */
        fclose (in);

    for (size_t r = 0; r < nrows; r++) {
        if (rows[r] != NULL)
            puts (rows[r]);
    }

    freep2p (rows, nrows);

    return 0;
}

使用/输出示例

$ ./bin/transpose2csv < dat/col2csv3x4.txt
JACK,20,London
Maria,34,NewYork
Stephan,45,Toronto
Nora,28,Berlin

内存使用/错误检查

在您编写的任何动态分配内存的代码中,对于每一个已分配的内存块,您都有 2 个职责:(1)始终保留指向该内存块起始地址的指针,以便(2)在不再需要它时可以将其释放。

当务之急是使用内存错误检查程序,以确保您不会访问或写入已分配内存块边界之外的内存,不会读取未初始化的值或基于未初始化的值进行条件跳转,并最终确认您释放了已分配的所有内存。

对于Linux,valgrind是正常选择。每个平台都有类似的内存检查器。它们都很容易使用,只需通过它运行程序即可。

$ valgrind ./bin/transpose2csv < dat/col2csv3x4.txt
==18604== Memcheck, a memory error detector
==18604== Copyright (C) 2002-2015, and GNU GPL'd, by Julian Seward et al.
==18604== Using Valgrind-3.12.0 and LibVEX; rerun with -h for copyright info
==18604== Command: ./bin/transpose2csv
==18604==
JACK,20,London
Maria,34,NewYork
Stephan,45,Toronto
Nora,28,Berlin
==18604==
==18604== HEAP SUMMARY:
==18604==     in use at exit: 0 bytes in 0 blocks
==18604==   total heap usage: 15 allocs, 15 frees, 4,371 bytes allocated
==18604==
==18604== All heap blocks were freed -- no leaks are possible
==18604==
==18604== For counts of detected and suppressed errors, rerun with: -v
==18604== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)

始终确认已释放已分配的所有内存,并且没有内存错误。

仔细检查一下,如果还有其他问题,请告诉我。