如何在用C语言写入CSV文件时对字段值进行转义

时间:2014-03-06 10:52:00

标签: c csv

我正在开发一个将数据写入CSV文件的项目。在用C语言写入CSV文件时,应该如何对字段值进行转义?

/*
 * Append the fields of `data` to "result.data" as comma-separated values.
 *
 * Every field is followed by a comma and the values are written verbatim:
 * no CSV quoting or escaping is applied, and no line terminator is emitted.
 * If the file cannot be opened the function returns without writing.
 *
 * NOTE(review): `long` is a C keyword and cannot be a struct member name;
 * the member referenced as data.long must be renamed in the struct
 * definition (not visible here) before this compiles.
 */
void writeToFile(struct raDataStructure data)
{
    FILE *fp = fopen("result.data", "a");

    /* fopen() returns NULL on failure; fprintf/fclose on NULL is undefined. */
    if (fp == NULL)
        return;

    fprintf(fp, "%s,", data.long);
    fprintf(fp, "%s,", data.lat);
    fprintf(fp, "%s,", data.city);
    fprintf(fp, "%d,", data.pobox);
    fprintf(fp, "%s,", data.bio);
    fprintf(fp, "%d,", data.bNumber);
    fclose(fp);
}

字段data.bio可能包含任何字符,包括逗号、引号和斜杠。为了生成有效的CSV文件,在写入文件之前应该如何对它进行转义?

3 个答案:

答案 0 :(得分:0)

这应该能给你一个不错的起点。请注意,返回的字符串是动态分配的,使用完毕后需要对它调用free()。

/*
 * Build a CSV-quoted copy of `in`.
 *
 * The result is enclosed in double quotes and every embedded double quote
 * is doubled ("" stands for one literal quote).
 *
 * Returns a newly allocated NUL-terminated string that the caller must
 * free(), or NULL if `in` is NULL or memory cannot be allocated.
 */
char* escapeCSV(char* in) {
  if (in == NULL)
    return NULL;

  size_t in_len = strlen(in);
  /* Worst case: every character is a quote (doubled), plus the two
     enclosing quotes and the terminating NUL. */
  char *out_buf = malloc(in_len * 2 + 3);
  if (out_buf == NULL)
    return NULL;

  size_t out_idx = 0;
  out_buf[out_idx++] = '"';
  for (size_t in_idx = 0; in_idx < in_len; in_idx++) {
    if (in[in_idx] == '"')
      out_buf[out_idx++] = '"';   /* extra quote escapes the one copied next */
    out_buf[out_idx++] = in[in_idx];
  }
  out_buf[out_idx++] = '"';
  out_buf[out_idx] = '\0';
  return out_buf;
}

答案 1 :(得分:0)

csv.h

/*
@(#)File:           $RCSfile: csv.h,v $
@(#)Version:        $Revision: 2.1 $
@(#)Last changed:   $Date: 2012/11/01 23:45:19 $
@(#)Purpose:        Scanner for Comma Separated Variable (CSV) Data
@(#)Author:         J Leffler
@(#)Origin:         Kernighan & Pike, 'The Practice of Programming'
*/

#ifndef CSV_H
#define CSV_H

#include <stdio.h>

/* Input side: read a line, then query the fields it was split into. */
extern char  *csvgetline(FILE *ifp);    /* Read next input line */
extern char  *csvgetfield(size_t n);    /* Return field n */
extern size_t csvnfield(void);          /* Return number of fields */
extern void   csvreset(void);           /* Release space used by CSV */

/*
 * Output side.
 * csvputfield: write one field to ofp; a field containing a double quote,
 *              comma, CR or LF is enclosed in double quotes with embedded
 *              quotes doubled.  Returns the fputs() result, or EOF if the
 *              temporary buffer cannot be allocated.
 * csvputline:  write nfields fields separated by commas, followed by the
 *              end-of-line string.  Returns EOF if a field write fails.
 * csvseteol:   set the end-of-line string used by csvputline(); the
 *              implementation keeps at most 7 bytes of it.
 */
extern int    csvputfield(FILE *ofp, const char *field);
extern int    csvputline(FILE *ofp, char **fields, int nfields);
extern void   csvseteol(const char *eol);

#endif /* CSV_H */

csv.c

/*
@(#)File:           $RCSfile: csv.c,v $
@(#)Version:        $Revision: 2.2 $
@(#)Last changed:   $Date: 2013/01/28 03:28:55 $
@(#)Purpose:        Scanner for Comma Separated Variable (CSV) Data
@(#)Author:         J Leffler
@(#)Origin:         Kernighan & Pike, 'The Practice of Programming'
*/

/* See RFC 4180 (http://www.ietf.org/rfc/rfc4180.txt) */

#include "csv.h"
#include <stdlib.h>
#include <string.h>

/* Value returned by split() when the field-pointer array cannot be grown */
enum { NOMEM = -2 };

/* Scanner state shared by csvgetline(), split() and the accessors;
 * all buffers are released and zeroed by csvreset(). */
static char *line = 0;      /* Input line */
static char *sline = 0;     /* Split line */
static size_t maxline = 0;  /* Size of line[] and sline[] */
static char **field = 0;    /* Field pointers */
static size_t maxfield = 0; /* Size of field[] */
static size_t nfield = 0;   /* Number of fields */

static char fieldsep[]= ",";    /* Field separator characters */
static char fieldquote = '"';   /* Quote character */

/* End-of-line string written by csvputline(); replaced via csvseteol() */
static char eolstr[8] = "\n";

/*
 * Release every buffer owned by the scanner and return it to its initial,
 * unallocated state (null pointers, zero sizes, zero fields).
 */
void csvreset(void)
{
    free(field);
    field = 0;
    maxfield = 0;
    nfield = 0;

    free(sline);
    sline = 0;

    free(line);
    line = 0;
    maxline = 0;
}

/*
 * Report whether c is an end-of-line character (CR or LF).
 * For CR, one more character is read from ifp: a following LF is consumed
 * so that CRLF counts as a single line ending; any other character
 * (except EOF) is pushed back onto the stream.
 */
static int endofline(FILE *ifp, int c)
{
    if (c == '\n')
        return(1);
    if (c != '\r')
        return(0);

    int next = getc(ifp);
    if (next != EOF && next != '\n')
        ungetc(next, ifp);
    return(1);
}

/*
 * Compact a quoted field in place.
 * p points at the first character after the opening quote.  Characters are
 * copied down over the buffer with each doubled quote collapsed to a single
 * quote.  At the closing quote, any text between it and the next separator
 * is appended to the field and scanning stops.  The compacted field is
 * NUL-terminated, and the return value points at the position where
 * scanning stopped: the separator, or the end of the string.
 */
static char *advquoted(char *p)
{
    size_t i;   /* write index into p */
    size_t j;   /* read index into p */
    for (i = j = 0; p[j] != '\0'; i++, j++)
    {
        /* ++j steps past the quote: if another quote follows, it is an
         * escaped quote and is copied below; otherwise the field is closed. */
        if (p[j] == fieldquote && p[++j] != fieldquote)
        {
            /* k = length of the text from here up to the next separator */
            size_t k = strcspn(p+j, fieldsep);
            memmove(p+i, p+j, k);
            i += k;
            j += k;
            break;
        }
        p[i] = p[j];
    }
    p[i] = '\0';
    return(p + j);
}

/*
 * Copy line[] into sline[] and cut the copy into fields, storing a pointer
 * to each field in field[] (which is doubled in size whenever it is full).
 * Returns the number of fields found (0 for an empty line), or NOMEM when
 * field[] cannot be enlarged.
 */
static int split(void)
{
    char *cursor;
    int delim;

    nfield = 0;
    if (line[0] == '\0')
        return(0);
    strcpy(sline, line);
    cursor = sline;

    for (;;)
    {
        char *end;

        if (nfield >= maxfield)
        {
            char **grown;
            maxfield *= 2;
            grown = (char **)realloc(field, maxfield * sizeof(field[0]));
            if (grown == 0)
                return NOMEM;
            field = grown;
        }

        if (*cursor == fieldquote)
        {
            /* Quoted field: skip the opening quote and unquote in place */
            cursor++;
            end = advquoted(cursor);
        }
        else
            end = cursor + strcspn(cursor, fieldsep);

        /* Remember what ended the field before overwriting it with NUL */
        delim = end[0];
        end[0] = '\0';
        field[nfield++] = cursor;

        if (delim != ',')
            break;
        cursor = end + 1;
    }

    return(nfield);
}

/*
 * Read the next line from ifp into the scanner's line buffer and split it
 * into fields (see csvgetfield() and csvnfield()).
 *
 * The line ending (LF, CR or CRLF) is not stored.  The buffers are
 * allocated on the first call and doubled in size as needed.
 *
 * Returns a pointer to the unsplit line, or NULL when end of file is
 * reached with no characters read, or when memory runs out (in which case
 * all scanner state has been released with csvreset()).
 */
char *csvgetline(FILE *ifp)
{
    size_t i;
    int    c;

    if (line == NULL)
    {
        /* Allocate on first call */
        maxline = maxfield = 1;
        line = (char *)malloc(maxline);     /*=C++=*/
        sline = (char *)malloc(maxline);    /*=C++-*/
        field = (char **)malloc(maxfield*sizeof(field[0])); /*=C++=*/
        if (line == NULL || sline == NULL || field == NULL)
        {
            csvreset();
            return(NULL);   /* out of memory */
        }
    }
    for (i = 0; (c = getc(ifp)) != EOF && !endofline(ifp, c); i++)
    {
        if (i >= maxline - 1)
        {
            char  *newl;
            char  *news;
            maxline *= 2;
            /*
             * Commit each successful realloc() before attempting the next:
             * once realloc() has moved a buffer the old pointer is invalid,
             * so csvreset() must only ever see the current pointers.
             */
            newl = (char *)realloc(line, maxline);  /*=C++=*/
            if (newl == NULL)
            {
                csvreset();
                return(NULL);   /* out of memory */
            }
            line = newl;
            news = (char *)realloc(sline, maxline); /*=C++-*/
            if (news == NULL)
            {
                csvreset();
                return(NULL);   /* out of memory */
            }
            sline = news;
        }
        line[i] = c;
    }
    line[i] = '\0';
    if (split() == NOMEM)
    {
        csvreset();
        return(NULL);
    }
    return((c == EOF && i == 0) ? NULL : line);
}


/*
 * Return field number n (counting from 0) of the most recently split line,
 * or a null pointer when n is not below the current field count.
 */
char *csvgetfield(size_t n)
{
    return((n < nfield) ? field[n] : 0);
}

/* Return the number of fields found in the most recently split line. */
size_t csvnfield(void)
{
    const size_t count = nfield;
    return count;
}

/*
 * Write one CSV field to ofp.
 *
 * A field containing a double quote, comma, CR or LF is written enclosed
 * in double quotes with every embedded double quote doubled; any other
 * field is written unchanged.
 *
 * Returns the result of fputs(), or EOF if the temporary buffer for the
 * quoted form cannot be allocated.
 */
int csvputfield(FILE *ofp, const char *ofield)
{
    const char special[] = "\",\r\n";

    /* Nothing that needs quoting: emit the field as it stands */
    if (strpbrk(ofield, special) == 0)
        return fputs(ofield, ofp);

    /* Each embedded quote costs one extra byte in the quoted form */
    size_t nquotes = 0;
    for (const char *scan = ofield; *scan != '\0'; scan++)
    {
        if (*scan == '"')
            nquotes++;
    }

    /* Field text + doubled quotes + two enclosing quotes + NUL */
    char *buffer = malloc(strlen(ofield) + nquotes + 2 + 1);
    if (buffer == 0)
        return EOF;

    char *dst = buffer;
    *dst++ = '"';
    for (const char *src = ofield; *src != '\0'; src++)
    {
        if (*src == '"')
            *dst++ = '"';
        *dst++ = *src;
    }
    *dst++ = '"';
    *dst = '\0';

    int status = fputs(buffer, ofp);
    free(buffer);
    return status;
}

/*
 * Write one CSV record to ofp: the nfields strings in fields[], each
 * written with csvputfield() and separated by commas, followed by the
 * current end-of-line string.
 *
 * Returns EOF as soon as a separator or field cannot be written;
 * otherwise returns the result of writing the end-of-line string.
 */
int csvputline(FILE *ofp, char **fields, int nfields)
{
    for (int i = 0; i < nfields; i++)
    {
        /* A failed separator write must be reported, not silently dropped */
        if (i > 0 && putc(',', ofp) == EOF)
            return EOF;
        if (csvputfield(ofp, fields[i]) == EOF)
            return EOF;
    }
    return(fputs(eolstr, ofp));
}

/*
 * Replace the end-of-line string used by csvputline() with eol.
 * Input that does not fit in eolstr[] is truncated so that the stored
 * string is always NUL-terminated.
 */
void csvseteol(const char *eol)
{
    const size_t limit = sizeof(eolstr) - 1;    /* room left after the NUL */
    size_t count = strlen(eol);

    if (count > limit)
        count = limit;
    memmove(eolstr, eol, count);
    eolstr[count] = '\0';
}

#ifdef TEST

/*
 * Test driver: read CSV lines from stdin and, for each line, print the raw
 * line, every parsed field (both raw and re-encoded with csvputfield()),
 * and finally the whole record re-encoded with csvputline().
 */
int main(void)
{
    char *in_line;

    while ((in_line = csvgetline(stdin)) != 0)
    {
        size_t n = csvnfield();
        printf("line = '%s'\n", in_line);

        /* A blank line has no fields: a zero-length VLA is undefined and
         * n-1 would wrap around, so there is nothing more to show. */
        if (n == 0)
            continue;

        char *fields[n];        /* C99 VLA */
        for (size_t i = 0; i < n; i++)
        {
            printf("field[%zu] = '%s'\n", i, csvgetfield(i));
            printf("field[%zu] = [", i);
            csvputfield(stdout, csvgetfield(i));
            fputs("]\n", stdout);
            fields[i] = csvgetfield(i);
        }
        printf("fields[0..%zu] = ", n-1);
        csvputline(stdout, fields, (int)n);
    }

    /* Release the scanner's buffers before exiting */
    csvreset();
    return(0);
}

#endif /* TEST */

答案 2 :(得分:0)

使用类似C的转义序列进行打印:

/*
 * Write one character to outf using C-like escape sequences.
 *
 * Characters listed in escapev are written as a backslash followed by the
 * matching character from escapec; other printable characters are written
 * as-is; everything else is written as a backslash and three octal digits.
 * ch is interpreted as an unsigned char value.
 *
 * Returns 0 on success, EOF if the write fails.
 */
int EscapePrint_CSV(FILE *outf, int ch) {
  // Delete or adjust these 2 arrays per code's goals
  // All simple-escape-sequence C11 6.4.4.4 and , for CSV
  static const char escapev[] = ",\a\b\t\n\v\f\r\"\'\?\\";
  static const char escapec[] = ",abtnvfr\"\'\?\\";
  // <ctype.h> functions need an unsigned char value; plain char may be negative
  const unsigned char uch = (unsigned char)ch;
  // strchr() would match the terminating NUL, so never look up '\0'
  const char *p = (uch != '\0') ? strchr(escapev, uch) : NULL;
  int rc;

  if (p != NULL) {
    rc = fprintf(outf, "\\%c", escapec[p - escapev]);
  } else if (isprint(uch)) {
    rc = fputc(uch, outf);
  } else {
    // Use octal as hex is problematic reading back
    rc = fprintf(outf, "\\%03o", (unsigned)uch);
  }
  // fprintf() and fputc() both report failure with a negative value
  return (rc < 0) ? EOF : 0;
}

/*
 * Write the string s to outf as one double-quoted field, with every
 * character of s passed through EscapePrint_CSV().  The enclosing quotes
 * are written raw: sending them through the escaper would emit \" and the
 * field would no longer be delimited.
 *
 * Returns 0 on success, EOF as soon as any write fails.
 */
int EscapePrints_CSV(FILE *outf, const char *s) {
  if (fputc('"', outf) == EOF) return EOF;
  for (; *s != '\0'; s++) {
    int retval = EscapePrint_CSV(outf, (unsigned char)*s);
    if (retval) return retval;
  }
  return (fputc('"', outf) == EOF) ? EOF : 0;
}

不同的CSV实现在处理字段中嵌入的','时,细节各不相同。有些实现会对它进行转义。

另一些实现则用引号把整个字符串括起来,如"Hello, World"。这样一来,如何转义字符串中的"就成了问题。有些实现只需把"重复写成""即可完成转义。

YMMV。