CSV解析函数无法识别空值

时间:2016-02-27 11:59:15

标签: c csv sqlite

我有一个包含7个字段的 CSV 文件,下面是我用来读取并解析它的代码:

/*
 * Import "input.csv" line by line and hand each line's fields to execute().
 *
 * The function posts "Parsing CSV.." / "Please Wait.." through
 * lk_dispclr()/lk_disptext() (presumably a device display - confirm), opens
 * "input.csv" for reading, and for every line returned by fgets() (at most
 * 1023 characters per call) asks getcsvfield() for fields 1..7 and passes
 * them to execute() as the VALUES of an INSERT into sdata.  Afterwards it
 * posts "CSV Imported!" or, if the file could not be opened,
 * "CSV Not Found!", and calls lk_getkey().
 *
 * NOTE(review): the stream is never fclose()d in this function.
 */
void readcsv() {
    lk_dispclr();
    lk_disptext(2, 0, "Parsing CSV..", 0);
    lk_disptext(3, 0, "Please Wait..", 0);

    FILE *stream = fopen("input.csv", "r");
    if (stream != NULL) {
        char line[1024];
        while (fgets(line, 1024, stream)) {
            // Heap copy of the line, released at the end of this iteration.
            // NOTE(review): the strdup() result is not checked for NULL.
            char *tmp = strdup(line);      
            // One 20-byte, initially empty buffer per CSV column.
            char a1[20] = "";
            char b1[20] = "";
            char c1[20] = "";
            char d1[20] = "";
            char e1[20] = "";
            char f1[20] = ""; 
            char g1[20] = ""; 

            // Append field N to its (empty) buffer.
            // NOTE(review): getcsvfield() returns NULL when the requested
            // field is not found, and strcat() neither accepts NULL nor limits
            // the copy to the 20 bytes available in the destination.
            strcat(a1, getcsvfield(tmp, 1));
            strcat(b1, getcsvfield(tmp, 2));
            strcat(c1, getcsvfield(tmp, 3));
            strcat(d1, getcsvfield(tmp, 4));
            strcat(e1, getcsvfield(tmp, 5));
            strcat(f1, getcsvfield(tmp, 6));
            strcat(g1, getcsvfield(tmp, 7));

            //printf("Field 1 would be %s\n", a1);
            //printf("Field 2 would be %s\n", getcsvfield(tmp, 2));
            //printf("Field 2 would be %s\n", getcsvfield(tmp, 3));
            //printf("Field 2 would be %s\n", getcsvfield(tmp, 4));
            //printf("Field 2 would be %s\n", getcsvfield(tmp, 5));
            //printf("Field 2 would be %s\n", getcsvfield(tmp, 6));
            // The seven field strings are substituted into the statement
            // through the '%s' placeholders (execute() looks printf-like -
            // confirm against its definition).
            execute("INSERT INTO sdata  (uid,sid,name,area,type,stbamount,pkgamount)"
                    "   VALUES('%s','%s','%s','%s','%s','%s','%s');",
                    a1, b1, c1, d1, e1, f1, g1);
            // NOTE strtok clobbers tmp
            free(tmp);
        }
        lk_dispclr();
        lk_disptext(2, 4, "CSV Imported!", 1);
        lk_getkey();
    } else {
        // fopen() failed: report it and return without importing anything.
        lk_dispclr();
        lk_disptext(2, 4, "CSV Not Found!", 1);
        lk_getkey();
    }
}

//Used for parsing CSV
/*
 * Return the num-th (1-based) token of line, or NULL if there is none.
 *
 * line is copied into a local 1024-byte buffer, which is then split with
 * strtok(): the first call uses "," as the delimiter set, the following
 * calls use ",\n".  num is decremented once per token and the current token
 * is returned when it reaches zero; the loop also ends (returning NULL) as
 * soon as strtok() yields NULL or an empty token.
 *
 * NOTE(review): the returned pointer refers into `buffer`, an automatic
 * array of this function, so it is no longer valid once the function has
 * returned.
 * NOTE(review): strtok() treats a run of delimiters as a single separator,
 * so an empty field (",,") produces no token and every later field is
 * counted one position too early.
 * NOTE(review): strcpy() is unbounded; line has to fit in 1024 bytes.
 */
const char *getcsvfield(char *line, int num) {
    char buffer[1024] = { 0 };
    strcpy(buffer, line);
    const char *tok;
    for (tok = strtok(buffer, ",");
         tok && *tok;
         tok = strtok(NULL, ",\n"))
    {
        // Count tokens down; the one that brings num to zero is the result.
        if (!--num)
            return tok;
    }
    return NULL;
}

我正在使用上面的函数(readcsv 和 getcsvfield)解析它。

但是如果缺少第6个字段(val5),则val6会被插入到表中val5的位置,而该位置实际上应该为空白。

我做错了什么?

2 个答案:

答案 0 :(得分:1)

您的代码有几个问题

  • 主要问题是您在getcsvfield中返回了指向自动存储的指针:您将line复制到本地数组buffer,并使用strtok对其进行解析。当您返回第n个元素时,tok指向本地数组buffer的内部。从函数getcsvfield返回后再引用此数组会导致未定义的行为。您可以通过将字段复制到作为getcsvfield参数传入的缓冲区来解决此问题。

  • 关于空值,您不能使用strtok来解析CSV格式:它会首先跳过所有连续出现的分隔符字符,因此您无法得到空字段,因为一连串的 , 会被解释为单个分隔符。strtok是一个使用隐藏全局状态的过时函数,您或许也应该避免在其他地方使用它。

以下是改进版本:

#include <stdio.h>
#include <string.h>

//Used for parsing CSV
/*
 * Copy the num-th (1-based) comma-separated field of line into dest.
 *
 * dest  receives the field as a NUL-terminated string, truncated to
 *       size - 1 characters if necessary; it is set to "" when the field
 *       does not exist.
 * size  capacity of dest in bytes; nothing is written if it is <= 0.
 * line  the record to parse; parsing stops at the first '\n' or at the
 *       terminating NUL.  The string is not modified.
 * num   field number; values below 1 are treated as 1.
 *
 * Returns dest when the field exists (an empty field between two commas or
 * after the last comma counts as existing), NULL when the line has fewer
 * than num fields or when dest/line is NULL or size is not positive.
 */
char *getcsvfield(char *dest, int size, const char *line, int num) {
    const char *p = line;

    if (dest == NULL || size <= 0)
        return NULL;
    *dest = '\0';
    if (line == NULL)
        return NULL;

    for (;;) {
        size_t len = strcspn(p, ",\n");  /* length of the current field */

        if (--num <= 0) {
            if (len >= (size_t)size)
                len = (size_t)size - 1;  /* truncate to fit dest */
            memcpy(dest, p, len);
            dest[len] = '\0';
            return dest;
        }
        p += len;
        /* Only a comma introduces another field; '\n' or NUL ends the record. */
        if (*p != ',')
            break;
        p++;
    }
    return NULL;
}

/*
 * Import "input.csv": every line read is split into its first seven
 * comma-separated fields with getcsvfield() and passed to execute() as the
 * VALUES of an INSERT into sdata.  Status messages are posted through
 * lk_dispclr()/lk_disptext(), followed by lk_getkey().
 */
void readcsv(void) {
    char row[1024];
    char col[7][20];
    FILE *csv;
    int i;

    lk_dispclr();
    lk_disptext(2, 0, "Parsing CSV..", 0);
    lk_disptext(3, 0, "Please Wait..", 0);

    csv = fopen("input.csv", "r");
    if (csv == NULL) {
        /* Nothing to import: report and leave. */
        lk_dispclr();
        lk_disptext(2, 4, "CSV Not Found!", 1);
        lk_getkey();
        return;
    }

    while (fgets(row, sizeof row, csv) != NULL) {
        /* Field numbers are 1-based; each buffer holds up to 19 characters. */
        for (i = 0; i < 7; i++) {
            getcsvfield(col[i], sizeof col[i], row, i + 1);
        }

        execute("INSERT INTO sdata  (uid,sid,name,area,type,stbamount,pkgamount)"
                "   VALUES('%s','%s','%s','%s','%s','%s','%s');",
                col[0], col[1], col[2], col[3], col[4], col[5], col[6]);
    }
    fclose(csv);

    lk_dispclr();
    lk_disptext(2, 4, "CSV Imported!", 1);
    lk_getkey();
}

请注意,您的插入方法可能允许攻击者通过CSV文件执行SQL注入。在上面的示例中,由于每个字段限制为20个字节,这种攻击比较困难,但在其他地方编写SQL命令时应该更加小心。SQLite也可能会对execute的参数进行健全性检查。

答案 1 :(得分:1)

如果您可以使用数组fields[ITEMS][LENGTH]代替单独的变量a1[LENGTH]、b1[LENGTH]……,则可以把该数组传递给函数,并通过一次调用填充所有字段。

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

#define ITEMS 7
#define LENGTH 20

void getcsvfields ( char *psource, char *pdelim, char (*fields)[LENGTH], int index);

/*
 * Demo driver: splits two sample ';'-separated records with getcsvfields()
 * and prints the ITEMS resulting fields of each, one per line.
 */
int main()
{
    char first[] = "me;val1;;val3;val4;;val6;";
    char second[] = ";val1;;val3;";
    char *samples[2];
    char fields[ITEMS][LENGTH] = {{'\0'}};  /* reused for both records */
    int s;
    int i;

    samples[0] = first;
    samples[1] = second;

    for ( s = 0; s < 2; s++) {
        /* record, delimiter set, destination array, number of rows in it */
        getcsvfields ( samples[s], ";", fields, ITEMS);

        /* show every extracted string, including the empty ones */
        for ( i = 0; i < ITEMS; i++) {
            printf ( "fields[%d] %s\n", i, fields[i]);
        }
    }

    return 0;
}

/*
 * Split psource into at most `index` fields and store them in `fields`.
 *
 * psource  the record to split; it is not modified.
 * pdelim   set of delimiter characters (as for strpbrk); every occurrence
 *          ends a field, so two adjacent delimiters yield an empty field.
 * fields   array of `index` rows of LENGTH bytes; every row is always
 *          written: rows beyond the last field found are set to "".
 * index    number of rows in `fields`.
 *
 * A field longer than LENGTH - 1 characters is truncated to fit.  The text
 * after the last delimiter is stored as a field too, with a trailing line
 * ending ("\r" / "\n") removed, so the final field does not need a closing
 * delimiter.  Nothing is done if a pointer argument is NULL or index <= 0.
 */
void getcsvfields ( char *psource, char *pdelim, char (*fields)[LENGTH], int index) {
    const char *cur = psource;
    int item = 0;

    if ( !psource || !pdelim || !fields || index <= 0) {//check arguments
        return;
    }

    while ( item < index) {
        const char *pnt = strpbrk ( cur, pdelim);
        //field ends at the next delimiter, or at the line ending for the last one
        size_t span = pnt ? (size_t)( pnt - cur) : strcspn ( cur, "\r\n");

        if ( span > (size_t)( LENGTH - 1)) {
            span = (size_t)( LENGTH - 1);//truncate, keep room for the '\0'
        }
        memcpy ( fields[item], cur, span);
        fields[item][span] = '\0';
        item++;

        if ( !pnt) {
            break;//no delimiter left: that was the last field
        }
        cur = pnt + 1;
    }

    //clear the rows that received no field
    while ( item < index) {
        fields[item][0] = '\0';
        item++;
    }
}

在您的程序中使用该函数大致如下:

/*
 * Import "input.csv": every line read is split into seven fields with
 * getcsvfields() and passed to execute() as the VALUES of an INSERT into
 * sdata.  Status messages are posted through lk_dispclr()/lk_disptext(),
 * followed by lk_getkey().
 *
 * NOTE(review): the field text is spliced into the SQL statement as-is, so
 * a quote character in the CSV data can break or alter the statement;
 * prefer bound parameters if execute() supports them.
 */
void readcsv() {
    lk_dispclr();
    lk_disptext(2, 0, "Parsing CSV..", 0);
    lk_disptext(3, 0, "Please Wait..", 0);

    FILE *stream = fopen("input.csv", "r");
    if (stream == NULL) {
        lk_dispclr();
        lk_disptext(2, 4, "CSV Not Found!", 1);
        lk_getkey();
        return;
    }

    char line[1024];

    //declare the array
    char fields[7][20] = {{'\0'}};

    while (fgets(line, sizeof line, stream)) {

        // call the function here to get the line into the array;
        // the file is comma separated and fgets() keeps the '\n', so the
        // newline is listed as a delimiter to terminate the last field
        getcsvfields ( line, ",\n", fields, 7);

        execute("INSERT INTO sdata (uid,sid,name,area,type,stbamount,pkgamount)"
        "   VALUES('%s','%s','%s','%s','%s','%s','%s');",
        fields[0], fields[1], fields[2], fields[3], fields[4], fields[5], fields[6]);
    }
    // release the stream once every line has been processed
    fclose(stream);

    lk_dispclr();
    lk_disptext(2, 4, "CSV Imported!", 1);
    lk_getkey();
}