在已经使用另一个分隔符时在C中分隔空格(一次)的简单方法

时间:2012-04-19 16:03:32

标签: c whitespace strtok scanf

我有以下代码

/*
 * Walk a comma-separated string: copy each run of non-comma characters into
 * field, print it, then step over the comma that follows.
 * ptr, field and n are declared outside this fragment.
 */
while (*ptr != ',') {
    /* %91[^,] stores at most 91 characters other than ','; %n stores in n
     * how many characters have been consumed so far. */
    int items_read = sscanf(ptr, "%91[^,]%n", field, &n);
    /* Printed before items_read is checked, so it runs even when nothing
     * was stored by this call. */
    printf("field = \"%s\"\n", field);
    field[0]='\0'; /* reset field to the empty string for the next pass */
    if (items_read == 1)
        ptr += n; /* advance the pointer by the number of characters read */
    if ( *ptr != ',' ) {
        break; /* didn't find an expected delimiter, done? */
    }
    ++ptr; /* skip the delimiter */
}

它读取我的字符串,并为逗号之间找到的每个子串创建一个新数组,但是我的字符串中间有空格。我有许多字符串需要读入,其中一些字符串在多个位置(在中间那处空格之后)还有空格;我不希望这些后面的空格被当作分隔符,只希望第一处空格被当作分隔符。

我知道我可以把 `|| ' '` 加到 while 行、int 行和/或第二个 if 行上,但这并不能实现我需要的效果。

我听说使用strtok并不好,或者至少有更好的方法可以做到这一点。我不知道如何再添加一个额外的“标志”(类似我代码中的逗号),让代码把它也当作分隔符跳过。

例如:
3,875,837 file1.ext
3,437,281 document two.txt

我已经开始做我想做的事了。

field1 = 3  
field2 = 875  
field3 = 837

但是我希望这两个输入上的其余字符串进入单独的char数组并打印出来如下:

field4 = file1  
field5 = ext  

field4 = document  
field5 = two  
field6 = txt

或者,我现在在想,有没有办法取出char数组field在最后一次迭代时的内容,然后用另一个循环、以完全相同的代码再处理一遍,只是把 `,` 替换为空格?

3 个答案:

答案 0 :(得分:1)

取决于您想要使用数据的内容。如何在代码中的其他地方使用它。

我想我通常会在这类解析中使用普通的逐字符迭代。如果您想使用sscanf,下面的做法可能有所帮助;举个例子:

#include <stdio.h>

/*
 * Print every field of one input line and advance *ptr past that line.
 *
 * A field is a run of characters other than tab, newline, '.', ',' and
 * space. Any run of space, tab, '.' and ',' after a field is skipped.
 *
 * ptr: address of the read cursor; it is moved past everything consumed.
 *
 * Returns 1 when a newline-terminated line was printed and more input
 * follows it. Returns 0 when that line was the last one, or when parsing
 * stopped early (empty field, field longer than 127 characters, or input
 * that ends without a newline); "} something failed!" is printed in the
 * latter case.
 */
int prnt_fields(char **ptr)
{
    char field[128];
    int n;
    int i = 0;

    printf("Item {\n");
    while (**ptr) {
        if (sscanf(*ptr, "%127[^\t\n., ]%n", field, &n) != 1)
            break;
        printf("  %d => '%s'\n", i++, field);
        (*ptr) += n;

        /*
         * Skip a run of separators (multiple spaces, tabs, etc.).
         * The result must be compared with 1: at end of input sscanf
         * returns EOF, which is non-zero, and n would still hold the
         * length of the previous field, pushing *ptr past the terminator.
         */
        if (sscanf(*ptr, "%127[ \t.,]%n", field, &n) == 1)
            (*ptr) += n;

        if (**ptr == '\n') {
            printf("} OK!\n");
            (*ptr)++;
            return **ptr != '\0';
        }
    }
    /* Should never end here unless input is invalid;
     * aka not separated with , \n or over long fields (>127) */
    printf("} something failed!\n");
    return 0;
}

/*
 * Demo driver: tokenises the sample text twice, first with a naive inline
 * loop, then line by line through prnt_fields().
 */
int main(void)
{
    char *tst = "3,875,837 file1.ext\n"
        "6,875,847 file2.ext\n"
        "3,437,281 document two.txt\n"
        "9,991,123\tdrei.txt\n"
        "4,494,123        vier    fünf .    txt\n"
        ;
    char field[128];
    char *ptr;
    int n;
    int i = 0;

    /*
     * Pass 1: read one field, then blindly step over a single separator
     * character. Stops at the first position where no field can be read.
     *
     * To validate the separator instead, advance by n only, stop unless
     * *ptr is ',', ' ' or '\n', and then step over it with ++ptr.
     */
    for (ptr = tst; *ptr != '\0'; ptr += n + 1) {
        if (sscanf(ptr, "%127[^, \n.]%n", field, &n) != 1) {
            break;
        }
        printf("field%d = '%s'\n", i++, field);
    }

    /* Pass 2: restart from the beginning and print one item per line
     * until prnt_fields() reports that nothing more follows. */
    ptr = tst;
    printf("\n---------------\n");
    for (;;) {
        if (prnt_fields(&ptr) <= 0) {
            break;
        }
    }

    return 0;
}

应该给出类似的东西:

field0 = '3'
field1 = '875'
field2 = '837'
...
field18 = '123  drei' <= breaks here
field19 = 'txt'
field20 = '4'
field21 = '494'
field22 = '123'
Item {
  0 => '3'
  1 => '875'
  2 => '837'
  3 => 'file1'
  4 => 'ext'
} OK!
Item {
  0 => '6'
  1 => '875'
  2 => '847'
  3 => 'file2'
  4 => 'ext'
} OK!
Item {
  0 => '3'
  1 => '437'
  2 => '281'
  3 => 'document'
  4 => 'two'
  5 => 'txt'
} OK!
Item {
  0 => '9'
  1 => '991'
  2 => '123'
  3 => 'drei'
  4 => 'txt'
} OK!
Item {
  0 => '4'
  1 => '494'
  2 => '123'
  3 => 'vier'
  4 => 'fünf'
  5 => 'txt'
} OK!

编辑:

好的。这还可以写得更干净、更好,但你应该能明白大致的思路:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>

/*
 * One parsed directory-listing line.
 *
 * size_prt holds the comma-separated digit groups of the size, terminated
 * by -1; eight slots allow seven groups plus the terminator, enough for
 * 2^63 = 9,223,372,036,854,775,808 bytes.
 * name_prt holds the whitespace-separated pieces of name; the list ends at
 * the first empty string.
 */
struct file_data {
    struct date {
        int y;                  /* year */
        int m;                  /* month */
        int d;                  /* day */
    } date;
    struct time {
        int h;                  /* hour */
        int m;                  /* minute */
    } time;
    int size_prt[8];            /* size digit groups, -1 terminated */
    double size;                /* total size accumulated from size_prt */
    char name[512];             /* text before the last '.', untrimmed */
    char ext[16];               /* text after the last '.', untrimmed */
    char ext_tr[16];            /* ext with surrounding whitespace removed */
    char name_prt[32][128];     /* pieces of name; should really be
                                 * allocated or handled by the caller */
};

/* Default record: everything zero or empty, size_prt already terminated. */
static const struct file_data file_def = {
    .date     = { 0, 0, 0 },
    .time     = { 0, 0 },
    .size_prt = { -1 },
    .size     = 0,
    .name     = { '\0' },
    .ext      = { '\0' },
    .ext_tr   = { '\0' },
    .name_prt = { { '\0' } }
};

/*
 * Write a human-readable dump of one record to fh.
 *
 * fh: output stream.
 * fd: record to print (passed by value).
 *
 * The size is shown in bytes and divided by 2^10, 2^20 and 2^30. The size
 * groups are listed up to the -1 terminator and the name pieces up to the
 * first empty string; both scans also stop at the end of their array, so a
 * record without a terminator cannot be read out of bounds.
 */
void prnt_filedata(FILE *fh, struct file_data fd)
{
    const size_t n_size = sizeof fd.size_prt / sizeof fd.size_prt[0];
    const size_t n_name = sizeof fd.name_prt / sizeof fd.name_prt[0];
    size_t i;

    fprintf(fh,
        "File { \n"
        "  Date: %4d-%02d-%02d\n"
        "  Time: %02d:%02d\n"
        "  Size: %.f\n"
        "  Size: %.2f K\n"
        "  Size: %.2f M\n"
        "  Size: %.2f G\n"
        "  Name: '%s'\n"
        "  Ext : '%s'\n"
        "  ExtT: '%s'\n"
        "  Szpt: ",
        fd.date.y, fd.date.m, fd.date.d,
        fd.time.h, fd.time.m,
        fd.size,
        (fd.size / (1 << 10)),
        (fd.size / (1 << 20)),
        (fd.size / (1 << 30)),
        fd.name,
        fd.ext,
        fd.ext_tr
        );
    for (i = 0; i < n_size && fd.size_prt[i] != -1; i++)
        fprintf(fh, "%d ", fd.size_prt[i]);
    fprintf(fh, "\n  Fprt: ");
    for (i = 0; i < n_name && *fd.name_prt[i]; i++)
        fprintf(fh, "'%s' ", fd.name_prt[i]);
    fprintf(fh, "\n}\n");
}

/*
 * Parse one listing line of the form
 *     YYYY-MM-DD HH:MM <size with ',' between digit groups> <file name>
 * from *ptr into *fd, and advance *ptr past the line.
 *
 * ptr: address of the read cursor; moved past the parsed line on success,
 *      left unchanged when the line does not match.
 * fd:  record to fill; always reset to file_def first.
 *
 * The name is split at its last '.' into name and ext, ext_tr receives the
 * whitespace-trimmed extension, and name_prt receives the
 * whitespace-separated pieces of name (at most one fewer than the array
 * has slots, so the list always ends with an empty string).
 *
 * Returns 1 when a line was parsed and more input follows. Returns 0 when
 * the parsed line was the last one, when the input is empty, or when the
 * line does not match; in that last case a message is written to stderr.
 */
int extr_file_data(char **ptr, struct file_data *fd)
{
    const int max_size_parts =
        (int)(sizeof fd->size_prt / sizeof fd->size_prt[0]) - 1;
    const int max_name_parts =
        (int)(sizeof fd->name_prt / sizeof fd->name_prt[0]) - 1;
    int i;
    int n;
    char size[26];
    char name[512];
    char *p;

    *fd = file_def;

    if (**ptr == '\0')
        return 0;

    if (sscanf(*ptr,
        "%4d-%2d-%2d %d:%d %25[0123456789,] %511[^\n]%n",
        &fd->date.y, &fd->date.m, &fd->date.d,
        &fd->time.h, &fd->time.m,
        size, name, &n) != 7) {
        fprintf(stderr,
            " * ERR; Unable to extract from %s\n",
            *ptr);
        return 0;
    }
    (*ptr) += n;

    /* Size parts + total */
    p = size;
    i = 0;
    while (*p && i < max_size_parts) {
        /* Base 10 on purpose: with base 0 a zero-padded group such as
         * "024" would be read as octal. */
        fd->size_prt[i] = (int)strtol(p, &p, 10);
        fd->size *= 1000;
        fd->size += fd->size_prt[i++];
        if (*p)
            ++p;                /* step over the ',' between groups */
    }
    fd->size_prt[i] = -1;       /* terminator after the last group */

    /* Split at the last '.'; snprintf always terminates, even when the
     * extension is longer than the ext buffer. */
    p = strrchr(name, '.');
    if (p != NULL) {
        snprintf(fd->name, sizeof fd->name, "%.*s", (int)(p - name), name);
        snprintf(fd->ext, sizeof fd->ext, "%s", p + 1);
        /* trimmed ext */
        if (sscanf(fd->ext, "%15s", fd->ext_tr) != 1)
            *fd->ext_tr = '\0';
    } else {
        snprintf(fd->name, sizeof fd->name, "%s", name);
    }

    /* Whitespace-separated pieces of name. One slot is kept free so the
     * list can always be closed with an empty string. */
    p = fd->name;
    i = 0;
    while (i < max_name_parts &&
           sscanf(p, "%127s%n", fd->name_prt[i], &n) == 1) {
        p += n;
        i++;
    }
    *fd->name_prt[i] = '\0';    /* index by piece count, not by n */

    if (**ptr == '\n')
        (*ptr)++;
    return **ptr != '\0';
}

/*
 * Demo driver: parses the sample listing one line at a time and prints
 * the record left in fd after every extraction call.
 */
int main(void)
{
    char *tst =
        "2012-04-18 13:28                  32 ein.ext\n"
        "2012-04-18 13:28       2,446,875,847 zwei.xt  \n"
        "2012-04-18 13:28                   0 drei  .xt\n"
        "2012-04-18 13:28 7,694,587,183,883,665 vier fünf.txt\n"
        "2012-04-18 13:28 9,991,123\t\tsechs\n"
        "2012-04-18 13:28 4,494,123        sieben     acht  .   txt\n"
        ;
    struct file_data fd;
    char *ptr = tst;
    int more;

    /*
     * extr_file_data() returns 0 for the final line as well, so the record
     * is printed after every call, including the one that ends the loop.
     */
    do {
        more = extr_file_data(&ptr, &fd);
        prnt_filedata(stdout, fd);
    } while (more > 0);

    return 0;
}

应该给:

...
File { 
  Date: 2012-04-18
  Time: 13:28
  Size: 2446875847
  Size: 2389527.19 K
  Size: 2333.52 M
  Size: 2.28 G
  Name: 'zwei'
  Ext : 'xt  '
  ExtT: 'xt'
  Szpt: 2 446 875 847 
  Fprt: 'zwei' 
}
...
File { 
  Date: 2012-04-18
  Time: 13:28
  Size: 4494123
  Size: 4388.79 K
  Size: 4.29 M
  Size: 0.00 G
  Name: 'sieben     acht  '
  Ext : '   txt'
  ExtT: 'txt'
  Szpt: 4 494 123 
  Fprt: 'sieben' 'acht' 
}
...

编辑(再次):抱歉,代码里有一个奇怪的移位(shift)运算,是我测试之后忘记改回来的。

答案 1 :(得分:1)

您可以查看strcspn()方法。

作为一个很好的副作用,它可以实现多字节字符安全。

答案 2 :(得分:1)

我如何做到这一点取决于数据。我可能会在一个字符串中读取,在第一个空格中将其解析为两个,可能使用strchr,然后通过逗号分隔第一个字符串。