在C中,解析一个由多个空格分隔的整数组成的字符串

时间:2015-01-30 10:06:34

标签: c string file parsing io

我试图使用C将一个包含多行、以空白分隔的整数的文件解析为一个动态数组,其中每个元素又是一个动态int数组。文件中的每一行对应外层数组中的一个数组。行数和每行中的元素个数都不是固定的。

到目前为止,我所做的是使用fgets将每一行作为字符串抓取。

但是,我不能弄清楚如何解析一串空格分隔的整数。

我以为我可以使用sscanf(因为fscanf可用于解析整个空格分隔的整数文件)。但是,似乎sscanf具有不同的功能。 sscanf只解析字符串中的第一个数字。我的猜测是,因为该行是一个字符串不是一个流。

我已经四处查找了从字符串创建流的方法,但看起来标准C中并没有这样的功能(我不能使用非标准库)。

/* Question snippet: read a file line by line and try to print every
   integer found on each line. */
char* line;                     /* return value of fgets(); NULL ends the outer loop */
char lineBuffer[BUFFER_SIZE];   /* storage for one line of input */
FILE *filePtr;                  /* input stream; presumably opened in the elided code below */
int value;                      /* most recently converted integer */

...

/* fgets() stores at most BUFFER_SIZE - 1 characters (keeping the newline
   if it fits) and returns NULL at end of file or on a read error. */
while((line = fgets(lineBuffer, BUFFER_SIZE, filePtr)) != NULL) {

    printf("%s\n", lineBuffer);

    /* NOTE: every call receives the same lineBuffer pointer, so sscanf()
       starts at the beginning of the line each time and converts the first
       integer again; nothing in this loop advances through the string. */
    while(sscanf(lineBuffer, "%d ", &value) > 0) {
        printf("%d\n", value);
    }
}

我可以用什么来解析这个字符串?如果没有合适的方法,是否有可以替代整个方案的做法?我不想使用正则表达式(REGEX)。

5 个答案:

答案 0 :(得分:4)

使用strtol(),如果有匹配则给出指向匹配结束的指针,并使用char指针存储当前位置:

    while ((line = fgets(lineBuffer, BUFFER_SIZE, filePtr)) != NULL) {
        printf("%s\n", lineBuffer);

        /* Walk the line with strtol(): each call converts one integer and
           reports where it stopped, which becomes the next starting point. */
        const char *limit = lineBuffer + BUFFER_SIZE;
        for (char *cursor = lineBuffer; cursor < limit; ) {
            char *stop;
            long int number = strtol(cursor, &stop, 10);

            /* No characters were consumed: nothing left to convert on this
               line. (The documentation also suggests checking errno.) */
            if (stop == cursor && number == 0L) {
                break;
            }

            printf("%ld\n", number);
            cursor = stop;
        }
    }

答案 1 :(得分:2)

通过fgets()阅读一行是很好的第一步。

2种方法:strtol()(更好的错误处理)和sscanf()

while ((line = fgets(lineBuffer, BUFFER_SIZE, filePtr)) != NULL) {
  char *stop;
  // Convert integers one after another; strtol() reports through `stop`
  // where each conversion ended, and that position is the next start.
  for (;;) {
    errno = 0;  // cleared so that a non-zero value afterwards comes from strtol()
    long parsed = strtol(line, &stop, 10);
    if (stop == line || errno != 0) break;  // nothing converted, or strtol() reported an error

    #if LONG_MIN < INT_MIN || LONG_MAX > INT_MAX
    // long is wider than int on this platform: reject what an int cannot hold
    if (parsed > INT_MAX || parsed < INT_MIN) {
      errno = ERANGE;
      break;
    }
    #endif

    int value = (int) parsed;
    printf("%d\n", value);
    line = stop;
  }
  // Only white-space may follow the last converted number.
  while (isspace((unsigned char) *stop)) {
    ++stop;
  }
  if (*stop != '\0') Handle_ExtraGarbageAtEndOfLine();
}

“sscanf只解析字符串中的第一个数字。”并非如此。使用sscanf()"%n"来记录扫描停止的位置。

while ((line = fgets(lineBuffer, BUFFER_SIZE, filePtr)) != NULL) {
  int value;
  int consumed = 0;  // characters read by the last successful scan; 0 before every call
  // "%d %n": convert one integer, skip white-space after it, then store the
  // number of characters read so far into `consumed`.
  while (sscanf(line, "%d %n", &value, &consumed) == 1) {
    printf("%d\n", value);
    line += consumed;
    consumed = 0;
  }
  // The scan stopped: anything other than the terminator here is leftover input.
  if (line[consumed] != '\0') Handle_ExtraGarbageAtEndOfLine();
}

答案 2 :(得分:1)

使用strtok()函数,以" "(空格)作为分隔符,并将其放在循环中逐个获取标记(token);当strtok()返回NULL时循环终止。然后打印每个标记:

while ((line = fgets(lineBuffer, BUFFER_SIZE, filePtr)) != NULL) {

    printf("%s\n", lineBuffer);

    /* strtok() splits the line in place at each space: the first call takes
       the string, every later call takes NULL to continue where it left off. */
    for (char *field = strtok(line, " "); field != NULL; field = strtok(NULL, " ")) {
        /* Print only the fields that start with a convertible integer. */
        if (sscanf(field, "%d", &value) >= 1) {
            printf("%d\n", value);
        }
    }
}

答案 3 :(得分:0)

只需对输入行使用一个循环,利用atol()总会在下一个空白分隔符处停止解析这一特性。此版本只适用于正整数 ;) 但它速度很快,你不需要阅读大量的strtok和sscanf文档,而且即使整数之间散落着"噪声",它也能正常工作。
为了使它适用于负数,也可以使用!isspace()替换isdigit(),然后就可以了。

/*
 * Prints every run of decimal digits found in a hard-coded sample string,
 * one value per line, to demonstrate scanning a line with atol().
 *
 * Only non-negative integers are recognised: a '-' is not a digit, so it is
 * skipped like any other non-digit character.
 */
void bla()
{
    const char * input = "    1           3           4       6     ";
    size_t i;
    size_t len = strlen(input);
    for (i = 0; i < len; ++i)
    {
        // <ctype.h> functions require a value representable as unsigned char.
        if (isdigit((unsigned char)input[i]))
        {
            // atol() returns long, so the conversion specifier must be %ld.
            printf("%ld\n", atol(&input[i]));
            // Step over the remaining digits of the number just converted.
            while (i < len && isdigit((unsigned char)input[i]))
                ++i;
        }

    }
}

/*
 * Variant of the digit scanner that also handles negative integers: every
 * run of non-white-space characters in a hard-coded sample string is handed
 * to atol() and the result is printed on its own line.
 *
 * A token that does not start with a number is printed as 0, because that
 * is what atol() returns when it cannot convert anything.
 */
void bla1()
{ // positive and negative ints version
    const char * input = "    10           -3           42       6     ";
    size_t i;
    size_t len = strlen(input);
    for (i = 0; i < len; ++i)
    {
        // <ctype.h> functions require a value representable as unsigned char.
        if (!isspace((unsigned char)input[i]))
        {
            // atol() returns long, so the conversion specifier must be %ld.
            printf("%ld\n", atol(&input[i]));
            // Step over the rest of the token that was just converted.
            while (i < len && !isspace((unsigned char)input[i]))
                ++i;
        }
    }
    /* Output: 
        10
        -3
        42
        6

    */
}

你的问题的下一部分(隐含地)是:如何用动态数组来存储解析出的int值。下面是一个基于上述代码的解决方案。chunkSize相对于示例输入被故意设置得偏小,这样我就可以测试realloc部分的代码是否也能正常工作。

// One parsed row: a heap-allocated array of values plus its element count.
typedef struct DataRow_tag
{
    int32_t *data;   // malloc()ed array of parsed values; NULL while the row is unused
    size_t length;   // number of valid elements in data
} DataRow_t;

// Parses a zero-terminated string of white-space separated decimal integers
// into a freshly allocated int32_t array.
//
// row    - Text of one row. Tokens are separated by any isspace() character;
//          leading and trailing white-space (for example the newline kept by
//          fgets()) is ignored. An empty or all-white-space row is valid and
//          produces length 0.
// result - Must point to a DataRow_t whose data member is NULL. This function
//          never frees a buffer the caller passes in.
//
// Returns an int used as a C-style boolean:
//   1: success. result->data / result->length describe the parsed values and
//      the caller takes ownership of result->data and has to free() it.
//   0: failure. Causes: NULL argument, result->data not NULL on entry, out of
//      memory, a token that is not a complete decimal integer, or a value
//      outside the int32_t range. Nothing stays allocated; for a usable
//      result, data is NULL and length is 0.
int 
ReadRowWithUnknownNumberOfColumnsOfInt32
    ( const char * row      // Zero terminated string containing 1 row worth of data.
    , DataRow_t *result     // Pointer to the place the data will be stored at.
    )
{
    size_t chunkSize = 10; // Initial capacity in elements; doubled whenever the buffer is full.
    size_t count = 0;      // Number of values stored so far.
    int32_t *values;
    const char *cursor = row;

    // Handing in a result that still owns a buffer is a caller bug.
    assert(NULL != result && NULL == result->data);
    if (NULL == row || NULL == result || NULL != result->data)
        return 0;

    result->length = 0;

    // The buffer is kept in a local until parsing has succeeded, so every
    // failure path frees it exactly once and result never dangles.
    values = malloc(chunkSize * sizeof *values);
    if (NULL == values)
        return 0;

    for (;;)
    {
        char *tokenEnd;
        long long parsed;

        // <ctype.h> functions require a value representable as unsigned char.
        while (isspace((unsigned char)*cursor))
            ++cursor;
        if ('\0' == *cursor)
            break; // Reached the terminator: the whole row has been consumed.

        // long long is at least 64 bits wide, so an overflowing token comes
        // back as LLONG_MIN/LLONG_MAX and is rejected by the range test below.
        parsed = strtoll(cursor, &tokenEnd, 10);
        if (tokenEnd == cursor                                              // no digits at all
            || ('\0' != *tokenEnd && !isspace((unsigned char)*tokenEnd))    // junk glued to the number
            || parsed < INT32_MIN || parsed > INT32_MAX)                    // does not fit into int32_t
        {
            free(values);
            return 0;
        }

        if (count == chunkSize)
        { // Buffer is full: double it before storing the next value.
            int32_t *grown;

            if (chunkSize > SIZE_MAX / (2 * sizeof *values))
            { // Doubling would overflow the byte count passed to realloc().
                free(values);
                return 0;
            }
            chunkSize = chunkSize + chunkSize;
            // realloc() leaves the old block untouched when it fails, so the
            // result goes into a temporary first.
            grown = realloc(values, chunkSize * sizeof *values);
            if (NULL == grown)
            {
                free(values);
                return 0;
            }
            values = grown;
        }

        values[count++] = (int32_t)parsed;
        cursor = tokenEnd;
    }

    result->data = values;
    result->length = count;
    return 1;
}
// Demo driver: parses a hard-coded row of 24 integers with
// ReadRowWithUnknownNumberOfColumnsOfInt32(), prints the values on one line
// separated by spaces, and releases the array afterwards.
// Nothing is printed when parsing fails.
void Bla2()
{
    const char * input = "-10 -9 -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9 10 11 12 13";
    DataRow_t dataRow = { 0 };
    if (ReadRowWithUnknownNumberOfColumnsOfInt32(input, &dataRow))
    {
        for (size_t i = 0; i < dataRow.length; ++i)
        {
            // int32_t is not guaranteed to be int; long always holds it, and
            // the cast makes the argument match %ld exactly.
            printf("%ld ", (long)dataRow.data[i]);
        }
        printf("\n");

        // The parser handed ownership of the array to us.
        free(dataRow.data);
        dataRow.data = NULL;
        dataRow.length = 0;
    }
}

答案 4 :(得分:0)

您应该使用:

/* Request BUFFER_SIZE + 1 bytes. sizeof(BUFFER_SIZE + 1) would instead yield
   the size of the expression's integer type (typically sizeof(int)).
   malloc() can return NULL, so check the result before use and free() the
   buffer when done. */
lineBuffer = malloc(BUFFER_SIZE + 1);

而不是:

char lineBuffer[BUFFER_SIZE];

你的栈(stack)会感谢你!