从字符串中有效地提取令牌

时间:2013-09-16 14:19:10

标签: c string

我目前有如下 C 代码:

#include <string.h> // strdup
#include <stdio.h> // printf
#include <stdlib.h> // free

/*
 * Splits a backslash-separated string with strtok_r() and prints each token.
 *
 * strtok_r() skips leading delimiters and collapses runs of delimiters, so
 * the leading '\\' in the input does not yield an empty first token.
 *
 * Returns 0 on success, EXIT_FAILURE if the working copy cannot be allocated.
 */
int main(int argc, char *argv[])
{
  const char isostr[] = "\\ISO 2022 IR 13\\ISO 2022 IR 87";
  const char delim[] = "\\";
  char *query;
  char *token;
  char *str1;
  char *saveptr1;

  /* Command-line arguments are not used by this example. */
  (void)argc;
  (void)argv;

  /* strtok_r() writes NULs into its input, so tokenize a heap copy. */
  query = strdup( isostr );
  if (query == NULL)
    {
    perror("strdup");
    return EXIT_FAILURE;
    }

  /* First call gets the string; subsequent calls pass NULL to continue. */
  for (str1 = query; ; str1 = NULL)
    {
    token = strtok_r(str1, delim, &saveptr1);
    if (token == NULL)
      break;
    printf(" --> %s\n", token);
    }
  free( query );

  return 0;
}

然而它返回的是:

 --> ISO 2022 IR 13
 --> ISO 2022 IR 87

而我需要它返回:

 --> [null/empty]
 --> ISO 2022 IR 13
 --> ISO 2022 IR 87

strtok_r 似乎无法区分字符串“AA\BB\CC”、“AA\BB\CC\”和“\AA\BB\\CC”。

3 个答案:

答案 0 :(得分:2)

如果分隔符只有 1 个字符,则可以使用 strchr 来搜索它,而不必使用 strtok 进行标记化。strtok 会把分隔符字符串中的每个字符都当作分隔符,并把连续的分隔符合并处理,因此不会产生空标记。

/* Split on a single-character delimiter with strchr(). Empty fields
 * (leading, trailing, or between consecutive delimiters) are printed too. */
str1 = query;
while (1)
{
    /* Next delimiter, or NULL when str1 is the last field. */
    char *sep = strchr(str1, '\\');
    /* Terminate the current field in place so it can be printed. */
    if (sep != NULL) *sep = 0;
    printf(" --> %s\n", str1);
    if (sep == NULL) break;
    /* Continue with the field that starts just past the delimiter. */
    str1 = sep + 1;
}

答案 1 :(得分:1)

如果分隔符始终是单个字符,则可以这样执行:

char isostr[] = "\\ISO 2022 IR 13\\ISO 2022 IR 87";
char *p = isostr;
for (;;) {
    /* Next delimiter, or NULL when p is the last field. */
    char *next = strchr(p, '\\');
    /* Field length: up to the delimiter, or to the end of the string. */
    size_t len = next ? (size_t)(next - p) : strlen(p);
    // This code makes a copy and frees it right away.
    // You may prefer your approach of `strdup`-ing, and setting '\0' in place.
    char *s = malloc(len + 1);
    if (s == NULL) {
        /* Out of memory: stop rather than copy through a null pointer. */
        perror("malloc");
        break;
    }
    memcpy(s, p, len);
    s[len] = '\0';
    printf("--> '%s'\n", s);
    free(s);
    if (!next) break;
    /* Continue with the field that starts just past the delimiter. */
    p = next + 1;
}

这可以处理前导和尾随的空标记,以及中间的空标记。这是 ideone 上的演示(demo on ideone)。

答案 2 :(得分:0)

您可以轻松地实现一个具有该行为的类似 strtok 的函数:

#include <string.h>
#include <stdio.h>
#include <stdlib.h>


/* \short Split a string on one delimiter character, keeping empty tokens.
 *
 * Each delimiter found is overwritten with a NUL in the caller's buffer, so
 * the buffer must be writable. Progress is kept in function-local statics,
 * so only one string can be tokenized at a time.
 *
 * \param delim the delimiter character
 * \param str the string to start tokenizing, or NULL to continue with the
 *            string passed on an earlier call
 * \return pointer to the current token (possibly an empty string), or NULL
 *         once every token has been returned
 */
char* tokenize(char delim, char *str) {
    static char *cursor = NULL; /* where the next token starts */
    static char *limit;         /* one past the input's terminating NUL */

    /* A non-NULL argument restarts tokenization on a new string. */
    if (str != NULL) {
        cursor = str;
        limit = str + strlen(str) + 1;
    }

    /* The cursor only reaches the limit after the final token was handed out. */
    if (cursor == limit) {
        return NULL;
    }

    char *token = cursor;
    char *scan = token;

    /* Advance to the next delimiter or to the end of the string. */
    while (*scan != 0 && *scan != delim) {
        scan++;
    }

    /* Stopped on a delimiter: cut the token off there. */
    if (*scan != 0) {
        *scan = 0;
    }

    /* Resume right after the terminator on the next call. */
    cursor = scan + 1;

    return token;
}

/*
 * Demo driver: tokenizes a backslash-separated string with tokenize() and
 * prints every token, including empty ones.
 *
 * Returns 0 on success, EXIT_FAILURE if the working copy cannot be allocated.
 */
int main() {
    const char str[] = "\\ISO 2022 IR 13\\ISO 2022 IR 87";

    /* tokenize() overwrites delimiters in place, so give it a writable copy. */
    char *query = strdup(str);
    if (query == NULL) {
        /* Without this check tokenize() would call strlen() on NULL. */
        perror("strdup");
        return EXIT_FAILURE;
    }

    /* First call passes the string; later calls pass NULL to continue. */
    for (char *token = tokenize('\\', query); token != NULL;token = tokenize('\\', NULL)) {
        printf("Token [%s]\n", token);
    }

    free(query);
    return 0;
}