如何将字符串拆分为字符串数组?

时间:2012-04-03 20:10:59

标签: c string split

例如:

输入(字符串):foo $$ foo ## foo []

搜索(字符串):foo

输出(数组):$$ ,## ,[]

我试过了:

char * str = "foo $$ foo ## foo []";
    char * s = "foo";

    int buf_len = 0;
    int len = strlen(s);
    int i = 0;

    char ** buffer = malloc(MAX_BUFFER_SIZE);
    char * tmpbuf = malloc(MAX_BUFFER_SIZE);
    char * p = str;
    char ** buf = buffer;
    char * tbuf = tmpbuf;

    while(*p)
    {
        if(*p == *s) 
        {
            while(*p == *(s + i)) 
            { 
                i++;
                p++;
            }

            if(i == len) 
            {
                *buf ++ = tbuf;
                memset(tbuf,0,buf_len);
                i = buf_len = 0;
            }
        }
        else 
        {
            *tbuf ++= *p;
            buf_len ++;
        }

        p++;
    }

    *buf ++= NULL;

    int x;
    for(x = 0; buffer[x]; x++)
    {
        printf("%s\n", buffer[x]);
    }

    free(buffer);
    free(tmpbuf);

显示以下输出:

$$ ## []
## []
[]

但预期是:

$$  
##  
[]

如何解决这个问题?

5 个答案:

答案 0 :(得分:5)

这是一个将字符串拆分为字符串数组的函数:

#include <assert.h>
#include <string.h>

/*
 * Split a string by a delimiter.
 *
 * This function writes the beginning of each item to @pointers_out
 * (forming an array of C strings), and writes the actual string bytes
 * to @bytes_out.  Both buffers are assumed to be big enough for all of the
 * strings.
 *
 * Returns the number of strings written to @pointers_out.
 */
size_t explode(const char *delim, const char *str,
               char **pointers_out, char *bytes_out)
{
    size_t  delim_length        = strlen(delim);
    char   **pointers_out_start = pointers_out;

    assert(delim_length > 0);

    for (;;) {
        /* Find the next occurrence of the item delimiter. */
        const char *delim_pos = strstr(str, delim);

        /*
         * Emit the current output buffer position, since that is where the
         * next item will be written.
         */
        *pointers_out++ = bytes_out;

        if (delim_pos == NULL) {
            /*
             * No more item delimiters left.  Treat the rest of the input
             * string as the last item.
             */
            strcpy(bytes_out, str);
            return pointers_out - pointers_out_start;
        } else {
            /*
             * Item delimiter found.  The bytes leading up to it form the next
             * string.
             */
            while (str < delim_pos)
                *bytes_out++ = *str++;

            /* Don't forget the NUL terminator. */
            *bytes_out++ = '\0';

            /* Skip over the delimiter. */
            str += delim_length;
        }
    }
}

用法:

#include <stdio.h>
/* ... */

#define BIG_ENOUGH 1000

int main(void)
{
    char    *items[BIG_ENOUGH];
    char     item_bytes[BIG_ENOUGH];
    size_t   i;
    size_t   count;

    count = explode("foo", "foo $$ foo ## foo []", items, item_bytes);

    for (i = 0; i < count; i++)
        printf("\"%s\"\n", items[i]);

    return 0;
}

输出:

""
" $$ "
" ## "
" []"

这不会产生您要求的确切输出,因为我不确定您希望如何处理周围空格以及字符串开头处的项目分隔符(在您的示例中为"foo")的出现次数。相反,我模仿了PHP的explode函数。


我想指出我的explode函数如何在内存管理上发挥作用。由调用者决定缓冲区是否足够大。这对于快速脚本来说很好,但在一个更严肃的程序中可能会很烦人,你需要做一些数学运算才能正确使用这个函数。我本可以编写一个更“强大”的实现来执行自己的分配,但是:

  • 这会使实施变得混乱。

  • 它不会让调用者选择使用自己的内存分配器。

因此,我执行explode的方式是“糟糕”,因为它很难正确使用,更糟糕的是,容易使用错误。另一方面,它是“好”的,它将功能和内存管理的问题分开。

答案 1 :(得分:3)

这是因为当您说:

时,您不会将tbuf的内容复制到buf
*buf ++ = tbuf;

您所做的是在tbuf(或tmpbuf中保存对当前位置的引用,如果您愿意的话)。

除了分隔符之外,

tmpbuf充满了所有东西。

类似于,在循环结束时:

          01234567 <- offset
tmpbuf = "$$ ## []"

buf[0] = tmpbuf+0;
buf[1] = tmpbuf+3;
buf[2] = tmpbuf+6;

非常简化的内存表:

        memory
       address        value   
tmpbuf -> 0x01       [   $] <- buffer[0] points here
          0x02       [   $]
          0x03       [    ]
          0x04       [   #] <- buffer[1] points here
          0x05       [   #]
          0x06       [    ]
          0x07       [   [] <- buffer[2] points here
          0x08       [   ]]
          0x09       [    ]
          ...
buffer -> 0x3A       [0x01]
          0x3B       [0x04]
          0x3C       [0x07]
          0x3D       [    ]
          0x3E       [    ]
          ...

修改

对于它的ph ;;指针,动态,方式,不使用strstr()

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int is_needle(char *hay, char *needle)
{
    while (*hay && *++hay == *++needle);
    return *needle == '\0';
}

char *find(char *hay, char *needle)
{
    while (*hay) {
        if (*hay == *needle && is_needle(hay, needle))
            return hay;
        ++hay;
    }
    return hay;
}

int pushstr(char ***vs, size_t *vslen, char *val, size_t slen)
{
    char **vsp = *vs + *vslen;

    if ((*vsp = realloc(*(*vs + *vslen), slen + 1)) == NULL) {
        perror("pushstr.1"); exit(1);
    }

    memcpy(*vsp, val, slen);
    *(*vsp + slen) = '\0';

    if ((*vs  = realloc(*vs, sizeof(char*) * (++*vslen + 1))) == NULL) {
        perror("pushstr.2"); exit(1);
    }
    *(*vs + *vslen) = NULL;

    return *vslen;
}

int main(void)
{
    char *hay    = "foo $$ foo ## foo [] fox @@ foo ??";
    char *needle = "foo";
    char *np;
    char **vs;
    size_t vslen = 0;
    size_t nlen  = strlen(needle);

    if ((vs = malloc(sizeof(char*))) == NULL) {
        perror("main");
        return 1;
    }
    *vs = NULL;

    while (*(np = find(hay, needle))) {
        if (np != hay) {
            pushstr(&vs, &vslen, hay, np - hay);
            hay = np + nlen;
        } else {
            hay += nlen;
        }
    }
    if (np != hay)
        pushstr(&vs, &vslen, hay, np - hay);

    while (*vs)
        printf("V: '%s'\n", *vs++);
    vs -= vslen;

    while (*vs)
        free(*vs++);
    vs -= vslen;
    free(vs);

    return 0;
}

答案 2 :(得分:1)

这是strstr()的任务。我改变了你的代码以便利用它。

int add_to_buf(char *str, size_t len, char ***buf)
{
  if (len <= 0) return 0;
  **buf = malloc (len);
  strncpy (**buf, str, len);
  ++*buf;
  return 1;
}

int main()
{
  char *str = "foo $$ foo ## foo []";
  char *s = "foo";

  char **buffer = malloc (MAX_BUFFER_SIZE*sizeof(*buffer)), **buf = buffer;
  char *start, *end;

  int s_len = strlen (s);

  start = str;
  end = strstr (str, s);
  while (end) {
    add_to_buf (start, end-start, &buf);
    start = end + s_len;
    end = strstr (start, s);
  }
  add_to_buf (start, strlen (str) - (start-str), &buf);
  *buf = 0;

  for (buf = buffer; *buf; ++buf)
      printf ("%s\n", *buf);

  free (buffer);
  return 0;
}

答案 3 :(得分:1)

对于一个简单的程序,你使用了太多的指针,而你使用它们的方式使它很难理解。我看到的一个简单的错误是你正在使用buffer**(字符串数组),但你只是分配一个字符串。你是这个存储令牌的字符串数组,它会在某处发生一些内存违规。

由于您要打印令牌,因此无需将它们存储在单独的阵列中。这样做:

#include<stdio.h>
#include<string.h>

int main(int ac, char*argv[]) {
char str[] = "foo $$ foo ## foo []";
char * s = "foo";   
char *p;

p = strtok(str, " "); // tokenize

while(p!=NULL)
{
if(strcmp(p, s)) //print non matching tokens
printf("%s\n", p);
p = strtok(NULL, " ");
}

return 0;
}

请注意,此处的分隔符是空格,这使得此处更容易。

答案 4 :(得分:0)

strtok函数专为此任务而设计:

#include <string.h>
...
char *token;
char *line = "LINE TO BE SEPARATED";
char *search = " ";


/* Token will point to "LINE". */
token = strtok(line, search);


/* Token will point to "TO". */
token = strtok(NULL, search);