需要帮助,以定界符分割字符串(并将定界符保留在令牌列表中)

时间:2019-05-25 10:05:17

标签: c string split

我想用定界符分割字符串并将定界符保留在令牌列表中

我有一个函数,其功能与strtok相同,但具有字符串定界符(而不是一组字符),但它不保留定界符,并且不能将定界符数组作为参数

此函数可以像strtok一样将字符串拆分为标记,但它使用一个完整的字符串(而不是一组字符)作为定界符:

/*
 * strtokstr - strtok-like tokenizer whose delimiter is a whole string.
 *
 * str:       string to tokenize on the first call, NULL on later calls to
 *            continue with the string remembered from the previous call.
 *            The buffer is modified: each delimiter found is overwritten
 *            with a '\0' at its first byte.
 * delimiter: the exact substring that separates tokens.  A NULL or empty
 *            delimiter never matches, so the rest of the input is returned
 *            as a single token.
 *
 * Returns a pointer to the next token inside the original buffer, or NULL
 * when the input is exhausted.  Unlike strtok, adjacent delimiters yield
 * empty tokens.  The position is kept in a static variable, so the function
 * is not reentrant.
 */
static char *strtokstr(char *str, char *delimiter)
{
    static char *string; /* resume position; NULL once the input is used up */
    char *token;
    char *end;

    if (str != NULL)
        string = str;
    if (string == NULL)
        return NULL;

    token = string;

    /* strstr(s, "") returns s itself, which would clobber the first byte and
     * never advance; treat an empty or missing delimiter as "not found". */
    if (delimiter != NULL && *delimiter != '\0')
        end = strstr(string, delimiter);
    else
        end = NULL;

    if (end == NULL) {
        /* Last token: nothing left for the next call. */
        string = NULL;
        return token;
    }

    *end = '\0';
    string = end + strlen(delimiter);
    return token;
}

我想要一个char **split(char *str, char **delimiters_list),该字符串用一组定界符将字符串分开,并将定界符保留在令牌列表中

我认为我还需要一个函数来计算令牌的数量,以便为split函数的返回值malloc足够的空间

// delimiters是一个包含["&&", "||", NULL]的数组;split("ls > file&&foo || bar", delimiters)应该返回包含["ls > file", "&&", "foo ", "||", " bar"]的数组

如何实现?

3 个答案:

答案 0 :(得分:2)

#include <string.h>
#include <stdio.h>
#include <stdlib.h>


char **split(char *str, char **delimiters, int number_of_delimiters, int *number_of_rows_in_return_array);

/*
 * Demo driver: splits a sample command line on "&&" and "||", prints every
 * piece (delimiters included) on its own line, then releases the result.
 *
 * split() hands back a heap-allocated array of heap-allocated strings, so
 * both the rows and the array itself are freed here.
 */
int main(void)
{
    char *delimiters[] = {
        "&&",
        "||"
    };
    /* Derive the count from the array so it cannot drift out of sync. */
    int number_of_delimiters = (int)(sizeof delimiters / sizeof delimiters[0]);
    int rows_in_returned_array = 0;

    char **split_str = split("ls > file&&foo || bar && abc ||pqwe", delimiters,
                             number_of_delimiters, &rows_in_returned_array);
    if (split_str == NULL)
    {
        fprintf(stderr, "split failed\n");
        return EXIT_FAILURE;
    }

    for (int i = 0; i < rows_in_returned_array; ++i)
    {
        printf("\n%s\n", split_str[i]);
        free(split_str[i]);
    }
    free(split_str);

    return 0;
}


/*
 * Length of the delimiter that starts exactly at s, or 0 when none does.
 * Delimiters are tried in array order; NULL and empty entries are skipped
 * because an empty delimiter would match everywhere without consuming input.
 */
static size_t split_match_len(const char *s, char **delimiters, int number_of_delimiters)
{
    for (int j = 0; j < number_of_delimiters; ++j)
    {
        const char *candidate = delimiters[j];
        size_t len;

        if (candidate == NULL || candidate[0] == '\0')
        {
            continue;
        }

        len = strlen(candidate);
        /* strncmp stops at the terminator of s, so this never reads past it. */
        if (strncmp(s, candidate, len) == 0)
        {
            return len;
        }
    }

    return 0;
}

/* Heap copy of the len bytes starting at start, NUL-terminated; NULL on failure. */
static char *split_copy_range(const char *start, size_t len)
{
    char *copy = malloc(len + 1);

    if (copy != NULL)
    {
        memcpy(copy, start, len);
        copy[len] = '\0';
    }

    return copy;
}

/*
 * Split str on any of the given delimiter strings, keeping the delimiters
 * as rows of their own.
 *
 * str:                             string to split; it is not modified.
 * delimiters:                      array of delimiter strings.
 * number_of_delimiters:            number of entries in delimiters.
 * number_of_rows_in_return_array:  receives the number of rows returned
 *                                  (0 on failure).
 *
 * For k delimiters found the result has 2k + 1 rows: the text before each
 * delimiter (possibly empty), the delimiter itself, and finally the text
 * after the last delimiter (possibly empty).
 *
 * Returns a heap-allocated array of heap-allocated strings; the caller frees
 * every row and then the array.  Returns NULL when str or the out-parameter
 * is NULL, or when an allocation fails.
 */
char **split(char *str, char **delimiters, int number_of_delimiters, int *number_of_rows_in_return_array)
{
    char **split_str;
    size_t hits = 0;
    size_t rows;
    size_t row = 0;
    size_t mark = 0; /* start of the text not yet stored */
    size_t i;

    if (number_of_rows_in_return_array == NULL)
    {
        return NULL;
    }
    *number_of_rows_in_return_array = 0;

    if (str == NULL)
    {
        return NULL;
    }
    if (delimiters == NULL)
    {
        number_of_delimiters = 0;
    }

    /* First pass: count delimiters so the row array can be sized exactly. */
    for (i = 0; str[i] != '\0';)
    {
        size_t len = split_match_len(&str[i], delimiters, number_of_delimiters);

        if (len != 0)
        {
            ++hits;
            i += len;
        }
        else
        {
            ++i;
        }
    }

    rows = 2 * hits + 1;
    /* calloc zeroes the slots, so the failure path can free them blindly. */
    split_str = calloc(rows, sizeof *split_str);
    if (split_str == NULL)
    {
        return NULL;
    }

    /* Second pass: copy out "text before", "delimiter" for every match. */
    for (i = 0; str[i] != '\0';)
    {
        size_t len = split_match_len(&str[i], delimiters, number_of_delimiters);

        if (len == 0)
        {
            ++i;
            continue;
        }

        split_str[row] = split_copy_range(&str[mark], i - mark);
        if (split_str[row] == NULL)
        {
            goto fail;
        }
        ++row;

        split_str[row] = split_copy_range(&str[i], len);
        if (split_str[row] == NULL)
        {
            goto fail;
        }
        ++row;

        /* Resume scanning right after the delimiter. */
        i += len;
        mark = i;
    }

    /* Whatever follows the last delimiter (possibly an empty string). */
    split_str[row] = split_copy_range(&str[mark], i - mark);
    if (split_str[row] == NULL)
    {
        goto fail;
    }

    *number_of_rows_in_return_array = (int)rows;

    return split_str;

fail:
    for (i = 0; i < rows; ++i)
    {
        free(split_str[i]);
    }
    free(split_str);

    return NULL;
}

这应该可以工作。请注意,我通过指针传递了int *number_of_rows_in_return_array,因为调用者需要知道返回数组的行数。

答案 1 :(得分:1)

首先,您在这里遇到内存错误:

static char *string;

if (str != NULL)
    string = str;
if (string == NULL)
    return string;

如果str为NULL,则不初始化字符串,并且在比较中使用未初始化的值。

如果要复制字符串,则必须使用strdup函数,=将仅复制指针而不是指针内容。


这是一种实现方法:

#include <stdlib.h>
#include <string.h>

/*
 * Return the first entry of the NULL-terminated list delims that str starts
 * with, or NULL when str starts with none of them.
 *
 * Empty entries are skipped: they would "match" at every position without
 * consuming any input.
 */
char *get_delimiters(char *str, char **delims)
{
  for (int i = 0; delims[i]; i++) {
    size_t len = strlen(delims[i]);

    if (len != 0 && strncmp(str, delims[i], len) == 0)
      return delims[i];
  }
  return NULL;
}

/*
 * Split str on any of the delimiter strings in the NULL-terminated list
 * delimiters, keeping the delimiters in the result.
 *
 * The result is a NULL-terminated array laid out as
 *   token, delimiter, token, delimiter, ..., token, NULL
 * Tokens point into one private heap copy of str (str itself is left
 * untouched); delimiter entries point at the caller's delimiter strings.
 *
 * Ownership: result[0] is the start of the private copy, so the caller
 * releases everything with free(result[0]); free(result);
 *
 * Returns NULL if str or delimiters is NULL, or if an allocation fails.
 */
char **split(char *str, char **delimiters)
{
  char *string;
  char **result;
  char *delim;
  size_t size;
  size_t n = 0;
  size_t i;

  if (!str || !delimiters)
    return NULL;

  /* Private, writable copy (malloc + memcpy keeps this plain ISO C). */
  size = strlen(str) + 1;
  string = malloc(size);
  if (!string)
    return NULL;
  memcpy(string, str, size);

  /* Count delimiters, stepping over each match exactly as the split pass
   * below does, so both passes agree on the number of entries. */
  for (i = 0; string[i];) {
    delim = get_delimiters(string + i, delimiters);
    if (delim) {
      n++;
      i += strlen(delim);
    } else {
      i++;
    }
  }

  /* n delimiters -> n + 1 tokens, plus the terminating NULL. */
  result = malloc((n * 2 + 2) * sizeof *result);
  if (!result) {
    free(string);
    return NULL;
  }

  result[0] = string;
  n = 1;
  for (i = 0; string[i];) {
    delim = get_delimiters(string + i, delimiters);
    if (delim) {
      size_t len = strlen(delim);

      string[i] = '\0'; /* terminate the token that ends here */
      result[n++] = delim;
      result[n++] = string + i + len;
      /* Continue after the delimiter so its own bytes are never rescanned. */
      i += len;
    } else {
      i++;
    }
  }
  result[n] = NULL;
  return result;
}

结果:

  

[0] 'ls > file'
  [1] '&&'
  [2] 'foo '
  [3] '||'
  [4] ' bar'

请记住,result 和 string 都是动态分配的,因此必须释放 result 和 result[0]。

答案 2 :(得分:0)

我采用了抽象的方式。首先,我创建了一个“句子”库,用于处理以NULL结尾的字符串(char*)列表。我写了一些基本的访问函数(sentence_init、sentence_size、sentence_free、sentence_add_str等)。

然后我开始写split,它变得非常非常容易——如果找到了分隔符,则把分隔符之前的字符串添加到句子中,再把分隔符本身添加到句子中,然后向前移动字符串指针的位置。如果找不到分隔符,则将剩余的字符串添加到句子中。

不过双指针确实存在问题,因为char **不能隐式转换为const char **。对于生产代码,我可能会重构代码,并尽量考虑const正确性。

#define _GNU_SOURCE 1
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <assert.h>
#include <stdbool.h>

/*
 * sentence - list of words
 */
/* ----------------------------------------------------------- */

// if this would be production code, I think I would go with a
// struct word_t { char *word; }; struct sentence_t { struct word_t *words; };
// Note: when sentence_add_* fail - they free *EVERYTHING*, so it doesn't work like realloc
// shared_ptr? Never heard of it.

/*
 * Create an empty sentence.
 * An empty sentence is represented by a NULL pointer; nothing is allocated.
 */
char **sentence_init(void) {
    char **empty_sentence = NULL;

    return empty_sentence;
}

/*
 * Number of words in the sentence t.
 * t is either NULL (counted as 0 words) or an array of string pointers
 * terminated by a NULL entry; the terminator is not counted.
 */
size_t sentence_size(char * const *t) {
    size_t count = 0;

    if (t != NULL) {
        while (t[count] != NULL) {
            ++count;
        }
    }
    return count;
}

/*
 * Release a sentence: every word up to the NULL terminator is passed to
 * free(), then the pointer array itself is freed.
 * A NULL sentence is accepted and ignored.
 */
void sentence_free(char * const *t) {
    if (t != NULL) {
        size_t idx = 0;

        while (t[idx] != NULL) {
            free(t[idx]);
            ++idx;
        }
        /* the cast drops the const on the array elements so free() accepts it */
        free((void*)t);
    }
}

/*
 * Print the words of sentence t to stdout.
 *
 * t:     NULL-terminated list of words; NULL is treated as an empty
 *        sentence (only end is printed).
 * fmt1:  printf format applied to each word; it is called with exactly one
 *        char * argument, so it must consume exactly one string.
 * delim: text written between two consecutive words (not after the last).
 * end:   text written once after the last word.
 *
 * delim and end are written verbatim with fputs rather than being used as
 * printf formats, so a '%' in them is printed as-is and can never make
 * printf read arguments that were not passed.
 */
void sentence_printex(char * const *t, const char *fmt1, const char *delim, const char *end) {
    if (t != NULL) {
        for (char * const *i = t; *i != NULL; ++i) {
            printf(fmt1, *i);
            if (*(i + 1) != NULL) {
                fputs(delim, stdout);
            }
        }
    }
    fputs(end, stdout);
}

/*
 * Print the sentence on one line: words as-is, separated by a single space,
 * followed by a newline.
 */
void sentence_print(char * const *t) {
    const char *word_format = "%s";
    const char *separator = " ";
    const char *terminator = "\n";

    sentence_printex(t, word_format, separator, terminator);
}

/*
 * Print the sentence on one line with every word wrapped in single quotes,
 * words separated by a single space, followed by a newline.
 */
void sentence_print_quote_words(char * const *t) {
    const char *quoted_word_format = "'%s'";
    const char *separator = " ";
    const char *terminator = "\n";

    sentence_printex(t, quoted_word_format, separator, terminator);
}

/*
 * Compare two sentences word by word.
 *
 * t, other: NULL-terminated lists of strings; a NULL list is treated as an
 *           empty sentence.
 *
 * Returns true when both sentences have the same number of words and every
 * pair of words at the same index compares equal with strcmp; false
 * otherwise (including when one sentence is a strict prefix of the other).
 */
bool sentence_cmp_const(const char * const *t, const char * const *other) {
    static const char * const empty_sentence[] = { NULL };
    const char * const *t_i = (t != NULL) ? t : empty_sentence;
    const char * const *o_i = (other != NULL) ? other : empty_sentence;

    /* Stop at the first terminator on EITHER side: the entries must be
     * checked (*o_i), not just the iterator, before handing them to strcmp. */
    while (*t_i != NULL && *o_i != NULL) {
        if (strcmp(*t_i, *o_i) != 0) {
            return false;
        }
        ++t_i;
        ++o_i;
    }
    /* Equal only if both lists ended at the same time. */
    return *t_i == NULL && *o_i == NULL;
}

// that's always funny, because "dupa" in my language means "as*"
/*
 * Append the already-allocated string strdupped to sentence t.
 *
 * The pointer array is grown with realloc to hold the new word plus the
 * terminating NULL entry.
 *
 * On success the (possibly moved) sentence is returned and it now owns
 * strdupped.  On allocation failure the whole sentence t is freed and NULL
 * is returned; strdupped is NOT freed and stays owned by the caller.
 */
char **sentence_add_strdupped(char **t, char *strdupped) {
    const size_t old_count = sentence_size(t);
    /* one slot for the new word, one for the NULL terminator */
    const size_t new_count = old_count + 2;

    char **grown = realloc(t, new_count * sizeof(char*));
    if (grown == NULL) {
        /* realloc left t untouched; this API frees it rather than return it */
        sentence_free(t);
        return NULL;
    }

    assert(new_count >= 2);
    grown[old_count] = strdupped;
    grown[old_count + 1] = NULL;

    return grown;
}

/*
 * Append a copy of the first len bytes of str to sentence t.
 *
 * The bytes are copied into a fresh malloc'ed buffer and NUL-terminated, so
 * str does not need to be terminated at len.
 *
 * Returns the (possibly moved) sentence on success.  On any allocation
 * failure the sentence is freed and NULL is returned.
 */
char **sentence_add_strlened(char **t, const char *str, size_t len) {
    char *copy = malloc(len + 1);
    if (copy == NULL) {
        sentence_free(t);
        return NULL;
    }

    memcpy(copy, str, len);
    copy[len] = '\0';

    char **grown = sentence_add_strdupped(t, copy);
    if (grown == NULL) {
        /* sentence_add_strdupped already released the sentence itself;
         * only the word that never made it into the list is left to free */
        free(copy);
        return NULL;
    }

    return grown;
}

/*
 * Append a copy of the NUL-terminated string str to sentence t.
 * Thin wrapper over sentence_add_strlened using the full length of str;
 * the return value is whatever that call returns.
 */
char **sentence_add_str(char **t, const char *str) {
    return sentence_add_strlened(t, str, strlen(str));
}

/* ----------------------------------------------------------- */

/**
 * Find the earliest occurrence in str of any string from dellist.
 *
 * Every non-empty entry of the NULL-terminated list dellist is searched for
 * with strstr, and the match that starts closest to the beginning of str
 * wins.  If several entries match at that same position, the one that comes
 * first in dellist wins.  Empty entries are ignored, since they would match
 * at offset 0 of any string.
 *
 * @param str            string to search; must not be NULL
 * @param dellist        NULL-terminated list of strings; must not be NULL
 * @param dellist_found  if not NULL and a match is found, receives a pointer
 *                       to the matching entry inside dellist; left untouched
 *                       when nothing matches
 * @return pointer into str at the start of the winning match, or NULL when
 *         no entry occurs in str
 */
char *str_find_any_strings(const char *str,
        const char * const *dellist,
        const char * const * *dellist_found) {
    assert(str != NULL);
    assert(dellist != NULL);

    const char *best = NULL;
    const char * const *best_entry = NULL;

    for (const char * const *i = &dellist[0]; *i != NULL; ++i) {
        if (**i == '\0') {
            continue;
        }

        const char *found = strstr(str, *i);
        /* strict '<' keeps the earlier list entry on a tie */
        if (found != NULL && (best == NULL || found < best)) {
            best = found;
            best_entry = i;
            if (best == str) {
                // nothing can start earlier than the very first character
                break;
            }
        }
    }

    if (best != NULL && dellist_found != NULL) {
        *dellist_found = best_entry;
    }
    // strstr-style interface: constness of the result follows the caller's str
    return (char*)best;
}

/**
 * Split str on the delimiters in dellist, keeping the delimiters as words.
 *
 * The string is consumed left to right.  Each time str_find_any_strings
 * reports a match, the text in front of the match (possibly empty) and then
 * the delimiter itself are appended to the result; scanning resumes right
 * after the delimiter.  When nothing matches any more, the remaining text is
 * appended as the last word.  A delimiter at the very end of str is not
 * followed by an empty word.
 *
 * @param str      string to split; must not be NULL; it is not modified
 * @param dellist  NULL-terminated list of delimiter strings; must not be NULL
 * @return a NULL-terminated list of newly allocated words, to be released
 *         with sentence_free; NULL if str is empty or an allocation failed
 */
char **split(const char *str, const char * const *dellist) {
    assert(str != NULL);
    assert(dellist != NULL);

    char **words = sentence_init();
    const char *cursor = str;

    while (*cursor != '\0') {
        const char * const *matched_entry = NULL;
        const char *hit = str_find_any_strings(cursor, dellist, &matched_entry);

        if (hit == NULL) {
            // no delimiter left: the (non-empty) tail is the final word;
            // on failure this is NULL, which is what we report anyway
            return sentence_add_str(words, cursor);
        }

        // text in front of the delimiter
        const size_t prefix_len = (size_t)(hit - cursor);
        words = sentence_add_strlened(words, cursor, prefix_len);
        if (words == NULL) {
            return NULL;
        }

        // the delimiter itself
        assert(matched_entry != NULL);
        const char *delimiter = *matched_entry;
        assert(delimiter != NULL);
        assert(*delimiter != '\0');
        const size_t delimiter_len = strlen(delimiter);
        words = sentence_add_strlened(words, delimiter, delimiter_len);
        if (words == NULL) {
            return NULL;
        }

        cursor = hit + delimiter_len;
    }

    return words;
}

/*
 * Demo: split a sample command line on "&&" and "||", print the quoted
 * words, compare them against the expected list and release the result.
 */
int main()
{
    const char *delimiters[] = {"&&", "||", NULL};
    const char *expected[] = {"ls > file", "&&", "foo ", "||", " bar", NULL};

    char **sentence = split("ls > file&&foo || bar", delimiters);
    assert(sentence != NULL);

    sentence_print_quote_words(sentence);

    /* char ** does not convert implicitly to const char * const *, hence the cast */
    const int same = sentence_cmp_const((void*)sentence, expected);
    printf("cmp = %d\n", same);

    sentence_free(sentence);

    return 0;
}

程序将输出:

'ls > file' '&&' 'foo ' '||' ' bar'
cmp = 1