我想用定界符分割字符串并将定界符保留在令牌列表中
我有一个函数,其功能与strtok相同,但具有字符串定界符(而不是一组字符),但它不保留定界符,并且不能将定界符数组作为参数
此函数可以像strtok
一样将字符串拆分为标记,但要使用定界符
static char *strtokstr(char *str, char *delimiter)
{
static char *string;
char *end;
char *ret;
if (str != NULL)
string = str;
if (string == NULL)
return string;
end = strstr(string, delimiter);
if (end == NULL) {
char *ret = string;
string = NULL;
return ret;
}
ret = string;
*end = '\0';
string = end + strlen(delimiter);
return ret;
}
我想要一个char **split(char *str, char **delimiters_list)
,该字符串用一组定界符将字符串分开,并将定界符保留在令牌列表中
我认为我还需要一个函数来计算令牌的数量,以便我可以malloc
返回我的split
函数
// delimiters
是一个包含["&&", "||" and NULL]
的数组
split("ls > file&&foo || bar", delimiters)
应该返回包含["ls > file", "&&", "foo ", "||", " bar"]
如何实现?
答案 0 :(得分:2)
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
char **split(char *str, char **delimiters, int number_of_delimiters, int *number_of_rows_in_return_array);
int main()
{
char **split_str;
char *delimiters[] = {
"&&",
"||"
};
int rows_in_returned_array;
split_str = split("ls > file&&foo || bar && abc ||pqwe", delimiters, 2 , &rows_in_returned_array);
int i;
for (i = 0 ; i < rows_in_returned_array ; ++i)
{
printf("\n%s\n", split_str[i]);
}
return 0;
}
char **split(char *str, char **delimiters, int number_of_delimiters, int *number_of_rows_in_return_array)
{
//temporary storage for array to be returned
char temp_store[100][200];
int row = 0;//row size of array that will be returned
char **split_str;
int i, j, k, l, mark = 0;
char temp[100];
for (i = 0 ; str[i] != '\0' ; ++i)
{
//Iterating through all delimiters to check if any is str
for (j = 0 ; j < number_of_delimiters ; ++j )
{
l = i;
for (k = 0 ; delimiters[j][k] != '\0' ; ++k)
{
if (str[i] != delimiters[j][k])
{
break;
}
++l;
}
//This means delimiter is in string
if (delimiters[j][k] == '\0')
{
//store the string before delimiter
strcpy(temp_store[row], &str[mark]);
temp_store[row ++][i - mark] = '\0';
//store string after delimiter
strcpy(temp_store[row], &str[i]);
temp_store[row ++][k] = '\0';
//mark index where this delimiter ended
mark = l;
//Set i to where delimiter ends and break so that outermost loop
//can iterate from where delimiter ends
i = l - 1;
break;
}
}
}
//store the string remaining
strcpy(temp_store[row++], &str[mark]);
//Allocate the split_str and store temp_store into it
split_str = (char **)malloc(row * sizeof(char *));
for (i=0 ; i < row; i++)
{
split_str[i] = (char *)malloc(200 * sizeof(char));
strcpy(split_str[i], temp_store[i]);
}
*number_of_rows_in_return_array = row;
return split_str;
}
这可能应该起作用。请注意,我已通过ref传递了int * number_of_rows_in_return_array
,因为我们需要知道重新调整后的数组的行大小。
答案 1 :(得分:1)
首先,您在这里遇到内存错误:
static char *string;
if (str != NULL)
string = str;
if (string == NULL)
return string;
如果str
为NULL,则不初始化字符串,并且在比较中使用未初始化的值。
如果要复制字符串,则必须使用strdup
函数,=
将仅复制指针而不是指针内容。
这是一种实现方法:
#include <stdlib.h>
#include <string.h>
char *get_delimiters(char *str, char **delims)
{
for (int i = 0; delims[i]; i++)
if (!strncmp(str, delims[i], strlen(delims[i])))
return delims[i];
return NULL;
}
char **split(char *str, char **delimiters)
{
char *string = strdup(str);
char **result = NULL;
int n = 0;
char *delim = NULL;
for (int i = 0; string[i]; i++)
if (get_delimiters(string + i, delimiters))
n++;
result = malloc((n * 2 + 2) * sizeof(char *));
if (!result)
return NULL;
result[0] = string;
n = 1;
for (int i = 0; string[i]; i++) {
delim = get_delimiters(string + i, delimiters);
if (delim) {
string[i] = '\0';
result[n++] = delim;
result[n++] = string + i + strlen(delim);
}
}
result[n] = NULL;
return result;
}
结果:
[0]'ls>文件'
[1]'&&'
[2]'foo'
[3]'||'
[4]'bar'
请记住,result
和string
已分配,因此必须释放result
和result[0]
答案 2 :(得分:0)
我进入了抽象。首先,我创建了一个“句子”库,该库允许处理以NULL结尾的字符串列表(char*
)。我写了一些初始访问器(sentence_init
,sentence_size
,sentence_free
,sentence_add_str
等)。
然后我去split
,女巫变得非常,非常容易-如果找到了分隔符,则在该分隔符中添加字符串并将其添加到句子中。然后增加字符串指针的位置。如果找不到分隔符,则将剩余的字符串添加到句子中。
双指针tho确实存在问题,因为char **
不能隐式转换为const char **
。对于生产代码,我可能打算重构代码,并尝试考虑const
-正确性。
#define _GNU_SOURCE 1
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <assert.h>
#include <stdbool.h>
/*
* sentence - list of words
*/
/* ----------------------------------------------------------- */
// if this would be production code, I think I would go with a
// struct word_t { char *word; }; struct sentence_t { struct word_t *words; };
// Note: when sentence_add_* fail - they free *EVERYTHING*, so it doesn't work like realloc
// shared_ptr? Never heard of it.
char **sentence_init(void) {
return NULL;
}
size_t sentence_size(char * const *t) {
if (t == NULL) return 0;
size_t i;
for (i = 0; t[i] != NULL; ++i) {
continue;
}
return i;
}
void sentence_free(char * const *t) {
if (t == NULL) return;
for (char * const *i = t; *i != NULL; ++i) {
free(*i);
}
free((void*)t);
}
void sentence_printex(char * const *t, const char *fmt1, const char *delim, const char *end) {
for (char * const *i = t; *i != NULL; ++i) {
printf(fmt1, *i);
if (*(i + 1) != NULL) {
printf(delim);
}
}
printf(end);
}
void sentence_print(char * const *t) {
sentence_printex(t, "%s", " ", "\n");
}
void sentence_print_quote_words(char * const *t) {
sentence_printex(t, "'%s'", " ", "\n");
}
bool sentence_cmp_const(const char * const *t, const char * const *other) {
const char * const *t_i = t;
const char * const *o_i = other;
while (*t_i != NULL && o_i != NULL) {
if (strcmp(*t_i, *o_i) != 0) {
return false;
}
++t_i;
++o_i;
}
return *t_i == NULL && *o_i == NULL;
}
// thet's always funny, because "dupa" in my language means "as*"
char **sentence_add_strdupped(char **t, char *strdupped) {
const size_t n = sentence_size(t);
const size_t add = 1 + 1;
const size_t new_n = n + add;
void * const pnt = realloc(t, new_n * sizeof(char*));
if (pnt == NULL) goto REALLOC_FAIL;
// we have to have place for terminating NULL pointer
assert(new_n >= 2);
t = pnt;
t[new_n - 2] = strdupped;
t[new_n - 1] = NULL;
// ownership of str goes to t
return t;
// ownership of str stays in the caller
REALLOC_FAIL:
sentence_free(t);
return NULL;
}
char **sentence_add_strlened(char **t, const char *str, size_t len) {
char *strdupped = malloc(len + 1);
if (strdupped == NULL) goto MALLOC_FAIL;
memcpy(strdupped, str, len);
strdupped[len] = '\0';
t = sentence_add_strdupped(t, strdupped);
if (t == NULL) goto SENTENCE_ADD_STRDUPPED_FAIL;
return t;
SENTENCE_ADD_STRDUPPED_FAIL:
free(strdupped);
MALLOC_FAIL:
sentence_free(t);
return NULL;
}
char **sentence_add_str(char **t, const char *str) {
const size_t str_len = strlen(str);
return sentence_add_strlened(t, str, str_len);
}
/* ----------------------------------------------------------- */
/**
* Puff. Run strstr for each of the elements inside NULL delimeters dellist.
* If any returns not NULL, return the pointer as returned by strstr
* And fill dellist_found with the pointer inside dellist (can be NULL).
* Finally! A 3 star award is mine!
*/
char *str_find_any_strings(const char *str,
const char * const *dellist,
const char * const * *dellist_found) {
assert(str != NULL);
assert(dellist != NULL);
for (const char * const *i = &dellist[0]; *i != NULL; ++i) {
const char *found = strstr(str, *i);
if (found != NULL) {
if (dellist_found != NULL) {
*dellist_found = i;
}
// __UNCONST(found)
return (char*)found;
}
}
return NULL;
}
/**
* Split the string str according to the list od delimeters dellist
* @param str
* @param dellist
* @return returns a dictionary
*/
char **split(const char *str, const char * const *dellist) {
assert(str != NULL);
assert(dellist != NULL);
char **sen = sentence_init();
while (*str != '\0') {
const char * const *del_pnt = NULL;
const char *found = str_find_any_strings(str, dellist, &del_pnt);
if (found == NULL) {
// we don't want an empty string to be the last...
if (*str != '\0') {
sen = sentence_add_str(sen, str);
if (sen == NULL) return NULL;
}
break;
}
// Puff, so a delimeter is found at &str[found - str]
const size_t idx = found - str;
sen = sentence_add_strlened(sen, str, idx);
if (sen == NULL) return NULL;
assert(del_pnt != NULL);
const char *del = *del_pnt;
assert(del != NULL);
assert(*del != '\0');
const size_t del_len = strlen(del);
sen = sentence_add_strlened(sen, del, del_len);
if (sen == NULL) return NULL;
str += idx + del_len;
}
return sen;
}
int main()
{
char **sentence = split("ls > file&&foo || bar", (const char*[]){"&&", "||", NULL});
assert(sentence != NULL);
sentence_print_quote_words(sentence);
printf("cmp = %d\n", sentence_cmp_const((void*)sentence, (const char*[]){"ls > file", "&&", "foo ", "||", " bar", NULL}));
sentence_free(sentence);
return 0;
}
程序将输出:
'ls > file' '&&' 'foo ' '||' ' bar'
cmp = 1