如何编写一个函数来拆分和返回带有C编程语言分隔符的字符串的数组?
char* str = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC";
str_split(str,',');
答案 0 :(得分:149)
您可以使用strtok()
函数拆分字符串(并指定要使用的分隔符)。请注意,strtok()
将修改传递给它的字符串。如果其他地方需要原始字符串,请复制该字符串并将副本传递给strtok()
。
编辑:
示例(注意它不处理连续的分隔符,例如“JAN ,,, FEB,MAR”):
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
/**
 * Split a_str on a_delim and return the tokens as a NULL-terminated array.
 *
 * The input is tokenized with strtok(), so a_str is modified in place and
 * runs of consecutive delimiters (as well as leading/trailing ones) produce
 * no empty tokens.
 *
 * @param a_str   writable, NUL-terminated string to split (not a literal).
 * @param a_delim single delimiter character.
 * @return heap array of heap-allocated token copies terminated by NULL, or
 *         NULL if a_str is NULL or an allocation fails. The caller frees
 *         every element and then the array itself.
 */
char** str_split(char* a_str, const char a_delim)
{
    char** result = NULL;
    size_t count = 0;
    size_t idx = 0;
    char* tmp;
    char* token;
    char delim[2];

    if (a_str == NULL)
    {
        return NULL;
    }

    delim[0] = a_delim;
    delim[1] = '\0';

    /* Upper bound on the token count: one more than the delimiter count. */
    for (tmp = a_str; *tmp; tmp++)
    {
        if (a_delim == *tmp)
        {
            count++;
        }
    }
    count += 2; /* last token + terminating NULL entry */

    result = malloc(sizeof(char*) * count);
    if (result == NULL)
    {
        return NULL;
    }

    for (token = strtok(a_str, delim); token; token = strtok(NULL, delim))
    {
        size_t len = strlen(token) + 1;
        char* copy = malloc(len);

        if (copy == NULL)
        {
            /* Undo the partial result so nothing leaks. */
            while (idx > 0)
            {
                free(result[--idx]);
            }
            free(result);
            return NULL;
        }
        memcpy(copy, token, len);
        result[idx++] = copy;
    }

    /* idx can be smaller than the bound when delimiters repeat. */
    result[idx] = NULL;
    return result;
}
/* Demo driver: split the month list, print each token and release it. */
int main()
{
    char months[] = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC";
    char** tokens;
    char** cursor;

    printf("months=[%s]\n\n", months);

    tokens = str_split(months, ',');
    if (!tokens)
    {
        return 0;
    }

    /* The array is NULL-terminated, so walk it until the sentinel. */
    for (cursor = tokens; *cursor != NULL; cursor++)
    {
        printf("month=[%s]\n", *cursor);
        free(*cursor);
    }
    printf("\n");
    free(tokens);

    return 0;
}
输出:
$ ./main.exe
months=[JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC]
month=[JAN]
month=[FEB]
month=[MAR]
month=[APR]
month=[MAY]
month=[JUN]
month=[JUL]
month=[AUG]
month=[SEP]
month=[OCT]
month=[NOV]
month=[DEC]
答案 1 :(得分:60)
我认为strsep
仍然是最好的工具:
while ((token = strsep(&str, ","))) my_fn(token);
这实际上是一行分割字符串。
额外的括号是一个样式元素,表示我们有意测试赋值的结果,而不是相等运算符==
。
要使该模式生效,token
和str
都有char *
类型。如果你开始使用字符串文字,那么你首先需要复制它:
// More general pattern: tokenize a private heap copy, never the literal.
const char *my_str_literal = "JAN,FEB,MAR";
char *tofree = strdup(my_str_literal); // We own this memory now.
char *str = tofree;                    // strsep advances str, so keep tofree for free().
char *token;
while ((token = strsep(&str, ",")) != NULL) {
    my_fn(token);
}
free(tofree);
如果在str
中一起显示两个分隔符,则会得到一个token
值,即空字符串。 str
的值被修改,因为遇到的每个分隔符都被零字节覆盖 - 这是另一个复制首先被解析的字符串的好理由。
在评论中,有人建议strtok
优于strsep
,因为strtok
更具可移植性。 Ubuntu和Mac OS X有strsep
;可以安全地猜测其他unixy系统也能做到。 Windows缺少strsep
,但它有strpbrk
,可以实现这个简短而甜蜜的strsep
替换:
/*
 * Portable stand-in for strsep(): returns the token that starts at *stringp,
 * overwrites the first character found in delim with NUL and advances
 * *stringp past it. When no delimiter remains *stringp becomes NULL, and a
 * NULL *stringp makes the call return NULL.
 */
char *strsep(char **stringp, const char *delim) {
    char *head = *stringp;
    char *cut;

    if (head == NULL) {
        return NULL;
    }

    cut = strpbrk(head, delim);
    if (cut == NULL) {
        *stringp = NULL;        /* last token: nothing left to scan */
    } else {
        *cut = '\0';
        *stringp = cut + 1;
    }
    return head;
}
Here是对strsep
vs strtok
的一个很好的解释。可以主观判断利弊;但是,我认为这是一个明显的迹象,strsep
被设计为strtok
的替代。
答案 2 :(得分:25)
String tokenizer这段代码应该让你朝着正确的方向前进。
/* Prints each token of the sentence on its own line using strtok(). */
int main(void) {
    char st[] ="Where there is will, there is a way.";
    char *ch;

    /* First call splits on spaces only; later calls also treat ',' as a delimiter. */
    for (ch = strtok(st, " "); ch != NULL; ch = strtok(NULL, " ,")) {
        printf("%s\n", ch);
    }
    getch();
    return 0;
}
答案 3 :(得分:12)
下面的方法将为您完成所有工作(内存分配,计算长度)。可以在此处找到更多信息和说明 - Implementation of Java String.split() method to split C string
/**
 * Split str on every occurrence of c into freshly allocated strings.
 *
 * Empty tokens are kept, so the result always holds (number of delimiters
 * + 1) entries; str itself is not modified.
 *
 * @param str NUL-terminated input string.
 * @param c   delimiter character.
 * @param arr receives a heap array of heap-allocated, NUL-terminated tokens;
 *            the caller frees each element and then the array.
 * @return number of tokens stored in *arr, or -1 (with *arr set to NULL)
 *         when an allocation fails.
 */
int split (const char *str, char c, char ***arr)
{
    const char *p;
    const char *start;
    char **out;
    int count = 1;
    int i = 0;

    /* One token more than there are delimiters. */
    for (p = str; *p != '\0'; p++)
    {
        if (*p == c)
            count++;
    }

    out = malloc(sizeof *out * (size_t) count);
    if (out == NULL)
    {
        *arr = NULL;
        return -1;
    }

    start = str;
    for (p = str; ; p++)
    {
        size_t len;

        if (*p != c && *p != '\0')
            continue;

        /* [start, p) is one complete token, possibly empty. */
        len = (size_t) (p - start);
        out[i] = malloc(len + 1);
        if (out[i] == NULL)
        {
            while (i > 0)
                free(out[--i]);
            free(out);
            *arr = NULL;
            return -1;
        }
        memcpy(out[i], start, len);
        out[i][len] = '\0';
        i++;

        if (*p == '\0')
            break;
        start = p + 1;
    }

    *arr = out;
    return count;
}
如何使用它:
/* Demo driver: split a sentence on spaces, print the tokens, release them. */
int main (int argc, char ** argv)
{
    const char *s = "Hello, this is a test module for the string splitting.";
    char **arr = NULL;
    int c;
    int i;

    (void) argc;
    (void) argv;

    c = split(s, ' ', &arr);
    printf("found %d tokens.\n", c);
    for (i = 0; i < c; i++)
    {
        printf("string #%d: %s\n", i, arr[i]);
        free(arr[i]);   /* each token is a separate allocation */
    }
    free(arr);
    return 0;
}
答案 4 :(得分:6)
这是我的两分钱:
/**
 * Split txt on delim into newly allocated strings (empty tokens included).
 *
 * @param txt    NUL-terminated input; it is only read.
 * @param delim  delimiter character.
 * @param tokens receives a heap array of heap-allocated tokens; the caller
 *               frees each element and then the array.
 * @return the number of tokens (delimiter count + 1), or -1 with *tokens set
 *         to NULL when memory cannot be allocated.
 */
int split (const char *txt, char delim, char ***tokens)
{
    const char *p;
    const char *start;
    char **arr;
    int count = 1;
    int filled = 0;

    for (p = txt; *p != '\0'; p++)
        if (*p == delim) count += 1;

    /* calloc keeps unfilled slots NULL, which simplifies cleanup. */
    arr = calloc ((size_t) count, sizeof *arr);
    if (arr == NULL) goto fail;

    for (start = p = txt; filled < count; p++)
    {
        size_t len;

        if (*p != delim && *p != '\0') continue;

        /* Token spans [start, p); allocate exactly len + 1 bytes for it. */
        len = (size_t) (p - start);
        arr[filled] = malloc (len + 1);
        if (arr[filled] == NULL) goto fail;
        memcpy (arr[filled], start, len);
        arr[filled][len] = '\0';
        filled++;
        start = p + 1;
    }

    *tokens = arr;
    return count;

fail:
    if (arr != NULL)
    {
        while (filled > 0) free (arr[--filled]);
        free (arr);
    }
    *tokens = NULL;
    return -1;
}
用法:
char **tokens;
int count, i;
const char *str = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC";
count = split (str, ',', &tokens);
for (i = 0; i < count; i++) printf ("%s\n", tokens[i]);
/* freeing tokens */
for (i = 0; i < count; i++) free (tokens[i]);
free (tokens);
答案 5 :(得分:4)
在上面的例子中,有一种方法可以在字符串中返回一个空终止字符串数组(就像你想要的那样)。虽然它不可能传递文字字符串,因为它必须由函数修改:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/**
 * Split str in place on delim and return pointers to the pieces.
 *
 * Every delimiter in str is overwritten with NUL and the returned array
 * points into str itself, so str must be writable and must outlive the
 * array. Empty pieces are kept.
 *
 * @param str       writable NUL-terminated string (not a literal).
 * @param delim     delimiter character; must not be '\0'.
 * @param numSplits optional; receives the number of pieces, or -1 on failure.
 * @return NULL-terminated heap array (free() the array only), or NULL when
 *         str is NULL, delim is '\0' or the allocation fails.
 */
char** str_split( char* str, char delim, int* numSplits )
{
    char** ret = NULL;
    int retLen = -1;

    if ( ( str != NULL ) && ( delim != '\0' ) )
    {
        int pieces = 1;
        char* c;

        /* Pre-calculate number of elements: delimiters + 1 */
        for ( c = str; *c != '\0'; c++ )
        {
            if ( *c == delim )
            {
                pieces++;
            }
        }

        /* One extra slot for the terminating NULL entry. */
        ret = malloc( ( (size_t)pieces + 1 ) * sizeof( *ret ) );
        if ( ret != NULL )
        {
            retLen = 0;
            ret[retLen++] = str;
            for ( c = str; *c != '\0'; c++ )
            {
                if ( *c == delim )
                {
                    *c = '\0';
                    ret[retLen++] = &c[1];
                }
            }
            ret[retLen] = NULL;
        }
    }

    if ( numSplits != NULL )
    {
        *numSplits = retLen;
    }

    return ret;
}
/* Demo driver: copy the literal into writable memory, split it, print the pieces. */
int main( int argc, char* argv[] )
{
    const char* str = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC";
    char* strCpy;
    char** split;
    int num = 0;
    int i;

    (void)argc;
    (void)argv;

    /* +1 for the terminating NUL that strcpy() writes. */
    strCpy = malloc( strlen( str ) + 1 );
    if ( strCpy == NULL )
    {
        puts( "out of memory" );
        return 1;
    }
    strcpy( strCpy, str );

    split = str_split( strCpy, ',', &num );

    if ( split == NULL )
    {
        puts( "str_split returned NULL" );
    }
    else
    {
        printf( "%i Results: \n", num );

        for ( i = 0; i < num; i++ )
        {
            puts( split[i] );
        }
    }

    /* The pieces point into strCpy, so only these two blocks are freed. */
    free( split );
    free( strCpy );

    return 0;
}
可能有一种更简洁的方法,但你明白了。
答案 6 :(得分:3)
尝试使用它。
/**
 * Split str into tokens separated by any character of delim.
 *
 * Follows strtok() conventions (runs of delimiters are collapsed, no empty
 * tokens) but never modifies str and keeps no static state.
 *
 * @param str   NUL-terminated input string.
 * @param delim set of delimiter characters.
 * @return NULL-terminated heap array of heap-allocated tokens (free each
 *         element, then the array), or NULL if an argument is NULL or memory
 *         runs out.
 */
char** strsplit(char* str, const char* delim){
    char** res;
    const char* p;
    size_t n = 0;

    if (str == NULL || delim == NULL) {
        return NULL;
    }

    /* Start with just the terminating NULL entry. */
    res = malloc(sizeof *res);
    if (res == NULL) {
        return NULL;
    }
    res[0] = NULL;

    for (p = str + strspn(str, delim); *p != '\0'; p += strspn(p, delim)) {
        size_t len = strcspn(p, delim);
        /* Room for the new token plus the terminator. */
        char** grown = realloc(res, (n + 2) * sizeof *res);

        if (grown == NULL) {
            goto fail;
        }
        res = grown;

        res[n] = malloc(len + 1);
        if (res[n] == NULL) {
            goto fail;
        }
        memcpy(res[n], p, len);
        res[n][len] = '\0';
        res[++n] = NULL;

        p += len;
    }
    return res;

fail:
    while (n > 0) {
        free(res[--n]);
    }
    free(res);
    return NULL;
}
答案 7 :(得分:3)
以下是zString library的strtok()
实施。
zstring_strtok()
与标准库strtok()
的区别在于它对待连续分隔符的方式。
看看下面的代码,确保你会了解它是如何工作的(我试着尽量使用尽可能多的评论)
/*
 * strtok()-like tokenizer that reports consecutive delimiters.
 *
 * Only delim[0] is used as the delimiter. Pass the string on the first call
 * and NULL afterwards; the resume position is kept in a static variable.
 * Returns:
 *   - NULL when delim is NULL or there is nothing left to scan;
 *   - the remaining text when it contains no delimiter (scanning then ends);
 *   - delim itself when the text starts with the delimiter;
 *   - otherwise the text up to the first delimiter, which is replaced by NUL
 *     (so the input must be a writable array).
 */
char *zstring_strtok(char *str, const char *delim) {
    static char *resume = 0;    /* where the next call continues */
    char *hit;

    if (delim == 0)
        return 0;

    if (str == 0) {
        str = resume;
        if (str == 0)
            return 0;
    }

    /* locate the first occurrence of delim[0], stopping at the terminator */
    hit = str;
    while (*hit != '\0' && *hit != delim[0])
        hit++;

    /* no delimiter left: hand back the tail and finish the scan */
    if (*hit == '\0') {
        resume = 0;
        return str;
    }

    /* text begins with the delimiter: report the delimiter itself */
    if (hit == str) {
        resume = str + 1;
        return (char *)delim;
    }

    /* cut the token off and remember where the rest starts */
    *hit = '\0';
    resume = hit + 1;
    return str;
}
以下是一个示例用法...
Example Usage
/* The buffer must be a writable array because the tokenizer writes NULs into it. */
char str[] = "A,B,,,C";
printf("1 %s\n",zstring_strtok(str,","));
printf("2 %s\n",zstring_strtok(NULL,","));
printf("3 %s\n",zstring_strtok(NULL,","));
printf("4 %s\n",zstring_strtok(NULL,","));
printf("5 %s\n",zstring_strtok(NULL,","));
/* NOTE(review): this sixth call returns NULL; printing NULL with %s is not
 * portable, the "(null)" in the sample output is what glibc happens to print. */
printf("6 %s\n",zstring_strtok(NULL,","));
Example Output
1 A
2 B
3 ,
4 ,
5 C
6 (null)
可以从Github下载该库 https://github.com/fnoyanisi/zString
答案 8 :(得分:3)
这是一个字符串拆分函数,可以处理多字符分隔符。请注意,如果分隔符长于要分割的字符串,则buffer
和stringLengths
将设置为(void *) 0
,numStrings
将设置为0。
此算法已经过测试,并且有效。 (免责声明:尚未对非ASCII字符串进行测试,并假设调用者提供了有效参数)
示例代码:
/*
 * Split `original` on every non-overlapping occurrence of the (possibly
 * multi-character) `delimiter`.
 *
 * Outputs:
 *   buffer        - malloc'ed array of numStrings malloc'ed, NUL-terminated copies
 *   stringLengths - malloc'ed array with the length of each copy
 *   numStrings    - number of pieces (delimiter occurrences + 1)
 *
 * If the delimiter is longer than the input, or memory cannot be allocated,
 * buffer and stringLengths are set to NULL and numStrings to 0. An empty
 * delimiter never matches, so the whole input comes back as a single piece.
 * The caller frees every buffer[i], then buffer and stringLengths.
 */
void splitString(const char *original, const char *delimiter, char ** & buffer, int & numStrings, int * & stringLengths){
    const size_t lo = strlen(original);
    const size_t ld = strlen(delimiter);

    buffer = NULL;
    stringLengths = NULL;
    numStrings = 0;

    if(ld > lo){
        return;
    }

    /* First pass: count the pieces. */
    int count = 1;
    if(ld > 0){
        for(const char *p = strstr(original, delimiter); p != NULL; p = strstr(p + ld, delimiter)){
            count++;
        }
    }

    char **parts = (char **) malloc(sizeof(char *) * count);
    int *lengths = (int *) malloc(sizeof(int) * count);
    if(parts == NULL || lengths == NULL){
        free(parts);
        free(lengths);
        return;
    }

    /* Second pass: copy each piece; the scan mirrors the counting pass. */
    const char *start = original;
    for(int i = 0;i < count;i++){
        const char *end = (ld > 0) ? strstr(start, delimiter) : NULL;
        if(end == NULL){
            end = original + lo;    /* last piece runs to the end of the input */
        }
        const size_t len = (size_t)(end - start);

        parts[i] = (char *) malloc(len + 1);
        if(parts[i] == NULL){
            while(i > 0){
                free(parts[--i]);
            }
            free(parts);
            free(lengths);
            return;
        }
        memcpy(parts[i], start, len);
        parts[i][len] = 0;
        lengths[i] = (int) len;

        if(i + 1 < count){
            start = end + ld;       /* skip over the delimiter just matched */
        }
    }

    buffer = parts;
    stringLengths = lengths;
    numStrings = count;
}
库:
/* Demo driver: split a sample string on " DELIM " and print each piece. */
int main(){
    const char *string = "STRING-1 DELIM string-2 DELIM sTrInG-3";
    char **buffer;
    int *stringLengths;
    int numStrings;

    splitString(string, " DELIM ", buffer, numStrings, stringLengths);

    int i = 0;
    while(i < numStrings){
        printf("String: %s\n", buffer[i]);
        i++;
    }
}
答案 9 :(得分:3)
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
/**
 * Splits str on delim in place and dynamically allocates an array of
 * pointers to the pieces.
 *
 * Every delimiter in str is overwritten with NUL and the array entries point
 * into str, so str must be writable and must outlive the array. Make a copy
 * first if the original text is still needed.
 *
 * Results are returned through the out-parameters:
 *   *length - number of entries: 0 for an empty string, otherwise the number
 *             of delimiters + 1 (so 1 when no delimiter was found)
 *   *array  - calloc'ed array of *length entries, or NULL when *length is 0;
 *             release it with free()
 *
 * Empty pieces are stored as NULL, so "foo,,bar" yields
 *   { "foo", NULL, "bar" }
 * and a trailing delimiter as in "foo," yields { "foo", NULL }.
 *
 * Returns 0 on success and -1 on error (errno is EINVAL for a NULL argument;
 * otherwise the allocation failed).
 */
int split( char * str, char delim, char ***array, int *length ) {
    char *p;
    char *start;
    char **res;
    int count = 1;
    int k = 0;

    if( !str || !array || !length ) {
        errno = EINVAL;
        return -1;
    }

    *array = NULL;
    *length = 0;
    if( *str == '\0' ) return 0;   // nothing to split

    // Count pieces: one more than the number of delimiters
    for( p = str; *p != '\0'; p++ ) {
        if( *p == delim ) count++;
    }

    // allocate dynamic array (entries start out NULL)
    res = calloc( (size_t)count, sizeof *res );
    if( !res ) return -1;

    start = str;
    for( p = str; ; p++ ) {
        if( *p != '\0' && *p != delim ) continue;

        // [start, p) is one piece; empty pieces stay NULL
        if( p != start ) res[k] = start;
        k++;

        if( *p == '\0' ) break;
        *p = '\0';                 // Null terminate at the delimiter
        start = p + 1;             // Start of next piece
    }

    *array = res;
    *length = count;

    return 0;
}
/* Writable sample input; the trailing comma produces an empty last piece. */
char str[] = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC,";

/* Demo driver: split the sample in place and print every piece. */
int main() {
    char **res = NULL;
    int k=0;
    int count =0;
    int rc;

    rc = split( str, ',', &res, &count );
    if( rc ) {
        printf("Error: %s errno: %d \n", strerror(errno), errno);
        return 1;   /* res and count are not meaningful after a failure */
    }

    printf("count: %d\n", count );
    for( k=0; k<count; k++ ) {
        /* split() documents that an empty piece may be stored as NULL */
        printf("str: %s\n", res[k] ? res[k] : "");
    }

    free(res );
    return 0;
}
答案 10 :(得分:3)
此函数接受char *字符串并通过deliminator对其进行拆分。连续可以有多个分隔符。请注意,该函数会修改orignal字符串。如果您需要原始字符串保持不变,则必须首先复制原始字符串。此函数不使用任何cstring函数调用,因此它可能比其他函数快一些。如果您不关心内存分配,可以在函数顶部使用size strlen(src_str)/ 2分配sub_strings,并且(如提到的c ++“version”)跳过函数的下半部分。如果这样做,函数将减少为O(N),但下面显示的内存优化方式为O(2N)。
功能:
/*
 * Split src_str in place on deliminator, skipping empty pieces.
 *
 * Every delimiter in src_str is overwritten with NUL; the returned array
 * points into src_str, so the buffer must be writable and outlive the array.
 *
 * num_sub_str receives the number of non-empty sub strings found.
 * Returns a malloc'ed, NULL-terminated array of pointers (free() the array
 * only), or 0 when no sub string exists or the allocation fails (in the
 * latter case num_sub_str is reset to 0).
 */
char** str_split(char *src_str, const char deliminator, size_t &num_sub_str){
    //replace deliminator's with zeros and count how many
    //sub strings with length >= 1 exist
    num_sub_str = 0;
    char *src_str_tmp = src_str;
    bool found_delim = true;
    while(*src_str_tmp){
        if(*src_str_tmp == deliminator){
            *src_str_tmp = 0;
            found_delim = true;
        }
        else if(found_delim){ //found first character of a new string
            num_sub_str++;
            found_delim = false;
        }
        src_str_tmp++;
    }
    printf("Start - found %zu sub strings\n", num_sub_str);
    if(num_sub_str == 0){
        printf("str_split() - no substrings were found\n");
        return(0);
    }

    //one extra pointer slot (not one extra byte) for the NULL terminator
    char **sub_strings = (char **)malloc(sizeof(char*) * (num_sub_str + 1));
    if(sub_strings == NULL){
        num_sub_str = 0;
        return(0);
    }

    //second pass: record where each sub string begins
    const char *src_str_terminator = src_str_tmp;
    src_str_tmp = src_str;
    bool found_null = true;
    size_t idx = 0;
    while(src_str_tmp < src_str_terminator){
        if(!*src_str_tmp) //found a NUL left by a delimiter
            found_null = true;
        else if(found_null){
            sub_strings[idx++] = src_str_tmp;
            found_null = false;
        }
        src_str_tmp++;
    }
    sub_strings[num_sub_str] = NULL;

    return(sub_strings);
}
如何使用
char months[] = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC";
char *str = strdup(months);     /* str_split() modifies its input, so work on a copy */
char *endptr;
size_t num_sub_str;
char **sub_strings = str_split(str, ',', num_sub_str);

if(sub_strings != NULL){
    /* the array is NULL-terminated */
    for(char **cur = sub_strings; *cur != NULL; cur++){
        printf("[%s]\n", *cur);
    }
}
free(sub_strings);
free(str);
答案 11 :(得分:2)
我的方法是扫描字符串并让指针指向分隔符(和第一个字符)后面的每个字符,同时将字符串中的deliminator外观分配给&#39; \ 0&#39;。
首先制作原始字符串的副本(因为它是常量),然后通过扫描获得分割数,将其传递给指针参数 len 。之后,将第一个结果指针指向复制字符串指针,然后扫描复制字符串:一旦遇到分隔符,将其分配给&#39; \ 0&#39;因此,前一个结果字符串终止,并将下一个结果字符串指针指向下一个字符指针。
/**
 * Split a private copy of a_str on a_delim; a_str itself is left untouched.
 *
 * @param a_str   NUL-terminated input string.
 * @param a_delim delimiter character.
 * @param len     receives the number of delimiters found; the returned array
 *                holds *len + 1 pieces followed by a NULL entry.
 * @return heap array whose entries point into a single heap copy of a_str.
 *         Release with free(results[0]) followed by free(results). Returns
 *         NULL (and *len = 0) if memory cannot be allocated.
 */
char** split(char* a_str, const char a_delim, int* len){
    const size_t total = strlen(a_str);
    char* s = malloc(total + 1);    /* +1 for the terminating NUL */
    char* tmp;
    char** results;
    int count = 0;
    int i = 1;

    if (s == NULL){
        *len = 0;
        return NULL;
    }
    memcpy(s, a_str, total + 1);

    for (tmp = s; *tmp != '\0'; tmp++){
        if (*tmp == a_delim) count += 1;
    }

    /* count delimiters give count + 1 pieces, plus one slot for the NULL entry */
    results = malloc(((size_t)count + 2) * sizeof *results);
    if (results == NULL){
        free(s);
        *len = 0;
        return NULL;
    }
    *len = count;

    results[0] = s;
    for (; *s != '\0'; s++){
        if (*s == a_delim){
            *s = '\0';              /* terminate the previous piece */
            results[i++] = s + 1;   /* next piece starts after the delimiter */
        }
    }
    results[i] = NULL;
    return results;
}
答案 12 :(得分:2)
此优化方法在* result中创建(或更新现有的)指针数组,并返回* count中的元素数。
使用“max”表示您期望的最大字符串数(当您指定现有数组或任何其他reaseon时),否则将其设置为0
要与分隔符列表进行比较,请将delim定义为char *并替换该行:
if (str[i]==delim) {
以下两行:
char *c=delim; while(*c && *c!=str[i]) c++;
if (*c) {
享受
#include <stdlib.h>
#include <string.h>
/*
 * In-place splitter: replaces each delim found in the first len bytes of str
 * with NUL and collects pointers to the pieces.
 *
 * If *result is non-NULL on entry it is used as the output array and filled
 * while scanning; otherwise an array of *count pointers is allocated and
 * stored in *result. *count receives the number of pieces (at least 1).
 * A non-zero max stops the scan once *count reaches max.
 * Returns the output array, or NULL if it had to be allocated and malloc failed.
 */
char **split(char *str, size_t len, char delim, char ***result, unsigned long *count, unsigned long max) {
    char **out = *result;
    const int prefilled = (out != NULL);
    size_t pos = 0;
    unsigned long idx;

    // there is always at least one piece
    *count = 1;
    if (prefilled) {
        out[0] = str;
    }

    // single scan over the requested length
    while (pos < len) {
        if (str[pos] == delim) {
            // cut the string here
            str[pos] = 0;
            // caller-supplied array: record the next piece right away
            if (prefilled) {
                out[*count] = &str[pos + 1];
            }
            *count += 1;
            // honour the optional cap on the number of pieces
            if (max != 0 && *count == max) {
                break;
            }
        }
        pos++;
    }

    // caller-supplied array is already complete
    if (prefilled) {
        return out;
    }

    // otherwise allocate exactly *count slots and publish them
    out = malloc((*count) * sizeof(char*));
    *result = out;
    if (out == NULL) {
        return NULL;
    }

    // pieces are now consecutive NUL-terminated strings inside str
    out[0] = str;
    for (idx = 1; idx < *count; idx++) {
        str += strlen(str) + 1;
        out[idx] = str;
    }

    return out;
}
用法示例:
#include <stdio.h>
/*
 * Demo driver: splits the month list twice, once into a caller-supplied
 * six-slot array (max = 6) and once into an array allocated by split().
 */
int main(int argc, char **argv) {
    char *str="JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC";
    char **result=malloc(6*sizeof(char*));
    char **result2=0;
    unsigned long count;
    unsigned long count2;
    unsigned long i;

    /* split() cuts its input in place, so each call gets its own copy */
    split(strdup(str),strlen(str),',',&result,&count,6);
    split(strdup(str),strlen(str),',',&result2,&count2,0);

    if (result) {
        i = 0;
        while (i < count) {
            printf("%s\n",result[i]);
            ++i;
        }
    }

    printf("\n");

    if (result2) {
        i = 0;
        while (i < count2) {
            printf("%s\n", result2[i]);
            ++i;
        }
    }

    return 0;
}
答案 13 :(得分:2)
我的代码(已测试):
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/**
 * Tokenize str in place with strtok() and collect pointers to the tokens.
 *
 * @param str    writable NUL-terminated string; delimiters are overwritten.
 * @param delim  set of delimiter characters.
 * @param array  receives a heap array of pointers into str (free() the array
 *               only); NULL when no token was found or on failure.
 * @param length receives the number of tokens.
 * @return 1 on success, 0 if memory could not be allocated.
 */
int dtmsplit(char *str, const char *delim, char ***array, int *length ) {
    char **res = NULL;
    char *token;
    int n = 0;

    for (token = strtok(str, delim); token != NULL; token = strtok(NULL, delim))
    {
        /* Grow through a temporary so the old block survives a failure. */
        char **grown = realloc(res, (size_t)(n + 1) * sizeof *res);

        if (grown == NULL)
        {
            free(res);
            *array = NULL;
            *length = 0;
            return 0;
        }
        res = grown;
        res[n++] = token;
    }

    *array = res;
    *length = n;
    return 1;
}
/* Demo driver: tokenize the month list in place and print the tokens. */
int main()
{
    char str[80] = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC";
    char **arr = NULL;
    int count = 0;
    int i;

    if (!dtmsplit(str, ",", &arr, &count))
    {
        return 1;
    }

    printf("Found %d tokens.\n", count);
    for (i = 0; i < count; i++)
        printf("string #%d: %s\n", i, arr[i]);

    /* The tokens point into str; only the pointer array is heap memory. */
    free(arr);
    return(0);
}
结果:
Found 12 tokens.
string #0: JAN
string #1: FEB
string #2: MAR
string #3: APR
string #4: MAY
string #5: JUN
string #6: JUL
string #7: AUG
string #8: SEP
string #9: OCT
string #10: NOV
string #11: DEC
答案 14 :(得分:2)
我的版本:
/*
 * Splits str on delimeter, treating runs of delimiters as one and ignoring
 * leading and trailing ones. *args receives a malloc'ed array of malloc'ed
 * token copies; the return value is the number of entries (always >= 1, so
 * an input without any token yields one empty string). str is not modified.
 */
int split(char* str, const char delimeter, char*** args) {
    char* cursor = str;
    char** out;
    int total = 1;

    /* step over leading delimiters */
    while (*cursor == delimeter) cursor++;

    /* every delimiter directly followed by a token character starts a new token */
    for (char* scan = cursor; *scan != 0; ) {
        scan++;
        if (*scan == delimeter && scan[1] != delimeter && scan[1] != 0) total++;
    }

    out = malloc(sizeof(char*) * total);
    *args = out;

    for (int i = 0; i < total; i++) {
        char* begin = cursor;
        size_t n;

        while (*cursor != delimeter && *cursor != 0) cursor++;
        n = (size_t)(cursor - begin);

        out[i] = malloc(sizeof(char) * (n + 1));
        memcpy(out[i], begin, sizeof(char) * n);
        out[i][n] = 0;

        /* skip the delimiter run before the next token */
        while (*cursor == delimeter) cursor++;
    }

    return total;
}
答案 15 :(得分:2)
我认为以下解决方案是理想的:
代码说明:
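1. 定义一个结构体token,用于存储每个令牌的地址和长度
2. 按最坏情况为这些结构体分配足够的内存:当str完全由分隔符组成时,共有strlen(str) + 1个令牌,并且全部是空字符串
3. 扫描str,记录每个令牌的地址和长度
4. 据此分配大小正确的输出数组,其中包括为NULL哨兵值预留的额外空间
5. 利用起始地址和长度信息分配、复制并添加各个令牌——使用memcpy,因为它比strcpy更快,而且我们已经知道长度
6. 释放保存令牌地址和长度的临时数组
7. 返回令牌数组
typedef struct {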
const char *start;
size_t len;
} token;
/**
 * Split str on sep without modifying it; empty tokens are kept.
 *
 * Works in two phases: first the start and length of every token are
 * recorded in a temporary array, then the output array is allocated and
 * each token is copied into it.
 *
 * @param str NUL-terminated input string.
 * @param sep separator character.
 * @return NULL-terminated heap array of heap-allocated tokens (free each
 *         element, then the array), or NULL if an allocation fails.
 */
char **split(const char *str, char sep)
{
    char **array = NULL;
    size_t start = 0, stop, toks = 0, t;
    /* Worst case: str consists only of separators -> strlen(str) + 1 tokens. */
    token *tokens = malloc((strlen(str) + 1) * sizeof *tokens);

    if (tokens == NULL)
        return NULL;

    for (stop = 0; str[stop]; stop++) {
        if (str[stop] == sep) {
            tokens[toks].start = str + start;
            tokens[toks].len = stop - start;
            toks++;
            start = stop + 1;
        }
    }
    /* Mop up the last token */
    tokens[toks].start = str + start;
    tokens[toks].len = stop - start;
    toks++;

    array = malloc((toks + 1) * sizeof *array);
    if (array == NULL)
        goto done;

    for (t = 0; t < toks; t++) {
        /* Calloc makes it nul-terminated */
        char *copy = calloc(tokens[t].len + 1, 1);

        if (copy == NULL) {
            /* Release what was built so far and report failure. */
            while (t > 0)
                free(array[--t]);
            free(array);
            array = NULL;
            goto done;
        }
        memcpy(copy, tokens[t].start, tokens[t].len);
        array[t] = copy;
    }
    /* Add a sentinel */
    array[t] = NULL;

done:
    free(tokens);
    return array;
}
为了简洁起见, 注意 malloc
省略。
一般情况下,我不会从这样的分割函数返回一个char *
指针数组,因为它会给调用者很大的责任以正确释放它们。我更喜欢的一个接口是允许调用者传递一个回调函数并为每个令牌调用它,正如我在这里描述的那样:Split a String in C。
答案 16 :(得分:1)
爆炸&amp; implode - 初始字符串保持不变,动态内存分配
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
/* One token of an exploded string: a view into the source text, not a copy. */
typedef struct
{
    uintptr_t ptr;  /* address of the token's first character */
    int size;       /* token length in bytes (no terminator is stored) */
} token_t;

/**
 * Split str (of length slen) on the first character of delimiter without
 * modifying or copying it.
 *
 * @param str       text to split.
 * @param slen      number of characters of str to consider.
 * @param delimiter only delimiter[0] is used.
 * @param tokens    receives a calloc'ed array of views into str; release it
 *                  with free(). Left untouched when -1 is returned.
 * @return number of tokens (delimiters + 1), or -1 when str contains no
 *         delimiter or the array cannot be allocated.
 */
int explode(char *str, int slen, const char *delimiter, token_t **tokens)
{
    int i, seps = 0, cur = 0;
    token_t *out;

    for(i = 0; i < slen; i++)
    {
        if(str[i] == *delimiter)
        {
            seps++;
        }
    }

    if(seps == 0)
    {
        return -1;
    }

    out = calloc((size_t)seps + 1, sizeof *out);
    if(out == NULL)
    {
        return -1;
    }

    out[0].ptr = (uintptr_t)str;
    for(i = 0; i <= slen; i++)
    {
        /* i == slen closes the final token */
        if((i == slen) || (str[i] == *delimiter))
        {
            out[cur].size = (int)(&str[i] - (char*)out[cur].ptr);
            if(i < slen)
            {
                cur++;
                out[cur].ptr = (uintptr_t)&str[i + 1];
            }
        }
    }

    *tokens = out;
    return (seps + 1);
}

/**
 * Join size tokens into one newly allocated string, separated by the first
 * character of delimiter.
 *
 * @return calloc'ed NUL-terminated string (caller frees), an empty string
 *         when size <= 0, or NULL if the allocation fails.
 */
char* implode(token_t *tokens, int size, const char *delimiter)
{
    int i;
    size_t len = 0;
    char *str;

    if(size <= 0)
    {
        return calloc(1, sizeof(char));
    }

    /* Each token is followed by one byte: a delimiter, or the final NUL. */
    for(i = 0; i < size; i++)
    {
        len += (size_t)tokens[i].size + 1;
    }

    str = calloc(len, sizeof(char));
    if(str == NULL)
    {
        return NULL;
    }

    len = 0;
    for(i = 0; i < size; i++)
    {
        memcpy(&str[len], (const void*)tokens[i].ptr, (size_t)tokens[i].size);
        len += (size_t)tokens[i].size;
        str[len++] = *delimiter;
    }

    /* The last delimiter written becomes the terminator. */
    str[len - 1] = '\0';

    return str;
}
用法:
/* Demo driver: explode the month list, implode it again, print both views. */
int main(int argc, char **argv)
{
    int i, c;
    char *exp = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC";
    /* Start as NULL so the free() calls below are safe when explode() fails. */
    token_t *tokens = NULL;
    char *imp = NULL;

    (void)argc;
    (void)argv;

    printf("%s\n", exp);

    if((c = explode(exp, (int)strlen(exp), ",", &tokens)) > 0)
    {
        imp = implode(tokens, c, ",");
        if(imp != NULL)
        {
            printf("%s\n", imp);
        }

        for(i = 0; i < c; i++)
        {
            /* tokens are not NUL-terminated, so print with an explicit length */
            printf("%.*s, %d\n", tokens[i].size, (char*)tokens[i].ptr, tokens[i].size);
        }
    }

    free((void*)tokens);
    free((void*)imp);
    return 0;
}
答案 17 :(得分:1)
没有经过测试,可能是错误的,但应该让你对它应该如何运作有一个良好的开端:
/**
 * Split str on delim into newly allocated strings (empty tokens are kept).
 *
 * @param str   NUL-terminated input; it is not modified.
 * @param delim delimiter character ('\0' means "no delimiter").
 * @return NULL-terminated heap array of heap-allocated tokens (free each
 *         element, then the array), or NULL if str is NULL or memory runs out.
 */
char **str_split(char* str, char delim) {
    size_t slots = 2;   /* one token + the NULL sentinel */
    size_t j = 0;
    const char *begin;
    const char *end;
    char **buf;

    if (str == NULL) {
        return NULL;
    }

    /* Every delimiter adds one more token. */
    if (delim != '\0') {
        for (begin = str; (begin = strchr(begin, delim)) != NULL; begin++) {
            slots++;
        }
    }

    buf = malloc(slots * sizeof *buf);
    if (buf == NULL) {
        return NULL;
    }

    begin = str;
    for (;;) {
        size_t len;

        end = (delim != '\0') ? strchr(begin, delim) : NULL;
        len = (end != NULL) ? (size_t)(end - begin) : strlen(begin);

        buf[j] = malloc(len + 1);
        if (buf[j] == NULL) {
            while (j > 0) {
                free(buf[--j]);
            }
            free(buf);
            return NULL;
        }
        memcpy(buf[j], begin, len);
        buf[j][len] = '\0';
        j++;

        if (end == NULL) {
            break;          /* that was the last token */
        }
        begin = end + 1;    /* continue right after the delimiter */
    }

    buf[j] = NULL;
    return buf;
}
答案 18 :(得分:0)
如果您愿意使用外部库,我强烈推荐bstrlib
。它需要一些额外的设置,但从长远来看更容易使用。
例如,拆分下面的字符串,首先使用bstring
调用创建bfromcstr()
。 (bstring
是char缓冲区的包装器)。
接下来,在逗号上拆分字符串,将结果保存在struct bstrList
,其中包含字段qty
和数组entry
,这是bstring
s的数组。
bstrlib
还有许多其他功能可以在bstring
s
很容易就像...
#include "bstrlib.h"
#include <stdio.h>
/* Demo driver: split a C string on ',' with bstrlib and print the parts. */
int main() {
    int i;
    const char *tmp = "Hello,World,sak";
    bstring bstr = bfromcstr(tmp);
    struct bstrList *blist;

    if (bstr == NULL) {
        return 1;
    }

    blist = bsplit(bstr, ',');
    if (blist == NULL) {
        bdestroy(bstr);
        return 1;
    }

    printf("num %d\n", blist->qty);
    for(i=0;i<blist->qty;i++) {
        /* bstr2cstr() allocates a new C string; release it with bcstrfree(). */
        char *cstr = bstr2cstr(blist->entry[i], '_');

        if (cstr == NULL) {
            continue;
        }
        printf("%d: %s\n", i, cstr);
        bcstrfree(cstr);
    }

    bstrListDestroy(blist);
    bdestroy(bstr);
    return 0;
}
答案 19 :(得分:0)
此处列出了strtok()的一些问题:http://benpfaff.org/writings/clc/strtok.html
因此,避免strtok 会更好。
现在,考虑一个包含空字段的字符串,如下所示:
char myCSVString[101] = "-1.4,2.6,,-0.24,1.26"; // specify input here
您可以使用简单函数将CSV格式的字符串转换为将其读取为float数组:
int strCSV2Float(float *strFloatArray , char *myCSVStringing , char delim);
我们在这里将分隔符指定为逗号。 适用于其他单字符分隔符。
请在下方找到用法:
#include <stdio.h>
#include <stdlib.h>
int strCSV2Float(float *strFloatArray , char *myCSVStringing , char delim);
/* Demo driver: parse a CSV line (with an empty field) into floats and print them. */
int main(void)
{
    char myCSVString[101] = "-1.4,2.6,,-0.24,1.26"; // specify input here
    float floatArr[10]; // specify size of float array here; must hold every field
    int totalValues = 0;
    char myDelim = ','; // specify delimiter here
    int floatValueCount = 0;

    printf("myCSVString == %s \n",&myCSVString[0]);
    totalValues = strCSV2Float(&floatArr[0] , &myCSVString[0], myDelim); // call the function here

    for (floatValueCount = 0 ; floatValueCount < totalValues ; floatValueCount++)
    {
        printf("floatArr[%d] = %f\n",floatValueCount , floatArr[floatValueCount]);
    }
    return 0;
}
/**
 * Parse a delimiter-separated list of numbers into an array of floats.
 *
 * A field ends at each delimiter and at the end of the string. An empty
 * field converts to 0.0 and a delimiter that is the very last character
 * does not start another field. The input string is not modified.
 *
 * @param strFloatArray  output array; the caller must provide room for
 *                       every field, since no capacity is passed in.
 * @param myCSVStringing NUL-terminated input text.
 * @param delim          single-character field delimiter.
 * @return number of values written to strFloatArray.
 */
int strCSV2Float(float *strFloatArray , char *myCSVStringing , char delim)
{
    enum { WORD_CAP = 64 };     /* field text beyond this length is ignored */
    char word[WORD_CAP];
    const char *fieldStart = myCSVStringing;
    const char *cur;
    int fieldCount = 0;

    for (cur = myCSVStringing; *cur != '\0'; cur++)
    {
        const int atDelim = (*cur == delim);
        const int atLastChar = (cur[1] == '\0');
        const char *fieldEnd;
        size_t len;
        size_t k;

        if (!atDelim && !atLastChar)
        {
            continue;
        }

        /* A closing delimiter is excluded; a final ordinary character is included. */
        fieldEnd = atDelim ? cur : cur + 1;
        len = (size_t)(fieldEnd - fieldStart);
        if (len >= WORD_CAP)
        {
            len = WORD_CAP - 1; /* truncate instead of overflowing word[] */
        }
        for (k = 0; k < len; k++)
        {
            word[k] = fieldStart[k];
        }
        word[len] = '\0';

        /* strtod() yields 0.0 for empty or non-numeric text, like atof(). */
        strFloatArray[fieldCount++] = (float) strtod(word, NULL);
        fieldStart = cur + 1;
    }
    return fieldCount;
}
输出如下:
myCSVString == -1.4,2.6,,-0.24,1.26
floatArr[0] = -1.400000
floatArr[1] = 2.600000
floatArr[2] = 0.000000
floatArr[3] = -0.240000
floatArr[4] = 1.260000
答案 20 :(得分:0)
For:Hassan A. El-Seoudy
您的问题已被关闭,所以我无法在那里回复 ^^'。 但你可以试试这个:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/**
 * Count the characters that precede the first '=' in str.
 *
 * @param str NUL-terminated string.
 * @return index of the first '=', or the length of str when it contains none.
 */
int countChar(char *str)
{
    int count = 0;

    /* Stop at the terminator too, so a string without '=' is not overrun. */
    while (str[count] != '\0' && str[count] != '=') // our delimiter character
    {
        count++;
    }
    return (count);
}
/**
 * Split a "key = value" string at the first '=' and print both halves.
 *
 * One space directly before and one directly after the '=' are dropped, so
 * "Xo = 100k" prints "str1 = Xo, str2 = 100k". Without an '=' the whole
 * input is printed as str1 and str2 is empty. str is not modified.
 */
void split(char *str)
{
    const char *eq = strchr(str, '=');
    const char *leftEnd;
    const char *right;
    size_t leftLen;
    size_t rightLen;
    char *str1;
    char *str2;

    if (eq == NULL)
    {
        leftEnd = str + strlen(str);
        right = leftEnd;
    }
    else
    {
        leftEnd = eq;
        right = eq + 1;
        if (leftEnd > str && leftEnd[-1] == ' ')
            leftEnd--;  /* drop the ' ' before '=' */
        if (*right == ' ')
            right++;    /* drop the ' ' after '=' */
    }

    leftLen = (size_t)(leftEnd - str);
    rightLen = strlen(right);

    /* +1 on each buffer for the terminating NUL */
    str1 = malloc(leftLen + 1);
    str2 = malloc(rightLen + 1);
    if (str1 != NULL && str2 != NULL)
    {
        memcpy(str1, str, leftLen);
        str1[leftLen] = '\0';
        memcpy(str2, right, rightLen + 1);
        printf("str1 = %s, str2 = %s\n", str1, str2);
    }

    free(str1);
    free(str2);
}
/* Demo driver: split one sample "key = value" string. */
int main()
{
    char *str = "Xo = 100k";

    split(str);
    return (0);
}
答案 21 :(得分:0)
又一个答案(这是从here移到这里):
尝试使用strtok函数:
这里的问题是您必须立即处理words。如果你想将它存储在一个数组中,你必须为它分配correct size(正确的大小),而这个大小事先是未知的。
例如:
注意: 我们使用相同的循环和函数来计算计数(传递一个)和制作副本(传递两个),以避免分配问题。
注2 : 您可以在单独的帖子中使用strtok的其他一些实现。
你可以使用它:
/**
 * Split in_text into tokens separated by any character of in_sep.
 *
 * Uses strtok() conventions (delimiter runs are collapsed, no empty tokens)
 * but leaves in_text unmodified. Pass one counts the tokens so the array can
 * be allocated with the exact size; pass two copies them.
 *
 * @return NULL-terminated heap array of heap-allocated tokens (release it
 *         with Free_Array()), or NULL on a NULL argument or allocation failure.
 */
char **Split(char *in_text, char *in_sep)
{
    char **ret;
    const char *pos;
    size_t count = 0;
    size_t stored = 0;

    if (in_text == NULL || in_sep == NULL)
    {
        return NULL;
    }

    // This is the pass ONE: we count
    pos = in_text + strspn(in_text, in_sep);
    while (*pos != '\0')
    {
        count++;
        pos += strcspn(pos, in_sep);
        pos += strspn(pos, in_sep);
    }

    // We create a NULL terminated array hence the +1
    ret = calloc(count + 1, sizeof *ret);
    if (ret == NULL)
    {
        return NULL;
    }

    // This is the pass TWO: we store
    pos = in_text + strspn(in_text, in_sep);
    while (*pos != '\0')
    {
        size_t len = strcspn(pos, in_sep);
        char *copy = malloc(len + 1);

        if (copy == NULL)
        {
            while (stored > 0)
            {
                free(ret[--stored]);
            }
            free(ret);
            return NULL;
        }
        memcpy(copy, pos, len);
        copy[len] = '\0';
        ret[stored++] = copy;

        pos += len;
        pos += strspn(pos, in_sep);
    }

    return ret;
}
// Use this to free
/**
 * Release a NULL-terminated array of heap-allocated strings: every element
 * up to the NULL sentinel is freed, then the array itself.
 * A NULL in_array is ignored.
 */
void Free_Array(char** in_array)
{
    char **pos;

    if (in_array == NULL)
    {
        return;
    }

    for (pos = in_array; *pos != NULL; pos++)
    {
        free(*pos);
    }
    free(in_array);
}
(我没有测试过,所以如果它不起作用,请告诉我!)
答案 22 :(得分:0)
/**
 * Count the tokens in iptr, i.e. the maximal runs of characters other than
 * delim. Leading whitespace is skipped before counting starts.
 *
 * @param iptr  NUL-terminated string; it is only read.
 * @param delim delimiter character.
 * @return number of tokens found.
 */
static int count_token(char *iptr, char delim) {
    int token_count = 0;

    /* <ctype.h> functions need an unsigned char value; a negative plain
     * char would be undefined behaviour. */
    while (*iptr && isspace((unsigned char)*iptr))
        iptr++;

    while (*iptr) {
        if (*iptr != delim) {
            token_count++;
            /* consume the rest of this token */
            while (*iptr && (*iptr != delim))
                iptr++;
        }
        else {
            iptr++;
        }
    }
    return token_count;
}
/*
 * Tokenize input on spaces with strtok() (input is modified), echo each
 * token with puts() and return a malloc'ed array of strdup'ed copies.
 * The number of tokens is stored in *argc.
 */
static char** split(char* input, int* argc){
    const int expected = count_token(input, ' ');
    char** argv = (char**)malloc(sizeof(char*)*expected);
    int filled = 0;
    char *word;

    for (word = strtok(input, " "); word != NULL; word = strtok(NULL, " ")) {
        puts(word);
        argv[filled] = strdup(word);
        filled++;
    }

    /* the pre-computed count must match what strtok() produced */
    assert(filled == expected);
    *argc = expected;
    return argv;
}
答案 23 :(得分:0)
围绕此问题的两个问题是内存管理和线程安全。从众多帖子中可以看到, 在C中无缝地完成这项任务并非易事。我想要一个解决方案,即:
我提出的解决方案符合所有这些条件。设置可能需要更多工作 比这里发布的一些其他解决方案要好,但是我认为在实践中,额外的工作值得 以避免其他解决方案的常见陷阱。
#include <stdio.h>
#include <string.h>
/* Destination for one field: a caller-owned buffer and its capacity. */
struct splitFieldType {
    char *field;    /* buffer that receives the NUL-terminated field text */
    int maxLength;  /* size of that buffer in bytes, terminator included */
};
typedef struct splitFieldType splitField;

/* Copy len bytes of src into *f, truncating (and reporting through
 * softError, when one is given) if the text does not fit. */
static void strsplit_store_field(const splitField *f, int index, const char *src, size_t len,
                                 void (*softError)(int fieldNumber,int expected,int actual)) {
    size_t cap;

    if (f->maxLength <= 0) {
        /* No room even for the terminator: report it and write nothing. */
        if (softError != NULL) softError(index, 0, (int)len);
        return;
    }

    cap = (size_t)f->maxLength - 1;
    if (len > cap) {
        if (softError != NULL) softError(index, (int)cap, (int)len);
        len = cap;
    }
    memcpy(f->field, src, len);
    f->field[len] = 0;
}

/**
 * Split input on fieldSeparator into caller-provided fixed-size buffers.
 *
 * No memory is allocated and input is not modified. At most `expected`
 * fields are stored; any further input is ignored. A field that does not
 * fit its buffer is truncated to maxLength - 1 bytes and reported through
 * softError(zero-based field index, bytes that fit, actual length).
 *
 * @param fields         array of `expected` destination descriptors.
 * @param expected       number of entries in fields.
 * @param input          NUL-terminated text to split.
 * @param fieldSeparator separator string (may be longer than one character).
 * @param softError      truncation callback; may be NULL.
 * @return number of fields stored.
 */
int strsplit(splitField *fields, int expected, const char *input, const char *fieldSeparator, void (*softError)(int fieldNumber,int expected,int actual)) {
    int i;
    size_t fieldSeparatorLen=strlen(fieldSeparator);
    const char *tNext, *tLast=input;

    for (i=0; i<expected && (tNext=strstr(tLast, fieldSeparator))!=NULL; ++i) {
        strsplit_store_field(&fields[i], i, tLast, (size_t)(tNext-tLast), softError);
        tLast=tNext+fieldSeparatorLen;
    }

    if (i<expected) {
        /* Text after the last separator is the final field. */
        strsplit_store_field(&fields[i], i, tLast, strlen(tLast), softError);
        return i+1;
    }
    return i;
}
/* Soft-error callback: reports an oversized field on stderr, numbering fields from 1. */
void monthSplitSoftError(int fieldNumber, int expected, int actual) {
    const int humanIndex = fieldNumber + 1;

    fprintf(stderr,"monthSplit: input field #%d is %d bytes, expected %d bytes\n",humanIndex,actual,expected);
}
int main() {
const char *fieldSeparator=",";
const char *input="JAN,FEB,MAR,APRI,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC,FOO,BAR";
struct monthFieldsType {
char field1[4];
char field2[4];
char field3[4];
char field4[4];
char field5[4];
char field6[4];
char field7[4];
char field8[4];
char field9[4];
char field10[4];
char field11[4];
char field12[4];
} monthFields;
splitField inputFields[12] = {
{monthFields.field1, sizeof(monthFields.field1)},
{monthFields.field2, sizeof(monthFields.field2)},
{monthFields.field3, sizeof(monthFields.field3)},
{monthFields.field4, sizeof(monthFields.field4)},
{monthFields.field5, sizeof(monthFields.field5)},
{monthFields.field6, sizeof(monthFields.field6)},
{monthFields.field7, sizeof(monthFields.field7)},
{monthFields.field8, sizeof(monthFields.field8)},
{monthFields.field9, sizeof(monthFields.field9)},
{monthFields.field10, sizeof(monthFields.field10)},
{monthFields.field11, sizeof(monthFields.field11)},
{monthFields.field12, sizeof(monthFields.field12)}
};
int expected=sizeof(inputFields)/sizeof(splitField);
printf("input data: %s\n", input);
printf("expecting %d fields\n",expected);
int ct=strsplit(inputFields, expected, input, fieldSeparator, monthSplitSoftError);
if (ct!=expected) {
printf("string split %d fields, expected %d\n", ct,expected);
}
for (int i=0;i<expected;++i) {
printf("field %d: %s\n",i+1,inputFields[i].field);
}
printf("\n");
printf("Direct structure access, field 10: %s", monthFields.field10);
}
下面是一个示例编译和输出。请注意,在我的示例中,我故意拼出了“ APRIL”,以便您可以看到软错误的工作原理。
$ gcc strsplitExample.c && ./a.out
input data: JAN,FEB,MAR,APRIL,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC,FOO,BAR
expecting 12 fields
monthSplit: input field #4 is 5 bytes, expected 3 bytes
field 1: JAN
field 2: FEB
field 3: MAR
field 4: APR
field 5: MAY
field 6: JUN
field 7: JUL
field 8: AUG
field 9: SEP
field 10: OCT
field 11: NOV
field 12: DEC
Direct structure access, field 10: OCT
享受!
答案 24 :(得分:0)
这是另一个可以安全操作的实现,用于对与问题中请求的原型匹配的 string-literal 进行令牌化,将分配的指向指针的指针返回给char(例如char **
)。分隔符字符串可以包含多个字符,输入字符串可以包含任意数量的标记。所有分配和重新分配均由malloc
或realloc
处理,而没有POSIX strdup
。
分配的指针的初始数量由NPTRS
常数控制,唯一的限制是它大于零。返回的char **
在类似于NULL
的最后一个标记之后,包含一个 sentinel *argv[]
,其形式为execv
,execvp
和execve
。
与strtok()
一样,多个顺序定界符被视为单个定界符,因此"JAN,FEB,MAR,APR,MAY,,,JUN,JUL,AUG,SEP,OCT,NOV,DEC"
将被解析为好像只有一个','
分隔"MAY,JUN"
一样。
下面的函数被内联注释,并添加了一个简短的main()
以分隔月份。分配的初始指针数设置为2
,以在对输入字符串进行标记化期间强制进行三个重新分配:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define NPTRS 2     /* initial number of pointers to allocate (must be > 0) */

/* Tokenize src on any character in delim; src is only read.
 * Returns an allocated, NULL-terminated array of allocated token copies, or
 * NULL if the initial block of pointers cannot be allocated. Runs of
 * delimiters count as one, so no empty tokens are produced. The pointer
 * block doubles whenever it fills up; if growing it or allocating a token
 * fails, the tokens collected so far are returned.
 */
char **strsplit (const char *src, const char *delim)
{
    size_t used = 0;                    /* tokens stored so far */
    size_t cap = NPTRS;                 /* pointers currently allocated */
    const char *cur;
    char **list = malloc (cap * sizeof *list);

    if (list == NULL) {
        perror ("malloc-dest");
        return NULL;
    }
    list[0] = NULL;                     /* empty list: sentinel only */

    cur = src + strspn (src, delim);    /* skip leading delimiters */
    while (*cur != '\0') {
        size_t len = strcspn (cur, delim);  /* length of this token */
        char *copy;

        if (used == cap - 1) {          /* only the sentinel slot is left */
            void *tmp = realloc (list, 2 * cap * sizeof *list);
            if (tmp == NULL) {
                perror ("realloc-dest");
                break;                  /* list is still valid and terminated */
            }
            list = tmp;
            cap *= 2;
        }

        copy = malloc (len + 1);
        list[used] = copy;              /* NULL on failure keeps the list terminated */
        if (copy == NULL) {
            perror ("malloc-dest[i]");
            break;
        }
        memcpy (copy, cur, len);
        copy[len] = 0;
        list[++used] = NULL;            /* move the sentinel behind the new token */

        cur += len;
        cur += strspn (cur, delim);     /* skip the delimiter run */
    }

    return list;
}
/* Demo driver: split the month list (with a run of commas) and print the tokens. */
int main (void) {

    char *str = "JAN,FEB,MAR,APR,MAY,,,JUN,JUL,AUG,SEP,OCT,NOV,DEC";
    char **tokens = strsplit (str, ",");    /* NULL-terminated list of copies */
    size_t i;

    if (tokens == NULL)
        return 0;

    for (i = 0; tokens[i] != NULL; i++) {
        puts (tokens[i]);
        free (tokens[i]);                   /* each token is its own allocation */
    }
    free (tokens);                          /* then the pointer block itself */
}
使用/输出示例
$ ./bin/splitinput
JAN
FEB
MAR
APR
MAY
JUN
JUL
AUG
SEP
OCT
NOV
DEC
让我知道您是否还有其他问题。
答案 25 :(得分:0)
我知道聚会晚了,但这里还有 2 个功能可以使用,并且可能会根据您的需要进一步调整(帖子底部的源代码)
另请参阅下面的实施说明,以确定哪个功能更适合您的需求。
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stdbool.h> // C99
// Tokenize destructively: *strp is split in place on any character of delim.
// Returns a malloc'd, NULL-terminated array of pointers into the source
// string (release it with a single free()), or NULL on error.
char **str_toksarray_alloc(
char **strp, /* InOut: pointer to the source non-constant c-string */
const char *delim, /* c-string containing the delimiting chars */
size_t *ntoks, /* InOut: # of tokens to parse/parsed (NULL or *ntoks==0 for all tokens) */
bool keepnulls /* false ignores empty tokens, true includes them */
);
// Tokenize non-destructively: the function works on a private copy of str.
// Returns a malloc'd, NULL-terminated array of individually allocated token
// copies (free every token, then the array), or NULL on error.
char **str_toksarray_alloc2(
const char *str, /* the source c-string */
const char *delim, /* c-string containing the delimiting chars */
size_t *ntoks, /* InOut: # of tokens to parse/parsed (NULL or *ntoks==0 for all tokens) */
bool keepnulls /* false ignores empty tokens, true includes them */
);
除了源字符串(分别为 strp
和 str
)之外,它们的原型几乎相同。
strp
(指向字符串的指针)是已分配的非常量 c 字符串的地址,将被就地标记。 str
是一个未更改的 c 字符串(它甚至可以是字符串文字)。 c-string 我的意思是一个以 nul
结尾的字符缓冲区。两个函数的其余参数相同。
要解析所有可用的标记,静音 ntoks
(意思是在将其传递给任何函数或将其作为 NULL
指针传递之前将其设置为 0)。否则,函数最多解析 *ntoks
个标记,或者直到没有更多标记(以先到者为准)。在任何情况下,当 ntoks
为 non-NULL
时,它都会使用成功解析的令牌计数进行更新。
另请注意,非静音 ntoks
决定将分配多少个指针。因此,如果源字符串包含 10 个标记并且我们将 ntoks
设置为 1000,我们最终将得到 990 个不必要的分配指针。另一方面,如果源字符串包含 1000 个标记,但我们只需要前 10 个标记,那么将 ntoks
设置为 10 听起来是一个更明智的选择。
两个函数都分配并返回一个字符指针数组,但 str_toksarray_alloc()
使它们指向修改后的源字符串本身中的标记,而 str_toksarray_alloc2()
使它们指向指向动态分配的令牌副本(名称末尾的 2 表示 2 级分配)。
返回的数组附加了一个 NULL
哨兵指针,ntoks
的传回值中不考虑该指针(否则,当 non-NULL
时,ntoks 将返回数组的长度传回给调用者,而不是它的第一级大小)。
当 keepnulls
设置为 true
时,生成的标记与我们对 strsep() 函数的期望相似。这主要意味着源字符串中的连续分隔符会产生空标记(空值);如果 delim
是一个空的 c 字符串,或者在源字符串中没有找到它包含的任何分隔符字符,结果只有 1 个标记:源字符串本身。与 strsep() 相反,可以通过将 keepnulls
设置为 false
来忽略空标记。
失败的调用可以通过将函数的返回值与 NULL
比较来识别,或者通过将 ntoks
的传回值与 0 比较来识别(前提是 ntoks
为 non-NULL
)。我建议在尝试访问返回的数组之前总是检查是否失败,因为这些函数包含健全性检查,可能会把原本会立即发生的崩溃推迟(例如,将 NULL
指针作为源字符串传递)。
成功后,调用者应该在完成后释放数组。
对于 str_toksarray_alloc()
,一个简单的 free() 就足够了。对于 str_toksarray_alloc2()
,由于存在第二级分配,需要一个循环。NULL
哨兵(或 non-NULL
ntoks
的传回值)使这变得微不足道,但我还在下面提供了一个 toksarray_free2()
函数,献给所有懒惰的小蜜蜂 :)
使用这两个函数的简化示例如下。
准备:
ntoks
str_toksarray_alloc():
toksarray_free2()
str_toksarray_alloc2():
// Shared inputs for the two usage examples.
const char *src = ";b,test,Tèst,;;cd;ελληνικά,nørmälize,;string to"; // sample text with leading, consecutive and mixed delimiters
const char *delim = ";,"; // every character in this c-string acts as a delimiter
bool keepnulls = true; // true: empty tokens are kept in the result
size_t ntoks = 0; // 0 ("muted"): parse all available tokens
这两个函数都使用 strsep() 进行标记化,这使得它们线程安全,但这不是标准函数。如果未提供,您始终可以使用开源实现(例如 GNU's 或 Apple's)。 // destructive (use copy of src)
// Destructive tokenizing works in place, so hand it a writable copy of src.
char *scopy = strdup( src );
if (!scopy) { ... }; // handle strdup failure
printf( "%s\n", src );
char **arrtoks = str_toksarray_alloc( &scopy, delim, &ntoks, keepnulls );
printf( "%zu tokens read\n", ntoks ); // ntoks is a size_t: %zu, not %lu
if ( arrtoks ) {
    // the array ends with a NULL sentinel
    for (int i=0; arrtoks[i]; i++) {
        printf( "%d: %s\n", i, arrtoks[i] );
    }
}
// the tokens point into scopy, so one free() per buffer is enough
free( scopy );
free( arrtoks );
/* OUTPUT
;b,test,Tèst,;;cd;ελληνικά,nørmälize,;string to
11 tokens read
0:
1: b
2: test
3: Tèst
4:
5:
6: cd
7: ελληνικά
8: nørmälize
9:
10: string to
*/
str_toksarray_alloc2() 中使用的函数 strdup() 也是如此(它的实现很简单,但这里还是 GNU's 和 Apple's 的例子)。
在 // non-destructive
keepnulls = false; // reject empty tokens
ntoks = 0; // mute ntoks again: it still holds the count from the previous call
printf( "%s\n", src );
arrtoks = str_toksarray_alloc2( src, delim, &ntoks, keepnulls );
printf( "%zu tokens read\n", ntoks ); // ntoks is a size_t: %zu, not %lu
if ( arrtoks ) {
    // the array ends with a NULL sentinel
    for (int i=0; arrtoks[i]; i++) {
        printf( "%d: %s\n", i, arrtoks[i] );
    }
}
toksarray_free2( arrtoks ); // dangling arrtoks
// or: arrtoks = toksarray_free2( arrtoks ); // non-dangling artoks
/* OUTPUT
;b,test,Tèst,;;cd;ελληνικά,nørmälize,;string to
7 tokens read
0: b
1: test
2: Tèst
3: cd
4: ελληνικά
5: nørmälize
6: string to
*/
在 str_toksarray_alloc() 中使用 strsep() 的副作用是:源字符串的起始指针在解析循环的每一步中都会不断移动到下一个标记。这意味着调用者将无法释放解析后的字符串,除非他们把起始地址保存到一个额外的指针中。我们为他们省去了这个麻烦,方法是在函数中使用 strpSaved
指针在本地完成这件事。str_toksarray_alloc2()
不受此影响,因为它不接触源字符串。
这两个函数的主要区别在于 str_toksarray_alloc()
不会为找到的令牌分配内存。它只是为数组指针分配空间,并将它们设置为直接指向源字符串。这是可行的,因为 strsep() 会以 nul
就地终止找到的标记。这种依赖性会使您的支持代码复杂化,但对于大字符串,它也会对性能产生很大影响。如果保留源字符串并不重要,它也会对内存占用产生很大影响。
另一方面,str_toksarray_alloc2()
分配并返回动态分配的令牌副本的自持数组,没有进一步的依赖关系。它首先通过从源字符串的本地副本创建数组来实现,其次通过将实际标记内容复制到数组中。与 str_toksarray_alloc()
相比,这要慢得多,并且会留下更大的内存占用,但它没有进一步的依赖关系,并且对源字符串的性质没有特殊要求。这样可以更轻松地编写更简单(因此更易于维护)的支持代码。
这两个函数之间的另一个区别是 ntoks
被静音时的第一级分配(数组指针)。它们都解析所有可用的令牌,但采用完全不同的方法。str_toksarray_alloc()
使用初始大小为 16(字符指针)的 alloc-ahead,并在解析循环中按需将其加倍。str_toksarray_alloc2()
先进行第一遍扫描来统计所有可用的标记,然后只一次性分配那么多字符指针。第一遍扫描由辅助函数 str_tokscount()
完成,该函数使用标准函数 strpbrk() 和 strchr()。我也在下面提供该函数的源代码。
哪种方法更好完全由您决定,具体取决于您的项目需求。随意将每个函数的代码调整为任何一种方法并从那里开始。
我想说,平均而言,对于真正大的字符串,alloc-ahead 的速度要快得多,尤其是当初始大小和增长因子针对每种情况进行微调时(例如,使它们成为函数参数)。省去 str_tokscount() 中所有这些 strchr()
和 strpbrk()
的额外遍历可以在那里产生影响。然而,对于相对较小的字符串(这几乎是常态),提前分配一堆字符指针只是一种矫枉过正。它没有坏处,但在这种情况下它确实无缘无故地使代码变得混乱。无论如何,请随意选择最适合您的那个。
这两个函数也是如此。我会说在大多数情况下 str_toksarray_alloc2()
处理起来要简单得多,因为内存和性能对于中小型字符串来说很少成为问题。如果您必须处理大字符串,请考虑使用 str_toksarray_alloc()
(尽管在这种情况下,您应该编写一个专门的字符串解析函数,以贴近您的项目需求和输入规范)。
哦,伙计,我认为这不仅仅是 2 美分(笑)。
无论如何,这里是 2 个函数和辅助函数的代码(我已经删除了他们的大部分描述注释,因为我已经涵盖了几乎所有内容)。
str_toksarray_alloc():
str_toksarray_alloc2()
str_toksarray_alloc2():
str_toksarray_alloc()
str_tokscount() - str_toksarr_alloc2() 使用的辅助函数:
// ----------------------------------------
// Tokenize destructively a nul-terminated source-string.
//
// strp      : address of the writable source c-string. strsep() overwrites
//             delimiters in it with nul and advances *strp while parsing;
//             *strp itself is restored to its initial value before returning.
// delim     : c-string whose characters all act as delimiters.
// ntoks     : NULL or *ntoks == 0 ("muted") parses every token, otherwise at
//             most *ntoks tokens are parsed. When non-NULL it receives the
//             number of tokens actually stored (0 on failure).
// keepnulls : false skips empty tokens, true stores them.
//
// Return a dynamically allocated, NULL terminated array of char-pointers
// each pointing to each token found in the source-string, or NULL on error
// (bad arguments, empty source, allocation failure, or a token count whose
// pointer array would not fit in a size_t). The caller releases the array
// with a single free(); the tokens live inside the source-string.
//
char **str_toksarray_alloc(char **strp, const char *delim, size_t *ntoks, bool keepnulls)
{
    // largest token count for which (count + 1) pointers still fit in size_t
    const size_t maxtoks = SIZE_MAX / sizeof(char *) - 1;

    // sanity checks
    if ( !strp || !*strp || !**strp || !delim ) {
        goto failed;
    }

    char *strpSaved = *strp;                    // save initial *strp pointer
    bool ntoksOk = (ntoks && *ntoks);           // false when ntoks is muted
    size_t cap = (ntoksOk ? *ntoks : 16);       // # of tokens to alloc-ahead

    // reject requests whose byte size would wrap around: an undersized
    // block would otherwise be overrun by the parsing loop below
    if ( cap > maxtoks ) {
        goto failed;
    }

    // alloc array of char-pointers (+1 for NULL sentinel)
    char **toksarr = malloc( (cap + 1) * sizeof(*toksarr) );
    if ( !toksarr ) {
        goto failed;
    }

    // Parse *strp tokens into the array
    size_t i = 0;                               // # of actually parsed tokens
    char *tok;
    while ( (tok = strsep(strp, delim)) ) {

        // if requested, ignore empty tokens
        if ( *tok == '\0' && !keepnulls ) {
            continue;
        }

        if ( i == cap ) {
            // non-muted ntoks reached? we are done
            if ( ntoksOk ) {
                break;
            }

            // muted ntoks & ran out of space? double toksarr and keep parsing
            if ( cap > maxtoks / 2 ) {
                goto failed_free;
            }
            size_t newcap = cap * 2;
            char **tmparr = realloc( toksarr, (newcap + 1) * sizeof(*tmparr) );
            if ( !tmparr ) {
                goto failed_free;
            }
            toksarr = tmparr;
            cap = newcap;
        }

        toksarr[i++] = tok;                     // get token address
    }

    toksarr[i] = NULL;                          // NULL sentinel
    *strp = strpSaved;                          // restore initial *strp pointer
    if (ntoks) *ntoks = i;                      // pass to caller # of parsed tokens
    return toksarr;

failed_free:
    // the source text stays split, but the caller gets its pointer back
    *strp = strpSaved;
    free( toksarr );
failed:
    if (ntoks) *ntoks = 0;
    return NULL;
}
toksarray_free2() - 在 str_toksarr_alloc2() 返回的数组上使用它:
// ----------------------------------------
// Tokenize non-destructively a nul-terminated source-string.
//
// str       : the source c-string; it is never written to (a private copy
//             is tokenized instead).
// delim     : c-string whose characters all act as delimiters.
// ntoks     : NULL or *ntoks == 0 ("muted") sizes the array with
//             str_tokscount(), otherwise at most *ntoks tokens are parsed.
//             When non-NULL it receives the number of tokens actually
//             stored (0 on failure).
// keepnulls : false skips empty tokens, true stores them.
//
// Return a dynamically allocated, NULL terminated array of dynamically
// allocated and nul-terminated string copies of each token found in the
// source-string. Return NULL on error. The caller frees every token and
// then the array.
// The 2 at the end of the name means 2-levels of allocation.
//
char **str_toksarray_alloc2( const char *str, const char *delim, size_t *ntoks, bool keepnulls )
{
    // sanity checks
    if ( !str || !*str || !delim ) {
        if (ntoks) *ntoks = 0;
        return NULL;
    }

    // make a copy of str to work with
    char *copy = strdup( str );
    if ( !copy ) {
        if (ntoks) *ntoks = 0;
        return NULL;
    }

    // if ntoks is muted we'll allocate str_tokscount() tokens, else *ntoks
    size_t cap = (ntoks && *ntoks) ? *ntoks : str_tokscount(copy, delim, keepnulls);
    if ( cap == 0 ) {                           // str_tokscount() failed
        goto fail_free_str;
    }

    // reject counts whose (cap + 1) pointers would not fit in a size_t
    if ( cap > SIZE_MAX / sizeof(char *) - 1 ) {
        goto fail_free_str;
    }

    // alloc the array of strings (+1 for an extra NULL sentinel)
    char **toksarr = malloc( (cap + 1) * sizeof(*toksarr) );
    if ( !toksarr ) {
        goto fail_free_str;
    }

    // Parse str tokens and duplicate them into the array.
    // strsep() advances the pointer it is given (and finally sets it to
    // NULL), so it walks a separate cursor: 'copy' must keep pointing at
    // the start of the allocation or the free() calls below would leak it
    // or free an interior pointer.
    size_t i = 0;                               // # of actually parsed tokens
    char *cursor = copy;
    char *tok;
    while ( i < cap && (tok = strsep(&cursor, delim)) ) {

        // if requested, skip empty tokens
        if ( *tok == '\0' && !keepnulls ) {
            continue;
        }

        // duplicate current token into the array
        char *tmptok = strdup( tok );
        if ( !tmptok ) {
            goto fail_free_arr;
        }
        toksarr[i++] = tmptok;
    }

    toksarr[i] = NULL;                          // NULL sentinel
    free( copy );                               // release the local copy of the source-string
    if (ntoks) *ntoks = i;                      // pass to caller the # of parsed tokens
    return toksarr;

    // cleanup before failing
fail_free_arr:
    for (size_t idx=0; idx < i; idx++) {
        free( toksarr[idx] );
    }
    free( toksarr );
fail_free_str:
    free( copy );
    if (ntoks) *ntoks = 0;
    return NULL;
}
答案 26 :(得分:-1)
这可能会解决您的目的
#include <stdio.h>
#include <string.h>
/* Split a fixed sample name into its runs of ASCII letters and print one
 * run per line. Every other character acts as a separator; empty runs are
 * not stored.
 */
int main()
{
    enum { MAX_TOKENS = 100, MAX_TOKEN_LEN = 100 };
    char name[] = "jrSmith-Rock";
    char store[MAX_TOKENS][MAX_TOKEN_LEN];
    size_t ntokens = 0;     /* completed tokens in store */
    size_t len = 0;         /* length of the token being built */
    size_t i;

    /* The loop deliberately visits the terminating nul as well, so the
     * last token is closed by the same code path as all the others. */
    for (i = 0; ; i++) {
        char c = name[i];
        int is_letter = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');

        if (is_letter) {
            /* letters beyond the table or row capacity are dropped */
            if (ntokens < MAX_TOKENS && len < MAX_TOKEN_LEN - 1) {
                store[ntokens][len++] = c;
            }
            continue;
        }

        /* separator or end of input: terminate the token so that
         * printf("%s") never reads uninitialised bytes */
        if (len > 0) {
            store[ntokens][len] = '\0';
            ntokens++;
            len = 0;
        }
        if (c == '\0') {
            break;
        }
    }

    for (i = 0; i < ntokens; i++) {
        printf("%s\n", store[i]);
    }
    return 0;
}
输出:
jrSmith
Rock
答案 27 :(得分:-1)
这是一种不同的方法,也适用于大型文件。
/******************************************************************************
Online C Compiler.
Code, Compile, Run and Debug C program online.
Write your code in this editor and press "Run" button to compile and execute it.
*******************************************************************************/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/* Returns a malloc'd copy of the characters of str that precede the first
 * delimiter (or the whole string when there is none). *pRet is set to the
 * first character after the run of delimiters that follows, or to NULL when
 * no delimiter was found. */
char* aux_slice(const char* str, char delimiter, const char** pRet);
/* Splits str on delimiter, treating adjacent delimiters as one. Returns a
 * malloc'd array of malloc'd token copies and stores the token count in
 * *ret_size. */
char **split(const char *str, char delimiter, int *ret_size);
/* Demo driver: split a sample string full of leading, trailing and
 * repeated commas, print the resulting tokens and release them.
 */
int main()
{
    int count = 0, i = 0;
    char** splits = split(",,, 1,2,3,4,5,6,7,8,9,10, aaaa, a,a a,aa,,a,,,,a,a a,a a,a,,,,,ashajhsas asjas,,a,a,,aa"
                          "aaaaaaaaaaa.......,p,p,p,p,p,p,p, this is last,,,,,,,,,,", ',', &count);

    /* split() returns NULL when it cannot allocate its result */
    if (splits == NULL) {
        fprintf(stderr, "split failed\n");
        return 1;
    }

    printf("Strings (%d)\n", count);
    for (i = 0; i < count; i++) {
        /* an entry may be NULL if copying that token failed */
        printf("%s\n", splits[i] ? splits[i] : "(null)");
        free(splits[i]);
    }
    free(splits);
    return 0;
}
/* Copy the leading token of str into a freshly allocated string.
 *
 * str       : nul-terminated input; NULL makes the function return NULL.
 * delimiter : character that ends the token.
 * pRet      : when non-NULL, receives where parsing should resume: the
 *             first character after the run of delimiters that follows the
 *             token, or NULL when str contained no delimiter at all.
 *             It is left untouched when the function fails.
 *
 * Returns the malloc'd token (possibly empty), or NULL on bad input or
 * allocation failure. The caller frees it.
 */
char* aux_slice(const char* str, char delimiter, const char** pRet)
{
    size_t len = 0;
    const char *rest;
    char *ret;

    if (!str) {
        return NULL;
    }

    /* length of the token: everything before the delimiter or the end */
    while (str[len] != '\0' && str[len] != delimiter) {
        len++;
    }

    /* +1 for the terminating nul */
    ret = malloc(len + 1);
    if (ret == NULL) {
        return NULL;
    }
    memcpy(ret, str, len);
    ret[len] = '\0';

    rest = str + len;
    if (*rest == '\0') {
        /* no delimiter found: this was the remainder of the string */
        rest = NULL;
    } else {
        /* skip the whole run of adjacent delimiters */
        while (*rest == delimiter) {
            rest++;
        }
    }
    if (pRet) {
        *pRet = rest;
    }
    return ret;
}
/* Split str into its non-empty tokens separated by delimiter. Leading,
 * trailing and adjacent delimiters never produce empty tokens.
 *
 * str       : nul-terminated input; NULL makes the function return NULL.
 * delimiter : separator character.
 * ret_size  : when non-NULL, receives the number of tokens returned
 *             (0 on failure, and 0 for an empty or all-delimiter input).
 *
 * Returns a malloc'd array of malloc'd token copies, or NULL on bad input
 * or allocation failure. An input without tokens yields a non-NULL array
 * with *ret_size == 0. The caller frees each token and then the array.
 */
char **split(const char *str, char delimiter, int *ret_size)
{
    int count = 0, i = 0;
    int in_token = 0;
    const char *cursor;
    char **split_str;

    if (ret_size) {
        *ret_size = 0;
    }
    if (!str) {
        return NULL;
    }

    /* count the maximal runs of non-delimiter characters; this never reads
     * outside the string, whatever mix of delimiters it contains */
    for (cursor = str; *cursor != '\0'; cursor++) {
        if (*cursor == delimiter) {
            in_token = 0;
        } else if (!in_token) {
            in_token = 1;
            count++;
        }
    }

    /* at least one slot, so "no tokens" is distinguishable from failure */
    split_str = malloc(sizeof *split_str * (size_t)(count ? count : 1));
    if (split_str == NULL) {
        return NULL;
    }

    /* position on the first token */
    cursor = str;
    while (*cursor != '\0' && *cursor == delimiter) {
        cursor++;
    }

    for (i = 0; i < count; ++i) {
        split_str[i] = aux_slice(cursor, delimiter, &cursor);
        if (split_str[i] == NULL) {
            /* undo the partial result rather than return holes */
            while (i > 0) {
                free(split_str[--i]);
            }
            free(split_str);
            return NULL;
        }
    }

    if (ret_size) {
        *ret_size = count;
    }
    return split_str;
}