我正在编写一个函数,把一个字符串拆分成字符串数组(char **)。当分隔符是空格时,我希望只在引号之外的空格处拆分。例如 Hello world "not split"
应该返回
Hello
world
"not split"
但不知为何,该函数拆分了引号内的单词,却没有拆分引号外的单词。
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/*
 * Report the parity of the double-quote characters found in `s`.
 *
 * Returns 1 when the string holds an odd number of '"' characters or none
 * at all, and 0 when it holds an even, non-zero number of them.
 */
int is_quotes(char *s)
{
    const char *p;
    int quotes = 0;

    for (p = s; *p != '\0'; ++p)
    {
        if (*p == '"')
            quotes += 1;
    }
    /* A string without any quote is reported the same way as an odd count. */
    if (quotes == 0)
        return (1);
    return (quotes % 2);
}
/*
 * Estimate how many pieces `s` will be cut into when split on `sep`.
 *
 * Leading separators are skipped first. The count starts at 1 if anything
 * remains and is then incremented for every separator met. When `sep` is a
 * space and is_quotes() returns 0 for the text starting at that separator,
 * the scan jumps two characters ahead and then runs forward to the next '"'
 * (or the end of the string) before counting.
 */
int count_words(char *s, char sep)
{
    const int quote_aware = (sep == ' ');
    int total;
    int pos;

    while (*s != '\0' && *s == sep)
        s++;
    total = (*s != '\0') ? 1 : 0;
    for (pos = 0; s[pos] != '\0'; pos++)
    {
        if (s[pos] != sep)
            continue;
        if (!is_quotes(s + pos) && quote_aware)
        {
            /* 34 is the character code of '"'. */
            pos += 2;
            while (s[pos] != 34 && s[pos] != '\0')
                pos++;
        }
        total++;
    }
    return (total);
}
/*
 * Return a newly allocated, NUL-terminated copy of `len` bytes of `s`,
 * starting at index `start`.
 *
 * The caller must make sure that s[start .. start + len - 1] lies inside the
 * source buffer, and owns the returned buffer (release it with free()).
 * Returns NULL when `s` is NULL, when `len + 1` would overflow, or when the
 * allocation fails.
 */
char *ft_strsub(char const *s, unsigned int start, size_t len)
{
    char *sub;

    if (s == NULL || len == (size_t)-1)
        return (NULL);
    sub = malloc(len + 1);
    if (sub == NULL)
        return (NULL);
    memcpy(sub, s + start, len);
    /* malloc() does not zero memory: terminate explicitly so the result is a string. */
    sub[len] = '\0';
    return (sub);
}
/*
 * Split `s` on the separator `c` into a NULL-terminated array of substrings
 * produced by ft_strsub().
 *
 * Returns NULL when `s` is NULL, `c` is 0, or count_words() reports 0.
 *
 * NOTE(review): count_words() is called before the `!s` test below, so a
 * NULL `s` reaches count_words() first.
 * NOTE(review): the result of malloc() is used without a NULL check.
 * NOTE(review): the returned pointer is computed as `result - words`; it is
 * the start of the array only if exactly `words` substrings were stored.
 */
char **ft_strsplit(char const *s, char c)
{
int words;
char *start;
char **result;
int i;
words = count_words((char *)s, c);
if (!s || !c || words == 0)
return (NULL);
i = 0;
/* One slot per counted word plus one for the terminating NULL. */
result = (char **)malloc(sizeof(char *) * (words + 1));
/* `start` marks the beginning of the piece currently being collected. */
start = (char *)s;
while (s[i])
{
if (s[i] == c)
{
/* Space separator and is_quotes() == 0 for the rest of the string:
 * jump ahead two characters, run to the next '"' (or the end), then
 * step back one so `i` sits just before that position. */
if (is_quotes((char *)s + i) == 0 && c == ' ')
{
i += 2;
while (s[i] != '"' && s[i])
i++;
i -= 1;
}
/* Emit the piece [start, s + i) unless it is empty. */
if (start != (s + i))
*(result++) = ft_strsub(start, 0, (s + i) - start);
/* The next piece begins right after position i. */
start = (char *)(s + i) + 1;
}
++i;
}
/* Emit the trailing piece, if any. */
if (start != (s + i))
*(result++) = ft_strsub(start, 0, (s + i) - start);
*result = NULL;
/* Rewind by the counted number of words (see the note in the header). */
return (result - words);
}
/*
 * Split the first command-line argument on spaces and print each piece on
 * its own line. Does nothing when no argument is given or when
 * ft_strsplit() yields no result.
 */
int main(int argc, char **argv)
{
    if (argc > 1)
    {
        char **s;
        int i;

        s = ft_strsplit(argv[1], ' ');
        /* ft_strsplit() returns NULL when there is nothing to split
         * (for example an empty argument); do not dereference it. */
        if (s == NULL)
            return 0;
        i = 0;
        while (s[i])
            printf("%s\n", s[i++]);
    }
    return 0;
}
当我使用hello world "hello hello"
运行此代码时,我得到以下内容
hello world
"hello
hello"
答案 0 :(得分:2)
你需要一个具有两种状态的状态机:"在引号内"和"在引号外"。遇到引号时,翻转状态。遇到空格时,如果处于引号外,就把它转换为换行符(即在此处拆分);如果处于引号内,则保持不变。(你很快就会想把它做得更精细,以支持字符串转义等,状态机方法可以扩展到这些情况)。
答案 1 :(得分:0)
试试这个(已修复并精简)
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/* A token is a half-open range [top, end) inside the scanned string. */
typedef struct token {
    const char *top; /* first character of the token, NULL when there is no token */
    const char *end; /* one past the last character of the token */
} Token;

/*
 * Extract the next token from the string *sp.
 *
 * Leading `sep` characters are skipped. The token then runs up to the next
 * `sep` or the end of the string, except that a '"' which has a matching
 * closing '"' later in the string makes the scan jump to that closing quote,
 * so separators between the two quotes stay inside the token. A '"' without
 * a closing partner is treated as an ordinary character.
 *
 * On return *sp points at the character where scanning stopped. When only
 * separators (or nothing) remain, the returned token is { NULL, NULL }.
 */
Token getToken(const char **sp, char sep){
    const char *cur = *sp;
    Token tok = { NULL, NULL };

    for(; *cur != '\0' && *cur == sep; ++cur)
        ; /* skip leading separators */

    if(*cur != '\0'){
        tok.top = cur;
        for(; *cur != '\0' && *cur != sep; ++cur){
            if(*cur == '"'){
                const char *closing = strchr(cur + 1, '"');
                if(closing != NULL)
                    cur = closing; /* land on the closing quote; the loop steps past it */
            }
        }
        tok.end = cur;
    }
    *sp = cur;
    return tok;
}
int count_words(const char *s, char sep){
int count = 0;
Token token = getToken(&s, sep);
while(token.top != NULL){
++count;
token = getToken(&s, sep);
}
return count;
}
/*
 * Duplicate the characters of `token` ([top, end)) into a new
 * NUL-terminated heap string. Returns NULL if the allocation fails.
 * The caller releases the result with free().
 */
char *ft_strsub(Token token){
    const size_t n = token.end - token.top;
    char *copy = malloc(n + 1);

    if (copy == NULL)
        return NULL;
    memcpy(copy, token.top, n);
    copy[n] = '\0';
    return copy;
}
char **ft_strsplit(const char *s, char sep){
int words;
if (!s || !sep || !(words = count_words(s, sep)))
return NULL;
char **result = malloc(sizeof(char *) * (words + 1));
if(!result){
perror("malloc");
return NULL;
}
int i = 0;
Token token = getToken(&s, sep);
while(token.top != NULL){
result[i++] = ft_strsub(token);
token = getToken(&s, sep);
}
result[i] = NULL;
return result;
}
/*
 * Demo: split a fixed sample string on spaces, print one token per line and
 * release everything. Exits with EXIT_FAILURE if the split produced nothing.
 */
int main(int argc, char **argv){
    (void)argc; /* command-line arguments are not used by this demo */
    (void)argv;
    const char *text = "Hello world \"not split\"";
    char **s = ft_strsplit(text, ' ');
    if(!s) /* ft_strsplit() returns NULL on failure; do not dereference it */
        return EXIT_FAILURE;
    for(int i = 0; s[i]; ++i){
        printf("%s\n", s[i]);
        free(s[i]);
    }
    free(s);
    return 0;
}
转义字符处理版本。
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define ESCAPE '\\' //ESCAPE CHARACTER

/* A token is a half-open range [top, end) inside the scanned string. */
typedef struct token {
    const char *top; /* first character of the token, NULL when there is no token */
    const char *end; /* one past the last character of the token */
} Token;

/*
 * Extract the next token from the string *sp, honouring ESCAPE and quotes.
 *
 * Leading `sep` characters are skipped. The token then runs up to the next
 * unescaped `sep` or the end of the string:
 *   - ESCAPE makes the following character part of the token, even if it is
 *     a separator or a quote;
 *   - a '"' with a later closing '"' that is not directly preceded by ESCAPE
 *     makes the scan jump to that closing quote, so separators in between
 *     stay inside the token. A '"' without such a partner is treated as an
 *     ordinary character.
 *
 * On return *sp points at the character where scanning stopped. When only
 * separators (or nothing) remain, the returned token is { NULL, NULL }.
 */
Token getToken(const char **sp, char sep){
    const char *s = *sp;
    Token token = { NULL, NULL };

    while(*s && *s == sep) /* skip leading separators */
        ++s;
    if(!*s){
        *sp = s;
        return token;
    }
    token.top = s;
    while(*s && *s != sep){
        if(*s == ESCAPE){
            if(s[1] == '\0'){
                /* ESCAPE is the last character: stop on the terminator
                 * instead of stepping over it and reading out of bounds. */
                ++s;
                break;
            }
            ++s; /* move onto the escaped character; it is skipped below */
        }
        else if(*s == '"'){
            const char *p = strchr(s + 1, '"'); /* look for the closing quote */
            while(p && p[-1] == ESCAPE)         /* ignore quotes preceded by ESCAPE */
                p = strchr(p + 1, '"');
            if(p)
                s = p;
        }
        ++s;
    }
    token.end = s;
    *sp = s;
    return token;
}
/*
 * Count how many tokens getToken() finds in `s` for separator `sep`.
 * Only the local copy of `s` is advanced.
 */
int count_words(const char *s, char sep){
    int found = 0;

    /* getToken() reports the end of input with a NULL `top`. */
    while(getToken(&s, sep).top != NULL)
        found++;
    return found;
}
/*
 * Strip escape markers from `s` in place and return `s`.
 *
 * Each ESCAPE is removed and the character that follows it is kept
 * literally, so an escaped ESCAPE yields a single ESCAPE character.
 * An ESCAPE that is the very last character is dropped.
 * A NULL `s` is returned unchanged.
 */
char *remove_escape(char *s){
    if(s == NULL)
        return NULL;
    char *to = s;
    for(const char *from = s; *from; ++from){
        if(*from == ESCAPE){
            if(from[1] == '\0')
                break;  /* lone trailing escape: nothing left to keep */
            ++from;     /* keep the escaped character, even if it is ESCAPE itself */
        }
        *to++ = *from;
    }
    *to = 0;
    return s;
}
/*
 * Build a heap-allocated, NUL-terminated string from the characters in
 * [token.top, token.end). Yields NULL when malloc() fails; otherwise the
 * caller owns the buffer and must free() it.
 */
char *ft_strsub(Token token){
    size_t span = token.end - token.top;
    char *out = malloc(span + 1);

    if (!out)
        return out;
    memcpy(out, token.top, span);
    out[span] = 0;
    return out;
}
char **ft_strsplit(const char *s, char sep){
int words;
if (!s || !sep || !(words = count_words(s, sep)))
return NULL;
char **result = malloc(sizeof(char *) * (words + 1));
if(!result){
perror("malloc");
return NULL;
}
Token token = getToken(&s, sep);
int i = 0;
while(token.top != NULL){
result[i] = ft_strsub(token);
remove_escape(result[i++]);
token = getToken(&s, sep);
}
result[i] = NULL;
return result;
}
/*
 * Print `text`, then every piece ft_strsplit() produces for it when split
 * on spaces (one per line), followed by an empty line. All memory returned
 * by ft_strsplit() is released. When the split yields nothing, only the
 * headers and the empty line are printed.
 */
void test(const char *text){
    printf("original:%s\n", text);
    printf("result of split:\n");
    char **s = ft_strsplit(text, ' ');
    if(s){ /* NULL means "no tokens" or failure: there is nothing to print or free */
        for(int i = 0; s[i]; ++i){
            printf("%s\n", s[i]);
            free(s[i]);
        }
        free(s);
    }
    puts("");
}
/* Run test() on each sample input, in order. */
int main(int argc, char **argv){
    static const char *const samples[] = {
        "Hello world \"not split\"",
        "Hello world \"not \\\" split\"", /* a quoted piece that contains an escaped quote */
        "Hello world not\\ split",        /* an escaped separator */
    };

    for(size_t k = 0; k < sizeof samples / sizeof samples[0]; ++k)
        test(samples[k]);
    return 0;
}
结果:
original:Hello world "not split"
result of split:
Hello
world
"not split"
original:Hello world "not \" split"
result of split:
Hello
world
"not " split"
original:Hello world not\ split
result of split:
Hello
world
not split