如何在C语言中不使用正则表达式,提取字符串中双引号之间的文本?

时间:2017-08-24 12:58:46

标签: c string

假设我的字符串是:haha "lol"
我只想提取lol

#include<stdio.h>

/*
 * Demo: extract the text between the first pair of double quotes in s
 * with a single sscanf() call and report whether anything was found.
 */
int main() {
   char a[20]={0};
   const char *s="haha \"lol\"";
   /*
    * Format, piece by piece:
    *   %*[^\"]   read and discard everything up to the opening quote
    *   \"        match the opening quote itself
    *   %19[^\"]  store at most 19 non-quote characters (a[20] minus the NUL)
    *   \"        match the closing quote (does not influence the return value)
    * sscanf() returns 1 only when the %19[...] conversion stored something.
    * Note: %*[^\"] must match at least one character, so a string that
    * begins with the quote is not recognised by this format.
    */
   if(sscanf(s,"%*[^\"]\"%19[^\"]\"",a)==1){
      printf("Found stuff inside quotes");
   }
   return 0;
}

3 个答案:

答案 0 :(得分:2)

通过为正在解析的源语言应用适当的解析器。

用一行代码来解析输入通常很脆弱,而且很难写对。

话虽如此,你可以试试类似下面这样的代码:

const char *input = "haha \"lol\"";
char quoted[32];
int consumed = 0; /* written by %n only when the whole pattern, closing quote included, matched */

/*
 * Two patterns are tried in turn:
 *   1. "%*[^\"]\"%31[^\"]\"%n"  - some leading text, then "...".
 *   2. "\"%31[^\"]\"%n"         - the string starts directly with "...".
 * The second one is needed because %*[^\"] must match at least one character.
 * %31[...] limits the copy so it fits quoted[32] including the NUL.
 * %n does not count towards the return value; it is reached only after the
 * closing quote matched, so consumed > 0 rejects an unterminated quote and
 * quoted text longer than 31 characters.
 * An empty pair ("") is still not matched, since %[ needs at least one character.
 */
if((sscanf(input, "%*[^\"]\"%31[^\"]\"%n", quoted, &consumed) == 1
    || sscanf(input, "\"%31[^\"]\"%n", quoted, &consumed) == 1)
   && consumed > 0)
{
  printf("got '%s'\n", quoted);
}

这段代码还需要进一步加固,但足以让你入门。

答案 1 :(得分:1)

有时候少量代码就能起到很大作用。所需要的只是调用两次strchr()。

extract_quoted_string()已更改为伪代码。

#include <stddef.h>
#include <string.h>

/*
 * Locate the first double-quoted run in s.
 *
 * s  - NUL-terminated string to search; NULL is treated as "nothing found".
 * sz - receives the number of characters between the two quotes when a
 *      quoted run is found; may be NULL if the caller does not need it.
 *      It is left untouched on failure.
 *
 * Returns a pointer into s at the first character after the opening quote,
 * or NULL when s does not contain both an opening and a closing quote.
 * The result is NOT NUL-terminated at the closing quote; use *sz as length.
 */
const char *extract_quoted_string(const char *s, size_t *sz) {
  if (s == NULL) {
    return NULL;
  }
  /* look_for_quote_start_at_s */
  const char *left = strchr(s, '"');
  if (left == NULL) {
    return NULL;
  }
  /* Update_left_to_the_next_possible_position: step past the opening quote */
  left++;
  /* look for the closing quote starting at the updated left */
  const char *right = strchr(left, '"');
  if (right == NULL) {
    return NULL;
  }
  /* Compute_and_save_size_based_on_left_and_right */
  if (sz != NULL) {
    *sz = (size_t)(right - left);
  }
  return left;
}

测试工具

/*
 * Print s in angle brackets followed by the result of
 * extract_quoted_string(): the quoted text in angle brackets, or "None"
 * when the function returns NULL.
 */
void test(const char *s) {
  size_t len;

  printf("<%s> --> ", s);

  const char *found = extract_quoted_string(s, &len);
  if (found == NULL) {
    printf("None\n");
    return;
  }

  /* found is not NUL-terminated at the closing quote, so bound the output by len */
  printf("<%.*s>\n", (int) len, found);
}

/*
 * Drive test() over a fixed table of inputs: no quote, a single quote and a
 * full pair of quotes, each with and without surrounding text.
 */
int main() {
  static const char *const inputs[] = {
    "",
    "123",
    "\"123",
    "123\"",
    "\"123\"",
    "abc\"123",
    "abc\"123\"",
    "123\"xyz",
    "\"123\"xyz",
    "abc\"123\"xyz",
  };

  for (unsigned i = 0; i < sizeof inputs / sizeof inputs[0]; ++i) {
    test(inputs[i]);
  }
}

预期输出

<> --> None
<123> --> None
<"123> --> None
<123"> --> None
<"123"> --> <123>
<abc"123> --> None
<abc"123"> --> <123>
<123"xyz> --> None
<"123"xyz> --> <123>
<abc"123"xyz> --> <123>

答案 2 :(得分:0)

我认为unwind的答案已经足够了(即便你还需要根据具体需求补充代码)。

已经有一个与此类似的问题:Split unquoted string in C

采用这种方法,可以写成如下形式:

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

#define ESCAPE '\\' //escape character (a backslash); getToken() skips over the character that follows it

/*
 * A token described as a half-open range [top, end) of pointers into the
 * string being scanned; no characters are copied.
 * getToken() returns { NULL, NULL } when there is no further token.
 */
typedef struct token {
    const char *top;//first character of the token
    const char *end;//one past the last character of the token
} Token;

/*
 * Extract the next sep-delimited token from the string *sp.
 *
 * Leading separators are skipped. Inside a token:
 *   - ESCAPE makes the following character literal (it cannot end the token
 *     or open a quoted run);
 *   - a double-quoted run "..." is consumed as a unit, so separators inside
 *     it do not end the token. A quote preceded by ESCAPE does not close the
 *     run. An opening quote with no closing quote is an ordinary character.
 *
 * sp  - in/out cursor. On return *sp points at the first character after
 *       the token (a separator or the terminating NUL).
 * sep - the separator character.
 *
 * Returns a Token delimiting [top, end) inside the caller's string, or
 * { NULL, NULL } when nothing but separators (or nothing at all) remains.
 */
Token getToken(const char **sp, char sep){
    const char *s = *sp;
    Token token = { NULL, NULL};

    while(*s && *s == sep)//skip leading separators
        ++s;
    if(!*s){
        *sp = s;
        return token;
    }
    token.top = s;
    while(*s && *s != sep){
        //An ESCAPE that is the very last character has nothing to escape;
        //it must not be skipped, or the cursor would run past the NUL.
        if(*s == ESCAPE && s[1] != '\0')
            ++s;//step onto the escaped character; the ++s below consumes it
        else if(*s == '"'){
            const char *p = strchr(s + 1, '"');//search end '"'
            while(p && p[-1] == ESCAPE)//escaped quote: keep looking
                p = strchr(p + 1, '"');
            if(p)
                s = p;//jump to the closing quote; the ++s below steps past it
        }
        ++s;
    }
    token.end = s;
    *sp = s;

    return token;
}

/*
 * Remove escape characters from s in place.
 *
 * Each ESCAPE is dropped and the character after it is kept verbatim, so an
 * escaped ESCAPE yields a single ESCAPE. An ESCAPE that is the last
 * character of the string is simply dropped.
 *
 * s - modifiable NUL-terminated string; NULL is accepted and returned as is.
 *
 * Returns s.
 */
char *remove_escape(char *s){
    char *from, *to;

    if(!s)
        return s;
    from = to = s;
    while(*from){
        if(*from == ESCAPE){
            ++from;//drop the escape itself
            if(!*from)//lone trailing escape: nothing left to copy
                break;
        }
        *to++ = *from++;//copy the (possibly escaped) character literally
    }
    *to = 0;
    return s;
}

/*
 * Copy the characters of token into a freshly allocated NUL-terminated string.
 *
 * token - range [top, end) to copy.
 *
 * Returns the new string, which the caller must free(), or NULL when the
 * token is empty/invalid (NULL or reversed pointers) or allocation fails.
 */
char *ft_strsub(Token token){
    //Reject the "no token" value before doing pointer arithmetic on it.
    if (token.top == NULL || token.end == NULL || token.end < token.top)
        return NULL;

    size_t len = (size_t)(token.end - token.top);
    char *sub = malloc(len + 1);
    if (sub){
        memcpy(sub, token.top, len);
        sub[len] = 0;
    }
    return sub;
}

/*
 * Demo: split s on spaces and print the contents of the first token that
 * starts with a double quote, without the surrounding quotes.
 */
int main(int argc, char **argv){
    (void)argc;//unused
    (void)argv;//unused
    char *str = NULL;
    const char *s="haha \"lol\"";

    Token token = getToken(&s, ' ');

    while(token.top != NULL){
        str = ft_strsub(token);
        if(str == NULL)//allocation failed
            return EXIT_FAILURE;
        remove_escape(str);
        if(*str == '"')//find it!
            break;
        free(str);
        str = NULL;//do not leave a dangling pointer for the check after the loop
        token = getToken(&s, ' ');
    }
    if(str){
        printf("Found stuff inside quotes: ");
        //remove the closing ", but only if it is really there
        size_t len = strlen(str);
        if(len >= 2 && str[len-1] == '"')
            str[len-1] = 0;
        printf("'%s'\n", str + 1);//ignore first character or use memmove
        free(str);
    }

    return 0;
}