我有以下字符串,我试图解析变量。
/* Sample input: '&'-separated key=value pairs; note that "title" has an empty value. */
char data[] = "to=myself@gmail.com&cc=youself@gmail.com&title=&content=how are you?&signature=best regards.";
我开始使用strtok和以下代码
/* Example call: fetch the value stored under the "to" key of data. */
char *to=parsePostData("to",data);
/*
 * parsePostData - search the '='/'&'-separated string t for the token s and
 * return the text that follows it, up to the next '&'.
 *
 * s: token (key) to look for.
 * t: input string; it is copied first, so the caller's array is not modified.
 *
 * NOTE(review): the returned pointer refers into the automatic array tCpy,
 *               which no longer exists once the function returns.
 * NOTE(review): when s is never matched, control reaches the closing brace
 *               without a return statement.
 * NOTE(review): strcpy() does not check that t fits into the 512-byte copy.
 * NOTE(review): strtok() treats a run of delimiters as a single separator, so
 *               an empty value (as in "title=&content=...") is skipped.
 */
char* parsePostData(char s[],char t[])
{
char *postVal; /* never used */
char *pch; /* current token returned by strtok() */
char tCpy[512];/* Work on a copy: strtok() writes '\0' bytes into the string it scans, which would corrupt the caller's array. */
strcpy(tCpy,t);
pch = strtok (tCpy,"=&"); /* first token; both '=' and '&' act as separators */
while (pch != NULL)
{
if(strcmp(pch,s)==0) {
/* Match: the value is whatever precedes the next '&' ('=' may appear inside it). */
pch= strtok (NULL, "&");
return pch;
}else{
pch = strtok (NULL, "=&"); /* no match: advance to the next token */
}
}
}
这样可以正常工作,但遇到连续的分隔符(例如 "title=" 之后紧跟的 "&")时就不行了。于是我找到了下面这个自定义的 strtok_single 实现,出自问题《Need to know when no data appears between two token separators using strtok()》:
/*
 * strtok_single - strtok()-like splitter that does not merge adjacent
 * delimiters, so an empty field comes back as an empty string.
 *
 * str:    string to start splitting, or NULL to continue with the string
 *         given on an earlier call (the position is kept in a static).
 * delims: set of delimiter characters.
 *
 * Returns the next field, terminated in place by overwriting its delimiter
 * with '\0'. Returns NULL when no delimiter remains, which means the text
 * after the last delimiter is never returned.
 */
char * strtok_single (char * str, char const * delims)
{
    static char *cursor = NULL;   /* where the next scan begins */
    char *field;
    char *hit;

    if (str != NULL) {
        cursor = str;
    }
    if (cursor == NULL) {
        return NULL;
    }

    hit = strpbrk(cursor, delims);
    if (hit == NULL) {
        return NULL;              /* cursor stays where it is */
    }

    *hit = '\0';
    field = cursor;
    cursor = hit + 1;
    return field;
}
但这样一来,问题是我无法得到 "signature" 的值,因为它后面没有 & 分隔符。
我怎样才能把这两者结合起来,既不会漏掉最后一个变量,又能处理连续的分隔符?
答案 0 :(得分:3)
这里潜伏着两个 bug。一个在 strtok_single() 中:如果反复调用它,它不会像 strtok() 那样返回最后一段,即 signature 后面的 = 之后的那一段。
当这个问题解决之后,parsePostData() 中的代码仍然存在问题:它返回一个指向自动变量的指针。必须以不同方式处理字符串的副本;最简单的方法(与使用 strtok() 而不是 strtok_r() 或 strtok_s() 保持一致)是将 tCpy 变量设为静态。
测试程序 emt.c
这是一个复合程序,既展示了这些问题,也给出了一组修复。它把不同的"分割器"函数(与 strtok() 具有相同签名的函数)应用于数据。它演示了 strtok_single() 中的错误,以及 strtok_fixed() 修复了该错误。它还表明,parsePostData() 中的代码在修复后并使用 strtok_fixed() 时可以正常工作。
#include <stdio.h>
#include <string.h>
/*
 * Splitter - pointer to a tokenizer with the same signature as strtok():
 * it takes the string to split (or NULL to continue) and the delimiter set,
 * and returns the next token. strtok, strtok_single and strtok_fixed are
 * all passed through this type.
 */
typedef char *(*Splitter)(char *str, const char *delims);
/*
 * strtok_single - behaves as the version quoted in SO 30294129 (from SO 8705844).
 *
 * Unlike strtok(), adjacent delimiters are not merged: an empty field is
 * returned as "". The scan position is kept in a static between calls;
 * pass str == NULL to continue with the previous string.
 *
 * Returns NULL as soon as no delimiter is left, so the text after the last
 * delimiter is never returned.
 */
static char *strtok_single(char *str, char const *delims)
{
    static char *next = NULL;   /* start of the not-yet-split remainder */

    if (str != NULL)
    {
        next = str;
    }
    if (next == NULL)
    {
        return NULL;
    }

    char *delim = strpbrk(next, delims);
    if (delim == NULL)
    {
        return NULL;            /* remainder has no delimiter: nothing is returned */
    }

    char *token = next;
    *delim = '\0';
    next = delim + 1;
    return token;
}
/*
 * strtok_fixed - fixed variation of strtok_single.
 *
 * Splits a string on any character in delims without merging adjacent
 * delimiters, so every field is reported, including empty ones and the
 * last one (which has no delimiter after it).
 *
 * str:    string to start splitting, or NULL to continue with the string
 *         from an earlier call (the position is kept in a static, so the
 *         function is not reentrant).
 * delims: set of delimiter characters.
 *
 * Returns the next field, terminated in place, or NULL once every field has
 * been returned. A delimiter at the very end of the input is followed by one
 * final empty field (e.g. "a=" yields "a" and then ""), and an empty input
 * string yields a single empty field.
 */
static char *strtok_fixed(char *str, char const *delims)
{
    static char *src = NULL;    /* start of the next field; NULL once exhausted */

    if (str != NULL)
        src = str;
    if (src == NULL)
        return NULL;

    char *token = src;
    char *delim = strpbrk(src, delims);
    if (delim != NULL)
    {
        *delim = '\0';
        src = delim + 1;
    }
    else
    {
        /* Last field: mark the input as exhausted instead of testing for an
         * empty remainder, so a trailing empty field is not mistaken for
         * "no more input". */
        src = NULL;
    }
    return token;
}
/*
 * Raw test of splitter functions.
 *
 * Copies t into a private buffer, splits the copy on '=' and '&' with the
 * given splitter, and prints every token in brackets. A line
 * "matches <s>" is printed after each token equal to s.
 *
 * s:        token to flag when it is seen.
 * t:        input string; left unmodified.
 * splitter: tokenizer to exercise.
 *
 * Input that does not fit into the buffer is reported on stderr and skipped
 * rather than overflowing the buffer.
 */
static void parsePostData1(const char *s, const char *t, Splitter splitter)
{
    static char tCpy[512];
    size_t len = strlen(t);
    if (len >= sizeof tCpy)
    {
        fprintf(stderr, "parsePostData1: input longer than %zu bytes\n", sizeof tCpy - 1);
        return;
    }
    memcpy(tCpy, t, len + 1);   /* includes the terminating '\0' */

    char *pch = splitter(tCpy, "=&");
    while (pch != NULL)
    {
        printf(" [%s]\n", pch);
        if (strcmp(pch, s) == 0)
            printf("matches %s\n", s);
        pch = splitter(NULL, "=&");
    }
}
/*
 * Fixed version of parsePostData() from SO 30294129.
 *
 * Splits a copy of t on '=' and '&' with the given splitter and, at the
 * first token equal to s, returns the text that follows it up to the next
 * '&' (so the value itself may contain '=').
 *
 * s:        key to look for.
 * t:        input string; left unmodified.
 * splitter: tokenizer used to walk the copy.
 *
 * Returns a pointer into a static buffer, which is overwritten by the next
 * call, or NULL when s is not found, the splitter yields nothing after the
 * key, or t does not fit into the buffer.
 */
static char *parsePostData2(const char *s, const char *t, Splitter splitter)
{
    /* static: the returned pointer refers into this buffer, so it has to
     * outlive the call. */
    static char tCpy[512];
    size_t len = strlen(t);
    if (len >= sizeof tCpy)
        return NULL;            /* refuse oversize input instead of overflowing */
    memcpy(tCpy, t, len + 1);   /* includes the terminating '\0' */

    char *pch = splitter(tCpy, "=&");
    while (pch != NULL)
    {
        if (strcmp(pch, s) == 0)
        {
            /* Only '&' ends a value. */
            return splitter(NULL, "&");
        }
        pch = splitter(NULL, "=&");
    }
    return NULL;
}
/* Composite test program */

/* Printable form of a lookup result: passing NULL to printf("%s") is
 * undefined, so a missing value is shown as "(null)" explicitly. */
static const char *printable(const char *value)
{
    return value != NULL ? value : "(null)";
}

/*
 * Runs two comparisons over the same sample data:
 *  1. dumps every token each splitter produces while looking for the last tag;
 *  2. looks up every tag with each splitter and prints the value found.
 */
int main(void)
{
    char data[] = "to=myself@gmail.com&cc=youself@gmail.com&title=&content=how are you?&signature=best regards.";
    const char *tags[] = { "to", "cc", "title", "content", "signature" };
    enum { NUM_TAGS = sizeof(tags) / sizeof(tags[0]) };

    printf("\nCompare variants on strtok()\n");
    {
        int i = NUM_TAGS - 1;   /* the last tag, whose value has no trailing '&' */
        printf("strtok():\n");
        parsePostData1(tags[i], data, strtok);
        printf("strtok_single():\n");
        parsePostData1(tags[i], data, strtok_single);
        printf("strtok_fixed():\n");
        parsePostData1(tags[i], data, strtok_fixed);
    }

    printf("\nCompare variants on strtok()\n");
    for (int i = 0; i < NUM_TAGS; i++)
    {
        /* Each result points into parsePostData2's static buffer, so it is
         * printed before the next lookup overwrites it. */
        char *value1 = parsePostData2(tags[i], data, strtok);
        printf("strtok: [%s] = [%s]\n", tags[i], printable(value1));
        char *value2 = parsePostData2(tags[i], data, strtok_single);
        printf("single: [%s] = [%s]\n", tags[i], printable(value2));
        char *value3 = parsePostData2(tags[i], data, strtok_fixed);
        printf("fixed: [%s] = [%s]\n", tags[i], printable(value3));
    }

    return 0;
}
emt 的示例输出:
Compare variants on strtok()
strtok():
[to]
[myself@gmail.com]
[cc]
[youself@gmail.com]
[title]
[content]
[how are you?]
[signature]
matches signature
[best regards.]
strtok_single():
[to]
[myself@gmail.com]
[cc]
[youself@gmail.com]
[title]
[]
[content]
[how are you?]
[signature]
matches signature
strtok_fixed():
[to]
[myself@gmail.com]
[cc]
[youself@gmail.com]
[title]
[]
[content]
[how are you?]
[signature]
matches signature
[best regards.]
Compare variants on strtok()
✓ strtok: [to] = [myself@gmail.com]
✓ single: [to] = [myself@gmail.com]
✓ fixed: [to] = [myself@gmail.com]
✓ strtok: [cc] = [youself@gmail.com]
✓ single: [cc] = [youself@gmail.com]
✓ fixed: [cc] = [youself@gmail.com]
✕ strtok: [title] = [content=how are you?]
✓ single: [title] = []
✓ fixed: [title] = []
✓ strtok: [content] = [how are you?]
✓ single: [content] = [how are you?]
✓ fixed: [content] = [how are you?]
✓ strtok: [signature] = [best regards.]
✕ single: [signature] = [(null)]
✓ fixed: [signature] = [best regards.]
和
arr
正确(✓ = U+2713)和不正确(✕ = U+2715)的标记是在发布答案时手动添加的。
可以看到,标记为 'fixed' 的行每次都包含所需的内容。
答案 1 :(得分:1)
你并没有确切说明"这样可以正常工作"是什么意思,不过看起来只要说你想解析一个 application/x-www-form-urlencoded 字符串就足够了。你为什么不一开始就这么说呢?
考虑到第一个字段 key 可能以 '=' 或 '&' 中先出现的那个字符结束,因此在提取 key 时,搜索以这两个字符之一结尾的标记是合适的。
然而,第二个字段 value 不会以 '=' 字符结束,因此在提取 value 时搜索该字符是不合适的。您只需要搜索 '&'。
当然,您可以使用 strtok 来解析它,但我相信还有更合适的工具。例如,strcspn 不会对 data 做任何修改,这意味着您不需要像现在这样先复制一份 data……
#include <stdio.h>
#include <string.h>
/*
 * Walks the key=value pairs of data without modifying it and prints each
 * key and value. A key ends at the first '=' or '&'; a value ends at the
 * next '&' or at the end of the string.
 */
int main(void) {
    char data[]="to=myself@gmail.com&cc=youself@gmail.com&title=&content=how are you?&signature=best regards.";
    const char *cursor = data;

    for (;;) {
        /* Lengths are kept as int because "%.*s" takes an int precision. */
        int key_length = (int)strcspn(cursor, "&=");

        /* The value starts just past the '=', if the key was ended by one. */
        const char *value = cursor + key_length;
        if (*value == '=') {
            value++;
        }
        int value_length = (int)strcspn(value, "&");

        printf("Key: %.*s\n"
               "Value: %.*s\n\n",
               key_length, cursor,
               value_length, value);

        /* Step over the '&' separating this pair from the next one. */
        cursor = value + value_length;
        if (*cursor == '&') {
            cursor++;
        }
        if (*cursor == '\0') {
            break;
        }
    }
    return 0;
}