在C项目中,我编写了一个函数来返回正则表达式搜索中的第一个捕获组。
这个在线解析器的输出最好地描述了我期望实现的目标(注意右侧面板上的捕获组输出)。
我写的函数和测试代码如下:
#include <stdio.h>
#include <stdlib.h>
#include <regex.h>
#include <string.h>
#include <assert.h>
/* Use the standard boolean type rather than a hand-rolled int alias: the
 * alias clashes with any header that includes <stdbool.h>, and unlike _Bool
 * it does not normalise non-zero values to 1. */
#include <stdbool.h>
/*
 * Copies the text matched by reg_exp inside search_str into a newly
 * allocated, NUL-terminated string.
 *
 * reg_exp is compiled as a POSIX extended regular expression. Only one match
 * slot is requested from regexec(), so the text returned is the span of the
 * entire match (slot 0), not of a parenthesised sub-expression.
 *
 * On return, *output points to:
 * - in case the regexp compilation succeeded
 *   - a copy of the matched text in case there was a match found
 *   - or NULL in case there was no match (or the copy could not be allocated)
 * - in case the regexp compilation failed
 *   - the error message from the compilation process
 *     (or NULL if the message buffer could not be allocated)
 *
 * Returns false if reg_exp failed to compile, true otherwise.
 *
 * NOTE: The caller is responsible for free()-ing *output.
 */
bool get_first_match(const char* search_str, const char* reg_exp, char** output)
{
    enum { ERROR_BUF_SIZE = 1024 };
    regex_t preg;
    regmatch_t pmatch;
    int res;

    /* Give the caller a defined value on every path. */
    *output = NULL;

    /* Compile the input regexp */
    res = regcomp(&preg, reg_exp, REG_EXTENDED);
    if (res != 0) {
        char *error = malloc(ERROR_BUF_SIZE);
        if (error != NULL) {
            regerror(res, &preg, error, ERROR_BUF_SIZE);
        }
        /* Write through the pointer; assigning to `output` itself would only
         * change this function's local copy and leak the buffer. */
        *output = error;
        return false;
    }

    res = regexec(&preg, search_str, 1, &pmatch, REG_NOTBOL);
    if (res == 0) {
        size_t len = (size_t)(pmatch.rm_eo - pmatch.rm_so);
        char *result = malloc(len + 1);
        if (result != NULL) {
            memcpy(result, search_str + pmatch.rm_so, len);
            result[len] = '\0'; /* null-terminate the result */
        }
        *output = result;
    }
    /* Any non-zero regexec() result (REG_NOMATCH or an execution error)
     * leaves *output as NULL; pmatch is only read after a successful match. */

    /* Release the compiled pattern on the match and no-match paths alike. */
    regfree(&preg);
    return true;
}
/*
 * Demo driver: runs get_first_match() on a sample query string and prints the
 * match, a no-match notice, or the regex compilation error.
 */
int main(void)
{
    /* The '&' separators were restored; they had been mis-decoded as the
     * HTML entity "&para" in "&param2" / "&param3". */
    const char* search_str = "param1=blah&param2=blahblah&param3=blahetc&map=/usr/bin/blah.map";
    const char* regexp = "map=([^\\&]*)(&|$)";
    /* Start from NULL so the checks and the free() below are well-defined
     * even when the callee leaves the pointer untouched. */
    char* output = NULL;
    bool status = get_first_match(search_str, regexp, &output);

    if (status) {
        if (output) {
            printf("Found match: %s\n", output);
        } else {
            printf("No match found.");
        }
    } else {
        /* Passing NULL to %s is undefined, so substitute a placeholder. */
        printf("Regex error: %s\n", output ? output : "(no message available)");
    }

    free(output); /* free(NULL) is a no-op */
    return 0;
}
但是,我从C代码得到的输出包含字符串中的 map= 部分,即使我已在第一个捕获组中明确排除了它。
我该怎么做才能获得不含 map= 部分的捕获组?为什么与在线解析器相比,我的C程序得到的结果不同?
答案 0 :(得分:2)
这里发生的是,你有这样的模式:
const char* regexp = "map=([^\\&]*)(&|$)";
对于这个模式,匹配结果(我们称之为数组 result)将按如下方式填充:
result = {
"map=/usr/bin/blah.map",
"/usr/bin/blah.map",
""
}
现在,因为您按以下方式调用 regexec:
res = regexec(&preg, search_str, 1, &pmatch, REG_NOTBOL);
// Notice the argument 1 here ---^
参数 1 表示 pmatch 数组中最多只存储一个结果,因此您得到的是上面的 result[0]。由于您需要的是第一个捕获组(而不是整个匹配的字符串),您必须:
1. 将 pmatch 定义为大小至少为 2 的数组。
2. 将 2 作为参数传递给上面的 regexec。
完成上述操作后:
/*
 * Copies the text captured by the first parenthesised group of reg_exp
 * (a POSIX extended regular expression) in search_str into a newly
 * allocated, NUL-terminated string.
 *
 * On return, *output points to:
 * - the captured text, when the pattern compiled and group 1 matched
 * - NULL, when the pattern compiled but nothing matched, group 1 did not
 *   take part in the match, or the copy could not be allocated
 * - the regerror() message, when the pattern failed to compile
 *   (NULL if the message buffer could not be allocated)
 *
 * Returns false if reg_exp failed to compile, true otherwise.
 * The caller is responsible for free()-ing *output.
 */
bool get_first_match(const char* search_str, const char* reg_exp, char** output)
{
    /* Slot 0 receives the whole match, slot 1 the first capture group. */
    enum { NUM_SLOTS = 2, ERROR_BUF_SIZE = 1024 };
    regex_t preg;
    regmatch_t pmatch[NUM_SLOTS];
    int res;

    *output = NULL;

    /* Compile the input regexp */
    res = regcomp(&preg, reg_exp, REG_EXTENDED);
    if (res != 0) {
        char *error = malloc(ERROR_BUF_SIZE);
        if (error != NULL) {
            regerror(res, &preg, error, ERROR_BUF_SIZE);
        }
        *output = error;
        return false;
    }

    /* Pass the array itself (it decays to regmatch_t *), not its address. */
    res = regexec(&preg, search_str, NUM_SLOTS, pmatch, REG_NOTBOL);

    /* Notice changes in the lines below
     * I am using pmatch[1] since that is equivalent to our
     * result[1] from above.
     * rm_so is -1 when the group did not participate in the match. */
    if (res == 0 && pmatch[1].rm_so != -1) {
        size_t len = (size_t)(pmatch[1].rm_eo - pmatch[1].rm_so);
        char *result = malloc(len + 1);
        if (result != NULL) {
            memcpy(result, search_str + pmatch[1].rm_so, len);
            result[len] = '\0'; /* null-terminate the result */
        }
        *output = result;
    }

    /* Release the compiled pattern whether or not there was a match. */
    regfree(&preg);
    return true;
}