我已经查看了POSIX正则表达式库和PCRE库中的正则表达式函数,但它们似乎都没有提供字符串替换函数。我不想使用C++,而且最好不需要再链接另一个库(如果必须的话也可以)。我需要手动进行字符串替换吗?如果是这样,我该如何使用捕获组?
答案 0 :(得分:23)
regex.h不提供对字符串替换的原生支持,但是它确实提供了子表达式/捕获组,这使替换更容易实现。我假设您已熟悉正则表达式的编译,因此直接跳到正则表达式的执行和子表达式。
regexec()在regex.h(位于/usr/include/)中定义如下:
/* Prototype of regexec() as quoted from regex.h.
 * __preg is the compiled regex to execute and __string the text to search;
 * __nmatch and __pmatch describe the regmatch_t array that receives the
 * match offsets; __eflags carries the execution flags. */
extern int regexec (const regex_t *__restrict __preg,
const char *__restrict __string, size_t __nmatch,
regmatch_t __pmatch[__restrict_arr],
int __eflags);
第一个,第二个和最后一个参数分别是要执行的正则表达式,被搜索的字符串和执行标志。第三个和第四个参数分别是regmatch_t数组的元素个数和该数组本身。 regmatch_t由两个字段组成:rm_so和rm_eo,它们分别是匹配区域开始和结束位置的索引(偏移量)。这些索引可以与string.h中的memcpy(),memset()和memmove()一起使用,以执行字符串替换。
我会做一个小例子并稍后发布。
祝你好运,我希望这有所帮助。
答案 1 :(得分:6)
PCRE库本身不提供替换功能,但PCRE下载页面提供了一个包装函数,它接受perl风格的=~ s/pattern/replace/语法,然后使用PCRE原生函数为您完成替换。转到http://www.pcre.org/,点击下载链接:ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/,然后进入Contrib目录。您需要的包/项目是:pcrs-0.0.3-src.tar.gz。
请注意,我自己没有使用过这个,所以我无法保证它的效果如何。不过,这是一段相当小而简单的代码,所以它应该可以很好地满足您的需求。
答案 2 :(得分:0)
/* regex_replace.c
:w | !gcc % -o .%<
:w | !gcc % -o .%< && ./.%<
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <regex.h>
/* Growable, always NUL-terminated buffer that collects the rewritten text. */
struct subst_buf {
    char *data;
    size_t len;
    size_t cap;
};

/* Append n bytes of src to b. Terminates the process when memory runs out,
 * which keeps the abort-on-OOM policy this function has always had. */
static void subst_buf_add(struct subst_buf *b, const char *src, size_t n)
{
    size_t need = b->len + n + 1;   /* +1 for the terminating NUL */

    if (need > b->cap) {
        size_t cap = b->cap ? b->cap : 64;
        while (cap < need)
            cap *= 2;
        char *tmp = realloc(b->data, cap);
        if (tmp == NULL)
            exit(EXIT_FAILURE);
        b->data = tmp;
        b->cap = cap;
    }
    memcpy(b->data + b->len, src, n);
    b->len += n;
    b->data[b->len] = '\0';
}

/*
 * Replace every match of the extended regex `pattern` in *str by `replace`.
 *
 * str      in/out; *str MUST be heap memory that can be passed to free()
 *          (malloc, strdup, ...). When at least one match is replaced the
 *          old buffer is freed and *str points to a new, exactly sized one.
 * pattern  POSIX extended regular expression.
 * replace  replacement text. A byte with value 1..31 is a back reference to
 *          the capture group of that number ("\1", "\2", ... in a C string
 *          literal), so those bytes cannot appear literally.
 *
 * Nothing is replaced (and *str is left untouched) when the number of back
 * references differs from the number of capture groups, or when a back
 * reference names a group the pattern does not have. A pattern that cannot
 * be compiled is reported on stdout.
 *
 * Text produced by a replacement is never searched again, so a replacement
 * that itself contains the pattern does not loop.
 */
void
regex_replace(char **str, const char *pattern, const char *replace)
{
    regex_t reg;

    if (regcomp(&reg, pattern, REG_EXTENDED) != 0) {
        printf("Could not compile regex: %s\n", pattern);
        return;
    }

    const size_t ngroups = reg.re_nsub;
    const unsigned char *rp;

    /* Count back references; unsigned char keeps bytes >= 0x80 literal. */
    size_t nrefs = 0;
    int out_of_range = 0;
    for (rp = (const unsigned char *)replace; *rp != '\0'; rp++) {
        if (*rp < 32) {
            nrefs++;
            if (*rp > ngroups)
                out_of_range = 1;
        }
    }
    if (nrefs != ngroups || out_of_range) {
        regfree(&reg);
        return;
    }

    regmatch_t *m = malloc((ngroups + 1) * sizeof *m);
    if (m == NULL)
        exit(EXIT_FAILURE);

    struct subst_buf out = { NULL, 0, 0 };
    const char *cursor = *str;  /* first byte of the input not yet consumed */
    int eflags = 0;             /* '^' may match only on the very first search */
    size_t replaced = 0;

    while (regexec(&reg, cursor, ngroups + 1, m, eflags) == 0) {
        const size_t so = (size_t)m[0].rm_so;
        const size_t eo = (size_t)m[0].rm_eo;

        subst_buf_add(&out, cursor, so);            /* text before the match */
        for (rp = (const unsigned char *)replace; *rp != '\0'; rp++) {
            if (*rp >= 32) {
                subst_buf_add(&out, (const char *)rp, 1);
            } else if (m[*rp].rm_so >= 0) {         /* group took part in the match */
                subst_buf_add(&out, cursor + m[*rp].rm_so,
                              (size_t)(m[*rp].rm_eo - m[*rp].rm_so));
            }
        }
        replaced++;

        cursor += eo;
        if (so == eo) {
            /* Empty match: step over one input byte so the scan advances. */
            if (*cursor == '\0')
                break;
            subst_buf_add(&out, cursor, 1);
            cursor++;
        }
        eflags = REG_NOTBOL;
    }

    if (replaced > 0) {
        subst_buf_add(&out, cursor, strlen(cursor)); /* trailing text */
        free(*str);
        /* Shrink to the exact size; keep the larger buffer if that fails. */
        char *fit = realloc(out.data, out.len + 1);
        *str = fit ? fit : out.data;
    } else {
        free(out.data);
    }

    free(m);
    regfree(&reg);
}
/* Demo driver: prints a sample sentence, rewrites its "[text->target]"
 * links into HTML anchors with regex_replace(), and prints the result. */
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    /* \2 and \1 are the back-reference bytes for capture groups 2 and 1. */
    char anchor_tpl[] = "<a href=\"\2\">\1</a>";
    char *link_regex = "\\[([^-]+)->([^]]+)\\]";
    char *text = strdup("before [link->address] some text [link2->addr2] trail");

    puts(text);
    regex_replace(&text, link_regex, anchor_tpl);
    puts(text);

    free(text);
    return 0;
}
答案 3 :(得分:0)
我采用了@marnout 的帖子并对其进行了修正,解决了许多错误和拼写错误。修复内容:内存泄漏;当替换文本包含模式本身时会无限替换;用返回值取代函数中的打印;反向引用的值实际上最高可达 31;此外还补充了文档和更多测试示例。
/* regex_replace.c
:w | !gcc % -o .%<
:w | !gcc % -o .%< && ./.%<
:w | !gcc % -o .%< && valgrind -v ./.%<
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <regex.h>
/* Growable, always NUL-terminated buffer that collects the rewritten text. */
struct rr_outbuf {
    char *data;
    size_t len;
    size_t cap;
};

/* Append n bytes of src to out. Returns 0 on success, -1 when out of memory
 * (out is left unchanged in that case). */
static int rr_outbuf_add(struct rr_outbuf *out, const char *src, size_t n)
{
    size_t need = out->len + n + 1;   /* +1 for the terminating NUL */

    if (need > out->cap) {
        size_t cap = out->cap ? out->cap : 64;
        while (cap < need)
            cap *= 2;
        char *tmp = realloc(out->data, cap);
        if (tmp == NULL)
            return -1;
        out->data = tmp;
        out->cap = cap;
    }
    memcpy(out->data + out->len, src, n);
    out->len += n;
    out->data[out->len] = '\0';
    return 0;
}

/*
 * Replace every match of the extended regex `pattern` in *str by `replace`,
 * expanding capture-group back references.
 *
 * str      in/out; *str MUST be free-able (obtained from malloc, strdup, ...).
 *          When at least one match is replaced the old buffer is freed and
 *          *str points to a new, exactly sized one. On any error, and when
 *          nothing matches, *str is left untouched.
 * pattern  POSIX extended regular expression.
 * replace  replacement text. A byte with value 1..31 is a back reference to
 *          the capture group of that number ("\1", "\2", ... in a C string
 *          literal); none of those bytes (e.g. a tab) can be used literally.
 *
 * Replaced text is never searched again: the scan resumes right after the
 * end of the previous match. '^' can match only at the start of *str.
 *
 * Returns:
 *   -1  pattern cannot be compiled
 *   -2  number of back references differs from the number of capture
 *       groups, or a back reference names a group that does not exist
 *   -3  out of memory
 *   otherwise the number of matches that were found and replaced
 */
int regex_replace(char **str, const char *pattern, const char *replace) {
    regex_t reg;

    if (regcomp(&reg, pattern, REG_EXTENDED) != 0)
        return -1;

    const size_t ngroups = reg.re_nsub;
    const unsigned char *rp;

    /* Count back references; unsigned char keeps bytes >= 0x80 literal. */
    size_t nrefs = 0;
    int out_of_range = 0;
    for (rp = (const unsigned char *)replace; *rp != '\0'; rp++) {
        if (*rp < 32) {
            nrefs++;
            if (*rp > ngroups)
                out_of_range = 1;
        }
    }
    if (nrefs != ngroups || out_of_range) {
        regfree(&reg);
        return -2;
    }

    regmatch_t *m = malloc((ngroups + 1) * sizeof *m);
    if (m == NULL) {
        regfree(&reg);
        return -3;
    }

    struct rr_outbuf out = { NULL, 0, 0 };
    const char *cursor = *str;  /* first byte of the input not yet consumed */
    int eflags = 0;             /* REG_NOTBOL only once we are past the start */
    int replacements = 0;
    int oom = 0;

    while (regexec(&reg, cursor, ngroups + 1, m, eflags) == 0) {
        const size_t so = (size_t)m[0].rm_so;
        const size_t eo = (size_t)m[0].rm_eo;

        oom = rr_outbuf_add(&out, cursor, so);      /* text before the match */
        for (rp = (const unsigned char *)replace; !oom && *rp != '\0'; rp++) {
            if (*rp >= 32) {
                oom = rr_outbuf_add(&out, (const char *)rp, 1);
            } else if (m[*rp].rm_so >= 0) {         /* group took part in the match */
                oom = rr_outbuf_add(&out, cursor + m[*rp].rm_so,
                                    (size_t)(m[*rp].rm_eo - m[*rp].rm_so));
            }
        }
        if (oom)
            break;
        replacements++;

        cursor += eo;
        if (so == eo) {
            /* Empty match: step over one input byte so the scan advances. */
            if (*cursor == '\0')
                break;
            oom = rr_outbuf_add(&out, cursor, 1);
            if (oom)
                break;
            cursor++;
        }
        eflags = REG_NOTBOL;
    }

    if (!oom && replacements > 0)
        oom = rr_outbuf_add(&out, cursor, strlen(cursor)); /* trailing text */

    free(m);
    regfree(&reg);

    if (oom) {
        free(out.data);
        return -3;
    }
    if (replacements == 0) {
        free(out.data);
        return 0;
    }

    free(*str);
    /* Shrink to the exact size; keep the larger buffer if that fails. */
    char *fit = realloc(out.data, out.len + 1);
    *str = fit ? fit : out.data;
    return replacements;
}
/* Case 1: "[text->target]" links rewritten as HTML anchors; \2 and \1 are
 * the back-reference bytes for capture groups 2 and 1. */
const char *pattern1   = "\\[([^-]+)->([^]]+)\\]";
const char  replace1[] = "<a href=\"\2\">\1</a>";
const char  test1[]    = "before [link->address] some text [link2->addr2] trail[a->[b->c]]";

/* Case 2: the replacement text contains the pattern itself. */
const char *pattern2   = "abc";
const char  replace2[] = "!abc";
const char  test2[]    = "abcabcdefghijklmnopqurstuvwxyzabc";

/* Case 3: a one-character pattern whose replacement doubles it. */
const char *pattern3   = "a";
const char  replace3[] = "aa";
const char  test3[]    = "a1a1a1a2ba1";
/* Run one demo case: copy `input` to the heap (regex_replace() needs a
 * free-able string), print it, apply the replacement and print the outcome.
 * `id` is the case number shown in the output. */
static void run_case(int id, const char *input, const char *pattern,
                     const char *replace)
{
    char *work = malloc(strlen(input) + 1);
    if (work == NULL) {
        perror("malloc");
        exit(EXIT_FAILURE);
    }
    strcpy(work, input);

    puts(work);
    printf("test %d Before: [%s], ", id, work);
    /* Keep the result signed: negative values are error codes, and %d
     * expects an int. */
    int count = regex_replace(&work, pattern, replace);
    printf("After replacing %d matches: [%s]\n", count, work);

    free(work);
}

/* Demo driver: runs the three sample replacements in order. */
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    run_case(1, test1, pattern1, replace1);
    run_case(2, test2, pattern2, replace2);
    run_case(3, test3, pattern3, replace3);
    return 0;
}