Linux posix C regexec()没有返回所有匹配项

时间:2014-02-05 13:37:13

标签: c regex linux posix

我有以下脚本解析一个寻找字符串匹配的进程内存,一切正常但是转储编辑器的过程(在这种情况下为nano)与1193可能的匹配(如果我转储内存则有效)然后对它做一个egrep)但我的代码只输出3个匹配项。有什么想法吗?

#ifdef TARGET_64
// for 64bit target (see /proc/cpuinfo addr size virtual)
 #define MEM_MAX (1ULL << 48)
#else
 #define MEM_MAX (1ULL << 32)
#endif

#define _LARGEFILE64_SOURCE
#include <unistd.h>
#include <stdio.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/ptrace.h>
#include <regex.h>

int main(int argc, char **argv)
{
        if (argc < 2) {
                printf("Usage: %s <pid>\n", argv[0]);
                exit(1);
        }

        char buf[128];
        int pid = atoi(argv[1]);
        snprintf(buf, sizeof(buf), "/proc/%d/mem", pid);
        int fd = open(buf, O_RDONLY);
        if (fd == -1) {
                fprintf(stderr, "Error opening mem file: %m\n");
                exit(1);
        }

        int status ,i;
        int cflags = REG_EXTENDED;
        regmatch_t pmatch[1];
        const size_t nmatch=1;
        regex_t reg;
        const char *pattern="([a-zA-Z]{18,20})";
        regcomp(&reg, pattern, cflags);

        long ptret = ptrace(PTRACE_ATTACH, pid, 0, 0);
        if (ptret == -1) {
                fprintf(stderr, "Ptrace failed: %s\n", strerror(errno));
                close(fd);
                exit(1);
        }

        unsigned char page[4096];
        unsigned long long offset = 0;

        while (offset < MEM_MAX) {
                lseek64(fd, offset, SEEK_SET);

                ssize_t ret;
                ret = read(fd, page, sizeof(page));

                if (ret > 0) {
                        status = regexec(&reg, page, nmatch, pmatch, 0);
                        if(status == 0){
                                for (i=pmatch[0].rm_so; i<pmatch[0].rm_eo; ++i) {
                                        putchar(page[i]);
                                }
                                printf("\n");
                        }
                }

                offset += sizeof(page);
        }

        ptrace(PTRACE_DETACH, pid, 0, 0);
        close(fd);
        regfree(&reg);
        return 0;
}

nano with pid 2208 [ Read 1193 lines ] alpha之间18-20个字符:

root ~/coding/proc/regex # ./memregmatch 22008
ABCABCABCABCABCABC
ABCABCABCABCABCABCAC
ABCCBAABCCBAABCCBABA
root ~/coding/proc/regex #

1 个答案:

答案 0 :(得分:0)

好的,用libpcre

做了
#include <pcre.h>
#include <locale.h>

....

        const char *error;
        int   erroffset;
        pcre *re;
        int   rc;
        int   i;
        int   ovector[100];
        char *regex = "([a-zA-Z]{18,20})";
        re = pcre_compile (regex,          /* the pattern */
                        PCRE_MULTILINE|PCRE_DOTALL|PCRE_NEWLINE_ANYCRLF,
                        &error,         /* for error message */
                        &erroffset,     /* for error offset */
                        0);             /* use default character tables */
        if (!re)
        {
                printf("pcre_compile failed (offset: %d), %s\n", erroffset, error);
        return -1;
        }

....

                if (ret > 0) {
                        //
                        unsigned int offset = 0;
                        while (offset < sizeof(page) && (rc = pcre_exec(re, 0, page, sizeof(page), offset, 0, ovector, sizeof(ovector))) >= 0)
                        {
                                for(i = 0; i < rc; ++i)
                                {
                                        printf("%.*s\n", ovector[2*i+1] - ovector[2*i], page + ovector[2*i]);
                                }
                                offset = ovector[1];
                        }
                        //
                }