使用c REGEX表达式列出和过滤文件

时间:2019-06-29 09:42:41

标签: c regex file

在以下代码中,我仅尝试使用正则表达式(名为test.jpg的文件)进行过滤,我在做什么错,因为下面的代码没有将其过滤掉?

我知道有更简单的方法,但是最终我想将正则表达式更改为^(image_)\\d{3,6}_201412\\d{2}_\\d{6}\\.(jpg) 而且我的文件夹中包含100,000+个文件,所以我只能使用c getdents函数,该函数比任何其他方法都快

我得到以下输出:

  

**************发现*******

     

image_0179_20141212_060714.jpg

#define _GNU_SOURCE
#include <dirent.h>     /* Defines DT_* constants */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <string.h>
#include <regex.h>
#include <stdio.h>

#define handle_error(msg) \
       do { perror(msg); exit(EXIT_FAILURE); } while (0)

struct linux_dirent {
    long           d_ino;
    off_t          d_off;
    unsigned short d_reclen;
    char           d_name[];
};

#define BUF_SIZE 1024*1024*5

int
main(int argc, char *argv[])
{
    int fd, nread;
    char buf[BUF_SIZE];
    struct linux_dirent *d;
    int bpos;
    char d_type;
    regex_t reg;
    regmatch_t pmatch[40];
    #define NAME "image_0179_20141212_060714.jpg"
    const char *pattern = "^(image_)\\d{3,6}_201412\\d{2}_\\d{6}\\.(jpg)";
    regcomp(&reg, pattern, REG_ICASE | REG_EXTENDED);
    int retval = 0;
    char buffer[1024] = "";
    fd = open(argc > 1 ? argv[1] : ".", O_RDONLY | O_DIRECTORY);
    if (fd == -1)
         handle_error("open");
    for ( ; ; )
    {
       nread = syscall(SYS_getdents, fd, buf, BUF_SIZE);
       if (nread == -1)
         handle_error("getdents");
       if (nread == 0)
          break;
       for (bpos = 0; bpos < nread;)
       {
           d = (struct linux_dirent *) (buf + bpos);
           d_type = *(buf + bpos + d->d_reclen - 1);
           if( d->d_ino != 0 && d_type == DT_REG )
           {
                //printf("%s\n", (char *)d->d_name );
                if (strstr(d->d_name, NAME) != NULL)
                {
                    printf("**************found*******\n");
                    printf("%s\n", (char *)d->d_name );
                };

                retval = regexec(&reg, d->d_name, 2, pmatch, 0);
                //printf("%d\n",retval);
                if(retval==0)
                {
                    printf("**************found regex*******\n");
                    printf("%s\n", (char *)d->d_name );
                }
            }
            bpos += d->d_reclen;
        }
     }
     regfree(&reg);
     exit(EXIT_SUCCESS);
}

1 个答案:

答案 0 :(得分:1)

改为使用以下正则表达式

^image_[0-9]{3,6}_201412[0-9]{2}_[0-9]{6}\.jpg$