如何在Linux上递归列出C中的目录?

时间:2011-12-08 19:57:15

标签: c linux recursion

我需要递归列出C编程中的所有目录和文件。我已经研究过FTW但是我没有使用这两种操作系统(Fedora和Minix)。从过去几个小时里读到的所有不同的东西开始,我开始感到头疼。

如果有人知道我可以看到的代码片段,那将是惊人的,或者如果有人能给我一个很好的指导,我将非常感激。

5 个答案:

答案 0 :(得分:75)

为什么每个人都坚持一次又一次地重新发明轮子?

POSIX.1-2008标准化nftw()函数,也在单Unix规范v4(SuSv4)中定义,可在Linux(glibc,man 3 nftw),OS X和大多数当前BSD中使用变种。它根本不是新的。

基于opendir() / readdir() / closedir()的简单实现几乎从不处理在树遍历期间目录或文件被移动、重命名或删除的情况,而nftw()应该能够优雅地处理这些情况。

作为示例,请考虑以下C程序,该程序列出从当前工作目录开始的目录树,或命令行上指定的每个目录,或仅列出命令行中指定的文件:

/* We want POSIX.1-2008 + XSI, i.e. SuSv4, features */
#define _XOPEN_SOURCE 700

/* Added on 2017-06-25:
   If the C library can support 64-bit file sizes
   and offsets, using the standard names,
   these defines tell the C library to do so. */
#define _LARGEFILE64_SOURCE
#define _FILE_OFFSET_BITS 64

#include <stdlib.h>
#include <unistd.h>
#include <ftw.h>
#include <time.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>

/* POSIX.1 says each process has at least 20 file descriptors.
 * Three of those belong to the standard streams.
 * Here, we use a conservative estimate of 15 available;
 * assuming we use at most two for other uses in this program,
 * we should never run into any problems.
 * Most trees are shallower than that, so it is efficient.
 * Deeper trees are traversed fine, just a bit slower.
 * (Linux allows typically hundreds to thousands of open files,
 *  so you'll probably never see any issues even if you used
 *  a much higher value, say a couple of hundred, but
 *  15 is a safe, reasonable value.)
 */
#ifndef USE_FDS
#define USE_FDS 15
#endif

int print_entry(const char *filepath, const struct stat *info,
                const int typeflag, struct FTW *pathinfo)
{
    /* const char *const filename = filepath + pathinfo->base; */
    const double bytes = (double)info->st_size; /* Not exact if large! */
    struct tm mtime;

    localtime_r(&(info->st_mtime), &mtime);

    printf("%04d-%02d-%02d %02d:%02d:%02d",
           mtime.tm_year+1900, mtime.tm_mon+1, mtime.tm_mday,
           mtime.tm_hour, mtime.tm_min, mtime.tm_sec);

    if (bytes >= 1099511627776.0)
        printf(" %9.3f TiB", bytes / 1099511627776.0);
    else
    if (bytes >= 1073741824.0)
        printf(" %9.3f GiB", bytes / 1073741824.0);
    else
    if (bytes >= 1048576.0)
        printf(" %9.3f MiB", bytes / 1048576.0);
    else
    if (bytes >= 1024.0)
        printf(" %9.3f KiB", bytes / 1024.0);
    else
        printf(" %9.0f B  ", bytes);

    if (typeflag == FTW_SL) {
        char   *target;
        size_t  maxlen = 1023;
        ssize_t len;

        while (1) {

            target = malloc(maxlen + 1);
            if (target == NULL)
                return ENOMEM;

            len = readlink(filepath, target, maxlen);
            if (len == (ssize_t)-1) {
                const int saved_errno = errno;
                free(target);
                return saved_errno;
            }
            if (len >= (ssize_t)maxlen) {
                free(target);
                maxlen += 1024;
                continue;
            }

            target[len] = '\0';
            break;
        }

        printf(" %s -> %s\n", filepath, target);
        free(target);

    } else
    if (typeflag == FTW_SLN)
        printf(" %s (dangling symlink)\n", filepath);
    else
    if (typeflag == FTW_F)
        printf(" %s\n", filepath);
    else
    if (typeflag == FTW_D || typeflag == FTW_DP)
        printf(" %s/\n", filepath);
    else
    if (typeflag == FTW_DNR)
        printf(" %s/ (unreadable)\n", filepath);
    else
        printf(" %s (unknown)\n", filepath);

    return 0;
}

int print_directory_tree(const char *const dirpath)
{
    int result;

    /* Invalid directory path? */
    if (dirpath == NULL || *dirpath == '\0')
        return errno = EINVAL;

    result = nftw(dirpath, print_entry, USE_FDS, FTW_PHYS);
    if (result >= 0)
        errno = result;

    return errno;
}

int main(int argc, char *argv[])
{
    int arg;

    if (argc < 2) {

        if (print_directory_tree(".")) {
            fprintf(stderr, "%s.\n", strerror(errno));
            return EXIT_FAILURE;
        }

    } else {

        for (arg = 1; arg < argc; arg++) {
            if (print_directory_tree(argv[arg])) {
                fprintf(stderr, "%s.\n", strerror(errno));
                return EXIT_FAILURE;
            }
        }

    }

    return EXIT_SUCCESS;
}

上面的大多数代码都在print_entry()中。它的任务是打印出每个目录条目。在print_directory_tree()中,我们告诉nftw()为它看到的每个目录条目调用print_entry()。

上面唯一的手工波形细节是决定应该让nftw()使用多少个文​​件描述符。如果您的程序在文件树遍历期间最多使用两个额外的文件描述符(除了标准流),则已知15是安全的(在所有具有nftw()并且大部分符合POSIX的系统上)。

在Linux中,您可以使用sysconf(_SC_OPEN_MAX)查找最大打开文件数,并减去与nftw()调用同时使用的描述符数量,但我不会费这个事(除非我知道该实用程序主要用于病态地深的目录结构)。十五个描述符并不限制树的深度;nftw()只是会变慢(并且如果遍历到比某个目录深13层以上的目录,可能无法检测到该目录中的变化,不过检测变化的权衡和一般能力在不同系统和C库实现之间也有所不同)。只需使用这样的编译时常量就可以保持代码的可移植性 - 它不仅适用于Linux,而且适用于Mac OS X和所有当前的BSD变体,以及大多数其他不太旧的Unix变体。

在评论中,Ruslan提到他们必须切换到nftw64(),因为他们的文件系统条目需要64位大小/偏移,而“正常”版本的nftw()会失败并返回errno == EOVERFLOW。正确的解决方案不是切换到GLIBC特定的64位函数,而是定义_LARGEFILE64_SOURCE和_FILE_OFFSET_BITS 64。这些定义告诉C库在可能的情况下切换到64位文件大小和偏移,同时仍使用标准函数(nftw()、fstat()等)和类型名称(off_t等)。

答案 1 :(得分:56)

这是一个递归版本:

#include <unistd.h>
#include <sys/types.h>
#include <dirent.h>
#include <stdio.h>
#include <string.h>

/*
 * Recursively print the directory tree rooted at `name` to stdout.
 *
 * name   - path of the directory to list.
 * indent - number of spaces printed in front of each entry; increased by 2
 *          for every level of recursion.
 *
 * Directories are printed as "[dirname]" and then descended into; every
 * other entry is printed as "- filename".  A directory that cannot be opened
 * is silently skipped.  Classification relies on the d_type field returned
 * by readdir().
 */
void listdir(const char *name, int indent)
{
    DIR *dir;
    struct dirent *entry;

    if (!(dir = opendir(name)))
        return;

    while ((entry = readdir(dir)) != NULL) {
        if (entry->d_type == DT_DIR) {
            char path[1024];
            int written;

            /* "." and ".." would make the recursion loop forever. */
            if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0)
                continue;

            written = snprintf(path, sizeof(path), "%s/%s", name, entry->d_name);
            printf("%*s[%s]\n", indent, "", entry->d_name);

            /*
             * A truncated path names a different (or nonexistent) directory,
             * so report it instead of recursing into the wrong place.
             */
            if (written < 0 || (size_t)written >= sizeof(path)) {
                fprintf(stderr, "path too long, not descending: %s/%s\n",
                        name, entry->d_name);
                continue;
            }
            listdir(path, indent + 2);
        } else {
            printf("%*s- %s\n", indent, "", entry->d_name);
        }
    }
    closedir(dir);
}

/* Entry point: print the tree below the current working directory. */
int main(void)
{
    const char *start_dir = ".";
    const int start_indent = 0;

    listdir(start_dir, start_indent);
    return 0;
}

答案 2 :(得分:8)

/*
 * Decide whether the entry `name` found inside directory `parent` should be
 * descended into.
 *
 * parent - path of the directory that contains the entry.
 * name   - entry name as reported by readdir().
 *
 * Returns 1 when "parent/name" is a directory other than "." or "..",
 * and 0 otherwise -- including when the path buffer cannot be allocated or
 * stat() fails (e.g. the entry vanished or is not accessible).
 */
int is_directory_we_want_to_list(const char *parent, char *name) {
  struct stat st_buf;
  size_t path_size;
  char *path;
  int is_dir;

  if (!strcmp(".", name) || !strcmp("..", name))
    return 0;

  /* +2: one byte for the '/' separator, one for the terminating NUL. */
  path_size = strlen(parent) + strlen(name) + 2;
  /* Heap instead of alloca(): path length is not bounded by the stack. */
  path = malloc(path_size);
  if (path == NULL)
    return 0;
  snprintf(path, path_size, "%s/%s", parent, name);

  /* st_buf is only meaningful when stat() succeeded. */
  is_dir = (stat(path, &st_buf) == 0) && S_ISDIR(st_buf.st_mode);
  free(path);
  return is_dir;
}

/*
 * Recursively print the name of every entry below directory `name`,
 * one per line, to stdout (entry names only, not full paths).
 *
 * Returns 0 when the whole subtree was listed, or -1 when `name` (or any
 * directory below it) could not be opened or memory ran out; listing
 * continues past a subdirectory that fails.
 */
int list(const char *name) {
  DIR *dir = opendir(name);
  struct dirent *ent;
  int status = 0;

  /* readdir(NULL) is undefined behavior, so bail out early. */
  if (dir == NULL)
    return -1;

  while ((ent = readdir(dir)) != NULL) {
    char *entry_name = ent->d_name;
    printf("%s\n", entry_name);
    if (is_directory_we_want_to_list(name, entry_name)) {
      /* +2: one byte for the '/' separator, one for the terminating NUL. */
      size_t next_size = strlen(name) + strlen(entry_name) + 2;
      char *next = malloc(next_size);
      if (next == NULL) {
        status = -1;
        break;
      }
      snprintf(next, next_size, "%s/%s", name, entry_name);
      if (list(next) != 0)
        status = -1;
      free(next);
    }
  }
  closedir(dir);
  return status;
}

在此上下文中值得浏览的头文件:stat.h、dirent.h。请记住,上面的代码不会检查可能发生的任何错误。

ftw.h中定义的ftw提供了一种完全不同的方法。

答案 3 :(得分:4)

正如我在评论中提到的,我认为这种递归方法会给这项任务带来两个固有的缺陷。

第一个缺陷是打开文件的限制。此限制对深度遍历施加了限制。如果有足够的子文件夹,递归方法将会中断。(请参阅下文有关堆栈溢出的修改。)

第二个缺陷更微妙。递归方法使得测试硬链接变得非常困难。如果文件夹树是循环的(由于硬链接),递归方法将中断(希望没有堆栈溢出)。(请参阅下文有关硬链接的修改。)

但是,通过用单个文件描述符和链接列表替换递归来避免这些问题非常简单。

我认为这不是一个学校项目,递归是可选的。

这是一个示例应用程序。

使用a.out ./查看文件夹树。

我为宏和东西道歉...我通常使用内联函数,但我认为如果它只是在一个函数中就会更容易理解代码。

#include <dirent.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>

/*
 * Non-recursive directory tree listing with protection against cyclic
 * folder hierarchies.
 *
 * Usage: a.out <directory>
 *
 * The tree is walked with an explicit work queue instead of recursion, so
 * only one directory stream is open at any time.  Every entry found is
 * stored as a record; once the walk is finished all records are written
 * (one path per line) to stderr and released.  Folders are recognized via
 * the d_type field reported by readdir(), and a folder whose inode number
 * matches an already visited folder is not descended into again.
 *
 * Exits with status 0 after printing the usage text, and with EXIT_FAILURE
 * when memory cannot be allocated.
 */
int main(int argc, char const *argv[]) {
  /* print use instruction unless a (non-empty) folder name was given */
  if (argc < 2 || argv[1][0] == '\0') {
    fprintf(stderr,
            "\nuse:\n"
            "    %s <directory>\n"
            "for example:\n"
            "    %s ./\n\n",
            argv[0], argv[0]);
    exit(0);
  }

  /*************** a small linked list macro implementation ***************/

  /* node of a circular doubly linked list; an empty list points at itself */
  typedef struct list_s {
    struct list_s *next;
    struct list_s *prev;
  } list_s;

#define LIST_INIT(name)                                                        \
  { .next = &name, .prev = &name }

/* inserts `node` right after `dest` */
#define LIST_PUSH(dest, node)                                                  \
  do {                                                                         \
    (node)->next = (dest)->next;                                               \
    (node)->prev = (dest);                                                     \
    (node)->next->prev = (node);                                               \
    (dest)->next = (node);                                                     \
  } while (0)

/* removes the first node after `list` into `var` (NULL if list is empty) */
#define LIST_POP(list, var)                                                    \
  do {                                                                         \
    if ((list)->next == (list)) {                                              \
      var = NULL;                                                              \
    } else {                                                                   \
      var = (list)->next;                                                      \
      (list)->next = var->next;                                                \
      var->next->prev = var->prev;                                             \
    }                                                                          \
  } while (0)

  /*************** a record (file / folder) item type ***************/

  typedef struct record_s {
    /* this is a flat processing queue. */
    list_s queue;
    /* this will list all queued and processed folders (cyclic protection) */
    list_s folders;
    /* this will list all the completed items (siblings and such) */
    list_s list;
    /* unique ID */
    ino_t ino;
    /* name length */
    size_t len;
    /* name string */
    char name[];
  } record_s;

/* take a list_s pointer and convert it to the record_s pointer */
#define NODE2RECORD(node, list_name)                                           \
  ((record_s *)(((uintptr_t)(node)) -                                          \
                ((uintptr_t) & ((record_s *)0)->list_name)))

/* initializes a new record */
#define RECORD_INIT(name)                                                      \
  (record_s){.queue = LIST_INIT((name).queue),                                 \
             .folders = LIST_INIT((name).folders),                             \
             .list = LIST_INIT((name).list)}

  /*************** the actual code ***************/

  record_s records = RECORD_INIT(records);
  record_s *pos, *item;
  list_s *tmp;
  DIR *dir;
  struct dirent *entry;

  /* initialize the root folder record and add it to the queue */
  /* +2: room for an appended '/' and the terminating NUL */
  pos = malloc(sizeof(*pos) + strlen(argv[1]) + 2);
  if (!pos) {
    perror("malloc");
    exit(EXIT_FAILURE);
  }
  *pos = RECORD_INIT(*pos);
  pos->len = strlen(argv[1]);
  memcpy(pos->name, argv[1], pos->len);
  /* argv[1] is known to be non-empty here, so len - 1 is a valid index */
  if (pos->name[pos->len - 1] != '/')
    pos->name[pos->len++] = '/';
  pos->name[pos->len] = 0;
  /* push to queue, but also push to list (first item processed) */
  LIST_PUSH(&records.queue, &pos->queue);
  LIST_PUSH(&records.list, &pos->list);

  /* as long as the queue has items to be processed, do so */
  while (records.queue.next != &records.queue) {
    /* pop queued item */
    LIST_POP(&records.queue, tmp);
    /* collect record to process */
    pos = NODE2RECORD(tmp, queue);
    /* add record to the processed folder list */
    LIST_PUSH(&records.folders, &pos->folders);

    /* process the folder and add all folder data to current list */
    dir = opendir(pos->name);
    if (!dir)
      continue;

    while ((entry = readdir(dir)) != NULL) {
      /* glibc's struct dirent has no d_namlen member (that is a BSD/macOS
       * extension), so measure the NUL terminated name instead */
      size_t name_len = strlen(entry->d_name);

      /* create new item, copying it's path data and unique ID */
      item = malloc(sizeof(*item) + pos->len + name_len + 2);
      if (!item) {
        perror("malloc");
        closedir(dir);
        exit(EXIT_FAILURE);
      }
      *item = RECORD_INIT(*item);
      item->len = pos->len + name_len;
      memcpy(item->name, pos->name, pos->len);
      memcpy(item->name + pos->len, entry->d_name, name_len);
      item->name[item->len] = 0;
      item->ino = entry->d_ino;
      /* add item to the list, right after the `pos` item */
      LIST_PUSH(&pos->list, &item->list);

      /* unless it's a folder, we're done. */
      if (entry->d_type != DT_DIR)
        continue;

      /* test for '.' and '..' */
      if (entry->d_name[0] == '.' &&
          (entry->d_name[1] == 0 ||
           (entry->d_name[1] == '.' && entry->d_name[2] == 0)))
        continue;

      /* add folder marker */
      item->name[item->len++] = '/';
      item->name[item->len] = 0;

      /* test for cyclic processing */
      list_s *t = records.folders.next;
      while (t != &records.folders) {
        if (NODE2RECORD(t, folders)->ino == item->ino) {
          /* we already processed this folder! */
          break; /* this breaks from the small loop... */
        }
        t = t->next;
      }
      if (t != &records.folders)
        continue; /* if we broke from the small loop, entry is done */

      /* item is a new folder, add to queue */
      LIST_PUSH(&records.queue, &item->queue);
    }
    closedir(dir);
  }

  /*************** Printing the results and cleaning up ***************/
  while (records.list.next != &records.list) {
    /* pop list item (exactly one per iteration: a second pop here would
     * drop every other record without printing or freeing it) */
    LIST_POP(&records.list, tmp);
    /* collect record to process */
    pos = NODE2RECORD(tmp, list);
    fwrite(pos->name, pos->len, 1, stderr);
    fwrite("\n", 1, 1, stderr);
    free(pos);
  }
  return 0;
}

修改

@Stargateur在评论中提到递归代码可能会在达到打开文件限制之前溢出堆栈。

虽然我不知道堆栈溢出是如何更好的,但只要进程在调用时没有接近文件限制,这个评估可能是正确的。

@Stargateur在评论中提到的另一点是,递归代码的深度受到子目录的最大数量(ext4文件系统上的64000)的限制,并且硬链接极不可能(因为硬链接到文件夹)在Linux / Unix上是不允许的。)

如果代码在Linux上运行(根据问题,这是一个好消息),所以这个问题并不是真正令人担忧的问题(除非在macOS或Windows上运行代码)。虽然递归中的64K子文件夹可能会使堆栈大开。

话虽如此,无递归选项仍然具有优势,例如能够轻松地为处理的项目数量添加限制以及能够缓存结果。

P.S.

根据评论,这里是一个非递归版本的代码,它不会检查循环层次结构。它的速度更快,应该足够安全,可以在不允许使用文件夹硬链接的Linux机器上使用。

#include <dirent.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>

/*
 * Non-recursive directory tree listing (no check for cyclic hierarchies).
 *
 * Usage: a.out <directory>
 *
 * The tree is walked with an explicit work queue instead of recursion, so
 * only one directory stream is open at any time.  Every entry found is
 * stored as a record; once the walk is finished all records are written
 * (one path per line) to stderr and released.  Folders are recognized via
 * the d_type field reported by readdir().
 *
 * Exits with status 0 after printing the usage text, and with EXIT_FAILURE
 * when memory cannot be allocated.
 */
int main(int argc, char const *argv[]) {
  /* print use instruction unless a (non-empty) folder name was given */
  if (argc < 2 || argv[1][0] == '\0') {
    fprintf(stderr,
            "\nuse:\n"
            "    %s <directory>\n"
            "for example:\n"
            "    %s ./\n\n",
            argv[0], argv[0]);
    exit(0);
  }

  /*************** a small linked list macro implementation ***************/

  /* node of a circular doubly linked list; an empty list points at itself */
  typedef struct list_s {
    struct list_s *next;
    struct list_s *prev;
  } list_s;

#define LIST_INIT(name)                                                        \
  { .next = &name, .prev = &name }

/* inserts `node` right after `dest` */
#define LIST_PUSH(dest, node)                                                  \
  do {                                                                         \
    (node)->next = (dest)->next;                                               \
    (node)->prev = (dest);                                                     \
    (node)->next->prev = (node);                                               \
    (dest)->next = (node);                                                     \
  } while (0)

/* removes the first node after `list` into `var` (NULL if list is empty) */
#define LIST_POP(list, var)                                                    \
  do {                                                                         \
    if ((list)->next == (list)) {                                              \
      var = NULL;                                                              \
    } else {                                                                   \
      var = (list)->next;                                                      \
      (list)->next = var->next;                                                \
      var->next->prev = var->prev;                                             \
    }                                                                          \
  } while (0)

  /*************** a record (file / folder) item type ***************/

  typedef struct record_s {
    /* this is a flat processing queue. */
    list_s queue;
    /* this will list all the completed items (siblings and such) */
    list_s list;
    /* unique ID */
    ino_t ino;
    /* name length */
    size_t len;
    /* name string */
    char name[];
  } record_s;

/* take a list_s pointer and convert it to the record_s pointer */
#define NODE2RECORD(node, list_name)                                           \
  ((record_s *)(((uintptr_t)(node)) -                                          \
                ((uintptr_t) & ((record_s *)0)->list_name)))

/* initializes a new record */
#define RECORD_INIT(name)                                                      \
  (record_s){.queue = LIST_INIT((name).queue), .list = LIST_INIT((name).list)}

  /*************** the actual code ***************/

  record_s records = RECORD_INIT(records);
  record_s *pos, *item;
  list_s *tmp;
  DIR *dir;
  struct dirent *entry;

  /* initialize the root folder record and add it to the queue */
  /* +2: room for an appended '/' and the terminating NUL */
  pos = malloc(sizeof(*pos) + strlen(argv[1]) + 2);
  if (!pos) {
    perror("malloc");
    exit(EXIT_FAILURE);
  }
  *pos = RECORD_INIT(*pos);
  pos->len = strlen(argv[1]);
  memcpy(pos->name, argv[1], pos->len);
  /* argv[1] is known to be non-empty here, so len - 1 is a valid index */
  if (pos->name[pos->len - 1] != '/')
    pos->name[pos->len++] = '/';
  pos->name[pos->len] = 0;
  /* push to queue, but also push to list (first item processed) */
  LIST_PUSH(&records.queue, &pos->queue);
  LIST_PUSH(&records.list, &pos->list);

  /* as long as the queue has items to be processed, do so */
  while (records.queue.next != &records.queue) {
    /* pop queued item */
    LIST_POP(&records.queue, tmp);
    /* collect record to process */
    pos = NODE2RECORD(tmp, queue);

    /* process the folder and add all folder data to current list */
    dir = opendir(pos->name);
    if (!dir)
      continue;

    while ((entry = readdir(dir)) != NULL) {
      /* glibc's struct dirent has no d_namlen member (that is a BSD/macOS
       * extension), so measure the NUL terminated name instead */
      size_t name_len = strlen(entry->d_name);

      /* create new item, copying it's path data and unique ID */
      item = malloc(sizeof(*item) + pos->len + name_len + 2);
      if (!item) {
        perror("malloc");
        closedir(dir);
        exit(EXIT_FAILURE);
      }
      *item = RECORD_INIT(*item);
      item->len = pos->len + name_len;
      memcpy(item->name, pos->name, pos->len);
      memcpy(item->name + pos->len, entry->d_name, name_len);
      item->name[item->len] = 0;
      item->ino = entry->d_ino;
      /* add item to the list, right after the `pos` item */
      LIST_PUSH(&pos->list, &item->list);

      /* unless it's a folder, we're done. */
      if (entry->d_type != DT_DIR)
        continue;

      /* test for '.' and '..' */
      if (entry->d_name[0] == '.' &&
          (entry->d_name[1] == 0 ||
           (entry->d_name[1] == '.' && entry->d_name[2] == 0)))
        continue;

      /* add folder marker */
      item->name[item->len++] = '/';
      item->name[item->len] = 0;

      /* item is a new folder, add to queue */
      LIST_PUSH(&records.queue, &item->queue);
    }
    closedir(dir);
  }

  /*************** Printing the results and cleaning up ***************/
  while (records.list.next != &records.list) {
    /* pop list item (exactly one per iteration: a second pop here would
     * drop every other record without printing or freeing it) */
    LIST_POP(&records.list, tmp);
    /* collect record to process */
    pos = NODE2RECORD(tmp, list);
    fwrite(pos->name, pos->len, 1, stderr);
    fwrite("\n", 1, 1, stderr);
    free(pos);
  }
  return 0;
}

答案 4 :(得分:3)

这是一个使用递归但使用更少堆栈空间的简化版本:

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>
#include <dirent.h>

/*
 * Print `path` and, recursively, everything below it to stdout, one full
 * path per line.
 *
 * path - writable buffer holding the directory to list; it is extended in
 *        place while descending and restored before returning.
 * size - total capacity of the `path` buffer in bytes.
 *
 * A directory that cannot be opened is reported on stderr together with the
 * errno text; a subdirectory whose full path would not fit into the buffer
 * is reported on stderr and skipped.
 */
void listdir(char *path, size_t size) {
    const size_t base_len = strlen(path);
    struct dirent *ent;
    DIR *dp = opendir(path);

    if (dp == NULL) {
        fprintf(stderr, "path not found: %s: %s\n",
                path, strerror(errno));
        return;
    }

    puts(path);
    for (ent = readdir(dp); ent != NULL; ent = readdir(dp)) {
        char *child = ent->d_name;

        /* Anything that is not a directory is just printed. */
        if (ent->d_type != DT_DIR) {
            printf("%s/%s\n", path, child);
            continue;
        }

        /* Never follow the self and parent links. */
        if (strcmp(child, ".") == 0 || strcmp(child, "..") == 0)
            continue;

        /* Needs room for '/', the child name and the terminating NUL. */
        if (base_len + strlen(child) + 2 > size) {
            fprintf(stderr, "path too long: %s/%s\n", path, child);
            continue;
        }

        /* Append "/child", recurse, then cut the buffer back to this dir. */
        path[base_len] = '/';
        strcpy(path + base_len + 1, child);
        listdir(path, size);
        path[base_len] = '\0';
    }
    closedir(dp);
}

/*
 * Entry point: list the current working directory, using a single
 * 1024-byte buffer that listdir() extends in place while descending.
 */
int main(void)
{
    char root[1024] = ".";

    listdir(root, sizeof(root));
    return 0;
}

在我的系统上,其输出与find .的输出完全相同。