有没有办法规范化不存在文件的文件路径?

时间:2019-08-08 17:50:18

标签: c file path posix filepath

我想解析(消除)文件名中的所有 "." 和 ".." 引用,而该文件当前可能并不存在。

因此,我想做的事情与realpath(3)类似,但以下情况除外:

  • 文件名可能是不存在的文件
  • 不能解析符号链接(主要是因为路径组件可能不存在)

是否存在可以用来执行此操作的现有库代码,或者我必须编写新代码?

因此,本质上是:

  • a/../b/c.txt将成为b/c.txt
  • a/../../b/c.txt将成为../b/c.txt
  • a/./b/./c.txt将成为a/b/c.txt

如果有人好奇:我这样做并不是出于安全方面的考虑——我正在编写的工具本来就可以执行任意的 shell 命令,因此安全性不是问题。我只需要一种得到路径规范化表示的方法。

1 个答案:

答案 0 :(得分:1)

这是我30多年前首次编写的代码。您要使用的代码有两种变体。

该代码依据 Stack Overflow 所采用的 CC BY-SA 3.0 许可提供——您可以在注明出处的前提下使用该代码。

/*
@(#)File:           $RCSfile: clnpath.c,v $
@(#)Version:        $Revision: 2.19 $
@(#)Last changed:   $Date: 2017/03/26 06:32:49 $
@(#)Purpose:        Clean up pathname (lexical analysis only)
@(#)Author:         J Leffler
@(#)Copyright:      (C) JLSS 1987-2017
*/

/*TABSTOP=4*/

#include "clnpath.h"

#include "jlss.h"
#include "tokenise.h"
#include <string.h>

/* Capacity of the token array in clnpath2(): number of levels of directory */
#define MAX_PATH_ELEMENTS   64
/* Non-zero (1) when the two C strings have identical contents, else 0 */
#define strequal(a,b)       (!strcmp((a), (b)))
/* Element count of a true array; not meaningful when x is a pointer */
#define DIM(x)              (sizeof(x) / sizeof((x)[0]))

#if !defined(lint)
/*
** Identification string for this source file; compiled out when 'lint'
** is defined.  It is declared extern and then defined, so the object has
** external linkage rather than being a file-local static.
*/
/* Prevent over-aggressive optimizers from eliminating ID string */
extern const char jlss_id_clnpath_c[];
const char jlss_id_clnpath_c[] = "@(#)$Id: clnpath.c,v 2.19 2017/03/26 06:32:49 jleffler Exp $";
#endif /* lint */

/*
** clnpath() - lexically tidy a pathname in place.
**
** Only the text of path is examined; the file system is never consulted.
**   - runs of adjacent slashes collapse to a single slash
**   - "." components are dropped ("./xxx", "xxx/.", "xxx/./yyy")
**   - a single trailing slash is removed (a lone "/" is kept)
** ".." components are left untouched.  A path containing no slash at all
** is returned unchanged.  Characters are only ever removed, so the result
** always fits in the space occupied by the input.
*/
void clnpath(char *path)
{
    char *rd = path;            /* read cursor */
    char *wr = path;            /* write cursor; never ahead of rd */
    char *last;
    int   seen_slash = 0;
    char  ch;

    /* Pass 1: squeeze every run of slashes down to one slash */
    do
    {
        ch = *rd++;
        *wr++ = ch;
        if (ch == '/')
        {
            seen_slash = 1;
            while (*rd == '/')
                rd++;
        }
    } while (ch != '\0');

    /* Without a slash there are no components to tidy */
    if (!seen_slash)
        return;

    /* Pass 2: drop "." components; a leading root slash is left in place */
    rd = wr = path + ((path[0] == '/') ? 1 : 0);

    /* Strip leading "./" prefixes, but keep a final "./" for the code below */
    while (rd[0] == '.' && rd[1] == '/' && rd[2] != '\0')
        rd += 2;

    do
    {
        ch = *rd++;
        *wr++ = ch;
        if (ch == '/' && rd[0] == '.' && (rd[1] == '/' || rd[1] == '\0'))
        {
            rd++;               /* step over the dot */
            wr--;               /* the slash just written will be overwritten */
        }
    } while (ch != '\0');

    /* "/." and "/./" both reduce to the root directory */
    if (path[0] == '/' && path[1] == '.')
    {
        if (path[2] == '\0' || (path[2] == '/' && path[3] == '\0'))
            path[1] = '\0';
    }

    /*
    ** wr is one beyond the terminating null written by pass 2, so wr - 2
    ** is the last character copied.  At most one trailing slash can be
    ** left at this point; remove it unless it is the first character.
    */
    last = wr - 2;
    if (last > path && *last == '/')
        *last = '\0';
}

/*
** clnpath2() - clnpath() followed by lexical removal of "<name>/.." pairs.
**
** clnpath2() is not part of the basic clnpath() function because it can
** change the meaning of a path name if there are symbolic links on the
** system.  For example, suppose /usr/tmp is a symbolic link to /var/tmp.
** If the user supplies /usr/tmp/../abcdef as the directory name, clnpath2
** would transform that to /usr/abcdef, not to /var/abcdef which is what
** the kernel would interpret it as.
**
** The path is rewritten in place.  When every component cancels out the
** result is "." for a relative path and "/" for an absolute one.
**
** NOTE(review): tokenise() is declared in tokenise.h and is not visible
** here.  This function assumes it splits path in place on '/', stores up
** to MAX_PATH_ELEMENTS component pointers in token[], returns the number
** stored, and yields an empty first token for an absolute path - confirm
** against tokenise.h, including what it returns for an empty string or
** for a path with more than MAX_PATH_ELEMENTS components.
** NOTE(review): if tokenise() returns 0 for an empty input string, the "."
** result needs two bytes, so the caller's buffer must be at least that big.
*/
void clnpath2(char *path)
{
    char *token[MAX_PATH_ELEMENTS];
    int   ntok;

    clnpath(path);

    /* Cancel each "<name>/.." pair, where <name> is neither ".." nor the root */
    if ((ntok = tokenise(path, "/", token, MAX_PATH_ELEMENTS, 0)) > 1)
    {
        for (int i = 0; i < ntok - 1; i++)
        {
            if (!strequal(token[i], "..") && strequal(token[i + 1], ".."))
            {
                /* Empty token: ".." directly after it is dealt with during reassembly */
                if (*token[i] == '\0')
                    continue;
                /*
                ** Close the two-slot gap.  Only token[i+2] .. token[ntok-1]
                ** hold pointers set by tokenise(), so the shift must stop
                ** before reading token[ntok], which is unset and is out of
                ** bounds when ntok == MAX_PATH_ELEMENTS.
                */
                for (int j = i; j < ntok - 2; j++)
                    token[j] = token[j + 2];
                ntok -= 2;
                i = -1;     /* Restart enclosing for loop */
            }
        }
    }

    /* Reassemble string */
    char *dst = path;
    if (ntok == 0)
    {
        /* Nothing left: the path names the current directory */
        *dst++ = '.';
        *dst = '\0';
    }
    else
    {
        if (token[0][0] == '\0')
        {
            /*
            ** Leading empty token (absolute path, per the assumption above):
            ** drop any run of ".." tokens that immediately follows it.
            */
            int   i;
            for (i = 1; i < ntok && strequal(token[i], ".."); i++)
                ;
            if (i > 1)
            {
                int j;
                for (j = 1; i < ntok; i++)
                    token[j++] = token[i];
                ntok = j;
            }
        }
        if (ntok == 1 && token[0][0] == '\0')
        {
            /* Only the empty leading token remains: the result is the root */
            *dst++ = '/';
            *dst = '\0';
        }
        else
        {
            /*
            ** Copy each token followed by '/', then turn the final '/' into
            ** the terminator.  Presumably tokens point into path in order,
            ** so the forward copy never overwrites a token not yet read.
            */
            for (int i = 0; i < ntok; i++)
            {
                char *src = token[i];
                while ((*dst++ = *src++) != '\0')
                    ;
                *(dst - 1) = '/';
            }
            *(dst - 1) = '\0';
        }
    }
}

#if defined(TEST)

#include <stdio.h>

#include "phasedtest.h"

/* -- PHASE 1 TESTING -- */

/* -- Phase 1 - Testing clnpath() -- */

/* One clnpath() check: a raw pathname and the text expected after cleaning */
typedef struct p1_test_case
{
    const char *input;      /* pathname copied into the buffer given to clnpath() */
    const char *output;     /* expected buffer contents after clnpath() */
} p1_test_case;

/* Stress cases for the cleaning code, concentrated on the boundaries. */
static const p1_test_case p1_tests[] =
{
    { .input = "/",                                 .output = "/"                       },
    { .input = "//",                                .output = "/"                       },
    { .input = "///",                               .output = "/"                       },
    { .input = "/.",                                .output = "/"                       },
    { .input = "/./",                               .output = "/"                       },
    { .input = "/./.",                              .output = "/"                       },
    { .input = "/././.profile",                     .output = "/.profile"               },
    { .input = "./",                                .output = "."                       },
    { .input = "./.",                               .output = "."                       },
    { .input = "././",                              .output = "."                       },
    { .input = "./././.profile",                    .output = ".profile"                },
    { .input = "abc/.",                             .output = "abc"                     },
    { .input = "abc/./def",                         .output = "abc/def"                 },
    { .input = "./abc",                             .output = "abc"                     },

    { .input = "//abcd///./abcd////",               .output = "/abcd/abcd"              },
    { .input = "//abcd///././../defg///ddd//.",     .output = "/abcd/../defg/ddd"       },
    { .input = "/abcd/./../././defg/./././ddd",     .output = "/abcd/../defg/ddd"       },
    { .input = "//abcd//././../defg///ddd//.///",   .output = "/abcd/../defg/ddd"       },

    /* Most of these are of minimal interest in phase 1 */
    { .input = "/usr/tmp/clnpath.c",                .output = "/usr/tmp/clnpath.c"      },
    { .input = "/usr/tmp/",                         .output = "/usr/tmp"                },
    { .input = "/bin/..",                           .output = "/bin/.."                 },
    { .input = "bin/..",                            .output = "bin/.."                  },
    { .input = "/bin/.",                            .output = "/bin"                    },
    { .input = "sub/directory",                     .output = "sub/directory"           },
    { .input = "sub/directory/file",                .output = "sub/directory/file"      },
    { .input = "/part1/part2/../.././../",          .output = "/part1/part2/../../.."   },
    { .input = "/.././../usr//.//bin/./cc",         .output = "/../../usr/bin/cc"       },
};

/*
** Run one phase 1 case: copy the input into a scratch buffer, clean it
** with clnpath(), and report pass or fail through the pt_* functions.
** data points at a p1_test_case.
*/
static void p1_tester(const void *data)
{
    const p1_test_case *tc = data;
    char  scratch[256];

    /* clnpath() edits in place, so work on a writable copy of the input */
    strcpy(scratch, tc->input);
    clnpath(scratch);

    if (strcmp(scratch, tc->output) != 0)
    {
        pt_fail("<<%s>> - unexpected output from clnpath()\n", tc->input);
        pt_info("Wanted <<%s>>\n", tc->output);
        pt_info("Actual <<%s>>\n", scratch);
        return;
    }

    pt_pass("<<%s>> cleans to <<%s>>\n", tc->input, scratch);
}

/* -- PHASE 2 TESTING -- */

/* -- Phase 2 - Testing clnpath2() -- */

/* One clnpath2() check: a raw pathname and the text expected after cleaning */
typedef struct p2_test_case
{
    const char *input;      /* pathname copied into the buffer given to clnpath2() */
    const char *output;     /* expected buffer contents after clnpath2() */
} p2_test_case;

/* Cases exercising the removal of "<name>/.." pairs */
static const p2_test_case p2_tests[] =
{
    { .input = "/abcd/../defg/ddd",               .output = "/defg/ddd"         },
    { .input = "/bin/..",                         .output = "/"                 },
    { .input = "bin/..",                          .output = "."                 },
    { .input = "/usr/bin/..",                     .output = "/usr"              },
    { .input = "/usr/bin/../..",                  .output = "/"                 },
    { .input = "usr/bin/../..",                   .output = "."                 },
    { .input = "../part/of/../the/way",           .output = "../part/the/way"   },
    { .input = "/../part/of/../the/way",          .output = "/part/the/way"     },
    { .input = "part1/part2/../../part3",         .output = "part3"             },
    { .input = "part1/part2/../../../part3",      .output = "../part3"          },
    { .input = "/part1/part2/../../../part3",     .output = "/part3"            },
    { .input = "/part1/part2/../../../",          .output = "/"                 },
    { .input = "/../../usr/bin/cc",               .output = "/usr/bin/cc"       },
    { .input = "../../usr/bin/cc",                .output = "../../usr/bin/cc"  },
    { .input = "part1/./part2/../../part3",       .output = "part3"             },
    { .input = "./part1/part2/../../../part3",    .output = "../part3"          },
    { .input = "/part1/part2/.././../../part3",   .output = "/part3"            },
    { .input = "/part1/part2/../.././../",        .output = "/"                 },
    { .input = "/.././..//./usr///bin/cc/",       .output = "/usr/bin/cc"       },
};

/*
** Run one phase 2 case: copy the input into a scratch buffer, clean it
** with clnpath2(), and report pass or fail through the pt_* functions.
** data points at a p2_test_case.
*/
static void p2_tester(const void *data)
{
    const p2_test_case *tc = data;
    char  scratch[256];

    /* clnpath2() edits in place, so work on a writable copy of the input */
    strcpy(scratch, tc->input);
    clnpath2(scratch);

    if (strcmp(scratch, tc->output) != 0)
    {
        pt_fail("<<%s>> - unexpected output from clnpath2()\n", tc->input);
        pt_info("Wanted <<%s>>\n", tc->output);
        pt_info("Actual <<%s>>\n", scratch);
        return;
    }

    pt_pass("<<%s>> cleans to <<%s>>\n", tc->input, scratch);
}

/* -- Phased Test Infrastructure -- */

/*
** Table of test phases passed to pt_auto_harness() by main().  Each entry
** names a tester function, its test-case array (wrapped in PT_ARRAYINFO),
** and a descriptive label for the phase.
** NOTE(review): pt_auto_phase and PT_ARRAYINFO come from phasedtest.h, which
** is not visible here - confirm there what the 0 field means.
*/
static pt_auto_phase phases[] =
{
    { p1_tester, PT_ARRAYINFO(p1_tests), 0, "Phase 1 - Testing clnpath()" },
    { p2_tester, PT_ARRAYINFO(p2_tests), 0, "Phase 2 - Testing clnpath2()" },
};

/*
** Test driver: hands the command-line arguments and the phases[] table to
** pt_auto_harness() and returns its result as the exit status.
*/
int main(int argc, char **argv)
{

#if 0
    /*
    ** Interactive testing (disabled).  Reads pathnames from standard input
    ** and prints the result of clnpath() and then clnpath2() for each one.
    */
    char buffer[256];

    printf("Enter pathname: ");
    while (fgets(buffer, sizeof(buffer), stdin) != NULL)
    {
        /*
        ** Zap newline if there is one; strcspn() is safe for an empty line
        ** and for a final line that has no newline.
        */
        buffer[strcspn(buffer, "\n")] = '\0';
        printf("Unclean: <<%s>>\n", buffer);
        clnpath(buffer);
        printf("Clean 1: <<%s>>\n", buffer);
        clnpath2(buffer);
        printf("Clean 2: <<%s>>\n", buffer);
        printf("Enter pathname: ");
    }
    putchar('\n');
#endif /* 0 */

    return(pt_auto_harness(argc, argv, phases, DIM(phases)));
}

#endif /* TEST */

第二个变体使用了函数 tokenise(),上面的源代码中并未包含它。如果需要,我可以提供该函数。

该代码包含测试用例,不过测试使用了一个名为“Phased Test”的库;由于还有一些打包问题需要解决,我尚未正式发布该库。如果有需要,可以在短时间内提供。