我想解析文件名中的所有 "." 和 ".." 引用,而该文件当前可能并不存在。
因此,我想做的事情与 realpath(3) 类似,区别在于:文件不必实际存在,并且相对路径的结果仍然是相对路径(见下面的示例)。
是否存在可以用来执行此操作的现有库代码,或者我必须编写新代码?
因此,本质上是:
a/../b/c.txt
将成为b/c.txt
a/../../b/c.txt
将成为../b/c.txt
a/./b/./c.txt
将成为a/b/c.txt
如果有人好奇的话:我这样做并不是出于安全方面的考虑。我正在编写的工具本来就可以执行任意的 shell 命令,因此安全性不是问题。我只需要一种得到路径规范化表示的方法。
答案 0 :(得分:1)
这是我 30 多年前首次编写的代码。代码中有两个变体:clnpath() 和 clnpath2();您需要的是会消除 ".." 的 clnpath2()。
该代码在Stack Overflow所使用的CC-by-SA 3.0许可下可用-您可以带署名使用该代码。
/*
@(#)File: $RCSfile: clnpath.c,v $
@(#)Version: $Revision: 2.19 $
@(#)Last changed: $Date: 2017/03/26 06:32:49 $
@(#)Purpose: Clean up pathname (lexical analysis only)
@(#)Author: J Leffler
@(#)Copyright: (C) JLSS 1987-2017
*/
/*TABSTOP=4*/
#include "clnpath.h"
#include "jlss.h"
#include "tokenise.h"
#include <string.h>
/* Size of the token array in clnpath2(), and the limit passed to tokenise() */
#define MAX_PATH_ELEMENTS 64 /* Number of levels of directory */
/* True when the two C strings compare equal with strcmp() */
#define strequal(a,b) (strcmp((a),(b)) == 0)
/* Element count of an array object; not meaningful when x is a pointer */
#define DIM(x) (sizeof(x)/sizeof(*(x)))
#if !defined(lint)
/*
** RCS identification string for this file, omitted when lint is defined.
** Declared extern before it is defined to prevent over-aggressive
** optimizers from eliminating the ID string.
*/
extern const char jlss_id_clnpath_c[];
const char jlss_id_clnpath_c[] = "@(#)$Id: clnpath.c,v 2.19 2017/03/26 06:32:49 jleffler Exp $";
#endif /* lint */
/*
** clnpath() - tidy a path name in place using lexical rules only.
**
** path is a modifiable, null-terminated string; the result is written
** back over it and is never longer than the input.  The function:
**   - squeezes every run of slashes down to a single slash;
**   - drops "." components ("./xxx", "xxx/.", "xxx/./yyy");
**   - drops one trailing slash, except when the path is just "/".
** A path containing no slash at all is returned untouched.
** ".." components are never interpreted here.
*/
void clnpath(char *path)
{
    char *rd = path;        /* Next character to read */
    char *wr = path;        /* Next position to write (never ahead of rd) */
    int saw_slash = 0;

    /* Pass 1: squeeze each run of slashes into one */
    for (;;)
    {
        char ch = *rd++;
        *wr++ = ch;
        if (ch == '\0')
            break;
        if (ch != '/')
            continue;
        saw_slash = 1;
        while (*rd == '/')
            rd++;
    }

    /* With no slash there can be no "." component to strip */
    if (!saw_slash)
        return;

    /* Pass 2: work on the part after the leading slash, if there is one */
    char *start = path;
    if (*path == '/')
        start++;
    rd = wr = start;

    /* Skip leading "./" prefixes, but keep a final bare "./" intact */
    while (rd[0] == '.' && rd[1] == '/' && rd[2] != '\0')
        rd += 2;

    for (;;)
    {
        char ch = *rd++;
        *wr++ = ch;
        if (ch == '\0')
            break;
        /* A "/." that ends the string or precedes '/' is a "." component */
        if (ch == '/' && rd[0] == '.' && (rd[1] == '/' || rd[1] == '\0'))
        {
            rd++;       /* Step over the dot */
            wr--;       /* The slash just written will be overwritten */
        }
    }

    /* What remains of "/." or "/./" is simply the root directory */
    if (path[0] == '/' && path[1] == '.')
    {
        if (path[2] == '\0' || (path[2] == '/' && path[3] == '\0'))
            path[1] = '\0';
    }

    /*
    ** wr is one past the terminating null, so wr - 2 is the last
    ** character.  Remove it if it is a slash; there is at most one, and
    ** the wr > path test protects a path that is just "/".
    */
    wr -= 2;
    if (wr > path && *wr == '/')
        *wr = '\0';
}
/*
** clnpath2() - clean a path with clnpath(), then remove "<name>/.." pairs.
**
** This is kept separate from clnpath() because cancelling ".." lexically
** can change the meaning of a path name if there are symbolic links on
** the system.  For example, suppose /usr/tmp is a symbolic link to
** /var/tmp.  Given /usr/tmp/../abcdef, this function produces
** /usr/abcdef, not /var/abcdef which is what the kernel would interpret
** it as.
**
** path is a modifiable, null-terminated string and is rewritten in place.
** After reduction:
**   - a relative path with nothing left becomes ".";
**   - ".." components directly after the root of an absolute path are
**     dropped, so "/.." becomes "/";
**   - leading ".." components of a relative path are kept.
**
** NOTE(review): tokenise() is declared in tokenise.h, which is not visible
** here.  This code assumes it splits path in place at '/', stores at most
** MAX_PATH_ELEMENTS pointers in token[], returns the number stored, and
** yields an empty first token for a path that starts with '/' - confirm.
** If it can return 0 for an empty path, writing "." needs a 2-byte buffer.
*/
void clnpath2(char *path)
{
    char *token[MAX_PATH_ELEMENTS];
    int ntok;

    clnpath(path);

    /* Reduce "<name>/.." to "/" */
    if ((ntok = tokenise(path, "/", token, MAX_PATH_ELEMENTS, 0)) > 1)
    {
        for (int i = 0; i < ntok - 1; i++)
        {
            if (!strequal(token[i], "..") && strequal(token[i + 1], ".."))
            {
                /* An empty token is the root; "/.." is dealt with below */
                if (*token[i] == '\0')
                    continue;
                /*
                ** Close the two-slot gap.  The bound is ntok - 2 so that
                ** token[j + 2] never reads token[ntok], which is unset
                ** and lies outside the array when ntok equals
                ** MAX_PATH_ELEMENTS.
                */
                for (int j = i; j < ntok - 2; j++)
                    token[j] = token[j + 2];
                ntok -= 2;
                i = -1;     /* Restart enclosing for loop */
            }
        }
    }

    /* Reassemble string */
    char *dst = path;
    if (ntok == 0)
    {
        *dst++ = '.';
        *dst = '\0';
    }
    else
    {
        if (token[0][0] == '\0')
        {
            /* Absolute path: ".." immediately after the root is a no-op */
            int i;
            for (i = 1; i < ntok && strequal(token[i], ".."); i++)
                ;
            if (i > 1)
            {
                int j;
                for (j = 1; i < ntok; i++)
                    token[j++] = token[i];
                ntok = j;
            }
        }
        if (ntok == 1 && token[0][0] == '\0')
        {
            *dst++ = '/';
            *dst = '\0';
        }
        else
        {
            /*
            ** Copy each token forward over the buffer, turning each
            ** copied terminator into a '/' separator; the last one is
            ** turned back into the final null.
            */
            for (int i = 0; i < ntok; i++)
            {
                char *src = token[i];
                while ((*dst++ = *src++) != '\0')
                    ;
                *(dst - 1) = '/';
            }
            *(dst - 1) = '\0';
        }
    }
}
#if defined(TEST)
#include <stdio.h>
#include "phasedtest.h"
/* -- PHASE 1 TESTING -- */
/* -- Phase 1 - Testing clnpath() -- */
/* One phase 1 case: a raw path and the text clnpath() should leave in its place. */
typedef struct p1_test_case
{
const char *input; /* Path copied into a scratch buffer and passed to clnpath() */
const char *output; /* Expected buffer contents after clnpath() returns */
} p1_test_case;
/*
** Phase 1 table: { input, expected clnpath() output } pairs.
** This stress tests the cleaning, concentrating on the boundaries:
** repeated slashes, "." components at the start, middle and end, and
** trailing slashes.  ".." components are expected to pass through
** unchanged.
*/
static const p1_test_case p1_tests[] =
{
{ "/", "/", },
{ "//", "/", },
{ "///", "/", },
{ "/.", "/", },
{ "/./", "/", },
{ "/./.", "/", },
{ "/././.profile", "/.profile", },
{ "./", ".", },
{ "./.", ".", },
{ "././", ".", },
{ "./././.profile", ".profile", },
{ "abc/.", "abc", },
{ "abc/./def", "abc/def", },
{ "./abc", "abc", },
{ "//abcd///./abcd////", "/abcd/abcd", },
{ "//abcd///././../defg///ddd//.", "/abcd/../defg/ddd", },
{ "/abcd/./../././defg/./././ddd", "/abcd/../defg/ddd", },
{ "//abcd//././../defg///ddd//.///", "/abcd/../defg/ddd", },
/* Most of these are of minimal interest in phase 1 */
{ "/usr/tmp/clnpath.c", "/usr/tmp/clnpath.c", },
{ "/usr/tmp/", "/usr/tmp", },
{ "/bin/..", "/bin/..", },
{ "bin/..", "bin/..", },
{ "/bin/.", "/bin", },
{ "sub/directory", "sub/directory", },
{ "sub/directory/file", "sub/directory/file", },
{ "/part1/part2/../.././../", "/part1/part2/../../..", },
{ "/.././../usr//.//bin/./cc", "/../../usr/bin/cc", },
};
/*
** Phase 1 driver: run clnpath() on a copy of one table entry and report
** the outcome.  data points to a p1_test_case.  A match is reported with
** pt_pass(); a mismatch with pt_fail() followed by the wanted and actual
** strings via pt_info().
*/
static void p1_tester(const void *data)
{
    const p1_test_case *tc = data;
    char cleaned[256];

    /* clnpath() edits in place, so work on a private copy of the input */
    strcpy(cleaned, tc->input);
    clnpath(cleaned);

    if (strcmp(cleaned, tc->output) != 0)
    {
        pt_fail("<<%s>> - unexpected output from clnpath()\n", tc->input);
        pt_info("Wanted <<%s>>\n", tc->output);
        pt_info("Actual <<%s>>\n", cleaned);
        return;
    }
    pt_pass("<<%s>> cleans to <<%s>>\n", tc->input, cleaned);
}
/* -- PHASE 2 TESTING -- */
/* -- Phase 2 - Testing clnpath2() -- */
/* One phase 2 case: a path and the text clnpath2() should leave in its place. */
typedef struct p2_test_case
{
const char *input; /* Path copied into a scratch buffer and passed to clnpath2() */
const char *output; /* Expected buffer contents after clnpath2() returns */
} p2_test_case;
/*
** Phase 2 table: { input, expected clnpath2() output } pairs.
** Covers "<name>/.." cancellation in absolute and relative paths, ".."
** directly under the root (dropped), leading ".." in relative paths
** (kept), and paths that reduce to "/" or ".".
*/
static const p2_test_case p2_tests[] =
{
{ "/abcd/../defg/ddd", "/defg/ddd" },
{ "/bin/..", "/" },
{ "bin/..", "." },
{ "/usr/bin/..", "/usr" },
{ "/usr/bin/../..", "/" },
{ "usr/bin/../..", "." },
{ "../part/of/../the/way", "../part/the/way" },
{ "/../part/of/../the/way", "/part/the/way" },
{ "part1/part2/../../part3", "part3" },
{ "part1/part2/../../../part3", "../part3" },
{ "/part1/part2/../../../part3", "/part3" },
{ "/part1/part2/../../../", "/" },
{ "/../../usr/bin/cc", "/usr/bin/cc" },
{ "../../usr/bin/cc", "../../usr/bin/cc" },
{ "part1/./part2/../../part3", "part3" },
{ "./part1/part2/../../../part3", "../part3" },
{ "/part1/part2/.././../../part3", "/part3" },
{ "/part1/part2/../.././../", "/" },
{ "/.././..//./usr///bin/cc/", "/usr/bin/cc" },
};
/*
** Phase 2 driver: run clnpath2() on a copy of one table entry and report
** the outcome.  data points to a p2_test_case.  A match is reported with
** pt_pass(); a mismatch with pt_fail() followed by the wanted and actual
** strings via pt_info().
*/
static void p2_tester(const void *data)
{
    const p2_test_case *tc = data;
    char cleaned[256];

    /* clnpath2() edits in place, so work on a private copy of the input */
    strcpy(cleaned, tc->input);
    clnpath2(cleaned);

    if (strcmp(cleaned, tc->output) != 0)
    {
        pt_fail("<<%s>> - unexpected output from clnpath2()\n", tc->input);
        pt_info("Wanted <<%s>>\n", tc->output);
        pt_info("Actual <<%s>>\n", cleaned);
        return;
    }
    pt_pass("<<%s>> cleans to <<%s>>\n", tc->input, cleaned);
}
/* -- Phased Test Infrastructure -- */
/*
** Test phases passed to pt_auto_harness() in main(): each entry names a
** tester function, its case table (via PT_ARRAYINFO) and a phase title.
** NOTE(review): the meaning of the 0 field is defined by phasedtest.h,
** which is not visible here - confirm against that header.
*/
static pt_auto_phase phases[] =
{
{ p1_tester, PT_ARRAYINFO(p1_tests), 0, "Phase 1 - Testing clnpath()" },
{ p2_tester, PT_ARRAYINFO(p2_tests), 0, "Phase 2 - Testing clnpath2()" },
};
/*
** Test entry point (built only when TEST is defined): passes the command
** line and the phases table to pt_auto_harness() and returns its result.
**
** The "#if 0" section is an optional interactive loop that reads path
** names from standard input and prints the result of clnpath() and then
** clnpath2() on each.  It is disabled by default.
*/
int main(int argc, char **argv)
{
#if 0
    /* Interactive testing */
    char buffer[256];

    printf("Enter pathname: ");
    while (fgets(buffer, sizeof(buffer), stdin) != NULL)
    {
        /* Zap newline if present; a final unterminated line is left whole */
        buffer[strcspn(buffer, "\n")] = '\0';
        printf("Unclean: <<%s>>\n", buffer);
        clnpath(buffer);
        printf("Clean 1: <<%s>>\n", buffer);
        clnpath2(buffer);
        printf("Clean 2: <<%s>>\n", buffer);
        printf("Enter pathname: ");
    }
    putchar('\n');
#endif /* 0 */
    return(pt_auto_harness(argc, argv, phases, DIM(phases)));
}
#endif /* TEST */
第二个变体 clnpath2() 使用了函数 tokenise(),而上面的源代码并未包含它;如有需要,我可以提供。
该代码包含测试用例,不过这些测试使用了一个名为 “Phased Test” 的库;由于还有一些打包问题需要解决,我尚未正式发布该库。如果需要,可以在短时间内提供。