我有一个名为 ArrayA 的 char 数组,内容来自用户输入,其中可能包含转义序列。我想逐个字符地将 ArrayA 复制到 ArrayB 中。那么,我该如何把 ArrayA 中的转义序列在 ArrayB 中表示成对应的字符?如果我没理解错,简单地复制字符并不会在打印 ArrayB 时把 \t 转换为制表符,而只会把 \t 按字面字符(反斜杠和 t)原样输出,对吗?我能否在遇到 \t 转义序列时向 ArrayB 写入 0x09,这样打印 ArrayB 时就会在数组的相应位置真正输出一个制表符?
答案 0 :(得分:1)
cstrlit_chr()
这个怎么样:
/*
** cstrlit_chr() - decode one character of a C character or string literal.
**
** Scans the half-open range [str, end), which excludes any surrounding
** quotes, and converts the first plain character or backslash escape
** sequence found there.
**
** Returns the converted character (0x00..0xFF), or -1 if the range is
** empty or the escape sequence is invalid.  If eptr is not null, *eptr
** is set to the position just past the characters that were consumed.
**
** Handled: \a \b \f \n \r \t \v \' \" \? \\, octal \O, \OO, \OOO and
** hexadecimal \xH, \xHH.
** - \E (ESC), any extension for DEL, and ^A control notation are not
**   supported.
** - \z yields z when z is not otherwise special.
** - \038 yields \003; the next character starts at the 8.
** - \x3Z yields \003; the next character starts at the Z.
** - \8 and \9 yield 8 and 9.
** - Three-digit octal escapes above \377 yield -1.
** - A backslash followed by NUL, or by the end of the range, yields -1.
*/
int cstrlit_chr(const char *str, const char *end, char const ** const eptr)
{
    /* Single-character escapes and the characters they stand for. */
    static const struct
    {
        char letter;
        char value;
    } simple[] =
    {
        { 'a',  '\a' }, { 'b',  '\b' }, { 'f',  '\f' }, { 'n',  '\n' },
        { 'r',  '\r' }, { 't',  '\t' }, { 'v',  '\v' }, { '\"', '\"' },
        { '\'', '\'' }, { '\?', '\?' }, { '\\', '\\' },
        { '\0', '\0' }      /* Sentinel - terminates the search */
    };
    const char *cursor = str;
    int result = -1;        /* Pessimistic default: no data or malformed */
    unsigned char ch;

    if (cursor < end)
    {
        ch = *cursor++;
        if (ch != '\\')
        {
            /* Ordinary character - stands for itself */
            result = ch;
        }
        else if (cursor != end)     /* A lone trailing backslash is invalid */
        {
            ch = *cursor++;
            if (ch == 'x')
            {
                /*
                ** Hex escape: at most two hex digits are taken.  Longer
                ** forms (possible when CHAR_BIT > 8) are deliberately
                ** not recognized.  A digit is consumed only after it has
                ** been validated, so nothing needs to be pushed back.
                */
                int hi = (cursor != end) ? basedigit(*cursor, 16) : -1;
                if (hi >= 0)
                {
                    int lo;
                    cursor++;
                    lo = (cursor != end) ? basedigit(*cursor, 16) : -1;
                    if (lo >= 0)
                    {
                        cursor++;
                        result = (hi << 4) | lo;    /* \xHH */
                    }
                    else
                        result = hi;                /* \xH */
                }
                /* else: \x with no hex digit after it - result stays -1 */
            }
            else if (isdigit(ch))
            {
                /*
                ** Octal escape: at most three octal digits are taken,
                ** even though the notation technically allows more.
                */
                int d1 = basedigit(ch, 8);
                if (d1 < 0)
                    result = ch;                    /* \8 or \9: the digit itself */
                else
                {
                    int d2 = (cursor != end) ? basedigit(*cursor, 8) : -1;
                    if (d2 < 0)
                        result = d1;                /* \O */
                    else
                    {
                        int d3;
                        cursor++;
                        d3 = (cursor != end) ? basedigit(*cursor, 8) : -1;
                        if (d3 < 0)
                            result = (d1 << 3) | d2;    /* \OO */
                        else
                        {
                            cursor++;
                            /* \OOO: anything above \377 is out of range */
                            if (d1 < 4)
                                result = (((d1 << 3) | d2) << 3) | d3;
                        }
                    }
                }
            }
            else if (ch != '\0')    /* Backslash followed by NUL is malformed */
            {
                int i;
                /* Nominally invalid \X with X not special: return X itself */
                result = ch;
                for (i = 0; simple[i].letter != '\0'; i++)
                {
                    if (ch == simple[i].letter)
                    {
                        result = simple[i].value;
                        break;
                    }
                }
            }
        }
    }

    if (eptr != 0)
        *eptr = cursor;
    return result;
}
该函数可以处理 C89 的转义字符序列;但不处理 Unicode 通用字符名(\uXXXX
或 \U00XXXXXX
)。
basedigit()
/*
** basedigit() - value of character c read as a digit in the given base.
**
** Digits are '0'..'9' followed by the letters 'A'..'Z' or 'a'..'z'
** (case-insensitive) for the values 10..35.
**
** Returns the digit value (0 .. base-1), or -1 if base is outside 2..36
** or c is not a valid digit in that base.
*/
int basedigit(char c, int base)
{
    int digit;

    /*
    ** The letter arithmetic below assumes the code set is ASCII, ISO 646,
    ** ISO 8859, or something similar, with contiguous alphabets.
    */
#if (('z' - 'a') != 25 || ('Z' - 'A') != 25)
#error Faulty Assumption
#endif /* Alphabet test */

    if (base < 2 || base > 36)
        return -1;

    if (c >= '0' && c <= '9')
        digit = c - '0';
    else if (c >= 'A' && c <= 'Z')
        digit = c - 'A' + 10;
    else if (c >= 'a' && c <= 'z')
        digit = c - 'a' + 10;
    else
        return -1;

    /* A digit is only valid if it is smaller than the base */
    if (digit >= base)
        return -1;
    return digit;
}
/* Sample usage */
#include <stdio.h>
#include <string.h>
/*
** Demonstration driver: decodes a sample literal one character at a time
** with cstrlit_chr() and prints, for each character, its decimal and hex
** value, the source text that produced it, and the text still to be
** scanned.  The loop stops when cstrlit_chr() returns -1.
*/
int main(void)
{
    /* Backslashes are doubled here so the string holds single backslashes */
    const char data[] = "ab\\xFF\\03\\7\\377\\t\\?\\'\\\\yz";
    const char *const limit = data + strlen(data);
    const char *cursor;
    const char *after;
    int value;

    for (cursor = data;
         (value = cstrlit_chr(cursor, limit, &after)) != -1;
         cursor = after)
    {
        char shown[20];

        /* The slice [cursor, after) is the text consumed for this character */
        snprintf(shown, sizeof(shown), "[[%.*s]]", (int)(after - cursor), cursor);
        printf("%3d (0x%.2X) %-10s - [[%s]]\n", value, value & 0xFF, shown, after);
    }
    return 0;
}
请注意,待扫描的范围由两个指针确定:一个指向起始字符,另一个指向范围末尾之后的那个字符(在示例中即字符串结尾处的 '\0')。
不过该函数适用于任意数据,并不要求数据以空字符结尾。示例中的输入字符串在源代码里把反斜杠写成了两个,这样实际的字符串中才包含单个反斜杠。
如果 c == -1
,则转换失败。否则,c
中保存的是转换得到的字符,而 next(通过 eptr 参数返回)
是指向本次转换结束位置的指针。