我正在寻找一种快速的方式(在C#中)来判断一个字符串是否是有效的变量名。我的第一反应是随手写一个正则表达式,但我想知道是否有更好的方法。比如某个隐藏在类库深处、名为 IsThisAValidVariableName(string name) 的现成方法,或者其他巧妙的做法,这样就不容易因为我的正则表达式功力不足而出错。
答案 0 :(得分:46)
试试这个:
// using System.CodeDom.Compiler;
// CodeDomProvider derives from Component and is therefore IDisposable;
// the using statement releases it as soon as the check is finished.
using (CodeDomProvider provider = CodeDomProvider.CreateProvider("C#"))
{
    // YOUR_VARIABLE_NAME is a placeholder for the string being tested.
    if (provider.IsValidIdentifier(YOUR_VARIABLE_NAME))
    {
        // Valid
    }
    else
    {
        // Not valid
    }
}
答案 1 :(得分:3)
/// <summary>
/// Performs a simple character-class check of whether <paramref name="text"/>
/// looks like an identifier: a letter or underscore first, then any number of
/// letters, digits or underscores.
/// </summary>
/// <param name="text">Candidate name; may be null or empty.</param>
/// <returns>
/// <c>true</c> when every character passes the check; <c>false</c> for null,
/// empty, or any string containing a disallowed character.
/// </returns>
public static bool IsIdentifier(string text)
{
    if (string.IsNullOrEmpty(text))
    {
        return false;
    }

    // The leading character may not be a digit.
    char head = text[0];
    bool startsCorrectly = head == '_' || char.IsLetter(head);
    if (!startsCorrectly)
    {
        return false;
    }

    // Every remaining character must be a letter, a digit or an underscore.
    int position = 1;
    while (position < text.Length)
    {
        char current = text[position];
        if (current != '_' && !char.IsLetterOrDigit(current))
        {
            return false;
        }
        position++;
    }

    return true;
}
答案 2 :(得分:2)
@ 字符相关的一些特殊情况很容易被忘记检查 - 即 '@' 本身不是有效的标识符,"@1foo" 也不是。要捕获这些情况,您可以先检查字符串是否为关键字,然后从字符串的开头删除 @,再检查剩下的字符是否构成有效标识符(其中不允许出现 @ 字符)。
这里我将它与一个解析标识符中的Unicode转义序列的方法相结合,并希望完成C#(5.0)Unicode字符检查。要使用它,首先调用TryParseRawIdentifier()
来处理关键字,转义序列,格式化字符(被删除)和逐字标识符。接下来,将结果传递给IsValidParsedIdentifier()
以检查第一个和后续字符是否有效。请注意,当且仅当C#认为标识符相同时,从TryParseRawIdentifier()
返回的字符串才相等。
/// <summary>
/// Two-step validation of C# identifiers.
/// <para>
/// Call <see cref="TryParseRawIdentifier"/> first to reject keywords, strip a
/// leading verbatim '@', decode <c>\uXXXX</c> / <c>\UXXXXXXXX</c> escapes and
/// drop formatting characters. Then pass the result to
/// <see cref="IsValidParsedIdentifier"/> to check the character classes of the
/// first and subsequent characters.
/// </para>
/// </summary>
public static class CSharpIdentifiers
{
    // Largest Unicode code point and the UTF-16 surrogate range. Values outside
    // [0, MaxCodePoint] or inside the surrogate range are rejected before calling
    // char.ConvertFromUtf32, which would otherwise throw.
    private const int MaxCodePoint = 0x10FFFF;
    private const int FirstSurrogate = 0xD800;
    private const int LastSurrogate = 0xDFFF;

    private static readonly HashSet<string> _keywords = new HashSet<string> {
        "abstract", "as", "base", "bool", "break", "byte", "case", "catch", "char", "checked",
        "class", "const", "continue", "decimal", "default", "delegate", "do", "double", "else",
        "enum", "event", "explicit", "extern", "false", "finally", "fixed", "float", "for",
        "foreach", "goto", "if", "implicit", "in", "int", "interface", "internal", "is", "lock",
        "long", "namespace", "new", "null", "object", "operator", "out", "override", "params",
        "private", "protected", "public", "readonly", "ref", "return", "sbyte", "sealed",
        "short", "sizeof", "stackalloc", "static", "string", "struct", "switch", "this", "throw",
        "true", "try", "typeof", "uint", "ulong", "unchecked", "unsafe", "ushort", "using",
        "virtual", "void", "volatile", "while"
    };

    /// <summary>The keyword list that <see cref="TryParseRawIdentifier"/> rejects.</summary>
    public static IReadOnlyCollection<string> Keywords { get { return _keywords; } }

    /// <summary>
    /// Normalizes a raw identifier as written in source code.
    /// </summary>
    /// <param name="str">The raw text, optionally starting with '@' and optionally containing Unicode escapes.</param>
    /// <param name="parsed">
    /// On success, the text with the leading '@' removed, escapes decoded and
    /// formatting characters dropped; <c>null</c> on failure.
    /// </param>
    /// <returns>
    /// <c>false</c> when <paramref name="str"/> is null, empty, a keyword, or
    /// contains a malformed, truncated or out-of-range escape sequence;
    /// otherwise <c>true</c>. A <c>true</c> result does not by itself mean the
    /// identifier is valid: the result still has to pass
    /// <see cref="IsValidParsedIdentifier"/>.
    /// </returns>
    public static bool TryParseRawIdentifier(string str, out string parsed)
    {
        parsed = null;
        if (string.IsNullOrEmpty(str) || _keywords.Contains(str))
        {
            return false;
        }

        StringBuilder sb = new StringBuilder(str.Length);
        int verbatimCharWidth = str[0] == '@' ? 1 : 0;
        for (int i = verbatimCharWidth; i < str.Length; ) //Manual increment
        {
            char c = str[i];
            if (c == '\\')
            {
                // A backslash at the very end has no escape letter to read.
                if (i + 1 >= str.Length)
                {
                    return false;
                }

                char next = str[i + 1];
                int charCodeLength;
                if (next == 'u') charCodeLength = 4;
                else if (next == 'U') charCodeLength = 8;
                else return false;
                //No need to check for escaped backslashes or special sequences like \n,
                //as they are not valid identifier characters

                // Reject truncated escapes instead of letting Substring throw.
                if (i + 2 + charCodeLength > str.Length)
                {
                    return false;
                }

                int charCode;
                if (!TryParseHex(str.Substring(i + 2, charCodeLength), out charCode))
                {
                    return false;
                }

                // Eight hex digits can encode values that are negative as an int,
                // above the Unicode range, or lone surrogates; none is a character.
                if (charCode < 0 || charCode > MaxCodePoint ||
                    (charCode >= FirstSurrogate && charCode <= LastSurrogate))
                {
                    return false;
                }

                //Handle characters above 2^16 by converting them to a surrogate pair
                string decoded = char.ConvertFromUtf32(charCode);

                // Drop escaped formatting characters just like literal ones below,
                // so that both spellings normalize to the same string.
                if (char.GetUnicodeCategory(decoded, 0) != UnicodeCategory.Format)
                {
                    sb.Append(decoded);
                }
                i += 2 + charCodeLength;
            }
            else if (char.GetUnicodeCategory(str, i) == UnicodeCategory.Format)
            {
                //Use (string, index) in order to handle surrogate pairs
                //Skip this character
                i += char.IsSurrogatePair(str, i) ? 2 : 1;
            }
            else
            {
                sb.Append(c);
                i++;
            }
        }

        parsed = sb.ToString();
        return true;
    }

    /// <summary>
    /// Parses a string consisting solely of hex digits into an int.
    /// </summary>
    private static bool TryParseHex(string str, out int result)
    {
        //NumberStyles.AllowHexSpecifier forces all characters to be hex digits
        return int.TryParse(str, NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out result);
    }

    /// <summary>
    /// Checks that an already-normalized identifier (the output of
    /// <see cref="TryParseRawIdentifier"/>) starts with a valid start character
    /// and continues with valid part characters.
    /// </summary>
    /// <param name="str">Normalized identifier text; null or empty yields <c>false</c>.</param>
    public static bool IsValidParsedIdentifier(string str)
    {
        if (string.IsNullOrEmpty(str)) return false;
        if (!IsValidParsedIdentifierStart(str, 0)) return false;

        int firstCharWidth = char.IsSurrogatePair(str, 0) ? 2 : 1;
        for (int i = firstCharWidth; i < str.Length; ) //Manual increment
        {
            if (!IsValidParsedIdentifierPart(str, i)) return false;
            i += char.IsSurrogatePair(str, i) ? 2 : 1;
        }
        return true;
    }

    //(String, index) pairs are used instead of chars in order to support surrogate pairs
    //(Unicode code-points above 2^16 represented using two 16-bit characters)

    /// <summary>
    /// Returns whether the character at <paramref name="index"/> may begin an
    /// identifier: an underscore, a letter, or a letter-number.
    /// </summary>
    public static bool IsValidParsedIdentifierStart(string s, int index)
    {
        return s[index] == '_'
            || char.IsLetter(s, index)
            || char.GetUnicodeCategory(s, index) == UnicodeCategory.LetterNumber;
    }

    /// <summary>
    /// Returns whether the character at <paramref name="index"/> may appear
    /// after the first character of an identifier.
    /// </summary>
    public static bool IsValidParsedIdentifierPart(string s, int index)
    {
        // Fast path for the common cases before consulting the Unicode category.
        if (s[index] == '_' || (s[index] >= '0' && s[index] <= '9') || char.IsLetter(s, index))
        {
            return true;
        }

        switch (char.GetUnicodeCategory(s, index))
        {
            case UnicodeCategory.LetterNumber: //Eg. Special Roman numeral characters (not covered by IsLetter())
            case UnicodeCategory.DecimalDigitNumber: //Includes decimal digits in other cultures
            case UnicodeCategory.ConnectorPunctuation:
            case UnicodeCategory.NonSpacingMark:
            case UnicodeCategory.SpacingCombiningMark:
                //UnicodeCategory.Format handled in TryParseRawIdentifier()
                return true;
            default:
                return false;
        }
    }
}
答案 3 :(得分:1)
另一种更繁琐、也慢得多的做法,是使用反射来遍历类/命名空间的成员,并检查每个反射成员的 ToString() 是否与输入字符串相同;这需要预先加载程序集。
另一种方法(同样绕得更远,但借助现成的 Antlr 词法分析器/解析器可以避免使用正则表达式)近似于对 C# 代码进行词法/语法分析,然后扫描其中的成员名称(即变量),并与作为输入的字符串进行比较。例如,输入一个名为 'fooBar' 的字符串,然后指定源(例如程序集或 C# 代码),通过分析来扫描它,专门查找成员的声明,例如
// Example of a member declaration the scan would look for when the input string is "fooBar".
private int fooBar;
是的,它很复杂,但是当你意识到编译器编写者正在做什么时会产生强大的理解,并且会将你对C#语言的了解提升到一个你对语法及其特性非常熟悉的程度。
答案 4 :(得分:0)
在 WPF 中,这可用于检查字符串是否为有效的变量名。但是它不能识别“ public”之类的保留字符串。
// works only in WPF!
/// <summary>
/// Checks a candidate name by assigning it to the <c>Name</c> property of a
/// throw-away <c>TextBox</c> and reporting whether the setter accepted it.
/// </summary>
/// <param name="stringToCheck">Candidate name; null, empty or whitespace-only yields <c>false</c>.</param>
/// <returns>
/// <c>true</c> when the assignment succeeds; <c>false</c> when it raises
/// <see cref="ArgumentException"/> or the input is blank.
/// </returns>
public static bool CheckIfStringIsValidVarName(string stringToCheck)
{
    // Blank input must be rejected up front: assigning "" to Name does NOT throw.
    if (string.IsNullOrWhiteSpace(stringToCheck))
    {
        return false;
    }

    TextBox probe = new TextBox();
    bool accepted;
    try
    {
        // Observed behaviour of the Name setter:
        //   "Name$", "0", "name with blank"  -> throws
        //   "public", "ValidName"            -> does NOT throw (reserved words are not detected)
        probe.Name = stringToCheck;
        accepted = true;
    }
    catch (ArgumentException)
    {
        accepted = false;
    }

    return accepted;
}