我很好奇下面这段代码是否还可以简化……
/// <summary>
/// Lazily splits <paramref name="str"/> on <paramref name="sep"/>, ignoring separators that
/// appear inside single or double quotes, inside square brackets, or right after an
/// unescaped backslash.
/// </summary>
/// <param name="str">The text to split.</param>
/// <param name="sep">The separator character; defaults to a comma.</param>
/// <returns>
/// The pieces in order of appearance. Quotes, brackets and backslashes are kept verbatim,
/// and a final piece (possibly empty) is always produced.
/// </returns>
internal static IEnumerable<string> Split(string str, char sep = ',')
{
    int start = 0;
    bool inQuotes = false;
    bool inBrackets = false;
    bool pendingEscape = false;
    char openQuote = '\0';

    for (int pos = 0; pos < str.Length; ++pos)
    {
        char ch = str[pos];

        // A backslash toggles the escape flag, so "\\" escapes itself.
        if (ch == '\\')
        {
            pendingEscape = !pendingEscape;
            continue;
        }

        // Any other character consumes a pending escape; an escaped character is literal.
        bool wasEscaped = pendingEscape;
        pendingEscape = false;
        if (wasEscaped)
        {
            continue;
        }

        if (ch == '[' || ch == ']')
        {
            // Brackets are tracked with a plain flag (no nesting depth) and ignored in quotes.
            if (!inQuotes)
            {
                inBrackets = ch == '[';
            }
        }
        else if (ch == '"' || ch == '\'')
        {
            if (!inQuotes)
            {
                inQuotes = true;
                openQuote = ch;
            }
            else if (ch == openQuote)
            {
                // Only the same kind of quote that opened the run can close it.
                inQuotes = false;
            }
        }
        else if (ch == sep && !inQuotes && !inBrackets)
        {
            yield return str.Substring(start, pos - start);
            start = pos + 1;
        }
    }

    yield return str.Substring(start);
}
我写这个方法是为了按逗号分割字符串,但只在逗号不位于 [] 内、不在引号内、也没有被转义时才进行分割。这本质上就是一个棘手的问题,还是我采用了愚蠢的方法?
输入:
// Sample input mixing quoted commas, bracketed commas and escaped quotes; each piece is printed on its own line.
var input = "\"comma, in quotes\", comma[in,brackets], comma[in \"quotes, and brackets\"], \"woah, 'nelly,' \\\"now you,re [talking, crazy\\\"\"";
foreach (var sel in SharpQuery.SplitCommas(input))
{
    Console.WriteLine(sel);
}
预期输出:
"comma, in quotes"
comma[in,brackets]
comma[in "quotes, and brackets"]
"woah, 'nelly,' \"now you,re [talking, crazy\""
答案 0 :(得分:6)
保存自动机状态的方式选得有些别扭。在这种情况下,我会使用单个状态变量或一个堆栈,这样当前状态始终是 stateStack.Peek(),既易于阅读,也易于处理嵌套状态。
编辑:下面是一个简单的示例。相信你可以在此基础上进行扩展,加入错误处理和更细致的规则。
/// <summary>
/// States of the splitter's automaton; the current state is the top of the state stack.
/// </summary>
enum ParserState
{
    /// <summary>Plain text: the only state in which a separator splits the input.</summary>
    Text,
    /// <summary>Inside square brackets (may nest).</summary>
    Bracketed,
    /// <summary>Inside a quoted run.</summary>
    Quoted,
    /// <summary>The previous character was a backslash; the next character is taken literally.</summary>
    EscapChar,
}

/// <summary>
/// Lazily splits <paramref name="str"/> on <paramref name="sep"/>, ignoring separators that
/// appear inside square brackets, inside single or double quotes, or right after a backslash.
/// </summary>
/// <param name="str">The text to split.</param>
/// <param name="sep">The separator character.</param>
/// <returns>
/// The pieces in order of appearance. Quotes, brackets and backslashes are kept verbatim,
/// and a final piece (possibly empty) is always produced.
/// </returns>
internal static IEnumerable<string> Split(string str, char sep)
{
    int lastIdx = 0;
    // Quotes never nest inside quotes, so one variable is enough to remember the opener.
    char openQuote = '\0';
    Stack<ParserState> state = new Stack<ParserState>();
    state.Push(ParserState.Text);

    for (int i = 0; i < str.Length; i++)
    {
        char c = str[i];
        ParserState s = state.Peek();

        if (s == ParserState.EscapChar)
        {
            // The escaped character is literal; just leave the escape state.
            state.Pop();
        }
        else if (c == '\\')
        {
            state.Push(ParserState.EscapChar);
        }
        else if (s == ParserState.Quoted)
        {
            // Inside quotes only the matching quote is significant. In particular '[' must
            // not open a bracket here, otherwise the closing quote would be misread as a new
            // opening quote and every later separator would be swallowed.
            if (c == openQuote)
            {
                state.Pop();
            }
        }
        else if (c == '"' || c == '\'')
        {
            openQuote = c;
            state.Push(ParserState.Quoted);
        }
        else if (c == '[')
        {
            state.Push(ParserState.Bracketed);
        }
        else if (c == ']' && s == ParserState.Bracketed)
        {
            state.Pop();
        }
        else if (s == ParserState.Text && c == sep)
        {
            yield return str.Substring(lastIdx, i - lastIdx);
            lastIdx = i + 1;
        }
    }

    yield return str.Substring(lastIdx);
}