Question

文件中有一个命令列表，如下所示：

command1 argument1 argument2
command2 argument3 argument4

结果应该看起来像

//Dictionary<command name,list of arguments>
Dictionary<string,List<string>>

当然，参数可以有任意数量，而不仅仅是两个。解析这种格式易如反掌。但问题是，参数可能跨越多行。

command {some 
amount
of random text
} {and the second
argument} and_here_goes_argument_3

这就是棘手的地方。我写了一个带 if 条件的 while 循环来解析这个文件，但它用了 200 多行代码，而且完全不可读。我敢打赌有更好的方法。当然，我不是要求你替我写代码，我需要的只是一个基本思路。至于语言——C# 或 C++ 都可以。

Answer 1

下面先展示用正则表达式来做这件事有多痛苦：

// Sample input: two single-line commands followed by one command whose
// braced arguments span several physical lines.
string text = @"command1 argument1 argument2
command2 argument3 argument4
command {some 
amount
of random text
} {and the second
argument} and_here_goes_argument_3";

// Multiline: ^ and $ apply per line. ExplicitCapture: only the named groups capture.
const RegexOptions options = RegexOptions.Multiline | RegexOptions.ExplicitCapture;
var commandRegex = new Regex(@"^(?<command>(?:(?!\r|$)[^ ])*) +(?:(?<argument>{[^}]*}|(?!\r?$|{)(?:(?!\r|$)[^ ])+)(?: +\r?$?|\r?$))*", options);

foreach (Match commandMatch in commandRegex.Matches(text))
{
    Console.WriteLine($"Command: {commandMatch.Groups["command"].Value}");

    // The "argument" group repeats, so each argument is one entry in Captures.
    CaptureCollection arguments = commandMatch.Groups["argument"].Captures;

    for (int i = 0; i < arguments.Count; i++)
    {
        Console.WriteLine($" - arg: [{arguments[i].Value}]");
    }

    Console.WriteLine();
}

问题在于这个正则表达式既不可读又脆弱。试着在 argument} 后面加一个 x，例如 argument}x。处理格式错误的文本非常困难。

唯一有趣的部分是我使用 RegexOptions.Multiline 来处理多行文本，而 $ 能匹配 \n 但不能匹配 \r，后者由我手动处理。

矛盾的是，借助某个库编写一个小型语法，可能才是最简单的“解决方案”……

好吧，现在来一些“真正的”代码：

// A command name ends at a space or at either line-break character.
private static readonly string[] commandDelimiters = { " ", "\r", "\n" };

// A bare (non-braced) argument ends at the same characters, plus '{': a brace
// may only start a {...} argument, never appear in the middle of a bare one.
// There is currently no syntax for escaping '}' inside a {...} argument.
private static readonly string[] argumentDelimiters = { " ", "\r", "\n", "{" };

/// <summary>
/// Lazily parses <paramref name="str"/> into commands, one per logical line.
/// </summary>
/// <remarks>
/// A logical line is a command name followed by space-separated arguments.
/// An argument is either a bare word or text wrapped in <c>{...}</c>; a braced
/// argument may span several physical lines and is returned without its braces.
/// There is no way to escape <c>}</c>. A bare word may not contain <c>{</c>.
/// This is an iterator: nothing is parsed, and nothing is thrown, until the
/// returned sequence is enumerated.
/// </remarks>
/// <param name="str">The complete text to parse.</param>
/// <returns>One (command name, arguments) tuple per command, in input order.</returns>
/// <exception cref="FormatException">
/// A line has no command name, a <c>{</c> is never closed, a <c>}</c> is not
/// followed by a space or end of line, or a bare argument runs into a <c>{</c>.
/// The message carries the zero-based line and column of the problem.
/// </exception>
public static IEnumerable<Tuple<string, string[]>> ParseCommands(string str)
{
    int ix = 0;
    int line = 0;
    int ixStartLine = 0;

    var args = new List<string>();

    while (ix < str.Length)
    {
        string command = ParseWord(str, ref ix, commandDelimiters);

        if (command.Length == 0)
        {
            throw new FormatException($"No command, at line {line}, col {ix - ixStartLine}");
        }

        while (true)
        {
            SkipSpaces(str, ref ix);

            // End of line (or of input) closes the current command.
            if (IsEOL(str, true, ref ix))
            {
                line++;
                ixStartLine = ix;
                break;
            }

            if (str[ix] == '{')
            {
                int ix2 = str.IndexOf('}', ix + 1);

                if (ix2 == -1)
                {
                    throw new FormatException($"Unclosed {{ at line {line}, col {ix - ixStartLine}");
                }

                // Skipping the {
                ix++;

                // The length ix2 - ix stops just before the }, so neither brace is kept.
                string arg = str.Substring(ix, ix2 - ix);

                // Count the line breaks inside the { } so later errors report the right position.
                for (int i = 0; i < arg.Length; )
                {
                    if (IsEOL(arg, true, ref i))
                    {
                        line++;

                        // i is now the first character after the line break (relative to ix),
                        // so ix + i is exactly where the new line starts in str.
                        ixStartLine = ix + i;
                    }
                    else
                    {
                        i++;
                    }
                }

                // Skipping the }
                ix = ix2 + 1;

                // Anything other than a space or an end of line right after the } is an error.
                if (ix < str.Length && str[ix] != ' ' && !IsEOL(str, false, ref ix))
                {
                    throw new FormatException($"Unexpected character at line {line}, col {ix - ixStartLine}");
                }

                args.Add(arg);
            }
            else
            {
                // Bare arguments also stop at '{', so that a brace in the middle of a word is detected below.
                string arg = ParseWord(str, ref ix, argumentDelimiters);

                // If the terminator is {, then error.
                if (ix < str.Length && str[ix] == '{')
                {
                    throw new FormatException($"Unexpected character at line {line}, col {ix - ixStartLine}");
                }

                args.Add(arg);
            }
        }

        // Snapshot the arguments so the shared list can be reused for the next command.
        var args2 = args.ToArray();
        args.Clear();

        yield return Tuple.Create(command, args2);
    }
}

/// <summary>
/// Reads characters from <paramref name="str"/> starting at <paramref name="ix"/>
/// until one of <paramref name="terminators"/> begins at the current position
/// or the string ends.
/// </summary>
/// <param name="str">Text being scanned.</param>
/// <param name="ix">Start position on entry; on return, the position of the terminator (which is not consumed) or <c>str.Length</c>.</param>
/// <param name="terminators">Strings that end the word; compared ordinally.</param>
/// <returns>The characters read, possibly an empty string.</returns>
public static string ParseWord(string str, ref int ix, string[] terminators)
{
    int begin = ix;
    int end = begin;

    for (; end < str.Length; end++)
    {
        // Copy to a local so the lambda sees this iteration's position.
        int pos = end;
        bool atTerminator = Array.Exists(terminators, t => string.CompareOrdinal(str, pos, t, 0, t.Length) == 0);

        if (atTerminator)
        {
            break;
        }
    }

    ix = end;
    return str.Substring(begin, end - begin);
}

/// <summary>
/// Moves <paramref name="ix"/> past any run of space characters (' ' only;
/// tabs and line breaks are not skipped).
/// </summary>
/// <param name="str">Text being scanned.</param>
/// <param name="ix">Position to start at; on return, the first position that is not a space.</param>
/// <returns><c>true</c> if at least one space was skipped.</returns>
public static bool SkipSpaces(string str, ref int ix)
{
    int origin = ix;
    int pos = origin;

    while (pos < str.Length && str[pos] == ' ')
    {
        pos++;
    }

    ix = pos;
    return pos > origin;
}

/// <summary>
/// Tests whether position <paramref name="ix"/> in <paramref name="str"/> is an
/// end of line: <c>"\r\n"</c>, a lone <c>"\r"</c>, a lone <c>"\n"</c>, or the end
/// of the string.
/// </summary>
/// <param name="str">Text being scanned.</param>
/// <param name="advance">
/// When <c>true</c> and a line break is found, <paramref name="ix"/> is moved past
/// it: two characters for <c>"\r\n"</c>, one otherwise.
/// </param>
/// <param name="ix">Position to test. Left unchanged at end of string, when no line break is found, or when <paramref name="advance"/> is <c>false</c>.</param>
/// <returns><c>true</c> if the position is an end of line or the end of the string.</returns>
public static bool IsEOL(string str, bool advance, ref int ix)
{
    if (ix == str.Length)
    {
        return true;
    }

    char c = str[ix];

    if (c != '\r' && c != '\n')
    {
        return false;
    }

    if (advance)
    {
        // "\r\n" is a single two-character line break; a lone '\r' or '\n' is one character.
        bool isCrLf = c == '\r' && ix + 1 < str.Length && str[ix + 1] == '\n';
        ix += isCrLf ? 2 : 1;
    }

    return true;
}

代码很长，但我确实认为它很清晰。错误信息应该非常准确（会给出 line 和 col）。请注意，} 无法转义；要以优雅的方式支持转义会比较复杂。

用法如下：

// ParseCommands is a lazy iterator (it uses yield return), so ToArray() is what
// actually runs the parser; any parse exception is thrown from this line.
var res = ParseCommands(text).ToArray();

使用命令列表

1 个答案: