匹配2个字符串的序列

时间:2017-02-16 12:43:23

标签: c#

我做了一个小应用程序,其中有一个原始字符串和一个编辑过的字符串。原始字符串叫做 "one",编辑过的字符串叫做 "two"。我想逐词比较这两个字符串,找出所做的每一处编辑,并把编辑过的单词以大写形式加入到原始字符串中。例如:原始字符串为 "This is original",编辑后为 "This is edited",则输出为 "This is original EDITED"。我希望程序沿着字符串逐个匹配单词,一旦遇到改动就停下来,把该单词改成大写,并将其添加到原始字符串的对应位置。下面是我目前的代码,它可以找出字符串中所有被编辑过的单词。我的问题在于如何把这些字符串拼接起来。预期输出为 "This This THIS is a new value VALUES"。

我的代码如下:

// Question snippet: walks the words of the original text (`one`) alongside the
// words of the edited text (`two`) and writes every visited word of `two` to the
// debug output, upper-casing the ones that differ from the current word of `one`.
string one = "This is a new value";
        string two = "This This is a new values";
        // Position of the current word inside col2.
        int index = 0;
        // NOTE(review): coll is never read in this snippet. Contains is a substring
        // test on the whole text of `one`, not a whole-word comparison.
        var coll = two.Split(' ').Select(p => one.Contains(p) ? p : p.ToUpperInvariant());

        var col2 = two.Split(' ');
        var col1 = one.Split(' ');


        // i selects the word of `one`; index selects the word of `two`.
        for (int i = 0; i < col1.Length; i++)
        {
            // NOTE(review): `a` is never read afterwards.
            var a = two.IndexOf(col2[i].ToString(), index);
            if (col2[index].ToString()==col1[i].ToString())
            {
                // Words match: report the word unchanged.
                Debug.WriteLine(col2[index]);
            }
            else
            {




                // Words differ: report the word of `two` in upper case.
                Debug.WriteLine(col2[index].ToUpper());
                // NOTE(review): string.Insert returns a new string and the result is
                // discarded here, so `two` is left unchanged. `index` counts words,
                // whereas Insert expects a character position.
                two.Insert(index, col1[i].ToString().ToUpper());
                //Debug.WriteLine(col1[i]);

                // Cancel the loop's i++ so the same word of `one` is compared
                // with the next word of `two`.
                i--;

            }
            index++;
            // Stop once every word of `two` has been visited.
            if (index==col2.Length)
            {
                break;
            }
        }

        // NOTE(review): `two` is a single string rather than a collection of words,
        // so this presumably prints it unchanged - confirm.
        Console.WriteLine(string.Join(" ", two));
        Console.ReadKey();

1 个答案:

答案 0 :(得分:3)

您要解决的是编辑距离(Edit Distance)问题。您有两个由元素组成的序列(在您的例子中,元素就是单词),需要计算把第一个序列变成第二个序列所需的最少修改次数。

我建议您按照上面链接的维基百科文章中的算法来做,这样就能得到一个不错的实现。这些算法乍一看可能有些吓人,但只要深入了解,就会发现它们其实非常简单。

以下是完整的 C# 实现。它基于动态规划,并重建从原始字符串到最终字符串的各个步骤。请注意,我的解决方案会把被删除的单词写在方括号中。如果您只想跳过被删除的单词,只需不把它们添加到 ReconstructEdit() 方法的输出中即可。

/// <summary>
/// Fills the cost and operation tables for turning <paramref name="original"/>
/// into <paramref name="final"/> word by word, then renders the edit script.
/// </summary>
/// <param name="original">Words of the original sequence.</param>
/// <param name="final">Words of the final sequence.</param>
/// <returns>The text produced by <c>ReconstructEdit</c> for the filled tables.</returns>
private static string CalculateMinimumEdit(string[] original, string[] final)
{
    // One extra row and column represent the empty prefix of each sequence.
    int rowCount = original.Length + 1;
    int columnCount = final.Length + 1;

    int[,] costs = new int[rowCount, columnCount];

    // Operation that led into each cell:
    // '=' equal words, '+' inserted, '-' deleted, '*' modified word.
    char[,] resultOf = new char[rowCount, columnCount];

    // Mark every cell as not reached yet.
    InitializeInvalidCosts(costs);

    // Starting state: two empty prefixes are equal and cost nothing.
    costs[0, 0] = 0;
    resultOf[0, 0] = '=';

    for (int row = 0; row < rowCount; row++)
    {
        for (int column = 0; column < columnCount; column++)
        {
            // Push the cost of this cell forward to its neighbours.
            // The call order is significant: on equal cost the first writer wins.
            SetDeleteCost(costs, resultOf, row, column);
            SetInsertCost(costs, resultOf, row, column);
            SetModifiedCost(costs, resultOf, row, column);
            SetEqualCost(costs, resultOf, row, column, original, final);
        }
    }

    return ReconstructEdit(costs, resultOf, original, final);
}

/// <summary>
/// Marks every cell of <paramref name="costs"/> as not analyzed yet by
/// storing a negative value in it.
/// </summary>
/// <param name="costs">Cost table to overwrite in place.</param>
private static void InitializeInvalidCosts(int[,] costs)
{
    // Any negative number means "this position has not been reached".
    const int notReached = -1;

    int rowCount = costs.GetLength(0);
    int columnCount = costs.GetLength(1);

    for (int row = 0; row < rowCount; row++)
    {
        for (int column = 0; column < columnCount; column++)
        {
            costs[row, column] = notReached;
        }
    }
}

/// <summary>
/// Proposes reaching the cell one step further in the final sequence by
/// treating the next final word as an insertion.
/// </summary>
/// <param name="costs">Cost table.</param>
/// <param name="resultOf">Operation table.</param>
/// <param name="originalIndex">Current position in the original sequence.</param>
/// <param name="finalIndex">Current position in the final sequence.</param>
private static void SetInsertCost(int[,] costs, char[,] resultOf,
                                    int originalIndex, int finalIndex)
{
    // An insertion keeps the original position, advances the final
    // position by one and costs 1.
    int nextFinalIndex = finalIndex + 1;
    int costAfterInsert = costs[originalIndex, finalIndex] + 1;

    SetCostIfBetter(costs, resultOf, originalIndex, nextFinalIndex,
                    costAfterInsert, '+');
}

/// <summary>
/// Proposes reaching the cell one step further in the original sequence by
/// treating the next original word as a deletion.
/// </summary>
/// <param name="costs">Cost table.</param>
/// <param name="resultOf">Operation table.</param>
/// <param name="originalIndex">Current position in the original sequence.</param>
/// <param name="finalIndex">Current position in the final sequence.</param>
private static void SetDeleteCost(int[,] costs, char[,] resultOf,
                                    int originalIndex, int finalIndex)
{
    // A deletion advances the original position by one, keeps the final
    // position and costs 1.
    int nextOriginalIndex = originalIndex + 1;
    int costAfterDelete = costs[originalIndex, finalIndex] + 1;

    SetCostIfBetter(costs, resultOf, nextOriginalIndex, finalIndex,
                    costAfterDelete, '-');
}

/// <summary>
/// Proposes reaching the diagonal neighbour by treating the next original
/// word as replaced with the next final word.
/// </summary>
/// <param name="costs">Cost table.</param>
/// <param name="resultOf">Operation table.</param>
/// <param name="originalIndex">Current position in the original sequence.</param>
/// <param name="finalIndex">Current position in the final sequence.</param>
private static void SetModifiedCost(int[,] costs, char[,] resultOf,
                                    int originalIndex, int finalIndex)
{
    // A replacement consumes one word from each sequence and costs 1.
    int nextOriginalIndex = originalIndex + 1;
    int nextFinalIndex = finalIndex + 1;
    int costAfterReplace = costs[originalIndex, finalIndex] + 1;

    SetCostIfBetter(costs, resultOf, nextOriginalIndex, nextFinalIndex,
                    costAfterReplace, '*');
}

/// <summary>
/// When the next words of both sequences are the same, proposes reaching the
/// diagonal neighbour at no additional cost.
/// </summary>
/// <param name="costs">Cost table.</param>
/// <param name="resultOf">Operation table.</param>
/// <param name="originalIndex">Current position in the original sequence.</param>
/// <param name="finalIndex">Current position in the final sequence.</param>
/// <param name="original">Words of the original sequence.</param>
/// <param name="final">Words of the final sequence.</param>
private static void SetEqualCost(int[,] costs, char[,] resultOf,
                                    int originalIndex, int finalIndex,
                                    string[] original, string[] final)
{
    // Nothing to compare once either sequence is exhausted.
    if (originalIndex >= original.Length || finalIndex >= final.Length)
    {
        return;
    }

    // Different words cannot be matched for free.
    if (original[originalIndex] != final[finalIndex])
    {
        return;
    }

    // Equal words: both positions advance by one and the cost is carried over.
    int carriedCost = costs[originalIndex, finalIndex];
    SetCostIfBetter(costs, resultOf, originalIndex + 1, finalIndex + 1,
                    carriedCost, '=');
}

/// <summary>
/// Stores <paramref name="cost"/> and <paramref name="operation"/> in the
/// target cell when <c>IsBetterCost</c> accepts the new cost.
/// </summary>
/// <param name="costs">Cost table, updated in place.</param>
/// <param name="resultOf">Operation table, updated in place.</param>
/// <param name="originalIndex">Row of the target cell.</param>
/// <param name="finalIndex">Column of the target cell.</param>
/// <param name="cost">Candidate cost for the target cell.</param>
/// <param name="operation">Operation that produced the candidate cost.</param>
private static void SetCostIfBetter(int[,] costs, char[,] resultOf,
                                    int originalIndex, int finalIndex,
                                    int cost, char operation)
{
    // Keep the existing entry unless the candidate is an improvement.
    if (!IsBetterCost(costs, originalIndex, finalIndex, cost))
    {
        return;
    }

    // Record the operation together with the cost it achieved.
    resultOf[originalIndex, finalIndex] = operation;
    costs[originalIndex, finalIndex] = cost;
}

/// <summary>
/// Tells whether <paramref name="cost"/> should replace the value currently
/// stored in the given cell.
/// </summary>
/// <param name="costs">Cost table.</param>
/// <param name="originalIndex">Row of the cell.</param>
/// <param name="finalIndex">Column of the cell.</param>
/// <param name="cost">Candidate cost.</param>
/// <returns>
/// <c>false</c> when either index lies at or beyond the table's upper bounds;
/// otherwise <c>true</c> when the cell is still negative (not set) or the
/// candidate is strictly lower than the stored cost.
/// </returns>
private static bool IsBetterCost(int[,] costs, int originalIndex,
                                    int finalIndex, int cost)
{
    // Cells past the last row or column do not exist.
    if (originalIndex >= costs.GetLength(0))
    {
        return false;
    }

    if (finalIndex >= costs.GetLength(1))
    {
        return false;
    }

    int current = costs[originalIndex, finalIndex];

    // A negative entry means the cell has not been set yet.
    if (current < 0)
    {
        return true;
    }

    return cost < current;
}

/// <summary>
/// Walks the operation table backwards from the bottom-right cell to the
/// top-left cell and renders the edit script as one space-separated line.
/// </summary>
/// <remarks>
/// Unchanged words are copied as they are, modified and inserted words are
/// written in upper case, and deleted words are wrapped in square brackets.
/// </remarks>
/// <param name="costs">Cost table; not read by this method.</param>
/// <param name="resultOf">
/// For every cell, the operation ('=', '*', '+' or '-') that led into it.
/// </param>
/// <param name="original">Words of the original sequence.</param>
/// <param name="final">Words of the final sequence.</param>
/// <returns>
/// The rendered edit script, or an empty string when both sequences are empty.
/// </returns>
/// <exception cref="System.InvalidOperationException">
/// A visited cell holds a character that is not one of the four operations.
/// </exception>
private static string ReconstructEdit(int[,] costs, char[,] resultOf,
                                        string[] original, string[] final)
{
    // Every step consumes at least one word, so the walk emits at most
    // original.Length + final.Length tokens. The buffer is filled from the
    // back because the walk yields the tokens in reverse order.
    string[] tokens = new string[original.Length + final.Length];
    int firstToken = tokens.Length;

    int originalIndex = original.Length;
    int finalIndex = final.Length;

    while (originalIndex > 0 || finalIndex > 0)
    {
        char operation = resultOf[originalIndex, finalIndex];
        string token;

        switch (operation)
        {
            case '=':
                originalIndex -= 1;
                finalIndex -= 1;
                token = original[originalIndex];
                break;
            case '*':
                originalIndex -= 1;
                finalIndex -= 1;
                token = final[finalIndex].ToUpper();
                break;
            case '+':
                finalIndex -= 1;
                token = final[finalIndex].ToUpper();
                break;
            case '-':
                originalIndex -= 1;
                token = "[" + original[originalIndex] + "]";
                break;
            default:
                // Without this branch neither index would move and the loop
                // would never terminate.
                throw new System.InvalidOperationException(
                    "Unknown edit operation at position [" + originalIndex +
                    ", " + finalIndex + "].");
        }

        firstToken -= 1;
        tokens[firstToken] = token;
    }

    // Join once instead of prepending to a growing string on every step.
    return string.Join(" ", tokens, firstToken, tokens.Length - firstToken);
}