对齐相似词算法

时间:2018-07-12 10:17:04

标签: java arrays algorithm

我想对齐两个字符串数组,如下所示:

| welcome | to | my | home | ___ | _____ | ___| _______ | ____

| _______ | __ | my | home | is  | where | my | parents | live  

我正在使用 Java 和两个 ArrayList。能否推荐一个现成的算法来完成这项工作?

1 个答案:

答案 0 :(得分:1)

正如 @tobias_k 在评论中提到的,这是一个著名的问题,称为最长公共子序列(Longest Common Subsequence)。下面的解决方案使用动态规划技术以获得更好的性能。您可以在此处(here)找到有关此解决方案的更多详细说明。

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;

public class LongestCommonSubstring {
    public static ArrayList lcs(ArrayList l1, ArrayList l2) {
        int[][] d = new int[l1.size() + 1][l2.size() + 1];
        for (int i1 = 1; i1 <= l1.size(); i1++) {
            for (int i2 = 1; i2 <= l2.size(); i2++) {
                if (l1.get(i1 - 1).equals(l2.get(i2 - 1))) {
                    d[i1][i2] = d[i1 - 1][i2 - 1] + 1;
                } else {
                    d[i1][i2] = Math.max(d[i1 - 1][i2], d[i1][i2 - 1]);
                }
            }
        }
        int i1 = l1.size(), i2 = l2.size();
        ArrayList result = new ArrayList(Arrays.asList(new String[d[i1][i2]]));
        while (i1 > 0 && i2 > 0) {
            if (l1.get(i1 - 1).equals(l2.get(i2 - 1))) {
                result.set(d[i1][i2] - 1, l1.get(i1 - 1));
                i1 -= 1;
                i2 -= 1;
            } else if (d[i1][i2] == d[i1 - 1][i2]) {
                i1 -= 1;
            } else {
                i2 -= 1;
            }
        }
        return result;
    }

    public static void main(String[] args) {
        ArrayList l1 = new ArrayList(Arrays.asList("welcome to my home".split(" ")));
        ArrayList l2 = new ArrayList(Arrays.asList("my home is where my parents live".split(" ")));
        System.out.println(lcs(l1, l2));
    }
}