[访谈]类似词语距离计算

时间:2013-11-11 10:40:19

标签: java string algorithm

我在一次面试中被问到了这个问题

  

假设你有一个单词词典(如果系统中有,请使用 /usr/share/dict/words)。

     

给定一个单词(例如:cricket),请找出词典中所有可以通过 n 次操作得到的单词。每次操作是以下三种之一:
  添加(插入一个字母)
  替换(替换一个字母)
  删除(删除一个字母)

例如,如果只允许 1 次操作,我们可以找出所有能够由 "cricket" 变换得到的单词。

{'word':'clicket','op':['replace']} {'word':'crickey','op':['replace']} {'word':'crickety','op':['addition']} 等

我以自己的格式打印,但你得到了主旨。

以下是我的尝试

  1. 根据操作次数计算所有可能的列表 序列。
  2. 然后迭代列表并逐个应用它们 并存储字典中的单词。
  3. 这是强力解决方案。我想知道是否有一个有效的解决方案。以下是强力解决方案的代码

    import java.io.BufferedReader;
    import java.io.FileNotFoundException;
    import java.io.FileReader;
    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.HashSet;
    import java.util.Iterator;
    import java.util.List;
    import java.util.Map;
    import java.util.Set;
    
    
    /**
     * Brute-force search for dictionary words that can be produced from a start word by
     * applying an exact number of single-character edits (insert, replace, delete).
     *
     * <p>Every ordered sequence of {@code n} edit kinds is enumerated (3^n sequences) and
     * each sequence is applied at every position with every lower-case letter, so the
     * amount of work grows exponentially with {@code n}.
     */
    public class SimilarWordDistance {

        /** Letters tried for insertions and replacements (full lower-case a-z, including 'x'). */
        private static final char[] ALPHABET = "abcdefghijklmnopqrstuvwxyz".toCharArray();

        /** Dictionary file read by the no-argument {@link #readDictionary()}. */
        private static final String DEFAULT_DICTIONARY_PATH = "C:\\Documents\\Downloads\\words";

        /** Known words; only the keys are consulted, the value is always {@code true}. */
        Map<String,Boolean> dictionary = new HashMap<String,Boolean>();

        /** Operation code: insert one letter. (Historic spelling kept for compatibility.) */
        static final int ADDTION = -1;
        /** Operation code: replace one letter with a different one. */
        static final int REPLACE = 0;
        /** Operation code: delete one letter. */
        static final int DELETION = 1;

        /**
         * Loads the default dictionary and prints the words reachable from "happiness"
         * with three operations.
         *
         * @param args unused
         * @throws IOException if the dictionary file cannot be read
         */
        public static void main(String[] args) throws IOException {

            SimilarWordDistance swd = new SimilarWordDistance();
            swd.readDictionary();
            //swd.findSimilar("cricket", 1);
            swd.findSimilar("happiness", 3);
        }

        /**
         * Prints every dictionary word obtainable from {@code word} by applying exactly
         * {@code num} operations, one word per line, followed by one operation sequence
         * that produces it (tab separated). Output order is the iteration order of a
         * {@link HashMap} and therefore unspecified.
         *
         * @param word the start word
         * @param num  the number of operations to apply; must not be negative
         * @throws IllegalArgumentException if {@code num} is negative
         */
        public void findSimilar(String word,int num) {
            if (num < 0) {
                throw new IllegalArgumentException("num must be >= 0 but was " + num);
            }
            int possibleOperations = (int) Math.pow(3 , num);
            Integer[][] operations = new Integer[possibleOperations][num];
            buildOperationsArray(num, possibleOperations, operations);

            List<String> start = new ArrayList<String>();
            start.add(word);
            Map<String,Integer[]> sols = new HashMap<String,Integer[]>();

            for (Integer[] sequence : operations) {
                applyOperation(sequence, start, sols);
            }

            for (Map.Entry<String,Integer[]> solution : sols.entrySet()) {
                printSolution(solution.getValue(), solution.getKey());
            }
        }

        /**
         * Applies one operation sequence to every start word and records each resulting
         * string that is a dictionary key in {@code sols}, mapped to the sequence.
         * A later sequence producing the same word overwrites the earlier entry.
         */
        private void applyOperation(Integer[] operation,List<String> word,Map<String,Integer[]> sols) {
            // Sets instead of lists: different edit positions frequently yield the same
            // string, and duplicates would otherwise multiply at every further step.
            Set<String> current = new HashSet<String>(word);
            for (int i = 0; i < operation.length; i++) {
                int op = operation[i];
                Set<String> next = new HashSet<String>();
                for (String candidate : current) {
                    if (op == ADDTION) {
                        next.addAll(applyAdditionOperation(candidate));
                    } else if (op == REPLACE) {
                        next.addAll(applyReplace(candidate));
                    } else if (op == DELETION) {
                        next.addAll(applyDeletion(candidate));
                    } else {
                        // Unknown code: leave the candidate untouched.
                        next.add(candidate);
                    }
                }
                current = next;
            }

            for (String candidate : current) {
                if (dictionary.containsKey(candidate)) {
                    sols.put(candidate, operation);
                }
            }
        }

        /**
         * Prints {@code w}, a tab, then the name of each operation followed by a tab,
         * and finally a line break.
         *
         * @param operation the operation codes to print
         * @param w         the word the sequence produced
         */
        protected void printSolution(Integer[] operation, String w) {
            System.out.print(w+"\t" );
            for(int j=0;j<operation.length;j++) {
                System.out.print(printOperation(operation[j])+"\t");
            }
            System.out.println();
        }

        /** Returns the display name of an operation code; any code other than addition/replace reads "Deletion". */
        private String printOperation(Integer integer) {
            if (integer == ADDTION) {
                return "Addition";
            }
            if (integer == REPLACE) {
                return "Replace";
            }
            return "Deletion";
        }

        /** Returns every string obtained by inserting one letter at any position, including the end. */
        private List<String> applyAdditionOperation(String word) {
            List<String> possibleWords = new ArrayList<String>();
            for (int i = 0; i < ALPHABET.length; i++) {
                // j == word.length() appends after the last character.
                for (int j = 0; j <= word.length(); j++) {
                    possibleWords.add(insertAt(word, j, ALPHABET[i]));
                }
            }
            return possibleWords;
        }

        /** Returns every string obtained by removing exactly one character. */
        private List<String> applyDeletion(String word) {
            List<String> possibleWords = new ArrayList<String>();
            for (int i = 0; i < word.length(); i++) {
                possibleWords.add(word.substring(0, i) + word.substring(i + 1));
            }
            return possibleWords;
        }

        /** Returns every string obtained by changing exactly one character to a different letter. */
        private List<String> applyReplace(String word) {
            List<String> possibleWords = new ArrayList<String>();
            for (int i = 0; i < ALPHABET.length; i++) {
                for (int j = 0; j < word.length(); j++) {
                    // Replacing a letter with itself is not an edit; skip it so the
                    // unchanged word is not reported as a "Replace" result.
                    if (word.charAt(j) == ALPHABET[i]) {
                        continue;
                    }
                    possibleWords.add(word.substring(0, j) + ALPHABET[i] + word.substring(j + 1));
                }
            }
            return possibleWords;
        }

        /** Returns {@code word} with {@code c} inserted before index {@code j}; no character is dropped. */
        private String insertAt(String word, int j, char c) {
            return word.substring(0, j) + c + word.substring(j);
        }

        /**
         * Fills {@code operations} with every ordered sequence of {@code num} operation
         * codes. Row {@code r} holds the base-3 digits of {@code r} (most significant digit
         * in column 0), where digit 0 is addition, 1 is replace and 2 is deletion.
         *
         * @param num                number of columns to fill in each row
         * @param possibleOperations number of rows to fill; 3^num yields each sequence once
         * @param operations         target array with at least {@code possibleOperations}
         *                           rows of at least {@code num} columns
         */
        protected void buildOperationsArray(int num, int possibleOperations,
                Integer[][] operations) {
            int[] codes = {ADDTION, REPLACE, DELETION};
            for (int row = 0; row < possibleOperations; row++) {
                int remaining = row;
                for (int col = num - 1; col >= 0; col--) {
                    operations[row][col] = codes[remaining % 3];
                    remaining /= 3;
                }
            }
        }


        /**
         * Writes three consecutive rows starting at {@code curRow}: the first row holds
         * {@code def} in every column; the second and third rows hold {@code def} in
         * every column except {@code curColumn}, which receives the next and the
         * next-but-one operation code in the cycle addition, replace, deletion.
         *
         * <p>No longer called by {@link #buildOperationsArray}; retained so that existing
         * callers and subclasses keep working.
         *
         * @param num        number of columns to fill
         * @param operations target array; rows {@code curRow} to {@code curRow + 2} must exist
         * @param def        the default operation code
         * @param curRow     index of the first of the three rows
         * @param curColumn  the column that varies in the second and third row
         */
        protected void fillPossiblities(int num, Integer[][] operations,int def,int curRow,int curColumn) {
            for(int i=0;i<num;i++) {
                operations[curRow][i] = def;
            }
            for(int i=0;i<num;i++) {
                if(i!=curColumn)
                    operations[curRow+1][i] = def;
            }
            operations[curRow+1][curColumn] = getNext(def); //
            int def1 = getNext(def);
            for(int i=0;i<num;i++) {
                if(i!=curColumn)
                    operations[curRow+2][i] = def;
            }
            operations[curRow+2][curColumn] = getNext(def1);
        }

        /** Returns the code following {@code def} in the cycle addition, replace, deletion, addition. */
        private int getNext(int def) {
            if(def == -1) {
                return REPLACE;
            }
            if(def == 0) {
                return DELETION;
            } else {
                return ADDTION;
            }
        }

        /**
         * Loads the dictionary from the default location.
         *
         * @throws IOException if the file cannot be opened or read
         */
        public void readDictionary() throws IOException {
            readDictionary(DEFAULT_DICTIONARY_PATH);
        }

        /**
         * Adds every line of the given file to the dictionary as one word.
         *
         * @param path location of a text file with one word per line
         * @throws IOException if the file cannot be opened or read
         */
        public void readDictionary(String path) throws IOException {
            BufferedReader in = new BufferedReader(new FileReader(path));
            try {
                String line;
                // readLine() returns null at end of stream; ready() only reports whether
                // a read would block and is not an end-of-file test.
                while ((line = in.readLine()) != null) {
                    dictionary.put(line, true);
                }
            } finally {
                // Close the reader even when reading fails.
                in.close();
            }
        }

    }
    

2 个答案:

答案 0 :(得分:1)

For each word in the-dictionary
   d = minimum-edit-distance (given-word, word)
   if d <= n
      print (word)

最小编辑距离可以用众所周知的动态规划算法求解,复杂度为 O(n*m),其中 n 和 m 分别是两个单词的长度。

维基百科文章有实现:http://en.wikipedia.org/wiki/Levenshtein_distance

答案 1 :(得分:0)

一种解决方案是您可以修改字典数据结构并以图形的形式表示。

图中的每个节点代表一个单词。如果一个单词与另一个单词只相差一次操作,则这两个节点之间有一条边。

在你的例子中,'cricket' 和 'crickets' 之间可能有一条边。

一旦将词典加载成这种形式,之后要查询通过一次此类操作能得到的单词,只需找出与 cricket 直接相连的节点即可。