用字典解决完整的字谜

时间:2015-03-10 22:02:53

标签: java algorithm constraint-programming anagram

我正在解决一个经典问题。确切地说,我正试图解决一个完整的字谜。

任何准确再现其他顺序字母的单词或短语都是字谜。

我有一个字谜,字典和哈希。我需要提出最初被散列的短语,因此程序应该使用给定的字典生成所有排列并检查其中任何一个是否是答案。

总而言之,有人为我隐瞒了一条消息,我需要破解它!

示例输入:

  

scofriybaarae dict.txt FD8D80332CCA32905F11860FB866CA92

以下所有短语都是 scofriybaarae 的有效字谜,因此它们所包含的单词可能不同或顺序不同。

are frisco a bay

frisco area bay

frisco bay area

然而,只有最后一个才是答案。这是因为 frisco bay area 的MD5与作为参数给出的MD5匹配。

我们可以分开处理字典,生成组合和检查md5的任务。

我使用一棵字母树,树中的某些节点可以标记一个单词的结尾。分支的末端始终是一个单词的结尾。这意味着单个分支可以表示多个单词,例如下面用粗体标出的字母表示到该处构成一个完整的单词:

  

ai**r**por**t**

在上面的示例中,存储了两个单词,因此当您走过时很容易删除使用过的字母。

我的程序可以很快地从字典中构建一个树,虽然我对解算器的性能不满意。

我发现的问题只是组合的数量太大,我不知道如何缓解。例如,给定13个字母和许多长度从1到13的字典单词。在这种情况下,仅单字母单词就有 13! = 6227020800 种排列,你可以想象总共可能有多少种组合。

我注意到我说的词越短越好。

我想知道我是否在正确的轨道上或者它在概念上是错误的?

我应该使用数据库引擎吗?

为方便起见,我的词典中有一大块:

  

bay ara area aera fbaer frisco friscob friscoba afriscoar friscobay   bayfrisco aabceforsy

package margana;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.*;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class Solution {

    private final String givenLetterSet;
    private String file;
    private final ExecutorService executorService = Executors.newFixedThreadPool(16);

    LetterNode root = new LetterNode('\u03A9', null); // omega root node
    private Map<Character, Long> countedOriginalLetters = new HashMap<Character, Long>();

    /**
     * Mixed Anatree class
     */
    public static class LetterNode implements Comparable<LetterNode> {
        private final char letter;// does not matter for the root node
        private boolean ending;
        private Map<Character, LetterNode> leaves = new HashMap<Character, LetterNode>();
        private LetterNode motherNode;
        private String wholeCachedWord;
        private int length = 1;

        public LetterNode(char oneLetter, LetterNode mom) {
            letter = oneLetter;
            if (mom != null) {
                if (mom.motherNode != null) {
                    length += mom.length;// all consecutive nodes minus mom length
                }
                motherNode = mom;
            }
        }

        public char getLetter() {
            return letter;
        }

        public Character getCharacter() {
            return Character.valueOf(letter);
        }

        public boolean isEnding() {
            return ending;
        }

        public void setEnding(boolean ending) {
            this.ending = ending;
        }

        public Map<Character, LetterNode> getLeaves() {
            return leaves;
        }

        public int getLength() {
            return length;
        }

        public LetterNode getMotherNode() {
            return motherNode;
        }

        public String compileNodesIntoWord() {
            if (wholeCachedWord != null) {
                return wholeCachedWord;
            }
            LetterNode node = motherNode;
            StringBuilder buffer = new StringBuilder(length);
            buffer.append(letter);
            while (node.motherNode != null) {
                buffer.insert(0, node.letter);
                if (node.motherNode.motherNode == null) {
                    break;
                }
                node = node.motherNode;
            }
            wholeCachedWord = buffer.toString();
            return wholeCachedWord;
        }

        @Override
        public boolean equals(Object o) {
            if (this == o) {
                return true;
            }
            if (o == null || getClass() != o.getClass()) {
                return false;
            }
            LetterNode that = (LetterNode) o;
            if (letter != that.letter) {
                return false;
            }
            return true;
        }

        @Override
        public int hashCode() {
            return (int) letter;
        }

        @Override
        public int compareTo(LetterNode o) {
            return Character.compare(letter, o.letter);
        }

        @Override
        public String toString() {
            if (ending) {
                return compileNodesIntoWord();
            }
            return String.valueOf(letter);
        }
    }

    public Solution(String anagram, String dictionaryFile) {
        file = dictionaryFile;
        byte[] tempArray = anagram.toLowerCase().replaceAll(" ", "").getBytes();
        Arrays.sort(tempArray);
        givenLetterSet = new String(tempArray);
        for (char oneChar : anagram.toLowerCase().toCharArray()) {
            Long numberOfOccurrences = countedOriginalLetters.get(Character.valueOf(oneChar));
            if (numberOfOccurrences == null) {
                countedOriginalLetters.put(new Character(oneChar), new Long(1));
            } else {
                countedOriginalLetters.put(new Character(oneChar), new Long(numberOfOccurrences.longValue() + 1));
            }
        }
    }

    /**
     * Rule out rubbish words
     *
     * @param oneWord
     * @return
     */
    private boolean invalidAgainstGivenSentence(String oneWord) {
        if (oneWord.length() > givenLetterSet.length()) {
            return true;
        }
        for (char oneChar : oneWord.toLowerCase().toCharArray()) {
/*            if (oneChar == "'".charAt(0)) {// to regards ' as a letter
                continue;
            }*/
            Long amountOfParticularLetter = countedOriginalLetters.get(Character.valueOf(oneChar));
            if (amountOfParticularLetter == null) {
                return true;
            }
        }
        return false;
    }

    public void growTree() throws IOException {
        BufferedReader br = new BufferedReader(new FileReader(file));
        String oneWord;
        long depth = 0; // for fun
        long candidate = 0;
        boolean isNewWord = false;
        while ((oneWord = br.readLine()) != null) {
            if (invalidAgainstGivenSentence(oneWord)) {
                continue;//is not a valid chunk of the given anagram
            }
            LetterNode previousNode = root;
            isNewWord = false;
            for (char one : oneWord.toCharArray()) {
                LetterNode currentLetter = previousNode.getLeaves().get(Character.valueOf(one));
                if (currentLetter == null) {// letter does not exists, let us add it
                    LetterNode newNode = new LetterNode(one, previousNode);
                    previousNode.getLeaves().put(Character.valueOf(one), newNode);
                    currentLetter = newNode;
                    isNewWord = true;
                }
                previousNode = currentLetter;
            }
            if (isNewWord) {
                candidate += 1;
            }
            previousNode.setEnding(true);
            depth = Math.max(depth, previousNode.getLength());
        }
        System.out.println("Created an anatree comprising of " + candidate + " words, and " + depth + " levels");
        br.close();
    }

    public void solve(String md5) throws NoSuchAlgorithmException {
        List<LetterNode> foundWords = new ArrayList<LetterNode>();
        LinkedList<Character> input = new LinkedList<Character>();
        Set<Character> inputSet = new HashSet<Character>();
        for (Character one : givenLetterSet.toCharArray()) {
            input.add(one);
            inputSet.add(one);
        }
        NavigableSet<LetterNode> firstLevel = new TreeSet(root.getLeaves().values()).descendingSet();
        for (LetterNode node: firstLevel) {
            if (inputSet.contains(node.getCharacter())) {
                executorService.execute(new SolverRunnable(foundWords, input, node, md5.toLowerCase()));
            }
        }
        executorService.shutdown();
    }

    class SolverRunnable implements Runnable {
        private List<LetterNode> initialWords;
        private List<Character> spareCharacters;
        private LetterNode initialNode;
        private String md5Hash;

        public SolverRunnable(List<LetterNode> foundWords, List<Character> spareLetters, LetterNode route, String md5) {
            initialNode = route;
            initialWords = foundWords;
            spareCharacters = spareLetters;
            md5Hash = md5;
        }

        public void run() {
            System.out.println("Started solving branch '" + initialNode.getCharacter() + "' from root ");
            try {
                solve(initialWords, spareCharacters, initialNode, md5Hash);
            } catch (NoSuchAlgorithmException e) {
                e.printStackTrace();
            }
        }
    }

    private void solve(List<LetterNode> foundWords, List<Character> spareLetters, LetterNode route, String md5) throws NoSuchAlgorithmException {
        List<LetterNode> localFoundWords = new ArrayList<LetterNode>(foundWords);
        List<Character> workspace = new LinkedList<Character>();
        LetterNode current = route;
        workspace.addAll(spareLetters);
        while (!current.getLeaves().isEmpty()) {
            if (!workspace.contains(current.getCharacter())) {
                break;
            }
            workspace.remove(current.getCharacter());
            if (current.getLeaves().size() > 1) {// start solving recursively then quit
                for (LetterNode node: new TreeSet<LetterNode>(current.getLeaves().values())) {//checking every branch
                    if (workspace.contains(node.getCharacter())) {
                        solve(localFoundWords, workspace, node, md5);
                    }
                }
                break;//we solve routes without forks
            }
            if (workspace.isEmpty()) {
                break;
            }
            if (current.isEnding()) {//recursively solving a shorter word first then continue
                localFoundWords.add(current);
                startOver(workspace, localFoundWords, md5);
                localFoundWords.remove(current);
            }
            current = (LetterNode) current.getLeaves().values().toArray()[0];
        }
        if (current.isEnding()) {
            localFoundWords.add(current);
            workspace.remove(current.getCharacter());
            if (workspace.isEmpty()) {
                check(localFoundWords, md5);
                return;
            }
            startOver(workspace, localFoundWords, md5);
        }
    }

    private void check(List<LetterNode> localFoundWords, String md5) throws NoSuchAlgorithmException {
        if (isPreliminaryValid(localFoundWords)) {
            String phrase = concatenateNodesWithSpaces(localFoundWords);
            if (md5.equalsIgnoreCase(digest(phrase))) {
                System.out.println(phrase);
                executorService.shutdownNow();
                System.exit(0);
            }
        }
    }

    private void startOver(List<Character> workspace, List<LetterNode> localFoundWords, String md5) throws NoSuchAlgorithmException {
        for (LetterNode node: root.getLeaves().values()) {
            if (workspace.contains(node.getCharacter())) {
                solve(localFoundWords, workspace, node, md5);
            }
        }
    }

    public boolean isPreliminaryValid(List<LetterNode> words) {
        StringBuilder builder = new StringBuilder();
        int total = 0;
        for (LetterNode word : words) {
            builder.append(word.compileNodesIntoWord());
            total += word.length;
        }
        if (total != givenLetterSet.length()) {
            return false;
        }
        char[] letters = builder.toString().toCharArray();
        Arrays.sort(letters);
        return new String(letters).equals(givenLetterSet);
    }

    public static String concatenateNodesWithSpaces(List<LetterNode> words) {
        StringBuilder builder = new StringBuilder();
        int spaces = words.size() - 1;
        for (LetterNode word : words) {
            builder.append(word.compileNodesIntoWord());
            if (spaces > 0) {
                spaces--;
                builder.append(" ");
            }
        }
        return builder.toString();
    }

    public static String digest(String original) throws NoSuchAlgorithmException {
        MessageDigest md = MessageDigest.getInstance("MD5");
        md.update(original.getBytes());
        StringBuilder sb = new StringBuilder(34);
        for (byte b : md.digest()) {
            sb.append(String.format("%02x", b & 0xff));
        }
        return sb.toString();
    }

    public static void main(String[] args) throws IOException, NoSuchAlgorithmException {
        Solution s = new Solution(args[0], args[1]);
        s.growTree();
/*
        s.solve("BE2B1B1409746B5416F44FB6D9C16A55");// cop pop
        //s.solve("493DF2D8AC7EDB14CD50CA07A539A805");// cop p'op
*/
        s.solve(args[2]); //frisco bay area
    }

}

1 个答案:

答案 0 :(得分:0)

可能的解决方案(nodejs):

var
  md5 = require('MD5'),
  fs = require('fs');

/**
 * Builds a letter-frequency table for a string.
 *
 * @param {string} str text whose characters are counted
 * @returns {Object} map from each character to the number of times it occurs
 */
function createIndex(str) {
  return Array.prototype.reduce.call(str, function (counts, letter) {
    counts[letter] = (counts[letter] || 0) + 1;
    return counts;
  }, {});
}

/**
 * Tells whether every letter counted in subIndex is available in index
 * at least as many times.
 *
 * @param {Object} index    letter counts that are available
 * @param {Object} subIndex letter counts that are required
 * @returns {boolean} true when subIndex fits inside index
 */
function indexContains(index, subIndex) {
  var letter;
  for (letter in subIndex) {
    if (!subIndex.hasOwnProperty(letter)) {
      continue;
    }
    if (!index.hasOwnProperty(letter)) {
      return false;
    }
    if (subIndex[letter] > index[letter]) {
      return false;
    }
  }
  return true;
}

/**
 * Subtracts the letter counts of subIndex from index.
 *
 * @param {Object} index    letter counts to subtract from
 * @param {Object} subIndex letter counts to remove
 * @returns {Object|null} a new table holding only the non-zero remainders,
 *   or null when nothing is left
 */
function excludeIndex(index, subIndex) {
  var remaining = {}, kept = 0, letter, left;
  for (letter in index) {
    if (!index.hasOwnProperty(letter)) {
      continue;
    }
    left = subIndex.hasOwnProperty(letter) ? index[letter] - subIndex[letter] : index[letter];
    if (left) {
      remaining[letter] = left;
      kept += 1;
    }
  }
  return kept === 0 ? null : remaining;
}

/**
 * Drops every item whose value for the given property has already been
 * seen earlier in the list (strict equality), keeping first occurrences.
 *
 * @param {Array<Object>} items    objects to de-duplicate
 * @param {string}        property name of the property to compare
 * @returns {Array<Object>} new array with the first item for each value
 */
function uniqueByProperty(items, property) {
  var seen = [];
  return items.filter(function (entry) {
    var key = entry[property];
    if (seen.indexOf(key) !== -1) {
      return false;
    }
    seen.push(key);
    return true;
  });
}

/**
 * Recursively searches for a sequence of dictionary words that uses up
 * exactly the letters in charsIndex and whose space-joined phrase hashes
 * to targetHash.
 *
 * @param {Object}        charsIndex letter counts still to be used
 * @param {Array<Object>} dict       entries of the form {word, index}
 * @param {Array<string>} prevWords  words already chosen, in order
 * @param {string}        targetHash MD5 the phrase must match
 * @returns {string|null} the matching phrase, or null if none exists
 */
function findAnagram(charsIndex, dict, prevWords, targetHash) {
  var candidates, position, entry, remaining, chosen, phrase, found;

  // Only words that still fit into the remaining letters are worth trying.
  candidates = dict.filter(function (candidate) {
    return indexContains(charsIndex, candidate.index);
  });
  // Duplicate dictionary lines are removed once, on the outermost call.
  if (!prevWords.length) {
    candidates = uniqueByProperty(candidates, 'word');
  }

  for (position = 0; position < candidates.length; position += 1) {
    entry = candidates[position];
    remaining = excludeIndex(charsIndex, entry.index);
    chosen = prevWords.concat(entry.word);

    if (!remaining) {
      // Every letter is used: this is a complete phrase, test its hash.
      phrase = chosen.join(' ');
      if (md5(phrase) === targetHash) {
        return phrase;
      }
      continue;
    }

    found = findAnagram(remaining, candidates, chosen, targetHash);
    if (found) {
      return found;
    }
  }
  return null;
}

// Driver: load the word list, index every word by letter counts, then search
// for the phrase that is an anagram of initialStr and hashes to finalMD5.
var
  // Split on LF or CRLF so a Windows-formatted word list does not leave a
  // trailing "\r" inside every word (and inside its letter index).
  dict = fs.readFileSync('./data/wordlist.txt', 'utf8').split(/\r?\n/)
    .filter(function (str) {
      // keep only lines that contain something other than spaces
      return str.replace(/ /g, '');
    })
    .map(function (word) {
      return {word: word, index: createIndex(word)};
    }),
  initialStr = "poultry outwits ants",
  finalMD5 = "4624d200580677270a54ccff86b9610e",
  // The /g flag is essential: without it only the first space is removed, a
  // ' ' entry stays in the letter index and no word combination can ever
  // consume it, so the search would always return null.
  result = findAnagram(createIndex(initialStr.replace(/ /g, '')), dict, [], finalMD5);

console.log(result);