How to retrieve a random word of a given length from a Trie
上面的答案解释了如何选择第一个字符,但我很困惑我们将如何继续。我想要长度为L的单词,但是当我开始遍历树时,我不知道正在遍历的分支是否具有深度L.
词典
package com.FastDictionary;
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import sun.rmi.runtime.Log;
/**
* Dictionary implementation.
* Uses Trie Data Structure
* Creates a singleton object
*/
public class FastDictionary {
    // Number of nine-letter words counted by processFile
    private int nineWordCount;
    // Number of words counted by processFile
    private int totalWordCount;
    // Root node of the trie
    private DictionaryNode root;
    // Singleton object
    private static FastDictionary fastDictionary;
    // Flag; true once processFile has run
    private boolean isProcessed;

    /**
     * Creates an empty dictionary. Private so that instances are only
     * obtained through {@link #getInstance()}.
     */
    private FastDictionary() {
        this.root = new DictionaryNode();
        this.isProcessed = false;
        this.nineWordCount = 0;
        this.totalWordCount = 0;
    }

    /**
     * Checks that a search string consists only of ASCII letters.
     *
     * @param text string to validate; may be null
     * @return true if text is non-null, non-empty and made only of a-z / A-Z
     */
    private boolean sanitiseSearch(String text) {
        if (text == null) {
            return false;
        }
        // String.matches tests the WHOLE input, so the quantifier is required:
        // the bare character class "[a-zA-Z]" accepted one-letter strings only.
        return text.matches("[a-zA-Z]+");
    }

    /**
     * Add a word to Dictionary
     *
     * @param word word to be added
     * @throws IllegalArgumentException if word is null or contains a space character
     */
    public void addWord(String word) {
        if (word == null) {
            throw new IllegalArgumentException("Word to be added to Dictionary can't be null");
        }
        // Sanitise input
        if (word.contains(" ")) {
            throw new IllegalArgumentException(
                    "Word to be added to Dictionary can't contain white spaces");
        }
        DictionaryNode currentNode = this.root;
        for (char c : word.toCharArray()) {
            DictionaryNode child = currentNode.getChild(c);
            // Reuse the existing branch when there is one, otherwise extend the trie
            currentNode = (child == null) ? currentNode.addChild(c) : child;
        }
        // Last node contains last character of valid word
        // Set that node as Leaf Node for valid word
        currentNode.setLeaf();
    }

    /**
     * Tells whether a string was previously added as a complete word.
     *
     * @param word String to be checked if it is a valid word
     * @return True if valid word; false if it is unknown or only a prefix of longer words
     * @throws IllegalArgumentException if word is null or contains a space character
     */
    public boolean isWord(String word) {
        if (word == null) {
            throw new IllegalArgumentException("Word to be checked can't be null");
        }
        // Sanitise input
        if (word.contains(" ")) {
            throw new IllegalArgumentException(
                    "Word to be checked can't contain white spaces");
        }
        DictionaryNode currentNode = this.root;
        for (char c : word.toCharArray()) {
            DictionaryNode child = currentNode.getChild(c);
            if (child == null) {
                return false;
            }
            currentNode = child;
        }
        // Returns true if Last Character was leaf
        return currentNode.isLeaf();
    }

    /**
     * Collects every prefix of the given text that is itself a valid word.
     * Prefixes are reported from shortest to longest; the walk stops at the
     * first character that has no matching branch in the trie.
     *
     * @param text String that needs to be searched
     * @return List of Strings which are valid words searched using 'text';
     *         empty if no prefix of text is a word
     * @throws IllegalArgumentException if text is null
     */
    public ArrayList<String> getWords(String text) {
        if (text == null) {
            throw new IllegalArgumentException("Text to be searched can't be null");
        }
        ArrayList<String> words = new ArrayList<String>();
        DictionaryNode currentNode = this.root;
        for (int i = 0; i < text.length(); i++) {
            DictionaryNode child = currentNode.getChild(text.charAt(i));
            if (child == null) {
                return words;
            }
            if (child.isLeaf()) {
                words.add(text.substring(0, i + 1));
            }
            currentNode = child;
        }
        return words;
    }

    /**
     * Loads words, one per line, from the given stream into the dictionary.
     * Each line is trimmed before being added; blank lines are ignored.
     * The stream is closed when this method returns.
     *
     * Any exception raised while reading (including the
     * IllegalArgumentException thrown by addWord for a line that contains a
     * space) stops the load; it is printed to standard output and not
     * rethrown. The isProcessed flag is switched to true in every case.
     *
     * @param inputFileStream Text file containing list of valid words
     */
    public void processFile(InputStream inputFileStream) {
        // try-with-resources guarantees the reader (and the wrapped stream) is released
        try (BufferedReader br = new BufferedReader(new InputStreamReader(inputFileStream))) {
            String line;
            while ((line = br.readLine()) != null) {
                line = line.trim();
                // A blank line is not a word: adding "" would mark the root as a leaf
                if (line.isEmpty()) {
                    continue;
                }
                this.addWord(line);
                // Nine Word test
                if (line.length() == 9) {
                    this.nineWordCount++;
                }
                this.totalWordCount++;
            }
        }
        catch (Exception e) {
            System.out.print(e);
        }
        this.isProcessed = true;
    }

    /**
     *
     * @return True if valid words text file has been processed
     * Word file needs to be processed just once
     */
    public boolean isProcessed() {
        return this.isProcessed;
    }

    /**
     * Factory method to create Singleton Object.
     * The instance is created lazily on first call; no synchronization is
     * performed here.
     *
     * @return Singleton object
     */
    public static FastDictionary getInstance() {
        if (fastDictionary == null) {
            fastDictionary = new FastDictionary();
        }
        return fastDictionary;
    }

    /**
     * @return number of nine-character lines added by processFile so far
     */
    public int getNineWordCount() {
        return this.nineWordCount;
    }
}
**Node**
package com.FastDictionary;
import java.util.HashMap;
/**
* Node of the Trie Data Structure used for FastDictionary
*/
public class DictionaryNode {
// Character which the Node represents
private char nodeChar;
// Points to children
private HashMap<Character, DictionaryNode> children = new HashMap<Character,DictionaryNode>();
// Is Node the last character for a valid word
private boolean isLeaf;
/**
* To create Root Node
*/
DictionaryNode() {
this.nodeChar = '.';
this.isLeaf = false;
}
/**
* To create Child Node
* @param c Character that Node represents
*/
DictionaryNode(char c) {
this.nodeChar = c;
isLeaf = false;
}
/**
*
* @param c Character that Node represents
* @return Child Node which was created
*/
public DictionaryNode addChild(char c) {
DictionaryNode child = new DictionaryNode(c);
this.children.put(c, child);
return child;
}
/**
*
* @return true if Node is the last character for a valid word; default is false
*/
public boolean isLeaf() {
return this.isLeaf;
}
/**
* Set Node as Leaf Node for a valid word
*/
public void setLeaf() {
this.isLeaf = true;
}
/**
*
* @param c the character which the Child Node represnts
* @return Child Node representing character c; null if no such Child exists
*/
public DictionaryNode getChild(char c) {
DictionaryNode child = this.children.get(c);
return child;
}
}
答案 0 :(得分:0)
是的,他只展示了如何从根节点中选择第一个字符。但是,在沿着该字符更新currentNode
后,您可以应用完全相同的原理,从新节点中查找下一个字符。换一个角度来看他的算法:给定一个节点和一个整数L(在他的例子中是5),在该节点的子树中、与该节点的距离恰好为L层的单词里,找出第i个(在他的例子中i为1234)。
因此,在完成第一步之后,您可以从新节点递归调用该算法,并以L-1
作为深度。这是基本的想法。当然,需要填写一些细节。
首先,在下一次递归调用之前需要更新i。假设算法选择的第一个字符为d,而以排在它前面的3个字母(即a、b、c)开头的5字母单词共有1000个。那么现在,您需要从这个新节点出发,找到第(1234-1000)=234个单词。
其次,lengthFrequencyByLetter和totalLengthFrequency不再是整棵树只保存一份,而是需要为每个节点各保存一份,这将占用大量内存(RAM)。(你可以通过使用HashMap来优化它。)
一个非常概略的(高层次的)实现可能是:
// Sketch (pseudo-code): returns the index-th word among the words that end
// exactly L levels below currentNode.
// NOTE(review): Node, find_next_character, wordContainedWithin and the
// frequency table are hypothetical helpers that are not defined here.
String randomWord(Node currentNode, int L, int index) {
    // No more levels to descend: currentNode is the end of the selected word
    if (L == 0) return currentNode.wordContainedWithin();
    char ch = find_next_character(currentNode, L, index); //'d' in our example
    Node newNode = currentNode.getChild(ch); //node following d
    //following example, words_before = 1000
    // presumably read from the table kept for currentNode, not a tree-wide one
    int words_before = sum(lengthFrequencyByLetter[x][L] of all x before ch);
    // Re-base the index so it counts only words inside the chosen subtree
    int new_index = index - words_before;
    return randomWord(newNode, L-1, new_index);
}
现在要获得一个随机的L字母单词,先查找根节点的totalLengthFrequency[L],在0到该值之间生成一个数字i(此处为1234),然后这样调用randomWord:
randomWord(tree.root,L,i)