我在拼写检查程序中使用了 trie 数据结构。我使用方法 `insertWord` 将文本文件中的单词插入到我在网上找到的 trie 结构中,但我不明白其中为什么要使用变量 `offset`。为什么要从 char 数组元素 `letters[i]` 中减去一个整数?该程序按预期运行,我只是想更深入地理解这段代码。任何帮助都将不胜感激!
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
/**
 * Trie-based spell checker.
 *
 * <p>Loads the words of {@code dictionary2.txt} into a trie and then reports
 * every whitespace-separated token of {@code test.txt} that is not stored in
 * the trie as a complete word.
 */
public class spellChecker {
    /**
     * Character code of {@code 'a'} (97). Subtracting it from a letter maps
     * {@code 'a'} to link index 0, {@code 'b'} to 1, ..., {@code 'z'} to 25.
     */
    private static final int FIRST_LETTER = 'a';

    /** Words most recently loaded by {@link #dictionaryRead()}. */
    private static String[] dictionaryArray;

    /**
     * Creates the root of an empty trie.
     *
     * @return a node holding the NUL character that is not marked as a word
     */
    static TrieNode createTree() {
        return new TrieNode('\0', false);
    }

    /**
     * Inserts {@code word} into the trie rooted at {@code root}.
     *
     * <p>The node reached by the last letter is always marked as a full word,
     * even when it already existed because a longer word sharing this prefix
     * was inserted earlier (e.g. "apple" followed by "app"). An empty word
     * leaves the trie unchanged.
     *
     * @param root root node of the trie
     * @param word word to insert
     * @throws ArrayIndexOutOfBoundsException if a character of {@code word}
     *         maps outside the node's {@code links} array (for example any
     *         character below {@code 'a'}, such as an uppercase letter)
     */
    static void insertWord(TrieNode root, String word) {
        TrieNode curNode = root;
        for (int i = 0; i < word.length(); i++) {
            char letter = word.charAt(i);
            int index = letter - FIRST_LETTER;
            if (curNode.links[index] == null) {
                curNode.links[index] = new TrieNode(letter, false);
            }
            curNode = curNode.links[index];
        }
        // Mark the terminal node here rather than at creation time, so a word
        // that is a prefix of an already stored word is recorded too.
        if (curNode != root) {
            curNode.fullWord = true;
        }
    }

    /**
     * Tells whether {@code word} is stored in the trie as a complete word.
     *
     * @param root root node of the trie
     * @param word word to look up
     * @return {@code true} only if every letter can be followed from
     *         {@code root} and the final node is marked as a full word;
     *         {@code false} otherwise, including when {@code word} contains a
     *         character that has no slot in the {@code links} array
     */
    static boolean find(TrieNode root, String word) {
        TrieNode curNode = root;
        for (int i = 0; i < word.length() && curNode != null; i++) {
            int index = word.charAt(i) - FIRST_LETTER;
            // A character without a slot cannot be part of any stored word.
            if (index < 0 || index >= curNode.links.length) {
                return false;
            }
            curNode = curNode.links[index];
        }
        return curNode != null && curNode.fullWord;
    }

    /**
     * Reads every whitespace-separated token of {@code dictionary2.txt}.
     *
     * <p>The result is also stored in the static {@code dictionaryArray}
     * field.
     *
     * @return the tokens in file order
     * @throws Exception if the dictionary file cannot be opened
     */
    public String[] dictionaryRead() throws Exception {
        List<String> dictionary = new ArrayList<String>();
        Scanner dictionaryScan = new Scanner(new File("dictionary2.txt")).useDelimiter("\\s+");
        try {
            while (dictionaryScan.hasNext()) {
                dictionary.add(dictionaryScan.next());
            }
        } finally {
            // Release the file handle even if reading fails part-way.
            dictionaryScan.close();
        }
        dictionaryArray = dictionary.toArray(new String[0]);
        return dictionaryArray;
    }

    /**
     * Builds the trie from the dictionary file, then prints each token of
     * {@code test.txt} that is not found in it, or a success message when
     * every token is found.
     *
     * @param args unused
     * @throws Exception if either input file cannot be opened
     */
    public static void main(String[] args) throws Exception {
        spellChecker spellcheck = new spellChecker();
        String[] words = spellcheck.dictionaryRead();

        TrieNode tree = createTree();
        for (String dictionaryWord : words) {
            insertWord(tree, dictionaryWord);
        }

        boolean mispelled = false;
        Scanner inputFileScan = new Scanner(new File("test.txt")).useDelimiter("\\s+");
        try {
            // Look up every token of the input file in the trie.
            while (inputFileScan.hasNext()) {
                String word = inputFileScan.next();
                if (!find(tree, word)) {
                    System.out.println("Mispelled word: " + word);
                    mispelled = true;
                }
            }
        } finally {
            inputFileScan.close();
        }

        if (!mispelled) {
            System.out.println("There are no errors.");
        }
    }
}
/**
 * One node of the trie: the character it stands for, its child links, and a
 * flag telling whether the path ending at this node spells a complete word.
 */
class TrieNode {
    /** Number of child slots allocated per node. */
    private static final int LINK_COUNT = 100;

    /** Character represented by this node. */
    char letter;

    /** Child nodes; a {@code null} slot means there is no child at that index. */
    TrieNode[] links = new TrieNode[LINK_COUNT];

    /** Whether a complete word ends at this node. */
    boolean fullWord;

    /**
     * Creates a node with no children.
     *
     * @param letter   character represented by the node
     * @param fullWord whether a complete word ends at the node
     */
    TrieNode(char letter, boolean fullWord) {
        this.fullWord = fullWord;
        this.letter = letter;
    }
}
答案 0 :(得分:4)
97 是字符 'a' 的数值。所以变量 `offset` 用于把 char 'a' 当作第一个字母来处理(就像它在字母表中排在第一位那样)。
答案 1 :(得分:1)
`97` 是字符 `'a'` 的数值。当您希望获得 `links` 数组中与字符 `letters[i]` 对应的索引时,必须从该字符中减去 `97`,以便将 `'a'` 映射到索引 0,`'b'` 映射到 1,……,`'z'` 映射到 25。