我有一个用于HTML文档的二叉搜索树(Binary Search Tree)实现,它接受文档名称和一段HTML内容,并构造一棵单词树,每个节点记录单词、所属文档的名称以及该单词在文档中的出现次数。然后将这棵树显示在TreeView控件中,如下所示:
- Root: html - d1, C: 4
- - Left: head - d1, C: 2
- - - Left: document - d1, C: 2
- - - - Left: body - d1, C: 2
- - Right: title - d1, C: 2
- - - Left: simple - d1, C: 2
- - - - Left: p - d1, C: 4
- - - - - Right: paragraphs - d1, C: 1
- - - Right: two - d1, C: 1
输入的HTML文档如下所示(停用词和特殊字符已删除):
<html>
<head>
<title>
A Simple HTML Document
</title>
</head>
<body>
<p> This is a very simple HTML document</p>
<p>It only has two paragraphs</p>
</body>
</html>
现在,我的任务是使用其他文档来更新(和扩展)此树。问题在于,对于树中已经存在的单词,需要把新文档的信息追加到现有节点上,而不是添加为新节点。期望的结果如下所示:
- Root: html - d1, C: 4, d2, C: 2
-- Left: head - d1, C: 2, d2, C: 2, d3, C:3 (and so on)
这是我的树的主要代码:
/// <summary>
/// A single node of the binary search tree: a text payload plus optional
/// left and right children.
/// </summary>
public class Node
{
    /// <summary>Left child, or null when there is none.</summary>
    public Node left { get; set; }

    /// <summary>Right child, or null when there is none.</summary>
    public Node right { get; set; }

    /// <summary>Text stored in this node; the tree orders nodes by it.</summary>
    public string data;

    /// <summary>Creates a leaf node holding <paramref name="data"/>.</summary>
    /// <param name="data">Text to store in the node.</param>
    public Node(string data)
    {
        // Both children start out as null, the default for reference-typed properties.
        this.data = data;
    }
}
/// <summary>
/// Unbalanced binary search tree of strings, ordered with
/// <c>String.Compare(string, string)</c>.
/// </summary>
public class Tree
{
    /// <summary>Top node of the tree; null while the tree is empty.</summary>
    public Node root;

    /// <summary>Creates an empty tree.</summary>
    public Tree()
    {
        root = null;
    }

    /// <summary>
    /// Adds a new node holding <paramref name="data"/>. Values that compare
    /// less than a node go to its left subtree; equal or greater values go
    /// to its right subtree, so duplicates are stored as separate nodes.
    /// </summary>
    /// <param name="data">Text to store.</param>
    /// <param name="view">Not used by this method.</param>
    public void insert(string data, TreeView view)
    {
        Node fresh = new Node(data);

        if (root == null)
        {
            root = fresh;
            return;
        }

        // Walk down until the side we need to follow has a free slot.
        Node cursor = root;
        while (true)
        {
            bool goLeft = String.Compare(data, cursor.data) < 0;
            Node next = goLeft ? cursor.left : cursor.right;

            if (next == null)
            {
                if (goLeft)
                {
                    cursor.left = fresh;
                }
                else
                {
                    cursor.right = fresh;
                }
                return;
            }

            cursor = next;
        }
    }

    /// <summary>
    /// Looks for a node whose data compares equal to <paramref name="element"/>,
    /// starting at <paramref name="root"/>.
    /// </summary>
    /// <param name="element">Text to look for.</param>
    /// <param name="root">Node at which the search starts.</param>
    /// <returns>
    /// <paramref name="element"/> when a match exists, otherwise the literal
    /// string "Not found".
    /// </returns>
    public String search(string element, Node root)
    {
        Node cursor = root;
        while (cursor != null)
        {
            int order = String.Compare(element, cursor.data);
            if (order == 0)
            {
                return element;
            }
            cursor = order < 0 ? cursor.left : cursor.right;
        }
        return "Not found";
    }
}
/// <summary>
/// Mirrors the subtree rooted at <paramref name="node"/> into the TreeView
/// hierarchy in pre-order: the node's data is appended to the text of
/// <paramref name="treeNode"/>, then the left and right children are rendered
/// as child entries labelled "Left: " and "Right: ".
/// </summary>
/// <param name="node">Tree node to render; null renders nothing.</param>
/// <param name="treeNode">TreeView entry that receives this node's text.</param>
public void preOrder(Node node, TreeNode treeNode)
{
    // An empty tree passes a null root here; there is nothing to render.
    if (node == null || treeNode == null)
    {
        return;
    }

    treeNode.Text += node.data;

    if (node.left != null)
    {
        preOrder(node.left, treeNode.Nodes.Add("Left: "));
    }

    if (node.right != null)
    {
        preOrder(node.right, treeNode.Nodes.Add("Right: "));
    }
}
/// <summary>
/// Adds a "Root: " entry to <c>treeView1</c> and renders
/// <paramref name="tree"/> beneath it.
/// </summary>
/// <param name="tree">Tree to display; a null or empty tree adds nothing.</param>
void DisplayTree(Tree tree)
{
    // Without this guard an empty tree would leave a dangling "Root: " entry
    // and then fail while rendering the missing root node.
    if (tree == null || tree.root == null)
    {
        return;
    }

    preOrder(tree.root, treeView1.Nodes.Add("Root: "));
}
onclick方法:
/// <summary>
/// Click handler: reads the document name and the document text from the
/// form, turns the text into a word list and builds a fresh tree from it.
/// </summary>
private void button6_Click(object sender, EventArgs e)
{
    // A document name is mandatory.
    string docName = textBox1.Text;
    if (docName == "")
    {
        MessageBox.Show("Please enter a document name.", "Indexing");
        return;
    }

    // So is the document content.
    string rawText = richTextBox1.Text;
    if (rawText == "")
    {
        MessageBox.Show("Missing input.", "Indexing");
        return;
    }

    // Build the word list, then create and display a new tree from it.
    List<string> words = createList(rawText, docName);
    createTree(words, treeView1, new Tree(), doc_names, docName);
}
还有树函数:
/// <summary>
/// Registers <paramref name="cur_name"/> as a new document, counts how often
/// each word occurs in <paramref name="input"/>, inserts one entry per
/// distinct word into <paramref name="bst"/> and displays the tree.
/// </summary>
/// <param name="input">Words of the document; null or empty entries are skipped. The list is not modified.</param>
/// <param name="treeView">Passed through to <c>Tree.insert</c>.</param>
/// <param name="bst">Tree that receives the entries.</param>
/// <param name="doc_names">Names of documents already indexed; <paramref name="cur_name"/> is added on success.</param>
/// <param name="cur_name">Name of the document being indexed.</param>
/// <returns>
/// <paramref name="bst"/> after insertion, or null when
/// <paramref name="cur_name"/> is already registered.
/// </returns>
public Tree createTree(List<string> input, TreeView treeView, Tree bst, List<string> doc_names, string cur_name)
{
    // Do not allow existing document names. The name must match exactly:
    // a substring test would reject "d1" merely because "d10" is registered.
    if (doc_names.Contains(cur_name))
    {
        MessageBox.Show("Document Name already exists!", "Tree Create");
        return null;
    }
    doc_names.Add(cur_name);

    // Count occurrences in a single pass. firstSeen remembers the order in
    // which distinct words first appear, which is the insertion order.
    Dictionary<string, int> counts = new Dictionary<string, int>();
    List<string> firstSeen = new List<string>();
    foreach (string word in input)
    {
        // Some elements in the word list are empty, skip them.
        if (string.IsNullOrEmpty(word))
        {
            continue;
        }

        int seen;
        if (counts.TryGetValue(word, out seen))
        {
            counts[word] = seen + 1;
        }
        else
        {
            counts.Add(word, 1);
            firstSeen.Add(word);
        }
    }

    foreach (string word in firstSeen)
    {
        bst.insert(word + " - " + cur_name + ", C:" + counts[word], treeView);
    }

    DisplayTree(bst);
    treeView1.ExpandAll();
    return bst;
}
编辑:不用了,我已经解决了这个问题,其实很简单。我只需要修改输入到树的列表,而不必修改树本身。所以我添加了一个简单的辅助函数来追加新数据:
/// <summary>
/// Merges the entries produced by <c>createList(data, dname)</c> into
/// <paramref name="cur_data"/>. An entry whose leading word already exists in
/// <paramref name="cur_data"/> has "<paramref name="dname"/>: count; " appended
/// to the first existing entry for that word; any other entry is added to the
/// end of the list.
/// </summary>
/// <param name="cur_data">Entries collected so far; modified in place.</param>
/// <param name="data">Raw text of the new document, handed to <c>createList</c>.</param>
/// <param name="dname">Name of the new document.</param>
/// <returns><paramref name="cur_data"/> (null when it was passed as null).</returns>
public List<string> appendToList(List<string> cur_data, string data, string dname)
{
    List<string> newDataList = createList(data, dname);
    if (newDataList == null || cur_data == null)
    {
        return cur_data;
    }

    // Index the first entry for every word once, instead of re-scanning
    // cur_data (and re-extracting keys) for each new entry.
    Dictionary<string, int> indexByWord = new Dictionary<string, int>();
    for (int j = 0; j < cur_data.Count; j++)
    {
        string existing = cur_data[j];
        if (string.IsNullOrEmpty(existing))
        {
            continue;
        }

        string existingWord = entryKey(existing);
        if (!indexByWord.ContainsKey(existingWord))
        {
            indexByWord.Add(existingWord, j);
        }
    }

    for (int i = 0; i < newDataList.Count; i++)
    {
        string entry = newDataList[i];

        // An empty entry carries no word, so there is nothing to merge.
        if (string.IsNullOrEmpty(entry))
        {
            continue;
        }

        string word = entryKey(entry);
        int at;
        if (indexByWord.TryGetValue(word, out at))
        {
            // NOTE(review): assumes createList emits the count between ": " and ';',
            // matching the "name: count; " text appended below - confirm against createList.
            int wcsub = entry.IndexOf(':') + 2;
            int wclength = entry.IndexOf(';') - wcsub;
            string wc = entry.Substring(wcsub, wclength);
            cur_data[at] += dname + ": " + wc + "; ";
        }
        else
        {
            cur_data.Add(entry);
            // Later entries for the same word must merge into this one.
            indexByWord.Add(word, cur_data.Count - 1);
        }
    }

    return cur_data;
}

// Returns the text before the first space, or the whole entry when it has no space.
private static string entryKey(string entry)
{
    int space = entry.IndexOf(' ');
    return space < 0 ? entry : entry.Substring(0, space);
}