我在一个论坛上看到了这个问题,想弄明白它。题目是希望从 XML(或 XML 树本身)构建一棵二叉树并将其打印出来(即打印 XML)吗?题目列在下面。我们可以使用类似栈的结构来打印 XML,但我只需要一个例子来更好地理解。
“将xml文件转换为树,假设文件已被解析,并且您有迭代器来获取下一个节点”
答案 0 :(得分:0)
/*
This code is an exercise written while working through the educative.io
Coderust questions. It is written in Go and follows the Coderust suggestion
to use a stack and an XML tokenizer to decide the tree level of insertion.
The stack is a simple local implementation on a slice rather than the
standard container/list package; the XML decoder is from encoding/xml.
The decoder already performs tag matching, so the code just handles the
insertion of each tag on xml.StartElement until it hits xml.CharData, which
is the inner XML string of a leaf node that needs to be inserted.
The code does not handle XML syntax errors such as unmatched tags; for a
fully functional parser, xml.Unmarshal() should be used. This is an example
of an on-the-spot implementation for a coding question.
The test XML strings and the printTree() function were added to print the
tree output for testing. There is no plan to improve this code further.
*/
package main
import (
	"encoding/xml"
	"fmt"
	"io"
	"strings"
)
// Sample XML documents that main feeds to xml2Tree and printTree.
const (
// xmlStr1 begins with an XML declaration and holds a <users> root with two
// <user> elements; each <user> carries a "type" attribute and nested
// <name> and <social> children.
xmlStr1 = `<?xml version="1.0" encoding="UTF-8"?>
<users>
<user type="admin">
<name>Elliot</name>
<social>
<facebook>https://facebook.com</facebook>
<twitter>https://twitter.com</twitter>
<youtube>https://youtube.com</youtube>
</social>
</user>
<user type="reader">
<name>Fraser</name>
<social>
<facebook>https://facebook.com</facebook>
<twitter>https://twitter.com</twitter>
<youtube>https://youtube.com</youtube>
</social>
</user>
</users>`
// xmlStr2 is an HTML-like document with no XML declaration; the literal
// starts with a newline before the <html> root element.
xmlStr2 = `
<html>
<body>
<div>
<h1>CodeRust</h1>
<a>http://coderust.com</a>
</div>
<div>
<h2>Chapter 1</h2>
</div>
<div>
<h3>Chapter 2</h3>
<h4>Chapter 2.1</h4>
</div>
</body>
</html>`
)
// node is one vertex of the tree built by xml2Tree. An element node stores
// its tag name and has tagOpen set; a text node stores the character data
// itself in name, has tagOpen unset, and has no children.
type node struct {
	name    string  // element: xml.StartElement.Name.Local; text node: the raw character data
	tagOpen bool    // true for nodes created from a start tag, false for text nodes
	child   []*node // children in document order; nil for a leaf
}

// xml2Tree tokenizes the XML document in s with encoding/xml and builds a
// tree of nodes from it. A stack of currently open elements decides where
// each new node is attached: a start tag becomes a child of the innermost
// open element and is pushed, character data becomes a leaf child of the
// innermost open element, and an end tag pops the stack.
//
// Whitespace-only character data (the formatting between tags) is skipped;
// any other character data is stored unmodified. Attributes, comments,
// processing instructions and directives are ignored.
//
// It returns the root node, or (nil, nil) when s contains no element at all.
// It returns a nil tree and a non-nil error when the decoder reports a syntax
// error (including truncated input), when non-whitespace text appears outside
// the root element, or when the document has more than one root element.
func xml2Tree(s string) (*node, error) {
	var (
		root  *node
		stack []*node // open elements, innermost last
	)
	decoder := xml.NewDecoder(strings.NewReader(s))

	for {
		token, err := decoder.Token()
		if err == io.EOF {
			// The decoder reports a plain io.EOF only when every element has
			// been closed; a truncated document surfaces as a syntax error.
			return root, nil
		}
		if err != nil {
			return nil, fmt.Errorf("parsing xml: %w", err)
		}

		switch elem := token.(type) {
		case xml.StartElement:
			n := &node{name: elem.Name.Local, tagOpen: true}
			switch {
			case len(stack) > 0:
				parent := stack[len(stack)-1]
				parent.child = append(parent.child, n)
			case root == nil:
				root = n
			default:
				// The stack is empty but a root already exists: this is a
				// second top-level element, which has no parent to attach to.
				return nil, fmt.Errorf("multiple root elements: %s after %s", n.name, root.name)
			}
			stack = append(stack, n)

		case xml.CharData:
			// string() copies the bytes, which the decoder may reuse on the
			// next Token call.
			text := string(elem)
			if strings.TrimSpace(text) == "" {
				continue // formatting between tags, or an empty CDATA section
			}
			if len(stack) == 0 {
				return nil, fmt.Errorf("character data outside root element: %q", text)
			}
			parent := stack[len(stack)-1]
			parent.child = append(parent.child, &node{name: text})

		case xml.EndElement:
			// The decoder already rejects mismatched tags; these checks only
			// guard the stack invariant.
			if len(stack) == 0 {
				return nil, fmt.Errorf("unexpected end tag %s", elem.Name.Local)
			}
			top := stack[len(stack)-1]
			if !top.tagOpen || top.name != elem.Name.Local {
				return nil, fmt.Errorf("unmatching tags %s %s", elem.Name.Local, top.name)
			}
			stack = stack[:len(stack)-1]
		}
	}
}
// level is the current recursion depth of printTree; it is 0 whenever no
// printTree call is in progress.
var level = 0

// printTree writes the subtree rooted at r to standard output, one node per
// line in pre-order. Each line is indented by one space per depth level (the
// root is at depth 1) and has the form "name(depth)". A nil r prints nothing.
//
// The depth is tracked in the package-level variable level, so concurrent
// calls would interfere with each other.
func printTree(r *node) {
	if r == nil {
		return
	}
	level++
	fmt.Printf("%s%s(%d)\n", strings.Repeat(" ", level), r.name, level)
	for _, c := range r.child {
		printTree(c)
	}
	level--
}
func main() {
root := (*node)(nil)
root, _ = xml2Tree(xmlStr1)
printTree(root)
root, _ = xml2Tree(xmlStr2)
printTree(root)
}