Go 的 HTML 解析包(golang.org/x/net/html)似乎会忽略 <input> 标签,并把它当作 <form> 标签的文本内容。绕过这一限制的最佳方法是什么?
package main
import (
"fmt"
"strings"
"golang.org/x/net/html"
)
// HTML is the sample document tokenized by main: a minimal page whose form
// contains a single <input> submit button written in self-closing form.
const HTML = `
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8"/>
<title>selected attribute</title>
</head>
<body>
<form method="GET">
<input type="submit" value="submit"/>
</form>
</body>
</html>
`
// main tokenizes the HTML constant and prints the name of every element tag
// it encounters, one per line. Both ordinary start tags (<form ...>) and
// self-closing tags (<input .../>, <meta .../>) are reported.
func main() {
	z := html.NewTokenizer(strings.NewReader(HTML))
	for {
		switch z.Next() {
		case html.ErrorToken:
			// ErrorToken ends the token stream (normally because the input
			// is exhausted), so there is nothing left to print.
			return
		case html.StartTagToken, html.SelfClosingTagToken:
			// A tag written as <name .../> is reported as SelfClosingTagToken
			// rather than StartTagToken; matching only StartTagToken would
			// silently skip the <input/> element.
			// The second result of TagName only says whether the tag has
			// attributes, which is not needed here.
			name, _ := z.TagName()
			fmt.Println(string(name))
		}
	}
}
答案 0 :(得分:2)
> 忽略 <input> 标签,并把它当作 <form> 标签的文本内容
您的前提是错误的:解析器并不会忽略 <input> 标签,如下例所示:
package main
import (
"fmt"
"golang.org/x/net/html"
"strings"
)
// HTML is the sample document parsed by main: a minimal page whose form
// contains a single <input> submit button with value="submit".
const HTML = `
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8"/>
<title>selected attribute</title>
</head>
<body>
<form method="GET">
<input type="submit" value="submit"/>
</form>
</body>
</html>
`
// main parses the HTML constant into a node tree, walks it depth-first, and
// prints the "value" attribute of every <input> element that has one.
func main() {
	doc, err := html.Parse(strings.NewReader(HTML))
	if err != nil {
		// The returned tree must not be used when parsing fails, so report
		// the problem and stop instead of walking it.
		fmt.Println("parse error:", err)
		return
	}

	// walk visits n and then each of its children in document order.
	var walk func(*html.Node)
	walk = func(n *html.Node) {
		if n.Type == html.ElementNode && n.Data == "input" {
			for _, attr := range n.Attr {
				if attr.Key == "value" {
					fmt.Println(attr.Val)
					// Only the first "value" attribute is printed.
					break
				}
			}
		}
		for child := n.FirstChild; child != nil; child = child.NextSibling {
			walk(child)
		}
	}
	walk(doc)
}
这将打印出“submit”,即 <input> 标签的 value 属性值。
答案 1 :(得分:0)
应该使用 html.SelfClosingTagToken 代替 html.StartTagToken 来解析 input 标签。因为示例中的标签写成了自闭合形式 <input .../>,分词器会把它报告为 SelfClosingTagToken,而不是 StartTagToken。