我正在解析一个网站,下面的代码运行时报错(我通过 Google 没能找到解决办法):
from lxml.html.soupparser import fromstring
# etree.LXML_VERSION = (4, 1, 1, 0)
# www.hbs-info.de/produkte/schweisselemente.html
fromstring(open(r"HBS Schweißelemente.htm").read())
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-3-caba4799682e> in <module>()
1 from lxml.html.soupparser import fromstring
----> 2 fromstring(open(r"HBS Schweißelemente.htm").read())
\lib\site-packages\lxml\html\soupparser.py in fromstring(data, beautifulsoup, makeelement, **bsargs)
31 used.
32 """
---> 33 return _parse(data, beautifulsoup, makeelement, **bsargs)
34
35
\lib\site-packages\lxml\html\soupparser.py in _parse(source, beautifulsoup, makeelement, **bsargs)
77 bsargs['features'] = 'html.parser' # use Python html parser
78 tree = beautifulsoup(source, **bsargs)
---> 79 root = _convert_tree(tree, makeelement)
80 # from ET: wrap the document in a html root element, if necessary
81 if len(root) == 1 and root[0].tag == "html":
\lib\site-packages\lxml\html\soupparser.py in _convert_tree(beautiful_soup_tree, makeelement)
153 prev = res_root
154 for e in reversed(pre_root):
--> 155 converted = convert_node(e)
156 if converted is not None:
157 prev.addprevious(converted)
\lib\site-packages\lxml\html\soupparser.py in convert_node(bs_node, parent)
214 if handler is None:
215 return None
--> 216 return handler(bs_node, parent)
217
218 def map_attrs(bs_attrs):
\lib\site-packages\lxml\html\soupparser.py in convert_pi(bs_node, parent)
271 # interpreted it as being SGML style (<?as df>). Fix.
272 bs_node = bs_node[:-1]
--> 273 res = etree.ProcessingInstruction(*bs_node.split(' ', 1))
274 if parent is not None:
275 parent.append(res)
src/lxml/etree.pyx in lxml.etree.ProcessingInstruction (src\lxml\etree.c:79300)()
ValueError: Invalid PI name 'b'xml''
这个 ValueError 可能是什么原因导致的?