import urllib
from urllib.request import urlopen
import xml.etree.ElementTree as etree
# Fetch the document and show the HTTP status plus the first raw line of the
# body. The context manager closes the connection once we are done with it.
with urllib.request.urlopen("http://regnskaber.virk.dk/32673592/eGJybHN0b3JlOi8vWC1GNzY5MUY0Ny0yMDE0MDMyOV8xMzQxNThfMTc5L3hicmw.xml") as response:
    print(response.getcode())
    # First line of the undecoded body, useful for checking what was received.
    print(response.readline())
请帮助解决此编码问题。我需要解析XML内容。
答案 0 :(得分:4)
响应开头的魔术字节 0x1f 0x8b
表示数据经过 gzip 压缩。服务器通常会压缩数据以进行传输,浏览器会自动将其解压缩。在这里,您必须自己执行第二步(解压缩):
import urllib
from urllib.request import urlopen
import xml.etree.ElementTree as ET
from io import BytesIO
import gzip
# Download the raw body; the context manager closes the connection afterwards.
# The URL is split across adjacent string literals purely for line length.
with urllib.request.urlopen(
    "http://regnskaber.virk.dk/32673592/"
    "eGJybHN0b3JlOi8vWC1GNzY5MUY0Ny0yMDE0MDMyOV8xMzQxNThfMTc5L3hicmw.xml"
) as response:
    print(response.getcode())
    data = response.read()

# urlopen() hands back the body exactly as it was sent and does not undo
# gzip compression the way a browser would. A gzip stream starts with the
# bytes 0x1f 0x8b, so decompress only when that signature is present; an
# uncompressed body is passed through unchanged.
if data[:2] == b"\x1f\x8b":
    text = gzip.decompress(data)
else:
    text = data

# Parse the XML bytes; the parser handles the document's declared encoding.
tree = ET.fromstring(text)
print(tree)
输出:
200
<Element '{http://www.xbrl.org/2003/instance}xbrl' at 0x104d09098>