import urllib.request
from urllib.request import Request, urlopen
url = "http://www.hltv.org/match/2294502-clg-liquid-esea-invite-season-18-na"
#sock = urllib.request.urlopen(url)
sock = Request(url, "headers={'User-Agent': 'Mozilla/5.0'}")
#myhtml = sock.read()
myhtml = urlopen(sock).read()
for item in myhtml.split("</span>"):
if '<span style="font-size:12px;">' in item:
print (item [ item.find('<span style="font-size:12px;">' + len('<tag>')) : ])
这是在编译和运行代码时拆分出来的错误。
Traceback (most recent call last):
File "Z:/hltv.py", line 10, in <module>
myhtml = urlopen(sock).read()
File "C:\Python34\lib\urllib\request.py", line 153, in urlopen
return opener.open(url, data, timeout)
File "C:\Python34\lib\urllib\request.py", line 453, in open
req = meth(req)
File "C:\Python34\lib\urllib\request.py", line 1104, in do_request_
raise TypeError(msg)
TypeError: POST data should be bytes or an iterable of bytes. It cannot be of type str.
我是python的新手,所以请尽量让修复尽可能简单谢谢。 (目前使用的是python 3.x)
答案 0 :(得分:1)
我使用Python 2重写它
请注意最后一行的括号!
应该是
item.find(TAG) + len(TAG)
不是
item.find(TAG + len(TAG))
在你的代码中!
# -*- coding: utf-8-*-
import urllib2
req = urllib2.Request("http://www.hltv.org/match/2294502-clg-liquid-esea-invite-season-18-na")
req.add_header('User-Agent', 'Mozilla/5.0')
response = urllib2.urlopen(req)
the_page = response.read()
TAG = '<span style="font-size:12px;">'
for item in the_page.split("</span>"):
if TAG in item:
print (item [ item.find(TAG) + len(TAG) : ])
hazed
ptr
FNS
tarik
reltuC
nitr0
adreN
FugLy
NAF-FLY
daps
BeautifualSoup更适合HTML内容的复杂查询。
答案 1 :(得分:1)
这不是解析html
的正确方法。您可以根据具体要求使用标准库,例如BeautifulSoup或lxml。 BeautifulSoup有各种API来选择标签等。
对于Eg:
import urllib2
from bs4 import BeautifulSoup
req = urllib2.Request("http://www.hltv.org/match/2294502-clg-liquid-esea-invite-season-18-na")
req.add_header('User-Agent', 'Mozilla/5.0')
response = urllib2.urlopen(req)
the_page = response.read()
soup = soup = BeautifulSoup(the_page)
#To select all the span tags
span_tags = soup.find_all("span")
#To get the player names
player_names = soup.find_all("span" ,attrs={"style":"font-size:12px;"})