嗨,我想用 Python 和 XPath 从 xxx 网站获取数据,但它只返回空结果。XPath 是我从 Chrome 复制的。请告诉我哪里做错了,谢谢。
from lxml import html,etree
import requests
import urllib2
def webText(url):
    """Fetch *url* and return the raw response body.

    Args:
        url: Address to open; any scheme ``urlopen`` accepts.

    Returns:
        The undecoded body exactly as read from the response.

    Raises:
        Whatever ``urlopen`` raises when the URL cannot be opened.
    """
    # Resolved locally so the function is self-contained and runs on both
    # Python 2 (urllib2) and Python 3 (urllib.request).
    from contextlib import closing
    try:
        from urllib2 import urlopen
    except ImportError:
        from urllib.request import urlopen

    # closing() releases the connection even when read() raises; the
    # Python 2 response object is not a context manager by itself.
    with closing(urlopen(url)) as response:
        return response.read()
# Reproducer from the question: download the page, round-trip it through
# foo.html, parse it with lxml, and evaluate an XPath copied from Chrome.

# Fetch the raw page body.
x=webText("http://www.sportscardforum.com/ttm.php?s=3161e010cc6e6fd80ddb2e6b18ab2c5d&do=listp&pl=13450&sp=4");
# Save the body to foo.html ...
f = open("foo.html", "w");
f.write(x)
f.close()
# ... then read it straight back and strip surrounding whitespace.
# NOTE(review): this second file handle is never explicitly closed.
R=open("foo.html").read().strip()
# Parse the markup into an element tree.
tree =etree.HTML(R)
# Absolute, position-based path; `x` is rebound from the page body to the
# XPath result here.
# NOTE(review): the path contains `tbody` steps. Browsers usually add <tbody>
# to the DOM even when the served HTML has none, so this may match nothing in
# the tree lxml builds -- presumably why the result is empty; confirm against
# the raw HTML.
x = tree.xpath('//*[@id="vbulletin_html"]/body/div[2]/table/tbody/tr/td[3]/table[2]/tbody/tr[1]/td/table[2]/tbody/tr[2]/td[2]/table/tbody/tr/td[1]')
# Python 2 print statement: shows the XPath result.
print x
答案 0 :(得分:0)
您可以使用以下xpath:
//b[contains(text(),'Address:')]/parent::td[1]/following-sibling::td[1]
答案 1 :(得分:0)
无需先将 HTML 写入文件再重新读取。下面的示例可以获取您想要的数据:
import requests
import urllib2
from lxml import html, etree
def webText(url):
    """Fetch *url* and return the raw response body.

    Args:
        url: Address to open; any scheme ``urlopen`` accepts.

    Returns:
        The undecoded body exactly as read from the response.

    Raises:
        Whatever ``urlopen`` raises when the URL cannot be opened.
    """
    # Resolved locally so the function is self-contained and runs on both
    # Python 2 (urllib2) and Python 3 (urllib.request).
    from contextlib import closing
    try:
        from urllib2 import urlopen
    except ImportError:
        from urllib.request import urlopen

    # closing() releases the connection even when read() raises; the
    # Python 2 response object is not a context manager by itself.
    with closing(urlopen(url)) as response:
        return response.read()
# Fetch the listing page and parse it directly; no temporary file is needed.
data = webText("http://www.sportscardforum.com/ttm.php?s=3161e010cc6e6fd80ddb2e6b18ab2c5d&do=listp&pl=13450&sp=4")
tree = etree.HTML(data)
# Anchor on the bold "Address:" label instead of an absolute path: climb to
# its nearest enclosing <tr>, then collect the text nodes of the first <td>
# of rows in any table nested below that row.
info = tree.xpath("//b[contains(text(),'Address:')]/ancestor::tr[1]//table//td[1]/text()")
for i in info:
    # The parenthesised single-argument form prints the same on Python 2 and 3.
    print(i)