在第29行获取语法错误,其中包括以下内容:links = parsed_body.xpath(' // div [contains(@class," b-thumb-128px")] / a / @ href')]。我已经为目标网站编写了有效的xpath,因此我不确定为什么或如何解决此特定错误。
import requests
from lxml import html
from pprint import pprint
from urlparse import urljoin
from thready import threaded
import os
import math
import csv
CACHE_DIR = os.path.join(os.path.dirname(__file__), 'wanpy')
def get_links():
STARTING_URL = 'http://example.com/en/search/?h=3&k=&p=1&sid=wan'
results_per_page = 60
response = requests.get(STARTING_URL)
dive = html.fromstring(response.text)
div = dive.xpath("//div[contains(@class, 'b-tabs-utility')]")[0].text
last_pg = math.ceil(int(div.split()[-2]) / results_per_page)
BASE_URL = 'http://example.com/en/search/?h=3&k=&p=%d&sid=wanboo'
urls = []
for i in xrange(last_pg):
response = requests.get(BASE_URL % i)
parsed_body = html.fromstring(response.text)
links = parsed_body.xpath('//div[contains(@class, "b-thumb-128px")]//a/@href')]
for link in links:
urls.append(link)
threaded(urls, scrape_inventory, num_threads=10)
def scrape_inventory():
with open("data/wan.csv", "w") as f:
fieldnames = ("model", "title", "description", "price", "image","additional_image", "scrape_url")
output = csv.writer(f, delimiter="\t")
output.writerow(fieldnames)
print "scraping %s ..." % url
response = requests.get(url)
parsed_body = html.fromstring(response.text)
name = re.sub(r'\D\W\S', "", parsed_body.xpath("//h1[contains(@class, 'b-ttl-main')]/text()"))
#description = re.sub(r'\D\W\S', "", parsed_body.xpath("//div[contains(@class, 'b-container b-editable')]/text()"))
price = re.sub(r'\D\W\S', "", round(float(parsed_body.xpath("//span[contains(@class, 'b-text-xxlarge b-text-prime')]/text()")) * 2 + 15), 2)
output.writerow([name, price])
if __name__ == '__main__':
get_links()
答案 0 :(得分:0)
相关行末尾的]
未与任何[
对齐。