这是我的代码:
from scrapy.spider import Spider
from scrapy.selector import Selector
from thuctapsinhvietnam.items import ThuctapsinhvietnamItem
class ThuctapsinhvietnamSpider(Spider):
name = "thuctapsinhvietnam"
allowed_domains ["thuctapsinhvietnam.com"]
start_urls = [
"http://www.thuctapsinhvietnam.com/thuctap-phuong-vinh-hao-3970.html"
]
def parse(self, response):
def parse(self, response):
cases = Selector(response).xpath('//div[@id="areaThongTinCaNhan"]/table/tbody/tr[2]/td/table/tbody/tr/td/table/tbody/tr')
for case in cases:
item = ThuctapsinhvietnamItem()
item['Name'] = question.xpath(
'//td[3][@class="thongtin"]/text()')extract()[0]
item['Phone'] = question.xpath(
'//td[3][@class="thongtin"]/text()')extract()[0]
item['Email'] = question.xpath(
'//td[3][@class="thongtin"]/text()')extract()[0]
item['Dob'] = question.xpath(
'//td[3][@class="thongtin"]/text()')extract()[0]
item['Sex'] = question.xpath(
'//td[3][@class="thongtin"]/text()')extract()[0]
item['School'] = question.xpath(
'//td[3][@class="thongtin"]/text()')extract()[0]
item['Introduction'] = question.xpath(
'//td[3][@class="thongtin"]/text()')extract()[0]
item['Place'] = question.xpath(
'//td[3][@class="thongtin"]/text()')extract()[0]
yield item
我尝试在ubuntu中进行scrapy时遇到问题,我使用xpath来获取数据。 我正在尝试来自这个网站的scrapy信息,但这里有错误:
File "thuctapsinhvietnam_spider.py", line 15
cases = Selector(response).xpath('//div[@id="areaThongTinCaNhan"]/table/tbody/tr[2]/td/table/tbody/tr/td/table/tbody/tr')
^
IndentationError: expected an indented block
答案 0 :(得分:0)
你有两个:
def parse(self, response):
def parse(self, response):