以下是我试图从中获取数据的网站的代码以及代码,但程序运行时不会向csv输出文件写入任何内容。另外,我从chrome检查器控制台获得了xpath,因此必须正确。
以下是代码:
from scrapy.spiders import CrawlSpider, Rule
from scrapy.selector import HtmlXPathSelector
from insta.items import Insta
class instagram(CrawlSpider):
name = "instagram"
allowed_domains = ["zymanga.com"]
start_urls = ['http://zymanga.com/millionplus/%sf' % page for page in range(1,163)]
def parse_items(self, response):
hxs = HtmlXPathSelector(response)
titles = hxs.xpath('//*[@id="username"]/')
items = []
for title in titles:
item = Insta()
item["username"] = title.select("a/text()").extract()
items.append(item)
return(items)