如果我对以下代码的理解是正确的,那么Python会从我正在抓取的网页的源代码中返回一个列表(这些数据没有存放在任何HTML标签中,因此我没有使用XPath之类的方式来提取我想要的数据):
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
from scrapy.selector import Selector
from scrapy.item import Item
from scrapy.spider import BaseSpider
import re
import json
class ExampleSpider(CrawlSpider):
name = "goal4"
allowed_domains = ["whoscored.com"]
start_urls = ["http://www.whoscored.com"]
download_delay = 1
rules = [Rule(SgmlLinkExtractor(allow=(''),deny=('/News', '/Fixtures', '/Graphics', '/Articles', '/Live', '/Matches', '/Explanations', '/Glossary', '/Players', 'ContactUs', 'TermsOfUse'),), follow=False, callback='parse_item')]
def parse_item(self, response):
sel = Selector(response)
match1 = re.search(re.escape("DataStore.prime('stage-player-stat', defaultTeamPlayerStatsConfigParams.defaultParams , ") \
+ '(\[.*\])' + re.escape(");"), response.body)
if match1 is not None:
playerdata1 = match1.group(1)
for player in json.loads(playerdata1):
player['Name'],',',player['FirstName'],',',player['LastName']
我已经弄清楚如何在我打印的数据元素之间添加逗号,但这样会在每个元素和逗号之间多打印一个空格,输出如下:
Name , FirstName , LastName
我怎样才能这样打印它:
Name,FirstName,LastName
谢谢
答案 0 :(得分:3)
简单地player['Name'] + ',' + player['FirstName'] + ',' + player['LastName']
会做你想做的事。这样做的原因是,print 语句中的 `,` 会输出 <space><string>,其中 <space> 是一个空格,<string> 是您的字符串。
答案 1 :(得分:1)
# Fill the template from the keys of `player`. The u"" literal gives a unicode
# template on Python 2 (what "...".decode() produced) and also works on
# Python 3, where str has no .decode() method.
print(u"{Name},{FirstName},{LastName}".format(**player))