你好,Stack Overflow 社区,请帮帮我。我的代码是:{
import scrapy
import pandas as pd
class QuotesSpider(scrapy.Spider):
    """Scrape the profile counters of every Twitter URL listed in a CSV file.

    The URLs are loaded once, when the class body is executed, from the
    ``twitter_url`` column of the organizations CSV.
    """

    # Scrapy refuses to instantiate a spider that has no name.
    name = "popularity"

    organization = pd.read_csv("/home/jihane/Téléchargements/odm.csv/organizations.csv")
    data = organization.twitter_url.tolist()

    def start_requests(self):
        """Yield one request per usable URL found in ``data``."""
        # Class attributes are not visible as bare names inside a method
        # body, so the list has to be reached through ``self``.
        for url in self.data:
            # pandas loads empty CSV cells as NaN (a float); skip those and
            # blank strings, since neither can be turned into a request.
            if not isinstance(url, str) or not url.strip():
                continue
            yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response):
        """Print a dict describing the profile page held by ``response``.

        The dict contains the page URL under ``"url"``, the last path
        segment of that URL under ``"name"``, and one entry per label/value
        pair matched by the two CSS selectors.
        """
        y = {
            "url": response.url,
            "name": response.url.split("/")[-1],
        }
        # The class names were misspelt ("ProfileNavlabel",
        # "Profile-Nav-value"); both selectors now use the same
        # "ProfileNav-*" spelling so labels and values pair up.
        labels = response.css("span.ProfileNav-label::text")
        values = response.css("span.ProfileNav-value::text")
        for t, v in zip(labels, values):
            y[t.extract()] = v.extract()
        print(y)
}
使用csv文件某一列生成列表的那部分代码报错了,我还想知道如何将字典转换为数据帧(DataFrame)。非常感谢!
答案 0 :(得分:0)
这段代码对我有效;它适用于具有相同结构的网址列表:
import scrapy
class QuotesSpider(scrapy.Spider):
    """Scrape the profile counters of every URL listed in a CSV file.

    The URLs are taken from the tenth column (index 9) of ``csvfile``; the
    first row is treated as a header and skipped.
    """

    name = "popularity"

    def start_requests(self):
        """Yield one request per non-empty URL read from the CSV file."""
        # Imported here so the method is self-contained.
        import csv

        # Text mode is required: splitting bytes with a str separator fails
        # on Python 3. ``newline=''`` is what the csv module expects.
        # NOTE(review): UTF-8 is assumed for the file encoding — confirm.
        with open('csvfile', newline='', encoding='utf-8') as f:
            reader = csv.reader(f)
            # Drop the header row; the default keeps an empty file from raising.
            next(reader, None)
            # csv.reader handles quoted commas and strips the line ending,
            # both of which a plain split(',') gets wrong. Rows too short to
            # have a tenth column are ignored instead of raising IndexError.
            urls = [row[9].strip() for row in reader if len(row) > 9]

        # The file is already closed here, so it is not held open while the
        # crawl is running.
        for url in urls:
            if url:
                yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response):
        """Yield a dict describing the profile page held by ``response``.

        The dict contains the page URL under ``"url"``, the last path
        segment of that URL under ``"name"``, and one entry per label/value
        pair matched by the two CSS selectors.
        """
        # URL and name of the company.
        y = {
            "url": response.url,
            "name": response.url.split("/")[-1],
        }
        labels = response.css("span.ProfileNav-label::text")
        values = response.css("span.ProfileNav-value::text")
        for t, v in zip(labels, values):
            # t is the counter label, v is its value.
            y[t.extract()] = v.extract()
        yield y