我有一个爬虫蜘蛛,我想将其与管道连接,我的爬虫项目是
def parse(self, response):
x = response.xpath("//script[starts-with(.,'window._sharedData')]/text()").extract_first()
json_string = x.strip().split('= ')[1][:-1]
data = json.loads(json_string)
edges = data['entry_data']['ProfilePage'][0]['graphql']['user']['edge_owner_to_timeline_media']['edges']
for i in edges:
url = 'https://www.instagram.com/p/' + i['node']['shortcode']
video = i['node']['is_video']
date_posted_timestamp = i['node']['taken_at_timestamp']
date_posted_human = datetime.fromtimestamp(date_posted_timestamp).strftime("%d/%m/%Y %H:%M:%S")
like_count = i['node']['edge_media_preview_like']['count'] if "edge_media_preview_like" in i['node'].keys() else ''
comment_count = i['node']['edge_media_to_comment']['count'] if 'owner' in i['node'].keys() else ''
handle = i['node']['owner']['id'] if 'owner' in i['node'].keys() else ''
usernameid = i['node']['owner']['username']
captions = ""
if i['node']['edge_media_to_caption']:
for i2 in i['node']['edge_media_to_caption']['edges']:
captions += i2['node']['text'] + "\n"
if video:
image_url = i['node']['display_url']
else:
image_url = i['node']['thumbnail_resources'][-1]['src']
item = {'handleid': handle,'usernameid': usernameid,'postURL': url, 'isVideo': video, 'date_posted': date_posted_human,
'timestamp': date_posted_timestamp, 'likeCount': like_count, 'commentCount': comment_count, 'image_url': image_url,
'captions': captions[:-1]}
if video:
yield scrapy.Request(get_url(url), callback=self.get_video, meta={'item': item})
else:
item['videoURL'] = ''
yield item
pipelines.py 的内容:
class InstascraperPipeline:
def process_item(self, item, spider):
print("pipeline :" + item, ['handleid'][0])
return item
它给了我这个错误,我不知道从哪里开始
2021-01-11 17:48:45 [scrapy.core.scraper] ERROR: Error processing {'handleid': '40501747559', 'usernameid': 'omnesinfluencers', 'postURL': 'https://www.instagram.com/p/CIk88MzouYm', 'isVi
deo': False, 'date_posted': '09/12/2020 16:41:47', 'timestamp': 1607517707, 'likeCount': 732, 'commentCount': 2, 'image_url': 'https://instagram.fbsb8-1.fna.fbcdn.net/v/t51.2885-15/sh0.08
/e35/s640x640/130284179_224818179237248_5049337129452224360_n.jpg?_nc_ht=instagram.fbsb8-1.fna.fbcdn.net&_nc_cat=108&_nc_ohc=G5LBpPMpPvsAX_C8YfB&tp=1&oh=d42c038193b615d50e11d75edc367217&o
e=6025DA66', 'captions': 'OMNES Influencers’ platform is influencer approved! \nWe take care of our beloved OMNESians. \nSign up now and enjoy being one!', 'videoURL': ''}
Traceback (most recent call last):
File "c:\users\wanna\pycharmprojects\pythonproject\venv\lib\site-packages\twisted\internet\defer.py", line 654, in _runCallbacks
current.result = callback(current.result, *args, **kw)
File "c:\users\wanna\pycharmprojects\pythonproject\venv\lib\site-packages\scrapy\utils\defer.py", line 150, in f
return deferred_from_coro(coro_f(*coro_args, **coro_kwargs))
File "C:\Users\wanna\PycharmProjects\pythonProject\instascraper\instascraper\pipelines.py", line 11, in process_item
print("pipeline :" + item, ['handleid'][0])
TypeError: can only concatenate str (not "dict") to str
2021-01-11 17:48:45 [scrapy.core.engine] INFO: Closing spider (finished)
谁能帮助我我做错了什么?