我有以下代码,我不知道如何打印下一页的链接,如何进入下一页?
#!/usr/bin/python2.4
# -*- coding: utf-8 -*-
import pprint
from apiclient.discovery import build
def main():
    """Run one Custom Search query and print the formatted URL of each hit.

    Builds the "customsearch" v1 service, requests up to 10 results for the
    query 'lectures', and prints the 'formattedUrl' field of every returned
    item. Only the first page of results is requested.
    """
    # NOTE(review): developerKey is left empty here; presumably a real API
    # key has to be supplied before the request can succeed.
    service = build("customsearch", "v1",
                    developerKey="")
    res = service.cse().list(
        q='lectures',
        cx='013036536707430787589:_pqjad5hr1a',
        num=10,  # Valid values are integers between 1 and 10, inclusive.
    ).execute()
    # Look the result list up directly rather than scanning every top-level
    # key for the substring 'items'; a response without results prints
    # nothing instead of failing.
    for result in res.get('items', []):
        # Parenthesised single-argument print works on Python 2 and 3 alike.
        print(result['formattedUrl'])


if __name__ == '__main__':
    main()
答案 0 :(得分:11)
响应对象的 'queries' 字段中包含 'nextPage' 条目(一个列表)。您可以用其中第一项的 'startIndex' 来确定下一次请求的起始索引。像这样:
# Fetch the first page of results.
res = service.cse().list(
    q='lectures',
    cx='013036536707430787589:_pqjad5hr1a',
    num=10,  # Valid values are integers between 1 and 10, inclusive.
).execute()

# The response may not carry a 'nextPage' entry (e.g. when there are no
# further results), so check for it before reading the start index.
next_page = res.get('queries', {}).get('nextPage')
next_response = None  # stays None when there is no following page
if next_page:
    # Repeat the same query, starting at the index the API reported for the
    # next page.
    next_response = service.cse().list(
        q='lectures',
        cx='013036536707430787589:_pqjad5hr1a',
        num=10,
        start=next_page[0]['startIndex'],
    ).execute()
答案 1 :(得分:5)
我的建议是再添加一个参数。当前代码中已经有 q、cx 和 num,您可以尝试添加 start = 10 然后执行代码。(注意:Custom Search API 的 start 是从 1 开始计数的结果索引,因此在 num=10 时,第二页严格来说对应 start=11。)
# `start` is the 1-based index of the first result to return, so with num=10
# the second page begins at 11 (start=10 would repeat result 10 of page one).
res = service.cse().list(
    q='lectures',
    cx='013036536707430787589:_pqjad5hr1a',
    num=10,
    start=11,
).execute()
第一页结果的网址不含 start 参数;第二页的网址包含 start=10 参数;第三页的网址包含 start=20,依此类推。
祝你好运!
答案 2 :(得分:0)
# Number of result pages to scrape; google_next_page stops fetching once its
# page counter reaches this value.
max_page = 3
def google_search(service, query_keywords, api_key, cse_id):
    """Run a single Custom Search query and return the raw response.

    Args:
        service: Object exposing ``cse().list(...).execute()``.
        query_keywords: Search terms, passed to the request as ``q``.
        api_key: Not used by this function; kept so existing callers that
            pass it keep working.
        cse_id: Custom search engine id, passed to the request as ``cx``.

    Returns:
        Whatever ``execute()`` returns for the request.
    """
    request = service.cse().list(q=query_keywords, cx=cse_id)
    return request.execute()
def google_next_page(service, query_keywords, api_key, cse_id, res, page, max_page, url_items):
    """Collect result items from the pages that follow ``res``.

    Starting from the response ``res``, repeatedly requests the next page
    (using the start index found under ``res['queries']['nextPage']``) and
    appends every returned item to ``url_items``.

    Args:
        service: Object exposing ``cse().list(...).execute()``.
        query_keywords: Search terms, passed to each request as ``q``.
        api_key: Not used by this function; kept for caller compatibility.
        cse_id: Custom search engine id, passed to each request as ``cx``.
        res: Response dict of the page preceding the first one to fetch.
        page: Counter for the page ``res`` represents; incremented after
            each fetch.
        max_page: Fetching stops once ``page`` reaches this value.
        url_items: List that receives the items; it is modified in place.

    Returns:
        The same ``url_items`` list, extended with the fetched items.
    """
    # A loop replaces the original tail recursion; `res` is rebound to the
    # most recently fetched page on every pass.
    while True:
        next_pages = res.get('queries', {}).get('nextPage')
        if not next_pages:
            # No further page advertised: stop instead of raising KeyError.
            return url_items
        res = service.cse().list(
            q=query_keywords,
            cx=cse_id,
            num=10,
            start=next_pages[0]['startIndex'],
        ).execute()
        # A page without an 'items' entry contributes nothing.
        url_items.extend(res.get('items', []))
        page += 1
        # `>=` rather than `==`, so a call that starts with page already at
        # or beyond max_page still terminates after one fetch.
        if page >= max_page:
            return url_items