我正在使用Google API v4从Google Analytics(分析)下载一些数据。我正在获取数据,并且在超出pageSize时尝试使用pageToken
参数来请求下一页。然而,我的分页功能,这应通过新的pageToken
到一个新的请求,进入一个循环,其中它执行无休止相同,第一请求(考虑到这行:print(response['reports'][0]['nextPageToken'])
打印总是的最大值pagesize,即nextPageToken
在第一个请求中所取的值)。
查询应产生约8000个结果/行。
我试图什么是建立在该请求的pageToken
参数可变,使该变量取nextPageToken
的值在由递归函数提出了新的要求:
pageTokenVariable = "whatever"
sample_request = {
'viewId': '1234',
'dateRanges': {
'startDate': datetime.strftime(datetime.now() - timedelta(days = 1),'%Y-%m-%d'),
'endDate': datetime.strftime(datetime.now(),'%Y-%m-%d')
},
'dimensions': [
{'name': 'ga:date'},
{'name': 'ga:eventlabel'}
],
'metrics': [
{'expression': 'ga:users'},
{'expression': 'ga:totalevents'}
],
'pageToken':pageTokenVariable,
'pageSize': 1000
}
# pagination function
def main(client, pageTokenVariable):
response = client.reports().batchGet(
body={
'reportRequests':sample_request
}).execute()
if 'nextPageToken' in response['reports'][0]:
print(response['reports'][0]['nextPageToken']) #trying to debug
pageTokenVariable = response['reports'][0]['nextPageToken']
response = main(client, pageTokenVariable)
return(response)
尽管如此,它无法按预期工作。我想念什么?
答案 0 :(得分:0)
我不知道这是否是一个可能的答案,但你可以考虑取消pageSize
并添加max-results
parameter?
此选项最多可查询10.000个元素,如果超过10.000个元素,则可以使用start-index
option以10.000、20.000等开头。
您总是可以知道总共有多少个结果,因为答案中的一个字段包含此信息。
答案 1 :(得分:0)
您需要执行以下操作
### Something like this works for me
list = [] #I usually store the output of the pagination in a list
# pagination function
def main(client, pageTokenVariable):
return analytics.reports().batchGet(
body={
'reportRequests': [
{
'viewId': '123',
"pageToken": pageTokenVariable,
#All your other stuff like dates etc goes here
}]
}
).execute()
response = main(client, "0")
for report in response.get(reports, []) #All the stuff you want to do
pagetoken = report.get('nextPageToken', None) #Get your page token fron the FIRST request and store it a variabe
columnHeader = report.get('columnHeader', {})
dimensionHeaders = columnHeader.get('dimensions', [])
metricHeaders = columnHeader.get('metricHeader', {}).get('metricHeaderEntries', [])
rows = report.get('data', {}).get('rows', [])
for row in rows:
# create dict for each row
dict = {}
dimensions = row.get('dimensions', [])
dateRangeValues = row.get('metrics', [])
# fill dict with dimension header (key) and dimension value (value)
for header, dimension in zip(dimensionHeaders, dimensions):
dict[header] = dimension
# fill dict with metric header (key) and metric value (value)
for i, values in enumerate(dateRangeValues):
for metric, value in zip(metricHeaders, values.get('values')):
#set int as int, float a float
if ',' in value or ',' in value:
dict[metric.get('name')] = float(value)
else:
dict[metric.get('name')] = int(value)
list.append(dict) #Append that data to a list as a dictionary
while pagetoken: #This says while there is info in the nextPageToken get the data, process it and add to the list
response = main(client, pagetoken)
for row in rows:
# create dict for each row
dict = {}
dimensions = row.get('dimensions', [])
dateRangeValues = row.get('metrics', [])
# fill dict with dimension header (key) and dimension value (value)
for header, dimension in zip(dimensionHeaders, dimensions):
dict[header] = dimension
# fill dict with metric header (key) and metric value (value)
for i, values in enumerate(dateRangeValues):
for metric, value in zip(metricHeaders, values.get('values')):
#set int as int, float a float
if ',' in value or ',' in value:
dict[metric.get('name')] = float(value)
else:
dict[metric.get('name')] = int(value)
list.append(dict) #Append that data to a list as a dictionary
#So to recap
#You make an initial call to your function passing a pagetoken to get it started.
#Get the nextPageToken), process the data and append to list
#If there is data in the nextPageToken call the function, process, add to list until nextPageToken is empty
答案 2 :(得分:0)
**这是工作12个小时后的最终代码**
"""Author :AMARNADH G(INDIA)
Date last modified :2020-12-12
Description :Pulls Google Anlytics data with pagination and unsampled data
Comments :Dimentions, Metrics and DateRanges are dynamic in nature in which daterange is parameterised"""
###GOOGLE ANALYICS V4
from apiclient.discovery import build
from oauth2client.service_account import ServiceAccountCredentials
from datetime import datetime, timedelta
import io
todayStr = datetime.today().strftime('%Y-%m-%d')
YstrdyInt = datetime.today() - timedelta(days=1)
YstrdyStr = datetime.strftime(YstrdyInt, '%Y-%m-%d')
SCOPES = ['https://www.googleapis.com/auth/analytics.readonly']
KEY_FILE_LOCATION = 'C:\\Users\Amarnadh\Desktop\Python\Secret.json'
VIEW_ID = 'XXXX'
PAGESIZE = 100000
def initialize_analyticsreporting():
credentials = \
ServiceAccountCredentials.from_json_keyfile_name(KEY_FILE_LOCATION,
SCOPES)
# Build the service object.
analytics = build('analyticsreporting', 'v4',
credentials=credentials)
return analytics
def get_PT(response):
for report in response.get('reports', []):
columnHeader = report.get('columnHeader', {})
dimensionHeaders = columnHeader.get('dimensions', [])
metricHeaders = columnHeader.get('metricHeader',
{}).get('metricHeaderEntries', [])
pageToken = report.get('nextPageToken', None)
print str(pageToken) + ' at 43'
return pageToken
def get_report(analytics, pageToken='unknown'):
return analytics.reports().batchGet(body={'reportRequests': [{
'viewId': VIEW_ID,
'pageSize': PAGESIZE,
'samplingLevel': 'LARGE',
'pageToken': pageToken,
'dateRanges': [{'startDate': '2020-10-11',
'endDate': '2020-12-11'}],
'metrics': [{'expression': 'ga:sessions'}],
'dimensions': [
{'name': 'ga:longitude'},
{'name': 'ga:latitude'},
{'name': 'ga:country'},
{'name': 'ga:region'},
{'name': 'ga:date'},
{'name': 'ga:pagePath'},
],
}]}).execute()
def print_response(response):
f = io.open('Essex_GA_Geo' + todayStr + '.txt', 'a+',
encoding='utf-8')
for report in response.get('reports', []):
columnHeader = report.get('columnHeader', {})
dimensionHeaders = columnHeader.get('dimensions', [])
metricHeaders = columnHeader.get('metricHeader',
{}).get('metricHeaderEntries', [])
# pageToken=report.get('nextPageToken', None)
# rint(pageToken)
print columnHeader
# writing dimention header
for D_header in dimensionHeaders:
f.write(str.capitalize(str.replace(D_header, 'ga:', ''))
+ '|')
# print(D_header)
for M_header in list(columnHeader['metricHeader'
]['metricHeaderEntries']):
f.write(str.capitalize(str.replace(M_header['name'], 'ga:',
'')) + '|')
f.write('\n')
for row in report.get('data', {}).get('rows', []):
dimensions = row.get('dimensions', [])
Metrics = row.get('metrics', [])
# writing dimention header row data
for dimension in dimensions:
f.write(dimension + '|')
# writing metric header
for (i, values) in enumerate(Metrics):
for (metricHeader, value) in zip(metricHeaders,
values.get('values')):
f.write(value + '|')
f.write('\n')
f.close()
def main():
analytics = initialize_analyticsreporting()
response = get_report(analytics)
pageToken = get_PT(response)
print str(pageToken) + ' at 108'
print_response(response)
while pageToken:
print 'inside while ' + str(pageToken)
analytics = initialize_analyticsreporting()
response = get_report(analytics, pageToken)
pageToken = get_PT(response)
print_response(response)
print str(pageToken) + ' at 118'
if __name__ == '__main__':
main()