为了减少对 Sheets API 的调用次数并避免可怕的“错误 429”(请求过多)消息,我希望利用 Sheets API 的 batchGet 功能。我已将所有相关信息放入一个 Google 电子表格(spreadsheet_id)中,其中包含多个工作表(由 ranges 指定)。
下一步是将这个 batchGet 请求的响应转换为 Pandas DataFrame。
这是我的代码……如果有人能就后续步骤(即如何把数据放入 Pandas DataFrame)提供指导,那就太好了。
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
from oauth2client.service_account import ServiceAccountCredentials
import pandas as pd
# OAuth scopes requested for the service account (Drive and Sheets).
SCOPES = [
    'https://www.googleapis.com/auth/drive',
    'https://www.googleapis.com/auth/spreadsheets',
]
credentials = ServiceAccountCredentials.from_json_keyfile_name('creds.json', SCOPES)
# Only `build` is imported from googleapiclient.discovery in this script, so it
# is called directly; `discovery.build` would raise NameError here.
service = build('sheets', 'v4', credentials=credentials)
# The ID of the spreadsheet to retrieve data from.
spreadsheet_id = 'my_spreadheet_id'  # TODO: Update placeholder value.
# The A1 notation of the values to retrieve.
ranges = [
    '2016_IGA!A2:BD', '2017_IGA!A2:BD', '2018_IGA!A2:BD', '2019_IGA!A2:BD', '2020_IGA!A2:BD',
    '2016_Coles!A2:BD', '2017_Coles!A2:BD', '2018_Coles!A2:BD', '2019_Coles!A2:BD', '2020_Coles!A2:BD',  # TODO: Update placeholder value.
    '2016_WW!A2:BD', '2017_WW!A2:BD', '2018_WW!A2:BD', '2019_WW!A2:BD', '2020_WW!A2:BD',
    '2018_Aldi!A2:BD', '2019_Aldi!A2:BD', '2020_Aldi!A2:BD',
]
value_render_option = 'FORMATTED_VALUE'
# A single batchGet call fetches every range at once instead of one request per sheet.
request = service.spreadsheets().values().batchGet(
    spreadsheetId=spreadsheet_id,
    ranges=ranges,
    valueRenderOption=value_render_option,
)
response = request.execute()
答案 0 :(得分:1)
您必须从响应中获取各个范围的 values(batchGet 的响应将它们放在 valueRanges 列表中),然后用得到的列表创建 DataFrame。
# A batchGet response holds one entry per requested range under 'valueRanges';
# each entry's rows are under its own 'values' key. The .get(..., []) default
# covers entries that carry no 'values' key (NOTE(review): the API is expected
# to omit it for empty ranges - confirm against the ValueRange reference).
sheet_values = [
    row
    for value_range in response.get('valueRanges', [])
    for row in value_range.get('values', [])
]
# Optional: Perform any data cleaning/wrangling operations (Date/currency conversion)
# Create a dataframe with the extracted values. Columns are left with pandas'
# default integer labels because the requested ranges span more than three
# columns; pass columns=[...] with one name per column to label them.
df_sheet = pd.DataFrame(sheet_values)
答案 1 :(得分:1)
在 @juan Morais 回答的基础上,我做了一些修改,以下是最终的解决方案。
from googleapiclient import discovery
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
from oauth2client.service_account import ServiceAccountCredentials
import pandas as pd
from pandas.io.json import json_normalize
# OAuth scopes requested for the service account (Drive and Sheets).
SCOPES = [
    'https://www.googleapis.com/auth/drive',
    'https://www.googleapis.com/auth/spreadsheets',
]
credentials = ServiceAccountCredentials.from_json_keyfile_name('creds.json', SCOPES)
service = discovery.build('sheets', 'v4', credentials=credentials)
# The ID of the spreadsheet to retrieve data from.
spreadsheet_id = 'my_spreadheet_id'
# The A1 notation of the values to retrieve.
ranges = [
    '2016_IGA!A2:Be', '2017_IGA!A2:Be', '2018_IGA!A2:Be', '2019_IGA!A2:Be', '2020_IGA!A2:Be',
    '2016_Coles!A2:Be', '2017_Coles!A2:Be', '2018_Coles!A2:Be', '2019_Coles!A2:Be', '2020_Coles!A2:Be',  # TODO: Update placeholder value.
    '2016_WW!A2:Be', '2017_WW!A2:Be', '2018_WW!A2:Be', '2019_WW!A2:Be', '2020_WW!A2:Be',
    '2018_Aldi!A2:Be', '2019_Aldi!A2:Be', '2020_Aldi!A2:Be',
]
value_render_option = 'FORMATTED_VALUE'
# A single batchGet call fetches every range at once instead of one request per sheet.
request = service.spreadsheets().values().batchGet(
    spreadsheetId=spreadsheet_id,
    ranges=ranges,
    valueRenderOption=value_render_option,
    majorDimension='ROWS',
)
response = request.execute()
sheet_values = response.get('valueRanges', [])
# Flatten the rows of every returned range into one list. Using .get('values', [])
# lets a range that carries no 'values' key contribute zero rows instead of
# raising KeyError, which json_normalize(record_path='values') would do.
# (NOTE(review): the API is expected to omit 'values' for empty ranges - confirm.)
all_rows = [
    row
    for value_range in sheet_values
    for row in value_range.get('values', [])
]
# Rows are plain lists, so the frame gets integer column labels and shorter
# rows are padded with missing values. pd.DataFrame replaces json_normalize
# here, so this statement does not depend on pandas.io.json.
df = pd.DataFrame(all_rows)