如何使用GoogleDrive的Python快速入门循环遍历nextPageToken

时间:2017-03-23 15:15:15

标签: python loops google-drive-api

我的目标是列出每个人的Google云端硬盘中的所有项目和文件夹。我先从确保脚本本身能够运行开始。我已经把Drive REST API文档从头到尾读了一遍,最终找到了下面这段代码(也可以在here找到):

from __future__ import print_function
import httplib2
import os
import sys

from apiclient import discovery
from oauth2client import client
from oauth2client import tools
from oauth2client.file import Storage

# NOTE(review): the bare `reload(...)` call and `sys.setdefaultencoding`
# look like the Python 2 default-encoding workaround — confirm the target
# interpreter before running this script.
reload(sys)
sys.setdefaultencoding('utf-8')

# Parse the command line at import time, using the oauth2client argument
# parser as parent. `flags` is what get_credentials() hands to
# tools.run_flow(); it is None when argparse cannot be imported.
try:
    import argparse
    flags = argparse.ArgumentParser(parents=[tools.argparser]).parse_args()
except ImportError:
    flags = None

# If modifying these scopes, delete your previously saved credentials
# at ~/.credentials/drive-python-quickstart.json
SCOPES = 'https://www.googleapis.com/auth/drive.metadata.readonly'
# Client-secrets file the OAuth2 flow is created from.
CLIENT_SECRET_FILE = 'client_secret.json'
# Assigned to the flow's user_agent.
APPLICATION_NAME = 'Drive API Python Quickstart'


def get_credentials():
    """Load cached OAuth2 user credentials, running the auth flow if needed.

    The credentials file is ``~/.credentials/drive-python-quickstart.json``;
    the ``~/.credentials`` directory is created when it does not exist.
    When nothing usable is stored there, a flow built from
    ``CLIENT_SECRET_FILE`` and ``SCOPES`` is run to obtain new credentials.

    Returns:
        Credentials, either the stored ones or the newly obtained ones.
    """
    cache_dir = os.path.join(os.path.expanduser('~'), '.credentials')
    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)
    cache_file = os.path.join(cache_dir, 'drive-python-quickstart.json')

    storage = Storage(cache_file)
    creds = storage.get()
    # Fast path: stored credentials exist and are still valid.
    if creds and not creds.invalid:
        return creds

    auth_flow = client.flow_from_clientsecrets(CLIENT_SECRET_FILE, SCOPES)
    auth_flow.user_agent = APPLICATION_NAME
    if flags:
        creds = tools.run_flow(auth_flow, storage, flags)
    else:  # Needed only for compatibility with Python 2.6
        creds = tools.run(auth_flow, storage)
    print('Storing credentials to ' + cache_file)
    return creds

def main():
    """Print the name and MIME type of the files on one result page.

    Builds a Drive v3 service from the stored credentials and issues a
    single ``files().list`` request with ``pageSize=1000``. Only that one
    response is printed; the ``nextPageToken`` requested in ``fields`` is
    not followed.
    """
    authorized_http = get_credentials().authorize(httplib2.Http())
    drive = discovery.build('drive', 'v3', http=authorized_http)

    request = drive.files().list(
        pageSize=1000,
        fields="nextPageToken, files(mimeType, name)")
    files = request.execute().get('files', [])

    if not files:
        print('No files found.')
        return

    print('Files:')
    for entry in files:
        print('{0} ({1})'.format(entry['name'], entry['mimeType']))

if __name__ == '__main__':
    main()

我的问题在于nextPageToken,以及如何正确使用它。最大PageSize是1000,所以我必须遍历nextPageToken,从生成的JSON中获取它,将其放回原始循环(第66行?),以获得另外1000个结果。我该怎么做?

4 个答案:

答案 0 :(得分:3)

让我们看一下google drive api文档中的File:list Method。

在您的请求字段中,您询问nextPageToken,结果将包含nextPage的标记(如果nextPage存在)。 结果将是这样的:

{
 ...,
 "nextPageToken": "V1*3|0|XXXXXX",
 "files": [
  {
   ...
  },...
  ]
}

您可以提取nextPageToken值,如:

token = results.get('nextPageToken', None)

List方法可以使用字符串参数pageToken

  

用于在下一页继续上一个列表请求的令牌。应将其设置为上一个响应中“nextPageToken”的值。

只需在下一个请求中设置参数pageToken即可获得下一页结果:

    results = service.files().list(
        pageSize=1000,
        pageToken=token,
        fields="nextPageToken, files(mimeType, name)").execute()
    items = results.get('files', [])

现在您可以轻松制作循环以获得所有结果。

答案 1 :(得分:0)

我将尝试为您演示这个概念,具体实现需要您自己用Python完成。简短的回答是nextPageToken:nextPageToken使您能够从下一页检索结果。

enter image description here

当您执行GET请求时,nextPageToken将始终包含在响应中,因此如果您有1000个结果,但您只想显示每页20个,则可以使用nextPageToken获取剩余的980个文件。

Run this URL你会看到类似的内容:

"kind": "drive#fileList",
 "nextPageToken": "V1*3|0|CjkxOHY2aDdROE9JYkJGWUJEaU5Ybm1OVURSemJTcWFMa2lRQlVJSnVxYmI2YkYzMmhnVHozeWkwRnASBxCqqcG4kis",
 "incompleteSearch": false,

此处nextPageToken的值是您用于转到下一页的内容。当您进入下一页并获得更多结果时,将为您生成新的nextPageToken,直到您查看/获取所有结果(980-1000)。

答案 2 :(得分:0)

我在这上面遇到了很多麻烦。我没有仔细阅读示例,没有注意到nextPageToken和newStartPageToken不是一回事。

我将功能稍微分开并添加了一个循环。基本上,返回 startPageToken 并循环遍历相同的函数/根据需要调用函数。

from __future__ import print_function
import httplib2
import os

#julian
import time

from apiclient import discovery
from oauth2client import client
from oauth2client import tools
from oauth2client.file import Storage

# Parse the command line at import time, using the oauth2client argument
# parser as parent. `flags` is what get_credentials() hands to
# tools.run_flow(); it is None when argparse cannot be imported.
try:
    import argparse
    flags = argparse.ArgumentParser(parents=[tools.argparser]).parse_args()
except ImportError:
    flags = None

# If modifying these scopes, delete your previously saved credentials
# at ~/.credentials/drive-python-quickstart.json
SCOPES = 'https://www.googleapis.com/auth/drive.metadata.readonly'
# Client-secrets file the OAuth2 flow is created from.
CLIENT_SECRET_FILE = 'client_secret.json'
# Assigned to the flow's user_agent.
APPLICATION_NAME = 'Drive API Python Quickstart'


def get_credentials():
    """Return OAuth2 credentials, obtaining fresh ones when none are usable.

    Looks for ``drive-python-quickstart.json`` inside ``~/.credentials``
    (creating that directory if it is missing). If the store yields no
    credentials, or invalid ones, a flow built from ``CLIENT_SECRET_FILE``
    and ``SCOPES`` is run instead.

    Returns:
        Credentials, either the stored ones or the newly obtained ones.
    """
    user_home = os.path.expanduser('~')
    secrets_dir = os.path.join(user_home, '.credentials')
    if not os.path.exists(secrets_dir):
        os.makedirs(secrets_dir)
    secrets_file = os.path.join(secrets_dir, 'drive-python-quickstart.json')

    store = Storage(secrets_file)
    cached = store.get()
    needs_login = not cached or cached.invalid
    if not needs_login:
        return cached

    flow = client.flow_from_clientsecrets(CLIENT_SECRET_FILE, SCOPES)
    flow.user_agent = APPLICATION_NAME
    # tools.run is the fallback used when no parsed flags are available
    # (needed only for compatibility with Python 2.6).
    runner_result = (tools.run_flow(flow, store, flags) if flags
                     else tools.run(flow, store))
    print('Storing credentials to ' + secrets_file)
    return runner_result

def main():
    """Poll the Drive changes feed every ten seconds, forever.

    Builds a Drive v3 service from the stored credentials, fetches a start
    page token, processes the pending changes once, and then keeps calling
    ``DetectChanges_v3`` in an endless loop, carrying the token it returns
    into the next call. This function does not return.
    """
    authorized_http = get_credentials().authorize(httplib2.Http())
    drive = discovery.build('drive', 'v3', http=authorized_http)

    token = DetectChanges_v3(drive, StartPage_v3(drive))

    poll_interval = 10.0
    loop_started = time.time()
    while True:
        token = DetectChanges_v3(drive, token)
        # Sleep only for the remainder of the current interval, so polls
        # stay aligned to multiples of poll_interval after loop_started.
        elapsed = time.time() - loop_started
        time.sleep(poll_interval - (elapsed % poll_interval))

def StartPage_v3(service):
    """Fetch, print and return the start page token of the changes feed.

    Args:
        service: Drive service object exposing
            ``changes().getStartPageToken()``.

    Returns:
        The ``startPageToken`` value of the response, or None when the
        response has no such key.
    """
    start_token = (
        service.changes().getStartPageToken().execute().get('startPageToken'))
    print('Start token: %s' % start_token)
    return start_token

def DetectChanges_v3(service, saved_start_page_token):
    """Print every pending change and return the token for the next poll.

    Walks all pages of ``changes().list`` starting at
    ``saved_start_page_token`` and follows ``nextPageToken`` until a
    response no longer carries one.

    Args:
        service: Drive service object exposing ``changes().list(...)``.
        saved_start_page_token: Token to start from. When it is None no
            request is made and None is returned.

    Returns:
        The ``newStartPageToken`` of the last page that carried one, or the
        unchanged ``saved_start_page_token`` if no page did.
    """
    # Begin with our last saved start token for this user or the
    # current token from getStartPageToken()
    page_token = saved_start_page_token
    while page_token is not None:
        response = service.changes().list(
            pageToken=page_token, spaces='drive').execute()
        # Default to an empty list so a page without a 'changes' key is
        # skipped instead of raising TypeError.
        for change in response.get('changes', []):
            # Process change. The entry is printed as-is; it is not assumed
            # to contain a 'file' object, so entries without one no longer
            # raise AttributeError.
            print('Change found for: %s' % change)
        if 'newStartPageToken' in response:
            # Last page, save this token for the next polling interval
            saved_start_page_token = response.get('newStartPageToken')
        page_token = response.get('nextPageToken')
    return saved_start_page_token

if __name__ == '__main__':
    main()

答案 3 :(得分:0)

当令牌(用于下一页)不为空时,您必须循环,就像最后的代码一样:

(不要忘记先安装以下依赖:)

pip install --upgrade google-api-python-client google-auth-httplib2 google-auth-oauthlib

复制并粘贴此代码(不要忘记在最后更改您的路径和您的 googleDrive 文件夹 ID)

from __future__ import print_function
import pickle
import os.path
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
from googleapiclient.http import MediaFileUpload, MediaIoBaseDownload
            
# If modifying these scopes, delete the file token.pickle.
# OAuth scopes handed to InstalledAppFlow.from_client_secrets_file() by
# authenticate().
SCOPES = [
'https://www.googleapis.com/auth/spreadsheets', 
"https://www.googleapis.com/auth/drive.file", 
"https://www.googleapis.com/auth/drive"
]
             

# FOR AUTHENTICATION
def authenticate(token_path='token.pickle',
                 credentials_path='YOUR PATH FOR THE CREDENTIALS JSON/credentials.json'):
    """Return an authorized Drive v3 service, reusing cached credentials.

    Credentials are loaded from ``token_path`` when that file exists. If
    they are missing or not valid they are refreshed (when expired and a
    refresh token is present) or obtained through the local-server OAuth
    flow, and then written back to the same ``token_path`` so the next call
    can reuse them.

    Args:
        token_path: Pickle file used as the credential cache. It is both
            read and written; previously the cache was read from
            ``token.pickle`` but saved elsewhere, so it was never reused.
        credentials_path: Client-secrets JSON file for the OAuth flow.
            Replace the placeholder default with the real location.

    Returns:
        The service object built by ``build('drive', 'v3', ...)``.
    """
    creds = None
    if os.path.exists(token_path):
        # NOTE: pickle.load can execute arbitrary code from the file; only
        # point token_path at a cache this program wrote itself.
        with open(token_path, 'rb') as token:
            creds = pickle.load(token)

    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                credentials_path, SCOPES)
            creds = flow.run_local_server(port=0)

        # Save to the same path that is read above.
        with open(token_path, 'wb') as token:
            pickle.dump(creds, token)

    return build('drive', 'v3', credentials=creds)

    
# LISTS TO TAKE ALL FILES AND IDs FROM SPECIFIC FOLDER
# listFilesDrive collects one [name, id] pair per file appended by
# listFilesFromGoogleFolder().
listFilesDrive=[]
# NOTE(review): nothing visible here reads this module-level list —
# confirm before removing it.
line = []
        
# TO TAKE ALL FILES FROM SPECIFIC FOLDER
def listFilesFromGoogleFolder(IDFolder):
    """Collect and print every file whose parent is the given Drive folder.

    Requests ``files().list`` page by page, passing each response's
    ``nextPageToken`` as ``pageToken`` of the next request until a response
    carries no token. Each file is appended to the module-level
    ``listFilesDrive`` as a ``[name, id]`` pair and printed. Finally the
    length and contents of ``listFilesDrive`` are printed.

    Args:
        IDFolder: ID of the Drive folder to list. Every request, including
            the first one, now uses this argument rather than a global.
    """
    # Authenticate once and reuse the service for every page.
    service = authenticate()
    query = "'{}' in parents".format(IDFolder)

    found_any = False
    # None on the first request; the API client omits None-valued params.
    token = None
    while True:
        results = service.files().list(
            q=query,
            pageToken=token,
            fields="nextPageToken, files(id, name)").execute()
        items = results.get('files', [])

        if items and not found_any:
            print('Files:')
            found_any = True
        for item in items:
            listFilesDrive.append([item['name'], item['id']])
            print(u'{0} ({1})'.format(item['name'], item['id']))

        # TAKE THE TOKEN FOR THE NEXT PAGE; IT IS ABSENT ON THE LAST PAGE
        token = results.get('nextPageToken')
        if token is None:
            break

    if not found_any:
        print('No files found.')

    print(len(listFilesDrive))
    print(listFilesDrive)
        
        
        
# put your specific information
# Script entry point: list the folder whose ID is configured below.
if __name__ == '__main__':
    # Placeholder; replace with the ID of the Drive folder to list.
    FolderIDFromGDrive='YOUR FOLDER ID'
    listFilesFromGoogleFolder(FolderIDFromGDrive)