我正在尝试将目录及其所有内容上传到Google云端硬盘。我可以在Python中完成这个并且文件上传,但是它一次只有1个文件,每个文件都要求API,而且速度非常慢。我现在正在练习一个小目录,但是当我将来有2000个文件时,它将需要for-ev-er。有没有更快的方法来完成它,可能只有一个请求而不是每个文件的请求?
由于
这是我的主要计划:
# user wants to upload to Google Drive HOME-SYNC
print("4: upload to HOME-SYNC on Google Drive")
# assuming HOME-SYNC is empty, for now first step is copying directory
# structure on local machine to HOME-SYNC
# in the future need to ask if want to backup HOME-SYNC first, and if
# so back it up
# then need to empty it
# specify the start path
start_path = "/home/geoff/HOME-SYNC"
start_path = ff.abs_path_from_user_input(start_path)
print("START PATH")
print(start_path)
# create a directory object with start path
start_directory = Directory(start_path)
# create a google drive service resource
google_service = ff.create_google_token()
# create the directory tree on google drive
# '1YOTDKowprC2Paq95X-MIKSUG_vpuViQw' is the id of HOME-SYNC on
# Google Drive
start_directory.create_google_drive_tree(
'HOME-SYNC',
google_service,
'1YOTDKowprC2Paq95X-MIKSUG_vpuViQw')
print("FINISHED")
这是我的目录类:
class Directory():
def __init__(self, directory_path):
"""Initialize directory"""
self.directory_path = directory_path
#print("__INIT__ DIR PATH=" + self.directory_path)
def create_google_drive_tree(
self,
google_drive_folder="",
google_service=False,
parent_dir_id=''):
"""Creates the same tree in google drive that is in the Directory
object, with 'google_drive_folder' as the ROOT directory
(== Directory obj)"""
# google_drive_folder = name of the current directory
# google_service = Google API resource
# parent_dir_id = id of the parent dir on Google drive
# create the files_and_dirs list in the current directory
files_and_dirs = \
[files_and_dirs for files_and_dirs in listdir(self.directory_path)]
print(files_and_dirs)
# sorts the files and dirs so their alphabetical and files come first
files_and_dirs = \
ff.sort_files_and_dirs(self.directory_path, files_and_dirs)
# loop through files and directories, outputting if its a file or dir
# if its a dir and full_tree==true, make a recursive call by creating
# new Directory instance then listing the contents of that as well
for fd in files_and_dirs:
abs_path = ff.abs_path_from_local_dir(self.directory_path, fd)
if ff.check_file_or_dir(abs_path) == "file":
# its a file
# need to copy the file to Google Drive
file_metadata = {
'name': fd,
'parents': [parent_dir_id]
}
media = MediaFileUpload(abs_path)
file = google_service.files().create(body=file_metadata,
media_body=media,
fields='id').execute()
else:
# its a directory
# create the directory in google drive
file_metadata = {
'name': fd,
'mimeType': 'application/vnd.google-apps.folder',
'parents': [parent_dir_id]
}
file = google_service.files().create(body=file_metadata,
fields='id').execute()
# create a new Directory obj with the current Directory
# which is a subdirectory of the current Directory
sub_dir = Directory(abs_path)
# Recursively build tree inside the subdirectory
sub_dir.create_google_drive_tree(
fd,
google_service,
file.get('id'))
我在file_functions.py
中有实用程序功能def abs_path_from_user_input(start_path):
if start_path[:1] == '/':
path_type = "absolute"
else:
path_type = "relative"
if path_type != "absolute":
start_path = realpath(start_path)
return start_path
def abs_path_from_local_dir(directory, content):
abs_path = realpath(join(directory, content))
return abs_path
def sort_files_and_dirs(curr_path, files_and_dirs):
files = []
dirs = []
for file_dir in files_and_dirs:
abs_path = abs_path_from_local_dir(curr_path, file_dir)
if check_file_or_dir(abs_path) == "file":
files.append(file_dir)
else:
dirs.append(file_dir)
files.sort()
dirs.sort()
combined = []
for f in files:
combined.append(f)
for d in dirs:
combined.append(d)
return combined
def check_file_or_dir(path):
if not exists(path):
print("ERROR: PATH IS NOT VALID: " + path)
return False;
else:
if isfile(path):
return "file"
else:
return "dir"
def is_valid_dir(path):
if exists(path):
# the path is a valid path
if not isfile(path):
# its a valid directory
return True
else:
# its a valid file, but we want directories
return False
else:
# the path doesnt exist
return False
def create_google_token():
store = file.Storage('credentials.json')
creds = store.get()
if not creds or creds.invalid:
flow = client.flow_from_clientsecrets('client_secret.json', SCOPES)
creds = tools.run_flow(flow, store)
# service resource is the connection to google drive
service = build('drive', 'v3', http=creds.authorize(Http()))
return service
答案 0 :(得分:0)
一次一个文件,每个文件都要求API,而且速度非常慢。
上传确实需要时间。
有没有更快的方法可以实现它,可能只需要一个请求而不是每个文件的请求?
没有用于上传文件的批处理方法。您需要像现在一样一次上传一个文件。请记住,有一个配额限制,你只能这么快上传。您可以考虑对此进行多线程处理,并为要上载的每个文件运行脚本版本。但是,我不建议将其视为同一个用户,并且最终会出现配额和泛滥错误。
注意:您可以批量处理元数据,但这实际上无法解决您的问题batching request.