我的本地目录中有10000个10Mb文件,我正在尝试使用boto3通过顺序上传方法将其上传到Amazon S3中的存储桶。我面临的唯一问题是将大量文件上传到S3需要花费大量时间。我想知道是否有有效的方法(使用多线程或多处理)将文件上传到Amazon S3?
root_path ="/home/shivraj/folder/"
path = root_path+'folder_raw/' # use your path
dest_path = root_path+'folder_parsed/'
backup_path = root_path+'folder_backup/'
def parse_ivn_files():
src_files_list = glob.glob(path + "*.txt.zip") # .log files in the path files
try:
if src_files_list:
for file_ in src_files_list:
df = pd.read_csv(file_,compression="zip",sep="|", header=None)
file = file_.replace(path,'')
file_name = file.replace(".txt.zip",'')
df.columns=["Date","Time","System_Event","Event_Type","Event_sub_type","Latitude","Longitude","Field_1","Field_2","Field_3","Field_4","Event_Number","Event_Description"]
new_df=df['Event_Description'].str.split(',',expand=True)
large_df = pd.concat([df,new_df],axis=1)
large_df.to_csv(dest_path+file_name+".csv",index=False)
s3.meta.client.upload_file(dest_path+file_name+".csv", 's3-bucket-name-here', 'ivn_parsed/'+file_name+".csv")
s3.meta.client.upload_file(path+file_name+".txt.zip", 's3-bucket-name-here', 'ivn_raw_backup/'+file_name+"_bk.txt.zip")
os.rename(path+file_name+".txt.zip", backup_path+file_name+"_bk.txt.zip")
else:
print("No files in the source folder")
except:
raise FileNotFoundError
root_path ="/home/shivraj/folder/"
path = root_path+'folder_raw/' # use your path
dest_path = root_path+'folder_parsed/'
backup_path = root_path+'folder_backup/'
def parse_ivn_files():
src_files_list = glob.glob(path + "*.txt.zip") # .log files in the path files
try:
if src_files_list:
for file_ in src_files_list:
df = pd.read_csv(file_,compression="zip",sep="|", header=None)
file = file_.replace(path,'')
file_name = file.replace(".txt.zip",'')
df.columns=["Date","Time","System_Event","Event_Type","Event_sub_type","Latitude","Longitude","Field_1","Field_2","Field_3","Field_4","Event_Number","Event_Description"]
new_df=df['Event_Description'].str.split(',',expand=True)
large_df = pd.concat([df,new_df],axis=1)
large_df.to_csv(dest_path+file_name+".csv",index=False)
s3.meta.client.upload_file(dest_path+file_name+".csv", 's3-bucket-name-here', 'ivn_parsed/'+file_name+".csv")
s3.meta.client.upload_file(path+file_name+".txt.zip", 's3-bucket-name-here', 'ivn_raw_backup/'+file_name+"_bk.txt.zip")
os.rename(path+file_name+".txt.zip", backup_path+file_name+"_bk.txt.zip")
else:
print("No files in the source folder")
except:
raise FileNotFoundError