I am uploading files to an Amazon S3 bucket in parallel, and my code does exactly what I need. For debugging purposes I was using from multiprocessing.dummy import Pool. Now that the program runs correctly, I dropped the .dummy and use plain from multiprocessing import Pool. After this small change, however, the upload fails: it falls through to the else branch and prints 'File Upload Cancelled'. I cannot see what the problem is. The code is posted below; I have removed most of the irrelevant parts.
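For reference, the only difference between the working run and the failing run is the import shown below (a minimal illustration, not code taken from the project itself):

# Debugging run: a pool of threads that share the parent process's memory.
from multiprocessing.dummy import Pool

# Current run: a pool of worker processes; anything submitted with
# apply_async is pickled and handed to a worker process.
from multiprocessing import Pool

pool = Pool(processes=5)  # the constructor call is the same in both cases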
def multipart_upload(bucketname, keyname, parallel_processes=5):
    """
    Parallel multipart upload.
    """
    source_size = os.stat(keyname).st_size
    mp = bucket.initiate_multipart_upload( keyname )
    bytes_per_chunk = max(int(math.sqrt(5242880) * math.sqrt(source_size)),
                          5242880)
    chunk_amount = int(math.ceil(source_size / float(bytes_per_chunk)))
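    # Chunk size scales with the square root of the file size but never drops
    # below 5 MB (5242880 bytes); e.g. a 1 GiB file is split into 15 chunks
    # of roughly 75 MB each.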
    def _upload(keyname, offset, part_num, bytes):
        try:
            print( 'Start uploading part #%d ...' % part_num )
            with FileChunkIO(keyname, 'r', offset=offset,
                             bytes=bytes) as fp:
                mp.upload_part_from_file( fp=fp, part_num=part_num )
        except Exception as e:
            print( 'FAILED uploading part #%d' % part_num )
            raise e
        else:
            print( 'UPLOADED part #%d' % part_num )
    if s3_has_uptodate_file(bucketname, keyname):
        print ( 'File already present in bucket. Upload Cancelled' )
        mp.cancel_upload()
    else:
        print ( 'File Upload beginning with {0} cores'.format( parallel_processes ) )
        pool = Pool(processes=parallel_processes)
        for i in range( chunk_amount ):
            offset = i * bytes_per_chunk
            remaining_bytes = source_size - offset
            bytes = min( [bytes_per_chunk, remaining_bytes] )
            part_num = i + 1
            start = timer()
            pool.apply_async( _upload, [keyname, offset, part_num, bytes] )
        pool.close()
        pool.join()
        end = timer()
        if len( mp.get_all_parts() ) == chunk_amount:
            mp.complete_upload()
            print ( 'File Upload Complete\nTime taken {0} seconds'.format( end - start ) )
        else:
            print ( 'File Upload Cancelled' )
            mp.cancel_upload()

multipart_upload(default_bucket, 'mybigfile.txt')
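One thing I am not doing is keeping the objects returned by apply_async. As far as I understand, apply_async returns an AsyncResult, and calling get() on it re-raises any exception that happened inside the worker; without that, worker failures pass silently. A minimal, self-contained sketch of that pattern (the work function here is just a placeholder, not my upload code):

from multiprocessing import Pool

def work(n):
    return n * n  # placeholder task

if __name__ == '__main__':
    pool = Pool(processes=5)
    results = [pool.apply_async(work, [i]) for i in range(10)]
    pool.close()
    pool.join()
    # get() returns each worker's return value, or re-raises the exception
    # the worker hit.
    print([r.get() for r in results])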