AWS S3: large file upload from EC2 to S3 fails

Date: 2019-05-13 04:58:42

Tags: python python-2.7 amazon-s3 amazon-ec2 aws-lambda

I am trying to upload large files from an EC2 instance to AWS S3, using a Python multipart upload. Files of around 40 GB upload successfully, but with files over 70 GB the code fails after roughly 20% of the upload has completed. My code and the error message are attached below:

import threading
import boto3
import os
import sys
from boto3.s3.transfer import TransferConfig

s3 = boto3.resource('s3')

# Positional arguments: local file path, subject area, file name, S3 key prefix
local_fs_path = sys.argv[1]
subject_area = sys.argv[2]
py_file = sys.argv[3]
s3_path = sys.argv[4]

BUCKET_NAME = "***********"
def multi_part_upload_with_s3():
    # Multipart upload
    config = TransferConfig(multipart_threshold=1024 * 5, max_concurrency=10,
                            multipart_chunksize=1024 * 5, use_threads=True)
    file_path = os.path.dirname(local_fs_path) + '/' + py_file
    key_path = s3_path + '/' + subject_area + '/' +  py_file
    s3.meta.client.upload_file(file_path, BUCKET_NAME, key_path,
                            ExtraArgs={'ACL': 'public-read'},
                            Config=config,
                            Callback=ProgressPercentage(file_path)
                            )
class ProgressPercentage(object):
    def __init__(self, filename):
        self._filename = filename
        self._size = float(os.path.getsize(filename))
        self._seen_so_far = 0
        self._lock = threading.Lock()
    def __call__(self, bytes_amount):
        # To simplify we'll assume this is hooked up
        # to a single filename.
        with self._lock:
            self._seen_so_far += bytes_amount
            percentage = (self._seen_so_far / self._size) * 100
            sys.stdout.write(
                "\r%s  %s / %s  (%.2f%%)" % (
                    self._filename, self._seen_so_far, self._size,
                    percentage))
            sys.stdout.flush()

if __name__ == '__main__':
    multi_part_upload_with_s3()
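
Note that TransferConfig sizes are given in bytes, so the 1024 * 5 above is only 5 KB, below S3's 5 MB minimum part size; boto3's transfer layer adjusts the effective part size to satisfy S3's limits (5 MB minimum part size, 10,000 parts maximum), but spelling out the units avoids surprises. A minimal sketch with an explicit megabyte constant:

from boto3.s3.transfer import TransferConfig

MB = 1024 ** 2  # TransferConfig sizes are in bytes

# Sketch only: the same settings as above, with unambiguous units.
config = TransferConfig(multipart_threshold=5 * MB, max_concurrency=10,
                        multipart_chunksize=5 * MB, use_threads=True)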

The error message I receive is:

  /home/*****/etl/******/*******/*******/*********/***.20190510.dat  13080289280 / 88378295325.0  (14.80%)Traceback (most recent call last):
  File "/home/*****/etl/******/*******/*******/*********/multipart_load.py", line 46, in <module>
    multi_part_upload_with_s3()
  File "/home/*****/etl/******/*******/*******/*********/multipart_load.py", line 25, in multi_part_upload_with_s3
    Callback=ProgressPercentage(file_path)
  File "/usr/local/lib/python2.7/dist-packages/boto3/s3/inject.py", line 131, in upload_file
    extra_args=ExtraArgs, callback=Callback)
  File "/usr/local/lib/python2.7/dist-packages/boto3/s3/transfer.py", line 287, in upload_file
    filename, '/'.join([bucket, key]), e))
boto3.exceptions.S3UploadFailedError: Failed to upload /home/*****/etl/******/*******/*******/*********/********.20190510.dat to aws_bucket_name/******/*****/temp_dir/*******.20190510.dat: An error occurred (RequestTimeout) when calling the UploadPart operation (reached max retries: 4): Your socket connection to the server was not read from or written to within the timeout period. Idle connections will be closed.
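
The RequestTimeout means an UploadPart call left its socket idle past the server-side timeout, and per the traceback it was retried 4 times before the transfer gave up. A common mitigation, sketched below with illustrative (not tuned) values and placeholder paths, is to upload larger parts with fewer concurrent threads and a longer client-side read timeout:

import boto3
from botocore.config import Config
from boto3.s3.transfer import TransferConfig

MB = 1024 ** 2

file_path = '/data/large_file.dat'   # placeholder; use the real local path
BUCKET_NAME = 'my-bucket'            # placeholder bucket name
key_path = 'prefix/large_file.dat'   # placeholder S3 key

# Longer timeouts and more retries at the HTTP layer.
client = boto3.client('s3', config=Config(
    connect_timeout=60,              # seconds to establish the connection
    read_timeout=300,                # seconds before an idle socket times out
    retries={'max_attempts': 10}))

# Larger parts and fewer threads: an ~88 GB file in 64 MB parts
# is about 1,300 parts, well under the 10,000-part cap.
config = TransferConfig(multipart_threshold=64 * MB,
                        multipart_chunksize=64 * MB,
                        max_concurrency=4,
                        use_threads=True)

client.upload_file(file_path, BUCKET_NAME, key_path,
                   ExtraArgs={'ACL': 'public-read'},
                   Config=config)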

0 Answers

No answers yet.