我只想获取 S3 存储桶中某个特定文件夹(即路径前缀)下的文件,不包括其子文件夹中的文件。但是下面的代码会在该路径下递归搜索,连子文件夹中的文件也一并返回。
# Build an S3 service resource with explicit credentials
# (the 'xx' values are placeholders).
s3_resource = boto3.resource(
    's3',
    aws_access_key_id='xx',
    aws_secret_access_key='xx',
)
my_bucket = s3_resource.Bucket('test')

# S3 keys use '/' as their separator. A prefix written with backslashes in a
# normal string literal would embed a tab ('\t') and a form feed ('\f')
# instead of path separators, so the prefix is spelled with forward slashes.
# NOTE: filtering on Prefix alone matches every key that starts with the
# prefix, including keys that sit in nested "subfolders".
# `keyslist` is assumed to be a list defined earlier in the file.
for s3_object in my_bucket.objects.filter(Prefix='test/folder_with_files'):
    path, filename = os.path.split(s3_object.key)
    keyslist.append(s3_object.key)
    # Attempt to take the last slice of the key below the prefix as the
    # filename.
    # NOTE(review): this unpacking raises ValueError when the part of the key
    # after its first two segments contains no '/', i.e. for an object that
    # sits directly under the prefix; `filename` above already holds the last
    # path segment.
    bucket, key = s3_object.key.split('/', 2)[-1].split('/', 1)
答案 0 :(得分:0)
# Where to look: the bucket name and the key prefix to list beneath.
bucket = 'your-bucket'
prefix = 'your-prefix/'  # use '' when there is no prefix
# S3 service resource shared by the code below (`resource` is presumably
# boto3's factory, imported elsewhere in the file).
s3sr = resource('s3')
def get_keys_from_prefix(bucket, prefix):
    """Return the object keys found directly under ``prefix`` in ``bucket``.

    Pages through ``list_objects_v2`` using the client of the module-level
    ``s3sr`` resource. ``Delimiter='/'`` is passed so that keys containing a
    further '/' after the prefix are rolled up into ``CommonPrefixes`` rather
    than listed under ``Contents``; only the ``Contents`` keys are collected.

    Args:
        bucket: Name of the S3 bucket to list.
        prefix: Key prefix to list under; pass '' for no prefix.

    Returns:
        A list of key strings in the order the pages return them; empty when
        no object matches.
    """
    keys_list = []
    paginator = s3sr.meta.client.get_paginator('list_objects_v2')
    # use Delimiter to limit search to that level of hierarchy
    for page in paginator.paginate(Bucket=bucket, Prefix=prefix, Delimiter='/'):
        # A page without objects (nothing matched, or only common prefixes)
        # has no 'Contents' entry, so default to an empty list instead of
        # iterating over None.
        keys = [content['Key'] for content in page.get('Contents', [])]
        print('keys in page: ', len(keys))
        keys_list.extend(keys)
    return keys_list
# Collect the keys for the configured bucket and prefix.
keys_list = get_keys_from_prefix(bucket=bucket, prefix=prefix)