使用Python在不同区域的Google Cloud Storage存储桶之间运行存储传输作业时,报错"无法重写对象"(Failed to rewrite object)

时间:2018-05-17 08:59:37

标签: python google-cloud-storage

我正在尝试运行一个存储传输作业,将文件从源位置的存储桶复制到目标位置的存储桶。

以下代码无法将源存储桶中的文件重写到目标存储桶。

import datetime
import sys
import time

import googleapiclient.discovery
from google.cloud import bigquery, storage
import json
import pytz


# Project that owns the transfer job; also passed to the BigQuery client below.
PROJECT_ID = 'myproject_id'  # TODO: set this to your project name

FROM_BUCKET = 'source_bucket'  # Used as the transfer job's gcsDataSource bucket.

TO_BUCKET = 'destination_bucket'  # Used as the transfer job's gcsDataSink bucket.
# Construct API clients.
# NOTE(review): bq_client is not referenced by any code visible in this
# snippet — confirm whether it is still needed.
bq_client = bigquery.Client(project=PROJECT_ID)
# Discovery-based client for the 'storagetransfer' API, version 'v1'.
transfer_client = googleapiclient.discovery.build('storagetransfer', 'v1')

def transfer_buckets():
    """Transfer files from FROM_BUCKET to TO_BUCKET via a one-off transfer job.

    Creates a Storage Transfer Service job whose schedule starts and ends
    today (UTC), polls until an operation for that job is listed, and then
    polls that operation until it is no longer queued or in progress.

    The raw API responses and a progress indicator are printed to stdout;
    nothing is returned. The final operation is printed whatever its status,
    so a failed transfer is reported only through the printed
    ``metadata.status`` / ``errorBreakdowns`` fields, not through an
    exception.
    """
    print('Transferring bucket {} to {}'.format(FROM_BUCKET, TO_BUCKET))
    now = datetime.datetime.now(pytz.utc)
    # The same calendar date is used for both ends of the schedule.
    today = {
        'year': now.year,
        'month': now.month,
        'day': now.day,
    }
    transfer_job = {
        'description': '{}-{}-{}_once'.format(
            PROJECT_ID, FROM_BUCKET, TO_BUCKET),
        'status': 'ENABLED',
        'projectId': PROJECT_ID,
        'transferSpec': {
            'transferOptions': {
                'overwriteObjectsAlreadyExistingInSink': True,
            },
            'gcsDataSource': {
                'bucketName': FROM_BUCKET,
            },
            'gcsDataSink': {
                'bucketName': TO_BUCKET,
            },
        },
        # Set start and end date to today (UTC) without a time part to start
        # the job immediately.
        'schedule': {
            'scheduleStartDate': dict(today),
            'scheduleEndDate': dict(today),
        },
    }
    transfer_job = transfer_client.transferJobs().create(
        body=transfer_job).execute()
    print('Returned transferJob: {}'.format(
        json.dumps(transfer_job, indent=4)))

    # Find the operation created for the job.
    job_filter = json.dumps({
        'project_id': PROJECT_ID,
        'job_names': [transfer_job['name']],
    })

    # Wait until the operation has started: keep listing until the response
    # carries a non-empty 'operations' list.
    response = {}
    while not response.get('operations'):
        time.sleep(1)
        response = transfer_client.transferOperations().list(
            name='transferOperations', filter=job_filter).execute()

    operation = response['operations'][0]
    print('Returned transferOperation: {}'.format(
        json.dumps(operation, indent=4)))

    # Wait for the transfer to complete. A QUEUED operation has not run yet,
    # so it is polled as well instead of being reported as finished.
    print('Waiting ', end='')
    while operation['metadata']['status'] in ('QUEUED', 'IN_PROGRESS'):
        print('.', end='')
        sys.stdout.flush()
        time.sleep(5)
        operation = transfer_client.transferOperations().get(
            name=operation['name']).execute()
    print()

    print('Finished transferOperation: {}'.format(
        json.dumps(operation, indent=4)))

我通过以下方式调用该函数:

transfer_buckets()

错误说明:

Finished transferOperation: {
"name": "transferOperations/transferJobs-11674570151123793778-1526499677451343",
"response": {
    "@type": "type.googleapis.com/google.protobuf.Empty"
},
"metadata": {
    "transferJobName": "transferJobs/11674570151123793778",
    "status": "FAILED",
    "name": "transferOperations/transferJobs-11674570151123793778-1526499677451343",
    "startTime": "2018-05-16T19:41:17.495502150Z",
    "errorBreakdowns": [
        {
            "errorCode": "PERMISSION_DENIED",
            "errorCount": "4",
            "errorLogEntries": [
                {
                    "url": "gs://source_bucket/file1",
                    "errorDetails": [
                        "Failed to rewrite object."
                    ]
                },

    ...   

传输操作所使用的底层服务帐号已被授予对现有存储桶的读写权限,其中包括对象的列出/创建/删除权限。

1 个答案:

答案 0 :(得分:1)

出现此错误是因为两个存储桶位于不同的位置。

我尝试直接在Google存储中移动文件来重现该操作,结果遇到了以下错误。

You must use gsutil for this move. This is because the destination uses a different storage class or location from the source.

解决这个问题的方法是在Python脚本中调用gsutil。

import os

import subprocess

# `gsutil mv` copies the object and then deletes the source itself, so no
# separate `gsutil rm` is needed afterwards (it would only fail because the
# source object is already gone). check=True raises CalledProcessError on a
# non-zero exit status instead of silently ignoring a failed move, which is
# what os.system did.
subprocess.run(
    ['gsutil', 'mv', 'gs://source_bucket/file1', 'gs://destination_bucket'],
    check=True,
)