使用Python从Google云端存储加载数据 - 未指定架构RuntimeError

时间:2017-06-21 07:54:04

标签: python google-bigquery google-cloud-storage

我正在尝试编写一个函数,将我在Google云端存储上的JSON文件加载到BigQuery数据集中,但是,即使我明确地传递了架构,它仍然会说"在作业或表格上没有指定架构& #34;

import oauth2client
import uuid
import time
from google.cloud import bigquery as bq
# from oauth2client.client import GoogleCredentials

# Configuration
BILLING_PROJECT_ID = ---
DATASET_NAME = ---
TABLE_NAME = ---
BUCKET_NAME = ---
FILE = ---
SOURCE = 'gs://{}/{}'.format(BUCKET_NAME, FILE)

SCHEMA = [
    bq.SchemaField('question_id', 'INTEGER'),
    bq.SchemaField('accepted_answer', 'INTEGER'),
    bq.SchemaField('answer_count', 'INTEGER')
]

# CREDENTIALS = GoogleCredentials.get_application_efault()

client = bq.Client(project=BILLING_PROJECT_ID)


# Dataset
# Check if the dataset exists
def create_datasets(name):
    dataset = client.dataset(name)
    try:
        assert not dataset.exists()
        dataset.create()
        assert dataset.exists()
        print("Dataset {} created".format(name))
    except(AssertionError):
        pass


def load_data_from_gcs(dataset_name, table_name, source, schema):
    '''
    Load Data from Google Cloud Storage
    '''
    dataset = client.dataset(dataset_name)
    table = dataset.table(table_name)
    table.schema = schema
    job_name = str(uuid.uuid4())
    job = client.load_table_from_storage(
        job_name, table, source)
    job.source_format = 'NEWLINE_DELIMITED_JSON'

    job.begin()
    wait_for_job(job)

    print('Loaded {} rows into {}:{}.'.format(
        job.output_rows, dataset_name, table_name))


def wait_for_job(job):
    while True:
        job.reload()
        if job.state == 'DONE':
            if job.error_result:
                raise RuntimeError(job.errors)
            return
        time.sleep(1)


load_data_from_gcs(dataset_name=DATASET_NAME,
                   table_name=TABLE_NAME,
                   source=SOURCE,
                   schema=SCHEMA)

1 个答案:

答案 0 :(得分:0)

我已经解决了这个问题,在这种情况下,我忘了在开始工作之前调用 table.create()