如何通过自动检测上传的csv文件中的模式,将数据从谷歌云存储上传到大查询并动态创建表???
我使用下面的代码但我需要每次都定义模式。如何获得用于创建表和插入数据的自动模式检测
目前使用的代码:
import numpy as np
import uuid
from gcloud import bigquery
def load_data_from_gcs(dataset_name, table_name, source):
bigquery_client = bigquery.Client()
dataset = bigquery_client.dataset(dataset_name)
table = dataset.table(table_name)
job_name = str(uuid.uuid4())
if table.exists():
table.delete()
table.schema = (
bigquery.SchemaField('ID', 'STRING'),
bigquery.SchemaField('days', 'STRING'),
bigquery.SchemaField('last_activ_date', 'STRING'),
)
table.create()
job_name = str(uuid.uuid4())
job = bigquery_client.load_table_from_storage(
job_name, table, source)
job.begin()
wait_for_job(job)
print('Loaded {} rows into {}:{}.'.format(
job.output_rows, dataset_name, table_name))
def wait_for_job(job):
while True:
job.reload()
if job.state == 'DONE':
if job.error_result:
raise RuntimeError(job.errors)
return
time.sleep(1)
if __name__ == "__main__":
load_data_from_gcs('my_model','my_output', 'gs://path-
uat/data_project/my_output.csv')