我想将存储在txt文件中的BigQuery查询的结果写入BigQuery表。 我将查询文本作为变量传递给下面的函数,但是得到以下错误:
error_info = method +''+ url)google.cloud.exceptions.BadRequest:400缺少必需参数(POST https://www.googleapis.com/bigquery/v2/projects/myproject/jobs)
我错过了什么?
功能:
from google.cloud import bigquery
import uuid
def async_query(query, dataset_id, dest_table, project_Id):
# configuration json
query_data = {
"configuration": {
"query": {
"query": query,
"defaultDataset": dataset_id,
"allowLargeResults": True,
"destinationTable": {
"projectId": project_Id,
"datasetId": dataset_id,
"tableId": dest_table
},
"createDisposition": 'CREATE_IF_NEEDED',
"writeDisposition": 'WRITE_TRUNCATE'
}
}
}
client = bigquery.Client()
query_job = client.run_async_query(str(uuid.uuid4()), query_data)
query_job.use_legacy_sql = False
query_job.begin()
wait_for_job(query_job)
# Drain the query results by requesting a page at a time.
query_results = query_job.results()
page_token = None
while True:
rows, total_rows, page_token = query_results.fetch_data(
max_results=10,
page_token=page_token)
for row in rows:
print(row)
if not page_token:
break
def wait_for_job(job):
while True:
job.reload() # Refreshes the state via a GET request.
if job.state == 'DONE':
if job.error_result:
raise RuntimeError(job.errors)
return
time.sleep(1)
答案 0 :(得分:1)
您可以在配置中修复defaultDataset,如下所示
# configuration json
query_data = {
"configuration": {
"query": {
"query": query,
"defaultDataset": {
"projectId": project_Id,
"datasetId": dataset_id
},
"allowLargeResults": True,
"destinationTable": {
"projectId": project_Id,
"datasetId": dataset_id,
"tableId": dest_table
},
"createDisposition": 'CREATE_IF_NEEDED',
"writeDisposition": 'WRITE_TRUNCATE'
}
}
}
注意:“projectId”:project_Id在defaultDataset
中是可选的整个defaultDataset也是可选的,在你的情况下你可以省略它,如
# configuration json
query_data = {
"configuration": {
"query": {
"query": query,
"allowLargeResults": True,
"destinationTable": {
"projectId": project_Id,
"datasetId": dataset_id,
"tableId": dest_table
},
"createDisposition": 'CREATE_IF_NEEDED',
"writeDisposition": 'WRITE_TRUNCATE'
}
}
}
答案 1 :(得分:1)
我已经使用以下解决方案将目标表添加为queryquob中的bigquery.Table
from google.cloud import bigquery
import uuid
def async_query(query, dataset_id, dest_table, project_Id):
client = bigquery.Client()
query_job = client.run_async_query(str(uuid.uuid4()), query)
query_job.use_legacy_sql = False
dataset = bigquery.Dataset(dataset_id, client)
table = bigquery.Table(dest_table, dataset)
query_job.destination = table
query_job.write_disposition = 'WRITE_TRUNCATE'
query_job.begin()
wait_for_job(query_job)
# Drain the query results by requesting a page at a time.
query_results = query_job.results()
page_token = None
while True:
rows, total_rows, page_token = query_results.fetch_data(
max_results=10,
page_token=page_token)
for row in rows:
print(row)
if not page_token:
break
def wait_for_job(job):
while True:
job.reload() # Refreshes the state via a GET request.
答案 2 :(得分:0)
我认为defaultDataset
实际上是一个对象,所以你需要在里面设置datasetId
字段(这可能是触发错误的原因)。
你可以尝试纠正它,看看它是否有帮助?您可以看到complete options for the query
API in the documentation。