我正在使用python 2.7(现在无法更改)和google.cloud.bigquery的Google python客户端库v0.28,而压缩=“GZIP”或“NONE”参数/设置不似乎对我有用,其他人可以尝试一下,让我知道它是否适用于他们?
在下面的代码中你可以看到我一直在玩这个,但每次在GCS上我的文件看起来都是非压缩的,无论我用什么压缩。
注意:我的导入用于更大的代码集,而不是此代码段所需的全部代码
from pandas.io import gbq
import google.auth
from google.cloud import bigquery
from google.cloud.exceptions import NotFound
from google.cloud.bigquery import LoadJobConfig
from google.cloud.bigquery import Table
import json
import re
from google.cloud import storage
bigquery_client = bigquery.Client(project=project)
dataset_ref = bigquery_client.dataset(dataset_name)
table_ref = dataset_ref.table(table_name)
job_id_prefix = "bqTools_export_job"
job_config = bigquery.LoadJobConfig()
# default is ","
if field_delimiter:
job_config.field_delimiter = field_delimiter
# default is true
if print_header:
job_config.print_header = print_header
# CSV, NEWLINE_DELIMITED_JSON, or AVRO
if destination_format:
job_config.destination_format = destination_format
# GZIP or NONE
if compression:
job_config.compression = compression
job_config.Compression = "GZIP"
job_config.compression = "GZIP"
job = bigquery_client.extract_table(table_ref, destination, job_config=job_config, job_id_prefix=job_id_prefix)
# job.begin()
job.result() # Wait for job to complete
returnMsg = 'Exported {}:{} to {}'.format(dataset_name, table_name, destination)
相关链接:
https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.compression
https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.compression
我确定我做了些蠢事,谢谢你的帮助...... Rich
以下编辑
为了分享,我认为我们的最终代码将是...... Rich
# export a table from bq into a file on gcs,
# the destination should look like the following, with no brackets {}
# gs://{bucket-name-here}/{file-name-here}
def export_data_to_gcs(dataset_name, table_name, destination,
field_delimiter=",", print_header=None,
destination_format="CSV", compression="GZIP", project=None):
try:
bigquery_client = bigquery.Client(project=project)
dataset_ref = bigquery_client.dataset(dataset_name)
table_ref = dataset_ref.table(table_name)
job_id_prefix = "bqTools_export_job"
job_config = bigquery.ExtractJobConfig()
# default is ","
if field_delimiter:
job_config.field_delimiter = field_delimiter
# default is true
if print_header:
job_config.print_header = print_header
# CSV, NEWLINE_DELIMITED_JSON, or AVRO
if destination_format:
job_config.destination_format = destination_format
# GZIP or NONE
if compression:
job_config.compression = compression
# if it should be compressed, make sure there is a .gz on the filename, add if needed
if compression == "GZIP":
if destination.lower()[-3:] != ".gz":
destination = str(destination) + ".gz"
job = bigquery_client.extract_table(table_ref, destination, job_config=job_config, job_id_prefix=job_id_prefix)
# job.begin()
job.result() # Wait for job to complete
returnMsg = 'Exported {}:{} to {}'.format(dataset_name, table_name, destination)
return returnMsg
except Exception as e:
errorStr = 'ERROR (export_data_to_gcs): ' + str(e)
print(errorStr)
raise
答案 0 :(得分:1)
对于表格提取,您应该使用ExtractJobConfig