我正试图在 Airflow 中运行一个简单的 BigQuery 测试任务(dummy job),但遇到了一个错误。我怀疑是身份验证(auth)方面的问题,但不太确定。
我的DAG:
from airflow import DAG
from airflow.operators.bash_operator import BashOperator
from datetime import datetime, timedelta
from airflow.contrib.hooks.bigquery_hook import BigQueryHook
from airflow.contrib.operators.bigquery_operator import BigQueryOperator
# Arguments handed to the DAG below via ``default_args=``.
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2017, 1, 1),
    # E-mail address is set, but both notification flags are switched off.
    'email': ['airflow@airflow.com'],
    'email_on_failure': False,
    'email_on_retry': False,
    # One retry, i.e. two attempts in total, spaced five minutes apart.
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
    # Optional settings, left disabled:
    #   'queue': 'bash_queue',
    #   'pool': 'backfill',
    #   'priority_weight': 10,
    #   'end_date': datetime(2016, 1, 1),
}
# DAG 'my_bq_dag': scheduled with a one-day interval and holding a single
# BigQuery task. The task is created inside the ``with`` block, so its
# statements must be indented under it; without that indentation the file
# raises IndentationError before Airflow can load the DAG.
with DAG('my_bq_dag', schedule_interval=timedelta(days=1),
         default_args=default_args) as dag:
    # Runs a constant query and writes the result to the table 'airflow.msg'.
    # NOTE(review): 'WRITE_TRUNCATE' presumably replaces any existing table
    # contents -- confirm against the BigQuery writeDisposition documentation.
    # NOTE(review): credentials are taken from the Airflow connection named
    # 'bigquery_default'. The traceback in this post fails inside
    # OpenSSL load_privatekey, which suggests that connection's private key
    # cannot be parsed -- verify the connection's key settings.
    bq_extract_one_day = BigQueryOperator(
        task_id='my_bq_task1',
        bql='SELECT 666 as msg',
        destination_dataset_table='airflow.msg',
        write_disposition='WRITE_TRUNCATE',
        bigquery_conn_id='bigquery_default'
    )
然后,当我用下面的命令测试这个任务时:
@airflow-server:~/$ airflow test my_bq_dag my_bq_task1 2017-01-01
得到了如下输出:
[2017-03-09 17:06:05,629] {__init__.py:36} INFO - Using executor LocalExecutor
[2017-03-09 17:06:05,735] {driver.py:120} INFO - Generating grammar tables from /usr/lib/python2.7/lib2to3/Grammar.txt
[2017-03-09 17:06:05,764] {driver.py:120} INFO - Generating grammar tables from /usr/lib/python2.7/lib2to3/PatternGrammar.txt
[2017-03-09 17:06:06,091] {models.py:154} INFO - Filling up the DagBag from /home/user/myproject/airflow/dags
[2017-03-09 17:06:06,385] {models.py:1196} INFO -
--------------------------------------------------------------------------------
Starting attempt 1 of 2
--------------------------------------------------------------------------------
[2017-03-09 17:06:06,386] {models.py:1219} INFO - Executing <Task(BigQueryOperator): my_bq_task1> on 2017-01-01 00:00:00
[2017-03-09 17:06:06,396] {bigquery_operator.py:55} INFO - Executing: SELECT 666 as msg
[2017-03-09 17:06:06,425] {discovery.py:810} INFO - URL being requested: POST https://www.googleapis.com/bigquery/v2/projects/myproject/jobs?alt=json
[2017-03-09 17:06:06,425] {client.py:570} INFO - Attempting refresh to obtain initial access_token
[2017-03-09 17:06:06,426] {models.py:1286} ERROR - []
Traceback (most recent call last):
File "/usr/local/lib/python2.7/dist-packages/airflow/models.py", line 1245, in run
result = task_copy.execute(context=context)
File "/usr/local/lib/python2.7/dist-packages/airflow/contrib/operators/bigquery_operator.py", line 59, in execute
cursor.run_query(self.bql, self.destination_dataset_table, self.write_disposition, self.allow_large_results, self.udf_config)
File "/usr/local/lib/python2.7/dist-packages/airflow/contrib/hooks/bigquery_hook.py", line 207, in run_query
return self.run_with_configuration(configuration)
File "/usr/local/lib/python2.7/dist-packages/airflow/contrib/hooks/bigquery_hook.py", line 437, in run_with_configuration
.insert(projectId=self.project_id, body=job_data) \
File "/usr/local/lib/python2.7/dist-packages/oauth2client/util.py", line 140, in positional_wrapper
return wrapped(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/googleapiclient/http.py", line 722, in execute
body=self.body, headers=self.headers)
File "/usr/local/lib/python2.7/dist-packages/oauth2client/client.py", line 572, in new_request
self._refresh(request_orig)
File "/usr/local/lib/python2.7/dist-packages/oauth2client/client.py", line 842, in _refresh
self._do_refresh_request(http_request)
File "/usr/local/lib/python2.7/dist-packages/oauth2client/client.py", line 869, in _do_refresh_request
body = self._generate_refresh_request_body()
File "/usr/local/lib/python2.7/dist-packages/oauth2client/client.py", line 1549, in _generate_refresh_request_body
assertion = self._generate_assertion()
File "/usr/local/lib/python2.7/dist-packages/oauth2client/client.py", line 1677, in _generate_assertion
private_key, self.private_key_password), payload)
File "/usr/local/lib/python2.7/dist-packages/oauth2client/_openssl_crypt.py", line 117, in from_string
pkey = crypto.load_privatekey(crypto.FILETYPE_PEM, parsed_pem_key)
File "/usr/local/lib/python2.7/dist-packages/OpenSSL/crypto.py", line 2583, in load_privatekey
_raise_current_error()
File "/usr/local/lib/python2.7/dist-packages/OpenSSL/_util.py", line 48, in exception_from_error_queue
raise exception_type(errors)
Error: []
[2017-03-09 17:06:06,428] {models.py:1298} INFO - Marking task as UP_FOR_RETRY
[2017-03-09 17:06:06,428] {models.py:1327} ERROR - []
Traceback (most recent call last):
File "/usr/local/bin/airflow", line 15, in <module>
args.func(args)
File "/usr/local/lib/python2.7/dist-packages/airflow/bin/cli.py", line 352, in test
ti.run(force=True, ignore_dependencies=True, test_mode=True)
File "/usr/local/lib/python2.7/dist-packages/airflow/utils/db.py", line 53, in wrapper
result = func(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/airflow/models.py", line 1245, in run
result = task_copy.execute(context=context)
File "/usr/local/lib/python2.7/dist-packages/airflow/contrib/operators/bigquery_operator.py", line 59, in execute
cursor.run_query(self.bql, self.destination_dataset_table, self.write_disposition, self.allow_large_results, self.udf_config)
File "/usr/local/lib/python2.7/dist-packages/airflow/contrib/hooks/bigquery_hook.py", line 207, in run_query
return self.run_with_configuration(configuration)
File "/usr/local/lib/python2.7/dist-packages/airflow/contrib/hooks/bigquery_hook.py", line 437, in run_with_configuration
.insert(projectId=self.project_id, body=job_data) \
File "/usr/local/lib/python2.7/dist-packages/oauth2client/util.py", line 140, in positional_wrapper
return wrapped(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/googleapiclient/http.py", line 722, in execute
body=self.body, headers=self.headers)
File "/usr/local/lib/python2.7/dist-packages/oauth2client/client.py", line 572, in new_request
self._refresh(request_orig)
File "/usr/local/lib/python2.7/dist-packages/oauth2client/client.py", line 842, in _refresh
self._do_refresh_request(http_request)
File "/usr/local/lib/python2.7/dist-packages/oauth2client/client.py", line 869, in _do_refresh_request
body = self._generate_refresh_request_body()
File "/usr/local/lib/python2.7/dist-packages/oauth2client/client.py", line 1549, in _generate_refresh_request_body
assertion = self._generate_assertion()
File "/usr/local/lib/python2.7/dist-packages/oauth2client/client.py", line 1677, in _generate_assertion
private_key, self.private_key_password), payload)
File "/usr/local/lib/python2.7/dist-packages/oauth2client/_openssl_crypt.py", line 117, in from_string
pkey = crypto.load_privatekey(crypto.FILETYPE_PEM, parsed_pem_key)
File "/usr/local/lib/python2.7/dist-packages/OpenSSL/crypto.py", line 2583, in load_privatekey
_raise_current_error()
File "/usr/local/lib/python2.7/dist-packages/OpenSSL/_util.py", line 48, in exception_from_error_queue
raise exception_type(errors)
OpenSSL.crypto.Error: []
我想做的只是编写一个简单的任务,向我的 BigQuery 项目中写入一张表。我在一定程度上参考了这篇文章作为指南:https://medium.com/google-cloud/airflow-for-google-cloud-part-1-d7da9a048aa4#.5qclla82t