我正在 Python 2.7 环境下将 Airflow 从 1.9.0 升级到 1.10.2,在 airflow/contrib/operators/bigquery_operator.py 上遇到了麻烦,更确切地说,是 bql 参数已被弃用,改为使用 sql 参数。
我有一组基于 BigQueryOperator 的类,继承层次如下:
BigQueryToPartitionTableOperator -> BigQueryFromExternalSqlOperator -> BigQueryOperator
# Context carried over from the surrounding text: one of the author's DAGs
# uses the operator classes defined below.
class BigQueryFromExternalSqlOperator(BigQueryOperator):
    """BigQueryOperator whose SQL text is produced by a callable.

    The query is not passed at construction time; it is obtained by calling
    ``get_sql_func(**get_sql_kwargs)`` from :meth:`pre_execute`.

    :param get_sql_func: callable returning the SQL string to run.
    :param get_sql_kwargs: keyword arguments forwarded to ``get_sql_func``;
        registered in ``template_fields``. Defaults to an empty dict.
    """

    template_fields = BigQueryOperator.template_fields + ('get_sql_kwargs',)

    def __init__(self, get_sql_func, get_sql_kwargs=None, *args, **kwargs):
        super(BigQueryFromExternalSqlOperator, self).__init__(
            bql='',  # /!\ problematic parameter (deprecated in favour of `sql`)
            *args,
            **kwargs)
        self.get_sql_func = get_sql_func
        # Build a fresh dict per instance instead of sharing one mutable
        # default object between every operator created without kwargs.
        self.get_sql_kwargs = {} if get_sql_kwargs is None else get_sql_kwargs

    def get_sql(self):
        """Return the SQL string produced by ``get_sql_func``."""
        return self.get_sql_func(**self.get_sql_kwargs)

    def pre_execute(self, context):
        """Resolve the query and store it on the operator before execution.

        The value is written to both ``bql`` (legacy attribute) and ``sql``:
        the Airflow 1.10.2 constructor copies ``bql`` into ``self.sql`` only
        once, at construction time, so a later assignment to ``self.bql``
        alone never reaches ``self.sql``.
        NOTE(review): confirm that the installed BigQueryOperator.execute()
        reads ``self.sql``.
        """
        query = self.get_sql()
        self.bql = query
        self.sql = query
class BigQueryToPartitionTableOperator(BigQueryFromExternalSqlOperator):
    """Externally-defined-SQL operator that also carries a schema callable.

    :param get_schema_func: callable stored on the operator as
        ``get_schema_func`` (it is not invoked in this class body).
    :param get_schema_kwargs: keyword arguments stored alongside
        ``get_schema_func``; registered in ``template_fields``.
        Defaults to an empty dict.
    """

    template_fields = ('get_schema_kwargs',) + BigQueryFromExternalSqlOperator.template_fields
    template_ext = ('_.sql',)

    def __init__(self, get_schema_func, get_schema_kwargs=None, *args, **kwargs):
        super(BigQueryToPartitionTableOperator, self).__init__(*args, **kwargs)
        # The hook reuses the connection settings already resolved by the
        # parent operator's constructor.
        self.hook = BigQueryTableHook(bigquery_conn_id=self.bigquery_conn_id,
                                      delegate_to=self.delegate_to)
        self.get_schema_func = get_schema_func
        # Build a fresh dict per instance instead of sharing one mutable
        # default object between every operator created without kwargs.
        self.get_schema_kwargs = {} if get_schema_kwargs is None else get_schema_kwargs
        # No schema is set at construction time.
        self.schema = None
我的其中一个 DAG 正在使用 BigQueryToPartitionTableOperator。
当我执行 airflow list_dags 来检查 DAG 代码能否被解析时,得到了下面的 Traceback(随后我查看了 BigQueryOperator 的代码,见 Traceback 之后的说明):
Traceback (most recent call last):
File "/usr/local/lib/airflow/airflow/models.py", line 374, in process_file
m = imp.load_source(mod_name, filepath)
File "/home/airflow/gcs/dags/processing/dags/learning/clustering_activity/dag.py", line 37, in <module>
"period": Variable.get("activity_clustering.period")
File "/home/airflow/gcs/dags/processing/common/dags/inference_dag.py", line 215, in __enter__
dataset_partitioned=self.dataset,
File "/home/airflow/gcs/dags/processing/common/operators/big_query_operator.py", line 79, in __init__
super(BigQueryShardedToPartitionedOperator, self).__init__(bql=None, *args, **kwargs)
File "/usr/local/lib/airflow/airflow/utils/decorators.py", line 97, in wrapper
result = func(*args, **kwargs)
File "/usr/local/lib/airflow/airflow/contrib/operators/bigquery_operator.py", line 176, in __init__
'argument: `sql`'.format(self.task_id))
TypeError: inferred_to_partitioned missing 1 required positional argument: `sql`
BigQueryOperator 的 __init__ 中最相关的部分如下,其中会对 self.sql 进行检查:
尽管我在@apply_defaults
def __init__(sql=None, bql=None, ...):
...
self.sql = sql if sql else bql # /!\ how self.sql is set
...
# TODO remove `bql` in Airflow 2.0
if self.bql:
import warnings
warnings.warn('Deprecated parameter `bql` used in Task id: {}. '
'Use `sql` parameter instead to pass the sql to be '
'executed. `bql` parameter is deprecated and '
'will be removed in a future version of '
'Airflow.'.format(self.task_id),
category=DeprecationWarning)
if self.sql is None:
raise TypeError('{} missing 1 required positional '
'argument: `sql`'.format(self.task_id))
尽管我在 BigQueryFromExternalSqlOperator 中为 bql 设置了默认值(bql=''),但仍然遇到与上面相同的异常。
我不知道这是否与 Python 实例化对象时的继承和默认参数有关。
或者,也许是 decorators.py 中的 apply_defaults 装饰器更改了传递给 BigQueryOperator 的 __init__ 函数的参数。
编辑 1:这是我调用该算子(operator)的方式:
BigQueryOperator
答案 0 :(得分:1)
感谢您补充代码片段。如果我理解正确,您没有传递 sql 参数,而这正是错误消息所抱怨的:TypeError: inferred_to_partitioned missing 1 required positional argument: `sql`
请尝试按以下方式修复:向父类 BigQueryOperator 传入一个非空的 sql 参数(仅用于调试):
# Pass a non-empty `sql` value to the parent BigQueryOperator; the placeholder
# query below is meant for debugging only.
class BigQueryFromExternalSqlOperator(BigQueryOperator):
    """Variant of the operator that hands a placeholder ``sql`` to its parent.

    :param get_sql_func: accepted for interface compatibility; this snippet
        does not store or call it.
    :param get_sql_kwargs: accepted for interface compatibility; this snippet
        does not store or use it.
    """

    template_fields = BigQueryOperator.template_fields + ('get_sql_kwargs',)

    def __init__(self, get_sql_func, get_sql_kwargs=None, *args, **kwargs):
        # `None` replaces the shared mutable `{}` default of the signature.
        super(BigQueryFromExternalSqlOperator, self).__init__(
            sql='SELECT ....',
            *args,
            **kwargs)
然后向 BigQueryOperator 传递 sql 参数;或者,如果您不想把查询的执行委托给它,可以重写负责执行查询的方法。但是,如果您根本不需要 BigQueryOperator 来执行查询,那么去掉这个父类会更简单。