I created a subdag that contains several tasks.
I plug this subdag into another DAG.
When I run the main DAG and zoom into the subdag, all of its tasks run and eventually they all succeed.
When I zoom back out to the main DAG, the subdag task shows as successful, but a few seconds later both tasks are marked as removed. As a result, the main DAG never moves on to the next task.
Here is the code for the subdag:
from airflow import DAG
from airflow.models import Variable

# CreateExtTableOperator and CustomMssqlOperator are project-specific custom
# operators; their import lines are not shown in the question.


def insert_from_blob_dag(parent_dag_name, child_dag_name, start_date):
    # The subdag's dag_id must be "<parent_dag_id>.<subdag_task_id>" so the
    # parent's SubDagOperator can match it.
    dag = DAG(
        '%s.%s' % (parent_dag_name, child_dag_name),
        schedule_interval=None,
        start_date=start_date,
    )

    # Build the SQL statements from template strings stored in Airflow
    # Variables by substituting the <...> placeholders.
    clean_table_for_specified_field_query = Variable.get("clean_table_for_specified_field_template") \
        .replace("<tableSchema>", "dataviz") \
        .replace("<tableName>", "recofree_daily_recotopbycat") \
        .replace("<field>", "recoDate") \
        .replace("<tableExtName>", "recofree_daily_recotopbycat_ext")

    insert_into_table_query = Variable.get("insert_into_table_template") \
        .replace("<tableSchema>", "dataviz") \
        .replace("<tableName>", "recofree_daily_recotopbycat") \
        .replace("<query>", "select * from dataviz.recofree_daily_recotopbycat_ext where recoDate != 'recoDate';")

    drop_ext_table_query = Variable.get("drop_ext_table_template") \
        .replace("<tableSchema>", "dataviz") \
        .replace("<tableExtName>", "recofree_daily_recotopbycat_ext")

    # Create the external table over the blob storage files.
    create_ext_table = CreateExtTableOperator(
        task_id='create_ext_table',
        schema=Variable.get('test_schema'),
        table_schema='dataviz',
        table_name='recofree_daily_recotopbycat_ext',
        blob_location='/free_qtqml/metrics_daily/recoTopByCat/2018-07-04/',
        data_source='AzureStorage_reco',
        file_format='TextFileFormatSemiColonSeparated',
        autocommit=True,
        mssql_conn_id='sqldwh-tf1crmdata',
        dag=dag)

    clean_table_for_specified_field = CustomMssqlOperator(
        task_id='clean_table_for_specified_field',
        autocommit=True,
        mssql_conn_id='sqldwh-tf1crmdata',
        sql=clean_table_for_specified_field_query,
        dag=dag)

    insert_into_table = CustomMssqlOperator(
        task_id='insert_into_table',
        autocommit=True,
        mssql_conn_id='sqldwh-tf1crmdata',
        sql=insert_into_table_query,
        dag=dag)

    drop_ext_table = CustomMssqlOperator(
        task_id='drop_ext_table',
        autocommit=True,
        mssql_conn_id='sqldwh-tf1crmdata',
        sql=drop_ext_table_query,
        dag=dag)

    create_ext_table >> clean_table_for_specified_field >> insert_into_table >> drop_ext_table
    return dag
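For reference, the question does not show the contents of the template Variables. The sketch below is a hypothetical example of what "clean_table_for_specified_field_template" might hold; the SQL statement is assumed, not taken from the question:

from airflow.models import Variable

# Hypothetical template content; the real Variable value is not shown in the
# question. The <...> placeholders are filled in with str.replace() above.
Variable.set(
    "clean_table_for_specified_field_template",
    "DELETE FROM <tableSchema>.<tableName> "
    "WHERE <field> IN (SELECT <field> FROM <tableSchema>.<tableExtName>);",
)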
Here is the code for the main DAG:
from datetime import datetime, timedelta

from airflow import DAG
from airflow.models import Variable
from airflow.operators.bash_operator import BashOperator
from airflow.operators.subdag_operator import SubDagOperator

# insert_from_blob_dag is the factory function shown above; its import path
# depends on the project layout.

default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2018, 8, 1),
    'email': ['airflow@example.com'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 0,
    'retry_delay': timedelta(minutes=5),
}

dag = DAG('mssql_test', default_args=default_args)

test_sql_variable = Variable.get("test_mssql")

# task_id must equal the child dag name used in the subdag's dag_id
# ("mssql_test.insert_from_blob").
check_blobs_sub_dag = SubDagOperator(
    subdag=insert_from_blob_dag('mssql_test', "insert_from_blob", datetime(2018, 8, 1)),
    task_id='insert_from_blob',
    dag=dag,
    default_args=default_args,
)

test_shell = BashOperator(
    task_id='test_shell',
    bash_command='echo "task 1"',
    retries=0,
    xcom_push=True,
    dag=dag)

check_blobs_sub_dag >> test_shell
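For what it's worth, Airflow shows a task instance as "removed" when it still exists in the metadata database but the scheduler can no longer find a matching task after re-parsing the DAG file, so it helps to make sure the subdag is rebuilt with exactly the same dag_id, task_ids, and start_date on every parse. Below is a minimal sketch of that hardening step, assuming the same factory as above; this is a precaution, not a confirmed fix for the behaviour described:

# Derive the subdag's naming and start_date from shared constants instead of
# repeating literals, so every re-parse reconstructs identical task instances.
PARENT_DAG_NAME = 'mssql_test'
CHILD_TASK_ID = 'insert_from_blob'

dag = DAG(PARENT_DAG_NAME, default_args=default_args)

check_blobs_sub_dag = SubDagOperator(
    subdag=insert_from_blob_dag(
        PARENT_DAG_NAME,
        CHILD_TASK_ID,
        default_args['start_date'],  # keep parent and subdag start_date in sync
    ),
    task_id=CHILD_TASK_ID,  # must equal the child dag name used in the dag_id
    dag=dag,
)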