我想设计一个包含多个任务的 Airflow DAG,并希望这些任务按照示例图(Example)以及甘特图(this)所展示的方式执行。下面列出一些说明:
我已经通过下面的代码创建了所需的DAG
# Names of the per-table task chains the DAG is built from.
main_task_list = [f"T{i}" for i in range(1, 4)]
def decide_what_to_do(table_name, **context):
    """Branch callable for the BranchPythonOperator.

    Randomly routes *table_name* to the zip-file task (when the roll
    exceeds 80) or to the do-nothing task otherwise.  The task-id
    prefixes come from module-level ``tid_prefix_*`` constants.
    """
    roll = random.randint(0, 100)
    chosen_prefix = tid_prefix_zip_file if roll > 80 else tid_prefix_do_nothing
    return chosen_prefix + table_name
def create_tasks_list(table_name):
    """Build all tasks for one table and wire them into branch pipelines.

    Creates the operators for *table_name*, chains each branch with
    ``airflow.utils.helpers.chain``, and returns the list of pipelines
    (one list of tasks per branch, head task first).

    Adding another branch now only requires appending one more task list
    to ``branches`` below — no extra ``chain`` call to copy around.
    """
    py_op_call_api = DummyOperator(
        task_id=tid_prefix_call_api + table_name
    )
    py_op_new_data_come_in = BranchPythonOperator(
        task_id=tid_prefix_branch + table_name,
        python_callable=decide_what_to_do,
        op_args=[table_name],
    )
    ssh_op_zip_file = DummyOperator(
        task_id=tid_prefix_zip_file + table_name
    )
    ssh_op_upload_file = DummyOperator(
        task_id=tid_prefix_upload + table_name
    )
    py_update_tables_setting = DummyOperator(
        task_id=tid_prefix_update_table + table_name
    )
    db_op_execute_notebook = DummyOperator(
        task_id=tid_prefix_call_databricks + table_name
    )
    dummy_op_do_nothing = DummyOperator(
        task_id=tid_prefix_do_nothing + table_name
    )

    # Each inner list is one branch pipeline, ordered upstream-first.
    # (The original code mislabeled the second branch as "branch 1".)
    branches = [
        # branch 1: new data arrived — zip, upload, update, run notebook
        [py_op_call_api, py_op_new_data_come_in, ssh_op_zip_file,
         ssh_op_upload_file, py_update_tables_setting,
         db_op_execute_notebook],
        # branch 2: nothing new — the branch operator skips to a no-op
        [py_op_new_data_come_in, dummy_op_do_nothing],
    ]
    for branch in branches:
        airflow.utils.helpers.chain(*branch)
    return branches
with DAG(dag_id, default_args=default_args) as dag:
    # One chain list (list of branch pipelines) per entry in main_task_list.
    tasks_chain_list = [create_tasks_list(each) for each in main_task_list]

    start = DummyOperator(task_id="start")

    # "start" feeds the head task of the first chain; each chain's head
    # task then feeds the next chain's head task, so the chains run in
    # the order given by main_task_list.
    upstream = start
    for chain_list in tasks_chain_list:
        head_task = chain_list[0][0]
        upstream >> head_task
        upstream = head_task
但是,如果我想为每个任务链添加更多分支,目前的代码就不够灵活了。有谁能帮助我改进这段代码吗?谢谢。