我有一个任务列表,需要从主 DAG 中调用不同的 DAG。我正在使用 TriggerDagRunOperator 来实现这一点,但遇到了一些问题。
答案 0 :(得分:1)
在触发外部dag之后运行此任务:
import time
from airflow.models import DagRun
from airflow import AirflowException
from airflow.operators.python_operator import PythonOperator
def get_external_dag_status(dag_id, **kwargs):
    """Return the state of the last run that ``DagRun.find`` lists for a DAG.

    Args:
        dag_id: Identifier of the DAG whose runs are looked up.
        **kwargs: Accepted and ignored.

    Returns:
        The ``state`` attribute of the last ``DagRun`` in the list returned
        by ``DagRun.find(dag_id=dag_id)``, or ``None`` when that list is
        empty (no run exists for ``dag_id``).
    """
    dag_runs = DagRun.find(dag_id=dag_id)
    if not dag_runs:
        # No run recorded: report "unknown" instead of crashing with an
        # UnboundLocalError on a result variable that was never assigned.
        return None
    # NOTE(review): assumes DagRun.find returns runs oldest-first, so the
    # last element is the most recent run -- confirm for the Airflow version.
    return dag_runs[-1].state
def check_status(dag_id, **kwargs):
    """Poll the external DAG's status until it is 'success' or 'failed'.

    Args:
        dag_id: Identifier of the DAG passed to ``get_external_dag_status``.
        **kwargs: Accepted and ignored.

    Returns:
        The string ``'success'`` once the reported status equals it.

    Raises:
        ValueError: If the reported status is ``'failed'``.
    """
    while True:
        current_state = get_external_dag_status(dag_id)
        if current_state == 'success':
            return current_state
        if current_state == 'failed':
            print(current_state)
            raise ValueError('Dag Failed')
        # Any other status: wait five minutes before asking again.
        time.sleep(300)
# Task that runs check_status for the given external DAG id.
status_check = PythonOperator(
    task_id="dag_check",
    dag=spark_dag,
    python_callable=check_status,
    op_kwargs={'dag_id': 'your external dag id'},
)
答案 1 :(得分:0)
使用 TriggerDagRunOperator 触发 DAG 之后,您可以考虑调用一个 DagSensor,它会等待该 DAG 运行完成,然后才触发其他 DAG。下面是我们自己的实现(并不完美,但确实可以完成任务):
import logging
from airflow.plugins_manager import AirflowPlugin
from airflow.models import DagRun
from airflow.sensors.base_sensor_operator import BaseSensorOperator
from airflow.utils.db import provide_session
from airflow.utils.decorators import apply_defaults
from airflow.utils.state import State
# Module-level logger named 'airflow.dag_sensor'; DagSensor.poke logs through it.
logger = logging.getLogger('airflow.dag_sensor')
class DagSensor(BaseSensorOperator):
    """Sensor that succeeds once no run of an external DAG is RUNNING.

    Each poke counts the ``DagRun`` rows of ``external_dag_id`` whose state
    is ``State.RUNNING``; the sensor keeps waiting while that count is
    non-zero.

    NOTE(review): only the RUNNING state is inspected, so the sensor also
    passes when the external DAG has failed or has no run at all -- confirm
    this is acceptable for the calling workflow.

    :param external_dag_id: id of the DAG to watch (templated field).
    """

    template_fields = ['external_dag_id']
    ui_color = '#FFFFCC'

    @apply_defaults
    def __init__(self, external_dag_id, *args, **kwargs):
        super(DagSensor, self).__init__(*args, **kwargs)
        self.external_dag_id = external_dag_id

    @provide_session
    def poke(self, context, session=None):
        """Return True when no run of ``external_dag_id`` is RUNNING.

        :param context: task context; not used by this method.
        :param session: SQLAlchemy session used for the query.
        :return: ``True`` if the RUNNING count is zero, ``False`` otherwise.
        """
        running_count = session.query(DagRun).filter(
            DagRun.dag_id == self.external_dag_id,
            DagRun.state == State.RUNNING,
        ).count()
        # The session is deliberately not committed or closed here: this is a
        # read-only query, and the session belongs to @provide_session (or to
        # the caller that passed it in), which handles its lifecycle.
        logger.info(
            'Dag %s in running status: %s',
            self.external_dag_id,
            running_count,
        )
        return running_count == 0
class DagSensorPlugin(AirflowPlugin):
    """Airflow plugin named 'dag_sensor_plugin' listing DagSensor as an operator."""

    operators = [DagSensor]
    name = 'dag_sensor_plugin'
下面是调用它的方式:
from airflow.operators import DagSensor
# Sensor task watching the DAG with id 'my_dag'.
# NOTE(review): poke_interval and timeout are presumably in seconds -- confirm.
check_my_dag_completion = DagSensor(
    task_id='check_my_dag_completion',
    external_dag_id='my_dag',
    dag=dag,
    timeout=3600,
    poke_interval=30,
)
这意味着您可以在工作流程中包含以下内容:
# Chain the four tasks left to right with `>>`.
# NOTE(review): presumably call_dag_* are the trigger tasks and check_dag_*
# are DagSensor tasks defined elsewhere -- verify against the real DAG file.
call_dag_a >> check_dag_a >> call_dag_b >> check_dag_b