我是 Apache Airflow 的新手,正在使用 DAG。我的代码如下。
在输入 json 中,我有一个名为 "sports_category" 的参数。如果其值为 "football",则需要运行 football_players 任务;如果其值为 "cricket",则需要运行 cricket_players 任务。
from datetime import datetime

import airflow
from airflow import DAG
from airflow.contrib.operators.databricks_operator import DatabricksSubmitRunOperator
# Default arguments applied to every task in this DAG.
# NOTE(review): the original used `datetime` without importing it, which makes
# the whole DAG file fail to parse (NameError) — fixed by the added
# `from datetime import datetime` at the top of the file.
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,  # each run is independent of previous runs
    'start_date': datetime(2020, 6, 23),
}
# Manually-triggered DAG (schedule_interval=None) that submits player-detail
# jobs to Databricks; at most five runs may be active at once.
dag = DAG(
    'PLAYERS_DETAILS',
    default_args=default_args,
    schedule_interval=None,
    max_active_runs=5,
)
# Submit the football-players jar job to Databricks.
# Fixes:
#  * `'jar': {{ jar path }}` was not valid Python — Jinja templates must live
#    inside string literals. Replaced with a templated string; confirm the key
#    (`jarPath`) matches what the triggering dag_run.conf actually provides.
#  * The task was never attached to the DAG (no `dag=` argument and no
#    `with dag:` context), so it would not appear in PLAYERS_DETAILS.
# NOTE(review): to run this task only when sports_category == 'football',
# a BranchPythonOperator reading dag_run.conf upstream is the usual approach.
football_players = DatabricksSubmitRunOperator(
    task_id='football_players',
    databricks_conn_id='football_players_details',
    # Cluster id is supplied at trigger time via dag_run.conf.
    existing_cluster_id='{{ dag_run.conf.clusterId }}',
    libraries=[
        {
            'jar': '{{ dag_run.conf.jarPath }}'
        }
    ],
    databricks_retry_limit=3,
    spark_jar_task={
        'main_class_name': 'football class name1',
        'parameters': [
            # Pass the whole trigger payload through to the jar.
            'json ={{ dag_run.conf.json }}'
        ]
    },
    dag=dag,
)
# Submit the cricket-players jar job to Databricks.
# Fixes (same as the football task):
#  * `'jar': {{ jar path }}` was not valid Python — Jinja templates must live
#    inside string literals. Replaced with a templated string; confirm the key
#    (`jarPath`) matches what the triggering dag_run.conf actually provides.
#  * The task was never attached to the DAG (no `dag=` argument and no
#    `with dag:` context), so it would not appear in PLAYERS_DETAILS.
# NOTE(review): to run this task only when sports_category == 'cricket',
# a BranchPythonOperator reading dag_run.conf upstream is the usual approach.
cricket_players = DatabricksSubmitRunOperator(
    task_id='cricket_players',
    databricks_conn_id='cricket_players_details',
    # Cluster id is supplied at trigger time via dag_run.conf.
    existing_cluster_id='{{ dag_run.conf.clusterId }}',
    libraries=[
        {
            'jar': '{{ dag_run.conf.jarPath }}'
        }
    ],
    databricks_retry_limit=3,
    spark_jar_task={
        'main_class_name': 'cricket class name2',
        'parameters': [
            # Pass the whole trigger payload through to the jar.
            'json ={{ dag_run.conf.json }}'
        ]
    },
    dag=dag,
)