我试图弄清楚为什么我的动态subdag失败了,我尝试了各种方法,例如更改start_date或使用上下文管理器,但是没有用。在用户界面中,我可以看到我的任务,但是在错误日志中,它告诉我找不到任务,并且subdag失败了。
代码如下:
args = {
'owner': 'airflow',
'start_date': airflow.utils.dates.days_ago(2),
}
dag_id = 'airflow_play'
dag = DAG(
dag_id=dag_id,
default_args=args,
schedule_interval=timedelta(seconds=5),
)
def get_files_path(path):
# only files
return [join(path, f) for f in listdir(path) if isfile(join(path, f))]
def process_file(file_path):
logging.info(f' execution of task {file_path}')
with open(file_path, 'r') as fp:
content = fp.read()
logging.info(f'process form file : {file_path} , content: {content}')
# write file content in directory
with open(join(PROCESSED_PATH, f'_{file_path}'), 'w') as fp:
json.dump(content, fp)
def load_subdag(parent_dag_id, child_dag_id):
dag_subdag = DAG(
dag_id='{0}.{1}'.format(parent_dag_id, child_dag_id),
# default_args=args,
start_date=airflow.utils.dates.days_ago(3),
schedule_interval=None,
)
for file_path in get_files_path(SPLIT_FILES_PATH):
PythonOperator(
task_id=f"load_subdag_{hash(file_path)}",
default_args=args,
python_callable=process_file,
op_kwargs={'file_path': file_path},
dag=dag_subdag,
)
return dag_subdag
load_tasks = SubDagOperator(
task_id='load_tasks',
subdag=load_subdag(dag_id, 'load_tasks'),
default_args=args,
dag=dag
)
遇到此错误:
> Executing command: airflow run airflow_play.load_tasks
> load_subdag_1681310256622353627 2019-02-18T16:11:40.342468+00:00
> --local -sd DAGS_FOLDER/airflow_dag_test.py --cfg_path /tmp/tmplg8ozot6
> [2019-02-18 16:14:53,265] {base_task_runner.py:101} INFO - Job 444: Subtask load_tasks [2019-02-18 16:14:53,264] {__init__.py:51}
> INFO - Using executor SequentialExecutor
> [2019-02-18 16:14:54,025] {base_task_runner.py:101} INFO - Job 444: Subtask load_tasks [2019-02-18 16:14:54,024] {models.py:273} INFO
> - Filling up the DagBag from /app/dags/airflow_dag_test.py
> [2019-02-18 16:14:54,042] {base_task_runner.py:101} INFO - Job 444: Subtask load_tasks Traceback (most recent call last):
> [2019-02-18 16:14:54,043] {base_task_runner.py:101} INFO - Job 444: Subtask load_tasks File "/usr/local/bin/airflow", line 32, in
> <module>
> [2019-02-18 16:14:54,043] {base_task_runner.py:101} INFO - Job 444: Subtask load_tasks args.func(args)
> [2019-02-18 16:14:54,043] {base_task_runner.py:101} INFO - Job 444: Subtask load_tasks File
> "/usr/local/lib/python3.6/site-packages/airflow/utils/cli.py", line
> 74, in wrapper
> [2019-02-18 16:14:54,044] {base_task_runner.py:101} INFO - Job 444: Subtask load_tasks return f(*args, **kwargs)
> [2019-02-18 16:14:54,044] {base_task_runner.py:101} INFO - Job 444: Subtask load_tasks File
> "/usr/local/lib/python3.6/site-packages/airflow/bin/cli.py", line 513,
> in run
> [2019-02-18 16:14:54,044] {base_task_runner.py:101} INFO - Job 444: Subtask load_tasks task = dag.get_task(task_id=args.task_id)
> [2019-02-18 16:14:54,044] {base_task_runner.py:101} INFO - Job 444: Subtask load_tasks File
> "/usr/local/lib/python3.6/site-packages/airflow/models.py", line 4162,
> in get_task
> [2019-02-18 16:14:54,044] {base_task_runner.py:101} INFO - Job 444: Subtask load_tasks raise AirflowException("Task {task_id} not
> found".format(**locals()))
> [2019-02-18 16:14:54,045] {base_task_runner.py:101} INFO - Job 444: Subtask load_tasks airflow.exceptions.AirflowException: Task
> load_subdag_1681310256622353627 not found
> [2019-02-18 16:14:54,316] {logging_mixin.py:95} INFO - [2019-02-18 16:14:54,316] {sequential_executor.py:52} ERROR - Failed to execute
> task Command 'airflow run airflow_play.load_tasks
> load_subdag_1681310256622353627 2019-02-18T16:11:40.342468+00:00
> --local -sd DAGS_FOLDER/airflow_dag_test.py --cfg_path /tmp/tmplg8ozot6' returned non-zero exit status 1..