如何解决 Airflow 中的“超时”(Timeout)错误

时间:2019-04-22 14:59:05

标签: error-handling timeout airflow

我有一个包含 3 个任务的新 DAG,DAG 运行正常,但偶尔会在页面顶部显示红色的“超时”(Timeout)错误消息。我不知道为什么会这样。

有人知道是什么原因吗?

这是我的代码(出于谨慎考虑,我更改了一些参数):

from airflow import DAG
from airflow.operators.mysql_operator import MySqlOperator
from datetime import datetime
from airflow.operators.sensors import NamedHivePartitionSensor
from airflow.hooks.presto_hook import PrestoHook
import sys
import os
import logging
sys.path.append(os.environ['SSSSSS'] + '/WWW/WWWWW')
from utils import sql_to_string, parse_exec_to_time, parse_exec_to_date, NewPrestoOperator
from config import emails
from NotifyOperator import NotifyOperator

########################################################################
# Parameters to be set


# Default arguments handed to the DAG below via `default_args=`.
default_args = {
    'owner': 'etl',
    # The month is written as 4 rather than 04: a leading-zero integer literal
    # is an octal literal on Python 2 and a SyntaxError on Python 3.
    'start_date': datetime(2019, 4, 15, 0, 0),
    'depends_on_past': True,
    'wait_for_downstream': True,
    # NOTE(review): `data_team_emails` is neither defined nor imported in the
    # visible part of this file (only `emails` is imported from `config`) --
    # confirm which name is intended.
    'email': data_team_emails,
    'email_on_failure': True,
    'email_on_retry': False
}

# DAG definition: cron schedule '0 * * * *' (minute 0 of every hour), with at
# most one active run at a time.
dag = DAG(
    dag_id='g13-new_lead_form_alert',
    default_args=default_args,
    max_active_runs=1,
    schedule_interval='0 * * * *',
)


def _get_records_pandas(query):
    """Run *query* through the Presto hook and return the result of get_pandas_df.

    The Presto connection id is built from the ``YYYYY`` environment variable
    (``'<value>-new'``). The query text and the elapsed wall-clock time are
    logged at INFO level.
    """
    started_at = datetime.now()
    logging.info("Extract Query={}".format(query))

    conn_id = '{0}-new'.format(os.environ['YYYYY'])
    frame = PrestoHook(presto_conn_id=conn_id).get_pandas_df(query)

    elapsed = datetime.now() - started_at
    logging.info("Extract completed. it took:{}".format(str(elapsed)))
    return frame


# Query reading title/pageloadid from the lead-form aggregate table; the schema
# name is taken from the DDDDDD environment variable when this module loads.
SELECT_ALL_QUERY = 'select title, pageloadid from mysql.{0}.agg_pageloadid_lead_form'
SELECT_ALL_QUERY = SELECT_ALL_QUERY.format(os.environ['DDDDDD'])

# Step 0: sensor on a named Hive partition. The dt/tm partition values are
# Jinja expressions based on execution_date + 60 minutes; the schema name comes
# from the XXXXX environment variable.
t0 = NamedHivePartitionSensor(
    task_id='g13-00-wait_for_partition',
    partition_names=[
        '{2}.table/dt={0}/tm={1}/'.format(
            "{{ (execution_date + macros.timedelta(minutes=60)).strftime('%Y-%m-%d')}}",
            "{{ (execution_date + macros.timedelta(minutes=60)).strftime('%H')}}",
            os.environ['XXXXX'],
        ),
    ],
    metastore_conn_id='RRRRRR',
    dag=dag,
    soft_fail=True,
    pool='sensor_tasks',
    retries=5,
)

# Step 1: truncate the MySQL aggregate table (schema from the LLLLL env var).
# NOTE(review): this table is spelled `agg_pageloaduid_lead_form`, while
# SELECT_ALL_QUERY reads `agg_pageloadid_lead_form` -- confirm whether these
# are meant to be the same table.
t1 = MySqlOperator(
    task_id='g13-01-truncate',
    sql='''
    truncate table {0}.agg_pageloaduid_lead_form
    '''.format(os.environ['LLLLL']),
    mysql_conn_id='AAAA',
    dag=dag,
)


# Step 2: run the insert statement loaded from g13_insert_new_lead.sql.
# The SQL text is filled positionally with: the YYYYY env value, two Jinja
# expressions for the execution date and hour, and the ETL_ENVIRONMENT value.
# (Presumably the Jinja expressions are rendered by the operator at run time --
# verify against NewPrestoOperator.)
t2 = NewPrestoOperator(
    task_id="g13-02-insert_new_lead",
    sql=sql_to_string("/g13_insert_new_lead.sql").format(
        os.environ['YYYYY'],
        "{{execution_date.strftime('%Y-%m-%d')}}",
        "{{execution_date.strftime('%H')}}",
        os.environ['ETL_ENVIRONMENT'],
    ),
    presto_conn_id='{0}-new'.format(os.environ['XXXXX']),
    provide_context=True,
    fail_on_zero_rows=False,
    retries=5,
    retry_delay=60,
    pool='presto_tasks',
    dag=dag,
)


# Step 3: notification task.
# NOTE(review): `_get_records_pandas(SELECT_ALL_QUERY)` below is evaluated while
# this module is being imported, so the Presto query runs every time the file
# is loaded rather than when the task executes. If the file is parsed
# repeatedly by Airflow, a slow query could plausibly exceed the DAG import
# timeout -- confirm, and consider deferring the query to task execution if
# NotifyOperator supports that.
t3 = NotifyOperator(
    task_id='g13-03-notification',
    channels=['test'],
    email_recipients=[],
    email_subject='New Lead Alert',
    email_template="""abc""",
    op_kwargs={
        'title': 'New Lead Form',
        'response': _get_records_pandas(SELECT_ALL_QUERY),
    },
    dag=dag,
)

# Chain the four tasks in sequence: partition sensor, truncate, insert, notify.
t0 >> t1 >> t2 >> t3

有人知道这是什么原因造成的吗?

0 个答案:

没有答案