Airflow DAG 显示正在运行,但什么也没发生

时间:2019-09-10 15:04:14

标签: airflow google-cloud-composer

我的 Airflow DAG 一直显示为运行中(running)状态,但实际上什么任务都没有执行。我花了很多时间,仍然不明白问题出在哪里。代码中的第一个 DAG 负责设置一个 Airflow 变量(Variable),第二个 DAG 读取该变量,并根据变量中的值运行任务。第二个 DAG 在第一个 DAG 之后 15 分钟运行,但它始终显示为运行中。请大家帮忙看看,谢谢。

Airflow 版本:1.9.0 enter image description here

import datetime as dt
from airflow import models
from airflow.contrib.operators.bigquery_operator import BigQueryOperator
from airflow.operators import python_operator
from utils.etl_fail_slack_alert import task_fail_slack_alert
from airflow.contrib.hooks.bigquery_hook import BigQueryHook
from airflow.operators.dummy_operator import DummyOperator
from airflow.models import Variable

# BigQuery location settings used by the query builders in this file.
project = 'asd'
source_dataset = 'dfghdfh'
destination_dataset = 'dfhdfhdf'
# Prepended to every destination table name; empty means "same name as the view".
table_prefix = ''

# Arguments applied to every task of the DAGs declared in this file.
default_args = {
    # Airflow documents start_date as a datetime object; a plain string is
    # not guaranteed to be parsed by every release, so pass a real datetime.
    'start_date': dt.datetime(2019, 9, 10),
    'retries': 1,
    'retry_delay': dt.timedelta(minutes=2),
    'on_failure_callback': task_fail_slack_alert,
}

def set_views_av():
    """Store the table ids of the source dataset in the ``view_list`` Variable.

    Runs a standard-SQL query against ``__TABLES__`` of ``source_dataset`` in
    ``project`` through the ``bigquery_default`` connection, then saves the
    returned ``table_id`` values as a single ``|``-separated string.
    """
    listing_sql = "SELECT table_id FROM `{project}.{dataset}.{table}`;".format(
        project=project,
        dataset=source_dataset,
        table='__TABLES__',
    )

    hook = BigQueryHook(
        bigquery_conn_id='bigquery_default',
        delegate_to=None,
        use_legacy_sql=False,
    )
    tables = hook.get_pandas_df(sql=listing_sql, dialect='standard')

    # The Variable holds one string, so the ids are joined with '|'.
    Variable.set('view_list', '|'.join(tables['table_id'].tolist()))


def bq_operator(vname, dag):
    """Build the task that copies one source view into the destination dataset.

    Args:
        vname: Name of the view/table inside ``source_dataset``.
        dag: DAG the created operator is attached to.

    Returns:
        A ``BigQueryOperator`` with task id ``materialize_<vname>`` that runs
        ``SELECT *`` on the source and overwrites the destination table
        (``WRITE_TRUNCATE``), creating it when needed.
    """
    select_all = "SELECT * FROM `{project}.{dataset}.{table}`".format(
        project=project,
        dataset=source_dataset,
        table=vname,
    )
    target_table = ".".join([project, destination_dataset, table_prefix + vname])

    return BigQueryOperator(
        task_id="materialize_" + vname,
        bql=select_all,
        destination_dataset_table=target_table,
        bigquery_conn_id="bigquery_default",
        google_cloud_storage_conn_id="google_cloud_default",
        use_legacy_sql=False,
        write_disposition="WRITE_TRUNCATE",
        create_disposition="CREATE_IF_NEEDED",
        query_params={},
        allow_large_results=True,
        dag=dag,
    )


# DAG that refreshes the 'view_list' Variable; scheduled at 09:15 every day.
with models.DAG(
    dag_id="materialize_init_views",
    default_args=default_args,
    schedule_interval="15 09 * * *",
    catchup=True,
) as dag_init:
    bridge = DummyOperator(task_id='bridge', dag=dag_init)
    set_views = python_operator.PythonOperator(
        task_id="set_views",
        python_callable=set_views_av,
    )

    # bridge runs first, then the Variable is (re)written.
    bridge.set_downstream(set_views)


# DAG that creates one materialize task per name stored in the 'view_list'
# Variable; scheduled at 09:30 every day.
with models.DAG(dag_id="materialize_views_dynamic", default_args=default_args, schedule_interval="30 09 * * *", catchup=True) as dag:

    # This lookup runs every time the file is parsed. Without a default, a
    # missing 'view_list' Variable raises KeyError and the whole file fails to
    # import -- including the DAG that is supposed to create the Variable.
    raw_view_list = Variable.get("view_list", default_var="")
    # "".split("|") yields [""]; drop empty names so no "materialize_" task
    # pointing at a nameless table is generated.
    views = [name for name in raw_view_list.split("|") if name]
    bridge = DummyOperator(
        task_id='bridge',
        dag=dag
    )

    for vname in views:
        materialize_view_bq = bq_operator(vname, dag)
        bridge >> materialize_view_bq

0 个答案:

没有答案