我正在尝试创建一个dag,我在一个任务中从表跟踪器获取表名,然后我想在每个表上运行不同的SQL操作(任务) 我能够使用xcom将表名从一个任务传递到另一个任务,但for循环中的任务没有被执行 我甚至无法在Web UI中看到这些任务
from __future__ import print_function
from airflow import DAG
from airflow.operators.sensors import SqlSensor
from airflow import hooks
from airflow.operators import BashOperator, PostgresOperator
from airflow.operators.python_operator import PythonOperator
from datetime import datetime,date, timedelta
import logging
import time
from time import sleep
import psycopg2
from pprint import pprint
default_args = {
'owner' : 'navjot',
'depends_on_past': False,
'start_date': datetime(2016,12,29,23,0),
'email': ['navjot@airflow.com'],
'email_on_failure': False,
'email_on_retry': False,
#'retries': 1,
'retry_delay': timedelta(minutes=5)
}
dag = DAG('sensesql_v5', default_args=default_args, schedule_interval = '@hourly')
""" This function gives all the tables which are ready to be enriched, List of tables names would be generated using taskt2"""
def get_tablename_enrich():
conn = psycopg2.connect(database='xxx',user=config['user'],password=config['password'],host=config['host'],port = config['port'])
cursor = conn.cursor()
query = """Select fore_table_name from results.forecasting_run_tabt where enrich_status = 'pending';"""
cursor.execute(query)
row = cursor.fetchall()
table_name=[]
for r in row:
table_name.append(r[0])
conn.close()
return table_name
#def testpo():
# value1 = 4
# return value1
#value=4
#def push_function(value):
# return value
""" sensing table tracket in t1"""
t1 = SqlSensor(
task_id='sqlsensing',
poke_interval=10,
timeout = 3200,
sql = """Select * from results.forecasting_run_tabt where enrich_status = 'pending';""",
conn_id = 'postgresterra',
dag=dag
)
"""getting table names with a condition"""
t2 = PythonOperator(
task_id='testpo1',
python_callable=get_tablename_enrich,
dag=dag)
"""run enrichment SQL operations on each table which we got from task t2. There is just on task in this function
we are going to run 11-12 tasks on each table"""
def run_enrich_ontables(*args, **kwargs):
ti = kwargs['ti']
pprint(kwargs)
tablenames = ti.xcom_pull(task_ids='testpo1')
pprint(tablenames)
for i in range(len(tablenames)):
t4 = PostgresOperator(
task_id='testxcom'+str(i),
sql = "update results.forecasting_run_tabt set enrich_status = 'running' where fore_table_name = '{}';".format(ti.xcom_pull(task_ids='testpo1')[i]),
postgres_conn_id = 'postgrestest',
autocommit = True,
dag=dag)
t4.set_upstream(t3)
"""This task is calling function which is enriching tables"""
t3 = PythonOperator(
task_id='run_all_tables',
provide_context=True,
python_callable=run_enrich_ontables,
dag=dag)
t2.set_upstream(t1)
t3.set_upstream(t2)
答案 0 :(得分:0)
我认为dag
函数的执行上下文中run_enrich_ontables
为None
。为了在传递给PythonOperator
的函数中的上下文中引用宏,您应该使用kwargs
dict,就像使用ti
一样。要么添加
dag = kwargs['dag']
到函数顶部,以便稍后引用dag
或更改t4定义以直接使用kwargs
t4 = PostgresOperator(
task_id='testxcom'+str(i),
sql = "update results.forecasting_run_tabt set enrich_status = 'running' where fore_table_name = '{}';".format(ti.xcom_pull(task_ids='testpo1')[i]),
postgres_conn_id = 'postgrestest',
autocommit = True,
dag=kwargs['dag'])