自定义气流BashOperator

时间:2019-11-20 15:31:07

标签: python airflow

我正在尝试自定义Airflow BashOperator,但是它不起作用。到目前为止,我已经尝试过了

my_operators.py

from airflow.utils.decorators import apply_defaults
from airflow.operators.bash_operator import BashOperator

class MyCopyOperator(BashOperator):

    template_fields = ('bash_command', 'source_file', 'source_dir', 'target_file', 'target_dir')


    @apply_defaults
    def __init__(
            self,
            bash_command,
            source_file, 
            source_dir, 
            target_file, 
            target_dir,
            *args, **kwargs):

        super(MyCopyOperator, self).__init__(*args, **kwargs)
        self.bash_command = bash_command
        self.source_file = source_file
        self.source_dir = source_dir
        self.target_file = target_file
        self.target_dir = target_dir     

    def execute(self, context):


        self.bash_command =  "cp " + " " + self.source_dir + "/" + self.source_file + " " + self.target_dir + "/" + self.target_file
        super().bash_command =  self.bash_command
        print(super.bash_command)
        print(F"inherited {self.bash_command}")
        super().execute(self,context)

operator_example.py

from datetime import datetime, timedelta
from airflow import DAG
from airflow.models import Variable
from airflow.exceptions import AirflowException
from my_operators import MyCopyOperator

dag_name= 'my_test_dag'

owner = Variable.get("owner_" + dag_name)


default_args = {
    "owner": owner,
    "depends_on_past": False,
    "start_date": datetime(2019, 10, 31),
    'email': ['airflow@example.com'],
    "email_on_failure": False,
    "email_on_retry": False,
    "retries": 1,
    "retry_delay": timedelta(minutes=5),
}

dag = DAG(dag_id=dag_name, default_args=default_args, schedule_interval=None)


copytest_task=MyCopyOperator(
     task_id='copytest_task',
     bash_command="cp",
     source_file="test_file.txt",
     source_dir='/usr/local/airflow',
     target_file="test_file.copied.txt",
     target_dir='/usr/local/airflow',
     dag=dag,
     provide_context=True,
)


copytest_task

Airflow GUI显示错误消息损坏的DAG:[/usr/local/airflow/dags/operator_example.py]参数['bash_command']是必需的

此尝试有什么问题?我知道我可以从https://github.com/apache/airflow/blob/master/airflow/operators/bash_operator.py复制或模仿BashOperator的实现,但这不是我想要的。

1 个答案:

答案 0 :(得分:0)

使用以下运算符。注意我们如何将bash_command传递给我们继承的类。

from airflow.utils.decorators import apply_defaults
from airflow.operators.bash_operator import BashOperator

class MyCopyOperator(BashOperator):

    template_fields = ('bash_command', 'source_file', 'source_dir', 'target_file', 'target_dir')


    @apply_defaults
    def __init__(
            self,
            source_file, 
            source_dir, 
            target_file, 
            target_dir,
            *args, **kwargs):

        super(MyCopyOperator, self).__init__(bash_command="cp " + " " + source_dir + "/" + source_file + " " + target_dir + "/" + target_file, *args, **kwargs)
        self.source_file = source_file
        self.source_dir = source_dir
        self.target_file = target_file
        self.target_dir = target_dir     

示例任务:

MyCopyOperator(
    task_id='print_date12',
    source_file='test.txt',
    source_dir='/Users/kaxilnaik/Desktop',
    target_file="test1.txt",
    target_dir="/Users/kaxilnaik/Desktop/abc",
    dag=dag)