我正在尝试自定义Airflow BashOperator,但是它不起作用。到目前为止,我已经尝试过了
my_operators.py
from airflow.utils.decorators import apply_defaults
from airflow.operators.bash_operator import BashOperator
class MyCopyOperator(BashOperator):
template_fields = ('bash_command', 'source_file', 'source_dir', 'target_file', 'target_dir')
@apply_defaults
def __init__(
self,
bash_command,
source_file,
source_dir,
target_file,
target_dir,
*args, **kwargs):
super(MyCopyOperator, self).__init__(*args, **kwargs)
self.bash_command = bash_command
self.source_file = source_file
self.source_dir = source_dir
self.target_file = target_file
self.target_dir = target_dir
def execute(self, context):
self.bash_command = "cp " + " " + self.source_dir + "/" + self.source_file + " " + self.target_dir + "/" + self.target_file
super().bash_command = self.bash_command
print(super.bash_command)
print(F"inherited {self.bash_command}")
super().execute(self,context)
operator_example.py
from datetime import datetime, timedelta
from airflow import DAG
from airflow.models import Variable
from airflow.exceptions import AirflowException
from my_operators import MyCopyOperator
dag_name= 'my_test_dag'
owner = Variable.get("owner_" + dag_name)
default_args = {
"owner": owner,
"depends_on_past": False,
"start_date": datetime(2019, 10, 31),
'email': ['airflow@example.com'],
"email_on_failure": False,
"email_on_retry": False,
"retries": 1,
"retry_delay": timedelta(minutes=5),
}
dag = DAG(dag_id=dag_name, default_args=default_args, schedule_interval=None)
copytest_task=MyCopyOperator(
task_id='copytest_task',
bash_command="cp",
source_file="test_file.txt",
source_dir='/usr/local/airflow',
target_file="test_file.copied.txt",
target_dir='/usr/local/airflow',
dag=dag,
provide_context=True,
)
copytest_task
Airflow GUI显示错误消息损坏的DAG:[/usr/local/airflow/dags/operator_example.py]参数['bash_command']是必需的
此尝试有什么问题?我知道我可以从https://github.com/apache/airflow/blob/master/airflow/operators/bash_operator.py复制或模仿BashOperator的实现,但这不是我想要的。
答案 0 :(得分:0)
使用以下运算符。注意我们如何将bash_command
传递给我们继承的类。
from airflow.utils.decorators import apply_defaults
from airflow.operators.bash_operator import BashOperator
class MyCopyOperator(BashOperator):
template_fields = ('bash_command', 'source_file', 'source_dir', 'target_file', 'target_dir')
@apply_defaults
def __init__(
self,
source_file,
source_dir,
target_file,
target_dir,
*args, **kwargs):
super(MyCopyOperator, self).__init__(bash_command="cp " + " " + source_dir + "/" + source_file + " " + target_dir + "/" + target_file, *args, **kwargs)
self.source_file = source_file
self.source_dir = source_dir
self.target_file = target_file
self.target_dir = target_dir
示例任务:
MyCopyOperator(
task_id='print_date12',
source_file='test.txt',
source_dir='/Users/kaxilnaik/Desktop',
target_file="test1.txt",
target_dir="/Users/kaxilnaik/Desktop/abc",
dag=dag)