Python function refactoring

Date: 2018-10-22 08:49:22

Tags: python refactoring

I have written the two functions below:

def dataproc_first_job(self, task_id, app, job):
    return DataProcSparkOperator(
        task_id=task_id,
        dataproc_spark_jars=self.jar,
        cluster_name=self.cluster,
        main_class=self.main_name,
        dataproc_spark_properties={
            'spark.driver.extraJavaOptions': '-DAppConfig.appName=%s -DTrmRaw.Options.jobName=%s' % (app, job)
        },
        trigger_rule=TriggerRule.ALL_DONE,
    )

def dataproc_second_job(self, task_id, app, job, prefix, dataset):
    return DataProcSparkOperator(
        task_id=task_id,
        dataproc_spark_jars=self.jar,
        cluster_name=self.cluster,
        main_class=self.main_name,
        dataproc_spark_properties={
            'spark.driver.extraJavaOptions': '-DAppConfig.appName=%s -DTrmRaw.Options.jobName=%s -DTrm.Options.prefix=%s -DTrm.Metadata.outputBase=%s' % (app, job, prefix, dataset)
        },
        trigger_rule=TriggerRule.ALL_DONE,
    )

My goal is to refactor this Python code so that it uses a single function instead of two. I considered using a decorator, roughly along the lines of the sketch below, but I am not sure that is the best solution.
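
The decorator idea would factor the shared DataProcSparkOperator arguments out of every job method; this is only an untested sketch:

def dataproc_task(build_properties):
    # Wrap a properties builder so each job only defines its own options
    # dict; the shared operator arguments live in one place.
    def wrapper(self, task_id, *args, **kwargs):
        return DataProcSparkOperator(
            task_id=task_id,
            dataproc_spark_jars=self.jar,
            cluster_name=self.cluster,
            main_class=self.main_name,
            dataproc_spark_properties=build_properties(*args, **kwargs),
            trigger_rule=TriggerRule.ALL_DONE,
        )
    return wrapper

@dataproc_task
def dataproc_first_job(app, job):
    return {'spark.driver.extraJavaOptions': '-DAppConfig.appName=%s -DTrmRaw.Options.jobName=%s' % (app, job)}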

Does anyone have ideas on how best to approach this?

2 answers:

Answer 0 (score: 1):

I don't think you need a decorator. I would do it like this:

def dataproc_first_job(self, task_id, app, job, prefix=None, dataset=None):
    if prefix is None or dataset is None:
        dataproc_spark_properties = {
            'spark.driver.extraJavaOptions': '-DAppConfig.appName=%s -DTrmRaw.Options.jobName=%s' % (app, job)
        }
    else:
        dataproc_spark_properties = {
            'spark.driver.extraJavaOptions': '-DAppConfig.appName=%s -DTrmRaw.Options.jobName=%s -DTrm.Options.prefix=%s -DTrm.Metadata.outputBase=%s' % (app, job, prefix, dataset)
        }
    return DataProcSparkOperator(
        task_id=task_id,
        dataproc_spark_jars=self.jar,
        cluster_name=self.cluster,
        main_class=self.main_name,
        dataproc_spark_properties=dataproc_spark_properties,
        trigger_rule=TriggerRule.ALL_DONE,
    )
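
Both of the original call sites then go through the single function; for illustration (the task ids and values here are hypothetical):

first = self.dataproc_first_job('first_task', 'my_app', 'first_job')
second = self.dataproc_first_job('second_task', 'my_app', 'second_job',
                                 prefix='raw/', dataset='gs://bucket/output')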

Answer 1 (score: 0):

You can use default arguments for the function:

def dataproc_job(self, task_id, app, job, prefix=None, dataset=None):
    if prefix is not None and dataset is not None:
        props = {
            'spark.driver.extraJavaOptions': '-DAppConfig.appName=%s -DTrmRaw.Options.jobName=%s -DTrm.Options.prefix=%s -DTrm.Metadata.outputBase=%s' % (app, job, prefix, dataset)
        }
    else:
        props = {
            'spark.driver.extraJavaOptions': '-DAppConfig.appName=%s -DTrmRaw.Options.jobName=%s' % (app, job)
        }

    return DataProcSparkOperator( ... ) # Build your object as normal

Or, if there are more optional parameters you might want to pass, you can use kwargs:

def dataproc_job(self, task_id, app, job, **kwargs):
    if kwargs.get("prefix") is not None and kwargs.get("dataset") is not None:
        props = {
            'spark.driver.extraJavaOptions': '-DAppConfig.appName=%s -DTrmRaw.Options.jobName=%s -DTrm.Options.prefix=%s -DTrm.Metadata.outputBase=%s' % (app, job, kwargs["prefix"], kwargs["dataset"])
        }
    else:
        props = {
            'spark.driver.extraJavaOptions': '-DAppConfig.appName=%s -DTrmRaw.Options.jobName=%s' % (app, job)
        }

    return DataProcSparkOperator( ... ) # Build your object as normal
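
Note that kwargs.get is the safer lookup here: a call that omits prefix or dataset falls through to the short options string instead of raising a KeyError, as kwargs["prefix"] would.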

At this stage, either way, I would refactor once more and extract the properties-building part into a separate function:

def get_dataproc_spark_properties(app, job, prefix=None, dataset=None):
    if prefix is not None and dataset is not None:
        # etc...
        # Build and return the properties dict

def get_dataproc_spark_properties(app, job, **kwargs):
    if kwargs.get("prefix") is not None and kwargs.get("dataset") is not None:
        # etc...
        # Build and return the properties dict
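
Filled in, the default-argument variant might look like this minimal sketch (app and job are part of the signature because the options string interpolates them):

def get_dataproc_spark_properties(app, job, prefix=None, dataset=None):
    # Start from the options every job needs, then append the extras
    # only when both optional values are present.
    options = '-DAppConfig.appName=%s -DTrmRaw.Options.jobName=%s' % (app, job)
    if prefix is not None and dataset is not None:
        options += ' -DTrm.Options.prefix=%s -DTrm.Metadata.outputBase=%s' % (prefix, dataset)
    return {'spark.driver.extraJavaOptions': options}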

Then call this function from your dataproc_job function:

def dataproc_job(self, task_id, app, job, prefix=None, dataset=None):
    props = get_dataproc_spark_properties(app, job, prefix, dataset)

def dataproc_job(self, task_id, app, job, **kwargs):
    props = get_dataproc_spark_properties(app, job, **kwargs)
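
Putting the pieces together, the single refactored method might read as follows (a sketch under the same assumptions as above):

def dataproc_job(self, task_id, app, job, prefix=None, dataset=None):
    return DataProcSparkOperator(
        task_id=task_id,
        dataproc_spark_jars=self.jar,
        cluster_name=self.cluster,
        main_class=self.main_name,
        dataproc_spark_properties=get_dataproc_spark_properties(app, job, prefix, dataset),
        trigger_rule=TriggerRule.ALL_DONE,
    )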