“运行时未满足的依赖关系”:创建动态依赖关系时出错

时间:2019-03-16 16:43:40

标签: python luigi

我试图根据一个列表动态创建Luigi任务类,列表中的每一项包含类名、文件名和“depend_task”类(即该类所依赖的另一个类),结果出现了上述运行时错误。

难点在于,每个类的定义都需要引用与它同时动态定义的“depend_task”类。以下是我失败的尝试和错误消息。

您的帮助将不胜感激。

class BDX_Query_0XX(SQLTask):
    """Driver task that spawns one dynamically created task per BDX query.

    Parameters:
        acctDate: accounting period string; the first four characters are
            used as the year and characters 4-5 as the month.
        ssisDate: forwarded unchanged to every spawned task.
        runDesc: run description; appended to the transaction id and
            forwarded to every spawned task.
    """

    acctDate = luigi.Parameter()
    ssisDate = luigi.Parameter()
    runDesc = luigi.Parameter()

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # Static prefix plus the run description.
        self.trans_id = "00902_BDX_Query_0XX" + "__" + self.runDesc

    def run(self):
        """Build one task per query command and yield them as dependencies.

        Each entry of ``cmdList`` is ``(queryKey, queryCmd, dependQry)``
        where ``dependQry`` is the key of the query it depends on, or
        ``None`` when it has no upstream query.
        """
        acctDate = self.acctDate
        YY = acctDate[:4]
        MM = acctDate[4:6]
        bdx_sql = r'r:\\1.SQL\\BDX_SQL\\'
        cmdList = [
            ('BDX010', f'{bdx_sql}BDX_001_NI_DM 010.sql -o output010.txt', None),
            ('BDX020', f'{bdx_sql}BDX_001_NI_DM 020.sql -o output020.txt', 'BDX010'),
            ('BDX022a', f'{bdx_sql}BDX_022_P038_All_Final_CatAdj 010.sql -o output022a.txt', 'BDX020'),
            ('BDX022b', f'{bdx_sql}BDX_022_P038_All_Final_CatAdj 020.sql -o output022b.txt -v Year1={YY} MM={MM}', 'BDX022a'),
            ('BDX022c', f'{bdx_sql}BDX_022_P038_All_Final_CatAdj 030.sql -o output022c.txt -v Year={YY} Month={MM}', 'BDX022b'),
            ('BDX023', f'{bdx_sql}BDX_023_P031_MTD_All_Final_CatAdj.sql -o output023.txt ', 'BDX020'),
            ('BDX024', f'{bdx_sql}BDX_024_P031_ITD_All_Final_CatAdj.sql -o output024.txt', 'BDX020'),
            ('BDX025a', f'{bdx_sql}BDX_025_P038_All_Final_CatAdj 010.sql -o output025a.txt', 'BDX020'),
            ('BDX025b', f'{bdx_sql}BDX_025_P038_All_Final_CatAdj 020.sql -o output025b.txt -v Year={YY} Month={MM}', 'BDX025a'),
            ('BDX025c', f'{bdx_sql}BDX_025_P038_All_Final_CatAdj 030.sql -o output025c.txt -v YYMM={acctDate}', 'BDX025b'),
        ]

        # A fresh BDX_Task subclass named after each queryKey is created and
        # instantiated. The question reports the runtime error as being
        # raised from this task-construction statement.
        tasks = [
            type(queryKey, (BDX_Task,), {})(
                acctDate=self.acctDate,
                ssisDate=self.ssisDate,
                queryKey=queryKey,
                queryCmd=queryCmd,
                runDesc=self.runDesc,
                dependQry=dependQry,
            )
            for queryKey, queryCmd, dependQry in cmdList
        ]
        # Yielding from run() hands the tasks to the scheduler as dynamic
        # dependencies of this task.
        yield tasks

        # presumably marks this task as done; get_target() is inherited from
        # SQLTask, which is not visible here -- TODO confirm.
        self.get_target().touch()



class BDX_Task_Base(SQLTask):
    """Common parameter set for the per-query BDX tasks.

    Declares the parameters every query task receives and derives the
    task's ``trans_id`` from ``queryKey`` and ``runDesc``.
    """

    # Parameter declaration order is kept as in the original definition.
    acctDate = luigi.Parameter()
    ssisDate = luigi.Parameter(default=None)
    queryKey = luigi.Parameter()
    queryCmd = luigi.Parameter()
    runDesc = luigi.Parameter()
    dependQry = luigi.Parameter()

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # The transaction id is always recomputed from the query key and the
        # run description.
        key = self.queryKey
        desc = self.runDesc
        self.trans_id = f"00903_BDX_Query_{key}__{desc}"


class BDX_Task(BDX_Task_Base):
    """Task that runs a single query command after an optional upstream task."""

    def requires(self):
        """Return the list of upstream tasks for this query.

        Returns:
            An empty list when ``dependQry`` is falsy, otherwise a
            one-element list holding the upstream task instance.
        """
        dependQry = self.dependQry
        if not dependQry:
            return []

        if isinstance(dependQry, str):
            # A class name given as a string: create a class of that name
            # derived from BDX_Task_Base.
            depend_class = type(dependQry, (BDX_Task_Base,), {})
        else:
            # Already the class itself.
            depend_class = dependQry

        # NOTE(review): the upstream task is instantiated with THIS task's
        # queryKey / queryCmd / dependQry rather than the upstream query's
        # own values, so its parameters (and the trans_id that
        # BDX_Task_Base.__init__ derives from queryKey) mirror this task's.
        # That looks related to the "Unfulfilled dependency at run time"
        # failure -- confirm, and pass the upstream query's values if so.
        return [
            depend_class(
                acctDate=self.acctDate,
                ssisDate=self.ssisDate,
                queryKey=self.queryKey,
                queryCmd=self.queryCmd,
                runDesc=self.runDesc,
                dependQry=self.dependQry,
            )
        ]

    def run(self):
        """Run the query command through the shell and print its exit code."""
        # NOTE(review): shell=True executes queryCmd as a shell string; keep
        # queryCmd restricted to trusted, internally built commands.
        print(subprocess.call(self.queryCmd, shell=True))
        # presumably marks this task as done; get_target() comes from a base
        # class not visible here -- TODO confirm.
        self.get_target().touch()

完整堆栈跟踪

 Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\luigi\worker.py", line 182, in run
    raise RuntimeError('Unfulfilled %s at run time: %s' % (deps, ', '.join(missing)))
RuntimeError: Unfulfilled dependency at run time: BDX025b_201902_BDX025b_r___1_SQL__BDX_S_7bda6cb3b4



Scheduled 34 tasks of which:
* 20 ran successfully:
    - 2 BDX010(...)
    - 8 BDX020(acctDate=201902, ssisDate=201903, queryKey=BDX022a, queryCmd=r:\\1.SQL\\BDX_SQL\\BDX_022_P038_All_Final_CatAdj 010.sql -o output022a.txt, runDesc=201902 Luigi test1, dependQry=BDX020) ...
    - 2 BDX022a(...)
    - 2 BDX022b(...)
    - 2 BDX025a(...)
    ...
* 10 failed:
    - 1 BDX010(acctDate=201902, ssisDate=201903, queryKey=BDX010, queryCmd=r:\\1.SQL\\BDX_SQL\\BDX_001_NI_DM 010.sql -o output010.txt, runDesc=201902 Luigi test1, dependQry=None)
    - 1 BDX020(acctDate=201902, ssisDate=201903, queryKey=BDX020, queryCmd=r:\\1.SQL\\BDX_SQL\\BDX_001_NI_DM 020.sql -o output020.txt, runDesc=201902 Luigi test1, dependQry=BDX010)
    - 1 BDX022a(acctDate=201902, ssisDate=201903, queryKey=BDX022a, queryCmd=r:\\1.SQL\\BDX_SQL\\BDX_022_P038_All_Final_CatAdj 010.sql -o output022a.txt, runDesc=201902 Luigi test1, dependQry=BDX020)
    - 1 BDX022b(...)
    - 1 BDX022c(...)
    ...
* 4 were left pending, among these:
    * 1 were missing external dependencies:
        - 1 BDX_Query_0XX(acctDate=201902, ssisDate=201903, runDesc=201902 Luigi test1)

1 个答案:

答案 0 :(得分:0)

BDX_Task是BDX_Task_Base的子类。在BDX_Task_Base的初始化方法中,您调用了super(BDX_Task).__init__(...),因此它会再次运行init,而init又会再次调用BDX_Task_Base的初始化,依此类推。我猜您本来是想写super(BDX_Task_Base),这样就不会导致无限递归。只要您使用的是较新版本的python,super().__init__(...)的效果就和super(BDX_Task_Base, self).__init__(...)一样。