我有一个Databricks计划作业,该作业顺序运行5个不同的笔记本,每个笔记本包含5个不同的命令单元。当作业在笔记本3的cmd单元3中失败时,我可以正确地从故障中恢复,但我不确定能否从笔记本3的单元4、甚至从笔记本4的开头重新启动该计划作业(假设我已经手动完成了笔记本3中剩余的cmd单元)。以下是我的一项作业的示例:
%python
import sys
try:
dbutils.notebook.run("/01. SMETS1Mig/" + dbutils.widgets.get("env_parent_directory") + "/02 Processing Curated Staging/02 Build - Parameterised/Load CS Feedback Firmware STG", 6000, {
"env_ingest_db": dbutils.widgets.get("env_ingest_db")
, "env_stg_db": dbutils.widgets.get("env_stg_db")
, "env_tech_db": dbutils.widgets.get("env_tech_db")
})
except Exception as error:
sys.exit('Failure in Load CS Feedback Firmware STG ({error})')
try:
dbutils.notebook.run("/01. SMETS1Mig/" + dbutils.widgets.get("env_parent_directory") + "/03 Processing Curated Technical/02 Build - Parameterised/Load CS Feedback Firmware TECH", 6000, {
"env_ingest_db": dbutils.widgets.get("env_ingest_db")
, "env_stg_db": dbutils.widgets.get("env_stg_db")
, "env_tech_db": dbutils.widgets.get("env_tech_db")
})
except Exception as error:
sys.exit('Failure in Load CS Feedback Firmware TECH ({error})')
try:
dbutils.notebook.run("/01. SMETS1Mig/" + dbutils.widgets.get("env_parent_directory") + "/02 Processing Curated Staging/02 Build - Parameterised/STA_6S - CS Firmware Success", 6000, {
"env_ingest_db": dbutils.widgets.get("env_ingest_db")
, "env_stg_db": dbutils.widgets.get("env_stg_db")
, "env_tech_db": dbutils.widgets.get("env_tech_db")
})
except Exception as error:
sys.exit('Failure in STA_6S - CS Firmware Success ({error})')
答案 0(得分:1):
不要调用 sys.exit,因为它会直接退出 Python 解释器;只要发生异常,就让它自然冒出来即可。
先运行 %pip install retry 以安装 retry 软件包,然后导入:
from retry import retry, retry_call
@retry(Exception, tries=3)
def idempotent_run(notebook, timeout=6000, **args):
    """Run *notebook* at most once per unique argument set, retrying on error.

    Checks a bookkeeping table (meta.state) for a row matching this
    (notebook, args) pair; if one exists the run is skipped and None is
    returned. Otherwise the notebook is executed via dbutils.notebook.run
    and, on success, the pair is recorded so reruns become no-ops.

    Parameters:
        notebook: full workspace path of the notebook to run.
        timeout: seconds passed to dbutils.notebook.run (default 6000).
        **args: notebook widget arguments; they also form the idempotency key.

    Returns:
        The child notebook's exit value, or None when the run was skipped.
    """
    # NOTE: inspiration-level code from the original answer — adjust to your
    # needs; it is not guaranteed to work for every case.
    # Hoisted: the original built str(sorted(args.items())) twice.
    key = str(sorted(args.items()))
    # SECURITY/ROBUSTNESS: notebook and key are interpolated straight into
    # SQL text; a single quote in a path or argument value breaks the
    # statement (and invites injection). Prefer parameterized SQL or a
    # Delta MERGE keyed on escaped values before using this in production.
    did_it_run_before = spark.sql(
        f"SELECT COUNT(*) FROM meta.state "
        f"WHERE notebook = '{notebook}' AND args = '{key}'"
    ).first()[0]
    if did_it_run_before > 0:
        return  # already recorded as successful for this exact argument set
    result = dbutils.notebook.run(notebook, timeout, args)
    # Record success only after the run completes, so a failed attempt is
    # retried by @retry rather than being skipped next time.
    spark.sql(
        f"INSERT INTO meta.state SELECT '{notebook}' AS notebook, '{key}' AS args"
    )
    return result
pd = dbutils.widgets.get("env_parent_directory")

# Invoke this from the cell that corresponds to the notebook step below.
target_notebook = (
    f"/01. SMETS1Mig/{pd}/03 Processing Curated Technical"
    f"/02 Build - Parameterised/Load CS Feedback Firmware TECH"
)
idempotent_run(
    target_notebook,
    # this_date is part of the idempotency key: choose a value that matches
    # the job's schedule frequency (e.g. the run date for a daily job).
    this_date='2020-09-28',
    env_ingest_db=dbutils.widgets.get("env_ingest_db"),
    env_stg_db=dbutils.widgets.get("env_stg_db"),
    env_tech_db=dbutils.widgets.get("env_tech_db"))