I am trying to process data for multiple states in a PySpark Databricks environment, and I want to log the execution to a desired location using the Python logging module. This is my logging configuration (JSON):
{
    "version": 1,
    "disable_existing_loggers": false,
    "handlers": {
        "console": {
            "level": "DEBUG",
            "formatter": "detailed",
            "class": "logging.StreamHandler"
        },
        "file": {
            "class": "logging.FileHandler",
            "level": "DEBUG",
            "formatter": "detailed",
            "filename": "logfile",
            "mode": "w"
        },
        "default": {
            "level": "INFO",
            "class": "logging.StreamHandler"
        }
    },
    "formatters": {
        "detailed": {
            "format": "%(asctime)s.%(msecs)03d %(levelname)s - %(message)s"
        }
    },
    "loggers": {
        "driver": {
            "level": "INFO",
            "handlers": ["console", "file"],
            "propagate": false
        },
        "executor": {
            "level": "DEBUG",
            "handlers": ["console", "file"],
            "propagate": false
        }
    }
}
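For reference, here is a minimal sketch of how this configuration gets applied, assuming the JSON above is saved as log_settings.json next to the notebook (the filename is just a placeholder for wherever the config actually lives):

import json
import logging
import logging.config

# Load the dict config shown above and apply it.
with open("log_settings.json", encoding="UTF-8") as f:
    log_settings = json.load(f)
logging.config.dictConfig(log_settings)

# Both named loggers from the config are now available.
logging.getLogger("driver").info("driver logger configured")
logging.getLogger("executor").debug("executor logger configured")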
The getlogger function:
import datetime
import json
import logging
import logging.config
import os

def getlogger(name, state, logfile):
    now_time = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    state_dir = os.path.join(logfile, state)  # one sub-directory per state
    if not os.path.isdir(state_dir):
        os.mkdir(state_dir)
    # log_settings_path is expected to point to the JSON configuration shown above
    with open(log_settings_path, encoding='UTF-8') as json_data_file:
        log_settings = json.load(json_data_file)
    log_file_name = state + '_' + now_time + '.log'
    # redirect the "file" handler to the state-specific log file
    log_settings['handlers']['file']['filename'] = os.path.join(state_dir, log_file_name)
    logging.config.dictConfig(log_settings)
    logger = logging.getLogger(name)
    return logger
Using PySpark in the Databricks environment, the plan is to create a separate log file, inside its own directory, for each state that I process:
import logging
import os

logfile = "/home/logs_dir"
input_path = "/home/data/"
states = ["a", "b", "c", "d"]

for state in states:
    log = getlogger('executor', state, logfile)
    dataframe = spark.read.csv(input_path, inferSchema=True, header=True)
    log.debug('state executing: %s', state)
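As a sanity check, here is a standalone sketch (no Spark, temp directory instead of the real paths) of the mechanism the loop above relies on: each call to logging.config.dictConfig() rebuilds the "file" handler with the new filename, so every iteration should get its own log file:

import logging
import logging.config
import os
import tempfile

log_dir = tempfile.mkdtemp()  # stand-in for /home/logs_dir

config = {
    "version": 1,
    "disable_existing_loggers": False,
    "formatters": {"detailed": {"format": "%(asctime)s %(levelname)s - %(message)s"}},
    "handlers": {
        "file": {
            "class": "logging.FileHandler",
            "level": "DEBUG",
            "formatter": "detailed",
            "filename": "placeholder.log",
            "mode": "w",
        }
    },
    "loggers": {
        "executor": {"level": "DEBUG", "handlers": ["file"], "propagate": False}
    },
}

for state in ["a", "b"]:
    # Point the file handler at a state-specific file before re-applying the config.
    config["handlers"]["file"]["filename"] = os.path.join(log_dir, state + ".log")
    logging.config.dictConfig(config)  # re-creates the handler with the new filename
    logging.getLogger("executor").debug("state executing: %s", state)

print(os.listdir(log_dir))  # expect a.log and b.log (order may vary)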
I am facing two types of issues: