在按照「"'JavaPackage' 对象不可调用":PySpark」一问中建议的代码修改后,在 Databricks 的 PySpark 中仍然出现 "'JavaPackage' object is not callable" 错误。
我的代码如下:
import json
import sys
import time
import traceback

import sagemaker_pyspark

import pydeequ
from pydeequ.analyzers import *
from pydeequ.suggestions import *
from pydeequ.verification import *
from pyspark.sql import SparkSession
# Build the driver classpath from the SageMaker Spark JARs so the Deequ JVM
# classes are resolvable — a missing JAR on the driver classpath is the usual
# cause of "'JavaPackage' object is not callable".
classpath = ":".join(sagemaker_pyspark.classpath_jars())
spark = SparkSession \
    .builder \
    .appName("Sales_Transaction_Full_load") \
    .config("spark.some.config.option", "some-value") \
    .config("spark.driver.extraClassPath", classpath) \
    .config("spark.jars.packages", pydeequ.deequ_maven_coord) \
    .config("spark.jars.excludes", pydeequ.f2j_maven_coord) \
    .getOrCreate()
# BUG FIX: this statement previously ran *before* the session was built,
# raising NameError in plain PySpark (and, on Databricks where a `spark`
# global pre-exists, configuring the wrong session). It must run after
# getOrCreate().
spark.sql("set spark.sql.legacy.timeParserPolicy=LEGACY")
# Azure Blob Storage credentials (placeholders here; NOTE(review): real keys
# should come from a Databricks secret scope, not be hard-coded in a notebook).
storage_account_name = "xxxxxxxx"
storage_account_access_key = "xxxxxxxxxxxxxxxxxxxxxxxxxx"

# Register the account key under the per-account Hadoop configuration property
# so spark.read can access wasb(s):// paths for this storage account.
account_key_property = "fs.azure.account.key." + storage_account_name + ".blob.core.windows.net"
spark.conf.set(account_key_property, storage_account_access_key)
# Input descriptor: order files live under the (placeholder) blob folder
# and are read as CSV.
blob_folder_order = "xxxxxxxxxx"
blob_file_type = "csv"

# Run timestamp, e.g. "20240131_235959" — suitable for naming outputs.
timestr = time.strftime("%Y%m%d_%H%M%S")
try:
    # Load the order CSVs; the header row supplies column names and the
    # schema is inferred from the data.
    df = spark.read.format(blob_file_type) \
        .option("inferSchema", "true") \
        .option("header", "true") \
        .load(blob_folder_order)
    df.show()
    # Ask Deequ to propose data-quality constraints for the loaded frame.
    suggestionResult = ConstraintSuggestionRunner(spark) \
        .onData(df) \
        .addConstraintRule(DEFAULT()) \
        .run()
    print(json.dumps(suggestionResult, indent=2))
except Exception:
    # BUG FIX: was a bare `except:`, which also swallows SystemExit and
    # KeyboardInterrupt; catch Exception instead.
    tb = sys.exc_info()[2]
    tbinfo = traceback.format_tb(tb)[0]
    # BUG FIX: original read `sys.exc_info()1]` — a SyntaxError. The
    # exception value is element [1] of the exc_info tuple.
    pymsg = "PYTHON ERRORS:\nTraceback info:\n" + tbinfo + "\nError Info:\n" + str(sys.exc_info()[1])
    # Surface the error — the original built this message and silently
    # discarded it, hiding every failure.
    print(pymsg)