My Python 3 code fails with the following error:
Py4JJavaError: An error occurred while calling o45.load. : java.lang.RuntimeException: java.lang.ClassNotFoundException: Class org.apache.hadoop.fs.s3a.S3AFileSystem not found
Code:
from pyspark.sql.functions import udf
from pyspark.sql.types import *
import os
from pandas.io.json import json_normalize
# Request the elasticsearch-hadoop and hadoop-aws packages before the SparkSession is created.
os.environ['PYSPARK_SUBMIT_ARGS'] = '--driver-memory 10g --packages org.elasticsearch:elasticsearch-hadoop:6.5.2,org.apache.hadoop:hadoop-aws:3.1.1 pyspark-shell'
from pyspark import SparkConf
from pyspark.sql import SparkSession
spark = (SparkSession.builder.master("local[*]")
         .config(conf=SparkConf())
         .config("spark.local.dir", "/Users/psuresh/spark_local_dir")
         .enableHiveSupport()
         .getOrCreate())
spark.sparkContext.setLogLevel("ERROR")
# Point Hadoop at the S3A filesystem implementation and supply AWS credentials.
hadoopConf = spark._jsc.hadoopConfiguration()
hadoopConf.set("fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem")
hadoopConf.set("fs.s3a.access.key", "XXXXXX")
hadoopConf.set("fs.s3a.secret.key", "XXXXXXXXXX")
from pyspark.sql.functions import input_file_name, regexp_extract, col
df = spark.read.format("parquet").load("s3a://a/b/c/d/dump/")
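
In case it is easier to look at in one place, here is a stripped-down sketch of the same setup (same hadoop-aws version; the bucket path and keys are placeholders, the S3A credentials are passed through the builder, which I believe is equivalent, and the VersionInfo line is only there to print the Hadoop version bundled with the Spark install):

# Minimal sketch of the same S3A read; the fs.s3a settings go through
# spark.hadoop.* builder configs instead of the JVM Hadoop configuration.
import os
os.environ['PYSPARK_SUBMIT_ARGS'] = '--packages org.apache.hadoop:hadoop-aws:3.1.1 pyspark-shell'
from pyspark.sql import SparkSession
spark = (SparkSession.builder.master("local[*]")
         .config("spark.hadoop.fs.s3a.access.key", "XXXXXX")
         .config("spark.hadoop.fs.s3a.secret.key", "XXXXXXXXXX")
         .getOrCreate())
# hadoop-aws is expected to match the Hadoop version bundled with Spark;
# this prints that version.
print(spark.sparkContext._jvm.org.apache.hadoop.util.VersionInfo.getVersion())
df = spark.read.parquet("s3a://a/b/c/d/dump/")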