我无法理解为什么此代码会生成以下错误。
“使用AWS KMS托管密钥指定服务器端加密的请求需要AWS签名版本4”
我已尝试将boto_config设置为使用sigv4,但这也无效。
import os
import boto
from pyspark import SparkConf, SparkContext
from pyspark.sql import SQLContext, SparkSession
def main():
os.environ['PYSPARK_SUBMIT_ARGS'] =\
' --packages com.amazonaws:aws-java-sdk:1.11.199,org.apache.hadoop:hadoop-aws:2.9.0 pyspark-shell'
conf = (SparkConf()
.setAppName("s3a_test")
.set("spark.executor.instances", "8")
.set("spark.executor.cores", 2)
.set("spark.shuffle.compress", "true")
.set("spark.io.compression.codec", "snappy")
.set("spark.executor.memory", "2g"))
sc = SparkContext(conf=conf)
sc._jsc.hadoopConfiguration().set("fs.s3.awsAccessKeyId", "myAccessKeyId")
sc._jsc.hadoopConfiguration().set("fs.s3.awsSecretAccessKey", "mySecretAccessKey")
sc._jsc.hadoopConfiguration().set("fs.s3.impl", "org.apache.hadoop.fs.s3native.NativeS3FileSystem")
sc._jsc.hadoopConfiguration().set("fs.s3.endpoint", "s3.us-east-1.amazonaws.com")
df = sc.textFile("s3://s3-bucket/xyz/myfile.csv")
print(df.count())
sc.stop()
if __name__ == "__main__":
main()