我在AWS MySQL RDS中有数据,要求是将数据从表中获取到csv文件并将其放置在S3中。为了实现这一点,我正在使用AWS Glue,并具有以下代码。作业运行没有错误,并且输出未显示在S3存储桶中。请帮忙。
import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.job import Job
import boto3

## @params: [JOB_NAME]
args = getResolvedOptions(sys.argv, ['JOB_NAME'])

# --- Configuration: replace the placeholders with real values. ---
aws_region = "your-aws-region-code"
# Target prefix, e.g. "s3://my-bucket/exports/" — end with "/" so every
# table lands under its own sub-prefix instead of being concatenated
# onto the bucket path.
s3_path = "s3-prefix"
glue_database = "glue-database-name"
target_format = "csv"

# Standard Glue job bootstrap.
sc = SparkContext()
glueContext = GlueContext(sc)
spark = glueContext.spark_session
job = Job(glueContext)
job.init(args['JOB_NAME'], args)

# List every table registered in the Glue Data Catalog database.
# NOTE(review): get_tables paginates (NextToken) — with >100 tables use
# a paginator; left as-is to preserve the original behaviour.
client = boto3.client(service_name='glue', region_name=aws_region)
responseGetTables = client.get_tables(DatabaseName=glue_database)
tables = [tableDict['Name'] for tableDict in responseGetTables['TableList']]

# Export each catalog table as CSV under its own S3 prefix.
for table in tables:
    datasource = glueContext.create_dynamic_frame.from_catalog(
        database=glue_database,
        table_name=table,
    )
    # BUG FIX: the original referenced the undefined name `s3Path`
    # (NameError at runtime — the job never wrote anything to S3);
    # the variable defined above is `s3_path`.
    datasink = glueContext.write_dynamic_frame.from_options(
        frame=datasource,
        connection_type="s3",
        connection_options={"path": s3_path + table},
        format=target_format,
    )

job.commit()
答案 0(得分:0)
用下面这一行替换写入 S3 的那一行:代码中实际定义的变量名是 s3_path,而不是 s3Path。
datasink = glueContext.write_dynamic_frame.from_options(frame = datasource, connection_type = "s3", connection_options = {"path": s3_path + table}, format = target_format)