我在AWS MySQL RDS中有数据,要求是将数据从表中获取到csv文件并将其放置在S3中。为了实现这一点,我正在使用AWS Glue,并具有以下代码。作业运行没有错误,并且输出未显示在S3存储桶中。请帮忙。
import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.job import Job
import boto3

## @params: [JOB_NAME]
args = getResolvedOptions(sys.argv, ['JOB_NAME'])

# --- Configuration: replace the placeholders with real values. ---
aws_region = "your-aws-region-code"
# Target prefix, e.g. "s3://my-bucket/exports/" — end with "/" so every
# table lands under its own sub-prefix instead of being concatenated
# onto the bucket path.
s3_path = "s3-prefix"
glue_database = "glue-database-name"
target_format = "csv"

# Standard Glue job bootstrap.
sc = SparkContext()
glueContext = GlueContext(sc)
spark = glueContext.spark_session
job = Job(glueContext)
job.init(args['JOB_NAME'], args)

# List every table registered in the Glue Data Catalog database.
# NOTE(review): get_tables paginates (NextToken) — with >100 tables use
# a paginator; left as-is to preserve the original behaviour.
client = boto3.client(service_name='glue', region_name=aws_region)
responseGetTables = client.get_tables(DatabaseName=glue_database)
tables = [tableDict['Name'] for tableDict in responseGetTables['TableList']]

# Export each catalog table as CSV under its own S3 prefix.
for table in tables:
    datasource = glueContext.create_dynamic_frame.from_catalog(
        database=glue_database,
        table_name=table,
    )
    # BUG FIX: the original referenced the undefined name `s3Path`
    # (NameError at runtime — the job never wrote anything to S3);
    # the variable defined above is `s3_path`.
    datasink = glueContext.write_dynamic_frame.from_options(
        frame=datasource,
        connection_type="s3",
        connection_options={"path": s3_path + table},
        format=target_format,
    )

job.commit()
答案 0(得分:0)
用下面这一行替换写入 S3 的那一行:代码中实际定义的变量名是 s3_path,而不是 s3Path。
datasink = glueContext.write_dynamic_frame.from_options(frame = datasource, connection_type = "s3", connection_options = {"path": s3_path + table}, format = target_format)