我正在使用 CDK 的 Python 包装器来创建 Glue 作业。`command` 属性需要类型为 `IResolvable | JobCommandProperty` 的对象。我尝试在此处传入一个 `JobCommandProperty` 对象,但出现了异常。
我创建了一个 `JobCommandProperty` 对象。我曾寻找类似 Java API 中的 `.builder()` 函数,但没有找到。
from aws_cdk import (
aws_glue as glue,
aws_iam as iam,
core
)
class ScheduledGlueJob(core.Stack):
    """Stack declaring an IAM role for Glue and a Glue ETL job.

    NOTE: this is the snippet from the question. Its behavior is kept
    exactly as posted, so it still reproduces the reported
    ``JSIIError: Expected 'string', got true (boolean)`` during synth.
    """

    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # A single statement with wildcard actions for several services,
        # later widened to every resource.
        policy_statement = iam.PolicyStatement(
            actions=[
                'logs:*',
                's3:*',
                'ec2:*',
                'iam:*',
                'cloudwatch:*',
                'dynamodb:*',
                'glue:*',
            ]
        )
        policy_statement.add_all_resources()

        # Because of the chained call, the name below is bound to whatever
        # add_to_policy() returns rather than to the Role construct itself.
        glue_job_role = iam.Role(
            self,
            'Glue-Job-Role',
            assumed_by=iam.ServicePrincipal('glue.amazonaws.com'),
        ).add_to_policy(
            policy_statement
        )

        # Command description for the job: job type plus script location.
        etl_command = glue.CfnJob.JobCommandProperty(
            name='glueetl',
            script_location='s3://my-bucket/glue-scripts/job.scala',
        )

        job = glue.CfnJob(
            self,
            'glue-test-job',
            role=glue_job_role,
            allocated_capacity=10,
            command=etl_command,
        )
错误消息是这样的:
$cdk synth
Traceback (most recent call last):
File "app.py", line 30, in <module>
glue_job = ScheduledGlueJob(app, 'Cronned-Glue-Job')
File "/Users/d439087/IdeaProjects/ds/test_cdk/.env/lib/python3.7/site-packages/jsii/_runtime.py", line 66, in __call__
inst = super().__call__(*args, **kwargs)
File "/Users/d439087/IdeaProjects/ds/test_cdk/glue/scheduled_job.py", line 33, in __init__
script_location='s3://my-bucket/glue-scripts/job.scala'
File "/Users/d439087/IdeaProjects/ds/test_cdk/.env/lib/python3.7/site-packages/jsii/_runtime.py", line 66, in __call__
inst = super().__call__(*args, **kwargs)
File "/Users/d439087/IdeaProjects/ds/test_cdk/.env/lib/python3.7/site-packages/aws_cdk/aws_glue/__init__.py", line 2040, in __init__
jsii.create(CfnJob, self, [scope, id, props])
File "/Users/d439087/IdeaProjects/ds/test_cdk/.env/lib/python3.7/site-packages/jsii/_kernel/__init__.py", line 208, in create
overrides=overrides,
File "/Users/d439087/IdeaProjects/ds/test_cdk/.env/lib/python3.7/site-packages/jsii/_kernel/providers/process.py", line 331, in create
return self._process.send(request, CreateResponse)
File "/Users/d439087/IdeaProjects/ds/test_cdk/.env/lib/python3.7/site-packages/jsii/_kernel/providers/process.py", line 316, in send
raise JSIIError(resp.error) from JavaScriptError(resp.stack)
jsii.errors.JSIIError: Expected 'string', got true (boolean)
有没有人能提供一个使用 CDK(Python)创建 `CfnJob` 对象的可用示例?
答案 0 :(得分:2)
没关系,问题已解决:`role` 属性的类型必须为 `string`(例如角色的 ARN),是 JSII 的错误消息把我弄糊涂了。
答案 1 :(得分:1)
请注意,`crawler`(爬虫)与 `job`(作业)不同;尽管如此,我认为两者所需的权限是相似的。
截至 2020 年 8 月 16 日,下面的代码对爬虫是可用的(遗憾的是,之前的答案都不可行):
from aws_cdk import (
aws_iam as iam,
aws_glue as glue,
core
)
class MyDataScienceStack(core.Stack):
    """Stack declaring an IAM role for Glue and a scheduled S3 crawler."""

    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        """Create the crawler role and the Glue crawler.

        Args:
            scope: Parent construct, forwarded to ``core.Stack``.
            id: Construct id of this stack, forwarded to ``core.Stack``.
            **kwargs: Extra stack properties, forwarded to ``core.Stack``.
        """
        super().__init__(scope, id, **kwargs)

        # Read/write access limited to the bucket and the crawled prefix.
        statement = iam.PolicyStatement(
            actions=["s3:GetObject", "s3:PutObject"],
            resources=[
                "arn:aws:s3:::mybucketname",
                "arn:aws:s3:::mybucketname/data_warehouse/units/*",
            ],
        )
        write_to_s3_policy = iam.PolicyDocument(statements=[statement])

        glue_role = iam.Role(
            self, 'GlueCrawlerFormyDataScienceRole',
            role_name='GlueCrawlerFormyDataScienceRole',
            # inline_policies is a mapping of policy name -> PolicyDocument,
            # not a list, so the document is registered under an explicit name.
            inline_policies={'DataWarehouseS3Access': write_to_s3_policy},
            assumed_by=iam.ServicePrincipal('glue.amazonaws.com'),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'service-role/AWSGlueServiceRole'
                )
            ],
        )

        # schedule and targets are given as plain dicts with camelCase keys.
        glue_crawler = glue.CfnCrawler(
            self, 'glue-crawler-id',
            description="Glue Crawler for my-data-science-s3",
            name='any name',
            database_name='units',
            # Cron schedule: minute 5 of every hour.
            schedule={"scheduleExpression": "cron(5 * * * ? *)"},
            # The L1 construct takes the role as a string, hence the ARN.
            role=glue_role.role_arn,
            targets={"s3Targets": [{"path": "s3://mybucketname/data_warehouse/units"}]},
        )
答案 2 :(得分:0)
`glue_job_role` 变量的类型不再是 `Role`,因为您在创建角色的表达式后面链式调用了 `.add_to_policy(...)`,变量得到的是该方法的返回值。下面的代码应该可以工作。
# Fragment meant to replace the role/job creation inside the stack's
# __init__; it assumes `self` and `policy_statement` are already defined there.

# Create the role on its own so the name stays bound to the Role construct.
glue_job_role = iam.Role(
    self,
    'Glue-Job-Role',
    assumed_by=iam.ServicePrincipal('glue.amazonaws.com'),
)
# Attach the statement in a separate call; its return value is not the role.
glue_job_role.add_to_policy(policy_statement)

job = glue.CfnJob(
    self,
    'glue-test-job',
    # CfnJob takes the role as a string; iam.Role exposes its ARN as
    # `role_arn` (there is no `arn` attribute).
    role=glue_job_role.role_arn,
    allocated_capacity=10,
    command=glue.CfnJob.JobCommandProperty(
        name='glueetl',
        script_location='s3://my-bucket/glue-scripts/job.scala',
    ),
)