我正在尝试通过 Python API 创建并配置 Google Dataproc 集群。下面是我用来创建集群的函数,但运行脚本时会报错,我不清楚问题出在哪里。
def create_cluster(dataproc, bucket, project, zone, region, cluster_name,
                   master_type, worker_type, image_version, init):
    """Submit a Dataproc cluster-creation request and return the response.

    Builds the cluster description and sends it through
    ``dataproc.projects().regions().clusters().create(...).execute()``.

    Args:
        dataproc: Client object exposing the
            ``projects().regions().clusters().create()`` call chain
            (presumably a googleapiclient Dataproc resource -- confirm
            against the caller).
        bucket: Bucket name used as ``configBucket`` and as the location
            of the initialization script.
        project: Project ID; used in the request and in the zone URI.
        zone: Compute zone name, embedded in the zone URI.
        region: Region passed to the ``create`` call.
        cluster_name: Name given to the new cluster.
        master_type: Value sent as the master ``machineTypeUri``.
        worker_type: Value sent as the worker ``machineTypeUri``.
        image_version: Value sent as ``softwareConfig.imageVersion``.
        init: Object path of the initialization script inside ``bucket``.

    Returns:
        Whatever ``execute()`` returns for the create request.
    """
    print('Creating cluster...')

    zone_uri = 'https://www.googleapis.com/compute/v1/projects/{}/zones/{}'.format(
        project, zone)

    # NOTE: the metadata values below are hard-coded, not taken from the
    # function arguments.
    cluster_data = {
        'projectId': project,
        'clusterName': cluster_name,
        'config': {
            'configBucket': bucket,
            'gceClusterConfig': {
                'zoneUri': zone_uri,
                'metadata': {
                    'HASH': '6e815ac3d973',
                    'SPARK': '2.0.2',
                    'HAIL_VERSION': '0.1',
                },
            },
            'masterConfig': {
                'machineTypeUri': master_type,
            },
            'workerConfig': {
                'machineTypeUri': worker_type,
            },
            'softwareConfig': {
                'imageVersion': image_version,
            },
            'initializationActions': [
                {
                    'executableFile': 'gs://{}/{}'.format(bucket, init),
                },
            ],
        },
    }

    result = dataproc.projects().regions().clusters().create(
        projectId=project,
        region=region,
        body=cluster_data).execute()
    return result
而通过以下 gcloud 命令创建集群时,一切正常:
gcloud dataproc clusters create testing-automation --image-version 1.1 --project perfect-atrium-179917 --bucket hail-qc-data --master-machine-type n1-standard-2 --worker-machine-type n1-standard-2 --zone us-east4-b --initialization-actions gs://hail-qc-data/initialization_action.sh --metadata HASH=6e815ac3d973,SPARK=2.0.2,HAIL_VERSION=0.1