# Launch a test EMR cluster and block until it is running.
#
# Relies on names defined outside this snippet: boto3, S3_LOG_URI,
# EC2_SUBNET and EC2_KEY.

# TLS certificate verification is left at boto3's default (enabled).
# If a private CA is required, pass verify='/path/to/ca-bundle.pem' or set
# the AWS_CA_BUNDLE environment variable instead of disabling verification.
client = boto3.client('emr', region_name='us-west-2')

response = client.run_job_flow(
    Name="launch test cluster",
    LogUri=S3_LOG_URI,
    ReleaseLabel='emr-5.19.0',
    Instances={
        'MasterInstanceType': 'm4.xlarge',
        'SlaveInstanceType': 'r3.xlarge',
        'InstanceCount': 3,
        # Keep the cluster up after its steps finish so it can be used
        # interactively; it must be terminated explicitly.
        'KeepJobFlowAliveWhenNoSteps': True,
        'TerminationProtected': False,
        'Ec2SubnetId': EC2_SUBNET,
        'Ec2KeyName': EC2_KEY,
    },
    VisibleToAllUsers=True,
    JobFlowRole='EMR_EC2_DefaultRole',
    ServiceRole='EMR_DefaultRole',
    Applications=[
        {'Name': 'Spark'},
        {'Name': 'Zeppelin'},
    ],
    Steps=[
        {
            # NOTE(review): AWS documentation for 4.x+ releases appears to
            # use 'command-runner.jar' with ['state-pusher-script'] for the
            # debugging step -- confirm this script-runner form is intended.
            'Name': 'Setup Debugging',
            'ActionOnFailure': 'TERMINATE_CLUSTER',
            'HadoopJarStep': {
                'Jar': 's3://us-west-2.elasticmapreduce/libs/script-runner/script-runner.jar',
                'Args': ['s3://us-west-2.elasticmapreduce/libs/state-pusher/0.1/fetch'],
            },
        },
    ],
)

cluster_id = response["JobFlowId"]
print('launching...')

# Block until the 'cluster_running' waiter succeeds; the waiter raises if the
# cluster fails to reach that state or the polling limit is exhausted.
waiter = client.get_waiter('cluster_running')
waiter.wait(ClusterId=cluster_id)

print('launch cluster id: {0}'.format(cluster_id))