# coding: UTF-8import boto3, json, requests, requestsfrom datetime import datetime
def get_region(): # 这个地址不用改 r = requests.get("http://169.254.169.254/latest/dynamic/instance-identity/document") response_json = r.json() return response_json.get('region')
def client(region_name): global emr emr = boto3.client('emr', region_name=region_name)
# 创建EMRdef create_cluster(name): param = { # 修改需要的框架 "Applications":[{ "Name":"Hadoop" },{ "Name":"Hive" },{ "Name":"Spark" }], # 这里的名字会显示到控制台 "Name":name, "ServiceRole":"EMR_DefaultRole", "Tags":[], "ReleaseLabel":"emr-5.26.0", "Instances":{ "TerminationProtected":False, "EmrManagedMasterSecurityGroup":"sg-0085fba9c3a6818f5", "InstanceGroups":[{ "InstanceCount":1, "Name":"主实例组 - 1", "InstanceRole":"MASTER", "EbsConfiguration":{ "EbsBlockDeviceConfigs":[{ "VolumeSpecification":{ "SizeInGB":32, "VolumeType":"gp2" }, "VolumesPerInstance":1 }] }, # 修改需要的硬件配置 "InstanceType":"m4.large", "Market":"ON_DEMAND", "Configurations":[{ # 修改Hive的meta源 "Classification":"hive-site", "Properties":{ "javax.jdo.option.ConnectionURL":"jdbc:mysql://host:port/db?useUnicode=true&characterEncoding=UTF-8", "javax.jdo.option.ConnectionDriverName":"org.mariadb.jdbc.Driver", "javax.jdo.option.ConnectionUserName":"user", "javax.jdo.option.ConnectionPassword":"pwd" } },{ "Classification":"yarn-env", "Properties":{}, "Configurations":[{ "Classification":"export", "Properties":{ "AWS_REGION":"cn-northwest-1", "S3_ENDPOINT":"s3.cn-northwest-1.amazonaws.com.cn", "S3_USE_HTTPS":"0", "S3_VERIFY_SSL":"0" } }] }] },{ "InstanceRole":"CORE", "InstanceCount":1, "Name":"核心实例组 - 2", "Market":"ON_DEMAND", # 修改需要的硬件配置 "InstanceType":"r5d.2xlarge", "Configurations":[{ "Classification":"hive-site", "Properties":{ "javax.jdo.option.ConnectionURL":"jdbc:mysql://host:port/db?useUnicode=true&characterEncoding=UTF-8", "javax.jdo.option.ConnectionDriverName":"org.mariadb.jdbc.Driver", "javax.jdo.option.ConnectionUserName":"user", "javax.jdo.option.ConnectionPassword":"pwd" } },{ "Classification":"yarn-env", "Properties":{}, "Configurations":[{ "Classification":"export", "Properties":{ "AWS_REGION":"cn-northwest-1", "S3_ENDPOINT":"s3.cn-northwest-1.amazonaws.com.cn", "S3_USE_HTTPS":"0", "S3_VERIFY_SSL":"0" } }] }] },{ # 修改需要的工作节点数 "InstanceCount":4, "Name":"任务实例组 - 4", "InstanceRole":"TASK", "EbsConfiguration":{ "EbsBlockDeviceConfigs":[{ "VolumeSpecification":{ "SizeInGB":32, "VolumeType":"gp2" }, "VolumesPerInstance":4 }] }, # 修改需要的硬件配置 "InstanceType":"r5d.2xlarge", "Market":"ON_DEMAND", "Configurations":[{ "Classification":"hive-site", "Properties":{ "javax.jdo.option.ConnectionURL":"jdbc:mysql://host:port/db?useUnicode=true&characterEncoding=UTF-8", "javax.jdo.option.ConnectionDriverName":"org.mariadb.jdbc.Driver", "javax.jdo.option.ConnectionUserName":"user", "javax.jdo.option.ConnectionPassword":"pwd" } },{ "Classification":"yarn-env", "Properties":{}, "Configurations":[{ "Classification":"export", "Properties":{ "AWS_REGION":"cn-northwest-1", "S3_ENDPOINT":"s3.cn-northwest-1.amazonaws.com.cn", "S3_USE_HTTPS":"0", "S3_VERIFY_SSL":"0" } }] }] }], "KeepJobFlowAliveWhenNoSteps":True, "Ec2SubnetId":"subnet-027bff297ea95039b", "Ec2KeyName":"hifive.airflow", "EmrManagedSlaveSecurityGroup":"sg-05a0e076ee7babb9e" }, "JobFlowRole":"EMR_EC2_DefaultRole", "Steps":[{ "HadoopJarStep":{ "Args":["state-pusher-script"], "Jar":"command-runner.jar" }, "Name":"Setup Hadoop Debugging" }], "ScaleDownBehavior":"TERMINATE_AT_TASK_COMPLETION", "VisibleToAllUsers":True, "EbsRootVolumeSize":10, "LogUri":"s3n://aws-logs-550775287661-cn-northwest-1/elasticmapreduce/", "AutoScalingRole":"EMR_AutoScaling_DefaultRole" } cluster_response = emr.run_job_flow(**param) return cluster_response['JobFlowId']
# 获取EMR访问入口def get_cluster_dns(cluster_id): response = emr.describe_cluster(ClusterId=cluster_id) return response['Cluster']['MasterPublicDnsName']
# 等待集群创建完成def wait_for_cluster_creation(cluster_id): emr.get_waiter('cluster_running').wait(ClusterId=cluster_id)
# 关闭EMRdef terminate_cluster(cluster_id): emr.terminate_job_flows(JobFlowIds=[cluster_id])
评论