# coding: UTF-8
import boto3, json, requests, requests
from datetime import datetime
def get_region():
    """Return the AWS region reported by the EC2 instance metadata endpoint.

    Fetches the instance identity document from the link-local metadata
    address and reads its ``region`` field.

    Returns:
        The value of the ``region`` key of the identity document, or
        ``None`` when the document has no such key.

    Raises:
        requests.exceptions.RequestException: if the endpoint cannot be
            reached within the timeout or answers with an HTTP error status.
    """
    # This address is fixed; it does not need to be changed.
    r = requests.get(
        "http://169.254.169.254/latest/dynamic/instance-identity/document",
        # Without a timeout requests waits indefinitely, which hangs the
        # caller whenever the metadata endpoint is unreachable.
        timeout=5,
    )
    # Fail with a clear HTTP error instead of a JSON parsing error when the
    # endpoint rejects the request.
    r.raise_for_status()
    response_json = r.json()
    return response_json.get('region')
def client(region_name):
    """Create the module-level EMR client used by the other functions.

    Args:
        region_name: AWS region name handed to ``boto3.client``.

    Side effects:
        Binds the new client to the global name ``emr``. Nothing is returned.
    """
    global emr
    emr_client = boto3.client('emr', region_name=region_name)
    emr = emr_client
def _hive_and_yarn_configurations():
    """Return a fresh hive-site / yarn-env configuration list for one instance group."""
    return [{
        # Change the Hive metastore source here.
        # NOTE(review): host/port/db/user/pwd look like placeholders that must
        # be replaced before use -- confirm, and avoid committing real secrets.
        "Classification": "hive-site",
        "Properties": {
            "javax.jdo.option.ConnectionURL": "jdbc:mysql://host:port/db?useUnicode=true&characterEncoding=UTF-8",
            "javax.jdo.option.ConnectionDriverName": "org.mariadb.jdbc.Driver",
            "javax.jdo.option.ConnectionUserName": "user",
            "javax.jdo.option.ConnectionPassword": "pwd"
        }
    }, {
        "Classification": "yarn-env",
        "Properties": {},
        "Configurations": [{
            "Classification": "export",
            "Properties": {
                "AWS_REGION": "cn-northwest-1",
                "S3_ENDPOINT": "s3.cn-northwest-1.amazonaws.com.cn",
                "S3_USE_HTTPS": "0",
                "S3_VERIFY_SSL": "0"
            }
        }]
    }]


def _gp2_ebs_configuration(volumes_per_instance):
    """Return an EBS configuration of 32 GB gp2 volumes, ``volumes_per_instance`` per node."""
    return {
        "EbsBlockDeviceConfigs": [{
            "VolumeSpecification": {
                "SizeInGB": 32,
                "VolumeType": "gp2"
            },
            "VolumesPerInstance": volumes_per_instance
        }]
    }


# Create the EMR cluster
def create_cluster(name):
    """Submit a ``run_job_flow`` request for a new EMR cluster and return its id.

    The request asks for Hadoop, Hive and Spark on release ``emr-5.26.0`` with
    three instance groups: one MASTER node, one CORE node and four TASK nodes.
    Every group receives the same hive-site / yarn-env configuration.

    Uses the module-level ``emr`` client, so ``client()`` must have been
    called beforehand.

    Args:
        name: Cluster name; this is the name shown in the console.

    Returns:
        The ``JobFlowId`` value of the ``run_job_flow`` response.
    """
    param = {
        # Change the list of frameworks as needed.
        "Applications": [{
            "Name": "Hadoop"
        }, {
            "Name": "Hive"
        }, {
            "Name": "Spark"
        }],
        # This name is displayed in the console.
        "Name": name,
        "ServiceRole": "EMR_DefaultRole",
        "Tags": [],
        "ReleaseLabel": "emr-5.26.0",
        "Instances": {
            "TerminationProtected": False,
            "EmrManagedMasterSecurityGroup": "sg-0085fba9c3a6818f5",
            "InstanceGroups": [{
                "InstanceCount": 1,
                "Name": "主实例组 - 1",
                "InstanceRole": "MASTER",
                "EbsConfiguration": _gp2_ebs_configuration(1),
                # Change the hardware configuration as needed.
                "InstanceType": "m4.large",
                "Market": "ON_DEMAND",
                "Configurations": _hive_and_yarn_configurations()
            }, {
                # The CORE group carries no explicit EbsConfiguration.
                "InstanceRole": "CORE",
                "InstanceCount": 1,
                "Name": "核心实例组 - 2",
                "Market": "ON_DEMAND",
                # Change the hardware configuration as needed.
                "InstanceType": "r5d.2xlarge",
                "Configurations": _hive_and_yarn_configurations()
            }, {
                # Change the number of worker (task) nodes as needed.
                "InstanceCount": 4,
                "Name": "任务实例组 - 4",
                "InstanceRole": "TASK",
                "EbsConfiguration": _gp2_ebs_configuration(4),
                # Change the hardware configuration as needed.
                "InstanceType": "r5d.2xlarge",
                "Market": "ON_DEMAND",
                "Configurations": _hive_and_yarn_configurations()
            }],
            "KeepJobFlowAliveWhenNoSteps": True,
            "Ec2SubnetId": "subnet-027bff297ea95039b",
            "Ec2KeyName": "hifive.airflow",
            "EmrManagedSlaveSecurityGroup": "sg-05a0e076ee7babb9e"
        },
        "JobFlowRole": "EMR_EC2_DefaultRole",
        "Steps": [{
            "HadoopJarStep": {
                "Args": ["state-pusher-script"],
                "Jar": "command-runner.jar"
            },
            "Name": "Setup Hadoop Debugging"
        }],
        "ScaleDownBehavior": "TERMINATE_AT_TASK_COMPLETION",
        "VisibleToAllUsers": True,
        "EbsRootVolumeSize": 10,
        "LogUri": "s3n://aws-logs-550775287661-cn-northwest-1/elasticmapreduce/",
        "AutoScalingRole": "EMR_AutoScaling_DefaultRole"
    }
    cluster_response = emr.run_job_flow(**param)
    return cluster_response['JobFlowId']
# Get the EMR access endpoint
def get_cluster_dns(cluster_id):
    """Return the ``MasterPublicDnsName`` of the described cluster.

    Args:
        cluster_id: Id passed as ``ClusterId`` to ``describe_cluster``.

    Returns:
        The ``MasterPublicDnsName`` entry of the response's ``Cluster`` section.

    Raises:
        KeyError: if the response lacks ``Cluster`` or ``MasterPublicDnsName``.
    """
    cluster_info = emr.describe_cluster(ClusterId=cluster_id)['Cluster']
    return cluster_info['MasterPublicDnsName']
# Wait until cluster creation has finished
def wait_for_cluster_creation(cluster_id):
    """Block on the ``cluster_running`` waiter of the module-level EMR client.

    Args:
        cluster_id: Id passed as ``ClusterId`` to the waiter.
    """
    running_waiter = emr.get_waiter('cluster_running')
    running_waiter.wait(ClusterId=cluster_id)
# Shut down the EMR cluster
def terminate_cluster(cluster_id):
    """Ask EMR to terminate the given cluster via ``terminate_job_flows``.

    Args:
        cluster_id: Id of the cluster; sent as the only entry of ``JobFlowIds``.
    """
    job_flow_ids = [cluster_id]
    emr.terminate_job_flows(JobFlowIds=job_flow_ids)
# (Stray page text "评论" / "Comments" from the original web page; not part of the script.)