使用 Terraform 实现 AWS EC2 蓝绿部署
通过 Terraform 基础设施即代码,详解 AWS EC2 跨可用区蓝绿部署架构与自动化切换。
什么是蓝绿部署
蓝绿部署是一种降低发布风险的策略,通过同时运行两套完整环境(蓝色和绿色)来实现无缝切换。在传统模式下,部署新版本往往伴随着服务中断,而蓝绿部署可以在几乎零停机的情况下完成版本升级。
架构设计
整体架构
code
            ┌─────────────────┐
            │    Route 53     │
            │    DNS 记录     │
            └────────┬────────┘
                     │
            ┌────────▼────────┐
            │    ALB / CLB    │
            │   负载均衡器    │
            └┬───────────────┬┘
             │               │
┌────────────▼──┐     ┌──────▼──────────┐
│  Blue 环境    │     │  Green 环境     │
│  (当前生产)   │     │  (待部署)       │
│  EC2 + ASG    │     │  EC2 + ASG      │
└───────────────┘     └─────────────────┘
Terraform 目录结构
bash
├── main.tf
├── variables.tf
├── outputs.tf
├── versions.tf
├── modules/
│   ├── vpc/
│   │   ├── main.tf
│   │   └── variables.tf
│   ├── ec2/
│   │   ├── main.tf
│   │   └── variables.tf
│   └── alb/
│       ├── main.tf
│       └── variables.tf
└── scripts/
    ├── switch.sh
    └── rollback.sh
Terraform 配置实现
1. 版本和提供商配置
hcl
# versions.tf
# Pins the Terraform CLI and the AWS provider used by this configuration.
terraform {
  # Terraform CLI 1.5.0 or newer is required.
  required_version = ">= 1.5.0"

  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = "~> 5.0" # any 5.x release of the provider
    }
  }
}
# AWS provider; the region is taken from the root variable aws_region.
provider "aws" {
  region = var.aws_region
}
2. VPC 模块
hcl
# modules/vpc/main.tf
# The VPC that contains both the blue and the green subnets.
resource "aws_vpc" "main" {
  cidr_block = var.vpc_cidr

  # DNS resolution and DNS hostnames are both enabled inside the VPC.
  enable_dns_support   = true
  enable_dns_hostnames = true

  tags = {
    Name = "${var.project}-vpc"
  }
}
# Blue subnet in the region's "a" availability zone.
resource "aws_subnet" "blue_a" {
  vpc_id            = aws_vpc.main.id
  availability_zone = "${var.aws_region}a"
  cidr_block        = var.blue_subnet_a_cidr

  # Instances launched in this subnet are given a public IP address.
  map_public_ip_on_launch = true

  tags = {
    Name = "${var.project}-subnet-blue-a"
    Slot = "blue"
  }
}
# Blue subnet in the region's "b" availability zone.
resource "aws_subnet" "blue_b" {
  vpc_id            = aws_vpc.main.id
  availability_zone = "${var.aws_region}b"
  cidr_block        = var.blue_subnet_b_cidr

  # Instances launched in this subnet are given a public IP address.
  map_public_ip_on_launch = true

  tags = {
    Name = "${var.project}-subnet-blue-b"
    Slot = "blue"
  }
}
# Green subnet in the region's "a" availability zone.
resource "aws_subnet" "green_a" {
  vpc_id            = aws_vpc.main.id
  availability_zone = "${var.aws_region}a"
  cidr_block        = var.green_subnet_a_cidr

  # Instances launched in this subnet are given a public IP address.
  map_public_ip_on_launch = true

  tags = {
    Name = "${var.project}-subnet-green-a"
    Slot = "green"
  }
}
# Green subnet in the region's "b" availability zone.
resource "aws_subnet" "green_b" {
  vpc_id            = aws_vpc.main.id
  availability_zone = "${var.aws_region}b"
  cidr_block        = var.green_subnet_b_cidr

  # Instances launched in this subnet are given a public IP address.
  map_public_ip_on_launch = true

  tags = {
    Name = "${var.project}-subnet-green-b"
    Slot = "green"
  }
}
3. EC2 蓝绿环境配置
hcl
# modules/ec2/main.tf
# Launch template for instances of the blue slot.
resource "aws_launch_template" "blue" {
  name_prefix            = "${var.project}-blue-"
  image_id               = var.ami_id
  instance_type          = var.instance_type
  vpc_security_group_ids = var.security_group_ids

  # Boot script: records the slot and version in /var/log/app.log, then
  # installs and starts nginx.
  # A heredoc terminator must stand alone on its line, so the closing
  # parenthesis of base64encode() is placed on the following line
  # ("EOF)" would leave the heredoc unterminated).
  # NOTE(review): the ALB target groups probe "/health"; confirm that the
  # AMI or application actually serves that path.
  user_data = base64encode(<<-EOF
    #!/bin/bash
    echo "Blue Environment - Version ${var.blue_version}" >> /var/log/app.log
    yum install -y nginx
    systemctl start nginx
    EOF
  )

  # Tags applied to every instance launched from this template.
  tag_specifications {
    resource_type = "instance"

    tags = {
      Name    = "${var.project}-blue"
      Slot    = "blue"
      Version = var.blue_version
    }
  }

  # A replacement template is created before the old one is destroyed.
  lifecycle {
    create_before_destroy = true
  }
}
# Launch template for instances of the green slot.
resource "aws_launch_template" "green" {
  name_prefix            = "${var.project}-green-"
  image_id               = var.ami_id
  instance_type          = var.instance_type
  vpc_security_group_ids = var.security_group_ids

  # Boot script: records the slot and version in /var/log/app.log, then
  # installs and starts nginx.
  # A heredoc terminator must stand alone on its line, so the closing
  # parenthesis of base64encode() is placed on the following line
  # ("EOF)" would leave the heredoc unterminated).
  # NOTE(review): the ALB target groups probe "/health"; confirm that the
  # AMI or application actually serves that path.
  user_data = base64encode(<<-EOF
    #!/bin/bash
    echo "Green Environment - Version ${var.green_version}" >> /var/log/app.log
    yum install -y nginx
    systemctl start nginx
    EOF
  )

  # Tags applied to every instance launched from this template.
  tag_specifications {
    resource_type = "instance"

    tags = {
      Name    = "${var.project}-green"
      Slot    = "green"
      Version = var.green_version
    }
  }

  # A replacement template is created before the old one is destroyed.
  lifecycle {
    create_before_destroy = true
  }
}
# Auto Scaling group of the blue slot; created at var.desired_capacity.
resource "aws_autoscaling_group" "blue" {
  name                = "${var.project}-asg-blue"
  vpc_zone_identifier = var.blue_subnet_ids

  desired_capacity = var.desired_capacity
  min_size         = var.min_size
  max_size         = var.max_size

  # NOTE(review): "ELB" health checks only take effect when the group is
  # attached to a load balancer target group; no target_group_arns or
  # aws_autoscaling_attachment is visible in this configuration — confirm.
  health_check_type         = "ELB"
  health_check_grace_period = 300

  launch_template {
    id      = aws_launch_template.blue.id
    version = "$Latest"
  }

  tag {
    key                 = "Name"
    value               = "${var.project}-blue"
    propagate_at_launch = true
  }

  tag {
    key                 = "Slot"
    value               = "blue"
    propagate_at_launch = true
  }

  # scripts/switch.sh and scripts/rollback.sh change the desired capacity
  # outside Terraform. Without this, the next `terraform apply` would put
  # the group back to var.desired_capacity and undo the switch.
  lifecycle {
    ignore_changes = [desired_capacity]
  }
}
# Auto Scaling group of the green slot; created empty (0 instances) and
# scaled up by scripts/switch.sh at release time.
resource "aws_autoscaling_group" "green" {
  name                = "${var.project}-asg-green"
  vpc_zone_identifier = var.green_subnet_ids

  desired_capacity = 0
  min_size         = 0
  max_size         = var.max_size

  # NOTE(review): "ELB" health checks only take effect when the group is
  # attached to a load balancer target group; no target_group_arns or
  # aws_autoscaling_attachment is visible in this configuration — confirm.
  health_check_type         = "ELB"
  health_check_grace_period = 300

  launch_template {
    id      = aws_launch_template.green.id
    version = "$Latest"
  }

  tag {
    key                 = "Name"
    value               = "${var.project}-green"
    propagate_at_launch = true
  }

  tag {
    key                 = "Slot"
    value               = "green"
    propagate_at_launch = true
  }

  # scripts/switch.sh scales this group up outside Terraform. Without
  # this, the next `terraform apply` would scale green back to 0 while it
  # is serving production traffic.
  lifecycle {
    ignore_changes = [desired_capacity]
  }
}
4. 负载均衡器配置
hcl
# modules/alb/main.tf
# Internet-facing Application Load Balancer shared by both slots.
resource "aws_lb" "main" {
  name               = "${var.project}-alb"
  load_balancer_type = "application"
  internal           = false

  subnets         = var.public_subnet_ids
  security_groups = var.security_group_ids

  # Deletion protection is off, so `terraform destroy` can remove the ALB.
  enable_deletion_protection = false

  tags = {
    Name = "${var.project}-alb"
  }
}
# Target group that receives traffic for the blue slot (HTTP on port 80).
resource "aws_lb_target_group" "blue" {
  name     = "${var.project}-tg-blue"
  port     = 80
  protocol = "HTTP"
  vpc_id   = var.vpc_id

  # Probes /health every 30 s with a 5 s timeout; two consecutive results
  # flip a target between healthy and unhealthy.
  health_check {
    enabled             = true
    path                = "/health"
    interval            = 30
    timeout             = 5
    healthy_threshold   = 2
    unhealthy_threshold = 2
  }

  # Load-balancer cookie stickiness for one hour. The argument is named
  # cookie_duration; "duration" is not accepted by the stickiness block.
  stickiness {
    enabled         = true
    type            = "lb_cookie"
    cookie_duration = 3600
  }

  tags = {
    Slot = "blue"
  }
}
# Target group that receives traffic for the green slot (HTTP on port 80).
resource "aws_lb_target_group" "green" {
  name     = "${var.project}-tg-green"
  port     = 80
  protocol = "HTTP"
  vpc_id   = var.vpc_id

  # Probes /health every 30 s with a 5 s timeout; two consecutive results
  # flip a target between healthy and unhealthy.
  health_check {
    enabled             = true
    path                = "/health"
    interval            = 30
    timeout             = 5
    healthy_threshold   = 2
    unhealthy_threshold = 2
  }

  # Load-balancer cookie stickiness for one hour. The argument is named
  # cookie_duration; "duration" is not accepted by the stickiness block.
  stickiness {
    enabled         = true
    type            = "lb_cookie"
    cookie_duration = 3600
  }

  tags = {
    Slot = "green"
  }
}
# HTTP :80 listener. It is created forwarding to the blue target group;
# scripts/switch.sh and scripts/rollback.sh repoint it at release time.
resource "aws_lb_listener" "blue" {
  load_balancer_arn = aws_lb.main.arn
  port              = 80
  protocol          = "HTTP"

  default_action {
    type             = "forward"
    target_group_arn = aws_lb_target_group.blue.arn
  }

  # The scripts change the default action with `aws elbv2 modify-listener`.
  # Without this, the next `terraform apply` would see drift and silently
  # route production traffic back to the blue target group.
  lifecycle {
    ignore_changes = [default_action]
  }
}
# Registers a single instance (var.blue_instance_id) with the blue target
# group on port 80.
# NOTE(review): the blue instances are launched by an Auto Scaling group,
# so a fixed instance ID may not exist; confirm whether the ASG should be
# attached to the target group instead.
resource "aws_lb_target_group_attachment" "blue" {
  target_group_arn = aws_lb_target_group.blue.arn
  target_id        = var.blue_instance_id
  port             = 80
}
5. 变量定义
hcl
# variables.tf
# Root-module inputs. Every variable except ami_id has a default.

# Name prefix used for resource names and tags.
variable "project" {
  type        = string
  default     = "blue-green-app"
  description = "项目名称"
}

# Region handed to the AWS provider and used to build AZ names.
variable "aws_region" {
  type        = string
  default     = "us-east-1"
  description = "AWS 区域"
}

# AMI for both launch templates; no default, so it must be supplied.
variable "ami_id" {
  type        = string
  description = "EC2 AMI ID"
}

# Instance type for both launch templates.
variable "instance_type" {
  type        = string
  default     = "t3.medium"
  description = "EC2 实例类型"
}

# Address range of the VPC.
variable "vpc_cidr" {
  type        = string
  default     = "10.0.0.0/16"
  description = "VPC CIDR 块"
}

# Version label written to the blue instances' log and tags.
variable "blue_version" {
  type        = string
  default     = "v1.0.0"
  description = "蓝色环境版本"
}

# Version label written to the green instances' log and tags.
variable "green_version" {
  type        = string
  default     = "v1.1.0"
  description = "绿色环境版本"
}

# Initial instance count of the blue Auto Scaling group.
variable "desired_capacity" {
  type        = number
  default     = 2
  description = "ASG 期望容量"
}

# Lower bound of the blue Auto Scaling group.
variable "min_size" {
  type        = number
  default     = 2
  description = "ASG 最小实例数"
}

# Upper bound of both Auto Scaling groups.
variable "max_size" {
  type        = number
  default     = 4
  description = "ASG 最大实例数"
}
6. 主配置文件
hcl
# main.tf
# Network layer: one VPC with two blue and two green /24 subnets.
module "vpc" {
  source = "./modules/vpc"

  project    = var.project
  aws_region = var.aws_region
  vpc_cidr   = var.vpc_cidr

  # Blue slot subnets
  blue_subnet_a_cidr = "10.0.1.0/24"
  blue_subnet_b_cidr = "10.0.2.0/24"

  # Green slot subnets
  green_subnet_a_cidr = "10.0.3.0/24"
  green_subnet_b_cidr = "10.0.4.0/24"
}
# Compute layer: blue and green launch templates and Auto Scaling groups.
# NOTE(review): module.security_group is not declared in the visible
# configuration — confirm it is defined elsewhere.
module "ec2" {
  source = "./modules/ec2"

  project       = var.project
  ami_id        = var.ami_id
  instance_type = var.instance_type

  blue_version  = var.blue_version
  green_version = var.green_version

  blue_subnet_ids    = [module.vpc.blue_subnet_a.id, module.vpc.blue_subnet_b.id]
  green_subnet_ids   = [module.vpc.green_subnet_a.id, module.vpc.green_subnet_b.id]
  security_group_ids = [module.security_group.id]

  desired_capacity = var.desired_capacity
  min_size         = var.min_size
  max_size         = var.max_size
}
# Load-balancing layer: the ALB, both target groups and the listener.
# The ALB is placed in the two blue subnets.
# NOTE(review): modules/alb/main.tf reads var.blue_instance_id, which is
# not passed here — confirm that variable has a default.
module "alb" {
  source = "./modules/alb"

  project            = var.project
  vpc_id             = module.vpc.vpc_id
  public_subnet_ids  = [module.vpc.blue_subnet_a.id, module.vpc.blue_subnet_b.id]
  security_group_ids = [module.security_group.id]
}
自动化切换脚本
切换脚本
bash
#!/bin/bash
# scripts/switch.sh
#
# Moves production traffic from the blue environment to the green one:
# scale green up, wait until its targets are healthy, repoint the ALB
# listener, then scale blue down.
#
# Required environment variables:
#   PROJECT                 prefix of the ASG names (<PROJECT>-asg-blue/-green)
#   AWS_REGION              region passed to every AWS CLI call
#   GREEN_TARGET_GROUP_ARN  green target group ARN (GREEN_TG_ARN also accepted)
#   LISTENER_ARN            ALB listener to repoint
# Optional:
#   DESIRED_CAPACITY        number of green instances to start (default: 2)
set -euo pipefail

# Fail fast with a clear message instead of calling AWS with empty arguments.
: "${PROJECT:?PROJECT must be set}"
: "${AWS_REGION:?AWS_REGION must be set}"
: "${LISTENER_ARN:?LISTENER_ARN must be set}"
GREEN_TG_ARN="${GREEN_TARGET_GROUP_ARN:-${GREEN_TG_ARN:-}}"
: "${GREEN_TG_ARN:?GREEN_TARGET_GROUP_ARN must be set}"
DESIRED_CAPACITY="${DESIRED_CAPACITY:-2}"

# Color output
# shellcheck disable=SC2034  # RED is defined for symmetry with rollback.sh
RED='\033[0;31m'
GREEN='\033[0;32m'
NC='\033[0m'
echo -e "${GREEN}开始蓝绿环境切换...${NC}"

# 1. Green has no instances yet at this point; the actual health gate is
#    the waiter in step 3.
echo "步骤 1: 验证绿色环境健康状态..."

# 2. Scale the green environment up.
echo "步骤 2: 扩展绿色环境..."
aws autoscaling set-desired-capacity \
  --auto-scaling-group-name "${PROJECT}-asg-green" \
  --desired-capacity "${DESIRED_CAPACITY}" \
  --region "${AWS_REGION}"

# 3. Block until the green targets report healthy. Target groups belong to
#    the elbv2 API; the classic `aws elb` command has no target-in-service
#    waiter.
#    NOTE(review): assumes the green ASG registers its instances with this
#    target group — confirm the attachment exists.
echo "步骤 3: 等待实例注册完成..."
aws elbv2 wait target-in-service \
  --target-group-arn "${GREEN_TG_ARN}" \
  --region "${AWS_REGION}"

# 4. Point the ALB listener at the green target group.
echo "步骤 4: 修改 ALB 监听器..."
aws elbv2 modify-listener \
  --listener-arn "${LISTENER_ARN}" \
  --default-actions "Type=forward,TargetGroupArn=${GREEN_TG_ARN}" \
  --region "${AWS_REGION}"

# 5. Scale the blue environment down. The desired capacity may not go below
#    the group's minimum size (2 by default in variables.tf), so the minimum
#    is lowered in the same call.
echo "步骤 5: 缩减蓝色环境..."
aws autoscaling update-auto-scaling-group \
  --auto-scaling-group-name "${PROJECT}-asg-blue" \
  --min-size 0 \
  --desired-capacity 0 \
  --region "${AWS_REGION}"
echo -e "${GREEN}切换完成!${NC}"
回滚脚本
bash
#!/bin/bash
# scripts/rollback.sh
#
# Returns production traffic to the blue environment: scale blue up, wait
# until its targets are healthy, repoint the ALB listener, then scale
# green down.
#
# Required environment variables:
#   PROJECT        prefix of the ASG names (<PROJECT>-asg-blue/-green)
#   AWS_REGION     region passed to every AWS CLI call
#   BLUE_TG_ARN    blue target group ARN (BLUE_TARGET_GROUP_ARN also accepted)
#   LISTENER_ARN   ALB listener to repoint
# Optional:
#   DESIRED_CAPACITY  number of blue instances to start (default: 2)
set -euo pipefail

# Fail fast with a clear message instead of calling AWS with empty arguments.
: "${PROJECT:?PROJECT must be set}"
: "${AWS_REGION:?AWS_REGION must be set}"
: "${LISTENER_ARN:?LISTENER_ARN must be set}"
BLUE_TG_ARN="${BLUE_TG_ARN:-${BLUE_TARGET_GROUP_ARN:-}}"
: "${BLUE_TG_ARN:?BLUE_TG_ARN must be set}"
DESIRED_CAPACITY="${DESIRED_CAPACITY:-2}"

# Color output. These were previously undefined in this script, so the
# banners were printed without color.
RED='\033[0;31m'
NC='\033[0m'
echo -e "${RED}开始回滚操作...${NC}"

# 1. Scale the blue environment up.
echo "步骤 1: 扩展蓝色环境..."
aws autoscaling set-desired-capacity \
  --auto-scaling-group-name "${PROJECT}-asg-blue" \
  --desired-capacity "${DESIRED_CAPACITY}" \
  --region "${AWS_REGION}"

# 2. Block until the blue targets report healthy rather than sleeping for a
#    fixed time, so traffic is never switched to targets that are not ready.
#    NOTE(review): assumes the blue ASG registers its instances with this
#    target group — confirm the attachment exists.
echo "步骤 2: 等待蓝色环境就绪..."
aws elbv2 wait target-in-service \
  --target-group-arn "${BLUE_TG_ARN}" \
  --region "${AWS_REGION}"

# 3. Point the ALB listener back at the blue target group.
echo "步骤 3: 切换回蓝色环境..."
aws elbv2 modify-listener \
  --listener-arn "${LISTENER_ARN}" \
  --default-actions "Type=forward,TargetGroupArn=${BLUE_TG_ARN}" \
  --region "${AWS_REGION}"

# 4. Scale the green environment down (its minimum size is 0).
echo "步骤 4: 缩减绿色环境..."
aws autoscaling set-desired-capacity \
  --auto-scaling-group-name "${PROJECT}-asg-green" \
  --desired-capacity 0 \
  --region "${AWS_REGION}"
echo -e "${RED}回滚完成!${NC}"
部署流程
完整部署步骤
bash
# 1. Initialize Terraform (downloads providers and modules)
terraform init
# 2. Preview the changes
terraform plan -var-file="prod.tfvars"
# 3. Apply the configuration
terraform apply -var-file="prod.tfvars"
# 4. Switch traffic from blue to green
./scripts/switch.sh
# 5. Monitor the green environment after the switch.
#    Timestamps are explicit UTC (trailing Z). `date -d` is GNU date syntax;
#    on macOS/BSD use `date -u -v-1H` for the start time.
aws cloudwatch get-metric-statistics \
  --namespace AWS/EC2 \
  --metric-name CPUUtilization \
  --dimensions "Name=AutoScalingGroupName,Value=${PROJECT:?PROJECT must be set}-asg-green" \
  --start-time "$(date -u -d '1 hour ago' +%Y-%m-%dT%H:%M:%SZ)" \
  --end-time "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
  --period 300 \
  --statistics Average
总结
通过 Terraform 实现 AWS EC2 蓝绿部署,我们可以:
- 基础设施即代码 - 所有配置版本化管理,易于回溯和审计
- 跨可用区高可用 - 分布在多个 AZ,确保故障隔离
- 自动化切换 - 一键完成环境切换,最小化人工错误
- 快速回滚 - 提供回滚脚本,可重新扩容蓝色环境并将流量切回;若切换后暂不缩减蓝色环境,则可实现秒级回滚
- 可扩展性 - 通过 ASG 实现自动伸缩,应对流量高峰
蓝绿部署是生产环境发布的最佳实践之一,配合 Terraform 的声明式配置,可以构建可靠、可重复的基础设施部署流程。