Infrastructure as Code with Terraform: Managing 100+ AWS Resources
Our AWS infrastructure was a mess: manual changes, configuration drift, and no version control.
We migrated to Terraform. We now manage 100+ resources as code, deployments dropped from 2 hours to 10 minutes, and configuration drift is gone.
Table of Contents
The Problem
Before Terraform:
- 100+ AWS resources
- Manual console changes
- No version control
- Configuration drift
- Deployment: 2 hours
- Rollback: Impossible
Basic Setup
# main.tf
#
# Root Terraform settings: CLI/provider version constraints and remote state.
terraform {
  # Remote state lives in S3 (encrypted); the DynamoDB table is used for state
  # locking. Backend blocks cannot reference variables, so these are literals.
  backend "s3" {
    region         = "us-east-1"
    bucket         = "my-terraform-state"
    key            = "prod/terraform.tfstate"
    dynamodb_table = "terraform-locks"
    encrypt        = true
  }

  required_version = ">= 0.13"

  required_providers {
    aws = {
      version = "~> 3.0"
      source  = "hashicorp/aws"
    }
  }
}
# Default AWS provider; the target region is supplied through var.aws_region.
provider "aws" {
  region = var.aws_region
}
VPC Module
# modules/vpc/main.tf

# The VPC itself: address range from var.vpc_cidr, with DNS resolution and
# DNS hostnames both enabled.
resource "aws_vpc" "main" {
  cidr_block = var.vpc_cidr

  enable_dns_support   = true
  enable_dns_hostnames = true

  tags = {
    Environment = var.environment
    Name        = "${var.environment}-vpc"
  }
}
# Public subnets: one per CIDR in var.public_subnets. Instances launched here
# receive a public IP automatically.
resource "aws_subnet" "public" {
  count = length(var.public_subnets)

  vpc_id     = aws_vpc.main.id
  cidr_block = var.public_subnets[count.index]

  # element() wraps around the AZ list, so supplying more subnets than
  # availability zones spreads them round-robin instead of failing with an
  # invalid-index error. For lists of equal length the result is unchanged.
  availability_zone = element(var.availability_zones, count.index)

  map_public_ip_on_launch = true

  tags = {
    Name = "${var.environment}-public-${count.index + 1}"
  }
}
# Private subnets: one per CIDR in var.private_subnets. No public IPs are
# assigned on launch.
resource "aws_subnet" "private" {
  count = length(var.private_subnets)

  vpc_id     = aws_vpc.main.id
  cidr_block = var.private_subnets[count.index]

  # element() wraps around the AZ list, so supplying more subnets than
  # availability zones spreads them round-robin instead of failing with an
  # invalid-index error. For lists of equal length the result is unchanged.
  availability_zone = element(var.availability_zones, count.index)

  tags = {
    Name = "${var.environment}-private-${count.index + 1}"
  }
}
# Internet gateway attached to the VPC.
resource "aws_internet_gateway" "main" {
  tags = {
    Name = "${var.environment}-igw"
  }

  vpc_id = aws_vpc.main.id
}
# NAT gateways: one per public subnet, each placed in the public subnet with
# the same index and bound to the Elastic IP with the same index.
resource "aws_nat_gateway" "main" {
  count = length(var.public_subnets)

  allocation_id = aws_eip.nat[count.index].id
  subnet_id     = aws_subnet.public[count.index].id

  tags = {
    Name = "${var.environment}-nat-${count.index + 1}"
  }

  # Nothing above references the internet gateway, so Terraform cannot infer
  # the ordering. The provider documentation recommends declaring it
  # explicitly so the IGW exists before the NAT gateway is created (and is
  # destroyed after it).
  depends_on = [aws_internet_gateway.main]
}
# Elastic IPs for the NAT gateways: one per public subnet.
resource "aws_eip" "nat" {
  count = length(var.public_subnets)

  # VPC-scoped address. `vpc = true` is the correct form for the AWS provider
  # 3.x series pinned in main.tf; provider v5 replaces it with `domain = "vpc"`.
  vpc = true

  tags = {
    Name = "${var.environment}-nat-eip-${count.index + 1}"
  }

  # A VPC EIP may require the internet gateway to exist first; the provider
  # documentation advises an explicit dependency because no attribute here
  # references the IGW.
  depends_on = [aws_internet_gateway.main]
}
EKS Cluster
# modules/eks/main.tf

# EKS control plane named "<environment>-eks", running the Kubernetes version
# given by var.kubernetes_version.
resource "aws_eks_cluster" "main" {
  name     = "${var.environment}-eks"
  version  = var.kubernetes_version
  role_arn = aws_iam_role.cluster.arn

  # The IAM role and its policy attachments are declared outside this snippet.
  # The attachments must be in place before the cluster is created.
  depends_on = [
    aws_iam_role_policy_attachment.cluster_policy,
    aws_iam_role_policy_attachment.service_policy,
  ]

  # Both the public and the private API server endpoints are enabled.
  vpc_config {
    subnet_ids              = var.subnet_ids
    endpoint_public_access  = true
    endpoint_private_access = true
  }
}
# Managed node group "<environment>-node-group" attached to the cluster above.
resource "aws_eks_node_group" "main" {
  node_group_name = "${var.environment}-node-group"
  cluster_name    = aws_eks_cluster.main.name
  node_role_arn   = aws_iam_role.node.arn

  subnet_ids     = var.subnet_ids
  instance_types = var.instance_types

  # Node count bounds and the initial target size.
  scaling_config {
    min_size     = var.min_size
    max_size     = var.max_size
    desired_size = var.desired_size
  }

  # The node role's policy attachments (declared outside this snippet) must
  # exist before the node group is created.
  depends_on = [
    aws_iam_role_policy_attachment.node_policy,
    aws_iam_role_policy_attachment.cni_policy,
    aws_iam_role_policy_attachment.registry_policy,
  ]
}
RDS Database
# modules/rds/main.tf

# PostgreSQL 12.5 instance named "<environment>-db".
resource "aws_db_instance" "main" {
  identifier     = "${var.environment}-db"
  engine         = "postgres"
  engine_version = "12.5"
  instance_class = var.instance_class

  # Encrypted storage. max_allocated_storage sets the upper limit for storage
  # autoscaling.
  allocated_storage     = var.allocated_storage
  max_allocated_storage = var.max_allocated_storage
  storage_encrypted     = true

  # Initial database name. main.tf pins the AWS provider to "~> 3.0", where
  # this argument is called `name`; `db_name` only exists from provider v4.0
  # onward and is rejected as an unsupported argument by 3.x. Rename it to
  # `db_name` when the provider constraint is raised to 4.x or later.
  name     = var.database_name
  username = var.master_username
  # NOTE: Terraform stores this value in the state file, so access to the
  # state bucket must be restricted accordingly.
  password = var.master_password

  vpc_security_group_ids = [aws_security_group.db.id]
  db_subnet_group_name   = aws_db_subnet_group.main.name

  # Keep 7 days of automated backups; the backup window ends before the
  # maintenance window begins.
  backup_retention_period = 7
  backup_window           = "03:00-04:00"
  maintenance_window      = "mon:04:00-mon:05:00"

  # Take a final snapshot when the instance is destroyed.
  skip_final_snapshot       = false
  final_snapshot_identifier = "${var.environment}-db-final-snapshot"

  tags = {
    Name        = "${var.environment}-db"
    Environment = var.environment
  }
}
# Subnet group the database instance is placed in, built from var.subnet_ids.
resource "aws_db_subnet_group" "main" {
  name = "${var.environment}-db-subnet-group"

  tags = {
    Name = "${var.environment}-db-subnet-group"
  }

  subnet_ids = var.subnet_ids
}
Environment Configuration
# environments/prod/main.tf

# Production network: a /16 VPC with two public and two private /24 subnets
# spread over us-east-1a and us-east-1b.
module "vpc" {
  source      = "../../modules/vpc"
  environment = "prod"

  vpc_cidr           = "10.0.0.0/16"
  availability_zones = ["us-east-1a", "us-east-1b"]

  public_subnets = [
    "10.0.1.0/24",
    "10.0.2.0/24",
  ]

  private_subnets = [
    "10.0.10.0/24",
    "10.0.20.0/24",
  ]
}
# Production Kubernetes cluster, placed in the VPC module's private subnets.
module "eks" {
  source      = "../../modules/eks"
  environment = "prod"

  kubernetes_version = "1.18"
  subnet_ids         = module.vpc.private_subnet_ids

  # Node group: t3.large workers, 3 to 10 nodes, starting at 3.
  instance_types = ["t3.large"]
  min_size       = 3
  max_size       = 10
  desired_size   = 3
}
# Production database, placed in the VPC module's private subnets.
module "rds" {
  source      = "../../modules/rds"
  environment = "prod"

  subnet_ids     = module.vpc.private_subnet_ids
  instance_class = "db.t3.large"

  # Storage starts at 100 and may grow up to 1000.
  allocated_storage     = 100
  max_allocated_storage = 1000

  # Credentials come from input variables rather than being hard-coded here.
  database_name   = "myapp"
  master_username = var.db_username
  master_password = var.db_password
}
Deployment Workflow
#!/bin/bash
# deploy.sh - plan and apply the Terraform configuration for one environment.
#
# Usage: ./deploy.sh <environment>
#   <environment> must name a directory under environments/ (for example: prod).
#
# Exits non-zero if the argument is missing, the environment directory does
# not exist, or any terraform command fails.
set -euo pipefail

# ${1:-} keeps the usage check below reachable under `set -u`.
ENVIRONMENT="${1:-}"

if [ -z "$ENVIRONMENT" ]; then
  echo "Usage: ./deploy.sh <environment>"
  exit 1
fi

ENV_DIR="environments/$ENVIRONMENT"

# Fail with a clear message instead of a bare `cd` error.
if [ ! -d "$ENV_DIR" ]; then
  echo "Unknown environment '$ENVIRONMENT': directory $ENV_DIR not found" >&2
  exit 1
fi

# Quoted so names containing spaces or glob characters are handled literally.
cd "$ENV_DIR"

# A saved plan file can contain sensitive values. Remove it on every exit
# path, including a failed plan or apply, not only after a successful run.
trap 'rm -f tfplan' EXIT

# Initialize
terraform init

# Plan
terraform plan -out=tfplan

# Apply exactly the plan that was just produced
terraform apply tfplan
State Management
# backend.tf

# Bucket that holds the Terraform state referenced by the S3 backend in main.tf.
# Versioning keeps earlier state revisions recoverable, objects are encrypted
# with AES256 by default, and prevent_destroy makes Terraform refuse any plan
# that would delete the bucket.
resource "aws_s3_bucket" "terraform_state" {
  bucket = "my-terraform-state"

  versioning {
    enabled = true
  }

  server_side_encryption_configuration {
    rule {
      apply_server_side_encryption_by_default {
        sse_algorithm = "AES256"
      }
    }
  }

  lifecycle {
    prevent_destroy = true
  }
}

# State files contain secrets (for example the database master password), so
# block every form of public access to the state bucket.
resource "aws_s3_bucket_public_access_block" "terraform_state" {
  bucket = aws_s3_bucket.terraform_state.id

  block_public_acls       = true
  block_public_policy     = true
  ignore_public_acls      = true
  restrict_public_buckets = true
}
# Table named by the S3 backend's dynamodb_table setting in main.tf.
# On-demand billing, keyed by the string attribute "LockID".
resource "aws_dynamodb_table" "terraform_locks" {
  name         = "terraform-locks"
  hash_key     = "LockID"
  billing_mode = "PAY_PER_REQUEST"

  attribute {
    type = "S"
    name = "LockID"
  }
}
Results
Deployment:
- Time: 2h → 10min (-92%)
- Errors: 50% → 0%
- Rollback: Impossible → 2min
Infrastructure:
- Resources managed: 100+
- Configuration drift: 0
- Version control: ✅
- Reproducibility: 100%
Team Productivity:
- Infrastructure changes: 5x faster
- Onboarding time: -70%
- Documentation: Auto-generated
Lessons Learned
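- Modules are essential: they make infrastructure reusable across environments.
- State management is critical: store state in S3 and lock it with DynamoDB.
- Plan before apply: review the plan to catch errors before they reach production.
- Version everything: keep infrastructure code in Git.
- Migrate gradually: don’t rush.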
Conclusion
Terraform transformed our infrastructure management: 100+ resources are now managed as code, deployments dropped from 2 hours to 10 minutes, and configuration drift is gone.
Key takeaways:
- Deployment: 2h → 10min (-92%)
- Configuration drift: 0
- Resources managed: 100+
- Rollback: 2min
- Team productivity: 5x
Use Terraform. Infrastructure as code works.