Docker Compose is often seen as a development tool, but with the right patterns, it’s powerful enough for production. Here’s what I learned deploying 12 services to production using Docker Compose.

Why Docker Compose for Production?

Common Objection: “Docker Compose is only for development!”

Reality: For small to medium deployments (< 50 containers), Docker Compose offers:

  • Simplicity over Kubernetes complexity
  • Declarative configuration
  • Easy rollbacks
  • Lower operational overhead
  • Cost-effective for smaller teams
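
The "easy rollbacks" point deserves a concrete example: if images are tagged per release and the compose file reads ${VERSION} (as in the file below), rolling back is a one-liner.

# Roll back by redeploying the previous release tag (1.4.2 is just an example)
VERSION=1.4.2 docker-compose up -d --no-deps web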

Production-Ready Compose File

Basic Structure

version: '3.8'

services:
  web:
    image: myapp:${VERSION:-latest}
    deploy:
      replicas: 3
      restart_policy:
        condition: on-failure
        delay: 5s
        max_attempts: 3
      resources:
        limits:
          cpus: '1.0'
          memory: 1G
        reservations:
          cpus: '0.5'
          memory: 512M
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8080/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s
    environment:
      - NODE_ENV=production
      - DATABASE_URL=${DATABASE_URL}
    secrets:
      - db_password
      - api_key
    networks:
      - frontend
      - backend
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"

secrets:
  db_password:
    external: true
  api_key:
    external: true

networks:
  frontend:
    driver: bridge
  backend:
    driver: bridge
    internal: true
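
A caveat on the deploy: block above: restart_policy was designed for Swarm's docker stack deploy, so when running standalone docker-compose it is safer to also set the top-level restart: key. Resource limits and replica counts, by contrast, are applied by recent Compose v2 releases. A minimal sketch:

services:
  web:
    image: myapp:${VERSION:-latest}
    # Honored by standalone Compose; deploy.restart_policy targets Swarm
    restart: unless-stopped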

Health Checks

Application Health Check

services:
  api:
    image: api:latest
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://localhost:3000/health || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s

Database Health Check

services:
  postgres:
    image: postgres:14
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U postgres"]
      interval: 10s
      timeout: 5s
      retries: 5

Custom Health Check Script

#!/bin/bash
# health-check.sh

# Check if application is responding
if ! curl -f http://localhost:8080/health > /dev/null 2>&1; then
    exit 1
fi

# Check database connection
if ! pg_isready -h db -U postgres > /dev/null 2>&1; then
    exit 1
fi

# Check Redis connection
if ! redis-cli -h redis ping > /dev/null 2>&1; then
    exit 1
fi

exit 0

Reference the script from the service definition (the image must include it, e.g. at /app/health-check.sh):

services:
  app:
    image: app:latest
    healthcheck:
      test: ["CMD", "/app/health-check.sh"]
      interval: 30s
      timeout: 10s
      retries: 3
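
Once health checks are in place, Compose reports their state in docker-compose ps, and the raw status can be queried directly:

# Per-container health state (healthy / unhealthy / starting)
docker-compose ps
docker inspect --format '{{.State.Health.Status}}' $(docker-compose ps -q app)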

Secrets Management

Using Docker Secrets

# Create secrets
echo "my_db_password" | docker secret create db_password -
echo "my_api_key" | docker secret create api_key -
services:
  app:
    image: app:latest
    secrets:
      - db_password
      - api_key
    environment:
      - DB_PASSWORD_FILE=/run/secrets/db_password
      - API_KEY_FILE=/run/secrets/api_key

secrets:
  db_password:
    external: true
  api_key:
    external: true
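
Note that docker secret create only works when the engine is in Swarm mode. With standalone docker-compose, one alternative is file-based secrets, which are mounted into /run/secrets the same way (a sketch; the ./secrets paths are an assumption):

secrets:
  db_password:
    file: ./secrets/db_password.txt
  api_key:
    file: ./secrets/api_key.txt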

Application Code

# app.py
import os

def read_secret(secret_name):
    """Read secret from Docker secrets or environment"""
    secret_file = os.getenv(f'{secret_name}_FILE')
    if secret_file and os.path.exists(secret_file):
        with open(secret_file, 'r') as f:
            return f.read().strip()
    return os.getenv(secret_name)

# Usage
db_password = read_secret('DB_PASSWORD')
api_key = read_secret('API_KEY')

Resource Limits

CPU and Memory Limits

services:
  web:
    image: web:latest
    deploy:
      resources:
        limits:
          cpus: '2.0'
          memory: 2G
        reservations:
          cpus: '1.0'
          memory: 1G

Monitoring Resource Usage

# Monitor container resources
docker stats

# Check specific service
docker stats $(docker-compose ps -q web)
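
For a one-shot snapshot instead of the live view, --no-stream with a custom format works well:

# Table-formatted resource snapshot for all running containers
docker stats --no-stream --format "table {{.Name}}\t{{.CPUPerc}}\t{{.MemUsage}}"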

Logging Strategy

Centralized Logging

services:
  app:
    image: app:latest
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "5"
        labels: "service,environment"
        
  # Log aggregator
  fluentd:
    image: fluent/fluentd:latest
    volumes:
      - ./fluentd/conf:/fluentd/etc
      - /var/lib/docker/containers:/var/lib/docker/containers:ro
    ports:
      - "24224:24224"

Fluentd Configuration

# fluentd/conf/fluent.conf
<source>
  @type forward
  port 24224
</source>

<match docker.**>
  @type elasticsearch
  host elasticsearch
  port 9200
  logstash_format true
  logstash_prefix docker
  include_tag_key true
  tag_key @log_name
</match>
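
The forward source above only receives logs that are actually sent to it. To route a service's output through Fluentd instead of the local json-file driver, switch that service to the fluentd logging driver (a sketch; the tag value is an assumption):

services:
  app:
    image: app:latest
    logging:
      driver: "fluentd"
      options:
        fluentd-address: "localhost:24224"
        tag: "docker.app"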

High Availability Patterns

Load Balancing with Nginx

services:
  nginx:
    image: nginx:alpine
    ports:
      - "80:80"
      - "443:443"
    volumes:
      - ./nginx.conf:/etc/nginx/nginx.conf:ro
      - ./ssl:/etc/nginx/ssl:ro
    depends_on:
      - app
    networks:
      - frontend

  app:
    image: app:latest
    deploy:
      replicas: 3
    networks:
      - frontend
      - backend

Nginx Configuration

# nginx.conf
upstream app_servers {
    least_conn;
    server app:8080 max_fails=3 fail_timeout=30s;
}

server {
    listen 80;
    server_name example.com;
    
    location / {
        proxy_pass http://app_servers;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        
        # Health check
        proxy_next_upstream error timeout http_502 http_503 http_504;
        proxy_connect_timeout 5s;
        proxy_send_timeout 10s;
        proxy_read_timeout 10s;
    }
    
    location /health {
        access_log off;
        return 200 "healthy\n";
    }
}
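
One operational detail: nginx resolves the app upstream when it loads its configuration, so after scaling or recreating the app service it is worth reloading nginx to pick up fresh container addresses:

# Re-resolve upstream addresses after scaling the app service
docker-compose exec nginx nginx -s reload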

Database Backup Strategy

Automated Backups

services:
  postgres:
    image: postgres:14
    volumes:
      - postgres_data:/var/lib/postgresql/data
      - ./backups:/backups
    environment:
      - POSTGRES_PASSWORD_FILE=/run/secrets/db_password
    secrets:
      - db_password

  backup:
    image: postgres:14
    depends_on:
      - postgres
    volumes:
      - ./backups:/backups
    secrets:
      - db_password
    entrypoint: |
      bash -c 'bash -s <<EOF
      trap "break;exit" SIGHUP SIGINT SIGTERM
      # libpq does not read a PGPASSWORD_FILE variable, so load the secret directly
      export PGPASSWORD=$$(cat /run/secrets/db_password)
      while /bin/true; do
        pg_dump -h postgres -U postgres -Fc mydb > /backups/backup_$$(date +%Y%m%d_%H%M%S).dump
        find /backups -type f -mtime +7 -delete
        sleep 86400
      done
      EOF'

Backup Script

#!/bin/bash
# backup.sh

BACKUP_DIR="/backups"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
BACKUP_FILE="$BACKUP_DIR/backup_$TIMESTAMP.dump"

# Create backup
docker-compose exec -T postgres pg_dump -U postgres -Fc mydb > "$BACKUP_FILE"

# Compress
gzip "$BACKUP_FILE"

# Upload to S3
aws s3 cp "$BACKUP_FILE.gz" "s3://my-backups/postgres/"

# Clean old backups (keep last 7 days)
find "$BACKUP_DIR" -type f -mtime +7 -delete

echo "Backup completed: $BACKUP_FILE.gz"

Zero-Downtime Deployments

Rolling Update Strategy

#!/bin/bash
# deploy.sh

set -e

VERSION=$1

if [ -z "$VERSION" ]; then
    echo "Usage: ./deploy.sh <version>"
    exit 1
fi

echo "Deploying version $VERSION..."

# Pull new image
docker-compose pull app

# Update with rolling restart
docker-compose up -d --no-deps --scale app=6 app

# Wait for health checks
sleep 30

# Scale down old containers
docker-compose up -d --no-deps --scale app=3 app

# Clean up old images
docker image prune -f

echo "Deployment completed successfully"

Blue-Green Deployment

# docker-compose.blue.yml
services:
  app-blue:
    image: app:${BLUE_VERSION}
    networks:
      - app_network

# docker-compose.green.yml
services:
  app-green:
    image: app:${GREEN_VERSION}
    networks:
      - app_network

The switch is handled by a small script:

#!/bin/bash
# blue-green-deploy.sh

CURRENT_COLOR=$(cat .current_color)
NEW_COLOR=$([ "$CURRENT_COLOR" = "blue" ] && echo "green" || echo "blue")

echo "Current: $CURRENT_COLOR, Deploying: $NEW_COLOR"

# Start new version
docker-compose -f docker-compose.$NEW_COLOR.yml up -d

# Wait for health checks
sleep 30

# Switch traffic
docker-compose -f docker-compose.nginx.yml restart

# Stop old version
docker-compose -f docker-compose.$CURRENT_COLOR.yml down

# Update current color
echo "$NEW_COLOR" > .current_color

echo "Deployment completed"

Monitoring and Alerting

Prometheus + Grafana

services:
  prometheus:
    image: prom/prometheus:latest
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
      - prometheus_data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
    ports:
      - "9090:9090"

  grafana:
    image: grafana/grafana:latest
    volumes:
      - grafana_data:/var/lib/grafana
      - ./grafana/dashboards:/etc/grafana/provisioning/dashboards
    environment:
      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_PASSWORD}
    ports:
      - "3000:3000"
    depends_on:
      - prometheus

  node-exporter:
    image: prom/node-exporter:latest
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    command:
      - '--path.procfs=/host/proc'
      - '--path.sysfs=/host/sys'
      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'

volumes:
  prometheus_data:
  grafana_data:
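
The compose file mounts a prometheus.yml that is not shown above. A minimal version that scrapes Prometheus itself and node-exporter might look like this:

# prometheus.yml
global:
  scrape_interval: 15s

scrape_configs:
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']

  - job_name: 'node-exporter'
    static_configs:
      - targets: ['node-exporter:9100']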

Environment-Specific Configurations

Base Configuration

# docker-compose.yml
version: '3.8'

services:
  app:
    image: app:${VERSION:-latest}
    environment:
      - NODE_ENV=${NODE_ENV:-production}

Production Override

# docker-compose.prod.yml
version: '3.8'

services:
  app:
    deploy:
      replicas: 5
      resources:
        limits:
          cpus: '2.0'
          memory: 2G
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "5"

Usage

# Production deployment
docker-compose -f docker-compose.yml -f docker-compose.prod.yml up -d

# Staging deployment
docker-compose -f docker-compose.yml -f docker-compose.staging.yml up -d
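
Before deploying, it is worth rendering the merged configuration to confirm the override does what you expect:

# Print the effective config after merging base + prod override
docker-compose -f docker-compose.yml -f docker-compose.prod.yml config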

Security Best Practices

1. Non-Root User

# Dockerfile
FROM node:16-alpine

# Create an unprivileged app user and group
RUN addgroup -g 1001 -S nodejs && \
    adduser -S nodejs -u 1001 -G nodejs

# Copy the app into place with the right ownership
WORKDIR /app
COPY --chown=nodejs:nodejs . /app

USER nodejs

CMD ["node", "server.js"]

2. Read-Only Filesystem

services:
  app:
    image: app:latest
    read_only: true
    tmpfs:
      - /tmp
      - /var/run

3. Network Isolation

services:
  web:
    networks:
      - frontend
  
  api:
    networks:
      - frontend
      - backend
  
  database:
    networks:
      - backend

networks:
  frontend:
    driver: bridge
  backend:
    driver: bridge
    internal: true  # No external access

Conclusion

Docker Compose is production-ready when used with proper patterns:

Use for:

  • Small to medium deployments
  • Simpler infrastructure needs
  • Teams without Kubernetes expertise
  • Cost-sensitive projects

Avoid for:

  • Large-scale deployments (100+ containers)
  • Multi-datacenter setups
  • Complex orchestration needs
  • Auto-scaling requirements

My Production Stack:

  • 12 services
  • 3 servers
  • 99.9% uptime
  • $200/month infrastructure cost

For many production workloads, Docker Compose strikes a practical balance between simplicity and capability.