Our Nginx server was struggling: a 10K concurrent-connection ceiling, high latency, and no caching.

We optimized the configuration and reached 100K concurrent connections, 60% lower latency, and intelligent caching. Here’s how.

Table of Contents

  1. Basic Optimization
  2. Caching
  3. Load Balancing
  4. Rate Limiting
  5. SSL/TLS Optimization
  6. Monitoring
  7. System Tuning
  8. Results
  9. Lessons Learned
  10. Conclusion

Basic Optimization

# nginx.conf — main worker/event/http tuning for high concurrency
user nginx;
worker_processes auto;  # one worker per CPU core
worker_rlimit_nofile 100000;  # per-worker FD limit; must stay within the OS fs.file-max

events {
    worker_connections 10000;  # max simultaneous connections per worker
    use epoll;  # efficient event notification on Linux
    multi_accept on;  # accept all pending connections at once, not one per event
}

http {
    # Zero-copy file serving; nopush coalesces headers + body, nodelay disables Nagle
    sendfile on;
    tcp_nopush on;
    tcp_nodelay on;
    
    # Timeouts — short values free idle/slow connections quickly
    keepalive_timeout 65;
    keepalive_requests 100;  # requests served per keepalive connection
    client_body_timeout 12;
    client_header_timeout 12;
    send_timeout 10;
    
    # Buffer sizes — tune to your typical request/header payloads
    client_body_buffer_size 128k;
    client_max_body_size 10m;  # larger uploads are rejected with 413
    client_header_buffer_size 1k;
    large_client_header_buffers 4 8k;  # fallback for big cookies/headers
    
    # Compression — level 6 balances CPU cost against ratio
    gzip on;
    gzip_vary on;  # emit "Vary: Accept-Encoding" for intermediary caches
    gzip_proxied any;
    gzip_comp_level 6;
    gzip_types text/plain text/css text/xml text/javascript 
               application/json application/javascript application/xml+rss;
    
    include /etc/nginx/conf.d/*.conf;
}

Caching

# Proxy cache: 2-level directory hashing, 10MB key zone, 10GB on disk;
# entries unused for 60m are evicted. Must appear in the http{} context.
proxy_cache_path /var/cache/nginx levels=1:2 keys_zone=my_cache:10m 
                 max_size=10g inactive=60m use_temp_path=off;

server {
    listen 80;
    server_name example.com;
    
    location / {
        proxy_pass http://backend;
        
        # Cache 200s for an hour; cache 404s briefly to absorb repeated misses
        proxy_cache my_cache;
        proxy_cache_valid 200 60m;
        proxy_cache_valid 404 10m;
        # Serve stale content while the backend errors out or is being refreshed
        proxy_cache_use_stale error timeout updating http_500 http_502 http_503 http_504;
        proxy_cache_background_update on;  # refresh stale entries asynchronously
        proxy_cache_lock on;  # collapse concurrent misses into one upstream fetch
        
        # Expose HIT/MISS/STALE to clients for debugging
        add_header X-Cache-Status $upstream_cache_status;
        
        # Skip the cache for requests carrying Pragma or Authorization headers
        proxy_cache_bypass $http_pragma $http_authorization;
        proxy_no_cache $http_pragma $http_authorization;
    }
    
    # Long-lived browser caching for static assets ("immutable" assumes
    # fingerprinted filenames).
    # NOTE(review): this location has no root/proxy_pass here — presumably
    # inherited or defined elsewhere; confirm before deploying.
    location ~* \.(jpg|jpeg|png|gif|ico|css|js)$ {
        expires 1y;
        add_header Cache-Control "public, immutable";
    }
}

Load Balancing

# Weighted least-connections pool with passive health checks
upstream backend {
    least_conn;  # route to the server with the fewest active connections (alternatives: ip_hash, hash $request_uri)
    
    # weight biases traffic 3:2:1; a server is taken out for 30s after 3 failures
    server backend1.example.com:8080 weight=3 max_fails=3 fail_timeout=30s;
    server backend2.example.com:8080 weight=2 max_fails=3 fail_timeout=30s;
    server backend3.example.com:8080 weight=1 max_fails=3 fail_timeout=30s;
    
    # Only receives traffic when all primary servers are down
    server backup.example.com:8080 backup;
    
    # Idle upstream connections kept open per worker — requires the
    # HTTP/1.1 + empty Connection header settings in the proxy block below
    keepalive 32;
}

server {
    listen 80;
    server_name example.com;
    
    location / {
        proxy_pass http://backend;
        
        # HTTP/1.1 with a cleared Connection header enables upstream keepalive
        proxy_http_version 1.1;
        proxy_set_header Connection "";
        # Forward the original host and client identity to the backend
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        
        # Fail fast so the balancer can retry another server
        proxy_connect_timeout 5s;
        proxy_send_timeout 10s;
        proxy_read_timeout 10s;
    }
}

Rate Limiting

# Rate-limit zones keyed by client IP ($binary_remote_addr is the compact
# binary form, so a 10m zone holds on the order of 160k addresses)
limit_req_zone $binary_remote_addr zone=general:10m rate=10r/s;
limit_req_zone $binary_remote_addr zone=api:10m rate=100r/s;
limit_conn_zone $binary_remote_addr zone=addr:10m;

server {
    listen 80;
    server_name example.com;
    
    # General traffic: 10 r/s sustained; bursts up to 20 served immediately
    # (nodelay) instead of being queued; excess gets 503
    location / {
        limit_req zone=general burst=20 nodelay;
        limit_conn addr 10;  # at most 10 concurrent connections per IP
        
        proxy_pass http://backend;
    }
    
    # API traffic gets a higher budget
    location /api/ {
        limit_req zone=api burst=50 nodelay;
        limit_conn addr 20;
        
        proxy_pass http://backend;
    }
}

SSL/TLS Optimization

server {
    # NOTE(review): the "http2" listen parameter is deprecated since nginx
    # 1.25.1 in favor of a separate "http2 on;" directive — confirm the
    # target nginx version before changing.
    listen 443 ssl http2;
    server_name example.com;
    
    # Certificate chain and private key
    ssl_certificate /etc/nginx/ssl/cert.pem;
    ssl_certificate_key /etc/nginx/ssl/key.pem;
    
    # Modern protocols and AEAD ciphers only; with TLS 1.2+ it is current
    # practice to let the client choose (prefer_server_ciphers off)
    ssl_protocols TLSv1.2 TLSv1.3;
    ssl_ciphers 'ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256';
    ssl_prefer_server_ciphers off;
    
    # Session resumption via shared server-side cache; tickets disabled
    # (trades some resumption performance for forward secrecy)
    ssl_session_cache shared:SSL:10m;
    ssl_session_timeout 10m;
    ssl_session_tickets off;
    
    # OCSP stapling.
    # NOTE(review): ssl_stapling_verify also requires ssl_trusted_certificate
    # and a resolver directive — neither is shown here; verify the full config.
    ssl_stapling on;
    ssl_stapling_verify on;
    
    # Force HTTPS for a year incl. subdomains; "always" adds the header on
    # error responses too
    add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always;
    
    location / {
        proxy_pass http://backend;
    }
}

Monitoring

# Internal stub_status endpoint for monitoring/scraping
server {
    listen 8080;
    server_name localhost;
    
    location /nginx_status {
        stub_status on;  # exposes active connections and request counters
        access_log off;  # keep scrapes out of the access log
        allow 127.0.0.1;  # loopback only — do not expose publicly
        deny all;
    }
}

Prometheus Exporter:

from prometheus_client import Gauge, start_http_server
import requests
import re
import time  # was missing: time.sleep() in the main loop raised NameError

# Prometheus metrics exported on :9113
nginx_connections = Gauge('nginx_connections', 'Active connections', ['state'])
nginx_requests = Gauge('nginx_requests_total', 'Total requests')

def parse_nginx_status():
    """Fetch the nginx stub_status page and update the Prometheus gauges.

    Skips one update cycle (leaving the previous values in place) if the
    status page is unreachable or its format is unexpected, so a transient
    nginx restart does not kill the exporter loop.
    """
    try:
        # Bounded timeout so a hung nginx cannot block the loop forever
        response = requests.get('http://localhost:8080/nginx_status', timeout=5)
        response.raise_for_status()
    except requests.RequestException:
        return
    text = response.text

    # First status line: "Active connections: N"
    active_match = re.search(r'Active connections:\s+(\d+)', text)
    if active_match:
        nginx_connections.labels(state='active').set(int(active_match.group(1)))

    # Counter line: "<accepts> <handled> <requests>" — third field is total requests
    requests_match = re.search(r'(\d+)\s+(\d+)\s+(\d+)', text)
    if requests_match:
        nginx_requests.set(int(requests_match.group(3)))

if __name__ == '__main__':
    start_http_server(9113)
    while True:
        parse_nginx_status()
        time.sleep(10)  # scrape interval

System Tuning

# /etc/sysctl.conf — kernel tuning for high-concurrency networking

# Raise the system-wide open-file-descriptor ceiling
# (worker_rlimit_nofile in nginx.conf must fit under this)
fs.file-max = 1000000

# Network tuning
# Larger accept/SYN backlogs to absorb connection bursts
net.core.somaxconn = 65535
net.ipv4.tcp_max_syn_backlog = 65535
# Widen the ephemeral port range for outgoing upstream connections
net.ipv4.ip_local_port_range = 1024 65535
# Reuse TIME_WAIT sockets for new outgoing connections; reclaim closing sockets sooner
net.ipv4.tcp_tw_reuse = 1
net.ipv4.tcp_fin_timeout = 30

# Apply changes (run in a shell — this line is not part of sysctl.conf itself)
sysctl -p

Results

Performance:

| Metric                 | Before | After | Improvement |
|------------------------|--------|-------|-------------|
| Concurrent connections | 10K    | 100K  | 10x         |
| Requests/sec           | 5K     | 50K   | 10x         |
| Latency (p95)          | 500ms  | 200ms | -60%        |
| Cache hit rate         | 0%     | 80%   | —           |

Resource Usage:

  • CPU: 80% → 40% (-50%)
  • Memory: 2GB → 4GB (+100%)
  • Network: Saturated → 60%

Cost Savings:

  • Servers: 10 → 3 (-70%)
  • Monthly cost: $2K → $600 (-70%)

Lessons Learned

  1. Worker processes matter: Auto-detect CPUs
  2. Caching essential: 80% hit rate
  3. Keepalive connections help: Reduce overhead
  4. Rate limiting protects: Prevent abuse
  5. System tuning critical: File descriptors

Conclusion

Nginx optimization delivered massive gains. 100K concurrent connections, latency -60%, 70% cost reduction.

Key takeaways:

  1. Concurrent connections: 10K → 100K (10x)
  2. Requests/sec: 5K → 50K (10x)
  3. Latency: 500ms → 200ms (-60%)
  4. Cache hit rate: 80%
  5. Cost: $2K → $600/month (-70%)

Optimize Nginx. Performance and cost matter.