Nginx Performance Tuning: Handling 100K Concurrent Connections
Our Nginx server was struggling. 10K concurrent connections max, high latency, no caching.
Optimized configuration. 100K concurrent connections, latency -60%, intelligent caching. Here’s how.
Table of Contents
Basic Optimization
# nginx.conf
user nginx;
worker_processes auto; # Auto-detect CPU cores
worker_rlimit_nofile 100000;
events {
worker_connections 10000;
use epoll; # Efficient on Linux
multi_accept on;
}
http {
# Basic settings
sendfile on;
tcp_nopush on;
tcp_nodelay on;
# Timeouts
keepalive_timeout 65;
keepalive_requests 100;
client_body_timeout 12;
client_header_timeout 12;
send_timeout 10;
# Buffer sizes
client_body_buffer_size 128k;
client_max_body_size 10m;
client_header_buffer_size 1k;
large_client_header_buffers 4 8k;
# Compression
gzip on;
gzip_vary on;
gzip_proxied any;
gzip_comp_level 6;
gzip_types text/plain text/css text/xml text/javascript
application/json application/javascript application/xml+rss;
include /etc/nginx/conf.d/*.conf;
}
Caching
# Cache configuration
proxy_cache_path /var/cache/nginx levels=1:2 keys_zone=my_cache:10m
max_size=10g inactive=60m use_temp_path=off;
server {
listen 80;
server_name example.com;
location / {
proxy_pass http://backend;
# Caching
proxy_cache my_cache;
proxy_cache_valid 200 60m;
proxy_cache_valid 404 10m;
proxy_cache_use_stale error timeout updating http_500 http_502 http_503 http_504;
proxy_cache_background_update on;
proxy_cache_lock on;
# Cache headers
add_header X-Cache-Status $upstream_cache_status;
# Bypass cache for certain requests
proxy_cache_bypass $http_pragma $http_authorization;
proxy_no_cache $http_pragma $http_authorization;
}
# Static files caching
location ~* \.(jpg|jpeg|png|gif|ico|css|js)$ {
expires 1y;
add_header Cache-Control "public, immutable";
}
}
Load Balancing
upstream backend {
# Load balancing method
least_conn; # or ip_hash, hash $request_uri
# Servers
server backend1.example.com:8080 weight=3 max_fails=3 fail_timeout=30s;
server backend2.example.com:8080 weight=2 max_fails=3 fail_timeout=30s;
server backend3.example.com:8080 weight=1 max_fails=3 fail_timeout=30s;
# Backup server
server backup.example.com:8080 backup;
# Keepalive connections
keepalive 32;
}
server {
listen 80;
server_name example.com;
location / {
proxy_pass http://backend;
# Proxy settings
proxy_http_version 1.1;
proxy_set_header Connection "";
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
# Timeouts
proxy_connect_timeout 5s;
proxy_send_timeout 10s;
proxy_read_timeout 10s;
}
}
Rate Limiting
# Define rate limit zones
limit_req_zone $binary_remote_addr zone=general:10m rate=10r/s;
limit_req_zone $binary_remote_addr zone=api:10m rate=100r/s;
limit_conn_zone $binary_remote_addr zone=addr:10m;
server {
listen 80;
server_name example.com;
# General rate limit
location / {
limit_req zone=general burst=20 nodelay;
limit_conn addr 10;
proxy_pass http://backend;
}
# API rate limit
location /api/ {
limit_req zone=api burst=50 nodelay;
limit_conn addr 20;
proxy_pass http://backend;
}
}
SSL/TLS Optimization
server {
listen 443 ssl http2;
server_name example.com;
# Certificates
ssl_certificate /etc/nginx/ssl/cert.pem;
ssl_certificate_key /etc/nginx/ssl/key.pem;
# SSL optimization
ssl_protocols TLSv1.2 TLSv1.3;
ssl_ciphers 'ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256';
ssl_prefer_server_ciphers off;
# SSL session cache
ssl_session_cache shared:SSL:10m;
ssl_session_timeout 10m;
ssl_session_tickets off;
# OCSP stapling
ssl_stapling on;
ssl_stapling_verify on;
# HSTS
add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always;
location / {
proxy_pass http://backend;
}
}
Monitoring
# Status endpoint
server {
listen 8080;
server_name localhost;
location /nginx_status {
stub_status on;
access_log off;
allow 127.0.0.1;
deny all;
}
}
Prometheus Exporter:
from prometheus_client import Gauge, start_http_server
import requests
import re
# Metrics
nginx_connections = Gauge('nginx_connections', 'Active connections', ['state'])
nginx_requests = Gauge('nginx_requests_total', 'Total requests')
def parse_nginx_status():
"""Parse nginx status."""
response = requests.get('http://localhost:8080/nginx_status')
text = response.text
# Parse connections
active = int(re.search(r'Active connections: (\d+)', text).group(1))
nginx_connections.labels(state='active').set(active)
# Parse requests
requests_match = re.search(r'(\d+)\s+(\d+)\s+(\d+)', text)
total_requests = int(requests_match.group(3))
nginx_requests.set(total_requests)
if __name__ == '__main__':
start_http_server(9113)
while True:
parse_nginx_status()
time.sleep(10)
System Tuning
# /etc/sysctl.conf
# Increase file descriptors
fs.file-max = 1000000
# Network tuning
net.core.somaxconn = 65535
net.ipv4.tcp_max_syn_backlog = 65535
net.ipv4.ip_local_port_range = 1024 65535
net.ipv4.tcp_tw_reuse = 1
net.ipv4.tcp_fin_timeout = 30
# Apply changes
sysctl -p
Results
Performance:
| Metric | Before | After | Improvement |
|---|---|---|---|
| Concurrent connections | 10K | 100K | 10x |
| Requests/sec | 5K | 50K | 10x |
| Latency (p95) | 500ms | 200ms | 60% |
| Cache hit rate | 0% | 80% | - |
Resource Usage:
- CPU: 80% → 40% (-50%)
- Memory: 2GB → 4GB (+100%)
- Network: Saturated → 60%
Cost Savings:
- Servers: 10 → 3 (-70%)
- Monthly cost: $2K → $600 (-70%)
Lessons Learned
- Worker processes matter: Auto-detect CPUs
- Caching essential: 80% hit rate
- Keepalive connections help: Reduce overhead
- Rate limiting protects: Prevent abuse
- System tuning critical: File descriptors
Conclusion
Nginx optimization delivered massive gains. 100K concurrent connections, latency -60%, 70% cost reduction.
Key takeaways:
- Concurrent connections: 10K → 100K (10x)
- Requests/sec: 5K → 50K (10x)
- Latency: 500ms → 200ms (-60%)
- Cache hit rate: 80%
- Cost: $2K → $600/month (-70%)
Optimize Nginx. Performance and cost matter.