Docker Compose Production Deployment
Production-ready Docker Compose configurations with security, reliability, and scalability best practices.
Production-Ready Base Template
A comprehensive production template with essential configurations:
# Production base template: nginx edge proxy, API, background workers,
# PostgreSQL, Redis and a scheduled PostgreSQL backup job.
#
# Variables that must be set before `docker compose up` (Compose aborts with
# the message after `:?` when one is missing):
#   API_VERSION, WORKER_VERSION  - pinned image tags (no implicit `latest`)
#   REDIS_PASSWORD               - password enforced by the cache service
version: '3.8'

services:
  # Edge proxy; the only service that publishes ports on the host.
  nginx:
    image: nginx:1.25-alpine
    container_name: production-nginx
    restart: unless-stopped
    ports:
      - "80:80"
      - "443:443"
    volumes:
      - ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro
      - ./nginx/ssl:/etc/nginx/ssl:ro
      - nginx-cache:/var/cache/nginx
      - nginx-logs:/var/log/nginx
    networks:
      - frontend
    healthcheck:
      test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"
    deploy:
      resources:
        limits:
          cpus: '1.0'
          memory: 512M
        reservations:
          cpus: '0.5'
          memory: 256M

  # Application API; bridges the public-facing and the internal network.
  api:
    # The tag is mandatory so a deployment never silently floats to `latest`.
    image: "mycompany/api:${API_VERSION:?set API_VERSION to a pinned image tag}"
    container_name: production-api
    restart: unless-stopped
    networks:
      - frontend
      - backend
    environment:
      NODE_ENV: production
      # The host part must be the Compose service name (`database`); the
      # password is delivered separately through the db_password secret.
      DATABASE_URL: postgresql://postgres@database:5432/production_db
      # NOTE(review): the cache enforces --requirepass; the client password is
      # presumably supplied through .env.production - confirm.
      REDIS_URL: redis://cache:6379
      LOG_LEVEL: ${LOG_LEVEL:-info}
      PORT: 3000
    env_file:
      - .env.production
    secrets:
      - db_password
      - jwt_secret
    healthcheck:
      test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:3000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s
    depends_on:
      database:
        condition: service_healthy
      cache:
        condition: service_healthy
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "5"
    deploy:
      resources:
        limits:
          cpus: '2.0'
          memory: 2G
        reservations:
          cpus: '1.0'
          memory: 1G

  # Background job workers, scaled to three replicas.
  worker:
    image: "mycompany/worker:${WORKER_VERSION:?set WORKER_VERSION to a pinned image tag}"
    # No container_name here: a fixed container name must be unique, so it
    # cannot be combined with `deploy.replicas` greater than one.
    restart: unless-stopped
    networks:
      - backend
    environment:
      NODE_ENV: production
      # Same connection target as the api service (service name `database`).
      DATABASE_URL: postgresql://postgres@database:5432/production_db
      REDIS_URL: redis://cache:6379
      QUEUE_NAME: ${QUEUE_NAME:-default}
    env_file:
      - .env.production
    secrets:
      - db_password
    depends_on:
      database:
        condition: service_healthy
      cache:
        condition: service_healthy
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "5"
    deploy:
      replicas: 3
      resources:
        limits:
          cpus: '1.0'
          memory: 1G
        reservations:
          cpus: '0.5'
          memory: 512M

  # PostgreSQL primary; the password is read from the db_password secret.
  database:
    image: postgres:15-alpine
    container_name: production-db
    restart: unless-stopped
    networks:
      - backend
    environment:
      POSTGRES_DB: production_db
      POSTGRES_USER: postgres
      POSTGRES_PASSWORD_FILE: /run/secrets/db_password
      POSTGRES_INITDB_ARGS: "-E UTF8 --locale=en_US.UTF-8"
    secrets:
      - db_password
    volumes:
      - postgres-data:/var/lib/postgresql/data
      - ./db/init:/docker-entrypoint-initdb.d:ro
      - postgres-logs:/var/log/postgresql
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U postgres -d production_db"]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 30s
    # Server settings are passed as `-c name=value` pairs.
    command:
      - "postgres"
      - "-c"
      - "max_connections=200"
      - "-c"
      - "shared_buffers=256MB"
      - "-c"
      - "effective_cache_size=1GB"
      - "-c"
      - "maintenance_work_mem=64MB"
      - "-c"
      - "checkpoint_completion_target=0.9"
      - "-c"
      - "wal_buffers=16MB"
      - "-c"
      - "default_statistics_target=100"
      - "-c"
      - "random_page_cost=1.1"
      - "-c"
      - "effective_io_concurrency=200"
      - "-c"
      - "work_mem=1MB"
      - "-c"
      - "min_wal_size=1GB"
      - "-c"
      - "max_wal_size=4GB"
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "5"
    deploy:
      resources:
        limits:
          cpus: '2.0'
          memory: 2G
        reservations:
          cpus: '1.0'
          memory: 1G

  # Redis with AOF persistence, an LRU eviction policy and a password.
  cache:
    image: redis:7-alpine
    container_name: production-cache
    restart: unless-stopped
    networks:
      - backend
    environment:
      # redis-cli reads REDISCLI_AUTH, which lets the health check below
      # authenticate against the password-protected server.
      REDISCLI_AUTH: "${REDIS_PASSWORD:?set REDIS_PASSWORD}"
    command: >
      redis-server
      --appendonly yes
      --appendfsync everysec
      --maxmemory 512mb
      --maxmemory-policy allkeys-lru
      --requirepass ${REDIS_PASSWORD:?set REDIS_PASSWORD}
    volumes:
      - redis-data:/data
    healthcheck:
      # Read-only probe: succeeds only when the server answers PONG, so an
      # authentication or loading error reply counts as unhealthy.
      test: ["CMD-SHELL", "redis-cli ping | grep -q PONG"]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 20s
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"
    deploy:
      resources:
        limits:
          cpus: '1.0'
          memory: 768M
        reservations:
          cpus: '0.5'
          memory: 512M

  # Scheduled PostgreSQL dumps written to ./backups on the host.
  backup:
    image: prodrigestivill/postgres-backup-local:15-alpine
    container_name: production-backup
    restart: unless-stopped
    networks:
      - backend
    environment:
      POSTGRES_HOST: database
      POSTGRES_DB: production_db
      POSTGRES_USER: postgres
      POSTGRES_PASSWORD_FILE: /run/secrets/db_password
      SCHEDULE: "@daily"
      BACKUP_KEEP_DAYS: 7
      BACKUP_KEEP_WEEKS: 4
      BACKUP_KEEP_MONTHS: 6
      HEALTHCHECK_PORT: 8080
    secrets:
      - db_password
    volumes:
      - ./backups:/backups
    depends_on:
      database:
        condition: service_healthy

networks:
  frontend:
    driver: bridge
  # `internal: true` keeps this network without external connectivity.
  backend:
    driver: bridge
    internal: true

volumes:
  # Bind-backed volume: /data/postgres has to exist on the host.
  postgres-data:
    driver: local
    driver_opts:
      type: none
      o: bind
      device: /data/postgres
  redis-data:
    driver: local
  nginx-cache:
    driver: local
  nginx-logs:
    driver: local
  postgres-logs:
    driver: local

secrets:
  db_password:
    file: ./secrets/db_password.txt
  jwt_secret:
    file: ./secrets/jwt_secret.txt
Security Hardening
Production security configurations:
# Security-hardened stack: read-only root filesystems, dropped capabilities,
# no-new-privileges, non-root users and file-based secrets.
#
# VERSION must be set to a pinned api image tag; Compose aborts otherwise.
version: '3.8'

services:
  # Public web server with a read-only root filesystem.
  web:
    image: nginx:1.25-alpine
    restart: unless-stopped
    read_only: true
    # Writable scratch space for the paths nginx needs at runtime.
    tmpfs:
      - /var/cache/nginx
      - /var/run
    cap_drop:
      - ALL
    cap_add:
      - NET_BIND_SERVICE
    security_opt:
      - no-new-privileges:true
      - seccomp:./security/seccomp-profile.json
    user: "nginx:nginx"
    networks:
      - frontend
    ports:
      - "80:80"
      - "443:443"
    volumes:
      - ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro
      - ./nginx/ssl:/etc/nginx/ssl:ro

  # Application API; reachable from web (frontend) and talks to the database
  # (backend).
  api:
    # Without a value the reference would end in a bare colon, so the tag is
    # required.
    image: "mycompany/api:${VERSION:?set VERSION to a pinned image tag}"
    restart: unless-stopped
    read_only: true
    tmpfs:
      - /tmp
    cap_drop:
      - ALL
    security_opt:
      - no-new-privileges:true
      - seccomp:./security/seccomp-profile.json
    user: "1000:1000"
    networks:
      - frontend
      - backend
    environment:
      NODE_ENV: production
    env_file:
      - .env.production
    secrets:
      # NOTE(review): confirm that `mode` is honoured for file-backed secrets
      # on your Compose version; otherwise restrict the host files themselves.
      - source: db_password
        target: /run/secrets/db_password
        mode: 0400
      - source: api_key
        target: /run/secrets/api_key
        mode: 0400

  # PostgreSQL on the internal network only.
  database:
    image: postgres:15-alpine
    restart: unless-stopped
    read_only: true
    tmpfs:
      - /tmp
      - /run/postgresql
    cap_drop:
      - ALL
    cap_add:
      - CHOWN
      - DAC_OVERRIDE
      - FOWNER
      - SETGID
      - SETUID
    security_opt:
      - no-new-privileges:true
    user: "postgres:postgres"
    networks:
      - backend
    environment:
      POSTGRES_PASSWORD_FILE: /run/secrets/db_password
    secrets:
      - source: db_password
        mode: 0400
    volumes:
      - postgres-data:/var/lib/postgresql/data

networks:
  # Inter-container communication stays enabled on this network: web can
  # only reach api through it, and disabling ICC would block that traffic.
  frontend:
    driver: bridge
  # No external connectivity for the database network.
  backend:
    driver: bridge
    internal: true

volumes:
  postgres-data:

secrets:
  db_password:
    file: ./secrets/db_password.txt
  api_key:
    file: ./secrets/api_key.txt
Resource Limits and Reservations
Comprehensive resource management:
# Resource management example: every service declares CPU/memory limits and
# reservations; ulimits, shared memory and sysctls are set per service.
version: '3.8'

services:
  # Small web tier.
  web:
    image: nginx:alpine
    restart: unless-stopped
    deploy:
      resources:
        # Hard ceiling, including a cap on the number of processes.
        limits:
          cpus: '0.50'
          memory: 256M
          pids: 100
        # Guaranteed minimum.
        reservations:
          cpus: '0.25'
          memory: 128M
    ulimits:
      # Open file descriptors.
      nofile:
        soft: 1024
        hard: 2048
      # Processes.
      nproc:
        soft: 64
        hard: 128

  # API tier with larger limits.
  api:
    image: node:18-alpine
    restart: unless-stopped
    deploy:
      resources:
        limits:
          cpus: '2.0'
          memory: 2G
          pids: 200
        reservations:
          cpus: '1.0'
          memory: 1G
    ulimits:
      nofile:
        soft: 4096
        hard: 8192
      nproc:
        soft: 256
        hard: 512

  # Database with the largest allocation and an explicit /dev/shm size.
  database:
    image: postgres:15-alpine
    restart: unless-stopped
    deploy:
      resources:
        limits:
          cpus: '4.0'
          memory: 4G
          pids: 500
        reservations:
          cpus: '2.0'
          memory: 2G
    ulimits:
      nofile:
        soft: 8192
        hard: 16384
    shm_size: '256mb'
    volumes:
      - postgres-data:/var/lib/postgresql/data

  # Cache with a raised listen backlog.
  cache:
    image: redis:7-alpine
    restart: unless-stopped
    deploy:
      resources:
        limits:
          cpus: '1.0'
          memory: 1G
        reservations:
          cpus: '0.5'
          memory: 512M
    sysctls:
      net.core.somaxconn: 1024
    volumes:
      - redis-data:/data

volumes:
  postgres-data:
  redis-data:
High Availability Configuration
Multiple replicas with load balancing:
# High-availability example: a load balancer in front of five API replicas,
# plus a PostgreSQL primary and a replica.
#
# VERSION must be set to a pinned api image tag; Compose aborts otherwise.
version: '3.8'

services:
  # Entry point; the only service that publishes ports on the host.
  loadbalancer:
    image: nginx:alpine
    restart: unless-stopped
    ports:
      - "80:80"
      - "443:443"
    volumes:
      - ./nginx/nginx-lb.conf:/etc/nginx/nginx.conf:ro
      - ./nginx/ssl:/etc/nginx/ssl:ro
    networks:
      - frontend
    healthcheck:
      test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost/health"]
      interval: 10s
      timeout: 5s
      retries: 3
    deploy:
      resources:
        limits:
          cpus: '1.0'
          memory: 512M

  # Replicated API with rolling-update and rollback settings.
  api:
    image: "mycompany/api:${VERSION:?set VERSION to a pinned image tag}"
    restart: unless-stopped
    networks:
      - frontend
      - backend
    environment:
      NODE_ENV: production
      # The host part must be the Compose service name (`database`); the
      # `app` database is created by POSTGRES_DB on the primary below.
      DATABASE_URL: postgresql://postgres@database:5432/app
      # NOTE(review): `{{.Task.Slot}}` is a Swarm service template; plain
      # `docker compose up` presumably passes it through literally - confirm.
      INSTANCE_ID: "{{.Task.Slot}}"
    deploy:
      replicas: 5
      # Start new tasks before stopping old ones, two at a time, and roll
      # back automatically when an update fails.
      update_config:
        parallelism: 2
        delay: 10s
        order: start-first
        failure_action: rollback
      rollback_config:
        parallelism: 2
        delay: 10s
      restart_policy:
        condition: on-failure
        delay: 5s
        max_attempts: 3
        window: 120s
      resources:
        limits:
          cpus: '1.0'
          memory: 1G
        reservations:
          cpus: '0.5'
          memory: 512M
    healthcheck:
      test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:3000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s

  # PostgreSQL primary.
  database:
    image: postgres:15-alpine
    restart: unless-stopped
    networks:
      - backend
    environment:
      # Creates the database that the api's DATABASE_URL connects to.
      POSTGRES_DB: app
      POSTGRES_PASSWORD_FILE: /run/secrets/db_password
    secrets:
      - db_password
    volumes:
      - postgres-data:/var/lib/postgresql/data
    healthcheck:
      # Probe with an explicit user and database instead of relying on the
      # defaults derived from the OS user running the check.
      test: ["CMD-SHELL", "pg_isready -U postgres -d app"]
      interval: 10s
      timeout: 5s
      retries: 5
    deploy:
      resources:
        limits:
          cpus: '4.0'
          memory: 4G

  # PostgreSQL replica; started only after the primary reports healthy.
  database-replica:
    image: postgres:15-alpine
    restart: unless-stopped
    networks:
      - backend
    environment:
      POSTGRES_PASSWORD_FILE: /run/secrets/db_password
      POSTGRES_PRIMARY_HOST: database
      POSTGRES_PRIMARY_PORT: 5432
    secrets:
      - db_password
    volumes:
      - postgres-replica-data:/var/lib/postgresql/data
      # NOTE(review): replication itself is presumably configured by this
      # script, which is not part of this file - confirm.
      - ./db/replica-setup.sh:/docker-entrypoint-initdb.d/replica-setup.sh:ro
    depends_on:
      database:
        condition: service_healthy
    deploy:
      resources:
        limits:
          cpus: '2.0'
          memory: 2G

networks:
  frontend:
    driver: bridge
  # No external connectivity for the database network.
  backend:
    driver: bridge
    internal: true

volumes:
  postgres-data:
  postgres-replica-data:

secrets:
  db_password:
    file: ./secrets/db_password.txt
Monitoring and Observability
Production monitoring stack:
# Monitoring stack: Prometheus, Grafana, node-exporter, cAdvisor, Loki and
# Promtail.
#
# Images are pinned to explicit versions so deployments are reproducible;
# bump the tags deliberately when upgrading. Grafana is the only UI published
# on all host interfaces; the unauthenticated back-end endpoints are bound to
# the loopback interface and are otherwise reached over the `monitoring`
# network.
version: '3.8'

services:
  # Metrics store with 30 days of retention.
  prometheus:
    image: prom/prometheus:v2.53.0
    container_name: prometheus
    restart: unless-stopped
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--storage.tsdb.retention.time=30d'
      - '--web.console.libraries=/usr/share/prometheus/console_libraries'
      - '--web.console.templates=/usr/share/prometheus/consoles'
      # Enables the HTTP reload/shutdown endpoints - one reason the port
      # below is not published beyond localhost.
      - '--web.enable-lifecycle'
    volumes:
      - ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - ./monitoring/alerts:/etc/prometheus/alerts:ro
      - prometheus-data:/prometheus
    networks:
      - monitoring
    ports:
      - "127.0.0.1:9090:9090"
    healthcheck:
      test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:9090/-/healthy"]
      interval: 30s
      timeout: 10s
      retries: 3
    deploy:
      resources:
        limits:
          cpus: '1.0'
          memory: 2G

  # Dashboards; the admin password is read from the grafana_password secret.
  grafana:
    image: grafana/grafana:10.4.2
    container_name: grafana
    restart: unless-stopped
    environment:
      GF_SECURITY_ADMIN_PASSWORD__FILE: /run/secrets/grafana_password
      GF_INSTALL_PLUGINS: grafana-clock-panel,grafana-simple-json-datasource
      GF_SERVER_ROOT_URL: https://monitoring.example.com
    secrets:
      - grafana_password
    volumes:
      - ./monitoring/grafana/provisioning:/etc/grafana/provisioning:ro
      - ./monitoring/grafana/dashboards:/var/lib/grafana/dashboards:ro
      - grafana-data:/var/lib/grafana
    networks:
      - monitoring
      - frontend
    ports:
      - "3001:3000"
    depends_on:
      - prometheus
    healthcheck:
      test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:3000/api/health"]
      interval: 30s
      timeout: 10s
      retries: 3
    deploy:
      resources:
        limits:
          cpus: '0.5'
          memory: 512M

  # Host metrics; the host root filesystem is mounted read-only at /host.
  node-exporter:
    image: prom/node-exporter:v1.8.1
    container_name: node-exporter
    restart: unless-stopped
    command:
      - '--path.rootfs=/host'
      - '--path.procfs=/host/proc'
      - '--path.sysfs=/host/sys'
      # `$$` is the Compose escape for a literal `$` in the regex.
      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
    volumes:
      - /:/host:ro,rslave
    networks:
      - monitoring
    ports:
      - "127.0.0.1:9100:9100"
    deploy:
      resources:
        limits:
          cpus: '0.2'
          memory: 128M

  # Per-container metrics; runs privileged with read-only host mounts.
  cadvisor:
    image: gcr.io/cadvisor/cadvisor:v0.49.1
    container_name: cadvisor
    restart: unless-stopped
    privileged: true
    devices:
      - /dev/kmsg
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:ro
      - /sys:/sys:ro
      - /var/lib/docker:/var/lib/docker:ro
      - /dev/disk:/dev/disk:ro
    networks:
      - monitoring
    ports:
      - "127.0.0.1:8080:8080"
    deploy:
      resources:
        limits:
          cpus: '0.3'
          memory: 256M

  # Log store.
  loki:
    image: grafana/loki:2.9.8
    container_name: loki
    restart: unless-stopped
    command: "-config.file=/etc/loki/local-config.yaml"
    volumes:
      - ./monitoring/loki-config.yml:/etc/loki/local-config.yaml:ro
      - loki-data:/loki
    networks:
      - monitoring
    ports:
      - "127.0.0.1:3100:3100"
    deploy:
      resources:
        limits:
          cpus: '1.0'
          memory: 1G

  # Log shipper; reads host and container log directories read-only.
  promtail:
    image: grafana/promtail:2.9.8
    container_name: promtail
    restart: unless-stopped
    command: "-config.file=/etc/promtail/config.yml"
    volumes:
      - ./monitoring/promtail-config.yml:/etc/promtail/config.yml:ro
      - /var/log:/var/log:ro
      - /var/lib/docker/containers:/var/lib/docker/containers:ro
    networks:
      - monitoring
    depends_on:
      - loki
    deploy:
      resources:
        limits:
          cpus: '0.2'
          memory: 256M

networks:
  monitoring:
    driver: bridge
  frontend:
    driver: bridge

volumes:
  prometheus-data:
  grafana-data:
  loki-data:

secrets:
  grafana_password:
    file: ./secrets/grafana_password.txt
Logging Configuration
Centralized logging setup:
# Logging examples: one service per logging driver (json-file with labels,
# syslog, fluentd, json-file with compression).
version: '3.8'

services:
  # json-file with rotation; the listed container labels are attached to
  # every log entry.
  app:
    image: myapp:latest
    restart: unless-stopped
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "5"
        labels: "app,environment,version"
        tag: "{{.Name}}/{{.ID}}"
    labels:
      app: "myapp"
      environment: "production"
      version: "${VERSION}"

  # Remote syslog over TCP.
  nginx:
    image: nginx:alpine
    restart: unless-stopped
    logging:
      driver: "syslog"
      options:
        # NOTE(review): logging drivers presumably connect from the Docker
        # daemon, so `logserver` has to resolve on the host - confirm.
        syslog-address: "tcp://logserver:514"
        tag: "nginx"
        syslog-format: "rfc5424micro"

  # Fluentd forwarder with background connection and bounded retries.
  api:
    image: api:latest
    restart: unless-stopped
    logging:
      driver: "fluentd"
      options:
        fluentd-address: "localhost:24224"
        tag: "docker.{{.Name}}"
        # `fluentd-async` is the current name of the deprecated
        # `fluentd-async-connect` option: the container starts even when the
        # Fluentd endpoint is not reachable yet.
        fluentd-async: "true"
        fluentd-retry-wait: "1s"
        fluentd-max-retries: "30"

  # Larger rotated files, compressed after rotation.
  database:
    image: postgres:15-alpine
    restart: unless-stopped
    logging:
      driver: "json-file"
      options:
        max-size: "50m"
        max-file: "10"
        compress: "true"
    volumes:
      - postgres-data:/var/lib/postgresql/data

volumes:
  postgres-data:
Environment Configuration Management
Multi-environment setup:
# Multi-environment example driven by variable substitution.
#
# `${VAR:-default}` falls back to a default; `${VAR:?message}` makes Compose
# abort when the variable is unset or empty. Credentials use the second form:
# a missing value would otherwise expand to an empty string, yield broken
# connection URLs, and override anything supplied through env_file, because
# `environment` takes precedence over `env_file`.
version: '3.8'

services:
  app:
    image: myapp:${VERSION:-latest}
    restart: unless-stopped
    environment:
      NODE_ENV: ${NODE_ENV:-production}
      LOG_LEVEL: ${LOG_LEVEL:-info}
      PORT: ${APP_PORT:-3000}
      DATABASE_URL: "postgresql://${DB_USER:?DB_USER is required}:${DB_PASSWORD:?DB_PASSWORD is required}@database:5432/${DB_NAME:?DB_NAME is required}"
      REDIS_URL: "redis://:${REDIS_PASSWORD:?REDIS_PASSWORD is required}@cache:6379"
      JWT_SECRET: "${JWT_SECRET:?JWT_SECRET is required}"
      API_TIMEOUT: ${API_TIMEOUT:-30000}
      MAX_CONNECTIONS: ${MAX_CONNECTIONS:-100}
    # The first file is selected by ENVIRONMENT (default: production).
    env_file:
      - .env.${ENVIRONMENT:-production}
      - .env.secrets
    networks:
      - app-network

  database:
    image: postgres:${POSTGRES_VERSION:-15}-alpine
    restart: unless-stopped
    environment:
      POSTGRES_DB: "${DB_NAME:?DB_NAME is required}"
      POSTGRES_USER: "${DB_USER:?DB_USER is required}"
      POSTGRES_PASSWORD: "${DB_PASSWORD:?DB_PASSWORD is required}"
      POSTGRES_INITDB_ARGS: ${POSTGRES_INITDB_ARGS:--E UTF8}
    volumes:
      - postgres-data:/var/lib/postgresql/data
    networks:
      - app-network

  cache:
    image: redis:${REDIS_VERSION:-7}-alpine
    restart: unless-stopped
    # An empty password would make `--requirepass` take the next flag as its
    # argument, so the variable is required here as well.
    command: >-
      redis-server
      --requirepass ${REDIS_PASSWORD:?REDIS_PASSWORD is required}
      --maxmemory ${REDIS_MAX_MEMORY:-256mb}
    volumes:
      - redis-data:/data
    networks:
      - app-network

networks:
  app-network:
    driver: bridge

volumes:
  postgres-data:
  redis-data:
Health Checks and Readiness
Comprehensive health monitoring:
# Health-check examples: one probe style per service type. Each check sets an
# interval, a timeout, a retry count and a start period.
version: '3.8'

services:
  # HTTP probe against the web server's /health path.
  web:
    image: nginx:alpine
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s

  # Script-based probe; the API starts only after both dependencies report
  # healthy.
  api:
    image: node:18-alpine
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "node", "healthcheck.js"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s
    depends_on:
      database:
        condition: service_healthy
      cache:
        condition: service_healthy

  # pg_isready run through a shell.
  database:
    image: postgres:15-alpine
    restart: unless-stopped
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U postgres -d production_db || exit 1"]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 30s
    volumes:
      - postgres-data:/var/lib/postgresql/data

  # redis-cli ping.
  cache:
    image: redis:7-alpine
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 20s
    volumes:
      - redis-data:/data

  # rabbitmq-diagnostics ping.
  queue:
    image: rabbitmq:3-management-alpine
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "rabbitmq-diagnostics", "ping"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 60s
    volumes:
      - rabbitmq-data:/var/lib/rabbitmq

volumes:
  postgres-data:
  redis-data:
  rabbitmq-data:
Backup and Recovery
Automated backup configuration:
# Backup example: scheduled PostgreSQL dumps plus an archive of the named
# volumes.
version: '3.8'

services:
  database:
    image: postgres:15-alpine
    restart: unless-stopped
    environment:
      # Same database and user that the db-backup job connects with.
      POSTGRES_DB: ${DB_NAME}
      POSTGRES_USER: ${DB_USER}
      POSTGRES_PASSWORD_FILE: /run/secrets/db_password
    secrets:
      - db_password
    volumes:
      - postgres-data:/var/lib/postgresql/data
    networks:
      - backend
    # Required by db-backup's `condition: service_healthy`; without a health
    # check that dependency can never be satisfied. `$$` keeps the variable
    # for the shell inside the container instead of Compose interpolation.
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U $${POSTGRES_USER:-postgres}"]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 30s

  # Daily dumps with daily/weekly/monthly retention, written to ./backups.
  db-backup:
    image: prodrigestivill/postgres-backup-local:15-alpine
    restart: unless-stopped
    environment:
      POSTGRES_HOST: database
      POSTGRES_DB: ${DB_NAME}
      POSTGRES_USER: ${DB_USER}
      POSTGRES_PASSWORD_FILE: /run/secrets/db_password
      SCHEDULE: "@daily"
      BACKUP_KEEP_DAYS: 7
      BACKUP_KEEP_WEEKS: 4
      BACKUP_KEEP_MONTHS: 6
      BACKUP_DIR: /backups
      HEALTHCHECK_PORT: 8080
    secrets:
      - db_password
    volumes:
      - ./backups:/backups
      - ./backup-scripts:/scripts:ro
    networks:
      - backend
    depends_on:
      database:
        condition: service_healthy

  # Archives the mounted volumes on a cron schedule (02:00 every day).
  volume-backup:
    image: futurice/docker-volume-backup:2.6.0
    restart: unless-stopped
    environment:
      BACKUP_CRON_EXPRESSION: "0 2 * * *"
      BACKUP_FILENAME: "backup-%Y-%m-%d_%H-%M-%S.tar.gz"
      # NOTE(review): confirm that this image version honours
      # BACKUP_RETENTION_DAYS and the *_FILE secret variant below.
      BACKUP_RETENTION_DAYS: 30
      AWS_S3_BUCKET_NAME: ${S3_BACKUP_BUCKET}
      AWS_ACCESS_KEY_ID: ${AWS_ACCESS_KEY_ID}
      AWS_SECRET_ACCESS_KEY_FILE: /run/secrets/aws_secret
    secrets:
      - aws_secret
    volumes:
      # Volumes to archive are mounted read-only under /backup.
      - postgres-data:/backup/postgres-data:ro
      - redis-data:/backup/redis-data:ro
      - /var/run/docker.sock:/var/run/docker.sock:ro
      - ./backup-archive:/archive

networks:
  backend:
    driver: bridge

volumes:
  postgres-data:
  redis-data:

secrets:
  db_password:
    file: ./secrets/db_password.txt
  aws_secret:
    file: ./secrets/aws_secret.txt
When to Use This Skill
Use docker-compose-production when you need to:
- Deploy Docker Compose applications to production environments
- Implement security hardening and best practices
- Configure resource limits and reservations
- Set up health checks and readiness probes
- Implement high availability with multiple replicas
- Configure production-grade logging and monitoring
- Set up automated backups and disaster recovery
- Manage secrets and sensitive configuration
- Implement zero-downtime deployments
- Configure multi-environment deployment strategies
- Set up container orchestration for production workloads
- Optimize performance and resource utilization

Best Practices
Always Use Version Pinning: Pin specific image versions instead of using latest to ensure reproducible deployments.
Implement Health Checks: Configure health checks for all services to enable automatic recovery and proper dependency management.
Set Resource Limits: Always define CPU and memory limits to prevent resource exhaustion and ensure predictable performance.
Use Secrets Management: Never store secrets in environment variables or compose files; use Docker secrets or external secret managers.
Configure Restart Policies: Use restart: unless-stopped for production services to ensure automatic recovery from failures.
Implement Proper Logging: Configure structured logging with rotation and retention policies to manage disk space.
Use Read-Only Filesystems: Set read_only: true where possible and use tmpfs for temporary data to improve security.
Drop Unnecessary Capabilities: Use cap_drop: ALL and only add required capabilities to follow the principle of least privilege.
Enable Monitoring: Deploy monitoring and observability tools to track application health and performance metrics.
Implement Automated Backups: Configure regular automated backups with retention policies and test recovery procedures.
Use Internal Networks: Mark backend networks as internal to prevent direct external access to databases and caches.
Configure Update Strategies: Define update and rollback configurations for zero-downtime deployments.
Implement Resource Reservations: Set resource reservations to guarantee minimum resources for critical services.
Use Multi-Stage Dependencies: Configure depends_on with health check conditions to ensure proper startup order.
Document Configuration: Maintain comprehensive documentation of your production configuration and deployment procedures.
Common Pitfalls
Using Latest Tags: Using latest or unversioned images can cause unexpected behavior when images are updated; always pin versions.
Ignoring Resource Limits: Not setting resource limits can allow one service to consume all available resources and crash others.
Missing Health Checks: Without health checks, Docker cannot determine if services are actually ready or need to be restarted.
Storing Secrets in Plain Text: Committing secrets to version control or storing them in environment variables exposes sensitive data.
Not Testing Backups: Creating backups without regularly testing restoration procedures leads to data loss during actual incidents.
Exposing Unnecessary Ports: Publishing all service ports to the host increases attack surface; only expose what's needed.
Running as Root: Not specifying a non-root user leaves containers vulnerable to privilege escalation attacks.
Ignoring Log Rotation: Without log rotation, logs can fill up disk space and crash services or hosts.
Missing Monitoring: Deploying without monitoring makes it impossible to detect and diagnose issues before they impact users.
Not Using Networks: Running all services on the default network prevents proper segmentation and increases security risk.
Forgetting Readiness Checks: Starting dependent services before dependencies are ready causes connection failures and restarts.
Hardcoding Configuration: Embedding environment-specific values in the compose file makes it difficult to deploy to multiple environments.
Neglecting Security Updates: Not regularly updating base images leaves services vulnerable to known security issues.
Insufficient Start Period: Setting the health check start_period too short causes slow-starting applications to be reported as unhealthy before they have finished initializing.
Not Planning for Scale: Designing services without considering horizontal scaling makes it difficult to handle increased load.
Resources

Official Documentation
- Docker Compose Production
- Docker Security Best Practices
- Docker Secrets

Deployment Guides
- Deploy on Production
- Configure Container Resources
- Container Security

Tools and Images
- Docker Volume Backup
- Postgres Backup Local
- Watchtower - Automated container updates

Monitoring
- Prometheus
- Grafana
- cAdvisor