# veza/docker-compose.prod.yml

version: '3.8'
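# Every ${VAR:?...} reference below fails fast at interpolation time
# (docker compose config / up) when the variable is unset. A minimal .env
# sketch (values are placeholders; generate real secrets, for example with
# openssl rand -hex 32):
#   DB_PASS=...
#   REDIS_PASSWORD=...
#   RABBITMQ_PASS=...
#   HYPERSWITCH_DB_PASS=...
#   S3_ACCESS_KEY=...
#   S3_SECRET_KEY=...
#   WEBRTC_TURN_PUBLIC_IP=203.0.113.10
#   WEBRTC_TURN_USERNAME=...
#   WEBRTC_TURN_CREDENTIAL=...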
services:
  # ============================================================================
  # INFRASTRUCTURE SERVICES
  # ============================================================================
  postgres:
    image: postgres:16-alpine
    container_name: veza_postgres
    restart: unless-stopped
    environment:
      POSTGRES_USER: ${DB_USER:-veza}
      POSTGRES_PASSWORD: ${DB_PASS:?DB_PASS must be set for production}
      POSTGRES_DB: ${DB_NAME:-veza}
    volumes:
      - postgres_data:/var/lib/postgresql/data
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U ${DB_USER:-veza}"]
      interval: 5s
      timeout: 5s
      retries: 5
    networks:
      - veza-network
    deploy:
      resources:
        limits:
          cpus: '0.50'
          memory: 256M
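  # NOTE: the application DATABASE_URLs below request sslmode=require, while the
  # stock postgres:16-alpine image starts with ssl=off and ships no server
  # certificate. One possible way to enable TLS on this container (a sketch,
  # assuming a cert/key pair is mounted at /var/lib/postgresql/certs, a
  # hypothetical path); the same consideration applies to hyperswitch_postgres:
  #   command: >
  #     postgres -c ssl=on
  #              -c ssl_cert_file=/var/lib/postgresql/certs/server.crt
  #              -c ssl_key_file=/var/lib/postgresql/certs/server.key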
  redis:
    image: redis:7-alpine
    container_name: veza_redis
    restart: unless-stopped
    command: ["redis-server", "--requirepass", "${REDIS_PASSWORD:?REDIS_PASSWORD must be set for production}", "--appendonly", "yes"]
    volumes:
      - redis_data:/data
    healthcheck:
      test: ["CMD", "redis-cli", "-a", "${REDIS_PASSWORD}", "ping"]
      interval: 5s
      timeout: 3s
      retries: 5
    networks:
      - veza-network
    deploy:
      resources:
        limits:
          cpus: '0.25'
          memory: 64M
  # SECURITY(MEDIUM-008): Use rabbitmq:3-alpine (no management UI) in production.
  # Management UI exposes internal metrics/config and is unnecessary in prod.
  rabbitmq:
    image: rabbitmq:3-alpine
    container_name: veza_rabbitmq
    restart: unless-stopped
    environment:
      RABBITMQ_DEFAULT_USER: ${DB_USER:-veza}
      RABBITMQ_DEFAULT_PASS: ${RABBITMQ_PASS:?RABBITMQ_PASS must be set for production}
    volumes:
      - rabbitmq_data:/var/lib/rabbitmq
    healthcheck:
      test: rabbitmq-diagnostics -q ping
      interval: 10s
      timeout: 10s
      retries: 5
    networks:
      - veza-network
    deploy:
      resources:
        limits:
          cpus: '0.50'
          memory: 256M
  # SECURITY(MEDIUM-003): Pin ClamAV image to specific version instead of :latest
  clamav:
    image: clamav/clamav:1.4
    container_name: veza_clamav
    restart: unless-stopped
    networks:
      - veza-network
    healthcheck:
      test: ["CMD", "clamdscan", "--ping", "1"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 180s
    deploy:
      resources:
        limits:
          cpus: '0.5'
          memory: 1G
  # ============================================================================
  # PAYMENT ROUTER (Hyperswitch)
  # ============================================================================
  hyperswitch_postgres:
    image: postgres:16-alpine
    container_name: veza_hyperswitch_postgres
    restart: unless-stopped
    environment:
      POSTGRES_USER: ${HYPERSWITCH_DB_USER:-hyperswitch}
      POSTGRES_PASSWORD: ${HYPERSWITCH_DB_PASS:?HYPERSWITCH_DB_PASS must be set for production}
      POSTGRES_DB: ${HYPERSWITCH_DB_NAME:-hyperswitch}
    volumes:
      - hyperswitch_postgres_data:/var/lib/postgresql/data
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U ${HYPERSWITCH_DB_USER:-hyperswitch}"]
      interval: 5s
      timeout: 5s
      retries: 5
    networks:
      - veza-network
    deploy:
      resources:
        limits:
          cpus: "0.25"
          memory: 128M
  # SECURITY(LOW-002): Pin to a specific Hyperswitch version. Check https://github.com/juspay/hyperswitch/releases for updates.
  hyperswitch:
    image: juspaydotin/hyperswitch-router:2026.03.11.0-standalone
    container_name: veza_hyperswitch
    restart: unless-stopped
    environment:
      DATABASE_URL: postgresql://${HYPERSWITCH_DB_USER:-hyperswitch}:${HYPERSWITCH_DB_PASS:?HYPERSWITCH_DB_PASS must be set}@hyperswitch_postgres:5432/${HYPERSWITCH_DB_NAME:-hyperswitch}?sslmode=require
      REDIS_URL: redis://:${REDIS_PASSWORD}@redis:6379
    depends_on:
      hyperswitch_postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
    networks:
      - veza-network
    healthcheck:
      test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:8080/health"]
      interval: 10s
      timeout: 5s
      retries: 3
    deploy:
      resources:
        limits:
          cpus: "0.5"
          memory: 256M
  # ============================================================================
  # APPLICATION SERVICES - Blue-Green Deployment
  # STACK_COLOR=blue|green. Use scripts/deploy-blue-green.sh to switch.
  # ============================================================================
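  # Cutover sketch (the actual backend switching lives in config/haproxy/haproxy.cfg
  # and scripts/deploy-blue-green.sh; this only illustrates the mechanics):
  #   docker compose up -d --build backend-api-green stream-server-green web-green
  #   # wait for the green healthchecks, point haproxy.cfg at the green servers, then:
  #   docker compose kill -s HUP haproxy   # the official haproxy image reloads its config on SIGHUP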
  backend-api-blue:
    build:
      context: ./veza-backend-api
      dockerfile: Dockerfile.production
    image: veza-backend-api:latest
    container_name: veza_backend_api_blue
    restart: unless-stopped
    environment:
      - APP_ENV=production
      - STACK_COLOR=blue
      - DATABASE_URL=postgres://${DB_USER:-veza}:${DB_PASS:?DB_PASS must be set}@postgres:5432/${DB_NAME:-veza}?sslmode=require
      - REDIS_URL=redis://:${REDIS_PASSWORD:?REDIS_PASSWORD must be set}@redis:6379
      - AMQP_URL=amqp://${DB_USER:-veza}:${RABBITMQ_PASS:?RABBITMQ_PASS must be set}@rabbitmq:5672
      # SECURITY(HIGH-002): Use RS256 asymmetric keys in production instead of HS256 shared secret.
      # Generate: openssl genrsa -out jwt_private.pem 2048 && openssl rsa -in jwt_private.pem -pubout -out jwt_public.pem
      - JWT_PRIVATE_KEY_PATH=${JWT_PRIVATE_KEY_PATH:-/secrets/jwt_private.pem}
      - JWT_PUBLIC_KEY_PATH=${JWT_PUBLIC_KEY_PATH:-/secrets/jwt_public.pem}
      - COOKIE_SECURE=true
      - COOKIE_SAME_SITE=strict
      - COOKIE_HTTP_ONLY=true
      - CORS_ALLOWED_ORIGINS=${CORS_ORIGINS:-http://veza.fr}
      - HYPERSWITCH_URL=http://hyperswitch:8080
      - HYPERSWITCH_API_KEY=${HYPERSWITCH_API_KEY:-}
      - HYPERSWITCH_WEBHOOK_SECRET=${HYPERSWITCH_WEBHOOK_SECRET:-}
      - HYPERSWITCH_ENABLED=${HYPERSWITCH_ENABLED:-false}
      - HYPERSWITCH_LIVE_MODE=${HYPERSWITCH_LIVE_MODE:-false}
      - CHECKOUT_SUCCESS_URL=${CHECKOUT_SUCCESS_URL:-https://veza.fr/purchases}
      - ENABLE_CLAMAV=true
      - CLAMAV_REQUIRED=true
      - CLAMAV_ADDRESS=clamav:3310
      - AWS_S3_ENDPOINT=http://minio:9000
      - AWS_S3_BUCKET=veza-files
      - AWS_ACCESS_KEY_ID=${S3_ACCESS_KEY:?S3_ACCESS_KEY must be set}
      - AWS_SECRET_ACCESS_KEY=${S3_SECRET_KEY:?S3_SECRET_KEY must be set}
      - AWS_REGION=${AWS_REGION:-us-east-1}
      # v1.0.10 polish: enable the S3 stack and route track uploads through
      # MinIO end-to-end. Without these two flags, defaults (local +
      # disabled) win and the AWS_S3_* credentials above are inert. With
      # blue/green active/active behind HAProxy, local-disk uploads on
      # one pod are invisible to the other — S3 is required for HA.
      - AWS_S3_ENABLED=true
      - TRACK_STORAGE_BACKEND=s3
      # WebRTC ICE servers — populated from the coturn service below.
      # Empty TURN vars degrade to STUN-only (calls work peer-to-peer
      # but fail behind symmetric NAT); the all-or-nothing rule in
      # webrtc_config_handler.go means partial config is rejected.
      - WEBRTC_STUN_URLS=stun:${WEBRTC_TURN_PUBLIC_IP:?WEBRTC_TURN_PUBLIC_IP must be set}:3478
      - WEBRTC_TURN_URLS=turn:${WEBRTC_TURN_PUBLIC_IP}:3478,turns:${WEBRTC_TURN_PUBLIC_IP}:5349
      - WEBRTC_TURN_USERNAME=${WEBRTC_TURN_USERNAME:?WEBRTC_TURN_USERNAME must be set}
      - WEBRTC_TURN_CREDENTIAL=${WEBRTC_TURN_CREDENTIAL:?WEBRTC_TURN_CREDENTIAL must be set}
      - HLS_STREAMING=true
      - HLS_STORAGE_DIR=/data/hls
    volumes:
      - hls_prod_data:/data/hls
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
      rabbitmq:
        condition: service_healthy
      clamav:
        condition: service_started
    networks:
      - veza-network
    healthcheck:
      test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:8080/api/v1/health"]
      interval: 10s
      timeout: 5s
      retries: 3
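  # The JWT_*_KEY_PATH values above default to /secrets/*.pem, but no volume in
  # this file mounts /secrets; the key pair has to be provided at deploy time.
  # A sketch, assuming the PEM files generated with the openssl command above
  # live in ./config/secrets on the host (hypothetical path; the same mount is
  # needed on backend-api-green and both stream servers):
  #   volumes:
  #     - ./config/secrets:/secrets:ro
  #     - hls_prod_data:/data/hls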
  backend-api-green:
    build:
      context: ./veza-backend-api
      dockerfile: Dockerfile.production
    image: veza-backend-api:latest
    container_name: veza_backend_api_green
    restart: unless-stopped
    environment:
      - APP_ENV=production
      - STACK_COLOR=green
      - DATABASE_URL=postgres://${DB_USER:-veza}:${DB_PASS:?DB_PASS must be set}@postgres:5432/${DB_NAME:-veza}?sslmode=require
      - REDIS_URL=redis://:${REDIS_PASSWORD:?REDIS_PASSWORD must be set}@redis:6379
      - AMQP_URL=amqp://${DB_USER:-veza}:${RABBITMQ_PASS:?RABBITMQ_PASS must be set}@rabbitmq:5672
      # SECURITY(HIGH-002): RS256 asymmetric keys for production
      - JWT_PRIVATE_KEY_PATH=${JWT_PRIVATE_KEY_PATH:-/secrets/jwt_private.pem}
      - JWT_PUBLIC_KEY_PATH=${JWT_PUBLIC_KEY_PATH:-/secrets/jwt_public.pem}
      - COOKIE_SECURE=true
      - COOKIE_SAME_SITE=strict
      - COOKIE_HTTP_ONLY=true
      - CORS_ALLOWED_ORIGINS=${CORS_ORIGINS:-http://veza.fr}
      - HYPERSWITCH_URL=http://hyperswitch:8080
      - HYPERSWITCH_API_KEY=${HYPERSWITCH_API_KEY:-}
      - HYPERSWITCH_WEBHOOK_SECRET=${HYPERSWITCH_WEBHOOK_SECRET:-}
      - HYPERSWITCH_ENABLED=${HYPERSWITCH_ENABLED:-false}
      - HYPERSWITCH_LIVE_MODE=${HYPERSWITCH_LIVE_MODE:-false}
      - CHECKOUT_SUCCESS_URL=${CHECKOUT_SUCCESS_URL:-https://veza.fr/purchases}
      - ENABLE_CLAMAV=true
      - CLAMAV_REQUIRED=true
      - CLAMAV_ADDRESS=clamav:3310
      - AWS_S3_ENDPOINT=http://minio:9000
      - AWS_S3_BUCKET=veza-files
      - AWS_ACCESS_KEY_ID=${S3_ACCESS_KEY:?S3_ACCESS_KEY must be set}
      - AWS_SECRET_ACCESS_KEY=${S3_SECRET_KEY:?S3_SECRET_KEY must be set}
      - AWS_REGION=${AWS_REGION:-us-east-1}
      # v1.0.10 polish: enable the S3 stack and route track uploads through
      # MinIO end-to-end. Without these two flags, defaults (local +
      # disabled) win and the AWS_S3_* credentials above are inert. With
      # blue/green active/active behind HAProxy, local-disk uploads on
      # one pod are invisible to the other — S3 is required for HA.
      - AWS_S3_ENABLED=true
      - TRACK_STORAGE_BACKEND=s3
      # WebRTC ICE servers — populated from the coturn service below.
      # Empty TURN vars degrade to STUN-only (calls work peer-to-peer
      # but fail behind symmetric NAT); the all-or-nothing rule in
      # webrtc_config_handler.go means partial config is rejected.
      - WEBRTC_STUN_URLS=stun:${WEBRTC_TURN_PUBLIC_IP:?WEBRTC_TURN_PUBLIC_IP must be set}:3478
      - WEBRTC_TURN_URLS=turn:${WEBRTC_TURN_PUBLIC_IP}:3478,turns:${WEBRTC_TURN_PUBLIC_IP}:5349
      - WEBRTC_TURN_USERNAME=${WEBRTC_TURN_USERNAME:?WEBRTC_TURN_USERNAME must be set}
      - WEBRTC_TURN_CREDENTIAL=${WEBRTC_TURN_CREDENTIAL:?WEBRTC_TURN_CREDENTIAL must be set}
      - HLS_STREAMING=true
      - HLS_STORAGE_DIR=/data/hls
    volumes:
      - hls_prod_data:/data/hls
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
      rabbitmq:
        condition: service_healthy
      clamav:
        condition: service_started
    networks:
      - veza-network
    healthcheck:
      test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:8080/api/v1/health"]
      interval: 10s
      timeout: 5s
      retries: 3
  stream-server-blue:
    build:
      context: ./veza-stream-server
      dockerfile: Dockerfile.production
    image: veza-stream-server:latest
    container_name: veza_stream_server_blue
    restart: unless-stopped
    environment:
      - DATABASE_URL=postgres://${DB_USER:-veza}:${DB_PASS:?DB_PASS must be set}@postgres:5432/${DB_NAME:-veza}?sslmode=require
      - REDIS_URL=redis://:${REDIS_PASSWORD:?REDIS_PASSWORD must be set}@redis:6379
      # SECURITY(HIGH-002): Stream server uses public key only (verification)
      - JWT_PUBLIC_KEY_PATH=${JWT_PUBLIC_KEY_PATH:-/secrets/jwt_public.pem}
      - PORT=3001
      - HLS_OUTPUT_DIR=/data/hls
    volumes:
      - hls_prod_data:/data/hls
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
    networks:
      - veza-network
    healthcheck:
      test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:3001/health"]
      interval: 10s
      timeout: 5s
      retries: 3
  stream-server-green:
    build:
      context: ./veza-stream-server
      dockerfile: Dockerfile.production
    image: veza-stream-server:latest
    container_name: veza_stream_server_green
    restart: unless-stopped
    environment:
      - DATABASE_URL=postgres://${DB_USER:-veza}:${DB_PASS:?DB_PASS must be set}@postgres:5432/${DB_NAME:-veza}?sslmode=require
      - REDIS_URL=redis://:${REDIS_PASSWORD:?REDIS_PASSWORD must be set}@redis:6379
      # SECURITY(HIGH-002): Stream server uses public key only (verification)
      - JWT_PUBLIC_KEY_PATH=${JWT_PUBLIC_KEY_PATH:-/secrets/jwt_public.pem}
      - PORT=3001
      - HLS_OUTPUT_DIR=/data/hls
    volumes:
      - hls_prod_data:/data/hls
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
    networks:
      - veza-network
    healthcheck:
      test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:3001/health"]
      interval: 10s
      timeout: 5s
      retries: 3
  minio:
    image: minio/minio:RELEASE.2025-09-07T16-13-09Z
    container_name: veza_minio
    restart: unless-stopped
    command: server /data --console-address ":9001"
    environment:
      MINIO_ROOT_USER: ${S3_ACCESS_KEY:?S3_ACCESS_KEY must be set}
      MINIO_ROOT_PASSWORD: ${S3_SECRET_KEY:?S3_SECRET_KEY must be set}
    volumes:
      - minio_data:/data
    networks:
      - veza-network
    healthcheck:
      test: ["CMD", "mc", "ready", "local"]
      interval: 10s
      timeout: 5s
      retries: 3
  minio-init:
    image: minio/mc:RELEASE.2025-09-07T05-25-40Z
    depends_on:
      minio:
        condition: service_healthy
    entrypoint: >
      /bin/sh -c "
      mc alias set veza http://minio:9000 $${MINIO_ROOT_USER} $${MINIO_ROOT_PASSWORD};
      mc mb --ignore-existing veza/veza-files;
      exit 0;
      "
    environment:
      MINIO_ROOT_USER: ${S3_ACCESS_KEY:?S3_ACCESS_KEY must be set}
      MINIO_ROOT_PASSWORD: ${S3_SECRET_KEY:?S3_SECRET_KEY must be set}
    networks:
      - veza-network
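  # To confirm the bucket exists after first boot (a sketch; MINIO_ROOT_USER and
  # MINIO_ROOT_PASSWORD are already present in the minio-init environment):
  #   docker compose run --rm --entrypoint /bin/sh minio-init -c \
  #     'mc alias set veza http://minio:9000 $MINIO_ROOT_USER $MINIO_ROOT_PASSWORD && mc ls veza/'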
  # ============================================================================
  # COTURN — TURN/STUN relay for WebRTC NAT traversal (v1.0.10 polish)
  # ----------------------------------------------------------------------------
  # Calls (1:1 audio/video) signal through the chat WebSocket, but the actual
  # media stream needs a relay when both peers are behind symmetric NAT.
  # Without this service, every call between users on corporate firewalls,
  # mobile CGNAT or Incus default networking will silently fail with
  # iceConnectionState=failed after ~30s.
  #
  # network_mode: host is REQUIRED — coturn allocates UDP ports in the
  # 49152-65535 range for media relay, and Docker's NAT layer drops them.
  # Host networking exposes the host's public IP directly, which is what
  # WEBRTC_TURN_PUBLIC_IP must point at (so coturn advertises the right
  # candidate to remote peers).
  #
  # infra/coturn/README.md describes a parallel Incus-native deploy
  # path; this compose service is the simpler dev/single-host option.
  # If you run prod on multiple hosts behind a load balancer, prefer the
  # Ansible/Incus path so coturn lives on a host with a stable public IP.
  # ============================================================================
  coturn:
    image: coturn/coturn:4.6.2
    container_name: veza_coturn
    restart: unless-stopped
    network_mode: host
    command:
      - "-n"
      - "--listening-port=3478"
      - "--tls-listening-port=5349"
      - "--external-ip=${WEBRTC_TURN_PUBLIC_IP:?WEBRTC_TURN_PUBLIC_IP must be set (the public IP coturn advertises to peers)}"
      - "--realm=${WEBRTC_TURN_REALM:-turn.veza.fr}"
      - "--lt-cred-mech"
      - "--user=${WEBRTC_TURN_USERNAME:?WEBRTC_TURN_USERNAME must be set}:${WEBRTC_TURN_CREDENTIAL:?WEBRTC_TURN_CREDENTIAL must be set}"
      - "--min-port=49152"
      - "--max-port=65535"
      - "--no-cli"
      - "--no-tlsv1"
      - "--no-tlsv1_1"
- "--cert=/etc/coturn/cert.pem"
- "--pkey=/etc/coturn/key.pem"
    volumes:
      # Map the TLS cert dir read-only. Default points at a Let's Encrypt
      # rotation managed outside this compose (certbot on the host or
      # similar). Override TURN_CERT_DIR for self-signed dev certs.
      - ${TURN_CERT_DIR:-/etc/letsencrypt/live/turn.veza.fr}:/etc/coturn:ro
    healthcheck:
      # nc -uz checks UDP/3478 is bound; doesn't validate auth but catches
      # crashes / cert-load failures cleanly.
      test: ["CMD-SHELL", "nc -zu localhost 3478 || exit 1"]
      interval: 30s
      timeout: 5s
      retries: 3
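  # Smoke test (sketch): request a relay allocation with coturn's bundled client;
  # it should come back within ~1s. During a real call, chrome://webrtc-internals
  # should show a selected candidate pair of type "relay" when both peers are
  # behind symmetric NAT.
  #   turnutils_uclient -u "$WEBRTC_TURN_USERNAME" -w "$WEBRTC_TURN_CREDENTIAL" \
  #     -p 3478 "$WEBRTC_TURN_PUBLIC_IP"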
  # ============================================================================
  # MONITORING - Alertmanager
  # Set SLACK_WEBHOOK_URL for Slack notifications. Works with Prometheus.
  # ============================================================================
  alertmanager:
    image: prom/alertmanager:v0.26.0
    container_name: veza_alertmanager
    restart: unless-stopped
    ports:
      - "9093:9093"
    volumes:
      - ./config/alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro
    command:
      - '--config.file=/etc/alertmanager/alertmanager.yml'
      - '--storage.path=/alertmanager'
    environment:
      - SLACK_WEBHOOK_URL=${SLACK_WEBHOOK_URL:-}
    networks:
      - veza-network
  web-blue:
    build:
      context: ./apps/web
      dockerfile: Dockerfile.production
    image: veza-web:latest
    container_name: veza_web_blue
    restart: unless-stopped
    environment:
      - VITE_API_URL=http://haproxy/api/v1
      - VITE_STREAM_URL=ws://haproxy/stream
      - VITE_UPLOAD_URL=http://haproxy/api/v1/uploads
    depends_on:
      - backend-api-blue
      - stream-server-blue
    networks:
      - veza-network
    healthcheck:
      test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:5173"]
      interval: 10s
      timeout: 5s
      retries: 3
  web-green:
    build:
      context: ./apps/web
      dockerfile: Dockerfile.production
    image: veza-web:latest
    container_name: veza_web_green
    restart: unless-stopped
    environment:
      - VITE_API_URL=http://haproxy/api/v1
      - VITE_STREAM_URL=ws://haproxy/stream
      - VITE_UPLOAD_URL=http://haproxy/api/v1/uploads
    depends_on:
      - backend-api-green
      - stream-server-green
    networks:
      - veza-network
    healthcheck:
      test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:5173"]
      interval: 10s
      timeout: 5s
      retries: 3
  # ============================================================================
  # REVERSE PROXY - HAProxy (Blue-Green)
  # ============================================================================
  haproxy:
    image: haproxy:2.8-alpine
    container_name: veza_haproxy
    restart: unless-stopped
    deploy:
      resources:
        limits:
          cpus: '0.5'
          memory: 128M
    ports:
      - "${PORT_HAPROXY:-80}:80"
      - "443:443"
    volumes:
      - ./config/haproxy/haproxy.cfg:/usr/local/etc/haproxy/haproxy.cfg:ro
      - ./config/ssl:/etc/ssl/veza:ro
    depends_on:
      - backend-api-blue
      - backend-api-green
      - stream-server-blue
      - stream-server-green
      - web-blue
      - web-green
    networks:
      - veza-network
    healthcheck:
      test: ["CMD", "haproxy", "-c", "-f", "/usr/local/etc/haproxy/haproxy.cfg"]
      interval: 10s
      timeout: 5s
      retries: 3
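  # Note: haproxy -c -f only validates the configuration file; it does not prove
  # the running process is serving traffic. A liveness-style alternative (sketch,
  # assuming a "monitor-uri /healthz" line is added to the frontend in
  # config/haproxy/haproxy.cfg):
  #   healthcheck:
  #     test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost/healthz"]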
networks:
  veza-network:
    driver: bridge
    ipam:
      config:
        - subnet: 172.20.0.0/16
volumes:
  postgres_data:
  redis_data:
  rabbitmq_data:
  hyperswitch_postgres_data:
  minio_data:
  hls_prod_data:
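# Bring-up sketch (file names are illustrative):
#   docker compose -f docker-compose.prod.yml --env-file .env.prod config --quiet   # validate interpolation
#   docker compose -f docker-compose.prod.yml --env-file .env.prod up -d --build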