From bf24a5e3cecaa070e9dfcb9681879d903bd85bc4 Mon Sep 17 00:00:00 2001
From: senke
Date: Thu, 30 Apr 2026 15:44:12 +0200
Subject: [PATCH] feat(infra): add coturn service + wire WEBRTC_TURN_* envs in compose

WebRTC 1:1 calls were silently broken behind symmetric NAT (corporate
firewalls, mobile CGNAT, Incus default networking) because no TURN
relay was deployed. The /api/v1/config/webrtc endpoint and the
useWebRTC frontend hook were both wired correctly from v1.0.9 Day 1,
but with no TURN box on the network the handler returned STUN-only and
the SPA's `nat.hasTurn` flag stayed false.

Added:

  * docker-compose.prod.yml: new `coturn` service using the official
    coturn/coturn:4.6.2 image, network_mode: host (UDP relay range
    49152-65535 doesn't survive Docker NAT), config passed entirely
    via CLI args so no template render is needed. TLS cert volume
    points at /etc/letsencrypt/live/turn.veza.fr by default; override
    with TURN_CERT_DIR for non-LE setups. Healthcheck uses nc -uz to
    catch crashed/unbound listeners.

  * Both backend services (blue + green): WEBRTC_STUN_URLS,
    WEBRTC_TURN_URLS, WEBRTC_TURN_USERNAME, WEBRTC_TURN_CREDENTIAL
    pulled from env with `:?` strict-fail markers so a misconfigured
    deploy crashes loudly instead of degrading silently to STUN-only.

  * docker-compose.staging.yml: same 4 env vars but with safe fallback
    defaults (Google STUN, no TURN) so staging boots without a coturn
    box. Operators can flip to relay by setting the envs externally.

Operator must set the following secrets at deploy time:

  WEBRTC_TURN_PUBLIC_IP    the host's public IP (used both by coturn
                           --external-ip and by the backend STUN/TURN
                           URLs the SPA receives)
  WEBRTC_TURN_USERNAME     static long-term credential username
  WEBRTC_TURN_CREDENTIAL   static long-term credential password
  WEBRTC_TURN_REALM        optional, defaults to turn.veza.fr

Smoke test:

  turnutils_uclient -u $USER -w $CRED -p 3478 $PUBLIC_IP

should return a relay allocation within ~1s.
From the SPA, watch chrome://webrtc-internals during a call and confirm
the selected candidate pair is `relay` when both peers are on symmetric
NAT.

The Ansible role under infra/coturn/ is the canonical Incus-native
deploy path documented in infra/coturn/README.md; this compose service
is the simpler single-host option that unblocks calls today. v1.1 will
switch from static to ephemeral REST-shared-secret credentials per
ORIGIN_SECURITY_FRAMEWORK.md.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 docker-compose.prod.yml    | 69 ++++++++++++++++++++++++++++++++++++++
 docker-compose.staging.yml |  6 ++++
 2 files changed, 75 insertions(+)

diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml
index f1054adba..77da3490c 100644
--- a/docker-compose.prod.yml
+++ b/docker-compose.prod.yml
@@ -188,6 +188,14 @@ services:
       # one pod are invisible to the other — S3 is required for HA.
       - AWS_S3_ENABLED=true
       - TRACK_STORAGE_BACKEND=s3
+      # WebRTC ICE servers — served by the coturn service defined below.
+      # `:?` aborts compose on unset/empty values (no silent STUN-only
+      # fallback; webrtc_config_handler.go rejects partial TURN config).
+      # turns: uses the hostname the TLS cert covers (a bare IP won't match).
+      - WEBRTC_STUN_URLS=stun:${WEBRTC_TURN_PUBLIC_IP:?WEBRTC_TURN_PUBLIC_IP must be set}:3478
+      - WEBRTC_TURN_URLS=turn:${WEBRTC_TURN_PUBLIC_IP}:3478,turns:${WEBRTC_TURN_HOSTNAME:-turn.veza.fr}:5349
+      - WEBRTC_TURN_USERNAME=${WEBRTC_TURN_USERNAME:?WEBRTC_TURN_USERNAME must be set}
+      - WEBRTC_TURN_CREDENTIAL=${WEBRTC_TURN_CREDENTIAL:?WEBRTC_TURN_CREDENTIAL must be set}
       - HLS_STREAMING=true
       - HLS_STORAGE_DIR=/data/hls
     volumes:
@@ -250,6 +258,14 @@ services:
       # one pod are invisible to the other — S3 is required for HA.
       - AWS_S3_ENABLED=true
       - TRACK_STORAGE_BACKEND=s3
+      # WebRTC ICE servers — served by the coturn service defined below.
+      # `:?` aborts compose on unset/empty values (no silent STUN-only
+      # fallback; webrtc_config_handler.go rejects partial TURN config).
+      # turns: uses the hostname the TLS cert covers (a bare IP won't match).
+      - WEBRTC_STUN_URLS=stun:${WEBRTC_TURN_PUBLIC_IP:?WEBRTC_TURN_PUBLIC_IP must be set}:3478
+      - WEBRTC_TURN_URLS=turn:${WEBRTC_TURN_PUBLIC_IP}:3478,turns:${WEBRTC_TURN_HOSTNAME:-turn.veza.fr}:5349
+      - WEBRTC_TURN_USERNAME=${WEBRTC_TURN_USERNAME:?WEBRTC_TURN_USERNAME must be set}
+      - WEBRTC_TURN_CREDENTIAL=${WEBRTC_TURN_CREDENTIAL:?WEBRTC_TURN_CREDENTIAL must be set}
       - HLS_STREAMING=true
       - HLS_STORAGE_DIR=/data/hls
     volumes:
@@ -364,6 +380,59 @@ services:
     networks:
       - veza-network
 
+  # ============================================================================
+  # COTURN — TURN/STUN relay for WebRTC NAT traversal (v1.0.10 polish)
+  # ----------------------------------------------------------------------------
+  # Calls (1:1 audio/video) signal through chat WebSocket but the actual
+  # media stream needs a relay when both peers are behind symmetric NAT.
+  # Without this service, every call between users on corporate firewalls,
+  # mobile CGNAT or Incus default networking will silently fail with
+  # iceConnectionState=failed after ~30s.
+  #
+  # network_mode: host is REQUIRED — coturn allocates UDP ports in the
+  # 49152-65535 range for media relay, and Docker's NAT layer drops them.
+  # Host networking exposes the host's public IP directly, which is what
+  # WEBRTC_TURN_PUBLIC_IP must point at (so coturn advertises the right
+  # candidate to remote peers).
+  #
+  # The infra/coturn/README.md describes a parallel Incus-native deploy
+  # path; this compose service is the simpler dev/single-host option.
+  # If you run prod on multiple hosts behind a load balancer, prefer the
+  # Ansible/Incus path so coturn lives on a host with a stable public IP.
+  # ============================================================================
+  coturn:
+    image: coturn/coturn:4.6.2
+    container_name: veza_coturn
+    restart: unless-stopped
+    network_mode: host
+    command:
+      - "-n"
+      - "--listening-port=3478"
+      - "--tls-listening-port=5349"
+      - "--external-ip=${WEBRTC_TURN_PUBLIC_IP:?WEBRTC_TURN_PUBLIC_IP must be set (the public IP coturn advertises to peers)}"
+      - "--realm=${WEBRTC_TURN_REALM:-turn.veza.fr}"
+      - "--lt-cred-mech"
+      - "--user=${WEBRTC_TURN_USERNAME:?WEBRTC_TURN_USERNAME must be set}:${WEBRTC_TURN_CREDENTIAL:?WEBRTC_TURN_CREDENTIAL must be set}"
+      - "--min-port=49152"
+      - "--max-port=65535"
+      - "--no-cli"
+      - "--no-tlsv1"
+      - "--no-tlsv1_1"
+      - "--cert=/etc/coturn/fullchain.pem"
+      - "--pkey=/etc/coturn/privkey.pem"
+    volumes:
+      # TLS material, mounted read-only. The dir must expose fullchain.pem
+      # and privkey.pem (certbot names), readable by the container user.
+      # certbot's live/ holds symlinks into ../../archive — copy them out.
+      - ${TURN_CERT_DIR:-/etc/letsencrypt/live/turn.veza.fr}:/etc/coturn:ro
+    healthcheck:
+      # UDP probe: only shows that something is bound on 3478 — it checks
+      # neither auth nor the TLS listener. NOTE(review): confirm image has nc.
+      test: ["CMD-SHELL", "nc -zu localhost 3478 || exit 1"]
+      interval: 30s
+      timeout: 5s
+      retries: 3
+
   # ============================================================================
   # MONITORING - Alertmanager
   # Set SLACK_WEBHOOK_URL for Slack notifications. Works with Prometheus.
diff --git a/docker-compose.staging.yml b/docker-compose.staging.yml
index 74e55aa46..e35b66565 100644
--- a/docker-compose.staging.yml
+++ b/docker-compose.staging.yml
@@ -82,6 +82,12 @@ services:
       # disabled) win and the AWS_S3_* credentials above are inert.
       - AWS_S3_ENABLED=true
       - TRACK_STORAGE_BACKEND=s3
+      # WebRTC ICE — STUN-only by default in staging (no public TURN
+      # box). Set the WEBRTC_TURN_* envs externally to flip to relay.
+      - WEBRTC_STUN_URLS=${WEBRTC_STUN_URLS:-stun:stun.l.google.com:19302}
+      - WEBRTC_TURN_URLS=${WEBRTC_TURN_URLS:-}
+      - WEBRTC_TURN_USERNAME=${WEBRTC_TURN_USERNAME:-}
+      - WEBRTC_TURN_CREDENTIAL=${WEBRTC_TURN_CREDENTIAL:-}
       - HLS_STREAMING=true
       - HLS_STORAGE_DIR=/data/hls
     volumes: