veza/veza-backend-api/internal/config/config.go

package config
import (
"errors"
"fmt"
"os"
"strings"
"time"
"veza-backend-api/internal/database"
"veza-backend-api/internal/email"
"veza-backend-api/internal/eventbus" // Import the eventbus package
"veza-backend-api/internal/logging"
"veza-backend-api/internal/metrics"
"veza-backend-api/internal/middleware"
"veza-backend-api/internal/services"
"veza-backend-api/internal/workers"
"github.com/redis/go-redis/v9"
"go.uber.org/zap"
)
// Config holds the application's entire configuration
type Config struct {
// Database
Database *database.Database
// Redis
RedisClient *redis.Client
// Services
SessionService *services.SessionService
AuditService *services.AuditService
TOTPService *services.TOTPService
UploadValidator *services.UploadValidator
CacheService *services.CacheService
PlaylistService *services.PlaylistService
PermissionService *services.PermissionService
JWTService *services.JWTService
UserService *services.UserService
S3StorageService *services.S3StorageService // BE-SVC-005: S3 storage service
CDNService *services.CDNService // v1.0.9 W3 Day 13: optional CDN edge in front of S3/MinIO
APIKeyService *services.APIKeyService // v0.102 Lot C: developer API keys
PresenceService *services.PresenceService // v0.301 Lot P1: user presence (online/away/offline)
TokenBlacklist *services.TokenBlacklist // VEZA-SEC-006: token revocation (nil if Redis unavailable)
// Middlewares
RateLimiter *middleware.RateLimiter
SimpleRateLimiter *middleware.SimpleRateLimiter // Simple rate limiter (T0015)
EndpointLimiter *middleware.EndpointLimiter
UserRateLimiter *middleware.UserRateLimiter // BE-SVC-002: Per-user rate limiting
AuthMiddleware *middleware.AuthMiddleware
// Logger
Logger *zap.Logger
// Metrics (T0020)
ErrorMetrics *metrics.ErrorMetrics
// Secrets Provider (T0037)
SecretsProvider SecretsProvider
// Config Watcher (T0040)
ConfigWatcher *ConfigWatcher
// Configuration
Env string // Environment: development, test, production (P0-SECURITY)
AppPort int // HTTP server port (T0031)
AppDomain string // Application domain (APP_DOMAIN) — single source of truth for URLs & CORS
JWTSecret string // HS256 fallback (dev only)
JWTPrivateKeyPath string // v0.9.1 RS256: path to RSA private key
JWTPublicKeyPath string // v0.9.1 RS256: path to RSA public key
JWTIssuer string // T0204: Issuer claim validation (P1-SECURITY)
JWTAudience string // T0204: Audience claim validation (P1-SECURITY)
ChatJWTSecret string // Secret for Chat WebSocket tokens
// v1.0.9 W3 Day 13 — CDN edge config (optional). All four are read
// from env at boot; CDNEnabled=false leaves the path unchanged
// (browsers are redirected to S3/MinIO directly, as in v1.0.8).
CDNEnabled bool
CDNProvider string // "bunny", "cloudflare", "cloudflare_r2", "cloudfront", or "none"
CDNBaseURL string // e.g. https://cdn.veza.fr
CDNSecurityKey string // Bunny.net Pull Zone token-auth key
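// Illustrative sketch of the Bunny token-auth scheme implemented by
// services/cdn_service.go (generateBunnySignedURL): SHA-256 over
// securityKey + path + expires, URL-safe base64 without padding, expiry
// passed as a query parameter. Variable names here are hypothetical; see
// the service for the actual implementation.
//
//	expires := fmt.Sprintf("%d", time.Now().Add(ttl).Unix())
//	sum := sha256.Sum256([]byte(securityKey + path + expires))
//	token := base64.RawURLEncoding.EncodeToString(sum[:])
//	signedURL := cdnBaseURL + path + "?token=" + token + "&expires=" + expires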
RedisURL string
RedisEnable bool // Enable/Disable Redis
// v1.0.9 Day 11 — Redis Sentinel HA. When SentinelAddrs is non-empty,
// initRedis switches to redis.NewFailoverClient and points at these
// sentinels instead of dialing the URL above. The URL is still read
// (auth + DB index parsed off it) so single-instance dev keeps working.
RedisSentinelAddrs []string
RedisSentinelMasterName string
RedisSentinelPassword string
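// Sketch of the branch initRedis (redis_init.go) takes on these fields —
// illustrative only, not the actual implementation:
//
//	if len(cfg.RedisSentinelAddrs) > 0 {
//	    client = redis.NewFailoverClient(&redis.FailoverOptions{
//	        MasterName:       cfg.RedisSentinelMasterName,
//	        SentinelAddrs:    cfg.RedisSentinelAddrs,
//	        SentinelPassword: cfg.RedisSentinelPassword,
//	    })
//	} else {
//	    opts, _ := redis.ParseURL(cfg.RedisURL) // single-instance dev/local path
//	    client = redis.NewClient(opts)
//	}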
DatabaseURL string
DatabaseReadURL string // Optional read replica URL (DATABASE_READ_URL)
UploadDir string // Upload directory
StreamServerURL string // Streaming server URL
StreamServerInternalAPIKey string // API key for /internal/jobs/transcode (P1.1.2 - same as stream server INTERNAL_API_KEY)
ChatServerURL string // Chat server URL
CORSOrigins []string // List of allowed CORS origins
FrontendURL string // Frontend URL (OAuth redirects, password reset links). FRONTEND_URL or VITE_FRONTEND_URL
// OAuth Security (v0.902 Sentinel)
OAuthEncryptionKey string // OAUTH_ENCRYPTION_KEY: 32 bytes for AES-256-GCM (required in production)
OAuthAllowedRedirectDomains []string // OAUTH_ALLOWED_REDIRECT_DOMAINS: whitelist for OAuth redirect URLs
// HLS Streaming Configuration (v0.503)
HLSEnabled bool // Enable HLS streaming routes
HLSStorageDir string // Directory for HLS segment storage
// S3 Storage Configuration (BE-SVC-005)
S3Bucket string // S3 bucket name
S3Region string // AWS region
S3Endpoint string // Custom endpoint (for MinIO, etc.)
S3AccessKey string // AWS access key (optional; default credentials are used if empty)
S3SecretKey string // AWS secret key (optional; default credentials are used if empty)
S3Enabled bool // Enable S3 storage
// Track upload storage backend (v1.0.8 Phase 0 — MinIO migration)
// "local" (default) = writes to veza-backend-api/uploads/tracks/
// "s3" = writes to S3StorageService bucket. Requires S3Enabled=true.
// Read by TrackService.UploadTrack(); switch is feature-flag-gated so
// operators can roll out per environment and roll back by flipping the env var.
TrackStorageBackend string
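// The value is checked by ValidateForEnvironment (production rule #11) —
// sketched here for orientation, not verbatim:
//
//	if cfg.TrackStorageBackend != "local" && cfg.TrackStorageBackend != "s3" {
//	    return fmt.Errorf("TRACK_STORAGE_BACKEND must be 'local' or 's3'")
//	}
//	if cfg.TrackStorageBackend == "s3" && !cfg.S3Enabled {
//	    return fmt.Errorf("TRACK_STORAGE_BACKEND=s3 requires AWS_S3_ENABLED=true")
//	}
//
// Dev/staging log a warning and fall back to "local" instead of failing.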
// WebRTC ICE servers (v1.0.9 item 1.2 — coturn).
//
// WebRTCStunURLs is a list of STUN server URLs (e.g. "stun:stun.l.google.com:19302").
// Empty defaults to the Google public STUN — fine for dev / behind-the-NAT
// scenarios where both peers can reach the same STUN, broken in production
// where symmetric NAT requires a TURN relay.
//
// WebRTCTurnURLs / Username / Credential drive the optional self-hosted
// coturn relay deployed alongside Veza in prod (see infra/coturn/). When
// any of TurnURLs / Username / Credential is empty, the TURN block is
// omitted from the iceServers payload — the frontend falls back to STUN
// only and can warn the user. Static credentials are accepted; for the
// shared-secret REST scheme see ORIGIN_SECURITY_FRAMEWORK.md (deferred).
WebRTCStunURLs []string
WebRTCTurnURLs []string
WebRTCTurnUsername string
WebRTCTurnCredential string
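// Shape of the payload served by GET /api/v1/config/webrtc and built from
// the four fields above (illustrative; hostnames are examples, and the TURN
// entry is omitted when any of TurnURLs / Username / Credential is empty):
//
//	{
//	  "iceServers": [
//	    {"urls": ["stun:stun.l.google.com:19302"]},
//	    {"urls": ["turn:turn.example.com:3478"], "username": "...", "credential": "..."}
//	  ]
//	}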
// Sentry configuration
SentryDsn string // Sentry DSN for error tracking
SentryEnvironment string // Sentry environment (dev, staging, prod)
SentrySampleRateErrors float64 // Sample rate for errors (0.0-1.0)
SentrySampleRateTransactions float64 // Sample rate for transactions (0.0-1.0)
RateLimitLimit int // Request limit for the simple rate limiter
RateLimitWindow int // Time window in seconds for the simple rate limiter
AuthRateLimitLoginAttempts int // Max login attempts (PR-3)
AuthRateLimitLoginWindow int // Login rate limit window in minutes (PR-3)
AccountLockoutExemptEmails []string // BE-SEC-007: Emails exempt from lockout (e.g. testuser@example.com)
HandlerTimeout time.Duration // Global handler timeout (PR-6)
LogLevel string // Log level (T0027)
DBMaxRetries int
DBRetryInterval time.Duration
MaxConcurrentUploads int // MOD-P2-005: Concurrent upload limit (backpressure)
// Log Aggregation (BE-SVC-015)
LogAggregationEnabled bool // Enable log aggregation
LogAggregationEndpoint string // Aggregation service URL (e.g. "http://loki:3100/loki/api/v1/push")
LogAggregationBatchSize int // Number of log entries to accumulate before sending
LogAggregationFlushInterval time.Duration // Automatic flush interval
LogAggregationTimeout time.Duration // Timeout for HTTP requests
LogAggregationLabels map[string]string // Static labels attached to logs
// Log Files Configuration
LogDir string // Directory for log files (e.g. "/var/log/veza")
// RabbitMQ
RabbitMQEventBus *eventbus.RabbitMQEventBus // RabbitMQ EventBus instance
RabbitMQURL string
RabbitMQMaxRetries int
RabbitMQRetryInterval time.Duration
RabbitMQEnable bool
// Cookie Security Settings
CookieSecure bool // Secure flag (true in production, false in dev)
CookieSameSite string // SameSite policy: strict, lax, none
CookieDomain string // Cookie domain (empty for current domain)
CookieHttpOnly bool // HttpOnly flag (always true for refresh_token)
CookiePath string // Cookie path (usually "/")
// Hyperswitch Payment (Phase 2)
HyperswitchEnabled bool // Enable Hyperswitch payments (default false in dev)
HyperswitchLiveMode bool // Use live API keys (HYPERSWITCH_LIVE_MODE). If false in production, test keys are used.
HyperswitchURL string // Hyperswitch router URL (e.g. http://hyperswitch:8080)
HyperswitchAPIKey string // API key for Hyperswitch
HyperswitchWebhookSecret string // Webhook signature verification secret
CheckoutSuccessURL string // URL to redirect after successful payment (e.g. /checkout/success)
// Test-only: when set, used instead of creating marketplace from config (integration tests)
MarketplaceServiceOverride interface{}
// Test-only: when set, used instead of AuthMiddleware (integration tests, e.g. X-User-ID header)
AuthMiddlewareOverride interface{}
// Stripe Connect (Seller Payout v0.602)
StripeConnectEnabled bool // STRIPE_CONNECT_ENABLED
StripeConnectSecretKey string // STRIPE_SECRET_KEY (for server-side Stripe API calls)
StripeConnectWebhookSecret string // STRIPE_CONNECT_WEBHOOK_SECRET
PlatformFeeRate float64 // PLATFORM_FEE_RATE (default 0.10 = 10% commission)
// Transfer Retry Worker (v0.701)
TransferRetryEnabled bool // TRANSFER_RETRY_ENABLED (default true)
TransferRetryMaxAttempts int // TRANSFER_RETRY_MAX (default 3)
TransferRetryInterval time.Duration // TRANSFER_RETRY_INTERVAL (default 5m)
// Reversal Worker (v1.0.7 item B) — drives seller_transfers from
// reversal_pending to reversed via Stripe Connect Transfers:reversal.
// Backoff is exponential: interval * 2^retry_count, capped at
// ReversalBackoffMax so a row doesn't sit 17 hours between retries
// after 10 failed attempts.
ReversalWorkerEnabled bool // REVERSAL_WORKER_ENABLED (default true)
ReversalMaxRetries int // REVERSAL_MAX_RETRIES (default 5)
ReversalCheckInterval time.Duration // REVERSAL_CHECK_INTERVAL (default 1m)
ReversalBackoffBase time.Duration // REVERSAL_BACKOFF_BASE (default 1m)
ReversalBackoffMax time.Duration // REVERSAL_BACKOFF_MAX (default 1h)
// Hyperswitch webhook log retention (v1.0.7 item E).
HyperswitchWebhookLogRetentionDays int // HYPERSWITCH_WEBHOOK_LOG_RETENTION_DAYS (default 90)
// Reconciliation Worker (v1.0.7 item C) — sweeps pending orders
// and refunds that have been stuck too long, synthesises a webhook
// from the live PSP state, and feeds the normal Process*Webhook
// dispatcher. Idempotent with real webhooks.
ReconcileWorkerEnabled bool // RECONCILE_WORKER_ENABLED (default true)
ReconcileInterval time.Duration // RECONCILE_INTERVAL (default 1h)
ReconcileOrderStuckAfter time.Duration // RECONCILE_ORDER_STUCK_AFTER (default 30m)
ReconcileRefundStuckAfter time.Duration // RECONCILE_REFUND_STUCK_AFTER (default 30m)
ReconcileRefundOrphanAfter time.Duration // RECONCILE_REFUND_ORPHAN_AFTER (default 5m)
// Email & Jobs
EmailSender *email.SMTPEmailSender
JobWorker *workers.JobWorker
SMTPConfig email.SMTPConfig
}
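
// reversalBackoffDelay is a minimal illustrative sketch of the backoff
// formula documented on the Reversal* fields above (ReversalBackoffBase *
// 2^retryCount, capped at ReversalBackoffMax). The reversal worker computes
// this itself; nothing in this package calls this helper.
func reversalBackoffDelay(base, maxDelay time.Duration, retryCount int) time.Duration {
	delay := base
	for i := 0; i < retryCount; i++ {
		delay *= 2
		if delay >= maxDelay {
			return maxDelay
		}
	}
	return delay
}
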
// INT-019: ValidateRequiredEnvironmentVariables validates all required environment variables.
// It checks that every critical variable is set before the configuration is loaded.
func ValidateRequiredEnvironmentVariables(env string) error {
var missingVars []string
var errors []string
// Variables required in every environment
requiredVars := []string{"DATABASE_URL"}
// v0.9.1: JWT config — require either RS256 keys OR JWT_SECRET (dev fallback)
jwtPrivatePath := os.Getenv("JWT_PRIVATE_KEY_PATH")
jwtPublicPath := os.Getenv("JWT_PUBLIC_KEY_PATH")
jwtSecret := os.Getenv("JWT_SECRET")
hasRS256 := jwtPrivatePath != "" && jwtPublicPath != ""
hasHS256 := jwtSecret != ""
if !hasRS256 && !hasHS256 {
missingVars = append(missingVars, "JWT_PRIVATE_KEY_PATH+JWT_PUBLIC_KEY_PATH or JWT_SECRET")
}
for _, varName := range requiredVars {
value := os.Getenv(varName)
if value == "" {
missingVars = append(missingVars, varName)
}
}
// Environment-specific validation
if env == EnvProduction {
// In production, CORS_ALLOWED_ORIGINS is required
corsOrigins := os.Getenv("CORS_ALLOWED_ORIGINS")
if corsOrigins == "" {
missingVars = append(missingVars, "CORS_ALLOWED_ORIGINS")
} else {
// Make sure there is no wildcard in production
if strings.Contains(corsOrigins, "*") {
errors = append(errors, "CORS_ALLOWED_ORIGINS cannot contain wildcard '*' in production environment")
}
}
// In production, make sure RABBITMQ_URL is set when RabbitMQ is enabled
rabbitMQEnable := os.Getenv("RABBITMQ_ENABLE")
if rabbitMQEnable != "false" {
rabbitMQURL := os.Getenv("RABBITMQ_URL")
if rabbitMQURL == "" {
errors = append(errors, "RABBITMQ_URL is required in production when RabbitMQ is enabled")
}
}
// In production, LOG_LEVEL must not be DEBUG
logLevel := os.Getenv("LOG_LEVEL")
if logLevel == "DEBUG" {
errors = append(errors, "LOG_LEVEL=DEBUG is not allowed in production environment for security reasons")
}
// SEC-08: In production, if Hyperswitch is enabled, webhook secret is mandatory
hyperswitchEnabled := os.Getenv("HYPERSWITCH_ENABLED")
if hyperswitchEnabled == "true" || hyperswitchEnabled == "1" {
webhookSecret := os.Getenv("HYPERSWITCH_WEBHOOK_SECRET")
if webhookSecret == "" {
errors = append(errors, "HYPERSWITCH_WEBHOOK_SECRET is required in production when HYPERSWITCH_ENABLED=true")
}
}
}
// Build the error message
if len(missingVars) > 0 {
errors = append(errors, fmt.Sprintf("required environment variables are missing: %v", missingVars))
}
if len(errors) > 0 {
return fmt.Errorf("environment variable validation failed: %s", strings.Join(errors, "; "))
}
return nil
}
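
// NewConfig below relies on small env helpers (getEnv, getEnvInt,
// getEnvRequired) defined elsewhere in this package and not shown in this
// excerpt. For orientation only, getEnvRequired is expected to look roughly
// like the following sketch (the actual definition may differ):
//
//	func getEnvRequired(key string) (string, error) {
//	    v := os.Getenv(key)
//	    if v == "" {
//	        return "", fmt.Errorf("required environment variable %s is not set", key)
//	    }
//	    return v, nil
//	}
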
// NewConfig creates a new configuration
func NewConfig() (*Config, error) {
// Determine the environment with improved auto-detection (T0032, T0039)
env := DetectEnvironment()
// INT-019: Validate the required environment variables before loading the configuration
if err := ValidateRequiredEnvironmentVariables(env); err != nil {
return nil, fmt.Errorf("environment validation failed: %w", err)
}
// Load the .env files for this environment (T0032)
// Load order: .env.{env}, then .env
// System environment variables take precedence
if err := LoadEnvFiles(env); err != nil {
// On error, continue anyway (the .env files may simply not exist)
// System environment variables will be used instead
}
// FIX #2: Load LOG_LEVEL BEFORE initializing the logger
// Load the log level from environment variables (T0027)
// Possible values: DEBUG, INFO, WARN, ERROR
// Default: INFO
logLevel := getEnv("LOG_LEVEL", "INFO")
// Load the application domain (single source of truth for URLs, CORS, etc.)
appDomain := getEnv("APP_DOMAIN", "veza.fr")
// SECURITY: Load CORS origins with environment-aware secure defaults (P0-SECURITY)
corsOrigins := getCORSOrigins(env, appDomain)
// Load the simple rate limiter configuration (A04: always active, relaxed limits in dev)
rateLimitLimit := getEnvInt("RATE_LIMIT_LIMIT", getDefaultRateLimitLimit(env))
rateLimitWindow := getEnvInt("RATE_LIMIT_WINDOW", 60) // 60 seconds (1 minute) by default
// Load the HTTP port from environment variables (T0031)
appPort := getEnvInt("APP_PORT", 8080)
// MOD-P2-005: Load the concurrent upload limit (backpressure)
maxConcurrentUploads := getEnvInt("MAX_CONCURRENT_UPLOADS", 10) // 10 by default
// Configuration from environment variables
// v0.9.1: JWT — RS256 (prefer) or JWT_SECRET (dev fallback)
jwtPrivateKeyPath := getEnv("JWT_PRIVATE_KEY_PATH", "")
jwtPublicKeyPath := getEnv("JWT_PUBLIC_KEY_PATH", "")
jwtSecret := getEnv("JWT_SECRET", "")
databaseURL, err := getEnvRequired("DATABASE_URL")
if err != nil {
return nil, err
}
// v1.0.9 Day 7 — in staging/prod, DATABASE_URL points at the PgBouncer
// container fronting the pg_auto_failover formation, not at Postgres
// directly. Wiring:
// postgresql://veza:PASSWORD@pgaf-pgbouncer.lxd:6432/veza?sslmode=prefer
// PgBouncer accepts up to 1000 client connections and pools them in
// transaction mode down to 50 server connections. Bypassing it (pointing
// at pgaf-primary directly) re-introduces the connection storm we just
// solved; the role's README documents the operational consequences.
// Dev/CI keeps direct Postgres because there's no pool advantage at
// that scale and PgBouncer in transaction-mode forbids LISTEN/NOTIFY
// + cross-tx prepared statements (none of which the backend uses,
// but tests sometimes lean on them).
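//
// Illustrative wiring only (hostnames and credentials below are
// placeholders, not values from any real environment):
//   staging/prod: DATABASE_URL=postgresql://veza:PASSWORD@pgaf-pgbouncer.lxd:6432/veza?sslmode=prefer
//   dev/CI:       DATABASE_URL=postgresql://veza:veza@localhost:5432/veza?sslmode=disable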
// BE-SEC-014: Get RabbitMQ URL with environment-aware defaults
rabbitMQURL := getRabbitMQURL(env, appDomain)
config := &Config{
Env: env, // Store environment for validation (P0-SECURITY)
AppPort: appPort,
AppDomain: appDomain,
JWTSecret: jwtSecret,
JWTPrivateKeyPath: jwtPrivateKeyPath,
JWTPublicKeyPath: jwtPublicKeyPath,
JWTIssuer: getEnv("JWT_ISSUER", "veza-api"),
JWTAudience: getEnv("JWT_AUDIENCE", "veza-platform"),
ChatJWTSecret: getEnv("CHAT_JWT_SECRET", jwtSecret),
RedisURL: getEnv("REDIS_URL", "redis://"+appDomain+":6379"),
RedisEnable: getEnvBool("REDIS_ENABLE", true),
RedisSentinelAddrs: parseRedisSentinelAddrs(getEnv("REDIS_SENTINEL_ADDRS", "")),
RedisSentinelMasterName: getEnv("REDIS_SENTINEL_MASTER_NAME", "veza-master"),
RedisSentinelPassword: getEnv("REDIS_SENTINEL_PASSWORD", ""),
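// Illustrative Sentinel wiring (hostnames are examples; 26379 is the
// standard Sentinel port):
//   REDIS_SENTINEL_ADDRS=redis-1:26379,redis-2:26379,redis-3:26379
//   REDIS_SENTINEL_MASTER_NAME=veza-master
// An empty REDIS_SENTINEL_ADDRS keeps the single-instance REDIS_URL
// client (see initRedis).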
CDNEnabled: getEnvBool("CDN_ENABLED", false),
CDNProvider: getEnv("CDN_PROVIDER", "none"),
CDNBaseURL: getEnv("CDN_BASE_URL", ""),
CDNSecurityKey: getEnv("CDN_SECURITY_KEY", ""),
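// Illustrative Bunny.net setup (placeholder values; the provider string
// must match one of the services.CDNProvider constants):
//   CDN_ENABLED=true
//   CDN_PROVIDER=bunny
//   CDN_BASE_URL=https://<pull-zone>.b-cdn.net
//   CDN_SECURITY_KEY=<token auth key of the Pull Zone>
// With CDN_ENABLED=false the service is still constructed below, but
// IsEnabled keeps it inert.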
// SECURITY: DATABASE_URL is REQUIRED - it contains sensitive credentials
DatabaseURL: databaseURL,
DatabaseReadURL: getEnv("DATABASE_READ_URL", ""),
UploadDir: getEnv("UPLOAD_DIR", "uploads"),
StreamServerURL: getEnv("STREAM_SERVER_URL", "http://"+appDomain+":8082"),
StreamServerInternalAPIKey: getEnv("STREAM_SERVER_INTERNAL_API_KEY", ""),
ChatServerURL: getEnv("CHAT_SERVER_URL", "http://"+appDomain+":8081"),
CORSOrigins: corsOrigins,
FrontendURL: getFrontendURL(), // OAuth callback, password reset, email links
// OAuth Security (v0.902 Sentinel)
OAuthEncryptionKey: getEnv("OAUTH_ENCRYPTION_KEY", ""),
OAuthAllowedRedirectDomains: getOAuthAllowedRedirectDomains(env, getEnvStringSlice("OAUTH_ALLOWED_REDIRECT_DOMAINS", nil), corsOrigins, getFrontendURL()),
// HLS Streaming (v0.503; default flipped to true in v1.0.9 W4 Day 17:
// HLS_STREAMING=false is now an explicit opt-out for dev/test envs that
// don't want the transcoder pipeline running).
HLSEnabled: getEnvBool("HLS_STREAMING", true),
HLSStorageDir: getEnv("HLS_STORAGE_DIR", "/tmp/veza-hls"),
// S3 Storage Configuration (BE-SVC-005)
S3Bucket: getEnv("AWS_S3_BUCKET", ""),
S3Region: getEnv("AWS_REGION", "us-east-1"),
S3Endpoint: getEnv("AWS_S3_ENDPOINT", ""), // Optional, for MinIO
S3AccessKey: getEnv("AWS_ACCESS_KEY_ID", ""),
S3SecretKey: getEnv("AWS_SECRET_ACCESS_KEY", ""),
S3Enabled: getEnvBool("AWS_S3_ENABLED", false), // Disabled by default
// Track upload storage backend (v1.0.8 Phase 0)
// "local" keeps the legacy path (writes to veza-backend-api/uploads/).
// "s3" routes uploads through S3StorageService. Validated in
// ValidateForEnvironment() — s3 requires S3Enabled.
TrackStorageBackend: getEnv("TRACK_STORAGE_BACKEND", "local"),
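// Illustrative S3-backed setup (the bucket name is a placeholder):
//   TRACK_STORAGE_BACKEND=s3
//   AWS_S3_ENABLED=true
//   AWS_S3_BUCKET=veza-tracks
// In production, ValidateForEnvironment rejects "s3" without S3Enabled;
// dev/staging log a warning and fall back to "local".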
// WebRTC ICE configuration (v1.0.9 item 1.2 — coturn). Read by
// the public GET /api/v1/config/webrtc handler so the frontend
// hydrates RTCPeerConnection({ iceServers }) without baking
// secrets into the bundle. Empty defaults = STUN-only fallback,
// TURN block omitted from the response.
WebRTCStunURLs: getEnvStringSlice("WEBRTC_STUN_URLS", []string{"stun:stun.l.google.com:19302"}),
WebRTCTurnURLs: getEnvStringSlice("WEBRTC_TURN_URLS", nil),
WebRTCTurnUsername: getEnv("WEBRTC_TURN_USERNAME", ""),
WebRTCTurnCredential: getEnv("WEBRTC_TURN_CREDENTIAL", ""),
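// Rough response shape of GET /api/v1/config/webrtc, assuming the fields
// above are populated (exact JSON keys are owned by the handler; the TURN
// host below is a placeholder):
//
//	{ "iceServers": [
//	    { "urls": ["stun:stun.l.google.com:19302"] },
//	    { "urls": ["turn:turn.example.com:3478"],
//	      "username": "...", "credential": "..." } ] }
//
// With empty TURN settings, only the STUN entry is returned.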
// Sentry configuration
SentryDsn: getEnv("SENTRY_DSN", ""),
SentryEnvironment: env, // Use the detected environment
SentrySampleRateErrors: getEnvFloat64("SENTRY_SAMPLE_RATE_ERRORS", 1.0),
SentrySampleRateTransactions: getEnvFloat64("SENTRY_SAMPLE_RATE_TRANSACTIONS", 0.1),
RateLimitLimit: rateLimitLimit,
RateLimitWindow: rateLimitWindow,
// Raise the rate limits for the test/E2E environment
AuthRateLimitLoginAttempts: getAuthRateLimitLoginAttempts(env),
AuthRateLimitLoginWindow: getAuthRateLimitLoginWindow(env),
AccountLockoutExemptEmails: getEnvStringSlice("ACCOUNT_LOCKOUT_EXEMPT_EMAILS", nil),
HandlerTimeout: getEnvDuration("HANDLER_TIMEOUT", 30*time.Second), // Default: 30 seconds
LogLevel: logLevel,
Logger: nil, // Initialized later based on LOG_LEVEL and aggregation settings
DBMaxRetries: getEnvInt("DB_MAX_RETRIES", 5), // 5 attempts by default
DBRetryInterval: getEnvDuration("DB_RETRY_INTERVAL", 5*time.Second), // 5 seconds by default
MaxConcurrentUploads: maxConcurrentUploads, // MOD-P2-005: cap on concurrent uploads
// Log Aggregation Configuration (BE-SVC-015)
// FIX #26: Enable aggregation by default in production when the endpoint is configured
LogAggregationEndpoint: getEnv("LOG_AGGREGATION_ENDPOINT", ""), // e.g. "http://loki:3100/loki/api/v1/push"
LogAggregationBatchSize: getEnvInt("LOG_AGGREGATION_BATCH_SIZE", 100), // 100 logs per batch
LogAggregationFlushInterval: getEnvDuration("LOG_AGGREGATION_FLUSH_INTERVAL", 5*time.Second), // Flush every 5 seconds
LogAggregationTimeout: getEnvDuration("LOG_AGGREGATION_TIMEOUT", 10*time.Second), // 10-second timeout
LogAggregationLabels: parseLogAggregationLabels(getEnv("LOG_AGGREGATION_LABELS", "")), // Labels in "key1=value1,key2=value2" format
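// Example (illustrative): LOG_AGGREGATION_LABELS="service=veza-api,env=production"
// mirrors the defaults injected further down when the variable is unset.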
// RabbitMQ configuration
// BE-SEC-014: In production, require RABBITMQ_URL to be set (no default with credentials)
RabbitMQURL: rabbitMQURL,
RabbitMQMaxRetries: getEnvInt("RABBITMQ_MAX_RETRIES", 10), // 10 attempts by default (RabbitMQ can take ~30s to start)
RabbitMQRetryInterval: getEnvDuration("RABBITMQ_RETRY_INTERVAL", 5*time.Second), // 5 seconds by default
RabbitMQEnable: getEnvBool("RABBITMQ_ENABLE", true), // Enabled by default
// Cookie Security Configuration
CookieSecure: getCookieSecure(env),
CookieSameSite: getCookieSameSite(env),
CookieDomain: getEnv("COOKIE_DOMAIN", ""),
CookieHttpOnly: getEnvBool("COOKIE_HTTP_ONLY", true),
CookiePath: getEnv("COOKIE_PATH", "/"),
// Hyperswitch Payment Configuration
HyperswitchEnabled: getEnvBool("HYPERSWITCH_ENABLED", false),
HyperswitchLiveMode: getEnvBool("HYPERSWITCH_LIVE_MODE", false),
HyperswitchURL: getEnv("HYPERSWITCH_URL", "http://localhost:18081"),
HyperswitchAPIKey: getEnv("HYPERSWITCH_API_KEY", ""),
HyperswitchWebhookSecret: getEnv("HYPERSWITCH_WEBHOOK_SECRET", ""),
CheckoutSuccessURL: getEnv("CHECKOUT_SUCCESS_URL", ""),
// Stripe Connect (Seller Payout v0.602)
StripeConnectEnabled: getEnvBool("STRIPE_CONNECT_ENABLED", false),
StripeConnectSecretKey: getEnv("STRIPE_SECRET_KEY", ""),
StripeConnectWebhookSecret: getEnv("STRIPE_CONNECT_WEBHOOK_SECRET", ""),
PlatformFeeRate: getEnvFloat64("PLATFORM_FEE_RATE", 0.10),
// Transfer Retry Worker (v0.701)
TransferRetryEnabled: getEnvBool("TRANSFER_RETRY_ENABLED", true),
TransferRetryMaxAttempts: getEnvInt("TRANSFER_RETRY_MAX", 3),
TransferRetryInterval: getEnvDuration("TRANSFER_RETRY_INTERVAL", 5*time.Minute),
// Reversal Worker (v1.0.7 item B)
ReversalWorkerEnabled: getEnvBool("REVERSAL_WORKER_ENABLED", true),
ReversalMaxRetries: getEnvInt("REVERSAL_MAX_RETRIES", 5),
ReversalCheckInterval: getEnvDuration("REVERSAL_CHECK_INTERVAL", time.Minute),
ReversalBackoffBase: getEnvDuration("REVERSAL_BACKOFF_BASE", time.Minute),
ReversalBackoffMax: getEnvDuration("REVERSAL_BACKOFF_MAX", time.Hour),
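// Backoff sketch (illustrative; the real schedule lives in the reversal
// worker, not in config; cfg and n are stand-ins). With the defaults
// above, retry n waits roughly:
//
//	delay := cfg.ReversalBackoffBase * time.Duration(1<<n) // 1m, 2m, 4m, ...
//	if delay > cfg.ReversalBackoffMax {
//		delay = cfg.ReversalBackoffMax // capped at 1h
//	}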
// Webhook audit log retention (v1.0.7 item E)
HyperswitchWebhookLogRetentionDays: getEnvInt("HYPERSWITCH_WEBHOOK_LOG_RETENTION_DAYS", 90),
// Reconciliation Worker (v1.0.7 item C)
ReconcileWorkerEnabled: getEnvBool("RECONCILE_WORKER_ENABLED", true),
ReconcileInterval: getEnvDuration("RECONCILE_INTERVAL", time.Hour),
ReconcileOrderStuckAfter: getEnvDuration("RECONCILE_ORDER_STUCK_AFTER", 30*time.Minute),
ReconcileRefundStuckAfter: getEnvDuration("RECONCILE_REFUND_STUCK_AFTER", 30*time.Minute),
ReconcileRefundOrphanAfter: getEnvDuration("RECONCILE_REFUND_ORPHAN_AFTER", 5*time.Minute),
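// Incident-response example (illustrative): RECONCILE_INTERVAL=5m tightens
// the sweep without a code change; the stuck/orphan thresholds can keep
// their defaults.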
// Log Files Configuration — centralized in config/logging.toml
// Resolved via logging.LoadConfig() with env var overrides (LOG_DIR, LOG_LEVEL)
LogDir: logging.LoadConfig().ResolveLogDir(env),
}
// Initialize the SecretsProvider (T0037)
secretKeys := DefaultSecretKeys()
config.SecretsProvider = NewEnvSecretsProvider(secretKeys)
// FIX #26: Enable aggregation by default in production when the endpoint is configured.
// If LOG_AGGREGATION_ENABLED is set explicitly, use it;
// otherwise enable it automatically in production/staging when the endpoint is configured.
logAggregationEndpoint := config.LogAggregationEndpoint
explicitlyEnabled := os.Getenv("LOG_AGGREGATION_ENABLED") != ""
var logAggregationEnabled bool
if explicitlyEnabled {
// If explicitly set, honor the value
logAggregationEnabled = getEnvBool("LOG_AGGREGATION_ENABLED", false)
} else {
// Otherwise, enable by default in production/staging when the endpoint is configured
logAggregationEnabled = (env == EnvProduction || env == EnvStaging) && logAggregationEndpoint != ""
}
config.LogAggregationEnabled = logAggregationEnabled
// FIX #2: Initialize the logger at the configured level (LOG_LEVEL is honored)
// BE-SVC-015: Use the aggregation-aware logger when enabled, otherwise the standard logger
var logger *zap.Logger
if config.LogAggregationEnabled && config.LogAggregationEndpoint != "" {
aggConfig := &logging.AggregationConfig{
EndpointURL: config.LogAggregationEndpoint,
Enabled: true,
BatchSize: config.LogAggregationBatchSize,
FlushInterval: config.LogAggregationFlushInterval,
Timeout: config.LogAggregationTimeout,
Labels: config.LogAggregationLabels,
}
// Add default labels when not provided
if aggConfig.Labels == nil {
aggConfig.Labels = make(map[string]string)
}
if _, exists := aggConfig.Labels["service"]; !exists {
aggConfig.Labels["service"] = "veza-api"
}
if _, exists := aggConfig.Labels["env"]; !exists {
aggConfig.Labels["env"] = env
}
aggLogger, err := logging.NewLoggerWithAggregation(env, logLevel, aggConfig)
if err != nil {
// FIX #27: Fall back to the optimized (asynchronous) logger when aggregation setup fails
// In production/staging, use the optimized logger for performance
var stdLogger *logging.Logger
var err2 error
if env == EnvProduction || env == EnvStaging {
stdLogger, err2 = logging.NewOptimizedLogger(env, logLevel)
if err2 != nil {
return nil, fmt.Errorf("failed to initialize optimized logger: %w", err2)
}
} else {
stdLogger, err2 = logging.NewLogger(env, logLevel)
if err2 != nil {
return nil, fmt.Errorf("failed to initialize logger: %w", err2)
}
}
logger = stdLogger.GetZapLogger()
logger.Warn("Failed to initialize logger with aggregation, using optimized logger",
zap.Error(err),
zap.String("endpoint", config.LogAggregationEndpoint),
)
} else {
logger = aggLogger.GetZapLogger()
logger.Info("Logger with aggregation initialized",
zap.String("endpoint", config.LogAggregationEndpoint),
zap.Int("batch_size", config.LogAggregationBatchSize),
zap.String("log_level", logLevel),
)
}
} else {
// Use the logger with rotating files under /var/log/veza/
// Creates two files: backend-api.log (all logs) and backend-api-error.log (errors only)
stdLogger, err := logging.NewLoggerWithFileRotation(config.LogDir, "backend-api", env, logLevel)
if err != nil {
return nil, fmt.Errorf("failed to initialize logger with file rotation: %w", err)
}
logger = stdLogger.GetZapLogger()
// Log initialization message - ignore any broken pipe errors silently
// This is the first log message after logger initialization and may trigger broken pipe
// if systemd journald is not ready or stdout/stderr is redirected incorrectly
func() {
defer func() {
if r := recover(); r != nil {
// Silently ignore panics from logger (shouldn't happen, but be safe)
_ = r
}
}()
logger.Info("Logger initialized with file rotation",
zap.String("log_level", logLevel),
zap.String("env", env),
zap.String("log_dir", config.LogDir),
zap.String("all_logs_file", fmt.Sprintf("%s/backend-api.log", config.LogDir)),
zap.String("error_logs_file", fmt.Sprintf("%s/backend-api-error.log", config.LogDir)),
)
}()
}
// FIX #30: Apply the secret filter to the logger
logger = logging.WrapLoggerWithSecretFilter(logger)
// Attach the logger to the config
config.Logger = logger
// SECURITY: Validate the configuration for the current environment (P0-SECURITY)
if err := config.ValidateForEnvironment(); err != nil {
// CRITICAL: Protect logger calls from broken pipe errors
func() {
defer func() {
if r := recover(); r != nil {
_ = r
}
}()
logger.Error("Configuration validation failed", zap.Error(err), zap.String("env", env))
}()
return nil, fmt.Errorf("invalid configuration: %w", err)
}
// Warn if CORS is strict/empty in production (MOD-P0-002)
if env == EnvProduction && len(config.CORSOrigins) == 0 {
// CRITICAL: Protect logger calls from broken pipe errors
func() {
defer func() {
if r := recover(); r != nil {
_ = r
}
}()
logger.Warn("CORS_ALLOWED_ORIGINS is empty in production. Strict mode enabled: ALL CORS requests will be rejected.")
}()
}
// Create separate loggers for each module
redisLoggerWrapper, err := logging.NewLoggerWithFileRotation(config.LogDir, "redis", env, logLevel)
var redisLoggerZap *zap.Logger
if err != nil {
// CRITICAL: Protect logger calls from broken pipe errors
func() {
defer func() {
if r := recover(); r != nil {
_ = r
}
}()
logger.Warn("Failed to create Redis logger, using main logger", zap.Error(err))
}()
redisLoggerZap = logger
} else {
redisLoggerZap = logging.WrapLoggerWithSecretFilter(redisLoggerWrapper.GetZapLogger())
}
dbLoggerWrapper, err := logging.NewLoggerWithFileRotation(config.LogDir, "db", env, logLevel)
var dbLoggerZap *zap.Logger
if err != nil {
// CRITICAL: Protect logger calls from broken pipe errors
func() {
defer func() {
if r := recover(); r != nil {
_ = r
}
}()
logger.Warn("Failed to create DB logger, using main logger", zap.Error(err))
}()
dbLoggerZap = logger
} else {
dbLoggerZap = logging.WrapLoggerWithSecretFilter(dbLoggerWrapper.GetZapLogger())
}
rabbitmqLoggerWrapper, err := logging.NewLoggerWithFileRotation(config.LogDir, "rabbitmq", env, logLevel)
var rabbitmqLoggerZap *zap.Logger
if err != nil {
// CRITICAL: Protect logger calls from broken pipe errors
func() {
defer func() {
if r := recover(); r != nil {
_ = r
}
}()
logger.Warn("Failed to create RabbitMQ logger, using main logger", zap.Error(err))
}()
rabbitmqLoggerZap = logger
} else {
rabbitmqLoggerZap = logging.WrapLoggerWithSecretFilter(rabbitmqLoggerWrapper.GetZapLogger())
}
// Initialize Redis
if config.RedisEnable {
config.RedisClient, err = initRedis(
config.RedisURL,
config.RedisSentinelAddrs,
config.RedisSentinelMasterName,
config.RedisSentinelPassword,
redisLoggerZap,
)
if err != nil {
// CRITICAL: Protect logger calls from broken pipe errors
func() {
defer func() {
if r := recover(); r != nil {
_ = r
}
}()
logger.Error("Failed to initialize Redis", zap.Error(err))
}()
return nil, err
}
} else {
// CRITICAL: Protect logger calls from broken pipe errors
func() {
defer func() {
if r := recover(); r != nil {
_ = r
}
}()
logger.Warn("Redis désactivé par configuration (REDIS_ENABLE=false)")
}()
}
// Initialize the database with retry
config.Database, err = initDatabaseWithRetry(config.DatabaseURL, config.DatabaseReadURL, config.DBMaxRetries, config.DBRetryInterval, dbLoggerZap)
if err != nil {
// CRITICAL: Protect logger calls from broken pipe errors
func() {
defer func() {
if r := recover(); r != nil {
_ = r
}
}()
logger.Error("Failed to initialize database", zap.Error(err))
}()
return nil, fmt.Errorf("failed to initialize database: %w", err)
}
// Initialize RabbitMQ with retry
config.RabbitMQEventBus, err = eventbus.NewRabbitMQEventBusWithRetry(&eventbus.RabbitMQConfig{
URL: config.RabbitMQURL,
MaxRetries: config.RabbitMQMaxRetries,
RetryInterval: config.RabbitMQRetryInterval,
Enable: config.RabbitMQEnable,
}, rabbitmqLoggerZap)
if err != nil {
if _, ok := err.(*eventbus.EventBusUnavailableError); ok {
if config.Env == EnvProduction {
// In production, RabbitMQ is required: fail hard
logger.Fatal("Unable to connect to RabbitMQ after several attempts. The service cannot start.", zap.Error(err))
return nil, err
}
// In development, start in degraded mode (without the EventBus)
logger.Warn("RabbitMQ unavailable; starting in degraded mode (EventBus disabled).", zap.Error(err))
config.RabbitMQEventBus = &eventbus.RabbitMQEventBus{}
} else {
logger.Error("Failed to initialize RabbitMQ EventBus", zap.Error(err))
return nil, err
}
}
// BE-SVC-005: Initialize the S3 service when enabled
if config.S3Enabled && config.S3Bucket != "" {
s3Service, err := services.NewS3StorageService(services.S3Config{
Bucket: config.S3Bucket,
Region: config.S3Region,
Endpoint: config.S3Endpoint,
AccessKey: config.S3AccessKey,
SecretKey: config.S3SecretKey,
Logger: logger,
})
if err != nil {
logger.Warn("Failed to initialize S3 storage service, falling back to local storage",
zap.Error(err),
zap.String("bucket", config.S3Bucket),
)
config.S3Enabled = false
} else {
config.S3StorageService = s3Service
logger.Info("S3 storage service initialized successfully",
zap.String("bucket", config.S3Bucket),
zap.String("region", config.S3Region),
)
}
}
// v1.0.9 W3 Day 13 — optional CDN edge in front of S3/MinIO. Always
// initialise the service; IsEnabled handles the OFF case so
// downstream wiring can stay unconditional.
config.CDNService = services.NewCDNService(services.CDNConfig{
Provider: services.CDNProvider(config.CDNProvider),
BaseURL: config.CDNBaseURL,
SecurityKey: config.CDNSecurityKey,
Enabled: config.CDNEnabled,
Logger: logger,
})
if config.CDNEnabled {
logger.Info("CDN service initialized",
zap.String("provider", config.CDNProvider),
zap.String("base_url", config.CDNBaseURL),
)
}
// Initialize the services
err = config.initServices()
if err != nil {
logger.Error("Failed to initialize services", zap.Error(err))
return nil, err
}
// Initialize the middlewares
err = config.initMiddlewares()
if err != nil {
logger.Error("Failed to initialize middlewares", zap.Error(err))
return nil, err
}
// Initialize the error metrics (T0020)
config.ErrorMetrics = metrics.NewErrorMetrics()
// Initialize the SMTP configuration
config.SMTPConfig = email.LoadSMTPConfigFromEnv()
config.EmailSender = email.NewSMTPEmailSender(config.SMTPConfig, logger)
// Initialize the JobService
jobService := services.NewJobService(logger)
// Initialize the JobWorker
config.JobWorker = workers.NewJobWorker(
config.Database.GormDB,
jobService,
logger,
100, // queueSize
3, // workers
3, // maxRetries
config.EmailSender, // emailSender
)
// BE-SVC-003: Connect JobService to JobWorker
jobService.SetJobEnqueuer(config.JobWorker)
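// Note: the wiring is intentionally bidirectional — the worker executes jobs
// through the JobService, while the service enqueues new jobs through the
// worker it receives via SetJobEnqueuer above.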
// v1.0.8 Phase 2 — wire S3 resolver so HLSTranscodeService can handle
// s3-backed tracks via signed URLs instead of local paths. Nil-safe:
// local-only deployments skip this entirely.
if config.S3StorageService != nil {
config.JobWorker.SetS3Resolver(config.S3StorageService)
}
// Log the configuration with secrets masked (T0037)
config.logConfigInitialized(logger)
// Initialize the ConfigWatcher if enabled (T0040)
// The watcher can be enabled via the CONFIG_WATCH=true environment variable
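// Example (illustrative): CONFIG_WATCH=true with APP_ENV=staging watches both
// .env and .env.staging, presumably triggering a reload through the ConfigReloader.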
if getEnv("CONFIG_WATCH", "false") == "true" {
reloader := config.GetConfigReloader()
watcher, err := NewConfigWatcher(reloader, logger)
if err != nil {
logger.Warn("Failed to create config watcher", zap.Error(err))
} else {
config.ConfigWatcher = watcher
// Watch the .env files
envFiles := []string{".env", ".env." + env}
if err := watcher.Watch(envFiles); err != nil {
logger.Warn("Failed to start watching config files", zap.Error(err))
} else {
logger.Info("Config watcher started", zap.Strings("files", watcher.GetWatchedFiles()))
}
}
}
return config, nil
}
// GetConfigReloader returns the ConfigReloader for this configuration (T0034)
func (c *Config) GetConfigReloader() *ConfigReloader {
return NewConfigReloader(c, c.Logger)
}
// EnvConfig represents the base configuration loaded from environment variables
// This struct is used by the Load() function to load the base configuration
type EnvConfig struct {
AppEnv string
AppPort int
DBHost string
DBPort int
DBUser string
DBPassword string
DBName string
JWTSecret string
RedisURL string
CORSOrigins []string // List of allowed CORS origins
}
// Load loads and validates the environment variables, applying default values
func Load() (*EnvConfig, error) {
// Determine the environment (T0032)
env := getEnv("APP_ENV", "development")
// Load the .env files for the environment (T0032)
// Loaded in order: .env.{env}, then .env
// System environment variables take precedence
if err := LoadEnvFiles(env); err != nil {
return nil, fmt.Errorf("failed to load environment files: %w", err)
}
// Load the allowed CORS origins from the environment variables
corsOrigins := getEnvStringSlice("CORS_ALLOWED_ORIGINS", []string{"*"})
// DB_PASSWORD and JWT_SECRET are required (no defaults)
dbPassword, err := getEnvRequired("DB_PASSWORD")
if err != nil {
return nil, err
}
jwtSecret, err := getEnvRequired("JWT_SECRET")
if err != nil {
return nil, err
}
envDomain := getEnv("APP_DOMAIN", "veza.fr")
config := &EnvConfig{
AppEnv: getEnv("APP_ENV", "development"),
AppPort: getEnvInt("APP_PORT", 8080),
DBHost: getEnv("DB_HOST", envDomain),
DBPort: getEnvInt("DB_PORT", 5432),
DBUser: getEnv("DB_USER", "veza"),
DBPassword: dbPassword,
DBName: getEnv("DB_NAME", "veza_db"),
JWTSecret: jwtSecret,
RedisURL: getEnv("REDIS_URL", "redis://"+envDomain+":6379"),
CORSOrigins: corsOrigins,
}
return config, nil
}
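// Illustrative call site (a sketch, not the actual main.go): Load only needs
// DB_PASSWORD and JWT_SECRET to be present; everything else falls back to the
// defaults above.
//
//	envCfg, err := Load()
//	if err != nil {
//		log.Fatalf("config: %v", err) // e.g. missing DB_PASSWORD or JWT_SECRET
//	}
//	_ = envCfg.AppPort // 8080 unless APP_PORT overrides it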
// ValidateForEnvironment validates the configuration for the current environment (P0-SECURITY)
// In production: strict validation (CORS required, no wildcard, etc.)
// In development: permissive validation with warnings
func (c *Config) ValidateForEnvironment() error {
// First, run the base validation (port, secrets, URLs, etc.)
if err := c.Validate(); err != nil {
return err
}
// Environment-specific validations
switch c.Env {
case EnvProduction:
// PRODUCTION: strict validation
// 1. MOD-P0-001: CORS_ALLOWED_ORIGINS MUST be configured in production (fail-fast)
// Empty CORS origins means strict mode (reject all), which makes the service inaccessible from frontend
if len(c.CORSOrigins) == 0 {
return fmt.Errorf("CORS_ALLOWED_ORIGINS is required in production environment. Empty CORS origins will reject all CORS requests, making the service inaccessible from frontend. Please set CORS_ALLOWED_ORIGINS with explicit origins (e.g., CORS_ALLOWED_ORIGINS=https://app.veza.com,https://www.veza.com)")
}
// 2. CORS_ALLOWED_ORIGINS must NOT contain "*" (wildcard is forbidden in production)
for _, origin := range c.CORSOrigins {
if origin == "*" {
return fmt.Errorf("CORS wildcard '*' is not allowed in production environment. Please specify explicit origins in CORS_ALLOWED_ORIGINS")
}
}
// 3. LogLevel must not be DEBUG in production
if c.LogLevel == "DEBUG" {
return fmt.Errorf("LOG_LEVEL=DEBUG is not allowed in production environment for security reasons")
}
// 4. BE-SEC-014: RabbitMQ URL must be explicitly set in production (no default with credentials)
if c.RabbitMQEnable && c.RabbitMQURL == "" {
return fmt.Errorf("RABBITMQ_URL is required in production when RabbitMQ is enabled. Do not use default credentials in production")
}
// 5. P1.4: CLAMAV_REQUIRED must be true in production - virus scanning is mandatory for uploads
if !getEnvBool("CLAMAV_REQUIRED", true) {
return fmt.Errorf("CLAMAV_REQUIRED must be true in production. Virus scanning is mandatory for uploads")
}
// 6. v0.902: CHAT_JWT_SECRET must differ from JWT_SECRET in production (VEZA-SEC-009)
if c.ChatJWTSecret == c.JWTSecret {
return fmt.Errorf("CHAT_JWT_SECRET must be different from JWT_SECRET in production. Use a separate secret for the Chat Server")
}
// 7. v0.902: OAUTH_ENCRYPTION_KEY required in production for OAuth token encryption (VEZA-SEC-004)
if len(c.OAuthEncryptionKey) < 32 {
return fmt.Errorf("OAUTH_ENCRYPTION_KEY is required in production (min 32 bytes for AES-256). Set OAUTH_ENCRYPTION_KEY with a 32-byte hex or base64 key")
}
// 8. TASK-DEBT-010: JWT_ISSUER and JWT_AUDIENCE must be set for consistent token emission/validation
if c.JWTIssuer == "" || c.JWTAudience == "" {
return fmt.Errorf("JWT_ISSUER and JWT_AUDIENCE must be set in production for consistent JWT validation. Set JWT_ISSUER and JWT_AUDIENCE environment variables")
}
// 9. Hyperswitch must be enabled in production — otherwise the marketplace
// silently "sells" products without taking payment (orders complete as
// CREATED and files are released for free).
if !c.HyperswitchEnabled {
return fmt.Errorf("HYPERSWITCH_ENABLED must be true in production. With payments disabled, marketplace orders complete without charging, effectively giving away products. Set HYPERSWITCH_ENABLED=true and configure HYPERSWITCH_API_KEY / HYPERSWITCH_WEBHOOK_SECRET")
}
// 10. REDIS_URL must be *explicitly* set in production. The struct default
// (redis://<appDomain>:6379) lets a misconfigured pod start up with
// in-memory fallbacks — and in multi-pod deployments that silently
// breaks cross-instance PubSub (chat, session revocation, etc.).
if strings.TrimSpace(os.Getenv("REDIS_URL")) == "" {
return fmt.Errorf("REDIS_URL must be explicitly set in production. A missing value lets the app boot against the default host and silently degrade to in-memory fallbacks that break cross-pod features")
}
// 11. v1.0.8: TRACK_STORAGE_BACKEND must be a known value, and "s3"
// requires the S3 stack to be enabled. Without this guard, a typo
// ("S3", "minio", "bucket") would fall through to local without
// warning; worse, TRACK_STORAGE_BACKEND=s3 with AWS_S3_ENABLED=false
// would crash when the upload path tries to resolve S3StorageService.
switch c.TrackStorageBackend {
case "local", "s3":
// OK
default:
return fmt.Errorf("TRACK_STORAGE_BACKEND must be 'local' or 's3', got %q", c.TrackStorageBackend)
}
if c.TrackStorageBackend == "s3" && !c.S3Enabled {
return fmt.Errorf("TRACK_STORAGE_BACKEND=s3 requires AWS_S3_ENABLED=true and AWS_S3_BUCKET set. Enable the S3 stack or switch TRACK_STORAGE_BACKEND back to 'local'")
}
case EnvTest:
// TEST: validation tailored to tests
// CORS may be empty or configured explicitly
// No strict validation of secrets (they may be test values)
case EnvDevelopment, EnvStaging:
// DEVELOPMENT/STAGING: permissive validation with warnings
// If CORS contains "*", log a warning but do not block
for _, origin := range c.CORSOrigins {
if origin == "*" {
c.Logger.Warn("CORS wildcard '*' detected in development environment. This is acceptable for dev but should never be used in production")
break
}
}
// v1.0.8: TRACK_STORAGE_BACKEND sanity — dev/staging uses the same
// switch as prod, but we warn instead of fail so operators can iterate.
if c.TrackStorageBackend != "local" && c.TrackStorageBackend != "s3" {
c.Logger.Warn("TRACK_STORAGE_BACKEND has unexpected value, falling back to 'local'",
zap.String("value", c.TrackStorageBackend))
c.TrackStorageBackend = "local"
}
if c.TrackStorageBackend == "s3" && !c.S3Enabled {
c.Logger.Warn("TRACK_STORAGE_BACKEND=s3 but AWS_S3_ENABLED=false, falling back to 'local' for track uploads")
c.TrackStorageBackend = "local"
}
}
return nil
}
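// Illustrative startup check (a sketch — the real call site lives outside this
// file): validation is fail-fast, so a misconfigured production pod refuses to
// boot instead of running degraded.
//
//	if err := cfg.ValidateForEnvironment(); err != nil {
//		logger.Fatal("invalid configuration", zap.Error(err))
//	}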
// Validate validates the configuration (T0031, T0036)
// Checks that all configuration values are valid before the application starts
// Uses ConfigValidator for strict, schema-based validation (T0036)
func (c *Config) Validate() error {
validator := NewConfigValidator()
// Validate the port (1-65535) with ConfigValidator (T0036)
if err := validator.ValidatePort(c.AppPort); err != nil {
return fmt.Errorf("APP_PORT validation failed: %w", err)
}
// Validate the JWT secret (minimum 32 characters for security) with ConfigValidator (T0036)
if err := validator.ValidateSecretLength(c.JWTSecret, 32); err != nil {
return fmt.Errorf("JWT_SECRET validation failed: %w", err)
}
// Validate DatabaseURL (required) with ConfigValidator (T0036)
if c.DatabaseURL == "" {
return errors.New("DATABASE_URL is required")
}
// Validate the DatabaseURL format with ConfigValidator (T0036)
// Supports postgres, postgresql, and sqlite
if err := validator.ValidateURL(c.DatabaseURL, "postgres"); err != nil {
if err2 := validator.ValidateURL(c.DatabaseURL, "postgresql"); err2 != nil {
if err3 := validator.ValidateURL(c.DatabaseURL, "sqlite"); err3 != nil {
return fmt.Errorf("DATABASE_URL validation failed: must start with postgres://, postgresql://, or sqlite://")
}
}
}
// Validate RedisURL (required) with ConfigValidator (T0036)
if c.RedisURL == "" {
return errors.New("REDIS_URL is required")
}
// Validate the RedisURL format with ConfigValidator (T0036)
// Supports redis and rediss (Redis over TLS)
if err := validator.ValidateURL(c.RedisURL, "redis"); err != nil {
if err2 := validator.ValidateURL(c.RedisURL, "rediss"); err2 != nil {
return fmt.Errorf("REDIS_URL validation failed: must start with redis:// or rediss://")
}
}
// Validate LogLevel with ValidateEnum (T0036)
if c.LogLevel != "" {
allowedLevels := []string{"DEBUG", "INFO", "WARN", "ERROR"}
if err := validator.ValidateEnum(c.LogLevel, allowedLevels); err != nil {
return fmt.Errorf("LOG_LEVEL validation failed: %w", err)
}
}
// Validate RateLimitLimit and RateLimitWindow with ValidatePositiveInt (T0036)
if err := validator.ValidatePositiveInt(c.RateLimitLimit, "RATE_LIMIT_LIMIT"); err != nil {
return fmt.Errorf("RATE_LIMIT_LIMIT validation failed: %w", err)
}
if err := validator.ValidatePositiveInt(c.RateLimitWindow, "RATE_LIMIT_WINDOW"); err != nil {
return fmt.Errorf("RATE_LIMIT_WINDOW validation failed: %w", err)
}
// v0.701: Validate PlatformFeeRate range
if c.PlatformFeeRate < 0 || c.PlatformFeeRate > 1 {
return fmt.Errorf("PLATFORM_FEE_RATE must be between 0 and 1, got %f", c.PlatformFeeRate)
}
// v0.701: Validate Stripe Connect config coherence
if c.StripeConnectEnabled && c.StripeConnectSecretKey == "" {
if c.Env == EnvProduction {
return errors.New("STRIPE_CONNECT_ENABLED=true but STRIPE_SECRET_KEY is empty")
}
}
// v0.701: Validate transfer retry config
if c.TransferRetryEnabled && c.TransferRetryMaxAttempts < 1 {
return errors.New("TRANSFER_RETRY_MAX must be >= 1 when retry is enabled")
}
// Audit 1.7: Fail startup if bypass flags are set in production
if err := validateNoBypassFlagsInProduction(c.Env); err != nil {
return err
}
return nil
}
// logConfigInitialized logs the initialized configuration with secrets masked (T0037)
// MOD-P0-002: Always mask secrets in logs, even in DEBUG mode
func (c *Config) logConfigInitialized(logger *zap.Logger) {
logger.Info("Configuration initialized successfully",
zap.Int("app_port", c.AppPort),
zap.String("jwt_secret", MaskConfigValue("JWT_SECRET", c.JWTSecret, c.SecretsProvider)),
zap.String("jwt_issuer", c.JWTIssuer),
zap.String("jwt_audience", c.JWTAudience),
zap.String("chat_jwt_secret", MaskConfigValue("CHAT_JWT_SECRET", c.ChatJWTSecret, c.SecretsProvider)),
zap.String("database_url", MaskConfigValue("DATABASE_URL", c.DatabaseURL, c.SecretsProvider)),
zap.String("redis_url", MaskConfigValue("REDIS_URL", c.RedisURL, c.SecretsProvider)),
zap.String("rabbitmq_url", MaskConfigValue("RABBITMQ_URL", c.RabbitMQURL, c.SecretsProvider)),
zap.Strings("cors_origins", c.CORSOrigins),
zap.Int("rate_limit_limit", c.RateLimitLimit),
zap.Int("rate_limit_window", c.RateLimitWindow),
zap.Int("auth_rate_limit_login_attempts", c.AuthRateLimitLoginAttempts),
zap.Int("auth_rate_limit_login_window", c.AuthRateLimitLoginWindow),
zap.Duration("handler_timeout", c.HandlerTimeout),
zap.String("log_level", c.LogLevel),
zap.String("sentry_dsn", MaskConfigValue("SENTRY_DSN", c.SentryDsn, c.SecretsProvider)),
)
}
// Close closes all connections (T0040)
func (c *Config) Close() error {
var err error
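// Note: closeErr overwrites err on each failure below, so if several
// components fail to close, only the last error is reported to the caller.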
// Stop the ConfigWatcher if active (T0040)
if c.ConfigWatcher != nil {
if closeErr := c.ConfigWatcher.Stop(); closeErr != nil {
err = closeErr
}
}
if c.RedisClient != nil {
if closeErr := c.RedisClient.Close(); closeErr != nil {
err = closeErr
}
}
if c.Database != nil {
if closeErr := c.Database.Close(); closeErr != nil {
err = closeErr
}
}
if c.RabbitMQEventBus != nil {
if closeErr := c.RabbitMQEventBus.Close(); closeErr != nil {
err = closeErr
}
}
// FIX #4: Logger.Sync() is handled by the ShutdownManager in main.go
// Do not call Sync() here to avoid a double flush
// The ShutdownManager guarantees the flush with a timeout and error handling
if c.Logger != nil {
// The logger will be synced by the ShutdownManager registered in main.go
// No need to call Sync() here, since that could cause a double flush
// and the error would be ignored anyway
}
return err
}