From 698859cc52dc122d492d3b573cf4a159c6d9ac48 Mon Sep 17 00:00:00 2001 From: senke Date: Thu, 16 Apr 2026 23:52:36 +0200 Subject: [PATCH] feat(backend,web): surface RTMP ingest health on the Go Live page MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fifth item of the v1.0.6 backlog. "Go Live" was silent when the nginx-rtmp profile wasn't up — an artist could copy the RTMP URL + stream key, fire up OBS, hit "Start Streaming" and broadcast into the void with no in-UI signal that the ingest wasn't listening. The audit flagged this 🟡 ("livestream sans feedback UI si nginx-rtmp down"). Backend (`GET /api/v1/live/health`) * `LiveHealthHandler` TCP-dials `NGINX_RTMP_ADDR` (default `localhost:1935`) with a 2s timeout. Reports `rtmp_reachable`, `rtmp_addr`, a UI-safe `error` string (a fixed message, never the raw dial error — note that `rtmp_addr` still echoes the configured dial address, so internal hostnames set in `NGINX_RTMP_ADDR` are visible to the browser), and `last_check_at`. * 15s TTL cache protected by a mutex so a burst of page loads can't hammer the ingest. First call dials; subsequent calls within TTL serve the cached verdict. * Response ships `Cache-Control: private, max-age=15` so browsers piggy-back the same quarter-minute window. * Whenever the reported status is unreachable — freshly dialed or served from the cache — the handler emits a WARN log so an operator watching backend logs sees the outage before a user does. * Public endpoint — no auth. The "RTMP is up / down" signal has no sensitive payload and is useful pre-login too. Frontend * `useLiveHealth()` hook: react-query with 15s stale time, 1 retry, then falls back to an optimistic `{ rtmpReachable: true }` — we'd rather miss a banner than flash a false negative during a transient blip on the health endpoint itself. * `LiveRtmpHealthBanner`: amber, non-blocking banner with a Retry button that invalidates the health query. Copy explicitly tells the artist their stream key is still valid but broadcasting now won't reach anyone. 
* `GoLivePage` wraps `GoLiveView` in a vertical stack with the banner above — the view itself stays unchanged (the key + instructions remain readable even when the ingest is down). Tests * 3 Go tests: live listener reports reachable + Cache-Control header; dead address reports unreachable + UI-safe error (asserts no `127.0.0.1` leak); TTL cache survives listener teardown within window. * 3 Vitest tests: banner renders nothing when reachable; banner visible + Retry enabled when unreachable; Retry invalidates the right query key. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../components/LiveRtmpHealthBanner.test.tsx | 64 +++++++++++++ .../live/components/LiveRtmpHealthBanner.tsx | 50 ++++++++++ .../src/features/live/hooks/useLiveHealth.ts | 63 ++++++++++++ .../src/features/live/pages/GoLivePage.tsx | 34 ++++--- veza-backend-api/internal/api/routes_live.go | 5 + .../internal/handlers/live_health_handler.go | 96 +++++++++++++++++++ .../handlers/live_health_handler_test.go | 89 +++++++++++++++++ 7 files changed, 386 insertions(+), 15 deletions(-) create mode 100644 apps/web/src/features/live/components/LiveRtmpHealthBanner.test.tsx create mode 100644 apps/web/src/features/live/components/LiveRtmpHealthBanner.tsx create mode 100644 apps/web/src/features/live/hooks/useLiveHealth.ts create mode 100644 veza-backend-api/internal/handlers/live_health_handler.go create mode 100644 veza-backend-api/internal/handlers/live_health_handler_test.go diff --git a/apps/web/src/features/live/components/LiveRtmpHealthBanner.test.tsx b/apps/web/src/features/live/components/LiveRtmpHealthBanner.test.tsx new file mode 100644 index 000000000..e1094ee66 --- /dev/null +++ b/apps/web/src/features/live/components/LiveRtmpHealthBanner.test.tsx @@ -0,0 +1,64 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { render, screen, fireEvent } from '@testing-library/react'; +import { QueryClient, QueryClientProvider } from '@tanstack/react-query'; +import type { ReactNode } 
from 'react'; + +import { LiveRtmpHealthBanner } from './LiveRtmpHealthBanner'; + +const mockedHealth = { current: { rtmpReachable: true, rtmpAddr: '', error: null, lastCheckAt: '' } }; +const mockInvalidate = vi.fn(); + +vi.mock('../hooks/useLiveHealth', () => ({ + useLiveHealth: () => mockedHealth.current, + LIVE_HEALTH_QUERY_KEY: ['live', 'health'], +})); + +vi.mock('@tanstack/react-query', async () => { + const actual = await vi.importActual('@tanstack/react-query'); + return { + ...actual, + useQueryClient: () => ({ invalidateQueries: mockInvalidate }), + }; +}); + +function wrap(ui: ReactNode) { + const client = new QueryClient({ defaultOptions: { queries: { retry: false } } }); + return {ui}; +} + +describe('LiveRtmpHealthBanner', () => { + beforeEach(() => { + mockInvalidate.mockReset(); + }); + + it('renders nothing when the RTMP ingest is reachable', () => { + mockedHealth.current = { rtmpReachable: true, rtmpAddr: 'x', error: null, lastCheckAt: '' }; + const { container } = render(wrap()); + expect(container).toBeEmptyDOMElement(); + }); + + it('shows the banner when the ingest is unreachable', () => { + mockedHealth.current = { + rtmpReachable: false, + rtmpAddr: 'rtmp', + error: 'RTMP ingest server is not reachable', + lastCheckAt: '', + }; + render(wrap()); + expect(screen.getByTestId('rtmp-health-banner')).toBeInTheDocument(); + expect(screen.getByText(/not reachable/i)).toBeInTheDocument(); + expect(screen.getByTestId('rtmp-health-retry')).toBeEnabled(); + }); + + it('invalidates the health query when Retry is clicked', () => { + mockedHealth.current = { + rtmpReachable: false, + rtmpAddr: 'rtmp', + error: 'RTMP ingest server is not reachable', + lastCheckAt: '', + }; + render(wrap()); + fireEvent.click(screen.getByTestId('rtmp-health-retry')); + expect(mockInvalidate).toHaveBeenCalledWith({ queryKey: ['live', 'health'] }); + }); +}); diff --git a/apps/web/src/features/live/components/LiveRtmpHealthBanner.tsx 
b/apps/web/src/features/live/components/LiveRtmpHealthBanner.tsx new file mode 100644 index 000000000..f73197e8a --- /dev/null +++ b/apps/web/src/features/live/components/LiveRtmpHealthBanner.tsx @@ -0,0 +1,50 @@ +/** + * LiveRtmpHealthBanner — v1.0.6 + * + * Amber warning at the top of Go Live pages when the RTMP ingest is + * unreachable. Non-blocking (the artist can still read their stream key + * for later) but visible enough that they won't press Stream in OBS + * before noticing the infrastructure is down. + */ + +import { AlertTriangle } from 'lucide-react'; +import { useQueryClient } from '@tanstack/react-query'; +import { LIVE_HEALTH_QUERY_KEY, useLiveHealth } from '../hooks/useLiveHealth'; +import { Button } from '@/components/ui/button'; + +export function LiveRtmpHealthBanner() { + const health = useLiveHealth(); + const qc = useQueryClient(); + + if (health.rtmpReachable) return null; + + return ( +
+ +
+

+ The RTMP ingest server is not reachable. +

+

+ Your stream key is still valid, but if you start broadcasting now + no audience will see the stream. Wait a minute or contact support + if this persists. +

+
+ +
+ ); +} + +export default LiveRtmpHealthBanner; diff --git a/apps/web/src/features/live/hooks/useLiveHealth.ts b/apps/web/src/features/live/hooks/useLiveHealth.ts new file mode 100644 index 000000000..dc44e69bf --- /dev/null +++ b/apps/web/src/features/live/hooks/useLiveHealth.ts @@ -0,0 +1,63 @@ +/** + * useLiveHealth — v1.0.6 + * + * Polls GET /api/v1/live/health to detect whether the nginx-rtmp ingest is + * reachable. Surfaces the state to the Go Live UI so artists see a banner + * when their stream key would go into the void. + * + * The backend caches its own TCP dial to 15s; the hook uses the same TTL + * on react-query so the network sees at most one health call per quarter- + * minute per tab. Failing requests degrade gracefully to + * `{ rtmpReachable: true }` — better to miss a banner than to flash a + * false negative during a transient blip on the health endpoint itself. + */ + +import { useQuery } from '@tanstack/react-query'; +import { apiClient } from '@/services/api/client'; + +export interface LiveHealth { + rtmpReachable: boolean; + rtmpAddr: string; + error: string | null; + lastCheckAt: string; +} + +interface ServerLiveHealth { + rtmp_reachable?: boolean; + rtmp_addr?: string; + error?: string; + last_check_at?: string; +} + +const OPTIMISTIC_FALLBACK: LiveHealth = { + rtmpReachable: true, + rtmpAddr: '', + error: null, + lastCheckAt: new Date().toISOString(), +}; + +export const LIVE_HEALTH_QUERY_KEY = ['live', 'health'] as const; + +export function useLiveHealth(): LiveHealth { + const { data } = useQuery({ + queryKey: LIVE_HEALTH_QUERY_KEY, + queryFn: async ({ signal }) => { + const response = await apiClient.get('/live/health', { signal }); + const body = response.data as ServerLiveHealth; + return { + rtmpReachable: body?.rtmp_reachable !== false, // default true if the key is missing + rtmpAddr: body?.rtmp_addr ?? '', + error: body?.error ?? null, + lastCheckAt: body?.last_check_at ?? 
new Date().toISOString(), + } satisfies LiveHealth; + }, + staleTime: 15 * 1000, + gcTime: 60 * 1000, + retry: 1, + refetchOnWindowFocus: true, + }); + + return data ?? OPTIMISTIC_FALLBACK; +} + +export { OPTIMISTIC_FALLBACK }; diff --git a/apps/web/src/features/live/pages/GoLivePage.tsx b/apps/web/src/features/live/pages/GoLivePage.tsx index 6cbd7df09..fac399415 100644 --- a/apps/web/src/features/live/pages/GoLivePage.tsx +++ b/apps/web/src/features/live/pages/GoLivePage.tsx @@ -7,6 +7,7 @@ import { useState, useEffect } from 'react'; import { GoLiveView, GoLiveViewSkeleton } from './go-live-page/GoLiveView'; import { ErrorDisplay } from '@/components/ui/ErrorDisplay'; import { liveService } from '@/services/liveService'; +import { LiveRtmpHealthBanner } from '../components/LiveRtmpHealthBanner'; export function GoLivePage() { const [streamKey, setStreamKey] = useState(null); @@ -34,21 +35,24 @@ export function GoLivePage() { ); return ( - { - await liveService.createStream(data); - const res = await liveService.getMyStreamKey(); - setStreamKey(res.stream_key); - setRtmpUrl(res.rtmp_url); - }} - onRegenerateKey={async () => { - const res = await liveService.regenerateStreamKey(); - setStreamKey(res.stream_key); - }} - isLoading={false} - /> +
+ + { + await liveService.createStream(data); + const res = await liveService.getMyStreamKey(); + setStreamKey(res.stream_key); + setRtmpUrl(res.rtmp_url); + }} + onRegenerateKey={async () => { + const res = await liveService.regenerateStreamKey(); + setStreamKey(res.stream_key); + }} + isLoading={false} + /> +
); } diff --git a/veza-backend-api/internal/api/routes_live.go b/veza-backend-api/internal/api/routes_live.go index 4241b3e24..0fe502da2 100644 --- a/veza-backend-api/internal/api/routes_live.go +++ b/veza-backend-api/internal/api/routes_live.go @@ -35,5 +35,10 @@ func (r *APIRouter) setupLiveRoutes(router *gin.RouterGroup) { // Public routes (after protected) live.GET("/streams", liveStreamHandler.ListLiveStreams) live.GET("/streams/:id", liveStreamHandler.GetLiveStream) + + // v1.0.6: RTMP ingest health probe — lets the Go Live UI render a + // warning banner when nginx-rtmp isn't up (so artists aren't + // silently broadcasting into a void). + live.GET("/health", handlers.LiveHealthHandler(r.logger)) } } diff --git a/veza-backend-api/internal/handlers/live_health_handler.go b/veza-backend-api/internal/handlers/live_health_handler.go new file mode 100644 index 000000000..eb96afa5d --- /dev/null +++ b/veza-backend-api/internal/handlers/live_health_handler.go @@ -0,0 +1,96 @@ +package handlers + +import ( + "net" + "net/http" + "os" + "sync" + "time" + + "github.com/gin-gonic/gin" + "go.uber.org/zap" +) + +// LiveHealthResponse is returned by GET /api/v1/live/health. +type LiveHealthResponse struct { + RtmpReachable bool `json:"rtmp_reachable"` + RtmpAddr string `json:"rtmp_addr"` + // Error is populated when RtmpReachable is false. It is a short, UI-safe + // message — not the raw dial error (which may leak internal hostnames). + Error string `json:"error,omitempty"` + LastCheckAt time.Time `json:"last_check_at"` +} + +// liveHealthChecker probes the RTMP TCP port on a short TTL cache. Every +// call to CurrentStatus either returns the cached value or triggers a +// fresh dial; dials are serialized by a mutex so a thundering herd of +// page-loads can't DOS the RTMP port. 
+type liveHealthChecker struct { + addr string + ttl time.Duration + dialer *net.Dialer + mu sync.Mutex + last LiveHealthResponse + checked bool +} + +func newLiveHealthChecker() *liveHealthChecker { + addr := os.Getenv("NGINX_RTMP_ADDR") + if addr == "" { + addr = "localhost:1935" + } + return &liveHealthChecker{ + addr: addr, + ttl: 15 * time.Second, + dialer: &net.Dialer{Timeout: 2 * time.Second}, + } +} + +func (c *liveHealthChecker) CurrentStatus() LiveHealthResponse { + c.mu.Lock() + defer c.mu.Unlock() + + if c.checked && time.Since(c.last.LastCheckAt) < c.ttl { + return c.last + } + + conn, err := c.dialer.Dial("tcp", c.addr) + now := time.Now().UTC() + if err != nil { + c.last = LiveHealthResponse{ + RtmpReachable: false, + RtmpAddr: c.addr, + Error: "RTMP ingest server is not reachable", + LastCheckAt: now, + } + } else { + _ = conn.Close() + c.last = LiveHealthResponse{ + RtmpReachable: true, + RtmpAddr: c.addr, + LastCheckAt: now, + } + } + c.checked = true + return c.last +} + +// LiveHealthHandler returns a gin handler that reports RTMP reachability. +// v1.0.6: the Go Live UI surfaces a warning banner when rtmp_reachable is +// false so artists don't silently broadcast into a void (was the "Go Live +// silencieux si nginx-rtmp down" audit finding). +func LiveHealthHandler(logger *zap.Logger) gin.HandlerFunc { + checker := newLiveHealthChecker() + return func(c *gin.Context) { + status := checker.CurrentStatus() + if !status.RtmpReachable && logger != nil { + logger.Warn("RTMP ingest unreachable", + zap.String("rtmp_addr", status.RtmpAddr), + ) + } + // Encourage clients to re-check on every page load but not burn the TCP + // dial more often than the checker's own TTL. 
+ c.Header("Cache-Control", "private, max-age=15") + c.JSON(http.StatusOK, status) + } +} diff --git a/veza-backend-api/internal/handlers/live_health_handler_test.go b/veza-backend-api/internal/handlers/live_health_handler_test.go new file mode 100644 index 000000000..0e562e51b --- /dev/null +++ b/veza-backend-api/internal/handlers/live_health_handler_test.go @@ -0,0 +1,89 @@ +package handlers + +import ( + "encoding/json" + "net" + "net/http" + "net/http/httptest" + "testing" + + "github.com/gin-gonic/gin" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.uber.org/zap/zaptest" +) + +// TestLiveHealthHandler_UnreachableReportsFalse: no listener on the address +// → RtmpReachable=false + a UI-safe error string (not the raw dial message). +func TestLiveHealthHandler_UnreachableReportsFalse(t *testing.T) { + gin.SetMode(gin.TestMode) + + // 127.0.0.1:0 tries to connect to port 0 which always refuses. + t.Setenv("NGINX_RTMP_ADDR", "127.0.0.1:0") + + router := gin.New() + router.GET("/live/health", LiveHealthHandler(zaptest.NewLogger(t))) + + req := httptest.NewRequest(http.MethodGet, "/live/health", nil) + w := httptest.NewRecorder() + router.ServeHTTP(w, req) + + assert.Equal(t, http.StatusOK, w.Code) + var resp LiveHealthResponse + require.NoError(t, json.Unmarshal(w.Body.Bytes(), &resp)) + assert.False(t, resp.RtmpReachable) + assert.Equal(t, "127.0.0.1:0", resp.RtmpAddr) + assert.Equal(t, "RTMP ingest server is not reachable", resp.Error) + assert.False(t, resp.LastCheckAt.IsZero()) +} + +// TestLiveHealthHandler_ReachableReportsTrue: stand up a real TCP listener +// on an ephemeral port and point the checker at it. 
+func TestLiveHealthHandler_ReachableReportsTrue(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+
+	listener, err := net.Listen("tcp", "127.0.0.1:0")
+	require.NoError(t, err)
+	defer listener.Close()
+
+	t.Setenv("NGINX_RTMP_ADDR", listener.Addr().String()) // must be set before LiveHealthHandler(...) runs: the checker reads this env var when the handler is constructed
+
+	router := gin.New()
+	router.GET("/live/health", LiveHealthHandler(zaptest.NewLogger(t)))
+
+	req := httptest.NewRequest(http.MethodGet, "/live/health", nil)
+	w := httptest.NewRecorder()
+	router.ServeHTTP(w, req)
+
+	assert.Equal(t, http.StatusOK, w.Code)
+	var resp LiveHealthResponse
+	require.NoError(t, json.Unmarshal(w.Body.Bytes(), &resp))
+	assert.True(t, resp.RtmpReachable)
+	assert.Empty(t, resp.Error) // Error is only populated by the checker when the dial fails
+	assert.Equal(t, "private, max-age=15", w.Header().Get("Cache-Control")) // the handler sets this header on every response
+}
+
+// TestLiveHealthChecker_CachesResults: two calls within the TTL dial exactly
+// once. Verified by letting the listener close after the first dial and
+// confirming the cached "reachable" state still returns until TTL expires.
+func TestLiveHealthChecker_CachesResults(t *testing.T) {
+	listener, err := net.Listen("tcp", "127.0.0.1:0")
+	require.NoError(t, err)
+	addr := listener.Addr().String()
+
+	checker := &liveHealthChecker{
+		addr:   addr,
+		ttl:    30 * 1e9, // 30s, written as a raw nanosecond count (ttl is a time.Duration)
+		dialer: &net.Dialer{Timeout: 1e9}, // 1s dial timeout, also in nanoseconds
+	}
+	first := checker.CurrentStatus() // checked is still false here, so this call performs the dial
+	assert.True(t, first.RtmpReachable)
+
+	// Kill the listener — a fresh dial now would fail.
+	listener.Close()
+
+	// Still within the 30s TTL → CurrentStatus returns the stored result without dialing.
+	second := checker.CurrentStatus()
+	assert.True(t, second.RtmpReachable, "cached result must survive until TTL expires")
+	assert.Equal(t, first.LastCheckAt, second.LastCheckAt) // LastCheckAt is only rewritten on a fresh dial, so equality shows the cache was used
+}