package monitoring

// Real User Monitoring (RUM) — v1.0.10 ops item 9.
//
// Web Vitals is the standard Google-defined frontend perf signal :
// LCP (Largest Contentful Paint), CLS (Cumulative Layout Shift),
// FID (First Input Delay — being deprecated), INP (Interaction to
// Next Paint — replaces FID), TTFB (Time to First Byte). Each
// metric has a published "good / needs improvement / poor" budget ;
// p75 across users is the headline number.
//
// We collect from the browser via the npm `web-vitals` package
// (instrumented in apps/web/src/observability/webVitals.ts) and
// receive POSTs at /api/v1/observability/web-vitals. The handler
// pushes into the histograms below. Synthetic probes already
// cover server-side latency ; RUM closes the "user's actual
// browser experience" gap (slow CDN edges, third-party scripts,
// device CPU, mobile networks).
//
// Cardinality discipline : labels are bounded — metric name (5
// values), route (capped via the truncation logic in the handler),
// device (3 values mobile/desktop/tablet). No user_id, no URL
// query string, no full path with IDs. Prometheus tolerates a few
// thousand label combinations ; per-user labels would explode it.

import (
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
)

var (
	// WebVitalsLCPSeconds — Largest Contentful Paint, in seconds.
	// Google budget : <2.5s good, 2.5–4s needs improvement, >4s poor.
	// Buckets cover the typical range with extra resolution near the
	// "good" boundary which is what we tune against.
	WebVitalsLCPSeconds = promauto.NewHistogramVec(
		prometheus.HistogramOpts{
			Name:    "veza_web_vitals_lcp_seconds",
			Help:    "Largest Contentful Paint reported by the browser, in seconds.",
			Buckets: []float64{0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 4.0, 6.0, 10.0},
		},
		[]string{"route", "device"},
	)

	// WebVitalsCLS — Cumulative Layout Shift, unitless. Google
	// budget : <0.1 good, 0.1–0.25 needs improvement, >0.25 poor.
	// Histogram (not counter) so we can pull p75 over windows.
	WebVitalsCLS = promauto.NewHistogramVec(
		prometheus.HistogramOpts{
			Name:    "veza_web_vitals_cls",
			Help:    "Cumulative Layout Shift score reported by the browser (unitless).",
			Buckets: []float64{0.05, 0.1, 0.15, 0.25, 0.5, 1.0},
		},
		[]string{"route", "device"},
	)

	// WebVitalsINPSeconds — Interaction to Next Paint, in seconds.
	// Google budget : <0.2s good, 0.2–0.5s needs improvement, >0.5s
	// poor. Replaces FID as the primary responsiveness metric in
	// Core Web Vitals 2024+.
	WebVitalsINPSeconds = promauto.NewHistogramVec(
		prometheus.HistogramOpts{
			Name:    "veza_web_vitals_inp_seconds",
			Help:    "Interaction to Next Paint reported by the browser, in seconds.",
			Buckets: []float64{0.05, 0.1, 0.2, 0.3, 0.5, 0.75, 1.0, 2.0},
		},
		[]string{"route", "device"},
	)

	// WebVitalsFIDSeconds — First Input Delay (legacy, kept for
	// backwards-compat with old browsers that don't report INP).
	// Buckets share INP's structure for dashboard symmetry.
	WebVitalsFIDSeconds = promauto.NewHistogramVec(
		prometheus.HistogramOpts{
			Name:    "veza_web_vitals_fid_seconds",
			Help:    "First Input Delay reported by the browser, in seconds (legacy ; INP is preferred).",
			Buckets: []float64{0.05, 0.1, 0.2, 0.3, 0.5, 0.75, 1.0, 2.0},
		},
		[]string{"route", "device"},
	)

	// WebVitalsTTFBSeconds — Time To First Byte, in seconds. Often
	// dominated by network + edge cache miss ; useful for tracking
	// CDN-edge degradations (compare with the synthetic probe TTFB).
	WebVitalsTTFBSeconds = promauto.NewHistogramVec(
		prometheus.HistogramOpts{
			Name:    "veza_web_vitals_ttfb_seconds",
			Help:    "Time to First Byte reported by the browser, in seconds.",
			Buckets: []float64{0.05, 0.1, 0.2, 0.4, 0.8, 1.5, 3.0},
		},
		[]string{"route", "device"},
	)

	// WebVitalsBeaconsTotal counts inbound RUM beacons regardless
	// of metric. Drives the "RUM stopped flowing" alert (no beacons
	// in 30m = frontend instrumentation broken or CDN blocking us).
	WebVitalsBeaconsTotal = promauto.NewCounterVec(
		prometheus.CounterOpts{
			Name: "veza_web_vitals_beacons_total",
			Help: "Total RUM beacons received broken down by metric name.",
		},
		[]string{"metric"},
	)

	// WebVitalsRejectedTotal counts beacons rejected for invalid
	// payload (bad metric name, out-of-range value, missing route).
	// Bumps on this counter mean a frontend regression or a hostile
	// caller trying to seed the metric.
	WebVitalsRejectedTotal = promauto.NewCounterVec(
		prometheus.CounterOpts{
			Name: "veza_web_vitals_rejected_total",
			Help: "RUM beacons rejected during validation, by reason.",
		},
		[]string{"reason"},
	)
)

// RecordWebVital dispatches a single metric observation into the
// right histogram. metric is one of "LCP", "CLS", "INP", "FID",
// "TTFB" (case-insensitive). value is the metric value as reported
// by the browser, in milliseconds for time metrics and unitless
// for CLS — the function converts ms→s where appropriate so the
// histograms stay consistent (seconds + unitless for CLS).
//
// Returns false if the metric name is unknown ; the handler
// increments WebVitalsRejectedTotal{reason="bad_metric"} on
// false.
func RecordWebVital(metric, route, device string, value float64) bool {
	WebVitalsBeaconsTotal.WithLabelValues(metric).Inc()
	switch metric {
	case "LCP":
		WebVitalsLCPSeconds.WithLabelValues(route, device).Observe(value / 1000.0)
	case "CLS":
		WebVitalsCLS.WithLabelValues(route, device).Observe(value)
	case "INP":
		WebVitalsINPSeconds.WithLabelValues(route, device).Observe(value / 1000.0)
	case "FID":
		WebVitalsFIDSeconds.WithLabelValues(route, device).Observe(value / 1000.0)
	case "TTFB":
		WebVitalsTTFBSeconds.WithLabelValues(route, device).Observe(value / 1000.0)
	default:
		return false
	}
	return true
}