Some checks failed
Veza CI / Notify on failure (push) Blocked by required conditions
Security Scan / Secret Scanning (gitleaks) (push) Waiting to run
Veza CI / Frontend (Web) (push) Has been cancelled
Veza CI / Backend (Go) (push) Has been cancelled
E2E Playwright / e2e (full) (push) Has been cancelled
Veza CI / Rust (Stream Server) (push) Has been cancelled
Self-hosted edge cache on a dedicated Incus container that sits between clients and the MinIO EC:2 cluster. Replaces the need for an external CDN at v1.0 traffic levels — handles thousands of concurrent listeners on the R720 and leaks zero logs to a third party.

This is the phase-1 alternative documented in the v1.0.9 CDN synthesis: phase-1 = self-hosted Nginx, phase-2 = 2 cache nodes + GeoDNS, phase-3 = Bunny.net via the existing CDN_* config (still inert with CDN_ENABLED=false).

- infra/ansible/roles/nginx_proxy_cache/: install nginx + curl, render nginx.conf with shared zone (128 MiB keys + 20 GiB disk, inactive=7d), render veza-cache site that proxies to the minio_nodes upstream pool with keepalive=32. HLS segments cached 7d via 1 MiB slice; .m3u8 cached 60s; everything else 1h.
- Cache key excludes Authorization / Cookie (presigned URLs only in v1.0). slice_range included for segments so byte-range requests with arbitrary offsets all hit the same cached chunks.
- proxy_cache_use_stale error timeout updating http_500..504 + background_update + lock — survives MinIO partial outages without cold-storming the origin.
- X-Cache-Status surfaced on every response so smoke tests + operators can verify HIT/MISS without parsing access logs.
- stub_status bound to 127.0.0.1:81/__nginx_status for the future prometheus nginx_exporter sidecar.
- infra/ansible/playbooks/nginx_proxy_cache.yml: provisions the Incus container + applies common baseline + role.
- inventory/lab.yml: new nginx_cache group.
- infra/ansible/tests/test_nginx_cache.sh: MISS→HIT roundtrip via X-Cache-Status, on-disk entry verification.

Acceptance: smoke test reports MISS then HIT for the same URL; cache directory carries on-disk entries.

No backend code change — the cache is transparent. To route through it, flip AWS_S3_ENDPOINT=http://nginx-cache.lxd:80 in the API env.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
107 lines
4.2 KiB
Bash
Executable file
107 lines
4.2 KiB
Bash
Executable file
#!/usr/bin/env bash
# test_nginx_cache.sh — verify the phase-1 edge cache actually caches.
#
# Sequence:
#   1. Pre-flight: nginx-cache reachable + /health returns 200.
#   2. Curl a fixed URL twice. First request must be MISS, second
#      must be HIT, as surfaced by the X-Cache-Status response header.
#   3. Verify the cache directory has at least one entry on disk
#      (only performed when RUN_VIA_INCUS=1).
#
# Run on the Incus host that owns the nginx-cache container, OR
# from a workstation that can reach nginx-cache.lxd:80. The test
# uses a public bucket path so it doesn't need MinIO credentials.
#
# v1.0.9 — phase-1 edge cache acceptance.
#
# Usage:
#   TEST_URL=http://nginx-cache.lxd/some-public-object.m4s \
#     bash infra/ansible/tests/test_nginx_cache.sh
#
# Exit codes:
#   0 — cache is working (MISS then HIT)
#   1 — cache not reachable, or /health did not return 200
#   2 — cache misbehaving: X-Cache-Status missing or unexpected, second
#       request did not return HIT, or cache directory empty
#   3 — required tool missing
# Strict mode: abort on any unhandled command failure (-e), on expansion of an
# unset variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Tunables — each one may be overridden from the environment.
#   CACHE_HOST     hostname of the edge cache; used to build the /health URL
#   CACHE_PORT     TCP port of the edge cache
#   TEST_URL       URL requested twice for the MISS→HIT check; defaults to the
#                  /health URL built from CACHE_HOST and CACHE_PORT
#   RUN_VIA_INCUS  "1" routes curl through `incus exec nginx-cache` and enables
#                  the on-disk verification step; any other value runs curl
#                  directly
CACHE_HOST=${CACHE_HOST:-nginx-cache.lxd}
CACHE_PORT=${CACHE_PORT:-80}
TEST_URL=${TEST_URL:-http://${CACHE_HOST}:${CACHE_PORT}/health}
RUN_VIA_INCUS=${RUN_VIA_INCUS:-0}
# Nothing below reassigns these; freezing them turns an accidental overwrite
# into an immediate error instead of a silently different test target.
readonly CACHE_HOST CACHE_PORT TEST_URL RUN_VIA_INCUS

# log WORDS... — emit one "[HH:MM:SS] WORDS..." line on stderr, keeping
# diagnostics off stdout.
log() {
  local text="$*"
  printf '[%s] %s\n' "$(date +%H:%M:%S)" "$text" >&2
}
# fail MESSAGE [CODE] — log "FAIL: MESSAGE" through log() and terminate the
# script with exit status CODE (default 2).
# Only $1 is logged: the previous "$*" also appended the numeric exit code to
# the message (e.g. "required tool missing on host: curl 3").
fail() {
  local message=${1-}
  local code=${2:-2}
  log "FAIL: ${message}"
  exit "${code}"
}

# require TOOL... — ensure every named command is resolvable on the machine
# running this script. The first missing one ends the script through
# fail() with exit status 3.
# Accepts any number of names; a single-argument call behaves as before.
require() {
  local tool
  for tool in "$@"; do
    command -v "$tool" >/dev/null 2>&1 \
      || fail "required tool missing on host: $tool" 3
  done
}

# Check the tools this run will actually invoke on the local machine.
# With RUN_VIA_INCUS=1 every curl/find call goes through `incus exec`, so the
# local dependency is `incus` (previously unchecked) rather than a local curl.
if [ "${RUN_VIA_INCUS:-0}" = "1" ]; then
  require incus
else
  require curl
fi
# date is used by log() for the timestamp prefix.
require date

# do_curl ARGS... — run `curl -sS ARGS...` in the right context and return
# curl's exit status.
#   RUN_VIA_INCUS=1 : curl is executed *inside the nginx-cache container* via
#                     `incus exec`, so the machine running this script does
#                     not have to resolve or reach the cache hostname itself.
#                     NOTE(review): assumes the hostnames in the URLs resolve
#                     from inside the container — confirm.
#   anything else   : curl is executed directly on the machine running the
#                     script.
do_curl() {
  if [ "$RUN_VIA_INCUS" != "1" ]; then
    curl -sS "$@"
    return
  fi
  incus exec nginx-cache -- curl -sS "$@"
}

# -----------------------------------------------------------------------------
# 1. Pre-flight — /health is alive.
# -----------------------------------------------------------------------------
log "step 0: pre-flight — GET ${CACHE_HOST}:${CACHE_PORT}/health"
health_url="http://${CACHE_HOST}:${CACHE_PORT}/health"
# Capture stdout only: with `-o /dev/null -w "%{http_code}"` that is exactly
# the status code. stderr is deliberately NOT merged in (no 2>&1): curl or
# incus diagnostics would otherwise corrupt the "200" comparison below, and
# the reason for a failed connection would be swallowed instead of shown.
if ! http_code=$(do_curl --max-time 5 -o /dev/null -w "%{http_code}" "$health_url"); then
  fail "cache not reachable at $health_url" 1
fi
if [ "$http_code" != "200" ]; then
  fail "cache /health returned HTTP $http_code, want 200" 1
fi

# -----------------------------------------------------------------------------
# 2. The ${TEST_URL} dance — first MISS, second HIT.
# -----------------------------------------------------------------------------

# fetch_cache_status URL — GET URL (body discarded, headers dumped to stdout)
# and print the value of its X-Cache-Status header, or nothing when the header
# is absent. Returns non-zero when the curl invocation itself fails.
fetch_cache_status() {
  local url=$1
  local headers
  # curl runs on its own, outside the parsing pipeline: under
  # `set -euo pipefail` a curl failure inside `status=$(curl … | awk …)` would
  # abort the whole script with curl's raw exit code and no FAIL line.
  # stderr is not merged in, so curl diagnostics reach the operator instead of
  # being fed to the header parser.
  headers=$(do_curl --max-time 10 -o /dev/null -D - "$url") || return 1
  # Strip CRs from the CRLF header lines, match the header name
  # case-insensitively, and drop padding spaces from the value.
  printf '%s\n' "$headers" \
    | tr -d '\r' \
    | awk -F': ' 'tolower($1) == "x-cache-status" { print $2 }' \
    | tr -d ' '
}

log "step 1: first request to $TEST_URL — expect MISS"
if ! status1=$(fetch_cache_status "$TEST_URL"); then
  fail "first request to $TEST_URL failed — cache not reachable" 1
fi
log "  X-Cache-Status: $status1"
if [ -z "$status1" ]; then
  fail "first response missing X-Cache-Status header — site config not applying ?" 2
fi

# A cacheable URL with no prior entry is MISS or EXPIRED. STALE means
# the upstream is unhealthy, which is a different bug — fail loud.
case "$status1" in
  MISS|EXPIRED|REVALIDATED|BYPASS) ;;
  HIT) log "  WARN: first request already HIT (a previous run cached it)" ;;
  STALE|UPDATING) fail "origin upstream unhealthy — got STALE on cold cache" 2 ;;
  *) fail "unexpected cache status on first request: $status1" 2 ;;
esac

log "step 2: second request to $TEST_URL — expect HIT"
sleep 1  # give nginx a moment to flush the entry to disk
if ! status2=$(fetch_cache_status "$TEST_URL"); then
  fail "second request to $TEST_URL failed — cache not reachable" 1
fi
log "  X-Cache-Status: $status2"

if [ "$status2" != "HIT" ]; then
  fail "second request returned X-Cache-Status=$status2, want HIT — cache config not effective" 2
fi

# -----------------------------------------------------------------------------
# 3. On-disk verification — at least one entry under the cache root.
# -----------------------------------------------------------------------------
log "step 3: verifying cache directory has on-disk entries"
if [ "$RUN_VIA_INCUS" = "1" ]; then
  # Count regular files under the cache root from inside the container.
  # A failing `incus exec` is reported as such; it used to be folded into a
  # count of 0 and misreported as an empty cache directory.
  if ! count=$(incus exec nginx-cache -- bash -c "find /var/cache/nginx/veza -type f 2>/dev/null | wc -l"); then
    fail "could not inspect /var/cache/nginx/veza inside the nginx-cache container" 2
  fi
  # Some wc implementations pad the number with blanks; strip them so the
  # check below is numeric rather than an exact string match on "0".
  count=${count//[[:space:]]/}
  log "  on-disk entries: $count"
  case "$count" in
    ''|*[!0-9]*) fail "could not parse on-disk entry count: '$count'" 2 ;;
  esac
  if [ "$count" -eq 0 ]; then
    fail "cache directory is empty after a HIT — unexpected" 2
  fi
else
  # The check needs to run find inside the container; say that it was skipped
  # instead of letting the step banner imply that it ran.
  log "  skipped: on-disk check only runs with RUN_VIA_INCUS=1"
fi

log "PASS: edge cache MISS→HIT roundtrip OK (status1=$status1, status2=$status2)"
exit 0