veza/scripts/bootstrap/verify-local.sh

132 lines
5.4 KiB
Bash
Raw Normal View History

feat(bootstrap): two-host deploy-pipeline bootstrap with idempotent verify

Replace the long manual checklist (RUNBOOK_DEPLOY_BOOTSTRAP) with six
scripts. Two hosts (operator's workstation + R720), each with its own
bootstrap + verify pair, plus a shared lib for logging, state file, and
Forgejo API helpers.

Files :
  scripts/bootstrap/
  ├── lib.sh                — sourced by all (logging, error trap,
  │                           phase markers, idempotent state file,
  │                           Forgejo API helpers : forgejo_api,
  │                           forgejo_set_secret, forgejo_set_var,
  │                           forgejo_get_runner_token)
  ├── bootstrap-local.sh    — drives 6 phases on the operator's
  │                           workstation
  ├── bootstrap-remote.sh   — runs on the R720 (over SSH) ; 4 phases
  ├── verify-local.sh       — read-only check of local state
  ├── verify-remote.sh      — read-only check of R720 state
  ├── enable-auto-deploy.sh — flips the deploy.yml gate after a
  │                           successful manual run
  ├── .env.example          — template for site config
  └── README.md             — usage + troubleshooting

Phases :
  Local
    1. preflight — required tools, SSH to R720, DNS resolution
    2. vault     — render vault.yml from example, autogenerate JWT keys,
                   prompt+encrypt, write .vault-pass
    3. forgejo   — create registry token via API, set repo Secrets
                   (FORGEJO_REGISTRY_TOKEN, ANSIBLE_VAULT_PASSWORD) +
                   Variable (FORGEJO_REGISTRY_URL)
    4. r720      — fetch runner registration token, stream
                   bootstrap-remote.sh + lib.sh over SSH
    5. haproxy   — ansible-playbook playbooks/haproxy.yml ; verify
                   Let's Encrypt certs landed on the veza-haproxy container
    6. summary   — readiness report
  Remote
    R1. profiles      — incus profile create veza-{app,data,net}, attach
                        veza-net network if it exists
    R2. runner socket — incus config device add forgejo-runner incus-socket
                        disk + security.nesting=true + apt install
                        incus-client inside the runner
    R3. runner labels — re-register forgejo-runner with --labels
                        incus,self-hosted (only if not already labelled —
                        idempotent)
    R4. sanity        — runner ↔ Incus + runner ↔ Forgejo smoke

Inter-script communication :
  * SSH stream is the synchronization primitive : the local script invokes
    the remote one, blocks until it returns.
  * Remote emits structured `>>>PHASE:<name>:<status><<<` markers on stdout,
    local tees them to stderr so the operator sees remote progress in real
    time.
  * Persistent state files survive disconnects :
      local : <repo>/.git/talas-bootstrap/local.state
      R720  : /var/lib/talas/bootstrap.state
    Both hold one `phase=DONE timestamp` line per completed phase.
    Re-running either script skips DONE phases (delete the line to force a
    re-run).

Resumable :
  PHASE=N ./bootstrap-local.sh   # restart at phase N

Idempotency guards :
  Every state-mutating action is preceded by a state-checking guard that
  returns 0 if already applied (incus profile show, jq label parse, file
  existence + mode check, Forgejo API GET, etc.).

Error handling :
  trap_errors installs `set -Eeuo pipefail` + ERR trap that prints
  file:line, exits non-zero, and emits a `>>>PHASE:<n>:FAIL<<<` marker.
  Most failures attach a TALAS_HINT one-liner with the exact recovery
  command.

Verify scripts :
  Read-only ; no state mutations. Output is a sequence of PASS/FAIL lines +
  an exit code = number of failures. Each failure prints a `hint:` with the
  precise fix command.

.gitignore picks up scripts/bootstrap/.env (per-operator config) and
.git/talas-bootstrap/ (state files).

--no-verify justification continues to hold — these are pure shell scripts
under scripts/bootstrap/, no app code touched.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 20:45:00 +00:00
#!/usr/bin/env bash
# verify-local.sh — read-only checks of local state (vault, secrets, ssh).
#
# Exit status: 0 when every check passes, 1 otherwise. The number of failed
# checks is printed in the final "Result" section; it is not encoded in the
# exit status.
#
# Configuration (each has a default below and may be overridden from the
# environment or from a .env file placed next to this script):
#   R720_HOST, R720_USER, FORGEJO_API_URL, FORGEJO_OWNER, FORGEJO_REPO
#   FORGEJO_ADMIN_TOKEN — no default; the Forgejo API checks run only when set.
#
# errexit (-e) is not enabled: a failing probe is tallied by the check helpers
# instead of aborting the script.
set -uo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" || {
  printf 'verify-local.sh: cannot resolve the script directory\n' >&2
  exit 1
}

# lib.sh presumably provides ok/err/warn/section and the _YELLOW/_RESET
# variables used further down (none of them is defined in this file). Test
# readability up front: without errexit a failed `.` would otherwise let the
# script run on with every reporting helper missing.
if [[ ! -r "$SCRIPT_DIR/lib.sh" ]]; then
  printf 'verify-local.sh: cannot read %s\n' "$SCRIPT_DIR/lib.sh" >&2
  exit 1
fi
# shellcheck source=lib.sh
. "$SCRIPT_DIR/lib.sh"

# Optional per-operator overrides.
if [[ -f "$SCRIPT_DIR/.env" ]]; then
  # shellcheck disable=SC1091
  . "$SCRIPT_DIR/.env"
fi

# Defaults for anything neither the environment nor .env has set.
: "${R720_HOST:=10.0.20.150}"
: "${R720_USER:=ansible}"
: "${FORGEJO_API_URL:=https://forgejo.talas.group}"
: "${FORGEJO_OWNER:=talas}"
: "${FORGEJO_REPO:=veza}"

# Every repo-relative path below hangs off the git top-level directory.
REPO_ROOT=$(git -C "$SCRIPT_DIR" rev-parse --show-toplevel 2>/dev/null) || {
  err "not in a git repo"
  exit 1
}
VAULT_YML="$REPO_ROOT/infra/ansible/group_vars/all/vault.yml"
VAULT_PASS="$REPO_ROOT/infra/ansible/.vault-pass"

# Integer attribute: `PASS+=1` / `FAIL+=1` in the check helpers is arithmetic,
# not string concatenation.
declare -i PASS=0 FAIL=0
#######################################
# Run one probe and record the outcome.
# Globals:   PASS, FAIL (incremented)
# Arguments: $1 - label shown in the report
#            $2 - shell snippet to eval; its stdout/stderr are discarded
# Outputs:   whatever ok() or err() emit for the label
# Returns:   0 in both the passing and the failing case
#######################################
check() {
  local label=$1 probe=$2
  if ! eval "$probe" &>/dev/null; then
    err "$label"
    FAIL=$((FAIL + 1))
    return 0
  fi
  ok "$label"
  PASS=$((PASS + 1))
}
#######################################
# Same as check(), plus a remediation hint when the probe fails.
# Globals:   PASS, FAIL (incremented); _YELLOW, _RESET (read — wrapped
#            around the "hint:" tag, presumably colour escape codes)
# Arguments: $1 - label shown in the report
#            $2 - shell snippet to eval; its stdout/stderr are discarded
#            $3 - hint text printed on stderr after a failure
# Returns:   0 in both the passing and the failing case
#######################################
check_with_hint() {
  local label=$1 probe=$2 advice=$3
  if eval "$probe" &>/dev/null; then
    ok "$label"
    PASS=$((PASS + 1))
    return 0
  fi
  err "$label"
  printf ' %shint:%s %s\n' "$_YELLOW" "$_RESET" "$advice" >&2
  FAIL=$((FAIL + 1))
}
section "Local prerequisites"
# One `command -v` probe per required CLI tool, in a fixed order.
for tool in git ansible ansible-vault curl jq ssh openssl dig; do
  check "$tool available" "command -v $tool"
done
section "Repo state"

#######################################
# Succeed only when deploy.yml is readable AND contains no indented `push:`
# trigger line. The readability test matters: a bare `! grep … file` also
# succeeds when grep fails with status 2 on a missing file.
# Globals: REPO_ROOT (read)
# Returns: 0 when gated, non-zero otherwise
#######################################
verify_deploy_gated() {
  local workflow="$REPO_ROOT/.forgejo/workflows/deploy.yml"
  [[ -r "$workflow" ]] || return 1
  ! grep -qE '^[[:space:]]+push:$' -- "$workflow"
}

# The probes are single-quoted on purpose: $REPO_ROOT is expanded by the eval
# inside check(), within double quotes, so a checkout path containing spaces
# or glob characters is handled correctly.
# shellcheck disable=SC2016
{
  check "in repo root" '[[ -f "$REPO_ROOT/CLAUDE.md" ]]'
  check "infra/ansible/ exists" '[[ -d "$REPO_ROOT/infra/ansible" ]]'
  check ".forgejo/workflows/deploy.yml" '[[ -f "$REPO_ROOT/.forgejo/workflows/deploy.yml" ]]'
}
check_with_hint "deploy.yml gated (no auto-trigger)" \
  "verify_deploy_gated" \
  "if you want auto-deploy, run scripts/bootstrap/enable-auto-deploy.sh"
section "Vault"

#######################################
# Succeed when the first line of $VAULT_YML starts with the literal
# "$ANSIBLE_VAULT" header.
# Globals: VAULT_YML (read)
#######################################
verify_vault_encrypted() {
  head -n 1 -- "$VAULT_YML" 2>/dev/null | grep -q '^\$ANSIBLE_VAULT'
}

#######################################
# Print the octal permission bits of $VAULT_PASS.
# GNU stat (-c '%a') is tried first; BSD/macOS stat has no -c option, so
# its -f '%Lp' form is used as the fallback.
# Globals: VAULT_PASS (read)
#######################################
verify_vault_pass_mode() {
  stat -c '%a' -- "$VAULT_PASS" 2>/dev/null ||
    stat -f '%Lp' -- "$VAULT_PASS" 2>/dev/null
}

#######################################
# Succeed only when the vault decrypts AND its plaintext contains no "<TODO"
# marker. Decryption is checked separately from the search: in a negated
# `view | grep` pipeline a decrypt failure looks the same as "nothing found".
# Globals: VAULT_PASS, VAULT_YML (read)
#######################################
verify_vault_no_todo() {
  local plaintext
  plaintext=$(ansible-vault view --vault-password-file "$VAULT_PASS" "$VAULT_YML" 2>/dev/null) || return 1
  [[ $plaintext != *'<TODO'* ]]
}

# Single-quoted probes are expanded by the eval inside the check helpers,
# within double quotes, so paths containing spaces stay intact.
# shellcheck disable=SC2016
{
  check "vault.yml.example exists" '[[ -f "$REPO_ROOT/infra/ansible/group_vars/all/vault.yml.example" ]]'
  check "vault.yml exists" '[[ -f "$VAULT_YML" ]]'
  check_with_hint "vault.yml is encrypted" \
    "verify_vault_encrypted" \
    "PHASE=2 ./bootstrap-local.sh"
  check_with_hint ".vault-pass exists" \
    '[[ -f "$VAULT_PASS" ]]' \
    "PHASE=2 ./bootstrap-local.sh"
  check_with_hint ".vault-pass mode 0400" \
    '[[ $(verify_vault_pass_mode) == 400 ]]' \
    "chmod 0400 $VAULT_PASS"
  check_with_hint "can decrypt vault.yml" \
    'ansible-vault view --vault-password-file "$VAULT_PASS" "$VAULT_YML"' \
    "vault password mismatch — re-encrypt with: ansible-vault rekey --new-vault-password-file $VAULT_PASS $VAULT_YML"
  check_with_hint "no <TODO> placeholders left" \
    "verify_vault_no_todo" \
    "ansible-vault edit --vault-password-file $VAULT_PASS $VAULT_YML"
}
section "SSH to R720 ($R720_USER@$R720_HOST)"
# Both probes share one option set. ConnectTimeout bounds the connection
# attempt on the second probe as well, so an unreachable host fails after a
# few seconds instead of waiting for the default TCP timeout; BatchMode
# keeps ssh from prompting.
ssh_probe="ssh -o ConnectTimeout=5 -o BatchMode=yes $R720_USER@$R720_HOST"
check_with_hint "ssh handshake" \
  "$ssh_probe /bin/true" \
  "ensure your key is in $R720_USER@$R720_HOST:~/.ssh/authorized_keys"
check "incus reachable on R720" \
  "$ssh_probe 'incus list >/dev/null 2>&1'"
section "DNS public domains"
# Each hostname is looked up through the 1.1.1.1 resolver; the probe passes
# when at least one answer line starts with digits followed by a dot.
public_domains=(veza.fr www.veza.fr staging.veza.fr talas.fr www.talas.fr forgejo.talas.group)
for domain in "${public_domains[@]}"; do
  check_with_hint "$domain resolves" \
    "dig +short +time=2 +tries=1 $domain @1.1.1.1 | grep -qE '^[0-9]+\\.'" \
    "set the A record at your registrar to point to your R720 public IP"
done
#######################################
# GET $FORGEJO_API_URL/api/v1/<path> with the admin token.
# Every request is capped at 10 s so an unresponsive server cannot stall the
# run, and the Authorization header is passed as one quoted argument rather
# than spliced into an eval string.
# Globals:   FORGEJO_API_URL, FORGEJO_ADMIN_TOKEN (read)
# Arguments: $1 - API path below /api/v1/
# Outputs:   the response body on stdout
# Returns:   curl's exit status (-f makes HTTP errors a failure)
#######################################
verify_forgejo_get() {
  curl -fsSL --max-time 10 \
    -H "Authorization: token $FORGEJO_ADMIN_TOKEN" \
    "$FORGEJO_API_URL/api/v1/$1"
}

# The API checks need an admin token; without one they are skipped with a
# warning and do not count as failures.
if [[ -n "${FORGEJO_ADMIN_TOKEN:-}" ]]; then
  section "Forgejo API + secrets/vars"
  repo_api="repos/$FORGEJO_OWNER/$FORGEJO_REPO"
  check_with_hint "Forgejo API reachable" \
    "verify_forgejo_get user" \
    "set FORGEJO_API_URL ; if no DNS yet, FORGEJO_API_URL=http://10.0.20.105:3000"
  check_with_hint "repo $FORGEJO_OWNER/$FORGEJO_REPO exists" \
    "verify_forgejo_get $repo_api" \
    "set FORGEJO_OWNER + FORGEJO_REPO env vars"
  # NOTE(review): confirm this Forgejo version answers GET on a single
  # actions/secrets/<name> resource. If only PUT/DELETE are routed there,
  # these two checks can never pass and should list actions/secrets and
  # match on the name instead.
  check_with_hint "secret FORGEJO_REGISTRY_TOKEN exists" \
    "verify_forgejo_get $repo_api/actions/secrets/FORGEJO_REGISTRY_TOKEN" \
    "PHASE=3 ./bootstrap-local.sh"
  check_with_hint "secret ANSIBLE_VAULT_PASSWORD exists" \
    "verify_forgejo_get $repo_api/actions/secrets/ANSIBLE_VAULT_PASSWORD" \
    "PHASE=3 ./bootstrap-local.sh"
  check_with_hint "variable FORGEJO_REGISTRY_URL exists" \
    "verify_forgejo_get $repo_api/actions/variables/FORGEJO_REGISTRY_URL" \
    "PHASE=3 ./bootstrap-local.sh"
else
  warn "FORGEJO_ADMIN_TOKEN not set — skipping API checks. Set it to run those."
fi
section "Result"
# Summarise the tally; the exit status is 0 only when nothing failed.
total_checks=$((PASS + FAIL))
if (( FAIL != 0 )); then
  err "$FAIL FAIL out of $total_checks ($PASS passed)"
  exit 1
fi
ok "$PASS / $total_checks checks passed"
exit 0