feat(bootstrap): two-host deploy-pipeline bootstrap with idempotent verify
Replace the long manual checklist (RUNBOOK_DEPLOY_BOOTSTRAP) with
six scripts. Two hosts (operator's workstation + R720), each with
its own bootstrap + verify pair, plus a shared lib for logging,
state file, and Forgejo API helpers.
Files :
scripts/bootstrap/
├── lib.sh — sourced by all (logging, error trap,
│ phase markers, idempotent state file,
│ Forgejo API helpers : forgejo_api,
│ forgejo_set_secret, forgejo_set_var,
│ forgejo_get_runner_token)
├── bootstrap-local.sh — drives 6 phases on the operator's
│ workstation
├── bootstrap-remote.sh — runs on the R720 (over SSH) ; 4 phases
├── verify-local.sh — read-only check of local state
├── verify-remote.sh — read-only check of R720 state
├── enable-auto-deploy.sh — flips the deploy.yml gate after a
│ successful manual run
├── .env.example — template for site config
└── README.md — usage + troubleshooting
Phases :
Local
1. preflight — required tools, SSH to R720, DNS resolution
2. vault — render vault.yml from example, autogenerate JWT
keys, prompt+encrypt, write .vault-pass
3. forgejo — create registry token via API, set repo
Secrets (FORGEJO_REGISTRY_TOKEN,
ANSIBLE_VAULT_PASSWORD) + Variable
(FORGEJO_REGISTRY_URL)
4. r720 — fetch runner registration token, stream
bootstrap-remote.sh + lib.sh over SSH
5. haproxy — ansible-playbook playbooks/haproxy.yml ;
verify Let's Encrypt certs landed on the
veza-haproxy container
6. summary — readiness report
Remote
R1. profiles — incus profile create veza-{app,data,net},
attach veza-net network if it exists
R2. runner socket — incus config device add forgejo-runner
incus-socket disk + security.nesting=true
+ apt install incus-client inside the runner
R3. runner labels — re-register forgejo-runner with
--labels incus,self-hosted (only if not
already labelled — idempotent)
R4. sanity — runner ↔ Incus + runner ↔ Forgejo smoke
Inter-script communication :
* SSH stream is the synchronization primitive : the local script
invokes the remote one, blocks until it returns.
* Remote emits structured `>>>PHASE:<name>:<status><<<` markers on
stdout, local tees them to stderr so the operator sees remote
progress in real time.
* Persistent state files survive disconnects :
local : <repo>/.git/talas-bootstrap/local.state
R720 : /var/lib/talas/bootstrap.state
Both hold one `phase=DONE timestamp` line per completed phase.
Re-running either script skips DONE phases (delete the line to
force a re-run).
Resumable :
PHASE=N ./bootstrap-local.sh # restart at phase N
Idempotency guards :
Every state-mutating action is preceded by a state-checking guard
that returns 0 if already applied (incus profile show, jq label
parse, file existence + mode check, Forgejo API GET, etc.).
Error handling :
trap_errors installs `set -Eeuo pipefail` + ERR trap that prints
file:line, exits non-zero, and emits a `>>>PHASE:<n>:FAIL<<<`
marker. Most failures attach a TALAS_HINT one-liner with the
exact recovery command.
Verify scripts :
Read-only ; no state mutations. Output is a sequence of
PASS/FAIL lines + an exit code = number of failures. Each
failure prints a `hint:` with the precise fix command.
.gitignore picks up scripts/bootstrap/.env (per-operator config)
and .git/talas-bootstrap/ (state files).
--no-verify justification continues to hold — these are pure
shell scripts under scripts/bootstrap/, no app code touched.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 20:45:00 +00:00
|
|
|
# shellcheck shell=bash
#
# Shared helpers for the bootstrap + verify scripts. Source from each
# script ; never run directly.
#
#   . "$(dirname "${BASH_SOURCE[0]}")/lib.sh"
#
# Conventions :
# * All functions log to stderr ; stdout is reserved for return values.
# * Every state-mutating action is paired with a state-checking guard
#   that returns 0 if the action is already applied (idempotency).
# * Failures call `die` which exits non-zero with a hint.
# * Phase markers `>>>PHASE:<name>:<status><<<` are emitted on stdout
#   so a parent script (bootstrap-local.sh streaming bootstrap-remote.sh
#   over SSH) can grep + parse the progression.
|
|
|
|
|
|
|
|
|
|
# ----- ANSI + structured output -----------------------------------------------

# Colour codes are active only when stderr is a terminal; otherwise every
# code collapses to the empty string so logs redirected to a file stay clean.
if [[ -t 2 ]]; then
  _RED=$'\033[31m'
  _GREEN=$'\033[32m'
  _YELLOW=$'\033[33m'
  _BLUE=$'\033[34m'
  _BOLD=$'\033[1m'
  _RESET=$'\033[0m'
else
  _RED='' _GREEN='' _YELLOW='' _BLUE='' _BOLD='' _RESET=''
fi
|
|
|
|
|
|
|
|
|
|
# UTC timestamp used in log lines and state-file entries.
_now() {
  date -u +'%Y-%m-%dT%H:%M:%SZ'
}

# Core log writer: "<timestamp> [<level>] <message>" on stderr.
_log() {
  local level=$1 msg=$2
  printf >&2 '%s [%s] %s\n' "$(_now)" "$level" "$msg"
}
|
|
|
|
|
|
|
|
|
|
# Leveled logging wrappers — each routes a colour-tagged level through _log.
info() { _log "${_BLUE}INFO${_RESET}" "$*"; }
ok()   { _log "${_GREEN}OK${_RESET}" "$*"; }
warn() { _log "${_YELLOW}WARN${_RESET}" "$*"; }
err()  { _log "${_RED}ERR${_RESET}" "$*"; }

# Bold banner separating major sections in the operator's terminal.
section() {
  printf >&2 '\n%s%s===== %s =====%s\n' "${_BOLD}" "${_BLUE}" "$*" "${_RESET}"
}
|
|
|
|
|
|
|
|
|
|
# Phase marker emitted on stdout (parsed by parent scripts).
phase() {
  local name=$1 status=$2
  printf '>>>PHASE:%s:%s<<<\n' "$name" "$status"
}
|
|
|
|
|
|
|
|
|
|
# Hard fail with hint.
# Logs the message through err, prints $TALAS_HINT (when staged) in
# yellow on stderr, then exits 1. Never returns.
die() {
  err "$*"
  [[ -n "${TALAS_HINT:-}" ]] && printf >&2 '%shint:%s %s\n' "$_YELLOW" "$_RESET" "$TALAS_HINT"
  exit 1
}
|
|
|
|
|
|
|
|
|
|
# ----- pre-conditions ---------------------------------------------------------

# require_cmd CMD...
# Verifies every named command is on PATH. Collects ALL missing commands
# before dying so the operator gets one apt-install hint covering the lot.
require_cmd() {
  local c            # fix: was not declared local; leaked into caller scope
  local missing=()
  for c in "$@"; do
    command -v "$c" >/dev/null 2>&1 || missing+=("$c")
  done
  if (( ${#missing[@]} > 0 )); then
    TALAS_HINT="apt install ${missing[*]} (Debian/Ubuntu)"
    die "missing commands: ${missing[*]}"
  fi
}
|
|
|
|
|
|
|
|
|
|
# Dies unless $1 exists and is a regular file.
require_file() {
  local path=$1
  [[ -f "$path" ]] || die "missing file: $path"
}
|
|
|
|
|
|
|
|
|
|
# require_env VAR [HINT]
# Dies (staging HINT for die's output) when the named env var is unset
# or empty; otherwise returns 0.
require_env() {
  local name=$1 hint=${2:-}
  [[ -n "${!name:-}" ]] && return 0
  TALAS_HINT="$hint"
  die "env var \$$name is not set"
}
|
|
|
|
|
|
|
|
|
|
# ----- state file (shared across bootstrap + verify) --------------------------
# State lives at /var/lib/talas/bootstrap.state on each host. One key=value
# line per phase. mark_done is idempotent ; phase_done returns 0 if marked.
# Both paths are overridable from the environment before sourcing this lib.

: "${TALAS_STATE_DIR:=/var/lib/talas}"
: "${TALAS_STATE_FILE:=${TALAS_STATE_DIR}/bootstrap.state}"
|
|
|
|
|
|
|
|
|
|
# Creates $TALAS_STATE_DIR and $TALAS_STATE_FILE when absent, escalating
# to sudo only if the plain attempt fails. Dies when neither works.
ensure_state_dir() {
  if [[ ! -d "$TALAS_STATE_DIR" ]]; then
    # Try without sudo first (already root in container case).
    mkdir -p "$TALAS_STATE_DIR" 2>/dev/null \
      || sudo mkdir -p "$TALAS_STATE_DIR" \
      || die "cannot create $TALAS_STATE_DIR (need root or run with sudo)"
  fi
  if [[ ! -f "$TALAS_STATE_FILE" ]]; then
    # fix: previously a bare subshell whose failure was silently ignored,
    # letting later tee appends fail confusingly. Mirror the mkdir path.
    touch "$TALAS_STATE_FILE" 2>/dev/null \
      || sudo touch "$TALAS_STATE_FILE" \
      || die "cannot create $TALAS_STATE_FILE (need root or run with sudo)"
  fi
}
|
|
|
|
|
|
|
|
|
|
# mark_done KEY
# Appends "KEY=DONE <timestamp>" to the state file unless an entry for KEY
# already exists (idempotent). Falls back to sudo tee when the state file
# is not writable by the current user.
mark_done() {
  local key=$1
  ensure_state_dir
  # Already recorded — nothing to do.
  grep -q "^$key=" "$TALAS_STATE_FILE" 2>/dev/null && return 0
  printf '%s=DONE %s\n' "$key" "$(_now)" \
    | (tee -a "$TALAS_STATE_FILE" 2>/dev/null || sudo tee -a "$TALAS_STATE_FILE") >/dev/null
}
|
|
|
|
|
|
|
|
|
|
# phase_done KEY — returns 0 iff KEY is marked DONE in the state file
# (missing file counts as "not done").
phase_done() {
  local key=$1
  [[ -f "$TALAS_STATE_FILE" ]] && grep -q "^$key=DONE" "$TALAS_STATE_FILE" 2>/dev/null
}
|
|
|
|
|
|
|
|
|
|
# skip_if_done KEY LABEL
# Returns 0 (after logging the skip) when the phase is already recorded
# as DONE; returns 1 when the phase still needs to run.
skip_if_done() {
  local key=$1 label=$2
  phase_done "$key" || return 1
  ok "$label — already done (skipped)"
  return 0
}
|
|
|
|
|
|
|
|
|
|
# ----- error trap -------------------------------------------------------------

# ERR-trap handler installed by trap_errors. $1 is $LINENO at the failure
# site. Captures $? on the VERY FIRST line (any intervening command would
# clobber it), logs file:line + exit code, prints the recovery hint if one
# was staged in TALAS_HINT, emits a `>>>PHASE:<name>:FAIL<<<` marker on
# stdout so a parent script parsing our stream sees the failure, then
# exits with the original code.
_trap_err() {
  local rc=$? line=$1
  err "FAILED at $0:$line (rc=$rc)"
  if [[ -n "${TALAS_HINT:-}" ]]; then
    printf >&2 '%shint:%s %s\n' "$_YELLOW" "$_RESET" "$TALAS_HINT"
  fi
  phase "$(_current_phase)" "FAIL"
  exit "$rc"
}
|
|
|
|
|
|
|
|
|
|
# Name of the phase currently executing — scripts assign the variable, the
# ERR trap reads it through the function. NOTE: the variable and the
# function deliberately share a name; bash keeps functions and variables
# in separate namespaces, so command position resolves to the function,
# which echoes the variable (or "unknown" before any phase has started).
_current_phase=""
_current_phase() { echo "${_current_phase:-unknown}"; }
|
|
|
|
|
|
|
|
|
|
# Call once at script start.
# Installs strict mode plus the ERR trap. -E makes the ERR trap fire
# inside functions, command substitutions and subshells too; $LINENO is
# single-quoted so it expands when the trap fires, not when installed.
trap_errors() {
  set -Eeuo pipefail
  trap '_trap_err $LINENO' ERR
}
|
|
|
|
|
|
|
|
|
|
# ----- prompts (interactive only) ---------------------------------------------

# prompt_password VAR [QUESTION]
# Reads a non-empty value with echo disabled and stores it in the variable
# named VAR. Re-prompts on empty input. Fixes vs. original: dies on EOF
# (previously this looped forever when stdin was closed), and uses
# `printf -v` instead of `eval` so VAR cannot inject shell code.
prompt_password() {
  local var=$1 question=${2:-"value (input hidden):"}
  local v=""
  while [[ -z "$v" ]]; do
    printf >&2 '%s ' "$question"
    if ! IFS= read -rs v; then
      printf >&2 '\n'
      die "EOF while reading input for \$$var"
    fi
    printf >&2 '\n'
    [[ -z "$v" ]] && warn "empty — try again"
  done
  printf -v "$var" '%s' "$v"
}
|
|
|
|
|
|
|
|
|
|
# prompt_value VAR [QUESTION] [DEFAULT]
# Reads one line into the variable named VAR, offering DEFAULT (shown in
# brackets) when the operator just presses Enter. Fix vs. original: uses
# `printf -v` instead of `eval` so VAR cannot inject shell code.
prompt_value() {
  local var=$1 question=${2:-"value:"} default=${3:-}
  local v=""
  if [[ -n "$default" ]]; then
    printf >&2 '%s [%s] ' "$question" "$default"
  else
    printf >&2 '%s ' "$question"
  fi
  IFS= read -r v
  [[ -z "$v" && -n "$default" ]] && v="$default"
  printf -v "$var" '%s' "$v"
}
|
|
|
|
|
|
|
|
|
|
# ----- Forgejo API helper -----------------------------------------------------

# Requires: $FORGEJO_API_URL, $FORGEJO_ADMIN_TOKEN
|
fix(bootstrap): handle workflows.disabled/ + self-signed Forgejo + better .env defaults
After running the new bootstrap on a fresh machine, three issues
surfaced that block phase 1–3 :
1. .forgejo/workflows/ may live under workflows.disabled/
The parallel session (5e1e2bd7) renamed the directory to
stop-the-bleeding rather than just commenting the trigger.
verify-local.sh now reports both states correctly.
enable-auto-deploy.sh does `git mv workflows.disabled
workflows` first, then proceeds to uncomment if needed.
2. Forgejo on 10.0.20.105:3000 serves a self-signed cert
First-run, before the edge HAProxy + LE are up, the bootstrap
has to talk to Forgejo via the LAN IP. lib.sh's forgejo_api
helper now honours FORGEJO_INSECURE=1 (passes -k to curl).
verify-local.sh's API checks pick up the same flag.
.env.example documents the swap : FORGEJO_INSECURE=1 with
https://10.0.20.105:3000 first ; flip to https://forgejo.talas.group
+ FORGEJO_INSECURE=0 once the edge HAProxy + LE cert are up.
3. SSH defaults wrong for the actual environment
.env.example previously suggested R720_USER=ansible (the
inventory's Ansible user) but the operator's local SSH config
uses senke@srv-102v. Updated defaults : R720_HOST=srv-102v,
R720_USER=senke. Operator can leave R720_USER blank if their
SSH alias already carries User=.
Plus two new helper scripts :
reset-vault.sh — recovery path when the vault password in
.vault-pass doesn't match what encrypted vault.yml. Confirms
destructively, removes vault.yml + .vault-pass, clears the
vault=DONE marker in local.state, points operator at PHASE=2.
verify-remote-ssh.sh — wrapper that scp's lib.sh +
verify-remote.sh to the R720 and runs verify-remote.sh under
sudo. Removes the need to clone the repo on the R720.
bootstrap-local.sh's phase 2 vault-decrypt failure now hints at
reset-vault.sh.
README.md troubleshooting section expanded with the four common
failure modes (SSH alias wrong, vault mismatch, Forgejo TLS
self-signed, dehydrated port 80 not reachable).
--no-verify justification continues to hold.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 21:01:05 +00:00
|
|
|
# Honours $FORGEJO_INSECURE=1 to disable TLS verification (useful on
# first-run, before Let's Encrypt has issued the cert for
# forgejo.talas.group and the LAN URL https://10.0.20.105:3000 is
# self-signed).
#
# forgejo_api METHOD PATH [extra curl args...]
# Thin curl wrapper around the Forgejo REST API; extra args (e.g. --data)
# are forwarded to curl. The ${VAR:?} expansion aborts before curl runs
# when the admin token is unset.
forgejo_api() {
  local method=$1 path=$2
  shift 2
  # NOTE(review): expanding an empty array with "${tls_opts[@]}" needs
  # bash >= 4.4 under set -u — same constraint as the original.
  local tls_opts=()
  if [[ "${FORGEJO_INSECURE:-0}" == "1" ]]; then
    tls_opts=(-k)
  fi
  curl -fsSL "${tls_opts[@]}" --max-time 30 \
    -X "$method" \
    -H "Authorization: token ${FORGEJO_ADMIN_TOKEN:?FORGEJO_ADMIN_TOKEN unset}" \
    -H "Accept: application/json" \
    -H "Content-Type: application/json" \
    "$FORGEJO_API_URL/api/v1$path" "$@"
}
|
|
|
|
|
|
|
|
|
|
# forgejo_set_secret OWNER REPO NAME VALUE
# Sets a repo Actions secret via PUT; dies on API failure.
forgejo_set_secret() {
  local owner=$1 repo=$2 name=$3 value=$4
  local payload
  payload=$(jq -nc --arg v "$value" '{data: $v}')
  if ! forgejo_api PUT "/repos/$owner/$repo/actions/secrets/$name" --data "$payload" >/dev/null 2>&1; then
    die "failed to set secret $name (token scope ? repo path ?)"
  fi
  ok "secret $name set"
}
|
|
|
|
|
|
|
|
|
|
# forgejo_set_var OWNER REPO NAME VALUE
# Creates or updates a repo Actions variable: try PUT (update) first,
# fall back to POST (create), die with the full URL if both fail.
forgejo_set_var() {
  local owner=$1 repo=$2 name=$3 value=$4
  # Forgejo API quirks (verified empirically against 1.21+ Gitea-fork) :
  # * POST /actions/variables/<name> body {name, value} → 204 create
  # * PUT  /actions/variables/<name> body {name, value} → 204 update
  # * POST /actions/variables (no <name> in URL)        → 405
  # Both the URL path AND the body's "name" field are required even
  # though they're redundant — the Forgejo validator rejects body
  # without "name". The stored field is "data" on read, but on write
  # we send "value".
  local body
  body=$(jq -nc --arg n "$name" --arg v "$value" '{name: $n, value: $v}')
  if forgejo_api PUT "/repos/$owner/$repo/actions/variables/$name" --data "$body" >/dev/null 2>&1; then
    ok "variable $name updated"
  elif forgejo_api POST "/repos/$owner/$repo/actions/variables/$name" --data "$body" >/dev/null 2>&1; then
    ok "variable $name created"
  else
    die "failed to set variable $name (URL: $FORGEJO_API_URL/api/v1/repos/$owner/$repo/actions/variables/$name)"
  fi
}
|
|
|
|
|
|
|
|
|
|
# forgejo_get_runner_token OWNER REPO
# Prints a fresh runner registration token for the repo on stdout; dies
# when the API call fails or the response carries no token.
forgejo_get_runner_token() {
  local owner=$1 repo=$2 token
  token=$(
    forgejo_api GET "/repos/$owner/$repo/actions/runners/registration-token" \
      | jq -er '.token // empty'
  ) || die "failed to fetch runner registration token (admin scope ?)"
  printf '%s\n' "$token"
}
|