#!/usr/bin/env bash
# bootstrap-local.sh — drive bootstrap from the operator's workstation.
#
# Phases (each idempotent ; skipped if state file marks DONE) :
#   1. preflight   — required tools, SSH to R720, DNS resolution
#   2. vault       — render + encrypt group_vars/all/vault.yml,
#                    write .vault-pass
#   3. forgejo     — set repo Secrets / Variables via Forgejo API
#   4. r720        — invoke bootstrap-remote.sh over SSH
#   5. haproxy     — ansible-playbook playbooks/haproxy.yml,
#                    verify Let's Encrypt certs land
#   6. summary     — final readiness report
#
# Resumable :
#   PHASE=4 ./bootstrap-local.sh    # restart at phase 4
#
# Inputs (env vars ; can be set in your shell or in scripts/bootstrap/.env) :
#   R720_HOST             ssh target (default: 10.0.20.150)
#   R720_USER             ssh user (default: ansible)
#   FORGEJO_API_URL       default: https://forgejo.talas.group
#                         override with http://10.0.20.105:3000 if no DNS yet
#   FORGEJO_OWNER         default: talas
#   FORGEJO_REPO          default: veza
#   FORGEJO_ADMIN_TOKEN   MANDATORY (Forgejo UI → Settings → Applications)
#   ALREADY_PUSHED        set to "1" if origin/main already has the
#                         current HEAD ; skips the auto-push prompt

set -Eeuo pipefail

# Absolute directory containing this script ; lib.sh and the optional .env
# are looked up next to it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Shared helpers (die, info, phase, … are used below but defined in lib.sh).
# shellcheck source=lib.sh
source "$SCRIPT_DIR/lib.sh"
trap_errors

# Optional .env in the bootstrap dir for non-secret defaults.
if [[ -f "$SCRIPT_DIR/.env" ]]; then
  # shellcheck source=/dev/null
  source "$SCRIPT_DIR/.env"
fi

# Defaults apply only when the variable is unset or empty.
R720_HOST="${R720_HOST:-10.0.20.150}"
R720_USER="${R720_USER:-ansible}"
FORGEJO_API_URL="${FORGEJO_API_URL:-https://forgejo.talas.group}"
FORGEJO_OWNER="${FORGEJO_OWNER:-talas}"
FORGEJO_REPO="${FORGEJO_REPO:-veza}"

# Top of the git checkout that contains this script.
if ! REPO_ROOT=$(git -C "$SCRIPT_DIR" rev-parse --show-toplevel 2>/dev/null); then
  die "not in a git repo (or git missing)"
fi

# Ansible vault artefacts handled by phase 2.
VAULT_YML="$REPO_ROOT/infra/ansible/group_vars/all/vault.yml"
VAULT_EXAMPLE="$REPO_ROOT/infra/ansible/group_vars/all/vault.yml.example"
VAULT_PASS="$REPO_ROOT/infra/ansible/.vault-pass"

# State file lives under the repo so the local script doesn't need root.
TALAS_STATE_DIR="$REPO_ROOT/.git/talas-bootstrap"
TALAS_STATE_FILE="$TALAS_STATE_DIR/local.state"

# ============================================================================
# Phase 1 — preflight
# ============================================================================
#######################################
# Check that the workstation can run the remaining phases.
# Verifies required commands and repo files, non-interactive SSH to the
# R720, that `incus list` works there, and that the public domains resolve
# to an IPv4 address via 1.1.1.1. Missing DNS only warns ; everything else
# is fatal.
# Globals:   R720_USER, R720_HOST, REPO_ROOT, VAULT_EXAMPLE (read)
#            _current_phase, TALAS_HINT (written)
# Arguments: none
# Returns:   0 on success ; failures go through die (from lib.sh)
#######################################
phase_1_preflight() {
  section "Phase 1 — Preflight"
  _current_phase=preflight
  phase preflight START

  if skip_if_done preflight "preflight"; then
    phase preflight DONE
    return 0
  fi

  require_cmd git ansible ansible-vault dig curl ssh openssl base64 jq
  require_file "$VAULT_EXAMPLE"
  require_file "$REPO_ROOT/infra/ansible/playbooks/haproxy.yml"
  require_file "$REPO_ROOT/infra/ansible/inventory/staging.yml"

  info "Testing SSH to $R720_USER@$R720_HOST…"
  # BatchMode=yes makes ssh fail instead of prompting for a password.
  if ! ssh -o ConnectTimeout=5 -o BatchMode=yes "$R720_USER@$R720_HOST" /bin/true 2>/dev/null; then
    TALAS_HINT="ensure your ssh key is in $R720_USER@$R720_HOST:~/.ssh/authorized_keys, then try ssh $R720_USER@$R720_HOST"
    die "SSH to $R720_USER@$R720_HOST failed"
  fi
  ok "SSH OK"

  info "Checking that incus is reachable on R720…"
  if ! ssh "$R720_USER@$R720_HOST" "command -v incus >/dev/null && incus list >/dev/null 2>&1"; then
    TALAS_HINT="run 'incus list' as $R720_USER on $R720_HOST manually ; verify the user is in the 'incus-admin' group"
    die "incus on $R720_HOST not accessible by $R720_USER"
  fi
  ok "incus reachable"

  info "Checking DNS resolution for the public domains…"
  local -a domains=(veza.fr staging.veza.fr talas.fr forgejo.talas.group)
  local -a missing_dns=()
  local d # keep the loop variable out of the global scope
  for d in "${domains[@]}"; do
    # A domain counts as resolved when dig returns at least one IPv4 line.
    if ! dig +short +time=2 +tries=1 "$d" @1.1.1.1 2>/dev/null \
      | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$'; then
      missing_dns+=("$d")
    fi
  done
  if (( ${#missing_dns[@]} > 0 )); then
    warn "DNS not resolved for: ${missing_dns[*]}"
    warn "Let's Encrypt (phase 5) will fail for those domains. Configure DNS first or expect partial cert issuance."
  else
    # Count derived from the list so the message cannot drift from it.
    ok "all ${#domains[@]} public domains resolve"
  fi

  mark_done preflight
  phase preflight DONE
}

# ============================================================================
# Phase 2 — vault
# ============================================================================
#######################################
# Tell whether a file starts with the ansible-vault header.
# Arguments: $1 - path to inspect
# Returns:   0 if the first line begins with $ANSIBLE_VAULT, non-zero
#            otherwise (including when the file cannot be read)
#######################################
_vault_is_encrypted() {
  head -n 1 "$1" 2>/dev/null | grep -q '^\$ANSIBLE_VAULT'
}

#######################################
# Make sure vault.yml exists, is encrypted, and decrypts with .vault-pass.
# Renders vault.yml from the example when absent (auto-generating the JWT
# keypair if its placeholders are left untouched), writes .vault-pass when
# absent, encrypts vault.yml whenever it is still plaintext, then verifies
# decryption.
# Globals:   VAULT_YML, VAULT_EXAMPLE, VAULT_PASS (read)
#            _current_phase (written)
# Arguments: none
# Returns:   0 on success ; failures go through die (from lib.sh)
#######################################
phase_2_vault() {
  section "Phase 2 — Local vault"
  _current_phase=vault
  phase vault START

  if skip_if_done vault "vault setup"; then
    phase vault DONE
    return 0
  fi

  if [[ -f "$VAULT_YML" ]] && _vault_is_encrypted "$VAULT_YML"; then
    info "vault.yml already encrypted — verifying password works"
    [[ -f "$VAULT_PASS" ]] || die "vault.yml encrypted but $VAULT_PASS missing — re-create it manually"
  elif [[ -f "$VAULT_YML" ]]; then
    warn "vault.yml exists in PLAINTEXT — will encrypt now"
  else
    info "rendering vault.yml from example"
    cp "$VAULT_EXAMPLE" "$VAULT_YML"
    warn "edit $VAULT_YML now to fill in <TODO> placeholders"
    warn "(JWT keys are auto-generated below if you leave their <TODO> values)"
    prompt_value _ "Press Enter when done editing"

    # Auto-fill JWT keys if user left the TODO placeholders
    if grep -q '<TODO: base64 of RS256 private PEM>' "$VAULT_YML"; then
      info "generating RS256 JWT keypair"
      local jwt_priv jwt_pub
      jwt_priv=$(openssl genrsa 4096 2>/dev/null | base64 -w0)
      jwt_pub=$(printf '%s\n' "$jwt_priv" | base64 -d | openssl rsa -pubout 2>/dev/null | base64 -w0)
      # base64 output never contains '|' or '&', so it is safe as a sed
      # replacement with '|' as the delimiter.
      sed -i "s|<TODO: base64 of RS256 private PEM>|$jwt_priv|" "$VAULT_YML"
      sed -i "s|<TODO: base64 of RS256 public PEM>|$jwt_pub|" "$VAULT_YML"
      ok "JWT keys generated and inserted"
    fi

    local remaining
    # grep -c exits 1 when the count is 0 ; that is not an error here.
    remaining=$(grep -cE '<TODO' "$VAULT_YML" || true)
    if (( remaining > 0 )); then
      die "$remaining <TODO> placeholders still in $VAULT_YML — fill them and rerun PHASE=2 ./bootstrap-local.sh"
    fi
  fi

  if [[ ! -f "$VAULT_PASS" ]]; then
    local pw=""
    prompt_password pw "choose a vault password (memorize it !)"
    # Create the file owner-only from the start (no window where the
    # default umask leaves the password readable by others) ; printf
    # keeps passwords such as "-n" intact, which echo would not.
    (
      umask 077
      printf '%s\n' "$pw" > "$VAULT_PASS"
    )
    chmod 0400 "$VAULT_PASS"
    ok "wrote $VAULT_PASS"
  fi

  # Encrypt whenever vault.yml is still plaintext — including the case
  # where .vault-pass already existed (e.g. a rerun, or a plaintext
  # vault.yml dropped next to an existing password file).
  if ! _vault_is_encrypted "$VAULT_YML"; then
    info "encrypting vault.yml"
    ansible-vault encrypt --vault-password-file "$VAULT_PASS" "$VAULT_YML"
    ok "encrypted"
  fi

  info "verifying we can decrypt"
  if ! ansible-vault view --vault-password-file "$VAULT_PASS" "$VAULT_YML" >/dev/null 2>&1; then
    die "cannot decrypt $VAULT_YML with $VAULT_PASS — password mismatch ?"
  fi
  ok "vault decryption verified"

  mark_done vault
  phase vault DONE
}

# ============================================================================
# Phase 3 — Forgejo Secrets + Variables
# ============================================================================
#######################################
# Provision the deploy secrets and variable on the Forgejo repository.
# Sets the FORGEJO_REGISTRY_TOKEN and ANSIBLE_VAULT_PASSWORD secrets and the
# FORGEJO_REGISTRY_URL variable. The registry token is taken from the
# FORGEJO_REGISTRY_TOKEN env var when it is set ; otherwise a new token is
# requested through the API.
# Globals:   FORGEJO_API_URL, FORGEJO_OWNER, FORGEJO_REPO,
#            FORGEJO_ADMIN_TOKEN, FORGEJO_REGISTRY_TOKEN (optional),
#            VAULT_PASS (read) ; _current_phase, TALAS_HINT (written)
# Arguments: none
# Returns:   0 on success ; failures go through die (from lib.sh)
#######################################
phase_3_forgejo() {
  section "Phase 3 — Forgejo Secrets + Variables"
  _current_phase=forgejo
  phase forgejo START

  if skip_if_done forgejo "Forgejo provisioning"; then
    phase forgejo DONE
    return 0
  fi

  require_env FORGEJO_ADMIN_TOKEN \
    "create at $FORGEJO_API_URL/-/user/settings/applications (scopes: write:admin, write:repository, write:package)"

  # Fail before touching the API : a missing password file would otherwise
  # end up as an empty ANSIBLE_VAULT_PASSWORD secret.
  [[ -f "$VAULT_PASS" ]] \
    || die "$VAULT_PASS missing — run PHASE=2 ./bootstrap-local.sh first"

  info "checking Forgejo API reachability"
  if ! curl -fsSL --max-time 10 \
    -H "Authorization: token $FORGEJO_ADMIN_TOKEN" \
    "$FORGEJO_API_URL/api/v1/user" >/dev/null 2>&1; then
    TALAS_HINT="check FORGEJO_API_URL ($FORGEJO_API_URL) ; if no DNS yet, try FORGEJO_API_URL=http://10.0.20.105:3000"
    die "Forgejo API unreachable or token invalid"
  fi
  ok "Forgejo API reachable, token valid"

  info "checking repo $FORGEJO_OWNER/$FORGEJO_REPO exists"
  if ! forgejo_api GET "/repos/$FORGEJO_OWNER/$FORGEJO_REPO" >/dev/null 2>&1; then
    TALAS_HINT="set FORGEJO_OWNER + FORGEJO_REPO env vars (currently $FORGEJO_OWNER/$FORGEJO_REPO)"
    die "repo $FORGEJO_OWNER/$FORGEJO_REPO not found"
  fi

  # Honour a manually created token, as the failure message below tells the
  # operator to do ; only fall back to the API when none is supplied.
  local registry_token="${FORGEJO_REGISTRY_TOKEN:-}"
  if [[ -n "$registry_token" ]]; then
    info "using registry token from FORGEJO_REGISTRY_TOKEN"
  else
    # Create a long-lived registry token via the API.
    info "creating a registry token (write:package)"
    local token_request
    # The timestamp suffix keeps the token name unique across runs.
    token_request=$(jq -nc --arg n "veza-deploy-registry-$(date +%s)" \
      --argjson s '["write:package", "read:package"]' \
      '{name: $n, scopes: $s}')
    registry_token=$(forgejo_api POST "/users/$FORGEJO_OWNER/tokens" \
      --data "$token_request" \
      | jq -er '.sha1 // empty') \
      || die "could not create registry token via API ; create one manually at $FORGEJO_API_URL/-/user/settings/applications and re-run with FORGEJO_REGISTRY_TOKEN env var set"
  fi

  local vault_password
  vault_password=$(<"$VAULT_PASS")

  forgejo_set_secret "$FORGEJO_OWNER" "$FORGEJO_REPO" FORGEJO_REGISTRY_TOKEN "$registry_token"
  forgejo_set_secret "$FORGEJO_OWNER" "$FORGEJO_REPO" ANSIBLE_VAULT_PASSWORD "$vault_password"
  forgejo_set_var "$FORGEJO_OWNER" "$FORGEJO_REPO" FORGEJO_REGISTRY_URL \
    "$FORGEJO_API_URL/api/packages/$FORGEJO_OWNER/generic"

  mark_done forgejo
  phase forgejo DONE
}

# ============================================================================
# Phase 4 — R720 remote bootstrap
# ============================================================================
#######################################
# Run bootstrap-remote.sh on the R720 as root, streamed over SSH.
# Fetches a runner registration token from Forgejo, then pipes
# lib.sh + bootstrap-remote.sh into `sudo -E bash -s` on the remote host.
# Remote output is copied to stdout ; lines containing '>>>PHASE:' are
# additionally echoed to stderr.
# Globals:   SCRIPT_DIR, R720_USER, R720_HOST, FORGEJO_OWNER, FORGEJO_REPO,
#            FORGEJO_API_URL (read) ; _current_phase (written)
# Arguments: none
# Returns:   0 on success ; failures go through die (from lib.sh)
#######################################
phase_4_r720() {
  section "Phase 4 — R720 remote bootstrap (Incus profiles + runner labels)"
  _current_phase=r720
  phase r720 START

  if skip_if_done r720 "R720 remote bootstrap"; then
    phase r720 DONE
    return 0
  fi

  require_env FORGEJO_ADMIN_TOKEN
  info "fetching a runner registration token from Forgejo"
  local reg_token
  reg_token=$(forgejo_get_runner_token "$FORGEJO_OWNER" "$FORGEJO_REPO") \
    || die "could not fetch runner registration token"
  info "got registration token (${#reg_token} chars)"

  local remote_script="$SCRIPT_DIR/bootstrap-remote.sh"
  local remote_lib="$SCRIPT_DIR/lib.sh"
  require_file "$remote_script"
  require_file "$remote_lib"

  info "streaming bootstrap-remote.sh over SSH (logs to /var/log/talas-bootstrap.log on R720)"
  {
    # Pass the settings inside the script stream rather than on the ssh
    # command line : %q quoting is exact for the remote bash (no breakage
    # or injection if a value contains quotes), and the token never shows
    # up in the argv of ssh or sudo.
    printf 'export FORGEJO_REGISTRATION_TOKEN=%q\n' "$reg_token"
    printf 'export FORGEJO_API_URL=%q\n' "$FORGEJO_API_URL"
    # Concatenate lib.sh + remote script so the remote bash sees both.
    cat "$remote_lib"
    echo
    cat "$remote_script"
  } | ssh "$R720_USER@$R720_HOST" "sudo -E bash -s" \
    | tee >(grep -E '>>>PHASE:' >&2) \
    || die "remote bootstrap failed ; ssh to $R720_HOST and tail /var/log/talas-bootstrap.log"

  mark_done r720
  phase r720 DONE
}

# ============================================================================
# Phase 5 — Edge HAProxy + Let's Encrypt
# ============================================================================
#######################################
# Apply playbooks/haproxy.yml against the staging inventory, then check
# that the veza-haproxy container has files under
# /usr/local/etc/tls/haproxy/.
# Globals:   REPO_ROOT, R720_USER, R720_HOST (read)
#            _current_phase, TALAS_HINT (written)
# Arguments: none
# Returns:   0 on success, 1 when the cert directory listing is empty ;
#            a playbook failure goes through die (from lib.sh)
# Note:      changes the working directory to $REPO_ROOT/infra/ansible.
#######################################
phase_5_haproxy() {
  section "Phase 5 — Edge HAProxy + Let's Encrypt certs"
  _current_phase=haproxy
  phase haproxy START

  skip_if_done haproxy "haproxy + LE" && {
    phase haproxy DONE
    return 0
  }

  # Inventory, playbook and .vault-pass are given relative to this directory.
  cd "$REPO_ROOT/infra/ansible"
  info "running ansible-playbook playbooks/haproxy.yml (5–10 min)"
  ansible-playbook -i inventory/staging.yml playbooks/haproxy.yml \
    --vault-password-file .vault-pass || {
    TALAS_HINT="check the ansible output above ; common issues : Incus profile missing, port 80 blocked from Internet, DNS not yet propagated"
    die "ansible-playbook haproxy.yml failed"
  }

  info "verifying Let's Encrypt certs landed"
  local listing
  # A failed ssh/ls is tolerated here ; an empty listing is reported below.
  listing=$(ssh "$R720_USER@$R720_HOST" "incus exec veza-haproxy -- ls /usr/local/etc/tls/haproxy/ 2>/dev/null") || true

  if [[ -n "$listing" ]]; then
    # One line, names separated by spaces.
    ok "certs : $(tr '\n' ' ' <<<"$listing")"
    mark_done haproxy
    phase haproxy DONE
  else
    warn "no certs found in /usr/local/etc/tls/haproxy/ on veza-haproxy"
    warn "check /var/log/letsencrypt or run again — dehydrated retries on next playbook run"
    return 1
  fi
}

# ============================================================================
# Phase 6 — Summary
# ============================================================================
#######################################
# Print the final readiness report to stderr and mark the phase done.
# Globals:   _GREEN, _BOLD, _RESET, FORGEJO_API_URL, FORGEJO_OWNER,
#            FORGEJO_REPO, SCRIPT_DIR, R720_USER, R720_HOST,
#            TALAS_STATE_FILE (read) ; _current_phase (written)
# Arguments: none
#######################################
phase_6_summary() {
  section "Phase 6 — Summary"
  _current_phase=summary
  phase summary START

  # Unquoted delimiter : variables inside the report are expanded.
  cat >&2 <<EOF

${_GREEN}${_BOLD}✓ Bootstrap complete.${_RESET}

What works now :
• Forgejo registry has the deploy secrets + variable.
• forgejo-runner has the 'incus' label and Incus socket access.
• veza-haproxy edge container is up with Let's Encrypt certs.

What you can do next :
1. Trigger a manual deploy via Forgejo Actions UI :
$FORGEJO_API_URL/$FORGEJO_OWNER/$FORGEJO_REPO/actions
→ "Veza deploy" → "Run workflow" → env=staging.

2. Once that run is green, re-enable auto-trigger :
$SCRIPT_DIR/enable-auto-deploy.sh

3. Verify state any time :
$SCRIPT_DIR/verify-local.sh
ssh $R720_USER@$R720_HOST $SCRIPT_DIR/verify-remote.sh

State file : $TALAS_STATE_FILE
EOF

  mark_done summary
  phase summary DONE
}

# ============================================================================
# main
# ============================================================================
#######################################
# Run the bootstrap phases in order, starting at $PHASE (default 1).
# Every phase whose number is >= the start phase is executed ; a start
# value above 6 runs nothing.
# Globals:   PHASE (read, optional)
# Arguments: ignored
# Returns:   0 when all selected phases succeed ; a non-numeric PHASE is
#            rejected through die (from lib.sh)
#######################################
main() {
  local start=${PHASE:-1}

  # Validate before any numeric comparison : bash evaluates comparison
  # operands as arithmetic expressions, so a stray word would silently
  # count as 0 (or abort under set -u) and a crafted value could run code.
  [[ "$start" =~ ^[0-9]+$ ]] \
    || die "PHASE must be a phase number (1-6), got '$start'"
  start=$((10#$start)) # force base 10 so "08" is not parsed as octal

  info "starting at phase $start"

  local -a phases=(
    phase_1_preflight
    phase_2_vault
    phase_3_forgejo
    phase_4_r720
    phase_5_haproxy
    phase_6_summary
  )
  local idx
  for idx in "${!phases[@]}"; do
    # Array index is zero-based ; phase numbers start at 1.
    if (( start <= idx + 1 )); then
      "${phases[idx]}"
    fi
  done

  ok "ALL DONE"
}

# Entry point : hand the command-line arguments to main.
main "$@"