#!/usr/bin/env bash
# bootstrap-local.sh — run on the operator's laptop. Drives the
# bootstrap end-to-end via Ansible (no NOPASSWD sudo, no manual
# SSH-script-streaming).
#
# Phases (each idempotent; resumable via PHASE=N):
#   1. preflight         — required local tools, SSH to R720, DNS
#   2. vault             — render + encrypt vault.yml, write .vault-pass
#   3. forgejo           — set repo Secrets / Variables via Forgejo API
#   4. ansible-bootstrap — single ansible-playbook run that does:
#        * Incus profiles on R720
#        * forgejo-runner Incus socket + nesting + binary
#        * forgejo-runner registered with `incus` label
#        * HAProxy edge container + Let's Encrypt certs
#   5. summary
#
# Inputs (env vars or .env file in this dir):
#   R720_HOST            ssh target (default: srv-102v)
#   R720_USER            ssh user (leave empty if alias has User=)
#   FORGEJO_API_URL      default: https://10.0.20.105:3000
#   FORGEJO_INSECURE     1 to skip TLS verify (default: 1 for LAN)
#   FORGEJO_OWNER        default: senke
#   FORGEJO_REPO         default: veza
#   FORGEJO_ADMIN_TOKEN  MANDATORY (Forgejo Settings → Applications)
#
# Sudo on the R720: NOT NOPASSWD. Ansible prompts the operator ONCE
# per run via --ask-become-pass.

set -Eeuo pipefail

# Directory holding this script; lib.sh and the optional .env live next to it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "$SCRIPT_DIR/lib.sh"
trap_errors

# Operator overrides are optional; values already present in the environment
# or set by .env win over the defaults assigned below.
if [[ -f "$SCRIPT_DIR/.env" ]]; then
    source "$SCRIPT_DIR/.env"
fi

R720_HOST=${R720_HOST:-srv-102v}
R720_USER=${R720_USER:-}
FORGEJO_API_URL=${FORGEJO_API_URL:-https://10.0.20.105:3000}
FORGEJO_INSECURE=${FORGEJO_INSECURE:-1}
FORGEJO_OWNER=${FORGEJO_OWNER:-senke}
FORGEJO_REPO=${FORGEJO_REPO:-veza}

# Every repo-relative path below hangs off the git toplevel.
if ! REPO_ROOT=$(git -C "$SCRIPT_DIR" rev-parse --show-toplevel); then
    die "not in a git repo (or git missing)"
fi

VAULT_YML="$REPO_ROOT/infra/ansible/group_vars/all/vault.yml"
VAULT_EXAMPLE="$REPO_ROOT/infra/ansible/group_vars/all/vault.yml.example"
VAULT_PASS="$REPO_ROOT/infra/ansible/.vault-pass"

# Phase-completion state is kept under .git/.
TALAS_STATE_DIR="$REPO_ROOT/.git/talas-bootstrap"
TALAS_STATE_FILE="$TALAS_STATE_DIR/local.state"

# SSH target = "user@host" or just "host" if R720_USER is empty
# (alias's User= line wins).
if [[ -n "$R720_USER" ]]; then SSH_TARGET="$R720_USER@$R720_HOST"; else SSH_TARGET="$R720_HOST"; fi # ============================================================================ # Vault helpers (used by phase 2) # ============================================================================ _rand_token() { local len=${1:-32} openssl rand -base64 $((len * 2)) 2>/dev/null | tr -dc 'A-Za-z0-9' | head -c "$len" } _autofill_field() { local file=$1 key=$2 value=$3 local esc=${value//|/\\|} sed -i "s|^${key}: \"' "$file" || return 0 info "generating RS256 JWT keypair" local priv pub priv=$(openssl genrsa 4096 2>/dev/null) || die "openssl genrsa failed" pub=$(echo "$priv" | openssl rsa -pubout 2>/dev/null) || die "openssl rsa -pubout failed" _autofill_field "$file" vault_jwt_signing_key_b64 "$(echo "$priv" | base64 -w0)" _autofill_field "$file" vault_jwt_public_key_b64 "$(echo "$pub" | base64 -w0)" ok "JWT keys generated" } _autofill_vault_secrets() { local file=$1 filled=() for k in vault_postgres_password vault_postgres_replication_password \ vault_redis_password vault_rabbitmq_password \ vault_minio_root_password vault_chat_jwt_secret \ vault_oauth_encryption_key vault_stream_internal_api_key; do if grep -q "^${k}: \" 0 )) && ok "auto-generated ${#filled[@]} secret(s) : ${filled[*]}" } # ============================================================================ # Phase 1 — preflight # ============================================================================ phase_1_preflight() { section "Phase 1 — Preflight" _current_phase=preflight skip_if_done preflight "preflight" && return 0 require_cmd git ansible ansible-vault dig curl ssh openssl base64 jq info "SSH to $SSH_TARGET" ssh -o ConnectTimeout=5 -o BatchMode=yes "$SSH_TARGET" /bin/true \ || { TALAS_HINT="check ~/.ssh/config (Host $R720_HOST) ; key in agent ?" 
die "SSH to $SSH_TARGET failed"; } ok "SSH OK" info "incus reachable on R720 (no sudo)" if ssh "$SSH_TARGET" "incus list >/dev/null 2>&1"; then ok "operator is in incus-admin group (no sudo needed)" else warn "operator can't `incus list` without sudo — fine, ansible will prompt for sudo password" fi info "DNS resolution" local missing=() for d in veza.fr staging.veza.fr talas.fr forgejo.talas.group; do dig +short +time=2 +tries=1 "$d" @1.1.1.1 2>/dev/null | grep -qE '^[0-9]+\.' \ || missing+=("$d") done (( ${#missing[@]} > 0 )) \ && warn "DNS missing for: ${missing[*]} — Let's Encrypt will fail for those" \ || ok "all 4 public domains resolve" mark_done preflight } # ============================================================================ # Phase 2 — vault # ============================================================================ phase_2_vault() { section "Phase 2 — Local vault" _current_phase=vault skip_if_done vault "vault setup" && return 0 if [[ -f "$VAULT_YML" ]] && head -1 "$VAULT_YML" 2>/dev/null | grep -q '^\$ANSIBLE_VAULT'; then info "vault.yml already encrypted" [[ -f "$VAULT_PASS" ]] || die "vault.yml encrypted but $VAULT_PASS missing — recover with ./reset-vault.sh" else if [[ ! -f "$VAULT_YML" ]]; then info "rendering vault.yml from example" cp "$VAULT_EXAMPLE" "$VAULT_YML" fi _autogen_jwt_keys "$VAULT_YML" _autofill_vault_secrets "$VAULT_YML" local remaining remaining=$(grep -cE ' 0 )); then warn "$remaining placeholders left (optional fields)" grep -n '&2 local cont; prompt_value cont "blank these out and continue ? 
(y/n)" "y" [[ "${cont,,}" == "y" ]] || die "edit $VAULT_YML manually then rerun PHASE=2" sed -i 's|" "$VAULT_PASS" chmod 0400 "$VAULT_PASS" fi ansible-vault encrypt --vault-password-file "$VAULT_PASS" "$VAULT_YML" ok "encrypted" fi ansible-vault view --vault-password-file "$VAULT_PASS" "$VAULT_YML" >/dev/null \ || { TALAS_HINT="run ./reset-vault.sh to start over" die "cannot decrypt $VAULT_YML"; } ok "decryption verified" mark_done vault } # ============================================================================ # Phase 3 — Forgejo Secrets + Variables # ============================================================================ phase_3_forgejo() { section "Phase 3 — Forgejo Secrets + Variables" _current_phase=forgejo skip_if_done forgejo "Forgejo provisioning" && return 0 require_env FORGEJO_ADMIN_TOKEN \ "create at $FORGEJO_API_URL/-/user/settings/applications (scopes: write:repository + write:package)" local insec=() [[ "${FORGEJO_INSECURE:-0}" == "1" ]] && insec=(-k) info "API reachability (auth-free /version probe)" curl -fsSL "${insec[@]}" --max-time 10 "$FORGEJO_API_URL/api/v1/version" >/dev/null \ || die "Forgejo API unreachable at $FORGEJO_API_URL" ok "reachable" info "repo $FORGEJO_OWNER/$FORGEJO_REPO + token write access" forgejo_api GET "/repos/$FORGEJO_OWNER/$FORGEJO_REPO" >/dev/null \ || die "repo not found or token lacks read:repository" ok "repo + token OK" # Registry token : skip if already set ; else prompt. local _exists=0 forgejo_api GET "/repos/$FORGEJO_OWNER/$FORGEJO_REPO/actions/secrets" 2>/dev/null \ | jq -e '.[]? 
| select(.name == "FORGEJO_REGISTRY_TOKEN")' >/dev/null \ && _exists=1 if [[ "${FORCE_FORGEJO_REPROMPT:-0}" != "1" ]] && (( _exists == 1 )); then ok "FORGEJO_REGISTRY_TOKEN already set (FORCE_FORGEJO_REPROMPT=1 to replace)" else local rtok="" if [[ -n "${FORGEJO_REGISTRY_TOKEN:-}" ]]; then rtok="$FORGEJO_REGISTRY_TOKEN" else warn "create the token manually at $FORGEJO_API_URL/-/user/settings/applications" warn " → name: veza-deploy-registry, scopes: write:package + read:package" prompt_password rtok "paste the token (input hidden)" fi forgejo_set_secret "$FORGEJO_OWNER" "$FORGEJO_REPO" FORGEJO_REGISTRY_TOKEN "$rtok" fi forgejo_set_secret "$FORGEJO_OWNER" "$FORGEJO_REPO" ANSIBLE_VAULT_PASSWORD "$(cat "$VAULT_PASS")" forgejo_set_var "$FORGEJO_OWNER" "$FORGEJO_REPO" FORGEJO_REGISTRY_URL \ "$FORGEJO_API_URL/api/packages/$FORGEJO_OWNER/generic" mark_done forgejo } # ============================================================================ # Phase 4 — single ansible-playbook bootstrap (no shell SSH plumbing) # ============================================================================ phase_4_ansible() { section "Phase 4 — Ansible bootstrap (runner pipeline + edge HAProxy)" _current_phase=ansible skip_if_done ansible "ansible bootstrap" && return 0 info "ensuring ansible collections (community.general / .postgresql / .rabbitmq)" for col in community.general community.postgresql community.rabbitmq; do ansible-galaxy collection list "$col" 2>/dev/null | grep -q "^$col" \ || ansible-galaxy collection install "$col" >/dev/null \ || die "ansible-galaxy install $col failed" done ok "collections present" require_env FORGEJO_ADMIN_TOKEN # Try to auto-fetch a runner registration token. The /actions/runners/ # registration-token endpoint sometimes hangs or 404s depending on the # Forgejo version + token scope. On failure, fall back to a manual # prompt (operator generates a token in the UI). 
info "fetching runner registration token from Forgejo" local reg_token if reg_token=$(forgejo_get_runner_token "$FORGEJO_OWNER" "$FORGEJO_REPO"); then ok "got runner registration token (${#reg_token} chars)" else warn "auto-fetch failed (timeout or scope) — falling back to manual prompt" warn "" warn "Generate the token at :" warn " $FORGEJO_API_URL/$FORGEJO_OWNER/$FORGEJO_REPO/settings/actions/runners" warn " → 'Create new runner' → copy the token (looks like a 40-char hex)" warn "" prompt_password reg_token "paste runner registration token (input hidden)" [[ -n "$reg_token" ]] || die "no token provided" fi cd "$REPO_ROOT/infra/ansible" # Detect Incus network from forgejo container (no sudo needed if # operator is in incus-admin group, otherwise skip — Ansible's own # tasks will handle it via the host's incus binary). info "detecting Incus network on R720" local detected_net detected_net=$(ssh "$SSH_TARGET" \ "incus config device get forgejo eth0 network 2>/dev/null" \ | tr -d '[:space:]' || true) [[ -z "$detected_net" || "$detected_net" == "None" ]] && detected_net="net-veza" ok "Incus network : $detected_net" info "running bootstrap_runner.yml + haproxy.yml" info " → ansible will prompt 'BECOME password:' below — type your sudo password ON THE R720" info " (NOT a NOPASSWD-sudo bypass — your password is sent over SSH and never stored)" # Single ansible-playbook invocation runs both playbooks in sequence. # --ask-become-pass prompts ONCE for sudo on the R720 ; that password # is held in memory by ansible and reused for every become: true task # in both playbooks. No NOPASSWD sudo needed. if ! 
ansible-playbook \ -i inventory/staging.yml \ --vault-password-file .vault-pass \ --ask-become-pass \ -e forgejo_registration_token="$reg_token" \ -e forgejo_api_url="$FORGEJO_API_URL" \ -e veza_incus_network="$detected_net" \ playbooks/bootstrap_runner.yml \ playbooks/haproxy.yml; then TALAS_HINT="check ansible output above ; common: wrong sudo password, port 80 not reachable from Internet (Let's Encrypt HTTP-01)" die "ansible-playbook failed" fi info "verifying Let's Encrypt certs landed" local certs certs=$(ssh "$SSH_TARGET" \ "incus exec veza-haproxy -- ls /usr/local/etc/tls/haproxy/ 2>/dev/null" || true) [[ -n "$certs" ]] \ && ok "certs : $(echo "$certs" | tr '\n' ' ')" \ || warn "no certs found — re-run, or check port 80 reachable from Internet" mark_done ansible } # ============================================================================ # Phase 5 — Summary # ============================================================================ phase_5_summary() { section "Phase 5 — Summary" _current_phase=summary cat >&2 <