#!/usr/bin/env bash
# bootstrap-remote.sh — runs ON the R720, invoked over SSH by
# bootstrap-local.sh. Idempotent; resumable via PHASE env var.
#
# Inputs (from SSH-passed env vars):
#   FORGEJO_REGISTRATION_TOKEN  short-lived token to register runner
#   FORGEJO_API_URL             default: https://forgejo.talas.group
#
# Each phase logs to /var/log/talas-bootstrap.log AND emits structured
# >>>PHASE::<<< markers on stdout for the local script.

# lib.sh is concatenated upstream by bootstrap-local before this file is
# piped to bash. When run standalone, source it manually.
if ! declare -F info >/dev/null 2>&1; then
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
  # shellcheck source=lib.sh
  . "$SCRIPT_DIR/lib.sh"
fi

trap_errors

# Persistent log on R720 — useful when the SSH stream gets cut off.
exec > >(tee -a /var/log/talas-bootstrap.log) 2>&1

: "${FORGEJO_API_URL:=https://forgejo.talas.group}"

# ============================================================================
# Phase R1 — Incus profiles
# ============================================================================
# Creates the two container profiles used by the playbooks and removes a
# legacy third one if it is still around from an older bootstrap revision.
remote_phase_1_profiles() {
  section "R1 — Incus profiles (veza-app, veza-data)"
  _current_phase=r1_profiles
  phase r1_profiles START
  if skip_if_done r1_profiles "incus profiles"; then
    phase r1_profiles DONE
    return 0
  fi

  # Two profiles only — `veza-app` for app/edge containers, `veza-data`
  # for the persistent data tier. Both empty by default (the operator
  # adds resource limits / AppArmor rules later). The network device
  # is NOT attached here; playbooks pass `--network` at launch so the
  # caller controls which bridge the container lands on.
  for p in veza-app veza-data; do
    if incus profile show "$p" >/dev/null 2>&1; then
      ok "profile $p already exists"
    else
      incus profile create "$p"
      ok "profile $p created (empty — operator may add limits later)"
    fi
  done

  # An older revision created a `veza-net` profile too — drop it if it's
  # there from a previous bootstrap, since it's redundant with the
  # explicit --network flag. Only delete it when it is still empty; if
  # the operator attached devices to it, leave it alone.
  if incus profile show veza-net >/dev/null 2>&1; then
    if [[ "$(incus profile device list veza-net 2>/dev/null | wc -l)" -eq 0 ]]; then
      warn "found legacy empty profile 'veza-net' — removing (network is set via --network on launch)"
      incus profile delete veza-net 2>/dev/null || true
    else
      warn "legacy 'veza-net' profile has devices attached — leaving alone"
    fi
  fi

  mark_done r1_profiles
  phase r1_profiles DONE
}

# ============================================================================
# Phase R2 — mount Incus socket into forgejo-runner container
# ============================================================================
# Gives the CI runner container access to the host Incus daemon: attaches
# the unix socket as a disk device, enables nesting, and ensures an incus
# client binary exists inside the container.
remote_phase_2_runner_socket() {
  section "R2 — mount /var/lib/incus/unix.socket into forgejo-runner"
  _current_phase=r2_runner_socket
  phase r2_runner_socket START
  if skip_if_done r2_runner_socket "runner socket mount"; then
    phase r2_runner_socket DONE
    return 0
  fi

  if ! incus info forgejo-runner >/dev/null 2>&1; then
    die "container 'forgejo-runner' not found ; expected at the IP shown in the design"
  fi

  if incus config device show forgejo-runner 2>/dev/null | grep -q '^incus-socket:'; then
    ok "incus-socket device already attached"
  else
    info "attaching unix socket as a disk device"
    incus config device add forgejo-runner incus-socket disk \
      source=/var/lib/incus/unix.socket \
      path=/var/lib/incus/unix.socket >/dev/null
    ok "device added"
  fi

  # Nesting is required for the runner to drive Incus from inside a
  # container; changing it only takes effect after a restart.
  if [[ "$(incus config get forgejo-runner security.nesting)" != "true" ]]; then
    info "enabling security.nesting"
    incus config set forgejo-runner security.nesting=true
    ok "nesting=true ; restart required"
    info "restarting forgejo-runner container"
    incus restart forgejo-runner
    sleep 3
  fi

  info "ensuring incus client binary is in the runner"
  if incus exec forgejo-runner -- command -v incus >/dev/null 2>&1; then
    ok "incus already in runner"
  elif [[ -x /usr/bin/incus ]]; then
    # Push the host's binary into the container — avoids apt repo
    # issues (Debian 13 doesn't ship incus-client as a separate
    # package, and the full `incus` package would also pull in the
    # daemon which we don't want in a runner container).
    info "pushing /usr/bin/incus from host into runner:/usr/local/bin/incus"
    incus file push /usr/bin/incus forgejo-runner/usr/local/bin/incus --mode 0755
    ok "incus binary pushed"
  else
    die "no /usr/bin/incus on host AND none in runner — install incus on the host first"
  fi

  info "smoke-test : runner can incus list"
  if incus exec forgejo-runner -- incus list >/dev/null 2>&1; then
    ok "runner has Incus access"
  else
    # Common cause: the runner's process can read /var/lib/incus/
    # unix.socket only if it has the right gid. The socket is owned
    # root:incus-admin (or equivalent) on the host. Inside the
    # container we either run as root (works) or need to add the
    # runner user to a group with the same gid as host's incus-admin.
    # We don't try to fix that here — it's runner-process-specific.
    warn "runner cannot incus list as default user"
    warn "this may be normal if the systemd unit runs as root inside"
    warn "the container ; if not, add the runner user to a group with"
    warn "the same gid as the host's incus-admin group"
  fi

  mark_done r2_runner_socket
  phase r2_runner_socket DONE
}

# ============================================================================
# Phase R3 — runner label = 'incus'
# ============================================================================
# Ensures the Forgejo runner is registered with the 'incus' label; if not,
# re-registers it using FORGEJO_REGISTRATION_TOKEN.
remote_phase_3_runner_labels() {
  section "R3 — forgejo-runner labelled 'incus,self-hosted'"
  _current_phase=r3_runner_labels
  phase r3_runner_labels START
  if skip_if_done r3_runner_labels "runner labels"; then
    phase r3_runner_labels DONE
    return 0
  fi

  require_env FORGEJO_REGISTRATION_TOKEN \
    "set on the SSH command-line by bootstrap-local.sh"

  # Find the runner config inside the container. Path varies by install
  # method; act_runner default is /etc/forgejo-runner/.runner.
  local runner_cfg
  runner_cfg=$(incus exec forgejo-runner -- bash -c '
    for f in /etc/forgejo-runner/.runner /var/lib/forgejo-runner/.runner /opt/forgejo-runner/.runner; do
      [[ -f "$f" ]] && echo "$f" && exit 0
    done
    exit 1
  ' 2>/dev/null) || true

  # Extract current labels: prefer jq, fall back to a crude grep if jq
  # isn't installed in the container, else empty.
  local labels=""
  if [[ -n "$runner_cfg" ]]; then
    labels=$(incus exec forgejo-runner -- jq -r '.labels[]?' "$runner_cfg" 2>/dev/null \
      || incus exec forgejo-runner -- grep -oE '"labels":\[[^]]+' "$runner_cfg" 2>/dev/null \
      || echo "")
  fi

  if echo "$labels" | grep -qw incus; then
    ok "runner already has 'incus' label"
    mark_done r3_runner_labels
    phase r3_runner_labels DONE
    return 0
  fi

  info "re-registering runner with labels incus,self-hosted"

  # Stop systemd unit, wipe old registration, re-register, start.
  incus exec forgejo-runner -- systemctl stop forgejo-runner.service 2>/dev/null \
    || incus exec forgejo-runner -- systemctl stop act_runner.service 2>/dev/null \
    || warn "no systemd unit to stop ; will skip"
  # Plain `if` rather than `[[ … ]] && cmd`: a false test at the end of
  # an && list returns 1 and would trip set -e / the ERR trap.
  if [[ -n "$runner_cfg" ]]; then
    incus exec forgejo-runner -- rm -f "$runner_cfg"
  fi

  # Detect runner binary name (forgejo-runner vs upstream act_runner).
  local runner_bin
  runner_bin=$(incus exec forgejo-runner -- bash -c '
    for b in forgejo-runner act_runner; do
      command -v "$b" >/dev/null 2>&1 && echo "$b" && exit 0
    done
    exit 1
  ' 2>/dev/null) || die "no forgejo-runner / act_runner binary found in container"

  incus exec forgejo-runner -- "$runner_bin" register \
    --no-interactive \
    --instance "$FORGEJO_API_URL" \
    --token "$FORGEJO_REGISTRATION_TOKEN" \
    --name "r720-incus" \
    --labels "incus,self-hosted"

  incus exec forgejo-runner -- systemctl start "$runner_bin.service" \
    || incus exec forgejo-runner -- systemctl start forgejo-runner.service
  ok "runner re-registered with incus label"

  mark_done r3_runner_labels
  phase r3_runner_labels DONE
}

# ============================================================================
# Phase R4 — sanity, summary
# ============================================================================
# Read-only checks: profiles exist, runner service active, Forgejo reachable
# from inside the runner. Emits warnings only; never fails the bootstrap.
remote_phase_4_sanity() {
  section "R4 — sanity check"
  _current_phase=r4_sanity
  phase r4_sanity START

  info "incus profiles :"
  # `|| true`: with pipefail, grep returns 1 when no veza-* profile
  # matches, which would otherwise abort the phase under set -e.
  incus profile list -f csv | grep -E '^veza-' | awk -F, '{print "  " $1}' || true

  info "forgejo-runner status :"
  incus exec forgejo-runner -- systemctl is-active forgejo-runner.service 2>/dev/null \
    || incus exec forgejo-runner -- systemctl is-active act_runner.service 2>/dev/null \
    || warn "no active runner service — verify manually"

  # Try the public URL first, then the container IP over https (self-signed
  # cert tolerated via -k) and plain http as fallbacks.
  info "forgejo container reachable from runner :"
  if incus exec forgejo-runner -- curl -sSf -o /dev/null --max-time 5 \
       "$FORGEJO_API_URL" 2>/dev/null \
     || incus exec forgejo-runner -- curl -sSf -ko /dev/null --max-time 5 \
       https://10.0.20.105:3000/ 2>/dev/null \
     || incus exec forgejo-runner -- curl -sSf -o /dev/null --max-time 5 \
       http://10.0.20.105:3000/ 2>/dev/null; then
    ok "runner can reach Forgejo"
  else
    warn "runner cannot reach Forgejo — check WireGuard / DNS / firewall"
  fi

  mark_done r4_sanity
  phase r4_sanity DONE
}

# Entry point: run every phase from $PHASE (default 1) onward.
main() {
  local start=${PHASE:-1}
  info "remote bootstrap starting at phase $start (log: /var/log/talas-bootstrap.log)"
  # Plain `if` statements rather than `[[ … ]] && phase_fn`: with set -e /
  # the ERR trap installed by trap_errors, a false test at the end of an
  # && list returns 1 and aborts the script (e.g. PHASE=5 would die here
  # before printing the final ok).
  if [[ $start -le 1 ]]; then remote_phase_1_profiles; fi
  if [[ $start -le 2 ]]; then remote_phase_2_runner_socket; fi
  if [[ $start -le 3 ]]; then remote_phase_3_runner_labels; fi
  if [[ $start -le 4 ]]; then remote_phase_4_sanity; fi
  ok "remote bootstrap done"
}

main "$@"