#!/usr/bin/env bash
# ============================================================
# BFG history cleanup for Veza monorepo
# ============================================================
# Goal: strip committed audio (.mp3/.wav), certs (.pem/.key/.crt),
# Go binaries, and AI session artefacts from git history, then
# compact .git from ~2.3 GB down to an expected <500 MB.
#
# WHEN TO RUN: after commits 98ee449f4 + 1f00fb762 (untrack debris
# + dev key regen) have been pushed to origin and reviewed.
#
# CHOICE: this script uses `git-filter-repo` (modern, fast, pure
# Python). BFG (Java) is supported as a fallback — set
# USE_BFG=1 to force it.
#
# ============================================================
# SAFETY MODEL
# ============================================================
# This script NEVER force-pushes by itself. It:
#   1. Verifies prereqs
#   2. Clones repo as bare mirror to /tmp/veza-bfg.git
#   3. Strips blobs > SIZE_THRESHOLD
#   4. Strips files matching FILE_PATTERNS
#   5. Runs aggressive gc
#   6. Prints size-before / size-after
#   7. Prints the exact force-push commands for YOU to run manually
#
# You verify the bare clone by hand before force-pushing. No surprises.
#
# ============================================================
# ENVIRONMENT (all optional)
# ============================================================
#   BARE_CLONE      absolute path of the scratch mirror
#                   (default: /tmp/veza-bfg.git)
#   SIZE_THRESHOLD  blobs larger than this are stripped (default: 5M)
#   USE_BFG         set to 1 to skip git-filter-repo and use BFG
#   BFG_JAR         path to bfg.jar, used when no `bfg` wrapper is on
#                   PATH (default: /usr/local/lib/bfg.jar)
#
# ============================================================
# PREREQS
# ============================================================
#   git-filter-repo: pip install --user git-filter-repo
#                    OR: https://github.com/newren/git-filter-repo
#   (fallback) BFG:  https://rtyley.github.io/bfg-repo-cleaner/
#                    Requires Java 8+. `brew install bfg` or download .jar
#
# ============================================================

set -euo pipefail

# ---------- CONFIG ----------
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
BARE_CLONE="${BARE_CLONE:-/tmp/veza-bfg.git}"
SIZE_THRESHOLD="${SIZE_THRESHOLD:-5M}"
USE_BFG="${USE_BFG:-0}"
# Resolve the jar default once. The tool probe and the strip step must see
# the same value, and a bare ${BFG_JAR} would abort under `set -u` whenever
# the caller relied on the default location.
BFG_JAR="${BFG_JAR:-/usr/local/lib/bfg.jar}"

# Files to strip from ALL history (even if they're ...
# NOTE(review): the rest of this comment, the definitions of the
# FILE_PATTERNS_FILTERREPO and FILE_PATTERNS_BFG arrays, and the head of
# die() are not visible in the reviewed text. Restore both arrays here
# before running: section 3 reads them and `set -u` aborts if they are unset.

# ---------- HELPERS ----------

# Print a message on stderr and abort the script with status 1.
# NOTE(review): only the tail of the original definition (`>&2; exit 1; }`)
# was visible; the exact message format is an assumption to confirm.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print a blank line followed by a banner for the given title.
section() {
  echo ""
  echo "━━━ $* ━━━"
}

# Ask a yes/no question on the terminal.
# Arguments: $1 - question text (" [y/N] " is appended)
# Returns:   0 only for an answer of exactly "y" or "Y"; EOF on stdin
#            counts as "no" instead of killing the script via `set -e`.
confirm() {
  local reply=""
  read -r -p "$1 [y/N] " reply || reply=""
  [[ "$reply" == "y" || "$reply" == "Y" ]]
}

# Pick the history-rewriting tool and record it in the global TOOL.
# Globals: USE_BFG, BFG_JAR (read); TOOL (written)
# Order:   git-filter-repo (unless USE_BFG=1), then a `bfg` wrapper on
#          PATH, then `java` plus the jar at BFG_JAR. Dies if none is found.
check_tool() {
  if [[ "$USE_BFG" != "1" ]] && command -v git-filter-repo >/dev/null 2>&1; then
    TOOL="filter-repo"
  elif command -v bfg >/dev/null 2>&1; then
    TOOL="bfg"
  elif command -v java >/dev/null 2>&1 && [[ -f "$BFG_JAR" ]]; then
    TOOL="bfg-jar"
  else
    die "Install git-filter-repo (pip install --user git-filter-repo) or BFG (https://rtyley.github.io/bfg-repo-cleaner/)"
  fi
  echo "Using: $TOOL"
}

# Run BFG with the given arguments, through the wrapper or the jar
# depending on what check_tool selected.
# Globals: TOOL, BFG_JAR (read)
run_bfg() {
  if [[ "$TOOL" == "bfg-jar" ]]; then
    java -jar "$BFG_JAR" "$@"
  else
    bfg "$@"
  fi
}

# Print the first field of `du -sh` for path $1 (empty if du prints nothing).
human_size() {
  du -sh "$1" 2>/dev/null | awk '{print $1}'
}

# ---------- SECTION 1: PREREQS ----------
section "1. Prereqs"
check_tool

[[ -d "$REPO_ROOT/.git" ]] || die "REPO_ROOT ($REPO_ROOT) is not a git repo"

# The script changes directory twice and reuses BARE_CLONE afterwards, so a
# relative path would resolve to different places at different steps.
[[ "$BARE_CLONE" == /* ]] || die "BARE_CLONE ($BARE_CLONE) must be an absolute path"

# Section 2 may `rm -rf` BARE_CLONE; never let that take the source repo.
if [[ "$REPO_ROOT/" == "${BARE_CLONE%/}/"* ]]; then
  die "BARE_CLONE ($BARE_CLONE) must not be REPO_ROOT ($REPO_ROOT) or one of its parents"
fi

cd "$REPO_ROOT"

# Refuse to run if working tree is dirty
if ! git diff-index --quiet HEAD --; then
  die "Working tree has uncommitted changes. Commit or stash first."
fi

CURRENT_BRANCH="$(git branch --show-current)"
echo "Current branch: $CURRENT_BRANCH"
echo "Current .git size: $(human_size .git)"

confirm "Proceed with bare mirror clone to $BARE_CLONE?" || die "Aborted by user"

# ---------- SECTION 2: BARE MIRROR CLONE ----------
section "2. Bare mirror clone"
if [[ -e "$BARE_CLONE" ]]; then
  confirm "$BARE_CLONE already exists. Delete and recreate?" || die "Aborted"
  rm -rf -- "${BARE_CLONE:?}"
fi

git clone --mirror "$REPO_ROOT" "$BARE_CLONE"
BEFORE_SIZE="$(human_size "$BARE_CLONE")"
echo "Bare clone size BEFORE: $BEFORE_SIZE"

# ---------- SECTION 3: STRIP ----------
section "3. Strip history"

cd "$BARE_CLONE"

case "$TOOL" in
  filter-repo)
    # Strip blobs bigger than threshold
    git filter-repo --strip-blobs-bigger-than "$SIZE_THRESHOLD" --force

    # Strip specific path patterns
    PATH_ARGS=()
    for p in "${FILE_PATTERNS_FILTERREPO[@]}"; do
      if [[ "$p" == "!"* ]]; then
        # Negations are not translated to filter-repo arguments; say so
        # instead of dropping them silently.
        echo "WARNING: negated pattern '$p' is not supported with git-filter-repo; skipping" >&2
        continue
      fi
      PATH_ARGS+=(--path-glob "$p")
    done

    # filter-repo uses --invert-paths to DELETE matched paths. Skip the pass
    # when no pattern is left: there is nothing to delete, and expanding an
    # empty array under `set -u` aborts on bash older than 4.4.
    if (( ${#PATH_ARGS[@]} > 0 )); then
      git filter-repo --invert-paths "${PATH_ARGS[@]}" --force
    fi
    ;;

  bfg | bfg-jar)
    # BFG: strip by size
    run_bfg --strip-blobs-bigger-than "$SIZE_THRESHOLD" --no-blob-protection .

    # BFG: strip by filename (no path — matches filename anywhere in history)
    for p in "${FILE_PATTERNS_BFG[@]}"; do
      run_bfg --delete-files "$p" --no-blob-protection .
    done
    ;;

  *)
    die "Unknown TOOL: $TOOL"
    ;;
esac

# ---------- SECTION 4: GC ----------
section "4. Aggressive gc"
git reflog expire --expire=now --all
git gc --prune=now --aggressive

AFTER_SIZE="$(human_size "$BARE_CLONE")"
section "RESULT"
echo "BEFORE: $BEFORE_SIZE"
echo "AFTER: $AFTER_SIZE"
echo ""

# ---------- SECTION 5: NEXT STEPS ----------
section "5. Next steps (manual)"
# NOTE(review): the text of manual steps 1-4 is not visible in the reviewed
# text; the only surviving fragment of that span is the trailing word "veza".
# Restore those steps at the top of this heredoc. The heredoc delimiter is
# unquoted on purpose so $REPO_ROOT and $BARE_CLONE expand in the output.
cat <<MANUAL
   Or if you trust this machine's local blob state:
     cd $REPO_ROOT
     git reflog expire --expire=now --all
     git gc --prune=now --aggressive

5. REGENERATE local dev secrets that live outside git:
     ./scripts/generate-jwt-keys.sh
     ./scripts/generate-ssl-cert.sh

6. DELETE the bare clone once everything is verified stable:
     rm -rf $BARE_CLONE

MANUAL