#!/usr/bin/env bash
# ============================================================
# BFG history cleanup for Veza monorepo
# ============================================================
# Goal: strip committed audio (.mp3/.wav), certs (.pem/.key/.crt),
#       Go binaries, and AI session artefacts from git history, then
#       compact .git from ~2.3 GB down to an expected <500 MB.
#
# WHEN TO RUN: after commits 98ee449f4 + 1f00fb762 (untrack debris
#              + dev key regen) have been pushed to origin and reviewed.
#
# CHOICE: this script uses `git-filter-repo` (modern, fast, pure
#         Python). BFG (Java) is supported as a fallback — set
#         USE_BFG=1 to force it.
#
# ENVIRONMENT (all optional):
#   BARE_CLONE      where the bare mirror is created (default /tmp/veza-bfg.git)
#   SIZE_THRESHOLD  blobs larger than this are stripped (default 5M)
#   USE_BFG         set to 1 to prefer BFG over git-filter-repo
#   BFG_JAR         path to bfg.jar when no `bfg` wrapper is on PATH
#                   (default /usr/local/lib/bfg.jar)
#
# ============================================================
# SAFETY MODEL
# ============================================================
# This script NEVER force-pushes by itself. It:
#   1. Verifies prereqs
#   2. Clones repo as bare mirror to /tmp/veza-bfg.git
#   3. Strips blobs > SIZE_THRESHOLD
#   4. Strips files matching FILE_PATTERNS
#   5. Runs aggressive gc
#   6. Prints size-before / size-after
#   7. Prints the exact force-push commands for YOU to run manually
#
# You verify the bare clone by hand before force-pushing. No surprises.
#
# ============================================================
# PREREQS
# ============================================================
#   git-filter-repo: pip install --user git-filter-repo
#                    OR: https://github.com/newren/git-filter-repo
#   (fallback) BFG:  https://rtyley.github.io/bfg-repo-cleaner/
#                    Requires Java 8+. `brew install bfg` or download .jar
#
# ============================================================

set -euo pipefail

# ---------- CONFIG ----------
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
BARE_CLONE="${BARE_CLONE:-/tmp/veza-bfg.git}"
SIZE_THRESHOLD="${SIZE_THRESHOLD:-5M}"
USE_BFG="${USE_BFG:-0}"
# Resolved once here so the detection in check_tool and the later
# `java -jar` invocation agree, and so `set -u` cannot trip on an unset
# BFG_JAR when the jar is found at the default location.
BFG_JAR="${BFG_JAR:-/usr/local/lib/bfg.jar}"
# Set by check_tool to one of: filter-repo | bfg | bfg-jar
TOOL=""

# Files to strip from ALL history (even if they're smaller than SIZE_THRESHOLD).
#
# NOTE(review): the original pattern lists were unreadable in the reviewed
# copy of this file. The lists below are reconstructed ONLY from the
# extensions named in the header (audio + certs) — confirm them against the
# real lists before running.
# TODO(review): the header also promises Go binaries and AI session
# artefacts; their paths are not known here and are NOT covered below.
#
# filter-repo globs are matched against the full path. Entries starting
# with "!" are treated as negations and are currently skipped.
FILE_PATTERNS_FILTERREPO=(
  '*.mp3'
  '*.wav'
  '*.pem'
  '*.key'
  '*.crt'
)
# BFG globs match the file NAME only (no directory part). Each entry costs
# one full BFG pass, so extensions are grouped into a single brace glob.
FILE_PATTERNS_BFG=(
  '*.{mp3,wav,pem,key,crt}'
)

# ---------- HELPERS ----------

# Print an error to stderr and abort with status 1.
# NOTE(review): the "ERROR: " prefix is reconstructed; only the
# `>&2; exit 1` tail of the original was readable.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

# Print a visually separated section banner.
section() {
  echo ""
  echo "━━━ $* ━━━"
}

# Ask a yes/no question; succeed only on an explicit "y" or "Y".
# Arguments: $1 - question text (the " [y/N] " suffix is appended here)
# EOF on stdin counts as "no" so the caller can report the abort instead
# of `set -e` killing the script silently.
confirm() {
  local reply=""
  read -r -p "$1 [y/N] " reply || reply=""
  [[ "$reply" == "y" || "$reply" == "Y" ]]
}

# Pick the rewrite tool and store it in TOOL.
# Order: git-filter-repo (unless USE_BFG=1), then a `bfg` wrapper on PATH,
# then `java -jar $BFG_JAR`. Dies when none is available.
check_tool() {
  if command -v git-filter-repo >/dev/null 2>&1 && [[ "$USE_BFG" != "1" ]]; then
    TOOL="filter-repo"
  elif command -v bfg >/dev/null 2>&1; then
    TOOL="bfg"
  elif command -v java >/dev/null 2>&1 && [[ -f "$BFG_JAR" ]]; then
    TOOL="bfg-jar"
  else
    die "Install git-filter-repo (pip install --user git-filter-repo) or BFG (https://rtyley.github.io/bfg-repo-cleaner/)"
  fi
  echo "Using: $TOOL"
}

# Print the human-readable disk usage of a path (first column of `du -sh`).
# Arguments: $1 - file or directory
# Best-effort: the figure is informational only, so a du failure must not
# abort the run under `set -e` / `pipefail`.
human_size() {
  du -sh "$1" 2>/dev/null | awk '{print $1}' || true
}

# ---------- SECTION 1: PREREQS ----------
section "1. Prereqs"
check_tool

[[ -d "$REPO_ROOT/.git" ]] || die "REPO_ROOT ($REPO_ROOT) is not a git repo"
cd "$REPO_ROOT" || die "Cannot cd to $REPO_ROOT"

# Refuse to run if working tree is dirty
if ! git diff-index --quiet HEAD --; then
  die "Working tree has uncommitted changes. Commit or stash first."
fi

CURRENT_BRANCH="$(git branch --show-current)"
echo "Current branch: $CURRENT_BRANCH"
echo "Current .git size: $(human_size .git)"

confirm "Proceed with bare mirror clone to $BARE_CLONE?" || die "Aborted by user"

# ---------- SECTION 2: BARE MIRROR CLONE ----------
section "2. Bare mirror clone"
if [[ -e "$BARE_CLONE" ]]; then
  confirm "$BARE_CLONE already exists. Delete and recreate?" || die "Aborted"
  # ${VAR:?} makes an empty path a hard error instead of `rm -rf ""`.
  rm -rf -- "${BARE_CLONE:?}"
fi

git clone --mirror "$REPO_ROOT" "$BARE_CLONE"
BEFORE_SIZE="$(human_size "$BARE_CLONE")"
echo "Bare clone size BEFORE: $BEFORE_SIZE"

# ---------- SECTION 3: STRIP ----------
section "3. Strip history"
cd "$BARE_CLONE" || die "Cannot cd to $BARE_CLONE"

case "$TOOL" in
  filter-repo)
    # Size filter and path filter are independent, so both are applied in
    # a single rewrite pass.
    FILTER_ARGS=(--force --strip-blobs-bigger-than "$SIZE_THRESHOLD")
    PATH_ARGS=()
    for p in "${FILE_PATTERNS_FILTERREPO[@]}"; do
      if [[ "$p" == "!"* ]]; then continue; fi # skip negations for now
      PATH_ARGS+=(--path-glob "$p")
    done
    # filter-repo uses --invert-paths to DELETE matched paths. Only add it
    # when there is at least one path; this also avoids expanding an empty
    # array, which `set -u` rejects on bash < 4.4.
    if ((${#PATH_ARGS[@]} > 0)); then
      FILTER_ARGS+=(--invert-paths "${PATH_ARGS[@]}")
    fi
    git filter-repo "${FILTER_ARGS[@]}"
    ;;
  bfg | bfg-jar)
    # Same BFG steps whether it is started via a wrapper or the raw jar.
    if [[ "$TOOL" == "bfg" ]]; then
      BFG_CMD=(bfg)
    else
      BFG_CMD=(java -jar "$BFG_JAR")
    fi
    # BFG: strip by size
    "${BFG_CMD[@]}" --strip-blobs-bigger-than "$SIZE_THRESHOLD" --no-blob-protection .
    # BFG: strip by filename (no path — matches filename anywhere in history)
    for p in "${FILE_PATTERNS_BFG[@]}"; do
      "${BFG_CMD[@]}" --delete-files "$p" --no-blob-protection .
    done
    ;;
  *)
    die "Unknown TOOL '$TOOL'"
    ;;
esac

# ---------- SECTION 4: GC ----------
section "4. Aggressive gc"
# Expire reflogs first so the stripped objects become unreachable and
# `gc --prune=now` can actually delete them.
git reflog expire --expire=now --all
git gc --prune=now --aggressive

AFTER_SIZE="$(human_size "$BARE_CLONE")"
echo ""
echo "━━━ RESULT ━━━"
echo "BEFORE: $BEFORE_SIZE"
echo "AFTER:  $AFTER_SIZE"
echo ""

# ---------- SECTION 5: NEXT STEPS ----------
section "5. Next steps (manual)"
# NOTE(review): steps 1-3 and the first lines of step 4 were unreadable in
# the reviewed copy; they are reconstructed from the SAFETY MODEL header
# ("prints the exact force-push commands for YOU to run manually"). The
# text from "Or if you trust this machine's local blob state" onwards is
# original. Confirm the wording and replace <origin-url> guidance as needed.
# The delimiter is unquoted on purpose: $BARE_CLONE / $REPO_ROOT expand.
cat <<MANUAL

1. VERIFY the rewritten bare clone by hand:
     cd $BARE_CLONE
     git log --oneline -20
     git count-objects -vH

2. FORCE-PUSH the rewritten history (this script never does it for you):
     cd $BARE_CLONE
     git remote remove origin 2>/dev/null || true
     git remote add origin <origin-url>
     git push --force --all origin
     git push --force --tags origin

3. TELL every collaborator that history was rewritten. Old clones must
   not be pushed from again.

4. RE-CLONE on every machine:
     git clone <origin-url> veza
   Or if you trust this machine's local blob state:
     cd $REPO_ROOT
     git reflog expire --expire=now --all
     git gc --prune=now --aggressive

5. REGENERATE local dev secrets that live outside git:
     ./scripts/generate-jwt-keys.sh
     ./scripts/generate-ssl-cert.sh

6. DELETE the bare clone once everything is verified stable:
     rm -rf $BARE_CLONE

MANUAL