veza/scripts/minio-migrate-from-single.sh

feat(infra): MinIO distributed EC:2 + migration script (W3 Day 12)

Four-node distributed MinIO cluster, single erasure set EC:2, tolerates 2 simultaneous node losses. 50% storage efficiency. Pinned to RELEASE.2025-09-07T16-13-09Z to match docker-compose so dev/prod parity is preserved.

- infra/ansible/roles/minio_distributed/: install pinned binary, systemd unit pointed at MINIO_VOLUMES with bracket-expansion form, EC:2 forced via MINIO_STORAGE_CLASS_STANDARD. Vault assertion blocks shipping placeholder credentials to staging/prod.
- bucket init: creates veza-prod-tracks, enables versioning, applies lifecycle.json (30d noncurrent expiry + 7d abort-multipart). Cold-tier transition ready but inert until minio_remote_tier_name is set.
- infra/ansible/playbooks/minio_distributed.yml: provisions the 4 containers, applies common baseline + role.
- infra/ansible/inventory/lab.yml: new minio_nodes group.
- infra/ansible/tests/test_minio_resilience.sh: kill 2 nodes, verify EC:2 reconstruction (read OK + checksum matches), restart, wait for self-heal.
- scripts/minio-migrate-from-single.sh: mc mirror --preserve from the single-node bucket to the new cluster, count-verifies, prints rollout next-steps.
- config/prometheus/alert_rules.yml: MinIODriveOffline (warn) + MinIONodesUnreachable (page) — page fires at >= 2 nodes unreachable because that's the redundancy ceiling for EC:2.
- docs/ENV_VARIABLES.md §12: MinIO migration cross-ref.

Acceptance (Day 12): EC:2 survives 2 concurrent kills + self-heals. Lab apply pending. No backend code change — interface stays AWS S3.

W3 progress: Redis Sentinel ✓ (Day 11), MinIO distributed ✓ (this), CDN ⏳ Day 13, DMCA ⏳ Day 14, embed ⏳ Day 15.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-28 11:46:42 +00:00
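For reference, the bucket-init step described in the commit maps roughly onto the following mc commands. This is a sketch only: it assumes the cluster alias is named veza-dist and that lifecycle.json sits in the working directory, and the actual Ansible tasks (and the exact ilm subcommand, whose naming has shifted across mc releases) may differ:

    mc mb --ignore-existing veza-dist/veza-prod-tracks
    mc version enable veza-dist/veza-prod-tracks
    mc ilm import veza-dist/veza-prod-tracks < lifecycle.json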
#!/usr/bin/env bash
# minio-migrate-from-single.sh — copy objects from the single-node
# MinIO bucket (used through v1.0.8) into the new distributed cluster.
#
# v1.0.9 W3 Day 12. The migration is one-way and idempotent —
# `mc mirror --preserve` skips objects that already exist on the
# destination with the same etag, so re-runs are safe + cheap.
#
# Required env:
#   SOURCE_ENDPOINT     e.g. http://veza.fr:19000
#   SOURCE_ACCESS_KEY   minio root user on the old cluster
#   SOURCE_SECRET_KEY   minio root password on the old cluster
#   SOURCE_BUCKET       e.g. veza-files (the dev bucket name)
#   DEST_ENDPOINT       e.g. http://minio-1.lxd:9000
#   DEST_ACCESS_KEY     root user on the new distributed cluster
#   DEST_SECRET_KEY     root password on the new distributed cluster
#   DEST_BUCKET         e.g. veza-prod-tracks
#
# Optional:
#   DRY_RUN=1           print what would be copied, don't actually copy
#
# Exit codes:
#   0 — mirror complete, destination object count covers the source
#   1 — required env missing
#   2 — source or dest cluster unreachable
#   3 — count mismatch after mirror (something silently dropped)
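#
# Example invocation (endpoints and bucket names reuse the e.g. values above;
# the keys are placeholders, not real credentials):
#   SOURCE_ENDPOINT=http://veza.fr:19000 SOURCE_ACCESS_KEY=... SOURCE_SECRET_KEY=... \
#   SOURCE_BUCKET=veza-files \
#   DEST_ENDPOINT=http://minio-1.lxd:9000 DEST_ACCESS_KEY=... DEST_SECRET_KEY=... \
#   DEST_BUCKET=veza-prod-tracks \
#   DRY_RUN=1 ./scripts/minio-migrate-from-single.sh
# Drop DRY_RUN=1 for the real copy; re-runs are safe because the mirror skips
# objects that already exist on the destination.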
set -euo pipefail
require_env() {
  local v=$1
  if [ -z "${!v:-}" ]; then
    echo "FAIL: required env var $v is not set" >&2
    exit 1
  fi
}

require_env SOURCE_ENDPOINT
require_env SOURCE_ACCESS_KEY
require_env SOURCE_SECRET_KEY
require_env SOURCE_BUCKET
require_env DEST_ENDPOINT
require_env DEST_ACCESS_KEY
require_env DEST_SECRET_KEY
require_env DEST_BUCKET
if ! command -v mc >/dev/null 2>&1; then
  echo "FAIL: mc (MinIO client) not in PATH. Install with:" >&2
  echo "  curl -fsSL https://dl.min.io/client/mc/release/linux-amd64/mc -o /usr/local/bin/mc && chmod +x /usr/local/bin/mc" >&2
  exit 1
fi

DRY_RUN=${DRY_RUN:-0}
log() { printf '[%s] %s\n' "$(date +%H:%M:%S)" "$*" >&2; }
log "Setting mc aliases"
mc alias set veza-source "$SOURCE_ENDPOINT" "$SOURCE_ACCESS_KEY" "$SOURCE_SECRET_KEY" >/dev/null
mc alias set veza-dest "$DEST_ENDPOINT" "$DEST_ACCESS_KEY" "$DEST_SECRET_KEY" >/dev/null
# Sanity — both endpoints reachable.
if ! mc admin info veza-source >/dev/null 2>&1; then
  echo "FAIL: source cluster $SOURCE_ENDPOINT not reachable" >&2
  exit 2
fi
if ! mc admin info veza-dest >/dev/null 2>&1; then
  echo "FAIL: destination cluster $DEST_ENDPOINT not reachable" >&2
  exit 2
fi

log "Source bucket count :"
src_count=$(mc ls --recursive "veza-source/$SOURCE_BUCKET" 2>/dev/null | wc -l)
log " $src_count objects in $SOURCE_BUCKET"
log "Destination bucket count (before) :"
dest_count_before=$(mc ls --recursive "veza-dest/$DEST_BUCKET" 2>/dev/null | wc -l || echo 0)
log " $dest_count_before objects in $DEST_BUCKET"
if [ "$DRY_RUN" = "1" ]; then
log "DRY_RUN=1 — running mirror with --dry-run flag"
mc mirror --preserve --dry-run "veza-source/$SOURCE_BUCKET" "veza-dest/$DEST_BUCKET"
exit 0
fi
log "Mirroring (this will take time proportional to bucket size)"
mc mirror --preserve "veza-source/$SOURCE_BUCKET" "veza-dest/$DEST_BUCKET"
log "Verifying object count after mirror"
dest_count_after=$(mc ls --recursive "veza-dest/$DEST_BUCKET" 2>/dev/null | wc -l)
log " $dest_count_after objects in $DEST_BUCKET (was $dest_count_before before)"
if [ "$dest_count_after" -lt "$src_count" ]; then
echo "FAIL: destination has fewer objects than source ($dest_count_after < $src_count). Mirror is incomplete." >&2
exit 3
fi
log "PASS: mirror complete. Object counts match (src=$src_count dest=$dest_count_after)."
log ""
log "Next steps :"
log " 1. Update backend .env on every API host :"
log " AWS_S3_ENDPOINT=$DEST_ENDPOINT"
log " AWS_S3_BUCKET=$DEST_BUCKET"
log " AWS_ACCESS_KEY_ID=$DEST_ACCESS_KEY"
log " AWS_SECRET_ACCESS_KEY=<destination secret>"
log " 2. Rolling restart of the API tier."
log " 3. Smoke-test : POST /api/v1/tracks (chunked upload), GET /tracks/:id/stream."
log " 4. Keep the old cluster hot for ~ 1 week before decommissioning."