#!/usr/bin/env bash
# validate-cohort.sh — sanity-check a soft-launch beta cohort CSV
# before it gets fed to send-invitations.sh.
#
# The CSV is the operator's curated list of beta-tester emails +
# segmentation. This script catches the avoidable mistakes BEFORE
# we batch-insert 100 rows into beta_invites and start spraying
# emails :
#
#   - Empty file or wrong header
#   - Duplicate emails (would create 2 invites for the same person)
#   - Malformed emails (missing @, leading/trailing whitespace)
#   - Cohort distribution looks off (no creators, only one segment,
#     under-50 total — soft-launch acceptance gate is ≥50 testers)
#   - Email collisions with existing users (already registered = the
#     invite code is wasted)
#
# v1.0.10 Cluster 3.4.
#
# Usage :
#   bash scripts/soft-launch/validate-cohort.sh path/to/cohort.csv
#
# Optional env :
#   DATABASE_URL   if set, also checks for collisions with the users
#                  table (email already registered → flagged but not
#                  fatal — operator may want to invite an existing
#                  user back to test the new flows).
#   MIN_COHORT     minimum total rows required (default 50, matches the
#                  acceptance-gate threshold in SOFT_LAUNCH_BETA_2026.md).
#   MIN_CREATORS   minimum number of creator-* cohort rows (default 5).
#
# Exit codes :
#   0 — cohort valid
#   1 — cohort malformed (will block send-invitations.sh)
#   2 — cohort merely warns (size below minimum, missing collision
#       check) ; operator may proceed with --force
set -euo pipefail

# Path to the cohort CSV (first positional argument). An absent argument and a
# path that is not a regular file both end in the usage text and exit 1; when
# a path WAS given, say so first, so a typo is not mistaken for a syntax error.
CSV=${1:-}
if [ -z "$CSV" ] || [ ! -f "$CSV" ]; then
  if [ -n "$CSV" ]; then
    echo "ERROR: cohort file not found: $CSV" >&2
  fi
  cat >&2 <<EOF
usage : bash scripts/soft-launch/validate-cohort.sh path/to/cohort.csv

CSV format (header required) :
email,cohort,sent_by_email
alice@example.com,creator-vinyl,ops@veza.fr
bob@example.com,listener-jazz,ops@veza.fr
...

cohort labels are free-text but should follow the convention
<role>-<segment> so the post-launch attribution report groups cleanly.
EOF
  exit 1
fi

# Soft-check thresholds, overridable from the environment.
MIN_COHORT=${MIN_COHORT:-50}
MIN_CREATORS=${MIN_CREATORS:-5}

# The thresholds are compared with `[ ... -lt ... ]` further down; a
# non-numeric value would make those tests error out and evaluate as false,
# silently switching the gate off. Reject such values up front instead.
for _threshold_name in MIN_COHORT MIN_CREATORS; do
  if [[ ! "${!_threshold_name}" =~ ^[0-9]+$ ]]; then
    echo "ERROR: $_threshold_name must be a non-negative integer (got: ${!_threshold_name})" >&2
    exit 1
  fi
done
unset _threshold_name

# 1. Header check.
# Read only the first line and drop carriage returns so CRLF files pass.
header=$(head -n 1 "$CSV" | tr -d '\r')
# Spreadsheet "CSV UTF-8" exports prepend a byte-order mark; it is invisible in
# the error message below, so strip it rather than reject a correct header.
header=${header#$'\xef\xbb\xbf'}
if [ "$header" != "email,cohort,sent_by_email" ]; then
  # An empty file lands here too: its first line is the empty string.
  echo "ERROR: header line must be exactly 'email,cohort,sent_by_email' (got: $header)" >&2
  exit 1
fi

# 2. Row count + duplicates + email shape (single pass over the data rows).
#
# Results are left in these globals for the report and check sections:
#   total            data rows read (header excluded)
#   malformed        rows rejected for an invalid email or an empty cohort
#   duplicates       rows repeating an email accepted earlier in the file
#   seen             accepted email -> CSV line number of its first occurrence
#   cohort_count     cohort label -> number of accepted rows
#   malformed_lines  one message per rejected row (malformed or duplicate)
total=0
malformed=0
duplicates=0
declare -A seen=()
declare -A cohort_count=()
declare -a malformed_lines=()

# Store $2 into the variable named by $1, with carriage returns removed and
# leading/trailing whitespace trimmed. Pure parameter expansion: no fork per
# field, and quotes/backslashes are kept literally (an `xargs`-based trim
# aborts on an unmatched quote and rewrites backslashes).
_trim_into() {
  local _trimmed=${2//$'\r'/}
  _trimmed=${_trimmed#"${_trimmed%%[![:space:]]*}"}
  _trimmed=${_trimmed%"${_trimmed##*[![:space:]]}"}
  printf -v "$1" '%s' "$_trimmed"
}

# Read every data row of the CSV at $1 (header skipped) and update the
# globals listed above. Each row ends in exactly one outcome:
#   - invalid email        -> malformed
#   - empty cohort label   -> malformed (an empty string cannot be used as an
#                             associative-array key, so it must not reach the
#                             tally)
#   - email already seen   -> duplicate
#   - otherwise            -> recorded in seen and tallied in cohort_count
scan_cohort_rows() {
  local csv_path=$1
  local email cohort sent_by_email line_no
  # One '@', no whitespace, a dotted domain with a 2+ letter TLD.
  local email_re='^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$'

  # `read` returns non-zero on a final line that lacks a trailing newline even
  # though it filled the variables; the `|| [[ -n ... ]]` keeps that last row.
  while IFS=, read -r email cohort sent_by_email ||
    [[ -n "${email}${cohort}${sent_by_email}" ]]; do
    _trim_into email "$email"
    _trim_into cohort "$cohort"

    total=$(( total + 1 ))
    # +1 because the header occupies line 1 of the file.
    line_no=$(( total + 1 ))

    if [[ ! "$email" =~ $email_re ]]; then
      malformed=$(( malformed + 1 ))
      malformed_lines+=(" line ${line_no} : invalid email '$email'")
      continue
    fi

    if [[ -z "$cohort" ]]; then
      malformed=$(( malformed + 1 ))
      malformed_lines+=(" line ${line_no} : missing cohort label for '$email'")
      continue
    fi

    if [[ -n "${seen[$email]:-}" ]]; then
      duplicates=$(( duplicates + 1 ))
      malformed_lines+=(" line ${line_no} : duplicate email '$email' (first seen at line ${seen[$email]})")
      continue
    fi
    seen[$email]=$line_no

    cohort_count[$cohort]=$(( ${cohort_count[$cohort]:-0} + 1 ))
  done < <(tail -n +2 "$csv_path")
}

scan_cohort_rows "$CSV"

# Summary report on stdout: file name, row counters from the scan, then one
# line per cohort label (sorted) with the number of accepted rows.
report_rule="----------------------------------------------------------------"
printf '%s\n' \
  "$report_rule" \
  "Cohort validation report" \
  "$report_rule" \
  " CSV file : $CSV" \
  " Total rows : $total" \
  " Unique emails : ${#seen[@]}" \
  " Malformed rows : $malformed" \
  " Duplicates : $duplicates" \
  "" \
  "Distribution by cohort :"
# Associative-array iteration order is unspecified, hence the sort.
for label in "${!cohort_count[@]}"; do
  printf " %-40s %d\n" "$label" "${cohort_count[$label]}"
done | sort
printf '\n'

# Exit status accumulated by the checks below: 0 = clean, 2 = warnings only.
exit_code=0

# 3. Hard checks (block send).
# Any malformed or duplicate row is fatal: list every offending line collected
# during the scan and stop with status 1 before the soft checks run.
if [[ "$malformed" -gt 0 || "$duplicates" -gt 0 ]]; then
  printf '%s\n' "ERROR: $malformed malformed + $duplicates duplicate row(s) — fix before sending."
  printf '%s\n' "${malformed_lines[@]}"
  exit 1
fi

# 4. Soft checks (warn, don't block — operator decides).
#
# Reads   : total, cohort_count, MIN_COHORT, MIN_CREATORS
# Writes  : creator_total (rows whose cohort label starts with "creator-"),
#           exit_code (set to 2 when any warning fires)
# Outputs : one WARN line per failed check on stdout
run_soft_checks() {
  local label

  if [ "$total" -lt "$MIN_COHORT" ]; then
    echo "WARN : cohort has $total rows ; soft-launch acceptance gate is ≥ $MIN_COHORT."
    exit_code=2
  fi

  creator_total=0
  for label in "${!cohort_count[@]}"; do
    if [[ "$label" == creator-* ]]; then
      # Expand the count with ${...} BEFORE the arithmetic runs. Writing
      # cohort_count[$label] bare inside $(( )) makes the arithmetic evaluator
      # expand the subscript a second time, so a label read from the CSV such
      # as 'creator-$(cmd)' would execute cmd.
      creator_total=$(( creator_total + ${cohort_count[$label]} ))
    fi
  done
  if [ "$creator_total" -lt "$MIN_CREATORS" ]; then
    echo "WARN : only $creator_total creator-* cohort rows ; goal is ≥ $MIN_CREATORS for upload-flow coverage."
    exit_code=2
  fi

  if [ "${#cohort_count[@]}" -lt 3 ]; then
    echo "WARN : only ${#cohort_count[@]} distinct cohort labels — feedback will be narrow."
    exit_code=2
  fi
}

run_soft_checks

# 5. Optional : DATABASE_URL collision check.
#
# When DATABASE_URL is set, count how many accepted cohort emails (keys of
# `seen`) already exist in users.email. Never fatal: every outcome other than
# "zero collisions" prints one line on stdout and sets exit_code=2.
#
# Reads   : DATABASE_URL, seen
# Writes  : exit_code
check_user_collisions() {
  [ -n "${DATABASE_URL:-}" ] || return 0

  if ! command -v psql >/dev/null 2>&1; then
    echo "WARN : DATABASE_URL set but psql not on \$PATH ; skipping collision check."
    exit_code=2
    return 0
  fi

  local address emails_csv='' collisions
  for address in "${!seen[@]}"; do
    emails_csv+="${emails_csv:+,}${address}"
  done

  # The list travels as a psql variable and is quoted by psql itself
  # (:'emails') instead of being spliced into the SQL text. psql does not
  # interpolate variables in -c strings, so the statement is fed on stdin.
  # NOTE(review): the comparison is case-sensitive, as before — confirm how
  # users.email is normalised.
  local query="SELECT count(*) FROM users WHERE email = ANY(string_to_array(:'emails', ','));"

  # -X skips ~/.psqlrc so operator settings (e.g. \timing) cannot add lines to
  # the output; -q/-A/-t reduce it to the bare number; ON_ERROR_STOP makes an
  # SQL error a non-zero exit. stderr stays suppressed: the WARN line below is
  # the operator-facing message.
  collisions=$(psql -X -q -A -t -v ON_ERROR_STOP=1 -v "emails=${emails_csv}" \
    "$DATABASE_URL" 2>/dev/null <<<"$query") || collisions="?"
  collisions=${collisions//[[:space:]]/}

  # Anything that is not a plain integer (failed connection, empty or polluted
  # output) is reported as a failed check rather than read as "no collisions".
  if [[ ! "$collisions" =~ ^[0-9]+$ ]]; then
    echo "WARN : couldn't query users table (psql connection issue) ; skipping collision check."
    exit_code=2
  elif [ "$collisions" -gt 0 ]; then
    echo "INFO : $collisions email(s) in the cohort already exist in the users table — invite codes will be wasted on existing accounts."
    exit_code=2
  fi
}

check_user_collisions

# Closing verdict: a blank separator line, one summary line for the two
# statuses that can reach this point (0 = clean, 2 = warnings only), then
# exit with that status.
printf '\n'
verdict=""
case "$exit_code" in
  0)
    verdict="PASS : cohort valid, ready for send-invitations.sh."
    ;;
  2)
    verdict="WARN : cohort valid but with caveats — review and re-run with --force from send-invitations.sh if intentional."
    ;;
esac
if [ -n "$verdict" ]; then
  printf '%s\n' "$verdict"
fi
exit "$exit_code"