veza/veza-backend-api/cmd/tools/seed/seed_analytics.go
senke 2eff5a9b10 refactor(backend): split seed tool into domain-specific modules
Extract monolithic seed main.go into separate files per domain:
users, tracks, playlists, chat, analytics, marketplace, social,
content, live, moderation, notifications, and misc. Add config,
fake data helpers, and utility modules. Update Makefile targets.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-25 23:35:07 +01:00

211 lines
7.9 KiB
Go

package main
import (
"database/sql"
"fmt"
"time"
)
// SeedAnalytics fills the analytics tables with synthetic data and then
// refreshes the denormalised counters that depend on them.
//
// In order, it writes:
//
//  1. track_plays           — cfg.PlayEvents rows, tracks chosen via PowerLaw
//  2. playback_history      — cfg.PlayEvents/5 rows, tracks chosen uniformly
//  3. daily_track_stats     — one row per (top track, day) over cfg.AnalyticsMonths*30 days
//  4. geographic_play_stats — 3-10 countries for each top track
//  5. analytics_events      — cfg.PlayEvents/4 rows
//  6. tracks.play_count / like_count / comment_count
//  7. user_profiles.follower_count / following_count / track_count / playlist_count
//
// "Top tracks" are the first len(tracks)/5 entries of tracks, capped at 200.
//
// Steps 1, 2, 3 and 5 abort the seed and return a wrapped error when their
// insert fails. Steps 4, 6 and 7 are best-effort: a failure is printed as a
// warning and seeding continues. The function returns nil immediately when
// either users or tracks is empty.
func SeedAnalytics(db *sql.DB, cfg Config, users []SeededUser, tracks []SeededTrack) error {
	fmt.Println("\n═══ ANALYTICS ═══")
	if len(tracks) == 0 || len(users) == 0 {
		return nil
	}

	// A single reference instant keeps every generated timestamp inside the
	// same [startDate, now] window, however long the generation loops take.
	now := time.Now()
	monthsBack := cfg.AnalyticsMonths
	startDate := now.AddDate(0, -monthsBack, 0)

	// warn reports a failure in a best-effort step without aborting the seed.
	warn := func(what string, err error) {
		fmt.Printf("  warning: %s failed (continuing): %v\n", what, err)
	}

	// ── 1. Track plays (the big one) ─────────────────────────────────────────
	p := NewProgress("track_plays", cfg.PlayEvents)
	playRows := make([][]interface{}, 0, cfg.PlayEvents)
	for i := 0; i < cfg.PlayEvents; i++ {
		user := users[rng.Intn(len(users))]
		// Power-law: popular tracks get way more plays.
		track := tracks[PowerLaw(0, len(tracks)-1, 1.5)]
		playedAt := RealisticHour(RandomTimeBetween(startDate, now))

		// Listened duration is 30-100% of the track, never less than 1.
		pctListened := randInt(30, 100)
		duration := track.Duration * pctListened / 100
		if duration < 1 {
			duration = 1
		}

		playRows = append(playRows, []interface{}{
			newUUID(), track.ID, user.ID, duration,
			playedAt, nil, nil, GenUserAgent(),
			GenSource(), GenCountry(),
			playedAt, playedAt, nil,
		})
	}
	_, err := BulkInsert(db, "track_plays",
		"id, track_id, user_id, duration, played_at, device, ip_address, user_agent, source, country_code, created_at, updated_at, deleted_at",
		playRows)
	if err != nil {
		return fmt.Errorf("insert track_plays: %w", err)
	}
	p.Update(cfg.PlayEvents)
	p.Done()

	// ── 2. Playback history ──────────────────────────────────────────────────
	historyCount := cfg.PlayEvents / 5 // 20% of plays recorded in history
	p = NewProgress("playback_history", historyCount)
	historyRows := make([][]interface{}, 0, historyCount)
	for i := 0; i < historyCount; i++ {
		user := users[rng.Intn(len(users))]
		track := tracks[rng.Intn(len(tracks))]
		playedAt := RandomTimeBetween(startDate, now)

		// Played duration is normally 30s..track length. For tracks of 30s or
		// less the range would be empty or inverted (randInt's behaviour for
		// that case is not visible here), so use the whole track instead.
		duration := track.Duration
		if duration < 0 {
			duration = 0
		}
		if duration > 30 {
			duration = randInt(30, duration)
		}

		completion := 0
		if track.Duration > 0 {
			completion = duration * 100 / track.Duration
		}
		if completion > 100 {
			completion = 100
		}

		source := GenSource()
		device := pick([]string{"desktop", "mobile", "tablet"})
		historyRows = append(historyRows, []interface{}{
			newUUID(), user.ID, track.ID,
			duration, completion, source, nil, device, playedAt,
		})
	}
	_, err = BulkInsert(db, "playback_history",
		"id, user_id, track_id, played_duration, completion_percentage, source, source_id, device_type, played_at",
		historyRows)
	if err != nil {
		return fmt.Errorf("insert playback_history: %w", err)
	}
	p.Update(historyCount)
	p.Done()

	// ── 3. Daily track stats ─────────────────────────────────────────────────
	// First fifth of the track list (at most 200 tracks), one row per day
	// over roughly the configured number of months (30 days per month).
	topTrackCount := len(tracks) / 5
	if topTrackCount > 200 {
		topTrackCount = 200
	}
	days := monthsBack * 30
	statsCount := topTrackCount * days
	p = NewProgress("daily_track_stats", statsCount)
	statsRows := make([][]interface{}, 0, statsCount)
	for ti := 0; ti < topTrackCount; ti++ {
		track := tracks[ti]
		for d := 0; d < days; d++ {
			date := now.AddDate(0, 0, -d).Format("2006-01-02")
			// Growth curve: more plays for newer days.
			dayMultiplier := float64(days-d) / float64(days)
			basePlays := PowerLaw(0, 50, 1.0)
			totalPlays := int(float64(basePlays) * (0.5 + dayMultiplier))
			if totalPlays < 0 {
				totalPlays = 0
			}
			uniqueListeners := totalPlays * randInt(50, 90) / 100
			if uniqueListeners < 0 {
				uniqueListeners = 0
			}
			completeListens := uniqueListeners * randInt(40, 80) / 100
			totalPlayTime := totalPlays * track.Duration * randInt(60, 100) / 100
			avgCompletion := randFloat(0.4, 0.95)
			statsRows = append(statsRows, []interface{}{
				track.ID, date, totalPlays, uniqueListeners,
				completeListens, totalPlayTime, avgCompletion,
			})
		}
	}
	_, err = BulkInsertRaw(db, "daily_track_stats",
		"track_id, date, total_plays, unique_listeners, complete_listens, total_play_time, avg_completion_rate",
		statsRows, "ON CONFLICT (track_id, date) DO NOTHING")
	if err != nil {
		return fmt.Errorf("insert daily_track_stats: %w", err)
	}
	p.Update(len(statsRows))
	p.Done()

	// ── 4. Geographic play stats (best-effort) ───────────────────────────────
	geoCount := topTrackCount * 10 // upper bound: at most 10 countries per top track
	p = NewProgress("geographic_play_stats", geoCount)
	geoRows := make([][]interface{}, 0, geoCount)
	for ti := 0; ti < topTrackCount; ti++ {
		track := tracks[ti]
		countries := pickN(countryCodes, randInt(3, 10))
		for _, cc := range countries {
			// With a zero-month window there is only "today" to choose from.
			dayOffset := 0
			if days > 0 {
				dayOffset = randInt(0, days)
			}
			date := now.AddDate(0, 0, -dayOffset).Format("2006-01-02")

			// Unique listeners can never exceed the number of plays.
			playCount := randInt(1, 5000)
			listenerCap := 3000
			if playCount < listenerCap {
				listenerCap = playCount
			}
			listeners := 1
			if listenerCap > 1 {
				listeners = randInt(1, listenerCap)
			}

			geoRows = append(geoRows, []interface{}{
				newUUID(), track.ID, cc, "", date,
				int64(playCount), int64(listeners),
				now, now,
			})
		}
	}
	// This insert was already non-fatal; keep it that way, but surface the
	// failure instead of reporting rows that were never written.
	if _, geoErr := BulkInsert(db, "geographic_play_stats",
		"id, track_id, country_code, region, date, play_count, unique_listeners, created_at, updated_at",
		geoRows); geoErr != nil {
		warn("insert geographic_play_stats", geoErr)
	} else {
		p.Update(len(geoRows))
	}
	p.Done()

	// ── 5. Analytics events ──────────────────────────────────────────────────
	eventCount := cfg.PlayEvents / 4
	p = NewProgress("analytics_events", eventCount)
	eventRows := make([][]interface{}, 0, eventCount)
	eventTypes := []string{
		"page_view", "track_play", "search", "playlist_create",
		"follow", "signup", "login", "track_upload", "profile_view",
		"playlist_view", "marketplace_view", "product_view",
	}
	for i := 0; i < eventCount; i++ {
		user := users[rng.Intn(len(users))]
		evt := pick(eventTypes)
		createdAt := RealisticHour(RandomTimeBetween(startDate, now))
		payload := fmt.Sprintf(`{"source":"%s","page":"/dashboard","session_id":"%s"}`, GenSource(), newUUID()[:8])
		eventRows = append(eventRows, []interface{}{
			newUUID(), evt, user.ID, payload, createdAt,
		})
	}
	_, err = BulkInsert(db, "analytics_events",
		"id, event_name, user_id, payload, created_at",
		eventRows)
	if err != nil {
		return fmt.Errorf("insert analytics_events: %w", err)
	}
	p.Update(eventCount)
	p.Done()

	// counterUpdate is one denormalised-counter refresh statement.
	type counterUpdate struct {
		label string
		query string
	}
	// runCounterUpdates executes each statement independently so that one
	// failure does not prevent the remaining counters from being refreshed.
	runCounterUpdates := func(updates []counterUpdate) {
		for _, u := range updates {
			if _, execErr := db.Exec(u.query); execErr != nil {
				warn("update "+u.label, execErr)
			}
		}
	}

	// ── 6. Update track play/like/comment counts (best-effort) ───────────────
	p = NewProgress("update track counts", 1)
	runCounterUpdates([]counterUpdate{
		{"tracks.play_count", "UPDATE tracks SET play_count = (SELECT COUNT(*) FROM track_plays WHERE track_plays.track_id = tracks.id)"},
		{"tracks.like_count", "UPDATE tracks SET like_count = (SELECT COUNT(*) FROM track_likes WHERE track_likes.track_id = tracks.id)"},
		{"tracks.comment_count", "UPDATE tracks SET comment_count = (SELECT COUNT(*) FROM track_comments WHERE track_comments.track_id = tracks.id)"},
	})
	p.Update(1)
	p.Done()

	// ── 7. Update user profile counts (best-effort) ──────────────────────────
	p = NewProgress("update profile counts", 1)
	runCounterUpdates([]counterUpdate{
		{"user_profiles.follower_count", "UPDATE user_profiles SET follower_count = (SELECT COUNT(*) FROM follows WHERE follows.followed_id = user_profiles.user_id)"},
		{"user_profiles.following_count", "UPDATE user_profiles SET following_count = (SELECT COUNT(*) FROM follows WHERE follows.follower_id = user_profiles.user_id)"},
		{"user_profiles.track_count", "UPDATE user_profiles SET track_count = (SELECT COUNT(*) FROM tracks WHERE tracks.creator_id = user_profiles.user_id AND tracks.deleted_at IS NULL)"},
		{"user_profiles.playlist_count", "UPDATE user_profiles SET playlist_count = (SELECT COUNT(*) FROM playlists WHERE playlists.user_id = user_profiles.user_id AND playlists.deleted_at IS NULL)"},
	})
	p.Update(1)
	p.Done()

	return nil
}