225 lines
6.3 KiB
Go
225 lines
6.3 KiB
Go
|
|
package main
|
||
|
|
|
||
|
|
import (
|
||
|
|
"database/sql"
|
||
|
|
"fmt"
|
||
|
|
"math"
|
||
|
|
"strings"
|
||
|
|
)
|
||
|
|
|
||
|
|
// SeededTrack is the in-memory record of one generated track. It is returned
// by SeedTracks so that other seeding code can cross-reference tracks without
// querying the database again.
type SeededTrack struct {
	// ID identifies the track; CreatorID is the ID of the artist that owns it.
	ID, CreatorID string

	// Title is the track title, Artist the owning artist's display name and
	// Genre the slug of the genre chosen for the track.
	Title, Artist, Genre string

	// Duration is the track length in seconds; BPM is the tempo.
	Duration, BPM int

	// Key is the musical key ("" when the genre defines none). AlbumID is the
	// owning album, if any.
	Key, AlbumID string

	CreatedAt string // RFC3339
}
|
||
|
|
|
||
|
|
// SeededAlbum is the in-memory record of one generated album: its own ID, the
// ID of the creator that owns it, and its title.
type SeededAlbum struct {
	ID, CreatorID, Title string
}
|
||
|
|
|
||
|
|
// SeedTracks creates tracks with power-law distribution across artists.
|
||
|
|
func SeedTracks(db *sql.DB, cfg Config, users []SeededUser) ([]SeededTrack, error) {
|
||
|
|
fmt.Println("\n═══ TRACKS ═══")
|
||
|
|
|
||
|
|
artists := GetArtists(users)
|
||
|
|
if len(artists) == 0 {
|
||
|
|
return nil, fmt.Errorf("no artists found")
|
||
|
|
}
|
||
|
|
|
||
|
|
// ── 1. Distribute tracks across artists using power law ──────────────────
|
||
|
|
// Top artists get many more tracks than bottom ones
|
||
|
|
trackCounts := distributeTracksToArtists(cfg.Tracks, len(artists))
|
||
|
|
|
||
|
|
// ── 2. Generate tracks ───────────────────────────────────────────────────
|
||
|
|
tracks := make([]SeededTrack, 0, cfg.Tracks)
|
||
|
|
trackRows := make([][]interface{}, 0, cfg.Tracks)
|
||
|
|
|
||
|
|
p := NewProgress("tracks", cfg.Tracks)
|
||
|
|
for ai, artist := range artists {
|
||
|
|
count := trackCounts[ai]
|
||
|
|
genres := GenreForArtist(ai)
|
||
|
|
primaryGenre := genres[0]
|
||
|
|
|
||
|
|
for ti := 0; ti < count; ti++ {
|
||
|
|
id := newUUID()
|
||
|
|
title := GenTrackTitle()
|
||
|
|
genre := primaryGenre
|
||
|
|
if len(genres) > 1 && randChance(30) {
|
||
|
|
genre = genres[rng.Intn(len(genres))]
|
||
|
|
}
|
||
|
|
|
||
|
|
duration := randInt(30, 720) // 30s to 12min
|
||
|
|
// Majority 2-5min
|
||
|
|
if randChance(60) {
|
||
|
|
duration = randInt(120, 300)
|
||
|
|
}
|
||
|
|
|
||
|
|
bpm := randInt(genre.BPMRange[0], genre.BPMRange[1])
|
||
|
|
key := ""
|
||
|
|
if len(genre.Keys) > 0 {
|
||
|
|
key = pick(genre.Keys)
|
||
|
|
}
|
||
|
|
|
||
|
|
createdAt := RandomTimeAfter(artist.CreatedAt)
|
||
|
|
createdAt = RealisticHour(createdAt)
|
||
|
|
filePath := fmt.Sprintf("audio/%s/%s.mp3", artist.Username, strings.ReplaceAll(strings.ToLower(title), " ", "_"))
|
||
|
|
fileSize := int64(duration) * int64(randInt(16000, 32000)) // ~128-256 kbps
|
||
|
|
|
||
|
|
tags := fmt.Sprintf("{%s,%s}", genre.Slug, pick([]string{"chill", "energetic", "dark", "melodic", "atmospheric", "groovy", "deep", "raw", "smooth", "heavy"}))
|
||
|
|
|
||
|
|
t := SeededTrack{
|
||
|
|
ID: id,
|
||
|
|
CreatorID: artist.ID,
|
||
|
|
Title: title,
|
||
|
|
Artist: artist.DisplayName,
|
||
|
|
Genre: genre.Slug,
|
||
|
|
Duration: duration,
|
||
|
|
BPM: bpm,
|
||
|
|
Key: key,
|
||
|
|
CreatedAt: createdAt.Format("2006-01-02T15:04:05Z"),
|
||
|
|
}
|
||
|
|
tracks = append(tracks, t)
|
||
|
|
|
||
|
|
trackRows = append(trackRows, []interface{}{
|
||
|
|
id, artist.ID, artist.ID, // creator_id, user_id
|
||
|
|
title, nil, // description
|
||
|
|
artist.DisplayName, nil, genre.Slug, // artist, album, genre
|
||
|
|
0, duration, bpm, key,
|
||
|
|
"public", false, // visibility, is_downloadable
|
||
|
|
nil, nil, // cover_art_file_id, waveform_data
|
||
|
|
0, 0, 0, 0, // counts (will be updated)
|
||
|
|
filePath, fileSize, "mp3", 320, 44100, // file details
|
||
|
|
nil, nil, // waveform_path, cover_art_path
|
||
|
|
"completed", nil, "ready", nil, // status, stream_status
|
||
|
|
true, tags, createdAt, createdAt, createdAt, nil,
|
||
|
|
})
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
_, err := BulkInsert(db, "tracks",
|
||
|
|
"id, creator_id, user_id, title, description, artist, album, genre, year, duration, bpm, musical_key, visibility, is_downloadable, cover_art_file_id, waveform_data, play_count, like_count, comment_count, download_count, file_path, file_size, format, bitrate, sample_rate, waveform_path, cover_art_path, status, status_message, stream_status, stream_manifest_url, is_public, tags, published_at, created_at, updated_at, deleted_at",
|
||
|
|
trackRows)
|
||
|
|
if err != nil {
|
||
|
|
return nil, fmt.Errorf("insert tracks: %w", err)
|
||
|
|
}
|
||
|
|
p.Update(len(trackRows))
|
||
|
|
p.Done()
|
||
|
|
|
||
|
|
// ── 3. Link tracks to genres (track_genres table) ────────────────────────
|
||
|
|
p = NewProgress("track_genres", len(tracks))
|
||
|
|
// First, fetch genre IDs
|
||
|
|
genreMap := make(map[string]string)
|
||
|
|
rows, err := db.Query("SELECT id, slug FROM genres")
|
||
|
|
if err == nil {
|
||
|
|
for rows.Next() {
|
||
|
|
var id, slug string
|
||
|
|
_ = rows.Scan(&id, &slug)
|
||
|
|
genreMap[slug] = id
|
||
|
|
}
|
||
|
|
rows.Close()
|
||
|
|
}
|
||
|
|
|
||
|
|
if len(genreMap) > 0 {
|
||
|
|
tgRows := make([][]interface{}, 0, len(tracks))
|
||
|
|
for _, t := range tracks {
|
||
|
|
if gid, ok := genreMap[t.Genre]; ok {
|
||
|
|
tgRows = append(tgRows, []interface{}{t.ID, gid})
|
||
|
|
}
|
||
|
|
}
|
||
|
|
_, _ = BulkInsert(db, "track_genres", "track_id, genre_id", tgRows)
|
||
|
|
}
|
||
|
|
p.Update(len(tracks))
|
||
|
|
p.Done()
|
||
|
|
|
||
|
|
// ── 4. Link tracks to tags (track_tags table) ────────────────────────────
|
||
|
|
p = NewProgress("track_tags", len(tracks))
|
||
|
|
// Fetch tag IDs
|
||
|
|
tagMap := make(map[string]string)
|
||
|
|
rows, err = db.Query("SELECT id, name FROM tags")
|
||
|
|
if err == nil {
|
||
|
|
for rows.Next() {
|
||
|
|
var id, name string
|
||
|
|
_ = rows.Scan(&id, &name)
|
||
|
|
tagMap[strings.ToLower(name)] = id
|
||
|
|
}
|
||
|
|
rows.Close()
|
||
|
|
}
|
||
|
|
|
||
|
|
if len(tagMap) > 0 {
|
||
|
|
ttRows := make([][]interface{}, 0, len(tracks)*2)
|
||
|
|
for _, t := range tracks {
|
||
|
|
if tid, ok := tagMap[t.Genre]; ok {
|
||
|
|
ttRows = append(ttRows, []interface{}{t.ID, tid})
|
||
|
|
}
|
||
|
|
}
|
||
|
|
if len(ttRows) > 0 {
|
||
|
|
_, _ = BulkInsert(db, "track_tags", "track_id, tag_id", ttRows)
|
||
|
|
}
|
||
|
|
}
|
||
|
|
p.Update(len(tracks))
|
||
|
|
p.Done()
|
||
|
|
|
||
|
|
return tracks, nil
|
||
|
|
}
|
||
|
|
|
||
|
|
// distributeTracksToArtists distributes totalTracks across numArtists using a
// power-law distribution, so artists at low indexes (the "top" artists) get
// many more tracks than the ones further down.
//
// The returned slice has one entry per artist. Every artist receives at least
// one track and at most 80, which has two consequences:
//   - when totalTracks is smaller than numArtists (or not positive) the counts
//     sum to numArtists rather than totalTracks;
//   - when the 80-per-artist cap applies the counts sum to less than
//     totalTracks.
//
// In every other case the counts sum to exactly totalTracks. A non-positive
// numArtists yields an empty slice.
func distributeTracksToArtists(totalTracks, numArtists int) []int {
	if numArtists <= 0 {
		return []int{}
	}

	const (
		zipfExponent = 0.8 // weight = 1/(rank^0.8)
		maxPerArtist = 80
	)

	counts := make([]int, numArtists)

	// Generate Zipf-like weights and their sum in one pass.
	weights := make([]float64, numArtists)
	totalWeight := 0.0
	for i := range weights {
		weights[i] = 1.0 / math.Pow(float64(i+1), zipfExponent)
		totalWeight += weights[i]
	}

	// Give each artist its (truncated) proportional share, but never less
	// than one track.
	assigned := 0
	for i := range counts {
		counts[i] = int(float64(totalTracks) * weights[i] / totalWeight)
		if counts[i] < 1 {
			counts[i] = 1
		}
		assigned += counts[i]
	}

	// Adjust to hit the exact total. Truncation leaves a shortfall, which is
	// handed out round-robin starting with the top artist.
	diff := totalTracks - assigned
	for i := 0; diff > 0; i = (i + 1) % numArtists {
		counts[i]++
		diff--
	}

	// The one-track minimum can overshoot instead; take tracks back
	// round-robin from artists that have more than one. Stop as soon as a
	// full pass removes nothing: everybody is at the minimum then, and
	// looping on would never terminate.
	for diff < 0 {
		removed := false
		for i := 0; i < numArtists && diff < 0; i++ {
			if counts[i] > 1 {
				counts[i]--
				diff++
				removed = true
			}
		}
		if !removed {
			break
		}
	}

	// Cap at 80 tracks per artist. The excess is dropped, not redistributed.
	for i := range counts {
		if counts[i] > maxPerArtist {
			counts[i] = maxPerArtist
		}
	}

	return counts
}
|