// veza/veza-backend-api/internal/services/audio_transcode_service.go
// 380 lines, 9.8 KiB, Go

package services
import (
"context"
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
"go.uber.org/zap"
)
// AudioFormat names an audio output format. The value selects the FFmpeg
// codec and doubles as the file extension of auto-generated output paths.
type AudioFormat string
// Output formats known to the service. The codec noted on each constant is
// the one getCodecForFormat maps it to.
const (
FormatMP3 AudioFormat = "mp3" // libmp3lame
FormatAAC AudioFormat = "aac" // aac
FormatFLAC AudioFormat = "flac" // flac
FormatOGG AudioFormat = "ogg" // libvorbis
FormatWAV AudioFormat = "wav" // pcm_s16le
FormatM4A AudioFormat = "m4a" // aac
)
// AudioQuality names a bitrate preset. getBitrateForQuality translates a
// preset into a concrete bitrate, taking the output format into account.
type AudioQuality string
// Quality presets and the bitrates they resolve to for lossy formats
// (FLAC and WAV always resolve to 0).
const (
QualityLow AudioQuality = "low" // 64 kbps for AAC/M4A, 96 kbps otherwise
QualityMedium AudioQuality = "medium" // 128 kbps for AAC/M4A, 192 kbps otherwise
QualityHigh AudioQuality = "high" // 256 kbps for AAC/M4A, 320 kbps otherwise
QualityLossless AudioQuality = "lossless" // resolves to no bitrate (0); meant for FLAC/WAV
)
// TranscodeOptions configures a single Transcode call. Zero values select
// the defaults described on each field.
type TranscodeOptions struct {
Format AudioFormat // Output format; empty defaults to MP3
Bitrate int // Bitrate in kbps; 0 = derive from Quality (never passed to FFmpeg for FLAC/WAV)
Quality AudioQuality // Quality preset; consulted only when Bitrate is 0
SampleRate int // Sample rate in Hz; 0 = no -ar flag is passed to FFmpeg
Channels int // Number of channels; 0 = no -ac flag is passed to FFmpeg
OutputPath string // Output file path; empty = "<input without extension>_<format>.<format>"
Timeout time.Duration // Limit for the FFmpeg run; 0 defaults to 5 minutes
}
// TranscodeResult describes the file produced by a successful Transcode call.
type TranscodeResult struct {
OutputPath string `json:"output_path"` // Path of the transcoded file
Format AudioFormat `json:"format"` // Output format after defaults were applied
Bitrate int `json:"bitrate"` // Bitrate in kbps as resolved by Transcode
SampleRate int `json:"sample_rate"` // Sample rate in Hz read back with ffprobe; 44100 if probing fails
Channels int `json:"channels"` // Channel count read back with ffprobe; 2 if probing fails
Duration time.Duration `json:"duration"` // NOTE(review): not set by Transcode in this file, so it stays 0
FileSize int64 `json:"file_size"` // Size of the output file in bytes
ProcessingTime time.Duration `json:"processing_time"` // Wall-clock time spent running FFmpeg
}
// AudioTranscodeService transcodes audio files by invoking the FFmpeg
// command-line tool as a child process.
// BE-SVC-011: Implement audio transcoding service
type AudioTranscodeService struct {
ffmpegPath string // FFmpeg executable to run; the constructor defaults it to "ffmpeg"
logger *zap.Logger // Receives warnings, errors and completion messages
}
// NewAudioTranscodeService builds an AudioTranscodeService.
//
// An empty ffmpegPath falls back to "ffmpeg", which is then resolved through
// the system PATH when the command is run. A nil logger is replaced by a
// no-op logger so the service can always log unconditionally.
func NewAudioTranscodeService(ffmpegPath string, logger *zap.Logger) *AudioTranscodeService {
	svc := new(AudioTranscodeService)

	svc.ffmpegPath = ffmpegPath
	if svc.ffmpegPath == "" {
		svc.ffmpegPath = "ffmpeg" // Default to system PATH
	}

	svc.logger = logger
	if svc.logger == nil {
		svc.logger = zap.NewNop()
	}

	return svc
}
// IsFFmpegAvailable reports whether the configured FFmpeg executable can be
// run. It executes "<ffmpeg> -version" under ctx; any failure (missing
// binary, non-zero exit, cancelled context) is logged as a warning and
// reported as false.
func (s *AudioTranscodeService) IsFFmpegAvailable(ctx context.Context) bool {
	err := exec.CommandContext(ctx, s.ffmpegPath, "-version").Run()
	if err == nil {
		return true
	}
	s.logger.Warn("FFmpeg not available", zap.Error(err))
	return false
}
// Transcode converts the audio file at inputPath with FFmpeg according to
// options and reports what was produced.
//
// Defaults applied to options (the caller's copy is not modified):
//   - Format: MP3 when empty.
//   - Timeout: 5 minutes when zero or negative.
//   - OutputPath: "<input without extension>_<format>.<format>" when empty.
//   - Bitrate: taken from Quality when unset, otherwise 192 kbps. FLAC and
//     WAV are encoded without a bitrate, so the result reports 0 for them.
//
// The output directory is created if it does not exist. ctx bounds the whole
// operation; the FFmpeg run and the follow-up metadata probe are additionally
// limited by the timeout.
//
// An error is returned when the input is missing, inaccessible or a
// directory, when FFmpeg is unavailable or fails, when the run is aborted by
// the timeout or by cancellation of ctx (the error then wraps the context
// error), or when no output file exists afterwards.
func (s *AudioTranscodeService) Transcode(
	ctx context.Context,
	inputPath string,
	options TranscodeOptions,
) (*TranscodeResult, error) {
	// Validate the input. Every Stat failure is fatal, not only "not exist":
	// a permission error would otherwise only surface later as an opaque
	// FFmpeg failure.
	inputInfo, err := os.Stat(inputPath)
	if err != nil {
		if os.IsNotExist(err) {
			return nil, fmt.Errorf("input file does not exist: %s", inputPath)
		}
		return nil, fmt.Errorf("cannot access input file %s: %w", inputPath, err)
	}
	if inputInfo.IsDir() {
		return nil, fmt.Errorf("input path is a directory: %s", inputPath)
	}

	// Check FFmpeg availability
	if !s.IsFFmpegAvailable(ctx) {
		return nil, fmt.Errorf("ffmpeg is not available")
	}

	// Set defaults
	if options.Format == "" {
		options.Format = FormatMP3
	}
	if options.Timeout <= 0 {
		// A negative timeout would create an already-expired context.
		options.Timeout = 5 * time.Minute
	}

	// Determine output path
	outputPath := options.OutputPath
	if outputPath == "" {
		// Derive it from the input: "song.wav" + mp3 -> "song_mp3.mp3".
		ext := filepath.Ext(inputPath)
		base := strings.TrimSuffix(inputPath, ext)
		outputPath = fmt.Sprintf("%s_%s.%s", base, string(options.Format), string(options.Format))
	}

	// Ensure output directory exists
	if err := os.MkdirAll(filepath.Dir(outputPath), 0755); err != nil {
		return nil, fmt.Errorf("failed to create output directory: %w", err)
	}

	// Resolve the bitrate. Lossless formats are encoded without one, so 0 is
	// kept for them instead of the lossy default; this keeps the reported
	// bitrate truthful.
	bitrate := options.Bitrate
	if options.Format == FormatFLAC || options.Format == FormatWAV {
		bitrate = 0
	} else {
		if bitrate <= 0 && options.Quality != "" {
			bitrate = s.getBitrateForQuality(options.Quality, options.Format)
		}
		if bitrate <= 0 {
			bitrate = 192 // Default bitrate
		}
	}

	// Build FFmpeg command
	args := s.buildFFmpegArgs(inputPath, outputPath, options, bitrate)

	// Create context with timeout
	transcodeCtx, cancel := context.WithTimeout(ctx, options.Timeout)
	defer cancel()

	// Execute FFmpeg, capturing stderr for the failure log.
	startTime := time.Now()
	cmd := exec.CommandContext(transcodeCtx, s.ffmpegPath, args...)
	var stderr strings.Builder
	cmd.Stderr = &stderr
	err = cmd.Run()
	processingTime := time.Since(startTime)
	if err != nil {
		s.logger.Error("FFmpeg transcoding failed",
			zap.String("input", inputPath),
			zap.String("output", outputPath),
			zap.String("format", string(options.Format)),
			zap.Int("bitrate", bitrate),
			zap.String("stderr", stderr.String()),
			zap.Error(err),
			zap.Duration("processing_time", processingTime),
		)
		// When the context ended, the process was killed and err is only
		// "signal: killed". Wrap the context error so callers can detect
		// timeouts and cancellation with errors.Is.
		if ctxErr := transcodeCtx.Err(); ctxErr != nil {
			return nil, fmt.Errorf("ffmpeg transcoding aborted (%v): %w", err, ctxErr)
		}
		return nil, fmt.Errorf("ffmpeg transcoding failed: %w", err)
	}

	// Verify output file exists
	fileInfo, err := os.Stat(outputPath)
	if err != nil {
		return nil, fmt.Errorf("output file was not created: %w", err)
	}

	// Read sample rate and channel count back from the produced file.
	sampleRate, channels := s.getAudioMetadata(transcodeCtx, outputPath)

	result := &TranscodeResult{
		OutputPath:     outputPath,
		Format:         options.Format,
		Bitrate:        bitrate,
		SampleRate:     sampleRate,
		Channels:       channels,
		FileSize:       fileInfo.Size(),
		ProcessingTime: processingTime,
	}

	s.logger.Info("Audio transcoding completed",
		zap.String("input", inputPath),
		zap.String("output", outputPath),
		zap.String("format", string(options.Format)),
		zap.Int("bitrate", bitrate),
		zap.Int64("file_size", fileInfo.Size()),
		zap.Duration("processing_time", processingTime),
	)
	return result, nil
}
// buildFFmpegArgs assembles the FFmpeg command line for one transcode.
//
// The result always starts with "-i <inputPath> -y" (overwrite an existing
// output) and ends with outputPath. In between it adds, in this order:
//   - "-codec:a <codec>" when the format maps to a known codec;
//   - "-b:a <bitrate>k" for lossy formats when bitrate is positive;
//   - "-ar" / "-ac" when options.SampleRate / options.Channels are positive;
//   - format-specific flags: a compression level for FLAC, and for OGG a
//     Vorbis quality level, but only when no bitrate was emitted.
//
// The OGG quality flag is deliberately not combined with "-b:a": libvorbis
// switches to quality-based VBR as soon as a quality is given and then
// ignores the bitrate, which would silently discard the requested bitrate or
// quality preset.
func (s *AudioTranscodeService) buildFFmpegArgs(
	inputPath, outputPath string,
	options TranscodeOptions,
	bitrate int,
) []string {
	args := []string{
		"-i", inputPath,
		"-y", // Overwrite output file
	}

	// Set codec based on format; unknown formats leave the choice to FFmpeg.
	if codec := s.getCodecForFormat(options.Format); codec != "" {
		args = append(args, "-codec:a", codec)
	}

	// Set bitrate. Lossless formats take none, and a non-positive value
	// would produce a meaningless "-b:a 0k".
	lossless := options.Format == FormatFLAC || options.Format == FormatWAV
	useBitrate := !lossless && bitrate > 0
	if useBitrate {
		args = append(args, "-b:a", fmt.Sprintf("%dk", bitrate))
	}

	// Set sample rate
	if options.SampleRate > 0 {
		args = append(args, "-ar", fmt.Sprintf("%d", options.SampleRate))
	}

	// Set channels
	if options.Channels > 0 {
		args = append(args, "-ac", fmt.Sprintf("%d", options.Channels))
	}

	// Format-specific options
	switch options.Format {
	case FormatFLAC:
		args = append(args, "-compression_level", "5")
	case FormatOGG:
		if !useBitrate {
			args = append(args, "-q:a", "5") // Vorbis quality (0-10)
		}
	}

	// Output file
	return append(args, outputPath)
}
// getCodecForFormat maps an output format to the FFmpeg audio encoder used
// for it. AAC and M4A share the "aac" encoder. A format without an entry
// yields the empty string, which callers treat as "do not force a codec".
func (s *AudioTranscodeService) getCodecForFormat(format AudioFormat) string {
	encoders := map[AudioFormat]string{
		FormatMP3:  "libmp3lame",
		FormatAAC:  "aac",
		FormatM4A:  "aac",
		FormatFLAC: "flac",
		FormatOGG:  "libvorbis",
		FormatWAV:  "pcm_s16le",
	}
	// A missing key returns the zero value "", matching the unknown case.
	return encoders[format]
}
// getBitrateForQuality resolves a quality preset to a bitrate in kbps for
// the given output format.
//
// FLAC and WAV, as well as the lossless preset, resolve to 0 (no bitrate).
// AAC and M4A use 64/128/256 kbps for low/medium/high; every other format
// uses 96/192/320 kbps. An unrecognized preset resolves to 192 kbps.
func (s *AudioTranscodeService) getBitrateForQuality(quality AudioQuality, format AudioFormat) int {
	if format == FormatFLAC || format == FormatWAV {
		return 0 // Lossless formats don't use bitrate
	}
	if quality == QualityLossless {
		return 0 // Lossless
	}

	aacFamily := format == FormatAAC || format == FormatM4A
	// choose picks the AAC-family value or the general one.
	choose := func(aacKbps, otherKbps int) int {
		if aacFamily {
			return aacKbps
		}
		return otherKbps
	}

	switch quality {
	case QualityLow:
		return choose(64, 96)
	case QualityMedium:
		return choose(128, 192)
	case QualityHigh:
		return choose(256, 320)
	}
	return 192 // Default
}
// getAudioMetadata reads the sample rate (Hz) and channel count of the first
// audio stream in filePath using ffprobe.
//
// The ffprobe executable is derived from the configured FFmpeg path by
// swapping "ffmpeg" for "ffprobe" in the file name only, so a directory that
// happens to be called "ffmpeg" is left untouched. If the file name does not
// contain "ffmpeg", plain "ffprobe" is looked up on PATH.
//
// This is best effort: when ffprobe cannot be run, or a value is missing or
// unparsable, the defaults 44100 Hz and 2 channels are returned instead and
// a warning is logged for run failures.
func (s *AudioTranscodeService) getAudioMetadata(ctx context.Context, filePath string) (sampleRate, channels int) {
	const (
		fallbackSampleRate = 44100
		fallbackChannels   = 2
	)

	// filepath.Split keeps the trailing separator on dir, so dir+name
	// rebuilds the path exactly.
	dir, name := filepath.Split(s.ffmpegPath)
	ffprobePath := "ffprobe"
	if strings.Contains(name, "ffmpeg") {
		ffprobePath = dir + strings.Replace(name, "ffmpeg", "ffprobe", 1)
	}

	cmd := exec.CommandContext(ctx, ffprobePath,
		"-v", "error",
		"-select_streams", "a:0", // only the first audio stream, e.g. skip cover art
		"-show_entries", "stream=sample_rate,channels",
		"-of", "default=noprint_wrappers=1:nokey=1",
		filePath,
	)
	// Output (stdout only) keeps ffprobe diagnostics on stderr out of the
	// text that is parsed below.
	output, err := cmd.Output()
	if err != nil {
		s.logger.Warn("Failed to get audio metadata", zap.Error(err))
		return fallbackSampleRate, fallbackChannels
	}

	// Expected output: sample_rate and channels, one value per line.
	values := strings.Fields(string(output))
	if len(values) >= 2 {
		if _, err := fmt.Sscanf(values[0], "%d", &sampleRate); err != nil {
			sampleRate = 0
		}
		if _, err := fmt.Sscanf(values[1], "%d", &channels); err != nil {
			channels = 0
		}
	}

	if sampleRate <= 0 {
		sampleRate = fallbackSampleRate
	}
	if channels <= 0 {
		channels = fallbackChannels
	}
	return sampleRate, channels
}
// TranscodeMultiple transcodes inputPath once per entry of optionsList,
// sequentially and in order, and collects the successful results.
//
// Failures of individual entries do not stop the batch. If at least one
// entry succeeds, the successful results are returned with a nil error and
// the individual failures are logged as a warning. If every entry fails, a
// single error listing all failures is returned. An empty optionsList yields
// (nil, nil).
//
// Once ctx is cancelled or past its deadline, the remaining entries are
// recorded as skipped with the context error rather than being attempted.
func (s *AudioTranscodeService) TranscodeMultiple(
	ctx context.Context,
	inputPath string,
	optionsList []TranscodeOptions,
) ([]*TranscodeResult, error) {
	var results []*TranscodeResult
	var failures []error

	for _, options := range optionsList {
		// Attempting a transcode with a dead context would only spawn a
		// doomed process and report a misleading "ffmpeg is not available".
		if ctxErr := ctx.Err(); ctxErr != nil {
			failures = append(failures, fmt.Errorf("transcode skipped for format %s: %w", options.Format, ctxErr))
			continue
		}

		result, err := s.Transcode(ctx, inputPath, options)
		if err != nil {
			failures = append(failures, fmt.Errorf("transcode failed for format %s: %w", options.Format, err))
			continue
		}
		results = append(results, result)
	}

	if len(failures) > 0 && len(results) == 0 {
		return nil, fmt.Errorf("all transcoding operations failed: %v", failures)
	}

	if len(failures) > 0 {
		// Partial success returns no error, so this log entry is the only
		// place the individual failures are reported.
		s.logger.Warn("Some transcoding operations failed",
			zap.Int("successful", len(results)),
			zap.Int("failed", len(failures)),
			zap.Errors("errors", failures),
		)
	}
	return results, nil
}
// GetSupportedFormats lists the output formats this service knows, in the
// fixed order MP3, AAC, FLAC, OGG, WAV, M4A. Each call returns a fresh
// slice, so callers may modify it freely.
func (s *AudioTranscodeService) GetSupportedFormats() []AudioFormat {
	formats := make([]AudioFormat, 0, 6)
	formats = append(formats,
		FormatMP3, FormatAAC, FormatFLAC,
		FormatOGG, FormatWAV, FormatM4A,
	)
	return formats
}
// ValidateFormat reports whether format is one of the formats returned by
// GetSupportedFormats. The comparison is exact (case-sensitive).
func (s *AudioTranscodeService) ValidateFormat(format AudioFormat) bool {
	known := s.GetSupportedFormats()
	found := false
	for i := 0; i < len(known) && !found; i++ {
		found = known[i] == format
	}
	return found
}