package services

import (
	"context"
	"encoding/json"
	"fmt"
	"time"

	"github.com/google/uuid"
	"go.uber.org/zap"
	"gorm.io/gorm"
)

// AnalyticsAggregationService provides aggregation and processing for analytics events.
// BE-SVC-008: Implement analytics aggregation service
type AnalyticsAggregationService struct {
	db     *gorm.DB
	logger *zap.Logger
}

// NewAnalyticsAggregationService creates a new analytics aggregation service.
// A nil logger is replaced with a no-op logger, so callers may pass nil safely.
func NewAnalyticsAggregationService(db *gorm.DB, logger *zap.Logger) *AnalyticsAggregationService {
	if logger == nil {
		logger = zap.NewNop()
	}
	return &AnalyticsAggregationService{
		db:     db,
		logger: logger,
	}
}

// AnalyticsEvent represents an analytics event from the database.
type AnalyticsEvent struct {
	ID        uuid.UUID  `gorm:"type:uuid;primaryKey"`
	EventName string     `gorm:"not null;index:idx_analytics_events_name"`
	UserID    *uuid.UUID `gorm:"type:uuid;index:idx_analytics_events_user_id"` // nil for anonymous events
	Payload   string     `gorm:"type:jsonb"`                                   // raw JSON document
	CreatedAt time.Time  `gorm:"autoCreateTime;index:idx_analytics_events_created_at"`
}

// TableName defines the table name for GORM.
func (AnalyticsEvent) TableName() string {
	return "analytics_events"
}

// AnalyticsAggregationPeriod represents the period type for analytics aggregation.
type AnalyticsAggregationPeriod string

const (
	AnalyticsPeriodHour  AnalyticsAggregationPeriod = "hour"
	AnalyticsPeriodDay   AnalyticsAggregationPeriod = "day"
	AnalyticsPeriodWeek  AnalyticsAggregationPeriod = "week"
	AnalyticsPeriodMonth AnalyticsAggregationPeriod = "month"
)

// EventAggregationParams represents parameters for event aggregation.
type EventAggregationParams struct {
	EventNames []string                   // Filter by event names (empty = all events)
	UserID     *uuid.UUID                 // Filter by user ID (nil = all users)
	StartDate  time.Time                  // Start date for aggregation (inclusive)
	EndDate    time.Time                  // End date for aggregation (inclusive)
	Period     AnalyticsAggregationPeriod // Aggregation period (hour, day, week, month)
}

// EventAggregationResult represents aggregated analytics data for one
// (period, event name) bucket.
type EventAggregationResult struct {
	Period         string                 `json:"period"` // Format: YYYY-MM-DD, YYYY-WW, YYYY-MM, etc.
	EventName      string                 `json:"event_name"`
	EventCount     int64                  `json:"event_count"`
	UniqueUsers    int64                  `json:"unique_users"`
	FirstEventAt   *time.Time             `json:"first_event_at,omitempty"`
	LastEventAt    *time.Time             `json:"last_event_at,omitempty"`
	AveragePerUser float64                `json:"average_per_user"`          // Average events per user
	PayloadSummary map[string]interface{} `json:"payload_summary,omitempty"` // Aggregated payload data
}

// AggregationSummary represents the complete aggregation result.
type AggregationSummary struct {
	Results           []EventAggregationResult   `json:"results"`
	TotalEvents       int64                      `json:"total_events"`
	TotalUniqueUsers  int64                      `json:"total_unique_users"`
	Period            AnalyticsAggregationPeriod `json:"period"`
	StartDate         time.Time                  `json:"start_date"`
	EndDate           time.Time                  `json:"end_date"`
	EventTypeCounts   map[string]int64           `json:"event_type_counts"`             // Count by event name
	UserActivityCount map[string]int64           `json:"user_activity_count,omitempty"` // Count by user (if UserID not specified)
}

// AggregateEvents aggregates analytics events according to the specified parameters.
// It returns one EventAggregationResult per (period bucket, event name) pair plus
// overall totals. The SQL uses PostgreSQL-specific functions (to_char, date_trunc),
// so this service assumes a PostgreSQL backend.
func (s *AnalyticsAggregationService) AggregateEvents(
	ctx context.Context,
	params EventAggregationParams,
) (*AggregationSummary, error) {
	// Validate period
	if params.Period != AnalyticsPeriodHour && params.Period != AnalyticsPeriodDay &&
		params.Period != AnalyticsPeriodWeek && params.Period != AnalyticsPeriodMonth {
		return nil, fmt.Errorf("invalid period: %s (must be hour, day, week, or month)", params.Period)
	}

	// Validate date range
	if params.StartDate.After(params.EndDate) {
		return nil, fmt.Errorf("start date must be before end date")
	}

	// Build base query
	query := s.db.WithContext(ctx).Model(&AnalyticsEvent{}).
		Where("created_at >= ? AND created_at <= ?", params.StartDate, params.EndDate)

	// Filter by event names
	if len(params.EventNames) > 0 {
		query = query.Where("event_name IN ?", params.EventNames)
	}

	// Filter by user ID
	if params.UserID != nil {
		query = query.Where("user_id = ?", *params.UserID)
	}

	// Determine date format based on period (PostgreSQL to_char patterns)
	var dateFormat string
	switch params.Period {
	case AnalyticsPeriodHour:
		dateFormat = "YYYY-MM-DD HH24:00:00"
	case AnalyticsPeriodDay:
		dateFormat = "YYYY-MM-DD"
	case AnalyticsPeriodWeek:
		dateFormat = "IYYY-IW" // ISO year and week
	case AnalyticsPeriodMonth:
		dateFormat = "YYYY-MM"
	default:
		dateFormat = "YYYY-MM-DD"
	}

	// Aggregate by period and event name
	var aggregationResults []struct {
		Period      string    `gorm:"column:period"`
		EventName   string    `gorm:"column:event_name"`
		EventCount  int64     `gorm:"column:event_count"`
		UniqueUsers int64     `gorm:"column:unique_users"`
		FirstEvent  time.Time `gorm:"column:first_event"`
		LastEvent   time.Time `gorm:"column:last_event"`
	}

	// Use PostgreSQL date_trunc or to_char for period grouping. dateFormat only
	// contains values from the fixed switch above, so the Sprintf cannot inject SQL.
	periodExpr := fmt.Sprintf("to_char(created_at, '%s')", dateFormat)
	if params.Period == AnalyticsPeriodHour {
		periodExpr = "date_trunc('hour', created_at)"
	}

	if err := query.
		Select(fmt.Sprintf(`
			%s as period,
			event_name,
			COUNT(*) as event_count,
			COUNT(DISTINCT user_id) as unique_users,
			MIN(created_at) as first_event,
			MAX(created_at) as last_event
		`, periodExpr)).
		Group("period, event_name").
		Order("period ASC, event_name ASC").
		Scan(&aggregationResults).Error; err != nil {
		return nil, fmt.Errorf("failed to aggregate events: %w", err)
	}

	// Convert to EventAggregationResult
	results := make([]EventAggregationResult, 0, len(aggregationResults))
	eventTypeCounts := make(map[string]int64)
	totalEvents := int64(0)

	for _, agg := range aggregationResults {
		// Calculate average per user; guard against division by zero for
		// buckets where every event is anonymous (user_id NULL).
		avgPerUser := 0.0
		if agg.UniqueUsers > 0 {
			avgPerUser = float64(agg.EventCount) / float64(agg.UniqueUsers)
		}

		// Get payload summary for this event type and period.
		// NOTE(review): this issues one extra query per aggregation row (N+1);
		// acceptable for small result sets, but worth batching if period counts grow.
		payloadSummary := s.getPayloadSummary(ctx, agg.EventName, agg.Period, params)

		result := EventAggregationResult{
			Period:         agg.Period,
			EventName:      agg.EventName,
			EventCount:     agg.EventCount,
			UniqueUsers:    agg.UniqueUsers,
			FirstEventAt:   &agg.FirstEvent,
			LastEventAt:    &agg.LastEvent,
			AveragePerUser: avgPerUser,
			PayloadSummary: payloadSummary,
		}
		results = append(results, result)

		// Accumulate totals
		eventTypeCounts[agg.EventName] += agg.EventCount
		totalEvents += agg.EventCount
		// Note: We can't easily get unique users across all periods without another query
		// For now, we'll use a separate query for total unique users
	}

	// Get total unique users across the whole range (distinct count cannot be
	// summed across period buckets, hence the separate query).
	var totalUniqueUsers int64
	uniqueUserQuery := s.db.WithContext(ctx).Model(&AnalyticsEvent{}).
		Where("created_at >= ? AND created_at <= ?", params.StartDate, params.EndDate)
	if len(params.EventNames) > 0 {
		uniqueUserQuery = uniqueUserQuery.Where("event_name IN ?", params.EventNames)
	}
	if params.UserID != nil {
		uniqueUserQuery = uniqueUserQuery.Where("user_id = ?", *params.UserID)
	}
	if err := uniqueUserQuery.
		Distinct("user_id").
		Where("user_id IS NOT NULL").
		Count(&totalUniqueUsers).Error; err != nil {
		// Best-effort: totals still returned with TotalUniqueUsers == 0.
		s.logger.Warn("Failed to count unique users", zap.Error(err))
	}

	// Get user activity count if UserID is not specified
	userActivityCount := make(map[string]int64)
	if params.UserID == nil {
		var userCounts []struct {
			UserID     uuid.UUID `gorm:"column:user_id"`
			EventCount int64     `gorm:"column:event_count"`
		}
		userQuery := s.db.WithContext(ctx).Model(&AnalyticsEvent{}).
			Where("created_at >= ? AND created_at <= ? AND user_id IS NOT NULL",
				params.StartDate, params.EndDate)
		if len(params.EventNames) > 0 {
			userQuery = userQuery.Where("event_name IN ?", params.EventNames)
		}
		// Best-effort: errors here are silently ignored and the map stays empty.
		if err := userQuery.
			Select("user_id, COUNT(*) as event_count").
			Group("user_id").
			Order("event_count DESC").
			Limit(100). // Limit to top 100 users
			Scan(&userCounts).Error; err == nil {
			for _, uc := range userCounts {
				userActivityCount[uc.UserID.String()] = uc.EventCount
			}
		}
	}

	return &AggregationSummary{
		Results:           results,
		TotalEvents:       totalEvents,
		TotalUniqueUsers:  totalUniqueUsers,
		Period:            params.Period,
		StartDate:         params.StartDate,
		EndDate:           params.EndDate,
		EventTypeCounts:   eventTypeCounts,
		UserActivityCount: userActivityCount,
	}, nil
}

// getPayloadSummary extracts and aggregates common payload fields by sampling
// up to 100 events for the given event name and reporting fields present in at
// least half of the sample, with up to 5 sample values each. Returns nil when
// the query fails or no events match.
//
// NOTE(review): the period argument is currently unused for filtering — the
// sample spans the whole params date range, so all period buckets of the same
// event name receive an identical summary. Confirm whether per-period sampling
// is intended before tightening this.
func (s *AnalyticsAggregationService) getPayloadSummary(
	ctx context.Context,
	eventName string,
	period string,
	params EventAggregationParams,
) map[string]interface{} {
	// Get sample events for this event to analyze payload structure
	var events []AnalyticsEvent
	query := s.db.WithContext(ctx).Model(&AnalyticsEvent{}).
		Where("event_name = ? AND created_at >= ? AND created_at <= ?",
			eventName, params.StartDate, params.EndDate)
	// FIX: apply the same user filter as the aggregation queries in
	// AggregateEvents; previously a user-scoped aggregation sampled payloads
	// from all users' events.
	if params.UserID != nil {
		query = query.Where("user_id = ?", *params.UserID)
	}
	query = query.Limit(100) // Sample size

	if err := query.Find(&events).Error; err != nil || len(events) == 0 {
		return nil
	}

	// Analyze payload structure
	payloadSummary := make(map[string]interface{})
	fieldCounts := make(map[string]int)
	fieldValues := make(map[string][]interface{})

	for _, event := range events {
		var payload map[string]interface{}
		if err := json.Unmarshal([]byte(event.Payload), &payload); err != nil {
			continue // skip malformed payloads
		}
		// Count field occurrences and collect sample values
		for key, value := range payload {
			fieldCounts[key]++
			if len(fieldValues[key]) < 10 { // Keep up to 10 sample values
				fieldValues[key] = append(fieldValues[key], value)
			}
		}
	}

	// Build summary with most common fields
	for field, count := range fieldCounts {
		if count >= len(events)/2 { // Field present in at least 50% of events
			payloadSummary[field] = map[string]interface{}{
				"occurrence_count": count,
				"sample_values":    fieldValues[field][:min(5, len(fieldValues[field]))],
			}
		}
	}

	return payloadSummary
}

// GetEventCounts returns simple event counts by event name within the given
// date range, optionally filtered to specific event names.
func (s *AnalyticsAggregationService) GetEventCounts(
	ctx context.Context,
	startDate, endDate time.Time,
	eventNames []string,
) (map[string]int64, error) {
	query := s.db.WithContext(ctx).Model(&AnalyticsEvent{}).
		Where("created_at >= ? AND created_at <= ?", startDate, endDate)
	if len(eventNames) > 0 {
		query = query.Where("event_name IN ?", eventNames)
	}

	var results []struct {
		EventName  string `gorm:"column:event_name"`
		EventCount int64  `gorm:"column:event_count"`
	}
	if err := query.
		Select("event_name, COUNT(*) as event_count").
		Group("event_name").
		Order("event_count DESC").
		Scan(&results).Error; err != nil {
		return nil, fmt.Errorf("failed to get event counts: %w", err)
	}

	counts := make(map[string]int64, len(results))
	for _, r := range results {
		counts[r.EventName] = r.EventCount
	}
	return counts, nil
}

// GetTopEvents returns the top N events by count within the given date range.
// limit is clamped to [1, 100]; non-positive values default to 10.
func (s *AnalyticsAggregationService) GetTopEvents(
	ctx context.Context,
	startDate, endDate time.Time,
	limit int,
) ([]EventAggregationResult, error) {
	if limit <= 0 {
		limit = 10
	}
	if limit > 100 {
		limit = 100
	}

	var results []struct {
		EventName   string    `gorm:"column:event_name"`
		EventCount  int64     `gorm:"column:event_count"`
		UniqueUsers int64     `gorm:"column:unique_users"`
		LastEvent   time.Time `gorm:"column:last_event"`
	}
	if err := s.db.WithContext(ctx).Model(&AnalyticsEvent{}).
		Where("created_at >= ? AND created_at <= ?", startDate, endDate).
		Select(`
			event_name,
			COUNT(*) as event_count,
			COUNT(DISTINCT user_id) as unique_users,
			MAX(created_at) as last_event
		`).
		Group("event_name").
		Order("event_count DESC").
		Limit(limit).
		Scan(&results).Error; err != nil {
		return nil, fmt.Errorf("failed to get top events: %w", err)
	}

	aggregationResults := make([]EventAggregationResult, len(results))
	for i, r := range results {
		avgPerUser := 0.0
		if r.UniqueUsers > 0 {
			avgPerUser = float64(r.EventCount) / float64(r.UniqueUsers)
		}
		aggregationResults[i] = EventAggregationResult{
			EventName:      r.EventName,
			EventCount:     r.EventCount,
			UniqueUsers:    r.UniqueUsers,
			LastEventAt:    &r.LastEvent,
			AveragePerUser: avgPerUser,
		}
	}
	return aggregationResults, nil
}

// min returns the minimum of two integers. (Kept for compatibility with
// pre-1.21 toolchains; shadows the built-in min on newer ones.)
func min(a, b int) int {
	if a < b {
		return a
	}
	return b
}