veza/veza-backend-api/internal/services/analytics_aggregation_service.go

398 lines
13 KiB
Go
Raw Normal View History

package services
import (
"context"
"encoding/json"
"fmt"
"time"
"github.com/google/uuid"
"go.uber.org/zap"
"gorm.io/gorm"
)
// AnalyticsAggregationService provides aggregation and processing for analytics events
// BE-SVC-008: Implement analytics aggregation service
// AnalyticsAggregationService provides aggregation and processing for analytics events
// BE-SVC-008: Implement analytics aggregation service
type AnalyticsAggregationService struct {
	db     *gorm.DB    // database handle used for all aggregation queries
	logger *zap.Logger // never nil: the constructor substitutes a no-op logger
}
// NewAnalyticsAggregationService creates a new analytics aggregation service
// NewAnalyticsAggregationService creates a new analytics aggregation service.
// A nil logger is accepted and replaced with a no-op logger, so callers may
// safely omit logging configuration.
func NewAnalyticsAggregationService(db *gorm.DB, logger *zap.Logger) *AnalyticsAggregationService {
	svc := &AnalyticsAggregationService{db: db, logger: logger}
	if svc.logger == nil {
		svc.logger = zap.NewNop()
	}
	return svc
}
// AnalyticsEvent represents an analytics event from the database
// AnalyticsEvent represents an analytics event from the database.
type AnalyticsEvent struct {
	ID        uuid.UUID  `gorm:"type:uuid;primaryKey"`                                 // event primary key
	EventName string     `gorm:"not null;index:idx_analytics_events_name"`             // logical event type, e.g. used for grouping
	UserID    *uuid.UUID `gorm:"type:uuid;index:idx_analytics_events_user_id"`         // nil for anonymous/system events
	Payload   string     `gorm:"type:jsonb"`                                           // raw JSON payload; parsed on demand by the summary logic
	CreatedAt time.Time  `gorm:"autoCreateTime;index:idx_analytics_events_created_at"` // event timestamp; all range filters use this column
}
// TableName defines the table name for GORM
// TableName tells GORM which table backs AnalyticsEvent.
func (AnalyticsEvent) TableName() string {
	const tableName = "analytics_events"
	return tableName
}
// AnalyticsAggregationPeriod represents the period type for analytics aggregation
// AnalyticsAggregationPeriod represents the period type for analytics aggregation.
type AnalyticsAggregationPeriod string

// Supported aggregation bucket sizes. Any other value is rejected by
// AggregateEvents with an "invalid period" error.
const (
	AnalyticsPeriodHour  AnalyticsAggregationPeriod = "hour"
	AnalyticsPeriodDay   AnalyticsAggregationPeriod = "day"
	AnalyticsPeriodWeek  AnalyticsAggregationPeriod = "week"
	AnalyticsPeriodMonth AnalyticsAggregationPeriod = "month"
)
// EventAggregationParams represents parameters for event aggregation
// EventAggregationParams represents parameters for event aggregation.
type EventAggregationParams struct {
	EventNames []string                   // Filter by event names (empty = all events)
	UserID     *uuid.UUID                 // Filter by user ID (nil = all users)
	StartDate  time.Time                  // Start date for aggregation (inclusive)
	EndDate    time.Time                  // End date for aggregation (inclusive); must not precede StartDate
	Period     AnalyticsAggregationPeriod // Aggregation period (hour, day, week, month)
}
// EventAggregationResult represents aggregated analytics data
// EventAggregationResult represents aggregated analytics data for one
// (period, event name) bucket.
type EventAggregationResult struct {
	Period         string                 `json:"period"` // Format depends on the aggregation period: YYYY-MM-DD, IYYY-IW, YYYY-MM, etc.
	EventName      string                 `json:"event_name"`
	EventCount     int64                  `json:"event_count"`
	UniqueUsers    int64                  `json:"unique_users"`
	FirstEventAt   *time.Time             `json:"first_event_at,omitempty"`
	LastEventAt    *time.Time             `json:"last_event_at,omitempty"`
	AveragePerUser float64                `json:"average_per_user"`          // Average events per user; 0 when no users are attributed
	PayloadSummary map[string]interface{} `json:"payload_summary,omitempty"` // Aggregated payload data (sampled field occurrence summary)
}
// AggregationSummary represents the complete aggregation result
// AggregationSummary represents the complete aggregation result returned by
// AggregateEvents.
type AggregationSummary struct {
	Results           []EventAggregationResult   `json:"results"`
	TotalEvents       int64                      `json:"total_events"`
	TotalUniqueUsers  int64                      `json:"total_unique_users"`
	Period            AnalyticsAggregationPeriod `json:"period"`
	StartDate         time.Time                  `json:"start_date"`
	EndDate           time.Time                  `json:"end_date"`
	EventTypeCounts   map[string]int64           `json:"event_type_counts"`             // Count by event name
	UserActivityCount map[string]int64           `json:"user_activity_count,omitempty"` // Count by user (populated only when params.UserID is nil; top 100 users)
}
// AggregateEvents aggregates analytics events according to the specified parameters
// AggregateEvents aggregates analytics events according to the specified parameters.
//
// It validates the period and date range, groups matching events by
// (period bucket, event name) using PostgreSQL to_char/date_trunc expressions,
// and returns per-bucket counts plus overall totals. When params.UserID is nil,
// the summary also includes activity counts for the top 100 most active users.
//
// Returns an error for an unknown period, an inverted date range, or a failed
// aggregation query; the auxiliary unique-user and per-user queries degrade
// gracefully (logged warning / empty map) rather than failing the whole call.
func (s *AnalyticsAggregationService) AggregateEvents(
	ctx context.Context,
	params EventAggregationParams,
) (*AggregationSummary, error) {
	// Validate period.
	switch params.Period {
	case AnalyticsPeriodHour, AnalyticsPeriodDay, AnalyticsPeriodWeek, AnalyticsPeriodMonth:
		// ok
	default:
		return nil, fmt.Errorf("invalid period: %s (must be hour, day, week, or month)", params.Period)
	}
	// Validate date range.
	if params.StartDate.After(params.EndDate) {
		return nil, fmt.Errorf("start date must be before end date")
	}

	// Build base query with the requested filters.
	query := s.db.WithContext(ctx).Model(&AnalyticsEvent{}).
		Where("created_at >= ? AND created_at <= ?", params.StartDate, params.EndDate)
	if len(params.EventNames) > 0 {
		query = query.Where("event_name IN ?", params.EventNames)
	}
	if params.UserID != nil {
		query = query.Where("user_id = ?", *params.UserID)
	}

	// Determine the PostgreSQL to_char format for the requested period.
	var dateFormat string
	switch params.Period {
	case AnalyticsPeriodHour:
		dateFormat = "YYYY-MM-DD HH24:00:00"
	case AnalyticsPeriodDay:
		dateFormat = "YYYY-MM-DD"
	case AnalyticsPeriodWeek:
		dateFormat = "IYYY-IW" // ISO year and week
	case AnalyticsPeriodMonth:
		dateFormat = "YYYY-MM"
	}

	// Aggregate by period and event name.
	var aggregationResults []struct {
		Period      string    `gorm:"column:period"`
		EventName   string    `gorm:"column:event_name"`
		EventCount  int64     `gorm:"column:event_count"`
		UniqueUsers int64     `gorm:"column:unique_users"`
		FirstEvent  time.Time `gorm:"column:first_event"`
		LastEvent   time.Time `gorm:"column:last_event"`
	}
	// Use PostgreSQL to_char for period grouping; hourly buckets use
	// date_trunc instead so they group on real timestamps.
	periodExpr := fmt.Sprintf("to_char(created_at, '%s')", dateFormat)
	if params.Period == AnalyticsPeriodHour {
		periodExpr = "date_trunc('hour', created_at)"
	}
	if err := query.
		Select(fmt.Sprintf(`
%s as period,
event_name,
COUNT(*) as event_count,
COUNT(DISTINCT user_id) as unique_users,
MIN(created_at) as first_event,
MAX(created_at) as last_event
`, periodExpr)).
		Group("period, event_name").
		Order("period ASC, event_name ASC").
		Scan(&aggregationResults).Error; err != nil {
		return nil, fmt.Errorf("failed to aggregate events: %w", err)
	}

	// Convert to EventAggregationResult and accumulate totals.
	results := make([]EventAggregationResult, 0, len(aggregationResults))
	eventTypeCounts := make(map[string]int64)
	totalEvents := int64(0)
	// getPayloadSummary's result depends only on the event name (it samples the
	// whole date range, not a single bucket), so cache it per event name to
	// avoid re-running an identical query for every period bucket.
	payloadCache := make(map[string]map[string]interface{})
	for _, agg := range aggregationResults {
		// Average events per user; guarded against division by zero when every
		// event in the bucket is anonymous.
		avgPerUser := 0.0
		if agg.UniqueUsers > 0 {
			avgPerUser = float64(agg.EventCount) / float64(agg.UniqueUsers)
		}

		payloadSummary, cached := payloadCache[agg.EventName]
		if !cached {
			payloadSummary = s.getPayloadSummary(ctx, agg.EventName, agg.Period, params)
			payloadCache[agg.EventName] = payloadSummary
		}

		// Copy the timestamps to locals before taking their addresses: taking
		// the address of the loop variable would make every result alias the
		// same (final) value on Go versions before 1.22.
		firstEvent := agg.FirstEvent
		lastEvent := agg.LastEvent
		results = append(results, EventAggregationResult{
			Period:         agg.Period,
			EventName:      agg.EventName,
			EventCount:     agg.EventCount,
			UniqueUsers:    agg.UniqueUsers,
			FirstEventAt:   &firstEvent,
			LastEventAt:    &lastEvent,
			AveragePerUser: avgPerUser,
			PayloadSummary: payloadSummary,
		})

		eventTypeCounts[agg.EventName] += agg.EventCount
		totalEvents += agg.EventCount
	}

	// Unique users across the whole window cannot be derived from per-bucket
	// distinct counts, so run a dedicated query. Failure here only degrades
	// the summary (count stays 0) and is logged rather than returned.
	var totalUniqueUsers int64
	uniqueUserQuery := s.db.WithContext(ctx).Model(&AnalyticsEvent{}).
		Where("created_at >= ? AND created_at <= ?", params.StartDate, params.EndDate)
	if len(params.EventNames) > 0 {
		uniqueUserQuery = uniqueUserQuery.Where("event_name IN ?", params.EventNames)
	}
	if params.UserID != nil {
		uniqueUserQuery = uniqueUserQuery.Where("user_id = ?", *params.UserID)
	}
	if err := uniqueUserQuery.
		Distinct("user_id").
		Where("user_id IS NOT NULL").
		Count(&totalUniqueUsers).Error; err != nil {
		s.logger.Warn("Failed to count unique users", zap.Error(err))
	}

	// Per-user activity breakdown, only when no single user was requested.
	userActivityCount := make(map[string]int64)
	if params.UserID == nil {
		var userCounts []struct {
			UserID     uuid.UUID `gorm:"column:user_id"`
			EventCount int64     `gorm:"column:event_count"`
		}
		userQuery := s.db.WithContext(ctx).Model(&AnalyticsEvent{}).
			Where("created_at >= ? AND created_at <= ? AND user_id IS NOT NULL", params.StartDate, params.EndDate)
		if len(params.EventNames) > 0 {
			userQuery = userQuery.Where("event_name IN ?", params.EventNames)
		}
		if err := userQuery.
			Select("user_id, COUNT(*) as event_count").
			Group("user_id").
			Order("event_count DESC").
			Limit(100). // Limit to top 100 users
			Scan(&userCounts).Error; err == nil {
			for _, uc := range userCounts {
				userActivityCount[uc.UserID.String()] = uc.EventCount
			}
		}
	}

	return &AggregationSummary{
		Results:           results,
		TotalEvents:       totalEvents,
		TotalUniqueUsers:  totalUniqueUsers,
		Period:            params.Period,
		StartDate:         params.StartDate,
		EndDate:           params.EndDate,
		EventTypeCounts:   eventTypeCounts,
		UserActivityCount: userActivityCount,
	}, nil
}
// getPayloadSummary extracts and aggregates common payload fields
// getPayloadSummary samples up to 100 events of the given event name within
// the aggregation window and summarizes which JSON payload fields occur most
// often (fields present in at least half of the sampled events), together with
// up to 5 sample values per field.
//
// NOTE(review): the period argument is currently unused — the sample spans the
// whole params.StartDate..EndDate range rather than a single bucket, so the
// summary is identical for every period of the same event name. The parameter
// is retained for a future per-bucket implementation.
//
// Returns nil on query error, on an empty sample, or when no field clears the
// occurrence threshold.
func (s *AnalyticsAggregationService) getPayloadSummary(
	ctx context.Context,
	eventName string,
	period string,
	params EventAggregationParams,
) map[string]interface{} {
	// Sample events for this event name to analyze payload structure.
	query := s.db.WithContext(ctx).Model(&AnalyticsEvent{}).
		Where("event_name = ? AND created_at >= ? AND created_at <= ?", eventName, params.StartDate, params.EndDate).
		Limit(100) // Sample size
	// Keep the sample consistent with the main aggregation: when a user filter
	// is set, only that user's payloads should be summarized.
	if params.UserID != nil {
		query = query.Where("user_id = ?", *params.UserID)
	}
	var events []AnalyticsEvent
	if err := query.Find(&events).Error; err != nil || len(events) == 0 {
		return nil
	}

	// Count field occurrences and collect a few sample values per field.
	fieldCounts := make(map[string]int)
	fieldValues := make(map[string][]interface{})
	for _, event := range events {
		var payload map[string]interface{}
		if err := json.Unmarshal([]byte(event.Payload), &payload); err != nil {
			// Malformed payloads are skipped rather than failing the summary.
			continue
		}
		for key, value := range payload {
			fieldCounts[key]++
			if len(fieldValues[key]) < 10 { // Keep up to 10 sample values
				fieldValues[key] = append(fieldValues[key], value)
			}
		}
	}

	// Build summary with the most common fields (present in >= 50% of events;
	// integer division makes the threshold inclusive for odd sample sizes).
	payloadSummary := make(map[string]interface{})
	for field, count := range fieldCounts {
		if count >= len(events)/2 {
			payloadSummary[field] = map[string]interface{}{
				"occurrence_count": count,
				"sample_values":    fieldValues[field][:min(5, len(fieldValues[field]))],
			}
		}
	}
	return payloadSummary
}
// GetEventCounts returns simple event counts by event name
// GetEventCounts returns the number of events per event name within the given
// time window. An empty eventNames slice counts all event names.
func (s *AnalyticsAggregationService) GetEventCounts(
	ctx context.Context,
	startDate, endDate time.Time,
	eventNames []string,
) (map[string]int64, error) {
	type countRow struct {
		EventName  string `gorm:"column:event_name"`
		EventCount int64  `gorm:"column:event_count"`
	}

	q := s.db.WithContext(ctx).Model(&AnalyticsEvent{}).
		Where("created_at >= ? AND created_at <= ?", startDate, endDate)
	if len(eventNames) > 0 {
		q = q.Where("event_name IN ?", eventNames)
	}

	var rows []countRow
	err := q.
		Select("event_name, COUNT(*) as event_count").
		Group("event_name").
		Order("event_count DESC").
		Scan(&rows).Error
	if err != nil {
		return nil, fmt.Errorf("failed to get event counts: %w", err)
	}

	counts := make(map[string]int64, len(rows))
	for _, row := range rows {
		counts[row.EventName] = row.EventCount
	}
	return counts, nil
}
// GetTopEvents returns the top N events by count
// GetTopEvents returns the top N events by count within the given time window,
// ordered by descending event count. A non-positive limit defaults to 10 and
// limits above 100 are clamped to 100. Period and PayloadSummary are left
// empty in the returned results.
func (s *AnalyticsAggregationService) GetTopEvents(
	ctx context.Context,
	startDate, endDate time.Time,
	limit int,
) ([]EventAggregationResult, error) {
	// Clamp the limit to a sane range.
	switch {
	case limit <= 0:
		limit = 10
	case limit > 100:
		limit = 100
	}

	var results []struct {
		EventName   string    `gorm:"column:event_name"`
		EventCount  int64     `gorm:"column:event_count"`
		UniqueUsers int64     `gorm:"column:unique_users"`
		LastEvent   time.Time `gorm:"column:last_event"`
	}
	if err := s.db.WithContext(ctx).Model(&AnalyticsEvent{}).
		Where("created_at >= ? AND created_at <= ?", startDate, endDate).
		Select(`
event_name,
COUNT(*) as event_count,
COUNT(DISTINCT user_id) as unique_users,
MAX(created_at) as last_event
`).
		Group("event_name").
		Order("event_count DESC").
		Limit(limit).
		Scan(&results).Error; err != nil {
		return nil, fmt.Errorf("failed to get top events: %w", err)
	}

	aggregationResults := make([]EventAggregationResult, len(results))
	for i, r := range results {
		avgPerUser := 0.0
		if r.UniqueUsers > 0 {
			avgPerUser = float64(r.EventCount) / float64(r.UniqueUsers)
		}
		// Copy the timestamp to a local before taking its address: pointing at
		// the loop variable would make every result share the same (final)
		// timestamp on Go versions before 1.22.
		lastEvent := r.LastEvent
		aggregationResults[i] = EventAggregationResult{
			EventName:      r.EventName,
			EventCount:     r.EventCount,
			UniqueUsers:    r.UniqueUsers,
			LastEventAt:    &lastEvent,
			AveragePerUser: avgPerUser,
		}
	}
	return aggregationResults, nil
}
// min returns the minimum of two integers
// min returns the smaller of two integers.
// (Shadows the Go 1.21+ built-in so the file also compiles on older toolchains.)
func min(a, b int) int {
	if a > b {
		return b
	}
	return a
}