397 lines
13 KiB
Go
397 lines
13 KiB
Go
package services
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"time"
|
|
|
|
"github.com/google/uuid"
|
|
"go.uber.org/zap"
|
|
"gorm.io/gorm"
|
|
)
|
|
|
|
// AnalyticsAggregationService provides aggregation and processing for analytics events
|
|
// BE-SVC-008: Implement analytics aggregation service
|
|
type AnalyticsAggregationService struct {
|
|
db *gorm.DB
|
|
logger *zap.Logger
|
|
}
|
|
|
|
// NewAnalyticsAggregationService creates a new analytics aggregation service
|
|
func NewAnalyticsAggregationService(db *gorm.DB, logger *zap.Logger) *AnalyticsAggregationService {
|
|
if logger == nil {
|
|
logger = zap.NewNop()
|
|
}
|
|
return &AnalyticsAggregationService{
|
|
db: db,
|
|
logger: logger,
|
|
}
|
|
}
|
|
|
|
// AnalyticsEvent represents an analytics event from the database
|
|
type AnalyticsEvent struct {
|
|
ID uuid.UUID `gorm:"type:uuid;primaryKey"`
|
|
EventName string `gorm:"not null;index:idx_analytics_events_name"`
|
|
UserID *uuid.UUID `gorm:"type:uuid;index:idx_analytics_events_user_id"`
|
|
Payload string `gorm:"type:jsonb"`
|
|
CreatedAt time.Time `gorm:"autoCreateTime;index:idx_analytics_events_created_at"`
|
|
}
|
|
|
|
// TableName defines the table name for GORM
|
|
func (AnalyticsEvent) TableName() string {
|
|
return "analytics_events"
|
|
}
|
|
|
|
// AnalyticsAggregationPeriod names the size of the time bucket that events are
// grouped into during aggregation.
type AnalyticsAggregationPeriod string

// Supported aggregation periods.
const (
	// AnalyticsPeriodHour groups events into hourly buckets.
	AnalyticsPeriodHour AnalyticsAggregationPeriod = "hour"
	// AnalyticsPeriodDay groups events into daily buckets.
	AnalyticsPeriodDay AnalyticsAggregationPeriod = "day"
	// AnalyticsPeriodWeek groups events into weekly buckets.
	AnalyticsPeriodWeek AnalyticsAggregationPeriod = "week"
	// AnalyticsPeriodMonth groups events into monthly buckets.
	AnalyticsPeriodMonth AnalyticsAggregationPeriod = "month"
)
|
|
|
|
// EventAggregationParams represents parameters for event aggregation
|
|
type EventAggregationParams struct {
|
|
EventNames []string // Filter by event names (empty = all events)
|
|
UserID *uuid.UUID // Filter by user ID (nil = all users)
|
|
StartDate time.Time // Start date for aggregation
|
|
EndDate time.Time // End date for aggregation
|
|
Period AnalyticsAggregationPeriod // Aggregation period (hour, day, week, month)
|
|
}
|
|
|
|
// EventAggregationResult holds the aggregated figures for one event name,
// optionally scoped to one period bucket.
type EventAggregationResult struct {
	// Period is the bucket label. Format: YYYY-MM-DD, YYYY-WW, YYYY-MM, etc.
	Period string `json:"period"`
	// EventName is the event type the figures belong to.
	EventName string `json:"event_name"`
	// EventCount is the number of events in the bucket.
	EventCount int64 `json:"event_count"`
	// UniqueUsers is the number of distinct users seen in the bucket.
	UniqueUsers int64 `json:"unique_users"`
	// FirstEventAt is the timestamp of the earliest event; omitted from JSON when nil.
	FirstEventAt *time.Time `json:"first_event_at,omitempty"`
	// LastEventAt is the timestamp of the latest event; omitted from JSON when nil.
	LastEventAt *time.Time `json:"last_event_at,omitempty"`
	// AveragePerUser is the average number of events per user.
	AveragePerUser float64 `json:"average_per_user"`
	// PayloadSummary carries aggregated payload data; omitted from JSON when empty.
	PayloadSummary map[string]interface{} `json:"payload_summary,omitempty"`
}
|
|
|
|
// AggregationSummary represents the complete aggregation result
|
|
type AggregationSummary struct {
|
|
Results []EventAggregationResult `json:"results"`
|
|
TotalEvents int64 `json:"total_events"`
|
|
TotalUniqueUsers int64 `json:"total_unique_users"`
|
|
Period AnalyticsAggregationPeriod `json:"period"`
|
|
StartDate time.Time `json:"start_date"`
|
|
EndDate time.Time `json:"end_date"`
|
|
EventTypeCounts map[string]int64 `json:"event_type_counts"` // Count by event name
|
|
UserActivityCount map[string]int64 `json:"user_activity_count,omitempty"` // Count by user (if UserID not specified)
|
|
}
|
|
|
|
// AggregateEvents aggregates analytics events according to the specified parameters
|
|
func (s *AnalyticsAggregationService) AggregateEvents(
|
|
ctx context.Context,
|
|
params EventAggregationParams,
|
|
) (*AggregationSummary, error) {
|
|
// Validate period
|
|
if params.Period != AnalyticsPeriodHour && params.Period != AnalyticsPeriodDay &&
|
|
params.Period != AnalyticsPeriodWeek && params.Period != AnalyticsPeriodMonth {
|
|
return nil, fmt.Errorf("invalid period: %s (must be hour, day, week, or month)", params.Period)
|
|
}
|
|
|
|
// Validate date range
|
|
if params.StartDate.After(params.EndDate) {
|
|
return nil, fmt.Errorf("start date must be before end date")
|
|
}
|
|
|
|
// Build base query
|
|
query := s.db.WithContext(ctx).Model(&AnalyticsEvent{}).
|
|
Where("created_at >= ? AND created_at <= ?", params.StartDate, params.EndDate)
|
|
|
|
// Filter by event names
|
|
if len(params.EventNames) > 0 {
|
|
query = query.Where("event_name IN ?", params.EventNames)
|
|
}
|
|
|
|
// Filter by user ID
|
|
if params.UserID != nil {
|
|
query = query.Where("user_id = ?", *params.UserID)
|
|
}
|
|
|
|
// Determine date format based on period
|
|
var dateFormat string
|
|
switch params.Period {
|
|
case AnalyticsPeriodHour:
|
|
dateFormat = "YYYY-MM-DD HH24:00:00"
|
|
case AnalyticsPeriodDay:
|
|
dateFormat = "YYYY-MM-DD"
|
|
case AnalyticsPeriodWeek:
|
|
dateFormat = "IYYY-IW" // ISO year and week
|
|
case AnalyticsPeriodMonth:
|
|
dateFormat = "YYYY-MM"
|
|
default:
|
|
dateFormat = "YYYY-MM-DD"
|
|
}
|
|
|
|
// Aggregate by period and event name
|
|
var aggregationResults []struct {
|
|
Period string `gorm:"column:period"`
|
|
EventName string `gorm:"column:event_name"`
|
|
EventCount int64 `gorm:"column:event_count"`
|
|
UniqueUsers int64 `gorm:"column:unique_users"`
|
|
FirstEvent time.Time `gorm:"column:first_event"`
|
|
LastEvent time.Time `gorm:"column:last_event"`
|
|
}
|
|
|
|
// Use PostgreSQL date_trunc or to_char for period grouping
|
|
periodExpr := fmt.Sprintf("to_char(created_at, '%s')", dateFormat)
|
|
if params.Period == AnalyticsPeriodHour {
|
|
periodExpr = "date_trunc('hour', created_at)"
|
|
}
|
|
|
|
if err := query.
|
|
Select(fmt.Sprintf(`
|
|
%s as period,
|
|
event_name,
|
|
COUNT(*) as event_count,
|
|
COUNT(DISTINCT user_id) as unique_users,
|
|
MIN(created_at) as first_event,
|
|
MAX(created_at) as last_event
|
|
`, periodExpr)).
|
|
Group("period, event_name").
|
|
Order("period ASC, event_name ASC").
|
|
Scan(&aggregationResults).Error; err != nil {
|
|
return nil, fmt.Errorf("failed to aggregate events: %w", err)
|
|
}
|
|
|
|
// Convert to EventAggregationResult
|
|
results := make([]EventAggregationResult, 0, len(aggregationResults))
|
|
eventTypeCounts := make(map[string]int64)
|
|
totalEvents := int64(0)
|
|
|
|
for _, agg := range aggregationResults {
|
|
// Calculate average per user
|
|
avgPerUser := 0.0
|
|
if agg.UniqueUsers > 0 {
|
|
avgPerUser = float64(agg.EventCount) / float64(agg.UniqueUsers)
|
|
}
|
|
|
|
// Get payload summary for this event type and period
|
|
payloadSummary := s.getPayloadSummary(ctx, agg.EventName, agg.Period, params)
|
|
|
|
result := EventAggregationResult{
|
|
Period: agg.Period,
|
|
EventName: agg.EventName,
|
|
EventCount: agg.EventCount,
|
|
UniqueUsers: agg.UniqueUsers,
|
|
FirstEventAt: &agg.FirstEvent,
|
|
LastEventAt: &agg.LastEvent,
|
|
AveragePerUser: avgPerUser,
|
|
PayloadSummary: payloadSummary,
|
|
}
|
|
|
|
results = append(results, result)
|
|
|
|
// Accumulate totals
|
|
eventTypeCounts[agg.EventName] += agg.EventCount
|
|
totalEvents += agg.EventCount
|
|
|
|
// Note: We can't easily get unique users across all periods without another query
|
|
// For now, we'll use a separate query for total unique users
|
|
}
|
|
|
|
// Get total unique users
|
|
var totalUniqueUsers int64
|
|
uniqueUserQuery := s.db.WithContext(ctx).Model(&AnalyticsEvent{}).
|
|
Where("created_at >= ? AND created_at <= ?", params.StartDate, params.EndDate)
|
|
if len(params.EventNames) > 0 {
|
|
uniqueUserQuery = uniqueUserQuery.Where("event_name IN ?", params.EventNames)
|
|
}
|
|
if params.UserID != nil {
|
|
uniqueUserQuery = uniqueUserQuery.Where("user_id = ?", *params.UserID)
|
|
}
|
|
if err := uniqueUserQuery.
|
|
Distinct("user_id").
|
|
Where("user_id IS NOT NULL").
|
|
Count(&totalUniqueUsers).Error; err != nil {
|
|
s.logger.Warn("Failed to count unique users", zap.Error(err))
|
|
}
|
|
|
|
// Get user activity count if UserID is not specified
|
|
userActivityCount := make(map[string]int64)
|
|
if params.UserID == nil {
|
|
var userCounts []struct {
|
|
UserID uuid.UUID `gorm:"column:user_id"`
|
|
EventCount int64 `gorm:"column:event_count"`
|
|
}
|
|
userQuery := s.db.WithContext(ctx).Model(&AnalyticsEvent{}).
|
|
Where("created_at >= ? AND created_at <= ? AND user_id IS NOT NULL", params.StartDate, params.EndDate)
|
|
if len(params.EventNames) > 0 {
|
|
userQuery = userQuery.Where("event_name IN ?", params.EventNames)
|
|
}
|
|
if err := userQuery.
|
|
Select("user_id, COUNT(*) as event_count").
|
|
Group("user_id").
|
|
Order("event_count DESC").
|
|
Limit(100). // Limit to top 100 users
|
|
Scan(&userCounts).Error; err == nil {
|
|
for _, uc := range userCounts {
|
|
userActivityCount[uc.UserID.String()] = uc.EventCount
|
|
}
|
|
}
|
|
}
|
|
|
|
return &AggregationSummary{
|
|
Results: results,
|
|
TotalEvents: totalEvents,
|
|
TotalUniqueUsers: totalUniqueUsers,
|
|
Period: params.Period,
|
|
StartDate: params.StartDate,
|
|
EndDate: params.EndDate,
|
|
EventTypeCounts: eventTypeCounts,
|
|
UserActivityCount: userActivityCount,
|
|
}, nil
|
|
}
|
|
|
|
// getPayloadSummary extracts and aggregates common payload fields
|
|
func (s *AnalyticsAggregationService) getPayloadSummary(
|
|
ctx context.Context,
|
|
eventName string,
|
|
period string,
|
|
params EventAggregationParams,
|
|
) map[string]interface{} {
|
|
// Get sample events for this period to analyze payload structure
|
|
var events []AnalyticsEvent
|
|
query := s.db.WithContext(ctx).Model(&AnalyticsEvent{}).
|
|
Where("event_name = ? AND created_at >= ? AND created_at <= ?", eventName, params.StartDate, params.EndDate).
|
|
Limit(100) // Sample size
|
|
|
|
if err := query.Find(&events).Error; err != nil || len(events) == 0 {
|
|
return nil
|
|
}
|
|
|
|
// Analyze payload structure
|
|
payloadSummary := make(map[string]interface{})
|
|
fieldCounts := make(map[string]int)
|
|
fieldValues := make(map[string][]interface{})
|
|
|
|
for _, event := range events {
|
|
var payload map[string]interface{}
|
|
if err := json.Unmarshal([]byte(event.Payload), &payload); err != nil {
|
|
continue
|
|
}
|
|
|
|
// Count field occurrences and collect sample values
|
|
for key, value := range payload {
|
|
fieldCounts[key]++
|
|
if len(fieldValues[key]) < 10 { // Keep up to 10 sample values
|
|
fieldValues[key] = append(fieldValues[key], value)
|
|
}
|
|
}
|
|
}
|
|
|
|
// Build summary with most common fields
|
|
for field, count := range fieldCounts {
|
|
if count >= len(events)/2 { // Field present in at least 50% of events
|
|
payloadSummary[field] = map[string]interface{}{
|
|
"occurrence_count": count,
|
|
"sample_values": fieldValues[field][:min(5, len(fieldValues[field]))],
|
|
}
|
|
}
|
|
}
|
|
|
|
return payloadSummary
|
|
}
|
|
|
|
// GetEventCounts returns simple event counts by event name
|
|
func (s *AnalyticsAggregationService) GetEventCounts(
|
|
ctx context.Context,
|
|
startDate, endDate time.Time,
|
|
eventNames []string,
|
|
) (map[string]int64, error) {
|
|
query := s.db.WithContext(ctx).Model(&AnalyticsEvent{}).
|
|
Where("created_at >= ? AND created_at <= ?", startDate, endDate)
|
|
|
|
if len(eventNames) > 0 {
|
|
query = query.Where("event_name IN ?", eventNames)
|
|
}
|
|
|
|
var results []struct {
|
|
EventName string `gorm:"column:event_name"`
|
|
EventCount int64 `gorm:"column:event_count"`
|
|
}
|
|
|
|
if err := query.
|
|
Select("event_name, COUNT(*) as event_count").
|
|
Group("event_name").
|
|
Order("event_count DESC").
|
|
Scan(&results).Error; err != nil {
|
|
return nil, fmt.Errorf("failed to get event counts: %w", err)
|
|
}
|
|
|
|
counts := make(map[string]int64)
|
|
for _, r := range results {
|
|
counts[r.EventName] = r.EventCount
|
|
}
|
|
|
|
return counts, nil
|
|
}
|
|
|
|
// GetTopEvents returns the top N events by count
|
|
func (s *AnalyticsAggregationService) GetTopEvents(
|
|
ctx context.Context,
|
|
startDate, endDate time.Time,
|
|
limit int,
|
|
) ([]EventAggregationResult, error) {
|
|
if limit <= 0 {
|
|
limit = 10
|
|
}
|
|
if limit > 100 {
|
|
limit = 100
|
|
}
|
|
|
|
var results []struct {
|
|
EventName string `gorm:"column:event_name"`
|
|
EventCount int64 `gorm:"column:event_count"`
|
|
UniqueUsers int64 `gorm:"column:unique_users"`
|
|
LastEvent time.Time `gorm:"column:last_event"`
|
|
}
|
|
|
|
if err := s.db.WithContext(ctx).Model(&AnalyticsEvent{}).
|
|
Where("created_at >= ? AND created_at <= ?", startDate, endDate).
|
|
Select(`
|
|
event_name,
|
|
COUNT(*) as event_count,
|
|
COUNT(DISTINCT user_id) as unique_users,
|
|
MAX(created_at) as last_event
|
|
`).
|
|
Group("event_name").
|
|
Order("event_count DESC").
|
|
Limit(limit).
|
|
Scan(&results).Error; err != nil {
|
|
return nil, fmt.Errorf("failed to get top events: %w", err)
|
|
}
|
|
|
|
aggregationResults := make([]EventAggregationResult, len(results))
|
|
for i, r := range results {
|
|
avgPerUser := 0.0
|
|
if r.UniqueUsers > 0 {
|
|
avgPerUser = float64(r.EventCount) / float64(r.UniqueUsers)
|
|
}
|
|
aggregationResults[i] = EventAggregationResult{
|
|
EventName: r.EventName,
|
|
EventCount: r.EventCount,
|
|
UniqueUsers: r.UniqueUsers,
|
|
LastEventAt: &r.LastEvent,
|
|
AveragePerUser: avgPerUser,
|
|
}
|
|
}
|
|
|
|
return aggregationResults, nil
|
|
}
|
|
|
|
// min returns the smaller of the two integers a and b.
func min(a, b int) int {
	if b <= a {
		return b
	}
	return a
}
|