veza/veza-backend-api/internal/services/query_parser.go

274 lines
6.8 KiB
Go

// Package services - query_parser.go
// v0.203 Lot K: Boolean search query parser (AND, OR, NOT, "exact phrase")
package services
import (
"fmt"
"regexp"
"strings"
"gorm.io/gorm"
)
// ParsedQuery is the flat result of parsing a search query that may contain
// boolean operators and a quoted phrase.
type ParsedQuery struct {
	// ExactPhrase holds the text taken from a "double-quoted" segment.
	ExactPhrase string
	// AndTerms must all match (AND), OrTerms need at least one match (OR),
	// and NotTerms must not match (NOT).
	AndTerms, OrTerms, NotTerms []string
	// SimpleTerm is the fallback text used when no AND/OR operator was found.
	SimpleTerm string
}
// ParseSearchQuery parses a raw search string into a ParsedQuery.
//
// Operators are case-insensitive, whitespace-separated words:
//
//   - "exact phrase": the first non-empty double-quoted segment becomes
//     ExactPhrase; all quoted segments are then removed from the text, so any
//     further quoted phrases are ignored.
//   - NOT: every term after the first NOT goes to NotTerms. The query (or what
//     is left of it after phrase removal) may start with NOT, and repeated
//     NOTs all contribute.
//   - OR: if the positive part contains OR, all of its terms become OrTerms.
//   - AND: otherwise, if it contains AND, all of its terms become AndTerms.
//   - Otherwise the positive part is stored verbatim in SimpleTerm.
//
// Operators left dangling at either end of the positive part (for example the
// AND in `"new york" AND jazz` once the phrase is removed) are dropped instead
// of being searched for literally. A query made only of operator words is kept
// as SimpleTerm so that such words remain searchable.
//
// The result is never nil; a blank query yields an empty ParsedQuery.
func ParseSearchQuery(raw string) *ParsedQuery {
	out := &ParsedQuery{}
	q := strings.TrimSpace(raw)
	if q == "" {
		return out
	}

	// Quoted phrases are extracted first so operators inside them are inert.
	if phrases := extractQuoted(q); len(phrases) > 0 {
		out.ExactPhrase = phrases[0]
		q = strings.TrimSpace(removeQuoted(q))
		if q == "" {
			return out
		}
	}

	// NOT has the highest precedence: everything after it is excluded.
	positive := q
	if words := strings.Fields(q); len(words) > 1 && strings.EqualFold(words[0], "NOT") {
		// splitByWord needs a space on both sides of the operator, so a
		// leading NOT has to be detected separately.
		out.NotTerms = parseTerms(q[len(words[0]):])
		positive = ""
	} else if parts := splitByWord(q, "NOT"); len(parts) > 1 {
		// Every segment after the first NOT is negated, not just the first one.
		out.NotTerms = parseTermsFromParts(parts[1:])
		positive = parts[0]
	}

	positive = trimEdgeOperators(positive)
	if positive == "" {
		if out.ExactPhrase == "" && len(out.NotTerms) == 0 {
			// Nothing but operator words: fall back to a literal search.
			out.SimpleTerm = q
		}
		return out
	}

	// OR binds next, then AND; operator words inside a segment are discarded
	// by parseTerms.
	if orParts := splitByWord(positive, "OR"); len(orParts) > 1 {
		out.OrTerms = parseTermsFromParts(orParts)
		return out
	}
	if andParts := splitByWord(positive, "AND"); len(andParts) > 1 {
		out.AndTerms = parseTermsFromParts(andParts)
		return out
	}

	out.SimpleTerm = positive
	return out
}

// trimEdgeOperators strips operator words from both ends of s while at least
// one other word remains; a lone word is returned unchanged (trimmed).
func trimEdgeOperators(s string) string {
	for {
		s = strings.TrimSpace(s)
		words := strings.Fields(s)
		if len(words) < 2 {
			return s
		}
		first, last := words[0], words[len(words)-1]
		switch {
		case isOperator(first):
			s = s[len(first):]
		case isOperator(last):
			s = s[:len(s)-len(last)]
		default:
			return s
		}
	}
}
// quotedRegex matches a double-quoted segment and captures the text between
// the quotes (which may be empty).
var quotedRegex = regexp.MustCompile(`"([^"]*)"`)

// extractQuoted returns the trimmed contents of every double-quoted segment in
// s, in order of appearance. Segments that are empty or contain only
// whitespace are skipped. The result is nil when nothing qualifies.
func extractQuoted(s string) []string {
	var phrases []string
	for _, groups := range quotedRegex.FindAllStringSubmatch(s, -1) {
		if len(groups) < 2 {
			continue
		}
		phrase := strings.TrimSpace(groups[1])
		if phrase == "" {
			continue
		}
		phrases = append(phrases, phrase)
	}
	return phrases
}

// removeQuoted replaces every double-quoted segment of s (quotes included)
// with a single space.
func removeQuoted(s string) string {
	const blank = " "
	return quotedRegex.ReplaceAllLiteralString(s, blank)
}
// splitByWord splits s on every occurrence of word surrounded by a single
// space on each side (" word "), matching case-insensitively. Each segment is
// trimmed and blank segments are dropped. If no non-blank segment is produced
// but s itself is not blank, s is returned unchanged as the only element.
//
// The separator is located in the original string rather than in a lowercased
// copy: lowercasing can change the byte length of non-ASCII text, which would
// make offsets computed on the copy invalid for slicing the original (wrong
// segments, or an out-of-range panic).
func splitByWord(s, word string) []string {
	sep := " " + word + " "
	var parts []string
	remain := s
	for {
		idx := indexSepFold(remain, sep)
		if idx < 0 {
			if tail := strings.TrimSpace(remain); tail != "" {
				parts = append(parts, tail)
			}
			break
		}
		if before := strings.TrimSpace(remain[:idx]); before != "" {
			parts = append(parts, before)
		}
		remain = remain[idx+len(sep):]
	}
	if len(parts) == 0 && strings.TrimSpace(s) != "" {
		return []string{s}
	}
	return parts
}

// indexSepFold returns the byte offset of the first window of s that equals
// sep under case folding, or -1. sep is expected to start with a space.
func indexSepFold(s, sep string) int {
	for i := 0; i+len(sep) <= len(s); i++ {
		if s[i] != ' ' {
			continue // cheap reject: every separator starts with a space
		}
		if strings.EqualFold(s[i:i+len(sep)], sep) {
			return i
		}
	}
	return -1
}
// parseTerms splits s on whitespace and returns the words that are not
// boolean operators (as reported by isOperator). It returns nil when no word
// qualifies.
func parseTerms(s string) []string {
	var terms []string
	// strings.Fields never yields empty or whitespace-padded tokens, so each
	// token can be checked as-is.
	for _, word := range strings.Fields(s) {
		if isOperator(word) {
			continue
		}
		terms = append(terms, word)
	}
	return terms
}
// parseTermsFromParts runs parseTerms on each element of parts and returns the
// results concatenated in order. It returns nil when no term is produced.
func parseTermsFromParts(parts []string) []string {
	var terms []string
	for i := range parts {
		terms = append(terms, parseTerms(parts[i])...)
	}
	return terms
}
// isOperator reports whether s, compared case-insensitively, is one of the
// boolean operator words "and", "or" or "not".
func isOperator(s string) bool {
	switch strings.ToLower(s) {
	case "and", "or", "not":
		return true
	default:
		return false
	}
}
// HasBooleanStructure reports whether the query carries an exact phrase, more
// than one AND term, more than one OR term, or at least one NOT term.
// SimpleTerm is not taken into account.
func (p *ParsedQuery) HasBooleanStructure() bool {
	switch {
	case p.ExactPhrase != "":
		return true
	case len(p.AndTerms) > 1:
		return true
	case len(p.OrTerms) > 1:
		return true
	case len(p.NotTerms) > 0:
		return true
	}
	return false
}
// applyParsedQuery adds the conditions described by p to db and returns the
// resulting query. Each term is matched as a case-insensitive substring
// against the title, artist and album columns:
//
//   - ExactPhrase and every AND term must match at least one column;
//   - at least one OR term must match;
//   - no NOT term may match any column;
//   - SimpleTerm must match when p has boolean structure, so that the positive
//     part of queries such as `rock NOT jazz` or `"new york" jazz` is not
//     silently dropped. For queries without boolean structure SimpleTerm is
//     left to the caller, as before.
//
// Blank terms are skipped. A nil p leaves db unchanged.
//
// NOTE(review): terms are used as LIKE patterns without escaping, so '%' and
// '_' typed by the user act as wildcards.
func applyParsedQuery(db *gorm.DB, p *ParsedQuery) *gorm.DB {
	if p == nil {
		return db
	}

	// anyColumn is satisfied when the pattern matches any searched column.
	const anyColumn = "(LOWER(title) LIKE ? OR LOWER(artist) LIKE ? OR LOWER(album) LIKE ?)"
	// likeArgs builds the substring pattern, once per placeholder in anyColumn.
	likeArgs := func(term string) []interface{} {
		pattern := "%" + strings.ToLower(term) + "%"
		return []interface{}{pattern, pattern, pattern}
	}
	// require adds a mandatory match for term unless it is blank.
	require := func(term string) {
		if term = strings.TrimSpace(term); term != "" {
			db = db.Where(anyColumn, likeArgs(term)...)
		}
	}

	// Exact phrase.
	if p.ExactPhrase != "" {
		db = db.Where(anyColumn, likeArgs(p.ExactPhrase)...)
	}

	// Positive remainder of a boolean query.
	if p.HasBooleanStructure() {
		require(p.SimpleTerm)
	}

	// AND terms (all must match).
	for _, term := range p.AndTerms {
		require(term)
	}

	// OR terms (any can match), grouped into a single parenthesised condition.
	var (
		alternatives []string
		altArgs      []interface{}
	)
	for _, term := range p.OrTerms {
		term = strings.TrimSpace(term)
		if term == "" {
			continue
		}
		alternatives = append(alternatives, anyColumn)
		altArgs = append(altArgs, likeArgs(term)...)
	}
	if len(alternatives) > 0 {
		db = db.Where("("+strings.Join(alternatives, " OR ")+")", altArgs...)
	}

	// NOT terms (must not match).
	for _, term := range p.NotTerms {
		term = strings.TrimSpace(term)
		if term == "" {
			continue
		}
		db = db.Where("NOT "+anyColumn, likeArgs(term)...)
	}
	return db
}
// BuildWhereCondition renders p as a SQL condition over columns, together with
// its positional arguments. Placeholders are numbered $1, $2, ... in the order
// of the returned args. Used by SearchService.
//
// Every term becomes "(LOWER(col1) ILIKE $n OR LOWER(col2) ILIKE $m ...)" with
// the argument "%term%" (trimmed and lowercased) repeated once per column.
//
//   - Without boolean structure only SimpleTerm is used.
//   - Otherwise ExactPhrase, SimpleTerm and each AND term are required, the OR
//     terms form one parenthesised alternative group, and each NOT term is
//     negated; all groups are joined with AND. SimpleTerm is included here so
//     that the positive part of queries such as `rock NOT jazz` is not
//     silently dropped.
//
// Blank terms are skipped. The result is ("", nil) when p is nil, columns is
// empty, or no condition is produced.
//
// Column names are concatenated into the SQL text as-is and must come from
// trusted code.
// NOTE(review): terms are not escaped for LIKE, so '%' and '_' typed by the
// user act as wildcards.
func BuildWhereCondition(p *ParsedQuery, columns []string) (string, []interface{}) {
	if p == nil || len(columns) == 0 {
		return "", nil
	}

	var args []interface{}
	// match renders the per-column group for term and appends its arguments;
	// each placeholder number is the position of the argument just appended.
	match := func(term string) string {
		pattern := "%" + strings.ToLower(strings.TrimSpace(term)) + "%"
		conds := make([]string, 0, len(columns))
		for _, col := range columns {
			args = append(args, pattern)
			conds = append(conds, fmt.Sprintf("LOWER(%s) ILIKE $%d", col, len(args)))
		}
		return "(" + strings.Join(conds, " OR ") + ")"
	}

	if !p.HasBooleanStructure() {
		if p.SimpleTerm == "" {
			return "", nil
		}
		// Build the clause first: match mutates args.
		clause := match(p.SimpleTerm)
		return clause, args
	}

	var clauses []string
	// require adds a mandatory match for term unless it is blank.
	require := func(term string) {
		if strings.TrimSpace(term) != "" {
			clauses = append(clauses, match(term))
		}
	}

	require(p.ExactPhrase)
	require(p.SimpleTerm)
	for _, term := range p.AndTerms {
		require(term)
	}

	var alternatives []string
	for _, term := range p.OrTerms {
		if strings.TrimSpace(term) == "" {
			continue
		}
		alternatives = append(alternatives, match(term))
	}
	if len(alternatives) > 0 {
		clauses = append(clauses, "("+strings.Join(alternatives, " OR ")+")")
	}

	for _, term := range p.NotTerms {
		if strings.TrimSpace(term) == "" {
			continue
		}
		clauses = append(clauses, "NOT "+match(term))
	}

	if len(clauses) == 0 {
		return "", nil
	}
	return strings.Join(clauses, " AND "), args
}