talas-group/talas-wiki/internal/wiki/search.go
senke 66471934af Initial commit: Talas Group project management & documentation
Knowledge base of ~80+ markdown files across 14 domains (00-13),
Logseq graph, hardware design files (KiCAD), infrastructure configs,
and talas-wiki static site.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-04 20:10:41 +02:00

282 lines
5.4 KiB
Go

package wiki
import (
"sort"
"strings"
"unicode"
)
// SearchResult is a single hit produced by Index.Search.
type SearchResult struct {
// Page is the page that matched the query.
Page *Page
// Score is the accumulated relevance score; Search orders results by it,
// highest first.
Score int
// Snippet is the excerpt returned by extractSnippet for the page content;
// it is empty when no content line contains a query token.
Snippet string
}
// Suggestion is a lightweight page reference returned by Index.Suggest.
type Suggestion struct {
// Title is copied from the page's Title; serialized under the JSON key "title".
Title string `json:"title"`
// URLPath is copied from the page's URLPath; serialized under the JSON key "path".
URLPath string `json:"path"`
// Domain is copied from the page's Domain; serialized under the JSON key "domain".
Domain string `json:"domain"`
}
// Search scores every indexed page against query and returns at most
// maxResults hits, highest score first. Pages with equal scores keep the
// order in which the index yielded them.
//
// Scoring, per page:
//   - +20 if the lowercased title contains the whole lowercased query;
//   - for each query token: +10 if the title contains it, +8 if the name
//     contains it, +7 if at least one tag contains it;
//   - while the page has not scored anything yet, a token may also score
//     through fuzzyMatch: +4 against a title word and +3 against a name word;
//   - for each token found in the cached content: +min(occurrences, 5), plus
//     +5 if the token occurs on a line that starts with "#".
//
// Pages whose total score is 0 are omitted. A page without cached content is
// still returned when its title, name or tags match; its Snippet is empty.
//
// Search returns nil for an empty query or a query containing no letters or
// digits. A negative maxResults is treated as 0.
func (idx *Index) Search(query string, maxResults int) []SearchResult {
	if query == "" {
		return nil
	}
	queryLower := strings.ToLower(query)
	tokens := tokenize(queryLower)
	if len(tokens) == 0 {
		return nil
	}
	// A negative limit would make the final reslice panic.
	if maxResults < 0 {
		maxResults = 0
	}

	idx.mu.RLock()
	defer idx.mu.RUnlock()

	var results []SearchResult
	for _, page := range idx.allPages {
		titleLower := strings.ToLower(page.Title)
		nameLower := strings.ToLower(page.Name)

		score := 0
		// Whole-query match in the title.
		if strings.Contains(titleLower, queryLower) {
			score += 20
		}

		// Word lists for fuzzy matching, built at most once per page and only
		// when a token actually reaches the fuzzy stage.
		var titleWords, nameWords []string
		wordsReady := false

		for _, token := range tokens {
			if strings.Contains(titleLower, token) {
				score += 10
			}
			if strings.Contains(nameLower, token) {
				score += 8
			}
			for _, tag := range page.Tags {
				if strings.Contains(strings.ToLower(tag), token) {
					score += 7
					break
				}
			}

			// Fuzzy matching is a fallback: it only runs while nothing else
			// has matched on this page.
			if score != 0 {
				continue
			}
			if !wordsReady {
				titleWords = tokenize(titleLower)
				nameWords = tokenize(nameLower)
				wordsReady = true
			}
			for _, tw := range titleWords {
				if fuzzyMatch(token, tw) {
					score += 4
					break
				}
			}
			for _, nw := range nameWords {
				if fuzzyMatch(token, nw) {
					score += 3
					break
				}
			}
		}

		// Content scoring is optional: a page without cached content keeps
		// whatever score its title, name and tags earned.
		var lines []string
		if content := idx.contentCache[page.URLPath]; content != "" {
			lines = strings.Split(content, "\n")
			for _, token := range tokens {
				count := strings.Count(content, token)
				if count == 0 {
					continue
				}
				score += min(count, 5)
				for _, line := range lines {
					if strings.HasPrefix(line, "#") && strings.Contains(strings.ToLower(line), token) {
						score += 5
						break
					}
				}
			}
		}

		if score == 0 {
			continue
		}
		results = append(results, SearchResult{
			Page:    page,
			Score:   score,
			Snippet: extractSnippet(lines, queryLower, tokens),
		})
	}

	// Stable sort so that ties do not get shuffled between calls.
	sort.SliceStable(results, func(i, j int) bool {
		return results[i].Score > results[j].Score
	})
	if len(results) > maxResults {
		results = results[:maxResults]
	}
	return results
}
// Suggest returns quick suggestions for as-you-type search: at most
// maxResults pages, highest score first. Pages with equal scores keep the
// order in which the index yielded them.
//
// Scoring, per page, against the lowercased query:
//   - +20 if the name starts with the query, otherwise +15 if the name
//     contains it;
//   - +10 if the title contains it;
//   - only when none of the above matched: +3 for every query token that
//     fuzzyMatch accepts against some word of the title.
//
// Pages scoring 0 are omitted. Suggest returns nil when query is shorter than
// two bytes or when nothing is left to return. A negative maxResults is
// treated as 0.
func (idx *Index) Suggest(query string, maxResults int) []Suggestion {
	if len(query) < 2 {
		return nil
	}
	// A negative limit would make the reslice below panic.
	if maxResults < 0 {
		maxResults = 0
	}
	queryLower := strings.ToLower(query)
	// The query tokens are the same for every page, so compute them once.
	queryTokens := tokenize(queryLower)

	idx.mu.RLock()
	defer idx.mu.RUnlock()

	type scored struct {
		sug   Suggestion
		score int
	}
	var results []scored
	for _, page := range idx.allPages {
		titleLower := strings.ToLower(page.Title)
		nameLower := strings.ToLower(page.Name)

		s := 0
		switch {
		case strings.HasPrefix(nameLower, queryLower):
			s += 20
		case strings.Contains(nameLower, queryLower):
			s += 15
		}
		if strings.Contains(titleLower, queryLower) {
			s += 10
		}

		// Fuzzy fallback, used only when there was no direct match.
		if s == 0 {
			titleWords := tokenize(titleLower)
			for _, token := range queryTokens {
				for _, tw := range titleWords {
					if fuzzyMatch(token, tw) {
						s += 3
						break
					}
				}
			}
		}

		if s > 0 {
			results = append(results, scored{
				sug:   Suggestion{Title: page.Title, URLPath: page.URLPath, Domain: page.Domain},
				score: s,
			})
		}
	}

	// Stable sort so that ties do not get shuffled between calls.
	sort.SliceStable(results, func(i, j int) bool {
		return results[i].score > results[j].score
	})
	if len(results) > maxResults {
		results = results[:maxResults]
	}
	if len(results) == 0 {
		return nil
	}

	suggestions := make([]Suggestion, 0, len(results))
	for _, r := range results {
		suggestions = append(suggestions, r.sug)
	}
	return suggestions
}
// fuzzyMatch reports whether a and b are similar enough to be treated as the
// same word.
//
// Identical strings always match. Otherwise both strings must be at least
// three characters long, and they match when one contains the other or when
// more than 40% of the trigrams of the string with more distinct trigrams
// also occur in the other string. This is a trigram-overlap heuristic, not an
// edit-distance computation.
func fuzzyMatch(a, b string) bool {
	if a == b {
		return true
	}
	// Measure length in characters, not bytes: trigrams works on runes, and a
	// two-character non-ASCII token must not slip past the short-token guard
	// merely because it occupies three or more bytes.
	if len([]rune(a)) < 3 || len([]rune(b)) < 3 {
		return false
	}
	if strings.Contains(a, b) || strings.Contains(b, a) {
		return true
	}

	setA := trigrams(a)
	setB := trigrams(b)
	common := 0
	for t := range setA {
		if setB[t] {
			common++
		}
	}
	// Both strings have at least three runes, so each set holds at least one
	// trigram and total is never zero.
	total := len(setA)
	if len(setB) > total {
		total = len(setB)
	}
	return float64(common)/float64(total) > 0.4
}

// trigrams returns the set of distinct three-rune substrings of s. The set is
// empty when s has fewer than three runes.
func trigrams(s string) map[string]bool {
	runes := []rune(s)
	result := make(map[string]bool)
	for i := 0; i+2 < len(runes); i++ {
		result[string(runes[i:i+3])] = true
	}
	return result
}
// tokenize splits s into its maximal runs of letters and digits, in order of
// appearance. Every other rune acts as a separator and is dropped. It returns
// nil when s contains no letter or digit.
func tokenize(s string) []string {
	isSeparator := func(r rune) bool {
		return !unicode.IsLetter(r) && !unicode.IsDigit(r)
	}
	words := strings.FieldsFunc(s, isSeparator)
	if len(words) == 0 {
		// Report "no tokens" as nil rather than as an empty non-nil slice.
		return nil
	}
	return words
}
// extractSnippet returns a short excerpt around the first line that contains
// any of tokens.
//
// Each line is lowercased before the comparison while tokens are used as
// given, so tokens must already be lowercase to match. The excerpt consists of
// the matching line together with the line before and the line after it (where
// they exist), joined with single spaces and trimmed of surrounding
// whitespace. Excerpts longer than 200 bytes are cut at the last character
// boundary at or before byte 200 and suffixed with "...".
//
// It returns "" when no line contains a token. The query parameter is
// currently unused and kept only so existing call sites stay valid.
func extractSnippet(lines []string, query string, tokens []string) string {
	const maxLen = 200

	for i, line := range lines {
		lineLower := strings.ToLower(line)
		matched := false
		for _, token := range tokens {
			if strings.Contains(lineLower, token) {
				matched = true
				break
			}
		}
		if !matched {
			continue
		}

		// One line of context on each side, clamped to the slice bounds.
		start := i
		if start > 0 {
			start--
		}
		end := i + 2
		if end > len(lines) {
			end = len(lines)
		}

		snippet := strings.TrimSpace(strings.Join(lines[start:end], " "))
		if len(snippet) > maxLen {
			// Step back over UTF-8 continuation bytes (10xxxxxx) so the cut
			// never lands in the middle of a multi-byte character.
			cut := maxLen
			for cut > 0 && snippet[cut]&0xC0 == 0x80 {
				cut--
			}
			snippet = snippet[:cut] + "..."
		}
		return snippet
	}
	return ""
}