Knowledge base of 80+ Markdown files across 14 domains (00-13), a Logseq graph, hardware design files (KiCad), infrastructure configs, and the talas-wiki static site. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
282 lines
5.4 KiB
Go
282 lines
5.4 KiB
Go
package wiki
|
|
|
|
import (
|
|
"sort"
|
|
"strings"
|
|
"unicode"
|
|
)
|
|
|
|
type SearchResult struct {
|
|
Page *Page
|
|
Score int
|
|
Snippet string
|
|
}
|
|
|
|
// Suggestion is one as-you-type completion entry returned by Index.Suggest.
// The struct tags fix the JSON key names used when a value is serialized.
type Suggestion struct {
	// Title is copied from the matching page's Title.
	Title string `json:"title"`
	// URLPath is copied from the matching page's URLPath; its JSON key is "path".
	URLPath string `json:"path"`
	// Domain is copied from the matching page's Domain.
	Domain string `json:"domain"`
}
|
|
|
|
func (idx *Index) Search(query string, maxResults int) []SearchResult {
|
|
idx.mu.RLock()
|
|
defer idx.mu.RUnlock()
|
|
|
|
if query == "" {
|
|
return nil
|
|
}
|
|
|
|
queryLower := strings.ToLower(query)
|
|
tokens := tokenize(queryLower)
|
|
if len(tokens) == 0 {
|
|
return nil
|
|
}
|
|
|
|
var results []SearchResult
|
|
|
|
for _, page := range idx.allPages {
|
|
score := 0
|
|
snippet := ""
|
|
|
|
titleLower := strings.ToLower(page.Title)
|
|
nameLower := strings.ToLower(page.Name)
|
|
|
|
// Exact title match
|
|
if strings.Contains(titleLower, queryLower) {
|
|
score += 20
|
|
}
|
|
|
|
for _, token := range tokens {
|
|
// Title match
|
|
if strings.Contains(titleLower, token) {
|
|
score += 10
|
|
}
|
|
// Name match
|
|
if strings.Contains(nameLower, token) {
|
|
score += 8
|
|
}
|
|
// Tag match
|
|
for _, tag := range page.Tags {
|
|
if strings.Contains(strings.ToLower(tag), token) {
|
|
score += 7
|
|
break
|
|
}
|
|
}
|
|
// Fuzzy title match (edit distance)
|
|
if score == 0 {
|
|
titleWords := tokenize(titleLower)
|
|
for _, tw := range titleWords {
|
|
if fuzzyMatch(token, tw) {
|
|
score += 4
|
|
break
|
|
}
|
|
}
|
|
nameWords := tokenize(nameLower)
|
|
for _, nw := range nameWords {
|
|
if fuzzyMatch(token, nw) {
|
|
score += 3
|
|
break
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Content match from cache
|
|
contentLower := idx.contentCache[page.URLPath]
|
|
if contentLower == "" {
|
|
continue
|
|
}
|
|
lines := strings.Split(contentLower, "\n")
|
|
|
|
for _, token := range tokens {
|
|
count := strings.Count(contentLower, token)
|
|
if count > 0 {
|
|
score += min(count, 5)
|
|
for _, line := range lines {
|
|
if strings.HasPrefix(line, "#") && strings.Contains(strings.ToLower(line), token) {
|
|
score += 5
|
|
break
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if score == 0 {
|
|
continue
|
|
}
|
|
|
|
snippet = extractSnippet(lines, queryLower, tokens)
|
|
|
|
results = append(results, SearchResult{
|
|
Page: page,
|
|
Score: score,
|
|
Snippet: snippet,
|
|
})
|
|
}
|
|
|
|
sort.Slice(results, func(i, j int) bool {
|
|
return results[i].Score > results[j].Score
|
|
})
|
|
|
|
if len(results) > maxResults {
|
|
results = results[:maxResults]
|
|
}
|
|
return results
|
|
}
|
|
|
|
// Suggest returns quick suggestions for as-you-type search
|
|
func (idx *Index) Suggest(query string, maxResults int) []Suggestion {
|
|
idx.mu.RLock()
|
|
defer idx.mu.RUnlock()
|
|
|
|
if len(query) < 2 {
|
|
return nil
|
|
}
|
|
|
|
queryLower := strings.ToLower(query)
|
|
type scored struct {
|
|
sug Suggestion
|
|
score int
|
|
}
|
|
var results []scored
|
|
|
|
for _, page := range idx.allPages {
|
|
titleLower := strings.ToLower(page.Title)
|
|
nameLower := strings.ToLower(page.Name)
|
|
s := 0
|
|
|
|
// Prefix match on name (highest)
|
|
if strings.HasPrefix(nameLower, queryLower) {
|
|
s += 20
|
|
} else if strings.Contains(nameLower, queryLower) {
|
|
s += 15
|
|
}
|
|
|
|
// Title contains
|
|
if strings.Contains(titleLower, queryLower) {
|
|
s += 10
|
|
}
|
|
|
|
// Fuzzy
|
|
if s == 0 {
|
|
tokens := tokenize(queryLower)
|
|
for _, token := range tokens {
|
|
titleWords := tokenize(titleLower)
|
|
for _, tw := range titleWords {
|
|
if fuzzyMatch(token, tw) {
|
|
s += 3
|
|
break
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if s > 0 {
|
|
results = append(results, scored{
|
|
sug: Suggestion{Title: page.Title, URLPath: page.URLPath, Domain: page.Domain},
|
|
score: s,
|
|
})
|
|
}
|
|
}
|
|
|
|
sort.Slice(results, func(i, j int) bool {
|
|
return results[i].score > results[j].score
|
|
})
|
|
|
|
if len(results) > maxResults {
|
|
results = results[:maxResults]
|
|
}
|
|
|
|
var suggestions []Suggestion
|
|
for _, r := range results {
|
|
suggestions = append(suggestions, r.sug)
|
|
}
|
|
return suggestions
|
|
}
|
|
|
|
// fuzzyMatch returns true if two strings are within edit distance 2
|
|
func fuzzyMatch(a, b string) bool {
|
|
if a == b {
|
|
return true
|
|
}
|
|
if len(a) < 3 || len(b) < 3 {
|
|
return false
|
|
}
|
|
// Check if one contains the other
|
|
if strings.Contains(a, b) || strings.Contains(b, a) {
|
|
return true
|
|
}
|
|
// Simple trigram overlap
|
|
if len(a) < 3 || len(b) < 3 {
|
|
return false
|
|
}
|
|
trigramsA := trigrams(a)
|
|
trigramsB := trigrams(b)
|
|
common := 0
|
|
for t := range trigramsA {
|
|
if trigramsB[t] {
|
|
common++
|
|
}
|
|
}
|
|
total := len(trigramsA)
|
|
if len(trigramsB) > total {
|
|
total = len(trigramsB)
|
|
}
|
|
if total == 0 {
|
|
return false
|
|
}
|
|
return float64(common)/float64(total) > 0.4
|
|
}
|
|
|
|
// trigrams returns the set of distinct three-rune substrings of s. The
// returned map is never nil; it is empty when s has fewer than three runes.
func trigrams(s string) map[string]bool {
	chars := []rune(s)
	set := make(map[string]bool)
	// Slide a three-rune window over the string; end is the exclusive upper
	// bound of the current window.
	for end := 3; end <= len(chars); end++ {
		set[string(chars[end-3:end])] = true
	}
	return set
}
|
|
|
|
// tokenize splits s into maximal runs of Unicode letters and digits; every
// other rune acts as a separator and is dropped. It returns nil when s
// contains no letter or digit.
func tokenize(s string) []string {
	isSeparator := func(r rune) bool {
		return !unicode.IsLetter(r) && !unicode.IsDigit(r)
	}
	tokens := strings.FieldsFunc(s, isSeparator)
	// FieldsFunc yields an empty non-nil slice when nothing is found; keep
	// returning nil in that case.
	if len(tokens) == 0 {
		return nil
	}
	return tokens
}
|
|
|
|
// extractSnippet returns a short excerpt around the first line of lines that
// contains any of tokens, compared against the lowercased line.
//
// The excerpt is the matching line together with the line before and the line
// after it (where they exist), joined with single spaces and trimmed of
// surrounding whitespace. Excerpts longer than 200 bytes are cut to at most
// 200 bytes, never in the middle of a UTF-8 encoded character, and suffixed
// with "...". It returns "" when no line matches.
//
// query is accepted for signature compatibility and is not consulted.
func extractSnippet(lines []string, query string, tokens []string) string {
	const maxSnippetBytes = 200

	for i, line := range lines {
		lineLower := strings.ToLower(line)
		for _, token := range tokens {
			if !strings.Contains(lineLower, token) {
				continue
			}

			// One line of context on each side, clamped to the slice bounds.
			start := i
			if start > 0 {
				start = i - 1
			}
			end := i + 2
			if end > len(lines) {
				end = len(lines)
			}

			snippet := strings.TrimSpace(strings.Join(lines[start:end], " "))
			if len(snippet) > maxSnippetBytes {
				// Step back over UTF-8 continuation bytes (10xxxxxx) so the
				// cut never splits a multi-byte character.
				cut := maxSnippetBytes
				for cut > 0 && snippet[cut]&0xC0 == 0x80 {
					cut--
				}
				snippet = snippet[:cut] + "..."
			}
			return snippet
		}
	}
	return ""
}
|