talas-group/talas-wiki/internal/wiki/intelligence.go
senke 66471934af Initial commit: Talas Group project management & documentation
Knowledge base of ~80+ markdown files across 14 domains (00-13),
Logseq graph, hardware design files (KiCAD), infrastructure configs,
and talas-wiki static site.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-04 20:10:41 +02:00

297 lines
6.9 KiB
Go

package wiki
import (
	"sort"
	"strings"
	"time"
	"unicode/utf8"
)
// LinkSuggestion represents a page that mentions another page's name in its
// content without actually linking to it — a candidate for adding a wiki link.
type LinkSuggestion struct {
	Source      string // URL path of the page that mentions the target
	Target      string // URL path of the page that could be linked
	TargetTitle string // display title of the target page
	Mentions    int    // how many times the target's name appears in the source content
}
// SimilarPage pairs a page with its content-similarity score relative to a
// reference page.
type SimilarPage struct {
	Page       *Page   // the similar page
	Similarity float64 // trigram similarity score; higher is more similar
}
// DuplicateGroup groups pages whose content is similar enough to be
// considered near-duplicates.
type DuplicateGroup struct {
	Pages      []*Page // the near-duplicate pages (currently always a pair)
	Similarity float64 // trigram similarity score that grouped them
}
// DomainDep shows how many links connect two domains; used for
// inter-domain dependency reporting.
type DomainDep struct {
	Source string `json:"source"` // domain the links originate from
	Target string `json:"target"` // domain the links point to
	Weight int    `json:"weight"` // number of cross-domain links
}
// TimelineEntry represents page-modification activity on a single day.
type TimelineEntry struct {
	Date  string   `json:"date"`            // day in "2006-01-02" format
	Count int      `json:"count"`           // number of pages modified that day
	Pages []string `json:"pages,omitempty"` // titles of the pages modified that day
}
// GetLinkSuggestions finds pages whose content mentions another page's name
// without linking to it, returning up to maxResults suggestions ordered by
// mention count (descending).
//
// Matching is a case-insensitive substring search against the cached
// lowercase content; candidate names shorter than 4 characters are skipped
// to avoid noise from generic names (README etc).
func (idx *Index) GetLinkSuggestions(maxResults int) []LinkSuggestion {
	idx.mu.RLock()
	defer idx.mu.RUnlock()

	// Hoist the per-candidate lowercasing out of the O(pages²) scan below:
	// the name depends only on the candidate, not on the page being scanned,
	// so computing it once per candidate avoids len(pages) redundant
	// strings.ToLower calls per page.
	loweredNames := make([]string, len(idx.allPages))
	for i, candidate := range idx.allPages {
		loweredNames[i] = strings.ToLower(candidate.Name)
	}

	var suggestions []LinkSuggestion
	for _, page := range idx.allPages {
		contentLower := idx.contentCache[page.URLPath]
		if contentLower == "" {
			continue
		}
		// Pages this one already links to; mentions of those need no suggestion.
		outlinked := make(map[string]bool)
		for _, target := range idx.outlinks[page.URLPath] {
			outlinked[target] = true
		}
		for i, candidate := range idx.allPages {
			if candidate.URLPath == page.URLPath || outlinked[candidate.URLPath] {
				continue
			}
			nameLower := loweredNames[i]
			if len(nameLower) < 4 {
				continue // skip very short names (README etc)
			}
			// Count mentions of the candidate name.
			if count := strings.Count(contentLower, nameLower); count > 0 {
				suggestions = append(suggestions, LinkSuggestion{
					Source:      page.URLPath,
					Target:      candidate.URLPath,
					TargetTitle: candidate.Title,
					Mentions:    count,
				})
			}
		}
	}

	sort.Slice(suggestions, func(i, j int) bool {
		return suggestions[i].Mentions > suggestions[j].Mentions
	})
	if len(suggestions) > maxResults {
		suggestions = suggestions[:maxResults]
	}
	return suggestions
}
// GetSimilarPages returns up to maxResults pages whose content resembles the
// page at urlPath, ranked by descending trigram similarity. Similarity is the
// count of shared trigrams divided by the larger trigram-set size, computed
// over at most the first 2000 bytes of each page; only scores above 0.25
// qualify. Returns nil when the source page has no cached content.
func (idx *Index) GetSimilarPages(urlPath string, maxResults int) []SimilarPage {
	idx.mu.RLock()
	defer idx.mu.RUnlock()

	srcContent := idx.contentCache[urlPath]
	if srcContent == "" {
		return nil
	}
	srcTri := makeTrigrams(srcContent[:min(len(srcContent), 2000)])

	var similar []SimilarPage
	for _, other := range idx.allPages {
		if other.URLPath == urlPath {
			continue
		}
		otherContent := idx.contentCache[other.URLPath]
		if otherContent == "" {
			continue
		}
		otherTri := makeTrigrams(otherContent[:min(len(otherContent), 2000)])

		shared := 0
		for t := range srcTri {
			if otherTri[t] {
				shared++
			}
		}
		// Normalize by the larger set so a small page embedded in a big one
		// doesn't score as a near-match.
		denom := max(len(srcTri), len(otherTri))
		if denom == 0 {
			continue
		}
		if score := float64(shared) / float64(denom); score > 0.25 {
			similar = append(similar, SimilarPage{Page: other, Similarity: score})
		}
	}

	sort.Slice(similar, func(i, j int) bool {
		return similar[i].Similarity > similar[j].Similarity
	})
	if len(similar) > maxResults {
		similar = similar[:maxResults]
	}
	return similar
}
// GetDuplicates finds pairs of pages whose content trigram similarity meets
// threshold, returned as two-page groups sorted by similarity (descending),
// capped at 20 groups. Pages with fewer than 100 bytes of cached content are
// ignored; comparison uses at most the first 1500 bytes of each page.
func (idx *Index) GetDuplicates(threshold float64) []DuplicateGroup {
	idx.mu.RLock()
	defer idx.mu.RUnlock()

	// Precompute each eligible page's trigram set once. The original rebuilt
	// the inner page's trigrams inside the pair loop — O(n²) makeTrigrams
	// calls instead of O(n) — which dominated the cost for large wikis.
	type candidate struct {
		page *Page
		tri  map[string]bool
	}
	var cands []candidate
	for _, p := range idx.allPages {
		content := idx.contentCache[p.URLPath]
		if len(content) < 100 {
			continue // too short to compare meaningfully
		}
		cands = append(cands, candidate{p, makeTrigrams(content[:min(len(content), 1500)])})
	}

	var groups []DuplicateGroup
	for i := 0; i < len(cands); i++ {
		for j := i + 1; j < len(cands); j++ {
			common := 0
			for t := range cands[i].tri {
				if cands[j].tri[t] {
					common++
				}
			}
			// Normalize by the larger set so subsets don't inflate the score.
			total := max(len(cands[i].tri), len(cands[j].tri))
			if total == 0 {
				continue
			}
			if sim := float64(common) / float64(total); sim >= threshold {
				// Simple grouping: each qualifying pair becomes its own group.
				groups = append(groups, DuplicateGroup{
					Pages:      []*Page{cands[i].page, cands[j].page},
					Similarity: sim,
				})
			}
		}
	}

	sort.Slice(groups, func(i, j int) bool {
		return groups[i].Similarity > groups[j].Similarity
	})
	if len(groups) > 20 {
		groups = groups[:20]
	}
	return groups
}
// GetDomainDeps returns the number of cross-domain links for every ordered
// (source domain, target domain) pair, sorted by weight descending.
// Intra-domain links, unresolvable pages, and pages without a domain are
// ignored.
func (idx *Index) GetDomainDeps() []DomainDep {
	idx.mu.RLock()
	defer idx.mu.RUnlock()

	// Key on a struct rather than the original "source|target" string
	// concatenation, which would miscount if a domain name ever contained
	// the '|' separator (and needed a SplitN round-trip to read back).
	type edge struct {
		source, target string
	}
	counts := make(map[edge]int)
	for source, targets := range idx.outlinks {
		srcPage := idx.byFullPath[source]
		if srcPage == nil || srcPage.Domain == "" {
			continue
		}
		for _, target := range targets {
			tgtPage := idx.byFullPath[target]
			if tgtPage == nil || tgtPage.Domain == "" || tgtPage.Domain == srcPage.Domain {
				continue
			}
			counts[edge{srcPage.Domain, tgtPage.Domain}]++
		}
	}

	// Keep a nil slice when there are no deps so JSON output stays "null",
	// matching the original behavior.
	var deps []DomainDep
	for e, weight := range counts {
		deps = append(deps, DomainDep{Source: e.source, Target: e.target, Weight: weight})
	}
	sort.Slice(deps, func(i, j int) bool {
		return deps[i].Weight > deps[j].Weight
	})
	return deps
}
// GetTimeline returns modification activity per calendar day for the last
// `days` days, newest first. Each entry carries the date, the number of pages
// modified that day, and their titles. Returns nil when days <= 0.
func (idx *Index) GetTimeline(days int) []TimelineEntry {
	idx.mu.RLock()
	defer idx.mu.RUnlock()

	if days <= 0 {
		return nil
	}
	now := time.Now()

	// Build the exact set of calendar dates we will report, newest first.
	// The original filtered pages by a rolling hour window (now - days*24h)
	// while emitting calendar dates, so a page inside the hour window but on
	// a date older than the emitted range was bucketed and then silently
	// dropped. Filtering on membership in the emitted date set keeps the two
	// consistent.
	dates := make([]string, days)
	inRange := make(map[string]bool, days)
	for d := 0; d < days; d++ {
		s := now.AddDate(0, 0, -d).Format("2006-01-02")
		dates[d] = s
		inRange[s] = true
	}

	dayMap := make(map[string][]string)
	for _, page := range idx.allPages {
		if s := page.ModTime.Format("2006-01-02"); inRange[s] {
			dayMap[s] = append(dayMap[s], page.Title)
		}
	}

	entries := make([]TimelineEntry, 0, days)
	for _, s := range dates {
		titles := dayMap[s]
		entries = append(entries, TimelineEntry{
			Date:  s,
			Count: len(titles),
			Pages: titles,
		})
	}
	return entries
}
// GetPageSummary returns the first meaningful content line of the page as a
// short summary. It skips blank lines, headings, "---" rules/frontmatter
// fences, lines containing "::" (Logseq property lines), blockquotes, and
// table rows. Summaries longer than 160 bytes are truncated at a UTF-8 rune
// boundary with "..." appended. Returns "" when the page has no content or
// no suitable line.
func (idx *Index) GetPageSummary(urlPath string) string {
	content := idx.GetContentForSearch(urlPath)
	if content == "" {
		return ""
	}
	for _, line := range strings.Split(content, "\n") {
		line = strings.TrimSpace(line)
		if line == "" || strings.HasPrefix(line, "#") || strings.HasPrefix(line, "---") ||
			strings.Contains(line, "::") || strings.HasPrefix(line, ">") || strings.HasPrefix(line, "|") {
			continue
		}
		if len(line) > 160 {
			// Truncate on a rune boundary: the original's naive line[:160]
			// could split a multi-byte UTF-8 character and emit invalid text.
			cut := 160
			for cut > 0 && !utf8.RuneStart(line[cut]) {
				cut--
			}
			return line[:cut] + "..."
		}
		return line
	}
	return ""
}