Knowledge base of ~80+ markdown files across 14 domains (00-13), Logseq graph, hardware design files (KiCAD), infrastructure configs, and talas-wiki static site. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
311 lines
8.4 KiB
Go
package wiki
|
|
|
|
import (
|
|
"bytes"
|
|
"fmt"
|
|
htmlpkg "html"
|
|
"net/url"
|
|
"os"
|
|
"path/filepath"
|
|
"regexp"
|
|
"strings"
|
|
|
|
"github.com/yuin/goldmark"
|
|
"github.com/yuin/goldmark/extension"
|
|
goldhtml "github.com/yuin/goldmark/renderer/html"
|
|
)
|
|
|
|
// TOCEntry is a single table-of-contents item extracted from a markdown
// heading: the heading level (1-6, from the number of '#'), the slugified
// anchor ID, and the display text (wikilink markup reduced to plain text).
type TOCEntry struct {
	Level int
	ID    string
	Text  string
}
|
|
|
|
// RenderResult bundles the rendered HTML of a page with derived metadata:
// its table of contents, word count of the markdown source, and estimated
// reading time in minutes (words / 200, minimum 1 for non-empty content).
type RenderResult struct {
	HTML           string
	TOC            []TOCEntry
	WordCount      int
	ReadingMinutes int
}
|
|
|
|
// Renderer converts wiki markdown pages to HTML. It resolves wikilinks and
// transclusions through the page index and reuses a single goldmark
// instance configured in NewRenderer.
type Renderer struct {
	idx *Index            // page index used to resolve [[wikilink]] targets
	md  goldmark.Markdown // shared, reusable markdown converter
}
|
|
|
|
func NewRenderer(idx *Index) *Renderer {
|
|
md := goldmark.New(
|
|
goldmark.WithExtensions(extension.GFM),
|
|
goldmark.WithRendererOptions(goldhtml.WithUnsafe()),
|
|
)
|
|
return &Renderer{idx: idx, md: md}
|
|
}
|
|
|
|
var (
	// wikilinkPattern matches [[target]] or [[target|display]] links.
	wikilinkPattern = regexp.MustCompile(`\[\[([^\]]+)\]\]`)
	// transclusionPattern matches ![[target]] embeds; renderMarkdownFull
	// processes these before plain wikilinks, since wikilinkPattern would
	// also match the [[...]] tail of a transclusion.
	transclusionPattern = regexp.MustCompile(`!\[\[([^\]]+)\]\]`)
	// headingPattern matches ATX headings ("#" through "######"), one per
	// line thanks to the (?m) multi-line flag.
	headingPattern = regexp.MustCompile(`(?m)^(#{1,6})\s+(.+)$`)
)
|
|
|
|
func (r *Renderer) RenderPage(page *Page) (string, error) {
|
|
result, err := r.RenderPageFull(page)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
return result.HTML, nil
|
|
}
|
|
|
|
func (r *Renderer) RenderPageFull(page *Page) (*RenderResult, error) {
|
|
content, err := os.ReadFile(page.AbsPath)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return r.renderMarkdownFull(string(content), page.Domain, 0)
|
|
}
|
|
|
|
func (r *Renderer) RenderMarkdown(content string, currentDomain string) (string, error) {
|
|
result, err := r.renderMarkdownFull(content, currentDomain, 0)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
return result.HTML, nil
|
|
}
|
|
|
|
// renderMarkdownFull is the core rendering pipeline. In order, it:
//  1. strips Logseq "key:: value" property lines from the top of the page,
//  2. extracts the table of contents from the raw headings,
//  3. inlines ![[...]] transclusions (recursing with depth+1, capped at 3),
//  4. swaps [[wikilinks]] for numbered placeholders so goldmark does not
//     mangle them,
//  5. tags headings with a literal "{#slug}" marker, converts the markdown
//     to HTML, then rewrites those markers into id attributes,
//  6. resolves the wikilink placeholders into <a>/<span> elements and fixes
//     relative image paths against currentDomain.
//
// depth guards transclusion recursion; top-level callers pass 0. Error
// strings embedded in the output are user-facing French text.
func (r *Renderer) renderMarkdownFull(content string, currentDomain string, depth int) (*RenderResult, error) {
	content = stripLogseqProperties(content)

	// Extract TOC from headings before processing
	toc := extractTOC(content)

	// Handle transclusions (max depth 3 to prevent infinite loops)
	if depth < 3 {
		content = transclusionPattern.ReplaceAllStringFunc(content, func(match string) string {
			inner := match[3 : len(match)-2] // strip ![[...]]
			target := inner
			// A "|alias" suffix only affects display; the part before the
			// pipe is the real page target.
			if pipeIdx := strings.Index(inner, "|"); pipeIdx >= 0 {
				target = strings.TrimSpace(inner[:pipeIdx])
			}

			resolved := r.idx.ResolveLinkTarget(target)
			if resolved == "" {
				return fmt.Sprintf(`<div class="transclusion-error">Transclusion non trouvée: %s</div>`, htmlpkg.EscapeString(target))
			}

			page := r.idx.GetPage(resolved)
			if page == nil {
				return fmt.Sprintf(`<div class="transclusion-error">Page non trouvée: %s</div>`, htmlpkg.EscapeString(resolved))
			}

			transcludedContent, err := os.ReadFile(page.AbsPath)
			if err != nil {
				return fmt.Sprintf(`<div class="transclusion-error">Erreur lecture: %s</div>`, htmlpkg.EscapeString(err.Error()))
			}

			// Render the embedded page against its own domain so its links
			// and images resolve correctly; failures become inline error
			// boxes rather than aborting the whole page.
			result, err := r.renderMarkdownFull(string(transcludedContent), page.Domain, depth+1)
			if err != nil {
				return fmt.Sprintf(`<div class="transclusion-error">Erreur rendu: %s</div>`, htmlpkg.EscapeString(err.Error()))
			}

			return fmt.Sprintf(`<div class="transclusion"><div class="transclusion-header"><a href="/wiki/%s">%s</a></div>%s</div>`,
				encodeURLPath(resolved), htmlpkg.EscapeString(page.Title), result.HTML)
		})
	}

	// Replace wikilinks with placeholders.
	// NOTE(review): this relies on "WLPH_%d_END" surviving the markdown
	// conversion verbatim — plausible since intraword underscores are not
	// emphasis in GFM, but worth confirming against goldmark's output.
	type wikilinkInfo struct {
		target  string
		display string
	}
	var links []wikilinkInfo
	processed := wikilinkPattern.ReplaceAllStringFunc(content, func(match string) string {
		inner := match[2 : len(match)-2]
		target := inner
		display := inner
		if pipeIdx := strings.Index(inner, "|"); pipeIdx >= 0 {
			target = strings.TrimSpace(inner[:pipeIdx])
			display = strings.TrimSpace(inner[pipeIdx+1:])
		}
		idx := len(links)
		links = append(links, wikilinkInfo{target: target, display: display})
		return fmt.Sprintf("WLPH_%d_END", idx)
	})

	// Add heading IDs for TOC anchors: append a literal "{#slug}" marker,
	// rewritten into an id attribute after HTML conversion below.
	processed = headingPattern.ReplaceAllStringFunc(processed, func(match string) string {
		sub := headingPattern.FindStringSubmatch(match)
		if len(sub) < 3 {
			return match
		}
		text := sub[2]
		id := slugify(text)
		return fmt.Sprintf("%s %s {#%s}", sub[1], text, id)
	})

	// Convert markdown to HTML
	var buf bytes.Buffer
	if err := r.md.Convert([]byte(processed), &buf); err != nil {
		return nil, err
	}
	result := buf.String()

	// Add IDs to heading tags by consuming the "{#slug}" markers.
	headingTagRe := regexp.MustCompile(`<(h[1-6])>([^{]*?)\s*\{#([^}]+)\}\s*</h[1-6]>`)
	result = headingTagRe.ReplaceAllString(result, `<$1 id="$3">$2</$1>`)

	// Replace wikilink placeholders
	for i, link := range links {
		placeholder := fmt.Sprintf("WLPH_%d_END", i)
		resolved := r.idx.ResolveLinkTarget(link.target)
		var replacement string
		if resolved != "" {
			// Known page: normal wikilink.
			href := "/wiki/" + encodeURLPath(resolved)
			replacement = fmt.Sprintf(`<a href="%s" class="wikilink">%s</a>`, href, htmlpkg.EscapeString(link.display))
		} else if isAssetLink(link.target) {
			// Not a page but has a file-attachment extension: link into /assets/.
			href := "/assets/" + encodeURLPath(link.target)
			replacement = fmt.Sprintf(`<a href="%s" class="wikilink asset-link">%s</a>`, href, htmlpkg.EscapeString(link.display))
		} else {
			// Unresolvable: render as an inert, visibly-broken link.
			replacement = fmt.Sprintf(`<span class="wikilink-broken" title="Page non trouvée: %s">%s</span>`,
				htmlpkg.EscapeString(link.target), htmlpkg.EscapeString(link.display))
		}
		result = strings.Replace(result, placeholder, replacement, 1)
	}

	result = fixImagePaths(result, currentDomain)

	// Reading time: ~200 words per minute, minimum 1 minute for any
	// non-empty page. Word count uses the pre-HTML markdown source.
	wordCount := countWords(content)
	readingMin := wordCount / 200
	if readingMin < 1 && wordCount > 0 {
		readingMin = 1
	}

	return &RenderResult{HTML: result, TOC: toc, WordCount: wordCount, ReadingMinutes: readingMin}, nil
}
|
|
|
|
func extractTOC(content string) []TOCEntry {
|
|
var toc []TOCEntry
|
|
for _, match := range headingPattern.FindAllStringSubmatch(content, -1) {
|
|
if len(match) < 3 {
|
|
continue
|
|
}
|
|
level := len(match[1])
|
|
text := strings.TrimSpace(match[2])
|
|
// Remove any wikilinks from heading text for TOC display
|
|
text = wikilinkPattern.ReplaceAllStringFunc(text, func(m string) string {
|
|
inner := m[2 : len(m)-2]
|
|
if pipeIdx := strings.Index(inner, "|"); pipeIdx >= 0 {
|
|
return strings.TrimSpace(inner[pipeIdx+1:])
|
|
}
|
|
return inner
|
|
})
|
|
toc = append(toc, TOCEntry{
|
|
Level: level,
|
|
ID: slugify(text),
|
|
Text: text,
|
|
})
|
|
}
|
|
return toc
|
|
}
|
|
|
|
// slugify turns heading text into a URL-safe anchor id: lowercase ASCII
// letters and digits pass through, each space/dash/underscore becomes a
// '-' (consecutive separators are NOT collapsed), common French accented
// vowels and 'ç' are transliterated to ASCII, and every other rune is
// dropped. Leading and trailing dashes are trimmed from the result.
func slugify(text string) string {
	var out strings.Builder
	for _, c := range strings.ToLower(text) {
		switch {
		case (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9'):
			out.WriteRune(c)
		case c == ' ' || c == '-' || c == '_':
			out.WriteByte('-')
		case c == 'é' || c == 'è' || c == 'ê' || c == 'ë':
			out.WriteByte('e')
		case c == 'à' || c == 'â' || c == 'ä':
			out.WriteByte('a')
		case c == 'ù' || c == 'û' || c == 'ü':
			out.WriteByte('u')
		case c == 'ô' || c == 'ö':
			out.WriteByte('o')
		case c == 'î' || c == 'ï':
			out.WriteByte('i')
		case c == 'ç':
			out.WriteByte('c')
		}
	}
	return strings.Trim(out.String(), "-")
}
|
|
|
|
// stripLogseqProperties removes Logseq-style "key:: value" property lines
// from the top of a page. Only roughly the first 50 lines are examined
// (SplitN leaves the remainder as one chunk); the scan stops at the first
// line that is neither blank nor a property, and lines beginning with
// '#', '-', or '>' are never treated as properties.
func stripLogseqProperties(content string) string {
	lines := strings.SplitN(content, "\n", 50)
	cut := 0
	for i, raw := range lines {
		line := strings.TrimSpace(raw)
		switch {
		case line == "":
			// Blank lines neither extend nor end the property header.
			continue
		case strings.Contains(line, ":: ") &&
			!strings.HasPrefix(line, "#") &&
			!strings.HasPrefix(line, "-") &&
			!strings.HasPrefix(line, ">"):
			cut = i + 1
			continue
		}
		break
	}
	if cut > 0 && cut < len(lines) {
		return strings.Join(lines[cut:], "\n")
	}
	return content
}
|
|
|
|
// encodeURLPath percent-encodes each segment of a slash-separated path
// while leaving the '/' separators themselves intact.
func encodeURLPath(path string) string {
	parts := strings.Split(path, "/")
	encoded := make([]string, len(parts))
	for i, part := range parts {
		encoded[i] = url.PathEscape(part)
	}
	return strings.Join(encoded, "/")
}
|
|
|
|
// isAssetLink reports whether a wikilink target looks like a file
// attachment (judged by its extension) rather than a wiki page. The
// comparison is case-insensitive.
func isAssetLink(target string) bool {
	lower := strings.ToLower(target)
	for _, ext := range [...]string{".pdf", ".png", ".jpg", ".jpeg", ".gif", ".svg", ".ods", ".xlsx", ".zip", ".mp3", ".wav"} {
		if strings.HasSuffix(lower, ext) {
			return true
		}
	}
	return false
}
|
|
|
|
// imgSrcRe matches src attributes pointing at common image files.
// Compiled once at package init instead of on every fixImagePaths call
// (the original recompiled the pattern per render, which is wasted work
// on a hot path).
var imgSrcRe = regexp.MustCompile(`src="([^"]+\.(png|jpg|jpeg|gif|svg))"`)

// fixImagePaths rewrites relative image src attributes in rendered HTML so
// they point into /assets/, prefixed with the current domain directory
// when one is set. Absolute paths ("/...") and external URLs ("http...")
// are left untouched.
//
// NOTE(review): filepath.Join is used here to build a URL path; on Windows
// it would emit backslashes. Fine on Unix hosts — confirm the deployment
// target or switch to path.Join.
func fixImagePaths(htmlContent string, currentDomain string) string {
	return imgSrcRe.ReplaceAllStringFunc(htmlContent, func(match string) string {
		sub := imgSrcRe.FindStringSubmatch(match)
		if len(sub) < 2 {
			return match
		}
		src := sub[1]
		// External URLs and already-absolute paths pass through unchanged.
		if strings.HasPrefix(src, "http") || strings.HasPrefix(src, "/") {
			return match
		}
		if currentDomain != "" {
			return fmt.Sprintf(`src="/assets/%s"`, filepath.Join(currentDomain, src))
		}
		return fmt.Sprintf(`src="/assets/%s"`, src)
	})
}
|
|
|
|
// countWords counts whitespace-separated words in content, treating only
// space, newline, carriage return, and tab as separators.
func countWords(content string) int {
	words := 0
	prevWasSep := true
	for _, c := range content {
		isSep := c == ' ' || c == '\n' || c == '\r' || c == '\t'
		// Count each transition from separator to non-separator: one per
		// word start (the original counted word ends — same total).
		if !isSep && prevWasSep {
			words++
		}
		prevWasSep = isSep
	}
	return words
}
|