Generalize the conversion engine from markdown-source-only to a (from→to)
dispatcher, convert.Convert, supporting:
md → docx | html | pdf
docx → md | html
html → md | docx
- convertToMarkdown (docx→md, html→md): pandoc -t gfm --wrap=none with an
embedded inline-media.lua filter that base64-inlines mediabag images as data:
URIs, so the output .md is self-contained (markdown has no --embed-resources).
- convertToHTML now takes a source format: docx→html reuses the doctype template
and --embed-resources base64-inlines the docx's images automatically.
- convertToDocx takes a source format: html→docx embeds images natively.
- ToDocx/ToHTML/ToPDF are kept as the md-source entry points, delegating to the
shared internals. writeScratchFiles generalizes the old template-set writer.
Routing (converthandler.go):
- RecognizeVirtualConvert maps any target ext {md,docx,html,pdf} to the first
existing real sibling source by precedence (md←docx,html; docx←md,html;
html←md,docx; pdf←md). Real files still win (dispatcher stats first).
- ServeConverted accepts md; buildAndStore dispatches on (ext(src), format) via
convert.Convert; purgeConverted clears all derived siblings on any write.
Tests: per-direction command-shape assertions (convert) + recognizer matrix and
precedence (handler). Verified end-to-end with real pandoc (docx→md/html,
html→md/docx, base64 images). Full ./... suite green.
PDF stays markdown-only for now (docx/html→pdf would need a two-stage hop).
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
65 lines
1.8 KiB
Go
65 lines
1.8 KiB
Go
package handler
|
|
|
|
import (
|
|
"os"
|
|
"path/filepath"
|
|
"testing"
|
|
)
|
|
|
|
func TestRecognizeVirtualConvert_MatrixAndPrecedence(t *testing.T) {
|
|
root := t.TempDir()
|
|
write := func(rel string) {
|
|
p := filepath.Join(root, filepath.FromSlash(rel))
|
|
if err := os.MkdirAll(filepath.Dir(p), 0o755); err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
if err := os.WriteFile(p, []byte("x"), 0o644); err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
}
|
|
|
|
// Sources on disk: doc.md, only.docx, both.md + both.docx, page.html.
|
|
write("doc.md")
|
|
write("only.docx")
|
|
write("both.md")
|
|
write("both.docx")
|
|
write("page.html")
|
|
|
|
cases := []struct {
|
|
name string
|
|
url string
|
|
wantOK bool
|
|
wantSrcExt string
|
|
wantFormat string
|
|
}{
|
|
{"md→docx", "/doc.docx", true, ".md", "docx"},
|
|
{"md→html", "/doc.html", true, ".md", "html"},
|
|
{"md→pdf", "/doc.pdf", true, ".md", "pdf"},
|
|
{"docx→md (only docx present)", "/only.md", true, ".docx", "md"},
|
|
{"docx→html (only docx present)", "/only.html", true, ".docx", "html"},
|
|
{"docx has no pdf source", "/only.pdf", false, "", ""},
|
|
{"both present, html prefers md source", "/both.html", true, ".md", "html"},
|
|
{"html→md", "/page.md", true, ".html", "md"},
|
|
{"html→docx", "/page.docx", true, ".html", "docx"},
|
|
{"no source at all", "/missing.html", false, "", ""},
|
|
{"directory url ignored", "/doc/", false, "", ""},
|
|
{"non-convertible target", "/doc.txt", false, "", ""},
|
|
}
|
|
for _, c := range cases {
|
|
t.Run(c.name, func(t *testing.T) {
|
|
src, format, ok := RecognizeVirtualConvert(root, c.url)
|
|
if ok != c.wantOK {
|
|
t.Fatalf("ok=%v want %v (src=%q format=%q)", ok, c.wantOK, src, format)
|
|
}
|
|
if !ok {
|
|
return
|
|
}
|
|
if format != c.wantFormat {
|
|
t.Errorf("format=%q want %q", format, c.wantFormat)
|
|
}
|
|
if filepath.Ext(src) != c.wantSrcExt {
|
|
t.Errorf("source ext=%q want %q (src=%q)", filepath.Ext(src), c.wantSrcExt, src)
|
|
}
|
|
})
|
|
}
|
|
}
|