Generalize the conversion engine from markdown-source-only to a (from→to)
dispatcher, convert.Convert, supporting:
md → docx | html | pdf
docx → md | html
html → md | docx
- convertToMarkdown (docx→md, html→md): pandoc -t gfm --wrap=none with an
embedded inline-media.lua filter that base64-inlines mediabag images as data:
URIs, so the output .md is self-contained (markdown has no --embed-resources).
- convertToHTML now takes a source format: docx→html reuses the doctype template
and --embed-resources base64-inlines the docx's images automatically.
- convertToDocx takes a source format: html→docx embeds images natively.
- ToDocx/ToHTML/ToPDF are kept as the md-source entry points, delegating to the
shared internals. writeScratchFiles generalizes the old template-set writer.
Routing (converthandler.go):
- RecognizeVirtualConvert maps any target ext {md,docx,html,pdf} to the first
existing real sibling source by precedence (md←docx,html; docx←md,html;
html←md,docx; pdf←md). Real files still win (dispatcher stats first).
- ServeConverted accepts md; buildAndStore dispatches on (ext(src), format) via
convert.Convert; purgeConverted clears all derived siblings on any write.
Tests: per-direction command-shape assertions (convert) + recognizer matrix and
precedence (handler). Verified end-to-end with real pandoc (docx→md/html,
html→md/docx, base64 images). Full ./... suite green.
PDF stays markdown-only for now (docx/html→pdf would need a two-stage hop).
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
88 lines
3 KiB
Go
88 lines
3 KiB
Go
package convert
|
|
|
|
import (
|
|
"embed"
|
|
"io/fs"
|
|
"path"
|
|
"sort"
|
|
)
|
|
|
|
// Default pandoc HTML templates, mirrored verbatim from /pandoc/templates/ by
|
|
// the top-level ./build (shared/build-lib.sh: sync_pandoc_templates). The runner
|
|
// writes the chosen template + its partials to a host scratch dir on each HTML
|
|
// conversion and bind-mounts them into the sandbox so pandoc can `--template`
|
|
// against them.
|
|
//
|
|
// pandoc/templates/ is the single source of truth; this directory is a build
|
|
// artifact kept in sync and guarded by TestEmbeddedTemplatesMatchSource. There's
|
|
// no symlink because go:embed paths must resolve under the containing module, and
|
|
// we want the binary to ship the bytes verbatim, not depend on the source tree at
|
|
// runtime.
|
|
//
|
|
// The set holds named doctype templates (report.html, letter.html,
|
|
// specification.html) plus the shared partials they include (_head.html,
|
|
// _doc.html, _scripts.html). A document picks one via its `template:` front
|
|
// matter; operators override individual files through the .zddc.d/templates/
|
|
// cascade (see internal/handler).
|
|
|
|
// `all:` is required so the `_`-prefixed partials (_head.html, _doc.html,
|
|
// _scripts.html) are embedded — a bare `//go:embed templates` excludes names
|
|
// beginning with `_` or `.`.
|
|
//
|
|
//go:embed all:templates
|
|
var templatesFS embed.FS
|
|
|
|
// inlineMediaLua is the pandoc filter that base64-inlines images into markdown
|
|
// output (docx→md / html→md), written to the per-call scratch dir alongside the
|
|
// conversion. Server-only — the CLI convert script extracts media to a folder
|
|
// instead.
|
|
//
|
|
//go:embed inline-media.lua
|
|
var inlineMediaLua []byte
|
|
|
|
// DefaultTemplateName is used when a document declares no `template:` field or
|
|
// names one that doesn't resolve.
|
|
const DefaultTemplateName = "report"
|
|
|
|
// embeddedTemplate returns the bytes of a baked-in template/partial by base file
|
|
// name (e.g. "report.html", "_head.html"), or nil if there is no such default.
|
|
func embeddedTemplate(name string) []byte {
|
|
b, err := templatesFS.ReadFile(path.Join("templates", name))
|
|
if err != nil {
|
|
return nil
|
|
}
|
|
return b
|
|
}
|
|
|
|
// embeddedTemplateFiles returns all baked-in template/partial files keyed by
|
|
// base name. The returned map is a fresh copy the caller may mutate (e.g. to
|
|
// overlay .zddc.d/templates overrides).
|
|
func embeddedTemplateFiles() map[string][]byte {
|
|
out := make(map[string][]byte)
|
|
entries, _ := fs.ReadDir(templatesFS, "templates")
|
|
for _, e := range entries {
|
|
if e.IsDir() {
|
|
continue
|
|
}
|
|
if b := embeddedTemplate(e.Name()); b != nil {
|
|
out[e.Name()] = b
|
|
}
|
|
}
|
|
return out
|
|
}
|
|
|
|
// EmbeddedTemplateNames lists the baked-in doctype template names (no extension,
|
|
// partials excluded — i.e. the names a `template:` field may select), sorted.
|
|
func EmbeddedTemplateNames() []string {
|
|
var names []string
|
|
entries, _ := fs.ReadDir(templatesFS, "templates")
|
|
for _, e := range entries {
|
|
n := e.Name()
|
|
if e.IsDir() || n == "" || n[0] == '_' || path.Ext(n) != ".html" {
|
|
continue
|
|
}
|
|
names = append(names, n[:len(n)-len(".html")])
|
|
}
|
|
sort.Strings(names)
|
|
return names
|
|
}
|