feat(server): full md/docx/html conversion matrix + base64 image inlining
Generalize the conversion engine from markdown-source-only to a (from→to)
dispatcher, convert.Convert, supporting:
md → docx | html | pdf
docx → md | html
html → md | docx
- convertToMarkdown (docx→md, html→md): pandoc -t gfm --wrap=none with an
embedded inline-media.lua filter that base64-inlines mediabag images as data:
URIs, so the output .md is self-contained (markdown has no --embed-resources).
- convertToHTML now takes a source format: docx→html reuses the doctype template
and --embed-resources base64-inlines the docx's images automatically.
- convertToDocx takes a source format: html→docx embeds images natively.
- ToDocx/ToHTML/ToPDF are kept as the md-source entry points, delegating to the
shared internals. writeScratchFiles generalizes the old template-set writer.
Routing (converthandler.go):
- RecognizeVirtualConvert maps any target ext {md,docx,html,pdf} to the first
existing real sibling source by precedence (md←docx,html; docx←md,html;
html←md,docx; pdf←md). Real files still win (dispatcher stats first).
- ServeConverted accepts md; buildAndStore dispatches on (ext(src), format) via
convert.Convert; purgeConverted clears all derived siblings on any write.
Tests: per-direction command-shape assertions (convert) + recognizer matrix and
precedence (handler). Verified end-to-end with real pandoc (docx→md/html,
html→md/docx, base64 images). Full ./... suite green.
PDF stays markdown-only for now (docx/html→pdf would need a two-stage hop).
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
894610d59e
commit
16d88010a6
7 changed files with 317 additions and 52 deletions
|
|
@ -154,12 +154,20 @@ func currentChromiumBinary() string {
|
|||
// full file content (envelope + body); pandoc handles
|
||||
// `markdown+yaml_metadata_block` natively.
|
||||
func ToDocx(ctx context.Context, source []byte, m Metadata) ([]byte, error) {
|
||||
return convertToDocx(ctx, "markdown+yaml_metadata_block", source, m)
|
||||
}
|
||||
|
||||
// convertToDocx renders source (in pandoc input format fromFmt) to DOCX bytes
|
||||
// via a single pandoc exec (stdin → stdout; no scratch dir). Images in the
|
||||
// source's mediabag — present when fromFmt is "html" — are embedded into the
|
||||
// .docx natively by pandoc's docx writer.
|
||||
func convertToDocx(ctx context.Context, fromFmt string, source []byte, m Metadata) ([]byte, error) {
|
||||
r := currentRunner()
|
||||
if r == nil {
|
||||
return nil, ErrUnavailable
|
||||
}
|
||||
cmd := []string{
|
||||
"--from=markdown+yaml_metadata_block",
|
||||
"--from=" + fromFmt,
|
||||
"--to=docx",
|
||||
"--output=-",
|
||||
}
|
||||
|
|
@ -168,6 +176,68 @@ func ToDocx(ctx context.Context, source []byte, m Metadata) ([]byte, error) {
|
|||
return r.Run(ctx, currentPandocBinary(), source, "", cmd)
|
||||
}
|
||||
|
||||
// convertToMarkdown renders source (DOCX or HTML, per fromFmt) to GitHub-
|
||||
// flavored markdown. Embedded images are inlined as base64 data: URIs via the
|
||||
// inline-media.lua filter so the output .md is self-contained; --wrap=none keeps
|
||||
// paragraphs on one line (no hard line breaks).
|
||||
func convertToMarkdown(ctx context.Context, fromFmt string, source []byte) ([]byte, error) {
|
||||
r := currentRunner()
|
||||
if r == nil {
|
||||
return nil, ErrUnavailable
|
||||
}
|
||||
scratch, err := writeScratchFiles(currentScratchDir(), map[string][]byte{"inline-media.lua": inlineMediaLua})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("scratch: %w", err)
|
||||
}
|
||||
defer os.RemoveAll(scratch)
|
||||
cmd := []string{
|
||||
"--from=" + fromFmt,
|
||||
"--to=gfm",
|
||||
"--wrap=none",
|
||||
"--lua-filter=" + filepath.Join(scratch, "inline-media.lua"),
|
||||
"--output=-",
|
||||
"-",
|
||||
}
|
||||
return r.Run(ctx, currentPandocBinary(), source, scratch, cmd)
|
||||
}
|
||||
|
||||
// Convert renders source from one document format to another. Supported pairs:
|
||||
//
|
||||
// md → docx | html | pdf
|
||||
// docx → md | html
|
||||
// html → md | docx
|
||||
//
|
||||
// ts is the resolved HTML template set, used only for the *→html and md→pdf
|
||||
// directions and ignored otherwise. Unsupported pairs return an error.
|
||||
func Convert(ctx context.Context, from, to string, source []byte, m Metadata, ts TemplateSet) ([]byte, error) {
|
||||
switch from {
|
||||
case "md", "markdown":
|
||||
switch to {
|
||||
case "docx":
|
||||
return ToDocx(ctx, source, m)
|
||||
case "html":
|
||||
return ToHTML(ctx, source, m, ts)
|
||||
case "pdf":
|
||||
return ToPDF(ctx, source, m, ts)
|
||||
}
|
||||
case "docx":
|
||||
switch to {
|
||||
case "md":
|
||||
return convertToMarkdown(ctx, "docx", source)
|
||||
case "html":
|
||||
return convertToHTML(ctx, "docx", source, m, ts)
|
||||
}
|
||||
case "html", "htm":
|
||||
switch to {
|
||||
case "md":
|
||||
return convertToMarkdown(ctx, "html", source)
|
||||
case "docx":
|
||||
return convertToDocx(ctx, "html", source, m)
|
||||
}
|
||||
}
|
||||
return nil, fmt.Errorf("unsupported conversion %s→%s", from, to)
|
||||
}
|
||||
|
||||
// ToHTML renders source markdown to standalone HTML using the doctype
|
||||
// template in ts. Embeds CSS + images via --embed-resources. The
|
||||
// template + its partials live in a per-call scratch dir; the host path
|
||||
|
|
@ -175,6 +245,14 @@ func ToDocx(ctx context.Context, source []byte, m Metadata) ([]byte, error) {
|
|||
// sandbox at the same path. A zero-value ts falls back to the embedded
|
||||
// default template.
|
||||
func ToHTML(ctx context.Context, source []byte, m Metadata, ts TemplateSet) ([]byte, error) {
|
||||
return convertToHTML(ctx, "markdown+yaml_metadata_block", source, m, ts)
|
||||
}
|
||||
|
||||
// convertToHTML renders source (in pandoc input format fromFmt) to standalone
|
||||
// HTML through the doctype template in ts. --embed-resources base64-inlines CSS
|
||||
// and any mediabag images (so DOCX images survive docx→html with no extra
|
||||
// filter). The template + partials are written to a per-call scratch dir.
|
||||
func convertToHTML(ctx context.Context, fromFmt string, source []byte, m Metadata, ts TemplateSet) ([]byte, error) {
|
||||
r := currentRunner()
|
||||
if r == nil {
|
||||
return nil, ErrUnavailable
|
||||
|
|
@ -182,7 +260,7 @@ func ToHTML(ctx context.Context, source []byte, m Metadata, ts TemplateSet) ([]b
|
|||
if ts.Name == "" || len(ts.Files) == 0 {
|
||||
ts = DefaultTemplateSet(DefaultTemplateName)
|
||||
}
|
||||
scratch, err := writeTemplateSetToScratch(currentScratchDir(), ts)
|
||||
scratch, err := writeScratchFiles(currentScratchDir(), ts.Files)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("scratch: %w", err)
|
||||
}
|
||||
|
|
@ -190,7 +268,7 @@ func ToHTML(ctx context.Context, source []byte, m Metadata, ts TemplateSet) ([]b
|
|||
|
||||
tplPath := filepath.Join(scratch, ts.Name)
|
||||
cmd := []string{
|
||||
"--from=markdown+yaml_metadata_block",
|
||||
"--from=" + fromFmt,
|
||||
"--to=html5",
|
||||
"--standalone",
|
||||
"--embed-resources",
|
||||
|
|
|
|||
|
|
@ -41,6 +41,77 @@ func (f *fakeRunner) lastCall() (string, []string) {
|
|||
return f.binaries[len(f.binaries)-1], f.calls[len(f.calls)-1]
|
||||
}
|
||||
|
||||
func TestConvert_Directions(t *testing.T) {
|
||||
cases := []struct {
|
||||
from, to string
|
||||
wantArgs []string // substrings that must appear in the pandoc command
|
||||
wantErr bool
|
||||
}{
|
||||
{"docx", "md", []string{"--from=docx", "--to=gfm", "--wrap=none"}, false},
|
||||
{"html", "md", []string{"--from=html", "--to=gfm", "--wrap=none"}, false},
|
||||
{"docx", "html", []string{"--from=docx", "--to=html5", "--embed-resources"}, false},
|
||||
{"html", "docx", []string{"--from=html", "--to=docx"}, false},
|
||||
{"md", "docx", []string{"--from=markdown+yaml_metadata_block", "--to=docx"}, false},
|
||||
{"md", "html", []string{"--from=markdown+yaml_metadata_block", "--to=html5"}, false},
|
||||
{"docx", "pdf", nil, true}, // pdf is markdown-only
|
||||
{"docx", "docx", nil, true}, // same-format is unsupported
|
||||
{"html", "html", nil, true},
|
||||
}
|
||||
for _, c := range cases {
|
||||
t.Run(c.from+"_to_"+c.to, func(t *testing.T) {
|
||||
f := &fakeRunner{resp: []byte("OUT")}
|
||||
InstallRunner(f)
|
||||
t.Cleanup(func() { InstallRunner(nil) })
|
||||
SetBinaries("pandoc", "chromium-browser")
|
||||
|
||||
_, err := Convert(context.Background(), c.from, c.to, []byte("x"), Metadata{}, TemplateSet{})
|
||||
if c.wantErr {
|
||||
if err == nil {
|
||||
t.Fatalf("Convert(%s→%s): expected error, got nil", c.from, c.to)
|
||||
}
|
||||
return
|
||||
}
|
||||
if err != nil {
|
||||
t.Fatalf("Convert(%s→%s): %v", c.from, c.to, err)
|
||||
}
|
||||
binary, call := f.lastCall()
|
||||
if binary != "pandoc" {
|
||||
t.Errorf("expected pandoc, got %q", binary)
|
||||
}
|
||||
for _, want := range c.wantArgs {
|
||||
if !contains(call, want) {
|
||||
t.Errorf("Convert(%s→%s) missing %q in %v", c.from, c.to, want, call)
|
||||
}
|
||||
}
|
||||
// To-markdown directions inline images via the lua filter.
|
||||
if c.to == "md" {
|
||||
if !hasPrefArg(call, "--lua-filter=") || !hasSuffArg(call, "inline-media.lua") {
|
||||
t.Errorf("Convert(%s→md) missing inline-media.lua filter: %v", c.from, call)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// hasPrefArg / hasSuffArg report whether any arg has the given prefix/suffix.
|
||||
func hasPrefArg(args []string, prefix string) bool {
	// Linear scan; stops at the first argument that starts with prefix.
	for i := 0; i < len(args); i++ {
		if strings.HasPrefix(args[i], prefix) {
			return true
		}
	}
	return false
}
|
||||
|
||||
func hasSuffArg(args []string, suffix string) bool {
	// Linear scan; stops at the first argument that ends with suffix.
	for i := 0; i < len(args); i++ {
		if strings.HasSuffix(args[i], suffix) {
			return true
		}
	}
	return false
}
|
||||
|
||||
func TestToDocx_UsesPandocBinary(t *testing.T) {
|
||||
f := &fakeRunner{resp: []byte("FAKE-DOCX")}
|
||||
InstallRunner(f)
|
||||
|
|
|
|||
|
|
@ -32,6 +32,14 @@ import (
|
|||
//go:embed all:templates
|
||||
var templatesFS embed.FS
|
||||
|
||||
// inlineMediaLua is the pandoc filter that base64-inlines images into markdown
|
||||
// output (docx→md / html→md), written to the per-call scratch dir alongside the
|
||||
// conversion. Server-only — the CLI convert script extracts media to a folder
|
||||
// instead.
|
||||
//
|
||||
//go:embed inline-media.lua
|
||||
var inlineMediaLua []byte
|
||||
|
||||
// DefaultTemplateName is used when a document declares no `template:` field or
|
||||
// names one that doesn't resolve.
|
||||
const DefaultTemplateName = "report"
|
||||
|
|
|
|||
31
zddc/internal/convert/inline-media.lua
Normal file
31
zddc/internal/convert/inline-media.lua
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
-- inline-media.lua — pandoc filter that rewrites every image to a self-contained
|
||||
-- base64 data: URI, pulling the bytes from pandoc's mediabag (populated when
|
||||
-- reading DOCX, or fetched for HTML). Used by the docx→md / html→md conversions
|
||||
-- so the resulting markdown carries its images inline (markdown output has no
|
||||
-- native --embed-resources equivalent).
|
||||
|
||||
local b = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'
|
||||
|
||||
-- base64 encodes the byte string `data` using the standard alphabet held in
-- `b`, with '=' padding. Input is consumed three bytes at a time; each group is
-- packed into a 24-bit word and split into four 6-bit alphabet indices.
local function base64(data)
  local quads = {}
  for pos = 1, #data, 3 do
    -- b2 / b3 are nil when the final group is short.
    local b1, b2, b3 = data:byte(pos, pos + 2)
    local word = b1 * 65536 + (b2 or 0) * 256 + (b3 or 0)

    local i1 = math.floor(word / 262144)
    local i2 = math.floor(word / 4096) % 64
    local i3 = math.floor(word / 64) % 64
    local i4 = word % 64

    local quad = b:sub(i1 + 1, i1 + 1) .. b:sub(i2 + 1, i2 + 1)
    if b2 then
      quad = quad .. b:sub(i3 + 1, i3 + 1)
    else
      quad = quad .. '='
    end
    if b3 then
      quad = quad .. b:sub(i4 + 1, i4 + 1)
    else
      quad = quad .. '='
    end
    quads[#quads + 1] = quad
  end
  return table.concat(quads)
end
|
||||
|
||||
-- Image is the pandoc filter hook for image elements. It rewrites img.src to a
-- base64 data: URI when the image bytes can be obtained, and returns the
-- element unchanged otherwise.
--
-- The mediabag is consulted first (populated when reading DOCX); on a miss the
-- source is fetched. NOTE(review): fetch resolves URLs and local paths, so for
-- untrusted HTML input this relies on the surrounding sandbox to restrict what
-- is reachable — confirm.
function Image(img)
  local mt, data = pandoc.mediabag.lookup(img.src)
  if not data then
    -- fetch raises when the source cannot be retrieved; guard it so a single
    -- unreachable image leaves its src untouched instead of aborting the run.
    local ok, fetched_mt, fetched_data = pcall(pandoc.mediabag.fetch, img.src)
    if ok then
      mt, data = fetched_mt, fetched_data
    end
  end
  if data then
    -- fetch reports an unknown MIME type as '' (truthy in Lua), so test for the
    -- empty string explicitly rather than relying on `or`.
    if not mt or mt == '' then
      mt = 'application/octet-stream'
    end
    img.src = 'data:' .. mt .. ';base64,' .. base64(data)
  end
  return img
end
|
||||
|
|
@ -274,26 +274,26 @@ func (r *ringWriter) String() string {
|
|||
return string(r.buf)
|
||||
}
|
||||
|
||||
// writeTemplateSetToScratch materialises a TemplateSet (the chosen doctype
|
||||
// template plus its partials) into a fresh scratch dir and returns the host
|
||||
// path. Caller is responsible for os.RemoveAll(dir) when done. Used by ToHTML,
|
||||
// which needs the template + partials visible inside the sandbox (pandoc
|
||||
// resolves `$partial()$` includes from the template's own directory).
|
||||
// writeScratchFiles materialises a set of named byte buffers (template +
|
||||
// partials, or a lua filter) into a fresh scratch dir and returns the host
|
||||
// path. Caller is responsible for os.RemoveAll(dir) when done. pandoc resolves
|
||||
// `$partial()$` includes and --lua-filter paths from this dir, so everything
|
||||
// lands flat alongside the entry file.
|
||||
//
|
||||
// scratchRoot controls where the temp dir lands. Empty means "use $TMPDIR".
|
||||
//
|
||||
// Files are written world-readable so the binary's default user can read them
|
||||
// through the wrapper's bind mount regardless of the host's umask. File names
|
||||
// are base names only (no path separators) — they all land flat in the dir.
|
||||
func writeTemplateSetToScratch(scratchRoot string, ts TemplateSet) (string, error) {
|
||||
// through the wrapper's bind mount regardless of the host's umask. Keys are
|
||||
// reduced to base names only (no path separators).
|
||||
func writeScratchFiles(scratchRoot string, files map[string][]byte) (string, error) {
|
||||
dir, err := os.MkdirTemp(scratchRoot, "zddc-convert-")
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("scratch dir: %w", err)
|
||||
}
|
||||
for name, b := range ts.Files {
|
||||
for name, b := range files {
|
||||
if err := os.WriteFile(filepath.Join(dir, filepath.Base(name)), b, 0o644); err != nil {
|
||||
os.RemoveAll(dir)
|
||||
return "", fmt.Errorf("write template %q: %w", name, err)
|
||||
return "", fmt.Errorf("write scratch file %q: %w", name, err)
|
||||
}
|
||||
}
|
||||
if err := chmodTree(dir, 0o755, 0o644); err != nil {
|
||||
|
|
|
|||
|
|
@ -48,37 +48,52 @@ var convertSF singleflightGroup
|
|||
// runner itself enforces a finer-grained timeout on the container.
|
||||
const convertTimeout = 90 * time.Second
|
||||
|
||||
// convertSourceExts maps a requested target extension to the candidate source
|
||||
// extensions in precedence order — the first existing real sibling wins. The
|
||||
// matrix: md↔docx↔html all directions, plus md→pdf (PDF stays markdown-only).
|
||||
var convertSourceExts = map[string][]string{
|
||||
"md": {"docx", "html"},
|
||||
"docx": {"md", "html"},
|
||||
"html": {"md", "docx"},
|
||||
"pdf": {"md"},
|
||||
}
|
||||
|
||||
// RecognizeVirtualConvert reports whether urlPath names a virtual
|
||||
// "<file>.<format>" — a rendered form of a sibling markdown source.
|
||||
// Returns (mdAbsPath, format, true) when <file>.md exists on disk and
|
||||
// the requested extension is one of docx / html / pdf. The caller
|
||||
// (the dispatcher) only invokes this when a stat on the requested
|
||||
// path itself fails — a real on-disk file always wins.
|
||||
// "<file>.<format>" — a rendered form of a sibling source document in a
|
||||
// different format. Returns (srcAbsPath, format, true) when the requested
|
||||
// extension is convertible (md/docx/html/pdf) and a sibling source exists on
|
||||
// disk, picked by convertSourceExts precedence. The caller (the dispatcher) only
|
||||
// invokes this when a stat on the requested path itself fails — a real on-disk
|
||||
// file always wins.
|
||||
//
|
||||
// A virtual file URL means `<a href="…/foo.docx">` works without any
|
||||
// query-string handling, and a script's `curl -O …/foo.pdf` writes the
|
||||
// expected filename.
|
||||
func RecognizeVirtualConvert(fsRoot, urlPath string) (mdAbs, format string, ok bool) {
|
||||
// query-string handling, and a script's `curl -O …/foo.md` writes the expected
|
||||
// filename.
|
||||
func RecognizeVirtualConvert(fsRoot, urlPath string) (srcAbs, format string, ok bool) {
|
||||
lower := strings.ToLower(urlPath)
|
||||
for _, ext := range []string{".docx", ".html", ".pdf"} {
|
||||
for target, sources := range convertSourceExts {
|
||||
ext := "." + target
|
||||
if !strings.HasSuffix(lower, ext) {
|
||||
continue
|
||||
continue // distinct suffixes — at most one target matches
|
||||
}
|
||||
base := urlPath[:len(urlPath)-len(ext)]
|
||||
if base == "" || strings.HasSuffix(base, "/") {
|
||||
continue
|
||||
return "", "", false
|
||||
}
|
||||
rel := strings.Trim(base, "/") + ".md"
|
||||
abs := filepath.Join(fsRoot, filepath.FromSlash(rel))
|
||||
stem := strings.Trim(base, "/")
|
||||
for _, srcExt := range sources {
|
||||
abs := filepath.Join(fsRoot, filepath.FromSlash(stem+"."+srcExt))
|
||||
// Path containment.
|
||||
if abs != fsRoot && !strings.HasPrefix(abs, fsRoot+string(filepath.Separator)) {
|
||||
continue
|
||||
}
|
||||
if info, err := os.Stat(abs); err == nil && !info.IsDir() {
|
||||
return abs, ext[1:], true
|
||||
return abs, target, true
|
||||
}
|
||||
}
|
||||
return "", "", false
|
||||
}
|
||||
return "", "", false
|
||||
}
|
||||
|
||||
// ServeConverted is the entry point. format is the requested target
|
||||
|
|
@ -87,9 +102,9 @@ func RecognizeVirtualConvert(fsRoot, urlPath string) (mdAbs, format string, ok b
|
|||
func ServeConverted(cfg config.Config, w http.ResponseWriter, r *http.Request, srcAbs, format string, chain zddc.PolicyChain) {
|
||||
format = strings.ToLower(strings.TrimSpace(format))
|
||||
switch format {
|
||||
case "docx", "html", "pdf":
|
||||
case "md", "docx", "html", "pdf":
|
||||
default:
|
||||
http.Error(w, "Bad Request — convert must be docx, html, or pdf", http.StatusBadRequest)
|
||||
http.Error(w, "Bad Request — convert must be md, docx, html, or pdf", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
|
|
@ -159,17 +174,13 @@ func buildAndStore(ctx context.Context, fsRoot, srcAbs string, srcInfo os.FileIn
|
|||
ctx, cancel := context.WithTimeout(ctx, convertTimeout)
|
||||
defer cancel()
|
||||
|
||||
var out []byte
|
||||
switch format {
|
||||
case "docx":
|
||||
out, err = convert.ToDocx(ctx, source, meta)
|
||||
case "html":
|
||||
out, err = convert.ToHTML(ctx, source, meta, resolveTemplateSet(fsRoot, filepath.Dir(srcAbs), source))
|
||||
case "pdf":
|
||||
out, err = convert.ToPDF(ctx, source, meta, resolveTemplateSet(fsRoot, filepath.Dir(srcAbs), source))
|
||||
default:
|
||||
return fmt.Errorf("unsupported format %q", format)
|
||||
// Source format is the on-disk extension; target is the requested format.
|
||||
from := strings.TrimPrefix(strings.ToLower(filepath.Ext(srcAbs)), ".")
|
||||
var ts convert.TemplateSet
|
||||
if format == "html" || format == "pdf" {
|
||||
ts = resolveTemplateSet(fsRoot, filepath.Dir(srcAbs), source)
|
||||
}
|
||||
out, err := convert.Convert(ctx, from, format, source, meta, ts)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
|
@ -290,20 +301,21 @@ func contentDispositionFor(format, base string) string {
|
|||
return fmt.Sprintf(`inline; filename="%s.%s"`, base, format)
|
||||
}
|
||||
|
||||
// purgeConverted removes the cached .zddc.d/converted/<base>.{docx,html,pdf}
|
||||
// sidecars for an .md source. Called from the file API after a
|
||||
// successful PUT/DELETE/MOVE so the next GET ?convert= regenerates.
|
||||
// Best-effort: errors (including "directory doesn't exist") are
|
||||
// swallowed. Non-.md sources are a no-op so this is safe to call
|
||||
// purgeConverted removes the cached .zddc.d/converted/<base>.{md,docx,html,pdf}
|
||||
// sidecars for a convertible source. Called from the file API after a successful
|
||||
// PUT/DELETE/MOVE so the next virtual-convert GET regenerates. Best-effort:
|
||||
// errors (including "directory doesn't exist") are swallowed. Sources whose
|
||||
// extension isn't convertible are a no-op, so this is safe to call
|
||||
// unconditionally after any write.
|
||||
func purgeConverted(srcAbs string) {
|
||||
if !strings.HasSuffix(strings.ToLower(srcAbs), ".md") {
|
||||
ext := strings.TrimPrefix(strings.ToLower(filepath.Ext(srcAbs)), ".")
|
||||
if _, ok := convertSourceExts[ext]; !ok {
|
||||
return
|
||||
}
|
||||
dir := filepath.Dir(srcAbs)
|
||||
base := strings.TrimSuffix(filepath.Base(srcAbs), filepath.Ext(srcAbs))
|
||||
for _, ext := range []string{".docx", ".html", ".pdf"} {
|
||||
_ = os.Remove(filepath.Join(dir, ReservedSidecar, "converted", base+ext))
|
||||
for target := range convertSourceExts {
|
||||
_ = os.Remove(filepath.Join(dir, ReservedSidecar, "converted", base+"."+target))
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
65
zddc/internal/handler/converthandler_test.go
Normal file
65
zddc/internal/handler/converthandler_test.go
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
package handler
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestRecognizeVirtualConvert_MatrixAndPrecedence(t *testing.T) {
|
||||
root := t.TempDir()
|
||||
write := func(rel string) {
|
||||
p := filepath.Join(root, filepath.FromSlash(rel))
|
||||
if err := os.MkdirAll(filepath.Dir(p), 0o755); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := os.WriteFile(p, []byte("x"), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
// Sources on disk: doc.md, only.docx, both.md + both.docx, page.html.
|
||||
write("doc.md")
|
||||
write("only.docx")
|
||||
write("both.md")
|
||||
write("both.docx")
|
||||
write("page.html")
|
||||
|
||||
cases := []struct {
|
||||
name string
|
||||
url string
|
||||
wantOK bool
|
||||
wantSrcExt string
|
||||
wantFormat string
|
||||
}{
|
||||
{"md→docx", "/doc.docx", true, ".md", "docx"},
|
||||
{"md→html", "/doc.html", true, ".md", "html"},
|
||||
{"md→pdf", "/doc.pdf", true, ".md", "pdf"},
|
||||
{"docx→md (only docx present)", "/only.md", true, ".docx", "md"},
|
||||
{"docx→html (only docx present)", "/only.html", true, ".docx", "html"},
|
||||
{"docx has no pdf source", "/only.pdf", false, "", ""},
|
||||
{"both present, html prefers md source", "/both.html", true, ".md", "html"},
|
||||
{"html→md", "/page.md", true, ".html", "md"},
|
||||
{"html→docx", "/page.docx", true, ".html", "docx"},
|
||||
{"no source at all", "/missing.html", false, "", ""},
|
||||
{"directory url ignored", "/doc/", false, "", ""},
|
||||
{"non-convertible target", "/doc.txt", false, "", ""},
|
||||
}
|
||||
for _, c := range cases {
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
src, format, ok := RecognizeVirtualConvert(root, c.url)
|
||||
if ok != c.wantOK {
|
||||
t.Fatalf("ok=%v want %v (src=%q format=%q)", ok, c.wantOK, src, format)
|
||||
}
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
if format != c.wantFormat {
|
||||
t.Errorf("format=%q want %q", format, c.wantFormat)
|
||||
}
|
||||
if filepath.Ext(src) != c.wantSrcExt {
|
||||
t.Errorf("source ext=%q want %q (src=%q)", filepath.Ext(src), c.wantSrcExt, src)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
Loading…
Reference in a new issue