// Package convert turns a markdown source byte-buffer into DOCX, HTML, // or PDF by exec'ing pandoc and chromium-browser. Each conversion runs // inside a sandbox provided by the IMAGE — typically a wrapper script // at /usr/local/bin/ that puts the real binary into a cgroup // v2 + bubblewrap sandbox before exec'ing it. See // zddc/runtime.Containerfile for the production setup. // // zddc-server's Go code is unaware of sandboxing: it just exec's // "pandoc" or "chromium-browser" and gets the corresponding tool's // behavior back. Operators who want a different isolation strategy // (firejail, systemd-nspawn, podman-run, raw exec for dev) replace // the wrapper script in their image; the Go binary doesn't change. // // Public surface: // // ToDocx(ctx, source, meta) → []byte (DOCX bytes) // ToHTML(ctx, source, meta) → []byte (standalone HTML) // ToPDF (ctx, source, meta) → []byte (PDF, via HTML + chromium) // // Probe(ctx) → Capabilities (call once at startup) // Available() → (Capabilities, bool) // SetBinaries(pandoc, chromium) — install binary names from config // SetScratchDir(dir) — install scratch root from config // // All three converters are safe for concurrent use; each call gets a // fresh scratch dir + (image-provided) sandbox. // // Metadata maps to the placeholders consumed by viewer-template.html. // title/tracking_number/revision/status/is_draft typically come from // the source filename (zddc.ParseFilename); client/project/contractor/ // project_number from the .zddc cascade `convert:` block. package convert import ( "context" "fmt" "os" "path/filepath" "strings" "sync/atomic" "time" ) // Metadata is the variable bag passed to pandoc as `--variable k=v` // pairs. Fields with zero values are omitted. The viewer-template.html // uses `$if(field)$ … $endif$` blocks so absent fields render cleanly. type Metadata struct { Title string TrackingNumber string Revision string Status string Client string Project string Contractor string ProjectNumber string GenerationTime time.Time IsDraft bool NoTOC bool } // Default binary names. The runtime image installs WRAPPER scripts at // /usr/local/bin/pandoc and /usr/local/bin/chromium-browser (shadowing // the real binaries in /usr/bin/) so these names resolve through the // sandbox automatically. Operators running zddc-server outside the // runtime image with raw binaries on PATH still get a working // conversion endpoint — just without the per-call sandbox. // // Alpine's chromium package installs the binary as "chromium-browser"; // debian/ubuntu ships "chromium". Operators override via // --convert-chromium-binary when the package on their image differs. const ( DefaultPandocBinary = "pandoc" DefaultChromiumBinary = "chromium-browser" ) var ( pandocBinary atomic.Pointer[string] chromiumBinary atomic.Pointer[string] scratchDir atomic.Pointer[string] ) // SetBinaries installs the binary names used by Probe/Run. Empty // values keep the previous setting (or the DefaultPandocBinary / // DefaultChromiumBinary constants on first call). The values are // PATH-resolved names (e.g. "pandoc", "chromium-browser") or // absolute paths. Called from cmd/zddc-server/main.go after flag // parsing. func SetBinaries(pandoc, chromium string) { if pandoc != "" { s := pandoc pandocBinary.Store(&s) } if chromium != "" { s := chromium chromiumBinary.Store(&s) } } // SetScratchDir installs the host-side scratch root used for // per-call intermediates (template, HTML, PDF). Empty means "use // $TMPDIR". The runtime-image wrapper bind-mounts the per-call // scratch dir into its sandbox at the same path, so any path under // this root works. func SetScratchDir(dir string) { s := dir scratchDir.Store(&s) } func currentScratchDir() string { if p := scratchDir.Load(); p != nil { return *p } return "" } func currentPandocBinary() string { if p := pandocBinary.Load(); p != nil && *p != "" { return *p } return DefaultPandocBinary } func currentChromiumBinary() string { if p := chromiumBinary.Load(); p != nil && *p != "" { return *p } return DefaultChromiumBinary } // ToDocx renders source markdown to DOCX bytes. Single pandoc exec; // no scratch dir needed (stdin → stdout). The caller passes the // full file content (envelope + body); pandoc handles // `markdown+yaml_metadata_block` natively. func ToDocx(ctx context.Context, source []byte, m Metadata) ([]byte, error) { r := currentRunner() if r == nil { return nil, ErrUnavailable } cmd := []string{ "--from=markdown+yaml_metadata_block", "--to=docx", "--output=-", } cmd = append(cmd, metadataArgs(m)...) cmd = append(cmd, "-") return r.Run(ctx, currentPandocBinary(), source, "", cmd) } // ToHTML renders source markdown to standalone HTML using // viewer-template.html. Embeds CSS + images via --embed-resources. // Template + custom.css live in a per-call scratch dir; the host // path is passed via ZDDC_SCRATCH so the wrapper bind-mounts it // into the sandbox at the same path. func ToHTML(ctx context.Context, source []byte, m Metadata) ([]byte, error) { r := currentRunner() if r == nil { return nil, ErrUnavailable } scratch, err := writeAssetsToScratch(currentScratchDir()) if err != nil { return nil, fmt.Errorf("scratch: %w", err) } defer os.RemoveAll(scratch) tplPath := filepath.Join(scratch, "viewer-template.html") cmd := []string{ "--from=markdown+yaml_metadata_block", "--to=html5", "--standalone", "--embed-resources", "--section-divs", "--id-prefix=", "--html-q-tags", "--template=" + tplPath, } if !m.NoTOC { cmd = append(cmd, "--toc", "--toc-depth=6") } cmd = append(cmd, metadataArgs(m)...) cmd = append(cmd, "--output=-", "-") return r.Run(ctx, currentPandocBinary(), source, scratch, cmd) } // ToPDF renders source markdown to PDF in two stages: pandoc // produces HTML using viewer-template.html (stage 1), then headless // chromium prints that HTML to PDF (stage 2). The two-stage choice // preserves the print-media CSS already authored in viewer- // template.html — pandoc's native --pdf-engine path uses LaTeX // which would bypass it entirely. // // Both stages share a single per-call scratch dir: pandoc writes // `in.html` and chromium reads it, then chromium writes `out.pdf` // which the host reads back. The wrapper bind-mounts the scratch // dir read-write into the sandbox at the same path. func ToPDF(ctx context.Context, source []byte, m Metadata) ([]byte, error) { html, err := ToHTML(ctx, source, m) if err != nil { return nil, err } r := currentRunner() if r == nil { return nil, ErrUnavailable } scratch, err := os.MkdirTemp(currentScratchDir(), "zddc-pdf-") if err != nil { return nil, fmt.Errorf("scratch: %w", err) } defer os.RemoveAll(scratch) htmlPath := filepath.Join(scratch, "in.html") pdfPath := filepath.Join(scratch, "out.pdf") if err := os.WriteFile(htmlPath, html, 0o644); err != nil { return nil, fmt.Errorf("write html: %w", err) } if err := chmodTree(scratch, 0o755, 0o644); err != nil { return nil, err } // --no-sandbox: the wrapper provides the sandbox; chromium's // own setuid sandbox would conflict (and fails inside our // user-namespace anyway). --disable-dev-shm-usage: chromium's // shared-memory fallback writes to /dev/shm which our sandbox // doesn't expose; redirect to /tmp (the wrapper's tmpfs). cmd := []string{ "--headless", "--disable-gpu", "--no-sandbox", "--disable-dev-shm-usage", "--user-data-dir=/tmp/chrome", "--no-pdf-header-footer", "--virtual-time-budget=10000", "--print-to-pdf=" + pdfPath, "file://" + htmlPath, } if _, err := r.Run(ctx, currentChromiumBinary(), nil, scratch, cmd); err != nil { return nil, err } out, err := os.ReadFile(pdfPath) if err != nil { return nil, fmt.Errorf("read pdf: %w", err) } if len(out) < 4 || string(out[:4]) != "%PDF" { return nil, &ConvertError{ Tool: currentChromiumBinary(), ExitCode: 0, Stderr: "chromium did not produce a valid PDF", Cause: fmt.Errorf("invalid PDF magic in output (got %d bytes)", len(out)), } } return out, nil } // metadataArgs renders Metadata into pandoc -V flags. Order is // stable so test fixtures don't churn. Empty values are omitted // (the template uses $if(...)$ blocks). func metadataArgs(m Metadata) []string { var out []string add := func(k, v string) { v = strings.TrimSpace(v) if v == "" { return } out = append(out, "-V", k+"="+v) } add("title", m.Title) add("tracking_number", m.TrackingNumber) add("revision", m.Revision) add("status", m.Status) add("client", m.Client) add("project", m.Project) add("contractor", m.Contractor) add("project_number", m.ProjectNumber) if !m.GenerationTime.IsZero() { add("generation_time", m.GenerationTime.Format("January 02, 2006 at 3:04:05 PM MST")) } if m.IsDraft { add("is_draft", "true") } if m.NoTOC { add("no-toc", "true") } return out }