Replaces the always-spawn-an-OCI-container model with a per-call
bubblewrap sandbox. Pandoc and chromium binaries are baked into the
zddc-server runtime image; each conversion runs them under bwrap's
Linux-namespace isolation. No daemon, no socket, no privileged outer
container, no OCI image pull at conversion time.
Why: the OCI engine cost ≈ 350 MB of image pulls, 400 MB of persistent
storage, and ~300 ms of startup per conversion, and it required either an
on-host daemon socket (zddc-RCE → host-RCE in one hop) or nested
container privileges. bwrap provides the same sandbox properties
(--unshare-all, ro-bind /usr, tmpfs /tmp, clearenv, no-network) at
~5 ms per call with zero external dependencies. This is the same
primitive Flatpak uses for every app launch — battle-tested at scale
for "untrusted-input, short-lived, isolated" workloads.
Runner abstraction:
- `Runner.Run` signature: image string → ToolSpec{Image, Binary}.
Both fields populated by entry points; whichever engine is
installed reads the one it needs.
- `bwrapRunner` (new): assembles bwrap argv via `buildBwrapArgs`
helper (testable in isolation), spawns bwrap with the binary.
- `containerRunner` (renamed conceptually to "legacy fallback"):
unchanged behavior, still reachable for hosts that prefer OCI
containers per conversion.
Probe order in health.Probe: bwrap → podman → docker. First hit wins.
Engine kinds in Capabilities: "bwrap" | "podman" | "docker". The
no-engine error message now lists all three.
Config (cmd/zddc-server):
- new --convert-pandoc-binary / ZDDC_CONVERT_PANDOC_BINARY (default "pandoc")
- new --convert-chromium-binary / ZDDC_CONVERT_CHROMIUM_BINARY (default "chromium-browser")
- existing --convert-pandoc-image / --convert-chromium-image kept
for the OCI engine, doc updated to clarify they only apply there.
- --convert-engine helptext lists bwrap first.
Images:
- New `zddc/runtime.Containerfile` — alpine + bubblewrap + pandoc-cli +
chromium + font-noto. Documents build/publish workflow.
- helm/zddc-server-prod/values.yaml.example: runtimeImage default
switched to a placeholder for the new bundled runtime image; bare
alpine NO LONGER works for /.convert (clearly called out in the
comment).
- bitnest dev: /var/lib/zddc-dev-build/Containerfile mirrors the
production runtime image. Quadlet at /etc/containers/systemd/
zddc.container drops the podman-socket mount (no longer needed)
and sets ZDDC_CONVERT_ENGINE=bwrap explicitly to avoid silent
downgrades if a stray podman ends up on PATH.
Tests:
- convert_test.go: fakeRunner / recordingRunner now record ToolSpec.
- New TestToolSpecPopulation pins that both Image and Binary are
filled by every entry point.
- New TestBwrapArgs_SandboxFlagsPresent / MountTranslation /
RejectsBadMountSpec lock in the bwrap argv shape — a refactor that
drops a hardening flag or misroutes a mount now fails loudly.
Docs:
- AGENTS.md § "Server-side document conversion" rewritten around
the bwrap-first model with podman/docker as legacy fallbacks.
- ARCHITECTURE.md convert reference updated.
- internal/convert package doc reflects the two-engine probe order.
Verified end-to-end on bitnest: probe reports
engine=bwrap pandoc_binary=pandoc chromium_binary=chromium-browser
on startup. All 15 Go test packages green.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
264 lines
8 KiB
Go
264 lines
8 KiB
Go
package convert
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"log/slog"
|
|
"os/exec"
|
|
"strings"
|
|
"sync"
|
|
"sync/atomic"
|
|
"time"
|
|
)
|
|
|
|
// remoteURL holds the podman remote socket URL (sourced from
// cfg.ConvertPodmanSocket). It is installed by SetRemoteURL and read by
// Probe; a nil pointer or an empty string both mean local mode.
var remoteURL atomic.Pointer[string]
|
|
|
|
// Capabilities is the snapshot of "can we convert right now?". The
// only hard requirement is a sandbox engine (bwrap, podman, or docker)
// reachable from zddc-server. For the OCI engines, image presence is
// left to `--pull=missing` at conversion time, so a missing image
// surfaces as a normal ConvertError (not a probe failure).
//
// Mode applies to OCI engines (podman/docker): "local" when the
// engine creates containers in the same process as zddc-server,
// "remote" when zddc-server is the client of a podman-system-service
// sidecar. The bwrap engine has no remote mode (always direct exec).
type Capabilities struct {
	Engine        string    // "bwrap" | "podman" | "docker" | ""
	EngineVer     string    // first line of "<engine> --version"
	Mode          string    // "local" or "remote" (OCI engines only)
	RemoteURL     string    // populated in remote mode (OCI engines only)
	PandocImage   string    // resolved pandoc image ref (OCI engines)
	ChromiumImage string    // resolved chromium image ref (OCI engines)
	ProbedAt      time.Time // when the probe that produced this snapshot ran
	Err           error     // non-nil when the probe failed; see Reason
}

// Ready reports whether conversions can be attempted: an engine was
// resolved and the probe recorded no error. The first conversion may
// still fail if the configured binary or image isn't actually present
// (the runner will surface a clear error from the child process's
// stderr).
func (c Capabilities) Ready() bool {
	return c.Engine != "" && c.Err == nil
}

// Reason returns a short human-friendly explanation when Ready() is
// false. Used as the body of a 503.
//
// When no engine was resolved, the recorded probe error is preferred
// over the generic message: the probe error names the engines that were
// actually tried, which matters when an explicit engine override was
// configured and the default bwrap/podman/docker list was never
// consulted.
func (c Capabilities) Reason() string {
	if c.Engine == "" {
		if c.Err != nil {
			return c.Err.Error()
		}
		// Zero-value snapshot (no probe result recorded).
		return "no conversion sandbox found (looked for bwrap, podman, docker on PATH)"
	}
	if c.Err != nil {
		if c.Mode == "remote" {
			return fmt.Sprintf("podman remote socket unreachable (%s): %s", c.RemoteURL, c.Err.Error())
		}
		return c.Err.Error()
	}
	return "unavailable"
}
|
|
|
|
var (
|
|
caps atomic.Pointer[Capabilities]
|
|
probeCool sync.Mutex
|
|
)
|
|
|
|
// Available returns the current Capabilities snapshot and whether
|
|
// conversions can proceed.
|
|
func Available() (Capabilities, bool) {
|
|
p := caps.Load()
|
|
if p == nil {
|
|
return Capabilities{}, false
|
|
}
|
|
return *p, p.Ready()
|
|
}
|
|
|
|
// SetRemoteURL installs the podman remote socket URL for subsequent
|
|
// Probe / Reprobe calls. Empty means "local mode" (the engine binary
|
|
// creates containers in the same process). Called from
|
|
// cmd/zddc-server/main.go after flag parsing, before Probe.
|
|
func SetRemoteURL(url string) {
|
|
s := url
|
|
remoteURL.Store(&s)
|
|
}
|
|
|
|
func currentRemoteURL() string {
|
|
if p := remoteURL.Load(); p != nil {
|
|
return *p
|
|
}
|
|
return ""
|
|
}
|
|
|
|
// Probe locates the container engine and installs a containerRunner
|
|
// as the package default. Call once at server startup. Returns the
|
|
// captured Capabilities for logging.
|
|
//
|
|
// Engine order: engineOverride (if non-empty) → podman → docker. First
|
|
// hit wins. Image presence is NOT probed: the runner uses
|
|
// `--pull=missing` so the first conversion request will pull whichever
|
|
// image it needs.
|
|
//
|
|
// In remote mode (SetRemoteURL with non-empty URL), the probe also
|
|
// invokes `<engine> --remote --url=<url> version` to confirm the
|
|
// sidecar's socket is reachable. A reachable-engine-but-unreachable-
|
|
// socket state surfaces as Ready=false so conversion requests serve
|
|
// 503 until the sidecar comes up.
|
|
//
|
|
// Any failure here is non-fatal: the server still starts, conversion
|
|
// endpoints just return 503.
|
|
func Probe(ctx context.Context, engineOverride string) Capabilities {
|
|
probeCool.Lock()
|
|
defer probeCool.Unlock()
|
|
|
|
now := time.Now()
|
|
rURL := currentRemoteURL()
|
|
c := Capabilities{
|
|
PandocImage: currentPandocImage(),
|
|
ChromiumImage: currentChromiumImage(),
|
|
Mode: "local",
|
|
RemoteURL: rURL,
|
|
ProbedAt: now,
|
|
}
|
|
if rURL != "" {
|
|
c.Mode = "remote"
|
|
}
|
|
|
|
enginePath := resolveEngine(engineOverride)
|
|
if enginePath == "" {
|
|
c.Err = fmt.Errorf("no conversion sandbox found (tried: %s)", strings.Join(enginesTried(engineOverride), ", "))
|
|
caps.Store(&c)
|
|
slog.Warn("convert: probe failed", "reason", c.Err.Error())
|
|
return c
|
|
}
|
|
kind := engineKind(enginePath)
|
|
c.Engine = kind
|
|
|
|
if v, err := probeVersion(ctx, enginePath); err == nil {
|
|
c.EngineVer = v
|
|
}
|
|
|
|
// bwrap engine: no remote-mode concept, just install the runner.
|
|
// The bwrap binary IS the sandbox; conversion binaries (pandoc,
|
|
// chromium) are resolved separately from PATH at call time and
|
|
// reported by the convert-health endpoint when ready.
|
|
if kind == "bwrap" {
|
|
InstallRunner(newBwrapRunner(enginePath))
|
|
caps.Store(&c)
|
|
slog.Info("convert: ready",
|
|
"engine", kind,
|
|
"engine_path", enginePath,
|
|
"engine_version", c.EngineVer,
|
|
"pandoc_binary", currentPandocBinary(),
|
|
"chromium_binary", currentChromiumBinary())
|
|
return c
|
|
}
|
|
|
|
// Legacy OCI engine (podman/docker). Optional remote-socket
|
|
// connectivity check, then install containerRunner.
|
|
if rURL != "" {
|
|
if err := probeRemoteSocket(ctx, enginePath, rURL); err != nil {
|
|
c.Err = err
|
|
caps.Store(&c)
|
|
slog.Warn("convert: remote socket probe failed",
|
|
"engine", kind, "remote_url", rURL, "err", err)
|
|
return c
|
|
}
|
|
}
|
|
|
|
InstallRunner(newContainerRunner(enginePath, rURL))
|
|
caps.Store(&c)
|
|
slog.Info("convert: ready",
|
|
"engine", kind,
|
|
"engine_path", enginePath,
|
|
"engine_version", c.EngineVer,
|
|
"mode", c.Mode,
|
|
"remote_url", c.RemoteURL,
|
|
"pandoc_image", c.PandocImage,
|
|
"chromium_image", c.ChromiumImage)
|
|
return c
|
|
}
|
|
|
|
// remoteProbeTimeout bounds the remote-socket connectivity check so an
// unresponsive socket cannot stall the caller indefinitely.
const remoteProbeTimeout = 5 * time.Second

// probeRemoteSocket runs `<engine> --remote --url=<url> version` with
// a short timeout (remoteProbeTimeout, or the caller's deadline if that
// is sooner). Returns nil on success; a wrapped error otherwise.
// The remote URL is typically a Unix socket path
// (unix:///var/run/podman/podman.sock) in the sidecar pattern but a
// TCP form (tcp://host:port) is accepted too.
func probeRemoteSocket(ctx context.Context, engine, url string) error {
	probeCtx, cancel := context.WithTimeout(ctx, remoteProbeTimeout)
	defer cancel()

	cmd := exec.CommandContext(probeCtx, engine, "--remote", "--url="+url, "version", "--format={{.Client.Version}}")
	out, err := cmd.CombinedOutput()
	if err == nil {
		return nil
	}
	trimmed := strings.TrimSpace(string(out))
	// Our own deadline fired (the caller's context is still live): say
	// so explicitly, because the raw error is just the kill signal.
	if probeCtx.Err() != nil && ctx.Err() == nil {
		return fmt.Errorf("podman --remote version: timed out after %s: %w (output: %s)", remoteProbeTimeout, err, trimmed)
	}
	return fmt.Errorf("podman --remote version: %w (output: %s)", err, trimmed)
}
|
|
|
|
// Reprobe re-runs Probe with the existing configuration. Used by the
|
|
// handler when a request hits a not-Ready state — gives the operator
|
|
// a way to recover (e.g. installed podman after the server started)
|
|
// without a server restart. Cooldown of 60 s between probes to keep
|
|
// error-path requests cheap.
|
|
func Reprobe(ctx context.Context, engineOverride string) Capabilities {
|
|
if p := caps.Load(); p != nil {
|
|
if time.Since(p.ProbedAt) < 60*time.Second {
|
|
return *p
|
|
}
|
|
}
|
|
return Probe(ctx, engineOverride)
|
|
}
|
|
|
|
// resolveEngine returns the PATH-resolved location of the sandbox
// engine, or "" when none is found.
//
// A non-empty override is the only candidate considered. Otherwise the
// probe order is bwrap (production default — lightest sandbox, no
// daemon, no OCI engine), then podman / docker as legacy fallbacks for
// hosts that already have a container engine and want OCI-image
// isolation per conversion.
func resolveEngine(override string) string {
	candidates := []string{"bwrap", "podman", "docker"}
	if override != "" {
		candidates = []string{override}
	}
	for _, candidate := range candidates {
		resolved, err := exec.LookPath(candidate)
		if err != nil {
			continue
		}
		return resolved
	}
	return ""
}
|
|
|
|
// enginesTried lists the engine names a probe would look up for the
// given override: the override alone when it is set, otherwise the
// default bwrap, podman, docker sequence. Used to build the "tried: …"
// part of the probe error.
func enginesTried(override string) []string {
	if override == "" {
		return []string{"bwrap", "podman", "docker"}
	}
	return []string{override}
}
|
|
|
|
// engineKind maps a resolved binary path to its engine-family label,
// which Probe uses to pick the Runner implementation. The label is the
// final "/"-separated path element; "bwrap", "podman" and "docker" map
// to themselves, "podman-remote" is folded into "podman", and any other
// name is passed through unchanged.
func engineKind(resolved string) string {
	// LastIndex yields -1 when there is no slash, so the slice below
	// starts at 0 and keeps the whole string.
	name := resolved[strings.LastIndex(resolved, "/")+1:]
	if name == "podman-remote" {
		return "podman"
	}
	return name
}
|
|
|
|
// probeVersion runs `<engine> --version` and returns the first line of
// its combined stdout/stderr, with surrounding whitespace trimmed. A
// failing command yields "" and the command's error.
func probeVersion(ctx context.Context, engine string) (string, error) {
	raw, err := exec.CommandContext(ctx, engine, "--version").CombinedOutput()
	if err != nil {
		return "", err
	}
	first, _, _ := strings.Cut(strings.TrimSpace(string(raw)), "\n")
	return first, nil
}
|