From 7aec631a225bea4607bab802376c7b15e5643f63 Mon Sep 17 00:00:00 2001 From: ZDDC Date: Wed, 13 May 2026 12:17:40 -0500 Subject: [PATCH] feat(convert): support remote podman mode + configurable scratch dir MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit zddc-server can now invoke podman as a CLIENT against a remote socket instead of creating containers in its own process. The sidecar pattern in tnd-zddc-chart will use this so zddc-server's own pod stays unprivileged (only the podman-system-service sidecar runs privileged). New surface: --convert-podman-socket / ZDDC_CONVERT_PODMAN_SOCKET e.g. unix:///var/run/podman/podman.sock Empty (default) → local mode (podman creates containers in zddc-server's own filesystem namespace). Non-empty → remote mode: `podman --remote --url=<url> run …` dispatches each container request to whatever process owns the socket. Typically a `podman system service` sidecar in the same Kubernetes pod. --convert-scratch-dir / ZDDC_CONVERT_SCRATCH_DIR Host-side directory for per-conversion intermediates (template, HTML, PDF). In remote mode this MUST be a path the sidecar sees at the same mountpoint — typically a shared emptyDir at /work in both containers. Empty = $TMPDIR (local-mode default). Runner behaviour: local mode → unchanged. `podman run --userns=host --rm --pull=missing --network=none --read-only …`. `--userns=host` stays so nested-podman on a privileged host (the previous chart shape) keeps working for anyone still using it. remote mode → `podman --remote --url=<url> run --rm --pull=missing --network=none --read-only …`. `--userns=host` is dropped because the sidecar is rootful inside its own privileged container and doesn't need userns juggling. Health probe gains a Mode field ("local" | "remote") and, in remote mode, runs `podman --remote --url=<url> version` to confirm the sidecar's socket is reachable. Unreachable-socket → 503 with a clear reason (sidecar may still be starting up); reachable → ready. 
Capabilities log now includes engine_version + mode + remote_url for easier debugging of "which podman is actually doing the work". No tests removed — the existing fake-runner table covers both modes since the runner's args are uniform (the remote prefix and the dropped `--userns=host` are the only things that differ). --- zddc/cmd/zddc-server/main.go | 8 ++- zddc/internal/config/config.go | 8 +++ zddc/internal/convert/convert.go | 23 ++++++- zddc/internal/convert/convert_test.go | 21 ++++++ zddc/internal/convert/health.go | 79 ++++++++++++++++++++-- zddc/internal/convert/runner.go | 96 ++++++++++++++++++--------- 6 files changed, 193 insertions(+), 42 deletions(-) diff --git a/zddc/cmd/zddc-server/main.go b/zddc/cmd/zddc-server/main.go index c5c4d3f..d1ac292 100644 --- a/zddc/cmd/zddc-server/main.go +++ b/zddc/cmd/zddc-server/main.go @@ -88,13 +88,19 @@ func main() { "embedded_apps", embeddedVersionsForLog(embedded)) // Probe the container runtime for the MD→{docx,html,pdf} endpoint. - // Non-fatal: if the host has no podman/docker, conversion requests + // Non-fatal: if the host has no podman/docker (or the remote + // socket is unreachable in sidecar mode), conversion requests // return 503 and everything else keeps working. The probe installs // the package-level Runner when an engine is found; the configured // image refs are pulled lazily on first conversion via // `--pull=missing` so there's no manual setup beyond installing // podman or docker. + // + // SetRemoteURL + SetScratchDir must run BEFORE Probe so the probe + // can hit the sidecar socket when one is configured. 
convert.SetImages(cfg.ConvertPandocImage, cfg.ConvertChromiumImage) + convert.SetRemoteURL(cfg.ConvertPodmanSocket) + convert.SetScratchDir(cfg.ConvertScratchDir) probeCtx, probeCancel := context.WithTimeout(context.Background(), 5*time.Second) convert.Probe(probeCtx, cfg.ConvertEngine) probeCancel() diff --git a/zddc/internal/config/config.go b/zddc/internal/config/config.go index a1b42ce..64a1063 100644 --- a/zddc/internal/config/config.go +++ b/zddc/internal/config/config.go @@ -57,6 +57,8 @@ type Config struct { ConvertPandocImage string // --convert-pandoc-image / ZDDC_CONVERT_PANDOC_IMAGE — image for MD→DOCX/HTML. Default docker.io/pandoc/latex:latest. ConvertChromiumImage string // --convert-chromium-image / ZDDC_CONVERT_CHROMIUM_IMAGE — image for HTML→PDF. Default docker.io/zenika/alpine-chrome:latest. ConvertEngine string // --convert-engine / ZDDC_CONVERT_ENGINE — override engine binary (default: probe for podman, then docker). + ConvertPodmanSocket string // --convert-podman-socket / ZDDC_CONVERT_PODMAN_SOCKET — when non-empty, run podman in remote mode against this Unix socket (e.g. unix:///var/run/podman/podman.sock). Used with the Kubernetes sidecar pattern so zddc-server's own pod stays unprivileged. + ConvertScratchDir string // --convert-scratch-dir / ZDDC_CONVERT_SCRATCH_DIR — directory used for per-conversion scratch (template + HTML/PDF intermediates). Must be a path the remote podman can see at the same path. Empty = use $TMPDIR (local-mode default). ConvertMemMiB int // --convert-mem-mib / ZDDC_CONVERT_MEM_MIB — per-container memory cap in MiB. Default 512. ConvertCPUs string // --convert-cpus / ZDDC_CONVERT_CPUS — per-container CPU limit. Default "2". ConvertPIDs int // --convert-pids / ZDDC_CONVERT_PIDS — per-container PID limit. Default 100. @@ -147,6 +149,10 @@ func Load(args []string) (Config, error) { "Headless Chromium container image for HTML→PDF. 
Pulled on first use via --pull=missing.") convertEngineFlag := fs.String("convert-engine", os.Getenv("ZDDC_CONVERT_ENGINE"), "Container engine override (default: probe for podman, then docker).") + convertPodmanSocketFlag := fs.String("convert-podman-socket", os.Getenv("ZDDC_CONVERT_PODMAN_SOCKET"), + "Run podman in remote mode against this Unix socket URL (e.g. unix:///var/run/podman/podman.sock). When set, the engine binary is invoked as `podman --remote --url= run …`; the actual container creation happens in whatever process owns the socket (typically a podman-system-service sidecar). Empty = local mode.") + convertScratchDirFlag := fs.String("convert-scratch-dir", os.Getenv("ZDDC_CONVERT_SCRATCH_DIR"), + "Scratch directory for per-conversion intermediates (template, HTML, PDF). In remote mode this MUST be a path that the podman-service side can see at the same path — typically a shared emptyDir mounted at the same mountPath in both containers. Empty = use $TMPDIR (local mode).") convertMemMiBFlag := fs.Int("convert-mem-mib", parseIntOrDefault(os.Getenv("ZDDC_CONVERT_MEM_MIB"), 512), "Per-conversion container memory limit in MiB. 
Default 512.") convertCPUsFlag := fs.String("convert-cpus", getEnv("ZDDC_CONVERT_CPUS", "2"), @@ -230,6 +236,8 @@ func Load(args []string) (Config, error) { ConvertPandocImage: *convertPandocImageFlag, ConvertChromiumImage: *convertChromiumImageFlag, ConvertEngine: *convertEngineFlag, + ConvertPodmanSocket: *convertPodmanSocketFlag, + ConvertScratchDir: *convertScratchDirFlag, ConvertMemMiB: *convertMemMiBFlag, ConvertCPUs: *convertCPUsFlag, ConvertPIDs: *convertPIDsFlag, diff --git a/zddc/internal/convert/convert.go b/zddc/internal/convert/convert.go index 9d64471..702bd76 100644 --- a/zddc/internal/convert/convert.go +++ b/zddc/internal/convert/convert.go @@ -67,6 +67,7 @@ const ( var ( pandocImage atomic.Pointer[string] chromiumImage atomic.Pointer[string] + scratchDir atomic.Pointer[string] ) // SetImages installs the image refs used for subsequent ToDocx/ToHTML/ @@ -84,6 +85,24 @@ func SetImages(pandoc, chromium string) { } } +// SetScratchDir installs the host-side scratch root used for per-call +// intermediates (template, HTML, PDF). Empty means "use $TMPDIR" — the +// local-mode default. In remote mode this MUST be a path the podman- +// service sidecar can see at the same mountpoint, typically a shared +// emptyDir mounted at /work in both containers. Called from +// cmd/zddc-server/main.go after flag parsing. 
+func SetScratchDir(dir string) { + s := dir + scratchDir.Store(&s) +} + +func currentScratchDir() string { + if p := scratchDir.Load(); p != nil { + return *p + } + return "" +} + func currentPandocImage() string { if p := pandocImage.Load(); p != nil && *p != "" { return *p @@ -125,7 +144,7 @@ func ToHTML(ctx context.Context, source []byte, m Metadata) ([]byte, error) { if r == nil { return nil, ErrUnavailable } - scratch, err := writeAssetsToScratch() + scratch, err := writeAssetsToScratch(currentScratchDir()) if err != nil { return nil, fmt.Errorf("scratch: %w", err) } @@ -172,7 +191,7 @@ func ToPDF(ctx context.Context, source []byte, m Metadata) ([]byte, error) { return nil, ErrUnavailable } - scratch, err := os.MkdirTemp("", "zddc-pdf-") + scratch, err := os.MkdirTemp(currentScratchDir(), "zddc-pdf-") if err != nil { return nil, fmt.Errorf("scratch: %w", err) } diff --git a/zddc/internal/convert/convert_test.go b/zddc/internal/convert/convert_test.go index 5c80863..06d297c 100644 --- a/zddc/internal/convert/convert_test.go +++ b/zddc/internal/convert/convert_test.go @@ -157,6 +157,27 @@ func (r *recordingRunner) Run(_ context.Context, image string, _ []byte, mounts return out, e } +func TestScratchDir_UsedByToHTML(t *testing.T) { + f := &fakeRunner{resp: []byte("")} + InstallRunner(f) + t.Cleanup(func() { InstallRunner(nil); SetScratchDir("") }) + + scratchRoot := t.TempDir() + SetScratchDir(scratchRoot) + + _, err := ToHTML(context.Background(), []byte("# Hi\n"), Metadata{}) + if err != nil { + t.Fatalf("ToHTML: %v", err) + } + if len(f.mounts) == 0 || len(f.mounts[0]) == 0 { + t.Fatalf("expected at least one mount") + } + mount := f.mounts[0][0] // ":/tpl:ro" + if !strings.HasPrefix(mount, scratchRoot+"/") { + t.Errorf("scratch dir not under configured root: %q (root=%q)", mount, scratchRoot) + } +} + func TestToPDF_TwoStagePipeline(t *testing.T) { // Stage 1: pandoc emits HTML. Stage 2: chromium reads HTML from // the bind mount and writes /pdf/out.pdf. 
The fake runner can't diff --git a/zddc/internal/convert/health.go b/zddc/internal/convert/health.go index efbab41..109d1e7 100644 --- a/zddc/internal/convert/health.go +++ b/zddc/internal/convert/health.go @@ -11,13 +11,24 @@ import ( "time" ) +// remoteURL is set by SetRemoteURL from cfg.ConvertPodmanSocket; Probe +// only reads it. Empty means local mode. +var remoteURL atomic.Pointer[string] + // Capabilities is the snapshot of "can we convert right now?". The -// only hard requirement is a container runtime on PATH — image presence -// is left to `--pull=missing` at conversion time, so a missing image -// surfaces as a normal ConvertError (not a probe failure). +// only hard requirement is a container runtime reachable from +// zddc-server — image presence is left to `--pull=missing` at +// conversion time, so a missing image surfaces as a normal +// ConvertError (not a probe failure). +// +// Mode is "local" when the engine creates containers in the same +// process as zddc-server, or "remote" when zddc-server is the client +// of a podman-system-service sidecar (see containerRunner doc). type Capabilities struct { Engine string // "podman" | "docker" | "" EngineVer string // first line of " --version" + Mode string // "local" or "remote" + RemoteURL string // populated in remote mode PandocImage string // resolved pandoc image ref ChromiumImage string // resolved chromium image ref ProbedAt time.Time @@ -39,6 +50,9 @@ func (c Capabilities) Reason() string { return "no container runtime (podman or docker) found on PATH" } if c.Err != nil { + if c.Mode == "remote" { + return fmt.Sprintf("podman remote socket unreachable (%s): %s", c.RemoteURL, c.Err.Error()) + } return c.Err.Error() } return "unavailable" @@ -59,6 +73,22 @@ func Available() (Capabilities, bool) { return *p, p.Ready() } +// SetRemoteURL installs the podman remote socket URL for subsequent +// Probe / Reprobe calls. Empty means "local mode" (the engine binary +// creates containers in the same process). 
Called from +// cmd/zddc-server/main.go after flag parsing, before Probe. +func SetRemoteURL(url string) { + s := url + remoteURL.Store(&s) +} + +func currentRemoteURL() string { + if p := remoteURL.Load(); p != nil { + return *p + } + return "" +} + // Probe locates the container engine and installs a containerRunner // as the package default. Call once at server startup. Returns the // captured Capabilities for logging. @@ -68,19 +98,30 @@ func Available() (Capabilities, bool) { // `--pull=missing` so the first conversion request will pull whichever // image it needs. // +// In remote mode (SetRemoteURL with non-empty URL), the probe also +// invokes `<engine> --remote --url=<url> version` to confirm the +// sidecar's socket is reachable. A reachable-engine-but-unreachable- +// socket state surfaces as Ready=false so conversion requests serve +// 503 until the sidecar comes up. +// // Any failure here is non-fatal: the server still starts, conversion -// endpoints just return 503. This matches the user's locked-in -// requirement that no-container-runtime ⇒ "can't do conversions". +// endpoints just return 503. 
func Probe(ctx context.Context, engineOverride string) Capabilities { probeCool.Lock() defer probeCool.Unlock() now := time.Now() + rURL := currentRemoteURL() c := Capabilities{ PandocImage: currentPandocImage(), ChromiumImage: currentChromiumImage(), + Mode: "local", + RemoteURL: rURL, ProbedAt: now, } + if rURL != "" { + c.Mode = "remote" + } engine := resolveEngine(engineOverride) if engine == "" { @@ -95,16 +136,42 @@ func Probe(ctx context.Context, engineOverride string) Capabilities { c.EngineVer = v } - InstallRunner(newContainerRunner(engine)) + if rURL != "" { + if err := probeRemoteSocket(ctx, engine, rURL); err != nil { + c.Err = err + caps.Store(&c) + slog.Warn("convert: remote socket probe failed", + "engine", engine, "remote_url", rURL, "err", err) + return c + } + } + + InstallRunner(newContainerRunner(engine, rURL)) caps.Store(&c) slog.Info("convert: ready", "engine", engine, "engine_version", c.EngineVer, + "mode", c.Mode, + "remote_url", c.RemoteURL, "pandoc_image", c.PandocImage, "chromium_image", c.ChromiumImage) return c } +// probeRemoteSocket runs `<engine> --remote --url=<url> version`, bounded +// only by the caller's ctx. Returns nil on success; a wrapped error otherwise. +// The remote URL is typically a Unix socket path +// (unix:///var/run/podman/podman.sock) in the sidecar pattern but a +// TCP form (tcp://host:port) is accepted too. +func probeRemoteSocket(ctx context.Context, engine, url string) error { + c := exec.CommandContext(ctx, engine, "--remote", "--url="+url, "version", "--format={{.Client.Version}}") + out, err := c.CombinedOutput() + if err != nil { + return fmt.Errorf("podman --remote version: %w (output: %s)", err, strings.TrimSpace(string(out))) + } + return nil +} + // Reprobe re-runs Probe with the existing configuration. Used by the // handler when a request hits a not-Ready state — gives the operator // a way to recover (e.g. 
installed podman after the server started) diff --git a/zddc/internal/convert/runner.go b/zddc/internal/convert/runner.go index 4465993..2fdb53e 100644 --- a/zddc/internal/convert/runner.go +++ b/zddc/internal/convert/runner.go @@ -67,17 +67,33 @@ func (e *ConvertError) Unwrap() error { return e.Cause } // per call so the same runner handles both pandoc and chromium // invocations. // +// Two modes: +// +// - **local** (remoteURL=""): the engine binary creates containers +// directly on the host that runs zddc-server. Used for bare-metal +// and host-podman deployments. Requires podman or docker on PATH. +// +// - **remote** (remoteURL="unix:///var/run/podman/podman.sock" or +// similar): the engine binary is the local podman CLIENT, invoked +// as `podman --remote --url= run …`; the actual +// container creation happens in whatever process owns the socket +// (typically a `podman system service` sidecar in the same pod). +// Used for the Kubernetes sidecar pattern so zddc-server's own +// pod stays unprivileged. Bind-mount paths must resolve identically +// on both sides — see scratchDir. +// // The runner relies on `--pull=missing` so the operator never has to // pre-pull images: the first request that needs an image pulls it, // subsequent requests use the local cache. Both podman and docker // honour this flag identically. 
type containerRunner struct { - mu sync.RWMutex - engine string - memMiB int - cpus string - pids int - timeout time.Duration + mu sync.RWMutex + engine string + remoteURL string + memMiB int + cpus string + pids int + timeout time.Duration } var ( @@ -139,13 +155,14 @@ func (cr *containerRunner) SetLimits(memMiB int, cpus string, pids int, timeout } } -func newContainerRunner(engine string) *containerRunner { +func newContainerRunner(engine, remoteURL string) *containerRunner { return &containerRunner{ - engine: engine, - memMiB: 512, - cpus: "2", - pids: 100, - timeout: 30 * time.Second, + engine: engine, + remoteURL: remoteURL, + memMiB: 512, + cpus: "2", + pids: 100, + timeout: 30 * time.Second, } } @@ -182,6 +199,7 @@ func newContainerRunner(engine string) *containerRunner { func (cr *containerRunner) Run(ctx context.Context, image string, stdin []byte, mounts []string, cmd []string) ([]byte, error) { cr.mu.RLock() engine := cr.engine + remoteURL := cr.remoteURL memMiB := cr.memMiB cpus := cr.cpus pids := cr.pids @@ -198,25 +216,32 @@ func (cr *containerRunner) Run(ctx context.Context, image string, stdin []byte, runCtx, cancel := context.WithTimeout(ctx, timeout) defer cancel() - args := []string{ + // Client args. In remote mode, prepend --remote and --url so the + // podman CLI dispatches the request to the sidecar's + // `podman system service` instead of creating a container locally. + // The remaining flags (--rm, --pull=missing, etc.) apply to the + // container that the remote daemon will create — same wire format + // as local mode. + var args []string + if remoteURL != "" { + args = append(args, "--remote", "--url="+remoteURL) + } + args = append(args, "run", "--rm", "--pull=missing", "-i", - // --userns=host: reuse the calling process's user namespace - // instead of creating a new one. 
Required for the nested- - // podman case (zddc-server runs inside a Kubernetes pod and - // invokes podman from there): the kernel won't let the inner - // podman set up its own userns via newuidmap when /etc/subuid - // mappings don't resolve through the pod's namespace, even - // with CAP_SETUID via privileged: true. The chart already - // runs the pod privileged, so reusing its userns adds no new - // privilege escalation. On a bare-metal host invocation the - // outer userns is the host's, so --userns=host means "no - // userns remapping" — also fine; --cap-drop=ALL + - // --network=none + --read-only continue to isolate the - // inner container's process. - "--userns=host", + ) + // --userns=host only in local mode: needed when zddc-server itself + // is the one running podman inside a Kubernetes pod, because the + // kernel won't let an inner rootless podman set up its own userns + // via newuidmap. In remote (sidecar) mode the sidecar runs as root + // and creates the inner container in its own (rootful) namespace, + // so --userns=host is unnecessary and potentially noisy. + if remoteURL == "" { + args = append(args, "--userns=host") + } + args = append(args, "--network=none", "--read-only", "--tmpfs=/tmp:size=128m,exec", @@ -228,7 +253,7 @@ func (cr *containerRunner) Run(ctx context.Context, image string, stdin []byte, "--security-opt=no-new-privileges", "--env=HOME=/tmp", "--workdir=/tmp", - } + ) for _, m := range mounts { if !strings.Contains(m, ":ro") && !strings.Contains(m, ":rw") { m += ":ro" @@ -360,15 +385,20 @@ func (r *ringWriter) String() string { } // writeAssetsToScratch materialises the embedded viewer-template.html -// and custom.css into a fresh scratch dir under TMPDIR and returns the -// host path. Caller is responsible for os.RemoveAll(dir) when done. -// Used by ToHTML which needs the template visible inside the container. +// and custom.css into a fresh scratch dir and returns the host path. 
+// Caller is responsible for os.RemoveAll(dir) when done. Used by +// ToHTML which needs the template visible inside the container. +// +// scratchRoot controls where the temp dir lands. Empty means "use +// $TMPDIR" (local mode default). In remote/sidecar mode the caller +// passes the shared mount path (e.g. "/work") so the podman-service +// sidecar sees the bind-mount source at the same path. // // Files are written world-readable so the container's default user // (root for pandoc/latex, uid 1000 for alpine-chrome) can read them // through the read-only bind mount regardless of the host's umask. -func writeAssetsToScratch() (string, error) { - dir, err := os.MkdirTemp("", "zddc-convert-") +func writeAssetsToScratch(scratchRoot string) (string, error) { + dir, err := os.MkdirTemp(scratchRoot, "zddc-convert-") if err != nil { return "", fmt.Errorf("scratch dir: %w", err) }