zddc-server can now invoke podman as a CLIENT against a remote socket
instead of creating containers in its own process. The sidecar pattern
in tnd-zddc-chart will use this so zddc-server's own pod stays
unprivileged (only the podman-system-service sidecar runs privileged).
New surface:
--convert-podman-socket / ZDDC_CONVERT_PODMAN_SOCKET
e.g. unix:///var/run/podman/podman.sock
Empty (default) → local mode (podman creates containers in
zddc-server's own filesystem namespace).
Non-empty → remote mode: `podman --remote --url=<this> run …`
dispatches each container request to whatever process owns the
socket. Typically a `podman system service` sidecar in the same
Kubernetes pod.
--convert-scratch-dir / ZDDC_CONVERT_SCRATCH_DIR
Host-side directory for per-conversion intermediates (template,
HTML, PDF). In remote mode this MUST be a path the sidecar sees
at the same mountpoint — typically a shared emptyDir at /work
in both containers. Empty = $TMPDIR (local-mode default).
Runner behaviour:
local mode → unchanged. `podman run --userns=host --rm --pull=missing
--network=none --read-only …`. `--userns=host` stays so nested-podman
on a privileged host (the previous chart shape) keeps working for
anyone still using it.
remote mode → `podman --remote --url=<sock> run --rm --pull=missing
--network=none --read-only …`. `--userns=host` is dropped because
the sidecar is rootful inside its own privileged container and
doesn't need userns juggling.
Health probe gains a Mode field ("local" | "remote") and, in remote
mode, runs `podman --remote --url=<sock> version` to confirm the
sidecar's socket is reachable. Unreachable-socket → 503 with a clear
reason (sidecar may still be starting up); reachable → ready.
Capabilities log now includes engine_version + mode + remote_url for
easier debugging of "which podman is actually doing the work".
No tests removed — the existing fake-runner table covers both modes
since the runner's args are otherwise uniform (the remote prefix and
the dropped `--userns=host` are the only things that differ).
219 lines
6.4 KiB
Go
219 lines
6.4 KiB
Go
package convert
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"log/slog"
|
|
"os/exec"
|
|
"strings"
|
|
"sync"
|
|
"sync/atomic"
|
|
"time"
|
|
)
|
|
|
|
// remoteURL holds the podman remote socket URL consulted by Probe. It
// is written by SetRemoteURL and read through currentRemoteURL; Probe
// itself never modifies it. A nil pointer (never set) and an empty
// string both mean local mode.
var remoteURL atomic.Pointer[string]
|
|
|
|
// Capabilities is a point-in-time answer to "can we convert right
// now?". The only hard requirement is a container runtime reachable
// from zddc-server; image presence is left to `--pull=missing` at
// conversion time, so a missing image shows up as an ordinary
// ConvertError rather than as a probe failure.
//
// Mode is "local" when the engine creates containers in the same
// process as zddc-server, and "remote" when zddc-server acts as the
// client of a podman-system-service sidecar (see ContainerRunner doc).
type Capabilities struct {
	// Engine is the resolved container engine ("podman" | "docker"),
	// or "" when none was found.
	Engine string
	// EngineVer is the first line printed by "<engine> --version".
	EngineVer string
	// Mode is either "local" or "remote".
	Mode string
	// RemoteURL is the remote socket URL; populated in remote mode.
	RemoteURL string
	// PandocImage is the resolved pandoc image ref.
	PandocImage string
	// ChromiumImage is the resolved chromium image ref.
	ChromiumImage string
	// ProbedAt records when this snapshot was taken.
	ProbedAt time.Time
	// Err is the probe failure, or nil when the probe succeeded.
	Err error
}
|
|
|
|
// Ready reports whether conversions can be attempted. The first
|
|
// conversion may still fail if the configured image isn't reachable
|
|
// from the host's registry (the runner will surface a clear error
|
|
// from podman/docker stderr).
|
|
func (c Capabilities) Ready() bool {
|
|
return c.Engine != "" && c.Err == nil
|
|
}
|
|
|
|
// Reason returns a short human-friendly explanation when Ready() is
|
|
// false. Used as the body of a 503.
|
|
func (c Capabilities) Reason() string {
|
|
if c.Engine == "" {
|
|
return "no container runtime (podman or docker) found on PATH"
|
|
}
|
|
if c.Err != nil {
|
|
if c.Mode == "remote" {
|
|
return fmt.Sprintf("podman remote socket unreachable (%s): %s", c.RemoteURL, c.Err.Error())
|
|
}
|
|
return c.Err.Error()
|
|
}
|
|
return "unavailable"
|
|
}
|
|
|
|
var (
|
|
caps atomic.Pointer[Capabilities]
|
|
probeCool sync.Mutex
|
|
)
|
|
|
|
// Available returns the current Capabilities snapshot and whether
|
|
// conversions can proceed.
|
|
func Available() (Capabilities, bool) {
|
|
p := caps.Load()
|
|
if p == nil {
|
|
return Capabilities{}, false
|
|
}
|
|
return *p, p.Ready()
|
|
}
|
|
|
|
// SetRemoteURL installs the podman remote socket URL for subsequent
|
|
// Probe / Reprobe calls. Empty means "local mode" (the engine binary
|
|
// creates containers in the same process). Called from
|
|
// cmd/zddc-server/main.go after flag parsing, before Probe.
|
|
func SetRemoteURL(url string) {
|
|
s := url
|
|
remoteURL.Store(&s)
|
|
}
|
|
|
|
func currentRemoteURL() string {
|
|
if p := remoteURL.Load(); p != nil {
|
|
return *p
|
|
}
|
|
return ""
|
|
}
|
|
|
|
// Probe locates the container engine and installs a containerRunner
|
|
// as the package default. Call once at server startup. Returns the
|
|
// captured Capabilities for logging.
|
|
//
|
|
// Engine order: engineOverride (if non-empty) → podman → docker. First
|
|
// hit wins. Image presence is NOT probed: the runner uses
|
|
// `--pull=missing` so the first conversion request will pull whichever
|
|
// image it needs.
|
|
//
|
|
// In remote mode (SetRemoteURL with non-empty URL), the probe also
|
|
// invokes `<engine> --remote --url=<url> version` to confirm the
|
|
// sidecar's socket is reachable. A reachable-engine-but-unreachable-
|
|
// socket state surfaces as Ready=false so conversion requests serve
|
|
// 503 until the sidecar comes up.
|
|
//
|
|
// Any failure here is non-fatal: the server still starts, conversion
|
|
// endpoints just return 503.
|
|
func Probe(ctx context.Context, engineOverride string) Capabilities {
|
|
probeCool.Lock()
|
|
defer probeCool.Unlock()
|
|
|
|
now := time.Now()
|
|
rURL := currentRemoteURL()
|
|
c := Capabilities{
|
|
PandocImage: currentPandocImage(),
|
|
ChromiumImage: currentChromiumImage(),
|
|
Mode: "local",
|
|
RemoteURL: rURL,
|
|
ProbedAt: now,
|
|
}
|
|
if rURL != "" {
|
|
c.Mode = "remote"
|
|
}
|
|
|
|
engine := resolveEngine(engineOverride)
|
|
if engine == "" {
|
|
c.Err = fmt.Errorf("no container runtime found (tried: %s)", strings.Join(enginesTried(engineOverride), ", "))
|
|
caps.Store(&c)
|
|
slog.Warn("convert: probe failed", "reason", c.Err.Error())
|
|
return c
|
|
}
|
|
c.Engine = engine
|
|
|
|
if v, err := probeVersion(ctx, engine); err == nil {
|
|
c.EngineVer = v
|
|
}
|
|
|
|
if rURL != "" {
|
|
if err := probeRemoteSocket(ctx, engine, rURL); err != nil {
|
|
c.Err = err
|
|
caps.Store(&c)
|
|
slog.Warn("convert: remote socket probe failed",
|
|
"engine", engine, "remote_url", rURL, "err", err)
|
|
return c
|
|
}
|
|
}
|
|
|
|
InstallRunner(newContainerRunner(engine, rURL))
|
|
caps.Store(&c)
|
|
slog.Info("convert: ready",
|
|
"engine", engine,
|
|
"engine_version", c.EngineVer,
|
|
"mode", c.Mode,
|
|
"remote_url", c.RemoteURL,
|
|
"pandoc_image", c.PandocImage,
|
|
"chromium_image", c.ChromiumImage)
|
|
return c
|
|
}
|
|
|
|
// probeRemoteSocket runs `<engine> --remote --url=<url> version` to
// check that the remote socket answers.
//
// The command runs under its own 5-second deadline derived from ctx,
// so a socket that accepts connections but never replies cannot stall
// the caller indefinitely; cancelling ctx still aborts it earlier.
//
// The remote URL is typically a Unix socket path
// (unix:///var/run/podman/podman.sock) in the sidecar pattern, but a
// TCP form (tcp://host:port) is accepted too.
//
// Returns nil on success. On failure the error wraps the exec error
// and carries the command's trimmed combined output; when the deadline
// or a cancellation cut the command short, the context error is
// wrapped as well so the reason reads as a timeout rather than a bare
// "signal: killed".
func probeRemoteSocket(ctx context.Context, engine, url string) error {
	const timeout = 5 * time.Second

	ctx, cancel := context.WithTimeout(ctx, timeout)
	defer cancel()

	cmd := exec.CommandContext(ctx, engine, "--remote", "--url="+url, "version", "--format={{.Client.Version}}")
	out, err := cmd.CombinedOutput()
	if err == nil {
		return nil
	}

	output := strings.TrimSpace(string(out))
	if ctxErr := ctx.Err(); ctxErr != nil {
		return fmt.Errorf("podman --remote version: %w: %w (output: %s)", ctxErr, err, output)
	}
	return fmt.Errorf("podman --remote version: %w (output: %s)", err, output)
}
|
|
|
|
// Reprobe re-runs Probe with the existing configuration. Used by the
|
|
// handler when a request hits a not-Ready state — gives the operator
|
|
// a way to recover (e.g. installed podman after the server started)
|
|
// without a server restart. Cooldown of 60 s between probes to keep
|
|
// error-path requests cheap.
|
|
func Reprobe(ctx context.Context, engineOverride string) Capabilities {
|
|
if p := caps.Load(); p != nil {
|
|
if time.Since(p.ProbedAt) < 60*time.Second {
|
|
return *p
|
|
}
|
|
}
|
|
return Probe(ctx, engineOverride)
|
|
}
|
|
|
|
// resolveEngine returns the path of the container engine to use, as
// reported by exec.LookPath, or "" when none can be found. A
// non-empty override is the only candidate considered; otherwise
// podman is tried first, then docker.
func resolveEngine(override string) string {
	candidates := []string{"podman", "docker"}
	if override != "" {
		candidates = []string{override}
	}
	for _, name := range candidates {
		path, err := exec.LookPath(name)
		if err != nil {
			continue
		}
		return path
	}
	return ""
}
|
|
|
|
// enginesTried lists the engine names resolveEngine would look for
// given the same override; it is used to build the "tried: …" part
// of the probe failure message.
func enginesTried(override string) []string {
	if override == "" {
		return []string{"podman", "docker"}
	}
	return []string{override}
}
|
|
|
|
// probeVersion runs `<engine> --version` and returns the first line
// of its whitespace-trimmed combined stdout/stderr. If the command
// fails, the error is returned unchanged together with an empty
// string.
func probeVersion(ctx context.Context, engine string) (string, error) {
	raw, err := exec.CommandContext(ctx, engine, "--version").CombinedOutput()
	if err != nil {
		return "", err
	}
	firstLine, _, _ := strings.Cut(strings.TrimSpace(string(raw)), "\n")
	return firstLine, nil
}
|