package convert import ( "bytes" "context" "errors" "fmt" "io" "io/fs" "os" "os/exec" "path/filepath" "strings" "sync" "time" ) // Runner executes a conversion sub-process and returns its stdout. // The host-side implementation (containerRunner) wraps `podman run` // or `docker run`; tests use a fake. // // image is the OCI image to invoke (e.g. "docker.io/pandoc/latex:latest" // or "docker.io/zenika/alpine-chrome:latest"). stdin is piped to the // container's stdin. cmd is the argv passed *to the image's entrypoint* // — for pandoc/latex the entrypoint is `pandoc`, for alpine-chrome it // is `chromium-browser`. mounts is a list of ":" // specs handed to --volume (":ro" is added if no mode segment is // present). // // All exec calls in this package go through Runner.Run. This is the // first os/exec site in the codebase; the hardening here is the // pattern for future shell-outs. type Runner interface { Run(ctx context.Context, image string, stdin []byte, mounts []string, cmd []string) ([]byte, error) } // ErrUnavailable means no container runtime is present on the host. // Handlers translate to HTTP 503. var ErrUnavailable = errors.New("conversion unavailable") // ConvertError carries the failure surface from a non-zero exit. // Stderr is captured (truncated to 4 KiB by the runner) so callers can // surface pandoc/chromium's own complaint. type ConvertError struct { Tool string // image name fragment, used only for logging ExitCode int Stderr string Cause error } func (e *ConvertError) Error() string { if e == nil { return "" } if e.Stderr != "" { return fmt.Sprintf("%s exit %d: %s", e.Tool, e.ExitCode, strings.TrimSpace(e.Stderr)) } return fmt.Sprintf("%s exit %d: %v", e.Tool, e.ExitCode, e.Cause) } func (e *ConvertError) Unwrap() error { return e.Cause } // containerRunner runs each conversion inside a fresh container. // The engine ("podman" preferred, "docker" fallback) is resolved once // at startup by Probe. 
Resource limits are configurable via // SetLimits (called from main.go after flag parsing). Images are passed // per call so the same runner handles both pandoc and chromium // invocations. // // The runner relies on `--pull=missing` so the operator never has to // pre-pull images: the first request that needs an image pulls it, // subsequent requests use the local cache. Both podman and docker // honour this flag identically. type containerRunner struct { mu sync.RWMutex engine string memMiB int cpus string pids int timeout time.Duration } var ( // shared default runner, populated by InstallRunner (called from // the health probe at startup once the engine is known). defaultRunnerMu sync.RWMutex defaultRunner Runner ) // InstallRunner sets the package-level Runner used by ToDocx/ToHTML/ToPDF. // Tests inject a fake; production code lets the health probe install a // containerRunner. Safe to call from multiple goroutines. func InstallRunner(r Runner) { defaultRunnerMu.Lock() defaultRunner = r defaultRunnerMu.Unlock() } // ConfigureLimits applies resource limits to the package-level Runner, // if it's a containerRunner. No-op when no runner is installed yet // (the probe failed) or when the installed runner doesn't accept // limits (e.g. a test fake). Zero values keep the previous setting. // // Called from cmd/zddc-server/main.go after Probe so the limits from // the operator's flags take effect before any conversion request lands. func ConfigureLimits(memMiB int, cpus string, pids int, timeout time.Duration) { defaultRunnerMu.RLock() r := defaultRunner defaultRunnerMu.RUnlock() if cr, ok := r.(*containerRunner); ok { cr.SetLimits(memMiB, cpus, pids, timeout) } } func currentRunner() Runner { defaultRunnerMu.RLock() r := defaultRunner defaultRunnerMu.RUnlock() return r } // SetLimits updates the resource ceilings used for subsequent Run // invocations. Zero values keep the previous setting (or the defaults // set at construction). 
Safe to call from multiple goroutines. func (cr *containerRunner) SetLimits(memMiB int, cpus string, pids int, timeout time.Duration) { cr.mu.Lock() defer cr.mu.Unlock() if memMiB > 0 { cr.memMiB = memMiB } if cpus != "" { cr.cpus = cpus } if pids > 0 { cr.pids = pids } if timeout > 0 { cr.timeout = timeout } } func newContainerRunner(engine string) *containerRunner { return &containerRunner{ engine: engine, memMiB: 512, cpus: "2", pids: 100, timeout: 30 * time.Second, } } // Run executes one container invocation. cmd is the argv passed to the // image's entrypoint (pandoc for pandoc/latex, chromium-browser for // alpine-chrome). mounts is a list of ":" // strings; ":ro" is appended when no mode segment is present. stdin is // piped to the container, stdout is returned as bytes (capped at // 128 MiB). // // Hardening: // - --pull=missing: image is fetched on first use, cached after. // Operator only needs podman/docker installed; no manual pull. // - --rm: container is removed on exit, even if killed. // - --network=none: no network inside the container. Prevents data // exfiltration through embedded URLs in source documents. // - --read-only + tmpfs on /tmp and /run: image fs is immutable; // pandoc/chromium scratch goes to tmpfs only. // - --memory / --cpus / --pids-limit: kernel-enforced caps. // - --cap-drop=ALL + --security-opt=no-new-privileges: standard // container-escape hardening. // - context-cancel kill + WaitDelay: a wedged podman gets force- // killed; pipes drop after 2s so we don't leak goroutines. // - cmd.Env minimal: only PATH + HOME are passed through to the // engine binary; the container itself sees only what the image // bakes in plus what --env adds (HOME=/tmp). // // Note: --user is intentionally NOT set so each image uses its // default user (pandoc/latex runs as root, alpine-chrome runs as // uid 1000). 
With --read-only + tmpfs + --cap-drop=ALL + // --network=none + --no-new-privileges the additional defense from // forcing nobody is small and would break alpine-chrome's own // user-data-dir layout. func (cr *containerRunner) Run(ctx context.Context, image string, stdin []byte, mounts []string, cmd []string) ([]byte, error) { cr.mu.RLock() engine := cr.engine memMiB := cr.memMiB cpus := cr.cpus pids := cr.pids timeout := cr.timeout cr.mu.RUnlock() if engine == "" { return nil, ErrUnavailable } if image == "" { return nil, fmt.Errorf("convert.Run: image is empty") } runCtx, cancel := context.WithTimeout(ctx, timeout) defer cancel() args := []string{ "run", "--rm", "--pull=missing", "-i", "--network=none", "--read-only", "--tmpfs=/tmp:size=128m,exec", "--tmpfs=/run:size=4m", fmt.Sprintf("--memory=%dm", memMiB), fmt.Sprintf("--cpus=%s", cpus), fmt.Sprintf("--pids-limit=%d", pids), "--cap-drop=ALL", "--security-opt=no-new-privileges", "--env=HOME=/tmp", "--workdir=/tmp", } for _, m := range mounts { if !strings.Contains(m, ":ro") && !strings.Contains(m, ":rw") { m += ":ro" } args = append(args, "--volume="+m) } args = append(args, image) args = append(args, cmd...) c := exec.CommandContext(runCtx, engine, args...) 
c.Cancel = func() error { if c.Process == nil { return nil } return c.Process.Kill() } c.WaitDelay = 2 * time.Second c.SysProcAttr = sysProcAttr() c.Env = []string{ "PATH=" + os.Getenv("PATH"), "HOME=" + os.TempDir(), } c.Stdin = bytes.NewReader(stdin) var stdoutBuf bytes.Buffer c.Stdout = &limitWriter{w: &stdoutBuf, max: 128 << 20} stderr := newRingWriter(4 << 10) c.Stderr = stderr err := c.Run() if err != nil { exitCode := -1 if ee, ok := err.(*exec.ExitError); ok { exitCode = ee.ExitCode() } toolName := imageTag(image) if runCtx.Err() == context.DeadlineExceeded { return nil, &ConvertError{ Tool: toolName, ExitCode: exitCode, Stderr: stderr.String(), Cause: fmt.Errorf("timeout after %s: %w", timeout, runCtx.Err()), } } return nil, &ConvertError{ Tool: toolName, ExitCode: exitCode, Stderr: stderr.String(), Cause: err, } } return stdoutBuf.Bytes(), nil } // imageTag extracts a short name for an image reference, used as the // "Tool" label on ConvertError. "docker.io/pandoc/latex:latest" → // "pandoc/latex". func imageTag(image string) string { s := image // Strip registry prefix. if i := strings.Index(s, "/"); i >= 0 { if strings.Contains(s[:i], ".") || strings.Contains(s[:i], ":") { s = s[i+1:] } } // Strip tag suffix. if i := strings.LastIndex(s, ":"); i >= 0 { s = s[:i] } return s } // limitWriter caps the underlying buffer at max bytes. Writes past the // cap return io.ErrShortWrite, which surfaces as a Run() error — the // caller then maps to 422 (output too large) at the handler edge. 
type limitWriter struct {
	w   io.Writer // destination for accepted bytes
	max int64     // total number of bytes allowed through
	n   int64     // bytes the destination has accepted so far
}

// Write forwards p to the wrapped writer while there is room under the
// cap. A write that would cross the cap forwards only the part that
// still fits and reports an error; once the cap is reached every
// further write is rejected outright.
func (l *limitWriter) Write(p []byte) (int, error) {
	room := l.max - l.n
	if room <= 0 {
		return 0, fmt.Errorf("output exceeded %d bytes", l.max)
	}
	if int64(len(p)) <= room {
		n, err := l.w.Write(p)
		l.n += int64(n)
		return n, err
	}
	// Partial fit: the over-limit error takes precedence over whatever
	// the wrapped writer reports for the truncated write.
	n, _ := l.w.Write(p[:room])
	l.n += int64(n)
	return n, fmt.Errorf("output exceeded %d bytes", l.max)
}

// ringWriter retains only the last max bytes written to it. It is used
// for stderr capture, where the final bytes tend to hold the real
// error message and anything earlier is usually progress noise.
type ringWriter struct {
	mu  sync.Mutex
	buf []byte
	max int
}

// newRingWriter returns a ringWriter that keeps at most max bytes.
func newRingWriter(max int) *ringWriter {
	rw := new(ringWriter)
	rw.max = max
	return rw
}

// Write records p, discarding the oldest bytes so that no more than
// max remain. It always reports the whole of p as written.
func (r *ringWriter) Write(p []byte) (int, error) {
	size := len(p)

	r.mu.Lock()
	defer r.mu.Unlock()

	switch {
	case size >= r.max:
		// p alone fills the window: keep just its tail.
		r.buf = append(r.buf[:0], p[size-r.max:]...)
	default:
		r.buf = append(r.buf, p...)
		if excess := len(r.buf) - r.max; excess > 0 {
			r.buf = r.buf[excess:]
		}
	}
	return size, nil
}

// String returns a copy of the retained bytes as a string.
func (r *ringWriter) String() string {
	r.mu.Lock()
	tail := string(r.buf)
	r.mu.Unlock()
	return tail
}

// writeAssetsToScratch materialises the embedded viewer-template.html
// and custom.css into a fresh scratch dir under TMPDIR and returns the
// host path. Caller is responsible for os.RemoveAll(dir) when done.
// Used by ToHTML which needs the template visible inside the container.
//
// Files are written world-readable so the container's default user
// (root for pandoc/latex, uid 1000 for alpine-chrome) can read them
// through the read-only bind mount regardless of the host's umask.
func writeAssetsToScratch() (string, error) { dir, err := os.MkdirTemp("", "zddc-convert-") if err != nil { return "", fmt.Errorf("scratch dir: %w", err) } if err := os.WriteFile(filepath.Join(dir, "viewer-template.html"), viewerTemplate, 0o644); err != nil { os.RemoveAll(dir) return "", fmt.Errorf("write template: %w", err) } if err := os.WriteFile(filepath.Join(dir, "custom.css"), customCSS, 0o644); err != nil { os.RemoveAll(dir) return "", fmt.Errorf("write css: %w", err) } if err := chmodTree(dir, 0o755, 0o644); err != nil { os.RemoveAll(dir) return "", err } return dir, nil } func chmodTree(root string, dirMode, fileMode os.FileMode) error { return filepath.WalkDir(root, func(p string, d fs.DirEntry, err error) error { if err != nil { return err } if d.IsDir() { return os.Chmod(p, dirMode) } return os.Chmod(p, fileMode) }) }