ZDDC/zddc/internal/convert/runner.go
2026-06-11 13:32:31 -05:00

316 lines
9 KiB
Go

package convert
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"io/fs"
"os"
"os/exec"
"path/filepath"
"sync"
"time"
)
// Runner executes a conversion binary and returns its stdout. The
// production implementation (localRunner) just exec's the binary
// directly. Tests use a fake.
//
// binary is the PATH-resolvable name (or absolute path) of the
// conversion tool — typically "pandoc" or "chromium-browser". In the
// production runtime image those names resolve to wrapper scripts at
// /usr/local/bin/ that put the real binary into a cgroup + bwrap
// sandbox before exec'ing it. From zddc-server's perspective, that
// indirection is invisible: it just sees the tool's own behavior.
//
// stdin is piped to the binary's stdin. scratchDir is an optional
// host directory the binary needs to read from / write to (template
// + intermediate HTML + PDF output); passed to the child via the
// ZDDC_SCRATCH env var, which the wrapper script bind-mounts into
// the sandbox at the same path. Empty means "no scratch dir
// needed" (DOCX flow — stdin to stdout, no files).
//
// cmd holds the arguments passed to the binary, not including the
// program name itself. Same shape across all runners; no shell
// quoting; no engine-specific flags.
//
// On success Run returns the bytes the binary wrote to stdout. The
// production implementation returns ErrUnavailable when binary is
// empty and a *ConvertError when the command fails or times out.
//
// All exec calls in this package go through Runner.Run.
type Runner interface {
Run(ctx context.Context, binary string, stdin []byte, scratchDir string, cmd []string) ([]byte, error)
}
// ErrUnavailable means the conversion binary is not available — for
// example because it couldn't be found on PATH. In this file,
// localRunner.Run returns it when it is given an empty binary name.
// Handlers translate it to HTTP 503.
var ErrUnavailable = errors.New("conversion unavailable")
// ConvertError describes a failed run of a conversion binary: a
// non-zero exit, a timeout, or a failure to start or wait on the
// process. Stderr is captured (truncated to 4 KiB by the runner) so
// callers can surface the binary's own complaint.
type ConvertError struct {
	Tool     string // binary name, used only for logging
	ExitCode int    // exit status; the runner reports -1 when none is available
	Stderr   string // tail of the binary's stderr; may be empty
	Cause    error  // underlying error, exposed through Unwrap; may be nil
}

// Error formats the failure as "<tool> exit <code>: <detail>". The
// detail is the captured stderr when there is any, otherwise the
// underlying cause. When neither is present the detail (and the
// colon) is omitted rather than printing a literal "<nil>". A nil
// receiver yields "<nil>".
func (e *ConvertError) Error() string {
	if e == nil {
		return "<nil>"
	}
	switch {
	case e.Stderr != "":
		return fmt.Sprintf("%s exit %d: %s", e.Tool, e.ExitCode, e.Stderr)
	case e.Cause != nil:
		return fmt.Sprintf("%s exit %d: %v", e.Tool, e.ExitCode, e.Cause)
	default:
		return fmt.Sprintf("%s exit %d", e.Tool, e.ExitCode)
	}
}

// Unwrap returns the underlying cause so errors.Is / errors.As can
// see through a ConvertError. Like Error, it tolerates a nil
// receiver and returns nil in that case instead of panicking.
func (e *ConvertError) Unwrap() error {
	if e == nil {
		return nil
	}
	return e.Cause
}
// localRunner is the production Runner: it exec's the conversion
// binary directly. Sandboxing and resource limiting happen in the
// runtime image's wrapper script (at /usr/local/bin/<binary>), which
// sits BETWEEN this exec and the real binary and is invisible here.
//
// The memory and pid limits held in this struct are advisory only;
// the wrapper reads them from the environment (ZDDC_CONV_MEM_MAX,
// ZDDC_CONV_PIDS_MAX) and applies them to its transient cgroup. The
// wall-clock timeout, by contrast, IS enforced by this type via
// context.WithTimeout.
type localRunner struct {
	mu      sync.RWMutex  // guards the fields below
	memMiB  int           // memory ceiling advertised to the wrapper, in MiB
	pids    int           // pid ceiling advertised to the wrapper
	timeout time.Duration // wall-clock limit for a single run
}

// newLocalRunner returns a localRunner preloaded with the default
// limits: 1024 MiB of memory, 256 pids and a 60 second timeout.
func newLocalRunner() *localRunner {
	lr := new(localRunner)
	lr.memMiB = 1024 // 1 GiB — matches the wrapper's default
	lr.pids = 256
	lr.timeout = 60 * time.Second
	return lr
}

// SetLimits replaces the resource ceilings advertised to the wrapper
// script and the wall-clock timeout enforced by the runner. Any
// argument that is zero (or negative) leaves the corresponding
// setting as it was. Safe to call from multiple goroutines.
func (lr *localRunner) SetLimits(memMiB int, pids int, timeout time.Duration) {
	lr.mu.Lock()
	defer lr.mu.Unlock()

	// Substitute the current value for every "keep it" argument, then
	// store all three in one go.
	if memMiB <= 0 {
		memMiB = lr.memMiB
	}
	if pids <= 0 {
		pids = lr.pids
	}
	if timeout <= 0 {
		timeout = lr.timeout
	}
	lr.memMiB, lr.pids, lr.timeout = memMiB, pids, timeout
}
// Run executes binary with the arguments in cmd, feeds it stdin, and
// returns everything it wrote to stdout (capped at 128 MiB).
//
// The run is bounded by the runner's configured timeout on top of
// whatever deadline ctx already carries. The child receives a minimal
// environment: PATH, HOME, the two ZDDC_CONV_* limit variables and,
// when scratchDir is non-empty, ZDDC_SCRATCH.
//
// Errors:
//   - ErrUnavailable when binary is empty.
//   - *ConvertError for every other failure. ExitCode is the child's
//     exit status, or -1 when there is none to report. Stderr holds
//     the last 4 KiB the child wrote to stderr. Cause is the
//     underlying error, with two refinements: when the run context's
//     deadline expired it wraps context.DeadlineExceeded, and when the
//     binary could not be found on PATH it wraps both ErrUnavailable
//     and the exec error, so errors.Is(err, ErrUnavailable) holds.
func (lr *localRunner) Run(ctx context.Context, binary string, stdin []byte, scratchDir string, cmd []string) ([]byte, error) {
	// Snapshot the limits so a concurrent SetLimits cannot change them
	// halfway through this run.
	lr.mu.RLock()
	memMiB, pids, timeout := lr.memMiB, lr.pids, lr.timeout
	lr.mu.RUnlock()

	if binary == "" {
		return nil, ErrUnavailable
	}

	runCtx, cancel := context.WithTimeout(ctx, timeout)
	defer cancel()

	c := exec.CommandContext(runCtx, binary, cmd...)
	c.Cancel = func() error {
		if c.Process == nil {
			return nil
		}
		return c.Process.Kill()
	}
	// After cancellation, don't let Wait block forever on I/O pipes held
	// open by stray descendants.
	c.WaitDelay = 2 * time.Second
	c.SysProcAttr = sysProcAttr()

	// Minimal env passed to the wrapper. The wrapper does
	// --clearenv inside the bwrap sandbox so the real binary
	// sees only what bwrap re-injects (HOME, PATH, LANG). These
	// vars are read by the WRAPPER itself, not the binary, to
	// drive its cgroup setup + scratch-dir bind mount.
	env := []string{
		"PATH=" + os.Getenv("PATH"),
		"HOME=" + os.TempDir(),
		fmt.Sprintf("ZDDC_CONV_MEM_MAX=%dM", memMiB),
		fmt.Sprintf("ZDDC_CONV_PIDS_MAX=%d", pids),
	}
	if scratchDir != "" {
		env = append(env, "ZDDC_SCRATCH="+scratchDir)
	}
	c.Env = env

	c.Stdin = bytes.NewReader(stdin)
	var stdoutBuf bytes.Buffer
	c.Stdout = &limitWriter{w: &stdoutBuf, max: 128 << 20}
	stderr := newRingWriter(4 << 10)
	c.Stderr = stderr

	err := c.Run()
	if err == nil {
		return stdoutBuf.Bytes(), nil
	}

	// -1 stands for "no exit status" (the process never started, or the
	// failure came from the I/O plumbing rather than the child).
	exitCode := -1
	var exitErr *exec.ExitError
	if errors.As(err, &exitErr) {
		exitCode = exitErr.ExitCode()
	}

	cause := err
	switch {
	case errors.Is(runCtx.Err(), context.DeadlineExceeded):
		cause = fmt.Errorf("timeout after %s: %w", timeout, runCtx.Err())
	case errors.Is(err, exec.ErrNotFound):
		// Honour ErrUnavailable's contract ("binary couldn't be found on
		// PATH") while keeping the *ConvertError shape callers already
		// handle.
		cause = fmt.Errorf("%w: %w", ErrUnavailable, err)
	}
	return nil, &ConvertError{
		Tool:     binary,
		ExitCode: exitCode,
		Stderr:   stderr.String(),
		Cause:    cause,
	}
}
var (
// defaultRunner is the shared package-level Runner, populated by
// InstallRunner (called from the health probe at startup once the
// binaries are confirmed). It is nil until a runner is installed.
// Every access in this file happens with defaultRunnerMu held.
defaultRunnerMu sync.RWMutex
defaultRunner Runner
)
// InstallRunner makes r the package-level Runner that ToDocx/ToHTML/
// ToPDF use. Production code lets the health probe install a
// localRunner; tests inject a fake instead. Safe to call from
// multiple goroutines.
func InstallRunner(r Runner) {
	defaultRunnerMu.Lock()
	defer defaultRunnerMu.Unlock()
	defaultRunner = r
}
// ConfigureLimits forwards resource limits to the package-level
// Runner when that runner is a *localRunner. It does nothing when no
// runner has been installed yet (the probe failed) or when the
// installed runner has no notion of limits (e.g. a test fake). Zero
// values keep the previous setting.
//
// Called from cmd/zddc-server/main.go after Probe so the limits
// from the operator's flags take effect before any conversion
// request lands.
func ConfigureLimits(memMiB int, pids int, timeout time.Duration) {
	defaultRunnerMu.RLock()
	installed := defaultRunner
	defaultRunnerMu.RUnlock()

	// Only the concrete production runner knows how to accept limits.
	lr, ok := installed.(*localRunner)
	if !ok {
		return
	}
	lr.SetLimits(memMiB, pids, timeout)
}
// currentRunner returns the package-level Runner as it is right now,
// or nil when none has been installed. The read is done under
// defaultRunnerMu.
func currentRunner() Runner {
	defaultRunnerMu.RLock()
	defer defaultRunnerMu.RUnlock()
	return defaultRunner
}
// limitWriter forwards writes to w until max bytes have been passed
// through. Writes past the cap return an error which surfaces as a
// Run() error — the caller then maps to 422 (output too large) at
// the handler edge.
//
// It does no locking of its own.
type limitWriter struct {
	w   io.Writer // destination for the bytes that fit under the cap
	max int64     // total number of bytes that may be forwarded to w
	n   int64     // number of bytes forwarded to w so far
}

// Write forwards as much of p as still fits under the cap and returns
// the number of bytes actually forwarded.
//
//   - If all of p fits, the result is whatever the underlying writer
//     returns.
//   - If only a prefix fits, that prefix is forwarded and an
//     "output exceeded" error is returned alongside the short count.
//     If the underlying writer itself fails on that prefix, its error
//     is returned instead, so a real I/O failure is not misreported as
//     an oversize output.
//   - Once the cap has been reached, nothing is forwarded and the
//     "output exceeded" error is returned.
func (l *limitWriter) Write(p []byte) (int, error) {
	rem := l.max - l.n
	if rem <= 0 {
		return 0, fmt.Errorf("output exceeded %d bytes", l.max)
	}

	if int64(len(p)) <= rem {
		n, err := l.w.Write(p)
		l.n += int64(n)
		return n, err
	}

	// Only a prefix fits: forward it, then report the overflow.
	n, err := l.w.Write(p[:rem])
	l.n += int64(n)
	if err != nil {
		return n, err
	}
	return n, fmt.Errorf("output exceeded %d bytes", l.max)
}
// ringWriter is an io.Writer that remembers only the last max bytes
// written to it. It is meant for stderr capture, where the most
// recent bytes carry the actual error message and whatever came
// earlier is usually progress noise.
type ringWriter struct {
	mu  sync.Mutex // guards buf
	buf []byte     // retained tail, at most max bytes long
	max int        // size of the retained window in bytes
}

// newRingWriter returns an empty ringWriter that retains up to max
// bytes.
func newRingWriter(max int) *ringWriter {
	rw := new(ringWriter)
	rw.max = max
	return rw
}

// Write records p, discarding the oldest retained bytes as needed so
// that no more than max bytes are kept. It always reports the whole
// of p as written and never returns an error.
func (r *ringWriter) Write(p []byte) (int, error) {
	r.mu.Lock()
	defer r.mu.Unlock()

	written := len(p)
	switch {
	case written >= r.max:
		// p fills the window by itself: keep just its last max bytes.
		r.buf = append(r.buf[:0], p[written-r.max:]...)
	default:
		r.buf = append(r.buf, p...)
		if excess := len(r.buf) - r.max; excess > 0 {
			r.buf = r.buf[excess:]
		}
	}
	return written, nil
}

// String returns a copy of the currently retained tail.
func (r *ringWriter) String() string {
	r.mu.Lock()
	tail := string(r.buf)
	r.mu.Unlock()
	return tail
}
// writeScratchFiles materialises a set of named byte buffers (template +
// partials, or a lua filter) into a fresh scratch dir and returns the host
// path. Caller is responsible for os.RemoveAll(dir) when done. pandoc resolves
// `$partial()$` includes and --lua-filter paths from this dir, so everything
// lands flat alongside the entry file.
//
// scratchRoot controls where the temp dir lands. Empty means "use $TMPDIR".
//
// Files are written world-readable so the binary's default user can read them
// through the wrapper's bind mount regardless of the host's umask. Keys are
// reduced to base names only (no path separators).
//
// Because of that reduction the keys are validated before anything is
// created on disk: a key whose base name is not a usable file name ("", ".",
// "..", or a bare separator) is rejected, and so are two keys that reduce to
// the same base name — otherwise one file would silently overwrite the other,
// with the winner depending on map iteration order.
//
// On any error no scratch dir is left behind and the returned path is empty.
func writeScratchFiles(scratchRoot string, files map[string][]byte) (string, error) {
	// base name -> original key, used both for collision detection and to
	// drive the writes below.
	byBase := make(map[string]string, len(files))
	for name := range files {
		base := filepath.Base(name)
		if base == "." || base == ".." || base == string(filepath.Separator) {
			return "", fmt.Errorf("scratch file %q: not a usable file name", name)
		}
		if prev, dup := byBase[base]; dup {
			// Order the pair so the message doesn't depend on map order.
			first, second := prev, name
			if second < first {
				first, second = second, first
			}
			return "", fmt.Errorf("scratch files %q and %q both reduce to %q", first, second, base)
		}
		byBase[base] = name
	}

	dir, err := os.MkdirTemp(scratchRoot, "zddc-convert-")
	if err != nil {
		return "", fmt.Errorf("scratch dir: %w", err)
	}
	for base, name := range byBase {
		if err := os.WriteFile(filepath.Join(dir, base), files[name], 0o644); err != nil {
			_ = os.RemoveAll(dir) // best-effort cleanup; the write error is what matters
			return "", fmt.Errorf("write scratch file %q: %w", name, err)
		}
	}
	// WriteFile's mode is filtered by the umask and MkdirTemp creates the
	// dir as 0700, so set the final permissions explicitly.
	if err := chmodTree(dir, 0o755, 0o644); err != nil {
		_ = os.RemoveAll(dir) // best-effort cleanup; the chmod error is what matters
		return "", err
	}
	return dir, nil
}

// chmodTree walks the tree rooted at root and applies dirMode to every
// directory (root included) and fileMode to every other entry. It stops at,
// and returns, the first error reported by the walk or by os.Chmod.
func chmodTree(root string, dirMode, fileMode os.FileMode) error {
	return filepath.WalkDir(root, func(p string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}
		if d.IsDir() {
			return os.Chmod(p, dirMode)
		}
		return os.Chmod(p, fileMode)
	})
}