ZDDC/zddc/cmd/zddc-server/main.go
ZDDC e911806eda feat(server): pluggable OPA-compatible policy decider
Add an internal access-decision boundary that all handlers go through
instead of calling zddc.AllowedWithChain directly. Two implementations
ship:

  * InternalDecider — wraps the existing zddc.AllowedWithChain. The
    default. No new dependencies, identical semantics to the legacy
    code path. ZDDC_OPA_URL=internal (or unset).

  * HTTPDecider — POSTs the canonical OPA wire format
    (POST /v1/data/zddc/access/allow with {"input": {...}}, response
    {"result": true|false}) over HTTP, HTTPS, or a Unix-domain socket.
    For federal customers running their own audited Rego policies
    alongside zddc-server. ZDDC_OPA_URL=http(s)://… or unix:///….

External-mode failure semantics: unreachable / non-2xx / malformed
response → fail closed (deny) by default with a WARN log. Operators
who prefer availability over correctness flip with ZDDC_OPA_FAIL_OPEN=1.

The decider is constructed once at startup, plumbed through ACLMiddleware
into the request context. Handlers retrieve it via DeciderFromContext;
non-request callers (fs.ListDirectory, EnumerateProjects, enumerateAccess)
take it as an explicit parameter.

zddc.ZddcFile and zddc.ACLRules gain JSON tags so external Rego authors
get idiomatic input shape (acl.allow, admins, …) instead of Go field
names (ACL.Allow, Admins, …).

Test coverage:
  * InternalDecider parity tests against zddc.AllowedWithChain (every
    documented cascade scenario: empty chain, leaf-allow-wins, leaf-
    deny-beats-parent, leaf-allows-what-parent-denies, deepest-match-
    wins, etc.)
  * HTTPDecider happy-path test (canonical wire format)
  * Fail-closed / fail-open / malformed-response tests

Production binary size unchanged (no new deps; HTTP transport is
stdlib net/http). 11 ACL call sites migrated. End-to-end verified
against the worked-example layout in zddc/README.md.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-04 17:45:07 -05:00

525 lines
18 KiB
Go

package main
import (
"context"
"errors"
"fmt"
"log/slog"
"net/http"
"os"
"os/signal"
"path/filepath"
"sort"
"strings"
"syscall"
"time"
"codeberg.org/VARASYS/ZDDC/zddc/internal/apps"
"codeberg.org/VARASYS/ZDDC/zddc/internal/archive"
"codeberg.org/VARASYS/ZDDC/zddc/internal/config"
"codeberg.org/VARASYS/ZDDC/zddc/internal/handler"
"codeberg.org/VARASYS/ZDDC/zddc/internal/policy"
"codeberg.org/VARASYS/ZDDC/zddc/internal/tlsutil"
"codeberg.org/VARASYS/ZDDC/zddc/internal/zddc"
"github.com/klauspost/compress/gzhttp"
"gopkg.in/natefinch/lumberjack.v2"
)
// version is the binary's own version, injected at build time via
// `-ldflags="-X main.version=..."`. Defaults to "dev" for unreleased
// builds; release pipelines pass the result of `git describe --tags`.
// It is reported in the startup log line, printed by --version, and
// handed to the apps server constructed in setupApps.
var version = "dev"
// main boots zddc-server: it loads configuration, installs logging, builds
// the archive index, wires the apps subsystem, TLS, the filesystem watcher
// and the policy decider, then serves HTTP(S) until SIGTERM/SIGINT or a
// fatal listener error, and finally shuts the server down gracefully.
//
// Exit status: 0 after --help / --version or a signal-initiated shutdown;
// 1 on a configuration or startup failure, or when the listener itself
// failed (for example because the address could not be bound).
func main() {
	cfg, err := config.Load(os.Args[1:])
	if errors.Is(err, config.ErrHelpRequested) {
		config.Usage(os.Stderr)
		os.Exit(0)
	}
	if errors.Is(err, config.ErrVersionRequested) {
		printVersions(os.Stdout)
		os.Exit(0)
	}
	if err != nil {
		fmt.Fprintf(os.Stderr, "configuration error: %v\n\nRun with --help for usage.\n", err)
		os.Exit(1)
	}

	logRing := setupLogger(cfg.LogLevel)
	embedded := apps.EmbeddedVersions()
	slog.Info("zddc-server starting",
		"version", version,
		"root", cfg.Root,
		"addr", cfg.Addr,
		"embedded_apps", embeddedVersionsForLog(embedded))

	// Build archive index
	slog.Info("building archive index...")
	start := time.Now()
	idx, err := archive.BuildIndex(cfg.Root)
	if err != nil {
		slog.Error("failed to build archive index", "err", err)
		os.Exit(1)
	}
	slog.Info("archive index built", "duration", time.Since(start))

	// Apps fetch+cache subsystem.
	appsServer, err := setupApps(cfg)
	if err != nil {
		slog.Error("failed to set up apps subsystem", "err", err)
		os.Exit(1)
	}

	// TLS config
	tlsCfg, useTLS, err := tlsutil.TLSConfig(cfg)
	if err != nil {
		slog.Error("failed to configure TLS", "err", err)
		os.Exit(1)
	}

	// Context for graceful shutdown
	ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGTERM, syscall.SIGINT)
	defer cancel()

	// Start file-system watcher. A watcher failure is not fatal: the server
	// keeps running with the index built above, it just won't auto-update.
	watcher, err := archive.NewWatcher(cfg.Root, idx)
	if err != nil {
		slog.Warn("failed to start filesystem watcher (index will not auto-update)", "err", err)
	} else {
		go func() {
			if err := watcher.Start(ctx); err != nil && ctx.Err() == nil {
				slog.Error("watcher error", "err", err)
			}
		}()
	}

	// HTTP handler
	mux := http.NewServeMux()

	// Middleware chain (outermost → innermost):
	//   ACLMiddleware       — extract email from cfg.EmailHeader, store in
	//                         request context. Outermost so the email is
	//                         available to AccessLogMiddleware (Go's context
	//                         propagates DOWN the chain via r.WithContext, not
	//                         UP — so AccessLog can't read a context value set
	//                         by an inner middleware after next.ServeHTTP
	//                         returns).
	//   AccessLogMiddleware — structured per-request log; reads email from
	//                         the context the outer ACL middleware set.
	//   CORSMiddleware      — Origin / preflight handling.
	//   dispatch            — the actual request handler.
	auditLogger := setupAccessAuditLog(cfg.AccessLog)

	// Construct the policy decider once at startup. ZDDC_OPA_URL=internal
	// (default) routes decisions through the in-process Go evaluator;
	// http(s):// or unix:// values send each decision to an external
	// OPA-compatible server (federal customers, custom Rego policies).
	decider, err := policy.New(policy.Config{URL: cfg.OPAURL, FailOpen: cfg.OPAFailOpen})
	if err != nil {
		slog.Error("invalid OPA URL", "url", cfg.OPAURL, "err", err)
		os.Exit(1)
	}
	slog.Info("policy decider ready", "mode", policyModeLabel(cfg.OPAURL), "url", cfg.OPAURL)

	mux.Handle("/", handler.ACLMiddleware(cfg, decider, handler.AccessLogMiddleware(auditLogger, handler.CORSMiddleware(cfg, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		dispatch(cfg, idx, logRing, appsServer, w, r)
	})))))

	gzWrapper, err := newGzipWrapper()
	if err != nil {
		slog.Error("gzhttp wrapper init", "err", err)
		os.Exit(1)
	}

	srv := &http.Server{
		Addr:      cfg.Addr,
		Handler:   gzWrapper(mux),
		TLSConfig: tlsCfg,
		// Conservative timeouts. ReadHeaderTimeout caps how long a slow
		// client can hold the connection before sending request headers
		// (the slowloris vector). Read/Write timeouts cap full-request
		// processing — directory listings + tool HTML serving complete
		// in milliseconds even with gzip, so 60s is generous. IdleTimeout
		// is the keep-alive ceiling between requests on the same conn.
		ReadHeaderTimeout: 10 * time.Second,
		ReadTimeout:       60 * time.Second,
		WriteTimeout:      60 * time.Second,
		IdleTimeout:       120 * time.Second,
	}

	// Serve in a goroutine. A fatal listener error is recorded on serveFailed
	// BEFORE cancel() is called, so by the time ctx.Done() fires below the
	// failure is already observable. The channel is buffered so the send
	// never blocks.
	serveFailed := make(chan struct{}, 1)
	go func() {
		slog.Info("listening", "addr", cfg.Addr, "tls", useTLS)
		var serveErr error
		if useTLS {
			// Certificates come from srv.TLSConfig, hence the empty paths.
			serveErr = srv.ListenAndServeTLS("", "")
		} else {
			serveErr = srv.ListenAndServe()
		}
		// ErrServerClosed is the normal result of srv.Shutdown, not a failure.
		if serveErr != nil && !errors.Is(serveErr, http.ErrServerClosed) {
			slog.Error("server error", "err", serveErr)
			serveFailed <- struct{}{}
			cancel()
		}
	}()

	<-ctx.Done()
	slog.Info("shutting down...")
	shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer shutdownCancel()
	if err := srv.Shutdown(shutdownCtx); err != nil {
		slog.Error("shutdown error", "err", err)
	}
	slog.Info("stopped")

	// If we got here because the listener failed (rather than because of a
	// signal), report that to the process supervisor with a non-zero exit
	// status instead of silently exiting 0.
	select {
	case <-serveFailed:
		os.Exit(1)
	default:
	}
}
// policyModeLabel collapses cfg.OPAURL to a one-word mode label for the
// startup log so operators can grep for the active decider quickly.
//
// An empty value or "internal" (any letter case) yields "internal". A value
// starting with unix://, https:// or http:// yields "external-unix",
// "external-https" or "external-http" respectively; the scheme is matched
// case-insensitively, like the "internal" keyword. Anything else yields
// "unknown".
func policyModeLabel(opaURL string) string {
	// Lower-case once so every scheme prefix test below is case-insensitive.
	lowered := strings.ToLower(opaURL)
	switch {
	case opaURL == "" || strings.EqualFold(opaURL, "internal"):
		return "internal"
	case strings.HasPrefix(lowered, "unix://"):
		return "external-unix"
	case strings.HasPrefix(lowered, "https://"):
		return "external-https"
	case strings.HasPrefix(lowered, "http://"):
		return "external-http"
	default:
		return "unknown"
	}
}

// setupAccessAuditLog constructs a slog.Logger writing JSON lines to a
// size-rotated file at the operator-configured path. Returns nil if no
// path is configured (operator opted out via --access-log=) or if the
// log directory cannot be created — AccessLogMiddleware then logs only
// to stderr.
//
// Auto-creates the parent directory (mode 0750) if missing, so the
// default path of <ZDDC_ROOT>/.zddc.d/logs/access-<host>.log "just
// works" on a fresh deployment without operator setup.
//
// Every record is tagged with `host` (os.Hostname). When multiple
// zddc-server replicas serve the same dataset (and write to the same
// .zddc.d/logs/ directory via per-host filenames), the host field also
// makes downstream-aggregated streams disambiguable.
//
// Rotation: lumberjack — 100 MB per file, 10 backups, 90-day max age,
// gzip compression on rotated files.
//
// File-permission posture: lumberjack creates new logs with mode 0600
// (running user only). For multi-user audit access, the operator should
// use group-readable parent directory permissions and either chmod the
// log out-of-band or run a forwarder that has its own read access.
func setupAccessAuditLog(path string) *slog.Logger {
	// Rotation limits, shared by the rotator and the startup log line so the
	// two cannot drift apart.
	const (
		maxSizeMB  = 100 // megabytes per file before rotation
		maxBackups = 10
		maxAgeDays = 90
	)

	// No path configured: the caller gets nil and no file logger is built.
	if path == "" {
		return nil
	}

	logDir := filepath.Dir(path)
	if mkErr := os.MkdirAll(logDir, 0o750); mkErr != nil {
		slog.Error("could not create access-log directory; falling back to stderr-only",
			"dir", logDir, "err", mkErr)
		return nil
	}

	// The hostname lookup is best-effort; the error is deliberately ignored
	// and an empty result is replaced by a placeholder.
	hostname, _ := os.Hostname()
	if hostname == "" {
		hostname = "unknown"
	}

	sink := &lumberjack.Logger{
		Filename:   path,
		MaxSize:    maxSizeMB,
		MaxBackups: maxBackups,
		MaxAge:     maxAgeDays,
		Compress:   true,
	}

	// JSON handler — line-delimited JSON is the format every standard
	// log shipper (Vector, Loki promtail, fluentbit, journalbeat) parses
	// natively, and stays grep-friendly for ad-hoc inspection.
	jsonHandler := slog.NewJSONHandler(sink, &slog.HandlerOptions{Level: slog.LevelInfo})

	slog.Info("access log file enabled",
		"path", path, "host", hostname,
		"max_size_mb", maxSizeMB, "max_backups", maxBackups, "max_age_days", maxAgeDays)

	// Every record written through the returned logger carries the host tag.
	return slog.New(jsonHandler).With("host", hostname)
}
// newGzipWrapper returns the gzip middleware that main wraps around the
// whole mux, or the error reported by gzhttp.NewWrapper.
//
// Responses below the 1024-byte minimum are left uncompressed, since for
// tiny payloads (304 Not Modified, error pages, small JSON listings under
// ~1 KB) the framing overhead exceeds the savings. The wrapper honors
// Accept-Encoding (passes through unchanged when the client doesn't
// advertise gzip), appends Vary: Accept-Encoding automatically, and passes
// through 304s untouched. Yields ~75% size reduction on the larger embedded
// HTML responses (mdedit: 920 KB → ~250 KB on the wire).
//
// Kept as its own function so tests can construct an equivalent wrapper
// without going through the full main() server boot.
func newGzipWrapper() (func(http.Handler) http.HandlerFunc, error) {
	// Smallest response size, in bytes, that is worth compressing.
	const minCompressBytes = 1024

	sizeFloor := gzhttp.MinSize(minCompressBytes)
	return gzhttp.NewWrapper(sizeFloor)
}
// setupApps wires the apps subsystem: a cache rooted at
// <cfg.Root>/<apps.CacheDirName>, a fetcher that fills that cache and logs
// through the default slog logger, and the server that ties both together
// with the binary version.
//
// There is no seeding, no refresh, and no admin UI — the server fetches
// once on first request, caches forever in <ZDDC_ROOT>/_app/, and falls
// back to the embedded HTML on any failure.
//
// The only error path is cache creation, returned wrapped as
// "create cache: ...".
func setupApps(cfg config.Config) (*apps.Server, error) {
	cacheDir := filepath.Join(cfg.Root, apps.CacheDirName)

	appCache, err := apps.NewCache(cacheDir)
	if err != nil {
		return nil, fmt.Errorf("create cache: %w", err)
	}

	appFetcher := apps.NewFetcher(appCache, slog.Default())
	srv := apps.NewServer(cfg.Root, appCache, appFetcher, version)
	return srv, nil
}
// printVersions writes the binary version to w, followed by the build label
// of every app embedded into the binary, one per line and sorted by app
// name. When nothing is embedded it prints a hint to run the build script
// instead. Called for --version.
func printVersions(w *os.File) {
	fmt.Fprintf(w, "zddc-server %s\n\n", version)

	tools := apps.EmbeddedVersions()
	if len(tools) == 0 {
		fmt.Fprintln(w, "Embedded tools: (none — run `sh build.sh` to populate)")
		return
	}

	// Map iteration order is random; sort the names for stable output.
	names := make([]string, 0, len(tools))
	for name := range tools {
		names = append(names, name)
	}
	sort.Strings(names)

	fmt.Fprintln(w, "Embedded tools:")
	for _, name := range names {
		fmt.Fprintf(w, " %-12s %s\n", name, tools[name])
	}
}
// embeddedVersionsForLog renders the embedded-versions map as one compact
// "name=version name=version ..." string for the startup `log/slog` line.
// Entries are ordered by app name so the output is stable. An empty or nil
// map renders as "(none)".
func embeddedVersionsForLog(embedded map[string]string) string {
	if len(embedded) == 0 {
		return "(none)"
	}

	names := make([]string, 0, len(embedded))
	for name := range embedded {
		names = append(names, name)
	}
	sort.Strings(names)

	var out strings.Builder
	for pos, name := range names {
		if pos > 0 {
			out.WriteByte(' ')
		}

		// Keep only the text before the first space, dropping any
		// " · timestamp · sha" suffix so the log line stays compact;
		// operators who want full detail run `zddc-server --version`.
		// A value that begins with a space is left untouched.
		label := embedded[name]
		if cut := strings.IndexByte(label, ' '); cut > 0 {
			label = label[:cut]
		}

		out.WriteString(name)
		out.WriteByte('=')
		out.WriteString(label)
	}
	return out.String()
}
// dispatch routes a request to the appropriate handler.
//
// Routing is first-match, in this order:
//  1. handler.ProfilePathPrefix and anything below it → handler.ServeProfile.
//  2. handler.AuthPathPrefix+"/admin" → handler.ServeAuthAdmin.
//  3. "/" with an Accept header containing "application/json" →
//     handler.ServeProjectList.
//  4. Any path segment equal to apps.CacheDirName, or dot-prefixed and not
//     equal to cfg.IndexPath → 404.
//  5. A cfg.IndexPath segment → handler.ServeArchive, with the segments
//     before it as the context path and the segments after it as filename.
//  6. A request recognized by handler.RecognizeFormRequest → handler.ServeForm.
//  7. "/" or "/index.html" with no index.html under cfg.Root and the
//     "landing" app available → appsSrv.Serve (no ACL check).
//  8. Filesystem resolution under cfg.Root: a missing path may resolve to an
//     app HTML page, a directory goes to handler.ServeDirectory, a regular
//     file to handler.ServeFile. Each of these is gated by
//     policy.AllowFromChain, except the root directory.
//
// NOTE(review): the error results of zddc.EffectivePolicy and
// policy.AllowFromChain are discarded at every call site in this function;
// only the returned chain / allowed values are used. Confirm those helpers
// return a deny-safe value when they fail.
func dispatch(cfg config.Config, idx *archive.Index, ring *handler.LogRing, appsSrv *apps.Server, w http.ResponseWriter, r *http.Request) {
urlPath := r.URL.Path
email := handler.EmailFromContext(r)
// Profile routes — the page itself is reachable to anyone (anonymous
// included); admin-only sub-resources (whoami / config / logs /
// projects / .zddc editor) keep their existing per-resource 404
// existence-leakage gates inside ServeProfile.
if urlPath == handler.ProfilePathPrefix || strings.HasPrefix(urlPath, handler.ProfilePathPrefix+"/") {
handler.ServeProfile(cfg, ring, w, r)
return
}
// Auth check endpoints — machine-only forward_auth targets used by
// upstream proxies (e.g. the dev-shell pod's Caddy in front of
// code-server) to gate routes on root-admin status. Handled before
// the reserved-prefix guard below so the .auth namespace passes
// through without being 404'd by the dot-prefix rule.
if urlPath == handler.AuthPathPrefix+"/admin" {
handler.ServeAuthAdmin(cfg, w, r)
return
}
// Project list API: GET / with Accept: application/json
// NOTE(review): the HTTP method is not checked here, only the path and
// the Accept header — confirm ServeProjectList enforces the method.
if urlPath == "/" {
accept := r.Header.Get("Accept")
if strings.Contains(accept, "application/json") {
handler.ServeProjectList(cfg, w, r)
return
}
}
// Split path into segments
// For "/" this yields a single empty segment, which the loops below skip
// or never match.
segments := strings.Split(strings.Trim(urlPath, "/"), "/")
// Reserve dot-prefixed path segments. The listing pipeline already hides
// hidden entries (internal/listing/listing.go:17, projectshandler.go:40),
// but direct URL access would still serve them. 404 here so hidden trees
// like /srv/.devshell (the in-image dev-shell's persistent home dir on
// the same Azure Files PVC as served data) cannot be fetched. The
// recognized virtual prefixes (.profile handled above, cfg.IndexPath
// handled below) are explicitly allowed through.
//
// Also reserve the apps cache directory (`_app`): the cached HTML files
// there must be served via the apps resolver (with proper headers and
// ACL), never raw at /_app/...html.
//
// Because ".." is itself dot-prefixed, any URL containing a ".." segment
// is rejected by this loop as well.
for _, seg := range segments {
if seg == "" {
continue
}
if seg == apps.CacheDirName {
http.NotFound(w, r)
return
}
if !strings.HasPrefix(seg, ".") {
continue
}
if seg == cfg.IndexPath {
continue
}
http.NotFound(w, r)
return
}
// Check for .archive segment in the path
// (the segment compared against is cfg.IndexPath; the first occurrence wins).
for i, seg := range segments {
if seg == cfg.IndexPath {
// contextPath is everything before .archive
contextPath := "/" + strings.Join(segments[:i], "/")
var filename string
if i+1 < len(segments) {
filename = strings.Join(segments[i+1:], "/")
}
handler.ServeArchive(cfg, idx, w, r, contextPath, filename)
return
}
}
// Form-system intercept: *.form.html and *.yaml.html under a sibling form
// folder are virtual URLs that the form handler renders inline, reading
// the underlying *.form.yaml spec (and, for re-edit, the *.yaml data) from
// disk. RecognizeFormRequest returns nil when the spec doesn't exist, so
// non-form .html URLs fall through to the static-file path below.
if formReq := handler.RecognizeFormRequest(cfg.Root, r.Method, urlPath); formReq != nil {
handler.ServeForm(cfg, formReq, w, r)
return
}
// Apps resolution for the root landing path: GET / or /index.html with
// no real index.html on disk → serve via apps.Serve("landing"). The
// other four apps are caught by the "stat fails → app HTML?" branch
// below, which only triggers when no concrete file is at the URL path.
//
// The landing page is intentionally public (no ACL gate). It's a
// project picker — the per-project ACL filtering done by
// fs.ListDirectory still hides projects an anonymous (or unauthorized)
// caller can't reach. See also handler.ServeDirectory's matching
// root-path bypass.
if appsSrv != nil && (urlPath == "/" || urlPath == "/index.html") {
realIndex := filepath.Join(cfg.Root, "index.html")
if _, err := os.Stat(realIndex); os.IsNotExist(err) {
// The chain is only handed to appsSrv.Serve here; no access decision
// is made on it in this branch.
chain, _ := zddc.EffectivePolicy(cfg.Root, cfg.Root)
if apps.AppAvailableAt(cfg.Root, cfg.Root, "landing") {
appsSrv.Serve(w, r, "landing", chain, cfg.Root)
return
}
}
}
// Resolve the physical path
cleanPath := filepath.FromSlash(strings.TrimPrefix(urlPath, "/"))
absPath := filepath.Join(cfg.Root, cleanPath)
// Guard against path traversal
// NOTE(review): filepath.Join returns a cleaned path, so this prefix test
// presumably relies on cfg.Root itself being clean (no trailing
// separator) — confirm config.Load normalizes it.
if !strings.HasPrefix(absPath, cfg.Root+string(filepath.Separator)) && absPath != cfg.Root {
http.Error(w, "Not Found", http.StatusNotFound)
return
}
// Check filesystem
info, err := os.Stat(absPath)
if err != nil {
if os.IsNotExist(err) {
// File doesn't exist at this path. If the URL matches one of
// the five canonical app HTML names AND the request directory
// is one where that app is available (Incoming/Working/Staging
// for classifier/mdedit/transmittal, anywhere for archive,
// root only for landing), resolve via the apps subsystem.
if appsSrv != nil {
if app, requestDirRel := apps.MatchAppHTML(urlPath); app != "" {
requestDir := filepath.Join(cfg.Root, filepath.FromSlash(requestDirRel))
if apps.AppAvailableAt(cfg.Root, requestDir, app) {
// The ACL chain is evaluated for the directory the app is requested in.
chain, _ := zddc.EffectivePolicy(cfg.Root, requestDir)
if allowed, _ := policy.AllowFromChain(r.Context(), handler.DeciderFromContext(r), chain, email, urlPath); !allowed {
http.Error(w, "Forbidden", http.StatusForbidden)
return
}
appsSrv.Serve(w, r, app, chain, requestDir)
return
}
}
}
http.Error(w, "Not Found", http.StatusNotFound)
} else {
// Any stat failure other than "does not exist" is reported as a 500.
http.Error(w, "Internal Server Error", http.StatusInternalServerError)
}
return
}
if info.IsDir() {
// ACL check — bypassed at the root path so the landing page (the
// project picker) is reachable by anyone, including anonymous.
// Per-project filtering happens inside ServeDirectory →
// fs.ListDirectory, which hides directories the caller can't
// reach. Subdirectory requests still hit this gate.
isRoot := urlPath == "/"
if !isRoot {
chain, _ := zddc.EffectivePolicy(cfg.Root, absPath)
if allowed, _ := policy.AllowFromChain(r.Context(), handler.DeciderFromContext(r), chain, email, urlPath); !allowed {
http.Error(w, "Forbidden", http.StatusForbidden)
return
}
}
// Canonicalize directory URLs to end in "/". The ACL check above runs
// first, so a denied caller gets 403 rather than a redirect.
// NOTE(review): the redirect target is built from urlPath alone, so a
// query string on the original request is not carried over.
if !strings.HasSuffix(urlPath, "/") {
http.Redirect(w, r, urlPath+"/", http.StatusMovedPermanently)
return
}
handler.ServeDirectory(cfg, w, r)
return
}
// Regular file: ACL on parent directory
chain, _ := zddc.EffectivePolicy(cfg.Root, filepath.Dir(absPath))
if allowed, _ := policy.AllowFromChain(r.Context(), handler.DeciderFromContext(r), chain, email, urlPath); !allowed {
http.Error(w, "Forbidden", http.StatusForbidden)
return
}
handler.ServeFile(w, r, absPath)
}
// setupLogger replaces the slog default with a logger that duplicates every
// record to two sinks: a TextHandler on stderr (so user-visible logging is
// unchanged) and an in-memory ring buffer that backs the /.profile/logs
// endpoint. Both sinks share the same minimum level.
//
// level is matched case-insensitively against "debug", "warn" and "error";
// any other value (including "info" and the empty string) selects Info.
// The ring is returned so handlers can read it.
func setupLogger(level string) *handler.LogRing {
	minLevel := slog.LevelInfo
	switch strings.ToLower(level) {
	case "debug":
		minLevel = slog.LevelDebug
	case "warn":
		minLevel = slog.LevelWarn
	case "error":
		minLevel = slog.LevelError
	}

	logRing := handler.NewLogRing(500)
	stderrSink := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: minLevel})
	ringSink := handler.NewRingHandler(logRing, minLevel)

	fanOut := handler.NewMultiHandler(stderrSink, ringSink)
	slog.SetDefault(slog.New(fanOut))
	return logRing
}