ZDDC/zddc/cmd/zddc-server/main.go
ZDDC 8a049ca2a4 feat(client): outbox — offline write queue + replay with If-Unmodified-Since
PUT / POST / DELETE in client mode now work end-to-end. Online: the
cache layer forwards to upstream and (on success) drops any cached
entry for the path so the next read fetches fresh. PUT/DELETE include
If-Unmodified-Since derived from the cached file's mtime so the master
can reject conflicting writes with 412 Precondition Failed.

When upstream is unreachable, the request is captured in the outbox
at <root>/.zddc-outbox/<id>/ — directory per queued write, mode 0700,
containing meta.json (method, RawURI, Content-Type, base mtime,
queued-at) and body.bin (request body, capped at 256 MiB). The client
gets 202 Accepted + X-ZDDC-Cache: queued and a JSON envelope.

A background replay loop started by runClient processes the queue:
- 2xx → delete entry; drop cached path so next read fetches fresh
- 412 → rename to <id>.conflict-<RFC3339>/ for manual reconciliation
       (body + meta intact for inspection or re-submit)
- 4xx other → drop (retry won't help; logged at WARN)
- 5xx / transport error → leave for next pass

Replay schedule: eager at startup, then 30s while pending falling
back to 5min while idle. Loop honors graceful-shutdown context.
Disabled in --mode=proxy (proxy persists nothing by design — offline
writes return 503 instead of queueing).

Outbox IDs are <unix-nano-base16>-<hex-random> so lex-sort = queue
order; concurrent enqueues never collide. Conflict-rename appends a
4-char random suffix on the unlikely same-second collision.

The local cache is intentionally not updated for offline writes:
until upstream confirms the user reads still see the upstream-cached
version (or 503 if uncached). Trade-off: no "did my queued write
actually win?" ambiguity, at the cost of not seeing one's own
offline edits immediately. Phase 5 will surface .conflict-<ts>/
directories in browse views.

Tests (20 new in outbox_test.go, 5 new in cache_test.go covering
the write path): NewOutbox creates 0700 dir, Enqueue persists meta
+ body, Pending returns lex-sorted entries excluding conflicts,
Replay deletes on 2xx / renames on 412 / leaves on transport error
/ leaves on 5xx / drops on 4xx-other, IUS sent only for PUT/DELETE
with base mtime, query string preserved, ServeHTTP online write
forwards + evicts cache, ServeHTTP offline write queues with 202,
ServeHTTP offline + no outbox returns 503, ServeHTTP PUT sends IUS
from cached mtime, oversize body rejected, IDs lex-sortable,
RunReplayLoop stops on context cancel, concurrent Enqueue 30×
no collisions. Full suite + go vet clean.

Doc updates: zddc/README.md gains a "Writes (online + offline
outbox)" subsection covering both paths and replay outcomes;
"What client mode is NOT, yet" now lists only conflict UI and
multi-tenancy. AGENTS.md client-mode pipeline gains writes +
mirror-mode bullets. ARCHITECTURE.md adds a "Writes: outbox +
offline replay" subsection with the trade-off rationale and the
phase-5-deferred conflict UI hand-off.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-08 08:20:07 -05:00

934 lines
34 KiB
Go

package main
import (
"context"
"errors"
"fmt"
"log/slog"
"net/http"
"os"
"os/signal"
"path/filepath"
"sort"
"strings"
"syscall"
"time"
"codeberg.org/VARASYS/ZDDC/zddc/internal/apps"
"codeberg.org/VARASYS/ZDDC/zddc/internal/archive"
"codeberg.org/VARASYS/ZDDC/zddc/internal/auth"
"codeberg.org/VARASYS/ZDDC/zddc/internal/cache"
"codeberg.org/VARASYS/ZDDC/zddc/internal/config"
"codeberg.org/VARASYS/ZDDC/zddc/internal/handler"
"codeberg.org/VARASYS/ZDDC/zddc/internal/policy"
"codeberg.org/VARASYS/ZDDC/zddc/internal/tlsutil"
"codeberg.org/VARASYS/ZDDC/zddc/internal/zddc"
"github.com/klauspost/compress/gzhttp"
"gopkg.in/natefinch/lumberjack.v2"
)
// version is the binary's own version, injected at build time via
// `-ldflags="-X main.version=..."`. Defaults to "dev" for unreleased
// builds; release pipelines pass the result of `git describe --tags`.
var version = "dev"
func main() {
// --print-rego: dump a bundled reference Rego policy and exit.
// Cheap escape hatch for operators standing up an external OPA who want
// a parity-tested baseline as a starting point for customization.
//
// --print-rego → standard cascade (commercial default)
// --print-rego=standard → same
// --print-rego=federal → parent-deny-is-absolute (NIST AC-6)
for _, a := range os.Args[1:] {
switch a {
case "--print-rego", "--print-rego=standard":
fmt.Print(policy.ReferenceRego)
return
case "--print-rego=federal":
fmt.Print(policy.FederalRego)
return
}
}
cfg, err := config.Load(os.Args[1:])
if errors.Is(err, config.ErrHelpRequested) {
config.Usage(os.Stderr)
os.Exit(0)
}
if errors.Is(err, config.ErrVersionRequested) {
printVersions(os.Stdout)
os.Exit(0)
}
if err != nil {
fmt.Fprintf(os.Stderr, "configuration error: %v\n\nRun with --help for usage.\n", err)
os.Exit(1)
}
logRing := setupLogger(cfg.LogLevel)
embedded := apps.EmbeddedVersions()
slog.Info("zddc-server starting",
"version", version,
"root", cfg.Root,
"addr", cfg.Addr,
"embedded_apps", embeddedVersionsForLog(embedded))
// Client mode short-circuit: when cfg.Upstream is set, this binary
// runs as a downstream proxy/cache/mirror rather than a master.
// The master-side machinery below (archive index, watcher, apps
// server, policy decider, ACL middleware, token store) is all
// skipped — every request flows through the cache layer, which
// forwards to upstream and (in cache/mirror modes) persists the
// response under cfg.Root.
if cfg.Upstream != "" {
runClient(cfg)
return
}
// Build archive index
slog.Info("building archive index...")
start := time.Now()
idx, err := archive.BuildIndex(cfg.Root)
if err != nil {
slog.Error("failed to build archive index", "err", err)
os.Exit(1)
}
slog.Info("archive index built", "duration", time.Since(start))
// Apps fetch+cache subsystem.
appsServer, err := setupApps(cfg)
if err != nil {
slog.Error("failed to set up apps subsystem", "err", err)
os.Exit(1)
}
// TLS config
tlsCfg, useTLS, err := tlsutil.TLSConfig(cfg)
if err != nil {
slog.Error("failed to configure TLS", "err", err)
os.Exit(1)
}
// Context for graceful shutdown
ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGTERM, syscall.SIGINT)
defer cancel()
// Start file-system watcher (best-effort live updates; misses cross-client
// writes on SMB/CIFS mounts since inotify only sees local-kernel events).
watcher, err := archive.NewWatcher(cfg.Root, idx)
if err != nil {
slog.Warn("failed to start filesystem watcher (index will not auto-update)", "err", err)
} else {
go func() {
if err := watcher.Start(ctx); err != nil && ctx.Err() == nil {
slog.Error("watcher error", "err", err)
}
}()
}
// Periodic full re-scan. Required when the served root is an SMB/CIFS
// share (Azure Files, etc.) — fsnotify sees only events the local kernel
// generates, so writes from other clients to the share are invisible to
// the watcher above. A periodic full walk closes that gap.
if cfg.ArchiveRescanInterval > 0 {
go runPeriodicRescan(ctx, cfg.Root, idx, cfg.ArchiveRescanInterval)
} else {
slog.Info("archive periodic rescan disabled (interval=0)")
}
// HTTP handler
mux := http.NewServeMux()
// Middleware chain (outermost → innermost):
// ACLMiddleware — extract email from cfg.EmailHeader, store in
// request context. Outermost so the email is
// available to AccessLogMiddleware (Go's context
// propagates DOWN the chain via r.WithContext, not
// UP — so AccessLog can't read a context value set
// by an inner middleware after next.ServeHTTP
// returns).
// AccessLogMiddleware — structured per-request log; reads email from
// the context the outer ACL middleware set.
// CORSMiddleware — Origin / preflight handling.
// dispatch — the actual request handler.
auditLogger := setupAccessAuditLog(cfg.AccessLog)
// Construct the policy decider once at startup. ZDDC_OPA_URL=internal
// (default) routes decisions through the in-process Go evaluator;
// http(s):// or unix:// values send each decision to an external
// OPA-compatible server (federal customers, custom Rego policies).
deciderCfg := policy.Config{
URL: cfg.OPAURL,
FailOpen: cfg.OPAFailOpen,
CacheTTL: cfg.OPACacheTTL,
CascadeMode: cfg.CascadeMode,
}
// Translate "0" (operator opt-out) to "disable cache" (negative TTL is
// the policy package's sentinel for "skip the wrapper").
if deciderCfg.CacheTTL == 0 {
deciderCfg.CacheTTL = -1
}
decider, err := policy.New(deciderCfg)
if err != nil {
slog.Error("invalid OPA URL", "url", cfg.OPAURL, "err", err)
os.Exit(1)
}
// --no-auth swaps the configured decider for one that allows
// everything. Logged at warn level so an operator who set this
// inadvertently sees it on every restart.
if cfg.NoAuth {
decider = policy.AllowAllDecider{}
slog.Warn("--no-auth enabled: ACL enforcement is disabled. Every request is permitted regardless of .zddc rules.")
}
slog.Info("policy decider ready",
"mode", policyModeLabel(cfg.OPAURL),
"url", cfg.OPAURL,
"cache_ttl", cfg.OPACacheTTL,
"cascade_mode", cfg.CascadeMode,
"no_auth", cfg.NoAuth)
// Token store: bearer-token issuance and validation.
// Persists under <ZDDC_ROOT>/.zddc.d/tokens/ — already excluded
// from public listings (fs.ListDirectory dot-prefix filter) and
// direct serving (dispatch's reserved-prefix guard). Failures here
// are non-fatal: token-based auth is opt-in per request, and
// header-based auth keeps working without it.
tokens, err := auth.NewStore(cfg.Root)
if err != nil {
slog.Warn("could not initialise token store; bearer-token auth disabled", "err", err)
tokens = nil
}
// Innermost handler: dispatch.
var inner http.Handler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
dispatch(cfg, idx, logRing, appsServer, tokens, w, r)
})
inner = handler.CORSMiddleware(cfg, inner)
// HSTS only when zddc-server itself is the TLS-terminating endpoint.
// Behind an upstream proxy terminating TLS (cfg.TLSMode=="none"), the
// proxy is responsible for HSTS — adding it here would conflict.
if useTLS {
inner = handler.HSTSMiddleware(inner)
}
inner = handler.AccessLogMiddleware(auditLogger, inner)
inner = handler.ACLMiddleware(cfg, decider, tokens, inner)
mux.Handle("/", inner)
gzWrapper, err := newGzipWrapper()
if err != nil {
slog.Error("gzhttp wrapper init", "err", err)
os.Exit(1)
}
srv := &http.Server{
Addr: cfg.Addr,
Handler: gzWrapper(mux),
TLSConfig: tlsCfg,
// Conservative timeouts. ReadHeaderTimeout caps how long a slow
// client can hold the connection before sending request headers
// (the slowloris vector). Read/Write timeouts cap full-request
// processing — directory listings + tool HTML serving complete
// in milliseconds even with gzip, so 60s is generous. IdleTimeout
// is the keep-alive ceiling between requests on the same conn.
ReadHeaderTimeout: 10 * time.Second,
ReadTimeout: 60 * time.Second,
WriteTimeout: 60 * time.Second,
IdleTimeout: 120 * time.Second,
}
// Serve in goroutine
if useTLS {
go func() {
slog.Info("listening", "addr", cfg.Addr, "tls", true)
if err := srv.ListenAndServeTLS("", ""); err != nil && err != http.ErrServerClosed {
slog.Error("server error", "err", err)
cancel()
}
}()
} else {
go func() {
slog.Info("listening", "addr", cfg.Addr, "tls", false)
if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed {
slog.Error("server error", "err", err)
cancel()
}
}()
}
<-ctx.Done()
slog.Info("shutting down...")
shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), 30*time.Second)
defer shutdownCancel()
if err := srv.Shutdown(shutdownCtx); err != nil {
slog.Error("shutdown error", "err", err)
}
slog.Info("stopped")
}
// runClient is the entry point when cfg.Upstream is set — a separate
// lifecycle from the master-side main(), with no archive index, no
// apps server, no watcher, no policy decider, no ACL middleware, no
// token store. The cache layer (zddc/internal/cache) is the entire
// request handler; AccessLog + HSTS + gzip wrap it the same way they
// wrap dispatch in master mode.
func runClient(cfg config.Config) {
cacheLayer, err := cache.New(cfg)
if err != nil {
slog.Error("client mode init failed", "err", err)
os.Exit(1)
}
slog.Info("client mode active",
"upstream", cacheLayer.Upstream(),
"mode", cacheLayer.Mode(),
"no_auth", cfg.NoAuth,
"skip_tls_verify", cfg.SkipTLSVerify)
if cfg.NoAuth {
slog.Warn("--no-auth enabled: incoming requests are not ACL-checked locally; trusting upstream's filtering.")
}
// Mirror walker: only constructed when --mode=mirror with at least
// one subtree (config validation ensures a default of "/" applies
// when the operator opted into mirror without specifying). Hooks
// itself into cacheLayer.onAccess; no further wiring needed here.
if cfg.Mode == "mirror" && len(cfg.MirrorSubtree) > 0 {
sched, err := cache.NewMirrorScheduler(cacheLayer, cfg.MirrorSubtree, cfg.MirrorMinInterval, 0)
if err != nil {
slog.Error("mirror scheduler init failed", "err", err)
os.Exit(1)
}
if sched != nil {
slog.Info("mirror walker armed",
"subtrees", sched.Subtrees(),
"min_interval", sched.MinInterval())
}
}
// Outbox: persist + replay offline writes. Only enabled in cache
// or mirror modes (proxy mode doesn't persist anything by design).
// A failure here is non-fatal: writes still flow live, but
// transport errors return 503 instead of being queued.
ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGTERM, syscall.SIGINT)
defer cancel()
if cfg.Mode != "proxy" {
outbox, err := cache.NewOutbox(cacheLayer)
if err != nil {
slog.Warn("outbox init failed; offline writes will return 503", "err", err)
} else {
cacheLayer.SetOutbox(outbox)
pending, _ := outbox.Pending()
slog.Info("outbox ready", "dir", outbox.Dir(), "pending_at_startup", len(pending))
go outbox.RunReplayLoop(ctx)
}
}
tlsCfg, useTLS, err := tlsutil.TLSConfig(cfg)
if err != nil {
slog.Error("failed to configure TLS", "err", err)
os.Exit(1)
}
auditLogger := setupAccessAuditLog(cfg.AccessLog)
var inner http.Handler = cacheLayer
inner = handler.CORSMiddleware(cfg, inner)
if useTLS {
inner = handler.HSTSMiddleware(inner)
}
inner = handler.AccessLogMiddleware(auditLogger, inner)
mux := http.NewServeMux()
mux.Handle("/", inner)
gzWrapper, err := newGzipWrapper()
if err != nil {
slog.Error("gzhttp wrapper init", "err", err)
os.Exit(1)
}
srv := &http.Server{
Addr: cfg.Addr,
Handler: gzWrapper(mux),
TLSConfig: tlsCfg,
ReadHeaderTimeout: 10 * time.Second,
ReadTimeout: 60 * time.Second,
WriteTimeout: 60 * time.Second,
IdleTimeout: 120 * time.Second,
}
if useTLS {
go func() {
slog.Info("listening", "addr", cfg.Addr, "tls", true, "client_mode", true)
if err := srv.ListenAndServeTLS("", ""); err != nil && err != http.ErrServerClosed {
slog.Error("server error", "err", err)
cancel()
}
}()
} else {
go func() {
slog.Info("listening", "addr", cfg.Addr, "tls", false, "client_mode", true)
if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed {
slog.Error("server error", "err", err)
cancel()
}
}()
}
<-ctx.Done()
slog.Info("shutting down...")
shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), 30*time.Second)
defer shutdownCancel()
if err := srv.Shutdown(shutdownCtx); err != nil {
slog.Error("shutdown error", "err", err)
}
slog.Info("stopped")
}
// setupAccessAuditLog constructs a slog.Logger writing JSON lines to a
// size-rotated file at the operator-configured path. Returns nil if no
// path is configured (operator opted out via --access-log=) —
// AccessLogMiddleware then logs only to stderr.
//
// Auto-creates the parent directory (mode 0750) if missing, so the
// default path of <ZDDC_ROOT>/.zddc.d/logs/access-<host>.log "just
// works" on a fresh deployment without operator setup.
//
// Every record is tagged with `host` (os.Hostname). When multiple
// zddc-server replicas serve the same dataset (and write to the same
// .zddc.d/logs/ directory via per-host filenames), the host field also
// makes downstream-aggregated streams disambiguable.
//
// Rotation: lumberjack — 100 MB per file, 10 backups, 90-day max age,
// gzip compression on rotated files.
//
// File-permission posture: lumberjack creates new logs with mode 0600
// (running user only). For multi-user audit access, the operator should
// use group-readable parent directory permissions and either chmod the
// log out-of-band or run a forwarder that has its own read access.
// policyModeLabel collapses cfg.OPAURL to a one-word mode label for the
// startup log so operators can grep for the active decider quickly.
func policyModeLabel(opaURL string) string {
switch {
case opaURL == "" || strings.EqualFold(opaURL, "internal"):
return "internal"
case strings.HasPrefix(opaURL, "unix://"):
return "external-unix"
case strings.HasPrefix(opaURL, "https://"):
return "external-https"
case strings.HasPrefix(opaURL, "http://"):
return "external-http"
default:
return "unknown"
}
}
func setupAccessAuditLog(path string) *slog.Logger {
if path == "" {
return nil
}
if err := os.MkdirAll(filepath.Dir(path), 0o750); err != nil {
slog.Error("could not create access-log directory; falling back to stderr-only",
"dir", filepath.Dir(path), "err", err)
return nil
}
rotator := &lumberjack.Logger{
Filename: path,
MaxSize: 100, // megabytes per file before rotation
MaxBackups: 10,
MaxAge: 90, // days
Compress: true,
}
host, _ := os.Hostname()
if host == "" {
host = "unknown"
}
// JSON handler — line-delimited JSON is the format every standard
// log shipper (Vector, Loki promtail, fluentbit, journalbeat) parses
// natively, and stays grep-friendly for ad-hoc inspection.
h := slog.NewJSONHandler(rotator, &slog.HandlerOptions{Level: slog.LevelInfo})
slog.Info("access log file enabled",
"path", path, "host", host,
"max_size_mb", 100, "max_backups", 10, "max_age_days", 90)
return slog.New(h).With("host", host)
}
// newGzipWrapper builds the gzip middleware applied to the entire mux.
// MinSize(1024) skips compressing tiny responses where the framing
// overhead exceeds the savings (304 Not Modified, error pages, small
// JSON listings under ~1 KB). The wrapper honors Accept-Encoding (passes
// through unchanged when the client doesn't advertise gzip), appends
// Vary: Accept-Encoding automatically, and passes through 304s untouched.
// Yields ~75% size reduction on the larger embedded HTML responses
// (mdedit: 920 KB → ~250 KB on the wire).
//
// Extracted so tests can construct an equivalent wrapper without going
// through the full main() server boot.
func newGzipWrapper() (func(http.Handler) http.HandlerFunc, error) {
return gzhttp.NewWrapper(gzhttp.MinSize(1024))
}
// setupApps creates the cache + fetcher + server. No seeding, no refresh,
// no admin UI — the server fetches once on first request, caches forever
// in <ZDDC_ROOT>/_app/, and falls back to the embedded HTML on any failure.
func setupApps(cfg config.Config) (*apps.Server, error) {
cache, err := apps.NewCache(filepath.Join(cfg.Root, apps.CacheDirName))
if err != nil {
return nil, fmt.Errorf("create cache: %w", err)
}
fetcher := apps.NewFetcher(cache, slog.Default())
// Apps signing pubkey. Resolution order, highest priority first:
// 1. --apps-pubkey / ZDDC_APPS_PUBKEY (path to PEM file)
// 2. apps_pubkey: inline PEM in the root <ZDDC_ROOT>/.zddc file
// (root-only — same trust-anchor treatment as admins:)
// 3. nothing → URL-fetched apps refuse-by-default; only embedded
// + local-path apps work
//
// Same posture as TLS certificates: zddc-server bakes nothing in.
// Operators using zddc.varasys.io's canonical channels download
// pubkey.pem from there and either configure the path via env/flag
// or paste the PEM contents inline into root .zddc.
switch {
case cfg.AppsPubKey != "":
pub, err := apps.LoadPubKey(cfg.AppsPubKey)
if err != nil {
return nil, fmt.Errorf("apps-pubkey: %w", err)
}
fetcher.VerifyKey = pub
slog.Info("apps signing pubkey loaded", "source", "env/flag", "path", cfg.AppsPubKey)
default:
// Fall back to apps_pubkey: in root .zddc.
rootZddc, err := zddc.ParseFile(filepath.Join(cfg.Root, ".zddc"))
if err == nil && rootZddc.AppsPubKey != "" {
pub, err := apps.ParsePubKeyPEM([]byte(rootZddc.AppsPubKey))
if err != nil {
return nil, fmt.Errorf("root .zddc apps_pubkey: %w", err)
}
fetcher.VerifyKey = pub
slog.Info("apps signing pubkey loaded", "source", "root .zddc apps_pubkey")
} else {
slog.Warn("apps-pubkey not configured; URL-fetched apps will be refused (only embedded + local-path apps will work). " +
"Set --apps-pubkey, ZDDC_APPS_PUBKEY, or apps_pubkey: in the root .zddc file to a PEM Ed25519 public key you trust.")
}
}
return apps.NewServer(cfg.Root, cache, fetcher, version), nil
}
// printVersions writes the binary version + the build label of every app
// embedded into the binary. Called by --version and reused for the
// startup log line.
func printVersions(w *os.File) {
fmt.Fprintf(w, "zddc-server %s\n\n", version)
embedded := apps.EmbeddedVersions()
if len(embedded) == 0 {
fmt.Fprintln(w, "Embedded tools: (none — run `sh build.sh` to populate)")
return
}
fmt.Fprintln(w, "Embedded tools:")
keys := make([]string, 0, len(embedded))
for k := range embedded {
keys = append(keys, k)
}
sort.Strings(keys)
for _, k := range keys {
fmt.Fprintf(w, " %-12s %s\n", k, embedded[k])
}
}
// embeddedVersionsForLog formats the embedded-versions map as a single
// short string suitable for the startup `log/slog` line. Sorted by app
// name for stable output.
func embeddedVersionsForLog(embedded map[string]string) string {
if len(embedded) == 0 {
return "(none)"
}
keys := make([]string, 0, len(embedded))
for k := range embedded {
keys = append(keys, k)
}
sort.Strings(keys)
parts := make([]string, 0, len(keys))
for _, k := range keys {
// Strip any " · timestamp · sha" suffix so the log line stays compact;
// operators who want full detail run `zddc-server --version`.
v := embedded[k]
if i := strings.Index(v, " "); i > 0 {
v = v[:i]
}
parts = append(parts, k+"="+v)
}
return strings.Join(parts, " ")
}
// dispatch routes a request to the appropriate handler.
func dispatch(cfg config.Config, idx *archive.Index, ring *handler.LogRing, appsSrv *apps.Server, tokens *auth.Store, w http.ResponseWriter, r *http.Request) {
urlPath := r.URL.Path
email := handler.EmailFromContext(r)
// Profile routes — the page itself is reachable to anyone (anonymous
// included); admin-only sub-resources (whoami / config / logs /
// projects / .zddc editor) keep their existing per-resource 404
// existence-leakage gates inside ServeProfile.
if urlPath == handler.ProfilePathPrefix || strings.HasPrefix(urlPath, handler.ProfilePathPrefix+"/") {
handler.ServeProfile(cfg, ring, idx, w, r)
return
}
// Token self-service: HTML page at /.tokens, JSON API at
// /.api/tokens. Both routes require an authenticated user (the
// existing email middleware injects the email from upstream auth).
// Both routes refuse to serve when no token store is available
// (e.g. NewStore failed at startup) — handled inside the handlers.
if urlPath == handler.TokensPathPrefix || urlPath == handler.TokensPathPrefix+"/" {
handler.ServeTokensPage(cfg, tokens, w, r)
return
}
if urlPath == handler.TokensAPIPathPrefix || strings.HasPrefix(urlPath, handler.TokensAPIPathPrefix+"/") {
handler.ServeTokensAPI(cfg, tokens, w, r)
return
}
// Auth check endpoints — machine-only forward_auth targets used by
// upstream proxies (e.g. the dev-shell pod's Caddy in front of
// code-server) to gate routes on root-admin status. Handled before
// the reserved-prefix guard below so the .auth namespace passes
// through without being 404'd by the dot-prefix rule.
if urlPath == handler.AuthPathPrefix+"/admin" {
handler.ServeAuthAdmin(cfg, w, r)
return
}
// Project list API: GET / with Accept: application/json
if urlPath == "/" {
accept := r.Header.Get("Accept")
if strings.Contains(accept, "application/json") {
handler.ServeProjectList(cfg, w, r)
return
}
}
// Split path into segments
segments := strings.Split(strings.Trim(urlPath, "/"), "/")
// Per-directory .zddc editor: <dir>/.zddc.html is a virtual URL
// served by the existing form-based editor (same handler that
// powers /.profile/zddc/edit?path=<dir>). Routed BEFORE the
// dot-prefix guard so the leaf segment isn't 404'd. The handler
// itself gates on hasAnyAdminScope; non-admins see 404.
if handler.IsZddcEditorRequest(urlPath) {
handler.ServeZddcEditorAtPath(cfg, w, r)
return
}
// Reserve dot-prefixed path segments. The listing pipeline already hides
// hidden entries (internal/listing/listing.go:17, projectshandler.go:40),
// but direct URL access would still serve them. 404 here so hidden trees
// like /srv/.devshell (the in-image dev-shell's persistent home dir on
// the same Azure Files PVC as served data) cannot be fetched. The
// recognized virtual prefixes (.profile handled above, cfg.IndexPath
// handled below) are explicitly allowed through.
//
// Also reserve the apps cache directory (`_app`): the cached HTML files
// there must be served via the apps resolver (with proper headers and
// ACL), never raw at /_app/...html.
for _, seg := range segments {
if seg == "" {
continue
}
if seg == apps.CacheDirName {
http.NotFound(w, r)
return
}
if !strings.HasPrefix(seg, ".") {
continue
}
if seg == cfg.IndexPath {
continue
}
http.NotFound(w, r)
return
}
// Check for .archive segment in the path. .archive is project-scoped
// and addressed at exactly one depth — /<project>/.archive/... — even
// though offline-built HTML files reference siblings via
// "../.archive/<tracking>.html" from arbitrary depths. Any deeper form
// (/<project>/<sub>/.../.archive/...) gets a 301 to the project-rooted
// canonical so anchored links and bookmarks normalize to a single
// stable URL per tracking number. The redirect target preserves the
// path tail after .archive/ verbatim and the query string; browsers
// preserve the fragment automatically across redirects.
//
// .archive is read-only: only GET/HEAD reach the handler. Anything
// else (PUT/POST/DELETE) returns 405 here, before the file API would
// otherwise see the request. This avoids the 302→GET silent-method-
// downgrade trap and makes the contract explicit.
for i, seg := range segments {
if seg != cfg.IndexPath {
continue
}
if r.Method != http.MethodGet && r.Method != http.MethodHead {
w.Header().Set("Allow", "GET, HEAD")
http.Error(w, "Method Not Allowed", http.StatusMethodNotAllowed)
return
}
// segments[0] is the project; segments[i] is .archive. i==0
// means /.archive/... at the very root, with no project to
// scope by — 404 (a tracking-number reference must be project-
// rooted; cross-project tracking-number collisions otherwise
// silently pick a winner).
if i == 0 {
http.NotFound(w, r)
return
}
project := segments[0]
var filename string
if i+1 < len(segments) {
filename = strings.Join(segments[i+1:], "/")
}
// Canonicalize anything below /<project>/.archive/. Building
// the target by hand (rather than re-encoding) keeps any
// already-encoded characters in the original URL.RawPath
// trailing segments intact for the browser to follow.
if i > 1 {
target := "/" + project + "/" + cfg.IndexPath + "/" + filename
if r.URL.RawQuery != "" {
target += "?" + r.URL.RawQuery
}
http.Redirect(w, r, target, http.StatusMovedPermanently)
return
}
handler.ServeArchive(cfg, idx, w, r, project, filename)
return
}
// Tables-system intercept: *.table.html is a virtual URL that the
// table handler renders inline, reading rows from a directory of
// *.yaml files declared in the directory's .zddc tables: map.
// Discovery is .zddc-declarative — no auto-mount on file presence —
// so RecognizeTableRequest returns nil whenever there's no matching
// declaration and the URL falls through to the static-file path
// (or to the form intercept below for *.form.html / *.yaml.html).
//
// One exception: archive/<party>/mdl.table.html falls back to the
// embedded default MDL spec when no operator declaration exists.
// RecognizeTableRequest implements that fallback internally.
if tableReq := handler.RecognizeTableRequest(cfg.Root, r.Method, urlPath); tableReq != nil {
handler.ServeTable(cfg, tableReq, w, r)
return
}
// Form-system intercept: *.form.html and *.yaml.html under a sibling form
// folder are virtual URLs that the form handler renders inline, reading
// the underlying *.form.yaml spec (and, for re-edit, the *.yaml data) from
// disk. RecognizeFormRequest returns nil when the spec doesn't exist, so
// non-form .html URLs fall through to the static-file path below.
if formReq := handler.RecognizeFormRequest(cfg.Root, r.Method, urlPath); formReq != nil {
handler.ServeForm(cfg, formReq, w, r)
return
}
// File API — authenticated CRUD over the served tree. Catches PUT,
// DELETE, and POST on any non-reserved path. Read methods (GET/HEAD)
// fall through to the static / apps / directory pipeline below.
// Forms and .profile/.archive POSTs are already routed above this
// point so they take precedence.
if handler.IsWriteMethod(r.Method) {
handler.ServeFileAPI(cfg, w, r)
return
}
// Apps resolution for the root landing path: GET / or /index.html with
// no real index.html on disk → serve via apps.Serve("landing"). The
// other four apps are caught by the "stat fails → app HTML?" branch
// below, which only triggers when no concrete file is at the URL path.
//
// The landing page is intentionally public (no ACL gate). It's a
// project picker — the per-project ACL filtering done by
// fs.ListDirectory still hides projects an anonymous (or unauthorized)
// caller can't reach. See also handler.ServeDirectory's matching
// root-path bypass.
if appsSrv != nil && (urlPath == "/" || urlPath == "/index.html") {
realIndex := filepath.Join(cfg.Root, "index.html")
if _, err := os.Stat(realIndex); os.IsNotExist(err) {
chain, _ := zddc.EffectivePolicy(cfg.Root, cfg.Root)
if apps.AppAvailableAt(cfg.Root, cfg.Root, "landing") {
appsSrv.Serve(w, r, "landing", chain, cfg.Root)
return
}
}
}
// Resolve the physical path
cleanPath := filepath.FromSlash(strings.TrimPrefix(urlPath, "/"))
absPath := filepath.Join(cfg.Root, cleanPath)
// Guard against path traversal
if !strings.HasPrefix(absPath, cfg.Root+string(filepath.Separator)) && absPath != cfg.Root {
http.Error(w, "Not Found", http.StatusNotFound)
return
}
// Check filesystem
info, err := os.Stat(absPath)
if err != nil {
if os.IsNotExist(err) {
// Default MDL spec fallback: archive/<party>/mdl.table.yaml
// and archive/<party>/mdl.form.yaml are served from embedded
// bytes when no operator file exists on disk. The table app
// fetches these client-side; the fallback lets a fresh
// project work out of the box.
if r.Method == http.MethodGet || r.Method == http.MethodHead {
if bytes, ok := handler.IsDefaultMdlSpec(cfg.Root, urlPath); ok {
chain, _ := zddc.EffectivePolicy(cfg.Root, filepath.Dir(absPath))
if allowed, _ := policy.AllowFromChain(r.Context(), handler.DeciderFromContext(r), chain, email, urlPath); !allowed {
http.Error(w, "Forbidden", http.StatusForbidden)
return
}
w.Header().Set("Content-Type", "application/yaml; charset=utf-8")
w.Header().Set("Cache-Control", "no-store")
w.Header().Set("X-ZDDC-Source", "default-mdl-spec")
if r.Method == http.MethodHead {
return
}
_, _ = w.Write(bytes)
return
}
}
// File doesn't exist at this path. If the URL matches one of
// the canonical app HTML names AND the request directory is
// one where that app is available (working/staging/incoming
// for classifier, working for mdedit, staging for
// transmittal, anywhere for archive, root only for landing),
// resolve via the apps subsystem.
if appsSrv != nil {
if app, requestDirRel := apps.MatchAppHTML(urlPath); app != "" {
requestDir := filepath.Join(cfg.Root, filepath.FromSlash(requestDirRel))
if apps.AppAvailableAt(cfg.Root, requestDir, app) {
chain, _ := zddc.EffectivePolicy(cfg.Root, requestDir)
if allowed, _ := policy.AllowFromChain(r.Context(), handler.DeciderFromContext(r), chain, email, urlPath); !allowed {
http.Error(w, "Forbidden", http.StatusForbidden)
return
}
appsSrv.Serve(w, r, app, chain, requestDir)
return
}
}
}
http.Error(w, "Not Found", http.StatusNotFound)
} else {
http.Error(w, "Internal Server Error", http.StatusInternalServerError)
}
return
}
if info.IsDir() {
// ACL check — bypassed at the root path so the landing page (the
// project picker) is reachable by anyone, including anonymous.
// Per-project filtering happens inside ServeDirectory →
// fs.ListDirectory, which hides directories the caller can't
// reach. Subdirectory requests still hit this gate.
isRoot := urlPath == "/"
if !isRoot {
chain, _ := zddc.EffectivePolicy(cfg.Root, absPath)
if allowed, _ := policy.AllowFromChain(r.Context(), handler.DeciderFromContext(r), chain, email, urlPath); !allowed {
http.Error(w, "Forbidden", http.StatusForbidden)
return
}
}
// URL convention: trailing slash → browse (handled by
// ServeDirectory, which serves browse.html for HTML requests
// and JSON for application/json). No trailing slash → the
// canonical default tool for this directory's context, if any
// (mdedit under working/, transmittal under staging/, archive
// under archive/, tables under archive/<party>/mdl/). When no
// default applies, fall back to the historical redirect-to-
// trailing-slash behaviour.
if !strings.HasSuffix(urlPath, "/") && (r.Method == http.MethodGet || r.Method == http.MethodHead) && !isRoot {
switch apps.DefaultAppAt(cfg.Root, absPath) {
case "tables":
// Tables aren't an apps-subsystem app — the table
// handler responds to <dir>/<name>.table.html. Serve
// the equivalent table view inline at the bare-mdl
// URL by routing through the handler with the
// canonical .table.html name appended.
if tr := handler.RecognizeTableRequest(cfg.Root, http.MethodGet, urlPath+".table.html"); tr != nil {
handler.ServeTable(cfg, tr, w, r)
return
}
case "archive", "transmittal", "mdedit":
if appsSrv != nil {
app := apps.DefaultAppAt(cfg.Root, absPath)
if apps.AppAvailableAt(cfg.Root, absPath, app) {
chain, _ := zddc.EffectivePolicy(cfg.Root, absPath)
appsSrv.Serve(w, r, app, chain, absPath)
return
}
}
}
}
if !strings.HasSuffix(urlPath, "/") {
http.Redirect(w, r, urlPath+"/", http.StatusMovedPermanently)
return
}
handler.ServeDirectory(cfg, w, r)
return
}
// Regular file: ACL on parent directory
chain, _ := zddc.EffectivePolicy(cfg.Root, filepath.Dir(absPath))
if allowed, _ := policy.AllowFromChain(r.Context(), handler.DeciderFromContext(r), chain, email, urlPath); !allowed {
http.Error(w, "Forbidden", http.StatusForbidden)
return
}
handler.ServeFile(w, r, absPath)
}
// runPeriodicRescan calls idx.Rebuild on `interval` until ctx is cancelled.
// Each tick walks fsRoot from scratch and atomically replaces the live index;
// concurrent reads are safe via the index's RWMutex. Errors are logged but do
// not stop the loop — a transient walk failure shouldn't disable rescans.
func runPeriodicRescan(ctx context.Context, fsRoot string, idx *archive.Index, interval time.Duration) {
ticker := time.NewTicker(interval)
defer ticker.Stop()
slog.Info("archive periodic rescan started", "interval", interval)
for {
select {
case <-ctx.Done():
return
case <-ticker.C:
dur, projects, tracking, err := idx.Rebuild(fsRoot)
if err != nil {
slog.Warn("archive rescan failed", "err", err, "duration", dur)
continue
}
slog.Debug("archive rescan ok", "duration", dur, "projects", projects, "tracking", tracking)
}
}
}
// setupLogger installs a slog default that fans every record out to stderr
// (the existing TextHandler — user-visible logging is unchanged) AND to an
// in-memory ring buffer that backs the /.profile/logs endpoint. Returns
// the ring so handlers can read it.
func setupLogger(level string) *handler.LogRing {
var l slog.Level
switch strings.ToLower(level) {
case "debug":
l = slog.LevelDebug
case "warn":
l = slog.LevelWarn
case "error":
l = slog.LevelError
default:
l = slog.LevelInfo
}
ring := handler.NewLogRing(500)
text := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: l})
rh := handler.NewRingHandler(ring, l)
slog.SetDefault(slog.New(handler.NewMultiHandler(text, rh)))
return ring
}