From ac7553f940a7c7b4bd2b185c1c347e55a2a592a5 Mon Sep 17 00:00:00 2001 From: ZDDC Date: Fri, 8 May 2026 10:03:51 -0500 Subject: [PATCH] fix(client): plug confused-deputy bind in client mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A focused security review of phases 1-4 surfaced one MEDIUM finding (confidence 9/10): in client mode (--upstream set) the cache layer forwards the configured bearer to upstream on every incoming request without authenticating the local caller, AND --addr defaulted to :8443 (all interfaces). Together those mean a CLI user running `zddc-server --upstream https://master --bearer-file ~/token` on a laptop on hotel/cafe Wi-Fi exposes an open-proxy confused-deputy: any attacker on the same L2 connects to https://:8443, accepts the self-signed cert, issues GETs (or PUTs/DELETEs that queue in the outbox), and the cache laundries each request through upstream with the engineer's bearer. The full cached subtree leaks. Two layers of defense in config.Load: 1. Loopback default in client mode. When cfg.Upstream is set and neither --addr nor ZDDC_ADDR was passed explicitly, --addr downgrades to "127.0.0.1:8443" (vs ":8443" in master mode). CLI users on a laptop get safe-by-default. Operators who want a non-loopback bind opt in explicitly. 2. Refuse non-loopback bind + bearer-file without acknowledgement. When cfg.Upstream is set, BearerFile is non-empty, the chosen addr is non-loopback, AND --insecure-direct is not set, the load fails with an error that names the bind, the threat (open-proxy confused-deputy laundering bearer credentials), and the acknowledgement flag. The helm zddc-server-cache/ chart already sets ZDDC_INSECURE_DIRECT=1 and relies on Kubernetes-namespaced pod networking for the gating, so the chart path is unaffected. The guard is bearer-file-conditional because proxy mode without a bearer doesn't have a credential to launder, and refusing it would needlessly block proxy-without-auth deployments. Tests in internal/config/config_test.go lock down all four cases: - --upstream with no explicit --addr → 127.0.0.1:8443 - --upstream + non-loopback --addr + --bearer-file (no IDirect) → refuse - --upstream + non-loopback --addr + --bearer-file + --insecure-direct → ok - --upstream + non-loopback --addr + NO bearer → ok (no credential to leak) Doc updates: zddc/README.md client-mode "Flags" section gets a WARNING block describing the loopback default + insecure-direct escape hatch. AGENTS.md ZDDC_UPSTREAM row mentions the addr downgrade. ARCHITECTURE.md gains a "Confused-deputy guard at startup" subsection under "Master + proxy/cache/mirror" with the two-layer defense rationale. helm/zddc-server-cache/values.yaml.example adds an inline note next to addr: ":8080" explaining why the chart sets ZDDC_INSECURE_DIRECT=1 and what the consequence is of removing either side of the gating. Master mode is unaffected — the client-mode validation block is gated by `if cfg.Upstream != ""`. All existing tests pass. Co-Authored-By: Claude Opus 4.7 (1M context) --- AGENTS.md | 2 +- ARCHITECTURE.md | 7 ++ helm/zddc-server-cache/values.yaml.example | 9 +++ zddc/README.md | 2 + zddc/internal/config/config.go | 39 +++++++++- zddc/internal/config/config_test.go | 89 ++++++++++++++++++++++ 6 files changed, 146 insertions(+), 2 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 6fdeaa0..371d47c 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -447,7 +447,7 @@ ZDDC_ROOT=/path/to/your/archive ZDDC_TLS_CERT=none ZDDC_ADDR=:8080 \ | `ZDDC_CORS_ORIGIN` | *(empty)* | Comma-separated CORS allowlist; empty (default) disables CORS — appropriate for embedded-tools deployments where tools and data are same-origin. Set explicitly only for self-hosted tools at a different host (e.g. `https://tools.acme.com`) or the CDN-bootstrap pattern (`https://zddc.varasys.io`). | | `ZDDC_INSECURE` | *(empty)* | Must be `1` to allow startup with no `/.zddc`. Without it, the server refuses to start because no `.zddc` files anywhere → public-by-default. Set only for deliberately-public archives. | | `ZDDC_NO_AUTH` | *(empty)* | `1` skips ACL enforcement entirely on this instance. On a master: anyone reads everything (dev / trusted-LAN read-only deployments). On a downstream proxy/cache/mirror: trust upstream's filtering, don't re-evaluate ACLs locally. **Distinct from `ZDDC_INSECURE`** (which gates a startup safety check). | -| `ZDDC_UPSTREAM` | *(empty)* | Master URL (`https://master.example.com`). When set, the binary runs as a **client** (downstream proxy/cache/mirror) instead of a master — the master-side machinery (archive index, apps server, watcher, OPA, ACL middleware, token store) is replaced by the cache layer in `zddc/internal/cache/`. `--root` becomes the cache directory. | +| `ZDDC_UPSTREAM` | *(empty)* | Master URL (`https://master.example.com`). When set, the binary runs as a **client** (downstream proxy/cache/mirror) instead of a master — the master-side machinery (archive index, apps server, watcher, OPA, ACL middleware, token store) is replaced by the cache layer in `zddc/internal/cache/`. `--root` becomes the cache directory. **Setting this also downgrades the `--addr` default to `127.0.0.1:8443` (loopback)** — the cache forwards a bearer to upstream without authenticating the local caller, so non-loopback binds with `ZDDC_BEARER_FILE` set are refused unless `ZDDC_INSECURE_DIRECT=1` is also set. | | `ZDDC_MODE` | `cache` | Client mode: `proxy` (forward live, no persistence), `cache` (default; persist responses on access), `mirror` (phase 3 — currently behaves like `cache`). Ignored when `ZDDC_UPSTREAM` is empty. | | `ZDDC_BEARER_FILE` | *(empty)* | Path to a 0600 file containing the master-issued token (see `/.tokens` on the master). Forwarded as `Authorization: Bearer …` to upstream on every request. Ignored when `ZDDC_UPSTREAM` is empty. | | `ZDDC_SKIP_TLS_VERIFY` | *(empty)* | `1` accepts self-signed / untrusted upstream certs. Distinct from `ZDDC_NO_AUTH`. Dev / internal-CA scenarios only. | diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index 7ff13f6..ef2b539 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -558,6 +558,13 @@ The local cache is not updated for offline writes by design — until upstream c The local instance forwards a single bearer (loaded from `--bearer-file` at startup) regardless of who's calling locally. Single-user-trust on a laptop. For multi-user scenarios, run multiple instances on the same host, or front the local server with your own auth proxy that injects per-user bearers downstream — both options keep the cache layer's design surface minimal. +#### Confused-deputy guard at startup + +Because the cache forwards a bearer upstream without authenticating the local caller, exposing the bind on a non-loopback interface would turn the binary into an open-proxy laundering anyone's request through the master. The config layer (`zddc/internal/config/config.go`) enforces two defenses: + +1. **Loopback default in client mode.** When `--upstream` is set, `--addr` defaults to `127.0.0.1:8443` instead of `:8443` — but only when `--addr` / `ZDDC_ADDR` was *not* set explicitly. CLI users on a laptop get safe-by-default; operators who want a non-loopback bind opt in explicitly. +2. **Refuse non-loopback bind + bearer without acknowledgement.** A non-loopback `--addr` *with* a configured `--bearer-file` *without* `--insecure-direct` (`ZDDC_INSECURE_DIRECT=1`) refuses to start. The error message names the bind, names the flag to acknowledge, and names the threat (open proxy confused-deputy). The helm `zddc-server-cache/` chart sets `ZDDC_INSECURE_DIRECT=1` and relies on Kubernetes-namespaced networking for the gating — that path is unaffected. The guard is bearer-file-conditional because proxy mode without a bearer doesn't have a credential to launder, and refusing it would needlessly block proxy-without-auth deployments. + ### Bearer token issuance zddc-server issues its own bearer tokens for non-browser callers (CLI tools, scripts, downstream proxy/cache/mirror instances). The master is the identity provider; no external IDP, no JWKS rotation. diff --git a/helm/zddc-server-cache/values.yaml.example b/helm/zddc-server-cache/values.yaml.example index 67ee218..104629a 100644 --- a/helm/zddc-server-cache/values.yaml.example +++ b/helm/zddc-server-cache/values.yaml.example @@ -21,6 +21,15 @@ zddc: # Listening address for incoming requests to this cache instance. # Plain HTTP — ingress / mesh terminates TLS upstream of the pod. + # + # NOTE: in client mode the binary refuses to start with a non- + # loopback bind AND a configured bearer UNLESS ZDDC_INSECURE_DIRECT=1 + # is also set. The cache forwards the bearer to upstream without + # authenticating the local caller, so a bare bind would be an open + # proxy. The chart's deployment.yaml sets ZDDC_INSECURE_DIRECT=1 + # and relies on the Kubernetes-namespaced pod network + ingress + # auth proxy for that gating. If you remove either you must + # redirect the bind to 127.0.0.1. addr: ":8080" # Email-header convention from your authenticating reverse proxy. diff --git a/zddc/README.md b/zddc/README.md index ef62cc3..6220797 100644 --- a/zddc/README.md +++ b/zddc/README.md @@ -240,6 +240,8 @@ provides ops provenance. | `--mirror-subtree ` / `ZDDC_MIRROR_SUBTREE` | Mirror-mode only. Comma-separated URL subtrees the access-triggered walker keeps current. Empty + `--mode=mirror` = full mirror (`/`). | | `--mirror-min-interval ` / `ZDDC_MIRROR_MIN_INTERVAL` | Mirror-mode only. Minimum gap between walks of the same subtree. Default `1h`. Idle subtrees generate zero upstream traffic until next access. | +> ⚠️ **Client mode default-binds to loopback.** When `--upstream` is set, `--addr` defaults to `127.0.0.1:8443` (NOT `:8443`) unless you set `--addr` / `ZDDC_ADDR` explicitly. The cache layer attaches the configured bearer to every forwarded request without authenticating the local caller — a non-loopback bind would expose an open-proxy confused-deputy to anyone reachable on that interface (e.g. local Wi-Fi). The startup config refuses non-loopback binds when a bearer file is configured unless you also pass `--insecure-direct` (`ZDDC_INSECURE_DIRECT=1`) to acknowledge an authenticating reverse proxy or network policy is in front. The `helm/zddc-server-cache/` chart sets `ZDDC_INSECURE_DIRECT=1` and relies on the Kubernetes-namespaced pod network for that gating; CLI users on a laptop should leave the loopback default in place. + ### Pipeline For each incoming `GET` (writes are not yet supported in client mode): diff --git a/zddc/internal/config/config.go b/zddc/internal/config/config.go index 77826ff..d9de9d0 100644 --- a/zddc/internal/config/config.go +++ b/zddc/internal/config/config.go @@ -82,7 +82,7 @@ func Load(args []string) (Config, error) { rootFlag := fs.String("root", os.Getenv("ZDDC_ROOT"), "Path to the served file tree. Default: ZDDC_ROOT or the current directory.") addrFlag := fs.String("addr", getEnv("ZDDC_ADDR", ":8443"), - "Listen address (host:port). Default: ZDDC_ADDR or :8443.") + "Listen address (host:port). Default: ZDDC_ADDR or :8443. **In client mode (--upstream set) the default downgrades to 127.0.0.1:8443** unless --addr/ZDDC_ADDR is set explicitly — the cache layer forwards a bearer to upstream without authenticating the local caller, so a non-loopback bind needs --insecure-direct to acknowledge an authenticating reverse proxy / network policy is in front.") tlsCertFlag := fs.String("tls-cert", os.Getenv("ZDDC_TLS_CERT"), "Path to a PEM TLS certificate. \"none\" disables TLS (plain HTTP). Empty means self-signed.") tlsKeyFlag := fs.String("tls-key", os.Getenv("ZDDC_TLS_KEY"), @@ -156,6 +156,7 @@ func Load(args []string) (Config, error) { // explicit env-var use. corsFlagSet := false accessLogFlagSet := false + addrFlagSet := false if args != nil { fs.Visit(func(f *flag.Flag) { switch f.Name { @@ -163,11 +164,15 @@ func Load(args []string) (Config, error) { corsFlagSet = true case "access-log": accessLogFlagSet = true + case "addr": + addrFlagSet = true } }) } _, accessLogEnvSet := os.LookupEnv("ZDDC_ACCESS_LOG") accessLogExplicit := accessLogFlagSet || accessLogEnvSet + _, addrEnvSet := os.LookupEnv("ZDDC_ADDR") + addrExplicit := addrFlagSet || addrEnvSet cfg := Config{ Root: *rootFlag, @@ -322,6 +327,38 @@ func Load(args []string) (Config, error) { if cfg.Mode != "mirror" { cfg.MirrorSubtree = nil } + + // Confused-deputy guard. The cache layer authenticates to + // upstream via the configured bearer but does NOT authenticate + // the local incoming caller, so a non-loopback bind exposes the + // cache as an open proxy that launders any caller's request + // with the bearer-holder's privileges. Two layers of defense: + // + // 1. When the operator didn't pass --addr / ZDDC_ADDR + // explicitly, downgrade the default to loopback. CLI + // users on a laptop get safe-by-default. + // + // 2. When the operator DID pick a non-loopback bind AND a + // bearer file is configured, refuse to start unless they + // also pass --insecure-direct (the existing flag for + // "I have an authenticating reverse proxy / network + // policy in front, the bare bind is intentional"). The + // helm cache chart sets ZDDC_INSECURE_DIRECT=1 + a + // Kubernetes-namespaced pod network for exactly this + // reason, so the chart path is unaffected. + if !addrExplicit { + cfg.Addr = "127.0.0.1:8443" + } + if cfg.BearerFile != "" && !isLoopbackAddr(cfg.Addr) && !*insecureDirectFlag { + return Config{}, fmt.Errorf( + "client mode (--upstream set) bound to %q is non-loopback AND a bearer is configured; "+ + "the cache will forward that bearer to upstream on every incoming request without "+ + "authenticating the caller, exposing an open-proxy confused-deputy to anyone reachable on this interface. "+ + "Bind to loopback (127.0.0.1: or [::1]:) — the default in client mode unless you set --addr / ZDDC_ADDR explicitly — "+ + "OR pass --insecure-direct (ZDDC_INSECURE_DIRECT=1) to acknowledge that an authenticating reverse proxy "+ + "or network policy gates the bind interface", + cfg.Addr) + } } return cfg, nil diff --git a/zddc/internal/config/config_test.go b/zddc/internal/config/config_test.go index 6575db8..3f499ca 100644 --- a/zddc/internal/config/config_test.go +++ b/zddc/internal/config/config_test.go @@ -390,3 +390,92 @@ func TestLoad_MissingRootZddcAllowedWithInsecureFlag(t *testing.T) { t.Errorf("cfg.Insecure = false, want true (set via --insecure)") } } + +// TestLoad_ClientModeAddrDefaultsToLoopback confirms that --upstream +// triggers the client-mode loopback default for --addr. The cache +// layer forwards the configured bearer to upstream without checking +// the local caller, so a non-loopback default would be a confused- +// deputy open proxy on whatever interface the host happens to bind. +func TestLoad_ClientModeAddrDefaultsToLoopback(t *testing.T) { + root := t.TempDir() + bearer := filepath.Join(root, "tok") + if err := os.WriteFile(bearer, []byte("xyz"), 0o600); err != nil { + t.Fatalf("seed bearer: %v", err) + } + cfg, err := Load([]string{ + "--root", root, + "--upstream", "https://master.example.com", + "--bearer-file", bearer, + }) + if err != nil { + t.Fatalf("Load: %v", err) + } + if cfg.Addr != "127.0.0.1:8443" { + t.Errorf("cfg.Addr = %q, want 127.0.0.1:8443 (loopback default in client mode)", cfg.Addr) + } +} + +// TestLoad_ClientModeNonLoopbackBindRefusesWithoutInsecureDirect locks +// down the confused-deputy guard: a non-loopback bind in client mode +// with a configured bearer must require the operator to acknowledge +// the deployment shape via --insecure-direct. +func TestLoad_ClientModeNonLoopbackBindRefusesWithoutInsecureDirect(t *testing.T) { + root := t.TempDir() + bearer := filepath.Join(root, "tok") + _ = os.WriteFile(bearer, []byte("xyz"), 0o600) + _, err := Load([]string{ + "--root", root, + "--upstream", "https://master.example.com", + "--bearer-file", bearer, + "--addr", "0.0.0.0:8444", + }) + if err == nil { + t.Fatal("Load() succeeded; want refusal for non-loopback client-mode bind with bearer") + } + if !strings.Contains(err.Error(), "--insecure-direct") { + t.Errorf("error should mention --insecure-direct as the acknowledgement flag; got: %v", err) + } +} + +// TestLoad_ClientModeNonLoopbackBindAllowedWithInsecureDirect confirms +// the operator can opt out of the loopback guard via the existing flag +// (matches the helm chart path which sets ZDDC_INSECURE_DIRECT=1). +func TestLoad_ClientModeNonLoopbackBindAllowedWithInsecureDirect(t *testing.T) { + root := t.TempDir() + bearer := filepath.Join(root, "tok") + _ = os.WriteFile(bearer, []byte("xyz"), 0o600) + cfg, err := Load([]string{ + "--root", root, + "--upstream", "https://master.example.com", + "--bearer-file", bearer, + "--addr", "0.0.0.0:8444", + "--insecure-direct", + }) + if err != nil { + t.Fatalf("Load: %v", err) + } + if cfg.Addr != "0.0.0.0:8444" { + t.Errorf("cfg.Addr = %q, want 0.0.0.0:8444", cfg.Addr) + } +} + +// TestLoad_ClientModeNoBearerSkipsGuard documents the narrower scope: +// without a bearer file, there's no credential to launder, so a non- +// loopback bind is the operator's call (the deployment still gets the +// X-ZDDC-Cache: hit/miss metadata-disclosure surface, but no credential +// forwarding). Refusing in this case would needlessly block proxy-mode +// deployments that don't authenticate to upstream at all. +func TestLoad_ClientModeNoBearerSkipsGuard(t *testing.T) { + root := t.TempDir() + cfg, err := Load([]string{ + "--root", root, + "--upstream", "https://master.example.com", + "--addr", "0.0.0.0:8444", + }) + if err != nil { + t.Fatalf("Load (client, non-loopback, no bearer): %v", err) + } + if cfg.Addr != "0.0.0.0:8444" { + t.Errorf("Addr = %q", cfg.Addr) + } +}