From b20e98b6aae3cedb05e111688f6902cc2c97559d Mon Sep 17 00:00:00 2001 From: ZDDC Date: Mon, 4 May 2026 17:57:28 -0500 Subject: [PATCH] fix(apps): cache key now includes scheme + full host:port (no collisions) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous keyForURL stripped default ports (:443 for https, :80 for http) and omitted the scheme, so: http://example.com/x.html ──┐ https://example.com/x.html ──┴──→ same cache entry (collision) https://example.com/x.html ──┐ https://example.com:443/x.html ──┴──→ same cache entry This was a defensible HTTP convention but a real correctness issue on reverse-proxy stacks where http and https legitimately serve different bytes for the same path, or where two upstreams share a host but answer on different default ports. New layout: /[:]/. Full origin tuple in the key, no port stripping, scheme segregation. Examples: https/zddc.varasys.io/releases/archive_stable.html https/example.com:8443/x.html http/example.com/y.html (distinct from https/example.com/y.html) Operators retain the "ls _app/ to inspect what's cached" affordance they relied on; they just see one extra directory layer (scheme first, then host). Tests: * Updated TestKeyForURL to assert the new layout for every previously-covered case * New TestKeyForURL_NoCollisions explicitly asserts that the dimensions previously collapsed (default-port↔bare, http↔https, different non-default ports) now produce distinct keys Doc references to the cache layout under /_app/ updated in zddc/README.md (3 mentions). NOTE: existing _app/ caches under the old layout will be ignored on first request after upgrade — entries will be re-fetched and written to the new path. Operators can `rm -rf /_app` during the upgrade window if they prefer not to have orphans. Co-Authored-By: Claude Opus 4.7 (1M context) --- zddc/README.md | 6 ++-- zddc/internal/apps/cache.go | 38 +++++++++++++++----- zddc/internal/apps/cache_test.go | 60 +++++++++++++++++++++++++++----- 3 files changed, 85 insertions(+), 19 deletions(-) diff --git a/zddc/README.md b/zddc/README.md index 33ec144..9327111 100644 --- a/zddc/README.md +++ b/zddc/README.md @@ -344,7 +344,7 @@ naive intuition suggests. and serve arbitrary HTML to every viewer below that level. Subtree write authority on `.zddc` should be treated as full UI-mounting authority. The `_app/` cache is fetch-once-and-keep — operators clear it by deleting - `/_app//`. (See "Apps: virtual tool HTMLs" below for + `/_app//[:]/`. (See "Apps: virtual tool HTMLs" below for the resolver order; SHA-256 pinning is on the federal-readiness list, not currently implemented.) @@ -410,7 +410,7 @@ guarantee these for the model above to hold: SIEM) via a sidecar; do not treat the local rotation as the system of record. 4. **`apps:` URL fetches have no integrity check.** Fetched once on first - miss, cached at `/_app//` forever — no SHA-256 pin, + miss, cached at `/_app//[:]/` forever — no SHA-256 pin, no signature. Use only URLs you control, treat the apps cache as a trust boundary, and audit who has `.zddc` write authority where. @@ -893,7 +893,7 @@ For any path, the resolution order is: 3. **Embedded** — the build-time HTML compiled into the binary. URL sources are fetched once on first request and cached forever in -`/_app//`. There is no background refresh and no +`/_app//[:]/`. There is no background refresh and no hash verification — to pull a new build, delete the cache file. Concurrent misses for the same URL share one outbound fetch (singleflight). Direct URL access to `/_app/...` is blocked at dispatch; cached HTMLs are served diff --git a/zddc/internal/apps/cache.go b/zddc/internal/apps/cache.go index ccb6bb6..50a4ccb 100644 --- a/zddc/internal/apps/cache.go +++ b/zddc/internal/apps/cache.go @@ -37,7 +37,27 @@ func NewCache(root string) (*Cache, error) { func (c *Cache) Root() string { return c.root } // keyForURL converts a URL into a relative filesystem path under the -// cache root, e.g. "zddc.varasys.io/releases/archive_stable.html". +// cache root. +// +// Layout: /[:]/. The full origin tuple is in +// the key so two URLs that resolve different content cannot collide: +// +// https://example.com/x.html → https/example.com/x.html +// http://example.com/x.html → http/example.com/x.html +// https://example.com:8443/x.html → https/example.com:8443/x.html +// +// No port stripping. The previous behavior — collapsing :443 onto bare +// host for https (and :80 for http) — was a defensible HTTP convention +// but conflated "the operator wrote a URL with the default port" with +// "the operator wrote a bare-host URL". With explicit port preserved, +// every URL maps to exactly one filesystem path; operators can still +// `ls _app/https/example.com/` to inspect what's cached. Scheme +// segregation prevents an http:// hit from masquerading as an https:// +// hit when both are deliberately distinct (rare, but real on +// reverse-proxied stacks where http and https serve different bytes). +// +// Host is lowercased so the canonical-host normalization survives +// case-insensitive DNS. Port is preserved verbatim. func keyForURL(rawURL string) (string, error) { u, err := url.Parse(rawURL) if err != nil { @@ -52,13 +72,15 @@ func keyForURL(rawURL string) (string, error) { if u.RawQuery != "" { return "", fmt.Errorf("URL must not contain query string: %s", rawURL) } - host := strings.ToLower(u.Host) + // Lowercase the host part but preserve the port verbatim. Without + // this we'd lowercase a numeric port unnecessarily, which is fine + // but pointless; with this the ASCII-cased host normalization + // works the same for both default and explicit-port URLs. + host := u.Host if i := strings.Index(host, ":"); i >= 0 { - port := host[i+1:] - hostOnly := host[:i] - if (u.Scheme == "http" && port == "80") || (u.Scheme == "https" && port == "443") { - host = hostOnly - } + host = strings.ToLower(host[:i]) + host[i:] + } else { + host = strings.ToLower(host) } p := u.Path for strings.Contains(p, "//") { @@ -72,7 +94,7 @@ func keyForURL(rawURL string) (string, error) { if strings.Contains(cleaned, "..") { return "", fmt.Errorf("URL path contains '..'") } - return host + cleaned, nil + return u.Scheme + "/" + host + cleaned, nil } func (c *Cache) pathFor(rawURL string) (string, error) { diff --git a/zddc/internal/apps/cache_test.go b/zddc/internal/apps/cache_test.go index 1a1eb52..67b5e09 100644 --- a/zddc/internal/apps/cache_test.go +++ b/zddc/internal/apps/cache_test.go @@ -11,14 +11,26 @@ func TestKeyForURL(t *testing.T) { cases := []struct { raw, want string }{ - {"https://zddc.varasys.io/releases/archive_stable.html", "zddc.varasys.io/releases/archive_stable.html"}, - {"https://ZDDC.Varasys.IO/releases/archive_stable.html", "zddc.varasys.io/releases/archive_stable.html"}, - {"http://example.com:80/foo.html", "example.com/foo.html"}, - {"https://example.com:443/foo.html", "example.com/foo.html"}, - {"https://example.com:8443/foo.html", "example.com:8443/foo.html"}, - {"https://example.com/", "example.com/index.html"}, - {"https://example.com", "example.com/index.html"}, - {"https://example.com//foo//bar.html", "example.com/foo/bar.html"}, + // Default ports are PRESERVED — no port-stripping (the previous + // behavior conflated "operator wrote :443" with "operator wrote + // bare host"; with the full origin in the key, every URL maps + // to exactly one path). + {"https://zddc.varasys.io/releases/archive_stable.html", "https/zddc.varasys.io/releases/archive_stable.html"}, + {"https://ZDDC.Varasys.IO/releases/archive_stable.html", "https/zddc.varasys.io/releases/archive_stable.html"}, + {"http://example.com/foo.html", "http/example.com/foo.html"}, + {"http://example.com:80/foo.html", "http/example.com:80/foo.html"}, + {"https://example.com/foo.html", "https/example.com/foo.html"}, + {"https://example.com:443/foo.html", "https/example.com:443/foo.html"}, + {"https://example.com:8443/foo.html", "https/example.com:8443/foo.html"}, + // Scheme segregation: same host+path under http and https map + // to different cache entries (defensive against reverse-proxy + // stacks that legitimately serve different bytes per scheme). + {"http://example.com/x.html", "http/example.com/x.html"}, + {"https://example.com/x.html", "https/example.com/x.html"}, + // Path normalization preserved. + {"https://example.com/", "https/example.com/index.html"}, + {"https://example.com", "https/example.com/index.html"}, + {"https://example.com//foo//bar.html", "https/example.com/foo/bar.html"}, } for _, tc := range cases { t.Run(tc.raw, func(t *testing.T) { @@ -33,6 +45,38 @@ func TestKeyForURL(t *testing.T) { } } +// TestKeyForURL_NoCollisions: explicit assertion that the dimensions +// previously collapsed (default-port ↔ bare-host, http ↔ https) are +// now distinct. Any future change that re-introduces collapsing will +// fail this test. +func TestKeyForURL_NoCollisions(t *testing.T) { + pairs := [][2]string{ + // Different scheme, same host+path + {"http://example.com/x.html", "https://example.com/x.html"}, + // https default port preserved (not collapsed onto bare host) + {"https://example.com/x.html", "https://example.com:443/x.html"}, + // http default port preserved + {"http://example.com/x.html", "http://example.com:80/x.html"}, + // Different non-default ports + {"https://example.com:8443/x.html", "https://example.com:9443/x.html"}, + } + for _, p := range pairs { + t.Run(p[0]+" vs "+p[1], func(t *testing.T) { + a, err := keyForURL(p[0]) + if err != nil { + t.Fatalf("keyForURL(%q): %v", p[0], err) + } + b, err := keyForURL(p[1]) + if err != nil { + t.Fatalf("keyForURL(%q): %v", p[1], err) + } + if a == b { + t.Errorf("collision: %q and %q both → %q", p[0], p[1], a) + } + }) + } +} + func TestKeyForURL_Errors(t *testing.T) { cases := []string{ "",