fix(apps): cache key now includes scheme + full host:port (no collisions)

The previous keyForURL stripped default ports (:443 for https, :80
for http) and omitted the scheme, so:

  http://example.com/x.html   ──┐
  https://example.com/x.html  ──┴──→ same cache entry (collision)

  https://example.com/x.html      ──┐
  https://example.com:443/x.html  ──┴──→ same cache entry

This was a defensible HTTP convention but a real correctness issue
on reverse-proxy stacks where http and https legitimately serve
different bytes for the same path, or where two upstreams share a
host but answer on different default ports.

New layout: <scheme>/<host>[:<port>]/<path>. Full origin tuple in
the key, no port stripping, scheme segregation. Examples:

  https/zddc.varasys.io/releases/archive_stable.html
  https/example.com:8443/x.html
  http/example.com/y.html      (distinct from https/example.com/y.html)

Operators retain the "ls _app/ to inspect what's cached" affordance
they relied on; they just see one extra directory layer (scheme
first, then host).

Tests:
  * Updated TestKeyForURL to assert the new layout for every
    previously-covered case
  * New TestKeyForURL_NoCollisions explicitly asserts that the
    dimensions previously collapsed (default-port↔bare, http↔https,
    different non-default ports) now produce distinct keys

Doc references to the cache layout under <ZDDC_ROOT>/_app/ updated
in zddc/README.md (3 mentions).

NOTE: existing _app/ caches under the old layout will be ignored
on first request after upgrade — entries will be re-fetched and
written to the new path. Operators can `rm -rf <ZDDC_ROOT>/_app`
during the upgrade window if they prefer not to have orphans.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
ZDDC 2026-05-04 17:57:28 -05:00
parent 460d5fdada
commit b20e98b6aa
3 changed files with 85 additions and 19 deletions

View file

@ -344,7 +344,7 @@ naive intuition suggests.
and serve arbitrary HTML to every viewer below that level. Subtree write
authority on `.zddc` should be treated as full UI-mounting authority. The
`_app/` cache is fetch-once-and-keep — operators clear it by deleting
`<ZDDC_ROOT>/_app/<host>/<path>`. (See "Apps: virtual tool HTMLs" below for
`<ZDDC_ROOT>/_app/<scheme>/<host>[:<port>]/<path>`. (See "Apps: virtual tool HTMLs" below for
the resolver order; SHA-256 pinning is on the federal-readiness list, not
currently implemented.)
@ -410,7 +410,7 @@ guarantee these for the model above to hold:
SIEM) via a sidecar; do not treat the local rotation as the system of
record.
4. **`apps:` URL fetches have no integrity check.** Fetched once on first
miss, cached at `<ZDDC_ROOT>/_app/<host>/<path>` forever — no SHA-256 pin,
miss, cached at `<ZDDC_ROOT>/_app/<scheme>/<host>[:<port>]/<path>` forever — no SHA-256 pin,
no signature. Use only URLs you control, treat the apps cache as a trust
boundary, and audit who has `.zddc` write authority where.
@ -893,7 +893,7 @@ For any path, the resolution order is:
3. **Embedded** — the build-time HTML compiled into the binary.
URL sources are fetched once on first request and cached forever in
`<ZDDC_ROOT>/_app/<host>/<path>`. There is no background refresh and no
`<ZDDC_ROOT>/_app/<scheme>/<host>[:<port>]/<path>`. There is no background refresh and no
hash verification — to pull a new build, delete the cache file. Concurrent
misses for the same URL share one outbound fetch (singleflight). Direct
URL access to `/_app/...` is blocked at dispatch; cached HTMLs are served

View file

@ -37,7 +37,27 @@ func NewCache(root string) (*Cache, error) {
func (c *Cache) Root() string { return c.root }
// keyForURL converts a URL into a relative filesystem path under the
// cache root, e.g. "zddc.varasys.io/releases/archive_stable.html".
// cache root.
//
// Layout: <scheme>/<host>[:<port>]/<path>. The full origin tuple is in
// the key so two URLs that resolve different content cannot collide:
//
// https://example.com/x.html → https/example.com/x.html
// http://example.com/x.html → http/example.com/x.html
// https://example.com:8443/x.html → https/example.com:8443/x.html
//
// No port stripping. The previous behavior — collapsing :443 onto bare
// host for https (and :80 for http) — was a defensible HTTP convention
// but conflated "the operator wrote a URL with the default port" with
// "the operator wrote a bare-host URL". With explicit port preserved,
// every URL maps to exactly one filesystem path; operators can still
// `ls _app/https/example.com/` to inspect what's cached. Scheme
// segregation prevents an http:// hit from masquerading as an https://
// hit when both are deliberately distinct (rare, but real on
// reverse-proxied stacks where http and https serve different bytes).
//
// Host is lowercased so the canonical-host normalization survives
// case-insensitive DNS. Port is preserved verbatim.
func keyForURL(rawURL string) (string, error) {
u, err := url.Parse(rawURL)
if err != nil {
@ -52,13 +72,15 @@ func keyForURL(rawURL string) (string, error) {
if u.RawQuery != "" {
return "", fmt.Errorf("URL must not contain query string: %s", rawURL)
}
host := strings.ToLower(u.Host)
// Lowercase the host part but preserve the port verbatim. Without
// this we'd lowercase a numeric port unnecessarily, which is fine
// but pointless; with this the ASCII-cased host normalization
// works the same for both default and explicit-port URLs.
host := u.Host
if i := strings.Index(host, ":"); i >= 0 {
port := host[i+1:]
hostOnly := host[:i]
if (u.Scheme == "http" && port == "80") || (u.Scheme == "https" && port == "443") {
host = hostOnly
}
host = strings.ToLower(host[:i]) + host[i:]
} else {
host = strings.ToLower(host)
}
p := u.Path
for strings.Contains(p, "//") {
@ -72,7 +94,7 @@ func keyForURL(rawURL string) (string, error) {
if strings.Contains(cleaned, "..") {
return "", fmt.Errorf("URL path contains '..'")
}
return host + cleaned, nil
return u.Scheme + "/" + host + cleaned, nil
}
func (c *Cache) pathFor(rawURL string) (string, error) {

View file

@ -11,14 +11,26 @@ func TestKeyForURL(t *testing.T) {
cases := []struct {
raw, want string
}{
{"https://zddc.varasys.io/releases/archive_stable.html", "zddc.varasys.io/releases/archive_stable.html"},
{"https://ZDDC.Varasys.IO/releases/archive_stable.html", "zddc.varasys.io/releases/archive_stable.html"},
{"http://example.com:80/foo.html", "example.com/foo.html"},
{"https://example.com:443/foo.html", "example.com/foo.html"},
{"https://example.com:8443/foo.html", "example.com:8443/foo.html"},
{"https://example.com/", "example.com/index.html"},
{"https://example.com", "example.com/index.html"},
{"https://example.com//foo//bar.html", "example.com/foo/bar.html"},
// Default ports are PRESERVED — no port-stripping (the previous
// behavior conflated "operator wrote :443" with "operator wrote
// bare host"; with the full origin in the key, every URL maps
// to exactly one path).
{"https://zddc.varasys.io/releases/archive_stable.html", "https/zddc.varasys.io/releases/archive_stable.html"},
{"https://ZDDC.Varasys.IO/releases/archive_stable.html", "https/zddc.varasys.io/releases/archive_stable.html"},
{"http://example.com/foo.html", "http/example.com/foo.html"},
{"http://example.com:80/foo.html", "http/example.com:80/foo.html"},
{"https://example.com/foo.html", "https/example.com/foo.html"},
{"https://example.com:443/foo.html", "https/example.com:443/foo.html"},
{"https://example.com:8443/foo.html", "https/example.com:8443/foo.html"},
// Scheme segregation: same host+path under http and https map
// to different cache entries (defensive against reverse-proxy
// stacks that legitimately serve different bytes per scheme).
{"http://example.com/x.html", "http/example.com/x.html"},
{"https://example.com/x.html", "https/example.com/x.html"},
// Path normalization preserved.
{"https://example.com/", "https/example.com/index.html"},
{"https://example.com", "https/example.com/index.html"},
{"https://example.com//foo//bar.html", "https/example.com/foo/bar.html"},
}
for _, tc := range cases {
t.Run(tc.raw, func(t *testing.T) {
@ -33,6 +45,38 @@ func TestKeyForURL(t *testing.T) {
}
}
// TestKeyForURL_NoCollisions: explicit assertion that the dimensions
// previously collapsed (default-port ↔ bare-host, http ↔ https) are
// now distinct. Any future change that re-introduces collapsing will
// fail this test.
func TestKeyForURL_NoCollisions(t *testing.T) {
pairs := [][2]string{
// Different scheme, same host+path
{"http://example.com/x.html", "https://example.com/x.html"},
// https default port preserved (not collapsed onto bare host)
{"https://example.com/x.html", "https://example.com:443/x.html"},
// http default port preserved
{"http://example.com/x.html", "http://example.com:80/x.html"},
// Different non-default ports
{"https://example.com:8443/x.html", "https://example.com:9443/x.html"},
}
for _, p := range pairs {
t.Run(p[0]+" vs "+p[1], func(t *testing.T) {
a, err := keyForURL(p[0])
if err != nil {
t.Fatalf("keyForURL(%q): %v", p[0], err)
}
b, err := keyForURL(p[1])
if err != nil {
t.Fatalf("keyForURL(%q): %v", p[1], err)
}
if a == b {
t.Errorf("collision: %q and %q both → %q", p[0], p[1], a)
}
})
}
}
func TestKeyForURL_Errors(t *testing.T) {
cases := []string{
"",