ZDDC/zddc/internal/apps/cache.go
ZDDC 8b6a2dc3e3 feat(zddc-server): apps fetch+cache subsystem with cascade overrides
Adds internal/apps/ package serving the five tool HTMLs at virtual paths
based on the surrounding folder name convention:

  archive      every directory (multi-project, project, archive, vendor)
  classifier   any Incoming/Working/Staging directory and subtree
  mdedit       any Working directory and subtree
  transmittal  any Staging directory and subtree
  landing      only at deployment root

The current-stable build of every tool is //go:embed'd into the binary
at compile time — that's the default with zero config. Operators
override per-directory via .zddc apps: entries; closer-to-leaf wins.

Spec syntax (in any apps: value):

  stable / beta / alpha / :stable          channel
  v0.0.4 / v0.0 / v0 / :v0.0.4              version
  https://my-mirror/releases                URL prefix only
  https://my-mirror/releases:beta           URL prefix + channel
  https://my-fork/archive.html              terminal full URL
  ./local.html / /abs/path.html             terminal local path

The special apps.default key provides a baseline URL prefix and channel
inherited by any app not overridden per-name. Per-axis cascade: a deeper
.zddc can override the URL, the channel, or both.

Cascade walks root→leaf; default applies first at each level, then the
per-app entry. Terminal sources (paths and full .html URLs) short-circuit
composition; deeper non-terminal entries override parent terminals.

URL sources fetch once on first request and cache forever in
<ZDDC_ROOT>/_app/<host>/<path> — different upstreams with the same
filename stay distinct. No background refresh, no SHA-256 verification:
operators delete the cache file to force a refetch. Concurrent misses
for the same source dedupe via a 30-line hand-rolled singleflight.

Per-request override: any user can append ?v=<spec> to a tool URL
(e.g. ?v=beta, ?v=v0.0.4, ?v=:alpha, ?v=https://mirror/releases:beta)
to ask for a different build for one request. Security: ?v= serves
ONLY versions already in the cache (cache miss returns 404; path
sources are rejected outright with 400). Users cannot trigger
arbitrary upstream fetches via crafted URLs.

Failed URL fetches (network down, 5xx) fall back to embedded with a
one-time WARN log. The X-ZDDC-Source response header reports what
served: fetch:URL / cache:URL / path:/abs / embedded:<app>@<build>.

Wire-in (cmd/zddc-server/main.go): dispatch routes <dir>/<app>.html
through apps.MatchAppHTML + AppAvailableAt + apps.Server.Serve when
no real file exists. Direct URL access to /_app/... is blocked at
the dispatch layer — cached files must go through the apps resolver
so they get correct Content-Type and ACL gating.

Schema (internal/zddc/file.go): ZddcFile gains Apps map[string]string
for cascade overrides. Validator (internal/zddc/validate.go) accepts
the special "default" key alongside the five canonical app names and
all spec forms.

Removes ZDDC_APPS_* env vars (no admin UI, no refresh interval, no
upstream allow-list — the simpler model has fewer knobs).

40+ unit tests across the new package: parser shapes, cascade
resolution with default+per-app interactions, terminal short-circuit
semantics, ?v= cache-only enforcement, embedded fallback, atomic
cache writes, singleflight dedup. Plus end-to-end dispatch tests in
cmd/zddc-server/main_test.go.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 15:25:25 -05:00

165 lines
3.9 KiB
Go

package apps
import (
"fmt"
"io/fs"
"net/url"
"os"
"path/filepath"
"strings"
)
// Cache stores fetched URL responses on disk under <ZDDC_ROOT>/_app/.
// Each entry is a plain file whose relative path is derived from the
// upstream host and URL path (see keyForURL), so operators can list
// and inspect entries by hand. There is no metadata, no SHA-256, and no
// expiration — an entry is fetched once and kept forever. To force a
// refetch, delete the cache file.
type Cache struct {
// root is the cache directory, held in the filepath.Clean form that
// NewCache records.
root string
}
// NewCache creates a Cache rooted at the given path. The directory (and
// any missing parents) is created if it does not exist, and stale temp
// files left over from interrupted writes are swept on construction.
//
// An empty root is rejected: filepath.Clean would turn it into ".",
// which would root the cache at the process working directory and let
// the startup sweep delete unrelated files there.
//
// It returns an error if root is empty, if the directory cannot be
// created, or if the sweep fails.
func NewCache(root string) (*Cache, error) {
	if root == "" {
		return nil, fmt.Errorf("create cache root: empty path")
	}
	root = filepath.Clean(root)
	if err := os.MkdirAll(root, 0o755); err != nil {
		return nil, fmt.Errorf("create cache root: %w", err)
	}
	c := &Cache{root: root}
	if err := c.sweepTemps(); err != nil {
		return nil, fmt.Errorf("sweep temps: %w", err)
	}
	return c, nil
}
// Root reports the directory under which this cache keeps its entries,
// in the cleaned form recorded when the Cache was constructed.
func (c *Cache) Root() string {
	return c.root
}
// keyForURL converts a URL into a relative, slash-separated filesystem
// path under the cache root, e.g.
// "zddc.varasys.io/releases/archive_stable.html".
//
// Only http and https URLs with a host and without a query string are
// accepted. The host is lower-cased and a default port (80 for http,
// 443 for https) is dropped so equivalent spellings share one entry;
// any other port stays part of the host segment. The URL path is
// cleaned (repeated slashes collapsed, "." and ".." segments resolved
// against the root), and a path that is empty or cleans to "/" maps to
// "index.html".
//
// An error is returned when the URL cannot be parsed, uses another
// scheme, has no host, carries a query string, has "." or ".." as its
// host, or still contains ".." after cleaning.
func keyForURL(rawURL string) (string, error) {
	u, err := url.Parse(rawURL)
	if err != nil {
		return "", fmt.Errorf("parse URL: %w", err)
	}
	if u.Scheme != "http" && u.Scheme != "https" {
		return "", fmt.Errorf("unsupported scheme %q", u.Scheme)
	}
	if u.Host == "" {
		return "", fmt.Errorf("URL is missing host")
	}
	if u.RawQuery != "" {
		return "", fmt.Errorf("URL must not contain query string: %s", rawURL)
	}

	host := strings.ToLower(u.Host)
	// u.Port understands bracketed IPv6 literals, so "[::1]:443" is
	// normalized the same way as "example.com:443".
	port := u.Port()
	if (u.Scheme == "http" && port == "80") || (u.Scheme == "https" && port == "443") {
		host = strings.TrimSuffix(host, ":"+port)
	}
	// The host becomes the first path element of the key. url.Parse
	// accepts "." and ".." as hosts; joined onto the cache root they
	// would address the root itself or its parent directory.
	if host == "." || host == ".." {
		return "", fmt.Errorf("invalid host %q", u.Host)
	}

	// Cleaning a rooted path resolves every "." and ".." segment and
	// collapses repeated slashes; what remains is relative to the host.
	rel := strings.TrimLeft(filepath.ToSlash(filepath.Clean("/"+u.Path)), "/")
	if rel == "" {
		rel = "index.html"
	}
	// Defensive: no ".." segment can survive the clean above, so this
	// only rejects file names that embed "..".
	if strings.Contains(rel, "..") {
		return "", fmt.Errorf("URL path contains '..'")
	}
	return host + "/" + rel, nil
}
// pathFor resolves rawURL to the on-disk location of its cache entry:
// the key produced by keyForURL, converted to OS separators and joined
// onto the cache root. Any keyForURL error is returned unchanged.
func (c *Cache) pathFor(rawURL string) (string, error) {
	rel, err := keyForURL(rawURL)
	if err == nil {
		return filepath.Join(c.root, filepath.FromSlash(rel)), nil
	}
	return "", err
}
// Has reports whether a cache entry exists for the URL. It answers
// false both when the URL cannot be mapped to a cache path and when
// os.Stat on that path fails for any reason.
func (c *Cache) Has(rawURL string) bool {
	target, err := c.pathFor(rawURL)
	if err != nil {
		return false
	}
	if _, statErr := os.Stat(target); statErr != nil {
		return false
	}
	return true
}
// Read returns the cached body for the URL. A URL that cannot be mapped
// to a cache path yields that mapping error; otherwise the result is
// whatever os.ReadFile reports, so a missing entry surfaces as an error
// matching os.ErrNotExist.
func (c *Cache) Read(rawURL string) ([]byte, error) {
	target, err := c.pathFor(rawURL)
	if err != nil {
		return nil, err
	}
	body, readErr := os.ReadFile(target)
	return body, readErr
}
// Write stores body as the cache entry for the URL. Missing parent
// directories are created first, then the bytes are handed to
// writeAtomic, which goes through a temp file and a rename so a
// partially written entry is never visible at the final path.
func (c *Cache) Write(rawURL string, body []byte) error {
	target, err := c.pathFor(rawURL)
	if err != nil {
		return err
	}
	if mkErr := os.MkdirAll(filepath.Dir(target), 0o755); mkErr != nil {
		return mkErr
	}
	return writeAtomic(target, body)
}
// writeAtomic writes data to path by way of a sibling temp file named
// "<base>.tmp.<random>" in the same directory: the data is written,
// synced and closed, and only then renamed over path. On any failure
// the temp file is removed and the error is returned as-is.
func writeAtomic(path string, data []byte) error {
	tmp, err := os.CreateTemp(filepath.Dir(path), filepath.Base(path)+".tmp.*")
	if err != nil {
		return err
	}
	tmpName := tmp.Name()

	// Until the rename succeeds, every exit path discards the temp file.
	renamed := false
	defer func() {
		if !renamed {
			_ = os.Remove(tmpName)
		}
	}()

	_, err = tmp.Write(data)
	if err == nil {
		err = tmp.Sync()
	}
	if err != nil {
		// The write/sync error is the one worth reporting; the close
		// result is deliberately dropped.
		_ = tmp.Close()
		return err
	}
	if err = tmp.Close(); err != nil {
		return err
	}
	if err = os.Rename(tmpName, path); err != nil {
		return err
	}
	renamed = true
	return nil
}
// sweepTemps walks the cache tree and removes temp files left behind by
// interrupted writes. Only regular entries whose name has the shape
// "<base>.tmp.<digits>" are removed — the shape os.CreateTemp produces
// for writeAtomic's "<base>.tmp.*" pattern — so a cached file whose
// upstream name merely contains ".tmp." (e.g. "report.tmp.html") is
// kept.
//
// Removal is best effort. An error encountered while walking aborts the
// sweep and is returned, except that a not-exist error is treated as
// nothing to sweep.
func (c *Cache) sweepTemps() error {
	err := filepath.WalkDir(c.root, func(p string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}
		if !d.IsDir() && isStaleTempName(d.Name()) {
			// A leftover that cannot be removed is harmless; it is
			// simply picked up again by a later sweep.
			_ = os.Remove(p)
		}
		return nil
	})
	if err != nil && !os.IsNotExist(err) {
		return err
	}
	return nil
}

// isStaleTempName reports whether name ends in ".tmp." followed by one
// or more decimal digits.
func isStaleTempName(name string) bool {
	const marker = ".tmp."
	i := strings.LastIndex(name, marker)
	if i < 0 {
		return false
	}
	suffix := name[i+len(marker):]
	if suffix == "" {
		return false
	}
	for _, r := range suffix {
		if r < '0' || r > '9' {
			return false
		}
	}
	return true
}