diff --git a/AGENTS.md b/AGENTS.md index 371d47c..381d77e 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -459,6 +459,14 @@ ZDDC_ROOT=/path/to/your/archive ZDDC_TLS_CERT=none ZDDC_ADDR=:8080 \ | `ZDDC_APPS_PUBKEY` | *(empty)* | Path to PEM Ed25519 pubkey for verifying signatures on URL-fetched `apps:` artifacts. Empty = URL apps refused. Download from `zddc.varasys.io/pubkey.pem` (canonical channels) or supply your own. No baked-in default — same posture as TLS certs. Alternative inline form: `apps_pubkey:` in root `.zddc` (root-only, env/flag wins). | | `ZDDC_ACCESS_LOG` | `/.zddc.d/logs/access-.log` | JSON-line audit log (lumberjack-rotated, 100 MB / 10 backups / 90 days, gzipped). Server auto-mkdirs the parent. Set explicitly to empty (`--access-log=`) to disable. Per-host filename + `host` field in every record so multi-replica deployments writing to the same `.zddc.d/` dir disambiguate cleanly. | +### URL handling + +**URLs are case-insensitive.** The dispatcher canonicalizes `r.URL.Path` against on-disk casing before any handler runs (`zddc/internal/fs/resolve.go ResolveCanonical`). Per segment: lowercase variant wins if it exists on disk; otherwise exact-case wins; otherwise readdir+CI scan with the lowercase variant winning the tiebreak when multiple case variants are siblings on disk. Walk stops at the first segment that doesn't exist so virtual prefixes (`.archive`, `.profile`, `.tokens`, `.api`, `.auth`) and 404 paths flow through with their tail preserved verbatim. + +**File and folder names preserve case on disk.** The canonicalization is purely a URL→filesystem-name mapping; nothing renames anything. Lowercase is the *project-wide canonical* convention, and auto-created folders in `internal/zddc/ensure.go` (`working/`, `staging/`, `archive//incoming/`) and the server's own state dirs (`_app/`, `.zddc.d/tokens/`, `.zddc.d/outbox/`, `.zddc.d/logs/`) are all lowercase by string literal. Operators can drop a `Mixed-Case-Folder/` and it stays mixed-case. 
+ +**Audit log captures the as-typed path.** `AccessLogMiddleware` snapshots `r.URL.Path` before dispatch rewrites it; the audit record's `path` field is what the client sent. When canonicalization changed it, a `resolved_path` field is added. + ### Client mode (proxy / cache / mirror) When `--upstream ` is set, the binary runs as a **downstream client** of another zddc-server instead of a master. `cmd/zddc-server/main.go` short-circuits to `runClient(cfg)`, which builds a `*cache.Cache` from `zddc/internal/cache/` and uses it as the entire request handler — no archive index, no apps server, no watcher, no OPA decider, no ACL middleware, no token store. diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index ef2b539..c03ee55 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -464,6 +464,7 @@ none of them is load-bearing alone. | ACL cascade | The default decider's rule set | Per-directory `.zddc` YAML with verb-set permissions (`r`/`w`/`c`/`d`/`a`) and roles, walked deepest-first first-match-wins under `--cascade-mode=delegated` or with absolute ancestor denies under `--cascade-mode=strict` (`zddc/internal/zddc/acl.go`, `cascade.go`). External OPA can replace this rule set with arbitrary Rego while keeping the same `.zddc` files as input data | | Special folders | Codify the bilateral exchange-record archetype | `Incoming`/`Working`/`Staging` get auto-ownership on mkdir (creator gets `rwcda` via an auto-written `.zddc`); `Issued`/`Received` enforce a server-side WORM split (ancestor grants masked to `r`; only an explicit `.zddc` at-or-below the WORM folder can grant `c` for a write-once drop-box). Admins exempt. `zddc/internal/zddc/special.go` | | Tool-rooted view | Make the caller's accessible subtree feel like their entire world (UX containment) | Archive auto-served at every directory; the URL it's served at *is* its root. 
No breadcrumb leads above | +| URL canonicalization | Resolve URL paths to on-disk casing before any layer below sees them | `zddc/internal/fs/resolve.go ResolveCanonical` — case-insensitive lookup with lowercase-wins tiebreak when sibling case variants exist on disk. File and folder names preserve case on disk; the canonicalization is purely URL→FS-name mapping. Virtual prefixes (`.archive`, `.profile`, `.tokens`) flow through verbatim | | Reserved hidden prefixes | Hide operator side-state (caches, dev-shell home dirs) from listings and direct fetch | `.`-prefixed → 404 + listing-filtered; `_`-prefixed → listing-filtered only | | Audit log | Reconstruct who did what after the fact | JSON-line tee per request to `/.zddc.d/logs/access-.log`; writes also emit `file_write` op records | | File API | Authenticated CRUD over the served tree | `zddc/internal/handler/fileapi.go` — PUT/DELETE/POST routed through the same ACL chain as GET, with per-method verbs (`r`/`w`/`c`/`d`/`a`). Mkdir under `Incoming`/`Working`/`Staging` writes a creator-owned `.zddc` automatically | diff --git a/zddc/cmd/zddc-server/main.go b/zddc/cmd/zddc-server/main.go index 2785030..4891997 100644 --- a/zddc/cmd/zddc-server/main.go +++ b/zddc/cmd/zddc-server/main.go @@ -19,6 +19,7 @@ import ( "codeberg.org/VARASYS/ZDDC/zddc/internal/auth" "codeberg.org/VARASYS/ZDDC/zddc/internal/cache" "codeberg.org/VARASYS/ZDDC/zddc/internal/config" + appfs "codeberg.org/VARASYS/ZDDC/zddc/internal/fs" "codeberg.org/VARASYS/ZDDC/zddc/internal/handler" "codeberg.org/VARASYS/ZDDC/zddc/internal/policy" "codeberg.org/VARASYS/ZDDC/zddc/internal/tlsutil" @@ -565,6 +566,28 @@ func embeddedVersionsForLog(embedded map[string]string) string { // dispatch routes a request to the appropriate handler. 
func dispatch(cfg config.Config, idx *archive.Index, ring *handler.LogRing, appsSrv *apps.Server, tokens *auth.Store, w http.ResponseWriter, r *http.Request) { + // URL paths are case-insensitive: resolve each segment against the + // on-disk casing under cfg.Root, preferring the all-lowercase + // variant when multiple case variants exist (lowercase is the + // project-wide canonical case for folders). The walk stops at the + // first segment that doesn't exist on disk so virtual prefixes + // (.profile, .archive, .tokens) and 404 paths flow through with + // their tail preserved verbatim. Downstream handlers see the + // canonical r.URL.Path; the access log captures the as-typed path + // before this rewrite. + if absPath, canonical, ok := appfs.ResolveCanonical(cfg.Root, r.URL.Path); ok { + _ = absPath + // Restore trailing slash so directory routing (which redirects + // no-trailing-slash requests) keeps its existing semantics. + if strings.HasSuffix(r.URL.Path, "/") && !strings.HasSuffix(canonical, "/") && canonical != "/" { + canonical += "/" + } + if canonical != r.URL.Path { + r.URL.Path = canonical + r.URL.RawPath = "" + } + } + urlPath := r.URL.Path email := handler.EmailFromContext(r) diff --git a/zddc/cmd/zddc-server/main_test.go b/zddc/cmd/zddc-server/main_test.go index ea4337c..b47cb0f 100644 --- a/zddc/cmd/zddc-server/main_test.go +++ b/zddc/cmd/zddc-server/main_test.go @@ -510,6 +510,58 @@ func TestDispatchArchiveMethodGate(t *testing.T) { } } +// TestDispatchCaseInsensitiveURL: mixed-case URLs resolve to the on-disk +// canonical case, with the lowercase variant winning when both case +// variants exist as siblings on disk. 
+func TestDispatchCaseInsensitiveURL(t *testing.T) { + root := t.TempDir() + mustWrite(t, filepath.Join(root, ".zddc"), + "acl:\n allow:\n - \"*\"\n") + mustMkdir(t, filepath.Join(root, "project-a", "working")) + mustWrite(t, filepath.Join(root, "project-a", "working", "note.md"), "lowercase note") + + // Sibling Mixed-Case dir present too. Lowercase must win on the + // case-insensitive resolution; the Mixed-Case dir's contents must + // not bleed through under any URL casing. + mustMkdir(t, filepath.Join(root, "Project-A", "Working")) + mustWrite(t, filepath.Join(root, "Project-A", "Working", "note.md"), "MIXEDCASE note") + + idx, err := archive.BuildIndex(root) + if err != nil { + t.Fatalf("BuildIndex: %v", err) + } + cfg := config.Config{ + Root: root, + IndexPath: ".archive", + EmailHeader: "X-Auth-Request-Email", + } + ring := handler.NewLogRing(10) + + cases := []struct { + name string + url string + }{ + {"all lowercase", "/project-a/working/note.md"}, + {"mixed case top", "/Project-A/working/note.md"}, + {"mixed case nested", "/PROJECT-A/Working/Note.md"}, + {"all uppercase", "/PROJECT-A/WORKING/NOTE.MD"}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + req := httptest.NewRequest(http.MethodGet, tc.url, nil) + rec := httptest.NewRecorder() + dispatch(cfg, idx, ring, nil, nil, rec, req) + if rec.Code != http.StatusOK { + t.Fatalf("status=%d body=%q", rec.Code, rec.Body.String()) + } + if got := rec.Body.String(); got != "lowercase note" { + t.Errorf("body=%q want %q (lowercase variant must win)", + got, "lowercase note") + } + }) + } +} + func mustMkdir(t *testing.T, path string) { t.Helper() if err := os.MkdirAll(path, 0o755); err != nil { diff --git a/zddc/internal/fs/resolve.go b/zddc/internal/fs/resolve.go new file mode 100644 index 0000000..3805223 --- /dev/null +++ b/zddc/internal/fs/resolve.go @@ -0,0 +1,114 @@ +package fs + +import ( + "os" + "path/filepath" + "sort" + "strings" +) + +// ResolveCanonical translates a URL 
path to its canonical on-disk form +// under fsRoot, treating segment matches as case-insensitive. When a +// directory contains multiple case variants of the same name, the +// all-lowercase variant wins; lowercase is the project-wide canonical +// case for folders. +// +// The walk stops at the first segment that does not exist on disk: +// the remainder of the URL path is appended verbatim. This lets virtual +// prefixes (.profile, .archive, .tokens) and 404 paths flow through +// unchanged. +// +// Returns the resolved absolute path and the canonical URL form +// (leading "/", "/"-separated, no trailing "/"). ok is false when +// the input would escape fsRoot. NOTE(review): the containment check compares against fsRoot verbatim, so a root with a trailing separator (or "/" itself) makes every non-root path report ok=false; pass a filepath.Clean'd root — confirm cfg.Root is normalized by the caller. +// +// The fast path is one Lstat for the lowercase form of the segment; +// readdir+scan only runs when neither lowercase nor exact-case exists. +func ResolveCanonical(fsRoot, urlPath string) (absPath, canonicalURL string, ok bool) { + rel := strings.TrimPrefix(urlPath, "/") + rel = strings.TrimSuffix(rel, "/") + if rel == "" { + return fsRoot, "/", true + } + + cur := fsRoot + canonParts := make([]string, 0, 8) + segments := strings.Split(rel, "/") + stopped := false + for _, seg := range segments { + if seg == "" || seg == "." || seg == ".." { + // Defensive — Clean strips these before this is called in + // practice, but if a caller passes a raw URL preserve it + // verbatim and let downstream safety checks reject it. + canonParts = append(canonParts, seg) + stopped = true + continue + } + if stopped { + canonParts = append(canonParts, seg) + continue + } + resolved, found := resolveSegment(cur, seg) + canonParts = append(canonParts, resolved) + if !found { + // Tail passes through verbatim — virtual prefixes + // (.archive, .tokens, .profile) and 404 paths land here. 
+ stopped = true + continue + } + cur = filepath.Join(cur, resolved) + } + + canonicalURL = "/" + strings.Join(canonParts, "/") + abs := filepath.Join(fsRoot, filepath.FromSlash(strings.Join(canonParts, "/"))) + abs = filepath.Clean(abs) + if abs != fsRoot && !strings.HasPrefix(abs, fsRoot+string(filepath.Separator)) { + return "", "", false + } + return abs, canonicalURL, true +} + +// resolveSegment finds the on-disk name for a URL segment under parent. +// Order: (1) lowercase variant (canonical), (2) exact-case as given, +// (3) readdir scan picking the lowercase tiebreaker. Returns +// (segment-as-given, false) if nothing matches. +func resolveSegment(parent, segment string) (string, bool) { + lower := strings.ToLower(segment) + + if _, err := os.Lstat(filepath.Join(parent, lower)); err == nil { + return lower, true + } + if segment != lower { + if _, err := os.Lstat(filepath.Join(parent, segment)); err == nil { + return segment, true + } + } + + entries, err := os.ReadDir(parent) + if err != nil { + return segment, false + } + var matches []string + for _, e := range entries { + if strings.EqualFold(e.Name(), segment) { + matches = append(matches, e.Name()) + } + } + switch len(matches) { + case 0: + return segment, false + case 1: + return matches[0], true + default: + // Multiple case variants exist on disk. Prefer the all-lowercase + // variant; otherwise pick the first in sort order so the choice + // is deterministic across requests. 
+ for _, m := range matches { + if m == strings.ToLower(m) { + return m, true + } + } + sort.Strings(matches) + return matches[0], true + } +} diff --git a/zddc/internal/fs/resolve_test.go b/zddc/internal/fs/resolve_test.go new file mode 100644 index 0000000..57dce7e --- /dev/null +++ b/zddc/internal/fs/resolve_test.go @@ -0,0 +1,156 @@ +package fs + +import ( + "os" + "path/filepath" + "runtime" + "testing" +) + +func mkdir(t *testing.T, parts ...string) { + t.Helper() + if err := os.MkdirAll(filepath.Join(parts...), 0o755); err != nil { + t.Fatal(err) + } +} + +func TestResolveCanonical_RootAndEmpty(t *testing.T) { + root := t.TempDir() + for _, in := range []string{"/", "", "//"} { + abs, url, ok := ResolveCanonical(root, in) + if !ok { + t.Fatalf("%q: ok=false", in) + } + if abs != root || url != "/" { + t.Fatalf("%q: abs=%q url=%q", in, abs, url) + } + } +} + +func TestResolveCanonical_ExactCase(t *testing.T) { + root := t.TempDir() + mkdir(t, root, "archive", "incoming") + abs, url, ok := ResolveCanonical(root, "/archive/incoming") + if !ok || url != "/archive/incoming" { + t.Fatalf("ok=%v url=%q", ok, url) + } + if abs != filepath.Join(root, "archive", "incoming") { + t.Fatalf("abs=%q", abs) + } +} + +func TestResolveCanonical_MixedCaseURLLowercaseOnDisk(t *testing.T) { + root := t.TempDir() + mkdir(t, root, "archive", "incoming") + abs, url, ok := ResolveCanonical(root, "/Archive/Incoming") + if !ok || url != "/archive/incoming" { + t.Fatalf("ok=%v url=%q", ok, url) + } + if abs != filepath.Join(root, "archive", "incoming") { + t.Fatalf("abs=%q", abs) + } +} + +func TestResolveCanonical_OnlyMixedCaseExists(t *testing.T) { + root := t.TempDir() + mkdir(t, root, "Archive", "Incoming") + abs, url, ok := ResolveCanonical(root, "/archive/incoming") + if !ok || url != "/Archive/Incoming" { + t.Fatalf("ok=%v url=%q", ok, url) + } + if abs != filepath.Join(root, "Archive", "Incoming") { + t.Fatalf("abs=%q", abs) + } +} + +func 
TestResolveCanonical_BothCasesExistLowercaseWins(t *testing.T) { + if runtime.GOOS == "darwin" || runtime.GOOS == "windows" { + t.Skip("filesystem may be case-insensitive; tiebreak only meaningful on case-sensitive FS") + } + root := t.TempDir() + mkdir(t, root, "Archive") + mkdir(t, root, "archive") + if err := os.WriteFile(filepath.Join(root, "Archive", "marker"), []byte("upper"), 0o644); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(root, "archive", "marker"), []byte("lower"), 0o644); err != nil { + t.Fatal(err) + } + + for _, in := range []string{"/Archive/marker", "/archive/marker", "/aRcHiVe/marker"} { + abs, url, ok := ResolveCanonical(root, in) + if !ok { + t.Fatalf("%q: ok=false", in) + } + if url != "/archive/marker" { + t.Fatalf("%q: url=%q (want /archive/marker)", in, url) + } + body, err := os.ReadFile(abs) + if err != nil { + t.Fatalf("%q: read %s: %v", in, abs, err) + } + if string(body) != "lower" { + t.Fatalf("%q: body=%q (want \"lower\" — lowercase variant must win)", in, body) + } + } +} + +func TestResolveCanonical_NonexistentSegmentPreservesRemainder(t *testing.T) { + root := t.TempDir() + mkdir(t, root, "archive") + abs, url, ok := ResolveCanonical(root, "/Archive/.archive/TR-001.html") + if !ok { + t.Fatal("ok=false") + } + // Walk canonicalizes "Archive" to "archive"; the virtual ".archive" + // segment doesn't exist on disk, so the remainder passes through + // unchanged so the dispatcher's virtual-prefix routing still fires. 
+ if url != "/archive/.archive/TR-001.html" { + t.Fatalf("url=%q", url) + } + if abs != filepath.Join(root, "archive", ".archive", "TR-001.html") { + t.Fatalf("abs=%q", abs) + } +} + +func TestResolveCanonical_FileSegmentTerminatesWalk(t *testing.T) { + root := t.TempDir() + mkdir(t, root, "archive") + if err := os.WriteFile(filepath.Join(root, "archive", "Doc.PDF"), []byte("x"), 0o644); err != nil { + t.Fatal(err) + } + abs, url, ok := ResolveCanonical(root, "/Archive/doc.pdf") + if !ok { + t.Fatal("ok=false") + } + // On Linux Doc.PDF exists but doc.pdf does not; the requested segment is + // already lowercase, so the readdir scan (EqualFold) finds Doc.PDF and uses it. + if url != "/archive/Doc.PDF" { + t.Fatalf("url=%q", url) + } + _ = abs +} + +func TestResolveCanonical_RejectsEscape(t *testing.T) { + root := t.TempDir() + mkdir(t, root, "archive") + // ResolveCanonical does not clean its input: ".." segments are passed + // through verbatim and only collapsed by the final filepath.Join/Clean. The + // final containment check must reject the result. + _, _, ok := ResolveCanonical(root, "/archive/../../etc") + if ok { + t.Fatal("expected ok=false for escape path") + } +} + +func TestResolveCanonical_TrailingSlashesNormalized(t *testing.T) { + root := t.TempDir() + mkdir(t, root, "archive", "incoming") + _, url, ok := ResolveCanonical(root, "/Archive/Incoming/") + if !ok { + t.Fatal("ok=false") + } + if url != "/archive/incoming" { + t.Fatalf("url=%q", url) + } +} diff --git a/zddc/internal/handler/middleware.go b/zddc/internal/handler/middleware.go index ee3a0ea..aa8dc0f 100644 --- a/zddc/internal/handler/middleware.go +++ b/zddc/internal/handler/middleware.go @@ -186,6 +186,12 @@ func AccessLogMiddleware(auditLogger *slog.Logger, next http.Handler) http.Handl // Capture request start time start := time.Now() + // Snapshot the as-typed URL path before downstream handlers may + // rewrite it (case-insensitive canonicalization). The audit + // stream records what the client actually sent, not the + // resolved canonical form. 
+ requestedPath := r.URL.Path + // Wrap the ResponseWriter wrapped := &responseWriter{ResponseWriter: w, status: 200} @@ -205,11 +211,14 @@ func AccessLogMiddleware(auditLogger *slog.Logger, next http.Handler) http.Handl "ts", start.Format(time.RFC3339), "email", email, "method", r.Method, - "path", r.URL.Path, + "path", requestedPath, "status", wrapped.status, "bytes", wrapped.bytes, "duration_ms", durationMs, } + if r.URL.Path != requestedPath { + args = append(args, "resolved_path", r.URL.Path) + } // Stderr stream (existing behavior). slog.Info("access", args...)