URLs are now case-insensitive against the on-disk casing under ZDDC_ROOT, with a lowercase-wins tiebreak when sibling case variants exist. File and folder names preserve case on disk — the change is a pure URL→FS-name mapping; nothing renames anything. internal/fs/resolve.go ResolveCanonical walks segments left-to-right under fsRoot. Per segment: try lowercase first (canonical / cheap lstat fast-path), then exact-case, then readdir+CI scan with the all-lowercase variant winning the tiebreak. Walk stops at the first segment that doesn't exist on disk so virtual prefixes (.archive, .profile, .tokens, .auth) and 404 paths flow through with their tail preserved verbatim. Path-escape safety check on the resolved abs path matches the existing safeJoin pattern. Wired in at the top of cmd/zddc-server/main.go dispatch(), which rewrites r.URL.Path before any handler runs. Downstream handlers (plus their existing safeJoin calls and the cascade walker) pick up canonical case automatically — no per-handler changes. The ACL cascade benefits from this for free since EffectivePolicy is keyed by the now-canonical absolute path. internal/handler/middleware.go AccessLogMiddleware snapshots the as-typed URL path before the rewrite. The audit log's `path` field records what the client actually sent; a `resolved_path` field is added only when canonicalization changed it. Operators reading the log can see both the raw request and what was served. Lowercase as the project-wide canonical convention is already honoured by the auto-created folders in internal/zddc/ensure.go (working/, staging/, archive/<party>/incoming/) and the server's own state dirs (_app/, .zddc.d/tokens/, .zddc.d/outbox/, .zddc.d/logs/). Operators who drop a Mixed-Case-Folder/ on disk keep that casing — the resolver finds it via the readdir tier. Performance: the lowercase-first lstat is one syscall on the hot path. 
Only mismatches (mixed-case URL where on-disk is also mixed-case) pay the readdir+EqualFold scan, and Linux page-caches small-dir readdirs aggressively. Apache mod_speling uses the same "try then fallback" pattern. Tests: - internal/fs/resolve_test.go — 9 unit tests: exact-case, mixed-case-URL-with-lowercase-on-disk, mixed-case-URL-with-mixed-case-on-disk, both-cases-exist-lowercase-wins, nonexistent segment preserves remainder, file-segment terminates walk, escape rejection, trailing-slash normalization, root. - cmd/zddc-server/main_test.go TestDispatchCaseInsensitiveURL — end-to-end through the dispatcher with sibling Archive/ and archive/ on disk; all four URL casings of the same path serve the lowercase variant's content (proves the tiebreak fires through every layer). - Full Go suite green. Docs: AGENTS.md gains a "URL handling" subsection in the zddc-server section; ARCHITECTURE.md security-model table gains a "URL canonicalization" row. Out of scope (separate decisions, can revisit if needed): - ACL glob CI-matching. If .zddc rules use mixed-case URL globs, they won't match the canonical lowercase URL. Workable today by writing rules in lowercase. Touches a different package. - Redirect-to-canonical (301). Server serves under whichever case the client used; canonicalization is internal. Could 301 to canonical for SEO/bookmark hygiene as a follow-up. - Client-mode (proxy/cache). Only master mode is wired so far. Cache-handler CI lives in internal/cache/cache.go cachePathFor and is a separate code path. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
114 lines
3.4 KiB
Go
114 lines
3.4 KiB
Go
package fs
|
|
|
|
import (
|
|
"os"
|
|
"path/filepath"
|
|
"sort"
|
|
"strings"
|
|
)
|
|
|
|
// ResolveCanonical translates a URL path to its canonical on-disk form
|
|
// under fsRoot, treating segment matches as case-insensitive. When a
|
|
// directory contains multiple case variants of the same name, the
|
|
// all-lowercase variant wins; lowercase is the project-wide canonical
|
|
// case for folders.
|
|
//
|
|
// The walk stops at the first segment that does not exist on disk:
|
|
// the remainder of the URL path is appended verbatim. This lets virtual
|
|
// prefixes (.profile, .archive, .tokens) and 404 paths flow through
|
|
// unchanged.
|
|
//
|
|
// Returns the resolved absolute path and the canonical URL form
|
|
// (leading "/", "/"-separated, no trailing "/"). ok is false only when
|
|
// the input would escape fsRoot.
|
|
//
|
|
// The fast path is one Lstat for the lowercase form of the segment;
|
|
// readdir+scan only runs when neither lowercase nor exact-case exists.
|
|
func ResolveCanonical(fsRoot, urlPath string) (absPath, canonicalURL string, ok bool) {
|
|
rel := strings.TrimPrefix(urlPath, "/")
|
|
rel = strings.TrimSuffix(rel, "/")
|
|
if rel == "" {
|
|
return fsRoot, "/", true
|
|
}
|
|
|
|
cur := fsRoot
|
|
canonParts := make([]string, 0, 8)
|
|
segments := strings.Split(rel, "/")
|
|
stopped := false
|
|
for _, seg := range segments {
|
|
if seg == "" || seg == "." || seg == ".." {
|
|
// Defensive — Clean strips these before this is called in
|
|
// practice, but if a caller passes a raw URL preserve it
|
|
// verbatim and let downstream safety checks reject it.
|
|
canonParts = append(canonParts, seg)
|
|
stopped = true
|
|
continue
|
|
}
|
|
if stopped {
|
|
canonParts = append(canonParts, seg)
|
|
continue
|
|
}
|
|
resolved, found := resolveSegment(cur, seg)
|
|
canonParts = append(canonParts, resolved)
|
|
if !found {
|
|
// Tail passes through verbatim — virtual prefixes
|
|
// (.archive, .tokens, .profile) and 404 paths land here.
|
|
stopped = true
|
|
continue
|
|
}
|
|
cur = filepath.Join(cur, resolved)
|
|
}
|
|
|
|
canonicalURL = "/" + strings.Join(canonParts, "/")
|
|
abs := filepath.Join(fsRoot, filepath.FromSlash(strings.Join(canonParts, "/")))
|
|
abs = filepath.Clean(abs)
|
|
if abs != fsRoot && !strings.HasPrefix(abs, fsRoot+string(filepath.Separator)) {
|
|
return "", "", false
|
|
}
|
|
return abs, canonicalURL, true
|
|
}
|
|
|
|
// resolveSegment maps one URL segment to the name of an entry under
// parent. Candidates are tried in this order:
//
//  1. the lowercase form of segment (the canonical case), via Lstat;
//  2. segment exactly as given, via Lstat;
//  3. a directory listing of parent, keeping every entry that equals
//     segment under strings.EqualFold.
//
// When the listing yields several case variants, an all-lowercase name
// is preferred; failing that, the first name in sort order is used so
// the answer is stable across requests. If nothing matches, or parent
// cannot be listed, it returns segment unchanged and false.
func resolveSegment(parent, segment string) (string, bool) {
	// exists reports whether name can be Lstat'ed inside parent. Any
	// Lstat error counts as "not there".
	exists := func(name string) bool {
		_, err := os.Lstat(filepath.Join(parent, name))
		return err == nil
	}

	folded := strings.ToLower(segment)
	if exists(folded) {
		return folded, true
	}
	// Skip the second Lstat when it would repeat the first.
	if folded != segment && exists(segment) {
		return segment, true
	}

	listing, err := os.ReadDir(parent)
	if err != nil {
		return segment, false
	}
	var candidates []string
	for _, entry := range listing {
		if name := entry.Name(); strings.EqualFold(name, segment) {
			candidates = append(candidates, name)
		}
	}
	if len(candidates) == 0 {
		return segment, false
	}

	// Lowercase wins the tiebreak; with a single candidate this loop and
	// the sort below both reduce to returning that one name.
	for _, name := range candidates {
		if strings.ToLower(name) == name {
			return name, true
		}
	}
	sort.Strings(candidates)
	return candidates[0], true
}
|