ZDDC/zddc/internal/handler/archivehandler_test.go
ZDDC fe28a73f59 feat(archive): serve in-place instead of redirecting (preserves #anchor links)
Resolved `.archive/<tracking>.html` URLs now serve the target file's
bytes inline via http.ServeFile with Cache-Control: no-cache, replacing
the previous 302 redirect to the per-transmittal URL.

Why: external links like `.archive/<tracking>.html#section` are meant
to track the latest revision. A redirect exposes the snapshot URL — any
forwarded link then pins to that snapshot instead of "latest." Serving
in-place keeps the `.archive/` URL stable as the resolver's "current"
target moves over time.

Cache-Control: no-cache is intentional. Each load revalidates against
the on-disk file's Last-Modified/ETag, so when a new revision lands the
resolver picks it and the browser refetches transparently.

ACL is unchanged: enforced on both the `.archive` context directory and
the resolved target file (per-target denial returns 404, not 403, to
avoid disclosing that a tracking number exists in a hidden subtree).

archivehandler_test.go status expectations updated 302 → 200; fixture
bodies adjusted for body-content verification of the in-place serve.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-05 20:32:29 -05:00

568 lines
20 KiB
Go

package handler
import (
"context"
"encoding/json"
"net/http"
"net/http/httptest"
"net/url"
"os"
"path/filepath"
"strings"
"testing"
"codeberg.org/VARASYS/ZDDC/zddc/internal/archive"
"codeberg.org/VARASYS/ZDDC/zddc/internal/config"
"codeberg.org/VARASYS/ZDDC/zddc/internal/listing"
"codeberg.org/VARASYS/ZDDC/zddc/internal/zddc"
)
// archiveTestRoot lays down a two-project tree so listings exercise project
// scoping, ACL cascading, and the per-project bucket boundary. ACLs are
// written per-test in the helper that calls this.
//
// <root>/
// ProjectA/
// 2025-01-01_T1 (IFR) - Title/100_~A (IFR) - Title.pdf
// 2025-01-01_T1 (IFR) - Title/100_A (IFC) - Title.pdf
// 2025-02-01_T2 (RTN) - Comments/100_~A+C1 (RTN) - Comments.pdf
// ProjectB/
// 2025-01-01_T3 (IFR) - Title/200_0 (IFR) - Other.pdf
func archiveTestRoot(t *testing.T) (string, *archive.Index) {
	t.Helper()
	root := t.TempDir()

	// Every fixture stores its own slash-separated relative path as its
	// content, so a test can tell from a response body alone which file
	// the in-place .archive serve picked (there is no redirect to inspect).
	fixtures := []string{
		"ProjectA/2025-01-01_T1 (IFR) - Title/100_~A (IFR) - Title.pdf",
		"ProjectA/2025-01-01_T1 (IFR) - Title/100_A (IFC) - Title.pdf",
		"ProjectA/2025-02-01_T2 (RTN) - Comments/100_~A+C1 (RTN) - Comments.pdf",
		"ProjectB/2025-01-01_T3 (IFR) - Title/200_0 (IFR) - Other.pdf",
	}
	for _, rel := range fixtures {
		abs := filepath.Join(root, filepath.FromSlash(rel))
		if err := os.MkdirAll(filepath.Dir(abs), 0o755); err != nil {
			t.Fatalf("mkdir: %v", err)
		}
		if err := os.WriteFile(abs, []byte(rel), 0o644); err != nil {
			t.Fatalf("write %s: %v", abs, err)
		}
	}

	// The index is built once, after all fixtures are on disk.
	index, err := archive.BuildIndex(root)
	if err != nil {
		t.Fatalf("BuildIndex: %v", err)
	}
	return root, index
}
// writeZddc writes a .zddc YAML at <root>/<rel>/.zddc and clears the
// per-directory policy cache so a previous test's permissive .zddc doesn't
// bleed into this one.
func writeZddc(t *testing.T, root, rel, body string) {
	t.Helper()

	policyDir := filepath.Join(root, filepath.FromSlash(rel))
	err := os.MkdirAll(policyDir, 0o755)
	if err != nil {
		t.Fatalf("mkdir %s: %v", policyDir, err)
	}

	policyFile := filepath.Join(policyDir, ".zddc")
	if err = os.WriteFile(policyFile, []byte(body), 0o644); err != nil {
		t.Fatalf("write .zddc: %v", err)
	}

	// Invalidate only after the file is on disk, so the next lookup for
	// this directory reads the policy that was just written.
	zddc.InvalidateCache(policyDir)
}
// archiveCfg returns the configuration shared by the archive handler tests:
// the given root, the X-Auth-Request-Email header name, and ".archive" as the
// index path segment.
func archiveCfg(root string) config.Config {
	cfg := config.Config{
		Root:        root,
		EmailHeader: "X-Auth-Request-Email",
		IndexPath:   ".archive",
	}
	return cfg
}
// callArchive invokes ServeArchive directly with a GET request whose context
// carries email under EmailKey, and returns the recorder holding the response.
// An empty filename targets the listing URL (trailing slash); a non-empty one
// targets that file under cfg.IndexPath.
func callArchive(t *testing.T, cfg config.Config, idx *archive.Index, email, contextPath, filename string) *httptest.ResponseRecorder {
	t.Helper()

	// The URL only has to be parseable by httptest: the handler is handed
	// the already-decoded contextPath and filename as arguments, the way the
	// dispatcher would have decoded them. Each segment is escaped so spaces
	// and parens in fixture names survive URL parsing.
	target := encodePath(contextPath) + "/" + cfg.IndexPath + "/"
	if filename != "" {
		target += url.PathEscape(filename)
	}

	req := httptest.NewRequest(http.MethodGet, target, nil)
	withEmail := context.WithValue(req.Context(), EmailKey, email)

	rec := httptest.NewRecorder()
	ServeArchive(cfg, idx, rec, req.WithContext(withEmail), contextPath, filename)
	return rec
}
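// encodePath URL-escapes each slash-separated segment of p so special
// characters like spaces and parens don't break NewRequest's URL parser.
// Leading and trailing slashes are trimmed first: a non-empty result always
// starts with exactly one "/" and has no trailing slash, while an empty or
// all-slash input yields "" (callArchive appends the next "/" itself).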
func encodePath(p string) string {
	inner := strings.Trim(p, "/")
	if inner == "" {
		// Nothing but slashes (or nothing at all): no segments to emit.
		return ""
	}

	// Emit "/" + escaped segment for every segment, which yields the
	// single leading slash and no trailing one.
	var b strings.Builder
	for _, seg := range strings.Split(inner, "/") {
		b.WriteByte('/')
		b.WriteString(url.PathEscape(seg))
	}
	return b.String()
}
// decodeListing parses body as a JSON array of listing.FileInfo and fails the
// test immediately, echoing the raw body, when it does not decode.
func decodeListing(t *testing.T, body []byte) []listing.FileInfo {
	t.Helper()

	var entries []listing.FileInfo
	err := json.Unmarshal(body, &entries)
	if err != nil {
		t.Fatalf("invalid JSON: %v\n%s", err, body)
	}
	return entries
}
// names returns the Name of every entry, in the same order as entries.
func names(entries []listing.FileInfo) []string {
	result := make([]string, len(entries))
	for i := range entries {
		result[i] = entries[i].Name
	}
	return result
}
// contains reports whether x is an element of xs.
func contains(xs []string, x string) bool {
	found := false
	for i := 0; i < len(xs) && !found; i++ {
		found = xs[i] == x
	}
	return found
}
// /.archive/ at the very root has no project segment to scope by, so it's a
// hard 404 — even for an admin. Stable references must include the project
// directory; otherwise cross-project tracking-number collisions would silently
// pick a winner.
func TestServeArchive_RootHasNoProjectScope404(t *testing.T) {
	root, idx := archiveTestRoot(t)
	// Wide-open ACL: any 404 below comes from the missing project segment,
	// not from a permission check.
	writeZddc(t, root, ".", `acl:
allow: ["*"]
`)
	cfg := archiveCfg(root)

	const caller = "alice@example.com"
	// Both spellings of "the root" must behave the same.
	rootContexts := [...]string{"/", ""}
	for i := range rootContexts {
		ctx := rootContexts[i]
		t.Run("ctx="+ctx, func(t *testing.T) {
			// Listing request (no filename).
			listRec := callArchive(t, cfg, idx, caller, ctx, "")
			if got := listRec.Code; got != http.StatusNotFound {
				t.Errorf("listing at root: status %d, want 404; body = %s", got, listRec.Body.String())
			}

			// Resolve request for a tracking number that does exist in a project.
			resolveRec := callArchive(t, cfg, idx, caller, ctx, "100.html")
			if got := resolveRec.Code; got != http.StatusNotFound {
				t.Errorf("resolve at root: status %d, want 404", got)
			}
		})
	}
}
// .archive listings are scoped to the contextPath's first segment (the
// project). Each project sees only its own tracking numbers; cross-project
// entries are invisible. Subdirectory contextPaths still resolve to the
// top-level project's bucket — a request from /ProjectA/sub/sub/.archive/
// shows ProjectA's entries with that deeper URL prefix.
func TestServeArchive_ListingScopedToProject(t *testing.T) {
	root, idx := archiveTestRoot(t)
	writeZddc(t, root, ".", `acl:
allow: ["*"]
`)
	cfg := archiveCfg(root)
	const email = "alice@example.com"

	// Entry names each project is expected to expose; the other project's
	// set doubles as the "must not appear" list.
	projectAEntries := []string{"100.html", "100_A.html", "100_~A.html"}
	projectBEntries := []string{"200.html", "200_0.html"}

	type scenario struct {
		name        string
		contextPath string
		urlPrefix   string
		wantNames   []string
		denyNames   []string
	}
	scenarios := []scenario{
		{
			name:        "ProjectA top level",
			contextPath: "/ProjectA",
			urlPrefix:   "/ProjectA/.archive/",
			wantNames:   projectAEntries,
			denyNames:   projectBEntries,
		},
		{
			name:        "ProjectA deeper subpath",
			contextPath: "/ProjectA/2025-01-01_T1 (IFR) - Title",
			urlPrefix:   "/ProjectA/2025-01-01_T1 (IFR) - Title/.archive/",
			wantNames:   projectAEntries,
			denyNames:   projectBEntries,
		},
		{
			name:        "ProjectB top level",
			contextPath: "/ProjectB",
			urlPrefix:   "/ProjectB/.archive/",
			wantNames:   projectBEntries,
			denyNames:   projectAEntries,
		},
	}

	for _, sc := range scenarios {
		t.Run(sc.name, func(t *testing.T) {
			rec := callArchive(t, cfg, idx, email, sc.contextPath, "")
			if rec.Code != http.StatusOK {
				t.Fatalf("status = %d, want 200; body = %s", rec.Code, rec.Body.String())
			}
			entries := decodeListing(t, rec.Body.Bytes())
			gotNames := names(entries)

			// Own-project entries must all be present.
			for _, want := range sc.wantNames {
				if contains(gotNames, want) {
					continue
				}
				t.Errorf("missing %q at %s; got %v", want, sc.contextPath, gotNames)
			}
			// The other project's entries must be invisible.
			for _, deny := range sc.denyNames {
				if !contains(gotNames, deny) {
					continue
				}
				t.Errorf("unexpected cross-project entry %q at %s; got %v", deny, sc.contextPath, gotNames)
			}
			// Every entry URL sits under the requesting contextPath's .archive/.
			for _, e := range entries {
				if strings.HasPrefix(e.URL, sc.urlPrefix) {
					continue
				}
				t.Errorf("entry %q URL = %q, want %s prefix", e.Name, e.URL, sc.urlPrefix)
			}
		})
	}
}
// Listing endpoint is gated by the contextPath ACL: callers who can't reach
// the directory the .archive virtually sits in get 403 (the directory is
// known to exist; just not accessible).
func TestServeArchive_ListingDeniedByContextPathACL(t *testing.T) {
	root, idx := archiveTestRoot(t)
	writeZddc(t, root, ".", `acl:
allow: ["alice@example.com"]
`)
	writeZddc(t, root, "ProjectA", `acl:
deny: ["mallory@example.com"]
allow: ["alice@example.com"]
`)
	cfg := archiveCfg(root)

	// One listing request per caller against the same contextPath; the
	// failure format is kept per row so each message names its own expectation.
	probes := []struct {
		email      string
		wantStatus int
		failFormat string
	}{
		{
			email:      "mallory@example.com",
			wantStatus: http.StatusForbidden,
			failFormat: "denied caller got status %d, want 403; body = %s",
		},
		{
			email:      "alice@example.com",
			wantStatus: http.StatusOK,
			failFormat: "allowed caller got status %d, want 200; body = %s",
		},
	}
	for _, p := range probes {
		rec := callArchive(t, cfg, idx, p.email, "/ProjectA", "")
		if rec.Code == p.wantStatus {
			continue
		}
		t.Errorf(p.failFormat, rec.Code, rec.Body.String())
	}
}
// Listing entries are filtered per-target by ACL: a caller denied at a
// subtree's transmittal directory sees no entries whose target lives there.
// Excluding a user from a subdir requires an explicit deny there (the
// cascade is "first explicit match wins, bottom-up", so a child allow list
// doesn't narrow a parent's allow:["*"]).
//
// The test checks both halves: entries alice can still read stay listed, and
// the entry whose target sits in the denied folder is absent.
func TestServeArchive_ListingFiltersEntriesByPerTargetACL(t *testing.T) {
	root, idx := archiveTestRoot(t)
	writeZddc(t, root, ".", `acl:
allow: ["*"]
`)
	// Deny alice on the transmittal folder where 100_~A+C1 lives, so her
	// listing of /ProjectA/.archive/ drops that entry — but other ProjectA
	// entries stay visible. (A blanket /ProjectA deny would 403 the
	// listing entirely; that's covered by the contextPath-ACL test.)
	writeZddc(t, root, "ProjectA/2025-02-01_T2 (RTN) - Comments", `acl:
deny: ["alice@example.com"]
`)
	cfg := archiveCfg(root)

	rec := callArchive(t, cfg, idx, "alice@example.com", "/ProjectA", "")
	if rec.Code != http.StatusOK {
		t.Fatalf("status = %d, want 200; body = %s", rec.Code, rec.Body.String())
	}
	gotNames := names(decodeListing(t, rec.Body.Bytes()))
	for _, want := range []string{"100.html", "100_A.html", "100_~A.html"} {
		if !contains(gotNames, want) {
			t.Errorf("alice missing accessible entry %q; got %v", want, gotNames)
		}
	}
	// The +C1 fixture is the only file in the denied folder, so any listed
	// name carrying "+C1" means the per-target filter leaked it.
	for _, n := range gotNames {
		if strings.Contains(n, "+C1") {
			t.Errorf("alice sees entry %q whose target lives in a folder she is denied on; got %v", n, gotNames)
		}
	}

	// Bob has no per-target denials in either project.
	rec = callArchive(t, cfg, idx, "bob@example.com", "/ProjectB", "")
	if rec.Code != http.StatusOK {
		t.Fatalf("bob ProjectB listing: status %d, want 200", rec.Code)
	}
	gotNames = names(decodeListing(t, rec.Body.Bytes()))
	if !contains(gotNames, "200.html") {
		t.Errorf("bob should see ProjectB entry 200.html; got %v", gotNames)
	}
}
// Direct resolve requests for a tracking number whose target the caller
// can't read return 404 (not 403) — the file's existence must not leak
// across the ACL boundary. Cross-project tracking-number requests also 404
// because each project's bucket is separate. A caller blocked at the
// contextPath itself gets 403 instead, because that gate fires first.
func TestServeArchive_ResolveACLDeniedReturns404(t *testing.T) {
	root, idx := archiveTestRoot(t)
	writeZddc(t, root, ".", `acl:
allow: ["*"]
`)
	writeZddc(t, root, "ProjectB", `acl:
deny: ["alice@example.com"]
`)
	// Per-target denial: mallory passes the /ProjectA contextPath gate via
	// the root allow, but is denied on the transmittal folder that holds
	// the 100_~A+C1 target. Written before any request is made so no
	// earlier policy lookup for this folder exists.
	writeZddc(t, root, "ProjectA/2025-02-01_T2 (RTN) - Comments", `acl:
deny: ["mallory@example.com"]
`)
	cfg := archiveCfg(root)

	// 200 doesn't even live in ProjectA, so the resolver itself returns 404
	// regardless of ACL — project scoping comes first.
	rec := callArchive(t, cfg, idx, "alice@example.com", "/ProjectA", "200.html")
	if rec.Code != http.StatusNotFound {
		t.Errorf("alice → /ProjectA/.archive/200.html: status %d, want 404 (cross-project)", rec.Code)
	}

	// Alice in /ProjectA can resolve all of ProjectA's entries.
	for _, fn := range []string{"100.html", "100_A.html", "100_~A.html", "100_~A+C1.html"} {
		rec := callArchive(t, cfg, idx, "alice@example.com", "/ProjectA", fn)
		if rec.Code != http.StatusOK {
			t.Errorf("alice → /ProjectA/.archive/%s: status %d, want 200; body = %s", fn, rec.Code, rec.Body.String())
		}
	}

	// Mallory can read the context directory but not the resolved target:
	// that must look exactly like a missing tracking number.
	rec = callArchive(t, cfg, idx, "mallory@example.com", "/ProjectA", "100_~A+C1.html")
	if rec.Code != http.StatusNotFound {
		t.Errorf("mallory → /ProjectA/.archive/100_~A+C1.html: status %d, want 404 (denied at target)", rec.Code)
	}
	// Control: the denial is per-target, so mallory still resolves an entry
	// whose target lives outside the denied folder.
	rec = callArchive(t, cfg, idx, "mallory@example.com", "/ProjectA", "100.html")
	if rec.Code != http.StatusOK {
		t.Errorf("mallory → /ProjectA/.archive/100.html: status %d, want 200; body = %s", rec.Code, rec.Body.String())
	}

	// Alice attempting ProjectB directly is denied at the contextPath ACL.
	rec = callArchive(t, cfg, idx, "alice@example.com", "/ProjectB", "200.html")
	if rec.Code != http.StatusForbidden {
		t.Errorf("alice → /ProjectB/.archive/200.html: status %d, want 403 (denied at contextPath)", rec.Code)
	}

	// Bob has no denies — he can pull 200.html from /ProjectB.
	rec = callArchive(t, cfg, idx, "bob@example.com", "/ProjectB", "200.html")
	if rec.Code != http.StatusOK {
		t.Errorf("bob → /ProjectB/.archive/200.html: status %d, want 200", rec.Code)
	}
}
// Cascade direction sanity check: a denial at the subtree wins over an
// allow at the parent, AND a target-level allow can rescue a user the
// parent didn't mention. Both directions must be exercised so future
// refactors of the per-target ACL helper can't silently break one.
func TestServeArchive_CascadeDirectionsBothEnforced(t *testing.T) {
	root, idx := archiveTestRoot(t)
	// The root policy names only bob; ProjectA's policy names only alice.
	// Expected outcome: bob is admitted in both projects through the root
	// rule, alice is admitted in ProjectA only, and mallory — named
	// nowhere — is refused at the contextPath before any target is
	// considered.
	writeZddc(t, root, ".", `acl:
allow: ["bob@example.com"]
`)
	writeZddc(t, root, "ProjectA", `acl:
allow: ["alice@example.com"]
`)
	cfg := archiveCfg(root)

	type probe struct {
		email       string
		contextPath string
		filename    string
		wantStatus  int
		why         string
	}
	probes := []probe{
		{
			email:       "bob@example.com",
			contextPath: "/ProjectA",
			filename:    "100.html",
			wantStatus:  http.StatusOK,
			why:         "bob allowed at root → reaches ProjectA target",
		},
		{
			email:       "bob@example.com",
			contextPath: "/ProjectB",
			filename:    "200.html",
			wantStatus:  http.StatusOK,
			why:         "bob allowed at root → reaches ProjectB target",
		},
		{
			email:       "alice@example.com",
			contextPath: "/ProjectA",
			filename:    "100.html",
			wantStatus:  http.StatusOK,
			why:         "alice rescued by ProjectA allow",
		},
		{
			email:       "alice@example.com",
			contextPath: "/ProjectB",
			filename:    "200.html",
			wantStatus:  http.StatusForbidden,
			why:         "alice not in ProjectB chain → 403 at contextPath",
		},
		{
			// mallory denied everywhere; the contextPath gate fires first.
			email:       "mallory@example.com",
			contextPath: "/ProjectA",
			filename:    "100.html",
			wantStatus:  http.StatusForbidden,
			why:         "mallory blocked at contextPath",
		},
	}

	for _, p := range probes {
		label := strings.Join([]string{p.email, p.contextPath, p.filename}, "_")
		t.Run(label, func(t *testing.T) {
			rec := callArchive(t, cfg, idx, p.email, p.contextPath, p.filename)
			if got := rec.Code; got != p.wantStatus {
				t.Errorf("%s @ %s → %s: status %d, want %d (%s)", p.email, p.contextPath, p.filename, got, p.wantStatus, p.why)
			}
		})
	}
}
// .archive serves the resolved file in place — the URL never changes.
// From any depth within the same project the resolver picks the same
// target file, so the bytes returned to the caller must be identical
// across context paths (the per-revision file URL is intentionally
// hidden so external links remain stable).
//
// Bodies are compared against the first context path that returned 200,
// so one failing context doesn't cascade into spurious "differs" errors
// for the remaining ones.
func TestServeArchive_ServedBytesStableAcrossDepthWithinProject(t *testing.T) {
	root, idx := archiveTestRoot(t)
	writeZddc(t, root, ".", `acl:
allow: ["*"]
`)
	cfg := archiveCfg(root)

	// Fixture files contain their own relative path, so this prefix
	// identifies the 100_A file in the T1 transmittal folder.
	wantBodyPrefix := "ProjectA/2025-01-01_T1 (IFR) - Title/100_A"

	var (
		refBody string // body of the first successful response
		refCtx  string // context path that produced refBody
		haveRef bool   // whether refBody has been captured yet
	)
	for _, ctx := range []string{
		"/ProjectA",
		"/ProjectA/2025-01-01_T1 (IFR) - Title",
		"/ProjectA/2025-02-01_T2 (RTN) - Comments",
	} {
		rec := callArchive(t, cfg, idx, "alice@example.com", ctx, "100.html")
		if rec.Code != http.StatusOK {
			t.Errorf("ctx=%s status=%d body=%s", ctx, rec.Code, rec.Body.String())
			continue
		}
		// In-place serving means no redirect target may be advertised.
		if loc := rec.Header().Get("Location"); loc != "" {
			t.Errorf("ctx=%s unexpected Location=%q (.archive must serve in place)", ctx, loc)
		}
		body := rec.Body.String()
		if !strings.HasPrefix(body, wantBodyPrefix) {
			t.Errorf("ctx=%s body=%q, want prefix %q", ctx, body, wantBodyPrefix)
		}
		if !haveRef {
			refBody, refCtx, haveRef = body, ctx, true
			continue
		}
		if body != refBody {
			t.Errorf("ctx=%s body differs from ctx=%s (resolver should pick the same target regardless of depth)", ctx, refCtx)
		}
	}
}
// Cross-project: same tracking number issued under two projects. Each
// project's .archive/ resolves to its own copy, never the other's, and each
// project's listing carries only URLs under its own project segment.
func TestServeArchive_CrossProjectSameTrackingNoLeak(t *testing.T) {
	root := t.TempDir()
	// mk writes a fixture whose content is its own relative path, so the
	// served body reveals which project's file was picked.
	mk := func(rel string) {
		path := filepath.Join(root, filepath.FromSlash(rel))
		if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
			t.Fatalf("mkdir: %v", err)
		}
		if err := os.WriteFile(path, []byte(rel), 0o644); err != nil {
			t.Fatalf("write %s: %v", path, err)
		}
	}
	mk("ProjectA/2025-01-01_T1 (IFR) - Title/123_A (IFR) - Title.pdf")
	mk("ProjectB/2025-06-01_T9 (IFR) - Other Title/123_A (IFR) - Other Title.pdf")
	idx, err := archive.BuildIndex(root)
	if err != nil {
		t.Fatalf("BuildIndex: %v", err)
	}
	writeZddc(t, root, ".", `acl:
allow: ["*"]
`)
	cfg := archiveCfg(root)
	const email = "alice@example.com"

	recA := callArchive(t, cfg, idx, email, "/ProjectA", "123.html")
	if recA.Code != http.StatusOK {
		t.Fatalf("ProjectA 123.html status=%d body=%s", recA.Code, recA.Body.String())
	}
	bodyA := recA.Body.String()
	if !strings.HasPrefix(bodyA, "ProjectA/") {
		t.Errorf("ProjectA body=%q, want a ProjectA/ file's content", bodyA)
	}

	recB := callArchive(t, cfg, idx, email, "/ProjectB", "123.html")
	if recB.Code != http.StatusOK {
		t.Fatalf("ProjectB 123.html status=%d body=%s", recB.Code, recB.Body.String())
	}
	bodyB := recB.Body.String()
	if !strings.HasPrefix(bodyB, "ProjectB/") {
		t.Errorf("ProjectB body=%q, want a ProjectB/ file's content", bodyB)
	}
	if bodyA == bodyB {
		t.Errorf("cross-project leak: same body served for both projects: %q", bodyA)
	}

	// URL must NOT have been rewritten — neither response carries a
	// Location header. Stable .archive/ links are the whole point.
	if loc := recA.Header().Get("Location"); loc != "" {
		t.Errorf("ProjectA: unexpected Location header %q (.archive must serve in place)", loc)
	}
	if loc := recB.Header().Get("Location"); loc != "" {
		t.Errorf("ProjectB: unexpected Location header %q (.archive must serve in place)", loc)
	}

	// Listing each project shows only its own.
	for _, c := range []struct{ ctx, mustHave, mustNot string }{
		{"/ProjectA", "ProjectA", "ProjectB"},
		{"/ProjectB", "ProjectB", "ProjectA"},
	} {
		rec := callArchive(t, cfg, idx, email, c.ctx, "")
		if rec.Code != http.StatusOK {
			t.Fatalf("listing %s: status %d", c.ctx, rec.Code)
		}
		got := decodeListing(t, rec.Body.Bytes())
		// Each project holds a 123 fixture, so an empty listing would make
		// the per-entry checks below pass vacuously.
		if len(got) == 0 {
			t.Errorf("ctx=%s listing was empty, want entries", c.ctx)
		}
		for _, e := range got {
			if !strings.Contains(e.URL, "/"+c.mustHave+"/") {
				t.Errorf("ctx=%s entry URL %q lacks /%s/ segment", c.ctx, e.URL, c.mustHave)
			}
			if strings.Contains(e.URL, "/"+c.mustNot+"/") {
				t.Errorf("ctx=%s entry URL %q leaks /%s/ segment", c.ctx, e.URL, c.mustNot)
			}
		}
	}
}
// Default-deny: as soon as ANY .zddc exists in the chain, an unmatched
// caller is denied. Verify this applies to listing entries too — a target
// in a directory with a restrictive .zddc is not surfaced to outsiders even
// though the file exists.
func TestServeArchive_DefaultDenyOnceZddcExists(t *testing.T) {
	root, idx := archiveTestRoot(t)
	// Only alice is named at the root and there is no "*" entry, so every
	// other caller falls through to default-deny.
	writeZddc(t, root, ".", `acl:
allow: ["alice@example.com"]
`)
	cfg := archiveCfg(root)

	const (
		insider  = "alice@example.com"
		outsider = "charlie@example.com"
		project  = "/ProjectA"
	)

	// The named caller gets a populated listing.
	insiderList := callArchive(t, cfg, idx, insider, project, "")
	if insiderList.Code != http.StatusOK {
		t.Fatalf("alice listing: status %d, want 200", insiderList.Code)
	}
	if entries := decodeListing(t, insiderList.Body.Bytes()); len(entries) == 0 {
		t.Errorf("alice listing was empty, want entries")
	}

	// The unnamed caller is refused the listing outright.
	outsiderList := callArchive(t, cfg, idx, outsider, project, "")
	if outsiderList.Code != http.StatusForbidden {
		t.Errorf("charlie listing: status %d, want 403", outsiderList.Code)
	}

	// A direct resolve is refused too, and with 403: the contextPath ACL is
	// checked before the target is looked at.
	outsiderResolve := callArchive(t, cfg, idx, outsider, project, "100.html")
	if outsiderResolve.Code != http.StatusForbidden {
		t.Errorf("charlie resolve: status %d, want 403 (denied at contextPath)", outsiderResolve.Code)
	}
}
// Empty email never matches — even an `allow: ["*"]` policy denies it,
// which is the existing zddc package contract. .archive must honor it.
//
// Both the bare "*" policy named in the contract and a domain wildcard are
// exercised; each subtest gets its own root so the policies cannot
// interfere with each other.
func TestServeArchive_EmptyEmailDeniedEvenWithStarAllow(t *testing.T) {
	policies := []struct {
		name string
		body string
	}{
		{
			name: "star",
			body: `acl:
allow: ["*"]
`,
		},
		{
			name: "domain wildcard",
			body: `acl:
allow: ["*@example.com"]
`,
		},
	}
	for _, p := range policies {
		t.Run(p.name, func(t *testing.T) {
			root, idx := archiveTestRoot(t)
			writeZddc(t, root, ".", p.body)
			cfg := archiveCfg(root)

			// Anonymous caller: the email stored in the request context is "".
			rec := callArchive(t, cfg, idx, "", "/ProjectA", "")
			if rec.Code != http.StatusForbidden {
				t.Errorf("anonymous listing: status %d, want 403", rec.Code)
			}
		})
	}
}
// projectFromContextPath is the canonical place to derive the project key
// from the .archive contextPath. Pin the edge cases.
func TestProjectFromContextPath(t *testing.T) {
	type edge struct {
		ctx  string
		want string
	}
	// Covers: plain project, trailing slash, nested path, bare root, empty
	// string, and a path without a leading slash.
	edges := []edge{
		{ctx: "/ProjectA", want: "ProjectA"},
		{ctx: "/ProjectA/", want: "ProjectA"},
		{ctx: "/ProjectA/sub/sub", want: "ProjectA"},
		{ctx: "/", want: ""},
		{ctx: "", want: ""},
		{ctx: "ProjectA/sub", want: "ProjectA"},
	}
	for i := range edges {
		e := edges[i]
		if got := projectFromContextPath(e.ctx); got != e.want {
			t.Errorf("projectFromContextPath(%q) = %q, want %q", e.ctx, got, e.want)
		}
	}
}