ZDDC/zddc/internal/handler/archivehandler_test.go
2026-06-11 13:32:31 -05:00

525 lines
19 KiB
Go

package handler
import (
"context"
"encoding/json"
"net/http"
"net/http/httptest"
"net/url"
"os"
"path/filepath"
"strings"
"testing"
"codeberg.org/VARASYS/ZDDC/zddc/internal/apps"
"codeberg.org/VARASYS/ZDDC/zddc/internal/archive"
"codeberg.org/VARASYS/ZDDC/zddc/internal/config"
"codeberg.org/VARASYS/ZDDC/zddc/internal/listing"
"codeberg.org/VARASYS/ZDDC/zddc/internal/zddc"
)
// archiveTestRoot creates a temporary two-project fixture tree and indexes
// it, so listings exercise project scoping, ACL cascading, and the
// per-project bucket boundary. No .zddc files are written here; each test
// lays down its own ACLs after calling this helper.
//
//	<root>/
//	  ProjectA/
//	    2025-01-01_T1 (IFR) - Title/100_~A (IFR) - Title.pdf
//	    2025-01-01_T1 (IFR) - Title/100_A (IFC) - Title.pdf
//	    2025-02-01_T2 (RTN) - Comments/100_~A+C1 (RTN) - Comments.pdf
//	  ProjectB/
//	    2025-01-01_T3 (IFR) - Title/200_0 (IFR) - Other.pdf
//
// The content of every fixture file is its own slash-separated relative
// path, so the in-place .archive serve can be verified body-side (the
// resolver no longer issues a redirect — see archivehandler.go).
//
// It returns the fixture root and the index produced by archive.BuildIndex;
// any setup failure aborts the calling test.
func archiveTestRoot(t *testing.T) (string, *archive.Index) {
	t.Helper()
	root := t.TempDir()

	fixtures := []string{
		"ProjectA/2025-01-01_T1 (IFR) - Title/100_~A (IFR) - Title.pdf",
		"ProjectA/2025-01-01_T1 (IFR) - Title/100_A (IFC) - Title.pdf",
		"ProjectA/2025-02-01_T2 (RTN) - Comments/100_~A+C1 (RTN) - Comments.pdf",
		"ProjectB/2025-01-01_T3 (IFR) - Title/200_0 (IFR) - Other.pdf",
	}
	for _, rel := range fixtures {
		full := filepath.Join(root, filepath.FromSlash(rel))
		if err := os.MkdirAll(filepath.Dir(full), 0o755); err != nil {
			t.Fatalf("mkdir: %v", err)
		}
		if err := os.WriteFile(full, []byte(rel), 0o644); err != nil {
			t.Fatalf("write %s: %v", full, err)
		}
	}

	idx, err := archive.BuildIndex(root)
	if err != nil {
		t.Fatalf("BuildIndex: %v", err)
	}
	return root, idx
}
// writeZddc creates the directory <root>/<rel> if needed and writes body as
// the .zddc YAML file inside it. Afterwards it invalidates the zddc policy
// cache for that directory so a permissive .zddc left by an earlier test
// doesn't bleed into the current one. Any filesystem error aborts the
// calling test.
func writeZddc(t *testing.T, root, rel, body string) {
	t.Helper()

	dir := filepath.Join(root, filepath.FromSlash(rel))
	err := os.MkdirAll(dir, 0o755)
	if err != nil {
		t.Fatalf("mkdir %s: %v", dir, err)
	}

	policyFile := filepath.Join(dir, ".zddc")
	err = os.WriteFile(policyFile, []byte(body), 0o644)
	if err != nil {
		t.Fatalf("write .zddc: %v", err)
	}

	zddc.InvalidateCache(dir)
}
// archiveCfg returns the configuration shared by the archive handler tests:
// the given root, the X-Auth-Request-Email header name, and ".archive" as
// the index path. All other fields keep their zero values.
func archiveCfg(root string) config.Config {
	var cfg config.Config
	cfg.Root = root
	cfg.EmailHeader = "X-Auth-Request-Email"
	cfg.IndexPath = ".archive"
	return cfg
}
// callArchive drives ServeArchive directly with (project, filename) and
// returns the recorded response. The dispatcher is responsible for
// canonicalizing deeper /<project>/<sub>/.archive/... paths to this shape
// (see TestDispatchArchiveRedirect in the cmd package).
//
// The request is sent without an Accept header; tests that need a specific
// Accept value use callArchiveAccept instead. Request construction lives in
// callArchiveAccept so the two helpers cannot drift apart — an empty accept
// string there means "set no header".
func callArchive(t *testing.T, cfg config.Config, idx *archive.Index, email, project, filename string) *httptest.ResponseRecorder {
	t.Helper()
	return callArchiveAccept(t, cfg, idx, email, project, filename, "")
}
// callArchiveAccept invokes ServeArchive for (project, filename) with an
// optional Accept header and returns the recorded response. It is used to
// drive the listing's content-negotiation branches.
//
// The request URL is "/" when project is empty, otherwise
// "/<project>/<cfg.IndexPath>/"; a non-empty filename is appended to it. Both
// segments are path-escaped. The email is attached to the request context
// under EmailKey. An empty accept leaves the Accept header unset.
func callArchiveAccept(t *testing.T, cfg config.Config, idx *archive.Index, email, project, filename, accept string) *httptest.ResponseRecorder {
	t.Helper()

	target := "/"
	if project != "" {
		target += url.PathEscape(project) + "/" + cfg.IndexPath + "/"
	}
	if filename != "" {
		target += url.PathEscape(filename)
	}

	req := httptest.NewRequest(http.MethodGet, target, nil)
	if accept != "" {
		req.Header.Set("Accept", accept)
	}
	ctx := context.WithValue(req.Context(), EmailKey, email)

	recorder := httptest.NewRecorder()
	ServeArchive(cfg, idx, recorder, req.WithContext(ctx), project, filename)
	return recorder
}
// decodeListing parses body as a JSON array of listing entries and returns
// it. A body that does not unmarshal aborts the calling test, printing the
// decode error followed by the raw body.
func decodeListing(t *testing.T, body []byte) []listing.FileInfo {
	t.Helper()

	var entries []listing.FileInfo
	err := json.Unmarshal(body, &entries)
	if err != nil {
		t.Fatalf("invalid JSON: %v\n%s", err, body)
	}
	return entries
}
// names returns the Name field of each entry, in the same order as entries.
// The result is always a non-nil slice, even for empty input.
func names(entries []listing.FileInfo) []string {
	result := make([]string, len(entries))
	for i := range entries {
		result[i] = entries[i].Name
	}
	return result
}
// contains reports whether x is an element of xs. A nil or empty slice
// contains nothing.
func contains(xs []string, x string) bool {
	for i := 0; i < len(xs); i++ {
		if xs[i] == x {
			return true
		}
	}
	return false
}
// TestServeArchive_EmptyProject404 checks that the handler itself rejects an
// empty project (no first path segment) with 404, for both the listing and
// the resolve shape. The dispatcher already 404s /.archive/ before reaching
// the handler; this guard is defense in depth so a future direct caller
// can't bypass it.
func TestServeArchive_EmptyProject404(t *testing.T) {
	root, idx := archiveTestRoot(t)
	writeZddc(t, root, ".", `acl:
permissions: {"*": rwcd}
`)
	cfg := archiveCfg(root)
	const email = "alice@example.com"

	// Listing shape: no project, no filename.
	listingRec := callArchive(t, cfg, idx, email, "", "")
	if got := listingRec.Code; got != http.StatusNotFound {
		t.Errorf("listing with empty project: status %d, want 404", got)
	}

	// Resolve shape: a filename but still no project.
	resolveRec := callArchive(t, cfg, idx, email, "", "100.html")
	if got := resolveRec.Code; got != http.StatusNotFound {
		t.Errorf("resolve with empty project: status %d, want 404", got)
	}
}
// TestServeArchive_UnknownProject404 checks that a project with no bucket in
// the index yields 404 (not 403) for both listing and resolve, so a probe
// for project names gets the same shape whether or not the project exists.
func TestServeArchive_UnknownProject404(t *testing.T) {
	root, idx := archiveTestRoot(t)
	writeZddc(t, root, ".", `acl:
permissions: {"*": rwcd}
`)
	cfg := archiveCfg(root)
	const (
		email   = "alice@example.com"
		project = "NoSuchProject"
	)

	listingRec := callArchive(t, cfg, idx, email, project, "")
	if got := listingRec.Code; got != http.StatusNotFound {
		t.Errorf("listing for unknown project: status %d, want 404; body=%s", got, listingRec.Body.String())
	}

	resolveRec := callArchive(t, cfg, idx, email, project, "100.html")
	if got := resolveRec.Code; got != http.StatusNotFound {
		t.Errorf("resolve in unknown project: status %d, want 404", got)
	}
}
// TestServeArchive_ListingScopedToProject checks listing scoping: each
// project's bucket surfaces only its own entries, and every entry URL is
// project-rooted (/<project>/.archive/...) — independent of any deeper
// request path the caller might have started from (the dispatcher
// canonicalizes those before reaching the handler).
func TestServeArchive_ListingScopedToProject(t *testing.T) {
	root, idx := archiveTestRoot(t)
	writeZddc(t, root, ".", `acl:
permissions: {"*": rwcd}
`)
	cfg := archiveCfg(root)
	const email = "alice@example.com"

	type scopeCase struct {
		label   string   // subtest name
		project string   // project whose listing is requested
		prefix  string   // prefix every entry URL must carry
		present []string // entry names that must be listed
		absent  []string // entry names from the other project that must not be
	}
	table := []scopeCase{
		{
			label:   "ProjectA",
			project: "ProjectA",
			prefix:  "/ProjectA/.archive/",
			present: []string{"100.html", "100_A.html", "100_~A.html"},
			absent:  []string{"200.html", "200_0.html"},
		},
		{
			label:   "ProjectB",
			project: "ProjectB",
			prefix:  "/ProjectB/.archive/",
			present: []string{"200.html", "200_0.html"},
			absent:  []string{"100.html", "100_A.html", "100_~A.html"},
		},
	}

	for _, tc := range table {
		t.Run(tc.label, func(t *testing.T) {
			rec := callArchiveAccept(t, cfg, idx, email, tc.project, "", "application/json")
			if rec.Code != http.StatusOK {
				t.Fatalf("status = %d, want 200; body = %s", rec.Code, rec.Body.String())
			}
			entries := decodeListing(t, rec.Body.Bytes())
			listed := names(entries)

			for _, name := range tc.present {
				if !contains(listed, name) {
					t.Errorf("missing %q in %s; got %v", name, tc.project, listed)
				}
			}
			for _, name := range tc.absent {
				if contains(listed, name) {
					t.Errorf("unexpected cross-project entry %q in %s; got %v", name, tc.project, listed)
				}
			}
			for _, entry := range entries {
				if !strings.HasPrefix(entry.URL, tc.prefix) {
					t.Errorf("entry %q URL = %q, want %s prefix", entry.Name, entry.URL, tc.prefix)
				}
			}
		})
	}
}
// TestServeArchive_ListingForbiddenWhenUserCanReadNothing is the listing
// existence-leak guard: a user who can read no entries in a non-empty
// project bucket gets 403, NOT 200 with an empty list. The project must not
// confirm its existence to a caller with no permissions. A user who is
// granted access gets 200 for the same listing.
func TestServeArchive_ListingForbiddenWhenUserCanReadNothing(t *testing.T) {
	root, idx := archiveTestRoot(t)

	// Default-deny: alice is the only identity listed at any level.
	// mallory appears in no allow list, so every per-target check denies
	// her, the filtered listing is empty, and the handler answers 403.
	writeZddc(t, root, ".", `acl:
permissions: {"alice@example.com": rwcd}
`)
	cfg := archiveCfg(root)

	denied := callArchiveAccept(t, cfg, idx, "mallory@example.com", "ProjectA", "", "application/json")
	if got := denied.Code; got != http.StatusForbidden {
		t.Errorf("mallory listing: status %d, want 403; body=%s", got, denied.Body.String())
	}

	allowed := callArchiveAccept(t, cfg, idx, "alice@example.com", "ProjectA", "", "application/json")
	if got := allowed.Code; got != http.StatusOK {
		t.Errorf("alice listing: status %d, want 200; body=%s", got, allowed.Body.String())
	}
}
// TestServeArchive_ListingFiltersEntriesByPerTargetACL checks that listing
// entries are filtered per target by ACL: a caller denied at one transmittal
// subtree but allowed at others sees the unblocked entries (200 with the
// subset), not 403, because they have SOME read access in the project.
func TestServeArchive_ListingFiltersEntriesByPerTargetACL(t *testing.T) {
	root, idx := archiveTestRoot(t)
	writeZddc(t, root, ".", `acl:
permissions: {"*": rwcd}
`)
	// Deny alice on the transmittal folder holding 100_~A+C1, so her
	// listing of /ProjectA/.archive/ loses that entry while the other
	// ProjectA entries stay visible.
	writeZddc(t, root, "ProjectA/2025-02-01_T2 (RTN) - Comments", `acl:
permissions: {"alice@example.com": ""}
`)
	cfg := archiveCfg(root)

	rec := callArchiveAccept(t, cfg, idx, "alice@example.com", "ProjectA", "", "application/json")
	if rec.Code != http.StatusOK {
		t.Fatalf("status = %d, want 200; body = %s", rec.Code, rec.Body.String())
	}
	listed := names(decodeListing(t, rec.Body.Bytes()))

	accessible := []string{"100.html", "100_A.html", "100_~A.html"}
	for _, name := range accessible {
		if !contains(listed, name) {
			t.Errorf("alice missing accessible entry %q; got %v", name, listed)
		}
	}

	// 100_~A+C1.html maps to a denied target and must not be listed.
	if contains(listed, "100_~A+C1.html") {
		t.Errorf("alice unexpectedly saw denied entry 100_~A+C1.html; got %v", listed)
	}
}
// TestServeArchive_ResolvePerTargetACLOnly checks that resolve is gated by
// the per-target ACL alone. A caller denied on the resolved file's directory
// gets 404 (not 403), so the tracking number's existence is never confirmed.
func TestServeArchive_ResolvePerTargetACLOnly(t *testing.T) {
	root, idx := archiveTestRoot(t)

	// alice and mallory are both allowed at the root, but an explicit deny
	// on the transmittal folder removes mallory at the per-target chain
	// ("first explicit match wins, bottom-up").
	writeZddc(t, root, ".", `acl:
permissions: {"alice@example.com": rwcd, "mallory@example.com": rwcd}
`)
	writeZddc(t, root, "ProjectA/2025-01-01_T1 (IFR) - Title", `acl:
permissions: {"mallory@example.com": ""}
`)
	cfg := archiveCfg(root)

	// alice resolves normally.
	aliceRec := callArchive(t, cfg, idx, "alice@example.com", "ProjectA", "100.html")
	if got := aliceRec.Code; got != http.StatusOK {
		t.Errorf("alice resolve: status %d, want 200; body=%s", got, aliceRec.Body.String())
	}

	// mallory is denied at the file's directory, which surfaces as 404
	// (existence-leak guard).
	malloryRec := callArchive(t, cfg, idx, "mallory@example.com", "ProjectA", "100.html")
	if got := malloryRec.Code; got != http.StatusNotFound {
		t.Errorf("mallory resolve: status %d, want 404 (per-target deny); body=%s", got, malloryRec.Body.String())
	}
}
// TestServeArchive_ResolveBypassesProjectRootDenyWhenPerTargetAllows checks
// that resolve is decoupled from the project-root ACL: a user explicitly
// allowed at one transmittal folder, and absent from every other allow
// list, can still fetch tracking numbers that resolve into that folder.
// .archive/ is a virtual surface — the file's own ACL chain decides. The
// response must also carry no Location header, i.e. it is served in place.
func TestServeArchive_ResolveBypassesProjectRootDenyWhenPerTargetAllows(t *testing.T) {
	root, idx := archiveTestRoot(t)

	// The root lists only alice, so bob is denied there; the transmittal
	// folder below allows him. The cascade is "first explicit match wins,
	// bottom-up", so the per-target chain at the file's directory reaches
	// the local allow first.
	writeZddc(t, root, ".", `acl:
permissions: {"alice@example.com": rwcd}
`)
	writeZddc(t, root, "ProjectA/2025-01-01_T1 (IFR) - Title", `acl:
permissions: {"bob@example.com": rwcd}
`)
	cfg := archiveCfg(root)

	rec := callArchive(t, cfg, idx, "bob@example.com", "ProjectA", "100.html")
	if got := rec.Code; got != http.StatusOK {
		t.Errorf("bob resolve: status %d, want 200 (per-target allow rescues him); body=%s", got, rec.Body.String())
	}

	location := rec.Header().Get("Location")
	if location != "" {
		t.Errorf("unexpected Location=%q (.archive must serve in place)", location)
	}
}
// TestServeArchive_CrossProjectSameTrackingNoLeak covers the same tracking
// number issued under two projects. Each project's .archive/ must resolve to
// its own copy, never the other's; neither response may carry a Location
// header; and each project's listing must contain only URLs under that
// project.
func TestServeArchive_CrossProjectSameTrackingNoLeak(t *testing.T) {
	root := t.TempDir()

	// Tracking number 123 exists in both projects. Each file's content is
	// its own relative path, which is how the bodies are told apart below.
	for _, rel := range []string{
		"ProjectA/2025-01-01_T1 (IFR) - Title/123_A (IFR) - Title.pdf",
		"ProjectB/2025-06-01_T9 (IFR) - Other Title/123_A (IFR) - Other Title.pdf",
	} {
		full := filepath.Join(root, filepath.FromSlash(rel))
		if err := os.MkdirAll(filepath.Dir(full), 0o755); err != nil {
			t.Fatalf("mkdir: %v", err)
		}
		if err := os.WriteFile(full, []byte(rel), 0o644); err != nil {
			t.Fatalf("write %s: %v", full, err)
		}
	}

	idx, err := archive.BuildIndex(root)
	if err != nil {
		t.Fatalf("BuildIndex: %v", err)
	}
	writeZddc(t, root, ".", `acl:
permissions: {"*": rwcd}
`)
	cfg := archiveCfg(root)
	const email = "alice@example.com"

	recA := callArchive(t, cfg, idx, email, "ProjectA", "123.html")
	if recA.Code != http.StatusOK {
		t.Fatalf("ProjectA 123.html status=%d body=%s", recA.Code, recA.Body.String())
	}
	bodyA := recA.Body.String()
	if !strings.HasPrefix(bodyA, "ProjectA/") {
		t.Errorf("ProjectA body=%q, want a ProjectA/ file's content", bodyA)
	}

	recB := callArchive(t, cfg, idx, email, "ProjectB", "123.html")
	if recB.Code != http.StatusOK {
		t.Fatalf("ProjectB 123.html status=%d body=%s", recB.Code, recB.Body.String())
	}
	bodyB := recB.Body.String()
	if !strings.HasPrefix(bodyB, "ProjectB/") {
		t.Errorf("ProjectB body=%q, want a ProjectB/ file's content", bodyB)
	}

	if bodyA == bodyB {
		t.Errorf("cross-project leak: same body served for both projects: %q", bodyA)
	}

	// The URL must NOT have been rewritten: neither response carries a
	// Location header. Stable .archive/ links are the whole point.
	if location := recA.Header().Get("Location"); location != "" {
		t.Errorf("ProjectA: unexpected Location header %q (.archive must serve in place)", location)
	}
	if location := recB.Header().Get("Location"); location != "" {
		t.Errorf("ProjectB: unexpected Location header %q (.archive must serve in place)", location)
	}

	// Each project's listing shows only its own entries.
	for _, project := range []string{"ProjectA", "ProjectB"} {
		rec := callArchiveAccept(t, cfg, idx, email, project, "", "application/json")
		if rec.Code != http.StatusOK {
			t.Fatalf("listing %s: status %d", project, rec.Code)
		}
		segment := "/" + project + "/"
		for _, entry := range decodeListing(t, rec.Body.Bytes()) {
			if !strings.Contains(entry.URL, segment) {
				t.Errorf("project=%s entry URL %q lacks /%s/ segment", project, entry.URL, project)
			}
		}
	}
}
// TestServeArchive_EmptyEmailDeniedEvenWithStarAllow checks that a request
// with an empty email is denied under a wildcard grant: with the root .zddc
// granting rwcd to "*@example.com", the anonymous listing gets 403 (empty
// filtered set) and the anonymous resolve gets 404.
//
// NOTE(review): the test name refers to a "star allow", but the fixture
// grants "*@example.com" rather than a bare "*" as the other tests here do.
// An empty email presumably cannot match a domain-suffixed pattern in any
// case — confirm against the zddc matcher whether a bare "*" was intended.
func TestServeArchive_EmptyEmailDeniedEvenWithStarAllow(t *testing.T) {
	root, idx := archiveTestRoot(t)
	writeZddc(t, root, ".", `acl:
permissions: {"*@example.com": rwcd}
`)
	cfg := archiveCfg(root)
	const anonymous = ""

	listingRec := callArchiveAccept(t, cfg, idx, anonymous, "ProjectA", "", "application/json")
	if got := listingRec.Code; got != http.StatusForbidden {
		t.Errorf("anonymous listing: status %d, want 403", got)
	}

	resolveRec := callArchive(t, cfg, idx, anonymous, "ProjectA", "100.html")
	if got := resolveRec.Code; got != http.StatusNotFound {
		t.Errorf("anonymous resolve: status %d, want 404", got)
	}
}
// TestServeArchive_ListingContentNegotiation checks the listing's content
// negotiation on a single URL:
//
//   - Accept: application/json returns the JSON entry array with a JSON
//     content type and a Vary header mentioning Accept.
//   - Accept: text/html returns the embedded `browse` SPA bytes, verified
//     through the content type, the embedded ETag, and X-ZDDC-Source. A JSON
//     response is also accepted here, for the case where the embedded slot
//     is empty.
//   - If the JSON response carried an ETag, a conditional GET with
//     If-None-Match set to it must yield 304.
func TestServeArchive_ListingContentNegotiation(t *testing.T) {
	root, idx := archiveTestRoot(t)
	writeZddc(t, root, ".", `acl:
permissions: {"*": rwcd}
`)
	cfg := archiveCfg(root)
	const email = "alice@example.com"

	// JSON branch.
	recJSON := callArchiveAccept(t, cfg, idx, email, "ProjectA", "", "application/json")
	if recJSON.Code != http.StatusOK {
		t.Fatalf("JSON listing: status %d, want 200; body=%s", recJSON.Code, recJSON.Body.String())
	}
	jsonType := recJSON.Header().Get("Content-Type")
	if !strings.Contains(jsonType, "application/json") {
		t.Errorf("JSON listing content-type=%q, want application/json", jsonType)
	}
	vary := recJSON.Header().Get("Vary")
	if !strings.Contains(vary, "Accept") {
		t.Errorf("JSON listing missing Vary: Accept (got %q)", vary)
	}
	// Only the parse matters here; decodeListing fails the test on bad JSON.
	_ = decodeListing(t, recJSON.Body.Bytes())

	// HTML branch. The handler falls back to JSON only when the embedded
	// slot is empty, which won't be the case in a normal test run (the
	// embed is populated at compile time). Either outcome is verified.
	recHTML := callArchiveAccept(t, cfg, idx, email, "ProjectA", "", "text/html")
	if recHTML.Code != http.StatusOK {
		t.Fatalf("HTML listing: status %d, want 200; body=%s", recHTML.Code, recHTML.Body.String())
	}
	htmlType := recHTML.Header().Get("Content-Type")
	if strings.Contains(htmlType, "text/html") {
		// Normal path: the embedded browse bytes were served. The expected
		// ETag is never empty (it always includes the quotes), so a missing
		// header fails this comparison too.
		wantETag := `"` + apps.EmbeddedETag("browse") + `"`
		if etag := recHTML.Header().Get("ETag"); etag != wantETag {
			t.Errorf("HTML listing ETag=%q, want %q", etag, wantETag)
		}
		if src := recHTML.Header().Get("X-ZDDC-Source"); src != "embedded:browse" {
			t.Errorf("HTML listing X-ZDDC-Source=%q, want embedded:browse", src)
		}
	} else if strings.Contains(htmlType, "application/json") {
		// Bootstrap path: embedded slot empty (e.g. fresh build before
		// browse.html has been populated). The JSON fallback is acceptable
		// as long as it parses as a listing.
		_ = decodeListing(t, recHTML.Body.Bytes())
	} else {
		t.Errorf("HTML listing unexpected content-type=%q", htmlType)
	}

	// Conditional GET: re-fetching with If-None-Match set to the JSON ETag
	// short-circuits to 304. Skipped when the JSON response had no ETag.
	etagJSON := recJSON.Header().Get("ETag")
	if etagJSON == "" {
		return
	}
	req := httptest.NewRequest(http.MethodGet, "/ProjectA/.archive/", nil)
	req.Header.Set("Accept", "application/json")
	req.Header.Set("If-None-Match", etagJSON)
	ctx := context.WithValue(req.Context(), EmailKey, email)
	rec304 := httptest.NewRecorder()
	ServeArchive(cfg, idx, rec304, req.WithContext(ctx), "ProjectA", "")
	if rec304.Code != http.StatusNotModified {
		t.Errorf("conditional JSON GET: status %d, want 304", rec304.Code)
	}
}