From 5e4d4fefb321b7befc2c581fe6f9d018fed080d4 Mon Sep 17 00:00:00 2001 From: ZDDC Date: Tue, 12 May 2026 12:17:47 -0500 Subject: [PATCH] feat(zddc): serve a .zip as a virtual directory (zipfs + dispatch intercept) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit zddc-server can now browse into a .zip file without the client downloading the whole archive: - GET …/Foo.zip/ → JSON listing of the zip's members (Accept: application/json), or the browse SPA (HTML) — same content negotiation as ServeDirectory/.archive - GET …/Foo.zip/sub/doc.pdf → extracts and streams that one member (Range / ETag / conditional GET via http.ServeContent) - GET …/Foo.zip → unchanged: the raw .zip download - PUT/DELETE/POST …/Foo.zip/… → 405 (zip access is read-only) New internal/zipfs package reconstructs directory levels from the zip's flat central directory (synthesising intermediate dirs with no explicit "/" entry, mirroring what browse does client-side with JSZip) and drops zip-slip-unsafe entries ("..", absolute, backslash). New handler.ServeZip wraps it. The dispatcher gets splitZipPath + an intercept placed before the file-API branch (so a write to a path under a .zip is refused, not silently mkdir'd); ACL is the chain of the directory CONTAINING the zip — a zip carries no .zddc of its own, same as the .archive virtual surface. The os.Stat-per-segment walk is gated by a cheap ".zip/" substring check so ordinary requests are unaffected. 
Also fixes two pre-existing dispatch-test failures uncovered along the way: a non-existent top-level "*.html" URL was 302'ing to its slash form (because the bare "*" project glob makes every first-level segment "declared") — the cascade-declared no-slash block now requires a directory-shaped URL (trailing slash, or no file extension); and the stale TestDispatchSlashRouting expectation that archive//mdl/ 302s to mdl/table.html was updated to match the intended behaviour (the default-MDL virtual fallback shows the browse listing there; only a real on-disk tables: + *.table.yaml triggers the bounce). Co-Authored-By: Claude Opus 4.7 (1M context) --- zddc/cmd/zddc-server/main.go | 95 ++++++++ zddc/cmd/zddc-server/main_test.go | 180 ++++++++++++++-- zddc/internal/handler/ziphandler.go | 146 +++++++++++++ zddc/internal/handler/ziphandler_test.go | 226 +++++++++++++++++++ zddc/internal/zipfs/zipfs.go | 250 +++++++++++++++++++++ zddc/internal/zipfs/zipfs_test.go | 264 +++++++++++++++++++++++ 6 files changed, 1146 insertions(+), 15 deletions(-) create mode 100644 zddc/internal/handler/ziphandler.go create mode 100644 zddc/internal/handler/ziphandler_test.go create mode 100644 zddc/internal/zipfs/zipfs.go create mode 100644 zddc/internal/zipfs/zipfs_test.go diff --git a/zddc/cmd/zddc-server/main.go b/zddc/cmd/zddc-server/main.go index 9d8f2c5..5739c17 100644 --- a/zddc/cmd/zddc-server/main.go +++ b/zddc/cmd/zddc-server/main.go @@ -623,6 +623,57 @@ func serveSpecializedNoSlash(cfg config.Config, appsSrv *apps.Server, w http.Res return false } +// splitZipPath detects a "<…>.zip/" URL: a path where some +// ancestor segment resolves to a regular .zip file on disk and there's +// a tail segment after it (or a trailing slash). On a match it returns +// the zip's absolute filesystem path and the slash-separated member +// path inside the zip ("" when the URL is "<…>.zip/" with nothing +// after). 
ok is false for everything else — including "<…>.zip" with +// no trailing slash (that's a plain file download, handled downstream). +// +// Segments are stat'd one at a time against cfg.Root; case-folding has +// already been applied to on-disk segments by appfs.ResolveCanonical +// upstream, so the .zip segment matches by exact name here. The +// per-segment os.Stat walk is gated by a cheap ".zip/" substring check +// at the call site, so it never runs for ordinary requests. +func splitZipPath(fsRoot, urlPath string) (zipAbs, member string, ok bool) { + trimmed := strings.Trim(urlPath, "/") + if trimmed == "" { + return "", "", false + } + segs := strings.Split(trimmed, "/") + cur := fsRoot + for i, seg := range segs { + cur = filepath.Join(cur, seg) + if cur != fsRoot && !strings.HasPrefix(cur, fsRoot+string(filepath.Separator)) { + return "", "", false + } + info, err := os.Stat(cur) + if err != nil { + return "", "", false // a segment doesn't exist on disk — not a zip path + } + if info.IsDir() { + continue + } + // cur is a non-directory. Only a regular .zip file with a tail + // (or trailing slash) is "browse into the zip"; anything else + // falls through to the normal file path. + if !info.Mode().IsRegular() || !strings.EqualFold(filepath.Ext(seg), ".zip") { + return "", "", false + } + if i < len(segs)-1 { + return cur, strings.Join(segs[i+1:], "/"), true + } + // Last segment is the .zip itself: only a trailing slash means + // "browse into it" (member == root); a bare "<…>.zip" is a file. + if strings.HasSuffix(urlPath, "/") { + return cur, "", true + } + return "", "", false + } + return "", "", false +} + // dispatch routes a request to the appropriate handler. 
func dispatch(cfg config.Config, idx *archive.Index, ring *handler.LogRing, appsSrv *apps.Server, tokens *auth.Store, w http.ResponseWriter, r *http.Request) { // URL paths are case-insensitive: resolve each segment against the @@ -824,6 +875,42 @@ func dispatch(cfg config.Config, idx *archive.Index, ring *handler.LogRing, apps return } + // Zip-as-directory intercept: a "<…>.zip/" URL is a virtual + // surface over a real .zip file on disk — GET "<…>.zip/" lists the + // members, GET "<…>.zip/member.pdf" extracts and streams that one + // member, so a client never has to download the whole archive. The + // bare "<…>.zip" (no trailing slash) is NOT matched here and falls + // through to the normal file path (a plain download). Like .archive, + // a zip carries no .zddc of its own — ACL is the chain of the + // directory CONTAINING the zip. Read-only: write methods are + // rejected before ServeFileAPI could try to create a path under a + // file. (The os.Stat walk in splitZipPath is gated by this cheap + // substring check, so it doesn't run for ordinary requests.) 
+ if strings.Contains(strings.ToLower(urlPath), ".zip/") { + if zipAbs, member, ok := splitZipPath(cfg.Root, urlPath); ok { + if handler.IsWriteMethod(r.Method) { + w.Header().Set("Allow", "GET, HEAD") + http.Error(w, "Zip archives are read-only", http.StatusMethodNotAllowed) + return + } + if r.Method != http.MethodGet && r.Method != http.MethodHead { + w.Header().Set("Allow", "GET, HEAD") + http.Error(w, "Method Not Allowed", http.StatusMethodNotAllowed) + return + } + chain, err := zddc.EffectivePolicy(cfg.Root, filepath.Dir(zipAbs)) + if err != nil { + slog.Warn("ACL policy error on zip parent", "path", filepath.Dir(zipAbs), "err", err) + } + if allowed, _ := policy.AllowFromChain(r.Context(), handler.DeciderFromContext(r), chain, email, urlPath); !allowed { + http.Error(w, "Forbidden", http.StatusForbidden) + return + } + handler.ServeZip(cfg, w, r, zipAbs, member) + return + } + } + // File API — authenticated CRUD over the served tree. Catches PUT, // DELETE, and POST on any non-reserved path. Read methods (GET/HEAD) // fall through to the static / apps / directory pipeline below. @@ -951,7 +1038,15 @@ func dispatch(cfg config.Config, idx *archive.Index, ring *handler.LogRing, apps // - slash → ServeDirectory (DirTool; browse by default) // - no-slash → default_tool ("specialized app") if any, // else a 302 to the slash form. + // + // Guard: only directory-shaped URLs qualify. The bare "*" + // project glob matches *any* first-level segment — including + // "foo.html", "foo.txt", etc. — so without the extension + // check a non-existent top-level file would 302-to-slash + // instead of 404. A trailing slash, or no file extension on + // the last segment, means "asking for a directory". 
if (r.Method == http.MethodGet || r.Method == http.MethodHead) && + (strings.HasSuffix(urlPath, "/") || filepath.Ext(urlPath) == "") && zddc.IsDeclaredPath(cfg.Root, absPath) { if strings.HasSuffix(urlPath, "/") { handler.ServeDirectory(cfg, appsSrv, w, r) diff --git a/zddc/cmd/zddc-server/main_test.go b/zddc/cmd/zddc-server/main_test.go index 8566ea1..3d1c849 100644 --- a/zddc/cmd/zddc-server/main_test.go +++ b/zddc/cmd/zddc-server/main_test.go @@ -1,9 +1,11 @@ package main import ( + "archive/zip" "context" "crypto/ed25519" "crypto/rand" + "encoding/json" "net/http" "net/http/httptest" "net/url" @@ -375,18 +377,20 @@ func TestDispatchArchiveRedirect(t *testing.T) { } func TestDispatchSlashRouting(t *testing.T) { - // Convention: / → browse (directory view); → the canonical - // default tool for the directory (mdedit under working/, transmittal - // under staging/, archive under archive/, tables under - // archive//mdl/). Without a default app, no-slash falls - // through to the trailing-slash redirect (302). + // Convention: / → browse (directory view, via DirTool which + // defaults to browse); → the directory's default_tool ("the + // specialized app": mdedit under working/, transmittal under + // staging/, archive under archive/, tables under archive//mdl). + // Without a default_tool, no-slash falls through to the trailing- + // slash redirect (302). // - // Exception: a directory that is the rows-dir of a registered table - // (declared via parent .zddc tables:) — including the default-MDL - // fallback at archive//mdl/ — redirects the trailing-slash - // form too, bouncing to /.table.html. Bare folder - // listings here would just be a row-of-yaml-files preview that the - // table view subsumes. + // The only trailing-slash redirect is for a directory that is the + // rows-dir of a table declared via a REAL on-disk parent .zddc + // `tables:` map with an existing *.table.yaml spec — it bounces to + // /.table.html. 
The default-MDL virtual fallback at + // archive//mdl/ does NOT redirect: the slash form there shows + // the browse listing of the row YAMLs (the no-slash mdl form serves + // the table view). root := t.TempDir() mustWrite(t, filepath.Join(root, ".zddc"), "acl:\n permissions:\n \"*\": rwcda\n") @@ -433,10 +437,11 @@ func TestDispatchSlashRouting(t *testing.T) { {"archive/ no-slash → archive", "/Project/archive/Acme", http.StatusOK, true, ""}, {"archive/ slash → browse", "/Project/archive/Acme/", http.StatusOK, true, ""}, {"archive//mdl no-slash → tables", "/Project/archive/Acme/mdl", http.StatusOK, true, ""}, - // Trailing-slash form on a tables rows-dir bounces to the canonical - // .table.html URL so users land on the table view rather than a - // browse listing of the row-yaml files. - {"archive//mdl slash → 302 in-dir table.html", "/Project/archive/Acme/mdl/", http.StatusFound, false, "/Project/archive/Acme/mdl/table.html"}, + // The default-MDL virtual fallback does NOT redirect the slash + // form — it shows the browse listing of the row YAMLs. (Only a + // real on-disk parent .zddc tables: + *.table.yaml triggers the + // bounce to /.table.html.) 
+ {"archive//mdl slash → browse", "/Project/archive/Acme/mdl/", http.StatusOK, true, ""}, {"archive//incoming no-slash → archive", "/Project/archive/Acme/incoming", http.StatusOK, true, ""}, {"archive//incoming slash → browse", "/Project/archive/Acme/incoming/", http.StatusOK, true, ""}, {"non-canonical no-slash → 302 to slash", "/Project/scratch", http.StatusFound, false, ""}, @@ -685,6 +690,151 @@ func mustMkdir(t *testing.T, path string) { } } +func mustWriteZip(t *testing.T, path string, entries map[string]string) { + t.Helper() + if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { + t.Fatalf("mkdir for zip %s: %v", path, err) + } + f, err := os.Create(path) + if err != nil { + t.Fatalf("create zip %s: %v", path, err) + } + defer f.Close() + zw := zip.NewWriter(f) + for name, body := range entries { + w, err := zw.Create(name) + if err != nil { + t.Fatalf("zip.Create(%q): %v", name, err) + } + if _, err := w.Write([]byte(body)); err != nil { + t.Fatalf("zip write %q: %v", name, err) + } + } + if err := zw.Close(); err != nil { + t.Fatalf("zip close %s: %v", path, err) + } +} + +// TestDispatchZipRouting exercises the .zip-as-virtual-directory +// intercept: <…>.zip/ lists members, <…>.zip/member streams one +// member, bare <…>.zip is still a plain file download, writes into a +// zip are refused, and ACL is inherited from the directory containing +// the zip (a zip has no .zddc of its own — same as .archive). +func TestDispatchZipRouting(t *testing.T) { + root := t.TempDir() + // Only alice@x may read under staging/; bob@x is denied there. 
+ mustWrite(t, filepath.Join(root, ".zddc"), + "acl:\n permissions:\n \"*\": r\n") + mustMkdir(t, filepath.Join(root, "Proj", "staging")) + mustWrite(t, filepath.Join(root, "Proj", "staging", ".zddc"), + "acl:\n inherit: false\n permissions:\n \"alice@x\": rwcda\n") + zipPath := filepath.Join(root, "Proj", "staging", "T.zip") + mustWriteZip(t, zipPath, map[string]string{ + "DOC-001.pdf": "PDFDATA", + "sub/note.txt": "a note", + }) + zipBytes, _ := os.ReadFile(zipPath) + + idx, err := archive.BuildIndex(root) + if err != nil { + t.Fatalf("BuildIndex: %v", err) + } + cfg := config.Config{Root: root, IndexPath: ".archive", EmailHeader: "X-Auth-Request-Email"} + ring := handler.NewLogRing(10) + appsSrv, err := setupApps(cfg) + if err != nil { + t.Fatalf("setupApps: %v", err) + } + + do := func(method, path, email string, hdr map[string]string) *httptest.ResponseRecorder { + req := httptest.NewRequest(method, path, nil) + for k, v := range hdr { + req.Header.Set(k, v) + } + req = req.WithContext(context.WithValue(req.Context(), handler.EmailKey, email)) + rec := httptest.NewRecorder() + dispatch(cfg, idx, ring, appsSrv, nil, rec, req) + return rec + } + + t.Run("listing JSON", func(t *testing.T) { + rec := do(http.MethodGet, "/Proj/staging/T.zip/", "alice@x", map[string]string{"Accept": "application/json"}) + if rec.Code != http.StatusOK { + t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String()) + } + var fis []map[string]any + if err := json.Unmarshal(rec.Body.Bytes(), &fis); err != nil { + t.Fatalf("decode listing: %v; body=%s", err, rec.Body.String()) + } + names := map[string]bool{} + for _, fi := range fis { + names[fi["name"].(string)] = fi["is_dir"] == true + } + if d, ok := names["DOC-001.pdf"]; !ok || d { + t.Errorf("expected file DOC-001.pdf; got %v", names) + } + if d, ok := names["sub/"]; !ok || !d { + t.Errorf("expected dir sub/; got %v", names) + } + }) + + t.Run("member extracted", func(t *testing.T) { + rec := do(http.MethodGet, 
"/Proj/staging/T.zip/sub/note.txt", "alice@x", nil) + if rec.Code != http.StatusOK || rec.Body.String() != "a note" { + t.Fatalf("status=%d body=%q", rec.Code, rec.Body.String()) + } + if rec.Header().Get("X-ZDDC-Source") != "zip:T.zip" { + t.Errorf("X-ZDDC-Source=%q", rec.Header().Get("X-ZDDC-Source")) + } + }) + + t.Run("bare .zip is a plain download", func(t *testing.T) { + rec := do(http.MethodGet, "/Proj/staging/T.zip", "alice@x", nil) + if rec.Code != http.StatusOK { + t.Fatalf("status=%d", rec.Code) + } + if rec.Body.Len() != len(zipBytes) { + t.Errorf("bare .zip body len=%d, want %d (raw zip bytes)", rec.Body.Len(), len(zipBytes)) + } + // It must NOT have the zip-virtual-dir source header. + if rec.Header().Get("X-ZDDC-Source") == "zip:T.zip" { + t.Errorf("bare .zip should be served as a file, not the virtual-dir handler") + } + }) + + t.Run("write into zip refused", func(t *testing.T) { + rec := do(http.MethodPut, "/Proj/staging/T.zip/new.txt", "alice@x", nil) + if rec.Code != http.StatusMethodNotAllowed { + t.Errorf("PUT into zip status=%d, want 405", rec.Code) + } + }) + + t.Run("ACL inherited from containing dir — denied", func(t *testing.T) { + rec := do(http.MethodGet, "/Proj/staging/T.zip/sub/note.txt", "bob@x", nil) + if rec.Code != http.StatusForbidden { + t.Errorf("bob denied under staging/ → zip member status=%d, want 403", rec.Code) + } + rec2 := do(http.MethodGet, "/Proj/staging/T.zip/", "bob@x", map[string]string{"Accept": "application/json"}) + if rec2.Code != http.StatusForbidden { + t.Errorf("bob denied → zip listing status=%d, want 403", rec2.Code) + } + }) + + t.Run("missing member 404", func(t *testing.T) { + rec := do(http.MethodGet, "/Proj/staging/T.zip/no/such.txt", "alice@x", nil) + if rec.Code != http.StatusNotFound { + t.Errorf("status=%d, want 404", rec.Code) + } + }) + + t.Run("directory member 302 to slash", func(t *testing.T) { + rec := do(http.MethodGet, "/Proj/staging/T.zip/sub", "alice@x", nil) + if rec.Code != 
http.StatusFound || rec.Header().Get("Location") != "/Proj/staging/T.zip/sub/" { + t.Errorf("status=%d loc=%q", rec.Code, rec.Header().Get("Location")) + } + }) +} + func mustWrite(t *testing.T, path, body string) { t.Helper() if err := os.WriteFile(path, []byte(body), 0o644); err != nil { diff --git a/zddc/internal/handler/ziphandler.go b/zddc/internal/handler/ziphandler.go new file mode 100644 index 0000000..92b097d --- /dev/null +++ b/zddc/internal/handler/ziphandler.go @@ -0,0 +1,146 @@ +package handler + +import ( + "bytes" + "encoding/json" + "io" + "log/slog" + "net/http" + "os" + "path" + "strings" + + "codeberg.org/VARASYS/ZDDC/zddc/internal/apps" + "codeberg.org/VARASYS/ZDDC/zddc/internal/config" + "codeberg.org/VARASYS/ZDDC/zddc/internal/zipfs" +) + +// maxZipMemberBytes caps the size of a single zip member the server +// will materialise in memory to extract. Zip transmittals carry +// documents, not multi-gigabyte blobs; a member declaring more than +// this in its central-directory header is refused (413) rather than +// buffered. Streaming/large-member support is a future optimisation. +const maxZipMemberBytes = 512 << 20 // 512 MiB + +// ServeZip serves the contents of the .zip file at zipAbsPath as a +// virtual directory. memberPath is the slash-separated path inside the +// zip (already URL-decoded; "" = the zip's root level). +// +// Only reached via the dispatcher's splitZipPath intercept, which has +// already (a) confirmed zipAbsPath is a regular .zip file under +// cfg.Root and (b) enforced ACL using the chain of the directory +// CONTAINING the zip — a zip has no .zddc of its own, exactly like the +// .archive virtual path. Write methods are rejected by the dispatcher +// before this is reached; zip access here is read-only. 
+// +// Routing inside the zip mirrors the server's slash convention: +// - memberPath names a file member → extract + stream it +// - memberPath names a directory level → JSON listing (Accept: json) +// or the browse SPA (HTML) +// - request had no trailing slash but the +// member is a directory level → 302 to add the slash +// - otherwise → 404 +func ServeZip(cfg config.Config, w http.ResponseWriter, r *http.Request, zipAbsPath, memberPath string) { + zr, closeZip, err := zipfs.ResolveCloser(zipAbsPath) + if err != nil { + if os.IsNotExist(err) { + http.Error(w, "Not Found", http.StatusNotFound) + } else { + slog.Warn("open zip archive", "path", zipAbsPath, "err", err) + http.Error(w, "Bad zip archive", http.StatusBadGateway) + } + return + } + defer closeZip() + + member := strings.Trim(memberPath, "/") + zipName := path.Base(zipAbsPath) + + // File member? + if member != "" { + if rc, size, mod, name, ok := zipfs.OpenMember(zr, member); ok { + defer rc.Close() + if size > maxZipMemberBytes { + http.Error(w, "Zip member too large to serve", http.StatusRequestEntityTooLarge) + return + } + // Buffer into memory so http.ServeContent gets a ReadSeeker + // (Range / conditional GET). Bounded by maxZipMemberBytes. + buf, err := io.ReadAll(rc) + if err != nil { + slog.Warn("extract zip member", "zip", zipAbsPath, "member", member, "err", err) + http.Error(w, "Internal Server Error", http.StatusInternalServerError) + return + } + w.Header().Set("Cache-Control", "private, max-age=0, must-revalidate") + w.Header().Set("X-ZDDC-Source", "zip:"+zipName) + http.ServeContent(w, r, name, mod, bytes.NewReader(buf)) + return + } + } + + // Directory level. The dispatcher only routes here when the URL had + // a tail segment or a trailing slash, so a no-slash URL that lands + // on a directory level gets normalised to the slash form. 
+ baseURL := r.URL.Path + if !strings.HasSuffix(baseURL, "/") { + if zipfs.IsDirLevel(zr, member) { + http.Redirect(w, r, baseURL+"/", http.StatusFound) + return + } + http.Error(w, "Not Found", http.StatusNotFound) + return + } + entries, valid := zipfs.List(zr, member, baseURL) + if !valid { + http.Error(w, "Not Found", http.StatusNotFound) + return + } + + // Vary: Accept — same URL serves the JSON listing or the browse + // SPA depending on Accept; without it caches can cross the wires. + w.Header().Set("Vary", "Accept") + + if strings.Contains(r.Header.Get("Accept"), "application/json") { + body, err := json.Marshal(entries) + if err != nil { + slog.Error("encoding zip listing", "err", err) + http.Error(w, "Internal Server Error", http.StatusInternalServerError) + return + } + etag := `"` + listingETag(body) + `"` + w.Header().Set("Content-Type", "application/json") + w.Header().Set("ETag", etag) + w.Header().Set("Cache-Control", "private, max-age=0, must-revalidate") + w.Header().Set("X-ZDDC-Source", "zip:"+zipName) + if match := r.Header.Get("If-None-Match"); match != "" && match == etag { + w.WriteHeader(http.StatusNotModified) + return + } + _, _ = w.Write(body) + return + } + + // HTML: serve the embedded `browse` SPA, exactly like + // ServeDirectory's and ServeArchive's HTML branches. It auto-detects + // server mode by re-fetching this URL with Accept: application/json + // (→ the JSON branch above), then renders the zip's contents. 
+ body := apps.EmbeddedBytes("browse") + if len(body) == 0 { + jsonBody, _ := json.Marshal(entries) + w.Header().Set("Content-Type", "application/json") + w.Header().Set("Cache-Control", "no-cache") + _, _ = w.Write(jsonBody) + return + } + etag := `"` + apps.EmbeddedETag("browse") + `"` + w.Header().Set("ETag", etag) + w.Header().Set("Cache-Control", "public, max-age=0, must-revalidate") + w.Header().Set("Content-Type", "text/html; charset=utf-8") + w.Header().Set("X-ZDDC-Source", "embedded:browse") + if match := r.Header.Get("If-None-Match"); match != "" && match == etag { + w.WriteHeader(http.StatusNotModified) + return + } + _, _ = w.Write(body) +} diff --git a/zddc/internal/handler/ziphandler_test.go b/zddc/internal/handler/ziphandler_test.go new file mode 100644 index 0000000..560c1a7 --- /dev/null +++ b/zddc/internal/handler/ziphandler_test.go @@ -0,0 +1,226 @@ +package handler + +import ( + "archive/zip" + "encoding/json" + "net/http" + "net/http/httptest" + "os" + "path/filepath" + "testing" + + "codeberg.org/VARASYS/ZDDC/zddc/internal/config" + "codeberg.org/VARASYS/ZDDC/zddc/internal/listing" +) + +func writeTestZip(t *testing.T, path string, entries map[string]string) { + t.Helper() + if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { + t.Fatal(err) + } + f, err := os.Create(path) + if err != nil { + t.Fatal(err) + } + defer f.Close() + zw := zip.NewWriter(f) + for name, body := range entries { + w, err := zw.Create(name) + if err != nil { + t.Fatalf("zip.Create(%q): %v", name, err) + } + if body != "" { + if _, err := w.Write([]byte(body)); err != nil { + t.Fatal(err) + } + } + } + if err := zw.Close(); err != nil { + t.Fatal(err) + } +} + +func TestServeZip(t *testing.T) { + root := t.TempDir() + zipPath := filepath.Join(root, "P", "staging", "T.zip") + writeTestZip(t, zipPath, map[string]string{ + "DOC-001 (IFI) - Spec.pdf": "PDF-CONTENT", + "sub/note.txt": "a note", + "sub/deep/x.bin": "\x00\x01\x02", + }) + cfg := config.Config{Root: 
root} + + t.Run("root listing JSON", func(t *testing.T) { + req := httptest.NewRequest(http.MethodGet, "/P/staging/T.zip/", nil) + req.Header.Set("Accept", "application/json") + rec := httptest.NewRecorder() + ServeZip(cfg, rec, req, zipPath, "") + if rec.Code != http.StatusOK { + t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String()) + } + var fis []listing.FileInfo + if err := json.Unmarshal(rec.Body.Bytes(), &fis); err != nil { + t.Fatalf("decode: %v; body=%s", err, rec.Body.String()) + } + byName := map[string]listing.FileInfo{} + for _, fi := range fis { + byName[fi.Name] = fi + } + if fi, ok := byName["DOC-001 (IFI) - Spec.pdf"]; !ok || fi.IsDir { + t.Errorf("expected file entry; got %v", byName) + } + if fi, ok := byName["sub/"]; !ok || !fi.IsDir { + t.Errorf("expected sub/ dir entry; got %v", byName) + } + // URL is relative to the request path and percent-escaped. + if got := byName["DOC-001 (IFI) - Spec.pdf"].URL; got != "/P/staging/T.zip/DOC-001%20%28IFI%29%20-%20Spec.pdf" { + t.Errorf("file URL=%q want escaped form", got) + } + if got := byName["sub/"].URL; got != "/P/staging/T.zip/sub/" { + t.Errorf("dir URL=%q", got) + } + if rec.Header().Get("Vary") != "Accept" { + t.Errorf("missing Vary: Accept") + } + if rec.Header().Get("ETag") == "" { + t.Errorf("missing ETag") + } + }) + + t.Run("nested listing JSON", func(t *testing.T) { + req := httptest.NewRequest(http.MethodGet, "/P/staging/T.zip/sub/", nil) + req.Header.Set("Accept", "application/json") + rec := httptest.NewRecorder() + ServeZip(cfg, rec, req, zipPath, "sub") + if rec.Code != http.StatusOK { + t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String()) + } + var fis []listing.FileInfo + json.Unmarshal(rec.Body.Bytes(), &fis) + byName := map[string]bool{} + for _, fi := range fis { + byName[fi.Name] = fi.IsDir + } + if d, ok := byName["note.txt"]; !ok || d { + t.Errorf("sub/ should contain file note.txt; got %v", byName) + } + if d, ok := byName["deep/"]; !ok || !d { + t.Errorf("sub/ 
should contain dir deep/; got %v", byName) + } + }) + + t.Run("file member extracted", func(t *testing.T) { + req := httptest.NewRequest(http.MethodGet, "/P/staging/T.zip/sub/note.txt", nil) + rec := httptest.NewRecorder() + ServeZip(cfg, rec, req, zipPath, "sub/note.txt") + if rec.Code != http.StatusOK { + t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String()) + } + if rec.Body.String() != "a note" { + t.Errorf("body=%q", rec.Body.String()) + } + if rec.Header().Get("X-ZDDC-Source") != "zip:T.zip" { + t.Errorf("X-ZDDC-Source=%q", rec.Header().Get("X-ZDDC-Source")) + } + // http.ServeContent sets Content-Type from the name (.txt). + if ct := rec.Header().Get("Content-Type"); ct == "" { + t.Errorf("missing Content-Type") + } + }) + + t.Run("file member case-insensitive", func(t *testing.T) { + req := httptest.NewRequest(http.MethodGet, "/P/staging/T.zip/SUB/NOTE.TXT", nil) + rec := httptest.NewRecorder() + ServeZip(cfg, rec, req, zipPath, "SUB/NOTE.TXT") + if rec.Code != http.StatusOK || rec.Body.String() != "a note" { + t.Errorf("status=%d body=%q", rec.Code, rec.Body.String()) + } + }) + + t.Run("range request on a member", func(t *testing.T) { + req := httptest.NewRequest(http.MethodGet, "/P/staging/T.zip/DOC-001%20%28IFI%29%20-%20Spec.pdf", nil) + req.Header.Set("Range", "bytes=0-2") + rec := httptest.NewRecorder() + ServeZip(cfg, rec, req, zipPath, "DOC-001 (IFI) - Spec.pdf") + if rec.Code != http.StatusPartialContent { + t.Fatalf("status=%d, want 206; body=%q", rec.Code, rec.Body.String()) + } + if rec.Body.String() != "PDF" { + t.Errorf("partial body=%q, want PDF", rec.Body.String()) + } + }) + + t.Run("missing member 404", func(t *testing.T) { + req := httptest.NewRequest(http.MethodGet, "/P/staging/T.zip/no/such.txt", nil) + rec := httptest.NewRecorder() + ServeZip(cfg, rec, req, zipPath, "no/such.txt") + if rec.Code != http.StatusNotFound { + t.Errorf("status=%d, want 404", rec.Code) + } + }) + + t.Run("directory member without trailing slash 302s", 
func(t *testing.T) { + req := httptest.NewRequest(http.MethodGet, "/P/staging/T.zip/sub", nil) + rec := httptest.NewRecorder() + ServeZip(cfg, rec, req, zipPath, "sub") + if rec.Code != http.StatusFound { + t.Fatalf("status=%d, want 302", rec.Code) + } + if loc := rec.Header().Get("Location"); loc != "/P/staging/T.zip/sub/" { + t.Errorf("Location=%q", loc) + } + }) + + t.Run("bad zip path", func(t *testing.T) { + req := httptest.NewRequest(http.MethodGet, "/P/staging/Nope.zip/", nil) + rec := httptest.NewRecorder() + ServeZip(cfg, rec, req, filepath.Join(root, "P", "staging", "Nope.zip"), "") + if rec.Code != http.StatusNotFound { + t.Errorf("status=%d, want 404", rec.Code) + } + }) + + t.Run("zip-slip member is unreachable", func(t *testing.T) { + // Build a zip with a malicious entry; the handler must not surface it. + evilZip := filepath.Join(root, "P", "staging", "Evil.zip") + writeTestZip(t, evilZip, map[string]string{"ok.txt": "fine"}) + // Manually append nothing nasty via the safe writer (zip.Writer + // rejects "../" names? no — it allows them). Re-create with one. + f, _ := os.Create(evilZip) + zw := zip.NewWriter(f) + w1, _ := zw.Create("ok.txt") + w1.Write([]byte("fine")) + w2, _ := zw.Create("../escape.txt") + w2.Write([]byte("pwned")) + zw.Close() + f.Close() + + req := httptest.NewRequest(http.MethodGet, "/P/staging/Evil.zip/x", nil) + rec := httptest.NewRecorder() + ServeZip(cfg, rec, req, evilZip, "../escape.txt") + if rec.Code != http.StatusNotFound { + t.Errorf("zip-slip member status=%d, want 404", rec.Code) + } + // ...but the safe entry is fine. 
+ req2 := httptest.NewRequest(http.MethodGet, "/P/staging/Evil.zip/ok.txt", nil) + rec2 := httptest.NewRecorder() + ServeZip(cfg, rec2, req2, evilZip, "ok.txt") + if rec2.Code != http.StatusOK || rec2.Body.String() != "fine" { + t.Errorf("safe member status=%d body=%q", rec2.Code, rec2.Body.String()) + } + }) + + t.Run("HTML request serves something usable", func(t *testing.T) { + req := httptest.NewRequest(http.MethodGet, "/P/staging/T.zip/", nil) + req.Header.Set("Accept", "text/html") + rec := httptest.NewRecorder() + ServeZip(cfg, rec, req, zipPath, "") + if rec.Code != http.StatusOK { + t.Fatalf("status=%d", rec.Code) + } + ct := rec.Header().Get("Content-Type") + if ct != "text/html; charset=utf-8" && ct != "application/json" { + t.Errorf("Content-Type=%q, want html or json fallback", ct) + } + }) +} diff --git a/zddc/internal/zipfs/zipfs.go b/zddc/internal/zipfs/zipfs.go new file mode 100644 index 0000000..aa8a21e --- /dev/null +++ b/zddc/internal/zipfs/zipfs.go @@ -0,0 +1,250 @@ +// Package zipfs presents the contents of a .zip file on disk as a +// browsable virtual directory tree: a JSON listing at any directory +// level inside the zip, and on-demand extraction of a single member. +// +// It does no HTTP and no ACL — that's the caller's job (see +// handler.ServeZip). The model mirrors what the browse tool already +// does client-side with JSZip: the zip's central directory is a flat +// list of member names; directory levels are reconstructed on top, +// synthesising the intermediate directories that have no explicit +// "/" entry of their own. +// +// Member names are sanitised against zip-slip (no "..", no absolute +// paths, no backslashes); unsafe entries are silently dropped. +package zipfs + +import ( + "archive/zip" + "io" + "net/url" + "path" + "sort" + "strings" + "time" + + "codeberg.org/VARASYS/ZDDC/zddc/internal/listing" +) + +// Open opens the zip file at zipAbsPath for reading. The caller must +// Close the returned *zip.ReadCloser. 
Errors propagate os.Open errors +// (use os.IsNotExist) and zip-format errors. +func Open(zipAbsPath string) (*zip.ReadCloser, error) { + return zip.OpenReader(zipAbsPath) +} + +// cleanMember normalises a raw zip entry name. Returns ("", false) for +// anything unsafe to surface: absolute paths, backslash separators, +// and any path that escapes the zip root via "..". A trailing slash +// (the zip convention for an explicit directory entry) is preserved. +func cleanMember(name string) (string, bool) { + if name == "" || strings.ContainsRune(name, '\\') || strings.HasPrefix(name, "/") { + return "", false + } + isDir := strings.HasSuffix(name, "/") + cl := path.Clean(name) + if cl == "." || cl == ".." || strings.HasPrefix(cl, "../") { + return "", false + } + if isDir && cl != "" { + cl += "/" + } + return cl, true +} + +// normPrefix turns a member-path string ("", "a", "a/b", "a/b/") into +// the canonical "directory level" form: no leading or trailing slash. +func normPrefix(p string) string { + return strings.Trim(p, "/") +} + +// OpenMember locates a *file* member matching memberPath inside r and returns +// a reader for its bytes plus its size and modtime. Matching is +// case-insensitive on the full path (segment-for-segment), mirroring +// the URL case-folding the rest of the server does. ok is false when +// no file member matches (it may still be a directory level — call +// List). +// +// memberPath must be the cleaned, slash-separated path with no leading +// or trailing slash (e.g. "sub/doc.pdf"). 
+func OpenMember(r *zip.Reader, memberPath string) (rc io.ReadCloser, size int64, mod time.Time, name string, ok bool) { + want := normPrefix(memberPath) + if want == "" { + return nil, 0, time.Time{}, "", false + } + for _, f := range r.File { + cl, good := cleanMember(f.Name) + if !good { + continue + } + if strings.HasSuffix(cl, "/") { + continue // directory entry + } + if strings.EqualFold(cl, want) { + h, err := f.Open() + if err != nil { + return nil, 0, time.Time{}, "", false + } + return h, int64(f.UncompressedSize64), f.Modified, path.Base(cl), true + } + } + return nil, 0, time.Time{}, "", false +} + +// IsDirLevel reports whether prefix names a directory level inside r. +// The zip root ("") always qualifies. Otherwise it's true when there's +// an explicit "/" entry, or any entry sits under "/". +func IsDirLevel(r *zip.Reader, prefix string) bool { + pfx := normPrefix(prefix) + if pfx == "" { + return true + } + under := pfx + "/" + for _, f := range r.File { + cl, ok := cleanMember(f.Name) + if !ok { + continue + } + // An explicit "/" entry confirms the level; so does any + // entry that lives under it. A *file* entry named exactly pfx + // does NOT make pfx a directory. + if strings.HasSuffix(cl, "/") && strings.TrimSuffix(cl, "/") == pfx { + return true + } + if strings.HasPrefix(strings.TrimSuffix(cl, "/"), under) { + return true + } + } + return false +} + +// List returns the immediate children of the directory level named by +// prefix ("" = the zip root) as Caddy-style FileInfo entries, with URL +// = baseURL + escaped-name. Synthesised intermediate directories (no +// explicit "/" entry) are included. valid is false when prefix +// names no directory level in the zip (no entry is at-or-under it). +// +// baseURL should end with "/" — it's the URL prefix for this level +// (the request path, e.g. "/P/staging/Foo.zip/" or +// "/P/staging/Foo.zip/sub/"). 
+func List(r *zip.Reader, prefix, baseURL string) ([]listing.FileInfo, bool) { + pfx := normPrefix(prefix) + var under string // the "/" string we strip, or "" at root + if pfx != "" { + under = pfx + "/" + } + + type child struct { + isDir bool + size int64 + mod time.Time + mode uint32 + explicit bool // backed by a real zip entry (vs. synthesised) + } + children := map[string]*child{} + sawLevel := pfx == "" // the root level always exists + + for _, f := range r.File { + cl, good := cleanMember(f.Name) + if !good { + continue + } + entryIsDir := strings.HasSuffix(cl, "/") + bare := strings.TrimSuffix(cl, "/") + if pfx != "" { + if bare == pfx { + // Entry is the prefix itself: an explicit "/" dir + // entry confirms the level; a file with that exact name + // is not a directory, so don't count it. + if entryIsDir { + sawLevel = true + } + continue + } + if !strings.HasPrefix(bare, under) { + continue + } + } + sawLevel = true + rest := bare + if pfx != "" { + rest = strings.TrimPrefix(bare, under) + } + if rest == "" { + continue + } + seg := rest + nested := false + if i := strings.IndexByte(rest, '/'); i >= 0 { + seg = rest[:i] + nested = true + } + c := children[seg] + if c == nil { + c = &child{} + children[seg] = c + } + if nested { + // seg is an intermediate directory on the way to a deeper + // member. Synthesised unless an explicit entry upgrades it. + c.isDir = true + } else if entryIsDir { + c.isDir = true + c.explicit = true + c.mod = f.Modified + c.mode = uint32(f.Mode()) + } else { + // Immediate file child. (If a dir and a file share a name — + // malformed zip — the file wins; harmless.) 
+ if !c.explicit || !c.isDir { + c.isDir = false + c.explicit = true + c.size = int64(f.UncompressedSize64) + c.mod = f.Modified + c.mode = uint32(f.Mode()) + } + } + } + + if !sawLevel { + return nil, false + } + + out := make([]listing.FileInfo, 0, len(children)) + for name, c := range children { + fi := listing.FileInfo{ + Name: name, + Size: c.size, + ModTime: c.mod, + Mode: c.mode, + IsDir: c.isDir, + URL: baseURL + url.PathEscape(name), + } + if c.isDir { + fi.Name += "/" + fi.URL += "/" + } + out = append(out, fi) + } + sort.Slice(out, func(i, j int) bool { + if out[i].IsDir != out[j].IsDir { + return out[i].IsDir // directories first + } + return strings.ToLower(out[i].Name) < strings.ToLower(out[j].Name) + }) + return out, true +} + +// ResolveCloser is a tiny convenience for the handler: open the zip, +// returning the reader and a close func, or (nil, nil, err). +func ResolveCloser(zipAbsPath string) (*zip.Reader, func() error, error) { + rc, err := Open(zipAbsPath) + if err != nil { + return nil, nil, err + } + return &rc.Reader, rc.Close, nil +} + +// IsZipName reports whether name has a ".zip" extension (case-insensitive). +func IsZipName(name string) bool { + return strings.EqualFold(path.Ext(name), ".zip") +} diff --git a/zddc/internal/zipfs/zipfs_test.go b/zddc/internal/zipfs/zipfs_test.go new file mode 100644 index 0000000..39fffa0 --- /dev/null +++ b/zddc/internal/zipfs/zipfs_test.go @@ -0,0 +1,264 @@ +package zipfs + +import ( + "archive/zip" + "bytes" + "io" + "testing" +) + +// makeZip builds an in-memory zip. A value of "" creates an +// explicit directory entry (name must end with "/"); anything else is +// the file body. 
+func makeZip(t *testing.T, entries map[string]string) *zip.Reader { + t.Helper() + var buf bytes.Buffer + zw := zip.NewWriter(&buf) + for name, body := range entries { + w, err := zw.Create(name) + if err != nil { + t.Fatalf("zip.Create(%q): %v", name, err) + } + if body != "" { + if _, err := w.Write([]byte(body)); err != nil { + t.Fatalf("write %q: %v", name, err) + } + } + } + if err := zw.Close(); err != nil { + t.Fatalf("zip.Close: %v", err) + } + zr, err := zip.NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len())) + if err != nil { + t.Fatalf("zip.NewReader: %v", err) + } + return zr +} + +func TestList(t *testing.T) { + zr := makeZip(t, map[string]string{ + "a/b.txt": "hello", + "c.txt": "world", + "d/": "", // explicit dir + "d/e/f.pdf": "pdfbytes", + "d/g.txt": "g", + }) + + t.Run("root level", func(t *testing.T) { + out, ok := List(zr, "", "/Z/") + if !ok { + t.Fatal("root level should be valid") + } + got := map[string]bool{} // name -> isDir + for _, fi := range out { + got[fi.Name] = fi.IsDir + if fi.IsDir && fi.URL != "/Z/"+stripSlash(fi.Name)+"/" { + t.Errorf("dir %q URL=%q", fi.Name, fi.URL) + } + if !fi.IsDir && fi.URL != "/Z/"+fi.Name { + t.Errorf("file %q URL=%q", fi.Name, fi.URL) + } + } + // Expect: a/ (synthesized dir), c.txt (file), d/ (explicit dir). + if !got["a/"] || !got["d/"] { + t.Errorf("missing dir children; got %v", got) + } + if isDir, ok := got["c.txt"]; !ok || isDir { + t.Errorf("c.txt should be a file child; got %v", got) + } + if len(out) != 3 { + t.Errorf("root children = %d, want 3: %v", len(out), got) + } + // Directories sort before files. 
+ if !out[0].IsDir || !out[1].IsDir || out[2].IsDir { + t.Errorf("sort order wrong: %v", out) + } + }) + + t.Run("nested level with explicit dir entry", func(t *testing.T) { + out, ok := List(zr, "d", "/Z/d/") + if !ok { + t.Fatal("d/ should be valid") + } + got := map[string]bool{} + for _, fi := range out { + got[fi.Name] = fi.IsDir + } + if !got["e/"] { + t.Errorf("d/ should contain synthesized e/; got %v", got) + } + if isDir, ok := got["g.txt"]; !ok || isDir { + t.Errorf("d/ should contain file g.txt; got %v", got) + } + }) + + t.Run("deep level without explicit dir entry", func(t *testing.T) { + out, ok := List(zr, "a", "/Z/a/") + if !ok { + t.Fatal("a/ should be valid (only known via a/b.txt)") + } + if len(out) != 1 || out[0].Name != "b.txt" || out[0].IsDir { + t.Errorf("a/ children = %v, want [b.txt]", out) + } + }) + + t.Run("nonexistent level", func(t *testing.T) { + if _, ok := List(zr, "nope", "/Z/nope/"); ok { + t.Error("nope should not be a valid level") + } + // A file name is not a directory level. 
+ if _, ok := List(zr, "c.txt", "/Z/c.txt/"); ok { + t.Error("c.txt is a file, not a directory level") + } + }) + + t.Run("URL escaping", func(t *testing.T) { + zr2 := makeZip(t, map[string]string{"my doc.pdf": "x", "sub dir/k.txt": "y"}) + out, _ := List(zr2, "", "/Z/") + for _, fi := range out { + if fi.Name == "my doc.pdf" && fi.URL != "/Z/my%20doc.pdf" { + t.Errorf("file URL not escaped: %q", fi.URL) + } + if fi.Name == "sub dir/" && fi.URL != "/Z/sub%20dir/" { + t.Errorf("dir URL not escaped: %q", fi.URL) + } + } + }) +} + +func stripSlash(s string) string { + if len(s) > 0 && s[len(s)-1] == '/' { + return s[:len(s)-1] + } + return s +} + +func TestIsDirLevel(t *testing.T) { + zr := makeZip(t, map[string]string{ + "a/b.txt": "x", + "c.txt": "y", + "d/": "", + "d/e/f.pdf": "z", + }) + cases := map[string]bool{ + "": true, // root + "a": true, // implied (a/b.txt) + "a/": true, // trailing slash tolerated + "d": true, // explicit + "d/e": true, // implied (d/e/f.pdf) + "c.txt": false, // a file, not a level + "nope": false, + "a/b": false, // a/b is a file (a/b.txt is a/b/... no — "a/b.txt", so "a/b" prefix? 
"a/b.txt" starts with "a/b" but not "a/b/") + } + for prefix, want := range cases { + if got := IsDirLevel(zr, prefix); got != want { + t.Errorf("IsDirLevel(%q) = %v, want %v", prefix, got, want) + } + } +} + +func TestOpenMember(t *testing.T) { + zr := makeZip(t, map[string]string{ + "a/b.txt": "hello world", + "c.txt": "ccc", + "d/e/f.pdf": "pdf-bytes", + "d/": "", + }) + + t.Run("file member", func(t *testing.T) { + rc, size, _, name, ok := OpenMember(zr, "a/b.txt") + if !ok { + t.Fatal("a/b.txt should be found") + } + defer rc.Close() + if name != "b.txt" { + t.Errorf("name=%q, want b.txt", name) + } + if size != int64(len("hello world")) { + t.Errorf("size=%d, want %d", size, len("hello world")) + } + b, _ := io.ReadAll(rc) + if string(b) != "hello world" { + t.Errorf("body=%q", b) + } + }) + + t.Run("case-insensitive", func(t *testing.T) { + rc, _, _, _, ok := OpenMember(zr, "D/E/F.PDF") + if !ok { + t.Fatal("D/E/F.PDF should match d/e/f.pdf") + } + rc.Close() + }) + + t.Run("directory entry is not a member", func(t *testing.T) { + if _, _, _, _, ok := OpenMember(zr, "d"); ok { + t.Error("d/ is a directory, not a file member") + } + if _, _, _, _, ok := OpenMember(zr, "a"); ok { + t.Error("a is a directory level, not a file member") + } + }) + + t.Run("missing", func(t *testing.T) { + if _, _, _, _, ok := OpenMember(zr, "no/such.txt"); ok { + t.Error("missing member reported as found") + } + if _, _, _, _, ok := OpenMember(zr, ""); ok { + t.Error("empty member should not match") + } + }) +} + +func TestCleanMemberRejectsZipSlip(t *testing.T) { + bad := []string{ + "../evil.txt", + "a/../../evil.txt", + "/abs/evil.txt", + "a\\b.txt", + "..", + "", + } + for _, n := range bad { + if _, ok := cleanMember(n); ok { + t.Errorf("cleanMember(%q) should be rejected", n) + } + } + good := map[string]string{ + "a/b.txt": "a/b.txt", + "a/./b.txt": "a/b.txt", + "dir/": "dir/", + "x.txt": "x.txt", + } + for in, want := range good { + got, ok := cleanMember(in) + if !ok || 
got != want { + t.Errorf("cleanMember(%q) = (%q, %v), want (%q, true)", in, got, ok, want) + } + } +} + +func TestListIgnoresUnsafeEntries(t *testing.T) { + // A zip whose central directory carries a malicious "../" entry + // must not surface it. + var buf bytes.Buffer + zw := zip.NewWriter(&buf) + for _, n := range []string{"good.txt", "../escape.txt", "sub/ok.txt"} { + w, _ := zw.Create(n) + w.Write([]byte("x")) + } + zw.Close() + zr, err := zip.NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len())) + if err != nil { + t.Fatal(err) + } + out, _ := List(zr, "", "/Z/") + for _, fi := range out { + if fi.Name == "escape.txt" || fi.Name == "../escape.txt" { + t.Errorf("unsafe entry surfaced: %q", fi.Name) + } + } + if _, _, _, _, ok := OpenMember(zr, "../escape.txt"); ok { + t.Error("unsafe member openable") + } +}