feat(zddc): serve a .zip as a virtual directory (zipfs + dispatch intercept)

zddc-server can now browse into a .zip file without the client
downloading the whole archive:
  - GET …/Foo.zip/                → JSON listing of the zip's members
                                     (Accept: application/json), or the
                                     browse SPA (HTML) — same content
                                     negotiation as ServeDirectory/.archive
  - GET …/Foo.zip/sub/doc.pdf     → extracts and streams that one member
                                     (Range / ETag / conditional GET via
                                     http.ServeContent)
  - GET …/Foo.zip                 → unchanged: the raw .zip download
  - PUT/DELETE/POST …/Foo.zip/…   → 405 (zip access is read-only)

New internal/zipfs package reconstructs directory levels from the zip's
flat central directory (synthesising intermediate dirs with no explicit
"<dir>/" entry, mirroring what browse does client-side with JSZip) and
drops zip-slip-unsafe entries ("..", absolute, backslash). New
handler.ServeZip wraps it. The dispatcher gets splitZipPath + an
intercept placed before the file-API branch (so a write to a path under
a .zip is refused, not silently mkdir'd); ACL is the chain of the
directory CONTAINING the zip — a zip carries no .zddc of its own, same
as the .archive virtual surface. The os.Stat-per-segment walk is gated
by a cheap ".zip/" substring check so ordinary requests are unaffected.

Also fixes two pre-existing dispatch-test failures uncovered along the
way: a non-existent top-level "*.html" URL was 302'ing to its slash
form (because the bare "*" project glob makes every first-level segment
"declared") — the cascade-declared no-slash block now requires a
directory-shaped URL (trailing slash, or no file extension); and the
stale TestDispatchSlashRouting expectation that archive/<party>/mdl/
302s to mdl/table.html was updated to match the intended behaviour
(the default-MDL virtual fallback shows the browse listing there; only
a real on-disk tables: + *.table.yaml triggers the bounce).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
ZDDC 2026-05-12 12:17:47 -05:00
parent bb5e059477
commit 5e4d4fefb3
6 changed files with 1146 additions and 15 deletions

View file

@ -623,6 +623,57 @@ func serveSpecializedNoSlash(cfg config.Config, appsSrv *apps.Server, w http.Res
return false
}
// splitZipPath detects a "<…>.zip/<member>" URL: a path where some
// ancestor segment resolves to a regular .zip file on disk and there is
// a tail segment after it (or a trailing slash).
//
// On a match it returns the zip's absolute filesystem path and the
// slash-separated member path inside the zip ("" when the URL is
// "<…>.zip/" with nothing after). ok is false for everything else —
// including "<…>.zip" with no trailing slash (that is a plain file
// download, handled downstream), a path with a segment that does not
// exist on disk, and any path that would resolve outside fsRoot.
//
// fsRoot is normalised with filepath.Clean before use, so a root that
// carries a trailing separator (or other redundant elements) is
// accepted; the returned zipAbs is built on the cleaned root.
//
// Segments are stat'd one at a time, and the .zip segment is matched by
// its extension (case-insensitively). The per-segment os.Stat walk is
// meant to be gated by a cheap ".zip/" substring check at the call
// site, so it does not run for ordinary requests.
func splitZipPath(fsRoot, urlPath string) (zipAbs, member string, ok bool) {
	trimmed := strings.Trim(urlPath, "/")
	if trimmed == "" {
		return "", "", false
	}

	// filepath.Join returns a cleaned path, so the containment check
	// below must compare against a cleaned root; otherwise a root such
	// as "/srv/data/" would never match and every lookup would fail.
	root := filepath.Clean(fsRoot)
	rootPrefix := root
	if !strings.HasSuffix(rootPrefix, string(filepath.Separator)) {
		rootPrefix += string(filepath.Separator)
	}

	segs := strings.Split(trimmed, "/")
	cur := root
	for i, seg := range segs {
		cur = filepath.Join(cur, seg)
		// Refuse anything that climbs out of the served tree ("..").
		if cur != root && !strings.HasPrefix(cur, rootPrefix) {
			return "", "", false
		}
		info, err := os.Stat(cur)
		if err != nil {
			return "", "", false // a segment doesn't exist on disk — not a zip path
		}
		if info.IsDir() {
			continue
		}
		// cur is a non-directory. Only a regular .zip file with a tail
		// (or trailing slash) is "browse into the zip"; anything else
		// falls through to the normal file path.
		if !info.Mode().IsRegular() || !strings.EqualFold(filepath.Ext(seg), ".zip") {
			return "", "", false
		}
		if i < len(segs)-1 {
			return cur, strings.Join(segs[i+1:], "/"), true
		}
		// Last segment is the .zip itself: only a trailing slash means
		// "browse into it" (member == root); a bare "<…>.zip" is a file.
		if strings.HasSuffix(urlPath, "/") {
			return cur, "", true
		}
		return "", "", false
	}
	// Every segment was a directory: no zip anywhere on the path.
	return "", "", false
}
// dispatch routes a request to the appropriate handler.
func dispatch(cfg config.Config, idx *archive.Index, ring *handler.LogRing, appsSrv *apps.Server, tokens *auth.Store, w http.ResponseWriter, r *http.Request) {
// URL paths are case-insensitive: resolve each segment against the
@ -824,6 +875,42 @@ func dispatch(cfg config.Config, idx *archive.Index, ring *handler.LogRing, apps
return
}
// Zip-as-directory intercept: a "<…>.zip/<member>" URL is a virtual
// surface over a real .zip file on disk — GET "<…>.zip/" lists the
// members, GET "<…>.zip/member.pdf" extracts and streams that one
// member, so a client never has to download the whole archive. The
// bare "<…>.zip" (no trailing slash) is NOT matched here and falls
// through to the normal file path (a plain download). Like .archive,
// a zip carries no .zddc of its own — ACL is the chain of the
// directory CONTAINING the zip. Read-only: write methods are
// rejected before ServeFileAPI could try to create a path under a
// file. (The os.Stat walk in splitZipPath is gated by this cheap
// substring check, so it doesn't run for ordinary requests.)
if strings.Contains(strings.ToLower(urlPath), ".zip/") {
if zipAbs, member, ok := splitZipPath(cfg.Root, urlPath); ok {
if handler.IsWriteMethod(r.Method) {
w.Header().Set("Allow", "GET, HEAD")
http.Error(w, "Zip archives are read-only", http.StatusMethodNotAllowed)
return
}
if r.Method != http.MethodGet && r.Method != http.MethodHead {
w.Header().Set("Allow", "GET, HEAD")
http.Error(w, "Method Not Allowed", http.StatusMethodNotAllowed)
return
}
chain, err := zddc.EffectivePolicy(cfg.Root, filepath.Dir(zipAbs))
if err != nil {
slog.Warn("ACL policy error on zip parent", "path", filepath.Dir(zipAbs), "err", err)
}
if allowed, _ := policy.AllowFromChain(r.Context(), handler.DeciderFromContext(r), chain, email, urlPath); !allowed {
http.Error(w, "Forbidden", http.StatusForbidden)
return
}
handler.ServeZip(cfg, w, r, zipAbs, member)
return
}
}
// File API — authenticated CRUD over the served tree. Catches PUT,
// DELETE, and POST on any non-reserved path. Read methods (GET/HEAD)
// fall through to the static / apps / directory pipeline below.
@ -951,7 +1038,15 @@ func dispatch(cfg config.Config, idx *archive.Index, ring *handler.LogRing, apps
// - slash → ServeDirectory (DirTool; browse by default)
// - no-slash → default_tool ("specialized app") if any,
// else a 302 to the slash form.
//
// Guard: only directory-shaped URLs qualify. The bare "*"
// project glob matches *any* first-level segment — including
// "foo.html", "foo.txt", etc. — so without the extension
// check a non-existent top-level file would 302-to-slash
// instead of 404. A trailing slash, or no file extension on
// the last segment, means "asking for a directory".
if (r.Method == http.MethodGet || r.Method == http.MethodHead) &&
(strings.HasSuffix(urlPath, "/") || filepath.Ext(urlPath) == "") &&
zddc.IsDeclaredPath(cfg.Root, absPath) {
if strings.HasSuffix(urlPath, "/") {
handler.ServeDirectory(cfg, appsSrv, w, r)

View file

@ -1,9 +1,11 @@
package main
import (
"archive/zip"
"context"
"crypto/ed25519"
"crypto/rand"
"encoding/json"
"net/http"
"net/http/httptest"
"net/url"
@ -375,18 +377,20 @@ func TestDispatchArchiveRedirect(t *testing.T) {
}
func TestDispatchSlashRouting(t *testing.T) {
// Convention: <dir>/ → browse (directory view); <dir> → the canonical
// default tool for the directory (mdedit under working/, transmittal
// under staging/, archive under archive/, tables under
// archive/<party>/mdl/). Without a default app, no-slash falls
// through to the trailing-slash redirect (302).
// Convention: <dir>/ → browse (directory view, via DirTool which
// defaults to browse); <dir> → the directory's default_tool ("the
// specialized app": mdedit under working/, transmittal under
// staging/, archive under archive/, tables under archive/<party>/mdl).
// Without a default_tool, no-slash falls through to the trailing-
// slash redirect (302).
//
// Exception: a directory that is the rows-dir of a registered table
// (declared via parent .zddc tables:) — including the default-MDL
// fallback at archive/<party>/mdl/ — redirects the trailing-slash
// form too, bouncing to <parent>/<name>.table.html. Bare folder
// listings here would just be a row-of-yaml-files preview that the
// table view subsumes.
// The only trailing-slash redirect is for a directory that is the
// rows-dir of a table declared via a REAL on-disk parent .zddc
// `tables:` map with an existing *.table.yaml spec — it bounces to
// <parent>/<name>.table.html. The default-MDL virtual fallback at
// archive/<party>/mdl/ does NOT redirect: the slash form there shows
// the browse listing of the row YAMLs (the no-slash mdl form serves
// the table view).
root := t.TempDir()
mustWrite(t, filepath.Join(root, ".zddc"),
"acl:\n permissions:\n \"*\": rwcda\n")
@ -433,10 +437,11 @@ func TestDispatchSlashRouting(t *testing.T) {
{"archive/<party> no-slash → archive", "/Project/archive/Acme", http.StatusOK, true, ""},
{"archive/<party> slash → browse", "/Project/archive/Acme/", http.StatusOK, true, ""},
{"archive/<party>/mdl no-slash → tables", "/Project/archive/Acme/mdl", http.StatusOK, true, ""},
// Trailing-slash form on a tables rows-dir bounces to the canonical
// .table.html URL so users land on the table view rather than a
// browse listing of the row-yaml files.
{"archive/<party>/mdl slash → 302 in-dir table.html", "/Project/archive/Acme/mdl/", http.StatusFound, false, "/Project/archive/Acme/mdl/table.html"},
// The default-MDL virtual fallback does NOT redirect the slash
// form — it shows the browse listing of the row YAMLs. (Only a
// real on-disk parent .zddc tables: + *.table.yaml triggers the
// bounce to <parent>/<name>.table.html.)
{"archive/<party>/mdl slash → browse", "/Project/archive/Acme/mdl/", http.StatusOK, true, ""},
{"archive/<party>/incoming no-slash → archive", "/Project/archive/Acme/incoming", http.StatusOK, true, ""},
{"archive/<party>/incoming slash → browse", "/Project/archive/Acme/incoming/", http.StatusOK, true, ""},
{"non-canonical no-slash → 302 to slash", "/Project/scratch", http.StatusFound, false, ""},
@ -685,6 +690,151 @@ func mustMkdir(t *testing.T, path string) {
}
}
// mustWriteZip creates a zip archive at path (creating parent
// directories as needed) holding one file member per entries key, with
// the mapped string as the member's content. Any failure aborts the
// calling test via t.Fatalf.
func mustWriteZip(t *testing.T, path string, entries map[string]string) {
	t.Helper()

	parent := filepath.Dir(path)
	if mkErr := os.MkdirAll(parent, 0o755); mkErr != nil {
		t.Fatalf("mkdir for zip %s: %v", path, mkErr)
	}

	out, createErr := os.Create(path)
	if createErr != nil {
		t.Fatalf("create zip %s: %v", path, createErr)
	}
	defer out.Close()

	archive := zip.NewWriter(out)
	for member, content := range entries {
		dst, err := archive.Create(member)
		if err != nil {
			t.Fatalf("zip.Create(%q): %v", member, err)
		}
		_, err = dst.Write([]byte(content))
		if err != nil {
			t.Fatalf("zip write %q: %v", member, err)
		}
	}

	// Closing the writer flushes the central directory; without it the
	// archive is unreadable.
	if closeErr := archive.Close(); closeErr != nil {
		t.Fatalf("zip close %s: %v", path, closeErr)
	}
}
// TestDispatchZipRouting exercises the .zip-as-virtual-directory
// intercept: <…>.zip/ lists members, <…>.zip/member streams one
// member, bare <…>.zip is still a plain file download, writes into a
// zip are refused, and ACL is inherited from the directory containing
// the zip (a zip has no .zddc of its own — same as .archive).
func TestDispatchZipRouting(t *testing.T) {
	root := t.TempDir()
	// Only alice@x may read under staging/; bob@x is denied there.
	mustWrite(t, filepath.Join(root, ".zddc"),
		"acl:\n permissions:\n \"*\": r\n")
	mustMkdir(t, filepath.Join(root, "Proj", "staging"))
	mustWrite(t, filepath.Join(root, "Proj", "staging", ".zddc"),
		"acl:\n inherit: false\n permissions:\n \"alice@x\": rwcda\n")
	zipPath := filepath.Join(root, "Proj", "staging", "T.zip")
	mustWriteZip(t, zipPath, map[string]string{
		"DOC-001.pdf":  "PDFDATA",
		"sub/note.txt": "a note",
	})
	// The raw bytes are the reference for the "bare .zip" download
	// check; a failed read would make that comparison meaningless.
	zipBytes, err := os.ReadFile(zipPath)
	if err != nil {
		t.Fatalf("read back zip %s: %v", zipPath, err)
	}

	idx, err := archive.BuildIndex(root)
	if err != nil {
		t.Fatalf("BuildIndex: %v", err)
	}
	cfg := config.Config{Root: root, IndexPath: ".archive", EmailHeader: "X-Auth-Request-Email"}
	ring := handler.NewLogRing(10)
	appsSrv, err := setupApps(cfg)
	if err != nil {
		t.Fatalf("setupApps: %v", err)
	}

	// do issues one request through dispatch as the given user and
	// returns the recorded response.
	do := func(method, path, email string, hdr map[string]string) *httptest.ResponseRecorder {
		req := httptest.NewRequest(method, path, nil)
		for k, v := range hdr {
			req.Header.Set(k, v)
		}
		req = req.WithContext(context.WithValue(req.Context(), handler.EmailKey, email))
		rec := httptest.NewRecorder()
		dispatch(cfg, idx, ring, appsSrv, nil, rec, req)
		return rec
	}

	t.Run("listing JSON", func(t *testing.T) {
		rec := do(http.MethodGet, "/Proj/staging/T.zip/", "alice@x", map[string]string{"Accept": "application/json"})
		if rec.Code != http.StatusOK {
			t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
		}
		var fis []map[string]any
		if err := json.Unmarshal(rec.Body.Bytes(), &fis); err != nil {
			t.Fatalf("decode listing: %v; body=%s", err, rec.Body.String())
		}
		// names maps entry name → is_dir.
		names := map[string]bool{}
		for _, fi := range fis {
			name, ok := fi["name"].(string)
			if !ok {
				// Fail cleanly rather than panic on a malformed entry.
				t.Fatalf("listing entry has no string name: %v", fi)
			}
			names[name] = fi["is_dir"] == true
		}
		if d, ok := names["DOC-001.pdf"]; !ok || d {
			t.Errorf("expected file DOC-001.pdf; got %v", names)
		}
		if d, ok := names["sub/"]; !ok || !d {
			t.Errorf("expected dir sub/; got %v", names)
		}
	})

	t.Run("member extracted", func(t *testing.T) {
		rec := do(http.MethodGet, "/Proj/staging/T.zip/sub/note.txt", "alice@x", nil)
		if rec.Code != http.StatusOK || rec.Body.String() != "a note" {
			t.Fatalf("status=%d body=%q", rec.Code, rec.Body.String())
		}
		if rec.Header().Get("X-ZDDC-Source") != "zip:T.zip" {
			t.Errorf("X-ZDDC-Source=%q", rec.Header().Get("X-ZDDC-Source"))
		}
	})

	t.Run("bare .zip is a plain download", func(t *testing.T) {
		rec := do(http.MethodGet, "/Proj/staging/T.zip", "alice@x", nil)
		if rec.Code != http.StatusOK {
			t.Fatalf("status=%d", rec.Code)
		}
		if rec.Body.Len() != len(zipBytes) {
			t.Errorf("bare .zip body len=%d, want %d (raw zip bytes)", rec.Body.Len(), len(zipBytes))
		}
		// It must NOT have the zip-virtual-dir source header.
		if rec.Header().Get("X-ZDDC-Source") == "zip:T.zip" {
			t.Errorf("bare .zip should be served as a file, not the virtual-dir handler")
		}
	})

	t.Run("write into zip refused", func(t *testing.T) {
		rec := do(http.MethodPut, "/Proj/staging/T.zip/new.txt", "alice@x", nil)
		if rec.Code != http.StatusMethodNotAllowed {
			t.Errorf("PUT into zip status=%d, want 405", rec.Code)
		}
	})

	t.Run("ACL inherited from containing dir — denied", func(t *testing.T) {
		rec := do(http.MethodGet, "/Proj/staging/T.zip/sub/note.txt", "bob@x", nil)
		if rec.Code != http.StatusForbidden {
			t.Errorf("bob denied under staging/ → zip member status=%d, want 403", rec.Code)
		}
		rec2 := do(http.MethodGet, "/Proj/staging/T.zip/", "bob@x", map[string]string{"Accept": "application/json"})
		if rec2.Code != http.StatusForbidden {
			t.Errorf("bob denied → zip listing status=%d, want 403", rec2.Code)
		}
	})

	t.Run("missing member 404", func(t *testing.T) {
		rec := do(http.MethodGet, "/Proj/staging/T.zip/no/such.txt", "alice@x", nil)
		if rec.Code != http.StatusNotFound {
			t.Errorf("status=%d, want 404", rec.Code)
		}
	})

	t.Run("directory member 302 to slash", func(t *testing.T) {
		rec := do(http.MethodGet, "/Proj/staging/T.zip/sub", "alice@x", nil)
		if rec.Code != http.StatusFound || rec.Header().Get("Location") != "/Proj/staging/T.zip/sub/" {
			t.Errorf("status=%d loc=%q", rec.Code, rec.Header().Get("Location"))
		}
	})
}
func mustWrite(t *testing.T, path, body string) {
t.Helper()
if err := os.WriteFile(path, []byte(body), 0o644); err != nil {

View file

@ -0,0 +1,146 @@
package handler
import (
"bytes"
"encoding/json"
"io"
"log/slog"
"net/http"
"os"
"path"
"strings"
"codeberg.org/VARASYS/ZDDC/zddc/internal/apps"
"codeberg.org/VARASYS/ZDDC/zddc/internal/config"
"codeberg.org/VARASYS/ZDDC/zddc/internal/zipfs"
)
// maxZipMemberBytes caps the size of a single zip member the server
// will materialise in memory to extract. Zip transmittals carry
// documents, not multi-gigabyte blobs; a member declaring more than
// this in its central-directory header is refused (413) rather than
// buffered. Streaming/large-member support is a future optimisation.
//
// ServeZip compares this against the size reported by
// zipfs.OpenMember before it reads any of the member's bytes.
const maxZipMemberBytes = 512 << 20 // 512 MiB
// ServeZip exposes the .zip file at zipAbsPath as a read-only virtual
// directory. memberPath is the slash-separated location inside the
// archive; leading and trailing slashes are ignored and "" means the
// archive's root level.
//
// The caller (the dispatcher's splitZipPath intercept) is expected to
// have already confirmed that zipAbsPath is a regular .zip file under
// the served root, enforced ACL using the chain of the directory that
// CONTAINS the zip, and rejected write methods. None of that is
// re-checked here.
//
// Outcomes, in the order they are tried:
//   - the archive cannot be opened      → 404 if it does not exist,
//     otherwise 502 (logged)
//   - memberPath names a file member    → 413 if its declared size
//     exceeds maxZipMemberBytes, else the member is buffered and
//     handed to http.ServeContent
//   - URL lacks a trailing slash        → 302 to the slash form when
//     memberPath is a directory level, else 404
//   - memberPath is a directory level   → JSON listing when Accept
//     mentions application/json, otherwise the embedded browse SPA
//     (falling back to the JSON listing if the SPA is not embedded)
//   - anything else                     → 404
func ServeZip(cfg config.Config, w http.ResponseWriter, r *http.Request, zipAbsPath, memberPath string) {
	reader, release, openErr := zipfs.ResolveCloser(zipAbsPath)
	if openErr != nil {
		if !os.IsNotExist(openErr) {
			slog.Warn("open zip archive", "path", zipAbsPath, "err", openErr)
			http.Error(w, "Bad zip archive", http.StatusBadGateway)
			return
		}
		http.Error(w, "Not Found", http.StatusNotFound)
		return
	}
	defer release()

	inner := strings.Trim(memberPath, "/")
	sourceTag := "zip:" + path.Base(zipAbsPath)

	// A non-empty member path is tried as a file first; if no file
	// member matches we fall through to the directory handling below.
	if inner != "" {
		content, declared, modTime, leaf, found := zipfs.OpenMember(reader, inner)
		if found {
			defer content.Close()
			if declared > maxZipMemberBytes {
				http.Error(w, "Zip member too large to serve", http.StatusRequestEntityTooLarge)
				return
			}
			// http.ServeContent needs an io.ReadSeeker to honour Range
			// and conditional requests, so the member is read into
			// memory after the size check above.
			data, readErr := io.ReadAll(content)
			if readErr != nil {
				slog.Warn("extract zip member", "zip", zipAbsPath, "member", inner, "err", readErr)
				http.Error(w, "Internal Server Error", http.StatusInternalServerError)
				return
			}
			w.Header().Set("Cache-Control", "private, max-age=0, must-revalidate")
			w.Header().Set("X-ZDDC-Source", sourceTag)
			http.ServeContent(w, r, leaf, modTime, bytes.NewReader(data))
			return
		}
	}

	// Directory handling. A request without a trailing slash is either
	// normalised to the slash form (when it names a directory level) or
	// rejected.
	reqPath := r.URL.Path
	if !strings.HasSuffix(reqPath, "/") {
		if !zipfs.IsDirLevel(reader, inner) {
			http.Error(w, "Not Found", http.StatusNotFound)
			return
		}
		http.Redirect(w, r, reqPath+"/", http.StatusFound)
		return
	}

	items, exists := zipfs.List(reader, inner, reqPath)
	if !exists {
		http.Error(w, "Not Found", http.StatusNotFound)
		return
	}

	// The same URL yields JSON or HTML depending on Accept, so caches
	// must key on that header.
	w.Header().Set("Vary", "Accept")

	wantsJSON := strings.Contains(r.Header.Get("Accept"), "application/json")
	if wantsJSON {
		payload, encErr := json.Marshal(items)
		if encErr != nil {
			slog.Error("encoding zip listing", "err", encErr)
			http.Error(w, "Internal Server Error", http.StatusInternalServerError)
			return
		}
		tag := `"` + listingETag(payload) + `"`
		w.Header().Set("Content-Type", "application/json")
		w.Header().Set("ETag", tag)
		w.Header().Set("Cache-Control", "private, max-age=0, must-revalidate")
		w.Header().Set("X-ZDDC-Source", sourceTag)
		if inm := r.Header.Get("If-None-Match"); inm != "" && inm == tag {
			w.WriteHeader(http.StatusNotModified)
			return
		}
		_, _ = w.Write(payload)
		return
	}

	// HTML: hand back the embedded browse SPA. When it is not embedded,
	// degrade to the raw JSON listing so the response is still usable.
	spa := apps.EmbeddedBytes("browse")
	if len(spa) == 0 {
		fallback, _ := json.Marshal(items)
		w.Header().Set("Content-Type", "application/json")
		w.Header().Set("Cache-Control", "no-cache")
		_, _ = w.Write(fallback)
		return
	}

	spaTag := `"` + apps.EmbeddedETag("browse") + `"`
	w.Header().Set("ETag", spaTag)
	w.Header().Set("Cache-Control", "public, max-age=0, must-revalidate")
	w.Header().Set("Content-Type", "text/html; charset=utf-8")
	w.Header().Set("X-ZDDC-Source", "embedded:browse")
	if inm := r.Header.Get("If-None-Match"); inm != "" && inm == spaTag {
		w.WriteHeader(http.StatusNotModified)
		return
	}
	_, _ = w.Write(spa)
}

View file

@ -0,0 +1,226 @@
package handler
import (
"archive/zip"
"encoding/json"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"testing"
"codeberg.org/VARASYS/ZDDC/zddc/internal/config"
"codeberg.org/VARASYS/ZDDC/zddc/internal/listing"
)
// writeTestZip builds a zip archive at path, creating parent
// directories first. Each entries key becomes a member; its value is
// written as the member's content, except the sentinel value "<dir>",
// for which the member is created but nothing is written to it.
// Any failure aborts the calling test.
func writeTestZip(t *testing.T, path string, entries map[string]string) {
	t.Helper()

	if mkErr := os.MkdirAll(filepath.Dir(path), 0o755); mkErr != nil {
		t.Fatal(mkErr)
	}
	out, createErr := os.Create(path)
	if createErr != nil {
		t.Fatal(createErr)
	}
	defer out.Close()

	archive := zip.NewWriter(out)
	for member, content := range entries {
		dst, err := archive.Create(member)
		if err != nil {
			t.Fatalf("zip.Create(%q): %v", member, err)
		}
		if content == "<dir>" {
			// Sentinel: leave the member empty.
			continue
		}
		if _, err = dst.Write([]byte(content)); err != nil {
			t.Fatal(err)
		}
	}

	if closeErr := archive.Close(); closeErr != nil {
		t.Fatal(closeErr)
	}
}
// TestServeZip drives handler.ServeZip directly (no dispatcher, no ACL)
// against a small fixture archive and checks listings, member
// extraction, Range support, redirects, 404s, and that a zip-slip
// entry cannot be reached.
func TestServeZip(t *testing.T) {
	root := t.TempDir()
	zipPath := filepath.Join(root, "P", "staging", "T.zip")
	writeTestZip(t, zipPath, map[string]string{
		"DOC-001 (IFI) - Spec.pdf": "PDF-CONTENT",
		"sub/note.txt":             "a note",
		"sub/deep/x.bin":           "\x00\x01\x02",
	})
	cfg := config.Config{Root: root}

	t.Run("root listing JSON", func(t *testing.T) {
		req := httptest.NewRequest(http.MethodGet, "/P/staging/T.zip/", nil)
		req.Header.Set("Accept", "application/json")
		rec := httptest.NewRecorder()
		ServeZip(cfg, rec, req, zipPath, "")
		if rec.Code != http.StatusOK {
			t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
		}
		var fis []listing.FileInfo
		if err := json.Unmarshal(rec.Body.Bytes(), &fis); err != nil {
			t.Fatalf("decode: %v; body=%s", err, rec.Body.String())
		}
		byName := map[string]listing.FileInfo{}
		for _, fi := range fis {
			byName[fi.Name] = fi
		}
		if fi, ok := byName["DOC-001 (IFI) - Spec.pdf"]; !ok || fi.IsDir {
			t.Errorf("expected file entry; got %v", byName)
		}
		if fi, ok := byName["sub/"]; !ok || !fi.IsDir {
			t.Errorf("expected sub/ dir entry; got %v", byName)
		}
		// URL is relative to the request path and percent-escaped.
		if got := byName["DOC-001 (IFI) - Spec.pdf"].URL; got != "/P/staging/T.zip/DOC-001%20%28IFI%29%20-%20Spec.pdf" {
			t.Errorf("file URL=%q want escaped form", got)
		}
		if got := byName["sub/"].URL; got != "/P/staging/T.zip/sub/" {
			t.Errorf("dir URL=%q", got)
		}
		if rec.Header().Get("Vary") != "Accept" {
			t.Errorf("missing Vary: Accept")
		}
		if rec.Header().Get("ETag") == "" {
			t.Errorf("missing ETag")
		}
	})

	t.Run("nested listing JSON", func(t *testing.T) {
		req := httptest.NewRequest(http.MethodGet, "/P/staging/T.zip/sub/", nil)
		req.Header.Set("Accept", "application/json")
		rec := httptest.NewRecorder()
		ServeZip(cfg, rec, req, zipPath, "sub")
		if rec.Code != http.StatusOK {
			t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
		}
		var fis []listing.FileInfo
		// A decode failure must fail the test here, not surface later
		// as a misleading "entry missing" error.
		if err := json.Unmarshal(rec.Body.Bytes(), &fis); err != nil {
			t.Fatalf("decode: %v; body=%s", err, rec.Body.String())
		}
		byName := map[string]bool{}
		for _, fi := range fis {
			byName[fi.Name] = fi.IsDir
		}
		if d, ok := byName["note.txt"]; !ok || d {
			t.Errorf("sub/ should contain file note.txt; got %v", byName)
		}
		if d, ok := byName["deep/"]; !ok || !d {
			t.Errorf("sub/ should contain dir deep/; got %v", byName)
		}
	})

	t.Run("file member extracted", func(t *testing.T) {
		req := httptest.NewRequest(http.MethodGet, "/P/staging/T.zip/sub/note.txt", nil)
		rec := httptest.NewRecorder()
		ServeZip(cfg, rec, req, zipPath, "sub/note.txt")
		if rec.Code != http.StatusOK {
			t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
		}
		if rec.Body.String() != "a note" {
			t.Errorf("body=%q", rec.Body.String())
		}
		if rec.Header().Get("X-ZDDC-Source") != "zip:T.zip" {
			t.Errorf("X-ZDDC-Source=%q", rec.Header().Get("X-ZDDC-Source"))
		}
		// http.ServeContent sets Content-Type from the name (.txt).
		if ct := rec.Header().Get("Content-Type"); ct == "" {
			t.Errorf("missing Content-Type")
		}
	})

	t.Run("file member case-insensitive", func(t *testing.T) {
		req := httptest.NewRequest(http.MethodGet, "/P/staging/T.zip/SUB/NOTE.TXT", nil)
		rec := httptest.NewRecorder()
		ServeZip(cfg, rec, req, zipPath, "SUB/NOTE.TXT")
		if rec.Code != http.StatusOK || rec.Body.String() != "a note" {
			t.Errorf("status=%d body=%q", rec.Code, rec.Body.String())
		}
	})

	t.Run("range request on a member", func(t *testing.T) {
		req := httptest.NewRequest(http.MethodGet, "/P/staging/T.zip/DOC-001%20%28IFI%29%20-%20Spec.pdf", nil)
		req.Header.Set("Range", "bytes=0-2")
		rec := httptest.NewRecorder()
		ServeZip(cfg, rec, req, zipPath, "DOC-001 (IFI) - Spec.pdf")
		if rec.Code != http.StatusPartialContent {
			t.Fatalf("status=%d, want 206; body=%q", rec.Code, rec.Body.String())
		}
		if rec.Body.String() != "PDF" {
			t.Errorf("partial body=%q, want PDF", rec.Body.String())
		}
	})

	t.Run("missing member 404", func(t *testing.T) {
		req := httptest.NewRequest(http.MethodGet, "/P/staging/T.zip/no/such.txt", nil)
		rec := httptest.NewRecorder()
		ServeZip(cfg, rec, req, zipPath, "no/such.txt")
		if rec.Code != http.StatusNotFound {
			t.Errorf("status=%d, want 404", rec.Code)
		}
	})

	t.Run("directory member without trailing slash 302s", func(t *testing.T) {
		req := httptest.NewRequest(http.MethodGet, "/P/staging/T.zip/sub", nil)
		rec := httptest.NewRecorder()
		ServeZip(cfg, rec, req, zipPath, "sub")
		if rec.Code != http.StatusFound {
			t.Fatalf("status=%d, want 302", rec.Code)
		}
		if loc := rec.Header().Get("Location"); loc != "/P/staging/T.zip/sub/" {
			t.Errorf("Location=%q", loc)
		}
	})

	t.Run("bad zip path", func(t *testing.T) {
		req := httptest.NewRequest(http.MethodGet, "/P/staging/Nope.zip/", nil)
		rec := httptest.NewRecorder()
		ServeZip(cfg, rec, req, filepath.Join(root, "P", "staging", "Nope.zip"), "")
		if rec.Code != http.StatusNotFound {
			t.Errorf("status=%d, want 404", rec.Code)
		}
	})

	t.Run("zip-slip member is unreachable", func(t *testing.T) {
		// zip.Writer accepts "../" names, so the shared helper can build
		// an archive holding one safe entry and one escaping entry. The
		// handler must not surface the escaping one.
		evilZip := filepath.Join(root, "P", "staging", "Evil.zip")
		writeTestZip(t, evilZip, map[string]string{
			"ok.txt":        "fine",
			"../escape.txt": "pwned",
		})

		req := httptest.NewRequest(http.MethodGet, "/P/staging/Evil.zip/x", nil)
		rec := httptest.NewRecorder()
		ServeZip(cfg, rec, req, evilZip, "../escape.txt")
		if rec.Code != http.StatusNotFound {
			t.Errorf("zip-slip member status=%d, want 404", rec.Code)
		}
		// ...but the safe entry is fine.
		req2 := httptest.NewRequest(http.MethodGet, "/P/staging/Evil.zip/ok.txt", nil)
		rec2 := httptest.NewRecorder()
		ServeZip(cfg, rec2, req2, evilZip, "ok.txt")
		if rec2.Code != http.StatusOK || rec2.Body.String() != "fine" {
			t.Errorf("safe member status=%d body=%q", rec2.Code, rec2.Body.String())
		}
	})

	t.Run("HTML request serves something usable", func(t *testing.T) {
		req := httptest.NewRequest(http.MethodGet, "/P/staging/T.zip/", nil)
		req.Header.Set("Accept", "text/html")
		rec := httptest.NewRecorder()
		ServeZip(cfg, rec, req, zipPath, "")
		if rec.Code != http.StatusOK {
			t.Fatalf("status=%d", rec.Code)
		}
		// Either the embedded browse SPA or, when it is not embedded in
		// this build, the JSON fallback.
		ct := rec.Header().Get("Content-Type")
		if ct != "text/html; charset=utf-8" && ct != "application/json" {
			t.Errorf("Content-Type=%q, want html or json fallback", ct)
		}
	})
}

View file

@ -0,0 +1,250 @@
// Package zipfs presents the contents of a .zip file on disk as a
// browsable virtual directory tree: a JSON listing at any directory
// level inside the zip, and on-demand extraction of a single member.
//
// It does no HTTP and no ACL — that's the caller's job (see
// handler.ServeZip). The model mirrors what the browse tool already
// does client-side with JSZip: the zip's central directory is a flat
// list of member names; directory levels are reconstructed on top,
// synthesising the intermediate directories that have no explicit
// "<dir>/" entry of their own.
//
// Member names are sanitised against zip-slip (no "..", no absolute
// paths, no backslashes); unsafe entries are silently dropped.
package zipfs
import (
"archive/zip"
"io"
"net/url"
"path"
"sort"
"strings"
"time"
"codeberg.org/VARASYS/ZDDC/zddc/internal/listing"
)
// Open returns a reader over the zip archive stored at zipAbsPath.
// The caller owns the returned *zip.ReadCloser and must Close it.
// On failure the reader is nil and the error is whatever
// zip.OpenReader reported (a missing file can be detected with
// os.IsNotExist; a malformed archive yields a zip-format error).
func Open(zipAbsPath string) (*zip.ReadCloser, error) {
	archive, err := zip.OpenReader(zipAbsPath)
	if err != nil {
		return nil, err
	}
	return archive, nil
}
// cleanMember validates and normalises a raw zip entry name.
//
// It returns ("", false) for names that must not be surfaced: the
// empty name, any name containing a backslash, any name starting with
// "/", and any name that path.Clean reduces to ".", ".." or something
// beginning with "../" (i.e. it refers to, or climbs above, the zip
// root). Otherwise it returns the path.Clean'd name and true; when the
// input ended in "/" (the zip convention for an explicit directory
// entry) the trailing slash is put back on the result.
func cleanMember(name string) (string, bool) {
	switch {
	case name == "":
		return "", false
	case strings.ContainsRune(name, '\\'):
		return "", false
	case strings.HasPrefix(name, "/"):
		return "", false
	}

	cleaned := path.Clean(name)
	switch {
	case cleaned == ".", cleaned == "..", strings.HasPrefix(cleaned, "../"):
		return "", false
	}

	// path.Clean strips a trailing slash and never returns "", so the
	// directory marker can simply be re-appended.
	if strings.HasSuffix(name, "/") {
		return cleaned + "/", true
	}
	return cleaned, true
}
// normPrefix reduces a member-path string ("", "a", "a/b", "a/b/",
// "/a/b/") to its canonical directory-level form by stripping every
// leading and trailing slash. Interior slashes are left untouched.
func normPrefix(p string) string {
	withoutLeading := strings.TrimLeft(p, "/")
	return strings.TrimRight(withoutLeading, "/")
}
// OpenMember looks for a *file* member of r whose cleaned name matches
// memberPath and, on success, returns a reader for its bytes, its
// declared uncompressed size, its modification time, and the base name
// of the matched entry.
//
// Matching is case-insensitive over the whole path (strings.EqualFold).
// Entries rejected by cleanMember and explicit directory entries
// (names ending in "/") are never matched. Leading and trailing
// slashes on memberPath are ignored; an empty memberPath matches
// nothing.
//
// ok is false when no file member matches — the path may still be a
// directory level — and also when the matching entry cannot be opened.
// The caller must Close rc when ok is true.
func OpenMember(r *zip.Reader, memberPath string) (rc io.ReadCloser, size int64, mod time.Time, name string, ok bool) {
	target := normPrefix(memberPath)
	if target == "" {
		return nil, 0, time.Time{}, "", false
	}

	for _, entry := range r.File {
		cleaned, safe := cleanMember(entry.Name)
		if !safe || strings.HasSuffix(cleaned, "/") || !strings.EqualFold(cleaned, target) {
			// Unsafe name, directory entry, or simply not the one we want.
			continue
		}

		// First match wins; a failure to open it is reported as "no
		// such member" rather than trying later entries.
		body, openErr := entry.Open()
		if openErr != nil {
			return nil, 0, time.Time{}, "", false
		}
		return body, int64(entry.UncompressedSize64), entry.Modified, path.Base(cleaned), true
	}

	return nil, 0, time.Time{}, "", false
}
// IsDirLevel reports whether prefix names a directory level inside r.
//
// The zip root (prefix "" after slashes are stripped) always counts.
// Any other prefix counts when r holds an explicit "<prefix>/"
// directory entry, or at least one entry located beneath "<prefix>/".
// A file entry named exactly prefix does not make it a directory.
// Entries rejected by cleanMember are ignored, and the comparison is
// case-sensitive.
func IsDirLevel(r *zip.Reader, prefix string) bool {
	level := normPrefix(prefix)
	if level == "" {
		return true
	}

	below := level + "/"
	for _, entry := range r.File {
		cleaned, safe := cleanMember(entry.Name)
		if !safe {
			continue
		}
		bare := strings.TrimSuffix(cleaned, "/")
		explicitDir := bare == level && strings.HasSuffix(cleaned, "/")
		if explicitDir || strings.HasPrefix(bare, below) {
			return true
		}
	}
	return false
}
// List returns the immediate children of the directory level named by
// prefix ("" = the zip root) as Caddy-style FileInfo entries, with URL
// = baseURL + escaped-name. Synthesised intermediate directories (no
// explicit "<dir>/" entry) are included. valid is false when prefix
// names no directory level in the zip (no entry is at-or-under it).
//
// baseURL should end with "/" — it's the URL prefix for this level
// (the request path, e.g. "/P/staging/Foo.zip/" or
// "/P/staging/Foo.zip/sub/").
//
// The result is ordered directories first, then by name ignoring case.
// Names that differ only by case are ordered by their exact spelling so
// repeated calls on the same archive return the same order.
func List(r *zip.Reader, prefix, baseURL string) ([]listing.FileInfo, bool) {
	pfx := normPrefix(prefix)
	var under string // the "<pfx>/" string we strip, or "" at root
	if pfx != "" {
		under = pfx + "/"
	}
	type child struct {
		isDir    bool
		size     int64
		mod      time.Time
		mode     uint32
		explicit bool // backed by a real zip entry (vs. synthesised)
	}
	children := map[string]*child{}
	sawLevel := pfx == "" // the root level always exists
	for _, f := range r.File {
		cl, good := cleanMember(f.Name)
		if !good {
			continue
		}
		entryIsDir := strings.HasSuffix(cl, "/")
		bare := strings.TrimSuffix(cl, "/")
		if pfx != "" {
			if bare == pfx {
				// Entry is the prefix itself: an explicit "<pfx>/" dir
				// entry confirms the level; a file with that exact name
				// is not a directory, so don't count it.
				if entryIsDir {
					sawLevel = true
				}
				continue
			}
			if !strings.HasPrefix(bare, under) {
				continue
			}
		}
		// Anything that reaches here sits below the level.
		sawLevel = true
		rest := strings.TrimPrefix(bare, under) // under is "" at the root
		if rest == "" {
			continue
		}
		// seg is the first path segment below the level; nested reports
		// whether the entry continues deeper than that segment.
		seg, _, nested := strings.Cut(rest, "/")
		c := children[seg]
		if c == nil {
			c = &child{}
			children[seg] = c
		}
		switch {
		case nested:
			// seg is an intermediate directory on the way to a deeper
			// member. Synthesised unless an explicit entry upgrades it.
			c.isDir = true
		case entryIsDir:
			c.isDir = true
			c.explicit = true
			c.mod = f.Modified
			c.mode = uint32(f.Mode())
		default:
			// Immediate file child. On a name clash (malformed zip) the
			// file replaces a synthesised directory, but is ignored once
			// the child is already recorded as an explicit directory.
			if !c.explicit || !c.isDir {
				c.isDir = false
				c.explicit = true
				c.size = int64(f.UncompressedSize64)
				c.mod = f.Modified
				c.mode = uint32(f.Mode())
			}
		}
	}
	if !sawLevel {
		return nil, false
	}
	out := make([]listing.FileInfo, 0, len(children))
	for name, c := range children {
		fi := listing.FileInfo{
			Name:    name,
			Size:    c.size,
			ModTime: c.mod,
			Mode:    c.mode,
			IsDir:   c.isDir,
			URL:     baseURL + url.PathEscape(name),
		}
		if c.isDir {
			fi.Name += "/"
			fi.URL += "/"
		}
		out = append(out, fi)
	}
	sort.Slice(out, func(i, j int) bool {
		if out[i].IsDir != out[j].IsDir {
			return out[i].IsDir // directories first
		}
		li, lj := strings.ToLower(out[i].Name), strings.ToLower(out[j].Name)
		if li != lj {
			return li < lj
		}
		// out was filled from a map (random order) and sort.Slice is not
		// stable, so break case-only ties on the exact name to keep the
		// listing deterministic.
		return out[i].Name < out[j].Name
	})
	return out, true
}
// ResolveCloser is a small helper for the handler. It opens the zip at
// zipAbsPath via Open and hands back the embedded reader together with
// the function that closes the archive. When Open fails, both are nil
// and the error is returned unchanged.
func ResolveCloser(zipAbsPath string) (*zip.Reader, func() error, error) {
	archive, openErr := Open(zipAbsPath)
	if openErr == nil {
		return &archive.Reader, archive.Close, nil
	}
	return nil, nil, openErr
}
// IsZipName tells whether the final path element of name carries a
// ".zip" extension, compared without regard to letter case.
func IsZipName(name string) bool {
	ext := path.Ext(name)
	return strings.EqualFold(".zip", ext)
}

View file

@ -0,0 +1,264 @@
package zipfs
import (
"archive/zip"
"bytes"
"io"
"testing"
)
// makeZip assembles a zip archive in memory from entries (name -> body)
// and returns a reader over it. The sentinel body "<dir>" writes nothing,
// which — for a name ending in "/" — yields an explicit directory entry;
// every other value becomes the member's content. Any failure aborts the
// test.
func makeZip(t *testing.T, entries map[string]string) *zip.Reader {
	t.Helper()
	archive := new(bytes.Buffer)
	writer := zip.NewWriter(archive)
	for entryName, content := range entries {
		member, err := writer.Create(entryName)
		if err != nil {
			t.Fatalf("zip.Create(%q): %v", entryName, err)
		}
		if content == "<dir>" {
			continue // directory marker: no body to write
		}
		if _, err = member.Write([]byte(content)); err != nil {
			t.Fatalf("write %q: %v", entryName, err)
		}
	}
	if err := writer.Close(); err != nil {
		t.Fatalf("zip.Close: %v", err)
	}
	raw := archive.Bytes()
	reader, err := zip.NewReader(bytes.NewReader(raw), int64(len(raw)))
	if err != nil {
		t.Fatalf("zip.NewReader: %v", err)
	}
	return reader
}
// TestList exercises List against a fixture that mixes a synthesised
// directory (a/), a root file (c.txt) and an explicit directory entry
// (d/) with nested content, covering the root level, nested levels,
// unknown prefixes and URL escaping of child names.
func TestList(t *testing.T) {
	zr := makeZip(t, map[string]string{
		"a/b.txt":   "hello",
		"c.txt":     "world",
		"d/":        "<dir>", // explicit dir
		"d/e/f.pdf": "pdfbytes",
		"d/g.txt":   "g",
	})
	t.Run("root level", func(t *testing.T) {
		out, ok := List(zr, "", "/Z/")
		if !ok {
			t.Fatal("root level should be valid")
		}
		got := map[string]bool{} // name -> isDir
		for _, fi := range out {
			got[fi.Name] = fi.IsDir
			if fi.IsDir && fi.URL != "/Z/"+stripSlash(fi.Name)+"/" {
				t.Errorf("dir %q URL=%q", fi.Name, fi.URL)
			}
			if !fi.IsDir && fi.URL != "/Z/"+fi.Name {
				t.Errorf("file %q URL=%q", fi.Name, fi.URL)
			}
		}
		// Expect: a/ (synthesized dir), c.txt (file), d/ (explicit dir).
		if !got["a/"] || !got["d/"] {
			t.Errorf("missing dir children; got %v", got)
		}
		if isDir, ok := got["c.txt"]; !ok || isDir {
			t.Errorf("c.txt should be a file child; got %v", got)
		}
		if len(out) != 3 {
			// Fatal: the positional sort check below indexes out[0..2].
			t.Fatalf("root children = %d, want 3: %v", len(out), got)
		}
		// Directories sort before files.
		if !out[0].IsDir || !out[1].IsDir || out[2].IsDir {
			t.Errorf("sort order wrong: %v", out)
		}
	})
	t.Run("nested level with explicit dir entry", func(t *testing.T) {
		out, ok := List(zr, "d", "/Z/d/")
		if !ok {
			t.Fatal("d/ should be valid")
		}
		got := map[string]bool{}
		for _, fi := range out {
			got[fi.Name] = fi.IsDir
		}
		if !got["e/"] {
			t.Errorf("d/ should contain synthesized e/; got %v", got)
		}
		if isDir, ok := got["g.txt"]; !ok || isDir {
			t.Errorf("d/ should contain file g.txt; got %v", got)
		}
	})
	t.Run("deep level without explicit dir entry", func(t *testing.T) {
		out, ok := List(zr, "a", "/Z/a/")
		if !ok {
			t.Fatal("a/ should be valid (only known via a/b.txt)")
		}
		if len(out) != 1 || out[0].Name != "b.txt" || out[0].IsDir {
			t.Errorf("a/ children = %v, want [b.txt]", out)
		}
	})
	t.Run("nonexistent level", func(t *testing.T) {
		if _, ok := List(zr, "nope", "/Z/nope/"); ok {
			t.Error("nope should not be a valid level")
		}
		// A file name is not a directory level.
		if _, ok := List(zr, "c.txt", "/Z/c.txt/"); ok {
			t.Error("c.txt is a file, not a directory level")
		}
	})
	t.Run("URL escaping", func(t *testing.T) {
		zr2 := makeZip(t, map[string]string{"my doc.pdf": "x", "sub dir/k.txt": "y"})
		out, ok := List(zr2, "", "/Z/")
		if !ok {
			t.Fatal("root level should be valid")
		}
		urls := map[string]string{} // name -> URL
		for _, fi := range out {
			urls[fi.Name] = fi.URL
		}
		// Look the entries up by name so a missing child fails the test
		// instead of silently skipping the URL assertion.
		if got, listed := urls["my doc.pdf"]; !listed || got != "/Z/my%20doc.pdf" {
			t.Errorf("file URL not escaped: %q (listed=%v)", got, listed)
		}
		if got, listed := urls["sub dir/"]; !listed || got != "/Z/sub%20dir/" {
			t.Errorf("dir URL not escaped: %q (listed=%v)", got, listed)
		}
	})
}
// stripSlash drops a single trailing "/" from s, if there is one.
func stripSlash(s string) string {
	n := len(s)
	if n == 0 || s[n-1] != '/' {
		return s
	}
	return s[:n-1]
}
// TestIsDirLevel checks which prefixes IsDirLevel accepts as directory
// levels: the root, explicit directory entries and levels implied by a
// deeper member — but not file names or partial segment matches.
func TestIsDirLevel(t *testing.T) {
	zr := makeZip(t, map[string]string{
		"a/b.txt":   "x",
		"c.txt":     "y",
		"d/":        "<dir>",
		"d/e/f.pdf": "z",
	})
	table := []struct {
		prefix string
		want   bool
	}{
		{"", true},       // root
		{"a", true},      // implied (a/b.txt)
		{"a/", true},     // trailing slash tolerated
		{"d", true},      // explicit
		{"d/e", true},    // implied (d/e/f.pdf)
		{"c.txt", false}, // a file, not a level
		{"nope", false},
		// "a/b.txt" starts with "a/b" but not with "a/b/", so "a/b" is
		// only a partial segment match, not a level.
		{"a/b", false},
	}
	for _, tc := range table {
		got := IsDirLevel(zr, tc.prefix)
		if got == tc.want {
			continue
		}
		t.Errorf("IsDirLevel(%q) = %v, want %v", tc.prefix, got, tc.want)
	}
}
// TestOpenMember covers OpenMember: reading a file member, matching
// regardless of letter case, refusing directory levels, and reporting
// absent or empty paths as not found.
func TestOpenMember(t *testing.T) {
	zr := makeZip(t, map[string]string{
		"a/b.txt":   "hello world",
		"c.txt":     "ccc",
		"d/e/f.pdf": "pdf-bytes",
		"d/":        "<dir>",
	})
	t.Run("file member", func(t *testing.T) {
		const wantBody = "hello world"
		member, gotSize, _, gotName, found := OpenMember(zr, "a/b.txt")
		if !found {
			t.Fatal("a/b.txt should be found")
		}
		defer member.Close()
		if gotName != "b.txt" {
			t.Errorf("name=%q, want b.txt", gotName)
		}
		if gotSize != int64(len(wantBody)) {
			t.Errorf("size=%d, want %d", gotSize, len(wantBody))
		}
		// A read error leaves content short, which the comparison below
		// reports, so the error itself is not inspected.
		content, _ := io.ReadAll(member)
		if string(content) != wantBody {
			t.Errorf("body=%q", content)
		}
	})
	t.Run("case-insensitive", func(t *testing.T) {
		member, _, _, _, found := OpenMember(zr, "D/E/F.PDF")
		if !found {
			t.Fatal("D/E/F.PDF should match d/e/f.pdf")
		}
		member.Close()
	})
	t.Run("directory entry is not a member", func(t *testing.T) {
		levels := []struct{ path, complaint string }{
			{"d", "d/ is a directory, not a file member"},
			{"a", "a is a directory level, not a file member"},
		}
		for _, lv := range levels {
			if _, _, _, _, found := OpenMember(zr, lv.path); found {
				t.Error(lv.complaint)
			}
		}
	})
	t.Run("missing", func(t *testing.T) {
		absent := []struct{ path, complaint string }{
			{"no/such.txt", "missing member reported as found"},
			{"", "empty member should not match"},
		}
		for _, ab := range absent {
			if _, _, _, _, found := OpenMember(zr, ab.path); found {
				t.Error(ab.complaint)
			}
		}
	})
}
// TestCleanMemberRejectsZipSlip verifies that cleanMember refuses
// traversal, absolute, backslash and empty names, and that acceptable
// names come back in the expected cleaned form.
func TestCleanMemberRejectsZipSlip(t *testing.T) {
	rejected := []string{
		"../evil.txt",
		"a/../../evil.txt",
		"/abs/evil.txt",
		"a\\b.txt",
		"..",
		"",
	}
	for _, raw := range rejected {
		_, accepted := cleanMember(raw)
		if accepted {
			t.Errorf("cleanMember(%q) should be rejected", raw)
		}
	}
	// raw name -> expected cleaned name
	accepted := map[string]string{
		"a/b.txt":   "a/b.txt",
		"a/./b.txt": "a/b.txt",
		"dir/":      "dir/",
		"x.txt":     "x.txt",
	}
	for raw, want := range accepted {
		if got, ok := cleanMember(raw); !ok || got != want {
			t.Errorf("cleanMember(%q) = (%q, %v), want (%q, true)", raw, got, ok, want)
		}
	}
}
// TestListIgnoresUnsafeEntries builds a zip whose central directory
// carries a malicious "../" entry and verifies that neither List nor
// OpenMember surfaces it, while the safe entries stay visible.
func TestListIgnoresUnsafeEntries(t *testing.T) {
	var buf bytes.Buffer
	zw := zip.NewWriter(&buf)
	for _, n := range []string{"good.txt", "../escape.txt", "sub/ok.txt"} {
		w, err := zw.Create(n)
		if err != nil {
			t.Fatalf("zip.Create(%q): %v", n, err)
		}
		if _, err := w.Write([]byte("x")); err != nil {
			t.Fatalf("write %q: %v", n, err)
		}
	}
	if err := zw.Close(); err != nil {
		t.Fatalf("zip.Close: %v", err)
	}
	zr, err := zip.NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len()))
	if err != nil {
		t.Fatal(err)
	}
	out, ok := List(zr, "", "/Z/")
	if !ok {
		t.Fatal("root level should be valid")
	}
	seen := map[string]bool{}
	for _, fi := range out {
		seen[fi.Name] = true
		if fi.Name == "escape.txt" || fi.Name == "../escape.txt" {
			t.Errorf("unsafe entry surfaced: %q", fi.Name)
		}
	}
	// An unfiltered "../escape.txt" would show up as a "../" directory
	// child rather than under its base name, so pin the exact listing:
	// only good.txt and the synthesised sub/ may appear.
	if len(out) != 2 || !seen["good.txt"] || !seen["sub/"] {
		t.Errorf("root listing = %v, want exactly good.txt and sub/", seen)
	}
	if rc, _, _, _, ok := OpenMember(zr, "../escape.txt"); ok {
		rc.Close()
		t.Error("unsafe member openable")
	}
}