ZDDC/zddc/internal/zipfs/zipfs.go
ZDDC 5e4d4fefb3 feat(zddc): serve a .zip as a virtual directory (zipfs + dispatch intercept)
zddc-server can now browse into a .zip file without the client
downloading the whole archive:
  - GET …/Foo.zip/                → JSON listing of the zip's members
                                     (Accept: application/json), or the
                                     browse SPA (HTML) — same content
                                     negotiation as ServeDirectory/.archive
  - GET …/Foo.zip/sub/doc.pdf     → extracts and streams that one member
                                     (Range / ETag / conditional GET via
                                     http.ServeContent)
  - GET …/Foo.zip                 → unchanged: the raw .zip download
  - PUT/DELETE/POST …/Foo.zip/…   → 405 (zip access is read-only)

New internal/zipfs package reconstructs directory levels from the zip's
flat central directory (synthesising intermediate dirs with no explicit
"<dir>/" entry, mirroring what browse does client-side with JSZip) and
drops zip-slip-unsafe entries ("..", absolute, backslash). New
handler.ServeZip wraps it. The dispatcher gets splitZipPath + an
intercept placed before the file-API branch (so a write to a path under
a .zip is refused, not silently mkdir'd); ACL is the chain of the
directory CONTAINING the zip — a zip carries no .zddc of its own, same
as the .archive virtual surface. The os.Stat-per-segment walk is gated
by a cheap ".zip/" substring check so ordinary requests are unaffected.

Also fixes two pre-existing dispatch-test failures uncovered along the
way: a non-existent top-level "*.html" URL was 302'ing to its slash
form (because the bare "*" project glob makes every first-level segment
"declared") — the cascade-declared no-slash block now requires a
directory-shaped URL (trailing slash, or no file extension); and the
stale TestDispatchSlashRouting expectation that archive/<party>/mdl/
302s to mdl/table.html was updated to match the intended behaviour
(the default-MDL virtual fallback shows the browse listing there; only
a real on-disk tables: + *.table.yaml triggers the bounce).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-12 12:17:47 -05:00

250 lines
7.2 KiB
Go

// Package zipfs presents the contents of a .zip file on disk as a
// browsable virtual directory tree: a JSON listing at any directory
// level inside the zip, and on-demand extraction of a single member.
//
// It does no HTTP and no ACL — that's the caller's job (see
// handler.ServeZip). The model mirrors what the browse tool already
// does client-side with JSZip: the zip's central directory is a flat
// list of member names; directory levels are reconstructed on top,
// synthesising the intermediate directories that have no explicit
// "<dir>/" entry of their own.
//
// Member names are sanitised against zip-slip (no "..", no absolute
// paths, no backslashes); unsafe entries are silently dropped.
package zipfs
import (
"archive/zip"
"io"
"net/url"
"path"
"sort"
"strings"
"time"
"codeberg.org/VARASYS/ZDDC/zddc/internal/listing"
)
// Open opens the zip archive stored at zipAbsPath for reading. On
// success the caller owns the returned *zip.ReadCloser and must Close
// it. Any failure reported by zip.OpenReader (the file cannot be
// opened, or it is not a valid zip) is returned unchanged together
// with a nil reader.
func Open(zipAbsPath string) (*zip.ReadCloser, error) {
	archive, err := zip.OpenReader(zipAbsPath)
	if err != nil {
		return nil, err
	}
	return archive, nil
}
// cleanMember canonicalises a raw zip entry name and vets it against
// zip-slip. It reports ("", false) for names that must not be
// surfaced: the empty name, absolute paths, names containing a
// backslash, and names that resolve to the zip root itself or climb
// out of it through "..". For accepted names the path.Clean form is
// returned, with the trailing slash put back when the raw name had one
// (the zip convention for an explicit directory entry).
func cleanMember(name string) (string, bool) {
	switch {
	case name == "",
		strings.HasPrefix(name, "/"),
		strings.Contains(name, "\\"):
		return "", false
	}

	cleaned := path.Clean(name)
	switch {
	case cleaned == ".", cleaned == "..", strings.HasPrefix(cleaned, "../"):
		// Resolves to the root itself or escapes it.
		return "", false
	case strings.HasSuffix(name, "/"):
		// path.Clean strips the trailing slash; restore the directory marker.
		return cleaned + "/", true
	}
	return cleaned, true
}
// normPrefix reduces a member-path string ("", "a", "a/b", "a/b/") to
// its canonical "directory level" spelling by dropping every leading
// and trailing slash. Interior slashes are left untouched.
func normPrefix(p string) string {
	withoutLeading := strings.TrimLeft(p, "/")
	return strings.TrimRight(withoutLeading, "/")
}
// OpenMember locates a *file* member matching memberPath inside r and
// returns a reader for its bytes plus its uncompressed size, its
// modtime and its base name (taken from the cleaned member name, so it
// carries the zip's own spelling).
//
// Matching is on the full cleaned path. A member whose name equals
// memberPath exactly is preferred; failing that, the first member that
// matches case-insensitively is used, mirroring the URL case-folding
// the rest of the server does. Preferring the exact spelling keeps
// every member reachable when a zip holds names that differ only by
// case. Directory entries and unsafe names (see cleanMember) are never
// matched.
//
// ok is false when no file member matches (it may still be a directory
// level — call List or IsDirLevel), and also when the matched member
// cannot be opened; the two cases are not distinguished. On ok the
// caller must Close rc.
//
// memberPath should be the cleaned, slash-separated path (e.g.
// "sub/doc.pdf"); leading and trailing slashes are ignored.
func OpenMember(r *zip.Reader, memberPath string) (rc io.ReadCloser, size int64, mod time.Time, name string, ok bool) {
	want := normPrefix(memberPath)
	if want == "" {
		return nil, 0, time.Time{}, "", false
	}

	var (
		match     *zip.File // best candidate so far
		matchName string    // its cleaned name
	)
	for _, f := range r.File {
		cl, good := cleanMember(f.Name)
		if !good || strings.HasSuffix(cl, "/") {
			continue // unsafe name or directory entry
		}
		if cl == want {
			// Exact spelling always wins; nothing later can beat it.
			match, matchName = f, cl
			break
		}
		if match == nil && strings.EqualFold(cl, want) {
			// Remember the first case-insensitive hit, but keep
			// scanning in case an exact match follows.
			match, matchName = f, cl
		}
	}
	if match == nil {
		return nil, 0, time.Time{}, "", false
	}

	h, err := match.Open()
	if err != nil {
		return nil, 0, time.Time{}, "", false
	}
	return h, int64(match.UncompressedSize64), match.Modified, path.Base(matchName), true
}
// IsDirLevel reports whether prefix addresses a directory level of the
// zip behind r. The root (a prefix that is empty once surrounding
// slashes are trimmed) is always a level. Any other prefix qualifies
// when some safe member is either the explicit directory entry
// "<prefix>/" or lives somewhere beneath it. A *file* member named
// exactly prefix does not make it a directory. The comparison is
// case-sensitive.
func IsDirLevel(r *zip.Reader, prefix string) bool {
	level := normPrefix(prefix)
	if level == "" {
		return true
	}

	dirName := level + "/"
	for _, entry := range r.File {
		member, safe := cleanMember(entry.Name)
		if !safe {
			continue
		}
		// One prefix test covers both cases: the explicit "<level>/"
		// entry equals dirName, and anything beneath the level starts
		// with it. A plain file named exactly level has no trailing
		// slash and therefore does not match.
		if strings.HasPrefix(member, dirName) {
			return true
		}
	}
	return false
}
// List returns the immediate children of the directory level named by
// prefix ("" = the zip root) as Caddy-style FileInfo entries, with URL
// = baseURL + escaped-name. Synthesised intermediate directories (no
// explicit "<dir>/" entry) are included, with zero size, mode and
// modtime. valid is false when prefix names no directory level in the
// zip (no explicit "<prefix>/" entry and no entry under it).
//
// Directory entries get a trailing "/" on both Name and URL. The result
// is sorted directories first, then case-insensitively by name, with
// the exact name as a tie-break so the order is fully deterministic.
//
// If a malformed zip uses the same name for a file and for a directory
// (explicit entry, or implied by a deeper member), the child is listed
// as a directory regardless of entry order; this agrees with
// IsDirLevel, and the file stays reachable through OpenMember.
//
// baseURL should end with "/" — it's the URL prefix for this level
// (the request path, e.g. "/P/staging/Foo.zip/" or
// "/P/staging/Foo.zip/sub/").
func List(r *zip.Reader, prefix, baseURL string) ([]listing.FileInfo, bool) {
	pfx := normPrefix(prefix)
	under := "" // the "<pfx>/" string we strip; stays "" at the root
	if pfx != "" {
		under = pfx + "/"
	}

	// child collects everything seen for one immediate child name. File
	// and directory evidence are kept apart so the final verdict does
	// not depend on the order of entries in the central directory and a
	// directory never inherits a file's size or mode.
	type child struct {
		hasDir   bool      // explicit "<name>/" entry, or a deeper member beneath it
		dirMod   time.Time // from the explicit directory entry, if any
		dirMode  uint32
		hasFile  bool // a file entry with exactly this name
		fileSize int64
		fileMod  time.Time
		fileMode uint32
	}
	children := map[string]*child{}
	sawLevel := pfx == "" // the root level always exists

	for _, f := range r.File {
		cl, good := cleanMember(f.Name)
		if !good {
			continue
		}
		entryIsDir := strings.HasSuffix(cl, "/")
		bare := strings.TrimSuffix(cl, "/")
		if pfx != "" {
			if bare == pfx {
				// Entry is the prefix itself: an explicit "<pfx>/" dir
				// entry confirms the level; a file with that exact name
				// is not a directory, so don't count it.
				if entryIsDir {
					sawLevel = true
				}
				continue
			}
			if !strings.HasPrefix(bare, under) {
				continue
			}
		}
		sawLevel = true

		rest := strings.TrimPrefix(bare, under)
		if rest == "" {
			continue
		}
		// seg is the immediate child name; anything after the first
		// slash means the entry lives deeper and seg is a directory.
		seg := rest
		nested := false
		if i := strings.IndexByte(rest, '/'); i >= 0 {
			seg = rest[:i]
			nested = true
		}

		c := children[seg]
		if c == nil {
			c = &child{}
			children[seg] = c
		}
		switch {
		case nested:
			// Intermediate directory on the way to a deeper member;
			// synthesised unless an explicit entry supplies metadata.
			c.hasDir = true
		case entryIsDir:
			c.hasDir = true
			c.dirMod = f.Modified
			c.dirMode = uint32(f.Mode())
		default:
			// Immediate file child; a later duplicate replaces an
			// earlier one.
			c.hasFile = true
			c.fileSize = int64(f.UncompressedSize64)
			c.fileMod = f.Modified
			c.fileMode = uint32(f.Mode())
		}
	}
	if !sawLevel {
		return nil, false
	}

	out := make([]listing.FileInfo, 0, len(children))
	for name, c := range children {
		fi := listing.FileInfo{
			Name: name,
			URL:  baseURL + url.PathEscape(name),
		}
		if c.hasDir {
			fi.IsDir = true
			fi.ModTime = c.dirMod
			fi.Mode = c.dirMode
			fi.Name += "/"
			fi.URL += "/"
		} else {
			fi.Size = c.fileSize
			fi.ModTime = c.fileMod
			fi.Mode = c.fileMode
		}
		out = append(out, fi)
	}

	sort.Slice(out, func(i, j int) bool {
		if out[i].IsDir != out[j].IsDir {
			return out[i].IsDir // directories first
		}
		li, lj := strings.ToLower(out[i].Name), strings.ToLower(out[j].Name)
		if li != lj {
			return li < lj
		}
		// Names differing only by case: fall back to the exact name so
		// the order does not depend on map iteration.
		return out[i].Name < out[j].Name
	})
	return out, true
}
// ResolveCloser is a small convenience for the handler. It opens the
// zip at zipAbsPath via Open and hands back the embedded *zip.Reader
// together with the function that closes the underlying archive. When
// opening fails it returns (nil, nil, err).
func ResolveCloser(zipAbsPath string) (*zip.Reader, func() error, error) {
	archive, openErr := Open(zipAbsPath)
	if openErr == nil {
		return &archive.Reader, archive.Close, nil
	}
	return nil, nil, openErr
}
// IsZipName reports whether the final element of name carries a ".zip"
// extension, compared case-insensitively.
func IsZipName(name string) bool {
	ext := path.Ext(name)
	return strings.EqualFold(ext, ".zip")
}