ZDDC/zddc/internal/handler/ziphandler.go
ZDDC 5e4d4fefb3 feat(zddc): serve a .zip as a virtual directory (zipfs + dispatch intercept)
zddc-server can now browse into a .zip file without the client
downloading the whole archive:
  - GET …/Foo.zip/                → JSON listing of the zip's members
                                     (Accept: application/json), or the
                                     browse SPA (HTML) — same content
                                     negotiation as ServeDirectory/.archive
  - GET …/Foo.zip/sub/doc.pdf     → extracts and streams that one member
                                     (Range / ETag / conditional GET via
                                     http.ServeContent)
  - GET …/Foo.zip                 → unchanged: the raw .zip download
  - PUT/DELETE/POST …/Foo.zip/…   → 405 (zip access is read-only)

New internal/zipfs package reconstructs directory levels from the zip's
flat central directory (synthesising intermediate dirs with no explicit
"<dir>/" entry, mirroring what browse does client-side with JSZip) and
drops zip-slip-unsafe entries ("..", absolute, backslash). New
handler.ServeZip wraps it. The dispatcher gets splitZipPath + an
intercept placed before the file-API branch (so a write to a path under
a .zip is refused, not silently mkdir'd); ACL is the chain of the
directory CONTAINING the zip — a zip carries no .zddc of its own, same
as the .archive virtual surface. The os.Stat-per-segment walk is gated
by a cheap ".zip/" substring check so ordinary requests are unaffected.

Also fixes two pre-existing dispatch-test failures uncovered along the
way: a non-existent top-level "*.html" URL was 302'ing to its slash
form (because the bare "*" project glob makes every first-level segment
"declared") — the cascade-declared no-slash block now requires a
directory-shaped URL (trailing slash, or no file extension); and the
stale TestDispatchSlashRouting expectation that archive/<party>/mdl/
302s to mdl/table.html was updated to match the intended behaviour
(the default-MDL virtual fallback shows the browse listing there; only
a real on-disk tables: + *.table.yaml triggers the bounce).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-12 12:17:47 -05:00

146 lines
5.2 KiB
Go

package handler
import (
"bytes"
"encoding/json"
"io"
"log/slog"
"net/http"
"os"
"path"
"strings"
"codeberg.org/VARASYS/ZDDC/zddc/internal/apps"
"codeberg.org/VARASYS/ZDDC/zddc/internal/config"
"codeberg.org/VARASYS/ZDDC/zddc/internal/zipfs"
)
// maxZipMemberBytes caps the size of a single zip member the server
// will materialise in memory to extract. Zip transmittals carry
// documents, not multi-gigabyte blobs; a member declaring more than
// this in its central-directory header is refused (413) rather than
// buffered. Streaming/large-member support is a future optimisation.
const maxZipMemberBytes = 512 << 20 // 512 MiB
// ServeZip serves the contents of the .zip file at zipAbsPath as a
// virtual directory. memberPath is the slash-separated path inside the
// zip (already URL-decoded; "" = the zip's root level).
//
// Only reached via the dispatcher's splitZipPath intercept, which has
// already (a) confirmed zipAbsPath is a regular .zip file under
// cfg.Root and (b) enforced ACL using the chain of the directory
// CONTAINING the zip — a zip has no .zddc of its own, exactly like the
// .archive virtual path. Write methods are rejected by the dispatcher
// before this is reached; zip access here is read-only.
//
// Routing inside the zip mirrors the server's slash convention:
// - memberPath names a file member → extract + stream it
// - memberPath names a directory level → JSON listing (Accept: json)
// or the browse SPA (HTML)
// - request had no trailing slash but the
// member is a directory level → 302 to add the slash
// - otherwise → 404
func ServeZip(cfg config.Config, w http.ResponseWriter, r *http.Request, zipAbsPath, memberPath string) {
zr, closeZip, err := zipfs.ResolveCloser(zipAbsPath)
if err != nil {
if os.IsNotExist(err) {
http.Error(w, "Not Found", http.StatusNotFound)
} else {
slog.Warn("open zip archive", "path", zipAbsPath, "err", err)
http.Error(w, "Bad zip archive", http.StatusBadGateway)
}
return
}
defer closeZip()
member := strings.Trim(memberPath, "/")
zipName := path.Base(zipAbsPath)
// File member?
if member != "" {
if rc, size, mod, name, ok := zipfs.OpenMember(zr, member); ok {
defer rc.Close()
if size > maxZipMemberBytes {
http.Error(w, "Zip member too large to serve", http.StatusRequestEntityTooLarge)
return
}
// Buffer into memory so http.ServeContent gets a ReadSeeker
// (Range / conditional GET). Bounded by maxZipMemberBytes.
buf, err := io.ReadAll(rc)
if err != nil {
slog.Warn("extract zip member", "zip", zipAbsPath, "member", member, "err", err)
http.Error(w, "Internal Server Error", http.StatusInternalServerError)
return
}
w.Header().Set("Cache-Control", "private, max-age=0, must-revalidate")
w.Header().Set("X-ZDDC-Source", "zip:"+zipName)
http.ServeContent(w, r, name, mod, bytes.NewReader(buf))
return
}
}
// Directory level. The dispatcher only routes here when the URL had
// a tail segment or a trailing slash, so a no-slash URL that lands
// on a directory level gets normalised to the slash form.
baseURL := r.URL.Path
if !strings.HasSuffix(baseURL, "/") {
if zipfs.IsDirLevel(zr, member) {
http.Redirect(w, r, baseURL+"/", http.StatusFound)
return
}
http.Error(w, "Not Found", http.StatusNotFound)
return
}
entries, valid := zipfs.List(zr, member, baseURL)
if !valid {
http.Error(w, "Not Found", http.StatusNotFound)
return
}
// Vary: Accept — same URL serves the JSON listing or the browse
// SPA depending on Accept; without it caches can cross the wires.
w.Header().Set("Vary", "Accept")
if strings.Contains(r.Header.Get("Accept"), "application/json") {
body, err := json.Marshal(entries)
if err != nil {
slog.Error("encoding zip listing", "err", err)
http.Error(w, "Internal Server Error", http.StatusInternalServerError)
return
}
etag := `"` + listingETag(body) + `"`
w.Header().Set("Content-Type", "application/json")
w.Header().Set("ETag", etag)
w.Header().Set("Cache-Control", "private, max-age=0, must-revalidate")
w.Header().Set("X-ZDDC-Source", "zip:"+zipName)
if match := r.Header.Get("If-None-Match"); match != "" && match == etag {
w.WriteHeader(http.StatusNotModified)
return
}
_, _ = w.Write(body)
return
}
// HTML: serve the embedded `browse` SPA, exactly like
// ServeDirectory's and ServeArchive's HTML branches. It auto-detects
// server mode by re-fetching this URL with Accept: application/json
// (→ the JSON branch above), then renders the zip's contents.
body := apps.EmbeddedBytes("browse")
if len(body) == 0 {
jsonBody, _ := json.Marshal(entries)
w.Header().Set("Content-Type", "application/json")
w.Header().Set("Cache-Control", "no-cache")
_, _ = w.Write(jsonBody)
return
}
etag := `"` + apps.EmbeddedETag("browse") + `"`
w.Header().Set("ETag", etag)
w.Header().Set("Cache-Control", "public, max-age=0, must-revalidate")
w.Header().Set("Content-Type", "text/html; charset=utf-8")
w.Header().Set("X-ZDDC-Source", "embedded:browse")
if match := r.Header.Get("If-None-Match"); match != "" && match == etag {
w.WriteHeader(http.StatusNotModified)
return
}
_, _ = w.Write(body)
}