zddc-server can now browse into a .zip file without the client
downloading the whole archive:
- GET …/Foo.zip/ → JSON listing of the zip's members (Accept: application/json), or the browse SPA (HTML) — same content negotiation as ServeDirectory/.archive
- GET …/Foo.zip/sub/doc.pdf → extracts and streams that one member (Range / ETag / conditional GET via http.ServeContent)
- GET …/Foo.zip → unchanged: the raw .zip download
- PUT/DELETE/POST …/Foo.zip/… → 405 (zip access is read-only)
New internal/zipfs package reconstructs directory levels from the zip's
flat central directory (synthesising intermediate dirs with no explicit
"<dir>/" entry, mirroring what browse does client-side with JSZip) and
drops zip-slip-unsafe entries ("..", absolute, backslash). New
handler.ServeZip wraps it. The dispatcher gets splitZipPath + an
intercept placed before the file-API branch (so a write to a path under
a .zip is refused, not silently mkdir'd); ACL is the chain of the
directory CONTAINING the zip — a zip carries no .zddc of its own, same
as the .archive virtual surface. The os.Stat-per-segment walk is gated
by a cheap ".zip/" substring check so ordinary requests are unaffected.
Also fixes two pre-existing dispatch-test failures uncovered along the
way: a non-existent top-level "*.html" URL was 302'ing to its slash
form (because the bare "*" project glob makes every first-level segment
"declared") — the cascade-declared no-slash block now requires a
directory-shaped URL (trailing slash, or no file extension); and the
stale TestDispatchSlashRouting expectation that archive/<party>/mdl/
302s to mdl/table.html was updated to match the intended behaviour
(the default-MDL virtual fallback shows the browse listing there; only
a real on-disk tables: + *.table.yaml triggers the bounce).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
146 lines
5.2 KiB
Go
146 lines
5.2 KiB
Go
package handler
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/json"
|
|
"io"
|
|
"log/slog"
|
|
"net/http"
|
|
"os"
|
|
"path"
|
|
"strings"
|
|
|
|
"codeberg.org/VARASYS/ZDDC/zddc/internal/apps"
|
|
"codeberg.org/VARASYS/ZDDC/zddc/internal/config"
|
|
"codeberg.org/VARASYS/ZDDC/zddc/internal/zipfs"
|
|
)
|
|
|
|
// maxZipMemberBytes is the upper bound, in bytes, on a single zip member
// that the server is willing to buffer in memory for extraction. Zip
// transmittals are expected to hold documents rather than multi-gigabyte
// blobs, so a member whose central-directory header declares a larger
// size is answered with 413 instead of being read. Streaming support for
// large members is left as a future optimisation.
const maxZipMemberBytes = 512 * 1024 * 1024 // 512 MiB
|
|
|
|
// ServeZip serves the contents of the .zip file at zipAbsPath as a
|
|
// virtual directory. memberPath is the slash-separated path inside the
|
|
// zip (already URL-decoded; "" = the zip's root level).
|
|
//
|
|
// Only reached via the dispatcher's splitZipPath intercept, which has
|
|
// already (a) confirmed zipAbsPath is a regular .zip file under
|
|
// cfg.Root and (b) enforced ACL using the chain of the directory
|
|
// CONTAINING the zip — a zip has no .zddc of its own, exactly like the
|
|
// .archive virtual path. Write methods are rejected by the dispatcher
|
|
// before this is reached; zip access here is read-only.
|
|
//
|
|
// Routing inside the zip mirrors the server's slash convention:
|
|
// - memberPath names a file member → extract + stream it
|
|
// - memberPath names a directory level → JSON listing (Accept: json)
|
|
// or the browse SPA (HTML)
|
|
// - request had no trailing slash but the
|
|
// member is a directory level → 302 to add the slash
|
|
// - otherwise → 404
|
|
func ServeZip(cfg config.Config, w http.ResponseWriter, r *http.Request, zipAbsPath, memberPath string) {
|
|
zr, closeZip, err := zipfs.ResolveCloser(zipAbsPath)
|
|
if err != nil {
|
|
if os.IsNotExist(err) {
|
|
http.Error(w, "Not Found", http.StatusNotFound)
|
|
} else {
|
|
slog.Warn("open zip archive", "path", zipAbsPath, "err", err)
|
|
http.Error(w, "Bad zip archive", http.StatusBadGateway)
|
|
}
|
|
return
|
|
}
|
|
defer closeZip()
|
|
|
|
member := strings.Trim(memberPath, "/")
|
|
zipName := path.Base(zipAbsPath)
|
|
|
|
// File member?
|
|
if member != "" {
|
|
if rc, size, mod, name, ok := zipfs.OpenMember(zr, member); ok {
|
|
defer rc.Close()
|
|
if size > maxZipMemberBytes {
|
|
http.Error(w, "Zip member too large to serve", http.StatusRequestEntityTooLarge)
|
|
return
|
|
}
|
|
// Buffer into memory so http.ServeContent gets a ReadSeeker
|
|
// (Range / conditional GET). Bounded by maxZipMemberBytes.
|
|
buf, err := io.ReadAll(rc)
|
|
if err != nil {
|
|
slog.Warn("extract zip member", "zip", zipAbsPath, "member", member, "err", err)
|
|
http.Error(w, "Internal Server Error", http.StatusInternalServerError)
|
|
return
|
|
}
|
|
w.Header().Set("Cache-Control", "private, max-age=0, must-revalidate")
|
|
w.Header().Set("X-ZDDC-Source", "zip:"+zipName)
|
|
http.ServeContent(w, r, name, mod, bytes.NewReader(buf))
|
|
return
|
|
}
|
|
}
|
|
|
|
// Directory level. The dispatcher only routes here when the URL had
|
|
// a tail segment or a trailing slash, so a no-slash URL that lands
|
|
// on a directory level gets normalised to the slash form.
|
|
baseURL := r.URL.Path
|
|
if !strings.HasSuffix(baseURL, "/") {
|
|
if zipfs.IsDirLevel(zr, member) {
|
|
http.Redirect(w, r, baseURL+"/", http.StatusFound)
|
|
return
|
|
}
|
|
http.Error(w, "Not Found", http.StatusNotFound)
|
|
return
|
|
}
|
|
entries, valid := zipfs.List(zr, member, baseURL)
|
|
if !valid {
|
|
http.Error(w, "Not Found", http.StatusNotFound)
|
|
return
|
|
}
|
|
|
|
// Vary: Accept — same URL serves the JSON listing or the browse
|
|
// SPA depending on Accept; without it caches can cross the wires.
|
|
w.Header().Set("Vary", "Accept")
|
|
|
|
if strings.Contains(r.Header.Get("Accept"), "application/json") {
|
|
body, err := json.Marshal(entries)
|
|
if err != nil {
|
|
slog.Error("encoding zip listing", "err", err)
|
|
http.Error(w, "Internal Server Error", http.StatusInternalServerError)
|
|
return
|
|
}
|
|
etag := `"` + listingETag(body) + `"`
|
|
w.Header().Set("Content-Type", "application/json")
|
|
w.Header().Set("ETag", etag)
|
|
w.Header().Set("Cache-Control", "private, max-age=0, must-revalidate")
|
|
w.Header().Set("X-ZDDC-Source", "zip:"+zipName)
|
|
if match := r.Header.Get("If-None-Match"); match != "" && match == etag {
|
|
w.WriteHeader(http.StatusNotModified)
|
|
return
|
|
}
|
|
_, _ = w.Write(body)
|
|
return
|
|
}
|
|
|
|
// HTML: serve the embedded `browse` SPA, exactly like
|
|
// ServeDirectory's and ServeArchive's HTML branches. It auto-detects
|
|
// server mode by re-fetching this URL with Accept: application/json
|
|
// (→ the JSON branch above), then renders the zip's contents.
|
|
body := apps.EmbeddedBytes("browse")
|
|
if len(body) == 0 {
|
|
jsonBody, _ := json.Marshal(entries)
|
|
w.Header().Set("Content-Type", "application/json")
|
|
w.Header().Set("Cache-Control", "no-cache")
|
|
_, _ = w.Write(jsonBody)
|
|
return
|
|
}
|
|
etag := `"` + apps.EmbeddedETag("browse") + `"`
|
|
w.Header().Set("ETag", etag)
|
|
w.Header().Set("Cache-Control", "public, max-age=0, must-revalidate")
|
|
w.Header().Set("Content-Type", "text/html; charset=utf-8")
|
|
w.Header().Set("X-ZDDC-Source", "embedded:browse")
|
|
if match := r.Header.Get("If-None-Match"); match != "" && match == etag {
|
|
w.WriteHeader(http.StatusNotModified)
|
|
return
|
|
}
|
|
_, _ = w.Write(body)
|
|
}
|