ZDDC/zddc/internal/handler/subtreezip.go
2026-06-11 13:32:31 -05:00

198 lines
6.7 KiB
Go

package handler
import (
"archive/zip"
"io"
"io/fs"
"log/slog"
"net/http"
"os"
"path/filepath"
"strings"
"codeberg.org/VARASYS/ZDDC/zddc/internal/config"
"codeberg.org/VARASYS/ZDDC/zddc/internal/policy"
"codeberg.org/VARASYS/ZDDC/zddc/internal/zddc"
)
// alreadyCompressedExt lists the (lower-case, dot-prefixed) file
// extensions whose payload is already compressed or otherwise
// incompressible. Running DEFLATE over such files while streaming the
// response costs CPU for practically no size reduction, so entries with
// these extensions are written to the output zip uncompressed.
var alreadyCompressedExt = map[string]bool{
	// Archives and compressed streams.
	".zip": true,
	".gz":  true,
	".bz2": true,
	".xz":  true,
	".7z":  true,

	// Documents.
	".pdf": true,

	// Images.
	".png":  true,
	".jpg":  true,
	".jpeg": true,
	".gif":  true,
	".webp": true,
	".tif":  true,
	".tiff": true,

	// Office documents.
	".docx": true,
	".xlsx": true,
	".pptx": true,
	".odt":  true,
	".ods":  true,

	// Audio and video.
	".mp3":  true,
	".mp4":  true,
	".m4a":  true,
	".webm": true,
	".avi":  true,
	".mov":  true,
}
// zipMethodFor chooses the zip compression method for an entry called
// name. The extension of name is lower-cased and looked up in
// alreadyCompressedExt: a hit yields zip.Store (write the bytes as-is),
// anything else yields zip.Deflate.
func zipMethodFor(name string) uint16 {
	ext := strings.ToLower(filepath.Ext(name))
	method := zip.Deflate
	if alreadyCompressedExt[ext] {
		method = zip.Store
	}
	return method
}
// RecognizeVirtualSubtreeZip reports whether urlPath names a virtual
// "<dir>.zip" — a download endpoint that streams a directory's subtree
// as a zip.
//
// Parameters:
//   - fsRoot:  filesystem root the URL space is mapped onto. It is
//     cleaned before use, so a trailing separator or a "./" prefix does
//     not defeat the containment check. An empty fsRoot never matches.
//   - urlPath: slash-separated request path.
//
// Returns the absolute (cleaned) directory path and true when urlPath
// ends in ".zip" and the remainder resolves to a location strictly
// below fsRoot that is either an existing directory or a path
// zddc.IsDeclaredPath accepts (a cascade-declared path that, per the
// original design notes, the listing pipeline would render as empty).
// Otherwise returns "", false.
//
// The root itself is never recognised: "/.zip" is rejected up front,
// and any spelling that merely cleans down to the root (for example
// "/foo/...zip" or "//.zip") is rejected by the containment check.
//
// Design rationale: a virtual file living next to its source lets
// clients emit a plain `<a href>` without query-string handling, lets
// mirror tools pick it up through normal recursion, and gives
// `curl -O` a sensible filename without `--remote-header-name`. Real
// `.zip` files are expected to win — the dispatcher is meant to stat
// the literal path before calling this helper, so a genuine archive at
// `<path>.zip` is served as bytes.
func RecognizeVirtualSubtreeZip(fsRoot, urlPath string) (absDir string, ok bool) {
	base, isZip := strings.CutSuffix(urlPath, ".zip")
	if !isZip || base == "" || base == "/" {
		return "", false
	}
	if fsRoot == "" {
		return "", false
	}

	// filepath.Join cleans its result, so compare against a cleaned root;
	// comparing against the raw fsRoot rejects everything when fsRoot has
	// a trailing separator or is not in canonical form.
	root := filepath.Clean(fsRoot)
	rel := strings.Trim(base, "/")
	abs := filepath.Join(root, filepath.FromSlash(rel))

	// Path containment: abs must sit strictly below root. "." means the
	// URL collapsed to the root itself; a leading ".." means it escaped.
	within, err := filepath.Rel(root, abs)
	if err != nil || within == "." || within == ".." ||
		strings.HasPrefix(within, ".."+string(filepath.Separator)) {
		return "", false
	}

	if info, statErr := os.Stat(abs); statErr == nil && info.IsDir() {
		return abs, true
	}
	// fsRoot is handed over unchanged, exactly as the caller supplied it.
	if zddc.IsDeclaredPath(fsRoot, abs) {
		return abs, true
	}
	return "", false
}
// ServeSubtreeZip streams an application/zip download of every readable
// file under absDir (recursively), ACL-filtered against the requester.
// The dispatcher is expected to call it once RecognizeVirtualSubtreeZip
// has matched the request URL.
//
// Parameters:
//   - cfg:    server configuration; only cfg.Root is read here (policy
//     lookup root and base for the URL path handed to the policy check).
//   - w, r:   the HTTP exchange. Only GET and HEAD are accepted; any
//     other method gets 405 with an Allow header. HEAD receives the
//     response headers and no body.
//   - absDir: absolute directory whose subtree is bundled. Its base
//     name becomes both the download filename ("<base>.zip") and the
//     single top-level folder inside the archive.
//
// Permissions: each file is gated by the .zddc policy chain of its
// containing directory; the decision is cached per directory for the
// duration of the request (intended to mirror serveArchiveListing,
// which is not shown here). A directory whose policy cannot be loaded
// is treated as denied.
//
// Skipped entries: directories (other than absDir itself) and files
// whose name starts with "." or "_", plus anything that is not a
// regular file (symlinks, devices, ...). A `.zip` file found in the
// tree is an ordinary regular file to this walk and is bundled as
// opaque bytes.
//
// The response is streamed: headers go out first, then the zip is
// written entry by entry. A 403 after the fact is therefore not
// possible; a caller who may read nothing under absDir receives a valid
// empty zip. absDir need not exist on disk — the walk error for the
// missing root is swallowed and the result is likewise an empty zip.
//
// A file is opened before its zip entry is created, so a file that
// cannot be opened is logged and omitted rather than appearing in the
// archive as a zero-byte entry.
func ServeSubtreeZip(cfg config.Config, w http.ResponseWriter, r *http.Request, absDir string) {
	if r.Method != http.MethodGet && r.Method != http.MethodHead {
		w.Header().Set("Allow", "GET, HEAD")
		http.Error(w, "Method Not Allowed", http.StatusMethodNotAllowed)
		return
	}

	zipName := filepath.Base(absDir) + ".zip"
	prefix := filepath.Base(absDir) // top-level folder name inside the zip

	w.Header().Set("Content-Type", "application/zip")
	w.Header().Set("Content-Disposition", "attachment; filename=\""+sanitizeFilename(zipName)+"\"")
	w.Header().Set("Cache-Control", "no-store")
	w.Header().Set("X-ZDDC-Source", "subtree-zip")
	if r.Method == http.MethodHead {
		return
	}

	principal := PrincipalFromContext(r)
	decider := DeciderFromContext(r)
	ctx := r.Context()

	// Per-directory ACL-decision cache: the policy chain is resolved at
	// most once per directory, however many files it contains.
	aclCache := make(map[string]bool)
	allowed := func(fileDir string) bool {
		if v, ok := aclCache[fileDir]; ok {
			return v
		}
		chain, err := zddc.EffectivePolicy(cfg.Root, fileDir)
		if err != nil {
			// Fail closed: no policy, no access.
			aclCache[fileDir] = false
			return false
		}
		// The policy check is keyed by the directory's URL path relative
		// to cfg.Root; the root itself (or an un-relativisable path) is "/".
		rel, relErr := filepath.Rel(cfg.Root, fileDir)
		urlPath := "/"
		if relErr == nil && rel != "." {
			urlPath = "/" + filepath.ToSlash(rel)
		}
		// NOTE(review): the second result is discarded, as before — confirm
		// it carries nothing that should turn an allow into a deny.
		v, _ := policy.AllowFromChainP(ctx, decider, chain, principal, urlPath)
		aclCache[fileDir] = v
		return v
	}

	zw := zip.NewWriter(w)
	walkErr := filepath.WalkDir(absDir, func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			return nil // skip unreadable entries; covers absDir-doesn't-exist
		}
		name := d.Name()
		hidden := strings.HasPrefix(name, ".") || strings.HasPrefix(name, "_")
		if d.IsDir() {
			// The requested root is always walked, even if its own name
			// looks hidden; hidden sub-directories are pruned whole.
			if path != absDir && hidden {
				return filepath.SkipDir
			}
			return nil
		}
		if !d.Type().IsRegular() {
			return nil // skip symlinks, devices, etc.
		}
		if hidden {
			return nil
		}
		if !allowed(filepath.Dir(path)) {
			return nil
		}
		rel, relErr := filepath.Rel(absDir, path)
		if relErr != nil {
			return nil
		}

		// Open before creating the zip entry: once CreateHeader has run,
		// the entry exists in the archive even if no bytes follow.
		f, oErr := os.Open(path)
		if oErr != nil {
			slog.Warn("subtree-zip: open file", "path", path, "err", oErr)
			return nil // best-effort; stream already in flight
		}
		defer f.Close() // fires when this per-file callback returns

		hdr := &zip.FileHeader{
			Name:   prefix + "/" + filepath.ToSlash(rel),
			Method: zipMethodFor(name),
		}
		// The modification time is optional metadata; leave it unset if
		// the entry's info cannot be read.
		if info, infoErr := d.Info(); infoErr == nil {
			hdr.Modified = info.ModTime()
		}

		entry, cErr := zw.CreateHeader(hdr)
		if cErr != nil {
			return cErr // writer/connection is broken — stop the walk
		}
		if _, copyErr := io.Copy(entry, f); copyErr != nil {
			slog.Warn("subtree-zip: copy file", "path", path, "err", copyErr)
			return copyErr // connection likely gone — stop
		}
		return nil
	})
	if walkErr != nil {
		slog.Warn("subtree-zip: walk aborted", "dir", absDir, "err", walkErr)
	}
	// Close writes the central directory; without it the zip is invalid.
	if err := zw.Close(); err != nil {
		slog.Warn("subtree-zip: close writer", "dir", absDir, "err", err)
	}
}
// sanitizeFilename returns s with every carriage return, line feed,
// double quote and backslash removed — the characters that would break
// a quoted Content-Disposition filename. All other bytes pass through
// untouched. Directory base names rarely contain any of these; this is
// purely defensive.
func sanitizeFilename(s string) string {
	var cleaned strings.Builder
	cleaned.Grow(len(s))
	for i := 0; i < len(s); i++ {
		switch ch := s[i]; ch {
		case '\r', '\n', '"', '\\':
			// Dropped: unsafe inside a quoted header value.
		default:
			cleaned.WriteByte(ch)
		}
	}
	return cleaned.String()
}