diff --git a/zddc/cmd/zddc-server/main.go b/zddc/cmd/zddc-server/main.go index c3b6ad9..770d0c6 100644 --- a/zddc/cmd/zddc-server/main.go +++ b/zddc/cmd/zddc-server/main.go @@ -418,6 +418,10 @@ func runClient(cfg config.Config) { if err := srv.Shutdown(shutdownCtx); err != nil { slog.Error("shutdown error", "err", err) } + // Drain background cache work (revalidation kicked off on hits) + // before exiting, so in-flight sidecar writes finish rather than + // being abandoned mid-flight. + cacheLayer.Wait() slog.Info("stopped") } diff --git a/zddc/internal/cache/cache.go b/zddc/internal/cache/cache.go index d722acf..503cccb 100644 --- a/zddc/internal/cache/cache.go +++ b/zddc/internal/cache/cache.go @@ -67,6 +67,13 @@ type Cache struct { markerOnce sync.Once + // wg tracks background goroutines (cache revalidation on hits, + // mirror-walk hooks) so Wait() can drain them. Without this they + // outlive the request — fine in production until a graceful + // shutdown wants them finished, and in tests they race t.TempDir + // cleanup by writing into the cache root after the test returns. + wg sync.WaitGroup + // onAccess is invoked (when non-nil) after a request is dispatched. // The walker scheduler installs this hook to kick mirror walks based // on incoming traffic. Always called in a goroutine — must not @@ -85,6 +92,25 @@ type Cache struct { // offline writes. func (c *Cache) SetOutbox(o *Outbox) { c.outbox = o } +// goBackground runs fn in a tracked goroutine so Wait can drain +// in-flight background work — cache revalidation kicked off on a hit, +// and the mirror-walk access hook. These must never block the user +// response, but they also shouldn't outlive a graceful shutdown (or, +// in tests, a t.TempDir cleanup that they'd race by writing into the +// cache root after the test returns). +func (c *Cache) goBackground(fn func()) { + c.wg.Add(1) + go func() { + defer c.wg.Done() + fn() + }() +} + +// Wait blocks until all tracked background goroutines have finished. +// Intended for graceful shutdown; tests call it before the temp-dir +// cleanup runs. +func (c *Cache) Wait() { c.wg.Wait() } + // New constructs a Cache from the loaded configuration. Validates // upstream URL, reads the bearer-file (if configured), prepares the // HTTP client honoring SkipTLSVerify, and ensures the cache root @@ -181,7 +207,8 @@ func (c *Cache) ServeHTTP(w http.ResponseWriter, r *http.Request) { // after we've started serving — the user's request never blocks // on walk scheduling. if c.onAccess != nil { - go c.onAccess(r.URL.Path) + urlPath := r.URL.Path + c.goBackground(func() { c.onAccess(urlPath) }) } // Directory listings: try sidecar listing-cache, fall back to @@ -201,7 +228,8 @@ func (c *Cache) ServeHTTP(w http.ResponseWriter, r *http.Request) { if err == nil && !info.IsDir() { c.serveFromDisk(w, r, path, info, "hit") // Background revalidate; never block the user response. - go c.revalidate(r.URL.Path, info.ModTime()) + urlPath, mtime := r.URL.Path, info.ModTime() + c.goBackground(func() { c.revalidate(urlPath, mtime) }) return } } @@ -221,7 +249,8 @@ func (c *Cache) serveDirectory(w http.ResponseWriter, r *http.Request) { info, err := os.Stat(path) if err == nil && !info.IsDir() { c.serveListingFromDisk(w, r, path, info, "hit") - go c.revalidateListing(r.URL.Path, r.Header.Get("Accept"), info.ModTime()) + urlPath, accept, mtime := r.URL.Path, r.Header.Get("Accept"), info.ModTime() + c.goBackground(func() { c.revalidateListing(urlPath, accept, mtime) }) return } } diff --git a/zddc/internal/cache/cache_test.go b/zddc/internal/cache/cache_test.go index 2ec9be0..cdc73af 100644 --- a/zddc/internal/cache/cache_test.go +++ b/zddc/internal/cache/cache_test.go @@ -31,6 +31,14 @@ func newTestCache(t *testing.T, mode string, upstreamHandler http.HandlerFunc) ( if err != nil { t.Fatalf("New: %v", err) } + // Drain background revalidation goroutines before the test's + // t.TempDir cleanup runs. Cleanups fire LIFO and t.TempDir + // registered its RemoveAll first (at the t.TempDir() call above), + // so this runs before it — preventing a revalidate goroutine from + // recreating the cache dir / dropping a temp file mid-RemoveAll + // ("directory not empty"). The upstream stays up (its Close was + // registered earliest, so it runs last). + t.Cleanup(c.Wait) return c, upstream }