Compare commits

..

3 commits

Author SHA1 Message Date
d0d8423ac6 test(handler): un-skip the profile existence-hiding invariant
TestInvariant_ProfileAdminEndpointsHideFromNonAdmins was skipped pending the
ServeProfile dispatcher refactor — which has since landed (ServeProfile in
profilehandler.go is the entry point, with an adminOnly wrapper that denies
with 404). Implement the test against it: non-admin, anonymous, and
un-elevated-admin callers must get 404 (never 403/200) on every admin-gated
sub-resource (/whoami, /config, /logs, /effective-policy, /reindex), so the
namespace can't be enumerated; an elevated admin gets through (/whoami,
/config positive control). Locks in the existence-hiding security property
that was previously unverified.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-21 16:41:29 -05:00
4f021d8abc fix(archive): log swallowed walkdir errors during transmittal indexing
indexTransmittalFolder silently dropped per-entry walk errors (`_ = err`),
so a permission or filesystem error on one file vanished without a trace —
the operator saw "missing from the index" with no clue why. Log it (the
slog.Warn the comment had already drafted) and keep indexing the rest.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-21 16:41:29 -05:00
1402864c4c fix(cache): track background revalidation goroutines; drain on shutdown + in tests
Root cause of the flaky cache tests (TestServeHTTP_DirectoryListingsCachedAsSidecar
and the other hit-path tests, ~1-in-many under parallel load): on a cache
hit, ServeHTTP launches `go c.revalidate(...)` / `go c.revalidateListing(...)`,
which write into the cache root (MkdirAll + CreateTemp + Rename). Those
goroutines outlive the request — and in tests, the test — so they race
t.TempDir's RemoveAll cleanup, recreating the dir or dropping a temp file
mid-removal. testing then reports "TempDir RemoveAll cleanup: ... directory
not empty" and marks the test failed (with a 0.00s body, no assertion line).
It only surfaced under the full parallel suite / -count because the timing
has to collide.

Fix: track these background goroutines in a sync.WaitGroup via a goBackground
helper, and expose Wait(). newTestCache registers t.Cleanup(c.Wait) — cleanups
fire LIFO and t.TempDir registered its RemoveAll first, so the drain runs
before it (upstream Close was registered earliest, so it runs last and stays
up while goroutines finish). runClient also calls cacheLayer.Wait() after
srv.Shutdown so in-flight sidecar writes complete on graceful shutdown rather
than being abandoned.

Verified: cache package at -count=200 reliably failed before, passes clean
after (0 failures, 0 cleanup errors); full `go test ./...` + vet green.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-21 16:21:37 -05:00
5 changed files with 95 additions and 7 deletions

View file

@ -418,6 +418,10 @@ func runClient(cfg config.Config) {
if err := srv.Shutdown(shutdownCtx); err != nil {
slog.Error("shutdown error", "err", err)
}
// Drain background cache work (revalidation kicked off on hits)
// before exiting, so in-flight sidecar writes finish rather than
// being abandoned mid-flight.
cacheLayer.Wait()
slog.Info("stopped")
}

View file

@ -120,8 +120,10 @@ func walkAndIndex(idx *Index, fsRoot, dirAbs, serverDir string) error {
func indexTransmittalFolder(idx *Index, fsRoot, folderAbs, folderServerPath, date string) error {
return filepath.WalkDir(folderAbs, func(path string, d os.DirEntry, err error) error {
if err != nil {
// Log the error but continue indexing other files
_ = err // would log here: slog.Warn("walkdir error", "path", path, "err", err)
// Log and continue indexing the rest of the folder — a
// permission or FS error on one entry shouldn't abort the
// whole transmittal index or vanish without a trace.
slog.Warn("transmittal index: walkdir error", "path", path, "err", err)
return nil
}
if d.IsDir() {

View file

@ -67,6 +67,13 @@ type Cache struct {
markerOnce sync.Once
// wg tracks background goroutines (cache revalidation on hits,
// mirror-walk hooks) so Wait() can drain them. Without this they
// outlive the request — fine in production until a graceful
// shutdown wants them finished, and in tests they race t.TempDir
// cleanup by writing into the cache root after the test returns.
wg sync.WaitGroup
// onAccess is invoked (when non-nil) after a request is dispatched.
// The walker scheduler installs this hook to kick mirror walks based
// on incoming traffic. Always called in a goroutine — must not
@ -85,6 +92,25 @@ type Cache struct {
// offline writes.
func (c *Cache) SetOutbox(o *Outbox) { c.outbox = o }
// goBackground runs fn in a tracked goroutine so Wait can drain
// in-flight background work — cache revalidation kicked off on a hit,
// and the mirror-walk access hook. These must never block the user
// response, but they also shouldn't outlive a graceful shutdown (or,
// in tests, a t.TempDir cleanup that they'd race by writing into the
// cache root after the test returns).
func (c *Cache) goBackground(fn func()) {
c.wg.Add(1)
go func() {
defer c.wg.Done()
fn()
}()
}
// Wait blocks until all tracked background goroutines have finished.
// Intended for graceful shutdown; tests call it before the temp-dir
// cleanup runs.
func (c *Cache) Wait() { c.wg.Wait() }
// New constructs a Cache from the loaded configuration. Validates
// upstream URL, reads the bearer-file (if configured), prepares the
// HTTP client honoring SkipTLSVerify, and ensures the cache root
@ -181,7 +207,8 @@ func (c *Cache) ServeHTTP(w http.ResponseWriter, r *http.Request) {
// after we've started serving — the user's request never blocks
// on walk scheduling.
if c.onAccess != nil {
go c.onAccess(r.URL.Path)
urlPath := r.URL.Path
c.goBackground(func() { c.onAccess(urlPath) })
}
// Directory listings: try sidecar listing-cache, fall back to
@ -201,7 +228,8 @@ func (c *Cache) ServeHTTP(w http.ResponseWriter, r *http.Request) {
if err == nil && !info.IsDir() {
c.serveFromDisk(w, r, path, info, "hit")
// Background revalidate; never block the user response.
go c.revalidate(r.URL.Path, info.ModTime())
urlPath, mtime := r.URL.Path, info.ModTime()
c.goBackground(func() { c.revalidate(urlPath, mtime) })
return
}
}
@ -221,7 +249,8 @@ func (c *Cache) serveDirectory(w http.ResponseWriter, r *http.Request) {
info, err := os.Stat(path)
if err == nil && !info.IsDir() {
c.serveListingFromDisk(w, r, path, info, "hit")
go c.revalidateListing(r.URL.Path, r.Header.Get("Accept"), info.ModTime())
urlPath, accept, mtime := r.URL.Path, r.Header.Get("Accept"), info.ModTime()
c.goBackground(func() { c.revalidateListing(urlPath, accept, mtime) })
return
}
}

View file

@ -31,6 +31,14 @@ func newTestCache(t *testing.T, mode string, upstreamHandler http.HandlerFunc) (
if err != nil {
t.Fatalf("New: %v", err)
}
// Drain background revalidation goroutines before the test's
// t.TempDir cleanup runs. Cleanups fire LIFO and t.TempDir
// registered its RemoveAll first (at the t.TempDir() call above),
// so this runs before it — preventing a revalidate goroutine from
// recreating the cache dir / dropping a temp file mid-RemoveAll
// ("directory not empty"). The upstream stays up (its Close was
// registered earliest, so it runs last).
t.Cleanup(c.Wait)
return c, upstream
}

View file

@ -459,8 +459,53 @@ func TestInvariant_UnelevatedAdminNoSilentBypass(t *testing.T) {
func TestInvariant_ProfileAdminEndpointsHideFromNonAdmins(t *testing.T) {
// These checks lock in the existence-hiding property: non-admins must
// see 404, never 403, so they can't probe which paths exist.
t.Skip("requires the profile handler dispatcher entry point; skip until the refactor confirms ServeProfile signature")
// see 404, never 403, so they can't probe which admin-only resources
// exist. ServeProfile is the dispatcher (the refactor this test waited
// on); its adminOnly wrapper denies with 404 before the sub-handler
// runs, so a nil ring/index is safe for the non-admin paths.
cfg, _ := invariantsFixture(t)
adminEndpoints := []string{"/whoami", "/config", "/logs", "/effective-policy", "/reindex"}
profileGet := func(sub, email string, elevated bool) *httptest.ResponseRecorder {
req := httptest.NewRequest(http.MethodGet, ProfilePathPrefix+sub, nil)
ctx := context.WithValue(req.Context(), EmailKey, email)
ctx = context.WithValue(ctx, ElevatedKey, elevated)
req = req.WithContext(ctx)
rec := httptest.NewRecorder()
ServeProfile(cfg, nil, nil, rec, req)
return rec
}
// Non-admin (eve, project_team only) and anonymous callers must get
// 404 on every admin endpoint — never 403, never 200.
for _, who := range []struct {
email string
elevated bool
label string
}{
{"eve@example.com", false, "non-admin"},
{"", false, "anonymous"},
{"admin@example.com", false, "un-elevated admin"}, // sudo-style: no authority until elevated
} {
for _, sub := range adminEndpoints {
rec := profileGet(sub, who.email, who.elevated)
if rec.Code != http.StatusNotFound {
t.Errorf("%s GET /.profile%s = %d, want 404 (existence-hiding)", who.label, sub, rec.Code)
}
}
}
// Positive control: an elevated root admin must NOT get 404 on the
// gated routes that need no ring/index — proving the 404s above are
// the admin gate, not a missing route. (/whoami and /config don't
// touch the log ring or archive index.)
for _, sub := range []string{"/whoami", "/config"} {
rec := profileGet(sub, "admin@example.com", true)
if rec.Code == http.StatusNotFound {
t.Errorf("elevated admin GET /.profile%s = 404; the gate should admit admins", sub)
}
}
}
// dump prints the rec body when t.Logf would help debugging — used in