package cache import ( "net/http" "net/http/httptest" "os" "path/filepath" "strings" "sync" "testing" "time" "codeberg.org/VARASYS/ZDDC/zddc/internal/config" ) // fakeUpstream returns a handler that serves a small fixed tree: // // /Vendors/Acme/ → JSON listing of [a.txt, b.txt, sub/] // /Vendors/Acme/a.txt → "alpha" // /Vendors/Acme/b.txt → "beta" // /Vendors/Acme/sub/ → JSON listing of [c.txt] // /Vendors/Acme/sub/c.txt → "charlie" // /Vendors/Beta/ → JSON listing of [out-of-scope.txt] // /Vendors/Beta/out-of-scope.txt → "should-not-be-fetched" // // hit counts every URL request so tests can assert which paths the // walker visits. func fakeUpstream() (http.HandlerFunc, *sync.Map) { hits := &sync.Map{} return func(w http.ResponseWriter, r *http.Request) { old, _ := hits.LoadOrStore(r.URL.Path, int64(0)) hits.Store(r.URL.Path, old.(int64)+1) switch r.URL.Path { case "/Vendors/Acme/": w.Header().Set("Content-Type", "application/json") _, _ = w.Write([]byte(`[{"name":"a.txt","is_dir":false},{"name":"b.txt","is_dir":false},{"name":"sub/","is_dir":true}]`)) case "/Vendors/Acme/a.txt": _, _ = w.Write([]byte("alpha")) case "/Vendors/Acme/b.txt": _, _ = w.Write([]byte("beta")) case "/Vendors/Acme/sub/": w.Header().Set("Content-Type", "application/json") _, _ = w.Write([]byte(`[{"name":"c.txt","is_dir":false}]`)) case "/Vendors/Acme/sub/c.txt": _, _ = w.Write([]byte("charlie")) case "/Vendors/Beta/": w.Header().Set("Content-Type", "application/json") _, _ = w.Write([]byte(`[{"name":"out-of-scope.txt","is_dir":false}]`)) case "/Vendors/Beta/out-of-scope.txt": _, _ = w.Write([]byte("should-not-be-fetched")) default: http.NotFound(w, r) } }, hits } func newWalkerTest(t *testing.T, subtrees []string, minInterval time.Duration) (*Cache, *MirrorScheduler, *httptest.Server, *sync.Map) { t.Helper() handler, hits := fakeUpstream() upstream := httptest.NewServer(handler) t.Cleanup(upstream.Close) root := t.TempDir() c, err := New(config.Config{Root: root, Upstream: upstream.URL, Mode: "mirror"}) if err != nil { t.Fatalf("New: %v", err) } sched, err := NewMirrorScheduler(c, subtrees, minInterval, 0) if err != nil { t.Fatalf("NewMirrorScheduler: %v", err) } if sched == nil { t.Fatal("expected scheduler, got nil") } return c, sched, upstream, hits } // hitsCount returns the number of times urlPath was hit on upstream. func hitsCount(hits *sync.Map, urlPath string) int64 { v, ok := hits.Load(urlPath) if !ok { return 0 } return v.(int64) } // waitForFile polls until the named file exists or the deadline // passes. Used to wait on background walker writes without sleep loops. func waitForFile(t *testing.T, path string, deadline time.Duration) { t.Helper() end := time.Now().Add(deadline) for time.Now().Before(end) { if _, err := os.Stat(path); err == nil { return } time.Sleep(20 * time.Millisecond) } t.Fatalf("file did not appear within %v: %s", deadline, path) } func TestNewMirrorScheduler_NoSubtrees(t *testing.T) { root := t.TempDir() c, _ := New(config.Config{Root: root, Upstream: "http://example.com", Mode: "cache"}) sched, err := NewMirrorScheduler(c, nil, 0, 0) if err != nil { t.Fatalf("err: %v", err) } if sched != nil { t.Errorf("expected nil scheduler for empty subtree list") } } func TestMirrorScheduler_NormalizesSubtrees(t *testing.T) { root := t.TempDir() c, _ := New(config.Config{Root: root, Upstream: "http://example.com", Mode: "mirror"}) sched, _ := NewMirrorScheduler(c, []string{"Vendors/Acme/", "/Vendors/Beta", " Public ", "/"}, 0, 0) got := sched.Subtrees() want := []string{"/Vendors/Acme", "/Vendors/Beta", "/Public", "/"} if len(got) != len(want) { t.Fatalf("subtrees = %v, want %v", got, want) } for i := range got { if got[i] != want[i] { t.Errorf("subtree[%d] = %q, want %q", i, got[i], want[i]) } } } func TestMatchSubtree(t *testing.T) { c, _ := New(config.Config{Root: t.TempDir(), Upstream: "http://example.com", Mode: "mirror"}) sched, _ := NewMirrorScheduler(c, []string{"/Vendors/Acme", "/Public"}, 0, 0) cases := []struct { url string want string }{ {"/Vendors/Acme/foo.txt", "/Vendors/Acme"}, {"/Vendors/Acme", "/Vendors/Acme"}, {"/Vendors/Acme/sub/x.txt", "/Vendors/Acme"}, {"/Vendors/Beta/x.txt", ""}, {"/Public/file", "/Public"}, {"/Other", ""}, {"", ""}, } for _, tc := range cases { if got := sched.matchSubtree(tc.url); got != tc.want { t.Errorf("matchSubtree(%q) = %q, want %q", tc.url, got, tc.want) } } } func TestMatchSubtree_RootMatchesEverything(t *testing.T) { c, _ := New(config.Config{Root: t.TempDir(), Upstream: "http://example.com", Mode: "mirror"}) sched, _ := NewMirrorScheduler(c, []string{"/"}, 0, 0) for _, u := range []string{"/anything", "/Project/X", "/"} { if got := sched.matchSubtree(u); got != "/" { t.Errorf("matchSubtree(%q) = %q, want /", u, got) } } } func TestTrigger_FetchesSubtree(t *testing.T) { c, sched, _, hits := newWalkerTest(t, []string{"/Vendors/Acme"}, 5*time.Millisecond) // Trigger via a request URL under the subtree. sched.Trigger("/Vendors/Acme/a.txt") // Wait for the walker to drop all expected files. waitForFile(t, filepath.Join(c.root, "Vendors", "Acme", "a.txt"), 2*time.Second) waitForFile(t, filepath.Join(c.root, "Vendors", "Acme", "b.txt"), 2*time.Second) waitForFile(t, filepath.Join(c.root, "Vendors", "Acme", "sub", "c.txt"), 2*time.Second) // JSON listing sidecars exist for the walked dirs. waitForFile(t, filepath.Join(c.root, "Vendors", "Acme", listingCachePrefix+"json"), 2*time.Second) waitForFile(t, filepath.Join(c.root, "Vendors", "Acme", "sub", listingCachePrefix+"json"), 2*time.Second) // Out-of-scope path was not visited. if got := hitsCount(hits, "/Vendors/Beta/"); got != 0 { t.Errorf("walker hit out-of-scope /Vendors/Beta/ %d times", got) } } func TestTrigger_OutOfScopeIsNoOp(t *testing.T) { _, sched, _, hits := newWalkerTest(t, []string{"/Vendors/Acme"}, 5*time.Millisecond) sched.Trigger("/Other/path.txt") time.Sleep(150 * time.Millisecond) // No upstream calls should have happened. count := 0 hits.Range(func(_, _ interface{}) bool { count++; return true }) if count != 0 { t.Errorf("triggered %d upstream calls for out-of-scope URL", count) } } func TestTrigger_RateLimitsRapidAccesses(t *testing.T) { _, sched, _, hits := newWalkerTest(t, []string{"/Vendors/Acme"}, 1*time.Hour) sched.Trigger("/Vendors/Acme/a.txt") // Wait for first walk to complete. deadline := time.Now().Add(2 * time.Second) for time.Now().Before(deadline) { sched.mu.Lock() flight := sched.inFlight["/Vendors/Acme"] st := sched.state["/Vendors/Acme"] sched.mu.Unlock() if !flight && !st.LastWalkAt.IsZero() { break } time.Sleep(20 * time.Millisecond) } first := hitsCount(hits, "/Vendors/Acme/a.txt") // Subsequent triggers within the min-interval are no-ops. for i := 0; i < 5; i++ { sched.Trigger("/Vendors/Acme/a.txt") } time.Sleep(100 * time.Millisecond) second := hitsCount(hits, "/Vendors/Acme/a.txt") if second != first { t.Errorf("rate limit failed: hits went %d → %d", first, second) } } func TestTrigger_AfterIntervalElapsesWalksAgain(t *testing.T) { _, sched, _, hits := newWalkerTest(t, []string{"/Vendors/Acme"}, 50*time.Millisecond) sched.Trigger("/Vendors/Acme/a.txt") deadline := time.Now().Add(2 * time.Second) for time.Now().Before(deadline) { sched.mu.Lock() flight := sched.inFlight["/Vendors/Acme"] sched.mu.Unlock() if !flight { break } time.Sleep(20 * time.Millisecond) } first := hitsCount(hits, "/Vendors/Acme/a.txt") // Wait past min-interval, trigger again, walk should re-fire. time.Sleep(100 * time.Millisecond) sched.Trigger("/Vendors/Acme/a.txt") deadline = time.Now().Add(2 * time.Second) for time.Now().Before(deadline) { if hitsCount(hits, "/Vendors/Acme/a.txt") > first { break } time.Sleep(20 * time.Millisecond) } second := hitsCount(hits, "/Vendors/Acme/a.txt") if second <= first { t.Errorf("expected walk after interval; hits %d → %d", first, second) } } func TestTrigger_PurgesOrphanedFiles(t *testing.T) { c, sched, _, _ := newWalkerTest(t, []string{"/Vendors/Acme"}, 5*time.Millisecond) // Pre-seed an orphan that's NOT in upstream's listing. if err := os.MkdirAll(filepath.Join(c.root, "Vendors", "Acme"), 0o755); err != nil { t.Fatalf("mkdir: %v", err) } orphan := filepath.Join(c.root, "Vendors", "Acme", "stale.txt") if err := os.WriteFile(orphan, []byte("orphan"), 0o644); err != nil { t.Fatalf("seed: %v", err) } sched.Trigger("/Vendors/Acme/a.txt") waitForFile(t, filepath.Join(c.root, "Vendors", "Acme", "a.txt"), 2*time.Second) // Wait for walk to fully complete (orphan purge happens after fetches). deadline := time.Now().Add(2 * time.Second) for time.Now().Before(deadline) { sched.mu.Lock() flight := sched.inFlight["/Vendors/Acme"] sched.mu.Unlock() if !flight { break } time.Sleep(20 * time.Millisecond) } if _, err := os.Stat(orphan); !os.IsNotExist(err) { t.Errorf("orphan file not purged: %v", err) } } func TestPersistedState_SurvivesRestart(t *testing.T) { c, sched, _, _ := newWalkerTest(t, []string{"/Vendors/Acme"}, 5*time.Millisecond) sched.Trigger("/Vendors/Acme/a.txt") deadline := time.Now().Add(2 * time.Second) for time.Now().Before(deadline) { sched.mu.Lock() flight := sched.inFlight["/Vendors/Acme"] st := sched.state["/Vendors/Acme"] sched.mu.Unlock() if !flight && !st.LastWalkAt.IsZero() { break } time.Sleep(20 * time.Millisecond) } // State file should exist. statePath := filepath.Join(c.root, MirrorStateFile) if _, err := os.Stat(statePath); err != nil { t.Fatalf("state file missing: %v", err) } // New scheduler against the same root reads the prior state and // honors the rate-limit gate. sched2, _ := NewMirrorScheduler(c, []string{"/Vendors/Acme"}, 1*time.Hour, 0) st := sched2.state["/Vendors/Acme"] if st.LastWalkAt.IsZero() { t.Error("restart did not load prior LastWalkAt from state file") } } func TestTrigger_ConcurrentSameSubtreeDoesNotDoubleWalk(t *testing.T) { _, sched, _, hits := newWalkerTest(t, []string{"/Vendors/Acme"}, 1*time.Hour) var wg sync.WaitGroup for i := 0; i < 10; i++ { wg.Add(1) go func() { defer wg.Done() sched.Trigger("/Vendors/Acme/a.txt") }() } wg.Wait() deadline := time.Now().Add(2 * time.Second) for time.Now().Before(deadline) { sched.mu.Lock() flight := sched.inFlight["/Vendors/Acme"] sched.mu.Unlock() if !flight { break } time.Sleep(20 * time.Millisecond) } // Each file should have been fetched exactly once even though we // triggered 10 times concurrently. if got := hitsCount(hits, "/Vendors/Acme/a.txt"); got != 1 { t.Errorf("a.txt fetched %d times, want 1", got) } } func TestServeHTTP_KicksMirrorOnAccess(t *testing.T) { // End-to-end through the cache layer's ServeHTTP, verifying the // onAccess hook fires and the walker prefetches. upstream := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { switch r.URL.Path { case "/Vendors/Acme/": w.Header().Set("Content-Type", "application/json") _, _ = w.Write([]byte(`[{"name":"only.txt","is_dir":false}]`)) case "/Vendors/Acme/only.txt": _, _ = w.Write([]byte("data")) default: http.NotFound(w, r) } })) defer upstream.Close() root := t.TempDir() c, err := New(config.Config{Root: root, Upstream: upstream.URL, Mode: "mirror"}) if err != nil { t.Fatalf("New: %v", err) } sched, err := NewMirrorScheduler(c, []string{"/Vendors/Acme"}, 5*time.Millisecond, 0) if err != nil { t.Fatalf("NewMirrorScheduler: %v", err) } rec := httptest.NewRecorder() c.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/Vendors/Acme/only.txt", nil)) if rec.Code != http.StatusOK { t.Fatalf("status = %d", rec.Code) } // The user's request itself fetched only.txt. Then the walker // (kicked async) fetches the listing + only.txt. So only.txt may // be fetched twice. The walker MUST have hit the listing. waitForFile(t, filepath.Join(root, "Vendors", "Acme", listingCachePrefix+"json"), 2*time.Second) waitForFile(t, filepath.Join(root, "Vendors", "Acme", "only.txt"), 2*time.Second) // Drain the walk before t.TempDir cleanup races with file writes. waitForWalkDrain(t, sched, "/Vendors/Acme", 2*time.Second) } // waitForWalkDrain blocks until no walk is in flight for subtree. // Avoids TempDir cleanup races where the walker is still writing // when the test finishes. func waitForWalkDrain(t *testing.T, sched *MirrorScheduler, subtree string, deadline time.Duration) { t.Helper() end := time.Now().Add(deadline) for time.Now().Before(end) { sched.mu.Lock() flight := sched.inFlight[subtree] sched.mu.Unlock() if !flight { return } time.Sleep(20 * time.Millisecond) } t.Fatalf("walk for %s did not drain within %v", subtree, deadline) } func TestPersistMirrorState_AtomicWrite(t *testing.T) { dir := t.TempDir() path := filepath.Join(dir, MirrorStateFile) state := map[string]subtreeState{ "/Vendors/Acme": {LastWalkAt: time.Date(2026, 5, 8, 12, 0, 0, 0, time.UTC)}, } persistMirrorState(path, state) got := loadMirrorState(path) if got["/Vendors/Acme"].LastWalkAt.IsZero() { t.Error("round-trip lost LastWalkAt") } // No leftover tmp files. entries, _ := os.ReadDir(dir) for _, e := range entries { if strings.HasPrefix(e.Name(), ".zddc-mirror-state-tmp-") { t.Errorf("leftover tmp: %s", e.Name()) } } } func TestLoadMirrorState_MissingReturnsEmpty(t *testing.T) { got := loadMirrorState(filepath.Join(t.TempDir(), "does-not-exist")) if got == nil || len(got) != 0 { t.Errorf("expected empty map, got %v", got) } } func TestLoadMirrorState_CorruptReturnsEmpty(t *testing.T) { dir := t.TempDir() path := filepath.Join(dir, "garbage") _ = os.WriteFile(path, []byte("not json"), 0o644) got := loadMirrorState(path) if got == nil || len(got) != 0 { t.Errorf("corrupt should yield empty map, got %v", got) } } // TestWalker_HostileUpstreamCannotEscapeCacheRoot: a malicious upstream // listing that includes ".." or slash-bearing entry names must not steer // the walker's purge or fetch outside the cache root. The walker drops // such entries early; purgeOrphans's containment check is a second line // of defense if any slip through. func TestWalker_HostileUpstreamCannotEscapeCacheRoot(t *testing.T) { // Set up a parent directory we'd like NOT to lose files from. parent := t.TempDir() sentinel := filepath.Join(parent, "sentinel.txt") if err := os.WriteFile(sentinel, []byte("must-not-be-deleted"), 0o644); err != nil { t.Fatal(err) } cacheRoot := filepath.Join(parent, "cache") if err := os.MkdirAll(cacheRoot, 0o755); err != nil { t.Fatal(err) } // Hostile upstream: returns a listing for /Vendors/Acme/ that // includes a ".." entry. A naive walker would recurse into // /Vendors/Acme/../ and then purgeOrphans would operate on the // PARENT of cacheRoot — deleting the sentinel. upstream := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { switch r.URL.Path { case "/Vendors/Acme/": w.Header().Set("Content-Type", "application/json") _, _ = w.Write([]byte(`[ {"name":"a.txt","is_dir":false}, {"name":"../","is_dir":true}, {"name":"sub/with/slashes","is_dir":false} ]`)) case "/Vendors/Acme/a.txt": _, _ = w.Write([]byte("alpha")) default: http.NotFound(w, r) } })) defer upstream.Close() c, err := New(config.Config{Root: cacheRoot, Upstream: upstream.URL, Mode: "mirror"}) if err != nil { t.Fatalf("New: %v", err) } sched, err := NewMirrorScheduler(c, []string{"/Vendors/Acme"}, 5*time.Millisecond, 0) if err != nil { t.Fatalf("NewMirrorScheduler: %v", err) } sched.Trigger("/Vendors/Acme/a.txt") waitForFile(t, filepath.Join(cacheRoot, "Vendors", "Acme", "a.txt"), 2*time.Second) // The sentinel outside the cache root must still be there. if _, err := os.Stat(sentinel); err != nil { t.Fatalf("sentinel was removed — walker escaped cache root: %v", err) } }