ZDDC/zddc/internal/archive/index_test.go
2026-06-11 13:32:31 -05:00

438 lines
13 KiB
Go

package archive
import (
"bytes"
"log/slog"
"os"
"path/filepath"
"sort"
"strings"
"testing"
)
// mkTransmittal creates the directory fsRoot/folderName (including any
// missing parents) and writes a one-byte placeholder file into it for every
// name in files. Any filesystem error aborts the calling test via t.Fatalf.
func mkTransmittal(t *testing.T, fsRoot, folderName string, files ...string) {
	t.Helper()

	folder := filepath.Join(fsRoot, folderName)
	if mkErr := os.MkdirAll(folder, 0o755); mkErr != nil {
		t.Fatalf("mkdir %s: %v", folder, mkErr)
	}

	// File contents are irrelevant to the callers; only the names matter.
	placeholder := []byte("x")
	for i := range files {
		target := filepath.Join(folder, files[i])
		if wrErr := os.WriteFile(target, placeholder, 0o644); wrErr != nil {
			t.Fatalf("write %s: %v", target, wrErr)
		}
	}
}
// TestCompareRevisions_DraftOrdering checks the sign of compareRevisions for
// pairs of tilde-prefixed (draft) and plain revision labels. Only the sign
// of the result is compared, never its magnitude.
func TestCompareRevisions_DraftOrdering(t *testing.T) {
	type pair struct {
		a, b string
		want int // sign only
	}
	table := []pair{
		{"~A", "A", -1},
		{"~A", "~B", -1},
		{"A", "B", -1},
		{"~A", "~A", 0},
		{"A", "~A", 1},
	}

	for _, tc := range table {
		// Collapse the raw comparison result to -1 / 0 / +1.
		sign := 0
		switch res := compareRevisions(tc.a, tc.b); {
		case res < 0:
			sign = -1
		case res > 0:
			sign = 1
		}
		if sign == tc.want {
			continue
		}
		t.Errorf("compareRevisions(%q, %q) sign = %d, want %d", tc.a, tc.b, sign, tc.want)
	}
}
// TestIndexAndResolve_DraftOnly indexes a single ProjectA transmittal whose
// only file is the ~A revision of tracking number 123, then checks the index
// contents and which (project, URL name) pairs Resolve accepts.
func TestIndexAndResolve_DraftOnly(t *testing.T) {
	fsRoot := t.TempDir()
	mkTransmittal(t, fsRoot, "ProjectA/2025-01-01_T1 (IFR) - Title",
		"123_~A (IFR) - Title.pdf",
	)

	idx, buildErr := BuildIndex(fsRoot)
	if buildErr != nil {
		t.Fatalf("BuildIndex: %v", buildErr)
	}

	bucket, found := idx.ByProject["ProjectA"]
	if !found {
		t.Fatalf("ProjectA bucket not indexed; ByProject = %v", idx.ByProject)
	}
	tracked, found := bucket.ByTracking["123"]
	if !found {
		t.Fatalf("tracking 123 not indexed in ProjectA")
	}
	if rev := tracked.HighestBaseRev; rev != "~A" {
		t.Errorf("HighestBaseRev = %q, want ~A", rev)
	}

	// resolves reports only whether Resolve succeeds; the target is ignored.
	resolves := func(proj, urlName string) bool {
		_, ok := Resolve(idx, proj, urlName)
		return ok
	}

	if !resolves("ProjectA", "123.html") {
		t.Errorf("Resolve(ProjectA, 123.html) failed")
	}
	if !resolves("ProjectA", "123_~A.html") {
		t.Errorf("Resolve(ProjectA, 123_~A.html) failed")
	}
	// Same tracking number queried under a different project must NOT resolve.
	if resolves("ProjectB", "123.html") {
		t.Errorf("Resolve(ProjectB, 123.html) should fail — 123 belongs to ProjectA")
	}
	// Empty project — /.archive/ at the very root — never resolves.
	if resolves("", "123.html") {
		t.Errorf("Resolve(\"\", 123.html) should fail — empty project must 404")
	}
}
// TestIndexAndResolve_DraftWithModifier indexes a draft base file (123_~A)
// and a later modifier file on the same draft (123_~A+C1), then checks that
// the modifier URL name resolves within ProjectA.
func TestIndexAndResolve_DraftWithModifier(t *testing.T) {
	root := t.TempDir()
	mkTransmittal(t, root, "ProjectA/2025-01-01_T1 (IFR) - Title",
		"123_~A (IFR) - Title.pdf",
	)
	mkTransmittal(t, root, "ProjectA/2025-02-01_T2 (RTN) - Comments",
		"123_~A+C1 (RTN) - Comments.pdf",
	)

	// Stop on an indexing error so it is reported as such, not as a
	// confusing Resolve failure (or a nil dereference) further down.
	idx, err := BuildIndex(root)
	if err != nil {
		t.Fatalf("BuildIndex: %v", err)
	}

	if _, ok := Resolve(idx, "ProjectA", "123_~A+C1.html"); !ok {
		t.Errorf("Resolve(ProjectA, 123_~A+C1.html) failed")
	}
}
// "First chronologically found version of the latest rev": when the same rev
// appears in two transmittals within ONE project, the earlier date's copy
// wins. (Cross-project duplicates are handled separately — see
// TestCrossProject_NoCollision.)
func TestRecordFile_FirstChronologicalWins(t *testing.T) {
	root := t.TempDir()
	// The later-dated folder is created first so that creation order cannot
	// be what makes the earlier-dated copy win.
	mkTransmittal(t, root, "ProjectA/2025-03-01_Late (IFR) - Title",
		"123_A (IFR) - Title.pdf",
	)
	mkTransmittal(t, root, "ProjectA/2025-01-01_Early (IFR) - Title",
		"123_A (IFR) - Title.pdf",
	)

	// Stop on an indexing error so it is reported as such rather than as a
	// misleading Resolve failure.
	idx, err := BuildIndex(root)
	if err != nil {
		t.Fatalf("BuildIndex: %v", err)
	}

	target, ok := Resolve(idx, "ProjectA", "123_A.html")
	if !ok {
		t.Fatalf("Resolve(ProjectA, 123_A.html) failed")
	}
	if !contains(target, "2025-01-01_Early") {
		t.Errorf("got %q, want path under 2025-01-01_Early/", target)
	}
}
// Same tracking number issued under two different projects must NOT collide:
// each project's bucket carries its own copy and resolves independently, and
// each project's listing only points at files of that project.
func TestCrossProject_NoCollision(t *testing.T) {
	root := t.TempDir()
	mkTransmittal(t, root, "ProjectA/2025-01-01_T1 (IFR) - Title",
		"123_A (IFR) - Title.pdf",
	)
	mkTransmittal(t, root, "ProjectB/2025-06-01_T9 (IFR) - Different Title",
		"123_A (IFR) - Different Title.pdf",
	)

	// Stop on an indexing error so it is reported as such rather than as a
	// misleading Resolve failure.
	idx, err := BuildIndex(root)
	if err != nil {
		t.Fatalf("BuildIndex: %v", err)
	}

	a, okA := Resolve(idx, "ProjectA", "123_A.html")
	if !okA {
		t.Fatalf("Resolve(ProjectA, 123_A.html) failed")
	}
	if !contains(a, "ProjectA/") {
		t.Errorf("ProjectA target = %q, want path under ProjectA/", a)
	}
	b, okB := Resolve(idx, "ProjectB", "123_A.html")
	if !okB {
		t.Fatalf("Resolve(ProjectB, 123_A.html) failed")
	}
	if !contains(b, "ProjectB/") {
		t.Errorf("ProjectB target = %q, want path under ProjectB/", b)
	}
	if a == b {
		t.Errorf("ProjectA and ProjectB targets must differ; got %q == %q", a, b)
	}

	// Each project's listing surfaces the shared URL name (the same name in
	// both listings is expected), but none of its entries may point into the
	// other project's directory.
	listings := []struct {
		project, other string
	}{
		{"ProjectA", "ProjectB"},
		{"ProjectB", "ProjectA"},
	}
	for _, lc := range listings {
		entries := idx.AllEntries(lc.project)
		found := false
		for _, e := range entries {
			if e.URLName == "123_A.html" {
				found = true
			}
			if contains(e.TargetPath, lc.other+"/") {
				t.Errorf("%s listing entry %q targets %q, which is under %s/",
					lc.project, e.URLName, e.TargetPath, lc.other)
			}
		}
		if !found {
			t.Errorf("%s listing is missing 123_A.html; got %v", lc.project, entryNames(entries))
		}
	}
}
// AllEntries: every (tracking) gets <tracking>.html (highest) AND a
// <tracking>_<rev>.html for every base revision present, scoped to project.
// The listing must be sorted by URL name, and an empty or unknown project
// key must yield nil.
func TestAllEntries_PerRevisionSurfaced(t *testing.T) {
	root := t.TempDir()
	mkTransmittal(t, root, "ProjectA/2025-01-01_T1 (IFR) - Title",
		"123_~A (IFR) - Title.pdf",
	)
	mkTransmittal(t, root, "ProjectA/2025-03-01_T3 (IFC) - Title",
		"123_A (IFC) - Title.pdf",
		"456_0 (IFR) - Other.pdf",
	)

	// Stop on an indexing error so it is reported as such rather than as a
	// cascade of "missing entry" failures.
	idx, err := BuildIndex(root)
	if err != nil {
		t.Fatalf("BuildIndex: %v", err)
	}

	entries := idx.AllEntries("ProjectA")
	got := make(map[string]string, len(entries))
	for _, e := range entries {
		got[e.URLName] = e.TargetPath
	}

	// Highest-rev shortcut + each per-rev redirect should be present.
	wantNames := []string{
		"123.html",    // highest of 123 → A
		"123_A.html",  // explicit A
		"123_~A.html", // explicit draft
		"456.html",    // highest of 456 → 0
		"456_0.html",  // explicit 0
	}
	for _, n := range wantNames {
		if _, ok := got[n]; !ok {
			t.Errorf("missing entry %q; got %v", n, sortedKeys(got))
		}
	}

	// 123.html should resolve to the same path as 123_A.html (both point to
	// the highest-rev's first-chronological copy).
	if got["123.html"] != got["123_A.html"] {
		t.Errorf("123.html (%q) != 123_A.html (%q); should both resolve to highest",
			got["123.html"], got["123_A.html"])
	}

	// Sort: <tracking>.html sorts before <tracking>_*.html (because '.'<'_').
	for i := 1; i < len(entries); i++ {
		if entries[i-1].URLName > entries[i].URLName {
			t.Errorf("AllEntries not sorted: %q before %q", entries[i-1].URLName, entries[i].URLName)
		}
	}

	// Empty project key returns nil — root .archive doesn't exist.
	if rootEntries := idx.AllEntries(""); rootEntries != nil {
		t.Errorf("AllEntries(\"\") = %v, want nil", rootEntries)
	}
	if unknown := idx.AllEntries("NoSuchProject"); unknown != nil {
		t.Errorf("AllEntries(NoSuchProject) = %v, want nil", unknown)
	}
}
// Modifier-only files (no base) don't get a <tracking>.html or
// <tracking>_<rev>.html entry — the redirect would have nowhere to go since
// re.BasePath is empty. They remain reachable via <tracking>_<rev>+<mod>.html
// through the resolver but are not surfaced in the listing.
func TestAllEntries_ModifierOnlyNoBaseSkipped(t *testing.T) {
	root := t.TempDir()
	mkTransmittal(t, root, "ProjectA/2025-02-01_T2 (RTN) - Comments",
		"123_~A+C1 (RTN) - Comments.pdf",
	)

	// Without this check a failed build would leave the loop below with
	// nothing to inspect, and the test would pass without testing anything.
	idx, err := BuildIndex(root)
	if err != nil {
		t.Fatalf("BuildIndex: %v", err)
	}

	for _, e := range idx.AllEntries("ProjectA") {
		if e.URLName == "123.html" || e.URLName == "123_~A.html" {
			t.Errorf("unexpected entry %q (no base file exists)", e.URLName)
		}
	}
}
// Within-project collision: two transmittal folders in the same project both
// carry a file for the same (tracking, rev). The chronologically earlier copy
// must still win, and a log record naming the collision, the project and the
// tracking number must be emitted so the authoring mistake is diagnosable.
//
// (Cross-project duplicates are NOT collisions — they live in separate
// buckets. See TestCrossProject_NoCollision.)
func TestRecordFile_WithinProjectCollisionLogged(t *testing.T) {
	fsRoot := t.TempDir()
	mkTransmittal(t, fsRoot, "ProjectA/2025-03-01_Late (IFR) - Title",
		"123_A (IFR) - Title.pdf",
	)
	// Different transmittal folder, same tracking+rev — e.g. operator
	// re-issued under a different cover sheet by mistake.
	mkTransmittal(t, fsRoot, "ProjectA/2025-01-01_Early (IFR) - Title",
		"123_A (IFR) - Title.pdf",
	)

	// Route the default slog logger into a buffer for the duration of the
	// test, restoring the previous default on exit.
	var captured bytes.Buffer
	original := slog.Default()
	handler := slog.NewTextHandler(&captured, &slog.HandlerOptions{Level: slog.LevelDebug})
	slog.SetDefault(slog.New(handler))
	defer slog.SetDefault(original)

	idx, buildErr := BuildIndex(fsRoot)
	if buildErr != nil {
		t.Fatalf("BuildIndex: %v", buildErr)
	}

	output := captured.String()
	if !strings.Contains(output, "within-project revision collision") {
		t.Errorf("expected collision WARN; got log:\n%s", output)
	}
	if !strings.Contains(output, "project=ProjectA") {
		t.Errorf("expected project field in log; got:\n%s", output)
	}
	if !strings.Contains(output, "tracking=123") {
		t.Errorf("expected tracking field in log; got:\n%s", output)
	}

	// Chronological winner still wins.
	winner, resolved := Resolve(idx, "ProjectA", "123_A.html")
	if !resolved {
		t.Fatalf("Resolve failed")
	}
	if !contains(winner, "2025-01-01_Early") {
		t.Errorf("got %q, want path under 2025-01-01_Early/ (chronological winner)", winner)
	}
}
// Re-indexing the same transmittal folder (e.g. via the watcher) must NOT
// trip the collision detector — seeing the same path again is a no-op, not a
// conflict. The test builds the index, feeds the identical folder through
// UpdateFromDir, and asserts that no collision message was logged.
func TestRecordFile_ReindexSamePathNoCollisionLog(t *testing.T) {
	fsRoot := t.TempDir()
	mkTransmittal(t, fsRoot, "ProjectA/2025-01-01_T1 (IFR) - Title",
		"123_A (IFR) - Title.pdf",
	)

	// Route the default slog logger into a buffer for the duration of the
	// test, restoring the previous default on exit.
	var captured bytes.Buffer
	original := slog.Default()
	handler := slog.NewTextHandler(&captured, &slog.HandlerOptions{Level: slog.LevelDebug})
	slog.SetDefault(slog.New(handler))
	defer slog.SetDefault(original)

	idx, buildErr := BuildIndex(fsRoot)
	if buildErr != nil {
		t.Fatalf("BuildIndex: %v", buildErr)
	}

	// Simulate the watcher firing again on the same transmittal folder.
	folderAbs := filepath.Join(fsRoot, "ProjectA", "2025-01-01_T1 (IFR) - Title")
	if updErr := idx.UpdateFromDir(fsRoot, folderAbs); updErr != nil {
		t.Fatalf("UpdateFromDir: %v", updErr)
	}

	if output := captured.String(); strings.Contains(output, "within-project revision collision") {
		t.Errorf("re-index should not log collision; got:\n%s", output)
	}
}
// Rebuild replaces the index contents with a fresh walk of fsRoot. This test
// checks that a transmittal added after the initial build is picked up, that
// a removed transmittal is dropped, and that the duration and the project and
// tracking counts returned by Rebuild match the tree on disk.
func TestRebuild_PicksUpAddsAndDrops(t *testing.T) {
	fsRoot := t.TempDir()
	mkTransmittal(t, fsRoot, "ProjectA/2025-01-01_T1 (IFR) - Title",
		"100_A (IFR) - Title.pdf",
	)

	idx, buildErr := BuildIndex(fsRoot)
	if buildErr != nil {
		t.Fatalf("BuildIndex: %v", buildErr)
	}

	// resolves reports whether name currently resolves within ProjectA.
	resolves := func(name string) bool {
		_, ok := Resolve(idx, "ProjectA", name)
		return ok
	}

	if !resolves("100.html") {
		t.Fatalf("pre-rebuild: 100 should resolve")
	}
	if resolves("200.html") {
		t.Fatalf("pre-rebuild: 200 should NOT resolve yet")
	}

	// Add a second tracking number into a NEW transmittal folder (the
	// inotify watcher would miss this on SMB; Rebuild must catch it).
	mkTransmittal(t, fsRoot, "ProjectA/2025-02-01_T2 (IFR) - Other",
		"200_A (IFR) - Other.pdf",
	)

	elapsed, nProjects, nTracking, rebuildErr := idx.Rebuild(fsRoot)
	if rebuildErr != nil {
		t.Fatalf("Rebuild: %v", rebuildErr)
	}
	if elapsed <= 0 {
		t.Errorf("Rebuild duration = %v, want > 0", elapsed)
	}
	if nProjects != 1 {
		t.Errorf("projects = %d, want 1", nProjects)
	}
	if nTracking != 2 {
		t.Errorf("tracking = %d, want 2 (100, 200)", nTracking)
	}
	if !resolves("100.html") {
		t.Errorf("post-rebuild: 100 lost")
	}
	if !resolves("200.html") {
		t.Errorf("post-rebuild: 200 not picked up")
	}

	// Remove the original transmittal entirely; Rebuild should drop it.
	firstFolder := filepath.Join(fsRoot, "ProjectA/2025-01-01_T1 (IFR) - Title")
	if rmErr := os.RemoveAll(firstFolder); rmErr != nil {
		t.Fatalf("RemoveAll: %v", rmErr)
	}
	if _, _, _, secondErr := idx.Rebuild(fsRoot); secondErr != nil {
		t.Fatalf("Rebuild: %v", secondErr)
	}
	if resolves("100.html") {
		t.Errorf("post-second-rebuild: 100 should be gone")
	}
	if !resolves("200.html") {
		t.Errorf("post-second-rebuild: 200 lost")
	}
}
// projectOf is the canonical place to derive the project key from a
// slash-separated relative path. The table pins its edge cases (no slash,
// empty input, leading slash) so the contract doesn't drift silently.
func TestProjectOf(t *testing.T) {
	type row struct {
		path, want string
	}
	table := []row{
		{"ProjectA/2025-01-01_T1/100_A.pdf", "ProjectA"},
		{"ProjectA/sub/deep/file.pdf", "ProjectA"},
		// Files at the root with no slash have no project.
		{"top-level-loose-file.pdf", ""},
		{"", ""},
		// Defensive: leading slash should never reach this helper, but if it
		// did, we'd return "" rather than picking up an empty leading segment.
		{"/ProjectA/file", ""},
	}

	for _, tc := range table {
		if got := projectOf(tc.path); got != tc.want {
			t.Errorf("projectOf(%q) = %q, want %q", tc.path, got, tc.want)
		}
	}
}
// contains reports whether sub occurs anywhere within s. An empty sub is
// contained in every string, including the empty string.
//
// It is a thin wrapper over strings.Contains, kept so existing call sites in
// this file do not need to change.
func contains(s, sub string) bool {
	return strings.Contains(s, sub)
}
// sortedKeys returns the keys of m in ascending string order. The result is
// a freshly allocated slice; it is empty (not nil) when m has no entries.
func sortedKeys(m map[string]string) []string {
	keys := make([]string, len(m))
	next := 0
	for key := range m {
		keys[next] = key
		next++
	}
	// Map iteration order is random, so sort for stable output.
	sort.StringSlice(keys).Sort()
	return keys
}
// entryNames returns the URLName of every entry, sorted in ascending string
// order. The input slice is not modified.
func entryNames(entries []Entry) []string {
	names := make([]string, len(entries))
	for i := range entries {
		names[i] = entries[i].URLName
	}
	sort.Strings(names)
	return names
}