package archive

import (
	"log/slog"
	"os"
	"path/filepath"
	"regexp"
	"sort"
	"strings"
	"sync"
	"time"

	"codeberg.org/VARASYS/ZDDC/zddc/internal/zddc"
)

// RevisionEntry holds the resolved file paths for one base revision.
type RevisionEntry struct {
	BasePath  string            // server-relative path for trackingNumber_rev.html
	Modifiers map[string]string // modifier key (e.g. "C1") → server-relative path
	Date      string            // transmittal date (YYYY-MM-DD) of BasePath, for first-seen logic

	// modifierDates holds, per modifier key, the transmittal date of the path
	// currently stored in Modifiers. It lets first-seen logic for modifier
	// files compare against the modifier's own date rather than the base
	// file's. Initialised lazily by recordFile.
	modifierDates map[string]string
}

// TrackingEntry holds all revision data for one tracking number.
type TrackingEntry struct {
	HighestBaseRev string                    // highest base revision (for trackingNumber.html)
	ByRevision     map[string]*RevisionEntry // base revision → entry
}

// ProjectEntry buckets all tracking numbers under one top-level segment of
// fsRoot (the "project"). Each project is its own namespace — the same
// tracking number issued under two different projects does NOT collide; each
// project's .archive/ surfaces only its own.
type ProjectEntry struct {
	ByTracking map[string]*TrackingEntry
}

// Index is the in-memory archive index, bucketed by project. The project key
// is the first slash-separated segment of an indexed file's server-relative
// path. .archive virtual requests under /<project>/.../.archive/ resolve
// against the named project's bucket; /.archive/ at the very root has no
// project and returns 404.
type Index struct {
	mu        sync.RWMutex // guards ByProject and everything reachable from it
	ByProject map[string]*ProjectEntry
}

// NewIndex returns an empty Index.
func NewIndex() *Index {
	return &Index{
		ByProject: make(map[string]*ProjectEntry),
	}
}

// zddc filename: trackingNumber_revision (status) - title.ext
// trackingNumber: no spaces or underscores
// revision: ~?[A-Z0-9]+(\+[CBNQ][0-9]+)?
//
// Submatches: 1 = tracking number, 2 = base revision (with optional "~"),
// 3 = modifier including its leading "+" (may be empty), 4 = extension.
var zddcFilenameRE = regexp.MustCompile(
	`^([^_\s]+(?:-[^_\s]+)*)_(~?[A-Z0-9]+)(\+[CBNQ][0-9]+)?\s+\([^)]+\)\s*-\s*.+\.([^.]+)$`,
)

// parsedFile is one file name that matched zddcFilenameRE, together with the
// transmittal it was found in.
type parsedFile struct {
	trackingNumber string
	baseRev        string
	modifier       string // empty or e.g. "C1"
	date           string // transmittal folder date
	serverPath     string // server-relative path (slash-separated, no leading slash)
}

// BuildIndex walks fsRoot, finds all transmittal folders, and builds the index.
// It fails only when fsRoot itself cannot be read; problems deeper in the tree
// are logged and skipped.
func BuildIndex(fsRoot string) (*Index, error) {
	idx := NewIndex()
	if err := walkAndIndex(idx, fsRoot, fsRoot, ""); err != nil {
		return nil, err
	}
	return idx, nil
}

// walkAndIndex recursively walks dirAbs looking for transmittal folders.
// serverDir is the server-relative path of dirAbs (slash-separated, no leading slash).
//
// Only a failure to read dirAbs itself is returned. Failures in child folders
// are logged and skipped so one unreadable subtree does not abort the scan.
func walkAndIndex(idx *Index, fsRoot, dirAbs, serverDir string) error {
	entries, err := os.ReadDir(dirAbs)
	if err != nil {
		return err
	}

	for _, entry := range entries {
		name := entry.Name()
		// Hidden entries and plain files are never containers of transmittals.
		if strings.HasPrefix(name, ".") || !entry.IsDir() {
			continue
		}

		childServerDir := name
		if serverDir != "" {
			childServerDir = serverDir + "/" + name
		}
		childAbs := filepath.Join(dirAbs, name)

		if date, _, _, _, ok := zddc.ParseTransmittalFolder(name); ok {
			// This is a transmittal folder — index its files.
			if err := indexTransmittalFolder(idx, fsRoot, childAbs, childServerDir, date); err != nil {
				slog.Warn("archive: indexing transmittal folder failed", "path", childAbs, "err", err)
			}
			continue
		}

		// Recurse into grouping/portfolio/project folders.
		if err := walkAndIndex(idx, fsRoot, childAbs, childServerDir); err != nil {
			slog.Warn("archive: skipping unreadable folder", "path", childAbs, "err", err)
		}
	}
	return nil
}

// indexTransmittalFolder indexes all ZDDC files in a transmittal folder.
func indexTransmittalFolder(idx *Index, fsRoot, folderAbs, folderServerPath, date string) error {
	return filepath.WalkDir(folderAbs, func(path string, d os.DirEntry, walkErr error) error {
		if walkErr != nil {
			// Log the error but continue indexing other files.
			slog.Warn("walkdir error", "path", path, "err", walkErr)
			return nil
		}
		if d.IsDir() {
			return nil
		}

		name := d.Name()
		if strings.HasPrefix(name, ".") {
			return nil
		}

		m := zddcFilenameRE.FindStringSubmatch(name)
		if m == nil {
			return nil
		}

		// m[3] is e.g. "+C1" or ""; the index keys modifiers without the "+".
		modifier := strings.TrimPrefix(m[3], "+")

		// Build server-relative path.
		relPath, err := filepath.Rel(fsRoot, path)
		if err != nil {
			slog.Warn("archive: cannot make path relative to root", "path", path, "root", fsRoot, "err", err)
			return nil
		}

		idx.recordFile(parsedFile{
			trackingNumber: m[1],
			baseRev:        m[2],
			modifier:       modifier,
			date:           date,
			serverPath:     filepath.ToSlash(relPath),
		})
		return nil
	})
}

// projectOf returns the top-level slash-separated segment of a server-relative
// path. Files at the root (no slash) have no project and are not indexable.
// A path with a leading slash also yields "".
func projectOf(serverPath string) string {
	i := strings.IndexByte(serverPath, '/')
	if i <= 0 {
		return ""
	}
	return serverPath[:i]
}

// recordFile adds a parsed file to the index using first-seen (oldest date)
// logic, bucketed under the project (top-level segment) the file lives in.
// It takes the index write lock for the duration of the update.
func (idx *Index) recordFile(pf parsedFile) {
	project := projectOf(pf.serverPath)
	if project == "" {
		// File sits directly at the served root with no project wrapper.
		// Skipping it means /.archive/ at the root surfaces nothing — which
		// is exactly the contract: stable references must include a project
		// directory. Such files are still reachable as ordinary static URLs.
		return
	}

	idx.mu.Lock()
	defer idx.mu.Unlock()

	pe, ok := idx.ByProject[project]
	if !ok {
		pe = &ProjectEntry{ByTracking: make(map[string]*TrackingEntry)}
		idx.ByProject[project] = pe
	}

	te, ok := pe.ByTracking[pf.trackingNumber]
	if !ok {
		te = &TrackingEntry{
			ByRevision: make(map[string]*RevisionEntry),
		}
		pe.ByTracking[pf.trackingNumber] = te
	}

	re, ok := te.ByRevision[pf.baseRev]
	if !ok {
		re = &RevisionEntry{
			Modifiers: make(map[string]string),
		}
		te.ByRevision[pf.baseRev] = re
	}

	if pf.modifier == "" {
		switch {
		case re.BasePath == "":
			re.BasePath = pf.serverPath
			re.Date = pf.date
		case re.BasePath == pf.serverPath:
			// same file, no-op (e.g. re-index from the watcher)
		default:
			// Two different files claim to be (project, tracking, rev) —
			// that's a within-project authoring mistake. Log once with both
			// paths so it's diagnosable; chronological winner still wins.
			slog.Warn("archive: within-project revision collision",
				"project", project,
				"tracking", pf.trackingNumber,
				"revision", pf.baseRev,
				"existing", re.BasePath,
				"existingDate", re.Date,
				"new", pf.serverPath,
				"newDate", pf.date,
			)
			if pf.date < re.Date {
				re.BasePath = pf.serverPath
				re.Date = pf.date
			}
		}
	} else {
		// Modifier file — record if no entry yet or this transmittal is older
		// than the one the stored modifier came from. The comparison uses the
		// modifier's own recorded date; re.Date belongs to the base file and
		// may be empty or unrelated.
		_, exists := re.Modifiers[pf.modifier]
		if !exists || pf.date < re.modifierDates[pf.modifier] {
			if re.modifierDates == nil {
				re.modifierDates = make(map[string]string)
			}
			re.Modifiers[pf.modifier] = pf.serverPath
			re.modifierDates[pf.modifier] = pf.date
		}
	}

	// Update highest base revision.
	te.HighestBaseRev = highestRevision(te)
}

// highestRevision returns the highest base revision among all revisions in te.
// Revision ordering: numeric revisions (0,1,2…) are lower than alphabetic (A,B,C…).
// Draft prefix ~ means lower than base.
func highestRevision(te *TrackingEntry) string { if len(te.ByRevision) == 0 { return "" } revs := make([]string, 0, len(te.ByRevision)) for r := range te.ByRevision { revs = append(revs, r) } sort.Slice(revs, func(i, j int) bool { return compareRevisions(revs[i], revs[j]) < 0 }) return revs[len(revs)-1] } // compareRevisions returns negative if a < b, 0 if equal, positive if a > b. // Order: ~rev < numeric < alpha (A < B < C ...) func compareRevisions(a, b string) int { isDraftA := strings.HasPrefix(a, "~") isDraftB := strings.HasPrefix(b, "~") baseA := strings.TrimPrefix(a, "~") baseB := strings.TrimPrefix(b, "~") // Draft < non-draft of same base if baseA == baseB { if isDraftA && !isDraftB { return -1 } if !isDraftA && isDraftB { return 1 } return 0 } // Numeric vs alpha: numeric comes first aIsNum := len(baseA) > 0 && baseA[0] >= '0' && baseA[0] <= '9' bIsNum := len(baseB) > 0 && baseB[0] >= '0' && baseB[0] <= '9' if aIsNum && !bIsNum { return -1 } if !aIsNum && bIsNum { return 1 } // Both numeric or both alpha: string comparison (works for single-char alpha) if baseA < baseB { return -1 } if baseA > baseB { return 1 } return 0 } // Rebuild walks fsRoot from scratch and atomically replaces this Index's // contents with the result. Used by the periodic re-scan and the admin // /.profile/reindex endpoint. // // Why we need this even with the fsnotify watcher: inotify (which fsnotify // wraps on Linux) only fires on events the local kernel sees. When another // SMB/CIFS client writes to an Azure Files share, those writes never produce // inotify events on this pod's mount — so the watcher silently misses every // cross-client change. A periodic full re-scan covers that gap. // // Returns the duration of the rebuild and counts (projects, tracking numbers) // for the caller to log or surface to an admin endpoint. 
func (idx *Index) Rebuild(fsRoot string) (time.Duration, int, int, error) { start := time.Now() fresh, err := BuildIndex(fsRoot) if err != nil { return time.Since(start), 0, 0, err } projects := len(fresh.ByProject) tracking := 0 for _, pe := range fresh.ByProject { tracking += len(pe.ByTracking) } idx.mu.Lock() idx.ByProject = fresh.ByProject idx.mu.Unlock() return time.Since(start), projects, tracking, nil } // UpdateFromDir re-indexes a single transmittal folder (called by the watcher). func (idx *Index) UpdateFromDir(fsRoot, transmittalDirPath string) error { // Determine the date from the folder name folderName := filepath.Base(transmittalDirPath) date, _, _, _, ok := zddc.ParseTransmittalFolder(folderName) if !ok { return nil // not a transmittal folder } // Compute server-relative path for this folder rel, err := filepath.Rel(fsRoot, transmittalDirPath) if err != nil { return err } serverDir := filepath.ToSlash(rel) return indexTransmittalFolder(idx, fsRoot, transmittalDirPath, serverDir, date) } // Entry is one virtual redirect file in the archive listing. // // URLName is the filename surfaced under .archive/ (e.g. "123.html", // "123_~A.html"). TargetPath is the server-relative path the redirect // resolves to — used both as the redirect target and as the input to the // per-entry ACL check. type Entry struct { URLName string TargetPath string } // AllEntries returns a sorted snapshot of every redirect entry for the named // project. Two kinds per tracking number: // // - .html → first-chronological copy of the highest base rev // - _.html → first-chronological copy of that specific base rev // // Modifier files (e.g. _+C1.html) remain reachable via the // resolver but are not surfaced in the listing — they're return traffic // (comments / markups), not items the user browses to as primary documents. // // Sort order is by URLName; the "." 
in .html sorts before the "_" // in _.html, so each tracking number's highest-rev shortcut // comes first, followed by its individual revisions in revision order. // // An empty project (or one with no indexed tracking numbers) returns nil, // keeping the caller branch-free. func (idx *Index) AllEntries(project string) []Entry { idx.mu.RLock() defer idx.mu.RUnlock() pe, ok := idx.ByProject[project] if !ok { return nil } var result []Entry for tn, te := range pe.ByTracking { if te.HighestBaseRev != "" { if re, ok := te.ByRevision[te.HighestBaseRev]; ok && re.BasePath != "" { result = append(result, Entry{ URLName: tn + ".html", TargetPath: re.BasePath, }) } } for rev, re := range te.ByRevision { if re.BasePath == "" { continue } result = append(result, Entry{ URLName: tn + "_" + rev + ".html", TargetPath: re.BasePath, }) } } sort.Slice(result, func(i, j int) bool { return result[i].URLName < result[j].URLName }) return result }