ZDDC/zddc/internal/archive/index.go
ZDDC b7e1a4310b refactor(archive): use shared zddc.ParseTransmittalFolder
The transmittal-folder grammar was duplicated as a private regex inside
the archive package. Replace the local regex with calls to the shared
parser in zddc/internal/zddc/folder.go so the grammar lives in one
place and the upcoming staging→working mirror logic can reuse it.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-07 09:14:19 -05:00

416 lines
12 KiB
Go

package archive
import (
"log/slog"
"os"
"path/filepath"
"regexp"
"sort"
"strings"
"sync"
"time"
"codeberg.org/VARASYS/ZDDC/zddc/internal/zddc"
)
// RevisionEntry holds the resolved file paths for one base revision of a
// tracking number: the base document itself plus any modifier files that were
// recorded against that revision.
type RevisionEntry struct {
BasePath string // server-relative path of the base file (served as trackingNumber_rev.html); "" until a base file is recorded
Modifiers map[string]string // modifier key without the leading "+" (e.g. "C1") → server-relative path
Date string // transmittal date (YYYY-MM-DD) of the file in BasePath; used for first-seen logic
}
// TrackingEntry holds all revision data for one tracking number within a
// single project.
type TrackingEntry struct {
HighestBaseRev string // highest key of ByRevision per highestRevision (for trackingNumber.html); refreshed on every recordFile call
ByRevision map[string]*RevisionEntry // base revision (e.g. "A", "~B", "2") → entry
}
// ProjectEntry buckets all tracking numbers under one top-level segment of
// fsRoot (the "project"). Each project is its own namespace: the same
// tracking number issued under two different projects does NOT collide, and
// each project's .archive/ surfaces only its own entries.
type ProjectEntry struct {
ByTracking map[string]*TrackingEntry // tracking number → its revisions
}
// Index is the in-memory archive index, bucketed by project. The project key
// is the first slash-separated segment of an indexed file's server-relative
// path. .archive virtual requests under /<project>/.../.archive/ resolve
// against the named project's bucket; /.archive/ at the very root has no
// project and returns 404.
//
// The methods in this file take mu before touching ByProject: recordFile and
// Rebuild hold the write lock, AllEntries holds the read lock.
type Index struct {
mu sync.RWMutex // guards ByProject
ByProject map[string]*ProjectEntry // project name → that project's tracking numbers
}
// NewIndex returns an empty Index whose project map is allocated and ready
// for writes.
func NewIndex() *Index {
	idx := new(Index)
	idx.ByProject = map[string]*ProjectEntry{}
	return idx
}
// zddcFilenameRE matches a ZDDC filename of the form
//
//	trackingNumber_revision (status) - title.ext
//
// Capture groups:
//
//	1: tracking number (no whitespace or underscores)
//	2: base revision, ~?[A-Z0-9]+ (a leading "~" marks a draft)
//	3: optional modifier including its "+", \+[CBNQ][0-9]+ (e.g. "+C1")
//	4: file extension (text after the last dot)
var zddcFilenameRE = regexp.MustCompile(
`^([^_\s]+(?:-[^_\s]+)*)_(~?[A-Z0-9]+)(\+[CBNQ][0-9]+)?\s+\([^)]+\)\s*-\s*.+\.([^.]+)$`,
)
// parsedFile is one ZDDC file discovered in a transmittal folder, broken into
// the fields recordFile needs to place it in the index.
type parsedFile struct {
trackingNumber string // capture group 1 of zddcFilenameRE
baseRev string // base revision, including any leading "~"
modifier string // empty or e.g. "C1" (leading "+" already stripped)
date string // transmittal folder date
serverPath string // server-relative path (slash-separated, no leading slash)
}
// BuildIndex creates a fresh Index and populates it by walking fsRoot for
// transmittal folders. It returns a nil Index together with the error when
// the walk reports one.
func BuildIndex(fsRoot string) (*Index, error) {
	built := NewIndex()
	err := walkAndIndex(built, fsRoot, fsRoot, "")
	if err != nil {
		return nil, err
	}
	return built, nil
}
// walkAndIndex recursively walks dirAbs looking for transmittal folders and
// indexes each one it finds into idx.
//
// serverDir is the server-relative path of dirAbs (slash-separated, no leading
// slash); it is "" for the root. Entries whose name starts with "." and
// non-directory entries are skipped. A directory whose name parses as a
// transmittal folder is indexed and not descended into by this function;
// every other directory is treated as a grouping/portfolio/project folder and
// recursed into.
//
// The returned error is non-nil only when dirAbs itself cannot be read.
// Failures below dirAbs are non-fatal: they are logged and the walk moves on
// to the next sibling, so one unreadable subtree does not abort the index.
func walkAndIndex(idx *Index, fsRoot, dirAbs, serverDir string) error {
	entries, err := os.ReadDir(dirAbs)
	if err != nil {
		return err
	}
	for _, entry := range entries {
		name := entry.Name()
		if strings.HasPrefix(name, ".") || !entry.IsDir() {
			continue
		}

		// Server paths are always slash-separated, independent of the OS.
		childServerDir := name
		if serverDir != "" {
			childServerDir = serverDir + "/" + name
		}
		childAbs := filepath.Join(dirAbs, name)

		if date, _, _, _, ok := zddc.ParseTransmittalFolder(name); ok {
			// This is a transmittal folder: index its files.
			if err := indexTransmittalFolder(idx, fsRoot, childAbs, childServerDir, date); err != nil {
				slog.Warn("archive: indexing transmittal folder failed",
					"path", childAbs, "err", err)
			}
			continue
		}

		// Grouping/portfolio/project folder: keep looking further down.
		if err := walkAndIndex(idx, fsRoot, childAbs, childServerDir); err != nil {
			slog.Warn("archive: skipping directory during index walk",
				"path", childAbs, "err", err)
		}
	}
	return nil
}
// indexTransmittalFolder records every ZDDC-named file found anywhere under
// folderAbs into idx, stamping each with the transmittal date.
//
// fsRoot is the served root used to compute each file's server-relative path;
// folderServerPath is the server-relative path of the folder and is used for
// log context only. Directories, dot-files and files whose names do not match
// zddcFilenameRE are ignored.
//
// Per-file and per-directory problems are logged and skipped so that one bad
// entry does not stop the rest of the folder from being indexed; the walk
// callback therefore never returns an error itself.
func indexTransmittalFolder(idx *Index, fsRoot, folderAbs, folderServerPath, date string) error {
	return filepath.WalkDir(folderAbs, func(path string, d os.DirEntry, err error) error {
		if err != nil {
			// Returning nil keeps the walk going past the unreadable entry.
			slog.Warn("archive: walk error in transmittal folder",
				"folder", folderServerPath, "path", path, "err", err)
			return nil
		}
		if d.IsDir() {
			return nil
		}
		name := d.Name()
		if strings.HasPrefix(name, ".") {
			return nil
		}
		m := zddcFilenameRE.FindStringSubmatch(name)
		if m == nil {
			return nil
		}

		// Build the server-relative path (slash-separated).
		relPath, err := filepath.Rel(fsRoot, path)
		if err != nil {
			slog.Warn("archive: cannot make path relative to served root",
				"root", fsRoot, "path", path, "err", err)
			return nil
		}

		idx.recordFile(parsedFile{
			trackingNumber: m[1],
			baseRev:        m[2],
			modifier:       strings.TrimPrefix(m[3], "+"), // m[3] is "" or e.g. "+C1"
			date:           date,
			serverPath:     filepath.ToSlash(relPath),
		})
		return nil
	})
}
// projectOf extracts the project name from a server-relative path: the text
// before the first "/". It returns "" when the path contains no slash (a file
// sitting directly at the root) or when it begins with one, since neither has
// a project segment to index under.
func projectOf(serverPath string) string {
	head, _, hasSlash := strings.Cut(serverPath, "/")
	if !hasSlash || head == "" {
		return ""
	}
	return head
}
// recordFile files pf into the index under the project named by the first
// segment of its server path, creating the project, tracking-number and
// revision buckets on demand. Base files follow first-seen (oldest transmittal
// date wins) logic. The whole update runs under the index write lock.
func (idx *Index) recordFile(pf parsedFile) {
	project := projectOf(pf.serverPath)
	if project == "" {
		// The file sits directly at the served root with no project wrapper.
		// Leaving it out means /.archive/ at the root surfaces nothing, which
		// is the contract: stable references must include a project
		// directory. Such files stay reachable as ordinary static URLs.
		return
	}

	idx.mu.Lock()
	defer idx.mu.Unlock()

	bucket, ok := idx.ByProject[project]
	if !ok {
		bucket = &ProjectEntry{ByTracking: make(map[string]*TrackingEntry)}
		idx.ByProject[project] = bucket
	}
	tracking, ok := bucket.ByTracking[pf.trackingNumber]
	if !ok {
		tracking = &TrackingEntry{ByRevision: make(map[string]*RevisionEntry)}
		bucket.ByTracking[pf.trackingNumber] = tracking
	}
	rev, ok := tracking.ByRevision[pf.baseRev]
	if !ok {
		rev = &RevisionEntry{Modifiers: make(map[string]string)}
		tracking.ByRevision[pf.baseRev] = rev
	}

	switch {
	case pf.modifier != "":
		// Modifier file: keep it when the key is new, or when this
		// transmittal's date is earlier than rev.Date.
		// NOTE(review): rev.Date is the date of the base file, not of the
		// modifier path already stored (no per-modifier date is kept), so this
		// is not a true oldest-modifier-wins test — confirm that is intended.
		if _, known := rev.Modifiers[pf.modifier]; !known || pf.date < rev.Date {
			rev.Modifiers[pf.modifier] = pf.serverPath
		}
	case rev.BasePath == "":
		// First base file seen for this revision.
		rev.BasePath = pf.serverPath
		rev.Date = pf.date
	case rev.BasePath != pf.serverPath:
		// Two different files claim the same (project, tracking, rev): a
		// within-project authoring mistake. Log both paths so it can be
		// diagnosed; the chronologically earlier file still wins.
		slog.Warn("archive: within-project revision collision",
			"project", project,
			"tracking", pf.trackingNumber,
			"revision", pf.baseRev,
			"existing", rev.BasePath,
			"existingDate", rev.Date,
			"new", pf.serverPath,
			"newDate", pf.date,
		)
		if pf.date < rev.Date {
			rev.BasePath = pf.serverPath
			rev.Date = pf.date
		}
	}
	// Remaining case: the very same file was seen again (e.g. a re-index
	// triggered by the watcher), which needs no change.

	// Refresh the cached highest base revision.
	tracking.HighestBaseRev = highestRevision(tracking)
}
// highestRevision reports the key of te.ByRevision that ranks highest under
// compareRevisions, or "" when te has no revisions. Every key takes part,
// whether or not a base file has been recorded for it.
func highestRevision(te *TrackingEntry) string {
	top, have := "", false
	for rev := range te.ByRevision {
		// Map keys are distinct, so a single max scan picks the same winner
		// regardless of iteration order.
		if !have || compareRevisions(rev, top) > 0 {
			top, have = rev, true
		}
	}
	return top
}
// compareRevisions orders two revision strings. It returns a negative value
// if a sorts before b, zero if they are identical, and a positive value if a
// sorts after b.
//
// Rules, applied in turn (the base is the revision without a leading "~"):
//   - Same base: the draft ("~" prefix) sorts before the non-draft.
//   - A base that starts with a digit sorts before one that does not.
//   - Otherwise the bases are compared in natural order: runs of digits are
//     compared by numeric value, so "2" < "10" and "A2" < "A10"; all other
//     bytes are compared by value, so letter-only revisions keep plain string
//     order ("A" < "B", and also "AA" < "B").
func compareRevisions(a, b string) int {
	baseA, baseB := strings.TrimPrefix(a, "~"), strings.TrimPrefix(b, "~")

	// Draft < non-draft of the same base.
	if baseA == baseB {
		draftA, draftB := strings.HasPrefix(a, "~"), strings.HasPrefix(b, "~")
		switch {
		case draftA && !draftB:
			return -1
		case !draftA && draftB:
			return 1
		}
		return 0
	}

	// Numeric vs alpha: numeric comes first.
	numA := baseA != "" && isDigitByte(baseA[0])
	numB := baseB != "" && isDigitByte(baseB[0])
	switch {
	case numA && !numB:
		return -1
	case !numA && numB:
		return 1
	}

	return compareNatural(baseA, baseB)
}

// isDigitByte reports whether c is an ASCII decimal digit.
func isDigitByte(c byte) bool {
	return c >= '0' && c <= '9'
}

// compareNatural compares a and b byte by byte, except that aligned runs of
// digits are compared by numeric value (leading zeros ignored). Strings that
// are equal under that rule fall back to plain string order so that distinct
// inputs never compare equal.
func compareNatural(a, b string) int {
	i, j := 0, 0
	for i < len(a) && j < len(b) {
		if isDigitByte(a[i]) && isDigitByte(b[j]) {
			startA, startB := i, j
			for i < len(a) && isDigitByte(a[i]) {
				i++
			}
			for j < len(b) && isDigitByte(b[j]) {
				j++
			}
			// With leading zeros removed, a longer run is a larger number and
			// equal-length runs order the same way as their digits.
			runA := strings.TrimLeft(a[startA:i], "0")
			runB := strings.TrimLeft(b[startB:j], "0")
			if len(runA) != len(runB) {
				if len(runA) < len(runB) {
					return -1
				}
				return 1
			}
			if c := strings.Compare(runA, runB); c != 0 {
				return c
			}
			continue
		}
		if a[i] != b[j] {
			if a[i] < b[j] {
				return -1
			}
			return 1
		}
		i++
		j++
	}
	switch {
	case i < len(a):
		return 1 // b is a proper prefix of a
	case j < len(b):
		return -1 // a is a proper prefix of b
	}
	return strings.Compare(a, b)
}
// Rebuild re-walks fsRoot into a brand-new index and then swaps that result
// into idx under the write lock, so readers see either the old contents or
// the new ones. It backs the periodic re-scan and the admin
// /.profile/reindex endpoint.
//
// A full re-scan is needed alongside the fsnotify watcher because inotify
// (which fsnotify wraps on Linux) only reports events the local kernel sees.
// Writes made by another SMB/CIFS client to an Azure Files share produce no
// inotify events on this pod's mount, so the watcher misses every
// cross-client change; the periodic rebuild closes that gap.
//
// It returns the elapsed time, the number of projects, the total number of
// tracking numbers, and any error from the walk. On error idx is left
// untouched and both counts are zero.
func (idx *Index) Rebuild(fsRoot string) (time.Duration, int, int, error) {
	began := time.Now()

	rebuilt, err := BuildIndex(fsRoot)
	if err != nil {
		return time.Since(began), 0, 0, err
	}

	var trackingCount int
	for _, bucket := range rebuilt.ByProject {
		trackingCount += len(bucket.ByTracking)
	}
	projectCount := len(rebuilt.ByProject)

	idx.mu.Lock()
	idx.ByProject = rebuilt.ByProject
	idx.mu.Unlock()

	return time.Since(began), projectCount, trackingCount, nil
}
// UpdateFromDir re-indexes the single transmittal folder at
// transmittalDirPath (called by the watcher). The transmittal date is parsed
// from the folder's own name; a path whose last element is not a transmittal
// folder is ignored and nil is returned. An error is returned when the folder
// cannot be expressed relative to fsRoot, or when indexing reports one.
func (idx *Index) UpdateFromDir(fsRoot, transmittalDirPath string) error {
	date, _, _, _, isTransmittal := zddc.ParseTransmittalFolder(filepath.Base(transmittalDirPath))
	if !isTransmittal {
		return nil
	}

	// Server-relative, slash-separated form of the folder path.
	relDir, relErr := filepath.Rel(fsRoot, transmittalDirPath)
	if relErr != nil {
		return relErr
	}
	return indexTransmittalFolder(idx, fsRoot, transmittalDirPath, filepath.ToSlash(relDir), date)
}
// Entry is one virtual redirect file in the archive listing.
//
// URLName is the filename surfaced under .archive/ (e.g. "123.html",
// "123_~A.html"). TargetPath is the server-relative path the redirect
// resolves to — used both as the redirect target and as the input to the
// per-entry ACL check.
type Entry struct {
URLName string // "<tracking>.html" or "<tracking>_<rev>.html"
TargetPath string // server-relative path of the indexed base file
}
// AllEntries returns a snapshot, sorted by URLName, of every redirect entry
// for the named project. Each tracking number contributes two kinds:
//
//   - <tracking>.html       → base file of the highest base revision
//   - <tracking>_<rev>.html → base file of that specific base revision
//
// Revisions with no base file recorded are left out, so modifier files
// (e.g. <tracking>_<rev>+C1.html) never appear in the listing; they are
// return traffic (comments / markups) rather than primary documents.
//
// Ordering is plain byte order on URLName. Because "." sorts before "_", a
// tracking number's <tracking>.html shortcut precedes its per-revision
// entries, which then follow in byte order of the revision text.
//
// A project that is unknown, or that has no listable entries, yields nil, so
// callers need no special case.
func (idx *Index) AllEntries(project string) []Entry {
	idx.mu.RLock()
	defer idx.mu.RUnlock()

	bucket, found := idx.ByProject[project]
	if !found {
		return nil
	}

	var listing []Entry
	emit := func(urlName, target string) {
		listing = append(listing, Entry{URLName: urlName, TargetPath: target})
	}

	for tracking, te := range bucket.ByTracking {
		// Highest-revision shortcut, only when that revision has a base file.
		if te.HighestBaseRev != "" {
			top, ok := te.ByRevision[te.HighestBaseRev]
			if ok && top.BasePath != "" {
				emit(tracking+".html", top.BasePath)
			}
		}
		// One entry per revision that has a base file.
		for rev, entry := range te.ByRevision {
			if entry.BasePath != "" {
				emit(tracking+"_"+rev+".html", entry.BasePath)
			}
		}
	}

	sort.Slice(listing, func(a, b int) bool {
		return listing[a].URLName < listing[b].URLName
	})
	return listing
}