// Package archive maintains the in-memory index of transmittal files that
// backs the virtual .archive/ listings.
package archive
|
|
|
|
import (
|
|
"log/slog"
|
|
"os"
|
|
"path/filepath"
|
|
"regexp"
|
|
"sort"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"codeberg.org/VARASYS/ZDDC/zddc/internal/zddc"
|
|
)
|
|
|
|
// RevisionEntry holds the resolved file paths for one base revision of a
// tracking number.
//
// BasePath stays empty until a file without a modifier has been recorded for
// the revision; Date is written together with BasePath and is the value the
// first-seen (oldest transmittal wins) comparison is made against.
type RevisionEntry struct {
	BasePath  string            // server-relative path for trackingNumber_rev.html
	Modifiers map[string]string // modifier key (e.g. "C1") → server-relative path
	Date      string            // transmittal date (YYYY-MM-DD) for first-seen logic
}
|
|
|
|
// TrackingEntry holds all revision data for one tracking number.
//
// ByRevision is keyed by the base revision string exactly as it appears in the
// filename (including a leading "~" for drafts). HighestBaseRev caches the key
// of ByRevision that ranks highest and is refreshed whenever a file is
// recorded.
type TrackingEntry struct {
	HighestBaseRev string                    // highest base revision (for trackingNumber.html)
	ByRevision     map[string]*RevisionEntry // base revision → entry
}
|
|
|
|
// ProjectEntry buckets all tracking numbers under one top-level segment of
// fsRoot (the "project"). Each project is its own namespace — the same
// tracking number issued under two different projects does NOT collide; each
// project's .archive/ surfaces only its own.
type ProjectEntry struct {
	ByTracking map[string]*TrackingEntry // tracking number → its revisions
}
|
|
|
|
// Index is the in-memory archive index, bucketed by project. The project key
// is the first slash-separated segment of an indexed file's server-relative
// path. .archive virtual requests under /<project>/.../.archive/ resolve
// against the named project's bucket; /.archive/ at the very root has no
// project and returns 404.
//
// The methods in this file take mu before reading or replacing ByProject.
// NOTE(review): ByProject is exported, so code outside this file can reach it
// without the lock — confirm external callers go through the methods.
type Index struct {
	mu        sync.RWMutex
	ByProject map[string]*ProjectEntry // project name → that project's bucket
}
|
|
|
|
// NewIndex returns an empty Index.
|
|
func NewIndex() *Index {
|
|
return &Index{
|
|
ByProject: make(map[string]*ProjectEntry),
|
|
}
|
|
}
|
|
|
|
// zddcFilenameRE matches a ZDDC filename of the form
//
//	trackingNumber_revision (status) - title.ext
//
// Capture groups:
//
//	1: tracking number — no whitespace or underscores
//	2: base revision   — ~?[A-Z0-9]+ (a leading "~" marks a draft)
//	3: modifier        — optional; "+" followed by one of C/B/N/Q and digits, e.g. "+C1"
//	4: file extension  — the text after the final dot
//
// The parenthesised status and the title must be present but are not captured.
var zddcFilenameRE = regexp.MustCompile(
	`^([^_\s]+(?:-[^_\s]+)*)_(~?[A-Z0-9]+)(\+[CBNQ][0-9]+)?\s+\([^)]+\)\s*-\s*.+\.([^.]+)$`,
)
|
|
|
|
// parsedFile is the result of matching one filename inside a transmittal
// folder: the pieces extracted from the name plus where the file lives and
// which transmittal date it arrived under.
type parsedFile struct {
	trackingNumber string
	baseRev        string
	modifier       string // empty or e.g. "C1"
	date           string // transmittal folder date
	serverPath     string // server-relative path (slash-separated, no leading slash)
}
|
|
|
|
// BuildIndex walks fsRoot, finds all transmittal folders, and builds the index.
|
|
func BuildIndex(fsRoot string) (*Index, error) {
|
|
idx := NewIndex()
|
|
if err := walkAndIndex(idx, fsRoot, fsRoot, ""); err != nil {
|
|
return nil, err
|
|
}
|
|
return idx, nil
|
|
}
|
|
|
|
// walkAndIndex recursively walks dirAbs looking for transmittal folders.
|
|
// serverDir is the server-relative path of dirAbs (slash-separated, no leading slash).
|
|
func walkAndIndex(idx *Index, fsRoot, dirAbs, serverDir string) error {
|
|
entries, err := os.ReadDir(dirAbs)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
for _, entry := range entries {
|
|
name := entry.Name()
|
|
if strings.HasPrefix(name, ".") {
|
|
continue
|
|
}
|
|
if !entry.IsDir() {
|
|
continue
|
|
}
|
|
|
|
var childServerDir string
|
|
if serverDir == "" {
|
|
childServerDir = name
|
|
} else {
|
|
childServerDir = serverDir + "/" + name
|
|
}
|
|
childAbs := filepath.Join(dirAbs, name)
|
|
|
|
if date, _, _, _, ok := zddc.ParseTransmittalFolder(name); ok {
|
|
// This is a transmittal folder — index its files
|
|
if err := indexTransmittalFolder(idx, fsRoot, childAbs, childServerDir, date); err != nil {
|
|
// Non-fatal: log and continue
|
|
continue
|
|
}
|
|
} else {
|
|
// Recurse into grouping/portfolio/project folders
|
|
if err := walkAndIndex(idx, fsRoot, childAbs, childServerDir); err != nil {
|
|
continue
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// indexTransmittalFolder indexes all ZDDC files in a transmittal folder.
|
|
func indexTransmittalFolder(idx *Index, fsRoot, folderAbs, folderServerPath, date string) error {
|
|
return filepath.WalkDir(folderAbs, func(path string, d os.DirEntry, err error) error {
|
|
if err != nil {
|
|
// Log and continue indexing the rest of the folder — a
|
|
// permission or FS error on one entry shouldn't abort the
|
|
// whole transmittal index or vanish without a trace.
|
|
slog.Warn("transmittal index: walkdir error", "path", path, "err", err)
|
|
return nil
|
|
}
|
|
if d.IsDir() {
|
|
return nil
|
|
}
|
|
name := d.Name()
|
|
if strings.HasPrefix(name, ".") {
|
|
return nil
|
|
}
|
|
|
|
m := zddcFilenameRE.FindStringSubmatch(name)
|
|
if m == nil {
|
|
return nil
|
|
}
|
|
|
|
tracking := m[1]
|
|
baseRev := m[2]
|
|
modifierFull := m[3] // e.g. "+C1" or ""
|
|
modifier := ""
|
|
if modifierFull != "" {
|
|
modifier = modifierFull[1:] // strip leading "+"
|
|
}
|
|
|
|
// Build server-relative path
|
|
relPath, err := filepath.Rel(fsRoot, path)
|
|
if err != nil {
|
|
return nil
|
|
}
|
|
serverPath := filepath.ToSlash(relPath)
|
|
|
|
pf := parsedFile{
|
|
trackingNumber: tracking,
|
|
baseRev: baseRev,
|
|
modifier: modifier,
|
|
date: date,
|
|
serverPath: serverPath,
|
|
}
|
|
|
|
idx.recordFile(pf)
|
|
return nil
|
|
})
|
|
}
|
|
|
|
// projectOf extracts the project name from a server-relative path: the text
// before the first "/". It returns "" when the path contains no slash (a file
// sitting directly at the root) or when the path starts with a slash.
func projectOf(serverPath string) string {
	head, _, hasSlash := strings.Cut(serverPath, "/")
	if !hasSlash {
		return ""
	}
	return head
}
|
|
|
|
// recordFile adds a parsed file to the index using first-seen (oldest date)
|
|
// logic, bucketed under the project (top-level segment) the file lives in.
|
|
func (idx *Index) recordFile(pf parsedFile) {
|
|
project := projectOf(pf.serverPath)
|
|
if project == "" {
|
|
// File sits directly at the served root with no project wrapper.
|
|
// Skipping it means /.archive/ at the root surfaces nothing — which
|
|
// is exactly the contract: stable references must include a project
|
|
// directory. Such files are still reachable as ordinary static URLs.
|
|
return
|
|
}
|
|
|
|
idx.mu.Lock()
|
|
defer idx.mu.Unlock()
|
|
|
|
pe, ok := idx.ByProject[project]
|
|
if !ok {
|
|
pe = &ProjectEntry{ByTracking: make(map[string]*TrackingEntry)}
|
|
idx.ByProject[project] = pe
|
|
}
|
|
|
|
te, ok := pe.ByTracking[pf.trackingNumber]
|
|
if !ok {
|
|
te = &TrackingEntry{
|
|
ByRevision: make(map[string]*RevisionEntry),
|
|
}
|
|
pe.ByTracking[pf.trackingNumber] = te
|
|
}
|
|
|
|
re, ok := te.ByRevision[pf.baseRev]
|
|
if !ok {
|
|
re = &RevisionEntry{
|
|
Modifiers: make(map[string]string),
|
|
}
|
|
te.ByRevision[pf.baseRev] = re
|
|
}
|
|
|
|
if pf.modifier == "" {
|
|
switch {
|
|
case re.BasePath == "":
|
|
re.BasePath = pf.serverPath
|
|
re.Date = pf.date
|
|
case re.BasePath == pf.serverPath:
|
|
// same file, no-op (e.g. re-index from the watcher)
|
|
default:
|
|
// Two different files claim to be (project, tracking, rev) —
|
|
// that's a within-project authoring mistake. Log once with both
|
|
// paths so it's diagnosable; chronological winner still wins.
|
|
slog.Warn("archive: within-project revision collision",
|
|
"project", project,
|
|
"tracking", pf.trackingNumber,
|
|
"revision", pf.baseRev,
|
|
"existing", re.BasePath,
|
|
"existingDate", re.Date,
|
|
"new", pf.serverPath,
|
|
"newDate", pf.date,
|
|
)
|
|
if pf.date < re.Date {
|
|
re.BasePath = pf.serverPath
|
|
re.Date = pf.date
|
|
}
|
|
}
|
|
} else {
|
|
// Modifier file — record if no entry yet or this transmittal is older
|
|
if existing, exists := re.Modifiers[pf.modifier]; !exists || pf.date < re.Date {
|
|
_ = existing
|
|
re.Modifiers[pf.modifier] = pf.serverPath
|
|
}
|
|
}
|
|
|
|
// Update highest base revision
|
|
te.HighestBaseRev = highestRevision(te)
|
|
}
|
|
|
|
// highestRevision returns the highest base revision among all revisions in te.
|
|
// Revision ordering: numeric revisions (0,1,2…) are lower than alphabetic (A,B,C…).
|
|
// Draft prefix ~ means lower than base.
|
|
func highestRevision(te *TrackingEntry) string {
|
|
if len(te.ByRevision) == 0 {
|
|
return ""
|
|
}
|
|
revs := make([]string, 0, len(te.ByRevision))
|
|
for r := range te.ByRevision {
|
|
revs = append(revs, r)
|
|
}
|
|
sort.Slice(revs, func(i, j int) bool {
|
|
return compareRevisions(revs[i], revs[j]) < 0
|
|
})
|
|
return revs[len(revs)-1]
|
|
}
|
|
|
|
// compareRevisions returns a negative number if a < b, 0 if they are equal,
// and a positive number if a > b.
//
// Ordering rules, applied to the revision with any leading "~" removed:
//
//   - a draft ("~X") sorts immediately below its own base ("X");
//   - revisions starting with a digit sort below all others;
//   - digit-leading revisions are ordered by the numeric value of their
//     leading digit run, so "9" < "10" (a plain string comparison would put
//     "10" first), then by whatever follows the digits;
//   - all other revisions are ordered byte-wise (A < B < C ...).
func compareRevisions(a, b string) int {
	baseA := strings.TrimPrefix(a, "~")
	baseB := strings.TrimPrefix(b, "~")

	// Same base: only the draft marker can separate them.
	if baseA == baseB {
		draftA := strings.HasPrefix(a, "~")
		draftB := strings.HasPrefix(b, "~")
		switch {
		case draftA && !draftB:
			return -1
		case !draftA && draftB:
			return 1
		}
		return 0
	}

	numA := baseA != "" && baseA[0] >= '0' && baseA[0] <= '9'
	numB := baseB != "" && baseB[0] >= '0' && baseB[0] <= '9'
	switch {
	case numA && !numB:
		return -1
	case !numA && numB:
		return 1
	case numA && numB:
		return compareNumericRevisions(baseA, baseB)
	}
	return strings.Compare(baseA, baseB)
}

// compareNumericRevisions orders two distinct digit-leading revisions by the
// value of their leading digit run, then by the remaining suffix, and finally
// by the raw text so that forms differing only in leading zeros still get a
// stable, non-zero ordering.
func compareNumericRevisions(a, b string) int {
	digitsA, restA := splitLeadingDigits(a)
	digitsB, restB := splitLeadingDigits(b)

	// With leading zeros removed, a longer digit run is a larger number and
	// equal-length runs compare correctly as strings. This avoids integer
	// overflow on arbitrarily long runs.
	digitsA = strings.TrimLeft(digitsA, "0")
	digitsB = strings.TrimLeft(digitsB, "0")
	if len(digitsA) != len(digitsB) {
		if len(digitsA) < len(digitsB) {
			return -1
		}
		return 1
	}
	if c := strings.Compare(digitsA, digitsB); c != 0 {
		return c
	}
	if c := strings.Compare(restA, restB); c != 0 {
		return c
	}
	return strings.Compare(a, b)
}

// splitLeadingDigits splits s into its leading run of ASCII digits and the
// remainder.
func splitLeadingDigits(s string) (digits, rest string) {
	i := 0
	for i < len(s) && s[i] >= '0' && s[i] <= '9' {
		i++
	}
	return s[:i], s[i:]
}
|
|
|
|
// Rebuild walks fsRoot from scratch and atomically replaces this Index's
|
|
// contents with the result. Used by the periodic re-scan and the admin
|
|
// /.profile/reindex endpoint.
|
|
//
|
|
// Why we need this even with the fsnotify watcher: inotify (which fsnotify
|
|
// wraps on Linux) only fires on events the local kernel sees. When another
|
|
// SMB/CIFS client writes to an Azure Files share, those writes never produce
|
|
// inotify events on this pod's mount — so the watcher silently misses every
|
|
// cross-client change. A periodic full re-scan covers that gap.
|
|
//
|
|
// Returns the duration of the rebuild and counts (projects, tracking numbers)
|
|
// for the caller to log or surface to an admin endpoint.
|
|
func (idx *Index) Rebuild(fsRoot string) (time.Duration, int, int, error) {
|
|
start := time.Now()
|
|
fresh, err := BuildIndex(fsRoot)
|
|
if err != nil {
|
|
return time.Since(start), 0, 0, err
|
|
}
|
|
projects := len(fresh.ByProject)
|
|
tracking := 0
|
|
for _, pe := range fresh.ByProject {
|
|
tracking += len(pe.ByTracking)
|
|
}
|
|
idx.mu.Lock()
|
|
idx.ByProject = fresh.ByProject
|
|
idx.mu.Unlock()
|
|
return time.Since(start), projects, tracking, nil
|
|
}
|
|
|
|
// UpdateFromDir re-indexes a single transmittal folder (called by the watcher).
|
|
func (idx *Index) UpdateFromDir(fsRoot, transmittalDirPath string) error {
|
|
// Determine the date from the folder name
|
|
folderName := filepath.Base(transmittalDirPath)
|
|
date, _, _, _, ok := zddc.ParseTransmittalFolder(folderName)
|
|
if !ok {
|
|
return nil // not a transmittal folder
|
|
}
|
|
|
|
// Compute server-relative path for this folder
|
|
rel, err := filepath.Rel(fsRoot, transmittalDirPath)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
serverDir := filepath.ToSlash(rel)
|
|
|
|
return indexTransmittalFolder(idx, fsRoot, transmittalDirPath, serverDir, date)
|
|
}
|
|
|
|
// Entry is one virtual redirect file in the archive listing.
//
// URLName is the filename surfaced under .archive/ (e.g. "123.html",
// "123_~A.html"). TargetPath is the server-relative path the redirect
// resolves to — used both as the redirect target and as the input to the
// per-entry ACL check.
type Entry struct {
	URLName    string // virtual filename shown in the listing
	TargetPath string // server-relative path of the real file
}
|
|
|
|
// AllEntries returns a sorted snapshot of every redirect entry for the named
|
|
// project. Two kinds per tracking number:
|
|
//
|
|
// - <tracking>.html → first-chronological copy of the highest base rev
|
|
// - <tracking>_<rev>.html → first-chronological copy of that specific base rev
|
|
//
|
|
// Modifier files (e.g. <tracking>_<rev>+C1.html) remain reachable via the
|
|
// resolver but are not surfaced in the listing — they're return traffic
|
|
// (comments / markups), not items the user browses to as primary documents.
|
|
//
|
|
// Sort order is by URLName; the "." in <tracking>.html sorts before the "_"
|
|
// in <tracking>_<rev>.html, so each tracking number's highest-rev shortcut
|
|
// comes first, followed by its individual revisions in revision order.
|
|
//
|
|
// An empty project (or one with no indexed tracking numbers) returns nil,
|
|
// keeping the caller branch-free.
|
|
func (idx *Index) AllEntries(project string) []Entry {
|
|
idx.mu.RLock()
|
|
defer idx.mu.RUnlock()
|
|
|
|
pe, ok := idx.ByProject[project]
|
|
if !ok {
|
|
return nil
|
|
}
|
|
|
|
var result []Entry
|
|
for tn, te := range pe.ByTracking {
|
|
if te.HighestBaseRev != "" {
|
|
if re, ok := te.ByRevision[te.HighestBaseRev]; ok && re.BasePath != "" {
|
|
result = append(result, Entry{
|
|
URLName: tn + ".html",
|
|
TargetPath: re.BasePath,
|
|
})
|
|
}
|
|
}
|
|
for rev, re := range te.ByRevision {
|
|
if re.BasePath == "" {
|
|
continue
|
|
}
|
|
result = append(result, Entry{
|
|
URLName: tn + "_" + rev + ".html",
|
|
TargetPath: re.BasePath,
|
|
})
|
|
}
|
|
}
|
|
sort.Slice(result, func(i, j int) bool {
|
|
return result[i].URLName < result[j].URLName
|
|
})
|
|
return result
|
|
}
|