ZDDC/zddc/internal/zddc/field_codes.go
ZDDC 875827d484 fix(records): validate folder_fields at load time + cache field-code patterns
- Add RecordRule.UnmarshalYAML so a misconfigured folder_fields fails
  when the .zddc is parsed, not as a 500 on the first record write. A
  negative parent-distance is now rejected with a message naming the
  field. Mirrors FieldCode.UnmarshalYAML's raw-alias pattern.
- Memoize anchored field-code pattern regexes in a package-level
  sync.Map (compileFieldPattern), used by both the unmarshal-time
  validation and FieldCode.Validate — replacing the per-call
  regexp.Compile that the old comment flagged as cache-if-it-shows-up.

Tests: negative distance rejected (standalone + nested in a records:
map), valid distance round-trips, pattern field code matches anchored.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-21 15:28:35 -05:00

292 lines
11 KiB
Go

package zddc
import (
"fmt"
"path"
"regexp"
"sync"
"gopkg.in/yaml.v3"
)
// compiledPatternCache memoizes anchored field-code regexes keyed by
// the raw (un-anchored) pattern string. Patterns are compiled when a
// .zddc is unmarshalled and re-used on every Validate call, so the
// cache is effectively populated once per distinct pattern. Entries
// are never evicted.
var compiledPatternCache sync.Map // string -> *regexp.Regexp

// compileFieldPattern returns the anchored regexp for a field-code
// pattern, compiling and caching it on first use.
//
// The pattern must be a valid regular expression on its own: it is
// compiled stand-alone before being wrapped in ^(?:…)$. Without that
// check a pattern with unbalanced parentheses such as "a)|(b" would
// compile once wrapped (as ^(?:a)|(b)$) and silently escape the
// whole-value anchoring.
//
// It returns the regexp.Compile error for an invalid pattern; failed
// compiles are not cached.
func compileFieldPattern(pattern string) (*regexp.Regexp, error) {
	if cached, ok := compiledPatternCache.Load(pattern); ok {
		return cached.(*regexp.Regexp), nil
	}
	// Reject patterns that only become valid through the wrapping.
	if _, err := regexp.Compile(pattern); err != nil {
		return nil, err
	}
	re, err := regexp.Compile("^(?:" + pattern + ")$")
	if err != nil {
		return nil, err
	}
	// LoadOrStore keeps a single canonical instance when two goroutines
	// race to compile the same pattern for the first time.
	actual, _ := compiledPatternCache.LoadOrStore(pattern, re)
	return actual.(*regexp.Regexp), nil
}
// FieldCodeKind names the validation strategy a field code applies to
// a body value.
type FieldCodeKind string

// The supported field-code kinds.
const (
	// FieldCodeEnum accepts only values that are keys of Codes.
	FieldCodeEnum = FieldCodeKind("enum")
	// FieldCodePattern accepts only values matching Pattern, which is
	// anchored to the whole value.
	FieldCodePattern = FieldCodeKind("pattern")
	// FieldCodeFree accepts any string; Description is human-readable
	// only.
	FieldCodeFree = FieldCodeKind("free")
)
// FieldCode is one entry in a .zddc field_codes: map. A field code
// declares the allowed shape of one component used in record bodies
// and filename composition (e.g. originator, discipline, type,
// sequence). Operators define these at the project root or higher
// in the cascade; child levels can narrow or replace individual
// codes via the map-merge semantics in mergeOverlay.
//
// One discriminator field (Kind) selects which of the three shape
// fields applies:
//
// - Kind=enum: Codes is a code → human-label map. Labels surface
// in form dropdowns; validation checks membership of the key
// set only.
// - Kind=pattern: Pattern is a regular expression matched against
// the whole value (the server anchors it with ^…$ on compile).
// - Kind=free: no constraint; Description is the only field used
// and it's surfaced as help-text in the form UI.
//
// The struct is intentionally permissive in storage (all shape
// fields are present) but enforces grammar at unmarshal time so
// downstream consumers can rely on the kind matching the populated
// fields. That guarantee only holds for values produced by
// UnmarshalYAML; a FieldCode built directly in Go is not checked.
type FieldCode struct {
// Kind selects which of the shape fields below applies.
Kind FieldCodeKind `yaml:"kind" json:"kind"`
// Codes maps each allowed code to its human-readable label
// (Kind=enum only).
Codes map[string]string `yaml:"codes,omitempty" json:"codes,omitempty"`
// Pattern is the un-anchored regular expression a value must match
// in full (Kind=pattern only).
Pattern string `yaml:"pattern,omitempty" json:"pattern,omitempty"`
// Description is free-form help text; Validate never reads it.
Description string `yaml:"description,omitempty" json:"description,omitempty"`
}
// UnmarshalYAML decodes a field code and rejects any combination of
// fields that does not fit its kind:
//
//   - enum needs a non-empty codes: map and no pattern:
//   - pattern needs a pattern: that compiles, and no codes:
//   - free allows neither codes: nor pattern: (description is optional)
//
// A missing or unrecognised kind is an error. The receiver is only
// assigned once every check has passed.
func (fc *FieldCode) UnmarshalYAML(node *yaml.Node) error {
	// The alias type has FieldCode's fields but not its methods, so
	// decoding into it does not re-enter this function.
	type plain FieldCode
	var decoded plain
	if err := node.Decode(&decoded); err != nil {
		return err
	}

	hasCodes := len(decoded.Codes) > 0
	hasPattern := decoded.Pattern != ""

	switch decoded.Kind {
	case "":
		return fmt.Errorf("field_code: kind is required (one of enum|pattern|free)")
	case FieldCodeEnum:
		if !hasCodes {
			return fmt.Errorf("field_code kind=enum requires non-empty codes:")
		}
		if hasPattern {
			return fmt.Errorf("field_code kind=enum must not declare pattern:")
		}
	case FieldCodePattern:
		if !hasPattern {
			return fmt.Errorf("field_code kind=pattern requires pattern:")
		}
		if hasCodes {
			return fmt.Errorf("field_code kind=pattern must not declare codes:")
		}
		// Compiling here both validates the regex and warms the cache
		// that Validate reads from later.
		if _, err := compileFieldPattern(decoded.Pattern); err != nil {
			return fmt.Errorf("field_code kind=pattern: invalid regex: %w", err)
		}
	case FieldCodeFree:
		if hasCodes || hasPattern {
			return fmt.Errorf("field_code kind=free must not declare codes: or pattern:")
		}
	default:
		return fmt.Errorf("field_code: unknown kind %q (want enum|pattern|free)", decoded.Kind)
	}

	*fc = FieldCode(decoded)
	return nil
}
// Validate reports whether value has the shape this field code
// demands. It knows nothing about optionality: callers that treat the
// code as optional must decide for themselves whether an empty value
// is acceptable before calling.
//
// Enum codes require value to be a key of Codes; pattern codes require
// it to match Pattern in full. Every other kind, free included, passes
// unconditionally.
func (fc FieldCode) Validate(value string) error {
	if fc.Kind == FieldCodeEnum {
		if _, known := fc.Codes[value]; !known {
			return fmt.Errorf("value %q is not in the allowed code set", value)
		}
		return nil
	}

	if fc.Kind == FieldCodePattern {
		// The regexp is cached by pattern string (see
		// compiledPatternCache). Unmarshalling already compiled this
		// pattern successfully, so a failure here is an internal error.
		matcher, err := compileFieldPattern(fc.Pattern)
		if err != nil {
			return fmt.Errorf("internal: pattern recompile: %w", err)
		}
		if matcher.MatchString(value) {
			return nil
		}
		return fmt.Errorf("value %q does not match pattern %q", value, fc.Pattern)
	}

	// FieldCodeFree and anything else: no constraint.
	return nil
}
// RecordRule is one entry in a .zddc records: map. The map key is a
// filename-basename pattern (literal name like "ssr.yaml" or a glob
// like "*.yaml"); the entry describes the rules that apply to files
// matching that pattern in the directory at-or-below this cascade
// level.
//
// FilenameFormat is a composition template referencing field-code
// keys in braces, with `?` marking optional segments (omitted from
// the filename if the body field is empty or missing). Example:
//
// {originator}-{phase?}-{project}-{type}-{sequence}{suffix?}
//
// Field references must match keys declared in the cascade's
// field_codes: map; server-side composition and validation enforce
// that the body fields validate against the codes before composing.
//
// FieldDefaults supplies per-folder default values that the server
// injects when the client omits the field. Combined with Locked, a
// folder can pin a field to a single value (e.g. rsk/ pinning
// type=RSK).
//
// Locked is the list of field names that must not be overridden by
// the client. When the client submits a value that differs from
// FieldDefaults[field], the server returns 422.
//
// RowField names the per-row sequence field for tables whose rows
// are children of a parent deliverable (RSK pattern). When set,
// POST-create requests omit the field and the server assigns the
// next available value within the group identified by
// RowScopeFields. PUT-update preserves the existing value.
//
// RowScopeFields names the fields that, together, identify the
// parent deliverable that a row belongs to. Two records with the
// same scope-field values share a row-numbering sequence.
//
// FolderFields binds a body field to an ancestor folder name, making
// the folder the single source of truth for that component. The map
// value is the number of parent directories ABOVE the record's own
// directory whose folder name supplies the value (0 = the record's
// own directory, 1 = its parent, …). Negative distances are rejected
// when the .zddc is parsed. The server overwrites the body
// field with the derived name before validation and filename
// composition, so a client value can never disagree with the path.
// Example: under archive/<party>/mdl/, a record file's directory is
// mdl/ and its parent (distance 1) is the <party> folder, so
//
// folder_fields: { originator: 1 }
//
// pins every deliverable's originator to its party-folder name. The
// form renderer marks such fields read-only and pre-fills the derived
// value.
type RecordRule struct {
// FilenameFormat is the brace-template used to compose filenames.
FilenameFormat string `yaml:"filename_format,omitempty" json:"filename_format,omitempty"`
// FieldDefaults maps field name → default value.
FieldDefaults map[string]string `yaml:"field_defaults,omitempty" json:"field_defaults,omitempty"`
// Locked lists the field names a client may not override.
Locked []string `yaml:"locked,omitempty" json:"locked,omitempty"`
// RowField names the server-assigned per-row sequence field.
RowField string `yaml:"row_field,omitempty" json:"row_field,omitempty"`
// RowScopeFields is the ordered list of fields identifying a row's
// parent deliverable.
RowScopeFields []string `yaml:"row_scope_fields,omitempty" json:"row_scope_fields,omitempty"`
// FolderFields maps field name → parent distance (>= 0).
FolderFields map[string]int `yaml:"folder_fields,omitempty" json:"folder_fields,omitempty"`
}
// UnmarshalYAML validates a RecordRule when its .zddc is parsed, so a
// misconfiguration fails at load time rather than as a 500 on the
// first record write. The `type raw` alias avoids re-invoking this
// method (mirrors FieldCode.UnmarshalYAML).
//
// folder_fields distances count parent directories above the record's
// own directory (0 = that directory, N = N levels up), so a negative
// value is meaningless — reject it with a message that names the field.
// When several entries are negative, the lexicographically smallest
// field name is the one reported, so the same input always produces
// the same error.
//
// The receiver is left untouched when an error is returned.
func (rr *RecordRule) UnmarshalYAML(node *yaml.Node) error {
	type raw RecordRule
	var r raw
	if err := node.Decode(&r); err != nil {
		return err
	}

	// Map iteration order is unspecified, so scan every entry and keep
	// the smallest offending key instead of returning on the first hit.
	badField, found := "", false
	for field, dist := range r.FolderFields {
		if dist < 0 && (!found || field < badField) {
			badField, found = field, true
		}
	}
	if found {
		return fmt.Errorf("folder_fields[%q]: distance must be >= 0 (parents above the record dir), got %d", badField, r.FolderFields[badField])
	}

	*rr = RecordRule(r)
	return nil
}
// mergeRecordRule layers top over base and returns the combined rule.
// Used by mergeOverlay for per-pattern entries in Records.
//
//   - FilenameFormat, RowField: top wins when it is non-empty.
//   - FieldDefaults, FolderFields: map-merged, top winning per key.
//   - Locked: combined through mergeStringSlice (concat-dedupe, so
//     children can add locks but never unlock).
//   - RowScopeFields: replaced wholesale by a copy of top's list when
//     top has one.
func mergeRecordRule(base, top RecordRule) RecordRule {
	merged := base

	if format := top.FilenameFormat; format != "" {
		merged.FilenameFormat = format
	}

	merged.FieldDefaults = mergeStringMap(merged.FieldDefaults, top.FieldDefaults)
	merged.Locked = mergeStringSlice(merged.Locked, top.Locked)

	if rowField := top.RowField; rowField != "" {
		merged.RowField = rowField
	}

	// The scope list is ordered and composition relies on that order,
	// so it is never merged element-wise: a non-empty top list replaces
	// base's entirely. The copy detaches the result from top's backing
	// array.
	if len(top.RowScopeFields) != 0 {
		merged.RowScopeFields = append([]string(nil), top.RowScopeFields...)
	}

	merged.FolderFields = mergeIntMap(merged.FolderFields, top.FolderFields)
	return merged
}
// mergeIntMap overlays top onto base, top winning on shared keys.
// Mirrors mergeStringMap's semantics for FolderFields.
//
// When top is empty, base itself is returned (not a copy); otherwise
// the result is a freshly allocated map and neither input is modified.
func mergeIntMap(base, top map[string]int) map[string]int {
	if len(top) == 0 {
		return base
	}
	merged := make(map[string]int, len(base)+len(top))
	// Later layers overwrite earlier ones, so base goes in first.
	for _, layer := range []map[string]int{base, top} {
		for key, dist := range layer {
			merged[key] = dist
		}
	}
	return merged
}
// matchRecordRule picks the RecordRule that applies to a given file
// basename. Literal-key matches win over glob matches. When several
// glob keys match, the lexicographically smallest key wins, so the
// result does not depend on map iteration order.
//
// Keys that are not valid path.Match patterns are ignored. A nil or
// empty rules map never matches.
//
// Returns ("", RecordRule{}, false) when no entry matches.
func matchRecordRule(rules map[string]RecordRule, basename string) (string, RecordRule, bool) {
	// Pass 1: exact key. Indexing a nil map is safe and reports !ok.
	if rule, ok := rules[basename]; ok {
		return basename, rule, true
	}

	// Pass 2: glob via path.Match (basename-only, no separators). No
	// key can equal basename here, since pass 1 would have returned.
	bestKey, found := "", false
	for key := range rules {
		matched, err := path.Match(key, basename)
		if err != nil || !matched {
			// Bad pattern: skip rather than aborting the cascade walk.
			continue
		}
		if !found || key < bestKey {
			bestKey, found = key, true
		}
	}
	if !found {
		return "", RecordRule{}, false
	}
	return bestKey, rules[bestKey], true
}