fix(records): validate folder_fields at load time + cache field-code patterns

- Add RecordRule.UnmarshalYAML so a misconfigured folder_fields fails
  when the .zddc is parsed, not as a 500 on the first record write. A
  negative parent-distance is now rejected with a message naming the
  field. Mirrors FieldCode.UnmarshalYAML's raw-alias pattern.
- Memoize anchored field-code pattern regexes in a package-level
  sync.Map (compileFieldPattern), used by both the unmarshal-time
  validation and FieldCode.Validate — replacing the per-call
  regexp.Compile that the old comment flagged as cache-if-it-shows-up.

Tests: negative distance rejected (standalone + nested in a records:
map), valid distance round-trips, pattern field code matches anchored.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
ZDDC 2026-05-21 15:28:35 -05:00
parent 662bfbdbf9
commit 875827d484
2 changed files with 116 additions and 6 deletions

View file

@ -4,10 +4,31 @@ import (
"fmt"
"path"
"regexp"
"sync"
"gopkg.in/yaml.v3"
)
// compiledPatternCache memoizes anchored field-code regexes keyed by
// the raw pattern string. Patterns are validated (compiled) at
// .zddc-unmarshal time and re-used on every Validate call, so the
// cache is effectively populated once per distinct pattern.
var compiledPatternCache sync.Map // string -> *regexp.Regexp
// compileFieldPattern returns the anchored regexp for a field-code
// pattern, compiling+caching on first use.
func compileFieldPattern(pattern string) (*regexp.Regexp, error) {
if v, ok := compiledPatternCache.Load(pattern); ok {
return v.(*regexp.Regexp), nil
}
re, err := regexp.Compile("^(?:" + pattern + ")$")
if err != nil {
return nil, err
}
compiledPatternCache.Store(pattern, re)
return re, nil
}
// FieldCodeKind discriminates the validation behaviour of a field code.
type FieldCodeKind string
@ -75,7 +96,7 @@ func (fc *FieldCode) UnmarshalYAML(node *yaml.Node) error {
if len(r.Codes) > 0 {
return fmt.Errorf("field_code kind=pattern must not declare codes:")
}
if _, err := regexp.Compile("^(?:" + r.Pattern + ")$"); err != nil {
if _, err := compileFieldPattern(r.Pattern); err != nil {
return fmt.Errorf("field_code kind=pattern: invalid regex: %w", err)
}
case FieldCodeFree:
@ -102,11 +123,10 @@ func (fc FieldCode) Validate(value string) error {
return fmt.Errorf("value %q is not in the allowed code set", value)
}
case FieldCodePattern:
// Anchor at unmarshal-time would require holding a *Regexp on
// the struct; for simplicity we recompile here. Hot paths can
// cache via a sync.Map keyed by the pattern string if this
// shows up in profiles.
re, err := regexp.Compile("^(?:" + fc.Pattern + ")$")
// Compiled once and cached by pattern string (see
// compiledPatternCache); the unmarshal-time compile already
// proved the pattern is valid, so an error here is internal.
re, err := compileFieldPattern(fc.Pattern)
if err != nil {
return fmt.Errorf("internal: pattern recompile: %w", err)
}
@ -178,6 +198,29 @@ type RecordRule struct {
FolderFields map[string]int `yaml:"folder_fields,omitempty" json:"folder_fields,omitempty"`
}
// UnmarshalYAML validates a RecordRule when its .zddc is parsed, so a
// misconfiguration fails at load time rather than as a 500 on the
// first record write. The `type raw` alias avoids re-invoking this
// method (mirrors FieldCode.UnmarshalYAML).
//
// folder_fields distances count parent directories above the record's
// own directory (0 = that directory, N = N levels up), so a negative
// value is meaningless — reject it with a message that names the field.
func (rr *RecordRule) UnmarshalYAML(node *yaml.Node) error {
type raw RecordRule
var r raw
if err := node.Decode(&r); err != nil {
return err
}
for field, dist := range r.FolderFields {
if dist < 0 {
return fmt.Errorf("folder_fields[%q]: distance must be >= 0 (parents above the record dir), got %d", field, dist)
}
}
*rr = RecordRule(r)
return nil
}
// mergeRecordRule composes two RecordRules, top taking precedence on
// scalars and FieldDefaults map-merge; Locked is concat-dedupe so
// children can add locks but never unlock. Used by mergeOverlay for

View file

@ -0,0 +1,67 @@
package zddc
import (
"strings"
"testing"
"gopkg.in/yaml.v3"
)
// TestRecordRule_RejectsNegativeFolderDistance — a folder_fields entry
// with a negative parent-distance must fail at .zddc parse time (via
// RecordRule.UnmarshalYAML), not surface as a 500 on the first record
// write.
func TestRecordRule_RejectsNegativeFolderDistance(t *testing.T) {
src := "folder_fields:\n originator: -1\nfilename_format: \"{originator}-{sequence}\"\n"
var rr RecordRule
err := yaml.Unmarshal([]byte(src), &rr)
if err == nil {
t.Fatal("expected error for negative folder_fields distance, got nil")
}
if !strings.Contains(err.Error(), "folder_fields") || !strings.Contains(err.Error(), "originator") {
t.Errorf("error should name folder_fields + the field: %v", err)
}
}
// TestRecordRule_AcceptsValidFolderFields — a non-negative distance
// unmarshals and round-trips.
func TestRecordRule_AcceptsValidFolderFields(t *testing.T) {
src := "folder_fields:\n originator: 1\nfilename_format: \"{originator}-{sequence}\"\n"
var rr RecordRule
if err := yaml.Unmarshal([]byte(src), &rr); err != nil {
t.Fatalf("valid folder_fields should unmarshal: %v", err)
}
if rr.FolderFields["originator"] != 1 {
t.Errorf("folder_fields[originator]=%d want 1", rr.FolderFields["originator"])
}
}
// TestRecordRule_NegativeDistanceInRecordsMap — the same rejection
// applies when the rule is nested in a records: map (the real cascade
// shape: map[string]RecordRule decodes each value via UnmarshalYAML).
func TestRecordRule_NegativeDistanceInRecordsMap(t *testing.T) {
src := "\"*.yaml\":\n folder_fields:\n originator: -2\n"
var rules map[string]RecordRule
if err := yaml.Unmarshal([]byte(src), &rules); err == nil {
t.Fatal("expected error for negative distance in records map, got nil")
}
}
// TestFieldCode_PatternValidate — pattern field codes match values
// against the anchored regex (exercises the compiled-pattern cache).
func TestFieldCode_PatternValidate(t *testing.T) {
src := "kind: pattern\npattern: \"[0-9]{4}\"\n"
var fc FieldCode
if err := yaml.Unmarshal([]byte(src), &fc); err != nil {
t.Fatalf("unmarshal: %v", err)
}
if err := fc.Validate("0042"); err != nil {
t.Errorf("0042 should match [0-9]{4}: %v", err)
}
if err := fc.Validate("42"); err == nil {
t.Error("42 should NOT match [0-9]{4} (anchored, 4 digits)")
}
if err := fc.Validate("0042x"); err == nil {
t.Error("0042x should NOT match (anchored)")
}
}