From 875827d484bd5974df20b99365163975ac3355be Mon Sep 17 00:00:00 2001 From: ZDDC Date: Thu, 21 May 2026 15:28:35 -0500 Subject: [PATCH] fix(records): validate folder_fields at load time + cache field-code patterns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add RecordRule.UnmarshalYAML so a misconfigured folder_fields fails when the .zddc is parsed, not as a 500 on the first record write. A negative parent-distance is now rejected with a message naming the field. Mirrors FieldCode.UnmarshalYAML's raw-alias pattern. - Memoize anchored field-code pattern regexes in a package-level sync.Map (compileFieldPattern), used by both the unmarshal-time validation and FieldCode.Validate — replacing the per-call regexp.Compile that the old comment flagged as cache-if-it-shows-up. Tests: negative distance rejected (standalone + nested in a records: map), valid distance round-trips, pattern field code matches anchored. Co-Authored-By: Claude Opus 4.7 (1M context) --- zddc/internal/zddc/field_codes.go | 55 ++++++++++++++++++--- zddc/internal/zddc/field_codes_test.go | 67 ++++++++++++++++++++++++++ 2 files changed, 116 insertions(+), 6 deletions(-) create mode 100644 zddc/internal/zddc/field_codes_test.go diff --git a/zddc/internal/zddc/field_codes.go b/zddc/internal/zddc/field_codes.go index 7abe217..f8bf2de 100644 --- a/zddc/internal/zddc/field_codes.go +++ b/zddc/internal/zddc/field_codes.go @@ -4,10 +4,31 @@ import ( "fmt" "path" "regexp" + "sync" "gopkg.in/yaml.v3" ) +// compiledPatternCache memoizes anchored field-code regexes keyed by +// the raw pattern string. Patterns are validated (compiled) at +// .zddc-unmarshal time and re-used on every Validate call, so the +// cache is effectively populated once per distinct pattern. +var compiledPatternCache sync.Map // string -> *regexp.Regexp + +// compileFieldPattern returns the anchored regexp for a field-code +// pattern, compiling+caching on first use. +func compileFieldPattern(pattern string) (*regexp.Regexp, error) { + if v, ok := compiledPatternCache.Load(pattern); ok { + return v.(*regexp.Regexp), nil + } + re, err := regexp.Compile("^(?:" + pattern + ")$") + if err != nil { + return nil, err + } + compiledPatternCache.Store(pattern, re) + return re, nil +} + // FieldCodeKind discriminates the validation behaviour of a field code. type FieldCodeKind string @@ -75,7 +96,7 @@ func (fc *FieldCode) UnmarshalYAML(node *yaml.Node) error { if len(r.Codes) > 0 { return fmt.Errorf("field_code kind=pattern must not declare codes:") } - if _, err := regexp.Compile("^(?:" + r.Pattern + ")$"); err != nil { + if _, err := compileFieldPattern(r.Pattern); err != nil { return fmt.Errorf("field_code kind=pattern: invalid regex: %w", err) } case FieldCodeFree: @@ -102,11 +123,10 @@ func (fc FieldCode) Validate(value string) error { return fmt.Errorf("value %q is not in the allowed code set", value) } case FieldCodePattern: - // Anchor at unmarshal-time would require holding a *Regexp on - // the struct; for simplicity we recompile here. Hot paths can - // cache via a sync.Map keyed by the pattern string if this - // shows up in profiles. - re, err := regexp.Compile("^(?:" + fc.Pattern + ")$") + // Compiled once and cached by pattern string (see + // compiledPatternCache); the unmarshal-time compile already + // proved the pattern is valid, so an error here is internal. + re, err := compileFieldPattern(fc.Pattern) if err != nil { return fmt.Errorf("internal: pattern recompile: %w", err) } @@ -178,6 +198,29 @@ type RecordRule struct { FolderFields map[string]int `yaml:"folder_fields,omitempty" json:"folder_fields,omitempty"` } +// UnmarshalYAML validates a RecordRule when its .zddc is parsed, so a +// misconfiguration fails at load time rather than as a 500 on the +// first record write. The `type raw` alias avoids re-invoking this +// method (mirrors FieldCode.UnmarshalYAML). +// +// folder_fields distances count parent directories above the record's +// own directory (0 = that directory, N = N levels up), so a negative +// value is meaningless — reject it with a message that names the field. +func (rr *RecordRule) UnmarshalYAML(node *yaml.Node) error { + type raw RecordRule + var r raw + if err := node.Decode(&r); err != nil { + return err + } + for field, dist := range r.FolderFields { + if dist < 0 { + return fmt.Errorf("folder_fields[%q]: distance must be >= 0 (parents above the record dir), got %d", field, dist) + } + } + *rr = RecordRule(r) + return nil +} + // mergeRecordRule composes two RecordRules, top taking precedence on // scalars and FieldDefaults map-merge; Locked is concat-dedupe so // children can add locks but never unlock. Used by mergeOverlay for diff --git a/zddc/internal/zddc/field_codes_test.go b/zddc/internal/zddc/field_codes_test.go new file mode 100644 index 0000000..f5a6305 --- /dev/null +++ b/zddc/internal/zddc/field_codes_test.go @@ -0,0 +1,67 @@ +package zddc + +import ( + "strings" + "testing" + + "gopkg.in/yaml.v3" +) + +// TestRecordRule_RejectsNegativeFolderDistance — a folder_fields entry +// with a negative parent-distance must fail at .zddc parse time (via +// RecordRule.UnmarshalYAML), not surface as a 500 on the first record +// write. +func TestRecordRule_RejectsNegativeFolderDistance(t *testing.T) { + src := "folder_fields:\n originator: -1\nfilename_format: \"{originator}-{sequence}\"\n" + var rr RecordRule + err := yaml.Unmarshal([]byte(src), &rr) + if err == nil { + t.Fatal("expected error for negative folder_fields distance, got nil") + } + if !strings.Contains(err.Error(), "folder_fields") || !strings.Contains(err.Error(), "originator") { + t.Errorf("error should name folder_fields + the field: %v", err) + } +} + +// TestRecordRule_AcceptsValidFolderFields — a non-negative distance +// unmarshals and round-trips. +func TestRecordRule_AcceptsValidFolderFields(t *testing.T) { + src := "folder_fields:\n originator: 1\nfilename_format: \"{originator}-{sequence}\"\n" + var rr RecordRule + if err := yaml.Unmarshal([]byte(src), &rr); err != nil { + t.Fatalf("valid folder_fields should unmarshal: %v", err) + } + if rr.FolderFields["originator"] != 1 { + t.Errorf("folder_fields[originator]=%d want 1", rr.FolderFields["originator"]) + } +} + +// TestRecordRule_NegativeDistanceInRecordsMap — the same rejection +// applies when the rule is nested in a records: map (the real cascade +// shape: map[string]RecordRule decodes each value via UnmarshalYAML). +func TestRecordRule_NegativeDistanceInRecordsMap(t *testing.T) { + src := "\"*.yaml\":\n folder_fields:\n originator: -2\n" + var rules map[string]RecordRule + if err := yaml.Unmarshal([]byte(src), &rules); err == nil { + t.Fatal("expected error for negative distance in records map, got nil") + } +} + +// TestFieldCode_PatternValidate — pattern field codes match values +// against the anchored regex (exercises the compiled-pattern cache). +func TestFieldCode_PatternValidate(t *testing.T) { + src := "kind: pattern\npattern: \"[0-9]{4}\"\n" + var fc FieldCode + if err := yaml.Unmarshal([]byte(src), &fc); err != nil { + t.Fatalf("unmarshal: %v", err) + } + if err := fc.Validate("0042"); err != nil { + t.Errorf("0042 should match [0-9]{4}: %v", err) + } + if err := fc.Validate("42"); err == nil { + t.Error("42 should NOT match [0-9]{4} (anchored, 4 digits)") + } + if err := fc.Validate("0042x"); err == nil { + t.Error("0042x should NOT match (anchored)") + } +}