ZDDC/zddc/internal/handler/history.go
ZDDC 3b2280de7f test(handler): coverage for record audit + history flows
Adds history_test.go with eight cases exercising the record-write
orchestration path:
- CreateStampsAuditFields: PUT to a fresh mdl path → audit fields
  injected; response echoes the stamped YAML; no history dir yet.
- UpdateIncrementsRevisionAndArchivesPrior: second PUT archives
  the prior bytes under .history/<base>/<ts>-<sha8>.yaml, bumps
  revision, preserves created_*, chains previous_sha.
- ConflictPreservesHistory: 412 from stale If-Match leaves the live
  file untouched and writes NO history entry (the failed write must
  be a true no-op).
- ClientAuditFieldsStripped: client-supplied created_by / revision
  are silently overwritten by server values — anti-forgery test.
- FilenameMismatch: URL says ...-0002 but body composes to ...-0001
  → 422.
- LockedFieldRejected: posting type=SPC to an rsk row → 422 with
  /type error (rsk/ locks type=RSK via cascade).
- SSRHistoryAtPartyLevel: writes to archive/<party>/ssr.yaml put
  history at archive/<party>/.history/ssr/, NOT at
  archive/.history/<party>/.
- RollupCreate_AssignsRowAndComposesFilename: three POSTs to
  /project/rsk/form.html in two table-scope groups demonstrate the
  server picks up filename_format + row_field+row_scope_fields from
  the cascade, auto-assigns sequence row numbers per group, and
  composes the canonical filename.

Bug fix surfaced by the first test: composeFilename was eliding TWO
separators around an optional placeholder when one was correct.
"ACM-{phase?}-PRJ" with phase="" was producing "ACMPRJ" instead of
"ACM-PRJ". Now drops only the trailing separator from output and
lets the next iteration emit the connector.

Default-project-{mdl,rsk}.form.yaml updated: project-rollup MDL +
RSK schemas gained the six readOnly audit fields and the project-
rsk schema picked up the full table-tracking component shape (+
row) plus an enum-locked type=RSK. The required: list no longer
includes type for rsk schemas — the cascade's field_defaults
injects it after schema validation, and requiring it would 422
well-behaved clients.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-19 10:08:52 -05:00

706 lines
22 KiB
Go

// Package handler — history.go: orchestrates writes of "record" YAML
// files (mdl rows, rsk rows, ssr.yaml) with three guarantees the
// generic file API cannot make on its own:
//
// 1. Audit fields are server-managed. created_at / created_by /
// updated_at / updated_by / revision / previous_sha are stripped
// from incoming bodies and stamped from the request context,
// making client-side forgery impossible.
//
// 2. Prior bytes are preserved. Before the live file is overwritten
// the previous content is copied (byte-for-byte) into
// <dir>/.history/<base>/<RFC3339Nano-UTC>-<sha8>.<ext>. The
// filename embeds the timestamp + the SHA-256 prefix of the prior
// bytes — the same value that's stamped into the new record's
// previous_sha field — so the chain is auditable.
//
// 3. Filename composition is enforced. When the matched RecordRule
// declares a filename_format, the server composes the expected
// basename from body fields and rejects writes whose URL doesn't
// agree. This binds the on-disk identity to the body's
// tracking-number components, eliminating drift.
//
// Records are identified by the cascade: a .zddc records: entry
// matched against the basename selects the rule. Files that don't
// match any rule fall through to a plain write — non-record YAML
// (table.yaml, form.yaml, plain documents) is unaffected.
package handler
import (
"crypto/sha256"
"encoding/hex"
"encoding/json"
"errors"
"fmt"
"net/http"
"os"
"path/filepath"
"regexp"
"sort"
"strings"
"time"
"codeberg.org/VARASYS/ZDDC/zddc/internal/config"
"codeberg.org/VARASYS/ZDDC/zddc/internal/jsonschema"
"codeberg.org/VARASYS/ZDDC/zddc/internal/zddc"
"gopkg.in/yaml.v3"
)
// Audit-field key names. Snake-case to match the existing .zddc
// `created_by:` precedent rather than the camelCase used in form
// schemas (those describe domain data; these describe provenance).
// stripAuditFields removes exactly these keys from incoming bodies and
// WriteWithHistory re-stamps them on every record write.
const (
auditFieldCreatedAt = "created_at" // set to the write time on create; carried over from the prior body on update
auditFieldCreatedBy = "created_by" // set to the principal on create; carried over from the prior body on update
auditFieldUpdatedAt = "updated_at" // refreshed to the write time on every record write
auditFieldUpdatedBy = "updated_by" // refreshed to the principal on every record write
auditFieldRevision = "revision" // 1 on create; prior revision + 1 on update
auditFieldPreviousSha = "previous_sha" // sha8 of the prior bytes; only stamped on update
)
// historyDirName is the dot-prefixed bookkeeping folder under each
// record-containing directory. resolveTargetPath's dot-segment
// rejection means no client URL can reach into .history/ — only the
// server's own history-write code path touches it.
const historyDirName = ".history"
// WriteRecordResult carries what serveFilePut needs to surface a
// response after a successful record write.
type WriteRecordResult struct {
FinalBody []byte // bytes actually written to disk: the stamped YAML for records, the raw request body when no rule matched
Created bool // true if no prior file existed (response 201 vs 200)
}
// WriteWithHistory orchestrates a record write at abs (which must be
// the canonical on-disk path — virtual-view rewriting already
// applied). cleanURL is the URL the caller surfaces (for audit
// logging); the current body never references it. body is the raw
// request bytes. principalEmail is stamped into created_by /
// updated_by; an empty value is recorded as "anonymous".
//
// Processing order: resolve the cascade for abs's directory, read any
// prior bytes, parse body as a YAML map, strip client audit fields,
// then — only when a records: rule matches the basename — apply
// field_defaults / locked checks, field_codes validation, filename
// composition, audit stamping, the history archive, and finally the
// live write. Without a matching rule the raw body is written as-is.
//
// Returns:
// - res, nil, nil: success; caller writes 200/201 + ETag.
// - _, errs, nil: 422 with the validation errors (body is not
// valid YAML, locked mismatch, field_code
// violation, filename composition mismatch).
// - _, _, err: internal error; caller writes 500.
//
// The function does NOT do ACL, ETag-precondition, or canonical-
// ancestor seeding — those are still serveFilePut's job and run
// before this call. The function DOES handle prior-bytes capture,
// audit stamping, history write, and live write.
func WriteWithHistory(cfg config.Config, abs, cleanURL string, body []byte, principalEmail string) (WriteRecordResult, []jsonschema.Error, error) {
dir := filepath.Dir(abs)
base := filepath.Base(abs)
// Resolve cascade at the record's parent dir.
chain, err := zddc.EffectivePolicy(cfg.Root, dir)
if err != nil {
return WriteRecordResult{}, nil, fmt.Errorf("effective policy: %w", err)
}
// The first return value of EffectiveRecordRule is not needed here.
_, rule, hasRule := chain.EffectiveRecordRule(base)
// Read prior bytes (nil if create). Any read error other than
// "does not exist" aborts the write as an internal error.
var priorBody []byte
priorExisted := false
if data, err := os.ReadFile(abs); err == nil {
priorBody = data
priorExisted = true
} else if !errors.Is(err, os.ErrNotExist) {
return WriteRecordResult{}, nil, fmt.Errorf("read prior: %w", err)
}
// Parse incoming body as a YAML map. Empty body is allowed
// (the schema validator catches required-field omissions, or
// the caller-side spec is permissive); we use an empty map.
// Note this runs before the rule check below, so a body that is
// not a YAML map is rejected even when no rule matches.
bodyMap := map[string]any{}
if len(body) > 0 {
if err := yaml.Unmarshal(body, &bodyMap); err != nil {
return WriteRecordResult{}, []jsonschema.Error{{Path: "/", Message: "body is not valid YAML: " + err.Error()}}, nil
}
if bodyMap == nil {
bodyMap = map[string]any{}
}
}
// Strip client-supplied audit fields. The server is the sole
// authority for these; any value we'd accept here is forgeable.
stripAuditFields(bodyMap)
// Honor records: rule. If no rule matched the basename, fall
// through to a plain write (no stamping, no history) — this
// covers non-record YAML files like table.yaml that may share
// a directory with records. The raw body (not bodyMap) is what
// gets written, so the stripping above has no effect on this path.
if !hasRule {
if err := zddc.WriteAtomic(abs, body); err != nil {
return WriteRecordResult{}, nil, fmt.Errorf("write: %w", err)
}
return WriteRecordResult{FinalBody: body, Created: !priorExisted}, nil, nil
}
codes := chain.EffectiveFieldCodes()
// Inject field_defaults for keys the body omitted (so the
// stamped result is self-describing) and check locked: against
// any conflicting client values. Because the loop ranges over
// FieldDefaults, a locked key is only enforced when it also has
// a field_defaults entry to compare against.
var verrs []jsonschema.Error
for k, want := range rule.FieldDefaults {
got, present := bodyMap[k]
if !present {
bodyMap[k] = want
continue
}
if locked := containsString(rule.Locked, k); locked {
gotStr := asString(got)
if gotStr != want {
verrs = append(verrs, jsonschema.Error{
Path: "/" + k,
Message: fmt.Sprintf("field is locked to %q in this folder; got %q", want, gotStr),
})
}
}
}
// Validate body values against field_codes (best-effort: only
// fields actually present in the body are checked; absent
// fields are someone else's concern — typically the form
// schema's required: list).
for k, code := range codes {
raw, ok := bodyMap[k]
if !ok {
continue
}
s := asString(raw)
if s == "" {
continue // empty/optional — schema enforces presence
}
if err := code.Validate(s); err != nil {
verrs = append(verrs, jsonschema.Error{
Path: "/" + k,
Message: err.Error(),
})
}
}
// Locked and field_codes violations are reported together in a
// single 422 before any filename check or disk write happens.
if len(verrs) > 0 {
return WriteRecordResult{}, verrs, nil
}
// Compose filename from body when filename_format is set, and
// verify the URL basename matches. Skipped when the rule has
// no format (SSR: identity is the parent folder name).
if rule.FilenameFormat != "" {
composed, cerr := composeFilename(rule.FilenameFormat, bodyMap)
if cerr != nil {
return WriteRecordResult{}, []jsonschema.Error{{Path: "/", Message: cerr.Error()}}, nil
}
// The format describes the stem only; the extension is taken
// from the URL basename before comparing.
ext := filepath.Ext(base)
composedWithExt := composed + ext
if composedWithExt != base {
return WriteRecordResult{}, []jsonschema.Error{{
Path: "/",
Message: fmt.Sprintf("filename mismatch: URL is %q, body composes to %q", base, composedWithExt),
}}, nil
}
}
// Stamp audit fields. On create: created_* and updated_* are
// both the current principal/timestamp; revision = 1. On
// update: preserve created_* (parse from priorBody), refresh
// updated_*, increment revision, set previous_sha = sha-prefix
// of priorBody.
now := time.Now().UTC().Format(time.RFC3339Nano)
if principalEmail == "" {
principalEmail = "anonymous"
}
priorAudit := parsePriorAudit(priorBody)
if !priorExisted {
bodyMap[auditFieldCreatedAt] = now
bodyMap[auditFieldCreatedBy] = principalEmail
bodyMap[auditFieldRevision] = 1
} else {
if priorAudit.createdAt != "" {
bodyMap[auditFieldCreatedAt] = priorAudit.createdAt
} else {
// Lazy migration: the prior file had no created_*
// stamp. Treat this write as the establishment of
// audit history — created and updated are the same
// principal/timestamp (we don't know who originally
// authored it).
bodyMap[auditFieldCreatedAt] = now
}
if priorAudit.createdBy != "" {
bodyMap[auditFieldCreatedBy] = priorAudit.createdBy
} else {
bodyMap[auditFieldCreatedBy] = principalEmail
}
// A prior body without a usable revision parses as 0, so the
// first stamped update of an unstamped file gets revision 1.
bodyMap[auditFieldRevision] = priorAudit.revision + 1
bodyMap[auditFieldPreviousSha] = sha8(priorBody)
}
bodyMap[auditFieldUpdatedAt] = now
bodyMap[auditFieldUpdatedBy] = principalEmail
finalBody, err := yaml.Marshal(bodyMap)
if err != nil {
return WriteRecordResult{}, nil, fmt.Errorf("marshal: %w", err)
}
// Write history BEFORE live. If we crash after history but
// before live, the prior version is safe (still on disk under
// its history filename). The reverse order would lose the
// prior bytes if the live write succeeded but history failed.
// The history filename embeds this call's `now`, so a retry
// after a failed live write archives the same prior bytes again
// under a new timestamp (same sha8 suffix) instead of
// overwriting the earlier entry — redundant but harmless.
if priorExisted {
histDir := filepath.Join(dir, historyDirName, stripExt(base))
if err := os.MkdirAll(histDir, 0o755); err != nil {
return WriteRecordResult{}, nil, fmt.Errorf("mkdir history: %w", err)
}
histName := now + "-" + sha8(priorBody) + filepath.Ext(base)
histPath := filepath.Join(histDir, histName)
if err := zddc.WriteAtomic(histPath, priorBody); err != nil {
return WriteRecordResult{}, nil, fmt.Errorf("write history: %w", err)
}
}
if err := zddc.WriteAtomic(abs, finalBody); err != nil {
return WriteRecordResult{}, nil, fmt.Errorf("write live: %w", err)
}
return WriteRecordResult{FinalBody: finalBody, Created: !priorExisted}, nil, nil
}
// priorAuditSnapshot is the minimum we need from a prior version's
// body to stamp the next revision: who created it (preserved
// forever) and what revision number it carried (so we can ++).
// Fields keep their zero value when the prior body lacks the
// corresponding key or stores it with an unexpected type.
type priorAuditSnapshot struct {
createdAt string // value of created_at in the prior body, "" if absent
createdBy string // value of created_by in the prior body, "" if absent
revision int // value of revision in the prior body, 0 if absent
}
// parsePriorAudit pulls created_at, created_by and revision out of a
// previously written record body. It never fails: an empty body, a
// body that does not decode into a YAML map, or a key with an
// unexpected type simply leaves the matching snapshot field at its
// zero value.
func parsePriorAudit(body []byte) priorAuditSnapshot {
	var snap priorAuditSnapshot
	if len(body) == 0 {
		return snap
	}
	fields := map[string]any{}
	if yaml.Unmarshal(body, &fields) != nil {
		return snap
	}
	// A failed string assertion yields "", which is the zero value
	// the snapshot already carries.
	snap.createdAt, _ = fields[auditFieldCreatedAt].(string)
	snap.createdBy, _ = fields[auditFieldCreatedBy].(string)
	// The decoder may hand back different numeric kinds; accept the
	// ones that can carry a revision counter.
	switch rev := fields[auditFieldRevision].(type) {
	case int:
		snap.revision = rev
	case int64:
		snap.revision = int(rev)
	case float64:
		snap.revision = int(rev)
	}
	return snap
}
// stripAuditFields removes every server-managed audit key from m in
// place, so that client-supplied values can never survive into the
// stamped record. Keys that are absent are ignored.
func stripAuditFields(m map[string]any) {
	serverManaged := [...]string{
		auditFieldCreatedAt,
		auditFieldCreatedBy,
		auditFieldUpdatedAt,
		auditFieldUpdatedBy,
		auditFieldRevision,
		auditFieldPreviousSha,
	}
	for _, key := range serverManaged {
		delete(m, key)
	}
}
// composeFilename interpolates a filename_format template against
// the supplied body fields. Placeholders are {fieldname} for
// required segments and {fieldname?} for optional ones. Field values
// are converted with asString; a required placeholder whose value is
// missing or empty is an error, while an empty optional placeholder
// is dropped together with exactly one adjacent "-" or "_".
//
// Example, format = "{originator}-{phase?}-{project}-{type}-{sequence}{suffix?}"
// with body = {originator: ACM, project: PRJ, type: SPC, sequence: 0001}
// (phase + suffix absent) yields "ACM-PRJ-SPC-0001".
//
// Adjacent-separator handling for an empty optional placeholder:
//   - if the output built so far ends in "-" or "_", that one
//     character is removed and the separator following the
//     placeholder (if any) is emitted normally, leaving a single
//     connector ("A-{b?}-C" → "A-C");
//   - if nothing has been emitted yet (the placeholder opens the
//     name), the separator that follows it is skipped instead, so
//     "{b?}-C" yields "C" rather than a name starting with "-".
//
// Static text not adjacent to a placeholder is preserved as-is. A
// literal "{" or "}" must be escaped as "{{" / "}}".
//
// Returns an error for an unterminated placeholder or an empty
// required field.
func composeFilename(format string, body map[string]any) (string, error) {
	isSep := func(c byte) bool { return c == '-' || c == '_' }

	// A byte slice lets us trim the trailing separator in place
	// instead of rebuilding a strings.Builder.
	out := make([]byte, 0, len(format))
	for i := 0; i < len(format); {
		c := format[i]
		// Literal { or } escapes: {{ → {, }} → }.
		if (c == '{' || c == '}') && i+1 < len(format) && format[i+1] == c {
			out = append(out, c)
			i += 2
			continue
		}
		if c != '{' {
			out = append(out, c)
			i++
			continue
		}

		// Placeholder: scan to '}'.
		end := strings.IndexByte(format[i+1:], '}')
		if end == -1 {
			return "", fmt.Errorf("filename_format: unterminated placeholder at offset %d", i)
		}
		name := format[i+1 : i+1+end]
		i += end + 2 // past the '}'

		optional := strings.HasSuffix(name, "?")
		if optional {
			name = strings.TrimSuffix(name, "?")
		}

		val := asString(body[name])
		if val != "" {
			out = append(out, val...)
			continue
		}
		if !optional {
			return "", fmt.Errorf("filename_format: required field %q is missing or empty", name)
		}

		switch {
		case len(out) > 0 && isSep(out[len(out)-1]):
			// Drop the separator already written before the
			// placeholder; the one after it is emitted by the next
			// iteration, leaving exactly one connector.
			out = out[:len(out)-1]
		case len(out) == 0 && i < len(format) && isSep(format[i]):
			// Nothing precedes the placeholder, so the separator
			// that follows would become a dangling prefix: skip it.
			i++
		}
	}
	return string(out), nil
}
// AssignNextRow finds the next free row sequence within the
// row-scope group identified by scopeFields. Used by POST-create
// handlers (rsk row creation) before invoking WriteWithHistory.
// Returns the zero-padded string value to inject into bodyMap[rowField].
//
// Width is fixed at 3 (covers up to 999 rows per table). Operators
// who need more declare a per-deployment field_codes:row pattern;
// the width here is for the auto-assign output, not for parsing
// (which uses the matched pattern from the cascade).
func AssignNextRow(dir, rowField string, scopeFields []string, body map[string]any) (string, error) {
if rowField == "" {
return "", fmt.Errorf("row_field is empty")
}
entries, err := os.ReadDir(dir)
if err != nil {
if errors.Is(err, os.ErrNotExist) {
return "001", nil
}
return "", err
}
max := 0
rowRe := regexp.MustCompile(`^[0-9]+$`)
for _, ent := range entries {
if ent.IsDir() {
continue
}
name := ent.Name()
if !strings.HasSuffix(name, filepath.Ext(name)) {
continue
}
path := filepath.Join(dir, name)
data, err := os.ReadFile(path)
if err != nil {
continue
}
other := map[string]any{}
if err := yaml.Unmarshal(data, &other); err != nil {
continue
}
// Same scope group?
sameGroup := true
for _, f := range scopeFields {
if asString(other[f]) != asString(body[f]) {
sameGroup = false
break
}
}
if !sameGroup {
continue
}
v := asString(other[rowField])
if !rowRe.MatchString(v) {
continue
}
n := atoiSafe(v)
if n > max {
max = n
}
}
return fmt.Sprintf("%03d", max+1), nil
}
// ServeHistoryList responds to GET <record>.yaml?history=1 with the
// list of prior revisions archived under .history/<base>/. The
// caller has already evaluated ACL against the live record (read
// permission on the parent dir gates history visibility too — if
// you can read the current state you can read its history).
//
// The response body is a JSON array of HistoryEntry values in the
// order ListHistory returns them. A record without any archived
// revision yields an empty array ("[]"), never "null".
//
// Returns 404 when abs doesn't exist or isn't a record (the caller
// should rely on the live record's GET 404 path instead of leaking
// existence here, but defense in depth costs nothing). Any other
// stat or listing failure yields 500.
func ServeHistoryList(w http.ResponseWriter, r *http.Request, abs string) {
	if !isRecordPathForHistory(abs) {
		http.NotFound(w, r)
		return
	}
	if _, err := os.Stat(abs); err != nil {
		if errors.Is(err, os.ErrNotExist) {
			// Record file gone; the caller's normal 404 path
			// suppresses existence-leak, so we mirror that.
			http.NotFound(w, r)
			return
		}
		http.Error(w, "Internal Server Error", http.StatusInternalServerError)
		return
	}
	entries, err := ListHistory(abs)
	if err != nil {
		http.Error(w, "Internal Server Error", http.StatusInternalServerError)
		return
	}
	if entries == nil {
		// ListHistory reports "no history directory" as a nil slice,
		// which encoding/json would serialise as null. Clients of a
		// list endpoint expect an array they can iterate.
		entries = []HistoryEntry{}
	}
	w.Header().Set("Content-Type", "application/json")
	w.Header().Set("X-ZDDC-Source", "history-list")
	// json.NewEncoder for streaming; sort already happened in
	// ListHistory. If encoding fails the headers (and possibly part
	// of the body) are already on the wire, so there is no error
	// response left to send.
	_ = json.NewEncoder(w).Encode(entries)
}
// isRecordPathForHistory reports whether the basename of abs looks
// like a record file for the purposes of the history-list handler.
// It is this file's own copy of isRecordPath, kept here so the
// handler doesn't reach into its caller's internal helper; the two
// must be kept in sync when either learns a new extension.
//
// A path qualifies when its basename ends in .yaml or .yml and is
// neither one of the reserved names (table.yaml, form.yaml, .zddc)
// nor a *.table.yaml / *.form.yaml spec file.
func isRecordPathForHistory(abs string) bool {
	name := filepath.Base(abs)
	if name == "table.yaml" || name == "form.yaml" || name == ".zddc" {
		return false
	}
	switch filepath.Ext(name) {
	case ".yaml", ".yml":
		// candidate record; keep checking
	default:
		return false
	}
	for _, reserved := range []string{".table.yaml", ".form.yaml"} {
		if strings.HasSuffix(name, reserved) {
			return false
		}
	}
	return true
}
// HistoryEntry describes one prior revision of a record, as listed
// by ServeHistoryList. ListHistory fills Ts and Sha8 from the archive
// filename and Revision / By from the archived YAML body.
type HistoryEntry struct {
Revision int `json:"revision"` // revision stored in the archived body (0 if absent or the file is unreadable)
Ts string `json:"ts"` // timestamp portion of the archive filename
By string `json:"by"` // updated_by of the archived body when non-empty, otherwise its created_by
Sha8 string `json:"sha"` // filename segment after the last "-": the sha8 of the archived bytes
Path string `json:"path"` // location relative to the record's directory: .history/<base>/<file>
}
// ListHistory walks the .history/<base>/ directory adjacent to abs
// and returns one HistoryEntry per archived revision, sorted newest
// first. Returns a nil slice when the dir doesn't exist (e.g. record
// never updated); any other directory-read failure is returned as
// the error.
//
// Filename format: <RFC3339Nano>-<sha8>.<ext>. Author/revision are
// read from the YAML body's audit fields — those describe the
// archived bytes' provenance. Files whose stem contains no "-" are
// skipped; files that cannot be read still get an entry, just
// without Revision / By.
//
// Ordering is by the parsed timestamp, not by the filename string:
// RFC3339Nano drops trailing zeros from the fractional seconds, so a
// plain string comparison mis-orders e.g. "…:52Z" and "…:52.5Z".
// Entries whose timestamp does not parse sort after all parseable
// ones, newest-looking string first.
func ListHistory(abs string) ([]HistoryEntry, error) {
	dir := filepath.Dir(abs)
	base := filepath.Base(abs)
	histDir := filepath.Join(dir, historyDirName, stripExt(base))
	ents, err := os.ReadDir(histDir)
	if err != nil {
		if errors.Is(err, os.ErrNotExist) {
			return nil, nil
		}
		return nil, err
	}

	out := make([]HistoryEntry, 0, len(ents))
	// stamps caches the parsed form of every timestamp that parses,
	// so the sort comparator doesn't re-parse on each comparison.
	stamps := make(map[string]time.Time, len(ents))
	for _, e := range ents {
		if e.IsDir() {
			continue
		}
		name := e.Name()
		// Expected shape: <ts>-<sha8>.<ext>. Parse from the right
		// to be lenient about timestamps that contain '-'.
		ext := filepath.Ext(name)
		stem := strings.TrimSuffix(name, ext)
		idx := strings.LastIndexByte(stem, '-')
		if idx < 0 {
			continue
		}
		ts := stem[:idx]
		sha := stem[idx+1:]
		if at, perr := time.Parse(time.RFC3339Nano, ts); perr == nil {
			stamps[ts] = at
		}
		entry := HistoryEntry{Ts: ts, Sha8: sha, Path: filepath.Join(historyDirName, stripExt(base), name)}
		// Pull author + revision from the archived body.
		if data, err := os.ReadFile(filepath.Join(histDir, name)); err == nil {
			snap := parsePriorAudit(data)
			entry.Revision = snap.revision
			entry.By = snap.createdBy
			// updated_by is more informative when present.
			m := map[string]any{}
			if err := yaml.Unmarshal(data, &m); err == nil {
				if v, ok := m[auditFieldUpdatedBy].(string); ok && v != "" {
					entry.By = v
				}
			}
		}
		out = append(out, entry)
	}

	sort.SliceStable(out, func(i, j int) bool {
		ti, okI := stamps[out[i].Ts]
		tj, okJ := stamps[out[j].Ts]
		switch {
		case okI && okJ:
			return ti.After(tj)
		case okI != okJ:
			return okI // parseable timestamps come first
		default:
			return out[i].Ts > out[j].Ts
		}
	})
	return out, nil
}
// augmentSchemaFromCascade enriches schema in place with constraints
// resolved from the cascade. It never overrides what the schema
// author already wrote:
//
//   - a property named after an enum field code gets its Enum (the
//     code keys, sorted) and Labels (key → display string) filled in
//     when they are empty;
//   - a property named after a pattern field code gets the Pattern
//     when it has none;
//   - properties listed in the representative record rule's locked
//     list are marked ReadOnly, and properties with a field_default
//     receive it as Default when Default is nil.
//
// The representative rule is whatever the chain returns for the
// probe name "placeholder.yaml"; rules that only match specific
// literal basenames are therefore not reflected here. gateDir is the
// directory the cascade was resolved at; the current body does not
// use it.
//
// No-op when schema or schema.Properties is nil.
func augmentSchemaFromCascade(schema *jsonschema.Schema, chain zddc.PolicyChain, gateDir string) {
	if schema == nil || schema.Properties == nil {
		return
	}

	fieldCodes := chain.EffectiveFieldCodes()
	for propName, prop := range schema.Properties {
		code, known := fieldCodes[propName]
		if !known {
			continue
		}
		switch code.Kind {
		case zddc.FieldCodeEnum:
			if len(prop.Enum) == 0 {
				// Sorted keys give a deterministic enum order.
				sortedKeys := make([]string, 0, len(code.Codes))
				for key := range code.Codes {
					sortedKeys = append(sortedKeys, key)
				}
				sort.Strings(sortedKeys)
				enum := make([]any, 0, len(sortedKeys))
				for _, key := range sortedKeys {
					enum = append(enum, key)
				}
				prop.Enum = enum
			}
			if prop.Labels == nil && len(code.Codes) > 0 {
				labels := make(map[string]string, len(code.Codes))
				for key, label := range code.Codes {
					labels[key] = label
				}
				prop.Labels = labels
			}
		case zddc.FieldCodePattern:
			if prop.Pattern == "" {
				prop.Pattern = code.Pattern
			}
		case zddc.FieldCodeFree:
			// Nothing to inject for free-text codes.
		}
	}

	_, rule, matched := chain.EffectiveRecordRule("placeholder.yaml")
	if !matched {
		return
	}
	for _, lockedName := range rule.Locked {
		if prop, ok := schema.Properties[lockedName]; ok {
			prop.ReadOnly = true
		}
	}
	for defName, defVal := range rule.FieldDefaults {
		prop, ok := schema.Properties[defName]
		if ok && prop.Default == nil {
			prop.Default = defVal
		}
	}
}
// ---- small helpers ----
// sha8 returns the first eight lowercase hex characters (four bytes)
// of the SHA-256 digest of data.
func sha8(data []byte) string {
	digest := sha256.Sum256(data)
	// Four digest bytes encode to exactly eight hex characters.
	return hex.EncodeToString(digest[:4])
}
// stripExt returns name without its final extension as reported by
// filepath.Ext; a name with no extension is returned unchanged.
func stripExt(name string) string {
	ext := filepath.Ext(name)
	if ext == "" {
		return name
	}
	return strings.TrimSuffix(name, ext)
}
// containsString reports whether needle is an element of haystack.
// A nil or empty haystack contains nothing.
func containsString(haystack []string, needle string) bool {
	for i := 0; i < len(haystack); i++ {
		if haystack[i] == needle {
			return true
		}
	}
	return false
}
// asString renders a decoded scalar as the string used for
// comparisons and filename composition. nil becomes "", strings pass
// through untouched, int / int64 print in decimal, and a float64
// holding a whole number prints without a fractional part (so a
// decoded 3.0 reads "3"). Every other value uses fmt's default
// formatting.
func asString(v any) string {
	switch val := v.(type) {
	case nil:
		return ""
	case string:
		return val
	case int:
		return fmt.Sprint(val)
	case int64:
		return fmt.Sprint(val)
	case float64:
		// Strip the trailing .0 for the common integer-in-JSON case.
		if whole := int64(val); float64(whole) == val {
			return fmt.Sprint(whole)
		}
	}
	return fmt.Sprint(v)
}
// atoiSafe parses s as a non-negative decimal integer. It returns 0
// for the empty string and for any input containing a character
// other than the ASCII digits 0-9 (signs and spaces included). No
// overflow check is performed.
func atoiSafe(s string) int {
	total := 0
	for i := 0; i < len(s); i++ {
		digit := s[i]
		if digit < '0' || digit > '9' {
			return 0
		}
		total = total*10 + int(digit-'0')
	}
	return total
}