metronome/tests/run.mjs
Me Here 754ed1c22d Formalize track format: spec + golden-vector conformance suite
Single source of truth for the track ("program"/"patch") grammar, which was
implemented by hand in src/engine.js and pico-cp/app.py with no cross-check and
had quietly drifted.

- docs/track-format.md: formal grammar, container (programs.json) schema with a
  version field, the new per-track playback-flow model (rep/end + relative goto;
  default = loop forever), normalization rules, and a list of known divergences.
- tests/: golden vectors + a runner that loads the REAL engine.js and app.py
  grammar (no copies; app.py via ast extraction) and compares both against the
  spec. Exit non-zero on unexpected mismatch or round-trip break -> usable as CI.

Surfaces real divergences for follow-up: default accent pattern (no =pattern)
differs web vs device and affects shipped presets; euclid not parsed on device;
vol/cd dropped on device; unknown-sound fallback; tempo clamp; empty patch.
The rep/end playback-flow vectors are the acceptance test for building that.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-30 23:54:20 -05:00

97 lines
4.4 KiB
JavaScript

#!/usr/bin/env node
// Conformance runner for the PM track format.
//   node tests/run.mjs       run all golden vectors against engine.js + app.py
//   node tests/run.mjs -v    also print the expected/actual diff for every failure
//
// For each vector it parses `in` with both implementations, normalizes, and compares to
// `norm`. A mismatch on an impl listed in the vector's `expectFail` is "known" (expected);
// any other mismatch is a regression and fails the run. See docs/track-format.md.
import { readFileSync } from "node:fs";
import { fileURLToPath } from "node:url";
import { dirname, join } from "node:path";
import { execFileSync } from "node:child_process";
import * as js from "./adapters/js_adapter.mjs";
// Directory containing this runner; the fixture and adapter paths below are built from it.
const here = dirname(fileURLToPath(import.meta.url));
// True when `-v` appears anywhere in the process arguments.
const verbose = process.argv.indexOf("-v") !== -1;
// Golden vectors, read and parsed once when the script starts.
const fixtures = JSON.parse(
  readFileSync(join(here, "fixtures", "track-format.json"), { encoding: "utf8" })
);
// Path of the Python adapter script that runPy passes to python3.
const pyAdapter = join(here, "adapters", "py_adapter.py");
// Key-sorted JSON: plain-object keys are written in sorted order at every depth, so
// deep equality of two values can be checked by comparing their serialized strings.
// Arrays keep their element order; primitives and null pass through untouched.
const stable = (o) => {
  const byKey = ([a], [b]) => (a < b ? -1 : a > b ? 1 : 0);
  const sortObjectKeys = (_key, value) => {
    const isPlainObject = Boolean(value) && typeof value === "object" && !Array.isArray(value);
    if (!isPlainObject) return value;
    return Object.fromEntries(Object.entries(value).sort(byKey));
  };
  return JSON.stringify(o, sortObjectKeys);
};
// Run `patch` through the JS adapter's normalize() and canonical().
// Returns { norm, canonical, error: null } when both calls succeed. If either one
// throws, returns { norm: null, canonical: null, error } where `error` is the thrown
// value's message (or the thrown value itself when it has no message) as a string.
function runJs(patch) {
  let norm;
  let canonical;
  try {
    norm = js.normalize(patch);
    canonical = js.canonical(patch);
  } catch (err) {
    const reason = err.message || err;
    return { norm: null, canonical: null, error: String(reason) };
  }
  return { norm, canonical, error: null };
}
// Run `patch` through the Python side by executing the adapter script with python3;
// `patch` is passed as the script's only argument and its stdout is parsed as JSON.
// Returns the adapter's JSON object with `error: null` added when that works.
// On any failure (python3 cannot be started, non-zero exit, stdout that is not JSON)
// returns { norm: null, canonical: null, error }, where `error` is the last line the
// child wrote to stderr, or the exception's message when there is no stderr text.
function runPy(patch) {
  try {
    // `stdio` is given explicitly: without it execFileSync also copies the child's stderr
    // onto this process's stderr, so every failing vector would dump a Python traceback
    // into the runner's output. The text is still captured on the thrown error below.
    const out = execFileSync("python3", [pyAdapter, patch], { encoding: "utf8", stdio: "pipe" });
    return { ...JSON.parse(out), error: null };
  } catch (e) {
    // Keep only the final stderr line, falling back to the exception message.
    const msg = (e.stderr || "").toString().trim().split("\n").pop() || e.message;
    return { norm: null, canonical: null, error: msg };
  }
}
// `want` is an alias for `stable`.
const want = stable;
// Run-wide tallies, incremented while the vectors are evaluated.
let regressions = 0;
let fixedNowCount = 0;
let nonIdempotent = 0;
// One entry per fixture case, filled by the evaluation loop and read by the report.
const rows = [];
// True when the JS adapter's canonical form of `patch` is unchanged by being
// canonicalized a second time. Any exception from either call yields false.
function jsIdempotent(patch) {
  let first;
  let second;
  try {
    first = js.canonical(patch);
    second = js.canonical(first);
  } catch {
    return false;
  }
  return first === second;
}
// Evaluate every fixture case against both implementations and record one row per case.
for (const vector of fixtures.cases) {
  const expected = want(vector.norm);
  const anticipated = new Set(vector.expectFail || []);
  const row = { id: vector.id, status: vector.status };
  // Both implementations are run before either outcome is examined.
  const outcomes = { js: runJs(vector.in), py: runPy(vector.in) };
  for (const impl of Object.keys(outcomes)) {
    const res = outcomes[impl];
    const parsed = res.error == null;
    // serialize(parse(x)) must be stable under re-parsing (no silent drift on round-trip).
    // engine.js is re-checked here; for app.py the result's `idempotent` field is used,
    // and only an explicit `false` counts as a break.
    const roundTrips = impl === "js" ? jsIdempotent(vector.in) : res.idempotent !== false;
    if (parsed && !roundTrips) {
      nonIdempotent += 1;
      console.log(` ! non-idempotent serialize: ${vector.id} [${impl}]`);
    }
    const matches = parsed && want(res.norm) === expected;
    const listed = anticipated.has(impl);
    let mark;
    if (matches) {
      // Listed as failing but now matching: reported so the fixture can be updated.
      mark = listed ? "FIXED" : "PASS";
    } else {
      // A listed mismatch is an expected divergence; any other mismatch is a regression.
      mark = listed ? "known" : "FAIL";
    }
    if (mark === "FIXED") fixedNowCount += 1;
    if (mark === "FAIL") regressions += 1;
    row[impl] = mark;
    row[`${impl}_res`] = res;
    if (verbose && mark === "FAIL") {
      console.log(`\n--- ${vector.id} [${impl}] expected vs actual ---`);
      console.log("expected:", expected);
      console.log("actual: ", res.error ? "ERROR " + res.error : want(res.norm));
    }
  }
  rows.push(row);
}
// ---- report ----
// Prints the per-case table, the totals per mark and any follow-up notes, then sets the
// exit status: 1 when there is at least one unexpected failure or round-trip issue,
// otherwise the status is left at its default.
const pad = (s, n) => String(s).padEnd(n);
console.log("\n PM track-format conformance\n");
console.log(" " + pad("case", 26) + pad("status", 13) + pad("engine.js", 11) + "app.py");
console.log(" " + "-".repeat(58));
// Display text for each mark assigned during evaluation.
const glyph = { PASS: "✓ pass", known: "· known", FAIL: "✗ FAIL", FIXED: "★ fixed" };
for (const r of rows) {
  console.log(" " + pad(r.id, 26) + pad(r.status, 13) + pad(glyph[r.js], 11) + glyph[r.py]);
}
// Tally marks across both implementation columns.
const counts = rows.reduce((a, r) => { a[r.js] = (a[r.js] || 0) + 1; a[r.py] = (a[r.py] || 0) + 1; return a; }, {});
console.log("\n " + Object.entries(counts).map(([k, v]) => `${glyph[k] || k}: ${v}`).join(" "));
if (fixedNowCount) console.log(`\n ${fixedNowCount} case(s) marked expectFail now PASS — update the fixture (remove them from expectFail).`);
if (nonIdempotent) console.log(` ${nonIdempotent} non-idempotent serialize(s) above.`);
if (regressions || nonIdempotent) {
  console.log(`\n${regressions} unexpected failure(s), ${nonIdempotent} round-trip issue(s). Run with -v for diffs.\n`);
  // Set the exit code instead of calling process.exit(): exit() can end the process
  // before pending stdout writes are flushed, which would cut off the summary above.
  process.exitCode = 1;
} else {
  console.log("\n ✓ no unexpected failures; serialize round-trips are stable.\n");
}