feat(classifier): switch dataset export/import to a filename-per-file format

Replaces the ID-based dataset export/import (which required an external editor
to build a nested tree and keep node ids consistent) with a flat, AI-friendly
list: one record per input file carrying its full ZDDC filename — and an
optional transmittal {party, slot, date, type, seq, status, title}.

- Export: one {source, originalName, filename, excluded, transmittal?} record
  per source file (filename = the derived ZDDC name, "" if unassigned).
- Import: parses each filename and rebuilds the tracking tree (parseFolderLevels
  + addTrackingPath, sharing ancestors); excluded files are marked; transmittals
  are reconstructed with party/bin dedup. No node ids for the editor to manage.

New classify helpers: transmittalRecord (export), findOrAddParty /
findOrAddTransmittalBin (import dedup). serialize/load stay for workspace
persistence. Test rewritten for the filename round-trip (classify.spec.js -> 34).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
ZDDC 2026-06-10 11:52:44 -05:00
parent 4425a599f0
commit 9851cc4463
3 changed files with 130 additions and 49 deletions

View file

@ -204,39 +204,45 @@
if (app.modules.tree) app.modules.tree.render(); if (app.modules.tree) app.modules.tree.render();
} }
// ── dataset export / import ──────────────────────────────────────────── // ── dataset export / import (one record per file) ──────────────────────
// Round-trip the full classification (trees + assignments + output name) as // Round-trip the classification as a flat list of files, each carrying its
// JSON so it can be edited externally (e.g. by an AI) and re-imported. The // full ZDDC filename (and optional transmittal). An external editor (e.g. an
// exported `sourceFiles` list is informational — it tells the editor which // AI) just sets filenames; on import the app parses each filename and
// files exist; only the canonical state is read back on import. // rebuilds the tracking tree (no node ids to manage).
function collectSourceFiles() { function eachSourceFile(cb) {
var c = app.modules.classify, out = [];
(function walk(nodes) { (function walk(nodes) {
(nodes || []).forEach(function (n) { (nodes || []).forEach(function (n) { (n.files || []).forEach(cb); walk(n.children); });
(n.files || []).forEach(function (f) {
out.push({ key: c.srcKeyForFile(f), name: window.zddc.joinExtension(f.originalFilename, f.extension) });
});
walk(n.children);
});
})(app.folderTree || []); })(app.folderTree || []);
return out;
} }
function exportDataset() { function exportDataset() {
var s = app.modules.classify.serialize(); var c = app.modules.classify, files = [];
eachSourceFile(function (f) {
var key = c.srcKeyForFile(f);
var a = c.getAssignment(key) || {};
var d = c.deriveTarget(f);
var rec = {
source: key,
originalName: window.zddc.joinExtension(f.originalFilename, f.extension),
filename: a.excluded ? '' : (d.filename || ''),
excluded: !!a.excluded,
};
if (!a.excluded && a.transmittalNodeId) {
var t = c.transmittalRecord(a.transmittalNodeId);
if (t) rec.transmittal = t;
}
files.push(rec);
});
var payload = { var payload = {
zddcClassifierDataset: 1, zddcClassifierFiles: 1,
exportedAt: new Date().toISOString(), exportedAt: new Date().toISOString(),
_format: 'ZDDC Classifier dataset. trackingTree/transmittalTree are folder trees of ' _format: 'One record per input file. Set "filename" to its full ZDDC name '
+ '{id,name,children}. assignments maps each source file (key) to its placement ' + '"TRACKING_REV (STATUS) - Title.ext" — on import the app splits TRACKING on "-" and the '
+ '{trackingNodeId, transmittalNodeId, excluded, titleOverride}, referencing node ids ' + 'final "_" into nested folders, and files in shared paths share ancestors. Set '
+ 'in the trees. The tracking number is a node\'s ancestor names joined with "-"; the ' + '"excluded": true for non-documents (filename then ignored). "transmittal" is optional: '
+ 'leaf folder is "REV (STATUS)". sourceFiles lists every available file (informational; ' + '{party, slot:"received"|"issued", date:"YYYY-MM-DD", type:"TRN"|"SUB", seq, status, title}. '
+ 'ignored on import). Edit names/structure/assignments and re-import; keep ids consistent.', + 'Classify every "source" key; do not invent files.',
outputName: s.outputName || null, outputName: c.serialize().outputName || null,
trackingTree: s.trackingTree || [], files: files,
transmittalTree: s.transmittalTree || [],
assignments: s.assignments || {},
sourceFiles: collectSourceFiles(),
}; };
var name = 'classifier-dataset'; var name = 'classifier-dataset';
try { try {
@ -258,15 +264,39 @@
var obj; var obj;
try { obj = JSON.parse(reader.result); } try { obj = JSON.parse(reader.result); }
catch (e) { window.zddc.toast('Import failed — not valid JSON.', 'error'); return; } catch (e) { window.zddc.toast('Import failed — not valid JSON.', 'error'); return; }
if (!obj || (!obj.trackingTree && !obj.transmittalTree && !obj.assignments)) { if (!obj || !Array.isArray(obj.files)) {
window.zddc.toast('Import failed — not a classifier dataset.', 'error'); return; window.zddc.toast('Import failed — expected a classifier dataset with a "files" list.', 'error'); return;
} }
var c = app.modules.classify; var c = app.modules.classify;
var hasData = c.getTrackingTree().length || c.getTransmittalTree().length var hasData = c.getTrackingTree().length || c.getTransmittalTree().length
|| Object.keys(c.serialize().assignments || {}).length; || Object.keys(c.serialize().assignments || {}).length;
if (hasData && !confirm('Replace the current classification with the imported dataset?')) return; if (hasData && !confirm('Replace the current classification with the imported dataset?')) return;
c.load(obj); // reads trackingTree/transmittalTree/assignments/outputName; ignores the rest c.reset();
window.zddc.toast('Dataset imported.', 'success'); var ok = 0, bad = 0;
obj.files.forEach(function (rec) {
if (!rec || !rec.source) return;
var key = rec.source;
if (rec.excluded) { c.setExcluded([key], true); ok++; return; }
if (rec.filename) {
var p = window.zddc.parseFilename(String(rec.filename).trim());
if (p && p.valid) {
var stem = p.trackingNumber + '_' + p.revision + ' (' + p.status + ')';
c.place([key], c.addTrackingPath(null, c.parseFolderLevels(stem)), 'tracking');
if (p.title != null) c.setTitleOverride(key, p.title);
ok++;
} else { bad++; }
}
if (rec.transmittal && rec.transmittal.party) {
var t = rec.transmittal;
var pid = c.findOrAddParty(t.party);
var bid = c.findOrAddTransmittalBin(pid, t.slot || 'received', {
date: t.date, type: t.type || 'TRN', seq: t.seq, status: t.status, title: t.title,
});
if (bid) c.place([key], bid, 'transmittal');
}
});
window.zddc.toast('Imported ' + ok + ' file' + (ok === 1 ? '' : 's')
+ (bad ? (' — ' + bad + ' had an unparseable filename') : '') + '.', bad ? 'warning' : 'success');
}; };
reader.onerror = function () { window.zddc.toast('Import failed — could not read the file.', 'error'); }; reader.onerror = function () { window.zddc.toast('Import failed — could not read the file.', 'error'); };
reader.readAsText(file); reader.readAsText(file);

View file

@ -503,6 +503,40 @@
return trackingChain(info).join(' / '); return trackingChain(info).join(' / ');
} }
// ── filename-based export/import helpers ─────────────────────────────────
// Export side: flatten the transmittal bin `binId` into a plain record
// {party, slot, date, type, seq, status, title} that carries no node ids.
// Returns null when `binId` does not resolve to a node of kind 'transmittal'.
// The slot is looked up through the bin's parent and the party through the
// slot's parent; any link that is missing leaves that field as ''.
function transmittalRecord(binId) {
  var bin = infoFor(binId);
  if (!bin || bin.kind !== 'transmittal') { return null; }

  // Walk upwards one level at a time: bin -> slot -> party.
  var slotInfo = null;
  if (bin.parent) { slotInfo = infoFor(bin.parent.id); }
  var partyInfo = null;
  if (slotInfo && slotInfo.parent) { partyInfo = infoFor(slotInfo.parent.id); }

  var meta = bin.node.meta || {};
  // Keys are assigned in a fixed order so the exported JSON reads consistently.
  var record = {};
  record.party = partyInfo ? partyInfo.node.name : '';
  record.slot = slotInfo ? slotInfo.node.slot : '';
  record.date = meta.date || '';
  record.type = meta.type || 'TRN'; // missing type falls back to 'TRN'
  record.seq = meta.seq || '';
  record.status = meta.status || '';
  record.title = meta.title || '';
  return record;
}
// Import side: return the id of the first top-level transmittal-tree node
// whose name is exactly `name`; when none matches, create one through
// addParty(name) and return whatever that call returns. Reusing the node keeps
// several records naming the same party from producing duplicate parties.
function findOrAddParty(name) {
  var parties = state.transmittalTree || [];
  var match;
  // `some` stops at the first hit, which is the node we want to reuse.
  parties.some(function (candidate) {
    if (candidate.name !== name) { return false; }
    match = candidate;
    return true;
  });
  if (match) { return match.id; }
  return addParty(name);
}
// Import side: find or create the transmittal bin for `meta` under the given
// party and slot. Returns null when `partyId` does not resolve to a node of
// kind 'party'. Otherwise the first child of the party whose `slot` equals
// `slot` is searched for a bin named like the folder name that
// transmittalFolderName(partyName, meta) produces; a hit returns that bin's id,
// anything else falls through to addTransmittalBin(partyId, slot, meta).
function findOrAddTransmittalBin(partyId, slot, meta) {
  var party = infoFor(partyId);
  if (!party || party.kind !== 'party') { return null; }

  var targetName = transmittalFolderName(party.node.name, meta);

  // Locate the first slot node of the requested kind, if the party has one.
  var slotNode;
  (party.node.children || []).some(function (child) {
    if (child.slot !== slot) { return false; }
    slotNode = child;
    return true;
  });

  if (slotNode) {
    // Reuse an existing bin with the same derived name (dedup).
    var bin;
    (slotNode.children || []).some(function (candidate) {
      if (candidate.name !== targetName) { return false; }
      bin = candidate;
      return true;
    });
    if (bin) { return bin.id; }
  }

  return addTransmittalBin(partyId, slot, meta);
}
// ── mode ───────────────────────────────────────────────────────────────── // ── mode ─────────────────────────────────────────────────────────────────
function setEnabled(on) { state.enabled = !!on; notify(); } function setEnabled(on) { state.enabled = !!on; notify(); }
function isEnabled() { return state.enabled; } function isEnabled() { return state.enabled; }
@ -524,6 +558,8 @@
expandFolderPattern: expandFolderPattern, expandFolderPattern: expandFolderPattern,
parseFolderLevels: parseFolderLevels, addTrackingPath: addTrackingPath, parseFolderLevels: parseFolderLevels, addTrackingPath: addTrackingPath,
trackingNodeComplete: trackingNodeComplete, trackingPathLabel: trackingPathLabel, trackingNodeComplete: trackingNodeComplete, trackingPathLabel: trackingPathLabel,
transmittalRecord: transmittalRecord,
findOrAddParty: findOrAddParty, findOrAddTransmittalBin: findOrAddTransmittalBin,
getNode: getNode, getTrackingTree: function () { return state.trackingTree; }, getNode: getNode, getTrackingTree: function () { return state.trackingTree; },
getTransmittalTree: function () { return state.transmittalTree; }, getTransmittalTree: function () { return state.transmittalTree; },
// derive + reverse // derive + reverse

View file

@ -661,28 +661,43 @@ test('editing a placed files filename re-files it onto the parsed tracking pa
expect(r.title).toBe('New Title'); expect(r.title).toBe('New Title');
}); });
test('dataset round-trip: serialize → JSON → load preserves trees + assignments', async ({ page }) => { test('dataset (filename-based): import reconstruction rebuilds tracking + shared transmittals', async ({ page }) => {
const r = await page.evaluate(() => { const r = await page.evaluate(() => {
const c = window.app.modules.classify; const c = window.app.modules.classify;
const z = window.zddc;
c.reset(); c.reset();
const leaf = c.addTrackingNode(c.addTrackingNode(null, 'CPO'), 'A (IFR)'); // Mirrors app.importDataset's per-record reconstruction: two docs sharing
c.addTrackingNode(null, 'EMPTY-BRANCH'); // a node with no files (must survive) // one transmittal package, plus an excluded junk file.
const file = { folderPath: 'Root', originalFilename: 'doc', extension: 'pdf' }; const recs = [
const key = c.srcKeyForFile(file); { source: 'a.pdf', filename: 'CPO-0001_0 (IFU) - Doc A.pdf', excluded: false,
c.place([key], leaf, 'tracking'); transmittal: { party: 'Acme', slot: 'received', date: '2025-10-31', type: 'TRN', seq: '0043', status: 'IFC', title: 'Pkg' } },
// Emulate export wrapper (extra keys load() must ignore) → JSON → load. { source: 'b.pdf', filename: 'CPO-0002_0 (IFU) - Doc B.pdf', excluded: false,
const exported = { zddcClassifierDataset: 1, exportedAt: 'x', sourceFiles: [{ key }], ...c.serialize() }; transmittal: { party: 'Acme', slot: 'received', date: '2025-10-31', type: 'TRN', seq: '0043', status: 'IFC', title: 'Pkg' } },
const json = JSON.stringify(exported); { source: 'junk.tmp', filename: '', excluded: true },
c.reset(); ];
c.load(JSON.parse(json)); recs.forEach((rec) => {
const tree = c.getTrackingTree(); if (rec.excluded) { c.setExcluded([rec.source], true); return; }
const p = z.parseFilename(rec.filename);
c.place([rec.source], c.addTrackingPath(null, c.parseFolderLevels(p.trackingNumber + '_' + p.revision + ' (' + p.status + ')')), 'tracking');
c.setTitleOverride(rec.source, p.title);
const t = rec.transmittal;
const bid = c.findOrAddTransmittalBin(c.findOrAddParty(t.party), t.slot, t);
c.place([rec.source], bid, 'transmittal');
});
const da = c.deriveTarget({ folderPath: '', originalFilename: 'a', extension: 'pdf' }); // key 'a.pdf'
const tree = c.getTransmittalTree();
return { return {
names: tree.map((n) => n.name).sort(), tracking: da.tracking, rev: da.revision, status: da.status, title: da.title,
leaf: tree.find((n) => n.name === 'CPO').children[0].name, parties: tree.length,
assigned: !!c.getAssignment(key), bins: tree[0] ? tree[0].children.filter((s) => s.slot === 'received')[0].children.length : -1,
excluded: c.getAssignment('junk.tmp').excluded,
}; };
}); });
expect(r.names).toEqual(['CPO', 'EMPTY-BRANCH']); // empty branch preserved expect(r.tracking).toBe('CPO-0001');
expect(r.leaf).toBe('A (IFR)'); expect(r.rev).toBe('0');
expect(r.assigned).toBe(true); expect(r.status).toBe('IFU');
expect(r.title).toBe('Doc A');
expect(r.parties).toBe(1); // one Acme party
expect(r.bins).toBe(1); // shared transmittal → single bin (dedup)
expect(r.excluded).toBe(true);
}); });