feat(classifier): CSV path round-trip — export filtered paths, import old→new mapping

Add an AI-friendly classification round-trip alongside the By-tracking grid:

- "⬇ Export paths" (filetree header): downloads the filtered file list as a
  1-column CSV of full (root-relative) paths — the same keys the importer
  matches on. Hand it to an LLM to classify into
  <party>/<direction>/<transmittal>/<file>.ext.
- "Import paths…" (above the target list): loads a 2-column CSV (old path,
  new path). Each new path drives both axes — the trailing filename sets the
  tracking number (rename, via parseFilename → tracking tree) and the leading
  <party>/<direction>/<transmittal> segments route a transmittal (via
  parseFolder → transmittal tree). MERGE semantics: only files named in the
  CSV are touched; others keep their classification.
- Per-row problems (unknown old path, unparseable filename/transmittal, bad
  direction) are collected and offered as a downloadable errors CSV, with a
  summary toast — scales to thousands of rows. Either axis can apply
  independently, so a filename-only new path is a rename with no error.

This replaces the JSON "Export for editing" / "Import edits" pair (the CSV
path form is fully expressive for this model and simpler to round-trip); the
TSV "Export list" clipboard→Excel button is kept. Buttons can grow into a
modal later if more options are needed.

Includes a Playwright test driving the real file-input import (rename+route,
filename-only, merge-preserves-unlisted, CSV-quoted comma in title, error row).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
ZDDC 2026-06-17 16:38:15 -05:00
parent 8ee7f1c460
commit ce6efb0201
4 changed files with 200 additions and 87 deletions

View file

@ -150,9 +150,9 @@
showExcludedCheckbox: document.getElementById('showExcludedCheckbox'), showExcludedCheckbox: document.getElementById('showExcludedCheckbox'),
showEmptyCheckbox: document.getElementById('showEmptyCheckbox'), showEmptyCheckbox: document.getElementById('showEmptyCheckbox'),
exportListBtn: document.getElementById('exportListBtn'), exportListBtn: document.getElementById('exportListBtn'),
exportDatasetBtn: document.getElementById('exportDatasetBtn'), exportPathsBtn: document.getElementById('exportPathsBtn'),
importDatasetBtn: document.getElementById('importDatasetBtn'), importPathsBtn: document.getElementById('importPathsBtn'),
importDatasetInput: document.getElementById('importDatasetInput'), importPathsInput: document.getElementById('importPathsInput'),
resetDatasetBtn: document.getElementById('resetDatasetBtn'), resetDatasetBtn: document.getElementById('resetDatasetBtn'),
treeFilterInput: document.getElementById('treeFilterInput'), treeFilterInput: document.getElementById('treeFilterInput'),
trackingFilterInput: document.getElementById('trackingFilterInput'), trackingFilterInput: document.getElementById('trackingFilterInput'),
@ -219,89 +219,120 @@
(nodes || []).forEach(function (n) { (n.files || []).forEach(cb); walk(n.children); }); (nodes || []).forEach(function (n) { (n.files || []).forEach(cb); walk(n.children); });
})(app.folderTree || []); })(app.folderTree || []);
} }
function exportDataset() { // CSV cell quoting (RFC4180): quote when the value holds a comma, quote, or
var c = app.modules.classify, files = []; // newline; embedded quotes are doubled.
eachSourceFile(function (f) { function csvCell(s) { s = (s == null ? '' : String(s)); return /[",\n\r]/.test(s) ? '"' + s.replace(/"/g, '""') + '"' : s; }
var key = c.srcKeyForFile(f); // Minimal RFC4180-ish CSV parser → array of rows of string cells. Handles
var a = c.getAssignment(key) || {}; // quoted fields with embedded commas/quotes/newlines (titles may contain
var d = c.deriveTarget(f); // commas). CRLF/CR are normalized to LF.
var rec = { function parseCsv(text) {
source: key, var rows = [], row = [], field = '', inQ = false, i = 0;
originalName: window.zddc.joinExtension(f.originalFilename, f.extension), text = String(text == null ? '' : text).replace(/\r\n?/g, '\n');
filename: a.excluded ? '' : (d.filename || ''), for (; i < text.length; i++) {
excluded: !!a.excluded, var ch = text[i];
}; if (inQ) {
if (!a.excluded && a.transmittalNodeId) { if (ch === '"') { if (text[i + 1] === '"') { field += '"'; i++; } else { inQ = false; } }
var t = c.transmittalRecord(a.transmittalNodeId); else { field += ch; }
if (t) rec.transmittal = t; } else if (ch === '"') { inQ = true; }
} else if (ch === ',') { row.push(field); field = ''; }
files.push(rec); else if (ch === '\n') { row.push(field); rows.push(row); row = []; field = ''; }
}); else { field += ch; }
var payload = { }
zddcClassifierFiles: 1, if (field !== '' || row.length) { row.push(field); rows.push(row); }
exportedAt: new Date().toISOString(), return rows;
_format: 'One record per input file. Set "filename" to its full ZDDC name '
+ '"TRACKING_REV (STATUS) - Title.ext" — on import the app splits TRACKING on "-" and the '
+ 'final "_" into nested folders, and files in shared paths share ancestors. Set '
+ '"excluded": true for non-documents (filename then ignored). "transmittal" is optional: '
+ '{party, slot:"received"|"issued", date:"YYYY-MM-DD", type:"TRN"|"SUB", seq, status, title}. '
+ 'Classify every "source" key; do not invent files.',
outputName: c.serialize().outputName || null,
files: files,
};
var name = 'classifier-dataset';
try {
if (app.modules.workspace && typeof app.modules.workspace.activeName === 'function') {
name = app.modules.workspace.activeName() || name;
}
} catch (_) { /* ok */ }
var blob = new Blob([JSON.stringify(payload, null, 2)], { type: 'application/json' });
var url = URL.createObjectURL(blob);
var a = document.createElement('a');
a.href = url;
a.download = String(name).replace(/[^\w.-]+/g, '_') + '.zddc-classification.json';
document.body.appendChild(a); a.click(); a.remove();
URL.revokeObjectURL(url);
} }
function importDataset(file) { // Trigger a client-side download of `text` as `name`.
function downloadText(text, name, mime) {
  // Wrap the text in a Blob (defaulting to plain text) and mint an object URL for it.
  var type = mime || 'text/plain';
  var href = URL.createObjectURL(new Blob([text], { type: type }));

  // A temporary <a download> element is attached, clicked, and detached again;
  // its `download` attribute carries the suggested file name.
  var link = document.createElement('a');
  link.href = href;
  link.download = name;
  document.body.appendChild(link);
  link.click();
  link.remove();

  // Release the object URL later rather than immediately after the click.
  setTimeout(function () {
    URL.revokeObjectURL(href);
  }, 10000);
}
// Import a 2-column CSV (old path, new path) — e.g. an AI-classified list.
// MERGE semantics: only files named in the CSV are touched; others keep their
// current classification. Each new path
// "<party>/<direction>/<transmittal>/<file>.ext" drives two axes — the
// filename sets the tracking number (rename) and the leading segments route a
// transmittal. Either axis can apply independently; per-row problems are
// collected and offered as a downloadable errors CSV (the list can be huge).
function importPaths(file) {
var reader = new FileReader(); var reader = new FileReader();
reader.onload = function () { reader.onload = function () {
var obj; var rows = parseCsv(reader.result);
try { obj = JSON.parse(reader.result); } if (!rows.length) { window.zddc.toast('Import failed — the CSV is empty.', 'error'); return; }
catch (e) { window.zddc.toast('Import failed — not valid JSON.', 'error'); return; }
if (!obj || !Array.isArray(obj.files)) {
window.zddc.toast('Import failed — expected a classifier dataset with a "files" list.', 'error'); return;
}
var c = app.modules.classify; var c = app.modules.classify;
var hasData = c.getTrackingTree().length || c.getTransmittalTree().length // Old path must resolve to a real scanned file (srcKey set).
|| Object.keys(c.serialize().assignments || {}).length; var valid = Object.create(null);
if (hasData && !confirm('Replace the current classification with the imported dataset?')) return; eachSourceFile(function (f) { valid[c.srcKeyForFile(f)] = true; });
c.reset();
var ok = 0, bad = 0; var imported = 0, errors = [];
obj.files.forEach(function (rec) { rows.forEach(function (cells, idx) {
if (!rec || !rec.source) return; var oldPath = (cells[0] || '').trim();
var key = rec.source; var newPath = (cells[1] || '').trim();
if (rec.excluded) { c.setExcluded([key], true); ok++; return; } // Tolerate a header row (first row whose first cell isn't a file).
if (rec.filename) { if (idx === 0 && !valid[oldPath] && /^(old|path|source|from)\b/i.test(oldPath)) return;
var p = window.zddc.parseFilename(String(rec.filename).trim()); if (!oldPath && !newPath) return; // blank line
if (p && p.valid) { if (!oldPath) { errors.push([oldPath, newPath, 'missing old path']); return; }
var stem = p.trackingNumber + '_' + p.revision + ' (' + p.status + ')'; if (!valid[oldPath]) { errors.push([oldPath, newPath, 'no such file in the current scan']); return; }
c.place([key], c.addTrackingPath(null, c.parseFolderLevels(stem)), 'tracking'); if (!newPath) { errors.push([oldPath, newPath, 'missing new path']); return; }
if (p.title != null) c.setTitleOverride(key, p.title);
ok++; var segs = newPath.split('/').filter(function (s) { return s !== ''; });
} else { bad++; } if (!segs.length) { errors.push([oldPath, newPath, 'empty new path']); return; }
var filename = segs[segs.length - 1];
var leading = segs.slice(0, -1);
var didTracking = false, didTransmittal = false, rowErr = '';
function note(m) { rowErr = rowErr ? rowErr + '; ' + m : m; }
// Axis 1 — filename → tracking tree (the rename).
var p = window.zddc.parseFilename(filename);
if (p && p.valid) {
var stem = p.trackingNumber + '_' + p.revision + ' (' + p.status + ')';
c.place([oldPath], c.addTrackingPath(null, c.parseFolderLevels(stem)), 'tracking');
if (p.title != null) c.setTitleOverride(oldPath, p.title);
didTracking = true;
} else {
note('filename is not a valid ZDDC name "' + filename + '"');
} }
if (rec.transmittal && rec.transmittal.party) {
var t = rec.transmittal; // Axis 2 — <party>/<direction>/<transmittal> → transmittal tree (the route).
var pid = c.findOrAddParty(t.party); if (leading.length >= 3) {
var bid = c.findOrAddTransmittalBin(pid, t.slot || 'received', { var party = leading[0];
date: t.date, type: t.type || 'TRN', seq: t.seq, status: t.status, title: t.title, var slot = leading[1].toLowerCase();
}); var folder = leading.slice(2).join('/');
if (bid) c.place([key], bid, 'transmittal'); if (slot !== 'issued' && slot !== 'received') {
note('direction must be "issued" or "received", got "' + leading[1] + '"');
} else {
var pf = window.zddc.parseFolder(folder);
if (pf && pf.valid) {
var tnParts = pf.trackingNumber.split('-');
var seq = tnParts.pop(), type = tnParts.pop();
var bid = c.findOrAddTransmittalBin(c.findOrAddParty(party), slot, {
date: pf.date, type: type || 'TRN', seq: seq || '', status: pf.status, title: pf.title,
});
if (bid) { c.place([oldPath], bid, 'transmittal'); didTransmittal = true; }
else note('could not create the transmittal folder');
} else {
note('transmittal folder is not a valid ZDDC folder name "' + folder + '"');
}
}
} else if (leading.length >= 1) {
note('to route a transmittal the new path needs <party>/<direction>/<transmittal>/<file>');
} }
if (didTracking || didTransmittal) imported++;
if (rowErr) errors.push([oldPath, newPath, rowErr]);
}); });
window.zddc.toast('Imported ' + ok + ' file' + (ok === 1 ? '' : 's')
+ (bad ? (' — ' + bad + ' had an unparseable filename') : '') + '.', bad ? 'warning' : 'success'); if (errors.length) {
var elines = ['old path,new path,reason'];
errors.forEach(function (e) { elines.push(csvCell(e[0]) + ',' + csvCell(e[1]) + ',' + csvCell(e[2])); });
downloadText(elines.join('\n'), 'classifier-import-errors.csv', 'text/csv');
}
window.zddc.toast('Imported ' + imported + ' file' + (imported === 1 ? '' : 's')
+ (errors.length ? (' — ' + errors.length + ' row' + (errors.length === 1 ? '' : 's')
+ ' had problems (downloaded classifier-import-errors.csv)') : '') + '.',
errors.length ? 'warning' : 'success');
}; };
reader.onerror = function () { window.zddc.toast('Import failed — could not read the file.', 'error'); }; reader.onerror = function () { window.zddc.toast('Import failed — could not read the file.', 'error'); };
reader.readAsText(file); reader.readAsText(file);
@ -381,11 +412,13 @@
}); });
// Dataset export / import (round-trip the classification through a JSON file). // Dataset export / import (round-trip the classification through a JSON file).
if (app.dom.exportDatasetBtn) app.dom.exportDatasetBtn.addEventListener('click', exportDataset); if (app.dom.exportPathsBtn) app.dom.exportPathsBtn.addEventListener('click', function () {
if (app.dom.importDatasetBtn) app.dom.importDatasetBtn.addEventListener('click', function () { app.dom.importDatasetInput.click(); }); if (app.modules.tree && app.modules.tree.exportPathList) app.modules.tree.exportPathList();
});
if (app.dom.importPathsBtn) app.dom.importPathsBtn.addEventListener('click', function () { app.dom.importPathsInput.click(); });
if (app.dom.resetDatasetBtn) app.dom.resetDatasetBtn.addEventListener('click', resetDataset); if (app.dom.resetDatasetBtn) app.dom.resetDatasetBtn.addEventListener('click', resetDataset);
if (app.dom.importDatasetInput) app.dom.importDatasetInput.addEventListener('change', function () { if (app.dom.importPathsInput) app.dom.importPathsInput.addEventListener('change', function () {
if (this.files && this.files[0]) importDataset(this.files[0]); if (this.files && this.files[0]) importPaths(this.files[0]);
this.value = ''; // allow re-importing the same file this.value = ''; // allow re-importing the same file
}); });

View file

@ -203,6 +203,27 @@
if (!built.count) { window.zddc.toast('No files to export — nothing passes the current filters.', 'info'); return; } if (!built.count) { window.zddc.toast('No files to export — nothing passes the current filters.', 'info'); return; }
copyOrDownload(built.tsv, built.count); copyOrDownload(built.tsv, built.count);
} }
// Download the filtered file list as a 1-column CSV of full (root-relative)
// paths — the same keys “Import paths” matches on. Meant to be handed to an AI
// that returns a 2-column old→new mapping.
//
// Takes no arguments and returns nothing. The CSV is a "path" header row
// followed by one source key per filtered file, ordered by cmpName and joined
// with "\n", saved as classifier-paths.csv. Toasts: info (and no download) when
// nothing passes the filters, success once the download has been triggered,
// error if building or triggering the download throws.
function exportPathList() {
  var c = window.app.modules.classify;
  // Derive every key exactly once. Sorting the file objects instead would
  // re-derive both keys on each comparison and then once more per output row.
  // Mapping also yields a fresh array, so the filtered list is not reordered.
  var keys = filteredFileObjects().map(function (f) { return c.srcKeyForFile(f); });
  keys.sort(function (a, b) { return cmpName(a, b); });
  if (!keys.length) { window.zddc.toast('No files to export — nothing passes the current filters.', 'info'); return; }
  // CSV cell quoting: wrap in double quotes when the value holds a comma,
  // quote, or line break; embedded quotes are doubled. null/undefined → ''.
  function cell(s) { s = (s == null ? '' : String(s)); return /[",\n\r]/.test(s) ? '"' + s.replace(/"/g, '""') + '"' : s; }
  var lines = ['path'];
  keys.forEach(function (k) { lines.push(cell(k)); });
  try {
    var blob = new Blob([lines.join('\n')], { type: 'text/csv' });
    var url = URL.createObjectURL(blob);
    var a = document.createElement('a'); a.href = url; a.download = 'classifier-paths.csv';
    document.body.appendChild(a); a.click(); a.remove();
    // Revoke the object URL later rather than immediately after the click.
    setTimeout(function () { URL.revokeObjectURL(url); }, 10000);
    window.zddc.toast('Exported ' + keys.length + ' path' + (keys.length === 1 ? '' : 's') + ' to classifier-paths.csv.', 'success');
  } catch (e) { window.zddc.toast('Could not export the path list — ' + (e.message || e), 'error'); }
}
function copyOrDownload(text, count) { function copyOrDownload(text, count) {
function ok() { window.zddc.toast('Copied ' + count + ' file' + (count === 1 ? '' : 's') + ' (path + file) — paste into Excel.', 'success'); } function ok() { window.zddc.toast('Copied ' + count + ' file' + (count === 1 ? '' : 's') + ' (path + file) — paste into Excel.', 'success'); }
function download() { function download() {
@ -1042,6 +1063,7 @@
setShowFilters, setShowFilters,
setNameFilter, setNameFilter,
exportFilteredList, exportFilteredList,
exportPathList,
filteredFiles: filteredFileObjects, filteredFiles: filteredFileObjects,
_buildExportTsv: buildExportTsv _buildExportTsv: buildExportTsv
}; };

View file

@ -83,6 +83,8 @@
</label> </label>
<button class="btn btn-sm export-list-btn" id="exportListBtn" <button class="btn btn-sm export-list-btn" id="exportListBtn"
title="Copy the filtered file list (path + file columns, no folders) as TSV — paste into Excel, edit, then paste back via “Paste rows”. Paste a full path into the Current name column to bind that exact file.">⬆ Export list</button> title="Copy the filtered file list (path + file columns, no folders) as TSV — paste into Excel, edit, then paste back via “Paste rows”. Paste a full path into the Current name column to bind that exact file.">⬆ Export list</button>
<button class="btn btn-sm export-list-btn" id="exportPathsBtn"
title="Download the filtered file list as a 1-column CSV of full paths. Feed it to an AI to classify into <party>/<direction>/<transmittal>/<file>.ext, then bring the 2-column result back via “Import paths” above the target list.">⬇ Export paths</button>
</div> </div>
<input type="search" id="treeFilterInput" class="tree-filter" spellcheck="false" <input type="search" id="treeFilterInput" class="tree-filter" spellcheck="false"
placeholder="Filter files… (e.g. master deliverables list)" aria-label="Filter files"> placeholder="Filter files… (e.g. master deliverables list)" aria-label="Filter files">
@ -105,9 +107,8 @@
<div class="pane-header-right"> <div class="pane-header-right">
<span id="classifyStats" class="file-stats"></span> <span id="classifyStats" class="file-stats"></span>
<span class="header-divider">|</span> <span class="header-divider">|</span>
<button id="exportDatasetBtn" class="btn btn-secondary btn-sm" title="Download the classifications as a filename-per-file JSON to edit (e.g. with an AI), then re-import here. NOT a workspace — no scanned tree.">Export for editing</button> <button id="importPathsBtn" class="btn btn-secondary btn-sm" title="Import a 2-column CSV (old path, new path). Each new path “<party>/<direction>/<transmittal>/<file>.ext” sets that file’s tracking number (rename) and routes it into a transmittal. Only files named in the CSV are touched — others keep their current classification. Export the source list first via “Export paths” on the left.">Import paths…</button>
<button id="importDatasetBtn" class="btn btn-secondary btn-sm" title="Load an edited classification JSON back in — replaces the current classifications. (To move a whole scanned workspace between browsers, use “Import workspace” on the welcome screen.)">Import edits</button> <input type="file" id="importPathsInput" accept=".csv,text/csv,text/plain" hidden>
<input type="file" id="importDatasetInput" accept="application/json,.json" hidden>
<button id="resetDatasetBtn" class="btn btn-sm btn-danger" title="Discard all classifications and start over from the raw scanned input (does not touch your files)">Reset</button> <button id="resetDatasetBtn" class="btn btn-sm btn-danger" title="Discard all classifications and start over from the raw scanned input (does not touch your files)">Reset</button>
</div> </div>
</div> </div>

View file

@ -700,7 +700,7 @@ test('dataset (filename-based): import reconstruction rebuilds tracking + shared
const c = window.app.modules.classify; const c = window.app.modules.classify;
const z = window.zddc; const z = window.zddc;
c.reset(); c.reset();
// Mirrors app.importDataset's per-record reconstruction: two docs sharing // Mirrors app.importPaths's per-row reconstruction: two docs sharing
// one transmittal package, plus an excluded junk file. // one transmittal package, plus an excluded junk file.
const recs = [ const recs = [
{ source: 'a.pdf', filename: 'CPO-0001_0 (IFU) - Doc A.pdf', excluded: false, { source: 'a.pdf', filename: 'CPO-0001_0 (IFU) - Doc A.pdf', excluded: false,
@ -1680,6 +1680,63 @@ test('export: filtered file list → TSV (path + file), includes collapsed folde
expect(r.filtered).toContain('Elec/valve spec.pdf\tvalve spec.pdf'); expect(r.filtered).toContain('Elec/valve spec.pdf\tvalve spec.pdf');
}); });
test('import paths: CSV old→new drives rename + transmittal route (merge, errors reported)', async ({ page }) => {
  await page.evaluate(() => window.app.modules.app.setMode());

  // Arrange: a scanned tree Job/Inbox holding three files, one of which is
  // already classified, plus a toast spy the test can wait on.
  await page.evaluate(() => {
    const classify = window.app.modules.classify;
    classify.reset();
    const inboxFiles = ['IMG_001', 'IMG_002', 'keep me'].map((stem) => (
      { originalFilename: stem, extension: 'pdf', folderPath: 'Job/Inbox' }
    ));
    window.app.folderTree = [{
      name: 'Job',
      path: 'Job',
      files: [],
      children: [{ name: 'Inbox', path: 'Job/Inbox', files: inboxFiles, children: [] }],
    }];
    // 'keep me' is absent from the CSV below, so a merge import must leave
    // this pre-existing tracking placement alone.
    const untouchedKey = classify.srcKeyForFile(inboxFiles[2]);
    const priorNode = classify.addTrackingPath(null, classify.parseFolderLevels('ZZZ-0009_B (IFR)'));
    classify.place([untouchedKey], priorNode, 'tracking');
    // The import reads the file asynchronously (FileReader); the summary toast
    // is the signal that it has finished.
    window.__toast = null;
    window.zddc.toast = (msg, level) => { window.__toast = { msg, level }; };
  });

  // Act: feed a mapping through the real file input.
  const headerRow = 'old path,new path';
  // Full route + rename; the quoted cell carries a comma inside the title.
  const routedRow = 'Inbox/IMG_001.pdf,"Acme/received/2025-10-31_Acme-TRN-0043 (IFC) - Pkg/CPO-0001_0 (IFU) - Doc A, rev one.pdf"';
  // Filename only: a rename with no route, which must not count as an error.
  const renameOnlyRow = 'Inbox/IMG_002.pdf,CPO-0002_A (IFR) - Doc B.pdf';
  // Old path that is not in the scan: must be reported as a problem row.
  const unknownRow = 'ghost/missing.pdf,whatever/x.pdf';
  const csv = [headerRow, routedRow, renameOnlyRow, unknownRow].join('\n');
  await page.setInputFiles('#importPathsInput', { name: 'map.csv', mimeType: 'text/csv', buffer: Buffer.from(csv) });
  await page.waitForFunction(() => window.__toast !== null);

  // Collect the derived targets for all three files plus the captured toast.
  const r = await page.evaluate(() => {
    const classify = window.app.modules.classify;
    const derive = (stem) => classify.deriveTarget({ folderPath: 'Job/Inbox', originalFilename: stem, extension: 'pdf' });
    const summarize = (d) => ({ tracking: d.tracking, rev: d.revision, status: d.status, title: d.title, outPath: d.outPath });
    const first = derive('IMG_001');
    const second = derive('IMG_002');
    const kept = derive('keep me');
    return {
      toast: window.__toast,
      d1: summarize(first),
      d2: summarize(second),
      d3tracking: kept.tracking,
      d3rev: kept.revision,
    };
  });

  // Row 1 — both axes applied: filename → name, leading segments → transmittal
  // output; the comma in the title survived CSV quoting.
  expect(r.d1).toEqual({
    tracking: 'CPO-0001',
    rev: '0',
    status: 'IFU',
    title: 'Doc A, rev one',
    outPath: 'Acme/received/2025-10-31_Acme-TRN-0043 (IFC) - Pkg',
  });
  // Row 2 — filename only: renamed, no transmittal.
  expect(r.d2).toMatchObject({ tracking: 'CPO-0002', rev: 'A', outPath: '' });
  // Merge: the un-listed file keeps its prior placement.
  expect([r.d3tracking, r.d3rev]).toEqual(['ZZZ-0009', 'B']);
  // Two rows imported; the missing-file row is flagged → warning.
  expect(r.toast.level).toBe('warning');
  expect(r.toast.msg).toContain('Imported 2 files');
  expect(r.toast.msg).toContain('1 row had problems');
});
test('paste rows: a full-path Current name binds that exact file directly', async ({ page }) => { test('paste rows: a full-path Current name binds that exact file directly', async ({ page }) => {
await page.evaluate(() => window.app.modules.app.setMode()); await page.evaluate(() => window.app.modules.app.setMode());
const r = await page.evaluate(() => { const r = await page.evaluate(() => {