From ce6efb0201c88abb2bd30aa57477f4d608f18be5 Mon Sep 17 00:00:00 2001 From: ZDDC Date: Wed, 17 Jun 2026 16:38:15 -0500 Subject: [PATCH] =?UTF-8?q?feat(classifier):=20CSV=20path=20round-trip=20?= =?UTF-8?q?=E2=80=94=20export=20filtered=20paths,=20import=20old=E2=86=92n?= =?UTF-8?q?ew=20mapping?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add an AI-friendly classification round-trip alongside the By-tracking grid: - "⬇ Export paths" (filetree header): downloads the filtered file list as a 1-column CSV of full (root-relative) paths — the same keys the importer matches on. Hand it to an LLM to classify into <party>/<issued|received>/<transmittal folder>/<filename>.ext. - "Import paths…" (above the target list): loads a 2-column CSV (old path, new path). Each new path drives both axes — the trailing filename sets the tracking number (rename, via parseFilename → tracking tree) and the leading <party>/<issued|received>/<transmittal folder> segments route a transmittal (via parseFolder → transmittal tree). MERGE semantics: only files named in the CSV are touched; others keep their classification. - Per-row problems (unknown old path, unparseable filename/transmittal, bad direction) are collected and offered as a downloadable errors CSV, with a summary toast — scales to thousands of rows. Either axis can apply independently, so a filename-only new path is a rename with no error. This replaces the JSON "Export for editing" / "Import edits" pair (the CSV path form is fully expressive for this model and simpler to round-trip); the TSV "Export list" clipboard→Excel button is kept. Buttons can grow into a modal later if more options are needed. Includes a Playwright test driving the real file-input import (rename+route, filename-only, merge-preserves-unlisted, CSV-quoted comma in title, error row). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- classifier/js/app.js | 199 +++++++++++++++++++++++---------------- classifier/js/tree.js | 22 +++++ classifier/template.html | 7 +- tests/classify.spec.js | 59 +++++++++++- 4 files changed, 200 insertions(+), 87 deletions(-) diff --git a/classifier/js/app.js b/classifier/js/app.js index bff9eda..4d5148a 100644 --- a/classifier/js/app.js +++ b/classifier/js/app.js @@ -150,9 +150,9 @@ showExcludedCheckbox: document.getElementById('showExcludedCheckbox'), showEmptyCheckbox: document.getElementById('showEmptyCheckbox'), exportListBtn: document.getElementById('exportListBtn'), - exportDatasetBtn: document.getElementById('exportDatasetBtn'), - importDatasetBtn: document.getElementById('importDatasetBtn'), - importDatasetInput: document.getElementById('importDatasetInput'), + exportPathsBtn: document.getElementById('exportPathsBtn'), + importPathsBtn: document.getElementById('importPathsBtn'), + importPathsInput: document.getElementById('importPathsInput'), resetDatasetBtn: document.getElementById('resetDatasetBtn'), treeFilterInput: document.getElementById('treeFilterInput'), trackingFilterInput: document.getElementById('trackingFilterInput'), @@ -219,89 +219,120 @@ (nodes || []).forEach(function (n) { (n.files || []).forEach(cb); walk(n.children); }); })(app.folderTree || []); } - function exportDataset() { - var c = app.modules.classify, files = []; - eachSourceFile(function (f) { - var key = c.srcKeyForFile(f); - var a = c.getAssignment(key) || {}; - var d = c.deriveTarget(f); - var rec = { - source: key, - originalName: window.zddc.joinExtension(f.originalFilename, f.extension), - filename: a.excluded ? 
'' : (d.filename || ''), - excluded: !!a.excluded, - }; - if (!a.excluded && a.transmittalNodeId) { - var t = c.transmittalRecord(a.transmittalNodeId); - if (t) rec.transmittal = t; - } - files.push(rec); - }); - var payload = { - zddcClassifierFiles: 1, - exportedAt: new Date().toISOString(), - _format: 'One record per input file. Set "filename" to its full ZDDC name ' - + '"TRACKING_REV (STATUS) - Title.ext" — on import the app splits TRACKING on "-" and the ' - + 'final "_" into nested folders, and files in shared paths share ancestors. Set ' - + '"excluded": true for non-documents (filename then ignored). "transmittal" is optional: ' - + '{party, slot:"received"|"issued", date:"YYYY-MM-DD", type:"TRN"|"SUB", seq, status, title}. ' - + 'Classify every "source" key; do not invent files.', - outputName: c.serialize().outputName || null, - files: files, - }; - var name = 'classifier-dataset'; - try { - if (app.modules.workspace && typeof app.modules.workspace.activeName === 'function') { - name = app.modules.workspace.activeName() || name; - } - } catch (_) { /* ok */ } - var blob = new Blob([JSON.stringify(payload, null, 2)], { type: 'application/json' }); - var url = URL.createObjectURL(blob); - var a = document.createElement('a'); - a.href = url; - a.download = String(name).replace(/[^\w.-]+/g, '_') + '.zddc-classification.json'; - document.body.appendChild(a); a.click(); a.remove(); - URL.revokeObjectURL(url); + // CSV cell quoting (RFC4180): quote when the value holds a comma, quote, or + // newline; embedded quotes are doubled. + function csvCell(s) { s = (s == null ? '' : String(s)); return /[",\n\r]/.test(s) ? '"' + s.replace(/"/g, '""') + '"' : s; } + // Minimal RFC4180-ish CSV parser → array of rows of string cells. Handles + // quoted fields with embedded commas/quotes/newlines (titles may contain + // commas). CRLF/CR are normalized to LF. 
+ function parseCsv(text) { + var rows = [], row = [], field = '', inQ = false, i = 0; + text = String(text == null ? '' : text).replace(/\r\n?/g, '\n'); + for (; i < text.length; i++) { + var ch = text[i]; + if (inQ) { + if (ch === '"') { if (text[i + 1] === '"') { field += '"'; i++; } else { inQ = false; } } + else { field += ch; } + } else if (ch === '"') { inQ = true; } + else if (ch === ',') { row.push(field); field = ''; } + else if (ch === '\n') { row.push(field); rows.push(row); row = []; field = ''; } + else { field += ch; } + } + if (field !== '' || row.length) { row.push(field); rows.push(row); } + return rows; } - function importDataset(file) { + // Trigger a client-side download of `text` as `name`. + function downloadText(text, name, mime) { + var blob = new Blob([text], { type: mime || 'text/plain' }); + var url = URL.createObjectURL(blob); + var a = document.createElement('a'); a.href = url; a.download = name; + document.body.appendChild(a); a.click(); a.remove(); + setTimeout(function () { URL.revokeObjectURL(url); }, 10000); + } + // Import a 2-column CSV (old path, new path) — e.g. an AI-classified list. + // MERGE semantics: only files named in the CSV are touched; others keep their + // current classification. Each new path + // "///.ext" drives two axes — the + // filename sets the tracking number (rename) and the leading segments route a + // transmittal. Either axis can apply independently; per-row problems are + // collected and offered as a downloadable errors CSV (the list can be huge). 
+ function importPaths(file) { var reader = new FileReader(); reader.onload = function () { - var obj; - try { obj = JSON.parse(reader.result); } - catch (e) { window.zddc.toast('Import failed — not valid JSON.', 'error'); return; } - if (!obj || !Array.isArray(obj.files)) { - window.zddc.toast('Import failed — expected a classifier dataset with a "files" list.', 'error'); return; - } + var rows = parseCsv(reader.result); + if (!rows.length) { window.zddc.toast('Import failed — the CSV is empty.', 'error'); return; } var c = app.modules.classify; - var hasData = c.getTrackingTree().length || c.getTransmittalTree().length - || Object.keys(c.serialize().assignments || {}).length; - if (hasData && !confirm('Replace the current classification with the imported dataset?')) return; - c.reset(); - var ok = 0, bad = 0; - obj.files.forEach(function (rec) { - if (!rec || !rec.source) return; - var key = rec.source; - if (rec.excluded) { c.setExcluded([key], true); ok++; return; } - if (rec.filename) { - var p = window.zddc.parseFilename(String(rec.filename).trim()); - if (p && p.valid) { - var stem = p.trackingNumber + '_' + p.revision + ' (' + p.status + ')'; - c.place([key], c.addTrackingPath(null, c.parseFolderLevels(stem)), 'tracking'); - if (p.title != null) c.setTitleOverride(key, p.title); - ok++; - } else { bad++; } + // Old path must resolve to a real scanned file (srcKey set). + var valid = Object.create(null); + eachSourceFile(function (f) { valid[c.srcKeyForFile(f)] = true; }); + + var imported = 0, errors = []; + rows.forEach(function (cells, idx) { + var oldPath = (cells[0] || '').trim(); + var newPath = (cells[1] || '').trim(); + // Tolerate a header row (first row whose first cell isn't a file). 
+ if (idx === 0 && !valid[oldPath] && /^(old|path|source|from)\b/i.test(oldPath)) return; + if (!oldPath && !newPath) return; // blank line + if (!oldPath) { errors.push([oldPath, newPath, 'missing old path']); return; } + if (!valid[oldPath]) { errors.push([oldPath, newPath, 'no such file in the current scan']); return; } + if (!newPath) { errors.push([oldPath, newPath, 'missing new path']); return; } + + var segs = newPath.split('/').filter(function (s) { return s !== ''; }); + if (!segs.length) { errors.push([oldPath, newPath, 'empty new path']); return; } + var filename = segs[segs.length - 1]; + var leading = segs.slice(0, -1); + var didTracking = false, didTransmittal = false, rowErr = ''; + function note(m) { rowErr = rowErr ? rowErr + '; ' + m : m; } + + // Axis 1 — filename → tracking tree (the rename). + var p = window.zddc.parseFilename(filename); + if (p && p.valid) { + var stem = p.trackingNumber + '_' + p.revision + ' (' + p.status + ')'; + c.place([oldPath], c.addTrackingPath(null, c.parseFolderLevels(stem)), 'tracking'); + if (p.title != null) c.setTitleOverride(oldPath, p.title); + didTracking = true; + } else { + note('filename is not a valid ZDDC name "' + filename + '"'); } - if (rec.transmittal && rec.transmittal.party) { - var t = rec.transmittal; - var pid = c.findOrAddParty(t.party); - var bid = c.findOrAddTransmittalBin(pid, t.slot || 'received', { - date: t.date, type: t.type || 'TRN', seq: t.seq, status: t.status, title: t.title, - }); - if (bid) c.place([key], bid, 'transmittal'); + + // Axis 2 — // → transmittal tree (the route). 
+ if (leading.length >= 3) { + var party = leading[0]; + var slot = leading[1].toLowerCase(); + var folder = leading.slice(2).join('/'); + if (slot !== 'issued' && slot !== 'received') { + note('direction must be "issued" or "received", got "' + leading[1] + '"'); + } else { + var pf = window.zddc.parseFolder(folder); + if (pf && pf.valid) { + var tnParts = pf.trackingNumber.split('-'); + var seq = tnParts.pop(), type = tnParts.pop(); + var bid = c.findOrAddTransmittalBin(c.findOrAddParty(party), slot, { + date: pf.date, type: type || 'TRN', seq: seq || '', status: pf.status, title: pf.title, + }); + if (bid) { c.place([oldPath], bid, 'transmittal'); didTransmittal = true; } + else note('could not create the transmittal folder'); + } else { + note('transmittal folder is not a valid ZDDC folder name "' + folder + '"'); + } + } + } else if (leading.length >= 1) { + note('to route a transmittal the new path needs ///'); } + + if (didTracking || didTransmittal) imported++; + if (rowErr) errors.push([oldPath, newPath, rowErr]); }); - window.zddc.toast('Imported ' + ok + ' file' + (ok === 1 ? '' : 's') - + (bad ? (' — ' + bad + ' had an unparseable filename') : '') + '.', bad ? 'warning' : 'success'); + + if (errors.length) { + var elines = ['old path,new path,reason']; + errors.forEach(function (e) { elines.push(csvCell(e[0]) + ',' + csvCell(e[1]) + ',' + csvCell(e[2])); }); + downloadText(elines.join('\n'), 'classifier-import-errors.csv', 'text/csv'); + } + window.zddc.toast('Imported ' + imported + ' file' + (imported === 1 ? '' : 's') + + (errors.length ? (' — ' + errors.length + ' row' + (errors.length === 1 ? '' : 's') + + ' had problems (downloaded classifier-import-errors.csv)') : '') + '.', + errors.length ? 
'warning' : 'success'); }; reader.onerror = function () { window.zddc.toast('Import failed — could not read the file.', 'error'); }; reader.readAsText(file); @@ -381,11 +412,13 @@ }); // Dataset export / import (round-trip the classification through a JSON file). - if (app.dom.exportDatasetBtn) app.dom.exportDatasetBtn.addEventListener('click', exportDataset); - if (app.dom.importDatasetBtn) app.dom.importDatasetBtn.addEventListener('click', function () { app.dom.importDatasetInput.click(); }); + if (app.dom.exportPathsBtn) app.dom.exportPathsBtn.addEventListener('click', function () { + if (app.modules.tree && app.modules.tree.exportPathList) app.modules.tree.exportPathList(); + }); + if (app.dom.importPathsBtn) app.dom.importPathsBtn.addEventListener('click', function () { app.dom.importPathsInput.click(); }); if (app.dom.resetDatasetBtn) app.dom.resetDatasetBtn.addEventListener('click', resetDataset); - if (app.dom.importDatasetInput) app.dom.importDatasetInput.addEventListener('change', function () { - if (this.files && this.files[0]) importDataset(this.files[0]); + if (app.dom.importPathsInput) app.dom.importPathsInput.addEventListener('change', function () { + if (this.files && this.files[0]) importPaths(this.files[0]); this.value = ''; // allow re-importing the same file }); diff --git a/classifier/js/tree.js b/classifier/js/tree.js index bb0e231..92ce6ad 100644 --- a/classifier/js/tree.js +++ b/classifier/js/tree.js @@ -203,6 +203,27 @@ if (!built.count) { window.zddc.toast('No files to export — nothing passes the current filters.', 'info'); return; } copyOrDownload(built.tsv, built.count); } + // Download the filtered file list as a 1-column CSV of full (root-relative) + // paths — the same keys “Import paths” matches on. Meant to be handed to an AI + // that returns a 2-column old→new mapping. 
+ function exportPathList() { + var c = window.app.modules.classify; + var files = filteredFileObjects().slice().sort(function (a, b) { + return cmpName(c.srcKeyForFile(a), c.srcKeyForFile(b)); + }); + if (!files.length) { window.zddc.toast('No files to export — nothing passes the current filters.', 'info'); return; } + function cell(s) { s = (s == null ? '' : String(s)); return /[",\n\r]/.test(s) ? '"' + s.replace(/"/g, '""') + '"' : s; } + var lines = ['path']; + files.forEach(function (f) { lines.push(cell(c.srcKeyForFile(f))); }); + try { + var blob = new Blob([lines.join('\n')], { type: 'text/csv' }); + var url = URL.createObjectURL(blob); + var a = document.createElement('a'); a.href = url; a.download = 'classifier-paths.csv'; + document.body.appendChild(a); a.click(); a.remove(); + setTimeout(function () { URL.revokeObjectURL(url); }, 10000); + window.zddc.toast('Exported ' + files.length + ' path' + (files.length === 1 ? '' : 's') + ' to classifier-paths.csv.', 'success'); + } catch (e) { window.zddc.toast('Could not export the path list — ' + (e.message || e), 'error'); } + } function copyOrDownload(text, count) { function ok() { window.zddc.toast('Copied ' + count + ' file' + (count === 1 ? '' : 's') + ' (path + file) — paste into Excel.', 'success'); } function download() { @@ -1042,6 +1063,7 @@ setShowFilters, setNameFilter, exportFilteredList, + exportPathList, filteredFiles: filteredFileObjects, _buildExportTsv: buildExportTsv }; diff --git a/classifier/template.html b/classifier/template.html index 0de033f..182885d 100644 --- a/classifier/template.html +++ b/classifier/template.html @@ -83,6 +83,8 @@ + @@ -105,9 +107,8 @@
| - - - + +
diff --git a/tests/classify.spec.js b/tests/classify.spec.js index c4ae3ad..d47361b 100644 --- a/tests/classify.spec.js +++ b/tests/classify.spec.js @@ -700,7 +700,7 @@ test('dataset (filename-based): import reconstruction rebuilds tracking + shared const c = window.app.modules.classify; const z = window.zddc; c.reset(); - // Mirrors app.importDataset's per-record reconstruction: two docs sharing + // Mirrors app.importPaths's per-row reconstruction: two docs sharing // one transmittal package, plus an excluded junk file. const recs = [ { source: 'a.pdf', filename: 'CPO-0001_0 (IFU) - Doc A.pdf', excluded: false, @@ -1680,6 +1680,63 @@ test('export: filtered file list → TSV (path + file), includes collapsed folde expect(r.filtered).toContain('Elec/valve spec.pdf\tvalve spec.pdf'); }); +test('import paths: CSV old→new drives rename + transmittal route (merge, errors reported)', async ({ page }) => { + await page.evaluate(() => window.app.modules.app.setMode()); + await page.evaluate(() => { + const c = window.app.modules.classify; + c.reset(); + const f1 = { originalFilename: 'IMG_001', extension: 'pdf', folderPath: 'Job/Inbox' }; + const f2 = { originalFilename: 'IMG_002', extension: 'pdf', folderPath: 'Job/Inbox' }; + const f3 = { originalFilename: 'keep me', extension: 'pdf', folderPath: 'Job/Inbox' }; + window.app.folderTree = [{ name: 'Job', path: 'Job', files: [], children: [ + { name: 'Inbox', path: 'Job/Inbox', files: [f1, f2, f3], children: [] }] }]; + // A file NOT named in the CSV must keep its prior classification (merge). + c.place([c.srcKeyForFile(f3)], c.addTrackingPath(null, c.parseFolderLevels('ZZZ-0009_B (IFR)')), 'tracking'); + // Capture the summary toast so the test can await the async FileReader. + window.__toast = null; + window.zddc.toast = (msg, level) => { window.__toast = { msg, level }; }; + }); + // Row 1: full route + rename. Row 2: filename only (rename, no route → no error). + // Row 3: old path absent from the scan → error. 
Title with a comma exercises CSV quoting. + const csv = [ + 'old path,new path', + 'Inbox/IMG_001.pdf,"Acme/received/2025-10-31_Acme-TRN-0043 (IFC) - Pkg/CPO-0001_0 (IFU) - Doc A, rev one.pdf"', + 'Inbox/IMG_002.pdf,CPO-0002_A (IFR) - Doc B.pdf', + 'ghost/missing.pdf,whatever/x.pdf', + ].join('\n'); + await page.setInputFiles('#importPathsInput', { name: 'map.csv', mimeType: 'text/csv', buffer: Buffer.from(csv) }); + await page.waitForFunction(() => window.__toast !== null); + const r = await page.evaluate(() => { + const c = window.app.modules.classify; + const d1 = c.deriveTarget({ folderPath: 'Job/Inbox', originalFilename: 'IMG_001', extension: 'pdf' }); + const d2 = c.deriveTarget({ folderPath: 'Job/Inbox', originalFilename: 'IMG_002', extension: 'pdf' }); + const d3 = c.deriveTarget({ folderPath: 'Job/Inbox', originalFilename: 'keep me', extension: 'pdf' }); + return { + toast: window.__toast, + d1: { tracking: d1.tracking, rev: d1.revision, status: d1.status, title: d1.title, outPath: d1.outPath }, + d2: { tracking: d2.tracking, rev: d2.revision, status: d2.status, title: d2.title, outPath: d2.outPath }, + d3tracking: d3.tracking, d3rev: d3.revision, + }; + }); + // Row 1 — both axes: filename → name, path → transmittal output. + expect(r.d1.tracking).toBe('CPO-0001'); + expect(r.d1.rev).toBe('0'); + expect(r.d1.status).toBe('IFU'); + expect(r.d1.title).toBe('Doc A, rev one'); // comma survived CSV quoting + expect(r.d1.outPath).toBe('Acme/received/2025-10-31_Acme-TRN-0043 (IFC) - Pkg'); + // Row 2 — filename only: renamed, no transmittal, NOT an error. + expect(r.d2.tracking).toBe('CPO-0002'); + expect(r.d2.rev).toBe('A'); + expect(r.d2.outPath).toBe(''); + // Merge: the un-listed file keeps its prior placement. + expect(r.d3tracking).toBe('ZZZ-0009'); + expect(r.d3rev).toBe('B'); + // Two rows imported; the missing-file row is flagged → warning. 
+ expect(r.toast.level).toBe('warning'); + expect(r.toast.msg).toContain('Imported 2 files'); + expect(r.toast.msg).toContain('1 row had problems'); +}); + test('paste rows: a full-path Current name binds that exact file directly', async ({ page }) => { await page.evaluate(() => window.app.modules.app.setMode()); const r = await page.evaluate(() => {