From ce6efb0201c88abb2bd30aa57477f4d608f18be5 Mon Sep 17 00:00:00 2001
From: ZDDC <caseywitt@proton.me>
Date: Wed, 17 Jun 2026 16:38:15 -0500
Subject: [PATCH] =?UTF-8?q?feat(classifier):=20CSV=20path=20round-trip=20?=
 =?UTF-8?q?=E2=80=94=20export=20filtered=20paths,=20import=20old=E2=86=92n?=
 =?UTF-8?q?ew=20mapping?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add an AI-friendly classification round-trip alongside the By-tracking grid:

- "⬇ Export paths" (filetree header): downloads the filtered file list as a
  1-column CSV of full (root-relative) paths — the same keys the importer
  matches on. Hand it to an LLM to classify into
  <party>/<direction>/<transmittal>/<file>.ext.
- "Import paths…" (above the target list): loads a 2-column CSV (old path,
  new path). Each new path drives both axes — the trailing filename sets the
  tracking number (rename, via parseFilename → tracking tree) and the leading
  <party>/<direction>/<transmittal> segments route a transmittal (via
  parseFolder → transmittal tree). MERGE semantics: only files named in the
  CSV are touched; others keep their classification.
- Per-row problems (unknown old path, unparseable filename/transmittal, bad
  direction) are collected and offered as a downloadable errors CSV, with a
  summary toast — scales to thousands of rows. Either axis can apply
  independently, so a filename-only new path is a rename with no error.

This replaces the JSON "Export for editing" / "Import edits" pair (the CSV
path form expresses both tracking numbers and transmittal routes and is
simpler to round-trip; note that the JSON form's per-file "excluded" flag is
not carried by the CSV); the TSV "Export list" clipboard→Excel button is kept.
Buttons can grow into a modal later if more options are needed.

Includes a Playwright test driving the real file-input import (rename+route,
filename-only, merge-preserves-unlisted, CSV-quoted comma in title, error row).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 classifier/js/app.js     | 199 +++++++++++++++++++++++----------------
 classifier/js/tree.js    |  22 +++++
 classifier/template.html |   7 +-
 tests/classify.spec.js   |  59 +++++++++++-
 4 files changed, 200 insertions(+), 87 deletions(-)
diff --git a/classifier/js/app.js b/classifier/js/app.js
index bff9eda..4d5148a 100644
--- a/classifier/js/app.js
+++ b/classifier/js/app.js
@@ -150,9 +150,9 @@
             showExcludedCheckbox: document.getElementById('showExcludedCheckbox'),
             showEmptyCheckbox: document.getElementById('showEmptyCheckbox'),
             exportListBtn: document.getElementById('exportListBtn'),
-            exportDatasetBtn: document.getElementById('exportDatasetBtn'),
-            importDatasetBtn: document.getElementById('importDatasetBtn'),
-            importDatasetInput: document.getElementById('importDatasetInput'),
+            exportPathsBtn: document.getElementById('exportPathsBtn'),
+            importPathsBtn: document.getElementById('importPathsBtn'),
+            importPathsInput: document.getElementById('importPathsInput'),
             resetDatasetBtn: document.getElementById('resetDatasetBtn'),
             treeFilterInput: document.getElementById('treeFilterInput'),
             trackingFilterInput: document.getElementById('trackingFilterInput'),
@@ -219,89 +219,120 @@
             (nodes || []).forEach(function (n) { (n.files || []).forEach(cb); walk(n.children); });
         })(app.folderTree || []);
     }
-    function exportDataset() {
-        var c = app.modules.classify, files = [];
-        eachSourceFile(function (f) {
-            var key = c.srcKeyForFile(f);
-            var a = c.getAssignment(key) || {};
-            var d = c.deriveTarget(f);
-            var rec = {
-                source: key,
-                originalName: window.zddc.joinExtension(f.originalFilename, f.extension),
-                filename: a.excluded ? '' : (d.filename || ''),
-                excluded: !!a.excluded,
-            };
-            if (!a.excluded && a.transmittalNodeId) {
-                var t = c.transmittalRecord(a.transmittalNodeId);
-                if (t) rec.transmittal = t;
-            }
-            files.push(rec);
-        });
-        var payload = {
-            zddcClassifierFiles: 1,
-            exportedAt: new Date().toISOString(),
-            _format: 'One record per input file. Set "filename" to its full ZDDC name '
-                + '"TRACKING_REV (STATUS) - Title.ext" — on import the app splits TRACKING on "-" and the '
-                + 'final "_" into nested folders, and files in shared paths share ancestors. Set '
-                + '"excluded": true for non-documents (filename then ignored). "transmittal" is optional: '
-                + '{party, slot:"received"|"issued", date:"YYYY-MM-DD", type:"TRN"|"SUB", seq, status, title}. '
-                + 'Classify every "source" key; do not invent files.',
-            outputName: c.serialize().outputName || null,
-            files: files,
-        };
-        var name = 'classifier-dataset';
-        try {
-            if (app.modules.workspace && typeof app.modules.workspace.activeName === 'function') {
-                name = app.modules.workspace.activeName() || name;
-            }
-        } catch (_) { /* ok */ }
-        var blob = new Blob([JSON.stringify(payload, null, 2)], { type: 'application/json' });
-        var url = URL.createObjectURL(blob);
-        var a = document.createElement('a');
-        a.href = url;
-        a.download = String(name).replace(/[^\w.-]+/g, '_') + '.zddc-classification.json';
-        document.body.appendChild(a); a.click(); a.remove();
-        URL.revokeObjectURL(url);
+    // Encode one value as a CSV cell (RFC 4180): null/undefined become '', anything else is stringified, and the
+    // result is wrapped in double quotes (embedded quotes doubled) only when it holds a comma, quote, CR or LF.
+    function csvCell(s) { s = (s == null ? '' : String(s)); return /[",\n\r]/.test(s) ? '"' + s.replace(/"/g, '""') + '"' : s; }
+    // Minimal, lenient RFC 4180-style CSV parser: takes the file text and returns an array of rows, each an array of
+    // string cells (rows may have different cell counts; cells are NOT trimmed). Quoted fields may hold commas,
+    // newlines and doubled quotes (titles may contain commas). Empty or null/undefined input yields [].
+    function parseCsv(text) {
+        var rows = [], row = [], field = '', inQ = false, i = 0;
+        text = String(text == null ? '' : text).replace(/\r\n?/g, '\n');   // CRLF and lone CR → LF, also inside quoted fields
+        for (; i < text.length; i++) {
+            var ch = text[i];
+            if (inQ) {
+                if (ch === '"') { if (text[i + 1] === '"') { field += '"'; i++; } else { inQ = false; } }   // "" → literal quote; a lone " ends the quoted run
+                else { field += ch; }
+            } else if (ch === '"') { inQ = true; }   // a quote anywhere in an unquoted run opens quoting (lenient)
+            else if (ch === ',') { row.push(field); field = ''; }
+            else if (ch === '\n') { row.push(field); rows.push(row); row = []; field = ''; }
+            else { field += ch; }
+        }
+        if (field !== '' || row.length) { row.push(field); rows.push(row); }   // flush a last row that has no trailing newline
+        return rows;
     }
-    function importDataset(file) {
+    // Trigger a client-side download of `text` as a file named `name`; `mime` is the Blob type (default 'text/plain').
+    function downloadText(text, name, mime) {
+        var blob = new Blob([text], { type: mime || 'text/plain' });
+        var url = URL.createObjectURL(blob);
+        var a = document.createElement('a'); a.href = url; a.download = name;
+        document.body.appendChild(a); a.click(); a.remove();   // the temporary link is attached only for the click
+        setTimeout(function () { URL.revokeObjectURL(url); }, 10000);   // revoke after 10 s — presumably to let the download start first
+    }
+    // Import a 2-column CSV (old path, new path) — e.g. an AI-classified list.
+    // MERGE semantics: only files named in the CSV are touched; others keep their
+    // current classification. Each new path
+    // "<party>/<direction>/<transmittal>/<file>.ext" drives two axes — the
+    // filename sets the tracking number (rename) and the leading segments route a
+    // transmittal. Either axis can apply independently; per-row problems are
+    // collected and offered as a downloadable errors CSV (the list can be huge).
+    function importPaths(file) {
         var reader = new FileReader();
         reader.onload = function () {
-            var obj;
-            try { obj = JSON.parse(reader.result); }
-            catch (e) { window.zddc.toast('Import failed — not valid JSON.', 'error'); return; }
-            if (!obj || !Array.isArray(obj.files)) {
-                window.zddc.toast('Import failed — expected a classifier dataset with a "files" list.', 'error'); return;
-            }
+            var rows = parseCsv(reader.result);
+            if (!rows.length) { window.zddc.toast('Import failed — the CSV is empty.', 'error'); return; }
             var c = app.modules.classify;
-            var hasData = c.getTrackingTree().length || c.getTransmittalTree().length
-                || Object.keys(c.serialize().assignments || {}).length;
-            if (hasData && !confirm('Replace the current classification with the imported dataset?')) return;
-            c.reset();
-            var ok = 0, bad = 0;
-            obj.files.forEach(function (rec) {
-                if (!rec || !rec.source) return;
-                var key = rec.source;
-                if (rec.excluded) { c.setExcluded([key], true); ok++; return; }
-                if (rec.filename) {
-                    var p = window.zddc.parseFilename(String(rec.filename).trim());
-                    if (p && p.valid) {
-                        var stem = p.trackingNumber + '_' + p.revision + ' (' + p.status + ')';
-                        c.place([key], c.addTrackingPath(null, c.parseFolderLevels(stem)), 'tracking');
-                        if (p.title != null) c.setTitleOverride(key, p.title);
-                        ok++;
-                    } else { bad++; }
+            // Old path must resolve to a real scanned file (srcKey set).
+            var valid = Object.create(null);
+            eachSourceFile(function (f) { valid[c.srcKeyForFile(f)] = true; });
+
+            var imported = 0, errors = [];
+            rows.forEach(function (cells, idx) {
+                var oldPath = (cells[0] || '').trim();
+                var newPath = (cells[1] || '').trim();
+                // Tolerate a header row: skip row 0 when its first cell is not a scanned file and starts with old/path/source/from.
+                if (idx === 0 && !valid[oldPath] && /^(old|path|source|from)\b/i.test(oldPath)) return;
+                if (!oldPath && !newPath) return;                 // blank line
+                if (!oldPath) { errors.push([oldPath, newPath, 'missing old path']); return; }
+                if (!valid[oldPath]) { errors.push([oldPath, newPath, 'no such file in the current scan']); return; }
+                if (!newPath) { errors.push([oldPath, newPath, 'missing new path']); return; }
+
+                var segs = newPath.split('/').filter(function (s) { return s !== ''; });
+                if (!segs.length) { errors.push([oldPath, newPath, 'empty new path']); return; }
+                var filename = segs[segs.length - 1];
+                var leading = segs.slice(0, -1);
+                var didTracking = false, didTransmittal = false, rowErr = '';
+                function note(m) { rowErr = rowErr ? rowErr + '; ' + m : m; }
+
+                // Axis 1 — filename → tracking tree (the rename).
+                var p = window.zddc.parseFilename(filename);
+                if (p && p.valid) {
+                    var stem = p.trackingNumber + '_' + p.revision + ' (' + p.status + ')';
+                    c.place([oldPath], c.addTrackingPath(null, c.parseFolderLevels(stem)), 'tracking');
+                    if (p.title != null) c.setTitleOverride(oldPath, p.title);
+                    didTracking = true;
+                } else {
+                    note('filename is not a valid ZDDC name "' + filename + '"');
                 }
-                if (rec.transmittal && rec.transmittal.party) {
-                    var t = rec.transmittal;
-                    var pid = c.findOrAddParty(t.party);
-                    var bid = c.findOrAddTransmittalBin(pid, t.slot || 'received', {
-                        date: t.date, type: t.type || 'TRN', seq: t.seq, status: t.status, title: t.title,
-                    });
-                    if (bid) c.place([key], bid, 'transmittal');
+
+                // Axis 2 — <party>/<direction>/<transmittal> → transmittal tree (the route).
+                if (leading.length >= 3) {
+                    var party = leading[0];
+                    var slot = leading[1].toLowerCase();
+                    var folder = leading.slice(2).join('/');
+                    if (slot !== 'issued' && slot !== 'received') {
+                        note('direction must be "issued" or "received", got "' + leading[1] + '"');
+                    } else {
+                        var pf = window.zddc.parseFolder(folder);
+                        if (pf && pf.valid) {
+                            var tnParts = pf.trackingNumber.split('-');
+                            var seq = tnParts.pop(), type = tnParts.pop();
+                            var bid = c.findOrAddTransmittalBin(c.findOrAddParty(party), slot, {
+                                date: pf.date, type: type || 'TRN', seq: seq || '', status: pf.status, title: pf.title,
+                            });
+                            if (bid) { c.place([oldPath], bid, 'transmittal'); didTransmittal = true; }
+                            else note('could not create the transmittal folder');
+                        } else {
+                            note('transmittal folder is not a valid ZDDC folder name "' + folder + '"');
+                        }
+                    }
+                } else if (leading.length >= 1) {
+                    note('to route a transmittal the new path needs <party>/<direction>/<transmittal>/<file>');
                 }
+
+                if (didTracking || didTransmittal) imported++;
+                if (rowErr) errors.push([oldPath, newPath, rowErr]);
             });
-            window.zddc.toast('Imported ' + ok + ' file' + (ok === 1 ? '' : 's')
-                + (bad ? (' — ' + bad + ' had an unparseable filename') : '') + '.', bad ? 'warning' : 'success');
+
+            if (errors.length) {
+                var elines = ['old path,new path,reason'];
+                errors.forEach(function (e) { elines.push(csvCell(e[0]) + ',' + csvCell(e[1]) + ',' + csvCell(e[2])); });
+                downloadText(elines.join('\n'), 'classifier-import-errors.csv', 'text/csv');
+            }
+            window.zddc.toast('Imported ' + imported + ' file' + (imported === 1 ? '' : 's')
+                + (errors.length ? (' — ' + errors.length + ' row' + (errors.length === 1 ? '' : 's')
+                    + ' had problems (downloaded classifier-import-errors.csv)') : '') + '.',
+                errors.length ? 'warning' : 'success');
         };
         reader.onerror = function () { window.zddc.toast('Import failed — could not read the file.', 'error'); };
         reader.readAsText(file);
@@ -381,11 +412,13 @@
         });
 
         // Dataset export / import (round-trip the classification through a JSON file).
-        if (app.dom.exportDatasetBtn) app.dom.exportDatasetBtn.addEventListener('click', exportDataset);
-        if (app.dom.importDatasetBtn) app.dom.importDatasetBtn.addEventListener('click', function () { app.dom.importDatasetInput.click(); });
+        if (app.dom.exportPathsBtn) app.dom.exportPathsBtn.addEventListener('click', function () {
+            if (app.modules.tree && app.modules.tree.exportPathList) app.modules.tree.exportPathList();
+        });
+        if (app.dom.importPathsBtn) app.dom.importPathsBtn.addEventListener('click', function () { app.dom.importPathsInput.click(); });
         if (app.dom.resetDatasetBtn) app.dom.resetDatasetBtn.addEventListener('click', resetDataset);
-        if (app.dom.importDatasetInput) app.dom.importDatasetInput.addEventListener('change', function () {
-            if (this.files && this.files[0]) importDataset(this.files[0]);
+        if (app.dom.importPathsInput) app.dom.importPathsInput.addEventListener('change', function () {
+            if (this.files && this.files[0]) importPaths(this.files[0]);
             this.value = '';   // allow re-importing the same file
         });
         
diff --git a/classifier/js/tree.js b/classifier/js/tree.js
index bb0e231..92ce6ad 100644
--- a/classifier/js/tree.js
+++ b/classifier/js/tree.js
@@ -203,6 +203,27 @@
         if (!built.count) { window.zddc.toast('No files to export — nothing passes the current filters.', 'info'); return; }
         copyOrDownload(built.tsv, built.count);
     }
+    // Download the files passing the current filters as classifier-paths.csv: a "path" header row, then one full
+    // (root-relative) path per file — the classify.srcKeyForFile key that “Import paths” looks up as the old path —
+    // sorted with cmpName. Meant to be handed to an AI that returns a 2-column old→new mapping. Toasts the outcome.
+    function exportPathList() {
+        var c = window.app.modules.classify;
+        var files = filteredFileObjects().slice().sort(function (a, b) {   // .slice(): sort a copy, not the returned array
+            return cmpName(c.srcKeyForFile(a), c.srcKeyForFile(b));
+        });
+        if (!files.length) { window.zddc.toast('No files to export — nothing passes the current filters.', 'info'); return; }
+        function cell(s) { s = (s == null ? '' : String(s)); return /[",\n\r]/.test(s) ? '"' + s.replace(/"/g, '""') + '"' : s; }   // RFC 4180 quoting
+        var lines = ['path'];   // header row
+        files.forEach(function (f) { lines.push(cell(c.srcKeyForFile(f))); });
+        try {
+            var blob = new Blob([lines.join('\n')], { type: 'text/csv' });
+            var url = URL.createObjectURL(blob);
+            var a = document.createElement('a'); a.href = url; a.download = 'classifier-paths.csv';
+            document.body.appendChild(a); a.click(); a.remove();
+            setTimeout(function () { URL.revokeObjectURL(url); }, 10000);   // revoke after 10 s — presumably to let the download start first
+            window.zddc.toast('Exported ' + files.length + ' path' + (files.length === 1 ? '' : 's') + ' to classifier-paths.csv.', 'success');
+        } catch (e) { window.zddc.toast('Could not export the path list — ' + (e.message || e), 'error'); }
+    }
     function copyOrDownload(text, count) {
         function ok() { window.zddc.toast('Copied ' + count + ' file' + (count === 1 ? '' : 's') + ' (path + file) — paste into Excel.', 'success'); }
         function download() {
@@ -1042,6 +1063,7 @@
         setShowFilters,
         setNameFilter,
         exportFilteredList,
+        exportPathList,
         filteredFiles: filteredFileObjects,
         _buildExportTsv: buildExportTsv
     };
diff --git a/classifier/template.html b/classifier/template.html
index 0de033f..182885d 100644
--- a/classifier/template.html
+++ b/classifier/template.html
@@ -83,6 +83,8 @@
                         </label>
                         <button class="btn btn-sm export-list-btn" id="exportListBtn"
                                 title="Copy the filtered file list (path + file columns, no folders) as TSV — paste into Excel, edit, then paste back via “Paste rows”. Paste a full path into the Current name column to bind that exact file.">⬆ Export list</button>
+                        <button class="btn btn-sm export-list-btn" id="exportPathsBtn"
+                                title="Download the filtered file list as a 1-column CSV of full paths. Feed it to an AI to classify into <party>/<direction>/<transmittal>/<file>.ext, then bring the 2-column result back via “Import paths” above the target list.">⬇ Export paths</button>
                     </div>
                     <input type="search" id="treeFilterInput" class="tree-filter" spellcheck="false"
                            placeholder="Filter files… (e.g. master deliverables list)" aria-label="Filter files">
@@ -105,9 +107,8 @@
                         <div class="pane-header-right">
                             <span id="classifyStats" class="file-stats"></span>
                             <span class="header-divider">|</span>
-                            <button id="exportDatasetBtn" class="btn btn-secondary btn-sm" title="Download the classifications as a filename-per-file JSON to edit (e.g. with an AI), then re-import here. NOT a workspace — no scanned tree.">Export for editing</button>
-                            <button id="importDatasetBtn" class="btn btn-secondary btn-sm" title="Load an edited classification JSON back in — replaces the current classifications. (To move a whole scanned workspace between browsers, use “Import workspace” on the welcome screen.)">Import edits</button>
-                            <input type="file" id="importDatasetInput" accept="application/json,.json" hidden>
+                            <button id="importPathsBtn" class="btn btn-secondary btn-sm" title="Import a 2-column CSV (old path, new path). Each new path “<party>/<direction>/<transmittal>/<file>.ext” sets that file’s tracking number (rename) and routes it into a transmittal. Only files named in the CSV are touched — others keep their current classification. Export the source list first via “Export paths” on the left.">Import paths…</button>
+                            <input type="file" id="importPathsInput" accept=".csv,text/csv,text/plain" hidden>
                             <button id="resetDatasetBtn" class="btn btn-sm btn-danger" title="Discard all classifications and start over from the raw scanned input (does not touch your files)">Reset</button>
                         </div>
                     </div>
diff --git a/tests/classify.spec.js b/tests/classify.spec.js
index c4ae3ad..d47361b 100644
--- a/tests/classify.spec.js
+++ b/tests/classify.spec.js
@@ -700,7 +700,7 @@ test('dataset (filename-based): import reconstruction rebuilds tracking + shared
         const c = window.app.modules.classify;
         const z = window.zddc;
         c.reset();
-        // Mirrors app.importDataset's per-record reconstruction: two docs sharing
+        // Mirrors the per-record reconstruction of the former app.importDataset (its place/route calls live on in app.importPaths): two docs sharing
         // one transmittal package, plus an excluded junk file.
         const recs = [
             { source: 'a.pdf', filename: 'CPO-0001_0 (IFU) - Doc A.pdf', excluded: false,
@@ -1680,6 +1680,63 @@ test('export: filtered file list → TSV (path + file), includes collapsed folde
     expect(r.filtered).toContain('Elec/valve spec.pdf\tvalve spec.pdf');
 });
 
+test('import paths: CSV old→new drives rename + transmittal route (merge, errors reported)', async ({ page }) => {
+    await page.evaluate(() => window.app.modules.app.setMode());
+    await page.evaluate(() => {
+        const c = window.app.modules.classify;
+        c.reset();
+        const f1 = { originalFilename: 'IMG_001', extension: 'pdf', folderPath: 'Job/Inbox' };
+        const f2 = { originalFilename: 'IMG_002', extension: 'pdf', folderPath: 'Job/Inbox' };
+        const f3 = { originalFilename: 'keep me', extension: 'pdf', folderPath: 'Job/Inbox' };
+        window.app.folderTree = [{ name: 'Job', path: 'Job', files: [], children: [
+            { name: 'Inbox', path: 'Job/Inbox', files: [f1, f2, f3], children: [] }] }];
+        // Pre-place f3, which is deliberately absent from the CSV: it must keep this classification (merge).
+        c.place([c.srcKeyForFile(f3)], c.addTrackingPath(null, c.parseFolderLevels('ZZZ-0009_B (IFR)')), 'tracking');
+        // Stub the toast: the import reads the file via an async FileReader and toasts once at the end, so the test waits on it.
+        window.__toast = null;
+        window.zddc.toast = (msg, level) => { window.__toast = { msg, level }; };
+    });
+    // Row 0: header (skipped). Row 1: full route + rename. Row 2: filename only (rename, no route → no error).
+    // Row 3: old path absent from the scan → error. Title with a comma exercises CSV quoting.
+    const csv = [
+        'old path,new path',
+        'Inbox/IMG_001.pdf,"Acme/received/2025-10-31_Acme-TRN-0043 (IFC) - Pkg/CPO-0001_0 (IFU) - Doc A, rev one.pdf"',
+        'Inbox/IMG_002.pdf,CPO-0002_A (IFR) - Doc B.pdf',
+        'ghost/missing.pdf,whatever/x.pdf',
+    ].join('\n');
+    await page.setInputFiles('#importPathsInput', { name: 'map.csv', mimeType: 'text/csv', buffer: Buffer.from(csv) });
+    await page.waitForFunction(() => window.__toast !== null);   // set by the stubbed toast above
+    const r = await page.evaluate(() => {
+        const c = window.app.modules.classify;
+        const d1 = c.deriveTarget({ folderPath: 'Job/Inbox', originalFilename: 'IMG_001', extension: 'pdf' });
+        const d2 = c.deriveTarget({ folderPath: 'Job/Inbox', originalFilename: 'IMG_002', extension: 'pdf' });
+        const d3 = c.deriveTarget({ folderPath: 'Job/Inbox', originalFilename: 'keep me', extension: 'pdf' });
+        return {
+            toast: window.__toast,
+            d1: { tracking: d1.tracking, rev: d1.revision, status: d1.status, title: d1.title, outPath: d1.outPath },
+            d2: { tracking: d2.tracking, rev: d2.revision, status: d2.status, title: d2.title, outPath: d2.outPath },
+            d3tracking: d3.tracking, d3rev: d3.revision,
+        };
+    });
+    // Row 1 — both axes: filename → tracking/rev/status/title, leading path → transmittal output path.
+    expect(r.d1.tracking).toBe('CPO-0001');
+    expect(r.d1.rev).toBe('0');
+    expect(r.d1.status).toBe('IFU');
+    expect(r.d1.title).toBe('Doc A, rev one');   // comma survived CSV quoting
+    expect(r.d1.outPath).toBe('Acme/received/2025-10-31_Acme-TRN-0043 (IFC) - Pkg');
+    // Row 2 — filename only: renamed, no transmittal, NOT an error.
+    expect(r.d2.tracking).toBe('CPO-0002');
+    expect(r.d2.rev).toBe('A');
+    expect(r.d2.outPath).toBe('');
+    // Merge: the un-listed file keeps its prior placement.
+    expect(r.d3tracking).toBe('ZZZ-0009');
+    expect(r.d3rev).toBe('B');
+    // Two rows imported; the missing-file row is the only error row → the summary toast is a warning.
+    expect(r.toast.level).toBe('warning');
+    expect(r.toast.msg).toContain('Imported 2 files');
+    expect(r.toast.msg).toContain('1 row had problems');
+});
+
 test('paste rows: a full-path Current name binds that exact file directly', async ({ page }) => {
     await page.evaluate(() => window.app.modules.app.setMode());
     const r = await page.evaluate(() => {