feat(classifier): CSV path round-trip — export filtered paths, import old→new mapping

Add an AI-friendly classification round-trip alongside the By-tracking grid: - "⬇ Export paths" (filetree header): downloads the filtered file list as a 1-column CSV of full (root-relative) paths — the same keys the importer matches on. Hand it to an LLM to classify into <party>/<direction>/<transmittal>/<file>.ext. - "Import paths…" (above the target list): loads a 2-column CSV (old path, new path). Each new path drives both axes — the trailing filename sets the tracking number (rename, via parseFilename → tracking tree) and the leading <party>/<direction>/<transmittal> segments route a transmittal (via parseFolder → transmittal tree). MERGE semantics: only files named in the CSV are touched; others keep their classification. - Per-row problems (unknown old path, unparseable filename/transmittal, bad direction) are collected and offered as a downloadable errors CSV, with a summary toast — scales to thousands of rows. Either axis can apply independently, so a filename-only new path is a rename with no error. This replaces the JSON "Export for editing" / "Import edits" pair (the CSV path form is fully expressive for this model and simpler to round-trip); the TSV "Export list" clipboard→Excel button is kept. Buttons can grow into a modal later if more options are needed. Includes a Playwright test driving the real file-input import (rename+route, filename-only, merge-preserves-unlisted, CSV-quoted comma in title, error row). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-17 16:38:15 -05:00 · 2026-06-17 16:38:15 -05:00 · ce6efb0201
commit ce6efb0201
parent 8ee7f1c460
4 changed files with 200 additions and 87 deletions
--- a/classifier/js/app.js
+++ b/classifier/js/app.js
@ -150,9 +150,9 @@
            showExcludedCheckbox: document.getElementById('showExcludedCheckbox'),
            showEmptyCheckbox: document.getElementById('showEmptyCheckbox'),
            exportListBtn: document.getElementById('exportListBtn'),
-            exportDatasetBtn: document.getElementById('exportDatasetBtn'),
+            exportPathsBtn: document.getElementById('exportPathsBtn'),
-            importDatasetBtn: document.getElementById('importDatasetBtn'),
+            importPathsBtn: document.getElementById('importPathsBtn'),
-            importDatasetInput: document.getElementById('importDatasetInput'),
+            importPathsInput: document.getElementById('importPathsInput'),
            resetDatasetBtn: document.getElementById('resetDatasetBtn'),
            treeFilterInput: document.getElementById('treeFilterInput'),
            trackingFilterInput: document.getElementById('trackingFilterInput'),
@ -219,89 +219,120 @@
            (nodes || []).forEach(function (n) { (n.files || []).forEach(cb); walk(n.children); });
        })(app.folderTree || []);
    }
-    function exportDataset() {
+    // CSV cell quoting (RFC4180): quote when the value holds a comma, quote, or
-        var c = app.modules.classify, files = [];
+    // newline; embedded quotes are doubled.
-        eachSourceFile(function (f) {
+    function csvCell(s) { s = (s == null ? '' : String(s)); return /[",\n\r]/.test(s) ? '"' + s.replace(/"/g, '""') + '"' : s; }
-            var key = c.srcKeyForFile(f);
+    // Minimal RFC4180-ish CSV parser → array of rows of string cells. Handles
-            var a = c.getAssignment(key) || {};
+    // quoted fields with embedded commas/quotes/newlines (titles may contain
-            var d = c.deriveTarget(f);
+    // commas). CRLF/CR are normalized to LF.
-            var rec = {
+    function parseCsv(text) {
-                source: key,
+        var rows = [], row = [], field = '', inQ = false, i = 0;
-                originalName: window.zddc.joinExtension(f.originalFilename, f.extension),
+        text = String(text == null ? '' : text).replace(/\r\n?/g, '\n');
-                filename: a.excluded ? '' : (d.filename || ''),
+        for (; i < text.length; i++) {
-                excluded: !!a.excluded,
+            var ch = text[i];
-            };
+            if (inQ) {
-            if (!a.excluded && a.transmittalNodeId) {
+                if (ch === '"') { if (text[i + 1] === '"') { field += '"'; i++; } else { inQ = false; } }
-                var t = c.transmittalRecord(a.transmittalNodeId);
+                else { field += ch; }
-                if (t) rec.transmittal = t;
+            } else if (ch === '"') { inQ = true; }
-            }
+            else if (ch === ',') { row.push(field); field = ''; }
-            files.push(rec);
+            else if (ch === '\n') { row.push(field); rows.push(row); row = []; field = ''; }
-        });
+            else { field += ch; }
-        var payload = {
+        }
-            zddcClassifierFiles: 1,
+        if (field !== '' || row.length) { row.push(field); rows.push(row); }
-            exportedAt: new Date().toISOString(),
+        return rows;
            _format: 'One record per input file. Set "filename" to its full ZDDC name '
                + '"TRACKING_REV (STATUS) - Title.ext" — on import the app splits TRACKING on "-" and the '
                + 'final "_" into nested folders, and files in shared paths share ancestors. Set '
                + '"excluded": true for non-documents (filename then ignored). "transmittal" is optional: '
                + '{party, slot:"received"|"issued", date:"YYYY-MM-DD", type:"TRN"|"SUB", seq, status, title}. '
                + 'Classify every "source" key; do not invent files.',
            outputName: c.serialize().outputName || null,
            files: files,
        };
        var name = 'classifier-dataset';
        try {
            if (app.modules.workspace && typeof app.modules.workspace.activeName === 'function') {
                name = app.modules.workspace.activeName() || name;
            }
        } catch (_) { /* ok */ }
        var blob = new Blob([JSON.stringify(payload, null, 2)], { type: 'application/json' });
        var url = URL.createObjectURL(blob);
        var a = document.createElement('a');
        a.href = url;
        a.download = String(name).replace(/[^\w.-]+/g, '_') + '.zddc-classification.json';
        document.body.appendChild(a); a.click(); a.remove();
        URL.revokeObjectURL(url);
    }
-    function importDataset(file) {
+    // Trigger a client-side download of `text` as `name`.
    /**
     * Offer `text` to the user as a downloaded file.
     *
     * @param {string} text  Content wrapped in a Blob and served through an object URL.
     * @param {string} name  Value of the anchor's `download` attribute (the suggested filename).
     * @param {string} [mime]  Blob content type; any falsy value falls back to 'text/plain'.
     */
    function downloadText(text, name, mime) {
        var contentType = mime ? mime : 'text/plain';
        var objectUrl = URL.createObjectURL(new Blob([text], { type: contentType }));

        // A throwaway <a download> is attached, clicked and detached again.
        var link = document.createElement('a');
        link.href = objectUrl;
        link.download = name;
        document.body.appendChild(link);
        link.click();
        link.remove();

        // The object URL is released 10 s later rather than immediately —
        // presumably so the browser has time to start the download first.
        setTimeout(function revokeLater() { URL.revokeObjectURL(objectUrl); }, 10000);
    }
    // Import a 2-column CSV (old path, new path) — e.g. an AI-classified list.
    // MERGE semantics: only files named in the CSV are touched; others keep their
    // current classification. Each new path
    // "<party>/<direction>/<transmittal>/<file>.ext" drives two axes — the
    // filename sets the tracking number (rename) and the leading segments route a
    // transmittal. Either axis can apply independently; per-row problems are
    // collected and offered as a downloadable errors CSV (the list can be huge).
    function importPaths(file) {
        var reader = new FileReader();
        reader.onload = function () {
-            var obj;
+            var rows = parseCsv(reader.result);
-            try { obj = JSON.parse(reader.result); }
+            if (!rows.length) { window.zddc.toast('Import failed — the CSV is empty.', 'error'); return; }
            catch (e) { window.zddc.toast('Import failed — not valid JSON.', 'error'); return; }
            if (!obj || !Array.isArray(obj.files)) {
                window.zddc.toast('Import failed — expected a classifier dataset with a "files" list.', 'error'); return;
            }
            var c = app.modules.classify;
-            var hasData = c.getTrackingTree().length || c.getTransmittalTree().length
+            // Old path must resolve to a real scanned file (srcKey set).
-                || Object.keys(c.serialize().assignments || {}).length;
+            var valid = Object.create(null);
-            if (hasData && !confirm('Replace the current classification with the imported dataset?')) return;
+            eachSourceFile(function (f) { valid[c.srcKeyForFile(f)] = true; });
-            c.reset();
+
-            var ok = 0, bad = 0;
+            var imported = 0, errors = [];
-            obj.files.forEach(function (rec) {
+            rows.forEach(function (cells, idx) {
-                if (!rec || !rec.source) return;
+                var oldPath = (cells[0] || '').trim();
-                var key = rec.source;
+                var newPath = (cells[1] || '').trim();
-                if (rec.excluded) { c.setExcluded([key], true); ok++; return; }
+                // Tolerate a header row (first row whose first cell isn't a file).
-                if (rec.filename) {
+                if (idx === 0 && !valid[oldPath] && /^(old|path|source|from)\b/i.test(oldPath)) return;
-                    var p = window.zddc.parseFilename(String(rec.filename).trim());
+                if (!oldPath && !newPath) return;                 // blank line
-                    if (p && p.valid) {
+                if (!oldPath) { errors.push([oldPath, newPath, 'missing old path']); return; }
-                        var stem = p.trackingNumber + '_' + p.revision + ' (' + p.status + ')';
+                if (!valid[oldPath]) { errors.push([oldPath, newPath, 'no such file in the current scan']); return; }
-                        c.place([key], c.addTrackingPath(null, c.parseFolderLevels(stem)), 'tracking');
+                if (!newPath) { errors.push([oldPath, newPath, 'missing new path']); return; }
-                        if (p.title != null) c.setTitleOverride(key, p.title);
+
-                        ok++;
+                var segs = newPath.split('/').filter(function (s) { return s !== ''; });
-                    } else { bad++; }
+                if (!segs.length) { errors.push([oldPath, newPath, 'empty new path']); return; }
                var filename = segs[segs.length - 1];
                var leading = segs.slice(0, -1);
                var didTracking = false, didTransmittal = false, rowErr = '';
                function note(m) { rowErr = rowErr ? rowErr + '; ' + m : m; }
                // Axis 1 — filename → tracking tree (the rename).
                var p = window.zddc.parseFilename(filename);
                if (p && p.valid) {
                    var stem = p.trackingNumber + '_' + p.revision + ' (' + p.status + ')';
                    c.place([oldPath], c.addTrackingPath(null, c.parseFolderLevels(stem)), 'tracking');
                    if (p.title != null) c.setTitleOverride(oldPath, p.title);
                    didTracking = true;
                } else {
                    note('filename is not a valid ZDDC name "' + filename + '"');
                }
-                if (rec.transmittal && rec.transmittal.party) {
+
-                    var t = rec.transmittal;
+                // Axis 2 — <party>/<direction>/<transmittal> → transmittal tree (the route).
-                    var pid = c.findOrAddParty(t.party);
+                if (leading.length >= 3) {
-                    var bid = c.findOrAddTransmittalBin(pid, t.slot || 'received', {
+                    var party = leading[0];
-                        date: t.date, type: t.type || 'TRN', seq: t.seq, status: t.status, title: t.title,
+                    var slot = leading[1].toLowerCase();
-                    });
+                    var folder = leading.slice(2).join('/');
-                    if (bid) c.place([key], bid, 'transmittal');
+                    if (slot !== 'issued' && slot !== 'received') {
                        note('direction must be "issued" or "received", got "' + leading[1] + '"');
                    } else {
                        var pf = window.zddc.parseFolder(folder);
                        if (pf && pf.valid) {
                            var tnParts = pf.trackingNumber.split('-');
                            var seq = tnParts.pop(), type = tnParts.pop();
                            var bid = c.findOrAddTransmittalBin(c.findOrAddParty(party), slot, {
                                date: pf.date, type: type || 'TRN', seq: seq || '', status: pf.status, title: pf.title,
                            });
                            if (bid) { c.place([oldPath], bid, 'transmittal'); didTransmittal = true; }
                            else note('could not create the transmittal folder');
                        } else {
                            note('transmittal folder is not a valid ZDDC folder name "' + folder + '"');
                        }
                    }
                } else if (leading.length >= 1) {
                    note('to route a transmittal the new path needs <party>/<direction>/<transmittal>/<file>');
                }
                if (didTracking || didTransmittal) imported++;
                if (rowErr) errors.push([oldPath, newPath, rowErr]);
            });
-            window.zddc.toast('Imported ' + ok + ' file' + (ok === 1 ? '' : 's')
+
-                + (bad ? (' — ' + bad + ' had an unparseable filename') : '') + '.', bad ? 'warning' : 'success');
+            if (errors.length) {
                var elines = ['old path,new path,reason'];
                errors.forEach(function (e) { elines.push(csvCell(e[0]) + ',' + csvCell(e[1]) + ',' + csvCell(e[2])); });
                downloadText(elines.join('\n'), 'classifier-import-errors.csv', 'text/csv');
            }
            window.zddc.toast('Imported ' + imported + ' file' + (imported === 1 ? '' : 's')
                + (errors.length ? (' — ' + errors.length + ' row' + (errors.length === 1 ? '' : 's')
                    + ' had problems (downloaded classifier-import-errors.csv)') : '') + '.',
                errors.length ? 'warning' : 'success');
        };
        reader.onerror = function () { window.zddc.toast('Import failed — could not read the file.', 'error'); };
        reader.readAsText(file);
@ -381,11 +412,13 @@
        });
        // Path export / import (round-trip the classification through a CSV of old → new paths).
-        if (app.dom.exportDatasetBtn) app.dom.exportDatasetBtn.addEventListener('click', exportDataset);
+        if (app.dom.exportPathsBtn) app.dom.exportPathsBtn.addEventListener('click', function () {
-        if (app.dom.importDatasetBtn) app.dom.importDatasetBtn.addEventListener('click', function () { app.dom.importDatasetInput.click(); });
+            if (app.modules.tree && app.modules.tree.exportPathList) app.modules.tree.exportPathList();
        });
        if (app.dom.importPathsBtn) app.dom.importPathsBtn.addEventListener('click', function () { app.dom.importPathsInput.click(); });
        if (app.dom.resetDatasetBtn) app.dom.resetDatasetBtn.addEventListener('click', resetDataset);
-        if (app.dom.importDatasetInput) app.dom.importDatasetInput.addEventListener('change', function () {
+        if (app.dom.importPathsInput) app.dom.importPathsInput.addEventListener('change', function () {
-            if (this.files && this.files[0]) importDataset(this.files[0]);
+            if (this.files && this.files[0]) importPaths(this.files[0]);
            this.value = '';   // allow re-importing the same file
        });
--- a/classifier/js/tree.js
+++ b/classifier/js/tree.js
@ -203,6 +203,27 @@
        if (!built.count) { window.zddc.toast('No files to export — nothing passes the current filters.', 'info'); return; }
        copyOrDownload(built.tsv, built.count);
    }
    // Export the currently filtered files as a one-column CSV: a "path" header
    // followed by one full (root-relative) path per row, sorted by path. These
    // are the same keys “Import paths” matches on, so the list can be handed to
    // an AI and come back as a 2-column old→new mapping.
    function exportPathList() {
        var classify = window.app.modules.classify;
        function keyOf(file) { return classify.srcKeyForFile(file); }
        // CSV quoting: a value holding a comma, double quote, CR or LF is wrapped
        // in double quotes with embedded quotes doubled; anything else passes through.
        function quote(value) {
            var text = (value == null) ? '' : String(value);
            if (!/[",\n\r]/.test(text)) return text;
            return '"' + text.replace(/"/g, '""') + '"';
        }

        // Sort a copy so the array returned by filteredFileObjects() is left alone.
        var sorted = filteredFileObjects().slice();
        sorted.sort(function (left, right) { return cmpName(keyOf(left), keyOf(right)); });
        var total = sorted.length;
        if (!total) {
            window.zddc.toast('No files to export — nothing passes the current filters.', 'info');
            return;
        }

        var rows = ['path'].concat(sorted.map(function (file) { return quote(keyOf(file)); }));
        try {
            var objectUrl = URL.createObjectURL(new Blob([rows.join('\n')], { type: 'text/csv' }));
            var link = document.createElement('a');
            link.href = objectUrl;
            link.download = 'classifier-paths.csv';
            document.body.appendChild(link);
            link.click();
            link.remove();
            setTimeout(function () { URL.revokeObjectURL(objectUrl); }, 10000);
            window.zddc.toast('Exported ' + total + ' path' + (total === 1 ? '' : 's') + ' to classifier-paths.csv.', 'success');
        } catch (e) {
            // Any failure while building or triggering the download is reported as a toast.
            window.zddc.toast('Could not export the path list — ' + (e.message || e), 'error');
        }
    }
    function copyOrDownload(text, count) {
        function ok() { window.zddc.toast('Copied ' + count + ' file' + (count === 1 ? '' : 's') + ' (path + file) — paste into Excel.', 'success'); }
        function download() {
@ -1042,6 +1063,7 @@
        setShowFilters,
        setNameFilter,
        exportFilteredList,
        exportPathList,
        filteredFiles: filteredFileObjects,
        _buildExportTsv: buildExportTsv
    };
--- a/classifier/template.html
+++ b/classifier/template.html
@ -83,6 +83,8 @@
                        </label>
                        <button class="btn btn-sm export-list-btn" id="exportListBtn"
                                title="Copy the filtered file list (path + file columns, no folders) as TSV — paste into Excel, edit, then paste back via “Paste rows”. Paste a full path into the Current name column to bind that exact file.">⬆ Export list</button>
                        <button class="btn btn-sm export-list-btn" id="exportPathsBtn"
                                title="Download the filtered file list as a 1-column CSV of full paths. Feed it to an AI to classify into <party>/<direction>/<transmittal>/<file>.ext, then bring the 2-column result back via “Import paths” above the target list.">⬇ Export paths</button>
                    </div>
                    <input type="search" id="treeFilterInput" class="tree-filter" spellcheck="false"
                           placeholder="Filter files… (e.g. master deliverables list)" aria-label="Filter files">
@ -105,9 +107,8 @@
                        <div class="pane-header-right">
                            <span id="classifyStats" class="file-stats"></span>
                            <span class="header-divider">|</span>
-                            <button id="exportDatasetBtn" class="btn btn-secondary btn-sm" title="Download the classifications as a filename-per-file JSON to edit (e.g. with an AI), then re-import here. NOT a workspace — no scanned tree.">Export for editing</button>
+                            <button id="importPathsBtn" class="btn btn-secondary btn-sm" title="Import a 2-column CSV (old path, new path). Each new path “<party>/<direction>/<transmittal>/<file>.ext” sets that file’s tracking number (rename) and routes it into a transmittal. Only files named in the CSV are touched — others keep their current classification. Export the source list first via “Export paths” on the left.">Import paths…</button>
-                            <button id="importDatasetBtn" class="btn btn-secondary btn-sm" title="Load an edited classification JSON back in — replaces the current classifications. (To move a whole scanned workspace between browsers, use “Import workspace” on the welcome screen.)">Import edits</button>
+                            <input type="file" id="importPathsInput" accept=".csv,text/csv,text/plain" hidden>
                            <input type="file" id="importDatasetInput" accept="application/json,.json" hidden>
                            <button id="resetDatasetBtn" class="btn btn-sm btn-danger" title="Discard all classifications and start over from the raw scanned input (does not touch your files)">Reset</button>
                        </div>
                    </div>
--- a/tests/classify.spec.js
+++ b/tests/classify.spec.js
@ -700,7 +700,7 @@ test('dataset (filename-based): import reconstruction rebuilds tracking + shared
        const c = window.app.modules.classify;
        const z = window.zddc;
        c.reset();
-        // Mirrors app.importDataset's per-record reconstruction: two docs sharing
+        // Mirrors app.importPaths's per-row reconstruction: two docs sharing
        // one transmittal package, plus an excluded junk file.
        const recs = [
            { source: 'a.pdf', filename: 'CPO-0001_0 (IFU) - Doc A.pdf', excluded: false,
@ -1680,6 +1680,63 @@ test('export: filtered file list → TSV (path + file), includes collapsed folde
    expect(r.filtered).toContain('Elec/valve spec.pdf\tvalve spec.pdf');
 });
 // End-to-end check of the "Import paths…" flow: a 2-column CSV (old path, new
 // path) is fed through the real #importPathsInput file input and the resulting
 // classification is read back via classify.deriveTarget. Covers: rename + route
 // from one row, a filename-only row, merge semantics for an unlisted file, a
 // CSV-quoted title containing a comma, and an error row for an unknown old path.
 test('import paths: CSV old→new drives rename + transmittal route (merge, errors reported)', async ({ page }) => {
    // NOTE(review): setMode() is called with no argument — presumably this selects the default classify view; confirm.
    await page.evaluate(() => window.app.modules.app.setMode());
    await page.evaluate(() => {
        const c = window.app.modules.classify;
        c.reset();
        // Three fake scanned files under Job/Inbox; the import validates old paths
        // against whatever srcKeyForFile yields for the files in app.folderTree.
        const f1 = { originalFilename: 'IMG_001', extension: 'pdf', folderPath: 'Job/Inbox' };
        const f2 = { originalFilename: 'IMG_002', extension: 'pdf', folderPath: 'Job/Inbox' };
        const f3 = { originalFilename: 'keep me', extension: 'pdf', folderPath: 'Job/Inbox' };
        window.app.folderTree = [{ name: 'Job', path: 'Job', files: [], children: [
            { name: 'Inbox', path: 'Job/Inbox', files: [f1, f2, f3], children: [] }] }];
        // A file NOT named in the CSV must keep its prior classification (merge).
        c.place([c.srcKeyForFile(f3)], c.addTrackingPath(null, c.parseFolderLevels('ZZZ-0009_B (IFR)')), 'tracking');
        // Capture the summary toast so the test can await the async FileReader.
        // The stub replaces window.zddc.toast for the rest of this page's lifetime.
        window.__toast = null;
        window.zddc.toast = (msg, level) => { window.__toast = { msg, level }; };
    });
    // Row 1: full route + rename. Row 2: filename only (rename, no route → no error).
    // Row 3: old path absent from the scan → error. Title with a comma exercises CSV quoting.
    // Old paths are written without the leading "Job/" — presumably srcKeyForFile
    // yields root-relative keys; confirm against classify.srcKeyForFile.
    const csv = [
        'old path,new path',
        'Inbox/IMG_001.pdf,"Acme/received/2025-10-31_Acme-TRN-0043 (IFC) - Pkg/CPO-0001_0 (IFU) - Doc A, rev one.pdf"',
        'Inbox/IMG_002.pdf,CPO-0002_A (IFR) - Doc B.pdf',
        'ghost/missing.pdf,whatever/x.pdf',
    ].join('\n');
    // Drive the real file input so the page's own change handler runs the import.
    await page.setInputFiles('#importPathsInput', { name: 'map.csv', mimeType: 'text/csv', buffer: Buffer.from(csv) });
    // The stubbed toast is the completion signal for the import.
    await page.waitForFunction(() => window.__toast !== null);
    const r = await page.evaluate(() => {
        const c = window.app.modules.classify;
        // Fresh file-shaped objects with the same folderPath/name/extension as f1–f3.
        const d1 = c.deriveTarget({ folderPath: 'Job/Inbox', originalFilename: 'IMG_001', extension: 'pdf' });
        const d2 = c.deriveTarget({ folderPath: 'Job/Inbox', originalFilename: 'IMG_002', extension: 'pdf' });
        const d3 = c.deriveTarget({ folderPath: 'Job/Inbox', originalFilename: 'keep me', extension: 'pdf' });
        return {
            toast: window.__toast,
            d1: { tracking: d1.tracking, rev: d1.revision, status: d1.status, title: d1.title, outPath: d1.outPath },
            d2: { tracking: d2.tracking, rev: d2.revision, status: d2.status, title: d2.title, outPath: d2.outPath },
            d3tracking: d3.tracking, d3rev: d3.revision,
        };
    });
    // Row 1 — both axes: filename → name, path → transmittal output.
    expect(r.d1.tracking).toBe('CPO-0001');
    expect(r.d1.rev).toBe('0');
    expect(r.d1.status).toBe('IFU');
    expect(r.d1.title).toBe('Doc A, rev one');   // comma survived CSV quoting
    expect(r.d1.outPath).toBe('Acme/received/2025-10-31_Acme-TRN-0043 (IFC) - Pkg');
    // Row 2 — filename only: renamed, no transmittal, NOT an error.
    expect(r.d2.tracking).toBe('CPO-0002');
    expect(r.d2.rev).toBe('A');
    expect(r.d2.outPath).toBe('');
    // Merge: the un-listed file keeps its prior placement.
    expect(r.d3tracking).toBe('ZZZ-0009');
    expect(r.d3rev).toBe('B');
    // Two rows imported; the missing-file row is flagged → warning.
    // Only the summary toast is asserted; the errors-CSV download is not checked here.
    expect(r.toast.level).toBe('warning');
    expect(r.toast.msg).toContain('Imported 2 files');
    expect(r.toast.msg).toContain('1 row had problems');
 });
 test('paste rows: a full-path Current name binds that exact file directly', async ({ page }) => {
    await page.evaluate(() => window.app.modules.app.setMode());
    const r = await page.evaluate(() => {