feat(classifier): copy-out with duplicate detection + map restore (phase 5)

The Copy button (enabled once >=1 file is fully classified) copies the mapped
files into a user-chosen output directory under their canonical names/layout
<party>/{received,issued}/<transmittal>/<filename> — reading the source, never
writing it.

- copy.js: plan() (complete, non-excluded files) → conflict scan (two sources
  → same output path are reported + skipped) → copyTo() engine on the generic
  FS-Access shape (ensureDir + getFileHandle + createWritable). Per-file dedup:
  identical target (sha256) is skipped; existing-but-different is left
  untouched and reported; live footer progress; completion toast.
- app.js: restores the saved map on launch (keyed by source-relative path, so
  it re-attaches when the same directory is re-opened) and persists the source
  handle on open; Copy button wired.
- target-tree.js: enables/labels the Copy button from the done count.
- 2 copy-engine tests with mock FS handles (copy/skip/differ + conflict);
  24 classify+classifier tests green.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
ZDDC 2026-06-09 12:37:44 -05:00
parent eb1e3ec948
commit 420f735e89
5 changed files with 285 additions and 5 deletions

View file

@ -58,6 +58,7 @@ concat_files \
"js/scanner.js" \ "js/scanner.js" \
"js/tree.js" \ "js/tree.js" \
"js/target-tree.js" \ "js/target-tree.js" \
"js/copy.js" \
"js/spreadsheet.js" \ "js/spreadsheet.js" \
"js/selection.js" \ "js/selection.js" \
"js/preview.js" \ "js/preview.js" \

View file

@ -33,6 +33,23 @@
cacheDOMElements(); cacheDOMElements();
setupEventListeners(); setupEventListeners();
// Restore a saved Classify & Copy map (placements + target trees). It
// keys on source-relative paths, so it re-attaches once the SAME source
// directory is opened again — the source handle itself can't be opened
// without a user gesture, so we remind the user to re-pick it.
if (app.modules.persist && app.modules.persist.available) {
app.modules.persist.loadState().then(function (s) {
if (!s) return;
var has = Object.keys(s.assignments || {}).length
|| (s.trackingTree || []).length || (s.transmittalTree || []).length;
if (!has) return;
app.modules.classify.load(s);
if (window.zddc && window.zddc.toast) {
window.zddc.toast('Restored your Classify & Copy map from this browser. Open the SAME source directory and switch to “Classify & Copy” to continue.', 'info', { durationMs: 9000 });
}
});
}
// Browser-compatibility branch: // Browser-compatibility branch:
// HTTP mode (served by zddc-server) — works everywhere; the // HTTP mode (served by zddc-server) — works everywhere; the
// HTTP polyfill stands in for the FS Access API. Auto-load // HTTP polyfill stands in for the FS Access API. Auto-load
@ -164,7 +181,8 @@
modeRenameBtn: document.getElementById('modeRenameBtn'), modeRenameBtn: document.getElementById('modeRenameBtn'),
modeClassifyBtn: document.getElementById('modeClassifyBtn'), modeClassifyBtn: document.getElementById('modeClassifyBtn'),
spreadsheetPane: document.getElementById('spreadsheetPane'), spreadsheetPane: document.getElementById('spreadsheetPane'),
targetPane: document.getElementById('targetPane') targetPane: document.getElementById('targetPane'),
copyOutputBtn: document.getElementById('copyOutputBtn')
}; };
} }
@ -218,6 +236,7 @@
// Workflow mode switch // Workflow mode switch
if (app.dom.modeRenameBtn) app.dom.modeRenameBtn.addEventListener('click', function () { setMode('rename'); }); if (app.dom.modeRenameBtn) app.dom.modeRenameBtn.addEventListener('click', function () { setMode('rename'); });
if (app.dom.modeClassifyBtn) app.dom.modeClassifyBtn.addEventListener('click', function () { setMode('classify'); }); if (app.dom.modeClassifyBtn) app.dom.modeClassifyBtn.addEventListener('click', function () { setMode('classify'); });
if (app.dom.copyOutputBtn) app.dom.copyOutputBtn.addEventListener('click', function () { app.modules.copy.run(); });
// Keyboard shortcuts // Keyboard shortcuts
document.addEventListener('keydown', handleKeyDown); document.addEventListener('keydown', handleKeyDown);
@ -351,6 +370,9 @@
*/ */
async function openDirectory(dirHandle) { async function openDirectory(dirHandle) {
app.rootHandle = dirHandle; app.rootHandle = dirHandle;
// Remember the source handle so a later session can re-grant access in
// one click (the map re-attaches by relative path either way).
if (app.modules.persist) app.modules.persist.saveSourceHandle(dirHandle);
// Hide welcome screen and show main UI // Hide welcome screen and show main UI
hideWelcomeScreen(); hideWelcomeScreen();

180
classifier/js/copy.js Normal file
View file

@ -0,0 +1,180 @@
/**
* ZDDC Classifier copy-out (Classify & Copy mode).
*
* Copies the fully-classified source files into a SEPARATE output directory
* under their canonical ZDDC names and folder layout
* <party>/{received,issued}/<DATE_TN (STATUS) - TITLE>/<TRACKING_REV (STATUS) - TITLE.ext>
* The source is never modified — every operation is a read (getFile) on the
* source and a write into the chosen output handle.
*
* Duplicate detection:
* - two sources → the same output path = mapping conflict (skipped + reported)
* - target already exists, identical bytes (sha256) = skipped
* - target exists, different bytes = left untouched + reported (no clobber)
*
* Built on the generic FS-Access shape (getDirectoryHandle/getFileHandle/
* createWritable), so it works against a real handle today and a server-backed
* output handle later without changing this logic.
*/
(function () {
'use strict';
var outputHandle = null; // remembered for the session
/** Shorthand accessor: returns the classify module from window.app.modules. */
function C() {
    var registry = window.app.modules;
    return registry.classify;
}
/**
 * Flattens window.app.folderTree into one list of file entries.
 * Traversal is depth-first: a node's own files are emitted before the files
 * of its children, and siblings keep their array order.
 * @returns {Array} every entry found in any node's `files` array
 */
function collectFiles() {
    var found = [];
    function visit(list) {
        var nodes = list || [];
        nodes.forEach(function (node) {
            var own = node.files || [];
            for (var i = 0; i < own.length; i++) {
                found.push(own[i]);
            }
            visit(node.children);
        });
    }
    visit(window.app.folderTree || []);
    return found;
}
/**
 * Builds the copy plan: one entry per source file whose derived target is
 * complete and not excluded.
 * @returns {Array<{file: Object, d: Object, outRel: string}>} `d` is the value
 *   returned by classify.deriveTarget(file); `outRel` is `d.outPath + '/' + d.filename`.
 */
function plan() {
    var classify = C();
    var ready = [];
    var files = collectFiles();
    for (var i = 0; i < files.length; i++) {
        var source = files[i];
        var target = classify.deriveTarget(source);
        if (!target.excluded && target.complete) {
            ready.push({
                file: source,
                d: target,
                outRel: target.outPath + '/' + target.filename
            });
        }
    }
    return ready;
}
/**
 * Groups planned items by output path and lists the paths that more than one
 * source file maps to (a mapping conflict).
 *
 * @param {Array<{outRel: string}>} items planned copy entries; a missing list
 *   is treated as empty
 * @returns {{by: Object<string, Array>, conflicts: string[]}} `by` maps each
 *   output path to the items targeting it; `conflicts` holds the paths with
 *   more than one item, in first-seen order
 */
function conflictsIn(items) {
    // Prototype-less dictionary: output paths are arbitrary strings, and on a
    // plain {} a path such as "constructor" or "__proto__" would resolve to an
    // inherited member instead of an (absent) own entry and break the grouping.
    var by = Object.create(null);
    (items || []).forEach(function (p) {
        if (!by[p.outRel]) by[p.outRel] = [];
        by[p.outRel].push(p);
    });
    var conflicts = Object.keys(by).filter(function (k) { return by[k].length > 1; });
    return { by: by, conflicts: conflicts };
}
/**
 * Shows a toast through window.zddc.toast when that API is present;
 * silently does nothing otherwise.
 */
function toast(msg, level) {
    var api = window.zddc;
    if (!api || !api.toast) return;
    api.toast(msg, level);
}
/**
 * Writes progress text into the #scanStatus element and toggles its
 * `scanning` class on for non-empty text, off for empty text.
 * Does nothing when the element is not in the document.
 */
function setStatus(text) {
    var statusEl = document.getElementById('scanStatus');
    if (statusEl) {
        statusEl.textContent = text;
        statusEl.classList.toggle('scanning', Boolean(text));
    }
}
/**
 * Asks the user for a read/write output directory.
 *
 * On success the handle is remembered in `outputHandle` for the rest of the
 * session and its name is passed to classify.setOutputName().
 *
 * @returns {Promise<Object|null>} the picked directory handle, or null when
 *   the picker API is missing, the user cancels (AbortError, no toast), or
 *   the picker fails (error toast)
 */
async function chooseOutput() {
    if (!window.showDirectoryPicker) {
        toast('Copying to an output directory needs the File System Access API (use Chromium, or run via zddc-server).', 'error');
        return null;
    }
    var picked;
    try {
        picked = await window.showDirectoryPicker({ mode: 'readwrite', id: 'zddc-classifier-output' });
        outputHandle = picked;
        C().setOutputName(picked.name);
    } catch (err) {
        // A cancelled picker is a normal outcome; anything else is reported.
        if (err.name !== 'AbortError') {
            toast('Could not open the output directory — ' + (err.message || err), 'error');
        }
        return null;
    }
    return picked;
}
/**
 * Walks (creating as needed) the '/'-separated directory chain `relPath`
 * below `root` and returns the deepest directory handle.
 * Empty segments (leading, trailing or doubled slashes) are ignored, so an
 * empty path returns `root` itself.
 */
async function ensureDir(root, relPath) {
    var dir = root;
    var segments = relPath.split('/').filter(Boolean);
    for (var segment of segments) {
        dir = await dir.getDirectoryHandle(segment, { create: true });
    }
    return dir;
}
/**
 * Tells whether an existing target file holds the same bytes as a source file.
 * Sizes are compared first; the SHA-256 digests (window.zddc.crypto.sha256File)
 * are only computed when the sizes match.
 *
 * @param {Object} existingHandle file handle of the existing target
 * @param {Object} srcFileObj source entry; its `handle.getFile()` is read
 * @returns {Promise<boolean>} true when size and digest both match
 */
async function sameContent(existingHandle, srcFileObj) {
    var targetFile = await existingHandle.getFile();
    var sourceFile = await srcFileObj.handle.getFile();
    if (targetFile.size === sourceFile.size) {
        var targetHash = await window.zddc.crypto.sha256File(targetFile);
        var sourceHash = await window.zddc.crypto.sha256File(sourceFile);
        return targetHash === sourceHash;
    }
    return false;
}
/**
 * Copies one planned file into the output tree without ever overwriting.
 *
 * @param {Object} out root handle of the output directory
 * @param {{file: Object, d: {outPath: string, filename: string}}} p plan entry
 * @returns {Promise<string>} 'copied' when a new file was written, 'skipped'
 *   when an identical target already exists, 'differ' when a target with
 *   different content exists (it is left alone)
 * @throws rethrows any failure from reading the source or writing the target
 */
async function copyOne(out, p) {
    var name = p.d.filename;
    var dir = await ensureDir(out, p.d.outPath);
    var existing = null;
    // A failed lookup is treated as "no target yet". Any other cause (wrong
    // entry type, permissions) resurfaces from the create call below.
    try { existing = await dir.getFileHandle(name); } catch (e) { /* NotFound → fresh copy */ }
    if (existing) {
        return (await sameContent(existing, p.file)) ? 'skipped' : 'differ';
    }
    var srcFile = await p.file.handle.getFile(); // READ source (never write it)
    // With create:true the target entry exists (empty) before any bytes land.
    var fh = await dir.getFileHandle(name, { create: true });
    var w = null;
    try {
        w = await fh.createWritable();
        await w.write(srcFile);
        await w.close();
    } catch (err) {
        // A failed write would leave the empty placeholder behind, and every
        // later run would report it as "exists with different content" and
        // never retry. Only the entry created by this call is removed — no
        // target existed a moment ago. Cleanup is best-effort; the original
        // failure is what gets reported.
        if (w && typeof w.abort === 'function') {
            try { await w.abort(); } catch (ignored) { /* stream already errored */ }
        }
        if (typeof dir.removeEntry === 'function') {
            try { await dir.removeEntry(name); } catch (ignored) { /* nothing more to do */ }
        }
        throw err;
    }
    return 'copied';
}
/**
 * Copy-button entry point: plan, report mapping conflicts, pick or reuse the
 * output directory, confirm with the user, copy, then summarise in toasts.
 *
 * @returns {Promise<Object|undefined>} the copyTo() summary, or undefined
 *   when nothing was copied (mode disabled, nothing ready, only conflicts,
 *   no output directory, or the user declined the confirmation)
 */
async function run() {
    if (!C().isEnabled()) return;

    var items = plan();
    if (items.length === 0) {
        toast('Nothing to copy yet — no files are fully classified (need both a tracking leaf and a transmittal).', 'warning');
        return;
    }

    // Every source that maps to a contested output path is left out.
    var cf = conflictsIn(items);
    var collisions = cf.conflicts;
    var blocked = {};
    for (var i = 0; i < collisions.length; i++) {
        blocked[collisions[i]] = true;
    }
    var todo = items.filter(function (entry) { return !blocked[entry.outRel]; });

    if (collisions.length) {
        toast(collisions.length + ' output-name collision(s) — two source files map to the same name. Skipped:\n'
            + collisions.join('\n'), 'error');
    }
    if (!todo.length) return;

    // Reuse the directory picked earlier in this session, else ask for one.
    var out = outputHandle;
    if (!out) out = await chooseOutput();
    if (!out) return;

    var question = 'Copy ' + todo.length + ' file(s) into "' + out.name + '"?\n\nThe source directory is not modified.';
    if (!confirm(question)) return;

    var s = await copyTo(out, todo);

    var msg = 'Copy complete — ' + s.copied + ' copied, ' + s.skipped + ' identical skipped';
    if (s.differ) {
        msg += ', ' + s.differ + ' already exist with different content (left untouched)';
    }
    if (s.errors) {
        msg += ', ' + s.errors + ' errors';
    }
    msg += '.';
    toast(msg, (s.errors || s.differ) ? 'warning' : 'success');

    if (s.differing.length) {
        toast('Existing-but-different (not overwritten):\n' + s.differing.join('\n'), 'warning');
    }
    return s;
}
/**
 * Copy engine (also the test seam): copies each ready item into `out`, one
 * after another. No picker and no confirmation — run() owns those.
 *
 * A failing item is counted, reported in an error toast, and does not stop
 * the remaining items. The status line shows progress and is cleared at the end.
 *
 * @param {Object} out output directory handle
 * @param {Array<{d: Object, outRel: string}>} todo items to copy
 * @returns {Promise<{copied: number, skipped: number, differ: number,
 *   errors: number, differing: string[]}>} tallies plus the output paths that
 *   already existed with different content
 */
async function copyTo(out, todo) {
    var summary = { copied: 0, skipped: 0, differ: 0, errors: 0, differing: [] };
    for (var n = 0; n < todo.length; n++) {
        var item = todo[n];
        setStatus('Copying… ' + (n + 1) + '/' + todo.length + ' — ' + item.d.filename);
        try {
            // The outcome string doubles as the name of the counter to bump.
            var outcome = await copyOne(out, item);
            summary[outcome]++;
            if (outcome === 'differ') {
                summary.differing.push(item.outRel);
            }
        } catch (err) {
            summary.errors++;
            toast('Failed to copy ' + item.outRel + ' — ' + (err.message || err), 'error');
        }
    }
    setStatus('');
    return summary;
}
/** Number of files currently ready to copy (length of the current plan). */
function readyCount() {
    var ready = plan();
    return ready.length;
}
// Register the copy module on the shared app object.
window.app.modules.copy = {
    // UI entry points
    run,
    readyCount,
    chooseOutput,
    // test/advanced seams
    plan,
    conflictsIn,
    copyTo
};
})();

View file

@ -103,11 +103,17 @@
} }
function renderStats(files) { function renderStats(files) {
if (!els.stats) return;
var s = C().stats(files); var s = C().stats(files);
if (els.stats) {
els.stats.textContent = s.done + ' done · ' + s.partial + ' in progress · ' els.stats.textContent = s.done + ' done · ' + s.partial + ' in progress · '
+ s.none + ' unassigned · ' + s.excluded + ' excluded'; + s.none + ' unassigned · ' + s.excluded + ' excluded';
} }
var copyBtn = document.getElementById('copyOutputBtn');
if (copyBtn) {
copyBtn.disabled = s.done === 0;
copyBtn.textContent = s.done ? ('Copy ' + s.done + '…') : 'Copy…';
}
}
function el(tag, cls, text) { function el(tag, cls, text) {
var e = document.createElement(tag); var e = document.createElement(tag);

View file

@ -286,6 +286,77 @@ test('cross-tree reveal: source→target switches to the placed axis', async ({
expect(ok).toBe(true); expect(ok).toBe(true);
}); });
// ── Phase 5: copy-out engine + duplicate detection (mock FS handles) ───────
// Drives copy.copyTo() three times against an in-memory mock of the output
// directory and checks the three per-file outcomes in turn:
//   1st run → the file is written (copied = 1)
//   2nd run → nothing changed, so it is skipped (skipped = 1)
//   3rd run → the stored target was altered, so it is reported as differing
// It finally checks that the stored key ends with the expected output path.
test('copy: writes the file, skips an identical re-copy, flags a differing target', async ({ page }) => {
await page.click('#modeClassifyBtn');
const res = await page.evaluate(async () => {
const c = window.app.modules.classify, copy = window.app.modules.copy;
// Mock output file system: full output path → content written to it.
const store = {};
// File-handle mock: getFile() wraps the stored content (empty string when
// nothing is stored); the writable stores text() of the written value when
// it has one, otherwise the value itself.
const fileHandleFor = (full) => ({
getFile: async () => new File([store[full] != null ? store[full] : ''], full.split('/').pop()),
createWritable: async () => ({ write: async (d) => { store[full] = (d && d.text) ? await d.text() : d; }, close: async () => { } }),
});
// Directory-handle mock: sub-directories always resolve (the path prefix just
// grows); a file lookup without { create: true } throws an error named
// NotFoundError when the path is not in `store`.
const mockDir = (prefix) => ({
name: prefix || 'out',
getDirectoryHandle: async (name) => mockDir((prefix ? prefix + '/' : '') + name),
getFileHandle: async (name, opts) => {
const full = (prefix ? prefix + '/' : '') + name;
if (!opts || !opts.create) { if (!(full in store)) { const e = new Error('NF'); e.name = 'NotFoundError'; throw e; } }
return fileHandleFor(full);
},
});
// Source-file entry: splits `name` at its last dot into stem + extension and
// exposes a read-only handle returning `content`.
const srcFile = (name, content) => {
const p = name.split('.'); const ext = p.length > 1 ? p.pop() : ''; const stem = p.join('.');
return { originalFilename: stem, extension: ext, folderPath: 'Root', handle: { getFile: async () => new File([content], name) } };
};
const f = srcFile('foundation.pdf', 'AAA');
window.app.folderTree = [{ name: 'Root', path: 'Root', expanded: true, scanState: 'done', files: [f], children: [], runFiles: 1 }];
// Place the file on both axes (tracking + transmittal); presumably this is
// what makes its derived target complete so plan() picks it up — confirm
// against the classify module.
const leaf = c.addTrackingNode(c.addTrackingNode(null, 'ACME-MECH-0001'), 'A (IFR)');
const bin = c.addTransmittalBin(c.addParty('ClientCorp'), 'received', { date: '2026-03-14', type: 'TRN', seq: '0007' });
const key = c.srcKeyForFile(f);
c.place([key], leaf, 'tracking'); c.place([key], bin, 'transmittal');
const out = mockDir('');
const first = await copy.copyTo(out, copy.plan());
const second = await copy.copyTo(out, copy.plan()); // identical → skipped
const tkey = Object.keys(store)[0];
store[tkey] = 'DIFFERENT'; // tamper target
const third = await copy.copyTo(out, copy.plan()); // differs → left alone
return { firstCopied: first.copied, secondSkipped: second.skipped, thirdDiffer: third.differ, keys: Object.keys(store) };
});
expect(res.firstCopied).toBe(1);
expect(res.secondSkipped).toBe(1);
expect(res.thirdDiffer).toBe(1);
// The written key must end with <party>/<direction>/<transmittal folder>/<file name>.
expect(res.keys.some((k) => k.endsWith('ClientCorp/received/2026-03-14_ClientCorp-TRN-0007 (---) - Transmittal/ACME-MECH-0001_A (IFR) - foundation.pdf'))).toBe(true);
});
// Two source files with the same name in different source folders are placed
// on the same tracking leaf and transmittal bin; the test expects
// conflictsIn(plan()) to report exactly one conflicting output path.
test('copy: two sources mapping to the same output path are a conflict', async ({ page }) => {
await page.click('#modeClassifyBtn');
const conflicts = await page.evaluate(() => {
const c = window.app.modules.classify, copy = window.app.modules.copy;
// Source-file entry: splits `name` at its last dot into stem + extension;
// `folder` becomes the entry's folderPath.
const srcFile = (name, folder) => {
const p = name.split('.'); const ext = p.length > 1 ? p.pop() : ''; const stem = p.join('.');
return { originalFilename: stem, extension: ext, folderPath: folder, handle: { getFile: async () => new File(['x'], name) } };
};
const f1 = srcFile('plan.pdf', 'Root/a');
const f2 = srcFile('plan.pdf', 'Root/b'); // same name, different folder → same derived output
// Source tree: Root has no files of its own; each child folder holds one file.
window.app.folderTree = [{
name: 'Root', path: 'Root', expanded: true, scanState: 'done', files: [],
children: [
{ name: 'a', path: 'Root/a', files: [f1], children: [] },
{ name: 'b', path: 'Root/b', files: [f2], children: [] },
],
}];
const leaf = c.addTrackingNode(c.addTrackingNode(null, 'ACME-MECH-0001'), 'A (IFR)');
const bin = c.addTransmittalBin(c.addParty('ClientCorp'), 'received', { date: '2026-03-14', type: 'TRN', seq: '0007' });
// Both files receive the identical tracking + transmittal placement.
c.place([c.srcKeyForFile(f1)], leaf, 'tracking'); c.place([c.srcKeyForFile(f1)], bin, 'transmittal');
c.place([c.srcKeyForFile(f2)], leaf, 'tracking'); c.place([c.srcKeyForFile(f2)], bin, 'transmittal');
return copy.conflictsIn(copy.plan()).conflicts.length;
});
expect(conflicts).toBe(1);
});
test('deleting a tracking node clears the files placed in it', async ({ page }) => { test('deleting a tracking node clears the files placed in it', async ({ page }) => {
const after = await page.evaluate((file) => { const after = await page.evaluate((file) => {
const c = window.app.modules.classify; const c = window.app.modules.classify;