ZDDC/classifier/js/copy.js
ZDDC 1d09abdc8b feat(classifier): workspaces — scan-once, resume from snapshot (phase 6)
The classifier re-scanned the source on every session; on cloud-backed mounts
(OneDrive/Samba) that's minutes of per-op latency. Workspaces fix it: scan a
folder ONCE, snapshot the completed tree, and resume instantly — all
classification runs on the data model; the filesystem is only touched at copy.

- persist.js v2: multi-workspace IndexedDB (tiny 'index' store for the welcome
  list + 'data' store holding the source handle, tree snapshot, and map). DB v2.
- scanner.js: snapshotTree()/loadSnapshot() (compact, handle-less, marked done,
  totals recomputed) + lazy resolveFileHandle/resolveDirHandle from the root.
- workspace.js: welcome manager (new/open/rename/delete), debounced autosave of
  the active workspace, 'Refresh from disk' (re-scan → re-snapshot, path-keyed
  map carries over). New workspace = the one slow full scan; reopen = instant.
- copy.js: resolves snapshot files' handles from the workspace root with a
  one-click read permission re-grant; missing-on-disk files surface as errors.
- app.js: enterAppShell() shared by rename/workspace flows; exposes setMode;
  classify.js decoupled from persistence.
- template/css: welcome workspace list + header 'Workspaces' button.
- tests: snapshot round-trip, persist CRUD + classify-only-preserves-tree,
  copy-from-snapshot via mock root handle (28 classify/classifier tests green).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-09 15:07:40 -05:00

199 lines
8.1 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* ZDDC Classifier — copy-out (Classify & Copy mode).
*
* Copies the fully-classified source files into a SEPARATE output directory
* under their canonical ZDDC names and folder layout
* <party>/{received,issued}/<DATE_TN (STATUS) - TITLE>/<TRACKING_REV (STATUS) - TITLE.ext>
* The source is never modified — every operation is a read (getFile) on the
* source and a write into the chosen output handle.
*
* Duplicate detection:
* - two sources → the same output path = mapping conflict (skipped + reported)
* - target already exists, identical bytes (sha256) = skipped
* - target exists, different bytes = left untouched + reported (no clobber)
*
* Built on the generic FS-Access shape (getDirectoryHandle/getFileHandle/
* createWritable), so it works against a real handle today and a server-backed
* output handle later without changing this logic.
*/
(function () {
'use strict';
// Output directory handle picked via chooseOutput(). Kept in module scope so
// run() reuses it instead of prompting again (remembered for the session).
var outputHandle = null;
/** Shorthand for the classify module; read from window.app on every call. */
function C() {
    var mods = window.app.modules;
    return mods.classify;
}
/**
 * Flatten window.app.folderTree into a single list of file objects.
 *
 * Traversal is depth-first, pre-order: a node's own files come before the
 * files of its children. Missing `files` / `children` / tree are treated as
 * empty.
 *
 * @returns {Array} every file object found in the tree, in traversal order
 */
function collectFiles() {
    var found = [];
    function visit(node) {
        var files = node.files || [];
        for (var i = 0; i < files.length; i++) {
            found.push(files[i]);
        }
        var kids = node.children || [];
        for (var j = 0; j < kids.length; j++) {
            visit(kids[j]);
        }
    }
    var roots = window.app.folderTree || [];
    for (var r = 0; r < roots.length; r++) {
        visit(roots[r]);
    }
    return found;
}
/**
 * Build the list of files that are ready to copy: the derived target is
 * complete and the file is not excluded.
 *
 * @returns {Array<{file: Object, d: Object, outRel: string}>} one entry per
 *   ready file; `d` is the result of classify.deriveTarget(file) and `outRel`
 *   is `d.outPath + '/' + d.filename`
 */
function plan() {
    var classify = C();
    var ready = [];
    var files = collectFiles();
    for (var i = 0; i < files.length; i++) {
        var file = files[i];
        var target = classify.deriveTarget(file);
        if (!target.excluded && target.complete) {
            ready.push({
                file: file,
                d: target,
                outRel: target.outPath + '/' + target.filename
            });
        }
    }
    return ready;
}
/**
 * Group planned items by output path and report mapping conflicts.
 *
 * @param {Array<{outRel: string}>} items planned copy items
 * @returns {{by: Object, conflicts: string[]}} `by` maps each output path to
 *   the items targeting it; `conflicts` lists the paths claimed by more than
 *   one source
 */
function conflictsIn(items) {
    var by = {};
    for (var i = 0; i < items.length; i++) {
        var key = items[i].outRel;
        if (!by[key]) {
            by[key] = [];
        }
        by[key].push(items[i]);
    }
    var conflicts = Object.keys(by).filter(function (path) {
        return by[path].length > 1;
    });
    return { by: by, conflicts: conflicts };
}
/**
 * Show a notification through window.zddc.toast when it is available;
 * silently does nothing otherwise.
 *
 * @param {string} msg   text to display
 * @param {string} level severity passed through to window.zddc.toast
 */
function toast(msg, level) {
    var z = window.zddc;
    if (!z || !z.toast) return;
    z.toast(msg, level);
}
/**
 * Write progress text into the #scanStatus element and toggle its
 * 'scanning' class (on for non-empty text, off otherwise). No-op when the
 * element is absent.
 *
 * @param {string} text status line; pass '' to clear
 */
function setStatus(text) {
    var statusEl = document.getElementById('scanStatus');
    if (statusEl) {
        statusEl.textContent = text;
        statusEl.classList.toggle('scanning', Boolean(text));
    }
}
/**
 * Ask the user for the output directory (read/write) and remember it.
 *
 * On success the handle is stored in `outputHandle` and its name is handed
 * to the classify module via setOutputName(). A cancelled picker
 * (AbortError) is silent; any other failure is reported as an error toast.
 *
 * @returns {Promise<Object|null>} the chosen directory handle, or null when
 *   the picker API is unavailable, the user cancelled, or an error occurred
 */
async function chooseOutput() {
    if (!window.showDirectoryPicker) {
        toast('Copying to an output directory needs the File System Access API (use Chromium, or run via zddc-server).', 'error');
        return null;
    }
    var picked;
    try {
        picked = await window.showDirectoryPicker({ mode: 'readwrite', id: 'zddc-classifier-output' });
        outputHandle = picked;
        C().setOutputName(picked.name);
    } catch (err) {
        if (err.name !== 'AbortError') {
            toast('Could not open the output directory — ' + (err.message || err), 'error');
        }
        return null;
    }
    return picked;
}
/**
 * Walk (and create as needed) a '/'-separated directory path below `root`.
 * Empty segments (leading, trailing, or doubled slashes) are ignored.
 *
 * @param {Object} root    directory handle exposing getDirectoryHandle()
 * @param {string} relPath path relative to `root`
 * @returns {Promise<Object>} handle of the deepest directory; `root` itself
 *   when the path has no segments
 */
async function ensureDir(root, relPath) {
    var dir = root;
    var segments = relPath.split('/');
    for (var k = 0; k < segments.length; k++) {
        if (!segments[k]) continue;
        dir = await dir.getDirectoryHandle(segments[k], { create: true });
    }
    return dir;
}
/**
 * Decide whether an existing output file has the same bytes as a source file.
 * Sizes are compared first; the sha256 digests are only computed when the
 * sizes match.
 *
 * @param {Object} existingHandle file handle of the file already in the output
 * @param {Object} srcFileObj     source file object (resolved via srcHandle)
 * @returns {Promise<boolean>} true when size and sha256 both match
 */
async function sameContent(existingHandle, srcFileObj) {
    var existingFile = await existingHandle.getFile();
    var sourceHandle = await srcHandle(srcFileObj);
    var sourceFile = await sourceHandle.getFile();
    if (existingFile.size === sourceFile.size) {
        var hasher = window.zddc.crypto;
        var existingDigest = await hasher.sha256File(existingFile);
        var sourceDigest = await hasher.sha256File(sourceFile);
        return existingDigest === sourceDigest;
    }
    return false;
}
/**
 * Get the live file handle for a source file object.
 *
 * A file object that already carries `handle` returns it directly. Otherwise
 * the handle is looked up from window.app.rootHandle through
 * scanner.resolveFileHandle().
 *
 * @param {Object} fileObj source file object from the folder tree
 * @returns {Promise<Object>} the file's handle
 * @throws {Error} 'source directory not connected' when there is neither a
 *   handle on the file nor a root handle to resolve from
 */
async function srcHandle(fileObj) {
    var live = fileObj.handle;
    if (live) return live;
    var root = window.app.rootHandle;
    if (!root) {
        throw new Error('source directory not connected');
    }
    var scanner = window.app.modules.scanner;
    return scanner.resolveFileHandle(root, fileObj);
}
/**
 * Copy one planned file into the output tree.
 *
 * The source is only ever read. If the target already exists it is never
 * overwritten: it is compared with the source and reported instead.
 *
 * @param {Object} out output root directory handle
 * @param {{file: Object, d: {outPath: string, filename: string}}} p planned item
 * @returns {Promise<string>} 'copied' (new file written), 'skipped' (target
 *   exists with identical content) or 'differ' (target exists with different
 *   content and was left alone)
 * @throws whatever the underlying handle operations throw; on a failed write
 *   the partially created target is cleaned up before the error propagates
 */
async function copyOne(out, p) {
    var dir = await ensureDir(out, p.d.outPath);
    var existing = null;
    // Any lookup failure is treated as "not there"; a real problem (e.g. a
    // permission error) resurfaces from the create call below.
    try { existing = await dir.getFileHandle(p.d.filename); } catch (e) { /* NotFound → fresh copy */ }
    if (existing) {
        return (await sameContent(existing, p.file)) ? 'skipped' : 'differ';
    }
    // READ the source first (never write it), so an unreadable source fails
    // before anything is created in the output.
    var srcFile = await (await srcHandle(p.file)).getFile();
    var fh = await dir.getFileHandle(p.d.filename, { create: true });
    var w = null;
    try {
        w = await fh.createWritable();
        await w.write(srcFile);
        await w.close();
    } catch (err) {
        // The create call above already put an empty file in place. Left
        // behind, it would be seen as an existing file with different content
        // on the next run and block the copy for good — so release the stream
        // and remove the stub (both best-effort), then report the real error.
        if (w && typeof w.abort === 'function') {
            try { await w.abort(); } catch (abortErr) { /* stream already errored/closed */ }
        }
        if (typeof dir.removeEntry === 'function') {
            try { await dir.removeEntry(p.d.filename); } catch (removeErr) { /* nothing more we can do */ }
        }
        throw err;
    }
    return 'copied';
}
/**
 * Entry point for the copy-out action.
 *
 * Steps, each of which can end the run early with a toast:
 *   1. bail out if the classify module is not enabled;
 *   2. build the ready list (plan) and drop every item whose output path is
 *      claimed by more than one source (reported as a collision);
 *   3. if any remaining file has no live handle, make sure the source root
 *      is connected and readable;
 *   4. pick (or reuse) the output directory and ask for confirmation;
 *   5. copy via copyTo() and summarise the result.
 *
 * @returns {Promise<Object|undefined>} the tally returned by copyTo(), or
 *   undefined when the run ended before copying
 */
async function run() {
    if (!C().isEnabled()) return;
    var items = plan();
    if (!items.length) {
        toast('Nothing to copy yet — no files are fully classified (need both a tracking leaf and a transmittal).', 'warning');
        return;
    }
    var cf = conflictsIn(items);
    var blocked = {};
    cf.conflicts.forEach(function (path) { blocked[path] = true; });
    // Every source that maps to a contested output path is held back.
    var todo = items.filter(function (p) { return !blocked[p.outRel]; });
    if (cf.conflicts.length) {
        toast(cf.conflicts.length + ' output-name collision(s) — two source files map to the same name. Skipped:\n'
            + cf.conflicts.join('\n'), 'error');
    }
    if (!todo.length) return;
    // Snapshot-loaded files have no live handle — re-grant read on the
    // workspace source directory (one click) before copying.
    if (todo.some(function (p) { return !p.file.handle; })) {
        if (!window.app.rootHandle) {
            toast('The source directory isn\'t connected. Re-open the workspace to reconnect it.', 'error');
            return;
        }
        var srcOk = await window.app.modules.persist.verifyPermission(window.app.rootHandle, false);
        if (!srcOk) { toast('Permission to read the source directory was denied.', 'error'); return; }
    }
    // Reuse the directory chosen earlier in this session, otherwise prompt.
    var out = outputHandle || await chooseOutput();
    if (!out) return;
    if (!confirm('Copy ' + todo.length + ' file(s) into "' + out.name + '"?\n\nThe source directory is not modified.')) return;
    var s = await copyTo(out, todo);
    var msg = 'Copy complete — ' + s.copied + ' copied, ' + s.skipped + ' identical skipped'
        + (s.differ ? (', ' + s.differ + ' already exist with different content (left untouched)') : '')
        + (s.errors ? (', ' + s.errors + ' errors') : '') + '.';
    // Anything that was not a clean copy/skip downgrades the summary to a warning.
    toast(msg, (s.errors || s.differ) ? 'warning' : 'success');
    if (s.differing.length) toast('Existing-but-different (not overwritten):\n' + s.differing.join('\n'), 'warning');
    return s;
}
/**
 * The copy engine (and the test seam): copies each ready item into `out`,
 * one at a time, without any picker or confirmation — run() handles those.
 *
 * A failure on one item is counted, reported as an error toast, and does not
 * stop the remaining items. The status line shows progress and is cleared at
 * the end.
 *
 * @param {Object} out  output root directory handle
 * @param {Array}  todo ready items as produced by plan()
 * @returns {Promise<{copied: number, skipped: number, differ: number,
 *   errors: number, differing: string[]}>} tally; `differing` lists the
 *   output paths that exist with different content
 */
async function copyTo(out, todo) {
    var tally = { copied: 0, skipped: 0, differ: 0, errors: 0, differing: [] };
    var total = todo.length;
    for (var n = 1; n <= total; n++) {
        var item = todo[n - 1];
        setStatus('Copying… ' + n + '/' + total + ' — ' + item.d.filename);
        try {
            var outcome = await copyOne(out, item);
            tally[outcome] += 1;
            if (outcome === 'differ') {
                tally.differing.push(item.outRel);
            }
        } catch (err) {
            tally.errors += 1;
            toast('Failed to copy ' + item.outRel + ' — ' + (err.message || err), 'error');
        }
    }
    setStatus('');
    return tally;
}
/** Number of files currently ready to copy (length of plan()). */
function readyCount() {
    var ready = plan();
    return ready.length;
}
// Public surface of this module, registered as window.app.modules.copy.
var copyApi = {};
copyApi.run = run;
copyApi.readyCount = readyCount;
copyApi.chooseOutput = chooseOutput;
// test/advanced seams
copyApi.plan = plan;
copyApi.conflictsIn = conflictsIn;
copyApi.copyTo = copyTo;
window.app.modules.copy = copyApi;
})();