ZDDC/classifier/js/copy.js
ZDDC 203674ee4c feat(classifier): persist & copy files inside .zip archives
Zip members were live-only: expandable while the source was connected, but the
workspace snapshot dropped the archive (.zip became a plain file), so a
classification made inside one vanished on reopen — and copy couldn't extract it
anyway (it tried to walk the archive path as a real directory).

Now zips are first-class:
- snapshotTree/loadSnapshot persist the scanned archive subtree — zip-root +
  virtual folders + members carry isVirtual/zipPath/zipEntryPath, so the tree
  rebuilds on reopen and assignments inside an archive survive. An archive that
  was never opened persists as a lazy 'zip' node that reopens on demand.
- scanner.ensureZipLoaded(rootHandle, zipPath) reloads an archive from the
  workspace root when the in-memory cache is cold (post-restore); scanZipNode
  falls back to it when a restored zip node has no live file object.
- copy.js reads a member via scanner.extractZipMember (Blob from the archive)
  instead of a non-existent file handle; preview.js reloads the archive for a
  restored member before opening it.

This also reconciles export/import with the snapshot: both now keep zip members,
so a round-trip no longer leaves dangling in-archive assignments.

Tests: zip subtree snapshot round-trip; copy extracts a member to the output (45).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-10 13:27:00 -05:00

209 lines
8.5 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* ZDDC Classifier — copy-out (Classify & Copy mode).
*
* Copies the fully-classified source files into a SEPARATE output directory
* under their canonical ZDDC names and folder layout
* <party>/{received,issued}/<DATE_TN (STATUS) - TITLE>/<TRACKING_REV (STATUS) - TITLE.ext>
* The source is never modified — every operation is a read (getFile) on the
* source and a write into the chosen output handle.
*
* Duplicate detection:
* - two sources → the same output path = mapping conflict (skipped + reported)
* - target already exists, identical bytes (sha256) = skipped
* - target exists, different bytes = left untouched + reported (no clobber)
*
* Built on the generic FS-Access shape (getDirectoryHandle/getFileHandle/
* createWritable), so it works against a real handle today and a server-backed
* output handle later without changing this logic.
*/
(function () {
'use strict';
// Output directory handle picked through chooseOutput(). Once set, run()
// reuses it instead of showing the directory picker again.
var outputHandle = null; // remembered for the session
// Shorthand accessor: returns the `classify` module registered on window.app.
function C() {
  const { classify } = window.app.modules;
  return classify;
}
/**
 * Flattens window.app.folderTree into a single list of file objects.
 * Traversal is depth-first: a node's own files come first, then the files of
 * its children, then the next sibling.
 * @returns {Array} every entry found in any node's `files` array.
 */
function collectFiles() {
  const found = [];
  const visit = (node) => {
    (node.files || []).forEach((file) => { found.push(file); });
    (node.children || []).forEach(visit);
  };
  (window.app.folderTree || []).forEach(visit);
  return found;
}
/**
 * Builds the copy plan: one entry per file whose derived target is complete
 * and not excluded.
 * @returns {Array} entries shaped { file, d, outRel } where `d` is the result
 *   of deriveTarget(file) and `outRel` is d.outPath + '/' + d.filename.
 */
function plan() {
  const classify = C();
  const ready = [];
  for (const file of collectFiles()) {
    const target = classify.deriveTarget(file);
    if (!target.excluded && target.complete) {
      ready.push({ file: file, d: target, outRel: `${target.outPath}/${target.filename}` });
    }
  }
  return ready;
}
/**
 * Groups plan entries by their output path and reports the paths that more
 * than one source maps to (a mapping conflict).
 * @param {Array} items plan entries, each carrying an `outRel` string.
 * @returns {{by: Object, conflicts: Array}} `by` maps outRel to its entries;
 *   `conflicts` lists every outRel that has more than one entry.
 */
function conflictsIn(items) {
  const by = items.reduce((groups, entry) => {
    const key = entry.outRel;
    if (!groups[key]) groups[key] = [];
    groups[key].push(entry);
    return groups;
  }, {});
  const conflicts = Object.keys(by).filter((key) => by[key].length > 1);
  return { by: by, conflicts: conflicts };
}
// Forwards a message to window.zddc.toast when that hook exists; otherwise
// does nothing.
function toast(msg, level) {
  const zddc = window.zddc;
  if (!zddc || !zddc.toast) return;
  zddc.toast(msg, level);
}
// Writes `text` into the #scanStatus element and toggles its 'scanning'
// class on for non-empty text, off otherwise. No-op when the element is absent.
function setStatus(text) {
  const statusEl = document.getElementById('scanStatus');
  if (statusEl) {
    statusEl.textContent = text;
    statusEl.classList.toggle('scanning', Boolean(text));
  }
}
/**
 * Asks the user for a read/write output directory and remembers it in
 * `outputHandle`; the chosen directory's name is also passed to
 * C().setOutputName().
 * @returns {Promise<Object|null>} the chosen directory handle, or null when
 *   the picker is unavailable, the user cancels, or opening fails (failures
 *   other than a cancel are reported with an error toast).
 */
async function chooseOutput() {
  const pickerAvailable = Boolean(window.showDirectoryPicker);
  if (!pickerAvailable) {
    toast('Copying to an output directory needs the File System Access API (use Chromium, or run via zddc-server).', 'error');
    return null;
  }
  let picked;
  try {
    picked = await window.showDirectoryPicker({ mode: 'readwrite', id: 'zddc-classifier-output' });
    outputHandle = picked;
    C().setOutputName(picked.name);
  } catch (err) {
    // A dismissed picker surfaces as AbortError and is not worth reporting.
    const cancelled = err.name === 'AbortError';
    if (!cancelled) {
      toast('Could not open the output directory — ' + (err.message || err), 'error');
    }
    return null;
  }
  return picked;
}
/**
 * Walks (creating as needed) the '/'-separated directory chain `relPath`
 * beneath `root`. Empty segments are ignored.
 * @param {Object} root directory handle exposing getDirectoryHandle().
 * @param {string} relPath relative path such as 'a/b/c'.
 * @returns {Promise<Object>} handle of the deepest directory, or `root`
 *   itself when the path has no segments.
 */
async function ensureDir(root, relPath) {
  let dir = root;
  for (const segment of relPath.split('/')) {
    if (!segment) continue;
    dir = await dir.getDirectoryHandle(segment, { create: true });
  }
  return dir;
}
/**
 * Tells whether an existing output file holds the same bytes as a source.
 * Sizes are compared first; the sha256 digests are only computed when the
 * sizes match.
 * @param {Object} existingHandle file handle of the existing target.
 * @param {Object} srcFileObj source file object, read through readSource().
 * @returns {Promise<boolean>} true when size and sha256 digest both match.
 */
async function sameContent(existingHandle, srcFileObj) {
  const target = await existingHandle.getFile();
  const source = await readSource(srcFileObj);
  if (target.size === source.size) {
    const hasher = window.zddc.crypto;
    const targetDigest = await hasher.sha256File(target);
    const sourceDigest = await hasher.sha256File(source);
    return targetDigest === sourceDigest;
  }
  return false;
}
/**
 * Resolves the handle for a source file object. A file that already carries
 * `handle` returns it directly; otherwise the handle is resolved through
 * scanner.resolveFileHandle() starting from window.app.rootHandle.
 * @param {Object} fileObj source file object.
 * @returns {Promise<Object>} the file's handle.
 * @throws {Error} 'source directory not connected' when the file has no
 *   handle and no root handle is set.
 */
async function srcHandle(fileObj) {
  const live = fileObj.handle;
  if (live) return live;
  const root = window.app.rootHandle;
  if (!root) throw new Error('source directory not connected');
  return window.app.modules.scanner.resolveFileHandle(root, fileObj);
}
/**
 * Reads a source file's content. A file flagged `isVirtual` (a zip member)
 * is obtained from scanner.extractZipMember(); any other file is read with
 * getFile() on the handle returned by srcHandle(). Both paths only read.
 * @param {Object} fileObj source file object.
 * @returns {Promise<Object>} whatever extractZipMember() or getFile() yields
 *   (per the original comment, a File or Blob).
 */
async function readSource(fileObj) {
  if (!fileObj.isVirtual) {
    const handle = await srcHandle(fileObj);
    return handle.getFile();
  }
  const scanner = window.app.modules.scanner;
  return scanner.extractZipMember(window.app.rootHandle, fileObj);
}
/**
 * Copies one planned file into the output directory.
 *
 * An existing target is never overwritten: it is compared with the source and
 * reported as 'skipped' (same content) or 'differ'. Otherwise the source is
 * read first and then written to a newly created target.
 *
 * If the write fails, the half-open writable is aborted and the empty
 * placeholder created for the target is removed again. Without that cleanup
 * the leftover 0-byte file would be found on the next run, reported as
 * 'differ', and the real file would never be copied. The placeholder is only
 * removed when the earlier lookup proved (NotFoundError) that no target
 * existed, so a pre-existing file is never deleted.
 *
 * @param {Object} out output directory handle.
 * @param {Object} p plan entry ({ file, d: { outPath, filename } }).
 * @returns {Promise<string>} 'copied' | 'skipped' (identical) | 'differ' (left alone).
 * @throws rethrows the original read/create/write error after cleanup.
 */
async function copyOne(out, p) {
  var name = p.d.filename;
  var dir = await ensureDir(out, p.d.outPath);
  var existing = null;
  var confirmedAbsent = false;
  try {
    existing = await dir.getFileHandle(name);
  } catch (e) {
    // Any lookup failure still leads to a fresh copy (as before); remember
    // whether it was a definite "not found" so cleanup knows the file is ours.
    confirmedAbsent = !!e && e.name === 'NotFoundError';
  }
  if (existing) {
    return (await sameContent(existing, p.file)) ? 'skipped' : 'differ';
  }
  var srcFile = await readSource(p.file); // READ source (never write it)
  var fh = await dir.getFileHandle(name, { create: true });
  var w = null;
  try {
    w = await fh.createWritable();
    await w.write(srcFile);
    await w.close();
  } catch (err) {
    // Best-effort cleanup: failures here must not mask the original error.
    if (w && typeof w.abort === 'function') {
      try { await w.abort(); } catch (ignored) { /* stream already errored or closed */ }
    }
    if (confirmedAbsent && typeof dir.removeEntry === 'function') {
      try { await dir.removeEntry(name); } catch (ignored) { /* placeholder stays behind */ }
    }
    throw err;
  }
  return 'copied';
}
/**
 * Interactive entry point for copying classified files to an output directory.
 *
 * Steps: bail out unless classify mode is enabled; build the plan; drop (and
 * report) entries whose output path collides with another entry; when any
 * remaining file has no live handle, make sure the source root is connected
 * and readable; pick or reuse the output directory; ask for confirmation;
 * run copyTo() and toast a summary.
 *
 * @returns {Promise<Object|undefined>} the summary produced by copyTo(), or
 *   undefined when the run stops early (disabled, nothing to copy, only
 *   collisions, source not connected, permission denied, no output
 *   directory, or the user declines the confirm).
 */
async function run() {
  if (!C().isEnabled()) return;
  var items = plan();
  if (!items.length) {
    toast('Nothing to copy yet — no files are fully classified (need both a tracking leaf and a transmittal).', 'warning');
    return;
  }
  var cf = conflictsIn(items);
  // A Set keeps the membership test independent of Object.prototype keys.
  var blocked = new Set(cf.conflicts);
  var todo = items.filter(function (p) { return !blocked.has(p.outRel); });
  if (cf.conflicts.length) {
    toast(cf.conflicts.length + ' output-name collision(s) — two source files map to the same name. Skipped:\n'
      + cf.conflicts.join('\n'), 'error');
  }
  if (!todo.length) return;
  // Snapshot-loaded files have no live handle — re-grant read on the
  // workspace source directory (one click) before copying.
  if (todo.some(function (p) { return !p.file.handle; })) {
    if (!window.app.rootHandle) {
      toast('The source directory isn’t connected. Re-open the workspace to reconnect it.', 'error');
      return;
    }
    var srcOk = await window.app.modules.persist.verifyPermission(window.app.rootHandle, false);
    if (!srcOk) { toast('Permission to read the source directory was denied.', 'error'); return; }
  }
  var out = outputHandle || await chooseOutput();
  if (!out) return;
  if (!confirm('Copy ' + todo.length + ' file(s) into "' + out.name + '"?\n\nThe source directory is not modified.')) return;
  var s = await copyTo(out, todo);
  var msg = 'Copy complete — ' + s.copied + ' copied, ' + s.skipped + ' identical skipped'
    + (s.differ ? (', ' + s.differ + ' already exist with different content (left untouched)') : '')
    + (s.errors ? (', ' + s.errors + ' errors') : '') + '.';
  // Anything not copied cleanly downgrades the summary to a warning.
  toast(msg, (s.errors || s.differ) ? 'warning' : 'success');
  if (s.differing.length) toast('Existing-but-different (not overwritten):\n' + s.differing.join('\n'), 'warning');
  return s;
}
/**
 * Copy engine (and test seam): copies every entry of `todo` into `out`, one
 * after another, with no picker and no confirmation. Progress is shown via
 * setStatus() and cleared at the end. A failure on one entry is counted and
 * toasted, then the loop carries on with the next entry.
 * @param {Object} out output directory handle.
 * @param {Array} todo plan entries to copy.
 * @returns {Promise<Object>} summary { copied, skipped, differ, errors,
 *   differing } where `differing` lists the outRel of every 'differ' result.
 */
async function copyTo(out, todo) {
  const summary = { copied: 0, skipped: 0, differ: 0, errors: 0, differing: [] };
  for (const [idx, item] of todo.entries()) {
    setStatus(`Copying… ${idx + 1}/${todo.length} — ${item.d.filename}`);
    try {
      const outcome = await copyOne(out, item);
      summary[outcome] += 1;
      if (outcome === 'differ') {
        summary.differing.push(item.outRel);
      }
    } catch (err) {
      summary.errors += 1;
      toast('Failed to copy ' + item.outRel + ' — ' + (err.message || err), 'error');
    }
  }
  setStatus('');
  return summary;
}
// Number of files currently ready to copy (the length of plan()).
function readyCount() {
  const ready = plan();
  return ready.length;
}
// Public surface of the copy module, registered on the shared app object.
window.app.modules.copy = {
  run,
  readyCount,
  chooseOutput,
  // test/advanced seams
  plan,
  conflictsIn,
  copyTo,
};
})();