Zip members were live-only: expandable while the source was connected, but the workspace snapshot dropped the archive (.zip became a plain file), so a classification made inside one vanished on reopen — and copy couldn't extract it anyway (it tried to walk the archive path as a real directory).

Now zips are first-class:
- snapshotTree/loadSnapshot persist the scanned archive subtree — zip-root + virtual folders + members carry isVirtual/zipPath/zipEntryPath, so the tree rebuilds on reopen and assignments inside an archive survive. An archive that was never opened persists as a lazy 'zip' node that reopens on demand.
- scanner.ensureZipLoaded(rootHandle, zipPath) reloads an archive from the workspace root when the in-memory cache is cold (post-restore); scanZipNode falls back to it when a restored zip node has no live file object.
- copy.js reads a member via scanner.extractZipMember (Blob from the archive) instead of a non-existent file handle; preview.js reloads the archive for a restored member before opening it.

This also reconciles export/import with the snapshot: both now keep zip members, so a round-trip no longer leaves dangling in-archive assignments.

Tests: zip subtree snapshot round-trip; copy extracts a member to the output (45).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
209 lines
8.5 KiB
JavaScript
209 lines
8.5 KiB
JavaScript
/**
 * ZDDC Classifier — copy-out (Classify & Copy mode).
 *
 * Copies the fully-classified source files into a SEPARATE output directory
 * under their canonical ZDDC names and folder layout
 *   <party>/{received,issued}/<DATE_TN (STATUS) - TITLE>/<TRACKING_REV (STATUS) - TITLE.ext>
 * The source is never modified — every operation is a read (getFile) on the
 * source and a write into the chosen output handle.
 *
 * Duplicate detection:
 *   - two sources → the same output path = mapping conflict (skipped + reported)
 *   - target already exists, identical bytes (sha256) = skipped
 *   - target exists, different bytes = left untouched + reported (no clobber)
 *
 * Built on the generic FS-Access shape (getDirectoryHandle/getFileHandle/
 * createWritable), so it works against a real handle today and a server-backed
 * output handle later without changing this logic.
 */
|
||
(function () {
|
||
'use strict';
|
||
|
||
// Output directory handle picked via chooseOutput(); run() reuses it instead of
// prompting again while it is set.
var outputHandle = null; // remembered for the session
|
||
|
||
// Accessor for the classify module. Resolved on every call, so it always
// reflects whatever is currently registered on window.app.modules.
function C() {
    var modules = window.app.modules;
    return modules.classify;
}
|
||
|
||
// Flatten window.app.folderTree into a single array of file objects.
// Each node contributes its own `files` first, then the walk recurses into
// its `children` (pre-order). Missing `files`/`children` and a missing tree
// are treated as empty. Returns a new array; the tree is not modified.
function collectFiles() {
    var out = [];
    (function walk(nodes) {
        (nodes || []).forEach(function (n) {
            (n.files || []).forEach(function (f) { out.push(f); });
            walk(n.children);
        });
    })(window.app.folderTree || []);
    return out;
}
|
||
|
||
// Files that are ready to copy: complete target, not excluded.
// Returns one item per ready file:
//   { file: <file object>, d: <deriveTarget result>, outRel: d.outPath + '/' + d.filename }
// The target is derived through the classify module's deriveTarget(); files
// whose target is excluded or incomplete are left out.
function plan() {
    var c = C(), items = [];
    collectFiles().forEach(function (f) {
        var d = c.deriveTarget(f);
        if (d.excluded || !d.complete) return;
        items.push({ file: f, d: d, outRel: d.outPath + '/' + d.filename });
    });
    return items;
}
|
||
|
||
// Group by output path; >1 source for a path = a mapping conflict.
// items: array of plan() entries (only `outRel` is read here).
// Returns { by, conflicts }:
//   by        — plain object mapping each outRel to the array of items that target it
//   conflicts — the outRel keys that have more than one item
// Grouping uses an own-property check so an outRel that happens to match an
// inherited Object.prototype member (e.g. 'constructor') is grouped like any
// other key instead of tripping over the inherited value.
function conflictsIn(items) {
    var hasOwn = Object.prototype.hasOwnProperty;
    var by = {}, conflicts = [];
    items.forEach(function (p) {
        if (!hasOwn.call(by, p.outRel)) by[p.outRel] = [];
        by[p.outRel].push(p);
    });
    Object.keys(by).forEach(function (k) { if (by[k].length > 1) conflicts.push(k); });
    return { by: by, conflicts: conflicts };
}
|
||
|
||
// Show a notification through window.zddc.toast when it is available;
// silently does nothing otherwise.
// msg: text to show; level: passed through unchanged (this file uses
// 'error', 'warning' and 'success').
function toast(msg, level) {
    if (window.zddc && window.zddc.toast) window.zddc.toast(msg, level);
}
|
||
// Write progress text into the #scanStatus element and toggle its 'scanning'
// class: on for non-empty text, off for empty text. No-op when the element
// is not in the document.
function setStatus(text) {
    var el = document.getElementById('scanStatus');
    if (!el) return;
    el.textContent = text;
    el.classList.toggle('scanning', !!text);
}
|
||
|
||
// Prompt for the output directory (read/write) and remember it.
// Resolves to the picked directory handle, or null when the picker API is
// unavailable, the user cancels, or the picker fails. On success the handle
// is stored in `outputHandle` and its name is handed to the classify module
// via setOutputName(). A cancelled picker (AbortError) is silent; any other
// failure is reported with an error toast.
async function chooseOutput() {
    if (!window.showDirectoryPicker) {
        toast('Copying to an output directory needs the File System Access API (use Chromium, or run via zddc-server).', 'error');
        return null;
    }
    try {
        var h = await window.showDirectoryPicker({ mode: 'readwrite', id: 'zddc-classifier-output' });
        outputHandle = h;
        C().setOutputName(h.name);
        return h;
    } catch (e) {
        if (e.name !== 'AbortError') toast('Could not open the output directory — ' + (e.message || e), 'error');
        return null;
    }
}
|
||
|
||
// Walk (creating as needed) the directory chain `relPath` below `root`.
// root: a directory handle exposing getDirectoryHandle(name, { create }).
// relPath: '/'-separated path; empty segments (leading, trailing or doubled
// slashes) are ignored, so '' resolves to `root` itself.
// Resolves to the handle of the deepest directory.
async function ensureDir(root, relPath) {
    var parts = relPath.split('/').filter(Boolean);
    var cur = root;
    for (var i = 0; i < parts.length; i++) {
        cur = await cur.getDirectoryHandle(parts[i], { create: true });
    }
    return cur;
}
|
||
|
||
// Decide whether an existing output file holds the same bytes as a source file.
// existingHandle: file handle of the file already in the output directory.
// srcFileObj: source file object, read through readSource().
// Sizes are compared first so differing files are rejected without hashing;
// only equal-sized files are compared by window.zddc.crypto.sha256File.
// Resolves to true when the digests match, false otherwise.
async function sameContent(existingHandle, srcFileObj) {
    var ef = await existingHandle.getFile();
    var sf = await readSource(srcFileObj);
    if (ef.size !== sf.size) return false;
    var a = await window.zddc.crypto.sha256File(ef);
    var b = await window.zddc.crypto.sha256File(sf);
    return a === b;
}
|
||
|
||
// Resolve a source file's live handle. Fresh-scan files already carry one;
// snapshot-loaded files resolve lazily from the workspace root by path.
// fileObj: source file object; `fileObj.handle` is returned as-is when set.
// Otherwise the lookup is delegated to scanner.resolveFileHandle(rootHandle, fileObj).
// Throws Error('source directory not connected') when there is no handle and
// window.app.rootHandle is not set.
async function srcHandle(fileObj) {
    if (fileObj.handle) return fileObj.handle;
    if (!window.app.rootHandle) throw new Error('source directory not connected');
    return window.app.modules.scanner.resolveFileHandle(window.app.rootHandle, fileObj);
}
|
||
|
||
// Read a source file's bytes (a File or Blob). A zip member is extracted
// from its archive (lazily reloaded from the root); a plain file is read
// through its resolved handle. The source is never written either way.
// fileObj: source file object; `isVirtual` marks a zip member, which is read
// via scanner.extractZipMember(rootHandle, fileObj). Anything else goes
// through srcHandle(fileObj).getFile().
async function readSource(fileObj) {
    if (fileObj.isVirtual) {
        return window.app.modules.scanner.extractZipMember(window.app.rootHandle, fileObj);
    }
    return (await srcHandle(fileObj)).getFile();
}
|
||
|
||
// Best-effort cleanup after a copy failed part-way: abort the open writable
// (if any) and remove the target only when it is still the zero-byte stub that
// getFileHandle({ create: true }) produced. Cleanup errors are deliberately
// swallowed so they never mask the original failure; a non-empty target is
// never removed (no clobber).
async function discardFailedCopy(dir, name, fh, w) {
    try {
        if (w && typeof w.abort === 'function') await w.abort();
    } catch (e) { /* stream already errored or closed — nothing left to abort */ }
    try {
        if (typeof dir.removeEntry !== 'function' || typeof fh.getFile !== 'function') return;
        var stub = await fh.getFile();
        if (stub.size === 0) await dir.removeEntry(name);
    } catch (e) { /* best effort — the caller reports the original error */ }
}

// Copy one file. Returns 'copied' | 'skipped' (identical) | 'differ' (left alone).
// out: output root directory handle. p: a plan() item ({ file, d, outRel }).
// An existing target is never overwritten: it is only compared with the source.
// If writing a fresh target fails, the writable is aborted and the empty stub
// is removed before the error is rethrown, so a later run does not mistake the
// stub for an existing file with different content.
async function copyOne(out, p) {
    var dir = await ensureDir(out, p.d.outPath);
    var name = p.d.filename;
    var existing = null;
    // Any lookup failure is treated as "not there yet"; a failure other than
    // NotFound is expected to resurface on the create call below.
    try { existing = await dir.getFileHandle(name); } catch (e) { /* NotFound → fresh copy */ }
    if (existing) {
        return (await sameContent(existing, p.file)) ? 'skipped' : 'differ';
    }
    var srcFile = await readSource(p.file);   // READ source (never write it)
    var fh = await dir.getFileHandle(name, { create: true });
    var w = null;
    try {
        w = await fh.createWritable();
        await w.write(srcFile);
        await w.close();
    } catch (err) {
        await discardFailedCopy(dir, name, fh, w);
        throw err;
    }
    return 'copied';
}
|
||
|
||
// Interactive entry point for Classify & Copy.
// Steps: bail out unless the classify module is enabled → build the plan →
// drop (and report) output-name collisions → re-grant read access to the
// source when any planned file has no live handle → pick/reuse the output
// directory → confirm → copyTo() → report a summary.
// Resolves to the copyTo() summary object, or undefined when the run stops
// early (disabled, nothing to copy, no source/permission, no output chosen,
// or the user declines the confirmation).
async function run() {
    if (!C().isEnabled()) return;
    var items = plan();
    if (!items.length) {
        toast('Nothing to copy yet — no files are fully classified (need both a tracking leaf and a transmittal).', 'warning');
        return;
    }
    var cf = conflictsIn(items);
    var blocked = {};
    cf.conflicts.forEach(function (path) { blocked[path] = true; });
    // Own-property check: an outRel matching an inherited Object.prototype
    // name must not be mistaken for a blocked path.
    var todo = items.filter(function (p) {
        return !Object.prototype.hasOwnProperty.call(blocked, p.outRel);
    });

    if (cf.conflicts.length) {
        toast(cf.conflicts.length + ' output-name collision(s) — two source files map to the same name. Skipped:\n'
            + cf.conflicts.join('\n'), 'error');
    }
    if (!todo.length) return;

    // Snapshot-loaded files have no live handle — re-grant read on the
    // workspace source directory (one click) before copying.
    if (todo.some(function (p) { return !p.file.handle; })) {
        if (!window.app.rootHandle) {
            toast('The source directory isn’t connected. Re-open the workspace to reconnect it.', 'error');
            return;
        }
        var srcOk = await window.app.modules.persist.verifyPermission(window.app.rootHandle, false);
        if (!srcOk) { toast('Permission to read the source directory was denied.', 'error'); return; }
    }

    // Reuse the directory remembered for this session, else prompt for one.
    var out = outputHandle || await chooseOutput();
    if (!out) return;
    if (!confirm('Copy ' + todo.length + ' file(s) into "' + out.name + '"?\n\nThe source directory is not modified.')) return;

    var s = await copyTo(out, todo);

    var msg = 'Copy complete — ' + s.copied + ' copied, ' + s.skipped + ' identical skipped'
        + (s.differ ? (', ' + s.differ + ' already exist with different content (left untouched)') : '')
        + (s.errors ? (', ' + s.errors + ' errors') : '') + '.';
    toast(msg, (s.errors || s.differ) ? 'warning' : 'success');
    if (s.differing.length) toast('Existing-but-different (not overwritten):\n' + s.differing.join('\n'), 'warning');
    return s;
}
|
||
|
||
// Run the copy loop over a ready list against an output handle. No picker,
// no confirm — that's run()'s job; this is the engine (and the test seam).
// out: output root directory handle. todo: array of plan() items.
// Files are copied one at a time, with progress shown through setStatus().
// A failure on one file is counted and reported via toast(), then the loop
// continues with the next file.
// Resolves to { copied, skipped, differ, errors, differing }, where
// `differing` lists the outRel of every target left alone because its
// content differs.
async function copyTo(out, todo) {
    var s = { copied: 0, skipped: 0, differ: 0, errors: 0, differing: [] };
    for (var i = 0; i < todo.length; i++) {
        var item = todo[i];
        setStatus('Copying… ' + (i + 1) + '/' + todo.length + ' — ' + item.d.filename);
        try {
            var r = await copyOne(out, item);
            s[r]++;   // r is one of the counter names above
            if (r === 'differ') s.differing.push(item.outRel);
        } catch (e) {
            s.errors++;
            toast('Failed to copy ' + item.outRel + ' — ' + (e.message || e), 'error');
        }
    }
    setStatus('');
    return s;
}
|
||
|
||
// Number of files currently ready to copy, i.e. the size of plan().
function readyCount() {
    var ready = plan();
    return ready.length;
}
|
||
|
||
// Register this module on the app: the UI-facing entry points first, then the
// lower-level functions exposed for tests and advanced callers.
window.app.modules.copy = {
    run: run,
    readyCount: readyCount,
    chooseOutput: chooseOutput,
    // test/advanced seams
    plan: plan,
    conflictsIn: conflictsIn,
    copyTo: copyTo,
};
|
||
})();
|