Replaces the zip option. Copy now offers two real destinations, both filing under <party>/<received|issued>/<transmittal>/<name>: - Server archive — PUTs each file into a zddc-server over HTTP via the zddc-source HttpDirectoryHandle (mkdir as needed). Offered only over http(s); prompts for the archive URL (guessed from the current path's archive/ segment). - Local folder — the File System Access picker (choose archive/ to file directly). Both reuse the one copy engine and are efficiently resumable: copyOne probes the target with a cheap stat/HEAD and SKIPS anything already present — no source read, no hashing — so a re-run after an interruption only does what's left. Canonical ZDDC names + the WORM archive mean an existing name is the same document, so we never overwrite (the old content-diff path is dropped). Tests: re-run skips an existing local target; PUT into a server-style handle then resume-skips it (52 green). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
297 lines
13 KiB
JavaScript
297 lines
13 KiB
JavaScript
/**
 * ZDDC Classifier — copy-out (Classify & Copy mode).
 *
 * Copies the fully-classified source files into a SEPARATE output directory
 * under their canonical ZDDC names and folder layout
 *   <party>/{received,issued}/<DATE_TN (STATUS) - TITLE>/<TRACKING_REV (STATUS) - TITLE.ext>
 * The source is never modified — every operation is a read (getFile) on the
 * source and a write into the chosen output handle.
 *
 * Duplicate detection:
 *   - two sources → the same output path = mapping conflict (skipped + reported)
 *   - target already exists               = skipped and left untouched (no clobber).
 *     Existence alone decides — the source is not read or hashed — which is
 *     what makes a re-run after an interruption cheap to resume.
 *
 * Built on the generic FS-Access shape (getDirectoryHandle/getFileHandle/
 * createWritable), so the same logic works against a local directory handle
 * and against a server-backed (HTTP) output handle.
 */
(function () {
|
||
'use strict';

// Local output directory handle chosen via chooseOutput(); remembered for the
// session so later local copies reuse it.
var outputHandle = null;

/**
 * Shorthand accessor for the classify module.
 *
 * Looked up on every call rather than cached, so it always reflects whatever
 * is currently registered at window.app.modules.classify.
 *
 * @returns {Object} window.app.modules.classify
 */
function C() {
    return window.app.modules.classify;
}

/**
 * Flatten the scanned folder tree into a single list of file objects.
 *
 * Walks window.app.folderTree depth-first: a node's own `files` come first,
 * then the files of each entry in its `children`, in order. A missing tree,
 * `files` or `children` is treated as empty.
 *
 * @returns {Array} every file object found in the tree (possibly empty).
 */
function collectFiles() {
    const found = [];
    const visit = (nodes) => {
        for (const node of nodes || []) {
            for (const file of node.files || []) found.push(file);
            visit(node.children);
        }
    };
    visit(window.app.folderTree || []);
    return found;
}

/**
 * List the files that are ready to copy: complete target, not excluded.
 *
 * Every collected file is passed to the classify module's deriveTarget();
 * results flagged `excluded` or not `complete` are dropped.
 *
 * @returns {Array<{file: Object, d: Object, outRel: string}>} one item per
 *   ready file, where `d` is the derived target and `outRel` is
 *   `d.outPath + '/' + d.filename`.
 */
function plan() {
    const classify = C();
    const ready = [];
    for (const file of collectFiles()) {
        const d = classify.deriveTarget(file);
        if (d.excluded || !d.complete) continue;
        ready.push({ file: file, d: d, outRel: d.outPath + '/' + d.filename });
    }
    return ready;
}

/**
 * Group planned items by output path; more than one source for the same path
 * is a mapping conflict.
 *
 * The grouping dictionary has no prototype, so an output path that happens to
 * be named like an Object.prototype member ("constructor", "__proto__", …) is
 * an ordinary key instead of colliding with an inherited property.
 *
 * @param {Array<{outRel: string}>} items planned items (see plan()).
 * @returns {{by: Object<string, Array>, conflicts: string[]}} `by` maps each
 *   output path to the items that target it; `conflicts` lists the paths
 *   targeted by more than one item, in first-seen order.
 */
function conflictsIn(items) {
    const by = Object.create(null);
    const conflicts = [];
    for (const item of items) {
        (by[item.outRel] = by[item.outRel] || []).push(item);
    }
    for (const path of Object.keys(by)) {
        if (by[path].length > 1) conflicts.push(path);
    }
    return { by: by, conflicts: conflicts };
}

/**
 * Show a toast through window.zddc.toast when that helper is available;
 * otherwise do nothing.
 *
 * @param {string} msg message text.
 * @param {string} [level] passed through unchanged (this file uses 'error',
 *   'warning' and 'success').
 */
function toast(msg, level) {
    const zddc = window.zddc;
    if (zddc && zddc.toast) zddc.toast(msg, level);
}
/**
 * Write progress text into the #scanStatus element, if it exists.
 *
 * The 'scanning' class is switched on while there is text to show and off
 * when the text is empty.
 *
 * @param {string} text status line; pass '' to clear.
 */
function setStatus(text) {
    const statusEl = document.getElementById('scanStatus');
    if (!statusEl) return;
    statusEl.textContent = text;
    statusEl.classList.toggle('scanning', Boolean(text));
}

/**
 * Ask the user for a local output directory via the File System Access picker.
 *
 * On success the handle is remembered in `outputHandle` for the session and
 * its name is handed to the classify module's setOutputName().
 *
 * @returns {Promise<Object|null>} the chosen directory handle, or null when
 *   the picker API is unavailable, the user cancels (AbortError, silent), or
 *   opening fails (reported with an error toast).
 */
async function chooseOutput() {
    if (!window.showDirectoryPicker) {
        toast('Copying to an output directory needs the File System Access API (use Chromium, or run via zddc-server).', 'error');
        return null;
    }
    try {
        const picked = await window.showDirectoryPicker({ mode: 'readwrite', id: 'zddc-classifier-output' });
        outputHandle = picked;
        C().setOutputName(picked.name);
        return picked;
    } catch (e) {
        // Dismissing the picker is a normal outcome, not an error worth a toast.
        const cancelled = Boolean(e) && e.name === 'AbortError';
        if (!cancelled) {
            toast('Could not open the output directory — ' + ((e && e.message) || e), 'error');
        }
        return null;
    }
}

/**
 * Walk `relPath` below `root`, creating every missing directory on the way.
 *
 * Empty segments (leading, trailing or doubled slashes) are ignored, so an
 * empty path resolves to `root` itself.
 *
 * @param {Object} root directory handle exposing getDirectoryHandle().
 * @param {string} relPath slash-separated path relative to `root`.
 * @returns {Promise<Object>} handle of the deepest directory.
 */
async function ensureDir(root, relPath) {
    let dir = root;
    for (const segment of relPath.split('/').filter(Boolean)) {
        dir = await dir.getDirectoryHandle(segment, { create: true });
    }
    return dir;
}

/**
 * Resolve a target subdirectory below `root`, optionally creating it.
 *
 * With `create` falsy nothing is created: the first segment that cannot be
 * opened makes the whole call resolve to null. That lets a caller test for an
 * existing file cheaply on resume, before paying to create the folder chain.
 * With `create` truthy missing segments are created and any failure is
 * rethrown to the caller.
 *
 * @param {Object} root directory handle exposing getDirectoryHandle().
 * @param {string} relPath slash-separated path; empty segments are ignored.
 * @param {boolean} [create] create missing directories instead of giving up.
 * @returns {Promise<Object|null>} the deepest directory handle, or null when
 *   not creating and a segment could not be opened.
 */
async function resolveDir(root, relPath, create) {
    let dir = root;
    for (const segment of relPath.split('/').filter(Boolean)) {
        try {
            dir = await dir.getDirectoryHandle(segment, create ? { create: true } : undefined);
        } catch (e) {
            if (create) throw e;
            return null;
        }
    }
    return dir;
}

/**
 * Resolve a source file's live handle.
 *
 * Fresh-scan files already carry one in `fileObj.handle`; snapshot-loaded
 * files are resolved lazily from the workspace root via the scanner module.
 *
 * @param {Object} fileObj file record from the folder tree.
 * @returns {Promise<Object>} the file's handle.
 * @throws {Error} 'source directory not connected' when the file has no
 *   handle and window.app.rootHandle is not set.
 */
async function srcHandle(fileObj) {
    if (fileObj.handle) return fileObj.handle;
    const root = window.app.rootHandle;
    if (!root) throw new Error('source directory not connected');
    return window.app.modules.scanner.resolveFileHandle(root, fileObj);
}

/**
 * Read a source file's bytes (a File or Blob). The source is never written.
 *
 * A zip member (`fileObj.isVirtual`) is extracted from its archive by the
 * scanner module, starting from the workspace root; a plain file is read
 * through its resolved handle.
 *
 * @param {Object} fileObj file record from the folder tree.
 * @returns {Promise<*>} whatever extractZipMember() or handle.getFile() yields.
 */
async function readSource(fileObj) {
    if (fileObj.isVirtual) {
        return window.app.modules.scanner.extractZipMember(window.app.rootHandle, fileObj);
    }
    const handle = await srcHandle(fileObj);
    return handle.getFile();
}

/**
 * Copy one planned file into the output handle.
 *
 * Resumable: the target is probed first and a file that is already present is
 * left as-is ('skipped') without reading or hashing the source. (Canonical
 * ZDDC names ⇒ same name = same document, and the server archive is WORM, so
 * an existing target is never overwritten.)
 *
 * Because existence alone decides, a failed write must not leave an entry
 * behind: getFileHandle(..., { create: true }) creates the (empty) file before
 * any bytes are written, and a leftover would be reported as 'skipped' on the
 * next run and never copied. On failure the stream is aborted and, when the
 * probe showed the target was absent beforehand, the entry is removed again.
 *
 * @param {Object} out output root directory handle.
 * @param {{file: Object, d: {outPath: string, filename: string}}} p planned item.
 * @returns {Promise<'copied'|'skipped'>}
 * @throws whatever directory creation, the source read or the write throws.
 */
async function copyOne(out, p) {
    const name = p.d.filename;

    // Cheap existence probe: resolve the dir WITHOUT creating it (the HTTP
    // handle doesn't verify here, but getFileHandle below does a HEAD).
    let knownAbsent = false;
    const probe = await resolveDir(out, p.d.outPath, false);
    if (!probe) {
        knownAbsent = true; // the folder chain itself is missing
    } else {
        try {
            await probe.getFileHandle(name);
            return 'skipped';
        } catch (e) {
            // Any failure falls through to the write path, but only a clear
            // "not found" licenses deleting the entry again after a failed write.
            knownAbsent = Boolean(e) && e.name === 'NotFoundError';
        }
    }

    // Write path: create the folder chain (idempotent), then read + write.
    // The source is read BEFORE the target entry is created, so a read
    // failure leaves nothing behind.
    const dir = await ensureDir(out, p.d.outPath);
    const srcFile = await readSource(p.file); // READ source (never write it)
    const fh = await dir.getFileHandle(name, { create: true });

    let writable = null;
    try {
        writable = await fh.createWritable();
        await writable.write(srcFile);
        await writable.close();
    } catch (err) {
        // Best-effort cleanup; the original error is the one worth reporting.
        if (writable && typeof writable.abort === 'function') {
            try { await writable.abort(); } catch (ignored) { /* stream already closed/errored */ }
        }
        if (knownAbsent && typeof dir.removeEntry === 'function') {
            try { await dir.removeEntry(name); } catch (ignored) { /* nothing left to remove */ }
        }
        throw err;
    }
    return 'copied';
}

/**
 * Entry point for "copy": plan, drop conflicting targets, ask where to file
 * the copies, make sure the source is readable, then hand off to the chosen
 * copier.
 *
 * Steps:
 *   1. Do nothing unless the classify module reports it is enabled.
 *   2. Plan the ready files; warn and stop when there are none.
 *   3. Report output-name collisions and exclude every item that targets a
 *      colliding path; stop when nothing is left.
 *   4. Ask for the destination (server archive or local folder).
 *   5. If any file lacks a live handle (snapshot-loaded), require a connected
 *      workspace root and read permission on it.
 *
 * @returns {Promise<Object|undefined>} the copier's result, or undefined when
 *   the run stops early.
 */
async function run() {
    if (!C().isEnabled()) return;

    const items = plan();
    if (!items.length) {
        toast('Nothing to copy yet — no files are fully classified (need both a tracking leaf and a transmittal).', 'warning');
        return;
    }

    const conflicts = conflictsIn(items).conflicts;
    const blocked = new Set(conflicts);
    const todo = items.filter((p) => !blocked.has(p.outRel));

    if (conflicts.length) {
        toast(conflicts.length + ' output-name collision(s) — two source files map to the same name. Skipped:\n'
            + conflicts.join('\n'), 'error');
    }
    if (!todo.length) return;

    // Where to file the canonical copies: the server archive (HTTP) or a
    // local folder (File System Access). Both read the source, never write it,
    // and both are resumable — already-present targets are skipped.
    const dest = await chooseDestination(todo.length);
    if (!dest) return;

    // Snapshot-loaded files have no live handle — re-grant read on the
    // workspace source directory (one click) before reading.
    const needsSourceAccess = todo.some((p) => !p.file.handle);
    if (needsSourceAccess) {
        const root = window.app.rootHandle;
        if (!root) {
            toast('The source directory isn’t connected. Re-open the workspace to reconnect it.', 'error');
            return;
        }
        const srcOk = await window.app.modules.persist.verifyPermission(root, false);
        if (!srcOk) {
            toast('Permission to read the source directory was denied.', 'error');
            return;
        }
    }

    return dest === 'server' ? copyToServer(todo) : copyToLocal(todo);
}

/**
 * Toast the outcome of a copy run.
 *
 * The toast is a 'warning' when any file failed and a 'success' otherwise;
 * the failure count is mentioned only when it is non-zero.
 *
 * @param {{copied: number, skipped: number, errors: number}} s run counters.
 * @param {string} where human-readable destination, inserted verbatim.
 */
function summary(s, where) {
    const failedPart = s.errors ? (', ' + s.errors + ' failed (retry to resume)') : '';
    const msg = 'Copy to ' + where + ' — ' + s.copied + ' copied, ' + s.skipped + ' already there'
        + failedPart + '.';
    toast(msg, s.errors ? 'warning' : 'success');
}

/**
 * Copy the ready files into a local folder.
 *
 * Reuses the output directory remembered for this session, or asks for one,
 * then asks for confirmation before running the copy engine and toasting a
 * summary.
 *
 * @param {Array} todo planned items to copy.
 * @returns {Promise<Object|undefined>} the copy counters, or undefined when
 *   no directory was chosen or the user declined the confirmation.
 */
async function copyToLocal(todo) {
    const out = outputHandle || await chooseOutput();
    if (!out) return;

    const question = 'Copy ' + todo.length + ' file(s) into "' + out.name + '"?\n\n'
        + 'Written under <party>/<received|issued>/<transmittal>/ — pick your archive/ folder to file them directly. '
        + 'Re-running resumes (already-copied files are skipped). The source is not modified.';
    if (!confirm(question)) return;

    const s = await copyTo(out, todo);
    summary(s, '"' + out.name + '"');
    return s;
}

/**
 * Copy straight into the server's archive over HTTP (PUT per file, mkdir as
 * needed). Uses the zddc-source HTTP handle, so the SAME copy engine writes
 * <party>/<received|issued>/<transmittal>/<name> under the chosen archive URL.
 *
 * The user is prompted for the archive URL, pre-filled with the last URL that
 * worked in this session or a guess from the current page. Surrounding
 * whitespace is stripped and a blank answer counts as cancel: the URL parser
 * ignores leading/trailing whitespace, so "  " would otherwise resolve to the
 * origin root. A trailing slash is appended when missing. The URL is
 * remembered only once a handle was built from it, so a rejected value is not
 * offered again.
 *
 * @param {Array} todo planned items to copy.
 * @returns {Promise<Object|undefined>} the copy counters, or undefined when
 *   the page is not served by a zddc-server, the prompt is cancelled or left
 *   blank, or the URL is rejected.
 */
async function copyToServer(todo) {
    const src = window.zddc && window.zddc.source;
    if (!src || location.protocol === 'file:') {
        toast('Server copy needs the classifier to be served by a zddc-server (open it over http).', 'error');
        return;
    }

    const suggested = serverArchiveUrl || guessArchiveUrl();
    const answer = prompt('Server archive URL to file into (canonical copies go under <party>/<received|issued>/<transmittal>/):', suggested);
    let url = (answer || '').trim();
    if (!url) return;
    if (url.charAt(url.length - 1) !== '/') url += '/';

    let out;
    try {
        const resolved = new URL(url, location.origin);
        out = new src.HttpDirectoryHandle(resolved.href, 'archive');
    } catch (e) {
        toast('Bad archive URL — ' + (e.message || e), 'error');
        return;
    }
    serverArchiveUrl = url;

    const s = await copyTo(out, todo);
    summary(s, 'server archive');
    return s;
}
/**
 * Best-guess archive root from the current page: the path up to and including
 * the first "/archive/" segment, else the directory the page is served from.
 *
 * @returns {string} absolute URL (location.origin + path) ending in '/'.
 */
function guessArchiveUrl() {
    const path = location.pathname;
    const hit = /^(.*?\/archive\/)/.exec(path);
    const base = hit ? hit[1] : path.replace(/[^/]*$/, '');
    return location.origin + base;
}

// Archive URL entered for a server copy; re-offered as the prompt default for
// the rest of the session.
var serverArchiveUrl = null;

/**
 * Tiny modal: choose server archive vs local folder.
 *
 * The server button is enabled only when the page is served over http(s).
 * Escape, a click on the backdrop, or Cancel resolves null. The dialog text is
 * built from text nodes and textContent rather than an HTML string: the
 * layout hint contains literal "<party>"-style placeholders, which an HTML
 * parser would read as unknown elements and drop from the visible text.
 *
 * @param {number} n number of files about to be copied (shown in the title).
 * @returns {Promise<'server'|'local'|null>} the user's choice.
 */
function chooseDestination(n) {
    return new Promise((resolve) => {
        const onServer = location.protocol === 'http:' || location.protocol === 'https:';
        let settled = false;

        const backdrop = document.createElement('div');
        backdrop.className = 'copy-choice__backdrop';
        const box = document.createElement('div');
        box.className = 'copy-choice';

        // Close exactly once: drop the key listener, remove the modal, settle.
        function finish(choice) {
            if (settled) return;
            settled = true;
            document.removeEventListener('keydown', onKey);
            backdrop.remove();
            resolve(choice);
        }
        function onKey(e) {
            if (e.key === 'Escape') finish(null);
        }

        const heading = document.createElement('h3');
        heading.textContent = 'Copy ' + n + ' classified file' + (n === 1 ? '' : 's');

        const layout = document.createElement('code');
        layout.textContent = '<party>/<received|issued>/<transmittal>/<name>';
        const note = document.createElement('p');
        note.appendChild(document.createTextNode('Filed under '));
        note.appendChild(layout);
        note.appendChild(document.createTextNode('. Re-running resumes — files already present at the destination are skipped.'));

        const makeButton = (label, cls, value, disabled) => {
            const button = document.createElement('button');
            button.className = 'btn ' + cls;
            button.textContent = label;
            if (disabled) {
                button.disabled = true;
                button.title = 'Open the classifier over a zddc-server to enable this';
            } else {
                button.addEventListener('click', () => finish(value));
            }
            return button;
        };

        const row = document.createElement('div');
        row.className = 'copy-choice__btns';
        row.appendChild(makeButton('☁ Copy to server archive', 'btn-primary', 'server', !onServer));
        row.appendChild(makeButton('📁 Copy to a local folder…', onServer ? 'btn-secondary' : 'btn-primary', 'local'));
        row.appendChild(makeButton('Cancel', 'btn-secondary', null));

        box.appendChild(heading);
        box.appendChild(note);
        box.appendChild(row);
        backdrop.appendChild(box);

        // Only a click on the backdrop itself dismisses, not one inside the box.
        backdrop.addEventListener('click', (e) => { if (e.target === backdrop) finish(null); });
        document.addEventListener('keydown', onKey);
        document.body.appendChild(backdrop);
    });
}

/**
 * Run the copy loop over a ready list against an output handle. No picker,
 * no confirm — that's the caller's job; this is the engine (and the test seam).
 *
 * Resumable: copyOne skips targets that already exist, so a re-run after an
 * interruption only does the remaining work. Files are processed one at a
 * time; a failure is counted and toasted, and the loop carries on with the
 * next file. The status line shows progress and is cleared at the end.
 *
 * @param {Object} out output root directory handle.
 * @param {Array<{d: {filename: string}, outRel: string}>} todo planned items.
 * @returns {Promise<{copied: number, skipped: number, errors: number}>}
 */
async function copyTo(out, todo) {
    const s = { copied: 0, skipped: 0, errors: 0 };
    for (let i = 0; i < todo.length; i++) {
        const item = todo[i];
        setStatus('Copying… ' + (i + 1) + '/' + todo.length + ' — ' + item.d.filename);
        try {
            const outcome = await copyOne(out, item); // 'copied' | 'skipped'
            s[outcome]++;
        } catch (e) {
            s.errors++;
            toast('Failed to copy ' + item.outRel + ' — ' + (e.message || e), 'error');
        }
    }
    setStatus('');
    return s;
}

/**
 * Number of files that are currently ready to copy (see plan()).
 *
 * @returns {number}
 */
function readyCount() {
    return plan().length;
}

// Public surface of the copy module, registered on the shared app object.
window.app.modules.copy = {
    run: run,
    readyCount: readyCount,
    chooseOutput: chooseOutput,
    // test/advanced seams
    plan: plan,
    conflictsIn: conflictsIn,
    copyTo: copyTo,
};

})();
|