430 lines
21 KiB
JavaScript
430 lines
21 KiB
JavaScript
/**
 * ZDDC Classifier — copy-out (Classify & Copy mode).
 *
 * Copies the fully-classified source files into a SEPARATE output directory
 * under their canonical ZDDC names and folder layout
 *   <party>/{received,issued}/<DATE_TN (STATUS) - TITLE>/<TRACKING_REV (STATUS) - TITLE.ext>
 * The source is never modified — every operation is a read (getFile) on the
 * source and a write into the chosen output handle.
 *
 * Duplicate detection:
 *   - two sources → the same output path, identical bytes (sha256) = collapsed to one copy
 *   - two sources → the same output path, different bytes = mapping conflict (skipped + reported)
 *   - target already exists = skipped and left untouched (existence check only, no clobber)
 *
 * Built on the generic FS-Access shape (getDirectoryHandle/getFileHandle/
 * createWritable), so it works against a real handle today and a server-backed
 * output handle later without changing this logic.
 */
|
||
(function () {
|
||
'use strict';
|
||
|
||
// Output directory handle picked via chooseOutput(); kept for the session so
// later local copies reuse it instead of opening the picker again.
var outputHandle = null;
|
||
|
||
// Accessor for the classify module; looked up on every call rather than
// captured once when this file is evaluated.
function C() {
    var modules = window.app.modules;
    return modules.classify;
}
|
||
|
||
// Flatten window.app.folderTree into a single list of file objects.
// Order is depth-first: a node's own files first, then each child subtree in
// turn. Missing `files` / `children` / tree are treated as empty.
function collectFiles() {
    const files = [];
    function visit(nodes) {
        for (const node of nodes || []) {
            for (const file of node.files || []) files.push(file);
            visit(node.children);
        }
    }
    visit(window.app.folderTree || []);
    return files;
}
|
||
|
||
// Build the copy plan: one entry per file whose derived target is complete
// and not excluded. Each entry is { file, d, outRel } where `d` is whatever
// classify.deriveTarget(file) returned and `outRel` is the output-relative
// path (d.outPath + '/' + d.filename).
function plan() {
    const classify = C();
    const ready = [];
    for (const file of collectFiles()) {
        const target = classify.deriveTarget(file);
        if (target.excluded || !target.complete) continue;
        ready.push({ file: file, d: target, outRel: target.outPath + '/' + target.filename });
    }
    return ready;
}
|
||
|
||
// Group plan items by output path. Returns { by, conflicts } where `by` maps
// each outRel to the list of items targeting it and `conflicts` lists every
// outRel claimed by more than one source (a mapping conflict).
function conflictsIn(items) {
    const by = {};
    for (const item of items) {
        if (!by[item.outRel]) by[item.outRel] = [];
        by[item.outRel].push(item);
    }
    const conflicts = Object.keys(by).filter(function (path) {
        return by[path].length > 1;
    });
    return { by: by, conflicts: conflicts };
}
|
||
|
||
// Show a toast through window.zddc.toast when that API is present; otherwise
// do nothing.
function toast(msg, level) {
    var api = window.zddc;
    if (!api || !api.toast) return;
    api.toast(msg, level);
}
|
||
// Write `text` into the #scanStatus element and toggle its 'scanning' class
// (on for non-empty text, off for empty). No-op when the element is absent.
function setStatus(text) {
    const statusEl = document.getElementById('scanStatus');
    if (!statusEl) return;
    statusEl.textContent = text;
    statusEl.classList.toggle('scanning', Boolean(text));
}
|
||
|
||
// Ask the user for an output directory (read/write) via the File System
// Access picker. On success the handle is remembered in `outputHandle`, its
// name is passed to classify.setOutputName, and the handle is returned.
// Returns null when the API is unavailable, the user cancels (AbortError, no
// toast), or the picker fails (error toast).
async function chooseOutput() {
    if (!window.showDirectoryPicker) {
        toast('Copying to an output directory needs the File System Access API (use Chromium, or run via zddc-server).', 'error');
        return null;
    }
    let picked;
    try {
        picked = await window.showDirectoryPicker({ mode: 'readwrite', id: 'zddc-classifier-output' });
        outputHandle = picked;
        C().setOutputName(picked.name);
    } catch (err) {
        // A dismissed picker is not an error worth reporting.
        if (err.name !== 'AbortError') toast('Could not open the output directory — ' + (err.message || err), 'error');
        return null;
    }
    return picked;
}
|
||
|
||
// Walk `relPath` ('/'-separated, empty segments ignored) down from `root`,
// creating each directory that does not exist yet. Returns the handle of the
// deepest directory (or `root` itself for an empty path).
async function ensureDir(root, relPath) {
    let dir = root;
    for (const segment of relPath.split('/').filter(Boolean)) {
        dir = await dir.getDirectoryHandle(segment, { create: true });
    }
    return dir;
}
|
||
|
||
// Resolve the subdirectory `relPath` ('/'-separated) under `root`.
//   create falsy  → nothing is created; returns null as soon as any segment
//                   cannot be opened. Lets callers probe for a file cheaply on
//                   resume before paying to create the folder chain.
//   create truthy → missing segments are created; failures propagate.
// Returns the deepest directory handle (or `root` for an empty path).
async function resolveDir(root, relPath, create) {
    const options = create ? { create: true } : undefined;
    let dir = root;
    for (const segment of relPath.split('/').filter(Boolean)) {
        try {
            dir = await dir.getDirectoryHandle(segment, options);
        } catch (err) {
            if (create) throw err;
            return null;
        }
    }
    return dir;
}
|
||
|
||
// Return a live handle for a source file object. A file that already carries
// `handle` returns it directly; otherwise the handle is resolved through
// scanner.resolveFileHandle from window.app.rootHandle.
// Throws Error('source directory not connected') when there is no handle and
// no root to resolve from.
async function srcHandle(fileObj) {
    if (fileObj.handle) return fileObj.handle;
    const root = window.app.rootHandle;
    if (!root) throw new Error('source directory not connected');
    return window.app.modules.scanner.resolveFileHandle(root, fileObj);
}
|
||
|
||
// Read a source file's bytes. A virtual file (fileObj.isVirtual, i.e. a zip
// member) is extracted via scanner.extractZipMember using the workspace root;
// any other file is read with getFile() on its resolved handle. Only reads
// are performed on the source.
async function readSource(fileObj) {
    if (!fileObj.isVirtual) {
        const handle = await srcHandle(fileObj);
        return handle.getFile();
    }
    return window.app.modules.scanner.extractZipMember(window.app.rootHandle, fileObj);
}
|
||
|
||
// copyOne (defined further below, after writeTarget): copies one file and
// returns 'copied' | 'skipped' (already present → resumable).
// The existence check is a cheap stat/HEAD; a present target is left as-is so
// re-running after an interruption skips the work already done — no source
// read, no hashing. (Canonical ZDDC names ⇒ same name = same document, and
// the server archive is WORM, so we never overwrite.)
|
||
// SHA-256 of a source file's bytes (via window.zddc.crypto.sha256File). The
// result is cached on fileObj.sha256, so the duplicate-conflict audit and the
// post-copy verify share one read + hash per file.
async function sourceSha(fileObj) {
    if (!fileObj.sha256) {
        const bytes = await readSource(fileObj);
        fileObj.sha256 = await window.zddc.crypto.sha256File(bytes);
    }
    return fileObj.sha256;
}
|
||
// Write one planned item into the output: read the source bytes, create the
// target folder chain (p.d.outPath) and file (p.d.filename), then stream the
// bytes in. The source is only ever read.
//
// The source is read BEFORE any directory is created, so an unreadable source
// does not leave an empty folder chain behind. If the write or close fails,
// the writable is aborted (when the handle supports it) so the stream is
// released, and the original error is rethrown.
async function writeTarget(out, p) {
    const srcFile = await readSource(p.file); // READ source (never write it)
    const dir = await ensureDir(out, p.d.outPath);
    const fileHandle = await dir.getFileHandle(p.d.filename, { create: true });
    const writable = await fileHandle.createWritable();
    try {
        await writable.write(srcFile);
        await writable.close();
    } catch (err) {
        if (typeof writable.abort === 'function') {
            try { await writable.abort(); } catch (abortErr) { /* the write error is the one to report */ }
        }
        throw err;
    }
}
|
||
// Copy one planned item. Returns 'skipped' when the target file can already
// be opened (left as-is: no source read, no hashing — this is what makes a
// re-run resumable), otherwise writes it and returns 'copied'.
//
// The probe resolves the target directory WITHOUT creating it. Any failure to
// open the target file is treated as "not there yet" and falls through to the
// write.
async function copyOne(out, p) {
    const existingDir = await resolveDir(out, p.d.outPath, false);
    if (existingDir) {
        let present = true;
        try {
            await existingDir.getFileHandle(p.d.filename);
        } catch (err) {
            present = false; // NotFound → write it below
        }
        if (present) return 'skipped';
    }
    await writeTarget(out, p);
    return 'copied';
}
|
||
// Read the written target back and compare its SHA-256 with the source's.
// Returns true only when both hashes match; false when the target directory
// or file cannot be opened (nothing is created while probing).
async function verifyOne(out, p) {
    const dir = await resolveDir(out, p.d.outPath, false);
    if (!dir) return false;
    let targetHandle;
    try {
        targetHandle = await dir.getFileHandle(p.d.filename);
    } catch (err) {
        return false;
    }
    const targetSha = await window.zddc.crypto.sha256File(await targetHandle.getFile());
    const expectedSha = await sourceSha(p.file);
    return targetSha === expectedSha;
}
|
||
// Best-effort removal of a written target file. Does nothing when the target
// directory cannot be resolved or the handle has no removeEntry; a failure
// while removing is deliberately ignored.
async function removeTarget(out, p) {
    const dir = await resolveDir(out, p.d.outPath, false);
    if (!dir || !dir.removeEntry) return;
    try {
        await dir.removeEntry(p.d.filename);
    } catch (err) {
        /* best effort */
    }
}
|
||
|
||
// Make sure the source can be read before any bytes are hashed or copied.
// If every item's file already carries a handle there is nothing to do.
// Otherwise the workspace root must be connected and read permission must be
// confirmed through persist.verifyPermission(root, false).
// Returns true when reading can proceed; false (after an error toast) when
// the root is missing or permission is denied.
async function ensureSourceReadable(items) {
    const needsRoot = items.some(function (p) { return !p.file.handle; });
    if (!needsRoot) return true;

    const root = window.app.rootHandle;
    if (!root) {
        toast('The source directory isn’t connected. Re-open the workspace to reconnect it.', 'error');
        return false;
    }
    const granted = await window.app.modules.persist.verifyPermission(root, false);
    if (!granted) {
        toast('Permission to read the source directory was denied.', 'error');
        return false;
    }
    return true;
}
|
||
|
||
// Group plan items by canonical output path and decide what to copy.
// Files with the SAME output name must have the same content:
//   - a path with a single source goes straight into `todo`;
//   - several sources with identical SHA-256 collapse to one copy (the first
//     item) and the rest are counted in `dupeCount`;
//   - differing hashes, or any source that could not be hashed, make the
//     path a conflict: it is listed in `conflicts`, left out of `todo`, and
//     every member's d.key is flagged in `conflictKeys`.
// Returns { todo, conflicts, conflictKeys, dupeCount }.
async function resolvePlan(items) {
    const by = {};
    for (const item of items) {
        (by[item.outRel] = by[item.outRel] || []).push(item);
    }

    const todo = [];
    const conflicts = [];
    const conflictKeys = {};
    let dupeCount = 0;

    for (const outRel of Object.keys(by)) {
        const group = by[outRel];
        if (group.length === 1) {
            todo.push(group[0]);
            continue;
        }

        // Hash every member; sources are read one at a time.
        const hashes = new Set();
        let unreadable = false;
        for (const member of group) {
            try {
                hashes.add(await sourceSha(member.file));
            } catch (err) {
                unreadable = true;
            }
        }

        if (!unreadable && hashes.size === 1) {
            todo.push(group[0]); // identical → one copy
            dupeCount += group.length - 1;
        } else {
            conflicts.push(outRel);
            for (const member of group) conflictKeys[member.d.key] = true;
        }
    }
    return { todo: todo, conflicts: conflicts, conflictKeys: conflictKeys, dupeCount: dupeCount };
}
|
||
|
||
// Pre-flight shared by Copy and the standalone "Check" button.
// Builds the plan, makes sure the source is readable, hashes colliding names
// (resolvePlan), flags conflicts in the UI via classify.setHashConflicts and
// reports conflicts / collapsed duplicates as toasts.
// `verb` only shapes the "Nothing <verb> yet" message.
// Returns the resolvePlan result, or null when there is nothing to do or the
// source cannot be read.
async function preflight(verb) {
    const items = plan();
    if (!items.length) {
        toast('Nothing ' + verb + ' yet — no files are fully classified (need a tracking leaf AND a transmittal).', 'warning');
        return null;
    }
    if (!(await ensureSourceReadable(items))) return null;

    let r;
    setStatus('Checking for same-name/different-content conflicts…');
    try {
        r = await resolvePlan(items);
    } finally {
        // Never leave the status line stuck on "Checking…" if the check throws.
        setStatus('');
    }

    C().setHashConflicts(r.conflictKeys);
    if (r.conflicts.length) {
        toast(r.conflicts.length + ' same-name/different-content conflict(s) flagged (≠ in red): same tracking+revision, different bytes. Fix these before copying.', 'error');
    }
    if (r.dupeCount) toast(r.dupeCount + ' exact duplicate(s) collapse to one copy.', 'info');
    return r;
}
|
||
|
||
// Standalone audit (the "Check" button): run the pre-flight without copying.
// Shows a success toast when the pre-flight produced a result with no
// conflicts. Returns whatever preflight returned (possibly null).
async function audit() {
    const r = await preflight('to check');
    const clean = r && !r.conflicts.length;
    if (clean) {
        const count = r.todo.length;
        toast('No conflicts — ' + count + ' file' + (count === 1 ? '' : 's') + ' ready to copy.', 'success');
    }
    return r;
}
|
||
|
||
// Entry point for the Copy action. Does nothing unless classify reports it is
// enabled. Runs the pre-flight, then asks where the copies should go — the
// server archive (HTTP) or a local folder — and hands the deduped list to the
// matching copier. Conflicting groups are never part of that list; if nothing
// else is left, the user is told to resolve the conflicts first.
// Returns the copier's result, or undefined when the run stops early.
async function run() {
    if (!C().isEnabled()) return;

    const r = await preflight('to copy');
    if (!r) return;

    const todo = r.todo;
    if (!todo.length) {
        if (r.conflicts.length) toast('Resolve the flagged conflicts, then copy.', 'warning');
        return;
    }

    const dest = await chooseDestination(todo.length);
    if (!dest) return;
    return dest === 'server' ? copyToServer(todo) : copyToLocal(todo);
}
|
||
|
||
// Toast a one-line result for a finished copy run.
// `s` is the stats object from copyTo ({ copied, skipped, errors,
// verifyFailed }); `where` names the destination. The verification and error
// clauses appear only when their counts are non-zero, and either of them
// turns the toast from 'success' into 'warning'.
function summary(s, where) {
    const parts = ['Copy to ' + where + ' — ' + s.copied + ' copied & verified, ' + s.skipped + ' already there'];
    if (s.verifyFailed) {
        parts.push(', ' + s.verifyFailed + ' FAILED verification (bad copy removed — re-run)');
    }
    if (s.errors) {
        parts.push(', ' + s.errors + ' errored (retry to resume)');
    }
    parts.push('.');
    const level = (s.errors || s.verifyFailed) ? 'warning' : 'success';
    toast(parts.join(''), level);
}
|
||
|
||
// Copy the ready list into a local folder. Reuses the output handle
// remembered for this session, otherwise asks for one; then confirms with the
// user before running the copy engine and toasting the summary.
// Returns the stats object from copyTo, or undefined when no folder was
// chosen or the user declined.
async function copyToLocal(todo) {
    const out = outputHandle || await chooseOutput();
    if (!out) return;

    const question = 'Copy ' + todo.length + ' file(s) into "' + out.name + '"?\n\n'
        + 'Written under <party>/<received|issued>/<transmittal>/ — pick your archive/ folder to file them directly. '
        + 'Re-running resumes (already-copied files are skipped). The source is not modified.';
    if (!confirm(question)) return;

    const stats = await copyTo(out, todo);
    summary(stats, '"' + out.name + '"');
    return stats;
}
|
||
|
||
// Copy straight into a project's archive on the server over HTTP. Uses the
// zddc-source HTTP directory handle, so the SAME copy engine writes
// <party>/<received|issued>/<transmittal>/<name> under <project>/archive/.
// The user picks any project returned by fetchAccessProjects.
// The archive URL is proj.url (or '/<proj.name>/' when absent) + 'archive/',
// resolved against location.origin.
// Returns the stats object from copyTo, or undefined when the copy cannot
// start (not served over http, projects unavailable, nothing picked, bad URL).
async function copyToServer(todo) {
    const src = window.zddc && window.zddc.source;
    if (!src || location.protocol === 'file:') {
        toast('Server copy needs the classifier to be served by a zddc-server (open it over http).', 'error');
        return;
    }

    const projects = await fetchAccessProjects();
    if (projects == null) {
        toast('Could not load your projects from the server.', 'error');
        return;
    }
    if (!projects.length) {
        toast('No projects you can access on this server.', 'warning');
        return;
    }

    const proj = await chooseProject(projects);
    if (!proj) return;

    let archive;
    try {
        let rel = proj.url || ('/' + proj.name + '/');
        if (rel.charAt(rel.length - 1) !== '/') rel += '/';
        archive = new URL(rel + 'archive/', location.origin).href;
    } catch (e) {
        toast('Bad project URL — ' + (e.message || e), 'error');
        return;
    }

    const out = new src.HttpDirectoryHandle(archive, 'archive');
    const stats = await copyTo(out, todo);
    summary(stats, (proj.title || proj.name) + ' / archive');
    return stats;
}
|
||
// Load the caller's accessible projects from /.profile/access (same-origin
// credentials, no cache). Write permission is enforced server-side on PUT, so
// a 403 surfaces per file rather than here.
// Returns the `projects` array from the JSON reply ([] when that field is not
// an array), or null when the request fails, the status is not OK, the reply
// is not JSON, or parsing throws.
async function fetchAccessProjects() {
    try {
        const resp = await fetch('/.profile/access', {
            headers: { 'Accept': 'application/json' },
            credentials: 'same-origin',
            cache: 'no-cache',
        });
        if (!resp.ok) return null;

        const contentType = (resp.headers.get('Content-Type') || '').toLowerCase();
        if (contentType.indexOf('json') === -1) return null;

        const data = await resp.json();
        return Array.isArray(data.projects) ? data.projects : [];
    } catch (e) {
        return null;
    }
}
|
||
// Modal picker for the destination project. `projects` is the list to offer;
// each entry is shown as "name — title" (title optional).
// Resolves with the chosen project object, or null when the user cancels
// (Cancel button, Escape, or a click on the backdrop). The modal and its key
// listener are removed exactly once, whichever way it closes.
//
// The path hint contains literal angle-bracket placeholders, so it is built
// with textContent: assigned through innerHTML, <project>, <party>, … would
// be parsed as unknown tags and never shown.
function chooseProject(projects) {
    return new Promise(function (resolve) {
        let settled = false;

        const backdrop = document.createElement('div');
        backdrop.className = 'copy-choice__backdrop';

        function onKey(e) { if (e.key === 'Escape') finish(null); }
        function finish(value) {
            if (settled) return;
            settled = true;
            document.removeEventListener('keydown', onKey);
            backdrop.remove();
            resolve(value);
        }

        const box = document.createElement('div');
        box.className = 'copy-choice';

        const heading = document.createElement('h3');
        heading.textContent = 'Copy to a project archive';

        const hint = document.createElement('p');
        const pathCode = document.createElement('code');
        pathCode.textContent = '<project>/archive/<party>/<received|issued>/<transmittal>/';
        hint.appendChild(document.createTextNode('Files go to '));
        hint.appendChild(pathCode);
        hint.appendChild(document.createTextNode('. Pick a project you can access.'));

        // Option values are indexes into `projects`.
        const select = document.createElement('select');
        select.className = 'copy-choice__select';
        projects.forEach(function (pr, i) {
            const option = document.createElement('option');
            option.value = String(i);
            option.textContent = pr.name + (pr.title ? ' — ' + pr.title : '');
            select.appendChild(option);
        });

        const buttons = document.createElement('div');
        buttons.className = 'copy-choice__btns';

        const go = document.createElement('button');
        go.className = 'btn btn-primary';
        go.textContent = 'Copy here';
        go.addEventListener('click', function () { finish(projects[Number(select.value)] || null); });

        const cancel = document.createElement('button');
        cancel.className = 'btn btn-secondary';
        cancel.textContent = 'Cancel';
        cancel.addEventListener('click', function () { finish(null); });

        buttons.appendChild(go);
        buttons.appendChild(cancel);
        box.appendChild(heading);
        box.appendChild(hint);
        box.appendChild(select);
        box.appendChild(buttons);
        backdrop.appendChild(box);

        // Only a click on the backdrop itself cancels, not one inside the box.
        backdrop.addEventListener('click', function (e) { if (e.target === backdrop) finish(null); });
        document.addEventListener('keydown', onKey);
        document.body.appendChild(backdrop);
    });
}
|
||
|
||
// Tiny modal: choose server archive vs local folder. `n` is the number of
// files about to be copied (used in the heading only).
// Resolves 'server' | 'local' | null (Cancel, Escape, or backdrop click).
// The server button is disabled unless the page is served over http(s); in
// that case the local button becomes the primary one.
//
// The path hint contains literal angle-bracket placeholders, so it is built
// with textContent: assigned through innerHTML, <party>, <name>, … would be
// parsed as unknown tags and never shown.
function chooseDestination(n) {
    return new Promise(function (resolve) {
        let settled = false;
        const onServer = location.protocol === 'http:' || location.protocol === 'https:';

        const backdrop = document.createElement('div');
        backdrop.className = 'copy-choice__backdrop';

        function onKey(e) { if (e.key === 'Escape') finish(null); }
        function finish(value) {
            if (settled) return;
            settled = true;
            document.removeEventListener('keydown', onKey);
            backdrop.remove();
            resolve(value);
        }

        // A disabled button gets a tooltip and no click handler.
        function makeButton(label, cls, val, disabled) {
            const b = document.createElement('button');
            b.className = 'btn ' + cls;
            b.textContent = label;
            if (disabled) {
                b.disabled = true;
                b.title = 'Open the classifier over a zddc-server to enable this';
            } else {
                b.addEventListener('click', function () { finish(val); });
            }
            return b;
        }

        const box = document.createElement('div');
        box.className = 'copy-choice';

        const heading = document.createElement('h3');
        heading.textContent = 'Copy ' + n + ' classified file' + (n === 1 ? '' : 's');

        const hint = document.createElement('p');
        const pathCode = document.createElement('code');
        pathCode.textContent = '<party>/<received|issued>/<transmittal>/<name>';
        hint.appendChild(document.createTextNode('Filed under '));
        hint.appendChild(pathCode);
        hint.appendChild(document.createTextNode('. '
            + 'Re-running resumes — files already present at the destination are skipped.'));

        const buttons = document.createElement('div');
        buttons.className = 'copy-choice__btns';
        buttons.appendChild(makeButton('☁ Copy to server archive', 'btn-primary', 'server', !onServer));
        buttons.appendChild(makeButton('📁 Copy to a local folder…', onServer ? 'btn-secondary' : 'btn-primary', 'local'));
        buttons.appendChild(makeButton('Cancel', 'btn-secondary', null));

        box.appendChild(heading);
        box.appendChild(hint);
        box.appendChild(buttons);
        backdrop.appendChild(box);

        // Only a click on the backdrop itself cancels, not one inside the box.
        backdrop.addEventListener('click', function (e) { if (e.target === backdrop) finish(null); });
        document.addEventListener('keydown', onKey);
        document.body.appendChild(backdrop);
    });
}
|
||
|
||
// Run the copy loop over a ready list against an output handle. No picker,
// no confirm — that's the caller's job; this is the engine (and the test
// seam). Resumable: copyOne skips targets that already exist, so a re-run
// after an interruption only does the remaining work.
//
// Returns { copied, skipped, errors, verifyFailed }. A failure on one file is
// toasted and counted; it never stops the run.
//
// Second pass — verification of JUST the files copied this run: read each
// target back and compare SHA-256 to the source, with one re-copy attempt on
// mismatch. A target that still cannot be confirmed — because the hashes
// differ OR because verifying / re-copying threw — is removed (best effort),
// so a re-run copies it again instead of skipping an unverified file.
async function copyTo(out, todo) {
    const s = { copied: 0, skipped: 0, errors: 0, verifyFailed: 0 };
    const copied = [];

    for (let i = 0; i < todo.length; i++) {
        const item = todo[i];
        setStatus('Copying… ' + (i + 1) + '/' + todo.length + ' — ' + item.d.filename);
        try {
            const outcome = await copyOne(out, item);
            s[outcome]++;
            if (outcome === 'copied') copied.push(item);
        } catch (e) {
            s.errors++;
            toast('Failed to copy ' + item.outRel + ' — ' + (e.message || e), 'error');
        }
    }

    for (let k = 0; k < copied.length; k++) {
        const item = copied[k];
        setStatus('Verifying… ' + (k + 1) + '/' + copied.length + ' — ' + item.d.filename);

        let failureMessage;
        try {
            if (await verifyOne(out, item)) continue;
            await writeTarget(out, item); // one re-copy attempt
            if (await verifyOne(out, item)) continue;
            failureMessage = 'Verification failed for ' + item.outRel + ' — removed the bad copy; re-run to retry.';
        } catch (e) {
            failureMessage = 'Verify error for ' + item.outRel + ' — ' + (e.message || e);
        }

        // Counted once per file, whichever way verification failed.
        s.verifyFailed++;
        try { await removeTarget(out, item); } catch (cleanupErr) { /* best effort */ }
        toast(failureMessage, 'error');
    }

    setStatus('');
    return s;
}
|
||
|
||
// Number of files currently ready to copy (the length of the current plan).
function readyCount() {
    var ready = plan();
    return ready.length;
}
|
||
|
||
// Public surface of the copy module, registered on window.app.modules.
window.app.modules.copy = {
    run: run,
    audit: audit,
    readyCount: readyCount,
    chooseOutput: chooseOutput,

    // shared with the MDL flow
    fetchAccessProjects: fetchAccessProjects,
    chooseProject: chooseProject,

    // test/advanced seams
    plan: plan,
    conflictsIn: conflictsIn,
    resolvePlan: resolvePlan,
    copyTo: copyTo,
};
|
||
})();
|