diff --git a/classifier/js/app.js b/classifier/js/app.js
index 5612465..f5161f7 100644
--- a/classifier/js/app.js
+++ b/classifier/js/app.js
@@ -182,7 +182,8 @@
             modeClassifyBtn: document.getElementById('modeClassifyBtn'),
             spreadsheetPane: document.getElementById('spreadsheetPane'),
             targetPane: document.getElementById('targetPane'),
-            copyOutputBtn: document.getElementById('copyOutputBtn')
+            copyOutputBtn: document.getElementById('copyOutputBtn'),
+            checkDuplicatesBtn: document.getElementById('checkDuplicatesBtn')
         };
     }
 
@@ -372,6 +373,7 @@
         if (app.dom.modeRenameBtn) app.dom.modeRenameBtn.addEventListener('click', function () { setMode('rename'); });
         if (app.dom.modeClassifyBtn) app.dom.modeClassifyBtn.addEventListener('click', function () { setMode('classify'); });
         if (app.dom.copyOutputBtn) app.dom.copyOutputBtn.addEventListener('click', function () { app.modules.copy.run(); });
+        if (app.dom.checkDuplicatesBtn) app.dom.checkDuplicatesBtn.addEventListener('click', function () { app.modules.copy.audit(); });
 
         // Live source-tree filter (matches file path + name; reveals the hierarchy).
         if (app.dom.treeFilterInput) app.dom.treeFilterInput.addEventListener('input', function () {
diff --git a/classifier/js/classify.js b/classifier/js/classify.js
index 73a793e..5cdfe59 100644
--- a/classifier/js/classify.js
+++ b/classifier/js/classify.js
@@ -62,6 +62,13 @@
     // id -> { node, kind:'tracking'|'party'|'slot'|'transmittal', parent }
     var nodeIndex = {};
 
+    // Transient (not serialized): srcKeys flagged by the copy audit as a
+    // same-name/different-content conflict. Cleared whenever a placement changes.
+    var hashConflicts = {};
+    function setHashConflicts(map) { hashConflicts = map || {}; notify(); }
+    function hasHashConflict(key) { return Object.prototype.hasOwnProperty.call(hashConflicts, key) && !!hashConflicts[key]; } // own keys only: a srcKey such as "constructor" must not match Object.prototype
+    function clearHashConflicts() { hashConflicts = {}; }
+
     // ── pub/sub ──────────────────────────────────────────────────────────────
     var listeners = [];
     function on(cb) { listeners.push(cb); return function () { listeners = listeners.filter(function (f) { return f !== cb; }); }; }
@@ -139,6 +146,7 @@
             a.excluded = false; // placing un-excludes
             cleanAssignment(k);
         });
+        clearHashConflicts(); // a placement changed → stale conflict flags
         notify();
     }
     function setExcluded(keys, excluded) {
@@ -148,6 +156,7 @@
             if (excluded) { a.trackingNodeId = null; a.transmittalNodeId = null; }
             cleanAssignment(k);
         });
+        clearHashConflicts();
         notify();
     }
     // Forget any assignment for these source keys (e.g. when a .zip flips
@@ -608,6 +617,7 @@
         // assignments
         assignmentFor: assignmentFor, getAssignment: getAssignment,
         place: place, setExcluded: setExcluded, dropAssignments: dropAssignments,
+        setHashConflicts: setHashConflicts, hasHashConflict: hasHashConflict,
         setTitleOverride: setTitleOverride,
         // trees
         addTrackingNode: addTrackingNode, addParty: addParty,
diff --git a/classifier/js/copy.js b/classifier/js/copy.js
index c6e3698..07e35e2 100644
--- a/classifier/js/copy.js
+++ b/classifier/js/copy.js
@@ -124,6 +124,23 @@
     // re-running after an interruption skips the work already done — no source
     // read, no hashing. (Canonical ZDDC names ⇒ same name = same document, and
     // the server archive is WORM, so we never overwrite.)
+    // SHA-256 of a source file's bytes, cached on the file object (reused by the
+    // duplicate-conflict audit AND the post-copy verify).
+    async function sourceSha(fileObj) {
+        if (fileObj.sha256) return fileObj.sha256;
+        var blob = await readSource(fileObj);
+        var h = await window.zddc.crypto.sha256File(blob);
+        fileObj.sha256 = h;
+        return h;
+    }
+    async function writeTarget(out, p) {
+        var dir = await ensureDir(out, p.d.outPath);
+        var srcFile = await readSource(p.file); // READ source (never write it)
+        var fh = await dir.getFileHandle(p.d.filename, { create: true });
+        var w = await fh.createWritable();
+        await w.write(srcFile);
+        await w.close();
+    }
     async function copyOne(out, p) {
         // Cheap existence probe: resolve the dir WITHOUT creating it (the HTTP
         // handle doesn't verify here, but getFileHandle below does a HEAD).
@@ -132,58 +149,109 @@
             try { await probe.getFileHandle(p.d.filename); return 'skipped'; }
             catch (e) { /* NotFound → write it below */ }
         }
-        // Write path: create the folder chain (idempotent) then read + write.
-        var dir = await ensureDir(out, p.d.outPath);
-        var srcFile = await readSource(p.file); // READ source (never write it)
-        var fh = await dir.getFileHandle(p.d.filename, { create: true });
-        var w = await fh.createWritable();
-        await w.write(srcFile);
-        await w.close();
+        await writeTarget(out, p);
         return 'copied';
     }
+    // Read the written target back and compare its SHA-256 to the source.
+    async function verifyOne(out, p) {
+        var dir = await resolveDir(out, p.d.outPath, false);
+        if (!dir) return false;
+        var fh; try { fh = await dir.getFileHandle(p.d.filename); } catch (e) { return false; }
+        var th = await window.zddc.crypto.sha256File(await fh.getFile());
+        return th === (await sourceSha(p.file));
+    }
+    async function removeTarget(out, p) {
+        var dir = await resolveDir(out, p.d.outPath, false);
+        if (dir && dir.removeEntry) { try { await dir.removeEntry(p.d.filename); } catch (e) { /* best effort */ } }
+    }
+
+    // Snapshot-loaded files have no live handle — re-grant read on the source
+    // (one click) before we read any bytes (hashing or copying). Returns false
+    // if the source can't be read.
+    async function ensureSourceReadable(items) {
+        if (!items.some(function (p) { return !p.file.handle; })) return true;
+        if (!window.app.rootHandle) {
+            toast('The source directory isn’t connected. Re-open the workspace to reconnect it.', 'error');
+            return false;
+        }
+        var ok = await window.app.modules.persist.verifyPermission(window.app.rootHandle, false);
+        if (!ok) { toast('Permission to read the source directory was denied.', 'error'); return false; }
+        return true;
+    }
+
+    // Group fully-classified files by their canonical output name. Files with the
+    // SAME tracking number + revision MUST have the same content: identical bytes
+    // collapse to a single copy; differing bytes are a CONFLICT the user must fix.
+    async function resolvePlan(items) {
+        var by = {};
+        items.forEach(function (p) { (by[p.outRel] = by[p.outRel] || []).push(p); });
+        var todo = [], conflicts = [], conflictKeys = {}, dupeCount = 0, keys = Object.keys(by);
+        for (var i = 0; i < keys.length; i++) {
+            var group = by[keys[i]];
+            if (group.length === 1) { todo.push(group[0]); continue; }
+            var hashes = [], bad = false;
+            for (var j = 0; j < group.length; j++) {
+                try { hashes.push(await sourceSha(group[j].file)); } catch (e) { bad = true; hashes.push('ERR' + j); }
+            }
+            var distinct = {}; hashes.forEach(function (h) { distinct[h] = true; });
+            if (!bad && Object.keys(distinct).length === 1) {
+                todo.push(group[0]); dupeCount += group.length - 1; // identical → one copy
+            } else {
+                conflicts.push(keys[i]);
+                group.forEach(function (g) { conflictKeys[g.d.key] = true; });
+            }
+        }
+        return { todo: todo, conflicts: conflicts, conflictKeys: conflictKeys, dupeCount: dupeCount };
+    }
+
+    // Pre-flight shared by Copy and the standalone "Check" button: hash colliding
+    // names, flag conflicts in the UI, return the deduped todo (or null to abort).
+    async function preflight(verb) {
+        var items = plan();
+        if (!items.length) {
+            toast('Nothing ' + verb + ' yet — no files are fully classified (need a tracking leaf AND a transmittal).', 'warning');
+            return null;
+        }
+        if (!(await ensureSourceReadable(items))) return null;
+        setStatus('Checking for same-name/different-content conflicts…');
+        var r = await resolvePlan(items);
+        setStatus('');
+        C().setHashConflicts(r.conflictKeys);
+        if (r.conflicts.length) {
+            toast(r.conflicts.length + ' same-name/different-content conflict(s) flagged (≠ in red): same tracking+revision, different bytes. Fix these before copying.', 'error');
+        }
+        if (r.dupeCount) toast(r.dupeCount + ' exact duplicate(s) collapse to one copy.', 'info');
+        return r;
+    }
+
+    // Standalone audit (the "Check" button) — flag conflicts without copying.
+    async function audit() {
+        var r = await preflight('to check');
+        if (r && !r.conflicts.length) {
+            toast('No conflicts — ' + r.todo.length + ' file' + (r.todo.length === 1 ? '' : 's') + ' ready to copy.', 'success');
+        }
+        return r;
+    }
 
     async function run() {
         if (!C().isEnabled()) return;
-        var items = plan();
-        if (!items.length) {
-            toast('Nothing to copy yet — no files are fully classified (need both a tracking leaf and a transmittal).', 'warning');
-            return;
-        }
-        var cf = conflictsIn(items);
-        var blocked = {};
-        cf.conflicts.forEach(function (path) { blocked[path] = true; });
-        var todo = items.filter(function (p) { return !blocked[p.outRel]; });
+        var r = await preflight('to copy');
+        if (!r) return;
+        var todo = r.todo;
+        if (!todo.length) { if (r.conflicts.length) toast('Resolve the flagged conflicts, then copy.', 'warning'); return; }
 
-        if (cf.conflicts.length) {
-            toast(cf.conflicts.length + ' output-name collision(s) — two source files map to the same name. Skipped:\n'
-                + cf.conflicts.join('\n'), 'error');
-        }
-        if (!todo.length) return;
-
-        // Where to file the canonical copies: the server archive (HTTP) or a
-        // local folder (File System Access). Both read the source, never write it,
-        // and both are resumable — already-present targets are skipped.
+        // Where to file the canonical copies: the server archive (HTTP) or a local
+        // folder. Both read the source, never write it, both resumable + verified.
         var dest = await chooseDestination(todo.length);
         if (!dest) return;
-
-        // Snapshot-loaded files have no live handle — re-grant read on the
-        // workspace source directory (one click) before reading.
-        if (todo.some(function (p) { return !p.file.handle; })) {
-            if (!window.app.rootHandle) {
-                toast('The source directory isn’t connected. Re-open the workspace to reconnect it.', 'error');
-                return;
-            }
-            var srcOk = await window.app.modules.persist.verifyPermission(window.app.rootHandle, false);
-            if (!srcOk) { toast('Permission to read the source directory was denied.', 'error'); return; }
-        }
         return dest === 'server' ? copyToServer(todo) : copyToLocal(todo);
     }
 
     function summary(s, where) {
-        var msg = 'Copy to ' + where + ' — ' + s.copied + ' copied, ' + s.skipped + ' already there'
-            + (s.errors ? (', ' + s.errors + ' failed (retry to resume)') : '') + '.';
-        toast(msg, s.errors ? 'warning' : 'success');
+        var msg = 'Copy to ' + where + ' — ' + s.copied + ' copied & verified, ' + s.skipped + ' already there'
+            + (s.verifyFailed ? (', ' + s.verifyFailed + ' FAILED verification (bad copy removed — re-run)') : '')
+            + (s.errors ? (', ' + s.errors + ' errored (retry to resume)') : '') + '.';
+        toast(msg, (s.errors || s.verifyFailed) ? 'warning' : 'success');
     }
 
     async function copyToLocal(todo) {
@@ -302,11 +370,14 @@
     // Resumable: copyOne skips targets that already exist, so a re-run after an
     // interruption only does the remaining work.
     async function copyTo(out, todo) {
-        var s = { copied: 0, skipped: 0, errors: 0 };
+        var s = { copied: 0, skipped: 0, errors: 0, verifyFailed: 0 };
+        var copied = [];
         for (var i = 0; i < todo.length; i++) {
             setStatus('Copying… ' + (i + 1) + '/' + todo.length + ' — ' + todo[i].d.filename);
             try {
-                s[await copyOne(out, todo[i])]++;
+                var r = await copyOne(out, todo[i]);
+                s[r]++;
+                if (r === 'copied') copied.push(todo[i]);
             } catch (e) {
                 s.errors++;
                 if (window.zddc && window.zddc.toast) {
@@ -314,6 +385,31 @@
                 }
             }
         }
+        // Verification pass over JUST the files copied this run: read each target
+        // back, compare SHA-256 to the source. One re-copy attempt on mismatch;
+        // if it still fails, remove the bad target so a re-run re-copies it — so
+        // resume converges on a fully-correct archive.
+        for (var k = 0; k < copied.length; k++) {
+            setStatus('Verifying… ' + (k + 1) + '/' + copied.length + ' — ' + copied[k].d.filename);
+            try {
+                if (await verifyOne(out, copied[k])) continue;
+                await writeTarget(out, copied[k]);
+                if (await verifyOne(out, copied[k])) continue;
+                s.verifyFailed++;
+                await removeTarget(out, copied[k]);
+                if (window.zddc && window.zddc.toast) {
+                    window.zddc.toast('Verification failed for ' + copied[k].outRel + ' — removed the bad copy; re-run to retry.', 'error');
+                }
+            } catch (e) {
+                s.verifyFailed++;
+                // An unverified target must not survive: copyOne skips names that
+                // already exist, so a leftover bad copy would never be re-copied.
+                try { await removeTarget(out, copied[k]); } catch (e2) { /* best effort */ }
+                if (window.zddc && window.zddc.toast) {
+                    window.zddc.toast('Verify error for ' + copied[k].outRel + ' — ' + (e.message || e), 'error');
+                }
+            }
+        }
         setStatus('');
         return s;
     }
@@ -322,11 +418,13 @@
 
     window.app.modules.copy = {
         run: run,
+        audit: audit,
         readyCount: readyCount,
         chooseOutput: chooseOutput,
         // test/advanced seams
         plan: plan,
         conflictsIn: conflictsIn,
+        resolvePlan: resolvePlan,
         copyTo: copyTo,
     };
 })();
diff --git a/classifier/js/target-tree.js b/classifier/js/target-tree.js
index 47d15c0..25c4864 100644
--- a/classifier/js/target-tree.js
+++ b/classifier/js/target-tree.js
@@ -276,17 +276,20 @@
     // delegated preview + name-edit handlers apply.
     function fileCellContent(f) {
         var d = C().deriveTarget(f);
-        var row = el('div', 'tfile' + (d.errors.length ? ' tfile--err' : ''));
+        var conflict = C().hasHashConflict(d.key); // same name, different bytes
+        var bad = d.errors.length || conflict;
+        var row = el('div', 'tfile' + (bad ? ' tfile--err' : ''));
         row.dataset.key = d.key;
         var orig = f.originalFilename + (f.extension ? '.' + f.extension : '');
-        var name = el('input', 'tfile__name' + (d.errors.length ? ' tfile__name--err' : ''));
+        var name = el('input', 'tfile__name' + (bad ? ' tfile__name--err' : ''));
         name.type = 'text';
         name.value = d.filename || '';
         name.placeholder = '(incomplete)';
-        name.title = (d.errors.length ? d.errors.join('; ') + ' · ' : '') + 'original: ' + orig;
+        name.title = (conflict ? 'Same tracking+revision as another file but DIFFERENT content — fix before copying · ' : '')
+            + (d.errors.length ? d.errors.join('; ') + ' · ' : '') + 'original: ' + orig;
         row.appendChild(name);
-        row.appendChild(el('span', 'tfile__badge' + (d.errors.length ? ' tfile__badge--err' : ' tfile__badge--ok'),
-            d.errors.length ? '⚠' : '✓'));
+        row.appendChild(el('span', 'tfile__badge' + (bad ? ' tfile__badge--err' : ' tfile__badge--ok'),
+            conflict ? '≠' : (d.errors.length ? '⚠' : '✓')));
         return row;
     }
 
diff --git a/classifier/template.html b/classifier/template.html
index b093d74..c8eb78f 100644
--- a/classifier/template.html
+++ b/classifier/template.html
@@ -170,7 +170,8 @@
-
+
+