From 144baeec617e020cc1d08311740b1e1bf208dd3c Mon Sep 17 00:00:00 2001 From: ZDDC Date: Thu, 11 Jun 2026 08:43:53 -0500 Subject: [PATCH] =?UTF-8?q?feat(classifier):=20Copy=20=E2=86=92=20server?= =?UTF-8?q?=20archive=20(HTTP)=20or=20local=20folder,=20resumable?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the zip option. Copy now offers two real destinations, both filing under <party>/<received|issued>/<transmittal>/<name>: - Server archive — PUTs each file into a zddc-server over HTTP via the zddc-source HttpDirectoryHandle (mkdir as needed). Offered only over http(s); prompts for the archive URL (guessed from the current path's archive/ segment). - Local folder — the File System Access picker (choose archive/ to file directly). Both reuse the one copy engine and are efficiently resumable: copyOne probes the target with a cheap stat/HEAD and SKIPS anything already present — no source read, no hashing — so a re-run after an interruption only does what's left. Canonical ZDDC names + the WORM archive mean an existing name is the same document, so we never overwrite (the old content-diff path is dropped). Tests: re-run skips an existing local target; PUT into a server-style handle then resume-skips it (52 green). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- classifier/js/copy.js | 146 ++++++++++++++++++++++------------------- tests/classify.spec.js | 41 +++++++----- 2 files changed, 103 insertions(+), 84 deletions(-) diff --git a/classifier/js/copy.js b/classifier/js/copy.js index 2025727..8f346ba 100644 --- a/classifier/js/copy.js +++ b/classifier/js/copy.js @@ -88,13 +88,17 @@ return cur; } - async function sameContent(existingHandle, srcFileObj) { - var ef = await existingHandle.getFile(); - var sf = await readSource(srcFileObj); - if (ef.size !== sf.size) return false; - var a = await window.zddc.crypto.sha256File(ef); - var b = await window.zddc.crypto.sha256File(sf); - return a === b; + // Resolve a target subdirectory WITHOUT creating it (null if any segment is + // missing). Lets us check a file's existence cheaply on resume before paying + // to create the folder chain. + async function resolveDir(root, relPath, create) { + var parts = relPath.split('/').filter(Boolean); + var cur = root; + for (var i = 0; i < parts.length; i++) { + try { cur = await cur.getDirectoryHandle(parts[i], create ? { create: true } : undefined); } + catch (e) { if (!create) return null; throw e; } + } + return cur; } // Resolve a source file's live handle. Fresh-scan files already carry one; @@ -115,14 +119,21 @@ return (await srcHandle(fileObj)).getFile(); } - // Copy one file. Returns 'copied' | 'skipped' (identical) | 'differ' (left alone). + // Copy one file. Returns 'copied' | 'skipped' (already present → resumable). + // The existence check is a cheap stat/HEAD; a present target is left as-is so + // re-running after an interruption skips the work already done — no source + // read, no hashing. (Canonical ZDDC names ⇒ same name = same document, and + // the server archive is WORM, so we never overwrite.) 
async function copyOne(out, p) { - var dir = await ensureDir(out, p.d.outPath); - var existing = null; - try { existing = await dir.getFileHandle(p.d.filename); } catch (e) { /* NotFound → fresh copy */ } - if (existing) { - return (await sameContent(existing, p.file)) ? 'skipped' : 'differ'; + // Cheap existence probe: resolve the dir WITHOUT creating it (the HTTP + // handle doesn't verify here, but getFileHandle below does a HEAD). + var probe = await resolveDir(out, p.d.outPath, false); + if (probe) { + try { await probe.getFileHandle(p.d.filename); return 'skipped'; } + catch (e) { /* NotFound → write it below */ } } + // Write path: create the folder chain (idempotent) then read + write. + var dir = await ensureDir(out, p.d.outPath); var srcFile = await readSource(p.file); // READ source (never write it) var fh = await dir.getFileHandle(p.d.filename, { create: true }); var w = await fh.createWritable(); @@ -149,8 +160,9 @@ } if (!todo.length) return; - // Ask where to put the canonical copies: a directory (drop straight into - // archive/) or a downloadable zip. Both read the source, never write it. + // Where to file the canonical copies: the server archive (HTTP) or a + // local folder (File System Access). Both read the source, never write it, + // and both are resumable — already-present targets are skipped. var dest = await chooseDestination(todo.length); if (!dest) return; @@ -165,81 +177,82 @@ if (!srcOk) { toast('Permission to read the source directory was denied.', 'error'); return; } } - return dest === 'zip' ? downloadZip(todo) : copyToDirectory(todo); + return dest === 'server' ? copyToServer(todo) : copyToLocal(todo); } - async function copyToDirectory(todo) { + function summary(s, where) { + var msg = 'Copy to ' + where + ' — ' + s.copied + ' copied, ' + s.skipped + ' already there' + + (s.errors ? (', ' + s.errors + ' failed (retry to resume)') : '') + '.'; + toast(msg, s.errors ? 
'warning' : 'success'); + } + + async function copyToLocal(todo) { var out = outputHandle || await chooseOutput(); if (!out) return; if (!confirm('Copy ' + todo.length + ' file(s) into "' + out.name + '"?\n\n' - + 'Written under /// — pick your archive/ folder to file them directly. The source is not modified.')) return; - + + 'Written under /// — pick your archive/ folder to file them directly. ' + + 'Re-running resumes (already-copied files are skipped). The source is not modified.')) return; var s = await copyTo(out, todo); - var msg = 'Copy complete — ' + s.copied + ' copied, ' + s.skipped + ' identical skipped' - + (s.differ ? (', ' + s.differ + ' already exist with different content (left untouched)') : '') - + (s.errors ? (', ' + s.errors + ' errors') : '') + '.'; - toast(msg, (s.errors || s.differ) ? 'warning' : 'success'); - if (s.differing.length) toast('Existing-but-different (not overwritten):\n' + s.differing.join('\n'), 'warning'); + summary(s, '"' + out.name + '"'); return s; } - // Bundle the canonical copies into a single .zip and download it — same - // /// layout as the directory copy, so the - // archive unzips straight into place. - async function downloadZip(todo) { - if (typeof JSZip === 'undefined') { toast('Zip export needs JSZip — rebuild the classifier.', 'error'); return; } - var zip = new JSZip(); - var s = { added: 0, errors: 0 }; - for (var i = 0; i < todo.length; i++) { - setStatus('Zipping… ' + (i + 1) + '/' + todo.length + ' — ' + todo[i].d.filename); - try { zip.file(todo[i].outRel, await readSource(todo[i].file)); s.added++; } - catch (e) { s.errors++; toast('Failed to add ' + todo[i].outRel + ' — ' + (e.message || e), 'error'); } + // Copy straight into the server's archive over HTTP (PUT per file, mkdir as + // needed). Uses the zddc-source HTTP handle, so the SAME copy engine writes + // /// under the chosen archive URL. 
+ async function copyToServer(todo) { + var src = window.zddc && window.zddc.source; + if (!src || location.protocol === 'file:') { + toast('Server copy needs the classifier to be served by a zddc-server (open it over http).', 'error'); + return; } - setStatus('Packaging…'); - var blob; - try { blob = await zip.generateAsync({ type: 'blob' }); } - catch (e) { setStatus(''); toast('Could not build the zip — ' + (e.message || e), 'error'); return s; } - setStatus(''); - var name = zipBaseName() + '.zip'; - var a = document.createElement('a'); - a.href = URL.createObjectURL(blob); - a.download = name; - document.body.appendChild(a); a.click(); a.remove(); - setTimeout(function () { URL.revokeObjectURL(a.href); }, 1000); - toast('Downloaded ' + name + ' — ' + s.added + ' file' + (s.added === 1 ? '' : 's') - + (s.errors ? (', ' + s.errors + ' failed') : '') + '.', s.errors ? 'warning' : 'success'); + var url = serverArchiveUrl || guessArchiveUrl(); + url = prompt('Server archive URL to file into (canonical copies go under ///):', url); + if (!url) return; + if (url.charAt(url.length - 1) !== '/') url += '/'; + serverArchiveUrl = url; + var out; + try { + var u = new URL(url, location.origin); + out = new src.HttpDirectoryHandle(u.href, 'archive'); + } catch (e) { toast('Bad archive URL — ' + (e.message || e), 'error'); return; } + var s = await copyTo(out, todo); + summary(s, 'server archive'); return s; } - function zipBaseName() { - try { - var ws = window.app.modules.workspace; - var n = ws && ws.activeName && ws.activeName(); - if (n) return String(n).replace(/[^\w.-]+/g, '_'); - } catch (_) { /* ok */ } - return 'transmittals'; + // Best-guess archive root from the current page: the path up to and including + // the first "archive/" segment, else the served directory. + function guessArchiveUrl() { + var p = location.pathname; + var m = /^(.*?\/archive\/)/.exec(p); + return location.origin + (m ? 
m[1] : p.replace(/[^/]*$/, '')); } + var serverArchiveUrl = null; - // Tiny modal: choose directory vs zip. Resolves 'dir' | 'zip' | null. + // Tiny modal: choose server archive vs local folder. Resolves 'server' | + // 'local' | null. The server option is offered only over http(s). function chooseDestination(n) { return new Promise(function (resolve) { var done = false; function finish(v) { if (done) return; done = true; document.removeEventListener('keydown', onKey); back.remove(); resolve(v); } function onKey(e) { if (e.key === 'Escape') finish(null); } + var onServer = location.protocol === 'http:' || location.protocol === 'https:'; var back = document.createElement('div'); back.className = 'copy-choice__backdrop'; var box = document.createElement('div'); box.className = 'copy-choice'; var h = document.createElement('h3'); h.textContent = 'Copy ' + n + ' classified file' + (n === 1 ? '' : 's'); var p = document.createElement('p'); - p.innerHTML = 'Layout: <party>/<received|issued>/<transmittal>/<name>. ' - + 'Copy into a folder — choose your archive/ to file them directly — or download a zip.'; + p.innerHTML = 'Filed under <party>/<received|issued>/<transmittal>/<name>. 
' + + 'Re-running resumes — files already present at the destination are skipped.'; var row = document.createElement('div'); row.className = 'copy-choice__btns'; - function btn(label, cls, val) { + function btn(label, cls, val, disabled) { var b = document.createElement('button'); b.className = 'btn ' + cls; b.textContent = label; - b.addEventListener('click', function () { finish(val); }); + if (disabled) { b.disabled = true; b.title = 'Open the classifier over a zddc-server to enable this'; } + else b.addEventListener('click', function () { finish(val); }); return b; } - row.appendChild(btn('📁 Copy to a folder…', 'btn-primary', 'dir')); - row.appendChild(btn('🗜 Download .zip', 'btn-secondary', 'zip')); + row.appendChild(btn('☁ Copy to server archive', 'btn-primary', 'server', !onServer)); + row.appendChild(btn('📁 Copy to a local folder…', onServer ? 'btn-secondary' : 'btn-primary', 'local')); row.appendChild(btn('Cancel', 'btn-secondary', null)); box.appendChild(h); box.appendChild(p); box.appendChild(row); back.appendChild(box); @@ -251,14 +264,14 @@ // Run the copy loop over a ready list against an output handle. No picker, // no confirm — that's run()'s job; this is the engine (and the test seam). + // Resumable: copyOne skips targets that already exist, so a re-run after an + // interruption only does the remaining work. 
async function copyTo(out, todo) { - var s = { copied: 0, skipped: 0, differ: 0, errors: 0, differing: [] }; + var s = { copied: 0, skipped: 0, errors: 0 }; for (var i = 0; i < todo.length; i++) { setStatus('Copying… ' + (i + 1) + '/' + todo.length + ' — ' + todo[i].d.filename); try { - var r = await copyOne(out, todo[i]); - s[r]++; - if (r === 'differ') s.differing.push(todo[i].outRel); + s[await copyOne(out, todo[i])]++; } catch (e) { s.errors++; if (window.zddc && window.zddc.toast) { @@ -280,6 +293,5 @@ plan: plan, conflictsIn: conflictsIn, copyTo: copyTo, - downloadZip: downloadZip, }; })(); diff --git a/tests/classify.spec.js b/tests/classify.spec.js index cfe1fd2..0d33236 100644 --- a/tests/classify.spec.js +++ b/tests/classify.spec.js @@ -322,7 +322,7 @@ test('cross-tree reveal: source→target switches to the placed axis', async ({ // ── Phase 5: copy-out engine + duplicate detection (mock FS handles) ─────── -test('copy: writes the file, skips an identical re-copy, flags a differing target', async ({ page }) => { +test('copy: writes the file, then resumes by skipping an existing target', async ({ page }) => { await page.click('#modeClassifyBtn'); const res = await page.evaluate(async () => { const c = window.app.modules.classify, copy = window.app.modules.copy; @@ -353,15 +353,12 @@ test('copy: writes the file, skips an identical re-copy, flags a differing targe const out = mockDir(''); const first = await copy.copyTo(out, copy.plan()); - const second = await copy.copyTo(out, copy.plan()); // identical → skipped - const tkey = Object.keys(store)[0]; - store[tkey] = 'DIFFERENT'; // tamper target - const third = await copy.copyTo(out, copy.plan()); // differs → left alone - return { firstCopied: first.copied, secondSkipped: second.skipped, thirdDiffer: third.differ, keys: Object.keys(store) }; + const second = await copy.copyTo(out, copy.plan()); // already present → skipped (resume) + return { firstCopied: first.copied, secondSkipped: second.skipped, 
secondCopied: second.copied, keys: Object.keys(store) }; }); expect(res.firstCopied).toBe(1); - expect(res.secondSkipped).toBe(1); - expect(res.thirdDiffer).toBe(1); + expect(res.secondSkipped).toBe(1); // re-run resumes: the existing target is skipped + expect(res.secondCopied).toBe(0); // …and not re-written expect(res.keys.some((k) => k.endsWith('ClientCorp/received/2026-03-14_ClientCorp-TRN-0007 (---) - Transmittal/ACME-MECH-0001_A (IFR) - foundation.pdf'))).toBe(true); }); @@ -1029,7 +1026,7 @@ test('Show Partial surfaces files assigned in the other tab only', async ({ page expect(r.withoutPartial).toBe(false); // hidden once Partial is off }); -test('Copy → zip bundles files under ///', async ({ page }) => { +test('copy: PUTs into a server-style handle, then resumes by skipping existing', async ({ page }) => { await page.click('#modeClassifyBtn'); const r = await page.evaluate(async () => { const c = window.app.modules.classify, copy = window.app.modules.copy; @@ -1043,15 +1040,25 @@ test('Copy → zip bundles files under ///', asy const bin = c.addTransmittalBin(c.addParty('ClientCorp'), 'received', { date: '2026-03-14', type: 'TRN', seq: '0007' }); c.place([c.srcKeyForFile(f)], leaf, 'tracking'); c.place([c.srcKeyForFile(f)], bin, 'transmittal'); - const entries = {}; - window.JSZip = function () { - this.file = (name, data) => { entries[name] = data; }; - this.generateAsync = async () => new Blob(['zip']); - }; - await copy.downloadZip(copy.plan()); - return { paths: Object.keys(entries) }; + + // Server-style handle: getDirectoryHandle never verifies (like the HTTP + // polyfill); getFileHandle does a HEAD-style existence check. 
+ const store = {}, mkdirs = []; + const srvDir = (base) => ({ + getDirectoryHandle: async (n, opts) => { if (opts && opts.create) mkdirs.push(base + n); return srvDir(base + n + '/'); }, + getFileHandle: async (n, opts) => { + const full = base + n; + if ((!opts || !opts.create) && !(full in store)) { const e = new Error('NF'); e.name = 'NotFoundError'; throw e; } + return { createWritable: async () => ({ write: async (d) => { store[full] = d; }, close: async () => {} }) }; + }, + }); + const out = srvDir(''); + const first = await copy.copyTo(out, copy.plan()); + const second = await copy.copyTo(out, copy.plan()); // existing → skipped (resume) + return { firstCopied: first.copied, secondSkipped: second.skipped, paths: Object.keys(store) }; }); - expect(r.paths.length).toBe(1); + expect(r.firstCopied).toBe(1); + expect(r.secondSkipped).toBe(1); expect(r.paths[0].startsWith('ClientCorp/received/')).toBe(true); expect(r.paths[0].endsWith('ACME-MECH-0001_A (IFR) - foundation.pdf')).toBe(true); });