From 203674ee4ce3d9651ee104b9f3af9f7fd378c234 Mon Sep 17 00:00:00 2001 From: ZDDC Date: Wed, 10 Jun 2026 13:27:00 -0500 Subject: [PATCH] feat(classifier): persist & copy files inside .zip archives MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Zip members were live-only: expandable while the source was connected, but the workspace snapshot dropped the archive (.zip became a plain file), so a classification made inside one vanished on reopen — and copy couldn't extract it anyway (it tried to walk the archive path as a real directory). Now zips are first-class: - snapshotTree/loadSnapshot persist the scanned archive subtree — zip-root + virtual folders + members carry isVirtual/zipPath/zipEntryPath, so the tree rebuilds on reopen and assignments inside an archive survive. An archive that was never opened persists as a lazy 'zip' node that reopens on demand. - scanner.ensureZipLoaded(rootHandle, zipPath) reloads an archive from the workspace root when the in-memory cache is cold (post-restore); scanZipNode falls back to it when a restored zip node has no live file object. - copy.js reads a member via scanner.extractZipMember (Blob from the archive) instead of a non-existent file handle; preview.js reloads the archive for a restored member before opening it. This also reconciles export/import with the snapshot: both now keep zip members, so a round-trip no longer leaves dangling in-archive assignments. Tests: zip subtree snapshot round-trip; copy extracts a member to the output (45). Co-Authored-By: Claude Opus 4.8 (1M context) --- classifier/js/copy.js | 14 ++++++- classifier/js/preview.js | 14 ++++++- classifier/js/scanner.js | 80 +++++++++++++++++++++++++++++++++------- tests/classify.spec.js | 71 +++++++++++++++++++++++++++++++++++ 4 files changed, 161 insertions(+), 18 deletions(-) diff --git a/classifier/js/copy.js b/classifier/js/copy.js index 7331589..ee72892 100644 --- a/classifier/js/copy.js +++ b/classifier/js/copy.js @@ -90,7 +90,7 @@ async function sameContent(existingHandle, srcFileObj) { var ef = await existingHandle.getFile(); - var sf = await (await srcHandle(srcFileObj)).getFile(); + var sf = await readSource(srcFileObj); if (ef.size !== sf.size) return false; var a = await window.zddc.crypto.sha256File(ef); var b = await window.zddc.crypto.sha256File(sf); @@ -105,6 +105,16 @@ return window.app.modules.scanner.resolveFileHandle(window.app.rootHandle, fileObj); } + // Read a source file's bytes (a File or Blob). A zip member is extracted + // from its archive (lazily reloaded from the root); a plain file is read + // through its resolved handle. The source is never written either way. + async function readSource(fileObj) { + if (fileObj.isVirtual) { + return window.app.modules.scanner.extractZipMember(window.app.rootHandle, fileObj); + } + return (await srcHandle(fileObj)).getFile(); + } + // Copy one file. Returns 'copied' | 'skipped' (identical) | 'differ' (left alone). async function copyOne(out, p) { var dir = await ensureDir(out, p.d.outPath); @@ -113,7 +123,7 @@ if (existing) { return (await sameContent(existing, p.file)) ? 'skipped' : 'differ'; } - var srcFile = await (await srcHandle(p.file)).getFile(); // READ source (never write it) + var srcFile = await readSource(p.file); // READ source (never write it) var fh = await dir.getFileHandle(p.d.filename, { create: true }); var w = await fh.createWritable(); await w.write(srcFile); diff --git a/classifier/js/preview.js b/classifier/js/preview.js index 15d5ea3..4a39145 100644 --- a/classifier/js/preview.js +++ b/classifier/js/preview.js @@ -526,12 +526,22 @@ // permission re-grant) before opening the preview window. async function previewFile(file) { try { - if (!file.handle && !file.isVirtual && window.app.rootHandle) { + const sc = window.app.modules.scanner; + if (file.isVirtual) { + // Snapshot-restored zip member — reload its archive from the root. + if (window.app.rootHandle && !sc.getZipCache(file.zipPath)) { + if (window.app.modules.persist && window.app.modules.persist.verifyPermission) { + const ok = await window.app.modules.persist.verifyPermission(window.app.rootHandle, false); + if (!ok) { if (window.zddc) window.zddc.toast('Permission to read the source directory was denied.', 'error'); return; } + } + await sc.ensureZipLoaded(window.app.rootHandle, file.zipPath); + } + } else if (!file.handle && window.app.rootHandle) { if (window.app.modules.persist && window.app.modules.persist.verifyPermission) { const ok = await window.app.modules.persist.verifyPermission(window.app.rootHandle, false); if (!ok) { if (window.zddc) window.zddc.toast('Permission to read the source directory was denied.', 'error'); return; } } - await window.app.modules.scanner.resolveFileHandle(window.app.rootHandle, file); + await sc.resolveFileHandle(window.app.rootHandle, file); } await openPreviewWindow(file); } catch (e) { diff --git a/classifier/js/scanner.js b/classifier/js/scanner.js index 7ab7dcd..746b7d8 100644 --- a/classifier/js/scanner.js +++ b/classifier/js/scanner.js @@ -369,11 +369,23 @@ // Read a lazy zip node's contents on demand (when opened), building its // child nodes and folding its internal totals into ancestors. async function scanZipNode(node) { - if (node.scanState !== 'zip-pending' || !node._zipFileObj) return; + if (node.scanState !== 'zip-pending') return; + var fileObj = node._zipFileObj; + if (!fileObj) { + // Restored from a snapshot — no live file object. Resolve the .zip + // from the workspace root by its path so it can be opened on demand. + if (!window.app.rootHandle || !node.zipPath) return; + try { + var dir = await resolveDirHandle(window.app.rootHandle, relFromRoot(parentPath(node.zipPath))); + fileObj = { handle: await dir.getFileHandle(baseName(node.zipPath)), folderHandle: dir }; + } catch (e) { + reportScanError(node.path, e); node.scanState = 'done'; node.runFiles = 0; node.runDirs = 0; return; + } + } node.scanState = 'scanning'; scheduleRender(); try { - await scanZipIntoNode(node, node._zipFileObj); // builds children, runFiles/runDirs, sets 'done' + await scanZipIntoNode(node, fileObj); // builds children, runFiles/runDirs, sets 'done' } catch (e) { reportScanError(node.path, e); node.scanState = 'done'; @@ -754,22 +766,30 @@ // ── Workspace snapshot (scan once, resume without re-walking the FS) ──── // Serialize the completed scan to compact JSON (short keys: large trees). - // Zip-root nodes are NOT preserved as expandable folders — the .zip stays a - // plain file in its parent (classifying inside archives is out of scope for - // a persisted workspace). + // Zip subtrees ARE preserved: a scanned archive keeps its virtual folders + + // members so classifications inside it survive reopen; copy/preview re-load + // the archive lazily from the root (ensureZipLoaded). An archive that was + // never opened persists as a lazy 'zip' node that reopens on demand. function snapshotTree() { - function serFile(f) { return { o: f.originalFilename, e: f.extension, p: f.folderPath }; } + function serFile(f) { + var o = { o: f.originalFilename, e: f.extension, p: f.folderPath }; + if (f.isVirtual) { o.z = f.zipPath; o.ze = f.zipEntryPath; } // zip member + return o; + } function serNode(n) { var o = { n: n.name, p: n.path }; + if (n.isZipRoot) o.zr = 1; // archive root (zipPath === n.path) + else if (n.isVirtualDir) o.vd = n.zipPath; // folder inside an archive if (n.files && n.files.length) o.f = n.files.map(serFile); - var realKids = (n.children || []).filter(function (c) { return !c.isZipRoot; }); - if (realKids.length) o.c = realKids.map(serNode); + if (n.children && n.children.length) o.c = n.children.map(serNode); // Record scan progress so an interrupted scan can resume: 'children' // = direct entries fully read (kids may still be pending); anything - // unfinished (pending/scanning/zip) → 'pending' to re-read. 'done' - // is the default and omitted. + // unfinished → 'pending' to re-read. An unopened archive persists as + // 'zip' (reopen lazily, never a real dir re-walk). 'done' is the + // default and omitted. var st = n.scanState; - if (st && st !== 'done') o.s = (st === 'children') ? 'children' : 'pending'; + if (n.isZipRoot && st !== 'done') o.s = 'zip'; + else if (st && st !== 'done') o.s = (st === 'children') ? 'children' : 'pending'; return o; } return (window.app.folderTree || []).map(serNode); @@ -780,7 +800,7 @@ // workspace root handle at copy/preview time. function loadSnapshot(snap) { function deFile(sf) { - return { + var fo = { handle: null, folderHandle: null, originalFilename: sf.o, extension: sf.e, size: null, lastModified: null, @@ -788,11 +808,18 @@ isDirty: false, error: false, errorMessage: '', validation: null, sha256: null, folderPath: sf.p, }; + if (sf.z) { fo.isVirtual = true; fo.zipPath = sf.z; fo.zipEntryPath = sf.ze; } + return fo; } function deNode(sn, parent) { - var node = makeNode({ name: sn.n, kind: 'directory' }, sn.p, parent); + var desc = { name: sn.n, kind: 'directory' }; + if (sn.zr) { desc.isZipRoot = true; desc.zipPath = sn.p; } + else if (sn.vd) { desc.isVirtualDir = true; desc.zipPath = sn.vd; } + var node = makeNode(desc, sn.p, parent); node.handle = null; - node.scanState = sn.s || 'done'; // 'pending'/'children' resume on reconnect + if (sn.zr || sn.vd) node.virtualPath = sn.p; + // 'zip' restores an unopened archive (reopen lazily); else resume marker. + node.scanState = sn.s === 'zip' ? 'zip-pending' : (sn.s || 'done'); node.expanded = false; node.files = (sn.f || []).map(deFile); node.children = (sn.c || []).map(function (c) { return deNode(c, node); }); @@ -819,6 +846,29 @@ // ── Lazy handle resolution (snapshot files carry paths, not handles) ──── function relFromRoot(p) { var i = (p || '').indexOf('/'); return i < 0 ? '' : p.slice(i + 1); } + function parentPath(p) { var i = (p || '').lastIndexOf('/'); return i < 0 ? '' : p.slice(0, i); } + function baseName(p) { var i = (p || '').lastIndexOf('/'); return i < 0 ? p : p.slice(i + 1); } + // Load (and cache) a zip archive by its tree path. After a snapshot restore + // the in-memory cache is empty, so resolve the .zip from the workspace root + // and parse it on demand. Returns the cache record { zip, fileHandle, ... }. + async function ensureZipLoaded(rootHandle, zipPath) { + var cached = zipCache.get(zipPath); + if (cached && cached.zip) return cached; + if (!rootHandle) throw new Error('source directory not connected'); + var dir = await resolveDirHandle(rootHandle, relFromRoot(parentPath(zipPath))); + var fh = await dir.getFileHandle(baseName(zipPath)); + var zip = await JSZip.loadAsync(await (await fh.getFile()).arrayBuffer()); + var rec = { zip: zip, fileHandle: fh, folderHandle: dir }; + zipCache.set(zipPath, rec); + return rec; + } + // Read a zip member's bytes as a Blob (lazily loading its archive). + async function extractZipMember(rootHandle, fileObj) { + var rec = await ensureZipLoaded(rootHandle, fileObj.zipPath); + var entry = rec.zip.file(fileObj.zipEntryPath); + if (!entry) throw new Error('zip member not found: ' + fileObj.zipEntryPath); + return await entry.async('blob'); + } async function resolveDirHandle(rootHandle, relPath) { var cur = rootHandle; var parts = (relPath || '').split('/').filter(Boolean); @@ -886,6 +936,8 @@ loadSnapshot, resolveFileHandle, resolveDirHandle, + ensureZipLoaded, + extractZipMember, resumeScan }; })(); diff --git a/tests/classify.spec.js b/tests/classify.spec.js index cec583c..f0c5e40 100644 --- a/tests/classify.spec.js +++ b/tests/classify.spec.js @@ -812,3 +812,74 @@ test('search opens only the branch with a hit, leaving siblings collapsed', asyn expect(r.folders).toEqual(['Project', 'Project/Electrical']); expect(r.files).toEqual(['Switchgear Spec.pdf']); }); + +test('snapshot: a scanned zip subtree round-trips with its virtual members', async ({ page }) => { + const r = await page.evaluate(() => { + const sc = window.app.modules.scanner; + window.app.folderTree = [{ + name: 'Root', path: 'Root', scanState: 'done', files: [], children: [{ + name: 'docs.zip', path: 'Root/docs.zip', isZipRoot: true, zipPath: 'Root/docs.zip', + scanState: 'done', children: [], files: [{ + originalFilename: 'spec', extension: 'pdf', folderPath: 'Root/docs.zip', + isVirtual: true, zipPath: 'Root/docs.zip', zipEntryPath: 'spec.pdf', + }], + }], + }]; + const json = JSON.stringify(sc.snapshotTree()); + window.app.folderTree = []; + sc.loadSnapshot(JSON.parse(json)); + const zip = window.app.folderTree[0].children[0]; + const m = zip.files[0]; + return { + isZipRoot: zip.isZipRoot, zipPath: zip.zipPath, done: zip.scanState === 'done', + virtual: m.isVirtual, mZip: m.zipPath, entry: m.zipEntryPath, handleNull: m.handle === null, + }; + }); + expect(r.isZipRoot).toBe(true); // archive preserved as an expandable folder + expect(r.zipPath).toBe('Root/docs.zip'); + expect(r.done).toBe(true); + expect(r.virtual).toBe(true); // member flagged virtual… + expect(r.mZip).toBe('Root/docs.zip'); // …with enough to re-extract + expect(r.entry).toBe('spec.pdf'); + expect(r.handleNull).toBe(true); +}); + +test('copy: a zip member is extracted from its archive and written out', async ({ page }) => { + await page.click('#modeClassifyBtn'); + const res = await page.evaluate(async () => { + const c = window.app.modules.classify, copy = window.app.modules.copy; + const f = { + originalFilename: 'spec', extension: 'pdf', folderPath: 'Root/docs.zip', + isVirtual: true, zipPath: 'Root/docs.zip', zipEntryPath: 'spec.pdf', handle: null, + }; + window.app.folderTree = [{ name: 'Root', path: 'Root', files: [], children: [ + { name: 'docs.zip', path: 'Root/docs.zip', isZipRoot: true, files: [f], children: [] }, + ] }]; + // Stub archive extraction — return the member's bytes as a Blob. + window.app.rootHandle = {}; + window.app.modules.scanner.extractZipMember = async () => new File(['ZIPBYTES'], 'spec.pdf'); + + const leaf = c.addTrackingNode(c.addTrackingNode(null, 'ACME-MECH-0001'), 'A (IFR)'); + const bin = c.addTransmittalBin(c.addParty('ClientCorp'), 'received', { date: '2026-03-14', type: 'TRN', seq: '0007' }); + c.place([c.srcKeyForFile(f)], leaf, 'tracking'); c.place([c.srcKeyForFile(f)], bin, 'transmittal'); + + const outStore = {}; + const mkOut = (prefix) => ({ + name: prefix || 'out', + getDirectoryHandle: async (n) => mkOut((prefix ? prefix + '/' : '') + n), + getFileHandle: async (n, opts) => { + const full = (prefix ? prefix + '/' : '') + n; + if (!opts || !opts.create) { if (!(full in outStore)) { const e = new Error('NF'); e.name = 'NotFoundError'; throw e; } } + return { + getFile: async () => new File([outStore[full] != null ? outStore[full] : ''], n), + createWritable: async () => ({ write: async (d) => { outStore[full] = (d && d.text) ? await d.text() : d; }, close: async () => { } }), + }; + }, + }); + const s = await copy.copyTo(mkOut(''), copy.plan()); + return { copied: s.copied, content: Object.values(outStore)[0], wrote: Object.keys(outStore).some((k) => k.endsWith('spec.pdf')) }; + }); + expect(res.copied).toBe(1); + expect(res.wrote).toBe(true); + expect(res.content).toBe('ZIPBYTES'); +});