feat(classifier): persist & copy files inside .zip archives
Zip members were live-only: expandable while the source was connected, but the workspace snapshot dropped the archive (.zip became a plain file), so a classification made inside one vanished on reopen — and copy couldn't extract it anyway (it tried to walk the archive path as a real directory).

Now zips are first-class:
- snapshotTree/loadSnapshot persist the scanned archive subtree — zip-root + virtual folders + members carry isVirtual/zipPath/zipEntryPath, so the tree rebuilds on reopen and assignments inside an archive survive. An archive that was never opened persists as a lazy 'zip' node that reopens on demand.
- scanner.ensureZipLoaded(rootHandle, zipPath) reloads an archive from the workspace root when the in-memory cache is cold (post-restore); scanZipNode falls back to it when a restored zip node has no live file object.
- copy.js reads a member via scanner.extractZipMember (Blob from the archive) instead of a non-existent file handle; preview.js reloads the archive for a restored member before opening it.

This also reconciles export/import with the snapshot: both now keep zip members, so a round-trip no longer leaves dangling in-archive assignments.

Tests: zip subtree snapshot round-trip; copy extracts a member to the output (45).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
e1c479dba5
commit
203674ee4c
4 changed files with 161 additions and 18 deletions
|
|
@ -90,7 +90,7 @@
|
|||
|
||||
async function sameContent(existingHandle, srcFileObj) {
|
||||
var ef = await existingHandle.getFile();
|
||||
var sf = await (await srcHandle(srcFileObj)).getFile();
|
||||
var sf = await readSource(srcFileObj);
|
||||
if (ef.size !== sf.size) return false;
|
||||
var a = await window.zddc.crypto.sha256File(ef);
|
||||
var b = await window.zddc.crypto.sha256File(sf);
|
||||
|
|
@ -105,6 +105,16 @@
|
|||
return window.app.modules.scanner.resolveFileHandle(window.app.rootHandle, fileObj);
|
||||
}
|
||||
|
||||
// Fetch the bytes of a source file as a File or Blob. This function only
// reads; nothing is written back to the source.
//   - zip member (fileObj.isVirtual): extracted from its archive by the
//     scanner module, which is handed the workspace root handle;
//   - any other file: read through the handle that srcHandle() resolves.
async function readSource(fileObj) {
    if (!fileObj.isVirtual) {
        var handle = await srcHandle(fileObj);
        return handle.getFile();
    }
    var scanner = window.app.modules.scanner;
    return scanner.extractZipMember(window.app.rootHandle, fileObj);
}
|
||||
|
||||
// Copy one file. Returns 'copied' | 'skipped' (identical) | 'differ' (left alone).
|
||||
async function copyOne(out, p) {
|
||||
var dir = await ensureDir(out, p.d.outPath);
|
||||
|
|
@ -113,7 +123,7 @@
|
|||
if (existing) {
|
||||
return (await sameContent(existing, p.file)) ? 'skipped' : 'differ';
|
||||
}
|
||||
var srcFile = await (await srcHandle(p.file)).getFile(); // READ source (never write it)
|
||||
var srcFile = await readSource(p.file); // READ source (never write it)
|
||||
var fh = await dir.getFileHandle(p.d.filename, { create: true });
|
||||
var w = await fh.createWritable();
|
||||
await w.write(srcFile);
|
||||
|
|
|
|||
|
|
@ -526,12 +526,22 @@
|
|||
// permission re-grant) before opening the preview window.
|
||||
async function previewFile(file) {
|
||||
try {
|
||||
if (!file.handle && !file.isVirtual && window.app.rootHandle) {
|
||||
const sc = window.app.modules.scanner;
|
||||
if (file.isVirtual) {
|
||||
// Snapshot-restored zip member — reload its archive from the root.
|
||||
if (window.app.rootHandle && !sc.getZipCache(file.zipPath)) {
|
||||
if (window.app.modules.persist && window.app.modules.persist.verifyPermission) {
|
||||
const ok = await window.app.modules.persist.verifyPermission(window.app.rootHandle, false);
|
||||
if (!ok) { if (window.zddc) window.zddc.toast('Permission to read the source directory was denied.', 'error'); return; }
|
||||
}
|
||||
await sc.ensureZipLoaded(window.app.rootHandle, file.zipPath);
|
||||
}
|
||||
} else if (!file.handle && window.app.rootHandle) {
|
||||
if (window.app.modules.persist && window.app.modules.persist.verifyPermission) {
|
||||
const ok = await window.app.modules.persist.verifyPermission(window.app.rootHandle, false);
|
||||
if (!ok) { if (window.zddc) window.zddc.toast('Permission to read the source directory was denied.', 'error'); return; }
|
||||
}
|
||||
await window.app.modules.scanner.resolveFileHandle(window.app.rootHandle, file);
|
||||
await sc.resolveFileHandle(window.app.rootHandle, file);
|
||||
}
|
||||
await openPreviewWindow(file);
|
||||
} catch (e) {
|
||||
|
|
|
|||
|
|
@ -369,11 +369,23 @@
|
|||
// Read a lazy zip node's contents on demand (when opened), building its
|
||||
// child nodes and folding its internal totals into ancestors.
|
||||
async function scanZipNode(node) {
|
||||
if (node.scanState !== 'zip-pending' || !node._zipFileObj) return;
|
||||
if (node.scanState !== 'zip-pending') return;
|
||||
var fileObj = node._zipFileObj;
|
||||
if (!fileObj) {
|
||||
// Restored from a snapshot — no live file object. Resolve the .zip
|
||||
// from the workspace root by its path so it can be opened on demand.
|
||||
if (!window.app.rootHandle || !node.zipPath) return;
|
||||
try {
|
||||
var dir = await resolveDirHandle(window.app.rootHandle, relFromRoot(parentPath(node.zipPath)));
|
||||
fileObj = { handle: await dir.getFileHandle(baseName(node.zipPath)), folderHandle: dir };
|
||||
} catch (e) {
|
||||
reportScanError(node.path, e); node.scanState = 'done'; node.runFiles = 0; node.runDirs = 0; return;
|
||||
}
|
||||
}
|
||||
node.scanState = 'scanning';
|
||||
scheduleRender();
|
||||
try {
|
||||
await scanZipIntoNode(node, node._zipFileObj); // builds children, runFiles/runDirs, sets 'done'
|
||||
await scanZipIntoNode(node, fileObj); // builds children, runFiles/runDirs, sets 'done'
|
||||
} catch (e) {
|
||||
reportScanError(node.path, e);
|
||||
node.scanState = 'done';
|
||||
|
|
@ -754,22 +766,30 @@
|
|||
// ── Workspace snapshot (scan once, resume without re-walking the FS) ────
|
||||
|
||||
// Serialize the completed scan to compact JSON (short keys: large trees).
|
||||
// Zip-root nodes are NOT preserved as expandable folders — the .zip stays a
|
||||
// plain file in its parent (classifying inside archives is out of scope for
|
||||
// a persisted workspace).
|
||||
// Zip subtrees ARE preserved: a scanned archive keeps its virtual folders +
|
||||
// members so classifications inside it survive reopen; copy/preview re-load
|
||||
// the archive lazily from the root (ensureZipLoaded). An archive that was
|
||||
// never opened persists as a lazy 'zip' node that reopens on demand.
|
||||
function snapshotTree() {
|
||||
function serFile(f) { return { o: f.originalFilename, e: f.extension, p: f.folderPath }; }
|
||||
function serFile(f) {
|
||||
var o = { o: f.originalFilename, e: f.extension, p: f.folderPath };
|
||||
if (f.isVirtual) { o.z = f.zipPath; o.ze = f.zipEntryPath; } // zip member
|
||||
return o;
|
||||
}
|
||||
function serNode(n) {
|
||||
var o = { n: n.name, p: n.path };
|
||||
if (n.isZipRoot) o.zr = 1; // archive root (zipPath === n.path)
|
||||
else if (n.isVirtualDir) o.vd = n.zipPath; // folder inside an archive
|
||||
if (n.files && n.files.length) o.f = n.files.map(serFile);
|
||||
var realKids = (n.children || []).filter(function (c) { return !c.isZipRoot; });
|
||||
if (realKids.length) o.c = realKids.map(serNode);
|
||||
if (n.children && n.children.length) o.c = n.children.map(serNode);
|
||||
// Record scan progress so an interrupted scan can resume: 'children'
|
||||
// = direct entries fully read (kids may still be pending); anything
|
||||
// unfinished (pending/scanning/zip) → 'pending' to re-read. 'done'
|
||||
// is the default and omitted.
|
||||
// unfinished → 'pending' to re-read. An unopened archive persists as
|
||||
// 'zip' (reopen lazily, never a real dir re-walk). 'done' is the
|
||||
// default and omitted.
|
||||
var st = n.scanState;
|
||||
if (st && st !== 'done') o.s = (st === 'children') ? 'children' : 'pending';
|
||||
if (n.isZipRoot && st !== 'done') o.s = 'zip';
|
||||
else if (st && st !== 'done') o.s = (st === 'children') ? 'children' : 'pending';
|
||||
return o;
|
||||
}
|
||||
return (window.app.folderTree || []).map(serNode);
|
||||
|
|
@ -780,7 +800,7 @@
|
|||
// workspace root handle at copy/preview time.
|
||||
function loadSnapshot(snap) {
|
||||
function deFile(sf) {
|
||||
return {
|
||||
var fo = {
|
||||
handle: null, folderHandle: null,
|
||||
originalFilename: sf.o, extension: sf.e,
|
||||
size: null, lastModified: null,
|
||||
|
|
@ -788,11 +808,18 @@
|
|||
isDirty: false, error: false, errorMessage: '', validation: null, sha256: null,
|
||||
folderPath: sf.p,
|
||||
};
|
||||
if (sf.z) { fo.isVirtual = true; fo.zipPath = sf.z; fo.zipEntryPath = sf.ze; }
|
||||
return fo;
|
||||
}
|
||||
function deNode(sn, parent) {
|
||||
var node = makeNode({ name: sn.n, kind: 'directory' }, sn.p, parent);
|
||||
var desc = { name: sn.n, kind: 'directory' };
|
||||
if (sn.zr) { desc.isZipRoot = true; desc.zipPath = sn.p; }
|
||||
else if (sn.vd) { desc.isVirtualDir = true; desc.zipPath = sn.vd; }
|
||||
var node = makeNode(desc, sn.p, parent);
|
||||
node.handle = null;
|
||||
node.scanState = sn.s || 'done'; // 'pending'/'children' resume on reconnect
|
||||
if (sn.zr || sn.vd) node.virtualPath = sn.p;
|
||||
// 'zip' restores an unopened archive (reopen lazily); else resume marker.
|
||||
node.scanState = sn.s === 'zip' ? 'zip-pending' : (sn.s || 'done');
|
||||
node.expanded = false;
|
||||
node.files = (sn.f || []).map(deFile);
|
||||
node.children = (sn.c || []).map(function (c) { return deNode(c, node); });
|
||||
|
|
@ -819,6 +846,29 @@
|
|||
|
||||
// ── Lazy handle resolution (snapshot files carry paths, not handles) ────
|
||||
// Drop the leading segment of a tree path (everything up to and including
// the first '/'). A path without a '/' — or a missing path — yields ''.
function relFromRoot(p) {
    var path = p || '';
    var slash = path.indexOf('/');
    if (slash < 0) return '';
    return path.slice(slash + 1);
}
|
||||
// Everything before the last '/' of a path. A path without a '/' — or a
// missing path — yields ''.
function parentPath(p) {
    var path = p || '';
    var cut = path.lastIndexOf('/');
    return cut >= 0 ? path.slice(0, cut) : '';
}
|
||||
// Last segment of a path (the text after the final '/'); a path without a
// '/' is returned whole. A missing path yields '' so the result is always a
// string, matching relFromRoot/parentPath (previously undefined/null leaked
// through to the caller).
function baseName(p) {
    var path = p || '';
    var cut = path.lastIndexOf('/');
    return cut < 0 ? path : path.slice(cut + 1);
}
|
||||
// Return the cache record for the archive at tree path `zipPath`, parsing
// the archive first when the cache holds no usable entry (for instance after
// a snapshot restore, when nothing has been loaded yet). The .zip is found
// under `rootHandle` from its path alone. A freshly built record is
// { zip, fileHandle, folderHandle }. Throws when a load is needed but no
// root handle was supplied.
async function ensureZipLoaded(rootHandle, zipPath) {
    var hit = zipCache.get(zipPath);
    if (hit && hit.zip) return hit;
    if (!rootHandle) throw new Error('source directory not connected');

    // Locate the folder that holds the archive (path relative to the root),
    // then open the .zip inside it.
    var folder = await resolveDirHandle(rootHandle, relFromRoot(parentPath(zipPath)));
    var zipHandle = await folder.getFileHandle(baseName(zipPath));

    // Read the archive's bytes and let JSZip parse them.
    var zipFile = await zipHandle.getFile();
    var bytes = await zipFile.arrayBuffer();
    var parsed = await JSZip.loadAsync(bytes);

    var record = { zip: parsed, fileHandle: zipHandle, folderHandle: folder };
    zipCache.set(zipPath, record);
    return record;
}
|
||||
// Resolve a zip member to its bytes: make sure the member's archive is
// loaded (ensureZipLoaded), look the entry up by fileObj.zipEntryPath and
// return its content as a Blob. Throws when the archive has no such entry.
async function extractZipMember(rootHandle, fileObj) {
    var archive = (await ensureZipLoaded(rootHandle, fileObj.zipPath)).zip;
    var member = archive.file(fileObj.zipEntryPath);
    if (member) {
        return await member.async('blob');
    }
    throw new Error('zip member not found: ' + fileObj.zipEntryPath);
}
|
||||
async function resolveDirHandle(rootHandle, relPath) {
|
||||
var cur = rootHandle;
|
||||
var parts = (relPath || '').split('/').filter(Boolean);
|
||||
|
|
@ -886,6 +936,8 @@
|
|||
loadSnapshot,
|
||||
resolveFileHandle,
|
||||
resolveDirHandle,
|
||||
ensureZipLoaded,
|
||||
extractZipMember,
|
||||
resumeScan
|
||||
};
|
||||
})();
|
||||
|
|
|
|||
|
|
@ -812,3 +812,74 @@ test('search opens only the branch with a hit, leaving siblings collapsed', asyn
|
|||
expect(r.folders).toEqual(['Project', 'Project/Electrical']);
|
||||
expect(r.files).toEqual(['Switchgear Spec.pdf']);
|
||||
});
|
||||
|
||||
test('snapshot: a scanned zip subtree round-trips with its virtual members', async ({ page }) => {
    // In the page: build a tree holding one already-scanned archive with a
    // single member, push it through snapshotTree → JSON → loadSnapshot, and
    // report what came back for the archive node and its member.
    const r = await page.evaluate(() => {
        const scanner = window.app.modules.scanner;

        const member = {
            originalFilename: 'spec', extension: 'pdf', folderPath: 'Root/docs.zip',
            isVirtual: true, zipPath: 'Root/docs.zip', zipEntryPath: 'spec.pdf',
        };
        const archive = {
            name: 'docs.zip', path: 'Root/docs.zip', isZipRoot: true, zipPath: 'Root/docs.zip',
            scanState: 'done', children: [], files: [member],
        };
        window.app.folderTree = [
            { name: 'Root', path: 'Root', scanState: 'done', files: [], children: [archive] },
        ];

        const serialized = JSON.stringify(scanner.snapshotTree());
        window.app.folderTree = [];
        scanner.loadSnapshot(JSON.parse(serialized));

        const zipNode = window.app.folderTree[0].children[0];
        const restoredMember = zipNode.files[0];
        return {
            isZipRoot: zipNode.isZipRoot,
            zipPath: zipNode.zipPath,
            done: zipNode.scanState === 'done',
            virtual: restoredMember.isVirtual,
            mZip: restoredMember.zipPath,
            entry: restoredMember.zipEntryPath,
            handleNull: restoredMember.handle === null,
        };
    });

    // The archive survives as an expandable, fully scanned folder…
    expect(r.isZipRoot).toBe(true);
    expect(r.zipPath).toBe('Root/docs.zip');
    expect(r.done).toBe(true);
    // …and its member stays virtual, carrying what is needed to re-extract it.
    expect(r.virtual).toBe(true);
    expect(r.mZip).toBe('Root/docs.zip');
    expect(r.entry).toBe('spec.pdf');
    expect(r.handleNull).toBe(true);
});
|
||||
|
||||
test('copy: a zip member is extracted from its archive and written out', async ({ page }) => {
    await page.click('#modeClassifyBtn');

    const res = await page.evaluate(async () => {
        const classify = window.app.modules.classify;
        const copy = window.app.modules.copy;

        // A snapshot-style zip member: virtual, with no live handle.
        const member = {
            originalFilename: 'spec', extension: 'pdf', folderPath: 'Root/docs.zip',
            isVirtual: true, zipPath: 'Root/docs.zip', zipEntryPath: 'spec.pdf', handle: null,
        };
        window.app.folderTree = [{
            name: 'Root', path: 'Root', files: [], children: [
                { name: 'docs.zip', path: 'Root/docs.zip', isZipRoot: true, files: [member], children: [] },
            ],
        }];

        // Stub archive extraction so the member's bytes come from a fixed
        // File (a Blob) rather than a real archive.
        window.app.rootHandle = {};
        window.app.modules.scanner.extractZipMember = async () => new File(['ZIPBYTES'], 'spec.pdf');

        // Classify the member on both axes.
        const trackingRoot = classify.addTrackingNode(null, 'ACME-MECH-0001');
        const leaf = classify.addTrackingNode(trackingRoot, 'A (IFR)');
        const party = classify.addParty('ClientCorp');
        const bin = classify.addTransmittalBin(party, 'received', { date: '2026-03-14', type: 'TRN', seq: '0007' });
        classify.place([classify.srcKeyForFile(member)], leaf, 'tracking');
        classify.place([classify.srcKeyForFile(member)], bin, 'transmittal');

        // In-memory output directory: written content is kept in `outStore`,
        // keyed by the file's full path.
        const outStore = {};
        const mkOut = (prefix) => {
            const pathOf = (n) => (prefix ? prefix + '/' : '') + n;
            return {
                name: prefix || 'out',
                getDirectoryHandle: async (n) => mkOut(pathOf(n)),
                getFileHandle: async (n, opts) => {
                    const full = pathOf(n);
                    const creating = Boolean(opts && opts.create);
                    if (!creating && !(full in outStore)) {
                        const err = new Error('NF');
                        err.name = 'NotFoundError';
                        throw err;
                    }
                    return {
                        getFile: async () => new File([outStore[full] != null ? outStore[full] : ''], n),
                        createWritable: async () => ({
                            write: async (d) => {
                                outStore[full] = (d && d.text) ? await d.text() : d;
                            },
                            close: async () => { },
                        }),
                    };
                },
            };
        };

        const s = await copy.copyTo(mkOut(''), copy.plan());
        return {
            copied: s.copied,
            content: Object.values(outStore)[0],
            wrote: Object.keys(outStore).some((k) => k.endsWith('spec.pdf')),
        };
    });

    expect(res.copied).toBe(1);
    expect(res.wrote).toBe(true);
    expect(res.content).toBe('ZIPBYTES');
});
|
||||
|
|
|
|||
Loading…
Reference in a new issue