feat(classifier): persist & copy files inside .zip archives
Zip members were live-only: expandable while the source was connected, but the workspace snapshot dropped the archive (.zip became a plain file), so a classification made inside one vanished on reopen — and copy couldn't extract it anyway (it tried to walk the archive path as a real directory).

Now zips are first-class:

- snapshotTree/loadSnapshot persist the scanned archive subtree — zip-root + virtual folders + members carry isVirtual/zipPath/zipEntryPath, so the tree rebuilds on reopen and assignments inside an archive survive. An archive that was never opened persists as a lazy 'zip' node that reopens on demand.
- scanner.ensureZipLoaded(rootHandle, zipPath) reloads an archive from the workspace root when the in-memory cache is cold (post-restore); scanZipNode falls back to it when a restored zip node has no live file object.
- copy.js reads a member via scanner.extractZipMember (Blob from the archive) instead of a non-existent file handle; preview.js reloads the archive for a restored member before opening it.

This also reconciles export/import with the snapshot: both now keep zip members, so a round-trip no longer leaves dangling in-archive assignments.

Tests: zip subtree snapshot round-trip; copy extracts a member to the output (45).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
e1c479dba5
commit
203674ee4c
4 changed files with 161 additions and 18 deletions
|
|
@ -90,7 +90,7 @@
|
||||||
|
|
||||||
async function sameContent(existingHandle, srcFileObj) {
|
async function sameContent(existingHandle, srcFileObj) {
|
||||||
var ef = await existingHandle.getFile();
|
var ef = await existingHandle.getFile();
|
||||||
var sf = await (await srcHandle(srcFileObj)).getFile();
|
var sf = await readSource(srcFileObj);
|
||||||
if (ef.size !== sf.size) return false;
|
if (ef.size !== sf.size) return false;
|
||||||
var a = await window.zddc.crypto.sha256File(ef);
|
var a = await window.zddc.crypto.sha256File(ef);
|
||||||
var b = await window.zddc.crypto.sha256File(sf);
|
var b = await window.zddc.crypto.sha256File(sf);
|
||||||
|
|
@ -105,6 +105,16 @@
|
||||||
return window.app.modules.scanner.resolveFileHandle(window.app.rootHandle, fileObj);
|
return window.app.modules.scanner.resolveFileHandle(window.app.rootHandle, fileObj);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Read a source file's bytes (a File or Blob). A zip member is extracted
|
||||||
|
// from its archive (lazily reloaded from the root); a plain file is read
|
||||||
|
// through its resolved handle. The source is never written either way.
|
||||||
|
async function readSource(fileObj) {
|
||||||
|
if (fileObj.isVirtual) {
|
||||||
|
return window.app.modules.scanner.extractZipMember(window.app.rootHandle, fileObj);
|
||||||
|
}
|
||||||
|
return (await srcHandle(fileObj)).getFile();
|
||||||
|
}
|
||||||
|
|
||||||
// Copy one file. Returns 'copied' | 'skipped' (identical) | 'differ' (left alone).
|
// Copy one file. Returns 'copied' | 'skipped' (identical) | 'differ' (left alone).
|
||||||
async function copyOne(out, p) {
|
async function copyOne(out, p) {
|
||||||
var dir = await ensureDir(out, p.d.outPath);
|
var dir = await ensureDir(out, p.d.outPath);
|
||||||
|
|
@ -113,7 +123,7 @@
|
||||||
if (existing) {
|
if (existing) {
|
||||||
return (await sameContent(existing, p.file)) ? 'skipped' : 'differ';
|
return (await sameContent(existing, p.file)) ? 'skipped' : 'differ';
|
||||||
}
|
}
|
||||||
var srcFile = await (await srcHandle(p.file)).getFile(); // READ source (never write it)
|
var srcFile = await readSource(p.file); // READ source (never write it)
|
||||||
var fh = await dir.getFileHandle(p.d.filename, { create: true });
|
var fh = await dir.getFileHandle(p.d.filename, { create: true });
|
||||||
var w = await fh.createWritable();
|
var w = await fh.createWritable();
|
||||||
await w.write(srcFile);
|
await w.write(srcFile);
|
||||||
|
|
|
||||||
|
|
@ -526,12 +526,22 @@
|
||||||
// permission re-grant) before opening the preview window.
|
// permission re-grant) before opening the preview window.
|
||||||
async function previewFile(file) {
|
async function previewFile(file) {
|
||||||
try {
|
try {
|
||||||
if (!file.handle && !file.isVirtual && window.app.rootHandle) {
|
const sc = window.app.modules.scanner;
|
||||||
|
if (file.isVirtual) {
|
||||||
|
// Snapshot-restored zip member — reload its archive from the root.
|
||||||
|
if (window.app.rootHandle && !sc.getZipCache(file.zipPath)) {
|
||||||
if (window.app.modules.persist && window.app.modules.persist.verifyPermission) {
|
if (window.app.modules.persist && window.app.modules.persist.verifyPermission) {
|
||||||
const ok = await window.app.modules.persist.verifyPermission(window.app.rootHandle, false);
|
const ok = await window.app.modules.persist.verifyPermission(window.app.rootHandle, false);
|
||||||
if (!ok) { if (window.zddc) window.zddc.toast('Permission to read the source directory was denied.', 'error'); return; }
|
if (!ok) { if (window.zddc) window.zddc.toast('Permission to read the source directory was denied.', 'error'); return; }
|
||||||
}
|
}
|
||||||
await window.app.modules.scanner.resolveFileHandle(window.app.rootHandle, file);
|
await sc.ensureZipLoaded(window.app.rootHandle, file.zipPath);
|
||||||
|
}
|
||||||
|
} else if (!file.handle && window.app.rootHandle) {
|
||||||
|
if (window.app.modules.persist && window.app.modules.persist.verifyPermission) {
|
||||||
|
const ok = await window.app.modules.persist.verifyPermission(window.app.rootHandle, false);
|
||||||
|
if (!ok) { if (window.zddc) window.zddc.toast('Permission to read the source directory was denied.', 'error'); return; }
|
||||||
|
}
|
||||||
|
await sc.resolveFileHandle(window.app.rootHandle, file);
|
||||||
}
|
}
|
||||||
await openPreviewWindow(file);
|
await openPreviewWindow(file);
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
|
|
|
||||||
|
|
@ -369,11 +369,23 @@
|
||||||
// Read a lazy zip node's contents on demand (when opened), building its
|
// Read a lazy zip node's contents on demand (when opened), building its
|
||||||
// child nodes and folding its internal totals into ancestors.
|
// child nodes and folding its internal totals into ancestors.
|
||||||
async function scanZipNode(node) {
|
async function scanZipNode(node) {
|
||||||
if (node.scanState !== 'zip-pending' || !node._zipFileObj) return;
|
if (node.scanState !== 'zip-pending') return;
|
||||||
|
var fileObj = node._zipFileObj;
|
||||||
|
if (!fileObj) {
|
||||||
|
// Restored from a snapshot — no live file object. Resolve the .zip
|
||||||
|
// from the workspace root by its path so it can be opened on demand.
|
||||||
|
if (!window.app.rootHandle || !node.zipPath) return;
|
||||||
|
try {
|
||||||
|
var dir = await resolveDirHandle(window.app.rootHandle, relFromRoot(parentPath(node.zipPath)));
|
||||||
|
fileObj = { handle: await dir.getFileHandle(baseName(node.zipPath)), folderHandle: dir };
|
||||||
|
} catch (e) {
|
||||||
|
reportScanError(node.path, e); node.scanState = 'done'; node.runFiles = 0; node.runDirs = 0; return;
|
||||||
|
}
|
||||||
|
}
|
||||||
node.scanState = 'scanning';
|
node.scanState = 'scanning';
|
||||||
scheduleRender();
|
scheduleRender();
|
||||||
try {
|
try {
|
||||||
await scanZipIntoNode(node, node._zipFileObj); // builds children, runFiles/runDirs, sets 'done'
|
await scanZipIntoNode(node, fileObj); // builds children, runFiles/runDirs, sets 'done'
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
reportScanError(node.path, e);
|
reportScanError(node.path, e);
|
||||||
node.scanState = 'done';
|
node.scanState = 'done';
|
||||||
|
|
@ -754,22 +766,30 @@
|
||||||
// ── Workspace snapshot (scan once, resume without re-walking the FS) ────
|
// ── Workspace snapshot (scan once, resume without re-walking the FS) ────
|
||||||
|
|
||||||
// Serialize the completed scan to compact JSON (short keys: large trees).
|
// Serialize the completed scan to compact JSON (short keys: large trees).
|
||||||
// Zip-root nodes are NOT preserved as expandable folders — the .zip stays a
|
// Zip subtrees ARE preserved: a scanned archive keeps its virtual folders +
|
||||||
// plain file in its parent (classifying inside archives is out of scope for
|
// members so classifications inside it survive reopen; copy/preview re-load
|
||||||
// a persisted workspace).
|
// the archive lazily from the root (ensureZipLoaded). An archive that was
|
||||||
|
// never opened persists as a lazy 'zip' node that reopens on demand.
|
||||||
function snapshotTree() {
|
function snapshotTree() {
|
||||||
function serFile(f) { return { o: f.originalFilename, e: f.extension, p: f.folderPath }; }
|
function serFile(f) {
|
||||||
|
var o = { o: f.originalFilename, e: f.extension, p: f.folderPath };
|
||||||
|
if (f.isVirtual) { o.z = f.zipPath; o.ze = f.zipEntryPath; } // zip member
|
||||||
|
return o;
|
||||||
|
}
|
||||||
function serNode(n) {
|
function serNode(n) {
|
||||||
var o = { n: n.name, p: n.path };
|
var o = { n: n.name, p: n.path };
|
||||||
|
if (n.isZipRoot) o.zr = 1; // archive root (zipPath === n.path)
|
||||||
|
else if (n.isVirtualDir) o.vd = n.zipPath; // folder inside an archive
|
||||||
if (n.files && n.files.length) o.f = n.files.map(serFile);
|
if (n.files && n.files.length) o.f = n.files.map(serFile);
|
||||||
var realKids = (n.children || []).filter(function (c) { return !c.isZipRoot; });
|
if (n.children && n.children.length) o.c = n.children.map(serNode);
|
||||||
if (realKids.length) o.c = realKids.map(serNode);
|
|
||||||
// Record scan progress so an interrupted scan can resume: 'children'
|
// Record scan progress so an interrupted scan can resume: 'children'
|
||||||
// = direct entries fully read (kids may still be pending); anything
|
// = direct entries fully read (kids may still be pending); anything
|
||||||
// unfinished (pending/scanning/zip) → 'pending' to re-read. 'done'
|
// unfinished → 'pending' to re-read. An unopened archive persists as
|
||||||
// is the default and omitted.
|
// 'zip' (reopen lazily, never a real dir re-walk). 'done' is the
|
||||||
|
// default and omitted.
|
||||||
var st = n.scanState;
|
var st = n.scanState;
|
||||||
if (st && st !== 'done') o.s = (st === 'children') ? 'children' : 'pending';
|
if (n.isZipRoot && st !== 'done') o.s = 'zip';
|
||||||
|
else if (st && st !== 'done') o.s = (st === 'children') ? 'children' : 'pending';
|
||||||
return o;
|
return o;
|
||||||
}
|
}
|
||||||
return (window.app.folderTree || []).map(serNode);
|
return (window.app.folderTree || []).map(serNode);
|
||||||
|
|
@ -780,7 +800,7 @@
|
||||||
// workspace root handle at copy/preview time.
|
// workspace root handle at copy/preview time.
|
||||||
function loadSnapshot(snap) {
|
function loadSnapshot(snap) {
|
||||||
function deFile(sf) {
|
function deFile(sf) {
|
||||||
return {
|
var fo = {
|
||||||
handle: null, folderHandle: null,
|
handle: null, folderHandle: null,
|
||||||
originalFilename: sf.o, extension: sf.e,
|
originalFilename: sf.o, extension: sf.e,
|
||||||
size: null, lastModified: null,
|
size: null, lastModified: null,
|
||||||
|
|
@ -788,11 +808,18 @@
|
||||||
isDirty: false, error: false, errorMessage: '', validation: null, sha256: null,
|
isDirty: false, error: false, errorMessage: '', validation: null, sha256: null,
|
||||||
folderPath: sf.p,
|
folderPath: sf.p,
|
||||||
};
|
};
|
||||||
|
if (sf.z) { fo.isVirtual = true; fo.zipPath = sf.z; fo.zipEntryPath = sf.ze; }
|
||||||
|
return fo;
|
||||||
}
|
}
|
||||||
function deNode(sn, parent) {
|
function deNode(sn, parent) {
|
||||||
var node = makeNode({ name: sn.n, kind: 'directory' }, sn.p, parent);
|
var desc = { name: sn.n, kind: 'directory' };
|
||||||
|
if (sn.zr) { desc.isZipRoot = true; desc.zipPath = sn.p; }
|
||||||
|
else if (sn.vd) { desc.isVirtualDir = true; desc.zipPath = sn.vd; }
|
||||||
|
var node = makeNode(desc, sn.p, parent);
|
||||||
node.handle = null;
|
node.handle = null;
|
||||||
node.scanState = sn.s || 'done'; // 'pending'/'children' resume on reconnect
|
if (sn.zr || sn.vd) node.virtualPath = sn.p;
|
||||||
|
// 'zip' restores an unopened archive (reopen lazily); else resume marker.
|
||||||
|
node.scanState = sn.s === 'zip' ? 'zip-pending' : (sn.s || 'done');
|
||||||
node.expanded = false;
|
node.expanded = false;
|
||||||
node.files = (sn.f || []).map(deFile);
|
node.files = (sn.f || []).map(deFile);
|
||||||
node.children = (sn.c || []).map(function (c) { return deNode(c, node); });
|
node.children = (sn.c || []).map(function (c) { return deNode(c, node); });
|
||||||
|
|
@ -819,6 +846,29 @@
|
||||||
|
|
||||||
// ── Lazy handle resolution (snapshot files carry paths, not handles) ────
|
// ── Lazy handle resolution (snapshot files carry paths, not handles) ────
|
||||||
function relFromRoot(p) { var i = (p || '').indexOf('/'); return i < 0 ? '' : p.slice(i + 1); }
|
function relFromRoot(p) { var i = (p || '').indexOf('/'); return i < 0 ? '' : p.slice(i + 1); }
|
||||||
|
function parentPath(p) { var i = (p || '').lastIndexOf('/'); return i < 0 ? '' : p.slice(0, i); }
|
||||||
|
function baseName(p) { var i = (p || '').lastIndexOf('/'); return i < 0 ? p : p.slice(i + 1); }
|
||||||
|
// Load (and cache) a zip archive by its tree path. After a snapshot restore
|
||||||
|
// the in-memory cache is empty, so resolve the .zip from the workspace root
|
||||||
|
// and parse it on demand. Returns the cache record { zip, fileHandle, ... }.
|
||||||
|
async function ensureZipLoaded(rootHandle, zipPath) {
|
||||||
|
var cached = zipCache.get(zipPath);
|
||||||
|
if (cached && cached.zip) return cached;
|
||||||
|
if (!rootHandle) throw new Error('source directory not connected');
|
||||||
|
var dir = await resolveDirHandle(rootHandle, relFromRoot(parentPath(zipPath)));
|
||||||
|
var fh = await dir.getFileHandle(baseName(zipPath));
|
||||||
|
var zip = await JSZip.loadAsync(await (await fh.getFile()).arrayBuffer());
|
||||||
|
var rec = { zip: zip, fileHandle: fh, folderHandle: dir };
|
||||||
|
zipCache.set(zipPath, rec);
|
||||||
|
return rec;
|
||||||
|
}
|
||||||
|
// Read a zip member's bytes as a Blob (lazily loading its archive).
|
||||||
|
async function extractZipMember(rootHandle, fileObj) {
|
||||||
|
var rec = await ensureZipLoaded(rootHandle, fileObj.zipPath);
|
||||||
|
var entry = rec.zip.file(fileObj.zipEntryPath);
|
||||||
|
if (!entry) throw new Error('zip member not found: ' + fileObj.zipEntryPath);
|
||||||
|
return await entry.async('blob');
|
||||||
|
}
|
||||||
async function resolveDirHandle(rootHandle, relPath) {
|
async function resolveDirHandle(rootHandle, relPath) {
|
||||||
var cur = rootHandle;
|
var cur = rootHandle;
|
||||||
var parts = (relPath || '').split('/').filter(Boolean);
|
var parts = (relPath || '').split('/').filter(Boolean);
|
||||||
|
|
@ -886,6 +936,8 @@
|
||||||
loadSnapshot,
|
loadSnapshot,
|
||||||
resolveFileHandle,
|
resolveFileHandle,
|
||||||
resolveDirHandle,
|
resolveDirHandle,
|
||||||
|
ensureZipLoaded,
|
||||||
|
extractZipMember,
|
||||||
resumeScan
|
resumeScan
|
||||||
};
|
};
|
||||||
})();
|
})();
|
||||||
|
|
|
||||||
|
|
@ -812,3 +812,74 @@ test('search opens only the branch with a hit, leaving siblings collapsed', asyn
|
||||||
expect(r.folders).toEqual(['Project', 'Project/Electrical']);
|
expect(r.folders).toEqual(['Project', 'Project/Electrical']);
|
||||||
expect(r.files).toEqual(['Switchgear Spec.pdf']);
|
expect(r.files).toEqual(['Switchgear Spec.pdf']);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test('snapshot: a scanned zip subtree round-trips with its virtual members', async ({ page }) => {
|
||||||
|
const r = await page.evaluate(() => {
|
||||||
|
const sc = window.app.modules.scanner;
|
||||||
|
window.app.folderTree = [{
|
||||||
|
name: 'Root', path: 'Root', scanState: 'done', files: [], children: [{
|
||||||
|
name: 'docs.zip', path: 'Root/docs.zip', isZipRoot: true, zipPath: 'Root/docs.zip',
|
||||||
|
scanState: 'done', children: [], files: [{
|
||||||
|
originalFilename: 'spec', extension: 'pdf', folderPath: 'Root/docs.zip',
|
||||||
|
isVirtual: true, zipPath: 'Root/docs.zip', zipEntryPath: 'spec.pdf',
|
||||||
|
}],
|
||||||
|
}],
|
||||||
|
}];
|
||||||
|
const json = JSON.stringify(sc.snapshotTree());
|
||||||
|
window.app.folderTree = [];
|
||||||
|
sc.loadSnapshot(JSON.parse(json));
|
||||||
|
const zip = window.app.folderTree[0].children[0];
|
||||||
|
const m = zip.files[0];
|
||||||
|
return {
|
||||||
|
isZipRoot: zip.isZipRoot, zipPath: zip.zipPath, done: zip.scanState === 'done',
|
||||||
|
virtual: m.isVirtual, mZip: m.zipPath, entry: m.zipEntryPath, handleNull: m.handle === null,
|
||||||
|
};
|
||||||
|
});
|
||||||
|
expect(r.isZipRoot).toBe(true); // archive preserved as an expandable folder
|
||||||
|
expect(r.zipPath).toBe('Root/docs.zip');
|
||||||
|
expect(r.done).toBe(true);
|
||||||
|
expect(r.virtual).toBe(true); // member flagged virtual…
|
||||||
|
expect(r.mZip).toBe('Root/docs.zip'); // …with enough to re-extract
|
||||||
|
expect(r.entry).toBe('spec.pdf');
|
||||||
|
expect(r.handleNull).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
test('copy: a zip member is extracted from its archive and written out', async ({ page }) => {
|
||||||
|
await page.click('#modeClassifyBtn');
|
||||||
|
const res = await page.evaluate(async () => {
|
||||||
|
const c = window.app.modules.classify, copy = window.app.modules.copy;
|
||||||
|
const f = {
|
||||||
|
originalFilename: 'spec', extension: 'pdf', folderPath: 'Root/docs.zip',
|
||||||
|
isVirtual: true, zipPath: 'Root/docs.zip', zipEntryPath: 'spec.pdf', handle: null,
|
||||||
|
};
|
||||||
|
window.app.folderTree = [{ name: 'Root', path: 'Root', files: [], children: [
|
||||||
|
{ name: 'docs.zip', path: 'Root/docs.zip', isZipRoot: true, files: [f], children: [] },
|
||||||
|
] }];
|
||||||
|
// Stub archive extraction — return the member's bytes as a Blob.
|
||||||
|
window.app.rootHandle = {};
|
||||||
|
window.app.modules.scanner.extractZipMember = async () => new File(['ZIPBYTES'], 'spec.pdf');
|
||||||
|
|
||||||
|
const leaf = c.addTrackingNode(c.addTrackingNode(null, 'ACME-MECH-0001'), 'A (IFR)');
|
||||||
|
const bin = c.addTransmittalBin(c.addParty('ClientCorp'), 'received', { date: '2026-03-14', type: 'TRN', seq: '0007' });
|
||||||
|
c.place([c.srcKeyForFile(f)], leaf, 'tracking'); c.place([c.srcKeyForFile(f)], bin, 'transmittal');
|
||||||
|
|
||||||
|
const outStore = {};
|
||||||
|
const mkOut = (prefix) => ({
|
||||||
|
name: prefix || 'out',
|
||||||
|
getDirectoryHandle: async (n) => mkOut((prefix ? prefix + '/' : '') + n),
|
||||||
|
getFileHandle: async (n, opts) => {
|
||||||
|
const full = (prefix ? prefix + '/' : '') + n;
|
||||||
|
if (!opts || !opts.create) { if (!(full in outStore)) { const e = new Error('NF'); e.name = 'NotFoundError'; throw e; } }
|
||||||
|
return {
|
||||||
|
getFile: async () => new File([outStore[full] != null ? outStore[full] : ''], n),
|
||||||
|
createWritable: async () => ({ write: async (d) => { outStore[full] = (d && d.text) ? await d.text() : d; }, close: async () => { } }),
|
||||||
|
};
|
||||||
|
},
|
||||||
|
});
|
||||||
|
const s = await copy.copyTo(mkOut(''), copy.plan());
|
||||||
|
return { copied: s.copied, content: Object.values(outStore)[0], wrote: Object.keys(outStore).some((k) => k.endsWith('spec.pdf')) };
|
||||||
|
});
|
||||||
|
expect(res.copied).toBe(1);
|
||||||
|
expect(res.wrote).toBe(true);
|
||||||
|
expect(res.content).toBe('ZIPBYTES');
|
||||||
|
});
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue