perf(classifier): scan is a pure listing — no getFile() per file; lazy zips

The scan was slow because it OPENED every file (getFile() for size/lastModified
— which the grid doesn't even display) and read every ZIP inline. On a network
share that's a round-trip per file. Now:

- createFileObject builds rows from the directory entry name alone, no
  getFile(); size/lastModified load on demand (preview/SHA/rename already call
  getFile() themselves). The scan is now a pure directory listing.
- ZIPs are lazy: a .zip is an expandable node read only when opened
  (scanZipNode), not during the walk.
- Footer shows live elapsed time (ticks every second), and a success toast
  fires at completion with totals: "Scan complete — N folders, M files in Ts."

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
ZDDC 2026-06-09 10:55:29 -05:00
parent cb1456e55f
commit caff489206
2 changed files with 99 additions and 51 deletions

View file

@ -32,18 +32,27 @@
updateScanStatus(); updateScanStatus();
} }
// Render the running scan status into the tree-pane header. // elapsed since the scan started, e.g. "3.2s" or "1m 04s".
// Format the time elapsed since the current scan started.
// Returns '0s' when no scan has been started (scanStats is unset),
// tenths of a second below one minute ("3.2s"), and minutes plus
// zero-padded whole seconds from one minute up ("1m 04s").
function elapsedStr() {
    if (!scanStats) return '0s';
    // Clamp at zero so a backwards wall-clock adjustment never yields a
    // negative duration.
    const ms = Math.max(0, Date.now() - scanStats.startedAt);
    // Truncate (rather than round) to tenths so 59.95s–59.99s shows "59.9s"
    // instead of a misleading "60.0s" just before the format switches.
    if (ms < 60000) return (Math.floor(ms / 100) / 10).toFixed(1) + 's';
    // Floor to whole seconds FIRST, then split into minutes and seconds.
    // Rounding the remainder on its own could produce "1m 60s".
    const totalSecs = Math.floor(ms / 1000);
    const m = Math.floor(totalSecs / 60);
    const s = totalSecs % 60;
    return m + 'm ' + String(s).padStart(2, '0') + 's';
}
// Render the running scan status (with live elapsed time) into the footer.
function updateScanStatus() { function updateScanStatus() {
const el = document.getElementById('scanStatus'); const el = document.getElementById('scanStatus');
if (!el || !scanStats) return; if (!el || !scanStats) return;
if (scanStats.done) { if (scanStats.done) {
const secs = ((Date.now() - scanStats.startedAt) / 1000).toFixed(1);
el.textContent = 'Scanned ' + scanStats.folders + ' folders · ' el.textContent = 'Scanned ' + scanStats.folders + ' folders · '
+ scanStats.files + ' files in ' + secs + 's'; + scanStats.files + ' files in ' + elapsedStr();
el.classList.remove('scanning'); el.classList.remove('scanning');
} else { } else {
el.textContent = 'Scanning… ' + scanStats.folders + ' folders · ' el.textContent = 'Scanning… ' + scanStats.folders + ' folders · '
+ scanStats.files + ' files' + scanStats.files + ' files · ' + elapsedStr()
+ (scanStats.current ? ' — ' + scanStats.current : ''); + (scanStats.current ? ' — ' + scanStats.current : '');
el.classList.add('scanning'); el.classList.add('scanning');
} }
@ -139,6 +148,13 @@
} }
flushRender(); flushRender();
// Tick the footer's elapsed time once a second even if no new folder
// landed (so a slow directory doesn't make the timer look frozen).
const ticker = setInterval(function () {
if (myGen !== scanGen || (scanStats && scanStats.done)) { clearInterval(ticker); return; }
updateScanStatus();
}, 1000);
// Breadth-first by level behind a bounded worker pool: level 1, then // Breadth-first by level behind a bounded worker pool: level 1, then
// level 2, … each rendered as it lands (top levels appear first). // level 2, … each rendered as it lands (top levels appear first).
// Deeper levels keep filling in; workers await between directories so // Deeper levels keep filling in; workers await between directories so
@ -155,11 +171,20 @@
} }
level = next; level = next;
} }
clearInterval(ticker);
if (myGen !== scanGen) return; // superseded by a newer scan if (myGen !== scanGen) return; // superseded by a newer scan
scanStats.done = true; scanStats.done = true;
scanStats.current = ''; scanStats.current = '';
flushRender(); flushRender();
// Completion toast with the totals + elapsed time.
if (window.zddc && typeof window.zddc.toast === 'function') {
window.zddc.toast(
'Scan complete — ' + scanStats.folders + ' folders, '
+ scanStats.files + ' files in ' + elapsedStr() + '.',
'success');
}
} }
// Run fn over items with at most `limit` concurrent calls; resolves when // Run fn over items with at most `limit` concurrent calls; resolves when
@ -229,6 +254,8 @@
// only a 'pending' node is scanned, so concurrent callers (background + // only a 'pending' node is scanned, so concurrent callers (background +
// open-prioritised) don't double-scan. // open-prioritised) don't double-scan.
async function scanNodeChildren(node, myGen) { async function scanNodeChildren(node, myGen) {
// A .zip is a lazy node — read its contents only when opened.
if (node.scanState === 'zip-pending') { await scanZipNode(node); return; }
if (node.scanState !== 'pending') return; if (node.scanState !== 'pending') return;
node.scanState = 'scanning'; node.scanState = 'scanning';
if (scanStats) scanStats.current = node.path; if (scanStats) scanStats.current = node.path;
@ -238,18 +265,19 @@
for await (const entry of node.handle.values()) { for await (const entry of node.handle.values()) {
if (myGen !== scanGen) { node.scanState = 'pending'; return; } // cancelled if (myGen !== scanGen) { node.scanState = 'pending'; return; } // cancelled
if (entry.kind === 'file') { if (entry.kind === 'file') {
const fo = await createFileObject(entry, node.handle); const fo = createFileObject(entry, node.handle);
if (!fo) continue;
fo.folderPath = node.path; fo.folderPath = node.path;
files.push(fo); files.push(fo);
if (scanStats) scanStats.files++; if (scanStats) scanStats.files++;
if (fo.extension === 'zip' && typeof JSZip !== 'undefined') { if (fo.extension === 'zip' && typeof JSZip !== 'undefined') {
// Don't read the archive during the listing — make an
// expandable, lazy zip node scanned on open (scanZipNode).
const zipName = zddc.joinExtension(fo.originalFilename, fo.extension); const zipName = zddc.joinExtension(fo.originalFilename, fo.extension);
const zipPath = node.path + '/' + zipName; const zipPath = node.path + '/' + zipName;
const zh = { name: zipName, kind: 'directory', isZipRoot: true, zipPath: zipPath }; const zh = { name: zipName, kind: 'directory', isZipRoot: true, zipPath: zipPath };
const zipNode = makeNode(zh, zipPath, node); const zipNode = makeNode(zh, zipPath, node);
try { await scanZipIntoNode(zipNode, fo); } zipNode._zipFileObj = fo;
catch (e) { reportScanError(zipPath, e); zipNode.scanState = 'done'; } zipNode.scanState = 'zip-pending';
childDirs.push(zipNode); childDirs.push(zipNode);
if (scanStats) scanStats.folders++; if (scanStats) scanStats.folders++;
} }
@ -267,18 +295,15 @@
node.fileCount = files.length; node.fileCount = files.length;
node.children = childDirs; node.children = childDirs;
node.subdirCount = childDirs.length; node.subdirCount = childDirs.length;
// Roll this folder's own files/dirs (plus the full contents of any // Roll this folder's own files/dirs into the running subtree totals of
// inline-zip children) into the running subtree totals of this node // this node + every ancestor. Real child dirs add their share when they
// and every ancestor. Regular child dirs add their own share when they // get scanned; lazy zip nodes add theirs when opened (scanZipNode).
// get scanned — that's how the total fills in progressively. const addF = files.length;
let addF = files.length; const addD = childDirs.length;
let addD = childDirs.length;
for (const c of childDirs) {
if (c.scanState === 'done') { addF += c.runFiles; addD += c.runDirs; }
}
for (let a = node; a; a = a.parent) { a.runFiles += addF; a.runDirs += addD; } for (let a = node; a; a = a.parent) { a.runFiles += addF; a.runDirs += addD; }
// Zip children are scanned inline ('done'); real dirs are still pending. // Only real unscanned dirs hold the parent open; zip-pending children
node.pending = childDirs.filter(function (c) { return c.scanState !== 'done'; }).length; // are lazy, so they don't.
node.pending = childDirs.filter(function (c) { return c.scanState === 'pending'; }).length;
if (node.pending === 0) { if (node.pending === 0) {
markDone(node); markDone(node);
} else { } else {
@ -287,6 +312,30 @@
scheduleRender(); scheduleRender();
} }
// Expand a lazy zip node the first time it is opened. Does nothing unless the
// node is still 'zip-pending' and still holds the file object for its archive.
// The state flips to 'scanning' before the first await, so a second caller
// arriving while the read is in flight returns immediately.
async function scanZipNode(node) {
    const isUnreadZip = node.scanState === 'zip-pending' && Boolean(node._zipFileObj);
    if (!isUnreadZip) return;

    node.scanState = 'scanning';
    scheduleRender();

    try {
        // scanZipIntoNode is expected to build the children, fill in
        // runFiles/runDirs and mark the node 'done'.
        await scanZipIntoNode(node, node._zipFileObj);
    } catch (err) {
        // An unreadable archive is reported and shown as an empty, finished
        // node so it contributes nothing to the totals below.
        reportScanError(node.path, err);
        node.scanState = 'done';
        node.runFiles = 0;
        node.runDirs = 0;
    }

    // Drop the reference to the archive's file object once it has been read.
    node._zipFileObj = null;

    // The zip itself was already counted as one dir when its parent was
    // listed; add only its internal totals to each ancestor up the chain.
    let ancestor = node.parent;
    while (ancestor) {
        ancestor.runFiles += node.runFiles;
        ancestor.runDirs += node.runDirs;
        ancestor = ancestor.parent;
    }

    scheduleRender();
}
// Build a zip-root node's children from its archive contents (in memory), // Build a zip-root node's children from its archive contents (in memory),
// marking the whole zip subtree 'done' immediately. Mirrors the on-disk // marking the whole zip subtree 'done' immediately. Mirrors the on-disk
// node shape so the rest of the app treats zip folders like real ones. // node shape so the rest of the app treats zip folders like real ones.
@ -618,37 +667,34 @@
/** /**
* Create file object with metadata * Create file object with metadata
*/ */
async function createFileObject(fileHandle, folderHandle) { // Build a file row from JUST the directory entry — no getFile(). Listing a
try { // network share is already slow; the old code opened EVERY file to read
const file = await fileHandle.getFile(); // size/lastModified (which the grid doesn't even display), turning a
const split = zddc.splitExtension(file.name); // listing into one network round-trip per file. size/lastModified are now
// loaded on demand by preview / SHA / rename, which call getFile()
return { // themselves. The scan is now a pure directory listing.
handle: fileHandle, function createFileObject(fileHandle, folderHandle) {
folderHandle: folderHandle, const split = zddc.splitExtension(fileHandle.name);
originalFilename: split.name, return {
extension: split.extension, handle: fileHandle,
size: file.size, folderHandle: folderHandle,
lastModified: file.lastModified, originalFilename: split.name,
extension: split.extension,
// Editable fields size: null,
trackingNumber: '', lastModified: null,
revision: '', // Editable fields
status: '', trackingNumber: '',
title: '', revision: '',
status: '',
// State title: '',
isDirty: false, // State
error: false, isDirty: false,
errorMessage: '', error: false,
validation: null, errorMessage: '',
sha256: null validation: null,
// folderPath will be added later in buildTree sha256: null
}; // folderPath added by the caller.
} catch (err) { };
console.error('Error reading file:', fileHandle.name, err);
return null;
}
} }
// Export module // Export module

View file

@ -37,6 +37,7 @@
el.textContent = ''; el.textContent = '';
const st = folder.scanState; const st = folder.scanState;
if (st === 'pending') return; if (st === 'pending') return;
if (st === 'zip-pending') { el.textContent = '(zip — open to scan)'; return; }
if (st === 'scanning') { el.textContent = 'scanning…'; return; } if (st === 'scanning') { el.textContent = 'scanning…'; return; }
const done = st === 'done'; const done = st === 'done';
@ -93,7 +94,8 @@
const toggle = document.createElement('span'); const toggle = document.createElement('span');
toggle.className = 'folder-toggle'; toggle.className = 'folder-toggle';
const mightHaveChildren = (folder.children && folder.children.length > 0) const mightHaveChildren = (folder.children && folder.children.length > 0)
|| folder.scanState === 'pending'; || folder.scanState === 'pending'
|| folder.scanState === 'zip-pending';
if (mightHaveChildren) { if (mightHaveChildren) {
toggle.textContent = folder.expanded ? '▼' : '▶'; toggle.textContent = folder.expanded ? '▼' : '▶';
toggle.addEventListener('click', (e) => { toggle.addEventListener('click', (e) => {