perf(classifier): scan is a pure listing — no getFile() per file; lazy zips

The scan was slow because it OPENED every file (getFile() for size/lastModified
— which the grid doesn't even display) and read every ZIP inline. On a network
share that's a round-trip per file. Now:

- createFileObject builds rows from the directory entry name alone, no
  getFile(); size/lastModified load on demand (preview/SHA/rename already call
  getFile() themselves). The scan is now a pure directory listing.
- ZIPs are lazy: a .zip is an expandable node read only when opened
  (scanZipNode), not during the walk.
- Footer shows live elapsed time (ticks every second), and a success toast
  fires at completion with totals: "Scan complete — N folders, M files in Ts."

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
ZDDC 2026-06-09 10:55:29 -05:00
parent cb1456e55f
commit caff489206
2 changed files with 99 additions and 51 deletions

View file

@ -32,18 +32,27 @@
updateScanStatus();
}
// Render the running scan status into the tree-pane header.
/**
 * Format the time elapsed since the current scan started.
 *
 * Reads the shared `scanStats.startedAt` timestamp and compares it with
 * `Date.now()`.
 *
 * @returns {string} '0s' when there is no scan state; tenths of a second
 *   below one minute (e.g. "3.2s"); otherwise minutes plus zero-padded
 *   seconds (e.g. "1m 04s").
 */
function elapsedStr() {
    if (!scanStats) return '0s';
    const ms = Date.now() - scanStats.startedAt;
    // Round to the displayed precision BEFORE picking the format, so a value
    // like 59.96s rolls over to "1m 00s" instead of rendering as "60.0s".
    const tenths = Math.round(ms / 100);
    if (tenths < 600) return (tenths / 10).toFixed(1) + 's';
    // Round to whole seconds first, then split into minutes/seconds. Rounding
    // the remainder after taking the minute floor could yield "1m 60s".
    const totalSeconds = Math.round(ms / 1000);
    const m = Math.floor(totalSeconds / 60);
    const s = totalSeconds % 60;
    return m + 'm ' + (s < 10 ? '0' : '') + s + 's';
}
// Render the running scan status (with live elapsed time) into the footer.
function updateScanStatus() {
const el = document.getElementById('scanStatus');
if (!el || !scanStats) return;
if (scanStats.done) {
const secs = ((Date.now() - scanStats.startedAt) / 1000).toFixed(1);
el.textContent = 'Scanned ' + scanStats.folders + ' folders · '
+ scanStats.files + ' files in ' + secs + 's';
+ scanStats.files + ' files in ' + elapsedStr();
el.classList.remove('scanning');
} else {
el.textContent = 'Scanning… ' + scanStats.folders + ' folders · '
+ scanStats.files + ' files'
+ scanStats.files + ' files · ' + elapsedStr()
+ (scanStats.current ? ' — ' + scanStats.current : '');
el.classList.add('scanning');
}
@ -139,6 +148,13 @@
}
flushRender();
// Tick the footer's elapsed time once a second even if no new folder
// landed (so a slow directory doesn't make the timer look frozen).
const ticker = setInterval(function () {
if (myGen !== scanGen || (scanStats && scanStats.done)) { clearInterval(ticker); return; }
updateScanStatus();
}, 1000);
// Breadth-first by level behind a bounded worker pool: level 1, then
// level 2, … each rendered as it lands (top levels appear first).
// Deeper levels keep filling in; workers await between directories so
@ -155,11 +171,20 @@
}
level = next;
}
clearInterval(ticker);
if (myGen !== scanGen) return; // superseded by a newer scan
scanStats.done = true;
scanStats.current = '';
flushRender();
// Completion toast with the totals + elapsed time.
if (window.zddc && typeof window.zddc.toast === 'function') {
window.zddc.toast(
'Scan complete — ' + scanStats.folders + ' folders, '
+ scanStats.files + ' files in ' + elapsedStr() + '.',
'success');
}
}
// Run fn over items with at most `limit` concurrent calls; resolves when
@ -229,6 +254,8 @@
// only a 'pending' node is scanned, so concurrent callers (background +
// open-prioritised) don't double-scan.
async function scanNodeChildren(node, myGen) {
// A .zip is a lazy node — read its contents only when opened.
if (node.scanState === 'zip-pending') { await scanZipNode(node); return; }
if (node.scanState !== 'pending') return;
node.scanState = 'scanning';
if (scanStats) scanStats.current = node.path;
@ -238,18 +265,19 @@
for await (const entry of node.handle.values()) {
if (myGen !== scanGen) { node.scanState = 'pending'; return; } // cancelled
if (entry.kind === 'file') {
const fo = await createFileObject(entry, node.handle);
if (!fo) continue;
const fo = createFileObject(entry, node.handle);
fo.folderPath = node.path;
files.push(fo);
if (scanStats) scanStats.files++;
if (fo.extension === 'zip' && typeof JSZip !== 'undefined') {
// Don't read the archive during the listing — make an
// expandable, lazy zip node scanned on open (scanZipNode).
const zipName = zddc.joinExtension(fo.originalFilename, fo.extension);
const zipPath = node.path + '/' + zipName;
const zh = { name: zipName, kind: 'directory', isZipRoot: true, zipPath: zipPath };
const zipNode = makeNode(zh, zipPath, node);
try { await scanZipIntoNode(zipNode, fo); }
catch (e) { reportScanError(zipPath, e); zipNode.scanState = 'done'; }
zipNode._zipFileObj = fo;
zipNode.scanState = 'zip-pending';
childDirs.push(zipNode);
if (scanStats) scanStats.folders++;
}
@ -267,18 +295,15 @@
node.fileCount = files.length;
node.children = childDirs;
node.subdirCount = childDirs.length;
// Roll this folder's own files/dirs (plus the full contents of any
// inline-zip children) into the running subtree totals of this node
// and every ancestor. Regular child dirs add their own share when they
// get scanned — that's how the total fills in progressively.
let addF = files.length;
let addD = childDirs.length;
for (const c of childDirs) {
if (c.scanState === 'done') { addF += c.runFiles; addD += c.runDirs; }
}
// Roll this folder's own files/dirs into the running subtree totals of
// this node + every ancestor. Real child dirs add their share when they
// get scanned; lazy zip nodes add theirs when opened (scanZipNode).
const addF = files.length;
const addD = childDirs.length;
for (let a = node; a; a = a.parent) { a.runFiles += addF; a.runDirs += addD; }
// Zip children are scanned inline ('done'); real dirs are still pending.
node.pending = childDirs.filter(function (c) { return c.scanState !== 'done'; }).length;
// Only real unscanned dirs hold the parent open; zip-pending children
// are lazy, so they don't.
node.pending = childDirs.filter(function (c) { return c.scanState === 'pending'; }).length;
if (node.pending === 0) {
markDone(node);
} else {
@ -287,6 +312,30 @@
scheduleRender();
}
/**
 * Expand a lazy zip node on demand.
 *
 * Does nothing unless the node is in the 'zip-pending' state and still holds
 * its `_zipFileObj`. Otherwise it marks the node 'scanning', delegates to
 * scanZipIntoNode, and on failure reports the error and leaves the node
 * 'done' with zero running totals. Afterwards the file-object reference is
 * dropped and the node's runFiles/runDirs are added to every ancestor.
 *
 * @param {object} node - zip tree node (reads scanState, _zipFileObj, path,
 *   parent, runFiles, runDirs).
 * @returns {Promise<void>}
 */
async function scanZipNode(node) {
    const isLazyZip = node.scanState === 'zip-pending' && Boolean(node._zipFileObj);
    if (!isLazyZip) return;

    // Flip the state synchronously so a second caller sees 'scanning' and bails.
    node.scanState = 'scanning';
    scheduleRender();

    const archive = node._zipFileObj;
    try {
        await scanZipIntoNode(node, archive);
    } catch (err) {
        reportScanError(node.path, err);
        node.scanState = 'done';
        node.runFiles = 0;
        node.runDirs = 0;
    }
    node._zipFileObj = null;

    // Only ancestors are updated here: the walk starts at node.parent, adding
    // this node's own running totals to each level up to the root.
    let ancestor = node.parent;
    while (ancestor) {
        ancestor.runFiles += node.runFiles;
        ancestor.runDirs += node.runDirs;
        ancestor = ancestor.parent;
    }
    scheduleRender();
}
// Build a zip-root node's children from its archive contents (in memory),
// marking the whole zip subtree 'done' immediately. Mirrors the on-disk
// node shape so the rest of the app treats zip folders like real ones.
@ -618,37 +667,34 @@
/**
* Create file object with metadata
*/
async function createFileObject(fileHandle, folderHandle) {
try {
const file = await fileHandle.getFile();
const split = zddc.splitExtension(file.name);
// Build a file row from JUST the directory entry — no getFile(). Listing a
// network share is already slow; the old code opened EVERY file to read
// size/lastModified (which the grid doesn't even display), turning a
// listing into one network round-trip per file. size/lastModified are now
// loaded on demand by preview / SHA / rename, which call getFile()
// themselves. The scan is now a pure directory listing.
function createFileObject(fileHandle, folderHandle) {
const split = zddc.splitExtension(fileHandle.name);
return {
handle: fileHandle,
folderHandle: folderHandle,
originalFilename: split.name,
extension: split.extension,
size: file.size,
lastModified: file.lastModified,
size: null,
lastModified: null,
// Editable fields
trackingNumber: '',
revision: '',
status: '',
title: '',
// State
isDirty: false,
error: false,
errorMessage: '',
validation: null,
sha256: null
// folderPath will be added later in buildTree
// folderPath added by the caller.
};
} catch (err) {
console.error('Error reading file:', fileHandle.name, err);
return null;
}
}
// Export module

View file

@ -37,6 +37,7 @@
el.textContent = '';
const st = folder.scanState;
if (st === 'pending') return;
if (st === 'zip-pending') { el.textContent = '(zip — open to scan)'; return; }
if (st === 'scanning') { el.textContent = 'scanning…'; return; }
const done = st === 'done';
@ -93,7 +94,8 @@
const toggle = document.createElement('span');
toggle.className = 'folder-toggle';
const mightHaveChildren = (folder.children && folder.children.length > 0)
|| folder.scanState === 'pending';
|| folder.scanState === 'pending'
|| folder.scanState === 'zip-pending';
if (mightHaveChildren) {
toggle.textContent = folder.expanded ? '▼' : '▶';
toggle.addEventListener('click', (e) => {