feat(classifier): incremental scan — status, top-levels-first, per-folder state

Replaces the full depth-first "scan everything, then render once + expandAll +
selectAll" walk (which looked stalled and was a render bomb on a large network
drive) with a progressive, breadth-first scan:

- Walks level-by-level behind a bounded worker pool (6), rendering as it goes —
  the top folder levels appear immediately, deeper levels fill in the
  background. Workers await between directories so the UI stays responsive.
- Live status line under the tree header: "Scanning… N folders · M files —
  <current path>", ending "Scanned … in Ts."
- Per-folder state machine (pending → scanning → children → done) with
  immediate subfolder/file counts; the row is greyed (with a faint pulse) until
  its whole subtree is scanned, then turns solid — the at-a-glance signal.
- Opening a folder jumps its subtree to the front of the scan (ensureScanned),
  so an opened folder always shows complete contents; idempotent vs the
  background walk.
- No more auto-expand/auto-select-all (that loaded the entire drive up front);
  the root is selected so the grid shows its files immediately.
- ZIPs stay expandable, scanned inline into virtual nodes (already in memory
  once read); whole zip subtree marked done at once.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
ZDDC 2026-06-09 10:03:46 -05:00
parent e0ae0772da
commit ecb0a270cc
4 changed files with 338 additions and 70 deletions

View file

@ -175,6 +175,36 @@
background-color: var(--bg-hover);
}
/* A folder whose subtree isn't fully scanned yet: greyed name + counts,
turning solid once scanState hits 'done'. A faint pulse signals active
scanning. */
.folder-item.scanning .folder-name,
.folder-item.scanning .folder-count {
color: var(--text-muted, #8a8a8a);
}
.folder-item.scanning .folder-count {
font-style: italic;
animation: scan-pulse 1.2s ease-in-out infinite;
}
@keyframes scan-pulse {
0%, 100% { opacity: 0.55; }
50% { opacity: 1; }
}
/* Live scan status line under the tree-pane header. */
.scan-status {
padding: 0.25rem 0.6rem;
font-size: 0.75rem;
color: var(--text-muted, #8a8a8a);
border-bottom: 1px solid var(--border, #e2e2e2);
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
min-height: 1.1em;
}
.scan-status:empty { display: none; }
.scan-status.scanning { color: var(--primary, #2868c8); }
.folder-item.selected {
background-color: var(--bg-selected);
font-weight: 500;

View file

@ -8,13 +8,96 @@
// Store ZIP data for later access
const zipCache = new Map(); // path -> { zip: JSZip, fileHandle: FileSystemFileHandle }
// ── Incremental-scan state ───────────────────────────────────────────────
// The scan no longer reads the whole tree before rendering. It walks
// breadth-first behind a small worker pool, renders progressively (top
// levels appear first), and shows live status — so a huge network drive
// never looks stalled. Each folder tracks its own scan state + counts.
let scanGen = 0; // bumped per scan; stale workers bail
let scanStats = null; // { folders, files, current, done, startedAt }
let renderTimer = null; // throttle for progressive re-render
function scheduleRender() {
if (renderTimer) return;
renderTimer = setTimeout(function () {
renderTimer = null;
try { window.app.modules.tree.render(); } catch (_) { /* ignore */ }
updateScanStatus();
}, 180);
}
function flushRender() {
if (renderTimer) { clearTimeout(renderTimer); renderTimer = null; }
try { window.app.modules.tree.render(); } catch (_) { /* ignore */ }
updateScanStatus();
}
// Render the running scan status into the tree-pane header.
function updateScanStatus() {
const el = document.getElementById('scanStatus');
if (!el || !scanStats) return;
if (scanStats.done) {
const secs = ((Date.now() - scanStats.startedAt) / 1000).toFixed(1);
el.textContent = 'Scanned ' + scanStats.folders + ' folders · '
+ scanStats.files + ' files in ' + secs + 's';
el.classList.remove('scanning');
} else {
el.textContent = 'Scanning… ' + scanStats.folders + ' folders · '
+ scanStats.files + ' files'
+ (scanStats.current ? ' — ' + scanStats.current : '');
el.classList.add('scanning');
}
}
// Make a tree node. scanState: 'pending' (children not read) →
// 'scanning' → 'children' (immediate children read, subtree still going) →
// 'done' (entire subtree enumerated). The UI greys a node until 'done'.
function makeNode(handle, path, parent) {
const node = {
name: handle.name,
path: path,
handle: handle,
parent: parent || null,
files: [],
fileCount: 0,
subdirCount: 0,
totalFiles: 0,
totalDirs: 0,
children: [],
expanded: false,
scanState: 'pending',
pending: 0, // child dirs not yet 'done'
};
if (handle.isZipRoot) { node.isZipRoot = true; node.zipPath = handle.zipPath; }
if (handle.isVirtualDir) { node.isVirtualDir = true; node.zipPath = handle.zipPath; }
return node;
}
// Mark a node's subtree fully scanned: roll up recursive totals and
// propagate completion to the parent (which flips to 'done' once all its
// children are done). This is what turns a folder from grey to solid.
function markDone(node) {
if (node.scanState === 'done') return;
node.scanState = 'done';
let tf = node.fileCount, td = node.children.length;
for (const c of node.children) { tf += c.totalFiles; td += c.totalDirs; }
node.totalFiles = tf;
node.totalDirs = td;
const p = node.parent;
if (p && p.scanState !== 'done') {
p.pending -= 1;
if (p.pending <= 0 && (p.scanState === 'children' || p.scanState === 'scanning')) {
markDone(p);
}
}
}
/**
* Scan directory and build folder tree with files
*/
async function scanDirectory(dirHandle, preserveState = false) {
// Save current state if preserving
// Preserve which folders were expanded across a rescan (e.g. after a
// ZIP extract) so the user doesn't lose their place.
let savedExpanded = new Set();
let savedSelected = new Set();
if (preserveState) {
@ -22,38 +105,82 @@
savedSelected = new Set(window.app.selectedFolders);
}
// Clear ZIP cache
const myGen = ++scanGen;
zipCache.clear();
scanStats = { folders: 0, files: 0, current: dirHandle.name, done: false, startedAt: Date.now() };
// Map to store files by folder handle (or ZIP path for virtual folders)
const foldersMap = new Map();
// Recursively scan
await scanFolder(dirHandle, foldersMap, dirHandle.name);
// Build tree structure
window.app.folderTree = window.app.modules.tree.buildTree(dirHandle, foldersMap);
// Set in store
// Root node — render immediately so the pane never sits blank.
const root = makeNode(dirHandle, dirHandle.name, null);
root.expanded = true;
window.app.folderTree = [root];
window.app.modules.store.setFolderTree(window.app.folderTree);
if (preserveState) {
// Restore expanded state
restoreExpandedPaths(window.app.folderTree, savedExpanded);
// Restore selection
window.app.selectedFolders = savedSelected;
// Render without changing selection
window.app.modules.tree.render();
window.app.modules.store.setSelectedFolders(savedSelected);
if (!preserveState) {
// Select the root so the grid shows its immediate files at once,
// instead of auto-loading the ENTIRE drive (the old behaviour,
// which is exactly what stalled on a large share).
window.app.selectedFolders = new Set([root.path]);
window.app.lastSelectedFolderPath = root.path;
window.app.modules.store.setSelectedFolders(window.app.selectedFolders);
} else {
// Render tree
window.app.modules.tree.render();
// Auto-expand and select all folders
window.app.modules.tree.expandAll();
window.app.modules.tree.selectAll();
window.app.selectedFolders = savedSelected;
window.app.modules.store.setSelectedFolders(savedSelected);
}
flushRender();
// Breadth-first by level behind a bounded worker pool: level 1, then
// level 2, … each rendered as it lands (top levels appear first).
// Deeper levels keep filling in; workers await between directories so
// the UI stays responsive on a slow/large network drive.
let level = [root];
while (level.length && myGen === scanGen) {
await runWithConcurrency(level, 6, function (n) { return scanNodeChildren(n, myGen); });
const next = [];
for (const n of level) {
for (const c of n.children) {
if (preserveState && savedExpanded.has(c.path)) c.expanded = true;
if (c.scanState === 'pending') next.push(c);
}
}
level = next;
}
if (myGen !== scanGen) return; // superseded by a newer scan
scanStats.done = true;
scanStats.current = '';
flushRender();
}
// Run fn over items with at most `limit` concurrent calls; resolves when
// all have settled. Termination is clean (no transient-empty-queue race).
async function runWithConcurrency(items, limit, fn) {
let i = 0;
async function runner() {
while (i < items.length) {
const idx = i++;
await fn(items[idx]);
}
}
const runners = [];
for (let k = 0; k < Math.min(limit, items.length); k++) runners.push(runner());
await Promise.all(runners);
}
// Force a folder's subtree to scan NOW (jumped ahead of the background
// walk). Called when the user opens a folder, so an opened folder always
// shows complete contents. Idempotent + shares the live scan generation.
async function ensureScanned(node) {
if (!node || !node.handle || node.scanState === 'done') return;
const myGen = scanGen;
let level = [node];
while (level.length && myGen === scanGen) {
await runWithConcurrency(level, 6, function (n) { return scanNodeChildren(n, myGen); });
const next = [];
for (const n of level) {
for (const c of n.children) if (c.scanState === 'pending') next.push(c);
}
level = next;
}
flushRender();
}
/**
@ -83,43 +210,118 @@
}
}
/**
* Recursively scan a folder
*/
async function scanFolder(dirHandle, foldersMap, currentPath) {
const items = [];
// Read ONE directory's immediate entries: files into node.files, child
// directories into node.children (left 'pending' for the BFS to descend).
// A .zip becomes an expandable zip-root child, scanned inline (its
// contents are already in memory once the entry is read). Idempotent:
// only a 'pending' node is scanned, so concurrent callers (background +
// open-prioritised) don't double-scan.
async function scanNodeChildren(node, myGen) {
if (node.scanState !== 'pending') return;
node.scanState = 'scanning';
if (scanStats) scanStats.current = node.path;
const files = [];
const childDirs = [];
try {
for await (const entry of dirHandle.values()) {
for await (const entry of node.handle.values()) {
if (myGen !== scanGen) { node.scanState = 'pending'; return; } // cancelled
if (entry.kind === 'file') {
// Create file object
const file = await createFileObject(entry, dirHandle);
if (file) {
items.push(file);
// Check if it's a ZIP file - scan its contents
if (file.extension === 'zip' && typeof JSZip !== 'undefined') {
await scanZipFile(file, foldersMap, currentPath, items);
}
const fo = await createFileObject(entry, node.handle);
if (!fo) continue;
fo.folderPath = node.path;
files.push(fo);
if (scanStats) scanStats.files++;
if (fo.extension === 'zip' && typeof JSZip !== 'undefined') {
const zipName = zddc.joinExtension(fo.originalFilename, fo.extension);
const zipPath = node.path + '/' + zipName;
const zh = { name: zipName, kind: 'directory', isZipRoot: true, zipPath: zipPath };
const zipNode = makeNode(zh, zipPath, node);
try { await scanZipIntoNode(zipNode, fo); }
catch (e) { console.error('Error scanning ZIP:', zipPath, e); }
childDirs.push(zipNode);
if (scanStats) scanStats.folders++;
}
} else if (entry.kind === 'directory') {
// Add directory reference
items.push({
handle: entry,
isDirectory: true
});
// Recursively scan subdirectory
const childPath = currentPath + '/' + entry.name;
await scanFolder(entry, foldersMap, childPath);
const childPath = node.path + '/' + entry.name;
childDirs.push(makeNode(entry, childPath, node));
if (scanStats) scanStats.folders++;
}
}
} catch (err) {
console.error('Error scanning folder:', dirHandle.name, err);
console.error('Error scanning folder:', node.path, err);
}
node.files = files;
node.fileCount = files.length;
node.children = childDirs;
node.subdirCount = childDirs.length;
// Zip children are scanned inline ('done'); real dirs are still pending.
node.pending = childDirs.filter(function (c) { return c.scanState !== 'done'; }).length;
if (node.pending === 0) {
markDone(node);
} else {
node.scanState = 'children';
}
scheduleRender();
}
// Store files for this folder
foldersMap.set(dirHandle, items);
// Build a zip-root node's children from its archive contents (in memory),
// marking the whole zip subtree 'done' immediately. Mirrors the on-disk
// node shape so the rest of the app treats zip folders like real ones.
async function scanZipIntoNode(zipNode, fileObj) {
const f = await fileObj.handle.getFile();
const zip = await JSZip.loadAsync(await f.arrayBuffer());
const zipPath = zipNode.path;
zipCache.set(zipPath, { zip: zip, fileHandle: fileObj.handle, folderHandle: fileObj.folderHandle });
const dirNodes = new Map();
dirNodes.set(zipPath, zipNode);
function ensureDir(dirPath) {
if (dirNodes.has(dirPath)) return dirNodes.get(dirPath);
const parentPath = dirPath.substring(0, dirPath.lastIndexOf('/'));
const parent = ensureDir(parentPath);
const name = dirPath.substring(dirPath.lastIndexOf('/') + 1);
const vh = { name: name, kind: 'directory', isVirtualDir: true, zipPath: zipPath, virtualPath: dirPath };
const child = makeNode(vh, dirPath, parent);
parent.children.push(child);
dirNodes.set(dirPath, child);
return child;
}
zip.forEach(function (relativePath, entry) {
if (entry.dir) {
ensureDir(zipPath + '/' + relativePath.replace(/\/$/, ''));
} else {
const fileName = relativePath.split('/').pop();
const fileDir = relativePath.includes('/')
? zipPath + '/' + relativePath.substring(0, relativePath.lastIndexOf('/'))
: zipPath;
const dirNode = ensureDir(fileDir);
const split = zddc.splitExtension(fileName);
dirNode.files.push({
originalFilename: split.name,
extension: split.extension,
size: entry._data ? entry._data.uncompressedSize : 0,
lastModified: entry.date ? entry.date.getTime() : Date.now(),
isVirtual: true,
zipPath: zipPath,
zipEntryPath: relativePath,
folderPath: dirNode.path,
trackingNumber: '', revision: '', status: '', title: '',
isDirty: false, error: false, errorMessage: '', validation: null, sha256: null
});
}
});
finalizeZipNode(zipNode);
}
// Roll up a zip node's counts + mark its whole subtree 'done'.
function finalizeZipNode(node) {
node.fileCount = node.files.length;
node.subdirCount = node.children.length;
let tf = node.fileCount, td = node.children.length;
for (const c of node.children) { finalizeZipNode(c); tf += c.totalFiles; td += c.totalDirs; }
node.totalFiles = tf;
node.totalDirs = td;
node.scanState = 'done';
node.pending = 0;
}
/**
@ -429,6 +631,7 @@
// Export module
window.app.modules.scanner = {
scanDirectory,
ensureScanned,
getZipCache,
extractZip
};

View file

@ -25,6 +25,23 @@
updateSelectedCount();
}
/**
* Count label for a folder row. While the folder is still being scanned
* its counts are unknown; once its own directory has been read we show the
* immediate subfolder/file counts (greyed until the whole subtree is done).
*/
function folderCountLabel(folder) {
const st = folder.scanState;
if (st === 'pending') return '';
if (st === 'scanning') return 'scanning…';
const d = folder.subdirCount || 0;
const f = folder.fileCount || 0;
const parts = [];
if (d) parts.push(d + (d === 1 ? ' folder' : ' folders'));
parts.push(f + (f === 1 ? ' file' : ' files'));
return '(' + parts.join(', ') + ')';
}
/**
* Create a folder element
*/
@ -33,6 +50,11 @@
const item = document.createElement('div');
item.className = 'folder-item';
// Grey the row until its subtree is fully scanned (scanState 'done');
// 'scanning' rows also get a subtle pulse via CSS.
if (folder.scanState && folder.scanState !== 'done') {
item.classList.add('scanning');
}
item.dataset.path = folder.path;
item.style.paddingLeft = `${level * 1.5}rem`;
@ -41,10 +63,13 @@
item.classList.add('selected');
}
// Toggle button (if has children)
// Toggle button: shown when the folder has children OR hasn't been
// scanned yet (it might have children — expanding triggers its scan).
const toggle = document.createElement('span');
toggle.className = 'folder-toggle';
if (folder.children && folder.children.length > 0) {
const mightHaveChildren = (folder.children && folder.children.length > 0)
|| folder.scanState === 'pending';
if (mightHaveChildren) {
toggle.textContent = folder.expanded ? '▼' : '▶';
toggle.addEventListener('click', (e) => {
e.stopPropagation();
@ -74,10 +99,11 @@
name.textContent = folder.name;
item.appendChild(name);
// File count
// Subfolder / file counts (immediate). Greyed via the row's .scanning
// class until the subtree is fully scanned.
const count = document.createElement('span');
count.className = 'folder-count';
count.textContent = `(${folder.fileCount || 0})`;
count.textContent = folderCountLabel(folder);
item.appendChild(count);
// Extract button for ZIP roots
@ -301,6 +327,14 @@
}
render();
// Opening a not-yet-complete folder jumps its subtree to the front of
// the scan so its contents are complete on open (re-renders as it
// fills in). Background scanning continues for everything else.
if (folder.expanded && folder.scanState !== 'done'
&& window.app.modules.scanner && window.app.modules.scanner.ensureScanned) {
window.app.modules.scanner.ensureScanned(folder).then(render).catch(() => {});
}
}
/**

View file

@ -58,6 +58,7 @@
<span id="selectedFoldersCount" class="folder-count">0 folders selected</span>
</div>
</div>
<div id="scanStatus" class="scan-status" aria-live="polite"></div>
<div id="folderTree" class="folder-tree">
<!-- Dynamically populated -->
</div>