From e0ba77a75bd0163ddb143369fd608c18415ee87f Mon Sep 17 00:00:00 2001 From: ZDDC Date: Tue, 9 Jun 2026 11:08:57 -0500 Subject: [PATCH] perf+ux(classifier): continuous 16-way scan pool; accurate FS error text MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Scan concurrency: the scan is I/O-bound — each directory read is a network round-trip to the share, so the lever is parallel in-flight reads, not CPU threads. Replace the per-level BFS barrier (which idled workers waiting on the slowest dir in each level) with a continuous shared-queue pool that keeps up to SCAN_CONCURRENCY (16, up from 6) reads in flight at once, pulling newly discovered child dirs as they land. Still roughly breadth-first (FIFO), so top levels surface first. ensureScanned reuses it. Error messages: translate File System Access DOMExceptions into accurate, actionable text keyed on err.name (not the cryptic raw message, which reads like a permission problem when it isn't). e.g. InvalidStateError now reads 'the folder changed on disk since it was first read … rescan' instead of 'an operation that depends on state cached in an interface object …'. The raw name+message is appended in parens for copy-paste troubleshooting. Co-Authored-By: Claude Opus 4.8 (1M context) --- classifier/js/scanner.js | 130 +++++++++++++++++++++++++++------------ 1 file changed, 90 insertions(+), 40 deletions(-) diff --git a/classifier/js/scanner.js b/classifier/js/scanner.js index 0c3e6ee..1acb0b1 100644 --- a/classifier/js/scanner.js +++ b/classifier/js/scanner.js @@ -16,6 +16,11 @@ let scanGen = 0; // bumped per scan; stale workers bail let scanStats = null; // { folders, files, current, done, startedAt } let renderTimer = null; // throttle for progressive re-render + // How many directory reads to keep in flight at once. 
The scan is + // I/O-bound (each readdir is a network round-trip to the share), so the + // lever is parallel in-flight reads, not CPU threads — raise this if the + // share tolerates more concurrency. + var SCAN_CONCURRENCY = 16; function scheduleRender() { if (renderTimer) return; @@ -100,14 +105,56 @@ } } - // One-shot toast for scan errors (permission denied, network hiccups on a - // share). De-duped per path so a flaky folder doesn't spam. + // Translate a File System Access API error into accurate, actionable text. + // The browser's raw DOMException messages are cryptic and often read like a + // permission problem when they aren't — we key off err.name (reliable) + // rather than the message. Returns a plain-language explanation; the raw + // name + message are still appended by the caller for troubleshooting. + function describeFsError(err) { + var name = err && err.name ? err.name : ''; + switch (name) { + case 'NotAllowedError': + return 'Permission to read this folder was denied or revoked. ' + + 'Re-pick the root folder to re-grant access.'; + case 'InvalidStateError': + // The handle was read once, then the directory changed underneath + // it (common on a network/SMB share that's being written to, or + // after a disconnect/reconnect). NOT a permissions problem. + return 'The folder changed on disk since it was first read ' + + '(common on a busy or reconnecting network share). 
' + + 'Rescan to pick up the current contents.'; + case 'NotFoundError': + return 'The folder no longer exists — it may have been moved, ' + + 'renamed, or deleted since the scan started.'; + case 'NotReadableError': + return 'The folder could not be read — the share may have ' + + 'disconnected, or the OS denied access.'; + case 'SecurityError': + return 'The browser blocked access to this folder for security ' + + 'reasons.'; + case 'TypeMismatchError': + return 'Expected a folder here but found a file (or vice-versa).'; + case 'AbortError': + return 'Reading this folder was aborted.'; + default: + return 'Could not read this folder.'; + } + } + + // One-shot toast for scan errors (permission denied, stale handles, network + // hiccups on a share). De-duped per path so a flaky folder doesn't spam. const scanErrorsSeen = new Set(); function reportScanError(path, err) { console.error('Scan error:', path, err); if (scanErrorsSeen.has(path)) return; scanErrorsSeen.add(path); - const msg = 'Couldn’t scan ' + path + ': ' + (err && err.message ? err.message : err); + // Plain-language explanation, then the raw error in parentheses so the + // user can copy it (toasts are selectable) for deeper troubleshooting. + var raw = err && err.name + ? err.name + (err.message ? ': ' + err.message : '') + : (err && err.message ? err.message : String(err)); + var msg = 'Couldn’t scan ' + path + ' — ' + describeFsError(err) + + '\n\n(' + raw + ')'; if (window.zddc && typeof window.zddc.toast === 'function') { window.zddc.toast(msg, 'error'); } @@ -155,21 +202,13 @@ updateScanStatus(); }, 1000); - // Breadth-first by level behind a bounded worker pool: level 1, then - // level 2, … each rendered as it lands (top levels appear first). - // Deeper levels keep filling in; workers await between directories so - // the UI stays responsive on a slow/large network drive. 
- let level = [root]; - while (level.length && myGen === scanGen) { - await runWithConcurrency(level, 6, function (n) { return scanNodeChildren(n, myGen); }); - const next = []; - for (const n of level) { - for (const c of n.children) { - if (preserveState && savedExpanded.has(c.path)) c.expanded = true; - if (c.scanState === 'pending') next.push(c); - } - } - level = next; + // Continuous breadth-first walk: up to SCAN_CONCURRENCY directory reads + // in flight at once, pulling newly-discovered child dirs as they land + // (no per-level barrier, so the pool stays saturated). Top levels still + // appear first (FIFO). The cap is the lever — see SCAN_CONCURRENCY. + await drainQueue([root], myGen, SCAN_CONCURRENCY); + if (preserveState && savedExpanded.size) { + restoreExpandedPaths(window.app.folderTree, savedExpanded); } clearInterval(ticker); if (myGen !== scanGen) return; // superseded by a newer scan @@ -187,19 +226,39 @@ } } - // Run fn over items with at most `limit` concurrent calls; resolves when - // all have settled. Termination is clean (no transient-empty-queue race). - async function runWithConcurrency(items, limit, fn) { - let i = 0; - async function runner() { - while (i < items.length) { - const idx = i++; - await fn(items[idx]); + // Continuous worker pool over a shared queue: keep up to `conc` directory + // reads in flight at once, pulling newly-discovered child dirs as they land + // — no per-level barrier, so workers never idle waiting on the slowest dir + // in a level. Roughly breadth-first (FIFO; a node's children are enqueued + // after it), so top levels still surface first. Resolves when the queue is + // drained and no read is in flight (clean termination, no empty-queue race). 
+ function drainQueue(seed, myGen, conc) { + const queue = seed.slice(); + let active = 0; + return new Promise(function (resolve) { + function finishIfIdle() { + if (active === 0 && (queue.length === 0 || myGen !== scanGen)) resolve(); // settle when superseded too; pump() stops dequeuing then } - } - const runners = []; - for (let k = 0; k < Math.min(limit, items.length); k++) runners.push(runner()); - await Promise.all(runners); + function pump() { + while (myGen === scanGen && active < conc && queue.length) { + const node = queue.shift(); + active++; + Promise.resolve(scanNodeChildren(node, myGen)).then(function () { + active--; + if (myGen === scanGen) { + const kids = node.children; + for (let i = 0; i < kids.length; i++) { + if (kids[i].scanState === 'pending') queue.push(kids[i]); + } + } + pump(); + finishIfIdle(); + }, function () { active--; pump(); finishIfIdle(); }); + } + finishIfIdle(); + } + pump(); + }); } // Force a folder's subtree to scan NOW (jumped ahead of the background @@ -207,16 +266,7 @@ // shows complete contents. Idempotent + shares the live scan generation. async function ensureScanned(node) { if (!node || !node.handle || node.scanState === 'done') return; - const myGen = scanGen; - let level = [node]; - while (level.length && myGen === scanGen) { - await runWithConcurrency(level, 6, function (n) { return scanNodeChildren(n, myGen); }); - const next = []; - for (const n of level) { - for (const c of n.children) if (c.scanState === 'pending') next.push(c); - } - level = next; - } + await drainQueue([node], scanGen, SCAN_CONCURRENCY); flushRender(); }