perf+ux(classifier): continuous 16-way scan pool; accurate FS error text

Scan concurrency: the scan is I/O-bound — each directory read is a network
round-trip to the share, so the lever is parallel in-flight reads, not CPU
threads. Replace the per-level BFS barrier (which idled workers waiting on
the slowest dir in each level) with a continuous shared-queue pool that
keeps up to SCAN_CONCURRENCY (16, up from 6) reads in flight at once,
pulling newly discovered child dirs as they land. Still roughly
breadth-first (FIFO), so top levels surface first. ensureScanned reuses it.

Error messages: translate File System Access DOMExceptions into accurate,
actionable text keyed on err.name (not the cryptic raw message, which reads
like a permission problem when it isn't). e.g. InvalidStateError now reads
'the folder changed on disk since it was first read … rescan' instead of
'an operation that depends on state cached in an interface object …'. The
raw name+message is appended in parens for copy-paste troubleshooting.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
ZDDC 2026-06-09 11:08:57 -05:00
parent caff489206
commit e0ba77a75b

View file

@ -16,6 +16,11 @@
let scanGen = 0; // bumped per scan; stale workers bail
let scanStats = null; // { folders, files, current, done, startedAt }
let renderTimer = null; // throttle for progressive re-render
// How many directory reads to keep in flight at once. The scan is
// I/O-bound (each readdir is a network round-trip to the share), so the
// lever is parallel in-flight reads, not CPU threads — raise this if the
// share tolerates more concurrency. Passed as the `conc` cap to drainQueue
// by both the full scan and ensureScanned.
var SCAN_CONCURRENCY = 16;
function scheduleRender() {
if (renderTimer) return;
@ -100,14 +105,56 @@
}
}
// One-shot toast for scan errors (permission denied, network hiccups on a
// share). De-duped per path so a flaky folder doesn't spam.
// Translate a File System Access API error into accurate, actionable text.
// The browser's raw DOMException messages are cryptic and often read like a
// permission problem when they aren't — we key off err.name (reliable)
// rather than the message. Returns a plain-language explanation; the raw
// name + message are still appended by the caller for troubleshooting.
function describeFsError(err) {
    // A missing error, or one without a usable name, maps to the generic text.
    const kind = err && err.name ? err.name : '';
    const explanations = new Map([
        ['NotAllowedError',
            'Permission to read this folder was denied or revoked. Re-pick the root folder to re-grant access.'],
        // The handle was read once, then the directory changed underneath
        // it (common on a network/SMB share that's being written to, or
        // after a disconnect/reconnect). NOT a permissions problem.
        ['InvalidStateError',
            'The folder changed on disk since it was first read (common on a busy or reconnecting network share). Rescan to pick up the current contents.'],
        ['NotFoundError',
            'The folder no longer exists — it may have been moved, renamed, or deleted since the scan started.'],
        ['NotReadableError',
            'The folder could not be read — the share may have disconnected, or the OS denied access.'],
        ['SecurityError',
            'The browser blocked access to this folder for security reasons.'],
        ['TypeMismatchError',
            'Expected a folder here but found a file (or vice-versa).'],
        ['AbortError',
            'Reading this folder was aborted.'],
    ]);
    // Map lookup does no key coercion, so only exact string names match.
    return explanations.has(kind)
        ? explanations.get(kind)
        : 'Could not read this folder.';
}
// One-shot toast for scan errors (permission denied, stale handles, network
// hiccups on a share). De-duped per path so a flaky folder doesn't spam.
const scanErrorsSeen = new Set();
function reportScanError(path, err) {
console.error('Scan error:', path, err);
if (scanErrorsSeen.has(path)) return;
scanErrorsSeen.add(path);
const msg = 'Couldnt scan ' + path + ': ' + (err && err.message ? err.message : err);
// Plain-language explanation, then the raw error in parentheses so the
// user can copy it (toasts are selectable) for deeper troubleshooting.
var raw = err && err.name
? err.name + (err.message ? ': ' + err.message : '')
: (err && err.message ? err.message : String(err));
var msg = 'Couldnt scan ' + path + ' — ' + describeFsError(err)
+ '\n\n(' + raw + ')';
if (window.zddc && typeof window.zddc.toast === 'function') {
window.zddc.toast(msg, 'error');
}
@ -155,21 +202,13 @@
updateScanStatus();
}, 1000);
// Breadth-first by level behind a bounded worker pool: level 1, then
// level 2, … each rendered as it lands (top levels appear first).
// Deeper levels keep filling in; workers await between directories so
// the UI stays responsive on a slow/large network drive.
let level = [root];
while (level.length && myGen === scanGen) {
await runWithConcurrency(level, 6, function (n) { return scanNodeChildren(n, myGen); });
const next = [];
for (const n of level) {
for (const c of n.children) {
if (preserveState && savedExpanded.has(c.path)) c.expanded = true;
if (c.scanState === 'pending') next.push(c);
}
}
level = next;
// Continuous breadth-first walk: up to SCAN_CONCURRENCY directory reads
// in flight at once, pulling newly-discovered child dirs as they land
// (no per-level barrier, so the pool stays saturated). Top levels still
// appear first (FIFO). The cap is the lever — see SCAN_CONCURRENCY.
await drainQueue([root], myGen, SCAN_CONCURRENCY);
if (preserveState && savedExpanded.size) {
restoreExpandedPaths(window.app.folderTree, savedExpanded);
}
clearInterval(ticker);
if (myGen !== scanGen) return; // superseded by a newer scan
@ -187,19 +226,39 @@
}
}
// Run fn over items with at most `limit` concurrent calls; resolves when
// all have settled. Termination is clean (no transient-empty-queue race).
async function runWithConcurrency(items, limit, fn) {
let i = 0;
async function runner() {
while (i < items.length) {
const idx = i++;
await fn(items[idx]);
// Continuous worker pool over a shared queue: keep up to `conc` directory
// reads in flight at once, pulling newly-discovered child dirs as they land
// — no per-level barrier, so workers never idle waiting on the slowest dir
// in a level. Roughly breadth-first (FIFO; a node's children are enqueued
// after it), so top levels still surface first.
//
//   seed  — array of nodes to start from (copied, never mutated).
//   myGen — scan generation this walk belongs to; compared against the
//           live `scanGen` to detect that a newer scan has superseded it.
//   conc  — maximum reads in flight; values below 1 are treated as 1 so a
//           bad cap cannot stall the walk.
//
// Returns a promise that resolves (never rejects) once no read is in flight
// and nothing is left to start. When the walk is superseded, queued nodes
// are dropped and the promise resolves as soon as the in-flight reads
// settle, so callers awaiting it always get to run their cleanup.
function drainQueue(seed, myGen, conc) {
    const queue = seed.slice();
    const limit = conc >= 1 ? conc : 1;
    let active = 0;
    return new Promise(function (resolve) {
        function start(node) {
            active++;
            // Wrapping the call in a promise executor turns a synchronous
            // throw into a rejection, so `active` is always decremented.
            new Promise(function (done) {
                done(scanNodeChildren(node, myGen));
            }).then(function () {
                active--;
                if (myGen === scanGen) {
                    const kids = node.children || [];
                    for (let i = 0; i < kids.length; i++) {
                        if (kids[i].scanState === 'pending') queue.push(kids[i]);
                    }
                }
                pump();
            }, function () {
                // Best-effort: a failed read must not stall the pool.
                // NOTE(review): presumably scanNodeChildren reports its own
                // errors — confirm, otherwise this failure is silent.
                active--;
                pump();
            });
        }
        function pump() {
            // Superseded by a newer scan: abandon everything not yet started
            // so the idle check below can still succeed.
            if (myGen !== scanGen) queue.length = 0;
            while (active < limit && queue.length) {
                start(queue.shift());
            }
            if (active === 0 && queue.length === 0) resolve();
        }
        pump();
    });
}
// Force a folder's subtree to scan NOW (jumped ahead of the background
@ -207,16 +266,7 @@
// shows complete contents. Idempotent + shares the live scan generation.
// Scan `node`'s subtree through the shared drainQueue pool, then flush a
// render. Does nothing when `node` is missing, has no `handle`, or is
// already marked 'done'. The walk is tagged with the current `scanGen`.
async function ensureScanned(node) {
    if (!node || !node.handle || node.scanState === 'done') return;
    await drainQueue([node], scanGen, SCAN_CONCURRENCY);
    flushRender();
}