ZDDC/classifier/js/scanner.js
ZDDC ecb0a270cc feat(classifier): incremental scan — status, top-levels-first, per-folder state
Replaces the full depth-first "scan everything, then render once + expandAll +
selectAll" walk (which looked stalled and was a render bomb on a large network
drive) with a progressive, breadth-first scan:

- Walks level-by-level behind a bounded worker pool (6), rendering as it goes —
  the top folder levels appear immediately, deeper levels fill in the
  background. Workers await between directories so the UI stays responsive.
- Live status line under the tree header: "Scanning… N folders · M files —
  <current path>", ending "Scanned … in Ts."
- Per-folder state machine (pending → scanning → children → done) with
  immediate subfolder/file counts; the row is greyed (with a faint pulse) until
  its whole subtree is scanned, then turns solid — the at-a-glance signal.
- Opening a folder jumps its subtree to the front of the scan (ensureScanned),
  so an opened folder always shows complete contents; idempotent vs the
  background walk.
- No more auto-expand/auto-select-all (that loaded the entire drive up front);
  the root is selected so the grid shows its files immediately.
- ZIPs stay expandable, scanned inline into virtual nodes (already in memory
  once read); whole zip subtree marked done at once.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-09 10:03:46 -05:00

639 lines
25 KiB
JavaScript

/**
* Directory Scanner Module
* Scans directories and collects files
*/
(function() {
'use strict';
// Parsed ZIP archives keyed by their tree path, kept so a ZIP's contents can
// be looked up or extracted later. Emptied at the start of every
// scanDirectory() call.
const zipCache = new Map(); // zipPath -> { zip: JSZip, fileHandle, folderHandle }
// ── Incremental-scan state ───────────────────────────────────────────────
// The scan no longer reads the whole tree before rendering. It walks
// breadth-first behind a small worker pool, renders progressively (top
// levels appear first), and shows live status — so a huge network drive
// never looks stalled. Each folder tracks its own scan state + counts.
let scanGen = 0; // incremented by each scanDirectory() call; workers compare against it to detect a newer scan
let scanStats = null; // { folders, files, current, done, startedAt } for the status line; null until the first scan
let renderTimer = null; // setTimeout handle of the queued progressive re-render, or null when none is queued
// Queue one throttled tree re-render plus status refresh. Calls made while a
// render is already queued are dropped, so a burst of scan updates produces a
// single repaint 180 ms after the first request.
function scheduleRender() {
    if (renderTimer) {
        return; // a repaint is already on its way
    }
    const repaint = function () {
        renderTimer = null;
        try {
            window.app.modules.tree.render();
        } catch (_) {
            /* ignore */
        }
        updateScanStatus();
    };
    renderTimer = setTimeout(repaint, 180);
}
// Render right now: cancel any queued throttled render, repaint the tree
// (render errors are ignored) and refresh the status line.
function flushRender() {
    const queued = renderTimer;
    if (queued) {
        clearTimeout(queued);
        renderTimer = null;
    }
    try {
        const tree = window.app.modules.tree;
        tree.render();
    } catch (_) {
        /* ignore */
    }
    updateScanStatus();
}
// Write the scan progress into the #scanStatus element. While the scan runs
// the text shows the running counts and the path being read, and the element
// carries the 'scanning' class; once scanStats.done is set it shows the final
// counts with the elapsed seconds and the class is removed. Does nothing when
// the element is missing or no scan has started.
function updateScanStatus() {
    const statusEl = document.getElementById('scanStatus');
    if (!statusEl || !scanStats) {
        return;
    }
    if (!scanStats.done) {
        const where = scanStats.current ? ' — ' + scanStats.current : '';
        statusEl.textContent = 'Scanning… ' + scanStats.folders + ' folders · '
            + scanStats.files + ' files'
            + where;
        statusEl.classList.add('scanning');
        return;
    }
    const elapsedMs = Date.now() - scanStats.startedAt;
    const secs = (elapsedMs / 1000).toFixed(1);
    statusEl.textContent = 'Scanned ' + scanStats.folders + ' folders · '
        + scanStats.files + ' files in ' + secs + 's';
    statusEl.classList.remove('scanning');
}
// Create a fresh tree node for `handle` at `path` under `parent` (null for the
// root). Every node starts collapsed, empty and 'pending'. scanState moves
// 'pending' (children not read) → 'scanning' → 'children' (immediate children
// read, subtree still going) → 'done' (entire subtree enumerated); the UI
// greys a node until 'done'. ZIP-root and virtual-directory handles have their
// marker flag and zipPath copied onto the node.
function makeNode(handle, path, parent) {
    const node = {
        name: handle.name,
        path: path,
        handle: handle,
        parent: parent || null,
        files: [],
        fileCount: 0,
        subdirCount: 0,
        totalFiles: 0,
        totalDirs: 0,
        children: [],
        expanded: false,
        scanState: 'pending',
        pending: 0, // child dirs not yet 'done'
    };
    for (const flag of ['isZipRoot', 'isVirtualDir']) {
        if (handle[flag]) {
            node[flag] = true;
            node.zipPath = handle.zipPath;
        }
    }
    return node;
}
// Flag a node's subtree as fully scanned and bubble completion upwards.
// For the node: set scanState to 'done' and compute the recursive totals from
// its own file count plus its children's totals. Then decrement the parent's
// outstanding-child counter; when that reaches zero and the parent has already
// read (or is reading) its own entries, the parent is completed the same way,
// and so on up the chain. A node that is already 'done' is left untouched.
function markDone(node) {
    let current = node;
    while (current.scanState !== 'done') {
        current.scanState = 'done';
        let files = current.fileCount;
        let dirs = current.children.length;
        for (const kid of current.children) {
            files += kid.totalFiles;
            dirs += kid.totalDirs;
        }
        current.totalFiles = files;
        current.totalDirs = dirs;

        const parent = current.parent;
        if (!parent || parent.scanState === 'done') {
            return;
        }
        parent.pending -= 1;
        const parentReady = parent.pending <= 0
            && (parent.scanState === 'children' || parent.scanState === 'scanning');
        if (!parentReady) {
            return;
        }
        current = parent; // every child is done: complete the parent next
    }
}
/**
 * Scan a directory incrementally and publish it as the app's folder tree.
 *
 * Starts a new scan generation (superseding any scan still running), clears
 * the ZIP cache, installs a root node and renders it immediately, then walks
 * the tree breadth-first, one level at a time, with up to 6 directories read
 * concurrently. When the walk finishes the scan is flagged done and rendered.
 *
 * @param {FileSystemDirectoryHandle} dirHandle - Directory to scan; its name
 *   becomes the root node's name and path.
 * @param {boolean} [preserveState=false] - When true, folders that were
 *   expanded in the current tree are re-expanded in the new one and the
 *   current folder selection is kept (e.g. for a rescan after a ZIP extract).
 *   When false, only the root is selected.
 * @returns {Promise<void>} Resolves when the walk completes or is superseded
 *   by a newer scan.
 */
async function scanDirectory(dirHandle, preserveState = false) {
    // Preserve which folders were expanded across a rescan (e.g. after a
    // ZIP extract) so the user doesn't lose their place.
    let savedExpanded = new Set();
    let savedSelected = new Set();
    if (preserveState) {
        // Tolerate a rescan requested before any tree exists.
        savedExpanded = getExpandedPaths(window.app.folderTree || []);
        savedSelected = new Set(window.app.selectedFolders);
    }

    // Re-apply saved expansion to `node`. The level walk below never descends
    // into a node that is already 'done' (ZIP subtrees are built inline), so
    // for those the whole subtree is handled here; otherwise folders inside a
    // ZIP would collapse on every rescan. Only ever sets `expanded` to true.
    function reExpand(node) {
        if (savedExpanded.has(node.path)) node.expanded = true;
        if (node.scanState === 'done') {
            for (const child of node.children) reExpand(child);
        }
    }

    const myGen = ++scanGen;
    zipCache.clear();
    scanStats = { folders: 0, files: 0, current: dirHandle.name, done: false, startedAt: Date.now() };

    // Root node — render immediately so the pane never sits blank.
    const root = makeNode(dirHandle, dirHandle.name, null);
    root.expanded = true;
    window.app.folderTree = [root];
    window.app.modules.store.setFolderTree(window.app.folderTree);
    if (!preserveState) {
        // Select the root so the grid shows its immediate files at once,
        // instead of auto-loading the ENTIRE drive (the old behaviour,
        // which is exactly what stalled on a large share).
        window.app.selectedFolders = new Set([root.path]);
        window.app.lastSelectedFolderPath = root.path;
        window.app.modules.store.setSelectedFolders(window.app.selectedFolders);
    } else {
        window.app.selectedFolders = savedSelected;
        window.app.modules.store.setSelectedFolders(savedSelected);
    }
    flushRender();

    // Breadth-first by level behind a bounded worker pool: level 1, then
    // level 2, … each rendered as it lands (top levels appear first).
    // Deeper levels keep filling in; workers await between directories so
    // the UI stays responsive on a slow/large network drive.
    let level = [root];
    while (level.length && myGen === scanGen) {
        await runWithConcurrency(level, 6, function (n) { return scanNodeChildren(n, myGen); });
        const next = [];
        for (const n of level) {
            for (const c of n.children) {
                if (preserveState) reExpand(c);
                if (c.scanState === 'pending') next.push(c);
            }
        }
        level = next;
    }
    if (myGen !== scanGen) return; // superseded by a newer scan
    scanStats.done = true;
    scanStats.current = '';
    flushRender();
}
/**
 * Call `fn(item)` for every item, with at most `limit` calls in flight.
 *
 * Workers pull the next index from a shared cursor, so there is no queue that
 * can look transiently empty. The returned promise settles only after every
 * item has been attempted: if any call throws or rejects, the remaining items
 * are still processed and the first error is rethrown at the end.
 *
 * @param {Array} items - Items to process, started in order.
 * @param {number} limit - Maximum concurrent calls. Values below 1 (or not a
 *   number) are treated as 1 so the items are never silently skipped.
 * @param {function(*): (Promise|*)} fn - Called once per item.
 * @returns {Promise<void>}
 */
async function runWithConcurrency(items, limit, fn) {
    const requested = Math.floor(Number(limit));
    const width = Math.min(items.length, requested >= 1 ? requested : 1);
    let cursor = 0;
    let failed = false;
    let firstError;

    async function worker() {
        while (cursor < items.length) {
            const item = items[cursor++];
            try {
                await fn(item);
            } catch (err) {
                // Keep draining so the caller never resumes while work is
                // still running; only the first failure is reported.
                if (!failed) {
                    failed = true;
                    firstError = err;
                }
            }
        }
    }

    const workers = [];
    for (let k = 0; k < width; k++) workers.push(worker());
    await Promise.all(workers);
    if (failed) throw firstError;
}
// Scan a folder's subtree immediately instead of waiting for the background
// walk to reach it (used when the user opens a folder). Walks level by level
// from `node` with up to 6 directories in flight, using the current scan
// generation, and stops early if a newer scan starts. Only nodes still
// 'pending' are read, so it can overlap the background walk without scanning
// a directory twice. Returns at once for a missing node, a node without a
// handle, or a node that is already 'done'; otherwise ends with a render.
async function ensureScanned(node) {
    if (!node || !node.handle || node.scanState === 'done') {
        return;
    }
    const myGen = scanGen;
    const scanOne = function (n) { return scanNodeChildren(n, myGen); };
    const pendingChildrenOf = function (parents) {
        const found = [];
        for (const parent of parents) {
            for (const child of parent.children) {
                if (child.scanState === 'pending') found.push(child);
            }
        }
        return found;
    };

    for (let frontier = [node]; frontier.length && myGen === scanGen; frontier = pendingChildrenOf(frontier)) {
        await runWithConcurrency(frontier, 6, scanOne);
    }
    flushRender();
}
/**
 * Collect the paths of every expanded folder in a tree.
 * Visits folders depth-first in list order, descending into `children`
 * whenever a folder has them (whether or not it is itself expanded).
 * @param {Iterable<Object>} folders - Folder nodes to inspect.
 * @param {Set<string>} [paths] - Set to add to; a new one is created if omitted.
 * @returns {Set<string>} The same set, holding the expanded folders' paths.
 */
function getExpandedPaths(folders, paths = new Set()) {
    const visit = (list) => {
        for (const entry of list) {
            if (entry.expanded) paths.add(entry.path);
            if (entry.children) visit(entry.children);
        }
    };
    visit(folders);
    return paths;
}
/**
 * Set `expanded` on every folder in a tree from a set of paths: true when the
 * folder's path is in the set, false otherwise. Descends into `children`
 * wherever present.
 * @param {Iterable<Object>} folders - Folder nodes to update in place.
 * @param {Set<string>} expandedPaths - Paths that should end up expanded.
 */
function restoreExpandedPaths(folders, expandedPaths) {
    const todo = [folders];
    while (todo.length > 0) {
        const list = todo.pop();
        for (const folder of list) {
            folder.expanded = expandedPaths.has(folder.path);
            if (folder.children) todo.push(folder.children);
        }
    }
}
/**
 * Read ONE directory's immediate entries into `node`.
 *
 * Files go into node.files; sub-directories become 'pending' child nodes for
 * the breadth-first walk to descend into. A .zip file (when JSZip is loaded)
 * is kept as a file and also becomes a zip-root child whose contents are
 * scanned inline; an archive that cannot be read becomes an empty, finished
 * child. Only a 'pending' node is scanned, so concurrent callers (background
 * walk + open-prioritised scan) never read the same directory twice.
 *
 * If the scan generation changes at any point, the node is handed back as
 * 'pending' and nothing is recorded, so a superseded worker cannot alter the
 * newer scan's counters or status line. Errors while reading the directory
 * are logged and whatever was read so far is kept.
 *
 * @param {Object} node - Tree node whose handle is the directory to read.
 * @param {number} myGen - Scan generation the caller belongs to.
 * @returns {Promise<void>}
 */
async function scanNodeChildren(node, myGen) {
    // A superseded worker must not touch the node or the live status line.
    if (myGen !== scanGen) return;
    if (node.scanState !== 'pending') return;
    node.scanState = 'scanning';
    if (scanStats) scanStats.current = node.path;
    const files = [];
    const childDirs = [];
    let cancelled = false;
    try {
        for await (const entry of node.handle.values()) {
            if (myGen !== scanGen) { cancelled = true; break; }
            if (entry.kind === 'file') {
                const fo = await createFileObject(entry, node.handle);
                // Re-check after every await: scanStats may now belong to a newer scan.
                if (myGen !== scanGen) { cancelled = true; break; }
                if (!fo) continue;
                fo.folderPath = node.path;
                files.push(fo);
                if (scanStats) scanStats.files++;
                if (fo.extension === 'zip' && typeof JSZip !== 'undefined') {
                    const zipName = zddc.joinExtension(fo.originalFilename, fo.extension);
                    const zipPath = node.path + '/' + zipName;
                    const zh = { name: zipName, kind: 'directory', isZipRoot: true, zipPath: zipPath };
                    const zipNode = makeNode(zh, zipPath, node);
                    try {
                        await scanZipIntoNode(zipNode, fo);
                    } catch (e) {
                        console.error('Error scanning ZIP:', zipPath, e);
                        // The zip handle is a plain object with no values(),
                        // so the node must not stay 'pending' for the walk to
                        // "scan": finish it here as an empty folder.
                        zipNode.files = [];
                        zipNode.children = [];
                        finalizeZipNode(zipNode);
                    }
                    if (myGen !== scanGen) { cancelled = true; break; }
                    childDirs.push(zipNode);
                    if (scanStats) scanStats.folders++;
                }
            } else if (entry.kind === 'directory') {
                const childPath = node.path + '/' + entry.name;
                childDirs.push(makeNode(entry, childPath, node));
                if (scanStats) scanStats.folders++;
            }
        }
    } catch (err) {
        console.error('Error scanning folder:', node.path, err);
    }
    if (cancelled) {
        node.scanState = 'pending';
        return;
    }
    node.files = files;
    node.fileCount = files.length;
    node.children = childDirs;
    node.subdirCount = childDirs.length;
    // Zip children are scanned inline ('done'); real dirs are still pending.
    node.pending = childDirs.filter(function (c) { return c.scanState !== 'done'; }).length;
    if (node.pending === 0) {
        markDone(node);
    } else {
        node.scanState = 'children';
    }
    scheduleRender();
}
// Populate a zip-root node from the archive behind `fileObj`.
// Reads the file, parses it with JSZip and caches the parsed archive under the
// node's path together with the file and folder handles. Every archive entry
// is then turned into tree data: directories (including any implied by a
// file's path) become virtual-directory nodes, files become virtual file
// records in the node of their folder. Finishes by calling finalizeZipNode on
// the zip root. Rejects if the file cannot be read or parsed.
async function scanZipIntoNode(zipNode, fileObj) {
    const blob = await fileObj.handle.getFile();
    const archive = await JSZip.loadAsync(await blob.arrayBuffer());
    const zipPath = zipNode.path;
    zipCache.set(zipPath, { zip: archive, fileHandle: fileObj.handle, folderHandle: fileObj.folderHandle });

    // path -> node for every folder known so far, seeded with the zip root.
    const known = new Map([[zipPath, zipNode]]);

    // Return the node for dirPath, creating it (and any missing ancestors,
    // parents first) as virtual directories.
    const resolveDir = (dirPath) => {
        const existing = known.get(dirPath);
        if (known.has(dirPath)) return existing;
        const cut = dirPath.lastIndexOf('/');
        const owner = resolveDir(dirPath.substring(0, cut));
        const handle = {
            name: dirPath.substring(dirPath.lastIndexOf('/') + 1),
            kind: 'directory',
            isVirtualDir: true,
            zipPath: zipPath,
            virtualPath: dirPath
        };
        const created = makeNode(handle, dirPath, owner);
        owner.children.push(created);
        known.set(dirPath, created);
        return created;
    };

    archive.forEach((relativePath, entry) => {
        if (entry.dir) {
            resolveDir(zipPath + '/' + relativePath.replace(/\/$/, ''));
            return;
        }
        const fileName = relativePath.split('/').pop();
        let folderPath = zipPath;
        if (relativePath.includes('/')) {
            folderPath = zipPath + '/' + relativePath.substring(0, relativePath.lastIndexOf('/'));
        }
        const folder = resolveDir(folderPath);
        const parts = zddc.splitExtension(fileName);
        const record = {
            originalFilename: parts.name,
            extension: parts.extension,
            size: entry._data ? entry._data.uncompressedSize : 0,
            lastModified: entry.date ? entry.date.getTime() : Date.now(),
            isVirtual: true,
            zipPath: zipPath,
            zipEntryPath: relativePath,
            folderPath: folder.path,
            trackingNumber: '', revision: '', status: '', title: '',
            isDirty: false, error: false, errorMessage: '', validation: null, sha256: null
        };
        folder.files.push(record);
    });

    finalizeZipNode(zipNode);
}
// Finish a zip node and everything beneath it: set each node's own file and
// sub-folder counts, compute recursive totals bottom-up, and mark every node
// 'done' with no outstanding children.
function finalizeZipNode(node) {
    const kids = node.children;
    node.fileCount = node.files.length;
    node.subdirCount = kids.length;
    const totals = kids.reduce(function (acc, kid) {
        finalizeZipNode(kid); // children first, so their totals are ready
        acc.files += kid.totalFiles;
        acc.dirs += kid.totalDirs;
        return acc;
    }, { files: node.fileCount, dirs: kids.length });
    node.totalFiles = totals.files;
    node.totalDirs = totals.dirs;
    node.scanState = 'done';
    node.pending = 0;
}
/**
 * Scan a ZIP file and add its contents as virtual folders.
 *
 * Reads and parses the archive, caches it under its tree path, flags the file
 * object as a ZIP container, builds a map of virtual folders from the archive
 * entries, hands that map to buildVirtualFolderMap, and appends the ZIP to
 * `parentItems` as a virtual directory. Any failure is logged and swallowed.
 *
 * NOTE(review): nothing in this module calls this function any more; it looks
 * like a leftover from the pre-incremental scanner — confirm before relying
 * on or removing it.
 *
 * @param {Object} zipFileObj - File object of the archive (needs `handle`,
 *   `folderHandle`, `originalFilename`, `extension`); mutated with
 *   `isZipContainer` and `zipPath`.
 * @param {Map} foldersMap - Map that receives the virtual folder entries.
 * @param {string} parentPath - Tree path of the folder containing the ZIP.
 * @param {Array} parentItems - Item list of that folder; the ZIP root is pushed onto it.
 * @returns {Promise<void>}
 */
async function scanZipFile(zipFileObj, foldersMap, parentPath, parentItems) {
    try {
        const fileObj = await zipFileObj.handle.getFile();
        const arrayBuffer = await fileObj.arrayBuffer();
        const zip = await JSZip.loadAsync(arrayBuffer);
        const zipPath = parentPath + '/' + zddc.joinExtension(zipFileObj.originalFilename, zipFileObj.extension);
        // Cache the ZIP for later extraction
        zipCache.set(zipPath, {
            zip: zip,
            fileHandle: zipFileObj.handle,
            folderHandle: zipFileObj.folderHandle
        });
        // Mark the file as a ZIP container
        zipFileObj.isZipContainer = true;
        zipFileObj.zipPath = zipPath;
        // Build virtual folder structure from ZIP contents
        const virtualFolders = new Map(); // path -> { files: [], subdirs: Set }
        virtualFolders.set(zipPath, { files: [], subdirs: new Set() });
        zip.forEach((relativePath, zipEntry) => {
            if (zipEntry.dir) {
                // Archives are not required to list intermediate folders, so
                // create the directory together with every missing ancestor;
                // otherwise "a/b/" without an "a/" entry would be left
                // unlinked from the ZIP root and never shown.
                ensureVirtualPath(virtualFolders, zipPath, zipPath + '/' + relativePath.replace(/\/$/, ''));
                return;
            }
            // It's a file
            const fileName = relativePath.split('/').pop();
            const fileDir = relativePath.includes('/')
                ? zipPath + '/' + relativePath.substring(0, relativePath.lastIndexOf('/'))
                : zipPath;
            // Ensure parent directories exist
            ensureVirtualPath(virtualFolders, zipPath, fileDir);
            // Create virtual file object
            const split = zddc.splitExtension(fileName);
            const virtualFile = {
                originalFilename: split.name,
                extension: split.extension,
                size: zipEntry._data ? zipEntry._data.uncompressedSize : 0,
                lastModified: zipEntry.date ? zipEntry.date.getTime() : Date.now(),
                // Virtual file markers
                isVirtual: true,
                zipPath: zipPath,
                zipEntryPath: relativePath,
                // Editable fields
                trackingNumber: '',
                revision: '',
                status: '',
                title: '',
                // State
                isDirty: false,
                error: false,
                errorMessage: '',
                validation: null,
                sha256: null
            };
            virtualFolders.get(fileDir).files.push(virtualFile);
        });
        // Create a virtual handle for the ZIP root
        const zipVirtualHandle = {
            name: zddc.joinExtension(zipFileObj.originalFilename, zipFileObj.extension),
            kind: 'directory',
            isZipRoot: true,
            zipPath: zipPath
        };
        // Store virtual folder data
        buildVirtualFolderMap(virtualFolders, zipPath, foldersMap, zipVirtualHandle);
        // Add ZIP as a virtual directory in parent
        parentItems.push({
            handle: zipVirtualHandle,
            isDirectory: true,
            isZipRoot: true
        });
    } catch (err) {
        console.error('Error scanning ZIP file:', zipFileObj.originalFilename, err);
    }
}
/**
 * Make sure `targetPath` and every folder between it and the ZIP root exist
 * in the virtual folder map, registering each one in its parent's `subdirs`.
 * Returns immediately when `targetPath` is already present.
 * @param {Map<string, {files: Array, subdirs: Set<string>}>} virtualFolders
 * @param {string} zipPath - Path of the ZIP root (prefix of targetPath).
 * @param {string} targetPath - Folder path to create.
 */
function ensureVirtualPath(virtualFolders, zipPath, targetPath) {
    if (virtualFolders.has(targetPath)) {
        return;
    }
    const relative = targetPath.substring(zipPath.length + 1);
    const segments = relative.split('/').filter(Boolean);
    segments.reduce(function (parentPath, segment) {
        const here = parentPath + '/' + segment;
        if (!virtualFolders.has(here)) {
            virtualFolders.set(here, { files: [], subdirs: new Set() });
        }
        if (virtualFolders.has(parentPath)) {
            virtualFolders.get(parentPath).subdirs.add(here);
        }
        return here;
    }, zipPath);
}
/**
 * Register a ZIP root's contents in `foldersMap`.
 * The root's item list is its files followed by one directory item per
 * sub-folder; each sub-folder is registered through buildVirtualSubfolder.
 * The list is stored twice — under the ZIP's virtual handle object and under
 * its path string — so it can be found by either key. Does nothing when the
 * ZIP root is missing from `virtualFolders`.
 * @param {Map<string, {files: Array, subdirs: Set<string>}>} virtualFolders
 * @param {string} zipPath - Path of the ZIP root.
 * @param {Map} foldersMap - Map that receives the entries.
 * @param {Object} zipVirtualHandle - Virtual handle representing the ZIP root.
 */
function buildVirtualFolderMap(virtualFolders, zipPath, foldersMap, zipVirtualHandle) {
    const rootData = virtualFolders.get(zipPath);
    if (!rootData) {
        return;
    }
    const subdirPaths = [...rootData.subdirs];
    const dirItems = subdirPaths.map(function (subdirPath) {
        return {
            handle: {
                name: subdirPath.split('/').pop(),
                kind: 'directory',
                isVirtualDir: true,
                virtualPath: subdirPath,
                zipPath: zipPath
            },
            isDirectory: true,
            isVirtualDir: true
        };
    });
    const rootItems = [...rootData.files, ...dirItems];

    // Sub-folders are registered before the root's own two keys.
    for (const subdirPath of subdirPaths) {
        buildVirtualSubfolder(virtualFolders, subdirPath, foldersMap, zipPath);
    }
    foldersMap.set(zipVirtualHandle, rootItems);
    foldersMap.set(zipPath, rootItems); // Path-based key for tree building
}
/**
 * Register one virtual folder, and recursively its sub-folders, in `foldersMap`.
 * The folder's item list (its files followed by one directory item per
 * sub-folder) is stored under the folder's path string and under a freshly
 * created virtual handle object. Does nothing when the folder is missing from
 * `virtualFolders`.
 * @param {Map<string, {files: Array, subdirs: Set<string>}>} virtualFolders
 * @param {string} folderPath - Path of the folder to register.
 * @param {Map} foldersMap - Map that receives the entries.
 * @param {string} zipPath - Path of the ZIP root the folder belongs to.
 */
function buildVirtualSubfolder(virtualFolders, folderPath, foldersMap, zipPath) {
    const folderData = virtualFolders.get(folderPath);
    if (!folderData) {
        return;
    }
    // Virtual directory handle for a path inside this ZIP.
    const handleFor = function (path) {
        return {
            name: path.split('/').pop(),
            kind: 'directory',
            isVirtualDir: true,
            virtualPath: path,
            zipPath: zipPath
        };
    };
    const ownHandle = handleFor(folderPath);
    const items = Array.from(folderData.files);
    // Path key goes in first; the list keeps growing below via the same reference.
    foldersMap.set(folderPath, items);
    folderData.subdirs.forEach(function (subdirPath) {
        items.push({
            handle: handleFor(subdirPath),
            isDirectory: true,
            isVirtualDir: true
        });
        buildVirtualSubfolder(virtualFolders, subdirPath, foldersMap, zipPath);
    });
    foldersMap.set(ownHandle, items);
}
/**
 * Look up the cached data of a scanned ZIP.
 * @param {string} zipPath - Tree path of the ZIP.
 * @returns {Object|undefined} The cache entry, or undefined when the ZIP is not cached.
 */
function getZipCache(zipPath) {
    const entry = zipCache.get(zipPath);
    return entry;
}
/**
 * Extract a cached ZIP into a new folder next to it.
 *
 * The target folder is named after the ZIP without its ".zip" suffix and is
 * created (or reused) inside the ZIP's containing folder. Files are written
 * one at a time, creating sub-folders as needed. A file that fails to extract
 * is logged and skipped; its write stream is aborted so no half-written
 * stream is left open.
 *
 * @param {string} zipPath - Tree path of the ZIP, as used in the ZIP cache.
 * @returns {Promise<string>} Name of the folder the ZIP was extracted into.
 * @throws {Error} 'ZIP not found in cache' when the ZIP has not been scanned.
 */
async function extractZip(zipPath) {
    const cached = zipCache.get(zipPath);
    if (!cached) {
        throw new Error('ZIP not found in cache');
    }
    const { zip, folderHandle } = cached;
    // Get the ZIP filename without extension for the extract folder name
    const zipName = zipPath.split('/').pop();
    const extractFolderName = zipName.replace(/\.zip$/i, '');
    // Create extraction folder
    const extractFolder = await folderHandle.getDirectoryHandle(extractFolderName, { create: true });
    // Collect the file entries first; directories are created on demand below.
    const entries = [];
    zip.forEach((relativePath, zipEntry) => {
        if (!zipEntry.dir) {
            entries.push({ path: relativePath, entry: zipEntry });
        }
    });
    for (const { path, entry } of entries) {
        try {
            // Create subdirectories if needed
            const parts = path.split('/');
            const fileName = parts.pop();
            let currentDir = extractFolder;
            for (const part of parts) {
                if (part) {
                    currentDir = await currentDir.getDirectoryHandle(part, { create: true });
                }
            }
            // Write file
            const content = await entry.async('arraybuffer');
            const fileHandle = await currentDir.getFileHandle(fileName, { create: true });
            const writable = await fileHandle.createWritable();
            try {
                await writable.write(content);
                await writable.close();
            } catch (writeErr) {
                // Release the stream instead of leaving it open until it is
                // garbage-collected; a failure to abort is not interesting.
                try {
                    await writable.abort();
                } catch (_) {
                    /* stream already closed or errored */
                }
                throw writeErr;
            }
        } catch (err) {
            console.error('Error extracting file:', path, err);
        }
    }
    return extractFolderName;
}
/**
 * Build the app's file record for one on-disk file.
 * Reads the file to obtain its name, size and modification time, splits the
 * name into base name and extension, and returns a record with empty editable
 * fields and clean state. `folderPath` is not set here; the directory scan
 * assigns it after the record is created.
 * @param {FileSystemFileHandle} fileHandle - Handle of the file.
 * @param {FileSystemDirectoryHandle} folderHandle - Handle of the containing folder.
 * @returns {Promise<Object|null>} The record, or null (after logging) when the
 *   file cannot be read.
 */
async function createFileObject(fileHandle, folderHandle) {
    try {
        const file = await fileHandle.getFile();
        const { name, extension } = zddc.splitExtension(file.name);
        const identity = {
            handle: fileHandle,
            folderHandle: folderHandle,
            originalFilename: name,
            extension: extension,
            size: file.size,
            lastModified: file.lastModified
        };
        const editable = { trackingNumber: '', revision: '', status: '', title: '' };
        const state = { isDirty: false, error: false, errorMessage: '', validation: null, sha256: null };
        return Object.assign(identity, editable, state);
    } catch (err) {
        console.error('Error reading file:', fileHandle.name, err);
        return null;
    }
}
// Publish the scanner's public functions on the shared app module registry.
window.app.modules.scanner = {
    scanDirectory: scanDirectory,
    ensureScanned: ensureScanned,
    getZipCache: getZipCache,
    extractZip: extractZip
};
})();