ZDDC/classifier/js/scanner.js
ZDDC caff489206 perf(classifier): scan is a pure listing — no getFile() per file; lazy zips
The scan was slow because it OPENED every file (getFile() for size/lastModified
— which the grid doesn't even display) and read every ZIP inline. On a network
share that's a round-trip per file. Now:

- createFileObject builds rows from the directory entry name alone, no
  getFile(); size/lastModified load on demand (preview/SHA/rename already call
  getFile() themselves). The scan is now a pure directory listing.
- ZIPs are lazy: a .zip is an expandable node read only when opened
  (scanZipNode), not during the walk.
- Footer shows live elapsed time (ticks every second), and a success toast
  fires at completion with totals: "Scan complete — N folders, M files in Ts."

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-09 10:55:29 -05:00

708 lines
29 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* Directory Scanner Module
* Scans directories and collects files
*/
(function() {
'use strict';
// Store ZIP data for later access.
// Written by scanZipIntoNode / scanZipFile, read by getZipCache / extractZip,
// and emptied at the start of every scanDirectory() call.
// folderHandle is the handle of the directory that contains the .zip file;
// extractZip creates the extraction folder inside it.
const zipCache = new Map(); // path -> { zip: JSZip, fileHandle: FileSystemFileHandle, folderHandle }
// ── Incremental-scan state ───────────────────────────────────────────────
// The scan no longer reads the whole tree before rendering. It walks
// breadth-first behind a small worker pool, renders progressively (top
// levels appear first), and shows live status — so a huge network drive
// never looks stalled. Each folder tracks its own scan state + counts.
let scanGen = 0; // bumped per scan; stale workers bail
let scanStats = null; // { folders, files, current, done, startedAt }
let renderTimer = null; // throttle for progressive re-render
/**
 * Queue a throttled re-render of the tree plus a footer status refresh.
 * Calls made while a render is already queued are coalesced into that one;
 * the queued render runs 180 ms after the first request.
 */
function scheduleRender() {
    if (renderTimer) {
        return; // a render is already waiting to run
    }
    const RENDER_DELAY_MS = 180;
    const runQueuedRender = () => {
        // Clear the slot first so work done during the render can queue the next one.
        renderTimer = null;
        try {
            window.app.modules.tree.render();
        } catch (_) {
            /* ignore */
        }
        updateScanStatus();
    };
    renderTimer = setTimeout(runQueuedRender, RENDER_DELAY_MS);
}
/**
 * Render the tree and refresh the footer status immediately, cancelling any
 * throttled render that is still queued. A failing tree render is ignored so
 * the status line is still updated.
 */
function flushRender() {
    const queuedTimer = renderTimer;
    if (queuedTimer) {
        clearTimeout(queuedTimer);
        renderTimer = null;
    }
    try {
        window.app.modules.tree.render();
    } catch (_) {
        /* ignore */
    }
    updateScanStatus();
}
/**
 * Format the time elapsed since the current scan started.
 *
 * Under one minute the value has one decimal ("3.2s"); from one minute on it
 * is "Xm SSs" with zero-padded whole seconds ("1m 04s").
 *
 * @returns {string} '0s' when no scan has been started (scanStats is null).
 */
function elapsedStr() {
    if (!scanStats) return '0s';
    const ms = Date.now() - scanStats.startedAt;
    if (ms < 60000) return (ms / 1000).toFixed(1) + 's';
    // Round to whole seconds BEFORE splitting into minutes/seconds. Rounding
    // only the remainder turned e.g. 1m59.6s into "1m 60s" instead of "2m 00s".
    const totalSeconds = Math.round(ms / 1000);
    const m = Math.floor(totalSeconds / 60);
    const s = totalSeconds % 60;
    return m + 'm ' + (s < 10 ? '0' : '') + s + 's';
}
/**
 * Write the scan status line into the #scanStatus footer element.
 *
 * While scanning it shows running folder/file counts, the live elapsed time
 * and the path currently being read, and adds the 'scanning' class. Once the
 * scan is done it shows the final totals and removes that class. Does nothing
 * when the element is missing or no scan has been started.
 */
function updateScanStatus() {
    const statusEl = document.getElementById('scanStatus');
    if (!statusEl || !scanStats) {
        return;
    }
    if (scanStats.done) {
        statusEl.textContent = `Scanned ${scanStats.folders} folders · ${scanStats.files} files in ${elapsedStr()}`;
        statusEl.classList.remove('scanning');
        return;
    }
    const progress = `Scanning… ${scanStats.folders} folders · ${scanStats.files} files · ${elapsedStr()}`;
    const currentSuffix = scanStats.current ? ` — ${scanStats.current}` : '';
    statusEl.textContent = progress + currentSuffix;
    statusEl.classList.add('scanning');
}
/**
 * Create a fresh, unscanned tree node for a directory-like handle.
 *
 * scanState lifecycle: 'pending' (children not read) -> 'scanning' ->
 * 'children' (immediate children read, subtree still going) -> 'done'
 * (entire subtree enumerated). The UI greys a node until it is 'done'.
 *
 * @param {object} handle  Directory handle or virtual handle; `name` is read,
 *                         plus the optional isZipRoot / isVirtualDir / zipPath markers.
 * @param {string} path    Full path of the node within the tree.
 * @param {object} [parent] Parent node; stored as null when falsy.
 * @returns {object} The new node.
 */
function makeNode(handle, path, parent) {
    const fresh = {
        name: handle.name,
        path,
        handle,
        parent: parent ? parent : null,
        files: [],
        fileCount: 0,    // files directly inside this folder
        subdirCount: 0,  // subfolders directly inside this folder
        runFiles: 0,     // files in the whole subtree (grows while scanning)
        runDirs: 0,      // subfolders in the whole subtree
        children: [],
        expanded: false,
        scanState: 'pending',
        pending: 0,      // child dirs that are not yet 'done'
    };
    // Zip roots and the virtual folders inside a zip carry their marker and
    // the owning archive's path over onto the node.
    for (const marker of ['isZipRoot', 'isVirtualDir']) {
        if (handle[marker]) {
            fresh[marker] = true;
            fresh.zipPath = handle.zipPath;
        }
    }
    return fresh;
}
/**
 * Mark a node's subtree as fully scanned and propagate completion upwards.
 *
 * Each newly completed node decrements its parent's `pending` counter; a
 * parent whose counter reaches zero while it is in 'children' or 'scanning'
 * state is completed in turn. This is what turns a folder from grey to solid.
 * The running totals (runFiles / runDirs) are accumulated elsewhere as
 * descendants are scanned, so nothing is computed here.
 *
 * @param {object} node Tree node to mark; already-'done' nodes are left alone.
 */
function markDone(node) {
    let current = node;
    while (current.scanState !== 'done') {
        current.scanState = 'done';
        const above = current.parent;
        if (!above || above.scanState === 'done') {
            return;
        }
        above.pending -= 1;
        const parentListed = above.scanState === 'children' || above.scanState === 'scanning';
        if (above.pending > 0 || !parentListed) {
            return; // parent still waits on other children, or was never listed
        }
        current = above;
    }
}
// One-shot toast for scan errors (permission denied, network hiccups on a
// share). De-duped per path so a flaky folder doesn't spam.
const scanErrorsSeen = new Set();
/**
 * Log a scan failure and surface it to the user once per path.
 *
 * Every call is logged to the console; the toast is shown only the first
 * time a given path fails, and only when window.zddc.toast is available.
 *
 * @param {string} path Path of the folder or archive that failed to scan.
 * @param {*} err       The caught error; its `message` is shown when present,
 *                      otherwise the value itself is stringified.
 */
function reportScanError(path, err) {
    console.error('Scan error:', path, err);
    if (scanErrorsSeen.has(path)) return;
    scanErrorsSeen.add(path);
    const detail = err && err.message ? err.message : err;
    // The user-facing text previously read "Couldnt" (missing apostrophe).
    const msg = "Couldn't scan " + path + ': ' + detail;
    if (window.zddc && typeof window.zddc.toast === 'function') {
        window.zddc.toast(msg, 'error');
    }
}
/**
* Scan directory and build folder tree with files.
*
* Starts a new incremental scan: bumps the scan generation (so workers of any
* earlier scan bail out), clears the ZIP cache, resets the scan statistics,
* installs a single-root folder tree on window.app and in the store, then
* walks the tree breadth-first, rendering progressively.
*
* @param {object} dirHandle Root directory handle; its `name` becomes the root
*   path and scanNodeChildren iterates its `values()` (presumably a
*   FileSystemDirectoryHandle — confirm against the caller).
* @param {boolean} [preserveState=false] When true, the previously expanded
*   folder paths and the previous folder selection are carried over instead
*   of selecting the root.
* @returns {Promise<void>} Resolves when the walk finishes, or early (without
*   marking the scan done or showing the toast) when a newer scan superseded
*   this one.
*/
async function scanDirectory(dirHandle, preserveState = false) {
// Preserve which folders were expanded across a rescan (e.g. after a
// ZIP extract) so the user doesn't lose their place.
let savedExpanded = new Set();
let savedSelected = new Set();
if (preserveState) {
savedExpanded = getExpandedPaths(window.app.folderTree);
savedSelected = new Set(window.app.selectedFolders);
}
// Claim a new generation; every worker compares against this value and
// stops as soon as a later scan has bumped scanGen again.
const myGen = ++scanGen;
zipCache.clear();
scanStats = { folders: 0, files: 0, current: dirHandle.name, done: false, startedAt: Date.now() };
// Root node — render immediately so the pane never sits blank.
const root = makeNode(dirHandle, dirHandle.name, null);
root.expanded = true;
window.app.folderTree = [root];
window.app.modules.store.setFolderTree(window.app.folderTree);
if (!preserveState) {
// Select the root so the grid shows its immediate files at once,
// instead of auto-loading the ENTIRE drive (the old behaviour,
// which is exactly what stalled on a large share).
window.app.selectedFolders = new Set([root.path]);
window.app.lastSelectedFolderPath = root.path;
window.app.modules.store.setSelectedFolders(window.app.selectedFolders);
} else {
// Rescan: put the previous selection back (a copy taken before the reset).
window.app.selectedFolders = savedSelected;
window.app.modules.store.setSelectedFolders(savedSelected);
}
flushRender();
// Tick the footer's elapsed time once a second even if no new folder
// landed (so a slow directory doesn't make the timer look frozen).
// The ticker also stops itself if this scan was superseded or finished.
const ticker = setInterval(function () {
if (myGen !== scanGen || (scanStats && scanStats.done)) { clearInterval(ticker); return; }
updateScanStatus();
}, 1000);
// Breadth-first by level behind a bounded worker pool: level 1, then
// level 2, … each rendered as it lands (top levels appear first).
// Deeper levels keep filling in; workers await between directories so
// the UI stays responsive on a slow/large network drive.
let level = [root];
while (level.length && myGen === scanGen) {
// At most 6 directories of the current level are read concurrently.
await runWithConcurrency(level, 6, function (n) { return scanNodeChildren(n, myGen); });
const next = [];
for (const n of level) {
for (const c of n.children) {
// Re-expand folders that were open before the rescan.
if (preserveState && savedExpanded.has(c.path)) c.expanded = true;
// Only still-unread directories go into the next level; lazy
// 'zip-pending' nodes and anything already scanned are skipped.
if (c.scanState === 'pending') next.push(c);
}
}
level = next;
}
clearInterval(ticker);
if (myGen !== scanGen) return; // superseded by a newer scan
scanStats.done = true;
scanStats.current = '';
flushRender();
// Completion toast with the totals + elapsed time.
if (window.zddc && typeof window.zddc.toast === 'function') {
window.zddc.toast(
'Scan complete — ' + scanStats.folders + ' folders, '
+ scanStats.files + ' files in ' + elapsedStr() + '.',
'success');
}
}
/**
 * Call `fn` once for every item, keeping at most `limit` calls in flight.
 *
 * A fixed pool of workers pulls items from a shared cursor, so items start
 * in array order and each worker picks up its next item as soon as its
 * previous call settles. No transient-empty-queue race: a worker stops only
 * when the cursor has passed the end of the array.
 *
 * @param {Array} items     Items to process.
 * @param {number} limit    Maximum number of concurrent calls.
 * @param {Function} fn     Called as fn(item); may return a promise.
 * @returns {Promise<void>} Resolves once every worker has drained the queue;
 *                          rejects with the first error thrown by `fn`.
 */
async function runWithConcurrency(items, limit, fn) {
    let cursor = 0;
    const drainQueue = async () => {
        while (cursor < items.length) {
            const claimed = items[cursor];
            cursor += 1;
            await fn(claimed);
        }
    };
    const poolSize = Math.min(limit, items.length);
    const workers = [];
    while (workers.length < poolSize) {
        workers.push(drainQueue());
    }
    await Promise.all(workers);
}
/**
 * Scan a folder's subtree right now, ahead of the background walk.
 *
 * Called when the user opens a folder so that an opened folder always shows
 * complete contents. Safe to call repeatedly: nodes that are already being
 * scanned or are scanned are skipped by scanNodeChildren. The walk shares
 * the live scan generation and stops if a new scan starts meanwhile.
 *
 * @param {object} node Tree node to complete; ignored when missing, without
 *                      a handle, or already 'done'.
 * @returns {Promise<void>}
 */
async function ensureScanned(node) {
    const needsScan = Boolean(node) && Boolean(node.handle) && node.scanState !== 'done';
    if (!needsScan) {
        return;
    }
    const generation = scanGen;
    const scanOne = (target) => scanNodeChildren(target, generation);
    let frontier = [node];
    while (frontier.length && generation === scanGen) {
        await runWithConcurrency(frontier, 6, scanOne);
        // Next level: every child that nobody has started reading yet.
        const upcoming = [];
        for (const scanned of frontier) {
            for (const child of scanned.children) {
                if (child.scanState === 'pending') {
                    upcoming.push(child);
                }
            }
        }
        frontier = upcoming;
    }
    flushRender();
}
/**
 * Collect the paths of every expanded folder in a tree.
 *
 * @param {Array<object>} folders Folder nodes to inspect (descends into `children`).
 * @param {Set<string>} [paths]   Accumulator; a new Set is created when omitted.
 * @returns {Set<string>} The accumulator, holding the path of each expanded folder.
 */
function getExpandedPaths(folders, paths = new Set()) {
    folders.forEach((entry) => {
        if (entry.expanded) {
            paths.add(entry.path);
        }
        const nested = entry.children;
        if (nested) {
            getExpandedPaths(nested, paths);
        }
    });
    return paths;
}
/**
 * Apply a saved set of expanded paths to a tree.
 *
 * Every folder's `expanded` flag is overwritten: true when its path is in
 * the set, false otherwise. Descends into `children` where present.
 *
 * @param {Array<object>} folders     Folder nodes to update in place.
 * @param {Set<string>} expandedPaths Paths that should end up expanded.
 */
function restoreExpandedPaths(folders, expandedPaths) {
    folders.forEach((entry) => {
        entry.expanded = expandedPaths.has(entry.path);
        const nested = entry.children;
        if (nested) {
            restoreExpandedPaths(nested, expandedPaths);
        }
    });
}
// Read ONE directory's immediate entries: files into node.files, child
// directories into node.children (left 'pending' for the BFS to descend).
// A .zip is listed as a regular file AND (when JSZip is loaded) gets an
// expandable zip-root child left in 'zip-pending'; the archive itself is
// NOT read here — scanZipNode reads it when the node is opened.
// Idempotent: only a 'pending' node is scanned, so concurrent callers
// (background + open-prioritised) don't double-scan.
//
// node  — tree node to read; node.handle.values() is iterated.
// myGen — scan generation of the caller; if scanGen moves on mid-listing the
//         node is reset to 'pending' and the partial listing is discarded.
// A failure while listing sets node.scanError, is reported once via
// reportScanError, and whatever was listed before the failure is kept.
async function scanNodeChildren(node, myGen) {
// A .zip is a lazy node — read its contents only when opened.
if (node.scanState === 'zip-pending') { await scanZipNode(node); return; }
if (node.scanState !== 'pending') return;
// Claim the node synchronously so a concurrent caller sees it is taken.
node.scanState = 'scanning';
if (scanStats) scanStats.current = node.path;
const files = [];
const childDirs = [];
try {
for await (const entry of node.handle.values()) {
if (myGen !== scanGen) { node.scanState = 'pending'; return; } // cancelled
if (entry.kind === 'file') {
const fo = createFileObject(entry, node.handle);
fo.folderPath = node.path;
files.push(fo);
if (scanStats) scanStats.files++;
if (fo.extension === 'zip' && typeof JSZip !== 'undefined') {
// Don't read the archive during the listing — make an
// expandable, lazy zip node scanned on open (scanZipNode).
const zipName = zddc.joinExtension(fo.originalFilename, fo.extension);
const zipPath = node.path + '/' + zipName;
const zh = { name: zipName, kind: 'directory', isZipRoot: true, zipPath: zipPath };
const zipNode = makeNode(zh, zipPath, node);
// Keep the file row so scanZipNode can open the archive later.
zipNode._zipFileObj = fo;
zipNode.scanState = 'zip-pending';
childDirs.push(zipNode);
// The zip counts as one folder in the running statistics.
if (scanStats) scanStats.folders++;
}
} else if (entry.kind === 'directory') {
const childPath = node.path + '/' + entry.name;
childDirs.push(makeNode(entry, childPath, node));
if (scanStats) scanStats.folders++;
}
}
} catch (err) {
// Flag the node and report once; the partial listing below is still used.
node.scanError = true;
reportScanError(node.path, err);
}
node.files = files;
node.fileCount = files.length;
node.children = childDirs;
node.subdirCount = childDirs.length;
// Roll this folder's own files/dirs into the running subtree totals of
// this node + every ancestor. Real child dirs add their share when they
// get scanned; lazy zip nodes add theirs when opened (scanZipNode).
const addF = files.length;
const addD = childDirs.length;
for (let a = node; a; a = a.parent) { a.runFiles += addF; a.runDirs += addD; }
// Only real unscanned dirs hold the parent open; zip-pending children
// are lazy, so they don't.
node.pending = childDirs.filter(function (c) { return c.scanState === 'pending'; }).length;
if (node.pending === 0) {
// Nothing left to wait for below this folder: complete it (and possibly its ancestors).
markDone(node);
} else {
node.scanState = 'children';
}
scheduleRender();
}
/**
 * Read a lazy zip node's contents on demand (when it is opened).
 *
 * Builds the node's children via scanZipIntoNode and then adds the archive's
 * internal file/folder totals to every ancestor's running totals (the zip
 * itself was already counted as one folder by its parent). If the archive
 * cannot be read, the error is reported and the node is completed as empty.
 *
 * @param {object} node Zip-root node; ignored unless it is 'zip-pending' and
 *                      still holds its `_zipFileObj`.
 * @returns {Promise<void>}
 */
async function scanZipNode(node) {
    const isUnreadZip = node.scanState === 'zip-pending' && Boolean(node._zipFileObj);
    if (!isUnreadZip) {
        return;
    }
    node.scanState = 'scanning';
    scheduleRender();
    try {
        // Builds children, runFiles/runDirs, and sets the subtree to 'done'.
        await scanZipIntoNode(node, node._zipFileObj);
    } catch (e) {
        reportScanError(node.path, e);
        node.scanState = 'done';
        node.runFiles = 0;
        node.runDirs = 0;
    }
    // The file row is no longer needed once the archive has been read (or failed).
    node._zipFileObj = null;
    let ancestor = node.parent;
    while (ancestor) {
        ancestor.runFiles += node.runFiles;
        ancestor.runDirs += node.runDirs;
        ancestor = ancestor.parent;
    }
    scheduleRender();
}
/**
 * Load an archive and build a zip-root node's children from its entries.
 *
 * The parsed archive is stored in zipCache under the node's path. Directory
 * entries (and any directories implied by file paths) become virtual folder
 * nodes with the same shape as on-disk nodes, file entries become virtual
 * file rows, and the whole subtree is finalised as 'done'.
 *
 * @param {object} zipNode Zip-root tree node to populate.
 * @param {object} fileObj File row of the .zip; `handle.getFile()` supplies
 *                         the bytes and `folderHandle` is cached for extraction.
 * @returns {Promise<void>}
 */
async function scanZipIntoNode(zipNode, fileObj) {
    const archiveFile = await fileObj.handle.getFile();
    const archiveBytes = await archiveFile.arrayBuffer();
    const zip = await JSZip.loadAsync(archiveBytes);
    const zipPath = zipNode.path;
    zipCache.set(zipPath, { zip: zip, fileHandle: fileObj.handle, folderHandle: fileObj.folderHandle });

    // Every folder node created so far, keyed by its full path.
    const dirNodes = new Map([[zipPath, zipNode]]);

    // Return the node for dirPath, creating it and any missing ancestors
    // (outermost first) on the way down from the nearest existing folder.
    const resolveDir = (dirPath) => {
        const missing = [];
        let probe = dirPath;
        while (!dirNodes.has(probe)) {
            missing.push(probe);
            probe = probe.substring(0, probe.lastIndexOf('/'));
        }
        let anchor = dirNodes.get(probe);
        while (missing.length > 0) {
            const nextPath = missing.pop();
            const virtualHandle = {
                name: nextPath.substring(nextPath.lastIndexOf('/') + 1),
                kind: 'directory',
                isVirtualDir: true,
                zipPath: zipPath,
                virtualPath: nextPath,
            };
            const created = makeNode(virtualHandle, nextPath, anchor);
            anchor.children.push(created);
            dirNodes.set(nextPath, created);
            anchor = created;
        }
        return anchor;
    };

    zip.forEach((relativePath, entry) => {
        if (entry.dir) {
            resolveDir(zipPath + '/' + relativePath.replace(/\/$/, ''));
            return;
        }
        const lastSlash = relativePath.lastIndexOf('/');
        const fileName = relativePath.substring(lastSlash + 1);
        const owner = resolveDir(lastSlash === -1 ? zipPath : zipPath + '/' + relativePath.substring(0, lastSlash));
        const split = zddc.splitExtension(fileName);
        owner.files.push({
            originalFilename: split.name,
            extension: split.extension,
            size: entry._data ? entry._data.uncompressedSize : 0,
            lastModified: entry.date ? entry.date.getTime() : Date.now(),
            // Virtual file markers
            isVirtual: true,
            zipPath: zipPath,
            zipEntryPath: relativePath,
            folderPath: owner.path,
            // Editable fields
            trackingNumber: '',
            revision: '',
            status: '',
            title: '',
            // State
            isDirty: false,
            error: false,
            errorMessage: '',
            validation: null,
            sha256: null,
        });
    });
    finalizeZipNode(zipNode);
}
/**
 * Roll up a zip node's counts and mark its whole subtree 'done'.
 *
 * Sets the direct counts (fileCount / subdirCount) and the subtree totals
 * (runFiles / runDirs) on the node and, recursively, on every descendant.
 *
 * @param {object} node Zip-root or virtual folder node with `files` and `children`.
 */
function finalizeZipNode(node) {
    const ownFiles = node.files.length;
    const ownDirs = node.children.length;
    node.fileCount = ownFiles;
    node.subdirCount = ownDirs;
    // Children are finalised first so their subtree totals are ready to add.
    const totals = node.children.reduce((sum, child) => {
        finalizeZipNode(child);
        return { files: sum.files + child.runFiles, dirs: sum.dirs + child.runDirs };
    }, { files: ownFiles, dirs: ownDirs });
    node.runFiles = totals.files;
    node.runDirs = totals.dirs;
    node.scanState = 'done';
    node.pending = 0;
}
/**
 * Scan a ZIP file and add its contents as virtual folders.
 *
 * Loads the archive, caches it in zipCache, marks the file row as a ZIP
 * container, groups the entries into per-folder buckets, publishes those
 * buckets into `foldersMap` (via buildVirtualFolderMap) and finally appends
 * the archive to `parentItems` as a virtual directory. Any failure is logged
 * to the console and otherwise ignored.
 *
 * @param {object} zipFileObj   File row of the .zip (handle, folderHandle, name parts).
 * @param {Map} foldersMap      Receives the virtual folder listings.
 * @param {string} parentPath   Path of the folder containing the archive.
 * @param {Array} parentItems   Item list of that folder; gains the zip root entry.
 * @returns {Promise<void>}
 */
async function scanZipFile(zipFileObj, foldersMap, parentPath, parentItems) {
    // A fresh, empty bucket for one virtual folder.
    const emptyFolder = () => ({ files: [], subdirs: new Set() });
    try {
        const diskFile = await zipFileObj.handle.getFile();
        const bytes = await diskFile.arrayBuffer();
        const zip = await JSZip.loadAsync(bytes);
        const zipPath = parentPath + '/' + zddc.joinExtension(zipFileObj.originalFilename, zipFileObj.extension);

        // Cache the ZIP for later extraction
        zipCache.set(zipPath, {
            zip: zip,
            fileHandle: zipFileObj.handle,
            folderHandle: zipFileObj.folderHandle,
        });

        // Mark the file as a ZIP container
        zipFileObj.isZipContainer = true;
        zipFileObj.zipPath = zipPath;

        // path -> { files: [], subdirs: Set }
        const virtualFolders = new Map([[zipPath, emptyFolder()]]);

        // Register an explicit directory entry and link it to its parent
        // bucket when that parent is already known.
        const recordDirectory = (relativePath) => {
            const dirPath = zipPath + '/' + relativePath.replace(/\/$/, '');
            if (!virtualFolders.has(dirPath)) {
                virtualFolders.set(dirPath, emptyFolder());
            }
            const parentDir = dirPath.substring(0, dirPath.lastIndexOf('/'));
            if (virtualFolders.has(parentDir)) {
                virtualFolders.get(parentDir).subdirs.add(dirPath);
            }
        };

        // Register a file entry in the bucket of its containing folder,
        // creating any intermediate folders first.
        const recordFile = (relativePath, zipEntry) => {
            const lastSlash = relativePath.lastIndexOf('/');
            const fileName = relativePath.substring(lastSlash + 1);
            const fileDir = lastSlash === -1
                ? zipPath
                : zipPath + '/' + relativePath.substring(0, lastSlash);
            ensureVirtualPath(virtualFolders, zipPath, fileDir);
            const split = zddc.splitExtension(fileName);
            virtualFolders.get(fileDir).files.push({
                originalFilename: split.name,
                extension: split.extension,
                size: zipEntry._data ? zipEntry._data.uncompressedSize : 0,
                lastModified: zipEntry.date ? zipEntry.date.getTime() : Date.now(),
                // Virtual file markers
                isVirtual: true,
                zipPath: zipPath,
                zipEntryPath: relativePath,
                // Editable fields
                trackingNumber: '',
                revision: '',
                status: '',
                title: '',
                // State
                isDirty: false,
                error: false,
                errorMessage: '',
                validation: null,
                sha256: null,
            });
        };

        zip.forEach((relativePath, zipEntry) => {
            if (zipEntry.dir) {
                recordDirectory(relativePath);
            } else {
                recordFile(relativePath, zipEntry);
            }
        });

        // Virtual handle standing in for the ZIP root in the tree.
        const zipVirtualHandle = {
            name: zddc.joinExtension(zipFileObj.originalFilename, zipFileObj.extension),
            kind: 'directory',
            isZipRoot: true,
            zipPath: zipPath,
        };

        // Publish the virtual folder listings, then expose the ZIP as a
        // virtual directory inside its parent folder.
        buildVirtualFolderMap(virtualFolders, zipPath, foldersMap, zipVirtualHandle);
        parentItems.push({
            handle: zipVirtualHandle,
            isDirectory: true,
            isZipRoot: true,
        });
    } catch (err) {
        console.error('Error scanning ZIP file:', zipFileObj.originalFilename, err);
    }
}
/**
 * Ensure all parent directories exist in virtual folder map.
 *
 * Walks from the ZIP root down to `targetPath`, creating an empty bucket for
 * every folder that is missing and registering each folder in its parent's
 * `subdirs` set. Does nothing when `targetPath` is already present.
 *
 * @param {Map} virtualFolders path -> { files: [], subdirs: Set }
 * @param {string} zipPath     Path of the ZIP root (prefix of targetPath).
 * @param {string} targetPath  Folder path that must exist afterwards.
 */
function ensureVirtualPath(virtualFolders, zipPath, targetPath) {
    if (virtualFolders.has(targetPath)) {
        return;
    }
    const segments = targetPath
        .substring(zipPath.length + 1)
        .split('/')
        .filter(Boolean);
    segments.reduce((abovePath, segment) => {
        const herePath = abovePath + '/' + segment;
        if (!virtualFolders.has(herePath)) {
            virtualFolders.set(herePath, { files: [], subdirs: new Set() });
        }
        if (virtualFolders.has(abovePath)) {
            virtualFolders.get(abovePath).subdirs.add(herePath);
        }
        return herePath;
    }, zipPath);
}
/**
 * Build virtual folder entries for the foldersMap.
 *
 * Publishes the ZIP root's listing (its files followed by one directory item
 * per subfolder) and recursively publishes every subfolder. The root listing
 * is stored under BOTH the virtual handle object and the path string so a
 * lookup works whichever reference the caller holds.
 *
 * @param {Map} virtualFolders      path -> { files: [], subdirs: Set }
 * @param {string} zipPath          Path of the ZIP root.
 * @param {Map} foldersMap          Destination map of folder listings.
 * @param {object} zipVirtualHandle Virtual handle representing the ZIP root.
 */
function buildVirtualFolderMap(virtualFolders, zipPath, foldersMap, zipVirtualHandle) {
    const rootData = virtualFolders.get(zipPath);
    if (!rootData) {
        return;
    }
    // Start from a copy of the root's files; directory items are appended.
    const rootItems = Array.from(rootData.files);
    rootData.subdirs.forEach((subdirPath) => {
        const virtualDir = {
            name: subdirPath.substring(subdirPath.lastIndexOf('/') + 1),
            kind: 'directory',
            isVirtualDir: true,
            virtualPath: subdirPath,
            zipPath: zipPath,
        };
        rootItems.push({ handle: virtualDir, isDirectory: true, isVirtualDir: true });
        // Recursively add subdir contents
        buildVirtualSubfolder(virtualFolders, subdirPath, foldersMap, zipPath);
    });
    foldersMap.set(zipVirtualHandle, rootItems);
    foldersMap.set(zipPath, rootItems); // Path-based key for tree building
}
/**
 * Recursively build virtual subfolder entries.
 *
 * Publishes one folder's listing (its files followed by one directory item
 * per subfolder) into `foldersMap`, keyed by the folder's path string and by
 * a freshly created virtual handle, then does the same for each subfolder.
 *
 * @param {Map} virtualFolders path -> { files: [], subdirs: Set }
 * @param {string} folderPath  Path of the folder to publish.
 * @param {Map} foldersMap     Destination map of folder listings.
 * @param {string} zipPath     Path of the owning ZIP root.
 */
function buildVirtualSubfolder(virtualFolders, folderPath, foldersMap, zipPath) {
    const folderData = virtualFolders.get(folderPath);
    if (!folderData) {
        return;
    }
    // Virtual handle describing a folder that lives inside the archive.
    const virtualHandleFor = (path) => ({
        name: path.substring(path.lastIndexOf('/') + 1),
        kind: 'directory',
        isVirtualDir: true,
        virtualPath: path,
        zipPath: zipPath,
    });
    const ownHandle = virtualHandleFor(folderPath);
    const items = Array.from(folderData.files);
    // Store with path string key for tree building lookup
    foldersMap.set(folderPath, items);
    folderData.subdirs.forEach((subdirPath) => {
        items.push({
            handle: virtualHandleFor(subdirPath),
            isDirectory: true,
            isVirtualDir: true,
        });
        // Recursively add subdir contents
        buildVirtualSubfolder(virtualFolders, subdirPath, foldersMap, zipPath);
    });
    foldersMap.set(ownHandle, items);
}
/**
 * Get cached ZIP data.
 *
 * @param {string} zipPath Tree path of the archive.
 * @returns {object|undefined} The cache entry stored for that path, or
 *                             undefined when the archive has not been read.
 */
function getZipCache(zipPath) {
    const cachedEntry = zipCache.get(zipPath);
    return cachedEntry;
}
/**
 * Extract a ZIP file to its parent directory.
 *
 * Creates (or reuses) a folder named after the archive without its ".zip"
 * suffix, next to the archive, and writes every file entry into it,
 * recreating the archive's folder structure. Extraction is best-effort: a
 * file that fails is logged to the console and the remaining files are still
 * extracted.
 *
 * @param {string} zipPath Tree path of the archive; it must already be in the
 *                         ZIP cache (i.e. the archive has been opened).
 * @returns {Promise<string>} Name of the folder the files were extracted into.
 * @throws {Error} When the archive is not in the cache.
 */
async function extractZip(zipPath) {
    const cached = zipCache.get(zipPath);
    if (!cached) {
        throw new Error('ZIP not found in cache');
    }
    const { zip, folderHandle } = cached;
    // Get the ZIP filename without extension for the extract folder name
    const zipName = zipPath.split('/').pop();
    const extractFolderName = zipName.replace(/\.zip$/i, '');
    // Create extraction folder
    const extractFolder = await folderHandle.getDirectoryHandle(extractFolderName, { create: true });
    // Collect the file entries first; directories are created on demand below.
    const entries = [];
    zip.forEach((relativePath, zipEntry) => {
        if (!zipEntry.dir) {
            entries.push({ path: relativePath, entry: zipEntry });
        }
    });
    for (const { path, entry } of entries) {
        try {
            // Create subdirectories if needed
            const parts = path.split('/');
            const fileName = parts.pop();
            let currentDir = extractFolder;
            for (const part of parts) {
                if (part) {
                    currentDir = await currentDir.getDirectoryHandle(part, { create: true });
                }
            }
            // Write file
            const content = await entry.async('arraybuffer');
            const fileHandle = await currentDir.getFileHandle(fileName, { create: true });
            const writable = await fileHandle.createWritable();
            try {
                await writable.write(content);
                await writable.close();
            } catch (writeErr) {
                // A failed write/close used to leave the stream open. Abort it
                // so the stream is released and the partial data is discarded.
                try {
                    await writable.abort();
                } catch (_) {
                    /* stream already closed or errored — nothing left to release */
                }
                throw writeErr;
            }
        } catch (err) {
            console.error('Error extracting file:', path, err);
        }
    }
    return extractFolderName;
}
/**
 * Create file object with metadata.
 *
 * Builds a file row from JUST the directory entry — getFile() is never
 * called, so listing a folder costs no per-file round-trip. size and
 * lastModified therefore start as null; code that needs them (preview, SHA,
 * rename) is expected to call getFile() itself. The caller adds `folderPath`.
 *
 * @param {object} fileHandle   Handle of the file; its `name` is split into
 *                              base name and extension.
 * @param {object} folderHandle Handle of the directory containing the file.
 * @returns {object} The new file row.
 */
function createFileObject(fileHandle, folderHandle) {
    const { name: baseName, extension } = zddc.splitExtension(fileHandle.name);
    const identity = {
        handle: fileHandle,
        folderHandle,
        originalFilename: baseName,
        extension,
        size: null,
        lastModified: null,
    };
    const editableFields = {
        trackingNumber: '',
        revision: '',
        status: '',
        title: '',
    };
    const rowState = {
        isDirty: false,
        error: false,
        errorMessage: '',
        validation: null,
        sha256: null,
    };
    return Object.assign(identity, editableFields, rowState);
}
// Export module
// Public scanner API, published as window.app.modules.scanner (window.app.modules
// must already exist when this file runs). Only these four functions are
// exposed; every other function and variable in this file stays private to the
// enclosing IIFE.
window.app.modules.scanner = {
scanDirectory,
ensureScanned,
getZipCache,
extractZip
};
})();