ZDDC/browse/js/download.js
ZDDC d524966f00 perf(browse): stream files into the offline zip instead of buffering all bytes
downloadFsSubtree pre-read every file's arrayBuffer() and handed the raw
ArrayBuffer to JSZip, so the entire subtree's bytes sat in the JS heap at
once before zipping — the likely OOM on a large local folder despite the
size warning. Hand JSZip the File (a Blob backed by disk) instead; it reads
each lazily during generateAsync, dropping peak memory to roughly the zip
output plus JSZip's working set.

Also document, on downloadUrl, why server-side download errors aren't
surfaced as toasts: the <a download> click is fire-and-forget, and the
folder path targets zddc-server's streamed virtual "<dir>.zip" endpoint —
routing it through fetch() to make errors catchable would defeat the
streaming for arbitrarily large archives. Left as a known, documented
limitation rather than a buffering regression.

All 6 browse Playwright specs pass.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-03 15:33:50 -05:00

190 lines
8 KiB
JavaScript

// download.js — per-node downloads, surfaced through the tree's
// right-click menu (downloadFile / downloadFolder).
//
// downloadFile: a single file. Server mode lets the browser pull
// node.url (zddc-server emits Content-Disposition); FS-API mode
// reads bytes through the file handle and blob-downloads.
//
// downloadFolder: an arbitrary directory node as a .zip. Server
// mode points an <a download> at the virtual "<node-path>.zip"
// URL — zddc-server recognises the suffix and streams an ACL-
// filtered archive without buffering on the client. FS-API mode
// walks the picked handle in two passes — metadata first, then
// bytes — so we can warn before loading a very large tree into
// memory.
(function () {
'use strict';
// Shared application state. In this module it is only consulted for
// state.source ('server' or 'fs') to choose the folder-download path.
var state = window.app.state;
// Soft thresholds for the offline bundle: above either, confirm()
// before loading everything into memory.
var WARN_FILE_COUNT = 2000; // number of files found under the folder
var WARN_TOTAL_BYTES = 500 * 1024 * 1024; // 500 MiB of summed file sizes
// Resolve the events/status module on every call instead of caching it
// at load time.
function events() {
    var modules = window.app.modules;
    return modules.events;
}
// True for entry names that folder walks skip: the empty string and
// anything whose first character is "." or "_".
function isHiddenName(name) {
    if (name.length === 0) return true;
    var first = name[0];
    return first === '.' || first === '_';
}
// Format a byte count in units of 1024 * 1024 bytes with one decimal
// place, e.g. 1572864 -> "1.5 MB".
function fmtMB(bytes) {
    var bytesPerMB = 1024 * 1024;
    var megabytes = bytes / bytesPerMB;
    return megabytes.toFixed(1) + ' MB';
}
// Save `blob` under `filename` by clicking a temporary <a download> whose
// href is an object URL for the blob. On the next timer tick the object
// URL is revoked and the anchor removed from the document.
// NOTE(review): revocation happens after a 0 ms timeout — confirm that is
// late enough for every browser this app targets.
function downloadBlob(filename, blob) {
    var link = document.createElement('a');
    link.href = URL.createObjectURL(blob);
    link.download = filename;
    document.body.appendChild(link);
    link.click();

    function cleanup() {
        URL.revokeObjectURL(link.href);
        link.remove();
    }
    setTimeout(cleanup, 0);
}
// Start a download of a same-origin server URL by clicking a temporary
// <a download>; the response's Content-Disposition drives the save.
// NOTE: the click is fire-and-forget. A server error (401/403/404/5xx)
// cannot be observed from here, so a failure shows up only as the
// browser's own download error and never as a toast. This is intentional:
// folder downloads target zddc-server's streamed virtual "<dir>.zip"
// endpoint, and pulling that through fetch() just to make errors
// catchable would buffer it and defeat the streaming (the archive can be
// arbitrarily large).
function downloadUrl(filename, url) {
    var link = document.createElement('a');
    link.href = url;
    link.download = filename; // hint; the server's Content-Disposition wins
    document.body.appendChild(link);
    link.click();

    function detach() {
        link.remove();
    }
    setTimeout(detach, 0);
}
// Depth-first walk of dirHandle that appends one record per non-hidden
// file to `out` ({ relPath, handle, size }) and adds each file to
// `tally` (count and bytes). Hidden entries (see isHiddenName) are
// skipped, directories included, so nothing beneath them is visited.
// relPrefix is the slash-terminated path of dirHandle within the picked
// root ("" for the root itself).
async function collectFiles(dirHandle, relPrefix, out, tally) {
    for await (var entry of dirHandle.entries()) {
        var entryName = entry[0];
        var entryHandle = entry[1];
        if (isHiddenName(entryName)) continue;

        var relPath = relPrefix + entryName;
        if (entryHandle.kind === 'directory') {
            await collectFiles(entryHandle, relPath + '/', out, tally);
            continue;
        }

        // Best effort: a file that can no longer be read is still
        // listed, with a size of 0.
        var byteLength = 0;
        try {
            var snapshot = await entryHandle.getFile();
            byteLength = snapshot.size || 0;
        } catch (_err) { /* permission lost — count it as 0 */ }

        out.push({ relPath: relPath, handle: entryHandle, size: byteLength });
        tally.count += 1;
        tally.bytes += byteLength;
    }
}
// Zip everything under rootHandle (FS-API mode) and save it as
// "<rootHandle.name>.zip", reporting progress through the events module.
// Steps: scan the tree for file metadata, stop if nothing was found, ask
// for confirmation when the scan exceeds WARN_FILE_COUNT or
// WARN_TOTAL_BYTES, then add each file to a JSZip archive under a
// top-level "<rootHandle.name>/" folder and download the generated blob.
// Errors from getFile() or JSZip are not caught here; they propagate to
// the caller.
async function downloadFsSubtree(rootHandle) {
    var status = events();
    var rootName = rootHandle.name;

    status.statusInfo('Scanning ' + rootName + '…');
    var entries = [];
    var totals = { count: 0, bytes: 0 };
    await collectFiles(rootHandle, '', entries, totals);

    var total = entries.length;
    if (total === 0) {
        status.statusInfo(rootName + ' is empty — nothing to download.');
        return;
    }

    var isLarge = totals.count > WARN_FILE_COUNT || totals.bytes > WARN_TOTAL_BYTES;
    if (isLarge) {
        var warning = [
            'This folder has ' + totals.count + ' files (~' + fmtMB(totals.bytes) + ').',
            'Building the zip loads them all into memory — it may be slow or crash the tab.',
            'Continue?'
        ].join('\n\n');
        if (!window.confirm(warning)) {
            status.statusClear();
            return;
        }
    }

    var archive = new window.JSZip();
    var position = 0;
    for (var entry of entries) {
        position += 1;
        status.statusInfo('Zipping ' + rootName + '… (' + position + '/' + total + ')');
        // Pass JSZip the File itself (a Blob) instead of a pre-read
        // ArrayBuffer, so this loop never holds the tree's raw bytes.
        // NOTE(review): the memory benefit relies on JSZip deferring the
        // Blob read until generateAsync — confirm for the bundled version.
        var diskFile = await entry.handle.getFile();
        archive.file(rootName + '/' + entry.relPath, diskFile);
    }

    var zipName = rootName + '.zip';
    status.statusInfo('Generating ' + zipName + '…');
    var zipBlob = await archive.generateAsync({ type: 'blob' });
    downloadBlob(zipName, zipBlob);
    status.statusInfo('Downloaded ' + zipName + ' (' + total + ' files)');
}
// Guard shared by downloadFile and downloadFolder: while one of them is
// between its `busy = true` and its `finally`, any further call to either
// returns immediately without reporting anything.
var busy = false;
// Download a single file node. Server mode: rely on the node's
// own URL (the server emits Content-Disposition). FS mode: obtain the
// file through the handle and trigger a blob download. Works
// for ordinary files, for .zip members (the loader sets node.url
// for zip members in server mode and a ZipFileHandle offline),
// and for the .zip file itself.
//
// node: tree node; must be a non-directory with either `url` or a
//       `handle` exposing getFile().
// Never rejects: problems are reported via events().statusError, and a
// call made while another download is in flight (`busy`) is ignored.
async function downloadFile(node) {
    if (busy) return;
    if (!node || node.isDir) {
        events().statusError('Not a file: ' + (node && node.name));
        return;
    }
    busy = true;
    try {
        if (node.url) {
            events().statusInfo('Downloading ' + node.name + '…');
            downloadUrl(node.name, node.url);
            // The click is fire-and-forget, so there is no completion
            // signal; just clear the status line after a short delay.
            setTimeout(function () { events().statusClear(); }, 2500);
        } else if (node.handle && typeof node.handle.getFile === 'function') {
            events().statusInfo('Preparing ' + node.name + '…');
            var f = await node.handle.getFile();
            // A File from the FS API is already a Blob; hand it over
            // as-is instead of copying its bytes into the JS heap via
            // arrayBuffer(). Anything that is not a Blob (e.g. whatever a
            // custom handle such as ZipFileHandle returns — not visible
            // from this file) still goes through the buffered path.
            var blob = (f instanceof Blob) ? f : new Blob([await f.arrayBuffer()]);
            downloadBlob(node.name, blob);
            events().statusInfo('Downloaded ' + node.name);
        } else {
            events().statusError('No download path for ' + node.name);
        }
    } catch (e) {
        events().statusError('Download failed: ' + (e && e.message ? e.message : e));
    } finally {
        busy = false;
    }
}
// Download a directory node as "<node.name>.zip".
// Server mode (state.source === 'server'): point an <a download> at the
// node's tree path with any trailing slash stripped and ".zip" appended —
// the virtual URL the dispatcher recognises and streams the subtree from.
// FS mode (state.source === 'fs' with a directory handle): walk the
// handle and zip it in the browser.
// Never rejects: problems are reported via events().statusError, and a
// call made while another download is in flight (`busy`) is ignored.
async function downloadFolder(node) {
    if (busy) return;

    var isFolder = Boolean(node && node.isDir);
    if (!isFolder) {
        events().statusError('Not a folder: ' + (node && node.name));
        return;
    }

    busy = true;
    try {
        if (state.source === 'server') {
            var dirPath = window.app.modules.tree.pathFor(node).replace(/\/$/, '');
            var zipName = node.name + '.zip';
            events().statusInfo('Preparing ' + zipName + '…');
            downloadUrl(zipName, dirPath + '.zip');
            // No completion signal from the anchor click; clear the
            // status line after a short delay instead.
            setTimeout(function () { events().statusClear(); }, 2500);
            return;
        }

        var hasDirHandle = state.source === 'fs'
            && node.handle
            && node.handle.kind === 'directory';
        if (hasDirHandle) {
            await downloadFsSubtree(node.handle);
            return;
        }

        events().statusError('Cannot download ' + node.name);
    } catch (err) {
        var detail = (err && err.message) ? err.message : err;
        events().statusError('Download failed: ' + detail);
    } finally {
        busy = false;
    }
}
// Public surface of this module: the two per-node entry points, published
// on the shared module registry. Everything else in this file stays
// private to the enclosing function scope.
window.app.modules.download = {
downloadFile: downloadFile,
downloadFolder: downloadFolder
};
})();