diff --git a/classifier/css/layout.css b/classifier/css/layout.css index 59adf75..da67e8e 100644 --- a/classifier/css/layout.css +++ b/classifier/css/layout.css @@ -623,10 +623,13 @@ input.tfile__name:focus { border-color: var(--primary); background: var(--bg); o .scratch-preview__more { color: var(--text-muted); font-size: 0.76rem; padding: 0.2rem 0; } .scratch-match__fuzzy { display: inline-flex; align-items: center; gap: 0.3rem; font-size: 0.8rem; color: var(--text-muted); } .scratch-match__row { display: flex; align-items: center; gap: 0.5rem; font-size: 0.8rem; padding: 0.15rem 0; cursor: pointer; } +.scratch-match__row--review { opacity: 0.85; } /* not an exact 1:1 — needs a look */ +.scratch-match__row--review .scratch-match__conf { color: var(--warning, #b8860b); } .scratch-match__file { flex: 1; min-width: 0; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; } .scratch-match__arrow { color: var(--text-muted); } .scratch-match__tn { font-family: var(--mono, monospace); } -.scratch-match__conf { color: var(--text-muted); font-size: 0.72rem; width: 3rem; text-align: right; } +.scratch-match__conf { color: var(--text-muted); font-size: 0.72rem; min-width: 6rem; text-align: right; white-space: nowrap; } +.worklist-cur { font-family: var(--mono, monospace); color: var(--text-muted); } /* The base seltable rules live in shared/seltable.css (bundled by build.sh and shared with the tables tool); only the classifier-specific catalog bits diff --git a/classifier/js/classify.js b/classifier/js/classify.js index 612046d..3f9661e 100644 --- a/classifier/js/classify.js +++ b/classifier/js/classify.js @@ -499,6 +499,8 @@ id: r.id || uid(), party: r.party || '', trackingNumber: (r.trackingNumber || '').trim(), title: r.title || '', revisionCell: r.revisionCell || '', + // The file's existing name (pasted col 4) — a join key for name-match. + currentName: (r.currentName || '').trim(), source: rowSource(r), archiveRevisions: Array.isArray(r.archiveRevisions) ? 
r.archiveRevisions : [], placed: Object.create(null), @@ -617,6 +619,9 @@ // Parse Excel/TSV text into scratch rows. Columns: Tracking ⇥ Rev(Status) ⇥ // Title; a 4th bare-status column merges into the revision; a lone cell that // parses as a full ZDDC filename is split; a header row is skipped. + // FIXED schema, by column position (no variant detection): a header row is + // skipped, then each line is tracking_number ⇥ rev (status) ⇥ title ⇥ + // current name. Trailing columns may be omitted (currentName/title blank). function parsePastedRows(text) { function unq(s) { s = (s == null ? '' : String(s)).trim(); @@ -629,43 +634,82 @@ if (!raw.trim()) return; var cells = raw.split('\t').map(unq); var c0 = cells[0] || ''; - if (!sawData && cells.length > 1 && /^(tracking|number|no\.?|doc(ument)?|drawing|item)\b/i.test(c0) && c0.indexOf('-') === -1) { - return; // header row - } - var tracking = '', rev = '', title = ''; - if (cells.length === 1) { - var p = window.zddc.parseFilename(c0); - if (p && p.valid && p.trackingNumber) { tracking = p.trackingNumber; rev = p.revision + (p.status ? ' (' + p.status + ')' : ''); title = p.title || ''; } - else tracking = c0; - } else { - tracking = c0; - if (cells.length >= 4 && cells[2] && window.zddc.isValidStatus(cells[2])) { rev = (cells[1] + ' (' + cells[2] + ')').trim(); title = cells[3] || ''; } - else { rev = cells[1] || ''; title = cells[2] || ''; } - } - if (!tracking) { skipped.push({ line: i + 1, reason: 'no tracking number', text: raw }); return; } + // Skip a leading header row (first cell is a header word, not a tn). 
// Normalize a value for loose comparison: uppercase it and keep only A–Z / 0–9.
// null/undefined become ''.
function normTok(s) { return String(s == null ? '' : s).toUpperCase().replace(/[^A-Z0-9]/g, ''); }

// Remove ONE trailing file extension from a name.
// An extension is a final ".xyz" made of 1–10 letters/digits with at least one
// letter (pdf, docx, 7z, mp4). Anything else after the last dot is treated as
// part of the name, so dotted, extension-less names such as
// "Dwg No. 12 Site plan" or "plan v1.2" are NOT truncated — truncating them
// could make two different names compare as an exact match.
function dropExt(s) {
  return String(s == null ? '' : s).replace(/\.(?=[0-9]*[A-Za-z])[A-Za-z0-9]{1,10}$/, '');
}

// Comparison key: extension dropped, lowercased, all non-alphanumerics removed.
function nameKey(s) { return dropExt(s).toLowerCase().replace(/[^a-z0-9]+/g, ''); }

// Lowercased alphanumeric tokens of a name (extension dropped, empties removed).
function nameTokens(s) { return dropExt(s).toLowerCase().split(/[^a-z0-9]+/).filter(Boolean); }

// Score a pasted "current name" (rowName) against a file's full name (fileFull).
// Returns:
//   1          exact match of the normalized keys (case, separators and the
//              extension are ignored)
//   0.81–0.95  at least 60% of the row's tokens occur among the file's tokens
//              (0.6 + 0.35 * coverage, capped at 0.95); order-independent
//   0.7        the shorter key (>= 4 chars) is a substring of the longer one
//   0          no match, or rowName normalizes to nothing
// Token coverage is tested before the raw substring so reordered names still
// get the higher score.
function nameScore(rowName, fileFull) {
  var rk = nameKey(rowName);
  if (!rk) return 0;
  var fk = nameKey(fileFull);
  if (rk === fk) return 1;

  var rt = nameTokens(rowName);
  if (rt.length) {
    var ft = Object.create(null);
    nameTokens(fileFull).forEach(function (t) { ft[t] = true; });
    var hit = 0;
    rt.forEach(function (t) { if (ft[t]) hit++; });
    var cov = hit / rt.length;
    if (cov >= 0.6) return Math.min(0.95, 0.6 + 0.35 * cov);
  }

  // Substring fallback: compare the shorter key against the longer one.
  var a = rk.length <= fk.length ? rk : fk;
  var b = rk.length <= fk.length ? fk : rk;
  if (a.length >= 4 && b.indexOf(a) !== -1) return 0.7;
  return 0;
}
// Propose file↔row matches.
//
// For each file the full name is built with zddc.joinExtension(originalFilename,
// extension) and the best-scoring row is chosen:
//   - PRIMARY: rows that have a non-blank `currentName`, scored with nameScore().
//   - FALLBACK (only when no named row scored > 0): the row's tracking number
//     found in the filename — 1 for a literal substring, 0.8 for a match after
//     normTok() normalization, and, when opts.fuzzy is set, 0.5 when the
//     tracking number's digits occur in the filename's digits.
//
// Params:
//   files  array of file records (may be null/undefined)
//   rows   array of worklist rows (may be null/undefined)
//   opts   optional; { fuzzy: boolean }
// Returns an array with at most one proposal per file:
//   { file, row, confidence, via: 'name' | 'tracking', auto }
//
// `auto` is true only when the match is exact AND unambiguous in both
// directions:
//   - confidence is 1,
//   - no other row tied for that file at confidence 1 (duplicate current names
//     or overlapping tracking numbers would otherwise let the first row win
//     silently),
//   - no other file's proposal claims the same row at confidence 1.
function proposeMatches(files, rows, opts) {
  opts = opts || {};
  var allRows = rows || [];
  var named = allRows.filter(function (r) { return (r.currentName || '').trim(); });
  var out = [];
  var ties = []; // ties[i] = number of distinct rows sharing out[i]'s top confidence

  (files || []).forEach(function (f) {
    var full = zddc.joinExtension(f.originalFilename, f.extension);
    var best = null;

    // Keep the first highest-scoring row; count other rows that tie with it.
    function consider(row, conf, via) {
      if (!(conf > 0)) return;
      if (!best || conf > best.confidence) {
        best = { row: row, confidence: conf, via: via, ties: 1 };
      } else if (conf === best.confidence && row !== best.row) {
        best.ties++;
      }
    }

    named.forEach(function (r) { consider(r, nameScore(r.currentName, full), 'name'); });

    if (!best) { // fallback: tracking number in the filename
      var nameNorm = normTok(full);
      var nameDigits = nameNorm.replace(/[^0-9]/g, '');
      allRows.forEach(function (r) {
        var tn = r.trackingNumber || '';
        if (!tn) return;
        var tnNorm = normTok(tn);
        var conf = 0;
        if (full.indexOf(tn) !== -1) conf = 1;
        else if (tnNorm && nameNorm.indexOf(tnNorm) !== -1) conf = 0.8;
        else if (opts.fuzzy) {
          var d = tnNorm.replace(/[^0-9]/g, '');
          if (d && nameDigits.indexOf(d) !== -1) conf = 0.5;
        }
        consider(r, conf, 'tracking');
      });
    }

    if (best) {
      out.push({ file: f, row: best.row, confidence: best.confidence, via: best.via, auto: false });
      ties.push(best.ties);
    }
  });

  // Count exact (confidence 1) claims per row and per file key.
  // NOTE(review): assumes srcKeyForFile() yields one distinct key per file — confirm.
  function rowKey(p) { return p.row.id || p.row.trackingNumber; }
  var rowEx = Object.create(null);
  var fileEx = Object.create(null);
  out.forEach(function (p) {
    if (p.confidence !== 1) return;
    var rk = rowKey(p);
    var fk = srcKeyForFile(p.file);
    rowEx[rk] = (rowEx[rk] || 0) + 1;
    fileEx[fk] = (fileEx[fk] || 0) + 1;
  });
  out.forEach(function (p, i) {
    if (p.confidence !== 1) return;
    p.auto = ties[i] === 1 && rowEx[rowKey(p)] === 1 && fileEx[srcKeyForFile(p.file)] === 1;
  });
  return out;
}
// Files that still need a row: every file from allFiles() whose assignment
// (looked up by its source key) has neither a trackingNodeId nor the
// `excluded` flag set.
function unassignedFiles() {
  var c = C();
  return allFiles().filter(function (f) {
    var a = c.getAssignment(c.srcKeyForFile(f));
    return !(a && (a.trackingNodeId || a.excluded));
  });
}

// Assign every exact, unambiguous (1:1) CURRENT-NAME match without prompting
// and return how many files were assigned.
// Only proposals flagged `auto` that were matched via the current-name column
// are applied. Tracking-number fallback matches and lower-confidence or
// ambiguous matches are left for the user to review via "Match names", so the
// count reported as "auto-assigned by current name" is accurate.
function autoAssignByName() {
  var c = C();
  var n = 0;
  c.proposeMatches(unassignedFiles(), c.getWorklist(), {}).forEach(function (p) {
    if (p.auto && p.via === 'name') {
      c.assignFromRow([c.srcKeyForFile(p.file)], p.row);
      n++;
    }
  });
  return n;
}
The current name is matched against your files — exact matches are assigned automatically.'); var ta = document.createElement('textarea'); ta.className = 'scratch-paste__ta'; ta.rows = 6; ta.spellcheck = false; - ta.placeholder = 'ACME-AR-DWG-0001\tA (IFR)\tFloor plan'; + ta.placeholder = 'ACME-AR-DWG-0001\tA (IFR)\tFloor plan\tIMG_4471.pdf'; ta.value = prefill || ''; m.body.appendChild(ta); var preview = el('div', 'scratch-paste__preview'); m.body.appendChild(preview); @@ -748,12 +766,13 @@ preview.textContent = ''; if (parsed.rows.length) { var tbl = el('table', 'scratch-preview__table'); - var head = el('tr'); ['Tracking number', 'Revision', 'Title'].forEach(function (h) { head.appendChild(el('th', null, h)); }); tbl.appendChild(head); + var head = el('tr'); ['Tracking number', 'Revision', 'Title', 'Current name'].forEach(function (h) { head.appendChild(el('th', null, h)); }); tbl.appendChild(head); parsed.rows.slice(0, 50).forEach(function (r) { var tr = el('tr'); tr.appendChild(el('td', null, r.trackingNumber)); tr.appendChild(el('td', null, r.revisionCell || '')); tr.appendChild(el('td', null, r.title || '')); + tr.appendChild(el('td', null, r.currentName || '')); tbl.appendChild(tr); }); preview.appendChild(tbl); @@ -767,7 +786,10 @@ var n = parsed.rows.length; c.appendWorklist(parsed.rows); m.close(); showTab('worklist'); - window.zddc.toast('Added ' + n + ' pasted row' + (n === 1 ? '' : 's') + '.', 'success'); + var assigned = autoAssignByName(); + var msg = 'Added ' + n + ' pasted row' + (n === 1 ? '' : 's') + '.'; + if (assigned) msg += ' Auto-assigned ' + assigned + ' file' + (assigned === 1 ? '' : 's') + ' by current name.'; + window.zddc.toast(msg + (assigned ? ' Review the rest with ⚡ Match names.' 
: ''), 'success'); }); ta.addEventListener('input', refresh); refresh(); ta.focus(); @@ -776,12 +798,9 @@ var c = C(); var rows = c.getWorklist(); if (!rows.length) { window.zddc.toast('Load or paste some tracking numbers first.', 'warning'); return; } - var files = allFiles().filter(function (f) { - var a = c.getAssignment(c.srcKeyForFile(f)); - return !(a && (a.trackingNodeId || a.excluded)); - }); + var files = unassignedFiles(); if (!files.length) { window.zddc.toast('No unassigned files to match.', 'info'); return; } - var m = scratchModal('Match names', 'Files whose name contains a known tracking number. Review, then assign the checked matches.'); + var m = scratchModal('Match names', 'Each unassigned file matched to a row by its “Current name” (or the tracking number in its filename). Exact matches are pre-checked; review the rest, then Assign.'); var opts = { fuzzy: false }; var fuzzyLbl = el('label', 'scratch-match__fuzzy'); var fuzzy = document.createElement('input'); fuzzy.type = 'checkbox'; @@ -797,16 +816,19 @@ list.textContent = ''; if (!proposals.length) { list.appendChild(el('div', 'scratch-preview__skip', 'No matches found.')); accept.disabled = true; accept.textContent = 'Assign'; return; } proposals.forEach(function (p, i) { - var rowEl = el('label', 'scratch-match__row'); - var cb = document.createElement('input'); cb.type = 'checkbox'; cb.checked = true; cb.dataset.i = i; + var rowEl = el('label', 'scratch-match__row' + (p.auto ? 
'' : ' scratch-match__row--review')); + var cb = document.createElement('input'); cb.type = 'checkbox'; + cb.checked = !!p.auto; // pre-check only exact 1:1 matches; opt in to the rest + cb.dataset.i = i; rowEl.appendChild(cb); - rowEl.appendChild(el('span', 'scratch-match__file', zddc.joinExtension(p.file.originalFilename, p.file.extension))); + rowEl.appendChild(el('span', 'scratch-match__file', window.zddc.joinExtension(p.file.originalFilename, p.file.extension))); rowEl.appendChild(el('span', 'scratch-match__arrow', '→')); rowEl.appendChild(el('span', 'scratch-match__tn', p.row.trackingNumber)); - rowEl.appendChild(el('span', 'scratch-match__conf', Math.round(p.confidence * 100) + '%')); + var tag = el('span', 'scratch-match__conf', Math.round(p.confidence * 100) + '% · ' + (p.via === 'name' ? 'name' : 'tracking#')); + rowEl.appendChild(tag); list.appendChild(rowEl); }); - accept.disabled = false; accept.textContent = 'Assign ' + proposals.length; + accept.disabled = false; accept.textContent = 'Assign checked'; } accept.addEventListener('click', function () { var n = 0; diff --git a/tests/classify.spec.js b/tests/classify.spec.js index 1753b91..2d3ad6f 100644 --- a/tests/classify.spec.js +++ b/tests/classify.spec.js @@ -1365,27 +1365,63 @@ test('From a list: load() migrates a legacy mdlNodeId placement into a tracking expect(r.named).toBe('ACM-PRJ-EL-SPC-0009_B (IFC) - Legacy.pdf'); // classification preserved }); -test('parsePastedRows handles 3-col, 4-col(status), a filename, a header, and bad rows', async ({ page }) => { +test('parsePastedRows: fixed columns tracking · rev · title · current name', async ({ page }) => { await page.click('#modeClassifyBtn'); const r = await page.evaluate(() => { const c = window.app.modules.classify; const text = [ - 'Tracking Number\tRev\tTitle', // header → skipped - 'ACM-PRJ-EL-SPC-0001\tA (IFR)\tFloor plan', // 3-col - 'ACM-PRJ-EL-SPC-0002\tB\tIFC\tSection', // 4-col (status split) - 'ACM-PRJ-EL-SPC-0003_C (IFA) - 
Detail.pdf', // single full filename - '\tjust a rev\t', // no tracking → skipped + 'Tracking Number\tRev\tTitle\tCurrent name', // header → skipped + 'ACM-PRJ-EL-SPC-0001\tA (IFR)\tFloor plan\tIMG_4471.pdf', // full 4 columns + 'ACM-PRJ-EL-SPC-0002\tB (IFC)\tSection', // 3 cols → current name blank + '\tjust a rev\t', // no tracking → skipped ].join('\n'); return c.parsePastedRows(text); }); - expect(r.rows.map(x => x.trackingNumber)).toEqual(['ACM-PRJ-EL-SPC-0001', 'ACM-PRJ-EL-SPC-0002', 'ACM-PRJ-EL-SPC-0003']); - expect(r.rows[0].revisionCell).toBe('A (IFR)'); - expect(r.rows[1].revisionCell).toBe('B (IFC)'); // status column merged - expect(r.rows[2].revisionCell).toBe('C (IFA)'); // split from the filename - expect(r.rows[2].title).toBe('Detail'); + expect(r.rows.map(x => x.trackingNumber)).toEqual(['ACM-PRJ-EL-SPC-0001', 'ACM-PRJ-EL-SPC-0002']); + expect(r.rows[0]).toMatchObject({ revisionCell: 'A (IFR)', title: 'Floor plan', currentName: 'IMG_4471.pdf' }); + expect(r.rows[1].currentName).toBe(''); // omitted trailing column expect(r.skipped.length).toBe(1); // the no-tracking row }); +test('proposeMatches: the current-name column drives exact (auto) + token matches', async ({ page }) => { + await page.click('#modeClassifyBtn'); + const r = await page.evaluate(() => { + const c = window.app.modules.classify; + const files = [ + { originalFilename: 'IMG_4471', extension: 'pdf', folderPath: 'R' }, // exact (case+ext+sep differ) + { originalFilename: 'site-survey-final-v2', extension: 'docx', folderPath: 'R' }, // token coverage + { originalFilename: 'totally unrelated', extension: 'pdf', folderPath: 'R' }, // no match + ]; + const rows = [ + { id: 'm1', trackingNumber: 'ACM-AR-DWG-0001', currentName: 'img_4471.PDF' }, + { id: 'm2', trackingNumber: 'ACM-AR-DWG-0002', currentName: 'Site Survey final' }, + ]; + const m = c.proposeMatches(files, rows, {}); + return Object.fromEntries(m.map(p => [p.file.originalFilename, { tn: p.row.trackingNumber, conf: 
p.confidence, via: p.via, auto: p.auto }])); + }); + expect(r['IMG_4471']).toMatchObject({ tn: 'ACM-AR-DWG-0001', conf: 1, via: 'name', auto: true }); // exact 1:1 → auto + expect(r['site-survey-final-v2'].tn).toBe('ACM-AR-DWG-0002'); + expect(r['site-survey-final-v2'].via).toBe('name'); + expect(r['site-survey-final-v2'].auto).toBe(false); // < exact → needs review + expect(r['totally unrelated']).toBeUndefined(); // no match dropped +}); + +test('proposeMatches: ambiguous duplicate current-name is not auto-assigned', async ({ page }) => { + await page.click('#modeClassifyBtn'); + const r = await page.evaluate(() => { + const c = window.app.modules.classify; + const files = [ + { originalFilename: 'scan001', extension: 'pdf', folderPath: 'Root/A' }, + { originalFilename: 'scan001', extension: 'pdf', folderPath: 'Root/B' }, // same name, different folder + ]; + const rows = [{ id: 'm1', trackingNumber: 'ACM-AR-DWG-0009', currentName: 'scan001.pdf' }]; + return c.proposeMatches(files, rows, {}).map(p => ({ conf: p.confidence, auto: p.auto })); + }); + expect(r.length).toBe(2); // both files match the one row + expect(r.every(p => p.conf === 1)).toBe(true); + expect(r.every(p => p.auto === false)).toBe(true); // a row claimed by 2 files → neither auto-assigns +}); + test('proposeMatches finds a row whose tracking number is in the filename', async ({ page }) => { await page.click('#modeClassifyBtn'); const r = await page.evaluate(() => {