ZDDC — Zero Day Document Control. A file-naming convention plus five single-file HTML tools (archive, transmittal, classifier, mdedit, landing) and an optional Go HTTP server (zddc-server) with ACL and a virtual archive index. Self-contained, offline-capable, dependency-free. See README.md for an overview, AGENTS.md and ARCHITECTURE.md for the build/release/architecture detail, bootstrap/README.md for the two-level deployment install pattern, and zddc/README.md for the HTTP server.
197 lines
5.4 KiB
JavaScript
Executable file
197 lines
5.4 KiB
JavaScript
Executable file
#!/usr/bin/env node
|
|
/**
 * Training Data Collector for Qwen3-Coder-Next
 *
 * Captures conversations in which Qwen needed expert (Sonnet/Opus) assistance
 * and stores them in a format suitable for LoRA fine-tuning.
 *
 * Usage:
 *   node collect-interaction.js --query "..." --qwen "..." --expert "..." [--domain "domain-name"]
 */
|
|
|
|
import fs from 'fs';
|
|
import path from 'path';
|
|
import { fileURLToPath } from 'url';
|
|
|
|
// ES modules have no CommonJS-style __filename/__dirname globals, so the
// module's own location is derived from import.meta.url.
const __filename = fileURLToPath(import.meta.url);

// Directory that contains this script; captured data is stored beneath it.
const SCRIPT_DIR = path.dirname(__filename);
const __dirname = SCRIPT_DIR;

// JSONL file that receives one captured training example per line.
const RAW_FILE = path.join(SCRIPT_DIR, 'raw', 'interactions.jsonl');
|
|
|
|
/**
 * Generate an interaction ID of the form `int_<epoch-ms>_<suffix>`.
 *
 * The suffix is always exactly 8 characters from [0-9a-z]. It is derived
 * from Math.random(), so it is suitable for labelling records but not for
 * anything security-sensitive.
 *
 * @returns {string} The generated identifier.
 */
function generateInteractionId() {
  const timestamp = Date.now();
  // toString(36) yields "0.<digits>" and the fraction may have fewer than
  // 8 digits (0.5 -> "0.i", 0 -> "0"), so pad to keep the suffix fixed-width
  // and never empty.
  const random = Math.random().toString(36).slice(2, 10).padEnd(8, '0');
  return `int_${timestamp}_${random}`;
}
|
|
|
|
/**
 * Guess the training domain of a conversation from keywords in its content.
 *
 * The messages are serialized with JSON.stringify, lower-cased, and checked
 * against an ordered list of keyword rules; the first rule with any hit wins.
 *
 * @param {*} messages - Conversation content (any JSON-serializable value;
 *   this file passes an array of strings).
 * @returns {string} One of 'zddc-naming', 'html-architecture',
 *   'build-system', 'coding-debugging', 'reasoning-architecture', or
 *   'general-coding' when nothing matches.
 */
function detectDomain(messages) {
  const text = JSON.stringify(messages).toLowerCase();

  // 'spa' must be matched as a standalone word: as a bare substring it also
  // hits "dispatch", "space", "span", ... and mislabels those conversations
  // as HTML work. Because `text` is JSON, a newline/tab/CR directly before
  // the word shows up as the two-character escape \n, \t or \r, so those
  // escapes are accepted as a left boundary too.
  const spaWord = /(?:^|[^a-z0-9]|\\[nrt])spa(?![a-z0-9])/;

  // Ordered rules: earlier domains take precedence over later ones.
  const rules = [
    ['zddc-naming', ['zddc', 'trackingnumber', 'revision', '_a (ifr)', 'status code']],
    ['html-architecture', ['html', spaWord, 'single-file', 'es module', 'vanilla js']],
    ['build-system', ['build', 'build.sh', 'dist/', 'template.html']],
    ['coding-debugging', ['debug', 'error', 'fix', 'console']],
    ['reasoning-architecture', ['reason', 'analyze', 'architecture', 'design']],
  ];

  const isHit = (keyword) =>
    (keyword instanceof RegExp ? keyword.test(text) : text.includes(keyword));

  for (const [domain, keywords] of rules) {
    if (keywords.some(isHit)) {
      return domain;
    }
  }

  // Nothing matched: fall back to the generic bucket.
  return 'general-coding';
}
|
|
|
|
/**
 * Build one training record from a user query, Qwen's answer and the
 * expert's answer.
 *
 * The record is a four-turn chat (user query, Qwen answer, a fixed
 * 'consult Sonnet' user turn, expert answer) plus metadata.
 *
 * @param {string} userQuery - The original user question.
 * @param {string} qwenResponse - Qwen's answer to the question.
 * @param {string} expertResponse - The expert model's answer.
 * @param {object} [options]
 * @param {string|null} [options.domain] - Domain label; when falsy it is
 *   detected from the three texts via detectDomain.
 * @param {object} [options.metadata] - Extra metadata. It is spread last, so
 *   its keys override the generated ones.
 * @returns {{messages: Array<{role: string, content: string}>, metadata: object}}
 */
function createTrainingExample(userQuery, qwenResponse, expertResponse, options = {}) {
  const domain = options.domain || detectDomain([userQuery, qwenResponse, expertResponse]);

  return {
    messages: [
      { role: 'user', content: userQuery },
      { role: 'assistant', content: qwenResponse },
      { role: 'user', content: 'consult Sonnet' },
      { role: 'assistant', content: expertResponse },
    ],
    metadata: {
      domain,
      // Reuse the shared helper instead of repeating the hyphen-to-underscore
      // conversion here, so the adapter naming rule lives in one place.
      adapter: `lora-v1-${formatDomainName(domain)}`,
      timestamp: new Date().toISOString(),
      interaction_id: generateInteractionId(),
      source: 'manual-expert-consultation',
      ...options.metadata,
    },
  };
}
|
|
|
|
/**
 * Serialize `data` as JSON and append it to `filePath` as a single
 * newline-terminated line.
 *
 * @param {string} filePath - Destination JSONL file.
 * @param {*} data - Value to serialize with JSON.stringify.
 */
function appendToJSONL(filePath, data) {
  fs.appendFileSync(filePath, `${JSON.stringify(data)}\n`);
}
|
|
|
|
/**
 * Turn a domain label into the form used in adapter names by replacing
 * every hyphen with an underscore.
 *
 * @param {string} domain - Domain label such as 'zddc-naming'.
 * @returns {string} The label with '-' replaced by '_'.
 */
function formatDomainName(domain) {
  const segments = domain.split('-');
  return segments.join('_');
}
|
|
|
|
/**
 * Capture one interaction: build the training example, append it to the raw
 * JSONL file, and print a short summary.
 *
 * If any of the three texts is missing or empty, an error and usage hint are
 * printed and the process exits with code 1.
 *
 * @param {object} params
 * @param {string} params.userQuery - The original user question.
 * @param {string} params.qwenResponse - Qwen's answer.
 * @param {string} params.expertResponse - The expert model's answer.
 * @param {string|null} [params.domain=null] - Domain label; detected when null.
 * @param {object} [params.metadata={}] - Extra metadata merged into the record.
 * @returns {object} The training example that was written.
 */
export function collect({
  userQuery,
  qwenResponse,
  expertResponse,
  domain = null,
  metadata = {},
}) {
  const hasAllTexts = Boolean(userQuery && qwenResponse && expertResponse);
  if (!hasAllTexts) {
    console.error('Error: Missing required parameters');
    console.error('Usage: node collect-interaction.js --query "..." --qwen "..." --expert "..."');
    process.exit(1);
  }

  const extras = { domain, metadata };
  const trainingExample = createTrainingExample(userQuery, qwenResponse, expertResponse, extras);

  // The raw/ directory may not exist yet on first use.
  const rawDir = path.dirname(RAW_FILE);
  fs.mkdirSync(rawDir, { recursive: true });

  appendToJSONL(RAW_FILE, trainingExample);

  const info = trainingExample.metadata;
  const summary = [
    '\n=== Training Example Captured ===',
    `Domain: ${info.domain}`,
    `Adapter: ${info.adapter}`,
    `Interaction ID: ${info.interaction_id}`,
    `Timestamp: ${info.timestamp}`,
    `Raw file: ${RAW_FILE}`,
    '=================================\n',
  ];
  for (const line of summary) {
    console.log(line);
  }

  return trainingExample;
}
|
|
|
|
/**
 * Command-line entry point.
 *
 * Reads --query, --qwen, --expert and the optional --domain from the process
 * arguments, taking the argument that follows each flag as its value, then
 * hands them to collect(). Exits with code 1 if a required flag is absent.
 */
function main() {
  const args = process.argv.slice(2);

  // Position of each flag, or -1 when the flag was not given.
  const positionOf = (flag) => args.indexOf(flag);
  const queryIdx = positionOf('--query');
  const qwenIdx = positionOf('--qwen');
  const expertIdx = positionOf('--expert');
  const domainIdx = positionOf('--domain');

  const requiredPositions = [queryIdx, qwenIdx, expertIdx];
  if (requiredPositions.includes(-1)) {
    console.error('Error: Missing required arguments');
    console.error('Usage: node collect-interaction.js --query "..." --qwen "..." --expert "..." [--domain "domain"]');
    console.error(' node collect-interaction.js --query "Query" --qwen "Qwen answer" --expert "Expert answer"');
    process.exit(1);
  }

  // A flag's value is whatever argument comes right after it.
  const valueAfter = (idx) => args[idx + 1];

  let domain = null;
  if (domainIdx !== -1) {
    domain = valueAfter(domainIdx);
  }

  collect({
    userQuery: valueAfter(queryIdx),
    qwenResponse: valueAfter(qwenIdx),
    expertResponse: valueAfter(expertIdx),
    domain,
  });
}
|
|
|
|
// Default export: the public helpers bundled into one object for callers
// that import the module as a whole.
const moduleApi = { collect, createTrainingExample, detectDomain };
export default moduleApi;
|
|
|
|
// Run the CLI only when this file is the script passed to node, not when it
// is imported by another module.
// Filesystem paths are compared instead of gluing 'file://' onto argv[1]:
// import.meta.url is percent-encoded (spaces, non-ASCII characters) and uses
// forward slashes with an extra leading slash on Windows, so the hand-built
// URL string did not match for such paths and main() silently never ran.
if (process.argv[1] && path.resolve(process.argv[1]) === __filename) {
  main();
}
|