ruvnet · ruvnet · May 19, 2026 · May 19, 2026
diff --git a/agentic-flow/package.json b/agentic-flow/package.json
@@ -1,6 +1,6 @@
 {
   "name": "agentic-flow",
-  "version": "2.0.11",
+  "version": "2.0.13",
   "description": "Production-ready AI agent orchestration platform with 66 specialized agents, 213 MCP tools, ReasoningBank learning memory, and autonomous multi-agent swarms. Built by @ruvnet with Claude Agent SDK, neural networks, memory persistence, GitHub integration, and distributed consensus protocols.",
   "type": "module",
   "main": "dist/index.js",
@@ -157,7 +157,6 @@
     "@ruvector/ruvllm": "^0.2.3",
     "@ruvector/tiny-dancer": "^0.1.17",
     "@supabase/supabase-js": "^2.78.0",
-    "@xenova/transformers": "^2.17.2",
     "axios": "^1.12.2",
     "dotenv": "^16.4.5",
     "express": "^5.1.0",
@@ -177,6 +176,7 @@
     "@rollup/rollup-darwin-arm64": "^4.59.0",
     "@ruvector/attention": "^0.1.4",
     "@ruvector/sona": "^0.1.4",
+    "@xenova/transformers": "^2.17.2",
     "agentdb": "^3.0.0-alpha.14",
     "better-sqlite3": "^11.10.0",
     "onnxruntime-node": "^1.23.2",

diff --git a/agentic-flow/src/reasoningbank/utils/embeddings.ts b/agentic-flow/src/reasoningbank/utils/embeddings.ts
@@ -1,17 +1,25 @@
 /**
  * Embedding generation for semantic similarity
  * Uses local transformers.js - no API key required!
+ *
+ * `@xenova/transformers` is an OPTIONAL dependency. The module is loaded
+ * dynamically inside `initializeEmbeddings()` so the rest of this file is
+ * importable even when transformers.js is absent (e.g. when consumers
+ * pass `npm install --omit=optional`). Code paths that don't call
+ * `computeEmbedding()` continue to work without ever loading the module.
  */
 
-import { pipeline, env } from '@xenova/transformers';
+import type { pipeline as Pipeline, env as Env, FeatureExtractionPipeline } from '@xenova/transformers';
 import { loadConfig } from './config.js';
 
-// Configure transformers.js to use WASM backend only (avoid ONNX runtime issues)
-// The native ONNX runtime causes "DefaultLogger not registered" errors in Node.js
-env.backends.onnx.wasm.proxy = false; // Disable ONNX runtime proxy
-env.backends.onnx.wasm.numThreads = 1; // Single thread for stability
+// Cached references resolved at first call to initializeEmbeddings(). Types
+// are imported as `type-only` so TypeScript can typecheck the file without
+// requiring @xenova/transformers to be installed at build time — the actual
+// runtime import is dynamic below.
+let pipeline: typeof Pipeline | null = null;
+let env: typeof Env | null = null;
 
-let embeddingPipeline: any = null;
+let embeddingPipeline: FeatureExtractionPipeline | null = null;
 let initializationPromise: Promise<void> | null = null;
 const embeddingCache = new Map<string, Float32Array>();
 // MEMORY LEAK FIX: Track TTL timers so they can be cleaned up
@@ -44,18 +52,41 @@ async function initializeEmbeddings(): Promise<void> {
 
   // RACE CONDITION FIX: Create promise for concurrent callers to await
   initializationPromise = (async () => {
+    // Optional-dep load: try to import @xenova/transformers. If absent,
+    // emit a clear warning and let callers fall back to hash-based embeddings.
+    if (!pipeline || !env) {
+      try {
+        const transformers = await import('@xenova/transformers');
+        pipeline = transformers.pipeline;
+        env = transformers.env;
+        // Configure transformers.js to use WASM backend only (avoid ONNX runtime issues)
+        // The native ONNX runtime causes "DefaultLogger not registered" errors in Node.js
+        env.backends.onnx.wasm.proxy = false;     // Disable ONNX runtime proxy
+        env.backends.onnx.wasm.numThreads = 1;    // Single thread for stability
+      } catch (err: unknown) {
+        console.warn('[Embeddings] @xenova/transformers not installed (optional dependency).');
+        console.warn('[Embeddings] Install with: npm install @xenova/transformers');
+        console.warn('[Embeddings] Falling back to hash-based embeddings');
+        initializationPromise = null;
+        return;
+      }
+    }
+
     console.log('[Embeddings] Initializing local embedding model (Xenova/all-MiniLM-L6-v2)...');
     console.log('[Embeddings] First run will download ~23MB model...');
 
     try {
-      embeddingPipeline = await pipeline(
+      // `pipeline('feature-extraction', ...)` returns a union; narrow to
+      // FeatureExtractionPipeline so call-sites can use .pooling / .normalize.
+      embeddingPipeline = (await pipeline(
         'feature-extraction',
         'Xenova/all-MiniLM-L6-v2',
         { quantized: true } // Smaller, faster
-      );
+      )) as FeatureExtractionPipeline;
       console.log('[Embeddings] Local model ready! (384 dimensions)');
-    } catch (error: any) {
-      console.error('[Embeddings] Failed to initialize:', error?.message || error);
+    } catch (error: unknown) {
+      const msg = error instanceof Error ? error.message : String(error);
+      console.error('[Embeddings] Failed to initialize:', msg);
       console.warn('[Embeddings] Falling back to hash-based embeddings');
       // Reset promise so retry is possible
       initializationPromise = null;
@@ -89,9 +120,13 @@ export async function computeEmbedding(text: string): Promise<Float32Array> {
         pooling: 'mean',
         normalize: true
       });
-      embedding = new Float32Array(output.data);
-    } catch (error: any) {
-      console.error('[Embeddings] Generation failed:', error?.message || error);
+      // output.data is a Tensor.data typed-array union; cast to a Float32-
+      // compatible source. The model is feature-extraction with normalize:true
+      // so the underlying buffer is always Float32 at runtime.
+      embedding = new Float32Array(output.data as unknown as ArrayLike<number>);
+    } catch (error: unknown) {
+      const msg = error instanceof Error ? error.message : String(error);
+      console.error('[Embeddings] Generation failed:', msg);
       embedding = hashEmbed(text, 384); // Fallback
     }
   } else {

diff --git a/agentic-flow/src/router/providers/onnx-local-optimized.ts b/agentic-flow/src/router/providers/onnx-local-optimized.ts
@@ -8,18 +8,18 @@
  * - Better generation parameters for code tasks
  * - System prompt caching
  *
- * Note: onnxruntime-node is optional - will error if not installed
+ * Note: onnxruntime-node is optional - will error if not installed.
+ *
+ * NOTE (ruvnet/ruflo#2048): the previous top-level `await import('onnxruntime-node')`
+ * fired the native-binding load (`onnxruntime_binding.node`) at module
+ * import time. On Windows this crashes with "OS cannot run %1" — and the
+ * crash propagated to any consumer that transitively imports this file
+ * (e.g. `agentic-flow/reasoningbank` via `core/distill → router/router`).
+ * This file does not use `ort` directly — the base `ONNXLocalProvider`
+ * it extends does, and that file now lazy-loads ort on first session
+ * init. So we just drop the eager top-level load here.
  */
 
-let ort: any = null;
-
-// Dynamic import for optional onnxruntime-node
-try {
-  ort = await import('onnxruntime-node');
-} catch {
-  // Will be handled at runtime
-}
-
 import { get_encoding } from 'tiktoken';
 import { ensurePhi4Model, ModelDownloader } from '../../utils/model-downloader.js';
 import type {

diff --git a/agentic-flow/src/router/providers/onnx-local.ts b/agentic-flow/src/router/providers/onnx-local.ts
@@ -3,17 +3,30 @@
  *
  * Uses onnxruntime-node for true local CPU/GPU inference
  * Falls back gracefully when native module isn't available (Windows)
+ *
+ * NOTE (ruvnet/ruflo#2048): `onnxruntime-node` is loaded LAZILY on first
+ * `initializeSession()` call, not at module import. The previous top-level
+ * `await import('onnxruntime-node')` fired the native-binding load
+ * (`onnxruntime_binding.node`) at module load time, which crashed Windows
+ * environments where the NAPI binary cannot be loaded — even when the
+ * consumer (e.g. `agentic-flow/reasoningbank`) never actually invokes
+ * the router. Moving the import inside `loadOrt()` keeps importing
+ * `reasoningbank` side-effect-free with respect to native bindings.
  */
 
 let ort: any = null;
 let ortAvailable = false;
-
-// Dynamic import for optional onnxruntime-node
-try {
-  ort = await import('onnxruntime-node');
-  ortAvailable = true;
-} catch {
-  console.warn('[ONNX] onnxruntime-node not available - local inference disabled');
+let ortLoaded = false;
+
+async function loadOrt(): Promise<void> {
+  if (ortLoaded) return;
+  ortLoaded = true;
+  try {
+    ort = await import('onnxruntime-node');
+    ortAvailable = true;
+  } catch {
+    console.warn('[ONNX] onnxruntime-node not available - local inference disabled');
+  }
 }
 
 import * as fs from 'fs';
@@ -107,6 +120,7 @@ export class ONNXLocalProvider implements LLMProvider {
   private async initializeSession(): Promise<void> {
     if (this.session) return;
 
+    await loadOrt();
     if (!ortAvailable || !ort) {
       throw new Error('onnxruntime-node not available - install with: npm install onnxruntime-node');
     }