Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions agentic-flow/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "agentic-flow",
"version": "2.0.11",
"version": "2.0.13",
"description": "Production-ready AI agent orchestration platform with 66 specialized agents, 213 MCP tools, ReasoningBank learning memory, and autonomous multi-agent swarms. Built by @ruvnet with Claude Agent SDK, neural networks, memory persistence, GitHub integration, and distributed consensus protocols.",
"type": "module",
"main": "dist/index.js",
Expand Down Expand Up @@ -157,7 +157,6 @@
"@ruvector/ruvllm": "^0.2.3",
"@ruvector/tiny-dancer": "^0.1.17",
"@supabase/supabase-js": "^2.78.0",
"@xenova/transformers": "^2.17.2",
"axios": "^1.12.2",
"dotenv": "^16.4.5",
"express": "^5.1.0",
Expand All @@ -177,6 +176,7 @@
"@rollup/rollup-darwin-arm64": "^4.59.0",
"@ruvector/attention": "^0.1.4",
"@ruvector/sona": "^0.1.4",
"@xenova/transformers": "^2.17.2",
"agentdb": "^3.0.0-alpha.14",
"better-sqlite3": "^11.10.0",
"onnxruntime-node": "^1.23.2",
Expand Down
61 changes: 48 additions & 13 deletions agentic-flow/src/reasoningbank/utils/embeddings.ts
Original file line number Diff line number Diff line change
@@ -1,17 +1,25 @@
/**
* Embedding generation for semantic similarity
* Uses local transformers.js - no API key required!
*
* `@xenova/transformers` is an OPTIONAL dependency. The module is loaded
* dynamically inside `initializeEmbeddings()` so the rest of this file is
* importable even when transformers.js is absent (e.g. when consumers
* pass `npm install --omit=optional`). Code paths that don't call
* `computeEmbedding()` continue to work without ever loading the module.
*/

import { pipeline, env } from '@xenova/transformers';
import type { pipeline as Pipeline, env as Env, FeatureExtractionPipeline } from '@xenova/transformers';
import { loadConfig } from './config.js';

// Configure transformers.js to use WASM backend only (avoid ONNX runtime issues)
// The native ONNX runtime causes "DefaultLogger not registered" errors in Node.js
env.backends.onnx.wasm.proxy = false; // Disable ONNX runtime proxy
env.backends.onnx.wasm.numThreads = 1; // Single thread for stability
// Cached references resolved at first call to initializeEmbeddings(). Types
// are imported as `type-only` so TypeScript can typecheck the file without
// requiring @xenova/transformers to be installed at build time — the actual
// runtime import is dynamic below.
let pipeline: typeof Pipeline | null = null;
let env: typeof Env | null = null;

let embeddingPipeline: any = null;
let embeddingPipeline: FeatureExtractionPipeline | null = null;
let initializationPromise: Promise<void> | null = null;
const embeddingCache = new Map<string, Float32Array>();
// MEMORY LEAK FIX: Track TTL timers so they can be cleaned up
Expand Down Expand Up @@ -44,18 +52,41 @@ async function initializeEmbeddings(): Promise<void> {

// RACE CONDITION FIX: Create promise for concurrent callers to await
initializationPromise = (async () => {
// Optional-dep load: try to import @xenova/transformers. If absent,
// emit a clear warning and let callers fall back to hash-based embeddings.
if (!pipeline || !env) {
try {
const transformers = await import('@xenova/transformers');
pipeline = transformers.pipeline;
env = transformers.env;
// Configure transformers.js to use WASM backend only (avoid ONNX runtime issues)
// The native ONNX runtime causes "DefaultLogger not registered" errors in Node.js
env.backends.onnx.wasm.proxy = false; // Disable ONNX runtime proxy
env.backends.onnx.wasm.numThreads = 1; // Single thread for stability
} catch (err: unknown) {
console.warn('[Embeddings] @xenova/transformers not installed (optional dependency).');
console.warn('[Embeddings] Install with: npm install @xenova/transformers');
console.warn('[Embeddings] Falling back to hash-based embeddings');
initializationPromise = null;
return;
}
}

console.log('[Embeddings] Initializing local embedding model (Xenova/all-MiniLM-L6-v2)...');
console.log('[Embeddings] First run will download ~23MB model...');

try {
embeddingPipeline = await pipeline(
// `pipeline('feature-extraction', ...)` returns a union; narrow to
// FeatureExtractionPipeline so call-sites can use .pooling / .normalize.
embeddingPipeline = (await pipeline(
'feature-extraction',
'Xenova/all-MiniLM-L6-v2',
{ quantized: true } // Smaller, faster
);
)) as FeatureExtractionPipeline;
console.log('[Embeddings] Local model ready! (384 dimensions)');
} catch (error: any) {
console.error('[Embeddings] Failed to initialize:', error?.message || error);
} catch (error: unknown) {
const msg = error instanceof Error ? error.message : String(error);
console.error('[Embeddings] Failed to initialize:', msg);
console.warn('[Embeddings] Falling back to hash-based embeddings');
// Reset promise so retry is possible
initializationPromise = null;
Expand Down Expand Up @@ -89,9 +120,13 @@ export async function computeEmbedding(text: string): Promise<Float32Array> {
pooling: 'mean',
normalize: true
});
embedding = new Float32Array(output.data);
} catch (error: any) {
console.error('[Embeddings] Generation failed:', error?.message || error);
// output.data is a Tensor.data typed-array union; cast to a Float32-
// compatible source. The model is feature-extraction with normalize:true
// so the underlying buffer is always Float32 at runtime.
embedding = new Float32Array(output.data as unknown as ArrayLike<number>);
} catch (error: unknown) {
const msg = error instanceof Error ? error.message : String(error);
console.error('[Embeddings] Generation failed:', msg);
embedding = hashEmbed(text, 384); // Fallback
}
} else {
Expand Down
20 changes: 10 additions & 10 deletions agentic-flow/src/router/providers/onnx-local-optimized.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,18 @@
* - Better generation parameters for code tasks
* - System prompt caching
*
* Note: onnxruntime-node is optional - will error if not installed
* Note: onnxruntime-node is optional - will error if not installed.
*
* NOTE (ruvnet/ruflo#2048): the previous top-level `await import('onnxruntime-node')`
* fired the native-binding load (`onnxruntime_binding.node`) at module
* import time. On Windows this crashes with "OS cannot run %1" — and the
* crash propagated to any consumer that transitively imports this file
* (e.g. `agentic-flow/reasoningbank` via `core/distill → router/router`).
* This file does not use `ort` directly — the base `ONNXLocalProvider`
* it extends does, and that file now lazy-loads ort on first session
* init. So we just drop the eager top-level load here.
*/

let ort: any = null;

// Dynamic import for optional onnxruntime-node
try {
ort = await import('onnxruntime-node');
} catch {
// Will be handled at runtime
}

import { get_encoding } from 'tiktoken';
import { ensurePhi4Model, ModelDownloader } from '../../utils/model-downloader.js';
import type {
Expand Down
28 changes: 21 additions & 7 deletions agentic-flow/src/router/providers/onnx-local.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,30 @@
*
* Uses onnxruntime-node for true local CPU/GPU inference
* Falls back gracefully when native module isn't available (Windows)
*
* NOTE (ruvnet/ruflo#2048): `onnxruntime-node` is loaded LAZILY on first
* `initializeSession()` call, not at module import. The previous top-level
* `await import('onnxruntime-node')` fired the native-binding load
* (`onnxruntime_binding.node`) at module load time, which crashed Windows
* environments where the NAPI binary cannot be loaded — even when the
* consumer (e.g. `agentic-flow/reasoningbank`) never actually invokes
* the router. Moving the import inside `loadOrt()` keeps importing
* `reasoningbank` side-effect-free with respect to native bindings.
*/

let ort: any = null;
let ortAvailable = false;

// Dynamic import for optional onnxruntime-node
try {
ort = await import('onnxruntime-node');
ortAvailable = true;
} catch {
console.warn('[ONNX] onnxruntime-node not available - local inference disabled');
let ortLoaded = false;

async function loadOrt(): Promise<void> {
if (ortLoaded) return;
ortLoaded = true;
try {
ort = await import('onnxruntime-node');
ortAvailable = true;
} catch {
console.warn('[ONNX] onnxruntime-node not available - local inference disabled');
}
}

import * as fs from 'fs';
Expand Down Expand Up @@ -107,6 +120,7 @@ export class ONNXLocalProvider implements LLMProvider {
private async initializeSession(): Promise<void> {
if (this.session) return;

await loadOrt();
if (!ortAvailable || !ort) {
throw new Error('onnxruntime-node not available - install with: npm install onnxruntime-node');
}
Expand Down
Loading