From 7f6c63e0e79cfb2673ede01eadf2e26d9b01690f Mon Sep 17 00:00:00 2001
From: Vineeth N K <vineethkrishnan007@gmail.com>
Date: Sat, 13 Jun 2026 23:30:49 +0530
Subject: [PATCH] fix(docker): use glibc base image so local embeddings load

@huggingface/transformers eagerly loads the native onnxruntime-node module at import time. Its prebuilt binary is glibc-only and cannot be dlopen'ed on Alpine/musl: loading fails with ERR_DLOPEN_FAILED, and gcompat does not provide the glibc fortify symbols it needs. With EMBEDDING_PROVIDER=local this crashed the app on boot.

Switch both Dockerfile stages to node:22-slim (glibc), where onnxruntime-node loads natively; the native backend is also faster than the WASM fallback. The local embedding adapter now uses the default native backend instead of forcing WASM. The compose healthcheck moves from wget (absent on slim) to a node fetch one-liner.

Verified on the target host: the app boots cleanly and an in-container embed returns a 384-dim vector.
---
 .env.example                                           |  3 ++-
 Dockerfile                                             |  4 ++--
 docker-compose.yml                                     |  8 +++++++-
 .../adapters/local-embedding.adapter.spec.ts           |  8 ++------
 .../infrastructure/adapters/local-embedding.adapter.ts | 10 +---------
 5 files changed, 14 insertions(+), 19 deletions(-)

diff --git a/.env.example b/.env.example
index ce511a7..e8ceb28 100644
--- a/.env.example
+++ b/.env.example
@@ -35,7 +35,8 @@ VOYAGE_API_KEY=
 # EMBEDDING_DIMENSIONS=512
 #
 # Local embeddings: runs a sentence-transformers model in-process via
-# transformers.js (WASM), no API key. Downloads the model once to the cache dir.
+# transformers.js (native onnxruntime), no API key. Downloads the model once to
+# the cache dir. Requires the glibc-based image (the Dockerfile uses node:22-slim).
 # EMBEDDING_PROVIDER=local
 # EMBEDDING_MODEL=Xenova/all-MiniLM-L6-v2
 # EMBEDDING_DIMENSIONS=384
diff --git a/Dockerfile b/Dockerfile
index 70ab017..a1c2757 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,5 +1,5 @@
 # Stage 1: Build
-FROM node:22-alpine AS builder
+FROM node:22-slim AS builder
 WORKDIR /app
 COPY package.json package-lock.json ./
 RUN npm ci --ignore-scripts
@@ -8,7 +8,7 @@ COPY src/ src/
 RUN npx nest build
 
 # Stage 2: Production
-FROM node:22-alpine AS runner
+FROM node:22-slim AS runner
 WORKDIR /app
 ENV NODE_ENV=production
 
diff --git a/docker-compose.yml b/docker-compose.yml
index b1604c2..3133703 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -18,7 +18,13 @@ services:
       redis:
         condition: service_healthy
     healthcheck:
-      test: ['CMD', 'wget', '--spider', '-q', 'http://localhost:3000/health/ready']
+      test:
+        [
+          'CMD',
+          'node',
+          '-e',
+          "fetch('http://localhost:3000/health/ready').then((r) => process.exit(r.ok ? 0 : 1)).catch(() => process.exit(1))",
+        ]
       interval: 10s
       timeout: 5s
       retries: 5
diff --git a/src/memory/infrastructure/adapters/local-embedding.adapter.spec.ts b/src/memory/infrastructure/adapters/local-embedding.adapter.spec.ts
index 3442859..3cd83fb 100644
--- a/src/memory/infrastructure/adapters/local-embedding.adapter.spec.ts
+++ b/src/memory/infrastructure/adapters/local-embedding.adapter.spec.ts
@@ -25,9 +25,7 @@ describe('LocalEmbeddingAdapter', () => {
     const result = await new LocalEmbeddingAdapter(buildConfig()).embedBatch(['hello']);
 
     expect(result).toEqual([[0.1, 0.2, 0.3]]);
-    expect(pipelineMock).toHaveBeenCalledWith('feature-extraction', 'Xenova/all-MiniLM-L6-v2', {
-      device: 'wasm',
-    });
+    expect(pipelineMock).toHaveBeenCalledWith('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
     expect(extractor).toHaveBeenCalledWith(['hello'], { pooling: 'mean', normalize: true });
   });
 
@@ -42,9 +40,7 @@ describe('LocalEmbeddingAdapter', () => {
     await adapter.embed('b');
 
     expect(pipelineMock).toHaveBeenCalledTimes(1);
-    expect(pipelineMock).toHaveBeenCalledWith('feature-extraction', 'Xenova/bge-small-en', {
-      device: 'wasm',
-    });
+    expect(pipelineMock).toHaveBeenCalledWith('feature-extraction', 'Xenova/bge-small-en');
   });
 
   it('returns the single vector from embed()', async () => {
diff --git a/src/memory/infrastructure/adapters/local-embedding.adapter.ts b/src/memory/infrastructure/adapters/local-embedding.adapter.ts
index 685dedb..f2a04fa 100644
--- a/src/memory/infrastructure/adapters/local-embedding.adapter.ts
+++ b/src/memory/infrastructure/adapters/local-embedding.adapter.ts
@@ -6,14 +6,6 @@ import { AppConfig } from '../../../config/app.config.js';
 
 const DEFAULT_LOCAL_MODEL = 'Xenova/all-MiniLM-L6-v2';
 
-// The runner image is Alpine (musl), where the native onnxruntime-node binary
-// will not load. Force the WASM backend and a single thread so embeddings work
-// without SharedArrayBuffer.
-const wasmBackend = env.backends?.onnx?.wasm;
-if (wasmBackend) {
-  wasmBackend.numThreads = 1;
-}
-
 @Injectable()
 export class LocalEmbeddingAdapter extends EmbeddingProviderPort {
   private readonly logger = new Logger(LocalEmbeddingAdapter.name);
@@ -47,7 +39,7 @@ export class LocalEmbeddingAdapter extends EmbeddingProviderPort {
 
   private loadExtractor(): Promise<FeatureExtractionPipeline> {
     if (!this.extractor) {
-      this.extractor = pipeline('feature-extraction', this.model, { device: 'wasm' });
+      this.extractor = pipeline('feature-extraction', this.model);
     }
     return this.extractor;
   }