diff --git a/.env.example b/.env.example index ce511a7..e8ceb28 100644 --- a/.env.example +++ b/.env.example @@ -35,7 +35,8 @@ VOYAGE_API_KEY= # EMBEDDING_DIMENSIONS=512 # # Local embeddings: runs a sentence-transformers model in-process via -# transformers.js (WASM), no API key. Downloads the model once to the cache dir. +# transformers.js (native onnxruntime), no API key. Downloads the model once to +# the cache dir. Requires the glibc-based image (the Dockerfile uses node:slim). # EMBEDDING_PROVIDER=local # EMBEDDING_MODEL=Xenova/all-MiniLM-L6-v2 # EMBEDDING_DIMENSIONS=384 diff --git a/Dockerfile b/Dockerfile index 70ab017..a1c2757 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ # Stage 1: Build -FROM node:22-alpine AS builder +FROM node:22-slim AS builder WORKDIR /app COPY package.json package-lock.json ./ RUN npm ci --ignore-scripts @@ -8,7 +8,7 @@ COPY src/ src/ RUN npx nest build # Stage 2: Production -FROM node:22-alpine AS runner +FROM node:22-slim AS runner WORKDIR /app ENV NODE_ENV=production diff --git a/docker-compose.yml b/docker-compose.yml index b1604c2..3133703 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -18,7 +18,13 @@ services: redis: condition: service_healthy healthcheck: - test: ['CMD', 'wget', '--spider', '-q', 'http://localhost:3000/health/ready'] + test: + [ + 'CMD', + 'node', + '-e', + "fetch('http://localhost:3000/health/ready').then((r) => process.exit(r.ok ? 0 : 1)).catch(() => process.exit(1))", + ] interval: 10s timeout: 5s retries: 5 diff --git a/src/memory/infrastructure/adapters/local-embedding.adapter.spec.ts b/src/memory/infrastructure/adapters/local-embedding.adapter.spec.ts index 3442859..3cd83fb 100644 --- a/src/memory/infrastructure/adapters/local-embedding.adapter.spec.ts +++ b/src/memory/infrastructure/adapters/local-embedding.adapter.spec.ts @@ -25,9 +25,7 @@ describe('LocalEmbeddingAdapter', () => { const result = await new LocalEmbeddingAdapter(buildConfig()).embedBatch(['hello']); expect(result).toEqual([[0.1, 0.2, 0.3]]); - expect(pipelineMock).toHaveBeenCalledWith('feature-extraction', 'Xenova/all-MiniLM-L6-v2', { - device: 'wasm', - }); + expect(pipelineMock).toHaveBeenCalledWith('feature-extraction', 'Xenova/all-MiniLM-L6-v2'); expect(extractor).toHaveBeenCalledWith(['hello'], { pooling: 'mean', normalize: true }); }); @@ -42,9 +40,7 @@ describe('LocalEmbeddingAdapter', () => { await adapter.embed('b'); expect(pipelineMock).toHaveBeenCalledTimes(1); - expect(pipelineMock).toHaveBeenCalledWith('feature-extraction', 'Xenova/bge-small-en', { - device: 'wasm', - }); + expect(pipelineMock).toHaveBeenCalledWith('feature-extraction', 'Xenova/bge-small-en'); }); it('returns the single vector from embed()', async () => { diff --git a/src/memory/infrastructure/adapters/local-embedding.adapter.ts b/src/memory/infrastructure/adapters/local-embedding.adapter.ts index 685dedb..f2a04fa 100644 --- a/src/memory/infrastructure/adapters/local-embedding.adapter.ts +++ b/src/memory/infrastructure/adapters/local-embedding.adapter.ts @@ -6,14 +6,6 @@ import { AppConfig } from '../../../config/app.config.js'; const DEFAULT_LOCAL_MODEL = 'Xenova/all-MiniLM-L6-v2'; -// The runner image is Alpine (musl), where the native onnxruntime-node binary -// will not load. Force the WASM backend and a single thread so embeddings work -// without SharedArrayBuffer. -const wasmBackend = env.backends?.onnx?.wasm; -if (wasmBackend) { - wasmBackend.numThreads = 1; -} - @Injectable() export class LocalEmbeddingAdapter extends EmbeddingProviderPort { private readonly logger = new Logger(LocalEmbeddingAdapter.name); @@ -47,7 +39,7 @@ export class LocalEmbeddingAdapter extends EmbeddingProviderPort { private loadExtractor(): Promise { if (!this.extractor) { - this.extractor = pipeline('feature-extraction', this.model, { device: 'wasm' }); + this.extractor = pipeline('feature-extraction', this.model); } return this.extractor; }