Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ VOYAGE_API_KEY=
# EMBEDDING_DIMENSIONS=512
#
# Local embeddings: runs a sentence-transformers model in-process via
# transformers.js (WASM), no API key. Downloads the model once to the cache dir.
# transformers.js (native onnxruntime-node backend), no API key. Downloads the
# model once to the cache dir. Requires a glibc-based image; the Dockerfile uses
# node:22-slim for this reason.
# EMBEDDING_PROVIDER=local
# EMBEDDING_MODEL=Xenova/all-MiniLM-L6-v2
# EMBEDDING_DIMENSIONS=384
Expand Down
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Stage 1: Build
FROM node:22-alpine AS builder
FROM node:22-slim AS builder
WORKDIR /app
COPY package.json package-lock.json ./
RUN npm ci --ignore-scripts
Expand All @@ -8,7 +8,7 @@ COPY src/ src/
RUN npx nest build

# Stage 2: Production
FROM node:22-alpine AS runner
FROM node:22-slim AS runner
WORKDIR /app
ENV NODE_ENV=production

Expand Down
8 changes: 7 additions & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,13 @@ services:
redis:
condition: service_healthy
healthcheck:
test: ['CMD', 'wget', '--spider', '-q', 'http://localhost:3000/health/ready']
test:
[
'CMD',
'node',
'-e',
"fetch('http://localhost:3000/health/ready').then((r) => process.exit(r.ok ? 0 : 1)).catch(() => process.exit(1))",
]
interval: 10s
timeout: 5s
retries: 5
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,7 @@ describe('LocalEmbeddingAdapter', () => {
const result = await new LocalEmbeddingAdapter(buildConfig()).embedBatch(['hello']);

expect(result).toEqual([[0.1, 0.2, 0.3]]);
expect(pipelineMock).toHaveBeenCalledWith('feature-extraction', 'Xenova/all-MiniLM-L6-v2', {
device: 'wasm',
});
expect(pipelineMock).toHaveBeenCalledWith('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
expect(extractor).toHaveBeenCalledWith(['hello'], { pooling: 'mean', normalize: true });
});

Expand All @@ -42,9 +40,7 @@ describe('LocalEmbeddingAdapter', () => {
await adapter.embed('b');

expect(pipelineMock).toHaveBeenCalledTimes(1);
expect(pipelineMock).toHaveBeenCalledWith('feature-extraction', 'Xenova/bge-small-en', {
device: 'wasm',
});
expect(pipelineMock).toHaveBeenCalledWith('feature-extraction', 'Xenova/bge-small-en');
});

it('returns the single vector from embed()', async () => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,6 @@ import { AppConfig } from '../../../config/app.config.js';

const DEFAULT_LOCAL_MODEL = 'Xenova/all-MiniLM-L6-v2';

// The runner image is Alpine (musl), where the native onnxruntime-node binary
// will not load. Force the WASM backend and a single thread so embeddings work
// without SharedArrayBuffer.
const wasmBackend = env.backends?.onnx?.wasm;
if (wasmBackend) {
wasmBackend.numThreads = 1;
}

@Injectable()
export class LocalEmbeddingAdapter extends EmbeddingProviderPort {
private readonly logger = new Logger(LocalEmbeddingAdapter.name);
Expand Down Expand Up @@ -47,7 +39,7 @@ export class LocalEmbeddingAdapter extends EmbeddingProviderPort {

/**
 * Lazily creates the feature-extraction pipeline for `this.model` and caches
 * the resulting promise on `this.extractor`.
 *
 * The pipeline is created exactly once and no `device` option is passed, so
 * backend selection is left to transformers.js. Previously a second
 * `pipeline(...)` call with `{ device: 'wasm' }` ran first and was immediately
 * overwritten, which started a redundant model load whose result was never used.
 *
 * @returns the cached promise for the feature-extraction pipeline.
 */
private loadExtractor(): Promise<FeatureExtractionPipeline> {
  if (!this.extractor) {
    // Cache the promise itself (not the awaited pipeline) so callers that
    // arrive while the model is still loading reuse the same in-flight load.
    this.extractor = pipeline('feature-extraction', this.model);
  }
  return this.extractor;
}
Expand Down
Loading