Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file added .jules/bolt.md
Empty file.
61 changes: 61 additions & 0 deletions packages/ai/src/index.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@

import { describe, it, expect } from "vitest";
import {
generateEmbeddings,
cosineSimilarity,
normalizeEmbeddingText,
embeddingContentHash,
embeddingDimensions
} from "./index.js";

// Behavioural checks for the embedding helpers imported from ./index.js.
describe("AI Embedding Utils", () => {
  it("should normalize text correctly", () => {
    const normalized = normalizeEmbeddingText(" hello world ");
    expect(normalized).toBe("hello world");
  });

  it("should generate consistent hashes", () => {
    const input = "test message";
    const first = embeddingContentHash(input);
    const second = embeddingContentHash(input);

    // Same input must hash identically, as 64 lowercase hex characters.
    expect(first).toBe(second);
    expect(first).toMatch(/^[a-f0-9]{64}$/);
  });

  it("should generate deterministic synthetic embeddings", async () => {
    const input = "deterministic test";
    const firstRun = await generateEmbeddings([input], { forceSynthetic: true });
    const secondRun = await generateEmbeddings([input], { forceSynthetic: true });

    const firstRecord = firstRun[0];
    const secondRecord = secondRun[0];
    expect(firstRecord.values).toEqual(secondRecord.values);
    expect(firstRecord.dimensions).toBe(embeddingDimensions);
  });

  it("should produce normalized synthetic vectors", async () => {
    const input = "normalized test";
    const records = await generateEmbeddings([input], { forceSynthetic: true });
    const record = records[0];

    // Euclidean length: square root of the sum of squared components.
    let sumOfSquares = 0;
    for (const component of record.values) {
      sumOfSquares = sumOfSquares + component * component;
    }
    const magnitude = Math.sqrt(sumOfSquares);

    // Unit length, within a tolerance of one millionth either side.
    expect(magnitude).toBeGreaterThan(0.999999);
    expect(magnitude).toBeLessThan(1.000001);
  });

  it("should calculate cosine similarity correctly", () => {
    const base = [1, 0, 0];
    const identical = [1, 0, 0];
    const orthogonal = [0, 1, 0];
    const opposite = [-1, 0, 0];

    expect(cosineSimilarity(base, identical)).toBeCloseTo(1);
    expect(cosineSimilarity(base, orthogonal)).toBeCloseTo(0);
    expect(cosineSimilarity(base, opposite)).toBeCloseTo(-1);
  });

  it("should handle empty or mismatched vectors in cosineSimilarity", () => {
    // Empty, length-mismatched and missing inputs are all expected to yield 0.
    const emptyResult = cosineSimilarity([], [1, 2, 3]);
    const mismatchedResult = cosineSimilarity([1, 2], [1, 2, 3]);
    const missingResult = cosineSimilarity(undefined, [1]);

    expect(emptyResult).toBe(0);
    expect(mismatchedResult).toBe(0);
    expect(missingResult).toBe(0);
  });
});
41 changes: 29 additions & 12 deletions packages/ai/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,21 +15,33 @@ const normalizeText = (value: string) => value.replace(/\s+/g, " ").trim();
/**
 * Returns the hex-encoded SHA-256 digest of `value` after it has been passed
 * through `normalizeText`.
 */
const contentHashFor = (value: string) => {
  const hasher = crypto.createHash("sha256");
  hasher.update(normalizeText(value));
  return hasher.digest("hex");
};

const seededUnitValue = (seed: string, index: number) => {
const digest = crypto.createHash("sha256").update(`${seed}:${index}`).digest();
const int = digest.readUInt32BE(0);
return int / 0xffffffff;
/**
 * Creates a Mulberry32 pseudo-random number generator.
 *
 * @param seed - Initial state; coerced to a signed 32-bit integer.
 * @returns A function that yields the next value in [0, 1) on every call.
 * The sequence is fully determined by `seed`.
 */
const mulberry32 = (seed: number) => {
  // The evolving state lives in a local so the parameter is never reassigned.
  let state = seed | 0;
  return () => {
    state = (state + 0x6d2b79f5) | 0;
    const mixed = Math.imul(state ^ (state >>> 15), state | 1);
    const scrambled = (mixed + Math.imul(mixed ^ (mixed >>> 7), mixed | 61)) ^ mixed;
    // Reinterpret as unsigned 32-bit, then scale by 2^32.
    return ((scrambled ^ (scrambled >>> 14)) >>> 0) / 4294967296;
  };
};

/**
 * Builds a deterministic pseudo-embedding for `text`.
 *
 * The text is normalized and hashed with SHA-256; the first four digest bytes
 * seed a Mulberry32 generator, so equal normalized text always produces the
 * same vector.
 *
 * @param text - Input text to derive the vector from.
 * @param dimensions - Number of components to generate.
 * @returns The generated components after being passed through `normalizeVector`.
 */
const syntheticVector = (text: string, dimensions = DEFAULT_EMBEDDING_DIMENSIONS) => {
  const normalized = normalizeText(text);
  const digest = crypto.createHash("sha256").update(normalized).digest();
  // Big-endian read of the leading 32 bits of the digest.
  const seed = digest.readUInt32BE(0);
  const prng = mulberry32(seed);

  // prng() is in [0, 1); map each draw onto [-1, 1). The callback runs in
  // index order, so the draw sequence matches a plain counting loop.
  const values: number[] = Array.from({ length: dimensions }, () => prng() * 2 - 1);
  return normalizeVector(values);
};

Expand All @@ -43,7 +55,12 @@ const normalizeVector = (values: number[]) => {
return values.map(() => 0);
}

return values.map((value) => Number((value / magnitude).toFixed(8)));
const invMagnitude = 1 / magnitude;
return values.map((value) => {
const v = value * invMagnitude;
// Fast rounding to 8 decimal places
return Math.sign(v) * Math.round(Math.abs(v) * 1e8) / 1e8;
});
};

const toEmbeddingVectorRecord = (
Expand Down
Loading