diff --git a/packages/ai/src/index.ts b/packages/ai/src/index.ts
index 6eb88fc..01ac6e0 100644
--- a/packages/ai/src/index.ts
+++ b/packages/ai/src/index.ts
@@ -15,35 +15,77 @@
 const normalizeText = (value: string) => value.replace(/\s+/g, " ").trim();
 
 const contentHashFor = (value: string) =>
   crypto.createHash("sha256").update(normalizeText(value)).digest("hex");
 
-const seededUnitValue = (seed: string, index: number) => {
-  const digest = crypto.createHash("sha256").update(`${seed}:${index}`).digest();
-  const int = digest.readUInt32BE(0);
-  return int / 0xffffffff;
+/**
+ * Mulberry32: a small, fast 32-bit PRNG used for deterministic vector generation.
+ *
+ * @param seed - Initial state; only its low 32 bits are used.
+ * @returns A generator yielding floats in the half-open range [0, 1).
+ * @see https://stackoverflow.com/a/47593316
+ */
+const mulberry32 = (seed: number) => {
+  // Keep the state a 32-bit integer so it cannot grow past 2^53 and lose precision.
+  let state = seed | 0;
+  return () => {
+    state = (state + 0x6d2b79f5) | 0;
+    let t = state;
+    t = Math.imul(t ^ (t >>> 15), t | 1);
+    t ^= t + Math.imul(t ^ (t >>> 7), t | 61);
+    return ((t ^ (t >>> 14)) >>> 0) / 4294967296;
+  };
 };
 
+/**
+ * Builds a deterministic, normalized pseudo-embedding for `text`.
+ *
+ * The whitespace-normalized text is hashed with SHA-256 and the first 32 bits
+ * of the digest seed a Mulberry32 generator, so the same normalized text always
+ * produces the same vector.
+ */
 const syntheticVector = (text: string, dimensions = DEFAULT_EMBEDDING_DIMENSIONS) => {
   const normalized = normalizeText(text);
-  const hash = contentHashFor(normalized);
-  const values = Array.from({
-    length: dimensions
-  }, (_, index) => {
-    const centered = seededUnitValue(hash, index) * 2 - 1;
-    return Number(centered.toFixed(8));
-  });
-  return normalizeVector(values);
+  const hash = crypto.createHash("sha256").update(normalized).digest();
+  // Only the first 32 bits of the digest seed the generator.
+  const seed = hash.readUInt32BE(0);
+  const random = mulberry32(seed);
+
+  const values = new Float64Array(dimensions);
+  for (let i = 0; i < dimensions; i++) {
+    // Map [0, 1) onto [-1, 1), then round to 8 decimal places as the legacy code did.
+    const centered = random() * 2 - 1;
+    values[i] = Math.round(centered * 1e8) / 1e8;
+  }
+  return normalizeVector(Array.from(values));
 };
 
+/**
+ * Scales `values` to unit magnitude, rounding each component to 8 decimal places.
+ *
+ * An empty input is returned as-is; an all-zero input yields a new all-zero array.
+ */
 const normalizeVector = (values: number[]) => {
-  if (values.length === 0) {
+  const length = values.length;
+  if (length === 0) {
     return values;
   }
-  const magnitude = Math.sqrt(values.reduce((sum, value) => sum + value * value, 0));
+  let sumSq = 0;
+  for (let i = 0; i < length; i++) {
+    const val = values[i] ?? 0;
+    sumSq += val * val;
+  }
+
+  const magnitude = Math.sqrt(sumSq);
   if (magnitude === 0) {
-    return values.map(() => 0);
+    return new Array<number>(length).fill(0);
   }
-  return values.map((value) => Number((value / magnitude).toFixed(8)));
+  const result = new Array<number>(length);
+  for (let i = 0; i < length; i++) {
+    const val = values[i] ?? 0;
+    // Round to 8 decimal places
+    result[i] = Math.round((val / magnitude) * 1e8) / 1e8;
+  }
+  return result;
 };
 
 const toEmbeddingVectorRecord = (