Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 41 additions & 16 deletions packages/ai/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,35 +15,60 @@ const normalizeText = (value: string) => value.replace(/\s+/g, " ").trim();
/**
 * Hex-encoded SHA-256 digest of `value` after it has been run through
 * `normalizeText`, so inputs that differ only in whitespace hash alike.
 */
const contentHashFor = (value: string) => {
  const canonical = normalizeText(value);
  const hasher = crypto.createHash("sha256");
  hasher.update(canonical);
  return hasher.digest("hex");
};

const seededUnitValue = (seed: string, index: number) => {
const digest = crypto.createHash("sha256").update(`${seed}:${index}`).digest();
const int = digest.readUInt32BE(0);
return int / 0xffffffff;
/**
 * Fast Mulberry32 PRNG for deterministic vector generation.
 *
 * The generator state is kept as a 32-bit integer: the seed is reduced to its
 * low 32 bits and every increment wraps. Without the wrap the state would grow
 * as an unbounded double and lose its low bits once it passed 2^53, silently
 * changing the sequence for very long runs or very large seeds.
 *
 * @param seed Integer seed; only its low 32 bits are significant.
 * @returns A function that yields the next pseudo-random value in [0, 1).
 * @see https://stackoverflow.com/a/47593316
 */
const mulberry32 = (seed: number): (() => number) => {
  let state = seed | 0;
  return () => {
    // Wrap to int32 on every step so the addition below is always exact.
    state = (state + 0x6d2b79f5) | 0;
    let t = state;
    t = Math.imul(t ^ (t >>> 15), t | 1);
    t ^= t + Math.imul(t ^ (t >>> 7), t | 61);
    // `>>> 0` reinterprets the result as unsigned before scaling by 2^32.
    return ((t ^ (t >>> 14)) >>> 0) / 4294967296;
  };
};

/**
 * Builds a deterministic pseudo-embedding for `text`.
 *
 * The text is normalized, hashed with SHA-256, and the first 32 bits of the
 * digest seed a Mulberry32 generator. Each component is drawn from [-1, 1),
 * rounded to 8 decimal places, and the whole vector is then passed through
 * `normalizeVector`.
 *
 * @param text Source text; texts that normalize identically give the same vector.
 * @param dimensions Number of components to generate.
 * @returns The result of `normalizeVector` applied to the generated components.
 */
const syntheticVector = (text: string, dimensions = DEFAULT_EMBEDDING_DIMENSIONS) => {
  const normalized = normalizeText(text);
  const hash = crypto.createHash("sha256").update(normalized).digest();
  const seed = hash.readUInt32BE(0);
  const random = mulberry32(seed);

  const values = new Float64Array(dimensions);
  for (let i = 0; i < dimensions; i++) {
    const centered = random() * 2 - 1;
    // Round to 8 decimal places, mirroring the precision of the earlier
    // toFixed(8)-based implementation.
    values[i] = Math.round(centered * 1e8) / 1e8;
  }
  return normalizeVector(Array.from(values));
};

/**
 * Scales `values` to unit Euclidean length, rounding each component to
 * 8 decimal places.
 *
 * @param values Vector components.
 * @returns The same array instance when `values` is empty; an all-zero array
 *   of the same length when the magnitude is zero; otherwise a new array of
 *   the scaled, rounded components.
 */
const normalizeVector = (values: number[]): number[] => {
  const length = values.length;
  if (length === 0) {
    return values;
  }

  let sumSq = 0;
  for (let i = 0; i < length; i++) {
    // `?? 0` keeps the loop valid under noUncheckedIndexedAccess and treats
    // holes in a sparse array as zero.
    const val = values[i] ?? 0;
    sumSq += val * val;
  }

  const magnitude = Math.sqrt(sumSq);
  if (magnitude === 0) {
    // A zero vector has no direction; return zeros rather than dividing by 0.
    return new Array<number>(length).fill(0);
  }

  const result = new Array<number>(length);
  for (let i = 0; i < length; i++) {
    const val = values[i] ?? 0;
    // Round to 8 decimal places.
    result[i] = Math.round((val / magnitude) * 1e8) / 1e8;
  }
  return result;
};

const toEmbeddingVectorRecord = (
Expand Down
Loading