Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .jules/bolt.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
## 2026-05-27 - [Synthetic Embedding Optimization]
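**Learning:** Replacing `toFixed(8)` with manual `Math.round` logic (with sign handling) and `crypto.createHash` with `crypto.hash` significantly improves throughput. However, `crypto.hash` returns a hex string by default and binary data only when an output encoding such as `"buffer"` is requested. Interpolating a binary digest into a template literal for seeding does not produce the hex digest: a `Buffer` is decoded as lossy UTF-8 and a plain `Uint8Array` becomes a comma-separated byte list, which changes the seeds and, in the `Buffer` case, can cause collisions.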
**Action:** Always verify `crypto.hash` encoding when used for seeds, and centralize normalization to avoid redundant O(N) passes.
60 changes: 60 additions & 0 deletions packages/ai/src/benchmark.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@

import crypto from "node:crypto";

const DEFAULT_EMBEDDING_DIMENSIONS = 1536;

/**
 * Canonicalizes text for hashing: every run of whitespace becomes a single
 * space, then leading and trailing whitespace is removed.
 */
const normalizeText = (value: string) => {
  const collapsed = value.replace(/\s+/g, " ");
  return collapsed.trim();
};

/**
 * Returns the SHA-256 digest of the whitespace-normalized input.
 * No output encoding is passed to `crypto.hash`, so its default is used.
 */
const contentHashFor = (value: string) => {
  const canonicalText = normalizeText(value);
  return crypto.hash("sha256", canonicalText);
};

/**
 * Derives a deterministic number in [0, 1] from a seed and an index.
 *
 * The string `<seed>:<index>` is hashed with SHA-256 and the first four bytes
 * of the digest are read as a big-endian unsigned 32-bit integer, which is
 * then divided by the largest 32-bit unsigned value.
 */
const seededUnitValue = (seed: string, index: number) => {
  const uint32Max = 0xffffffff;
  const material = `${seed}:${index}`;
  const leadingWord = crypto.hash("sha256", material, "buffer").readUInt32BE(0);
  return leadingWord / uint32Max;
};

/**
 * Scales a vector to unit length and rounds each component to 8 decimals.
 *
 * - An empty input is returned as-is (same array reference).
 * - A vector whose magnitude is 0 yields a new array of zeros.
 * - Otherwise a new array is returned; the input is not modified.
 */
const normalizeVector = (values: number[]) => {
  if (values.length === 0) {
    return values;
  }

  let sumOfSquares = 0;
  for (const component of values) {
    sumOfSquares += component * component;
  }

  const magnitude = Math.sqrt(sumOfSquares);
  if (magnitude === 0) {
    return values.map(() => 0);
  }

  // Round the absolute value and re-apply the sign so that negative numbers
  // round symmetrically to positive ones.
  const roundToEightPlaces = (x: number) =>
    Math.sign(x) * Math.round(Math.abs(x) * 1e8) / 1e8;

  // Multiply by the reciprocal once instead of dividing per element.
  const scale = 1 / magnitude;
  return values.map((component) => roundToEightPlaces(component * scale));
};

/**
 * Builds a deterministic pseudo-embedding for `text`.
 *
 * The text is normalized and hashed, and the hash seeds one value per
 * dimension. Each seeded value in [0, 1] is mapped to [-1, 1] and rounded to
 * 8 decimals. The result is NOT normalized to unit length here.
 *
 * @param text       Input text to embed.
 * @param dimensions Number of components to generate.
 */
const syntheticVector = (text: string, dimensions = DEFAULT_EMBEDDING_DIMENSIONS) => {
  const seed = contentHashFor(normalizeText(text));

  const componentAt = (_: unknown, position: number) => {
    // Map [0, 1] onto [-1, 1].
    const signedUnit = seededUnitValue(seed, position) * 2 - 1;
    // Round the magnitude, then restore the sign.
    const roundedMagnitude = Math.round(Math.abs(signedUnit) * 1e8);
    return Math.sign(signedUnit) * roundedMagnitude / 1e8;
  };

  return Array.from({ length: dimensions }, componentAt);
};

/**
 * Micro-benchmark for synthetic embedding generation.
 *
 * Generates and normalizes a synthetic vector for a fixed number of slightly
 * different inputs, then logs the elapsed time and the resulting throughput.
 */
async function main() {
  const text = "Hello world, this is a test for performance benchmarking of synthetic embeddings.";
  const iterations = 100;

  console.log(`Benchmarking ${iterations} synthetic embedding generations...`);
  // performance.now() is monotonic and has sub-millisecond resolution, unlike
  // Date.now(), which is a wall clock that can jump and only ticks in whole
  // milliseconds.
  const start = performance.now();
  for (let i = 0; i < iterations; i++) {
    // Vary the input on each iteration so every run hashes different text.
    normalizeVector(syntheticVector(text + i));
  }
  const duration = performance.now() - start;
  console.log(`Duration: ${duration.toFixed(2)}ms`);
  console.log(`Throughput: ${(iterations / (duration / 1000)).toFixed(2)} embeddings/sec`);
}

// Explicitly mark the promise as intentionally not awaited; a rejection still
// surfaces as an unhandled rejection and fails the process.
void main();
37 changes: 37 additions & 0 deletions packages/ai/src/index.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import { describe, it, expect } from "vitest";
import { generateEmbedding, cosineSimilarity } from "./index.js";

describe("@jeanbot/ai", () => {
  // The same text must always produce the same vector and content hash.
  it("should generate deterministic synthetic embeddings", async () => {
    const input = "test deterministic";
    const first = await generateEmbedding(input, { forceSynthetic: true });
    const second = await generateEmbedding(input, { forceSynthetic: true });

    expect(first.values).toEqual(second.values);
    expect(first.contentHash).toBe(second.contentHash);
  });

  // The Euclidean length of a generated vector must be 1 within +/- 0.001.
  it("should generate normalized vectors", async () => {
    const input = "test normalized";
    const { values } = await generateEmbedding(input, { forceSynthetic: true });

    let sumOfSquares = 0;
    for (const component of values) {
      sumOfSquares += component * component;
    }
    const length = Math.sqrt(sumOfSquares);

    // Should be very close to 1
    expect(length).toBeGreaterThan(0.999);
    expect(length).toBeLessThan(1.001);
  });

  // Identical, orthogonal and opposite unit vectors give 1, 0 and -1.
  it("should calculate cosine similarity correctly", () => {
    const xAxis = [1, 0, 0];
    const cases: Array<[number[], number]> = [
      [[1, 0, 0], 1],
      [[0, 1, 0], 0],
      [[-1, 0, 0], -1],
    ];

    for (const [other, expected] of cases) {
      expect(cosineSimilarity(xAxis, other)).toBeCloseTo(expected);
    }
  });
});
19 changes: 12 additions & 7 deletions packages/ai/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,24 +13,24 @@ const MAX_RETRIES = 2;
const normalizeText = (value: string) => value.replace(/\s+/g, " ").trim();

const contentHashFor = (value: string) =>
crypto.createHash("sha256").update(normalizeText(value)).digest("hex");
crypto.hash("sha256", normalizeText(value));

const seededUnitValue = (seed: string, index: number) => {
const digest = crypto.createHash("sha256").update(`${seed}:${index}`).digest();
const digest = crypto.hash("sha256", `${seed}:${index}`, "buffer");
const int = digest.readUInt32BE(0);
return int / 0xffffffff;
};

const syntheticVector = (text: string, dimensions = DEFAULT_EMBEDDING_DIMENSIONS) => {
const normalized = normalizeText(text);
const hash = contentHashFor(normalized);
const values = Array.from({
return Array.from({
length: dimensions
}, (_, index) => {
const centered = seededUnitValue(hash, index) * 2 - 1;
return Number(centered.toFixed(8));
// Faster rounding than toFixed(8)
return Math.sign(centered) * Math.round(Math.abs(centered) * 1e8) / 1e8;
});
return normalizeVector(values);
};

const normalizeVector = (values: number[]) => {
Expand All @@ -43,7 +43,12 @@ const normalizeVector = (values: number[]) => {
return values.map(() => 0);
}

return values.map((value) => Number((value / magnitude).toFixed(8)));
const invMagnitude = 1 / magnitude;
return values.map((value) => {
const normalized = value * invMagnitude;
// Faster rounding than toFixed(8)
return Math.sign(normalized) * Math.round(Math.abs(normalized) * 1e8) / 1e8;
});
};

const toEmbeddingVectorRecord = (
Expand Down Expand Up @@ -127,7 +132,7 @@ const callOpenAiEmbeddings = async (
const data = payload.data ?? [];
return data
.sort((left, right) => (left.index ?? 0) - (right.index ?? 0))
.map((record) => normalizeVector(record.embedding ?? []));
.map((record) => record.embedding ?? []);
};

const embedBatchLive = async (
Expand Down
6 changes: 3 additions & 3 deletions workspace/users/{userId}/.jeanbot/context.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# JeanBot User Context

- Current mission: Smoke test
- Updated at: 2026-03-13T21:07:03.733Z
- Completed steps: Inspect workspace files | Load and update memory context | Run policy and risk review | Decompose objective into steps | Create safety checkpoint | Handle finance-sensitive workflows | Synthesize final mission result | Track status and coordination | Synthesize final mission result | Clarify mission constraints | Produce mission documentation
- Current mission: API mission
- Updated at: 2026-05-27T10:37:16.866Z
- Completed steps: Inspect workspace files | Load and update memory context | Run policy and risk review | Decompose objective into steps | Synthesize final mission result | Track status and coordination | Synthesize final mission result | Clarify mission constraints | Produce mission documentation
- In-progress steps: none
- Upcoming steps: none
Loading