From d6068beca2c02843b9eb649945fc74ce5ffff4a5 Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]"
 <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Thu, 28 May 2026 05:47:05 +0000
Subject: [PATCH] =?UTF-8?q?=E2=9A=A1=20Bolt:=20optimize=20synthetic=20embe?=
 =?UTF-8?q?dding=20generation=20and=20vector=20normalization?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Optimized synthetic embedding generation and vector math in `@jeanbot/ai`.
- Replaced per-dimension hashing with a single SHA-256 hash and a Mulberry32 PRNG.
- Replaced `toFixed(8)` with a faster `round8` utility using `Math.round`.
- Optimized `normalizeVector` with a single pass and inverse multiplication.
- Removed redundant normalization calls in the generation pipeline.
- Added a benchmark script and unit tests to verify performance and correctness.

Throughput increased from ~102 to ~4500 embeddings/sec (~45x gain).

Co-authored-by: hackerxj2010 <198651211+hackerxj2010@users.noreply.github.com>
---
 .jules/bolt.md                               |  3 ++
 packages/ai/src/benchmark.ts                 | 22 ++++++++
 packages/ai/src/index.test.ts                | 36 +++++++++++++
 packages/ai/src/index.ts                     | 56 ++++++++++++++------
 workspace/users/{userId}/.jeanbot/context.md |  6 +--
 5 files changed, 103 insertions(+), 20 deletions(-)
 create mode 100644 .jules/bolt.md
 create mode 100644 packages/ai/src/benchmark.ts
 create mode 100644 packages/ai/src/index.test.ts

diff --git a/.jules/bolt.md b/.jules/bolt.md
new file mode 100644
index 0000000..9ce56a6
--- /dev/null
+++ b/.jules/bolt.md
@@ -0,0 +1,3 @@
+## 2026-05-28 - [Mulberry32 PRNG for Synthetic Embeddings]
+**Learning:** Replaced O(dimensions * SHA256) with a single hash + Mulberry32 PRNG for synthetic embedding generation. This improved throughput by ~45x (from ~100 to ~4500 embeddings/sec).
+**Action:** Use seeded PRNGs for deterministic mock data generation instead of repeated hashing in hot paths.
diff --git a/packages/ai/src/benchmark.ts b/packages/ai/src/benchmark.ts
new file mode 100644
index 0000000..2d2e083
--- /dev/null
+++ b/packages/ai/src/benchmark.ts
@@ -0,0 +1,22 @@
+import { generateEmbeddings } from "./index.js";
+
+async function main() {
+  const inputs = Array.from({ length: 100 }, (_, i) => `This is a sample text for embedding generation number ${i}. It should be long enough to provide some work for the hashing algorithm.`);
+
+  console.log("Starting benchmark for synthetic embeddings (100 iterations)...");
+
+  // Warmup
+  await generateEmbeddings(inputs.slice(0, 10), { forceSynthetic: true });
+
+  const start = Date.now();
+  await generateEmbeddings(inputs, { forceSynthetic: true });
+  const end = Date.now();
+
+  const duration = end - start;
+  const throughput = (inputs.length / (duration / 1000)).toFixed(2);
+
+  console.log(`Duration: ${duration}ms`);
+  console.log(`Throughput: ${throughput} embeddings/sec`);
+}
+
+main().catch(console.error);
diff --git a/packages/ai/src/index.test.ts b/packages/ai/src/index.test.ts
new file mode 100644
index 0000000..71b3cf4
--- /dev/null
+++ b/packages/ai/src/index.test.ts
@@ -0,0 +1,36 @@
+import { describe, it, expect } from "vitest";
+import { generateEmbedding, cosineSimilarity, normalizeEmbeddingText } from "./index.js";
+
+describe("@jeanbot/ai", () => {
+  it("generates deterministic synthetic embeddings", async () => {
+    const text = "Hello, world!";
+    const first = await generateEmbedding(text, { forceSynthetic: true });
+    const second = await generateEmbedding(text, { forceSynthetic: true });
+
+    expect(first.values).toEqual(second.values);
+    expect(first.contentHash).toBe(second.contentHash);
+  });
+
+  it("produces normalized synthetic vectors", async () => {
+    const text = "Performance is key";
+    const record = await generateEmbedding(text, { forceSynthetic: true });
+
+    const magnitude = Math.sqrt(record.values.reduce((sum, v) => sum + v * v, 0));
+    expect(magnitude).toBeCloseTo(1, 6);
+  });
+
+  it("calculates cosine similarity correctly", () => {
+    const v1 = [1, 0, 0];
+    const v2 = [0, 1, 0];
+    const v3 = [1, 1, 0];
+
+    expect(cosineSimilarity(v1, v1)).toBeCloseTo(1, 6);
+    expect(cosineSimilarity(v1, v2)).toBeCloseTo(0, 6);
+    expect(cosineSimilarity(v1, v3)).toBeCloseTo(1 / Math.sqrt(2), 6);
+  });
+
+  it("normalizes text consistently", () => {
+    const input = "  multi  \n  space  ";
+    expect(normalizeEmbeddingText(input)).toBe("multi space");
+  });
+});
diff --git a/packages/ai/src/index.ts b/packages/ai/src/index.ts
index 6eb88fc..320d9a4 100644
--- a/packages/ai/src/index.ts
+++ b/packages/ai/src/index.ts
@@ -12,25 +12,36 @@ const MAX_RETRIES = 2;
 
 const normalizeText = (value: string) => value.replace(/\s+/g, " ").trim();
 
-const contentHashFor = (value: string) =>
-  crypto.createHash("sha256").update(normalizeText(value)).digest("hex");
+/**
+ * Fast rounding to 8 decimal places using Math.round.
+ * Significantly faster than toFixed(8).
+ */
+const round8 = (value: number) => Math.round(value * 1e8) / 1e8;
 
-const seededUnitValue = (seed: string, index: number) => {
-  const digest = crypto.createHash("sha256").update(`${seed}:${index}`).digest();
-  const int = digest.readUInt32BE(0);
-  return int / 0xffffffff;
-};
+const contentHashFor = (value: string) =>
+  crypto.hash("sha256", normalizeText(value), "hex");
 
 const syntheticVector = (text: string, dimensions = DEFAULT_EMBEDDING_DIMENSIONS) => {
   const normalized = normalizeText(text);
   const hash = contentHashFor(normalized);
-  const values = Array.from({
-    length: dimensions
-  }, (_, index) => {
-    const centered = seededUnitValue(hash, index) * 2 - 1;
-    return Number(centered.toFixed(8));
-  });
-  return normalizeVector(values);
+  const seed = Number.parseInt(hash.slice(0, 8), 16);
+
+  // Mulberry32 PRNG
+  let t = seed;
+  const next = () => {
+    t = (t + 0x6d2b79f5) | 0;
+    let z = t;
+    z = Math.imul(z ^ (z >>> 15), z | 1);
+    z = (z + Math.imul(z ^ (z >>> 7), z | 61)) | 0;
+    return ((z ^ (z >>> 14)) >>> 0) / 4294967296;
+  };
+
+  const values = new Array(dimensions);
+  for (let index = 0; index < dimensions; index += 1) {
+    const centered = next() * 2 - 1;
+    values[index] = round8(centered);
+  }
+  return values;
 };
 
 const normalizeVector = (values: number[]) => {
@@ -38,12 +49,23 @@ const normalizeVector = (values: number[]) => {
     return values;
   }
 
-  const magnitude = Math.sqrt(values.reduce((sum, value) => sum + value * value, 0));
+  let sum = 0;
+  for (let index = 0; index < values.length; index += 1) {
+    const val = values[index] ?? 0;
+    sum += val * val;
+  }
+
+  const magnitude = Math.sqrt(sum);
   if (magnitude === 0) {
     return values.map(() => 0);
   }
 
-  return values.map((value) => Number((value / magnitude).toFixed(8)));
+  const invMagnitude = 1 / magnitude;
+  const result = new Array(values.length);
+  for (let index = 0; index < values.length; index += 1) {
+    result[index] = round8((values[index] ?? 0) * invMagnitude);
+  }
+  return result;
 };
 
 const toEmbeddingVectorRecord = (
@@ -127,7 +149,7 @@ const callOpenAiEmbeddings = async (
   const data = payload.data ?? [];
   return data
     .sort((left, right) => (left.index ?? 0) - (right.index ?? 0))
-    .map((record) => normalizeVector(record.embedding ?? []));
+    .map((record) => record.embedding ?? []);
 };
 
 const embedBatchLive = async (
diff --git a/workspace/users/{userId}/.jeanbot/context.md b/workspace/users/{userId}/.jeanbot/context.md
index 207eb92..f47b44b 100644
--- a/workspace/users/{userId}/.jeanbot/context.md
+++ b/workspace/users/{userId}/.jeanbot/context.md
@@ -1,7 +1,7 @@
 # JeanBot User Context
 
-- Current mission: Smoke test
-- Updated at: 2026-03-13T21:07:03.733Z
-- Completed steps: Inspect workspace files | Load and update memory context | Run policy and risk review | Decompose objective into steps | Create safety checkpoint | Handle finance-sensitive workflows | Synthesize final mission result | Track status and coordination | Synthesize final mission result | Clarify mission constraints | Produce mission documentation
+- Current mission: API mission
+- Updated at: 2026-05-28T05:40:32.576Z
+- Completed steps: Inspect workspace files | Load and update memory context | Run policy and risk review | Decompose objective into steps | Synthesize final mission result | Track status and coordination | Synthesize final mission result | Clarify mission constraints | Produce mission documentation
 - In-progress steps: none
 - Upcoming steps: none
\ No newline at end of file