From 21266e3244efddb8f77017e7e6d6cf209ba44aed Mon Sep 17 00:00:00 2001
From: Aidan Daly <aidandal@amazon.com>
Date: Mon, 29 Jun 2026 18:23:11 +0000
Subject: [PATCH 1/3] fix(templates): give TypeScript templates per-session
 in-process short-term memory
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The TypeScript Strands no-memory template cached a single global Agent
shared across every session, so a process serving multiple sessions (e.g.
`agentcore dev`) leaked conversation history between them, and that shared
history grew without bound. The VercelAI template was stateless and could
not recall earlier turns even within one session.

Key both templates' in-process history by sessionId and cap the number of
cached sessions at 128 with an insertion-ordered Map acting as an LRU —
mirroring the Python templates. On AgentCore Runtime each microVM serves a
single session, so this holds one entry; the bound only matters for
many-session processes like local dev. No AWS service is provisioned; this
is free, best-effort short-term memory that resets on cold start.

Verified on deployed AgentCore Runtime: same-session recall works,
distinct sessions stay isolated, and bare invokes are unaffected.
---
 .../assets.snapshot.test.ts.snap              | 79 +++++++++++++++----
 .../typescript/http/strands/base/main.ts      | 40 +++++++---
 .../typescript/http/vercelai/base/main.ts     | 37 ++++++++-
 3 files changed, 129 insertions(+), 27 deletions(-)
diff --git a/src/assets/__tests__/__snapshots__/assets.snapshot.test.ts.snap b/src/assets/__tests__/__snapshots__/assets.snapshot.test.ts.snap
index c13c5320a..79588a13b 100644
--- a/src/assets/__tests__/__snapshots__/assets.snapshot.test.ts.snap
+++ b/src/assets/__tests__/__snapshots__/assets.snapshot.test.ts.snap
@@ -7695,18 +7695,35 @@ async function getOrCreateAgent(sessionId: string, actorId: string): Promise<Age
   return agent;
 }
 {{else}}
-let cachedAgent: Agent | null = null;
-
-async function getOrCreateAgent(): Promise<Agent> {
-  if (!cachedAgent) {
-    const model = await loadModel();
-    cachedAgent = new Agent({
-      model,
-      systemPrompt: SYSTEM_PROMPT,
-      tools,
-    });
+const AGENT_CACHE_LIMIT = 128;
+
+// Reuses one Agent per sessionId so each session keeps its own in-process
+// conversation history (best-effort; resets on cold start). A Map preserves
+// insertion order, so it doubles as an LRU bounded to 128 sessions — a local
+// dev process serving many sessions cannot leak history between them or grow
+// without bound. On AgentCore Runtime each microVM serves a single session, so
+// this holds one entry. For durable history, attach memory.
+const agentCache = new Map<string, Agent>();
+
+async function getOrCreateAgent(sessionId: string): Promise<Agent> {
+  const existing = agentCache.get(sessionId);
+  if (existing) {
+    agentCache.delete(sessionId);
+    agentCache.set(sessionId, existing);
+    return existing;
   }
-  return cachedAgent;
+  if (agentCache.size >= AGENT_CACHE_LIMIT) {
+    const oldest = agentCache.keys().next().value;
+    if (oldest !== undefined) agentCache.delete(oldest);
+  }
+  const model = await loadModel();
+  const agent = new Agent({
+    model,
+    systemPrompt: SYSTEM_PROMPT,
+    tools,
+  });
+  agentCache.set(sessionId, agent);
+  return agent;
 }
 {{/if}}
 
@@ -7718,7 +7735,8 @@ const app = new BedrockAgentCoreApp({
       const actorId = getActorId(payload, context);
       const agent = await getOrCreateAgent(sessionId, actorId);
       {{else}}
-      const agent = await getOrCreateAgent();
+      const sessionId = context?.sessionId ?? 'default-session';
+      const agent = await getOrCreateAgent(sessionId);
       {{/if}}
 
       {{#if hasMemory}}
@@ -8068,24 +8086,57 @@ Thumbs.db
 
 exports[`Assets Directory Snapshots > TypeScript assets > typescript/typescript/http/vercelai/base/main.ts should match snapshot 1`] = `
 "import { BedrockAgentCoreApp } from 'bedrock-agentcore/runtime';
-import { streamText } from 'ai';
+import { streamText, type ModelMessage } from 'ai';
 import { loadModel } from './model/load.js';
 
 const SYSTEM_PROMPT = \`You are a helpful assistant.\`;
 
+const HISTORY_LIMIT = 128;
+
+// Keeps one message history per sessionId so each session remembers its own
+// turns (best-effort; resets on cold start). A Map preserves insertion order,
+// so it doubles as an LRU bounded to 128 sessions — a local dev process serving
+// many sessions cannot leak history between them or grow without bound. On
+// AgentCore Runtime each microVM serves a single session, so this holds one
+// entry. For durable history, persist messages to an external store.
+const histories = new Map<string, ModelMessage[]>();
+
+function getHistory(sessionId: string): ModelMessage[] {
+  const existing = histories.get(sessionId);
+  if (existing) {
+    histories.delete(sessionId);
+    histories.set(sessionId, existing);
+    return existing;
+  }
+  if (histories.size >= HISTORY_LIMIT) {
+    const oldest = histories.keys().next().value;
+    if (oldest !== undefined) histories.delete(oldest);
+  }
+  const fresh: ModelMessage[] = [];
+  histories.set(sessionId, fresh);
+  return fresh;
+}
+
 const app = new BedrockAgentCoreApp({
   invocationHandler: {
     async *process(payload: any, context: any) {
+      const sessionId = context?.sessionId ?? 'default-session';
+      const messages = getHistory(sessionId);
+      messages.push({ role: 'user', content: payload.prompt ?? '' });
+
       const model = await loadModel();
       const result = streamText({
         model,
         system: SYSTEM_PROMPT,
-        prompt: payload.prompt ?? '',
+        messages,
       });
 
+      let assistant = '';
       for await (const chunk of result.textStream) {
+        assistant += chunk;
         yield { data: chunk };
       }
+      messages.push({ role: 'assistant', content: assistant });
     },
   },
 });
diff --git a/src/assets/typescript/http/strands/base/main.ts b/src/assets/typescript/http/strands/base/main.ts
index 4b33f583c..32e6c4810 100644
--- a/src/assets/typescript/http/strands/base/main.ts
+++ b/src/assets/typescript/http/strands/base/main.ts
@@ -53,18 +53,35 @@ async function getOrCreateAgent(sessionId: string, actorId: string): Promise<Age
   return agent;
 }
 {{else}}
-let cachedAgent: Agent | null = null;
+const AGENT_CACHE_LIMIT = 128;
 
-async function getOrCreateAgent(): Promise<Agent> {
-  if (!cachedAgent) {
-    const model = await loadModel();
-    cachedAgent = new Agent({
-      model,
-      systemPrompt: SYSTEM_PROMPT,
-      tools,
-    });
+// Reuses one Agent per sessionId so each session keeps its own in-process
+// conversation history (best-effort; resets on cold start). A Map preserves
+// insertion order, so it doubles as an LRU bounded to 128 sessions — a local
+// dev process serving many sessions cannot leak history between them or grow
+// without bound. On AgentCore Runtime each microVM serves a single session, so
+// this holds one entry. For durable history, attach memory.
+const agentCache = new Map<string, Agent>();
+
+async function getOrCreateAgent(sessionId: string): Promise<Agent> {
+  const existing = agentCache.get(sessionId);
+  if (existing) {
+    agentCache.delete(sessionId);
+    agentCache.set(sessionId, existing);
+    return existing;
+  }
+  if (agentCache.size >= AGENT_CACHE_LIMIT) {
+    const oldest = agentCache.keys().next().value;
+    if (oldest !== undefined) agentCache.delete(oldest);
   }
-  return cachedAgent;
+  const model = await loadModel();
+  const agent = new Agent({
+    model,
+    systemPrompt: SYSTEM_PROMPT,
+    tools,
+  });
+  agentCache.set(sessionId, agent);
+  return agent;
 }
 {{/if}}
 
@@ -76,7 +93,8 @@ const app = new BedrockAgentCoreApp({
       const actorId = getActorId(payload, context);
       const agent = await getOrCreateAgent(sessionId, actorId);
       {{else}}
-      const agent = await getOrCreateAgent();
+      const sessionId = context?.sessionId ?? 'default-session';
+      const agent = await getOrCreateAgent(sessionId);
       {{/if}}
 
       {{#if hasMemory}}
diff --git a/src/assets/typescript/http/vercelai/base/main.ts b/src/assets/typescript/http/vercelai/base/main.ts
index 09fdb933f..81535a991 100644
--- a/src/assets/typescript/http/vercelai/base/main.ts
+++ b/src/assets/typescript/http/vercelai/base/main.ts
@@ -1,22 +1,55 @@
 import { BedrockAgentCoreApp } from 'bedrock-agentcore/runtime';
-import { streamText } from 'ai';
+import { streamText, type ModelMessage } from 'ai';
 import { loadModel } from './model/load.js';
 
 const SYSTEM_PROMPT = `You are a helpful assistant.`;
 
+const HISTORY_LIMIT = 128;
+
+// Keeps one message history per sessionId so each session remembers its own
+// turns (best-effort; resets on cold start). A Map preserves insertion order,
+// so it doubles as an LRU bounded to 128 sessions — a local dev process serving
+// many sessions cannot leak history between them or grow without bound. On
+// AgentCore Runtime each microVM serves a single session, so this holds one
+// entry. For durable history, persist messages to an external store.
+const histories = new Map<string, ModelMessage[]>();
+
+function getHistory(sessionId: string): ModelMessage[] {
+  const existing = histories.get(sessionId);
+  if (existing) {
+    histories.delete(sessionId);
+    histories.set(sessionId, existing);
+    return existing;
+  }
+  if (histories.size >= HISTORY_LIMIT) {
+    const oldest = histories.keys().next().value;
+    if (oldest !== undefined) histories.delete(oldest);
+  }
+  const fresh: ModelMessage[] = [];
+  histories.set(sessionId, fresh);
+  return fresh;
+}
+
 const app = new BedrockAgentCoreApp({
   invocationHandler: {
     async *process(payload: any, context: any) {
+      const sessionId = context?.sessionId ?? 'default-session';
+      const messages = getHistory(sessionId);
+      messages.push({ role: 'user', content: payload.prompt ?? '' });
+
       const model = await loadModel();
       const result = streamText({
         model,
         system: SYSTEM_PROMPT,
-        prompt: payload.prompt ?? '',
+        messages,
       });
 
+      let assistant = '';
       for await (const chunk of result.textStream) {
+        assistant += chunk;
         yield { data: chunk };
       }
+      messages.push({ role: 'assistant', content: assistant });
     },
   },
 });

From ccb52f30277441a1559e459172b5a2fea24986d4 Mon Sep 17 00:00:00 2001
From: Aidan Daly <aidandal@amazon.com>
Date: Mon, 29 Jun 2026 19:57:29 +0000
Subject: [PATCH 2/3] fix(templates): don't persist VercelAI history on a
 failed or empty turn

streamText errors surface as a silently-empty textStream (the for-await
completes with zero chunks rather than throwing), so the previous code
committed the user turn plus an empty assistant turn. On the next
invocation for the same sessionId, the provider rejected the empty
assistant content, poisoning the session for the rest of the process
lifetime.

Build the request from a staged copy ([...history, userMessage]) and only
push the exchange into the persistent history after a non-empty reply, so
a failed turn is dropped cleanly. Addresses PR review feedback.
---
 .../__snapshots__/assets.snapshot.test.ts.snap    | 15 +++++++++++----
 src/assets/typescript/http/vercelai/base/main.ts  | 15 +++++++++++----
 2 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/src/assets/__tests__/__snapshots__/assets.snapshot.test.ts.snap b/src/assets/__tests__/__snapshots__/assets.snapshot.test.ts.snap
index 79588a13b..bd3b121f4 100644
--- a/src/assets/__tests__/__snapshots__/assets.snapshot.test.ts.snap
+++ b/src/assets/__tests__/__snapshots__/assets.snapshot.test.ts.snap
@@ -8121,14 +8121,14 @@ const app = new BedrockAgentCoreApp({
   invocationHandler: {
     async *process(payload: any, context: any) {
       const sessionId = context?.sessionId ?? 'default-session';
-      const messages = getHistory(sessionId);
-      messages.push({ role: 'user', content: payload.prompt ?? '' });
+      const history = getHistory(sessionId);
+      const userMessage: ModelMessage = { role: 'user', content: payload.prompt ?? '' };
 
       const model = await loadModel();
       const result = streamText({
         model,
         system: SYSTEM_PROMPT,
-        messages,
+        messages: [...history, userMessage],
       });
 
       let assistant = '';
@@ -8136,7 +8136,14 @@ const app = new BedrockAgentCoreApp({
         assistant += chunk;
         yield { data: chunk };
       }
-      messages.push({ role: 'assistant', content: assistant });
+
+      // Commit the exchange to history only after a non-empty reply. On a failed
+      // or empty stream the turn is dropped instead of leaving a dangling user
+      // (or empty assistant) message — consecutive same-role or empty-content
+      // messages would otherwise be rejected on the next turn for this session.
+      if (assistant.length > 0) {
+        history.push(userMessage, { role: 'assistant', content: assistant });
+      }
     },
   },
 });
diff --git a/src/assets/typescript/http/vercelai/base/main.ts b/src/assets/typescript/http/vercelai/base/main.ts
index 81535a991..b899c9f31 100644
--- a/src/assets/typescript/http/vercelai/base/main.ts
+++ b/src/assets/typescript/http/vercelai/base/main.ts
@@ -34,14 +34,14 @@ const app = new BedrockAgentCoreApp({
   invocationHandler: {
     async *process(payload: any, context: any) {
       const sessionId = context?.sessionId ?? 'default-session';
-      const messages = getHistory(sessionId);
-      messages.push({ role: 'user', content: payload.prompt ?? '' });
+      const history = getHistory(sessionId);
+      const userMessage: ModelMessage = { role: 'user', content: payload.prompt ?? '' };
 
       const model = await loadModel();
       const result = streamText({
         model,
         system: SYSTEM_PROMPT,
-        messages,
+        messages: [...history, userMessage],
       });
 
       let assistant = '';
@@ -49,7 +49,14 @@ const app = new BedrockAgentCoreApp({
         assistant += chunk;
         yield { data: chunk };
       }
-      messages.push({ role: 'assistant', content: assistant });
+
+      // Commit the exchange to history only after a non-empty reply. On a failed
+      // or empty stream the turn is dropped instead of leaving a dangling user
+      // (or empty assistant) message — consecutive same-role or empty-content
+      // messages would otherwise be rejected on the next turn for this session.
+      if (assistant.length > 0) {
+        history.push(userMessage, { role: 'assistant', content: assistant });
+      }
     },
   },
 });

From 1540ed67ce9b6a6026a718258cf78e023fde87f1 Mon Sep 17 00:00:00 2001
From: Aidan Daly <aidandal@amazon.com>
Date: Mon, 29 Jun 2026 20:15:03 +0000
Subject: [PATCH 3/3] fix(templates): roll back Strands history on a failed
 turn
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Agent.stream() appends the user message to the cached agent's history
before invoking the model, and a generic mid-stream error (e.g. throttling
or a transient provider error) re-throws without rolling that back. Since
the no-memory branch now caches one Agent per sessionId, the next turn on
that session would append a second user message, leaving consecutive user
turns that providers requiring strict role alternation (e.g. Anthropic)
reject — poisoning the session for the process lifetime.

Snapshot the agent's messages before streaming with the SDK's takeSnapshot
and, if the stream throws, restore them with loadSnapshot in a catch block
before re-throwing. Scoped to the
no-memory branch; the memory branch persists via the session manager and
must not roll back local state. Verified end-to-end through the runtime
handler: a forced turn-1 failure leaves history clean and the next turn on
the same session succeeds. Addresses PR review feedback.
---
 .../assets.snapshot.test.ts.snap              | 26 ++++++++++++++-----
 .../typescript/http/strands/base/main.ts      | 26 ++++++++++++++-----
 2 files changed, 38 insertions(+), 14 deletions(-)

diff --git a/src/assets/__tests__/__snapshots__/assets.snapshot.test.ts.snap b/src/assets/__tests__/__snapshots__/assets.snapshot.test.ts.snap
index bd3b121f4..77c1d1ee8 100644
--- a/src/assets/__tests__/__snapshots__/assets.snapshot.test.ts.snap
+++ b/src/assets/__tests__/__snapshots__/assets.snapshot.test.ts.snap
@@ -7757,14 +7757,26 @@ const app = new BedrockAgentCoreApp({
         await agent.memoryManager?.flush();
       }
       {{else}}
-      for await (const event of agent.stream(payload.prompt ?? '')) {
-        if (
-          event.type === 'modelStreamUpdateEvent' &&
-          event.event?.type === 'modelContentBlockDeltaEvent' &&
-          event.event.delta?.type === 'textDelta'
-        ) {
-          yield { data: event.event.delta.text };
+      // Snapshot history before streaming so a failed turn can be rolled back.
+      // Agent.stream() appends the user message before invoking the model; on a
+      // mid-stream error that user turn would otherwise linger in the cached
+      // agent, and the next turn for this session would send consecutive user
+      // messages (rejected by providers that require strict role alternation,
+      // e.g. Anthropic). Restoring on error keeps the session reusable.
+      const snapshot = agent.takeSnapshot({ include: ['messages'] });
+      try {
+        for await (const event of agent.stream(payload.prompt ?? '')) {
+          if (
+            event.type === 'modelStreamUpdateEvent' &&
+            event.event?.type === 'modelContentBlockDeltaEvent' &&
+            event.event.delta?.type === 'textDelta'
+          ) {
+            yield { data: event.event.delta.text };
+          }
         }
+      } catch (error) {
+        agent.loadSnapshot(snapshot);
+        throw error;
       }
       {{/if}}
     },
diff --git a/src/assets/typescript/http/strands/base/main.ts b/src/assets/typescript/http/strands/base/main.ts
index 32e6c4810..fe7c7136b 100644
--- a/src/assets/typescript/http/strands/base/main.ts
+++ b/src/assets/typescript/http/strands/base/main.ts
@@ -115,14 +115,26 @@ const app = new BedrockAgentCoreApp({
         await agent.memoryManager?.flush();
       }
       {{else}}
-      for await (const event of agent.stream(payload.prompt ?? '')) {
-        if (
-          event.type === 'modelStreamUpdateEvent' &&
-          event.event?.type === 'modelContentBlockDeltaEvent' &&
-          event.event.delta?.type === 'textDelta'
-        ) {
-          yield { data: event.event.delta.text };
+      // Snapshot history before streaming so a failed turn can be rolled back.
+      // Agent.stream() appends the user message before invoking the model; on a
+      // mid-stream error that user turn would otherwise linger in the cached
+      // agent, and the next turn for this session would send consecutive user
+      // messages (rejected by providers that require strict role alternation,
+      // e.g. Anthropic). Restoring on error keeps the session reusable.
+      const snapshot = agent.takeSnapshot({ include: ['messages'] });
+      try {
+        for await (const event of agent.stream(payload.prompt ?? '')) {
+          if (
+            event.type === 'modelStreamUpdateEvent' &&
+            event.event?.type === 'modelContentBlockDeltaEvent' &&
+            event.event.delta?.type === 'textDelta'
+          ) {
+            yield { data: event.event.delta.text };
+          }
         }
+      } catch (error) {
+        agent.loadSnapshot(snapshot);
+        throw error;
       }
       {{/if}}
     },