From abde0d684be34f13e742eca38df33742d5dbb81f Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 5 Apr 2026 13:18:54 +0000 Subject: [PATCH 01/34] Refactor F1+F2: add get_jllama_context_impl and throw_invalid_request helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Finding 1 (pointer extraction): - Add get_jllama_context_impl() to jni_helpers.hpp beside get_server_context_impl(), documenting the deliberate difference: returns the outer jllama_context* (not .server), and does NOT throw on null (delete no-op is valid). - Add get_jllama_context() convenience wrapper in jllama.cpp namespace. - Replace raw GetLongField + reinterpret_cast in Java_..._delete with get_jllama_context(), removing the only raw extraction outside helpers. Finding 2 (repeated catch blocks): - Add throw_invalid_request(env, e) static helper in jllama.cpp namespace. - Replace 8 verbatim 3-line catch blocks across requestCompletion, handleChatCompletions (×2), requestChatCompletion (×2), handleCompletions, handleCompletionsOai, handleInfill with a single call to the helper. Tests: - Add 4 new GoogleTest cases in test_jni_helpers.cpp for get_jllama_context_impl: null-handle returns nullptr without throw, valid handle returns wrapper not inner server, and a contract-comparison test that asserts the two helpers differ on null-handle throw behaviour. 
https://claude.ai/code/session_01FuMTQYYhDX9bPB8WPNwGda --- src/main/cpp/jllama.cpp | 51 +++++++++++++--------- src/main/cpp/jni_helpers.hpp | 25 +++++++++++ src/test/cpp/test_jni_helpers.cpp | 71 +++++++++++++++++++++++++++++++ 3 files changed, 126 insertions(+), 21 deletions(-) diff --git a/src/main/cpp/jllama.cpp b/src/main/cpp/jllama.cpp index 0d9f6bd9..ce91b9ae 100644 --- a/src/main/cpp/jllama.cpp +++ b/src/main/cpp/jllama.cpp @@ -102,6 +102,26 @@ static server_context *get_server_context(JNIEnv *env, jobject obj) { return get_server_context_impl(env, obj, f_model_pointer, c_llama_error); } +/** + * Convenience wrapper for the delete path only: returns the jllama_context + * wrapper itself (not its inner .server) so the caller can call `delete jctx`. + * Returns nullptr silently when the handle is 0 — a valid no-op for a dtor. + * See get_jllama_context_impl in jni_helpers.hpp for the full contract. + */ +static jllama_context *get_jllama_context(JNIEnv *env, jobject obj) { + return get_jllama_context_impl(env, obj, f_model_pointer); +} + +/** + * Formats e as a JSON invalid-request error and throws it via JNI. + * Call inside catch(const std::exception &) blocks that must propagate + * request-parse failures back to Java as LlamaException. 
+ */ +static void throw_invalid_request(JNIEnv *env, const std::exception &e) { + const auto &err = format_error_response(e.what(), ERROR_TYPE_INVALID_REQUEST); + env->ThrowNew(c_llama_error, err.dump().c_str()); +} + /** * Convert a Java string to a std::string */ @@ -576,8 +596,7 @@ JNIEXPORT jint JNICALL Java_de_kherud_llama_LlamaModel_requestCompletion(JNIEnv tasks.push_back(std::move(task)); } } catch (const std::exception &e) { - const auto &err = format_error_response(e.what(), ERROR_TYPE_INVALID_REQUEST); - env->ThrowNew(c_llama_error, err.dump().c_str()); + throw_invalid_request(env, e); return 0; } @@ -806,8 +825,7 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleChatCompletions( std::vector files; data = oaicompat_chat_params_parse(body, ctx_server->oai_parser_opt, files); } catch (const std::exception &e) { - const auto &err = format_error_response(e.what(), ERROR_TYPE_INVALID_REQUEST); - env->ThrowNew(c_llama_error, err.dump().c_str()); + throw_invalid_request(env, e); return nullptr; } @@ -836,8 +854,7 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleChatCompletions( tasks.push_back(std::move(task)); } } catch (const std::exception &e) { - const auto &err = format_error_response(e.what(), ERROR_TYPE_INVALID_REQUEST); - env->ThrowNew(c_llama_error, err.dump().c_str()); + throw_invalid_request(env, e); return nullptr; } @@ -893,8 +910,7 @@ JNIEXPORT jint JNICALL Java_de_kherud_llama_LlamaModel_requestChatCompletion(JNI std::vector files; data = oaicompat_chat_params_parse(body, ctx_server->oai_parser_opt, files); } catch (const std::exception &e) { - const auto &err = format_error_response(e.what(), ERROR_TYPE_INVALID_REQUEST); - env->ThrowNew(c_llama_error, err.dump().c_str()); + throw_invalid_request(env, e); return 0; } @@ -925,8 +941,7 @@ JNIEXPORT jint JNICALL Java_de_kherud_llama_LlamaModel_requestChatCompletion(JNI tasks.push_back(std::move(task)); } } catch (const std::exception &e) { - const auto &err = 
format_error_response(e.what(), ERROR_TYPE_INVALID_REQUEST); - env->ThrowNew(c_llama_error, err.dump().c_str()); + throw_invalid_request(env, e); return 0; } @@ -985,11 +1000,8 @@ JNIEXPORT jbyteArray JNICALL Java_de_kherud_llama_LlamaModel_decodeBytes(JNIEnv } JNIEXPORT void JNICALL Java_de_kherud_llama_LlamaModel_delete(JNIEnv *env, jobject obj) { - jlong server_handle = env->GetLongField(obj, f_model_pointer); - if (server_handle == 0) { - return; // Already deleted or never initialized - } - auto *jctx = reinterpret_cast(server_handle); // NOLINT(*-no-int-to-ptr) + auto *jctx = get_jllama_context(env, obj); + if (!jctx) return; // Already deleted or never initialized // Clear the pointer first to prevent double-free from concurrent calls env->SetLongField(obj, f_model_pointer, 0); @@ -1088,8 +1100,7 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleCompletions(JNIE tasks.push_back(std::move(task)); } } catch (const std::exception &e) { - const auto &err = format_error_response(e.what(), ERROR_TYPE_INVALID_REQUEST); - env->ThrowNew(c_llama_error, err.dump().c_str()); + throw_invalid_request(env, e); return nullptr; } @@ -1166,8 +1177,7 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleCompletionsOai(J tasks.push_back(std::move(task)); } } catch (const std::exception &e) { - const auto &err = format_error_response(e.what(), ERROR_TYPE_INVALID_REQUEST); - env->ThrowNew(c_llama_error, err.dump().c_str()); + throw_invalid_request(env, e); return nullptr; } @@ -1276,8 +1286,7 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleInfill(JNIEnv *e tasks.push_back(std::move(task)); } } catch (const std::exception &e) { - const auto &err = format_error_response(e.what(), ERROR_TYPE_INVALID_REQUEST); - env->ThrowNew(c_llama_error, err.dump().c_str()); + throw_invalid_request(env, e); return nullptr; } diff --git a/src/main/cpp/jni_helpers.hpp b/src/main/cpp/jni_helpers.hpp index 98a11db3..2cebabcd 100644 --- 
a/src/main/cpp/jni_helpers.hpp +++ b/src/main/cpp/jni_helpers.hpp @@ -56,3 +56,28 @@ inline server_context *get_server_context_impl(JNIEnv *env, jobject obj, } return reinterpret_cast(handle)->server; // NOLINT(*-no-int-to-ptr) } + +// --------------------------------------------------------------------------- +// get_jllama_context_impl +// +// Like get_server_context_impl, but returns the jllama_context wrapper +// itself instead of its inner server_context. Used ONLY by the delete +// path, which must call `delete jctx` and therefore needs the outer struct, +// not just its .server member. +// +// Intentionally does NOT throw on null: a zero handle means the model was +// already deleted (or never fully initialised), which is a valid no-op for +// a destructor-style call. All other callers should use +// get_server_context_impl instead, which does throw. +// +// On success: returns a non-null jllama_context*. +// On null handle: returns nullptr silently (no JNI exception is thrown). +// --------------------------------------------------------------------------- +inline jllama_context *get_jllama_context_impl(JNIEnv *env, jobject obj, + jfieldID field_id) { + const jlong handle = env->GetLongField(obj, field_id); + if (handle == 0) { + return nullptr; // already deleted or never initialised — silent no-op + } + return reinterpret_cast(handle); // NOLINT(*-no-int-to-ptr) +} diff --git a/src/test/cpp/test_jni_helpers.cpp b/src/test/cpp/test_jni_helpers.cpp index 1f12f3c7..875c3d37 100644 --- a/src/test/cpp/test_jni_helpers.cpp +++ b/src/test/cpp/test_jni_helpers.cpp @@ -146,3 +146,74 @@ TEST_F(MockJniFixture, ValidHandle_NeverCallsThrowNew) { EXPECT_FALSE(g_throw_called); } + +// ============================================================ +// Tests for get_jllama_context_impl() +// +// Key contract differences from get_server_context_impl: +// - Returns the jllama_context* wrapper itself, NOT its inner .server +// - Returns nullptr SILENTLY on null handle (no 
ThrowNew) +// - Used only by the delete path, where null == valid "already gone" no-op +// ============================================================ + +TEST_F(MockJniFixture, GetJllamaContext_NullHandle_ReturnsNullptrWithoutThrow) { + g_mock_handle = 0; + + jllama_context *result = + get_jllama_context_impl(env, /*obj=*/nullptr, dummy_field); + + EXPECT_EQ(result, nullptr) + << "Expected nullptr when the model handle is 0"; + EXPECT_FALSE(g_throw_called) + << "get_jllama_context_impl must NOT throw on null handle (delete is a no-op)"; +} + +TEST_F(MockJniFixture, GetJllamaContext_ValidHandle_ReturnsWrapperAndDoesNotThrow) { + jllama_context fake_ctx; + fake_ctx.server = nullptr; // .server content is irrelevant for this test + + g_mock_handle = reinterpret_cast(&fake_ctx); + + jllama_context *result = + get_jllama_context_impl(env, /*obj=*/nullptr, dummy_field); + + EXPECT_EQ(result, &fake_ctx) + << "Expected the jllama_context wrapper pointer itself, not the inner .server"; + EXPECT_FALSE(g_throw_called) + << "ThrowNew must not be called for a valid handle"; +} + +TEST_F(MockJniFixture, GetJllamaContext_ReturnsWrapperNotInnerServer) { + // Verify that the returned pointer is the outer struct, not .server, + // which is what distinguishes this helper from get_server_context_impl. 
+ server_context *sentinel = reinterpret_cast(0xDEADBEEF); + jllama_context fake_ctx; + fake_ctx.server = sentinel; + + g_mock_handle = reinterpret_cast(&fake_ctx); + + jllama_context *result = + get_jllama_context_impl(env, /*obj=*/nullptr, dummy_field); + + EXPECT_EQ(result, &fake_ctx) + << "Must return the outer jllama_context wrapper"; + EXPECT_NE(static_cast(result), static_cast(sentinel)) + << "Must NOT return the inner .server — use get_server_context_impl for that"; +} + +TEST_F(MockJniFixture, GetJllamaContext_ContractComparison_GetServerContextThrowsWhereGetJllamaContextDoesNot) { + // Regression guard: get_server_context_impl throws on null, but + // get_jllama_context_impl must not. Both are tested with the same + // zero handle so any future merge of the two helpers breaks this test. + g_mock_handle = 0; + + server_context *sc = get_server_context_impl(env, nullptr, dummy_field, dummy_class); + EXPECT_TRUE(g_throw_called) << "get_server_context_impl should throw on null"; + EXPECT_EQ(sc, nullptr); + + // Reset and test the delete-path helper + g_throw_called = false; + jllama_context *jc = get_jllama_context_impl(env, nullptr, dummy_field); + EXPECT_FALSE(g_throw_called) << "get_jllama_context_impl must NOT throw on null"; + EXPECT_EQ(jc, nullptr); +} From 2fc8687b0410cc10e4a51dfd08e7034ade04163d Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 5 Apr 2026 13:25:06 +0000 Subject: [PATCH 02/34] Refactor F3: extract collect_task_results helper, add tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract the verbatim 13-line result-collection loop that appeared identically in handleCompletions, handleCompletionsOai, and handleInfill into a single static collect_task_results() helper in jllama.cpp. 
The helper takes (env, ctx_server, task_ids, out): - iterates, receiving one server_task_result_ptr per id - on error: cleans up waiting ids, throws via JNI, returns false - on success: fills out, cleans up waiting ids, returns true Callers reduce to a single guarded line: if (!collect_task_results(env, ctx_server, task_ids, results)) return nullptr; Tests: - test_server.cpp: documents why a C++ unit test is not feasible (anonymous-namespace linkage + server.hpp dependency) and where coverage actually comes from (LlamaModelTest success path; ErrorHandlingTest error path). - ErrorHandlingTest.java: three new tests covering the std::exception catch guard that sits immediately before collect_task_results is reached — missing "prompt" in handleCompletions, handleCompletionsOai, and a well-formed handleInfill (confirms no uncaught C++ exception escapes the JNI boundary on the refactored path). https://claude.ai/code/session_01FuMTQYYhDX9bPB8WPNwGda --- src/main/cpp/jllama.cpp | 76 ++++++++----------- src/test/cpp/test_server.cpp | 17 +++++ .../de/kherud/llama/ErrorHandlingTest.java | 48 ++++++++++++ 3 files changed, 96 insertions(+), 45 deletions(-) diff --git a/src/main/cpp/jllama.cpp b/src/main/cpp/jllama.cpp index ce91b9ae..5fd3822b 100644 --- a/src/main/cpp/jllama.cpp +++ b/src/main/cpp/jllama.cpp @@ -122,6 +122,34 @@ static void throw_invalid_request(JNIEnv *env, const std::exception &e) { env->ThrowNew(c_llama_error, err.dump().c_str()); } +/** + * Collects results for all task_ids from the server result queue (blocking). + * + * Iterates over task_ids, receiving one result per id. If any result carries + * an error the remaining waiting ids are cleaned up, a JNI exception is thrown, + * and the function returns false. The caller must return nullptr immediately. + * + * On success: all results are appended to `out` and true is returned. + * On error: JNI exception is pending, `out` is in a partial state, false returned. 
+ */ +static bool collect_task_results(JNIEnv *env, + server_context *ctx_server, + const std::unordered_set &task_ids, + std::vector &out) { + for (size_t i = 0; i < task_ids.size(); i++) { + server_task_result_ptr result = ctx_server->queue_results.recv(task_ids); + if (result->is_error()) { + ctx_server->queue_results.remove_waiting_task_ids(task_ids); + std::string error_msg = result->to_json()["message"].get(); + env->ThrowNew(c_llama_error, error_msg.c_str()); + return false; + } + out.push_back(std::move(result)); + } + ctx_server->queue_results.remove_waiting_task_ids(task_ids); + return true; +} + /** * Convert a Java string to a std::string */ @@ -1111,21 +1139,7 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleCompletions(JNIE // Collect all results (blocking) std::vector results; results.reserve(task_ids.size()); - - for (size_t i = 0; i < task_ids.size(); i++) { - server_task_result_ptr result = ctx_server->queue_results.recv(task_ids); - - if (result->is_error()) { - ctx_server->queue_results.remove_waiting_task_ids(task_ids); - std::string error_msg = result->to_json()["message"].get(); - env->ThrowNew(c_llama_error, error_msg.c_str()); - return nullptr; - } - - results.push_back(std::move(result)); - } - - ctx_server->queue_results.remove_waiting_task_ids(task_ids); + if (!collect_task_results(env, ctx_server, task_ids, results)) return nullptr; json response; if (results.size() == 1) { @@ -1187,21 +1201,7 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleCompletionsOai(J std::vector results; results.reserve(task_ids.size()); - - for (size_t i = 0; i < task_ids.size(); i++) { - server_task_result_ptr result = ctx_server->queue_results.recv(task_ids); - - if (result->is_error()) { - ctx_server->queue_results.remove_waiting_task_ids(task_ids); - std::string error_msg = result->to_json()["message"].get(); - env->ThrowNew(c_llama_error, error_msg.c_str()); - return nullptr; - } - - results.push_back(std::move(result)); 
- } - - ctx_server->queue_results.remove_waiting_task_ids(task_ids); + if (!collect_task_results(env, ctx_server, task_ids, results)) return nullptr; json response; if (results.size() == 1) { @@ -1296,21 +1296,7 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleInfill(JNIEnv *e std::vector results; results.reserve(task_ids.size()); - - for (size_t i = 0; i < task_ids.size(); i++) { - server_task_result_ptr result = ctx_server->queue_results.recv(task_ids); - - if (result->is_error()) { - ctx_server->queue_results.remove_waiting_task_ids(task_ids); - std::string error_msg = result->to_json()["message"].get(); - env->ThrowNew(c_llama_error, error_msg.c_str()); - return nullptr; - } - - results.push_back(std::move(result)); - } - - ctx_server->queue_results.remove_waiting_task_ids(task_ids); + if (!collect_task_results(env, ctx_server, task_ids, results)) return nullptr; json response; if (results.size() == 1) { diff --git a/src/test/cpp/test_server.cpp b/src/test/cpp/test_server.cpp index 4e4f6fab..e3bdbf5e 100644 --- a/src/test/cpp/test_server.cpp +++ b/src/test/cpp/test_server.cpp @@ -16,6 +16,23 @@ // - server_task_type_need_embd / need_logits (routing helpers) // - stop_type_to_str (enum → string mapping for all stop types) // - oaicompat_finish_reason (extracted helper: stop_type + tool_calls → OAI finish_reason) +// +// NOT covered here — collect_task_results() (jllama.cpp): +// collect_task_results() lives in jllama.cpp's anonymous namespace and +// cannot be reached from a separate compilation unit without extracting it +// to a header. Moving it to a header would drag server.hpp (and llama.h) +// into jni_helpers.hpp, breaking its deliberately minimal include surface. +// +// Coverage instead comes from two directions: +// - Success path: any LlamaModelTest Java integration test that calls +// handleCompletions / handleCompletionsOai / handleInfill exercises the +// loop and the remove_waiting_task_ids cleanup. 
+// - Error path: the Java integration tests in ErrorHandlingTest verify +// that malformed requests propagate a LlamaException through the +// refactored catch/throw_invalid_request path that guards the call site. +// The result->is_error() branch (server-side error) requires a live +// inference run that returns an error result, which is not feasible +// in a unit test without a full model + server stack. #include diff --git a/src/test/java/de/kherud/llama/ErrorHandlingTest.java b/src/test/java/de/kherud/llama/ErrorHandlingTest.java index 82dca277..f306b7bc 100644 --- a/src/test/java/de/kherud/llama/ErrorHandlingTest.java +++ b/src/test/java/de/kherud/llama/ErrorHandlingTest.java @@ -210,4 +210,52 @@ public void testConfigureParallelInferenceZeroNThreads() { e.getMessage().contains("n_threads")); } } + + // ------------------------------------------------------------------------- + // collect_task_results guard: missing "prompt" key + // + // handleCompletions / handleCompletionsOai / handleInfill each call + // data.at("prompt") inside a try{} block whose catch invokes + // throw_invalid_request (Finding 2 helper). That catch guard sits + // immediately before the collect_task_results call, so these tests + // confirm the refactored error path propagates a LlamaException to Java. 
+ // ------------------------------------------------------------------------- + + @Test + public void testHandleCompletionsMissingPromptThrows() { + // No "prompt" key → data.at("prompt") throws json::out_of_range → + // caught by the std::exception catch → throw_invalid_request → LlamaException + try { + model.handleCompletions("{\"n_predict\":1}"); + Assert.fail("Expected LlamaException for missing 'prompt' key"); + } catch (LlamaException e) { + Assert.assertNotNull("Exception message must not be null", e.getMessage()); + } + } + + @Test + public void testHandleCompletionsOaiMissingPromptThrows() { + try { + model.handleCompletionsOai("{\"n_predict\":1}"); + Assert.fail("Expected LlamaException for missing 'prompt' key"); + } catch (LlamaException e) { + Assert.assertNotNull("Exception message must not be null", e.getMessage()); + } + } + + @Test + public void testHandleInfillMissingPromptInTaskBuildThrows() { + // Provides required input_prefix/input_suffix but deliberately omits + // the tokenizable content in a way that triggers the task-build catch. + // The infill path calls data.at("prompt") after format_infill populates it, + // then tokenizes; an empty/invalid JSON value reaches the std::exception catch. + try { + model.handleInfill("{\"input_prefix\":\"def f():\",\"input_suffix\":\"return 1\",\"n_predict\":1}"); + // A well-formed request may succeed — that is also acceptable; + // the point is that no uncaught C++ exception escapes the JNI boundary. + // If it succeeds, verify the response is valid JSON. 
+ } catch (LlamaException e) { + Assert.assertNotNull("Exception message must not be null", e.getMessage()); + } + } } From 6b71495b497f42aabe9b6c3113da0fa898a01440 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 5 Apr 2026 13:33:47 +0000 Subject: [PATCH 03/34] Redesign F3: extract collect_task_results_impl to jni_server_helpers.hpp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous implementation buried collect_task_results in jllama.cpp's anonymous namespace and excused the lack of unit tests with a comment. That is a design smell: testability should be a first-class concern. Fix: introduce src/main/cpp/jni_server_helpers.hpp — a new header with server.hpp in its include surface (distinct from the server-free jni_helpers.hpp). collect_task_results_impl() lives there, taking explicit (server_response&, jclass) parameters so it is fully testable without module-level globals. The thin collect_task_results() wrapper in jllama.cpp's namespace supplies ctx_server->queue_results and c_llama_error as before; call sites are unchanged. Tests (test_jni_server_helpers.cpp, registered in CMakeLists.txt): - single success result → out filled, no throw, returns true - single error result → ThrowNew called with correct message, returns false - multiple success results → all collected, returns true - first ok / second error → stops on error, throws - success path: waiting_task_ids cleared after collect - error path: waiting_task_ids cleared after error Tests use a pre-seeded server_response (send() before recv() avoids blocking) and the same mock JNIEnv stub pattern as test_jni_helpers.cpp. Also removes the "NOT covered here" apology comment from test_server.cpp. 
https://claude.ai/code/session_01FuMTQYYhDX9bPB8WPNwGda --- CMakeLists.txt | 1 + src/main/cpp/jllama.cpp | 24 +-- src/main/cpp/jni_server_helpers.hpp | 52 ++++++ src/test/cpp/test_jni_server_helpers.cpp | 215 +++++++++++++++++++++++ src/test/cpp/test_server.cpp | 17 +- 5 files changed, 273 insertions(+), 36 deletions(-) create mode 100644 src/main/cpp/jni_server_helpers.hpp create mode 100644 src/test/cpp/test_jni_server_helpers.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index d5d32f02..98efd0a0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -242,6 +242,7 @@ if(BUILD_TESTING) src/test/cpp/test_utils.cpp src/test/cpp/test_server.cpp src/test/cpp/test_jni_helpers.cpp + src/test/cpp/test_jni_server_helpers.cpp ) target_include_directories(jllama_test PRIVATE diff --git a/src/main/cpp/jllama.cpp b/src/main/cpp/jllama.cpp index 5fd3822b..a2c59e5e 100644 --- a/src/main/cpp/jllama.cpp +++ b/src/main/cpp/jllama.cpp @@ -3,6 +3,7 @@ #include "arg.h" #include "json-schema-to-grammar.h" #include "jni_helpers.hpp" +#include "jni_server_helpers.hpp" #include "llama.h" #include "log.h" #include "nlohmann/json.hpp" @@ -123,31 +124,14 @@ static void throw_invalid_request(JNIEnv *env, const std::exception &e) { } /** - * Collects results for all task_ids from the server result queue (blocking). - * - * Iterates over task_ids, receiving one result per id. If any result carries - * an error the remaining waiting ids are cleaned up, a JNI exception is thrown, - * and the function returns false. The caller must return nullptr immediately. - * - * On success: all results are appended to `out` and true is returned. - * On error: JNI exception is pending, `out` is in a partial state, false returned. + * Convenience wrapper around collect_task_results_impl (jni_server_helpers.hpp) + * that supplies the module-level globals so call sites need no boilerplate. 
*/ static bool collect_task_results(JNIEnv *env, server_context *ctx_server, const std::unordered_set &task_ids, std::vector &out) { - for (size_t i = 0; i < task_ids.size(); i++) { - server_task_result_ptr result = ctx_server->queue_results.recv(task_ids); - if (result->is_error()) { - ctx_server->queue_results.remove_waiting_task_ids(task_ids); - std::string error_msg = result->to_json()["message"].get(); - env->ThrowNew(c_llama_error, error_msg.c_str()); - return false; - } - out.push_back(std::move(result)); - } - ctx_server->queue_results.remove_waiting_task_ids(task_ids); - return true; + return collect_task_results_impl(env, ctx_server->queue_results, task_ids, out, c_llama_error); } /** diff --git a/src/main/cpp/jni_server_helpers.hpp b/src/main/cpp/jni_server_helpers.hpp new file mode 100644 index 00000000..dcd36f6e --- /dev/null +++ b/src/main/cpp/jni_server_helpers.hpp @@ -0,0 +1,52 @@ +#pragma once + +// jni_server_helpers.hpp — JNI helpers that need server.hpp types. +// +// Kept separate from jni_helpers.hpp intentionally: jni_helpers.hpp has a +// deliberately minimal include surface (only jni.h + stdlib) so it can be +// unit-tested without the full llama.cpp stack. Any helper that must reach +// into server.hpp types belongs here instead. +// +// The single public entry point is collect_task_results_impl(), which drains +// one server_task_result_ptr per task ID from a server_response queue and +// propagates any server-side error back to Java via JNI. +// +// All parameters are explicit (no module-level globals) so the function can +// be exercised in unit tests using a local server_response and a mock JNIEnv. 
+ +#include "jni.h" +#include "server.hpp" + +#include +#include + +// --------------------------------------------------------------------------- +// collect_task_results_impl +// +// Precondition: each ID in task_ids has already been registered with +// queue.add_waiting_task_id() (or add_waiting_tasks()) so that +// remove_waiting_task_ids() performs correct cleanup. +// +// On success: appends all results to `out`, removes waiting ids, returns true. +// On error: removes waiting ids, throws via JNI using error_class, +// returns false. The caller must return nullptr (or equivalent +// sentinel) immediately — the JNI exception is already pending. +// --------------------------------------------------------------------------- +inline bool collect_task_results_impl(JNIEnv *env, + server_response &queue, + const std::unordered_set &task_ids, + std::vector &out, + jclass error_class) { + for (size_t i = 0; i < task_ids.size(); i++) { + server_task_result_ptr result = queue.recv(task_ids); + if (result->is_error()) { + queue.remove_waiting_task_ids(task_ids); + std::string error_msg = result->to_json()["message"].get(); + env->ThrowNew(error_class, error_msg.c_str()); + return false; + } + out.push_back(std::move(result)); + } + queue.remove_waiting_task_ids(task_ids); + return true; +} diff --git a/src/test/cpp/test_jni_server_helpers.cpp b/src/test/cpp/test_jni_server_helpers.cpp new file mode 100644 index 00000000..858e45c6 --- /dev/null +++ b/src/test/cpp/test_jni_server_helpers.cpp @@ -0,0 +1,215 @@ +// Tests for collect_task_results_impl() in jni_server_helpers.hpp. +// +// The function needs two non-trivial collaborators: +// - server_response (from server.hpp) — provides recv() / remove_waiting_task_ids() +// - JNIEnv — used only for ThrowNew on the error path +// +// server_response is used directly: we pre-seed it with results via send() +// before calling collect_task_results_impl(). 
Because recv() checks the +// queue under a mutex+condvar, pre-seeding lets us call recv() from the same +// thread without blocking. +// +// JNIEnv is mocked with the same stub technique used in test_jni_helpers.cpp: +// a zero-filled JNINativeInterface_ table with only GetLongField (unused here) +// and ThrowNew patched so we can observe whether an exception was raised. +// +// Covered scenarios: +// - single success result → out filled, no throw, returns true +// - single error result → out empty, ThrowNew called with correct message, returns false +// - multiple success results → all collected in order, returns true +// - first result ok, second is error → cleanup, ThrowNew, returns false +// - waiting ids are removed on success (remove_waiting_task_ids called) +// - waiting ids are removed on error (remove_waiting_task_ids called) + +#include + +#include +#include +#include +#include + +// Unit under test — brings in server.hpp transitively. +#include "jni_server_helpers.hpp" + +// ============================================================ +// Minimal concrete server_task_result subtypes for testing +// ============================================================ + +namespace { + +// A success result whose to_json() returns {"content": ""}. +struct fake_ok_result : server_task_result { + std::string msg; + explicit fake_ok_result(int id_, std::string m) : msg(std::move(m)) { id = id_; } + json to_json() override { return {{"content", msg}}; } +}; + +// An error result — reuses the real server_task_result_error so that +// to_json() → format_error_response() → {"message": err_msg, ...} matches +// the exact JSON key that collect_task_results_impl reads. 
+static server_task_result_ptr make_error(int id_, const std::string &msg) { + auto r = std::make_unique(); + r->id = id_; + r->err_msg = msg; + r->err_type = ERROR_TYPE_SERVER; + return r; +} + +static server_task_result_ptr make_ok(int id_, const std::string &msg = "ok") { + return std::make_unique(id_, msg); +} + +// ============================================================ +// Mock JNI environment (same pattern as test_jni_helpers.cpp) +// ============================================================ + +static bool g_throw_called = false; +static std::string g_throw_message; + +static jint JNICALL stub_ThrowNew(JNIEnv *, jclass, const char *msg) { + g_throw_called = true; + g_throw_message = msg ? msg : ""; + return 0; +} + +static jlong JNICALL stub_GetLongField(JNIEnv *, jobject, jfieldID) { return 0; } + +JNIEnv *make_mock_env(JNINativeInterface_ &table, JNIEnv_ &env_obj) { + std::memset(&table, 0, sizeof(table)); + table.ThrowNew = stub_ThrowNew; + table.GetLongField = stub_GetLongField; // unused but avoids a null slot crash + env_obj.functions = &table; + return &env_obj; +} + +// Test fixture: fresh mock env + fresh server_response per test. 
+struct CollectResultsFixture : ::testing::Test { + JNINativeInterface_ table{}; + JNIEnv_ env_obj{}; + JNIEnv *env = nullptr; + jclass dummy_eclass = reinterpret_cast(0x1); + + server_response queue; + + void SetUp() override { + env = make_mock_env(table, env_obj); + g_throw_called = false; + g_throw_message.clear(); + } +}; + +} // namespace + +// ============================================================ +// Single success result +// ============================================================ + +TEST_F(CollectResultsFixture, SingleOkResult_ReturnsTrueAndFillsOut) { + queue.add_waiting_task_id(1); + queue.send(make_ok(1, "hello")); + + std::unordered_set ids = {1}; + std::vector out; + + bool ok = collect_task_results_impl(env, queue, ids, out, dummy_eclass); + + EXPECT_TRUE(ok); + EXPECT_EQ(out.size(), 1u); + EXPECT_EQ(out[0]->to_json()["content"], "hello"); + EXPECT_FALSE(g_throw_called); +} + +// ============================================================ +// Single error result +// ============================================================ + +TEST_F(CollectResultsFixture, SingleErrorResult_ReturnsFalseAndThrows) { + queue.add_waiting_task_id(2); + queue.send(make_error(2, "something went wrong")); + + std::unordered_set ids = {2}; + std::vector out; + + bool ok = collect_task_results_impl(env, queue, ids, out, dummy_eclass); + + EXPECT_FALSE(ok); + EXPECT_TRUE(out.empty()) << "out must not be populated on error"; + EXPECT_TRUE(g_throw_called); + EXPECT_EQ(g_throw_message, "something went wrong"); +} + +// ============================================================ +// Multiple success results +// ============================================================ + +TEST_F(CollectResultsFixture, MultipleOkResults_AllCollected) { + for (int i = 10; i < 13; ++i) { + queue.add_waiting_task_id(i); + queue.send(make_ok(i, "msg" + std::to_string(i))); + } + + std::unordered_set ids = {10, 11, 12}; + std::vector out; + + bool ok = collect_task_results_impl(env, 
queue, ids, out, dummy_eclass);
+
+ EXPECT_TRUE(ok);
+ EXPECT_EQ(out.size(), 3u);
+ EXPECT_FALSE(g_throw_called);
+}
+
+// ============================================================
+// First ok, second error — error path cleans up remaining ids
+// ============================================================
+
+TEST_F(CollectResultsFixture, SecondResultIsError_StopsAndThrows) {
+ queue.add_waiting_task_id(20);
+ queue.add_waiting_task_id(21);
+ queue.send(make_ok(20));
+ queue.send(make_error(21, "task 21 failed"));
+
+ std::unordered_set<int> ids = {20, 21};
+ std::vector<server_task_result_ptr> out;
+
+ bool ok = collect_task_results_impl(env, queue, ids, out, dummy_eclass);
+
+ EXPECT_FALSE(ok);
+ EXPECT_TRUE(g_throw_called);
+ EXPECT_EQ(g_throw_message, "task 21 failed");
+}
+
+// ============================================================
+// Waiting ids are removed from the queue on the success path
+// ============================================================
+
+TEST_F(CollectResultsFixture, SuccessPath_WaitingIdsRemovedAfterCollect) {
+ queue.add_waiting_task_id(30);
+ queue.send(make_ok(30));
+
+ std::unordered_set<int> ids = {30};
+ std::vector<server_task_result_ptr> out;
+ collect_task_results_impl(env, queue, ids, out, dummy_eclass);
+
+ // After collect, the id must no longer be in the waiting set.
+ // We verify indirectly: sending a second result for id=30 should
+ // NOT be returned by a subsequent recv for a different id — the
+ // simplest check is that waiting_task_ids no longer contains 30.
+ EXPECT_FALSE(queue.waiting_task_ids.count(30))
+ << "remove_waiting_task_ids must clear the id on success";
+}
+
+// ============================================================
+// Waiting ids are removed from the queue on the error path
+// ============================================================
+
+TEST_F(CollectResultsFixture, ErrorPath_WaitingIdsRemovedAfterError) {
+ queue.add_waiting_task_id(40);
+ queue.send(make_error(40, "err"));
+
+ std::unordered_set<int> ids = {40};
+ std::vector<server_task_result_ptr> out;
+ collect_task_results_impl(env, queue, ids, out, dummy_eclass);
+
+ EXPECT_FALSE(queue.waiting_task_ids.count(40))
+ << "remove_waiting_task_ids must clear the id on error";
+}
diff --git a/src/test/cpp/test_server.cpp b/src/test/cpp/test_server.cpp
index e3bdbf5e..e53d8468 100644
--- a/src/test/cpp/test_server.cpp
+++ b/src/test/cpp/test_server.cpp
@@ -17,22 +17,7 @@
 // - stop_type_to_str (enum → string mapping for all stop types)
 // - oaicompat_finish_reason (extracted helper: stop_type + tool_calls → OAI finish_reason)
 //
-// NOT covered here — collect_task_results() (jllama.cpp):
-// collect_task_results() lives in jllama.cpp's anonymous namespace and
-// cannot be reached from a separate compilation unit without extracting it
-// to a header. Moving it to a header would drag server.hpp (and llama.h)
-// into jni_helpers.hpp, breaking its deliberately minimal include surface.
-//
-// Coverage instead comes from two directions:
-// - Success path: any LlamaModelTest Java integration test that calls
-// handleCompletions / handleCompletionsOai / handleInfill exercises the
-// loop and the remove_waiting_task_ids cleanup.
-// - Error path: the Java integration tests in ErrorHandlingTest verify
-// that malformed requests propagate a LlamaException through the
-// refactored catch/throw_invalid_request path that guards the call site.
-// The result->is_error() branch (server-side error) requires a live -// inference run that returns an error result, which is not feasible -// in a unit test without a full model + server stack. +// collect_task_results_impl() is tested in test_jni_server_helpers.cpp. #include From 8473d20f7e0bd7ff33dfaaa0d75ba948bc52a73b Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 5 Apr 2026 13:38:03 +0000 Subject: [PATCH 04/34] Fix double-include of server.hpp (no include guard) server.hpp has no #pragma once or header guard, so including it from jni_server_helpers.hpp caused redefinition errors in jllama.cpp which also includes server.hpp directly. Fix: remove #include "server.hpp" from jni_server_helpers.hpp and document the required include order in a header comment. Update test_jni_server_helpers.cpp to include server.hpp explicitly before jni_server_helpers.hpp. https://claude.ai/code/session_01FuMTQYYhDX9bPB8WPNwGda --- src/main/cpp/jni_server_helpers.hpp | 6 +++++- src/test/cpp/test_jni_server_helpers.cpp | 3 ++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/main/cpp/jni_server_helpers.hpp b/src/main/cpp/jni_server_helpers.hpp index dcd36f6e..63870516 100644 --- a/src/main/cpp/jni_server_helpers.hpp +++ b/src/main/cpp/jni_server_helpers.hpp @@ -13,9 +13,13 @@ // // All parameters are explicit (no module-level globals) so the function can // be exercised in unit tests using a local server_response and a mock JNIEnv. +// +// IMPORTANT — include order: +// server.hpp must be included by the including translation unit BEFORE this +// header. server.hpp has no include guard, so including it here would cause +// redefinition errors in any TU that already includes server.hpp directly. 
#include "jni.h" -#include "server.hpp" #include #include diff --git a/src/test/cpp/test_jni_server_helpers.cpp b/src/test/cpp/test_jni_server_helpers.cpp index 858e45c6..f98655dd 100644 --- a/src/test/cpp/test_jni_server_helpers.cpp +++ b/src/test/cpp/test_jni_server_helpers.cpp @@ -28,7 +28,8 @@ #include #include -// Unit under test — brings in server.hpp transitively. +// server.hpp must come before jni_server_helpers.hpp (no include guard in server.hpp). +#include "server.hpp" #include "jni_server_helpers.hpp" // ============================================================ From a55693d8df5b9c3187111ab3e42957229705c682 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 5 Apr 2026 13:38:21 +0000 Subject: [PATCH 05/34] Fix include order: server.hpp before jni_server_helpers.hpp in jllama.cpp jni_server_helpers.hpp requires server.hpp to be included first. Move #include "jni_server_helpers.hpp" to after #include "server.hpp". https://claude.ai/code/session_01FuMTQYYhDX9bPB8WPNwGda --- src/main/cpp/jllama.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/cpp/jllama.cpp b/src/main/cpp/jllama.cpp index a2c59e5e..3c08c0d5 100644 --- a/src/main/cpp/jllama.cpp +++ b/src/main/cpp/jllama.cpp @@ -3,11 +3,11 @@ #include "arg.h" #include "json-schema-to-grammar.h" #include "jni_helpers.hpp" -#include "jni_server_helpers.hpp" #include "llama.h" #include "log.h" #include "nlohmann/json.hpp" #include "server.hpp" +#include "jni_server_helpers.hpp" #include #include From c85a14f8cf30235c118e6fd4228f664d23a00fc9 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 5 Apr 2026 14:25:20 +0000 Subject: [PATCH 06/34] Fix uncaught exception in handleCompletionsOai crashing JVM oaicompat_completion_params_parse() throws std::runtime_error when "prompt" is absent. In handleCompletionsOai the call was outside any try-catch block, so the exception escaped the JNI boundary and called std::terminate, crashing the JVM. 
Wrap the call in a try-catch using throw_invalid_request, matching the identical guard pattern already present in handleChatCompletions and requestChatCompletion. The bug was pre-existing; testHandleCompletionsOai MissingPromptThrows in ErrorHandlingTest exposed it. https://claude.ai/code/session_01FuMTQYYhDX9bPB8WPNwGda --- src/main/cpp/jllama.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/main/cpp/jllama.cpp b/src/main/cpp/jllama.cpp index 3c08c0d5..6b2a8b11 100644 --- a/src/main/cpp/jllama.cpp +++ b/src/main/cpp/jllama.cpp @@ -1148,7 +1148,13 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleCompletionsOai(J json body = json::parse(c_params); // Parse OAI-compatible completion parameters - json data = oaicompat_completion_params_parse(body); + json data; + try { + data = oaicompat_completion_params_parse(body); + } catch (const std::exception &e) { + throw_invalid_request(env, e); + return nullptr; + } auto completion_id = gen_chatcmplid(); std::vector tasks; From da8f314574d67c512d54c47483031569be7c221a Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 5 Apr 2026 14:40:37 +0000 Subject: [PATCH 07/34] Refactor F4: extract build_completion_tasks_impl, remove 6 duplicated loops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 17-line task-construction try-block (tokenise prompt, loop to build server_task per token sequence, set type/oaicompat/id fields) was copy-pasted across six JNI functions with only the task_type and oaicompat enum values differing. Fix: add build_completion_tasks_impl() to jni_server_helpers.hpp — same header / same _impl+wrapper pattern as collect_task_results_impl. The thin build_completion_tasks() wrapper in jllama.cpp supplies c_llama_error so call sites stay clean. 
Six call sites replaced (one line each): requestCompletion → type (COMPLETION or INFILL) / NONE handleChatCompletions → COMPLETION / CHAT requestChatCompletion → COMPLETION / NONE (template pre-applied, comment kept) handleCompletions → COMPLETION / NONE handleCompletionsOai → COMPLETION / COMPLETION handleInfill → INFILL / NONE The original analysis listed 5 functions; requestCompletion was also using the identical block and is included here. Tests (test_jni_server_helpers.cpp): - MissingPrompt_ReturnsFalseAndThrows: data without "prompt" → ThrowNew, returns false, tasks empty. ctx_server=nullptr is safe because data.at("prompt") is evaluated in its own statement before any ctx_server member is accessed. - MissingPrompt_TaskTypeDoesNotAffectErrorBehaviour: same with INFILL type. Success path requires a live model and is covered by LlamaModelTest. https://claude.ai/code/session_01FuMTQYYhDX9bPB8WPNwGda --- src/main/cpp/jllama.cpp | 185 ++++------------------- src/main/cpp/jni_server_helpers.hpp | 69 ++++++++- src/test/cpp/test_jni_server_helpers.cpp | 64 +++++++- 3 files changed, 152 insertions(+), 166 deletions(-) diff --git a/src/main/cpp/jllama.cpp b/src/main/cpp/jllama.cpp index 6b2a8b11..5e3dd75a 100644 --- a/src/main/cpp/jllama.cpp +++ b/src/main/cpp/jllama.cpp @@ -123,6 +123,18 @@ static void throw_invalid_request(JNIEnv *env, const std::exception &e) { env->ThrowNew(c_llama_error, err.dump().c_str()); } +/** + * Convenience wrapper around build_completion_tasks_impl (jni_server_helpers.hpp) + * that supplies the module-level globals so call sites need no boilerplate. 
+ */ +static bool build_completion_tasks(JNIEnv *env, server_context *ctx_server, + const json &data, const std::string &completion_id, + server_task_type task_type, oaicompat_type oaicompat, + std::vector &tasks) { + return build_completion_tasks_impl(env, ctx_server, data, completion_id, + task_type, oaicompat, tasks, c_llama_error); +} + /** * Convenience wrapper around collect_task_results_impl (jni_server_helpers.hpp) * that supplies the module-level globals so call sites need no boilerplate. @@ -583,34 +595,9 @@ JNIEXPORT jint JNICALL Java_de_kherud_llama_LlamaModel_requestCompletion(JNIEnv auto completion_id = gen_chatcmplid(); std::vector tasks; - - try { - const auto &prompt = data.at("prompt"); - - std::vector tokenized_prompts = tokenize_input_prompts(ctx_server->vocab, prompt, true, true); - - tasks.reserve(tokenized_prompts.size()); - for (size_t i = 0; i < tokenized_prompts.size(); i++) { - server_task task = server_task(type); - - task.id = ctx_server->queue_tasks.get_new_id(); - task.index = i; - - task.prompt_tokens = server_tokens(tokenized_prompts[i], false); - task.params = server_task::params_from_json_cmpl(ctx_server->ctx, ctx_server->params_base, data); - task.id_selected_slot = json_value(data, "id_slot", -1); - - // OAI-compat - task.params.oaicompat = OAICOMPAT_TYPE_NONE; - task.params.oaicompat_cmpl_id = completion_id; - // oaicompat_model is already populated by params_from_json_cmpl - - tasks.push_back(std::move(task)); - } - } catch (const std::exception &e) { - throw_invalid_request(env, e); - return 0; - } + // oaicompat_model is already populated by params_from_json_cmpl inside the helper + if (!build_completion_tasks(env, ctx_server, data, completion_id, + type, OAICOMPAT_TYPE_NONE, tasks)) return 0; ctx_server->queue_results.add_waiting_tasks(tasks); const auto task_ids = server_task::get_list_id(tasks); @@ -843,32 +830,8 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleChatCompletions( auto completion_id = 
gen_chatcmplid(); std::vector tasks; - - try { - const auto &prompt = data.at("prompt"); - - std::vector tokenized_prompts = tokenize_input_prompts(ctx_server->vocab, prompt, true, true); - - tasks.reserve(tokenized_prompts.size()); - for (size_t i = 0; i < tokenized_prompts.size(); i++) { - server_task task = server_task(SERVER_TASK_TYPE_COMPLETION); - - task.id = ctx_server->queue_tasks.get_new_id(); - task.index = i; - - task.prompt_tokens = server_tokens(tokenized_prompts[i], false); - task.params = server_task::params_from_json_cmpl(ctx_server->ctx, ctx_server->params_base, data); - task.id_selected_slot = json_value(data, "id_slot", -1); - - task.params.oaicompat = OAICOMPAT_TYPE_CHAT; - task.params.oaicompat_cmpl_id = completion_id; - - tasks.push_back(std::move(task)); - } - } catch (const std::exception &e) { - throw_invalid_request(env, e); - return nullptr; - } + if (!build_completion_tasks(env, ctx_server, data, completion_id, + SERVER_TASK_TYPE_COMPLETION, OAICOMPAT_TYPE_CHAT, tasks)) return nullptr; ctx_server->queue_results.add_waiting_tasks(tasks); const auto task_ids = server_task::get_list_id(tasks); @@ -928,34 +891,9 @@ JNIEXPORT jint JNICALL Java_de_kherud_llama_LlamaModel_requestChatCompletion(JNI auto completion_id = gen_chatcmplid(); std::vector tasks; - - try { - const auto &prompt = data.at("prompt"); - - std::vector tokenized_prompts = tokenize_input_prompts(ctx_server->vocab, prompt, true, true); - - tasks.reserve(tokenized_prompts.size()); - for (size_t i = 0; i < tokenized_prompts.size(); i++) { - server_task task = server_task(SERVER_TASK_TYPE_COMPLETION); - - task.id = ctx_server->queue_tasks.get_new_id(); - task.index = i; - - task.prompt_tokens = server_tokens(tokenized_prompts[i], false); - task.params = server_task::params_from_json_cmpl(ctx_server->ctx, ctx_server->params_base, data); - task.id_selected_slot = json_value(data, "id_slot", -1); - - // Use NONE so receiveCompletion gets the simple {"content":"..."} format. 
- // The chat template was already applied by oaicompat_chat_params_parse above. - task.params.oaicompat = OAICOMPAT_TYPE_NONE; - task.params.oaicompat_cmpl_id = completion_id; - - tasks.push_back(std::move(task)); - } - } catch (const std::exception &e) { - throw_invalid_request(env, e); - return 0; - } + // OAICOMPAT_TYPE_NONE: chat template was already applied by oaicompat_chat_params_parse above. + if (!build_completion_tasks(env, ctx_server, data, completion_id, + SERVER_TASK_TYPE_COMPLETION, OAICOMPAT_TYPE_NONE, tasks)) return 0; ctx_server->queue_results.add_waiting_tasks(tasks); const auto task_ids = server_task::get_list_id(tasks); @@ -1089,32 +1027,8 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleCompletions(JNIE auto completion_id = gen_chatcmplid(); std::vector tasks; - - try { - const auto &prompt = data.at("prompt"); - - std::vector tokenized_prompts = tokenize_input_prompts(ctx_server->vocab, prompt, true, true); - - tasks.reserve(tokenized_prompts.size()); - for (size_t i = 0; i < tokenized_prompts.size(); i++) { - server_task task = server_task(SERVER_TASK_TYPE_COMPLETION); - - task.id = ctx_server->queue_tasks.get_new_id(); - task.index = i; - - task.prompt_tokens = server_tokens(tokenized_prompts[i], false); - task.params = server_task::params_from_json_cmpl(ctx_server->ctx, ctx_server->params_base, data); - task.id_selected_slot = json_value(data, "id_slot", -1); - - task.params.oaicompat = OAICOMPAT_TYPE_NONE; - task.params.oaicompat_cmpl_id = completion_id; - - tasks.push_back(std::move(task)); - } - } catch (const std::exception &e) { - throw_invalid_request(env, e); - return nullptr; - } + if (!build_completion_tasks(env, ctx_server, data, completion_id, + SERVER_TASK_TYPE_COMPLETION, OAICOMPAT_TYPE_NONE, tasks)) return nullptr; ctx_server->queue_results.add_waiting_tasks(tasks); const auto task_ids = server_task::get_list_id(tasks); @@ -1158,32 +1072,8 @@ JNIEXPORT jstring JNICALL 
Java_de_kherud_llama_LlamaModel_handleCompletionsOai(J auto completion_id = gen_chatcmplid(); std::vector tasks; - - try { - const auto &prompt = data.at("prompt"); - - std::vector tokenized_prompts = tokenize_input_prompts(ctx_server->vocab, prompt, true, true); - - tasks.reserve(tokenized_prompts.size()); - for (size_t i = 0; i < tokenized_prompts.size(); i++) { - server_task task = server_task(SERVER_TASK_TYPE_COMPLETION); - - task.id = ctx_server->queue_tasks.get_new_id(); - task.index = i; - - task.prompt_tokens = server_tokens(tokenized_prompts[i], false); - task.params = server_task::params_from_json_cmpl(ctx_server->ctx, ctx_server->params_base, data); - task.id_selected_slot = json_value(data, "id_slot", -1); - - task.params.oaicompat = OAICOMPAT_TYPE_COMPLETION; - task.params.oaicompat_cmpl_id = completion_id; - - tasks.push_back(std::move(task)); - } - } catch (const std::exception &e) { - throw_invalid_request(env, e); - return nullptr; - } + if (!build_completion_tasks(env, ctx_server, data, completion_id, + SERVER_TASK_TYPE_COMPLETION, OAICOMPAT_TYPE_COMPLETION, tasks)) return nullptr; ctx_server->queue_results.add_waiting_tasks(tasks); const auto task_ids = server_task::get_list_id(tasks); @@ -1254,31 +1144,8 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleInfill(JNIEnv *e auto completion_id = gen_chatcmplid(); std::vector tasks; - - try { - std::vector infill_prompts = - tokenize_input_prompts(ctx_server->vocab, data.at("prompt"), true, true); - - tasks.reserve(infill_prompts.size()); - for (size_t i = 0; i < infill_prompts.size(); i++) { - server_task task = server_task(SERVER_TASK_TYPE_INFILL); - - task.id = ctx_server->queue_tasks.get_new_id(); - task.index = i; - - task.prompt_tokens = server_tokens(infill_prompts[i], false); - task.params = server_task::params_from_json_cmpl(ctx_server->ctx, ctx_server->params_base, data); - task.id_selected_slot = json_value(data, "id_slot", -1); - - task.params.oaicompat = 
OAICOMPAT_TYPE_NONE; - task.params.oaicompat_cmpl_id = completion_id; - - tasks.push_back(std::move(task)); - } - } catch (const std::exception &e) { - throw_invalid_request(env, e); - return nullptr; - } + if (!build_completion_tasks(env, ctx_server, data, completion_id, + SERVER_TASK_TYPE_INFILL, OAICOMPAT_TYPE_NONE, tasks)) return nullptr; ctx_server->queue_results.add_waiting_tasks(tasks); const auto task_ids = server_task::get_list_id(tasks); diff --git a/src/main/cpp/jni_server_helpers.hpp b/src/main/cpp/jni_server_helpers.hpp index 63870516..4c3a3c55 100644 --- a/src/main/cpp/jni_server_helpers.hpp +++ b/src/main/cpp/jni_server_helpers.hpp @@ -7,12 +7,13 @@ // unit-tested without the full llama.cpp stack. Any helper that must reach // into server.hpp types belongs here instead. // -// The single public entry point is collect_task_results_impl(), which drains -// one server_task_result_ptr per task ID from a server_response queue and -// propagates any server-side error back to Java via JNI. +// Public entry points: +// build_completion_tasks_impl — tokenise and build a server_task vector +// collect_task_results_impl — drain results from the response queue +// recv_slot_task_result_impl — recv + check a single slot-action result // -// All parameters are explicit (no module-level globals) so the function can -// be exercised in unit tests using a local server_response and a mock JNIEnv. +// All parameters are explicit (no module-level globals) so each function can +// be exercised in unit tests using local server objects and a mock JNIEnv. // // IMPORTANT — include order: // server.hpp must be included by the including translation unit BEFORE this @@ -24,6 +25,64 @@ #include #include +// --------------------------------------------------------------------------- +// build_completion_tasks_impl +// +// Reads data["prompt"], tokenises it, and appends one server_task per prompt +// token sequence to `tasks`. 
task_type and oaicompat are caller-specified, +// covering all six JNI call sites: +// requestCompletion → COMPLETION or INFILL / NONE (type from caller) +// handleCompletions → COMPLETION / NONE +// handleCompletionsOai → COMPLETION / COMPLETION +// handleChatCompletions → COMPLETION / CHAT +// requestChatCompletion → COMPLETION / NONE (template already applied) +// handleInfill → INFILL / NONE +// +// IMPORTANT: data["prompt"] is read in its own statement before any +// ctx_server member is accessed, so passing ctx_server=nullptr is safe in +// tests that only exercise the error path (missing "prompt" key). +// +// On success: `tasks` is populated, returns true. +// On error: throws via JNI using error_class, returns false. +// --------------------------------------------------------------------------- +inline bool build_completion_tasks_impl(JNIEnv *env, + server_context *ctx_server, + const json &data, + const std::string &completion_id, + server_task_type task_type, + oaicompat_type oaicompat, + std::vector &tasks, + jclass error_class) { + try { + const auto &prompt = data.at("prompt"); // throws before ctx_server is touched + + std::vector tokenized_prompts = + tokenize_input_prompts(ctx_server->vocab, prompt, true, true); + + tasks.reserve(tokenized_prompts.size()); + for (size_t i = 0; i < tokenized_prompts.size(); i++) { + server_task task = server_task(task_type); + task.id = ctx_server->queue_tasks.get_new_id(); + task.index = i; + + task.prompt_tokens = server_tokens(tokenized_prompts[i], false); + task.params = server_task::params_from_json_cmpl( + ctx_server->ctx, ctx_server->params_base, data); + task.id_selected_slot = json_value(data, "id_slot", -1); + + task.params.oaicompat = oaicompat; + task.params.oaicompat_cmpl_id = completion_id; + + tasks.push_back(std::move(task)); + } + } catch (const std::exception &e) { + const auto &err = format_error_response(e.what(), ERROR_TYPE_INVALID_REQUEST); + env->ThrowNew(error_class, err.dump().c_str()); + 
return false; + } + return true; +} + // --------------------------------------------------------------------------- // collect_task_results_impl // diff --git a/src/test/cpp/test_jni_server_helpers.cpp b/src/test/cpp/test_jni_server_helpers.cpp index f98655dd..df118bbd 100644 --- a/src/test/cpp/test_jni_server_helpers.cpp +++ b/src/test/cpp/test_jni_server_helpers.cpp @@ -1,6 +1,15 @@ -// Tests for collect_task_results_impl() in jni_server_helpers.hpp. +// Tests for jni_server_helpers.hpp: +// - build_completion_tasks_impl +// - collect_task_results_impl +// - recv_slot_task_result_impl (added by Finding 5) // -// The function needs two non-trivial collaborators: +// build_completion_tasks_impl needs: +// - JNIEnv — used only for ThrowNew on the error path +// - server_context* — NOT accessed when "prompt" is absent (exception thrown +// first), so nullptr is safe for error-path tests. +// - json data — provided inline in the test +// +// collect_task_results_impl and recv_slot_task_result_impl need: // - server_response (from server.hpp) — provides recv() / remove_waiting_task_ids() // - JNIEnv — used only for ThrowNew on the error path // @@ -214,3 +223,54 @@ TEST_F(CollectResultsFixture, ErrorPath_WaitingIdsRemovedAfterError) { EXPECT_FALSE(queue.waiting_task_ids.count(40)) << "remove_waiting_task_ids must clear the id on error"; } + +// ============================================================ +// Tests for build_completion_tasks_impl +// +// Only the error path is unit-testable here: the function reads +// data["prompt"] in its own statement BEFORE accessing ctx_server, so +// passing nullptr for ctx_server is safe when "prompt" is absent. +// +// The success path requires a live server_context (llama vocab + context +// pointers) and is covered by LlamaModelTest Java integration tests. 
+// ============================================================ + +TEST_F(CollectResultsFixture, BuildTasks_MissingPrompt_ReturnsFalseAndThrows) { + json data = {{"n_predict", 1}}; // deliberately no "prompt" key + std::string completion_id = "test-cmpl-id"; + std::vector tasks; + + // ctx_server is nullptr — safe because data.at("prompt") throws before + // any ctx_server member is accessed. + bool ok = build_completion_tasks_impl(env, + /*ctx_server=*/nullptr, + data, completion_id, + SERVER_TASK_TYPE_COMPLETION, + OAICOMPAT_TYPE_NONE, + tasks, + dummy_eclass); + + EXPECT_FALSE(ok) << "Missing 'prompt' must return false"; + EXPECT_TRUE(g_throw_called) << "ThrowNew must be called for missing 'prompt'"; + EXPECT_TRUE(tasks.empty()) << "tasks must remain empty on error"; +} + +TEST_F(CollectResultsFixture, BuildTasks_MissingPrompt_TaskTypeDoesNotAffectErrorBehaviour) { + // The error path is identical regardless of task_type / oaicompat. + // Verify INFILL behaves the same way. + json data = {{"input_prefix", "def f():"}, {"input_suffix", "return 1"}}; + std::string completion_id = "infill-cmpl-id"; + std::vector tasks; + + bool ok = build_completion_tasks_impl(env, + /*ctx_server=*/nullptr, + data, completion_id, + SERVER_TASK_TYPE_INFILL, + OAICOMPAT_TYPE_NONE, + tasks, + dummy_eclass); + + EXPECT_FALSE(ok); + EXPECT_TRUE(g_throw_called); + EXPECT_TRUE(tasks.empty()); +} From 2ab7a12e116c6b5d375de6d6e3a16e01bc0a1e68 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 5 Apr 2026 14:45:46 +0000 Subject: [PATCH 08/34] Finding 5: extract recv_slot_task_result_impl, replace 4 handleSlotAction recv blocks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Each of the four handleSlotAction switch cases (LIST/SAVE/RESTORE/ERASE) contained the same 9-line recv → check → return block. 
Extract the pattern into recv_slot_task_result_impl (jni_server_helpers.hpp) and a thin wrapper recv_slot_task_result (jllama.cpp), then replace all four duplicates with a single call. Unit tests added in test_jni_server_helpers.cpp: - success path returns non-null jstring and calls NewStringUTF with result JSON - error path returns nullptr and calls ThrowNew with the error message - waiting task id is removed from the queue on both success and error paths Requires stub_NewStringUTF in the mock JNI table (added alongside the tests). https://claude.ai/code/session_01FuMTQYYhDX9bPB8WPNwGda --- src/main/cpp/jllama.cpp | 57 +++++--------------- src/main/cpp/jni_server_helpers.hpp | 29 ++++++++++ src/test/cpp/test_jni_server_helpers.cpp | 69 ++++++++++++++++++++++++ 3 files changed, 111 insertions(+), 44 deletions(-) diff --git a/src/main/cpp/jllama.cpp b/src/main/cpp/jllama.cpp index 5e3dd75a..86e27c0c 100644 --- a/src/main/cpp/jllama.cpp +++ b/src/main/cpp/jllama.cpp @@ -135,6 +135,15 @@ static bool build_completion_tasks(JNIEnv *env, server_context *ctx_server, task_type, oaicompat, tasks, c_llama_error); } +/** + * Convenience wrapper around recv_slot_task_result_impl (jni_server_helpers.hpp). + * Caller must have already registered task_id with add_waiting_task_id() and + * posted the task; this wrapper covers recv → check → return. + */ +static jstring recv_slot_task_result(JNIEnv *env, server_context *ctx_server, int task_id) { + return recv_slot_task_result_impl(env, ctx_server->queue_results, task_id, c_llama_error); +} + /** * Convenience wrapper around collect_task_results_impl (jni_server_helpers.hpp) * that supplies the module-level globals so call sites need no boilerplate. 
@@ -1341,17 +1350,7 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleSlotAction(JNIEn int id = task.id; ctx_server->queue_tasks.post(std::move(task), true); - server_task_result_ptr result = ctx_server->queue_results.recv(id); - ctx_server->queue_results.remove_waiting_task_id(id); - - if (result->is_error()) { - std::string error_msg = result->to_json()["message"].get(); - env->ThrowNew(c_llama_error, error_msg.c_str()); - return nullptr; - } - - std::string resp = result->to_json().dump(); - return env->NewStringUTF(resp.c_str()); + return recv_slot_task_result(env, ctx_server, id); } case 1: { // SAVE std::string filename = jfilename != nullptr ? parse_jstring(env, jfilename) : ""; @@ -1370,17 +1369,7 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleSlotAction(JNIEn ctx_server->queue_results.add_waiting_task_id(tid); ctx_server->queue_tasks.post(std::move(task)); - server_task_result_ptr result = ctx_server->queue_results.recv(tid); - ctx_server->queue_results.remove_waiting_task_id(tid); - - if (result->is_error()) { - std::string error_msg = result->to_json()["message"].get(); - env->ThrowNew(c_llama_error, error_msg.c_str()); - return nullptr; - } - - std::string resp = result->to_json().dump(); - return env->NewStringUTF(resp.c_str()); + return recv_slot_task_result(env, ctx_server, tid); } case 2: { // RESTORE std::string filename = jfilename != nullptr ? 
parse_jstring(env, jfilename) : ""; @@ -1399,17 +1388,7 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleSlotAction(JNIEn ctx_server->queue_results.add_waiting_task_id(tid); ctx_server->queue_tasks.post(std::move(task)); - server_task_result_ptr result = ctx_server->queue_results.recv(tid); - ctx_server->queue_results.remove_waiting_task_id(tid); - - if (result->is_error()) { - std::string error_msg = result->to_json()["message"].get(); - env->ThrowNew(c_llama_error, error_msg.c_str()); - return nullptr; - } - - std::string resp = result->to_json().dump(); - return env->NewStringUTF(resp.c_str()); + return recv_slot_task_result(env, ctx_server, tid); } case 3: { // ERASE server_task task(SERVER_TASK_TYPE_SLOT_ERASE); @@ -1420,17 +1399,7 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleSlotAction(JNIEn ctx_server->queue_results.add_waiting_task_id(tid); ctx_server->queue_tasks.post(std::move(task)); - server_task_result_ptr result = ctx_server->queue_results.recv(tid); - ctx_server->queue_results.remove_waiting_task_id(tid); - - if (result->is_error()) { - std::string error_msg = result->to_json()["message"].get(); - env->ThrowNew(c_llama_error, error_msg.c_str()); - return nullptr; - } - - std::string resp = result->to_json().dump(); - return env->NewStringUTF(resp.c_str()); + return recv_slot_task_result(env, ctx_server, tid); } default: env->ThrowNew(c_llama_error, "Invalid slot action"); diff --git a/src/main/cpp/jni_server_helpers.hpp b/src/main/cpp/jni_server_helpers.hpp index 4c3a3c55..511924e3 100644 --- a/src/main/cpp/jni_server_helpers.hpp +++ b/src/main/cpp/jni_server_helpers.hpp @@ -83,6 +83,35 @@ inline bool build_completion_tasks_impl(JNIEnv *env, return true; } +// --------------------------------------------------------------------------- +// recv_slot_task_result_impl +// +// Receives a single slot-action result from the response queue, checks for +// an error, and returns the result JSON as a JNI string. 
+// +// Used by all four handleSlotAction switch cases (LIST / SAVE / RESTORE / +// ERASE). The caller is responsible for constructing the task, registering +// the task ID with queue.add_waiting_task_id(), and posting it to the task +// queue; this helper only covers the recv → check → return leg. +// +// On success: returns a new jstring containing result->to_json().dump(). +// On error: removes the waiting task id, throws via JNI, returns nullptr. +// --------------------------------------------------------------------------- +inline jstring recv_slot_task_result_impl(JNIEnv *env, + server_response &queue, + int task_id, + jclass error_class) { + server_task_result_ptr result = queue.recv(task_id); + queue.remove_waiting_task_id(task_id); + if (result->is_error()) { + std::string error_msg = result->to_json()["message"].get(); + env->ThrowNew(error_class, error_msg.c_str()); + return nullptr; + } + std::string resp = result->to_json().dump(); + return env->NewStringUTF(resp.c_str()); +} + // --------------------------------------------------------------------------- // collect_task_results_impl // diff --git a/src/test/cpp/test_jni_server_helpers.cpp b/src/test/cpp/test_jni_server_helpers.cpp index df118bbd..c01df6a7 100644 --- a/src/test/cpp/test_jni_server_helpers.cpp +++ b/src/test/cpp/test_jni_server_helpers.cpp @@ -76,6 +76,11 @@ static server_task_result_ptr make_ok(int id_, const std::string &msg = "ok") { static bool g_throw_called = false; static std::string g_throw_message; +// NewStringUTF stub: stores the string so tests can inspect it, returns a +// non-null sentinel so callers can distinguish success from nullptr (error). +static std::string g_new_string_utf_value; +static jstring g_new_string_utf_sentinel = reinterpret_cast(0xBEEF); + static jint JNICALL stub_ThrowNew(JNIEnv *, jclass, const char *msg) { g_throw_called = true; g_throw_message = msg ? 
msg : ""; @@ -84,10 +89,16 @@ static jint JNICALL stub_ThrowNew(JNIEnv *, jclass, const char *msg) { static jlong JNICALL stub_GetLongField(JNIEnv *, jobject, jfieldID) { return 0; } +static jstring JNICALL stub_NewStringUTF(JNIEnv *, const char *utf) { + g_new_string_utf_value = utf ? utf : ""; + return g_new_string_utf_sentinel; +} + JNIEnv *make_mock_env(JNINativeInterface_ &table, JNIEnv_ &env_obj) { std::memset(&table, 0, sizeof(table)); table.ThrowNew = stub_ThrowNew; table.GetLongField = stub_GetLongField; // unused but avoids a null slot crash + table.NewStringUTF = stub_NewStringUTF; env_obj.functions = &table; return &env_obj; } @@ -105,6 +116,7 @@ struct CollectResultsFixture : ::testing::Test { env = make_mock_env(table, env_obj); g_throw_called = false; g_throw_message.clear(); + g_new_string_utf_value.clear(); } }; @@ -274,3 +286,60 @@ TEST_F(CollectResultsFixture, BuildTasks_MissingPrompt_TaskTypeDoesNotAffectErro EXPECT_TRUE(g_throw_called); EXPECT_TRUE(tasks.empty()); } + +// ============================================================ +// Tests for recv_slot_task_result_impl +// +// Pre-seed the server_response queue (same technique as collect tests): +// calling send() before recv() satisfies the condvar predicate immediately, +// so recv() returns without blocking even from the same thread. +// +// NewStringUTF is stubbed to return a sentinel non-null jstring and capture +// the serialised JSON so we can verify the success path. 
+// ============================================================ + +TEST_F(CollectResultsFixture, RecvSlotResult_SuccessResult_ReturnsNonNullAndDoesNotThrow) { + queue.add_waiting_task_id(50); + queue.send(make_ok(50, "slot-ok")); + + jstring result = recv_slot_task_result_impl(env, queue, 50, dummy_eclass); + + EXPECT_NE(result, nullptr) << "success result must return non-null jstring"; + EXPECT_FALSE(g_throw_called) << "ThrowNew must not be called on success"; + // The stub captures the serialised JSON passed to NewStringUTF. + EXPECT_FALSE(g_new_string_utf_value.empty()) + << "NewStringUTF must be called with the result JSON"; + EXPECT_NE(g_new_string_utf_value.find("slot-ok"), std::string::npos) + << "result JSON must contain the content from the fake result"; +} + +TEST_F(CollectResultsFixture, RecvSlotResult_ErrorResult_ReturnsNullAndThrows) { + queue.add_waiting_task_id(51); + queue.send(make_error(51, "slot operation failed")); + + jstring result = recv_slot_task_result_impl(env, queue, 51, dummy_eclass); + + EXPECT_EQ(result, nullptr) << "error result must return nullptr"; + EXPECT_TRUE(g_throw_called) << "ThrowNew must be called on error"; + EXPECT_EQ(g_throw_message, "slot operation failed"); +} + +TEST_F(CollectResultsFixture, RecvSlotResult_WaitingIdRemovedAfterSuccess) { + queue.add_waiting_task_id(52); + queue.send(make_ok(52)); + + recv_slot_task_result_impl(env, queue, 52, dummy_eclass); + + EXPECT_FALSE(queue.waiting_task_ids.count(52)) + << "remove_waiting_task_id must clear the id on success"; +} + +TEST_F(CollectResultsFixture, RecvSlotResult_WaitingIdRemovedAfterError) { + queue.add_waiting_task_id(53); + queue.send(make_error(53, "err")); + + recv_slot_task_result_impl(env, queue, 53, dummy_eclass); + + EXPECT_FALSE(queue.waiting_task_ids.count(53)) + << "remove_waiting_task_id must clear the id on error"; +} From ebf352d7fbde3837eab259a72450dc072bff02ae Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 5 Apr 2026 14:47:10 +0000 Subject: 
[PATCH 09/34] Finding 6: add [[nodiscard]] to all _impl helpers and their wrappers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All five helper functions whose return value encodes success/failure or carries a non-owning pointer must not have their result silently discarded. Adding [[nodiscard]] turns an accidental discard into a compile-time diagnostic: jni_helpers.hpp: get_server_context_impl — returns nullptr on error (exception pending) get_jllama_context_impl — returns nullptr when handle is 0 jni_server_helpers.hpp: build_completion_tasks_impl — returns false on error (exception pending) recv_slot_task_result_impl — returns nullptr on error (exception pending) collect_task_results_impl — returns false on error (exception pending) jllama.cpp (wrappers): get_server_context, get_jllama_context, build_completion_tasks, recv_slot_task_result, collect_task_results — all marked [[nodiscard]] No behaviour change. C++17 is already required by the build (cxx_std_17 on jllama_test; jllama itself uses cxx_std_11 but [[nodiscard]] is C++17 and GCC/ Clang accept it silently as an extension under -std=c++11 with no errors). https://claude.ai/code/session_01FuMTQYYhDX9bPB8WPNwGda --- src/main/cpp/jllama.cpp | 10 +++++----- src/main/cpp/jni_helpers.hpp | 10 +++++----- src/main/cpp/jni_server_helpers.hpp | 6 +++--- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/main/cpp/jllama.cpp b/src/main/cpp/jllama.cpp index 86e27c0c..94d91970 100644 --- a/src/main/cpp/jllama.cpp +++ b/src/main/cpp/jllama.cpp @@ -99,7 +99,7 @@ jobject o_log_callback = nullptr; * globals. Returns nullptr (with a JNI exception pending) when the model * is not loaded. 
*/ -static server_context *get_server_context(JNIEnv *env, jobject obj) { +[[nodiscard]] static server_context *get_server_context(JNIEnv *env, jobject obj) { return get_server_context_impl(env, obj, f_model_pointer, c_llama_error); } @@ -109,7 +109,7 @@ static server_context *get_server_context(JNIEnv *env, jobject obj) { * Returns nullptr silently when the handle is 0 — a valid no-op for a dtor. * See get_jllama_context_impl in jni_helpers.hpp for the full contract. */ -static jllama_context *get_jllama_context(JNIEnv *env, jobject obj) { +[[nodiscard]] static jllama_context *get_jllama_context(JNIEnv *env, jobject obj) { return get_jllama_context_impl(env, obj, f_model_pointer); } @@ -127,7 +127,7 @@ static void throw_invalid_request(JNIEnv *env, const std::exception &e) { * Convenience wrapper around build_completion_tasks_impl (jni_server_helpers.hpp) * that supplies the module-level globals so call sites need no boilerplate. */ -static bool build_completion_tasks(JNIEnv *env, server_context *ctx_server, +[[nodiscard]] static bool build_completion_tasks(JNIEnv *env, server_context *ctx_server, const json &data, const std::string &completion_id, server_task_type task_type, oaicompat_type oaicompat, std::vector &tasks) { @@ -140,7 +140,7 @@ static bool build_completion_tasks(JNIEnv *env, server_context *ctx_server, * Caller must have already registered task_id with add_waiting_task_id() and * posted the task; this wrapper covers recv → check → return. 
*/ -static jstring recv_slot_task_result(JNIEnv *env, server_context *ctx_server, int task_id) { +[[nodiscard]] static jstring recv_slot_task_result(JNIEnv *env, server_context *ctx_server, int task_id) { return recv_slot_task_result_impl(env, ctx_server->queue_results, task_id, c_llama_error); } @@ -148,7 +148,7 @@ static jstring recv_slot_task_result(JNIEnv *env, server_context *ctx_server, in * Convenience wrapper around collect_task_results_impl (jni_server_helpers.hpp) * that supplies the module-level globals so call sites need no boilerplate. */ -static bool collect_task_results(JNIEnv *env, +[[nodiscard]] static bool collect_task_results(JNIEnv *env, server_context *ctx_server, const std::unordered_set &task_ids, std::vector &out) { diff --git a/src/main/cpp/jni_helpers.hpp b/src/main/cpp/jni_helpers.hpp index 2cebabcd..b0efe042 100644 --- a/src/main/cpp/jni_helpers.hpp +++ b/src/main/cpp/jni_helpers.hpp @@ -46,9 +46,9 @@ struct jllama_context { // Parameters are passed explicitly (no module-level globals) so the function // can be exercised from unit tests using a mock JNIEnv. // --------------------------------------------------------------------------- -inline server_context *get_server_context_impl(JNIEnv *env, jobject obj, - jfieldID field_id, - jclass error_class) { +[[nodiscard]] inline server_context *get_server_context_impl(JNIEnv *env, jobject obj, + jfieldID field_id, + jclass error_class) { const jlong handle = env->GetLongField(obj, field_id); if (handle == 0) { env->ThrowNew(error_class, "Model is not loaded"); @@ -73,8 +73,8 @@ inline server_context *get_server_context_impl(JNIEnv *env, jobject obj, // On success: returns a non-null jllama_context*. // On null handle: returns nullptr silently (no JNI exception is thrown). 
// --------------------------------------------------------------------------- -inline jllama_context *get_jllama_context_impl(JNIEnv *env, jobject obj, - jfieldID field_id) { +[[nodiscard]] inline jllama_context *get_jllama_context_impl(JNIEnv *env, jobject obj, + jfieldID field_id) { const jlong handle = env->GetLongField(obj, field_id); if (handle == 0) { return nullptr; // already deleted or never initialised — silent no-op diff --git a/src/main/cpp/jni_server_helpers.hpp b/src/main/cpp/jni_server_helpers.hpp index 511924e3..4e2f997a 100644 --- a/src/main/cpp/jni_server_helpers.hpp +++ b/src/main/cpp/jni_server_helpers.hpp @@ -45,7 +45,7 @@ // On success: `tasks` is populated, returns true. // On error: throws via JNI using error_class, returns false. // --------------------------------------------------------------------------- -inline bool build_completion_tasks_impl(JNIEnv *env, +[[nodiscard]] inline bool build_completion_tasks_impl(JNIEnv *env, server_context *ctx_server, const json &data, const std::string &completion_id, @@ -97,7 +97,7 @@ inline bool build_completion_tasks_impl(JNIEnv *env, // On success: returns a new jstring containing result->to_json().dump(). // On error: removes the waiting task id, throws via JNI, returns nullptr. // --------------------------------------------------------------------------- -inline jstring recv_slot_task_result_impl(JNIEnv *env, +[[nodiscard]] inline jstring recv_slot_task_result_impl(JNIEnv *env, server_response &queue, int task_id, jclass error_class) { @@ -124,7 +124,7 @@ inline jstring recv_slot_task_result_impl(JNIEnv *env, // returns false. The caller must return nullptr (or equivalent // sentinel) immediately — the JNI exception is already pending. 
// --------------------------------------------------------------------------- -inline bool collect_task_results_impl(JNIEnv *env, +[[nodiscard]] inline bool collect_task_results_impl(JNIEnv *env, server_response &queue, const std::unordered_set &task_ids, std::vector &out, From 6ff2f5427888b58c54592d39fc72e29c76313a85 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 5 Apr 2026 15:05:56 +0000 Subject: [PATCH 10/34] Finding A: fix JNI_OnUnload double-delete of c_log_level and missing c_log_format delete MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit JNI_OnLoad creates a NewGlobalRef for both c_log_level (line 335) and c_log_format (line 336). JNI_OnUnload had two consecutive DeleteGlobalRef calls for c_log_level and none for c_log_format: - double-deleting a global ref is undefined behaviour (JNI spec §4.4) - c_log_format was leaked on every library unload Fix: replace the duplicate c_log_level line with c_log_format so each of the 14 global class refs created in JNI_OnLoad is deleted exactly once in JNI_OnUnload. https://claude.ai/code/session_01FuMTQYYhDX9bPB8WPNwGda --- src/main/cpp/jllama.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/cpp/jllama.cpp b/src/main/cpp/jllama.cpp index 94d91970..7a703d36 100644 --- a/src/main/cpp/jllama.cpp +++ b/src/main/cpp/jllama.cpp @@ -442,7 +442,7 @@ JNIEXPORT void JNICALL JNI_OnUnload(JavaVM *vm, void *reserved) { env->DeleteGlobalRef(c_biconsumer); env->DeleteGlobalRef(c_llama_error); env->DeleteGlobalRef(c_log_level); - env->DeleteGlobalRef(c_log_level); + env->DeleteGlobalRef(c_log_format); env->DeleteGlobalRef(c_error_oom); env->DeleteGlobalRef(o_utf_8); From 3b47cb794465cab016606cf30e78bffd6c915b21 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 5 Apr 2026 15:06:09 +0000 Subject: [PATCH 11/34] Finding B: replace missed manual collect loop in handleChatCompletions During Finding 3, three handlers were updated to use collect_task_results(). 
handleChatCompletions was overlooked and still contained the identical 13-line manual recv loop. Replace it with the same collect_task_results() call used by handleCompletions, handleCompletionsOai, and handleInfill. No behaviour change; collect_task_results_impl is already unit-tested. https://claude.ai/code/session_01FuMTQYYhDX9bPB8WPNwGda --- src/main/cpp/jllama.cpp | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/src/main/cpp/jllama.cpp b/src/main/cpp/jllama.cpp index 7a703d36..be1b889b 100644 --- a/src/main/cpp/jllama.cpp +++ b/src/main/cpp/jllama.cpp @@ -849,21 +849,7 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleChatCompletions( // Collect all results (blocking) std::vector results; results.reserve(task_ids.size()); - - for (size_t i = 0; i < task_ids.size(); i++) { - server_task_result_ptr result = ctx_server->queue_results.recv(task_ids); - - if (result->is_error()) { - ctx_server->queue_results.remove_waiting_task_ids(task_ids); - std::string error_msg = result->to_json()["message"].get(); - env->ThrowNew(c_llama_error, error_msg.c_str()); - return nullptr; - } - - results.push_back(std::move(result)); - } - - ctx_server->queue_results.remove_waiting_task_ids(task_ids); + if (!collect_task_results(env, ctx_server, task_ids, results)) return nullptr; // Build response JSON json response; From da3b5308760134178cfd18a8eaca2347fc0d0993 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 5 Apr 2026 15:08:29 +0000 Subject: [PATCH 12/34] Finding C: extract parse_json_params, replace 9 duplicate parse_jstring+json::parse pairs Every JNI handler that receives a jstring parameter begins with the same two lines: std::string c_params = parse_jstring(env, jparams); json data = json::parse(c_params); Extract to a one-liner helper parse_json_params(JNIEnv*, jstring) placed immediately after parse_jstring in the anonymous namespace, so the related helpers are co-located. 
Replace all 9 call sites: requestCompletion, applyTemplate, handleChatCompletions, requestChatCompletion, handleCompletions, handleCompletionsOai, handleInfill, handleEmbeddings, configureParallelInference The ordered_json variant in jsonSchemaToGrammarBytes uses a different type and is intentionally left unchanged. https://claude.ai/code/session_01FuMTQYYhDX9bPB8WPNwGda --- src/main/cpp/jllama.cpp | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/src/main/cpp/jllama.cpp b/src/main/cpp/jllama.cpp index be1b889b..183892e2 100644 --- a/src/main/cpp/jllama.cpp +++ b/src/main/cpp/jllama.cpp @@ -172,6 +172,15 @@ std::string parse_jstring(JNIEnv *env, jstring java_string) { return string; } +/** + * Convert a Java string to a parsed JSON object. + * Combines parse_jstring + json::parse, which every parameter-taking JNI + * function needs before it can read its arguments. + */ +static json parse_json_params(JNIEnv *env, jstring jparams) { + return json::parse(parse_jstring(env, jparams)); +} + char **parse_string_array(JNIEnv *env, const jobjectArray string_array, const jsize length) { auto *const result = static_cast(malloc(length * sizeof(char *))); @@ -593,8 +602,7 @@ JNIEXPORT jint JNICALL Java_de_kherud_llama_LlamaModel_requestCompletion(JNIEnv auto *ctx_server = get_server_context(env, obj); if (!ctx_server) return 0; - std::string c_params = parse_jstring(env, jparams); - json data = json::parse(c_params); + json data = parse_json_params(env, jparams); server_task_type type = SERVER_TASK_TYPE_COMPLETION; @@ -807,8 +815,7 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_applyTemplate(JNIEnv * auto *ctx_server = get_server_context(env, obj); if (!ctx_server) return nullptr; - std::string c_params = parse_jstring(env, jparams); - json data = json::parse(c_params); + json data = parse_json_params(env, jparams); std::vector files; json templateData = oaicompat_chat_params_parse(data, 
ctx_server->oai_parser_opt, files); @@ -824,8 +831,7 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleChatCompletions( auto *ctx_server = get_server_context(env, obj); if (!ctx_server) return nullptr; - std::string c_params = parse_jstring(env, jparams); - json body = json::parse(c_params); + json body = parse_json_params(env, jparams); // Apply chat template via OAI-compatible parser json data; @@ -871,8 +877,7 @@ JNIEXPORT jint JNICALL Java_de_kherud_llama_LlamaModel_requestChatCompletion(JNI auto *ctx_server = get_server_context(env, obj); if (!ctx_server) return 0; - std::string c_params = parse_jstring(env, jparams); - json body = json::parse(c_params); + json body = parse_json_params(env, jparams); // Apply chat template via OAI-compatible parser json data; @@ -1017,8 +1022,7 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleCompletions(JNIE auto *ctx_server = get_server_context(env, obj); if (!ctx_server) return nullptr; - std::string c_params = parse_jstring(env, jparams); - json data = json::parse(c_params); + json data = parse_json_params(env, jparams); auto completion_id = gen_chatcmplid(); std::vector tasks; @@ -1053,8 +1057,7 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleCompletionsOai(J auto *ctx_server = get_server_context(env, obj); if (!ctx_server) return nullptr; - std::string c_params = parse_jstring(env, jparams); - json body = json::parse(c_params); + json body = parse_json_params(env, jparams); // Parse OAI-compatible completion parameters json data; @@ -1112,8 +1115,7 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleInfill(JNIEnv *e return nullptr; } - std::string c_params = parse_jstring(env, jparams); - json data = json::parse(c_params); + json data = parse_json_params(env, jparams); if (!data.contains("input_prefix")) { env->ThrowNew(c_llama_error, "\"input_prefix\" is required"); @@ -1183,8 +1185,7 @@ JNIEXPORT jstring JNICALL 
Java_de_kherud_llama_LlamaModel_handleEmbeddings(JNIEn return nullptr; } - std::string c_params = parse_jstring(env, jparams); - json body = json::parse(c_params); + json body = parse_json_params(env, jparams); json prompt; if (body.count("input") != 0) { @@ -1398,8 +1399,7 @@ JNIEXPORT jboolean JNICALL Java_de_kherud_llama_LlamaModel_configureParallelInfe auto *ctx_server = get_server_context(env, obj); if (!ctx_server) return JNI_FALSE; - std::string config_str = parse_jstring(env, jconfig); - json config = json::parse(config_str); + json config = parse_json_params(env, jconfig); if (config.contains("slot_prompt_similarity")) { float similarity = config["slot_prompt_similarity"].get(); From 61662249b6772f9cc15885340e116279ddada94d Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 5 Apr 2026 15:11:34 +0000 Subject: [PATCH 13/34] Finding D: extract dispatch_tasks, replace 9 duplicate add_waiting+get_list_id+post triples MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every batch dispatch point in the file repeats the same three lines: ctx_server->queue_results.add_waiting_tasks(tasks); auto task_ids = server_task::get_list_id(tasks); ctx_server->queue_tasks.post(std::move(tasks)); Extract to dispatch_tasks(server_context*, vector&) placed between build_completion_tasks and recv_slot_task_result, maintaining the natural pipeline order: build → dispatch → recv. The single function covers all call sites — completion (requestCompletion, handleChatCompletions, requestChatCompletion, handleCompletions, handleCompletionsOai, handleInfill) and embedding/rerank (embed, handleRerank, handleEmbeddings) — because the three-line body is identical in all nine cases. Side-fix: handleEmbeddings loop bound changed from tasks.size() to task_ids.size() — tasks is in a moved-from state after dispatch_tasks, so reading its size was technically UB; task_ids has the same count and is valid. 
https://claude.ai/code/session_01FuMTQYYhDX9bPB8WPNwGda --- src/main/cpp/jllama.cpp | 62 ++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/src/main/cpp/jllama.cpp b/src/main/cpp/jllama.cpp index 183892e2..49a81d41 100644 --- a/src/main/cpp/jllama.cpp +++ b/src/main/cpp/jllama.cpp @@ -135,6 +135,27 @@ static void throw_invalid_request(JNIEnv *env, const std::exception &e) { task_type, oaicompat, tasks, c_llama_error); } +/** + * Register all tasks for result waiting, post them to the task queue, and + * return the set of task IDs. + * + * This covers the repeated three-line pattern used by every batch dispatch + * point (completion, chat, infill, embedding, rerank): + * + * ctx_server->queue_results.add_waiting_tasks(tasks); + * auto task_ids = server_task::get_list_id(tasks); + * ctx_server->queue_tasks.post(std::move(tasks)); + * + * After the call, `tasks` is in a valid but unspecified state (moved-from). + */ +static std::unordered_set dispatch_tasks(server_context *ctx_server, + std::vector &tasks) { + ctx_server->queue_results.add_waiting_tasks(tasks); + auto task_ids = server_task::get_list_id(tasks); + ctx_server->queue_tasks.post(std::move(tasks)); + return task_ids; +} + /** * Convenience wrapper around recv_slot_task_result_impl (jni_server_helpers.hpp). 
* Caller must have already registered task_id with add_waiting_task_id() and @@ -616,10 +637,7 @@ JNIEXPORT jint JNICALL Java_de_kherud_llama_LlamaModel_requestCompletion(JNIEnv if (!build_completion_tasks(env, ctx_server, data, completion_id, type, OAICOMPAT_TYPE_NONE, tasks)) return 0; - ctx_server->queue_results.add_waiting_tasks(tasks); - const auto task_ids = server_task::get_list_id(tasks); - - ctx_server->queue_tasks.post(std::move(tasks)); + const auto task_ids = dispatch_tasks(ctx_server, tasks); if (task_ids.size() != 1) { env->ThrowNew(c_llama_error, "multitasking currently not supported"); @@ -688,10 +706,7 @@ JNIEXPORT jfloatArray JNICALL Java_de_kherud_llama_LlamaModel_embed(JNIEnv *env, tasks.push_back(std::move(task)); - ctx_server->queue_results.add_waiting_tasks(tasks); - std::unordered_set task_ids = server_task::get_list_id(tasks); - - ctx_server->queue_tasks.post(std::move(tasks)); + const auto task_ids = dispatch_tasks(ctx_server, tasks); const auto id_task = *task_ids.begin(); json responses = json::array(); @@ -778,10 +793,7 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleRerank(JNIEnv *e task.prompt_tokens = server_tokens(tokens, false); tasks.push_back(std::move(task)); } - ctx_server->queue_results.add_waiting_tasks(tasks); - std::unordered_set task_ids = server_task::get_list_id(tasks); - - ctx_server->queue_tasks.post(std::move(tasks)); + const auto task_ids = dispatch_tasks(ctx_server, tasks); json results_json = json::array(); @@ -848,9 +860,7 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleChatCompletions( if (!build_completion_tasks(env, ctx_server, data, completion_id, SERVER_TASK_TYPE_COMPLETION, OAICOMPAT_TYPE_CHAT, tasks)) return nullptr; - ctx_server->queue_results.add_waiting_tasks(tasks); - const auto task_ids = server_task::get_list_id(tasks); - ctx_server->queue_tasks.post(std::move(tasks)); + const auto task_ids = dispatch_tasks(ctx_server, tasks); // Collect all results (blocking) 
std::vector results; @@ -895,9 +905,7 @@ JNIEXPORT jint JNICALL Java_de_kherud_llama_LlamaModel_requestChatCompletion(JNI if (!build_completion_tasks(env, ctx_server, data, completion_id, SERVER_TASK_TYPE_COMPLETION, OAICOMPAT_TYPE_NONE, tasks)) return 0; - ctx_server->queue_results.add_waiting_tasks(tasks); - const auto task_ids = server_task::get_list_id(tasks); - ctx_server->queue_tasks.post(std::move(tasks)); + const auto task_ids = dispatch_tasks(ctx_server, tasks); if (task_ids.size() != 1) { env->ThrowNew(c_llama_error, "multitasking currently not supported"); @@ -1029,9 +1037,7 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleCompletions(JNIE if (!build_completion_tasks(env, ctx_server, data, completion_id, SERVER_TASK_TYPE_COMPLETION, OAICOMPAT_TYPE_NONE, tasks)) return nullptr; - ctx_server->queue_results.add_waiting_tasks(tasks); - const auto task_ids = server_task::get_list_id(tasks); - ctx_server->queue_tasks.post(std::move(tasks)); + const auto task_ids = dispatch_tasks(ctx_server, tasks); // Collect all results (blocking) std::vector results; @@ -1073,9 +1079,7 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleCompletionsOai(J if (!build_completion_tasks(env, ctx_server, data, completion_id, SERVER_TASK_TYPE_COMPLETION, OAICOMPAT_TYPE_COMPLETION, tasks)) return nullptr; - ctx_server->queue_results.add_waiting_tasks(tasks); - const auto task_ids = server_task::get_list_id(tasks); - ctx_server->queue_tasks.post(std::move(tasks)); + const auto task_ids = dispatch_tasks(ctx_server, tasks); std::vector results; results.reserve(task_ids.size()); @@ -1144,9 +1148,7 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleInfill(JNIEnv *e if (!build_completion_tasks(env, ctx_server, data, completion_id, SERVER_TASK_TYPE_INFILL, OAICOMPAT_TYPE_NONE, tasks)) return nullptr; - ctx_server->queue_results.add_waiting_tasks(tasks); - const auto task_ids = server_task::get_list_id(tasks); - 
ctx_server->queue_tasks.post(std::move(tasks)); + const auto task_ids = dispatch_tasks(ctx_server, tasks); std::vector results; results.reserve(task_ids.size()); @@ -1232,13 +1234,11 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleEmbeddings(JNIEn tasks.push_back(std::move(task)); } - ctx_server->queue_results.add_waiting_tasks(tasks); - std::unordered_set task_ids = server_task::get_list_id(tasks); - ctx_server->queue_tasks.post(std::move(tasks)); + const auto task_ids = dispatch_tasks(ctx_server, tasks); json responses = json::array(); - for (size_t i = 0; i < tasks.size(); i++) { + for (size_t i = 0; i < task_ids.size(); i++) { server_task_result_ptr result = ctx_server->queue_results.recv(task_ids); if (result->is_error()) { From 66931626298be635ca4c8a9f24b7f8a3b383e488 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 5 Apr 2026 15:13:52 +0000 Subject: [PATCH 14/34] Finding E: extract results_to_jstring_impl, replace 4 duplicate serialisation blocks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Four handlers (handleChatCompletions, handleCompletions, handleCompletionsOai, handleInfill) each ended with an identical 9-line block that assembles a JSON response from a results vector and returns it as a jstring: json response; if (results.size() == 1) { response = results[0]->to_json(); } else { response = json::array(); for (...) response.push_back(...); } std::string s = response.dump(); return env->NewStringUTF(s.c_str()); Extract to results_to_jstring_impl (jni_server_helpers.hpp) with a thin results_to_jstring wrapper in jllama.cpp placed immediately after collect_task_results, keeping collect → serialise helpers adjacent. The helper is NOT applied to receiveCompletionJson (streaming path that also sets a "stop" field) or handleRerank (builds a different JSON schema directly into a results_json array) — those retain their own serialisation logic. 
Unit tests added in test_jni_server_helpers.cpp: - single result → bare JSON object with correct content field - multiple results → JSON array with correct elements in order - empty vector → empty JSON array (degenerate, documents the contract) https://claude.ai/code/session_01FuMTQYYhDX9bPB8WPNwGda --- src/main/cpp/jllama.cpp | 63 ++++++------------------ src/main/cpp/jni_server_helpers.hpp | 30 +++++++++++ src/test/cpp/test_jni_server_helpers.cpp | 56 +++++++++++++++++++++ 3 files changed, 100 insertions(+), 49 deletions(-) diff --git a/src/main/cpp/jllama.cpp b/src/main/cpp/jllama.cpp index 49a81d41..8a79949c 100644 --- a/src/main/cpp/jllama.cpp +++ b/src/main/cpp/jllama.cpp @@ -176,6 +176,16 @@ static std::unordered_set dispatch_tasks(server_context *ctx_server, return collect_task_results_impl(env, ctx_server->queue_results, task_ids, out, c_llama_error); } +/** + * Convenience wrapper around results_to_jstring_impl (jni_server_helpers.hpp). + * Serialises results to a jstring (single object or JSON array). 
+ */ +[[nodiscard]] static jstring results_to_jstring( + JNIEnv *env, + const std::vector &results) { + return results_to_jstring_impl(env, results); +} + /** * Convert a Java string to a std::string */ @@ -867,19 +877,7 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleChatCompletions( results.reserve(task_ids.size()); if (!collect_task_results(env, ctx_server, task_ids, results)) return nullptr; - // Build response JSON - json response; - if (results.size() == 1) { - response = results[0]->to_json(); - } else { - response = json::array(); - for (auto &res : results) { - response.push_back(res->to_json()); - } - } - - std::string response_str = response.dump(); - return env->NewStringUTF(response_str.c_str()); + return results_to_jstring(env, results); } JNIEXPORT jint JNICALL Java_de_kherud_llama_LlamaModel_requestChatCompletion(JNIEnv *env, jobject obj, @@ -1044,18 +1042,7 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleCompletions(JNIE results.reserve(task_ids.size()); if (!collect_task_results(env, ctx_server, task_ids, results)) return nullptr; - json response; - if (results.size() == 1) { - response = results[0]->to_json(); - } else { - response = json::array(); - for (auto &res : results) { - response.push_back(res->to_json()); - } - } - - std::string response_str = response.dump(); - return env->NewStringUTF(response_str.c_str()); + return results_to_jstring(env, results); } JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleCompletionsOai(JNIEnv *env, jobject obj, @@ -1085,18 +1072,7 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleCompletionsOai(J results.reserve(task_ids.size()); if (!collect_task_results(env, ctx_server, task_ids, results)) return nullptr; - json response; - if (results.size() == 1) { - response = results[0]->to_json(); - } else { - response = json::array(); - for (auto &res : results) { - response.push_back(res->to_json()); - } - } - - std::string response_str = 
response.dump(); - return env->NewStringUTF(response_str.c_str()); + return results_to_jstring(env, results); } JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleInfill(JNIEnv *env, jobject obj, jstring jparams) { @@ -1154,18 +1130,7 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleInfill(JNIEnv *e results.reserve(task_ids.size()); if (!collect_task_results(env, ctx_server, task_ids, results)) return nullptr; - json response; - if (results.size() == 1) { - response = results[0]->to_json(); - } else { - response = json::array(); - for (auto &res : results) { - response.push_back(res->to_json()); - } - } - - std::string response_str = response.dump(); - return env->NewStringUTF(response_str.c_str()); + return results_to_jstring(env, results); } JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleEmbeddings(JNIEnv *env, jobject obj, diff --git a/src/main/cpp/jni_server_helpers.hpp b/src/main/cpp/jni_server_helpers.hpp index 4e2f997a..3e0bda7c 100644 --- a/src/main/cpp/jni_server_helpers.hpp +++ b/src/main/cpp/jni_server_helpers.hpp @@ -142,3 +142,33 @@ queue.remove_waiting_task_ids(task_ids); return true; } + +// --------------------------------------------------------------------------- +// results_to_jstring_impl +// +// Serialises a vector of task results to a jstring. +// +// When there is exactly one result, the top-level JSON is that result's object +// directly. When there are multiple results, they are wrapped in a JSON array. +// This mirrors the OpenAI API convention used by handleCompletions, +// handleCompletionsOai, handleChatCompletions, and handleInfill. +// +// Parameters are passed explicitly so the function is testable without a real +// JVM. The caller is responsible for checking that `results` is non-empty +// before calling (an empty vector produces an empty JSON array). 
+// --------------------------------------------------------------------------- +[[nodiscard]] inline jstring results_to_jstring_impl( + JNIEnv *env, + const std::vector &results) { + json response; + if (results.size() == 1) { + response = results[0]->to_json(); + } else { + response = json::array(); + for (const auto &res : results) { + response.push_back(res->to_json()); + } + } + std::string s = response.dump(); + return env->NewStringUTF(s.c_str()); +} diff --git a/src/test/cpp/test_jni_server_helpers.cpp b/src/test/cpp/test_jni_server_helpers.cpp index c01df6a7..f89ee157 100644 --- a/src/test/cpp/test_jni_server_helpers.cpp +++ b/src/test/cpp/test_jni_server_helpers.cpp @@ -343,3 +343,59 @@ TEST_F(CollectResultsFixture, RecvSlotResult_WaitingIdRemovedAfterError) { EXPECT_FALSE(queue.waiting_task_ids.count(53)) << "remove_waiting_task_id must clear the id on error"; } + +// ============================================================ +// Tests for results_to_jstring_impl +// +// Verifies that the serialisation helper produces the right shape: +// - single result → bare JSON object +// - multiple results → JSON array +// +// NewStringUTF is stubbed via the fixture's mock env; g_new_string_utf_value +// captures whatever string was passed to it. +// ============================================================ + +TEST_F(CollectResultsFixture, ResultsToJstring_SingleResult_ReturnsBareObject) { + std::vector results; + results.push_back(make_ok(1, "hello")); + + jstring js = results_to_jstring_impl(env, results); + + EXPECT_NE(js, nullptr); + EXPECT_FALSE(g_new_string_utf_value.empty()); + + // The top-level JSON must be an object (not an array). 
+ json parsed = json::parse(g_new_string_utf_value); + EXPECT_TRUE(parsed.is_object()) + << "single result must serialise as a bare JSON object, got: " + << g_new_string_utf_value; + EXPECT_EQ(parsed.value("content", ""), "hello"); +} + +TEST_F(CollectResultsFixture, ResultsToJstring_MultipleResults_ReturnsArray) { + std::vector results; + results.push_back(make_ok(2, "first")); + results.push_back(make_ok(3, "second")); + + jstring js = results_to_jstring_impl(env, results); + + EXPECT_NE(js, nullptr); + json parsed = json::parse(g_new_string_utf_value); + EXPECT_TRUE(parsed.is_array()) + << "multiple results must serialise as a JSON array, got: " + << g_new_string_utf_value; + ASSERT_EQ(parsed.size(), 2u); + EXPECT_EQ(parsed[0].value("content", ""), "first"); + EXPECT_EQ(parsed[1].value("content", ""), "second"); +} + +TEST_F(CollectResultsFixture, ResultsToJstring_EmptyVector_ReturnsEmptyArray) { + std::vector results; + + jstring js = results_to_jstring_impl(env, results); + + EXPECT_NE(js, nullptr); + json parsed = json::parse(g_new_string_utf_value); + EXPECT_TRUE(parsed.is_array()); + EXPECT_TRUE(parsed.empty()); +} From 46fe06548fc17e4cc4d72e811a99ea6840dcaf1c Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 5 Apr 2026 15:41:14 +0000 Subject: [PATCH 15/34] Finding I: remove dead variables and redundant to_json() calls in embed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The embed function had three code-quality issues left from an earlier refactor: 1. `json responses = json::array();` — declared after dispatch_tasks but never read; the function builds no array of responses. 2. `json error = nullptr;` — declared but never assigned or read. 3. `result->to_json()` was called three times (lines 727, 729, 739) for the same result object. Lines 727 (`json response_str`) and 739 (`out_res`) both captured the return value; line 729 called it a third time inline just to read ["message"]. 
Removed the unused `response_str` alias so `out_res` is now the single authoritative capture. No behaviour change. https://claude.ai/code/session_01FuMTQYYhDX9bPB8WPNwGda --- src/main/cpp/jllama.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/main/cpp/jllama.cpp b/src/main/cpp/jllama.cpp index 8a79949c..e2db882d 100644 --- a/src/main/cpp/jllama.cpp +++ b/src/main/cpp/jllama.cpp @@ -718,13 +718,9 @@ JNIEXPORT jfloatArray JNICALL Java_de_kherud_llama_LlamaModel_embed(JNIEnv *env, const auto task_ids = dispatch_tasks(ctx_server, tasks); const auto id_task = *task_ids.begin(); - json responses = json::array(); - - json error = nullptr; server_task_result_ptr result = ctx_server->queue_results.recv(id_task); - json response_str = result->to_json(); if (result->is_error()) { std::string response = result->to_json()["message"].get(); ctx_server->queue_results.remove_waiting_task_id(id_task); From f46ec39029345ba4327cc6be0f653997bf074ac1 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 5 Apr 2026 15:43:28 +0000 Subject: [PATCH 16/34] Finding H: extract require_single_task_id_impl, replace 2 duplicate guard blocks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit requestCompletion and requestChatCompletion both ended with the same 5-line pattern that verifies exactly one task was produced and returns its id: if (task_ids.size() != 1) { env->ThrowNew(c_llama_error, "multitasking currently not supported"); return 0; } return *task_ids.begin(); Extract to require_single_task_id_impl (jni_helpers.hpp) with the _impl / wrapper convention used by the other helpers, placed directly after get_jllama_context_impl so all handle/id validation helpers are co-located. Thin wrapper require_single_task_id in jllama.cpp supplies c_llama_error. 
Unit tests added to test_jni_helpers.cpp: - exactly one id → returns that id, no throw - empty set → returns 0, ThrowNew("multitasking currently not supported") - multiple ids → returns 0, ThrowNew("multitasking currently not supported") https://claude.ai/code/session_01FuMTQYYhDX9bPB8WPNwGda --- src/main/cpp/jllama.cpp | 27 +++++++++++++++------------ src/main/cpp/jni_helpers.hpp | 26 ++++++++++++++++++++++++++ src/test/cpp/test_jni_helpers.cpp | 28 ++++++++++++++++++++++++++++ 3 files changed, 69 insertions(+), 12 deletions(-) diff --git a/src/main/cpp/jllama.cpp b/src/main/cpp/jllama.cpp index e2db882d..23b89aa9 100644 --- a/src/main/cpp/jllama.cpp +++ b/src/main/cpp/jllama.cpp @@ -156,6 +156,19 @@ static std::unordered_set dispatch_tasks(server_context *ctx_server, return task_ids; } +/** + * Asserts that exactly one task was created after dispatch and returns its ID. + * Returns 0 (with a JNI exception pending) if the count is not exactly 1. + * + * Used by requestCompletion and requestChatCompletion, which hand the task ID + * back to the Java caller for streaming consumption via receiveCompletionJson. + * Both functions are restricted to single-prompt, single-task invocations. + */ +static int require_single_task_id(JNIEnv *env, + const std::unordered_set &task_ids) { + return require_single_task_id_impl(env, task_ids, c_llama_error); +} + /** * Convenience wrapper around recv_slot_task_result_impl (jni_server_helpers.hpp). 
* Caller must have already registered task_id with add_waiting_task_id() and @@ -649,12 +662,7 @@ JNIEXPORT jint JNICALL Java_de_kherud_llama_LlamaModel_requestCompletion(JNIEnv const auto task_ids = dispatch_tasks(ctx_server, tasks); - if (task_ids.size() != 1) { - env->ThrowNew(c_llama_error, "multitasking currently not supported"); - return 0; - } - - return *task_ids.begin(); + return require_single_task_id(env, task_ids); } JNIEXPORT void JNICALL Java_de_kherud_llama_LlamaModel_releaseTask(JNIEnv *env, jobject obj, jint id_task) { @@ -901,12 +909,7 @@ JNIEXPORT jint JNICALL Java_de_kherud_llama_LlamaModel_requestChatCompletion(JNI const auto task_ids = dispatch_tasks(ctx_server, tasks); - if (task_ids.size() != 1) { - env->ThrowNew(c_llama_error, "multitasking currently not supported"); - return 0; - } - - return *task_ids.begin(); + return require_single_task_id(env, task_ids); } JNIEXPORT jintArray JNICALL Java_de_kherud_llama_LlamaModel_encode(JNIEnv *env, jobject obj, jstring jprompt) { diff --git a/src/main/cpp/jni_helpers.hpp b/src/main/cpp/jni_helpers.hpp index b0efe042..d966580c 100644 --- a/src/main/cpp/jni_helpers.hpp +++ b/src/main/cpp/jni_helpers.hpp @@ -12,6 +12,7 @@ #include #include +#include // Forward declaration — callers that need the full definition must include // server.hpp themselves. @@ -81,3 +82,28 @@ struct jllama_context { } return reinterpret_cast(handle); // NOLINT(*-no-int-to-ptr) } + +// --------------------------------------------------------------------------- +// require_single_task_id_impl +// +// Validates that exactly one task was created after dispatch and returns its +// ID. Returns 0 (with a JNI exception pending) when the count is not 1. +// +// Used by requestCompletion and requestChatCompletion, which hand the returned +// ID back to the Java caller for streaming consumption via +// receiveCompletionJson. Both functions are restricted to single-prompt, +// single-task invocations. 
+// +// On success: returns the single task id (> 0 in practice). +// On failure: throws via JNI, returns 0. +// --------------------------------------------------------------------------- +[[nodiscard]] inline int require_single_task_id_impl( + JNIEnv *env, + const std::unordered_set &task_ids, + jclass error_class) { + if (task_ids.size() != 1) { + env->ThrowNew(error_class, "multitasking currently not supported"); + return 0; + } + return *task_ids.begin(); +} diff --git a/src/test/cpp/test_jni_helpers.cpp b/src/test/cpp/test_jni_helpers.cpp index 875c3d37..0f6f8f5b 100644 --- a/src/test/cpp/test_jni_helpers.cpp +++ b/src/test/cpp/test_jni_helpers.cpp @@ -15,6 +15,7 @@ #include #include +#include // jni_helpers.hpp is the unit under test; it includes jni.h which defines // JNIEnv_ and JNINativeInterface_. @@ -217,3 +218,30 @@ TEST_F(MockJniFixture, GetJllamaContext_ContractComparison_GetServerContextThrow EXPECT_FALSE(g_throw_called) << "get_jllama_context_impl must NOT throw on null"; EXPECT_EQ(jc, nullptr); } + +// ============================================================ +// Tests for require_single_task_id_impl() +// ============================================================ + +TEST_F(MockJniFixture, RequireSingleTaskId_ExactlyOne_ReturnsIdNoThrow) { + std::unordered_set ids = {42}; + int result = require_single_task_id_impl(env, ids, dummy_class); + EXPECT_EQ(result, 42); + EXPECT_FALSE(g_throw_called); +} + +TEST_F(MockJniFixture, RequireSingleTaskId_Empty_ReturnsZeroAndThrows) { + std::unordered_set ids; + int result = require_single_task_id_impl(env, ids, dummy_class); + EXPECT_EQ(result, 0); + EXPECT_TRUE(g_throw_called); + EXPECT_EQ(g_throw_message, "multitasking currently not supported"); +} + +TEST_F(MockJniFixture, RequireSingleTaskId_Multiple_ReturnsZeroAndThrows) { + std::unordered_set ids = {1, 2, 3}; + int result = require_single_task_id_impl(env, ids, dummy_class); + EXPECT_EQ(result, 0); + EXPECT_TRUE(g_throw_called); + 
EXPECT_EQ(g_throw_message, "multitasking currently not supported"); +} From 53bb4b59dab4b351ad9bf0f23c9d691598480382 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 5 Apr 2026 15:44:04 +0000 Subject: [PATCH 17/34] Finding G: extract detokenize helper, replace is_vocab_only dispatch in 2 functions decodeBytes and handleDetokenize both contained the same 5-line conditional: if (!ctx_server->is_vocab_only()) { text = tokens_to_str(ctx_server->ctx, tokens.cbegin(), tokens.cend()); } else { text = tokens_to_str(ctx_server->vocab, tokens.cbegin(), tokens.cend()); } Extract to a static detokenize(server_context*, vector&) helper placed immediately before decodeBytes so the helper and its two call sites are all adjacent. Side-clean in decodeBytes: ReleaseIntArrayElements now comes before the detokenize call (tokens vector already copied), which was the correct order before the change; moving it next to the array acquisition makes the lifetime clearer. No behaviour change. No new tests: the dispatch itself depends on the server_context runtime state (is_vocab_only) and is covered by the existing integration test suite. https://claude.ai/code/session_01FuMTQYYhDX9bPB8WPNwGda --- src/main/cpp/jllama.cpp | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/src/main/cpp/jllama.cpp b/src/main/cpp/jllama.cpp index 23b89aa9..b31327c2 100644 --- a/src/main/cpp/jllama.cpp +++ b/src/main/cpp/jllama.cpp @@ -932,6 +932,21 @@ JNIEXPORT jintArray JNICALL Java_de_kherud_llama_LlamaModel_encode(JNIEnv *env, return java_tokens; } +/** + * Detokenise a token sequence to a UTF-8 string, dispatching on whether the + * context is vocab-only (no llama_context available) or full. + * + * Both decodeBytes and handleDetokenize repeat this identical branch; placing + * the helper immediately above keeps the three related blocks adjacent. 
+ */ +static std::string detokenize(const server_context *ctx_server, + const std::vector &tokens) { + if (!ctx_server->is_vocab_only()) { + return tokens_to_str(ctx_server->ctx, tokens.cbegin(), tokens.cend()); + } + return tokens_to_str(ctx_server->vocab, tokens.cbegin(), tokens.cend()); +} + JNIEXPORT jbyteArray JNICALL Java_de_kherud_llama_LlamaModel_decodeBytes(JNIEnv *env, jobject obj, jintArray java_tokens) { auto *ctx_server = get_server_context(env, obj); @@ -941,17 +956,9 @@ JNIEXPORT jbyteArray JNICALL Java_de_kherud_llama_LlamaModel_decodeBytes(JNIEnv jint *elements = env->GetIntArrayElements(java_tokens, nullptr); std::vector tokens(elements, elements + length); - std::string text; - if (!ctx_server->is_vocab_only()) { - text = tokens_to_str(ctx_server->ctx, tokens.cbegin(), tokens.cend()); - } else { - // vocab-only mode: detokenize using vocabulary directly - text = tokens_to_str(ctx_server->vocab, tokens.cbegin(), tokens.cend()); - } - env->ReleaseIntArrayElements(java_tokens, elements, 0); - return parse_jbytes(env, text); + return parse_jbytes(env, detokenize(ctx_server, tokens)); } JNIEXPORT void JNICALL Java_de_kherud_llama_LlamaModel_delete(JNIEnv *env, jobject obj) { @@ -1275,14 +1282,7 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleDetokenize(JNIEn std::vector tokens(elements, elements + length); env->ReleaseIntArrayElements(jtokens, elements, JNI_ABORT); - std::string content; - if (!ctx_server->is_vocab_only()) { - content = tokens_to_str(ctx_server->ctx, tokens.cbegin(), tokens.cend()); - } else { - content = tokens_to_str(ctx_server->vocab, tokens.cbegin(), tokens.cend()); - } - - json data = format_detokenized_response(content); + json data = format_detokenized_response(detokenize(ctx_server, tokens)); std::string response_str = data.dump(); return env->NewStringUTF(response_str.c_str()); From bc8bdb61f8a06a2f1e5270ec610cf2405228ebc3 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 5 Apr 2026 15:44:55 +0000 Subject: 
[PATCH 18/34] Finding F: extract dispatch_single_task, replace 4 duplicate register+post blocks in handleSlotAction Each of the four handleSlotAction switch cases (LIST/SAVE/RESTORE/ERASE) ended with the same 4-line pattern: int tid = task.id; ctx_server->queue_results.add_waiting_task_id(tid); ctx_server->queue_tasks.post(std::move(task)[, priority]); return recv_slot_task_result(env, ctx_server, tid); Extract to dispatch_single_task(server_context*, server_task&, bool priority) placed immediately after dispatch_tasks so both single-task and batch dispatch helpers are adjacent. The priority parameter (default false) covers case 0 (LIST/metrics), which uses post(task, true). All four cases now resolve to task construction + one dispatch_single_task call wrapped in recv_slot_task_result, eliminating the repeated bookkeeping. No behaviour change; no new unit tests (the dispatch itself is integration- level, exercised by the existing slot-action Java tests). https://claude.ai/code/session_01FuMTQYYhDX9bPB8WPNwGda --- src/main/cpp/jllama.cpp | 51 ++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/src/main/cpp/jllama.cpp b/src/main/cpp/jllama.cpp index b31327c2..ab90bc26 100644 --- a/src/main/cpp/jllama.cpp +++ b/src/main/cpp/jllama.cpp @@ -156,6 +156,25 @@ static std::unordered_set dispatch_tasks(server_context *ctx_server, return task_ids; } +/** + * Register a single task for result waiting, post it, and return its ID. + * + * Variant of dispatch_tasks for one-shot tasks (slot actions) that are + * dispatched individually rather than in a batch. The `priority` flag maps + * to the second argument of queue_tasks.post() — set true for metrics/LIST + * queries that must jump ahead of normal completion work. + * + * After the call, `task` is in a valid but unspecified state (moved-from). 
+ */ +static int dispatch_single_task(server_context *ctx_server, + server_task &task, + bool priority = false) { + const int tid = task.id; + ctx_server->queue_results.add_waiting_task_id(tid); + ctx_server->queue_tasks.post(std::move(task), priority); + return tid; +} + /** * Asserts that exactly one task was created after dispatch and returns its ID. * Returns 0 (with a JNI exception pending) if the count is not exactly 1. @@ -1294,14 +1313,11 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleSlotAction(JNIEn if (!ctx_server) return nullptr; switch (action) { - case 0: { // LIST — get slot info via metrics + case 0: { // LIST — get slot info via metrics (priority post) server_task task(SERVER_TASK_TYPE_METRICS); task.id = ctx_server->queue_tasks.get_new_id(); - ctx_server->queue_results.add_waiting_task_id(task.id); - int id = task.id; - ctx_server->queue_tasks.post(std::move(task), true); - - return recv_slot_task_result(env, ctx_server, id); + return recv_slot_task_result(env, ctx_server, + dispatch_single_task(ctx_server, task, /*priority=*/true)); } case 1: { // SAVE std::string filename = jfilename != nullptr ? parse_jstring(env, jfilename) : ""; @@ -1309,18 +1325,12 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleSlotAction(JNIEn env->ThrowNew(c_llama_error, "Filename is required for slot save"); return nullptr; } - server_task task(SERVER_TASK_TYPE_SLOT_SAVE); task.id = ctx_server->queue_tasks.get_new_id(); task.slot_action.id_slot = slotId; task.slot_action.filename = filename; task.slot_action.filepath = filename; - - int tid = task.id; - ctx_server->queue_results.add_waiting_task_id(tid); - ctx_server->queue_tasks.post(std::move(task)); - - return recv_slot_task_result(env, ctx_server, tid); + return recv_slot_task_result(env, ctx_server, dispatch_single_task(ctx_server, task)); } case 2: { // RESTORE std::string filename = jfilename != nullptr ? 
parse_jstring(env, jfilename) : ""; @@ -1328,29 +1338,18 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleSlotAction(JNIEn env->ThrowNew(c_llama_error, "Filename is required for slot restore"); return nullptr; } - server_task task(SERVER_TASK_TYPE_SLOT_RESTORE); task.id = ctx_server->queue_tasks.get_new_id(); task.slot_action.id_slot = slotId; task.slot_action.filename = filename; task.slot_action.filepath = filename; - - int tid = task.id; - ctx_server->queue_results.add_waiting_task_id(tid); - ctx_server->queue_tasks.post(std::move(task)); - - return recv_slot_task_result(env, ctx_server, tid); + return recv_slot_task_result(env, ctx_server, dispatch_single_task(ctx_server, task)); } case 3: { // ERASE server_task task(SERVER_TASK_TYPE_SLOT_ERASE); task.id = ctx_server->queue_tasks.get_new_id(); task.slot_action.id_slot = slotId; - - int tid = task.id; - ctx_server->queue_results.add_waiting_task_id(tid); - ctx_server->queue_tasks.post(std::move(task)); - - return recv_slot_task_result(env, ctx_server, tid); + return recv_slot_task_result(env, ctx_server, dispatch_single_task(ctx_server, task)); } default: env->ThrowNew(c_llama_error, "Invalid slot action"); From 769f258457d66a1a50fcb7ef9aeb4621eb90323f Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 5 Apr 2026 15:45:45 +0000 Subject: [PATCH 19/34] Finding J: extract parse_oai_chat_params, replace 2 duplicate try/catch blocks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit handleChatCompletions and requestChatCompletion both contained the same 9-line try/catch for parsing the OAI chat body: json data; try { std::vector files; data = oaicompat_chat_params_parse(body, ctx_server->oai_parser_opt, files); } catch (const std::exception &e) { throw_invalid_request(env, e); return [nullptr | 0]; } The only difference was the error sentinel (nullptr vs 0). 
Extract to parse_oai_chat_params(env, ctx_server, body, json& out) → bool, placed immediately after throw_invalid_request so both error-handling helpers are adjacent. Callers become: json data; if (!parse_oai_chat_params(env, ctx_server, body, data)) return [sentinel]; No behaviour change; no new unit tests (the underlying oaicompat_chat_params_ parse is llama.cpp internals; error-path coverage is provided by the existing Java integration tests). https://claude.ai/code/session_01FuMTQYYhDX9bPB8WPNwGda --- src/main/cpp/jllama.cpp | 43 +++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/src/main/cpp/jllama.cpp b/src/main/cpp/jllama.cpp index ab90bc26..d35e28cd 100644 --- a/src/main/cpp/jllama.cpp +++ b/src/main/cpp/jllama.cpp @@ -123,6 +123,29 @@ static void throw_invalid_request(JNIEnv *env, const std::exception &e) { env->ThrowNew(c_llama_error, err.dump().c_str()); } +/** + * Parse the OAI chat-completion body through oaicompat_chat_params_parse and + * write the result into `out`. Returns true on success. On parse failure + * throws an invalid-request JNI exception and returns false; the caller must + * return its own sentinel value (nullptr or 0) immediately. + * + * handleChatCompletions and requestChatCompletion share this identical 9-line + * try/catch block — they differ only in what sentinel they return on error. + */ +[[nodiscard]] static bool parse_oai_chat_params(JNIEnv *env, + server_context *ctx_server, + const json &body, + json &out) { + try { + std::vector files; + out = oaicompat_chat_params_parse(body, ctx_server->oai_parser_opt, files); + return true; + } catch (const std::exception &e) { + throw_invalid_request(env, e); + return false; + } +} + /** * Convenience wrapper around build_completion_tasks_impl (jni_server_helpers.hpp) * that supplies the module-level globals so call sites need no boilerplate. 
@@ -878,15 +901,8 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleChatCompletions( json body = parse_json_params(env, jparams); - // Apply chat template via OAI-compatible parser json data; - try { - std::vector files; - data = oaicompat_chat_params_parse(body, ctx_server->oai_parser_opt, files); - } catch (const std::exception &e) { - throw_invalid_request(env, e); - return nullptr; - } + if (!parse_oai_chat_params(env, ctx_server, body, data)) return nullptr; auto completion_id = gen_chatcmplid(); std::vector tasks; @@ -910,19 +926,12 @@ JNIEXPORT jint JNICALL Java_de_kherud_llama_LlamaModel_requestChatCompletion(JNI json body = parse_json_params(env, jparams); - // Apply chat template via OAI-compatible parser + // OAICOMPAT_TYPE_NONE: chat template is applied by parse_oai_chat_params below. json data; - try { - std::vector files; - data = oaicompat_chat_params_parse(body, ctx_server->oai_parser_opt, files); - } catch (const std::exception &e) { - throw_invalid_request(env, e); - return 0; - } + if (!parse_oai_chat_params(env, ctx_server, body, data)) return 0; auto completion_id = gen_chatcmplid(); std::vector tasks; - // OAICOMPAT_TYPE_NONE: chat template was already applied by oaicompat_chat_params_parse above. if (!build_completion_tasks(env, ctx_server, data, completion_id, SERVER_TASK_TYPE_COMPLETION, OAICOMPAT_TYPE_NONE, tasks)) return 0; From 1a62d86c6c8d2852e783d5c34c92f56001e4846c Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 5 Apr 2026 16:15:42 +0000 Subject: [PATCH 20/34] Fix build error and warnings: const json& in parse_oai_chat_params, nodiscard casts, applyTemplate guard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three issues from the CI build: 1. Build error: oaicompat_chat_params_parse takes json& (non-const) but parse_oai_chat_params declared its body parameter as const json&, causing a qualifier-discard error. Fix: remove const from the body parameter. 2. 
applyTemplate called oaicompat_chat_params_parse without a try/catch, leaving parse errors to escape the JNI boundary. Now routes through parse_oai_chat_params, matching handleChatCompletions and requestChatCompletion. Also removes the redundant jtok_str local. 3. Four test call sites deliberately discard [[nodiscard]] return values (the test only cares about the side-effects, not the return). Add (void) casts to silence the -Wunused-result warnings: - test_jni_helpers.cpp: 2 × get_server_context_impl - test_jni_server_helpers.cpp: 2 × collect_task_results_impl, 2 × recv_slot_task_result_impl https://claude.ai/code/session_01FuMTQYYhDX9bPB8WPNwGda --- src/main/cpp/jllama.cpp | 10 ++++------ src/test/cpp/test_jni_helpers.cpp | 4 ++-- src/test/cpp/test_jni_server_helpers.cpp | 8 ++++---- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/src/main/cpp/jllama.cpp b/src/main/cpp/jllama.cpp index d35e28cd..0ac294cc 100644 --- a/src/main/cpp/jllama.cpp +++ b/src/main/cpp/jllama.cpp @@ -134,7 +134,7 @@ static void throw_invalid_request(JNIEnv *env, const std::exception &e) { */ [[nodiscard]] static bool parse_oai_chat_params(JNIEnv *env, server_context *ctx_server, - const json &body, + json &body, json &out) { try { std::vector files; @@ -885,13 +885,11 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_applyTemplate(JNIEnv * json data = parse_json_params(env, jparams); - std::vector files; - json templateData = oaicompat_chat_params_parse(data, ctx_server->oai_parser_opt, files); + json templateData; + if (!parse_oai_chat_params(env, ctx_server, data, templateData)) return nullptr; std::string tok_str = templateData.at("prompt"); - jstring jtok_str = env->NewStringUTF(tok_str.c_str()); - - return jtok_str; + return env->NewStringUTF(tok_str.c_str()); } JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleChatCompletions(JNIEnv *env, jobject obj, diff --git a/src/test/cpp/test_jni_helpers.cpp b/src/test/cpp/test_jni_helpers.cpp index 
0f6f8f5b..c828b265 100644 --- a/src/test/cpp/test_jni_helpers.cpp +++ b/src/test/cpp/test_jni_helpers.cpp @@ -127,7 +127,7 @@ TEST_F(MockJniFixture, ValidHandle_ReturnsServerContextAndDoesNotThrow) { TEST_F(MockJniFixture, NullHandle_ErrorMessageIsExact) { g_mock_handle = 0; - get_server_context_impl(env, nullptr, dummy_field, dummy_class); + (void)get_server_context_impl(env, nullptr, dummy_field, dummy_class); ASSERT_TRUE(g_throw_called); EXPECT_EQ(g_throw_message, "Model is not loaded"); @@ -143,7 +143,7 @@ TEST_F(MockJniFixture, ValidHandle_NeverCallsThrowNew) { fake_ctx.server = sentinel; g_mock_handle = reinterpret_cast(&fake_ctx); - get_server_context_impl(env, nullptr, dummy_field, dummy_class); + (void)get_server_context_impl(env, nullptr, dummy_field, dummy_class); EXPECT_FALSE(g_throw_called); } diff --git a/src/test/cpp/test_jni_server_helpers.cpp b/src/test/cpp/test_jni_server_helpers.cpp index f89ee157..546412eb 100644 --- a/src/test/cpp/test_jni_server_helpers.cpp +++ b/src/test/cpp/test_jni_server_helpers.cpp @@ -210,7 +210,7 @@ TEST_F(CollectResultsFixture, SuccessPath_WaitingIdsRemovedAfterCollect) { std::unordered_set ids = {30}; std::vector out; - collect_task_results_impl(env, queue, ids, out, dummy_eclass); + (void)collect_task_results_impl(env, queue, ids, out, dummy_eclass); // After collect, the id must no longer be in the waiting set. 
// We verify indirectly: sending a second result for id=30 should @@ -230,7 +230,7 @@ TEST_F(CollectResultsFixture, ErrorPath_WaitingIdsRemovedAfterError) { std::unordered_set ids = {40}; std::vector out; - collect_task_results_impl(env, queue, ids, out, dummy_eclass); + (void)collect_task_results_impl(env, queue, ids, out, dummy_eclass); EXPECT_FALSE(queue.waiting_task_ids.count(40)) << "remove_waiting_task_ids must clear the id on error"; @@ -328,7 +328,7 @@ TEST_F(CollectResultsFixture, RecvSlotResult_WaitingIdRemovedAfterSuccess) { queue.add_waiting_task_id(52); queue.send(make_ok(52)); - recv_slot_task_result_impl(env, queue, 52, dummy_eclass); + (void)recv_slot_task_result_impl(env, queue, 52, dummy_eclass); EXPECT_FALSE(queue.waiting_task_ids.count(52)) << "remove_waiting_task_id must clear the id on success"; @@ -338,7 +338,7 @@ TEST_F(CollectResultsFixture, RecvSlotResult_WaitingIdRemovedAfterError) { queue.add_waiting_task_id(53); queue.send(make_error(53, "err")); - recv_slot_task_result_impl(env, queue, 53, dummy_eclass); + (void)recv_slot_task_result_impl(env, queue, 53, dummy_eclass); EXPECT_FALSE(queue.waiting_task_ids.count(53)) << "remove_waiting_task_id must clear the id on error"; From 06169a845cbf3f340c547f65cd06afe94c8f6eba Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 5 Apr 2026 16:45:50 +0000 Subject: [PATCH 21/34] Finding K: remove dead commented-out duplicate log block in loadModel The five-line commented block (duplicate of the active LOG_INF call immediately above it) served no purpose and would never be re-enabled in its current form. Deleted to reduce noise. 
https://claude.ai/code/session_01FuMTQYYhDX9bPB8WPNwGda --- src/main/cpp/jllama.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/main/cpp/jllama.cpp b/src/main/cpp/jllama.cpp index 0ac294cc..cfa56333 100644 --- a/src/main/cpp/jllama.cpp +++ b/src/main/cpp/jllama.cpp @@ -640,12 +640,6 @@ JNIEXPORT void JNICALL Java_de_kherud_llama_LlamaModel_loadModel(JNIEnv *env, jo common_chat_templates_source(ctx_server->chat_templates.get()).c_str(), common_chat_format_example(ctx_server->chat_templates.get(), ctx_server->params_base.use_jinja, ctx_server->params_base.default_template_kwargs).c_str()); - // print sample chat example to make it clear which template is used - // LOG_INF("%s: chat template, chat_template: %s, example_format: '%s'\n", __func__, - // common_chat_templates_source(ctx_server->chat_templates.get()), - // common_chat_format_example(*ctx_server->chat_templates.template_default, - // ctx_server->params_base.use_jinja) .c_str()); - ctx_server->queue_tasks.on_new_task( std::bind(&server_context::process_single_task, ctx_server, std::placeholders::_1)); ctx_server->queue_tasks.on_update_slots(std::bind(&server_context::update_slots, ctx_server)); From 6875e5f3501f2a813e797effd53e9f8c882f51b8 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 5 Apr 2026 16:47:43 +0000 Subject: [PATCH 22/34] Finding L: extract json_to_jstring_impl for repeated dump()+NewStringUTF pattern Five JNI functions (receiveCompletionJson, handleRerank, handleEmbeddings, handleTokenize, handleDetokenize) each contained the identical two-liner: std::string response_str = some_json.dump(); return env->NewStringUTF(response_str.c_str()); Extracted to json_to_jstring_impl in jni_server_helpers.hpp (placed next to results_to_jstring_impl, the analogous helper for result vectors) and a thin json_to_jstring wrapper in jllama.cpp. All five call sites updated. Three unit tests added in test_jni_server_helpers.cpp. 
https://claude.ai/code/session_01FuMTQYYhDX9bPB8WPNwGda --- src/main/cpp/jllama.cpp | 23 +++++++------ src/main/cpp/jni_server_helpers.hpp | 17 +++++++++ src/test/cpp/test_jni_server_helpers.cpp | 44 ++++++++++++++++++++++++ 3 files changed, 74 insertions(+), 10 deletions(-) diff --git a/src/main/cpp/jllama.cpp b/src/main/cpp/jllama.cpp index cfa56333..0ca0c5e3 100644 --- a/src/main/cpp/jllama.cpp +++ b/src/main/cpp/jllama.cpp @@ -241,6 +241,14 @@ static int require_single_task_id(JNIEnv *env, return results_to_jstring_impl(env, results); } +/** + * Convenience wrapper around json_to_jstring_impl (jni_server_helpers.hpp). + * Serialises any json value to a JNI string via dump() + NewStringUTF. + */ +[[nodiscard]] static jstring json_to_jstring(JNIEnv *env, const json &j) { + return json_to_jstring_impl(env, j); +} + /** * Convert a Java string to a std::string */ @@ -728,8 +736,7 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_receiveCompletionJson( ctx_server->queue_results.remove_waiting_task_id(id_task); } - std::string response_str = response.dump(); - return env->NewStringUTF(response_str.c_str()); + return json_to_jstring(env, response); } JNIEXPORT jfloatArray JNICALL Java_de_kherud_llama_LlamaModel_embed(JNIEnv *env, jobject obj, jstring jprompt) { @@ -869,8 +876,7 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleRerank(JNIEnv *e ctx_server->queue_results.remove_waiting_task_ids(task_ids); - std::string response_str = results_json.dump(); - return env->NewStringUTF(response_str.c_str()); + return json_to_jstring(env, results_json); } JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_applyTemplate(JNIEnv *env, jobject obj, jstring jparams) { @@ -1248,8 +1254,7 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleEmbeddings(JNIEn ? 
format_embeddings_response_oaicompat(body, responses, use_base64) : json(responses); - std::string response_str = root.dump(); - return env->NewStringUTF(response_str.c_str()); + return json_to_jstring(env, root); } JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleTokenize(JNIEnv *env, jobject obj, jstring jcontent, @@ -1288,8 +1293,7 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleTokenize(JNIEnv json data = format_tokenizer_response(tokens_response); - std::string response_str = data.dump(); - return env->NewStringUTF(response_str.c_str()); + return json_to_jstring(env, data); } JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleDetokenize(JNIEnv *env, jobject obj, @@ -1304,8 +1308,7 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleDetokenize(JNIEn json data = format_detokenized_response(detokenize(ctx_server, tokens)); - std::string response_str = data.dump(); - return env->NewStringUTF(response_str.c_str()); + return json_to_jstring(env, data); } JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleSlotAction(JNIEnv *env, jobject obj, jint action, diff --git a/src/main/cpp/jni_server_helpers.hpp b/src/main/cpp/jni_server_helpers.hpp index 3e0bda7c..355961bc 100644 --- a/src/main/cpp/jni_server_helpers.hpp +++ b/src/main/cpp/jni_server_helpers.hpp @@ -172,3 +172,20 @@ std::string s = response.dump(); return env->NewStringUTF(s.c_str()); } + +// --------------------------------------------------------------------------- +// json_to_jstring_impl +// +// Serialises any json value to a JNI string via dump() + NewStringUTF. +// Extracted from the repeated two-line pattern: +// +// std::string response_str = some_json.dump(); +// return env->NewStringUTF(response_str.c_str()); +// +// Used by receiveCompletionJson, handleRerank, handleEmbeddings, +// handleTokenize, and handleDetokenize. 
+// --------------------------------------------------------------------------- +[[nodiscard]] inline jstring json_to_jstring_impl(JNIEnv *env, const json &j) { + std::string s = j.dump(); + return env->NewStringUTF(s.c_str()); +} diff --git a/src/test/cpp/test_jni_server_helpers.cpp b/src/test/cpp/test_jni_server_helpers.cpp index 546412eb..f1e660d5 100644 --- a/src/test/cpp/test_jni_server_helpers.cpp +++ b/src/test/cpp/test_jni_server_helpers.cpp @@ -399,3 +399,47 @@ TEST_F(CollectResultsFixture, ResultsToJstring_EmptyVector_ReturnsEmptyArray) { EXPECT_TRUE(parsed.is_array()); EXPECT_TRUE(parsed.empty()); } + +// ============================================================ +// Tests for json_to_jstring_impl +// +// Verifies that any json value is serialised correctly via +// dump() + NewStringUTF. The stub captures the string so tests +// can round-trip parse it. +// ============================================================ + +TEST_F(CollectResultsFixture, JsonToJstring_Object_RoundTrips) { + json j = {{"key", "value"}, {"n", 42}}; + + jstring js = json_to_jstring_impl(env, j); + + EXPECT_NE(js, nullptr); + EXPECT_FALSE(g_new_string_utf_value.empty()); + json parsed = json::parse(g_new_string_utf_value); + EXPECT_TRUE(parsed.is_object()); + EXPECT_EQ(parsed.value("key", ""), "value"); + EXPECT_EQ(parsed.value("n", 0), 42); +} + +TEST_F(CollectResultsFixture, JsonToJstring_Array_RoundTrips) { + json j = json::array({1, 2, 3}); + + jstring js = json_to_jstring_impl(env, j); + + EXPECT_NE(js, nullptr); + json parsed = json::parse(g_new_string_utf_value); + EXPECT_TRUE(parsed.is_array()); + ASSERT_EQ(parsed.size(), 3u); + EXPECT_EQ(parsed[0], 1); + EXPECT_EQ(parsed[2], 3); +} + +TEST_F(CollectResultsFixture, JsonToJstring_ReturnsSentinelFromStub) { + // The mock NewStringUTF returns the 0xBEEF sentinel — verify the + // function propagates it unchanged so callers get a non-null jstring. 
+ json j = {{"ok", true}}; + + jstring js = json_to_jstring_impl(env, j); + + EXPECT_EQ(js, reinterpret_cast(0xBEEF)); +} From f0a0e3771e8b37201fcaecc772f00e255a423b46 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 5 Apr 2026 16:48:25 +0000 Subject: [PATCH 23/34] Finding M: extract exec_slot_file_task for SAVE/RESTORE slot actions The SAVE (case 1) and RESTORE (case 2) branches of handleSlotAction were identical 8-line blocks differing only in task type and error message. Extracted to exec_slot_file_task(), placed next to dispatch_single_task so the two dispatch helpers sit together. Each case now reduces to a single call expression. https://claude.ai/code/session_01FuMTQYYhDX9bPB8WPNwGda --- src/main/cpp/jllama.cpp | 62 ++++++++++++++++++++++++----------------- 1 file changed, 36 insertions(+), 26 deletions(-) diff --git a/src/main/cpp/jllama.cpp b/src/main/cpp/jllama.cpp index 0ca0c5e3..46b34df9 100644 --- a/src/main/cpp/jllama.cpp +++ b/src/main/cpp/jllama.cpp @@ -198,6 +198,34 @@ static int dispatch_single_task(server_context *ctx_server, return tid; } +/** + * Validates `jfilename`, builds a SAVE or RESTORE slot task, dispatches it, + * and returns the result as a jstring. Shared by the SAVE (case 1) and + * RESTORE (case 2) branches of handleSlotAction, which are identical except + * for the task type and the error message when the filename is empty. + * + * On missing filename: throws via JNI and returns nullptr. + * On success: returns the result JSON as a jstring. + */ +[[nodiscard]] static jstring exec_slot_file_task(JNIEnv *env, + server_context *ctx_server, + jint slotId, + jstring jfilename, + server_task_type task_type, + const char *empty_filename_error) { + const std::string filename = jfilename != nullptr ? 
parse_jstring(env, jfilename) : ""; + if (filename.empty()) { + env->ThrowNew(c_llama_error, empty_filename_error); + return nullptr; + } + server_task task(task_type); + task.id = ctx_server->queue_tasks.get_new_id(); + task.slot_action.id_slot = slotId; + task.slot_action.filename = filename; + task.slot_action.filepath = filename; + return recv_slot_task_result(env, ctx_server, dispatch_single_task(ctx_server, task)); +} + /** * Asserts that exactly one task was created after dispatch and returns its ID. * Returns 0 (with a JNI exception pending) if the count is not exactly 1. @@ -1323,32 +1351,14 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleSlotAction(JNIEn return recv_slot_task_result(env, ctx_server, dispatch_single_task(ctx_server, task, /*priority=*/true)); } - case 1: { // SAVE - std::string filename = jfilename != nullptr ? parse_jstring(env, jfilename) : ""; - if (filename.empty()) { - env->ThrowNew(c_llama_error, "Filename is required for slot save"); - return nullptr; - } - server_task task(SERVER_TASK_TYPE_SLOT_SAVE); - task.id = ctx_server->queue_tasks.get_new_id(); - task.slot_action.id_slot = slotId; - task.slot_action.filename = filename; - task.slot_action.filepath = filename; - return recv_slot_task_result(env, ctx_server, dispatch_single_task(ctx_server, task)); - } - case 2: { // RESTORE - std::string filename = jfilename != nullptr ? 
parse_jstring(env, jfilename) : ""; - if (filename.empty()) { - env->ThrowNew(c_llama_error, "Filename is required for slot restore"); - return nullptr; - } - server_task task(SERVER_TASK_TYPE_SLOT_RESTORE); - task.id = ctx_server->queue_tasks.get_new_id(); - task.slot_action.id_slot = slotId; - task.slot_action.filename = filename; - task.slot_action.filepath = filename; - return recv_slot_task_result(env, ctx_server, dispatch_single_task(ctx_server, task)); - } + case 1: // SAVE + return exec_slot_file_task(env, ctx_server, slotId, jfilename, + SERVER_TASK_TYPE_SLOT_SAVE, + "Filename is required for slot save"); + case 2: // RESTORE + return exec_slot_file_task(env, ctx_server, slotId, jfilename, + SERVER_TASK_TYPE_SLOT_RESTORE, + "Filename is required for slot restore"); case 3: { // ERASE server_task task(SERVER_TASK_TYPE_SLOT_ERASE); task.id = ctx_server->queue_tasks.get_new_id(); From 9b5e03ebb4691f88fe3d1967b39873468ab2974c Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 5 Apr 2026 16:49:59 +0000 Subject: [PATCH 24/34] Finding N: extract collect_and_serialize for the collect+results_to_jstring pipeline Four JNI handlers (handleChatCompletions, handleCompletions, handleCompletionsOai, handleInfill) each contained the identical four-line pipeline: std::vector results; results.reserve(task_ids.size()); if (!collect_task_results(...)) return nullptr; return results_to_jstring(...); Extracted to collect_and_serialize(), placed next to collect_task_results and results_to_jstring so the three serialisation helpers are co-located. All four call sites updated to a single-expression return. 
https://claude.ai/code/session_01FuMTQYYhDX9bPB8WPNwGda --- src/main/cpp/jllama.cpp | 51 +++++++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/src/main/cpp/jllama.cpp b/src/main/cpp/jllama.cpp index 46b34df9..1d5ff5c8 100644 --- a/src/main/cpp/jllama.cpp +++ b/src/main/cpp/jllama.cpp @@ -277,6 +277,31 @@ static int require_single_task_id(JNIEnv *env, return json_to_jstring_impl(env, j); } +/** + * Collect all results for the given task IDs from the server response queue, + * then serialise them to a JNI string. + * + * Combines the repeated four-line pipeline used by handleCompletions, + * handleCompletionsOai, handleChatCompletions, and handleInfill: + * + * std::vector results; + * results.reserve(task_ids.size()); + * if (!collect_task_results(env, ctx_server, task_ids, results)) return nullptr; + * return results_to_jstring(env, results); + * + * On error (collect_task_results returns false): a JNI exception is already + * pending; this function returns nullptr so the caller can propagate it. 
+ */ +[[nodiscard]] static jstring collect_and_serialize( + JNIEnv *env, + server_context *ctx_server, + const std::unordered_set &task_ids) { + std::vector results; + results.reserve(task_ids.size()); + if (!collect_task_results(env, ctx_server, task_ids, results)) return nullptr; + return results_to_jstring(env, results); +} + /** * Convert a Java string to a std::string */ @@ -937,12 +962,7 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleChatCompletions( const auto task_ids = dispatch_tasks(ctx_server, tasks); - // Collect all results (blocking) - std::vector results; - results.reserve(task_ids.size()); - if (!collect_task_results(env, ctx_server, task_ids, results)) return nullptr; - - return results_to_jstring(env, results); + return collect_and_serialize(env, ctx_server, task_ids); } JNIEXPORT jint JNICALL Java_de_kherud_llama_LlamaModel_requestChatCompletion(JNIEnv *env, jobject obj, @@ -1097,12 +1117,7 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleCompletions(JNIE const auto task_ids = dispatch_tasks(ctx_server, tasks); - // Collect all results (blocking) - std::vector results; - results.reserve(task_ids.size()); - if (!collect_task_results(env, ctx_server, task_ids, results)) return nullptr; - - return results_to_jstring(env, results); + return collect_and_serialize(env, ctx_server, task_ids); } JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleCompletionsOai(JNIEnv *env, jobject obj, @@ -1128,11 +1143,7 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleCompletionsOai(J const auto task_ids = dispatch_tasks(ctx_server, tasks); - std::vector results; - results.reserve(task_ids.size()); - if (!collect_task_results(env, ctx_server, task_ids, results)) return nullptr; - - return results_to_jstring(env, results); + return collect_and_serialize(env, ctx_server, task_ids); } JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleInfill(JNIEnv *env, jobject obj, jstring jparams) { @@ 
-1186,11 +1197,7 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleInfill(JNIEnv *e const auto task_ids = dispatch_tasks(ctx_server, tasks); - std::vector results; - results.reserve(task_ids.size()); - if (!collect_task_results(env, ctx_server, task_ids, results)) return nullptr; - - return results_to_jstring(env, results); + return collect_and_serialize(env, ctx_server, task_ids); } JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleEmbeddings(JNIEnv *env, jobject obj, From 8df38946d753d74329ad1191f73e18a4f62fe1ef Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 5 Apr 2026 17:20:44 +0000 Subject: [PATCH 25/34] Fix M: move exec_slot_file_task after parse_jstring/recv_slot_task_result exec_slot_file_task called parse_jstring and recv_slot_task_result but was placed before both definitions, causing 'use of undeclared identifier' errors on Clang (macOS CI). Moved to after parse_json_params where both helpers are already visible; note added in the doc comment explaining the placement. https://claude.ai/code/session_01FuMTQYYhDX9bPB8WPNwGda --- src/main/cpp/jllama.cpp | 59 ++++++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 28 deletions(-) diff --git a/src/main/cpp/jllama.cpp b/src/main/cpp/jllama.cpp index 1d5ff5c8..b9cf6fa2 100644 --- a/src/main/cpp/jllama.cpp +++ b/src/main/cpp/jllama.cpp @@ -198,34 +198,6 @@ static int dispatch_single_task(server_context *ctx_server, return tid; } -/** - * Validates `jfilename`, builds a SAVE or RESTORE slot task, dispatches it, - * and returns the result as a jstring. Shared by the SAVE (case 1) and - * RESTORE (case 2) branches of handleSlotAction, which are identical except - * for the task type and the error message when the filename is empty. - * - * On missing filename: throws via JNI and returns nullptr. - * On success: returns the result JSON as a jstring. 
- */ -[[nodiscard]] static jstring exec_slot_file_task(JNIEnv *env, - server_context *ctx_server, - jint slotId, - jstring jfilename, - server_task_type task_type, - const char *empty_filename_error) { - const std::string filename = jfilename != nullptr ? parse_jstring(env, jfilename) : ""; - if (filename.empty()) { - env->ThrowNew(c_llama_error, empty_filename_error); - return nullptr; - } - server_task task(task_type); - task.id = ctx_server->queue_tasks.get_new_id(); - task.slot_action.id_slot = slotId; - task.slot_action.filename = filename; - task.slot_action.filepath = filename; - return recv_slot_task_result(env, ctx_server, dispatch_single_task(ctx_server, task)); -} - /** * Asserts that exactly one task was created after dispatch and returns its ID. * Returns 0 (with a JNI exception pending) if the count is not exactly 1. @@ -328,6 +300,37 @@ static json parse_json_params(JNIEnv *env, jstring jparams) { return json::parse(parse_jstring(env, jparams)); } +/** + * Validates `jfilename`, builds a SAVE or RESTORE slot task, dispatches it, + * and returns the result as a jstring. Shared by the SAVE (case 1) and + * RESTORE (case 2) branches of handleSlotAction, which are identical except + * for the task type and the error message when the filename is empty. + * + * On missing filename: throws via JNI and returns nullptr. + * On success: returns the result JSON as a jstring. + * + * Placed here (after parse_jstring and recv_slot_task_result) because both + * helpers must be visible at the point of definition. + */ +[[nodiscard]] static jstring exec_slot_file_task(JNIEnv *env, + server_context *ctx_server, + jint slotId, + jstring jfilename, + server_task_type task_type, + const char *empty_filename_error) { + const std::string filename = jfilename != nullptr ? 
parse_jstring(env, jfilename) : ""; + if (filename.empty()) { + env->ThrowNew(c_llama_error, empty_filename_error); + return nullptr; + } + server_task task(task_type); + task.id = ctx_server->queue_tasks.get_new_id(); + task.slot_action.id_slot = slotId; + task.slot_action.filename = filename; + task.slot_action.filepath = filename; + return recv_slot_task_result(env, ctx_server, dispatch_single_task(ctx_server, task)); +} + char **parse_string_array(JNIEnv *env, const jobjectArray string_array, const jsize length) { auto *const result = static_cast(malloc(length * sizeof(char *))); From 339ae6b01981f8b8858d12de5bd00e1678902b23 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 5 Apr 2026 18:30:40 +0000 Subject: [PATCH 26/34] Finding O: use json_to_jstring_impl in recv_slot_task_result_impl success path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit recv_slot_task_result_impl had the last remaining inline dump()+NewStringUTF in the header — the same 2-line pattern that json_to_jstring_impl (Finding L) was extracted to replace. Now all serialisation in jni_server_helpers.hpp goes through json_to_jstring_impl. 
https://claude.ai/code/session_01FuMTQYYhDX9bPB8WPNwGda --- src/main/cpp/jni_server_helpers.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/main/cpp/jni_server_helpers.hpp b/src/main/cpp/jni_server_helpers.hpp index 355961bc..ae4f88d5 100644 --- a/src/main/cpp/jni_server_helpers.hpp +++ b/src/main/cpp/jni_server_helpers.hpp @@ -108,8 +108,7 @@ env->ThrowNew(error_class, error_msg.c_str()); return nullptr; } - std::string resp = result->to_json().dump(); - return env->NewStringUTF(resp.c_str()); + return json_to_jstring_impl(env, result->to_json()); } // --------------------------------------------------------------------------- From 8aac5bbde1493be73c150b534bb0329e8d74015e Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 5 Apr 2026 18:32:11 +0000 Subject: [PATCH 27/34] Finding U: split results_to_json_impl out of results_to_jstring_impl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit results_to_jstring_impl combined two concerns: building the JSON value (single-object vs array) and serialising it to a jstring. The construction logic is now in a standalone results_to_json_impl() that requires no JNI, placed immediately before results_to_jstring_impl which is reduced to a one-liner delegating to results_to_json_impl + json_to_jstring_impl. Three unit tests added in test_jni_server_helpers.cpp covering single, multiple, and empty result sets — no JNI mock needed. 
https://claude.ai/code/session_01FuMTQYYhDX9bPB8WPNwGda --- src/main/cpp/jni_server_helpers.hpp | 42 +++++++++++++++--------- src/test/cpp/test_jni_server_helpers.cpp | 38 +++++++++++++++++++++ 2 files changed, 64 insertions(+), 16 deletions(-) diff --git a/src/main/cpp/jni_server_helpers.hpp b/src/main/cpp/jni_server_helpers.hpp index ae4f88d5..e3ee8cf7 100644 --- a/src/main/cpp/jni_server_helpers.hpp +++ b/src/main/cpp/jni_server_helpers.hpp @@ -143,33 +143,43 @@ } // --------------------------------------------------------------------------- -// results_to_jstring_impl +// results_to_json_impl // -// Serialises a vector of task results to a jstring. +// Converts a vector of task results to a json value without touching JNI. // // When there is exactly one result, the top-level JSON is that result's object // directly. When there are multiple results, they are wrapped in a JSON array. // This mirrors the OpenAI API convention used by handleCompletions, // handleCompletionsOai, handleChatCompletions, and handleInfill. // -// Parameters are passed explicitly so the function is testable without a real -// JVM. The caller is responsible for checking that `results` is non-empty -// before calling (an empty vector produces an empty JSON array). +// Separated from results_to_jstring_impl so the construction logic is +// unit-testable without any JNI mock. The caller is responsible for checking +// that `results` is non-empty before calling (an empty vector produces an +// empty JSON array). 
// ---------------------------------------------------------------------------
-[[nodiscard]] inline jstring results_to_jstring_impl(
-    JNIEnv *env,
+[[nodiscard]] inline json results_to_json_impl(
     const std::vector<server_task_result_ptr> &results) {
-  json response;
   if (results.size() == 1) {
-    response = results[0]->to_json();
-  } else {
-    response = json::array();
-    for (const auto &res : results) {
-      response.push_back(res->to_json());
-    }
+    return results[0]->to_json();
   }
-  std::string s = response.dump();
-  return env->NewStringUTF(s.c_str());
+  json arr = json::array();
+  for (const auto &res : results) {
+    arr.push_back(res->to_json());
+  }
+  return arr;
+}
+
+// ---------------------------------------------------------------------------
+// results_to_jstring_impl
+//
+// Serialises a vector of task results to a jstring by delegating JSON
+// construction to results_to_json_impl and serialisation to
+// json_to_jstring_impl.
+// ---------------------------------------------------------------------------
+[[nodiscard]] inline jstring results_to_jstring_impl(
+    JNIEnv *env,
+    const std::vector<server_task_result_ptr> &results) {
+  return json_to_jstring_impl(env, results_to_json_impl(results));
 }

 // ---------------------------------------------------------------------------
diff --git a/src/test/cpp/test_jni_server_helpers.cpp b/src/test/cpp/test_jni_server_helpers.cpp
index f1e660d5..023784e1 100644
--- a/src/test/cpp/test_jni_server_helpers.cpp
+++ b/src/test/cpp/test_jni_server_helpers.cpp
@@ -400,6 +400,44 @@ TEST_F(CollectResultsFixture, ResultsToJstring_EmptyVector_ReturnsEmptyArray) {
   EXPECT_TRUE(parsed.empty());
 }
+// ============================================================ + +TEST(ResultsToJsonImpl, SingleResult_ReturnsObjectDirectly) { + std::vector results; + results.push_back(make_ok(1, "only")); + + json out = results_to_json_impl(results); + + EXPECT_TRUE(out.is_object()); + EXPECT_EQ(out.value("content", ""), "only"); +} + +TEST(ResultsToJsonImpl, MultipleResults_ReturnsArray) { + std::vector results; + results.push_back(make_ok(1, "a")); + results.push_back(make_ok(2, "b")); + + json out = results_to_json_impl(results); + + EXPECT_TRUE(out.is_array()); + ASSERT_EQ(out.size(), 2u); + EXPECT_EQ(out[0].value("content", ""), "a"); + EXPECT_EQ(out[1].value("content", ""), "b"); +} + +TEST(ResultsToJsonImpl, EmptyVector_ReturnsEmptyArray) { + std::vector results; + + json out = results_to_json_impl(results); + + EXPECT_TRUE(out.is_array()); + EXPECT_TRUE(out.empty()); +} + // ============================================================ // Tests for json_to_jstring_impl // From 1b7fd3bd5a76a3d812c08f912de70f7c4c7f6604 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 5 Apr 2026 18:33:45 +0000 Subject: [PATCH 28/34] =?UTF-8?q?Finding=20P:=20extract=20jint=5Farray=5Ft?= =?UTF-8?q?o=5Ftokens=5Fimpl=20for=20jintArray=E2=86=92vector?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit decodeBytes and handleDetokenize both contained the identical 4-line pattern: GetArrayLength / GetIntArrayElements / vector(elems, elems+len) / Release Extracted to jint_array_to_tokens_impl in jni_helpers.hpp (no server types needed), placed next to parse_jbytes. Uses JNI_ABORT uniformly — decodeBytes previously used flag 0 which caused an unnecessary copy-writeback on some JVM implementations. Thin jint_array_to_tokens wrapper added in jllama.cpp. Three unit tests added in test_jni_helpers.cpp with array-stub fixture verifying element copy and that JNI_ABORT is passed to Release. 
https://claude.ai/code/session_01FuMTQYYhDX9bPB8WPNwGda --- src/main/cpp/jllama.cpp | 21 ++++--- src/main/cpp/jni_helpers.hpp | 19 +++++++ src/test/cpp/test_jni_helpers.cpp | 92 +++++++++++++++++++++++++++++++ 3 files changed, 121 insertions(+), 11 deletions(-) diff --git a/src/main/cpp/jllama.cpp b/src/main/cpp/jllama.cpp index b9cf6fa2..87a716ec 100644 --- a/src/main/cpp/jllama.cpp +++ b/src/main/cpp/jllama.cpp @@ -357,6 +357,14 @@ void free_string_array(char **array, jsize length) { } } +/** + * Convenience wrapper around jint_array_to_tokens_impl (jni_helpers.hpp). + * Reads a Java int array into a vector using JNI_ABORT (read-only). + */ +[[nodiscard]] static std::vector jint_array_to_tokens(JNIEnv *env, jintArray array) { + return jint_array_to_tokens_impl(env, array); +} + /** * Since Java expects utf16 but std::strings are utf8, we can't directly use `env->NewString` or `env-NewString`, * but we directly send the bytes and do the conversion in Java. Unfortunately, there isn't a nice/standardized way to @@ -1029,12 +1037,7 @@ JNIEXPORT jbyteArray JNICALL Java_de_kherud_llama_LlamaModel_decodeBytes(JNIEnv auto *ctx_server = get_server_context(env, obj); if (!ctx_server) return nullptr; - jsize length = env->GetArrayLength(java_tokens); - jint *elements = env->GetIntArrayElements(java_tokens, nullptr); - std::vector tokens(elements, elements + length); - - env->ReleaseIntArrayElements(java_tokens, elements, 0); - + const auto tokens = jint_array_to_tokens(env, java_tokens); return parse_jbytes(env, detokenize(ctx_server, tokens)); } @@ -1339,11 +1342,7 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleDetokenize(JNIEn auto *ctx_server = get_server_context(env, obj); if (!ctx_server) return nullptr; - jsize length = env->GetArrayLength(jtokens); - jint *elements = env->GetIntArrayElements(jtokens, nullptr); - std::vector tokens(elements, elements + length); - env->ReleaseIntArrayElements(jtokens, elements, JNI_ABORT); - + const auto tokens = 
jint_array_to_tokens(env, jtokens);
     json data = format_detokenized_response(detokenize(ctx_server, tokens));
     return json_to_jstring(env, data);
diff --git a/src/main/cpp/jni_helpers.hpp b/src/main/cpp/jni_helpers.hpp
index d966580c..7c4c491c 100644
--- a/src/main/cpp/jni_helpers.hpp
+++ b/src/main/cpp/jni_helpers.hpp
@@ -107,3 +107,22 @@ struct jllama_context {
     }
     return *task_ids.begin();
 }
+
+// ---------------------------------------------------------------------------
+// jint_array_to_tokens_impl
+//
+// Reads a Java int array into a std::vector<llama_token> and releases the
+// JNI array elements with JNI_ABORT (read-only — no writeback needed).
+//
+// Extracted from the identical 4-line pattern repeated in decodeBytes and
+// handleDetokenize. Parameters are explicit so the function is unit-testable
+// without a real JVM.
+// ---------------------------------------------------------------------------
+[[nodiscard]] inline std::vector<llama_token> jint_array_to_tokens_impl(
+    JNIEnv *env, jintArray array) {
+  const jsize length = env->GetArrayLength(array);
+  jint *elements = env->GetIntArrayElements(array, nullptr);
+  std::vector<llama_token> tokens(elements, elements + length);
+  env->ReleaseIntArrayElements(array, elements, JNI_ABORT);
+  return tokens;
+}
diff --git a/src/test/cpp/test_jni_helpers.cpp b/src/test/cpp/test_jni_helpers.cpp
index c828b265..3dc0ec0c 100644
--- a/src/test/cpp/test_jni_helpers.cpp
+++ b/src/test/cpp/test_jni_helpers.cpp
@@ -245,3 +245,95 @@ TEST_F(MockJniFixture, RequireSingleTaskId_Multiple_ReturnsZeroAndThrows) {
   EXPECT_TRUE(g_throw_called);
   EXPECT_EQ(g_throw_message, "multitasking currently not supported");
 }
+
+// ============================================================
+// Tests for jint_array_to_tokens_impl()
+//
+// Needs GetArrayLength, GetIntArrayElements, and
+// ReleaseIntArrayElements stubs. GetIntArrayElements returns a
+// pointer to a static buffer. ReleaseIntArrayElements is a no-op.
+// ============================================================ + +namespace { + +static jint g_array_data[8] = {}; +static jsize g_array_length = 0; +static bool g_release_called = false; +static jint g_release_mode = -1; + +static jsize JNICALL stub_GetArrayLength(JNIEnv * /*env*/, jarray /*arr*/) { + return g_array_length; +} +static jint *JNICALL stub_GetIntArrayElements(JNIEnv * /*env*/, + jintArray /*arr*/, + jboolean * /*isCopy*/) { + return g_array_data; +} +static void JNICALL stub_ReleaseIntArrayElements(JNIEnv * /*env*/, + jintArray /*arr*/, + jint * /*elems*/, + jint mode) { + g_release_called = true; + g_release_mode = mode; +} + +JNIEnv *make_array_env(JNINativeInterface_ &table, JNIEnv_ &env_obj) { + std::memset(&table, 0, sizeof(table)); + table.GetArrayLength = stub_GetArrayLength; + table.GetIntArrayElements = stub_GetIntArrayElements; + table.ReleaseIntArrayElements = stub_ReleaseIntArrayElements; + env_obj.functions = &table; + return &env_obj; +} + +struct ArrayFixture : ::testing::Test { + JNINativeInterface_ table{}; + JNIEnv_ env_obj{}; + JNIEnv *env = nullptr; + + void SetUp() override { + env = make_array_env(table, env_obj); + g_release_called = false; + g_release_mode = -1; + std::memset(g_array_data, 0, sizeof(g_array_data)); + g_array_length = 0; + } +}; + +} // namespace + +TEST_F(ArrayFixture, JintArrayToTokens_EmptyArray_ReturnsEmptyVector) { + g_array_length = 0; + + auto tokens = jint_array_to_tokens_impl(env, nullptr); + + EXPECT_TRUE(tokens.empty()); + EXPECT_TRUE(g_release_called); + EXPECT_EQ(g_release_mode, JNI_ABORT); +} + +TEST_F(ArrayFixture, JintArrayToTokens_ThreeElements_CopiedCorrectly) { + g_array_data[0] = 10; + g_array_data[1] = 20; + g_array_data[2] = 30; + g_array_length = 3; + + auto tokens = jint_array_to_tokens_impl(env, nullptr); + + ASSERT_EQ(tokens.size(), 3u); + EXPECT_EQ(tokens[0], 10); + EXPECT_EQ(tokens[1], 20); + EXPECT_EQ(tokens[2], 30); +} + +TEST_F(ArrayFixture, 
JintArrayToTokens_ReleasesWithAbortFlag) { + // JNI_ABORT means no writeback — required since we only read the array. + g_array_length = 1; + g_array_data[0] = 42; + + (void)jint_array_to_tokens_impl(env, nullptr); + + EXPECT_TRUE(g_release_called); + EXPECT_EQ(g_release_mode, JNI_ABORT) + << "must use JNI_ABORT (no writeback) for read-only array access"; +} From 23430533505165236a94087d58a93174eeeb05db Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 5 Apr 2026 18:34:44 +0000 Subject: [PATCH 29/34] Finding Q: extract get_result_error_message for repeated error-string extraction Five sites used the identical expression: result->to_json()["message"].get() Extracted to get_result_error_message() in jni_server_helpers.hpp, placed before collect_task_results_impl (the first caller in the header). Updated in receiveCompletionJson, embed, handleRerank (jllama.cpp) and both _impl functions (jni_server_helpers.hpp). Two unit tests added. https://claude.ai/code/session_01FuMTQYYhDX9bPB8WPNwGda --- src/main/cpp/jllama.cpp | 9 +++------ src/main/cpp/jni_server_helpers.hpp | 23 +++++++++++++++++++---- src/test/cpp/test_jni_server_helpers.cpp | 17 +++++++++++++++++ 3 files changed, 39 insertions(+), 10 deletions(-) diff --git a/src/main/cpp/jllama.cpp b/src/main/cpp/jllama.cpp index 87a716ec..c661606c 100644 --- a/src/main/cpp/jllama.cpp +++ b/src/main/cpp/jllama.cpp @@ -787,9 +787,8 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_receiveCompletionJson( server_task_result_ptr result = ctx_server->queue_results.recv(id_task); if (result->is_error()) { - std::string response = result->to_json()["message"].get(); ctx_server->queue_results.remove_waiting_task_id(id_task); - env->ThrowNew(c_llama_error, response.c_str()); + env->ThrowNew(c_llama_error, get_result_error_message(result).c_str()); return nullptr; } @@ -837,9 +836,8 @@ JNIEXPORT jfloatArray JNICALL Java_de_kherud_llama_LlamaModel_embed(JNIEnv *env, server_task_result_ptr result = 
ctx_server->queue_results.recv(id_task); if (result->is_error()) { - std::string response = result->to_json()["message"].get(); ctx_server->queue_results.remove_waiting_task_id(id_task); - env->ThrowNew(c_llama_error, response.c_str()); + env->ThrowNew(c_llama_error, get_result_error_message(result).c_str()); return nullptr; } @@ -921,9 +919,8 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleRerank(JNIEnv *e for (size_t i = 0; i < task_ids.size(); i++) { server_task_result_ptr result = ctx_server->queue_results.recv(task_ids); if (result->is_error()) { - auto response = result->to_json()["message"].get(); ctx_server->queue_results.remove_waiting_task_ids(task_ids); - env->ThrowNew(c_llama_error, response.c_str()); + env->ThrowNew(c_llama_error, get_result_error_message(result).c_str()); return nullptr; } diff --git a/src/main/cpp/jni_server_helpers.hpp b/src/main/cpp/jni_server_helpers.hpp index e3ee8cf7..2f78268f 100644 --- a/src/main/cpp/jni_server_helpers.hpp +++ b/src/main/cpp/jni_server_helpers.hpp @@ -104,13 +104,29 @@ server_task_result_ptr result = queue.recv(task_id); queue.remove_waiting_task_id(task_id); if (result->is_error()) { - std::string error_msg = result->to_json()["message"].get(); - env->ThrowNew(error_class, error_msg.c_str()); + env->ThrowNew(error_class, get_result_error_message(result).c_str()); return nullptr; } return json_to_jstring_impl(env, result->to_json()); } +// --------------------------------------------------------------------------- +// get_result_error_message +// +// Extracts the human-readable error string from a failed task result. +// Equivalent to result->to_json()["message"].get(), which +// appears verbatim in five places: +// +// receiveCompletionJson, embed, handleRerank (in jllama.cpp) +// collect_task_results_impl, recv_slot_task_result_impl (in this header) +// +// Placed here (before the two _impl users) so both can call it. 
+// --------------------------------------------------------------------------- +[[nodiscard]] inline std::string get_result_error_message( + const server_task_result_ptr &result) { + return result->to_json()["message"].get(); +} + // --------------------------------------------------------------------------- // collect_task_results_impl // @@ -132,8 +148,7 @@ server_task_result_ptr result = queue.recv(task_ids); if (result->is_error()) { queue.remove_waiting_task_ids(task_ids); - std::string error_msg = result->to_json()["message"].get(); - env->ThrowNew(error_class, error_msg.c_str()); + env->ThrowNew(error_class, get_result_error_message(result).c_str()); return false; } out.push_back(std::move(result)); diff --git a/src/test/cpp/test_jni_server_helpers.cpp b/src/test/cpp/test_jni_server_helpers.cpp index 023784e1..475b8642 100644 --- a/src/test/cpp/test_jni_server_helpers.cpp +++ b/src/test/cpp/test_jni_server_helpers.cpp @@ -122,6 +122,23 @@ struct CollectResultsFixture : ::testing::Test { } // namespace +// ============================================================ +// Tests for get_result_error_message() +// +// No JNI needed — the function only calls result->to_json() and +// performs a JSON key lookup, both of which are pure C++. 
+// ============================================================ + +TEST(GetResultErrorMessage, ErrorResult_ReturnsMessageString) { + server_task_result_ptr r = make_error(1, "something went wrong"); + EXPECT_EQ(get_result_error_message(r), "something went wrong"); +} + +TEST(GetResultErrorMessage, DifferentMessage_ReturnsCorrectString) { + server_task_result_ptr r = make_error(2, "out of memory"); + EXPECT_EQ(get_result_error_message(r), "out of memory"); +} + // ============================================================ // Single success result // ============================================================ From b40ae3dbbda049f21b5ccc8ff12215267a3952d0 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 5 Apr 2026 18:35:14 +0000 Subject: [PATCH 30/34] Finding R: introduce fail_load lambda in loadModel for repeated cleanup loadModel had two near-identical 4-line error paths (vocab-only load failure and full-model load failure) differing only in the error message: delete ctx_server; delete jctx; llama_backend_free(); env->ThrowNew(...); Replaced with a local fail_load lambda defined once after jctx is allocated, capturing env/ctx_server/jctx by reference. Each call site is now a single self-documenting expression. https://claude.ai/code/session_01FuMTQYYhDX9bPB8WPNwGda --- src/main/cpp/jllama.cpp | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/main/cpp/jllama.cpp b/src/main/cpp/jllama.cpp index c661606c..46f363bf 100644 --- a/src/main/cpp/jllama.cpp +++ b/src/main/cpp/jllama.cpp @@ -655,14 +655,20 @@ JNIEXPORT void JNICALL Java_de_kherud_llama_LlamaModel_loadModel(JNIEnv *env, jo jctx->vocab_only = vocab_only; auto *ctx_server = jctx->server; + // Shared cleanup for load failures: tear down the context and throw. + // Used by both the vocab-only and full-model error paths below. 
+ auto fail_load = [&](const char *msg) { + delete ctx_server; + delete jctx; + llama_backend_free(); + env->ThrowNew(c_llama_error, msg); + }; + // Vocab-only mode: load just the tokenizer, skip inference setup. if (vocab_only) { SRV_INF("loading tokenizer from '%s'\n", params.model.path.c_str()); if (!ctx_server->load_tokenizer(params)) { - delete ctx_server; - delete jctx; - llama_backend_free(); - env->ThrowNew(c_llama_error, "could not load tokenizer from given file path"); + fail_load("could not load tokenizer from given file path"); return; } env->SetLongField(obj, f_model_pointer, reinterpret_cast(jctx)); @@ -693,10 +699,7 @@ JNIEXPORT void JNICALL Java_de_kherud_llama_LlamaModel_loadModel(JNIEnv *env, jo // load the model if (!ctx_server->load_model(params)) { - delete ctx_server; - delete jctx; - llama_backend_free(); - env->ThrowNew(c_llama_error, "could not load model from given file path"); + fail_load("could not load model from given file path"); return; } From df7418098009a50d90b7011239f35505c8d726e0 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 5 Apr 2026 18:36:14 +0000 Subject: [PATCH 31/34] Finding S: extract check_infill_support_impl for FIM token compatibility check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit handleInfill contained a 10-line block that checked for the three FIM tokens (prefix, suffix, middle) and threw a descriptive error if any were missing. Extracted to check_infill_support_impl() in jni_server_helpers.hpp (vocab and error_class passed explicitly), with a thin check_infill_support wrapper in jllama.cpp placed immediately before handleInfill. The handler body is now a single guard line: if (!check_infill_support(env, ctx_server)) return nullptr; Note: unit testing check_infill_support_impl requires a real llama_vocab (llama_vocab_fim_* are opaque C functions with no link-time seam), so no standalone test is included — coverage comes via the integration test suite. 
https://claude.ai/code/session_01FuMTQYYhDX9bPB8WPNwGda --- src/main/cpp/jllama.cpp | 24 +++++++++--------------- src/main/cpp/jni_server_helpers.hpp | 25 +++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 15 deletions(-) diff --git a/src/main/cpp/jllama.cpp b/src/main/cpp/jllama.cpp index 46f363bf..5adf96c3 100644 --- a/src/main/cpp/jllama.cpp +++ b/src/main/cpp/jllama.cpp @@ -1152,25 +1152,19 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleCompletionsOai(J return collect_and_serialize(env, ctx_server, task_ids); } +/** + * Convenience wrapper around check_infill_support_impl. + * Returns false (with a JNI exception pending) when the model lacks FIM tokens. + */ +[[nodiscard]] static bool check_infill_support(JNIEnv *env, server_context *ctx_server) { + return check_infill_support_impl(env, ctx_server->vocab, c_llama_error); +} + JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleInfill(JNIEnv *env, jobject obj, jstring jparams) { auto *ctx_server = get_server_context(env, obj); if (!ctx_server) return nullptr; - // Check model compatibility for infill - std::string err; - if (llama_vocab_fim_pre(ctx_server->vocab) == LLAMA_TOKEN_NULL) { - err += "prefix token is missing. "; - } - if (llama_vocab_fim_suf(ctx_server->vocab) == LLAMA_TOKEN_NULL) { - err += "suffix token is missing. "; - } - if (llama_vocab_fim_mid(ctx_server->vocab) == LLAMA_TOKEN_NULL) { - err += "middle token is missing. 
"; - } - if (!err.empty()) { - env->ThrowNew(c_llama_error, ("Infill is not supported by this model: " + err).c_str()); - return nullptr; - } + if (!check_infill_support(env, ctx_server)) return nullptr; json data = parse_json_params(env, jparams); diff --git a/src/main/cpp/jni_server_helpers.hpp b/src/main/cpp/jni_server_helpers.hpp index 2f78268f..0b95970c 100644 --- a/src/main/cpp/jni_server_helpers.hpp +++ b/src/main/cpp/jni_server_helpers.hpp @@ -213,3 +213,28 @@ std::string s = j.dump(); return env->NewStringUTF(s.c_str()); } + +// --------------------------------------------------------------------------- +// check_infill_support_impl +// +// Checks that the model vocabulary has all three fill-in-the-middle (FIM) +// tokens (prefix, suffix, middle). Returns true if infill is supported. +// On failure: populates a descriptive error message, throws via JNI using +// error_class, and returns false. +// +// Extracted from the 10-line compatibility block in handleInfill so it can +// be unit-tested independently of the JNI dispatch layer. +// --------------------------------------------------------------------------- +[[nodiscard]] inline bool check_infill_support_impl(JNIEnv *env, + const llama_vocab *vocab, + jclass error_class) { + std::string err; + if (llama_vocab_fim_pre(vocab) == LLAMA_TOKEN_NULL) { err += "prefix token is missing. "; } + if (llama_vocab_fim_suf(vocab) == LLAMA_TOKEN_NULL) { err += "suffix token is missing. "; } + if (llama_vocab_fim_mid(vocab) == LLAMA_TOKEN_NULL) { err += "middle token is missing. 
"; } + if (!err.empty()) { + env->ThrowNew(error_class, ("Infill is not supported by this model: " + err).c_str()); + return false; + } + return true; +} From 3817d7510ca22fc6cfdfcc440e9ea23c8b91d27f Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 5 Apr 2026 18:37:28 +0000 Subject: [PATCH 32/34] Finding T: extract require_json_field_impl for repeated field-presence checks handleInfill had two identical 3-line guards: if (!data.contains("input_prefix")) { ThrowNew(...); return nullptr; } if (!data.contains("input_suffix")) { ThrowNew(...); return nullptr; } Extracted to require_json_field_impl() in jni_helpers.hpp (json + error_class passed explicitly), with a thin require_json_field wrapper in jllama.cpp placed next to parse_json_params. Both call sites now reduce to single guard lines. Three unit tests added in test_jni_helpers.cpp covering: present field (no throw), missing field (throws with correct message), empty json object. https://claude.ai/code/session_01FuMTQYYhDX9bPB8WPNwGda --- src/main/cpp/jllama.cpp | 19 +++++++++------- src/main/cpp/jni_helpers.hpp | 26 ++++++++++++++++++++++ src/test/cpp/test_jni_helpers.cpp | 36 +++++++++++++++++++++++++++++++ 3 files changed, 73 insertions(+), 8 deletions(-) diff --git a/src/main/cpp/jllama.cpp b/src/main/cpp/jllama.cpp index 5adf96c3..04304abd 100644 --- a/src/main/cpp/jllama.cpp +++ b/src/main/cpp/jllama.cpp @@ -300,6 +300,15 @@ static json parse_json_params(JNIEnv *env, jstring jparams) { return json::parse(parse_jstring(env, jparams)); } +/** + * Convenience wrapper around require_json_field_impl (jni_helpers.hpp). + * Returns false and throws if `field` is absent from `data`. + */ +[[nodiscard]] static bool require_json_field(JNIEnv *env, const json &data, + const char *field) { + return require_json_field_impl(env, data, field, c_llama_error); +} + /** * Validates `jfilename`, builds a SAVE or RESTORE slot task, dispatches it, * and returns the result as a jstring. 
Shared by the SAVE (case 1) and @@ -1168,14 +1177,8 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleInfill(JNIEnv *e json data = parse_json_params(env, jparams); - if (!data.contains("input_prefix")) { - env->ThrowNew(c_llama_error, "\"input_prefix\" is required"); - return nullptr; - } - if (!data.contains("input_suffix")) { - env->ThrowNew(c_llama_error, "\"input_suffix\" is required"); - return nullptr; - } + if (!require_json_field(env, data, "input_prefix")) return nullptr; + if (!require_json_field(env, data, "input_suffix")) return nullptr; json input_extra = json_value(data, "input_extra", json::array()); data["input_extra"] = input_extra; diff --git a/src/main/cpp/jni_helpers.hpp b/src/main/cpp/jni_helpers.hpp index 7c4c491c..87c634dc 100644 --- a/src/main/cpp/jni_helpers.hpp +++ b/src/main/cpp/jni_helpers.hpp @@ -9,8 +9,10 @@ // the function is self-contained and unit-testable with mock JNI environments. #include "jni.h" +#include "nlohmann/json.hpp" #include +#include #include #include @@ -108,6 +110,30 @@ struct jllama_context { return *task_ids.begin(); } +// --------------------------------------------------------------------------- +// require_json_field_impl +// +// Checks that `data` contains the given key. Returns true if present. +// On missing key: throws " is required" via JNI and returns false. +// +// Extracted from the repeated pattern in handleInfill: +// if (!data.contains("input_prefix")) { ThrowNew(...); return nullptr; } +// if (!data.contains("input_suffix")) { ThrowNew(...); return nullptr; } +// +// Parameters are explicit so the function can be unit-tested without a real JVM. 
+// --------------------------------------------------------------------------- +[[nodiscard]] inline bool require_json_field_impl(JNIEnv *env, + const nlohmann::json &data, + const char *field, + jclass error_class) { + if (data.contains(field)) { + return true; + } + const std::string msg = std::string("\"") + field + "\" is required"; + env->ThrowNew(error_class, msg.c_str()); + return false; +} + // --------------------------------------------------------------------------- // jint_array_to_tokens_impl // diff --git a/src/test/cpp/test_jni_helpers.cpp b/src/test/cpp/test_jni_helpers.cpp index 3dc0ec0c..eed31666 100644 --- a/src/test/cpp/test_jni_helpers.cpp +++ b/src/test/cpp/test_jni_helpers.cpp @@ -337,3 +337,39 @@ TEST_F(ArrayFixture, JintArrayToTokens_ReleasesWithAbortFlag) { EXPECT_EQ(g_release_mode, JNI_ABORT) << "must use JNI_ABORT (no writeback) for read-only array access"; } + +// ============================================================ +// Tests for require_json_field_impl() +// +// Uses the ThrowNew stub from MockJniFixture to verify that the +// function throws (or does not throw) correctly. 
+// ============================================================ + +TEST_F(MockJniFixture, RequireJsonField_PresentField_ReturnsTrueNoThrow) { + nlohmann::json data = {{"input_prefix", "hello"}, {"other", 1}}; + + bool ok = require_json_field_impl(env, data, "input_prefix", dummy_class); + + EXPECT_TRUE(ok); + EXPECT_FALSE(g_throw_called); +} + +TEST_F(MockJniFixture, RequireJsonField_MissingField_ReturnsFalseAndThrows) { + nlohmann::json data = {{"other", 1}}; + + bool ok = require_json_field_impl(env, data, "input_prefix", dummy_class); + + EXPECT_FALSE(ok); + EXPECT_TRUE(g_throw_called); + EXPECT_EQ(g_throw_message, "\"input_prefix\" is required"); +} + +TEST_F(MockJniFixture, RequireJsonField_EmptyJson_ReturnsFalseAndThrows) { + nlohmann::json data = nlohmann::json::object(); + + bool ok = require_json_field_impl(env, data, "input_suffix", dummy_class); + + EXPECT_FALSE(ok); + EXPECT_TRUE(g_throw_called); + EXPECT_EQ(g_throw_message, "\"input_suffix\" is required"); +} From 16ef2332b4d6bc6a1435aa68ac80eb5646c20904 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 5 Apr 2026 18:46:43 +0000 Subject: [PATCH 33/34] Fix ordering in jni_server_helpers.hpp: define helpers before their callers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit get_result_error_message and json_to_jstring_impl were defined after recv_slot_task_result_impl and results_to_jstring_impl which call them, causing 'use of undeclared identifier' errors on Clang (macOS CI). Reordered to the correct dependency sequence: 1. get_result_error_message (leaf — no deps) 2. json_to_jstring_impl (leaf — no deps) 3. build_completion_tasks_impl 4. recv_slot_task_result_impl (uses 1 + 2) 5. collect_task_results_impl (uses 1) 6. results_to_json_impl 7. results_to_jstring_impl (uses 2 + 6) 8. check_infill_support_impl Added a declaration-order comment block at the top of the file to prevent future ordering regressions. 
https://claude.ai/code/session_01FuMTQYYhDX9bPB8WPNwGda --- src/main/cpp/jni_server_helpers.hpp | 83 ++++++++++++++++------------- 1 file changed, 47 insertions(+), 36 deletions(-) diff --git a/src/main/cpp/jni_server_helpers.hpp b/src/main/cpp/jni_server_helpers.hpp index 0b95970c..dd404a55 100644 --- a/src/main/cpp/jni_server_helpers.hpp +++ b/src/main/cpp/jni_server_helpers.hpp @@ -19,12 +19,57 @@ // server.hpp must be included by the including translation unit BEFORE this // header. server.hpp has no include guard, so including it here would cause // redefinition errors in any TU that already includes server.hpp directly. +// +// Declaration order (each function must be defined before its first caller): +// 1. get_result_error_message — used by recv_slot_task_result_impl, +// collect_task_results_impl +// 2. json_to_jstring_impl — used by recv_slot_task_result_impl, +// results_to_jstring_impl +// 3. build_completion_tasks_impl — no dependencies on helpers above +// 4. recv_slot_task_result_impl — uses 1 + 2 +// 5. collect_task_results_impl — uses 1 +// 6. results_to_json_impl — no dependencies on helpers above +// 7. results_to_jstring_impl — uses 2 + 6 +// 8. check_infill_support_impl — no dependencies on helpers above #include "jni.h" #include #include +// --------------------------------------------------------------------------- +// get_result_error_message +// +// Extracts the human-readable error string from a failed task result. 
+// Equivalent to result->to_json()["message"].get(), which +// appears verbatim in five places: +// +// receiveCompletionJson, embed, handleRerank (in jllama.cpp) +// collect_task_results_impl, recv_slot_task_result_impl (in this header) +// --------------------------------------------------------------------------- +[[nodiscard]] inline std::string get_result_error_message( + const server_task_result_ptr &result) { + return result->to_json()["message"].get(); +} + +// --------------------------------------------------------------------------- +// json_to_jstring_impl +// +// Serialises any json value to a JNI string via dump() + NewStringUTF. +// Extracted from the repeated two-line pattern: +// +// std::string response_str = some_json.dump(); +// return env->NewStringUTF(response_str.c_str()); +// +// Used by recv_slot_task_result_impl, results_to_jstring_impl, +// receiveCompletionJson, handleRerank, handleEmbeddings, +// handleTokenize, and handleDetokenize. +// --------------------------------------------------------------------------- +[[nodiscard]] inline jstring json_to_jstring_impl(JNIEnv *env, const json &j) { + std::string s = j.dump(); + return env->NewStringUTF(s.c_str()); +} + // --------------------------------------------------------------------------- // build_completion_tasks_impl // @@ -110,23 +155,6 @@ return json_to_jstring_impl(env, result->to_json()); } -// --------------------------------------------------------------------------- -// get_result_error_message -// -// Extracts the human-readable error string from a failed task result. -// Equivalent to result->to_json()["message"].get(), which -// appears verbatim in five places: -// -// receiveCompletionJson, embed, handleRerank (in jllama.cpp) -// collect_task_results_impl, recv_slot_task_result_impl (in this header) -// -// Placed here (before the two _impl users) so both can call it. 
-// --------------------------------------------------------------------------- -[[nodiscard]] inline std::string get_result_error_message( - const server_task_result_ptr &result) { - return result->to_json()["message"].get(); -} - // --------------------------------------------------------------------------- // collect_task_results_impl // @@ -197,23 +225,6 @@ return json_to_jstring_impl(env, results_to_json_impl(results)); } -// --------------------------------------------------------------------------- -// json_to_jstring_impl -// -// Serialises any json value to a JNI string via dump() + NewStringUTF. -// Extracted from the repeated two-line pattern: -// -// std::string response_str = some_json.dump(); -// return env->NewStringUTF(response_str.c_str()); -// -// Used by receiveCompletionJson, handleRerank, handleEmbeddings, -// handleTokenize, and handleDetokenize. -// --------------------------------------------------------------------------- -[[nodiscard]] inline jstring json_to_jstring_impl(JNIEnv *env, const json &j) { - std::string s = j.dump(); - return env->NewStringUTF(s.c_str()); -} - // --------------------------------------------------------------------------- // check_infill_support_impl // @@ -225,9 +236,9 @@ // Extracted from the 10-line compatibility block in handleInfill so it can // be unit-tested independently of the JNI dispatch layer. // --------------------------------------------------------------------------- -[[nodiscard]] inline bool check_infill_support_impl(JNIEnv *env, +[[nodiscard]] inline bool check_infill_support_impl(JNIEnv *env, const llama_vocab *vocab, - jclass error_class) { + jclass error_class) { std::string err; if (llama_vocab_fim_pre(vocab) == LLAMA_TOKEN_NULL) { err += "prefix token is missing. "; } if (llama_vocab_fim_suf(vocab) == LLAMA_TOKEN_NULL) { err += "suffix token is missing. 
"; } From e3dcf6b7795a53276efb750d2e8e7de2cb22de42 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 5 Apr 2026 18:50:30 +0000 Subject: [PATCH 34/34] Add /find-cpp-duplication Claude skill MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Saves the sharpened duplication-analysis query as a reusable slash command. Covers 8 categories: short repeated expressions, multi-line copy-paste, pipeline compositions, near-identical switch arms, mixed JNI/logic concerns, inconsistent helper usage, local cleanup sequences, and dead code. Reports only — no file modifications. https://claude.ai/code/session_01FuMTQYYhDX9bPB8WPNwGda --- .claude/commands/find-cpp-duplication.md | 61 ++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 .claude/commands/find-cpp-duplication.md diff --git a/.claude/commands/find-cpp-duplication.md b/.claude/commands/find-cpp-duplication.md new file mode 100644 index 00000000..eaf23276 --- /dev/null +++ b/.claude/commands/find-cpp-duplication.md @@ -0,0 +1,61 @@ +--- +name: find-cpp-duplication +description: Scan C++ source files in src/main/cpp/ for all categories of duplication and boilerplate — short repeated expressions, copy-pasted blocks, pipeline compositions, near-identical switch arms, mixed JNI/logic concerns, inconsistent helper usage, local cleanup sequences, and dead code. Reports findings only, no modifications. +--- + +Review the C++ source files in src/main/cpp/ (jllama.cpp, jni_helpers.hpp, +jni_server_helpers.hpp, server.hpp, utils.hpp) and identify all duplication +opportunities. Cast a wider net than start/end boilerplate — include patterns +anywhere inside a function body. + +Look for ALL of the following categories: + +1. SHORT REPEATED EXPRESSIONS (1–2 lines, 3+ sites) + Any single expression or two-line sequence that appears verbatim in three or + more places, even if surrounded by different context. + Example: result->to_json()["message"].get() + +2. 
REPEATED MULTI-LINE BLOCKS (3+ lines, 2+ sites) + Any block of 3 or more consecutive lines that is copy-pasted with at most + minor variation (different variable names or one differing string literal). + Example: four-line jintArray → vector read pattern. + +3. PIPELINE COMPOSITIONS + Any sequence of 2+ function calls that always appear chained together in the + same order at every call site. The chain itself is a candidate for wrapping. + Example: build_completion_tasks → dispatch_tasks → collect_and_serialize + +4. NEAR-IDENTICAL SWITCH CASES OR IF-ELSE ARMS + Two or more switch cases / if-else branches whose bodies differ only by one + variable, constant, or string literal. + +5. MIXED CONCERNS (JNI + LOGIC) + Functions where pure computation (no JNI calls) is interleaved with JNI + serialisation. The pure part is a candidate for extraction to a separately + testable _impl function. + Example: single-vs-array JSON construction inside results_to_jstring. + +6. INCONSISTENT HELPER USAGE + Places where an already-extracted helper exists but is not used — either + because the helper was added after the call site was written, or the call + site is inside a header that the helper lives in. + Example: a header function still using dump()+NewStringUTF after + json_to_jstring_impl was extracted. + +7. LOCAL CLEANUP SEQUENCES + Repeated tear-down sequences inside a single function (delete X; delete Y; + free(); ThrowNew()) that differ only in the error message — candidate for a + local lambda. + +8. DEAD CODE + Commented-out blocks that duplicate active code immediately above or below. 
+ +For each finding report: + - Category (from the list above) + - Exact file names and line numbers of every occurrence + - The minimal signature of the helper that would eliminate the duplication + - Whether the extraction is unit-testable without a real JVM or llama model + (i.e., can all llama.h / server.hpp dependencies be passed as parameters) + - Estimated line savings across all call sites + +Do not modify any files. Report only.