From 59cd94120869faaef1357211293fcb6fcd763ce1 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sun, 5 Apr 2026 07:52:44 +0000
Subject: [PATCH 1/4] refactor: extract get_server_context helper to eliminate
 JNI boilerplate
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move jllama_context struct to a new jni_helpers.hpp header and introduce
get_server_context_impl() — a dependency-injectable helper that validates
the Java-side model handle and returns the native server_context pointer.
A thin get_server_context() wrapper in jllama.cpp binds the module-level
globals so all call sites become a clean two-liner.

Replaces 19 repetitive 2-5 line blocks (reinterpret_cast + optional null
check + ThrowNew) with the helper across all JNIEXPORT functions.
As a side-effect, nine functions that previously had no null-check guard
now throw "Model is not loaded" instead of crashing on a stale handle.

Add src/test/cpp/test_jni_helpers.cpp: four unit tests exercising
get_server_context_impl() via a mock JNINativeInterface_ table — no JVM
required.  Wire the new test file and the JNI include path into the
BUILD_TESTING CMake target.

https://claude.ai/code/session_01UzZRdmP6koZtJpHgYns6rR
---
 CMakeLists.txt                    |   3 +
 src/main/cpp/jllama.cpp           | 138 ++++++++++------------------
 src/main/cpp/jni_helpers.hpp      |  58 ++++++++++++
 src/test/cpp/test_jni_helpers.cpp | 148 ++++++++++++++++++++++++++++++
 4 files changed, 257 insertions(+), 90 deletions(-)
 create mode 100644 src/main/cpp/jni_helpers.hpp
 create mode 100644 src/test/cpp/test_jni_helpers.cpp
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3bbc4f6a..03279091 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -206,12 +206,15 @@ if(BUILD_TESTING)
     add_executable(jllama_test
         src/test/cpp/test_utils.cpp
         src/test/cpp/test_server.cpp
+        src/test/cpp/test_jni_helpers.cpp
     )
 
     target_include_directories(jllama_test PRIVATE
         src/main/cpp
         # mtmd.h is not always propagated transitively — add it explicitly
         ${llama.cpp_SOURCE_DIR}/tools/mtmd
+        # jni.h / jni_md.h needed by jni_helpers.hpp (mock JNI tests, no JVM required)
+        ${JNI_INCLUDE_DIRS}
     )
     target_link_libraries(jllama_test PRIVATE common mtmd llama nlohmann_json GTest::gtest_main)
     target_compile_features(jllama_test PRIVATE cxx_std_17)
diff --git a/src/main/cpp/jllama.cpp b/src/main/cpp/jllama.cpp
index efc954be..84611c09 100644
--- a/src/main/cpp/jllama.cpp
+++ b/src/main/cpp/jllama.cpp
@@ -2,6 +2,7 @@
 
 #include "arg.h"
 #include "json-schema-to-grammar.h"
+#include "jni_helpers.hpp"
 #include "llama.h"
 #include "log.h"
 #include "nlohmann/json.hpp"
@@ -32,20 +33,17 @@ static constexpr int N_PARALLEL_AUTO = -1;
 // appropriate default and preserves pre-b7433 behaviour.
 static constexpr int N_PARALLEL_DEFAULT = 1;
 
+// jllama_context is defined in jni_helpers.hpp.
+
 /**
- * Wrapper that owns a server_context and the background worker thread.
- * Stored as the Java-side `ctx` (jlong) pointer. Using a wrapper allows
- * us to join the thread on close() instead of detaching it, which
- * eliminates the race between thread teardown and JVM shutdown.
+ * Convenience wrapper: extracts and validates the server_context from the
+ * Java-side model object using the module-level field-ID and error-class
+ * globals.  Returns nullptr (with a JNI exception pending) when the model
+ * is not loaded.
  */
-struct jllama_context {
-    server_context *server = nullptr;
-    std::thread worker;
-    bool vocab_only = false;
-    // Signals that the worker thread has entered start_loop() and is ready.
-    // Without this, terminate() can race with start_loop() setting running=true.
-    std::atomic<bool> worker_ready{false};
-};
+static server_context *get_server_context(JNIEnv *env, jobject obj) {
+    return get_server_context_impl(env, obj, f_model_pointer, c_llama_error);
+}
 
 JavaVM *g_vm = nullptr;
 
@@ -539,8 +537,8 @@ JNIEXPORT void JNICALL Java_de_kherud_llama_LlamaModel_loadModel(JNIEnv *env, jo
 }
 
 JNIEXPORT jint JNICALL Java_de_kherud_llama_LlamaModel_requestCompletion(JNIEnv *env, jobject obj, jstring jparams) {
-    jlong server_handle = env->GetLongField(obj, f_model_pointer);
-    auto *ctx_server = reinterpret_cast<jllama_context *>(server_handle)->server; // NOLINT(*-no-int-to-ptr)
+    auto *ctx_server = get_server_context(env, obj);
+    if (!ctx_server) return 0;
 
     std::string c_params = parse_jstring(env, jparams);
     json data = json::parse(c_params);
@@ -597,15 +595,15 @@ JNIEXPORT jint JNICALL Java_de_kherud_llama_LlamaModel_requestCompletion(JNIEnv
 }
 
 JNIEXPORT void JNICALL Java_de_kherud_llama_LlamaModel_releaseTask(JNIEnv *env, jobject obj, jint id_task) {
-    jlong server_handle = env->GetLongField(obj, f_model_pointer);
-    auto *ctx_server = reinterpret_cast<jllama_context *>(server_handle)->server; // NOLINT(*-no-int-to-ptr)
+    auto *ctx_server = get_server_context(env, obj);
+    if (!ctx_server) return;
     ctx_server->queue_results.remove_waiting_task_id(id_task);
 }
 
 JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_receiveCompletionJson(JNIEnv *env, jobject obj,
                                                                                jint id_task) {
-    jlong server_handle = env->GetLongField(obj, f_model_pointer);
-    auto *ctx_server = reinterpret_cast<jllama_context *>(server_handle)->server; // NOLINT(*-no-int-to-ptr)
+    auto *ctx_server = get_server_context(env, obj);
+    if (!ctx_server) return nullptr;
 
     server_task_result_ptr result = ctx_server->queue_results.recv(id_task);
 
@@ -628,8 +626,8 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_receiveCompletionJson(
 }
 
 JNIEXPORT jfloatArray JNICALL Java_de_kherud_llama_LlamaModel_embed(JNIEnv *env, jobject obj, jstring jprompt) {
-    jlong server_handle = env->GetLongField(obj, f_model_pointer);
-    auto *ctx_server = reinterpret_cast<jllama_context *>(server_handle)->server; // NOLINT(*-no-int-to-ptr)
+    auto *ctx_server = get_server_context(env, obj);
+    if (!ctx_server) return nullptr;
 
     if (!ctx_server->params_base.embedding) {
         env->ThrowNew(c_llama_error,
@@ -715,8 +713,8 @@ JNIEXPORT jfloatArray JNICALL Java_de_kherud_llama_LlamaModel_embed(JNIEnv *env,
 
 JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleRerank(JNIEnv *env, jobject obj, jstring jprompt,
                                                                       jobjectArray documents) {
-    jlong server_handle = env->GetLongField(obj, f_model_pointer);
-    auto *ctx_server = reinterpret_cast<jllama_context *>(server_handle)->server; // NOLINT(*-no-int-to-ptr)
+    auto *ctx_server = get_server_context(env, obj);
+    if (!ctx_server) return nullptr;
 
     if (!ctx_server->params_base.embedding || ctx_server->params_base.pooling_type != LLAMA_POOLING_TYPE_RANK) {
         env->ThrowNew(c_llama_error,
@@ -779,8 +777,8 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleRerank(JNIEnv *e
 }
 
 JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_applyTemplate(JNIEnv *env, jobject obj, jstring jparams) {
-    jlong server_handle = env->GetLongField(obj, f_model_pointer);
-    const auto *ctx_server = reinterpret_cast<jllama_context *>(server_handle)->server; // NOLINT(*-no-int-to-ptr)
+    auto *ctx_server = get_server_context(env, obj);
+    if (!ctx_server) return nullptr;
 
     std::string c_params = parse_jstring(env, jparams);
     json data = json::parse(c_params);
@@ -796,12 +794,8 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_applyTemplate(JNIEnv *
 
 JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleChatCompletions(JNIEnv *env, jobject obj,
                                                                                 jstring jparams) {
-    jlong server_handle = env->GetLongField(obj, f_model_pointer);
-    if (server_handle == 0) {
-        env->ThrowNew(c_llama_error, "Model is not loaded");
-        return nullptr;
-    }
-    auto *ctx_server = reinterpret_cast<jllama_context *>(server_handle)->server; // NOLINT(*-no-int-to-ptr)
+    auto *ctx_server = get_server_context(env, obj);
+    if (!ctx_server) return nullptr;
 
     std::string c_params = parse_jstring(env, jparams);
     json body = json::parse(c_params);
@@ -887,12 +881,8 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleChatCompletions(
 
 JNIEXPORT jint JNICALL Java_de_kherud_llama_LlamaModel_requestChatCompletion(JNIEnv *env, jobject obj,
                                                                              jstring jparams) {
-    jlong server_handle = env->GetLongField(obj, f_model_pointer);
-    if (server_handle == 0) {
-        env->ThrowNew(c_llama_error, "Model is not loaded");
-        return 0;
-    }
-    auto *ctx_server = reinterpret_cast<jllama_context *>(server_handle)->server; // NOLINT(*-no-int-to-ptr)
+    auto *ctx_server = get_server_context(env, obj);
+    if (!ctx_server) return 0;
 
     std::string c_params = parse_jstring(env, jparams);
     json body = json::parse(c_params);
@@ -953,8 +943,8 @@ JNIEXPORT jint JNICALL Java_de_kherud_llama_LlamaModel_requestChatCompletion(JNI
 }
 
 JNIEXPORT jintArray JNICALL Java_de_kherud_llama_LlamaModel_encode(JNIEnv *env, jobject obj, jstring jprompt) {
-    jlong server_handle = env->GetLongField(obj, f_model_pointer);
-    auto *ctx_server = reinterpret_cast<jllama_context *>(server_handle)->server; // NOLINT(*-no-int-to-ptr)
+    auto *ctx_server = get_server_context(env, obj);
+    if (!ctx_server) return nullptr;
 
     const std::string c_prompt = parse_jstring(env, jprompt);
 
@@ -974,8 +964,8 @@ JNIEXPORT jintArray JNICALL Java_de_kherud_llama_LlamaModel_encode(JNIEnv *env,
 
 JNIEXPORT jbyteArray JNICALL Java_de_kherud_llama_LlamaModel_decodeBytes(JNIEnv *env, jobject obj,
                                                                          jintArray java_tokens) {
-    jlong server_handle = env->GetLongField(obj, f_model_pointer);
-    auto *ctx_server = reinterpret_cast<jllama_context *>(server_handle)->server; // NOLINT(*-no-int-to-ptr)
+    auto *ctx_server = get_server_context(env, obj);
+    if (!ctx_server) return nullptr;
 
     jsize length = env->GetArrayLength(java_tokens);
     jint *elements = env->GetIntArrayElements(java_tokens, nullptr);
@@ -1025,8 +1015,8 @@ JNIEXPORT void JNICALL Java_de_kherud_llama_LlamaModel_delete(JNIEnv *env, jobje
 }
 
 JNIEXPORT void JNICALL Java_de_kherud_llama_LlamaModel_cancelCompletion(JNIEnv *env, jobject obj, jint id_task) {
-    jlong server_handle = env->GetLongField(obj, f_model_pointer);
-    auto *ctx_server = reinterpret_cast<jllama_context *>(server_handle)->server; // NOLINT(*-no-int-to-ptr)
+    auto *ctx_server = get_server_context(env, obj);
+    if (!ctx_server) return;
     std::unordered_set<int> id_tasks = {id_task};
     ctx_server->cancel_tasks(id_tasks);
     ctx_server->queue_results.remove_waiting_task_id(id_task);
@@ -1067,12 +1057,8 @@ JNIEXPORT jbyteArray JNICALL Java_de_kherud_llama_LlamaModel_jsonSchemaToGrammar
 
 JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleCompletions(JNIEnv *env, jobject obj,
                                                                             jstring jparams) {
-    jlong server_handle = env->GetLongField(obj, f_model_pointer);
-    if (server_handle == 0) {
-        env->ThrowNew(c_llama_error, "Model is not loaded");
-        return nullptr;
-    }
-    auto *ctx_server = reinterpret_cast<jllama_context *>(server_handle)->server; // NOLINT(*-no-int-to-ptr)
+    auto *ctx_server = get_server_context(env, obj);
+    if (!ctx_server) return nullptr;
 
     std::string c_params = parse_jstring(env, jparams);
     json data = json::parse(c_params);
@@ -1146,12 +1132,8 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleCompletions(JNIE
 
 JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleCompletionsOai(JNIEnv *env, jobject obj,
                                                                                jstring jparams) {
-    jlong server_handle = env->GetLongField(obj, f_model_pointer);
-    if (server_handle == 0) {
-        env->ThrowNew(c_llama_error, "Model is not loaded");
-        return nullptr;
-    }
-    auto *ctx_server = reinterpret_cast<jllama_context *>(server_handle)->server; // NOLINT(*-no-int-to-ptr)
+    auto *ctx_server = get_server_context(env, obj);
+    if (!ctx_server) return nullptr;
 
     std::string c_params = parse_jstring(env, jparams);
     json body = json::parse(c_params);
@@ -1226,12 +1208,8 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleCompletionsOai(J
 }
 
 JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleInfill(JNIEnv *env, jobject obj, jstring jparams) {
-    jlong server_handle = env->GetLongField(obj, f_model_pointer);
-    if (server_handle == 0) {
-        env->ThrowNew(c_llama_error, "Model is not loaded");
-        return nullptr;
-    }
-    auto *ctx_server = reinterpret_cast<jllama_context *>(server_handle)->server; // NOLINT(*-no-int-to-ptr)
+    auto *ctx_server = get_server_context(env, obj);
+    if (!ctx_server) return nullptr;
 
     // Check model compatibility for infill
     std::string err;
@@ -1341,12 +1319,8 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleInfill(JNIEnv *e
 
 JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleEmbeddings(JNIEnv *env, jobject obj,
                                                                            jstring jparams, jboolean joaiCompat) {
-    jlong server_handle = env->GetLongField(obj, f_model_pointer);
-    if (server_handle == 0) {
-        env->ThrowNew(c_llama_error, "Model is not loaded");
-        return nullptr;
-    }
-    auto *ctx_server = reinterpret_cast<jllama_context *>(server_handle)->server; // NOLINT(*-no-int-to-ptr)
+    auto *ctx_server = get_server_context(env, obj);
+    if (!ctx_server) return nullptr;
 
     if (!ctx_server->params_base.embedding) {
         env->ThrowNew(c_llama_error,
@@ -1442,12 +1416,8 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleEmbeddings(JNIEn
 JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleTokenize(JNIEnv *env, jobject obj, jstring jcontent,
                                                                          jboolean jaddSpecial,
                                                                          jboolean jwithPieces) {
-    jlong server_handle = env->GetLongField(obj, f_model_pointer);
-    if (server_handle == 0) {
-        env->ThrowNew(c_llama_error, "Model is not loaded");
-        return nullptr;
-    }
-    auto *ctx_server = reinterpret_cast<jllama_context *>(server_handle)->server; // NOLINT(*-no-int-to-ptr)
+    auto *ctx_server = get_server_context(env, obj);
+    if (!ctx_server) return nullptr;
 
     const std::string content = parse_jstring(env, jcontent);
     const bool add_special = jaddSpecial;
@@ -1485,12 +1455,8 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleTokenize(JNIEnv
 
 JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleDetokenize(JNIEnv *env, jobject obj,
                                                                            jintArray jtokens) {
-    jlong server_handle = env->GetLongField(obj, f_model_pointer);
-    if (server_handle == 0) {
-        env->ThrowNew(c_llama_error, "Model is not loaded");
-        return nullptr;
-    }
-    auto *ctx_server = reinterpret_cast<jllama_context *>(server_handle)->server; // NOLINT(*-no-int-to-ptr)
+    auto *ctx_server = get_server_context(env, obj);
+    if (!ctx_server) return nullptr;
 
     jsize length = env->GetArrayLength(jtokens);
     jint *elements = env->GetIntArrayElements(jtokens, nullptr);
@@ -1512,12 +1478,8 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleDetokenize(JNIEn
 
 JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleSlotAction(JNIEnv *env, jobject obj, jint action,
                                                                            jint slotId, jstring jfilename) {
-    jlong server_handle = env->GetLongField(obj, f_model_pointer);
-    if (server_handle == 0) {
-        env->ThrowNew(c_llama_error, "Model is not loaded");
-        return nullptr;
-    }
-    auto *ctx_server = reinterpret_cast<jllama_context *>(server_handle)->server; // NOLINT(*-no-int-to-ptr)
+    auto *ctx_server = get_server_context(env, obj);
+    if (!ctx_server) return nullptr;
 
     switch (action) {
     case 0: { // LIST — get slot info via metrics
@@ -1626,12 +1588,8 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleSlotAction(JNIEn
 
 JNIEXPORT jboolean JNICALL Java_de_kherud_llama_LlamaModel_configureParallelInference(JNIEnv *env, jobject obj,
                                                                                       jstring jconfig) {
-    jlong server_handle = env->GetLongField(obj, f_model_pointer);
-    if (server_handle == 0) {
-        env->ThrowNew(c_llama_error, "Model is not loaded");
-        return JNI_FALSE;
-    }
-    auto *ctx_server = reinterpret_cast<jllama_context *>(server_handle)->server; // NOLINT(*-no-int-to-ptr)
+    auto *ctx_server = get_server_context(env, obj);
+    if (!ctx_server) return JNI_FALSE;
 
     std::string config_str = parse_jstring(env, jconfig);
     json config = json::parse(config_str);
diff --git a/src/main/cpp/jni_helpers.hpp b/src/main/cpp/jni_helpers.hpp
new file mode 100644
index 00000000..98a11db3
--- /dev/null
+++ b/src/main/cpp/jni_helpers.hpp
@@ -0,0 +1,58 @@
+#pragma once
+
+// jni_helpers.hpp — JNI utility helpers for jllama.cpp
+//
+// Extracted from jllama.cpp so that the core logic can be tested without a
+// running JVM.  The single public entry point is get_server_context_impl(),
+// which validates the Java-side model handle and returns the native
+// server_context pointer.  All module-level globals are passed explicitly so
+// the function is self-contained and unit-testable with mock JNI environments.
+
+#include "jni.h"
+
+#include <atomic>
+#include <thread>
+
+// Forward declaration — callers that need the full definition must include
+// server.hpp themselves.
+struct server_context;
+
+// ---------------------------------------------------------------------------
+// jllama_context
+//
+// Owns a server_context and the background worker thread.  Stored as the
+// Java-side `ctx` (jlong) pointer.  Using a wrapper allows us to join the
+// thread on close() instead of detaching it, which eliminates the race
+// between thread teardown and JVM shutdown.
+// ---------------------------------------------------------------------------
+struct jllama_context {
+    server_context *server       = nullptr;
+    std::thread     worker;
+    bool            vocab_only   = false;
+    // Signals that the worker thread has entered start_loop() and is ready.
+    // Without this, terminate() can race with start_loop() setting running=true.
+    std::atomic<bool> worker_ready{false};
+};
+
+// ---------------------------------------------------------------------------
+// get_server_context_impl
+//
+// Reads the native handle stored in the Java LlamaModel object, validates it,
+// and returns the embedded server_context pointer.
+//
+// On success: returns a non-null server_context*.
+// On failure: throws "Model is not loaded" via JNI and returns nullptr.
+//
+// Parameters are passed explicitly (no module-level globals) so the function
+// can be exercised from unit tests using a mock JNIEnv.
+// ---------------------------------------------------------------------------
+inline server_context *get_server_context_impl(JNIEnv *env, jobject obj,
+                                               jfieldID field_id,
+                                               jclass   error_class) {
+    const jlong handle = env->GetLongField(obj, field_id);
+    if (handle == 0) {
+        env->ThrowNew(error_class, "Model is not loaded");
+        return nullptr;
+    }
+    return reinterpret_cast<jllama_context *>(handle)->server; // NOLINT(*-no-int-to-ptr)
+}
diff --git a/src/test/cpp/test_jni_helpers.cpp b/src/test/cpp/test_jni_helpers.cpp
new file mode 100644
index 00000000..1f12f3c7
--- /dev/null
+++ b/src/test/cpp/test_jni_helpers.cpp
@@ -0,0 +1,148 @@
+// Tests for get_server_context_impl() in jni_helpers.hpp.
+//
+// The function relies on JNIEnv, which is normally only available when a JVM
+// is running.  To keep this test self-contained (no JVM), we exploit the fact
+// that JNIEnv_ in C++ mode is a thin class whose every method dispatches
+// through a JNINativeInterface_ function-pointer table.  We zero-initialise
+// that table, then patch the two slots we actually call (GetLongField and
+// ThrowNew) with small lambda-backed stubs.
+//
+// Covered scenarios:
+//   - handle == 0  →  ThrowNew("Model is not loaded") called, nullptr returned
+//   - handle != 0  →  no throw, correct server_context* returned
+//   - ThrowNew is NOT called when the handle is valid
+
+#include <gtest/gtest.h>
+
+#include <cstring>
+
+// jni_helpers.hpp is the unit under test; it includes jni.h which defines
+// JNIEnv_ and JNINativeInterface_.
+#include "jni_helpers.hpp"
+
+// ============================================================
+// Minimal mock JNI environment
+// ============================================================
+
+namespace {
+
+// Mutable globals written by the stub functions, read by the tests.
+static jlong        g_mock_handle       = 0;
+static bool         g_throw_called      = false;
+static std::string  g_throw_message;
+
+// Stub that satisfies the JNINativeInterface_::GetLongField signature.
+static jlong JNICALL stub_GetLongField(JNIEnv * /*env*/, jobject /*obj*/,
+                                       jfieldID /*id*/) {
+    return g_mock_handle;
+}
+
+// Stub that satisfies the JNINativeInterface_::ThrowNew signature.
+static jint JNICALL stub_ThrowNew(JNIEnv * /*env*/, jclass /*clazz*/,
+                                  const char *msg) {
+    g_throw_called  = true;
+    g_throw_message = msg ? msg : "";
+    return 0;
+}
+
+// Build a JNIEnv that routes GetLongField and ThrowNew through our stubs.
+// All other slots remain null; any unexpected call will crash, acting as an
+// assertion that we only touch the two operations we intend to.
+JNIEnv *make_mock_env(JNINativeInterface_ &table, JNIEnv_ &env_obj) {
+    std::memset(&table, 0, sizeof(table));
+    table.GetLongField = stub_GetLongField;
+    table.ThrowNew     = stub_ThrowNew;
+    env_obj.functions  = &table;
+    return &env_obj;
+}
+
+// Convenience: reset all mock state before each test.
+struct MockJniFixture : ::testing::Test {
+    JNINativeInterface_ table{};
+    JNIEnv_             env_obj{};
+    JNIEnv             *env = nullptr;
+
+    // Dummy field/class handles — their values are never dereferenced by the
+    // stubs, so any non-null sentinel is fine.
+    jfieldID dummy_field = reinterpret_cast<jfieldID>(0x1);
+    jclass   dummy_class = reinterpret_cast<jclass>(0x2);
+
+    void SetUp() override {
+        env             = make_mock_env(table, env_obj);
+        g_mock_handle   = 0;
+        g_throw_called  = false;
+        g_throw_message.clear();
+    }
+};
+
+} // namespace
+
+// ============================================================
+// Test: null handle → ThrowNew + nullptr
+// ============================================================
+
+TEST_F(MockJniFixture, NullHandle_ThrowsAndReturnsNullptr) {
+    g_mock_handle = 0; // model not loaded
+
+    server_context *result =
+        get_server_context_impl(env, /*obj=*/nullptr, dummy_field, dummy_class);
+
+    EXPECT_EQ(result, nullptr)
+        << "Expected nullptr when the model handle is 0";
+    EXPECT_TRUE(g_throw_called)
+        << "Expected ThrowNew to be called for a null handle";
+    EXPECT_EQ(g_throw_message, "Model is not loaded");
+}
+
+// ============================================================
+// Test: valid handle → correct server_context* returned, no throw
+// ============================================================
+
+TEST_F(MockJniFixture, ValidHandle_ReturnsServerContextAndDoesNotThrow) {
+    // Build a minimal jllama_context on the stack.  We only need server to be
+    // set; worker and worker_ready are never touched by the helper.
+    // server_context is forward-declared in jni_helpers.hpp, so we can legally
+    // hold a pointer to it without a full definition.
+    server_context *sentinel = reinterpret_cast<server_context *>(0xDEADBEEF);
+    jllama_context  fake_ctx;
+    fake_ctx.server = sentinel;
+
+    g_mock_handle = reinterpret_cast<jlong>(&fake_ctx);
+
+    server_context *result =
+        get_server_context_impl(env, /*obj=*/nullptr, dummy_field, dummy_class);
+
+    EXPECT_EQ(result, sentinel)
+        << "Expected the server pointer embedded in jllama_context";
+    EXPECT_FALSE(g_throw_called)
+        << "ThrowNew must not be called for a valid handle";
+}
+
+// ============================================================
+// Test: ThrowNew message is exactly "Model is not loaded"
+//       (guards against future typo regressions)
+// ============================================================
+
+TEST_F(MockJniFixture, NullHandle_ErrorMessageIsExact) {
+    g_mock_handle = 0;
+
+    get_server_context_impl(env, nullptr, dummy_field, dummy_class);
+
+    ASSERT_TRUE(g_throw_called);
+    EXPECT_EQ(g_throw_message, "Model is not loaded");
+}
+
+// ============================================================
+// Test: ThrowNew is NOT called when handle is valid
+// ============================================================
+
+TEST_F(MockJniFixture, ValidHandle_NeverCallsThrowNew) {
+    server_context *sentinel = reinterpret_cast<server_context *>(0xCAFEBABE);
+    jllama_context  fake_ctx;
+    fake_ctx.server = sentinel;
+    g_mock_handle = reinterpret_cast<jlong>(&fake_ctx);
+
+    get_server_context_impl(env, nullptr, dummy_field, dummy_class);
+
+    EXPECT_FALSE(g_throw_called);
+}

From f1aa35fc93c116e290798390f490dc15ff122b9f Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sun, 5 Apr 2026 08:40:29 +0000
Subject: [PATCH 2/4] fix: move get_server_context wrapper after global
 variable declarations

The wrapper referenced f_model_pointer and c_llama_error which are
declared later in the anonymous namespace, causing a build error.
Moved the wrapper to after all globals, just before parse_jstring().

https://claude.ai/code/session_01UzZRdmP6koZtJpHgYns6rR
---
 src/main/cpp/jllama.cpp | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/main/cpp/jllama.cpp b/src/main/cpp/jllama.cpp
index 84611c09..0d9f6bd9 100644
--- a/src/main/cpp/jllama.cpp
+++ b/src/main/cpp/jllama.cpp
@@ -35,16 +35,6 @@ static constexpr int N_PARALLEL_DEFAULT = 1;
 
 // jllama_context is defined in jni_helpers.hpp.
 
-/**
- * Convenience wrapper: extracts and validates the server_context from the
- * Java-side model object using the module-level field-ID and error-class
- * globals.  Returns nullptr (with a JNI exception pending) when the model
- * is not loaded.
- */
-static server_context *get_server_context(JNIEnv *env, jobject obj) {
-    return get_server_context_impl(env, obj, f_model_pointer, c_llama_error);
-}
-
 JavaVM *g_vm = nullptr;
 
 // classes
@@ -102,6 +92,16 @@ jobject o_log_format_json = nullptr;
 jobject o_log_format_text = nullptr;
 jobject o_log_callback = nullptr;
 
+/**
+ * Convenience wrapper: extracts and validates the server_context from the
+ * Java-side model object using the module-level field-ID and error-class
+ * globals.  Returns nullptr (with a JNI exception pending) when the model
+ * is not loaded.
+ */
+static server_context *get_server_context(JNIEnv *env, jobject obj) {
+    return get_server_context_impl(env, obj, f_model_pointer, c_llama_error);
+}
+
 /**
  * Convert a Java string to a std::string
  */

From 4da41b16e37bb84fbc50960861396f4e1d575567 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sun, 5 Apr 2026 12:24:20 +0000
Subject: [PATCH 3/4] fix: disable AVX-512 in all builds to prevent SIGILL on
 unsupported CPUs

AVX-512 is only available on newer Intel CPUs (e.g. Intel Xeon Platinum
8370C).  The AMD EPYC 7763 used by GitHub Actions runners does not
support it.  When a libjllama.so compiled with AVX-512 instructions is
loaded on such a machine the JVM crashes immediately with SIGILL.

Set GGML_AVX512=OFF unconditionally so the distributed library runs on
any x86-64 CPU with at least AVX2 (available since ~2013).

https://claude.ai/code/session_01UzZRdmP6koZtJpHgYns6rR
---
 CMakeLists.txt | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 03279091..186669b8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -52,6 +52,18 @@ set(LLAMA_CURL ON)
 if(WIN32)
     set(LLAMA_BUILD_BORINGSSL ON CACHE BOOL "" FORCE)
 endif()
+
+# Disable AVX-512 for all builds.
+#
+# AVX-512 is only available on newer Intel CPUs (e.g. Intel Xeon Platinum 8370C).
+# Many common CI and production machines do NOT support it, including the
+# AMD EPYC 7763 used by GitHub Actions runners.  If a library compiled with
+# AVX-512 instructions is loaded on a CPU without AVX-512 support the JVM
+# crashes immediately with SIGILL (illegal instruction).
+#
+# Keeping AVX-512 off produces a binary that runs on any x86-64 machine with
+# at least AVX2, which covers the vast majority of hardware since ~2013.
+set(GGML_AVX512 OFF CACHE BOOL "" FORCE)
 FetchContent_Declare(
 	llama.cpp
 	GIT_REPOSITORY https://github.com/ggerganov/llama.cpp.git

From 0832ac3f2fc8a1f6e55df2cff95a960bcc9e4488 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sun, 5 Apr 2026 12:33:55 +0000
Subject: [PATCH 4/4] docs: expand AVX-512 comment with known compatibility
 problems

Document four concrete reasons why AVX-512 is disabled for all builds:
1. CPUs without AVX-512 support crash with SIGILL (AMD EPYC 7763 vs
   Intel Xeon Platinum 8370C is the exact CI scenario that triggered this)
2. Intel disabled AVX-512 on all Alder Lake / Raptor Lake desktop CPUs
   due to the P-core / E-core scheduler incompatibility
3. Clock throttling on Skylake-X server chips and AMD Ryzen 9000 (Zen 5)
   makes AVX-512 actively harmful for throughput-sensitive workloads
4. High power draw can destabilize systems near voltage limits

https://claude.ai/code/session_01UzZRdmP6koZtJpHgYns6rR
---
 CMakeLists.txt | 37 ++++++++++++++++++++++++++++++-------
 1 file changed, 30 insertions(+), 7 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 186669b8..d5d32f02 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -55,14 +55,37 @@ endif()
 
 # Disable AVX-512 for all builds.
 #
-# AVX-512 is only available on newer Intel CPUs (e.g. Intel Xeon Platinum 8370C).
-# Many common CI and production machines do NOT support it, including the
-# AMD EPYC 7763 used by GitHub Actions runners.  If a library compiled with
-# AVX-512 instructions is loaded on a CPU without AVX-512 support the JVM
-# crashes immediately with SIGILL (illegal instruction).
+# AVX-512 causes problems in several well-known scenarios, so we turn it off
+# unconditionally to keep the distributed library safe and broadly compatible.
 #
-# Keeping AVX-512 off produces a binary that runs on any x86-64 machine with
-# at least AVX2, which covers the vast majority of hardware since ~2013.
+# 1. Many CPUs simply do not support AVX-512 at all.  Loading a binary that
+#    contains AVX-512 instructions on such a machine crashes the JVM immediately
+#    with SIGILL (illegal instruction).  A real example from CI: the AMD EPYC
+#    7763 used by GitHub Actions runners has no AVX-512, while the Intel Xeon
+#    Platinum 8370C used in the manylinux build container does.  A library built
+#    on the Xeon would crash on the EPYC.
+#
+# 2. Intel disabled AVX-512 on 12th-gen (Alder Lake) and later desktop CPUs.
+#    Alder Lake mixes big P-cores (which have AVX-512 hardware) with small
+#    E-cores (which do not).  Because the OS scheduler can move a thread between
+#    core types at any time, running AVX-512 instructions is unsafe.  Intel
+#    first locked it out via BIOS/microcode updates, then began physically fusing
+#    it off in silicon on new chips.  All current shipping Alder Lake, Raptor
+#    Lake, and newer Intel desktop CPUs have AVX-512 permanently disabled.
+#
+# 3. Even CPUs that do support AVX-512 often throttle their clock speed while
+#    executing it.  Older Intel server chips (Skylake-X era) can drop hundreds
+#    of MHz across all cores — even cores not running AVX-512.  AMD Ryzen 9000
+#    (Zen 5) drops roughly 10 % in frequency when AVX-512 is active and takes
+#    more than 100 ms to ramp back up after the workload ends.
+#
+# 4. AVX-512's high instantaneous power draw can destabilize systems that are
+#    running near their voltage limits, leading to random crashes.
+#
+# Keeping AVX-512 off produces a binary that runs correctly on any x86-64
+# machine with at least AVX2 support, which covers virtually all hardware
+# built since 2013.  FORCE prevents llama.cpp's own cmake defaults from
+# re-enabling it regardless of the build flags passed on the command line.
 set(GGML_AVX512 OFF CACHE BOOL "" FORCE)
 FetchContent_Declare(
 	llama.cpp