From 59cd94120869faaef1357211293fcb6fcd763ce1 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 5 Apr 2026 07:52:44 +0000 Subject: [PATCH 1/4] refactor: extract get_server_context helper to eliminate JNI boilerplate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move jllama_context struct to a new jni_helpers.hpp header and introduce get_server_context_impl() — a dependency-injectable helper that validates the Java-side model handle and returns the native server_context pointer. A thin get_server_context() wrapper in jllama.cpp binds the module-level globals so all call sites become a clean two-liner. Replaces 19 repetitive 2-5 line blocks (reinterpret_cast + optional null check + ThrowNew) with the helper across all JNIEXPORT functions. As a side-effect, nine functions that previously had no null-check guard now throw "Model is not loaded" instead of crashing on a stale handle. Add src/test/cpp/test_jni_helpers.cpp: four unit tests exercising get_server_context_impl() via a mock JNINativeInterface_ table — no JVM required. Wire the new test file and the JNI include path into the BUILD_TESTING CMake target. https://claude.ai/code/session_01UzZRdmP6koZtJpHgYns6rR --- CMakeLists.txt | 3 + src/main/cpp/jllama.cpp | 138 ++++++++++------------------ src/main/cpp/jni_helpers.hpp | 58 ++++++++++++ src/test/cpp/test_jni_helpers.cpp | 148 ++++++++++++++++++++++++++++++ 4 files changed, 257 insertions(+), 90 deletions(-) create mode 100644 src/main/cpp/jni_helpers.hpp create mode 100644 src/test/cpp/test_jni_helpers.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 3bbc4f6a..03279091 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -206,12 +206,15 @@ if(BUILD_TESTING) add_executable(jllama_test src/test/cpp/test_utils.cpp src/test/cpp/test_server.cpp + src/test/cpp/test_jni_helpers.cpp ) target_include_directories(jllama_test PRIVATE src/main/cpp # mtmd.h is not always propagated transitively — add it explicitly ${llama.cpp_SOURCE_DIR}/tools/mtmd + # jni.h / jni_md.h needed by jni_helpers.hpp (mock JNI tests, no JVM required) + ${JNI_INCLUDE_DIRS} ) target_link_libraries(jllama_test PRIVATE common mtmd llama nlohmann_json GTest::gtest_main) target_compile_features(jllama_test PRIVATE cxx_std_17) diff --git a/src/main/cpp/jllama.cpp b/src/main/cpp/jllama.cpp index efc954be..84611c09 100644 --- a/src/main/cpp/jllama.cpp +++ b/src/main/cpp/jllama.cpp @@ -2,6 +2,7 @@ #include "arg.h" #include "json-schema-to-grammar.h" +#include "jni_helpers.hpp" #include "llama.h" #include "log.h" #include "nlohmann/json.hpp" @@ -32,20 +33,17 @@ static constexpr int N_PARALLEL_AUTO = -1; // appropriate default and preserves pre-b7433 behaviour. static constexpr int N_PARALLEL_DEFAULT = 1; +// jllama_context is defined in jni_helpers.hpp. + /** - * Wrapper that owns a server_context and the background worker thread. - * Stored as the Java-side `ctx` (jlong) pointer. Using a wrapper allows - * us to join the thread on close() instead of detaching it, which - * eliminates the race between thread teardown and JVM shutdown. + * Convenience wrapper: extracts and validates the server_context from the + * Java-side model object using the module-level field-ID and error-class + * globals. Returns nullptr (with a JNI exception pending) when the model + * is not loaded. */ -struct jllama_context { - server_context *server = nullptr; - std::thread worker; - bool vocab_only = false; - // Signals that the worker thread has entered start_loop() and is ready. - // Without this, terminate() can race with start_loop() setting running=true. - std::atomic worker_ready{false}; -}; +static server_context *get_server_context(JNIEnv *env, jobject obj) { + return get_server_context_impl(env, obj, f_model_pointer, c_llama_error); +} JavaVM *g_vm = nullptr; @@ -539,8 +537,8 @@ JNIEXPORT void JNICALL Java_de_kherud_llama_LlamaModel_loadModel(JNIEnv *env, jo } JNIEXPORT jint JNICALL Java_de_kherud_llama_LlamaModel_requestCompletion(JNIEnv *env, jobject obj, jstring jparams) { - jlong server_handle = env->GetLongField(obj, f_model_pointer); - auto *ctx_server = reinterpret_cast(server_handle)->server; // NOLINT(*-no-int-to-ptr) + auto *ctx_server = get_server_context(env, obj); + if (!ctx_server) return 0; std::string c_params = parse_jstring(env, jparams); json data = json::parse(c_params); @@ -597,15 +595,15 @@ JNIEXPORT jint JNICALL Java_de_kherud_llama_LlamaModel_requestCompletion(JNIEnv } JNIEXPORT void JNICALL Java_de_kherud_llama_LlamaModel_releaseTask(JNIEnv *env, jobject obj, jint id_task) { - jlong server_handle = env->GetLongField(obj, f_model_pointer); - auto *ctx_server = reinterpret_cast(server_handle)->server; // NOLINT(*-no-int-to-ptr) + auto *ctx_server = get_server_context(env, obj); + if (!ctx_server) return; ctx_server->queue_results.remove_waiting_task_id(id_task); } JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_receiveCompletionJson(JNIEnv *env, jobject obj, jint id_task) { - jlong server_handle = env->GetLongField(obj, f_model_pointer); - auto *ctx_server = reinterpret_cast(server_handle)->server; // NOLINT(*-no-int-to-ptr) + auto *ctx_server = get_server_context(env, obj); + if (!ctx_server) return nullptr; server_task_result_ptr result = ctx_server->queue_results.recv(id_task); @@ -628,8 +626,8 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_receiveCompletionJson( } JNIEXPORT jfloatArray JNICALL Java_de_kherud_llama_LlamaModel_embed(JNIEnv *env, jobject obj, jstring jprompt) { - jlong server_handle = env->GetLongField(obj, f_model_pointer); - auto *ctx_server = reinterpret_cast(server_handle)->server; // NOLINT(*-no-int-to-ptr) + auto *ctx_server = get_server_context(env, obj); + if (!ctx_server) return nullptr; if (!ctx_server->params_base.embedding) { env->ThrowNew(c_llama_error, @@ -715,8 +713,8 @@ JNIEXPORT jfloatArray JNICALL Java_de_kherud_llama_LlamaModel_embed(JNIEnv *env, JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleRerank(JNIEnv *env, jobject obj, jstring jprompt, jobjectArray documents) { - jlong server_handle = env->GetLongField(obj, f_model_pointer); - auto *ctx_server = reinterpret_cast(server_handle)->server; // NOLINT(*-no-int-to-ptr) + auto *ctx_server = get_server_context(env, obj); + if (!ctx_server) return nullptr; if (!ctx_server->params_base.embedding || ctx_server->params_base.pooling_type != LLAMA_POOLING_TYPE_RANK) { env->ThrowNew(c_llama_error, @@ -779,8 +777,8 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleRerank(JNIEnv *e } JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_applyTemplate(JNIEnv *env, jobject obj, jstring jparams) { - jlong server_handle = env->GetLongField(obj, f_model_pointer); - const auto *ctx_server = reinterpret_cast(server_handle)->server; // NOLINT(*-no-int-to-ptr) + auto *ctx_server = get_server_context(env, obj); + if (!ctx_server) return nullptr; std::string c_params = parse_jstring(env, jparams); json data = json::parse(c_params); @@ -796,12 +794,8 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_applyTemplate(JNIEnv * JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleChatCompletions(JNIEnv *env, jobject obj, jstring jparams) { - jlong server_handle = env->GetLongField(obj, f_model_pointer); - if (server_handle == 0) { - env->ThrowNew(c_llama_error, "Model is not loaded"); - return nullptr; - } - auto *ctx_server = reinterpret_cast(server_handle)->server; // NOLINT(*-no-int-to-ptr) + auto *ctx_server = get_server_context(env, obj); + if (!ctx_server) return nullptr; std::string c_params = parse_jstring(env, jparams); json body = json::parse(c_params); @@ -887,12 +881,8 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleChatCompletions( JNIEXPORT jint JNICALL Java_de_kherud_llama_LlamaModel_requestChatCompletion(JNIEnv *env, jobject obj, jstring jparams) { - jlong server_handle = env->GetLongField(obj, f_model_pointer); - if (server_handle == 0) { - env->ThrowNew(c_llama_error, "Model is not loaded"); - return 0; - } - auto *ctx_server = reinterpret_cast(server_handle)->server; // NOLINT(*-no-int-to-ptr) + auto *ctx_server = get_server_context(env, obj); + if (!ctx_server) return 0; std::string c_params = parse_jstring(env, jparams); json body = json::parse(c_params); @@ -953,8 +943,8 @@ JNIEXPORT jint JNICALL Java_de_kherud_llama_LlamaModel_requestChatCompletion(JNI } JNIEXPORT jintArray JNICALL Java_de_kherud_llama_LlamaModel_encode(JNIEnv *env, jobject obj, jstring jprompt) { - jlong server_handle = env->GetLongField(obj, f_model_pointer); - auto *ctx_server = reinterpret_cast(server_handle)->server; // NOLINT(*-no-int-to-ptr) + auto *ctx_server = get_server_context(env, obj); + if (!ctx_server) return nullptr; const std::string c_prompt = parse_jstring(env, jprompt); @@ -974,8 +964,8 @@ JNIEXPORT jintArray JNICALL Java_de_kherud_llama_LlamaModel_encode(JNIEnv *env, JNIEXPORT jbyteArray JNICALL Java_de_kherud_llama_LlamaModel_decodeBytes(JNIEnv *env, jobject obj, jintArray java_tokens) { - jlong server_handle = env->GetLongField(obj, f_model_pointer); - auto *ctx_server = reinterpret_cast(server_handle)->server; // NOLINT(*-no-int-to-ptr) + auto *ctx_server = get_server_context(env, obj); + if (!ctx_server) return nullptr; jsize length = env->GetArrayLength(java_tokens); jint *elements = env->GetIntArrayElements(java_tokens, nullptr); @@ -1025,8 +1015,8 @@ JNIEXPORT void JNICALL Java_de_kherud_llama_LlamaModel_delete(JNIEnv *env, jobje } JNIEXPORT void JNICALL Java_de_kherud_llama_LlamaModel_cancelCompletion(JNIEnv *env, jobject obj, jint id_task) { - jlong server_handle = env->GetLongField(obj, f_model_pointer); - auto *ctx_server = reinterpret_cast(server_handle)->server; // NOLINT(*-no-int-to-ptr) + auto *ctx_server = get_server_context(env, obj); + if (!ctx_server) return; std::unordered_set id_tasks = {id_task}; ctx_server->cancel_tasks(id_tasks); ctx_server->queue_results.remove_waiting_task_id(id_task); @@ -1067,12 +1057,8 @@ JNIEXPORT jbyteArray JNICALL Java_de_kherud_llama_LlamaModel_jsonSchemaToGrammar JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleCompletions(JNIEnv *env, jobject obj, jstring jparams) { - jlong server_handle = env->GetLongField(obj, f_model_pointer); - if (server_handle == 0) { - env->ThrowNew(c_llama_error, "Model is not loaded"); - return nullptr; - } - auto *ctx_server = reinterpret_cast(server_handle)->server; // NOLINT(*-no-int-to-ptr) + auto *ctx_server = get_server_context(env, obj); + if (!ctx_server) return nullptr; std::string c_params = parse_jstring(env, jparams); json data = json::parse(c_params); @@ -1146,12 +1132,8 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleCompletions(JNIE JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleCompletionsOai(JNIEnv *env, jobject obj, jstring jparams) { - jlong server_handle = env->GetLongField(obj, f_model_pointer); - if (server_handle == 0) { - env->ThrowNew(c_llama_error, "Model is not loaded"); - return nullptr; - } - auto *ctx_server = reinterpret_cast(server_handle)->server; // NOLINT(*-no-int-to-ptr) + auto *ctx_server = get_server_context(env, obj); + if (!ctx_server) return nullptr; std::string c_params = parse_jstring(env, jparams); json body = json::parse(c_params); @@ -1226,12 +1208,8 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleCompletionsOai(J } JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleInfill(JNIEnv *env, jobject obj, jstring jparams) { - jlong server_handle = env->GetLongField(obj, f_model_pointer); - if (server_handle == 0) { - env->ThrowNew(c_llama_error, "Model is not loaded"); - return nullptr; - } - auto *ctx_server = reinterpret_cast(server_handle)->server; // NOLINT(*-no-int-to-ptr) + auto *ctx_server = get_server_context(env, obj); + if (!ctx_server) return nullptr; // Check model compatibility for infill std::string err; @@ -1341,12 +1319,8 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleInfill(JNIEnv *e JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleEmbeddings(JNIEnv *env, jobject obj, jstring jparams, jboolean joaiCompat) { - jlong server_handle = env->GetLongField(obj, f_model_pointer); - if (server_handle == 0) { - env->ThrowNew(c_llama_error, "Model is not loaded"); - return nullptr; - } - auto *ctx_server = reinterpret_cast(server_handle)->server; // NOLINT(*-no-int-to-ptr) + auto *ctx_server = get_server_context(env, obj); + if (!ctx_server) return nullptr; if (!ctx_server->params_base.embedding) { env->ThrowNew(c_llama_error, @@ -1442,12 +1416,8 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleEmbeddings(JNIEn JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleTokenize(JNIEnv *env, jobject obj, jstring jcontent, jboolean jaddSpecial, jboolean jwithPieces) { - jlong server_handle = env->GetLongField(obj, f_model_pointer); - if (server_handle == 0) { - env->ThrowNew(c_llama_error, "Model is not loaded"); - return nullptr; - } - auto *ctx_server = reinterpret_cast(server_handle)->server; // NOLINT(*-no-int-to-ptr) + auto *ctx_server = get_server_context(env, obj); + if (!ctx_server) return nullptr; const std::string content = parse_jstring(env, jcontent); const bool add_special = jaddSpecial; @@ -1485,12 +1455,8 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleTokenize(JNIEnv JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleDetokenize(JNIEnv *env, jobject obj, jintArray jtokens) { - jlong server_handle = env->GetLongField(obj, f_model_pointer); - if (server_handle == 0) { - env->ThrowNew(c_llama_error, "Model is not loaded"); - return nullptr; - } - auto *ctx_server = reinterpret_cast(server_handle)->server; // NOLINT(*-no-int-to-ptr) + auto *ctx_server = get_server_context(env, obj); + if (!ctx_server) return nullptr; jsize length = env->GetArrayLength(jtokens); jint *elements = env->GetIntArrayElements(jtokens, nullptr); @@ -1512,12 +1478,8 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleDetokenize(JNIEn JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleSlotAction(JNIEnv *env, jobject obj, jint action, jint slotId, jstring jfilename) { - jlong server_handle = env->GetLongField(obj, f_model_pointer); - if (server_handle == 0) { - env->ThrowNew(c_llama_error, "Model is not loaded"); - return nullptr; - } - auto *ctx_server = reinterpret_cast(server_handle)->server; // NOLINT(*-no-int-to-ptr) + auto *ctx_server = get_server_context(env, obj); + if (!ctx_server) return nullptr; switch (action) { case 0: { // LIST — get slot info via metrics @@ -1626,12 +1588,8 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleSlotAction(JNIEn JNIEXPORT jboolean JNICALL Java_de_kherud_llama_LlamaModel_configureParallelInference(JNIEnv *env, jobject obj, jstring jconfig) { - jlong server_handle = env->GetLongField(obj, f_model_pointer); - if (server_handle == 0) { - env->ThrowNew(c_llama_error, "Model is not loaded"); - return JNI_FALSE; - } - auto *ctx_server = reinterpret_cast(server_handle)->server; // NOLINT(*-no-int-to-ptr) + auto *ctx_server = get_server_context(env, obj); + if (!ctx_server) return JNI_FALSE; std::string config_str = parse_jstring(env, jconfig); json config = json::parse(config_str); diff --git a/src/main/cpp/jni_helpers.hpp b/src/main/cpp/jni_helpers.hpp new file mode 100644 index 00000000..98a11db3 --- /dev/null +++ b/src/main/cpp/jni_helpers.hpp @@ -0,0 +1,58 @@ +#pragma once + +// jni_helpers.hpp — JNI utility helpers for jllama.cpp +// +// Extracted from jllama.cpp so that the core logic can be tested without a +// running JVM. The single public entry point is get_server_context_impl(), +// which validates the Java-side model handle and returns the native +// server_context pointer. All module-level globals are passed explicitly so +// the function is self-contained and unit-testable with mock JNI environments. + +#include "jni.h" + +#include +#include + +// Forward declaration — callers that need the full definition must include +// server.hpp themselves. +struct server_context; + +// --------------------------------------------------------------------------- +// jllama_context +// +// Owns a server_context and the background worker thread. Stored as the +// Java-side `ctx` (jlong) pointer. Using a wrapper allows us to join the +// thread on close() instead of detaching it, which eliminates the race +// between thread teardown and JVM shutdown. +// --------------------------------------------------------------------------- +struct jllama_context { + server_context *server = nullptr; + std::thread worker; + bool vocab_only = false; + // Signals that the worker thread has entered start_loop() and is ready. + // Without this, terminate() can race with start_loop() setting running=true. + std::atomic worker_ready{false}; +}; + +// --------------------------------------------------------------------------- +// get_server_context_impl +// +// Reads the native handle stored in the Java LlamaModel object, validates it, +// and returns the embedded server_context pointer. +// +// On success: returns a non-null server_context*. +// On failure: throws "Model is not loaded" via JNI and returns nullptr. +// +// Parameters are passed explicitly (no module-level globals) so the function +// can be exercised from unit tests using a mock JNIEnv. +// --------------------------------------------------------------------------- +inline server_context *get_server_context_impl(JNIEnv *env, jobject obj, + jfieldID field_id, + jclass error_class) { + const jlong handle = env->GetLongField(obj, field_id); + if (handle == 0) { + env->ThrowNew(error_class, "Model is not loaded"); + return nullptr; + } + return reinterpret_cast(handle)->server; // NOLINT(*-no-int-to-ptr) +} diff --git a/src/test/cpp/test_jni_helpers.cpp b/src/test/cpp/test_jni_helpers.cpp new file mode 100644 index 00000000..1f12f3c7 --- /dev/null +++ b/src/test/cpp/test_jni_helpers.cpp @@ -0,0 +1,148 @@ +// Tests for get_server_context_impl() in jni_helpers.hpp. +// +// The function relies on JNIEnv, which is normally only available when a JVM +// is running. To keep this test self-contained (no JVM), we exploit the fact +// that JNIEnv_ in C++ mode is a thin class whose every method dispatches +// through a JNINativeInterface_ function-pointer table. We zero-initialise +// that table, then patch the two slots we actually call (GetLongField and +// ThrowNew) with small lambda-backed stubs. +// +// Covered scenarios: +// - handle == 0 → ThrowNew("Model is not loaded") called, nullptr returned +// - handle != 0 → no throw, correct server_context* returned +// - ThrowNew is NOT called when the handle is valid + +#include + +#include + +// jni_helpers.hpp is the unit under test; it includes jni.h which defines +// JNIEnv_ and JNINativeInterface_. +#include "jni_helpers.hpp" + +// ============================================================ +// Minimal mock JNI environment +// ============================================================ + +namespace { + +// Mutable globals written by the stub functions, read by the tests. +static jlong g_mock_handle = 0; +static bool g_throw_called = false; +static std::string g_throw_message; + +// Stub that satisfies the JNINativeInterface_::GetLongField signature. +static jlong JNICALL stub_GetLongField(JNIEnv * /*env*/, jobject /*obj*/, + jfieldID /*id*/) { + return g_mock_handle; +} + +// Stub that satisfies the JNINativeInterface_::ThrowNew signature. +static jint JNICALL stub_ThrowNew(JNIEnv * /*env*/, jclass /*clazz*/, + const char *msg) { + g_throw_called = true; + g_throw_message = msg ? msg : ""; + return 0; +} + +// Build a JNIEnv that routes GetLongField and ThrowNew through our stubs. +// All other slots remain null; any unexpected call will crash, acting as an +// assertion that we only touch the two operations we intend to. +JNIEnv *make_mock_env(JNINativeInterface_ &table, JNIEnv_ &env_obj) { + std::memset(&table, 0, sizeof(table)); + table.GetLongField = stub_GetLongField; + table.ThrowNew = stub_ThrowNew; + env_obj.functions = &table; + return &env_obj; +} + +// Convenience: reset all mock state before each test. +struct MockJniFixture : ::testing::Test { + JNINativeInterface_ table{}; + JNIEnv_ env_obj{}; + JNIEnv *env = nullptr; + + // Dummy field/class handles — their values are never dereferenced by the + // stubs, so any non-null sentinel is fine. + jfieldID dummy_field = reinterpret_cast(0x1); + jclass dummy_class = reinterpret_cast(0x2); + + void SetUp() override { + env = make_mock_env(table, env_obj); + g_mock_handle = 0; + g_throw_called = false; + g_throw_message.clear(); + } +}; + +} // namespace + +// ============================================================ +// Test: null handle → ThrowNew + nullptr +// ============================================================ + +TEST_F(MockJniFixture, NullHandle_ThrowsAndReturnsNullptr) { + g_mock_handle = 0; // model not loaded + + server_context *result = + get_server_context_impl(env, /*obj=*/nullptr, dummy_field, dummy_class); + + EXPECT_EQ(result, nullptr) + << "Expected nullptr when the model handle is 0"; + EXPECT_TRUE(g_throw_called) + << "Expected ThrowNew to be called for a null handle"; + EXPECT_EQ(g_throw_message, "Model is not loaded"); +} + +// ============================================================ +// Test: valid handle → correct server_context* returned, no throw +// ============================================================ + +TEST_F(MockJniFixture, ValidHandle_ReturnsServerContextAndDoesNotThrow) { + // Build a minimal jllama_context on the stack. We only need server to be + // set; worker and worker_ready are never touched by the helper. + // server_context is forward-declared in jni_helpers.hpp, so we can legally + // hold a pointer to it without a full definition. + server_context *sentinel = reinterpret_cast(0xDEADBEEF); + jllama_context fake_ctx; + fake_ctx.server = sentinel; + + g_mock_handle = reinterpret_cast(&fake_ctx); + + server_context *result = + get_server_context_impl(env, /*obj=*/nullptr, dummy_field, dummy_class); + + EXPECT_EQ(result, sentinel) + << "Expected the server pointer embedded in jllama_context"; + EXPECT_FALSE(g_throw_called) + << "ThrowNew must not be called for a valid handle"; +} + +// ============================================================ +// Test: ThrowNew message is exactly "Model is not loaded" +// (guards against future typo regressions) +// ============================================================ + +TEST_F(MockJniFixture, NullHandle_ErrorMessageIsExact) { + g_mock_handle = 0; + + get_server_context_impl(env, nullptr, dummy_field, dummy_class); + + ASSERT_TRUE(g_throw_called); + EXPECT_EQ(g_throw_message, "Model is not loaded"); +} + +// ============================================================ +// Test: ThrowNew is NOT called when handle is valid +// ============================================================ + +TEST_F(MockJniFixture, ValidHandle_NeverCallsThrowNew) { + server_context *sentinel = reinterpret_cast(0xCAFEBABE); + jllama_context fake_ctx; + fake_ctx.server = sentinel; + g_mock_handle = reinterpret_cast(&fake_ctx); + + get_server_context_impl(env, nullptr, dummy_field, dummy_class); + + EXPECT_FALSE(g_throw_called); +} From f1aa35fc93c116e290798390f490dc15ff122b9f Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 5 Apr 2026 08:40:29 +0000 Subject: [PATCH 2/4] fix: move get_server_context wrapper after global variable declarations The wrapper referenced f_model_pointer and c_llama_error which are declared later in the anonymous namespace, causing a build error. Moved the wrapper to after all globals, just before parse_jstring(). https://claude.ai/code/session_01UzZRdmP6koZtJpHgYns6rR --- src/main/cpp/jllama.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/main/cpp/jllama.cpp b/src/main/cpp/jllama.cpp index 84611c09..0d9f6bd9 100644 --- a/src/main/cpp/jllama.cpp +++ b/src/main/cpp/jllama.cpp @@ -35,16 +35,6 @@ static constexpr int N_PARALLEL_DEFAULT = 1; // jllama_context is defined in jni_helpers.hpp. -/** - * Convenience wrapper: extracts and validates the server_context from the - * Java-side model object using the module-level field-ID and error-class - * globals. Returns nullptr (with a JNI exception pending) when the model - * is not loaded. - */ -static server_context *get_server_context(JNIEnv *env, jobject obj) { - return get_server_context_impl(env, obj, f_model_pointer, c_llama_error); -} - JavaVM *g_vm = nullptr; // classes @@ -102,6 +92,16 @@ jobject o_log_format_json = nullptr; jobject o_log_format_text = nullptr; jobject o_log_callback = nullptr; +/** + * Convenience wrapper: extracts and validates the server_context from the + * Java-side model object using the module-level field-ID and error-class + * globals. Returns nullptr (with a JNI exception pending) when the model + * is not loaded. + */ +static server_context *get_server_context(JNIEnv *env, jobject obj) { + return get_server_context_impl(env, obj, f_model_pointer, c_llama_error); +} + /** * Convert a Java string to a std::string */ From 4da41b16e37bb84fbc50960861396f4e1d575567 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 5 Apr 2026 12:24:20 +0000 Subject: [PATCH 3/4] fix: disable AVX-512 in all builds to prevent SIGILL on unsupported CPUs AVX-512 is only available on newer Intel CPUs (e.g. Intel Xeon Platinum 8370C). The AMD EPYC 7763 used by GitHub Actions runners does not support it. When a libjllama.so compiled with AVX-512 instructions is loaded on such a machine the JVM crashes immediately with SIGILL. Set GGML_AVX512=OFF unconditionally so the distributed library runs on any x86-64 CPU with at least AVX2 (available since ~2013). https://claude.ai/code/session_01UzZRdmP6koZtJpHgYns6rR --- CMakeLists.txt | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 03279091..186669b8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -52,6 +52,18 @@ set(LLAMA_CURL ON) if(WIN32) set(LLAMA_BUILD_BORINGSSL ON CACHE BOOL "" FORCE) endif() + +# Disable AVX-512 for all builds. +# +# AVX-512 is only available on newer Intel CPUs (e.g. Intel Xeon Platinum 8370C). +# Many common CI and production machines do NOT support it, including the +# AMD EPYC 7763 used by GitHub Actions runners. If a library compiled with +# AVX-512 instructions is loaded on a CPU without AVX-512 support the JVM +# crashes immediately with SIGILL (illegal instruction). +# +# Keeping AVX-512 off produces a binary that runs on any x86-64 machine with +# at least AVX2, which covers the vast majority of hardware since ~2013. +set(GGML_AVX512 OFF CACHE BOOL "" FORCE) FetchContent_Declare( llama.cpp GIT_REPOSITORY https://github.com/ggerganov/llama.cpp.git From 0832ac3f2fc8a1f6e55df2cff95a960bcc9e4488 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 5 Apr 2026 12:33:55 +0000 Subject: [PATCH 4/4] docs: expand AVX-512 comment with known compatibility problems Document four concrete reasons why AVX-512 is disabled for all builds: 1. CPUs without AVX-512 support crash with SIGILL (AMD EPYC 7763 vs Intel Xeon Platinum 8370C is the exact CI scenario that triggered this) 2. Intel disabled AVX-512 on all Alder Lake / Raptor Lake desktop CPUs due to the P-core / E-core scheduler incompatibility 3. Clock throttling on Skylake-X server chips and AMD Ryzen 9000 (Zen 5) makes AVX-512 actively harmful for throughput-sensitive workloads 4. High power draw can destabilize systems near voltage limits https://claude.ai/code/session_01UzZRdmP6koZtJpHgYns6rR --- CMakeLists.txt | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 186669b8..d5d32f02 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -55,14 +55,37 @@ endif() # Disable AVX-512 for all builds. # -# AVX-512 is only available on newer Intel CPUs (e.g. Intel Xeon Platinum 8370C). -# Many common CI and production machines do NOT support it, including the -# AMD EPYC 7763 used by GitHub Actions runners. If a library compiled with -# AVX-512 instructions is loaded on a CPU without AVX-512 support the JVM -# crashes immediately with SIGILL (illegal instruction). +# AVX-512 causes problems in several well-known scenarios, so we turn it off +# unconditionally to keep the distributed library safe and broadly compatible. # -# Keeping AVX-512 off produces a binary that runs on any x86-64 machine with -# at least AVX2, which covers the vast majority of hardware since ~2013. +# 1. Many CPUs simply do not support AVX-512 at all. Loading a binary that +# contains AVX-512 instructions on such a machine crashes the JVM immediately +# with SIGILL (illegal instruction). A real example from CI: the AMD EPYC +# 7763 used by GitHub Actions runners has no AVX-512, while the Intel Xeon +# Platinum 8370C used in the manylinux build container does. A library built +# on the Xeon would crash on the EPYC. +# +# 2. Intel disabled AVX-512 on 12th-gen (Alder Lake) and later desktop CPUs. +# Alder Lake mixes big P-cores (which have AVX-512 hardware) with small +# E-cores (which do not). Because the OS scheduler can move a thread between +# core types at any time, running AVX-512 instructions is unsafe. Intel +# first locked it out via BIOS/microcode updates, then began physically fusing +# it off in silicon on new chips. All current shipping Alder Lake, Raptor +# Lake, and newer Intel desktop CPUs have AVX-512 permanently disabled. +# +# 3. Even CPUs that do support AVX-512 often throttle their clock speed while +# executing it. Older Intel server chips (Skylake-X era) can drop hundreds +# of MHz across all cores — even cores not running AVX-512. AMD Ryzen 9000 +# (Zen 5) drops roughly 10 % in frequency when AVX-512 is active and takes +# more than 100 ms to ramp back up after the workload ends. +# +# 4. AVX-512's high instantaneous power draw can destabilize systems that are +# running near their voltage limits, leading to random crashes. +# +# Keeping AVX-512 off produces a binary that runs correctly on any x86-64 +# machine with at least AVX2 support, which covers virtually all hardware +# built since 2013. FORCE prevents llama.cpp's own cmake defaults from +# re-enabling it regardless of the build flags passed on the command line. set(GGML_AVX512 OFF CACHE BOOL "" FORCE) FetchContent_Declare( llama.cpp