Merged
9 changes: 6 additions & 3 deletions CLAUDE.md
@@ -6,7 +6,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co

Java bindings for [llama.cpp](https://github.com/ggerganov/llama.cpp) via JNI, providing a high-level API for LLM inference in Java. The Java layer communicates with a native C++ library through JNI.

-Current llama.cpp pinned version: **b8808**
+Current llama.cpp pinned version: **b8831**

## Upgrading CUDA Version

@@ -107,7 +107,7 @@ jllama.cpp / server.hpp / utils.hpp

**Priority-ordered review list for upgrade diffs** (highest break risk first)

-The top 8 rows cover all known breaking changes from b5022 → b8808.
+The top 8 rows cover all known breaking changes from b5022 → b8831.
For future upgrades, provide diffs for at least these 8 files rather than the full patch.

| File | What to watch for |
@@ -136,7 +136,7 @@ For future upgrades, provide diffs for at least these 8 files rather than the fu
`ggml/include/ggml.h`, `ggml/include/ggml-backend.h`, `ggml/include/ggml-opt.h`,
`ggml-alloc.h`, `ggml-cpu.h`, `peg-parser.h`, `base64.hpp`

-**Known breaking changes by version range** (b5022 → b8808):
+**Known breaking changes by version range** (b5022 → b8831):

| Version | File | Change |
|---------|------|--------|
@@ -151,6 +151,9 @@ For future upgrades, provide diffs for at least these 8 files rather than the fu
| ~b7864 | `common/mtmd.h` | `mtmd_init_params.verbosity` field removed |
| ~b7904–b8190 | `common/common.h` | `params_base.model_alias` changed from `std::string` to a container; use `*model_alias.begin()` instead of direct string cast |
| ~b8778–b8808 | `tools/mtmd/mtmd.h` | `MTMD_DEFAULT_IMAGE_MARKER` macro removed; `mtmd_image_tokens_get_nx/ny` deprecated; new `mtmd_decoder_pos` struct + `mtmd_image_tokens_get_decoder_pos()`; `mtmd_context_params_default()` now sets `image_marker = nullptr` (throws `"custom image_marker is not supported anymore"` if non-null); upstream server adds randomized `get_media_marker()` in `server-common.h` — our `server.hpp` is unaffected since it does not include that header and uses `mtmd_default_marker()` consistently |
+| ~b8808–b8831 | project `CMakeLists.txt` | CMake target `common` renamed to `llama-common`; update `target_link_libraries` for `jllama` and `jllama_test` |
+| ~b8808–b8831 | `common/common.h` → new `common/build-info.h` | `build_info` `std::string` removed; replaced by `llama_build_info()` (`const char*`) in new `build-info.h`; add `#include "build-info.h"` in `server.hpp` and `utils.hpp`; call sites: `std::string(llama_build_info())` in `server.hpp` (6×), `llama_build_info()` in `jllama.cpp` (1×) and `utils.hpp` (1×) |
+| ~b8808–b8831 | `ggml/src/ggml.c` | New `ggml_graph_next_uid()` calls `_InterlockedIncrement64` via `<intrin.h>` on x86; intrinsic unavailable on 32-bit MSVC; fix: `src/main/cpp/compat/ggml_x86_compat.c` provides `__cdecl _InterlockedIncrement64` via `InterlockedIncrement64` (CMPXCHG8B), added to `ggml-base` via `target_sources` guarded by `MSVC AND CMAKE_SIZEOF_VOID_P EQUAL 4` |

## Build Commands

14 changes: 11 additions & 3 deletions CMakeLists.txt
@@ -97,10 +97,18 @@ set(GGML_AVX512 OFF CACHE BOOL "" FORCE)
FetchContent_Declare(
    llama.cpp
    GIT_REPOSITORY https://github.com/ggerganov/llama.cpp.git
-    GIT_TAG b8808
+    GIT_TAG b8831
)
FetchContent_MakeAvailable(llama.cpp)

+# b8831 added ggml_graph_next_uid() which calls _InterlockedIncrement64 via
+# <intrin.h> on x86. The intrinsic only exists on x64/ARM64; provide the
+# implementation in a compat TU so the linker resolves _InterlockedIncrement64
+# (decorated __InterlockedIncrement64 under 32-bit __cdecl).
+if(MSVC AND CMAKE_SIZEOF_VOID_P EQUAL 4)
+    target_sources(ggml-base PRIVATE
+        ${CMAKE_SOURCE_DIR}/src/main/cpp/compat/ggml_x86_compat.c)
+endif()

# mtmd lives in tools/mtmd, which is only built when LLAMA_BUILD_TOOLS=ON.
# LLAMA_BUILD_TOOLS defaults to LLAMA_STANDALONE, which is OFF when llama.cpp
# is consumed via FetchContent. Build mtmd explicitly so the target exists.
@@ -204,7 +212,7 @@ add_library(jllama SHARED src/main/cpp/jllama.cpp src/main/cpp/server.hpp src/ma

set_target_properties(jllama PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(jllama PRIVATE src/main/cpp ${JNI_INCLUDE_DIRS} ${llama.cpp_SOURCE_DIR}/tools/mtmd)
-target_link_libraries(jllama PRIVATE common mtmd llama nlohmann_json)
+target_link_libraries(jllama PRIVATE llama-common mtmd llama nlohmann_json)
target_compile_features(jllama PRIVATE cxx_std_11)

target_compile_definitions(jllama PRIVATE
@@ -259,7 +267,7 @@ if(BUILD_TESTING)
# jni.h / jni_md.h needed by jni_helpers.hpp (mock JNI tests, no JVM required)
${JNI_INCLUDE_DIRS}
)
-target_link_libraries(jllama_test PRIVATE common mtmd llama nlohmann_json GTest::gtest_main)
+target_link_libraries(jllama_test PRIVATE llama-common mtmd llama nlohmann_json GTest::gtest_main)
target_compile_features(jllama_test PRIVATE cxx_std_17)

target_compile_definitions(jllama_test PRIVATE
2 changes: 1 addition & 1 deletion README.md
@@ -1,5 +1,5 @@
![Java 8+](https://img.shields.io/badge/Java-8%2B-informational)
-[![llama.cpp b8808](https://img.shields.io/badge/llama.cpp-%23b8808-informational)](https://github.com/ggml-org/llama.cpp/releases/tag/b8808)
+[![llama.cpp b8831](https://img.shields.io/badge/llama.cpp-%23b8831-informational)](https://github.com/ggml-org/llama.cpp/releases/tag/b8831)

# Java Bindings for [llama.cpp](https://github.com/ggerganov/llama.cpp)

11 changes: 11 additions & 0 deletions src/main/cpp/compat/ggml_x86_compat.c
@@ -0,0 +1,11 @@
+#if defined(_MSC_VER) && defined(_M_IX86)
+#include <windows.h>
+
+/* On 32-bit x86 MSVC, <intrin.h> declares _InterlockedIncrement64 as
+   extern __cdecl but provides no implementation (the intrinsic only exists
+   on x64/ARM64). Satisfy the extern with a wrapper around the Win32 API
+   InterlockedIncrement64 (implemented via CMPXCHG8B on x86). */
+__int64 __cdecl _InterlockedIncrement64(volatile __int64* Addend) {
+    return InterlockedIncrement64((volatile LONGLONG*)Addend);
+}
+#endif
2 changes: 1 addition & 1 deletion src/main/cpp/jllama.cpp
@@ -749,7 +749,7 @@ JNIEXPORT void JNICALL Java_de_kherud_llama_LlamaModel_loadModel(JNIEnv *env, jo

llama_numa_init(params.numa);

-LOG_INF("build_info: %s\n", build_info.c_str());
+LOG_INF("build_info: %s\n", llama_build_info());
LOG_INF("%s\n", common_params_get_system_info(params).c_str());

std::atomic<server_state> state{SERVER_STATE_LOADING_MODEL};
13 changes: 7 additions & 6 deletions src/main/cpp/server.hpp
@@ -2,6 +2,7 @@
#include "utils.hpp"

#include "arg.h"
+#include "build-info.h"
#include "common.h"
#include "json-schema-to-grammar.h"
#include "llama.h"
@@ -747,7 +748,7 @@ struct server_task_result_cmpl_final : server_task_result {
}})},
{"created", t},
{"model", oaicompat_model},
-{"system_fingerprint", build_info},
+{"system_fingerprint", std::string(llama_build_info())},
{"object", "text_completion"},
{"usage", json{{"completion_tokens", n_decoded},
{"prompt_tokens", n_prompt_tokens},
@@ -791,7 +792,7 @@ struct server_task_result_cmpl_final : server_task_result {
json res = json{{"choices", json::array({choice})},
{"created", t},
{"model", oaicompat_model},
-{"system_fingerprint", build_info},
+{"system_fingerprint", std::string(llama_build_info())},
{"object", "chat.completion"},
{"usage", json{{"completion_tokens", n_decoded},
{"prompt_tokens", n_prompt_tokens},
@@ -826,7 +827,7 @@ struct server_task_result_cmpl_final : server_task_result {
{"created", t},
{"id", oaicompat_cmpl_id},
{"model", oaicompat_model},
-{"system_fingerprint", build_info},
+{"system_fingerprint", std::string(llama_build_info())},
{"object", "chat.completion.chunk"},
});
}
@@ -842,7 +843,7 @@
{"created", t},
{"id", oaicompat_cmpl_id},
{"model", oaicompat_model},
-{"system_fingerprint", build_info},
+{"system_fingerprint", std::string(llama_build_info())},
{"object", "chat.completion.chunk"},
{"usage",
json{
@@ -942,7 +943,7 @@ struct server_task_result_cmpl_partial : server_task_result {
}})},
{"created", t},
{"model", oaicompat_model},
-{"system_fingerprint", build_info},
+{"system_fingerprint", std::string(llama_build_info())},
{"object", "text_completion"},
{"id", oaicompat_cmpl_id}};

@@ -975,7 +976,7 @@ struct server_task_result_cmpl_partial : server_task_result {
{"created", t},
{"id", oaicompat_cmpl_id},
{"model", oaicompat_model},
-{"system_fingerprint", build_info},
+{"system_fingerprint", std::string(llama_build_info())},
{"object", "chat.completion.chunk"},
});
};
5 changes: 3 additions & 2 deletions src/main/cpp/utils.hpp
@@ -3,6 +3,7 @@
#include "download.h" // common_remote_get_content, common_remote_params
#include "base64.hpp"
#include "chat.h"
+#include "build-info.h"
#include "common.h"
#include "llama.h"
#include "log.h"
@@ -59,7 +60,7 @@ template <typename T> static T json_value(const json &body, const std::string &k
}
}

-// build_info is now defined in common.h (since b7788)
+// build_info removed in b8831; use llama_build_info() from build-info.h

// thin wrapper around common_grammar_trigger with (de)serialization functions
struct server_grammar_trigger {
@@ -777,7 +778,7 @@ static json oaicompat_chat_params_parse(json &body, /* openai api json semantics
// download remote image
// TODO @ngxson : maybe make these params configurable
common_remote_params params;
-params.headers.push_back({"User-Agent", "llama.cpp/" + build_info});
+params.headers.push_back({"User-Agent", "llama.cpp/" + std::string(llama_build_info())});
params.max_size = 1024 * 1024 * 10; // 10MB
params.timeout = 10; // seconds
SRV_INF("downloading image from '%s'\n", url.c_str());