From 0b205fc962e21a0a73fe9c59a6919210d8219480 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 17 Apr 2026 16:25:07 +0000 Subject: [PATCH 1/4] Upgrade llama.cpp from b8808 to b8831 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Breaking changes fixed: - Rename CMake link target: common → llama-common (jllama and jllama_test) - Replace removed `build_info` string with `llama_build_info()` function in server.hpp (6 sites), jllama.cpp (1 site), utils.hpp (1 site) - Add `#include "build-info.h"` in server.hpp and utils.hpp https://claude.ai/code/session_01ABjsNTGnSH3WJvCGx7JbWB --- CLAUDE.md | 2 +- CMakeLists.txt | 6 +++--- README.md | 2 +- src/main/cpp/jllama.cpp | 2 +- src/main/cpp/server.hpp | 13 +++++++------ src/main/cpp/utils.hpp | 5 +++-- 6 files changed, 16 insertions(+), 14 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 05f9f111..149ddb65 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -6,7 +6,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co Java bindings for [llama.cpp](https://github.com/ggerganov/llama.cpp) via JNI, providing a high-level API for LLM inference in Java. The Java layer communicates with a native C++ library through JNI. 
-Current llama.cpp pinned version: **b8808** +Current llama.cpp pinned version: **b8831** ## Upgrading CUDA Version diff --git a/CMakeLists.txt b/CMakeLists.txt index fe5482e1..ffabb95a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -97,7 +97,7 @@ set(GGML_AVX512 OFF CACHE BOOL "" FORCE) FetchContent_Declare( llama.cpp GIT_REPOSITORY https://github.com/ggerganov/llama.cpp.git - GIT_TAG b8808 + GIT_TAG b8831 ) FetchContent_MakeAvailable(llama.cpp) @@ -204,7 +204,7 @@ add_library(jllama SHARED src/main/cpp/jllama.cpp src/main/cpp/server.hpp src/ma set_target_properties(jllama PROPERTIES POSITION_INDEPENDENT_CODE ON) target_include_directories(jllama PRIVATE src/main/cpp ${JNI_INCLUDE_DIRS} ${llama.cpp_SOURCE_DIR}/tools/mtmd) -target_link_libraries(jllama PRIVATE common mtmd llama nlohmann_json) +target_link_libraries(jllama PRIVATE llama-common mtmd llama nlohmann_json) target_compile_features(jllama PRIVATE cxx_std_11) target_compile_definitions(jllama PRIVATE @@ -259,7 +259,7 @@ if(BUILD_TESTING) # jni.h / jni_md.h needed by jni_helpers.hpp (mock JNI tests, no JVM required) ${JNI_INCLUDE_DIRS} ) - target_link_libraries(jllama_test PRIVATE common mtmd llama nlohmann_json GTest::gtest_main) + target_link_libraries(jllama_test PRIVATE llama-common mtmd llama nlohmann_json GTest::gtest_main) target_compile_features(jllama_test PRIVATE cxx_std_17) target_compile_definitions(jllama_test PRIVATE diff --git a/README.md b/README.md index 1f157d59..e81df1e7 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ ![Java 8+](https://img.shields.io/badge/Java-8%2B-informational) -[![llama.cpp b8808](https://img.shields.io/badge/llama.cpp-%23b8808-informational)](https://github.com/ggml-org/llama.cpp/releases/tag/b8808) +[![llama.cpp b8831](https://img.shields.io/badge/llama.cpp-%23b8831-informational)](https://github.com/ggml-org/llama.cpp/releases/tag/b8831) # Java Bindings for [llama.cpp](https://github.com/ggerganov/llama.cpp) diff --git a/src/main/cpp/jllama.cpp 
b/src/main/cpp/jllama.cpp index c3e21f2e..7b66fcd9 100644 --- a/src/main/cpp/jllama.cpp +++ b/src/main/cpp/jllama.cpp @@ -749,7 +749,7 @@ JNIEXPORT void JNICALL Java_de_kherud_llama_LlamaModel_loadModel(JNIEnv *env, jo llama_numa_init(params.numa); - LOG_INF("build_info: %s\n", build_info.c_str()); + LOG_INF("build_info: %s\n", llama_build_info()); LOG_INF("%s\n", common_params_get_system_info(params).c_str()); std::atomic state{SERVER_STATE_LOADING_MODEL}; diff --git a/src/main/cpp/server.hpp b/src/main/cpp/server.hpp index a585ecc5..e36cc327 100644 --- a/src/main/cpp/server.hpp +++ b/src/main/cpp/server.hpp @@ -2,6 +2,7 @@ #include "utils.hpp" #include "arg.h" +#include "build-info.h" #include "common.h" #include "json-schema-to-grammar.h" #include "llama.h" @@ -747,7 +748,7 @@ struct server_task_result_cmpl_final : server_task_result { }})}, {"created", t}, {"model", oaicompat_model}, - {"system_fingerprint", build_info}, + {"system_fingerprint", std::string(llama_build_info())}, {"object", "text_completion"}, {"usage", json{{"completion_tokens", n_decoded}, {"prompt_tokens", n_prompt_tokens}, @@ -791,7 +792,7 @@ struct server_task_result_cmpl_final : server_task_result { json res = json{{"choices", json::array({choice})}, {"created", t}, {"model", oaicompat_model}, - {"system_fingerprint", build_info}, + {"system_fingerprint", std::string(llama_build_info())}, {"object", "chat.completion"}, {"usage", json{{"completion_tokens", n_decoded}, {"prompt_tokens", n_prompt_tokens}, @@ -826,7 +827,7 @@ struct server_task_result_cmpl_final : server_task_result { {"created", t}, {"id", oaicompat_cmpl_id}, {"model", oaicompat_model}, - {"system_fingerprint", build_info}, + {"system_fingerprint", std::string(llama_build_info())}, {"object", "chat.completion.chunk"}, }); } @@ -842,7 +843,7 @@ struct server_task_result_cmpl_final : server_task_result { {"created", t}, {"id", oaicompat_cmpl_id}, {"model", oaicompat_model}, - {"system_fingerprint", build_info}, + 
{"system_fingerprint", std::string(llama_build_info())}, {"object", "chat.completion.chunk"}, {"usage", json{ @@ -942,7 +943,7 @@ struct server_task_result_cmpl_partial : server_task_result { }})}, {"created", t}, {"model", oaicompat_model}, - {"system_fingerprint", build_info}, + {"system_fingerprint", std::string(llama_build_info())}, {"object", "text_completion"}, {"id", oaicompat_cmpl_id}}; @@ -975,7 +976,7 @@ struct server_task_result_cmpl_partial : server_task_result { {"created", t}, {"id", oaicompat_cmpl_id}, {"model", oaicompat_model}, - {"system_fingerprint", build_info}, + {"system_fingerprint", std::string(llama_build_info())}, {"object", "chat.completion.chunk"}, }); }; diff --git a/src/main/cpp/utils.hpp b/src/main/cpp/utils.hpp index b3f94a36..4dfc7bb0 100644 --- a/src/main/cpp/utils.hpp +++ b/src/main/cpp/utils.hpp @@ -3,6 +3,7 @@ #include "download.h" // common_remote_get_content, common_remote_params #include "base64.hpp" #include "chat.h" +#include "build-info.h" #include "common.h" #include "llama.h" #include "log.h" @@ -59,7 +60,7 @@ template static T json_value(const json &body, const std::string &k } } -// build_info is now defined in common.h (since b7788) +// build_info removed in b8831; use llama_build_info() from build-info.h // thin wrapper around common_grammar_trigger with (de)serialization functions struct server_grammar_trigger { @@ -777,7 +778,7 @@ static json oaicompat_chat_params_parse(json &body, /* openai api json semantics // download remote image // TODO @ngxson : maybe make these params configurable common_remote_params params; - params.headers.push_back({"User-Agent", "llama.cpp/" + build_info}); + params.headers.push_back({"User-Agent", "llama.cpp/" + std::string(llama_build_info())}); params.max_size = 1024 * 1024 * 10; // 10MB params.timeout = 10; // seconds SRV_INF("downloading image from '%s'\n", url.c_str()); From e3915d31df1d2c7c419359871eb64cd420e420ac Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 17 Apr 2026 
19:49:04 +0000 Subject: [PATCH 2/4] Fix Windows x86 linker error: __InterlockedIncrement64 unavailable on 32-bit MSVC b8831 added ggml_graph_next_uid() in ggml.c which uses the MSVC compiler intrinsic __InterlockedIncrement64. This intrinsic only exists on x64/ARM64; on 32-bit x86 it causes LNK2019 at link time. Map it to the Win32 API InterlockedIncrement64 (same signature, available on x86 via CMPXCHG8B) for the ggml-base target when building with MSVC on a 32-bit host. https://claude.ai/code/session_01ABjsNTGnSH3WJvCGx7JbWB --- CMakeLists.txt | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index ffabb95a..0fb7ea54 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -101,6 +101,14 @@ FetchContent_Declare( ) FetchContent_MakeAvailable(llama.cpp) +# b8831 added ggml_graph_next_uid() which uses __InterlockedIncrement64, an +# MSVC intrinsic not available on 32-bit x86. Map it to the Win32 API +# InterlockedIncrement64 (same signature, available on all Windows via CMPXCHG8B). +if(MSVC AND CMAKE_SIZEOF_VOID_P EQUAL 4) + target_compile_definitions(ggml-base PRIVATE + __InterlockedIncrement64=InterlockedIncrement64) +endif() + # mtmd lives in tools/mtmd, which is only built when LLAMA_BUILD_TOOLS=ON. # LLAMA_BUILD_TOOLS defaults to LLAMA_STANDALONE, which is OFF when llama.cpp # is consumed via FetchContent. Build mtmd explicitly so the target exists. From 73233bf158f0838a5dcd575912b7b87ca2e4cae0 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 17 Apr 2026 20:01:01 +0000 Subject: [PATCH 3/4] =?UTF-8?q?CLAUDE.md:=20document=20b8808=E2=86=92b8831?= =?UTF-8?q?=20breaking=20changes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add three new rows to the known breaking changes table: - CMake target common → llama-common - build_info string → llama_build_info() in build-info.h - __InterlockedIncrement64 unavailable on Windows x86 Update range header from b8808 to b8831. 
https://claude.ai/code/session_01ABjsNTGnSH3WJvCGx7JbWB --- CLAUDE.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 149ddb65..ef234f31 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -107,7 +107,7 @@ jllama.cpp / server.hpp / utils.hpp **Priority-ordered review list for upgrade diffs** (highest break risk first) -The top 8 rows cover all known breaking changes from b5022 → b8808. +The top 8 rows cover all known breaking changes from b5022 → b8831. For future upgrades, provide diffs for at least these 8 files rather than the full patch. | File | What to watch for | @@ -136,7 +136,7 @@ For future upgrades, provide diffs for at least these 8 files rather than the fu `ggml/include/ggml.h`, `ggml/include/ggml-backend.h`, `ggml/include/ggml-opt.h`, `ggml-alloc.h`, `ggml-cpu.h`, `peg-parser.h`, `base64.hpp` -**Known breaking changes by version range** (b5022 → b8808): +**Known breaking changes by version range** (b5022 → b8831): | Version | File | Change | |---------|------|--------| @@ -151,6 +151,9 @@ For future upgrades, provide diffs for at least these 8 files rather than the fu | ~b7864 | `common/mtmd.h` | `mtmd_init_params.verbosity` field removed | | ~b7904–b8190 | `common/common.h` | `params_base.model_alias` changed from `std::string` to a container; use `*model_alias.begin()` instead of direct string cast | | ~b8778–b8808 | `tools/mtmd/mtmd.h` | `MTMD_DEFAULT_IMAGE_MARKER` macro removed; `mtmd_image_tokens_get_nx/ny` deprecated; new `mtmd_decoder_pos` struct + `mtmd_image_tokens_get_decoder_pos()`; `mtmd_context_params_default()` now sets `image_marker = nullptr` (throws `"custom image_marker is not supported anymore"` if non-null); upstream server adds randomized `get_media_marker()` in `server-common.h` — our `server.hpp` is unaffected since it does not include that header and uses `mtmd_default_marker()` consistently | +| ~b8808–b8831 | project `CMakeLists.txt` | CMake target `common` renamed to `llama-common`; 
update `target_link_libraries` for `jllama` and `jllama_test` | +| ~b8808–b8831 | `common/common.h` → new `common/build-info.h` | `build_info` `std::string` removed; replaced by `llama_build_info()` (`const char*`) in new `build-info.h`; add `#include "build-info.h"` in `server.hpp` and `utils.hpp`; call sites: `std::string(llama_build_info())` in `server.hpp` (6×), `llama_build_info()` in `jllama.cpp` (1×) and `utils.hpp` (1×) | +| ~b8808–b8831 | `ggml/src/ggml.c` | New `ggml_graph_next_uid()` uses `__InterlockedIncrement64` intrinsic unavailable on 32-bit MSVC x86; workaround: `target_compile_definitions(ggml-base PRIVATE __InterlockedIncrement64=InterlockedIncrement64)` guarded by `MSVC AND CMAKE_SIZEOF_VOID_P EQUAL 4` | ## Build Commands From 161f72ce5e66f3b4e0672e4e4c0a8cfce5adc544 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 17 Apr 2026 20:28:49 +0000 Subject: [PATCH 4/4] Fix Windows x86 LNK2019 for __InterlockedIncrement64 via compat TU The compile-definition approach failed because <intrin.h> defines #define __InterlockedIncrement64 _InterlockedIncrement64 after our /D flag, so the compiler still emits an extern call to _InterlockedIncrement64 (linker symbol __InterlockedIncrement64) with no definition. Provide the definition in a new compat TU (ggml_x86_compat.c) added to ggml-base via target_sources, guarded by MSVC AND CMAKE_SIZEOF_VOID_P EQUAL 4. The wrapper calls Win32 InterlockedIncrement64 (CMPXCHG8B, available on all x86 Windows) satisfying <intrin.h>'s extern __cdecl declaration at link time.
https://claude.ai/code/session_01ABjsNTGnSH3WJvCGx7JbWB --- CLAUDE.md | 2 +- CMakeLists.txt | 10 +++++----- src/main/cpp/compat/ggml_x86_compat.c | 11 +++++++++++ 3 files changed, 17 insertions(+), 6 deletions(-) create mode 100644 src/main/cpp/compat/ggml_x86_compat.c diff --git a/CLAUDE.md b/CLAUDE.md index ef234f31..3b2ac465 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -153,7 +153,7 @@ For future upgrades, provide diffs for at least these 8 files rather than the fu | ~b8778–b8808 | `tools/mtmd/mtmd.h` | `MTMD_DEFAULT_IMAGE_MARKER` macro removed; `mtmd_image_tokens_get_nx/ny` deprecated; new `mtmd_decoder_pos` struct + `mtmd_image_tokens_get_decoder_pos()`; `mtmd_context_params_default()` now sets `image_marker = nullptr` (throws `"custom image_marker is not supported anymore"` if non-null); upstream server adds randomized `get_media_marker()` in `server-common.h` — our `server.hpp` is unaffected since it does not include that header and uses `mtmd_default_marker()` consistently | | ~b8808–b8831 | project `CMakeLists.txt` | CMake target `common` renamed to `llama-common`; update `target_link_libraries` for `jllama` and `jllama_test` | | ~b8808–b8831 | `common/common.h` → new `common/build-info.h` | `build_info` `std::string` removed; replaced by `llama_build_info()` (`const char*`) in new `build-info.h`; add `#include "build-info.h"` in `server.hpp` and `utils.hpp`; call sites: `std::string(llama_build_info())` in `server.hpp` (6×), `llama_build_info()` in `jllama.cpp` (1×) and `utils.hpp` (1×) | -| ~b8808–b8831 | `ggml/src/ggml.c` | New `ggml_graph_next_uid()` uses `__InterlockedIncrement64` intrinsic unavailable on 32-bit MSVC x86; workaround: `target_compile_definitions(ggml-base PRIVATE __InterlockedIncrement64=InterlockedIncrement64)` guarded by `MSVC AND CMAKE_SIZEOF_VOID_P EQUAL 4` | +| ~b8808–b8831 | `ggml/src/ggml.c` | New `ggml_graph_next_uid()` calls `_InterlockedIncrement64` via `<intrin.h>` on x86; intrinsic unavailable on 32-bit MSVC; fix: 
`src/main/cpp/compat/ggml_x86_compat.c` provides `__cdecl _InterlockedIncrement64` via `InterlockedIncrement64` (CMPXCHG8B), added to `ggml-base` via `target_sources` guarded by `MSVC AND CMAKE_SIZEOF_VOID_P EQUAL 4` | ## Build Commands diff --git a/CMakeLists.txt b/CMakeLists.txt index 0fb7ea54..79c8e5db 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -101,12 +101,12 @@ FetchContent_Declare( ) FetchContent_MakeAvailable(llama.cpp) -# b8831 added ggml_graph_next_uid() which uses __InterlockedIncrement64, an -# MSVC intrinsic not available on 32-bit x86. Map it to the Win32 API -# InterlockedIncrement64 (same signature, available on all Windows via CMPXCHG8B). +# b8831 added ggml_graph_next_uid() which calls _InterlockedIncrement64 via +# <intrin.h> on x86. The intrinsic only exists on x64; provide the +# implementation in a compat TU so the linker resolves __InterlockedIncrement64. if(MSVC AND CMAKE_SIZEOF_VOID_P EQUAL 4) - target_compile_definitions(ggml-base PRIVATE - __InterlockedIncrement64=InterlockedIncrement64) + target_sources(ggml-base PRIVATE + ${CMAKE_SOURCE_DIR}/src/main/cpp/compat/ggml_x86_compat.c) endif() # mtmd lives in tools/mtmd, which is only built when LLAMA_BUILD_TOOLS=ON. diff --git a/src/main/cpp/compat/ggml_x86_compat.c b/src/main/cpp/compat/ggml_x86_compat.c new file mode 100644 index 00000000..d4596be0 --- /dev/null +++ b/src/main/cpp/compat/ggml_x86_compat.c @@ -0,0 +1,11 @@ +#if defined(_MSC_VER) && defined(_M_IX86) +#include <windows.h> + +/* On 32-bit x86 MSVC, <intrin.h> declares _InterlockedIncrement64 as + extern __cdecl but provides no implementation (the intrinsic only exists + on x64/ARM64). Satisfy the extern with a wrapper around the Win32 API + InterlockedIncrement64 (implemented via CMPXCHG8B on x86). */ +__int64 __cdecl _InterlockedIncrement64(volatile __int64* Addend) { + return InterlockedIncrement64((volatile LONGLONG*)Addend); +} +#endif