From 0b205fc962e21a0a73fe9c59a6919210d8219480 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 17 Apr 2026 16:25:07 +0000 Subject: [PATCH 1/4] Upgrade llama.cpp from b8808 to b8831 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Breaking changes fixed: - Rename CMake link target: common → llama-common (jllama and jllama_test) - Replace removed `build_info` string with `llama_build_info()` function in server.hpp (6 sites), jllama.cpp (1 site), utils.hpp (1 site) - Add `#include "build-info.h"` in server.hpp and utils.hpp https://claude.ai/code/session_01ABjsNTGnSH3WJvCGx7JbWB --- CLAUDE.md | 2 +- CMakeLists.txt | 6 +++--- README.md | 2 +- src/main/cpp/jllama.cpp | 2 +- src/main/cpp/server.hpp | 13 +++++++------ src/main/cpp/utils.hpp | 5 +++-- 6 files changed, 16 insertions(+), 14 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 05f9f111..149ddb65 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -6,7 +6,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co Java bindings for [llama.cpp](https://github.com/ggerganov/llama.cpp) via JNI, providing a high-level API for LLM inference in Java. The Java layer communicates with a native C++ library through JNI. 
-Current llama.cpp pinned version: **b8808** +Current llama.cpp pinned version: **b8831** ## Upgrading CUDA Version diff --git a/CMakeLists.txt b/CMakeLists.txt index fe5482e1..ffabb95a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -97,7 +97,7 @@ set(GGML_AVX512 OFF CACHE BOOL "" FORCE) FetchContent_Declare( llama.cpp GIT_REPOSITORY https://github.com/ggerganov/llama.cpp.git - GIT_TAG b8808 + GIT_TAG b8831 ) FetchContent_MakeAvailable(llama.cpp) @@ -204,7 +204,7 @@ add_library(jllama SHARED src/main/cpp/jllama.cpp src/main/cpp/server.hpp src/ma set_target_properties(jllama PROPERTIES POSITION_INDEPENDENT_CODE ON) target_include_directories(jllama PRIVATE src/main/cpp ${JNI_INCLUDE_DIRS} ${llama.cpp_SOURCE_DIR}/tools/mtmd) -target_link_libraries(jllama PRIVATE common mtmd llama nlohmann_json) +target_link_libraries(jllama PRIVATE llama-common mtmd llama nlohmann_json) target_compile_features(jllama PRIVATE cxx_std_11) target_compile_definitions(jllama PRIVATE @@ -259,7 +259,7 @@ if(BUILD_TESTING) # jni.h / jni_md.h needed by jni_helpers.hpp (mock JNI tests, no JVM required) ${JNI_INCLUDE_DIRS} ) - target_link_libraries(jllama_test PRIVATE common mtmd llama nlohmann_json GTest::gtest_main) + target_link_libraries(jllama_test PRIVATE llama-common mtmd llama nlohmann_json GTest::gtest_main) target_compile_features(jllama_test PRIVATE cxx_std_17) target_compile_definitions(jllama_test PRIVATE diff --git a/README.md b/README.md index 1f157d59..e81df1e7 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ ![Java 8+](https://img.shields.io/badge/Java-8%2B-informational) -[![llama.cpp b8808](https://img.shields.io/badge/llama.cpp-%23b8808-informational)](https://github.com/ggml-org/llama.cpp/releases/tag/b8808) +[![llama.cpp b8831](https://img.shields.io/badge/llama.cpp-%23b8831-informational)](https://github.com/ggml-org/llama.cpp/releases/tag/b8831) # Java Bindings for [llama.cpp](https://github.com/ggerganov/llama.cpp) diff --git a/src/main/cpp/jllama.cpp 
b/src/main/cpp/jllama.cpp index c3e21f2e..7b66fcd9 100644 --- a/src/main/cpp/jllama.cpp +++ b/src/main/cpp/jllama.cpp @@ -749,7 +749,7 @@ JNIEXPORT void JNICALL Java_de_kherud_llama_LlamaModel_loadModel(JNIEnv *env, jo llama_numa_init(params.numa); - LOG_INF("build_info: %s\n", build_info.c_str()); + LOG_INF("build_info: %s\n", llama_build_info()); LOG_INF("%s\n", common_params_get_system_info(params).c_str()); std::atomic state{SERVER_STATE_LOADING_MODEL}; diff --git a/src/main/cpp/server.hpp b/src/main/cpp/server.hpp index a585ecc5..e36cc327 100644 --- a/src/main/cpp/server.hpp +++ b/src/main/cpp/server.hpp @@ -2,6 +2,7 @@ #include "utils.hpp" #include "arg.h" +#include "build-info.h" #include "common.h" #include "json-schema-to-grammar.h" #include "llama.h" @@ -747,7 +748,7 @@ struct server_task_result_cmpl_final : server_task_result { }})}, {"created", t}, {"model", oaicompat_model}, - {"system_fingerprint", build_info}, + {"system_fingerprint", std::string(llama_build_info())}, {"object", "text_completion"}, {"usage", json{{"completion_tokens", n_decoded}, {"prompt_tokens", n_prompt_tokens}, @@ -791,7 +792,7 @@ struct server_task_result_cmpl_final : server_task_result { json res = json{{"choices", json::array({choice})}, {"created", t}, {"model", oaicompat_model}, - {"system_fingerprint", build_info}, + {"system_fingerprint", std::string(llama_build_info())}, {"object", "chat.completion"}, {"usage", json{{"completion_tokens", n_decoded}, {"prompt_tokens", n_prompt_tokens}, @@ -826,7 +827,7 @@ struct server_task_result_cmpl_final : server_task_result { {"created", t}, {"id", oaicompat_cmpl_id}, {"model", oaicompat_model}, - {"system_fingerprint", build_info}, + {"system_fingerprint", std::string(llama_build_info())}, {"object", "chat.completion.chunk"}, }); } @@ -842,7 +843,7 @@ struct server_task_result_cmpl_final : server_task_result { {"created", t}, {"id", oaicompat_cmpl_id}, {"model", oaicompat_model}, - {"system_fingerprint", build_info}, + 
{"system_fingerprint", std::string(llama_build_info())}, {"object", "chat.completion.chunk"}, {"usage", json{ @@ -942,7 +943,7 @@ struct server_task_result_cmpl_partial : server_task_result { }})}, {"created", t}, {"model", oaicompat_model}, - {"system_fingerprint", build_info}, + {"system_fingerprint", std::string(llama_build_info())}, {"object", "text_completion"}, {"id", oaicompat_cmpl_id}}; @@ -975,7 +976,7 @@ struct server_task_result_cmpl_partial : server_task_result { {"created", t}, {"id", oaicompat_cmpl_id}, {"model", oaicompat_model}, - {"system_fingerprint", build_info}, + {"system_fingerprint", std::string(llama_build_info())}, {"object", "chat.completion.chunk"}, }); }; diff --git a/src/main/cpp/utils.hpp b/src/main/cpp/utils.hpp index b3f94a36..4dfc7bb0 100644 --- a/src/main/cpp/utils.hpp +++ b/src/main/cpp/utils.hpp @@ -3,6 +3,7 @@ #include "download.h" // common_remote_get_content, common_remote_params #include "base64.hpp" #include "chat.h" +#include "build-info.h" #include "common.h" #include "llama.h" #include "log.h" @@ -59,7 +60,7 @@ template static T json_value(const json &body, const std::string &k } } -// build_info is now defined in common.h (since b7788) +// build_info removed in b8831; use llama_build_info() from build-info.h // thin wrapper around common_grammar_trigger with (de)serialization functions struct server_grammar_trigger { @@ -777,7 +778,7 @@ static json oaicompat_chat_params_parse(json &body, /* openai api json semantics // download remote image // TODO @ngxson : maybe make these params configurable common_remote_params params; - params.headers.push_back({"User-Agent", "llama.cpp/" + build_info}); + params.headers.push_back({"User-Agent", "llama.cpp/" + std::string(llama_build_info())}); params.max_size = 1024 * 1024 * 10; // 10MB params.timeout = 10; // seconds SRV_INF("downloading image from '%s'\n", url.c_str()); From e3915d31df1d2c7c419359871eb64cd420e420ac Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 17 Apr 2026 
19:49:04 +0000 Subject: [PATCH 2/4] Fix Windows x86 linker error: __InterlockedIncrement64 unavailable on 32-bit MSVC b8831 added ggml_graph_next_uid() in ggml.c which uses the MSVC compiler intrinsic __InterlockedIncrement64. This intrinsic only exists on x64/ARM64; on 32-bit x86 it causes LNK2019 at link time. Map it to the Win32 API InterlockedIncrement64 (same signature, available on x86 via CMPXCHG8B) for the ggml-base target when building with MSVC on a 32-bit host. https://claude.ai/code/session_01ABjsNTGnSH3WJvCGx7JbWB --- CMakeLists.txt | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index ffabb95a..0fb7ea54 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -101,6 +101,14 @@ FetchContent_Declare( ) FetchContent_MakeAvailable(llama.cpp) +# b8831 added ggml_graph_next_uid() which uses __InterlockedIncrement64, an +# MSVC intrinsic not available on 32-bit x86. Map it to the Win32 API +# InterlockedIncrement64 (same signature, available on all Windows via CMPXCHG8B). +if(MSVC AND CMAKE_SIZEOF_VOID_P EQUAL 4) + target_compile_definitions(ggml-base PRIVATE + __InterlockedIncrement64=InterlockedIncrement64) +endif() + # mtmd lives in tools/mtmd, which is only built when LLAMA_BUILD_TOOLS=ON. # LLAMA_BUILD_TOOLS defaults to LLAMA_STANDALONE, which is OFF when llama.cpp # is consumed via FetchContent. Build mtmd explicitly so the target exists. From 73233bf158f0838a5dcd575912b7b87ca2e4cae0 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 17 Apr 2026 20:01:01 +0000 Subject: [PATCH 3/4] =?UTF-8?q?CLAUDE.md:=20document=20b8808=E2=86=92b8831?= =?UTF-8?q?=20breaking=20changes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add three new rows to the known breaking changes table: - CMake target common → llama-common - build_info string → llama_build_info() in build-info.h - __InterlockedIncrement64 unavailable on Windows x86 Update range header from b8808 to b8831. 
https://claude.ai/code/session_01ABjsNTGnSH3WJvCGx7JbWB --- CLAUDE.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 149ddb65..ef234f31 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -107,7 +107,7 @@ jllama.cpp / server.hpp / utils.hpp **Priority-ordered review list for upgrade diffs** (highest break risk first) -The top 8 rows cover all known breaking changes from b5022 → b8808. +The top 8 rows cover all known breaking changes from b5022 → b8831. For future upgrades, provide diffs for at least these 8 files rather than the full patch. | File | What to watch for | @@ -136,7 +136,7 @@ For future upgrades, provide diffs for at least these 8 files rather than the fu `ggml/include/ggml.h`, `ggml/include/ggml-backend.h`, `ggml/include/ggml-opt.h`, `ggml-alloc.h`, `ggml-cpu.h`, `peg-parser.h`, `base64.hpp` -**Known breaking changes by version range** (b5022 → b8808): +**Known breaking changes by version range** (b5022 → b8831): | Version | File | Change | |---------|------|--------| @@ -151,6 +151,9 @@ For future upgrades, provide diffs for at least these 8 files rather than the fu | ~b7864 | `common/mtmd.h` | `mtmd_init_params.verbosity` field removed | | ~b7904–b8190 | `common/common.h` | `params_base.model_alias` changed from `std::string` to a container; use `*model_alias.begin()` instead of direct string cast | | ~b8778–b8808 | `tools/mtmd/mtmd.h` | `MTMD_DEFAULT_IMAGE_MARKER` macro removed; `mtmd_image_tokens_get_nx/ny` deprecated; new `mtmd_decoder_pos` struct + `mtmd_image_tokens_get_decoder_pos()`; `mtmd_context_params_default()` now sets `image_marker = nullptr` (throws `"custom image_marker is not supported anymore"` if non-null); upstream server adds randomized `get_media_marker()` in `server-common.h` — our `server.hpp` is unaffected since it does not include that header and uses `mtmd_default_marker()` consistently | +| ~b8808–b8831 | project `CMakeLists.txt` | CMake target `common` renamed to `llama-common`; 
update `target_link_libraries` for `jllama` and `jllama_test` | +| ~b8808–b8831 | `common/common.h` → new `common/build-info.h` | `build_info` `std::string` removed; replaced by `llama_build_info()` (`const char*`) in new `build-info.h`; add `#include "build-info.h"` in `server.hpp` and `utils.hpp`; call sites: `std::string(llama_build_info())` in `server.hpp` (6×), `llama_build_info()` in `jllama.cpp` (1×) and `utils.hpp` (1×) | +| ~b8808–b8831 | `ggml/src/ggml.c` | New `ggml_graph_next_uid()` uses `__InterlockedIncrement64` intrinsic unavailable on 32-bit MSVC x86; workaround: `target_compile_definitions(ggml-base PRIVATE __InterlockedIncrement64=InterlockedIncrement64)` guarded by `MSVC AND CMAKE_SIZEOF_VOID_P EQUAL 4` | ## Build Commands From 161f72ce5e66f3b4e0672e4e4c0a8cfce5adc544 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 17 Apr 2026 20:28:49 +0000 Subject: [PATCH 4/4] Fix Windows x86 LNK2019 for __InterlockedIncrement64 via compat TU The compile-definition approach failed because <intrin.h> defines #define __InterlockedIncrement64 _InterlockedIncrement64 after our /D flag, so the compiler still emits an extern call to _InterlockedIncrement64 (linker symbol __InterlockedIncrement64) with no definition. Provide the definition in a new compat TU (ggml_x86_compat.c) added to ggml-base via target_sources, guarded by MSVC AND CMAKE_SIZEOF_VOID_P EQUAL 4. The wrapper calls Win32 InterlockedIncrement64 (CMPXCHG8B, available on all x86 Windows) satisfying <intrin.h>'s extern __cdecl declaration at link time.
https://claude.ai/code/session_01ABjsNTGnSH3WJvCGx7JbWB --- CLAUDE.md | 2 +- CMakeLists.txt | 10 +++++----- src/main/cpp/compat/ggml_x86_compat.c | 11 +++++++++++ 3 files changed, 17 insertions(+), 6 deletions(-) create mode 100644 src/main/cpp/compat/ggml_x86_compat.c diff --git a/CLAUDE.md b/CLAUDE.md index ef234f31..3b2ac465 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -153,7 +153,7 @@ For future upgrades, provide diffs for at least these 8 files rather than the fu | ~b8778–b8808 | `tools/mtmd/mtmd.h` | `MTMD_DEFAULT_IMAGE_MARKER` macro removed; `mtmd_image_tokens_get_nx/ny` deprecated; new `mtmd_decoder_pos` struct + `mtmd_image_tokens_get_decoder_pos()`; `mtmd_context_params_default()` now sets `image_marker = nullptr` (throws `"custom image_marker is not supported anymore"` if non-null); upstream server adds randomized `get_media_marker()` in `server-common.h` — our `server.hpp` is unaffected since it does not include that header and uses `mtmd_default_marker()` consistently | | ~b8808–b8831 | project `CMakeLists.txt` | CMake target `common` renamed to `llama-common`; update `target_link_libraries` for `jllama` and `jllama_test` | | ~b8808–b8831 | `common/common.h` → new `common/build-info.h` | `build_info` `std::string` removed; replaced by `llama_build_info()` (`const char*`) in new `build-info.h`; add `#include "build-info.h"` in `server.hpp` and `utils.hpp`; call sites: `std::string(llama_build_info())` in `server.hpp` (6×), `llama_build_info()` in `jllama.cpp` (1×) and `utils.hpp` (1×) | -| ~b8808–b8831 | `ggml/src/ggml.c` | New `ggml_graph_next_uid()` uses `__InterlockedIncrement64` intrinsic unavailable on 32-bit MSVC x86; workaround: `target_compile_definitions(ggml-base PRIVATE __InterlockedIncrement64=InterlockedIncrement64)` guarded by `MSVC AND CMAKE_SIZEOF_VOID_P EQUAL 4` | +| ~b8808–b8831 | `ggml/src/ggml.c` | New `ggml_graph_next_uid()` calls `_InterlockedIncrement64` via `<intrin.h>` on x86; intrinsic unavailable on 32-bit MSVC; fix: 
`src/main/cpp/compat/ggml_x86_compat.c` provides `__cdecl _InterlockedIncrement64` via `InterlockedIncrement64` (CMPXCHG8B), added to `ggml-base` via `target_sources` guarded by `MSVC AND CMAKE_SIZEOF_VOID_P EQUAL 4` | ## Build Commands diff --git a/CMakeLists.txt b/CMakeLists.txt index 0fb7ea54..79c8e5db 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -101,12 +101,12 @@ FetchContent_Declare( ) FetchContent_MakeAvailable(llama.cpp) -# b8831 added ggml_graph_next_uid() which uses __InterlockedIncrement64, an -# MSVC intrinsic not available on 32-bit x86. Map it to the Win32 API -# InterlockedIncrement64 (same signature, available on all Windows via CMPXCHG8B). +# b8831 added ggml_graph_next_uid() which calls _InterlockedIncrement64 via +# <intrin.h> on x86. The intrinsic only exists on x64; provide the +# implementation in a compat TU so the linker resolves __InterlockedIncrement64. if(MSVC AND CMAKE_SIZEOF_VOID_P EQUAL 4) - target_compile_definitions(ggml-base PRIVATE - __InterlockedIncrement64=InterlockedIncrement64) + target_sources(ggml-base PRIVATE + ${CMAKE_SOURCE_DIR}/src/main/cpp/compat/ggml_x86_compat.c) endif() # mtmd lives in tools/mtmd, which is only built when LLAMA_BUILD_TOOLS=ON. diff --git a/src/main/cpp/compat/ggml_x86_compat.c b/src/main/cpp/compat/ggml_x86_compat.c new file mode 100644 index 00000000..d4596be0 --- /dev/null +++ b/src/main/cpp/compat/ggml_x86_compat.c @@ -0,0 +1,11 @@ +#if defined(_MSC_VER) && defined(_M_IX86) +#include <windows.h> + +/* On 32-bit x86 MSVC, <intrin.h> declares _InterlockedIncrement64 as + extern __cdecl but provides no implementation (the intrinsic only exists + on x64/ARM64). Satisfy the extern with a wrapper around the Win32 API + InterlockedIncrement64 (implemented via CMPXCHG8B on x86). */ +__int64 __cdecl _InterlockedIncrement64(volatile __int64* Addend) { + return InterlockedIncrement64((volatile LONGLONG*)Addend); +} +#endif