diff --git a/CLAUDE.md b/CLAUDE.md index 05f9f111..3b2ac465 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -6,7 +6,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co Java bindings for [llama.cpp](https://github.com/ggerganov/llama.cpp) via JNI, providing a high-level API for LLM inference in Java. The Java layer communicates with a native C++ library through JNI. -Current llama.cpp pinned version: **b8808** +Current llama.cpp pinned version: **b8831** ## Upgrading CUDA Version @@ -107,7 +107,7 @@ jllama.cpp / server.hpp / utils.hpp **Priority-ordered review list for upgrade diffs** (highest break risk first) -The top 8 rows cover all known breaking changes from b5022 → b8808. +The top 8 rows cover all known breaking changes from b5022 → b8831. For future upgrades, provide diffs for at least these 8 files rather than the full patch. | File | What to watch for | @@ -136,7 +136,7 @@ For future upgrades, provide diffs for at least these 8 files rather than the fu `ggml/include/ggml.h`, `ggml/include/ggml-backend.h`, `ggml/include/ggml-opt.h`, `ggml-alloc.h`, `ggml-cpu.h`, `peg-parser.h`, `base64.hpp` -**Known breaking changes by version range** (b5022 → b8808): +**Known breaking changes by version range** (b5022 → b8831): | Version | File | Change | |---------|------|--------| @@ -151,6 +151,9 @@ For future upgrades, provide diffs for at least these 8 files rather than the fu | ~b7864 | `common/mtmd.h` | `mtmd_init_params.verbosity` field removed | | ~b7904–b8190 | `common/common.h` | `params_base.model_alias` changed from `std::string` to a container; use `*model_alias.begin()` instead of direct string cast | | ~b8778–b8808 | `tools/mtmd/mtmd.h` | `MTMD_DEFAULT_IMAGE_MARKER` macro removed; `mtmd_image_tokens_get_nx/ny` deprecated; new `mtmd_decoder_pos` struct + `mtmd_image_tokens_get_decoder_pos()`; `mtmd_context_params_default()` now sets `image_marker = nullptr` (throws `"custom image_marker is not supported anymore"` if non-null); 
upstream server adds randomized `get_media_marker()` in `server-common.h` — our `server.hpp` is unaffected since it does not include that header and uses `mtmd_default_marker()` consistently | +| ~b8808–b8831 | project `CMakeLists.txt` | CMake target `common` renamed to `llama-common`; update `target_link_libraries` for `jllama` and `jllama_test` | +| ~b8808–b8831 | `common/common.h` → new `common/build-info.h` | `build_info` `std::string` removed; replaced by `llama_build_info()` (`const char*`) in new `build-info.h`; add `#include "build-info.h"` in `server.hpp` and `utils.hpp`; call sites: `std::string(llama_build_info())` in `server.hpp` (6×), `llama_build_info()` in `jllama.cpp` (1×) and `utils.hpp` (1×) | +| ~b8808–b8831 | `ggml/src/ggml.c` | New `ggml_graph_next_uid()` calls `_InterlockedIncrement64` via `<intrin.h>` on x86; intrinsic unavailable on 32-bit MSVC; fix: `src/main/cpp/compat/ggml_x86_compat.c` provides `__cdecl _InterlockedIncrement64` via `InterlockedIncrement64` (CMPXCHG8B), added to `ggml-base` via `target_sources` guarded by `MSVC AND CMAKE_SIZEOF_VOID_P EQUAL 4` | ## Build Commands diff --git a/CMakeLists.txt b/CMakeLists.txt index fe5482e1..79c8e5db 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -97,10 +97,18 @@ set(GGML_AVX512 OFF CACHE BOOL "" FORCE) FetchContent_Declare( llama.cpp GIT_REPOSITORY https://github.com/ggerganov/llama.cpp.git - GIT_TAG b8808 + GIT_TAG b8831 ) FetchContent_MakeAvailable(llama.cpp) +# b8831 added ggml_graph_next_uid() which calls _InterlockedIncrement64 via <intrin.h> +# on x86. The intrinsic only exists on x64; provide the +# implementation in a compat TU so the linker resolves __InterlockedIncrement64. +if(MSVC AND CMAKE_SIZEOF_VOID_P EQUAL 4) + target_sources(ggml-base PRIVATE + ${CMAKE_SOURCE_DIR}/src/main/cpp/compat/ggml_x86_compat.c) +endif() + # mtmd lives in tools/mtmd, which is only built when LLAMA_BUILD_TOOLS=ON.
# LLAMA_BUILD_TOOLS defaults to LLAMA_STANDALONE, which is OFF when llama.cpp # is consumed via FetchContent. Build mtmd explicitly so the target exists. @@ -204,7 +212,7 @@ add_library(jllama SHARED src/main/cpp/jllama.cpp src/main/cpp/server.hpp src/ma set_target_properties(jllama PROPERTIES POSITION_INDEPENDENT_CODE ON) target_include_directories(jllama PRIVATE src/main/cpp ${JNI_INCLUDE_DIRS} ${llama.cpp_SOURCE_DIR}/tools/mtmd) -target_link_libraries(jllama PRIVATE common mtmd llama nlohmann_json) +target_link_libraries(jllama PRIVATE llama-common mtmd llama nlohmann_json) target_compile_features(jllama PRIVATE cxx_std_11) target_compile_definitions(jllama PRIVATE @@ -259,7 +267,7 @@ if(BUILD_TESTING) # jni.h / jni_md.h needed by jni_helpers.hpp (mock JNI tests, no JVM required) ${JNI_INCLUDE_DIRS} ) - target_link_libraries(jllama_test PRIVATE common mtmd llama nlohmann_json GTest::gtest_main) + target_link_libraries(jllama_test PRIVATE llama-common mtmd llama nlohmann_json GTest::gtest_main) target_compile_features(jllama_test PRIVATE cxx_std_17) target_compile_definitions(jllama_test PRIVATE diff --git a/README.md b/README.md index 1f157d59..e81df1e7 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ ![Java 8+](https://img.shields.io/badge/Java-8%2B-informational) -[![llama.cpp b8808](https://img.shields.io/badge/llama.cpp-%23b8808-informational)](https://github.com/ggml-org/llama.cpp/releases/tag/b8808) +[![llama.cpp b8831](https://img.shields.io/badge/llama.cpp-%23b8831-informational)](https://github.com/ggml-org/llama.cpp/releases/tag/b8831) # Java Bindings for [llama.cpp](https://github.com/ggerganov/llama.cpp) diff --git a/src/main/cpp/compat/ggml_x86_compat.c b/src/main/cpp/compat/ggml_x86_compat.c new file mode 100644 index 00000000..d4596be0 --- /dev/null +++ b/src/main/cpp/compat/ggml_x86_compat.c @@ -0,0 +1,11 @@ +#if defined(_MSC_VER) && defined(_M_IX86) +#include <windows.h> + +/* On 32-bit x86 MSVC, <intrin.h> declares _InterlockedIncrement64 as + extern __cdecl
but provides no implementation (the intrinsic only exists + on x64/ARM64). Satisfy the extern with a wrapper around the Win32 API + InterlockedIncrement64 (implemented via CMPXCHG8B on x86). */ +__int64 __cdecl _InterlockedIncrement64(volatile __int64* Addend) { + return InterlockedIncrement64((volatile LONGLONG*)Addend); +} +#endif diff --git a/src/main/cpp/jllama.cpp b/src/main/cpp/jllama.cpp index c3e21f2e..7b66fcd9 100644 --- a/src/main/cpp/jllama.cpp +++ b/src/main/cpp/jllama.cpp @@ -749,7 +749,7 @@ JNIEXPORT void JNICALL Java_de_kherud_llama_LlamaModel_loadModel(JNIEnv *env, jo llama_numa_init(params.numa); - LOG_INF("build_info: %s\n", build_info.c_str()); + LOG_INF("build_info: %s\n", llama_build_info()); LOG_INF("%s\n", common_params_get_system_info(params).c_str()); std::atomic<server_state> state{SERVER_STATE_LOADING_MODEL}; diff --git a/src/main/cpp/server.hpp b/src/main/cpp/server.hpp index a585ecc5..e36cc327 100644 --- a/src/main/cpp/server.hpp +++ b/src/main/cpp/server.hpp @@ -2,6 +2,7 @@ #include "utils.hpp" #include "arg.h" +#include "build-info.h" #include "common.h" #include "json-schema-to-grammar.h" #include "llama.h" @@ -747,7 +748,7 @@ struct server_task_result_cmpl_final : server_task_result { }})}, {"created", t}, {"model", oaicompat_model}, - {"system_fingerprint", build_info}, + {"system_fingerprint", std::string(llama_build_info())}, {"object", "text_completion"}, {"usage", json{{"completion_tokens", n_decoded}, {"prompt_tokens", n_prompt_tokens}, @@ -791,7 +792,7 @@ struct server_task_result_cmpl_final : server_task_result { json res = json{{"choices", json::array({choice})}, {"created", t}, {"model", oaicompat_model}, - {"system_fingerprint", build_info}, + {"system_fingerprint", std::string(llama_build_info())}, {"object", "chat.completion"}, {"usage", json{{"completion_tokens", n_decoded}, {"prompt_tokens", n_prompt_tokens}, @@ -826,7 +827,7 @@ struct server_task_result_cmpl_final : server_task_result { {"created", t}, {"id", oaicompat_cmpl_id},
{"model", oaicompat_model}, - {"system_fingerprint", build_info}, + {"system_fingerprint", std::string(llama_build_info())}, {"object", "chat.completion.chunk"}, }); } @@ -842,7 +843,7 @@ struct server_task_result_cmpl_final : server_task_result { {"created", t}, {"id", oaicompat_cmpl_id}, {"model", oaicompat_model}, - {"system_fingerprint", build_info}, + {"system_fingerprint", std::string(llama_build_info())}, {"object", "chat.completion.chunk"}, {"usage", json{ @@ -942,7 +943,7 @@ struct server_task_result_cmpl_partial : server_task_result { }})}, {"created", t}, {"model", oaicompat_model}, - {"system_fingerprint", build_info}, + {"system_fingerprint", std::string(llama_build_info())}, {"object", "text_completion"}, {"id", oaicompat_cmpl_id}}; @@ -975,7 +976,7 @@ struct server_task_result_cmpl_partial : server_task_result { {"created", t}, {"id", oaicompat_cmpl_id}, {"model", oaicompat_model}, - {"system_fingerprint", build_info}, + {"system_fingerprint", std::string(llama_build_info())}, {"object", "chat.completion.chunk"}, }); }; diff --git a/src/main/cpp/utils.hpp b/src/main/cpp/utils.hpp index b3f94a36..4dfc7bb0 100644 --- a/src/main/cpp/utils.hpp +++ b/src/main/cpp/utils.hpp @@ -3,6 +3,7 @@ #include "download.h" // common_remote_get_content, common_remote_params #include "base64.hpp" #include "chat.h" +#include "build-info.h" #include "common.h" #include "llama.h" #include "log.h" @@ -59,7 +60,7 @@ template <typename T> static T json_value(const json &body, const std::string &k } } -// build_info is now defined in common.h (since b7788) +// build_info removed in b8831; use llama_build_info() from build-info.h // thin wrapper around common_grammar_trigger with (de)serialization functions struct server_grammar_trigger { @@ -777,7 +778,7 @@ static json oaicompat_chat_params_parse(json &body, /* openai api json semantics // download remote image // TODO @ngxson : maybe make these params configurable common_remote_params params; - params.headers.push_back({"User-Agent",
"llama.cpp/" + build_info}); + params.headers.push_back({"User-Agent", "llama.cpp/" + std::string(llama_build_info())}); params.max_size = 1024 * 1024 * 10; // 10MB params.timeout = 10; // seconds SRV_INF("downloading image from '%s'\n", url.c_str());