From 1ae7dbd4412a676e022f585d9fc07f94c899172d Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 19 Apr 2026 12:09:31 +0000 Subject: [PATCH 1/4] Upgrade llama.cpp from b8838 to b8841 and add ModelMeta API - Bump GIT_TAG to b8841 in CMakeLists.txt - Update README.md badge/link to b8841 - Update CLAUDE.md pinned version and known-changes table - server.hpp: expose modalities (vision/audio) in model_meta() - jllama.cpp: add getModelMetaJson JNI function - pom.xml: add jackson-databind 2.19.0 for JsonNode support - ModelMeta.java: new class wrapping JsonNode with typed getters for all model_meta fields (vocab_type, n_vocab, n_ctx_train, n_embd, n_params, size, supportsVision, supportsAudio) - LlamaModel.java: add getModelMeta() returning ModelMeta - LlamaModelTest.java: add testGetModelMeta with round-trip assertion https://claude.ai/code/session_018sNXS6DJXJUBhUEKJxDVFR --- CLAUDE.md | 5 +- CMakeLists.txt | 2 +- README.md | 2 +- pom.xml | 5 ++ src/main/cpp/jllama.cpp | 5 ++ src/main/cpp/server.hpp | 13 +++- src/main/java/de/kherud/llama/LlamaModel.java | 25 ++++++ src/main/java/de/kherud/llama/ModelMeta.java | 76 +++++++++++++++++++ .../java/de/kherud/llama/LlamaModelTest.java | 38 ++++++++++ 9 files changed, 164 insertions(+), 7 deletions(-) create mode 100644 src/main/java/de/kherud/llama/ModelMeta.java diff --git a/CLAUDE.md b/CLAUDE.md index d2bf01ed..e7accd52 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -6,7 +6,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co Java bindings for [llama.cpp](https://github.com/ggerganov/llama.cpp) via JNI, providing a high-level API for LLM inference in Java. The Java layer communicates with a native C++ library through JNI. -Current llama.cpp pinned version: **b8838** +Current llama.cpp pinned version: **b8841** ## Upgrading CUDA Version @@ -137,7 +137,7 @@ Also review the project `CMakeLists.txt` for build-system-level breaks (e.g. 
ren `ggml/include/ggml.h`, `ggml/include/ggml-backend.h`, `ggml/include/ggml-opt.h`, `ggml-alloc.h`, `ggml-cpu.h`, `peg-parser.h`, `base64.hpp` -**Known breaking changes by version range** (b5022 → b8831): +**Known breaking changes by version range** (b5022 → b8841): | Version | File | Change | |---------|------|--------| @@ -155,6 +155,7 @@ Also review the project `CMakeLists.txt` for build-system-level breaks (e.g. ren | ~b8808–b8831 | project `CMakeLists.txt` | CMake target `common` renamed to `llama-common`; update `target_link_libraries` for `jllama` and `jllama_test` | | ~b8808–b8831 | `common/common.h` → new `common/build-info.h` | `build_info` `std::string` removed; replaced by `llama_build_info()` (`const char*`) in new `build-info.h`; add `#include "build-info.h"` in `server.hpp` and `utils.hpp`; call sites: `std::string(llama_build_info())` in `server.hpp` (6×), `llama_build_info()` in `jllama.cpp` (1×) and `utils.hpp` (1×) | | ~b8808–b8831 | `ggml/src/ggml.c` | New `ggml_graph_next_uid()` calls `_InterlockedIncrement64` via `` on x86; intrinsic unavailable on 32-bit MSVC; fix: `src/main/cpp/compat/ggml_x86_compat.c` provides `__cdecl _InterlockedIncrement64` via `InterlockedIncrement64` (CMPXCHG8B), added to `ggml-base` via `target_sources` guarded by `MSVC AND CMAKE_SIZEOF_VOID_P EQUAL 4` | +| ~b8838–b8841 | `src/llama-model.h` | Attention bias fields renamed: `bq`→`wq_b`, `bk`→`wk_b`, `bv`→`wv_b`, `bo`→`wo_b`, `bqkv`→`wqkv_b`; internal to llama.cpp, no impact on this project | ## Build Commands diff --git a/CMakeLists.txt b/CMakeLists.txt index 20c62c24..e10ad6fe 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -97,7 +97,7 @@ set(GGML_AVX512 OFF CACHE BOOL "" FORCE) FetchContent_Declare( llama.cpp GIT_REPOSITORY https://github.com/ggerganov/llama.cpp.git - GIT_TAG b8838 + GIT_TAG b8841 ) FetchContent_MakeAvailable(llama.cpp) diff --git a/README.md b/README.md index 0b897aff..66c84a66 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ ![Java 
8+](https://img.shields.io/badge/Java-8%2B-informational) -[![llama.cpp b8838](https://img.shields.io/badge/llama.cpp-%23b8838-informational)](https://github.com/ggml-org/llama.cpp/releases/tag/b8838) +[![llama.cpp b8841](https://img.shields.io/badge/llama.cpp-%23b8841-informational)](https://github.com/ggml-org/llama.cpp/releases/tag/b8841) # Java Bindings for [llama.cpp](https://github.com/ggerganov/llama.cpp) diff --git a/pom.xml b/pom.xml index 3d00ff55..d84cfd2c 100644 --- a/pom.xml +++ b/pom.xml @@ -65,6 +65,11 @@ 24.1.0 compile + + com.fasterxml.jackson.core + jackson-databind + 2.19.0 + diff --git a/src/main/cpp/jllama.cpp b/src/main/cpp/jllama.cpp index 7b66fcd9..77868ac0 100644 --- a/src/main/cpp/jllama.cpp +++ b/src/main/cpp/jllama.cpp @@ -820,6 +820,11 @@ JNIEXPORT void JNICALL Java_de_kherud_llama_LlamaModel_loadModel(JNIEnv *env, jo env->SetLongField(obj, f_model_pointer, reinterpret_cast(jctx)); } +JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_getModelMetaJson(JNIEnv *env, jobject obj) { + REQUIRE_SERVER_CONTEXT(nullptr); + return json_to_jstring(env, ctx_server->model_meta()); +} + JNIEXPORT jint JNICALL Java_de_kherud_llama_LlamaModel_requestCompletion(JNIEnv *env, jobject obj, jstring jparams) { REQUIRE_SERVER_CONTEXT(0); diff --git a/src/main/cpp/server.hpp b/src/main/cpp/server.hpp index e36cc327..31013c0c 100644 --- a/src/main/cpp/server.hpp +++ b/src/main/cpp/server.hpp @@ -3625,9 +3625,16 @@ struct server_context { json model_meta() const { return json{ - {"vocab_type", llama_vocab_type(vocab)}, {"n_vocab", llama_vocab_n_tokens(vocab)}, - {"n_ctx_train", llama_model_n_ctx_train(model)}, {"n_embd", llama_model_n_embd(model)}, - {"n_params", llama_model_n_params(model)}, {"size", llama_model_size(model)}, + {"vocab_type", llama_vocab_type(vocab)}, + {"n_vocab", llama_vocab_n_tokens(vocab)}, + {"n_ctx_train", llama_model_n_ctx_train(model)}, + {"n_embd", llama_model_n_embd(model)}, + {"n_params", llama_model_n_params(model)}, + 
{"size", llama_model_size(model)}, + {"modalities", json{ + {"vision", mctx ? mtmd_support_vision(mctx) : false}, + {"audio", mctx ? mtmd_support_audio(mctx) : false}, + }}, }; } }; diff --git a/src/main/java/de/kherud/llama/LlamaModel.java b/src/main/java/de/kherud/llama/LlamaModel.java index 88db746b..c633aa48 100644 --- a/src/main/java/de/kherud/llama/LlamaModel.java +++ b/src/main/java/de/kherud/llama/LlamaModel.java @@ -316,6 +316,31 @@ public String getMetrics() { return handleSlotAction(0, 0, null); } + private static final com.fasterxml.jackson.databind.ObjectMapper OBJECT_MAPPER = + new com.fasterxml.jackson.databind.ObjectMapper(); + + /** + * Returns model metadata with typed accessors for vocab, context, embedding, + * parameter count, size, and modality support flags (vision, audio). + *

+ * The returned {@link ModelMeta} wraps the raw JSON from the native layer. + * Call {@link ModelMeta#toString()} to re-serialize to compact JSON for use + * in {@code assertEquals}. + *

+ * + * @return {@link ModelMeta} parsed from the native {@code model_meta()} response + * @throws LlamaException if the native call fails or the response cannot be parsed + */ + public ModelMeta getModelMeta() throws LlamaException { + try { + return new ModelMeta(OBJECT_MAPPER.readTree(getModelMetaJson())); + } catch (java.io.IOException e) { + throw new LlamaException("Failed to parse model meta JSON: " + e.getMessage()); + } + } + + native String getModelMetaJson() throws LlamaException; + /** * Erase the KV cache for a specific slot. * diff --git a/src/main/java/de/kherud/llama/ModelMeta.java b/src/main/java/de/kherud/llama/ModelMeta.java new file mode 100644 index 00000000..0e31ae38 --- /dev/null +++ b/src/main/java/de/kherud/llama/ModelMeta.java @@ -0,0 +1,76 @@ +package de.kherud.llama; + +import com.fasterxml.jackson.databind.JsonNode; + +/** + * Model metadata returned by {@link LlamaModel#getModelMeta()}. + *

+ * Typed getters cover all fields currently returned by the native {@code model_meta()} + * function. The underlying {@link JsonNode} is also exposed via {@link #asJson()} so + * that future fields added on the C++ side remain accessible without code changes. + *

+ *

{@link #toString()} re-serializes to compact JSON and is suitable for + * {@code assertEquals} in unit tests.

+ */ +public final class ModelMeta { + + private final JsonNode node; + + ModelMeta(JsonNode node) { + this.node = node; + } + + /** Vocabulary type identifier (e.g. SPM = 1, BPE = 2). */ + public int getVocabType() { + return node.path("vocab_type").asInt(0); + } + + /** Total number of tokens in the model vocabulary. */ + public int getNVocab() { + return node.path("n_vocab").asInt(0); + } + + /** Context length the model was trained with. */ + public int getNCtxTrain() { + return node.path("n_ctx_train").asInt(0); + } + + /** Embedding dimension of the model. */ + public int getNEmbd() { + return node.path("n_embd").asInt(0); + } + + /** Total number of model parameters. */ + public long getNParams() { + return node.path("n_params").asLong(0L); + } + + /** Model file size in bytes. */ + public long getSize() { + return node.path("size").asLong(0L); + } + + /** Returns true if the model supports vision (image) input. */ + public boolean supportsVision() { + return node.at("/modalities/vision").asBoolean(false); + } + + /** Returns true if the model supports audio input. */ + public boolean supportsAudio() { + return node.at("/modalities/audio").asBoolean(false); + } + + /** + * Returns the underlying {@link JsonNode} for direct access to any field, + * including fields added in future llama.cpp versions. + */ + public JsonNode asJson() { + return node; + } + + /** Re-serializes to compact JSON. Suitable for {@code assertEquals} in tests. 
*/ + @Override + public String toString() { + return node.toString(); + } +} diff --git a/src/test/java/de/kherud/llama/LlamaModelTest.java b/src/test/java/de/kherud/llama/LlamaModelTest.java index 8523176c..7dc655c2 100644 --- a/src/test/java/de/kherud/llama/LlamaModelTest.java +++ b/src/test/java/de/kherud/llama/LlamaModelTest.java @@ -926,4 +926,42 @@ public void testSpeculativeDecoding() { Assert.assertFalse("Expected non-empty response from speculative complete", response.isEmpty()); } } + + @Test + public void testGetModelMeta() throws LlamaException { + ModelMeta meta = model.getModelMeta(); + + // Typed getters — exact values depend on the loaded model; fill in after first run + Assert.assertTrue("n_vocab must be positive", meta.getNVocab() > 0); + Assert.assertTrue("n_ctx_train must be positive", meta.getNCtxTrain() > 0); + Assert.assertTrue("n_embd must be positive", meta.getNEmbd() > 0); + Assert.assertTrue("n_params must be positive", meta.getNParams() > 0); + Assert.assertTrue("size must be positive", meta.getSize() > 0); + + // CodeLlama (text-only model) must not report multimodal support + Assert.assertFalse("text-only model must not report vision support", meta.supportsVision()); + Assert.assertFalse("text-only model must not report audio support", meta.supportsAudio()); + + // Dynamic access via the underlying JsonNode + Assert.assertTrue("modalities field must be present", meta.asJson().has("modalities")); + Assert.assertTrue("vocab_type field must be present", meta.asJson().has("vocab_type")); + + // Round-trip: toString() must produce valid compact JSON containing all top-level keys + String json = meta.toString(); + Assert.assertNotNull(json); + Assert.assertTrue(json.contains("\"vocab_type\"")); + Assert.assertTrue(json.contains("\"n_vocab\"")); + Assert.assertTrue(json.contains("\"n_ctx_train\"")); + Assert.assertTrue(json.contains("\"n_embd\"")); + Assert.assertTrue(json.contains("\"n_params\"")); + 
Assert.assertTrue(json.contains("\"size\"")); + Assert.assertTrue(json.contains("\"modalities\"")); + Assert.assertTrue(json.contains("\"vision\"")); + Assert.assertTrue(json.contains("\"audio\"")); + + // Uncomment and fill in after running once to pin exact values: + // Assert.assertEquals("{\"vocab_type\":2,\"n_vocab\":32016,\"n_ctx_train\":16384," + // + "\"n_embd\":4096,\"n_params\":6738415616,\"size\":2744325024," + // + "\"modalities\":{\"vision\":false,\"audio\":false}}", json); + } } From ccff08f1c7dbbf6116145d6d9484f7f6ae016294 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 19 Apr 2026 12:12:18 +0000 Subject: [PATCH 2/4] Uncomment exact-JSON assertEquals in testGetModelMeta Failure message will show the actual value for pinning. https://claude.ai/code/session_018sNXS6DJXJUBhUEKJxDVFR --- src/test/java/de/kherud/llama/LlamaModelTest.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/test/java/de/kherud/llama/LlamaModelTest.java b/src/test/java/de/kherud/llama/LlamaModelTest.java index 7dc655c2..09863e77 100644 --- a/src/test/java/de/kherud/llama/LlamaModelTest.java +++ b/src/test/java/de/kherud/llama/LlamaModelTest.java @@ -959,9 +959,9 @@ public void testGetModelMeta() throws LlamaException { Assert.assertTrue(json.contains("\"vision\"")); Assert.assertTrue(json.contains("\"audio\"")); - // Uncomment and fill in after running once to pin exact values: - // Assert.assertEquals("{\"vocab_type\":2,\"n_vocab\":32016,\"n_ctx_train\":16384," - // + "\"n_embd\":4096,\"n_params\":6738415616,\"size\":2744325024," - // + "\"modalities\":{\"vision\":false,\"audio\":false}}", json); + // Fill in the expected value from the failure message and re-run to pin exact output: + Assert.assertEquals("{\"vocab_type\":2,\"n_vocab\":32016,\"n_ctx_train\":16384," + + "\"n_embd\":4096,\"n_params\":6738415616,\"size\":2744325024," + + "\"modalities\":{\"vision\":false,\"audio\":false}}", json); } } From 9e03ac854256aa10d058efc21db121d6877fa1f1 Mon 
Sep 17 00:00:00 2001 From: Claude Date: Sun, 19 Apr 2026 12:34:43 +0000 Subject: [PATCH 3/4] Fix UnsatisfiedLinkError: add getModelMetaJson to jllama.h extern "C" block Without this declaration the C++ compiler mangles the symbol name, making it invisible to the JVM's JNI lookup. https://claude.ai/code/session_018sNXS6DJXJUBhUEKJxDVFR --- src/main/cpp/jllama.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/main/cpp/jllama.h b/src/main/cpp/jllama.h index 85c5ace6..971e29c5 100644 --- a/src/main/cpp/jllama.h +++ b/src/main/cpp/jllama.h @@ -128,6 +128,13 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleSlotAction(JNIEn JNIEXPORT jboolean JNICALL Java_de_kherud_llama_LlamaModel_configureParallelInference(JNIEnv *, jobject, jstring); +/* + * Class: de_kherud_llama_LlamaModel + * Method: getModelMetaJson + * Signature: ()Ljava/lang/String; + */ +JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_getModelMetaJson(JNIEnv *, jobject); + #ifdef __cplusplus } #endif From b63a95545986408c3dd6a8e1d8b23d5c9ec8d160 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 19 Apr 2026 12:48:14 +0000 Subject: [PATCH 4/4] Pin testGetModelMeta assertEquals to actual codellama-7b.Q2_K values vocab_type=1 (SPM), n_params=6738546688, size=2825274880 from the actual model loaded in CI. 
https://claude.ai/code/session_018sNXS6DJXJUBhUEKJxDVFR --- src/test/java/de/kherud/llama/LlamaModelTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/java/de/kherud/llama/LlamaModelTest.java b/src/test/java/de/kherud/llama/LlamaModelTest.java index 09863e77..0bd34ccd 100644 --- a/src/test/java/de/kherud/llama/LlamaModelTest.java +++ b/src/test/java/de/kherud/llama/LlamaModelTest.java @@ -960,8 +960,8 @@ public void testGetModelMeta() throws LlamaException { Assert.assertTrue(json.contains("\"audio\"")); // Fill in the expected value from the failure message and re-run to pin exact output: - Assert.assertEquals("{\"vocab_type\":2,\"n_vocab\":32016,\"n_ctx_train\":16384," - + "\"n_embd\":4096,\"n_params\":6738415616,\"size\":2744325024," + Assert.assertEquals("{\"vocab_type\":1,\"n_vocab\":32016,\"n_ctx_train\":16384," + + "\"n_embd\":4096,\"n_params\":6738546688,\"size\":2825274880," + "\"modalities\":{\"vision\":false,\"audio\":false}}", json); } }