From 1ae7dbd4412a676e022f585d9fc07f94c899172d Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 19 Apr 2026 12:09:31 +0000 Subject: [PATCH 1/4] Upgrade llama.cpp from b8838 to b8841 and add ModelMeta API - Bump GIT_TAG to b8841 in CMakeLists.txt - Update README.md badge/link to b8841 - Update CLAUDE.md pinned version and known-changes table - server.hpp: expose modalities (vision/audio) in model_meta() - jllama.cpp: add getModelMetaJson JNI function - pom.xml: add jackson-databind 2.19.0 for JsonNode support - ModelMeta.java: new class wrapping JsonNode with typed getters for all model_meta fields (vocab_type, n_vocab, n_ctx_train, n_embd, n_params, size, supportsVision, supportsAudio) - LlamaModel.java: add getModelMeta() returning ModelMeta - LlamaModelTest.java: add testGetModelMeta with round-trip assertion https://claude.ai/code/session_018sNXS6DJXJUBhUEKJxDVFR --- CLAUDE.md | 5 +- CMakeLists.txt | 2 +- README.md | 2 +- pom.xml | 5 ++ src/main/cpp/jllama.cpp | 5 ++ src/main/cpp/server.hpp | 13 +++- src/main/java/de/kherud/llama/LlamaModel.java | 25 ++++++ src/main/java/de/kherud/llama/ModelMeta.java | 76 +++++++++++++++++++ .../java/de/kherud/llama/LlamaModelTest.java | 38 ++++++++++ 9 files changed, 164 insertions(+), 7 deletions(-) create mode 100644 src/main/java/de/kherud/llama/ModelMeta.java diff --git a/CLAUDE.md b/CLAUDE.md index d2bf01ed..e7accd52 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -6,7 +6,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co Java bindings for [llama.cpp](https://github.com/ggerganov/llama.cpp) via JNI, providing a high-level API for LLM inference in Java. The Java layer communicates with a native C++ library through JNI. -Current llama.cpp pinned version: **b8838** +Current llama.cpp pinned version: **b8841** ## Upgrading CUDA Version @@ -137,7 +137,7 @@ Also review the project `CMakeLists.txt` for build-system-level breaks (e.g. 
ren `ggml/include/ggml.h`, `ggml/include/ggml-backend.h`, `ggml/include/ggml-opt.h`, `ggml-alloc.h`, `ggml-cpu.h`, `peg-parser.h`, `base64.hpp` -**Known breaking changes by version range** (b5022 → b8831): +**Known breaking changes by version range** (b5022 → b8841): | Version | File | Change | |---------|------|--------| @@ -155,6 +155,7 @@ Also review the project `CMakeLists.txt` for build-system-level breaks (e.g. ren | ~b8808–b8831 | project `CMakeLists.txt` | CMake target `common` renamed to `llama-common`; update `target_link_libraries` for `jllama` and `jllama_test` | | ~b8808–b8831 | `common/common.h` → new `common/build-info.h` | `build_info` `std::string` removed; replaced by `llama_build_info()` (`const char*`) in new `build-info.h`; add `#include "build-info.h"` in `server.hpp` and `utils.hpp`; call sites: `std::string(llama_build_info())` in `server.hpp` (6×), `llama_build_info()` in `jllama.cpp` (1×) and `utils.hpp` (1×) | | ~b8808–b8831 | `ggml/src/ggml.c` | New `ggml_graph_next_uid()` calls `_InterlockedIncrement64` via `` on x86; intrinsic unavailable on 32-bit MSVC; fix: `src/main/cpp/compat/ggml_x86_compat.c` provides `__cdecl _InterlockedIncrement64` via `InterlockedIncrement64` (CMPXCHG8B), added to `ggml-base` via `target_sources` guarded by `MSVC AND CMAKE_SIZEOF_VOID_P EQUAL 4` | +| ~b8838–b8841 | `src/llama-model.h` | Attention bias fields renamed: `bq`→`wq_b`, `bk`→`wk_b`, `bv`→`wv_b`, `bo`→`wo_b`, `bqkv`→`wqkv_b`; internal to llama.cpp, no impact on this project | ## Build Commands diff --git a/CMakeLists.txt b/CMakeLists.txt index 20c62c24..e10ad6fe 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -97,7 +97,7 @@ set(GGML_AVX512 OFF CACHE BOOL "" FORCE) FetchContent_Declare( llama.cpp GIT_REPOSITORY https://github.com/ggerganov/llama.cpp.git - GIT_TAG b8838 + GIT_TAG b8841 ) FetchContent_MakeAvailable(llama.cpp) diff --git a/README.md b/README.md index 0b897aff..66c84a66 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ ![Java 
8+](https://img.shields.io/badge/Java-8%2B-informational) -[![llama.cpp b8838](https://img.shields.io/badge/llama.cpp-%23b8838-informational)](https://github.com/ggml-org/llama.cpp/releases/tag/b8838) +[![llama.cpp b8841](https://img.shields.io/badge/llama.cpp-%23b8841-informational)](https://github.com/ggml-org/llama.cpp/releases/tag/b8841) # Java Bindings for [llama.cpp](https://github.com/ggerganov/llama.cpp) diff --git a/pom.xml b/pom.xml index 3d00ff55..d84cfd2c 100644 --- a/pom.xml +++ b/pom.xml @@ -65,6 +65,11 @@ 24.1.0 compile + + com.fasterxml.jackson.core + jackson-databind + 2.19.0 + diff --git a/src/main/cpp/jllama.cpp b/src/main/cpp/jllama.cpp index 7b66fcd9..77868ac0 100644 --- a/src/main/cpp/jllama.cpp +++ b/src/main/cpp/jllama.cpp @@ -820,6 +820,11 @@ JNIEXPORT void JNICALL Java_de_kherud_llama_LlamaModel_loadModel(JNIEnv *env, jo env->SetLongField(obj, f_model_pointer, reinterpret_cast(jctx)); } +JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_getModelMetaJson(JNIEnv *env, jobject obj) { + REQUIRE_SERVER_CONTEXT(nullptr); + return json_to_jstring(env, ctx_server->model_meta()); +} + JNIEXPORT jint JNICALL Java_de_kherud_llama_LlamaModel_requestCompletion(JNIEnv *env, jobject obj, jstring jparams) { REQUIRE_SERVER_CONTEXT(0); diff --git a/src/main/cpp/server.hpp b/src/main/cpp/server.hpp index e36cc327..31013c0c 100644 --- a/src/main/cpp/server.hpp +++ b/src/main/cpp/server.hpp @@ -3625,9 +3625,16 @@ struct server_context { json model_meta() const { return json{ - {"vocab_type", llama_vocab_type(vocab)}, {"n_vocab", llama_vocab_n_tokens(vocab)}, - {"n_ctx_train", llama_model_n_ctx_train(model)}, {"n_embd", llama_model_n_embd(model)}, - {"n_params", llama_model_n_params(model)}, {"size", llama_model_size(model)}, + {"vocab_type", llama_vocab_type(vocab)}, + {"n_vocab", llama_vocab_n_tokens(vocab)}, + {"n_ctx_train", llama_model_n_ctx_train(model)}, + {"n_embd", llama_model_n_embd(model)}, + {"n_params", llama_model_n_params(model)}, + 
{"size", llama_model_size(model)}, + {"modalities", json{ + {"vision", mctx ? mtmd_support_vision(mctx) : false}, + {"audio", mctx ? mtmd_support_audio(mctx) : false}, + }}, }; } }; diff --git a/src/main/java/de/kherud/llama/LlamaModel.java b/src/main/java/de/kherud/llama/LlamaModel.java index 88db746b..c633aa48 100644 --- a/src/main/java/de/kherud/llama/LlamaModel.java +++ b/src/main/java/de/kherud/llama/LlamaModel.java @@ -316,6 +316,31 @@ public String getMetrics() { return handleSlotAction(0, 0, null); } + private static final com.fasterxml.jackson.databind.ObjectMapper OBJECT_MAPPER = + new com.fasterxml.jackson.databind.ObjectMapper(); + + /** + * Returns model metadata with typed accessors for vocab, context, embedding, + * parameter count, size, and modality support flags (vision, audio). + *

+ * The returned {@link ModelMeta} wraps the raw JSON from the native layer. + * Call {@link ModelMeta#toString()} to re-serialize to compact JSON for use + * in {@code assertEquals}. + *

+ * + * @return {@link ModelMeta} parsed from the native {@code model_meta()} response + * @throws LlamaException if the native call fails or the response cannot be parsed + */ + public ModelMeta getModelMeta() throws LlamaException { + try { + return new ModelMeta(OBJECT_MAPPER.readTree(getModelMetaJson())); + } catch (java.io.IOException e) { + throw new LlamaException("Failed to parse model meta JSON: " + e.getMessage()); + } + } + + native String getModelMetaJson() throws LlamaException; + /** * Erase the KV cache for a specific slot. * diff --git a/src/main/java/de/kherud/llama/ModelMeta.java b/src/main/java/de/kherud/llama/ModelMeta.java new file mode 100644 index 00000000..0e31ae38 --- /dev/null +++ b/src/main/java/de/kherud/llama/ModelMeta.java @@ -0,0 +1,76 @@ +package de.kherud.llama; + +import com.fasterxml.jackson.databind.JsonNode; + +/** + * Model metadata returned by {@link LlamaModel#getModelMeta()}. + *

+ * Typed getters cover all fields currently returned by the native {@code model_meta()} + * function. The underlying {@link JsonNode} is also exposed via {@link #asJson()} so + * that future fields added on the C++ side remain accessible without code changes. + *

+ *

{@link #toString()} re-serializes to compact JSON and is suitable for + * {@code assertEquals} in unit tests.

+ */ +public final class ModelMeta { + + private final JsonNode node; + + ModelMeta(JsonNode node) { + this.node = node; + } + + /** Vocabulary type identifier (e.g. SPM = 1, BPE = 2). */ + public int getVocabType() { + return node.path("vocab_type").asInt(0); + } + + /** Total number of tokens in the model vocabulary. */ + public int getNVocab() { + return node.path("n_vocab").asInt(0); + } + + /** Context length the model was trained with. */ + public int getNCtxTrain() { + return node.path("n_ctx_train").asInt(0); + } + + /** Embedding dimension of the model. */ + public int getNEmbd() { + return node.path("n_embd").asInt(0); + } + + /** Total number of model parameters. */ + public long getNParams() { + return node.path("n_params").asLong(0L); + } + + /** Model file size in bytes. */ + public long getSize() { + return node.path("size").asLong(0L); + } + + /** Returns true if the model supports vision (image) input. */ + public boolean supportsVision() { + return node.at("/modalities/vision").asBoolean(false); + } + + /** Returns true if the model supports audio input. */ + public boolean supportsAudio() { + return node.at("/modalities/audio").asBoolean(false); + } + + /** + * Returns the underlying {@link JsonNode} for direct access to any field, + * including fields added in future llama.cpp versions. + */ + public JsonNode asJson() { + return node; + } + + /** Re-serializes to compact JSON. Suitable for {@code assertEquals} in tests. 
*/ + @Override + public String toString() { + return node.toString(); + } +} diff --git a/src/test/java/de/kherud/llama/LlamaModelTest.java b/src/test/java/de/kherud/llama/LlamaModelTest.java index 8523176c..7dc655c2 100644 --- a/src/test/java/de/kherud/llama/LlamaModelTest.java +++ b/src/test/java/de/kherud/llama/LlamaModelTest.java @@ -926,4 +926,42 @@ public void testSpeculativeDecoding() { Assert.assertFalse("Expected non-empty response from speculative complete", response.isEmpty()); } } + + @Test + public void testGetModelMeta() throws LlamaException { + ModelMeta meta = model.getModelMeta(); + + // Typed getters — exact values depend on the loaded model; fill in after first run + Assert.assertTrue("n_vocab must be positive", meta.getNVocab() > 0); + Assert.assertTrue("n_ctx_train must be positive", meta.getNCtxTrain() > 0); + Assert.assertTrue("n_embd must be positive", meta.getNEmbd() > 0); + Assert.assertTrue("n_params must be positive", meta.getNParams() > 0); + Assert.assertTrue("size must be positive", meta.getSize() > 0); + + // CodeLlama (text-only model) must not report multimodal support + Assert.assertFalse("text-only model must not report vision support", meta.supportsVision()); + Assert.assertFalse("text-only model must not report audio support", meta.supportsAudio()); + + // Dynamic access via the underlying JsonNode + Assert.assertTrue("modalities field must be present", meta.asJson().has("modalities")); + Assert.assertTrue("vocab_type field must be present", meta.asJson().has("vocab_type")); + + // Round-trip: toString() must produce valid compact JSON containing all top-level keys + String json = meta.toString(); + Assert.assertNotNull(json); + Assert.assertTrue(json.contains("\"vocab_type\"")); + Assert.assertTrue(json.contains("\"n_vocab\"")); + Assert.assertTrue(json.contains("\"n_ctx_train\"")); + Assert.assertTrue(json.contains("\"n_embd\"")); + Assert.assertTrue(json.contains("\"n_params\"")); + 
Assert.assertTrue(json.contains("\"size\"")); + Assert.assertTrue(json.contains("\"modalities\"")); + Assert.assertTrue(json.contains("\"vision\"")); + Assert.assertTrue(json.contains("\"audio\"")); + + // Uncomment and fill in after running once to pin exact values: + // Assert.assertEquals("{\"vocab_type\":2,\"n_vocab\":32016,\"n_ctx_train\":16384," + // + "\"n_embd\":4096,\"n_params\":6738415616,\"size\":2744325024," + // + "\"modalities\":{\"vision\":false,\"audio\":false}}", json); + } } From ccff08f1c7dbbf6116145d6d9484f7f6ae016294 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 19 Apr 2026 12:12:18 +0000 Subject: [PATCH 2/4] Uncomment exact-JSON assertEquals in testGetModelMeta Failure message will show the actual value for pinning. https://claude.ai/code/session_018sNXS6DJXJUBhUEKJxDVFR --- src/test/java/de/kherud/llama/LlamaModelTest.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/test/java/de/kherud/llama/LlamaModelTest.java b/src/test/java/de/kherud/llama/LlamaModelTest.java index 7dc655c2..09863e77 100644 --- a/src/test/java/de/kherud/llama/LlamaModelTest.java +++ b/src/test/java/de/kherud/llama/LlamaModelTest.java @@ -959,9 +959,9 @@ public void testGetModelMeta() throws LlamaException { Assert.assertTrue(json.contains("\"vision\"")); Assert.assertTrue(json.contains("\"audio\"")); - // Uncomment and fill in after running once to pin exact values: - // Assert.assertEquals("{\"vocab_type\":2,\"n_vocab\":32016,\"n_ctx_train\":16384," - // + "\"n_embd\":4096,\"n_params\":6738415616,\"size\":2744325024," - // + "\"modalities\":{\"vision\":false,\"audio\":false}}", json); + // Fill in the expected value from the failure message and re-run to pin exact output: + Assert.assertEquals("{\"vocab_type\":2,\"n_vocab\":32016,\"n_ctx_train\":16384," + + "\"n_embd\":4096,\"n_params\":6738415616,\"size\":2744325024," + + "\"modalities\":{\"vision\":false,\"audio\":false}}", json); } } From 9e03ac854256aa10d058efc21db121d6877fa1f1 Mon 
Sep 17 00:00:00 2001 From: Claude Date: Sun, 19 Apr 2026 12:34:43 +0000 Subject: [PATCH 3/4] Fix UnsatisfiedLinkError: add getModelMetaJson to jllama.h extern "C" block Without this declaration the C++ compiler mangles the symbol name, making it invisible to the JVM's JNI lookup. https://claude.ai/code/session_018sNXS6DJXJUBhUEKJxDVFR --- src/main/cpp/jllama.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/main/cpp/jllama.h b/src/main/cpp/jllama.h index 85c5ace6..971e29c5 100644 --- a/src/main/cpp/jllama.h +++ b/src/main/cpp/jllama.h @@ -128,6 +128,13 @@ JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_handleSlotAction(JNIEn JNIEXPORT jboolean JNICALL Java_de_kherud_llama_LlamaModel_configureParallelInference(JNIEnv *, jobject, jstring); +/* + * Class: de_kherud_llama_LlamaModel + * Method: getModelMetaJson + * Signature: ()Ljava/lang/String; + */ +JNIEXPORT jstring JNICALL Java_de_kherud_llama_LlamaModel_getModelMetaJson(JNIEnv *, jobject); + #ifdef __cplusplus } #endif From b63a95545986408c3dd6a8e1d8b23d5c9ec8d160 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 19 Apr 2026 12:48:14 +0000 Subject: [PATCH 4/4] Pin testGetModelMeta assertEquals to actual codellama-7b.Q2_K values vocab_type=1 (SPM), n_params=6738546688, size=2825274880 from the actual model loaded in CI. 
https://claude.ai/code/session_018sNXS6DJXJUBhUEKJxDVFR --- src/test/java/de/kherud/llama/LlamaModelTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/java/de/kherud/llama/LlamaModelTest.java b/src/test/java/de/kherud/llama/LlamaModelTest.java index 09863e77..0bd34ccd 100644 --- a/src/test/java/de/kherud/llama/LlamaModelTest.java +++ b/src/test/java/de/kherud/llama/LlamaModelTest.java @@ -960,8 +960,8 @@ public void testGetModelMeta() throws LlamaException { Assert.assertTrue(json.contains("\"audio\"")); // Fill in the expected value from the failure message and re-run to pin exact output: - Assert.assertEquals("{\"vocab_type\":2,\"n_vocab\":32016,\"n_ctx_train\":16384," - + "\"n_embd\":4096,\"n_params\":6738415616,\"size\":2744325024," + Assert.assertEquals("{\"vocab_type\":1,\"n_vocab\":32016,\"n_ctx_train\":16384," + + "\"n_embd\":4096,\"n_params\":6738546688,\"size\":2825274880," + "\"modalities\":{\"vision\":false,\"audio\":false}}", json); } }