Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
a40586e
Add chatCompleteText() for plain-string chat completion
claude Apr 21, 2026
17fc38b
Add StopReason enum to LlamaOutput
claude Apr 21, 2026
0317df3
Implement parseProbabilities() — fix silent no-op stub
claude Apr 21, 2026
435404e
Make LlamaIterator/LlamaIterable AutoCloseable to prevent task slot leak
claude Apr 21, 2026
cb90bd8
Expose architecture and name from GGUF metadata in ModelMeta
claude Apr 21, 2026
d76dc04
Replace LlamaOutput manual JSON parsing with Jackson
claude Apr 21, 2026
4fa23d0
Extract CompletionResponseParser — pure JSON transforms for completio…
claude Apr 21, 2026
a8fd307
Extract RerankResponseParser — pure JSON transforms for rerank responses
claude Apr 21, 2026
7bb66dd
Extract ChatResponseParser — eliminates all OAI response substring sc…
claude Apr 21, 2026
ba0a4f4
Add ParameterJsonSerializer; wire InferenceParameters and JsonParameters
claude Apr 21, 2026
d19270e
Fix test failures from Jackson migration: whitespace, null, and slash…
claude Apr 21, 2026
7b71aa1
Fix Javadoc doclint errors in new json/ classes
claude Apr 21, 2026
14a1930
Add ModelFlag enum; wire all ModelParameters flag methods to use it
claude Apr 21, 2026
1ae73b3
Introduce CliArg interface; unify enum CLI serialization across all a…
claude Apr 21, 2026
cd95710
Refactor json/ helpers from static utility classes to instantiable cl…
claude Apr 21, 2026
a9918d1
Fix stale enum tests; add ModelFlagTest
claude Apr 21, 2026
aba7e52
Refactor StopReason: add stopType field, replace fromJson with fromSt…
claude Apr 21, 2026
d40ddf7
Rewrite enum tests with @Parameterized data-provider pattern
claude Apr 21, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 16 additions & 7 deletions src/main/cpp/server.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3624,17 +3624,26 @@ struct server_context {
}

json model_meta() const {
// Read optional string metadata from GGUF headers; empty string if absent.
// NOTE(review): llama_model_meta_val_str is documented to return the length of the
// value on success and a negative number on failure; the length is the untruncated
// one (snprintf-style), so it may be >= sizeof(buf) for long values. The length is
// therefore clamped before building the std::string to avoid reading past buf.
auto read_meta_str = [&](const char * key) -> std::string {
    char buf[512] = {};
    const int32_t n = llama_model_meta_val_str(model, key, buf, sizeof(buf));
    if (n < 0) {
        return std::string(); // key absent or lookup failed
    }
    // buf holds at most sizeof(buf) - 1 characters plus the terminating NUL.
    const size_t max_len = sizeof(buf) - 1;
    const size_t len     = static_cast<size_t>(n) < max_len ? static_cast<size_t>(n) : max_len;
    return std::string(buf, len);
};

return json{
{"vocab_type", llama_vocab_type(vocab)},
{"n_vocab", llama_vocab_n_tokens(vocab)},
{"n_ctx_train", llama_model_n_ctx_train(model)},
{"n_embd", llama_model_n_embd(model)},
{"n_params", llama_model_n_params(model)},
{"size", llama_model_size(model)},
{"modalities", json{
{"vocab_type", llama_vocab_type(vocab)},
{"n_vocab", llama_vocab_n_tokens(vocab)},
{"n_ctx_train", llama_model_n_ctx_train(model)},
{"n_embd", llama_model_n_embd(model)},
{"n_params", llama_model_n_params(model)},
{"size", llama_model_size(model)},
{"modalities", json{
{"vision", mctx ? mtmd_support_vision(mctx) : false},
{"audio", mctx ? mtmd_support_audio(mctx) : false},
}},
{"architecture", read_meta_str("general.architecture")},
{"name", read_meta_str("general.name")},
};
}
};
130 changes: 10 additions & 120 deletions src/main/java/de/kherud/llama/InferenceParameters.java
Original file line number Diff line number Diff line change
Expand Up @@ -368,16 +368,7 @@ public InferenceParameters setPenaltyPrompt(String penaltyPrompt) {
*/
public InferenceParameters setPenaltyPrompt(int[] tokens) {
if (tokens.length > 0) {
StringBuilder builder = new StringBuilder();
builder.append("[");
for (int i = 0; i < tokens.length; i++) {
builder.append(tokens[i]);
if (i < tokens.length - 1) {
builder.append(", ");
}
}
builder.append("]");
parameters.put(PARAM_PENALTY_PROMPT, builder.toString());
parameters.put(PARAM_PENALTY_PROMPT, serializer.buildIntArray(tokens).toString());
}
return this;
}
Expand Down Expand Up @@ -408,7 +399,7 @@ public InferenceParameters setIgnoreEos(boolean ignoreEos) {
*/
public InferenceParameters setTokenIdBias(Map<Integer, Float> logitBias) {
if (!logitBias.isEmpty()) {
parameters.put(PARAM_LOGIT_BIAS, buildBiasPairArray(logitBias, String::valueOf));
parameters.put(PARAM_LOGIT_BIAS, serializer.buildTokenIdBiasArray(logitBias).toString());
}
return this;
}
Expand All @@ -428,7 +419,7 @@ public InferenceParameters setTokenIdBias(Map<Integer, Float> logitBias) {
*/
public InferenceParameters disableTokenIds(Collection<Integer> tokenIds) {
if (!tokenIds.isEmpty()) {
parameters.put(PARAM_LOGIT_BIAS, buildDisablePairArray(tokenIds, String::valueOf));
parameters.put(PARAM_LOGIT_BIAS, serializer.buildDisableTokenIdArray(tokenIds).toString());
}
return this;
}
Expand All @@ -448,7 +439,7 @@ public InferenceParameters disableTokenIds(Collection<Integer> tokenIds) {
*/
public InferenceParameters setTokenBias(Map<String, Float> logitBias) {
if (!logitBias.isEmpty()) {
parameters.put(PARAM_LOGIT_BIAS, buildBiasPairArray(logitBias, this::toJsonString));
parameters.put(PARAM_LOGIT_BIAS, serializer.buildTokenStringBiasArray(logitBias).toString());
}
return this;
}
Expand All @@ -468,7 +459,7 @@ public InferenceParameters setTokenBias(Map<String, Float> logitBias) {
*/
public InferenceParameters disableTokens(Collection<String> tokens) {
if (!tokens.isEmpty()) {
parameters.put(PARAM_LOGIT_BIAS, buildDisablePairArray(tokens, this::toJsonString));
parameters.put(PARAM_LOGIT_BIAS, serializer.buildDisableTokenStringArray(tokens).toString());
}
return this;
}
Expand All @@ -481,16 +472,7 @@ public InferenceParameters disableTokens(Collection<String> tokens) {
*/
public InferenceParameters setStopStrings(String... stopStrings) {
if (stopStrings.length > 0) {
StringBuilder builder = new StringBuilder();
builder.append("[");
for (int i = 0; i < stopStrings.length; i++) {
builder.append(toJsonString(stopStrings[i]));
if (i < stopStrings.length - 1) {
builder.append(", ");
}
}
builder.append("]");
parameters.put(PARAM_STOP, builder.toString());
parameters.put(PARAM_STOP, serializer.buildStopStrings(stopStrings).toString());
}
return this;
}
Expand All @@ -503,29 +485,7 @@ public InferenceParameters setStopStrings(String... stopStrings) {
*/
public InferenceParameters setSamplers(Sampler... samplers) {
if (samplers.length > 0) {
StringBuilder builder = new StringBuilder();
builder.append("[");
for (int i = 0; i < samplers.length; i++) {
switch (samplers[i]) {
case TOP_K:
builder.append("\"top_k\"");
break;
case TOP_P:
builder.append("\"top_p\"");
break;
case MIN_P:
builder.append("\"min_p\"");
break;
case TEMPERATURE:
builder.append("\"temperature\"");
break;
}
if (i < samplers.length - 1) {
builder.append(", ");
}
}
builder.append("]");
parameters.put(PARAM_SAMPLERS, builder.toString());
parameters.put(PARAM_SAMPLERS, serializer.buildSamplers(samplers).toString());
}
return this;
}
Expand Down Expand Up @@ -567,7 +527,7 @@ public InferenceParameters setChatTemplate(String chatTemplate) {
* @return this builder
*/
public InferenceParameters setChatTemplateKwargs(java.util.Map<String, String> kwargs) {
parameters.put(PARAM_CHAT_TEMPLATE_KWARGS, mapToJsonObject(kwargs));
parameters.put(PARAM_CHAT_TEMPLATE_KWARGS, serializer.buildRawValueObject(kwargs).toString());
return this;
}

Expand All @@ -581,44 +541,7 @@ public InferenceParameters setChatTemplateKwargs(java.util.Map<String, String> k
* @return this builder
*/
public InferenceParameters setMessages(String systemMessage, List<Pair<String, String>> messages) {
StringBuilder messagesBuilder = new StringBuilder();
messagesBuilder.append("[");

// Add system message (if provided)
if (systemMessage != null && !systemMessage.isEmpty()) {
messagesBuilder.append("{\"role\": \"system\", \"content\": ")
.append(toJsonString(systemMessage))
.append("}");
if (!messages.isEmpty()) {
messagesBuilder.append(", ");
}
}

// Add user/assistant messages
for (int i = 0; i < messages.size(); i++) {
Pair<String, String> message = messages.get(i);
String role = message.getKey();
String content = message.getValue();

if (!role.equals("user") && !role.equals("assistant")) {
throw new IllegalArgumentException("Invalid role: " + role + ". Role must be 'user' or 'assistant'.");
}

messagesBuilder.append("{\"role\":")
.append(toJsonString(role))
.append(", \"content\": ")
.append(toJsonString(content))
.append("}");

if (i < messages.size() - 1) {
messagesBuilder.append(", ");
}
}

messagesBuilder.append("]");

// Store the assembled JSON array string in parameters
parameters.put(PARAM_MESSAGES, messagesBuilder.toString());
parameters.put(PARAM_MESSAGES, serializer.buildMessages(systemMessage, messages).toString());
return this;
}

Expand All @@ -627,38 +550,5 @@ InferenceParameters setStream(boolean stream) {
return this;
}

/**
 * Renders every map entry as a two-element {@code [key, value]} array and wraps the
 * comma-separated pairs in an outer array, in the map's iteration order.
 *
 * @param map           entries to render; values are written using their string conversion
 * @param keySerializer converts each key into the text emitted for it
 * @param <K>           key type
 * @param <V>           value type
 * @return the pair array as a string; {@code "[]"} for an empty map
 */
private static <K, V> String buildBiasPairArray(Map<K, V> map,
        java.util.function.Function<K, String> keySerializer) {
    java.util.StringJoiner pairs = new java.util.StringJoiner(", ", "[", "]");
    for (Map.Entry<K, V> bias : map.entrySet()) {
        String renderedKey = keySerializer.apply(bias.getKey());
        pairs.add("[" + renderedKey + ", " + bias.getValue() + "]");
    }
    return pairs.toString();
}

/**
 * Renders every item as a two-element {@code [item, false]} array and wraps the
 * comma-separated pairs in an outer array, in the collection's iteration order.
 *
 * @param items      items to render
 * @param serializer converts each item into the text emitted for it
 * @param <T>        item type
 * @return the pair array as a string; {@code "[]"} for an empty collection
 */
private static <T> String buildDisablePairArray(Collection<T> items,
        java.util.function.Function<T, String> serializer) {
    java.util.StringJoiner pairs = new java.util.StringJoiner(", ", "[", "]");
    for (T disabled : items) {
        pairs.add("[" + serializer.apply(disabled) + ", false]");
    }
    return pairs.toString();
}

}

71 changes: 5 additions & 66 deletions src/main/java/de/kherud/llama/JsonParameters.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package de.kherud.llama;

import de.kherud.llama.json.ParameterJsonSerializer;

import java.util.HashMap;
import java.util.Map;

Expand All @@ -14,6 +16,8 @@ abstract class JsonParameters {
// The JNI code for a proper Java-typed data object is comparatively too complex and hard to maintain.
final Map<String, String> parameters = new HashMap<>();

protected final ParameterJsonSerializer serializer = new ParameterJsonSerializer();

@Override
public String toString() {
StringBuilder builder = new StringBuilder();
Expand All @@ -35,73 +39,8 @@ public String toString() {
return builder.toString();
}

/**
 * Renders a map as a JSON-style object string in the map's iteration order.
 * Keys are wrapped in double quotes as-is (no escaping); values are emitted verbatim,
 * so each value must already be a valid JSON fragment.
 *
 * @param map entries to render
 * @return the object as a string; {@code "{}"} for an empty map
 */
static String mapToJsonObject(Map<String, String> map) {
    java.util.StringJoiner members = new java.util.StringJoiner(",", "{", "}");
    for (Map.Entry<String, String> member : map.entrySet()) {
        members.add("\"" + member.getKey() + "\":" + member.getValue());
    }
    return members.toString();
}

// taken from org.json.JSONObject#quote(String, Writer)
String toJsonString(String text) {
if (text == null) return null;
StringBuilder builder = new StringBuilder((text.length()) + 2);

char b;
char c = 0;
String hhhh;
int i;
int len = text.length();

builder.append('"');
for (i = 0; i < len; i += 1) {
b = c;
c = text.charAt(i);
switch (c) {
case '\\':
case '"':
builder.append('\\');
builder.append(c);
break;
case '/':
if (b == '<') {
builder.append('\\');
}
builder.append(c);
break;
case '\b':
builder.append("\\b");
break;
case '\t':
builder.append("\\t");
break;
case '\n':
builder.append("\\n");
break;
case '\f':
builder.append("\\f");
break;
case '\r':
builder.append("\\r");
break;
default:
if (c < ' ' || (c >= '\u0080' && c < '\u00a0') || (c >= '\u2000' && c < '\u2100')) {
builder.append("\\u");
hhhh = Integer.toHexString(c);
builder.append("0000", 0, 4 - hhhh.length());
builder.append(hhhh);
}
else {
builder.append(c);
}
}
}
builder.append('"');
return builder.toString();
return serializer.toJsonString(text);
}
}
39 changes: 35 additions & 4 deletions src/main/java/de/kherud/llama/LlamaIterable.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,44 @@
import org.jetbrains.annotations.NotNull;

/**
* An iterable used by {@link LlamaModel#generate(InferenceParameters)} that specifically returns a {@link LlamaIterator}.
* An {@link Iterable} wrapper around {@link LlamaIterator} returned by
* {@link LlamaModel#generate(InferenceParameters)} and {@link LlamaModel#generateChat(InferenceParameters)}.
*
* <p>Implements {@link AutoCloseable} so that a try-with-resources block automatically cancels
* any in-progress generation when the loop exits early (e.g. via {@code break}), preventing the
* native task slot from leaking:
*
* <pre>{@code
* try (LlamaIterable it = model.generate(params)) {
* for (LlamaOutput o : it) {
* if (done) break; // close() cancels the native task automatically
* }
* }
* }</pre>
*
* <p>A plain for-each loop without try-with-resources continues to work, but {@link #close()}
* is then not called on early exit, so the in-progress generation is not cancelled.
*/
@FunctionalInterface
public interface LlamaIterable extends Iterable<LlamaOutput> {
public final class LlamaIterable implements Iterable<LlamaOutput>, AutoCloseable {

private final LlamaIterator iterator;

LlamaIterable(LlamaIterator iterator) {
this.iterator = iterator;
}

@NotNull
@Override
LlamaIterator iterator();
public LlamaIterator iterator() {
return iterator;
}

/**
* Cancels any in-progress generation. Delegates to {@link LlamaIterator#close()}.
* Safe to call multiple times.
*/
@Override
public void close() {
iterator.close();
}
}
Loading
Loading