Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
145 changes: 145 additions & 0 deletions LlamaTornadoCli.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
//JAVA 21
//PREVIEW
//DEPS io.github.beehive-lab:gpu-llama3:0.3.2-dev
//DEPS io.github.beehive-lab:tornado-api:2.1.0
//DEPS io.github.beehive-lab:tornado-runtime:2.1.0

//SOURCES TornadoFlags.java
// === Set to not get annoying warnings about annotation processing
//JAVAC_OPTIONS -proc:full

// Compiler options
//JAVAC_OPTIONS --enable-preview
//JAVAC_OPTIONS --add-modules=jdk.incubator.vector

// JVM options for basic setup
//JAVA_OPTIONS --enable-preview
//JAVA_OPTIONS --add-modules=jdk.incubator.vector

package org.beehive.gpullama3.cli;

import org.beehive.gpullama3.Options;
import org.beehive.gpullama3.auxiliary.LastRunMetrics;
import org.beehive.gpullama3.inference.sampler.Sampler;
import org.beehive.gpullama3.model.Model;

import java.io.IOException;

import static org.beehive.gpullama3.inference.sampler.Sampler.createSampler;
import static org.beehive.gpullama3.model.loader.ModelLoader.loadModel;

/**
* LlamaTornadoCli - Pure Java CLI for running llama-tornado models
*
* This class provides a standalone command-line interface for running LLaMA models
* with TornadoVM acceleration. It can be executed directly with JBang or as a
* compiled Java application.
*
* Usage with JBang:
* jbang LlamaTornadoCli.java --model path/to/model.gguf --prompt "Your prompt here"
*
* Usage as compiled application:
* java --enable-preview --add-modules jdk.incubator.vector \
* -cp target/gpu-llama3-0.3.1.jar \
* org.beehive.gpullama3.cli.LlamaTornadoCli \
* --model path/to/model.gguf --prompt "Your prompt here"
*
* Examples:
* # Interactive chat mode
* jbang LlamaTornadoCli.java -m model.gguf --interactive
*
* # Single instruction mode
* jbang LlamaTornadoCli.java -m model.gguf -p "Explain quantum computing"
*
* # With TornadoVM acceleration
* jbang LlamaTornadoCli.java -m model.gguf -p "Hello" --use-tornadovm true
*
* # Custom temperature and sampling
* jbang LlamaTornadoCli.java -m model.gguf -p "Tell me a story" \
* --temperature 0.7 --top-p 0.9 --max-tokens 512
*/
public class LlamaTornadoCli {

    // Configuration flags, read once at class-load time from JVM system properties.
    // -Dllama.VectorAPI=false disables the Vector API fast path (default: enabled).
    public static final boolean USE_VECTOR_API = Boolean.parseBoolean(
        System.getProperty("llama.VectorAPI", "true"));
    // -Dllama.ShowPerfInteractive=false suppresses the per-run metrics printout.
    public static final boolean SHOW_PERF_INTERACTIVE = Boolean.parseBoolean(
        System.getProperty("llama.ShowPerfInteractive", "true"));

    /**
     * Runs a single instruction through the model and prints the response to stdout.
     * When {@link #SHOW_PERF_INTERACTIVE} is enabled, also prints the metrics of the
     * last run (tokens/sec etc.) after the response.
     *
     * @param model   the loaded model to query
     * @param sampler the token sampler configured from the CLI options
     * @param options parsed command-line options (prompt, generation limits, ...)
     */
    private static void runSingleInstruction(Model model, Sampler sampler, Options options) {
        String response = model.runInstructOnce(sampler, options);
        System.out.println(response);
        if (SHOW_PERF_INTERACTIVE) {
            LastRunMetrics.printMetrics();
        }
    }

    /**
     * Main entry point for the CLI application.
     *
     * <p>Prints the banner, shows usage when no arguments or a help flag is given,
     * otherwise parses options, loads the model, builds a sampler, and dispatches to
     * either interactive chat mode or single-instruction mode. Any failure during
     * setup or inference is reported on stderr and exits with status 1.
     *
     * @param args command-line arguments (see Options.parseOptions for details)
     * @throws IOException if model loading fails
     */
    public static void main(String[] args) throws IOException {
        // Print banner
        printBanner();

        // With no arguments (or an explicit help flag), print usage and exit cleanly.
        if (args.length == 0 || hasHelpFlag(args)) {
            Options.printUsage(System.out);
            System.exit(0);
        }

        try {
            // Parse options
            Options options = Options.parseOptions(args);

            // Load model
            Model model = loadModel(options);

            // Create sampler
            Sampler sampler = createSampler(model, options);

            // Run in interactive or single-instruction mode
            if (options.interactive()) {
                System.out.println("Starting interactive chat mode...");
                System.out.println("Type your messages below (Ctrl+C to exit):");
                System.out.println();
                model.runInteractive(sampler, options);
            } else {
                runSingleInstruction(model, sampler, options);
            }
        } catch (Exception e) {
            // Boundary catch: surface the message on stderr with a stack trace for
            // debugging, then exit with a non-zero status for shell scripting.
            System.err.println("Error: " + e.getMessage());
            e.printStackTrace();
            System.exit(1);
        }
    }

    /**
     * Checks whether a help flag ({@code --help} or {@code -h}) is present in the
     * given arguments.
     *
     * @param args the raw command-line arguments
     * @return {@code true} if a help flag is present, {@code false} otherwise
     */
    private static boolean hasHelpFlag(String[] args) {
        for (String arg : args) {
            if (arg.equals("--help") || arg.equals("-h")) {
                return true;
            }
        }
        return false;
    }

    /**
     * Prints the ASCII-art startup banner. Cell padding is kept consistent so the
     * box-drawing borders align.
     */
    private static void printBanner() {
        System.out.println("""
            ╔══════════════════════════════════════════════════════════╗
            ║          Llama-Tornado CLI - GPU-Accelerated LLM         ║
            ║             Powered by TornadoVM & Java 21               ║
            ╚══════════════════════════════════════════════════════════╝
            """);
    }
}
5 changes: 4 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
MVN = ./mvnw

# Default target
all: package
all: install

# Build the project (clean and package without tests)
build: clean package
Expand All @@ -14,6 +14,9 @@ build: clean package
clean:
$(MVN) clean

install:
$(MVN) install -DskipTests

# Package the project without running tests
package:
$(MVN) package -DskipTests
Expand Down
34 changes: 34 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,40 @@ llama-tornado --gpu --model beehive-llama-3.2-1b-instruct-fp16.gguf --prompt "te
The above model can be swapped with one of the other models, such as `beehive-llama-3.2-3b-instruct-fp16.gguf` or `beehive-llama-3.2-8b-instruct-fp16.gguf`, depending on your needs.
Check models below.

-----------

## 🚀 Running with JBang (Pure Java CLI)

You can run llama-tornado as a pure Java script using [JBang](https://www.jbang.dev/) without building or installing anything. This provides a simple, script-like experience similar to [Jlama's CLI](https://github.com/tjake/Jlama).

### Prerequisites for JBang

1. **Install JBang**: Follow the [JBang installation guide](https://www.jbang.dev/download/)
2. **TornadoVM SDK**: You still need TornadoVM installed and `TORNADO_SDK` environment variable set (see Setup section above)

### Quick Start with JBang

```bash
# Basic usage - interactive chat mode
jbang LlamaTornadoCli.java -m beehive-llama-3.2-1b-instruct-fp16.gguf --interactive

# Single instruction mode
jbang LlamaTornadoCli.java -m beehive-llama-3.2-1b-instruct-fp16.gguf -p "Explain quantum computing"

# With TornadoVM GPU acceleration
jbang LlamaTornadoCli.java -m beehive-llama-3.2-1b-instruct-fp16.gguf \
-p "Tell me a joke" --use-tornadovm true

# Custom generation parameters
jbang LlamaTornadoCli.java -m beehive-llama-3.2-1b-instruct-fp16.gguf \
-p "Write a short story" \
--temperature 0.7 \
--top-p 0.9 \
--max-tokens 512
```

-----------

## Collection of Tested Models

### Llama3.2 Collection
Expand Down
Loading