Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
145 changes: 145 additions & 0 deletions LlamaTornadoCli.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
//JAVA 21
//PREVIEW
//DEPS io.github.beehive-lab:gpu-llama3:0.3.2-dev
//DEPS io.github.beehive-lab:tornado-api:2.1.0
//DEPS io.github.beehive-lab:tornado-runtime:2.1.0

//SOURCES TornadoFlags.java
// === Set to not get annoying warnings about annotation processing
//JAVAC_OPTIONS -proc:full

// Compiler options
//JAVAC_OPTIONS --enable-preview
//JAVAC_OPTIONS --add-modules=jdk.incubator.vector

// JVM options for basic setup
//JAVA_OPTIONS --enable-preview
//JAVA_OPTIONS --add-modules=jdk.incubator.vector

package org.beehive.gpullama3.cli;

import org.beehive.gpullama3.Options;
import org.beehive.gpullama3.auxiliary.LastRunMetrics;
import org.beehive.gpullama3.inference.sampler.Sampler;
import org.beehive.gpullama3.model.Model;

import java.io.IOException;

import static org.beehive.gpullama3.inference.sampler.Sampler.createSampler;
import static org.beehive.gpullama3.model.loader.ModelLoader.loadModel;

/**
* LlamaTornadoCli - Pure Java CLI for running llama-tornado models
*
* This class provides a standalone command-line interface for running LLaMA models
* with TornadoVM acceleration. It can be executed directly with JBang or as a
* compiled Java application.
*
* Usage with JBang:
* jbang LlamaTornadoCli.java --model path/to/model.gguf --prompt "Your prompt here"
*
* Usage as compiled application:
* java --enable-preview --add-modules jdk.incubator.vector \
* -cp target/gpu-llama3-0.3.1.jar \
* org.beehive.gpullama3.cli.LlamaTornadoCli \
* --model path/to/model.gguf --prompt "Your prompt here"
*
* Examples:
* # Interactive chat mode
* jbang LlamaTornadoCli.java -m model.gguf --interactive
*
* # Single instruction mode
* jbang LlamaTornadoCli.java -m model.gguf -p "Explain quantum computing"
*
* # With TornadoVM acceleration
* jbang LlamaTornadoCli.java -m model.gguf -p "Hello" --use-tornadovm true
*
* # Custom temperature and sampling
* jbang LlamaTornadoCli.java -m model.gguf -p "Tell me a story" \
* --temperature 0.7 --top-p 0.9 --max-tokens 512
*/
public class LlamaTornadoCli {

    // Configuration flags, read once at class-load time from JVM system properties.
    // -Dllama.VectorAPI=false disables the Vector API fast path (default: enabled).
    public static final boolean USE_VECTOR_API = Boolean.parseBoolean(
        System.getProperty("llama.VectorAPI", "true"));
    // -Dllama.ShowPerfInteractive=false suppresses the per-run metrics printout.
    public static final boolean SHOW_PERF_INTERACTIVE = Boolean.parseBoolean(
        System.getProperty("llama.ShowPerfInteractive", "true"));

    /**
     * Runs a single instruction through the model and prints the response to stdout.
     * When {@link #SHOW_PERF_INTERACTIVE} is enabled, also prints the metrics of the
     * last run (tokens/sec etc.) after the response.
     *
     * @param model   the loaded model to query
     * @param sampler the token sampler configured from the CLI options
     * @param options parsed command-line options (prompt, generation limits, ...)
     */
    private static void runSingleInstruction(Model model, Sampler sampler, Options options) {
        String response = model.runInstructOnce(sampler, options);
        System.out.println(response);
        if (SHOW_PERF_INTERACTIVE) {
            LastRunMetrics.printMetrics();
        }
    }

    /**
     * Main entry point for the CLI application.
     *
     * <p>Prints the banner, shows usage when no arguments or a help flag is given,
     * otherwise parses options, loads the model, builds a sampler, and dispatches to
     * either interactive chat mode or single-instruction mode. Any failure during
     * setup or inference is reported on stderr and exits with status 1.
     *
     * @param args command-line arguments (see Options.parseOptions for details)
     * @throws IOException if model loading fails
     */
    public static void main(String[] args) throws IOException {
        // Print banner
        printBanner();

        // With no arguments (or an explicit help flag), print usage and exit cleanly.
        if (args.length == 0 || hasHelpFlag(args)) {
            Options.printUsage(System.out);
            System.exit(0);
        }

        try {
            // Parse options
            Options options = Options.parseOptions(args);

            // Load model
            Model model = loadModel(options);

            // Create sampler
            Sampler sampler = createSampler(model, options);

            // Run in interactive or single-instruction mode
            if (options.interactive()) {
                System.out.println("Starting interactive chat mode...");
                System.out.println("Type your messages below (Ctrl+C to exit):");
                System.out.println();
                model.runInteractive(sampler, options);
            } else {
                runSingleInstruction(model, sampler, options);
            }
        } catch (Exception e) {
            // Boundary catch: surface the message on stderr with a stack trace for
            // debugging, then exit with a non-zero status for shell scripting.
            System.err.println("Error: " + e.getMessage());
            e.printStackTrace();
            System.exit(1);
        }
    }

    /**
     * Checks whether a help flag ({@code --help} or {@code -h}) is present in the
     * given arguments.
     *
     * @param args the raw command-line arguments
     * @return {@code true} if a help flag is present, {@code false} otherwise
     */
    private static boolean hasHelpFlag(String[] args) {
        for (String arg : args) {
            if (arg.equals("--help") || arg.equals("-h")) {
                return true;
            }
        }
        return false;
    }

    /**
     * Prints the ASCII-art startup banner. Cell padding is kept consistent so the
     * box-drawing borders align.
     */
    private static void printBanner() {
        System.out.println("""
            ╔══════════════════════════════════════════════════════════╗
            ║          Llama-Tornado CLI - GPU-Accelerated LLM         ║
            ║             Powered by TornadoVM & Java 21               ║
            ╚══════════════════════════════════════════════════════════╝
            """);
    }
}
5 changes: 4 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
MVN = ./mvnw

# Default target
all: package
all: install

# Build the project (clean and package without tests)
build: clean package
Expand All @@ -14,6 +14,9 @@ build: clean package
clean:
$(MVN) clean

install:
$(MVN) install -DskipTests

# Package the project without running tests
package:
$(MVN) package -DskipTests
Expand Down
34 changes: 34 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,40 @@ llama-tornado --gpu --model beehive-llama-3.2-1b-instruct-fp16.gguf --prompt "te
The above model can be swapped with one of the other models, such as `beehive-llama-3.2-3b-instruct-fp16.gguf` or `beehive-llama-3.2-8b-instruct-fp16.gguf`, depending on your needs.
Check models below.

-----------

## 🚀 Running with JBang (Pure Java CLI)

You can run llama-tornado as a pure Java script using [JBang](https://www.jbang.dev/) without building or installing anything. This provides a simple, script-like experience similar to [Jlama's CLI](https://github.com/tjake/Jlama).

### Prerequisites for JBang

1. **Install JBang**: Follow the [JBang installation guide](https://www.jbang.dev/download/)
2. **TornadoVM SDK**: You still need TornadoVM installed and `TORNADO_SDK` environment variable set (see Setup section above)

### Quick Start with JBang

```bash
# Basic usage - interactive chat mode
jbang LlamaTornadoCli.java -m beehive-llama-3.2-1b-instruct-fp16.gguf --interactive

# Single instruction mode
jbang LlamaTornadoCli.java -m beehive-llama-3.2-1b-instruct-fp16.gguf -p "Explain quantum computing"

# With TornadoVM GPU acceleration
jbang LlamaTornadoCli.java -m beehive-llama-3.2-1b-instruct-fp16.gguf \
-p "Tell me a joke" --use-tornadovm true

# Custom generation parameters
jbang LlamaTornadoCli.java -m beehive-llama-3.2-1b-instruct-fp16.gguf \
-p "Write a short story" \
--temperature 0.7 \
--top-p 0.9 \
--max-tokens 512
```

-----------

## Collection of Tested Models

### Llama3.2 Collection
Expand Down
Loading