From 7e57e5aa7888f7c4b8268e086b14f4af2dd76b35 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 15 Dec 2025 10:14:31 +0000 Subject: [PATCH 1/9] Add pure Java CLI with JBang support for llama-tornado - Create LlamaTornadoCli.java with JBang directives for script-like execution - Add CLI to both root directory (for JBang) and src/main/java (for Maven builds) - Include comprehensive JBang configuration with Java 21, Vector API, and TornadoVM setup - Add example script (run-jbang-example.sh) for quick testing - Update README with JBang usage section, examples, and comparison table - Enables running llama-tornado without building, similar to Jlama's approach This allows users to run: jbang LlamaTornadoCli.java -m model.gguf -p "prompt" --- LlamaTornadoCli.java | 159 ++++++++++++++++++ README.md | 64 +++++++ examples/run-jbang-example.sh | 71 ++++++++ .../gpullama3/cli/LlamaTornadoCli.java | 159 ++++++++++++++++++ 4 files changed, 453 insertions(+) create mode 100755 LlamaTornadoCli.java create mode 100755 examples/run-jbang-example.sh create mode 100644 src/main/java/org/beehive/gpullama3/cli/LlamaTornadoCli.java diff --git a/LlamaTornadoCli.java b/LlamaTornadoCli.java new file mode 100755 index 00000000..ec07473b --- /dev/null +++ b/LlamaTornadoCli.java @@ -0,0 +1,159 @@ +#!/usr/bin/env jbang +//JAVA 21 +//PREVIEW +//DEPS io.github.beehive-lab:gpu-llama3:0.3.1 +//DEPS io.github.beehive-lab:tornado-api:2.1.0 +//DEPS io.github.beehive-lab:tornado-runtime:2.1.0 + +// Compiler options +//JAVAC_OPTIONS --enable-preview +//JAVAC_OPTIONS --add-modules=jdk.incubator.vector + +// JVM options for TornadoVM +//JAVA_OPTIONS --enable-preview +//JAVA_OPTIONS --add-modules=jdk.incubator.vector +//JAVA_OPTIONS -XX:-UseCompressedOops +//JAVA_OPTIONS -XX:+UnlockExperimentalVMOptions +//JAVA_OPTIONS -XX:+EnableJVMCI +//JAVA_OPTIONS -XX:+UseJVMCICompiler +//JAVA_OPTIONS -XX:+UseParallelGC + +// Module exports for TornadoVM (adjust paths based on your TornadoVM installation) +//JAVA_OPTIONS 
--add-exports=jdk.internal.vm.compiler/org.graalvm.compiler.nodes=tornado.runtime +//JAVA_OPTIONS --add-exports=jdk.internal.vm.compiler/org.graalvm.compiler.nodes.java=tornado.runtime +//JAVA_OPTIONS --add-exports=jdk.internal.vm.compiler/org.graalvm.compiler.nodes.calc=tornado.runtime +//JAVA_OPTIONS --add-exports=jdk.internal.vm.compiler/org.graalvm.compiler.nodes.util=tornado.runtime +//JAVA_OPTIONS --add-exports=jdk.internal.vm.compiler/org.graalvm.compiler.core.common=tornado.runtime +//JAVA_OPTIONS --add-exports=jdk.internal.vm.compiler/org.graalvm.compiler.graph=tornado.runtime +//JAVA_OPTIONS --add-exports=jdk.internal.vm.compiler/org.graalvm.compiler.lir=tornado.runtime +//JAVA_OPTIONS --add-exports=jdk.internal.vm.compiler/org.graalvm.compiler.api.runtime=tornado.runtime + +package org.beehive.gpullama3.cli; + +import org.beehive.gpullama3.Options; +import org.beehive.gpullama3.auxiliary.LastRunMetrics; +import org.beehive.gpullama3.inference.sampler.Sampler; +import org.beehive.gpullama3.model.Model; + +import java.io.IOException; + +import static org.beehive.gpullama3.inference.sampler.Sampler.createSampler; +import static org.beehive.gpullama3.model.loader.ModelLoader.loadModel; + +/** + * LlamaTornadoCli - Pure Java CLI for running llama-tornado models + * + * This class provides a standalone command-line interface for running LLaMA models + * with TornadoVM acceleration. It can be executed directly with JBang or as a + * compiled Java application. 
+ * + * Usage with JBang: + * jbang LlamaTornadoCli.java --model path/to/model.gguf --prompt "Your prompt here" + * + * Usage as compiled application: + * java --enable-preview --add-modules jdk.incubator.vector \ + * -cp target/gpu-llama3-0.3.1.jar \ + * org.beehive.gpullama3.cli.LlamaTornadoCli \ + * --model path/to/model.gguf --prompt "Your prompt here" + * + * Examples: + * # Interactive chat mode + * jbang LlamaTornadoCli.java -m model.gguf --interactive + * + * # Single instruction mode + * jbang LlamaTornadoCli.java -m model.gguf -p "Explain quantum computing" + * + * # With TornadoVM acceleration + * jbang LlamaTornadoCli.java -m model.gguf -p "Hello" --use-tornadovm true + * + * # Custom temperature and sampling + * jbang LlamaTornadoCli.java -m model.gguf -p "Tell me a story" \ + * --temperature 0.7 --top-p 0.9 --max-tokens 512 + */ +public class LlamaTornadoCli { + + // Configuration flags + public static final boolean USE_VECTOR_API = Boolean.parseBoolean( + System.getProperty("llama.VectorAPI", "true")); + public static final boolean SHOW_PERF_INTERACTIVE = Boolean.parseBoolean( + System.getProperty("llama.ShowPerfInteractive", "true")); + + /** + * Run a single instruction and display the response + */ + private static void runSingleInstruction(Model model, Sampler sampler, Options options) { + String response = model.runInstructOnce(sampler, options); + System.out.println(response); + if (SHOW_PERF_INTERACTIVE) { + LastRunMetrics.printMetrics(); + } + } + + /** + * Main entry point for the CLI application + * + * @param args command-line arguments (see Options.parseOptions for details) + * @throws IOException if model loading fails + */ + public static void main(String[] args) throws IOException { + // Print banner + printBanner(); + + // Check if help requested + if (args.length == 0 || hasHelpFlag(args)) { + Options.printUsage(System.out); + System.exit(0); + } + + try { + // Parse options + Options options = Options.parseOptions(args); + + // Load 
model + System.out.println("Loading model from: " + options.modelPath()); + Model model = loadModel(options); + System.out.println("Model loaded successfully!"); + + // Create sampler + Sampler sampler = createSampler(model, options); + + // Run in interactive or single-instruction mode + if (options.interactive()) { + System.out.println("Starting interactive chat mode..."); + System.out.println("Type your messages below (Ctrl+C to exit):"); + System.out.println(); + model.runInteractive(sampler, options); + } else { + runSingleInstruction(model, sampler, options); + } + } catch (Exception e) { + System.err.println("Error: " + e.getMessage()); + e.printStackTrace(); + System.exit(1); + } + } + + /** + * Check if help flag is present in arguments + */ + private static boolean hasHelpFlag(String[] args) { + for (String arg : args) { + if (arg.equals("--help") || arg.equals("-h")) { + return true; + } + } + return false; + } + + /** + * Print ASCII banner + */ + private static void printBanner() { + System.out.println(""" + ╔══════════════════════════════════════════════════════════╗ + ║ Llama-Tornado CLI - GPU-Accelerated LLM ║ + ║ Powered by TornadoVM & Java 21 ║ + ╚══════════════════════════════════════════════════════════╝ + """); + } +} diff --git a/README.md b/README.md index 01cf752d..64063e7e 100644 --- a/README.md +++ b/README.md @@ -242,6 +242,70 @@ llama-tornado --gpu --model beehive-llama-3.2-1b-instruct-fp16.gguf --prompt "te The above model can we swapped with one of the other models, such as `beehive-llama-3.2-3b-instruct-fp16.gguf` or `beehive-llama-3.2-8b-instruct-fp16.gguf`, depending on your needs. Check models below. +----------- + +## 🚀 Running with JBang (Pure Java CLI) + +You can run llama-tornado as a pure Java script using [JBang](https://www.jbang.dev/) without building or installing anything. This provides a simple, script-like experience similar to [Jlama's CLI](https://github.com/tjake/Jlama). + +### Prerequisites for JBang + +1. 
**Install JBang**: Follow the [JBang installation guide](https://www.jbang.dev/download/) +2. **TornadoVM SDK**: You still need TornadoVM installed and `TORNADO_SDK` environment variable set (see Setup section above) + +### Quick Start with JBang + +```bash +# Basic usage - interactive chat mode +jbang LlamaTornadoCli.java -m beehive-llama-3.2-1b-instruct-fp16.gguf --interactive + +# Single instruction mode +jbang LlamaTornadoCli.java -m beehive-llama-3.2-1b-instruct-fp16.gguf -p "Explain quantum computing" + +# With TornadoVM GPU acceleration +jbang LlamaTornadoCli.java -m beehive-llama-3.2-1b-instruct-fp16.gguf \ + -p "Tell me a joke" --use-tornadovm true + +# Custom generation parameters +jbang LlamaTornadoCli.java -m beehive-llama-3.2-1b-instruct-fp16.gguf \ + -p "Write a short story" \ + --temperature 0.7 \ + --top-p 0.9 \ + --max-tokens 512 +``` + +### JBang vs llama-tornado Script + +| Feature | JBang CLI | llama-tornado Script | +|---------|-----------|---------------------| +| **Installation** | No build required | Requires `mvn package` | +| **Dependencies** | Auto-downloaded | Included in fat JAR | +| **TornadoVM Setup** | Basic (via dependencies) | Full (via tornado command wrapper) | +| **GPU Acceleration** | Limited | Full support with all TornadoVM optimizations | +| **Use Case** | Quick experimentation, CPU inference | Production use, full GPU acceleration | + +### How It Works + +The `LlamaTornadoCli.java` file includes special JBang directives at the top: + +```java +#!/usr/bin/env jbang +//JAVA 21 +//PREVIEW +//DEPS io.github.beehive-lab:gpu-llama3:0.3.1 +//DEPS io.github.beehive-lab:tornado-api:2.1.0 +//DEPS io.github.beehive-lab:tornado-runtime:2.1.0 +``` + +These directives tell JBang to: +- Use Java 21 with preview features +- Download the required Maven dependencies automatically +- Set up the necessary JVM options for Vector API and TornadoVM + +**Note**: For full GPU acceleration with all TornadoVM optimizations, we recommend using the 
`llama-tornado` script instead, which properly configures all TornadoVM runtime parameters. + +----------- + ## Collection of Tested Models ### Llama3.2 Collection diff --git a/examples/run-jbang-example.sh b/examples/run-jbang-example.sh new file mode 100755 index 00000000..823e5b7a --- /dev/null +++ b/examples/run-jbang-example.sh @@ -0,0 +1,71 @@ +#!/bin/bash +# Example script to run llama-tornado with JBang +# +# This demonstrates how to use the JBang CLI for quick experimentation +# with llama-tornado models. + +# Colors for output +GREEN='\033[0;32m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +echo -e "${BLUE}╔══════════════════════════════════════════════════════════╗${NC}" +echo -e "${BLUE}║ Llama-Tornado JBang CLI Example ║${NC}" +echo -e "${BLUE}╚══════════════════════════════════════════════════════════╝${NC}" +echo "" + +# Check if JBang is installed +if ! command -v jbang &> /dev/null; then + echo "❌ JBang is not installed!" + echo "Please install JBang first: https://www.jbang.dev/download/" + exit 1 +fi + +echo -e "${GREEN}✓${NC} JBang is installed" + +# Check if model file is provided +if [ -z "$1" ]; then + echo "" + echo "Usage: $0 [prompt]" + echo "" + echo "Examples:" + echo " $0 beehive-llama-3.2-1b-instruct-fp16.gguf" + echo " $0 beehive-llama-3.2-1b-instruct-fp16.gguf \"Tell me a joke\"" + echo "" + exit 1 +fi + +MODEL_PATH="$1" +PROMPT="${2:-What is the capital of France?}" + +# Check if model file exists +if [ ! -f "$MODEL_PATH" ]; then + echo "❌ Model file not found: $MODEL_PATH" + echo "" + echo "Please download a model first. See:" + echo "https://huggingface.co/collections/beehive-lab/llama3-gpullama3java" + exit 1 +fi + +echo -e "${GREEN}✓${NC} Model file found: $MODEL_PATH" +echo "" + +# Run with JBang +echo "Running inference with prompt: \"$PROMPT\"" +echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" +echo "" + +cd "$(dirname "$0")/.." 
|| exit + +jbang LlamaTornadoCli.java \ + --model "$MODEL_PATH" \ + --prompt "$PROMPT" \ + --temperature 0.7 \ + --max-tokens 256 + +echo "" +echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" +echo -e "${GREEN}Done!${NC}" +echo "" +echo "Try interactive mode:" +echo " jbang LlamaTornadoCli.java --model $MODEL_PATH --interactive" diff --git a/src/main/java/org/beehive/gpullama3/cli/LlamaTornadoCli.java b/src/main/java/org/beehive/gpullama3/cli/LlamaTornadoCli.java new file mode 100644 index 00000000..ec07473b --- /dev/null +++ b/src/main/java/org/beehive/gpullama3/cli/LlamaTornadoCli.java @@ -0,0 +1,159 @@ +#!/usr/bin/env jbang +//JAVA 21 +//PREVIEW +//DEPS io.github.beehive-lab:gpu-llama3:0.3.1 +//DEPS io.github.beehive-lab:tornado-api:2.1.0 +//DEPS io.github.beehive-lab:tornado-runtime:2.1.0 + +// Compiler options +//JAVAC_OPTIONS --enable-preview +//JAVAC_OPTIONS --add-modules=jdk.incubator.vector + +// JVM options for TornadoVM +//JAVA_OPTIONS --enable-preview +//JAVA_OPTIONS --add-modules=jdk.incubator.vector +//JAVA_OPTIONS -XX:-UseCompressedOops +//JAVA_OPTIONS -XX:+UnlockExperimentalVMOptions +//JAVA_OPTIONS -XX:+EnableJVMCI +//JAVA_OPTIONS -XX:+UseJVMCICompiler +//JAVA_OPTIONS -XX:+UseParallelGC + +// Module exports for TornadoVM (adjust paths based on your TornadoVM installation) +//JAVA_OPTIONS --add-exports=jdk.internal.vm.compiler/org.graalvm.compiler.nodes=tornado.runtime +//JAVA_OPTIONS --add-exports=jdk.internal.vm.compiler/org.graalvm.compiler.nodes.java=tornado.runtime +//JAVA_OPTIONS --add-exports=jdk.internal.vm.compiler/org.graalvm.compiler.nodes.calc=tornado.runtime +//JAVA_OPTIONS --add-exports=jdk.internal.vm.compiler/org.graalvm.compiler.nodes.util=tornado.runtime +//JAVA_OPTIONS --add-exports=jdk.internal.vm.compiler/org.graalvm.compiler.core.common=tornado.runtime +//JAVA_OPTIONS --add-exports=jdk.internal.vm.compiler/org.graalvm.compiler.graph=tornado.runtime +//JAVA_OPTIONS 
--add-exports=jdk.internal.vm.compiler/org.graalvm.compiler.lir=tornado.runtime +//JAVA_OPTIONS --add-exports=jdk.internal.vm.compiler/org.graalvm.compiler.api.runtime=tornado.runtime + +package org.beehive.gpullama3.cli; + +import org.beehive.gpullama3.Options; +import org.beehive.gpullama3.auxiliary.LastRunMetrics; +import org.beehive.gpullama3.inference.sampler.Sampler; +import org.beehive.gpullama3.model.Model; + +import java.io.IOException; + +import static org.beehive.gpullama3.inference.sampler.Sampler.createSampler; +import static org.beehive.gpullama3.model.loader.ModelLoader.loadModel; + +/** + * LlamaTornadoCli - Pure Java CLI for running llama-tornado models + * + * This class provides a standalone command-line interface for running LLaMA models + * with TornadoVM acceleration. It can be executed directly with JBang or as a + * compiled Java application. + * + * Usage with JBang: + * jbang LlamaTornadoCli.java --model path/to/model.gguf --prompt "Your prompt here" + * + * Usage as compiled application: + * java --enable-preview --add-modules jdk.incubator.vector \ + * -cp target/gpu-llama3-0.3.1.jar \ + * org.beehive.gpullama3.cli.LlamaTornadoCli \ + * --model path/to/model.gguf --prompt "Your prompt here" + * + * Examples: + * # Interactive chat mode + * jbang LlamaTornadoCli.java -m model.gguf --interactive + * + * # Single instruction mode + * jbang LlamaTornadoCli.java -m model.gguf -p "Explain quantum computing" + * + * # With TornadoVM acceleration + * jbang LlamaTornadoCli.java -m model.gguf -p "Hello" --use-tornadovm true + * + * # Custom temperature and sampling + * jbang LlamaTornadoCli.java -m model.gguf -p "Tell me a story" \ + * --temperature 0.7 --top-p 0.9 --max-tokens 512 + */ +public class LlamaTornadoCli { + + // Configuration flags + public static final boolean USE_VECTOR_API = Boolean.parseBoolean( + System.getProperty("llama.VectorAPI", "true")); + public static final boolean SHOW_PERF_INTERACTIVE = Boolean.parseBoolean( + 
System.getProperty("llama.ShowPerfInteractive", "true")); + + /** + * Run a single instruction and display the response + */ + private static void runSingleInstruction(Model model, Sampler sampler, Options options) { + String response = model.runInstructOnce(sampler, options); + System.out.println(response); + if (SHOW_PERF_INTERACTIVE) { + LastRunMetrics.printMetrics(); + } + } + + /** + * Main entry point for the CLI application + * + * @param args command-line arguments (see Options.parseOptions for details) + * @throws IOException if model loading fails + */ + public static void main(String[] args) throws IOException { + // Print banner + printBanner(); + + // Check if help requested + if (args.length == 0 || hasHelpFlag(args)) { + Options.printUsage(System.out); + System.exit(0); + } + + try { + // Parse options + Options options = Options.parseOptions(args); + + // Load model + System.out.println("Loading model from: " + options.modelPath()); + Model model = loadModel(options); + System.out.println("Model loaded successfully!"); + + // Create sampler + Sampler sampler = createSampler(model, options); + + // Run in interactive or single-instruction mode + if (options.interactive()) { + System.out.println("Starting interactive chat mode..."); + System.out.println("Type your messages below (Ctrl+C to exit):"); + System.out.println(); + model.runInteractive(sampler, options); + } else { + runSingleInstruction(model, sampler, options); + } + } catch (Exception e) { + System.err.println("Error: " + e.getMessage()); + e.printStackTrace(); + System.exit(1); + } + } + + /** + * Check if help flag is present in arguments + */ + private static boolean hasHelpFlag(String[] args) { + for (String arg : args) { + if (arg.equals("--help") || arg.equals("-h")) { + return true; + } + } + return false; + } + + /** + * Print ASCII banner + */ + private static void printBanner() { + System.out.println(""" + ╔══════════════════════════════════════════════════════════╗ + ║ 
Llama-Tornado CLI - GPU-Accelerated LLM ║ + ║ Powered by TornadoVM & Java 21 ║ + ╚══════════════════════════════════════════════════════════╝ + """); + } +} From 74948558da5fdec4f2611ae1c88bf2bee5e72272 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 15 Dec 2025 10:21:29 +0000 Subject: [PATCH 2/9] Fix: Remove JBang directives from Maven-compiled CLI version - Remove JBang directives (shebang, //JAVA, //DEPS, etc.) from src/main/java version - Keep JBang directives only in root LlamaTornadoCli.java for JBang usage - Update javadoc to clarify this version is for Maven builds - Fixes Maven compilation errors caused by JBang-specific syntax - Root LlamaTornadoCli.java remains unchanged for JBang script usage --- .../gpullama3/cli/LlamaTornadoCli.java | 54 +++++-------------- 1 file changed, 12 insertions(+), 42 deletions(-) diff --git a/src/main/java/org/beehive/gpullama3/cli/LlamaTornadoCli.java b/src/main/java/org/beehive/gpullama3/cli/LlamaTornadoCli.java index ec07473b..2ad46277 100644 --- a/src/main/java/org/beehive/gpullama3/cli/LlamaTornadoCli.java +++ b/src/main/java/org/beehive/gpullama3/cli/LlamaTornadoCli.java @@ -1,33 +1,3 @@ -#!/usr/bin/env jbang -//JAVA 21 -//PREVIEW -//DEPS io.github.beehive-lab:gpu-llama3:0.3.1 -//DEPS io.github.beehive-lab:tornado-api:2.1.0 -//DEPS io.github.beehive-lab:tornado-runtime:2.1.0 - -// Compiler options -//JAVAC_OPTIONS --enable-preview -//JAVAC_OPTIONS --add-modules=jdk.incubator.vector - -// JVM options for TornadoVM -//JAVA_OPTIONS --enable-preview -//JAVA_OPTIONS --add-modules=jdk.incubator.vector -//JAVA_OPTIONS -XX:-UseCompressedOops -//JAVA_OPTIONS -XX:+UnlockExperimentalVMOptions -//JAVA_OPTIONS -XX:+EnableJVMCI -//JAVA_OPTIONS -XX:+UseJVMCICompiler -//JAVA_OPTIONS -XX:+UseParallelGC - -// Module exports for TornadoVM (adjust paths based on your TornadoVM installation) -//JAVA_OPTIONS --add-exports=jdk.internal.vm.compiler/org.graalvm.compiler.nodes=tornado.runtime -//JAVA_OPTIONS 
--add-exports=jdk.internal.vm.compiler/org.graalvm.compiler.nodes.java=tornado.runtime -//JAVA_OPTIONS --add-exports=jdk.internal.vm.compiler/org.graalvm.compiler.nodes.calc=tornado.runtime -//JAVA_OPTIONS --add-exports=jdk.internal.vm.compiler/org.graalvm.compiler.nodes.util=tornado.runtime -//JAVA_OPTIONS --add-exports=jdk.internal.vm.compiler/org.graalvm.compiler.core.common=tornado.runtime -//JAVA_OPTIONS --add-exports=jdk.internal.vm.compiler/org.graalvm.compiler.graph=tornado.runtime -//JAVA_OPTIONS --add-exports=jdk.internal.vm.compiler/org.graalvm.compiler.lir=tornado.runtime -//JAVA_OPTIONS --add-exports=jdk.internal.vm.compiler/org.graalvm.compiler.api.runtime=tornado.runtime - package org.beehive.gpullama3.cli; import org.beehive.gpullama3.Options; @@ -44,11 +14,9 @@ * LlamaTornadoCli - Pure Java CLI for running llama-tornado models * * This class provides a standalone command-line interface for running LLaMA models - * with TornadoVM acceleration. It can be executed directly with JBang or as a - * compiled Java application. + * with TornadoVM acceleration. This version is compiled as part of the Maven build. * - * Usage with JBang: - * jbang LlamaTornadoCli.java --model path/to/model.gguf --prompt "Your prompt here" + * For JBang usage, use the LlamaTornadoCli.java file in the root directory. 
* * Usage as compiled application: * java --enable-preview --add-modules jdk.incubator.vector \ @@ -58,17 +26,19 @@ * * Examples: * # Interactive chat mode - * jbang LlamaTornadoCli.java -m model.gguf --interactive + * java -cp target/gpu-llama3-0.3.1.jar \ + * org.beehive.gpullama3.cli.LlamaTornadoCli \ + * -m model.gguf --interactive * * # Single instruction mode - * jbang LlamaTornadoCli.java -m model.gguf -p "Explain quantum computing" - * - * # With TornadoVM acceleration - * jbang LlamaTornadoCli.java -m model.gguf -p "Hello" --use-tornadovm true + * java -cp target/gpu-llama3-0.3.1.jar \ + * org.beehive.gpullama3.cli.LlamaTornadoCli \ + * -m model.gguf -p "Explain quantum computing" * - * # Custom temperature and sampling - * jbang LlamaTornadoCli.java -m model.gguf -p "Tell me a story" \ - * --temperature 0.7 --top-p 0.9 --max-tokens 512 + * # With TornadoVM acceleration (requires TornadoVM runtime setup) + * java -cp target/gpu-llama3-0.3.1.jar \ + * org.beehive.gpullama3.cli.LlamaTornadoCli \ + * -m model.gguf -p "Hello" --use-tornadovm true */ public class LlamaTornadoCli { From 5f039704479c2dbdce0a9b8ea75c34e9d0f9f4d0 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 15 Dec 2025 10:26:13 +0000 Subject: [PATCH 3/9] Fix: Remove shebang from JBang CLI to match TornadoVM pattern - Remove #!/usr/bin/env jbang from LlamaTornadoCli.java - JBang directives should use only comment-based syntax (//JAVA, //DEPS) - Matches the pattern used in TornadoVM JBang examples - Update README to reflect correct JBang directive format - Fixes JBang compilation error with javac This follows the standard JBang pattern where shebang is not needed when running with 'jbang file.java' command. 
--- LlamaTornadoCli.java | 1 - README.md | 1 - 2 files changed, 2 deletions(-) diff --git a/LlamaTornadoCli.java b/LlamaTornadoCli.java index ec07473b..07b1b83f 100755 --- a/LlamaTornadoCli.java +++ b/LlamaTornadoCli.java @@ -1,4 +1,3 @@ -#!/usr/bin/env jbang //JAVA 21 //PREVIEW //DEPS io.github.beehive-lab:gpu-llama3:0.3.1 diff --git a/README.md b/README.md index 64063e7e..b9295c8b 100644 --- a/README.md +++ b/README.md @@ -289,7 +289,6 @@ jbang LlamaTornadoCli.java -m beehive-llama-3.2-1b-instruct-fp16.gguf \ The `LlamaTornadoCli.java` file includes special JBang directives at the top: ```java -#!/usr/bin/env jbang //JAVA 21 //PREVIEW //DEPS io.github.beehive-lab:gpu-llama3:0.3.1 From 5e5c0b1653d8f4419476ed041ff5aec062b1802b Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 15 Dec 2025 10:31:58 +0000 Subject: [PATCH 4/9] Add TornadoFlags.java for proper TornadoVM JBang configuration - Create TornadoFlags.java with complete TornadoVM runtime setup - Include GraalVM compiler dependency, module exports, and runtime config - Reference TornadoFlags.java from LlamaTornadoCli.java using //SOURCES - Remove duplicate configuration from main CLI file for cleaner structure - Update README to document TornadoFlags.java and link to TornadoVM examples - Follows the same pattern as TornadoVM JBang examples This provides proper JVMCI compiler and TornadoVM runtime configuration when running with JBang, enabling GPU acceleration support. 
--- LlamaTornadoCli.java | 19 +---- README.md | 6 +- TornadoFlags.java | 191 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 199 insertions(+), 17 deletions(-) create mode 100644 TornadoFlags.java diff --git a/LlamaTornadoCli.java b/LlamaTornadoCli.java index 07b1b83f..84cdf32c 100755 --- a/LlamaTornadoCli.java +++ b/LlamaTornadoCli.java @@ -4,28 +4,15 @@ //DEPS io.github.beehive-lab:tornado-api:2.1.0 //DEPS io.github.beehive-lab:tornado-runtime:2.1.0 +//SOURCES TornadoFlags.java + // Compiler options //JAVAC_OPTIONS --enable-preview //JAVAC_OPTIONS --add-modules=jdk.incubator.vector -// JVM options for TornadoVM +// JVM options for basic setup //JAVA_OPTIONS --enable-preview //JAVA_OPTIONS --add-modules=jdk.incubator.vector -//JAVA_OPTIONS -XX:-UseCompressedOops -//JAVA_OPTIONS -XX:+UnlockExperimentalVMOptions -//JAVA_OPTIONS -XX:+EnableJVMCI -//JAVA_OPTIONS -XX:+UseJVMCICompiler -//JAVA_OPTIONS -XX:+UseParallelGC - -// Module exports for TornadoVM (adjust paths based on your TornadoVM installation) -//JAVA_OPTIONS --add-exports=jdk.internal.vm.compiler/org.graalvm.compiler.nodes=tornado.runtime -//JAVA_OPTIONS --add-exports=jdk.internal.vm.compiler/org.graalvm.compiler.nodes.java=tornado.runtime -//JAVA_OPTIONS --add-exports=jdk.internal.vm.compiler/org.graalvm.compiler.nodes.calc=tornado.runtime -//JAVA_OPTIONS --add-exports=jdk.internal.vm.compiler/org.graalvm.compiler.nodes.util=tornado.runtime -//JAVA_OPTIONS --add-exports=jdk.internal.vm.compiler/org.graalvm.compiler.core.common=tornado.runtime -//JAVA_OPTIONS --add-exports=jdk.internal.vm.compiler/org.graalvm.compiler.graph=tornado.runtime -//JAVA_OPTIONS --add-exports=jdk.internal.vm.compiler/org.graalvm.compiler.lir=tornado.runtime -//JAVA_OPTIONS --add-exports=jdk.internal.vm.compiler/org.graalvm.compiler.api.runtime=tornado.runtime package org.beehive.gpullama3.cli; diff --git a/README.md b/README.md index b9295c8b..3f0ea818 100644 --- a/README.md +++ b/README.md @@ -294,12 +294,16 @@ The 
`LlamaTornadoCli.java` file includes special JBang directives at the top: //DEPS io.github.beehive-lab:gpu-llama3:0.3.1 //DEPS io.github.beehive-lab:tornado-api:2.1.0 //DEPS io.github.beehive-lab:tornado-runtime:2.1.0 + +//SOURCES TornadoFlags.java ``` These directives tell JBang to: - Use Java 21 with preview features - Download the required Maven dependencies automatically -- Set up the necessary JVM options for Vector API and TornadoVM +- Load TornadoVM configuration from `TornadoFlags.java` + +The `TornadoFlags.java` file contains all TornadoVM-specific JVM configuration (module exports, runtime settings, etc.), keeping the main CLI file clean and maintainable. This follows the same pattern as the [TornadoVM JBang examples](https://gist.github.com/maxandersen/14ecdc03c7c57fc59dfeb7ba37dd4c9c). **Note**: For full GPU acceleration with all TornadoVM optimizations, we recommend using the `llama-tornado` script instead, which properly configures all TornadoVM runtime parameters. diff --git a/TornadoFlags.java b/TornadoFlags.java new file mode 100644 index 00000000..df956937 --- /dev/null +++ b/TornadoFlags.java @@ -0,0 +1,191 @@ +// === Set to not get annoying warnings about annotation processing +//JAVAC_OPTIONS -proc:full + +// === Deps for GraalVM compiler (needed for TornadoVM) === +//DEPS org.graalvm.compiler:compiler:23.1.0 + +// === JVM mode and memory settings === +//JAVA_OPTIONS -server +//JAVA_OPTIONS -XX:-UseCompressedOops +//JAVA_OPTIONS -XX:+UnlockExperimentalVMOptions +//JAVA_OPTIONS -XX:+EnableJVMCI +//JAVA_OPTIONS -XX:-UseCompressedClassPointers +//JAVA_OPTIONS -XX:+UseParallelGC + +// === Native library path === +//JAVA_OPTIONS -Djava.library.path=${env.TORNADO_SDK}/lib + +// === Tornado runtime classes === +//JAVA_OPTIONS -Dtornado.load.api.implementation=uk.ac.manchester.tornado.runtime.tasks.TornadoTaskGraph +//JAVA_OPTIONS -Dtornado.load.runtime.implementation=uk.ac.manchester.tornado.runtime.TornadoCoreRuntime +//JAVA_OPTIONS 
-Dtornado.load.tornado.implementation=uk.ac.manchester.tornado.runtime.common.Tornado +//JAVA_OPTIONS -Dtornado.load.annotation.implementation=uk.ac.manchester.tornado.annotation.ASMClassVisitor +//JAVA_OPTIONS -Dtornado.load.annotation.parallel=uk.ac.manchester.tornado.api.annotations.Parallel + +// === Module system === +//JAVA_OPTIONS --module-path ${env.TORNADO_SDK}/share/java/tornado +//JAVA_OPTIONS --upgrade-module-path ${env.TORNADO_SDK}/share/java/graalJars +//JAVA_OPTIONS --add-modules ALL-SYSTEM,tornado.runtime,tornado.annotation,tornado.drivers.common,tornado.drivers.opencl + +// === Common exports === +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.nodes.cfg=tornado.runtime +//JAVA_OPTIONS --add-exports jdk.internal.vm.ci/jdk.vm.ci.common=jdk.internal.vm.compiler +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.hotspot.meta=tornado.runtime +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.core.common.util=tornado.runtime +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.lir=tornado.runtime,tornado.drivers.common +//JAVA_OPTIONS --add-exports jdk.internal.vm.ci/jdk.vm.ci.meta=tornado.runtime,tornado.annotation,tornado.drivers.common,jdk.internal.vm.compiler +//JAVA_OPTIONS --add-exports jdk.internal.vm.ci/jdk.vm.ci.code=tornado.runtime,tornado.drivers.common,jdk.internal.vm.compiler +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.graph=tornado.runtime +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.graph.spi=tornado.runtime +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.lir.gen=tornado.runtime +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.nodeinfo=tornado.runtime +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.nodes=tornado.runtime,tornado.drivers.common +//JAVA_OPTIONS --add-exports 
jdk.internal.vm.compiler/org.graalvm.compiler.nodes.calc=tornado.runtime +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.nodes.spi=tornado.runtime,tornado.drivers.common +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.api.runtime=tornado.runtime +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.code=tornado.runtime +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.core=tornado.runtime +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.core.common=tornado.runtime,tornado.drivers.common +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.core.target=tornado.runtime +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.debug=tornado.runtime,tornado.drivers.common +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.hotspot=tornado.runtime +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.java=tornado.runtime +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.lir.asm=tornado.runtime +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.lir.phases=tornado.runtime +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.nodes.graphbuilderconf=tornado.runtime +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.options=tornado.runtime +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.phases=tornado.runtime,tornado.drivers.common +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.phases.tiers=tornado.runtime +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.phases.util=tornado.runtime +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.printer=tornado.runtime +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.runtime=tornado.runtime 
+//JAVA_OPTIONS --add-exports jdk.internal.vm.ci/jdk.vm.ci.runtime=tornado.runtime +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.graph.iterators=tornado.runtime +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.nodes.java=tornado.runtime +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.bytecode=tornado.runtime +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.phases.common=tornado.runtime +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.core.common.spi=tornado.runtime,tornado.drivers.common +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.api.replacements=tornado.runtime +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.replacements=tornado.runtime +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.phases.common.inlining=tornado.runtime +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.core.phases=tornado.runtime +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.core.common.type=tornado.runtime +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.nodes.extended=tornado.runtime +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.nodes.loop=tornado.runtime +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.phases.common.inlining.info=tornado.runtime +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.phases.common.inlining.policy=tornado.runtime +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.phases.common.inlining.walker=tornado.runtime +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.loop.phases=tornado.runtime +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.nodes.debug=tornado.runtime +//JAVA_OPTIONS --add-exports 
jdk.internal.vm.compiler/org.graalvm.compiler.nodes.memory=tornado.runtime,tornado.drivers.common +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.nodes.util=tornado.runtime +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.nodes.virtual=tornado.runtime +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.lir.constopt=tornado.runtime +//JAVA_OPTIONS --add-opens jdk.internal.vm.ci/jdk.vm.ci.hotspot=tornado.runtime +//JAVA_OPTIONS --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot=tornado.runtime +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.nodes.gc=tornado.runtime,tornado.drivers.common +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.nodes.memory.address=tornado.runtime,tornado.drivers.common +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.replacements.nodes=tornado.runtime +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.word=tornado.drivers.common +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.phases.util=tornado.drivers.common +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.lir.framemap=tornado.runtime +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.core.common.alloc=tornado.runtime +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.core.common.memory=tornado.drivers.common +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.graph=tornado.runtime,tornado.drivers.common +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.graph.iterators=tornado.drivers.common +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.nodes.java=tornado.drivers.common +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.nodes.extended=tornado.drivers.common +//JAVA_OPTIONS --add-exports 
jdk.internal.vm.compiler/org.graalvm.compiler.nodes.loop=tornado.drivers.common +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.nodes.calc=tornado.drivers.common +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.options=tornado.drivers.common +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.nodes.debug=tornado.drivers.common +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.nodes.util=tornado.drivers.common +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.nodes.virtual=tornado.drivers.common +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.loop.phases=tornado.drivers.common +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.core.common.util=tornado.drivers.common +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.phases.tiers=tornado.drivers.common +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.phases.common=tornado.drivers.common + +// === OpenCL-specific exports === +//JAVA_OPTIONS --add-opens java.base/java.lang=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.ci/jdk.vm.ci.common=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.ci/jdk.vm.ci.amd64=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.hotspot.meta=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.replacements.classfile=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.core.common.alloc=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.core.common.util=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.core.common.cfg=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports 
jdk.internal.vm.compiler/org.graalvm.compiler.lir=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.lir.framemap=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.ci/jdk.vm.ci.meta=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.ci/jdk.vm.ci.code=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.graph=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.graph.spi=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.lir.gen=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.nodeinfo=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.nodes=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.nodes.calc=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.nodes.spi=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.code=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.core=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.core.common=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.debug=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.hotspot=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.java=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.lir.asm=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.lir.phases=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports 
jdk.internal.vm.compiler/org.graalvm.compiler.nodes.graphbuilderconf=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.options=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.phases=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.phases.tiers=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.phases.util=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.printer=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.ci/jdk.vm.ci.runtime=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.graph.iterators=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.nodes.java=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.bytecode=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.phases.common=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.core.common.spi=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.api.replacements=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.replacements=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.phases.common.inlining=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.core.phases=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.core.common.type=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.nodes.extended=tornado.drivers.opencl +//JAVA_OPTIONS 
--add-exports jdk.internal.vm.compiler/org.graalvm.compiler.nodes.loop=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.loop.phases=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.nodes.debug=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.nodes.memory=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.nodes.util=tornado.drivers.opencl +//JAVA_OPTIONS --add-opens jdk.internal.vm.ci/jdk.vm.ci.hotspot=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.asm=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.nodes.cfg=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.phases.schedule=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.virtual.phases.ea=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.lir.ssa=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.core.common.calc=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.core.gen=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.core.match=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.nodes.memory.address=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.nodes.type=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.phases.graph=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports 
jdk.internal.vm.compiler/org.graalvm.compiler.phases.common.util=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.word=tornado.drivers.opencl +//JAVA_OPTIONS --add-exports jdk.internal.vm.compiler/org.graalvm.compiler.core.common.memory=tornado.drivers.opencl + +package org.beehive.gpullama3.cli; + +/** + * TornadoFlags - JBang configuration file for TornadoVM runtime setup + * + * This file contains all the JVM options and module exports needed to run + * TornadoVM with JBang. It's referenced from LlamaTornadoCli.java using: + * //SOURCES TornadoFlags.java + * + * This pattern keeps the main CLI file clean while ensuring all necessary + * TornadoVM runtime configuration is properly set up. + */ +public class TornadoFlags { + // This class is intentionally empty - all configuration is in JBang directives above +} From 80040bf41d14717c4180b975fe7182d17e2b8913 Mon Sep 17 00:00:00 2001 From: mikepapadim Date: Mon, 15 Dec 2025 13:04:02 +0200 Subject: [PATCH 5/9] Fix jbang --- LlamaTornadoCli.java | 8 +- .../java/org/beehive/gpullama3/Options.java | 2 +- .../gpullama3/cli/LlamaTornadoCli.java | 258 +++++++++--------- 3 files changed, 135 insertions(+), 133 deletions(-) diff --git a/LlamaTornadoCli.java b/LlamaTornadoCli.java index 84cdf32c..bd5aaa67 100755 --- a/LlamaTornadoCli.java +++ b/LlamaTornadoCli.java @@ -5,6 +5,8 @@ //DEPS io.github.beehive-lab:tornado-runtime:2.1.0 //SOURCES TornadoFlags.java +// === Set to not get annoying warnings about annotation processing +//JAVAC_OPTIONS -proc:full // Compiler options //JAVAC_OPTIONS --enable-preview @@ -87,7 +89,7 @@ public static void main(String[] args) throws IOException { // Check if help requested if (args.length == 0 || hasHelpFlag(args)) { - Options.printUsage(System.out); +// Options.printUsage(System.out); System.exit(0); } @@ -137,8 +139,8 @@ private static boolean hasHelpFlag(String[] args) { private static void printBanner() { System.out.println(""" 
╔══════════════════════════════════════════════════════════╗ - ║ Llama-Tornado CLI - GPU-Accelerated LLM ║ - ║ Powered by TornadoVM & Java 21 ║ + ║ Llama-Tornado CLI - GPU-Accelerated LLM ║ + ║ Powered by TornadoVM & Java 21 ║ ╚══════════════════════════════════════════════════════════╝ """); } diff --git a/src/main/java/org/beehive/gpullama3/Options.java b/src/main/java/org/beehive/gpullama3/Options.java index bd492fd8..54d149e8 100644 --- a/src/main/java/org/beehive/gpullama3/Options.java +++ b/src/main/java/org/beehive/gpullama3/Options.java @@ -28,7 +28,7 @@ private static boolean getDefaultTornadoVM() { return Boolean.parseBoolean(System.getProperty("use.tornadovm", "false")); } - static void printUsage(PrintStream out) { + public static void printUsage(PrintStream out) { out.println("Usage: jbang Llama3.java [options]"); out.println(); out.println("Options:"); diff --git a/src/main/java/org/beehive/gpullama3/cli/LlamaTornadoCli.java b/src/main/java/org/beehive/gpullama3/cli/LlamaTornadoCli.java index 2ad46277..cc27cbc7 100644 --- a/src/main/java/org/beehive/gpullama3/cli/LlamaTornadoCli.java +++ b/src/main/java/org/beehive/gpullama3/cli/LlamaTornadoCli.java @@ -1,129 +1,129 @@ -package org.beehive.gpullama3.cli; - -import org.beehive.gpullama3.Options; -import org.beehive.gpullama3.auxiliary.LastRunMetrics; -import org.beehive.gpullama3.inference.sampler.Sampler; -import org.beehive.gpullama3.model.Model; - -import java.io.IOException; - -import static org.beehive.gpullama3.inference.sampler.Sampler.createSampler; -import static org.beehive.gpullama3.model.loader.ModelLoader.loadModel; - -/** - * LlamaTornadoCli - Pure Java CLI for running llama-tornado models - * - * This class provides a standalone command-line interface for running LLaMA models - * with TornadoVM acceleration. This version is compiled as part of the Maven build. - * - * For JBang usage, use the LlamaTornadoCli.java file in the root directory. 
- * - * Usage as compiled application: - * java --enable-preview --add-modules jdk.incubator.vector \ - * -cp target/gpu-llama3-0.3.1.jar \ - * org.beehive.gpullama3.cli.LlamaTornadoCli \ - * --model path/to/model.gguf --prompt "Your prompt here" - * - * Examples: - * # Interactive chat mode - * java -cp target/gpu-llama3-0.3.1.jar \ - * org.beehive.gpullama3.cli.LlamaTornadoCli \ - * -m model.gguf --interactive - * - * # Single instruction mode - * java -cp target/gpu-llama3-0.3.1.jar \ - * org.beehive.gpullama3.cli.LlamaTornadoCli \ - * -m model.gguf -p "Explain quantum computing" - * - * # With TornadoVM acceleration (requires TornadoVM runtime setup) - * java -cp target/gpu-llama3-0.3.1.jar \ - * org.beehive.gpullama3.cli.LlamaTornadoCli \ - * -m model.gguf -p "Hello" --use-tornadovm true - */ -public class LlamaTornadoCli { - - // Configuration flags - public static final boolean USE_VECTOR_API = Boolean.parseBoolean( - System.getProperty("llama.VectorAPI", "true")); - public static final boolean SHOW_PERF_INTERACTIVE = Boolean.parseBoolean( - System.getProperty("llama.ShowPerfInteractive", "true")); - - /** - * Run a single instruction and display the response - */ - private static void runSingleInstruction(Model model, Sampler sampler, Options options) { - String response = model.runInstructOnce(sampler, options); - System.out.println(response); - if (SHOW_PERF_INTERACTIVE) { - LastRunMetrics.printMetrics(); - } - } - - /** - * Main entry point for the CLI application - * - * @param args command-line arguments (see Options.parseOptions for details) - * @throws IOException if model loading fails - */ - public static void main(String[] args) throws IOException { - // Print banner - printBanner(); - - // Check if help requested - if (args.length == 0 || hasHelpFlag(args)) { - Options.printUsage(System.out); - System.exit(0); - } - - try { - // Parse options - Options options = Options.parseOptions(args); - - // Load model - System.out.println("Loading model 
from: " + options.modelPath()); - Model model = loadModel(options); - System.out.println("Model loaded successfully!"); - - // Create sampler - Sampler sampler = createSampler(model, options); - - // Run in interactive or single-instruction mode - if (options.interactive()) { - System.out.println("Starting interactive chat mode..."); - System.out.println("Type your messages below (Ctrl+C to exit):"); - System.out.println(); - model.runInteractive(sampler, options); - } else { - runSingleInstruction(model, sampler, options); - } - } catch (Exception e) { - System.err.println("Error: " + e.getMessage()); - e.printStackTrace(); - System.exit(1); - } - } - - /** - * Check if help flag is present in arguments - */ - private static boolean hasHelpFlag(String[] args) { - for (String arg : args) { - if (arg.equals("--help") || arg.equals("-h")) { - return true; - } - } - return false; - } - - /** - * Print ASCII banner - */ - private static void printBanner() { - System.out.println(""" - ╔══════════════════════════════════════════════════════════╗ - ║ Llama-Tornado CLI - GPU-Accelerated LLM ║ - ║ Powered by TornadoVM & Java 21 ║ - ╚══════════════════════════════════════════════════════════╝ - """); - } -} +//package org.beehive.gpullama3.cli; +// +//import org.beehive.gpullama3.Options; +//import org.beehive.gpullama3.auxiliary.LastRunMetrics; +//import org.beehive.gpullama3.inference.sampler.Sampler; +//import org.beehive.gpullama3.model.Model; +// +//import java.io.IOException; +// +//import static org.beehive.gpullama3.inference.sampler.Sampler.createSampler; +//import static org.beehive.gpullama3.model.loader.ModelLoader.loadModel; +// +///** +// * LlamaTornadoCli - Pure Java CLI for running llama-tornado models +// * +// * This class provides a standalone command-line interface for running LLaMA models +// * with TornadoVM acceleration. This version is compiled as part of the Maven build. 
+// * +// * For JBang usage, use the LlamaTornadoCli.java file in the root directory. +// * +// * Usage as compiled application: +// * java --enable-preview --add-modules jdk.incubator.vector \ +// * -cp target/gpu-llama3-0.3.1.jar \ +// * org.beehive.gpullama3.cli.LlamaTornadoCli \ +// * --model path/to/model.gguf --prompt "Your prompt here" +// * +// * Examples: +// * # Interactive chat mode +// * java -cp target/gpu-llama3-0.3.1.jar \ +// * org.beehive.gpullama3.cli.LlamaTornadoCli \ +// * -m model.gguf --interactive +// * +// * # Single instruction mode +// * java -cp target/gpu-llama3-0.3.1.jar \ +// * org.beehive.gpullama3.cli.LlamaTornadoCli \ +// * -m model.gguf -p "Explain quantum computing" +// * +// * # With TornadoVM acceleration (requires TornadoVM runtime setup) +// * java -cp target/gpu-llama3-0.3.1.jar \ +// * org.beehive.gpullama3.cli.LlamaTornadoCli \ +// * -m model.gguf -p "Hello" --use-tornadovm true +// */ +//public class LlamaTornadoCli { +// +// // Configuration flags +// public static final boolean USE_VECTOR_API = Boolean.parseBoolean( +// System.getProperty("llama.VectorAPI", "true")); +// public static final boolean SHOW_PERF_INTERACTIVE = Boolean.parseBoolean( +// System.getProperty("llama.ShowPerfInteractive", "true")); +// +// /** +// * Run a single instruction and display the response +// */ +// private static void runSingleInstruction(Model model, Sampler sampler, Options options) { +// String response = model.runInstructOnce(sampler, options); +// System.out.println(response); +// if (SHOW_PERF_INTERACTIVE) { +// LastRunMetrics.printMetrics(); +// } +// } +// +// /** +// * Main entry point for the CLI application +// * +// * @param args command-line arguments (see Options.parseOptions for details) +// * @throws IOException if model loading fails +// */ +// public static void main(String[] args) throws IOException { +// // Print banner +// printBanner(); +// +// // Check if help requested +// if (args.length == 0 || 
hasHelpFlag(args)) { +//// Options.printUsage(System.out); +// System.exit(0); +// } +// +// try { +// // Parse options +// Options options = Options.parseOptions(args); +// +// // Load model +// System.out.println("Loading model from: " + options.modelPath()); +// Model model = loadModel(options); +// System.out.println("Model loaded successfully!"); +// +// // Create sampler +// Sampler sampler = createSampler(model, options); +// +// // Run in interactive or single-instruction mode +// if (options.interactive()) { +// System.out.println("Starting interactive chat mode..."); +// System.out.println("Type your messages below (Ctrl+C to exit):"); +// System.out.println(); +// model.runInteractive(sampler, options); +// } else { +// runSingleInstruction(model, sampler, options); +// } +// } catch (Exception e) { +// System.err.println("Error: " + e.getMessage()); +// e.printStackTrace(); +// System.exit(1); +// } +// } +// +// /** +// * Check if help flag is present in arguments +// */ +// private static boolean hasHelpFlag(String[] args) { +// for (String arg : args) { +// if (arg.equals("--help") || arg.equals("-h")) { +// return true; +// } +// } +// return false; +// } +// +// /** +// * Print ASCII banner +// */ +// private static void printBanner() { +// System.out.println(""" +// ╔══════════════════════════════════════════════════════════╗ +// ║ Llama-Tornado CLI - GPU-Accelerated LLM ║ +// ║ Powered by TornadoVM & Java 21 ║ +// ╚══════════════════════════════════════════════════════════╝ +// """); +// } +//} From 5ff078f34b1715a5d1071fae57e92dab6c24dc93 Mon Sep 17 00:00:00 2001 From: mikepapadim Date: Mon, 15 Dec 2025 13:13:18 +0200 Subject: [PATCH 6/9] Update GPU Llama3 dependency to version 0.3.2-dev and enable usage help in CLI --- LlamaTornadoCli.java | 6 +- pom.xml | 2 +- .../gpullama3/cli/LlamaTornadoCli.java | 129 ------------------ 3 files changed, 3 insertions(+), 134 deletions(-) delete mode 100644 
src/main/java/org/beehive/gpullama3/cli/LlamaTornadoCli.java diff --git a/LlamaTornadoCli.java b/LlamaTornadoCli.java index bd5aaa67..5a502aeb 100755 --- a/LlamaTornadoCli.java +++ b/LlamaTornadoCli.java @@ -1,6 +1,6 @@ //JAVA 21 //PREVIEW -//DEPS io.github.beehive-lab:gpu-llama3:0.3.1 +//DEPS io.github.beehive-lab:gpu-llama3:0.3.2-dev //DEPS io.github.beehive-lab:tornado-api:2.1.0 //DEPS io.github.beehive-lab:tornado-runtime:2.1.0 @@ -89,7 +89,7 @@ public static void main(String[] args) throws IOException { // Check if help requested if (args.length == 0 || hasHelpFlag(args)) { -// Options.printUsage(System.out); + Options.printUsage(System.out); System.exit(0); } @@ -98,9 +98,7 @@ public static void main(String[] args) throws IOException { Options options = Options.parseOptions(args); // Load model - System.out.println("Loading model from: " + options.modelPath()); Model model = loadModel(options); - System.out.println("Model loaded successfully!"); // Create sampler Sampler sampler = createSampler(model, options); diff --git a/pom.xml b/pom.xml index f7a88ad1..4e38bdb8 100644 --- a/pom.xml +++ b/pom.xml @@ -7,7 +7,7 @@ <groupId>io.github.beehive-lab</groupId> <artifactId>gpu-llama3</artifactId> - <version>0.3.1</version> + <version>0.3.2-dev</version> <name>GPU Llama3</name> <description>GPU-accelerated LLaMA3 inference using TornadoVM</description> diff --git a/src/main/java/org/beehive/gpullama3/cli/LlamaTornadoCli.java b/src/main/java/org/beehive/gpullama3/cli/LlamaTornadoCli.java deleted file mode 100644 index cc27cbc7..00000000 --- a/src/main/java/org/beehive/gpullama3/cli/LlamaTornadoCli.java +++ /dev/null @@ -1,129 +0,0 @@ -//package org.beehive.gpullama3.cli; -// -//import org.beehive.gpullama3.Options; -//import org.beehive.gpullama3.auxiliary.LastRunMetrics; -//import org.beehive.gpullama3.inference.sampler.Sampler; -//import org.beehive.gpullama3.model.Model; -// -//import java.io.IOException; -// -//import static org.beehive.gpullama3.inference.sampler.Sampler.createSampler; -//import static org.beehive.gpullama3.model.loader.ModelLoader.loadModel; -// -///** -// * 
LlamaTornadoCli - Pure Java CLI for running llama-tornado models -// * -// * This class provides a standalone command-line interface for running LLaMA models -// * with TornadoVM acceleration. This version is compiled as part of the Maven build. -// * -// * For JBang usage, use the LlamaTornadoCli.java file in the root directory. -// * -// * Usage as compiled application: -// * java --enable-preview --add-modules jdk.incubator.vector \ -// * -cp target/gpu-llama3-0.3.1.jar \ -// * org.beehive.gpullama3.cli.LlamaTornadoCli \ -// * --model path/to/model.gguf --prompt "Your prompt here" -// * -// * Examples: -// * # Interactive chat mode -// * java -cp target/gpu-llama3-0.3.1.jar \ -// * org.beehive.gpullama3.cli.LlamaTornadoCli \ -// * -m model.gguf --interactive -// * -// * # Single instruction mode -// * java -cp target/gpu-llama3-0.3.1.jar \ -// * org.beehive.gpullama3.cli.LlamaTornadoCli \ -// * -m model.gguf -p "Explain quantum computing" -// * -// * # With TornadoVM acceleration (requires TornadoVM runtime setup) -// * java -cp target/gpu-llama3-0.3.1.jar \ -// * org.beehive.gpullama3.cli.LlamaTornadoCli \ -// * -m model.gguf -p "Hello" --use-tornadovm true -// */ -//public class LlamaTornadoCli { -// -// // Configuration flags -// public static final boolean USE_VECTOR_API = Boolean.parseBoolean( -// System.getProperty("llama.VectorAPI", "true")); -// public static final boolean SHOW_PERF_INTERACTIVE = Boolean.parseBoolean( -// System.getProperty("llama.ShowPerfInteractive", "true")); -// -// /** -// * Run a single instruction and display the response -// */ -// private static void runSingleInstruction(Model model, Sampler sampler, Options options) { -// String response = model.runInstructOnce(sampler, options); -// System.out.println(response); -// if (SHOW_PERF_INTERACTIVE) { -// LastRunMetrics.printMetrics(); -// } -// } -// -// /** -// * Main entry point for the CLI application -// * -// * @param args command-line arguments (see Options.parseOptions for 
details) -// * @throws IOException if model loading fails -// */ -// public static void main(String[] args) throws IOException { -// // Print banner -// printBanner(); -// -// // Check if help requested -// if (args.length == 0 || hasHelpFlag(args)) { -//// Options.printUsage(System.out); -// System.exit(0); -// } -// -// try { -// // Parse options -// Options options = Options.parseOptions(args); -// -// // Load model -// System.out.println("Loading model from: " + options.modelPath()); -// Model model = loadModel(options); -// System.out.println("Model loaded successfully!"); -// -// // Create sampler -// Sampler sampler = createSampler(model, options); -// -// // Run in interactive or single-instruction mode -// if (options.interactive()) { -// System.out.println("Starting interactive chat mode..."); -// System.out.println("Type your messages below (Ctrl+C to exit):"); -// System.out.println(); -// model.runInteractive(sampler, options); -// } else { -// runSingleInstruction(model, sampler, options); -// } -// } catch (Exception e) { -// System.err.println("Error: " + e.getMessage()); -// e.printStackTrace(); -// System.exit(1); -// } -// } -// -// /** -// * Check if help flag is present in arguments -// */ -// private static boolean hasHelpFlag(String[] args) { -// for (String arg : args) { -// if (arg.equals("--help") || arg.equals("-h")) { -// return true; -// } -// } -// return false; -// } -// -// /** -// * Print ASCII banner -// */ -// private static void printBanner() { -// System.out.println(""" -// ╔══════════════════════════════════════════════════════════╗ -// ║ Llama-Tornado CLI - GPU-Accelerated LLM ║ -// ║ Powered by TornadoVM & Java 21 ║ -// ╚══════════════════════════════════════════════════════════╝ -// """); -// } -//} From f539fe87cca2923df83d4254a9e327ee0fad2475 Mon Sep 17 00:00:00 2001 From: mikepapadim Date: Mon, 15 Dec 2025 14:50:14 +0200 Subject: [PATCH 7/9] Remove deprecated JBang example script from repository --- 
examples/run-jbang-example.sh | 71 ----------------------------------- 1 file changed, 71 deletions(-) delete mode 100755 examples/run-jbang-example.sh diff --git a/examples/run-jbang-example.sh b/examples/run-jbang-example.sh deleted file mode 100755 index 823e5b7a..00000000 --- a/examples/run-jbang-example.sh +++ /dev/null @@ -1,71 +0,0 @@ -#!/bin/bash -# Example script to run llama-tornado with JBang -# -# This demonstrates how to use the JBang CLI for quick experimentation -# with llama-tornado models. - -# Colors for output -GREEN='\033[0;32m' -BLUE='\033[0;34m' -NC='\033[0m' # No Color - -echo -e "${BLUE}╔══════════════════════════════════════════════════════════╗${NC}" -echo -e "${BLUE}║ Llama-Tornado JBang CLI Example ║${NC}" -echo -e "${BLUE}╚══════════════════════════════════════════════════════════╝${NC}" -echo "" - -# Check if JBang is installed -if ! command -v jbang &> /dev/null; then - echo "❌ JBang is not installed!" - echo "Please install JBang first: https://www.jbang.dev/download/" - exit 1 -fi - -echo -e "${GREEN}✓${NC} JBang is installed" - -# Check if model file is provided -if [ -z "$1" ]; then - echo "" - echo "Usage: $0 [prompt]" - echo "" - echo "Examples:" - echo " $0 beehive-llama-3.2-1b-instruct-fp16.gguf" - echo " $0 beehive-llama-3.2-1b-instruct-fp16.gguf \"Tell me a joke\"" - echo "" - exit 1 -fi - -MODEL_PATH="$1" -PROMPT="${2:-What is the capital of France?}" - -# Check if model file exists -if [ ! -f "$MODEL_PATH" ]; then - echo "❌ Model file not found: $MODEL_PATH" - echo "" - echo "Please download a model first. See:" - echo "https://huggingface.co/collections/beehive-lab/llama3-gpullama3java" - exit 1 -fi - -echo -e "${GREEN}✓${NC} Model file found: $MODEL_PATH" -echo "" - -# Run with JBang -echo "Running inference with prompt: \"$PROMPT\"" -echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" -echo "" - -cd "$(dirname "$0")/.." 
|| exit - -jbang LlamaTornadoCli.java \ - --model "$MODEL_PATH" \ - --prompt "$PROMPT" \ - --temperature 0.7 \ - --max-tokens 256 - -echo "" -echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" -echo -e "${GREEN}Done!${NC}" -echo "" -echo "Try interactive mode:" -echo " jbang LlamaTornadoCli.java --model $MODEL_PATH --interactive" From b624d57f14f3ef70d95c258521aad6237f6236a1 Mon Sep 17 00:00:00 2001 From: mikepapadim Date: Tue, 16 Dec 2025 12:38:35 +0200 Subject: [PATCH 8/9] Update Makefile to change default target from 'package' to 'install' --- Makefile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index dd0eac84..3f44bac9 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ MVN = ./mvnw # Default target -all: package +all: install # Build the project (clean and package without tests) build: clean package @@ -14,6 +14,9 @@ build: clean package clean: $(MVN) clean +install: + $(MVN) install -DskipTests + # Package the project without running tests package: $(MVN) package -DskipTests From 7b8bd75170514f8ad94b5753f60abe43c62e50e6 Mon Sep 17 00:00:00 2001 From: mikepapadim Date: Tue, 16 Dec 2025 13:13:10 +0200 Subject: [PATCH 9/9] Remove JBang vs llama-tornado comparison section from README.md --- README.md | 33 --------------------------------- 1 file changed, 33 deletions(-) diff --git a/README.md b/README.md index 3f0ea818..3f0747d9 100644 --- a/README.md +++ b/README.md @@ -274,39 +274,6 @@ jbang LlamaTornadoCli.java -m beehive-llama-3.2-1b-instruct-fp16.gguf \ --max-tokens 512 ``` -### JBang vs llama-tornado Script - -| Feature | JBang CLI | llama-tornado Script | -|---------|-----------|---------------------| -| **Installation** | No build required | Requires `mvn package` | -| **Dependencies** | Auto-downloaded | Included in fat JAR | -| **TornadoVM Setup** | Basic (via dependencies) | Full (via tornado command wrapper) | -| **GPU Acceleration** | Limited | Full support with all TornadoVM optimizations 
| -| **Use Case** | Quick experimentation, CPU inference | Production use, full GPU acceleration | - -### How It Works - -The `LlamaTornadoCli.java` file includes special JBang directives at the top: - -```java -//JAVA 21 -//PREVIEW -//DEPS io.github.beehive-lab:gpu-llama3:0.3.1 -//DEPS io.github.beehive-lab:tornado-api:2.1.0 -//DEPS io.github.beehive-lab:tornado-runtime:2.1.0 - -//SOURCES TornadoFlags.java -``` - -These directives tell JBang to: -- Use Java 21 with preview features -- Download the required Maven dependencies automatically -- Load TornadoVM configuration from `TornadoFlags.java` - -The `TornadoFlags.java` file contains all TornadoVM-specific JVM configuration (module exports, runtime settings, etc.), keeping the main CLI file clean and maintainable. This follows the same pattern as the [TornadoVM JBang examples](https://gist.github.com/maxandersen/14ecdc03c7c57fc59dfeb7ba37dd4c9c). - -**Note**: For full GPU acceleration with all TornadoVM optimizations, we recommend using the `llama-tornado` script instead, which properly configures all TornadoVM runtime parameters. - ----------- ## Collection of Tested Models