From a385633631456c850b2f35d19c5438981352e8cb Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <github-actions[bot]@users.noreply.github.com>
Date: Thu, 11 Dec 2025 12:17:24 +0000
Subject: [PATCH] Prepare release 0.3.0

---
 CHANGELOG.md | 32 ++++++++++++++++++++++++++++++++
 CITATION.cff |  4 ++--
 README.md    |  2 +-
 pom.xml      |  2 +-
 4 files changed, 36 insertions(+), 4 deletions(-)
 create mode 100644 CHANGELOG.md
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 00000000..cf028153
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,32 @@
+# Changelog
+
+All notable changes to GPULlama3.java will be documented in this file.
+
+## [0.3.0] - 2025-12-11
+
+### Model Support
+
+- [refactor] Generalize the design of `tornadovm` package to support multiple new models and types for GPU execution ([#62](https://github.com/beehive-lab/GPULlama3.java/pull/62))
+- Refactor/cleanup model loaders ([#58](https://github.com/beehive-lab/GPULlama3.java/pull/58))
+- Add Support for Q8_0 Models ([#59](https://github.com/beehive-lab/GPULlama3.java/pull/59))
+
+### Bug Fixes
+
+- [fix] Normalization compute step for non-NVIDIA hardware ([#84](https://github.com/beehive-lab/GPULlama3.java/pull/84))
+
+### Other Changes
+
+- Update README to enhance TornadoVM performance section and clarify GP… ([#85](https://github.com/beehive-lab/GPULlama3.java/pull/85))
+- Simplify installation by replacing TornadoVM submodule with pre-built SDK ([#82](https://github.com/beehive-lab/GPULlama3.java/pull/82))
+- [FP16] Improved performance by fusing dequantize with compute in kernels: 20-30% inference speedup ([#78](https://github.com/beehive-lab/GPULlama3.java/pull/78))
+- [cicd] Prevent workflows from running on forks ([#83](https://github.com/beehive-lab/GPULlama3.java/pull/83))
+- [CI][packaging] Automate process of deploying a new release with GitHub Actions ([#81](https://github.com/beehive-lab/GPULlama3.java/pull/81))
+- [Opt] Manipulation of Q8_0 tensors with Tornado `ByteArray`s ([#79](https://github.com/beehive-lab/GPULlama3.java/pull/79))
+- Optimization in Q8_0 loading ([#74](https://github.com/beehive-lab/GPULlama3.java/pull/74))
+- [opt] GGUF Load Optimization for tensors in TornadoVM layout ([#71](https://github.com/beehive-lab/GPULlama3.java/pull/71))
+- Add `SchedulerType` support to all TornadoVM layer planners and layer… ([#66](https://github.com/beehive-lab/GPULlama3.java/pull/66))
+- Weight Abstractions ([#65](https://github.com/beehive-lab/GPULlama3.java/pull/65))
+- Bug fixes in sizes and names of GridScheduler ([#64](https://github.com/beehive-lab/GPULlama3.java/pull/64))
+- Add Maven wrapper support ([#56](https://github.com/beehive-lab/GPULlama3.java/pull/56))
+- Add changes used in Devoxx Demo ([#54](https://github.com/beehive-lab/GPULlama3.java/pull/54))
+
diff --git a/CITATION.cff b/CITATION.cff
index c13b56b3..78de4ae4 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -15,6 +15,6 @@ authors:
   given-names: "Christos"
 title: "GPULlama3.java"
 license: MIT License
-version: 0.1.0-beta
-date-released: "2025-05-30"
+version: 0.3.0
+date-released: "2025-12-11"
 url: "https://github.com/beehive-lab/GPULlama3.java"
diff --git a/README.md b/README.md
index 07871443..1ad890f1 100644
--- a/README.md
+++ b/README.md
@@ -165,7 +165,7 @@ You can add **GPULlama3.java** directly to your Maven project by including the f
 <dependency>
     <groupId>io.github.beehive-lab</groupId>
     <artifactId>gpu-llama3</artifactId>
-    <version>0.2.2</version>
+    <version>0.3.0</version>
 </dependency>
 ```
 
diff --git a/pom.xml b/pom.xml
index a59830ef..f2dcd441 100644
--- a/pom.xml
+++ b/pom.xml
@@ -7,7 +7,7 @@
         <!-- Use your verified namespace -->
         <groupId>io.github.beehive-lab</groupId>
         <artifactId>gpu-llama3</artifactId>
-        <version>0.2.2</version> <!-- release version (no -SNAPSHOT) -->
+        <version>0.3.0</version> <!-- release version (no -SNAPSHOT) -->
 
         <name>GPU Llama3</name>
         <description>GPU-accelerated LLaMA3 inference using TornadoVM</description>