51 changes: 51 additions & 0 deletions .github/workflows/code-style.yml
@@ -0,0 +1,51 @@
+name: Code Style Checker
+
+on:
+  workflow_dispatch: # allows manual triggering
+  push:
+    branches:
+      - master
+  pull_request:
+    branches:
+      - master
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  model-naming:
+    runs-on: ubuntu-slim
+    steps:
+      - uses: actions/checkout@v6
+      - name: Check model naming conventions
+        run: |
+          python3 - << 'EOF'
+          import re, os, sys
+
+          pairs = re.findall(
+              r'case\s+(LLM_ARCH_\w+)\s*:\s*\n\s+return new (llama_model_\w+)\s*\(',
+              open("src/llama-model.cpp").read())
+
+          errors = []
+          for arch, cls in pairs:
+              suffix  = arch[len("LLM_ARCH_"):]
+              csuffix = cls[len("llama_model_"):]
+              fname   = csuffix.replace("_", "-") + ".cpp"
+
+              if not re.fullmatch(r'[A-Z][A-Z0-9_]*', suffix):
+                  errors.append(f"{arch}: suffix not upper snake case, example: LLM_ARCH_MY_MODEL")
+
+              if not re.fullmatch(r'[a-z][a-z0-9_]*', csuffix):
+                  errors.append(f"{arch}: class suffix not lower snake case, example: llama_model_my_model")
+
+              elif suffix.lower() != csuffix:
+                  errors.append(f"{arch}: arch/class name mismatch, expected class 'llama_model_{suffix.lower()}' but got '{cls}'")
+
+              elif not os.path.isfile(f"src/models/{fname}"):
+                  errors.append(f"{arch}: expects model file name to be src/models/{fname}, but not found")
+
+          if errors:
+              print('\n'.join(f" - {e}" for e in errors)); sys.exit(1)
+          print(f"OK: {len(pairs)} mappings validated.")
+          EOF
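For orientation, the following self-contained C++ sketch shows the shape of code this checker accepts. `LLM_ARCH_MY_MODEL`, `llama_model_my_model`, and the stub types are hypothetical names for illustration, not code from `src/llama-model.cpp`:

```cpp
// Hypothetical sketch of the convention the workflow enforces: an upper
// snake case arch suffix (MY_MODEL), a matching lower snake case class
// suffix (my_model), and an implementation file src/models/my-model.cpp.
struct llama_model_params {};

struct llama_model_base {
    virtual ~llama_model_base() = default;
};

// per the naming rule, this class would live in src/models/my-model.cpp
struct llama_model_my_model : llama_model_base {
    explicit llama_model_my_model(const llama_model_params &) {}
};

enum llm_arch { LLM_ARCH_MY_MODEL };

llama_model_base * make_model(llm_arch arch, const llama_model_params & params) {
    switch (arch) {
        case LLM_ARCH_MY_MODEL:
            return new llama_model_my_model(params);  // matched by the checker's regex
    }
    return nullptr;
}

int main() {
    llama_model_params params;
    delete make_model(LLM_ARCH_MY_MODEL, params);
    return 0;
}
```

The script can also be run locally from the repository root; it prints one line per violation, or `OK: N mappings validated.` on success.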
5 changes: 0 additions & 5 deletions .github/workflows/editorconfig.yml
@@ -2,11 +2,6 @@ name: EditorConfig Checker

on:
  workflow_dispatch: # allows manual triggering
-    inputs:
-      create_release:
-        description: 'Create new release'
-        required: true
-        type: boolean
  push:
    branches:
      - master
6 changes: 2 additions & 4 deletions common/arg.cpp
@@ -357,8 +357,7 @@ static handle_model_result common_params_handle_model(struct common_params_model
        auto download_result = common_download_model(model, opts, true);

        if (download_result.model_path.empty()) {
-            LOG_ERR("error: failed to download model from Hugging Face\n");
-            exit(1);
+            throw std::runtime_error("failed to download model from Hugging Face");
        }

        model.name = model.hf_repo;
@@ -380,8 +379,7 @@ static handle_model_result common_params_handle_model(struct common_params_model
        opts.offline = offline;
        auto download_result = common_download_model(model, opts);
        if (download_result.model_path.empty()) {
-            LOG_ERR("error: failed to download model from %s\n", model.url.c_str());
-            exit(1);
+            throw std::runtime_error("failed to download model from " + model.url);
        }
    }

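The change above replaces a hard `exit(1)` with `throw std::runtime_error(...)`, so a failed download now propagates to the caller, which can unwind and report the error instead of terminating mid-parse. A minimal sketch of the resulting control flow, with a hypothetical caller and URL (the actual catch site is not part of this diff):

```cpp
#include <cstdio>
#include <stdexcept>
#include <string>

// Hypothetical stand-in for the real parser: with the change above, a failed
// model download surfaces as std::runtime_error instead of exit(1), so the
// process can unwind (run destructors, release resources) and report cleanly.
static void parse_args_sketch(const std::string & url) {
    throw std::runtime_error("failed to download model from " + url);
}

int main() {
    try {
        parse_args_sketch("https://example.invalid/model.gguf");
    } catch (const std::runtime_error & e) {
        std::fprintf(stderr, "error: %s\n", e.what());
        return 1;
    }
    return 0;
}
```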
27 changes: 16 additions & 11 deletions docs/backend/OPENVINO.md
@@ -57,17 +57,22 @@ Although OpenVINO supports a wide range of [Intel hardware](https://docs.openvin

## Validated Models

-The following models have been validated for functionality on Intel® Core™ Ultra Series 1 and Series 2:
-
-- [Llama-3.2-1B-Instruct-GGUF](https://huggingface.co/unsloth/Llama-3.2-1B-Instruct-GGUF/)
-- [Llama-3.1-8B-Instruct](https://huggingface.co/bartowski/Meta-Llama-3.1-8B-Instruct-GGUF)
-- [microsoft/Phi-3-mini-4k-instruct-gguf](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf)
-- [Qwen/Qwen2.5-1.5B-Instruct-GGUF](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-GGUF)
-- [Qwen/Qwen3-8B](https://huggingface.co/Qwen/Qwen3-8B-GGUF)
-- [openbmb/MiniCPM-1B-sft-bf16](https://huggingface.co/openbmb/MiniCPM-S-1B-sft-gguf)
-- [tencent/Hunyuan-7B-Instruct](https://huggingface.co/bartowski/tencent_Hunyuan-7B-Instruct-GGUF)
-- [mistralai/Mistral-7B-Instruct-v0.3](https://huggingface.co/bartowski/Mistral-7B-Instruct-v0.3-GGUF)
-- [bartowski/DeepSeek-R1-Distill-Llama-8B-GGUF](https://huggingface.co/bartowski/DeepSeek-R1-Distill-Llama-8B-GGUF)
+The following models were validated on Intel® Core™ Ultra Series 2. While our testing was limited, the OpenVINO backend is expected to work across a broad range of [Intel hardware](https://docs.openvino.ai/2026/about-openvino/release-notes-openvino/system-requirements.html).
+- Set `GGML_OPENVINO_STATEFUL_EXECUTION=1` when using a GPU device.
+- `-fa 1` is required when running `llama-bench` with the OpenVINO backend.
+- Additional model support, quantization formats, and validation are a work in progress.
+
+| Model | Validated Quantizations / Devices | Known Issues |
+| :------| :---------- | :-------------|
+| [Llama-3.2-1B-Instruct](https://huggingface.co/unsloth/Llama-3.2-1B-Instruct-GGUF/) | `FP16`, `Q8_0`, `Q4_0`, `Q4_1`, `Q4_K_M` on CPU/GPU/NPU | — |
+| [Meta-Llama-3.1-8B-Instruct](https://huggingface.co/bartowski/Meta-Llama-3.1-8B-Instruct-GGUF) | `Q8_0`, `Q4_K_M` on CPU/GPU/NPU | `Q4_0_8_8`, `Q4_0_4_8`, `Q4_0_4_4` fail |
+| [Phi-3-mini-4k-instruct](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf) | `FP16`, `Q4` on CPU/NPU | GPU unsupported for `FP16` and `Q4` (`llama-cli`, `llama-bench`) |
+| [Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-GGUF) | `FP16`, `Q8_0`, `Q4_0`, `Q4_1`, `Q4_K_M` on CPU/GPU/NPU | — |
+| [Qwen3-8B-Instruct](https://huggingface.co/Qwen/Qwen3-8B-GGUF) | `FP16`, `Q8_0`, `Q4_0`, `Q4_1`, `Q4_K_M` on CPU/NPU; GPU works via `llama-bench` | GPU `llama-cli` unsupported for all quantizations |
+| [MiniCPM-V-2_6-GGUF](https://huggingface.co/openbmb/MiniCPM-V-2_6-gguf) | `Q4_0` on CPU/GPU/NPU | — |
+| [DeepSeek-R1-Distill-Llama-8B](https://huggingface.co/bartowski/DeepSeek-R1-Distill-Llama-8B-GGUF) | `Q8_0`, `Q4_0`, `Q4_1`, `Q4_K_M` on CPU/GPU/NPU | — |
+| [Hunyuan-7B-Instruct](https://huggingface.co/bartowski/tencent_Hunyuan-7B-Instruct-GGUF) | CPU: `Q8_0`, `Q4_0`, `Q4_1`, `Q4_K_M`; GPU: `Q8_0`, `Q4_0`, `Q4_1`; NPU (`llama-bench` only): `Q4_0`, `Q4_1`, `Q4_K_M` | GPU `Q4_K_M` unsupported; NPU `llama-cli` unsupported |
+| [Mistral-7B-Instruct-v0.3](https://huggingface.co/bartowski/Mistral-7B-Instruct-v0.3-GGUF/) | CPU/GPU: `Q8_0`, `Q4_K_M`; NPU: `Q8_0`, `Q4_K_M` (via `llama-bench`) | NPU `llama-cli` unsupported for `Q8_0`, `Q4_K_M` |

## Build Instructions

2 changes: 2 additions & 0 deletions ggml/src/ggml-hexagon/ggml-hexagon.cpp
@@ -2865,6 +2865,7 @@ static htp_op_code op_remap_to_htp(const ggml_tensor * t) {
        case GGML_UNARY_OP_NEG:      return HTP_OP_UNARY_NEG;
        case GGML_UNARY_OP_EXP:      return HTP_OP_UNARY_EXP;
        case GGML_UNARY_OP_SOFTPLUS: return HTP_OP_UNARY_SOFTPLUS;
+        case GGML_UNARY_OP_TANH:     return HTP_OP_UNARY_TANH;
        default:
            break;
    }
@@ -3335,6 +3336,7 @@ static bool ggml_backend_hexagon_device_supports_op(ggml_backend_dev_t dev, cons
        case GGML_UNARY_OP_EXP:
        case GGML_UNARY_OP_SIGMOID:
        case GGML_UNARY_OP_SOFTPLUS:
+        case GGML_UNARY_OP_TANH:
            supp = ggml_hexagon_supported_unary(sess, op);
            break;
        case GGML_UNARY_OP_SILU:
1 change: 1 addition & 0 deletions ggml/src/ggml-hexagon/htp/htp-ops.h
@@ -62,6 +62,7 @@ enum htp_op_code {
    HTP_OP_UNARY_EXP,
    HTP_OP_UNARY_NEG,
    HTP_OP_UNARY_SOFTPLUS,
+    HTP_OP_UNARY_TANH,
    HTP_OP_GLU_SWIGLU,
    HTP_OP_GLU_SWIGLU_OAI,
    HTP_OP_GLU_GEGLU,
1 change: 1 addition & 0 deletions ggml/src/ggml-hexagon/htp/main.c
@@ -542,6 +542,7 @@ static int execute_op(struct htp_ops_context * octx) {
        case HTP_OP_UNARY_SIGMOID:
        case HTP_OP_UNARY_NEG:
        case HTP_OP_UNARY_EXP:
+        case HTP_OP_UNARY_TANH:
        case HTP_OP_L2_NORM:
            return op_unary(octx);

22 changes: 21 additions & 1 deletion ggml/src/ggml-hexagon/htp/unary-ops.c
@@ -373,6 +373,21 @@ static void l2_norm_f32(const float * restrict src,
    }
}

+static void tanh_f32(const float * restrict src,
+                     float * restrict dst,
+                     uint8_t * restrict spad,
+                     const uint32_t num_rows,
+                     const uint32_t row_elems,
+                     const size_t row_size,
+                     int32_t * op_params) {
+    for (uint32_t ir = 0; ir < num_rows; ir++) {
+        const uint8_t * restrict src_local = (const uint8_t *) src + (ir * row_size);
+        uint8_t * restrict       dst_local = (uint8_t *) dst + (ir * row_size);
+
+        hvx_tanh_f32_aa(dst_local, src_local, row_elems);
+    }
+}
+
static void unary_job_f32_per_thread(unsigned int nth, unsigned int ith, void * data) {
    const struct htp_unary_context * uctx = (const struct htp_unary_context *) data;
    struct htp_ops_context * octx = uctx->octx;
@@ -477,6 +492,9 @@ static void unary_job_f32_per_thread(unsigned int nth, unsigned int ith, void *
        case HTP_OP_UNARY_SOFTPLUS:
            softplus_f32(src0_spad, dst_spad, NULL, block_size, ne0, src0_row_size_aligned, op_params);
            break;
+        case HTP_OP_UNARY_TANH:
+            tanh_f32(src0_spad, dst_spad, NULL, block_size, ne0, src0_row_size_aligned, op_params);
+            break;
        case HTP_OP_L2_NORM:
            l2_norm_f32(src0_spad, dst_spad, NULL, block_size, ne0, src0_row_size_aligned, op_params);
            break;
@@ -547,10 +565,12 @@ static int execute_op_unary_f32(struct htp_ops_context * octx) {
        case HTP_OP_UNARY_SOFTPLUS:
            op_type = "softplus-f32";
            break;
+        case HTP_OP_UNARY_TANH:
+            op_type = "tanh-f32";
+            break;
        case HTP_OP_L2_NORM:
            op_type = "l2norm-f32";
            break;
-
        default:
            FARF(ERROR, "Unsupported unary Op %u\n", octx->op);
            return HTP_STATUS_NO_SUPPORT;
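For readers without the Hexagon sources at hand, a scalar model of the per-row call may help. This assumes `hvx_tanh_f32_aa` applies tanh elementwise to `row_elems` floats in one row (the intrinsic name comes from the diff; the scalar body below is an illustration, not the shipped HVX kernel):

```cpp
#include <cmath>
#include <cstdint>

// Scalar reference for one row: dst[i] = tanh(src[i]) for row_elems floats.
// Assumption: this mirrors what hvx_tanh_f32_aa computes with HVX vectors
// over aligned buffers; the HVX version vectorizes this loop.
static void tanh_f32_row_ref(const float * src, float * dst, uint32_t row_elems) {
    for (uint32_t i = 0; i < row_elems; i++) {
        dst[i] = std::tanh(src[i]);
    }
}
```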