A modern Zig 0.16 framework for AI services, vector search, and high-performance systems
Quick Start · Documentation · Examples · Contributing
╔═══════════════════════════════════════════════════════════════╗
║ LLM Inference · Vector Database · GPU Acceleration ║
║ Agent Runtime · Distributed Compute · Training Pipelines║
╚═══════════════════════════════════════════════════════════════╝
Built with Zig for zero-cost abstractions, comptime optimization, and bare-metal performance. SIMD-accelerated operations throughout.
Battle-tested with 1296 tests (1290 passing, 6 skipped), comprehensive error handling, graceful degradation, and circuit breakers for resilience.
Enable only what you need. Every feature is toggleable at compile-time with zero overhead for disabled modules.
git clone https://github.com/donaldfilimon/abi.git
cd abi
zig build
zig build run -- --help

| Dependency | Version | Required |
|---|---|---|
| Zig | 0.16.0-dev.2682+02142a54d | Yes |
| Git | Any | Yes |
| GPU Drivers | Latest | Optional |
const std = @import("std");
const abi = @import("abi");

pub fn main() !void {
    // DebugAllocator is the Zig 0.14+ name for GeneralPurposeAllocator;
    // it reports leaks and use-after-free in debug builds.
    var gpa = std.heap.DebugAllocator(.{}).init;
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();

    // Bootstrap the framework with default configuration.
    var app = try abi.App.initDefault(allocator);
    defer app.deinit();

    std.debug.print("ABI v{s} ready!\n", .{abi.version()});
}
ABI now exposes canonical v2 entrypoints only:
- Use abi.App / abi.AppBuilder as the runtime types.
- Use abi.features.<name> for feature modules and abi.services.<name> for service modules.
- Use abi.App.init(...), abi.App.initDefault(...), and abi.App.builder(...) for app bootstrap.
- Legacy aliases (abi.Framework, abi.init*, top-level abi.<feature|service>) are removed.
AI Agent Chat
const std = @import("std");
const abi = @import("abi");

pub fn main() !void {
    // DebugAllocator is the Zig 0.14+ name for GeneralPurposeAllocator;
    // it reports leaks and use-after-free in debug builds.
    var gpa = std.heap.DebugAllocator(.{}).init;
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();

    // Create an AI agent with conversation history enabled.
    var agent = try abi.features.ai.Agent.init(allocator, .{
        .name = "assistant",
        .temperature = 0.7,
        .enable_history = true,
    });
    defer agent.deinit();

    // Caller owns the returned response slice.
    const response = try agent.chat("Explain Zig's comptime in one sentence.", allocator);
    defer allocator.free(response);

    std.debug.print("Agent: {s}\n", .{response});
}
Vector Database
const std = @import("std");
const abi = @import("abi");

pub fn main() !void {
    var gpa = std.heap.DebugAllocator(.{}).init;
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();

    // Create a 384-dimensional vector database.
    var db = try abi.wdbx.createDatabase(allocator, .{ .dimension = 384 });
    defer db.deinit();

    // Placeholder embeddings; in practice these come from an embedding model.
    const embedding1 = [_]f32{0.1} ** 384;
    const embedding2 = [_]f32{0.2} ** 384;
    const query_embedding = [_]f32{0.15} ** 384;

    // Insert vectors keyed by ID.
    try db.insertVector(1, &embedding1);
    try db.insertVector(2, &embedding2);

    // Search for the 10 nearest vectors; caller frees the result slice.
    const results = try db.searchVectors(&query_embedding, 10);
    defer allocator.free(results);

    for (results) |result| {
        std.debug.print("ID: {d}, Score: {d:.4}\n", .{ result.id, result.score });
    }
}
GPU-Accelerated Compute
const std = @import("std");
const abi = @import("abi");

pub fn main() !void {
    var gpa = std.heap.DebugAllocator(.{}).init;
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();

    // Auto-selects the best available backend (CUDA > Vulkan > Metal > CPU).
    var gpu = try abi.Gpu.init(allocator, .{
        .enable_profiling = true,
        .memory_mode = .automatic,
    });
    defer gpu.deinit();

    const a = try gpu.createBufferFromSlice(f32, &[_]f32{ 1, 2, 3, 4 }, .{});
    const b = try gpu.createBufferFromSlice(f32, &[_]f32{ 4, 3, 2, 1 }, .{});
    const result = try gpu.createBuffer(4 * @sizeOf(f32), .{});
    defer {
        gpu.destroyBuffer(a);
        gpu.destroyBuffer(b);
        gpu.destroyBuffer(result);
    }

    // Executes on GPU with automatic SIMD/scalar fallback.
    _ = try gpu.vectorAdd(a, b, result);

    var output: [4]f32 = undefined;
    try result.read(f32, &output);
    // output = { 5, 5, 5, 5 }
}
Training Pipeline
const std = @import("std");
const abi = @import("abi");

pub fn main() !void {
    var gpa = std.heap.DebugAllocator(.{}).init;
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();

    // Configure a short training run with the AdamW optimizer.
    const config = abi.features.ai.TrainingConfig{
        .epochs = 10,
        .batch_size = 32,
        .learning_rate = 0.001,
        .optimizer = .adamw,
    };

    var result = try abi.features.ai.trainWithResult(allocator, config);
    defer result.deinit();

    std.debug.print("Final loss: {d:.6}\n", .{result.report.final_loss});
}
- Define command metadata in the command module using
pub const meta: command.Meta.
- Keep registry ordering/metadata overrides in tools/cli/registry/overrides.zig.
- Refresh the generated registry snapshot with `zig build refresh-cli-registry` after adding commands.
- Use command metadata fields for options/UI/risk so launcher/completion/help are derived from one source.
- For simple UI dashboards, use tools/cli/ui/dsl/mod.zig to avoid repeated theme/session/dashboard boilerplate.
- Refresh/check registry snapshots with: `zig build refresh-cli-registry` and `zig build check-cli-registry`.
# Core Commands
abi --help # Show all commands
abi system-info # System and feature status
abi ui launch # Interactive TUI launcher
# Database Operations
abi db stats # Database statistics
abi db add --id 1 --embed "text"
abi db search --embed "query" --top 5
abi db backup --path backup.db
# AI & Agents
abi agent # Interactive chat
abi agent --persona coder # Use specific persona
abi agent -m "Hello" # One-shot message
abi llm chat model.gguf # Chat with local model
# GPU Management
abi gpu backends # List available backends
abi gpu devices # Enumerate all GPUs
abi gpu summary # Quick status
# Training
abi train run --epochs 10 # Start training
abi train resume ./checkpoint # Resume from checkpoint
abi train monitor # Real-time metrics
# Runtime Feature Flags
abi --list-features # Show feature status
abi --enable-gpu db stats # Enable feature for command
abi --disable-ai system-info # Disable feature for command

| Benchmark | Operations/sec |
|---|---|
| SIMD Vector Dot Product | 84,875,233 |
| SIMD Vector Addition | 84,709,869 |
| Configuration Loading | 66,476,102 |
| Memory Allocation (1KB) | 464,712 |
| Logging Operations | 331,960 |
| Compute Engine Task | 93,368 |
| Network Registry Ops | 84,831 |
| JSON Parse/Serialize | 83,371 |
| Database Vector Insert | 68,444 |
| Database Vector Search | 56,563 |
ReleaseFast build on typical development workstation. Run zig build benchmarks to test your system.
abi/
├── src/
│ ├── abi.zig # Public API entry point
│ ├── config/ # Unified configuration
│ ├── framework.zig # Lifecycle orchestration
│ ├── platform/ # Platform detection (OS, arch, CPU)
│ │
│ ├── ai/ # AI Module
│ │ ├── llm/ # Local LLM inference (Llama-CPP parity)
│ │ ├── agents/ # Agent runtime with personas
│ │ ├── training/ # Training pipelines
│ │ └── embeddings/ # Vector embeddings
│ │
│ ├── gpu/ # GPU Acceleration
│ │ ├── backends/ # CUDA, Vulkan, Metal, WebGPU, FPGA
│ │ ├── kernels/ # Compute kernels
│ │ └── dsl/ # Shader DSL & codegen
│ │
│ ├── database/ # Vector Database (WDBX)
│ │ ├── hnsw.zig # HNSW indexing
│ │ └── distributed/ # Sharding & replication
│ │
│ ├── runtime/ # Compute Infrastructure
│ │ ├── engine/ # Work-stealing scheduler
│ │ ├── concurrency/ # Lock-free primitives
│ │ └── memory/ # Pool allocators
│ │
│ ├── network/ # Distributed Compute
│ │ └── raft/ # Consensus protocol
│ │
│ ├── shared/ # Shared utilities (security, io, utils)
│ │
│ └── observability/ # Metrics & Tracing
│
├── tools/cli/ # CLI implementation
├── examples/ # Usage examples
└── docs/ # Documentation
System Architecture Diagram
flowchart TB
subgraph "Public API"
ABI[abi.zig]
end
subgraph "Framework Layer"
FW[Framework Orchestration]
CFG[Configuration]
REG[Feature Registry]
end
subgraph "Feature Modules"
AI[AI Runtime]
GPU[GPU Acceleration]
DB[Vector Database]
NET[Distributed Network]
OBS[Observability]
end
subgraph "Infrastructure"
RT[Runtime Engine]
MEM[Memory Management]
CONC[Concurrency]
end
ABI --> FW
FW --> CFG
FW --> REG
FW --> AI
FW --> GPU
FW --> DB
FW --> NET
FW --> OBS
AI --> RT
GPU --> RT
DB --> RT
RT --> MEM
RT --> CONC
All features are enabled by default. Disable unused features to reduce binary size.
| Flag | Default | Description |
|---|---|---|
-Denable-ai |
true | AI features, agents, and connectors |
-Denable-llm |
true | Local LLM inference |
-Denable-gpu |
true | GPU acceleration |
-Denable-database |
true | Vector database (WDBX) |
-Denable-network |
true | Distributed compute |
-Denable-web |
true | HTTP client utilities |
-Denable-profiling |
true | Performance profiling |
# Single backend
zig build -Dgpu-backend=vulkan
zig build -Dgpu-backend=cuda
zig build -Dgpu-backend=metal
# Multiple backends (comma-separated)
zig build -Dgpu-backend=cuda,vulkan
# Auto-detect best available
zig build -Dgpu-backend=auto

C bindings were removed during the 2026-01-30 cleanup and are being reintroduced as part of the language bindings roadmap. Track progress in ROADMAP.md under Language bindings.
| Resource | Description |
|---|---|
| Online Docs | Published documentation site |
| Docs Source | Docs build and layout |
| API Overview | High-level API reference |
| Getting Started | First steps and setup |
| Configuration | Config system overview |
| Architecture | System structure |
| AI Guide | LLM, agents, training |
| GPU Guide | Multi-backend GPU acceleration |
| Database Guide | WDBX vector database |
| Network Guide | Distributed compute |
| Deployment Guide | Production deployment |
| Observability Guide | Metrics and profiling |
| Security Guide | Security model |
| Examples Guide | Example walkthroughs |
| API Reference | Public API summary |
| Quickstart | Getting started guide |
| Developer Guide | Zig 0.16 patterns and conventions |
# Run all tests
zig build test --summary all
# Test specific module
zig test src/runtime/engine/engine.zig
# Filter tests by pattern
zig test src/tests/mod.zig --test-filter "pattern"
# Run benchmarks
zig build benchmarks
# Lint check
zig build lint

| Variable | Description |
|---|---|
ABI_OPENAI_API_KEY |
OpenAI API key |
ABI_ANTHROPIC_API_KEY |
Anthropic/Claude API key |
ABI_OLLAMA_HOST |
Ollama host (default: http://127.0.0.1:11434) |
ABI_OLLAMA_MODEL |
Default Ollama model |
ABI_HF_API_TOKEN |
HuggingFace API token |
DISCORD_BOT_TOKEN |
Discord bot token |
| Milestone | Status |
|---|---|
| Zig 0.16 Migration | |
| Llama-CPP Parity | |
| C Library Bindings | |
| Plugin Registry | |
| Runtime Consolidation | |
| Feature Stubs | |
| Multi-GPU Orchestration |
See PLAN.md for current sprint status and ROADMAP.md for version history.
We welcome contributions! Please see:
- CONTRIBUTING.md - Development workflow
- CLAUDE.md - Coding guidelines and patterns