From 70d0113056afe8db8723c66373e9d506dded77f7 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Fri, 20 Feb 2026 07:35:51 +0000 Subject: [PATCH 1/7] Reorder sidebar, replace quickstart with tabbed guide, rename env var - Move Deployment section before SDK Reference in sidebar - Replace quick-start.md with quick-start.mdx featuring tabs for Conda (recommended), Spack, and Docker installation methods - Add verified CEE example (bundle, query, retrieve, destroy) - Document default configuration, runtime startup, and env vars - Rename CHIMAERA_WITH_RUNTIME to CHI_WITH_RUNTIME in hpc-cluster.md Co-Authored-By: Claude Opus 4.6 --- docs/deployment/hpc-cluster.md | 2 +- docs/getting-started/quick-start.md | 126 ------------ docs/getting-started/quick-start.mdx | 290 +++++++++++++++++++++++++++ sidebars.ts | 20 +- 4 files changed, 301 insertions(+), 137 deletions(-) delete mode 100644 docs/getting-started/quick-start.md create mode 100644 docs/getting-started/quick-start.mdx diff --git a/docs/deployment/hpc-cluster.md b/docs/deployment/hpc-cluster.md index 87818f4..b3e0c5a 100644 --- a/docs/deployment/hpc-cluster.md +++ b/docs/deployment/hpc-cluster.md @@ -66,7 +66,7 @@ export CHI_IPC_MODE=TCP | Variable | Default | Description | |----------|---------|-------------| -| `CHIMAERA_WITH_RUNTIME` | *(unset)* | When set to `1`, starts the runtime server in-process. When `0`, client-only mode. | +| `CHI_WITH_RUNTIME` | *(unset)* | When set to `1`, starts the runtime server in-process. When `0`, client-only mode. | This variable is read by `CHIMAERA_INIT()`. If unset, the value of the `default_with_runtime` argument passed to `CHIMAERA_INIT()` is used instead. diff --git a/docs/getting-started/quick-start.md b/docs/getting-started/quick-start.md deleted file mode 100644 index 544eb05..0000000 --- a/docs/getting-started/quick-start.md +++ /dev/null @@ -1,126 +0,0 @@ ---- -sidebar_position: 2 -title: Quick Start -description: Get IOWarp running with Docker in 5 minutes. ---- - -# Quick Start Tutorial - -Get IOWarp running with Docker in 5 minutes. This tutorial walks you through running the IOWarp runtime with buffering services. - -## Prerequisites - -- Docker and Docker Compose installed -- At least 8 GB of available RAM - -## 1. Start the Runtime - -The `docker/quickstart/` directory contains everything you need. From the repository root: - -```bash -cd docker/quickstart -docker compose up -d -``` - -This starts a single-node Chimaera runtime using the pre-built `iowarp/deploy-cpu` image. - -### Verify it's running - -```bash -docker compose logs -``` - -You should see output indicating that worker threads have been spawned and modules loaded. Look for `SpawnWorkerThreads` in the output. - -### Stop the runtime - -```bash -docker compose down -``` - -## 2. Configuration - -The quickstart ships with a ready-to-use `chimaera.yaml`. Here is a minimal configuration for reference: - -```yaml -# IOWarp Runtime Configuration -networking: - port: 5555 - -compose: - # Block device (DRAM buffer) - - mod_name: chimaera_bdev - pool_name: "ram::chi_default_bdev" - pool_query: local - pool_id: "301.0" - bdev_type: ram - capacity: "512MB" - - # Context Transfer Engine (CTE) - - mod_name: wrp_cte_core - pool_name: cte_main - pool_query: local - pool_id: "512.0" - storage: - - path: "ram::cte_ram_tier1" - bdev_type: "ram" - capacity_limit: "512MB" - score: 1.0 - dpe: - dpe_type: "max_bw" - targets: - neighborhood: 1 - default_target_timeout_ms: 30000 - poll_period_ms: 5000 - - # Context Assimilation Engine (CAE) - - mod_name: wrp_cae_core - pool_name: wrp_cae_core_pool - pool_query: local - pool_id: "400.0" -``` - -**Storage parameters:** - -| Parameter | Description | -|-----------|-------------| -| `path` | `ram::` for RAM, `/dev/` for block devices | -| `bdev_type` | `ram`, `nvme`, or `aio` (async I/O) | -| `capacity_limit` | Max capacity (`KB`, `MB`, `GB`, `TB`) | -| `score` | Tier priority: `0.0` = lowest, `1.0` = highest | - -### Docker Compose Details - -The `docker-compose.yml` mounts the config at `/etc/iowarp/chimaera.yaml` and sets the `CHI_SERVER_CONF` environment variable so the runtime finds it: - -```yaml -services: - iowarp: - image: iowarp/deploy-cpu:latest - container_name: iowarp-quickstart - hostname: iowarp - volumes: - - ./chimaera.yaml:/etc/iowarp/chimaera.yaml:ro - environment: - - CHI_SERVER_CONF=/etc/iowarp/chimaera.yaml - ports: - - "5555:5555" - mem_limit: 8g - command: ["chimaera", "runtime", "start"] - restart: unless-stopped -``` - -## Next Steps - -- [View Research Demos](https://iowarp.ai/research/demos/) — See IOWarp in action with real scientific workflows -- [Explore the Platform](https://iowarp.ai/platform/) — Learn about IOWarp's context engineering architecture -- [Try CLIO Kit](../clio-kit/mcp-servers) — Use 16 MCP servers for AI-powered scientific computing -- [Deployment Guide](../deployment/hpc-cluster) — Deploy IOWarp on HPC clusters -- [Configuration Reference](../deployment/configuration) — Deep dive into configuration options - -## Support - -- Open an issue on the [GitHub repository](https://github.com/iowarp/iowarp-install) -- Join the [Zulip Chat](https://iowarp.zulipchat.com) -- Visit the [IOWarp website](https://iowarp.ai) -- Email: grc@illinoistech.edu diff --git a/docs/getting-started/quick-start.mdx b/docs/getting-started/quick-start.mdx new file mode 100644 index 0000000..ed40ee1 --- /dev/null +++ b/docs/getting-started/quick-start.mdx @@ -0,0 +1,290 @@ +--- +sidebar_position: 2 +title: Quick Start +description: Set up your IOWarp environment and run your first example. +--- + +# Quick Start + +This guide walks you through setting up an IOWarp environment, starting the +Chimaera runtime, and running a Context Exploration Engine (CEE) example that +ingests, queries, retrieves, and cleans up data. + +## 1. Set Up Your Environment + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + + + + +Conda is the recommended way to install IOWarp Core. It builds the full +package from source with all optional features available. + +```bash +# Clone the repository with submodules +git clone --recurse-submodules https://github.com/iowarp/iowarp-core.git +cd iowarp-core + +# Build and install (creates or reuses the "iowarp" conda environment) +./install.sh +``` + +Activate the environment in every new terminal: + +```bash +conda activate iowarp +``` + +#### Build Variants + +By default `install.sh` builds a CPU-only release. Pass a variant name for +additional features: + +| Variant | Command | What it enables | +|---------|---------|-----------------| +| Release (default) | `./install.sh` | CPU-only, all engines | +| Debug | `./install.sh debug` | Debug symbols, sanitizers | +| CUDA | `./install.sh cuda` | NVIDIA GPU acceleration | +| ROCm | `./install.sh rocm` | AMD GPU acceleration | +| MPI | `./install.sh mpi` | Distributed multi-node | +| Full | `./install.sh full` | CUDA + MPI + everything | + + + + +1. Install Spack (v0.22.3+ recommended): + +```bash +git clone https://github.com/spack/spack.git -b v0.22.3 +cd spack +. share/spack/setup-env.sh +echo ". ${PWD}/share/spack/setup-env.sh" >> ~/.bashrc +``` + +2. Add the IOWarp repository: + +```bash +git clone https://github.com/iowarp/iowarp-install.git +spack repo add iowarp-install/iowarp-spack +``` + +3. Install IOWarp: + +```bash +spack install iowarp +spack load iowarp +``` + + + + +Pull the pre-built image: + +```bash +docker pull iowarp/deploy-cpu:latest +``` + +Run a single-node runtime: + +```bash +docker run -d -p 5555:5555 --memory=8g \ + --name iowarp iowarp/deploy-cpu:latest \ + chimaera runtime start +``` + +Or use Docker Compose. Create a `docker-compose.yml`: + +```yaml +services: + iowarp: + image: iowarp/deploy-cpu:latest + container_name: iowarp + hostname: iowarp + volumes: + - ./chimaera.yaml:/home/iowarp/.chimaera/chimaera.yaml:ro + ports: + - "5555:5555" + mem_limit: 8g + command: ["chimaera", "runtime", "start"] + restart: unless-stopped +``` + +```bash +docker compose up -d +docker compose logs # verify it started +``` + +:::info +IOWarp uses `memfd_create()` for shared memory, so no special `/dev/shm` +configuration is needed. Only `mem_limit` matters for resource control. +::: + + + + +### Verify the installation + +```bash +chimaera --help +``` + +You should see the available subcommands (`runtime start`, `runtime stop`, +`monitor`, `compose`, etc.). + +## 2. Default Configuration + +During installation a default configuration file is placed at: + +``` +~/.chimaera/chimaera.yaml +``` + +This file is only created if it does not already exist, so your customisations +are never overwritten. A reference copy is also installed to +`$CONDA_PREFIX/etc/chimaera/chimaera_default.yaml`. + +The runtime resolves its configuration in this order: + +| Priority | Source | +|----------|--------| +| 1 | `CHI_SERVER_CONF` environment variable | +| 2 | `~/.chimaera/chimaera.yaml` | +| 3 | Built-in defaults | + +The default configuration starts **4 worker threads** on **port 5555** and +composes three modules automatically: + +| Module | Purpose | +|--------|---------| +| `chimaera_bdev` | 512 MB RAM block device | +| `wrp_cte_core` | Context Transfer Engine with a 512 MB RAM cache | +| `wrp_cae_core` | Context Assimilation Engine | + +## 3. Start the Runtime + +### Option A -- Standalone daemon + +```bash +# Start in the background +chimaera runtime start & + +# Verify it is running +chimaera monitor --once + +# When done +chimaera runtime stop +``` + +### Option B -- Embedded runtime (recommended for scripts) + +Set the `CHI_WITH_RUNTIME` environment variable and the runtime starts +inside your application process -- no separate daemon needed: + +```bash +export CHI_WITH_RUNTIME=1 +``` + +The example below uses this approach. + +## 4. Context Exploration Engine Example + +The Context Exploration Engine (CEE) lets you assimilate data into IOWarp, +query for it by name or regex, retrieve it, and clean up -- all from Python. + +Save the following as **`cee_quickstart.py`**: + +```python +#!/usr/bin/env python3 +"""IOWarp CEE Quickstart -- assimilate, query, retrieve, destroy.""" + +import os +import tempfile + +import wrp_cee as cee + +# -- 1. Create a sample file ------------------------------------------- +data = b"Hello from IOWarp! " * 50_000 # ~950 KB +tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".bin") +tmp.write(data) +tmp.close() +print(f"Created test file: {tmp.name} ({len(data):,} bytes)") + +# -- 2. Initialise the CEE interface ----------------------------------- +# ContextInterface connects to the running Chimaera runtime (or starts +# an embedded one when CHI_WITH_RUNTIME=1). +iface = cee.ContextInterface() + +# -- 3. Bundle (assimilate) the file ----------------------------------- +tag = "quickstart_demo" +ctx = cee.AssimilationCtx( + src=f"file::{tmp.name}", # source: local file (note :: not ://) + dst=f"iowarp::{tag}", # destination tag in IOWarp + format="binary", # raw binary ingest +) +rc = iface.context_bundle([ctx]) +assert rc == 0, f"context_bundle failed (rc={rc})" +print(f"Assimilated file into tag '{tag}'") + +# -- 4. Query for blobs in the tag ------------------------------------- +blobs = iface.context_query(tag, ".*", 0) # regex ".*" matches all blobs +print(f"Found {len(blobs)} blob(s): {blobs}") + +# -- 5. Retrieve the data back ----------------------------------------- +packed = iface.context_retrieve(tag, ".*", 0) +if packed: + print(f"Retrieved {len(packed[0]):,} bytes") + +# -- 6. Destroy the tag ------------------------------------------------ +iface.context_destroy([tag]) +print(f"Destroyed tag '{tag}'") + +# -- Cleanup ------------------------------------------------------------ +os.unlink(tmp.name) +print("Done!") +``` + +Run it with the embedded runtime: + +```bash +export CHI_WITH_RUNTIME=1 +python3 cee_quickstart.py +``` + +Expected output: + +``` +Created test file: /tmp/tmpXXXXXXXX.bin (950,000 bytes) +Assimilated file into tag 'quickstart_demo' +Found 2 blob(s): ['chunk_0', 'description'] +Retrieved 950,029 bytes +Destroyed tag 'quickstart_demo' +Done! +``` + +## 5. Key Environment Variables + +| Variable | Description | +|----------|-------------| +| `CHI_SERVER_CONF` | Path to YAML configuration file (highest priority) | +| `CHI_WITH_RUNTIME` | Set to `1` to start an embedded runtime in-process | +| `CHI_IPC_MODE` | Transport: `SHM` (shared memory), `TCP` (default), `IPC` (Unix socket) | +| `HSHM_LOG_LEVEL` | Logging verbosity: `debug`, `info`, `warning`, `error`, `fatal` | + +## Next Steps + +- Edit `~/.chimaera/chimaera.yaml` to tune thread counts, storage tiers, + or add file-backed block devices +- [Configuration Reference](../deployment/configuration) -- full parameter + documentation +- [HPC Deployment](../deployment/hpc-cluster) -- multi-node cluster setup +- [CLIO Kit](../clio-kit/mcp-servers) -- MCP servers for AI-powered + scientific computing +- [SDK Reference](../sdk/) -- component APIs and development guides + +## Support + +- Open an issue on the [GitHub repository](https://github.com/iowarp/core) +- Join the [Zulip Chat](https://iowarp.zulipchat.com) +- Visit the [IOWarp website](https://iowarp.ai) +- Email: grc@illinoistech.edu diff --git a/sidebars.ts b/sidebars.ts index 68aa700..0f7f511 100644 --- a/sidebars.ts +++ b/sidebars.ts @@ -19,6 +19,16 @@ const sidebars: SidebarsConfig = { 'clio-kit/mcp-servers', ], }, + { + type: 'category', + label: 'Deployment', + items: [ + 'deployment/configuration', + 'deployment/hpc-cluster', + 'deployment/performance', + 'deployment/monitoring', + ], + }, { type: 'category', label: 'SDK Reference', @@ -56,16 +66,6 @@ const sidebars: SidebarsConfig = { 'api/storage', ], }, - { - type: 'category', - label: 'Deployment', - items: [ - 'deployment/configuration', - 'deployment/hpc-cluster', - 'deployment/performance', - 'deployment/monitoring', - ], - }, 'faq', 'tutorials', ], From abbd47818c4e4e49dd880c9f3b4785afc0c41df2 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Fri, 20 Feb 2026 07:37:51 +0000 Subject: [PATCH 2/7] Make native install the recommended method, pip experimental Reorder installation tabs: Native Install (Recommended) is now the default tab. Pip is moved last and marked as experimental with a caution banner noting its limitations (no GPU, MPI, or HDF5). Co-Authored-By: Claude Opus 4.6 --- docs/getting-started/installation.mdx | 114 ++++++++++++++++++-------- 1 file changed, 78 insertions(+), 36 deletions(-) diff --git a/docs/getting-started/installation.mdx b/docs/getting-started/installation.mdx index bb5c23a..ed04bb2 100644 --- a/docs/getting-started/installation.mdx +++ b/docs/getting-started/installation.mdx @@ -1,7 +1,7 @@ --- sidebar_position: 1 title: Installation -description: Install IOWarp via pip, Docker, native build, or Spack package manager. +description: Install IOWarp via native build, Docker, Spack, or pip. --- # Installation @@ -12,43 +12,57 @@ import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; - + -Install the IOWarp Python package: +Clone and build IOWarp Core from source using the automated installer. +This uses conda internally to manage dependencies and produces a full-featured +build with all optional components. ```bash -pip install iowarp-core +git clone --recurse-submodules https://github.com/iowarp/iowarp-core.git +cd iowarp-core +./install.sh ``` -Verify the installation: +Activate the environment in every new terminal: -```python -import iowarp_core -print(iowarp_core.get_version()) +```bash +conda activate iowarp ``` -### Start the Runtime +#### Build Variants -IOWarp includes the `chimaera` CLI for managing the runtime: +Pass a variant name to enable additional features: + +| Variant | Command | What it enables | +|---------|---------|-----------------| +| Release (default) | `./install.sh` | CPU-only, all engines | +| Debug | `./install.sh debug` | Debug symbols, sanitizers | +| CUDA | `./install.sh cuda` | NVIDIA GPU acceleration | +| ROCm | `./install.sh rocm` | AMD GPU acceleration | +| MPI | `./install.sh mpi` | Distributed multi-node | +| Full | `./install.sh full` | CUDA + MPI + everything | + +### Verify the installation ```bash -chimaera runtime start +conda activate iowarp +chimaera --help ``` ### What's Included -The pip package is self-contained with all dependencies statically linked. It includes: +The native install provides the complete IOWarp stack: -- **Python API** — `import iowarp_core` and `import wrp_cee` for context management +- **All engines** — CTE, CAE, CEE with full feature support +- **Python API** — `import wrp_cee` for context management - **CLI** — `chimaera` command for runtime management -- **Shared libraries** — All IOWarp runtime libraries bundled in the package - -:::info -No system dependencies are required beyond a standard C/C++ runtime (glibc). The package works on any Linux x86_64 or aarch64 system with Python 3.10+. -::: +- **GPU support** — CUDA and ROCm variants available +- **MPI support** — Distributed multi-node deployments +- **HDF5 support** — Scientific data format ingestion - + Pull and run the IOWarp Docker image: @@ -92,23 +106,9 @@ IOWarp uses `memfd_create()` for shared memory, so no special `/dev/shm` configu ::: - - -Use the standalone installer script: - -```bash -curl -fsSL https://raw.githubusercontent.com/iowarp/iowarp-install/main/install.sh | bash -``` - -This will: -- Clone and build IOWarp core with all submodules -- Install the IOWarp CLIO Kit -- Set up the complete IOWarp environment - - - + -1. Install Spack (v0.23 recommended): +1. Install Spack (v0.22.3+ recommended): ```bash git clone https://github.com/spack/spack.git -b v0.22.3 @@ -130,11 +130,53 @@ spack repo add iowarp-install/iowarp-spack spack install iowarp ``` + + + +:::caution Experimental +The pip package is experimental. It does not include GPU (CUDA/ROCm), +MPI, or HDF5 support. For production use, prefer the +[native install](#) method. +::: + +Install the IOWarp Python package: + +```bash +pip install iowarp-core +``` + +Verify the installation: + +```python +import iowarp_core +print(iowarp_core.get_version()) +``` + +### Start the Runtime + +IOWarp includes the `chimaera` CLI for managing the runtime: + +```bash +chimaera runtime start +``` + +### What's Included + +The pip package is self-contained with all dependencies statically linked. It includes: + +- **Python API** — `import iowarp_core` and `import wrp_cee` for context management +- **CLI** — `chimaera` command for runtime management +- **Shared libraries** — All IOWarp runtime libraries bundled in the package + +:::info +No system dependencies are required beyond a standard C/C++ runtime (glibc). The package works on any Linux x86_64 or aarch64 system with Python 3.10+. +::: + ## Next Steps -- [Quick Start Tutorial](./quick-start) — Run your first benchmark +- [Quick Start Tutorial](./quick-start) — Start the runtime and run your first example - [Configuration Reference](../deployment/configuration) — Customize your deployment - [CLIO Kit](../clio-kit/mcp-servers) — Explore MCP servers for AI agents From 3b1011fe152812a875e867bdb19cd69de6ac67e4 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Fri, 20 Feb 2026 07:47:06 +0000 Subject: [PATCH 3/7] Remove duplicated installation steps from quickstart The quickstart now links to the installation page instead of repeating the setup instructions with tabs. It focuses on configuration, runtime startup, and the CEE example. Co-Authored-By: Claude Opus 4.6 --- docs/getting-started/quick-start.mdx | 137 ++------------------------- 1 file changed, 8 insertions(+), 129 deletions(-) diff --git a/docs/getting-started/quick-start.mdx b/docs/getting-started/quick-start.mdx index ed40ee1..d85b469 100644 --- a/docs/getting-started/quick-start.mdx +++ b/docs/getting-started/quick-start.mdx @@ -1,138 +1,17 @@ --- sidebar_position: 2 title: Quick Start -description: Set up your IOWarp environment and run your first example. +description: Start the Chimaera runtime and run your first CEE example. --- # Quick Start -This guide walks you through setting up an IOWarp environment, starting the -Chimaera runtime, and running a Context Exploration Engine (CEE) example that +This guide assumes you have already [installed IOWarp](./installation). +It walks you through the default configuration, starting the Chimaera +runtime, and running a Context Exploration Engine (CEE) example that ingests, queries, retrieves, and cleans up data. -## 1. Set Up Your Environment - -import Tabs from '@theme/Tabs'; -import TabItem from '@theme/TabItem'; - - - - -Conda is the recommended way to install IOWarp Core. It builds the full -package from source with all optional features available. - -```bash -# Clone the repository with submodules -git clone --recurse-submodules https://github.com/iowarp/iowarp-core.git -cd iowarp-core - -# Build and install (creates or reuses the "iowarp" conda environment) -./install.sh -``` - -Activate the environment in every new terminal: - -```bash -conda activate iowarp -``` - -#### Build Variants - -By default `install.sh` builds a CPU-only release. Pass a variant name for -additional features: - -| Variant | Command | What it enables | -|---------|---------|-----------------| -| Release (default) | `./install.sh` | CPU-only, all engines | -| Debug | `./install.sh debug` | Debug symbols, sanitizers | -| CUDA | `./install.sh cuda` | NVIDIA GPU acceleration | -| ROCm | `./install.sh rocm` | AMD GPU acceleration | -| MPI | `./install.sh mpi` | Distributed multi-node | -| Full | `./install.sh full` | CUDA + MPI + everything | - - - - -1. Install Spack (v0.22.3+ recommended): - -```bash -git clone https://github.com/spack/spack.git -b v0.22.3 -cd spack -. share/spack/setup-env.sh -echo ". ${PWD}/share/spack/setup-env.sh" >> ~/.bashrc -``` - -2. Add the IOWarp repository: - -```bash -git clone https://github.com/iowarp/iowarp-install.git -spack repo add iowarp-install/iowarp-spack -``` - -3. Install IOWarp: - -```bash -spack install iowarp -spack load iowarp -``` - - - - -Pull the pre-built image: - -```bash -docker pull iowarp/deploy-cpu:latest -``` - -Run a single-node runtime: - -```bash -docker run -d -p 5555:5555 --memory=8g \ - --name iowarp iowarp/deploy-cpu:latest \ - chimaera runtime start -``` - -Or use Docker Compose. Create a `docker-compose.yml`: - -```yaml -services: - iowarp: - image: iowarp/deploy-cpu:latest - container_name: iowarp - hostname: iowarp - volumes: - - ./chimaera.yaml:/home/iowarp/.chimaera/chimaera.yaml:ro - ports: - - "5555:5555" - mem_limit: 8g - command: ["chimaera", "runtime", "start"] - restart: unless-stopped -``` - -```bash -docker compose up -d -docker compose logs # verify it started -``` - -:::info -IOWarp uses `memfd_create()` for shared memory, so no special `/dev/shm` -configuration is needed. Only `mem_limit` matters for resource control. -::: - - - - -### Verify the installation - -```bash -chimaera --help -``` - -You should see the available subcommands (`runtime start`, `runtime stop`, -`monitor`, `compose`, etc.). - -## 2. Default Configuration +## 1. Default Configuration During installation a default configuration file is placed at: @@ -161,7 +40,7 @@ composes three modules automatically: | `wrp_cte_core` | Context Transfer Engine with a 512 MB RAM cache | | `wrp_cae_core` | Context Assimilation Engine | -## 3. Start the Runtime +## 2. Start the Runtime ### Option A -- Standalone daemon @@ -187,7 +66,7 @@ export CHI_WITH_RUNTIME=1 The example below uses this approach. -## 4. Context Exploration Engine Example +## 3. Context Exploration Engine Example The Context Exploration Engine (CEE) lets you assimilate data into IOWarp, query for it by name or regex, retrieve it, and clean up -- all from Python. @@ -262,7 +141,7 @@ Destroyed tag 'quickstart_demo' Done! ``` -## 5. Key Environment Variables +## 4. Key Environment Variables | Variable | Description | |----------|-------------| From ad62d8b7a946d128e0ca9d6f772a13b2c2fa76b9 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Fri, 20 Feb 2026 07:48:26 +0000 Subject: [PATCH 4/7] Add environment activation step to quickstart with tabs Add a tabbed section at the top of the quickstart showing how to activate the IOWarp environment for each installer (conda activate, spack load, docker exec, pip). Co-Authored-By: Claude Opus 4.6 --- docs/getting-started/quick-start.mdx | 58 ++++++++++++++++++++++++---- 1 file changed, 51 insertions(+), 7 deletions(-) diff --git a/docs/getting-started/quick-start.mdx b/docs/getting-started/quick-start.mdx index d85b469..03b4d26 100644 --- a/docs/getting-started/quick-start.mdx +++ b/docs/getting-started/quick-start.mdx @@ -7,11 +7,55 @@ description: Start the Chimaera runtime and run your first CEE example. # Quick Start This guide assumes you have already [installed IOWarp](./installation). -It walks you through the default configuration, starting the Chimaera -runtime, and running a Context Exploration Engine (CEE) example that -ingests, queries, retrieves, and cleans up data. +It walks you through activating your environment, the default +configuration, starting the Chimaera runtime, and running a Context +Exploration Engine (CEE) example. -## 1. Default Configuration +## 1. Activate Your Environment + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + + + + +```bash +conda activate iowarp +``` + + + + +```bash +spack load iowarp +``` + + + + +If you are using Docker, the environment is already set up inside the +container. Exec into a running container: + +```bash +docker exec -it iowarp bash +``` + + + + +No activation needed -- the `chimaera` CLI and Python modules are +available once the package is installed in your Python environment. + + + + +Verify the environment is active: + +```bash +chimaera --help +``` + +## 2. Default Configuration During installation a default configuration file is placed at: @@ -40,7 +84,7 @@ composes three modules automatically: | `wrp_cte_core` | Context Transfer Engine with a 512 MB RAM cache | | `wrp_cae_core` | Context Assimilation Engine | -## 2. Start the Runtime +## 3. Start the Runtime ### Option A -- Standalone daemon @@ -66,7 +110,7 @@ export CHI_WITH_RUNTIME=1 The example below uses this approach. -## 3. Context Exploration Engine Example +## 4. Context Exploration Engine Example The Context Exploration Engine (CEE) lets you assimilate data into IOWarp, query for it by name or regex, retrieve it, and clean up -- all from Python. @@ -141,7 +185,7 @@ Destroyed tag 'quickstart_demo' Done! ``` -## 4. Key Environment Variables +## 5. Key Environment Variables | Variable | Description | |----------|-------------| From 41aed41efa863e406f2f926de2ada7ec4d30b2e3 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Fri, 20 Feb 2026 07:49:40 +0000 Subject: [PATCH 5/7] Remove CHI_WITH_RUNTIME from quickstart Simplify the quickstart to use only the standalone daemon approach. Removes embedded runtime references and CHI_WITH_RUNTIME from the env vars table. Co-Authored-By: Claude Opus 4.6 --- docs/getting-started/quick-start.mdx | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/docs/getting-started/quick-start.mdx b/docs/getting-started/quick-start.mdx index 03b4d26..cd3699f 100644 --- a/docs/getting-started/quick-start.mdx +++ b/docs/getting-started/quick-start.mdx @@ -86,8 +86,6 @@ composes three modules automatically: ## 3. Start the Runtime -### Option A -- Standalone daemon - ```bash # Start in the background chimaera runtime start & @@ -99,17 +97,6 @@ chimaera monitor --once chimaera runtime stop ``` -### Option B -- Embedded runtime (recommended for scripts) - -Set the `CHI_WITH_RUNTIME` environment variable and the runtime starts -inside your application process -- no separate daemon needed: - -```bash -export CHI_WITH_RUNTIME=1 -``` - -The example below uses this approach. - ## 4. Context Exploration Engine Example The Context Exploration Engine (CEE) lets you assimilate data into IOWarp, @@ -134,8 +121,7 @@ tmp.close() print(f"Created test file: {tmp.name} ({len(data):,} bytes)") # -- 2. Initialise the CEE interface ----------------------------------- -# ContextInterface connects to the running Chimaera runtime (or starts -# an embedded one when CHI_WITH_RUNTIME=1). +# ContextInterface connects to the running Chimaera runtime. iface = cee.ContextInterface() # -- 3. Bundle (assimilate) the file ----------------------------------- @@ -167,10 +153,9 @@ os.unlink(tmp.name) print("Done!") ``` -Run it with the embedded runtime: +Run it (make sure the runtime is still running in the background): ```bash -export CHI_WITH_RUNTIME=1 python3 cee_quickstart.py ``` @@ -190,7 +175,6 @@ Done! | Variable | Description | |----------|-------------| | `CHI_SERVER_CONF` | Path to YAML configuration file (highest priority) | -| `CHI_WITH_RUNTIME` | Set to `1` to start an embedded runtime in-process | | `CHI_IPC_MODE` | Transport: `SHM` (shared memory), `TCP` (default), `IPC` (Unix socket) | | `HSHM_LOG_LEVEL` | Logging verbosity: `debug`, `info`, `warning`, `error`, `fatal` | From 8b3bcae2c86a5ead235a7f0d2b4157fa5bdd498e Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Fri, 20 Feb 2026 07:52:34 +0000 Subject: [PATCH 6/7] Rename 'Native Install' to 'Conda' and remove CONDA_PREFIX reference Co-Authored-By: Claude Opus 4.6 --- docs/getting-started/installation.mdx | 8 ++++---- docs/getting-started/quick-start.mdx | 5 ++--- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/docs/getting-started/installation.mdx b/docs/getting-started/installation.mdx index ed04bb2..f0582e9 100644 --- a/docs/getting-started/installation.mdx +++ b/docs/getting-started/installation.mdx @@ -1,7 +1,7 @@ --- sidebar_position: 1 title: Installation -description: Install IOWarp via native build, Docker, Spack, or pip. +description: Install IOWarp via Conda, Docker, Spack, or pip. --- # Installation @@ -12,7 +12,7 @@ import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; - + Clone and build IOWarp Core from source using the automated installer. This uses conda internally to manage dependencies and produces a full-featured @@ -52,7 +52,7 @@ chimaera --help ### What's Included -The native install provides the complete IOWarp stack: +The Conda install provides the complete IOWarp stack: - **All engines** — CTE, CAE, CEE with full feature support - **Python API** — `import wrp_cee` for context management @@ -136,7 +136,7 @@ spack install iowarp :::caution Experimental The pip package is experimental. It does not include GPU (CUDA/ROCm), MPI, or HDF5 support. For production use, prefer the -[native install](#) method. +[Conda install](#) method. ::: Install the IOWarp Python package: diff --git a/docs/getting-started/quick-start.mdx b/docs/getting-started/quick-start.mdx index cd3699f..d43b27e 100644 --- a/docs/getting-started/quick-start.mdx +++ b/docs/getting-started/quick-start.mdx @@ -17,7 +17,7 @@ import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; - + ```bash conda activate iowarp @@ -64,8 +64,7 @@ During installation a default configuration file is placed at: ``` This file is only created if it does not already exist, so your customisations -are never overwritten. A reference copy is also installed to -`$CONDA_PREFIX/etc/chimaera/chimaera_default.yaml`. +are never overwritten. The runtime resolves its configuration in this order: From 8ca2875519b67280e666178b6e746d2537477da3 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Fri, 20 Feb 2026 08:54:15 +0000 Subject: [PATCH 7/7] Clean up API reference and split allocator docs - Remove Storage API from sidebar (placeholder page) - Rename Python API doc to 'Context Exploration', remove installation section and internal implementation references - Split allocator guide into separate Memory Backends and Allocator docs Co-Authored-By: Claude Opus 4.6 --- docs/api/python.md | 82 +++------ .../1.allocator/allocator_guide.md | 165 +----------------- .../1.allocator/memory_backend_guide.md | 138 +++++++++++++++ sidebars.ts | 1 - 4 files changed, 169 insertions(+), 217 deletions(-) create mode 100644 docs/sdk/context-transport-primitives/1.allocator/memory_backend_guide.md diff --git a/docs/api/python.md b/docs/api/python.md index 7f043de..c9ec6da 100644 --- a/docs/api/python.md +++ b/docs/api/python.md @@ -1,42 +1,16 @@ -# Context Exploration Engine - Python API Documentation - -## Overview - -The Context Exploration Engine (CEE) provides a high-level Python API for managing and exploring data contexts in IOWarp. The API is accessible through the `wrp_cee` Python module and offers a simple interface for data assimilation, querying, retrieval, and cleanup operations. - -**Key Feature:** The CEE API automatically initializes the IOWarp runtime when you create a `ContextInterface` instance. You don't need to manually initialize Chimaera, CTE, or CAE - the `ContextInterface` constructor handles all of this internally. - -## Installation - -### From pip (Recommended) - -```bash -pip install iowarp-core -``` - -This installs the `iowarp_core` package (runtime utilities, CLI) and the `wrp_cee` Python extension (context exploration API). All native dependencies are bundled — no system libraries or build tools required. - -### From Source - -Build IOWarp with Python bindings enabled: +--- +sidebar_position: 1 +title: Context Exploration +description: API reference for data assimilation, querying, retrieval, and cleanup in IOWarp. +--- -```bash -cmake --preset=debug -DWRP_CORE_ENABLE_PYTHON=ON -cmake --build build -j$(nproc) -sudo cmake --install build -``` +# Context Exploration -The `wrp_cee` module will be installed to your Python site-packages directory. +## Overview -### Verification +The `wrp_cee` Python module provides a high-level API for managing and exploring data contexts in IOWarp. It offers a simple interface for data assimilation, querying, retrieval, and cleanup operations. -```python -import iowarp_core -print("IOWarp version:", iowarp_core.get_version()) - -import wrp_cee -print("CEE API loaded successfully!") -``` +**Key Feature:** The API automatically initializes the IOWarp runtime when you create a `ContextInterface` instance. You don't need to manually initialize Chimaera, CTE, or CAE — the `ContextInterface` constructor handles all of this internally. ## Module: `wrp_cee` @@ -178,10 +152,8 @@ ctx_interface = wrp_cee.ContextInterface() **Parameters:** None **Notes:** -- Automatically initializes CAE client (which in turn initializes CTE and Chimaera) -- Verifies Chimaera IPC is available -- Sets `is_initialized_` flag on success -- Assumes runtime configuration is already set via environment variables (e.g., `CHI_SERVER_CONF`) +- Automatically initializes the full IOWarp runtime stack +- Requires runtime configuration via environment variables (e.g., `CHI_SERVER_CONF`) **Typical Environment Setup:** @@ -221,7 +193,7 @@ result = ctx_interface.context_bundle(bundle) **Description:** -Assimilates one or more data objects into IOWarp. Each `AssimilationCtx` in the bundle describes a source file/dataset to assimilate and where to store it. The method calls the CAE's `ParseOmni` function which schedules assimilation tasks for each context. +Assimilates one or more data objects into IOWarp. Each `AssimilationCtx` in the bundle describes a source file/dataset to assimilate and where to store it. **Example:** @@ -276,7 +248,7 @@ blob_names = ctx_interface.context_query(tag_re, blob_re, max_results=0) **Description:** -Queries the CTE system for blobs matching the specified regex patterns. Uses `BlobQuery` with `Broadcast` pool query to search across all nodes. Returns only the blob names, not the data. +Queries for blobs matching the specified regex patterns across all nodes. Returns only the blob names, not the data. **Example:** @@ -320,7 +292,7 @@ packed_data = ctx_interface.context_retrieve( - Default: `1024` - **`max_context_size`** (int, optional): Maximum total context size in bytes - Default: `268435456` (256MB) -- **`batch_size`** (int, optional): Number of concurrent `AsyncGetBlob` operations +- **`batch_size`** (int, optional): Number of concurrent retrieval operations - Controls parallelism - Default: `32` @@ -332,14 +304,12 @@ packed_data = ctx_interface.context_retrieve( **Description:** Retrieves blob data matching the specified patterns and packs it into a single binary buffer. The method: -1. Uses `BlobQuery` to find matching blobs +1. Finds matching blobs 2. Allocates a buffer of size `max_context_size` -3. Retrieves blobs in batches using `AsyncGetBlob` +3. Retrieves blobs in batches for efficiency 4. Packs data sequentially into the buffer 5. Returns the packed data as a string -Blobs are processed in batches for efficiency. The buffer is automatically allocated and freed. - **Example:** ```python @@ -388,7 +358,7 @@ result = ctx_interface.context_destroy(context_names) **Description:** -Deletes the specified contexts from the CTE system. Each context name is treated as a tag name and deleted using CTE's `DelTag` API. This operation removes the tag and all associated blobs. +Deletes the specified contexts. Each context name is treated as a tag name. This operation removes the tag and all associated blobs. **Example:** @@ -415,7 +385,7 @@ else: ```python #!/usr/bin/env python3 -"""Complete CEE API example""" +"""Complete Python API example""" import wrp_cee as cee import os @@ -480,9 +450,9 @@ dst="iowarp://my_tag" # Wrong! Don't use :// ## Runtime Assumptions -The CEE Python API assumes: +The Python API assumes: -1. **Runtime is Started:** The IOWarp runtime (Chimaera server) should be running, or will be started by the `ContextInterface` constructor. +1. **Runtime is Started:** The IOWarp runtime should be running, or will be started by the `ContextInterface` constructor. 2. **Configuration Available:** Runtime configuration is available via environment variable: ```bash @@ -491,12 +461,6 @@ The CEE Python API assumes: 3. **Proper Permissions:** Your Python process has permission to access shared memory segments and connect to the runtime. -4. **Dependencies Initialized:** When you create a `ContextInterface`, it will: - - Initialize CAE client - - Initialize CTE client (via CAE) - - Initialize Chimaera client (via CTE) - - Verify IPC manager is available - --- ## Error Handling @@ -529,7 +493,5 @@ if result != 0: ## See Also -- **C++ API Documentation:** `context-exploration-engine/api/include/wrp_cee/api/context_interface.h` -- **Unit Tests:** `context-exploration-engine/api/test/test_context_interface.py` -- **Demo Script:** `context-exploration-engine/api/demo/simple_assimilation_demo.py` -- **CTE Documentation:** `context-transfer-engine/docs/cte/cte.md` +- [Quick Start Guide](../getting-started/quick-start) — End-to-end walkthrough +- [Configuration Reference](../deployment/configuration) — Runtime and storage tier setup diff --git a/docs/sdk/context-transport-primitives/1.allocator/allocator_guide.md b/docs/sdk/context-transport-primitives/1.allocator/allocator_guide.md index 4f98d9b..b9c95e6 100644 --- a/docs/sdk/context-transport-primitives/1.allocator/allocator_guide.md +++ b/docs/sdk/context-transport-primitives/1.allocator/allocator_guide.md @@ -1,16 +1,14 @@ -# Memory Allocators & Backends Guide +--- +sidebar_position: 2 +--- -## Overview - -HSHM provides a hierarchy of memory allocators and backends for shared memory, private memory, and GPU memory management. The allocator system supports cross-process memory sharing, GPU-accessible allocations, and lock-free multi-threaded allocation. - -## Allocator Architecture +# Allocator Guide -All allocators inherit from the `Allocator` base class and are wrapped via `BaseAllocator` which provides type-safe allocation methods. +## Overview -**Source:** `hermes_shm/memory/allocator/allocator.h` +HSHM provides a hierarchy of memory allocators for shared memory, private memory, and GPU memory management. All allocators inherit from the `Allocator` base class and are wrapped via `BaseAllocator` which provides type-safe allocation methods. -### Core Pointer Types +## Core Pointer Types HSHM uses offset-based pointers for process-independent shared memory addressing: @@ -28,7 +26,7 @@ char* raw = ptr.ptr_; // Direct access (fast) hipc::ShmPtr<> shm = ptr.shm_; // Shared memory handle (cross-process) ``` -### Common Allocator API +## Common Allocator API All allocators expose these methods through `BaseAllocator`: @@ -51,111 +49,10 @@ FullPtr NewObjs(size_t count, Args&&... args); void DelObjs(FullPtr ptr, size_t count); ``` -## Memory Backends - -Memory backends provide the underlying memory regions that allocators manage. A backend is always created first, then an allocator is constructed on top of it. - -### Backend Lifecycle - -Every backend supports two operations: -- `shm_init()` — Create and initialize a new memory region (the **owner**) -- `shm_attach()` — Attach to an existing memory region created by another process - -### MallocBackend - -Wraps `malloc` for private (non-shared) in-process memory. Useful for single-process tests and allocators that don't need cross-process sharing. - -```cpp -#include "hermes_shm/memory/backend/malloc_backend.h" - -hipc::MallocBackend backend; -size_t heap_size = 128 * 1024 * 1024; // 128 MB -backend.shm_init(hipc::MemoryBackendId(0, 0), heap_size); - -// Create an allocator on top of this backend -auto *alloc = backend.MakeAlloc(); -``` - -### PosixShmMmap - -The primary backend for cross-process shared memory. Uses `shm_open` and `mmap` to create memory-mapped regions accessible by multiple processes. - -```cpp -#include "hermes_shm/memory/backend/posix_shm_mmap.h" - -PosixShmMmap backend; - -// Process 0: Create shared memory -backend.shm_init(MemoryBackendId(0, 0), 512 * 1024 * 1024, "/my_shm_region"); - -// Process 1+: Attach to existing shared memory -backend.shm_attach("/my_shm_region"); -``` - -**Ownership model:** The process that calls `shm_init()` is the owner and is responsible for cleanup. Use `SetOwner()` / `UnsetOwner()` to transfer ownership between processes. - -### GpuMalloc - -**Source:** `hermes_shm/memory/backend/gpu_malloc.h` - -Allocates memory directly on the GPU using `cudaMalloc` (CUDA) or `hipMalloc` (ROCm). - -```cpp -// Only available when HSHM_ENABLE_CUDA or HSHM_ENABLE_ROCM is set -GpuMalloc backend; -backend.shm_init(backend_id, data_capacity); -``` - -**Memory Layout:** -``` -GPU Memory: [MemoryBackendHeader | GpuMallocPrivateHeader | Data...] -``` - -**Characteristics:** -- Allocates entire region on GPU via `GpuApi::Malloc()` -- Creates an IPC handle (`GpuIpcMemHandle`) for cross-process GPU memory sharing -- Enforces minimum 1MB data size -- Freed via `GpuApi::Free()` -- Conditionally compiled: `#if HSHM_ENABLE_CUDA || HSHM_ENABLE_ROCM` - -### GpuShmMmap - -**Source:** `hermes_shm/memory/backend/gpu_shm_mmap.h` - -GPU-accessible POSIX shared memory. Combines host shared memory with GPU registration for zero-copy GPU access. - -```cpp -// Only available when HSHM_ENABLE_CUDA or HSHM_ENABLE_ROCM is set -GpuShmMmap backend; -backend.shm_init(backend_id, url, data_capacity); -``` - -**Memory Layout:** -``` -POSIX SHM File: [4KB backend header | 4KB shared header | Data...] -Virtual Memory: [4KB private header | 4KB shared header | Data...] -``` - -**Characteristics:** -- Creates POSIX shared memory object (`shm_open`) -- Maps with combined private/shared access (`MapMixedMemory`) -- Registers memory with GPU via `GpuApi::RegisterHostMemory()` -- GPU can access the memory directly without explicit transfers -- Supports `shm_attach()` for other processes to join -- Enforces minimum 1MB backend size -- Conditionally compiled: `#if HSHM_ENABLE_CUDA || HSHM_ENABLE_ROCM` - -**Key Difference from GpuMalloc:** -- Memory lives on the host (CPU) but is GPU-accessible -- Inherently shareable via POSIX shared memory (no IPC handle needed) -- Better for data that both CPU and GPU need to access - ## Allocator Types ### MallocAllocator -**Source:** `hermes_shm/memory/allocator/malloc_allocator.h` - Wraps standard `malloc`/`free`. Used for private (non-shared) memory when no shared memory backend is needed. ```cpp @@ -175,8 +72,6 @@ alloc->DelObjs(ptr, 100); ### ArenaAllocator -**Source:** `hermes_shm/memory/allocator/arena_allocator.h` - Bump-pointer allocator. Allocations advance a pointer through a contiguous region. Individual frees are not supported — the entire arena is freed at once via `Reset()`. ```cpp @@ -211,8 +106,6 @@ size_t remaining = alloc->GetRemainingSize(); ### BuddyAllocator -**Source:** `hermes_shm/memory/allocator/buddy_allocator.h` - Power-of-two free list allocator. Maintains separate free lists for different size classes, providing efficient allocation with bounded fragmentation. ```cpp @@ -254,8 +147,6 @@ alloc->Free(ptr); ### MultiProcessAllocator -**Source:** `hermes_shm/memory/allocator/mp_allocator.h` - Three-tier hierarchical allocator designed for multi-process, multi-threaded environments. Each tier adds more contention but accesses more memory. **Architecture:** @@ -301,8 +192,6 @@ The allocator system is designed for multiple processes to share the same memory ### Example: Multi-Process BuddyAllocator -From `context-transport-primitives/test/unit/allocator/test_buddy_allocator_multiprocess.cc`: - ```cpp #include "hermes_shm/memory/allocator/buddy_allocator.h" #include "hermes_shm/memory/backend/posix_shm_mmap.h" @@ -349,8 +238,6 @@ int main(int argc, char **argv) { ### Example: Multi-Process MultiProcessAllocator -From `context-transport-primitives/test/unit/allocator/test_mp_allocator_multiprocess.cc`: - ```cpp #include "hermes_shm/memory/allocator/mp_allocator.h" #include "hermes_shm/memory/backend/posix_shm_mmap.h" @@ -409,8 +296,6 @@ int main(int argc, char **argv) { ### Orchestrating Multi-Process Tests -The shell script `run_mp_allocator_multiprocess_test.sh` shows how to orchestrate multiple processes: - ```bash #!/bin/bash TEST_BINARY="./test_mp_allocator_multiprocess" @@ -442,39 +327,6 @@ wait $RANK0_PID $RANK1_PID $RANK2_PID - `AttachAlloc()` reinterprets the existing memory as an allocator and calls `shm_attach()` — no reinitialization - Ownership (`SetOwner`/`UnsetOwner`) determines which process destroys the shared memory on exit -## GPU Compatibility - -### GpuApi - -The `GpuApi` class provides an abstraction over CUDA and ROCm: - -| Method | Description | -|--------|-------------| -| `GpuApi::Malloc(size)` | Allocate GPU memory | -| `GpuApi::Free(ptr)` | Free GPU memory | -| `GpuApi::Memcpy(dst, src, size, kind)` | Copy memory between host/device | -| `GpuApi::RegisterHostMemory(ptr, size)` | Register host memory for GPU access | -| `GpuApi::UnregisterHostMemory(ptr)` | Unregister host memory | -| `GpuApi::GetIpcMemHandle(ptr)` | Get IPC handle for GPU memory sharing | - -### Conditional Compilation - -GPU backends are only compiled when CUDA or ROCm is enabled: - -```cpp -#if HSHM_ENABLE_CUDA || HSHM_ENABLE_ROCM - // GPU-specific code -#endif - -#if HSHM_IS_HOST - // Host-only operations (initialization, IPC setup) -#endif - -#if HSHM_IS_GPU - // GPU kernel operations -#endif -``` - ## Choosing an Allocator | Allocator | Use Case | Shared Memory | GPU | Free Support | @@ -486,4 +338,5 @@ GPU backends are only compiled when CUDA or ROCm is enabled: ## Related Documentation +- [Memory Backends Guide](./memory_backend_guide) - Backends that provide memory regions for these allocators - [Data Structures Guide](../types/data_structures_guide) - Data structures that use these allocators diff --git a/docs/sdk/context-transport-primitives/1.allocator/memory_backend_guide.md b/docs/sdk/context-transport-primitives/1.allocator/memory_backend_guide.md new file mode 100644 index 0000000..5d0a076 --- /dev/null +++ b/docs/sdk/context-transport-primitives/1.allocator/memory_backend_guide.md @@ -0,0 +1,138 @@ +--- +sidebar_position: 1 +--- + +# Memory Backends Guide + +## Overview + +Memory backends provide the underlying memory regions that allocators manage. A backend is always created first, then an allocator is constructed on top of it. HSHM supports shared memory, private memory, and GPU memory backends. + +## Backend Lifecycle + +Every backend supports two operations: +- `shm_init()` — Create and initialize a new memory region (the **owner**) +- `shm_attach()` — Attach to an existing memory region created by another process + +## MallocBackend + +Wraps `malloc` for private (non-shared) in-process memory. Useful for single-process tests and allocators that don't need cross-process sharing. + +```cpp +#include "hermes_shm/memory/backend/malloc_backend.h" + +hipc::MallocBackend backend; +size_t heap_size = 128 * 1024 * 1024; // 128 MB +backend.shm_init(hipc::MemoryBackendId(0, 0), heap_size); + +// Create an allocator on top of this backend +auto *alloc = backend.MakeAlloc(); +``` + +## PosixShmMmap + +The primary backend for cross-process shared memory. Uses `shm_open` and `mmap` to create memory-mapped regions accessible by multiple processes. + +```cpp +#include "hermes_shm/memory/backend/posix_shm_mmap.h" + +PosixShmMmap backend; + +// Process 0: Create shared memory +backend.shm_init(MemoryBackendId(0, 0), 512 * 1024 * 1024, "/my_shm_region"); + +// Process 1+: Attach to existing shared memory +backend.shm_attach("/my_shm_region"); +``` + +**Ownership model:** The process that calls `shm_init()` is the owner and is responsible for cleanup. Use `SetOwner()` / `UnsetOwner()` to transfer ownership between processes. + +## GpuMalloc + +Allocates memory directly on the GPU using `cudaMalloc` (CUDA) or `hipMalloc` (ROCm). + +```cpp +// Only available when HSHM_ENABLE_CUDA or HSHM_ENABLE_ROCM is set +GpuMalloc backend; +backend.shm_init(backend_id, data_capacity); +``` + +**Memory Layout:** +``` +GPU Memory: [MemoryBackendHeader | GpuMallocPrivateHeader | Data...] +``` + +**Characteristics:** +- Allocates entire region on GPU via `GpuApi::Malloc()` +- Creates an IPC handle (`GpuIpcMemHandle`) for cross-process GPU memory sharing +- Enforces minimum 1MB data size +- Freed via `GpuApi::Free()` +- Conditionally compiled: `#if HSHM_ENABLE_CUDA || HSHM_ENABLE_ROCM` + +## GpuShmMmap + +GPU-accessible POSIX shared memory. Combines host shared memory with GPU registration for zero-copy GPU access. + +```cpp +// Only available when HSHM_ENABLE_CUDA or HSHM_ENABLE_ROCM is set +GpuShmMmap backend; +backend.shm_init(backend_id, url, data_capacity); +``` + +**Memory Layout:** +``` +POSIX SHM File: [4KB backend header | 4KB shared header | Data...] +Virtual Memory: [4KB private header | 4KB shared header | Data...] +``` + +**Characteristics:** +- Creates POSIX shared memory object (`shm_open`) +- Maps with combined private/shared access (`MapMixedMemory`) +- Registers memory with GPU via `GpuApi::RegisterHostMemory()` +- GPU can access the memory directly without explicit transfers +- Supports `shm_attach()` for other processes to join +- Enforces minimum 1MB backend size +- Conditionally compiled: `#if HSHM_ENABLE_CUDA || HSHM_ENABLE_ROCM` + +**Key Difference from GpuMalloc:** +- Memory lives on the host (CPU) but is GPU-accessible +- Inherently shareable via POSIX shared memory (no IPC handle needed) +- Better for data that both CPU and GPU need to access + +## GPU Compatibility + +### GpuApi + +The `GpuApi` class provides an abstraction over CUDA and ROCm: + +| Method | Description | +|--------|-------------| +| `GpuApi::Malloc(size)` | Allocate GPU memory | +| `GpuApi::Free(ptr)` | Free GPU memory | +| `GpuApi::Memcpy(dst, src, size, kind)` | Copy memory between host/device | +| `GpuApi::RegisterHostMemory(ptr, size)` | Register host memory for GPU access | +| `GpuApi::UnregisterHostMemory(ptr)` | Unregister host memory | +| `GpuApi::GetIpcMemHandle(ptr)` | Get IPC handle for GPU memory sharing | + +### Conditional Compilation + +GPU backends are only compiled when CUDA or ROCm is enabled: + +```cpp +#if HSHM_ENABLE_CUDA || HSHM_ENABLE_ROCM + // GPU-specific code +#endif + +#if HSHM_IS_HOST + // Host-only operations (initialization, IPC setup) +#endif + +#if HSHM_IS_GPU + // GPU kernel operations +#endif +``` + +## Related Documentation + +- [Allocator Guide](./allocator_guide) - Allocators that manage memory from these backends +- [Data Structures Guide](../types/data_structures_guide) - Data structures that use these allocators diff --git a/sidebars.ts b/sidebars.ts index 0f7f511..9f2dc38 100644 --- a/sidebars.ts +++ b/sidebars.ts @@ -63,7 +63,6 @@ const sidebars: SidebarsConfig = { items: [ 'api/python', 'api/agents', - 'api/storage', ], }, 'faq',