Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 54 additions & 0 deletions .github/workflows/_runner-xdna2.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna
#
# SPDX-License-Identifier: Apache-2.0

---
# Reusable workflow: runs the Deeploy pytest suite on a self-hosted
# runner equipped with an XDNA2 NPU, inside a Docker image that must
# already be built on that runner (see `docker-image` input).
name: _runner-xdna2

"on":
  workflow_call:
    inputs:
      pytest-marker:
        # Extra pytest marker ANDed with 'xdna2' in the test step below.
        required: true
        type: string
      docker-image:
        # Image tag to run; must be pre-built locally on the runner.
        required: false
        type: string
        default: "deeploy-xdna:local"

jobs:
  test-runner-xdna2:
    runs-on: xdna2-npu
    # NOTE: We cannot use the `container:` directive here because
    # GitHub Actions does not support `--device` flags required for
    # NPU access (/dev/accel/accel0). Instead we use explicit
    # `docker run` commands.
    steps:
      - name: Fix workspace permissions
        shell: bash
        # A previous container run may have left root-owned files behind;
        # chown the workspace back to the runner's UID/GID (expanded by the
        # host shell, not inside the container) so checkout can clean it.
        # `|| true`: best-effort — never fail the job on this step.
        run: |
          docker run --rm \
            -v "${{ github.workspace }}":/workspace \
            ${{ inputs.docker-image }} \
            chown -R $(id -u):$(id -g) /workspace || true

      - name: Checkout Repo
        uses: actions/checkout@v4
        with:
          submodules: recursive

      - name: Run Tests in Docker
        shell: bash
        # --device exposes the NPU to the container; /opt/xilinx is
        # bind-mounted from the host so the container sees the installed
        # XRT runtime. memlock=-1 lifts the locked-memory limit —
        # presumably needed for XRT DMA buffer pinning (TODO confirm).
        run: |
          docker run --rm \
            --device /dev/accel/accel0 \
            --ulimit memlock=-1 \
            -v /opt/xilinx:/opt/xilinx \
            -v "${{ github.workspace }}":/app/Deeploy \
            -w /app/Deeploy \
            ${{ inputs.docker-image }} \
            bash -c "
              pip install -e . &&
              cd DeeployTest &&
              pytest test_platforms.py -v -m 'xdna2 and ${{ inputs.pytest-marker }}'
            "
31 changes: 31 additions & 0 deletions .github/workflows/ci-platform-xdna2.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna
#
# SPDX-License-Identifier: Apache-2.0

---
# Top-level XDNA2 CI entry point: delegates to the reusable
# _runner-xdna2 workflow on pushes, PRs, tags, and manual dispatch.
name: CI • XDNA2

"on":
  push:
    branches:
      - "**"
    tags:
      - "v*.*.*"
  pull_request:
  workflow_dispatch:
    inputs:
      docker_image:
        description: "XDNA2 Docker image (must be pre-built on the runner)"
        required: false
        default: "deeploy-xdna:local"

# Cancel superseded runs of the same ref to free the (scarce) NPU runner.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  xdna2-kernels:
    uses: ./.github/workflows/_runner-xdna2.yml
    with:
      pytest-marker: "kernels"
      # `inputs.docker_image` is empty for push/PR events (it only exists
      # for workflow_dispatch), so fall back to the default local tag.
      docker-image: ${{ inputs.docker_image || 'deeploy-xdna:local' }}
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,7 @@ CHANGELOG_GEN.md
# Container Artifacts
.pyusbip/
.cache/

# Claude context file
CLAUDE.md
Container/xrt-debs/
17 changes: 17 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ elseif(platform STREQUAL SoftHier)
message(STATUS "Building for platform 'SoftHier'")
elseif(platform STREQUAL Chimera)
message(STATUS "Building for platform 'Chimera'")
elseif(platform STREQUAL XDNA2)
message(STATUS "Building for platform 'XDNA2'")
else()
message(FATAL_ERROR "Invalid platform '${platform}' specified!")
endif()
Expand Down Expand Up @@ -299,5 +301,20 @@ if(platform STREQUAL Chimera)

endif()

# XDNA2 platform: host-side C++ test build; the kernel/runtime pieces
# live in the two subdirectories added below.
if(platform STREQUAL XDNA2)

  project(${TESTNAME} LANGUAGES CXX)

  # NOTE(review): "[cMake ]" casing looks like a typo — confirm against the
  # status-banner style used by the other platform sections in this file.
  message(STATUS "============================= XDNA2 Configuration ============================")
  message(STATUS "[cMake ] GENERATED_SOURCE = " ${GENERATED_SOURCE})
  message(STATUS "[cMake ] TESTNAME         = " ${TESTNAME})
  message(STATUS "==============================================================================")
  message(STATUS "")

  add_subdirectory(TargetLibraries/XDNA2)
  add_subdirectory(DeeployTest/Platforms/XDNA2)

endif()


print_simulation_config()
52 changes: 52 additions & 0 deletions Container/Dockerfile.deeploy-xdna
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# SPDX-FileCopyrightText: 2026 ETH Zurich and University of Bologna
#
# SPDX-License-Identifier: Apache-2.0

# Image for running Deeploy's XDNA2 (AMD NPU) flow: Ubuntu 24.04 with the
# XRT user-space packages from the AMD PPA plus Deeploy's Python deps.
FROM ubuntu:24.04

ARG DEBIAN_FRONTEND=noninteractive
ENV TZ=Etc/UTC
ENV LANG=C.UTF-8
ENV LC_ALL=C.UTF-8
# Ubuntu 24.04's system Python is "externally managed" (PEP 668); this
# lets pip install into it, which is fine inside a container.
ENV PIP_BREAK_SYSTEM_PACKAGES=1
# NOTE(review): placeholder value — presumably some tooling only checks
# that this variable is set; confirm nothing resolves it as a real path.
ENV LLVM_INSTALL_DIR="nope"

# XRT and NPU runtime packages are published in the amd-team/xrt PPA,
# which needs add-apt-repository (hence the two-stage install).
RUN apt-get update && apt-get install -y \
    software-properties-common \
    && add-apt-repository -y ppa:amd-team/xrt \
    && apt-get update && apt-get install -y \
    cmake \
    ninja-build \
    g++ \
    git \
    git-lfs \
    python3 \
    python3-pip \
    python-is-python3 \
    uuid-dev \
    wget \
    curl \
    ccache \
    libxrt2 \
    libxrt-npu2 \
    libxrt-dev \
    libxrt-utils \
    libxrt-utils-npu \
    && rm -rf /var/lib/apt/lists/*

# Make the XRT installation discoverable (binaries and shared libraries).
ENV XILINX_XRT=/opt/xilinx/xrt
ENV PATH=${XILINX_XRT}/bin:${PATH}
ENV LD_LIBRARY_PATH=${XILINX_XRT}/lib


WORKDIR /app
# Pre-install Python dependencies: convert pyproject.toml to a plain
# requirements file, install it plus the XDNA-specific extras, then drop
# the temporary files so they don't bloat the layer.
COPY pyproject.toml requirements-xdna.txt ./
RUN pip install toml-to-requirements && \
    toml-to-req --toml-file pyproject.toml && \
    pip install -r requirements.txt && \
    pip install -r requirements-xdna.txt && \
    rm -f requirements.txt pyproject.toml requirements-xdna.txt

# Interpreter path — presumably consumed by the MLIR-AIE toolchain
# (TODO confirm against the tool that reads MLIR_AIE_PYTHON).
ENV MLIR_AIE_PYTHON=/usr/bin/python3

WORKDIR /app/Deeploy
201 changes: 201 additions & 0 deletions Deeploy/MLIRDataTypes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
# SPDX-FileCopyrightText: 2026 ETH Zurich and University of Bologna
#
# SPDX-License-Identifier: Apache-2.0
"""Base classes for MLIR-emitting node templates and code transformations.

This module provides:

* :class:`MLIRNodeTemplate` — a :class:`NodeTemplate` subclass whose
``emit()`` method populates an ``mlir.ir.Module`` instead of rendering C.
* :class:`MLIRExecutionBlock` — MLIR-specific execution state replacing the
C-oriented :class:`ExecutionBlock` (code-snippet deque) with MLIR builder
state (tile references, ObjectFifo handles, tiling parameters).
* :class:`MLIRCodeTransformationPass` — base class for MLIR code
transformation passes that operate on an :class:`MLIRExecutionBlock`.
* :class:`MLIRCodeTransformation` — two-phase pass container
(``devicePasses`` + ``runtimeSequencePasses``) that the deployer
orchestrates inside ``@aie_d.device`` and ``@aiex_d.runtime_sequence``
regions respectively.

All classes are intentionally dialect-agnostic so that future MLIR-based
backends (NVGPU, Linalg, …) can reuse them.
"""

from __future__ import annotations

from abc import abstractmethod
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple

from Deeploy.DeeployTypes import NodeTemplate

if TYPE_CHECKING:
from Deeploy.DeeployTypes import NetworkContext, OperatorRepresentation

# ======================================================================
# MLIRExecutionBlock
# ======================================================================


class MLIRExecutionBlock:
    """Carrier of per-operator MLIR builder state.

    This is the MLIR counterpart of :class:`ExecutionBlock`: instead of a
    deque of C code snippets it holds the tile handles, ObjectFifo
    bookkeeping, and tiling metadata that the MLIR code-transformation
    passes read and write.

    Fields are filled in progressively:

    1. The deployer assigns ``computeTile``, ``shimTile``,
       ``operatorRepresentation`` and ``patternMemoryConstraint``.
    2. A device-phase pass (e.g. ``MLIRObjectFifoPass``) fills
       ``fifoMap``, ``fifoTypes``, ``tileSize``, ``numTiles``,
       ``kernelFuncName`` and ``kernelObjFile``.
    3. The deployer sets ``runtimeSequenceArgs`` before the
       runtime-sequence phase.
    4. A runtime-sequence pass (e.g. ``MLIRRuntimeSequencePass``) consumes
       everything above to emit the DMA configuration.
    """

    def __init__(self, computeTile: Any = None, shimTile: Any = None) -> None:
        # --- deployer-provided state --------------------------------------
        self.computeTile: Any = computeTile  # MLIR compute-tile reference
        self.shimTile: Any = shimTile  # MLIR shim-tile reference
        self.operatorRepresentation: OperatorRepresentation = {}  # from parser
        self.patternMemoryConstraint: Any = None  # midend tiling solution; may stay None
        self.template: Optional[Any] = None  # MLIRNodeTemplate for this node
        self.inputNames: List[str] = []  # input tensor names (from parser)
        self.outputNames: List[str] = []  # output tensor names (from parser)
        self.runtimeSequenceArgs: List[Any] = []  # set before runtime-seq phase

        # --- filled by device-phase passes --------------------------------
        self.fifoMap: Dict[str, str] = {}  # tensor name -> ObjectFifo name
        self.fifoTypes: Dict[str, Any] = {}  # tensor name -> MemRefType
        self.tileSize: int = 0
        self.numTiles: int = 0
        self.numElements: int = 0
        self.kernelFuncName: Optional[str] = None
        self.kernelObjFile: Optional[str] = None


# ======================================================================
# MLIRCodeTransformationPass / MLIRCodeTransformation
# ======================================================================


class MLIRCodeTransformationPass:
    """Abstract base for passes over an :class:`MLIRExecutionBlock`.

    The default :meth:`apply` is the identity transformation; concrete
    passes override it to inspect or mutate *mlirBlock* (and optionally
    emit MLIR operations at the current insertion point) before handing
    the pair back to the pipeline.
    """

    def apply(self, ctxt: NetworkContext, mlirBlock: MLIRExecutionBlock,
              name: str) -> Tuple[NetworkContext, MLIRExecutionBlock]:
        # Identity pass: nothing to transform in the base class.
        return (ctxt, mlirBlock)


class MLIRCodeTransformation:
    """Container orchestrating the two phases of MLIR code transformation.

    *devicePasses* are applied while the builder sits inside an
    ``@aie_d.device(...)`` region (ObjectFifo creation, external-kernel
    declarations, …); *runtimeSequencePasses* while it sits inside an
    ``@aiex_d.runtime_sequence`` block (DMA configuration, token await, …).

    The deployer invokes :meth:`applyDevicePasses` and
    :meth:`applyRuntimeSequencePasses` at the matching points.
    """

    def __init__(self,
                 devicePasses: Optional[List[MLIRCodeTransformationPass]] = None,
                 runtimeSequencePasses: Optional[List[MLIRCodeTransformationPass]] = None) -> None:
        # `or []` normalizes both None and an empty list to a fresh list,
        # while keeping a caller-supplied non-empty list aliased.
        self.devicePasses: List[MLIRCodeTransformationPass] = devicePasses or []
        self.runtimeSequencePasses: List[MLIRCodeTransformationPass] = runtimeSequencePasses or []

    @staticmethod
    def _chain(passes: List[MLIRCodeTransformationPass], ctxt: NetworkContext,
               mlirBlock: MLIRExecutionBlock,
               name: str) -> Tuple[NetworkContext, MLIRExecutionBlock]:
        # Thread (ctxt, mlirBlock) through every pass, in order.
        for stage in passes:
            ctxt, mlirBlock = stage.apply(ctxt, mlirBlock, name)
        return ctxt, mlirBlock

    def applyDevicePasses(self, ctxt: NetworkContext, mlirBlock: MLIRExecutionBlock,
                          name: str) -> Tuple[NetworkContext, MLIRExecutionBlock]:
        """Run the device-region phase over *mlirBlock*."""
        return self._chain(self.devicePasses, ctxt, mlirBlock, name)

    def applyRuntimeSequencePasses(self, ctxt: NetworkContext, mlirBlock: MLIRExecutionBlock,
                                   name: str) -> Tuple[NetworkContext, MLIRExecutionBlock]:
        """Run the runtime-sequence phase over *mlirBlock*."""
        return self._chain(self.runtimeSequencePasses, ctxt, mlirBlock, name)


# ======================================================================
# MLIRNodeTemplate
# ======================================================================


class MLIRNodeTemplate(NodeTemplate):
    """NodeTemplate subclass that emits MLIR instead of C code.

    Subclasses must override :meth:`emit` to add dialect operations to an
    ``mlir.ir.Module`` (or region / insertion point provided via *kwargs*).

    ``generate()`` is overridden as a convenience that delegates to
    :meth:`emit` and returns an empty string (the MLIR lives in the module
    context the caller supplied, not in C source text).  The base-class
    ``alignToContext`` / ``hoistTransientBuffers`` hooks are retained and
    work unchanged.
    """

    def __init__(self):
        # Empty Mako template — no C code is generated for MLIR nodes.
        super().__init__("")

    # ------------------------------------------------------------------
    # Subclass API
    # ------------------------------------------------------------------

    # NOTE(review): @abstractmethod is only enforced at instantiation if
    # NodeTemplate's metaclass is ABCMeta — confirm; otherwise this is
    # documentation-only.
    @abstractmethod
    def emit(self, operatorRepresentation: OperatorRepresentation, **kwargs) -> None:
        """Populate an MLIR module with the operations for this node.

        The caller (typically the deployer) sets up an ``mlir.ir.Module``
        with the appropriate device wrapper and passes dialect-specific
        context through *kwargs* (e.g. insertion point, tile references,
        ObjectFifo handles).

        Parameters
        ----------
        operatorRepresentation : OperatorRepresentation
            The parser's node representation (buffer names, sizes, types …).
        **kwargs
            Dialect-specific context provided by the deployer.
        """
        ...

    # ------------------------------------------------------------------
    # NodeTemplate overrides
    # ------------------------------------------------------------------

    def generate(self, operatorRepresentation: Optional[OperatorRepresentation] = None, **kwargs) -> str:
        """Generate an MLIR string for this node.

        This default implementation is a thin wrapper: it delegates to
        :meth:`emit`. Deployers that need to build a single module from
        multiple nodes should call :meth:`emit` directly with the shared
        module context and then stringify the complete module themselves.

        Parameters
        ----------
        operatorRepresentation : OperatorRepresentation, optional
            The parser's node representation; defaults to an empty dict.

        Returns
        -------
        str
            MLIR text (printable module or fragment).
        """
        # Fix: the previous signature used a mutable default (`= {}`); a
        # subclass `emit` that mutates the dict would leak state across
        # calls.  Normalize None to a fresh dict instead (backward
        # compatible: emit still receives a dict when no arg is given).
        if operatorRepresentation is None:
            operatorRepresentation = {}
        self.emit(operatorRepresentation, **kwargs)
        return ""
Loading
Loading