diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index e4abf423..00000000 Binary files a/.DS_Store and /dev/null differ diff --git a/.gitignore b/.gitignore index 0950c051..c7686a89 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,8 @@ + +# Generated files +samples/benchmarks/plots/**/*.csv +samples/benchmarks/plots/**/*.pdf + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] @@ -8,7 +13,8 @@ __pycache__/ # DS STore -*.DS_STORE +.DS_Store +**/.DS_Store # .idea @@ -33,6 +39,10 @@ share/python-wheels/ *.egg MANIFEST +# SDK +/tests/csl_runtime/cerebras-sdk/ +/tests/csl_runtime/cerebras-sdk.tar.gz + # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. diff --git a/LICENSE b/LICENSE index 22f678fa..e956fbce 100644 --- a/LICENSE +++ b/LICENSE @@ -2,6 +2,7 @@ BSD 3-Clause License Copyright (c) 2026, Lawrence Livermore National Security, LLC Copyright (c) 2026, SPCL, ETH Zurich +Copyright (c) 2026, Noéda AG All rights reserved. diff --git a/README.md b/README.md index 7a11e478..b40ff77a 100644 --- a/README.md +++ b/README.md @@ -29,8 +29,8 @@ For full details, see the paper: Clone the repository and install the package: ```bash -git clone https://github.com/glukas/spatialstencil.git -cd spatialstencil +git clone https://github.com/glukas/spada.git +cd spada pip install -e . ``` @@ -64,7 +64,7 @@ Key options: To compile a GT4Py stencil file to SPADA IR (`.spst` and `.sptl`): ```bash -python -m spatialstencil.cli.gt4py_to_spatial samples/stencils.py 128,128,80 output/ --function-name laplacian +python -m spada.cli.gt4py_to_spatial samples/stencils.py 128,128,80 output/ --function-name laplacian ``` Arguments in order: `input_file`, `domain_size` (comma-separated `x,y,z`), `output_dir`. Omitting `--function-name` compiles all stencils in the file. @@ -76,13 +76,13 @@ The resulting `.sptl` file can then be passed to `sptlc`. 
After compiling with `cslc` (invoked automatically by `sptlc` unless `--generate-only` is set), run the kernel via the Cerebras `cs_python` launcher: ```bash -cs_python spatialstencil/runtime/runtime.py output/ in_field.npy +cs_python spada/runtime/runtime.py output/ in_field.npy ``` Alternatively, use the `Program` class directly from Python (must be run with `cs_python`): ```python -from spatialstencil.runtime.runtime import Program +from spada.runtime.runtime import Program import numpy as np program = Program("output/") @@ -183,23 +183,26 @@ brew install lima qemu lima-additional-guestagents # one-time tests/csl_runtime/run-in-lima.sh --sdk-url ``` -This creates the Lima VM on first use (~5–10 min), downloads and extracts the SDK to `tests/csl_runtime/cerebras-sdk/`, installs Python dependencies inside the VM, and runs the full test suite. Other modes: +This creates the Lima VM on first use (~5–10 min), downloads and extracts the SDK to `tests/csl_runtime/cerebras-sdk/`, installs Python dependencies inside the VM, and runs the full test suite. +If the SDK tarball is already downloaded or extracted, use `--sdk /path/to/cs_sdk` instead of `--sdk-url`. + +Other modes: ```bash # Run a single test -tests/csl_runtime/run-in-lima.sh --sdk-url --test test_add.sh +tests/csl_runtime/run-in-lima.sh --sdk /path/to/cs_sdk --test test_add.sh # Verify the SDK toolchain only -tests/csl_runtime/run-in-lima.sh --sdk-url --check +tests/csl_runtime/run-in-lima.sh --sdk /path/to/cs_sdk --check # Run the Cerebras SDK smoke test -tests/csl_runtime/run-in-lima.sh --sdk-url --smoke /path/to/csl-extras-* +tests/csl_runtime/run-in-lima.sh --sdk /path/to/cs_sdk --smoke /path/to/csl-extras-* # Drop into an interactive shell inside the VM -tests/csl_runtime/run-in-lima.sh --sdk-url --shell +tests/csl_runtime/run-in-lima.sh --sdk /path/to/cs_sdk --shell ``` -If the SDK tarball is already downloaded or extracted, use `--sdk /path/to/cs_sdk` instead of `--sdk-url`. The repository must reside under `$HOME` (Lima mounts the Mac home directory by default). 
The Lima configuration is in `tests/csl_runtime/lima-ubuntu-x86_64.yaml`. + The repository must reside under `$HOME` (Lima mounts the Mac home directory by default). The Lima configuration is in `tests/csl_runtime/lima-ubuntu-x86_64.yaml`. **Cleanup** generated test artifacts: @@ -214,7 +217,7 @@ make -C tests/csl_runtime clean-sdk # also remove the downloaded SDK Questions, discussions, and feedback are welcome via GitHub Issues: -- **Bug reports and feature requests**: [GitHub Issues](https://github.com/glukas/spatialstencil/issues) +- **Bug reports and feature requests**: [GitHub Issues](https://github.com/glukas/spada/issues) --- @@ -227,9 +230,9 @@ Contributions are welcome. Please follow these steps: 3. **Write tests** for any new functionality. Tests live in `tests/` and are organized by subsystem (`stencil_ir/`, `spatial_ir/`, `placement/`, `gt4py/`, `csl_runtime/`). 4. **Format** your code with `black` and `isort`, and verify with `flake8`: ```bash - black spatialstencil tests - isort spatialstencil tests - flake8 spatialstencil tests + black spada tests + isort spada tests + flake8 spada tests ``` 5. **Run tests**: see the [Testing](#testing) section for Python unit tests and CSL runtime tests. 6. **Open a pull request** against `main` with a clear description of the change and its motivation. diff --git a/irspec/docs/index.md b/irspec/docs/index.md index 000ea345..32c5b651 100644 --- a/irspec/docs/index.md +++ b/irspec/docs/index.md @@ -1,17 +1,17 @@ -# Welcome to MkDocs +# SPADA — Multi-Level Spatial IR Specification -For full documentation visit [mkdocs.org](https://www.mkdocs.org). +SPADA is a programming language and compiler for spatial dataflow architectures such as the [Cerebras Wafer-Scale Engine](https://www.cerebras.net/). It provides precise control over data placement, communication streams, and asynchronous execution while abstracting architecture-specific routing details. 
-## Commands +This site documents the three intermediate representations (IRs) used in the SPADA compilation pipeline: -* `mkdocs new [dir-name]` - Create a new project. -* `mkdocs serve` - Start the live-reloading docs server. -* `mkdocs build` - Build the documentation site. -* `mkdocs -h` - Print help message and exit. +| IR | Input | Output | +|---|---|---| +| **Stencil IR** | GT4Py stencil definitions | Spatial IR | +| **Spatial IR** | Stencil IR / hand-written SPADA kernels | Dataflow Task IR | +| **Dataflow Task IR** | Spatial IR | Cerebras CSL | + +For full details on the SPADA language, compiler, and hardware results, see: + +> Lukas Gianinazzi, Tal Ben-Nun, Torsten Hoefler. *SPADA: A Spatial Dataflow Architecture Programming Language.* arXiv:2511.09447, 2025. -## Project layout - mkdocs.yml # The configuration file. - docs/ - index.md # The documentation homepage. - ... # Other markdown pages, images and other files. diff --git a/irspec/mkdocs.yml b/irspec/mkdocs.yml index 76ffdc4c..9df7e611 100644 --- a/irspec/mkdocs.yml +++ b/irspec/mkdocs.yml @@ -1,4 +1,4 @@ -site_name: Multi-Level Spatial IR +site_name: Spatial Dataflow Abstraction (SpaDA) site_url: http://localhost:8000 theme: material nav: diff --git a/samples/benchmarks/bench_hardware.sh b/samples/benchmarks/bench_hardware.sh index e98ef780..bfad55d5 100755 --- a/samples/benchmarks/bench_hardware.sh +++ b/samples/benchmarks/bench_hardware.sh @@ -9,7 +9,7 @@ BLUE='\033[0;34m' NC='\033[0m' BENCHMARK_DIR="samples/benchmarks" -RUNTIME="spatialstencil/runtime/runtime.py" +RUNTIME="spada/runtime/runtime.py" OUTPUT_DIR="benchmark_results" mkdir $OUTPUT_DIR diff --git a/samples/benchmarks/laplacian_4_4_4_test.sptl b/samples/benchmarks/laplacian_4_4_4_test.sptl deleted file mode 100644 index 738adef8..00000000 --- a/samples/benchmarks/laplacian_4_4_4_test.sptl +++ /dev/null @@ -1,1600 +0,0 @@ -kernel @laplacian<>(stream[6, 6] readonly _in_field, stream[4, 4] writeonly __kernel_out_0) { - place u16 i#3, u16 
j#3 in [0:1:2 , 1:5:2] { - f32[4] in_field_0_0_0 - f32[4] out_field_0_0_0#1 - f32[4] _temp_0_0_0 - f32[4] _temp_0_0_0#1 - f32[4] _temp_0_0_0#2 - f32[4] _temp_0_0_0#3 - f32[4] _temp_0_0_0#4 - } - place u16 i#3, u16 j#3 in [0:1:2 , 2:5:2] { - f32[4] in_field_0_0_0 - f32[4] out_field_0_0_0#1 - f32[4] _temp_0_0_0 - f32[4] _temp_0_0_0#1 - f32[4] _temp_0_0_0#2 - f32[4] _temp_0_0_0#3 - f32[4] _temp_0_0_0#4 - } - place u16 i#1, u16 j#1 in [0:1:2 , 0:1:2] { - f32[4] in_field_0_0_0 - f32[4] out_field_0_0_0#1 - f32[4] _temp_0_0_0 - f32[4] _temp_0_0_0#1 - f32[4] _temp_0_0_0#2 - f32[4] _temp_0_0_0#3 - f32[4] _temp_0_0_0#4 - } - place u16 i#1, u16 j#1 in [5:6:2 , 0:1:2] { - f32[4] in_field_0_0_0 - f32[4] out_field_0_0_0#1 - f32[4] _temp_0_0_0 - f32[4] _temp_0_0_0#1 - f32[4] _temp_0_0_0#2 - f32[4] _temp_0_0_0#3 - f32[4] _temp_0_0_0#4 - } - place u16 i#1, u16 j#1 in [1:5:2 , 0:1:2] { - f32[4] in_field_0_0_0 - f32[4] out_field_0_0_0#1 - f32[4] _temp_0_0_0 - f32[4] _temp_0_0_0#1 - f32[4] _temp_0_0_0#2 - f32[4] _temp_0_0_0#3 - f32[4] _temp_0_0_0#4 - } - place u16 i#1, u16 j#1 in [2:5:2 , 0:1:2] { - f32[4] in_field_0_0_0 - f32[4] out_field_0_0_0#1 - f32[4] _temp_0_0_0 - f32[4] _temp_0_0_0#1 - f32[4] _temp_0_0_0#2 - f32[4] _temp_0_0_0#3 - f32[4] _temp_0_0_0#4 - } - place u16 i#2, u16 j#2 in [0:1:2 , 5:6:2] { - f32[4] in_field_0_0_0 - f32[4] out_field_0_0_0#1 - f32[4] _temp_0_0_0 - f32[4] _temp_0_0_0#1 - f32[4] _temp_0_0_0#2 - f32[4] _temp_0_0_0#3 - f32[4] _temp_0_0_0#4 - } - place u16 i#2, u16 j#2 in [5:6:2 , 5:6:2] { - f32[4] in_field_0_0_0 - f32[4] out_field_0_0_0#1 - f32[4] _temp_0_0_0 - f32[4] _temp_0_0_0#1 - f32[4] _temp_0_0_0#2 - f32[4] _temp_0_0_0#3 - f32[4] _temp_0_0_0#4 - } - place u16 i#2, u16 j#2 in [1:5:2 , 5:6:2] { - f32[4] in_field_0_0_0 - f32[4] out_field_0_0_0#1 - f32[4] _temp_0_0_0 - f32[4] _temp_0_0_0#1 - f32[4] _temp_0_0_0#2 - f32[4] _temp_0_0_0#3 - f32[4] _temp_0_0_0#4 - } - place u16 i#2, u16 j#2 in [2:5:2 , 5:6:2] { - f32[4] in_field_0_0_0 - f32[4] 
out_field_0_0_0#1 - f32[4] _temp_0_0_0 - f32[4] _temp_0_0_0#1 - f32[4] _temp_0_0_0#2 - f32[4] _temp_0_0_0#3 - f32[4] _temp_0_0_0#4 - } - place u16 i#4, u16 j#4 in [5:6:2 , 4:5:2] { - f32[4] in_field_0_0_0 - f32[4] out_field_0_0_0#1 - f32[4] _temp_0_0_0 - f32[4] _temp_0_0_0#1 - f32[4] _temp_0_0_0#2 - f32[4] _temp_0_0_0#3 - f32[4] _temp_0_0_0#4 - } - place u16 i, u16 j in [4:5:2 , 4:5:2] { - f32[4] out_field_0_0_0 - f32[4] in_field_0_0_0 - f32[4] out_field_0_0_0#1 - f32[4] _temp_0_0_0 - f32[4] _temp_0_0_0#1 - f32[4] _temp_0_0_0#2 - f32[4] _temp_0_0_0#3 - f32[4] _temp_0_0_0#4 - } - place u16 i#4, u16 j#4 in [5:6:2 , 2:4:2] { - f32[4] in_field_0_0_0 - f32[4] out_field_0_0_0#1 - f32[4] _temp_0_0_0 - f32[4] _temp_0_0_0#1 - f32[4] _temp_0_0_0#2 - f32[4] _temp_0_0_0#3 - f32[4] _temp_0_0_0#4 - } - place u16 i#4, u16 j#4 in [5:6:2 , 3:4:2] { - f32[4] in_field_0_0_0 - f32[4] out_field_0_0_0#1 - f32[4] _temp_0_0_0 - f32[4] _temp_0_0_0#1 - f32[4] _temp_0_0_0#2 - f32[4] _temp_0_0_0#3 - f32[4] _temp_0_0_0#4 - } - place u16 i#4, u16 j#4 in [5:6:2 , 1:2:2] { - f32[4] in_field_0_0_0 - f32[4] out_field_0_0_0#1 - f32[4] _temp_0_0_0 - f32[4] _temp_0_0_0#1 - f32[4] _temp_0_0_0#2 - f32[4] _temp_0_0_0#3 - f32[4] _temp_0_0_0#4 - } - place u16 i, u16 j in [1:2:2 , 2:4:2] { - f32[4] out_field_0_0_0 - f32[4] in_field_0_0_0 - f32[4] out_field_0_0_0#1 - f32[4] _temp_0_0_0 - f32[4] _temp_0_0_0#1 - f32[4] _temp_0_0_0#2 - f32[4] _temp_0_0_0#3 - f32[4] _temp_0_0_0#4 - } - place u16 i, u16 j in [1:2:2 , 3:4:2] { - f32[4] out_field_0_0_0 - f32[4] in_field_0_0_0 - f32[4] out_field_0_0_0#1 - f32[4] _temp_0_0_0 - f32[4] _temp_0_0_0#1 - f32[4] _temp_0_0_0#2 - f32[4] _temp_0_0_0#3 - f32[4] _temp_0_0_0#4 - } - place u16 i, u16 j in [4:5:2 , 1:2:2] { - f32[4] out_field_0_0_0 - f32[4] in_field_0_0_0 - f32[4] out_field_0_0_0#1 - f32[4] _temp_0_0_0 - f32[4] _temp_0_0_0#1 - f32[4] _temp_0_0_0#2 - f32[4] _temp_0_0_0#3 - f32[4] _temp_0_0_0#4 - } - place u16 i, u16 j in [2:4:2 , 4:5:2] { - f32[4] out_field_0_0_0 - 
f32[4] in_field_0_0_0 - f32[4] out_field_0_0_0#1 - f32[4] _temp_0_0_0 - f32[4] _temp_0_0_0#1 - f32[4] _temp_0_0_0#2 - f32[4] _temp_0_0_0#3 - f32[4] _temp_0_0_0#4 - } - place u16 i, u16 j in [3:4:2 , 4:5:2] { - f32[4] out_field_0_0_0 - f32[4] in_field_0_0_0 - f32[4] out_field_0_0_0#1 - f32[4] _temp_0_0_0 - f32[4] _temp_0_0_0#1 - f32[4] _temp_0_0_0#2 - f32[4] _temp_0_0_0#3 - f32[4] _temp_0_0_0#4 - } - place u16 i, u16 j in [1:2:2 , 4:5:2] { - f32[4] out_field_0_0_0 - f32[4] in_field_0_0_0 - f32[4] out_field_0_0_0#1 - f32[4] _temp_0_0_0 - f32[4] _temp_0_0_0#1 - f32[4] _temp_0_0_0#2 - f32[4] _temp_0_0_0#3 - f32[4] _temp_0_0_0#4 - } - place u16 i, u16 j in [2:4:2 , 2:4:2] { - f32[4] out_field_0_0_0 - f32[4] in_field_0_0_0 - f32[4] out_field_0_0_0#1 - f32[4] _temp_0_0_0 - f32[4] _temp_0_0_0#1 - f32[4] _temp_0_0_0#2 - f32[4] _temp_0_0_0#3 - f32[4] _temp_0_0_0#4 - } - place u16 i, u16 j in [2:4:2 , 3:4:2] { - f32[4] out_field_0_0_0 - f32[4] in_field_0_0_0 - f32[4] out_field_0_0_0#1 - f32[4] _temp_0_0_0 - f32[4] _temp_0_0_0#1 - f32[4] _temp_0_0_0#2 - f32[4] _temp_0_0_0#3 - f32[4] _temp_0_0_0#4 - } - place u16 i, u16 j in [3:4:2 , 2:4:2] { - f32[4] out_field_0_0_0 - f32[4] in_field_0_0_0 - f32[4] out_field_0_0_0#1 - f32[4] _temp_0_0_0 - f32[4] _temp_0_0_0#1 - f32[4] _temp_0_0_0#2 - f32[4] _temp_0_0_0#3 - f32[4] _temp_0_0_0#4 - } - place u16 i, u16 j in [3:4:2 , 3:4:2] { - f32[4] out_field_0_0_0 - f32[4] in_field_0_0_0 - f32[4] out_field_0_0_0#1 - f32[4] _temp_0_0_0 - f32[4] _temp_0_0_0#1 - f32[4] _temp_0_0_0#2 - f32[4] _temp_0_0_0#3 - f32[4] _temp_0_0_0#4 - } - place u16 i, u16 j in [4:5:2 , 2:4:2] { - f32[4] out_field_0_0_0 - f32[4] in_field_0_0_0 - f32[4] out_field_0_0_0#1 - f32[4] _temp_0_0_0 - f32[4] _temp_0_0_0#1 - f32[4] _temp_0_0_0#2 - f32[4] _temp_0_0_0#3 - f32[4] _temp_0_0_0#4 - } - place u16 i, u16 j in [4:5:2 , 3:4:2] { - f32[4] out_field_0_0_0 - f32[4] in_field_0_0_0 - f32[4] out_field_0_0_0#1 - f32[4] _temp_0_0_0 - f32[4] _temp_0_0_0#1 - f32[4] _temp_0_0_0#2 - 
f32[4] _temp_0_0_0#3 - f32[4] _temp_0_0_0#4 - } - place u16 i, u16 j in [2:4:2 , 1:2:2] { - f32[4] out_field_0_0_0 - f32[4] in_field_0_0_0 - f32[4] out_field_0_0_0#1 - f32[4] _temp_0_0_0 - f32[4] _temp_0_0_0#1 - f32[4] _temp_0_0_0#2 - f32[4] _temp_0_0_0#3 - f32[4] _temp_0_0_0#4 - } - place u16 i, u16 j in [3:4:2 , 1:2:2] { - f32[4] out_field_0_0_0 - f32[4] in_field_0_0_0 - f32[4] out_field_0_0_0#1 - f32[4] _temp_0_0_0 - f32[4] _temp_0_0_0#1 - f32[4] _temp_0_0_0#2 - f32[4] _temp_0_0_0#3 - f32[4] _temp_0_0_0#4 - } - place u16 i, u16 j in [1:2:2 , 1:2:2] { - f32[4] out_field_0_0_0 - f32[4] in_field_0_0_0 - f32[4] out_field_0_0_0#1 - f32[4] _temp_0_0_0 - f32[4] _temp_0_0_0#1 - f32[4] _temp_0_0_0#2 - f32[4] _temp_0_0_0#3 - f32[4] _temp_0_0_0#4 - } - dataflow u16 i#11, u16 j#11 in [0:1:2 , 1:5:2] { - stream _stream_in_field#4 = relative_stream(1, 0) { - hops = [(1, 0)], - channel = 0 - } - stream _stream_in_field#5 = relative_stream(1, 0) { - hops = [(1, 0)], - channel = 1 - } - } - dataflow u16 i#11, u16 j#11 in [0:1:2 , 2:5:2] { - stream _stream_in_field#4 = relative_stream(1, 0) { - hops = [(1, 0)], - channel = 0 - } - stream _stream_in_field#5 = relative_stream(1, 0) { - hops = [(1, 0)], - channel = 1 - } - } - dataflow u16 i#13, u16 j#13 in [1:5:2 , 0:1:2] { - stream _stream_in_field#6 = relative_stream(0, 1) { - hops = [(0, 1)], - channel = 2 - } - stream _stream_in_field#7 = relative_stream(0, 1) { - hops = [(0, 1)], - channel = 3 - } - } - dataflow u16 i#13, u16 j#13 in [2:5:2 , 0:1:2] { - stream _stream_in_field#6 = relative_stream(0, 1) { - hops = [(0, 1)], - channel = 2 - } - stream _stream_in_field#7 = relative_stream(0, 1) { - hops = [(0, 1)], - channel = 3 - } - } - dataflow u16 i#14, u16 j#14 in [1:5:2 , 5:6:2] { - stream _stream_in_field#8 = relative_stream(0, -1) { - hops = [(0, -1)], - channel = 4 - } - stream _stream_in_field#9 = relative_stream(0, -1) { - hops = [(0, -1)], - channel = 5 - } - } - dataflow u16 i#14, u16 j#14 in [2:5:2 , 5:6:2] { - 
stream _stream_in_field#8 = relative_stream(0, -1) { - hops = [(0, -1)], - channel = 4 - } - stream _stream_in_field#9 = relative_stream(0, -1) { - hops = [(0, -1)], - channel = 5 - } - } - dataflow u16 i#15, u16 j#15 in [5:6:2 , 4:5:2] { - stream _stream_in_field#10 = relative_stream(-1, 0) { - hops = [(-1, 0)], - channel = 6 - } - stream _stream_in_field#11 = relative_stream(-1, 0) { - hops = [(-1, 0)], - channel = 7 - } - } - dataflow u16 i#12, u16 j#12 in [4:5:2 , 4:5:2] { - stream _stream_in_field#4 = relative_stream(1, 0) { - hops = [(1, 0)], - channel = 0 - } - stream _stream_in_field#5 = relative_stream(1, 0) { - hops = [(1, 0)], - channel = 1 - } - stream _stream_in_field#6 = relative_stream(0, 1) { - hops = [(0, 1)], - channel = 2 - } - stream _stream_in_field#7 = relative_stream(0, 1) { - hops = [(0, 1)], - channel = 3 - } - stream _stream_in_field#8 = relative_stream(0, -1) { - hops = [(0, -1)], - channel = 4 - } - stream _stream_in_field#9 = relative_stream(0, -1) { - hops = [(0, -1)], - channel = 5 - } - stream _stream_in_field#10 = relative_stream(-1, 0) { - hops = [(-1, 0)], - channel = 6 - } - stream _stream_in_field#11 = relative_stream(-1, 0) { - hops = [(-1, 0)], - channel = 7 - } - } - dataflow u16 i#15, u16 j#15 in [5:6:2 , 2:4:2] { - stream _stream_in_field#10 = relative_stream(-1, 0) { - hops = [(-1, 0)], - channel = 6 - } - stream _stream_in_field#11 = relative_stream(-1, 0) { - hops = [(-1, 0)], - channel = 7 - } - } - dataflow u16 i#15, u16 j#15 in [5:6:2 , 3:4:2] { - stream _stream_in_field#10 = relative_stream(-1, 0) { - hops = [(-1, 0)], - channel = 6 - } - stream _stream_in_field#11 = relative_stream(-1, 0) { - hops = [(-1, 0)], - channel = 7 - } - } - dataflow u16 i#15, u16 j#15 in [5:6:2 , 1:2:2] { - stream _stream_in_field#10 = relative_stream(-1, 0) { - hops = [(-1, 0)], - channel = 6 - } - stream _stream_in_field#11 = relative_stream(-1, 0) { - hops = [(-1, 0)], - channel = 7 - } - } - dataflow u16 i#12, u16 j#12 in [1:2:2 , 
2:4:2] { - stream _stream_in_field#4 = relative_stream(1, 0) { - hops = [(1, 0)], - channel = 0 - } - stream _stream_in_field#5 = relative_stream(1, 0) { - hops = [(1, 0)], - channel = 1 - } - stream _stream_in_field#6 = relative_stream(0, 1) { - hops = [(0, 1)], - channel = 2 - } - stream _stream_in_field#7 = relative_stream(0, 1) { - hops = [(0, 1)], - channel = 3 - } - stream _stream_in_field#8 = relative_stream(0, -1) { - hops = [(0, -1)], - channel = 4 - } - stream _stream_in_field#9 = relative_stream(0, -1) { - hops = [(0, -1)], - channel = 5 - } - stream _stream_in_field#10 = relative_stream(-1, 0) { - hops = [(-1, 0)], - channel = 6 - } - stream _stream_in_field#11 = relative_stream(-1, 0) { - hops = [(-1, 0)], - channel = 7 - } - } - dataflow u16 i#12, u16 j#12 in [1:2:2 , 3:4:2] { - stream _stream_in_field#4 = relative_stream(1, 0) { - hops = [(1, 0)], - channel = 0 - } - stream _stream_in_field#5 = relative_stream(1, 0) { - hops = [(1, 0)], - channel = 1 - } - stream _stream_in_field#6 = relative_stream(0, 1) { - hops = [(0, 1)], - channel = 2 - } - stream _stream_in_field#7 = relative_stream(0, 1) { - hops = [(0, 1)], - channel = 3 - } - stream _stream_in_field#8 = relative_stream(0, -1) { - hops = [(0, -1)], - channel = 4 - } - stream _stream_in_field#9 = relative_stream(0, -1) { - hops = [(0, -1)], - channel = 5 - } - stream _stream_in_field#10 = relative_stream(-1, 0) { - hops = [(-1, 0)], - channel = 6 - } - stream _stream_in_field#11 = relative_stream(-1, 0) { - hops = [(-1, 0)], - channel = 7 - } - } - dataflow u16 i#12, u16 j#12 in [4:5:2 , 1:2:2] { - stream _stream_in_field#4 = relative_stream(1, 0) { - hops = [(1, 0)], - channel = 0 - } - stream _stream_in_field#5 = relative_stream(1, 0) { - hops = [(1, 0)], - channel = 1 - } - stream _stream_in_field#6 = relative_stream(0, 1) { - hops = [(0, 1)], - channel = 2 - } - stream _stream_in_field#7 = relative_stream(0, 1) { - hops = [(0, 1)], - channel = 3 - } - stream _stream_in_field#8 = 
relative_stream(0, -1) { - hops = [(0, -1)], - channel = 4 - } - stream _stream_in_field#9 = relative_stream(0, -1) { - hops = [(0, -1)], - channel = 5 - } - stream _stream_in_field#10 = relative_stream(-1, 0) { - hops = [(-1, 0)], - channel = 6 - } - stream _stream_in_field#11 = relative_stream(-1, 0) { - hops = [(-1, 0)], - channel = 7 - } - } - dataflow u16 i#12, u16 j#12 in [2:4:2 , 4:5:2] { - stream _stream_in_field#4 = relative_stream(1, 0) { - hops = [(1, 0)], - channel = 0 - } - stream _stream_in_field#5 = relative_stream(1, 0) { - hops = [(1, 0)], - channel = 1 - } - stream _stream_in_field#6 = relative_stream(0, 1) { - hops = [(0, 1)], - channel = 2 - } - stream _stream_in_field#7 = relative_stream(0, 1) { - hops = [(0, 1)], - channel = 3 - } - stream _stream_in_field#8 = relative_stream(0, -1) { - hops = [(0, -1)], - channel = 4 - } - stream _stream_in_field#9 = relative_stream(0, -1) { - hops = [(0, -1)], - channel = 5 - } - stream _stream_in_field#10 = relative_stream(-1, 0) { - hops = [(-1, 0)], - channel = 6 - } - stream _stream_in_field#11 = relative_stream(-1, 0) { - hops = [(-1, 0)], - channel = 7 - } - } - dataflow u16 i#12, u16 j#12 in [3:4:2 , 4:5:2] { - stream _stream_in_field#4 = relative_stream(1, 0) { - hops = [(1, 0)], - channel = 0 - } - stream _stream_in_field#5 = relative_stream(1, 0) { - hops = [(1, 0)], - channel = 1 - } - stream _stream_in_field#6 = relative_stream(0, 1) { - hops = [(0, 1)], - channel = 2 - } - stream _stream_in_field#7 = relative_stream(0, 1) { - hops = [(0, 1)], - channel = 3 - } - stream _stream_in_field#8 = relative_stream(0, -1) { - hops = [(0, -1)], - channel = 4 - } - stream _stream_in_field#9 = relative_stream(0, -1) { - hops = [(0, -1)], - channel = 5 - } - stream _stream_in_field#10 = relative_stream(-1, 0) { - hops = [(-1, 0)], - channel = 6 - } - stream _stream_in_field#11 = relative_stream(-1, 0) { - hops = [(-1, 0)], - channel = 7 - } - } - dataflow u16 i#12, u16 j#12 in [1:2:2 , 4:5:2] { - stream 
_stream_in_field#4 = relative_stream(1, 0) { - hops = [(1, 0)], - channel = 0 - } - stream _stream_in_field#5 = relative_stream(1, 0) { - hops = [(1, 0)], - channel = 1 - } - stream _stream_in_field#6 = relative_stream(0, 1) { - hops = [(0, 1)], - channel = 2 - } - stream _stream_in_field#7 = relative_stream(0, 1) { - hops = [(0, 1)], - channel = 3 - } - stream _stream_in_field#8 = relative_stream(0, -1) { - hops = [(0, -1)], - channel = 4 - } - stream _stream_in_field#9 = relative_stream(0, -1) { - hops = [(0, -1)], - channel = 5 - } - stream _stream_in_field#10 = relative_stream(-1, 0) { - hops = [(-1, 0)], - channel = 6 - } - stream _stream_in_field#11 = relative_stream(-1, 0) { - hops = [(-1, 0)], - channel = 7 - } - } - dataflow u16 i#12, u16 j#12 in [2:4:2 , 2:4:2] { - stream _stream_in_field#4 = relative_stream(1, 0) { - hops = [(1, 0)], - channel = 0 - } - stream _stream_in_field#5 = relative_stream(1, 0) { - hops = [(1, 0)], - channel = 1 - } - stream _stream_in_field#6 = relative_stream(0, 1) { - hops = [(0, 1)], - channel = 2 - } - stream _stream_in_field#7 = relative_stream(0, 1) { - hops = [(0, 1)], - channel = 3 - } - stream _stream_in_field#8 = relative_stream(0, -1) { - hops = [(0, -1)], - channel = 4 - } - stream _stream_in_field#9 = relative_stream(0, -1) { - hops = [(0, -1)], - channel = 5 - } - stream _stream_in_field#10 = relative_stream(-1, 0) { - hops = [(-1, 0)], - channel = 6 - } - stream _stream_in_field#11 = relative_stream(-1, 0) { - hops = [(-1, 0)], - channel = 7 - } - } - dataflow u16 i#12, u16 j#12 in [2:4:2 , 3:4:2] { - stream _stream_in_field#4 = relative_stream(1, 0) { - hops = [(1, 0)], - channel = 0 - } - stream _stream_in_field#5 = relative_stream(1, 0) { - hops = [(1, 0)], - channel = 1 - } - stream _stream_in_field#6 = relative_stream(0, 1) { - hops = [(0, 1)], - channel = 2 - } - stream _stream_in_field#7 = relative_stream(0, 1) { - hops = [(0, 1)], - channel = 3 - } - stream _stream_in_field#8 = relative_stream(0, -1) { - 
hops = [(0, -1)], - channel = 4 - } - stream _stream_in_field#9 = relative_stream(0, -1) { - hops = [(0, -1)], - channel = 5 - } - stream _stream_in_field#10 = relative_stream(-1, 0) { - hops = [(-1, 0)], - channel = 6 - } - stream _stream_in_field#11 = relative_stream(-1, 0) { - hops = [(-1, 0)], - channel = 7 - } - } - dataflow u16 i#12, u16 j#12 in [3:4:2 , 2:4:2] { - stream _stream_in_field#4 = relative_stream(1, 0) { - hops = [(1, 0)], - channel = 0 - } - stream _stream_in_field#5 = relative_stream(1, 0) { - hops = [(1, 0)], - channel = 1 - } - stream _stream_in_field#6 = relative_stream(0, 1) { - hops = [(0, 1)], - channel = 2 - } - stream _stream_in_field#7 = relative_stream(0, 1) { - hops = [(0, 1)], - channel = 3 - } - stream _stream_in_field#8 = relative_stream(0, -1) { - hops = [(0, -1)], - channel = 4 - } - stream _stream_in_field#9 = relative_stream(0, -1) { - hops = [(0, -1)], - channel = 5 - } - stream _stream_in_field#10 = relative_stream(-1, 0) { - hops = [(-1, 0)], - channel = 6 - } - stream _stream_in_field#11 = relative_stream(-1, 0) { - hops = [(-1, 0)], - channel = 7 - } - } - dataflow u16 i#12, u16 j#12 in [3:4:2 , 3:4:2] { - stream _stream_in_field#4 = relative_stream(1, 0) { - hops = [(1, 0)], - channel = 0 - } - stream _stream_in_field#5 = relative_stream(1, 0) { - hops = [(1, 0)], - channel = 1 - } - stream _stream_in_field#6 = relative_stream(0, 1) { - hops = [(0, 1)], - channel = 2 - } - stream _stream_in_field#7 = relative_stream(0, 1) { - hops = [(0, 1)], - channel = 3 - } - stream _stream_in_field#8 = relative_stream(0, -1) { - hops = [(0, -1)], - channel = 4 - } - stream _stream_in_field#9 = relative_stream(0, -1) { - hops = [(0, -1)], - channel = 5 - } - stream _stream_in_field#10 = relative_stream(-1, 0) { - hops = [(-1, 0)], - channel = 6 - } - stream _stream_in_field#11 = relative_stream(-1, 0) { - hops = [(-1, 0)], - channel = 7 - } - } - dataflow u16 i#12, u16 j#12 in [4:5:2 , 2:4:2] { - stream _stream_in_field#4 = 
relative_stream(1, 0) { - hops = [(1, 0)], - channel = 0 - } - stream _stream_in_field#5 = relative_stream(1, 0) { - hops = [(1, 0)], - channel = 1 - } - stream _stream_in_field#6 = relative_stream(0, 1) { - hops = [(0, 1)], - channel = 2 - } - stream _stream_in_field#7 = relative_stream(0, 1) { - hops = [(0, 1)], - channel = 3 - } - stream _stream_in_field#8 = relative_stream(0, -1) { - hops = [(0, -1)], - channel = 4 - } - stream _stream_in_field#9 = relative_stream(0, -1) { - hops = [(0, -1)], - channel = 5 - } - stream _stream_in_field#10 = relative_stream(-1, 0) { - hops = [(-1, 0)], - channel = 6 - } - stream _stream_in_field#11 = relative_stream(-1, 0) { - hops = [(-1, 0)], - channel = 7 - } - } - dataflow u16 i#12, u16 j#12 in [4:5:2 , 3:4:2] { - stream _stream_in_field#4 = relative_stream(1, 0) { - hops = [(1, 0)], - channel = 0 - } - stream _stream_in_field#5 = relative_stream(1, 0) { - hops = [(1, 0)], - channel = 1 - } - stream _stream_in_field#6 = relative_stream(0, 1) { - hops = [(0, 1)], - channel = 2 - } - stream _stream_in_field#7 = relative_stream(0, 1) { - hops = [(0, 1)], - channel = 3 - } - stream _stream_in_field#8 = relative_stream(0, -1) { - hops = [(0, -1)], - channel = 4 - } - stream _stream_in_field#9 = relative_stream(0, -1) { - hops = [(0, -1)], - channel = 5 - } - stream _stream_in_field#10 = relative_stream(-1, 0) { - hops = [(-1, 0)], - channel = 6 - } - stream _stream_in_field#11 = relative_stream(-1, 0) { - hops = [(-1, 0)], - channel = 7 - } - } - dataflow u16 i#12, u16 j#12 in [2:4:2 , 1:2:2] { - stream _stream_in_field#4 = relative_stream(1, 0) { - hops = [(1, 0)], - channel = 0 - } - stream _stream_in_field#5 = relative_stream(1, 0) { - hops = [(1, 0)], - channel = 1 - } - stream _stream_in_field#6 = relative_stream(0, 1) { - hops = [(0, 1)], - channel = 2 - } - stream _stream_in_field#7 = relative_stream(0, 1) { - hops = [(0, 1)], - channel = 3 - } - stream _stream_in_field#8 = relative_stream(0, -1) { - hops = [(0, -1)], - 
channel = 4 - } - stream _stream_in_field#9 = relative_stream(0, -1) { - hops = [(0, -1)], - channel = 5 - } - stream _stream_in_field#10 = relative_stream(-1, 0) { - hops = [(-1, 0)], - channel = 6 - } - stream _stream_in_field#11 = relative_stream(-1, 0) { - hops = [(-1, 0)], - channel = 7 - } - } - dataflow u16 i#12, u16 j#12 in [3:4:2 , 1:2:2] { - stream _stream_in_field#4 = relative_stream(1, 0) { - hops = [(1, 0)], - channel = 0 - } - stream _stream_in_field#5 = relative_stream(1, 0) { - hops = [(1, 0)], - channel = 1 - } - stream _stream_in_field#6 = relative_stream(0, 1) { - hops = [(0, 1)], - channel = 2 - } - stream _stream_in_field#7 = relative_stream(0, 1) { - hops = [(0, 1)], - channel = 3 - } - stream _stream_in_field#8 = relative_stream(0, -1) { - hops = [(0, -1)], - channel = 4 - } - stream _stream_in_field#9 = relative_stream(0, -1) { - hops = [(0, -1)], - channel = 5 - } - stream _stream_in_field#10 = relative_stream(-1, 0) { - hops = [(-1, 0)], - channel = 6 - } - stream _stream_in_field#11 = relative_stream(-1, 0) { - hops = [(-1, 0)], - channel = 7 - } - } - dataflow u16 i#12, u16 j#12 in [1:2:2 , 1:2:2] { - stream _stream_in_field#4 = relative_stream(1, 0) { - hops = [(1, 0)], - channel = 0 - } - stream _stream_in_field#5 = relative_stream(1, 0) { - hops = [(1, 0)], - channel = 1 - } - stream _stream_in_field#6 = relative_stream(0, 1) { - hops = [(0, 1)], - channel = 2 - } - stream _stream_in_field#7 = relative_stream(0, 1) { - hops = [(0, 1)], - channel = 3 - } - stream _stream_in_field#8 = relative_stream(0, -1) { - hops = [(0, -1)], - channel = 4 - } - stream _stream_in_field#9 = relative_stream(0, -1) { - hops = [(0, -1)], - channel = 5 - } - stream _stream_in_field#10 = relative_stream(-1, 0) { - hops = [(-1, 0)], - channel = 6 - } - stream _stream_in_field#11 = relative_stream(-1, 0) { - hops = [(-1, 0)], - channel = 7 - } - } - compute u16 i#8, u16 j#8 in [0:1:2 , 1:5:2] { - await receive(in_field_0_0_0, _in_field[i#8, j#8]) - awaitall 
- completion _send_comp#1 = send(in_field_0_0_0, _stream_in_field#4) - await _send_comp#1 - } - compute u16 i#8, u16 j#8 in [0:1:2 , 2:5:2] { - await receive(in_field_0_0_0, _in_field[i#8, j#8]) - awaitall - completion _send_comp#1 = send(in_field_0_0_0, _stream_in_field#4) - await _send_comp#1 - } - compute u16 i#6, u16 j#6 in [0:1:2 , 0:1:2] { - await receive(in_field_0_0_0, _in_field[i#6, j#6]) - awaitall - } - compute u16 i#6, u16 j#6 in [5:6:2 , 0:1:2] { - await receive(in_field_0_0_0, _in_field[i#6, j#6]) - awaitall - } - compute u16 i#6, u16 j#6 in [1:5:2 , 0:1:2] { - await receive(in_field_0_0_0, _in_field[i#6, j#6]) - awaitall - completion _send_comp#3 = send(in_field_0_0_0, _stream_in_field#6) - await _send_comp#3 - } - compute u16 i#6, u16 j#6 in [2:5:2 , 0:1:2] { - await receive(in_field_0_0_0, _in_field[i#6, j#6]) - awaitall - completion _send_comp#3 = send(in_field_0_0_0, _stream_in_field#6) - await _send_comp#3 - } - compute u16 i#7, u16 j#7 in [0:1:2 , 5:6:2] { - await receive(in_field_0_0_0, _in_field[i#7, j#7]) - awaitall - } - compute u16 i#7, u16 j#7 in [5:6:2 , 5:6:2] { - await receive(in_field_0_0_0, _in_field[i#7, j#7]) - awaitall - } - compute u16 i#7, u16 j#7 in [1:5:2 , 5:6:2] { - await receive(in_field_0_0_0, _in_field[i#7, j#7]) - awaitall - completion _send_comp#2 = send(in_field_0_0_0, _stream_in_field#8) - await _send_comp#2 - } - compute u16 i#7, u16 j#7 in [2:5:2 , 5:6:2] { - await receive(in_field_0_0_0, _in_field[i#7, j#7]) - awaitall - completion _send_comp#2 = send(in_field_0_0_0, _stream_in_field#8) - await _send_comp#2 - } - compute u16 i#9, u16 j#9 in [5:6:2 , 4:5:2] { - await receive(in_field_0_0_0, _in_field[i#9, j#9]) - awaitall - completion _send_comp = send(in_field_0_0_0, _stream_in_field#10) - await _send_comp - } - compute u16 i#5, u16 j#5 in [4:5:2 , 4:5:2] { - await receive(in_field_0_0_0, _in_field[i#5, j#5]) - awaitall - completion _recv_comp = foreach i32 k, f32 x in [0:4:1], receive(_stream_in_field#10) { - 
_temp_0_0_0[k] = x - } - completion _send_comp = send(in_field_0_0_0, _stream_in_field#11) - await _send_comp - await _recv_comp - completion _recv_comp#1 = foreach i32 k#1, f32 x#1 in [0:4:1], receive(_stream_in_field#5) { - _temp_0_0_0#1[k#1] = x#1 - } - await _recv_comp#1 - completion _recv_comp#2 = foreach i32 k#2, f32 x#2 in [0:4:1], receive(_stream_in_field#8) { - _temp_0_0_0#2[k#2] = (_temp_0_0_0#1 + x#2) - } - completion _send_comp#2 = send(in_field_0_0_0, _stream_in_field#9) - await _send_comp#2 - await _recv_comp#2 - completion _recv_comp#3 = foreach i32 k#3, f32 x#3 in [0:4:1], receive(_stream_in_field#7) { - _temp_0_0_0#3[k#3] = x#3 - } - await _recv_comp#3 - await map i32 k#4 in [0:4:1] { - _temp_0_0_0#4[k#4] = (-4.0 * in_field_0_0_0[k#4]) - } - await map i32 k#5 in [0:4:1] { - out_field_0_0_0#1[k#5] = (_temp_0_0_0#4[k#5] + _temp_0_0_0#3[k#5]) - } - await map i32 k#6 in [0:4:1] { - out_field_0_0_0[k#6] = out_field_0_0_0#1[k#6] - } - awaitall - await send(out_field_0_0_0, __kernel_out_0[(i#5 - 1), (j#5 - 1)]) - } - compute u16 i#9, u16 j#9 in [5:6:2 , 2:4:2] { - await receive(in_field_0_0_0, _in_field[i#9, j#9]) - awaitall - completion _send_comp = send(in_field_0_0_0, _stream_in_field#10) - await _send_comp - } - compute u16 i#9, u16 j#9 in [5:6:2 , 3:4:2] { - await receive(in_field_0_0_0, _in_field[i#9, j#9]) - awaitall - completion _send_comp = send(in_field_0_0_0, _stream_in_field#10) - await _send_comp - } - compute u16 i#9, u16 j#9 in [5:6:2 , 1:2:2] { - await receive(in_field_0_0_0, _in_field[i#9, j#9]) - awaitall - completion _send_comp = send(in_field_0_0_0, _stream_in_field#10) - await _send_comp - } - compute u16 i#5, u16 j#5 in [1:2:2 , 2:4:2] { - await receive(in_field_0_0_0, _in_field[i#5, j#5]) - awaitall - completion _recv_comp = foreach i32 k, f32 x in [0:4:1], receive(_stream_in_field#11) { - _temp_0_0_0[k] = x - } - await _recv_comp - completion _recv_comp#1 = foreach i32 k#1, f32 x#1 in [0:4:1], receive(_stream_in_field#4) { - 
_temp_0_0_0#1[k#1] = (_temp_0_0_0 + x#1) - } - completion _send_comp#1 = send(in_field_0_0_0, _stream_in_field#5) - await _send_comp#1 - await _recv_comp#1 - completion _recv_comp#2 = foreach i32 k#2, f32 x#2 in [0:4:1], receive(_stream_in_field#8) { - _temp_0_0_0#2[k#2] = (_temp_0_0_0#1 + x#2) - } - completion _send_comp#2 = send(in_field_0_0_0, _stream_in_field#9) - await _send_comp#2 - await _recv_comp#2 - completion _recv_comp#3 = foreach i32 k#3, f32 x#3 in [0:4:1], receive(_stream_in_field#7) { - _temp_0_0_0#3[k#3] = (_temp_0_0_0#2 + x#3) - } - completion _send_comp#3 = send(in_field_0_0_0, _stream_in_field#6) - await _send_comp#3 - await _recv_comp#3 - await map i32 k#4 in [0:4:1] { - _temp_0_0_0#4[k#4] = (-4.0 * in_field_0_0_0[k#4]) - } - await map i32 k#5 in [0:4:1] { - out_field_0_0_0#1[k#5] = (_temp_0_0_0#4[k#5] + _temp_0_0_0#3[k#5]) - } - await map i32 k#6 in [0:4:1] { - out_field_0_0_0[k#6] = out_field_0_0_0#1[k#6] - } - awaitall - await send(out_field_0_0_0, __kernel_out_0[(i#5 - 1), (j#5 - 1)]) - } - compute u16 i#5, u16 j#5 in [1:2:2 , 3:4:2] { - await receive(in_field_0_0_0, _in_field[i#5, j#5]) - awaitall - completion _recv_comp = foreach i32 k, f32 x in [0:4:1], receive(_stream_in_field#11) { - _temp_0_0_0[k] = x - } - await _recv_comp - completion _recv_comp#1 = foreach i32 k#1, f32 x#1 in [0:4:1], receive(_stream_in_field#4) { - _temp_0_0_0#1[k#1] = (_temp_0_0_0 + x#1) - } - completion _send_comp#1 = send(in_field_0_0_0, _stream_in_field#5) - await _send_comp#1 - await _recv_comp#1 - completion _recv_comp#2 = foreach i32 k#2, f32 x#2 in [0:4:1], receive(_stream_in_field#9) { - _temp_0_0_0#2[k#2] = (_temp_0_0_0#1 + x#2) - } - completion _send_comp#2 = send(in_field_0_0_0, _stream_in_field#8) - await _send_comp#2 - await _recv_comp#2 - completion _recv_comp#3 = foreach i32 k#3, f32 x#3 in [0:4:1], receive(_stream_in_field#6) { - _temp_0_0_0#3[k#3] = (_temp_0_0_0#2 + x#3) - } - completion _send_comp#3 = send(in_field_0_0_0, _stream_in_field#7) - 
await _send_comp#3 - await _recv_comp#3 - await map i32 k#4 in [0:4:1] { - _temp_0_0_0#4[k#4] = (-4.0 * in_field_0_0_0[k#4]) - } - await map i32 k#5 in [0:4:1] { - out_field_0_0_0#1[k#5] = (_temp_0_0_0#4[k#5] + _temp_0_0_0#3[k#5]) - } - await map i32 k#6 in [0:4:1] { - out_field_0_0_0[k#6] = out_field_0_0_0#1[k#6] - } - awaitall - await send(out_field_0_0_0, __kernel_out_0[(i#5 - 1), (j#5 - 1)]) - } - compute u16 i#5, u16 j#5 in [4:5:2 , 1:2:2] { - await receive(in_field_0_0_0, _in_field[i#5, j#5]) - awaitall - completion _recv_comp = foreach i32 k, f32 x in [0:4:1], receive(_stream_in_field#10) { - _temp_0_0_0[k] = x - } - completion _send_comp = send(in_field_0_0_0, _stream_in_field#11) - await _send_comp - await _recv_comp - completion _recv_comp#1 = foreach i32 k#1, f32 x#1 in [0:4:1], receive(_stream_in_field#5) { - _temp_0_0_0#1[k#1] = x#1 - } - await _recv_comp#1 - completion _recv_comp#2 = foreach i32 k#2, f32 x#2 in [0:4:1], receive(_stream_in_field#9) { - _temp_0_0_0#2[k#2] = x#2 - } - await _recv_comp#2 - completion _recv_comp#3 = foreach i32 k#3, f32 x#3 in [0:4:1], receive(_stream_in_field#6) { - _temp_0_0_0#3[k#3] = (_temp_0_0_0#2 + x#3) - } - completion _send_comp#3 = send(in_field_0_0_0, _stream_in_field#7) - await _send_comp#3 - await _recv_comp#3 - await map i32 k#4 in [0:4:1] { - _temp_0_0_0#4[k#4] = (-4.0 * in_field_0_0_0[k#4]) - } - await map i32 k#5 in [0:4:1] { - out_field_0_0_0#1[k#5] = (_temp_0_0_0#4[k#5] + _temp_0_0_0#3[k#5]) - } - await map i32 k#6 in [0:4:1] { - out_field_0_0_0[k#6] = out_field_0_0_0#1[k#6] - } - awaitall - await send(out_field_0_0_0, __kernel_out_0[(i#5 - 1), (j#5 - 1)]) - } - compute u16 i#5, u16 j#5 in [2:4:2 , 4:5:2] { - await receive(in_field_0_0_0, _in_field[i#5, j#5]) - awaitall - completion _recv_comp = foreach i32 k, f32 x in [0:4:1], receive(_stream_in_field#10) { - _temp_0_0_0[k] = x - } - completion _send_comp = send(in_field_0_0_0, _stream_in_field#11) - await _send_comp - await _recv_comp - completion 
_recv_comp#1 = foreach i32 k#1, f32 x#1 in [0:4:1], receive(_stream_in_field#5) { - _temp_0_0_0#1[k#1] = (_temp_0_0_0 + x#1) - } - completion _send_comp#1 = send(in_field_0_0_0, _stream_in_field#4) - await _send_comp#1 - await _recv_comp#1 - completion _recv_comp#2 = foreach i32 k#2, f32 x#2 in [0:4:1], receive(_stream_in_field#8) { - _temp_0_0_0#2[k#2] = (_temp_0_0_0#1 + x#2) - } - completion _send_comp#2 = send(in_field_0_0_0, _stream_in_field#9) - await _send_comp#2 - await _recv_comp#2 - completion _recv_comp#3 = foreach i32 k#3, f32 x#3 in [0:4:1], receive(_stream_in_field#7) { - _temp_0_0_0#3[k#3] = x#3 - } - await _recv_comp#3 - await map i32 k#4 in [0:4:1] { - _temp_0_0_0#4[k#4] = (-4.0 * in_field_0_0_0[k#4]) - } - await map i32 k#5 in [0:4:1] { - out_field_0_0_0#1[k#5] = (_temp_0_0_0#4[k#5] + _temp_0_0_0#3[k#5]) - } - await map i32 k#6 in [0:4:1] { - out_field_0_0_0[k#6] = out_field_0_0_0#1[k#6] - } - awaitall - await send(out_field_0_0_0, __kernel_out_0[(i#5 - 1), (j#5 - 1)]) - } - compute u16 i#5, u16 j#5 in [3:4:2 , 4:5:2] { - await receive(in_field_0_0_0, _in_field[i#5, j#5]) - awaitall - completion _recv_comp = foreach i32 k, f32 x in [0:4:1], receive(_stream_in_field#11) { - _temp_0_0_0[k] = x - } - completion _send_comp = send(in_field_0_0_0, _stream_in_field#10) - await _send_comp - await _recv_comp - completion _recv_comp#1 = foreach i32 k#1, f32 x#1 in [0:4:1], receive(_stream_in_field#4) { - _temp_0_0_0#1[k#1] = (_temp_0_0_0 + x#1) - } - completion _send_comp#1 = send(in_field_0_0_0, _stream_in_field#5) - await _send_comp#1 - await _recv_comp#1 - completion _recv_comp#2 = foreach i32 k#2, f32 x#2 in [0:4:1], receive(_stream_in_field#8) { - _temp_0_0_0#2[k#2] = (_temp_0_0_0#1 + x#2) - } - completion _send_comp#2 = send(in_field_0_0_0, _stream_in_field#9) - await _send_comp#2 - await _recv_comp#2 - completion _recv_comp#3 = foreach i32 k#3, f32 x#3 in [0:4:1], receive(_stream_in_field#7) { - _temp_0_0_0#3[k#3] = x#3 - } - await _recv_comp#3 - 
await map i32 k#4 in [0:4:1] { - _temp_0_0_0#4[k#4] = (-4.0 * in_field_0_0_0[k#4]) - } - await map i32 k#5 in [0:4:1] { - out_field_0_0_0#1[k#5] = (_temp_0_0_0#4[k#5] + _temp_0_0_0#3[k#5]) - } - await map i32 k#6 in [0:4:1] { - out_field_0_0_0[k#6] = out_field_0_0_0#1[k#6] - } - awaitall - await send(out_field_0_0_0, __kernel_out_0[(i#5 - 1), (j#5 - 1)]) - } - compute u16 i#5, u16 j#5 in [1:2:2 , 4:5:2] { - await receive(in_field_0_0_0, _in_field[i#5, j#5]) - awaitall - completion _recv_comp = foreach i32 k, f32 x in [0:4:1], receive(_stream_in_field#11) { - _temp_0_0_0[k] = x - } - await _recv_comp - completion _recv_comp#1 = foreach i32 k#1, f32 x#1 in [0:4:1], receive(_stream_in_field#4) { - _temp_0_0_0#1[k#1] = (_temp_0_0_0 + x#1) - } - completion _send_comp#1 = send(in_field_0_0_0, _stream_in_field#5) - await _send_comp#1 - await _recv_comp#1 - completion _recv_comp#2 = foreach i32 k#2, f32 x#2 in [0:4:1], receive(_stream_in_field#8) { - _temp_0_0_0#2[k#2] = (_temp_0_0_0#1 + x#2) - } - completion _send_comp#2 = send(in_field_0_0_0, _stream_in_field#9) - await _send_comp#2 - await _recv_comp#2 - completion _recv_comp#3 = foreach i32 k#3, f32 x#3 in [0:4:1], receive(_stream_in_field#7) { - _temp_0_0_0#3[k#3] = x#3 - } - await _recv_comp#3 - await map i32 k#4 in [0:4:1] { - _temp_0_0_0#4[k#4] = (-4.0 * in_field_0_0_0[k#4]) - } - await map i32 k#5 in [0:4:1] { - out_field_0_0_0#1[k#5] = (_temp_0_0_0#4[k#5] + _temp_0_0_0#3[k#5]) - } - await map i32 k#6 in [0:4:1] { - out_field_0_0_0[k#6] = out_field_0_0_0#1[k#6] - } - awaitall - await send(out_field_0_0_0, __kernel_out_0[(i#5 - 1), (j#5 - 1)]) - } - compute u16 i#5, u16 j#5 in [2:4:2 , 2:4:2] { - await receive(in_field_0_0_0, _in_field[i#5, j#5]) - awaitall - completion _recv_comp = foreach i32 k, f32 x in [0:4:1], receive(_stream_in_field#10) { - _temp_0_0_0[k] = x - } - completion _send_comp = send(in_field_0_0_0, _stream_in_field#11) - await _send_comp - await _recv_comp - completion _recv_comp#1 = foreach i32 
k#1, f32 x#1 in [0:4:1], receive(_stream_in_field#5) { - _temp_0_0_0#1[k#1] = (_temp_0_0_0 + x#1) - } - completion _send_comp#1 = send(in_field_0_0_0, _stream_in_field#4) - await _send_comp#1 - await _recv_comp#1 - completion _recv_comp#2 = foreach i32 k#2, f32 x#2 in [0:4:1], receive(_stream_in_field#8) { - _temp_0_0_0#2[k#2] = (_temp_0_0_0#1 + x#2) - } - completion _send_comp#2 = send(in_field_0_0_0, _stream_in_field#9) - await _send_comp#2 - await _recv_comp#2 - completion _recv_comp#3 = foreach i32 k#3, f32 x#3 in [0:4:1], receive(_stream_in_field#7) { - _temp_0_0_0#3[k#3] = (_temp_0_0_0#2 + x#3) - } - completion _send_comp#3 = send(in_field_0_0_0, _stream_in_field#6) - await _send_comp#3 - await _recv_comp#3 - await map i32 k#4 in [0:4:1] { - _temp_0_0_0#4[k#4] = (-4.0 * in_field_0_0_0[k#4]) - } - await map i32 k#5 in [0:4:1] { - out_field_0_0_0#1[k#5] = (_temp_0_0_0#4[k#5] + _temp_0_0_0#3[k#5]) - } - await map i32 k#6 in [0:4:1] { - out_field_0_0_0[k#6] = out_field_0_0_0#1[k#6] - } - awaitall - await send(out_field_0_0_0, __kernel_out_0[(i#5 - 1), (j#5 - 1)]) - } - compute u16 i#5, u16 j#5 in [2:4:2 , 3:4:2] { - await receive(in_field_0_0_0, _in_field[i#5, j#5]) - awaitall - completion _recv_comp = foreach i32 k, f32 x in [0:4:1], receive(_stream_in_field#10) { - _temp_0_0_0[k] = x - } - completion _send_comp = send(in_field_0_0_0, _stream_in_field#11) - await _send_comp - await _recv_comp - completion _recv_comp#1 = foreach i32 k#1, f32 x#1 in [0:4:1], receive(_stream_in_field#5) { - _temp_0_0_0#1[k#1] = (_temp_0_0_0 + x#1) - } - completion _send_comp#1 = send(in_field_0_0_0, _stream_in_field#4) - await _send_comp#1 - await _recv_comp#1 - completion _recv_comp#2 = foreach i32 k#2, f32 x#2 in [0:4:1], receive(_stream_in_field#9) { - _temp_0_0_0#2[k#2] = (_temp_0_0_0#1 + x#2) - } - completion _send_comp#2 = send(in_field_0_0_0, _stream_in_field#8) - await _send_comp#2 - await _recv_comp#2 - completion _recv_comp#3 = foreach i32 k#3, f32 x#3 in [0:4:1], 
receive(_stream_in_field#6) { - _temp_0_0_0#3[k#3] = (_temp_0_0_0#2 + x#3) - } - completion _send_comp#3 = send(in_field_0_0_0, _stream_in_field#7) - await _send_comp#3 - await _recv_comp#3 - await map i32 k#4 in [0:4:1] { - _temp_0_0_0#4[k#4] = (-4.0 * in_field_0_0_0[k#4]) - } - await map i32 k#5 in [0:4:1] { - out_field_0_0_0#1[k#5] = (_temp_0_0_0#4[k#5] + _temp_0_0_0#3[k#5]) - } - await map i32 k#6 in [0:4:1] { - out_field_0_0_0[k#6] = out_field_0_0_0#1[k#6] - } - awaitall - await send(out_field_0_0_0, __kernel_out_0[(i#5 - 1), (j#5 - 1)]) - } - compute u16 i#5, u16 j#5 in [3:4:2 , 2:4:2] { - await receive(in_field_0_0_0, _in_field[i#5, j#5]) - awaitall - completion _recv_comp = foreach i32 k, f32 x in [0:4:1], receive(_stream_in_field#11) { - _temp_0_0_0[k] = x - } - completion _send_comp = send(in_field_0_0_0, _stream_in_field#10) - await _send_comp - await _recv_comp - completion _recv_comp#1 = foreach i32 k#1, f32 x#1 in [0:4:1], receive(_stream_in_field#4) { - _temp_0_0_0#1[k#1] = (_temp_0_0_0 + x#1) - } - completion _send_comp#1 = send(in_field_0_0_0, _stream_in_field#5) - await _send_comp#1 - await _recv_comp#1 - completion _recv_comp#2 = foreach i32 k#2, f32 x#2 in [0:4:1], receive(_stream_in_field#8) { - _temp_0_0_0#2[k#2] = (_temp_0_0_0#1 + x#2) - } - completion _send_comp#2 = send(in_field_0_0_0, _stream_in_field#9) - await _send_comp#2 - await _recv_comp#2 - completion _recv_comp#3 = foreach i32 k#3, f32 x#3 in [0:4:1], receive(_stream_in_field#7) { - _temp_0_0_0#3[k#3] = (_temp_0_0_0#2 + x#3) - } - completion _send_comp#3 = send(in_field_0_0_0, _stream_in_field#6) - await _send_comp#3 - await _recv_comp#3 - await map i32 k#4 in [0:4:1] { - _temp_0_0_0#4[k#4] = (-4.0 * in_field_0_0_0[k#4]) - } - await map i32 k#5 in [0:4:1] { - out_field_0_0_0#1[k#5] = (_temp_0_0_0#4[k#5] + _temp_0_0_0#3[k#5]) - } - await map i32 k#6 in [0:4:1] { - out_field_0_0_0[k#6] = out_field_0_0_0#1[k#6] - } - awaitall - await send(out_field_0_0_0, __kernel_out_0[(i#5 - 1), 
(j#5 - 1)]) - } - compute u16 i#5, u16 j#5 in [3:4:2 , 3:4:2] { - await receive(in_field_0_0_0, _in_field[i#5, j#5]) - awaitall - completion _recv_comp = foreach i32 k, f32 x in [0:4:1], receive(_stream_in_field#11) { - _temp_0_0_0[k] = x - } - completion _send_comp = send(in_field_0_0_0, _stream_in_field#10) - await _send_comp - await _recv_comp - completion _recv_comp#1 = foreach i32 k#1, f32 x#1 in [0:4:1], receive(_stream_in_field#4) { - _temp_0_0_0#1[k#1] = (_temp_0_0_0 + x#1) - } - completion _send_comp#1 = send(in_field_0_0_0, _stream_in_field#5) - await _send_comp#1 - await _recv_comp#1 - completion _recv_comp#2 = foreach i32 k#2, f32 x#2 in [0:4:1], receive(_stream_in_field#9) { - _temp_0_0_0#2[k#2] = (_temp_0_0_0#1 + x#2) - } - completion _send_comp#2 = send(in_field_0_0_0, _stream_in_field#8) - await _send_comp#2 - await _recv_comp#2 - completion _recv_comp#3 = foreach i32 k#3, f32 x#3 in [0:4:1], receive(_stream_in_field#6) { - _temp_0_0_0#3[k#3] = (_temp_0_0_0#2 + x#3) - } - completion _send_comp#3 = send(in_field_0_0_0, _stream_in_field#7) - await _send_comp#3 - await _recv_comp#3 - await map i32 k#4 in [0:4:1] { - _temp_0_0_0#4[k#4] = (-4.0 * in_field_0_0_0[k#4]) - } - await map i32 k#5 in [0:4:1] { - out_field_0_0_0#1[k#5] = (_temp_0_0_0#4[k#5] + _temp_0_0_0#3[k#5]) - } - await map i32 k#6 in [0:4:1] { - out_field_0_0_0[k#6] = out_field_0_0_0#1[k#6] - } - awaitall - await send(out_field_0_0_0, __kernel_out_0[(i#5 - 1), (j#5 - 1)]) - } - compute u16 i#5, u16 j#5 in [4:5:2 , 2:4:2] { - await receive(in_field_0_0_0, _in_field[i#5, j#5]) - awaitall - completion _recv_comp = foreach i32 k, f32 x in [0:4:1], receive(_stream_in_field#10) { - _temp_0_0_0[k] = x - } - completion _send_comp = send(in_field_0_0_0, _stream_in_field#11) - await _send_comp - await _recv_comp - completion _recv_comp#1 = foreach i32 k#1, f32 x#1 in [0:4:1], receive(_stream_in_field#5) { - _temp_0_0_0#1[k#1] = x#1 - } - await _recv_comp#1 - completion _recv_comp#2 = foreach i32 k#2, 
f32 x#2 in [0:4:1], receive(_stream_in_field#8) { - _temp_0_0_0#2[k#2] = (_temp_0_0_0#1 + x#2) - } - completion _send_comp#2 = send(in_field_0_0_0, _stream_in_field#9) - await _send_comp#2 - await _recv_comp#2 - completion _recv_comp#3 = foreach i32 k#3, f32 x#3 in [0:4:1], receive(_stream_in_field#7) { - _temp_0_0_0#3[k#3] = (_temp_0_0_0#2 + x#3) - } - completion _send_comp#3 = send(in_field_0_0_0, _stream_in_field#6) - await _send_comp#3 - await _recv_comp#3 - await map i32 k#4 in [0:4:1] { - _temp_0_0_0#4[k#4] = (-4.0 * in_field_0_0_0[k#4]) - } - await map i32 k#5 in [0:4:1] { - out_field_0_0_0#1[k#5] = (_temp_0_0_0#4[k#5] + _temp_0_0_0#3[k#5]) - } - await map i32 k#6 in [0:4:1] { - out_field_0_0_0[k#6] = out_field_0_0_0#1[k#6] - } - awaitall - await send(out_field_0_0_0, __kernel_out_0[(i#5 - 1), (j#5 - 1)]) - } - compute u16 i#5, u16 j#5 in [4:5:2 , 3:4:2] { - await receive(in_field_0_0_0, _in_field[i#5, j#5]) - awaitall - completion _recv_comp = foreach i32 k, f32 x in [0:4:1], receive(_stream_in_field#10) { - _temp_0_0_0[k] = x - } - completion _send_comp = send(in_field_0_0_0, _stream_in_field#11) - await _send_comp - await _recv_comp - completion _recv_comp#1 = foreach i32 k#1, f32 x#1 in [0:4:1], receive(_stream_in_field#5) { - _temp_0_0_0#1[k#1] = x#1 - } - await _recv_comp#1 - completion _recv_comp#2 = foreach i32 k#2, f32 x#2 in [0:4:1], receive(_stream_in_field#9) { - _temp_0_0_0#2[k#2] = (_temp_0_0_0#1 + x#2) - } - completion _send_comp#2 = send(in_field_0_0_0, _stream_in_field#8) - await _send_comp#2 - await _recv_comp#2 - completion _recv_comp#3 = foreach i32 k#3, f32 x#3 in [0:4:1], receive(_stream_in_field#6) { - _temp_0_0_0#3[k#3] = (_temp_0_0_0#2 + x#3) - } - completion _send_comp#3 = send(in_field_0_0_0, _stream_in_field#7) - await _send_comp#3 - await _recv_comp#3 - await map i32 k#4 in [0:4:1] { - _temp_0_0_0#4[k#4] = (-4.0 * in_field_0_0_0[k#4]) - } - await map i32 k#5 in [0:4:1] { - out_field_0_0_0#1[k#5] = (_temp_0_0_0#4[k#5] + 
_temp_0_0_0#3[k#5]) - } - await map i32 k#6 in [0:4:1] { - out_field_0_0_0[k#6] = out_field_0_0_0#1[k#6] - } - awaitall - await send(out_field_0_0_0, __kernel_out_0[(i#5 - 1), (j#5 - 1)]) - } - compute u16 i#5, u16 j#5 in [2:4:2 , 1:2:2] { - await receive(in_field_0_0_0, _in_field[i#5, j#5]) - awaitall - completion _recv_comp = foreach i32 k, f32 x in [0:4:1], receive(_stream_in_field#10) { - _temp_0_0_0[k] = x - } - completion _send_comp = send(in_field_0_0_0, _stream_in_field#11) - await _send_comp - await _recv_comp - completion _recv_comp#1 = foreach i32 k#1, f32 x#1 in [0:4:1], receive(_stream_in_field#5) { - _temp_0_0_0#1[k#1] = (_temp_0_0_0 + x#1) - } - completion _send_comp#1 = send(in_field_0_0_0, _stream_in_field#4) - await _send_comp#1 - await _recv_comp#1 - completion _recv_comp#2 = foreach i32 k#2, f32 x#2 in [0:4:1], receive(_stream_in_field#9) { - _temp_0_0_0#2[k#2] = x#2 - } - await _recv_comp#2 - completion _recv_comp#3 = foreach i32 k#3, f32 x#3 in [0:4:1], receive(_stream_in_field#6) { - _temp_0_0_0#3[k#3] = (_temp_0_0_0#2 + x#3) - } - completion _send_comp#3 = send(in_field_0_0_0, _stream_in_field#7) - await _send_comp#3 - await _recv_comp#3 - await map i32 k#4 in [0:4:1] { - _temp_0_0_0#4[k#4] = (-4.0 * in_field_0_0_0[k#4]) - } - await map i32 k#5 in [0:4:1] { - out_field_0_0_0#1[k#5] = (_temp_0_0_0#4[k#5] + _temp_0_0_0#3[k#5]) - } - await map i32 k#6 in [0:4:1] { - out_field_0_0_0[k#6] = out_field_0_0_0#1[k#6] - } - awaitall - await send(out_field_0_0_0, __kernel_out_0[(i#5 - 1), (j#5 - 1)]) - } - compute u16 i#5, u16 j#5 in [3:4:2 , 1:2:2] { - await receive(in_field_0_0_0, _in_field[i#5, j#5]) - awaitall - completion _recv_comp = foreach i32 k, f32 x in [0:4:1], receive(_stream_in_field#11) { - _temp_0_0_0[k] = x - } - completion _send_comp = send(in_field_0_0_0, _stream_in_field#10) - await _send_comp - await _recv_comp - completion _recv_comp#1 = foreach i32 k#1, f32 x#1 in [0:4:1], receive(_stream_in_field#4) { - _temp_0_0_0#1[k#1] = 
(_temp_0_0_0 + x#1) - } - completion _send_comp#1 = send(in_field_0_0_0, _stream_in_field#5) - await _send_comp#1 - await _recv_comp#1 - completion _recv_comp#2 = foreach i32 k#2, f32 x#2 in [0:4:1], receive(_stream_in_field#9) { - _temp_0_0_0#2[k#2] = x#2 - } - await _recv_comp#2 - completion _recv_comp#3 = foreach i32 k#3, f32 x#3 in [0:4:1], receive(_stream_in_field#6) { - _temp_0_0_0#3[k#3] = (_temp_0_0_0#2 + x#3) - } - completion _send_comp#3 = send(in_field_0_0_0, _stream_in_field#7) - await _send_comp#3 - await _recv_comp#3 - await map i32 k#4 in [0:4:1] { - _temp_0_0_0#4[k#4] = (-4.0 * in_field_0_0_0[k#4]) - } - await map i32 k#5 in [0:4:1] { - out_field_0_0_0#1[k#5] = (_temp_0_0_0#4[k#5] + _temp_0_0_0#3[k#5]) - } - await map i32 k#6 in [0:4:1] { - out_field_0_0_0[k#6] = out_field_0_0_0#1[k#6] - } - awaitall - await send(out_field_0_0_0, __kernel_out_0[(i#5 - 1), (j#5 - 1)]) - } - compute u16 i#5, u16 j#5 in [1:2:2 , 1:2:2] { - await receive(in_field_0_0_0, _in_field[i#5, j#5]) - awaitall - completion _recv_comp = foreach i32 k, f32 x in [0:4:1], receive(_stream_in_field#11) { - _temp_0_0_0[k] = x - } - await _recv_comp - completion _recv_comp#1 = foreach i32 k#1, f32 x#1 in [0:4:1], receive(_stream_in_field#4) { - _temp_0_0_0#1[k#1] = (_temp_0_0_0 + x#1) - } - completion _send_comp#1 = send(in_field_0_0_0, _stream_in_field#5) - await _send_comp#1 - await _recv_comp#1 - completion _recv_comp#2 = foreach i32 k#2, f32 x#2 in [0:4:1], receive(_stream_in_field#9) { - _temp_0_0_0#2[k#2] = x#2 - } - await _recv_comp#2 - completion _recv_comp#3 = foreach i32 k#3, f32 x#3 in [0:4:1], receive(_stream_in_field#6) { - _temp_0_0_0#3[k#3] = (_temp_0_0_0#2 + x#3) - } - completion _send_comp#3 = send(in_field_0_0_0, _stream_in_field#7) - await _send_comp#3 - await _recv_comp#3 - await map i32 k#4 in [0:4:1] { - _temp_0_0_0#4[k#4] = (-4.0 * in_field_0_0_0[k#4]) - } - await map i32 k#5 in [0:4:1] { - out_field_0_0_0#1[k#5] = (_temp_0_0_0#4[k#5] + _temp_0_0_0#3[k#5]) - } - 
await map i32 k#6 in [0:4:1] { - out_field_0_0_0[k#6] = out_field_0_0_0#1[k#6] - } - awaitall - await send(out_field_0_0_0, __kernel_out_0[(i#5 - 1), (j#5 - 1)]) - } -} \ No newline at end of file diff --git a/samples/benchmarks/laplacian_746_990_320.sptl b/samples/benchmarks/laplacian_746_990_320.sptl new file mode 100644 index 00000000..9ef12acc --- /dev/null +++ b/samples/benchmarks/laplacian_746_990_320.sptl @@ -0,0 +1,1600 @@ +kernel @laplacian<>(stream[748, 992] readonly _in_field, stream[746, 990] writeonly __kernel_out_0) { + place u16 i#3, u16 j#3 in [0:1:2 , 1:991:2] { + f32[320] in_field_0_0_0 + f32[320] out_field_0_0_0#1 + f32[320] _temp_0_0_0 + f32[320] _temp_0_0_0#1 + f32[320] _temp_0_0_0#2 + f32[320] _temp_0_0_0#3 + f32[320] _temp_0_0_0#4 + } + place u16 i#3, u16 j#3 in [0:1:2 , 2:991:2] { + f32[320] in_field_0_0_0 + f32[320] out_field_0_0_0#1 + f32[320] _temp_0_0_0 + f32[320] _temp_0_0_0#1 + f32[320] _temp_0_0_0#2 + f32[320] _temp_0_0_0#3 + f32[320] _temp_0_0_0#4 + } + place u16 i#1, u16 j#1 in [0:1:2 , 0:1:2] { + f32[320] in_field_0_0_0 + f32[320] out_field_0_0_0#2 + f32[320] _temp_0_0_0#5 + f32[320] _temp_0_0_0#6 + f32[320] _temp_0_0_0#7 + f32[320] _temp_0_0_0#8 + f32[320] _temp_0_0_0#9 + } + place u16 i#2, u16 j#2 in [0:1:2 , 991:992:2] { + f32[320] in_field_0_0_0 + f32[320] out_field_0_0_0#3 + f32[320] _temp_0_0_0#10 + f32[320] _temp_0_0_0#11 + f32[320] _temp_0_0_0#12 + f32[320] _temp_0_0_0#13 + f32[320] _temp_0_0_0#14 + } + place u16 i#1, u16 j#1 in [747:748:2 , 0:1:2] { + f32[320] in_field_0_0_0 + f32[320] out_field_0_0_0#5 + f32[320] _temp_0_0_0#20 + f32[320] _temp_0_0_0#21 + f32[320] _temp_0_0_0#22 + f32[320] _temp_0_0_0#23 + f32[320] _temp_0_0_0#24 + } + place u16 i#1, u16 j#1 in [1:747:2 , 0:1:2] { + f32[320] in_field_0_0_0 + f32[320] out_field_0_0_0#6 + f32[320] _temp_0_0_0#25 + f32[320] _temp_0_0_0#26 + f32[320] _temp_0_0_0#27 + f32[320] _temp_0_0_0#28 + f32[320] _temp_0_0_0#29 + } + place u16 i#1, u16 j#1 in [2:747:2 , 0:1:2] { + 
f32[320] in_field_0_0_0 + f32[320] out_field_0_0_0#6 + f32[320] _temp_0_0_0#25 + f32[320] _temp_0_0_0#26 + f32[320] _temp_0_0_0#27 + f32[320] _temp_0_0_0#28 + f32[320] _temp_0_0_0#29 + } + place u16 i#2, u16 j#2 in [747:748:2 , 991:992:2] { + f32[320] in_field_0_0_0 + f32[320] out_field_0_0_0#7 + f32[320] _temp_0_0_0#30 + f32[320] _temp_0_0_0#31 + f32[320] _temp_0_0_0#32 + f32[320] _temp_0_0_0#33 + f32[320] _temp_0_0_0#34 + } + place u16 i#2, u16 j#2 in [1:747:2 , 991:992:2] { + f32[320] in_field_0_0_0 + f32[320] out_field_0_0_0#8 + f32[320] _temp_0_0_0#35 + f32[320] _temp_0_0_0#36 + f32[320] _temp_0_0_0#37 + f32[320] _temp_0_0_0#38 + f32[320] _temp_0_0_0#39 + } + place u16 i#2, u16 j#2 in [2:747:2 , 991:992:2] { + f32[320] in_field_0_0_0 + f32[320] out_field_0_0_0#8 + f32[320] _temp_0_0_0#35 + f32[320] _temp_0_0_0#36 + f32[320] _temp_0_0_0#37 + f32[320] _temp_0_0_0#38 + f32[320] _temp_0_0_0#39 + } + place u16 i#4, u16 j#4 in [747:748:2 , 2:991:2] { + f32[320] in_field_0_0_0 + f32[320] out_field_0_0_0#4 + f32[320] _temp_0_0_0#15 + f32[320] _temp_0_0_0#16 + f32[320] _temp_0_0_0#17 + f32[320] _temp_0_0_0#18 + f32[320] _temp_0_0_0#19 + } + place u16 i#4, u16 j#4 in [747:748:2 , 3:991:2] { + f32[320] in_field_0_0_0 + f32[320] out_field_0_0_0#4 + f32[320] _temp_0_0_0#15 + f32[320] _temp_0_0_0#16 + f32[320] _temp_0_0_0#17 + f32[320] _temp_0_0_0#18 + f32[320] _temp_0_0_0#19 + } + place u16 i, u16 j in [1:2:2 , 1:2:2] { + f32[320] out_field_0_0_0 + f32[320] in_field_0_0_0 + f32[320] out_field_0_0_0#9 + f32[320] _temp_0_0_0#40 + f32[320] _temp_0_0_0#41 + f32[320] _temp_0_0_0#42 + f32[320] _temp_0_0_0#43 + f32[320] _temp_0_0_0#44 + } + place u16 i#4, u16 j#4 in [747:748:2 , 1:2:2] { + f32[320] in_field_0_0_0 + f32[320] out_field_0_0_0#10 + f32[320] _temp_0_0_0#45 + f32[320] _temp_0_0_0#46 + f32[320] _temp_0_0_0#47 + f32[320] _temp_0_0_0#48 + f32[320] _temp_0_0_0#49 + } + place u16 i, u16 j in [1:2:2 , 990:991:2] { + f32[320] out_field_0_0_0 + f32[320] in_field_0_0_0 + 
f32[320] out_field_0_0_0#14 + f32[320] _temp_0_0_0#65 + f32[320] _temp_0_0_0#66 + f32[320] _temp_0_0_0#67 + f32[320] _temp_0_0_0#68 + f32[320] _temp_0_0_0#69 + } + place u16 i, u16 j in [746:747:2 , 2:990:2] { + f32[320] out_field_0_0_0 + f32[320] in_field_0_0_0 + f32[320] out_field_0_0_0#13 + f32[320] _temp_0_0_0#60 + f32[320] _temp_0_0_0#61 + f32[320] _temp_0_0_0#62 + f32[320] _temp_0_0_0#63 + f32[320] _temp_0_0_0#64 + } + place u16 i, u16 j in [746:747:2 , 3:990:2] { + f32[320] out_field_0_0_0 + f32[320] in_field_0_0_0 + f32[320] out_field_0_0_0#13 + f32[320] _temp_0_0_0#60 + f32[320] _temp_0_0_0#61 + f32[320] _temp_0_0_0#62 + f32[320] _temp_0_0_0#63 + f32[320] _temp_0_0_0#64 + } + place u16 i, u16 j in [2:746:2 , 1:2:2] { + f32[320] out_field_0_0_0 + f32[320] in_field_0_0_0 + f32[320] out_field_0_0_0#12 + f32[320] _temp_0_0_0#55 + f32[320] _temp_0_0_0#56 + f32[320] _temp_0_0_0#57 + f32[320] _temp_0_0_0#58 + f32[320] _temp_0_0_0#59 + } + place u16 i, u16 j in [3:746:2 , 1:2:2] { + f32[320] out_field_0_0_0 + f32[320] in_field_0_0_0 + f32[320] out_field_0_0_0#12 + f32[320] _temp_0_0_0#55 + f32[320] _temp_0_0_0#56 + f32[320] _temp_0_0_0#57 + f32[320] _temp_0_0_0#58 + f32[320] _temp_0_0_0#59 + } + place u16 i, u16 j in [746:747:2 , 1:2:2] { + f32[320] out_field_0_0_0 + f32[320] in_field_0_0_0 + f32[320] out_field_0_0_0#11 + f32[320] _temp_0_0_0#50 + f32[320] _temp_0_0_0#51 + f32[320] _temp_0_0_0#52 + f32[320] _temp_0_0_0#53 + f32[320] _temp_0_0_0#54 + } + place u16 i, u16 j in [746:747:2 , 990:991:2] { + f32[320] out_field_0_0_0 + f32[320] in_field_0_0_0 + f32[320] out_field_0_0_0#18 + f32[320] _temp_0_0_0#85 + f32[320] _temp_0_0_0#86 + f32[320] _temp_0_0_0#87 + f32[320] _temp_0_0_0#88 + f32[320] _temp_0_0_0#89 + } + place u16 i, u16 j in [2:746:2 , 990:991:2] { + f32[320] out_field_0_0_0 + f32[320] in_field_0_0_0 + f32[320] out_field_0_0_0#17 + f32[320] _temp_0_0_0#80 + f32[320] _temp_0_0_0#81 + f32[320] _temp_0_0_0#82 + f32[320] _temp_0_0_0#83 + f32[320] 
_temp_0_0_0#84 + } + place u16 i, u16 j in [3:746:2 , 990:991:2] { + f32[320] out_field_0_0_0 + f32[320] in_field_0_0_0 + f32[320] out_field_0_0_0#17 + f32[320] _temp_0_0_0#80 + f32[320] _temp_0_0_0#81 + f32[320] _temp_0_0_0#82 + f32[320] _temp_0_0_0#83 + f32[320] _temp_0_0_0#84 + } + place u16 i, u16 j in [2:746:2 , 2:990:2] { + f32[320] out_field_0_0_0 + f32[320] in_field_0_0_0 + f32[320] out_field_0_0_0#16 + f32[320] _temp_0_0_0#75 + f32[320] _temp_0_0_0#76 + f32[320] _temp_0_0_0#77 + f32[320] _temp_0_0_0#78 + f32[320] _temp_0_0_0#79 + } + place u16 i, u16 j in [2:746:2 , 3:990:2] { + f32[320] out_field_0_0_0 + f32[320] in_field_0_0_0 + f32[320] out_field_0_0_0#16 + f32[320] _temp_0_0_0#75 + f32[320] _temp_0_0_0#76 + f32[320] _temp_0_0_0#77 + f32[320] _temp_0_0_0#78 + f32[320] _temp_0_0_0#79 + } + place u16 i, u16 j in [3:746:2 , 2:990:2] { + f32[320] out_field_0_0_0 + f32[320] in_field_0_0_0 + f32[320] out_field_0_0_0#16 + f32[320] _temp_0_0_0#75 + f32[320] _temp_0_0_0#76 + f32[320] _temp_0_0_0#77 + f32[320] _temp_0_0_0#78 + f32[320] _temp_0_0_0#79 + } + place u16 i, u16 j in [3:746:2 , 3:990:2] { + f32[320] out_field_0_0_0 + f32[320] in_field_0_0_0 + f32[320] out_field_0_0_0#16 + f32[320] _temp_0_0_0#75 + f32[320] _temp_0_0_0#76 + f32[320] _temp_0_0_0#77 + f32[320] _temp_0_0_0#78 + f32[320] _temp_0_0_0#79 + } + place u16 i, u16 j in [1:2:2 , 2:990:2] { + f32[320] out_field_0_0_0 + f32[320] in_field_0_0_0 + f32[320] out_field_0_0_0#15 + f32[320] _temp_0_0_0#70 + f32[320] _temp_0_0_0#71 + f32[320] _temp_0_0_0#72 + f32[320] _temp_0_0_0#73 + f32[320] _temp_0_0_0#74 + } + place u16 i, u16 j in [1:2:2 , 3:990:2] { + f32[320] out_field_0_0_0 + f32[320] in_field_0_0_0 + f32[320] out_field_0_0_0#15 + f32[320] _temp_0_0_0#70 + f32[320] _temp_0_0_0#71 + f32[320] _temp_0_0_0#72 + f32[320] _temp_0_0_0#73 + f32[320] _temp_0_0_0#74 + } + dataflow u16 i#11, u16 j#11 in [0:1:2 , 1:991:2] { + stream _stream_in_field#4 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 0 
+} + stream _stream_in_field#5 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 1 +} + } + dataflow u16 i#11, u16 j#11 in [0:1:2 , 2:991:2] { + stream _stream_in_field#4 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 0 +} + stream _stream_in_field#5 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 1 +} + } + dataflow u16 i#15, u16 j#15 in [747:748:2 , 2:991:2] { + stream _stream_in_field#6 = relative_stream(-1, 0) { + hops = [(-1, 0)], + channel = 2 +} + stream _stream_in_field#7 = relative_stream(-1, 0) { + hops = [(-1, 0)], + channel = 3 +} + } + dataflow u16 i#15, u16 j#15 in [747:748:2 , 3:991:2] { + stream _stream_in_field#6 = relative_stream(-1, 0) { + hops = [(-1, 0)], + channel = 2 +} + stream _stream_in_field#7 = relative_stream(-1, 0) { + hops = [(-1, 0)], + channel = 3 +} + } + dataflow u16 i#12, u16 j#12 in [1:2:2 , 1:2:2] { + stream _stream_in_field#8 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 4 +} + stream _stream_in_field#9 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 5 +} + stream _stream_in_field#10 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 6 +} + stream _stream_in_field#11 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 7 +} + stream _stream_in_field#12 = relative_stream(0, -1) { + hops = [(0, -1)], + channel = 8 +} + stream _stream_in_field#13 = relative_stream(0, -1) { + hops = [(0, -1)], + channel = 9 +} + stream _stream_in_field#14 = relative_stream(-1, 0) { + hops = [(-1, 0)], + channel = 10 +} + stream _stream_in_field#15 = relative_stream(-1, 0) { + hops = [(-1, 0)], + channel = 11 +} + } + dataflow u16 i#13, u16 j#13 in [1:747:2 , 0:1:2] { + stream _stream_in_field#16 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 12 +} + stream _stream_in_field#17 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 13 +} + } + dataflow u16 i#13, u16 j#13 in [2:747:2 , 0:1:2] { + stream _stream_in_field#16 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 12 
+} + stream _stream_in_field#17 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 13 +} + } + dataflow u16 i#14, u16 j#14 in [1:747:2 , 991:992:2] { + stream _stream_in_field#18 = relative_stream(0, -1) { + hops = [(0, -1)], + channel = 14 +} + stream _stream_in_field#19 = relative_stream(0, -1) { + hops = [(0, -1)], + channel = 15 +} + } + dataflow u16 i#14, u16 j#14 in [2:747:2 , 991:992:2] { + stream _stream_in_field#18 = relative_stream(0, -1) { + hops = [(0, -1)], + channel = 14 +} + stream _stream_in_field#19 = relative_stream(0, -1) { + hops = [(0, -1)], + channel = 15 +} + } + dataflow u16 i#15, u16 j#15 in [747:748:2 , 1:2:2] { + stream _stream_in_field#20 = relative_stream(-1, 0) { + hops = [(-1, 0)], + channel = 16 +} + stream _stream_in_field#21 = relative_stream(-1, 0) { + hops = [(-1, 0)], + channel = 17 +} + } + dataflow u16 i#12, u16 j#12 in [1:2:2 , 990:991:2] { + stream _stream_in_field#22 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 18 +} + stream _stream_in_field#23 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 19 +} + stream _stream_in_field#24 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 20 +} + stream _stream_in_field#25 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 21 +} + stream _stream_in_field#26 = relative_stream(0, -1) { + hops = [(0, -1)], + channel = 22 +} + stream _stream_in_field#27 = relative_stream(0, -1) { + hops = [(0, -1)], + channel = 23 +} + stream _stream_in_field#28 = relative_stream(-1, 0) { + hops = [(-1, 0)], + channel = 24 +} + stream _stream_in_field#29 = relative_stream(-1, 0) { + hops = [(-1, 0)], + channel = 25 +} + } + dataflow u16 i#12, u16 j#12 in [746:747:2 , 2:990:2] { + stream _stream_in_field#30 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 26 +} + stream _stream_in_field#31 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 27 +} + stream _stream_in_field#32 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 28 +} + stream 
_stream_in_field#33 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 29 +} + stream _stream_in_field#34 = relative_stream(0, -1) { + hops = [(0, -1)], + channel = 30 +} + stream _stream_in_field#35 = relative_stream(0, -1) { + hops = [(0, -1)], + channel = 31 +} + stream _stream_in_field#36 = relative_stream(-1, 0) { + hops = [(-1, 0)], + channel = 32 +} + stream _stream_in_field#37 = relative_stream(-1, 0) { + hops = [(-1, 0)], + channel = 33 +} + } + dataflow u16 i#12, u16 j#12 in [746:747:2 , 3:990:2] { + stream _stream_in_field#30 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 26 +} + stream _stream_in_field#31 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 27 +} + stream _stream_in_field#32 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 28 +} + stream _stream_in_field#33 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 29 +} + stream _stream_in_field#34 = relative_stream(0, -1) { + hops = [(0, -1)], + channel = 30 +} + stream _stream_in_field#35 = relative_stream(0, -1) { + hops = [(0, -1)], + channel = 31 +} + stream _stream_in_field#36 = relative_stream(-1, 0) { + hops = [(-1, 0)], + channel = 32 +} + stream _stream_in_field#37 = relative_stream(-1, 0) { + hops = [(-1, 0)], + channel = 33 +} + } + dataflow u16 i#12, u16 j#12 in [2:746:2 , 1:2:2] { + stream _stream_in_field#38 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 34 +} + stream _stream_in_field#39 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 35 +} + stream _stream_in_field#40 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 36 +} + stream _stream_in_field#41 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 37 +} + stream _stream_in_field#42 = relative_stream(0, -1) { + hops = [(0, -1)], + channel = 38 +} + stream _stream_in_field#43 = relative_stream(0, -1) { + hops = [(0, -1)], + channel = 39 +} + stream _stream_in_field#44 = relative_stream(-1, 0) { + hops = [(-1, 0)], + channel = 40 +} + stream _stream_in_field#45 
= relative_stream(-1, 0) { + hops = [(-1, 0)], + channel = 41 +} + } + dataflow u16 i#12, u16 j#12 in [3:746:2 , 1:2:2] { + stream _stream_in_field#38 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 34 +} + stream _stream_in_field#39 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 35 +} + stream _stream_in_field#40 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 36 +} + stream _stream_in_field#41 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 37 +} + stream _stream_in_field#42 = relative_stream(0, -1) { + hops = [(0, -1)], + channel = 38 +} + stream _stream_in_field#43 = relative_stream(0, -1) { + hops = [(0, -1)], + channel = 39 +} + stream _stream_in_field#44 = relative_stream(-1, 0) { + hops = [(-1, 0)], + channel = 40 +} + stream _stream_in_field#45 = relative_stream(-1, 0) { + hops = [(-1, 0)], + channel = 41 +} + } + dataflow u16 i#12, u16 j#12 in [746:747:2 , 1:2:2] { + stream _stream_in_field#46 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 42 +} + stream _stream_in_field#47 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 43 +} + stream _stream_in_field#48 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 44 +} + stream _stream_in_field#49 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 45 +} + stream _stream_in_field#50 = relative_stream(0, -1) { + hops = [(0, -1)], + channel = 46 +} + stream _stream_in_field#51 = relative_stream(0, -1) { + hops = [(0, -1)], + channel = 47 +} + stream _stream_in_field#52 = relative_stream(-1, 0) { + hops = [(-1, 0)], + channel = 48 +} + stream _stream_in_field#53 = relative_stream(-1, 0) { + hops = [(-1, 0)], + channel = 49 +} + } + dataflow u16 i#12, u16 j#12 in [746:747:2 , 990:991:2] { + stream _stream_in_field#54 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 50 +} + stream _stream_in_field#55 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 51 +} + stream _stream_in_field#56 = relative_stream(0, 1) { + hops = [(0, 1)], + channel 
= 52 +} + stream _stream_in_field#57 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 53 +} + stream _stream_in_field#58 = relative_stream(0, -1) { + hops = [(0, -1)], + channel = 54 +} + stream _stream_in_field#59 = relative_stream(0, -1) { + hops = [(0, -1)], + channel = 55 +} + stream _stream_in_field#60 = relative_stream(-1, 0) { + hops = [(-1, 0)], + channel = 56 +} + stream _stream_in_field#61 = relative_stream(-1, 0) { + hops = [(-1, 0)], + channel = 57 +} + } + dataflow u16 i#12, u16 j#12 in [2:746:2 , 990:991:2] { + stream _stream_in_field#62 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 58 +} + stream _stream_in_field#63 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 59 +} + stream _stream_in_field#64 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 60 +} + stream _stream_in_field#65 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 61 +} + stream _stream_in_field#66 = relative_stream(0, -1) { + hops = [(0, -1)], + channel = 62 +} + stream _stream_in_field#67 = relative_stream(0, -1) { + hops = [(0, -1)], + channel = 63 +} + stream _stream_in_field#68 = relative_stream(-1, 0) { + hops = [(-1, 0)], + channel = 64 +} + stream _stream_in_field#69 = relative_stream(-1, 0) { + hops = [(-1, 0)], + channel = 65 +} + } + dataflow u16 i#12, u16 j#12 in [3:746:2 , 990:991:2] { + stream _stream_in_field#62 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 58 +} + stream _stream_in_field#63 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 59 +} + stream _stream_in_field#64 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 60 +} + stream _stream_in_field#65 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 61 +} + stream _stream_in_field#66 = relative_stream(0, -1) { + hops = [(0, -1)], + channel = 62 +} + stream _stream_in_field#67 = relative_stream(0, -1) { + hops = [(0, -1)], + channel = 63 +} + stream _stream_in_field#68 = relative_stream(-1, 0) { + hops = [(-1, 0)], + channel = 64 +} + 
stream _stream_in_field#69 = relative_stream(-1, 0) { + hops = [(-1, 0)], + channel = 65 +} + } + dataflow u16 i#12, u16 j#12 in [2:746:2 , 2:990:2] { + stream _stream_in_field#70 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 66 +} + stream _stream_in_field#71 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 67 +} + stream _stream_in_field#72 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 68 +} + stream _stream_in_field#73 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 69 +} + stream _stream_in_field#74 = relative_stream(0, -1) { + hops = [(0, -1)], + channel = 70 +} + stream _stream_in_field#75 = relative_stream(0, -1) { + hops = [(0, -1)], + channel = 71 +} + stream _stream_in_field#76 = relative_stream(-1, 0) { + hops = [(-1, 0)], + channel = 72 +} + stream _stream_in_field#77 = relative_stream(-1, 0) { + hops = [(-1, 0)], + channel = 73 +} + } + dataflow u16 i#12, u16 j#12 in [2:746:2 , 3:990:2] { + stream _stream_in_field#70 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 66 +} + stream _stream_in_field#71 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 67 +} + stream _stream_in_field#72 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 68 +} + stream _stream_in_field#73 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 69 +} + stream _stream_in_field#74 = relative_stream(0, -1) { + hops = [(0, -1)], + channel = 70 +} + stream _stream_in_field#75 = relative_stream(0, -1) { + hops = [(0, -1)], + channel = 71 +} + stream _stream_in_field#76 = relative_stream(-1, 0) { + hops = [(-1, 0)], + channel = 72 +} + stream _stream_in_field#77 = relative_stream(-1, 0) { + hops = [(-1, 0)], + channel = 73 +} + } + dataflow u16 i#12, u16 j#12 in [3:746:2 , 2:990:2] { + stream _stream_in_field#70 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 66 +} + stream _stream_in_field#71 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 67 +} + stream _stream_in_field#72 = relative_stream(0, 1) { + 
hops = [(0, 1)], + channel = 68 +} + stream _stream_in_field#73 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 69 +} + stream _stream_in_field#74 = relative_stream(0, -1) { + hops = [(0, -1)], + channel = 70 +} + stream _stream_in_field#75 = relative_stream(0, -1) { + hops = [(0, -1)], + channel = 71 +} + stream _stream_in_field#76 = relative_stream(-1, 0) { + hops = [(-1, 0)], + channel = 72 +} + stream _stream_in_field#77 = relative_stream(-1, 0) { + hops = [(-1, 0)], + channel = 73 +} + } + dataflow u16 i#12, u16 j#12 in [3:746:2 , 3:990:2] { + stream _stream_in_field#70 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 66 +} + stream _stream_in_field#71 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 67 +} + stream _stream_in_field#72 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 68 +} + stream _stream_in_field#73 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 69 +} + stream _stream_in_field#74 = relative_stream(0, -1) { + hops = [(0, -1)], + channel = 70 +} + stream _stream_in_field#75 = relative_stream(0, -1) { + hops = [(0, -1)], + channel = 71 +} + stream _stream_in_field#76 = relative_stream(-1, 0) { + hops = [(-1, 0)], + channel = 72 +} + stream _stream_in_field#77 = relative_stream(-1, 0) { + hops = [(-1, 0)], + channel = 73 +} + } + dataflow u16 i#12, u16 j#12 in [1:2:2 , 2:990:2] { + stream _stream_in_field#78 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 74 +} + stream _stream_in_field#79 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 75 +} + stream _stream_in_field#80 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 76 +} + stream _stream_in_field#81 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 77 +} + stream _stream_in_field#82 = relative_stream(0, -1) { + hops = [(0, -1)], + channel = 78 +} + stream _stream_in_field#83 = relative_stream(0, -1) { + hops = [(0, -1)], + channel = 79 +} + stream _stream_in_field#84 = relative_stream(-1, 0) { + hops = [(-1, 0)], + 
channel = 80 +} + stream _stream_in_field#85 = relative_stream(-1, 0) { + hops = [(-1, 0)], + channel = 81 +} + } + dataflow u16 i#12, u16 j#12 in [1:2:2 , 3:990:2] { + stream _stream_in_field#78 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 74 +} + stream _stream_in_field#79 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 75 +} + stream _stream_in_field#80 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 76 +} + stream _stream_in_field#81 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 77 +} + stream _stream_in_field#82 = relative_stream(0, -1) { + hops = [(0, -1)], + channel = 78 +} + stream _stream_in_field#83 = relative_stream(0, -1) { + hops = [(0, -1)], + channel = 79 +} + stream _stream_in_field#84 = relative_stream(-1, 0) { + hops = [(-1, 0)], + channel = 80 +} + stream _stream_in_field#85 = relative_stream(-1, 0) { + hops = [(-1, 0)], + channel = 81 +} + } + compute u16 i#8, u16 j#8 in [0:1:2 , 1:991:2] { + await receive(in_field_0_0_0, _in_field[i#8, j#8]) + awaitall + completion _send_comp#1 = send(in_field_0_0_0, _stream_in_field#4) + await _send_comp#1 + awaitall + } + compute u16 i#8, u16 j#8 in [0:1:2 , 2:991:2] { + await receive(in_field_0_0_0, _in_field[i#8, j#8]) + awaitall + completion _send_comp#1 = send(in_field_0_0_0, _stream_in_field#4) + await _send_comp#1 + awaitall + } + compute u16 i#6, u16 j#6 in [0:1:2 , 0:1:2] { + await receive(in_field_0_0_0, _in_field[i#6, j#6]) + awaitall + } + compute u16 i#7, u16 j#7 in [0:1:2 , 991:992:2] { + await receive(in_field_0_0_0, _in_field[i#7, j#7]) + awaitall + } + compute u16 i#6, u16 j#6 in [747:748:2 , 0:1:2] { + await receive(in_field_0_0_0, _in_field[i#6, j#6]) + awaitall + } + compute u16 i#6, u16 j#6 in [1:747:2 , 0:1:2] { + await receive(in_field_0_0_0, _in_field[i#6, j#6]) + awaitall + completion _send_comp#3 = send(in_field_0_0_0, _stream_in_field#16) + await _send_comp#3 + awaitall + } + compute u16 i#6, u16 j#6 in [2:747:2 , 0:1:2] { + await 
receive(in_field_0_0_0, _in_field[i#6, j#6]) + awaitall + completion _send_comp#3 = send(in_field_0_0_0, _stream_in_field#16) + await _send_comp#3 + awaitall + } + compute u16 i#7, u16 j#7 in [747:748:2 , 991:992:2] { + await receive(in_field_0_0_0, _in_field[i#7, j#7]) + awaitall + } + compute u16 i#7, u16 j#7 in [1:747:2 , 991:992:2] { + await receive(in_field_0_0_0, _in_field[i#7, j#7]) + awaitall + completion _send_comp#2 = send(in_field_0_0_0, _stream_in_field#18) + await _send_comp#2 + awaitall + } + compute u16 i#7, u16 j#7 in [2:747:2 , 991:992:2] { + await receive(in_field_0_0_0, _in_field[i#7, j#7]) + awaitall + completion _send_comp#2 = send(in_field_0_0_0, _stream_in_field#18) + await _send_comp#2 + awaitall + } + compute u16 i#9, u16 j#9 in [747:748:2 , 2:991:2] { + await receive(in_field_0_0_0, _in_field[i#9, j#9]) + awaitall + completion _send_comp = send(in_field_0_0_0, _stream_in_field#6) + await _send_comp + awaitall + } + compute u16 i#9, u16 j#9 in [747:748:2 , 3:991:2] { + await receive(in_field_0_0_0, _in_field[i#9, j#9]) + awaitall + completion _send_comp = send(in_field_0_0_0, _stream_in_field#6) + await _send_comp + awaitall + } + compute u16 i#5, u16 j#5 in [1:2:2 , 1:2:2] { + await receive(in_field_0_0_0, _in_field[i#5, j#5]) + awaitall + completion _recv_comp = foreach i32 k, f32 x in [0:320:1], receive(_stream_in_field#15) { + _temp_0_0_0#40[k] = x + } + await _recv_comp + completion _recv_comp#1 = foreach i32 k#1, f32 x#1 in [0:320:1], receive(_stream_in_field#8) { + _temp_0_0_0#41[k#1] = (_temp_0_0_0#40[k#1] + x#1) + } + completion _send_comp#1 = send(in_field_0_0_0, _stream_in_field#9) + await _send_comp#1 + await _recv_comp#1 + completion _recv_comp#2 = foreach i32 k#2, f32 x#2 in [0:320:1], receive(_stream_in_field#13) { + _temp_0_0_0#42[k#2] = (_temp_0_0_0#41[k#2] + x#2) + } + await _recv_comp#2 + completion _recv_comp#3 = foreach i32 k#3, f32 x#3 in [0:320:1], receive(_stream_in_field#10) { + _temp_0_0_0#43[k#3] = 
(_temp_0_0_0#42[k#3] + x#3) + } + completion _send_comp#3 = send(in_field_0_0_0, _stream_in_field#11) + await _send_comp#3 + await _recv_comp#3 + await map i32 k#4 in [0:320:1] { + _temp_0_0_0#44[k#4] = (-4.0 * in_field_0_0_0[k#4]) + } + await map i32 k#5 in [0:320:1] { + out_field_0_0_0#9[k#5] = (_temp_0_0_0#44[k#5] + _temp_0_0_0#43[k#5]) + } + await map i32 k#6 in [0:320:1] { + out_field_0_0_0[k#6] = out_field_0_0_0#9[k#6] + } + awaitall + await send(out_field_0_0_0, __kernel_out_0[(i#5 - 1), (j#5 - 1)]) + awaitall + } + compute u16 i#9, u16 j#9 in [747:748:2 , 1:2:2] { + await receive(in_field_0_0_0, _in_field[i#9, j#9]) + awaitall + completion _send_comp = send(in_field_0_0_0, _stream_in_field#20) + await _send_comp + awaitall + } + compute u16 i#5, u16 j#5 in [1:2:2 , 990:991:2] { + await receive(in_field_0_0_0, _in_field[i#5, j#5]) + awaitall + completion _recv_comp = foreach i32 k, f32 x in [0:320:1], receive(_stream_in_field#29) { + _temp_0_0_0#65[k] = x + } + await _recv_comp + completion _recv_comp#1 = foreach i32 k#1, f32 x#1 in [0:320:1], receive(_stream_in_field#22) { + _temp_0_0_0#66[k#1] = (_temp_0_0_0#65[k#1] + x#1) + } + completion _send_comp#1 = send(in_field_0_0_0, _stream_in_field#23) + await _send_comp#1 + await _recv_comp#1 + completion _recv_comp#2 = foreach i32 k#2, f32 x#2 in [0:320:1], receive(_stream_in_field#26) { + _temp_0_0_0#67[k#2] = (_temp_0_0_0#66[k#2] + x#2) + } + completion _send_comp#2 = send(in_field_0_0_0, _stream_in_field#27) + await _send_comp#2 + await _recv_comp#2 + completion _recv_comp#3 = foreach i32 k#3, f32 x#3 in [0:320:1], receive(_stream_in_field#25) { + _temp_0_0_0#68[k#3] = (_temp_0_0_0#67[k#3] + x#3) + } + await _recv_comp#3 + await map i32 k#4 in [0:320:1] { + _temp_0_0_0#69[k#4] = (-4.0 * in_field_0_0_0[k#4]) + } + await map i32 k#5 in [0:320:1] { + out_field_0_0_0#14[k#5] = (_temp_0_0_0#69[k#5] + _temp_0_0_0#68[k#5]) + } + await map i32 k#6 in [0:320:1] { + out_field_0_0_0[k#6] = out_field_0_0_0#14[k#6] + } + 
awaitall + await send(out_field_0_0_0, __kernel_out_0[(i#5 - 1), (j#5 - 1)]) + awaitall + } + compute u16 i#5, u16 j#5 in [746:747:2 , 2:990:2] { + await receive(in_field_0_0_0, _in_field[i#5, j#5]) + awaitall + completion _recv_comp = foreach i32 k, f32 x in [0:320:1], receive(_stream_in_field#36) { + _temp_0_0_0#60[k] = x + } + completion _send_comp = send(in_field_0_0_0, _stream_in_field#37) + await _send_comp + await _recv_comp + completion _recv_comp#1 = foreach i32 k#1, f32 x#1 in [0:320:1], receive(_stream_in_field#31) { + _temp_0_0_0#61[k#1] = (_temp_0_0_0#60[k#1] + x#1) + } + await _recv_comp#1 + completion _recv_comp#2 = foreach i32 k#2, f32 x#2 in [0:320:1], receive(_stream_in_field#34) { + _temp_0_0_0#62[k#2] = (_temp_0_0_0#61[k#2] + x#2) + } + completion _send_comp#2 = send(in_field_0_0_0, _stream_in_field#35) + await _send_comp#2 + await _recv_comp#2 + completion _recv_comp#3 = foreach i32 k#3, f32 x#3 in [0:320:1], receive(_stream_in_field#33) { + _temp_0_0_0#63[k#3] = (_temp_0_0_0#62[k#3] + x#3) + } + completion _send_comp#3 = send(in_field_0_0_0, _stream_in_field#32) + await _send_comp#3 + await _recv_comp#3 + await map i32 k#4 in [0:320:1] { + _temp_0_0_0#64[k#4] = (-4.0 * in_field_0_0_0[k#4]) + } + await map i32 k#5 in [0:320:1] { + out_field_0_0_0#13[k#5] = (_temp_0_0_0#64[k#5] + _temp_0_0_0#63[k#5]) + } + await map i32 k#6 in [0:320:1] { + out_field_0_0_0[k#6] = out_field_0_0_0#13[k#6] + } + awaitall + await send(out_field_0_0_0, __kernel_out_0[(i#5 - 1), (j#5 - 1)]) + awaitall + } + compute u16 i#5, u16 j#5 in [746:747:2 , 3:990:2] { + await receive(in_field_0_0_0, _in_field[i#5, j#5]) + awaitall + completion _recv_comp = foreach i32 k, f32 x in [0:320:1], receive(_stream_in_field#36) { + _temp_0_0_0#60[k] = x + } + completion _send_comp = send(in_field_0_0_0, _stream_in_field#37) + await _send_comp + await _recv_comp + completion _recv_comp#1 = foreach i32 k#1, f32 x#1 in [0:320:1], receive(_stream_in_field#31) { + _temp_0_0_0#61[k#1] = 
(_temp_0_0_0#60[k#1] + x#1) + } + await _recv_comp#1 + completion _recv_comp#2 = foreach i32 k#2, f32 x#2 in [0:320:1], receive(_stream_in_field#35) { + _temp_0_0_0#62[k#2] = (_temp_0_0_0#61[k#2] + x#2) + } + completion _send_comp#2 = send(in_field_0_0_0, _stream_in_field#34) + await _send_comp#2 + await _recv_comp#2 + completion _recv_comp#3 = foreach i32 k#3, f32 x#3 in [0:320:1], receive(_stream_in_field#32) { + _temp_0_0_0#63[k#3] = (_temp_0_0_0#62[k#3] + x#3) + } + completion _send_comp#3 = send(in_field_0_0_0, _stream_in_field#33) + await _send_comp#3 + await _recv_comp#3 + await map i32 k#4 in [0:320:1] { + _temp_0_0_0#64[k#4] = (-4.0 * in_field_0_0_0[k#4]) + } + await map i32 k#5 in [0:320:1] { + out_field_0_0_0#13[k#5] = (_temp_0_0_0#64[k#5] + _temp_0_0_0#63[k#5]) + } + await map i32 k#6 in [0:320:1] { + out_field_0_0_0[k#6] = out_field_0_0_0#13[k#6] + } + awaitall + await send(out_field_0_0_0, __kernel_out_0[(i#5 - 1), (j#5 - 1)]) + awaitall + } + compute u16 i#5, u16 j#5 in [2:746:2 , 1:2:2] { + await receive(in_field_0_0_0, _in_field[i#5, j#5]) + awaitall + completion _recv_comp = foreach i32 k, f32 x in [0:320:1], receive(_stream_in_field#44) { + _temp_0_0_0#55[k] = x + } + completion _send_comp = send(in_field_0_0_0, _stream_in_field#45) + await _send_comp + await _recv_comp + completion _recv_comp#1 = foreach i32 k#1, f32 x#1 in [0:320:1], receive(_stream_in_field#39) { + _temp_0_0_0#56[k#1] = (_temp_0_0_0#55[k#1] + x#1) + } + completion _send_comp#1 = send(in_field_0_0_0, _stream_in_field#38) + await _send_comp#1 + await _recv_comp#1 + completion _recv_comp#2 = foreach i32 k#2, f32 x#2 in [0:320:1], receive(_stream_in_field#43) { + _temp_0_0_0#57[k#2] = (_temp_0_0_0#56[k#2] + x#2) + } + await _recv_comp#2 + completion _recv_comp#3 = foreach i32 k#3, f32 x#3 in [0:320:1], receive(_stream_in_field#40) { + _temp_0_0_0#58[k#3] = (_temp_0_0_0#57[k#3] + x#3) + } + completion _send_comp#3 = send(in_field_0_0_0, _stream_in_field#41) + await _send_comp#3 + 
await _recv_comp#3 + await map i32 k#4 in [0:320:1] { + _temp_0_0_0#59[k#4] = (-4.0 * in_field_0_0_0[k#4]) + } + await map i32 k#5 in [0:320:1] { + out_field_0_0_0#12[k#5] = (_temp_0_0_0#59[k#5] + _temp_0_0_0#58[k#5]) + } + await map i32 k#6 in [0:320:1] { + out_field_0_0_0[k#6] = out_field_0_0_0#12[k#6] + } + awaitall + await send(out_field_0_0_0, __kernel_out_0[(i#5 - 1), (j#5 - 1)]) + awaitall + } + compute u16 i#5, u16 j#5 in [3:746:2 , 1:2:2] { + await receive(in_field_0_0_0, _in_field[i#5, j#5]) + awaitall + completion _recv_comp = foreach i32 k, f32 x in [0:320:1], receive(_stream_in_field#45) { + _temp_0_0_0#55[k] = x + } + completion _send_comp = send(in_field_0_0_0, _stream_in_field#44) + await _send_comp + await _recv_comp + completion _recv_comp#1 = foreach i32 k#1, f32 x#1 in [0:320:1], receive(_stream_in_field#38) { + _temp_0_0_0#56[k#1] = (_temp_0_0_0#55[k#1] + x#1) + } + completion _send_comp#1 = send(in_field_0_0_0, _stream_in_field#39) + await _send_comp#1 + await _recv_comp#1 + completion _recv_comp#2 = foreach i32 k#2, f32 x#2 in [0:320:1], receive(_stream_in_field#43) { + _temp_0_0_0#57[k#2] = (_temp_0_0_0#56[k#2] + x#2) + } + await _recv_comp#2 + completion _recv_comp#3 = foreach i32 k#3, f32 x#3 in [0:320:1], receive(_stream_in_field#40) { + _temp_0_0_0#58[k#3] = (_temp_0_0_0#57[k#3] + x#3) + } + completion _send_comp#3 = send(in_field_0_0_0, _stream_in_field#41) + await _send_comp#3 + await _recv_comp#3 + await map i32 k#4 in [0:320:1] { + _temp_0_0_0#59[k#4] = (-4.0 * in_field_0_0_0[k#4]) + } + await map i32 k#5 in [0:320:1] { + out_field_0_0_0#12[k#5] = (_temp_0_0_0#59[k#5] + _temp_0_0_0#58[k#5]) + } + await map i32 k#6 in [0:320:1] { + out_field_0_0_0[k#6] = out_field_0_0_0#12[k#6] + } + awaitall + await send(out_field_0_0_0, __kernel_out_0[(i#5 - 1), (j#5 - 1)]) + awaitall + } + compute u16 i#5, u16 j#5 in [746:747:2 , 1:2:2] { + await receive(in_field_0_0_0, _in_field[i#5, j#5]) + awaitall + completion _recv_comp = foreach i32 k, f32 x 
in [0:320:1], receive(_stream_in_field#52) { + _temp_0_0_0#50[k] = x + } + completion _send_comp = send(in_field_0_0_0, _stream_in_field#53) + await _send_comp + await _recv_comp + completion _recv_comp#1 = foreach i32 k#1, f32 x#1 in [0:320:1], receive(_stream_in_field#47) { + _temp_0_0_0#51[k#1] = (_temp_0_0_0#50[k#1] + x#1) + } + await _recv_comp#1 + completion _recv_comp#2 = foreach i32 k#2, f32 x#2 in [0:320:1], receive(_stream_in_field#51) { + _temp_0_0_0#52[k#2] = (_temp_0_0_0#51[k#2] + x#2) + } + await _recv_comp#2 + completion _recv_comp#3 = foreach i32 k#3, f32 x#3 in [0:320:1], receive(_stream_in_field#48) { + _temp_0_0_0#53[k#3] = (_temp_0_0_0#52[k#3] + x#3) + } + completion _send_comp#3 = send(in_field_0_0_0, _stream_in_field#49) + await _send_comp#3 + await _recv_comp#3 + await map i32 k#4 in [0:320:1] { + _temp_0_0_0#54[k#4] = (-4.0 * in_field_0_0_0[k#4]) + } + await map i32 k#5 in [0:320:1] { + out_field_0_0_0#11[k#5] = (_temp_0_0_0#54[k#5] + _temp_0_0_0#53[k#5]) + } + await map i32 k#6 in [0:320:1] { + out_field_0_0_0[k#6] = out_field_0_0_0#11[k#6] + } + awaitall + await send(out_field_0_0_0, __kernel_out_0[(i#5 - 1), (j#5 - 1)]) + awaitall + } + compute u16 i#5, u16 j#5 in [746:747:2 , 990:991:2] { + await receive(in_field_0_0_0, _in_field[i#5, j#5]) + awaitall + completion _recv_comp = foreach i32 k, f32 x in [0:320:1], receive(_stream_in_field#60) { + _temp_0_0_0#85[k] = x + } + completion _send_comp = send(in_field_0_0_0, _stream_in_field#61) + await _send_comp + await _recv_comp + completion _recv_comp#1 = foreach i32 k#1, f32 x#1 in [0:320:1], receive(_stream_in_field#55) { + _temp_0_0_0#86[k#1] = (_temp_0_0_0#85[k#1] + x#1) + } + await _recv_comp#1 + completion _recv_comp#2 = foreach i32 k#2, f32 x#2 in [0:320:1], receive(_stream_in_field#58) { + _temp_0_0_0#87[k#2] = (_temp_0_0_0#86[k#2] + x#2) + } + completion _send_comp#2 = send(in_field_0_0_0, _stream_in_field#59) + await _send_comp#2 + await _recv_comp#2 + completion _recv_comp#3 = 
foreach i32 k#3, f32 x#3 in [0:320:1], receive(_stream_in_field#57) { + _temp_0_0_0#88[k#3] = (_temp_0_0_0#87[k#3] + x#3) + } + await _recv_comp#3 + await map i32 k#4 in [0:320:1] { + _temp_0_0_0#89[k#4] = (-4.0 * in_field_0_0_0[k#4]) + } + await map i32 k#5 in [0:320:1] { + out_field_0_0_0#18[k#5] = (_temp_0_0_0#89[k#5] + _temp_0_0_0#88[k#5]) + } + await map i32 k#6 in [0:320:1] { + out_field_0_0_0[k#6] = out_field_0_0_0#18[k#6] + } + awaitall + await send(out_field_0_0_0, __kernel_out_0[(i#5 - 1), (j#5 - 1)]) + awaitall + } + compute u16 i#5, u16 j#5 in [2:746:2 , 990:991:2] { + await receive(in_field_0_0_0, _in_field[i#5, j#5]) + awaitall + completion _recv_comp = foreach i32 k, f32 x in [0:320:1], receive(_stream_in_field#68) { + _temp_0_0_0#80[k] = x + } + completion _send_comp = send(in_field_0_0_0, _stream_in_field#69) + await _send_comp + await _recv_comp + completion _recv_comp#1 = foreach i32 k#1, f32 x#1 in [0:320:1], receive(_stream_in_field#63) { + _temp_0_0_0#81[k#1] = (_temp_0_0_0#80[k#1] + x#1) + } + completion _send_comp#1 = send(in_field_0_0_0, _stream_in_field#62) + await _send_comp#1 + await _recv_comp#1 + completion _recv_comp#2 = foreach i32 k#2, f32 x#2 in [0:320:1], receive(_stream_in_field#66) { + _temp_0_0_0#82[k#2] = (_temp_0_0_0#81[k#2] + x#2) + } + completion _send_comp#2 = send(in_field_0_0_0, _stream_in_field#67) + await _send_comp#2 + await _recv_comp#2 + completion _recv_comp#3 = foreach i32 k#3, f32 x#3 in [0:320:1], receive(_stream_in_field#65) { + _temp_0_0_0#83[k#3] = (_temp_0_0_0#82[k#3] + x#3) + } + await _recv_comp#3 + await map i32 k#4 in [0:320:1] { + _temp_0_0_0#84[k#4] = (-4.0 * in_field_0_0_0[k#4]) + } + await map i32 k#5 in [0:320:1] { + out_field_0_0_0#17[k#5] = (_temp_0_0_0#84[k#5] + _temp_0_0_0#83[k#5]) + } + await map i32 k#6 in [0:320:1] { + out_field_0_0_0[k#6] = out_field_0_0_0#17[k#6] + } + awaitall + await send(out_field_0_0_0, __kernel_out_0[(i#5 - 1), (j#5 - 1)]) + awaitall + } + compute u16 i#5, u16 j#5 in 
[3:746:2 , 990:991:2] { + await receive(in_field_0_0_0, _in_field[i#5, j#5]) + awaitall + completion _recv_comp = foreach i32 k, f32 x in [0:320:1], receive(_stream_in_field#69) { + _temp_0_0_0#80[k] = x + } + completion _send_comp = send(in_field_0_0_0, _stream_in_field#68) + await _send_comp + await _recv_comp + completion _recv_comp#1 = foreach i32 k#1, f32 x#1 in [0:320:1], receive(_stream_in_field#62) { + _temp_0_0_0#81[k#1] = (_temp_0_0_0#80[k#1] + x#1) + } + completion _send_comp#1 = send(in_field_0_0_0, _stream_in_field#63) + await _send_comp#1 + await _recv_comp#1 + completion _recv_comp#2 = foreach i32 k#2, f32 x#2 in [0:320:1], receive(_stream_in_field#66) { + _temp_0_0_0#82[k#2] = (_temp_0_0_0#81[k#2] + x#2) + } + completion _send_comp#2 = send(in_field_0_0_0, _stream_in_field#67) + await _send_comp#2 + await _recv_comp#2 + completion _recv_comp#3 = foreach i32 k#3, f32 x#3 in [0:320:1], receive(_stream_in_field#65) { + _temp_0_0_0#83[k#3] = (_temp_0_0_0#82[k#3] + x#3) + } + await _recv_comp#3 + await map i32 k#4 in [0:320:1] { + _temp_0_0_0#84[k#4] = (-4.0 * in_field_0_0_0[k#4]) + } + await map i32 k#5 in [0:320:1] { + out_field_0_0_0#17[k#5] = (_temp_0_0_0#84[k#5] + _temp_0_0_0#83[k#5]) + } + await map i32 k#6 in [0:320:1] { + out_field_0_0_0[k#6] = out_field_0_0_0#17[k#6] + } + awaitall + await send(out_field_0_0_0, __kernel_out_0[(i#5 - 1), (j#5 - 1)]) + awaitall + } + compute u16 i#5, u16 j#5 in [2:746:2 , 2:990:2] { + await receive(in_field_0_0_0, _in_field[i#5, j#5]) + awaitall + completion _recv_comp = foreach i32 k, f32 x in [0:320:1], receive(_stream_in_field#76) { + _temp_0_0_0#75[k] = x + } + completion _send_comp = send(in_field_0_0_0, _stream_in_field#77) + await _send_comp + await _recv_comp + completion _recv_comp#1 = foreach i32 k#1, f32 x#1 in [0:320:1], receive(_stream_in_field#71) { + _temp_0_0_0#76[k#1] = (_temp_0_0_0#75[k#1] + x#1) + } + completion _send_comp#1 = send(in_field_0_0_0, _stream_in_field#70) + await _send_comp#1 + 
await _recv_comp#1 + completion _recv_comp#2 = foreach i32 k#2, f32 x#2 in [0:320:1], receive(_stream_in_field#74) { + _temp_0_0_0#77[k#2] = (_temp_0_0_0#76[k#2] + x#2) + } + completion _send_comp#2 = send(in_field_0_0_0, _stream_in_field#75) + await _send_comp#2 + await _recv_comp#2 + completion _recv_comp#3 = foreach i32 k#3, f32 x#3 in [0:320:1], receive(_stream_in_field#73) { + _temp_0_0_0#78[k#3] = (_temp_0_0_0#77[k#3] + x#3) + } + completion _send_comp#3 = send(in_field_0_0_0, _stream_in_field#72) + await _send_comp#3 + await _recv_comp#3 + await map i32 k#4 in [0:320:1] { + _temp_0_0_0#79[k#4] = (-4.0 * in_field_0_0_0[k#4]) + } + await map i32 k#5 in [0:320:1] { + out_field_0_0_0#16[k#5] = (_temp_0_0_0#79[k#5] + _temp_0_0_0#78[k#5]) + } + await map i32 k#6 in [0:320:1] { + out_field_0_0_0[k#6] = out_field_0_0_0#16[k#6] + } + awaitall + await send(out_field_0_0_0, __kernel_out_0[(i#5 - 1), (j#5 - 1)]) + awaitall + } + compute u16 i#5, u16 j#5 in [2:746:2 , 3:990:2] { + await receive(in_field_0_0_0, _in_field[i#5, j#5]) + awaitall + completion _recv_comp = foreach i32 k, f32 x in [0:320:1], receive(_stream_in_field#76) { + _temp_0_0_0#75[k] = x + } + completion _send_comp = send(in_field_0_0_0, _stream_in_field#77) + await _send_comp + await _recv_comp + completion _recv_comp#1 = foreach i32 k#1, f32 x#1 in [0:320:1], receive(_stream_in_field#71) { + _temp_0_0_0#76[k#1] = (_temp_0_0_0#75[k#1] + x#1) + } + completion _send_comp#1 = send(in_field_0_0_0, _stream_in_field#70) + await _send_comp#1 + await _recv_comp#1 + completion _recv_comp#2 = foreach i32 k#2, f32 x#2 in [0:320:1], receive(_stream_in_field#75) { + _temp_0_0_0#77[k#2] = (_temp_0_0_0#76[k#2] + x#2) + } + completion _send_comp#2 = send(in_field_0_0_0, _stream_in_field#74) + await _send_comp#2 + await _recv_comp#2 + completion _recv_comp#3 = foreach i32 k#3, f32 x#3 in [0:320:1], receive(_stream_in_field#72) { + _temp_0_0_0#78[k#3] = (_temp_0_0_0#77[k#3] + x#3) + } + completion _send_comp#3 = 
send(in_field_0_0_0, _stream_in_field#73) + await _send_comp#3 + await _recv_comp#3 + await map i32 k#4 in [0:320:1] { + _temp_0_0_0#79[k#4] = (-4.0 * in_field_0_0_0[k#4]) + } + await map i32 k#5 in [0:320:1] { + out_field_0_0_0#16[k#5] = (_temp_0_0_0#79[k#5] + _temp_0_0_0#78[k#5]) + } + await map i32 k#6 in [0:320:1] { + out_field_0_0_0[k#6] = out_field_0_0_0#16[k#6] + } + awaitall + await send(out_field_0_0_0, __kernel_out_0[(i#5 - 1), (j#5 - 1)]) + awaitall + } + compute u16 i#5, u16 j#5 in [3:746:2 , 2:990:2] { + await receive(in_field_0_0_0, _in_field[i#5, j#5]) + awaitall + completion _recv_comp = foreach i32 k, f32 x in [0:320:1], receive(_stream_in_field#77) { + _temp_0_0_0#75[k] = x + } + completion _send_comp = send(in_field_0_0_0, _stream_in_field#76) + await _send_comp + await _recv_comp + completion _recv_comp#1 = foreach i32 k#1, f32 x#1 in [0:320:1], receive(_stream_in_field#70) { + _temp_0_0_0#76[k#1] = (_temp_0_0_0#75[k#1] + x#1) + } + completion _send_comp#1 = send(in_field_0_0_0, _stream_in_field#71) + await _send_comp#1 + await _recv_comp#1 + completion _recv_comp#2 = foreach i32 k#2, f32 x#2 in [0:320:1], receive(_stream_in_field#74) { + _temp_0_0_0#77[k#2] = (_temp_0_0_0#76[k#2] + x#2) + } + completion _send_comp#2 = send(in_field_0_0_0, _stream_in_field#75) + await _send_comp#2 + await _recv_comp#2 + completion _recv_comp#3 = foreach i32 k#3, f32 x#3 in [0:320:1], receive(_stream_in_field#73) { + _temp_0_0_0#78[k#3] = (_temp_0_0_0#77[k#3] + x#3) + } + completion _send_comp#3 = send(in_field_0_0_0, _stream_in_field#72) + await _send_comp#3 + await _recv_comp#3 + await map i32 k#4 in [0:320:1] { + _temp_0_0_0#79[k#4] = (-4.0 * in_field_0_0_0[k#4]) + } + await map i32 k#5 in [0:320:1] { + out_field_0_0_0#16[k#5] = (_temp_0_0_0#79[k#5] + _temp_0_0_0#78[k#5]) + } + await map i32 k#6 in [0:320:1] { + out_field_0_0_0[k#6] = out_field_0_0_0#16[k#6] + } + awaitall + await send(out_field_0_0_0, __kernel_out_0[(i#5 - 1), (j#5 - 1)]) + awaitall + } + 
compute u16 i#5, u16 j#5 in [3:746:2 , 3:990:2] { + await receive(in_field_0_0_0, _in_field[i#5, j#5]) + awaitall + completion _recv_comp = foreach i32 k, f32 x in [0:320:1], receive(_stream_in_field#77) { + _temp_0_0_0#75[k] = x + } + completion _send_comp = send(in_field_0_0_0, _stream_in_field#76) + await _send_comp + await _recv_comp + completion _recv_comp#1 = foreach i32 k#1, f32 x#1 in [0:320:1], receive(_stream_in_field#70) { + _temp_0_0_0#76[k#1] = (_temp_0_0_0#75[k#1] + x#1) + } + completion _send_comp#1 = send(in_field_0_0_0, _stream_in_field#71) + await _send_comp#1 + await _recv_comp#1 + completion _recv_comp#2 = foreach i32 k#2, f32 x#2 in [0:320:1], receive(_stream_in_field#75) { + _temp_0_0_0#77[k#2] = (_temp_0_0_0#76[k#2] + x#2) + } + completion _send_comp#2 = send(in_field_0_0_0, _stream_in_field#74) + await _send_comp#2 + await _recv_comp#2 + completion _recv_comp#3 = foreach i32 k#3, f32 x#3 in [0:320:1], receive(_stream_in_field#72) { + _temp_0_0_0#78[k#3] = (_temp_0_0_0#77[k#3] + x#3) + } + completion _send_comp#3 = send(in_field_0_0_0, _stream_in_field#73) + await _send_comp#3 + await _recv_comp#3 + await map i32 k#4 in [0:320:1] { + _temp_0_0_0#79[k#4] = (-4.0 * in_field_0_0_0[k#4]) + } + await map i32 k#5 in [0:320:1] { + out_field_0_0_0#16[k#5] = (_temp_0_0_0#79[k#5] + _temp_0_0_0#78[k#5]) + } + await map i32 k#6 in [0:320:1] { + out_field_0_0_0[k#6] = out_field_0_0_0#16[k#6] + } + awaitall + await send(out_field_0_0_0, __kernel_out_0[(i#5 - 1), (j#5 - 1)]) + awaitall + } + compute u16 i#5, u16 j#5 in [1:2:2 , 2:990:2] { + await receive(in_field_0_0_0, _in_field[i#5, j#5]) + awaitall + completion _recv_comp = foreach i32 k, f32 x in [0:320:1], receive(_stream_in_field#85) { + _temp_0_0_0#70[k] = x + } + await _recv_comp + completion _recv_comp#1 = foreach i32 k#1, f32 x#1 in [0:320:1], receive(_stream_in_field#78) { + _temp_0_0_0#71[k#1] = (_temp_0_0_0#70[k#1] + x#1) + } + completion _send_comp#1 = send(in_field_0_0_0, _stream_in_field#79) 
+ await _send_comp#1 + await _recv_comp#1 + completion _recv_comp#2 = foreach i32 k#2, f32 x#2 in [0:320:1], receive(_stream_in_field#82) { + _temp_0_0_0#72[k#2] = (_temp_0_0_0#71[k#2] + x#2) + } + completion _send_comp#2 = send(in_field_0_0_0, _stream_in_field#83) + await _send_comp#2 + await _recv_comp#2 + completion _recv_comp#3 = foreach i32 k#3, f32 x#3 in [0:320:1], receive(_stream_in_field#81) { + _temp_0_0_0#73[k#3] = (_temp_0_0_0#72[k#3] + x#3) + } + completion _send_comp#3 = send(in_field_0_0_0, _stream_in_field#80) + await _send_comp#3 + await _recv_comp#3 + await map i32 k#4 in [0:320:1] { + _temp_0_0_0#74[k#4] = (-4.0 * in_field_0_0_0[k#4]) + } + await map i32 k#5 in [0:320:1] { + out_field_0_0_0#15[k#5] = (_temp_0_0_0#74[k#5] + _temp_0_0_0#73[k#5]) + } + await map i32 k#6 in [0:320:1] { + out_field_0_0_0[k#6] = out_field_0_0_0#15[k#6] + } + awaitall + await send(out_field_0_0_0, __kernel_out_0[(i#5 - 1), (j#5 - 1)]) + awaitall + } + compute u16 i#5, u16 j#5 in [1:2:2 , 3:990:2] { + await receive(in_field_0_0_0, _in_field[i#5, j#5]) + awaitall + completion _recv_comp = foreach i32 k, f32 x in [0:320:1], receive(_stream_in_field#85) { + _temp_0_0_0#70[k] = x + } + await _recv_comp + completion _recv_comp#1 = foreach i32 k#1, f32 x#1 in [0:320:1], receive(_stream_in_field#78) { + _temp_0_0_0#71[k#1] = (_temp_0_0_0#70[k#1] + x#1) + } + completion _send_comp#1 = send(in_field_0_0_0, _stream_in_field#79) + await _send_comp#1 + await _recv_comp#1 + completion _recv_comp#2 = foreach i32 k#2, f32 x#2 in [0:320:1], receive(_stream_in_field#83) { + _temp_0_0_0#72[k#2] = (_temp_0_0_0#71[k#2] + x#2) + } + completion _send_comp#2 = send(in_field_0_0_0, _stream_in_field#82) + await _send_comp#2 + await _recv_comp#2 + completion _recv_comp#3 = foreach i32 k#3, f32 x#3 in [0:320:1], receive(_stream_in_field#80) { + _temp_0_0_0#73[k#3] = (_temp_0_0_0#72[k#3] + x#3) + } + completion _send_comp#3 = send(in_field_0_0_0, _stream_in_field#81) + await _send_comp#3 + await 
_recv_comp#3 + await map i32 k#4 in [0:320:1] { + _temp_0_0_0#74[k#4] = (-4.0 * in_field_0_0_0[k#4]) + } + await map i32 k#5 in [0:320:1] { + out_field_0_0_0#15[k#5] = (_temp_0_0_0#74[k#5] + _temp_0_0_0#73[k#5]) + } + await map i32 k#6 in [0:320:1] { + out_field_0_0_0[k#6] = out_field_0_0_0#15[k#6] + } + awaitall + await send(out_field_0_0_0, __kernel_out_0[(i#5 - 1), (j#5 - 1)]) + awaitall + } +} \ No newline at end of file diff --git a/samples/benchmarks/sweep_hardware.sh b/samples/benchmarks/sweep_hardware.sh index e0987f2f..8b222a99 100755 --- a/samples/benchmarks/sweep_hardware.sh +++ b/samples/benchmarks/sweep_hardware.sh @@ -7,7 +7,7 @@ BLUE='\033[0;34m' NC='\033[0m' BENCHMARK_DIR="samples/benchmarks" -RUNTIME="spatialstencil/runtime/runtime.py" +RUNTIME="spada/runtime/runtime.py" OUTPUT_DIR="${BM_OUTPUT_DIR:-benchmark_results}" mkdir $OUTPUT_DIR diff --git a/samples/benchmarks/uvbke_746_990_320.sptl b/samples/benchmarks/uvbke_746_990_320.sptl new file mode 100644 index 00000000..bd0ff33b --- /dev/null +++ b/samples/benchmarks/uvbke_746_990_320.sptl @@ -0,0 +1,1260 @@ +kernel @uvbke<>(stream[746, 991] readonly _arg0, stream[747, 990] readonly _arg1, stream[746, 990] readonly _arg2, stream[746, 990] readonly _arg3, stream[746, 990] writeonly __kernel_out_0, stream[746, 990] writeonly __kernel_out_1) { + place u16 i#2, u16 j#2 in [0:1:2 , 990:991:2] { + f32[320] arg1_0_0_0 + } + place u16 i#2, u16 j#2 in [0:1:2 , 1:990:2] { + f32[320] arg1_0_0_0 + } + place u16 i#2, u16 j#2 in [0:1:2 , 2:990:2] { + f32[320] arg1_0_0_0 + } + place u16 i, u16 j in [1:746:2 , 990:991:2] { + f32[320] arg2_0_0_0 + f32[320] arg3_0_0_0 + f32[320] arg4_0_0_0 + f32[320] arg5_0_0_0 + f32[320] arg0_0_0_0 + f32[320] arg1_0_0_0 + f32[320] i21_0_0_0#1 + f32[320] _temp_0_0_0#3 + f32[320] arg5_0_0_0#1 + f32[320] i16_0_0_0#2 + f32[320] _temp_0_0_0#1 + f32[320] i19_0_0_0#2 + f32[320] i21_0_0_0 + f32[320] _temp_0_0_0#4 + f32[320] arg4_0_0_0#1 + f32[320] i16_0_0_0#3 + f32[320] _temp_0_0_0#5 + 
f32[320] i19_0_0_0#3 + } + place u16 i, u16 j in [2:746:2 , 990:991:2] { + f32[320] arg2_0_0_0 + f32[320] arg3_0_0_0 + f32[320] arg4_0_0_0 + f32[320] arg5_0_0_0 + f32[320] arg0_0_0_0 + f32[320] arg1_0_0_0 + f32[320] i21_0_0_0#1 + f32[320] _temp_0_0_0#3 + f32[320] arg5_0_0_0#1 + f32[320] i16_0_0_0#2 + f32[320] _temp_0_0_0#1 + f32[320] i19_0_0_0#2 + f32[320] i21_0_0_0 + f32[320] _temp_0_0_0#4 + f32[320] arg4_0_0_0#1 + f32[320] i16_0_0_0#3 + f32[320] _temp_0_0_0#5 + f32[320] i19_0_0_0#3 + } + place u16 i, u16 j in [746:747:2 , 990:991:2] { + f32[320] arg2_0_0_0 + f32[320] arg3_0_0_0 + f32[320] arg4_0_0_0 + f32[320] arg5_0_0_0 + f32[320] arg0_0_0_0 + f32[320] arg1_0_0_0 + f32[320] i21_0_0_0#2 + f32[320] _temp_0_0_0#6 + f32[320] arg5_0_0_0#2 + f32[320] i16_0_0_0#4 + f32[320] _temp_0_0_0#7 + f32[320] i19_0_0_0#4 + f32[320] i21_0_0_0#3 + f32[320] _temp_0_0_0#8 + f32[320] arg4_0_0_0#2 + f32[320] i16_0_0_0#5 + f32[320] _temp_0_0_0#9 + f32[320] i19_0_0_0#5 + } + place u16 i, u16 j in [1:746:2 , 1:990:2] { + f32[320] arg2_0_0_0 + f32[320] arg3_0_0_0 + f32[320] arg4_0_0_0 + f32[320] arg5_0_0_0 + f32[320] arg0_0_0_0 + f32[320] arg1_0_0_0 + f32[320] i21_0_0_0#4 + f32[320] _temp_0_0_0#10 + f32[320] arg5_0_0_0#3 + f32[320] i16_0_0_0#6 + f32[320] _temp_0_0_0#11 + f32[320] i19_0_0_0#6 + f32[320] i21_0_0_0#5 + f32[320] _temp_0_0_0#12 + f32[320] arg4_0_0_0#3 + f32[320] i16_0_0_0#7 + f32[320] _temp_0_0_0#13 + f32[320] i19_0_0_0#7 + } + place u16 i, u16 j in [1:746:2 , 2:990:2] { + f32[320] arg2_0_0_0 + f32[320] arg3_0_0_0 + f32[320] arg4_0_0_0 + f32[320] arg5_0_0_0 + f32[320] arg0_0_0_0 + f32[320] arg1_0_0_0 + f32[320] i21_0_0_0#4 + f32[320] _temp_0_0_0#10 + f32[320] arg5_0_0_0#3 + f32[320] i16_0_0_0#6 + f32[320] _temp_0_0_0#11 + f32[320] i19_0_0_0#6 + f32[320] i21_0_0_0#5 + f32[320] _temp_0_0_0#12 + f32[320] arg4_0_0_0#3 + f32[320] i16_0_0_0#7 + f32[320] _temp_0_0_0#13 + f32[320] i19_0_0_0#7 + } + place u16 i, u16 j in [2:746:2 , 1:990:2] { + f32[320] arg2_0_0_0 + f32[320] arg3_0_0_0 
+ f32[320] arg4_0_0_0 + f32[320] arg5_0_0_0 + f32[320] arg0_0_0_0 + f32[320] arg1_0_0_0 + f32[320] i21_0_0_0#4 + f32[320] _temp_0_0_0#10 + f32[320] arg5_0_0_0#3 + f32[320] i16_0_0_0#6 + f32[320] _temp_0_0_0#11 + f32[320] i19_0_0_0#6 + f32[320] i21_0_0_0#5 + f32[320] _temp_0_0_0#12 + f32[320] arg4_0_0_0#3 + f32[320] i16_0_0_0#7 + f32[320] _temp_0_0_0#13 + f32[320] i19_0_0_0#7 + } + place u16 i, u16 j in [2:746:2 , 2:990:2] { + f32[320] arg2_0_0_0 + f32[320] arg3_0_0_0 + f32[320] arg4_0_0_0 + f32[320] arg5_0_0_0 + f32[320] arg0_0_0_0 + f32[320] arg1_0_0_0 + f32[320] i21_0_0_0#4 + f32[320] _temp_0_0_0#10 + f32[320] arg5_0_0_0#3 + f32[320] i16_0_0_0#6 + f32[320] _temp_0_0_0#11 + f32[320] i19_0_0_0#6 + f32[320] i21_0_0_0#5 + f32[320] _temp_0_0_0#12 + f32[320] arg4_0_0_0#3 + f32[320] i16_0_0_0#7 + f32[320] _temp_0_0_0#13 + f32[320] i19_0_0_0#7 + } + place u16 i, u16 j in [746:747:2 , 1:990:2] { + f32[320] arg2_0_0_0 + f32[320] arg3_0_0_0 + f32[320] arg4_0_0_0 + f32[320] arg5_0_0_0 + f32[320] arg0_0_0_0 + f32[320] arg1_0_0_0 + f32[320] i21_0_0_0#6 + f32[320] _temp_0_0_0#14 + f32[320] arg5_0_0_0#4 + f32[320] i16_0_0_0#8 + f32[320] _temp_0_0_0#15 + f32[320] i19_0_0_0#8 + f32[320] i21_0_0_0#7 + f32[320] _temp_0_0_0#16 + f32[320] arg4_0_0_0#4 + f32[320] i16_0_0_0#9 + f32[320] _temp_0_0_0#17 + f32[320] i19_0_0_0#9 + } + place u16 i, u16 j in [746:747:2 , 2:990:2] { + f32[320] arg2_0_0_0 + f32[320] arg3_0_0_0 + f32[320] arg4_0_0_0 + f32[320] arg5_0_0_0 + f32[320] arg0_0_0_0 + f32[320] arg1_0_0_0 + f32[320] i21_0_0_0#6 + f32[320] _temp_0_0_0#14 + f32[320] arg5_0_0_0#4 + f32[320] i16_0_0_0#8 + f32[320] _temp_0_0_0#15 + f32[320] i19_0_0_0#8 + f32[320] i21_0_0_0#7 + f32[320] _temp_0_0_0#16 + f32[320] arg4_0_0_0#4 + f32[320] i16_0_0_0#9 + f32[320] _temp_0_0_0#17 + f32[320] i19_0_0_0#9 + } + place u16 i#1, u16 j#1 in [1:747:2 , 0:1:2] { + f32[320] arg0_0_0_0 + } + place u16 i#1, u16 j#1 in [2:747:2 , 0:1:2] { + f32[320] arg0_0_0_0 + } + place u16 i#7, u16 j#7 in [747:748:2 , 
990:991:2] { + f32[320] i16_0_0_0 + f32[320] _temp_0_0_0 + f32[320] i19_0_0_0#1 + } + place u16 i#8, u16 j#8 in [1:747:2 , 991:992:2] { + f32[320] i19_0_0_0 + f32[320] i16_0_0_0#1 + f32[320] _temp_0_0_0#2 + } + place u16 i#8, u16 j#8 in [2:747:2 , 991:992:2] { + f32[320] i19_0_0_0 + f32[320] i16_0_0_0#1 + f32[320] _temp_0_0_0#2 + } + place u16 i#7, u16 j#7 in [747:748:2 , 1:990:2] { + f32[320] i16_0_0_0 + f32[320] _temp_0_0_0 + f32[320] i19_0_0_0#1 + } + place u16 i#7, u16 j#7 in [747:748:2 , 2:990:2] { + f32[320] i16_0_0_0 + f32[320] _temp_0_0_0 + f32[320] i19_0_0_0#1 + } + dataflow u16 i#9, u16 j#9 in [0:1:2 , 990:991:2] { + stream _stream_arg1#2 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 0 +} + stream _stream_arg1#3 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 1 +} + stream _stream_arg1#4 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 2 +} + stream _stream_arg1#5 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 3 +} + } + dataflow u16 i#9, u16 j#9 in [0:1:2 , 1:990:2] { + stream _stream_arg1#6 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 4 +} + stream _stream_arg1#7 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 5 +} + stream _stream_arg1#8 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 6 +} + stream _stream_arg1#9 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 7 +} + } + dataflow u16 i#9, u16 j#9 in [0:1:2 , 2:990:2] { + stream _stream_arg1#6 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 4 +} + stream _stream_arg1#7 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 5 +} + stream _stream_arg1#8 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 6 +} + stream _stream_arg1#9 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 7 +} + } + dataflow u16 i#10, u16 j#10 in [1:746:2 , 990:991:2] { + stream _stream_arg1#10 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 8 +} + stream _stream_arg1#11 = relative_stream(1, 0) { + hops = [(1, 0)], + 
channel = 9 +} + stream _stream_arg0#2 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 10 +} + stream _stream_arg0#3 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 11 +} + stream _stream_arg0#4 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 12 +} + stream _stream_arg0#5 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 13 +} + stream _stream_arg1#12 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 14 +} + stream _stream_arg1#13 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 15 +} + } + dataflow u16 i#10, u16 j#10 in [2:746:2 , 990:991:2] { + stream _stream_arg1#10 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 8 +} + stream _stream_arg1#11 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 9 +} + stream _stream_arg0#2 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 10 +} + stream _stream_arg0#3 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 11 +} + stream _stream_arg0#4 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 12 +} + stream _stream_arg0#5 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 13 +} + stream _stream_arg1#12 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 14 +} + stream _stream_arg1#13 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 15 +} + } + dataflow u16 i#10, u16 j#10 in [746:747:2 , 990:991:2] { + stream _stream_arg1#14 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 16 +} + stream _stream_arg1#15 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 17 +} + stream _stream_arg0#6 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 18 +} + stream _stream_arg0#7 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 19 +} + stream _stream_arg0#8 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 20 +} + stream _stream_arg0#9 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 21 +} + stream _stream_arg1#16 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 22 +} + stream _stream_arg1#17 = 
relative_stream(1, 0) { + hops = [(1, 0)], + channel = 23 +} + } + dataflow u16 i#10, u16 j#10 in [1:746:2 , 1:990:2] { + stream _stream_arg1#18 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 24 +} + stream _stream_arg1#19 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 25 +} + stream _stream_arg0#10 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 26 +} + stream _stream_arg0#11 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 27 +} + stream _stream_arg0#12 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 28 +} + stream _stream_arg0#13 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 29 +} + stream _stream_arg1#20 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 30 +} + stream _stream_arg1#21 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 31 +} + } + dataflow u16 i#10, u16 j#10 in [1:746:2 , 2:990:2] { + stream _stream_arg1#18 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 24 +} + stream _stream_arg1#19 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 25 +} + stream _stream_arg0#10 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 26 +} + stream _stream_arg0#11 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 27 +} + stream _stream_arg0#12 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 28 +} + stream _stream_arg0#13 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 29 +} + stream _stream_arg1#20 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 30 +} + stream _stream_arg1#21 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 31 +} + } + dataflow u16 i#10, u16 j#10 in [2:746:2 , 1:990:2] { + stream _stream_arg1#18 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 24 +} + stream _stream_arg1#19 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 25 +} + stream _stream_arg0#10 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 26 +} + stream _stream_arg0#11 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 27 +} + 
stream _stream_arg0#12 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 28 +} + stream _stream_arg0#13 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 29 +} + stream _stream_arg1#20 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 30 +} + stream _stream_arg1#21 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 31 +} + } + dataflow u16 i#10, u16 j#10 in [2:746:2 , 2:990:2] { + stream _stream_arg1#18 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 24 +} + stream _stream_arg1#19 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 25 +} + stream _stream_arg0#10 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 26 +} + stream _stream_arg0#11 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 27 +} + stream _stream_arg0#12 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 28 +} + stream _stream_arg0#13 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 29 +} + stream _stream_arg1#20 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 30 +} + stream _stream_arg1#21 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 31 +} + } + dataflow u16 i#10, u16 j#10 in [746:747:2 , 1:990:2] { + stream _stream_arg1#22 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 32 +} + stream _stream_arg1#23 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 33 +} + stream _stream_arg0#14 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 34 +} + stream _stream_arg0#15 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 35 +} + stream _stream_arg0#16 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 36 +} + stream _stream_arg0#17 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 37 +} + stream _stream_arg1#24 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 38 +} + stream _stream_arg1#25 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 39 +} + } + dataflow u16 i#10, u16 j#10 in [746:747:2 , 2:990:2] { + stream _stream_arg1#22 = relative_stream(1, 0) { + hops = 
[(1, 0)], + channel = 32 +} + stream _stream_arg1#23 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 33 +} + stream _stream_arg0#14 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 34 +} + stream _stream_arg0#15 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 35 +} + stream _stream_arg0#16 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 36 +} + stream _stream_arg0#17 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 37 +} + stream _stream_arg1#24 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 38 +} + stream _stream_arg1#25 = relative_stream(1, 0) { + hops = [(1, 0)], + channel = 39 +} + } + dataflow u16 i#11, u16 j#11 in [1:747:2 , 0:1:2] { + stream _stream_arg0#18 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 40 +} + stream _stream_arg0#19 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 41 +} + stream _stream_arg0#20 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 42 +} + stream _stream_arg0#21 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 43 +} + } + dataflow u16 i#11, u16 j#11 in [2:747:2 , 0:1:2] { + stream _stream_arg0#18 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 40 +} + stream _stream_arg0#19 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 41 +} + stream _stream_arg0#20 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 42 +} + stream _stream_arg0#21 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = 43 +} + } + compute u16 i#5, u16 j#5 in [0:1:2 , 990:991:2] { + await receive(arg1_0_0_0, _arg1[i#5, j#5]) + awaitall + completion _send_comp = send(arg1_0_0_0, _stream_arg1#2) + await _send_comp + completion _send_comp#3 = send(arg1_0_0_0, _stream_arg1#4) + await _send_comp#3 + awaitall + } + compute u16 i#5, u16 j#5 in [0:1:2 , 1:990:2] { + await receive(arg1_0_0_0, _arg1[i#5, j#5]) + awaitall + completion _send_comp = send(arg1_0_0_0, _stream_arg1#6) + await _send_comp + completion _send_comp#3 = send(arg1_0_0_0, _stream_arg1#8) + 
await _send_comp#3 + awaitall + } + compute u16 i#5, u16 j#5 in [0:1:2 , 2:990:2] { + await receive(arg1_0_0_0, _arg1[i#5, j#5]) + awaitall + completion _send_comp = send(arg1_0_0_0, _stream_arg1#6) + await _send_comp + completion _send_comp#3 = send(arg1_0_0_0, _stream_arg1#8) + await _send_comp#3 + awaitall + } + compute u16 i#3, u16 j#3 in [1:746:2 , 990:991:2] { + await receive(arg2_0_0_0, _arg2[i#3, j#3]) + await receive(arg3_0_0_0, _arg3[i#3, j#3]) + await receive(arg0_0_0_0, _arg0[i#3, j#3]) + await receive(arg1_0_0_0, _arg1[i#3, j#3]) + awaitall + completion _recv_comp = foreach i32 k, f32 x in [0:320:1], receive(_stream_arg1#10) { + _temp_0_0_0#1[k] = (arg1_0_0_0[k] + x) + } + completion _send_comp = send(arg1_0_0_0, _stream_arg1#11) + await _send_comp + await _recv_comp + await map i32 k#1 in [0:320:1] { + i16_0_0_0#2[k#1] = (_temp_0_0_0#1[k#1] * arg2_0_0_0[k#1]) + } + completion _recv_comp#1 = foreach i32 k#2, f32 x#1 in [0:320:1], receive(_stream_arg0#3) { + i19_0_0_0#2[k#2] = (arg0_0_0_0[k#2] + x#1) + } + await _recv_comp#1 + await map i32 k#3 in [0:320:1] { + _temp_0_0_0#4[k#3] = (i19_0_0_0#2[k#3] - i16_0_0_0#2[k#3]) + } + await map i32 k#4 in [0:320:1] { + i21_0_0_0[k#4] = (112.5 * _temp_0_0_0#4[k#4]) + } + await map i32 k#5 in [0:320:1] { + arg4_0_0_0#1[k#5] = (arg3_0_0_0[k#5] * i21_0_0_0[k#5]) + } + completion _recv_comp#2 = foreach i32 k#6, f32 x#2 in [0:320:1], receive(_stream_arg0#5) { + _temp_0_0_0#5[k#6] = (arg0_0_0_0[k#6] + x#2) + } + await _recv_comp#2 + await map i32 k#7 in [0:320:1] { + i16_0_0_0#3[k#7] = (_temp_0_0_0#5[k#7] * arg2_0_0_0[k#7]) + } + completion _recv_comp#3 = foreach i32 k#8, f32 x#3 in [0:320:1], receive(_stream_arg1#12) { + i19_0_0_0#3[k#8] = (arg1_0_0_0[k#8] + x#3) + } + completion _send_comp#3 = send(arg1_0_0_0, _stream_arg1#13) + await _send_comp#3 + await _recv_comp#3 + await map i32 k#9 in [0:320:1] { + _temp_0_0_0#3[k#9] = (i19_0_0_0#3[k#9] - i16_0_0_0#3[k#9]) + } + await map i32 k#10 in [0:320:1] { + 
i21_0_0_0#1[k#10] = (112.5 * _temp_0_0_0#3[k#10]) + } + await map i32 k#11 in [0:320:1] { + arg5_0_0_0#1[k#11] = (arg3_0_0_0[k#11] * i21_0_0_0#1[k#11]) + } + await map i32 k#12 in [0:320:1] { + arg4_0_0_0[k#12] = arg4_0_0_0#1[k#12] + } + await map i32 k#13 in [0:320:1] { + arg5_0_0_0[k#13] = arg5_0_0_0#1[k#13] + } + awaitall + await send(arg4_0_0_0, __kernel_out_0[(i#3 - 1), (j#3 - 1)]) + awaitall + await send(arg5_0_0_0, __kernel_out_1[(i#3 - 1), (j#3 - 1)]) + awaitall + } + compute u16 i#3, u16 j#3 in [2:746:2 , 990:991:2] { + await receive(arg2_0_0_0, _arg2[i#3, j#3]) + await receive(arg3_0_0_0, _arg3[i#3, j#3]) + await receive(arg0_0_0_0, _arg0[i#3, j#3]) + await receive(arg1_0_0_0, _arg1[i#3, j#3]) + awaitall + completion _recv_comp = foreach i32 k, f32 x in [0:320:1], receive(_stream_arg1#11) { + _temp_0_0_0#1[k] = (arg1_0_0_0[k] + x) + } + completion _send_comp = send(arg1_0_0_0, _stream_arg1#10) + await _send_comp + await _recv_comp + await map i32 k#1 in [0:320:1] { + i16_0_0_0#2[k#1] = (_temp_0_0_0#1[k#1] * arg2_0_0_0[k#1]) + } + completion _recv_comp#1 = foreach i32 k#2, f32 x#1 in [0:320:1], receive(_stream_arg0#3) { + i19_0_0_0#2[k#2] = (arg0_0_0_0[k#2] + x#1) + } + await _recv_comp#1 + await map i32 k#3 in [0:320:1] { + _temp_0_0_0#4[k#3] = (i19_0_0_0#2[k#3] - i16_0_0_0#2[k#3]) + } + await map i32 k#4 in [0:320:1] { + i21_0_0_0[k#4] = (112.5 * _temp_0_0_0#4[k#4]) + } + await map i32 k#5 in [0:320:1] { + arg4_0_0_0#1[k#5] = (arg3_0_0_0[k#5] * i21_0_0_0[k#5]) + } + completion _recv_comp#2 = foreach i32 k#6, f32 x#2 in [0:320:1], receive(_stream_arg0#5) { + _temp_0_0_0#5[k#6] = (arg0_0_0_0[k#6] + x#2) + } + await _recv_comp#2 + await map i32 k#7 in [0:320:1] { + i16_0_0_0#3[k#7] = (_temp_0_0_0#5[k#7] * arg2_0_0_0[k#7]) + } + completion _recv_comp#3 = foreach i32 k#8, f32 x#3 in [0:320:1], receive(_stream_arg1#13) { + i19_0_0_0#3[k#8] = (arg1_0_0_0[k#8] + x#3) + } + completion _send_comp#3 = send(arg1_0_0_0, _stream_arg1#12) + await _send_comp#3 + await 
_recv_comp#3 + await map i32 k#9 in [0:320:1] { + _temp_0_0_0#3[k#9] = (i19_0_0_0#3[k#9] - i16_0_0_0#3[k#9]) + } + await map i32 k#10 in [0:320:1] { + i21_0_0_0#1[k#10] = (112.5 * _temp_0_0_0#3[k#10]) + } + await map i32 k#11 in [0:320:1] { + arg5_0_0_0#1[k#11] = (arg3_0_0_0[k#11] * i21_0_0_0#1[k#11]) + } + await map i32 k#12 in [0:320:1] { + arg4_0_0_0[k#12] = arg4_0_0_0#1[k#12] + } + await map i32 k#13 in [0:320:1] { + arg5_0_0_0[k#13] = arg5_0_0_0#1[k#13] + } + awaitall + await send(arg4_0_0_0, __kernel_out_0[(i#3 - 1), (j#3 - 1)]) + awaitall + await send(arg5_0_0_0, __kernel_out_1[(i#3 - 1), (j#3 - 1)]) + awaitall + } + compute u16 i#3, u16 j#3 in [746:747:2 , 990:991:2] { + await receive(arg2_0_0_0, _arg2[i#3, j#3]) + await receive(arg3_0_0_0, _arg3[i#3, j#3]) + await receive(arg0_0_0_0, _arg0[i#3, j#3]) + await receive(arg1_0_0_0, _arg1[i#3, j#3]) + awaitall + completion _recv_comp = foreach i32 k, f32 x in [0:320:1], receive(_stream_arg1#15) { + _temp_0_0_0#7[k] = (arg1_0_0_0[k] + x) + } + await _recv_comp + await map i32 k#1 in [0:320:1] { + i16_0_0_0#4[k#1] = (_temp_0_0_0#7[k#1] * arg2_0_0_0[k#1]) + } + completion _recv_comp#1 = foreach i32 k#2, f32 x#1 in [0:320:1], receive(_stream_arg0#7) { + i19_0_0_0#4[k#2] = (arg0_0_0_0[k#2] + x#1) + } + await _recv_comp#1 + await map i32 k#3 in [0:320:1] { + _temp_0_0_0#8[k#3] = (i19_0_0_0#4[k#3] - i16_0_0_0#4[k#3]) + } + await map i32 k#4 in [0:320:1] { + i21_0_0_0#3[k#4] = (112.5 * _temp_0_0_0#8[k#4]) + } + await map i32 k#5 in [0:320:1] { + arg4_0_0_0#2[k#5] = (arg3_0_0_0[k#5] * i21_0_0_0#3[k#5]) + } + completion _recv_comp#2 = foreach i32 k#6, f32 x#2 in [0:320:1], receive(_stream_arg0#9) { + _temp_0_0_0#9[k#6] = (arg0_0_0_0[k#6] + x#2) + } + await _recv_comp#2 + await map i32 k#7 in [0:320:1] { + i16_0_0_0#5[k#7] = (_temp_0_0_0#9[k#7] * arg2_0_0_0[k#7]) + } + completion _recv_comp#3 = foreach i32 k#8, f32 x#3 in [0:320:1], receive(_stream_arg1#17) { + i19_0_0_0#5[k#8] = (arg1_0_0_0[k#8] + x#3) + } + await 
_recv_comp#3 + await map i32 k#9 in [0:320:1] { + _temp_0_0_0#6[k#9] = (i19_0_0_0#5[k#9] - i16_0_0_0#5[k#9]) + } + await map i32 k#10 in [0:320:1] { + i21_0_0_0#2[k#10] = (112.5 * _temp_0_0_0#6[k#10]) + } + await map i32 k#11 in [0:320:1] { + arg5_0_0_0#2[k#11] = (arg3_0_0_0[k#11] * i21_0_0_0#2[k#11]) + } + await map i32 k#12 in [0:320:1] { + arg4_0_0_0[k#12] = arg4_0_0_0#2[k#12] + } + await map i32 k#13 in [0:320:1] { + arg5_0_0_0[k#13] = arg5_0_0_0#2[k#13] + } + awaitall + await send(arg4_0_0_0, __kernel_out_0[(i#3 - 1), (j#3 - 1)]) + awaitall + await send(arg5_0_0_0, __kernel_out_1[(i#3 - 1), (j#3 - 1)]) + awaitall + } + compute u16 i#3, u16 j#3 in [1:746:2 , 1:990:2] { + await receive(arg2_0_0_0, _arg2[i#3, j#3]) + await receive(arg3_0_0_0, _arg3[i#3, j#3]) + await receive(arg0_0_0_0, _arg0[i#3, j#3]) + await receive(arg1_0_0_0, _arg1[i#3, j#3]) + awaitall + completion _recv_comp = foreach i32 k, f32 x in [0:320:1], receive(_stream_arg1#18) { + _temp_0_0_0#11[k] = (arg1_0_0_0[k] + x) + } + completion _send_comp = send(arg1_0_0_0, _stream_arg1#19) + await _send_comp + await _recv_comp + await map i32 k#1 in [0:320:1] { + i16_0_0_0#6[k#1] = (_temp_0_0_0#11[k#1] * arg2_0_0_0[k#1]) + } + completion _recv_comp#1 = foreach i32 k#2, f32 x#1 in [0:320:1], receive(_stream_arg0#10) { + i19_0_0_0#6[k#2] = (arg0_0_0_0[k#2] + x#1) + } + completion _send_comp#1 = send(arg0_0_0_0, _stream_arg0#11) + await _send_comp#1 + await _recv_comp#1 + await map i32 k#3 in [0:320:1] { + _temp_0_0_0#12[k#3] = (i19_0_0_0#6[k#3] - i16_0_0_0#6[k#3]) + } + await map i32 k#4 in [0:320:1] { + i21_0_0_0#5[k#4] = (112.5 * _temp_0_0_0#12[k#4]) + } + await map i32 k#5 in [0:320:1] { + arg4_0_0_0#3[k#5] = (arg3_0_0_0[k#5] * i21_0_0_0#5[k#5]) + } + completion _recv_comp#2 = foreach i32 k#6, f32 x#2 in [0:320:1], receive(_stream_arg0#12) { + _temp_0_0_0#13[k#6] = (arg0_0_0_0[k#6] + x#2) + } + completion _send_comp#2 = send(arg0_0_0_0, _stream_arg0#13) + await _send_comp#2 + await _recv_comp#2 + await 
map i32 k#7 in [0:320:1] { + i16_0_0_0#7[k#7] = (_temp_0_0_0#13[k#7] * arg2_0_0_0[k#7]) + } + completion _recv_comp#3 = foreach i32 k#8, f32 x#3 in [0:320:1], receive(_stream_arg1#20) { + i19_0_0_0#7[k#8] = (arg1_0_0_0[k#8] + x#3) + } + completion _send_comp#3 = send(arg1_0_0_0, _stream_arg1#21) + await _send_comp#3 + await _recv_comp#3 + await map i32 k#9 in [0:320:1] { + _temp_0_0_0#10[k#9] = (i19_0_0_0#7[k#9] - i16_0_0_0#7[k#9]) + } + await map i32 k#10 in [0:320:1] { + i21_0_0_0#4[k#10] = (112.5 * _temp_0_0_0#10[k#10]) + } + await map i32 k#11 in [0:320:1] { + arg5_0_0_0#3[k#11] = (arg3_0_0_0[k#11] * i21_0_0_0#4[k#11]) + } + await map i32 k#12 in [0:320:1] { + arg4_0_0_0[k#12] = arg4_0_0_0#3[k#12] + } + await map i32 k#13 in [0:320:1] { + arg5_0_0_0[k#13] = arg5_0_0_0#3[k#13] + } + awaitall + await send(arg4_0_0_0, __kernel_out_0[(i#3 - 1), (j#3 - 1)]) + awaitall + await send(arg5_0_0_0, __kernel_out_1[(i#3 - 1), (j#3 - 1)]) + awaitall + } + compute u16 i#3, u16 j#3 in [1:746:2 , 2:990:2] { + await receive(arg2_0_0_0, _arg2[i#3, j#3]) + await receive(arg3_0_0_0, _arg3[i#3, j#3]) + await receive(arg0_0_0_0, _arg0[i#3, j#3]) + await receive(arg1_0_0_0, _arg1[i#3, j#3]) + awaitall + completion _recv_comp = foreach i32 k, f32 x in [0:320:1], receive(_stream_arg1#18) { + _temp_0_0_0#11[k] = (arg1_0_0_0[k] + x) + } + completion _send_comp = send(arg1_0_0_0, _stream_arg1#19) + await _send_comp + await _recv_comp + await map i32 k#1 in [0:320:1] { + i16_0_0_0#6[k#1] = (_temp_0_0_0#11[k#1] * arg2_0_0_0[k#1]) + } + completion _recv_comp#1 = foreach i32 k#2, f32 x#1 in [0:320:1], receive(_stream_arg0#11) { + i19_0_0_0#6[k#2] = (arg0_0_0_0[k#2] + x#1) + } + completion _send_comp#1 = send(arg0_0_0_0, _stream_arg0#10) + await _send_comp#1 + await _recv_comp#1 + await map i32 k#3 in [0:320:1] { + _temp_0_0_0#12[k#3] = (i19_0_0_0#6[k#3] - i16_0_0_0#6[k#3]) + } + await map i32 k#4 in [0:320:1] { + i21_0_0_0#5[k#4] = (112.5 * _temp_0_0_0#12[k#4]) + } + await map i32 k#5 in 
[0:320:1] { + arg4_0_0_0#3[k#5] = (arg3_0_0_0[k#5] * i21_0_0_0#5[k#5]) + } + completion _recv_comp#2 = foreach i32 k#6, f32 x#2 in [0:320:1], receive(_stream_arg0#13) { + _temp_0_0_0#13[k#6] = (arg0_0_0_0[k#6] + x#2) + } + completion _send_comp#2 = send(arg0_0_0_0, _stream_arg0#12) + await _send_comp#2 + await _recv_comp#2 + await map i32 k#7 in [0:320:1] { + i16_0_0_0#7[k#7] = (_temp_0_0_0#13[k#7] * arg2_0_0_0[k#7]) + } + completion _recv_comp#3 = foreach i32 k#8, f32 x#3 in [0:320:1], receive(_stream_arg1#20) { + i19_0_0_0#7[k#8] = (arg1_0_0_0[k#8] + x#3) + } + completion _send_comp#3 = send(arg1_0_0_0, _stream_arg1#21) + await _send_comp#3 + await _recv_comp#3 + await map i32 k#9 in [0:320:1] { + _temp_0_0_0#10[k#9] = (i19_0_0_0#7[k#9] - i16_0_0_0#7[k#9]) + } + await map i32 k#10 in [0:320:1] { + i21_0_0_0#4[k#10] = (112.5 * _temp_0_0_0#10[k#10]) + } + await map i32 k#11 in [0:320:1] { + arg5_0_0_0#3[k#11] = (arg3_0_0_0[k#11] * i21_0_0_0#4[k#11]) + } + await map i32 k#12 in [0:320:1] { + arg4_0_0_0[k#12] = arg4_0_0_0#3[k#12] + } + await map i32 k#13 in [0:320:1] { + arg5_0_0_0[k#13] = arg5_0_0_0#3[k#13] + } + awaitall + await send(arg4_0_0_0, __kernel_out_0[(i#3 - 1), (j#3 - 1)]) + awaitall + await send(arg5_0_0_0, __kernel_out_1[(i#3 - 1), (j#3 - 1)]) + awaitall + } + compute u16 i#3, u16 j#3 in [2:746:2 , 1:990:2] { + await receive(arg2_0_0_0, _arg2[i#3, j#3]) + await receive(arg3_0_0_0, _arg3[i#3, j#3]) + await receive(arg0_0_0_0, _arg0[i#3, j#3]) + await receive(arg1_0_0_0, _arg1[i#3, j#3]) + awaitall + completion _recv_comp = foreach i32 k, f32 x in [0:320:1], receive(_stream_arg1#19) { + _temp_0_0_0#11[k] = (arg1_0_0_0[k] + x) + } + completion _send_comp = send(arg1_0_0_0, _stream_arg1#18) + await _send_comp + await _recv_comp + await map i32 k#1 in [0:320:1] { + i16_0_0_0#6[k#1] = (_temp_0_0_0#11[k#1] * arg2_0_0_0[k#1]) + } + completion _recv_comp#1 = foreach i32 k#2, f32 x#1 in [0:320:1], receive(_stream_arg0#10) { + i19_0_0_0#6[k#2] = (arg0_0_0_0[k#2] + 
x#1) + } + completion _send_comp#1 = send(arg0_0_0_0, _stream_arg0#11) + await _send_comp#1 + await _recv_comp#1 + await map i32 k#3 in [0:320:1] { + _temp_0_0_0#12[k#3] = (i19_0_0_0#6[k#3] - i16_0_0_0#6[k#3]) + } + await map i32 k#4 in [0:320:1] { + i21_0_0_0#5[k#4] = (112.5 * _temp_0_0_0#12[k#4]) + } + await map i32 k#5 in [0:320:1] { + arg4_0_0_0#3[k#5] = (arg3_0_0_0[k#5] * i21_0_0_0#5[k#5]) + } + completion _recv_comp#2 = foreach i32 k#6, f32 x#2 in [0:320:1], receive(_stream_arg0#12) { + _temp_0_0_0#13[k#6] = (arg0_0_0_0[k#6] + x#2) + } + completion _send_comp#2 = send(arg0_0_0_0, _stream_arg0#13) + await _send_comp#2 + await _recv_comp#2 + await map i32 k#7 in [0:320:1] { + i16_0_0_0#7[k#7] = (_temp_0_0_0#13[k#7] * arg2_0_0_0[k#7]) + } + completion _recv_comp#3 = foreach i32 k#8, f32 x#3 in [0:320:1], receive(_stream_arg1#21) { + i19_0_0_0#7[k#8] = (arg1_0_0_0[k#8] + x#3) + } + completion _send_comp#3 = send(arg1_0_0_0, _stream_arg1#20) + await _send_comp#3 + await _recv_comp#3 + await map i32 k#9 in [0:320:1] { + _temp_0_0_0#10[k#9] = (i19_0_0_0#7[k#9] - i16_0_0_0#7[k#9]) + } + await map i32 k#10 in [0:320:1] { + i21_0_0_0#4[k#10] = (112.5 * _temp_0_0_0#10[k#10]) + } + await map i32 k#11 in [0:320:1] { + arg5_0_0_0#3[k#11] = (arg3_0_0_0[k#11] * i21_0_0_0#4[k#11]) + } + await map i32 k#12 in [0:320:1] { + arg4_0_0_0[k#12] = arg4_0_0_0#3[k#12] + } + await map i32 k#13 in [0:320:1] { + arg5_0_0_0[k#13] = arg5_0_0_0#3[k#13] + } + awaitall + await send(arg4_0_0_0, __kernel_out_0[(i#3 - 1), (j#3 - 1)]) + awaitall + await send(arg5_0_0_0, __kernel_out_1[(i#3 - 1), (j#3 - 1)]) + awaitall + } + compute u16 i#3, u16 j#3 in [2:746:2 , 2:990:2] { + await receive(arg2_0_0_0, _arg2[i#3, j#3]) + await receive(arg3_0_0_0, _arg3[i#3, j#3]) + await receive(arg0_0_0_0, _arg0[i#3, j#3]) + await receive(arg1_0_0_0, _arg1[i#3, j#3]) + awaitall + completion _recv_comp = foreach i32 k, f32 x in [0:320:1], receive(_stream_arg1#19) { + _temp_0_0_0#11[k] = (arg1_0_0_0[k] + x) + } + 
completion _send_comp = send(arg1_0_0_0, _stream_arg1#18) + await _send_comp + await _recv_comp + await map i32 k#1 in [0:320:1] { + i16_0_0_0#6[k#1] = (_temp_0_0_0#11[k#1] * arg2_0_0_0[k#1]) + } + completion _recv_comp#1 = foreach i32 k#2, f32 x#1 in [0:320:1], receive(_stream_arg0#11) { + i19_0_0_0#6[k#2] = (arg0_0_0_0[k#2] + x#1) + } + completion _send_comp#1 = send(arg0_0_0_0, _stream_arg0#10) + await _send_comp#1 + await _recv_comp#1 + await map i32 k#3 in [0:320:1] { + _temp_0_0_0#12[k#3] = (i19_0_0_0#6[k#3] - i16_0_0_0#6[k#3]) + } + await map i32 k#4 in [0:320:1] { + i21_0_0_0#5[k#4] = (112.5 * _temp_0_0_0#12[k#4]) + } + await map i32 k#5 in [0:320:1] { + arg4_0_0_0#3[k#5] = (arg3_0_0_0[k#5] * i21_0_0_0#5[k#5]) + } + completion _recv_comp#2 = foreach i32 k#6, f32 x#2 in [0:320:1], receive(_stream_arg0#13) { + _temp_0_0_0#13[k#6] = (arg0_0_0_0[k#6] + x#2) + } + completion _send_comp#2 = send(arg0_0_0_0, _stream_arg0#12) + await _send_comp#2 + await _recv_comp#2 + await map i32 k#7 in [0:320:1] { + i16_0_0_0#7[k#7] = (_temp_0_0_0#13[k#7] * arg2_0_0_0[k#7]) + } + completion _recv_comp#3 = foreach i32 k#8, f32 x#3 in [0:320:1], receive(_stream_arg1#21) { + i19_0_0_0#7[k#8] = (arg1_0_0_0[k#8] + x#3) + } + completion _send_comp#3 = send(arg1_0_0_0, _stream_arg1#20) + await _send_comp#3 + await _recv_comp#3 + await map i32 k#9 in [0:320:1] { + _temp_0_0_0#10[k#9] = (i19_0_0_0#7[k#9] - i16_0_0_0#7[k#9]) + } + await map i32 k#10 in [0:320:1] { + i21_0_0_0#4[k#10] = (112.5 * _temp_0_0_0#10[k#10]) + } + await map i32 k#11 in [0:320:1] { + arg5_0_0_0#3[k#11] = (arg3_0_0_0[k#11] * i21_0_0_0#4[k#11]) + } + await map i32 k#12 in [0:320:1] { + arg4_0_0_0[k#12] = arg4_0_0_0#3[k#12] + } + await map i32 k#13 in [0:320:1] { + arg5_0_0_0[k#13] = arg5_0_0_0#3[k#13] + } + awaitall + await send(arg4_0_0_0, __kernel_out_0[(i#3 - 1), (j#3 - 1)]) + awaitall + await send(arg5_0_0_0, __kernel_out_1[(i#3 - 1), (j#3 - 1)]) + awaitall + } + compute u16 i#3, u16 j#3 in [746:747:2 , 1:990:2] 
{ + await receive(arg2_0_0_0, _arg2[i#3, j#3]) + await receive(arg3_0_0_0, _arg3[i#3, j#3]) + await receive(arg0_0_0_0, _arg0[i#3, j#3]) + await receive(arg1_0_0_0, _arg1[i#3, j#3]) + awaitall + completion _recv_comp = foreach i32 k, f32 x in [0:320:1], receive(_stream_arg1#23) { + _temp_0_0_0#15[k] = (arg1_0_0_0[k] + x) + } + await _recv_comp + await map i32 k#1 in [0:320:1] { + i16_0_0_0#8[k#1] = (_temp_0_0_0#15[k#1] * arg2_0_0_0[k#1]) + } + completion _recv_comp#1 = foreach i32 k#2, f32 x#1 in [0:320:1], receive(_stream_arg0#14) { + i19_0_0_0#8[k#2] = (arg0_0_0_0[k#2] + x#1) + } + completion _send_comp#1 = send(arg0_0_0_0, _stream_arg0#15) + await _send_comp#1 + await _recv_comp#1 + await map i32 k#3 in [0:320:1] { + _temp_0_0_0#16[k#3] = (i19_0_0_0#8[k#3] - i16_0_0_0#8[k#3]) + } + await map i32 k#4 in [0:320:1] { + i21_0_0_0#7[k#4] = (112.5 * _temp_0_0_0#16[k#4]) + } + await map i32 k#5 in [0:320:1] { + arg4_0_0_0#4[k#5] = (arg3_0_0_0[k#5] * i21_0_0_0#7[k#5]) + } + completion _recv_comp#2 = foreach i32 k#6, f32 x#2 in [0:320:1], receive(_stream_arg0#16) { + _temp_0_0_0#17[k#6] = (arg0_0_0_0[k#6] + x#2) + } + completion _send_comp#2 = send(arg0_0_0_0, _stream_arg0#17) + await _send_comp#2 + await _recv_comp#2 + await map i32 k#7 in [0:320:1] { + i16_0_0_0#9[k#7] = (_temp_0_0_0#17[k#7] * arg2_0_0_0[k#7]) + } + completion _recv_comp#3 = foreach i32 k#8, f32 x#3 in [0:320:1], receive(_stream_arg1#25) { + i19_0_0_0#9[k#8] = (arg1_0_0_0[k#8] + x#3) + } + await _recv_comp#3 + await map i32 k#9 in [0:320:1] { + _temp_0_0_0#14[k#9] = (i19_0_0_0#9[k#9] - i16_0_0_0#9[k#9]) + } + await map i32 k#10 in [0:320:1] { + i21_0_0_0#6[k#10] = (112.5 * _temp_0_0_0#14[k#10]) + } + await map i32 k#11 in [0:320:1] { + arg5_0_0_0#4[k#11] = (arg3_0_0_0[k#11] * i21_0_0_0#6[k#11]) + } + await map i32 k#12 in [0:320:1] { + arg4_0_0_0[k#12] = arg4_0_0_0#4[k#12] + } + await map i32 k#13 in [0:320:1] { + arg5_0_0_0[k#13] = arg5_0_0_0#4[k#13] + } + awaitall + await send(arg4_0_0_0, 
__kernel_out_0[(i#3 - 1), (j#3 - 1)]) + awaitall + await send(arg5_0_0_0, __kernel_out_1[(i#3 - 1), (j#3 - 1)]) + awaitall + } + compute u16 i#3, u16 j#3 in [746:747:2 , 2:990:2] { + await receive(arg2_0_0_0, _arg2[i#3, j#3]) + await receive(arg3_0_0_0, _arg3[i#3, j#3]) + await receive(arg0_0_0_0, _arg0[i#3, j#3]) + await receive(arg1_0_0_0, _arg1[i#3, j#3]) + awaitall + completion _recv_comp = foreach i32 k, f32 x in [0:320:1], receive(_stream_arg1#23) { + _temp_0_0_0#15[k] = (arg1_0_0_0[k] + x) + } + await _recv_comp + await map i32 k#1 in [0:320:1] { + i16_0_0_0#8[k#1] = (_temp_0_0_0#15[k#1] * arg2_0_0_0[k#1]) + } + completion _recv_comp#1 = foreach i32 k#2, f32 x#1 in [0:320:1], receive(_stream_arg0#15) { + i19_0_0_0#8[k#2] = (arg0_0_0_0[k#2] + x#1) + } + completion _send_comp#1 = send(arg0_0_0_0, _stream_arg0#14) + await _send_comp#1 + await _recv_comp#1 + await map i32 k#3 in [0:320:1] { + _temp_0_0_0#16[k#3] = (i19_0_0_0#8[k#3] - i16_0_0_0#8[k#3]) + } + await map i32 k#4 in [0:320:1] { + i21_0_0_0#7[k#4] = (112.5 * _temp_0_0_0#16[k#4]) + } + await map i32 k#5 in [0:320:1] { + arg4_0_0_0#4[k#5] = (arg3_0_0_0[k#5] * i21_0_0_0#7[k#5]) + } + completion _recv_comp#2 = foreach i32 k#6, f32 x#2 in [0:320:1], receive(_stream_arg0#17) { + _temp_0_0_0#17[k#6] = (arg0_0_0_0[k#6] + x#2) + } + completion _send_comp#2 = send(arg0_0_0_0, _stream_arg0#16) + await _send_comp#2 + await _recv_comp#2 + await map i32 k#7 in [0:320:1] { + i16_0_0_0#9[k#7] = (_temp_0_0_0#17[k#7] * arg2_0_0_0[k#7]) + } + completion _recv_comp#3 = foreach i32 k#8, f32 x#3 in [0:320:1], receive(_stream_arg1#25) { + i19_0_0_0#9[k#8] = (arg1_0_0_0[k#8] + x#3) + } + await _recv_comp#3 + await map i32 k#9 in [0:320:1] { + _temp_0_0_0#14[k#9] = (i19_0_0_0#9[k#9] - i16_0_0_0#9[k#9]) + } + await map i32 k#10 in [0:320:1] { + i21_0_0_0#6[k#10] = (112.5 * _temp_0_0_0#14[k#10]) + } + await map i32 k#11 in [0:320:1] { + arg5_0_0_0#4[k#11] = (arg3_0_0_0[k#11] * i21_0_0_0#6[k#11]) + } + await map i32 k#12 in 
[0:320:1] { + arg4_0_0_0[k#12] = arg4_0_0_0#4[k#12] + } + await map i32 k#13 in [0:320:1] { + arg5_0_0_0[k#13] = arg5_0_0_0#4[k#13] + } + awaitall + await send(arg4_0_0_0, __kernel_out_0[(i#3 - 1), (j#3 - 1)]) + awaitall + await send(arg5_0_0_0, __kernel_out_1[(i#3 - 1), (j#3 - 1)]) + awaitall + } + compute u16 i#4, u16 j#4 in [1:747:2 , 0:1:2] { + await receive(arg0_0_0_0, _arg0[i#4, j#4]) + awaitall + completion _send_comp#1 = send(arg0_0_0_0, _stream_arg0#18) + await _send_comp#1 + completion _send_comp#2 = send(arg0_0_0_0, _stream_arg0#20) + await _send_comp#2 + awaitall + } + compute u16 i#4, u16 j#4 in [2:747:2 , 0:1:2] { + await receive(arg0_0_0_0, _arg0[i#4, j#4]) + awaitall + completion _send_comp#1 = send(arg0_0_0_0, _stream_arg0#18) + await _send_comp#1 + completion _send_comp#2 = send(arg0_0_0_0, _stream_arg0#20) + await _send_comp#2 + awaitall + } + compute u16 i, u16 j in [747:748:2 , 990:991:2] { + + } + compute u16 i, u16 j in [0:1:2 , 991:992:2] { + + } + compute u16 i, u16 j in [747:748:2 , 991:992:2] { + + } + compute u16 i, u16 j in [1:747:2 , 991:992:2] { + + } + compute u16 i, u16 j in [2:747:2 , 991:992:2] { + + } + compute u16 i, u16 j in [0:1:2 , 0:1:2] { + + } + compute u16 i, u16 j in [747:748:2 , 0:1:2] { + + } + compute u16 i, u16 j in [747:748:2 , 1:990:2] { + + } + compute u16 i, u16 j in [747:748:2 , 2:990:2] { + + } +} \ No newline at end of file diff --git a/scripts/examples.py b/scripts/examples.py index 195265d2..cfc8bd6c 100644 --- a/scripts/examples.py +++ b/scripts/examples.py @@ -1,8 +1,8 @@ import numpy as np import igraph as ig -from spatialstencil.placement.domain import FieldDomain -from spatialstencil.placement.stencil import Stencil, StencilDirection -from spatialstencil.placement.graph import StencilGraph +from spada.placement.domain import FieldDomain +from spada.placement.stencil import Stencil, StencilDirection +from spada.placement.graph import StencilGraph def horizontal_diffusion(): """ diff --git 
a/scripts/generate_benchmarks.sh b/scripts/generate_benchmarks.sh index 979a51fb..df6c6291 100755 --- a/scripts/generate_benchmarks.sh +++ b/scripts/generate_benchmarks.sh @@ -1,6 +1,6 @@ #!/bin/bash -#python ./spatialstencil/cli/gt4py_to_spatial.py ./samples/gt4py_test_instances.py 4,4,4 ./samples/tests -python ./spatialstencil/cli/gt4py_to_spatial.py ./samples/stencils.py 4,4,4 ./samples/benchmarks -python ./spatialstencil/cli/gt4py_to_spatial.py ./samples/stencils.py 16,16,4 ./samples/benchmarks -python ./spatialstencil/cli/gt4py_to_spatial.py ./samples/stencils.py 128,128,80 ./samples/benchmarks -python ./spatialstencil/cli/gt4py_to_spatial.py ./samples/stencils.py 512,512,80 ./samples/benchmarks \ No newline at end of file +#python ./spada/cli/gt4py_to_spatial.py ./samples/gt4py_test_instances.py 4,4,4 ./samples/tests +python ./spada/cli/gt4py_to_spatial.py ./samples/stencils.py 4,4,4 ./samples/benchmarks +python ./spada/cli/gt4py_to_spatial.py ./samples/stencils.py 16,16,4 ./samples/benchmarks +python ./spada/cli/gt4py_to_spatial.py ./samples/stencils.py 128,128,80 ./samples/benchmarks +python ./spada/cli/gt4py_to_spatial.py ./samples/stencils.py 512,512,80 ./samples/benchmarks \ No newline at end of file diff --git a/scripts/placement_demo.py b/scripts/placement_demo.py index 44a99f84..5a334422 100644 --- a/scripts/placement_demo.py +++ b/scripts/placement_demo.py @@ -5,12 +5,12 @@ from numpy.typing import NDArray from scripts import examples -from spatialstencil.placement.graph import Stencil, StencilDirection, FieldDomain, StencilGraph -from spatialstencil.placement.placed_graph import PlacedStencilGraph -from spatialstencil.placement.mla import linearize_with_ck -from spatialstencil.placement.model import CostModel, PlacementCost -from spatialstencil.placement.optimizer import best_of_k_placement -from spatialstencil.placement.partition import FieldPartition +from spada.placement.graph import Stencil, StencilDirection, FieldDomain, StencilGraph +from 
spada.placement.placed_graph import PlacedStencilGraph +from spada.placement.mla import linearize_with_ck +from spada.placement.model import CostModel, PlacementCost +from spada.placement.optimizer import best_of_k_placement +from spada.placement.partition import FieldPartition def demo_graph(): diff --git a/setup.py b/setup.py index 7953d2cb..dc897eeb 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -"""Setup script for spatialstencil package.""" +"""Setup script for spada package.""" from setuptools import setup, find_packages import os @@ -11,28 +11,28 @@ def read_readme(): if os.path.exists(readme_path): with open(readme_path, 'r', encoding='utf-8') as f: return f.read() - return "A spatial stencil compiler for high-performance computing." + return "A SpaDA compiler for high-performance computing." # Read version from package def get_version(): - """Get version from spatialstencil package.""" + """Get version from spada package.""" try: - import spatialstencil - return spatialstencil.__version__ + import spada + return spada.__version__ except (ImportError, AttributeError): return "0.1.0" setup( - name="spatialstencil", + name="spada", version=get_version(), - author="SpatialStencil Team", + author="SpaDA Team", author_email="", - description="A spatial stencil compiler for high-performance computing", + description="A SpaDA compiler for high-performance computing", long_description=read_readme(), long_description_content_type="text/markdown", - url="https://github.com/glukas/spatialstencil", + url="https://github.com/glukas/spada", packages=find_packages(), classifiers=[ "Development Status :: 3 - Alpha", @@ -86,11 +86,11 @@ def get_version(): ], }, entry_points={ - "console_scripts": ["sptlc=spatialstencil.cli.compiler:compile_spatial_ir"], + "console_scripts": ["sptlc=spada.cli.compiler:compile_spatial_ir"], }, include_package_data=True, package_data={ - "spatialstencil": ["**/*.py", "assets/csl/sync/*.csl"], + "spada": ["**/*.py", 
"assets/csl/sync/*.csl"], }, keywords=[ "stencil", diff --git a/spatialstencil/__init__.py b/spada/__init__.py similarity index 100% rename from spatialstencil/__init__.py rename to spada/__init__.py diff --git a/spatialstencil/cli/__init__.py b/spada/cli/__init__.py similarity index 100% rename from spatialstencil/cli/__init__.py rename to spada/cli/__init__.py diff --git a/spatialstencil/cli/compiler.py b/spada/cli/compiler.py similarity index 93% rename from spatialstencil/cli/compiler.py rename to spada/cli/compiler.py index 24c851b0..7bb19a37 100644 --- a/spatialstencil/cli/compiler.py +++ b/spada/cli/compiler.py @@ -1,10 +1,10 @@ import click import itertools import os -from spatialstencil.lowering import spatial_ir_to_csl as s2c -from spatialstencil.syntax.spatial_ir import parser, passes, analysis, irnodes as spa, canonicalization -from spatialstencil.syntax.csl import constants as csl -from spatialstencil.syntax.common import serialization +from spada.lowering import spatial_ir_to_csl as s2c +from spada.syntax.spatial_ir import parser, passes, analysis, irnodes as spa, canonicalization +from spada.syntax.csl import constants as csl +from spada.syntax.common import serialization import subprocess @@ -16,7 +16,6 @@ @click.option('--offset-y', '-y', default=0, type=int, help='Offset for rectangular region in y direction') @click.option('--generate-only', '-g', is_flag=True, help='Only generate the output files without compiling them') @click.option('--disable-benchmarking', is_flag=True, help='Disable benchmarking code generation (and memory overhead)') -@click.option('--sync-benchmarking', is_flag=True, help='Generate sync-assisted benchmarking support') @click.option('--disable-asynchronous', is_flag=True, help='Disable asynchronous task code generation') @click.option('--disable-dsd', is_flag=True, help='Disable DSD operation detection and code generation') @click.option('--disable-map', is_flag=True, help='Disable @map operation detection and code 
generation') @@ -24,12 +23,9 @@ @click.option('--disable-task-recycling', is_flag=True, help='Disable task ID recycling') @click.option('--disable-copy-elision', is_flag=True, help='Disable copy elimination optimization pass') def compile_spatial_ir(input_file: str, output_folder: str, param: list[str], offset_x: int, offset_y: int, - generate_only: bool, disable_benchmarking: bool, sync_benchmarking: bool, + generate_only: bool, disable_benchmarking: bool, disable_asynchronous: bool, disable_dsd: bool, disable_map: bool, disable_task_fusion: bool, disable_task_recycling: bool, disable_copy_elision: bool): - if disable_benchmarking and sync_benchmarking: - raise click.UsageError("--sync-benchmarking cannot be used with --disable-benchmarking") - # Parse parameters into dictionary kernel_parameters = {} for p in param: @@ -87,14 +83,13 @@ def compile_spatial_ir(input_file: str, output_folder: str, param: list[str], of using_memcpy_mode = False if disable_map: - from spatialstencil.syntax.csl import statements + from spada.syntax.csl import statements statements.DISABLE_MAPS = True # Lower the spatial IR to CSL csl_files = s2c.lower_spatial_ir_to_csl( kernel, disable_benchmarking=disable_benchmarking, - sync_benchmarking=sync_benchmarking, disable_asynchronous=disable_asynchronous, disable_dsd=disable_dsd, task_fusion=not disable_task_fusion, diff --git a/spatialstencil/cli/count_flop.py b/spada/cli/count_flop.py similarity index 96% rename from spatialstencil/cli/count_flop.py rename to spada/cli/count_flop.py index 654c2f1d..eda576a4 100644 --- a/spatialstencil/cli/count_flop.py +++ b/spada/cli/count_flop.py @@ -1,8 +1,8 @@ import sys import os from pathlib import Path -from spatialstencil.syntax.stencil_ir.parser import Parser -from spatialstencil.syntax.stencil_ir.flop_counter import FLOPCounter +from spada.syntax.stencil_ir.parser import Parser +from spada.syntax.stencil_ir.flop_counter import FLOPCounter def find_spst_files(directory: str) -> list[Path]: @@ 
-60,7 +60,7 @@ def analyze_file(filepath: Path, parser: Parser, counter: FLOPCounter) -> tuple[ def print_header(): """Print a nice header for the analysis.""" print("=" * 80) - print(" " * 20 + "FLOP Analysis for Spatial Stencil Programs") + print(" " * 20 + "FLOP Analysis for SpaDA Programs") print("=" * 80) print() diff --git a/spatialstencil/cli/gt4py_to_spatial.py b/spada/cli/gt4py_to_spatial.py similarity index 95% rename from spatialstencil/cli/gt4py_to_spatial.py rename to spada/cli/gt4py_to_spatial.py index 4a7352d7..36279d53 100644 --- a/spatialstencil/cli/gt4py_to_spatial.py +++ b/spada/cli/gt4py_to_spatial.py @@ -3,10 +3,10 @@ import sys from pathlib import Path import traceback -from spatialstencil.syntax.gt4py import parser -from spatialstencil.lowering import gt4py_to_stencil_ir -from spatialstencil.lowering.stencil_to_spatial import lower_stencil_to_spatial -from spatialstencil.syntax.stencil_ir import type_inference +from spada.syntax.gt4py import parser +from spada.lowering import gt4py_to_stencil_ir +from spada.lowering.stencil_to_spatial import lower_stencil_to_spatial +from spada.syntax.stencil_ir import type_inference def parse_domain_size(domain_str): """Parse domain_size string in format 'x,y,z' and return tuple of ints.""" diff --git a/spatialstencil/lowering/__init__.py b/spada/lowering/__init__.py similarity index 100% rename from spatialstencil/lowering/__init__.py rename to spada/lowering/__init__.py diff --git a/spatialstencil/lowering/gt4py_to_stencil_ir.py b/spada/lowering/gt4py_to_stencil_ir.py similarity index 98% rename from spatialstencil/lowering/gt4py_to_stencil_ir.py rename to spada/lowering/gt4py_to_stencil_ir.py index 184eb94e..c1a8c5d9 100644 --- a/spatialstencil/lowering/gt4py_to_stencil_ir.py +++ b/spada/lowering/gt4py_to_stencil_ir.py @@ -1,10 +1,10 @@ import ast from collections import defaultdict import copy -from spatialstencil.syntax.gt4py import astnodes as gtast -from spatialstencil.syntax.common.find_and_replace 
import PyASTFindReplace -from spatialstencil.syntax.stencil_ir import irnodes as sast, type_inference -from spatialstencil.syntax.stencil_ir.ssa import SSAVisitor +from spada.syntax.gt4py import astnodes as gtast +from spada.syntax.common.find_and_replace import PyASTFindReplace +from spada.syntax.stencil_ir import irnodes as sast, type_inference +from spada.syntax.stencil_ir.ssa import SSAVisitor def lower_gt4py_to_stencil_ir(program: gtast.GTProgram, @@ -532,10 +532,10 @@ def visit_Call(self, node: ast.Call) -> None: if __name__ == '__main__': import sys - from spatialstencil.syntax.gt4py import parser + from spada.syntax.gt4py import parser if len(sys.argv) not in (2, 3): - print('USAGE: python -m spatialstencil.lowering.gt4py_to_stencil_ir [FUNCTION NAME]') + print('USAGE: python -m spada.lowering.gt4py_to_stencil_ir [FUNCTION NAME]') exit(1) out = parser.parse_file(sys.argv[1]) diff --git a/spatialstencil/lowering/spatial_ir_to_csl.py b/spada/lowering/spatial_ir_to_csl.py similarity index 89% rename from spatialstencil/lowering/spatial_ir_to_csl.py rename to spada/lowering/spatial_ir_to_csl.py index 1cc7a194..3c36712c 100644 --- a/spatialstencil/lowering/spatial_ir_to_csl.py +++ b/spada/lowering/spatial_ir_to_csl.py @@ -3,22 +3,21 @@ """ from collections import defaultdict -from contextlib import nullcontext import copy import functools from io import StringIO import textwrap -from spatialstencil.syntax.common.types import BIT_WIDTH -from spatialstencil.syntax.spatial_ir import irnodes as spir, canonicalization, analysis, passes -from spatialstencil.syntax.spatial_ir import copy_elimination -from spatialstencil.syntax.spatial_ir import canonical_subgrids -from spatialstencil.syntax.spatial_ir.canonicalization import PEBlock, Rectangle -from spatialstencil.syntax.csl import constants as csl, preprocessing, tasks as tdag, statements as cslstmt, dsd_ops -from spatialstencil.syntax.csl import benchmarking as cslbench -from spatialstencil.syntax.csl import 
structures as cslstruct -from spatialstencil.syntax.csl import task_recycling, prune_unused_fields as csl_pruning -from spatialstencil.syntax.csl.codefile import CodeFile -from spatialstencil.syntax.csl.statements import name_to_csl, dtype_as_csl, expr_to_csl +from spada.syntax.common.types import BIT_WIDTH +from spada.syntax.spatial_ir import irnodes as spir, canonicalization, analysis, passes +from spada.syntax.spatial_ir import copy_elimination +from spada.syntax.spatial_ir import canonical_subgrids +from spada.syntax.spatial_ir.canonicalization import PEBlock, Rectangle +from spada.syntax.csl import constants as csl, preprocessing, tasks as tdag, statements as cslstmt, dsd_ops +from spada.syntax.csl import benchmarking as cslbench +from spada.syntax.csl import structures as cslstruct +from spada.syntax.csl import task_recycling, prune_unused_fields as csl_pruning +from spada.syntax.csl.codefile import CodeFile +from spada.syntax.csl.statements import name_to_csl, dtype_as_csl, expr_to_csl UniqueDSDDict = dict[str, list[tuple[str, cslstruct.DataStructureDescriptor]]] @@ -34,7 +33,7 @@ def canonicalize_kernel(kernel: spir.Kernel) -> spir.Kernel: :param kernel: A fully concretized Spatial IR kernel. :return: The transformed kernel, ready for - :func:`~spatialstencil.syntax.spatial_ir.canonicalization.consolidate_rectangles_to_equivalence_classes`. + :func:`~spada.syntax.spatial_ir.canonicalization.consolidate_rectangles_to_equivalence_classes`. 
""" kernel = canonicalization.inline_metaprogramming(kernel) kernel = canonicalization.canonicalize_phases(kernel) @@ -48,7 +47,6 @@ def canonicalize_kernel(kernel: spir.Kernel) -> spir.Kernel: def lower_spatial_ir_to_csl(kernel: spir.Kernel, rect_offset: tuple[int, int] = (0, 0), disable_benchmarking: bool = False, - sync_benchmarking: bool = False, disable_asynchronous: bool = False, disable_dsd: bool = False, task_fusion: bool = True, @@ -62,7 +60,6 @@ def lower_spatial_ir_to_csl(kernel: spir.Kernel, :param rect_offset: The offset of the output rectangle to use. :param disable_benchmarking: If True, disables benchmarking code generation (and memory overhead). Use in memory-limited scenarios. - :param sync_benchmarking: If True, generate sync-assisted benchmarking support for more accurate cycle counts. :param disable_asynchronous: If True, disables asynchronous task code generation. :param disable_dsd: If True, disables DSD operation detection and code generation. :param task_fusion: If True, enables task fusion to reduce number of tasks. 
@@ -86,9 +83,6 @@ def lower_spatial_ir_to_csl(kernel: spir.Kernel, # Run the shared canonicalization pipeline kernel = canonicalize_kernel(kernel) - if disable_benchmarking and sync_benchmarking: - raise ValueError("Sync benchmarking requires benchmarking support to be enabled.") - # Check if we are streaming or using memcpy mode use_memcpy_mode = analysis.kernel_uses_memcpy_mode(kernel) @@ -120,7 +114,7 @@ def lower_spatial_ir_to_csl(kernel: spir.Kernel, # Add benchmarking fields if not disable_benchmarking: - _add_benchmarking_fields(rectangles, sync_benchmarking) + _add_benchmarking_fields(rectangles) # Collect scalar argument types scalar_argument_types = [] @@ -135,170 +129,158 @@ def lower_spatial_ir_to_csl(kernel: spir.Kernel, routing_instructions: list[str] = [] color_maps = [] - resource_context = cslbench.reserve_codegen_resources(csl) if sync_benchmarking and not disable_benchmarking else nullcontext(None) - with resource_context as sync_resources: - channel_to_color = _collect_colors_globally(kernel, rectangles, use_memcpy_mode) - - for rect in rectangles: - # Create a unique CSL code file based on rectangle offset - csl_name = f'code_{rect.x_range[0]}_{rect.y_range[0]}.csl' - rect_code, color_map = generate_rectangle(kernel, rect, routing_instructions, scalar_arguments, use_memcpy_mode, - stream_rects, channel_to_color, disable_benchmarking, sync_benchmarking, - disable_asynchronous, disable_dsd, task_fusion, - task_id_recycling) - color_maps.append(color_map) - csl_codes.append(CodeFile(csl_name, rect_code)) - - # Prepare outputs - layout_code = StringIO() - - if sync_resources is not None: - csl_codes.extend(cslbench.load_sync_assets()) + channel_to_color = _collect_colors_globally(kernel, rectangles, use_memcpy_mode) - ############################################### - # Generate main layout file - - # Compute the tight PE bounding box. kernel.get_grid_rect() now returns tight bounds - # (last-contained PE + 1) rather than canonicalized stops. 
- x0, x1, y0, y1 = kernel.get_grid_rect() - assert x0 == 0, "PE Grid must start at x=0" - assert y0 == 0, "PE Grid must start at y=0" - rect_size = x1 - x0, y1 - y0 - - # Collect unique routes for all rectangles - routes_per_rectangle = _collect_routes(rectangles, color_maps) + for rect in rectangles: + # Create a unique CSL code file based on rectangle offset + csl_name = f'code_{rect.x_range[0]}_{rect.y_range[0]}.csl' + rect_code, color_map = generate_rectangle(kernel, rect, routing_instructions, scalar_arguments, use_memcpy_mode, + stream_rects, channel_to_color, disable_benchmarking, + disable_asynchronous, disable_dsd, task_fusion, + task_id_recycling) + color_maps.append(color_map) + csl_codes.append(CodeFile(csl_name, rect_code)) + + # Prepare outputs + layout_code = StringIO() + + ############################################### + # Generate main layout file + + # Compute the tight PE bounding box. kernel.get_grid_rect() now returns tight bounds + # (last-contained PE + 1) rather than canonicalized stops. 
+ x0, x1, y0, y1 = kernel.get_grid_rect() + assert x0 == 0, "PE Grid must start at x=0" + assert y0 == 0, "PE Grid must start at y=0" + rect_size = x1 - x0, y1 - y0 + + # Collect unique routes for all rectangles + routes_per_rectangle = _collect_routes(rectangles, color_maps) - if use_memcpy_mode: - layout_code.write(f''' + if use_memcpy_mode: + layout_code.write(f''' // Memcpy setup const memcpy = @import_module("", .{{ .width = {rect_size[0]}, .height = {rect_size[1]}, }}); ''') - else: - input_args = [] - output_args = [] - for arg in kernel.arguments: - if arg.compiletime: - continue - if arg.readonly: - input_args.append(arg) - elif arg.writeonly: - output_args.append(arg) - else: - input_args.append(arg) - output_args.append(arg) + else: + input_args = [] + output_args = [] + for arg in kernel.arguments: + if arg.compiletime: + continue + if arg.readonly: + input_args.append(arg) + elif arg.writeonly: + output_args.append(arg) + else: + input_args.append(arg) + output_args.append(arg) - # Only up to 4 streams in each direction are supported (4 input, 4 output streams) - if len(input_args) > 4 or len(output_args) > 4: - raise ValueError('Too many input/output streams: only 4 input and 4 output streams are supported in CSL') + # Only up to 4 streams in each direction are supported (4 input, 4 output streams) + if len(input_args) > 4 or len(output_args) > 4: + raise ValueError('Too many input/output streams: only 4 input and 4 output streams are supported in CSL') - # Generate streaming DATA_*_ID parameters for each input/output stream - layout_code.write('// Streaming copy setup\n') - for i, input_arg in enumerate(input_args): - layout_code.write(f'''param MEMCPYH2D_DATA_{i}_ID: i16; + # Generate streaming DATA_*_ID parameters for each input/output stream + layout_code.write('// Streaming copy setup\n') + for i, input_arg in enumerate(input_args): + layout_code.write(f'''param MEMCPYH2D_DATA_{i}_ID: i16; const MEMCPYH2D_DATA_{i}: color = 
@get_color(MEMCPYH2D_DATA_{i}_ID); ''') - for i, output_arg in enumerate(output_args): - layout_code.write(f'''param MEMCPYD2H_DATA_{i}_ID: i16; + for i, output_arg in enumerate(output_args): + layout_code.write(f'''param MEMCPYD2H_DATA_{i}_ID: i16; const MEMCPYD2H_DATA_{i}: color = @get_color(MEMCPYD2H_DATA_{i}_ID); ''') - layout_code.write(f''' + layout_code.write(f''' const memcpy = @import_module("", .{{ .width = width, .height = height, ''') - for i, input_arg in enumerate(input_args): - layout_code.write(f''' .MEMCPYH2D_{i} = MEMCPYH2D_DATA_{i}_ID, + for i, input_arg in enumerate(input_args): + layout_code.write(f''' .MEMCPYH2D_{i} = MEMCPYH2D_DATA_{i}_ID, ''') - for i, output_arg in enumerate(output_args): - layout_code.write(f''' .MEMCPYD2H_{i} = MEMCPYD2H_DATA_{i}_ID, + for i, output_arg in enumerate(output_args): + layout_code.write(f''' .MEMCPYD2H_{i} = MEMCPYD2H_DATA_{i}_ID, ''') - layout_code.write(f''' + layout_code.write(f''' }}); ''') - if sync_resources is not None: - layout_code.write(cslbench.generate_sync_layout_setup(rect_size[0], rect_size[1], sync_resources)) - - layout_code.write(f'''layout {{ + layout_code.write(f'''layout {{ // Rectangle and code setup @set_rectangle{rect_size};''') - # First pass: @set_tile_code for every PE. - # All tile codes must be established before any @set_color_config call, - # because multi-hop routing config may reference neighboring PEs that - # belong to a different rectangle (e.g. pass-through relays). - sync_tile_binding = cslbench.generate_sync_tile_binding() if sync_resources is not None else '' - for rect in rectangles: - xb_pre, xe_pre, xs, yb_pre, ye_pre, ys = *rect.x_range, *rect.y_range - code_filename = f'code_{xb_pre}_{yb_pre}.csl' - xb = xb_pre + rect_offset[0] - xe = xe_pre + rect_offset[0] - yb = yb_pre + rect_offset[1] - ye = ye_pre + rect_offset[1] + # First pass: @set_tile_code for every PE. 
+ # All tile codes must be established before any @set_color_config call, + # because multi-hop routing config may reference neighboring PEs that + # belong to a different rectangle (e.g. pass-through relays). + for rect in rectangles: + xb_pre, xe_pre, xs, yb_pre, ye_pre, ys = *rect.x_range, *rect.y_range + code_filename = f'code_{xb_pre}_{yb_pre}.csl' + xb = xb_pre + rect_offset[0] + xe = xe_pre + rect_offset[0] + yb = yb_pre + rect_offset[1] + ye = ye_pre + rect_offset[1] - layout_code.write(f''' + layout_code.write(f''' for (@range(i16, {xb}, {xe}, {xs})) |pe_x| {{ for (@range(i16, {yb}, {ye}, {ys})) |pe_y| {{ - @set_tile_code(pe_x, pe_y, "{code_filename}", .{{ .memcpy_params = memcpy.get_params(pe_x), {sync_tile_binding}}}); + @set_tile_code(pe_x, pe_y, "{code_filename}", .{{ .memcpy_params = memcpy.get_params(pe_x), }}); }} }}\n''') - # Second pass: routing (@set_color_config). By emitting these after all - # @set_tile_code calls, every PE referenced by a multi-hop offset is - # guaranteed to already have tile code assigned. - layout_code.write('\n // Routes\n') - for rect in rectangles: - xb_pre, xe_pre, xs, yb_pre, ye_pre, ys = *rect.x_range, *rect.y_range - xb = xb_pre + rect_offset[0] - xe = xe_pre + rect_offset[0] - yb = yb_pre + rect_offset[1] - ye = ye_pre + rect_offset[1] - route_code = routes_per_rectangle.get((xb_pre, yb_pre), '') - if route_code.strip(): - layout_code.write(f''' + # Second pass: routing (@set_color_config). By emitting these after all + # @set_tile_code calls, every PE referenced by a multi-hop offset is + # guaranteed to already have tile code assigned. 
+ layout_code.write('\n // Routes\n') + for rect in rectangles: + xb_pre, xe_pre, xs, yb_pre, ye_pre, ys = *rect.x_range, *rect.y_range + xb = xb_pre + rect_offset[0] + xe = xe_pre + rect_offset[0] + yb = yb_pre + rect_offset[1] + ye = ye_pre + rect_offset[1] + route_code = routes_per_rectangle.get((xb_pre, yb_pre), '') + if route_code.strip(): + layout_code.write(f''' for (@range(i16, {xb}, {xe}, {xs})) |pe_x| {{ for (@range(i16, {yb}, {ye}, {ys})) |pe_y| {{ {route_code} }} }}\n''') - for rinst in routing_instructions: - layout_code.write(rinst + '\n') - - # Emit symbol names for arguments and kernel - layout_code.write('\n // Extern fields\n') - # Gather extern fields from kernel arguments - extern_fields: list[spir.FieldDeclaration] = [] - for rect in rectangles: - place_block = rect.metadata.place - for field in place_block.statements: - if field.is_extern: - if any(field.field_name == ef.field_name for ef in extern_fields): - continue - extern_fields.append(field) + for rinst in routing_instructions: + layout_code.write(rinst + '\n') - for field in extern_fields: - dtype = field.dtype - if isinstance(field.dtype, spir.ArrayType) and isinstance(field.dtype.base_type, spir.StreamType): - pass - elif isinstance(field.dtype, spir.StreamType): - # Support scalar streams - dtype = spir.ArrayType(field.dtype, [1]) + # Emit symbol names for arguments and kernel + layout_code.write('\n // Extern fields\n') + # Gather extern fields from kernel arguments + extern_fields: list[spir.FieldDeclaration] = [] + for rect in rectangles: + place_block = rect.metadata.place + for field in place_block.statements: + if field.is_extern: + if any(field.field_name == ef.field_name for ef in extern_fields): + continue + extern_fields.append(field) - layout_code.write(f' @export_name("{field.field_name.name}", {dtype_as_csl(dtype, export=True)}, true);\n') + for field in extern_fields: + dtype = field.dtype + if isinstance(field.dtype, spir.ArrayType) and 
isinstance(field.dtype.base_type, spir.StreamType): + pass + elif isinstance(field.dtype, spir.StreamType): + # Support scalar streams + dtype = spir.ArrayType(field.dtype, [1]) - if sync_resources is not None: - layout_code.write(cslbench.generate_sync_layout_exports()) + layout_code.write(f' @export_name("{field.field_name.name}", {dtype_as_csl(dtype, export=True)}, true);\n') - layout_code.write(f''' + layout_code.write(f''' // Kernel @export_name("{kernel.name}", fn({", ".join(scalar_argument_types)})void); }}''') - csl_codes.append(CodeFile('layout.csl', layout_code.getvalue())) + csl_codes.append(CodeFile('layout.csl', layout_code.getvalue())) # Return all generated code files return csl_codes @@ -312,7 +294,6 @@ def generate_rectangle(kernel: spir.Kernel, stream_extents: analysis.StreamExtents, channel_to_color: dict[int, int], disable_benchmarking: bool = False, - sync_benchmarking: bool = False, disable_asynchronous: bool = False, disable_dsd: bool = False, task_fusion: bool = True, @@ -351,7 +332,7 @@ def generate_rectangle(kernel: spir.Kernel, canonicalization.convert_foreach_data_tasks_to_loops(rect, dtypes) dtypes = _collect_identifier_types(rect.metadata, kernel.arguments) - benchmark_code = _generate_benchmarking_code(header, disable_benchmarking, sync_benchmarking) + benchmark_code = _generate_benchmarking_code(header, disable_benchmarking) # Convert compute block subgraphs into tasks: # * Make task DAG out of computations @@ -1603,24 +1584,22 @@ def _generate_task_code(rect: PEBlock, current_code.write(f'{indent}@unblock({task_id});\n') -def _generate_benchmarking_code(header: StringIO, disable_benchmarking: bool, - sync_benchmarking: bool) -> cslbench.RectangleBenchmarkingCode: +def _generate_benchmarking_code(header: StringIO, disable_benchmarking: bool) -> cslbench.RectangleBenchmarkingCode: """ Generates benchmarking code in the header and current code. - + :param header: A code generator stream for a file's header (where the declarations are). 
:return: Benchmarking code fragments to insert into the generated file. """ if disable_benchmarking: return cslbench.RectangleBenchmarkingCode() - benchmark_code = ( - cslbench.generate_sync_rectangle_code() if sync_benchmarking else cslbench.generate_basic_rectangle_code()) + benchmark_code = cslbench.generate_basic_rectangle_code() header.write(benchmark_code.header) return benchmark_code -def _add_benchmarking_fields(rectangles: list[Rectangle[PEBlock]], sync_benchmarking: bool = False): +def _add_benchmarking_fields(rectangles: list[Rectangle[PEBlock]]): """ Adds benchmarking variables to the code. :param rectangles: The rectangles to modify. @@ -1638,13 +1617,6 @@ def _add_benchmarking_fields(rectangles: list[Rectangle[PEBlock]], sync_benchmar dtype=spir.ArrayType(spir.ScalarType.u16, [3]), is_extern=True, )) - if sync_benchmarking: - rect.metadata.place.statements.append( - spir.FieldDeclaration( - field_name=spir.Identifier('__benchmark_refclock', 0), - dtype=spir.ArrayType(spir.ScalarType.u16, [3]), - is_extern=True, - )) def _collect_identifier_types(rect: PEBlock, diff --git a/spatialstencil/lowering/stencil_to_spatial.py b/spada/lowering/stencil_to_spatial.py similarity index 87% rename from spatialstencil/lowering/stencil_to_spatial.py rename to spada/lowering/stencil_to_spatial.py index d3722ea2..89dfb80a 100644 --- a/spatialstencil/lowering/stencil_to_spatial.py +++ b/spada/lowering/stencil_to_spatial.py @@ -1,24 +1,24 @@ import copy -from spatialstencil.lowering.stencil_to_spatial_routing import ChannelStrategy, KernelRouting -import spatialstencil.syntax.stencil_ir.irnodes as sast -import spatialstencil.syntax.spatial_ir.irnodes as spa -from spatialstencil.lowering.stencil_to_spatial_compute import ProgramCompute, AbstractStatement -from spatialstencil.lowering.stencil_to_spatial_dataflow import ProgramDataflow -from spatialstencil.lowering.stencil_to_spatial_place import ProgramPlacement - -from spatialstencil.lowering.versioning import Versioning 
-from spatialstencil.syntax.common.types import ScalarType -from spatialstencil.syntax.spatial_ir.canonical_subgrids import canonicalize_subgrids, fill_compute_rectangle -from spatialstencil.syntax.spatial_ir.grid_geometry import split_rectangles - -from spatialstencil.syntax.stencil_ir.domain_collector import DomainCollector -from spatialstencil.syntax.stencil_ir.canonicalize_expression import CanonicalizeExpression -from spatialstencil.syntax.stencil_ir.refactor_forward_backward_stencils import RefactorForwardBackwardStencils -from spatialstencil.syntax.stencil_ir.type_inference import infer_scalar_types, infer_types -from spatialstencil.syntax.stencil_ir.ssa import SSAVisitor -from spatialstencil.syntax.spatial_ir.passes import mark_readonly_writeonly_arguments -from spatialstencil.syntax.spatial_ir.analysis import detect_undefined_array_access +from spada.lowering.stencil_to_spatial_routing import ChannelStrategy, KernelRouting +import spada.syntax.stencil_ir.irnodes as sast +import spada.syntax.spatial_ir.irnodes as spa +from spada.lowering.stencil_to_spatial_compute import ProgramCompute, AbstractStatement +from spada.lowering.stencil_to_spatial_dataflow import ProgramDataflow +from spada.lowering.stencil_to_spatial_place import ProgramPlacement + +from spada.lowering.versioning import Versioning +from spada.syntax.common.types import ScalarType +from spada.syntax.spatial_ir.canonical_subgrids import canonicalize_subgrids, fill_compute_rectangle +from spada.syntax.spatial_ir.grid_geometry import split_rectangles + +from spada.syntax.stencil_ir.domain_collector import DomainCollector +from spada.syntax.stencil_ir.canonicalize_expression import CanonicalizeExpression +from spada.syntax.stencil_ir.refactor_forward_backward_stencils import RefactorForwardBackwardStencils +from spada.syntax.stencil_ir.type_inference import infer_scalar_types, infer_types +from spada.syntax.stencil_ir.ssa import SSAVisitor +from spada.syntax.spatial_ir.passes import 
mark_readonly_writeonly_arguments +from spada.syntax.spatial_ir.analysis import detect_undefined_array_access def lower_stencil_to_spatial(stencil: sast.Program, channel_strategy: ChannelStrategy = ChannelStrategy.TRIVIAL) -> spa.Kernel: """Lower a stencil to a spatial program. diff --git a/spatialstencil/lowering/stencil_to_spatial_compute.py b/spada/lowering/stencil_to_spatial_compute.py similarity index 97% rename from spatialstencil/lowering/stencil_to_spatial_compute.py rename to spada/lowering/stencil_to_spatial_compute.py index 3843b133..a544f174 100644 --- a/spatialstencil/lowering/stencil_to_spatial_compute.py +++ b/spada/lowering/stencil_to_spatial_compute.py @@ -1,17 +1,17 @@ import copy from dataclasses import dataclass -from spatialstencil.lowering.stencil_to_spatial_compute_fwbw import ForwardBackwardComputeVisitor -from spatialstencil.lowering.stencil_to_spatial_dataflow import ProgramDataflow -from spatialstencil.lowering.stencil_to_spatial_place import ProgramPlacement -from spatialstencil.lowering.versioning import Versioning -from spatialstencil.syntax.common.basenode import Wildcard -from spatialstencil.syntax.common.tree_matching import PatternTransformer -from spatialstencil.syntax.common.types import ScalarType -from spatialstencil.syntax.spatial_ir.grid_geometry import Rectangle, group_rectangles_by_domain, split_rectangles -from spatialstencil.syntax.stencil_ir.domain_collector import DomainCollector -import spatialstencil.syntax.spatial_ir.irnodes as spa -import spatialstencil.syntax.stencil_ir.irnodes as sast +from spada.lowering.stencil_to_spatial_compute_fwbw import ForwardBackwardComputeVisitor +from spada.lowering.stencil_to_spatial_dataflow import ProgramDataflow +from spada.lowering.stencil_to_spatial_place import ProgramPlacement +from spada.lowering.versioning import Versioning +from spada.syntax.common.basenode import Wildcard +from spada.syntax.common.tree_matching import PatternTransformer +from spada.syntax.common.types import 
ScalarType +from spada.syntax.spatial_ir.grid_geometry import Rectangle, group_rectangles_by_domain, split_rectangles +from spada.syntax.stencil_ir.domain_collector import DomainCollector +import spada.syntax.spatial_ir.irnodes as spa +import spada.syntax.stencil_ir.irnodes as sast AbstractStatement = Rectangle[tuple[int, spa.Statement]] diff --git a/spatialstencil/lowering/stencil_to_spatial_compute_fwbw.py b/spada/lowering/stencil_to_spatial_compute_fwbw.py similarity index 94% rename from spatialstencil/lowering/stencil_to_spatial_compute_fwbw.py rename to spada/lowering/stencil_to_spatial_compute_fwbw.py index 86ef9ba8..876b09e0 100644 --- a/spatialstencil/lowering/stencil_to_spatial_compute_fwbw.py +++ b/spada/lowering/stencil_to_spatial_compute_fwbw.py @@ -1,12 +1,12 @@ import copy -from spatialstencil.lowering.stencil_to_spatial_dataflow import ProgramDataflow -from spatialstencil.lowering.stencil_to_spatial_place import ProgramPlacement -from spatialstencil.lowering.versioning import Versioning -from spatialstencil.syntax.common.types import ScalarType -from spatialstencil.syntax.spatial_ir.grid_geometry import Rectangle -import spatialstencil.syntax.spatial_ir.irnodes as spa -import spatialstencil.syntax.stencil_ir.irnodes as sast -from spatialstencil.syntax.stencil_ir.irnodes import ComputationBlock +from spada.lowering.stencil_to_spatial_dataflow import ProgramDataflow +from spada.lowering.stencil_to_spatial_place import ProgramPlacement +from spada.lowering.versioning import Versioning +from spada.syntax.common.types import ScalarType +from spada.syntax.spatial_ir.grid_geometry import Rectangle +import spada.syntax.spatial_ir.irnodes as spa +import spada.syntax.stencil_ir.irnodes as sast +from spada.syntax.stencil_ir.irnodes import ComputationBlock AbstractStatement = Rectangle[tuple[int, spa.Statement]] diff --git a/spatialstencil/lowering/stencil_to_spatial_dataflow.py b/spada/lowering/stencil_to_spatial_dataflow.py similarity index 95% rename from 
spatialstencil/lowering/stencil_to_spatial_dataflow.py rename to spada/lowering/stencil_to_spatial_dataflow.py index 79697683..da218e0a 100644 --- a/spatialstencil/lowering/stencil_to_spatial_dataflow.py +++ b/spada/lowering/stencil_to_spatial_dataflow.py @@ -2,15 +2,15 @@ from dataclasses import dataclass from enum import Enum, auto -import spatialstencil.syntax.stencil_ir.irnodes as sast -import spatialstencil.syntax.spatial_ir.irnodes as spa -from spatialstencil.lowering.stencil_to_spatial_place import ProgramPlacement +import spada.syntax.stencil_ir.irnodes as sast +import spada.syntax.spatial_ir.irnodes as spa +from spada.lowering.stencil_to_spatial_place import ProgramPlacement -from spatialstencil.lowering.versioning import Versioning -from spatialstencil.syntax.common.types import ScalarType -from spatialstencil.syntax.spatial_ir.grid_geometry import Rectangle, split_rectangles, group_rectangles_by_domain +from spada.lowering.versioning import Versioning +from spada.syntax.common.types import ScalarType +from spada.syntax.spatial_ir.grid_geometry import Rectangle, split_rectangles, group_rectangles_by_domain -from spatialstencil.syntax.stencil_ir.domain_collector import DomainCollector +from spada.syntax.stencil_ir.domain_collector import DomainCollector @dataclass(frozen=True) diff --git a/spatialstencil/lowering/stencil_to_spatial_place.py b/spada/lowering/stencil_to_spatial_place.py similarity index 96% rename from spatialstencil/lowering/stencil_to_spatial_place.py rename to spada/lowering/stencil_to_spatial_place.py index 8176a75f..99d4ea66 100644 --- a/spatialstencil/lowering/stencil_to_spatial_place.py +++ b/spada/lowering/stencil_to_spatial_place.py @@ -3,13 +3,13 @@ from dataclasses import dataclass from typing import Mapping, Set -import spatialstencil.syntax.stencil_ir.irnodes as sast -import spatialstencil.syntax.spatial_ir.irnodes as spa -from spatialstencil.lowering.versioning import Versioning +import spada.syntax.stencil_ir.irnodes as sast 
+import spada.syntax.spatial_ir.irnodes as spa +from spada.lowering.versioning import Versioning -from spatialstencil.syntax.common.types import ScalarType -from spatialstencil.syntax.spatial_ir.grid_geometry import Rectangle, split_rectangles, group_rectangles_by_domain -from spatialstencil.syntax.stencil_ir.domain_collector import DomainCollector +from spada.syntax.common.types import ScalarType +from spada.syntax.spatial_ir.grid_geometry import Rectangle, split_rectangles, group_rectangles_by_domain +from spada.syntax.stencil_ir.domain_collector import DomainCollector AbstractFieldDeclaration = Rectangle[spa.FieldDeclaration] diff --git a/spatialstencil/lowering/stencil_to_spatial_routing.py b/spada/lowering/stencil_to_spatial_routing.py similarity index 98% rename from spatialstencil/lowering/stencil_to_spatial_routing.py rename to spada/lowering/stencil_to_spatial_routing.py index 0801d921..7203c9f8 100644 --- a/spatialstencil/lowering/stencil_to_spatial_routing.py +++ b/spada/lowering/stencil_to_spatial_routing.py @@ -1,8 +1,8 @@ import copy from enum import Enum, auto -from spatialstencil.lowering.versioning import Versioning -import spatialstencil.syntax.spatial_ir.irnodes as spa -from spatialstencil.syntax.spatial_ir.canonicalization import canonicalize_phases, inline_phases +from spada.lowering.versioning import Versioning +import spada.syntax.spatial_ir.irnodes as spa +from spada.syntax.spatial_ir.canonicalization import canonicalize_phases, inline_phases class ChannelStrategy(Enum): diff --git a/spatialstencil/lowering/versioning.py b/spada/lowering/versioning.py similarity index 100% rename from spatialstencil/lowering/versioning.py rename to spada/lowering/versioning.py diff --git a/spatialstencil/placement/README.md b/spada/placement/README.md similarity index 100% rename from spatialstencil/placement/README.md rename to spada/placement/README.md diff --git a/spatialstencil/placement/__init__.py b/spada/placement/__init__.py similarity index 100% 
rename from spatialstencil/placement/__init__.py rename to spada/placement/__init__.py diff --git a/spatialstencil/placement/domain.py b/spada/placement/domain.py similarity index 100% rename from spatialstencil/placement/domain.py rename to spada/placement/domain.py diff --git a/spatialstencil/placement/graph.py b/spada/placement/graph.py similarity index 98% rename from spatialstencil/placement/graph.py rename to spada/placement/graph.py index 43fb59b6..3356db46 100644 --- a/spatialstencil/placement/graph.py +++ b/spada/placement/graph.py @@ -1,8 +1,8 @@ from typing import Sequence, List, Dict, Tuple import igraph as ig -from spatialstencil.placement.domain import FieldDomain -from spatialstencil.placement.stencil import Stencil, StencilDirection +from spada.placement.domain import FieldDomain +from spada.placement.stencil import Stencil, StencilDirection class StencilGraph: diff --git a/spatialstencil/placement/mla.py b/spada/placement/mla.py similarity index 100% rename from spatialstencil/placement/mla.py rename to spada/placement/mla.py diff --git a/spatialstencil/placement/model.py b/spada/placement/model.py similarity index 98% rename from spatialstencil/placement/model.py rename to spada/placement/model.py index d1d72b95..62b73036 100644 --- a/spatialstencil/placement/model.py +++ b/spada/placement/model.py @@ -8,8 +8,8 @@ import numpy as np from numpy.typing import NDArray -from spatialstencil.placement.graph import StencilGraph -from spatialstencil.placement.placement import Placement +from spada.placement.graph import StencilGraph +from spada.placement.placement import Placement @dataclass diff --git a/spatialstencil/placement/optimizer.py b/spada/placement/optimizer.py similarity index 88% rename from spatialstencil/placement/optimizer.py rename to spada/placement/optimizer.py index 7edb7922..83724ff0 100644 --- a/spatialstencil/placement/optimizer.py +++ b/spada/placement/optimizer.py @@ -2,10 +2,10 @@ import igraph import numpy as np -from 
spatialstencil.placement.graph import StencilGraph -from spatialstencil.placement.placed_graph import PlacedStencilGraph -from spatialstencil.placement.model import PlacementCost, CostModel -from spatialstencil.placement.partition import FieldPartition +from spada.placement.graph import StencilGraph +from spada.placement.placed_graph import PlacedStencilGraph +from spada.placement.model import PlacementCost, CostModel +from spada.placement.partition import FieldPartition def color_graph(g: StencilGraph): diff --git a/spatialstencil/placement/partition.py b/spada/placement/partition.py similarity index 97% rename from spatialstencil/placement/partition.py rename to spada/placement/partition.py index 5884dc6f..73c699ae 100644 --- a/spatialstencil/placement/partition.py +++ b/spada/placement/partition.py @@ -11,9 +11,9 @@ import igraph from typing import Tuple -from spatialstencil.placement.graph import FieldDomain -from spatialstencil.placement.mla import linearize_with_random_forest -from spatialstencil.placement.placement import Placement +from spada.placement.graph import FieldDomain +from spada.placement.mla import linearize_with_random_forest +from spada.placement.placement import Placement diff --git a/spatialstencil/placement/placed_graph.py b/spada/placement/placed_graph.py similarity index 94% rename from spatialstencil/placement/placed_graph.py rename to spada/placement/placed_graph.py index 2f14c4f6..e6edd334 100644 --- a/spatialstencil/placement/placed_graph.py +++ b/spada/placement/placed_graph.py @@ -2,9 +2,9 @@ import igraph as ig import matplotlib.pyplot as plt -from spatialstencil.placement.graph import StencilGraph -from spatialstencil.placement.partition import Placement -from spatialstencil.placement.stencil import StencilDirection +from spada.placement.graph import StencilGraph +from spada.placement.partition import Placement +from spada.placement.stencil import StencilDirection class PlacedStencilGraph(StencilGraph): diff --git 
a/spatialstencil/placement/placement.py b/spada/placement/placement.py similarity index 99% rename from spatialstencil/placement/placement.py rename to spada/placement/placement.py index 8495b5df..32d900fb 100644 --- a/spatialstencil/placement/placement.py +++ b/spada/placement/placement.py @@ -7,7 +7,7 @@ import numpy as np import igraph -from spatialstencil.placement.graph import StencilGraph +from spada.placement.graph import StencilGraph @dataclass diff --git a/spatialstencil/placement/stencil.py b/spada/placement/stencil.py similarity index 100% rename from spatialstencil/placement/stencil.py rename to spada/placement/stencil.py diff --git a/spatialstencil/runtime/__init__.py b/spada/runtime/__init__.py similarity index 100% rename from spatialstencil/runtime/__init__.py rename to spada/runtime/__init__.py diff --git a/spatialstencil/runtime/cerebras_runtime_stub.py b/spada/runtime/cerebras_runtime_stub.py similarity index 100% rename from spatialstencil/runtime/cerebras_runtime_stub.py rename to spada/runtime/cerebras_runtime_stub.py diff --git a/spatialstencil/runtime/runtime.py b/spada/runtime/runtime.py similarity index 83% rename from spatialstencil/runtime/runtime.py rename to spada/runtime/runtime.py index 8f3c01f7..9ef3adc6 100644 --- a/spatialstencil/runtime/runtime.py +++ b/spada/runtime/runtime.py @@ -8,10 +8,8 @@ import numpy.typing as npt import time -SYNC_REQUIRED_SYMBOLS = ("f_sync", "f_tic", "f_toc", "__benchmark_refclock") - if TYPE_CHECKING: - from spatialstencil.runtime import cerebras_runtime_stub as crt + from spada.runtime import cerebras_runtime_stub as crt else: try: from cerebras.sdk.runtime import sdkruntimepybind as crt @@ -226,52 +224,6 @@ def copy_back_benchmark_cycles(runtime: crt.SdkRuntime, metadata: ProgramMetadat return cycle_stop - cycle_start -def copy_back_sync_buffer(runtime: crt.SdkRuntime, metadata: ProgramMetadata) -> np.ndarray: - """ - Copy back the reference clock sync-benchmarking buffer. 
- - :param runtime: The Cerebras SDK runtime object to perform the copy operation - :param metadata: Program metadata containing input/output information - :return: Numpy array containing the reference clock for each PE - """ - cycle_ref = np.zeros(metadata.kernel_dims + [3], dtype=np.uint32) - runtime.memcpy_d2h( - cycle_ref.ravel(), - runtime.get_id("__benchmark_refclock"), - 0, - 0, - *cycle_ref.shape, - streaming=False, - data_type=crt.MemcpyDataType.MEMCPY_16BIT, - order=crt.MemcpyOrder.ROW_MAJOR, - nonblock=False, - ) - return convert_timestamp(cycle_ref) - - -def copy_back_sync_benchmark_data(runtime: crt.SdkRuntime, metadata: ProgramMetadata) -> np.ndarray: - """ - Copy back sync-benchmarking data and reconstruct the corrected global cycle count. - - :param runtime: The Cerebras SDK runtime object to perform the copy operation - :param metadata: Program metadata containing input/output information - :return: Numpy scalar containing the total number of cycles spent on the chip - for the benchmarked period. - """ - # Compute propagation delay (one cycle per link) to synchronize reference clocks - width, height = metadata.kernel_dims - propagation_delay = np.arange(width, dtype=np.uint64)[:, None] + np.arange(height, dtype=np.uint64)[None, :] - - time_start, time_end = copy_back_benchmark_data(runtime, metadata) - reference = copy_back_sync_buffer(runtime, metadata) - reference = reference - propagation_delay - time_start = time_start - reference - time_end = time_end - reference - - # Return the total time spent on the chip - return time_end.max() - time_start.min() - - def print_cycle_counts(label: str, cycle_counts: np.ndarray) -> None: """ Print benchmark data in a compact form for either scalar or per-PE cycle counts. 
@@ -342,14 +294,9 @@ def __init__( self.inputs = self.metadata.inputs self.outputs = self.metadata.outputs - print("SYNC BENCHMARK?", self.has_sync_benchmarking()) - def has_symbol(self, symbol: str) -> bool: return self.runtime.get_id(symbol) is not None - def has_sync_benchmarking(self) -> bool: - return all(self.has_symbol(symbol) for symbol in SYNC_REQUIRED_SYMBOLS) - def has_basic_benchmarking(self) -> bool: return self.has_symbol("__benchmark_start") and self.has_symbol("__benchmark_stop") @@ -387,15 +334,8 @@ def __call__(self, *args, **kwargs) -> Dict[str, np.ndarray]: self.runtime.run() print("done.", flush=True) - sync_benchmarking = False - if self.benchmark: - sync_benchmarking = self.has_sync_benchmarking() - if not sync_benchmarking and not self.has_basic_benchmarking(): - raise ValueError("Benchmarking requested but not enabled in the program.") - - if self.benchmark and sync_benchmarking and not self.metadata.memcpy_mode: - self.runtime.launch("f_sync", nonblock=False) - self.runtime.launch("f_tic", nonblock=False) + if self.benchmark and not self.has_basic_benchmarking(): + raise ValueError("Benchmarking requested but not enabled in the program.") # Copy data to device for name, data in kwargs.items(): @@ -414,28 +354,17 @@ def __call__(self, *args, **kwargs) -> Dict[str, np.ndarray]: # Use flatten_copy to copy data to device flatten_copy(name, data, expected_shape, self.runtime, self.metadata, self.benchmark) - if self.benchmark and sync_benchmarking and self.metadata.memcpy_mode: - self.runtime.launch("f_sync", nonblock=False) - # Run the program for i in range(self.repetitions): if self.metadata.memcpy_mode: if self.benchmark and not self.simulator and i == 0: time.sleep(5.0) print("Launching kernel...", flush=True, end="") - if self.benchmark and sync_benchmarking: - self.runtime.launch("f_tic", nonblock=False) self.runtime.launch(self.metadata.kernel_name, *scalar_args, nonblock=False) - if self.benchmark and sync_benchmarking: - 
self.runtime.launch("f_toc", nonblock=False) print("kernel launched.", flush=True) if self.benchmark: - cycle_counts = ( - copy_back_sync_benchmark_data(self.runtime, self.metadata) - if sync_benchmarking - else copy_back_benchmark_cycles(self.runtime, self.metadata) - ) + cycle_counts = copy_back_benchmark_cycles(self.runtime, self.metadata) num_digits = len(str(self.repetitions)) np.save(self.output_dir / f"perf_cycles_{i:0{num_digits}d}.npy", cycle_counts) print_cycle_counts(f"Iteration {i} cycle count", cycle_counts) @@ -460,11 +389,7 @@ def __call__(self, *args, **kwargs) -> Dict[str, np.ndarray]: print("Copy-back complete.", flush=True) if self.benchmark and not self.metadata.memcpy_mode: - cycle_counts = ( - copy_back_sync_benchmark_data(self.runtime, self.metadata) - if sync_benchmarking - else copy_back_benchmark_data(self.runtime, self.metadata) - ) + cycle_counts = copy_back_benchmark_data(self.runtime, self.metadata) np.save(self.output_dir / "perf_cycles.npy", cycle_counts) print_cycle_counts("Cycle count", cycle_counts) diff --git a/spatialstencil/syntax/__init__.py b/spada/syntax/__init__.py similarity index 100% rename from spatialstencil/syntax/__init__.py rename to spada/syntax/__init__.py diff --git a/spatialstencil/syntax/common/__init__.py b/spada/syntax/common/__init__.py similarity index 100% rename from spatialstencil/syntax/common/__init__.py rename to spada/syntax/common/__init__.py diff --git a/spatialstencil/syntax/common/basenode.py b/spada/syntax/common/basenode.py similarity index 100% rename from spatialstencil/syntax/common/basenode.py rename to spada/syntax/common/basenode.py diff --git a/spatialstencil/syntax/common/find_and_replace.py b/spada/syntax/common/find_and_replace.py similarity index 100% rename from spatialstencil/syntax/common/find_and_replace.py rename to spada/syntax/common/find_and_replace.py diff --git a/spatialstencil/syntax/common/match_tree.py b/spada/syntax/common/match_tree.py similarity index 98% rename from 
spatialstencil/syntax/common/match_tree.py rename to spada/syntax/common/match_tree.py index 47cb63fa..1c576c08 100644 --- a/spatialstencil/syntax/common/match_tree.py +++ b/spada/syntax/common/match_tree.py @@ -2,9 +2,9 @@ from dataclasses import dataclass from typing import List, Union, Deque, TypeVar, Any, Generic -from spatialstencil.syntax.common.basenode import BaseNode +from spada.syntax.common.basenode import BaseNode -import spatialstencil.syntax.common.basenode as syntax +import spada.syntax.common.basenode as syntax V = TypeVar('V') diff --git a/spatialstencil/syntax/common/serialization.py b/spada/syntax/common/serialization.py similarity index 100% rename from spatialstencil/syntax/common/serialization.py rename to spada/syntax/common/serialization.py diff --git a/spatialstencil/syntax/common/tree_matching.py b/spada/syntax/common/tree_matching.py similarity index 97% rename from spatialstencil/syntax/common/tree_matching.py rename to spada/syntax/common/tree_matching.py index 43ce3d62..2e811afc 100644 --- a/spatialstencil/syntax/common/tree_matching.py +++ b/spada/syntax/common/tree_matching.py @@ -3,9 +3,9 @@ from dataclasses import dataclass from typing import TypeVar, Generic -from spatialstencil.syntax.common.basenode import BaseNode, Wildcard -from spatialstencil.syntax.common.match_tree import root_to_leaf_paths, TreeNode, Symbol, Index, Label, MatchingBaseNode -from spatialstencil.syntax.common.trie import TrieBuilder, TrieNode, Trie +from spada.syntax.common.basenode import BaseNode, Wildcard +from spada.syntax.common.match_tree import root_to_leaf_paths, TreeNode, Symbol, Index, Label, MatchingBaseNode +from spada.syntax.common.trie import TrieBuilder, TrieNode, Trie from collections import deque, defaultdict diff --git a/spatialstencil/syntax/common/trie.py b/spada/syntax/common/trie.py similarity index 100% rename from spatialstencil/syntax/common/trie.py rename to spada/syntax/common/trie.py diff --git 
a/spatialstencil/syntax/common/types.py b/spada/syntax/common/types.py similarity index 100% rename from spatialstencil/syntax/common/types.py rename to spada/syntax/common/types.py diff --git a/spatialstencil/syntax/common/visitor.py b/spada/syntax/common/visitor.py similarity index 99% rename from spatialstencil/syntax/common/visitor.py rename to spada/syntax/common/visitor.py index 5f86c712..2514f981 100644 --- a/spatialstencil/syntax/common/visitor.py +++ b/spada/syntax/common/visitor.py @@ -5,7 +5,7 @@ functionality such as IR language testing and dataclass support. """ from typing import Generic, TypeVar, Sequence -from spatialstencil.syntax.common.basenode import BaseNode +from spada.syntax.common.basenode import BaseNode # Create a generic type T that extends the base node type BaseNodeT = TypeVar('BaseNodeT', bound=BaseNode) diff --git a/spatialstencil/syntax/csl/__init__.py b/spada/syntax/csl/__init__.py similarity index 100% rename from spatialstencil/syntax/csl/__init__.py rename to spada/syntax/csl/__init__.py diff --git a/spatialstencil/syntax/csl/benchmarking.py b/spada/syntax/csl/benchmarking.py similarity index 98% rename from spatialstencil/syntax/csl/benchmarking.py rename to spada/syntax/csl/benchmarking.py index b28dae7e..3b19ea72 100644 --- a/spatialstencil/syntax/csl/benchmarking.py +++ b/spada/syntax/csl/benchmarking.py @@ -9,7 +9,7 @@ from pathlib import Path from typing import Iterator, Sequence -from spatialstencil.syntax.csl.codefile import CodeFile +from spada.syntax.csl.codefile import CodeFile _SYNC_ASSET_DIR = Path(__file__).resolve().parents[2] / "assets" / "csl" / "sync" diff --git a/spatialstencil/syntax/csl/codefile.py b/spada/syntax/csl/codefile.py similarity index 100% rename from spatialstencil/syntax/csl/codefile.py rename to spada/syntax/csl/codefile.py diff --git a/spatialstencil/syntax/csl/constants.py b/spada/syntax/csl/constants.py similarity index 100% rename from spatialstencil/syntax/csl/constants.py rename to 
spada/syntax/csl/constants.py diff --git a/spatialstencil/syntax/csl/dsd_ops.py b/spada/syntax/csl/dsd_ops.py similarity index 98% rename from spatialstencil/syntax/csl/dsd_ops.py rename to spada/syntax/csl/dsd_ops.py index fd168fb0..0123754c 100644 --- a/spatialstencil/syntax/csl/dsd_ops.py +++ b/spada/syntax/csl/dsd_ops.py @@ -4,8 +4,8 @@ import copy from dataclasses import dataclass from typing import Literal, Optional -from spatialstencil.syntax.spatial_ir import irnodes as spir -from spatialstencil.syntax.csl import structures as cslstruct +from spada.syntax.spatial_ir import irnodes as spir +from spada.syntax.csl import structures as cslstruct UniqueDSDDict = dict[str, list[tuple[str, cslstruct.DataStructureDescriptor]]] @@ -96,7 +96,7 @@ def _ident_or_const(expr: spir.SpatialNode) -> spir.Identifier | spir.ConstantLi def _dsd(dsds: UniqueDSDDict, expr: spir.SpatialNode, output: bool = False) -> str: - from spatialstencil.syntax.csl.statements import name_to_csl + from spada.syntax.csl.statements import name_to_csl if isinstance(expr, spir.Identifier): if expr.as_ir() not in dsds: return name_to_csl(expr) @@ -124,7 +124,7 @@ def _dsd(dsds: UniqueDSDDict, expr: spir.SpatialNode, output: bool = False) -> s def _dsd_object(dsds: UniqueDSDDict, expr: spir.SpatialNode, output: bool = False) -> str: - from spatialstencil.syntax.csl.statements import name_to_csl + from spada.syntax.csl.statements import name_to_csl if isinstance(expr, spir.Identifier): if expr.as_ir() not in dsds: return name_to_csl(expr) @@ -312,7 +312,7 @@ def _as_csl(self, statement: spir.AssignmentStatement | spir.SendStatement, raise TypeError(f"Unsupported types for cast operation: {src_dtype}, {dtype}") if self.scalar_input: - from spatialstencil.syntax.csl.statements import emit_expression + from spada.syntax.csl.statements import emit_expression if isinstance(statement, spir.SendStatement): # local_array may be an ArraySlice (e.g. a[k]) or a plain Identifier (e.g. 
x) src_expr = emit_expression(spir.Expression(statement.local_array), dsds, dtypes) diff --git a/spatialstencil/syntax/csl/preprocessing.py b/spada/syntax/csl/preprocessing.py similarity index 80% rename from spatialstencil/syntax/csl/preprocessing.py rename to spada/syntax/csl/preprocessing.py index 9c9a06a5..d2d97943 100644 --- a/spatialstencil/syntax/csl/preprocessing.py +++ b/spada/syntax/csl/preprocessing.py @@ -1,4 +1,4 @@ -from spatialstencil.syntax.spatial_ir.canonicalization import PEBlock +from spada.syntax.spatial_ir.canonicalization import PEBlock def preprocess_rectangle(rect: PEBlock): diff --git a/spatialstencil/syntax/csl/prune_unused_fields.py b/spada/syntax/csl/prune_unused_fields.py similarity index 82% rename from spatialstencil/syntax/csl/prune_unused_fields.py rename to spada/syntax/csl/prune_unused_fields.py index 9057ee6e..6e0ffbcc 100644 --- a/spatialstencil/syntax/csl/prune_unused_fields.py +++ b/spada/syntax/csl/prune_unused_fields.py @@ -2,10 +2,10 @@ Support module for copy elimination in the CSL codegen backend. Shares logic with the more general copy elimination pass, but specializes for DSD operations. 
""" -from spatialstencil.syntax.csl import dsd_ops -from spatialstencil.syntax.spatial_ir import irnodes as spir -from spatialstencil.syntax.spatial_ir.canonicalization import PEBlock -from spatialstencil.syntax.spatial_ir.copy_elimination import _FieldUseCollector +from spada.syntax.csl import dsd_ops +from spada.syntax.spatial_ir import irnodes as spir +from spada.syntax.spatial_ir.canonicalization import PEBlock +from spada.syntax.spatial_ir.copy_elimination import _FieldUseCollector def _effective_statement_for_csl_codegen( diff --git a/spatialstencil/syntax/csl/statements.py b/spada/syntax/csl/statements.py similarity index 99% rename from spatialstencil/syntax/csl/statements.py rename to spada/syntax/csl/statements.py index e49d4902..f2eccc0e 100644 --- a/spatialstencil/syntax/csl/statements.py +++ b/spada/syntax/csl/statements.py @@ -1,8 +1,8 @@ from io import StringIO from typing import Optional -from spatialstencil.syntax.csl.structures import DataStructureDescriptor -from spatialstencil.syntax.csl import dsd_ops -from spatialstencil.syntax.spatial_ir import irnodes as spir +from spada.syntax.csl.structures import DataStructureDescriptor +from spada.syntax.csl import dsd_ops +from spada.syntax.spatial_ir import irnodes as spir UniqueDSDDict = dict[str, list[tuple[str, DataStructureDescriptor]]] diff --git a/spatialstencil/syntax/csl/structures.py b/spada/syntax/csl/structures.py similarity index 100% rename from spatialstencil/syntax/csl/structures.py rename to spada/syntax/csl/structures.py diff --git a/spatialstencil/syntax/csl/task_recycling.py b/spada/syntax/csl/task_recycling.py similarity index 99% rename from spatialstencil/syntax/csl/task_recycling.py rename to spada/syntax/csl/task_recycling.py index 66b74013..96dc6d78 100644 --- a/spatialstencil/syntax/csl/task_recycling.py +++ b/spada/syntax/csl/task_recycling.py @@ -1,7 +1,7 @@ """ This module plans how logical CSL local tasks can share a smaller set of hardware local-task IDs when the program 
contains more local tasks than the -target architecture exposes in :mod:`spatialstencil.syntax.csl.constants`. +target architecture exposes in :mod:`spada.syntax.csl.constants`. Terminology ----------- @@ -144,8 +144,8 @@ import heapq from typing import Iterable -from spatialstencil.syntax.csl import constants -from spatialstencil.syntax.csl import tasks as tdag +from spada.syntax.csl import constants +from spada.syntax.csl import tasks as tdag @dataclass(frozen=True) diff --git a/spatialstencil/syntax/csl/tasks.py b/spada/syntax/csl/tasks.py similarity index 97% rename from spatialstencil/syntax/csl/tasks.py rename to spada/syntax/csl/tasks.py index 6d9fecc1..838ae46b 100644 --- a/spatialstencil/syntax/csl/tasks.py +++ b/spada/syntax/csl/tasks.py @@ -7,8 +7,8 @@ from enum import Enum, auto import networkx as nx # TODO: Switch to igraph from typing import Any, Literal, Optional -from spatialstencil.syntax.spatial_ir import irnodes as spir, analysis -from spatialstencil.syntax.csl import constants, dsd_ops, structures as cslstruct +from spada.syntax.spatial_ir import irnodes as spir, analysis +from spada.syntax.csl import constants, dsd_ops, structures as cslstruct UniqueDSDDict = dict[str, list[tuple[str, cslstruct.DataStructureDescriptor]]] diff --git a/spatialstencil/syntax/gt4py/__init__.py b/spada/syntax/gt4py/__init__.py similarity index 100% rename from spatialstencil/syntax/gt4py/__init__.py rename to spada/syntax/gt4py/__init__.py diff --git a/spatialstencil/syntax/gt4py/astnodes.py b/spada/syntax/gt4py/astnodes.py similarity index 97% rename from spatialstencil/syntax/gt4py/astnodes.py rename to spada/syntax/gt4py/astnodes.py index 809312c2..bf9cc679 100644 --- a/spatialstencil/syntax/gt4py/astnodes.py +++ b/spada/syntax/gt4py/astnodes.py @@ -6,8 +6,8 @@ import enum from dataclasses import dataclass -from spatialstencil.syntax.common.basenode import BaseNode -from spatialstencil.syntax.common import visitor +from spada.syntax.common.basenode import 
BaseNode +from spada.syntax.common import visitor class ComputationType(enum.Enum): diff --git a/spatialstencil/syntax/gt4py/parser.py b/spada/syntax/gt4py/parser.py similarity index 93% rename from spatialstencil/syntax/gt4py/parser.py rename to spada/syntax/gt4py/parser.py index 53c41584..f78c6abb 100644 --- a/spatialstencil/syntax/gt4py/parser.py +++ b/spada/syntax/gt4py/parser.py @@ -1,7 +1,7 @@ import ast import sys from typing import TextIO -from spatialstencil.syntax.gt4py.astnodes import * +from spada.syntax.gt4py.astnodes import * class GTVisitor(ast.NodeVisitor): @@ -105,10 +105,10 @@ def parse_function(func: ast.FunctionDef) -> GTProgram: def parse_string(code: str) -> dict[str, GTree]: """ - Parses a string representing a spatial stencil program, returning the + Parses a string representing a SpaDA program, returning the top-level program AST node. - :param code: A code string in spatial stencil format. + :param code: A code string in SpaDA format. :return: A Program node representing the root of the AST. """ module = ast.parse(code) @@ -123,7 +123,7 @@ def parse_string(code: str) -> dict[str, GTree]: def parse_file(file_or_filename: TextIO | str) -> dict[str, ast.FunctionDef]: """ - Parses a file representing a spatial stencil program, returning the + Parses a file representing a SpaDA program, returning the top-level program AST node. :param file_or_filename: A file path or handle to an open file to read. 
@@ -137,7 +137,7 @@ def parse_file(file_or_filename: TextIO | str) -> dict[str, ast.FunctionDef]: if __name__ == '__main__': if len(sys.argv) not in (2, 3): - print('USAGE: python -m spatialstencil.syntax.gt4py.parser [FUNCTION NAME]') + print('USAGE: python -m spada.syntax.gt4py.parser [FUNCTION NAME]') exit(1) out = parse_file(sys.argv[1]) diff --git a/spatialstencil/syntax/spatial_ir/__init__.py b/spada/syntax/spatial_ir/__init__.py similarity index 100% rename from spatialstencil/syntax/spatial_ir/__init__.py rename to spada/syntax/spatial_ir/__init__.py diff --git a/spatialstencil/syntax/spatial_ir/analysis.py b/spada/syntax/spatial_ir/analysis.py similarity index 99% rename from spatialstencil/syntax/spatial_ir/analysis.py rename to spada/syntax/spatial_ir/analysis.py index 7ca6d695..b1b093cd 100644 --- a/spatialstencil/syntax/spatial_ir/analysis.py +++ b/spada/syntax/spatial_ir/analysis.py @@ -2,11 +2,11 @@ Contains analysis functions for Spatial IR, such as statement dependency analysis. 
""" from collections import defaultdict -from spatialstencil.syntax.spatial_ir import irnodes as spir +from spada.syntax.spatial_ir import irnodes as spir from dataclasses import dataclass from typing import Literal import networkx as nx # TODO: Switch to igraph -from spatialstencil.syntax.spatial_ir.grid_geometry import Rectangle +from spada.syntax.spatial_ir.grid_geometry import Rectangle @dataclass(frozen=True) diff --git a/spatialstencil/syntax/spatial_ir/canonical_subgrids.py b/spada/syntax/spatial_ir/canonical_subgrids.py similarity index 93% rename from spatialstencil/syntax/spatial_ir/canonical_subgrids.py rename to spada/syntax/spatial_ir/canonical_subgrids.py index af05eb53..9f1027ad 100644 --- a/spatialstencil/syntax/spatial_ir/canonical_subgrids.py +++ b/spada/syntax/spatial_ir/canonical_subgrids.py @@ -1,9 +1,9 @@ import copy -from spatialstencil.syntax.spatial_ir.grid_geometry import split_rectangles -from spatialstencil.syntax.spatial_ir.irnodes import Kernel, SubgridExpression, DataflowBlock, PlaceBlock, ComputeBlock, \ +from spada.syntax.spatial_ir.grid_geometry import split_rectangles +from spada.syntax.spatial_ir.irnodes import Kernel, SubgridExpression, DataflowBlock, PlaceBlock, ComputeBlock, \ Phase -import spatialstencil.syntax.spatial_ir.irnodes as spa +import spada.syntax.spatial_ir.irnodes as spa def fill_compute_rectangle(kernel: spa.Kernel, block_variable_type: spa.ScalarType = spa.ScalarType.u16) -> spa.Kernel: diff --git a/spatialstencil/syntax/spatial_ir/canonicalization.py b/spada/syntax/spatial_ir/canonicalization.py similarity index 99% rename from spatialstencil/syntax/spatial_ir/canonicalization.py rename to spada/syntax/spatial_ir/canonicalization.py index ab3133ff..998a88b7 100644 --- a/spatialstencil/syntax/spatial_ir/canonicalization.py +++ b/spada/syntax/spatial_ir/canonicalization.py @@ -5,8 +5,8 @@ import copy from dataclasses import dataclass from itertools import product -from spatialstencil.syntax.spatial_ir import 
irnodes as spir, analysis, passes -from spatialstencil.syntax.spatial_ir.grid_geometry import Rectangle +from spada.syntax.spatial_ir import irnodes as spir, analysis, passes +from spada.syntax.spatial_ir.grid_geometry import Rectangle def inline_metaprogramming(kernel: spir.Kernel) -> spir.Kernel: @@ -641,7 +641,7 @@ def __init__(self, dtypes: dict[spir.Identifier, spir.IRType]): self.dtypes = dtypes def visit_ForeachStatement(self, node: spir.ForeachStatement): - from spatialstencil.syntax.csl import dsd_ops + from spada.syntax.csl import dsd_ops if dsd_ops.get_dsd_op(self.dtypes, node) is not None: return self.generic_visit(node) diff --git a/spatialstencil/syntax/spatial_ir/copy_elimination.py b/spada/syntax/spatial_ir/copy_elimination.py similarity index 99% rename from spatialstencil/syntax/spatial_ir/copy_elimination.py rename to spada/syntax/spatial_ir/copy_elimination.py index ce1fc97b..07da896d 100644 --- a/spatialstencil/syntax/spatial_ir/copy_elimination.py +++ b/spada/syntax/spatial_ir/copy_elimination.py @@ -21,8 +21,8 @@ from collections import defaultdict from dataclasses import dataclass -from spatialstencil.syntax.spatial_ir import irnodes as spir, passes -from spatialstencil.syntax.spatial_ir.canonicalization import PEBlock, Rectangle +from spada.syntax.spatial_ir import irnodes as spir, passes +from spada.syntax.spatial_ir.canonicalization import PEBlock, Rectangle @dataclass(frozen=True) diff --git a/spatialstencil/syntax/spatial_ir/grid_geometry.py b/spada/syntax/spatial_ir/grid_geometry.py similarity index 100% rename from spatialstencil/syntax/spatial_ir/grid_geometry.py rename to spada/syntax/spatial_ir/grid_geometry.py diff --git a/spatialstencil/syntax/spatial_ir/irnodes.py b/spada/syntax/spatial_ir/irnodes.py similarity index 99% rename from spatialstencil/syntax/spatial_ir/irnodes.py rename to spada/syntax/spatial_ir/irnodes.py index b6075fbe..28d029ef 100644 --- a/spatialstencil/syntax/spatial_ir/irnodes.py +++ 
b/spada/syntax/spatial_ir/irnodes.py @@ -3,10 +3,10 @@ import copy from dataclasses import dataclass, field from typing import Union, Tuple, Optional, Literal -from spatialstencil.syntax.common import visitor -from spatialstencil.syntax.common.basenode import BaseNode -from spatialstencil.syntax.common.types import ScalarType, IRType -from spatialstencil.syntax.spatial_ir.grid_geometry import Rectangle +from spada.syntax.common import visitor +from spada.syntax.common.basenode import BaseNode +from spada.syntax.common.types import ScalarType, IRType +from spada.syntax.spatial_ir.grid_geometry import Rectangle @dataclass diff --git a/spatialstencil/syntax/spatial_ir/language.lark b/spada/syntax/spatial_ir/language.lark similarity index 100% rename from spatialstencil/syntax/spatial_ir/language.lark rename to spada/syntax/spatial_ir/language.lark diff --git a/spatialstencil/syntax/spatial_ir/lark_to_ir.py b/spada/syntax/spatial_ir/lark_to_ir.py similarity index 98% rename from spatialstencil/syntax/spatial_ir/lark_to_ir.py rename to spada/syntax/spatial_ir/lark_to_ir.py index 40a482d7..da877497 100644 --- a/spatialstencil/syntax/spatial_ir/lark_to_ir.py +++ b/spada/syntax/spatial_ir/lark_to_ir.py @@ -1,8 +1,8 @@ import lark -from spatialstencil.syntax.common.types import ScalarType -from spatialstencil.syntax.spatial_ir import irnodes -from spatialstencil.syntax.spatial_ir.irnodes import StreamType, Identifier +from spada.syntax.common.types import ScalarType +from spada.syntax.spatial_ir import irnodes +from spada.syntax.spatial_ir.irnodes import StreamType, Identifier class TreeToSpatialIR(lark.Transformer): diff --git a/spatialstencil/syntax/spatial_ir/parser.py b/spada/syntax/spatial_ir/parser.py similarity index 87% rename from spatialstencil/syntax/spatial_ir/parser.py rename to spada/syntax/spatial_ir/parser.py index cd201cff..c566c286 100644 --- a/spatialstencil/syntax/spatial_ir/parser.py +++ b/spada/syntax/spatial_ir/parser.py @@ -3,8 +3,8 @@ import sys 
from typing import TextIO -from spatialstencil.syntax.spatial_ir import irnodes -from spatialstencil.syntax.spatial_ir import lark_to_ir +from spada.syntax.spatial_ir import irnodes +from spada.syntax.spatial_ir import lark_to_ir class Parser: @@ -29,7 +29,7 @@ def parse(self, code: str, name: str = None) -> irnodes.Kernel: Parses a string representing a spatial IR kernel, returning the top-level kernel IR node. - :param code: A code string in spatial stencil format. + :param code: A code string in SpaDA format. :param name: An optional name for the file, used for error messages. :return: A Kernel node representing the root of the spatial IR. """ @@ -44,7 +44,7 @@ def parse_string(code: str, name: str = None) -> irnodes.Kernel: Parses a string representing a spatial IR kernel, returning the top-level kernel IR node. - :param code: A code string in spatial stencil format. + :param code: A code string in SpaDA format. :param name: An optional name for the file, used for error messages. :return: A Kernel node representing the root of the spatial IR. 
""" @@ -68,7 +68,7 @@ def parse_file(file_or_filename: TextIO | str) -> irnodes.Kernel: if __name__ == '__main__': if len(sys.argv) != 2: - print('USAGE: python -m spatialstencil.syntax.spatial_ir.parser ') + print('USAGE: python -m spada.syntax.spatial_ir.parser ') exit(1) out = parse_file(sys.argv[1]) diff --git a/spatialstencil/syntax/spatial_ir/passes.py b/spada/syntax/spatial_ir/passes.py similarity index 98% rename from spatialstencil/syntax/spatial_ir/passes.py rename to spada/syntax/spatial_ir/passes.py index 98f2c3cb..243fe46f 100644 --- a/spatialstencil/syntax/spatial_ir/passes.py +++ b/spada/syntax/spatial_ir/passes.py @@ -4,8 +4,8 @@ from collections.abc import Callable from dataclasses import dataclass, field, replace -from spatialstencil.syntax.spatial_ir import irnodes as spa -from spatialstencil.syntax.stencil_ir.type_inference import _result_type_of +from spada.syntax.spatial_ir import irnodes as spa +from spada.syntax.stencil_ir.type_inference import _result_type_of class Concretizer(spa.NodeTransformer): diff --git a/spatialstencil/syntax/stencil_ir/analysis.py b/spada/syntax/stencil_ir/analysis.py similarity index 98% rename from spatialstencil/syntax/stencil_ir/analysis.py rename to spada/syntax/stencil_ir/analysis.py index c9a24905..75ffba72 100644 --- a/spatialstencil/syntax/stencil_ir/analysis.py +++ b/spada/syntax/stencil_ir/analysis.py @@ -2,7 +2,7 @@ Analysis passes on the Stencil IR. 
""" from collections import defaultdict -from spatialstencil.syntax.stencil_ir import irnodes as sast +from spada.syntax.stencil_ir import irnodes as sast from typing import Literal diff --git a/spatialstencil/syntax/stencil_ir/canonicalization.py b/spada/syntax/stencil_ir/canonicalization.py similarity index 93% rename from spatialstencil/syntax/stencil_ir/canonicalization.py rename to spada/syntax/stencil_ir/canonicalization.py index f076fe30..798c0225 100644 --- a/spatialstencil/syntax/stencil_ir/canonicalization.py +++ b/spada/syntax/stencil_ir/canonicalization.py @@ -5,8 +5,8 @@ from collections import defaultdict from typing import Literal -from spatialstencil.syntax.stencil_ir import irnodes as sast -from spatialstencil.syntax.stencil_ir import type_inference +from spada.syntax.stencil_ir import irnodes as sast +from spada.syntax.stencil_ir import type_inference def canonicalize(program: sast.Program) -> sast.Program: diff --git a/spatialstencil/syntax/stencil_ir/canonicalize_expression.py b/spada/syntax/stencil_ir/canonicalize_expression.py similarity index 95% rename from spatialstencil/syntax/stencil_ir/canonicalize_expression.py rename to spada/syntax/stencil_ir/canonicalize_expression.py index b3d1fad2..0b8f6938 100644 --- a/spatialstencil/syntax/stencil_ir/canonicalize_expression.py +++ b/spada/syntax/stencil_ir/canonicalize_expression.py @@ -10,11 +10,11 @@ The modifications are done in-place on the IR nodes. 
""" -from spatialstencil.lowering.versioning import Versioning -from spatialstencil.syntax.common.basenode import Wildcard -from spatialstencil.syntax.common.tree_matching import PatternTransformer -from spatialstencil.syntax.stencil_ir.domain_collector import DomainCollector -from spatialstencil.syntax.stencil_ir.irnodes import * +from spada.lowering.versioning import Versioning +from spada.syntax.common.basenode import Wildcard +from spada.syntax.common.tree_matching import PatternTransformer +from spada.syntax.stencil_ir.domain_collector import DomainCollector +from spada.syntax.stencil_ir.irnodes import * class CanonicalizeExpression(NodeVisitor): diff --git a/spatialstencil/syntax/stencil_ir/def_use_analysis.py b/spada/syntax/stencil_ir/def_use_analysis.py similarity index 97% rename from spatialstencil/syntax/stencil_ir/def_use_analysis.py rename to spada/syntax/stencil_ir/def_use_analysis.py index 0d10720f..195452ba 100644 --- a/spatialstencil/syntax/stencil_ir/def_use_analysis.py +++ b/spada/syntax/stencil_ir/def_use_analysis.py @@ -1,8 +1,8 @@ from dataclasses import dataclass from typing import Sequence -import spatialstencil.syntax.stencil_ir.irnodes as sast -from spatialstencil.syntax.stencil_ir.irnodes import ComputationBlock +import spada.syntax.stencil_ir.irnodes as sast +from spada.syntax.stencil_ir.irnodes import ComputationBlock @dataclass diff --git a/spatialstencil/syntax/stencil_ir/domain_collector.py b/spada/syntax/stencil_ir/domain_collector.py similarity index 97% rename from spatialstencil/syntax/stencil_ir/domain_collector.py rename to spada/syntax/stencil_ir/domain_collector.py index f20639a2..9da56905 100644 --- a/spatialstencil/syntax/stencil_ir/domain_collector.py +++ b/spada/syntax/stencil_ir/domain_collector.py @@ -1,7 +1,7 @@ from dataclasses import dataclass -import spatialstencil.syntax.stencil_ir.irnodes as sast -from spatialstencil.syntax.stencil_ir.irnodes import ComputationBlock, Program +import spada.syntax.stencil_ir.irnodes 
as sast +from spada.syntax.stencil_ir.irnodes import ComputationBlock, Program @dataclass diff --git a/spatialstencil/syntax/stencil_ir/domain_inference.py b/spada/syntax/stencil_ir/domain_inference.py similarity index 98% rename from spatialstencil/syntax/stencil_ir/domain_inference.py rename to spada/syntax/stencil_ir/domain_inference.py index b492a9ae..5ae04b6c 100644 --- a/spatialstencil/syntax/stencil_ir/domain_inference.py +++ b/spada/syntax/stencil_ir/domain_inference.py @@ -2,11 +2,11 @@ import warnings from typing import Sequence, Collection -import spatialstencil.syntax.stencil_ir.irnodes as sast +import spada.syntax.stencil_ir.irnodes as sast import copy -from spatialstencil.syntax.stencil_ir import def_use_analysis -from spatialstencil.syntax.stencil_ir.def_use_analysis import ScopedUse, ScopedDefinition +from spada.syntax.stencil_ir import def_use_analysis +from spada.syntax.stencil_ir.def_use_analysis import ScopedUse, ScopedDefinition def infer_field_domains(program: sast.Program, diff --git a/spatialstencil/syntax/stencil_ir/extent_inference.py b/spada/syntax/stencil_ir/extent_inference.py similarity index 98% rename from spatialstencil/syntax/stencil_ir/extent_inference.py rename to spada/syntax/stencil_ir/extent_inference.py index cbe5c0a6..31a8be8b 100644 --- a/spatialstencil/syntax/stencil_ir/extent_inference.py +++ b/spada/syntax/stencil_ir/extent_inference.py @@ -2,8 +2,8 @@ from collections import defaultdict from typing import Sequence, Collection -import spatialstencil.syntax.stencil_ir.irnodes as sast -import spatialstencil.syntax.stencil_ir.def_use_analysis as def_use_analysis +import spada.syntax.stencil_ir.irnodes as sast +import spada.syntax.stencil_ir.def_use_analysis as def_use_analysis def infer_field_extents(program: sast.Program): """ diff --git a/spatialstencil/syntax/stencil_ir/flop_counter.py b/spada/syntax/stencil_ir/flop_counter.py similarity index 96% rename from spatialstencil/syntax/stencil_ir/flop_counter.py rename to 
spada/syntax/stencil_ir/flop_counter.py index 5bd4ad8f..27a2995c 100644 --- a/spatialstencil/syntax/stencil_ir/flop_counter.py +++ b/spada/syntax/stencil_ir/flop_counter.py @@ -1,10 +1,10 @@ """ -FLOP Counter for spatial stencil IR computations. +FLOP Counter for SpaDA IR computations. This visitor counts the total number of floating-point operations (FLOPs) in a stencil computation by analyzing statements and their execution domains. """ -from spatialstencil.syntax.stencil_ir.irnodes import (FieldType, NodeVisitor, Expression, Identifier, Subscript, +from spada.syntax.stencil_ir.irnodes import (FieldType, NodeVisitor, Expression, Identifier, Subscript, UnaryOperator, BinaryOperator, TernaryOperator, MathCall, StatementBlock, AssignOp, ReturnOp, ViewType, Cartesian, Program) @@ -12,7 +12,7 @@ class FLOPCounter(NodeVisitor): """ - A visitor that counts FLOPs in a spatial stencil computation. + A visitor that counts FLOPs in a SpaDA computation. The count is calculated as: FLOPs = operations_per_statement × output_domain_size × num_output_extents diff --git a/spatialstencil/syntax/stencil_ir/irnodes.py b/spada/syntax/stencil_ir/irnodes.py similarity index 98% rename from spatialstencil/syntax/stencil_ir/irnodes.py rename to spada/syntax/stencil_ir/irnodes.py index 444a3578..6d2d1b70 100644 --- a/spatialstencil/syntax/stencil_ir/irnodes.py +++ b/spada/syntax/stencil_ir/irnodes.py @@ -1,13 +1,13 @@ """ -Native class definitions for the spatial stencil Intermediate Representation (IR). +Native class definitions for the SpaDA Intermediate Representation (IR). 
""" from dataclasses import dataclass, field import enum from typing import Literal, Sequence -from spatialstencil.syntax.common.basenode import BaseNode -from spatialstencil.syntax.common import visitor -from spatialstencil.syntax.common.types import IRType, ScalarType +from spada.syntax.common.basenode import BaseNode +from spada.syntax.common import visitor +from spada.syntax.common.types import IRType, ScalarType class ComputationType(enum.Enum): @@ -19,7 +19,7 @@ class ComputationType(enum.Enum): class Node(BaseNode): """ - Abstract class representing an IR node for spatial stencils. + Abstract class representing an IR node for SpaDA. """ @classmethod diff --git a/spatialstencil/syntax/stencil_ir/language.lark b/spada/syntax/stencil_ir/language.lark similarity index 100% rename from spatialstencil/syntax/stencil_ir/language.lark rename to spada/syntax/stencil_ir/language.lark diff --git a/spatialstencil/syntax/stencil_ir/lark_to_ast.py b/spada/syntax/stencil_ir/lark_to_ast.py similarity index 99% rename from spatialstencil/syntax/stencil_ir/lark_to_ast.py rename to spada/syntax/stencil_ir/lark_to_ast.py index 53955cfe..5cb8a6d2 100644 --- a/spatialstencil/syntax/stencil_ir/lark_to_ast.py +++ b/spada/syntax/stencil_ir/lark_to_ast.py @@ -1,7 +1,7 @@ from dataclasses import dataclass import lark -from spatialstencil.syntax.stencil_ir import irnodes +from spada.syntax.stencil_ir import irnodes class TreeToAST(lark.Transformer): diff --git a/spatialstencil/syntax/stencil_ir/parser.py b/spada/syntax/stencil_ir/parser.py similarity index 72% rename from spatialstencil/syntax/stencil_ir/parser.py rename to spada/syntax/stencil_ir/parser.py index 82fa8523..bef1ab42 100644 --- a/spatialstencil/syntax/stencil_ir/parser.py +++ b/spada/syntax/stencil_ir/parser.py @@ -3,13 +3,13 @@ import sys from typing import TextIO -from spatialstencil.syntax.stencil_ir import irnodes -from spatialstencil.syntax.stencil_ir import lark_to_ast +from spada.syntax.stencil_ir import irnodes 
+from spada.syntax.stencil_ir import lark_to_ast class Parser: """ - A spatial stencil language parser. Parses multiple strings faster than + A SpaDA language parser. Parses multiple strings faster than calling ``parser.parse_string`` multiple times. """ @@ -26,10 +26,10 @@ def __init__(self) -> None: def parse(self, code: str) -> irnodes.Program: """ - Parses a string representing a spatial stencil program, returning the + Parses a string representing a SpaDA program, returning the top-level program AST node. - :param code: A code string in spatial stencil format. + :param code: A code string in SpaDA format. :return: A Program node representing the root of the AST. """ tree = self.parser.parse(code) @@ -39,10 +39,10 @@ def parse(self, code: str) -> irnodes.Program: def parse_string(code: str) -> irnodes.Program: """ - Parses a string representing a spatial stencil program, returning the + Parses a string representing a SpaDA program, returning the top-level program AST node. - :param code: A code string in spatial stencil format. + :param code: A code string in SpaDA format. :return: A Program node representing the root of the AST. """ parser = Parser() @@ -51,7 +51,7 @@ def parse_string(code: str) -> irnodes.Program: def parse_file(file_or_filename: TextIO | str) -> irnodes.Program: """ - Parses a file representing a spatial stencil program, returning the + Parses a file representing a SpaDA program, returning the top-level program AST node. :param file_or_filename: A file path or handle to an open file to read. 
@@ -65,7 +65,7 @@ def parse_file(file_or_filename: TextIO | str) -> irnodes.Program: if __name__ == '__main__': if len(sys.argv) != 2: - print('USAGE: python -m spatialstencil.syntax.stencil_ir.parser ') + print('USAGE: python -m spada.syntax.stencil_ir.parser ') exit(1) out = parse_file(sys.argv[1]) diff --git a/spatialstencil/syntax/stencil_ir/refactor_forward_backward_stencils.py b/spada/syntax/stencil_ir/refactor_forward_backward_stencils.py similarity index 98% rename from spatialstencil/syntax/stencil_ir/refactor_forward_backward_stencils.py rename to spada/syntax/stencil_ir/refactor_forward_backward_stencils.py index c1e18138..adf81d1d 100644 --- a/spatialstencil/syntax/stencil_ir/refactor_forward_backward_stencils.py +++ b/spada/syntax/stencil_ir/refactor_forward_backward_stencils.py @@ -1,6 +1,6 @@ import copy from collections import defaultdict -from spatialstencil.syntax.stencil_ir.irnodes import * +from spada.syntax.stencil_ir.irnodes import * class RefactorForwardBackwardStencils(ScopedNodeVisitor): diff --git a/spatialstencil/syntax/stencil_ir/ssa.py b/spada/syntax/stencil_ir/ssa.py similarity index 96% rename from spatialstencil/syntax/stencil_ir/ssa.py rename to spada/syntax/stencil_ir/ssa.py index fb005701..1f36cb36 100644 --- a/spatialstencil/syntax/stencil_ir/ssa.py +++ b/spada/syntax/stencil_ir/ssa.py @@ -2,8 +2,8 @@ from dataclasses import dataclass from typing import Mapping -import spatialstencil.syntax.stencil_ir.irnodes as sast -from spatialstencil.syntax.stencil_ir.irnodes import ComputationBlock, Program +import spada.syntax.stencil_ir.irnodes as sast +from spada.syntax.stencil_ir.irnodes import ComputationBlock, Program class SSAVisitor(sast.ScopedNodeVisitor): diff --git a/spatialstencil/syntax/stencil_ir/type_inference.py b/spada/syntax/stencil_ir/type_inference.py similarity index 98% rename from spatialstencil/syntax/stencil_ir/type_inference.py rename to spada/syntax/stencil_ir/type_inference.py index 9d534692..7cb30f12 100644 --- 
a/spatialstencil/syntax/stencil_ir/type_inference.py +++ b/spada/syntax/stencil_ir/type_inference.py @@ -3,12 +3,12 @@ """ import copy -from spatialstencil.syntax.common import types -from spatialstencil.syntax.stencil_ir import irnodes as sast -from spatialstencil.syntax.stencil_ir import analysis +from spada.syntax.common import types +from spada.syntax.stencil_ir import irnodes as sast +from spada.syntax.stencil_ir import analysis -from spatialstencil.syntax.stencil_ir.domain_inference import infer_field_domains -from spatialstencil.syntax.stencil_ir.extent_inference import infer_field_extents +from spada.syntax.stencil_ir.domain_inference import infer_field_domains +from spada.syntax.stencil_ir.extent_inference import infer_field_extents def infer_types(program: sast.Program, diff --git a/spatialstencil/assets/csl/sync/README.md b/spatialstencil/assets/csl/sync/README.md deleted file mode 100644 index 03675e86..00000000 --- a/spatialstencil/assets/csl/sync/README.md +++ /dev/null @@ -1,4 +0,0 @@ -# CSL PE Clock Synchronization Utility -The files in this folder were copied from the Cerebras SDK 1.4.0 [bandwidth-test benchmark](https://github.com/Cerebras/sdk-examples/tree/rel-sdk-1.4.0/benchmarks/bandwidth-test/src/sync). -They provide functionality for synchronizing the clocks of all PEs to measure -communication operations (e.g., collectives) correctly. Use with `--sync-benchmarking`. diff --git a/spatialstencil/assets/csl/sync/layout.csl b/spatialstencil/assets/csl/sync/layout.csl deleted file mode 100644 index 2ce43c76..00000000 --- a/spatialstencil/assets/csl/sync/layout.csl +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright 2025 Cerebras Systems. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -param colors:[5]color; -param entrypoints:[4]local_task_id; -param width : i16 ; // width of the core -param height: i16 ; // height of the core - -const C0 : color = colors[0]; -const C1 : color = colors[1]; -const C2 : color = colors[2]; -const C3 : color = colors[3]; -const C4 : color = colors[4]; - -const STARTUP: local_task_id = entrypoints[0]; -const SYNC_Y: local_task_id = entrypoints[1]; -const SYNC_BCAST: local_task_id = entrypoints[2]; -const EXIT: local_task_id = entrypoints[3]; - -fn get_params(px:i16, py:i16) comptime_struct { - - var first_py: bool = (0 == py); - var last_py: bool = ((height-1) == py); - var is_py_even: bool = (0 == (py % 2)); - - var first_px: bool = (0 == px); - var last_px: bool = ((width-1) == px); - var is_px_even: bool = (0 == (px % 2)); - - var c_recv_px: color = C0; - var c_send_px: color = C1; - if (is_px_even){ - c_recv_px = C0; - c_send_px = C1; - }else{ - c_recv_px = C1; - c_send_px = C0; - } - - var c_recv_py: color = C2; - var c_send_py: color = C3; - if (is_py_even){ - c_recv_py = C2; - c_send_py = C3; - }else{ - c_recv_py = C3; - c_send_py = C2; - } - - return .{ - .c_recv_px = c_recv_px, - .c_send_px = c_send_px, - .c_recv_py = c_recv_py, - .c_send_py = c_send_py, - .c_bcast = C4, - - .STARTUP = STARTUP, - .SYNC_Y = SYNC_Y, - .SYNC_BCAST = SYNC_BCAST, - .EXIT = EXIT, - - .first_px = first_px, - .last_px = last_px, - .first_py = first_py, - .last_py = last_py, - }; -} diff --git a/spatialstencil/assets/csl/sync/pe.csl b/spatialstencil/assets/csl/sync/pe.csl deleted file mode 100644 index 
50fb29ef..00000000 --- a/spatialstencil/assets/csl/sync/pe.csl +++ /dev/null @@ -1,291 +0,0 @@ -// Copyright 2025 Cerebras Systems. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -param c_recv_px: color; -param c_send_px: color; -param c_recv_py: color; -param c_send_py: color; -param c_bcast: color; - -param STARTUP: local_task_id; -param SYNC_Y: local_task_id; -param SYNC_BCAST: local_task_id; -param EXIT: local_task_id; - -param first_px: bool; -param last_px: bool; -param first_py: bool; -param last_py: bool; - -// f_callback = sys_mod.unblock_cmd_stream, to continue next command -param f_callback : fn ()void; - -// input_queues={2,3,4} -// output_queues={2,3,4} -param input_queues:[3]u16; -param output_queues:[3]u16; - -const c_recv_px_iq = @get_input_queue(input_queues[0]); -const c_send_px_oq = @get_output_queue(output_queues[0]); - -const c_recv_py_iq = @get_input_queue(input_queues[1]); -const c_send_py_oq = @get_output_queue(output_queues[1]); - -const c_bcast_iq = @get_input_queue(input_queues[2]); -const c_bcast_oq = @get_output_queue(input_queues[2]); - -const timestamp = @import_module("