diff --git a/.gitignore b/.gitignore
index 8fb87927ce..3327583d32 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,6 +7,7 @@ sdist/
*.egg-info/
vivado_prj
.vscode
+.idea
my-hls-test
*.tar.gz
docs/_build
diff --git a/docs/backend/xls.rst b/docs/backend/xls.rst
new file mode 100644
index 0000000000..5b6b2ae4ae
--- /dev/null
+++ b/docs/backend/xls.rst
@@ -0,0 +1,47 @@
+============
+XLS
+============
+
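+The XLS backend converts hls4ml models into SystemVerilog via `Google XLS <https://google.github.io/xls/>`_; the generated SystemVerilog can then be converted to IP with **Vivado**.
+To enable the XLS backend, install the optional dependency:
+
+.. code-block:: bash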
+
+ pip install hls4ml[xls]
+
+hls4ml uses the ``pyxls`` package to access the XLS API.
+pyxls comes with batteries included, and a separate XLS installation is not required.
+
+Workflow
+=========================
+The XLS backend performs the following transformations:
+
+.. code-block:: text
+
+ hls4ml representation -> DSLX (.x) -> XLS IR (.ir) -> Optimized XLS IR (.opt.ir) -> SystemVerilog (.sv) -> IP
+
+`DSLX <https://google.github.io/xls/dslx_reference/>`_ is a DSL with Rust-like syntax.
+The DSLX project generated by hls4ml in ``<output_dir>/firmware`` contains the main module ``<project_name>.x``, layer modules ``layer_<name>.x``, and helper modules in ``ap_types/`` and ``nnet_utils/``.
+You may work with this project either through hls4ml or using your own XLS toolchain.
+
+hls4ml calls the XLS compiler to convert DSLX into the `XLS IR <https://google.github.io/xls/ir_semantics/>`_ format (``.ir``) and then runs IR optimization passes (``.opt.ir``).
+
+Then, hls4ml uses `XLS Codegen <https://google.github.io/xls/codegen_options/>`_ to generate SystemVerilog (``.sv``) from the optimized IR, and runs **Vivado** on ``<output_dir>/build_prj.tcl`` to generate the IP.
+You can override the default codegen options:
+
+.. code-block:: python
+
+ config = hls4ml.utils.config_from_keras_model(model)
+ # This sets hls_model.config['XLSCodegenFlags']
+ hls_model = hls4ml.converters.convert_from_keras_model(
+ model, hls_config=config, backend='XLS',
+ xls_codegen_flags={'delay_model': 'asap7', 'generator': 'pipeline', 'use_system_verilog': False}
+ )
+
+I/O Types and Strategy
+=========================
+
+Currently, only ``io_parallel`` is supported. ``Strategy`` is ignored.
+All operations are fully unrolled.
+
+The XLS backend supports only the signed ``FixedPoint`` type (similar to ``ap_fixed``).
diff --git a/docs/index.rst b/docs/index.rst
index f170ca6858..2a4f60b733 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -39,6 +39,7 @@
backend/catapult
backend/quartus
backend/sr
+ backend/xls
.. toctree::
:hidden:
diff --git a/docs/intro/setup.rst b/docs/intro/setup.rst
index 4e3d192fcf..fd36c959b9 100644
--- a/docs/intro/setup.rst
+++ b/docs/intro/setup.rst
@@ -203,6 +203,9 @@ Optional Dependencies
# For symbolic regression
pip install hls4ml[sr]
+ # For XLS backend
+ pip install hls4ml[xls]
+
# For documentation building (developers)
pip install hls4ml[doc]
diff --git a/docs/intro/status.rst b/docs/intro/status.rst
index 7526c3bec4..3245b73540 100644
--- a/docs/intro/status.rst
+++ b/docs/intro/status.rst
@@ -47,6 +47,7 @@ HLS backends:
* Vitis HLS
* Catapult HLS
* oneAPI (experimental)
+* XLS (experimental)
A summary of the on-going status of the ``hls4ml`` tool is in the table below.
@@ -79,6 +80,8 @@ A summary of the on-going status of the ``hls4ml`` tool is in the table below.
+-----------------------+-----+-----+--------------+--------+--------+-----+
| oneAPI (experimental) | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ |
+-----------------------+-----+-----+--------------+--------+--------+-----+
+| XLS (experimental) | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ |
++-----------------------+-----+-----+--------------+--------+--------+-----+
Other feature notes:
diff --git a/docs/ir/attributes.rst b/docs/ir/attributes.rst
index dfbec51b1c..8fecef73aa 100644
--- a/docs/ir/attributes.rst
+++ b/docs/ir/attributes.rst
@@ -87,19 +87,19 @@ Backend-specific attributes
* The number of times each multiplier is used by controlling the amount of pipelining/unrolling. Lower number results in more parallelism and lower latency at the expense of the resources used.Reuse factor = 1 corresponds to all multiplications executed in parallel, and hence, the lowest possible latency.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* table_size: int (Default: 1024)
* The size of the lookup table used to approximate the function.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* table_t: NamedType (Default: fixed<18,8,TRN,WRAP,0>)
* The datatype (precision) used for the values of the lookup table.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
ParametrizedActivation
======================
@@ -143,19 +143,19 @@ Backend-specific attributes
* The number of times each multiplier is used by controlling the amount of pipelining/unrolling. Lower number results in more parallelism and lower latency at the expense of the resources used.Reuse factor = 1 corresponds to all multiplications executed in parallel, and hence, the lowest possible latency.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* table_size: int (Default: 1024)
* The size of the lookup table used to approximate the function.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* table_t: NamedType (Default: fixed<18,8,TRN,WRAP,0>)
* The datatype (precision) used for the values of the lookup table.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
PReLU
=====
@@ -203,19 +203,19 @@ Backend-specific attributes
* The number of times each multiplier is used by controlling the amount of pipelining/unrolling. Lower number results in more parallelism and lower latency at the expense of the resources used.Reuse factor = 1 corresponds to all multiplications executed in parallel, and hence, the lowest possible latency.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* table_size: int (Default: 1024)
* The size of the lookup table used to approximate the function.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* table_t: NamedType (Default: fixed<18,8,TRN,WRAP,0>)
* The datatype (precision) used for the values of the lookup table.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
Softmax
=======
@@ -251,43 +251,59 @@ Backend-specific attributes
* The number of times each multiplier is used by controlling the amount of pipelining/unrolling. Lower number results in more parallelism and lower latency at the expense of the resources used.Reuse factor = 1 corresponds to all multiplications executed in parallel, and hence, the lowest possible latency.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* table_size: int (Default: 1024)
* The size of the lookup table used to approximate the function.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* table_t: NamedType (Default: fixed<18,8,TRN,WRAP,0>)
* The datatype (precision) used for the values of the lookup table.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
+
+* n_outer: int (Default: 1)
+
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
+
+* n_inner: int (Default: 1)
+
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* implementation: list [latency,stable,argmax,legacy] (Default: stable)
* Choice of implementation of softmax function. "latency" provides good latency at the expense of extra resources. performs well on small number of classes. "stable" may require extra clock cycles but has better accuracy. "legacy" is the older implementation which has bad accuracy, but is fast and has low resource use. It is superseded by the "latency" implementation for most applications. "argmax" is a special implementation that can be used if only the output with the highest probability is important. Using this implementation will save resources and clock cycles.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* skip: bool (Default: False)
* If enabled, skips the softmax node and returns the raw outputs.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* exp_table_t: NamedType (Default: fixed<18,8,RND,SAT,0>)
* The datatype (precision) used for the values of the lookup table.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* inv_table_t: NamedType (Default: fixed<18,8,RND,SAT,0>)
* The datatype (precision) used for the values of the lookup table.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
+
+* inv_inp_t: NamedType (Default: fixed<18,8,RND,SAT,0>)
+
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
+
+* accum_t: NamedType (Default: fixed<18,8,RND,SAT,0>)
+
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
TernaryTanh
===========
@@ -323,19 +339,19 @@ Backend-specific attributes
* The number of times each multiplier is used by controlling the amount of pipelining/unrolling. Lower number results in more parallelism and lower latency at the expense of the resources used.Reuse factor = 1 corresponds to all multiplications executed in parallel, and hence, the lowest possible latency.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* table_size: int (Default: 1024)
* The size of the lookup table used to approximate the function.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* table_t: NamedType (Default: fixed<18,8,TRN,WRAP,0>)
* The datatype (precision) used for the values of the lookup table.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
HardActivation
==============
@@ -383,19 +399,19 @@ Backend-specific attributes
* The number of times each multiplier is used by controlling the amount of pipelining/unrolling. Lower number results in more parallelism and lower latency at the expense of the resources used.Reuse factor = 1 corresponds to all multiplications executed in parallel, and hence, the lowest possible latency.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* table_size: int (Default: 1024)
* The size of the lookup table used to approximate the function.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* table_t: NamedType (Default: fixed<18,8,TRN,WRAP,0>)
* The datatype (precision) used for the values of the lookup table.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
Reshape
=======
@@ -471,13 +487,17 @@ Backend-specific attributes
* The datatype (precision) used to store intermediate results of the computation within the layer.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* reuse_factor: int (Default: 1)
* The number of times each multiplier is used by controlling the amount of pipelining/unrolling. Lower number results in more parallelism and lower latency at the expense of the resources used.Reuse factor = 1 corresponds to all multiplications executed in parallel, and hence, the lowest possible latency.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
+
+* strategy: list [latency,resource] (Default: latency)
+
+ * Available in: Libero
Conv
====
@@ -509,13 +529,13 @@ Backend-specific attributes
* The datatype (precision) used to store intermediate results of the computation within the layer.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* reuse_factor: int (Default: 1)
* The number of times each multiplier is used by controlling the amount of pipelining/unrolling. Lower number results in more parallelism and lower latency at the expense of the resources used.Reuse factor = 1 corresponds to all multiplications executed in parallel, and hence, the lowest possible latency.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
Conv1D
======
@@ -577,13 +597,13 @@ Backend-specific attributes
* The datatype (precision) used to store intermediate results of the computation within the layer.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* reuse_factor: int (Default: 1)
* The number of times each multiplier is used by controlling the amount of pipelining/unrolling. Lower number results in more parallelism and lower latency at the expense of the resources used.Reuse factor = 1 corresponds to all multiplications executed in parallel, and hence, the lowest possible latency.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* parallelization_factor: int (Default: 1)
@@ -669,13 +689,13 @@ Backend-specific attributes
* The datatype (precision) used to store intermediate results of the computation within the layer.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* reuse_factor: int (Default: 1)
* The number of times each multiplier is used by controlling the amount of pipelining/unrolling. Lower number results in more parallelism and lower latency at the expense of the resources used.Reuse factor = 1 corresponds to all multiplications executed in parallel, and hence, the lowest possible latency.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* parallelization_factor: int (Default: 1)
@@ -761,13 +781,13 @@ Backend-specific attributes
* The datatype (precision) used to store intermediate results of the computation within the layer.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* reuse_factor: int (Default: 1)
* The number of times each multiplier is used by controlling the amount of pipelining/unrolling. Lower number results in more parallelism and lower latency at the expense of the resources used.Reuse factor = 1 corresponds to all multiplications executed in parallel, and hence, the lowest possible latency.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* parallelization_factor: int (Default: 1)
@@ -847,23 +867,23 @@ Backend-specific attributes
---------------------------
* depthwise_accum_t: NamedType
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* pointwise_accum_t: NamedType
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* depthwise_result_t: NamedType
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* depthwise_reuse_factor: int (Default: 1)
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* pointwise_reuse_factor: int (Default: 1)
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* conv_implementation: list [LineBuffer,Encoded] (Default: LineBuffer)
@@ -965,13 +985,13 @@ Backend-specific attributes
* The datatype (precision) used to store intermediate results of the computation within the layer.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* reuse_factor: int (Default: 1)
* The number of times each multiplier is used by controlling the amount of pipelining/unrolling. Lower number results in more parallelism and lower latency at the expense of the resources used.Reuse factor = 1 corresponds to all multiplications executed in parallel, and hence, the lowest possible latency.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* parallelization_factor: int (Default: 1)
@@ -1063,23 +1083,23 @@ Backend-specific attributes
---------------------------
* depthwise_accum_t: NamedType
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* pointwise_accum_t: NamedType
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* depthwise_result_t: NamedType
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* depthwise_reuse_factor: int (Default: 1)
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* pointwise_reuse_factor: int (Default: 1)
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* conv_implementation: list [LineBuffer,Encoded] (Default: LineBuffer)
@@ -1205,13 +1225,13 @@ Backend-specific attributes
* The datatype (precision) used to store intermediate results of the computation within the layer.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* reuse_factor: int (Default: 1)
* The number of times each multiplier is used by controlling the amount of pipelining/unrolling. Lower number results in more parallelism and lower latency at the expense of the resources used.Reuse factor = 1 corresponds to all multiplications executed in parallel, and hence, the lowest possible latency.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* parallelization_factor: int (Default: 1)
@@ -1277,7 +1297,7 @@ Backend-specific attributes
* The number of times each multiplier is used by controlling the amount of pipelining/unrolling. Lower number results in more parallelism and lower latency at the expense of the resources used.Reuse factor = 1 corresponds to all multiplications executed in parallel, and hence, the lowest possible latency.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
Pooling1D
=========
@@ -1327,13 +1347,13 @@ Backend-specific attributes
* The datatype (precision) used to store intermediate results of the computation within the layer.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* reuse_factor: int (Default: 1)
* The number of times each multiplier is used by controlling the amount of pipelining/unrolling. Lower number results in more parallelism and lower latency at the expense of the resources used.Reuse factor = 1 corresponds to all multiplications executed in parallel, and hence, the lowest possible latency.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* conv_implementation: list [LineBuffer,Encoded] (Default: LineBuffer)
@@ -1401,13 +1421,13 @@ Backend-specific attributes
* The datatype (precision) used to store intermediate results of the computation within the layer.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* reuse_factor: int (Default: 1)
* The number of times each multiplier is used by controlling the amount of pipelining/unrolling. Lower number results in more parallelism and lower latency at the expense of the resources used.Reuse factor = 1 corresponds to all multiplications executed in parallel, and hence, the lowest possible latency.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* conv_implementation: list [LineBuffer,Encoded] (Default: LineBuffer)
@@ -1451,13 +1471,13 @@ Backend-specific attributes
* The datatype (precision) used to store intermediate results of the computation within the layer.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* reuse_factor: int (Default: 1)
* The number of times each multiplier is used by controlling the amount of pipelining/unrolling. Lower number results in more parallelism and lower latency at the expense of the resources used.Reuse factor = 1 corresponds to all multiplications executed in parallel, and hence, the lowest possible latency.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
GlobalPooling2D
===============
@@ -1497,13 +1517,13 @@ Backend-specific attributes
* The datatype (precision) used to store intermediate results of the computation within the layer.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* reuse_factor: int (Default: 1)
* The number of times each multiplier is used by controlling the amount of pipelining/unrolling. Lower number results in more parallelism and lower latency at the expense of the resources used.Reuse factor = 1 corresponds to all multiplications executed in parallel, and hence, the lowest possible latency.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
ZeroPadding1D
=============
@@ -1571,6 +1591,82 @@ Type attributes
* pad_right: int
+Configurable attributes
+-----------------------
+* trace: int (Default: False)
+
+ * Enables saving of layer output (tracing) when using hls_model.predict(...) or hls_model.trace(...)
+
+* result_t: NamedType
+
+ * The datatype (precision) of the output tensor.
+
+Cropping1D
+==========
+Base attributes
+---------------
+* result_t: NamedType
+
+ * The datatype (precision) of the output tensor.
+
+Type attributes
+---------------
+* index: int
+
+ * Internal node counter used for bookkeeping and variable/tensor naming.
+
+* in_width: int
+
+* out_width: int
+
+* n_chan: int
+
+* crop_left: int
+
+* crop_right: int
+
+Configurable attributes
+-----------------------
+* trace: int (Default: False)
+
+ * Enables saving of layer output (tracing) when using hls_model.predict(...) or hls_model.trace(...)
+
+* result_t: NamedType
+
+ * The datatype (precision) of the output tensor.
+
+Cropping2D
+==========
+Base attributes
+---------------
+* result_t: NamedType
+
+ * The datatype (precision) of the output tensor.
+
+Type attributes
+---------------
+* index: int
+
+ * Internal node counter used for bookkeeping and variable/tensor naming.
+
+* in_height: int
+
+* in_width: int
+
+* out_height: int
+
+* out_width: int
+
+* n_chan: int
+
+* crop_top: int
+
+* crop_bottom: int
+
+* crop_left: int
+
+* crop_right: int
+
Configurable attributes
-----------------------
* trace: int (Default: False)
@@ -1611,7 +1707,7 @@ Backend-specific attributes
* The number of times each multiplier is used by controlling the amount of pipelining/unrolling. Lower number results in more parallelism and lower latency at the expense of the resources used.Reuse factor = 1 corresponds to all multiplications executed in parallel, and hence, the lowest possible latency.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
MatMul
======
@@ -1643,13 +1739,13 @@ Backend-specific attributes
* The datatype (precision) used to store intermediate results of the computation within the layer.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* reuse_factor: int (Default: 1)
* The number of times each multiplier is used by controlling the amount of pipelining/unrolling. Lower number results in more parallelism and lower latency at the expense of the resources used.Reuse factor = 1 corresponds to all multiplications executed in parallel, and hence, the lowest possible latency.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
Dot
===
@@ -1697,13 +1793,13 @@ Backend-specific attributes
* The datatype (precision) used to store intermediate results of the computation within the layer.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* reuse_factor: int (Default: 1)
* The number of times each multiplier is used by controlling the amount of pipelining/unrolling. Lower number results in more parallelism and lower latency at the expense of the resources used.Reuse factor = 1 corresponds to all multiplications executed in parallel, and hence, the lowest possible latency.
- * Available in: Vivado, Vivado, VivadoAccelerator, VivadoAccelerator, Vitis, Vitis, Quartus, Quartus, Catapult, Catapult, SymbolicExpression, SymbolicExpression, oneAPI, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
Concatenate
===========
@@ -1751,7 +1847,7 @@ Backend-specific attributes
* The number of times each multiplier is used by controlling the amount of pipelining/unrolling. Lower number results in more parallelism and lower latency at the expense of the resources used.Reuse factor = 1 corresponds to all multiplications executed in parallel, and hence, the lowest possible latency.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
Resize
======
@@ -1859,7 +1955,7 @@ Backend-specific attributes
* The number of times each multiplier is used by controlling the amount of pipelining/unrolling. Lower number results in more parallelism and lower latency at the expense of the resources used.Reuse factor = 1 corresponds to all multiplications executed in parallel, and hence, the lowest possible latency.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
SimpleRNN
=========
@@ -1889,6 +1985,8 @@ Type attributes
* return_state: bool (Default: False)
+* pass_initial_states: bool (Default: False)
+
Weight attributes
-----------------
* weight: WeightVariable
@@ -1921,13 +2019,13 @@ Backend-specific attributes
* The datatype (precision) used to store intermediate results of the computation within the layer.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* reuse_factor: int (Default: 1)
* The number of times each multiplier is used by controlling the amount of pipelining/unrolling. Lower number results in more parallelism and lower latency at the expense of the resources used.Reuse factor = 1 corresponds to all multiplications executed in parallel, and hence, the lowest possible latency.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* recurrent_reuse_factor: int (Default: 1)
@@ -1985,6 +2083,10 @@ Type attributes
* return_state: bool (Default: False)
+* pass_initial_states: bool (Default: False)
+
+* direction: list [forward,backward] (Default: forward)
+
* time_major: bool (Default: False)
Weight attributes
@@ -2007,8 +2109,6 @@ Configurable attributes
* The datatype (precision) of the output tensor.
-* direction: list [forward,backward] (Default: forward)
-
* weight_t: NamedType
* bias_t: NamedType
@@ -2023,13 +2123,13 @@ Backend-specific attributes
* The datatype (precision) used to store intermediate results of the computation within the layer.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* reuse_factor: int (Default: 1)
* The number of times each multiplier is used by controlling the amount of pipelining/unrolling. Lower number results in more parallelism and lower latency at the expense of the resources used.Reuse factor = 1 corresponds to all multiplications executed in parallel, and hence, the lowest possible latency.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* recurrent_reuse_factor: int (Default: 1)
@@ -2087,8 +2187,14 @@ Type attributes
* return_state: bool (Default: False)
+* pass_initial_states: bool (Default: False)
+
+* direction: list [forward,backward] (Default: forward)
+
* time_major: bool (Default: False)
+* apply_reset_gate: list [before,after] (Default: after)
+
Weight attributes
-----------------
* weight: WeightVariable
@@ -2109,10 +2215,6 @@ Configurable attributes
* The datatype (precision) of the output tensor.
-* direction: list [forward,backward] (Default: forward)
-
-* apply_reset_gate: list [before,after] (Default: after)
-
* weight_t: NamedType
* bias_t: NamedType
@@ -2127,13 +2229,13 @@ Backend-specific attributes
* The datatype (precision) used to store intermediate results of the computation within the layer.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* reuse_factor: int (Default: 1)
* The number of times each multiplier is used by controlling the amount of pipelining/unrolling. Lower number results in more parallelism and lower latency at the expense of the resources used.Reuse factor = 1 corresponds to all multiplications executed in parallel, and hence, the lowest possible latency.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* recurrent_reuse_factor: int (Default: 1)
@@ -2159,6 +2261,192 @@ Backend-specific attributes
* Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, oneAPI
+Bidirectional
+=============
+Base attributes
+---------------
+* result_t: NamedType
+
+ * The datatype (precision) of the output tensor.
+
+* forward_weight_t: NamedType
+
+* forward_bias_t: NamedType
+
+* forward_recurrent_weight_t: NamedType
+
+* forward_recurrent_bias_t: NamedType
+
+* backward_weight_t: NamedType
+
+* backward_bias_t: NamedType
+
+* backward_recurrent_weight_t: NamedType
+
+* backward_recurrent_bias_t: NamedType
+
+Type attributes
+---------------
+* index: int
+
+ * Internal node counter used for bookkeeping and variable/tensor naming.
+
+* n_out: int
+
+* return_sequences: bool (Default: False)
+
+* return_state: bool (Default: False)
+
+* pass_initial_states: bool (Default: False)
+
+* time_major: bool (Default: False)
+
+* forward_activation: str
+
+* forward_recurrent_activation: str
+
+* backward_activation: str
+
+* backward_recurrent_activation: str
+
+Weight attributes
+-----------------
+* forward_weight: WeightVariable
+
+* forward_bias: WeightVariable
+
+* forward_recurrent_weight: WeightVariable
+
+* forward_recurrent_bias: WeightVariable
+
+* backward_weight: WeightVariable
+
+* backward_bias: WeightVariable
+
+* backward_recurrent_weight: WeightVariable
+
+* backward_recurrent_bias: WeightVariable
+
+Configurable attributes
+-----------------------
+* trace: int (Default: False)
+
+ * Enables saving of layer output (tracing) when using hls_model.predict(...) or hls_model.trace(...)
+
+* result_t: NamedType
+
+ * The datatype (precision) of the output tensor.
+
+* forward_weight_t: NamedType
+
+* forward_bias_t: NamedType
+
+* forward_recurrent_weight_t: NamedType
+
+* forward_recurrent_bias_t: NamedType
+
+* backward_weight_t: NamedType
+
+* backward_bias_t: NamedType
+
+* backward_recurrent_weight_t: NamedType
+
+* backward_recurrent_bias_t: NamedType
+
+Backend-specific attributes
+---------------------------
+* accum_t: NamedType
+
+ * The datatype (precision) used to store intermediate results of the computation within the layer.
+
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
+
+* reuse_factor: int (Default: 1)
+
+ * The number of times each multiplier is used by controlling the amount of pipelining/unrolling. Lower number results in more parallelism and lower latency at the expense of the resources used. Reuse factor = 1 corresponds to all multiplications executed in parallel, and hence, the lowest possible latency.
+
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
+
+* forward_reuse_factor: int (Default: 1)
+
+ * The number of times each multiplier is used by controlling the amount of pipelining/unrolling. Lower number results in more parallelism and lower latency at the expense of the resources used. Reuse factor = 1 corresponds to all multiplications executed in parallel, and hence, the lowest possible latency.
+
+ * Available in: Vivado, VivadoAccelerator, Vitis
+
+* backward_reuse_factor: int (Default: 1)
+
+ * The number of times each multiplier is used by controlling the amount of pipelining/unrolling. Lower number results in more parallelism and lower latency at the expense of the resources used. Reuse factor = 1 corresponds to all multiplications executed in parallel, and hence, the lowest possible latency.
+
+ * Available in: Vivado, VivadoAccelerator, Vitis
+
+* forward_recurrent_reuse_factor: int (Default: 1)
+
+ * The number of times each multiplier is used by controlling the amount of pipelining/unrolling. Lower number results in more parallelism and lower latency at the expense of the resources used. Reuse factor = 1 corresponds to all multiplications executed in parallel, and hence, the lowest possible latency.
+
+ * Available in: Vivado, VivadoAccelerator, Vitis
+
+* backward_recurrent_reuse_factor: int (Default: 1)
+
+ * The number of times each multiplier is used by controlling the amount of pipelining/unrolling. Lower number results in more parallelism and lower latency at the expense of the resources used. Reuse factor = 1 corresponds to all multiplications executed in parallel, and hence, the lowest possible latency.
+
+ * Available in: Vivado, VivadoAccelerator, Vitis
+
+* static: bool (Default: True)
+
+ * If set to True, will reuse the same recurrent block for computation, resulting in lower resource usage at the expense of serialized computation and higher latency/II.
+
+ * Available in: Vivado, VivadoAccelerator, Vitis
+
+* table_size: int (Default: 1024)
+
+ * The size of the lookup table used to approximate the function.
+
+ * Available in: Vivado, VivadoAccelerator, Vitis
+
+* table_t: NamedType (Default: fixed<18,8,TRN,WRAP,0>)
+
+ * The datatype (precision) used for the values of the lookup table.
+
+ * Available in: Vivado, VivadoAccelerator, Vitis
+
+TimeDistributed
+===============
+Base attributes
+---------------
+* result_t: NamedType
+
+ * The datatype (precision) of the output tensor.
+
+Type attributes
+---------------
+* index: int
+
+ * Internal node counter used for bookkeeping and variable/tensor naming.
+
+* wrapped_layer: None
+
+* n_time_steps: int
+
+* output_shape: list
+
+Configurable attributes
+-----------------------
+* trace: int (Default: False)
+
+ * Enables saving of layer output (tracing) when using hls_model.predict(...) or hls_model.trace(...)
+
+* result_t: NamedType
+
+ * The datatype (precision) of the output tensor.
+
+Backend-specific attributes
+---------------------------
+* time_step_loop_parallelism: list [Off,Unroll,Pipeline] (Default: Off)
+
+ * Controls the amount and type of parallelism in the loop over time steps. If set to "off", no parallelism will be used. If set to "unroll", the loop will be unrolled. This may result in excessive resource use and cannot be used in "io_stream" mode. If set to "pipeline", the loop will be pipelined.
+
+ * Available in: Vivado, VivadoAccelerator, Vitis
+
GarNet
======
Base attributes
@@ -2189,7 +2477,7 @@ Backend-specific attributes
* The number of times each multiplier is used by controlling the amount of pipelining/unrolling. Lower number results in more parallelism and lower latency at the expense of the resources used.Reuse factor = 1 corresponds to all multiplications executed in parallel, and hence, the lowest possible latency.
- * Available in: Vivado, Vivado, VivadoAccelerator, VivadoAccelerator, Vitis, Vitis, Quartus, Quartus, Catapult, Catapult, SymbolicExpression, SymbolicExpression, oneAPI, oneAPI
+ * Available in: Vivado, Vivado, VivadoAccelerator, VivadoAccelerator, Vitis, Vitis, Quartus, Quartus, Catapult, Catapult, SymbolicExpression, SymbolicExpression, oneAPI, oneAPI, Libero, Libero, XLS, XLS
GarNetStack
===========
@@ -2237,7 +2525,7 @@ Backend-specific attributes
* The number of times each multiplier is used by controlling the amount of pipelining/unrolling. Lower number results in more parallelism and lower latency at the expense of the resources used.Reuse factor = 1 corresponds to all multiplications executed in parallel, and hence, the lowest possible latency.
- * Available in: Vivado, Vivado, VivadoAccelerator, VivadoAccelerator, Vitis, Vitis, Quartus, Quartus, Catapult, Catapult, SymbolicExpression, SymbolicExpression, oneAPI, oneAPI
+ * Available in: Vivado, Vivado, VivadoAccelerator, VivadoAccelerator, Vitis, Vitis, Quartus, Quartus, Catapult, Catapult, SymbolicExpression, SymbolicExpression, oneAPI, oneAPI, Libero, Libero, XLS, XLS
Quant
=====
@@ -2275,7 +2563,31 @@ Backend-specific attributes
* The number of times each multiplier is used by controlling the amount of pipelining/unrolling. Lower number results in more parallelism and lower latency at the expense of the resources used.Reuse factor = 1 corresponds to all multiplications executed in parallel, and hence, the lowest possible latency.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
+
+BipolarQuant
+============
+Base attributes
+---------------
+* result_t: NamedType
+
+ * The datatype (precision) of the output tensor.
+
+Type attributes
+---------------
+* index: int
+
+ * Internal node counter used for bookkeeping and variable/tensor naming.
+
+Configurable attributes
+-----------------------
+* trace: int (Default: False)
+
+ * Enables saving of layer output (tracing) when using hls_model.predict(...) or hls_model.trace(...)
+
+* result_t: NamedType
+
+ * The datatype (precision) of the output tensor.
ApplyAlpha
==========
@@ -2329,7 +2641,7 @@ Backend-specific attributes
* The number of times each multiplier is used by controlling the amount of pipelining/unrolling. Lower number results in more parallelism and lower latency at the expense of the resources used.Reuse factor = 1 corresponds to all multiplications executed in parallel, and hence, the lowest possible latency.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
BatchNormOnnx
=============
@@ -2361,7 +2673,7 @@ Backend-specific attributes
* The number of times each multiplier is used by controlling the amount of pipelining/unrolling. Lower number results in more parallelism and lower latency at the expense of the resources used.Reuse factor = 1 corresponds to all multiplications executed in parallel, and hence, the lowest possible latency.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
LayerGroup
==========
@@ -2427,6 +2739,174 @@ Configurable attributes
* The datatype (precision) of the output tensor.
+LayerNormalization
+==================
+Base attributes
+---------------
+* result_t: NamedType
+
+ * The datatype (precision) of the output tensor.
+
+* scale_t: NamedType
+
+* bias_t: NamedType
+
+Type attributes
+---------------
+* index: int
+
+ * Internal node counter used for bookkeeping and variable/tensor naming.
+
+* n_in: int
+
+* seq_len: int
+
+* axis: int (Default: 2)
+
+* epsilon_power_of_10: int (Default: 3)
+
+Weight attributes
+-----------------
+* scale: WeightVariable
+
+* bias: WeightVariable
+
+Configurable attributes
+-----------------------
+* trace: int (Default: False)
+
+ * Enables saving of layer output (tracing) when using hls_model.predict(...) or hls_model.trace(...)
+
+* result_t: NamedType
+
+ * The datatype (precision) of the output tensor.
+
+* scale_t: NamedType
+
+* bias_t: NamedType
+
+Backend-specific attributes
+---------------------------
+* accum_t: NamedType
+
+ * The datatype (precision) used to store intermediate results of the computation within the layer.
+
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
+
+* reuse_factor: int (Default: 1)
+
+ * The number of times each multiplier is used by controlling the amount of pipelining/unrolling. Lower number results in more parallelism and lower latency at the expense of the resources used. Reuse factor = 1 corresponds to all multiplications executed in parallel, and hence, the lowest possible latency.
+
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
+
+* table_range_power2: int (Default: 0)
+
+ * The negative power of 2 that represents the range of the lookup table, e.g. a value of 1 would represent a range of 0.5.
+
+ * Available in: Vivado, VivadoAccelerator, Vitis
+
+* table_size: int (Default: 4096)
+
+ * The size of the lookup table used to approximate the function.
+
+ * Available in: Vivado, VivadoAccelerator, Vitis
+
+* table_t: NamedType (Default: ufixed<8,5,RND_CONV,SAT,0>)
+
+ * The datatype (precision) used for the values of the lookup table.
+
+ * Available in: Vivado, VivadoAccelerator, Vitis
+
+* accum_t: NamedType (Default: fixed<14,4,RND_CONV,SAT,0>)
+
+ * The datatype (precision) used to store intermediate results of the computation within the layer.
+
+ * Available in: Vivado, VivadoAccelerator, Vitis
+
+EinsumDense
+===========
+Base attributes
+---------------
+* result_t: NamedType
+
+ * The datatype (precision) of the output tensor.
+
+* weight_t: NamedType
+
+* bias_t: NamedType
+
+* accum_t: NamedType
+
+Type attributes
+---------------
+* index: int
+
+ * Internal node counter used for bookkeeping and variable/tensor naming.
+
+* equation: str
+
+* inp_shape: tuple
+
+* out_shape: tuple
+
+Weight attributes
+-----------------
+* weight: WeightVariable
+
+* bias: WeightVariable
+
+Configurable attributes
+-----------------------
+* trace: int (Default: False)
+
+ * Enables saving of layer output (tracing) when using hls_model.predict(...) or hls_model.trace(...)
+
+* result_t: NamedType
+
+ * The datatype (precision) of the output tensor.
+
+* weight_t: NamedType
+
+* bias_t: NamedType
+
+* accum_t: NamedType
+
+Einsum
+======
+Base attributes
+---------------
+* result_t: NamedType
+
+ * The datatype (precision) of the output tensor.
+
+* accum_t: NamedType
+
+Type attributes
+---------------
+* index: int
+
+ * Internal node counter used for bookkeeping and variable/tensor naming.
+
+* equation: str
+
+* inp0_shape: tuple
+
+* inp1_shape: tuple
+
+* out_shape: tuple
+
+Configurable attributes
+-----------------------
+* trace: int (Default: False)
+
+ * Enables saving of layer output (tracing) when using hls_model.predict(...) or hls_model.trace(...)
+
+* result_t: NamedType
+
+ * The datatype (precision) of the output tensor.
+
+* accum_t: NamedType
+
BiasAdd
=======
Base attributes
@@ -2473,10 +2953,10 @@ Backend-specific attributes
* The number of times each multiplier is used by controlling the amount of pipelining/unrolling. Lower number results in more parallelism and lower latency at the expense of the resources used.Reuse factor = 1 corresponds to all multiplications executed in parallel, and hence, the lowest possible latency.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
-FixedPointQuantizer
-===================
+DACombinational
+===============
Base attributes
---------------
* result_t: NamedType
@@ -2499,8 +2979,8 @@ Configurable attributes
* The datatype (precision) of the output tensor.
-UnaryLUT
-========
+FixedPointQuantizer
+===================
Base attributes
---------------
* result_t: NamedType
@@ -2523,20 +3003,28 @@ Configurable attributes
* The datatype (precision) of the output tensor.
-Repack
-======
+UnaryLUT
+========
Base attributes
---------------
* result_t: NamedType
* The datatype (precision) of the output tensor.
+* table_t: NamedType (Default: fixed<18,8,TRN,WRAP,0>)
+
Type attributes
---------------
* index: int
* Internal node counter used for bookkeeping and variable/tensor naming.
+* n_in: int
+
+Weight attributes
+-----------------
+* table: WeightVariable
+
Configurable attributes
-----------------------
* trace: int (Default: False)
@@ -2547,8 +3035,10 @@ Configurable attributes
* The datatype (precision) of the output tensor.
-Clone
-=====
+* table_t: NamedType (Default: fixed<18,8,TRN,WRAP,0>)
+
+Repack
+======
Base attributes
---------------
* result_t: NamedType
@@ -2571,26 +3061,20 @@ Configurable attributes
* The datatype (precision) of the output tensor.
-BatchNormalizationQuantizedTanh
-===============================
+Clone
+=====
Base attributes
---------------
* result_t: NamedType
* The datatype (precision) of the output tensor.
-* accum_t: NamedType
-
Type attributes
---------------
* index: int
* Internal node counter used for bookkeeping and variable/tensor naming.
-* n_in: int
-
-* n_filt: int (Default: 0)
-
Configurable attributes
-----------------------
* trace: int (Default: False)
@@ -2601,10 +3085,6 @@ Configurable attributes
* The datatype (precision) of the output tensor.
-* accum_t: NamedType
-
-* reuse_factor: int (Default: 1)
-
PointwiseConv1D
===============
Base attributes
@@ -2665,13 +3145,13 @@ Backend-specific attributes
* The datatype (precision) used to store intermediate results of the computation within the layer.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* reuse_factor: int (Default: 1)
* The number of times each multiplier is used by controlling the amount of pipelining/unrolling. Lower number results in more parallelism and lower latency at the expense of the resources used.Reuse factor = 1 corresponds to all multiplications executed in parallel, and hence, the lowest possible latency.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* parallelization_factor: int (Default: 1)
@@ -2757,13 +3237,13 @@ Backend-specific attributes
* The datatype (precision) used to store intermediate results of the computation within the layer.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* reuse_factor: int (Default: 1)
* The number of times each multiplier is used by controlling the amount of pipelining/unrolling. Lower number results in more parallelism and lower latency at the expense of the resources used.Reuse factor = 1 corresponds to all multiplications executed in parallel, and hence, the lowest possible latency.
- * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI
+ * Available in: Vivado, VivadoAccelerator, Vitis, Quartus, Catapult, SymbolicExpression, oneAPI, Libero, XLS
* parallelization_factor: int (Default: 1)
@@ -2800,3 +3280,37 @@ Configurable attributes
* result_t: NamedType
* The datatype (precision) of the output tensor.
+
+BatchNormalizationQuantizedTanh
+===============================
+Base attributes
+---------------
+* result_t: NamedType
+
+ * The datatype (precision) of the output tensor.
+
+* accum_t: NamedType
+
+Type attributes
+---------------
+* index: int
+
+ * Internal node counter used for bookkeeping and variable/tensor naming.
+
+* n_in: int
+
+* n_filt: int (Default: 0)
+
+Configurable attributes
+-----------------------
+* trace: int (Default: False)
+
+ * Enables saving of layer output (tracing) when using hls_model.predict(...) or hls_model.trace(...)
+
+* result_t: NamedType
+
+ * The datatype (precision) of the output tensor.
+
+* accum_t: NamedType
+
+* reuse_factor: int (Default: 1)
diff --git a/hls4ml/backends/__init__.py b/hls4ml/backends/__init__.py
index 07a089cdf8..0ae8e3d20e 100644
--- a/hls4ml/backends/__init__.py
+++ b/hls4ml/backends/__init__.py
@@ -12,6 +12,7 @@
from hls4ml.backends.catapult.catapult_backend import CatapultBackend # isort: skip
from hls4ml.backends.vitis.vitis_backend import VitisBackend # isort: skip
+from hls4ml.backends.xls.xls_backend import XLSBackend
def _register_builtin_backends():
@@ -23,6 +24,7 @@ def _register_builtin_backends():
register_backend('SymbolicExpression', SymbolicExpressionBackend)
register_backend('oneAPI', OneAPIBackend)
register_backend('Libero', LiberoBackend)
+ register_backend('XLS', XLSBackend)
_register_builtin_backends()
diff --git a/hls4ml/backends/xls/__init__.py b/hls4ml/backends/xls/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/hls4ml/backends/xls/passes/build_attr.py b/hls4ml/backends/xls/passes/build_attr.py
new file mode 100644
index 0000000000..f24309b52e
--- /dev/null
+++ b/hls4ml/backends/xls/passes/build_attr.py
@@ -0,0 +1,430 @@
+# Typing imports
+from __future__ import annotations # makes all annotations into strings
+
+from collections.abc import Callable
+from typing import TYPE_CHECKING, Literal
+
+from hls4ml.backends.xls.xls_types import (
+ XLSArray,
+ XLSArrayType,
+ XLSConst,
+ XLSFixedPoint,
+ XLSFixedPointType,
+ XLSFunctionCall,
+ XLSQualifiedName,
+ XLSTensorVariable,
+ float_to_significand,
+)
+from hls4ml.model.types import PrecisionType
+
+if TYPE_CHECKING:
+ from hls4ml.model.graph import ModelGraph
+ from hls4ml.model.layers import Layer
+
+from functools import wraps
+
+import numpy as np
+
+from hls4ml.model.optimizer import OptimizerPass
+
+
+class XLSAttrBuilder:
+ """A helper class that sets XLS specific attributes for the layers of the original ModelGraph.
+ In doing so, we simplify the process of creating new optimization passes
+ and constructing the writer class.
+ The new attributes must be accessed with .get_attr(...)
+
+ New attributes:
+ - xls_module_name (str): DSLX module name (e.g. layer_4_softmax) used for the layer
+ - xls_input_variables(list[XLSTensorVariable]): XLS representation of input shape and precision
+ - xls_output_variables(list[XLSTensorVariable]): XLS representation of output shape and precision
+ - xls_weights(XLSArray): Weights converted to XLS array
+ - xls_bias(XLSArray): Bias converted to XLS array
+ - xls_extra_func_params(list[XLSConst]): Extra parameters for function call, e.g. stride, padding, pool_op, etc.
+ - xls_extra_func_args(list[XLSConst]): Extra arguments for function call, e.g. activation parameter.
+ - xls_func_call(XLSFunctionCall): Function used for transformation, e.g. softmax_stable or conv2d
+ - xls_min_input_rank(int): Minimally required rank of the input tensor
+
+ Args:
+ - node (Layer): A layer of the model graph
+ """
+
+ def __init__(self, node) -> None:
+ """Bind the builder to one layer of the model graph.
+
+ Args:
+ node: The layer whose attributes the decorated builder methods read and extend.
+ """
+ self.node = node
+
+ @staticmethod
+ def attach_to_node(attr_name: str | None = None):
+ """A decorator factory that stores a builder method's result on the node via 'set_attr'.
+ The wrapped method calls the provided function, saves the returned value as a node attribute
+ and returns the builder itself. This eliminates a lot of boilerplate code and lets all the
+ added attributes be chained in one call.
+
+ Args:
+ attr_name: Name of the node attribute to set. Defaults to the name of the decorated function.
+
+ Returns:
+ A decorator that turns a value-producing method into a chainable, attribute-setting method.
+
+ Raises:
+ AssertionError: Raised by the wrapped method if the node already has an attribute with that name.
+ """
+
+ def decorator(fn) -> Callable:
+ # Resolve the attribute name once, at decoration time.
+ name = attr_name or fn.__name__
+
+ @wraps(fn)
+ def wrapped(self, *args, **kwargs):
+ val = fn(self, *args, **kwargs)
+ # Refuse to silently overwrite an attribute that is already present on the node.
+ assert name not in self.node.attributes, f"Duplicate attribute: '{name}'"
+ self.node.set_attr(name, val)
+ # Return the builder (not the value) so that calls can be chained.
+ return self
+
+ return wrapped
+
+ return decorator
+
+ @staticmethod
+ def _xls_const_array(name: str, data: np.ndarray, precision: PrecisionType) -> XLSConst:
+ """Build a named DSLX constant holding `data` as an array of fixed-point values.
+
+ The data is first converted to an array of raw significands, which is then wrapped in a call to
+ `fixed_point_util::make_fixed_points_<rank>d` parameterized by the binary exponent of the precision.
+
+ Args:
+ name: Name of the resulting constant.
+ data: Values to convert; its shape determines the shape of the array type.
+ precision: hls4ml precision used to derive the XLS fixed-point type.
+
+ Returns:
+ An XLSConst whose value is the conversion call and whose type is an array of the fixed-point type.
+ """
+ # We allow unsigned types (e.g. XnorPrecisionType or uint<1>) for weights and biases.
+ # They will be converted to signed FixedPoint in DSLX.
+ allow_unsigned = True
+ xls_precision = XLSFixedPointType.from_precision(precision, allow_unsigned)
+ # Raw array: same shape as the data, elements typed as the significand of the fixed-point type.
+ xls_raw_array = XLSArray(
+ array_type=XLSArrayType(element_type=xls_precision.significand_type, shape=data.shape),
+ array=float_to_significand(data, precision, allow_unsigned),
+ )
+ # The helper name is selected by the number of dimensions of the data.
+ xls_fixed_point_array = XLSFunctionCall(
+ name=f'fixed_point_util::make_fixed_points_{len(data.shape)}d',
+ params=[xls_precision.binary_exponent],
+ args=[xls_raw_array],
+ )
+ return XLSConst(
+ name=name, value=xls_fixed_point_array, type=XLSArrayType(element_type=xls_precision, shape=data.shape)
+ )
+
+ @attach_to_node()
+ def xls_weights(self) -> XLSConst | None:
+ """Convert the main weight tensor of the layer into a DSLX fixed-point constant array.
+
+ The weight entry is chosen by layer class: 'param_data' for PReLU, 'scale' for
+ BatchNormalization (and ApplyAlpha), 'weight' otherwise. The data is validated against the
+ shape expected from the layer attributes before conversion. The result is stored on the node
+ as the 'xls_weights' attribute by `attach_to_node`.
+
+ Returns:
+ The weights as an XLSConst, or None when the looked-up weight entry is missing.
+
+ Raises:
+ ValueError: If the layer class has weights but is not one of the supported classes.
+ AssertionError: If the weight data does not have the expected shape.
+ """
+ class_name = self.node.class_name
+ # ApplyAlpha is treated exactly like BatchNormalization from here on.
+ if class_name == 'ApplyAlpha':
+ class_name = 'BatchNormalization'
+
+ precision = None
+ xls_weights_name = None
+ if class_name == 'PReLU':
+ # PReLU uses a fixed constant name and takes its precision from the 'param_t' attribute.
+ weights = self.node.weights.get('param_data')
+ xls_weights_name = 'PRELU_PARAM'
+ precision = self.node.get_attr('param_t').precision
+ elif class_name == 'BatchNormalization':
+ weights = self.node.weights.get('scale', None)
+ else:
+ weights = self.node.weights.get('weight', None)
+ if weights is None:
+ return None
+
+ # Fall back to a name/precision derived from the weight itself when not set above.
+ xls_weights_name = xls_weights_name or f'WEIGHTS_{weights.name}'.upper()
+ precision: PrecisionType = precision or weights.type.precision
+
+ input_var = self.node.get_input_variable()
+ output_var = self.node.get_output_variable()
+
+ match class_name:
+ case 'BatchNormalization':
+ # NB: we need flattening because sometimes the weights can be e.g.
+ # (1,1,1,n_filt) instead of (n_filt,)
+ # We'll throw an error if there are several dimensions larger than 1.
+ data = np.asarray(weights.data).flatten()
+ n_filt = self.node.get_attr('n_filt')
+ # n_filt == -1 falls back to the size of the last input dimension.
+ if n_filt == -1:
+ n_filt = input_var.shape[-1]
+ expected_shape = (n_filt,)
+ case 'Conv1D':
+ data = np.asarray(weights.data)
+ expected_shape = tuple(self.node.get_attr(x) for x in ['filt_width', 'n_chan', 'n_filt'])
+ case 'DepthwiseConv1D':
+ data = np.asarray(weights.data)
+ expected_shape = tuple(self.node.get_attr(x) for x in ['filt_width', 'n_chan', 'depth_multiplier'])
+ case 'Conv2D':
+ data = np.asarray(weights.data)
+ expected_shape = tuple(self.node.get_attr(x) for x in ['filt_height', 'filt_width', 'n_chan', 'n_filt'])
+ case 'DepthwiseConv2D':
+ data = np.asarray(weights.data)
+ expected_shape = tuple(
+ self.node.get_attr(x) for x in ['filt_height', 'filt_width', 'n_chan', 'depth_multiplier']
+ )
+ case 'Dense':
+ # Transpose the weights so that we can call dot_prod(x, w[i]) in dense.x
+ data = np.asarray(weights.data).T
+ expected_shape = (output_var.shape[0], input_var.shape[0])
+ case 'PReLU':
+ # NOTE(review): unlike the other cases the weight entry is used directly here, without
+ # `.data` / np.asarray - confirm that the 'param_data' entry exposes `.shape` as required below.
+ data = weights
+ expected_shape = (input_var.shape[0],)
+ case _:
+ raise ValueError(f'Unsupported weights for layer {self.node.class_name}')
+
+ assert data.shape == expected_shape, f'Weights shape mismatch: expected {expected_shape}, got {data.shape}'
+
+ return XLSAttrBuilder._xls_const_array(name=xls_weights_name, data=data, precision=precision)
+
+    @attach_to_node()
+    def xls_bias(self) -> XLSConst | None:
+        """Convert the layer's 'bias' weight into a DSLX fixed-point constant array.
+
+        The result is stored on the node as the 'xls_bias' attribute by `attach_to_node`.
+
+        Returns:
+            A constant named 'BIAS_<NAME>' built from the bias data and precision,
+            or None when the layer has no 'bias' entry.
+        """
+        bias = self.node.weights.get('bias', None)
+        # Explicit None check, consistent with xls_weights: a missing entry must not be
+        # confused with a weight object that merely evaluates as falsy.
+        if bias is None:
+            return None
+
+        return XLSAttrBuilder._xls_const_array(
+            name=f'BIAS_{bias.name}'.upper(), data=bias.data, precision=bias.type.precision
+        )
+
+ @attach_to_node()
+ def xls_module_name(self) -> str:
+ """Derive the DSLX module name of the layer, stored on the node as 'xls_module_name'.
+
+ Returns:
+ 'layer_<index>_<name>', where <name> is the lower-cased node name with every character
+ other than alphanumerics and underscores removed.
+ """
+ name = ''.join(c for c in self.node.name if c.isalnum() or c == '_').lower()
+ return f'layer_{self.node.index}_{name}'
+
+    @attach_to_node()
+    def xls_output_variables(self) -> list[XLSTensorVariable]:
+        """Build the XLS representation of every output variable of the layer.
+
+        The result is stored on the node as the 'xls_output_variables' attribute by `attach_to_node`.
+
+        Returns:
+            One XLSTensorVariable per entry of `node.outputs`, in the same order.
+        """
+        return [
+            XLSTensorVariable.from_tensor_variable(self.node.get_output_variable(name))
+            for name in self.node.outputs
+        ]
+
+ @attach_to_node()
+ def xls_input_variables(self) -> list[XLSTensorVariable]:
+ """Build the XLS representation of the input variables of the layer.
+
+ The result is stored on the node as the 'xls_input_variables' attribute by `attach_to_node`.
+
+ Returns:
+ For an 'Input' layer, a single variable derived from its output variable and renamed to
+ 'input_<output variable name>'; otherwise one XLSTensorVariable per entry of `node.inputs`.
+
+ Raises:
+ AssertionError: If an 'Input' layer unexpectedly has an input variable.
+ """
+ if self.node.class_name == 'Input':
+ assert self.node.get_input_variable() is None, f'Input layer {self.node.name} should not have input variable'
+ # An Input layer has no input of its own, so its output variable is reused under a new name.
+ out_var = self.node.get_output_variable()
+ return [XLSTensorVariable.from_tensor_variable(out_var, name=f'input_{out_var.name}')]
+ else:
+ return [
+ XLSTensorVariable.from_tensor_variable(var=self.node.get_input_variable(name)) for name in self.node.inputs
+ ]
+
+ @attach_to_node()
+ def xls_min_input_rank(self) -> int:
+ """Minimally required rank of the input tensor.
+ Input tensor can have a higher rank if it consists of multiple batches.
+ NB: in the case of multiple input variables, the rank is determined by the first input variable.
+
+ Returns:
+ 3 for layer classes ending in '2D', 2 for those ending in '1D', the rank of the input
+ variable for Reshape and Concatenate, the length of 'perm' for Transpose, and 1 otherwise.
+ """
+ name = self.node.class_name
+ # The class-name suffix is checked first, so any '...2D' / '...1D' layer is covered here.
+ if name.endswith('2D'):
+ return 3
+ elif name.endswith('1D'):
+ return 2
+ elif name in ('Reshape', 'Concatenate'):
+ # These layers need the input at exactly the rank of their (first) input variable.
+ return len(self.node.get_input_variable().shape)
+ elif name == 'Transpose':
+ return len(self.node.get_attr('perm'))
+ else:
+ return 1
+
+    @attach_to_node()
+    def xls_extra_func_params(self) -> list[XLSConst]:
+        """Collect the layer-specific constants whose names ``xls_func_call`` appends to the call's params.
+
+        Returns:
+            A list of constants that depends on the layer class (concatenation axis, convolution and
+            pooling geometry, reshape output shape, transpose permutation); empty for other layers.
+
+        Raises:
+            ValueError: for a pooling class name that is neither global, 1D nor 2D, or for a
+                Concatenate axis that does not address a dimension of the input.
+        """
+        layer = self.node
+        class_name = layer.class_name
+
+        def u32(name: str, attr: str) -> XLSConst:
+            # u32 constant whose value is read from the layer attribute `attr`.
+            return XLSConst(name=name, value=layer.get_attr(attr), type='u32')
+
+        def data_format_value() -> str:
+            return f'data_format::DataFormat::{layer.get_attr("data_format").upper()}'
+
+        if class_name == 'Concatenate':
+            rank = len(layer.get_input_variable().shape)
+            if rank == 1:
+                return []
+            original_axis = layer.get_attr('axis')
+            axis = original_axis
+            if axis > 0:
+                # Convert axis to a 0-based index.
+                # This is the same adjustment as in hls4ml.model.layers.Concatenate.initialize()
+                # TODO: should it be done earlier, when converting from frontend?
+                axis -= 1
+            elif axis < 0:
+                # Negative axes count from the last dimension. Normalise all of them (not only -1),
+                # otherwise e.g. -2 would be emitted as a negative value of a u32 constant.
+                axis += rank
+            if not 0 <= axis < rank:
+                raise ValueError(f'Concatenate axis {original_axis} is out of range for input of rank {rank}')
+            return [XLSConst(name='AXIS', value=axis, type='u32')]
+        elif class_name in ('Conv1D', 'DepthwiseConv1D'):
+            return [
+                u32('STRIDE', 'stride_width'),
+                u32('PAD_LEFT', 'pad_left'),
+                u32('PAD_RIGHT', 'pad_right'),
+                XLSConst(name='DATA_FORMAT', value=data_format_value()),
+            ]
+        elif class_name in ('Conv2D', 'DepthwiseConv2D'):
+            return [
+                u32('STRIDE_HEIGHT', 'stride_height'),
+                u32('STRIDE_WIDTH', 'stride_width'),
+                u32('PAD_TOP', 'pad_top'),
+                u32('PAD_BOTTOM', 'pad_bottom'),
+                u32('PAD_LEFT', 'pad_left'),
+                u32('PAD_RIGHT', 'pad_right'),
+                XLSConst(name='DATA_FORMAT', value=data_format_value()),
+            ]
+        elif 'Pooling' in class_name:
+            pool_op = f'pooling::PoolingOperation::{layer.get_attr("pool_op").upper()}'
+            data_format = data_format_value()
+            if class_name.startswith('GlobalPooling'):
+                return [XLSConst(name='POOL_OP', value=pool_op), XLSConst(name='DATA_FORMAT', value=data_format)]
+            elif class_name.endswith('Pooling1D'):
+                # The boolean attribute is rendered as the lower-case literal 'true'/'false'.
+                count_pad = str(layer.get_attr('count_pad')).lower()
+                return [
+                    XLSConst(name='POOL_OP', value=pool_op),
+                    u32('POOL_SIZE', 'pool_width'),
+                    u32('STRIDE', 'stride_width'),
+                    u32('PAD_LEFT', 'pad_left'),
+                    u32('PAD_RIGHT', 'pad_right'),
+                    XLSConst(name='COUNT_PAD', value=count_pad, type='bool'),
+                    XLSConst(name='DATA_FORMAT', value=data_format),
+                ]
+            elif class_name.endswith('Pooling2D'):
+                count_pad = str(layer.get_attr('count_pad')).lower()
+                return [
+                    XLSConst(name='POOL_OP', value=pool_op),
+                    u32('POOL_HEIGHT', 'pool_height'),
+                    u32('POOL_WIDTH', 'pool_width'),
+                    u32('STRIDE_HEIGHT', 'stride_height'),
+                    u32('STRIDE_WIDTH', 'stride_width'),
+                    u32('PAD_TOP', 'pad_top'),
+                    u32('PAD_BOTTOM', 'pad_bottom'),
+                    u32('PAD_LEFT', 'pad_left'),
+                    u32('PAD_RIGHT', 'pad_right'),
+                    XLSConst(name='COUNT_PAD', value=count_pad, type='bool'),
+                    XLSConst(name='DATA_FORMAT', value=data_format),
+                ]
+            else:
+                raise ValueError(f'Unsupported pooling layer {class_name}')
+        elif class_name == 'Reshape':
+            out_vars = layer.get_attr('xls_output_variables')
+            assert len(out_vars) == 1, f'Reshape layer should have exactly one output variable, got {len(out_vars)}'
+            # NOTE(review): this returns the entries of the output variable's shape as-is;
+            # xls_func_call reads `.name` from each entry, so they are presumably XLSConst-like — confirm.
+            return list(out_vars[0].shape)
+        elif class_name == 'Transpose':
+            return [XLSConst(name=f'PERM_{i}', value=perm, type='u32') for i, perm in enumerate(layer.get_attr('perm'))]
+        else:
+            return []
+
+    @attach_to_node()
+    def xls_extra_func_args(self) -> list[XLSConst]:
+        """Collect fixed-point constants passed as additional call arguments.
+
+        ``HardActivation`` contributes ``SLOPE`` and ``SHIFT``; a ``ParametrizedActivation`` whose
+        activation is a leaky or thresholded ReLU contributes ``ACTIVATION_PARAM``. Every other
+        layer contributes nothing.
+        """
+        layer = self.node
+        kind = layer.class_name
+
+        if kind == 'HardActivation':
+            constants = []
+            for attr in ['slope', 'shift']:
+                const_name = attr.upper()
+                fixed_value = XLSFixedPoint.from_float(
+                    layer.get_attr(attr),
+                    precision=layer.get_attr(f'{attr}_t').precision,
+                    allow_unsigned=True,
+                )
+                constants.append(XLSConst(name=const_name, value=fixed_value))
+            return constants
+
+        if kind == 'ParametrizedActivation':
+            # Both attributes are read before the activation name is inspected, as before.
+            precision = layer.get_attr('param_t').precision
+            value = layer.get_attr('activ_param')
+            takes_param = layer.get_attr('activation').lower() in ('leakyrelu', 'leaky_relu', 'thresholdedrelu')
+            if takes_param:
+                fixed_value = XLSFixedPoint.from_float(value, precision, allow_unsigned=True)
+                return [XLSConst(name='ACTIVATION_PARAM', value=fixed_value)]
+
+        return []
+
+    @staticmethod
+    def func_name(layer: Layer) -> XLSQualifiedName:
+        """Select the qualified name (function + module) implementing ``layer``.
+
+        Raises:
+            ValueError: if the layer class is not handled, or a Softmax layer requests an
+                implementation other than ``stable``, ``latency`` or ``argmax``.
+        """
+        kind = layer.class_name
+
+        # Layer classes whose function does not depend on any layer attribute:
+        # class name -> (function name, module name).
+        fixed_names = {
+            # Identity transformation except for OverflowMode::SAT_SYM case.
+            'Input': ('resize_1d', 'fixed_point_util'),
+            'ApplyAlpha': ('normalize', 'batchnorm'),
+            'BatchNormalization': ('normalize', 'batchnorm'),
+            'Dense': ('dense', 'dense'),
+            'Conv1D': ('conv1d_latency', 'conv1d'),
+            'DepthwiseConv1D': ('depthwise_conv_1d', 'depthwise_conv'),
+            'Conv2D': ('conv2d_latency', 'conv2d'),
+            'DepthwiseConv2D': ('depthwise_conv_2d', 'depthwise_conv'),
+            'Pooling1D': ('pooling_1d', 'pooling'),
+            'Pooling2D': ('pooling_2d', 'pooling'),
+            'GlobalPooling1D': ('global_pooling_1d', 'pooling'),
+            'GlobalPooling2D': ('global_pooling_2d', 'pooling'),
+            'Dot': ('dot', 'merge'),
+            'PReLU': ('prelu', 'activations'),
+            'TernaryTanh': ('ternary_tanh', 'activations'),
+        }
+        if kind in fixed_names:
+            function, module = fixed_names[kind]
+            return XLSQualifiedName(name=function, module_name=module)
+
+        if kind == 'Merge':
+            return XLSQualifiedName(name=layer.get_attr('op').lower(), module_name='merge')
+        if kind == 'Concatenate':
+            rank = len(layer.get_input_variable().shape)
+            return XLSQualifiedName(name=f'concatenate{rank}d', module_name='merge')
+        if kind in ('Activation', 'HardActivation'):
+            return XLSQualifiedName(name=layer.get_attr('activation').lower(), module_name='activations')
+        if kind == 'ParametrizedActivation':
+            return XLSQualifiedName(name=layer._get_act_function_name(), module_name='activations')
+        if kind == 'Reshape':
+            in_rank = len(layer.get_input_variable().shape)
+            out_rank = len(layer.get_output_variable().shape)
+            return XLSQualifiedName(name=f'reshape_{in_rank}d_to_{out_rank}d', module_name='reshape')
+        if kind == 'Softmax':
+            implementation = layer.attributes.get('implementation', 'stable')
+            softmax_functions = {'stable': 'softmax_stable', 'latency': 'softmax_latency', 'argmax': 'argmax'}
+            if implementation not in softmax_functions:
+                # TODO: support implementation == 'legacy'
+                raise ValueError(f'Unknown softmax implementation {implementation}')
+            return XLSQualifiedName(name=softmax_functions[implementation], module_name='activations')
+        if kind == 'Transpose':
+            rank = len(layer.get_input_variable().shape)
+            return XLSQualifiedName(name=f'transpose_{rank}d', module_name='transpose')
+
+        raise ValueError(f'Unknown layer type: {layer.class_name}')
+
+    @attach_to_node()
+    def xls_func_call(self) -> XLSFunctionCall:
+        """Assemble the call of the layer's function from the attributes built earlier.
+
+        Params are, for each output variable, the names of its bit count, binary exponent, rounding
+        mode and overflow mode, followed by the names of the extra params. Args are ``x_0..x_{n-1}``
+        for the inputs, then weights and bias (when present), lookup tables and extra args.
+        """
+        node = self.node
+        inputs = node.get_attr('xls_input_variables')
+        outputs = node.get_attr('xls_output_variables')
+        qualified_name = self.func_name(node)
+
+        params = []
+        for variable in outputs:
+            type_fields = (
+                variable.num_bits,
+                variable.binary_exponent,
+                variable.rounding_mode,
+                variable.overflow_mode,
+            )
+            params.extend(field.name for field in type_fields)
+        params.extend(extra.name for extra in node.get_attr('xls_extra_func_params'))
+
+        args = [f'x_{position}' for position in range(len(inputs))]
+        for attr in ('xls_weights', 'xls_bias'):
+            constant = node.get_attr(attr)
+            if constant is not None:
+                args.append(constant.name)
+        for table in node.get_attr('lookup_tables', []):
+            args.append(table.lookup_table.name)
+        for extra in node.get_attr('xls_extra_func_args'):
+            args.append(extra.name)
+
+        return XLSFunctionCall(name=qualified_name, params=params, args=args)
+
+
+class BuildAttr(OptimizerPass):
+    """Optimizer pass that attaches the XLS-specific attributes to every layer."""
+
+    def match(self, node: Layer) -> bool:
+        # Every layer gets XLS attributes.
+        return True
+
+    def transform(self, model: ModelGraph, node: Layer) -> Literal[False]:
+        # Builder methods in invocation order; each is called on the result of the previous call.
+        build_steps = (
+            'xls_module_name',
+            'xls_min_input_rank',
+            'xls_input_variables',
+            'xls_output_variables',
+            'xls_weights',
+            'xls_bias',
+            'xls_extra_func_params',
+            'xls_extra_func_args',
+            'xls_func_call',
+        )
+        try:
+            builder = XLSAttrBuilder(node)
+            for step in build_steps:
+                builder = getattr(builder, step)()
+        except Exception as e:
+            # Re-raise with the offending layer identified; the original error stays chained.
+            raise ValueError(
+                f'Failed to build XLS attributes for layer (name={node.name}, class_name={node.class_name}): {e}'
+            ) from e
+        return False
diff --git a/hls4ml/backends/xls/passes/build_tables.py b/hls4ml/backends/xls/passes/build_tables.py
new file mode 100644
index 0000000000..851542b704
--- /dev/null
+++ b/hls4ml/backends/xls/passes/build_tables.py
@@ -0,0 +1,259 @@
+# Typing imports
+from __future__ import annotations # makes all annotations into strings
+
+import warnings
+from collections.abc import Callable
+from copy import copy
+from enum import Enum
+from typing import TYPE_CHECKING, Literal
+
+from hls4ml.backends.xls.xls_types import XLSFixedPoint, XLSFixedPointType, XLSLookupTable, float_to_significand
+from hls4ml.model.types import FixedPrecisionType
+
+if TYPE_CHECKING:
+ from hls4ml.model.graph import ModelGraph
+ from hls4ml.model.layers import Layer
+
+import math
+
+from hls4ml.model.optimizer import OptimizerPass
+
+
+class LookupTableRange(Enum):
+    """Part of the input range over which build_table tabulates a function."""
+
+    # Whole representable input range (minimum to maximum significand).
+    FULL = 1
+    # From significand 0 up to the maximum.
+    NON_NEGATIVE = 2
+    # From the minimum significand up to -1.
+    NEGATIVE = 3
+
+
+def build_table(
+    name: str,
+    func: Callable[[float], float],
+    table_size: int,
+    input_precision: FixedPrecisionType,
+    output_precision: FixedPrecisionType,
+    table_range: LookupTableRange,
+) -> XLSLookupTable:
+    """Tabulate ``func`` on a uniform grid of input significands with a power-of-two step.
+
+    The grid starts as the part of the input range selected by ``table_range``. While the quantized
+    function repeats its edge value on grid points next to an edge (saturation), the range is
+    shrunk to skip those points and the grid is recomputed, so the table entries are not spent on
+    constant regions. Warnings are emitted when the range or the table size end up reduced.
+
+    Args:
+        name: name of the resulting lookup table.
+        func: function of the real-valued input to tabulate.
+        table_size: maximum number of table entries; must be at least 2.
+        input_precision: fixed-point type of the table input.
+        output_precision: fixed-point type of the table values.
+        table_range: which part of the input range to cover.
+
+    Returns:
+        The lookup table with its first input value, log2 of the step and the raw (significand) values.
+
+    Raises:
+        ValueError: if ``table_size`` is smaller than 2.
+    """
+    # The step is derived from (range / (table_size - 1)), which is undefined for fewer than 2 entries.
+    if table_size < 2:
+        raise ValueError(f'Lookup table {name} needs at least 2 entries, got table_size={table_size}')
+
+    # Hereafter 'raw' means operations with significand values, i.e.
+    # raw_x == x.significand == int(x * 2**precision.fractional)
+
+    raw_to_float = 2 ** (-input_precision.fractional)
+
+    def raw_func(raw_x: int) -> int:
+        return float_to_significand(func(raw_x * raw_to_float), output_precision)
+
+    raw_minus_inf = XLSFixedPoint.min_value(XLSFixedPointType.from_precision(input_precision)).significand.value
+    raw_plus_inf = XLSFixedPoint.max_value(XLSFixedPointType.from_precision(input_precision)).significand.value
+    match table_range:
+        # x = -inf..+inf
+        case LookupTableRange.FULL:
+            raw_original_x_min = raw_minus_inf
+            raw_original_x_max = raw_plus_inf
+        # x = 0..+inf
+        case LookupTableRange.NON_NEGATIVE:
+            raw_original_x_min = 0
+            raw_original_x_max = raw_plus_inf
+        # x = -inf..0
+        case LookupTableRange.NEGATIVE:
+            raw_original_x_min = raw_minus_inf
+            raw_original_x_max = -1
+
+    raw_x_min = raw_original_x_min
+    raw_x_max = raw_original_x_max
+
+    # Build input range for lookup table.
+    # If the function saturates at the table edges,
+    # we adjust the range to account for that.
+    recompute_range = True
+    while recompute_range:
+        if raw_x_max == raw_x_min:
+            # The range collapsed to one point (e.g. the quantized function is constant).
+            # log2(0) is undefined, so emit a single-entry table with the smallest step.
+            raw_log2_step = 0
+            raw_range = [raw_x_min]
+            break
+
+        # Smallest power-of-two step for which the grid fits into table_size entries (at least 1).
+        raw_log2_step = max(0, math.ceil(math.log2((raw_x_max - raw_x_min) / (table_size - 1))))
+        raw_step = 2**raw_log2_step
+        f_min = raw_func(raw_x_min)
+        f_max = raw_func(raw_x_max)
+        raw_range = list(range(raw_x_min, raw_x_max + 1, raw_step))
+
+        recompute_range = False
+        # Skip the leading grid points that repeat the value at the left edge.
+        for x in raw_range[1:]:
+            if raw_func(x) != f_min:
+                break
+            raw_x_min = x
+            recompute_range = True
+        # Skip the trailing grid points that repeat the value at the right edge. Stop at the first
+        # point that differs, mirroring the left-edge scan: only a contiguous saturated run may be
+        # dropped, and the rest of the grid does not need to be evaluated.
+        for x in reversed(raw_range[:-1]):
+            if x < raw_x_min:
+                break
+            if raw_func(x) != f_max:
+                break
+            raw_x_max = x
+            recompute_range = True
+
+    if raw_x_min != raw_original_x_min or raw_x_max != raw_original_x_max:
+        warnings.warn(
+            f'Lookup table {name} range has been reduced to account for saturation at the table edges. '
+            f'The original significand range was {raw_original_x_min}..{raw_original_x_max}, '
+            f'and the adjusted range is {raw_x_min}..{raw_x_max}.',
+            stacklevel=1,
+        )
+    if len(raw_range) < table_size:
+        warnings.warn(f'Lookup table {name} size has been reduced from {table_size} to {len(raw_range)}.', stacklevel=1)
+
+    assert 0 < len(raw_range) <= table_size
+    assert raw_range[0] == raw_x_min >= raw_original_x_min
+    assert raw_range[-1] <= raw_x_max <= raw_original_x_max
+
+    return XLSLookupTable(
+        name=name,
+        input_precision=XLSFixedPointType.from_precision(input_precision),
+        output_precision=XLSFixedPointType.from_precision(output_precision),
+        x_min=XLSFixedPoint(type=input_precision, significand=raw_x_min),
+        # The step is stored relative to real values: 2**raw_log2_step significand units.
+        log2_step=raw_log2_step - input_precision.fractional,
+        raw_table=[raw_func(x) for x in raw_range],
+    )
+
+
+def build_softmax_tables(node: Layer) -> list[XLSLookupTable]:
+    """Build the exponent table and the inverse table used by a Softmax layer.
+
+    The ``stable`` implementation tabulates ``exp(-x)`` for non-negative ``x`` with an input type one
+    bit wider than the layer input; the ``latency`` implementation tabulates ``exp(x)`` over the full
+    input range. The inverse table maps the exponent table's output type to ``1 / x``.
+
+    Returns:
+        ``[exp_table, inv_table]``.
+
+    Raises:
+        ValueError: for any other implementation.
+    """
+    default_size = int(node.get_attr('table_size'))
+    exp_size = int(node.get_attr('exp_table_size', default_size))
+    inv_size = int(node.get_attr('inv_table_size', default_size))
+    variant = node.get_attr('implementation', 'stable')
+
+    # Work on a copy: the stable variant widens the type and must not alter the layer's own precision.
+    exp_input = copy(node.get_input_variable().type.precision)
+    exp_output = node.get_attr('exp_table_t').precision
+
+    if variant == 'stable':
+        exp_input.width += 1
+        exp_input.integer += 1
+        exp_table_name = 'EXP_NEG_TABLE'
+        # Arguments of the exponent are (x_max - x_i) > 0
+        exp_domain = LookupTableRange.NON_NEGATIVE
+
+        def exponent(x):
+            return math.exp(-x)
+
+    elif variant == 'latency':
+        exp_table_name = 'EXP_TABLE'
+        # Arguments of the exponent are x_i, which can be both positive and negative
+        exp_domain = LookupTableRange.FULL
+        exponent = math.exp
+    else:
+        raise ValueError(f'Unknown softmax implementation={variant}')
+
+    inv_output = node.get_attr('inv_table_t').precision
+
+    def reciprocal(x):
+        # 1/0 is replaced by the largest value of the output type.
+        return inv_output.max if x == 0 else 1.0 / x
+
+    exp_table = build_table(
+        name=exp_table_name,
+        func=exponent,
+        table_size=exp_size,
+        input_precision=exp_input,
+        output_precision=exp_output,
+        table_range=exp_domain,
+    )
+    inv_table = build_table(
+        name='INV_TABLE',
+        func=reciprocal,
+        table_size=inv_size,
+        # The inverse table consumes the exponent table's output type.
+        input_precision=exp_output,
+        output_precision=inv_output,
+        # We're inverting sum of exponents, which is always non-negative.
+        table_range=LookupTableRange.NON_NEGATIVE,
+    )
+    return [exp_table, inv_table]
+
+
+def build_activation_table(node: Layer) -> XLSLookupTable:
+    """Build the lookup table for a table-based activation layer.
+
+    The table is named ``<ACTIVATION>_TABLE``, with a ``_NON_NEGATIVE`` or ``_NEGATIVE`` suffix when it
+    covers only that part of the input range. Input and output precisions are those of the layer's
+    input and output variables, and the size is the layer's ``table_size`` attribute.
+
+    Raises:
+        ValueError: if the activation is not one of elu, selu, softplus, softsign, tanh, sigmoid.
+    """
+    activation = node.get_attr('activation').lower()
+    table_name = f'{activation.upper()}_TABLE'
+    match activation:
+        case 'elu':
+            table_range = LookupTableRange.NEGATIVE
+            alpha = node.get_attr('activ_param')
+
+            def func(x):
+                assert x < 0, f'Building ELU table only for x < 0, got {x}'
+                return alpha * (math.exp(x) - 1)
+        case 'selu':
+            table_range = LookupTableRange.NEGATIVE
+            alpha = 1.6732632423543772848170429916717
+            scale = 1.0507009873554804934193349852946
+
+            def func(x):
+                assert x < 0, f'Building SELU table only for x < 0, got {x}'
+                return scale * alpha * (math.exp(x) - 1)
+        case 'softplus':
+            table_range = LookupTableRange.FULL
+
+            def func(x):
+                try:
+                    return math.log(1 + math.exp(x))
+                except OverflowError:
+                    # math.exp overflows a float for large x (wide input types reach such values);
+                    # there log(1 + exp(x)) equals x to double precision.
+                    return float(x)
+        case 'softsign':
+            table_range = LookupTableRange.NON_NEGATIVE
+
+            def func(x):
+                return x / (1 + abs(x))
+        case 'tanh':
+            table_range = LookupTableRange.NON_NEGATIVE
+
+            def func(x):
+                return math.tanh(x)
+        case 'sigmoid':
+            table_range = LookupTableRange.FULL
+
+            def func(x):
+                try:
+                    return 1 / (1 + math.exp(-x))
+                except OverflowError:
+                    # math.exp(-x) overflows for very negative x, where the sigmoid is 0 to double precision.
+                    return 0.0
+        case _:
+            raise ValueError(f'Unknown activation={activation}')
+
+    match table_range:
+        case LookupTableRange.FULL:
+            pass
+        case LookupTableRange.NON_NEGATIVE:
+            table_name += '_NON_NEGATIVE'
+        case LookupTableRange.NEGATIVE:
+            table_name += '_NEGATIVE'
+
+    return build_table(
+        name=table_name,
+        func=func,
+        table_size=int(node.get_attr('table_size')),
+        input_precision=node.get_input_variable().type.precision,
+        output_precision=node.get_output_variable().type.precision,
+        table_range=table_range,
+    )
+
+
+class BuildTables(OptimizerPass):
+    """Builds the lookup tables of table-based layers and stores them in the ``lookup_tables`` attribute.
+
+    Softmax layers (except the ``argmax`` implementation) get an exponent table and an inverse table;
+    the matched Activation and ParametrizedActivation layers get a single activation table.
+    """
+
+    def match(self, node: Layer) -> bool:
+        match node.class_name:
+            case 'Softmax':
+                return node.get_attr('implementation', 'stable') != 'argmax'
+            case 'Activation':
+                return node.get_attr('activation').lower() in ['selu', 'softplus', 'softsign', 'tanh', 'sigmoid']
+            case 'ParametrizedActivation':
+                # NOTE(review): build_activation_table has no 'prelu' case, so a matched 'prelu' node
+                # makes transform() raise ValueError('Unknown activation=prelu') — confirm this is intended.
+                return node.get_attr('activation').lower() in ['elu', 'prelu']
+            case _:
+                return False
+
+    def transform(self, model: ModelGraph, node: Layer) -> Literal[False]:
+        # Tables are appended to whatever the node already carries.
+        lookup_tables = node.get_attr('lookup_tables', [])
+        match node.class_name:
+            case 'Softmax':
+                lookup_tables += build_softmax_tables(node)
+            case 'Activation' | 'ParametrizedActivation':
+                lookup_tables.append(build_activation_table(node))
+            case _:
+                raise ValueError(f'Unknown layer type: {node.class_name}')
+
+        node.set_attr('lookup_tables', lookup_tables)
+        return False
diff --git a/hls4ml/backends/xls/xls_backend.py b/hls4ml/backends/xls/xls_backend.py
new file mode 100644
index 0000000000..ba7fe42597
--- /dev/null
+++ b/hls4ml/backends/xls/xls_backend.py
@@ -0,0 +1,429 @@
+# Typing imports
+from __future__ import annotations # makes all annotations into strings
+
+import functools
+import importlib
+import math
+from collections.abc import Callable, Iterable
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+
+from numpy.typing import ArrayLike, NDArray
+
+from hls4ml.backends.xls.xls_types import float_to_significand
+from hls4ml.model.types import FixedPrecisionType
+
+if TYPE_CHECKING:
+ from hls4ml.model.graph import ModelGraph
+
+import subprocess
+from warnings import warn
+
+import numpy as np
+
+from hls4ml.backends import FPGABackend
+from hls4ml.model.flow import register_flow
+from hls4ml.model.optimizer import get_backend_passes
+from hls4ml.report import parse_xls_report
+
+
+@functools.lru_cache(maxsize=1)
+def import_xls():
+    """Import the optional ``xls`` module on first use and return it (the result is cached).
+
+    Raises:
+        ModuleNotFoundError: with installation advice, if the module cannot be found.
+    """
+    try:
+        module = importlib.import_module('xls')
+    except ModuleNotFoundError as missing:
+        raise ModuleNotFoundError(
+            "XLS backend requires optional dependency 'xls'. "
+            "Please install hls4ml with XLS extras (or install package 'xls')."
+        ) from missing
+    return module
+
+
+class XLSBackend(FPGABackend):
+    def __init__(self) -> None:
+        """Create the backend under the name 'XLS' and register its layer attributes and flows."""
+        super().__init__('XLS')
+        # Placeholders; both are assigned by _register_flows() below.
+        self._writer_flow = ''
+        self._default_flow = ''
+
+        self._register_layer_attributes()
+        self._register_flows()
+
+    def _register_layer_attributes(self) -> None:
+        """Hook called from __init__; currently registers nothing."""
+        pass
+
+    def _register_flows(self) -> None:
+        """Register the flows of the XLS backend and remember the default and the writer flow.
+
+        Registers the init, quantization, optimization, XLS-attribute, writer and ``ip`` flows, and
+        warns about every backend pass that is neither part of a flow nor explicitly ignored.
+        """
+        initializers: list = self._get_layer_initializers()
+        init_flow: str = register_flow('init_layers', initializers, requires=['optimize'], backend=self.name)
+
+        quantization_passes = [
+            # 'xls:merge_batch_norm_quantized_tanh',
+            # 'xls:quantize_dense_output',
+            'fuse_consecutive_batch_normalization',
+            'xls:xnor_pooling',
+        ]
+        quantization_flow = register_flow('quantization', quantization_passes, requires=[init_flow], backend=self.name)
+
+        optimization_passes = [
+            'xls:remove_final_reshape',
+            'xls:inplace_parallel_reshape',
+            'xls:skip_softmax',
+            'infer_precision_types',
+        ]
+        optimization_flow: str = register_flow('optimize', optimization_passes, requires=[init_flow], backend=self.name)
+
+        xls_attributes = [
+            'xls:build_tables',
+            'xls:build_attr',
+        ]
+        xls_attributes_flow: str = register_flow('xls', xls_attributes, requires=[optimization_flow], backend=self.name)
+
+        # TODO: stamp is currently unused, shall we add it to myproject.x, myproject.ir, myproject.opt.ir, ...?
+        # In other backends, this is used to generate myproject-$STAMP.so.
+        # In XLS, .opt.ir file plays the same role as .so
+        # It is unclear whether we should copy or rename myproject.opt.ir to myproject-$STAMP.opt.ir.
+        writer_passes = ['make_stamp', 'xls:write_hls']
+        self._writer_flow = register_flow('write', writer_passes, requires=['xls:ip'], backend=self.name)
+
+        # Passes that are irrelevant for XLS
+        ignored_passes = [
+            f'xls:{opt_pass}'
+            for opt_pass in [
+                # io_stream only:
+                'reshape_stream',
+                'inplace_stream_flatten',
+                'repack_function_template',
+                'clone_output',
+                'clone_function_template',
+                # HGQ passes, not implemented:
+                'process_fixed_point_quantizer_layer',
+                'fixedpointquantizer_function_template',
+                'unarylut_function_template',
+                # Embedding
+                'embedding_config_template',
+                'embedding_function_template',
+                # we fix table sizes in xls:build_tables using a different method
+                'fix_softmax_table_size',
+                # BRAM not supported
+                'register_bram_weights',
+            ]
+        ]
+
+        all_passes: list = get_backend_passes(self.name)
+
+        # Every pass that is accounted for; built once instead of once per candidate pass.
+        known_passes = (
+            initializers + quantization_passes + optimization_passes + xls_attributes + writer_passes + ignored_passes
+        )
+        # Ideally, this should be empty
+        extras = [opt_pass for opt_pass in all_passes if opt_pass not in known_passes]
+
+        for opt in extras:
+            warn(f'WARNING: Optimizer "{opt}" is not part of any flow and will not be executed.')
+
+        ip_flow_requirements = [
+            'optimize',
+            init_flow,
+            quantization_flow,
+            optimization_flow,
+            xls_attributes_flow,
+        ]
+
+        self._default_flow = register_flow('ip', None, requires=ip_flow_requirements, backend=self.name)
+
+    def get_default_flow(self) -> str:
+        """Return the name of the 'ip' flow registered in _register_flows()."""
+        return self._default_flow
+
+    def get_writer_flow(self) -> str:
+        """Return the name of the 'write' flow registered in _register_flows()."""
+        return self._writer_flow
+
+    @staticmethod
+    def _to_xls_clock_period_ps(clock_period) -> int:
+        """Convert a clock period in nanoseconds to an integer number of picoseconds.
+
+        Args:
+            clock_period: the period in nanoseconds; anything accepted by ``float()``.
+
+        Returns:
+            int: the period in picoseconds, rounded to the nearest integer.
+        """
+        # Round instead of truncating: binary floating point makes e.g. 1.001 * 1000 evaluate to
+        # 1000.9999999999999, which int() would turn into 1000 ps instead of 1001 ps.
+        return round(float(clock_period) * 1000)
+
+    @staticmethod
+    def _to_xls_clock_margin_percent(clock_uncertainty: str) -> int:
+        """Turn a ClockUncertainty string such as ``'12.5%'`` into the integer percentage
+        used for the XLS option ``clock_margin_percent``, rounding up."""
+        is_percentage = isinstance(clock_uncertainty, str) and clock_uncertainty.endswith('%')
+        assert is_percentage, f'Clock uncertainty must be in percentage format, got {clock_uncertainty}'
+        percent_value = float(clock_uncertainty.strip('%'))
+        return math.ceil(percent_value)
+
+    @staticmethod
+    def _percent_to_float(percent: str) -> float:
+        """Parse a percentage string such as ``'12.5%'`` into the fraction it denotes (``0.125``)."""
+        is_percentage = isinstance(percent, str) and percent.endswith('%')
+        assert is_percentage, f'Clock uncertainty must be in percentage format, got {percent}'
+        number = float(percent.strip('%'))
+        return number / 100
+
+    def create_initial_config(
+        self,
+        part='xcu250-figd2104-2L-e',
+        clock_period=5,
+        clock_uncertainty='12.5%',
+        io_type='io_parallel',
+        write_tar=False,
+        xls_codegen_flags=None,
+        **kwargs,
+    ) -> dict[str, Any]:
+        """Create an initial configuration of the XLS backend.
+
+        Args:
+            part (str, optional): The FPGA part to be used. Defaults to 'xcu250-figd2104-2L-e';
+                if None is passed explicitly, 'xcvu13p-flga2577-2-e' is used.
+            clock_period (int, optional): The clock period in nanoseconds. Defaults to 5.
+            clock_uncertainty (str, optional): The clock uncertainty. Defaults to 12.5%.
+            io_type (str, optional): Type of implementation used. Only 'io_parallel' is currently supported.
+            write_tar (bool, optional): If True, compresses the output directory into a .tar.gz file. Defaults to False.
+            xls_codegen_flags (dict, optional): Flags to pass to the XLS codegen. When given, they are used
+                instead of the default flags (they are not merged with them). Defaults to None.
+            **kwargs: Not supported; a warning is emitted for each extra argument.
+
+        Returns:
+            dict: initial configuration.
+        """
+        config = {}
+
+        config['Part'] = part if part is not None else 'xcvu13p-flga2577-2-e'
+        config['ClockPeriod'] = clock_period if clock_period is not None else 5
+        config['ClockUncertainty'] = clock_uncertainty if clock_uncertainty is not None else '12.5%'
+        config['IOType'] = io_type if io_type is not None else 'io_parallel'
+        config['HLSConfig'] = {}
+        config['WriterConfig'] = {
+            'WriteTar': write_tar,
+        }
+
+        if xls_codegen_flags is not None:
+            # Store a copy so that later changes to the caller's dict do not leak into the config
+            # (and vice versa).
+            config['XLSCodegenFlags'] = dict(xls_codegen_flags)
+        else:
+            # Set default flags to mimic codegen_main executable behavior
+            config['XLSCodegenFlags'] = {
+                'delay_model': 'asap7',
+                'generator': 'pipeline',
+                'use_system_verilog': True,
+                'flop_inputs': True,
+                'flop_outputs': True,
+                'max_inline_depth': 5,
+                'flop_single_value_channels': True,
+                # convert nanoseconds to picoseconds
+                'clock_period_ps': self._to_xls_clock_period_ps(config['ClockPeriod']),
+                # NB: XLS needs integer percents
+                'clock_margin_percent': self._to_xls_clock_margin_percent(config['ClockUncertainty']),
+            }
+
+        for arg in kwargs:
+            warn(f'WARNING: Unknown argument {arg} for XLS backend will be ignored.')
+
+        return config
+
+    @staticmethod
+    def _ir_top_function_name(model: ModelGraph):
+        """Return the mangled name of the top function: the project name serves as both the
+        module name and the function name passed to ``mangle_dslx_name``."""
+        xls_module = import_xls()
+        project = model.config.get_project_name()
+        mangled = xls_module.mangle_dslx_name(module_name=project, function_name=project)
+        return mangled
+
+    def compile(self, model: ModelGraph) -> None:
+        """Convert the project's DSLX file to IR and optimize it.
+
+        Reads ``firmware/<project>.x`` from the output directory and writes ``<project>.ir`` and
+        ``<project>.opt.ir`` next to it. A top function cached on the model is discarded.
+
+        Raises:
+            NotImplementedError: if the configured IOType is not ``io_parallel``.
+        """
+        xls_module = import_xls()
+        requested_io = model.config.get_config_value('IOType')
+        if requested_io != 'io_parallel':
+            raise NotImplementedError(f'XLS backend only supports IOType: io_parallel, but got: {requested_io}')
+
+        project = model.config.get_project_name()
+        firmware = Path(f'{model.config.get_output_dir()}') / 'firmware'
+        # Common prefix of the .x/.ir/.opt.ir files; extensions are appended as text.
+        stem = firmware / project
+
+        unoptimized = xls_module.c_api.convert_dslx_path_to_ir(
+            path=f'{stem}.x', additional_search_paths=[str(firmware)]
+        )
+        Path(f'{stem}.ir').write_text(unoptimized)
+
+        optimized = xls_module.optimize_ir(ir=unoptimized, top=XLSBackend._ir_top_function_name(model))
+        Path(f'{stem}.opt.ir').write_text(optimized)
+
+        # This object can be heavy, so we don't want to cache it unless we call predict().
+        if hasattr(model, '_xls_top_function'):
+            del model._xls_top_function
+
+    @staticmethod
+    def _float_to_xls_ir(x: np.floating[Any] | NDArray[np.floating[Any]], precision: FixedPrecisionType):
+        """Encode a float, or a (nested) sequence of floats, as an XLS value.
+
+        A scalar becomes signed bits of ``precision.width`` holding its significand;
+        anything else becomes an array whose elements are encoded recursively.
+        """
+        xls_module = import_xls()
+        if not np.isscalar(x):
+            elements = [XLSBackend._float_to_xls_ir(item, precision) for item in x]
+            return xls_module.Value.make_array(elements)
+
+        significand = float_to_significand(x, precision)
+        return xls_module.Value.make_sbits(bit_count=precision.width, val=significand)
+
+    @staticmethod
+    def _bits_to_int(bits, signed: bool = True) -> int:
+        """Convert an ``xls.Bits`` value of any width to a Python int.
+
+        Args:
+            bits: the ``xls.Bits`` value.
+            signed: interpret the bits as two's complement (default) or as an unsigned number.
+
+        Returns:
+            int: the decoded integer.
+        """
+        n = bits.get_bit_count()
+        # Fast path for signed values only, so that signed=False is honoured for narrow values too.
+        # NOTE(review): assumes to_int64() returns the sign-extended value — confirm against the xls API.
+        if signed and n <= 64:
+            return bits.to_int64()
+        value = int.from_bytes(bits.to_bytes(), byteorder='little', signed=False)
+        # Drop padding bits beyond the declared width.
+        value &= (1 << n) - 1
+        if signed and (bits.get_bit(n - 1) == 1):
+            value -= 1 << n
+        return value
+
+    @staticmethod
+    def _xls_ir_to_float(
+        x, precision: FixedPrecisionType | Iterable[FixedPrecisionType], dtype: np.typing.DTypeLike
+    ) -> ArrayLike | tuple[ArrayLike, ...]:
+        """Decode an ``xls.Value`` into floats.
+
+        Bits are scaled by ``2**-precision.fractional``; arrays are decoded element-wise into a numpy
+        array of ``dtype``; tuples are decoded element-wise with one precision per element.
+
+        Raises:
+            ValueError: for any other kind of value.
+        """
+        xls_module = import_xls()
+        # x: xls.Value
+        kind = x.get_kind()
+        value_kind = xls_module.c_api.ValueKind
+
+        if kind == value_kind.BITS:
+            assert isinstance(precision, FixedPrecisionType), (
+                f'Precision must be FixedPrecisionType, got {type(precision)}'
+            )
+            return XLSBackend._bits_to_int(x.get_bits()) / (2**precision.fractional)
+
+        if kind == value_kind.ARRAY:
+            decoded = [
+                XLSBackend._xls_ir_to_float(x.get_element(index), precision, dtype)
+                for index in range(x.get_element_count())
+            ]
+            return np.asarray(decoded, dtype=dtype)
+
+        if kind == value_kind.TUPLE:
+            precision = tuple(precision)
+            assert len(precision) == x.get_element_count(), (
+                f'Precision mismatch for tuple: {len(precision)} != {x.get_element_count()}'
+            )
+            return tuple(
+                XLSBackend._xls_ir_to_float(x.get_element(index), precision[index], dtype)
+                for index in range(x.get_element_count())
+            )
+
+        raise ValueError(f'Unexpected output type: {x.get_kind()}')
+
+    @staticmethod
+    def get_top_function(model: ModelGraph, x: np.floating | NDArray[np.floating[Any]]) -> tuple[Callable, np.dtype]:
+        """Return the model's top function together with the float type matching the input ``x``.
+
+        The top function is created on first use and cached on the model as ``_xls_top_function``.
+
+        Raises:
+            TypeError: if the (first) input is neither float32 nor float64.
+        """
+        # Cache JIT function to avoid reparsing IR file.
+        cached = getattr(model, '_xls_top_function', None)
+        if cached is None:
+            cached = XLSBackend._make_top_function(model)
+            model._xls_top_function = cached
+
+        # TODO: this duplicates ModelGraph._get_top_function().
+        # NB: ctype is not used in XLS, but it is required by ModelGraph._predict
+        first_input = x[0] if isinstance(x, (list, tuple)) else x
+        input_dtype = np.asarray(first_input).dtype
+        if input_dtype in [np.single, np.float32]:
+            return cached, np.float32
+        if input_dtype in [np.double, np.float64]:
+            return cached, np.float64
+        raise TypeError(
+            'Invalid type ({}) of numpy array. Supported types are: single, float32, double, float64, float_.'.format(
+                input_dtype
+            )
+        )
+
+    @staticmethod
+    def _make_top_function(model: ModelGraph) -> Callable:
+        """Load ``firmware/<project>.opt.ir`` and wrap its JIT-compiled top function.
+
+        The returned callable takes the input buffers followed by the output buffers; it encodes the
+        inputs, runs the JIT function and writes the flattened results into the output buffers.
+
+        Raises:
+            FileNotFoundError: if the optimized IR file does not exist.
+        """
+        xls_module = import_xls()
+        output_dir = model.config.get_output_dir()
+        project = model.config.get_project_name()
+        opt_ir_file = Path(output_dir) / 'firmware' / f'{project}.opt.ir'
+        if not opt_ir_file.exists():
+            raise FileNotFoundError(f'Optimized IR file not found: {opt_ir_file}. Please compile your model first.')
+        package = xls_module.Package.parse_ir(opt_ir_file.read_text())
+        jit_function = package.get_function(XLSBackend._ir_top_function_name(model)).to_jit()
+
+        model_inputs = model.get_input_variables()
+        model_outputs = model.get_output_variables()
+
+        def top_function(*args):
+            n_inputs = len(model_inputs)
+            n_outputs = len(model_outputs)
+            assert len(args) == n_inputs + n_outputs, (
+                f'Expected {n_inputs} inputs and {n_outputs} outputs, got {len(args)}'
+            )
+            in_buffers, out_buffers = args[:n_inputs], args[n_inputs:]
+
+            encoded_inputs = []
+            for data, variable in zip(in_buffers, model_inputs):
+                shaped = np.asarray(data).reshape(variable.shape)
+                encoded_inputs.append(XLSBackend._float_to_xls_ir(shaped, variable.type.precision))
+            raw_result = jit_function.run(encoded_inputs)
+
+            precisions = [variable.type.precision for variable in model_outputs]
+            # A single output is decoded with its own precision, not with a one-element list.
+            precision_arg = precisions[0] if len(precisions) == 1 else precisions
+            result_dtype = np.asarray(in_buffers[0]).dtype
+            decoded = XLSBackend._xls_ir_to_float(raw_result, precision_arg, result_dtype)
+            # This is the case when len(output_vars) == 1
+            if not isinstance(decoded, tuple):
+                decoded = (decoded,)
+            for index in range(len(model_outputs)):
+                out_buffers[index][:] = np.reshape(decoded[index], -1)
+
+        return top_function
+
+    def build(
+        self,
+        model: ModelGraph,
+        reset: bool | None = None,
+        pr: bool = False,
+    ) -> dict:
+        """Builds the RTL (SystemVerilog) code and uses Vivado to return the resource utilization.
+
+        The RTL is generated from ``firmware/<project>.opt.ir`` and written to ``firmware/<project>.sv``;
+        Vivado is then run in the output directory on ``build_prj.tcl``.
+
+        Args:
+            model (ModelGraph): the hls4ml model.
+            reset (bool): the reset synthesis option; when None, the reset-related codegen flags
+                are left as configured.
+            pr (bool): place and route option
+
+        Returns:
+            dict: the report parsed by ``parse_xls_report`` from the output directory.
+
+        Raises:
+            FileNotFoundError: if the optimized IR file does not exist.
+            subprocess.CalledProcessError: if Vivado exits with a non-zero status.
+        """
+        xls = import_xls()
+        project_name = model.config.get_project_name()
+        output_dir = Path(model.config.get_output_dir())
+
+        clock_period_ns = model.config.get_config_value('ClockPeriod')
+        clock_period_ps = self._to_xls_clock_period_ps(clock_period_ns)
+
+        clock_uncertainty_str = model.config.get_config_value('ClockUncertainty')
+        clock_uncertainty_float = self._percent_to_float(clock_uncertainty_str)
+        clock_margin_percent: int = self._to_xls_clock_margin_percent(clock_uncertainty_str)
+
+        def build_codegen_flags() -> dict[str, Any]:
+            # Copy the configured flags; the clock settings always follow the model configuration.
+            flags = dict(model.config.get_config_value('XLSCodegenFlags'))
+            flags['clock_period_ps'] = clock_period_ps
+            flags['clock_margin_percent'] = clock_margin_percent
+            if reset is not None:
+                flags['reset'] = 'reset' if reset else None
+                flags['reset_data_path'] = reset
+            return flags
+
+        def build_vivado_flags() -> list[str]:
+            flags = [
+                '-mode',
+                'batch',
+                '-nolog',
+                '-nojournal',
+                '-source',
+                './build_prj.tcl',
+                '-tclargs',
+                project_name,
+                model.config.get_config_value('Part'),
+                clock_period_ps,
+                clock_uncertainty_float,
+            ]
+            if pr:
+                flags += ['--pr']
+            return [str(flag) for flag in flags]
+
+        # Generate RTL
+        firmware_dir = output_dir / 'firmware'
+
+        opt_ir_path = firmware_dir / f'{project_name}.opt.ir'
+        # Same check and message as in _make_top_function: tell the user what to do next.
+        if not opt_ir_path.exists():
+            raise FileNotFoundError(f'Optimized IR file not found: {opt_ir_path}. Please compile your model first.')
+        opt_ir_text = opt_ir_path.read_text()
+        codegen_flags = build_codegen_flags()
+
+        pkg = xls.parse_ir_package(ir=opt_ir_text, filename=str(opt_ir_path))
+        verilog_text = pkg.schedule_and_codegen(**codegen_flags).get_verilog_text()
+        sv_path = firmware_dir / f'{project_name}.sv'
+        sv_path.write_text(verilog_text)
+
+        # Run Vivado for resource report
+        vivado_command: list[str] = ['vivado'] + build_vivado_flags()
+        subprocess.run(vivado_command, cwd=output_dir, check=True)
+
+        return parse_xls_report(output_dir)
diff --git a/hls4ml/backends/xls/xls_types.py b/hls4ml/backends/xls/xls_types.py
new file mode 100644
index 0000000000..59c5b98117
--- /dev/null
+++ b/hls4ml/backends/xls/xls_types.py
@@ -0,0 +1,501 @@
+from __future__ import annotations
+
+import builtins
+from typing import Any
+
+import numpy as np
+from numpy.typing import NDArray
+
+from hls4ml.model.types import (
+ ExponentPrecisionType,
+ FixedPrecisionType,
+ IntegerPrecisionType,
+ PrecisionType,
+ RoundingMode,
+ SaturationMode,
+ TensorVariable,
+ XnorPrecisionType,
+)
+
+
+def to_signed_fixed_precision(precision: PrecisionType, allow_unsigned: bool = False) -> FixedPrecisionType:
+ """Convert precision to a signed FixedPrecisionType used by XLS."""
+ rounding_mode = RoundingMode.TRN
+ saturation_mode = SaturationMode.WRAP
+ if isinstance(precision, IntegerPrecisionType) or isinstance(precision, FixedPrecisionType):
+ integer = precision.integer
+ rounding_mode = precision.rounding_mode
+ saturation_mode = precision.saturation_mode
+ elif isinstance(precision, XnorPrecisionType):
+ integer = precision.integer
+ elif isinstance(precision, ExponentPrecisionType):
+ integer = 1
+ else:
+ raise ValueError(f'Unknown precision type: {type(precision)}')
+ fixed_precision = FixedPrecisionType(
+ width=precision.width,
+ integer=integer,
+ signed=precision.signed,
+ rounding_mode=rounding_mode,
+ saturation_mode=saturation_mode,
+ )
+ # Only signed types are supported in XLS
+ if not fixed_precision.signed:
+ if not allow_unsigned:
+ raise ValueError(f'Expected signed precision, got: {precision}')
+ fixed_precision.signed = True
+ fixed_precision.width += 1
+ fixed_precision.integer += 1
+
+ return fixed_precision
+
+
+def float_to_significand(
+ x: np.floating[Any] | NDArray[np.floating[Any]], precision: PrecisionType, allow_unsigned: bool = False
+) -> int:
+ """Convert floating point value to fixed point significand.
+
+ Returns: x * 2^precision.fractional
+ """
+ if not np.isscalar(x):
+ if not isinstance(x, np.ndarray) or x.dtype.kind != 'f':
+ x = np.asarray(x, dtype=np.float64)
+
+ if isinstance(precision, XnorPrecisionType):
+ # hls4ml stores XNOR weights as bits {0,1};
+ # We convert it to XLS FixedPoint {-1, 1}
+ x = np.where(x == 0, -1, x)
+
+ precision = to_signed_fixed_precision(precision, allow_unsigned)
+
+ width = precision.width
+ frac = precision.fractional
+ scale = 2**frac
+ # TODO support different saturation and rounding modes
+ significand = np.round(x * scale).astype(np.int64)
+ n = 2**width
+ shift = 2 ** (width - 1)
+ return (significand + shift) % n - shift
+
+
+# XLS types
+
+
+class XLSIntegerType:
+ def __init__(self, width, signed: bool):
+ self.width = width
+ self.signed = signed
+
+ def __str__(self):
+ prefix = 's' if self.signed else 'u'
+ if isinstance(self.width, int) and 1 <= self.width <= 64:
+ # u32
+ return f'{prefix}{self.width}'
+ # uN[NUM_BITS]
+ return f'{prefix}N[{self.width}]'
+
+ @staticmethod
+ def u32():
+ return XLSIntegerType(width=32, signed=False)
+
+ @staticmethod
+ def s32():
+ return XLSIntegerType(width=32, signed=True)
+
+
+class XLSFixedPointType:
+ def __init__(self, num_bits, binary_exponent):
+ self.num_bits = num_bits
+ self.binary_exponent = binary_exponent
+
+ @classmethod
+ def from_precision(cls, precision: PrecisionType, allow_unsigned: bool = False):
+ precision = to_signed_fixed_precision(precision, allow_unsigned)
+ assert precision.signed, 'XLS FixedPoint is always a signed type'
+ num_bits = precision.width
+ binary_exponent = -precision.fractional
+ return cls(num_bits=num_bits, binary_exponent=binary_exponent)
+
+ @property
+ def significand_type(self):
+ return XLSIntegerType(width=self.num_bits, signed=True)
+
+ @property
+ def precision(self):
+ return FixedPrecisionType(width=self.num_bits, integer=self.num_bits + self.binary_exponent, signed=True)
+
+ def __str__(self):
+ return f'FixedPoint<{self.num_bits}, {self.binary_exponent}>'
+
+
+def as_xls_fixed_point_type(type: XLSFixedPointType | PrecisionType, allow_unsigned: bool = False) -> XLSFixedPointType:
+ if isinstance(type, XLSFixedPointType):
+ return type
+ return XLSFixedPointType.from_precision(type, allow_unsigned)
+
+
+# 1d array type. TODO make it explicitly multidimensional?
+class XLSArrayType:
+ def __init__(
+ self, element_type, shape: int | str | tuple[int | str, ...] | list[int | str], allow_unsigned: bool = False
+ ):
+ if isinstance(element_type, PrecisionType):
+ element_type = XLSFixedPointType.from_precision(element_type, allow_unsigned)
+
+ if isinstance(shape, str) or isinstance(shape, int):
+ shape = (shape,)
+ else:
+ shape = tuple(shape)
+ assert len(shape) > 0, 'Zero-dimensional arrays are not supported'
+ if len(shape) == 1:
+ self.element_type = element_type
+ else:
+ self.element_type = XLSArrayType(element_type, shape[1:], allow_unsigned)
+ self.size = shape[0]
+
+ def as_multidimensional(self) -> tuple[Any, tuple[int | str, ...]]:
+ """Returns: (inner element type, shape)
+
+ >>> element_type = XLSFixedPointType(num_bits=16, binary_exponent=-10)
+ >>> array_2d = XLSArrayType(element_type=element_type, shape=(2, 3))
+ >>> elt, shape = array_2d.as_multidimensional()
+ >>> str(elt)
+ 'FixedPoint<16, -10>'
+ >>> shape
+ (2, 3)
+
+ """
+ if isinstance(self.element_type, XLSArrayType):
+ elt, shape = self.element_type.as_multidimensional()
+ shape = (self.size,) + shape
+ else:
+ elt = self.element_type
+ shape = (self.size,)
+ return elt, shape
+
+ @property
+ def shape(self):
+ """Returns: shape of the multidimensional array type"""
+ _, shape = self.as_multidimensional()
+ return shape
+
+ @property
+ def rank(self):
+ """Returns: rank of the multidimensional array type"""
+ return len(self.shape)
+
+ @property
+ def innermost_element_type(self):
+ """Returns: inner element type, for example:
+
+ >>> element_type = XLSFixedPointType(num_bits=16, binary_exponent=-10)
+ >>> array_2d = XLSArrayType(element_type=element_type, shape=(2, 3))
+ >>> str(array_2d.innermost_element_type)
+ 'FixedPoint<16, -10>'
+ >>> str(array_2d.element_type)
+ 'FixedPoint<16, -10>[3]'
+ """
+ elt, shape = self.as_multidimensional()
+ return elt
+
+ def __str__(self):
+ return f'{self.element_type}[{self.size}]'
+
+
+# XLS values
+
+
+class XLSInteger:
+ def __init__(self, type: XLSIntegerType | str, value: int | str):
+ self.type = type
+ self.value = value
+
+ @classmethod
+ def u32(cls, value: int | str):
+ if isinstance(value, int):
+ assert value >= 0, f'value={value} is not an unsigned integer'
+ return cls(XLSIntegerType.u32(), value)
+
+ @classmethod
+ def s32(cls, value: int | str):
+ return cls(XLSIntegerType.s32(), value)
+
+ def __str__(self):
+ return f'{self.type}:{self.value}'
+
+
+class XLSFixedPoint:
+ def __init__(
+ self,
+ type: XLSFixedPointType | PrecisionType,
+ significand: XLSInteger | int | np.integer[Any] | str,
+ allow_unsigned: bool = False,
+ ):
+ type = as_xls_fixed_point_type(type, allow_unsigned)
+ if np.issubdtype(builtins.type(significand), np.integer):
+ significand = XLSInteger(type=type.significand_type, value=significand)
+ elif isinstance(significand, XLSInteger):
+ assert significand.type.width == type.num_bits
+ assert significand.type.signed, 'FixedPoint is always a signed type'
+
+ self.type = type
+ self.significand = significand
+
+ @classmethod
+ def from_float(cls, x: np.floating[Any], precision: PrecisionType, allow_unsigned: bool = False):
+ xls_type = XLSFixedPointType.from_precision(precision, allow_unsigned)
+ return cls(type=xls_type, significand=float_to_significand(x, precision, allow_unsigned))
+
+ @classmethod
+ def min_value(cls, type: XLSFixedPointType):
+ return cls(type=type, significand=-(2 ** (type.num_bits - 1)))
+
+ @classmethod
+ def max_value(cls, type: XLSFixedPointType):
+ return cls(type=type, significand=2 ** (type.num_bits - 1) - 1)
+
+ @classmethod
+ def zero(cls, type: XLSFixedPointType):
+ return cls(type=type, significand=0)
+
+ def __str__(self):
+ # return f'fp_util::make_fixed_point<{self.type.binary_exponent}>:<{self.significand}>'
+ return f'{self.type}{{ significand: {self.significand} }}'
+
+
+# 1d array. TODO make it explicitly multidimensional?
+class XLSArray:
+ """DSLX array literal: an array type plus its contents.
+
+ ``array`` is either a sequence of elements or a str that is emitted verbatim between the
+ brackets. For a nested array type, the elements of a sequence are wrapped recursively into
+ ``XLSArray`` objects of the inner array type.
+ """
+
+ def __init__(self, array_type: XLSArrayType, array):
+ self.array_type = array_type
+
+ if not isinstance(array, str):
+ if isinstance(array_type.element_type, XLSArrayType):
+ array = [XLSArray(array_type=array_type.element_type, array=inner_array) for inner_array in array]
+ # The length can only be checked when the size is a number, not the name of a constant.
+ if not isinstance(array_type.size, str):
+ assert len(array) == array_type.size, f'Array size mismatch: expected {array_type.size}, got {len(array)}'
+ self.array = array
+
+ def __str__(self):
+ # TODO make it less verbose, e.g. replace:
+ # FixedPoint<16,-6>[2]:[FixedPoint<16,-6>{ significand = sN[16]:-1}, FixedPoint<16,-6>{ significand = sN[16]:235} ]
+ # with
+ # fp_util::make_fixed_points_1d<-6>(sN[6][2]:[-1, 235])
+ # NB: this works only when self.array contains explicit values, not string(s)!
+ if isinstance(self.array, str):
+ return f'{self.array_type}:[{self.array}]'
+ # Elements are rendered with str() and joined with ', '.
+ elements = ', '.join(map(str, self.array))
+ return f'{self.array_type}:[{elements}]'
+
+
+class XLSQualifiedName:
+ def __init__(self, name: str, module_name: str | None = None):
+ self.name = name
+ self.module_name = module_name
+
+ def __str__(self):
+ if self.module_name:
+ return f'{self.module_name}::{self.name}'
+ return self.name
+
+
+class XLSFunctionCall:
+ def __init__(self, name, params=None, args=None):
+ self.name = name
+ self.params = params or []
+ self.args = args or []
+ if isinstance(self.params, str):
+ self.params = [self.params]
+ if isinstance(self.args, str):
+ self.args = [self.args]
+
+ @property
+ def namespace(self):
+ parts = self.name.split('::')
+ match len(parts):
+ case 1:
+ return None
+ case 2:
+ return parts[0]
+ case _:
+ raise ValueError(f'Cannot extract namespace from function name: {self.name}')
+
+ def __str__(self):
+ params = ', '.join(map(str, self.params))
+ if params:
+ params = f'<{params}>'
+ args = ', '.join(map(str, self.args))
+ return f'{self.name}{params}({args})'
+
+
+class XLSConst:
+ def __init__(self, name, value, type=None):
+ self.name = name
+ self.value = value
+ self.type = type
+
+ def __str__(self):
+ type = f': {self.type}' if self.type else ''
+ return f'pub const {self.name}{type} = {self.value};'
+
+
+class XLSTypeAlias:
+ def __init__(self, name, type):
+ self.name = name
+ self.type = type
+
+ def __str__(self):
+ return f'pub type {self.name} = {self.type};'
+
+
+class XLSImport:
+ def __init__(self, name, alias=None):
+ self.name = name
+ self.alias = alias
+
+ def __str__(self):
+ as_alias = f' as {self.alias}' if self.alias else ''
+ return f'import {self.name}{as_alias};'
+
+
+class XLSVariableDefinition:
+ def __init__(self, name, value, type=None):
+ self.name = name
+ self.type = type
+ self.value = value
+
+ def __str__(self):
+ type = f': {self.type}' if self.type else ''
+ return f'let {self.name}{type} = {self.value};'
+
+
+class XLSFunctionDefinition:
+ """Public DSLX function definition: ``pub fn name<params>(args) -> output_type { body }``.
+
+ ``params`` and ``args`` are each either a ready-made str or a sequence whose items are
+ joined with ', '. Falsy values default to no params / no args, output type ``()`` and an
+ empty body.
+ """
+
+ def __init__(self, name, params, args, output_type, body):
+ self.name = name
+ self.params = params or []
+ self.args = args or []
+ self.output_type = output_type or '()'
+ self.body = body or ''
+
+ def __str__(self):
+ if isinstance(self.params, str):
+ params = self.params
+ else:
+ params = ', '.join(map(str, self.params))
+ # The angle brackets are emitted only when there is at least one parametric.
+ if params:
+ params = f'<{params}>'
+ if isinstance(self.args, str):
+ args = self.args
+ else:
+ args = ', '.join(map(str, self.args))
+ # The line breaks and leading whitespace inside the literal below are part of the output.
+ return f"""pub fn {self.name}{params}({args})
+ -> {self.output_type} {{
+ {self.body}
+}}"""
+
+
+class XLSTensorVariable:
+ """Helper class to generate XLS constants for tensor variables."""
+
+ def __init__(self, name: str, num_bits, binary_exponent, rounding_mode, saturation_mode, shape) -> None:
+ if isinstance(shape, int) or isinstance(shape, str):
+ shape = (shape,)
+ name = ''.join(filter(lambda s: s.isalnum() or s == '_', name))
+ self.name = name
+ name = name.upper()
+ self.num_bits = XLSConst(f'{name}_NUM_BITS', num_bits, type='u32')
+ self.binary_exponent = XLSConst(f'{name}_BINARY_EXPONENT', binary_exponent, type='s32')
+ self.rounding_mode = XLSConst(f'{name}_ROUNDING_MODE', f'RoundingMode::{rounding_mode}', type='RoundingMode')
+ self.overflow_mode = XLSConst(f'{name}_OVERFLOW_MODE', f'OverflowMode::{saturation_mode}', type='OverflowMode')
+ self.shape = tuple(XLSConst(f'{name}_DIM_{i}', dim, type='u32') for i, dim in enumerate(shape))
+ name = name[0].upper() + name[1:].lower()
+ self.type_alias = XLSTypeAlias(name=f'{name}_Type', type=self.to_array_type())
+ self.type_alias_bits = XLSTypeAlias(name=f'{name}_Type_Bits', type=self.to_array_type_bits())
+
+ @classmethod
+ def from_tensor_variable(cls, var: TensorVariable, name: str | None = None) -> XLSTensorVariable:
+ precision = var.type.precision
+ assert precision.signed, (
+ f'{var.__class__.__name__}: XLS supports only signed FixedPrecision, but got: {precision} ({type(precision)})'
+ )
+ element_type = XLSFixedPointType.from_precision(precision)
+ return cls(
+ name=name or var.name,
+ num_bits=element_type.num_bits,
+ binary_exponent=element_type.binary_exponent,
+ rounding_mode=precision.rounding_mode,
+ saturation_mode=precision.saturation_mode,
+ shape=var.shape,
+ )
+
+ def definitions(self) -> list[XLSConst | XLSTypeAlias]:
+ return (
+ [self.num_bits, self.binary_exponent, self.rounding_mode, self.overflow_mode]
+ + list(self.shape)
+ + [self.type_alias, self.type_alias_bits]
+ )
+
+ def to_array_type(self) -> XLSArrayType:
+ return XLSArrayType(
+ element_type=XLSFixedPointType(self.num_bits.name, binary_exponent=self.binary_exponent.name),
+ shape=tuple(dim.name for dim in self.shape),
+ )
+
+ def to_array_type_bits(self) -> XLSArrayType:
+ return XLSArrayType(
+ element_type=XLSIntegerType(width=self.num_bits.name, signed=True), shape=tuple(dim.name for dim in self.shape)
+ )
+
+
+class XLSLookupTable:
+ def __init__(
+ self,
+ name: str,
+ input_precision: XLSFixedPointType | FixedPrecisionType,
+ output_precision: XLSFixedPointType | FixedPrecisionType,
+ x_min,
+ log2_step,
+ raw_table,
+ ) -> None:
+ input_precision = as_xls_fixed_point_type(input_precision)
+ output_precision = as_xls_fixed_point_type(output_precision)
+ self.input_num_bits = XLSConst(f'{name}_INPUT_NUM_BITS', input_precision.num_bits, 'u32')
+ self.input_binary_exponent = XLSConst(f'{name}_INPUT_BINARY_EXPONENT', input_precision.binary_exponent, 's32')
+ self.output_num_bits = XLSConst(f'{name}_OUTPUT_NUM_BITS', output_precision.num_bits, 'u32')
+ self.output_binary_exponent = XLSConst(f'{name}_OUTPUT_BINARY_EXPONENT', output_precision.binary_exponent, 's32')
+ self.size = XLSConst(f'{name}_SIZE', len(raw_table), 'u32')
+ self.log2_step = XLSConst(f'{name}_LOG2_STEP', log2_step, 's32')
+ self.x_min = XLSConst(
+ f'{name}_X_MIN',
+ x_min,
+ XLSFixedPointType(num_bits=f'{name}_INPUT_NUM_BITS', binary_exponent=f'{name}_INPUT_BINARY_EXPONENT'),
+ )
+ int_table = XLSArray(
+ array_type=XLSArrayType(
+ element_type=XLSIntegerType(width=f'{name}_OUTPUT_NUM_BITS', signed=True), shape=f'{name}_SIZE'
+ ),
+ array=raw_table,
+ )
+ fixed_point_table = XLSFunctionCall(
+ name='fixed_point_util::make_fixed_points_1d', params=[self.output_binary_exponent.name], args=[int_table]
+ )
+ self.lookup_table = XLSConst(
+ name=name,
+ value=XLSFunctionCall(
+ name='lookup_table::create', params=[self.log2_step.name], args=[x_min, fixed_point_table]
+ ),
+ )
+
+ def definitions(self) -> list[XLSConst]:
+ return [
+ self.input_num_bits,
+ self.input_binary_exponent,
+ self.output_num_bits,
+ self.output_binary_exponent,
+ self.size,
+ self.log2_step,
+ self.x_min,
+ self.lookup_table,
+ ]
+
+ def __str__(self):
+ return '\n'.join(map(str, self.definitions()))
diff --git a/hls4ml/model/graph.py b/hls4ml/model/graph.py
index 4351b78950..d9399b38bf 100644
--- a/hls4ml/model/graph.py
+++ b/hls4ml/model/graph.py
@@ -820,7 +820,12 @@ def _compile(self):
dlclose_func(self._top_function_lib._handle)
self._top_function_lib = ctypes.cdll.LoadLibrary(lib_name)
- def _get_top_function(self, x):
+ def _get_top_function(self, x, *args, **kwargs):
+ backend = self.config.backend
+
+ if hasattr(backend, 'get_top_function') and callable(backend.get_top_function):
+ return backend.get_top_function(self, x, *args, **kwargs)
+
if self._top_function_lib is None:
raise Exception('Model not compiled')
if len(self.get_input_variables()) == 1:
diff --git a/hls4ml/report/__init__.py b/hls4ml/report/__init__.py
index 4d3641a5ac..4692741d61 100644
--- a/hls4ml/report/__init__.py
+++ b/hls4ml/report/__init__.py
@@ -18,3 +18,4 @@
print_vivado_report, # noqa: F401
read_vivado_report, # noqa: F401
)
+from hls4ml.report.xls_report import parse_xls_report # noqa: F401
diff --git a/hls4ml/report/xls_report.py b/hls4ml/report/xls_report.py
new file mode 100644
index 0000000000..b2cd17dec0
--- /dev/null
+++ b/hls4ml/report/xls_report.py
@@ -0,0 +1,47 @@
+import os
+import re
+from pathlib import Path
+
+
+def _get_project_name(path) -> str:
+ project_path = Path(path) / 'firmware'
+ sv_files = list(project_path.glob('*.sv'))
+ return sv_files[0].stem
+
+
+def parse_xls_report(hls_dir) -> dict:
+ if not os.path.exists(hls_dir):
+ print(f'Path {hls_dir} does not exist. Exiting.')
+ return {}
+
+ project_name = _get_project_name(hls_dir)
+ report_dir = Path(hls_dir) / f'output_{project_name}' / 'reports'
+
+ vivado_syn_file = report_dir / f'{project_name}_post_synth_util.rpt'
+ report = {}
+ if os.path.isfile(vivado_syn_file):
+ vivado_synth_rpt = {}
+ with open(vivado_syn_file) as f:
+ section = 0
+ for line in f.readlines():
+ match = re.match(r'^(\d)\.', line)
+ if match:
+ section = int(match.group(1))
+ # Sometimes, phrases such as 'CLB Registers' can show up in the non-tabular sections of the report
+ if '|' in line:
+ # CLB (2019.X) vs. Slice (2020.X)
+ if ('CLB LUTs' in line or 'Slice LUTs' in line) and section == 1:
+ vivado_synth_rpt['LUT'] = line.split('|')[2].strip()
+ elif ('CLB Registers' in line or 'Slice Registers' in line) and section == 1:
+ vivado_synth_rpt['FF'] = line.split('|')[2].strip()
+ elif 'Block RAM Tile' in line and section == 2:
+ vivado_synth_rpt['BRAM_18K'] = line.split('|')[2].strip()
+ elif 'URAM' in line and section == 2:
+ vivado_synth_rpt['URAM'] = line.split('|')[2].strip()
+ elif 'DSPs' in line and section == 3:
+ vivado_synth_rpt['DSP48E'] = line.split('|')[2].strip()
+ report['VivadoSynthReport'] = vivado_synth_rpt
+ else:
+ print(f'Vivado synthesis report not found at {vivado_syn_file}.')
+
+ return report
diff --git a/hls4ml/templates/xls/build_prj.tcl b/hls4ml/templates/xls/build_prj.tcl
new file mode 100644
index 0000000000..bb383a1776
--- /dev/null
+++ b/hls4ml/templates/xls/build_prj.tcl
@@ -0,0 +1,56 @@
+# build_prj.tcl
+# Usage:
+# vivado -mode batch -nolog -nojournal -source build_prj.tcl -tclargs <project_name> <part> <clock_period> <clock_uncertainty> [--pr]
+
+if {[llength $argv] < 4} {
+    # The square brackets are escaped: inside a double-quoted string Tcl performs command
+    # substitution, so an unescaped \[--pr\] would abort with 'invalid command name "--pr"'
+    # instead of printing the usage.
+    puts stderr "ERROR: missing arguments\nUsage: vivado -mode batch -nolog -nojournal -source build_prj.tcl -tclargs <project_name> <part> <clock_period> <clock_uncertainty> \[--pr\]"
+    exit 1
+}
+
+# get arguments
+set project_name [lindex $argv 0]
+set board [lindex $argv 1]
+# clock_period is used unchanged as `create_clock -period` in constraints.xdc.
+# NOTE(review): confirm that the caller passes it in the unit Vivado expects there.
+set clock_period [lindex $argv 2]
+# Fraction of the clock period; constraints.xdc multiplies it by clock_period.
+set clock_uncertainty [lindex $argv 3]
+# NOTE(review): do_pr is parsed here but not read anywhere else in this script.
+set do_pr 0
+if {[llength $argv] > 4 && [lindex $argv 4] eq "--pr"} {
+ set do_pr 1
+}
+
+# All paths are resolved relative to the directory that contains this script.
+set prj_root [file normalize [file dirname [info script]]]
+set prj_files [glob -nocomplain "${prj_root}/firmware/*.sv"]
+set output_dir "${prj_root}/output_${project_name}"
+set top_module "__${project_name}__${project_name}"
+
+# Parameters used in xdc
+set xdc_path "${prj_root}/constraints.xdc"
+set uncertainty_hold_r $clock_uncertainty
+set uncertainty_setup_r $clock_uncertainty
+# Input/output delays as fractions of the clock period.
+set delay_max_r 0.4
+set delay_min_r 0.2
+
+
+# NOTE(review): source_type is not read anywhere else in this script.
+set source_type "verilog"
+
+create_project $project_name "${output_dir}/$project_name" -force -part $board
+
+set_property DEFAULT_LIB work [current_project]
+set_property TARGET_LANGUAGE Verilog [current_project]
+
+read_verilog $prj_files
+read_xdc "${xdc_path}" -mode out_of_context
+
+set_property top $top_module [current_fileset]
+
+file mkdir $output_dir
+file mkdir "${output_dir}/reports"
+
+# synth
+synth_design -top $top_module -mode out_of_context -global_retiming on \
+ -flatten_hierarchy full -resource_sharing auto -directive AreaOptimized_High
+
+write_checkpoint -force "${output_dir}/${project_name}_post_synth.dcp"
+
+# Post-synthesis reports
+report_timing_summary -file "${output_dir}/reports/${project_name}_post_synth_timing.rpt"
+report_power -file "${output_dir}/reports/${project_name}_post_synth_power.rpt"
+report_utilization -file "${output_dir}/reports/${project_name}_post_synth_util.rpt"
diff --git a/hls4ml/templates/xls/constraints.xdc b/hls4ml/templates/xls/constraints.xdc
new file mode 100644
index 0000000000..4b4a9ab80a
--- /dev/null
+++ b/hls4ml/templates/xls/constraints.xdc
@@ -0,0 +1,21 @@
+
+
+# Timing constraints for the synthesized design.
+# Reads the variables clock_period, uncertainty_setup_r, uncertainty_hold_r, delay_max_r and
+# delay_min_r, which are not defined in this file.
+# NOTE(review): presumably they are the ones set by build_prj.tcl before read_xdc -- confirm.
+
+# Calculate actual uncertainty values
+set uncertainty_setup [expr {$clock_period * $uncertainty_setup_r}]
+set uncertainty_hold [expr {$clock_period * $uncertainty_hold_r}]
+set delay_max [expr {$clock_period * $delay_max_r}]
+set delay_min [expr {$clock_period * $delay_min_r}]
+
+# Create clock with variable period
+create_clock -period $clock_period -name sys_clk [get_ports {clk}]
+
+# Input/Output constraints
+# The port patterns assume a bus named x for inputs and a bus named out for outputs.
+set_input_delay -clock sys_clk -max $delay_max [get_ports {x[*]}]
+set_input_delay -clock sys_clk -min $delay_min [get_ports {x[*]}]
+
+set_output_delay -clock sys_clk -max $delay_max [get_ports {out[*]}]
+set_output_delay -clock sys_clk -min $delay_min [get_ports {out[*]}]
+
+# Apply calculated uncertainty values
+set_clock_uncertainty -setup $uncertainty_setup [get_clocks sys_clk]
+set_clock_uncertainty -hold $uncertainty_hold [get_clocks sys_clk]
diff --git a/hls4ml/templates/xls/firmware/ap_types/fixed_point_util.x b/hls4ml/templates/xls/firmware/ap_types/fixed_point_util.x
new file mode 100644
index 0000000000..488c53e787
--- /dev/null
+++ b/hls4ml/templates/xls/firmware/ap_types/fixed_point_util.x
@@ -0,0 +1,1053 @@
+// Collection of utility functions for fixed_point::FixedPoint.
+// Here we use abbreviations NB -> NUM_BITS, BE -> BINARY_EXPONENT.
+// fixed_point::FixedPoint{significand: sN[NB]} represents a real number (significand * 2^BE)
+
+import std;
+import fixed_point;
+import round;
+
+type FixedPoint = fixed_point::FixedPoint;
+type Sign = round::Sign;
+
+// All modes from hls4ml.model.types.RoundingMode
+// NB: do not confuse with round.RoundingMode!
+// TODO: not all modes are currently supported, see convert_rounding_mode()
+// u3 is wide enough for the largest discriminant below (7).
+type RoundingModeIntegerType = u3;
+pub enum RoundingMode: RoundingModeIntegerType {
+ // Truncate towards -inf
+ TRN = 1,
+ // Truncate towards 0
+ TRN_ZERO = 2,
+ // Round towards +inf
+ RND = 3,
+ // Round towards 0
+ RND_ZERO = 4,
+ // Round towards +-inf
+ RND_INF = 5,
+ // Round towards -inf
+ RND_MIN_INF = 6,
+ // Round towards nearest even
+ RND_CONV = 7
+}
+
+// Same overflow modes as in ac_fixed type and in hls4ml
+// u2 is wide enough for the largest discriminant below (3).
+type OverflowModeIntegerType = u2;
+pub enum OverflowMode: OverflowModeIntegerType {
+ // Drop bits to the left of MSB
+ WRAP = 0,
+ // Saturate to [MIN, MAX]
+ SAT = 1,
+ // Set to 0 on overflow
+ SAT_ZERO = 2,
+ // Saturate to [-MAX, MAX]
+ SAT_SYM = 3
+}
+
+// === Non-public functions copied from stdlib/fixed_point.x ===
+
+// Returns the position of the most significant bit, where 0 is the bit just left of the binary
+// point.
+//
+// E.g. consider a value like x.xxxb, which corresponds to NB=4 BE=-3.
+// most_significant_bit_position(4,-3) is 0
+//
+// The result is NB + BE - 1, computed in s33: both the u32 and the s32 operand fit into s33,
+// so the casts themselves lose no information.
+fn most_significant_bit_position(NB: u32, BE: s32) -> s33 { NB as s33 + BE as s33 - s33:1 }
+
+// Returns the position of the least significant bit, where 0 is the bit just left of the binary
+// point.
+//
+// E.g. consider a value like xxxx.b, which corresponds to NB=4 BE=0.
+// least_significant_bit_position(4,0) is 0
+//
+// The position equals BE; NB is accepted but not used.
+fn least_significant_bit_position(NB: u32, BE: s32) -> s32 { BE }
+
+// Returns the number of representable bits where two fixed point numbers overlap.
+//
+// These examples use x to indicate a representable bit:
+// num_bits_overlapping(2,-1, 2,-1) -> x.x and x.x overlap = 2
+// num_bits_overlapping(2, -1, 3, -2) -> x.x and x.xx overlap = 2
+// num_bits_overlapping(4, 0, 2, -1) -> xxxx and x.x overlap = 1
+// num_bits_overlapping(4, 1, 1, 0) -> xxxx0 and x overlap = 0
+// num_bits_overlapping(4, 0, 2, -2) -> xxxx and .xx overlap = 0
+// num_bits_overlapping(4, 0, 2, -3) -> xxxx and .0xx overlap = 0
+pub fn num_bits_overlapping(NB_A: u32, BE_A: s32, NB_B: u32, BE_B: s32) -> u32 {
+ let msb_a = most_significant_bit_position(NB_A, BE_A);
+ let msb_b = most_significant_bit_position(NB_B, BE_B);
+ let lsb_a = least_significant_bit_position(NB_A, BE_A) as s33;
+ let lsb_b = least_significant_bit_position(NB_B, BE_B) as s33;
+ // The overlap spans from the higher of the two LSBs to the lower of the two MSBs.
+ let overlap = std::min(msb_a, msb_b) - std::max(lsb_a, lsb_b) + s33:1;
+ // A negative span means the ranges are disjoint, which is reported as 0.
+ std::max(overlap, s33:0) as u32
+}
+
+// Returns the total width of two fixed point numbers when their binary points are aligned and the
+// representable bits are unioned. Includes the bits that would always be zero if these values were
+// aligned and then ANDed or ORed.
+//
+// The width is (highest MSB position) - (lowest LSB position) + 1.
+pub fn aligned_width(NB_A: u32, BE_A: s32, NB_B: u32, BE_B: s32) -> u32 {
+ assert!(NB_A > u32:0, "0_width_will_yield_nonsensical_results");
+ assert!(NB_B > u32:0, "0_width_will_yield_nonsensical_results");
+
+ let msb_a = most_significant_bit_position(NB_A, BE_A);
+ let msb_b = most_significant_bit_position(NB_B, BE_B);
+ let lsb_a = least_significant_bit_position(NB_A, BE_A);
+ let lsb_b = least_significant_bit_position(NB_B, BE_B);
+ let msb = std::max(msb_a, msb_b);
+ let lsb = std::min(lsb_a, lsb_b) as s33;
+ let NB = msb - lsb + s33:1;
+ NB as u32
+}
+
+// === Create FixedPoint constants ===
+
+// Builds the FixedPoint whose significand is 1 shifted left by |BE| bits.
+// NOTE(review): NB and BE are used below but no parametric list is visible in the signature --
+// confirm against the original file.
+pub fn one() -> FixedPoint {
+ // If BE > 0, 1 is below quantization limit
+ const_assert!(BE <= s32:0);
+ let SHIFT = std::abs(BE) as u32;
+ // NOTE(review): this bound also admits SHIFT == NB - 1, where only the top bit of the signed
+ // significand is set, and SHIFT == NB, where the bit is shifted out -- confirm whether a
+ // stricter bound is intended.
+ const_assert!(SHIFT <= NB);
+ let x = sN[NB]:1 << SHIFT;
+ fixed_point::make_fixed_point(x)
+}
+
+// Builds the FixedPoint whose significand is std::signed_max_value().
+pub fn max_value() -> FixedPoint {
+ fixed_point::make_fixed_point(std::signed_max_value())
+}
+
+// Builds the FixedPoint whose significand is std::signed_min_value().
+pub fn min_value() -> FixedPoint {
+ fixed_point::make_fixed_point(std::signed_min_value())
+}
+
+// === Create FixedPoint arrays from arrays of significands sN[NB] ===
+
+
+// Converts a 1D array of significands into a 1D array of FixedPoint values by mapping
+// fixed_point::make_fixed_point over it.
+// NOTE(review): NB and DIM are used below, but only a blank line is visible where the
+// parametric list would be -- confirm against the original file.
+pub fn make_fixed_points_1d
+
+ (significands: sN[NB][DIM])
+ -> FixedPoint[DIM] {
+ map(significands, fixed_point::make_fixed_point)
+}
+
+// Converts a 2D array of significands by mapping make_fixed_points_1d over its rows.
+// NOTE(review): only a blank line is visible where the parametric list would be -- confirm.
+pub fn make_fixed_points_2d
+
+ (significands: sN[NB][DIM_1][DIM_0])
+ -> FixedPoint[DIM_1][DIM_0] {
+ map(significands, make_fixed_points_1d)
+}
+
+// Converts a 3D array of significands by mapping make_fixed_points_2d over its outermost
+// dimension.
+// NOTE(review): only a blank line is visible where the parametric list would be -- confirm.
+pub fn make_fixed_points_3d
+
+ (significands: sN[NB][DIM_2][DIM_1][DIM_0])
+ -> FixedPoint[DIM_2][DIM_1][DIM_0] {
+ map(significands, make_fixed_points_2d)
+}
+
+// Converts a 4D array of significands by mapping make_fixed_points_3d over its outermost
+// dimension.
+// NOTE(review): only a blank line is visible where the parametric list would be -- confirm.
+pub fn make_fixed_points_4d
+
+ (significands: sN[NB][DIM_3][DIM_2][DIM_1][DIM_0])
+ -> FixedPoint[DIM_3][DIM_2][DIM_1][DIM_0] {
+ map(significands, make_fixed_points_3d)
+}
+
+// Returns a 1D array of DIM elements, each equal to `value`.
+// NOTE(review): only a blank line is visible where the parametric list would be -- confirm.
+pub fn const_array_1d
+
+ (value: FixedPoint)
+ -> FixedPoint[DIM] {
+ FixedPoint[DIM]:[value, ...]
+}
+
+// Returns a 2D array filled with `value`: every row is const_array_1d(value).
+// NOTE(review): only a blank line is visible where the parametric list would be -- confirm.
+pub fn const_array_2d
+
+ (value: FixedPoint)
+ -> FixedPoint[DIM_1][DIM_0] {
+ FixedPoint[DIM_1][DIM_0]:[const_array_1d(value), ...]
+}
+
+// Returns a 3D array filled with `value`: every outermost element is const_array_2d(value).
+// NOTE(review): only a blank line is visible where the parametric list would be -- confirm.
+pub fn const_array_3d
+
+ (value: FixedPoint)
+ -> FixedPoint[DIM_2][DIM_1][DIM_0] {
+ FixedPoint[DIM_2][DIM_1][DIM_0]:[const_array_2d(value), ...]
+}
+
+// Returns a 4D array filled with `value`: every outermost element is const_array_3d(value).
+// NOTE(review): only a blank line is visible where the parametric list would be -- confirm.
+pub fn const_array_4d
+
+ (value: FixedPoint)
+ -> FixedPoint[DIM_3][DIM_2][DIM_1][DIM_0] {
+ FixedPoint[DIM_3][DIM_2][DIM_1][DIM_0]:[const_array_3d(value), ...]
+}
+
+
+// === Compare ===
+
+// Three-way comparison result. The discriminants are ordered (LESS < EQUAL < GREATER), which
+// the predicates below rely on when they compare the values cast to s2.
+pub enum Compare: s2 {
+ LESS = -1,
+ EQUAL = 0,
+ GREATER = 1
+}
+
+// Three-way comparison of two fixed point numbers that may have different formats.
+// The sign of the significand of fixed_point::sub(a, b) decides the result: zero -> EQUAL,
+// most significant bit set (negative) -> LESS, otherwise GREATER.
+// NOTE(review): the parameter types carry no <NB_*, BE_*> arguments in this view although
+// the parametrics are declared -- confirm against the original file.
+pub fn compare<
+ NB_A: u32, BE_A: s32,
+ NB_B: u32, BE_B: s32
+>(
+ a: FixedPoint,
+ b: FixedPoint
+) -> Compare {
+ let diff = fixed_point::sub(a, b).significand;
+ if (diff == 0)
+ { Compare::EQUAL }
+ else if (std::msb(diff) == u1:1)
+ { Compare::LESS }
+ else
+ { Compare::GREATER }
+}
+
+// Returns true iff compare(a, b) is GREATER, i.e. a > b.
+pub fn greater<
+ NB_A: u32, BE_A: s32,
+ NB_B: u32, BE_B: s32
+>(
+ a: FixedPoint,
+ b: FixedPoint
+) -> bool {
+ compare(a, b) as s2 == Compare::GREATER as s2
+}
+
+// Returns true iff compare(a, b) is EQUAL or GREATER, i.e. a >= b.
+pub fn greater_or_equal<
+ NB_A: u32, BE_A: s32,
+ NB_B: u32, BE_B: s32
+>(
+ a: FixedPoint,
+ b: FixedPoint
+) -> bool {
+ compare(a, b) as s2 >= Compare::EQUAL as s2
+}
+
+// Returns true iff compare(a, b) is LESS, i.e. a < b.
+pub fn less<
+ NB_A: u32, BE_A: s32,
+ NB_B: u32, BE_B: s32
+>(
+ a: FixedPoint,
+ b: FixedPoint
+) -> bool {
+ compare(a, b) as s2 == Compare::LESS as s2
+}
+
+// Returns true iff compare(a, b) is LESS or EQUAL, i.e. a <= b.
+pub fn less_or_equal<
+ NB_A: u32, BE_A: s32,
+ NB_B: u32, BE_B: s32
+>(
+ a: FixedPoint,
+ b: FixedPoint
+) -> bool {
+ compare(a, b) as s2 <= Compare::EQUAL as s2
+}
+
+// Returns true iff compare(a, b) is EQUAL, i.e. a and b represent the same value.
+pub fn equal<
+ NB_A: u32, BE_A: s32,
+ NB_B: u32, BE_B: s32
+>(
+ a: FixedPoint,
+ b: FixedPoint
+) -> bool {
+ compare(a, b) as s2 == Compare::EQUAL as s2
+}
+
+// Test helper: asserts that compare(a, b) yields `expected_compare_result` and that all five
+// predicates (less, less_or_equal, equal, greater_or_equal, greater) agree with it.
+fn check_compare_impl<
+ NB_A: u32, BE_A: s32,
+ NB_B: u32, BE_B: s32
+>(
+ a: FixedPoint,
+ b: FixedPoint,
+ expected_compare_result: Compare
+) {
+ let compare_result = compare(a, b);
+ assert_eq(compare_result as s2, expected_compare_result as s2);
+
+ match expected_compare_result {
+ Compare::LESS => {
+ assert_eq(less(a,b), true);
+ assert_eq(less_or_equal(a,b), true);
+ assert_eq(equal(a,b), false);
+ assert_eq(greater_or_equal(a,b), false);
+ assert_eq(greater(a,b), false);
+ },
+ Compare::EQUAL => {
+ assert_eq(less(a,b), false);
+ assert_eq(less_or_equal(a,b), true);
+ assert_eq(equal(a,b), true);
+ assert_eq(greater_or_equal(a,b), true);
+ assert_eq(greater(a,b), false);
+ },
+ Compare::GREATER => {
+ assert_eq(less(a,b), false);
+ assert_eq(less_or_equal(a,b), false);
+ assert_eq(equal(a,b), false);
+ assert_eq(greater_or_equal(a,b), true);
+ assert_eq(greater(a,b), true);
+ }
+ };
+}
+
+// Test helper: runs check_compare_impl for (a, b) with the expected result and for (b, a)
+// with the mirrored result (LESS <-> GREATER, EQUAL stays EQUAL).
+fn check_compare<
+ NB_A: u32, BE_A: s32,
+ NB_B: u32, BE_B: s32
+>(
+ a: FixedPoint,
+ b: FixedPoint,
+ expected_compare_result: Compare
+) {
+ check_compare_impl(a, b, expected_compare_result);
+ check_compare_impl(b, a, match expected_compare_result {
+ Compare::LESS => Compare::GREATER,
+ Compare::EQUAL => Compare::EQUAL,
+ Compare::GREATER => Compare::LESS
+ });
+}
+
+// Compares the values -1, 0, 1, 2 encoded as s3 integers against the same values encoded in
+// wider formats with fractional bits, for every pair of values.
+#[test]
+fn test_compare() {
+    let minus_one = fixed_point::from_integer(s3:-1);
+    let zero = fixed_point::from_integer(s3:0);
+    let one = fixed_point::from_integer(s3:1);
+    let two = fixed_point::from_integer(s3:2);
+
+    // -256 * 2^-8 = -1, 0 * 2^-4 = 0, 32 * 2^-5 = 1, 4 * 2^-1 = 2
+    let minus_one_big = fixed_point::make_fixed_point<-8>(s16:-256);
+    let zero_big = fixed_point::make_fixed_point<-4>(s8:0);
+    let one_big = fixed_point::make_fixed_point<-5>(s12:32);
+    let two_big = fixed_point::make_fixed_point<-1>(s12:4);
+
+    let values = [minus_one, zero, one, two];
+    // Cannot make it an array because of different types
+    let values_big = (minus_one_big, zero_big, one_big, two_big);
+
+    // Smoke check before the exhaustive loop (the same call used to be repeated twice).
+    check_compare(minus_one, minus_one_big, Compare::EQUAL);
+
+    // Both sequences are in ascending order, so the expected result follows from the indices.
+    for (i, _) in u32:0..4 {
+        for (j, _) in u32:0..4 {
+            let expected_result = if (i < j) {
+                Compare::LESS
+            } else if (i == j) {
+                Compare::EQUAL
+            } else {
+                Compare::GREATER
+            };
+            let a = values[i];
+            // values_big[i] or values_big.i does not compile,
+            // so we iterate manually
+            match j {
+                u32:0 => check_compare(a, values_big.0, expected_result),
+                u32:1 => check_compare(a, values_big.1, expected_result),
+                u32:2 => check_compare(a, values_big.2, expected_result),
+                u32:3 => check_compare(a, values_big.3, expected_result),
+                _ => fail!("index_out_of_bounds", ())
+            }
+        }(())
+    }(())
+}
+
+
+// === Transpose ===
+
+// Transposes a 2D array: element x[i][j] is written to position (j, i) of the result.
+// NOTE(review): only a blank line is visible where the parametric list would be, and the
+// zero! invocation below looks truncated -- confirm against the original file.
+pub fn transpose
+
+(x: FixedPoint[DIM_1][DIM_0])
+-> FixedPoint[DIM_0][DIM_1] {
+ let res = zero![DIM_0][DIM_1]>();
+ for (i, res) in 0..DIM_0 {
+ for (j, res) in 0..DIM_1 {
+ update(res, (j,i), x[i][j])
+ }(res)
+ }(res)
+}
+
+// Transposing a 2x3 array gives the expected 3x2 array, and transposing back restores it.
+#[test]
+fn test_transpose() {
+ let x = make_fixed_points_2d<0>([[s16:1, 2, 3], [s16:4, 5, 6]]);
+ let x_t = make_fixed_points_2d<0>([[s16:1, 4], [s16:2, 5], [s16:3, 6]]);
+ assert_eq(x_t, transpose(x));
+ assert_eq(x, transpose(x_t));
+}
+
+// Reshape to and from 1D arrays with C-style (row-major) ordering.
+
+// Flattens a 2D array: element x[i][j] is written to index i * DIM_1 + j of the result.
+// NOTE(review): the zero! invocation below looks truncated -- confirm against the original
+// file.
+pub fn flatten_2d<
+ NB: u32, BE: s32,
+ DIM_0: u32, DIM_1: u32,
+ DIM: u32 = {DIM_0 * DIM_1}
+>
+(x: FixedPoint[DIM_1][DIM_0])
+-> FixedPoint[DIM] {
+ let res = zero![DIM]>();
+ for (i, res) in 0..DIM_0 {
+ for (j, res) in 0..DIM_1 {
+ update(res, i * DIM_1 + j, x[i][j])
+ }(res)
+ }(res)
+}
+
+// Flattens a 3D array in two steps: flatten_2d is mapped over the outermost dimension, and the
+// resulting 2D array is flattened with flatten_2d again.
+pub fn flatten_3d<
+ NB: u32, BE: s32,
+ DIM_0: u32, DIM_1: u32, DIM_2: u32,
+ DIM: u32 = {DIM_0 * DIM_1 * DIM_2}
+>(x: FixedPoint[DIM_2][DIM_1][DIM_0])
+-> FixedPoint[DIM] {
+ flatten_2d(map(x, flatten_2d))
+}
+// Flattens a 4D array into a 1D array of DIM = DIM_0 * DIM_1 * DIM_2 * DIM_3 elements.
+pub fn flatten_4d<
+ NB: u32, BE: s32,
+ DIM_0: u32, DIM_1: u32, DIM_2: u32, DIM_3: u32,
+ DIM: u32 = {DIM_0 * DIM_1 * DIM_2 * DIM_3}
+>(x: FixedPoint[DIM_3][DIM_2][DIM_1][DIM_0])
+-> FixedPoint[DIM] {
+ flatten_2d(map(x, flatten_3d)) // map() flattens every 3D element x[i] with flatten_3d; the outer call flattens the resulting 2D array
+}
+// Inverse of flatten_2d: element x[i * DIM_1 + j] of the 1D input is written to result[i][j].
+pub fn reshape_to_2d<
+ DIM_0: u32, DIM_1: u32,
+ NB: u32, BE: s32,
+ DIM: u32 = {DIM_0 * DIM_1}>
+(x: FixedPoint[DIM])
+-> FixedPoint[DIM_1][DIM_0] {
+ let res = zero![DIM_1][DIM_0]>(); // all-zero array of the output shape, filled in below
+ for (i, res) in 0..DIM_0 {
+ for (j, res) in 0..DIM_1 {
+ update(res, (i, j), x[i * DIM_1 + j]) // same row-major index as in flatten_2d
+ }(res)
+ }(res)
+}
+// Reshapes a 1D array of DIM = DIM_0 * DIM_1 * DIM_2 elements into a 3D array, reusing reshape_to_2d twice.
+pub fn reshape_to_3d<
+ DIM_0: u32, DIM_1: u32, DIM_2: u32,
+ NB: u32, BE: s32,
+ DIM: u32 = {DIM_0 * DIM_1 * DIM_2}>
+(x: FixedPoint[DIM])
+-> FixedPoint[DIM_2][DIM_1][DIM_0] {
+ let x_2d = reshape_to_2d(x); // first split the flat array into rows; NOTE(review): the row shape is not spelled out on this line - confirm it is DIM_0 rows of DIM_1 * DIM_2
+ map(x_2d, reshape_to_2d) // then reshape every row into a 2D array
+}
+// Reshapes a 1D array of DIM = DIM_0 * DIM_1 * DIM_2 * DIM_3 elements into a 4D array.
+pub fn reshape_to_4d<
+ DIM_0: u32, DIM_1: u32, DIM_2: u32, DIM_3: u32,
+ NB: u32, BE: s32,
+ DIM: u32 = {DIM_0 * DIM_1 * DIM_2 * DIM_3}>
+(x: FixedPoint[DIM])
+-> FixedPoint[DIM_3][DIM_2][DIM_1][DIM_0] {
+ let x_2d = reshape_to_2d(x); // first split the flat array into rows; NOTE(review): the row shape is not spelled out on this line - confirm it is DIM_0 rows of DIM_1 * DIM_2 * DIM_3
+ map(x_2d, reshape_to_3d) // then reshape every row into a 3D array
+}
+// Round trip between a flat array of 6 elements and its 2x3 form.
+#[test]
+fn test_reshape_2d() {
+ let x_flat = make_fixed_points_1d<0>([s16:1, 2, 3, 4, 5, 6]);
+ let x = make_fixed_points_2d<0>([[s16:1, 2, 3], [s16:4, 5, 6]]);
+ assert_eq(x, reshape_to_2d<2,3>(x_flat));
+ assert_eq(x_flat, flatten_2d(x));
+}
+// Round trip between a flat array of 12 elements and its 2x3x2 form.
+#[test]
+fn test_reshape_3d() {
+ let x_flat = make_fixed_points_1d<0>([s16:1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]);
+ let x = make_fixed_points_3d<0>([[[s16:1, 2], [s16:3, 4], [s16:5, 6]], [[s16:7, 8], [s16:9, 10], [s16:11, 12]]]);
+ assert_eq(x, reshape_to_3d<2,3,2>(x_flat));
+ assert_eq(x_flat, flatten_3d(x));
+}
+// Round trip between a flat array of 12 elements and its 2x1x3x2 form.
+#[test]
+fn test_reshape_4d() {
+ let x = make_fixed_points_4d<0>([[[[s16:1, 2], [s16:3, 4], [s16:5, 6]]], [[[s16:7, 8], [s16:9, 10], [s16:11, 12]]]]);
+ let x_flat = make_fixed_points_1d<0>([s16:1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]);
+ assert_eq(x, reshape_to_4d<2,1,3,2>(x_flat));
+ assert_eq(x_flat, flatten_4d(x));
+}
+
+// === Convert FixedPoint array to array of significands sN[NB] ===
+// Returns the raw significand field of a single FixedPoint value.
+pub fn to_significand
+
+ (x: FixedPoint)
+ -> sN[NB] {
+ x.significand
+}
+// Element-wise to_significand over a 1D array.
+pub fn to_significand_1d
+
+ (x: FixedPoint[DIM_0])
+ -> sN[NB][DIM_0] {
+ map(x, to_significand)
+}
+pub fn to_significand_2d // Element-wise to_significand over a 2D array
+
+ (x: FixedPoint[DIM_1][DIM_0])
+ -> sN[NB][DIM_1][DIM_0] {
+ map(x, to_significand_1d) // apply the 1D version to every row
+}
+pub fn to_significand_3d // Element-wise to_significand over a 3D array
+
+ (x: FixedPoint[DIM_2][DIM_1][DIM_0])
+ -> sN[NB][DIM_2][DIM_1][DIM_0] {
+ map(x, to_significand_2d) // apply the 2D version to every outer element
+}
+pub fn to_significand_4d // Element-wise to_significand over a 4D array
+
+ (x: FixedPoint[DIM_3][DIM_2][DIM_1][DIM_0])
+ -> sN[NB][DIM_3][DIM_2][DIM_1][DIM_0] {
+ map(x, to_significand_3d) // apply the 3D version to every outer element
+}
+
+// === Change width and exponent ===
+// Applies the overflow policy selected by OVERFLOW to an already truncated significand; returns `truncated` unchanged when no overflow is detected.
+fn overflow_truncated(
+ // result of truncate_msbs(x) or truncate_lsbs(x)
+ truncated: sN[N],
+ // Sign of the result (need to pass it because it could be lost during truncation)
+ sign: Sign,
+ // Did overflow happen during truncation?
+ had_overflow: bool
+ ) -> sN[N] {
+
+ assert!(N != 0, "illegal_zero_width");
+ // TODO: this fails due to eager instantiation for N=0
+ // let MAX = std::signed_max_value();
+ // let MIN = std::signed_min_value();
+ let MAX = (std::signed_max_value<{N+2}>() >> 2) as sN[N]; // workaround for the TODO above: take the limit at width N+2, shift it right by 2, then cast down to N bits
+ let MIN = (std::signed_min_value<{N+2}>() >> 2) as sN[N]; // same workaround for the most negative value
+
+ let has_overflow = match OVERFLOW {
+ OverflowMode::SAT_SYM => had_overflow || (truncated == MIN), // in SAT_SYM the smallest result is -MAX (see below), so MIN itself also counts as overflow
+ _ => had_overflow
+ };
+
+ if has_overflow {
+ match OVERFLOW {
+ OverflowMode::WRAP => {
+ truncated // WRAP keeps the truncated bits as they are
+ },
+ OverflowMode::SAT => {
+ match sign { // SAT clamps to the limit that matches the sign
+ Sign::NonNegative => MAX,
+ Sign::Negative => MIN
+ }
+ },
+ OverflowMode::SAT_ZERO => {
+ sN[N]:0 // SAT_ZERO replaces any overflowing value with zero
+ },
+ OverflowMode::SAT_SYM => {
+ match sign { // SAT_SYM clamps to the symmetric range [-MAX, MAX]
+ Sign::NonNegative => MAX,
+ Sign::Negative => -MAX
+ }
+ }
+ }
+ }
+ else {
+ truncated
+ }
+}
+
+// Drop (NB_IN - NB_OUT) MSBs and handle overflow via overflow_truncated(); asserts NB_IN > NB_OUT
+fn truncate_msbs
+ (x: sN[NB_IN]) -> sN[NB_OUT] {
+
+ // TODO const_assert! fails due to eager instantiation.
+ // const_assert!(NB_IN > NB_OUT);
+ // let NB_OVERFLOW = NB_IN - NB_OUT;
+ assert!(NB_IN > NB_OUT, "truncate_msbs_nothing_to_truncate");
+ let NB_OVERFLOW = std::usub_or_zero(NB_IN, NB_OUT); // number of MSBs to drop; usub_or_zero is used instead of plain subtraction because of the TODO above
+
+ // TODO: this causes const_assert! in split_lsbs.
+ // So we have to introduce NB_SPLIT
+ // let (msbs, lsbs) = std::split_lsbs(std::to_unsigned(x));
+ let NB_SPLIT = std::min(NB_IN, NB_OUT);
+ let (_, lsbs) = std::split_lsbs(std::to_unsigned(x)); // keep only the low bits of x
+ let truncated = std::to_signed(lsbs) as sN[NB_OUT];
+
+ // TODO this fails due to eager instantiation for NB_IN = 0
+ // let sign:Sign = std::msb(x) as Sign;
+ let sign:Sign = std::msb((x as sN[NB_IN + 1]) << 1) as Sign; // widen by one bit and shift left so the original sign bit of x is the MSB again
+
+ // TODO this fails due to eager instantiation for NB_IN = 0
+ // let NB_SIGN_EXT = NB_OVERFLOW + 1;
+ let NB_SIGN_EXT = std::min(NB_OVERFLOW + 1, NB_IN);
+ // If there is no overflow, the dropped MSBs together with the new sign bit are either 000..0 or 111..1
+ let sign_ext = match sign {
+ Sign::NonNegative => zero!(),
+ Sign::Negative => all_ones!()
+ };
+ // Take all truncated bits and the sign bit
+ let (msbs, _) = std::split_msbs(std::to_unsigned(x));
+
+ // NB: overflow also happens when truncated == MIN for OverflowMode::SAT_SYM
+ // We handle this inside overflow_truncated()
+ let had_overflow = (msbs != sign_ext); // any mismatch means information was lost by dropping the MSBs
+ overflow_truncated(truncated as sN[NB_OUT], sign, had_overflow)
+}
+// Translates this module's RoundingMode `rm` into the matching round::RoundingMode; unsupported modes trip an assertion.
+fn convert_rounding_mode() -> round::RoundingMode {
+ match rm {
+ RoundingMode::TRN => round::RoundingMode::RTN,
+ RoundingMode::TRN_ZERO => round::RoundingMode::RTZ,
+ // RoundingMode::RND => TODO,
+ // RoundingMode::RND_ZERO => TODO,
+ RoundingMode::RND_INF => round::RoundingMode::RNA,
+ // RoundingMode::RND_MIN_INF => TODO,
+ RoundingMode::RND_CONV => round::RoundingMode::RNE,
+ _ => {
+ assert_fmt!(false, "unsupported_RoundingMode_{}", (rm as RoundingModeIntegerType));
+ round::RoundingMode::RTN // placeholder result so the match arm has a value; the assertion above has already failed
+ }
+ }
+}
+
+// round::round_trunc_s, but with our RoundingMode (translated by convert_rounding_mode); the (u1, sN[R]) result is returned unchanged
+fn round_trunc_s
+ (unrounded: sN[N]) -> (u1, sN[R]) {
+ round::round_trunc_s(convert_rounding_mode(), unrounded)
+}
+
+// Drop (NB_IN - NB_OUT) LSBs using RoundingMode,
+// and handle possible overflow (e.g. rounding MAX up) according to OverflowMode. Asserts NB_IN > NB_OUT.
+fn truncate_lsbs
+ (x: sN[NB_IN]) -> sN[NB_OUT] {
+
+ // TODO const_assert! fails due to eager instantiation
+ // const_assert!(NB_IN > NB_OUT);
+ // let NUM_BITS_ROUNDED = NB_IN - NB_OUT;
+ assert!(NB_IN > NB_OUT, "truncate_lsbs_nothing_to_truncate");
+ let NUM_BITS_ROUNDED = std::usub_or_zero(NB_IN, NB_OUT); // number of LSBs to round away; usub_or_zero is used because of the TODO above
+
+ let (had_overflow, truncated) = round_trunc_s(x); // first tuple element is used as the overflow flag
+ let sign = std::msb(x) as Sign; // sign is taken from the input, before rounding
+ overflow_truncated(truncated as sN[NB_OUT], sign, had_overflow)
+}
+
+// Converts x to width NB_OUT and binary exponent BE_OUT: the exponent is aligned first, then the width is adjusted.
+// FixedPoint ~ ac_fixed ~ significand * 2^BE
+// 0b00111.001 ~ FixedPoint<8,-3>
+pub fn resize<
+ NB_OUT: u32, BE_OUT: s32,
+ ROUNDING: RoundingMode,
+ OVERFLOW: OverflowMode,
+ NB_IN: u32, BE_IN: s32>
+ (x: FixedPoint)
+ -> FixedPoint{
+
+ let SHIFT: s32 = BE_IN - BE_OUT; // >= 0: significand is shifted left by SHIFT bits; < 0: -SHIFT low bits are dropped
+
+ let NB_ALIGNED = if (SHIFT >= s32:0) { // width of the significand after exponent alignment
+ NB_IN + std::to_unsigned(SHIFT)
+ }
+ else {
+ std::usub_or_zero(NB_IN, std::to_unsigned(-SHIFT)) // clamps at 0 when more bits are dropped than the input has
+ };
+
+ // Align exponent
+ let aligned : sN[NB_ALIGNED] =
+ if (SHIFT >= s32:0) {
+ (x.significand as sN[NB_ALIGNED]) << std::to_unsigned(SHIFT) // widen first so the left shift cannot lose bits
+ } else if (NB_ALIGNED == 0) {
+ // TODO: move this case inside truncate_lsbs?
+ zero!()
+ } else {
+ truncate_lsbs(x.significand)
+ };
+
+ // Resize width
+ let resized = if (NB_OUT < NB_ALIGNED) {
+ truncate_msbs(aligned)
+ } else if (NB_OUT == NB_ALIGNED){
+ // Here overflow_truncated() will change the result only in SAT_SYM mode, if aligned == MIN.
+ let sign = std::msb(aligned as sN[NB_OUT]) as Sign;
+ let had_overflow = false;
+ overflow_truncated(aligned as sN[NB_OUT], sign, had_overflow)
+ } else {
+ aligned as sN[NB_OUT] // output is wider than the aligned value: a plain cast suffices
+ };
+
+ FixedPoint{ significand: resized }
+}
+// Element-wise resize over a 1D array.
+pub fn resize_1d<
+ NB_OUT: u32, BE_OUT: s32,
+ ROUNDING: RoundingMode,
+ OVERFLOW: OverflowMode,
+ NB_IN: u32, BE_IN: s32,
+ DIM: u32
+ >
+(x: FixedPoint[DIM])
+-> FixedPoint[DIM] {
+ map(x, resize)
+}
+// Element-wise resize over a 2D array (applies resize_1d to every row).
+pub fn resize_2d<
+ NB_OUT: u32, BE_OUT: s32,
+ ROUNDING: RoundingMode,
+ OVERFLOW: OverflowMode,
+ NB_IN: u32, BE_IN: s32,
+ DIM_0: u32, DIM_1: u32
+ >
+(x: FixedPoint[DIM_1][DIM_0])
+-> FixedPoint[DIM_1][DIM_0] {
+ map(x, resize_1d)
+}
+// Element-wise resize over a 3D array (applies resize_2d to every outer element).
+pub fn resize_3d<
+ NB_OUT: u32, BE_OUT: s32,
+ ROUNDING: RoundingMode,
+ OVERFLOW: OverflowMode,
+ NB_IN: u32, BE_IN: s32,
+ DIM_0: u32, DIM_1: u32, DIM_2: u32
+ >
+(x: FixedPoint[DIM_2][DIM_1][DIM_0])
+-> FixedPoint[DIM_2][DIM_1][DIM_0] {
+ map(x, resize_2d)
+}
+// Element-wise resize over a 4D array (applies resize_3d to every outer element).
+pub fn resize_4d<
+ NB_OUT: u32, BE_OUT: s32,
+ ROUNDING: RoundingMode,
+ OVERFLOW: OverflowMode,
+ NB_IN: u32, BE_IN: s32,
+ DIM_0: u32, DIM_1: u32, DIM_2: u32, DIM_3: u32
+ >
+(x: FixedPoint[DIM_3][DIM_2][DIM_1][DIM_0])
+-> FixedPoint[DIM_3][DIM_2][DIM_1][DIM_0] {
+ map(x, resize_3d)
+}
+
+// Test helper: resizes `input` and asserts that the result equals `expected_output`.
+fn resize_test_case<
+ ROUNDING: RoundingMode, OVERFLOW: OverflowMode,
+ NB_IN: u32, BE_IN: s32,
+ NB_OUT: u32, BE_OUT: s32>
+ (input: FixedPoint, expected_output: FixedPoint) {
+
+ let output = resize(input);
+ assert_eq(output, expected_output);
+}
+// Resizing s2 with exponent 0 to s4 with exponent -2 must shift the significand left by two bits, for +1 and -1.
+#[test]
+fn test_resize() {
+ let R = RoundingMode::TRN;
+ let O = OverflowMode::WRAP;
+ resize_test_case(
+ fixed_point::make_fixed_point<0>(s2:1),
+ fixed_point::make_fixed_point<-2>(s4:1 << 2)
+ );
+ resize_test_case(
+ fixed_point::make_fixed_point<0>(s2:-1),
+ fixed_point::make_fixed_point<-2>(s4:-1 << 2)
+ );
+}
+// Further resize() cases with TRN rounding and WRAP overflow: widening, exponent changes in both directions, and MSB truncation.
+#[test]
+fn test_resize_more() {
+ let R = RoundingMode::TRN;
+ let O = OverflowMode::WRAP;
+
+ // widen width only (sign extension)
+ resize_test_case(
+ fixed_point::make_fixed_point<0>(s2:1),
+ fixed_point::make_fixed_point<0>(s4:1)
+ );
+
+ resize_test_case(
+ fixed_point::make_fixed_point<0>(s2:-1),
+ fixed_point::make_fixed_point<0>(s4:-1)
+ );
+
+ // exponent decrease (SHIFT > 0) → left shift
+ resize_test_case(
+ fixed_point::make_fixed_point<0>(s3:1),
+ fixed_point::make_fixed_point<-2>(s5:1 << 2)
+ );
+
+ resize_test_case(
+ fixed_point::make_fixed_point<0>(s3:-2),
+ fixed_point::make_fixed_point<-2>(s5:-2 << 2)
+ );
+
+ // exponent increase (SHIFT < 0) → truncate LSBs
+ resize_test_case(
+ fixed_point::make_fixed_point<-2>(s4:0b0110), // 1.5
+ fixed_point::make_fixed_point<0>(s2:1)
+ );
+
+ resize_test_case(
+ fixed_point::make_fixed_point<-2>(s4:0b1010), // -1.5
+ fixed_point::make_fixed_point<0>(s2:-2)
+ );
+
+ // full LSB truncation (NB_ALIGNED = 0): resize() yields zero regardless of the input sign
+ resize_test_case(
+ fixed_point::make_fixed_point<-1>(s3:3),
+ fixed_point::make_fixed_point<3>(s4:0)
+ );
+
+ resize_test_case(
+ fixed_point::make_fixed_point<-1>(s3:-3),
+ fixed_point::make_fixed_point<3>(s4:0)
+ );
+
+ // MSB truncation (wrap): only the three low bits survive
+ resize_test_case(
+ fixed_point::make_fixed_point<0>(s5:0b10110),
+ fixed_point::make_fixed_point<0>(s3:0b110)
+ );
+
+ resize_test_case(
+ fixed_point::make_fixed_point<0>(s5:-7),
+ fixed_point::make_fixed_point<0>(s3:1)
+ );
+}
+// Test helper for overflow handling: wraps raw significands x and expected as exponent-0 FixedPoint values and delegates to resize_test_case.
+fn resize_overflow_test_case<
+ OVERFLOW: OverflowMode,
+ NB_IN: u32,
+ NB_OUT: u32
+>(
+ x: sN[NB_IN],
+ expected: sN[NB_OUT]
+) {
+ resize_test_case(
+ fixed_point::make_fixed_point<0>(x),
+ fixed_point::make_fixed_point<0>(expected)
+ );
+}
+// Width-only resizes that overflow (or hit MIN), grouped by the overflow mode named in the comments below.
+#[test]
+fn test_resize_overflow_modes() {
+ // WRAP
+ resize_overflow_test_case(s5:15, s3:-1);
+ resize_overflow_test_case(s5:8, s3:0);
+ // SAT
+ resize_overflow_test_case(s5:15, s4:7);
+ resize_overflow_test_case(s5:15, s3:3);
+ resize_overflow_test_case(s5:-16, s4:-8);
+ resize_overflow_test_case(s5:-16, s3:-4);
+ // SAT_ZERO
+ resize_overflow_test_case(s5:15,s3:0);
+ resize_overflow_test_case(s5:-15,s3:0);
+ resize_overflow_test_case(s5:-9,s3:0);
+ // SAT_SYM
+ resize_overflow_test_case(s5:-16, s3:-3);
+ resize_overflow_test_case(s5:-16, s5:-15);
+ resize_overflow_test_case(s5:15, s5:15);
+}
+
+
+// === Queries ===
+
+// Returns the larger of x and y, chosen by comparing their significands with std::max.
+pub fn max
+ (x: FixedPoint, y: FixedPoint) -> FixedPoint {
+ fixed_point::make_fixed_point(std::max(x.significand, y.significand))
+}
+// Returns the largest element of xs, found by folding std::max over the significands.
+pub fn max_1d
+
+ (xs: FixedPoint[DIM])
+ -> FixedPoint {
+ // We could do 1..DIM, but compilation fails for empty range
+ let max_significand = for (i, acc) in 0..DIM {
+ std::max(acc, xs[i].significand)
+ }(xs[0].significand); // seeded with element 0; NOTE(review): presumably DIM >= 1 is expected - confirm behaviour for an empty array
+ fixed_point::make_fixed_point(max_significand)
+}
+
+
+// === Clip ===
+// Clamps x to [min_value, max_value]: returns min_value if x is below it, max_value if x is above it, otherwise x.
+pub fn clip(
+ x: FixedPoint,
+ min_value: FixedPoint,
+ max_value: FixedPoint
+ ) -> FixedPoint {
+
+ if (fixed_point::sub(x, min_value).significand < 0) // x - min_value < 0
+ { min_value }
+ else if (fixed_point::sub(x, max_value).significand > 0) // x - max_value > 0
+ { max_value }
+ else
+ { x }
+}
+// Same selection as clip(), but the chosen value (min_value, max_value or x) is passed through resize() before being returned.
+pub fn clip_resize<
+ NB_OUT: u32, BE_OUT: s32, ROUNDING: RoundingMode, OVERFLOW: OverflowMode,
+ NB_IN: u32, BE_IN: s32,
+ NB_MIN: u32, BE_MIN: s32,
+ NB_MAX: u32, BE_MAX: s32>(
+ x: FixedPoint,
+ min_value: FixedPoint,
+ max_value: FixedPoint
+ ) -> FixedPoint {
+
+ if (fixed_point::sub(x, min_value).significand < 0) // x - min_value < 0
+ { resize(min_value) }
+ else if (fixed_point::sub(x, max_value).significand > 0) // x - max_value > 0
+ { resize(max_value) }
+ else
+ { resize(x) }
+}
+
+// === Arithmetic operations ===
+
+// Compute -x. By default the result keeps the exponent (BE_OUT = BE_IN) and is one bit wider (NB_OUT = NB_IN + 1).
+// The extra bit avoids overflow when x = -2^(NB-1)
+pub fn negate<
+ NB_IN: u32, BE_IN: s32,
+ NB_OUT: u32 = {NB_IN + 1}, BE_OUT: s32 = {BE_IN}
+>
+(x: FixedPoint)
+-> FixedPoint {
+ let xx = x.significand as sN[NB_OUT]; // widen before negating
+ FixedPoint{ significand: -xx }
+}
+
+// Negate without adding extra bit: negates via negate() and then drops the MSBs again with truncate_msbs(), which applies the overflow handling
+pub fn negate_with_overflow<
+ OVERFLOW: OverflowMode,
+ NB: u32, BE: s32
+>
+(x: FixedPoint)
+-> FixedPoint {
+ let minus_x = negate(x);
+ let significand = truncate_msbs(minus_x.significand);
+ fixed_point::make_fixed_point(significand)
+}
+// Test helper: checks negate() and negate_with_overflow() against directly computed expectations for the values in MIN..MAX.
+fn negate_test_case() {
+ let NB_OUT = NB + 1;
+
+ let MIN = std::signed_min_value();
+ let MAX = std::signed_max_value();
+
+ let ROUNDING = RoundingMode::TRN;
+ for (i, _) in MIN..MAX { // NOTE(review): the range end is exclusive, so MAX itself is never exercised
+ let x = fixed_point::make_fixed_point(i);
+ let expected = fixed_point::make_fixed_point(-(i as sN[NB_OUT])); // negation computed at the wider width
+ let expected_with_overflow = resize(expected); // reference for the overflow variant is obtained through resize()
+ assert_eq(expected, negate(x));
+ assert_eq(expected_with_overflow, negate_with_overflow(x));
+ }(());
+}
+// Runs negate_test_case for 3-bit values with exponent 0 under every overflow mode.
+#[test]
+fn test_negate() {
+ negate_test_case<3, 0, OverflowMode::WRAP>();
+ negate_test_case<3, 0, OverflowMode::SAT>();
+ negate_test_case<3, 0, OverflowMode::SAT_ZERO>();
+ negate_test_case<3, 0, OverflowMode::SAT_SYM>();
+}
+
+
+// Performs an add assuming that the rhs is already wide enough to not overflow; the result is as wide as the rhs.
+// WARNING: rhs must be wide enough to avoid any overflow
+pub fn add_already_widened
+
+ (fxd_a: FixedPoint, fxd_b: FixedPoint)
+ -> FixedPoint {
+ // Widen before left shifting to avoid overflow
+ let aligned_lhs = (fxd_a.significand as sN[NB_B]) << (BE_A - BE_B) as u32; // NOTE(review): presumably requires BE_A >= BE_B, since the difference is cast to u32 - confirm
+ // TODO: I think this is also always the same in the dot product use case. Fraction bits stay
+ // the same
+ let aligned_rhs = fxd_b.significand;
+
+ fixed_point::make_fixed_point(aligned_lhs + aligned_rhs)
+}
+
+// Performs a subtraction (fxd_a - fxd_b) assuming that the rhs is already wide enough to not overflow.
+// WARNING: rhs must be wide enough to avoid any overflow
+pub fn sub_already_widened
+
+ (fxd_a: FixedPoint, fxd_b: FixedPoint)
+ -> FixedPoint {
+ // Widen before left shifting to avoid overflow
+ let aligned_lhs = (fxd_a.significand as sN[NB_B]) << (BE_A - BE_B) as u32; // NOTE(review): presumably requires BE_A >= BE_B, since the difference is cast to u32 - confirm
+ let aligned_rhs = fxd_b.significand;
+
+ fixed_point::make_fixed_point(aligned_lhs - aligned_rhs)
+}
+
+// Performs a fused-multiply-add (fxd_a * fxd_b + fxd_c) assuming that the rhs is already wide enough to not overflow.
+// WARNING: the add rhs must be wide enough to avoid any overflow
+pub fn fmadd_already_widened
+
+ (fxd_a: FixedPoint,
+ fxd_b: FixedPoint,
+ fxd_c: FixedPoint)
+ -> FixedPoint {
+ let prod = fixed_point::mul(fxd_a, fxd_b);
+ add_already_widened(prod, fxd_c) // fxd_c is the "already widened" rhs of the addition
+}
+
+// Performs a dot product on 2 vectors. To implement this, the final widened result type is
+// computed up front. An accumulator is instantiated with this final size and the fmadd operation
+// is reimplemented in such a way as to not widen the output when summing in the accumulator.
+//
+// TYPE EXPLANATIONS:
+// number bits: a multiplication is assumed to always double the number of bits.
+// Since our vectors must be of the same type
+// (each elem. within each vector follows the same fixed point representation)
+// we know the size of all elem. wise multiplications.
+// We can also guarantee that all elements will have overlapping positions
+// (again because elems. within vectors have the same type). This means that we must
+// widen by one bit for each element of the vector minus one. Minus one because we perform VEC_SZ - 1 adds.
+// binary exponent: The binary exponent will never change with additions since
+// all elem-wise multiplications will result in the same exponent.
+// exp is negative: inferred from 'binary exponent'
+// unsigned exp: inferred from 'binary exponent'
+// WARNINGS:
+// 1. made aligned_width() and num_bits_overlapping() public in a copy of the fixed_point_lib module.
+// to write the type inference
+// 2. We use ''already_widened'' functions.
+pub fn dot_prod
+
+ (x: FixedPoint[VEC_SZ],
+ y: FixedPoint[VEC_SZ])
+ -> FixedPoint {
+
+ for (i, acc) in 0..VEC_SZ {
+ fmadd_already_widened(x[i], y[i], acc) // acc + x[i] * y[i], kept at the accumulator width
+ }(zero!>()) // accumulator starts at zero
+}
+
+// TODO
+// #[test]
+// fn fadd_test() {
+// let a = sN[u32:16]:1024; // 1.0
+// let b = sN[u32:16]:1024; // 1.0
+// let c = sN[u32:16]:1024; // 1.0
+
+// let result = fmadd(a, b, c);
+// // Solve: x * 2^(-20) = 2 (x must fit in 33 bits)
+// let expected = sN[u32:33]:2097152; // 2.0
+// assert_eq(expected, result);
+// }
+
+// Alias for a 16-bit fixed-point type with binary exponent -10 (value = significand * 2^-10).
+type FP = FixedPoint<16, -10>;
+// Checks dot_prod() on vectors of length 2 and 3; expected results have exponent -20 and widths 33 and 34.
+#[test]
+fn dot_prod_test() {
+ // [1.5, 1.5]
+ let x = make_fixed_points_1d<-10>(sN[16][2]:[1536, ...]);
+ // [2.25, 2.25]
+ let y = make_fixed_points_1d<-10>(sN[16][2]:[2304, ...]);
+ // 6.75
+ let expected = fixed_point::make_fixed_point<-20>(sN[33]:7077888);
+ assert_eq(expected, dot_prod(x, y));
+
+ // [1.0, 1.0, 1.0]
+ let x = make_fixed_points_1d<-10>(sN[16][3]:[1024, ...]);
+ // [1.0, 1.0, 1.0]
+ let y = make_fixed_points_1d<-10>(sN[16][3]:[1024, ...]);
+ // 3.0
+ let expected = fixed_point::make_fixed_point<-20>(sN[34]:3145728);
+ assert_eq(expected, dot_prod(x, y));
+}
diff --git a/hls4ml/templates/xls/firmware/layer.x b/hls4ml/templates/xls/firmware/layer.x
new file mode 100644
index 0000000000..0aebb004fc
--- /dev/null
+++ b/hls4ml/templates/xls/firmware/layer.x
@@ -0,0 +1,30 @@
+import std;
+import fixed_point;
+import ap_types.fixed_point_util;
+
+type FixedPoint = fixed_point::FixedPoint;
+type RoundingMode = fixed_point_util::RoundingMode;
+type OverflowMode = fixed_point_util::OverflowMode;
+
+// hls-fpga-machine-learning insert imports
+
+// hls-fpga-machine-learning insert types
+
+// hls-fpga-machine-learning insert weights
+
+// hls-fpga-machine-learning insert lookup tables
+
+// hls-fpga-machine-learning insert other constants
+
+
+// hls-fpga-machine-learning insert helpers for different input ranks
+
+
+// Top-level function of a layer module. The "hls-fpga-machine-learning insert" lines are template placeholders (presumably filled in by the hls4ml XLS writer); keep them verbatim.
+pub fn transform(
+ // hls-fpga-machine-learning insert layer input
+) ->
+// hls-fpga-machine-learning insert layer output
+{
+ // hls-fpga-machine-learning insert top-level function call
+}
diff --git a/hls4ml/templates/xls/firmware/myproject.x b/hls4ml/templates/xls/firmware/myproject.x
new file mode 100644
index 0000000000..0c807ab9ae
--- /dev/null
+++ b/hls4ml/templates/xls/firmware/myproject.x
@@ -0,0 +1,33 @@
+import fixed_point;
+import ap_types.fixed_point_util;
+
+// hls-fpga-machine-learning insert imports
+
+// Input and output types: arrays of FixedPoint. The "hls-fpga-machine-learning insert" lines are template placeholders (presumably filled in by the hls4ml XLS writer); keep them verbatim.
+pub fn myproject_fixed_point(
+ // hls-fpga-machine-learning insert architecture input
+) ->
+// hls-fpga-machine-learning insert architecture output
+{
+ // hls-fpga-machine-learning insert layers
+}
+
+// Input and output types: arrays of sN[N] (raw bit vectors). The body placeholder is named "convert from bits"; the placeholder lines must be kept verbatim.
+pub fn myproject_bits(
+ // hls-fpga-machine-learning insert bits input
+) ->
+// hls-fpga-machine-learning insert bits output
+{
+ // hls-fpga-machine-learning insert convert from bits
+}
+
+// Top-level function of the project; it uses the same "bits" input/output placeholders as myproject_bits. The placeholder lines must be kept verbatim.
+pub fn myproject(
+ // hls-fpga-machine-learning insert bits input
+) ->
+// hls-fpga-machine-learning insert bits output
+{
+ // hls-fpga-machine-learning insert top-level function call
+}
+
+// hls-fpga-machine-learning insert debugging
diff --git a/hls4ml/templates/xls/firmware/nnet_utils/activations.x b/hls4ml/templates/xls/firmware/nnet_utils/activations.x
new file mode 100644
index 0000000000..9adfa1c11d
--- /dev/null
+++ b/hls4ml/templates/xls/firmware/nnet_utils/activations.x
@@ -0,0 +1,556 @@
+import std;
+import fixed_point;
+
+import ap_types.fixed_point_util;
+import nnet_utils.lookup_table;
+
+type FixedPoint = fixed_point::FixedPoint;
+type RoundingMode = fixed_point_util::RoundingMode;
+type OverflowMode = fixed_point_util::OverflowMode;
+type LookupTable = lookup_table::LookupTable;
+
+
+// =========================================================================
+// --------------------------------- ReLU ----------------------------------
+// Element-wise thresholded ReLU: elements strictly greater than `threshold` are resized to the output type, all others become zero.
+pub fn thresholded_relu
+ (
+ x: FixedPoint[DIM],
+ threshold: FixedPoint)
+ -> FixedPoint[DIM] {
+
+ for (i, acc) in 0..DIM {
+ let y = if (fixed_point_util::greater(x[i], threshold))
+ { fixed_point_util::resize(x[i]) } // pass-through, converted to the output type
+ else
+ { zero!>() }; // at or below the threshold
+ update(acc, i, y)
+ }(zero![DIM]>()) // output array starts as all zeros
+}
+// ReLU, implemented as thresholded_relu with a zero threshold.
+pub fn relu
+
+ (x: FixedPoint[DIM]) -> FixedPoint[DIM] {
+
+ thresholded_relu(x, zero!>())
+}
+// Checks relu() on positive, non-positive and mixed inputs, and with an output type that differs from the input type.
+#[test]
+fn relu_test() {
+ let x = fixed_point_util::make_fixed_points_1d<-10>(sN[16][2]:[
+ 1536, 1024
+ ]);
+ let expected = fixed_point_util::make_fixed_points_1d<-10>(sN[16][2]:[
+ 1536, 1024
+ ]);
+ assert_eq(expected, relu<16, -10, RoundingMode::TRN, OverflowMode::WRAP>(x));
+
+ let x = fixed_point_util::make_fixed_points_1d<-10>(sN[16][4]:[
+ -1536, -1024, 0, -1024
+ ]);
+ let expected = fixed_point_util::make_fixed_points_1d<-10>(sN[16][4]:[
+ 0,...
+ ]);
+ assert_eq(expected, relu<16, -10, RoundingMode::TRN, OverflowMode::WRAP>(x));
+
+ let x = fixed_point_util::make_fixed_points_1d<-10>(sN[16][4]:[
+ -1536, -1024, 1024, -1024
+ ]);
+ let expected = fixed_point_util::make_fixed_points_1d<-10>(sN[16][4]:[
+ 0, 0, 1024, 0
+ ]);
+ assert_eq(expected, relu<16, -10, RoundingMode::TRN, OverflowMode::WRAP>(x));
+
+ // Different width and precision: 32-bit input with exponent -10, 16-bit output with exponent -11 (1024 becomes 2048)
+ let x = fixed_point_util::make_fixed_points_1d<-10>(sN[32][4]:[
+ -1536, -1024, 1024, -1024
+ ]);
+ let expected = fixed_point_util::make_fixed_points_1d<-11>(sN[16][4]:[
+ 0, 0, 2048, 0
+ ]);
+ assert_eq(expected, relu<16, -11, RoundingMode::TRN, OverflowMode::WRAP>(x));
+}
+// Element-wise leaky ReLU: non-negative elements are resized as they are, negative elements are multiplied by `alpha` and then resized.
+pub fn leaky_relu
+ (
+ x: FixedPoint[DIM],
+ alpha: FixedPoint
+ ) -> FixedPoint[DIM] {
+
+ for (i, acc) in 0..DIM {
+ let y = if (x[i].significand >= 0) // sign test on the raw significand
+ { fixed_point_util::resize(x[i]) }
+ else
+ { fixed_point_util::resize(fixed_point::mul(x[i], alpha)) };
+ update(acc, i, y)
+ }(zero![DIM]>()) // output array starts as all zeros
+}
+
+pub fn elu
+