From 7c6fff0804f1db48fe4f53eade4b63a95a8bd5c5 Mon Sep 17 00:00:00 2001 From: Soren Soe <2106410+stsoe@users.noreply.github.com> Date: Tue, 1 Apr 2025 13:54:47 -0700 Subject: [PATCH 1/4] Extend runner with an execution profile This is WIP. Adding execution profile data that constrains how a run recipe is executed by the xrt::runner class. The profile is a separate json that is paired with a recipe when instantiating an xrt::runner. The profile is optional, while the recipe is required. The application can use the xrt::runner APIs to bind external resources or use the profile json to specify the binding and otherwise constrain the execution of a recipe. Signed-off-by: Soren Soe <2106410+stsoe@users.noreply.github.com> --- src/runtime_src/core/common/runner/README.md | 565 +----------------- src/runtime_src/core/common/runner/profile.md | 21 + src/runtime_src/core/common/runner/recipe.md | 562 +++++++++++++++++ src/runtime_src/core/common/runner/runner.cpp | 205 ++++++- src/runtime_src/core/common/runner/runner.h | 4 + .../core/common/runner/test/profile.json | 23 + .../core/common/runner/test/recipe.json | 3 + 7 files changed, 810 insertions(+), 573 deletions(-) create mode 100644 src/runtime_src/core/common/runner/profile.md create mode 100644 src/runtime_src/core/common/runner/recipe.md create mode 100644 src/runtime_src/core/common/runner/test/profile.json diff --git a/src/runtime_src/core/common/runner/README.md b/src/runtime_src/core/common/runner/README.md index 891fcf7f4a5..91bda995032 100644 --- a/src/runtime_src/core/common/runner/README.md +++ b/src/runtime_src/core/common/runner/README.md @@ -1,561 +1,12 @@ - -# Run recipe for XRT - -A run-recipe defines how to execute a graph model using XRT. - -This directory contains a stand-alone `xrt::runner` class that reads and -executes a run-recipe json file. The idea is to have tools, e.g. VAIML -geneate the run-recipe along with xclbin and control code for kernels. 
- -The format (schema) of the recipe json is loosely defined. The -implementation of the runner drove some of the defintion of the json -format. - -A run-recipe is associated with exactly one xclbin which, when loaded into -a region (partition) on the device, can run the recipe. - -# JSON format - -There are three sections in the run-recipe. - -1. [header](#header) -2. [resources](#resources) -3. [execution](#execution) - -The `header` trivially contains the path (full name) of the xclbin that should -be loaded before resources can be created or the recipe can be executed. - -The `resources` section defines all buffer objects, kernel objects, -and cpu function objects used to execute the recipe. The resources are -created as the run recipe is loaded. External input and output buffer -may be bound later during the execution stage of recipe. - -The `execution` section defines how the resources are connected -together during execution. It simply executes kernels and cpu -functions that were previously defined in the resource section with -arguments that were also defined in the resource section. Execution -of kernels can consume partial buffer input and produce partial buffer -output per `size` and `offset` fields define as part of specifying the -kernel arguments. - -## Header - -For the time being, the header stores nothing but the path to the -xclbin. The xclbin contains the kernel meta data used by XRT when -xrt::kernel objects are created. The xclbin contains PDIs for each -kernel, the PDIs are loaded by firmware prior to running a kernel. - -The header section can be amended with other meta data as needed. - -``` -{ - "header": { - "xclbin_path": "design.xclbin", - }, - - ... -} -``` - -The runner will use the xclbin from the `header` section to create an -xrt::hw_context, which is subsequently used to create xrt::kernel -objects. - -## Resources - -The resources section is a complete list of all objects that are used -when the recipe is executed. 
Each kernel used in the `execution` -section must be listed in the resources section. All kernel argument -buffers used by kernels in the `execution` section must be listed in -the resources section. Also all functions executed on the CPU must -be listed in the resources section. - -### Kernel functions - -Kernels listed in the resoruces section result in runner creating -`xrt::kernel` objects. In XRT, the kernel objects are identified by -name, which must match a kernel name in the xclbin. - -Kernels are constructed from the xclbin name and by specifying which -xrt::hw_context should execute the kernel and what control code the -kernel should execute. The hardware context is created by the runner -from the xclbin specified in the recipe `header` section, so kernels -in the resources section must contain just the xclbin kernel name -and the full path to an ELF with the control code. - -``` - "resources": { - "kernels": [ - { - "name": "k1", - "xclbin_kernel_name": "DPU", - "ctrlcode": "no-ctrl-packet.elf" - } - ] - }, -``` - -The name of the kernel in resources section must be unique in the list -of kernel instances, the name is used in the `execution` section to refer -to which instance should be executed. - -If a kernel is instantiated from the same xclbin kernel name and same -control code, then only one such kernel isntance needs to be listed in -the resources section. Listing multiple kernel instances referring to -the same xclbin kernel and using the same control code is not error, -but is not necessary. - -### CPU functions - -Functions to be executed on the CPU are listed in the resource section -along with a path to a library containing the individual function. -The library will be runtime loaded (dlopen); it will expose functions -through a function pointer that is returned through a query lookup -method, which it returned through a library entry (extern "C") function. 
- -CPU function arguments are expected to be `xrt::bo` objects, for -example format converting functions will take an input buffer and -and populate an output buffer, both buffers must be specified in the -resource buffer section of the recipe. - -A library path is relative to the install location of XRT based on -the environment value of `XILINX_XRT` or from its inferred location if -not set. On windows, the inferred location would be the driver store. - -``` - "resources": { - "cpus": [ - { - "name": "convert_ifm", - "library_path": "umd/convert.dll" - }, - { - "name": "convert_ofm", - "library_path": "umd/convert.dll" - }, - { - "name": "average_pool", - "library_path": "umd/operators.dll" - } - ] - }, -``` - -### Buffer - -The buffer instances listed in the resources section refer to -`xrt::bo` objects that are used during execution of kernels. The -buffers can be graph inputs or outputs, which refer to application -created input and output tensors, or they can be internal buffers used -during execution of the compiled graph at the discretion of the -compiler (VAIML). - -#### External buffers (graph input and output) - -External buffers (input and output) are created by the framework / -application outside of the runner and bound to the recipe during -execution. The runner itself does not create `xrt::bo` objects for -external buffers, but does rely on the framework to bind these buffers -to runner object created from the recipe. The external buffers must -still be listed in the resources section and specify a name that can -be used when execution sets kernel arguments. - -``` - "resources": { - "buffers": [ - { - "name": "wts", - "type": "input", - }, - { - "name": "ifm", - "type": "input", - }, - { - "name": "ofm", - "type": "output", - } - ] - } - -``` - -The `name` of the buffers in the resources section must be unique. -The name is used in the `execution` section to refer to kernel or cpu -buffer arguments. 
- - - -#### Internal buffers - -Internal buffers are created and managed by the runner. These are -buffers that are used internally within a graph to carry data from one -kernel or cpu execution to another. - -These buffers are created and managed by runner, hence unlike the -external buffers, the size of internal buffer size must be specified -in the recipe. - -``` - "resources": { - "buffers": [ - { - "name": "ifm_int", - "type": "internal", - "size": "1024" - }, - { - "name": "ofm_int", - "type": "internal", - "size": "1024" - }, - { - "name": "b0", - "type": "internal", - "size": "1024" - }, - { - "name": "b1", - "type": "internal:, - "size": "1024" - }, - { - "name": "b2", - "type": "internal", - "size": "1024" - } - ] - } - -``` -The `size` is currently specified in bytes, we could add support -K/M, e.g. `1048576 = 1024K = 1M` - -## Execution - -The execution section is an ordered list of xrt::kernel or cpu runs -with arguments from the resources section. - -Before the runner can execute the recipe in the execution section, all -graph inputs and outputs must be bound to the recipe. As mentioned -earlier, external inputs and outputs are defined by the framework that -uses the runner. Typically these external inputs and outputs are not -available at the time when the runner is initialized from the recipe -json. In other words, the runner can be created even before the -framework has created input and output tensors, but it can of course -not be executed until the inputs and outputs are defined. The runner -API has methods that must be called to bind the external inputs and -outputs. - -Arguments to a run can be a sub-buffer of the corresponding -resource. A buffer in the resources section refer to the full buffer, -but a run can use just a portion of the resource. By default -a run argument will use the full buffer, but optional attributes in -the json for a buffer can specify the size and an offset into the -resource buffer. 
- -As an example below, the kernel resource `k1` is executed twice with -3 arguments. The 3rd input is a sub-buffer of the `ifm_int` resource, the -4th is the full resource `wts`, and the finally the 5th is a -sub-buffer of `ofm_int`. - -The example illustrates the calling of a CPU function from the `cpu` -resources section. The CPU function calls are passed buffers from the -resources section and scalar values as needed. - -``` - "execution": { - "runs": [ - { - "name": "convert_ifm", - "where": "cpu", - "arguments" : [ - { "name": "ifm", "argidx": 0 }, - { "name": "ifm_int", "argidx": 1 } - ], - "constants" : [ - { "value": "nchw2nchw4c", "type": "string", "argidx": 2 } - ] - ] - }, - { - "name": "k1", - "arguments" : [ - { "name": "ifm_int", "size": 512, "offset": 0, "argidx": 3 }, - { "name": "wts", "argidx": 4 }, - { "name": "ofm_int", "size": 512, "offset": 512, "argidx": 5 } - ] - }, - { - "name": "k1", - "arguments" : [ - { "name": "ifm_int", "size": 512, "offset": 512, "argidx": 3 }, - { "name": "wts", "argidx": 4 }, - { "name": "ofm_int", "size": 512, "offset": 0, "argidx": 5 } - ] - }, - { - "name": "convert_ofm", - "where": "cpu" - "arguments" : [ - { "name": "ofm_int", "argidx": 0 }, - { "name": "ofm", "argidx": 1 } - ], - "constants" : [ - { "value": "nchw4c2nchw", "argidx": 2 } - ] - ] - }, - ... - ] - } -``` - -The runner internally creates sub-buffers out of the specified -resource buffers for each run. Both external and internal -resource buffers can be sliced and diced as required. - -The runner creates `xrt::run` or `xrt_core::cpu::run` objects out of -the specified execution runs. The runner creates a CPU or NPU runlist -for each contiguous sequence of CPU runs or NPU runs specified in the -run recipe. The runlist is inserted into a vector of runlists where -each individual runlist will be executed in sequence, when the -framework calls the runner API execute method. 
- -In addition to the buffer arguments referring to resource buffers, the -xclbin kernels and cpu functions may have additional arguments that -need to be set. For example the current DPU kernel have 8 arguments -and some of these must be set to some sentinel value. Here the -argument with index 0, represents the kernel opcode which specifies -the type of control packet used for the kernel resource object. The -value `3` implies transaction buffer. - -``` - "execution": { - "runs": [ - { - "name": "k1", - "arguments" : [ - { "name": "wts", "argidx": 4 }, - { "name": "ifm", "argidx": 3 }, - { "name": "ofm", "argidx": 5 } - ], - "constants" : [ - { "value": "3", "type": "int", "argidx": 0 }, - { "value": "0", "type": "int", "argidx": 1 }, - { "value": "0", "type": "int", "argidx": 2 }, - { "value": "0", "type": "int", "argidx": 6 }, - { "value": "0", "type": "int", "argidx": 7 } - ] - } - ] - } -``` - -# Complete run recipe - -For illustration here is a simple complete run-recipe.json file that -has been validated on NPU. There are no internal buffer and external -input and output are consumed during one kernel execution. See the -`runner/test/recipe.json` for an example leveraging cpu functions. 
- -``` -{ - "header": { - "xclbin_path": "design.xclbin", - }, - "resources": { - "buffers": [ - { - "name": "wts", - "type": "input", - }, - { - "name": "ifm", - "type": "input", - }, - { - "name": "ofm", - "type": "output", - } - ], - "kernels": [ - { - "name": "k1", - "xclbin_kernel_name": "DPU", - "ctrlcode": "no-ctrl-packet.elf" - } - ] - }, - "execution": { - "runs": [ - { - "name": "k1", - "arguments" : [ - { "name": "wts", "argidx": 4 }, - { "name": "ifm", "argidx": 3 }, - { "name": "ofm", "argidx": 5 } - ], - "constants": [ - { "value": "3", "type": "int", "argidx": 0 }, - { "value": "0", "type": "int", "argidx": 1 }, - { "value": "0", "type": "int", "argidx": 2 }, - { "value": "0", "type": "int", "argidx": 6 }, - { "value": "0", "type": "int", "argidx": 7 } - ] - } - ] - } -} -``` - -# Runner API - -The runner is contructed from a recipe json file and a device object. -The runner is a standard XRT C++ first class object with the following -API. Include documentation will be beefed up when the runner code is -moved to public XRT. - -``` -class runner_impl; -class runner -{ - std::shared_ptr m_impl; // probably unique_ptr is enough -public: - // ctor - Create runner from a recipe json - runner(const xrt::device& device, const std::string& recipe); - - // bind_input() - Bind a buffer object to an input tensor - void - bind_input(const std::string& name, const xrt::bo& bo); - - // bind_output() - Bind a buffer object to an output tensor - void - bind_output(const std::string& name, const xrt::bo& bo); - - // execute() - Execute the runner - void - execute(); - - // wait() - Wait for the execution to complete - void - wait(); -}; -``` - -# CPU library requirements - -The run recipe can refer to functions executed on the CPU. These -functions should be implemented in a shared library that can be -loaded at runtime by the runner based on `resources/cpus` section. 
- -A referenced library is loaded by the runner, which subsequently looks -for exported entry point (symbol) called `open` to initialize the shared -library. The `open()` is supposed to return function objects for callback -functions within the library. At present time, only one callback function -is required is the `lookup()` function, which the runner -uses to lookup functions referenced in the recipe resources section. - -The `lookup()` function must return the callable function that the -runner is requesting along with the number of arguments this function -expects. If the function the runner is looking for is not available, -then the `lookup()` function should throw an exception (TODO: define -the exact exception to throw). The reason the `lookup()` function is -not itself an exported "extern C" function like `open()` is that the -call semantics must be C++ with the bells and whistles that follow -(exceptions). - -The signature of the `extern "C"` exported `open()` function and the -C++ signature of the `lookup()` function is defined in `xrt_runner.h` -under `namespace xrt::cpu { ... }`. - -``` -/** - * The xrt::runner supports execution of CPU functions as well - * as xrt::kernel objects. - * - * The CPU functions are implemented in runtime loaded dynamic - * libraries. A library must define and export a function that - * initializes a callback structure with a lookup function. - * - * The signature of the lookup function must be - * @code - * void lookup_fn(const std::string& name, xrt::cpu::lookup_args* args) - * @endcode - * where the name is the name of the function to lookup and args is a - * structure that the lookup function must populate with the function - * information. - * - * The arguments to the CPU functions are elided via std::any and - * the signature of the CPU functions is fixed to - * @code - * void cpu_function(std::vector& args) - * @endcode - * Internally, the CPU library unwraps the arguments and calls the - * actual function. 
- */ -namespace xrt::cpu { -/** - * struct lookup_args - argument structure for the lookup function - * - * The lookup function takes as arguments the name of the function - * to lookup along with lookup_args to be populated with information - * about the function. - * - * @num_args - number of arguments to function - * @callable - a C++ function object wrapping the function - * - * The callable library functions uses type erasure on their arguments - * through a std::vector of std::any objects. The callable must - * unwrap the std::any objects to its expected type, which is - * cumbersome, but type safe. The type erased arguments allow the - * runner to be generic and not tied to a specific function signature. -*/ -struct lookup_args -{ - std::uint32_t num_args; - std::function&)> callable; -}; - -/** - * struct library_init_args - argument structure for libray initialization - * - * The library initialization function is the only function exported - * from the run time loaded library. The library initialization - * function is called by the runner when a resource references a - * function in a library and the library is not already loaded. - * - * @lookup_fn - a callback function to be populated with the - * lookup function. - * - * The library initialization function is C callable exported symbol, - * but returns a C++ function pointer to the lookup function. -*/ -struct library_init_args -{ - std::function lookup_fn; -}; - -/** - * library_init_fn - type of the library initialization function - * The name of the library initialization function is fixed to - * "library_init". -*/ -using library_init_fn = void (*)(library_init_args*); -} // xrt::cpu - -``` - -A unit test for the cpu library and corresponding sample run recipe -that references the cpu library is under `test/cpulib.cpp` and -`test/main.cpp` - - + +# Runner instrastructure +This directory contains xrt::runner infrastructure. The runner is +broken into two json components. 
First is the recipe that defines a +model executed by the xrt::runner. Second is the profile that defines +the constraints under which the model is executed. +- [recipe](recipe.md) +- [profile](profile.md) diff --git a/src/runtime_src/core/common/runner/profile.md b/src/runtime_src/core/common/runner/profile.md new file mode 100644 index 00000000000..387c60f4186 --- /dev/null +++ b/src/runtime_src/core/common/runner/profile.md @@ -0,0 +1,21 @@ + + +# Execution profile for XRT runner + +An execution profile is an extension to a run recipe (see +[recipe](recipe.md)). It automates the run recipe by binding +resources to the XRT runner that executes the run recipe. + +While the `xrt::runner` class can be used stand-alone by an +application or framework that explicitly manages external resources, +the execution profile extends the runner to also manage the external +resources. + +An execution profile is useful for testing of a run recipe. It allows +for one external application controlling execution of a run recipe by +defining: + +- how data is bound to resources +- how validation is performed +- how many times a run-recipe is executed and with what data + diff --git a/src/runtime_src/core/common/runner/recipe.md b/src/runtime_src/core/common/runner/recipe.md new file mode 100644 index 00000000000..b1d325f47b4 --- /dev/null +++ b/src/runtime_src/core/common/runner/recipe.md @@ -0,0 +1,562 @@ + + +# Run recipe for XRT + +A run recipe defines a graph model that can be executed by XRT. + +This directory contains a stand-alone `xrt::runner` class that reads and +executes a run recipe json file. The idea is to have tools, e.g. VAIML +generate the run recipe along with xclbin and control code for kernels. + +The schema of the recipe json is defined in `schema/recipe.schema.json`. The +implementation of the runner drove some of the definition of the json +format. 
+ +A run recipe is associated with exactly one configuration (xclbin or +config elf) which, when loaded into a region (partition) on the +device, can run the recipe. + +# JSON format + +There are three sections in the run recipe. + +1. [header](#header) +2. [resources](#resources) +3. [execution](#execution) + +The `header` trivially contains the path (full name) of the +configuration data that should be loaded before resources can be +created or the recipe can be executed. + +The `resources` section defines all buffer objects, kernel objects, +and cpu function objects used to execute the recipe. The resources are +created as the run recipe is loaded. External input and output buffer +may be bound later during the execution stage of recipe. + +The `execution` section defines how the resources are connected +together during execution. It simply executes kernels and cpu +functions that were previously defined in the resource section with +arguments that were also defined in the resource section. Execution +of kernels can consume partial buffer input and produce partial buffer +output per `size` and `offset` fields defined as part of specifying the +kernel arguments. + +## Header + +For the time being, the header stores nothing but the path to the +xclbin. The xclbin contains the kernel meta data used by XRT when +xrt::kernel objects are created. The xclbin contains PDIs for each +kernel, the PDIs are loaded by firmware prior to running a kernel. + +The header section can be amended with other meta data as needed. + +``` +{ + "header": { + "xclbin": "design.xclbin", + }, + + ... +} +``` + +The runner will use the xclbin from the `header` section to create an +xrt::hw_context, which is subsequently used to create xrt::kernel +objects. + +## Resources + +The resources section is a complete list of all objects that are used +when the recipe is executed. Each kernel used in the `execution` +section must be listed in the resources section. 
All kernel argument +buffers used by kernels in the `execution` section must be listed in +the resources section. Also all functions executed on the CPU must +be listed in the resources section. + +### Kernel functions + +Kernels listed in the resources section result in runner creating +`xrt::kernel` objects. In XRT, the kernel objects are identified by +name, which must match a kernel instance name in the xclbin. + +Kernels are constructed from the instance name and what control code +the kernel should execute. The hardware context associated with the +kernel is created by the runner from the xclbin specified in the +recipe `header` section, so kernels in the resources section must +contain just the kernel instance name and the full path to an ELF with +the control code. + +``` + "resources": { + "kernels": [ + { + "name": "k1", + "instance": "DPU", + "ctrlcode": "no-ctrl-packet.elf" + } + ] + }, +``` + +The name of the kernel in resources section must be unique in the list +of kernel instances, the name is used in the `execution` section to refer +to which instance should be executed. + +If a kernel is instantiated from the same instance kernel name and same +control code, then only one such kernel instance needs to be listed in +the resources section. Listing multiple kernel instances referring to +the same xclbin kernel and using the same control code is not an error, +but is not necessary. + +### CPU functions + +Functions to be executed on the CPU are listed in the resource section +along with a path to a library containing the individual function. +The library will be runtime loaded (dlopen); it will expose functions +through a function pointer that is returned through a query lookup +method, which is returned through a library entry (extern "C") function. 
+ +CPU function arguments are expected to be `xrt::bo` objects, for +example format converting functions will take an input buffer and +populate an output buffer, both buffers must be specified in the +resource buffer section of the recipe. + +A library path is relative to the install location of XRT based on +the environment value of `XILINX_XRT` or from its inferred location if +not set. On windows, the inferred location would be the driver store. + +``` + "resources": { + "cpus": [ + { + "name": "convert_ifm", + "library_path": "umd/convert.dll" + }, + { + "name": "convert_ofm", + "library_path": "umd/convert.dll" + }, + { + "name": "average_pool", + "library_path": "umd/operators.dll" + } + ] + }, +``` + +### Buffer + +The buffer instances listed in the resources section refer to +`xrt::bo` objects that are used during execution of kernels. The +buffers can be graph inputs or outputs, which refer to application +created input and output tensors, or they can be internal buffers used +during execution of the compiled graph at the discretion of the +compiler (VAIML). + +#### External buffers (graph input and output) + +External buffers (input and output) are created by the framework / +application outside of the runner and bound to the recipe during +execution. The runner itself does not create `xrt::bo` objects for +external buffers, but does rely on the framework to bind these buffers +to the runner object created from the recipe. The external buffers must +still be listed in the resources section and specify a name that can +be used when execution sets kernel arguments. + +``` + "resources": { + "buffers": [ + { + "name": "wts", + "type": "input" + }, + { + "name": "ifm", + "type": "input" + }, + { + "name": "ofm", + "type": "output" + } + ] + } + +``` + +The `name` of the buffers in the resources section must be unique. +The name is used in the `execution` section to refer to kernel or cpu +buffer arguments. 
+ + + +#### Internal buffers + +Internal buffers are created and managed by the runner. These are +buffers that are used internally within a graph to carry data from one +kernel or cpu execution to another. + +These buffers are created and managed by runner, hence unlike the +external buffers, the size of an internal buffer must be specified +in the recipe. + +``` + "resources": { + "buffers": [ + { + "name": "ifm_int", + "type": "internal", + "size": "1024" + }, + { + "name": "ofm_int", + "type": "internal", + "size": "1024" + }, + { + "name": "b0", + "type": "internal", + "size": "1024" + }, + { + "name": "b1", + "type": "internal", + "size": "1024" + }, + { + "name": "b2", + "type": "internal", + "size": "1024" + } + ] + } + +``` +The `size` is currently specified in bytes. + +## Execution + +The execution section is an ordered list of kernel or cpu instances +with arguments from the resources section. + +Before the runner can execute the recipe in the execution section, all +graph inputs and outputs must be bound to the recipe. As mentioned +earlier, external inputs and outputs are defined by the framework that +uses the runner. Typically these external inputs and outputs are not +available at the time when the runner is initialized from the recipe +json. In other words, the runner can be created even before the +framework has created input and output tensors, but it can of course +not be executed until the inputs and outputs are defined. The runner +API has methods that must be called to bind the external inputs and +outputs. + +Arguments to a run can be a sub-buffer of the corresponding +resource. A buffer in the resources section refers to the full buffer, +but a run can use just a portion of the resource. By default +a run argument will use the full buffer, but optional attributes in +the json for a buffer can specify the size and an offset into the +resource buffer. + +As an example below, the kernel resource `k1` is executed twice with +3 arguments. 
The 3rd input is a sub-buffer of the `ifm_int` resource, the +4th is the full resource `wts`, and finally the 5th is a +sub-buffer of `ofm_int`. + +The example illustrates the calling of a CPU function from the `cpu` +resources section. The CPU function calls are passed buffers from the +resources section and scalar values as needed. + +``` + "execution": { + "runs": [ + { + "name": "convert_ifm", + "where": "cpu", + "arguments" : [ + { "name": "ifm", "argidx": 0 }, + { "name": "ifm_int", "argidx": 1 } + ], + "constants" : [ + { "value": "nchw2nchw4c", "type": "string", "argidx": 2 } + ] + ] + }, + { + "name": "k1", + "arguments" : [ + { "name": "ifm_int", "size": 512, "offset": 0, "argidx": 3 }, + { "name": "wts", "argidx": 4 }, + { "name": "ofm_int", "size": 512, "offset": 512, "argidx": 5 } + ] + }, + { + "name": "k1", + "arguments" : [ + { "name": "ifm_int", "size": 512, "offset": 512, "argidx": 3 }, + { "name": "wts", "argidx": 4 }, + { "name": "ofm_int", "size": 512, "offset": 0, "argidx": 5 } + ] + }, + { + "name": "convert_ofm", + "where": "cpu", + "arguments" : [ + { "name": "ofm_int", "argidx": 0 }, + { "name": "ofm", "argidx": 1 } + ], + "constants" : [ + { "value": "nchw4c2nchw", "argidx": 2 } + ] + ] + }, + ... + ] + } +``` + +The runner internally creates sub-buffers out of the specified +resource buffers for each run. Both external and internal +resource buffers can be sliced and diced as required. + +The runner creates `xrt::run` or `xrt_core::cpu::run` objects out of +the specified execution runs. The runner creates a CPU or NPU runlist +for each contiguous sequence of CPU runs or NPU runs specified in the +run recipe. The runlist is inserted into a vector of runlists where +each individual runlist will be executed in sequence, when the +framework calls the runner API execute method. + +In addition to the buffer arguments referring to resource buffers, the +xclbin kernels and cpu functions may have additional arguments that +need to be set. 
For example the current DPU kernel has 8 arguments +and some of these must be set to some sentinel value. Here the +argument with index 0, represents the kernel opcode which specifies +the type of control packet used for the kernel resource object. The +value `3` implies transaction buffer. + +``` + "execution": { + "runs": [ + { + "name": "k1", + "arguments" : [ + { "name": "wts", "argidx": 4 }, + { "name": "ifm", "argidx": 3 }, + { "name": "ofm", "argidx": 5 } + ], + "constants" : [ + { "value": "3", "type": "int", "argidx": 0 }, + { "value": "0", "type": "int", "argidx": 1 }, + { "value": "0", "type": "int", "argidx": 2 }, + { "value": "0", "type": "int", "argidx": 6 }, + { "value": "0", "type": "int", "argidx": 7 } + ] + } + ] + } +``` + +# Complete run recipe + +For illustration here is a simple complete run recipe.json file that +has been validated on NPU. There are no internal buffers and external +input and output are consumed during one kernel execution. See the +`runner/test/recipe.json` for an example leveraging cpu functions. + +``` +{ + "header": { + "xclbin": "design.xclbin" + }, + "resources": { + "buffers": [ + { + "name": "wts", + "type": "input" + }, + { + "name": "ifm", + "type": "input" + }, + { + "name": "ofm", + "type": "output" + } + ], + "kernels": [ + { + "name": "k1", + "instance": "DPU", + "ctrlcode": "no-ctrl-packet.elf" + } + ] + }, + "execution": { + "runs": [ + { + "name": "k1", + "arguments" : [ + { "name": "wts", "argidx": 4 }, + { "name": "ifm", "argidx": 3 }, + { "name": "ofm", "argidx": 5 } + ], + "constants": [ + { "value": "3", "type": "int", "argidx": 0 }, + { "value": "0", "type": "int", "argidx": 1 }, + { "value": "0", "type": "int", "argidx": 2 }, + { "value": "0", "type": "int", "argidx": 6 }, + { "value": "0", "type": "int", "argidx": 7 } + ] + } + ] + } +} +``` + +# Runner API + +The runner is constructed from a recipe json file and a device object. 
+The runner is a standard XRT C++ first class object with the following +API. Include documentation will be beefed up when the runner code is +moved to public XRT. + +``` +class runner_impl; +class runner +{ + std::shared_ptr<runner_impl> m_impl; // probably unique_ptr is enough +public: + // ctor - Create runner from a recipe json + runner(const xrt::device& device, const std::string& recipe); + + // bind_input() - Bind a buffer object to an input tensor + void + bind_input(const std::string& name, const xrt::bo& bo); + + // bind_output() - Bind a buffer object to an output tensor + void + bind_output(const std::string& name, const xrt::bo& bo); + + // execute() - Execute the runner + void + execute(); + + // wait() - Wait for the execution to complete + void + wait(); +}; +``` + +# CPU library requirements + +The run recipe can refer to functions executed on the CPU. These +functions should be implemented in a shared library that can be +loaded at runtime by the runner based on the `resources/cpus` section. + +A referenced library is loaded by the runner, which subsequently looks +for an exported entry point (symbol) called `open` to initialize the shared +library. The `open()` is supposed to return function objects for callback +functions within the library. At the present time, the only callback function +required is the `lookup()` function, which the runner +uses to look up functions referenced in the recipe resources section. + +The `lookup()` function must return the callable function that the +runner is requesting along with the number of arguments this function +expects. If the function the runner is looking for is not available, +then the `lookup()` function should throw an exception (TODO: define +the exact exception to throw). The reason the `lookup()` function is +not itself an exported "extern C" function like `open()` is that the +call semantics must be C++ with the bells and whistles that follow +(exceptions).
+ +The signature of the `extern "C"` exported `open()` function and the +C++ signature of the `lookup()` function are defined in `xrt_runner.h` +under `namespace xrt::cpu { ... }`. + +``` +/** + * The xrt::runner supports execution of CPU functions as well + * as xrt::kernel objects. + * + * The CPU functions are implemented in runtime loaded dynamic + * libraries. A library must define and export a function that + * initializes a callback structure with a lookup function. + * + * The signature of the lookup function must be + * @code + * void lookup_fn(const std::string& name, xrt::cpu::lookup_args* args) + * @endcode + * where the name is the name of the function to lookup and args is a + * structure that the lookup function must populate with the function + * information. + * + * The arguments to the CPU functions are elided via std::any and + * the signature of the CPU functions is fixed to + * @code + * void cpu_function(std::vector<std::any>& args) + * @endcode + * Internally, the CPU library unwraps the arguments and calls the + * actual function. + */ +namespace xrt::cpu { +/** + * struct lookup_args - argument structure for the lookup function + * + * The lookup function takes as arguments the name of the function + * to lookup along with lookup_args to be populated with information + * about the function. + * + * @num_args - number of arguments to function + * @callable - a C++ function object wrapping the function + * + * The callable library functions uses type erasure on their arguments + * through a std::vector of std::any objects. The callable must + * unwrap the std::any objects to its expected type, which is + * cumbersome, but type safe. The type erased arguments allow the + * runner to be generic and not tied to a specific function signature.
+*/ +struct lookup_args +{ + std::uint32_t num_args; + std::function&)> callable; +}; + +/** + * struct library_init_args - argument structure for libray initialization + * + * The library initialization function is the only function exported + * from the run time loaded library. The library initialization + * function is called by the runner when a resource references a + * function in a library and the library is not already loaded. + * + * @lookup_fn - a callback function to be populated with the + * lookup function. + * + * The library initialization function is C callable exported symbol, + * but returns a C++ function pointer to the lookup function. +*/ +struct library_init_args +{ + std::function lookup_fn; +}; + +/** + * library_init_fn - type of the library initialization function + * The name of the library initialization function is fixed to + * "library_init". +*/ +using library_init_fn = void (*)(library_init_args*); +} // xrt::cpu + +``` + +A unit test for the cpu library and corresponding sample run recipe +that references the cpu library is under `test/cpulib.cpp` and +`test/main.cpp` + + + + + diff --git a/src/runtime_src/core/common/runner/runner.cpp b/src/runtime_src/core/common/runner/runner.cpp index d992c796519..064a491bc86 100644 --- a/src/runtime_src/core/common/runner/runner.cpp +++ b/src/runtime_src/core/common/runner/runner.cpp @@ -975,23 +975,169 @@ class recipe } }; // class recipe + +// A runner_impl (xrt::runner) always has a run recipe object and +// optionally a execution profile object. The latter is optional and default +// created from an in-mermory json. +// +// The profile implements the runner_impl bind APIs and +// execute/wait APIs, these APIs forward to the run recipe object +// and must be called for the default execution recipe. +// +// An external execution profile can be used to initialize run recipe +// resources at runner initialization time bind +// resources per the recipe. 
The calling application can still +// explicitly bind via the xrt::runner APIs, which may override +// the binding done by the execution recipe. +class profile +{ + class bindings + { + using name_t = std::string; + using path_t = std::string; + + // Map of resource names to file paths. Ths comes directly from + // the profile json. + std::map m_paths; + + // Map of resource names to buffers. The buffers are initialized + // with data loaded from the file path corresponding to the + // resource name. + std::map m_bindings; + + // Create a map of resource names to file paths from the profile json + static std::map + init_paths(const boost::property_tree::ptree& pt) + { + std::map paths; + for (const auto& [name, node] : pt) + paths.emplace(name, node.get("file")); + + return paths; + } + + // Create a map of resource names to buffers initialized with data + // from the file paths. The data is cached in an artifacts::repo + static std::map + create_bindings(const xrt::device& device, + const std::map& paths, + const artifacts::repo& repo) + { + std::map bindings; + for (const auto& [name, path] : paths) { + const auto& data = repo.get(path); + xrt::bo bo = xrt::ext::bo{device, data.size()}; + auto bo_data = bo.map(); + std::copy(data.data(), data.data() + data.size(), bo_data); + bindings.emplace(name, std::move(bo)); + } + return bindings; + } + + // Reset a specific binding to its original value. The data is + // retrived from the artifacts repo data member that was cached + // during initialization of the profile bindings. 
+ void + reset(const std::string& name, xrt::bo& bo, const artifacts::repo& repo) + { + const auto& data = repo.get(m_paths[name]); + if (bo.size() != data.size()) + throw std::runtime_error("binding size mismatch during reset"); + + auto bo_data = bo.map(); + std::copy(data.data(), data.data() + data.size(), bo_data); + } + + public: + bindings() = default; + + bindings(const xrt::device& device, const boost::property_tree::ptree& pt, const artifacts::repo& repo) + : m_paths{init_paths(pt)} + , m_bindings{create_bindings(device, m_paths, repo)} + {} + + // Reset all bindings to their original values + void + reset(const artifacts::repo& repo) + { + for (auto& [name, bo] : m_bindings) + reset(name, bo, repo); + } + + // Reset a specific binding to its original value + void + reset(const std::string& name, const artifacts::repo& repo) + { + auto& bo = m_bindings.at(name); + reset(name, bo, repo); + } + + const std::map& + get_bindings() const + { + return m_bindings; + } + }; // class profile::bindings + + class execution + { + size_t m_iterations = 1; + + }; // class profile::execution + +private: + boost::property_tree::ptree m_profile; + artifacts::file_repo m_repo; + xrt::device m_device; + recipe* m_recipe = nullptr; + bindings m_bindings; + + static boost::property_tree::ptree + load(const std::string& path) + { + boost::property_tree::ptree pt; + boost::property_tree::read_json(path, pt); + return pt; + } + +public: + profile(xrt::device device, recipe* rr, const std::string& profile) + : m_profile{load(profile)} + , m_device{std::move(device)} + , m_recipe{rr} + , m_bindings{m_device, m_profile.get_child("bindings"), m_repo} + {} + + const std::map& + get_bo_bindings() const + { + return m_bindings.get_bindings(); + } +}; // class profile + } // namespace namespace xrt_core { -// class runner_impl - +// class runner_impl - Insulated implementation of xrt::runner +// +// Manages a run recipe and an execution profile. 
// -// A runner implementation is default created with one instance of a -// recipe. But the runner can be used by multiple threads and new -// recipe instances are created for each thread as needed. +// The recipe defines the resources and how to run a model. // -// The runner can be created from any thread, but member functions -// are thread specific. +// The profile controls how resources are bound to the recipe and how +// the recipe is executed, e.g. number of times, debug info, +// validation, etc. class runner_impl { - //std::map m_recipes; recipe m_recipe; - //thread_local recipe m_thread_recipe; + +protected: + recipe* + get_recipe() + { + return &m_recipe; + } public: runner_impl(const xrt::device& device, const std::string& recipe) @@ -1002,36 +1148,58 @@ class runner_impl : m_recipe{device, recipe, artifacts::ram_repo(artifacts)} {} - void + virtual ~runner_impl() = default; + + virtual void bind_input(const std::string& name, const xrt::bo& bo) { - m_recipe.bind_input(name, bo); + m_recipe.bind(name, bo); } - void + virtual void bind_output(const std::string& name, const xrt::bo& bo) { - m_recipe.bind_output(name, bo); + m_recipe.bind(name, bo); } - void + virtual void bind(const std::string& name, const xrt::bo& bo) { m_recipe.bind(name, bo); } - void + virtual void execute() { m_recipe.execute(); } - void + virtual void wait() { m_recipe.wait(); } -}; +}; // class runner_impl + +class profile_impl : public runner_impl +{ + profile m_profile; + +public: + profile_impl(const xrt::device& device, const std::string& recipe, const std::string& profile) + : runner_impl{device, recipe} + , m_profile{device, get_recipe(), profile} + {} + + void + execute() override + { + for (auto& [name, bo] : m_profile.get_bo_bindings()) + runner_impl::bind(name, bo); + + runner_impl::execute(); + } +}; // class profile_impl //////////////////////////////////////////////////////////////// // Public runner interface APIs @@ -1046,6 +1214,11 @@ runner(const xrt::device& 
device, const std::string& recipe, const artifacts_rep : m_impl{std::make_unique(device, recipe, repo)} {} +runner:: +runner(const xrt::device& device, const std::string& recipe, const std::string& profile) + : m_impl{std::make_unique(device, recipe, profile)} +{} + void runner:: bind_input(const std::string& name, const xrt::bo& bo) diff --git a/src/runtime_src/core/common/runner/runner.h b/src/runtime_src/core/common/runner/runner.h index 787c6b98c51..2d6d5d2c433 100644 --- a/src/runtime_src/core/common/runner/runner.h +++ b/src/runtime_src/core/common/runner/runner.h @@ -46,6 +46,10 @@ class runner XRT_CORE_COMMON_EXPORT runner(const xrt::device& device, const std::string& recipe, const artifacts_repository&); + // ctor - Create runner from a recipe json and execution profile json + XRT_CORE_COMMON_EXPORT + runner(const xrt::device& device, const std::string& recipe, const std::string& profile); + // bind_input() - Bind a buffer object to an input tensor XRT_CORE_COMMON_EXPORT void diff --git a/src/runtime_src/core/common/runner/test/profile.json b/src/runtime_src/core/common/runner/test/profile.json new file mode 100644 index 00000000000..5044d03ed81 --- /dev/null +++ b/src/runtime_src/core/common/runner/test/profile.json @@ -0,0 +1,23 @@ +{ + "version": "1.0", + "type": "execution profile", + + "bindings": [ + "wts": { + "file": "wts.bin" + }, + "ifm": { + "file": "ifm.bin" + }, + "ofm": { + "file": "ofm.bin" + } + ] + + "execution" : { + "iterations": 1, + "validation": { + "file": "gold.bin" + } + } +} diff --git a/src/runtime_src/core/common/runner/test/recipe.json b/src/runtime_src/core/common/runner/test/recipe.json index fa4cf0896e8..c1bd3c0dd52 100644 --- a/src/runtime_src/core/common/runner/test/recipe.json +++ b/src/runtime_src/core/common/runner/test/recipe.json @@ -1,8 +1,10 @@ { "version": "1.0", + "header": { "xclbin": "design.xclbin" }, + "resources": { "buffers": [ { @@ -46,6 +48,7 @@ } ] }, + "execution": { "runs": [ { From 
908b3219b12992a3f4f993535fa0b4440e813638 Mon Sep 17 00:00:00 2001 From: Soren Soe <2106410+stsoe@users.noreply.github.com> Date: Tue, 1 Apr 2025 16:08:18 -0700 Subject: [PATCH 2/4] Extend profile json with bind, init, and validate nodes Signed-off-by: Soren Soe <2106410+stsoe@users.noreply.github.com> --- src/runtime_src/core/common/runner/runner.cpp | 283 +++++++++++++----- src/runtime_src/core/common/runner/runner.h | 25 +- .../core/common/runner/test/CMakeLists.txt | 7 +- .../core/common/runner/test/profile.json | 39 ++- .../common/runner/test/runner-profile.cpp | 102 +++++++ 5 files changed, 361 insertions(+), 95 deletions(-) create mode 100644 src/runtime_src/core/common/runner/test/runner-profile.cpp diff --git a/src/runtime_src/core/common/runner/runner.cpp b/src/runtime_src/core/common/runner/runner.cpp index 064a491bc86..ef31478afe4 100644 --- a/src/runtime_src/core/common/runner/runner.cpp +++ b/src/runtime_src/core/common/runner/runner.cpp @@ -33,6 +33,8 @@ # pragma warning (pop) #endif +#include +#include #include #include #include @@ -48,6 +50,17 @@ namespace { const boost::property_tree::ptree default_ptree; +template +static boost::property_tree::ptree +get_optional(const OptionalType& node) +{ +#if BOOST_VERSION >= 105600 + return node.value(); +#else + return node.get(); +#endif +} + // struct streambuf - wrap a std::streambuf around an external buffer // // This is used create elf files from memory through a std::istream @@ -110,22 +123,38 @@ class repo // Artifacts are loaded from disk and stored in persistent storage class file_repo : public repo { + std::filesystem::path base_dir; + public: + file_repo() + : base_dir{"."} + {} + + file_repo(std::filesystem::path basedir) + : base_dir{std::move(basedir)} + {} + const std::vector& get(const std::string& path) const override { - if (auto it = m_data.find(path); it != m_data.end()) + std::filesystem::path full_path = base_dir / path; + if (!std::filesystem::exists(full_path)) + throw 
std::runtime_error{"File not found: " + full_path.string()}; + + auto key = full_path.string(); + if (auto it = m_data.find(key); it != m_data.end()) return (*it).second; - std::ifstream ifs(path, std::ios::binary); + std::ifstream ifs(key, std::ios::binary); if (!ifs) - throw std::runtime_error{"Failed to open file: " + path}; + throw std::runtime_error{"Failed to open file: " + key}; ifs.seekg(0, std::ios::end); std::vector data(ifs.tellg()); ifs.seekg(0, std::ios::beg); ifs.read(data.data(), data.size()); - auto [itr, success] = m_data.emplace(path, std::move(data)); + auto [itr, success] = m_data.emplace(key, std::move(data)); + XRT_DEBUGF("artifacts::file_repo::get(%s) -> %s\n", path.c_str(), success ? "success" : "failure"); return (*itr).second; } @@ -149,6 +178,7 @@ class ram_repo : public repo if (auto it = m_reference.find(path); it != m_reference.end()) { auto [itr, success] = m_data.emplace(path, it->second); + XRT_DEBUGF("artifacts::ram_repo::get(%s) -> %s\n", path.c_str(), success ? 
"success" : "failure"); return (*itr).second; } @@ -177,12 +207,12 @@ get(const xrt::elf& elf) } static xrt::module -get(const std::string& path, const artifacts::repo& repo) +get(const std::string& path, const artifacts::repo* repo) { if (auto it = s_path2elf.find(path); it != s_path2elf.end()) return get((*it).second); - auto& data = repo.get(path); + auto& data = repo->get(path); streambuf buf{data.data(), data.data() + data.size()}; std::istream is{&buf}; xrt::elf elf{is}; @@ -201,15 +231,15 @@ class recipe xrt::xclbin m_xclbin; static xrt::xclbin - read_xclbin(const boost::property_tree::ptree& pt, const artifacts::repo& repo) + read_xclbin(const boost::property_tree::ptree& pt, const artifacts::repo* repo) { auto path = pt.get("xclbin"); - auto& data = repo.get(path); + auto& data = repo->get(path); return xrt::xclbin{data}; } public: - header(const boost::property_tree::ptree& pt, const artifacts::repo& repo) + header(const boost::property_tree::ptree& pt, const artifacts::repo* repo) : m_xclbin{read_xclbin(pt, repo)} { XRT_DEBUGF("Loaded xclbin: %s\n", m_xclbin.get_uuid().to_string().c_str()); @@ -352,7 +382,7 @@ class recipe // The kernel control module is created if necessary. 
static kernel create_kernel(const xrt::hw_context& hwctx, const boost::property_tree::ptree& pt, - const artifacts::repo& repo) + const artifacts::repo* repo) { auto name = pt.get("name"); // required, default xclbin kernel name auto elf = pt.get("ctrlcode", ""); // optional elf file @@ -439,7 +469,7 @@ class recipe // create_kernels - create kernel objects from kernel property tree nodes static std::map create_kernels(xrt::device device, const xrt::hw_context& hwctx, - const boost::property_tree::ptree& pt, const artifacts::repo& repo) + const boost::property_tree::ptree& pt, const artifacts::repo* repo) { std::map kernels; for (const auto& [name, node] : pt) @@ -461,7 +491,7 @@ class recipe public: resources(xrt::device device, const xrt::xclbin& xclbin, - const boost::property_tree::ptree& recipe, const artifacts::repo& repo) + const boost::property_tree::ptree& recipe, const artifacts::repo* repo) : m_device{std::move(device)} , m_hwctx{m_device, m_device.register_xclbin(xclbin)} , m_buffers{create_buffers(m_device, recipe.get_child("buffers"))} @@ -923,7 +953,7 @@ class recipe } public: - recipe(xrt::device device, const std::string& path, const artifacts::repo& repo) + recipe(xrt::device device, const std::string& path, const artifacts::repo* repo) : m_device{std::move(device)} , m_recipe{load(path)} , m_header{m_recipe.get_child("header"), repo} @@ -995,87 +1025,124 @@ class profile { using name_t = std::string; using path_t = std::string; + using binding_node = boost::property_tree::ptree; + using validate_node = boost::property_tree::ptree; - // Map of resource names to file paths. Ths comes directly from - // the profile json. - std::map m_paths; + // Map of resource name to json binding element. This comes + // directly from the profile json. + std::map m_bindings; // Map of resource names to buffers. The buffers are initialized // with data loaded from the file path corresponding to the // resource name. 
- std::map m_bindings; + std::map m_bo_bindings; - // Create a map of resource names to file paths from the profile json - static std::map - init_paths(const boost::property_tree::ptree& pt) + // Create a map of resource names to json binding nodes from the profile json + static std::map + init_bindings(const boost::property_tree::ptree& pt) { - std::map paths; + std::map bindings; for (const auto& [name, node] : pt) - paths.emplace(name, node.get("file")); + bindings.emplace(node.get("name"), node); - return paths; + return bindings; } // Create a map of resource names to buffers initialized with data // from the file paths. The data is cached in an artifacts::repo static std::map - create_bindings(const xrt::device& device, - const std::map& paths, - const artifacts::repo& repo) + create_buffers(const xrt::device& device, + const std::map& bindings, + const artifacts::repo* repo) { - std::map bindings; - for (const auto& [name, path] : paths) { - const auto& data = repo.get(path); + std::map bos; + for (const auto& [name, node] : bindings) { + const auto& data = repo->get(node.get("file")); xrt::bo bo = xrt::ext::bo{device, data.size()}; auto bo_data = bo.map(); std::copy(data.data(), data.data() + data.size(), bo_data); - bindings.emplace(name, std::move(bo)); + bo.sync(XCL_BO_SYNC_BO_TO_DEVICE); + bos.emplace(node.get("name"), std::move(bo)); } - return bindings; + return bos; } - // Reset a specific binding to its original value. The data is - // retrived from the artifacts repo data member that was cached - // during initialization of the profile bindings. 
- void - reset(const std::string& name, xrt::bo& bo, const artifacts::repo& repo) + // Validate a resource buffer per the validate json node + static void + validate_buffer(xrt::bo& bo, const validate_node& node, const artifacts::repo* repo) { - const auto& data = repo.get(m_paths[name]); - if (bo.size() != data.size()) - throw std::runtime_error("binding size mismatch during reset"); + const auto& golden_data = repo->get(node.get("file")); + // here we could extract offset and size of region to validate + + bo.sync(XCL_BO_SYNC_BO_FROM_DEVICE); + auto bo_data = bo.map(); + if (bo.size() != golden_data.size()) + throw std::runtime_error("Size mismatch during validation"); + + if (!std::equal(golden_data.data(), golden_data.data() + golden_data.size(), bo_data)) { + for (uint64_t i = 0; i < golden_data.size(); ++i) { + if (golden_data[i] != bo_data[i]) + throw std::runtime_error("gold[" + std::to_string(i) + "] = " + std::to_string(golden_data[i]) + + " does not match bo value " + std::to_string(bo_data[i])); + } + } + } - auto bo_data = bo.map(); - std::copy(data.data(), data.data() + data.size(), bo_data); + // Initialize a resource buffer per the binding json node + static void + init_buffer(xrt::bo& bo, const binding_node& node) + { + // Get the pattern, which must be one character + auto pattern = node.get("pattern"); + if (pattern.size() != 1) + throw std::runtime_error("pattern size must be 1"); + + // Fill the resource buffer with the pattern + auto bo_data = bo.map(); + std::fill(bo_data, bo_data + bo.size(), pattern[0]); + bo.sync(XCL_BO_SYNC_BO_TO_DEVICE); } public: bindings() = default; - bindings(const xrt::device& device, const boost::property_tree::ptree& pt, const artifacts::repo& repo) - : m_paths{init_paths(pt)} - , m_bindings{create_bindings(device, m_paths, repo)} + bindings(const xrt::device& device, const boost::property_tree::ptree& pt, const artifacts::repo* repo) + : m_bindings{init_bindings(pt)} + , m_bo_bindings{create_buffers(device, 
m_bindings, repo)} {} - // Reset all bindings to their original values + // Validate resource buffers per json. Validation is per bound buffer + // as defined in the profile json. void - reset(const artifacts::repo& repo) + validate(const artifacts::repo* repo) { - for (auto& [name, bo] : m_bindings) - reset(name, bo, repo); + for (auto& [name, node] : m_bindings) { + if (auto validate_node = node.get_child_optional("validate")) { + validate_buffer(m_bo_bindings.at(name), get_optional(validate_node), repo); + } + } } - // Reset a specific binding to its original value + // Init bindings per json. Initialization is done by filling a + // pattern into a buffer that requires initialization. The + // pattern is currently limited to a single character. void - reset(const std::string& name, const artifacts::repo& repo) + init() { - auto& bo = m_bindings.at(name); - reset(name, bo, repo); + for (auto& [name, node] : m_bindings) { + if (auto init_node = node.get_child_optional("init")) + init_buffer(m_bo_bindings.at(name), get_optional(init_node)); + } } - const std::map& - get_bindings() const + // Bind resources to the recipe per json + void + bind(recipe* rr) { - return m_bindings; + for (auto& [name, node] : m_bindings) { + if (node.get("bind", false)) + rr->bind(name, m_bo_bindings.at(name)); + } } }; // class profile::bindings @@ -1087,7 +1154,7 @@ class profile private: boost::property_tree::ptree m_profile; - artifacts::file_repo m_repo; + std::shared_ptr m_repo; xrt::device m_device; recipe* m_recipe = nullptr; bindings m_bindings; @@ -1101,17 +1168,50 @@ class profile } public: - profile(xrt::device device, recipe* rr, const std::string& profile) + profile(xrt::device device, recipe* rr, const std::string& profile, + std::shared_ptr repo) : m_profile{load(profile)} + , m_repo{std::move(repo)} , m_device{std::move(device)} , m_recipe{rr} - , m_bindings{m_device, m_profile.get_child("bindings"), m_repo} + , m_bindings{m_device, m_profile.get_child("bindings"), 
m_repo.get()} {} - const std::map& - get_bo_bindings() const + void + bind() { - return m_bindings.get_bindings(); + m_bindings.bind(m_recipe); + } + + void + init() + { + m_bindings.init(); + } + + void + validate() + { + m_bindings.validate(m_repo.get()); + } + + void + execute() + { + // TBD, fill out execution control and pass control + // there. This will handle iterations and other + bind(); + init(); + + m_recipe->execute(); + } + + void + wait() + { + m_recipe->wait(); + + validate(); } }; // class profile @@ -1140,12 +1240,9 @@ class runner_impl } public: - runner_impl(const xrt::device& device, const std::string& recipe) - : m_recipe{device, recipe, artifacts::file_repo{}} - {} - - runner_impl(const xrt::device& device, const std::string& recipe, const runner::artifacts_repository& artifacts) - : m_recipe{device, recipe, artifacts::ram_repo(artifacts)} + runner_impl(const xrt::device& device, const std::string& recipe, + const std::shared_ptr& repo) + : m_recipe{device, recipe, repo.get()} {} virtual ~runner_impl() = default; @@ -1186,18 +1283,23 @@ class profile_impl : public runner_impl profile m_profile; public: - profile_impl(const xrt::device& device, const std::string& recipe, const std::string& profile) - : runner_impl{device, recipe} - , m_profile{device, get_recipe(), profile} + profile_impl(const xrt::device& device, + const std::string& recipe, const std::string& profile, + const std::shared_ptr& repo) + : runner_impl{device, recipe, repo} + , m_profile{device, get_recipe(), profile, repo} {} void execute() override { - for (auto& [name, bo] : m_profile.get_bo_bindings()) - runner_impl::bind(name, bo); + m_profile.execute(); + } - runner_impl::execute(); + void + wait() override + { + m_profile.wait(); } }; // class profile_impl @@ -1205,18 +1307,41 @@ class profile_impl : public runner_impl // Public runner interface APIs //////////////////////////////////////////////////////////////// runner:: -runner(const xrt::device& device, const 
std::string& recipe) - : m_impl{std::make_unique(device, recipe)} +runner(const xrt::device& device, + const std::string& recipe) + : m_impl{std::make_unique + (device, recipe, std::make_shared())} {} runner:: -runner(const xrt::device& device, const std::string& recipe, const artifacts_repository& repo) - : m_impl{std::make_unique(device, recipe, repo)} +runner(const xrt::device& device, + const std::string& recipe, + const std::filesystem::path& dir) + : m_impl{std::make_unique + (device, recipe, std::make_shared(dir))} +{} + +runner:: +runner(const xrt::device& device, + const std::string& recipe, + const artifacts_repository& repo) + : m_impl{std::make_unique + (device, recipe, std::make_shared(repo))} +{} + +runner:: +runner(const xrt::device& device, + const std::string& recipe, const std::string& profile) + : m_impl{std::make_unique + (device, recipe, profile, std::make_shared())} {} runner:: -runner(const xrt::device& device, const std::string& recipe, const std::string& profile) - : m_impl{std::make_unique(device, recipe, profile)} +runner(const xrt::device& device, + const std::string& recipe, const std::string& profile, + const std::filesystem::path& dir) + : m_impl{std::make_unique + (device, recipe, profile, std::make_shared(dir))} {} void diff --git a/src/runtime_src/core/common/runner/runner.h b/src/runtime_src/core/common/runner/runner.h index 2d6d5d2c433..c2684fb5798 100644 --- a/src/runtime_src/core/common/runner/runner.h +++ b/src/runtime_src/core/common/runner/runner.h @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -37,19 +38,37 @@ class runner */ using artifacts_repository = std::map>; - // ctor - Create runner from a recipe json + // ctor - Create runner from a recipe json. + // Any artifacts referenced by the recipe are looked up in the + // current directory. 
XRT_CORE_COMMON_EXPORT runner(const xrt::device& device, const std::string& recipe); + // ctor - Create runner from a recipe json and path to directory + // with artifacts + XRT_CORE_COMMON_EXPORT + runner(const xrt::device& device, const std::string& recipe, + const std::filesystem::path& artifacts_dir); + // ctor - Create runner from a recipe json and artifacts repository - // The lifetime of the repo must extend the lifetime of the runner + // The repo is not copied so the lifetime of the repo must extend + // the lifetime of the runner. XRT_CORE_COMMON_EXPORT - runner(const xrt::device& device, const std::string& recipe, const artifacts_repository&); + runner(const xrt::device& device, const std::string& recipe, + const artifacts_repository&); // ctor - Create runner from a recipe json and execution profile json + // Any artifacts referenced by recipe and profile are looked up in + // the current directory. XRT_CORE_COMMON_EXPORT runner(const xrt::device& device, const std::string& recipe, const std::string& profile); + // ctor - Create runner from a recipe json and execution profile + // json and path to directory with artifacts. 
+ XRT_CORE_COMMON_EXPORT + runner(const xrt::device& device, const std::string& recipe, const std::string& profile, + const std::filesystem::path& artifacts_dir); + // bind_input() - Bind a buffer object to an input tensor XRT_CORE_COMMON_EXPORT void diff --git a/src/runtime_src/core/common/runner/test/CMakeLists.txt b/src/runtime_src/core/common/runner/test/CMakeLists.txt index 1d519d5f40d..24a0ab9d57d 100644 --- a/src/runtime_src/core/common/runner/test/CMakeLists.txt +++ b/src/runtime_src/core/common/runner/test/CMakeLists.txt @@ -23,10 +23,15 @@ add_executable(recipe recipe.cpp) target_include_directories(recipe PRIVATE ${XRT_INCLUDE_DIRS} ${XRT_ROOT}/src/runtime_src) target_link_libraries(recipe PRIVATE XRT::xrt_coreutil) +add_executable(runner-profile runner-profile.cpp) +target_include_directories(runner-profile PRIVATE ${XRT_INCLUDE_DIRS} ${XRT_ROOT}/src/runtime_src) +target_link_libraries(runner-profile PRIVATE XRT::xrt_coreutil) + if (NOT WIN32) target_link_libraries(runner PRIVATE pthread uuid dl) + target_link_libraries(runner-profile PRIVATE pthread uuid dl) target_link_libraries(recipe PRIVATE pthread uuid dl) endif() -install(TARGETS runner recipe) +install(TARGETS runner runner-profile recipe) diff --git a/src/runtime_src/core/common/runner/test/profile.json b/src/runtime_src/core/common/runner/test/profile.json index 5044d03ed81..63fa38fe725 100644 --- a/src/runtime_src/core/common/runner/test/profile.json +++ b/src/runtime_src/core/common/runner/test/profile.json @@ -1,23 +1,38 @@ { "version": "1.0", - "type": "execution profile", "bindings": [ - "wts": { - "file": "wts.bin" + { + "name": "wts", + "file": "wts.bin", + "bind": true }, - "ifm": { - "file": "ifm.bin" + { + "name": "ifm", + "file": "ifm.bin", + "bind": true }, - "ofm": { - "file": "ofm.bin" - } - ] + { + "name": "ofm", + "file": "ofm.bin", + "bind": true, + "init": { + "pattern": "A", + }, + "validate": { + "size": 0, + "offset": 0, + "file": "gold.bin" + } + } + ], "execution" : { - 
"iterations": 1, - "validation": { - "file": "gold.bin" + "iterations": 2, + "iteration" : { + "bind": false, + "init": true, + "validate": true } } } diff --git a/src/runtime_src/core/common/runner/test/runner-profile.cpp b/src/runtime_src/core/common/runner/test/runner-profile.cpp new file mode 100644 index 00000000000..c5474ca011c --- /dev/null +++ b/src/runtime_src/core/common/runner/test/runner-profile.cpp @@ -0,0 +1,102 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved. + +// This test configures and runs a recipe one time +// g++ -g -std=c++17 +// -I/home/stsoe/git/stsoe/XRT/build/Debug/opt/xilinx/xrt/include +// -I/home/stsoe/git/stsoe/XRT/src/runtime_src +// -L/home/stsoe/git/stsoe/XRT/build/Debug/opt/xilinx/xrt/lib +// -o runner-profile.exe runner-profile.cpp -lxrt_coreutil -pthread +// +// or +// +// mkdir build +// cd build +// cmake -DXILINX_XRT=/home/stsoe/git/stsoe/XRT/build/Debug/opt/xilinx/xrt +// -DXRT_ROOT=/home/stsoe/git/stsoe/XRT .. +// cmake --build . --config Debug +// +// ./runner.exe --recipe ... --profile ... [--dir ...] 
+ +#include "xrt/xrt_device.h" +#include "experimental/xrt_ext.h" +#include "core/common/runner/runner.h" + +#include +#include +#include +#include +#include +#include +#include + +static void +usage() +{ + std::cout << "usage: %s [options]\n"; + std::cout << " --recipe recipe file to run\n"; + std::cout << " --profile execution profile\n"; + std::cout << " [--dir ] directory containing artifacts (default: current dir)\n"; + std::cout << "\n\n"; + std::cout << "runner.exe --recipe recipe.json --profile profile.json\n"; +} + +static void +run(const std::string& recipe, + const std::string& profile, + const std::string& dir) +{ + xrt::device device{0}; + xrt_core::runner runner {device, recipe, profile, dir}; + runner.execute(); + runner.wait(); +} + +static void +run(int argc, char* argv[]) +{ + std::vector args(argv+1,argv+argc); + std::string cur; + std::string recipe; + std::string profile; + std::string dir = "."; + for (auto& arg : args) { + if (arg == "-h") { + usage(); + return; + } + + if (arg[0] == '-') { + cur = arg; + continue; + } + + if (cur == "--recipe") + recipe = arg; + else if (cur == "--profile") + profile = arg; + else if (cur == "--dir") + dir = arg; + else + throw std::runtime_error("Unknown option value " + cur + " " + arg); + } + + run(recipe, profile, dir); +} + +int +main(int argc, char **argv) +{ + try { + run(argc, argv); + return 0; + } + catch (const std::exception& ex) { + std::cerr << "Error: " << ex.what() << '\n'; + } + catch (...) 
{ + std::cerr << "Unknown error\n"; + } + return 1; + +} From b9be79e669c44283b6df53aa4ef9a757e880c250 Mon Sep 17 00:00:00 2001 From: Soren Soe <2106410+stsoe@users.noreply.github.com> Date: Thu, 3 Apr 2025 10:53:49 -0700 Subject: [PATCH 3/4] Few more keys to profile::execution Signed-off-by: Soren Soe <2106410+stsoe@users.noreply.github.com> --- src/runtime_src/core/common/runner/runner.cpp | 91 +++++++++++++++---- 1 file changed, 72 insertions(+), 19 deletions(-) diff --git a/src/runtime_src/core/common/runner/runner.cpp b/src/runtime_src/core/common/runner/runner.cpp index ef31478afe4..cbc58da51c0 100644 --- a/src/runtime_src/core/common/runner/runner.cpp +++ b/src/runtime_src/core/common/runner/runner.cpp @@ -1148,16 +1148,61 @@ class profile class execution { - size_t m_iterations = 1; + using iteration_node = boost::property_tree::ptree; + profile* m_profile; + size_t m_iterations; + iteration_node m_iteration; + + void + execute_iteration(size_t idx) + { + // (Re)bind buffers to recipe if requested + if (m_iteration.get("bind")) + m_profile->bind(); + + // Initialize buffers if requested + if (m_iteration.get("init")) + m_profile->init(); + + m_profile->execute_recipe(); + + // Wait execution to complete if requested + if (m_iteration.get("wait")) + m_profile->wait_recipe(); + + // Validate if requested (implies wait) + if (m_iteration.get("validate")) + m_profile->validate(); + } + + public: + execution(profile* pr, const boost::property_tree::ptree& pt) + : m_profile(pr) + , m_iterations(pt.get("iterations")) + , m_iteration(pt.get_child("iteration")) + { + // Bind buffers to the recipe prior to executing the recipe + m_profile->bind(); + } + + void + execute() + { + for (size_t i = 0; i < m_iterations; ++i) + execute_iteration(i); + } }; // class profile::execution private: + friend class bindings; // embedded class + friend class execution; // embedded class boost::property_tree::ptree m_profile; std::shared_ptr m_repo; xrt::device m_device; recipe* 
m_recipe = nullptr; bindings m_bindings; + execution m_execution; static boost::property_tree::ptree load(const std::string& path) @@ -1167,16 +1212,6 @@ class profile return pt; } -public: - profile(xrt::device device, recipe* rr, const std::string& profile, - std::shared_ptr repo) - : m_profile{load(profile)} - , m_repo{std::move(repo)} - , m_device{std::move(device)} - , m_recipe{rr} - , m_bindings{m_device, m_profile.get_child("bindings"), m_repo.get()} - {} - void bind() { @@ -1196,22 +1231,40 @@ class profile } void - execute() + execute_recipe() { - // TBD, fill out execution control and pass control - // there. This will handle iterations and other - bind(); - init(); - m_recipe->execute(); } void - wait() + wait_recipe() { m_recipe->wait(); + } + + +public: + profile(xrt::device device, recipe* rr, const std::string& profile, + std::shared_ptr repo) + : m_profile{load(profile)} + , m_repo{std::move(repo)} + , m_device{std::move(device)} + , m_recipe{rr} + , m_bindings{m_device, m_profile.get_child("bindings"), m_repo.get()} + , m_execution(this, m_profile.get_child("execution")) + {} - validate(); + void + execute() + { + m_execution.execute(); + } + + void + wait() + { + // waiting is controlled through execution in json + // so a noop here } }; // class profile From 18750843eec81c52de1f3c73288a8d2b06a7b451 Mon Sep 17 00:00:00 2001 From: Soren Soe <2106410+stsoe@users.noreply.github.com> Date: Thu, 3 Apr 2025 13:53:16 -0700 Subject: [PATCH 4/4] Make artifacts repo return string_view The std::vector return type by reference was awkward. Should really be a std::span (c++20). Add an xrt::xclbin ctor taking std::string_view. Add comments to runner.cpp. 
Signed-off-by: Soren Soe <2106410+stsoe@users.noreply.github.com> --- .../core/common/api/xrt_device.cpp | 2 +- .../core/common/api/xrt_xclbin.cpp | 5 + src/runtime_src/core/common/runner/runner.cpp | 184 +++++++++++++----- .../include/xrt/experimental/xrt_xclbin.h | 14 ++ 4 files changed, 151 insertions(+), 54 deletions(-) diff --git a/src/runtime_src/core/common/api/xrt_device.cpp b/src/runtime_src/core/common/api/xrt_device.cpp index dc2b2c3ea02..a627cf8d958 100644 --- a/src/runtime_src/core/common/api/xrt_device.cpp +++ b/src/runtime_src/core/common/api/xrt_device.cpp @@ -612,7 +612,7 @@ xrtDeviceLoadXclbinFile(xrtDeviceHandle dhdl, const char* fnm) { try { return xdp::native::profiling_wrapper(__func__, [dhdl, fnm]{ - xrt::xclbin xclbin{fnm}; + xrt::xclbin xclbin{std::string{fnm}}; auto device = device_cache.get_or_error(dhdl); device->load_xclbin(xclbin); return 0; diff --git a/src/runtime_src/core/common/api/xrt_xclbin.cpp b/src/runtime_src/core/common/api/xrt_xclbin.cpp index 8f9d60f8914..fc857d6aa84 100644 --- a/src/runtime_src/core/common/api/xrt_xclbin.cpp +++ b/src/runtime_src/core/common/api/xrt_xclbin.cpp @@ -1035,6 +1035,11 @@ xclbin(const std::vector& data) : detail::pimpl(std::make_shared(data)) {} +xclbin:: +xclbin(const std::string_view& data) + : detail::pimpl(std::make_shared(std::vector{data.begin(), data.end()})) +{} + xclbin:: xclbin(const axlf* top) : detail::pimpl(std::make_shared(top)) diff --git a/src/runtime_src/core/common/runner/runner.cpp b/src/runtime_src/core/common/runner/runner.cpp index cbc58da51c0..ed6ea6c2d65 100644 --- a/src/runtime_src/core/common/runner/runner.cpp +++ b/src/runtime_src/core/common/runner/runner.cpp @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -115,8 +116,17 @@ class repo public: virtual ~repo() = default; - virtual const std::vector& + // Should be std::span, but not until c++20 + virtual const std::string_view get(const std::string& path) const = 0; + + // Should be 
std::span, but not until c++20 + static std::string_view + to_sv(const std::vector& vec) + { + // return {vec.begin(), vec.end()}; + return {vec.data(), vec.size()}; + } }; // class file_repo - file system artifact repository @@ -134,7 +144,7 @@ class file_repo : public repo : base_dir{std::move(basedir)} {} - const std::vector& + const std::string_view get(const std::string& path) const override { std::filesystem::path full_path = base_dir / path; @@ -143,7 +153,7 @@ class file_repo : public repo auto key = full_path.string(); if (auto it = m_data.find(key); it != m_data.end()) - return (*it).second; + return to_sv((*it).second); std::ifstream ifs(key, std::ios::binary); if (!ifs) @@ -156,7 +166,7 @@ class file_repo : public repo auto [itr, success] = m_data.emplace(key, std::move(data)); XRT_DEBUGF("artifacts::file_repo::get(%s) -> %s\n", path.c_str(), success ? "success" : "failure"); - return (*itr).second; + return to_sv((*itr).second); } }; @@ -170,16 +180,16 @@ class ram_repo : public repo : m_reference{data} {} - const std::vector& + const std::string_view get(const std::string& path) const override { if (auto it = m_data.find(path); it != m_data.end()) - return (*it).second; + return to_sv((*it).second); if (auto it = m_reference.find(path); it != m_reference.end()) { auto [itr, success] = m_data.emplace(path, it->second); XRT_DEBUGF("artifacts::ram_repo::get(%s) -> %s\n", path.c_str(), success ? 
"success" : "failure"); - return (*itr).second; + return to_sv((*itr).second); } throw std::runtime_error{"Failed to find artifact: " + path}; @@ -212,7 +222,7 @@ get(const std::string& path, const artifacts::repo* repo) if (auto it = s_path2elf.find(path); it != s_path2elf.end()) return get((*it).second); - auto& data = repo->get(path); + auto data = repo->get(path); streambuf buf{data.data(), data.data() + data.size()}; std::istream is{&buf}; xrt::elf elf{is}; @@ -234,7 +244,7 @@ class recipe read_xclbin(const boost::property_tree::ptree& pt, const artifacts::repo* repo) { auto path = pt.get("xclbin"); - auto& data = repo->get(path); + auto data = repo->get(path); return xrt::xclbin{data}; } @@ -1005,39 +1015,59 @@ class recipe } }; // class recipe - -// A runner_impl (xrt::runner) always has a run recipe object and -// optionally a execution profile object. The latter is optional and default -// created from an in-mermory json. +// class profile - Execution profile // -// The profile implements the runner_impl bind APIs and -// execute/wait APIs, these APIs forward to the run recipe object -// and must be called for the default execution recipe. +// The profile class controls how a run recipe is bound to external +// resources and how the recipe is executed. // -// An external execution profile can be used to initialize run recipe -// resources at runner initialization time bind -// resources per the recipe. The calling application can still -// explicitly bind via the xrt::runner APIs, which may override -// the binding done by the execution recipe. +// An execution profile can be used to initialize run recipe resources +// at runner initialization time by binding resources per the recipe. +// The calling application can still explicitly bind via the +// xrt::runner APIs, which may override the binding done by the +// execution profile. 
class profile { + // class bindings - represents the bindings section of a profile json + // + // { + // "name": buffer name in recipe + // "file": (optional with init) if present use to initialize the buffer + // "size": (required if no file) the size of the buffer + // "init": (optional) how to initialize a buffer + // "validate": how to validate a buffer after execution + // } + // + // The bindings section specifies what xrt::bo objects to create for + // external buffers. The buffers are bound to the recipe prior to + // first execution. + // + // A binding can specify a file from which the buffer should be + // initialized. If a "file" is specified, the buffer is created with + // the file's size unless "size" is also specified, in which case "size" + // is exactly the size of the buffer and at most "size" bytes of the file + // are used to initialize the buffer. + // + // If "init" is specified, then it defines how the buffer should be + // initialized. "init" takes precedence over "file" if "file" is also + // specified, potentially overwriting already initialized buffer. + // + // If "validate" is specified then it has instructions on how to + // validate a buffer after executing the recipe. class bindings { + // Convenience types for readability using name_t = std::string; using path_t = std::string; using binding_node = boost::property_tree::ptree; using validate_node = boost::property_tree::ptree; - // Map of resource name to json binding element. This comes - // directly from the profile json. + // Map of resource name to json binding element. std::map m_bindings; - // Map of resource names to buffers. The buffers are initialized - // with data loaded from the file path corresponding to the - // resource name. - std::map m_bo_bindings; + // Map of resource names to XRT buffer objects. 
+ std::map m_xrt_bos; - // Create a map of resource names to json binding nodes from the profile json + // Create a map of resource names to json binding nodes static std::map init_bindings(const boost::property_tree::ptree& pt) { @@ -1048,8 +1078,11 @@ class profile return bindings; } - // Create a map of resource names to buffers initialized with data - // from the file paths. The data is cached in an artifacts::repo + // Create a map of resource names to XRT buffer objects. + // Initialize the BO with data from the file if any. + // The size of the xrt::bo is either the size of the "file" + // if present, or it is the "size" per json. An explicit + // "size" always has precedence. static std::map create_buffers(const xrt::device& device, const std::map& bindings, @@ -1057,21 +1090,32 @@ class profile { std::map bos; for (const auto& [name, node] : bindings) { - const auto& data = repo->get(node.get("file")); - xrt::bo bo = xrt::ext::bo{device, data.size()}; - auto bo_data = bo.map(); - std::copy(data.data(), data.data() + data.size(), bo_data); - bo.sync(XCL_BO_SYNC_BO_TO_DEVICE); + auto size = node.get("size", 0); + auto file = node.get("file", ""); + auto data = file.empty() ? std::string_view{} : repo->get(file); + size = size ? 
size : data.size(); // specified size has precedence + xrt::bo bo = xrt::ext::bo{device, size}; + if (!data.empty()) { + auto bo_data = bo.map(); + std::copy(data.data(), data.data() + std::min(size, data.size()), bo_data); + bo.sync(XCL_BO_SYNC_BO_TO_DEVICE); + } bos.emplace(node.get("name"), std::move(bo)); } return bos; } - // Validate a resource buffer per the validate json node + // Validate a resource buffer per profile.json validate json node + // "validate": { + // "size": 0, // unused for now + // "offset": 0, // unused for now + // "file": "gold.bin" + // } + static void validate_buffer(xrt::bo& bo, const validate_node& node, const artifacts::repo* repo) { - const auto& golden_data = repo->get(node.get("file")); + auto golden_data = repo->get(node.get("file")); // here we could extract offset and size of region to validate bo.sync(XCL_BO_SYNC_BO_FROM_DEVICE); @@ -1079,12 +1123,15 @@ class profile if (bo.size() != golden_data.size()) throw std::runtime_error("Size mismatch during validation"); - if (!std::equal(golden_data.data(), golden_data.data() + golden_data.size(), bo_data)) { - for (uint64_t i = 0; i < golden_data.size(); ++i) { - if (golden_data[i] != bo_data[i]) - throw std::runtime_error("gold[" + std::to_string(i) + "] = " + std::to_string(golden_data[i]) - + " does not match bo value " + std::to_string(bo_data[i])); - } + if (std::equal(golden_data.data(), golden_data.data() + golden_data.size(), bo_data)) + return; + + // Error + for (uint64_t i = 0; i < golden_data.size(); ++i) { + if (golden_data[i] != bo_data[i]) + throw std::runtime_error + ("gold[" + std::to_string(i) + "] = " + std::to_string(golden_data[i]) + + " does not match bo value in bo " + std::to_string(bo_data[i])); } } @@ -1108,7 +1155,7 @@ class profile bindings(const xrt::device& device, const boost::property_tree::ptree& pt, const artifacts::repo* repo) : m_bindings{init_bindings(pt)} - , m_bo_bindings{create_buffers(device, m_bindings, repo)} + , 
m_xrt_bos{create_buffers(device, m_bindings, repo)} {} // Validate resource buffers per json. Validation is per bound buffer @@ -1118,7 +1165,7 @@ class profile { for (auto& [name, node] : m_bindings) { if (auto validate_node = node.get_child_optional("validate")) { - validate_buffer(m_bo_bindings.at(name), get_optional(validate_node), repo); + validate_buffer(m_xrt_bos.at(name), get_optional(validate_node), repo); } } } @@ -1131,7 +1178,7 @@ class profile { for (auto& [name, node] : m_bindings) { if (auto init_node = node.get_child_optional("init")) - init_buffer(m_bo_bindings.at(name), get_optional(init_node)); + init_buffer(m_xrt_bos.at(name), get_optional(init_node)); } } @@ -1141,11 +1188,37 @@ class profile { for (auto& [name, node] : m_bindings) { if (node.get("bind", false)) - rr->bind(name, m_bo_bindings.at(name)); + rr->bind(name, m_xrt_bos.at(name)); } } }; // class profile::bindings + // class execution - represents the execution section of a profile json + // + // { + // "execution" : { + // "iterations": 2, + // "iteration" : { + // "bind": false, + // "init": true, + // "wait": true, + // "validate": true + // } + // } + // + // The execution section specifies how a recipe should be executed. + // The number of iterations specifies how many times the recipe is + // executed when the application calls xrt::runner::execute(). + // + // The behavior of an iteration is within the iteration sub-node. + // - "bind" indicates if buffers should be re-bound to the + // recipe before an iteration. + // - "init" indicates if buffers should be initialized per what is + // specified in the binding element. + // - "wait" says that execution should wait for completion between + // iterations and after last iteration. + // - "validate" means buffer validation per what is specified in + // the binding element. 
class execution { using iteration_node = boost::property_tree::ptree; @@ -1181,10 +1254,12 @@ class profile , m_iterations(pt.get("iterations")) , m_iteration(pt.get_child("iteration")) { - // Bind buffers to the recipe prior to executing the recipe + // Bind buffers to the recipe prior to executing the recipe. This + // will bind the buffers which have binding::bind set to true. m_profile->bind(); } - + + // Execute the profile void execute() { @@ -1199,7 +1274,6 @@ class profile friend class execution; // embedded class boost::property_tree::ptree m_profile; std::shared_ptr m_repo; - xrt::device m_device; recipe* m_recipe = nullptr; bindings m_bindings; execution m_execution; @@ -1244,13 +1318,17 @@ class profile public: - profile(xrt::device device, recipe* rr, const std::string& profile, + // profile - constructor + // + // Reads json, creates xrt::bo bindings to recipe and initializes + // execution. The repository is used for looking up artifacts. + // The recipe is what the profile binds to and what it executes. 
+ profile(const xrt::device& device, recipe* rr, const std::string& profile, std::shared_ptr repo) : m_profile{load(profile)} , m_repo{std::move(repo)} - , m_device{std::move(device)} , m_recipe{rr} - , m_bindings{m_device, m_profile.get_child("bindings"), m_repo.get()} + , m_bindings{device, m_profile.get_child("bindings"), m_repo.get()} , m_execution(this, m_profile.get_child("execution")) {} diff --git a/src/runtime_src/core/include/xrt/experimental/xrt_xclbin.h b/src/runtime_src/core/include/xrt/experimental/xrt_xclbin.h index dc3d40289ad..d467b9909bb 100644 --- a/src/runtime_src/core/include/xrt/experimental/xrt_xclbin.h +++ b/src/runtime_src/core/include/xrt/experimental/xrt_xclbin.h @@ -18,6 +18,7 @@ # include # include # include +# include #endif /** @@ -624,6 +625,19 @@ class xclbin : public detail::pimpl explicit xclbin(const std::vector& data); + /** + * xclbin() - Constructor from raw data + * + * @param data + * Raw data of xclbin + * + * The raw data of the xclbin can be deleted after calling the + * constructor. + */ + XRT_API_EXPORT + explicit + xclbin(const std::string_view& data); + /** * xclbin() - Constructor from raw data *