From b04198d56c0a930ed31f2ce67195eace16fc20aa Mon Sep 17 00:00:00 2001 From: Lukas Date: Thu, 10 Oct 2024 11:43:41 +0200 Subject: [PATCH 01/27] DEV: Bcast & Receive examples --- samples/spatial/bcast.sptl | 38 +++++++++++++++++++++++++++++++++++++ samples/spatial/reduce.sptl | 37 ++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+) create mode 100644 samples/spatial/bcast.sptl create mode 100644 samples/spatial/reduce.sptl diff --git a/samples/spatial/bcast.sptl b/samples/spatial/bcast.sptl new file mode 100644 index 00000000..e0bc3bae --- /dev/null +++ b/samples/spatial/bcast.sptl @@ -0,0 +1,38 @@ + +kernel @add(stream readonly a_in, stream[N, N] writeonly out) { + + place u16 i, u16 j in [0:N, 0:N] { + f32[K] a; + } + + dataflow i16 i, i16 j in [0:N, 0:N] { + multistream bcast = broadcast(0, 0) { + hops = auto, + channels = 0 + } + } + + compute i16 i, i16 j in [1:N, 1:N] { + await receive(a, bcast) + await send(a, out[i, j]) + } + + compute i16 i, i16 j in [0, 0] { + await receive(a, a_in) + await send(a, bcast) + await send(a, out[i, j]) + } + + compute i16 i, i16 j in [0, 1:N] { + await foreach i32 k, f32 x in [0:K], receive(bcast) { + a[k] = x + 1 + } + await send(a, out[i, j]) + } + + compute i16 i, i16 j in [1:N, 0] { + await receive(a, bcast) + await send(a, out[i, j]) + } + +} \ No newline at end of file diff --git a/samples/spatial/reduce.sptl b/samples/spatial/reduce.sptl new file mode 100644 index 00000000..181c5484 --- /dev/null +++ b/samples/spatial/reduce.sptl @@ -0,0 +1,37 @@ + +kernel @add(stream[N, N] readonly a_in, stream writeonly out) { + + place u16 i, u16 j in [0:N, 0:N] { + f32[K] a; + } + + dataflow i16 i, i16 j in [0:N, 0:N] { + multistream red = reduce(0, 0) { + hops = auto, + channels = {0, 1}, + op = sum + } + } + + compute i16 i, i16 j in [1:N, 1:N] { + await receive(a, a_in) + await reduce(a, red) + } + + compute i16 i, i16 j in [0, 0] { + await receive(a, a_in) + await reduce(a, red) + await send(a, out) + } + + 
compute i16 i, i16 j in [0, 1:N] { + await receive(a, a_in) + await reduce(a, red) + } + + compute i16 i, i16 j in [1:N, 0] { + await receive(a, a_in) + await reduce(a, red) + } + +} \ No newline at end of file From 39ab495d1ff6cbbf07bee76a34d8d71a112d0dee Mon Sep 17 00:00:00 2001 From: Niklas Roemer Date: Mon, 14 Oct 2024 16:25:36 +0200 Subject: [PATCH 02/27] begin documentation of collective communication --- .gitignore | 2 +- irspec/docs/collective/collective.md | 52 ++++++++++++++++++++++++++ irspec/docs/collective/design_goals.md | 6 +++ irspec/mkdocs.yml | 3 ++ 4 files changed, 62 insertions(+), 1 deletion(-) create mode 100644 irspec/docs/collective/collective.md create mode 100644 irspec/docs/collective/design_goals.md diff --git a/.gitignore b/.gitignore index 52faecbe..1bff436f 100644 --- a/.gitignore +++ b/.gitignore @@ -131,7 +131,7 @@ venv.bak/ .ropeproject # mkdocs documentation -/site +site/ # mypy .mypy_cache/ diff --git a/irspec/docs/collective/collective.md b/irspec/docs/collective/collective.md new file mode 100644 index 00000000..e06c2643 --- /dev/null +++ b/irspec/docs/collective/collective.md @@ -0,0 +1,52 @@ +# Collective IR + +The goal of this document is to give an overview of the key concepts present in the IR. It does not (yet) fully describe the semantics of the computation. + + +## Syntax Fundamentals + +### Streams +The stream class of the Spatial IR is extended with `multistream<T>`, for a scalar type `T`. + +If `hops = auto`, the routing is optimized while using at most `#channel` channels. For implementation details on the number of channels used, see the `Channel Usage` section. + +In addition to `hops` and `channel`, the operation `op` can be defined for certain collective communication patterns, e.g. reduce.
The options for the operation `op` are: + +- CL_MAX (returns the maximum element) +- CL_MIN (returns the minimum element) +- CL_SUM (returns the sum of all elements) +- CL_PRODUCT (returns the product of all elements) + +### Collective Functions +Collective communication functions can be called inside the compute block. For further implementation details, see the specific collective definition. + +## Broadcast +A broadcast is defined with the standard send and receive framework provided by the Spatial IR. It is distinguished from single point-to-point communication by using a `multistream` instead of a standard stream. + +Sending data in a broadcast that is defined via the multistream `bcast` can therefore be written as: +```rust +compute i16 variable, i16 variable in subgrid_expression { + send(a, bcast) +} +``` + +???+ example "Example: Simple Broadcast" + ```rust + compute i16 i, i16 j in [1:N, 0] { + await receive(a, bcast) + } + + compute i16 i, i16 j in [0, 0] { + await receive(a, a_in) + await send(a, bcast) + } + ``` + where `i`, `j` are `i16` variables that are bound to the coordinates of the PEs in the subgrid and `bcast` is a multistream.
+ + + +## Reduce + + + +## Channel Usage \ No newline at end of file diff --git a/irspec/docs/collective/design_goals.md b/irspec/docs/collective/design_goals.md new file mode 100644 index 00000000..d1a8b3d3 --- /dev/null +++ b/irspec/docs/collective/design_goals.md @@ -0,0 +1,6 @@ +# Design Goals + +- Models collective communication schemas + - Broadcast + - Reduce +- Integrates into Spatial IR for device agnostic communication abstractions \ No newline at end of file diff --git a/irspec/mkdocs.yml b/irspec/mkdocs.yml index 76ffdc4c..13e29f20 100644 --- a/irspec/mkdocs.yml +++ b/irspec/mkdocs.yml @@ -13,6 +13,9 @@ nav: - Routing Semantics: spatial/routing.md - Parameterized Semantics: spatial/parametric.md - Examples: spatial/examples.md + - Collective IR: + - Design Goals: collective/design_goals.md + - Specification: collective/collective.md - Dataflow Task IR: dataflowtask/dataflowtask.md markdown_extensions: From 235430cdf3932f75e6338f6f4b5825c52b2afd91 Mon Sep 17 00:00:00 2001 From: Lukas Date: Thu, 17 Oct 2024 16:46:02 +0200 Subject: [PATCH 03/27] Remove hops from reduce def --- samples/spatial/reduce.sptl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/samples/spatial/reduce.sptl b/samples/spatial/reduce.sptl index 181c5484..e49d6ed5 100644 --- a/samples/spatial/reduce.sptl +++ b/samples/spatial/reduce.sptl @@ -7,7 +7,6 @@ kernel @add(stream[N, N] readonly a_in, stream writeonly out) { dataflow i16 i, i16 j in [0:N, 0:N] { multistream red = reduce(0, 0) { - hops = auto, channels = {0, 1}, op = sum } @@ -34,4 +33,4 @@ kernel @add(stream[N, N] readonly a_in, stream writeonly out) { await reduce(a, red) } -} \ No newline at end of file +} From dcefef37640408c72fb36ab4b90c4e251b78089d Mon Sep 17 00:00:00 2001 From: Lukas Date: Thu, 17 Oct 2024 16:47:25 +0200 Subject: [PATCH 04/27] RM hops from bcast --- samples/spatial/bcast.sptl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/samples/spatial/bcast.sptl 
b/samples/spatial/bcast.sptl index e0bc3bae..7e0f2f8d 100644 --- a/samples/spatial/bcast.sptl +++ b/samples/spatial/bcast.sptl @@ -7,7 +7,6 @@ kernel @add(stream readonly a_in, stream[N, N] writeonly out) { dataflow i16 i, i16 j in [0:N, 0:N] { multistream bcast = broadcast(0, 0) { - hops = auto, channels = 0 } } @@ -35,4 +34,4 @@ kernel @add(stream readonly a_in, stream[N, N] writeonly out) { await send(a, out[i, j]) } -} \ No newline at end of file +} From 511cc2fd27bdafb8f8c5d3560934d49569b3166b Mon Sep 17 00:00:00 2001 From: Niklas Roemer Date: Wed, 30 Oct 2024 16:36:53 +0100 Subject: [PATCH 05/27] basic broadcast is working reduce partially implemented (can translate back and forth + can separate compute blocks for the routing step) --- samples/spatial/bcast.sptl | 2 +- samples/spatial/reduce.sptl | 2 +- samples/spatial/simple_reduce.sptl | 18 ++ samples/spatial/simple_reduce_after.sptl | 23 +++ spatialstencil/syntax/spatial_ir/irnodes.py | 163 +++++++++++++++++- .../syntax/spatial_ir/language.lark | 9 +- .../syntax/spatial_ir/lark_to_ir.py | 22 ++- tests/test_spatial_ir_parser.py | 29 +++- 8 files changed, 247 insertions(+), 21 deletions(-) create mode 100644 samples/spatial/simple_reduce.sptl create mode 100644 samples/spatial/simple_reduce_after.sptl diff --git a/samples/spatial/bcast.sptl b/samples/spatial/bcast.sptl index e0bc3bae..8a0fffb8 100644 --- a/samples/spatial/bcast.sptl +++ b/samples/spatial/bcast.sptl @@ -8,7 +8,7 @@ kernel @add(stream readonly a_in, stream[N, N] writeonly out) { dataflow i16 i, i16 j in [0:N, 0:N] { multistream bcast = broadcast(0, 0) { hops = auto, - channels = 0 + channel = 0 } } diff --git a/samples/spatial/reduce.sptl b/samples/spatial/reduce.sptl index 181c5484..07f81c6c 100644 --- a/samples/spatial/reduce.sptl +++ b/samples/spatial/reduce.sptl @@ -8,7 +8,7 @@ kernel @add(stream[N, N] readonly a_in, stream writeonly out) { dataflow i16 i, i16 j in [0:N, 0:N] { multistream red = reduce(0, 0) { hops = auto, - channels = {0, 
1}, + channel = {0, 1}, op = sum } } diff --git a/samples/spatial/simple_reduce.sptl b/samples/spatial/simple_reduce.sptl new file mode 100644 index 00000000..808e2dcc --- /dev/null +++ b/samples/spatial/simple_reduce.sptl @@ -0,0 +1,18 @@ + +kernel @add() { + + dataflow i16 i, i16 j in [0:1, 0:2] { + multistream red = reduce(0, 0) { + hops = auto, + channel = auto, + graph = 1, + op = 2 + } + } + + compute i16 i, i16 j in [0:1, 0:2] { + i16 a = 1 + await reduce(a, red) + } + +} \ No newline at end of file diff --git a/samples/spatial/simple_reduce_after.sptl b/samples/spatial/simple_reduce_after.sptl new file mode 100644 index 00000000..a4dc268f --- /dev/null +++ b/samples/spatial/simple_reduce_after.sptl @@ -0,0 +1,23 @@ + +kernel @add(stream[1, 2]) { + + dataflow i16 i, i16 j in [0:1, 0:2] { + multistream red = reduce(0, 0) { + hops = auto, + channel = auto, + graph = 1, + op = 2 + } + } + + compute i16 i, i16 j in [0, 0] { + i16 a = 1 + await reduce(a, red) + } + + compute i16 i, i16 j in [0, 1] { + i16 a = 1 + await reduce(a, red) + } + +} \ No newline at end of file diff --git a/spatialstencil/syntax/spatial_ir/irnodes.py b/spatialstencil/syntax/spatial_ir/irnodes.py index de21e956..c7e25f69 100644 --- a/spatialstencil/syntax/spatial_ir/irnodes.py +++ b/spatialstencil/syntax/spatial_ir/irnodes.py @@ -5,6 +5,8 @@ from spatialstencil.syntax.common.types import ScalarType, IRType from spatialstencil.syntax.spatial_ir.grid_geometry import Rectangle +from lark import Tree + @dataclass class SpatialNode(BaseNode): @@ -39,7 +41,7 @@ def validate(self) -> None: def as_ir(self, indent: int = 0) -> str: return str(self.value) - + # Parameters @dataclass @@ -91,6 +93,21 @@ def validate(self) -> None: def as_ir(self, indent: int = 0) -> str: return f'stream<{self.dtype.as_ir()}>' + + + +@dataclass +class MultiStreamType(SpatialNode, IRType): + """ + A multistream type that handles collective communication patterns. 
+ """ + dtype: ScalarType + + def validate(self) -> None: + assert isinstance(self.dtype, ScalarType) + + def as_ir(self, indent: int = 0) -> str: + return f'multistream<{self.dtype.as_ir()}>' @@ -100,7 +117,7 @@ class ArrayType(SpatialNode, IRType): """ An array type of a scalar or stream, with one or more dimensions. """ - base_type: Union[ScalarType, StreamType] + base_type: Union[ScalarType, StreamType, MultiStreamType] shape: list[Union[int, 'Expression']] def validate(self) -> None: @@ -118,7 +135,7 @@ class TypedIdentifier(SpatialNode): """ A variable identifier (e.g., x, y, my_variable) with a type. """ - dtype: Union[ScalarType, StreamType, ArrayType] + dtype: Union[ScalarType, StreamType, MultiStreamType, ArrayType] identifier: Identifier def validate(self) -> None: @@ -383,12 +400,51 @@ def validate(self) -> None: for r in self.hops: dx, dy = r.offset assert abs(dx) + abs(dy) == 1, "Each hop must have an absolute sum of 1." + if isinstance(self.hops, Tree): + self.hops = self.hops.data + + # this doesn't work for self.channel != "auto" def as_ir(self, indent: int = 0) -> str: indent_str = ' ' * indent hops_str = "auto" if self.hops == "auto" else f"[{', '.join(hop.as_ir() for hop in self.hops)}]" channel_str = "auto" if self.channel == "auto" else str(self.channel) return f"{indent_str}hops = {hops_str},\n{indent_str}channel = {channel_str}" + + +@dataclass +class ReduceRoutingDeclaration(SpatialNode): + """ + A routing declaration for a reduce, optionally specifying hops and channel. + """ + hops: Literal["auto"] = "auto" # list of hops or 'auto' + channel: Union[int, Literal["auto"]] = "auto" # Channel ID or 'auto' + graph: int = 0 + op: int = 0 + + def validate(self) -> None: + if isinstance(self.hops, list): + for r in self.hops: + dx, dy = r.offset + assert abs(dx) + abs(dy) == 1, "Each hop must have an absolute sum of 1." 
+ if isinstance(self.hops, Tree): + self.hops = self.hops.data + + # test this for self.channel != "auto" + if isinstance(self.channel, Tree): + self.channel = self.channel.data + + assert isinstance(self.graph, int) + assert isinstance(self.op, int) + + + def as_ir(self, indent: int = 0) -> str: + indent_str = ' ' * indent + hops_str = "auto" + channel_str = "auto" if self.channel == "auto" else str(self.channel) + graph_str = str(self.graph) + op_str = str(self.op) + return f"{indent_str}hops = {hops_str},\n{indent_str}channel = {channel_str},\n{indent_str}graph = {graph_str},\n{indent_str}op = {op_str}" @dataclass @@ -419,6 +475,38 @@ def as_ir(self, indent: int = 0) -> str: return f'{indent_str}stream<{self.dtype.dtype.as_ir()}> {self.stream_name.as_ir()} = relative_stream({self.dx.as_ir()}, {self.dy.as_ir()}){routing_str}' +@dataclass +class MulStreamDeclaration(SpatialNode): + """ + A stream declaration inside a dataflow block that declares a communication stream + to and from PEs at relative positions, with an optional routing declaration. 
+ """ + dtype: MultiStreamType + stream_name: Identifier + dx: Expression + dy: Expression + routing: Optional[Union[RoutingDeclaration, ReduceRoutingDeclaration]] = None + + def validate(self) -> None: + assert isinstance(self.dtype, MultiStreamType) + assert isinstance(self.stream_name, Identifier) + assert isinstance(self.dx, Expression) + assert isinstance(self.dy, Expression) + if self.routing: + assert isinstance(self.routing, Union[RoutingDeclaration, ReduceRoutingDeclaration]) + + def as_ir(self, indent: int = 0) -> str: + indent_str = ' ' * indent + routing_str = "" + if self.routing: + routing_str = f" {{\n{self.routing.as_ir(indent + 1)}\n{' ' * indent}}}" + if isinstance(self.routing, ReduceRoutingDeclaration): + return f'{indent_str}multistream<{self.dtype.dtype.as_ir()}> {self.stream_name.as_ir()} = reduce({self.dx.as_ir()}, {self.dy.as_ir()}){routing_str}' + elif isinstance(self.routing, RoutingDeclaration): + return f'{indent_str}multistream<{self.dtype.dtype.as_ir()}> {self.stream_name.as_ir()} = broadcast({self.dx.as_ir()}, {self.dy.as_ir()}){routing_str}' + else: + raise ValueError("Invalid routing declaration") + ### # Dataflow Block ### @@ -431,11 +519,11 @@ class DataflowBlock(SpatialNode): """ variables: list[TypedIdentifier] subgrid: SubgridExpression - statements: list[RelativeStreamDeclaration] + statements: Union[list[RelativeStreamDeclaration], list[MulStreamDeclaration]] def validate(self) -> None: assert all(isinstance(var, TypedIdentifier) for var in self.variables) - assert all(isinstance(stmt, RelativeStreamDeclaration) for stmt in self.statements) + assert all(isinstance(stmt, RelativeStreamDeclaration) for stmt in self.statements) or all(isinstance(stmt, MulStreamDeclaration) for stmt in self.statements) assert len(self.variables) == 2 def as_ir(self, indent: int = 0) -> str: @@ -519,6 +607,29 @@ def as_ir(self, indent: int = 0) -> str: if self.completion_name: return f'{indent_str}{self.completion_name.as_ir()} = 
receive({self.local_array.as_ir()}, {self.stream_name.as_ir()})' return f'{indent_str}await receive({self.local_array.as_ir()}, {self.stream_name.as_ir()})' + + +@dataclass +class ReduceStatement(Statement): + """ + Receive statement for receiving data asynchronously through a stream. + """ + local_array: Union[Identifier, ArraySlice] + stream_name: Union[Identifier, ArraySlice] + completion_name: Optional[Completion] = None + + def validate(self) -> None: + assert isinstance(self.local_array, (Identifier, ArraySlice)) + assert isinstance(self.stream_name, (Identifier, ArraySlice)) + if self.completion_name: + assert isinstance(self.completion_name, Completion) + + + def as_ir(self, indent: int = 0) -> str: + indent_str = ' ' * indent + if self.completion_name: + return f'{indent_str}{self.completion_name.as_ir()} = receive({self.local_array.as_ir()}, {self.stream_name.as_ir()})' + return f'{indent_str}await receive({self.local_array.as_ir()}, {self.stream_name.as_ir()})' # Receive generator @@ -758,14 +869,14 @@ class KernelArgument(SpatialNode): """ A kernel argument of a given type. 
""" - dtype: Union[ScalarType, ArrayType, StreamType] + dtype: Union[ScalarType, ArrayType, StreamType, MultiStreamType] identifier: Identifier readonly: bool = False writeonly: bool = False compiletime: bool = False def validate(self) -> None: - assert isinstance(self.dtype, (ScalarType, ArrayType, StreamType)) + assert isinstance(self.dtype, (ScalarType, ArrayType, StreamType, MultiStreamType)) assert isinstance(self.identifier, Identifier) assert not self.readonly or not self.writeonly assert not self.compiletime or not self.writeonly @@ -811,6 +922,7 @@ def validate(self) -> None: assert all(isinstance(stmt, (Phase, ComputeBlock, DataflowBlock, PlaceBlock)) for stmt in self.body) def as_ir(self, indent: int = 0) -> str: + self.reduce_subroutine() param_str = ", ".join(p.as_ir() for p in self.parameters) arg_str = ", ".join(arg.as_ir() for arg in self.arguments) body_str = "\n".join(stmt.as_ir(indent + 1) for stmt in self.body) @@ -844,6 +956,43 @@ def subgrids(self) -> list[Subgrid]: (0, elem))) return rectangles + + + def reduce_subroutine(self): + self.separate_computeblocks() + + return None + + + def separate_computeblocks(self): + newbody = [] + for elem in self.body: + includesReduceStatement = False + if isinstance(elem, ComputeBlock): + for stmt in elem.statements: + if isinstance(stmt, ReduceStatement): + includesReduceStatement = True + break + if includesReduceStatement: + x_start = elem.subgrid.x_range.start.value.value + x_end = elem.subgrid.x_range.stop.value.value + x_step = 1 if elem.subgrid.x_range.step == None else elem.subgrid.x_range.step.value.value + y_start = elem.subgrid.y_range.start.value.value + y_end = elem.subgrid.y_range.stop.value.value + y_step = 1 if elem.subgrid.y_range.step == None else elem.subgrid.y_range.step.value.value + + print(x_start, x_end, x_step) + print(y_start, y_end, y_step) + + for x in range(x_start, x_end, x_step): + for y in range(y_start, y_end, y_step): + newbody.append(ComputeBlock(elem.variables, 
SubgridExpression(RangeExpression(start=Expression(ConstantLiteral(x, ScalarType.i32))), RangeExpression(start=Expression(ConstantLiteral(y, ScalarType.i32)))), elem.statements)) + + else: + newbody.append(elem) + + self.body = newbody + return None # Specialized visitors diff --git a/spatialstencil/syntax/spatial_ir/language.lark b/spatialstencil/syntax/spatial_ir/language.lark index 684cd7e7..20e20815 100644 --- a/spatialstencil/syntax/spatial_ir/language.lark +++ b/spatialstencil/syntax/spatial_ir/language.lark @@ -32,7 +32,7 @@ identifier : suffix_id ("#" digits)? !uint_type : "u8" | "u16" | "u32" !bool_type : "bool" ?scalar_type : float_type | int_type | uint_type | bool_type -stream_type : "stream" "<" scalar_type ">" +stream_type : "stream" "<" scalar_type ">" | "multistream" "<" scalar_type ">" ?standard_type : scalar_type | stream_type // Array types @@ -115,9 +115,14 @@ subgrid_expression_2d : "[" range_expression "," range_expression "]" hop : "(" posneg_integer_literal "," posneg_integer_literal ")" // 2D at the moment, might expand hops : "[" hop ("," hop)* "]" routing : "hops" "=" (auto | hops) "," "channel" "=" (auto | integer_literal) +reduce_routing : "hops" "=" (auto | hops) "," "channel" "=" (auto) "," "graph" "=" (auto | integer_literal) "," "op" "=" integer_literal field_declaration : builtin_type identifier (";")? //(";" | NEWLINE) -stream_declaration : "stream" "<" scalar_type ">" identifier "=" "relative_stream" "(" value_expr "," value_expr ")" ("{" routing "}")? (";")? +stream_declaration : classic_stream | mul_stream +classic_stream : "stream" "<" scalar_type ">" identifier "=" "relative_stream" "(" value_expr "," value_expr ")" ("{" routing "}")? (";")? +mul_stream : bcast | red +bcast : "multistream" "<" scalar_type ">" identifier "=" "broadcast" "(" value_expr "," value_expr ")" ("{" routing "}")? (";")? +red : "multistream" "<" scalar_type ">" identifier "=" "reduce" "(" value_expr "," value_expr ")" ("{" reduce_routing "}")? (";")? 
vars : identifier ("," identifier)* typed_var : scalar_type identifier typed_vars : typed_var ("," typed_var)* diff --git a/spatialstencil/syntax/spatial_ir/lark_to_ir.py b/spatialstencil/syntax/spatial_ir/lark_to_ir.py index 192274a5..531c6bed 100644 --- a/spatialstencil/syntax/spatial_ir/lark_to_ir.py +++ b/spatialstencil/syntax/spatial_ir/lark_to_ir.py @@ -2,7 +2,7 @@ from spatialstencil.syntax.common.types import ScalarType from spatialstencil.syntax.spatial_ir import irnodes -from spatialstencil.syntax.spatial_ir.irnodes import StreamType, Identifier +from spatialstencil.syntax.spatial_ir.irnodes import StreamType, MultiStreamType, Identifier class TreeToSpatialIR(lark.Transformer): @@ -122,6 +122,8 @@ def function_call(self, args, meta=None): return irnodes.SendStatement(*arguments, completion_name=completion) elif func == 'receive': return irnodes.ReceiveStatement(*arguments, completion_name=completion) + elif func == 'reduce': + return irnodes.ReduceStatement(*arguments, completion_name=completion) raise SyntaxError(f'Unrecognized free function call to "{func}"') subscript = irnodes.ArraySlice.from_lark @@ -133,6 +135,7 @@ def function_call(self, args, meta=None): # Declarations and routing hop = irnodes.RoutingHop.from_lark routing = irnodes.RoutingDeclaration.from_lark + reduce_routing = irnodes.ReduceRoutingDeclaration.from_lark field_declaration = irnodes.FieldDeclaration.from_lark subgrid_expression_2d = irnodes.SubgridExpression.from_lark @@ -142,8 +145,21 @@ def hop(self, args): return irnodes.RoutingHop(o) def stream_declaration(self, args): - args[0] = StreamType(args[0]) - return irnodes.RelativeStreamDeclaration(*args) + # args[0] = StreamType(args[0]) + # print(args) + # exit() + if args[0].data == 'classic_stream': + args[0].children[0] = StreamType(args[0].children[0]) + return irnodes.RelativeStreamDeclaration(*args[0].children) + elif args[0].data == 'mul_stream': + args[0].children[0].children[0] = 
MultiStreamType(args[0].children[0].children[0]) + return irnodes.MulStreamDeclaration(*args[0].children[0].children) + else: + raise NotImplementedError('Only classic and mul stream declarations are supported at the moment') + + # original code + # args[0] = StreamType(args[0]) + # return irnodes.RelativeStreamDeclaration(*args) # Scopes def _scope_wrapper(self, cls, args): diff --git a/tests/test_spatial_ir_parser.py b/tests/test_spatial_ir_parser.py index 931c6814..407e2278 100644 --- a/tests/test_spatial_ir_parser.py +++ b/tests/test_spatial_ir_parser.py @@ -96,6 +96,9 @@ def _rountrip_test(file): ir_1 = program.as_ir() program2 = parser.parse_string(ir_1) ir_2 = program2.as_ir() + print(ir_1) + print('#' * 80) + print(ir_2) assert ir_1 == ir_2 def test_spatial_roundtrip_two_phase_unrouted(): @@ -108,11 +111,23 @@ def test_spatial_roundtrip_two_phase_split(): _rountrip_test(file) +def test_spatial_bcast(): + file = os.path.join(os.path.dirname(__file__), '..', 'samples', 'spatial', 'bcast.sptl') + _rountrip_test(file) + + +def test_simple_reduce(): + file = os.path.join(os.path.dirname(__file__), '..', 'samples', 'spatial', 'simple_reduce.sptl') + _rountrip_test(file) + + if __name__ == '__main__': - test_spatial_roundtrip_laplacian() - test_spatial_visitor() - test_spatial_roundtrip_two_phase() - test_spatial_roundtrip_two_phase_unrouted() - test_spatial_roundtrip_two_phase_split() - test_spatial_roundtrip_forward() - test_spatial_roundtrip_backward() + # test_spatial_roundtrip_laplacian() + # test_spatial_visitor() + # test_spatial_roundtrip_two_phase() + # test_spatial_roundtrip_two_phase_unrouted() + # test_spatial_roundtrip_two_phase_split() + # test_spatial_roundtrip_forward() + # test_spatial_roundtrip_backward() + # test_spatial_bcast() + test_simple_reduce() From 8a0c7e6b228eba1d6bd7c0880257130f3d5a6d83 Mon Sep 17 00:00:00 2001 From: Niklas Roemer Date: Wed, 30 Oct 2024 17:06:59 +0100 Subject: [PATCH 06/27] adapt to merge --- samples/spatial/bcast.sptl 
| 2 +- samples/spatial/reduce.sptl | 4 +- samples/spatial/simple_reduce.sptl | 3 +- samples/spatial/simple_reduce_after.sptl | 3 +- spatialstencil/syntax/spatial_ir/irnodes.py | 46 +++++++++++-------- .../syntax/spatial_ir/language.lark | 5 +- .../syntax/spatial_ir/lark_to_ir.py | 4 +- tests/test_spatial_ir_parser.py | 19 ++++---- 8 files changed, 43 insertions(+), 43 deletions(-) diff --git a/samples/spatial/bcast.sptl b/samples/spatial/bcast.sptl index 7e0f2f8d..47f0f167 100644 --- a/samples/spatial/bcast.sptl +++ b/samples/spatial/bcast.sptl @@ -7,7 +7,7 @@ kernel @add(stream readonly a_in, stream[N, N] writeonly out) { dataflow i16 i, i16 j in [0:N, 0:N] { multistream bcast = broadcast(0, 0) { - channels = 0 + channels = auto } } diff --git a/samples/spatial/reduce.sptl b/samples/spatial/reduce.sptl index 436a5810..296d8d1b 100644 --- a/samples/spatial/reduce.sptl +++ b/samples/spatial/reduce.sptl @@ -7,8 +7,8 @@ kernel @add(stream[N, N] readonly a_in, stream writeonly out) { dataflow i16 i, i16 j in [0:N, 0:N] { multistream red = reduce(0, 0) { - hops = auto, - channel = auto + channels = auto + graph = snake op = sum } } diff --git a/samples/spatial/simple_reduce.sptl b/samples/spatial/simple_reduce.sptl index 808e2dcc..9a9f285c 100644 --- a/samples/spatial/simple_reduce.sptl +++ b/samples/spatial/simple_reduce.sptl @@ -3,8 +3,7 @@ kernel @add() { dataflow i16 i, i16 j in [0:1, 0:2] { multistream red = reduce(0, 0) { - hops = auto, - channel = auto, + channels = auto, graph = 1, op = 2 } diff --git a/samples/spatial/simple_reduce_after.sptl b/samples/spatial/simple_reduce_after.sptl index a4dc268f..1367e63e 100644 --- a/samples/spatial/simple_reduce_after.sptl +++ b/samples/spatial/simple_reduce_after.sptl @@ -3,8 +3,7 @@ kernel @add(stream[1, 2]) { dataflow i16 i, i16 j in [0:1, 0:2] { multistream red = reduce(0, 0) { - hops = auto, - channel = auto, + channels = auto, graph = 1, op = 2 } diff --git a/spatialstencil/syntax/spatial_ir/irnodes.py 
b/spatialstencil/syntax/spatial_ir/irnodes.py index c7e25f69..21ea4d15 100644 --- a/spatialstencil/syntax/spatial_ir/irnodes.py +++ b/spatialstencil/syntax/spatial_ir/irnodes.py @@ -412,27 +412,37 @@ def as_ir(self, indent: int = 0) -> str: return f"{indent_str}hops = {hops_str},\n{indent_str}channel = {channel_str}" +@dataclass +class BroadcastRoutingDeclaration(SpatialNode): + """ + A routing declaration for a stream, optionally specifying hops and channel. + """ + channels: Union[int, Literal["auto"]] = "auto" # Channel ID or 'auto' + + def validate(self) -> None: + # this doesn't work for self.channel != "auto" - check this + if isinstance(self.channels, Tree): + self.channels = self.channels.data + + def as_ir(self, indent: int = 0) -> str: + indent_str = ' ' * indent + channels_str = "auto" if self.channels == "auto" else str(self.channels) + return f"{indent_str}channels = {channels_str}" + + @dataclass class ReduceRoutingDeclaration(SpatialNode): """ A routing declaration for a reduce, optionally specifying hops and channel. """ - hops: Literal["auto"] = "auto" # list of hops or 'auto' - channel: Union[int, Literal["auto"]] = "auto" # Channel ID or 'auto' + channels: Union[int, Literal["auto"]] = "auto" # Channel ID or 'auto' graph: int = 0 op: int = 0 def validate(self) -> None: - if isinstance(self.hops, list): - for r in self.hops: - dx, dy = r.offset - assert abs(dx) + abs(dy) == 1, "Each hop must have an absolute sum of 1." 
- if isinstance(self.hops, Tree): - self.hops = self.hops.data - # test this for self.channel != "auto" - if isinstance(self.channel, Tree): - self.channel = self.channel.data + if isinstance(self.channels, Tree): + self.channels = self.channels.data assert isinstance(self.graph, int) assert isinstance(self.op, int) @@ -440,11 +450,10 @@ def validate(self) -> None: def as_ir(self, indent: int = 0) -> str: indent_str = ' ' * indent - hops_str = "auto" - channel_str = "auto" if self.channel == "auto" else str(self.channel) + channels_str = "auto" if self.channels == "auto" else str(self.channels) graph_str = str(self.graph) op_str = str(self.op) - return f"{indent_str}hops = {hops_str},\n{indent_str}channel = {channel_str},\n{indent_str}graph = {graph_str},\n{indent_str}op = {op_str}" + return f"{indent_str}channels = {channels_str},\n{indent_str}graph = {graph_str},\n{indent_str}op = {op_str}" @dataclass @@ -485,7 +494,7 @@ class MulStreamDeclaration(SpatialNode): stream_name: Identifier dx: Expression dy: Expression - routing: Optional[Union[RoutingDeclaration, ReduceRoutingDeclaration]] = None + routing: Optional[Union[BroadcastRoutingDeclaration, ReduceRoutingDeclaration]] = None def validate(self) -> None: assert isinstance(self.dtype, MultiStreamType) @@ -493,7 +502,7 @@ def validate(self) -> None: assert isinstance(self.dx, Expression) assert isinstance(self.dy, Expression) if self.routing: - assert isinstance(self.routing, Union[RoutingDeclaration, ReduceRoutingDeclaration]) + assert isinstance(self.routing, Union[BroadcastRoutingDeclaration, ReduceRoutingDeclaration]) def as_ir(self, indent: int = 0) -> str: indent_str = ' ' * indent @@ -502,7 +511,7 @@ def as_ir(self, indent: int = 0) -> str: routing_str = f" {{\n{self.routing.as_ir(indent + 1)}\n{' ' * indent}}}" if isinstance(self.routing, ReduceRoutingDeclaration): return f'{indent_str}multistream<{self.dtype.dtype.as_ir()}> {self.stream_name.as_ir()} = reduce({self.dx.as_ir()}, 
{self.dy.as_ir()}){routing_str}' - elif isinstance(self.routing, RoutingDeclaration): + elif isinstance(self.routing, BroadcastRoutingDeclaration): return f'{indent_str}multistream<{self.dtype.dtype.as_ir()}> {self.stream_name.as_ir()} = broadcast({self.dx.as_ir()}, {self.dy.as_ir()}){routing_str}' else: raise ValueError("Invalid routing declaration") @@ -981,9 +990,6 @@ def separate_computeblocks(self): y_end = elem.subgrid.y_range.stop.value.value y_step = 1 if elem.subgrid.y_range.step == None else elem.subgrid.y_range.step.value.value - print(x_start, x_end, x_step) - print(y_start, y_end, y_step) - for x in range(x_start, x_end, x_step): for y in range(y_start, y_end, y_step): newbody.append(ComputeBlock(elem.variables, SubgridExpression(RangeExpression(start=Expression(ConstantLiteral(x, ScalarType.i32))), RangeExpression(start=Expression(ConstantLiteral(y, ScalarType.i32)))), elem.statements)) diff --git a/spatialstencil/syntax/spatial_ir/language.lark b/spatialstencil/syntax/spatial_ir/language.lark index 20e20815..169c10fa 100644 --- a/spatialstencil/syntax/spatial_ir/language.lark +++ b/spatialstencil/syntax/spatial_ir/language.lark @@ -115,13 +115,14 @@ subgrid_expression_2d : "[" range_expression "," range_expression "]" hop : "(" posneg_integer_literal "," posneg_integer_literal ")" // 2D at the moment, might expand hops : "[" hop ("," hop)* "]" routing : "hops" "=" (auto | hops) "," "channel" "=" (auto | integer_literal) -reduce_routing : "hops" "=" (auto | hops) "," "channel" "=" (auto) "," "graph" "=" (auto | integer_literal) "," "op" "=" integer_literal +broadcast_routing : "channels" "=" (auto | integer_literal) +reduce_routing : "channels" "=" (auto) "," "graph" "=" (auto | integer_literal) "," "op" "=" integer_literal field_declaration : builtin_type identifier (";")? 
//(";" | NEWLINE) stream_declaration : classic_stream | mul_stream classic_stream : "stream" "<" scalar_type ">" identifier "=" "relative_stream" "(" value_expr "," value_expr ")" ("{" routing "}")? (";")? mul_stream : bcast | red -bcast : "multistream" "<" scalar_type ">" identifier "=" "broadcast" "(" value_expr "," value_expr ")" ("{" routing "}")? (";")? +bcast : "multistream" "<" scalar_type ">" identifier "=" "broadcast" "(" value_expr "," value_expr ")" ("{" broadcast_routing "}")? (";")? red : "multistream" "<" scalar_type ">" identifier "=" "reduce" "(" value_expr "," value_expr ")" ("{" reduce_routing "}")? (";")? vars : identifier ("," identifier)* typed_var : scalar_type identifier diff --git a/spatialstencil/syntax/spatial_ir/lark_to_ir.py b/spatialstencil/syntax/spatial_ir/lark_to_ir.py index 531c6bed..56757100 100644 --- a/spatialstencil/syntax/spatial_ir/lark_to_ir.py +++ b/spatialstencil/syntax/spatial_ir/lark_to_ir.py @@ -135,6 +135,7 @@ def function_call(self, args, meta=None): # Declarations and routing hop = irnodes.RoutingHop.from_lark routing = irnodes.RoutingDeclaration.from_lark + broadcast_routing = irnodes.BroadcastRoutingDeclaration.from_lark reduce_routing = irnodes.ReduceRoutingDeclaration.from_lark field_declaration = irnodes.FieldDeclaration.from_lark subgrid_expression_2d = irnodes.SubgridExpression.from_lark @@ -145,9 +146,6 @@ def hop(self, args): return irnodes.RoutingHop(o) def stream_declaration(self, args): - # args[0] = StreamType(args[0]) - # print(args) - # exit() if args[0].data == 'classic_stream': args[0].children[0] = StreamType(args[0].children[0]) return irnodes.RelativeStreamDeclaration(*args[0].children) diff --git a/tests/test_spatial_ir_parser.py b/tests/test_spatial_ir_parser.py index 407e2278..bf5d51a3 100644 --- a/tests/test_spatial_ir_parser.py +++ b/tests/test_spatial_ir_parser.py @@ -96,9 +96,6 @@ def _rountrip_test(file): ir_1 = program.as_ir() program2 = parser.parse_string(ir_1) ir_2 = program2.as_ir() - 
print(ir_1) - print('#' * 80) - print(ir_2) assert ir_1 == ir_2 def test_spatial_roundtrip_two_phase_unrouted(): @@ -122,12 +119,12 @@ def test_simple_reduce(): if __name__ == '__main__': - # test_spatial_roundtrip_laplacian() - # test_spatial_visitor() - # test_spatial_roundtrip_two_phase() - # test_spatial_roundtrip_two_phase_unrouted() - # test_spatial_roundtrip_two_phase_split() - # test_spatial_roundtrip_forward() - # test_spatial_roundtrip_backward() - # test_spatial_bcast() + test_spatial_roundtrip_laplacian() + test_spatial_visitor() + test_spatial_roundtrip_two_phase() + test_spatial_roundtrip_two_phase_unrouted() + test_spatial_roundtrip_two_phase_split() + test_spatial_roundtrip_forward() + test_spatial_roundtrip_backward() + test_spatial_bcast() test_simple_reduce() From b4901c8d46b71e9717b8acf1a39d41c27e64c3ea Mon Sep 17 00:00:00 2001 From: Niklas Roemer Date: Thu, 31 Oct 2024 16:10:39 +0100 Subject: [PATCH 07/27] change reduce to send, receive in the ir --- samples/spatial/simple_reduce.sptl | 8 +- samples/spatial/simple_reduce_after.sptl | 24 ++- .../spatial/simple_reduce_intermediate.sptl | 22 +++ spatialstencil/syntax/spatial_ir/irnodes.py | 177 +++++++++++++++++- tests/test_spatial_ir_parser.py | 24 ++- 5 files changed, 232 insertions(+), 23 deletions(-) create mode 100644 samples/spatial/simple_reduce_intermediate.sptl diff --git a/samples/spatial/simple_reduce.sptl b/samples/spatial/simple_reduce.sptl index 9a9f285c..5b8403d2 100644 --- a/samples/spatial/simple_reduce.sptl +++ b/samples/spatial/simple_reduce.sptl @@ -1,8 +1,12 @@ kernel @add() { + place i16 i, i16 j in [0:1, 0:2] { + i16[1] a + } + dataflow i16 i, i16 j in [0:1, 0:2] { - multistream red = reduce(0, 0) { + multistream red = reduce(0, 0) { channels = auto, graph = 1, op = 2 @@ -10,7 +14,7 @@ kernel @add() { } compute i16 i, i16 j in [0:1, 0:2] { - i16 a = 1 + a[0] = 1 await reduce(a, red) } diff --git a/samples/spatial/simple_reduce_after.sptl 
b/samples/spatial/simple_reduce_after.sptl index 1367e63e..d6d14ded 100644 --- a/samples/spatial/simple_reduce_after.sptl +++ b/samples/spatial/simple_reduce_after.sptl @@ -1,22 +1,26 @@ -kernel @add(stream[1, 2]) { +kernel @add() { + + place i16 i, i16 j in [0:1, 0:2] { + i16[1] a; + } dataflow i16 i, i16 j in [0:1, 0:2] { - multistream red = reduce(0, 0) { - channels = auto, - graph = 1, - op = 2 + stream reduce0 = relative_stream(0, 1) { + hops = [(0, 1)], + channel = auto } } compute i16 i, i16 j in [0, 0] { - i16 a = 1 - await reduce(a, red) + a[0] = 1; + await foreach i32 reduce_runner, i16 reduce_receive in [0:1], receive(reduce0) { + a[reduce_runner] = a[reduce_runner] + reduce_receive; + } } compute i16 i, i16 j in [0, 1] { - i16 a = 1 - await reduce(a, red) + a[0] = 1; + await send(a, reduce0); } - } \ No newline at end of file diff --git a/samples/spatial/simple_reduce_intermediate.sptl b/samples/spatial/simple_reduce_intermediate.sptl new file mode 100644 index 00000000..1367e63e --- /dev/null +++ b/samples/spatial/simple_reduce_intermediate.sptl @@ -0,0 +1,22 @@ + +kernel @add(stream[1, 2]) { + + dataflow i16 i, i16 j in [0:1, 0:2] { + multistream red = reduce(0, 0) { + channels = auto, + graph = 1, + op = 2 + } + } + + compute i16 i, i16 j in [0, 0] { + i16 a = 1 + await reduce(a, red) + } + + compute i16 i, i16 j in [0, 1] { + i16 a = 1 + await reduce(a, red) + } + +} \ No newline at end of file diff --git a/spatialstencil/syntax/spatial_ir/irnodes.py b/spatialstencil/syntax/spatial_ir/irnodes.py index 21ea4d15..e7d639de 100644 --- a/spatialstencil/syntax/spatial_ir/irnodes.py +++ b/spatialstencil/syntax/spatial_ir/irnodes.py @@ -919,6 +919,7 @@ class Kernel(SpatialNode): parameters: list[Parameter] arguments: list[KernelArgument] body: list[PlaceBlock | DataflowBlock | ComputeBlock | Phase] + _communication_patterns: Optional[dict[str, dict[tuple[int, int], list[list[list[int]]]]]] = None def validate(self) -> None: if self.name: @@ -969,11 +970,12 
@@ def subgrids(self) -> list[Subgrid]: def reduce_subroutine(self): self.separate_computeblocks() - + self.create_communication_patterns() + self.replace_reduce_operator() return None - def separate_computeblocks(self): + def separate_computeblocks(self) -> None: newbody = [] for elem in self.body: includesReduceStatement = False @@ -992,13 +994,182 @@ def separate_computeblocks(self): for x in range(x_start, x_end, x_step): for y in range(y_start, y_end, y_step): - newbody.append(ComputeBlock(elem.variables, SubgridExpression(RangeExpression(start=Expression(ConstantLiteral(x, ScalarType.i32))), RangeExpression(start=Expression(ConstantLiteral(y, ScalarType.i32)))), elem.statements)) + newbody.append( + ComputeBlock( + elem.variables, + SubgridExpression( + RangeExpression( + start=Expression(ConstantLiteral(x, ScalarType.i32)) + ), + RangeExpression( + start=Expression(ConstantLiteral(y, ScalarType.i32)) + ) + ), + elem.statements + ) + ) else: newbody.append(elem) self.body = newbody return None + + + def create_communication_patterns(self) -> None: + #print(self._communication_patterns) + self._communication_patterns = {'red': {(0,0): [[[0,1]], []], (0,1): [[], [[0,0]]]}} + #print(self._communication_patterns) + return None + + + def replace_reduce_operator(self) -> None: + newbody = [] + reduce_operations = {} + counter = 0 + for elem in self.body: + if isinstance(elem, DataflowBlock): + olddataflobblock = [] + newdataflobblocks = [] + for stmt in elem.statements: + if isinstance(stmt, MulStreamDeclaration) and isinstance(stmt.routing, ReduceRoutingDeclaration): + reduce_operations.update({stmt.stream_name.name: {'op': stmt.routing.op}}) + for router in self._communication_patterns[stmt.stream_name.name]: + if self._communication_patterns[stmt.stream_name.name][router][0] != []: + for route in self._communication_patterns[stmt.stream_name.name][router][0]: + delta_x = route[0] - router[0] + delta_y = route[1] - router[1] + newdataflobblocks.append([router, 
route, + RelativeStreamDeclaration( + dtype=StreamType(stmt.dtype.dtype), + stream_name=Identifier(name="reduce"+str(counter), version=0), + dx=Expression(ConstantLiteral(delta_x, ScalarType.i32)), + dy=Expression(ConstantLiteral(delta_y, ScalarType.i32)), + routing=RoutingDeclaration( + hops=[RoutingHop(offset=(delta_x, delta_y))], + channel=stmt.routing.channels + ) + )] + ) + + for receiver in self._communication_patterns[stmt.stream_name.name][(route[0], route[1])][1]: + if receiver[0] == router[0] and receiver[1] == router[1]: + receiver.append(counter) + receiver.append(stmt.dtype.dtype) + receiver.append("reduce"+str(counter)) + route.append(counter) + route.append(stmt.dtype.dtype) + route.append("reduce"+str(counter)) + counter += 1 + else: + olddataflobblock.append(stmt) + + if olddataflobblock != []: # not tested + newbody.append(DataflowBlock(variables=elem.variables, subgrid=elem.subgrid, statements=olddataflobblock)) + for newdataflobblock in newdataflobblocks: + newbody.append( + DataflowBlock( + variables=elem.variables, + subgrid=SubgridExpression( + x_range=RangeExpression( + start=Expression( + ConstantLiteral(min(newdataflobblock[0][0], newdataflobblock[1][0]), ScalarType.i32) + ), + stop=Expression( + ConstantLiteral(max(newdataflobblock[0][0], newdataflobblock[1][0]) + 1, ScalarType.i32) + ) + ), + y_range=RangeExpression( + start=Expression( + ConstantLiteral(min(newdataflobblock[0][1], newdataflobblock[1][1]), ScalarType.i32) + ), + stop=Expression( + ConstantLiteral(max(newdataflobblock[0][1], newdataflobblock[1][1]) + 1, ScalarType.i32) + ) + ), + ), + statements=[newdataflobblock[2]])) + else: + newbody.append(elem) + + finalbody = [] + for elem in newbody: + if isinstance(elem, ComputeBlock): + statements = [] + for stmt in elem.statements: + if isinstance(stmt, ReduceStatement): + newstatements = [] + x = elem.subgrid.x_range.start.value.value + y = elem.subgrid.y_range.start.value.value + router_id = (x, y) + stream_name = 
stmt.stream_name.name + receive_list = self._communication_patterns[stream_name][router_id][0] + send_list = self._communication_patterns[stream_name][router_id][1] + operation_id = reduce_operations[stmt.stream_name.name]['op'] + + if operation_id == 2: + operation_id = "OP_SUM" + + for receive in receive_list: + newstatements.append( + ForeachStatement( + variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=Identifier(name="reduce_runner", version=0))], + parameter_range=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), + stop=Expression(ConstantLiteral(1, ScalarType.i32)), + step=None)], + stream_variable=TypedIdentifier(dtype=receive[3], + identifier=Identifier(name="reduce_receive", version=0)), + receive_stream=ReceiveGenerator(stream_name=Identifier(name=receive[4], version=0)), + body=[ + AssignmentStatement( + destination=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=Identifier(name="reduce_runner", version=0))] + ), + source=Expression( + BinaryOperator( + left=Expression( + value=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=Identifier(name="reduce_runner", version=0))] + ) + ), + op= '+' if operation_id == "OP_SUM" else '-----', # other operations not implemented + right=Expression( + value=Identifier(name="reduce_receive", version=0) + ) + ) + ) + ) + ], + completion_name=None + ) + ) + + for send in send_list: + newstatements.append( + SendStatement( + local_array=stmt.local_array, + stream_name=Identifier(name=send[4], version=0), + completion_name=None + ) + ) + + # add receive + calculation + send here + for new_statement in newstatements: + statements.append(new_statement) + + else: + statements.append(stmt) + + finalbody.append(ComputeBlock(elem.variables, elem.subgrid, statements)) + else: + finalbody.append(elem) + + self.body = finalbody + #exit() + + return None # Specialized visitors diff --git a/tests/test_spatial_ir_parser.py b/tests/test_spatial_ir_parser.py index 
bf5d51a3..21d50f24 100644 --- a/tests/test_spatial_ir_parser.py +++ b/tests/test_spatial_ir_parser.py @@ -94,6 +94,8 @@ def _rountrip_test(file): """ program = parser.parse_file(file) ir_1 = program.as_ir() + print(ir_1) + exit() program2 = parser.parse_string(ir_1) ir_2 = program2.as_ir() assert ir_1 == ir_2 @@ -118,13 +120,19 @@ def test_simple_reduce(): _rountrip_test(file) +def test_simple_reduce_reference(): + file = os.path.join(os.path.dirname(__file__), '..', 'samples', 'spatial', 'simple_reduce_after.sptl') + _rountrip_test(file) + + if __name__ == '__main__': - test_spatial_roundtrip_laplacian() - test_spatial_visitor() - test_spatial_roundtrip_two_phase() - test_spatial_roundtrip_two_phase_unrouted() - test_spatial_roundtrip_two_phase_split() - test_spatial_roundtrip_forward() - test_spatial_roundtrip_backward() - test_spatial_bcast() + # test_spatial_roundtrip_laplacian() + # test_spatial_visitor() + # test_spatial_roundtrip_two_phase() + # test_spatial_roundtrip_two_phase_unrouted() + # test_spatial_roundtrip_two_phase_split() + # test_spatial_roundtrip_forward() + # test_spatial_roundtrip_backward() + # test_spatial_bcast() test_simple_reduce() + # test_simple_reduce_reference() From 6979b82e9f39615e81fbe650e4b156be1c88498f Mon Sep 17 00:00:00 2001 From: Niklas Roemer Date: Wed, 27 Nov 2024 19:24:18 +0100 Subject: [PATCH 08/27] refactor multistream to optimizations + implement initial grid reduce (not fully functional) --- samples/spatial/simple_reduce.sptl | 31 +- samples/spatial/simple_reduce_after.sptl | 7 +- samples/spatial/simple_reduce_four.sptl | 22 + samples/spatial/simple_reduce_three.sptl | 21 + spatialstencil/lowering/versioning.py | 7 + spatialstencil/optimizations/__init__.py | 0 .../optimizations/optimization_pass.py | 11 + .../optimizations/spatial_reduce.py | 404 ++++++++++++++++++ spatialstencil/syntax/spatial_ir/irnodes.py | 204 --------- tests/test_spatial_ir_parser.py | 16 +- 10 files changed, 513 insertions(+), 210 deletions(-) 
create mode 100644 samples/spatial/simple_reduce_four.sptl create mode 100644 samples/spatial/simple_reduce_three.sptl create mode 100644 spatialstencil/optimizations/__init__.py create mode 100644 spatialstencil/optimizations/optimization_pass.py create mode 100644 spatialstencil/optimizations/spatial_reduce.py diff --git a/samples/spatial/simple_reduce.sptl b/samples/spatial/simple_reduce.sptl index 5b8403d2..5adeab93 100644 --- a/samples/spatial/simple_reduce.sptl +++ b/samples/spatial/simple_reduce.sptl @@ -1,21 +1,44 @@ kernel @add() { - place i16 i, i16 j in [0:1, 0:2] { + place i16 i, i16 j in [0:5, 0:5] { i16[1] a } - dataflow i16 i, i16 j in [0:1, 0:2] { + dataflow i16 i, i16 j in [0:5, 0:5] { multistream red = reduce(0, 0) { channels = auto, graph = 1, op = 2 } + multistream red2 = reduce(1, 1) { + channels = auto, + graph = 1, + op = 2 + } + multistream red3 = reduce(2, 2) { + channels = auto, + graph = 1, + op = 2 + } + multistream red4 = reduce(3, 3) { + channels = auto, + graph = 1, + op = 2 + } + multistream red5 = reduce(4, 4) { + channels = auto, + graph = 1, + op = 2 + } } - compute i16 i, i16 j in [0:1, 0:2] { + compute i16 i, i16 j in [0:5, 0:5] { a[0] = 1 await reduce(a, red) + await reduce(a, red2) + await reduce(a, red3) + await reduce(a, red4) + await reduce(a, red5) } - } \ No newline at end of file diff --git a/samples/spatial/simple_reduce_after.sptl b/samples/spatial/simple_reduce_after.sptl index d6d14ded..09dca704 100644 --- a/samples/spatial/simple_reduce_after.sptl +++ b/samples/spatial/simple_reduce_after.sptl @@ -1,8 +1,10 @@ +## look at this idea kernel @add() { place i16 i, i16 j in [0:1, 0:2] { i16[1] a; + i16[1] tmp; } dataflow i16 i, i16 j in [0:1, 0:2] { @@ -12,10 +14,13 @@ kernel @add() { } } + # -> stream westwards = relative_stream(-1, 0); + compute i16 i, i16 j in [0, 0] { a[0] = 1; await foreach i32 reduce_runner, i16 reduce_receive in [0:1], receive(reduce0) { - a[reduce_runner] = a[reduce_runner] + reduce_receive; + 
tmp[0] = a[reduce_runner] + reduce_receive; + await send(tmp) } } diff --git a/samples/spatial/simple_reduce_four.sptl b/samples/spatial/simple_reduce_four.sptl new file mode 100644 index 00000000..37672e90 --- /dev/null +++ b/samples/spatial/simple_reduce_four.sptl @@ -0,0 +1,22 @@ + +kernel @add() { + + place i16 i, i16 j in [0:1, 0:4] { + i16[1] a + i16[100] b + } + + dataflow i16 i, i16 j in [0:1, 0:4] { + multistream red = reduce(0, 0) { + channels = auto, + graph = 1, + op = 2 + } + } + + compute i16 i, i16 j in [0:1, 0:4] { + a[0] = 1 + await reduce(a, red) + } + +} \ No newline at end of file diff --git a/samples/spatial/simple_reduce_three.sptl b/samples/spatial/simple_reduce_three.sptl new file mode 100644 index 00000000..5337281e --- /dev/null +++ b/samples/spatial/simple_reduce_three.sptl @@ -0,0 +1,21 @@ + +kernel @add() { + + place i16 i, i16 j in [0:1, 0:3] { + i16[1] a + } + + dataflow i16 i, i16 j in [0:1, 0:3] { + multistream red = reduce(0, 0) { + channels = auto, + graph = 1, + op = 2 + } + } + + compute i16 i, i16 j in [0:1, 0:3] { + a[0] = 1 + await reduce(a, red) + } + +} \ No newline at end of file diff --git a/spatialstencil/lowering/versioning.py b/spatialstencil/lowering/versioning.py index 431e124b..fb870ba8 100644 --- a/spatialstencil/lowering/versioning.py +++ b/spatialstencil/lowering/versioning.py @@ -21,3 +21,10 @@ def next_version(self, name: str) -> T: version = self._var_counter[name] self._var_counter[name] += 1 return self.cls(name, version) + + + def current_version(self, name: str) -> T: + """ + Gets the current version of a variable name. 
+ """ + return self.cls(name, self._var_counter[name] - 1) diff --git a/spatialstencil/optimizations/__init__.py b/spatialstencil/optimizations/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/spatialstencil/optimizations/optimization_pass.py b/spatialstencil/optimizations/optimization_pass.py new file mode 100644 index 00000000..b32b3276 --- /dev/null +++ b/spatialstencil/optimizations/optimization_pass.py @@ -0,0 +1,11 @@ +from spatialstencil.optimizations.spatial_reduce import ReduceOptimizer + + + +def optimization_pass(program): + """ + Runs the spatial optimizations on the program. + """ + reduce_optimizer = ReduceOptimizer(program) + out = reduce_optimizer.reduce_subroutine() + return out \ No newline at end of file diff --git a/spatialstencil/optimizations/spatial_reduce.py b/spatialstencil/optimizations/spatial_reduce.py new file mode 100644 index 00000000..31dacbcf --- /dev/null +++ b/spatialstencil/optimizations/spatial_reduce.py @@ -0,0 +1,404 @@ +from spatialstencil.syntax.spatial_ir.irnodes import Kernel, ComputeBlock, ReduceStatement, Expression, SubgridExpression, RangeExpression, ConstantLiteral, ScalarType, DataflowBlock, MulStreamDeclaration, ReduceRoutingDeclaration, RoutingDeclaration, RoutingHop, StreamType, Identifier, TypedIdentifier, ForeachStatement, ArraySlice, BinaryOperator, SendStatement, ReceiveGenerator, AssignmentStatement, RelativeStreamDeclaration, PlaceBlock, Phase, Parameter, KernelArgument +from typing import Union, Tuple, Optional, Literal +import spatialstencil.syntax.spatial_ir.irnodes as spa +from spatialstencil.lowering.versioning import Versioning +from spatialstencil.syntax.common.visitor import ScopedIRNodeVisitor, IRNodeVisitor +# try ScopedIRNodeVisitor / IRNodeVisitor from spatialstencil.syntax.common.visitor to match nodes that have reduce in them + + +class ReduceOptimizer(): + name: str | None + parameters: list[Parameter] + arguments: list[KernelArgument] + body: list[PlaceBlock | 
DataflowBlock | ComputeBlock | Phase] + _communication_patterns: Optional[dict[str, dict[tuple[int, int], list[list[list[int]]]]]] = None + reduce_operations: dict[str, dict[str, Union[int, Literal['OP_SUM'], list[int]]]] = {} # needs to be adapted + relative_streams: dict[str, list[list]] = {} + pipelined: dict[str, bool] = {} + + + def __init__(self, kernel: Kernel) -> None: + self.name = kernel.name + self.parameters = kernel.parameters + self.arguments = kernel.arguments + self.body = kernel.body + self.versioning = Versioning[spa.Identifier](spa.Identifier) + return None + + def reduce_subroutine(self) -> Kernel: + self.change_data_blocks() + self.fix_subgrid() + self.change_compute_blocks() + return Kernel(name=self.name, parameters=self.parameters, arguments=self.arguments, body=self.body) + + + + def create_communication_patterns(self, x_start, x_stop, y_start, y_stop, x, y, name) -> None: + communication = [] + self.pipelined.update({name : False}) # not implemented yet + mode = 'grid' #'snake' # not implemented yet + if mode == 'snake': + raise NotImplementedError + if x == x_start and y == y_start: + print('upper left corner') + elif x == x_stop - 1 and y == y_start: + print('upper right corner') + elif x == x_start and y == y_stop - 1: + print('lower left corner') + elif x == x_stop - 1 and y == y_stop - 1: + print('lower right corner') + else: + raise NotImplementedError + + if mode == 'grid': + if x == x_start: + # horizontal movement + if x_start == x_stop - 1: + # print('no horizontal movement needed') + pass + else: + # print('right to left') + communication.append([x_start, x_stop, y_start, y_stop, -1, 0]) + + # vertical movement + if y_start == y_stop - 1: + # print('no vertical movement needed') + pass + elif y == y_start: + # print('upper left corner') + communication.append([x_start, x_start + 1, y_start, y_stop, 0, -1]) + elif y == y_stop - 1: + # print('lower left corner') + communication.append([x_start, x_start + 1, y_start, y_stop, 0, 1]) 
+ else: + # print('left edge') + communication.append([x_start, x_start + 1, y_start, y + 1, 0, 1]) + communication.append([x_start, x_start + 1, y, y_stop, 0, -1]) + + elif x == x_stop - 1: + # horizontal movement + if x_start == x_stop - 1: + # print('no horizontal movement needed') + pass + else: + # print('left to right') + communication.append([x_start, x_stop, y_start, y_stop, 1, 0]) + + # vertical movement + if y_start == y_stop - 1: + # print('no vertical movement needed') + pass + elif y == y_start: + # print('upper right corner') + communication.append([x_stop - 1, x_stop, y_start, y_stop, 0, -1]) + elif y == y_stop - 1: + # print('lower right corner') + communication.append([x_stop - 1, x_stop, y_start, y_stop, 0, 1]) + else: + # print('right edge') + communication.append([x_stop - 1, x_stop, y_start, y + 1, 0, 1]) + communication.append([x_stop - 1, x_stop, y, y_stop, 0, -1]) + + else: + # horizontal movement + # print('middle') + communication.append([x_start, x + 1, y_start, y_stop, 1, 0]) # left to middle + communication.append([x, x_stop, y_start, y_stop, -1, 0]) # right to middle + + # vertical movement + if y_start == y_stop - 1: + # print('no vertical movement needed') + pass + elif y == y_start: + # print('upper edge') + communication.append([x, x + 1, y_start, y_stop, 0, -1]) + elif y == y_stop - 1: + # print('lower edge') + communication.append([x, x + 1, y_start, y_stop, 0, 1]) + else: + # print('center') + communication.append([x, x + 1, y_start, y + 1, 0, 1]) + communication.append([x, x + 1, y, y_stop, 0, -1]) + + self.relative_streams.update({name : communication}) + return None + + + def change_data_blocks(self) -> None: + newbody = [] + self.reduce_operations = {} + for elem in self.body: + if isinstance(elem, DataflowBlock): + olddataflobblock = [] + newdataflobblocks = [] + for stmt in elem.statements: + if isinstance(stmt, MulStreamDeclaration) and isinstance(stmt.routing, ReduceRoutingDeclaration): + 
self.create_communication_patterns(elem.subgrid.x_range.start.value.value, + elem.subgrid.x_range.stop.value.value, + elem.subgrid.y_range.start.value.value, + elem.subgrid.y_range.stop.value.value, + stmt.dx.value.value, + stmt.dy.value.value, + stmt.stream_name.name) + + self.reduce_operations.update({stmt.stream_name.name: [{'op': stmt.routing.op}, [stmt.dx.value.value, stmt.dy.value.value]]}) + new_relative_streams = [] + for com in self.relative_streams[stmt.stream_name.name]: + newdataflobblocks.append([[com[0], com[1]], [com[2], com[3]], + RelativeStreamDeclaration( + dtype=StreamType(stmt.dtype.dtype), + stream_name=self.versioning.next_version("reduce"), + dx=Expression(ConstantLiteral(com[4], ScalarType.i32)), + dy=Expression(ConstantLiteral(com[5], ScalarType.i32)) + )] + ) + if com[4] == -1: + new_relative_streams.append([self.versioning.current_version("reduce"), com, StreamType(stmt.dtype.dtype), 'left']) + elif com[4] == 1: + new_relative_streams.append([self.versioning.current_version("reduce"), com, StreamType(stmt.dtype.dtype), 'right']) + elif com[5] == -1: + new_relative_streams.append([self.versioning.current_version("reduce"), com, StreamType(stmt.dtype.dtype), 'top']) + elif com[5] == 1: + new_relative_streams.append([self.versioning.current_version("reduce"), com, StreamType(stmt.dtype.dtype), 'bottom']) + self.relative_streams.update({stmt.stream_name.name: new_relative_streams}) + + else: + olddataflobblock.append(stmt) + + if olddataflobblock != []: # not tested + newbody.append(DataflowBlock(variables=elem.variables, subgrid=elem.subgrid, statements=olddataflobblock)) + for newdataflobblock in newdataflobblocks: + newbody.append( + DataflowBlock( + variables=elem.variables, + subgrid=SubgridExpression( + x_range=RangeExpression( + start=Expression( + ConstantLiteral(newdataflobblock[0][0], ScalarType.i32) + ), + stop=Expression( + ConstantLiteral(newdataflobblock[0][1], ScalarType.i32) + ) + ), + y_range=RangeExpression( + 
start=Expression( + ConstantLiteral(newdataflobblock[1][0], ScalarType.i32) + ), + stop=Expression( + ConstantLiteral(newdataflobblock[1][1], ScalarType.i32) + ) + ), + ), + statements=[newdataflobblock[2]])) + else: + newbody.append(elem) + self.body = newbody + + + def fix_subgrid(self) -> None: + newbody = [] + + # change the outer loops to go through everything for each reduce and in that loop change the subgrids for the compute blocks + + for elem in self.body: + if isinstance(elem, ComputeBlock): + x_start = elem.subgrid.x_range.start.value.value + x_stop = elem.subgrid.x_range.stop.value.value + x_step = elem.subgrid.x_range.step.value.value if elem.subgrid.x_range.step is not None else None + y_start = elem.subgrid.y_range.start.value.value + y_stop = elem.subgrid.y_range.stop.value.value + y_step = elem.subgrid.y_range.step.value.value if elem.subgrid.y_range.step is not None else None + grid = [[[x_start, x_stop], [y_start, y_stop]]] + + for stmt in elem.statements: # walk operators in baseclass + if isinstance(stmt, ReduceStatement): + stream_name = stmt.stream_name.name + connections = self.relative_streams[stream_name] + reduce_connections = [] + send_connections = [] + for con in connections: + if con[3] == 'left': + send_connections.append([con[1][1] - 1, con[1][1], con[1][2], con[1][3]]) + elif con[3] == 'right': + send_connections.append([con[1][0], con[1][0] + 1, con[1][2], con[1][3]]) + elif con[3] == 'top': + send_connections.append([con[1][0], con[1][1], con[1][3] - 1, con[1][3]]) + elif con[3] == 'bottom': + send_connections.append([con[1][0], con[1][1], con[1][2], con[1][2] + 1]) + reduce_connections.append(con[1]) + root = self.reduce_operations[stmt.stream_name.name][1] + for send in send_connections: + reduce_connections.append(send) + + reduce_connections.append([root[0], root[0] + 1, root[1], root[1] + 1]) + + # needs to be tested properly + for com_grid in reduce_connections: + to_remove = [] + for sub_grid in grid: + if com_grid[0] > 
sub_grid[0][0] and com_grid[0] < sub_grid[0][1]: + # print("left") + sub_x_start = sub_grid[0][0] + sub_x_stop = sub_grid[0][1] + sub_y_start = sub_grid[1][0] + sub_y_stop = sub_grid[1][1] + grid.append([[sub_x_start, com_grid[0]], [sub_y_start, sub_y_stop]]) + grid.append([[com_grid[0], sub_x_stop], [sub_y_start, sub_y_stop]]) + to_remove.append(sub_grid) + elif com_grid[1] > sub_grid[0][0] and com_grid[1] < sub_grid[0][1]: + # print("right") + sub_x_start = sub_grid[0][0] + sub_x_stop = sub_grid[0][1] + sub_y_start = sub_grid[1][0] + sub_y_stop = sub_grid[1][1] + grid.append([[sub_x_start, com_grid[1]], [sub_y_start, sub_y_stop]]) + grid.append([[com_grid[1], sub_x_stop], [sub_y_start, sub_y_stop]]) + to_remove.append(sub_grid) + elif com_grid[2] > sub_grid[1][0] and com_grid[2] < sub_grid[1][1] and com_grid[0] <= sub_grid[0][0] and com_grid[1] >= sub_grid[0][1]: + # print("top") + sub_x_start = sub_grid[0][0] + sub_x_stop = sub_grid[0][1] + sub_y_start = sub_grid[1][0] + sub_y_stop = sub_grid[1][1] + grid.append([[sub_x_start, sub_x_stop], [sub_y_start, com_grid[2]]]) + grid.append([[sub_x_start, sub_x_stop], [com_grid[2], sub_y_stop]]) + to_remove.append(sub_grid) + elif com_grid[3] > sub_grid[1][0] and com_grid[3] < sub_grid[1][1] and com_grid[0] <= sub_grid[0][0] and com_grid[1] >= sub_grid[0][1]: + # print("bottom") + sub_x_start = sub_grid[0][0] + sub_x_stop = sub_grid[0][1] + sub_y_start = sub_grid[1][0] + sub_y_stop = sub_grid[1][1] + grid.append([[sub_x_start, sub_x_stop], [sub_y_start, com_grid[3]]]) + grid.append([[sub_x_start, sub_x_stop], [com_grid[3], sub_y_stop]]) + to_remove.append(sub_grid) + # delete old unused + for rmv in to_remove: + grid.remove(rmv) + + for com_grid in grid: + newbody.append( + ComputeBlock( + elem.variables, + SubgridExpression( + RangeExpression( + start=Expression(ConstantLiteral(com_grid[0][0], ScalarType.i32)), + stop=Expression(ConstantLiteral(com_grid[0][1], ScalarType.i32)) + ), + RangeExpression( + 
start=Expression(ConstantLiteral(com_grid[1][0], ScalarType.i32)), + stop=Expression(ConstantLiteral(com_grid[1][1], ScalarType.i32)) + ) + ), + elem.statements + ) + ) + else: + newbody.append(elem) + + self.body = newbody + return None + + + def change_compute_blocks(self) -> None: + finalbody = [] + for elem in self.body: + #print(elem) + #print('-'*50) + #for tst in elem.iter_child_nodes(): ### use this to go over nested nodes + # print(tst) + # print('@'*50) + #exit() + + if isinstance(elem, ComputeBlock): + statements = [] + for stmt in elem.statements: # walk operators in baseclass + if isinstance(stmt, ReduceStatement): + + current_position = [elem.subgrid.x_range.start.value.value, + elem.subgrid.x_range.stop.value.value, + elem.subgrid.y_range.start.value.value, + elem.subgrid.y_range.stop.value.value] + newstatements = [] + stream_name = stmt.stream_name.name + operation_id = self.reduce_operations[stmt.stream_name.name][0]['op'] + root = self.reduce_operations[stmt.stream_name.name][1] + connections = self.relative_streams[stream_name] + + if operation_id == 2: + operation_id = "OP_SUM" + + for con in connections: + if (current_position[0] >= con[1][0] + and current_position[1] <= con[1][1] + and current_position[2] >= con[1][2] + and current_position[3] <= con[1][3]): + + if (con[3] == 'left' and current_position[1] != con[1][1] + or con[3] == 'right' and current_position[0] != con[1][0] + or con[3] == 'top' and current_position[3] != con[1][3] + or con[3] == 'bottom' and current_position[2] != con[1][2]): + + newstatements.append( + ForeachStatement( + variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], + parameter_range=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), + stop=Expression(ConstantLiteral(1, ScalarType.i32)), + step=None)], + stream_variable=TypedIdentifier(dtype=con[2], + identifier=self.versioning.next_version("reduce_receive")), + 
receive_stream=ReceiveGenerator(stream_name=con[0]), + body=[ + AssignmentStatement( + destination=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ), + source=Expression( + BinaryOperator( + left=Expression( + value=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ) + ), + op= '+' if operation_id == "OP_SUM" else '-----', # other operations not implemented + right=Expression( + value=self.versioning.current_version("reduce_receive") + ) + ) + ) + ) + ], + completion_name=None + ) + ) + + if (con[3] == 'left' and current_position[0] != con[1][0] + or con[3] == 'right' and current_position[1] != con[1][1] + or con[3] == 'top' and current_position[2] != con[1][2] + or con[3] == 'bottom' and current_position[3] != con[1][3]): + + newstatements.append( + SendStatement( + local_array=stmt.local_array, + stream_name=con[0], + completion_name=None + ) + ) + + # add receive + calculation + send here + for new_statement in newstatements: + statements.append(new_statement) + + else: + statements.append(stmt) + + finalbody.append(ComputeBlock(elem.variables, elem.subgrid, statements)) + else: + finalbody.append(elem) + + self.body = finalbody + #exit() + + return None \ No newline at end of file diff --git a/spatialstencil/syntax/spatial_ir/irnodes.py b/spatialstencil/syntax/spatial_ir/irnodes.py index e7d639de..cd0ead0e 100644 --- a/spatialstencil/syntax/spatial_ir/irnodes.py +++ b/spatialstencil/syntax/spatial_ir/irnodes.py @@ -919,7 +919,6 @@ class Kernel(SpatialNode): parameters: list[Parameter] arguments: list[KernelArgument] body: list[PlaceBlock | DataflowBlock | ComputeBlock | Phase] - _communication_patterns: Optional[dict[str, dict[tuple[int, int], list[list[list[int]]]]]] = None def validate(self) -> None: if self.name: @@ -932,7 +931,6 @@ def validate(self) -> None: assert all(isinstance(stmt, (Phase, ComputeBlock, 
DataflowBlock, PlaceBlock)) for stmt in self.body) def as_ir(self, indent: int = 0) -> str: - self.reduce_subroutine() param_str = ", ".join(p.as_ir() for p in self.parameters) arg_str = ", ".join(arg.as_ir() for arg in self.arguments) body_str = "\n".join(stmt.as_ir(indent + 1) for stmt in self.body) @@ -968,208 +966,6 @@ def subgrids(self) -> list[Subgrid]: return rectangles - def reduce_subroutine(self): - self.separate_computeblocks() - self.create_communication_patterns() - self.replace_reduce_operator() - return None - - - def separate_computeblocks(self) -> None: - newbody = [] - for elem in self.body: - includesReduceStatement = False - if isinstance(elem, ComputeBlock): - for stmt in elem.statements: - if isinstance(stmt, ReduceStatement): - includesReduceStatement = True - break - if includesReduceStatement: - x_start = elem.subgrid.x_range.start.value.value - x_end = elem.subgrid.x_range.stop.value.value - x_step = 1 if elem.subgrid.x_range.step == None else elem.subgrid.x_range.step.value.value - y_start = elem.subgrid.y_range.start.value.value - y_end = elem.subgrid.y_range.stop.value.value - y_step = 1 if elem.subgrid.y_range.step == None else elem.subgrid.y_range.step.value.value - - for x in range(x_start, x_end, x_step): - for y in range(y_start, y_end, y_step): - newbody.append( - ComputeBlock( - elem.variables, - SubgridExpression( - RangeExpression( - start=Expression(ConstantLiteral(x, ScalarType.i32)) - ), - RangeExpression( - start=Expression(ConstantLiteral(y, ScalarType.i32)) - ) - ), - elem.statements - ) - ) - - else: - newbody.append(elem) - - self.body = newbody - return None - - - def create_communication_patterns(self) -> None: - #print(self._communication_patterns) - self._communication_patterns = {'red': {(0,0): [[[0,1]], []], (0,1): [[], [[0,0]]]}} - #print(self._communication_patterns) - return None - - - def replace_reduce_operator(self) -> None: - newbody = [] - reduce_operations = {} - counter = 0 - for elem in self.body: - if 
isinstance(elem, DataflowBlock): - olddataflobblock = [] - newdataflobblocks = [] - for stmt in elem.statements: - if isinstance(stmt, MulStreamDeclaration) and isinstance(stmt.routing, ReduceRoutingDeclaration): - reduce_operations.update({stmt.stream_name.name: {'op': stmt.routing.op}}) - for router in self._communication_patterns[stmt.stream_name.name]: - if self._communication_patterns[stmt.stream_name.name][router][0] != []: - for route in self._communication_patterns[stmt.stream_name.name][router][0]: - delta_x = route[0] - router[0] - delta_y = route[1] - router[1] - newdataflobblocks.append([router, route, - RelativeStreamDeclaration( - dtype=StreamType(stmt.dtype.dtype), - stream_name=Identifier(name="reduce"+str(counter), version=0), - dx=Expression(ConstantLiteral(delta_x, ScalarType.i32)), - dy=Expression(ConstantLiteral(delta_y, ScalarType.i32)), - routing=RoutingDeclaration( - hops=[RoutingHop(offset=(delta_x, delta_y))], - channel=stmt.routing.channels - ) - )] - ) - - for receiver in self._communication_patterns[stmt.stream_name.name][(route[0], route[1])][1]: - if receiver[0] == router[0] and receiver[1] == router[1]: - receiver.append(counter) - receiver.append(stmt.dtype.dtype) - receiver.append("reduce"+str(counter)) - route.append(counter) - route.append(stmt.dtype.dtype) - route.append("reduce"+str(counter)) - counter += 1 - else: - olddataflobblock.append(stmt) - - if olddataflobblock != []: # not tested - newbody.append(DataflowBlock(variables=elem.variables, subgrid=elem.subgrid, statements=olddataflobblock)) - for newdataflobblock in newdataflobblocks: - newbody.append( - DataflowBlock( - variables=elem.variables, - subgrid=SubgridExpression( - x_range=RangeExpression( - start=Expression( - ConstantLiteral(min(newdataflobblock[0][0], newdataflobblock[1][0]), ScalarType.i32) - ), - stop=Expression( - ConstantLiteral(max(newdataflobblock[0][0], newdataflobblock[1][0]) + 1, ScalarType.i32) - ) - ), - y_range=RangeExpression( - 
start=Expression( - ConstantLiteral(min(newdataflobblock[0][1], newdataflobblock[1][1]), ScalarType.i32) - ), - stop=Expression( - ConstantLiteral(max(newdataflobblock[0][1], newdataflobblock[1][1]) + 1, ScalarType.i32) - ) - ), - ), - statements=[newdataflobblock[2]])) - else: - newbody.append(elem) - - finalbody = [] - for elem in newbody: - if isinstance(elem, ComputeBlock): - statements = [] - for stmt in elem.statements: - if isinstance(stmt, ReduceStatement): - newstatements = [] - x = elem.subgrid.x_range.start.value.value - y = elem.subgrid.y_range.start.value.value - router_id = (x, y) - stream_name = stmt.stream_name.name - receive_list = self._communication_patterns[stream_name][router_id][0] - send_list = self._communication_patterns[stream_name][router_id][1] - operation_id = reduce_operations[stmt.stream_name.name]['op'] - - if operation_id == 2: - operation_id = "OP_SUM" - - for receive in receive_list: - newstatements.append( - ForeachStatement( - variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=Identifier(name="reduce_runner", version=0))], - parameter_range=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), - stop=Expression(ConstantLiteral(1, ScalarType.i32)), - step=None)], - stream_variable=TypedIdentifier(dtype=receive[3], - identifier=Identifier(name="reduce_receive", version=0)), - receive_stream=ReceiveGenerator(stream_name=Identifier(name=receive[4], version=0)), - body=[ - AssignmentStatement( - destination=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=Identifier(name="reduce_runner", version=0))] - ), - source=Expression( - BinaryOperator( - left=Expression( - value=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=Identifier(name="reduce_runner", version=0))] - ) - ), - op= '+' if operation_id == "OP_SUM" else '-----', # other operations not implemented - right=Expression( - value=Identifier(name="reduce_receive", version=0) - ) - ) - ) - ) - ], - completion_name=None - 
) - ) - - for send in send_list: - newstatements.append( - SendStatement( - local_array=stmt.local_array, - stream_name=Identifier(name=send[4], version=0), - completion_name=None - ) - ) - - # add receive + calculation + send here - for new_statement in newstatements: - statements.append(new_statement) - - else: - statements.append(stmt) - - finalbody.append(ComputeBlock(elem.variables, elem.subgrid, statements)) - else: - finalbody.append(elem) - - self.body = finalbody - #exit() - - return None # Specialized visitors diff --git a/tests/test_spatial_ir_parser.py b/tests/test_spatial_ir_parser.py index 21d50f24..8b2e6c38 100644 --- a/tests/test_spatial_ir_parser.py +++ b/tests/test_spatial_ir_parser.py @@ -1,4 +1,5 @@ from spatialstencil.syntax.spatial_ir import irnodes as spast, parser +from spatialstencil.optimizations.optimization_pass import optimization_pass import os @@ -93,7 +94,8 @@ def _rountrip_test(file): :return: """ program = parser.parse_file(file) - ir_1 = program.as_ir() + program_optimized = optimization_pass(program) + ir_1 = program_optimized.as_ir() print(ir_1) exit() program2 = parser.parse_string(ir_1) @@ -125,6 +127,16 @@ def test_simple_reduce_reference(): _rountrip_test(file) +def test_simple_reduce_three(): + file = os.path.join(os.path.dirname(__file__), '..', 'samples', 'spatial', 'simple_reduce_three.sptl') + _rountrip_test(file) + + +def test_simple_reduce_four(): + file = os.path.join(os.path.dirname(__file__), '..', 'samples', 'spatial', 'simple_reduce_four.sptl') + _rountrip_test(file) + + if __name__ == '__main__': # test_spatial_roundtrip_laplacian() # test_spatial_visitor() @@ -136,3 +148,5 @@ def test_simple_reduce_reference(): # test_spatial_bcast() test_simple_reduce() # test_simple_reduce_reference() + # test_simple_reduce_three() + # test_simple_reduce_four() \ No newline at end of file From 382440a1f4fd92a0c76526b341e9fc5e9f0468b0 Mon Sep 17 00:00:00 2001 From: Niklas Roemer Date: Thu, 28 Nov 2024 14:11:52 +0100 Subject: 
[PATCH 09/27] fix foreach loop to support _roundtrip_test and correct notation --- spatialstencil/optimizations/spatial_reduce.py | 2 +- tests/test_spatial_ir_parser.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/spatialstencil/optimizations/spatial_reduce.py b/spatialstencil/optimizations/spatial_reduce.py index 31dacbcf..8178cd2c 100644 --- a/spatialstencil/optimizations/spatial_reduce.py +++ b/spatialstencil/optimizations/spatial_reduce.py @@ -345,7 +345,7 @@ def change_compute_blocks(self) -> None: parameter_range=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), stop=Expression(ConstantLiteral(1, ScalarType.i32)), step=None)], - stream_variable=TypedIdentifier(dtype=con[2], + stream_variable=TypedIdentifier(dtype=con[2].dtype, identifier=self.versioning.next_version("reduce_receive")), receive_stream=ReceiveGenerator(stream_name=con[0]), body=[ diff --git a/tests/test_spatial_ir_parser.py b/tests/test_spatial_ir_parser.py index 8b2e6c38..3ae8f77a 100644 --- a/tests/test_spatial_ir_parser.py +++ b/tests/test_spatial_ir_parser.py @@ -96,8 +96,8 @@ def _rountrip_test(file): program = parser.parse_file(file) program_optimized = optimization_pass(program) ir_1 = program_optimized.as_ir() - print(ir_1) - exit() + # print(ir_1) + # exit() program2 = parser.parse_string(ir_1) ir_2 = program2.as_ir() assert ir_1 == ir_2 From e5e9b982c936f3f8ed1c26f0c940b14f9d99c2be Mon Sep 17 00:00:00 2001 From: Niklas Roemer Date: Mon, 9 Dec 2024 16:33:33 +0100 Subject: [PATCH 10/27] basic snake communication pattern working - not exhaustively tested yet --- .gitignore | 4 + samples/spatial/simple_reduce.sptl | 24 - .../optimizations/spatial_reduce.py | 580 +++++++++++++----- tests/test_spatial_ir_parser.py | 4 +- 4 files changed, 430 insertions(+), 182 deletions(-) diff --git a/.gitignore b/.gitignore index 1bff436f..3b726fb8 100644 --- a/.gitignore +++ b/.gitignore @@ -155,3 +155,7 @@ cython_debug/ #.idea/ /.vscode/ *.png + + +# custom 
run scripts +*.sh \ No newline at end of file diff --git a/samples/spatial/simple_reduce.sptl b/samples/spatial/simple_reduce.sptl index 5adeab93..cff53b91 100644 --- a/samples/spatial/simple_reduce.sptl +++ b/samples/spatial/simple_reduce.sptl @@ -11,34 +11,10 @@ kernel @add() { graph = 1, op = 2 } - multistream red2 = reduce(1, 1) { - channels = auto, - graph = 1, - op = 2 - } - multistream red3 = reduce(2, 2) { - channels = auto, - graph = 1, - op = 2 - } - multistream red4 = reduce(3, 3) { - channels = auto, - graph = 1, - op = 2 - } - multistream red5 = reduce(4, 4) { - channels = auto, - graph = 1, - op = 2 - } } compute i16 i, i16 j in [0:5, 0:5] { a[0] = 1 await reduce(a, red) - await reduce(a, red2) - await reduce(a, red3) - await reduce(a, red4) - await reduce(a, red5) } } \ No newline at end of file diff --git a/spatialstencil/optimizations/spatial_reduce.py b/spatialstencil/optimizations/spatial_reduce.py index 8178cd2c..68272c8a 100644 --- a/spatialstencil/optimizations/spatial_reduce.py +++ b/spatialstencil/optimizations/spatial_reduce.py @@ -3,6 +3,7 @@ import spatialstencil.syntax.spatial_ir.irnodes as spa from spatialstencil.lowering.versioning import Versioning from spatialstencil.syntax.common.visitor import ScopedIRNodeVisitor, IRNodeVisitor +# TODO from spatialstencil.syntax.spatial_ir.grid_geometry import Rectangle # try ScopedIRNodeVisitor / IRNodeVisitor from spatialstencil.syntax.common.visitor to match nodes that have reduce in them @@ -13,7 +14,8 @@ class ReduceOptimizer(): body: list[PlaceBlock | DataflowBlock | ComputeBlock | Phase] _communication_patterns: Optional[dict[str, dict[tuple[int, int], list[list[list[int]]]]]] = None reduce_operations: dict[str, dict[str, Union[int, Literal['OP_SUM'], list[int]]]] = {} # needs to be adapted - relative_streams: dict[str, list[list]] = {} + grid_streams: dict[str, list[list]] = {} + snake_streams: dict[str, list[list]] = {} pipelined: dict[str, bool] = {} @@ -34,23 +36,99 @@ def 
reduce_subroutine(self) -> Kernel: def create_communication_patterns(self, x_start, x_stop, y_start, y_stop, x, y, name) -> None: + if x < x_start or x >= x_stop or y < y_start or y >= y_stop: + if x == x_stop or y == y_stop: + raise ValueError(f"The communication point (x, y) = ({x}, {y}) is not within the subgrid" + + f"[x_start, x_stop, y_start, y_stop] = [{x_start}, {x_stop}, {y_start}, {y_stop}] for the operation {name}." + + f" Remember that the stop value is exclusive.") + raise ValueError(f"The communication point (x, y) = ({x}, {y}) is not within the subgrid" + + f"[x_start, x_stop, y_start, y_stop] = [{x_start}, {x_stop}, {y_start}, {y_stop}] for the operation {name}.") communication = [] self.pipelined.update({name : False}) # not implemented yet - mode = 'grid' #'snake' # not implemented yet + mode = 'snake' # not implemented yet if mode == 'snake': - raise NotImplementedError - if x == x_start and y == y_start: - print('upper left corner') - elif x == x_stop - 1 and y == y_start: - print('upper right corner') - elif x == x_start and y == y_stop - 1: - print('lower left corner') - elif x == x_stop - 1 and y == y_stop - 1: - print('lower right corner') + if y == y_start: + if (y_stop - 1 - y_start) % 2 == 0: + # horizontal movement + communication.append([x_start, x_stop, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start, x_stop, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) + + # vertical movement + if x == x_start: + # print('upper left corner odd') + if y_stop - y_start > 2: + communication.append([x_start, x_start + 1, y_start + 1, y_stop , 0, -1, 1, 2]) + if y_stop - y_start > 1: + communication.append([x_stop - 1, x_stop, y_start, y_stop - 1, 0, -1, 1, 2]) + if x == x_stop - 1: + # print('upper right corner odd') + if y_stop - y_start > 2: + communication.append([x_stop - 1, x_stop, y_start + 1, y_stop, 0, -1, 1, 2]) + if y_stop - y_start > 1: + communication.append([x_start, x_start + 1, y_start, 
y_stop - 1, 0, -1, 1, 2]) + else: + # horizontal movement + communication.append([x_start, x_stop, y_start, y_stop - 1, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start, x_stop, y_start + 1, y_stop, 1 if x == x_start else -1, 0, 1, 2]) + + # vertical movement + if x == x_start: + # print('upper left corner even') + if y_stop - y_start > 2: + communication.append([x_start, x_start + 1, y_start + 1, y_stop - 1, 0, -1, 1, 2]) + if y_stop - y_start > 1: + communication.append([x_stop - 1, x_stop, y_start, y_stop, 0, -1, 1, 2]) + if x == x_stop - 1: + # print('upper right corner even') + if y_stop - y_start > 2: + communication.append([x_stop - 1, x_stop, y_start + 1, y_stop - 1, 0, -1, 1, 2]) + if y_stop - y_start > 1: + communication.append([x_start, x_start + 1, y_start, y_stop, 0, -1, 1, 2]) + + elif y == y_stop - 1: + if (y_stop - 1 - y_start) % 2 == 0: + # horizontal movement + communication.append([x_start, x_stop, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start, x_stop, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) + + # vertical movement + if x == x_start: + # print('lower left corner odd') + if y_stop - y_start > 2: + communication.append([x_start, x_start + 1, y_start, y_stop - 1, 0, 1, 1, 2]) + if y_stop - y_start > 1: + communication.append([x_stop - 1, x_stop, y_start + 1, y_stop, 0, 1, 1, 2]) + if x == x_stop - 1: + # print('lower right corner odd') + if y_stop - y_start > 2: + communication.append([x_stop - 1, x_stop, y_start, y_stop - 1, 0, 1, 1, 2]) + if y_stop - y_start > 1: + communication.append([x_start, x_start + 1, y_start + 1, y_stop, 0, 1, 1, 2]) + else: + # horizontal movement + communication.append([x_start, x_stop, y_start + 1, y_stop, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start, x_stop, y_start, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) + + # vertical movement + if x == x_start: + # print('lower left corner even') + if y_stop - y_start 
> 2: + communication.append([x_start, x_start + 1, y_start + 1, y_stop - 1, 0, 1, 1, 2]) + if y_stop - y_start > 1: + communication.append([x_stop - 1, x_stop, y_start, y_stop, 0, 1, 1, 2]) + if x == x_stop - 1: + # print('lower right corner even') + if y_stop - y_start > 2: + communication.append([x_stop - 1, x_stop, y_start + 1, y_stop - 1, 0, 1, 1, 2]) + if y_stop - y_start > 1: + communication.append([x_start, x_start + 1, y_start, y_stop, 0, 1, 1, 2]) else: - raise NotImplementedError + raise NotImplementedError("Only the corners are implemented for 'snake'") + + self.snake_streams.update({name: communication}) - if mode == 'grid': + elif mode == 'grid': + # TODO add steps for pipelined communication if x == x_start: # horizontal movement if x_start == x_stop - 1: @@ -58,7 +136,7 @@ def create_communication_patterns(self, x_start, x_stop, y_start, y_stop, x, y, pass else: # print('right to left') - communication.append([x_start, x_stop, y_start, y_stop, -1, 0]) + communication.append([x_start, x_stop, y_start, y_stop, -1, 0, 1, 1]) # vertical movement if y_start == y_stop - 1: @@ -66,14 +144,14 @@ def create_communication_patterns(self, x_start, x_stop, y_start, y_stop, x, y, pass elif y == y_start: # print('upper left corner') - communication.append([x_start, x_start + 1, y_start, y_stop, 0, -1]) + communication.append([x_start, x_start + 1, y_start, y_stop, 0, -1, 1, 1]) elif y == y_stop - 1: # print('lower left corner') - communication.append([x_start, x_start + 1, y_start, y_stop, 0, 1]) + communication.append([x_start, x_start + 1, y_start, y_stop, 0, 1, 1, 1]) else: # print('left edge') - communication.append([x_start, x_start + 1, y_start, y + 1, 0, 1]) - communication.append([x_start, x_start + 1, y, y_stop, 0, -1]) + communication.append([x_start, x_start + 1, y_start, y + 1, 0, 1, 1, 1]) + communication.append([x_start, x_start + 1, y, y_stop, 0, -1, 1, 1]) elif x == x_stop - 1: # horizontal movement @@ -82,7 +160,7 @@ def 
create_communication_patterns(self, x_start, x_stop, y_start, y_stop, x, y, pass else: # print('left to right') - communication.append([x_start, x_stop, y_start, y_stop, 1, 0]) + communication.append([x_start, x_stop, y_start, y_stop, 1, 0, 1, 1]) # vertical movement if y_start == y_stop - 1: @@ -90,20 +168,20 @@ def create_communication_patterns(self, x_start, x_stop, y_start, y_stop, x, y, pass elif y == y_start: # print('upper right corner') - communication.append([x_stop - 1, x_stop, y_start, y_stop, 0, -1]) + communication.append([x_stop - 1, x_stop, y_start, y_stop, 0, -1, 1, 1]) elif y == y_stop - 1: # print('lower right corner') - communication.append([x_stop - 1, x_stop, y_start, y_stop, 0, 1]) + communication.append([x_stop - 1, x_stop, y_start, y_stop, 0, 1, 1, 1]) else: # print('right edge') - communication.append([x_stop - 1, x_stop, y_start, y + 1, 0, 1]) - communication.append([x_stop - 1, x_stop, y, y_stop, 0, -1]) + communication.append([x_stop - 1, x_stop, y_start, y + 1, 0, 1, 1, 1]) + communication.append([x_stop - 1, x_stop, y, y_stop, 0, -1, 1, 1]) else: # horizontal movement # print('middle') - communication.append([x_start, x + 1, y_start, y_stop, 1, 0]) # left to middle - communication.append([x, x_stop, y_start, y_stop, -1, 0]) # right to middle + communication.append([x_start, x + 1, y_start, y_stop, 1, 0, 1, 1]) # left to middle + communication.append([x, x_stop, y_start, y_stop, -1, 0, 1, 1]) # right to middle # vertical movement if y_start == y_stop - 1: @@ -111,16 +189,20 @@ def create_communication_patterns(self, x_start, x_stop, y_start, y_stop, x, y, pass elif y == y_start: # print('upper edge') - communication.append([x, x + 1, y_start, y_stop, 0, -1]) + communication.append([x, x + 1, y_start, y_stop, 0, -1, 1, 1]) elif y == y_stop - 1: # print('lower edge') - communication.append([x, x + 1, y_start, y_stop, 0, 1]) + communication.append([x, x + 1, y_start, y_stop, 0, 1, 1, 1]) else: # print('center') - communication.append([x, x 
+ 1, y_start, y + 1, 0, 1]) - communication.append([x, x + 1, y, y_stop, 0, -1]) + communication.append([x, x + 1, y_start, y + 1, 0, 1, 1, 1]) + communication.append([x, x + 1, y, y_stop, 0, -1, 1, 1]) - self.relative_streams.update({name : communication}) + self.grid_streams.update({name : communication}) + + else: + raise NotImplementedError(f"Communication mode '{mode}' is not implemented.") + return None @@ -141,26 +223,70 @@ def change_data_blocks(self) -> None: stmt.dy.value.value, stmt.stream_name.name) - self.reduce_operations.update({stmt.stream_name.name: [{'op': stmt.routing.op}, [stmt.dx.value.value, stmt.dy.value.value]]}) - new_relative_streams = [] - for com in self.relative_streams[stmt.stream_name.name]: + self.reduce_operations.update({stmt.stream_name.name: [{'op': stmt.routing.op}, [stmt.dx.value.value, stmt.dy.value.value], + [elem.subgrid.x_range.start.value.value, elem.subgrid.x_range.stop.value.value], + [elem.subgrid.y_range.start.value.value, elem.subgrid.y_range.stop.value.value]]}) + new_grid_streams = [] + new_snake_streams = [] + + if stmt.stream_name.name in self.grid_streams: + current_grid_streams = self.grid_streams[stmt.stream_name.name] + elif stmt.stream_name.name in self.snake_streams: + current_grid_streams = self.snake_streams[stmt.stream_name.name] + + for com in current_grid_streams: newdataflobblocks.append([[com[0], com[1]], [com[2], com[3]], RelativeStreamDeclaration( dtype=StreamType(stmt.dtype.dtype), stream_name=self.versioning.next_version("reduce"), dx=Expression(ConstantLiteral(com[4], ScalarType.i32)), dy=Expression(ConstantLiteral(com[5], ScalarType.i32)) - )] + ), + [com[6], com[7]]], ) - if com[4] == -1: - new_relative_streams.append([self.versioning.current_version("reduce"), com, StreamType(stmt.dtype.dtype), 'left']) - elif com[4] == 1: - new_relative_streams.append([self.versioning.current_version("reduce"), com, StreamType(stmt.dtype.dtype), 'right']) - elif com[5] == -1: - 
new_relative_streams.append([self.versioning.current_version("reduce"), com, StreamType(stmt.dtype.dtype), 'top']) - elif com[5] == 1: - new_relative_streams.append([self.versioning.current_version("reduce"), com, StreamType(stmt.dtype.dtype), 'bottom']) - self.relative_streams.update({stmt.stream_name.name: new_relative_streams}) + if stmt.stream_name.name in self.grid_streams: + if com[4] == -1: + new_grid_streams.append([self.versioning.current_version("reduce"), com, StreamType(stmt.dtype.dtype), 'left']) + elif com[4] == 1: + new_grid_streams.append([self.versioning.current_version("reduce"), com, StreamType(stmt.dtype.dtype), 'right']) + elif com[5] == -1: + new_grid_streams.append([self.versioning.current_version("reduce"), com, StreamType(stmt.dtype.dtype), 'top']) + elif com[5] == 1: + new_grid_streams.append([self.versioning.current_version("reduce"), com, StreamType(stmt.dtype.dtype), 'bottom']) + elif stmt.stream_name.name in self.snake_streams: + if com[4] == -1: + unrolled_com = [] + for i in range(com[2], com[3]): + if (i - com[2]) % com[7] == 0: + unrolled_com.append([com[0], com[1], i, i+1, com[4], com[5], com[6], com[7]]) + new_snake_streams.append([self.versioning.current_version("reduce"), com, StreamType(stmt.dtype.dtype), 'left', 'horizontal', unrolled_com]) + elif com[4] == 1: + unrolled_com = [] + for i in range(com[2], com[3]): + if (i - com[2]) % com[7] == 0: + unrolled_com.append([com[0], com[1], i, i+1, com[4], com[5], com[6], com[7]]) + new_snake_streams.append([self.versioning.current_version("reduce"), com, StreamType(stmt.dtype.dtype), 'right', 'horizontal', unrolled_com]) + elif com[5] == -1: + unrolled_com = [] + for i in range(com[2], com[3]): + if (i - com[2]) % (com[7]) == 0: + unrolled_com.append([com[0], com[1], i, i+1, com[4], com[5], com[6], com[7], 'receiver']) + if (i - com[2]) % (com[7]) == 1: + unrolled_com.append([com[0], com[1], i, i+1, com[4], com[5], com[6], com[7], 'sender']) + 
new_snake_streams.append([self.versioning.current_version("reduce"), com, StreamType(stmt.dtype.dtype), 'top', 'vertical', unrolled_com]) + elif com[5] == 1: + unrolled_com = [] + for i in range(com[2], com[3]): + if (i - com[2]) % (com[7]) == 0: + unrolled_com.append([com[0], com[1], i, i+1, com[4], com[5], com[6], com[7], 'sender']) + if (i - com[2]) % (com[7]) == 1: + unrolled_com.append([com[0], com[1], i, i+1, com[4], com[5], com[6], com[7], 'receiver']) + new_snake_streams.append([self.versioning.current_version("reduce"), com, StreamType(stmt.dtype.dtype), 'bottom']) + + if stmt.stream_name.name in self.grid_streams: + self.grid_streams.update({stmt.stream_name.name: new_grid_streams}) + elif stmt.stream_name.name in self.snake_streams: + self.snake_streams.update({stmt.stream_name.name: new_snake_streams}) else: olddataflobblock.append(stmt) @@ -178,6 +304,9 @@ def change_data_blocks(self) -> None: ), stop=Expression( ConstantLiteral(newdataflobblock[0][1], ScalarType.i32) + ), + step=Expression( + ConstantLiteral(newdataflobblock[3][0], ScalarType.i32) ) ), y_range=RangeExpression( @@ -186,6 +315,9 @@ def change_data_blocks(self) -> None: ), stop=Expression( ConstantLiteral(newdataflobblock[1][1], ScalarType.i32) + ), + step=Expression( + ConstantLiteral(newdataflobblock[3][1], ScalarType.i32) ) ), ), @@ -213,69 +345,112 @@ def fix_subgrid(self) -> None: for stmt in elem.statements: # walk operators in baseclass if isinstance(stmt, ReduceStatement): stream_name = stmt.stream_name.name - connections = self.relative_streams[stream_name] - reduce_connections = [] - send_connections = [] - for con in connections: - if con[3] == 'left': - send_connections.append([con[1][1] - 1, con[1][1], con[1][2], con[1][3]]) - elif con[3] == 'right': - send_connections.append([con[1][0], con[1][0] + 1, con[1][2], con[1][3]]) - elif con[3] == 'top': - send_connections.append([con[1][0], con[1][1], con[1][3] - 1, con[1][3]]) - elif con[3] == 'bottom': - 
send_connections.append([con[1][0], con[1][1], con[1][2], con[1][2] + 1]) - reduce_connections.append(con[1]) - root = self.reduce_operations[stmt.stream_name.name][1] - for send in send_connections: - reduce_connections.append(send) - - reduce_connections.append([root[0], root[0] + 1, root[1], root[1] + 1]) - - # needs to be tested properly - for com_grid in reduce_connections: - to_remove = [] - for sub_grid in grid: - if com_grid[0] > sub_grid[0][0] and com_grid[0] < sub_grid[0][1]: - # print("left") - sub_x_start = sub_grid[0][0] - sub_x_stop = sub_grid[0][1] - sub_y_start = sub_grid[1][0] - sub_y_stop = sub_grid[1][1] - grid.append([[sub_x_start, com_grid[0]], [sub_y_start, sub_y_stop]]) - grid.append([[com_grid[0], sub_x_stop], [sub_y_start, sub_y_stop]]) - to_remove.append(sub_grid) - elif com_grid[1] > sub_grid[0][0] and com_grid[1] < sub_grid[0][1]: - # print("right") - sub_x_start = sub_grid[0][0] - sub_x_stop = sub_grid[0][1] - sub_y_start = sub_grid[1][0] - sub_y_stop = sub_grid[1][1] - grid.append([[sub_x_start, com_grid[1]], [sub_y_start, sub_y_stop]]) - grid.append([[com_grid[1], sub_x_stop], [sub_y_start, sub_y_stop]]) - to_remove.append(sub_grid) - elif com_grid[2] > sub_grid[1][0] and com_grid[2] < sub_grid[1][1] and com_grid[0] <= sub_grid[0][0] and com_grid[1] >= sub_grid[0][1]: - # print("top") - sub_x_start = sub_grid[0][0] - sub_x_stop = sub_grid[0][1] - sub_y_start = sub_grid[1][0] - sub_y_stop = sub_grid[1][1] - grid.append([[sub_x_start, sub_x_stop], [sub_y_start, com_grid[2]]]) - grid.append([[sub_x_start, sub_x_stop], [com_grid[2], sub_y_stop]]) - to_remove.append(sub_grid) - elif com_grid[3] > sub_grid[1][0] and com_grid[3] < sub_grid[1][1] and com_grid[0] <= sub_grid[0][0] and com_grid[1] >= sub_grid[0][1]: - # print("bottom") - sub_x_start = sub_grid[0][0] - sub_x_stop = sub_grid[0][1] - sub_y_start = sub_grid[1][0] - sub_y_stop = sub_grid[1][1] - grid.append([[sub_x_start, sub_x_stop], [sub_y_start, com_grid[3]]]) - 
grid.append([[sub_x_start, sub_x_stop], [com_grid[3], sub_y_stop]]) - to_remove.append(sub_grid) - # delete old unused - for rmv in to_remove: - grid.remove(rmv) + # test if stream_name is in grid_streams + if stmt.stream_name.name in self.grid_streams: + connections = self.grid_streams[stream_name] + reduce_connections = [] + send_connections = [] + for con in connections: + if con[3] == 'left': + send_connections.append([con[1][1] - 1, con[1][1], con[1][2], con[1][3]]) + elif con[3] == 'right': + send_connections.append([con[1][0], con[1][0] + 1, con[1][2], con[1][3]]) + elif con[3] == 'top': + send_connections.append([con[1][0], con[1][1], con[1][3] - 1, con[1][3]]) + elif con[3] == 'bottom': + send_connections.append([con[1][0], con[1][1], con[1][2], con[1][2] + 1]) + reduce_connections.append(con[1]) + root = self.reduce_operations[stmt.stream_name.name][1] + for send in send_connections: + reduce_connections.append(send) + + reduce_connections.append([root[0], root[0] + 1, root[1], root[1] + 1]) + + # needs to be tested properly + for com_grid in reduce_connections: + to_remove = [] + for sub_grid in grid: + if com_grid[0] > sub_grid[0][0] and com_grid[0] < sub_grid[0][1]: + # print("left") + sub_x_start = sub_grid[0][0] + sub_x_stop = sub_grid[0][1] + sub_y_start = sub_grid[1][0] + sub_y_stop = sub_grid[1][1] + grid.append([[sub_x_start, com_grid[0]], [sub_y_start, sub_y_stop]]) + grid.append([[com_grid[0], sub_x_stop], [sub_y_start, sub_y_stop]]) + to_remove.append(sub_grid) + elif com_grid[1] > sub_grid[0][0] and com_grid[1] < sub_grid[0][1]: + # print("right") + sub_x_start = sub_grid[0][0] + sub_x_stop = sub_grid[0][1] + sub_y_start = sub_grid[1][0] + sub_y_stop = sub_grid[1][1] + grid.append([[sub_x_start, com_grid[1]], [sub_y_start, sub_y_stop]]) + grid.append([[com_grid[1], sub_x_stop], [sub_y_start, sub_y_stop]]) + to_remove.append(sub_grid) + elif com_grid[2] > sub_grid[1][0] and com_grid[2] < sub_grid[1][1] and com_grid[0] <= sub_grid[0][0] and 
com_grid[1] >= sub_grid[0][1]: + # print("top") + sub_x_start = sub_grid[0][0] + sub_x_stop = sub_grid[0][1] + sub_y_start = sub_grid[1][0] + sub_y_stop = sub_grid[1][1] + grid.append([[sub_x_start, sub_x_stop], [sub_y_start, com_grid[2]]]) + grid.append([[sub_x_start, sub_x_stop], [com_grid[2], sub_y_stop]]) + to_remove.append(sub_grid) + elif com_grid[3] > sub_grid[1][0] and com_grid[3] < sub_grid[1][1] and com_grid[0] <= sub_grid[0][0] and com_grid[1] >= sub_grid[0][1]: + # print("bottom") + sub_x_start = sub_grid[0][0] + sub_x_stop = sub_grid[0][1] + sub_y_start = sub_grid[1][0] + sub_y_stop = sub_grid[1][1] + grid.append([[sub_x_start, sub_x_stop], [sub_y_start, com_grid[3]]]) + grid.append([[sub_x_start, sub_x_stop], [com_grid[3], sub_y_stop]]) + to_remove.append(sub_grid) + # delete old unused + for rmv in to_remove: + grid.remove(rmv) + + + # needs to be tested in combination with grid_streams + if self.snake_streams != {}: + new_grid = [] + complete_grid = [] + + for name in self.snake_streams: + complete_grid = [self.reduce_operations[name][2], self.reduce_operations[name][3]] + break + + list_grid = result = [[x] for x in grid] + + for com_grid in list_grid: + to_remove = [] + for com in com_grid: + if com[0][0] == complete_grid[0][0] and com[0][1] != complete_grid[0][0] + 1: + # print("left") + com_grid.append([[complete_grid[0][0], complete_grid[0][0] + 1], [com[1][0], com[1][1]]]) + com_grid.append([[complete_grid[0][0] + 1, com[0][1]], [com[1][0], com[1][1]]]) + to_remove.append(com) + elif com[0][1] == complete_grid[0][1] and com[0][0] != complete_grid[0][1] - 1: + # print("right") + com_grid.append([[complete_grid[0][1] - 1, complete_grid[0][1]], [com[1][0], com[1][1]]]) + com_grid.append([[com[0][0], complete_grid[0][1] - 1], [com[1][0], com[1][1]]]) + to_remove.append(com) + elif com[1][1] - com[1][0] != 1: + # print('multiple rows') + for i in range(com[1][0], com[1][1]): + com_grid.append([[com[0][0], com[0][1]], [i, i + 1]]) + 
to_remove.append(com) + + for rmv in to_remove: + com_grid.remove(rmv) + + for com in com_grid: + new_grid.append(com) + + grid = new_grid + + for com_grid in grid: newbody.append( ComputeBlock( @@ -323,69 +498,162 @@ def change_compute_blocks(self) -> None: stream_name = stmt.stream_name.name operation_id = self.reduce_operations[stmt.stream_name.name][0]['op'] root = self.reduce_operations[stmt.stream_name.name][1] - connections = self.relative_streams[stream_name] + complete_grid = [self.reduce_operations[stmt.stream_name.name][2], self.reduce_operations[stmt.stream_name.name][3]] + + if stream_name in self.grid_streams: + connections = self.grid_streams[stream_name] + elif stream_name in self.snake_streams: + connections = self.snake_streams[stream_name] + else: + raise ValueError(f"Stream name {stream_name} not found in grid_streams or snake_streams.") if operation_id == 2: operation_id = "OP_SUM" - for con in connections: - if (current_position[0] >= con[1][0] - and current_position[1] <= con[1][1] - and current_position[2] >= con[1][2] - and current_position[3] <= con[1][3]): - - if (con[3] == 'left' and current_position[1] != con[1][1] - or con[3] == 'right' and current_position[0] != con[1][0] - or con[3] == 'top' and current_position[3] != con[1][3] - or con[3] == 'bottom' and current_position[2] != con[1][2]): - - newstatements.append( - ForeachStatement( - variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], - parameter_range=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), - stop=Expression(ConstantLiteral(1, ScalarType.i32)), - step=None)], - stream_variable=TypedIdentifier(dtype=con[2].dtype, - identifier=self.versioning.next_version("reduce_receive")), - receive_stream=ReceiveGenerator(stream_name=con[0]), - body=[ - AssignmentStatement( - destination=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ), - 
source=Expression( - BinaryOperator( - left=Expression( - value=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + if stream_name in self.grid_streams: + for con in connections: + if (current_position[0] >= con[1][0] + and current_position[1] <= con[1][1] + and current_position[2] >= con[1][2] + and current_position[3] <= con[1][3]): + + if (con[3] == 'left' and current_position[1] != con[1][1] + or con[3] == 'right' and current_position[0] != con[1][0] + or con[3] == 'top' and current_position[3] != con[1][3] + or con[3] == 'bottom' and current_position[2] != con[1][2]): + + newstatements.append( + ForeachStatement( + variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], + parameter_range=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), + stop=Expression(ConstantLiteral(1, ScalarType.i32)), + step=None)], + stream_variable=TypedIdentifier(dtype=con[2].dtype, + identifier=self.versioning.next_version("reduce_receive")), + receive_stream=ReceiveGenerator(stream_name=con[0]), + body=[ + AssignmentStatement( + destination=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ), + source=Expression( + BinaryOperator( + left=Expression( + value=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ) + ), + op= '+' if operation_id == "OP_SUM" else '-----', # other operations not implemented + right=Expression( + value=self.versioning.current_version("reduce_receive") ) - ), - op= '+' if operation_id == "OP_SUM" else '-----', # other operations not implemented - right=Expression( - value=self.versioning.current_version("reduce_receive") ) ) ) - ) - ], - completion_name=None + ], + completion_name=None + ) ) - ) - if (con[3] == 'left' and current_position[0] != con[1][0] - or con[3] == 'right' and 
current_position[1] != con[1][1] - or con[3] == 'top' and current_position[2] != con[1][2] - or con[3] == 'bottom' and current_position[3] != con[1][3]): - - newstatements.append( - SendStatement( - local_array=stmt.local_array, - stream_name=con[0], - completion_name=None + if (con[3] == 'left' and current_position[0] != con[1][0] + or con[3] == 'right' and current_position[1] != con[1][1] + or con[3] == 'top' and current_position[2] != con[1][2] + or con[3] == 'bottom' and current_position[3] != con[1][3]): + + newstatements.append( + SendStatement( + local_array=stmt.local_array, + stream_name=con[0], + completion_name=None + ) ) + + elif stream_name in self.snake_streams: + if not (((current_position[2] == complete_grid[1][0]) or (current_position[3] == complete_grid[1][1])) # top or bottom + and ((current_position[0] == complete_grid[0][0]) or (current_position[1] == complete_grid[0][1])) # left or right + and (root[1] != current_position[2]) # not the same vertical position as root + and ((((complete_grid[1][1] - complete_grid[1][0]) % 2 == 0) and (root[1] == current_position[0])) + or (((complete_grid[1][1] - complete_grid[1][0]) % 2 == 1) and not (root[1] == current_position[0])))): + # everything but the starting point receives first + + # get receive stream + receive_stream = None + for con in connections: + for detailed_con in con[5]: + if (current_position[0] >= detailed_con[0] and current_position[1] <= detailed_con[1] + and current_position[2] >= detailed_con[2] and current_position[3] <= detailed_con[3] + and ((detailed_con[4] == -1 and not current_position[1] == detailed_con[1]) + or (detailed_con[4] == 1 and not current_position[0] == detailed_con[0]) + or (detailed_con[4] == 0 and detailed_con[8] == 'receiver'))): + receive_stream = con + break + + if not receive_stream == None: + break + + # change receive statement + newstatements.append( + ForeachStatement( + variables=[TypedIdentifier(dtype=ScalarType.i32, 
identifier=self.versioning.next_version("reduce_runner"))], + parameter_range=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), + stop=Expression(ConstantLiteral(1, ScalarType.i32)), + step=None)], + stream_variable=TypedIdentifier(dtype=receive_stream[2].dtype, + identifier=self.versioning.next_version("reduce_receive")), + receive_stream=ReceiveGenerator(stream_name=receive_stream[0]), + body=[ + AssignmentStatement( + destination=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ), + source=Expression( + BinaryOperator( + left=Expression( + value=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ) + ), + op= '+' if operation_id == "OP_SUM" else '-----', # other operations not implemented + right=Expression( + value=self.versioning.current_version("reduce_receive") + ) + ) + ) + ) + ], + completion_name=None ) + ) + + if not (current_position[0] == root[0] and current_position[2] == root[1]): + # only root does not send + + # get send stream + send_stream = None + for con in connections: + for detailed_con in con[5]: + if (current_position[0] >= detailed_con[0] and current_position[1] <= detailed_con[1] + and current_position[2] >= detailed_con[2] and current_position[3] <= detailed_con[3] + and ((detailed_con[4] == 1 and not current_position[1] == detailed_con[1]) + or (detailed_con[4] == -1 and not current_position[0] == detailed_con[0]) + or (detailed_con[4] == 0 and detailed_con[8] == 'sender'))): + send_stream = con + break + + if not send_stream == None: + break + + newstatements.append( + SendStatement( + local_array=stmt.local_array, + stream_name=send_stream[0], + completion_name=None + ) + ) # add receive + calculation + send here for new_statement in newstatements: diff --git a/tests/test_spatial_ir_parser.py b/tests/test_spatial_ir_parser.py index 3ae8f77a..8b2e6c38 100644 --- 
a/tests/test_spatial_ir_parser.py +++ b/tests/test_spatial_ir_parser.py @@ -96,8 +96,8 @@ def _rountrip_test(file): program = parser.parse_file(file) program_optimized = optimization_pass(program) ir_1 = program_optimized.as_ir() - # print(ir_1) - # exit() + print(ir_1) + exit() program2 = parser.parse_string(ir_1) ir_2 = program2.as_ir() assert ir_1 == ir_2 From 063a58397184e629fb8c44c7dd2f5ee178810033 Mon Sep 17 00:00:00 2001 From: Niklas Roemer Date: Thu, 9 Jan 2025 17:12:23 +0100 Subject: [PATCH 11/27] initial implementation of pipelined snake --- samples/spatial/bcast.sptl | 10 +- samples/spatial/simple_reduce.sptl | 6 +- .../optimizations/spatial_reduce.py | 491 ++++++++++++++---- spatialstencil/syntax/spatial_ir/irnodes.py | 20 +- .../syntax/spatial_ir/language.lark | 5 +- .../syntax/spatial_ir/lark_to_ir.py | 8 +- 6 files changed, 406 insertions(+), 134 deletions(-) diff --git a/samples/spatial/bcast.sptl b/samples/spatial/bcast.sptl index 47f0f167..36aa8286 100644 --- a/samples/spatial/bcast.sptl +++ b/samples/spatial/bcast.sptl @@ -6,7 +6,7 @@ kernel @add(stream readonly a_in, stream[N, N] writeonly out) { } dataflow i16 i, i16 j in [0:N, 0:N] { - multistream bcast = broadcast(0, 0) { + multistream bcast = broadcast_stream(0, 0) { channels = auto } } @@ -34,4 +34,12 @@ kernel @add(stream readonly a_in, stream[N, N] writeonly out) { await send(a, out[i, j]) } + + + + + compute i16 i, i16 j in [0:N, 0:N] { + broadcast(a, bcast) + } + } diff --git a/samples/spatial/simple_reduce.sptl b/samples/spatial/simple_reduce.sptl index cff53b91..12d35273 100644 --- a/samples/spatial/simple_reduce.sptl +++ b/samples/spatial/simple_reduce.sptl @@ -7,9 +7,9 @@ kernel @add() { dataflow i16 i, i16 j in [0:5, 0:5] { multistream red = reduce(0, 0) { - channels = auto, - graph = 1, - op = 2 + graph = snake, + op = S_SUM, + pipelined = true } } diff --git a/spatialstencil/optimizations/spatial_reduce.py b/spatialstencil/optimizations/spatial_reduce.py index 68272c8a..08f55038 
100644 --- a/spatialstencil/optimizations/spatial_reduce.py +++ b/spatialstencil/optimizations/spatial_reduce.py @@ -1,4 +1,4 @@ -from spatialstencil.syntax.spatial_ir.irnodes import Kernel, ComputeBlock, ReduceStatement, Expression, SubgridExpression, RangeExpression, ConstantLiteral, ScalarType, DataflowBlock, MulStreamDeclaration, ReduceRoutingDeclaration, RoutingDeclaration, RoutingHop, StreamType, Identifier, TypedIdentifier, ForeachStatement, ArraySlice, BinaryOperator, SendStatement, ReceiveGenerator, AssignmentStatement, RelativeStreamDeclaration, PlaceBlock, Phase, Parameter, KernelArgument +from spatialstencil.syntax.spatial_ir.irnodes import Kernel, ComputeBlock, ReduceStatement, Expression, SubgridExpression, RangeExpression, ConstantLiteral, ScalarType, DataflowBlock, MulStreamDeclaration, ReduceRoutingDeclaration, RoutingDeclaration, RoutingHop, StreamType, Identifier, TypedIdentifier, ForeachStatement, ArraySlice, BinaryOperator, SendStatement, ReceiveGenerator, AssignmentStatement, RelativeStreamDeclaration, PlaceBlock, Phase, Parameter, KernelArgument, ReceiveStatement, ForStatement from typing import Union, Tuple, Optional, Literal import spatialstencil.syntax.spatial_ir.irnodes as spa from spatialstencil.lowering.versioning import Versioning @@ -35,7 +35,7 @@ def reduce_subroutine(self) -> Kernel: - def create_communication_patterns(self, x_start, x_stop, y_start, y_stop, x, y, name) -> None: + def create_communication_patterns(self, x_start, x_stop, y_start, y_stop, x, y, name, graph, pipelined) -> None: if x < x_start or x >= x_stop or y < y_start or y >= y_stop: if x == x_stop or y == y_stop: raise ValueError(f"The communication point (x, y) = ({x}, {y}) is not within the subgrid" + @@ -45,83 +45,172 @@ def create_communication_patterns(self, x_start, x_stop, y_start, y_stop, x, y, f"[x_start, x_stop, y_start, y_stop] = [{x_start}, {x_stop}, {y_start}, {y_stop}] for the operation {name}.") communication = [] self.pipelined.update({name : 
False}) # not implemented yet - mode = 'snake' # not implemented yet + mode = graph + if pipelined: + print('pipelined communication is not implemented yet') if mode == 'snake': if y == y_start: if (y_stop - 1 - y_start) % 2 == 0: + # horizontal movement - communication.append([x_start, x_stop, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) - communication.append([x_start, x_stop, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) + if pipelined: + if x_stop - x_start > 1: ## this should be handled differently + if (x_stop - x_start) % 2 != 0: + communication.append([x_start, x_stop - 1, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start + 1, x_stop, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start, x_stop - 1, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) + communication.append([x_start + 1, x_stop, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) + else: + communication.append([x_start, x_stop, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start + 1, x_stop - 1, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start, x_stop, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) + communication.append([x_start + 1, x_stop - 1, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) + else: + # effectively not pipelined in x direction as we have a column + # still pipelined in y direction + # communication.append([x_start, x_stop, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 1]) + # communication.append([x_start, x_stop, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 1]) + pass + else: + if x_stop - x_start > 1: + communication.append([x_start, x_stop, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 1]) + communication.append([x_start, x_stop, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 1]) # vertical movement + # not dependent on 
pipelined as if we have a column it's already pipelined if x == x_start: # print('upper left corner odd') if y_stop - y_start > 2: - communication.append([x_start, x_start + 1, y_start + 1, y_stop , 0, -1, 1, 2]) + communication.append([x_start, x_start + 1, y_start + 1, y_stop , 0, -1, 1, 1]) if y_stop - y_start > 1: - communication.append([x_stop - 1, x_stop, y_start, y_stop - 1, 0, -1, 1, 2]) + communication.append([x_stop - 1, x_stop, y_start, y_stop - 1, 0, -1, 1, 1]) if x == x_stop - 1: # print('upper right corner odd') if y_stop - y_start > 2: - communication.append([x_stop - 1, x_stop, y_start + 1, y_stop, 0, -1, 1, 2]) + communication.append([x_stop - 1, x_stop, y_start + 1, y_stop, 0, -1, 1, 1]) if y_stop - y_start > 1: - communication.append([x_start, x_start + 1, y_start, y_stop - 1, 0, -1, 1, 2]) + communication.append([x_start, x_start + 1, y_start, y_stop - 1, 0, -1, 1, 1]) else: + # horizontal movement - communication.append([x_start, x_stop, y_start, y_stop - 1, -1 if x == x_start else 1, 0, 1, 2]) - communication.append([x_start, x_stop, y_start + 1, y_stop, 1 if x == x_start else -1, 0, 1, 2]) + if pipelined: + if x_stop - x_start > 1: + if (x_stop - x_start) % 2 != 0: + communication.append([x_start, x_stop - 1, y_start, y_stop - 1, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start + 1, x_stop, y_start, y_stop - 1, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start, x_stop - 1, y_start + 1, y_stop, 1 if x == x_start else -1, 0, 1, 2]) + communication.append([x_start + 1, x_stop, y_start + 1, y_stop, 1 if x == x_start else -1, 0, 1, 2]) + else: + communication.append([x_start, x_stop, y_start, y_stop - 1, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start + 1, x_stop - 1, y_start, y_stop - 1, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start, x_stop, y_start + 1, y_stop, 1 if x == x_start else -1, 0, 1, 2]) + communication.append([x_start + 1, x_stop - 1, y_start + 1, 
y_stop, 1 if x == x_start else -1, 0, 1, 2]) + else: + # effectively not pipelined in x direction as we have a column + # still pipelined in y direction + # communication.append([x_start, x_stop, y_start, y_stop - 1, -1 if x == x_start else 1, 0, 2, 2]) + # communication.append([x_start, x_stop, y_start + 1, y_stop, 1 if x == x_start else -1, 0, 2, 2]) + pass + else: + if x_stop - x_start > 1: + communication.append([x_start, x_stop, y_start, y_stop - 1, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start, x_stop, y_start + 1, y_stop, 1 if x == x_start else -1, 0, 1, 2]) # vertical movement + # not dependent on pipelined as if we have a column it's already pipelined if x == x_start: # print('upper left corner even') if y_stop - y_start > 2: - communication.append([x_start, x_start + 1, y_start + 1, y_stop - 1, 0, -1, 1, 2]) + communication.append([x_start, x_start + 1, y_start + 1, y_stop - 1, 0, -1, 1, 1]) if y_stop - y_start > 1: - communication.append([x_stop - 1, x_stop, y_start, y_stop, 0, -1, 1, 2]) + communication.append([x_stop - 1, x_stop, y_start, y_stop, 0, -1, 1, 1]) if x == x_stop - 1: # print('upper right corner even') if y_stop - y_start > 2: - communication.append([x_stop - 1, x_stop, y_start + 1, y_stop - 1, 0, -1, 1, 2]) + communication.append([x_stop - 1, x_stop, y_start + 1, y_stop - 1, 0, -1, 1, 1]) if y_stop - y_start > 1: - communication.append([x_start, x_start + 1, y_start, y_stop, 0, -1, 1, 2]) + communication.append([x_start, x_start + 1, y_start, y_stop, 0, -1, 1, 1]) elif y == y_stop - 1: if (y_stop - 1 - y_start) % 2 == 0: + # horizontal movement - communication.append([x_start, x_stop, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) - communication.append([x_start, x_stop, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) + if pipelined: + if x_stop - x_start > 1: + if (x_stop - x_start) % 2 != 0: + communication.append([x_start, x_stop - 1, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) + 
communication.append([x_start + 1, x_stop, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start, x_stop - 1, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) + communication.append([x_start + 1, x_stop, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) + else: + communication.append([x_start, x_stop, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start + 1, x_stop - 1, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start, x_stop, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) + communication.append([x_start + 1, x_stop - 1, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) + else: + # effectively not pipelined in x direction as we have a column + # still pipelined in y direction + # communication.append([x_start, x_stop, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) + # communication.append([x_start, x_stop, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) + pass + else: + if x_stop - x_start > 1: + communication.append([x_start, x_stop, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start, x_stop, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) # vertical movement + # not dependent on pipelined as if we have a column it's already pipelined if x == x_start: # print('lower left corner odd') if y_stop - y_start > 2: - communication.append([x_start, x_start + 1, y_start, y_stop - 1, 0, 1, 1, 2]) + communication.append([x_start, x_start + 1, y_start, y_stop - 1, 0, 1, 1, 1]) if y_stop - y_start > 1: - communication.append([x_stop - 1, x_stop, y_start + 1, y_stop, 0, 1, 1, 2]) + communication.append([x_stop - 1, x_stop, y_start + 1, y_stop, 0, 1, 1, 1]) if x == x_stop - 1: # print('lower right corner odd') if y_stop - y_start > 2: - communication.append([x_stop - 1, x_stop, y_start, y_stop - 1, 0, 1, 1, 2]) + communication.append([x_stop - 1, x_stop, 
y_start, y_stop - 1, 0, 1, 1, 1]) if y_stop - y_start > 1: - communication.append([x_start, x_start + 1, y_start + 1, y_stop, 0, 1, 1, 2]) + communication.append([x_start, x_start + 1, y_start + 1, y_stop, 0, 1, 1, 1]) else: + # horizontal movement - communication.append([x_start, x_stop, y_start + 1, y_stop, -1 if x == x_start else 1, 0, 1, 2]) - communication.append([x_start, x_stop, y_start, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) + if pipelined: + if x_stop - x_start > 1: + if (x_stop - x_start) % 2 != 0: + communication.append([x_start, x_stop - 1, y_start + 1, y_stop, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start + 1, x_stop, y_start + 1, y_stop, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start, x_stop - 1, y_start, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) + communication.append([x_start + 1, x_stop, y_start, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) + else: + communication.append([x_start, x_stop, y_start + 1, y_stop, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start + 1, x_stop - 1, y_start + 1, y_stop, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start, x_stop, y_start, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) + communication.append([x_start + 1, x_stop - 1, y_start, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) + else: + # effectively not pipelined in x direction as we have a column + # still pipelined in y direction + # communication.append([x_start, x_stop, y_start + 1, y_stop, -1 if x == x_start else 1, 0, 2, 2]) + # communication.append([x_start, x_stop, y_start, y_stop - 1, 1 if x == x_start else -1, 0, 2, 2]) + pass + else: + communication.append([x_start, x_stop, y_start + 1, y_stop, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start, x_stop, y_start, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) # vertical movement + # not dependent on pipelined as if we have a column it's already pipelined if x == x_start: # 
print('lower left corner even') if y_stop - y_start > 2: - communication.append([x_start, x_start + 1, y_start + 1, y_stop - 1, 0, 1, 1, 2]) + communication.append([x_start, x_start + 1, y_start + 1, y_stop - 1, 0, 1, 1, 1]) if y_stop - y_start > 1: - communication.append([x_stop - 1, x_stop, y_start, y_stop, 0, 1, 1, 2]) + communication.append([x_stop - 1, x_stop, y_start, y_stop, 0, 1, 1, 1]) if x == x_stop - 1: # print('lower right corner even') if y_stop - y_start > 2: - communication.append([x_stop - 1, x_stop, y_start + 1, y_stop - 1, 0, 1, 1, 2]) + communication.append([x_stop - 1, x_stop, y_start + 1, y_stop - 1, 0, 1, 1, 1]) if y_stop - y_start > 1: - communication.append([x_start, x_start + 1, y_start, y_stop, 0, 1, 1, 2]) + communication.append([x_start, x_start + 1, y_start, y_stop, 0, 1, 1, 1]) else: raise NotImplementedError("Only the corners are implemented for 'snake'") @@ -134,9 +223,19 @@ def create_communication_patterns(self, x_start, x_stop, y_start, y_stop, x, y, if x_start == x_stop - 1: # print('no horizontal movement needed') pass - else: + elif x_stop - x_start == 2: # print('right to left') communication.append([x_start, x_stop, y_start, y_stop, -1, 0, 1, 1]) + else: + if pipelined: + if (x_stop - x_start) % 2 == 0: + communication.append([x_start, x_stop - 1, y_start, y_stop, -1, 0, 2, 1]) + communication.append([x_start + 1, x_stop, y_start, y_stop, -1, 0, 2, 1]) + else: + communication.append([x_start, x_stop, y_start, y_stop, -1, 0, 2, 1]) + communication.append([x_start + 1, x_stop - 1, y_start, y_stop, -1, 0, 2, 1]) + else: + communication.append([x_start, x_stop, y_start, y_stop, -1, 0, 1, 1]) # vertical movement if y_start == y_stop - 1: @@ -221,11 +320,15 @@ def change_data_blocks(self) -> None: elem.subgrid.y_range.stop.value.value, stmt.dx.value.value, stmt.dy.value.value, - stmt.stream_name.name) + stmt.stream_name.name, + stmt.routing.graph, + stmt.routing.pipelined) self.reduce_operations.update({stmt.stream_name.name: 
[{'op': stmt.routing.op}, [stmt.dx.value.value, stmt.dy.value.value], [elem.subgrid.x_range.start.value.value, elem.subgrid.x_range.stop.value.value], - [elem.subgrid.y_range.start.value.value, elem.subgrid.y_range.stop.value.value]]}) + [elem.subgrid.y_range.start.value.value, elem.subgrid.y_range.stop.value.value], + [stmt.dx.value.value if (elem.subgrid.y_range.stop.value.value - elem.subgrid.y_range.start.value.value) % 2 == 0 else (elem.subgrid.x_range.stop.value.value - stmt.dx.value.value - 1), + elem.subgrid.y_range.stop.value.value - 1 if elem.subgrid.y_range.start.value.value == stmt.dy.value.value else elem.subgrid.y_range.start.value.value]]}) new_grid_streams = [] new_snake_streams = [] @@ -258,30 +361,44 @@ def change_data_blocks(self) -> None: unrolled_com = [] for i in range(com[2], com[3]): if (i - com[2]) % com[7] == 0: - unrolled_com.append([com[0], com[1], i, i+1, com[4], com[5], com[6], com[7]]) - new_snake_streams.append([self.versioning.current_version("reduce"), com, StreamType(stmt.dtype.dtype), 'left', 'horizontal', unrolled_com]) + if not stmt.routing.pipelined: + unrolled_com.append([com[0], com[1], i, i+1, com[4], com[5], com[6], com[7]]) + else: + for j in range(com[1], com[0], -1): + if j % 2 == com[1] % 2: + unrolled_com.append([j-1, j, i, i+1, com[4], com[5], com[6], com[7], 'sender']) + else: + unrolled_com.append([j-1, j, i, i+1, com[4], com[5], com[6], com[7], 'receiver']) + new_snake_streams.append([self.versioning.current_version("reduce"), com, StreamType(stmt.dtype.dtype), 'left', 'horizontal', unrolled_com, stmt.routing.pipelined]) elif com[4] == 1: unrolled_com = [] for i in range(com[2], com[3]): if (i - com[2]) % com[7] == 0: - unrolled_com.append([com[0], com[1], i, i+1, com[4], com[5], com[6], com[7]]) - new_snake_streams.append([self.versioning.current_version("reduce"), com, StreamType(stmt.dtype.dtype), 'right', 'horizontal', unrolled_com]) + if not stmt.routing.pipelined: + unrolled_com.append([com[0], com[1], i, 
i+1, com[4], com[5], com[6], com[7]]) + else: + for j in range(com[0], com[1]): + if j % 2 == com[0] % 2: + unrolled_com.append([j, j+1, i, i+1, com[4], com[5], com[6], com[7], 'sender']) + else: + unrolled_com.append([j, j+1, i, i+1, com[4], com[5], com[6], com[7], 'receiver']) + new_snake_streams.append([self.versioning.current_version("reduce"), com, StreamType(stmt.dtype.dtype), 'right', 'horizontal', unrolled_com, stmt.routing.pipelined]) elif com[5] == -1: unrolled_com = [] - for i in range(com[2], com[3]): - if (i - com[2]) % (com[7]) == 0: - unrolled_com.append([com[0], com[1], i, i+1, com[4], com[5], com[6], com[7], 'receiver']) - if (i - com[2]) % (com[7]) == 1: - unrolled_com.append([com[0], com[1], i, i+1, com[4], com[5], com[6], com[7], 'sender']) - new_snake_streams.append([self.versioning.current_version("reduce"), com, StreamType(stmt.dtype.dtype), 'top', 'vertical', unrolled_com]) + for i in range(com[3], com[2], -1): + if i % 2 == com[3] % 2: + unrolled_com.append([com[0], com[1], i-1, i, com[4], com[5], com[6], com[7], 'sender']) + else: + unrolled_com.append([com[0], com[1], i-1, i, com[4], com[5], com[6], com[7], 'receiver']) + new_snake_streams.append([self.versioning.current_version("reduce"), com, StreamType(stmt.dtype.dtype), 'top', 'vertical', unrolled_com, stmt.routing.pipelined]) elif com[5] == 1: unrolled_com = [] for i in range(com[2], com[3]): - if (i - com[2]) % (com[7]) == 0: + if i % 2 == com[2] % 2: unrolled_com.append([com[0], com[1], i, i+1, com[4], com[5], com[6], com[7], 'sender']) - if (i - com[2]) % (com[7]) == 1: + else: unrolled_com.append([com[0], com[1], i, i+1, com[4], com[5], com[6], com[7], 'receiver']) - new_snake_streams.append([self.versioning.current_version("reduce"), com, StreamType(stmt.dtype.dtype), 'bottom']) + new_snake_streams.append([self.versioning.current_version("reduce"), com, StreamType(stmt.dtype.dtype), 'bottom', 'vertical', unrolled_com, stmt.routing.pipelined]) if stmt.stream_name.name in 
self.grid_streams: self.grid_streams.update({stmt.stream_name.name: new_grid_streams}) @@ -416,37 +533,46 @@ def fix_subgrid(self) -> None: if self.snake_streams != {}: new_grid = [] complete_grid = [] + pipelined = False for name in self.snake_streams: complete_grid = [self.reduce_operations[name][2], self.reduce_operations[name][3]] + pipelined = self.snake_streams[name][0][6] break - list_grid = result = [[x] for x in grid] - - for com_grid in list_grid: - to_remove = [] - for com in com_grid: - if com[0][0] == complete_grid[0][0] and com[0][1] != complete_grid[0][0] + 1: - # print("left") - com_grid.append([[complete_grid[0][0], complete_grid[0][0] + 1], [com[1][0], com[1][1]]]) - com_grid.append([[complete_grid[0][0] + 1, com[0][1]], [com[1][0], com[1][1]]]) - to_remove.append(com) - elif com[0][1] == complete_grid[0][1] and com[0][0] != complete_grid[0][1] - 1: - # print("right") - com_grid.append([[complete_grid[0][1] - 1, complete_grid[0][1]], [com[1][0], com[1][1]]]) - com_grid.append([[com[0][0], complete_grid[0][1] - 1], [com[1][0], com[1][1]]]) - to_remove.append(com) - elif com[1][1] - com[1][0] != 1: - # print('multiple rows') - for i in range(com[1][0], com[1][1]): - com_grid.append([[com[0][0], com[0][1]], [i, i + 1]]) - to_remove.append(com) - - for rmv in to_remove: - com_grid.remove(rmv) - - for com in com_grid: - new_grid.append(com) + if not pipelined: + + list_grid = [[x] for x in grid] + + for com_grid in list_grid: + to_remove = [] + for com in com_grid: + if com[0][0] == complete_grid[0][0] and com[0][1] != complete_grid[0][0] + 1: + # print("left") + com_grid.append([[complete_grid[0][0], complete_grid[0][0] + 1], [com[1][0], com[1][1]]]) + com_grid.append([[complete_grid[0][0] + 1, com[0][1]], [com[1][0], com[1][1]]]) + to_remove.append(com) + elif com[0][1] == complete_grid[0][1] and com[0][0] != complete_grid[0][1] - 1: + # print("right") + com_grid.append([[complete_grid[0][1] - 1, complete_grid[0][1]], [com[1][0], com[1][1]]]) + 
com_grid.append([[com[0][0], complete_grid[0][1] - 1], [com[1][0], com[1][1]]]) + to_remove.append(com) + elif com[1][1] - com[1][0] != 1: + # print('multiple rows') + for i in range(com[1][0], com[1][1]): + com_grid.append([[com[0][0], com[0][1]], [i, i + 1]]) + to_remove.append(com) + + for rmv in to_remove: + com_grid.remove(rmv) + + for com in com_grid: + new_grid.append(com) + + else: + for i in range(grid[0][0][0], grid[0][0][1]): + for j in range(grid[0][1][0], grid[0][1][1]): + new_grid.append([[i, i + 1], [j, j + 1]]) grid = new_grid @@ -498,6 +624,7 @@ def change_compute_blocks(self) -> None: stream_name = stmt.stream_name.name operation_id = self.reduce_operations[stmt.stream_name.name][0]['op'] root = self.reduce_operations[stmt.stream_name.name][1] + origin = self.reduce_operations[stmt.stream_name.name][4] complete_grid = [self.reduce_operations[stmt.stream_name.name][2], self.reduce_operations[stmt.stream_name.name][3]] if stream_name in self.grid_streams: @@ -507,9 +634,6 @@ def change_compute_blocks(self) -> None: else: raise ValueError(f"Stream name {stream_name} not found in grid_streams or snake_streams.") - if operation_id == 2: - operation_id = "OP_SUM" - if stream_name in self.grid_streams: for con in connections: if (current_position[0] >= con[1][0] @@ -522,6 +646,13 @@ def change_compute_blocks(self) -> None: or con[3] == 'top' and current_position[3] != con[1][3] or con[3] == 'bottom' and current_position[2] != con[1][2]): + if operation_id == "S_SUM": + current_op = '+' + elif operation_id == "S_PROD": + current_op = '*' + else: + raise NotImplementedError("Currently only S_SUM and S_PROD are supported.") + newstatements.append( ForeachStatement( variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], @@ -545,7 +676,7 @@ def change_compute_blocks(self) -> None: indices=[Expression(value=self.versioning.current_version("reduce_runner"))] ) ), - op= '+' if operation_id == "OP_SUM" else 
'-----', # other operations not implemented + op= current_op, right=Expression( value=self.versioning.current_version("reduce_receive") ) @@ -571,11 +702,7 @@ def change_compute_blocks(self) -> None: ) elif stream_name in self.snake_streams: - if not (((current_position[2] == complete_grid[1][0]) or (current_position[3] == complete_grid[1][1])) # top or bottom - and ((current_position[0] == complete_grid[0][0]) or (current_position[1] == complete_grid[0][1])) # left or right - and (root[1] != current_position[2]) # not the same vertical position as root - and ((((complete_grid[1][1] - complete_grid[1][0]) % 2 == 0) and (root[1] == current_position[0])) - or (((complete_grid[1][1] - complete_grid[1][0]) % 2 == 1) and not (root[1] == current_position[0])))): + if not (current_position[0] == origin[0] and current_position[2] == origin[1]): # everything but the starting point receives first # get receive stream @@ -586,48 +713,105 @@ def change_compute_blocks(self) -> None: and current_position[2] >= detailed_con[2] and current_position[3] <= detailed_con[3] and ((detailed_con[4] == -1 and not current_position[1] == detailed_con[1]) or (detailed_con[4] == 1 and not current_position[0] == detailed_con[0]) - or (detailed_con[4] == 0 and detailed_con[8] == 'receiver'))): + or (detailed_con[4] == 0 and detailed_con[8] == 'receiver') + or (con[6] == True and detailed_con[8] == 'receiver'))): receive_stream = con break if not receive_stream == None: break + if operation_id == "S_SUM": + current_op = '+' + elif operation_id == "S_PROD": + current_op = '*' + else: + raise NotImplementedError("Currently only S_SUM and S_PROD are supported.") + # change receive statement - newstatements.append( - ForeachStatement( - variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], - parameter_range=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), - stop=Expression(ConstantLiteral(1, ScalarType.i32)), - step=None)], 
- stream_variable=TypedIdentifier(dtype=receive_stream[2].dtype, - identifier=self.versioning.next_version("reduce_receive")), - receive_stream=ReceiveGenerator(stream_name=receive_stream[0]), - body=[ - AssignmentStatement( - destination=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ), - source=Expression( - BinaryOperator( - left=Expression( - value=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + + # not pipelined + if not con[6]: + newstatements.append( + ForeachStatement( + variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], + parameter_range=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), + stop=Expression(ConstantLiteral(1, ScalarType.i32)), + step=None)], + stream_variable=TypedIdentifier(dtype=receive_stream[2].dtype, + identifier=self.versioning.next_version("reduce_receive")), + receive_stream=ReceiveGenerator(stream_name=receive_stream[0]), + body=[ + AssignmentStatement( + destination=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ), + source=Expression( + BinaryOperator( + left=Expression( + value=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ) + ), + op= current_op, + right=Expression( + value=self.versioning.current_version("reduce_receive") ) - ), - op= '+' if operation_id == "OP_SUM" else '-----', # other operations not implemented - right=Expression( - value=self.versioning.current_version("reduce_receive") ) ) ) + ], + completion_name=None + ) + ) + + # pipelined root + elif (current_position[0] == root[0] and current_position[2] == root[1]): + newstatements.append( + AssignmentStatement( + destination=self.versioning.next_version("pipeline_helper"), + source=Expression( + 
ConstantLiteral(0, ScalarType.i32) ) - ], - completion_name=None + ) + ) + newstatements.append( + ForStatement( + variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], + range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), + stop=Expression(ConstantLiteral(1, ScalarType.i32)), + step=None)], + body=[ + ReceiveStatement( + local_array=self.versioning.current_version("pipeline_helper"), + stream_name=receive_stream[0], + completion_name=None + ), + AssignmentStatement( + destination=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ), + source=Expression( + BinaryOperator( + left=Expression( + value=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ) + ), + op= current_op, + right=Expression( + value=self.versioning.current_version("pipeline_helper") + ) + ) + ) + ) + ], + ) ) - ) if not (current_position[0] == root[0] and current_position[2] == root[1]): # only root does not send @@ -640,20 +824,99 @@ def change_compute_blocks(self) -> None: and current_position[2] >= detailed_con[2] and current_position[3] <= detailed_con[3] and ((detailed_con[4] == 1 and not current_position[1] == detailed_con[1]) or (detailed_con[4] == -1 and not current_position[0] == detailed_con[0]) - or (detailed_con[4] == 0 and detailed_con[8] == 'sender'))): + or (detailed_con[4] == 0 and detailed_con[8] == 'sender') + or (con[6] == True and detailed_con[8] == 'sender'))): send_stream = con break if not send_stream == None: break - newstatements.append( - SendStatement( - local_array=stmt.local_array, - stream_name=send_stream[0], - completion_name=None + # not pipelined + if not con[6]: + newstatements.append( + SendStatement( + local_array=stmt.local_array, + stream_name=send_stream[0], + completion_name=None + ) + ) + + # pipelined origin + elif 
(current_position[0] == origin[0] and current_position[2] == origin[1]): + newstatements.append( + ForStatement( + variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], + range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), + stop=Expression(ConstantLiteral(1, ScalarType.i32)), + step=None)], + body=[ + SendStatement( + local_array=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ), + stream_name=send_stream[0], + completion_name=None + ) + ], + ) + ) + + + # pipelined + else: + newstatements.append( + AssignmentStatement( + destination=self.versioning.next_version("pipeline_helper"), + source=Expression( + ConstantLiteral(0, ScalarType.i32) + ) + ) + ) + newstatements.append( + ForStatement( + variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], + range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), + stop=Expression(ConstantLiteral(1, ScalarType.i32)), + step=None)], + body=[ + ReceiveStatement( + local_array=self.versioning.current_version("pipeline_helper"), + stream_name=receive_stream[0], + completion_name=None + ), + AssignmentStatement( + destination=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ), + source=Expression( + BinaryOperator( + left=Expression( + value=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ) + ), + op= current_op, + right=Expression( + value=self.versioning.current_version("pipeline_helper") + ) + ) + ) + ), + SendStatement( + local_array=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ), + stream_name=send_stream[0], + completion_name=None + ) + ], + ) ) - ) # add receive + 
calculation + send here for new_statement in newstatements: diff --git a/spatialstencil/syntax/spatial_ir/irnodes.py b/spatialstencil/syntax/spatial_ir/irnodes.py index cd0ead0e..8d04233b 100644 --- a/spatialstencil/syntax/spatial_ir/irnodes.py +++ b/spatialstencil/syntax/spatial_ir/irnodes.py @@ -435,25 +435,19 @@ class ReduceRoutingDeclaration(SpatialNode): """ A routing declaration for a reduce, optionally specifying hops and channel. """ - channels: Union[int, Literal["auto"]] = "auto" # Channel ID or 'auto' - graph: int = 0 - op: int = 0 + graph: str = '' + op: str = '' + pipelined: bool = False def validate(self) -> None: - # test this for self.channel != "auto" - if isinstance(self.channels, Tree): - self.channels = self.channels.data - - assert isinstance(self.graph, int) - assert isinstance(self.op, int) + assert isinstance(self.graph, str) + assert isinstance(self.op, str) + assert isinstance(self.pipelined, bool) def as_ir(self, indent: int = 0) -> str: indent_str = ' ' * indent - channels_str = "auto" if self.channels == "auto" else str(self.channels) - graph_str = str(self.graph) - op_str = str(self.op) - return f"{indent_str}channels = {channels_str},\n{indent_str}graph = {graph_str},\n{indent_str}op = {op_str}" + return f"{indent_str}graph = {self.graph},\n{indent_str}op = {self.op},\n{indent_str}pipelined = {self.pipelined}" @dataclass diff --git a/spatialstencil/syntax/spatial_ir/language.lark b/spatialstencil/syntax/spatial_ir/language.lark index 169c10fa..c052cc87 100644 --- a/spatialstencil/syntax/spatial_ir/language.lark +++ b/spatialstencil/syntax/spatial_ir/language.lark @@ -18,7 +18,8 @@ hexadecimal_literal : "0x" hex_digits negated_integer_literal : "-" integer_literal ?posneg_integer_literal : integer_literal | negated_integer_literal float_literal : /[-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?/ -string_literal : ESCAPED_STRING +non_escaped_string : letters (underscore letters)* +string_literal : ESCAPED_STRING | non_escaped_string 
?constant_literal : bool_literal | posneg_integer_literal | float_literal | string_literal // Identifier syntax (loosely following MLIR conventions) @@ -116,7 +117,7 @@ hop : "(" posneg_integer_literal "," posneg_integer_literal ")" // 2D at the mo hops : "[" hop ("," hop)* "]" routing : "hops" "=" (auto | hops) "," "channel" "=" (auto | integer_literal) broadcast_routing : "channels" "=" (auto | integer_literal) -reduce_routing : "channels" "=" (auto) "," "graph" "=" (auto | integer_literal) "," "op" "=" integer_literal +reduce_routing : "graph" "=" (auto | constant_literal) "," "op" "=" constant_literal "," "pipelined" "=" bool_literal field_declaration : builtin_type identifier (";")? //(";" | NEWLINE) stream_declaration : classic_stream | mul_stream diff --git a/spatialstencil/syntax/spatial_ir/lark_to_ir.py b/spatialstencil/syntax/spatial_ir/lark_to_ir.py index 56757100..6a5c51e5 100644 --- a/spatialstencil/syntax/spatial_ir/lark_to_ir.py +++ b/spatialstencil/syntax/spatial_ir/lark_to_ir.py @@ -36,7 +36,13 @@ def hexadecimal_literal(self, *digits): @lark.v_args(inline=True) def string_literal(self, s): - return irnodes.StringLiteral(s[1:-1].replace('\\"', '"')) + if type(s).__name__ == 'Tree': + combined_string = '' + for i in range(len(s.children)): + combined_string += s.children[i] + return combined_string + else: + return irnodes.StringLiteral(s[1:-1].replace('\\"', '"')) @lark.v_args(inline=True) def bare_id(self, *elements): From a8bb3e306fdde55ee931ce1cc6211efd5fa8068c Mon Sep 17 00:00:00 2001 From: Niklas Roemer Date: Fri, 10 Jan 2025 15:43:08 +0100 Subject: [PATCH 12/27] add pipeline for grid (not fully tested / not optimized) --- samples/spatial/simple_reduce.sptl | 4 +- .../optimizations/spatial_reduce.py | 1344 ++++++++++++++--- 2 files changed, 1116 insertions(+), 232 deletions(-) diff --git a/samples/spatial/simple_reduce.sptl b/samples/spatial/simple_reduce.sptl index 12d35273..9cbb288a 100644 --- a/samples/spatial/simple_reduce.sptl +++ 
b/samples/spatial/simple_reduce.sptl @@ -6,8 +6,8 @@ kernel @add() { } dataflow i16 i, i16 j in [0:5, 0:5] { - multistream red = reduce(0, 0) { - graph = snake, + multistream red = reduce(1, 1) { + graph = grid, op = S_SUM, pipelined = true } diff --git a/spatialstencil/optimizations/spatial_reduce.py b/spatialstencil/optimizations/spatial_reduce.py index 08f55038..6f8951bc 100644 --- a/spatialstencil/optimizations/spatial_reduce.py +++ b/spatialstencil/optimizations/spatial_reduce.py @@ -46,14 +46,13 @@ def create_communication_patterns(self, x_start, x_stop, y_start, y_stop, x, y, communication = [] self.pipelined.update({name : False}) # not implemented yet mode = graph - if pipelined: - print('pipelined communication is not implemented yet') if mode == 'snake': if y == y_start: if (y_stop - 1 - y_start) % 2 == 0: # horizontal movement if pipelined: + # not completely correct for small subgrids if x_stop - x_start > 1: ## this should be handled differently if (x_stop - x_start) % 2 != 0: communication.append([x_start, x_stop - 1, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) @@ -229,13 +228,14 @@ def create_communication_patterns(self, x_start, x_stop, y_start, y_stop, x, y, else: if pipelined: if (x_stop - x_start) % 2 == 0: - communication.append([x_start, x_stop - 1, y_start, y_stop, -1, 0, 2, 1]) - communication.append([x_start + 1, x_stop, y_start, y_stop, -1, 0, 2, 1]) + communication.append([x_start, x_stop, y_start, y_stop, -1, 0, 1, 1]) + communication.append([x_start + 1, x_stop - 1, y_start, y_stop, -1, 0, 1, 1]) else: - communication.append([x_start, x_stop, y_start, y_stop, -1, 0, 2, 1]) - communication.append([x_start + 1, x_stop - 1, y_start, y_stop, -1, 0, 2, 1]) + communication.append([x_start, x_stop - 1, y_start, y_stop, -1, 0, 1, 1]) + communication.append([x_start + 1, x_stop, y_start, y_stop, -1, 0, 1, 1]) else: communication.append([x_start, x_stop, y_start, y_stop, -1, 0, 1, 1]) + # TODO add steps for pipelined communication from 
here # vertical movement if y_start == y_stop - 1: @@ -243,23 +243,75 @@ def create_communication_patterns(self, x_start, x_stop, y_start, y_stop, x, y, pass elif y == y_start: # print('upper left corner') - communication.append([x_start, x_start + 1, y_start, y_stop, 0, -1, 1, 1]) + if not pipelined or y_stop - y_start <= 2: + communication.append([x_start, x_start + 1, y_start, y_stop, 0, -1, 1, 1]) + else: + if (y_stop - y_start) % 2 == 0: + communication.append([x_start, x_start + 1, y_start, y_stop, 0, -1, 1, 1]) + communication.append([x_start, x_start + 1, y_start + 1, y_stop - 1, 0, -1, 1, 1]) + else: + communication.append([x_start, x_start + 1, y_start, y_stop - 1, 0, -1, 1, 1]) + communication.append([x_start, x_start + 1, y_start + 1, y_stop, 0, -1, 1, 1]) elif y == y_stop - 1: # print('lower left corner') - communication.append([x_start, x_start + 1, y_start, y_stop, 0, 1, 1, 1]) + if not pipelined or y_stop - y_start <= 2: + communication.append([x_start, x_start + 1, y_start, y_stop, 0, 1, 1, 1]) + else: + if (y_stop - y_start) % 2 == 0: + communication.append([x_start, x_start + 1, y_start, y_stop, 0, 1, 1, 1]) + communication.append([x_start, x_start + 1, y_start + 1, y_stop - 1, 0, 1, 1, 1]) + else: + communication.append([x_start, x_start + 1, y_start, y_stop - 1, 0, 1, 1, 1]) + communication.append([x_start, x_start + 1, y_start + 1, y_stop, 0, 1, 1, 1]) else: # print('left edge') - communication.append([x_start, x_start + 1, y_start, y + 1, 0, 1, 1, 1]) - communication.append([x_start, x_start + 1, y, y_stop, 0, -1, 1, 1]) + if not pipelined: + communication.append([x_start, x_start + 1, y_start, y + 1, 0, 1, 1, 1]) + communication.append([x_start, x_start + 1, y, y_stop, 0, -1, 1, 1]) + else: + # upper part + if (y - y_start) >= 2: # y is inclusive while y_stop is exclusive + if (y - y_start) % 2 == 0: + communication.append([x_start, x_start + 1, y_start, y, 0, 1, 1, 1]) + communication.append([x_start, x_start + 1, y_start + 1, y + 1, 0, 1, 
1, 1]) + else: + communication.append([x_start, x_start + 1, y_start, y + 1, 0, 1, 1, 1]) + communication.append([x_start, x_start + 1, y_start + 1, y, 0, 1, 1, 1]) + else: + communication.append([x_start, x_start + 1, y_start, y + 1, 0, 1, 1, 1]) + + # lower part + if (y_stop - y) > 2: + if (y_stop - y) % 2 == 0: + communication.append([x_start, x_start + 1, y, y_stop, 0, -1, 1, 1]) + communication.append([x_start, x_start + 1, y + 1, y_stop - 1, 0, -1, 1, 1]) + else: + communication.append([x_start, x_start + 1, y, y_stop - 1, 0, -1, 1, 1]) + communication.append([x_start, x_start + 1, y + 1, y_stop, 0, -1, 1, 1]) + else: + communication.append([x_start, x_start + 1, y, y_stop, 0, -1, 1, 1]) + + elif x == x_stop - 1: # horizontal movement if x_start == x_stop - 1: # print('no horizontal movement needed') pass - else: + elif x_stop - x_start == 2: # print('left to right') communication.append([x_start, x_stop, y_start, y_stop, 1, 0, 1, 1]) + else: + # print('left to right') + if pipelined: + if (x_stop - x_start) % 2 == 0: + communication.append([x_start, x_stop, y_start, y_stop, 1, 0, 1, 1]) + communication.append([x_start + 1, x_stop - 1, y_start, y_stop, 1, 0, 1, 1]) + else: + communication.append([x_start, x_stop - 1, y_start, y_stop, 1, 0, 1, 1]) + communication.append([x_start + 1, x_stop, y_start, y_stop, 1, 0, 1, 1]) + else: + communication.append([x_start, x_stop, y_start, y_stop, 1, 0, 1, 1]) # vertical movement if y_start == y_stop - 1: @@ -267,20 +319,82 @@ def create_communication_patterns(self, x_start, x_stop, y_start, y_stop, x, y, pass elif y == y_start: # print('upper right corner') - communication.append([x_stop - 1, x_stop, y_start, y_stop, 0, -1, 1, 1]) + if not pipelined or y_stop - y_start <= 2: + communication.append([x_stop - 1, x_stop, y_start, y_stop, 0, -1, 1, 1]) + else: + if (y_stop - y_start) % 2 == 0: + communication.append([x_stop - 1, x_stop, y_start, y_stop, 0, -1, 1, 1]) + communication.append([x_stop - 1, x_stop, y_start + 1, 
y_stop - 1, 0, -1, 1, 1]) + else: + communication.append([x_stop - 1, x_stop, y_start, y_stop - 1, 0, -1, 1, 1]) + communication.append([x_stop - 1, x_stop, y_start + 1, y_stop, 0, -1, 1, 1]) elif y == y_stop - 1: # print('lower right corner') - communication.append([x_stop - 1, x_stop, y_start, y_stop, 0, 1, 1, 1]) + if not pipelined or y_stop - y_start <= 2: + communication.append([x_stop - 1, x_stop, y_start, y_stop, 0, 1, 1, 1]) + else: + if (y_stop - y_start) % 2 == 0: + communication.append([x_stop - 1, x_stop, y_start, y_stop, 0, 1, 1, 1]) + communication.append([x_stop - 1, x_stop, y_start + 1, y_stop - 1, 0, 1, 1, 1]) + else: + communication.append([x_stop - 1, x_stop, y_start, y_stop - 1, 0, 1, 1, 1]) + communication.append([x_stop - 1, x_stop, y_start + 1, y_stop, 0, 1, 1, 1]) else: # print('right edge') - communication.append([x_stop - 1, x_stop, y_start, y + 1, 0, 1, 1, 1]) - communication.append([x_stop - 1, x_stop, y, y_stop, 0, -1, 1, 1]) + if not pipelined: + communication.append([x_stop - 1, x_stop, y_start, y + 1, 0, 1, 1, 1]) + communication.append([x_stop - 1, x_stop, y, y_stop, 0, -1, 1, 1]) + else: + # upper part + if (y - y_start) >= 2: # y is inclusive while y_stop is exclusive + if (y - y_start) % 2 == 0: + communication.append([x_stop - 1, x_stop, y_start, y, 0, 1, 1, 1]) + communication.append([x_stop - 1, x_stop, y_start + 1, y + 1, 0, 1, 1, 1]) + else: + communication.append([x_stop - 1, x_stop, y_start, y + 1, 0, 1, 1, 1]) + communication.append([x_stop - 1, x_stop, y_start + 1, y, 0, 1, 1, 1]) + else: + communication.append([x_stop - 1, x_stop, y_start, y + 1, 0, 1, 1, 1]) + + # lower part + if (y_stop - y) > 2: + if (y_stop - y) % 2 == 0: + communication.append([x_stop - 1, x_stop, y, y_stop, 0, -1, 1, 1]) + communication.append([x_stop - 1, x_stop, y + 1, y_stop - 1, 0, -1, 1, 1]) + else: + communication.append([x_stop - 1, x_stop, y, y_stop - 1, 0, -1, 1, 1]) + communication.append([x_stop - 1, x_stop, y + 1, y_stop, 0, -1, 1, 1]) 
+ else: + communication.append([x_stop - 1, x_stop, y, y_stop, 0, -1, 1, 1]) else: # horizontal movement # print('middle') - communication.append([x_start, x + 1, y_start, y_stop, 1, 0, 1, 1]) # left to middle - communication.append([x, x_stop, y_start, y_stop, -1, 0, 1, 1]) # right to middle + if not pipelined: + communication.append([x_start, x + 1, y_start, y_stop, 1, 0, 1, 1]) # left to middle + communication.append([x, x_stop, y_start, y_stop, -1, 0, 1, 1]) # right to middle + else: + # left + if (x - x_start) >= 2: # x is inclusive while x_stop is exclusive + if (x - x_start) % 2 == 0: + communication.append([x_start, x, y_start, y_stop, 1, 0, 1, 1]) + communication.append([x_start + 1, x + 1, y_start, y_stop, 1, 0, 1, 1]) + else: + communication.append([x_start, x + 1, y_start, y_stop, 1, 0, 1, 1]) + communication.append([x_start + 1, x, y_start, y_stop, 1, 0, 1, 1]) + else: + communication.append([x_start, x + 1, y_start, y_stop, 1, 0, 1, 1]) + + # right + if (x_stop - x) > 2: + if (x_stop - x) % 2 == 0: + communication.append([x, x_stop, y_start, y_stop, -1, 0, 1, 1]) + communication.append([x + 1, x_stop - 1, y_start, y_stop, -1, 0, 1, 1]) + else: + communication.append([x, x_stop - 1, y_start, y_stop, -1, 0, 1, 1]) + communication.append([x + 1, x_stop, y_start, y_stop, -1, 0, 1, 1]) + else: + communication.append([x, x_stop, y_start, y_stop, -1, 0, 1, 1]) # vertical movement if y_start == y_stop - 1: @@ -288,14 +402,53 @@ def create_communication_patterns(self, x_start, x_stop, y_start, y_stop, x, y, pass elif y == y_start: # print('upper edge') - communication.append([x, x + 1, y_start, y_stop, 0, -1, 1, 1]) + if not pipelined or y_stop - y_start <= 2: + communication.append([x, x + 1, y_start, y_stop, 0, -1, 1, 1]) + else: + if (y_stop - y_start) % 2 == 0: + communication.append([x, x + 1, y_start, y_stop, 0, -1, 1, 1]) + communication.append([x, x + 1, y_start + 1, y_stop - 1, 0, -1, 1, 1]) + else: + communication.append([x, x + 1, y_start, y_stop - 
1, 0, -1, 1, 1]) + communication.append([x, x + 1, y_start + 1, y_stop, 0, -1, 1, 1]) elif y == y_stop - 1: # print('lower edge') - communication.append([x, x + 1, y_start, y_stop, 0, 1, 1, 1]) + if not pipelined or y_stop - y_start <= 2: + communication.append([x, x + 1, y_start, y_stop, 0, 1, 1, 1]) + else: + if (y_stop - y_start) % 2 == 0: + communication.append([x, x + 1, y_start, y_stop, 0, 1, 1, 1]) + communication.append([x, x + 1, y_start + 1, y_stop - 1, 0, 1, 1, 1]) + else: + communication.append([x, x + 1, y_start, y_stop - 1, 0, 1, 1, 1]) + communication.append([x, x + 1, y_start + 1, y_stop, 0, 1, 1, 1]) else: # print('center') - communication.append([x, x + 1, y_start, y + 1, 0, 1, 1, 1]) - communication.append([x, x + 1, y, y_stop, 0, -1, 1, 1]) + if not pipelined: + communication.append([x, x + 1, y_start, y + 1, 0, 1, 1, 1]) + communication.append([x, x + 1, y, y_stop, 0, -1, 1, 1]) + else: + # upper part + if (y - y_start) >= 2: # y is inclusive while y_stop is exclusive + if (y - y_start) % 2 == 0: + communication.append([x, x + 1, y_start, y, 0, 1, 1, 1]) + communication.append([x, x + 1, y_start + 1, y + 1, 0, 1, 1, 1]) + else: + communication.append([x, x + 1, y_start, y + 1, 0, 1, 1, 1]) + communication.append([x, x + 1, y_start + 1, y, 0, 1, 1, 1]) + else: + communication.append([x, x + 1, y_start, y + 1, 0, 1, 1, 1]) + + # lower part + if (y_stop - y) > 2: + if (y_stop - y) % 2 == 0: + communication.append([x, x + 1, y, y_stop, 0, -1, 1, 1]) + communication.append([x, x + 1, y + 1, y_stop - 1, 0, -1, 1, 1]) + else: + communication.append([x, x + 1, y, y_stop - 1, 0, -1, 1, 1]) + communication.append([x, x + 1, y + 1, y_stop, 0, -1, 1, 1]) + else: + communication.append([x, x + 1, y, y_stop, 0, -1, 1, 1]) self.grid_streams.update({name : communication}) @@ -348,14 +501,52 @@ def change_data_blocks(self) -> None: [com[6], com[7]]], ) if stmt.stream_name.name in self.grid_streams: - if com[4] == -1: - 
new_grid_streams.append([self.versioning.current_version("reduce"), com, StreamType(stmt.dtype.dtype), 'left']) - elif com[4] == 1: - new_grid_streams.append([self.versioning.current_version("reduce"), com, StreamType(stmt.dtype.dtype), 'right']) - elif com[5] == -1: - new_grid_streams.append([self.versioning.current_version("reduce"), com, StreamType(stmt.dtype.dtype), 'top']) - elif com[5] == 1: - new_grid_streams.append([self.versioning.current_version("reduce"), com, StreamType(stmt.dtype.dtype), 'bottom']) + if not stmt.routing.pipelined: + if com[4] == -1: + new_grid_streams.append([self.versioning.current_version("reduce"), com, StreamType(stmt.dtype.dtype), 'left', stmt.routing.pipelined]) + elif com[4] == 1: + new_grid_streams.append([self.versioning.current_version("reduce"), com, StreamType(stmt.dtype.dtype), 'right', stmt.routing.pipelined]) + elif com[5] == -1: + new_grid_streams.append([self.versioning.current_version("reduce"), com, StreamType(stmt.dtype.dtype), 'top', stmt.routing.pipelined]) + elif com[5] == 1: + new_grid_streams.append([self.versioning.current_version("reduce"), com, StreamType(stmt.dtype.dtype), 'bottom', stmt.routing.pipelined]) + else: + if com[4] == -1: + unrolled_com = [] + for i in range(com[1], com[0], -1): + if i % 2 == com[1] % 2: + for j in range(com[2], com[3]): + unrolled_com.append([i-1, i, j, j + 1, com[4], com[5], com[6], com[7], 'sender']) + else: + for j in range(com[2], com[3]): + unrolled_com.append([i-1, i, j, j + 1, com[4], com[5], com[6], com[7], 'receiver']) + new_grid_streams.append([self.versioning.current_version("reduce"), unrolled_com, StreamType(stmt.dtype.dtype), 'left', stmt.routing.pipelined]) + elif com[4] == 1: + unrolled_com = [] + for i in range(com[0], com[1]): + if i % 2 == com[0] % 2: + for j in range(com[2], com[3]): + unrolled_com.append([i, i+1, j, j + 1, com[4], com[5], com[6], com[7], 'sender']) + else: + for j in range(com[2], com[3]): + unrolled_com.append([i, i+1, j, j + 1, com[4], 
com[5], com[6], com[7], 'receiver']) + new_grid_streams.append([self.versioning.current_version("reduce"), unrolled_com, StreamType(stmt.dtype.dtype), 'right', stmt.routing.pipelined]) + elif com[5] == -1: + unrolled_com = [] + for i in range(com[3], com[2], -1): + if i % 2 == com[3] % 2: + unrolled_com.append([com[0], com[1], i-1, i, com[4], com[5], com[6], com[7], 'sender']) + else: + unrolled_com.append([com[0], com[1], i-1, i, com[4], com[5], com[6], com[7], 'receiver']) + new_grid_streams.append([self.versioning.current_version("reduce"), unrolled_com, StreamType(stmt.dtype.dtype), 'top', stmt.routing.pipelined]) + elif com[5] == 1: + unrolled_com = [] + for i in range(com[2], com[3]): + if i % 2 == com[2] % 2: + unrolled_com.append([com[0], com[1], i, i+1, com[4], com[5], com[6], com[7], 'sender']) + else: + unrolled_com.append([com[0], com[1], i, i+1, com[4], com[5], com[6], com[7], 'receiver']) + new_grid_streams.append([self.versioning.current_version("reduce"), unrolled_com, StreamType(stmt.dtype.dtype), 'bottom', stmt.routing.pipelined]) elif stmt.stream_name.name in self.snake_streams: if com[4] == -1: unrolled_com = [] @@ -466,67 +657,79 @@ def fix_subgrid(self) -> None: # test if stream_name is in grid_streams if stmt.stream_name.name in self.grid_streams: connections = self.grid_streams[stream_name] - reduce_connections = [] - send_connections = [] - for con in connections: - if con[3] == 'left': - send_connections.append([con[1][1] - 1, con[1][1], con[1][2], con[1][3]]) - elif con[3] == 'right': - send_connections.append([con[1][0], con[1][0] + 1, con[1][2], con[1][3]]) - elif con[3] == 'top': - send_connections.append([con[1][0], con[1][1], con[1][3] - 1, con[1][3]]) - elif con[3] == 'bottom': - send_connections.append([con[1][0], con[1][1], con[1][2], con[1][2] + 1]) - reduce_connections.append(con[1]) - root = self.reduce_operations[stmt.stream_name.name][1] - for send in send_connections: - reduce_connections.append(send) - - 
reduce_connections.append([root[0], root[0] + 1, root[1], root[1] + 1]) - - # needs to be tested properly - for com_grid in reduce_connections: - to_remove = [] - for sub_grid in grid: - if com_grid[0] > sub_grid[0][0] and com_grid[0] < sub_grid[0][1]: - # print("left") - sub_x_start = sub_grid[0][0] - sub_x_stop = sub_grid[0][1] - sub_y_start = sub_grid[1][0] - sub_y_stop = sub_grid[1][1] - grid.append([[sub_x_start, com_grid[0]], [sub_y_start, sub_y_stop]]) - grid.append([[com_grid[0], sub_x_stop], [sub_y_start, sub_y_stop]]) - to_remove.append(sub_grid) - elif com_grid[1] > sub_grid[0][0] and com_grid[1] < sub_grid[0][1]: - # print("right") - sub_x_start = sub_grid[0][0] - sub_x_stop = sub_grid[0][1] - sub_y_start = sub_grid[1][0] - sub_y_stop = sub_grid[1][1] - grid.append([[sub_x_start, com_grid[1]], [sub_y_start, sub_y_stop]]) - grid.append([[com_grid[1], sub_x_stop], [sub_y_start, sub_y_stop]]) - to_remove.append(sub_grid) - elif com_grid[2] > sub_grid[1][0] and com_grid[2] < sub_grid[1][1] and com_grid[0] <= sub_grid[0][0] and com_grid[1] >= sub_grid[0][1]: - # print("top") - sub_x_start = sub_grid[0][0] - sub_x_stop = sub_grid[0][1] - sub_y_start = sub_grid[1][0] - sub_y_stop = sub_grid[1][1] - grid.append([[sub_x_start, sub_x_stop], [sub_y_start, com_grid[2]]]) - grid.append([[sub_x_start, sub_x_stop], [com_grid[2], sub_y_stop]]) - to_remove.append(sub_grid) - elif com_grid[3] > sub_grid[1][0] and com_grid[3] < sub_grid[1][1] and com_grid[0] <= sub_grid[0][0] and com_grid[1] >= sub_grid[0][1]: - # print("bottom") - sub_x_start = sub_grid[0][0] - sub_x_stop = sub_grid[0][1] - sub_y_start = sub_grid[1][0] - sub_y_stop = sub_grid[1][1] - grid.append([[sub_x_start, sub_x_stop], [sub_y_start, com_grid[3]]]) - grid.append([[sub_x_start, sub_x_stop], [com_grid[3], sub_y_stop]]) - to_remove.append(sub_grid) - # delete old unused - for rmv in to_remove: - grid.remove(rmv) + + if not connections[0][4]: + #not pipelined + reduce_connections = [] + send_connections = 
[] + for con in connections: + if con[3] == 'left': + send_connections.append([con[1][1] - 1, con[1][1], con[1][2], con[1][3]]) + elif con[3] == 'right': + send_connections.append([con[1][0], con[1][0] + 1, con[1][2], con[1][3]]) + elif con[3] == 'top': + send_connections.append([con[1][0], con[1][1], con[1][3] - 1, con[1][3]]) + elif con[3] == 'bottom': + send_connections.append([con[1][0], con[1][1], con[1][2], con[1][2] + 1]) + reduce_connections.append(con[1]) + root = self.reduce_operations[stmt.stream_name.name][1] + for send in send_connections: + reduce_connections.append(send) + + reduce_connections.append([root[0], root[0] + 1, root[1], root[1] + 1]) + + # needs to be tested properly + for com_grid in reduce_connections: + to_remove = [] + for sub_grid in grid: + if com_grid[0] > sub_grid[0][0] and com_grid[0] < sub_grid[0][1]: + # print("left") + sub_x_start = sub_grid[0][0] + sub_x_stop = sub_grid[0][1] + sub_y_start = sub_grid[1][0] + sub_y_stop = sub_grid[1][1] + grid.append([[sub_x_start, com_grid[0]], [sub_y_start, sub_y_stop]]) + grid.append([[com_grid[0], sub_x_stop], [sub_y_start, sub_y_stop]]) + to_remove.append(sub_grid) + elif com_grid[1] > sub_grid[0][0] and com_grid[1] < sub_grid[0][1]: + # print("right") + sub_x_start = sub_grid[0][0] + sub_x_stop = sub_grid[0][1] + sub_y_start = sub_grid[1][0] + sub_y_stop = sub_grid[1][1] + grid.append([[sub_x_start, com_grid[1]], [sub_y_start, sub_y_stop]]) + grid.append([[com_grid[1], sub_x_stop], [sub_y_start, sub_y_stop]]) + to_remove.append(sub_grid) + elif com_grid[2] > sub_grid[1][0] and com_grid[2] < sub_grid[1][1] and com_grid[0] <= sub_grid[0][0] and com_grid[1] >= sub_grid[0][1]: + # print("top") + sub_x_start = sub_grid[0][0] + sub_x_stop = sub_grid[0][1] + sub_y_start = sub_grid[1][0] + sub_y_stop = sub_grid[1][1] + grid.append([[sub_x_start, sub_x_stop], [sub_y_start, com_grid[2]]]) + grid.append([[sub_x_start, sub_x_stop], [com_grid[2], sub_y_stop]]) + to_remove.append(sub_grid) + elif 
com_grid[3] > sub_grid[1][0] and com_grid[3] < sub_grid[1][1] and com_grid[0] <= sub_grid[0][0] and com_grid[1] >= sub_grid[0][1]: + # print("bottom") + sub_x_start = sub_grid[0][0] + sub_x_stop = sub_grid[0][1] + sub_y_start = sub_grid[1][0] + sub_y_stop = sub_grid[1][1] + grid.append([[sub_x_start, sub_x_stop], [sub_y_start, com_grid[3]]]) + grid.append([[sub_x_start, sub_x_stop], [com_grid[3], sub_y_stop]]) + to_remove.append(sub_grid) + # delete old unused + for rmv in to_remove: + grid.remove(rmv) + + else: + #pipelined + new_grid = [] + for i in range(grid[0][0][0], grid[0][0][1]): + for j in range(grid[0][1][0], grid[0][1][1]): + new_grid.append([[i, i + 1], [j, j + 1]]) + grid = new_grid + # needs to be tested in combination with grid_streams @@ -633,36 +836,115 @@ def change_compute_blocks(self) -> None: connections = self.snake_streams[stream_name] else: raise ValueError(f"Stream name {stream_name} not found in grid_streams or snake_streams.") + + if operation_id == "S_SUM": + current_op = '+' + elif operation_id == "S_PROD": + current_op = '*' + else: + raise NotImplementedError("Currently only S_SUM and S_PROD are supported.") if stream_name in self.grid_streams: - for con in connections: - if (current_position[0] >= con[1][0] - and current_position[1] <= con[1][1] - and current_position[2] >= con[1][2] - and current_position[3] <= con[1][3]): - - if (con[3] == 'left' and current_position[1] != con[1][1] - or con[3] == 'right' and current_position[0] != con[1][0] - or con[3] == 'top' and current_position[3] != con[1][3] - or con[3] == 'bottom' and current_position[2] != con[1][2]): - - if operation_id == "S_SUM": - current_op = '+' - elif operation_id == "S_PROD": - current_op = '*' - else: - raise NotImplementedError("Currently only S_SUM and S_PROD are supported.") + pipelined_send = [] + pipelined_receive = [] + if not connections[0][4]: + # not pipelined + for con in connections: + if (current_position[0] >= con[1][0] + and current_position[1] <= 
con[1][1] + and current_position[2] >= con[1][2] + and current_position[3] <= con[1][3]): + + if (con[3] == 'left' and current_position[1] != con[1][1] + or con[3] == 'right' and current_position[0] != con[1][0] + or con[3] == 'top' and current_position[3] != con[1][3] + or con[3] == 'bottom' and current_position[2] != con[1][2]): + + newstatements.append( + ForeachStatement( + variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], + parameter_range=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), + stop=Expression(ConstantLiteral(1, ScalarType.i32)), + step=None)], + stream_variable=TypedIdentifier(dtype=con[2].dtype, + identifier=self.versioning.next_version("reduce_receive")), + receive_stream=ReceiveGenerator(stream_name=con[0]), + body=[ + AssignmentStatement( + destination=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ), + source=Expression( + BinaryOperator( + left=Expression( + value=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ) + ), + op= current_op, + right=Expression( + value=self.versioning.current_version("reduce_receive") + ) + ) + ) + ) + ], + completion_name=None + ) + ) + + if (con[3] == 'left' and current_position[0] != con[1][0] + or con[3] == 'right' and current_position[1] != con[1][1] + or con[3] == 'top' and current_position[2] != con[1][2] + or con[3] == 'bottom' and current_position[3] != con[1][3]): + + newstatements.append( + SendStatement( + local_array=stmt.local_array, + stream_name=con[0], + completion_name=None + ) + ) + + else: + print(current_position) + print(root) + for con_list in connections: + #print(con_list) + for con in con_list[1]: + if (current_position[0] >= con[0] and current_position[1] <= con[1] + and current_position[2] >= con[2] and current_position[3] <= con[3]): + print(con) + if con[8] == 
'sender': + pipelined_send.append(con_list[0]) + elif con[8] == 'receiver': + pipelined_receive.append(con_list[0]) + + if pipelined_send != [] and pipelined_receive != []: + newstatements.append( + AssignmentStatement( + destination=self.versioning.next_version("pipeline_helper"), + source=Expression( + ConstantLiteral(0, ScalarType.i32) + ) + ) + ) + if len(pipelined_receive) == 1: newstatements.append( - ForeachStatement( + ForStatement( variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], - parameter_range=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), + range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), stop=Expression(ConstantLiteral(1, ScalarType.i32)), step=None)], - stream_variable=TypedIdentifier(dtype=con[2].dtype, - identifier=self.versioning.next_version("reduce_receive")), - receive_stream=ReceiveGenerator(stream_name=con[0]), body=[ + ReceiveStatement( + local_array=self.versioning.current_version("pipeline_helper"), + stream_name=pipelined_receive[0], + completion_name=None + ), AssignmentStatement( destination=ArraySlice( array=stmt.local_array, @@ -678,141 +960,743 @@ def change_compute_blocks(self) -> None: ), op= current_op, right=Expression( - value=self.versioning.current_version("reduce_receive") + value=self.versioning.current_version("pipeline_helper") ) ) ) + ), + SendStatement( + local_array=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ), + stream_name=pipelined_send[0], + completion_name=None ) ], - completion_name=None ) ) - - if (con[3] == 'left' and current_position[0] != con[1][0] - or con[3] == 'right' and current_position[1] != con[1][1] - or con[3] == 'top' and current_position[2] != con[1][2] - or con[3] == 'bottom' and current_position[3] != con[1][3]): - + elif len(pipelined_receive) == 2: newstatements.append( - SendStatement( - 
local_array=stmt.local_array, - stream_name=con[0], - completion_name=None - ) - ) - - elif stream_name in self.snake_streams: - if not (current_position[0] == origin[0] and current_position[2] == origin[1]): - # everything but the starting point receives first - - # get receive stream - receive_stream = None - for con in connections: - for detailed_con in con[5]: - if (current_position[0] >= detailed_con[0] and current_position[1] <= detailed_con[1] - and current_position[2] >= detailed_con[2] and current_position[3] <= detailed_con[3] - and ((detailed_con[4] == -1 and not current_position[1] == detailed_con[1]) - or (detailed_con[4] == 1 and not current_position[0] == detailed_con[0]) - or (detailed_con[4] == 0 and detailed_con[8] == 'receiver') - or (con[6] == True and detailed_con[8] == 'receiver'))): - receive_stream = con - break - - if not receive_stream == None: - break - - if operation_id == "S_SUM": - current_op = '+' - elif operation_id == "S_PROD": - current_op = '*' - else: - raise NotImplementedError("Currently only S_SUM and S_PROD are supported.") - - # change receive statement - - # not pipelined - if not con[6]: - newstatements.append( - ForeachStatement( - variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], - parameter_range=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), - stop=Expression(ConstantLiteral(1, ScalarType.i32)), - step=None)], - stream_variable=TypedIdentifier(dtype=receive_stream[2].dtype, - identifier=self.versioning.next_version("reduce_receive")), - receive_stream=ReceiveGenerator(stream_name=receive_stream[0]), - body=[ - AssignmentStatement( - destination=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ForStatement( + variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], + 
range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), + stop=Expression(ConstantLiteral(1, ScalarType.i32)), + step=None)], + body=[ + ReceiveStatement( + local_array=self.versioning.current_version("pipeline_helper"), + stream_name=pipelined_receive[0], + completion_name=None ), - source=Expression( - BinaryOperator( - left=Expression( - value=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + AssignmentStatement( + destination=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ), + source=Expression( + BinaryOperator( + left=Expression( + value=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ) + ), + op= current_op, + right=Expression( + value=self.versioning.current_version("pipeline_helper") ) - ), - op= current_op, - right=Expression( - value=self.versioning.current_version("reduce_receive") ) ) - ) - ) - ], - completion_name=None - ) - ) - - # pipelined root - elif (current_position[0] == root[0] and current_position[2] == root[1]): - newstatements.append( - AssignmentStatement( - destination=self.versioning.next_version("pipeline_helper"), - source=Expression( - ConstantLiteral(0, ScalarType.i32) - ) - ) - ) - newstatements.append( - ForStatement( - variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], - range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), - stop=Expression(ConstantLiteral(1, ScalarType.i32)), - step=None)], - body=[ - ReceiveStatement( - local_array=self.versioning.current_version("pipeline_helper"), - stream_name=receive_stream[0], - completion_name=None - ), - AssignmentStatement( - destination=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] ), - 
source=Expression( - BinaryOperator( - left=Expression( - value=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ReceiveStatement( + local_array=self.versioning.current_version("pipeline_helper"), + stream_name=pipelined_receive[1], + completion_name=None + ), + AssignmentStatement( + destination=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ), + source=Expression( + BinaryOperator( + left=Expression( + value=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ) + ), + op= current_op, + right=Expression( + value=self.versioning.current_version("pipeline_helper") ) - ), - op= current_op, - right=Expression( - value=self.versioning.current_version("pipeline_helper") ) ) + ), + SendStatement( + local_array=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ), + stream_name=pipelined_send[0], + completion_name=None ) - ) - ], + ], + ) ) - ) - + elif len(pipelined_receive) == 3: + newstatements.append( + ForStatement( + variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], + range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), + stop=Expression(ConstantLiteral(1, ScalarType.i32)), + step=None)], + body=[ + ReceiveStatement( + local_array=self.versioning.current_version("pipeline_helper"), + stream_name=pipelined_receive[0], + completion_name=None + ), + AssignmentStatement( + destination=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ), + source=Expression( + BinaryOperator( + left=Expression( + value=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ) + ), + op= 
current_op, + right=Expression( + value=self.versioning.current_version("pipeline_helper") + ) + ) + ) + ), + ReceiveStatement( + local_array=self.versioning.current_version("pipeline_helper"), + stream_name=pipelined_receive[1], + completion_name=None + ), + AssignmentStatement( + destination=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ), + source=Expression( + BinaryOperator( + left=Expression( + value=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ) + ), + op= current_op, + right=Expression( + value=self.versioning.current_version("pipeline_helper") + ) + ) + ) + ), + ReceiveStatement( + local_array=self.versioning.current_version("pipeline_helper"), + stream_name=pipelined_receive[2], + completion_name=None + ), + AssignmentStatement( + destination=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ), + source=Expression( + BinaryOperator( + left=Expression( + value=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ) + ), + op= current_op, + right=Expression( + value=self.versioning.current_version("pipeline_helper") + ) + ) + ) + ), + SendStatement( + local_array=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ), + stream_name=pipelined_send[0], + completion_name=None + ) + ], + ) + ) + else: + newstatements.append( + ForStatement( + variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], + range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), + stop=Expression(ConstantLiteral(1, ScalarType.i32)), + step=None)], + body=[ + ReceiveStatement( + local_array=self.versioning.current_version("pipeline_helper"), + 
stream_name=pipelined_receive[0], + completion_name=None + ), + AssignmentStatement( + destination=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ), + source=Expression( + BinaryOperator( + left=Expression( + value=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ) + ), + op= current_op, + right=Expression( + value=self.versioning.current_version("pipeline_helper") + ) + ) + ) + ), + ReceiveStatement( + local_array=self.versioning.current_version("pipeline_helper"), + stream_name=pipelined_receive[1], + completion_name=None + ), + AssignmentStatement( + destination=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ), + source=Expression( + BinaryOperator( + left=Expression( + value=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ) + ), + op= current_op, + right=Expression( + value=self.versioning.current_version("pipeline_helper") + ) + ) + ) + ), + ReceiveStatement( + local_array=self.versioning.current_version("pipeline_helper"), + stream_name=pipelined_receive[2], + completion_name=None + ), + AssignmentStatement( + destination=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ), + source=Expression( + BinaryOperator( + left=Expression( + value=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ) + ), + op= current_op, + right=Expression( + value=self.versioning.current_version("pipeline_helper") + ) + ) + ) + ), + ReceiveStatement( + local_array=self.versioning.current_version("pipeline_helper"), + stream_name=pipelined_receive[3], + completion_name=None + ), + AssignmentStatement( + destination=ArraySlice( + array=stmt.local_array, + 
indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ), + source=Expression( + BinaryOperator( + left=Expression( + value=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ) + ), + op= current_op, + right=Expression( + value=self.versioning.current_version("pipeline_helper") + ) + ) + ) + ), + SendStatement( + local_array=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ), + stream_name=pipelined_send[0], + completion_name=None + ) + ], + ) + ) + elif pipelined_send == [] and pipelined_receive != []: + newstatements.append( + AssignmentStatement( + destination=self.versioning.next_version("pipeline_helper"), + source=Expression( + ConstantLiteral(0, ScalarType.i32) + ) + ) + ) + if len(pipelined_receive) == 1: + newstatements.append( + ForStatement( + variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], + range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), + stop=Expression(ConstantLiteral(1, ScalarType.i32)), + step=None)], + body=[ + ReceiveStatement( + local_array=self.versioning.current_version("pipeline_helper"), + stream_name=pipelined_receive[0], + completion_name=None + ), + AssignmentStatement( + destination=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ), + source=Expression( + BinaryOperator( + left=Expression( + value=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ) + ), + op= current_op, + right=Expression( + value=self.versioning.current_version("pipeline_helper") + ) + ) + ) + ) + ], + ) + ) + elif len(pipelined_receive) == 2: + newstatements.append( + ForStatement( + variables=[TypedIdentifier(dtype=ScalarType.i32, 
identifier=self.versioning.next_version("reduce_runner"))], + range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), + stop=Expression(ConstantLiteral(1, ScalarType.i32)), + step=None)], + body=[ + ReceiveStatement( + local_array=self.versioning.current_version("pipeline_helper"), + stream_name=pipelined_receive[0], + completion_name=None + ), + AssignmentStatement( + destination=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ), + source=Expression( + BinaryOperator( + left=Expression( + value=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ) + ), + op= current_op, + right=Expression( + value=self.versioning.current_version("pipeline_helper") + ) + ) + ) + ), + ReceiveStatement( + local_array=self.versioning.current_version("pipeline_helper"), + stream_name=pipelined_receive[1], + completion_name=None + ), + AssignmentStatement( + destination=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ), + source=Expression( + BinaryOperator( + left=Expression( + value=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ) + ), + op= current_op, + right=Expression( + value=self.versioning.current_version("pipeline_helper") + ) + ) + ) + ) + ], + ) + ) + elif len(pipelined_receive) == 3: + newstatements.append( + ForStatement( + variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], + range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), + stop=Expression(ConstantLiteral(1, ScalarType.i32)), + step=None)], + body=[ + ReceiveStatement( + local_array=self.versioning.current_version("pipeline_helper"), + stream_name=pipelined_receive[0], + completion_name=None + ), + AssignmentStatement( 
+ destination=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ), + source=Expression( + BinaryOperator( + left=Expression( + value=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ) + ), + op= current_op, + right=Expression( + value=self.versioning.current_version("pipeline_helper") + ) + ) + ) + ), + ReceiveStatement( + local_array=self.versioning.current_version("pipeline_helper"), + stream_name=pipelined_receive[1], + completion_name=None + ), + AssignmentStatement( + destination=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ), + source=Expression( + BinaryOperator( + left=Expression( + value=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ) + ), + op= current_op, + right=Expression( + value=self.versioning.current_version("pipeline_helper") + ) + ) + ) + ), + ReceiveStatement( + local_array=self.versioning.current_version("pipeline_helper"), + stream_name=pipelined_receive[2], + completion_name=None + ), + AssignmentStatement( + destination=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ), + source=Expression( + BinaryOperator( + left=Expression( + value=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ) + ), + op= current_op, + right=Expression( + value=self.versioning.current_version("pipeline_helper") + ) + ) + ) + ) + ], + ) + ) + else: + newstatements.append( + ForStatement( + variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], + range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), + stop=Expression(ConstantLiteral(1, ScalarType.i32)), + 
step=None)], + body=[ + ReceiveStatement( + local_array=self.versioning.current_version("pipeline_helper"), + stream_name=pipelined_receive[0], + completion_name=None + ), + AssignmentStatement( + destination=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ), + source=Expression( + BinaryOperator( + left=Expression( + value=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ) + ), + op= current_op, + right=Expression( + value=self.versioning.current_version("pipeline_helper") + ) + ) + ) + ), + ReceiveStatement( + local_array=self.versioning.current_version("pipeline_helper"), + stream_name=pipelined_receive[1], + completion_name=None + ), + AssignmentStatement( + destination=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ), + source=Expression( + BinaryOperator( + left=Expression( + value=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ) + ), + op= current_op, + right=Expression( + value=self.versioning.current_version("pipeline_helper") + ) + ) + ) + ), + ReceiveStatement( + local_array=self.versioning.current_version("pipeline_helper"), + stream_name=pipelined_receive[2], + completion_name=None + ), + AssignmentStatement( + destination=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ), + source=Expression( + BinaryOperator( + left=Expression( + value=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ) + ), + op= current_op, + right=Expression( + value=self.versioning.current_version("pipeline_helper") + ) + ) + ) + ), + ReceiveStatement( + local_array=self.versioning.current_version("pipeline_helper"), + stream_name=pipelined_receive[3], + 
completion_name=None + ), + AssignmentStatement( + destination=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ), + source=Expression( + BinaryOperator( + left=Expression( + value=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ) + ), + op= current_op, + right=Expression( + value=self.versioning.current_version("pipeline_helper") + ) + ) + ) + ) + ], + ) + ) + elif pipelined_send != [] and pipelined_receive == []: + newstatements.append( + ForStatement( + variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], + range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), + stop=Expression(ConstantLiteral(1, ScalarType.i32)), + step=None)], + body=[ + SendStatement( + local_array=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ), + stream_name=pipelined_send[0], + completion_name=None + ) + ], + ) + ) + else: + raise ValueError(f"No pipelined send or receive found for position {current_position}.") + + + + elif stream_name in self.snake_streams: + if not (current_position[0] == origin[0] and current_position[2] == origin[1]): + # everything but the starting point receives first + + # get receive stream + receive_stream = None + for con in connections: + for detailed_con in con[5]: + if (current_position[0] >= detailed_con[0] and current_position[1] <= detailed_con[1] + and current_position[2] >= detailed_con[2] and current_position[3] <= detailed_con[3] + and ((detailed_con[4] == -1 and not current_position[1] == detailed_con[1]) + or (detailed_con[4] == 1 and not current_position[0] == detailed_con[0]) + or (detailed_con[4] == 0 and detailed_con[8] == 'receiver') + or (con[6] == True and detailed_con[8] == 'receiver'))): + receive_stream = con + break + + if not receive_stream 
== None: + break + + if operation_id == "S_SUM": + current_op = '+' + elif operation_id == "S_PROD": + current_op = '*' + else: + raise NotImplementedError("Currently only S_SUM and S_PROD are supported.") + + # change receive statement + + # not pipelined + if not con[6]: + newstatements.append( + ForeachStatement( + variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], + parameter_range=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), + stop=Expression(ConstantLiteral(1, ScalarType.i32)), + step=None)], + stream_variable=TypedIdentifier(dtype=receive_stream[2].dtype, + identifier=self.versioning.next_version("reduce_receive")), + receive_stream=ReceiveGenerator(stream_name=receive_stream[0]), + body=[ + AssignmentStatement( + destination=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ), + source=Expression( + BinaryOperator( + left=Expression( + value=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ) + ), + op= current_op, + right=Expression( + value=self.versioning.current_version("reduce_receive") + ) + ) + ) + ) + ], + completion_name=None + ) + ) + + # pipelined root + elif (current_position[0] == root[0] and current_position[2] == root[1]): + newstatements.append( + AssignmentStatement( + destination=self.versioning.next_version("pipeline_helper"), + source=Expression( + ConstantLiteral(0, ScalarType.i32) + ) + ) + ) + newstatements.append( + ForStatement( + variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], + range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), + stop=Expression(ConstantLiteral(1, ScalarType.i32)), + step=None)], + body=[ + ReceiveStatement( + local_array=self.versioning.current_version("pipeline_helper"), + 
stream_name=receive_stream[0], + completion_name=None + ), + AssignmentStatement( + destination=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ), + source=Expression( + BinaryOperator( + left=Expression( + value=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ) + ), + op= current_op, + right=Expression( + value=self.versioning.current_version("pipeline_helper") + ) + ) + ) + ) + ], + ) + ) + if not (current_position[0] == root[0] and current_position[2] == root[1]): # only root does not send @@ -917,7 +1801,7 @@ def change_compute_blocks(self) -> None: ], ) ) - + # add receive + calculation + send here for new_statement in newstatements: statements.append(new_statement) From 0548e91a09b1391d1e922258e6c675a77d776acb Mon Sep 17 00:00:00 2001 From: Niklas Roemer Date: Fri, 10 Jan 2025 15:53:47 +0100 Subject: [PATCH 13/27] fix bug with pipelined snake on small grids (wrong communication pattern) --- samples/spatial/simple_reduce.sptl | 10 +- .../optimizations/spatial_reduce.py | 110 +++++++----------- 2 files changed, 46 insertions(+), 74 deletions(-) diff --git a/samples/spatial/simple_reduce.sptl b/samples/spatial/simple_reduce.sptl index 9cbb288a..db17e189 100644 --- a/samples/spatial/simple_reduce.sptl +++ b/samples/spatial/simple_reduce.sptl @@ -1,19 +1,19 @@ kernel @add() { - place i16 i, i16 j in [0:5, 0:5] { + place i16 i, i16 j in [0:2, 0:5] { i16[1] a } - dataflow i16 i, i16 j in [0:5, 0:5] { - multistream red = reduce(1, 1) { - graph = grid, + dataflow i16 i, i16 j in [0:2, 0:5] { + multistream red = reduce(0, 0) { + graph = snake, op = S_SUM, pipelined = true } } - compute i16 i, i16 j in [0:5, 0:5] { + compute i16 i, i16 j in [0:2, 0:5] { a[0] = 1 await reduce(a, red) } diff --git a/spatialstencil/optimizations/spatial_reduce.py b/spatialstencil/optimizations/spatial_reduce.py index 6f8951bc..c6d4b563 100644 --- 
a/spatialstencil/optimizations/spatial_reduce.py +++ b/spatialstencil/optimizations/spatial_reduce.py @@ -51,25 +51,17 @@ def create_communication_patterns(self, x_start, x_stop, y_start, y_stop, x, y, if (y_stop - 1 - y_start) % 2 == 0: # horizontal movement - if pipelined: - # not completely correct for small subgrids - if x_stop - x_start > 1: ## this should be handled differently - if (x_stop - x_start) % 2 != 0: - communication.append([x_start, x_stop - 1, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) - communication.append([x_start + 1, x_stop, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) - communication.append([x_start, x_stop - 1, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) - communication.append([x_start + 1, x_stop, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) - else: - communication.append([x_start, x_stop, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) - communication.append([x_start + 1, x_stop - 1, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) - communication.append([x_start, x_stop, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) - communication.append([x_start + 1, x_stop - 1, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) + if pipelined and x_stop - x_start > 2: + if (x_stop - x_start) % 2 != 0: + communication.append([x_start, x_stop - 1, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start + 1, x_stop, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start, x_stop - 1, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) + communication.append([x_start + 1, x_stop, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) else: - # effectively not pipelined in x direction as we have a column - # still pipelined in y direction - # communication.append([x_start, x_stop, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 1]) - # communication.append([x_start, x_stop, y_start + 1, 
y_stop - 1, 1 if x == x_start else -1, 0, 1, 1]) - pass + communication.append([x_start, x_stop, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start + 1, x_stop - 1, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start, x_stop, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) + communication.append([x_start + 1, x_stop - 1, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) else: if x_stop - x_start > 1: communication.append([x_start, x_stop, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 1]) @@ -92,24 +84,17 @@ def create_communication_patterns(self, x_start, x_stop, y_start, y_stop, x, y, else: # horizontal movement - if pipelined: - if x_stop - x_start > 1: - if (x_stop - x_start) % 2 != 0: - communication.append([x_start, x_stop - 1, y_start, y_stop - 1, -1 if x == x_start else 1, 0, 1, 2]) - communication.append([x_start + 1, x_stop, y_start, y_stop - 1, -1 if x == x_start else 1, 0, 1, 2]) - communication.append([x_start, x_stop - 1, y_start + 1, y_stop, 1 if x == x_start else -1, 0, 1, 2]) - communication.append([x_start + 1, x_stop, y_start + 1, y_stop, 1 if x == x_start else -1, 0, 1, 2]) - else: - communication.append([x_start, x_stop, y_start, y_stop - 1, -1 if x == x_start else 1, 0, 1, 2]) - communication.append([x_start + 1, x_stop - 1, y_start, y_stop - 1, -1 if x == x_start else 1, 0, 1, 2]) - communication.append([x_start, x_stop, y_start + 1, y_stop, 1 if x == x_start else -1, 0, 1, 2]) - communication.append([x_start + 1, x_stop - 1, y_start + 1, y_stop, 1 if x == x_start else -1, 0, 1, 2]) + if pipelined and x_stop - x_start > 2: + if (x_stop - x_start) % 2 != 0: + communication.append([x_start, x_stop - 1, y_start, y_stop - 1, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start + 1, x_stop, y_start, y_stop - 1, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start, x_stop - 1, y_start + 1, y_stop, 1 if x == x_start else 
-1, 0, 1, 2]) + communication.append([x_start + 1, x_stop, y_start + 1, y_stop, 1 if x == x_start else -1, 0, 1, 2]) else: - # effectively not pipelined in x direction as we have a column - # still pipelined in y direction - # communication.append([x_start, x_stop, y_start, y_stop - 1, -1 if x == x_start else 1, 0, 2, 2]) - # communication.append([x_start, x_stop, y_start + 1, y_stop, 1 if x == x_start else -1, 0, 2, 2]) - pass + communication.append([x_start, x_stop, y_start, y_stop - 1, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start + 1, x_stop - 1, y_start, y_stop - 1, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start, x_stop, y_start + 1, y_stop, 1 if x == x_start else -1, 0, 1, 2]) + communication.append([x_start + 1, x_stop - 1, y_start + 1, y_stop, 1 if x == x_start else -1, 0, 1, 2]) else: if x_stop - x_start > 1: communication.append([x_start, x_stop, y_start, y_stop - 1, -1 if x == x_start else 1, 0, 1, 2]) @@ -134,24 +119,17 @@ def create_communication_patterns(self, x_start, x_stop, y_start, y_stop, x, y, if (y_stop - 1 - y_start) % 2 == 0: # horizontal movement - if pipelined: - if x_stop - x_start > 1: - if (x_stop - x_start) % 2 != 0: - communication.append([x_start, x_stop - 1, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) - communication.append([x_start + 1, x_stop, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) - communication.append([x_start, x_stop - 1, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) - communication.append([x_start + 1, x_stop, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) - else: - communication.append([x_start, x_stop, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) - communication.append([x_start + 1, x_stop - 1, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) - communication.append([x_start, x_stop, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) - communication.append([x_start + 1, x_stop - 1, y_start + 1, y_stop 
- 1, 1 if x == x_start else -1, 0, 1, 2]) + if pipelined and x_stop - x_start > 2: + if (x_stop - x_start) % 2 != 0: + communication.append([x_start, x_stop - 1, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start + 1, x_stop, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start, x_stop - 1, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) + communication.append([x_start + 1, x_stop, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) else: - # effectively not pipelined in x direction as we have a column - # still pipelined in y direction - # communication.append([x_start, x_stop, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) - # communication.append([x_start, x_stop, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) - pass + communication.append([x_start, x_stop, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start + 1, x_stop - 1, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start, x_stop, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) + communication.append([x_start + 1, x_stop - 1, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) else: if x_stop - x_start > 1: communication.append([x_start, x_stop, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) @@ -174,24 +152,18 @@ def create_communication_patterns(self, x_start, x_stop, y_start, y_stop, x, y, else: # horizontal movement - if pipelined: - if x_stop - x_start > 1: - if (x_stop - x_start) % 2 != 0: - communication.append([x_start, x_stop - 1, y_start + 1, y_stop, -1 if x == x_start else 1, 0, 1, 2]) - communication.append([x_start + 1, x_stop, y_start + 1, y_stop, -1 if x == x_start else 1, 0, 1, 2]) - communication.append([x_start, x_stop - 1, y_start, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) - communication.append([x_start + 1, x_stop, y_start, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) 
- else: - communication.append([x_start, x_stop, y_start + 1, y_stop, -1 if x == x_start else 1, 0, 1, 2]) - communication.append([x_start + 1, x_stop - 1, y_start + 1, y_stop, -1 if x == x_start else 1, 0, 1, 2]) - communication.append([x_start, x_stop, y_start, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) - communication.append([x_start + 1, x_stop - 1, y_start, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) + if pipelined and x_stop - x_start > 1: + if (x_stop - x_start) % 2 != 0: + communication.append([x_start, x_stop - 1, y_start + 1, y_stop, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start + 1, x_stop, y_start + 1, y_stop, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start, x_stop - 1, y_start, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) + communication.append([x_start + 1, x_stop, y_start, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) else: - # effectively not pipelined in x direction as we have a column - # still pipelined in y direction - # communication.append([x_start, x_stop, y_start + 1, y_stop, -1 if x == x_start else 1, 0, 2, 2]) - # communication.append([x_start, x_stop, y_start, y_stop - 1, 1 if x == x_start else -1, 0, 2, 2]) - pass + communication.append([x_start, x_stop, y_start + 1, y_stop, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start + 1, x_stop - 1, y_start + 1, y_stop, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start, x_stop, y_start, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) + communication.append([x_start + 1, x_stop - 1, y_start, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) + else: communication.append([x_start, x_stop, y_start + 1, y_stop, -1 if x == x_start else 1, 0, 1, 2]) communication.append([x_start, x_stop, y_start, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) From 33b70fc9e6e7269945f306e74535a7a4f756ffa4 Mon Sep 17 00:00:00 2001 From: Niklas Roemer Date: Wed, 15 Jan 2025 20:18:09 +0100 Subject: [PATCH 14/27] first 
simplification by factoring out Send, Receive and Assign Statements --- samples/spatial/simple_reduce.sptl | 8 +- .../optimizations/spatial_reduce.py | 787 ++++-------------- 2 files changed, 145 insertions(+), 650 deletions(-) diff --git a/samples/spatial/simple_reduce.sptl b/samples/spatial/simple_reduce.sptl index db17e189..f9e94c1b 100644 --- a/samples/spatial/simple_reduce.sptl +++ b/samples/spatial/simple_reduce.sptl @@ -1,19 +1,19 @@ kernel @add() { - place i16 i, i16 j in [0:2, 0:5] { + place i16 i, i16 j in [0:5, 0:5] { i16[1] a } - dataflow i16 i, i16 j in [0:2, 0:5] { + dataflow i16 i, i16 j in [0:5, 0:5] { multistream red = reduce(0, 0) { - graph = snake, + graph = grid, op = S_SUM, pipelined = true } } - compute i16 i, i16 j in [0:2, 0:5] { + compute i16 i, i16 j in [0:5, 0:5] { a[0] = 1 await reduce(a, red) } diff --git a/spatialstencil/optimizations/spatial_reduce.py b/spatialstencil/optimizations/spatial_reduce.py index c6d4b563..14f65661 100644 --- a/spatialstencil/optimizations/spatial_reduce.py +++ b/spatialstencil/optimizations/spatial_reduce.py @@ -33,6 +33,51 @@ def reduce_subroutine(self) -> Kernel: self.change_compute_blocks() return Kernel(name=self.name, parameters=self.parameters, arguments=self.arguments, body=self.body) + + + def create_send_statement(self, stmt, pipelined_send, index) -> SendStatement: + send = SendStatement( + local_array=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ), + stream_name=pipelined_send[index], + completion_name=None + ) + return send + + def create_receive_statement(self, stmt, pipelined_receive, index) -> ReceiveStatement: + receive = ReceiveStatement( + local_array=self.versioning.current_version("pipeline_helper"), + stream_name=pipelined_receive[index], + completion_name=None + ) + return receive + + + def create_binary_operation(self, stmt, current_op, rhs) -> AssignmentStatement: + bin_op = AssignmentStatement( + 
destination=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ), + source=Expression( + BinaryOperator( + left=Expression( + value=ArraySlice( + array=stmt.local_array, + indices=[Expression(value=self.versioning.current_version("reduce_runner"))] + ) + ), + op= current_op, + right=Expression( + value=rhs + ) + ) + ) + ) + return bin_op + def create_communication_patterns(self, x_start, x_stop, y_start, y_stop, x, y, name, graph, pipelined) -> None: @@ -832,6 +877,7 @@ def change_compute_blocks(self) -> None: or con[3] == 'top' and current_position[3] != con[1][3] or con[3] == 'bottom' and current_position[2] != con[1][2]): + bin_op = self.create_binary_operation(stmt, current_op, self.versioning.current_version("reduce_receive")) newstatements.append( ForeachStatement( variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], @@ -842,26 +888,7 @@ def change_compute_blocks(self) -> None: identifier=self.versioning.next_version("reduce_receive")), receive_stream=ReceiveGenerator(stream_name=con[0]), body=[ - AssignmentStatement( - destination=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ), - source=Expression( - BinaryOperator( - left=Expression( - value=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ) - ), - op= current_op, - right=Expression( - value=self.versioning.current_version("reduce_receive") - ) - ) - ) - ) + bin_op ], completion_name=None ) @@ -905,6 +932,9 @@ def change_compute_blocks(self) -> None: ) ) if len(pipelined_receive) == 1: + send = self.create_send_statement(stmt, pipelined_send, 0) + receive0 = self.create_receive_statement(stmt, pipelined_receive, 0) + bin_op = self.create_binary_operation(stmt, current_op, self.versioning.current_version("pipeline_helper")) newstatements.append( 
ForStatement( variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], @@ -912,43 +942,17 @@ def change_compute_blocks(self) -> None: stop=Expression(ConstantLiteral(1, ScalarType.i32)), step=None)], body=[ - ReceiveStatement( - local_array=self.versioning.current_version("pipeline_helper"), - stream_name=pipelined_receive[0], - completion_name=None - ), - AssignmentStatement( - destination=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ), - source=Expression( - BinaryOperator( - left=Expression( - value=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ) - ), - op= current_op, - right=Expression( - value=self.versioning.current_version("pipeline_helper") - ) - ) - ) - ), - SendStatement( - local_array=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ), - stream_name=pipelined_send[0], - completion_name=None - ) + receive0, + bin_op, + send ], ) ) elif len(pipelined_receive) == 2: + send = self.create_send_statement(stmt, pipelined_send, 0) + receive0 = self.create_receive_statement(stmt, pipelined_receive, 0) + receive1 = self.create_receive_statement(stmt, pipelined_receive, 1) + bin_op = self.create_binary_operation(stmt, current_op, self.versioning.current_version("pipeline_helper")) newstatements.append( ForStatement( variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], @@ -956,68 +960,20 @@ def change_compute_blocks(self) -> None: stop=Expression(ConstantLiteral(1, ScalarType.i32)), step=None)], body=[ - ReceiveStatement( - local_array=self.versioning.current_version("pipeline_helper"), - stream_name=pipelined_receive[0], - completion_name=None - ), - AssignmentStatement( - destination=ArraySlice( - array=stmt.local_array, - 
indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ), - source=Expression( - BinaryOperator( - left=Expression( - value=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ) - ), - op= current_op, - right=Expression( - value=self.versioning.current_version("pipeline_helper") - ) - ) - ) - ), - ReceiveStatement( - local_array=self.versioning.current_version("pipeline_helper"), - stream_name=pipelined_receive[1], - completion_name=None - ), - AssignmentStatement( - destination=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ), - source=Expression( - BinaryOperator( - left=Expression( - value=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ) - ), - op= current_op, - right=Expression( - value=self.versioning.current_version("pipeline_helper") - ) - ) - ) - ), - SendStatement( - local_array=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ), - stream_name=pipelined_send[0], - completion_name=None - ) + receive0, + bin_op, + receive1, + bin_op, + send ], ) ) elif len(pipelined_receive) == 3: + send = self.create_send_statement(stmt, pipelined_send, 0) + receive0 = self.create_receive_statement(stmt, pipelined_receive, 0) + receive1 = self.create_receive_statement(stmt, pipelined_receive, 1) + receive2 = self.create_receive_statement(stmt, pipelined_receive, 2) + bin_op = self.create_binary_operation(stmt, current_op, self.versioning.current_version("pipeline_helper")) newstatements.append( ForStatement( variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], @@ -1025,93 +981,23 @@ def change_compute_blocks(self) -> None: stop=Expression(ConstantLiteral(1, ScalarType.i32)), step=None)], body=[ - ReceiveStatement( - 
local_array=self.versioning.current_version("pipeline_helper"), - stream_name=pipelined_receive[0], - completion_name=None - ), - AssignmentStatement( - destination=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ), - source=Expression( - BinaryOperator( - left=Expression( - value=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ) - ), - op= current_op, - right=Expression( - value=self.versioning.current_version("pipeline_helper") - ) - ) - ) - ), - ReceiveStatement( - local_array=self.versioning.current_version("pipeline_helper"), - stream_name=pipelined_receive[1], - completion_name=None - ), - AssignmentStatement( - destination=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ), - source=Expression( - BinaryOperator( - left=Expression( - value=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ) - ), - op= current_op, - right=Expression( - value=self.versioning.current_version("pipeline_helper") - ) - ) - ) - ), - ReceiveStatement( - local_array=self.versioning.current_version("pipeline_helper"), - stream_name=pipelined_receive[2], - completion_name=None - ), - AssignmentStatement( - destination=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ), - source=Expression( - BinaryOperator( - left=Expression( - value=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ) - ), - op= current_op, - right=Expression( - value=self.versioning.current_version("pipeline_helper") - ) - ) - ) - ), - SendStatement( - local_array=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ), - 
stream_name=pipelined_send[0], - completion_name=None - ) + receive0, + bin_op, + receive1, + bin_op, + receive2, + bin_op, + send ], ) ) else: + send = self.create_send_statement(stmt, pipelined_send, 0) + receive0 = self.create_receive_statement(stmt, pipelined_receive, 0) + receive1 = self.create_receive_statement(stmt, pipelined_receive, 1) + receive2 = self.create_receive_statement(stmt, pipelined_receive, 2) + receive3 = self.create_receive_statement(stmt, pipelined_receive, 3) + bin_op = self.create_binary_operation(stmt, current_op, self.versioning.current_version("pipeline_helper")) newstatements.append( ForStatement( variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], @@ -1119,114 +1005,15 @@ def change_compute_blocks(self) -> None: stop=Expression(ConstantLiteral(1, ScalarType.i32)), step=None)], body=[ - ReceiveStatement( - local_array=self.versioning.current_version("pipeline_helper"), - stream_name=pipelined_receive[0], - completion_name=None - ), - AssignmentStatement( - destination=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ), - source=Expression( - BinaryOperator( - left=Expression( - value=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ) - ), - op= current_op, - right=Expression( - value=self.versioning.current_version("pipeline_helper") - ) - ) - ) - ), - ReceiveStatement( - local_array=self.versioning.current_version("pipeline_helper"), - stream_name=pipelined_receive[1], - completion_name=None - ), - AssignmentStatement( - destination=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ), - source=Expression( - BinaryOperator( - left=Expression( - value=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ) - ), 
- op= current_op, - right=Expression( - value=self.versioning.current_version("pipeline_helper") - ) - ) - ) - ), - ReceiveStatement( - local_array=self.versioning.current_version("pipeline_helper"), - stream_name=pipelined_receive[2], - completion_name=None - ), - AssignmentStatement( - destination=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ), - source=Expression( - BinaryOperator( - left=Expression( - value=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ) - ), - op= current_op, - right=Expression( - value=self.versioning.current_version("pipeline_helper") - ) - ) - ) - ), - ReceiveStatement( - local_array=self.versioning.current_version("pipeline_helper"), - stream_name=pipelined_receive[3], - completion_name=None - ), - AssignmentStatement( - destination=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ), - source=Expression( - BinaryOperator( - left=Expression( - value=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ) - ), - op= current_op, - right=Expression( - value=self.versioning.current_version("pipeline_helper") - ) - ) - ) - ), - SendStatement( - local_array=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ), - stream_name=pipelined_send[0], - completion_name=None - ) + receive0, + bin_op, + receive1, + bin_op, + receive2, + bin_op, + receive3, + bin_op, + send ], ) ) @@ -1240,6 +1027,8 @@ def change_compute_blocks(self) -> None: ) ) if len(pipelined_receive) == 1: + receive0 = self.create_receive_statement(stmt, pipelined_receive, 0) + bin_op = self.create_binary_operation(stmt, current_op, self.versioning.current_version("pipeline_helper")) newstatements.append( ForStatement( 
variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], @@ -1247,35 +1036,15 @@ def change_compute_blocks(self) -> None: stop=Expression(ConstantLiteral(1, ScalarType.i32)), step=None)], body=[ - ReceiveStatement( - local_array=self.versioning.current_version("pipeline_helper"), - stream_name=pipelined_receive[0], - completion_name=None - ), - AssignmentStatement( - destination=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ), - source=Expression( - BinaryOperator( - left=Expression( - value=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ) - ), - op= current_op, - right=Expression( - value=self.versioning.current_version("pipeline_helper") - ) - ) - ) - ) + receive0, + bin_op ], ) ) elif len(pipelined_receive) == 2: + receive0 = self.create_receive_statement(stmt, pipelined_receive, 0) + receive1 = self.create_receive_statement(stmt, pipelined_receive, 1) + bin_op = self.create_binary_operation(stmt, current_op, self.versioning.current_version("pipeline_helper")) newstatements.append( ForStatement( variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], @@ -1283,60 +1052,18 @@ def change_compute_blocks(self) -> None: stop=Expression(ConstantLiteral(1, ScalarType.i32)), step=None)], body=[ - ReceiveStatement( - local_array=self.versioning.current_version("pipeline_helper"), - stream_name=pipelined_receive[0], - completion_name=None - ), - AssignmentStatement( - destination=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ), - source=Expression( - BinaryOperator( - left=Expression( - value=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ) - ), - op= current_op, - right=Expression( - 
value=self.versioning.current_version("pipeline_helper") - ) - ) - ) - ), - ReceiveStatement( - local_array=self.versioning.current_version("pipeline_helper"), - stream_name=pipelined_receive[1], - completion_name=None - ), - AssignmentStatement( - destination=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ), - source=Expression( - BinaryOperator( - left=Expression( - value=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ) - ), - op= current_op, - right=Expression( - value=self.versioning.current_version("pipeline_helper") - ) - ) - ) - ) + receive0, + bin_op, + receive1, + bin_op ], ) ) elif len(pipelined_receive) == 3: + receive0 = self.create_receive_statement(stmt, pipelined_receive, 0) + receive1 = self.create_receive_statement(stmt, pipelined_receive, 1) + receive2 = self.create_receive_statement(stmt, pipelined_receive, 2) + bin_op = self.create_binary_operation(stmt, current_op, self.versioning.current_version("pipeline_helper")) newstatements.append( ForStatement( variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], @@ -1344,85 +1071,21 @@ def change_compute_blocks(self) -> None: stop=Expression(ConstantLiteral(1, ScalarType.i32)), step=None)], body=[ - ReceiveStatement( - local_array=self.versioning.current_version("pipeline_helper"), - stream_name=pipelined_receive[0], - completion_name=None - ), - AssignmentStatement( - destination=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ), - source=Expression( - BinaryOperator( - left=Expression( - value=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ) - ), - op= current_op, - right=Expression( - value=self.versioning.current_version("pipeline_helper") - ) - ) - ) - ), - 
ReceiveStatement( - local_array=self.versioning.current_version("pipeline_helper"), - stream_name=pipelined_receive[1], - completion_name=None - ), - AssignmentStatement( - destination=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ), - source=Expression( - BinaryOperator( - left=Expression( - value=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ) - ), - op= current_op, - right=Expression( - value=self.versioning.current_version("pipeline_helper") - ) - ) - ) - ), - ReceiveStatement( - local_array=self.versioning.current_version("pipeline_helper"), - stream_name=pipelined_receive[2], - completion_name=None - ), - AssignmentStatement( - destination=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ), - source=Expression( - BinaryOperator( - left=Expression( - value=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ) - ), - op= current_op, - right=Expression( - value=self.versioning.current_version("pipeline_helper") - ) - ) - ) - ) + receive0, + bin_op, + receive1, + bin_op, + receive2, + bin_op ], ) ) else: + receive0 = self.create_receive_statement(stmt, pipelined_receive, 0) + receive1 = self.create_receive_statement(stmt, pipelined_receive, 1) + receive2 = self.create_receive_statement(stmt, pipelined_receive, 2) + receive3 = self.create_receive_statement(stmt, pipelined_receive, 3) + bin_op = self.create_binary_operation(stmt, current_op, self.versioning.current_version("pipeline_helper")) newstatements.append( ForStatement( variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], @@ -1430,110 +1093,19 @@ def change_compute_blocks(self) -> None: stop=Expression(ConstantLiteral(1, ScalarType.i32)), step=None)], body=[ - 
ReceiveStatement( - local_array=self.versioning.current_version("pipeline_helper"), - stream_name=pipelined_receive[0], - completion_name=None - ), - AssignmentStatement( - destination=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ), - source=Expression( - BinaryOperator( - left=Expression( - value=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ) - ), - op= current_op, - right=Expression( - value=self.versioning.current_version("pipeline_helper") - ) - ) - ) - ), - ReceiveStatement( - local_array=self.versioning.current_version("pipeline_helper"), - stream_name=pipelined_receive[1], - completion_name=None - ), - AssignmentStatement( - destination=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ), - source=Expression( - BinaryOperator( - left=Expression( - value=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ) - ), - op= current_op, - right=Expression( - value=self.versioning.current_version("pipeline_helper") - ) - ) - ) - ), - ReceiveStatement( - local_array=self.versioning.current_version("pipeline_helper"), - stream_name=pipelined_receive[2], - completion_name=None - ), - AssignmentStatement( - destination=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ), - source=Expression( - BinaryOperator( - left=Expression( - value=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ) - ), - op= current_op, - right=Expression( - value=self.versioning.current_version("pipeline_helper") - ) - ) - ) - ), - ReceiveStatement( - local_array=self.versioning.current_version("pipeline_helper"), - stream_name=pipelined_receive[3], - completion_name=None - ), - 
AssignmentStatement( - destination=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ), - source=Expression( - BinaryOperator( - left=Expression( - value=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ) - ), - op= current_op, - right=Expression( - value=self.versioning.current_version("pipeline_helper") - ) - ) - ) - ) + receive0, + bin_op, + receive1, + bin_op, + receive2, + bin_op, + receive3, + bin_op ], ) ) elif pipelined_send != [] and pipelined_receive == []: + send = self.create_send_statement(stmt, pipelined_send, 0) newstatements.append( ForStatement( variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], @@ -1541,14 +1113,7 @@ def change_compute_blocks(self) -> None: stop=Expression(ConstantLiteral(1, ScalarType.i32)), step=None)], body=[ - SendStatement( - local_array=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ), - stream_name=pipelined_send[0], - completion_name=None - ) + send ], ) ) @@ -1588,6 +1153,7 @@ def change_compute_blocks(self) -> None: # not pipelined if not con[6]: + bin_op = self.create_binary_operation(stmt, current_op, self.versioning.current_version("reduce_receive")) newstatements.append( ForeachStatement( variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], @@ -1598,26 +1164,7 @@ def change_compute_blocks(self) -> None: identifier=self.versioning.next_version("reduce_receive")), receive_stream=ReceiveGenerator(stream_name=receive_stream[0]), body=[ - AssignmentStatement( - destination=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ), - source=Expression( - BinaryOperator( - left=Expression( - value=ArraySlice( - array=stmt.local_array, - 
indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ) - ), - op= current_op, - right=Expression( - value=self.versioning.current_version("reduce_receive") - ) - ) - ) - ) + bin_op ], completion_name=None ) @@ -1633,6 +1180,9 @@ def change_compute_blocks(self) -> None: ) ) ) + + receive0 = self.create_receive_statement(stmt, receive_stream, 0) + bin_op = self.create_binary_operation(stmt, current_op, self.versioning.current_version("pipeline_helper")) newstatements.append( ForStatement( variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], @@ -1640,31 +1190,8 @@ def change_compute_blocks(self) -> None: stop=Expression(ConstantLiteral(1, ScalarType.i32)), step=None)], body=[ - ReceiveStatement( - local_array=self.versioning.current_version("pipeline_helper"), - stream_name=receive_stream[0], - completion_name=None - ), - AssignmentStatement( - destination=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ), - source=Expression( - BinaryOperator( - left=Expression( - value=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ) - ), - op= current_op, - right=Expression( - value=self.versioning.current_version("pipeline_helper") - ) - ) - ) - ) + receive0, + bin_op ], ) ) @@ -1700,6 +1227,7 @@ def change_compute_blocks(self) -> None: # pipelined origin elif (current_position[0] == origin[0] and current_position[2] == origin[1]): + send0 = self.create_send_statement(stmt, send_stream, 0) newstatements.append( ForStatement( variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], @@ -1707,14 +1235,7 @@ def change_compute_blocks(self) -> None: stop=Expression(ConstantLiteral(1, ScalarType.i32)), step=None)], body=[ - SendStatement( - local_array=ArraySlice( - array=stmt.local_array, - 
indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ), - stream_name=send_stream[0], - completion_name=None - ) + send0 ], ) ) @@ -1730,6 +1251,10 @@ def change_compute_blocks(self) -> None: ) ) ) + + send0 = self.create_send_statement(stmt, send_stream, 0) + receive0 = self.create_receive_statement(stmt, receive_stream, 0) + bin_op = self.create_binary_operation(stmt, current_op, self.versioning.current_version("pipeline_helper")) newstatements.append( ForStatement( variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], @@ -1737,39 +1262,9 @@ def change_compute_blocks(self) -> None: stop=Expression(ConstantLiteral(1, ScalarType.i32)), step=None)], body=[ - ReceiveStatement( - local_array=self.versioning.current_version("pipeline_helper"), - stream_name=receive_stream[0], - completion_name=None - ), - AssignmentStatement( - destination=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ), - source=Expression( - BinaryOperator( - left=Expression( - value=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ) - ), - op= current_op, - right=Expression( - value=self.versioning.current_version("pipeline_helper") - ) - ) - ) - ), - SendStatement( - local_array=ArraySlice( - array=stmt.local_array, - indices=[Expression(value=self.versioning.current_version("reduce_runner"))] - ), - stream_name=send_stream[0], - completion_name=None - ) + receive0, + bin_op, + send0 ], ) ) From e3133cc785212d9967b28a0ad09e454a33a60bd5 Mon Sep 17 00:00:00 2001 From: Niklas Roemer Date: Thu, 16 Jan 2025 01:05:30 +0100 Subject: [PATCH 15/27] add support for arrays of length >= 1 --- samples/spatial/simple_reduce.sptl | 2 +- .../optimizations/spatial_reduce.py | 59 +++++++++++++------ 2 files changed, 41 insertions(+), 20 deletions(-) diff --git a/samples/spatial/simple_reduce.sptl 
b/samples/spatial/simple_reduce.sptl index f9e94c1b..2bdcb8ed 100644 --- a/samples/spatial/simple_reduce.sptl +++ b/samples/spatial/simple_reduce.sptl @@ -2,7 +2,7 @@ kernel @add() { place i16 i, i16 j in [0:5, 0:5] { - i16[1] a + i16[100] a } dataflow i16 i, i16 j in [0:5, 0:5] { diff --git a/spatialstencil/optimizations/spatial_reduce.py b/spatialstencil/optimizations/spatial_reduce.py index 14f65661..bf120055 100644 --- a/spatialstencil/optimizations/spatial_reduce.py +++ b/spatialstencil/optimizations/spatial_reduce.py @@ -1,4 +1,4 @@ -from spatialstencil.syntax.spatial_ir.irnodes import Kernel, ComputeBlock, ReduceStatement, Expression, SubgridExpression, RangeExpression, ConstantLiteral, ScalarType, DataflowBlock, MulStreamDeclaration, ReduceRoutingDeclaration, RoutingDeclaration, RoutingHop, StreamType, Identifier, TypedIdentifier, ForeachStatement, ArraySlice, BinaryOperator, SendStatement, ReceiveGenerator, AssignmentStatement, RelativeStreamDeclaration, PlaceBlock, Phase, Parameter, KernelArgument, ReceiveStatement, ForStatement +from spatialstencil.syntax.spatial_ir.irnodes import Kernel, ComputeBlock, ReduceStatement, Expression, SubgridExpression, RangeExpression, ConstantLiteral, ScalarType, DataflowBlock, MulStreamDeclaration, ReduceRoutingDeclaration, RoutingDeclaration, RoutingHop, StreamType, Identifier, TypedIdentifier, ForeachStatement, ArraySlice, BinaryOperator, SendStatement, ReceiveGenerator, AssignmentStatement, RelativeStreamDeclaration, PlaceBlock, Phase, Parameter, KernelArgument, ReceiveStatement, ForStatement,FieldDeclaration,ArrayType from typing import Union, Tuple, Optional, Literal import spatialstencil.syntax.spatial_ir.irnodes as spa from spatialstencil.lowering.versioning import Versioning @@ -498,7 +498,9 @@ def change_data_blocks(self) -> None: [elem.subgrid.x_range.start.value.value, elem.subgrid.x_range.stop.value.value], [elem.subgrid.y_range.start.value.value, elem.subgrid.y_range.stop.value.value], [stmt.dx.value.value if 
(elem.subgrid.y_range.stop.value.value - elem.subgrid.y_range.start.value.value) % 2 == 0 else (elem.subgrid.x_range.stop.value.value - stmt.dx.value.value - 1), - elem.subgrid.y_range.stop.value.value - 1 if elem.subgrid.y_range.start.value.value == stmt.dy.value.value else elem.subgrid.y_range.start.value.value]]}) + elem.subgrid.y_range.stop.value.value - 1 if elem.subgrid.y_range.start.value.value == stmt.dy.value.value else elem.subgrid.y_range.start.value.value], + None, + None]}) new_grid_streams = [] new_snake_streams = [] @@ -671,6 +673,11 @@ def fix_subgrid(self) -> None: if isinstance(stmt, ReduceStatement): stream_name = stmt.stream_name.name + if self.reduce_operations[stream_name][5] == None: + for tst in stmt.iter_child_nodes(): + self.reduce_operations[stream_name][5] = tst + break + # test if stream_name is in grid_streams if stmt.stream_name.name in self.grid_streams: connections = self.grid_streams[stream_name] @@ -824,12 +831,15 @@ def fix_subgrid(self) -> None: def change_compute_blocks(self) -> None: finalbody = [] for elem in self.body: - #print(elem) - #print('-'*50) - #for tst in elem.iter_child_nodes(): ### use this to go over nested nodes + # print(elem) + # print('-'*50) + # for tst in elem.iter_child_nodes(): ### use this to go over nested nodes # print(tst) # print('@'*50) - #exit() + + if isinstance(elem, PlaceBlock): + for srch in elem.iter_child_nodes(): + print(srch) if isinstance(elem, ComputeBlock): statements = [] @@ -846,6 +856,18 @@ def change_compute_blocks(self) -> None: root = self.reduce_operations[stmt.stream_name.name][1] origin = self.reduce_operations[stmt.stream_name.name][4] complete_grid = [self.reduce_operations[stmt.stream_name.name][2], self.reduce_operations[stmt.stream_name.name][3]] + send_identifier = self.reduce_operations[stmt.stream_name.name][5] + send_amount = self.reduce_operations[stmt.stream_name.name][6] + if send_amount == None: + for elem in self.body: + for srch in elem.iter_child_nodes(): + if 
isinstance(srch, FieldDeclaration): + if srch.field_name == send_identifier: + if isinstance(srch.dtype, ArrayType): + send_amount = srch.dtype.shape[0].value.value + self.reduce_operations[stmt.stream_name.name][6] = send_amount + else: + raise ValueError(f"Field {send_identifier} is not an array. Only arrays are currently supported.") if stream_name in self.grid_streams: connections = self.grid_streams[stream_name] @@ -939,7 +961,7 @@ def change_compute_blocks(self) -> None: ForStatement( variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), - stop=Expression(ConstantLiteral(1, ScalarType.i32)), + stop=Expression(ConstantLiteral(send_amount, ScalarType.i32)), step=None)], body=[ receive0, @@ -957,7 +979,7 @@ def change_compute_blocks(self) -> None: ForStatement( variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), - stop=Expression(ConstantLiteral(1, ScalarType.i32)), + stop=Expression(ConstantLiteral(send_amount, ScalarType.i32)), step=None)], body=[ receive0, @@ -978,7 +1000,7 @@ def change_compute_blocks(self) -> None: ForStatement( variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), - stop=Expression(ConstantLiteral(1, ScalarType.i32)), + stop=Expression(ConstantLiteral(send_amount, ScalarType.i32)), step=None)], body=[ receive0, @@ -1002,7 +1024,7 @@ def change_compute_blocks(self) -> None: ForStatement( variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), - stop=Expression(ConstantLiteral(1, 
ScalarType.i32)), + stop=Expression(ConstantLiteral(send_amount, ScalarType.i32)), step=None)], body=[ receive0, @@ -1033,7 +1055,7 @@ def change_compute_blocks(self) -> None: ForStatement( variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), - stop=Expression(ConstantLiteral(1, ScalarType.i32)), + stop=Expression(ConstantLiteral(send_amount, ScalarType.i32)), step=None)], body=[ receive0, @@ -1049,7 +1071,7 @@ def change_compute_blocks(self) -> None: ForStatement( variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), - stop=Expression(ConstantLiteral(1, ScalarType.i32)), + stop=Expression(ConstantLiteral(send_amount, ScalarType.i32)), step=None)], body=[ receive0, @@ -1068,7 +1090,7 @@ def change_compute_blocks(self) -> None: ForStatement( variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), - stop=Expression(ConstantLiteral(1, ScalarType.i32)), + stop=Expression(ConstantLiteral(send_amount, ScalarType.i32)), step=None)], body=[ receive0, @@ -1090,7 +1112,7 @@ def change_compute_blocks(self) -> None: ForStatement( variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), - stop=Expression(ConstantLiteral(1, ScalarType.i32)), + stop=Expression(ConstantLiteral(send_amount, ScalarType.i32)), step=None)], body=[ receive0, @@ -1110,7 +1132,7 @@ def change_compute_blocks(self) -> None: ForStatement( variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], 
range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), - stop=Expression(ConstantLiteral(1, ScalarType.i32)), + stop=Expression(ConstantLiteral(send_amount, ScalarType.i32)), step=None)], body=[ send @@ -1187,7 +1209,7 @@ def change_compute_blocks(self) -> None: ForStatement( variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), - stop=Expression(ConstantLiteral(1, ScalarType.i32)), + stop=Expression(ConstantLiteral(send_amount, ScalarType.i32)), step=None)], body=[ receive0, @@ -1232,7 +1254,7 @@ def change_compute_blocks(self) -> None: ForStatement( variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), - stop=Expression(ConstantLiteral(1, ScalarType.i32)), + stop=Expression(ConstantLiteral(send_amount, ScalarType.i32)), step=None)], body=[ send0 @@ -1259,7 +1281,7 @@ def change_compute_blocks(self) -> None: ForStatement( variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), - stop=Expression(ConstantLiteral(1, ScalarType.i32)), + stop=Expression(ConstantLiteral(send_amount, ScalarType.i32)), step=None)], body=[ receive0, @@ -1281,6 +1303,5 @@ def change_compute_blocks(self) -> None: finalbody.append(elem) self.body = finalbody - #exit() return None \ No newline at end of file From 816bf336aec9df3aa20183a7d1df299f6797c8b1 Mon Sep 17 00:00:00 2001 From: Niklas Roemer Date: Mon, 3 Feb 2025 19:52:07 +0100 Subject: [PATCH 16/27] support nested reduce statements --- samples/spatial/simple_reduce.sptl | 11 +- .../optimizations/spatial_reduce.py | 983 +++++++++--------- 2 files changed, 522 insertions(+), 472 
deletions(-) diff --git a/samples/spatial/simple_reduce.sptl b/samples/spatial/simple_reduce.sptl index 2bdcb8ed..377eb4d0 100644 --- a/samples/spatial/simple_reduce.sptl +++ b/samples/spatial/simple_reduce.sptl @@ -15,6 +15,15 @@ kernel @add() { compute i16 i, i16 j in [0:5, 0:5] { a[0] = 1 - await reduce(a, red) + for i32 test1 in [0:999] { + for i32 test2 in [0:999] { + for i32 test3 in [0:999] { + a[0] = 100 + for i32 test4 in [0:999] { + await reduce(a, red) + } + } + } + } } } \ No newline at end of file diff --git a/spatialstencil/optimizations/spatial_reduce.py b/spatialstencil/optimizations/spatial_reduce.py index bf120055..391e32fe 100644 --- a/spatialstencil/optimizations/spatial_reduce.py +++ b/spatialstencil/optimizations/spatial_reduce.py @@ -1,10 +1,9 @@ -from spatialstencil.syntax.spatial_ir.irnodes import Kernel, ComputeBlock, ReduceStatement, Expression, SubgridExpression, RangeExpression, ConstantLiteral, ScalarType, DataflowBlock, MulStreamDeclaration, ReduceRoutingDeclaration, RoutingDeclaration, RoutingHop, StreamType, Identifier, TypedIdentifier, ForeachStatement, ArraySlice, BinaryOperator, SendStatement, ReceiveGenerator, AssignmentStatement, RelativeStreamDeclaration, PlaceBlock, Phase, Parameter, KernelArgument, ReceiveStatement, ForStatement,FieldDeclaration,ArrayType +from spatialstencil.syntax.spatial_ir.irnodes import Kernel, ComputeBlock, ReduceStatement, Expression, SubgridExpression, RangeExpression, ConstantLiteral, ScalarType, DataflowBlock, MulStreamDeclaration, ReduceRoutingDeclaration, RoutingDeclaration, RoutingHop, StreamType, Identifier, TypedIdentifier, ForeachStatement, ArraySlice, BinaryOperator, SendStatement, ReceiveGenerator, AssignmentStatement, RelativeStreamDeclaration, PlaceBlock, Phase, Parameter, KernelArgument, ReceiveStatement, ForStatement,FieldDeclaration,ArrayType, MapStatement, AsyncBlock, TernaryOperator from typing import Union, Tuple, Optional, Literal import spatialstencil.syntax.spatial_ir.irnodes as 
spa from spatialstencil.lowering.versioning import Versioning -from spatialstencil.syntax.common.visitor import ScopedIRNodeVisitor, IRNodeVisitor +import types # TODO from spatialstencil.syntax.spatial_ir.grid_geometry import Rectangle -# try ScopedIRNodeVisitor / IRNodeVisitor from spatialstencil.syntax.common.visitor to match nodes that have reduce in them class ReduceOptimizer(): @@ -78,6 +77,490 @@ def create_binary_operation(self, stmt, current_op, rhs) -> AssignmentStatement: ) return bin_op + + + #######recursively replace body +##### replace_bodypart(to_replace, replace_with) return bodyß +## get body +## for all nodes that have a body replace them with the same function call +## if searched object in body return the new object +## return object + + def replace_reduce(self, stmt, elem) -> list[Expression]: + + current_position = [elem.subgrid.x_range.start.value.value, + elem.subgrid.x_range.stop.value.value, + elem.subgrid.y_range.start.value.value, + elem.subgrid.y_range.stop.value.value] + newstatements = [] + stream_name = stmt.stream_name.name + operation_id = self.reduce_operations[stmt.stream_name.name][0]['op'] + root = self.reduce_operations[stmt.stream_name.name][1] + origin = self.reduce_operations[stmt.stream_name.name][4] + complete_grid = [self.reduce_operations[stmt.stream_name.name][2], self.reduce_operations[stmt.stream_name.name][3]] + send_identifier = self.reduce_operations[stmt.stream_name.name][5] + send_amount = self.reduce_operations[stmt.stream_name.name][6] + if send_amount == None: + for elem in self.body: + for srch in elem.iter_child_nodes(): + if isinstance(srch, FieldDeclaration): + if srch.field_name == send_identifier: + if isinstance(srch.dtype, ArrayType): + send_amount = srch.dtype.shape[0].value.value + self.reduce_operations[stmt.stream_name.name][6] = send_amount + else: + raise ValueError(f"Field {send_identifier} is not an array. 
Only arrays are currently supported.") + + if stream_name in self.grid_streams: + connections = self.grid_streams[stream_name] + elif stream_name in self.snake_streams: + connections = self.snake_streams[stream_name] + else: + raise ValueError(f"Stream name {stream_name} not found in grid_streams or snake_streams.") + + if operation_id == "S_SUM": + current_op = '+' + elif operation_id == "S_PROD": + current_op = '*' + else: + raise NotImplementedError("Currently only S_SUM and S_PROD are supported.") + + if stream_name in self.grid_streams: + pipelined_send = [] + pipelined_receive = [] + if not connections[0][4]: + # not pipelined + for con in connections: + if (current_position[0] >= con[1][0] + and current_position[1] <= con[1][1] + and current_position[2] >= con[1][2] + and current_position[3] <= con[1][3]): + + if (con[3] == 'left' and current_position[1] != con[1][1] + or con[3] == 'right' and current_position[0] != con[1][0] + or con[3] == 'top' and current_position[3] != con[1][3] + or con[3] == 'bottom' and current_position[2] != con[1][2]): + + bin_op = self.create_binary_operation(stmt, current_op, self.versioning.current_version("reduce_receive")) + newstatements.append( + ForeachStatement( + variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], + parameter_range=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), + stop=Expression(ConstantLiteral(1, ScalarType.i32)), + step=None)], + stream_variable=TypedIdentifier(dtype=con[2].dtype, + identifier=self.versioning.next_version("reduce_receive")), + receive_stream=ReceiveGenerator(stream_name=con[0]), + body=[ + bin_op + ], + completion_name=None + ) + ) + + if (con[3] == 'left' and current_position[0] != con[1][0] + or con[3] == 'right' and current_position[1] != con[1][1] + or con[3] == 'top' and current_position[2] != con[1][2] + or con[3] == 'bottom' and current_position[3] != con[1][3]): + + newstatements.append( + 
SendStatement( + local_array=stmt.local_array, + stream_name=con[0], + completion_name=None + ) + ) + + + else: + print(current_position) + print(root) + for con_list in connections: + #print(con_list) + for con in con_list[1]: + if (current_position[0] >= con[0] and current_position[1] <= con[1] + and current_position[2] >= con[2] and current_position[3] <= con[3]): + print(con) + if con[8] == 'sender': + pipelined_send.append(con_list[0]) + elif con[8] == 'receiver': + pipelined_receive.append(con_list[0]) + + if pipelined_send != [] and pipelined_receive != []: + newstatements.append( + AssignmentStatement( + destination=self.versioning.next_version("pipeline_helper"), + source=Expression( + ConstantLiteral(0, ScalarType.i32) + ) + ) + ) + if len(pipelined_receive) == 1: + send = self.create_send_statement(stmt, pipelined_send, 0) + receive0 = self.create_receive_statement(stmt, pipelined_receive, 0) + bin_op = self.create_binary_operation(stmt, current_op, self.versioning.current_version("pipeline_helper")) + newstatements.append( + ForStatement( + variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], + range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), + stop=Expression(ConstantLiteral(send_amount, ScalarType.i32)), + step=None)], + body=[ + receive0, + bin_op, + send + ], + ) + ) + elif len(pipelined_receive) == 2: + send = self.create_send_statement(stmt, pipelined_send, 0) + receive0 = self.create_receive_statement(stmt, pipelined_receive, 0) + receive1 = self.create_receive_statement(stmt, pipelined_receive, 1) + bin_op = self.create_binary_operation(stmt, current_op, self.versioning.current_version("pipeline_helper")) + newstatements.append( + ForStatement( + variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], + range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), + 
stop=Expression(ConstantLiteral(send_amount, ScalarType.i32)), + step=None)], + body=[ + receive0, + bin_op, + receive1, + bin_op, + send + ], + ) + ) + elif len(pipelined_receive) == 3: + send = self.create_send_statement(stmt, pipelined_send, 0) + receive0 = self.create_receive_statement(stmt, pipelined_receive, 0) + receive1 = self.create_receive_statement(stmt, pipelined_receive, 1) + receive2 = self.create_receive_statement(stmt, pipelined_receive, 2) + bin_op = self.create_binary_operation(stmt, current_op, self.versioning.current_version("pipeline_helper")) + newstatements.append( + ForStatement( + variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], + range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), + stop=Expression(ConstantLiteral(send_amount, ScalarType.i32)), + step=None)], + body=[ + receive0, + bin_op, + receive1, + bin_op, + receive2, + bin_op, + send + ], + ) + ) + else: + send = self.create_send_statement(stmt, pipelined_send, 0) + receive0 = self.create_receive_statement(stmt, pipelined_receive, 0) + receive1 = self.create_receive_statement(stmt, pipelined_receive, 1) + receive2 = self.create_receive_statement(stmt, pipelined_receive, 2) + receive3 = self.create_receive_statement(stmt, pipelined_receive, 3) + bin_op = self.create_binary_operation(stmt, current_op, self.versioning.current_version("pipeline_helper")) + newstatements.append( + ForStatement( + variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], + range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), + stop=Expression(ConstantLiteral(send_amount, ScalarType.i32)), + step=None)], + body=[ + receive0, + bin_op, + receive1, + bin_op, + receive2, + bin_op, + receive3, + bin_op, + send + ], + ) + ) + elif pipelined_send == [] and pipelined_receive != []: + newstatements.append( + AssignmentStatement( + 
destination=self.versioning.next_version("pipeline_helper"), + source=Expression( + ConstantLiteral(0, ScalarType.i32) + ) + ) + ) + if len(pipelined_receive) == 1: + receive0 = self.create_receive_statement(stmt, pipelined_receive, 0) + bin_op = self.create_binary_operation(stmt, current_op, self.versioning.current_version("pipeline_helper")) + newstatements.append( + ForStatement( + variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], + range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), + stop=Expression(ConstantLiteral(send_amount, ScalarType.i32)), + step=None)], + body=[ + receive0, + bin_op + ], + ) + ) + elif len(pipelined_receive) == 2: + receive0 = self.create_receive_statement(stmt, pipelined_receive, 0) + receive1 = self.create_receive_statement(stmt, pipelined_receive, 1) + bin_op = self.create_binary_operation(stmt, current_op, self.versioning.current_version("pipeline_helper")) + newstatements.append( + ForStatement( + variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], + range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), + stop=Expression(ConstantLiteral(send_amount, ScalarType.i32)), + step=None)], + body=[ + receive0, + bin_op, + receive1, + bin_op + ], + ) + ) + elif len(pipelined_receive) == 3: + receive0 = self.create_receive_statement(stmt, pipelined_receive, 0) + receive1 = self.create_receive_statement(stmt, pipelined_receive, 1) + receive2 = self.create_receive_statement(stmt, pipelined_receive, 2) + bin_op = self.create_binary_operation(stmt, current_op, self.versioning.current_version("pipeline_helper")) + newstatements.append( + ForStatement( + variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], + range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), + 
stop=Expression(ConstantLiteral(send_amount, ScalarType.i32)), + step=None)], + body=[ + receive0, + bin_op, + receive1, + bin_op, + receive2, + bin_op + ], + ) + ) + else: + receive0 = self.create_receive_statement(stmt, pipelined_receive, 0) + receive1 = self.create_receive_statement(stmt, pipelined_receive, 1) + receive2 = self.create_receive_statement(stmt, pipelined_receive, 2) + receive3 = self.create_receive_statement(stmt, pipelined_receive, 3) + bin_op = self.create_binary_operation(stmt, current_op, self.versioning.current_version("pipeline_helper")) + newstatements.append( + ForStatement( + variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], + range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), + stop=Expression(ConstantLiteral(send_amount, ScalarType.i32)), + step=None)], + body=[ + receive0, + bin_op, + receive1, + bin_op, + receive2, + bin_op, + receive3, + bin_op + ], + ) + ) + elif pipelined_send != [] and pipelined_receive == []: + send = self.create_send_statement(stmt, pipelined_send, 0) + newstatements.append( + ForStatement( + variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], + range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), + stop=Expression(ConstantLiteral(send_amount, ScalarType.i32)), + step=None)], + body=[ + send + ], + ) + ) + else: + raise ValueError(f"No pipelined send or receive found for position {current_position}.") + + + + elif stream_name in self.snake_streams: + if not (current_position[0] == origin[0] and current_position[2] == origin[1]): + # everything but the starting point receives first + + # get receive stream + receive_stream = None + for con in connections: + for detailed_con in con[5]: + if (current_position[0] >= detailed_con[0] and current_position[1] <= detailed_con[1] + and current_position[2] >= detailed_con[2] and 
current_position[3] <= detailed_con[3] + and ((detailed_con[4] == -1 and not current_position[1] == detailed_con[1]) + or (detailed_con[4] == 1 and not current_position[0] == detailed_con[0]) + or (detailed_con[4] == 0 and detailed_con[8] == 'receiver') + or (con[6] == True and detailed_con[8] == 'receiver'))): + receive_stream = con + break + + if not receive_stream == None: + break + + if operation_id == "S_SUM": + current_op = '+' + elif operation_id == "S_PROD": + current_op = '*' + else: + raise NotImplementedError("Currently only S_SUM and S_PROD are supported.") + + # change receive statement + + # not pipelined + if not con[6]: + bin_op = self.create_binary_operation(stmt, current_op, self.versioning.current_version("reduce_receive")) + newstatements.append( + ForeachStatement( + variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], + parameter_range=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), + stop=Expression(ConstantLiteral(1, ScalarType.i32)), + step=None)], + stream_variable=TypedIdentifier(dtype=receive_stream[2].dtype, + identifier=self.versioning.next_version("reduce_receive")), + receive_stream=ReceiveGenerator(stream_name=receive_stream[0]), + body=[ + bin_op + ], + completion_name=None + ) + ) + + # pipelined root + elif (current_position[0] == root[0] and current_position[2] == root[1]): + newstatements.append( + AssignmentStatement( + destination=self.versioning.next_version("pipeline_helper"), + source=Expression( + ConstantLiteral(0, ScalarType.i32) + ) + ) + ) + + receive0 = self.create_receive_statement(stmt, receive_stream, 0) + bin_op = self.create_binary_operation(stmt, current_op, self.versioning.current_version("pipeline_helper")) + newstatements.append( + ForStatement( + variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], + range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, 
ScalarType.i32)), + stop=Expression(ConstantLiteral(send_amount, ScalarType.i32)), + step=None)], + body=[ + receive0, + bin_op + ], + ) + ) + + if not (current_position[0] == root[0] and current_position[2] == root[1]): + # only root does not send + + # get send stream + send_stream = None + for con in connections: + for detailed_con in con[5]: + if (current_position[0] >= detailed_con[0] and current_position[1] <= detailed_con[1] + and current_position[2] >= detailed_con[2] and current_position[3] <= detailed_con[3] + and ((detailed_con[4] == 1 and not current_position[1] == detailed_con[1]) + or (detailed_con[4] == -1 and not current_position[0] == detailed_con[0]) + or (detailed_con[4] == 0 and detailed_con[8] == 'sender') + or (con[6] == True and detailed_con[8] == 'sender'))): + send_stream = con + break + + if not send_stream == None: + break + + # not pipelined + if not con[6]: + newstatements.append( + SendStatement( + local_array=stmt.local_array, + stream_name=send_stream[0], + completion_name=None + ) + ) + + # pipelined origin + elif (current_position[0] == origin[0] and current_position[2] == origin[1]): + send0 = self.create_send_statement(stmt, send_stream, 0) + newstatements.append( + ForStatement( + variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], + range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), + stop=Expression(ConstantLiteral(send_amount, ScalarType.i32)), + step=None)], + body=[ + send0 + ], + ) + ) + + + # pipelined + else: + newstatements.append( + AssignmentStatement( + destination=self.versioning.next_version("pipeline_helper"), + source=Expression( + ConstantLiteral(0, ScalarType.i32) + ) + ) + ) + + send0 = self.create_send_statement(stmt, send_stream, 0) + receive0 = self.create_receive_statement(stmt, receive_stream, 0) + bin_op = self.create_binary_operation(stmt, current_op, self.versioning.current_version("pipeline_helper")) + 
newstatements.append( + ForStatement( + variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], + range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), + stop=Expression(ConstantLiteral(send_amount, ScalarType.i32)), + step=None)], + body=[ + receive0, + bin_op, + send0 + ], + ) + ) + + return newstatements + + + + def replace_stmt(self, stmt, elem, to_replace) -> list[Expression]: + input_stmt = stmt + if isinstance(stmt, to_replace): + print("directly found") + if to_replace == ReduceStatement: + return self.replace_reduce(stmt, elem) + + # all of these use body + elif isinstance(stmt, ForeachStatement) or isinstance(stmt, ForStatement) or isinstance(stmt, MapStatement) or isinstance(stmt, AsyncBlock): + new_body = [] + for body_stmt in stmt.body: + replaced_stmts = self.replace_stmt(body_stmt, elem, to_replace) + for replaced_stmt in replaced_stmts: + new_body.append(replaced_stmt) + input_stmt.body = new_body + + # uses if_true and if_false + elif isinstance(stmt, TernaryOperator): + print("TernaryOperator") + print(stmt) + + return [input_stmt] + + #exit() def create_communication_patterns(self, x_start, x_stop, y_start, y_stop, x, y, name, graph, pipelined) -> None: @@ -669,8 +1152,22 @@ def fix_subgrid(self) -> None: y_step = elem.subgrid.y_range.step.value.value if elem.subgrid.y_range.step is not None else None grid = [[[x_start, x_stop], [y_start, y_stop]]] - for stmt in elem.statements: # walk operators in baseclass - if isinstance(stmt, ReduceStatement): + for stmt in elem.statements: + red_stmt = None + nodes = [stmt] + found = False + while len(nodes) > 0 and not found: + for intermediate_stmt in nodes[0].iter_child_nodes(): + if not isinstance(intermediate_stmt, types.GeneratorType): + nodes.append(intermediate_stmt) + if isinstance(intermediate_stmt, ReduceStatement): # only finds one reduce statement + found = True + red_stmt = intermediate_stmt + nodes.pop(0) + 
+ if red_stmt is not None or isinstance(stmt, ReduceStatement): + if red_stmt is not None: + stmt = red_stmt stream_name = stmt.stream_name.name if self.reduce_operations[stream_name][5] == None: @@ -831,476 +1328,20 @@ def fix_subgrid(self) -> None: def change_compute_blocks(self) -> None: finalbody = [] for elem in self.body: - # print(elem) - # print('-'*50) - # for tst in elem.iter_child_nodes(): ### use this to go over nested nodes - # print(tst) - # print('@'*50) - - if isinstance(elem, PlaceBlock): - for srch in elem.iter_child_nodes(): - print(srch) - if isinstance(elem, ComputeBlock): statements = [] for stmt in elem.statements: # walk operators in baseclass - if isinstance(stmt, ReduceStatement): - - current_position = [elem.subgrid.x_range.start.value.value, - elem.subgrid.x_range.stop.value.value, - elem.subgrid.y_range.start.value.value, - elem.subgrid.y_range.stop.value.value] - newstatements = [] - stream_name = stmt.stream_name.name - operation_id = self.reduce_operations[stmt.stream_name.name][0]['op'] - root = self.reduce_operations[stmt.stream_name.name][1] - origin = self.reduce_operations[stmt.stream_name.name][4] - complete_grid = [self.reduce_operations[stmt.stream_name.name][2], self.reduce_operations[stmt.stream_name.name][3]] - send_identifier = self.reduce_operations[stmt.stream_name.name][5] - send_amount = self.reduce_operations[stmt.stream_name.name][6] - if send_amount == None: - for elem in self.body: - for srch in elem.iter_child_nodes(): - if isinstance(srch, FieldDeclaration): - if srch.field_name == send_identifier: - if isinstance(srch.dtype, ArrayType): - send_amount = srch.dtype.shape[0].value.value - self.reduce_operations[stmt.stream_name.name][6] = send_amount - else: - raise ValueError(f"Field {send_identifier} is not an array. 
Only arrays are currently supported.") - - if stream_name in self.grid_streams: - connections = self.grid_streams[stream_name] - elif stream_name in self.snake_streams: - connections = self.snake_streams[stream_name] - else: - raise ValueError(f"Stream name {stream_name} not found in grid_streams or snake_streams.") - - if operation_id == "S_SUM": - current_op = '+' - elif operation_id == "S_PROD": - current_op = '*' - else: - raise NotImplementedError("Currently only S_SUM and S_PROD are supported.") - - if stream_name in self.grid_streams: - pipelined_send = [] - pipelined_receive = [] - if not connections[0][4]: - # not pipelined - for con in connections: - if (current_position[0] >= con[1][0] - and current_position[1] <= con[1][1] - and current_position[2] >= con[1][2] - and current_position[3] <= con[1][3]): - - if (con[3] == 'left' and current_position[1] != con[1][1] - or con[3] == 'right' and current_position[0] != con[1][0] - or con[3] == 'top' and current_position[3] != con[1][3] - or con[3] == 'bottom' and current_position[2] != con[1][2]): - - bin_op = self.create_binary_operation(stmt, current_op, self.versioning.current_version("reduce_receive")) - newstatements.append( - ForeachStatement( - variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], - parameter_range=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), - stop=Expression(ConstantLiteral(1, ScalarType.i32)), - step=None)], - stream_variable=TypedIdentifier(dtype=con[2].dtype, - identifier=self.versioning.next_version("reduce_receive")), - receive_stream=ReceiveGenerator(stream_name=con[0]), - body=[ - bin_op - ], - completion_name=None - ) - ) - - if (con[3] == 'left' and current_position[0] != con[1][0] - or con[3] == 'right' and current_position[1] != con[1][1] - or con[3] == 'top' and current_position[2] != con[1][2] - or con[3] == 'bottom' and current_position[3] != con[1][3]): - - newstatements.append( - 
SendStatement( - local_array=stmt.local_array, - stream_name=con[0], - completion_name=None - ) - ) - - - else: - print(current_position) - print(root) - for con_list in connections: - #print(con_list) - for con in con_list[1]: - if (current_position[0] >= con[0] and current_position[1] <= con[1] - and current_position[2] >= con[2] and current_position[3] <= con[3]): - print(con) - if con[8] == 'sender': - pipelined_send.append(con_list[0]) - elif con[8] == 'receiver': - pipelined_receive.append(con_list[0]) - - if pipelined_send != [] and pipelined_receive != []: - newstatements.append( - AssignmentStatement( - destination=self.versioning.next_version("pipeline_helper"), - source=Expression( - ConstantLiteral(0, ScalarType.i32) - ) - ) - ) - if len(pipelined_receive) == 1: - send = self.create_send_statement(stmt, pipelined_send, 0) - receive0 = self.create_receive_statement(stmt, pipelined_receive, 0) - bin_op = self.create_binary_operation(stmt, current_op, self.versioning.current_version("pipeline_helper")) - newstatements.append( - ForStatement( - variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], - range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), - stop=Expression(ConstantLiteral(send_amount, ScalarType.i32)), - step=None)], - body=[ - receive0, - bin_op, - send - ], - ) - ) - elif len(pipelined_receive) == 2: - send = self.create_send_statement(stmt, pipelined_send, 0) - receive0 = self.create_receive_statement(stmt, pipelined_receive, 0) - receive1 = self.create_receive_statement(stmt, pipelined_receive, 1) - bin_op = self.create_binary_operation(stmt, current_op, self.versioning.current_version("pipeline_helper")) - newstatements.append( - ForStatement( - variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], - range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), - 
stop=Expression(ConstantLiteral(send_amount, ScalarType.i32)), - step=None)], - body=[ - receive0, - bin_op, - receive1, - bin_op, - send - ], - ) - ) - elif len(pipelined_receive) == 3: - send = self.create_send_statement(stmt, pipelined_send, 0) - receive0 = self.create_receive_statement(stmt, pipelined_receive, 0) - receive1 = self.create_receive_statement(stmt, pipelined_receive, 1) - receive2 = self.create_receive_statement(stmt, pipelined_receive, 2) - bin_op = self.create_binary_operation(stmt, current_op, self.versioning.current_version("pipeline_helper")) - newstatements.append( - ForStatement( - variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], - range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), - stop=Expression(ConstantLiteral(send_amount, ScalarType.i32)), - step=None)], - body=[ - receive0, - bin_op, - receive1, - bin_op, - receive2, - bin_op, - send - ], - ) - ) - else: - send = self.create_send_statement(stmt, pipelined_send, 0) - receive0 = self.create_receive_statement(stmt, pipelined_receive, 0) - receive1 = self.create_receive_statement(stmt, pipelined_receive, 1) - receive2 = self.create_receive_statement(stmt, pipelined_receive, 2) - receive3 = self.create_receive_statement(stmt, pipelined_receive, 3) - bin_op = self.create_binary_operation(stmt, current_op, self.versioning.current_version("pipeline_helper")) - newstatements.append( - ForStatement( - variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], - range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), - stop=Expression(ConstantLiteral(send_amount, ScalarType.i32)), - step=None)], - body=[ - receive0, - bin_op, - receive1, - bin_op, - receive2, - bin_op, - receive3, - bin_op, - send - ], - ) - ) - elif pipelined_send == [] and pipelined_receive != []: - newstatements.append( - AssignmentStatement( - 
destination=self.versioning.next_version("pipeline_helper"), - source=Expression( - ConstantLiteral(0, ScalarType.i32) - ) - ) - ) - if len(pipelined_receive) == 1: - receive0 = self.create_receive_statement(stmt, pipelined_receive, 0) - bin_op = self.create_binary_operation(stmt, current_op, self.versioning.current_version("pipeline_helper")) - newstatements.append( - ForStatement( - variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], - range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), - stop=Expression(ConstantLiteral(send_amount, ScalarType.i32)), - step=None)], - body=[ - receive0, - bin_op - ], - ) - ) - elif len(pipelined_receive) == 2: - receive0 = self.create_receive_statement(stmt, pipelined_receive, 0) - receive1 = self.create_receive_statement(stmt, pipelined_receive, 1) - bin_op = self.create_binary_operation(stmt, current_op, self.versioning.current_version("pipeline_helper")) - newstatements.append( - ForStatement( - variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], - range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), - stop=Expression(ConstantLiteral(send_amount, ScalarType.i32)), - step=None)], - body=[ - receive0, - bin_op, - receive1, - bin_op - ], - ) - ) - elif len(pipelined_receive) == 3: - receive0 = self.create_receive_statement(stmt, pipelined_receive, 0) - receive1 = self.create_receive_statement(stmt, pipelined_receive, 1) - receive2 = self.create_receive_statement(stmt, pipelined_receive, 2) - bin_op = self.create_binary_operation(stmt, current_op, self.versioning.current_version("pipeline_helper")) - newstatements.append( - ForStatement( - variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], - range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), - 
stop=Expression(ConstantLiteral(send_amount, ScalarType.i32)), - step=None)], - body=[ - receive0, - bin_op, - receive1, - bin_op, - receive2, - bin_op - ], - ) - ) - else: - receive0 = self.create_receive_statement(stmt, pipelined_receive, 0) - receive1 = self.create_receive_statement(stmt, pipelined_receive, 1) - receive2 = self.create_receive_statement(stmt, pipelined_receive, 2) - receive3 = self.create_receive_statement(stmt, pipelined_receive, 3) - bin_op = self.create_binary_operation(stmt, current_op, self.versioning.current_version("pipeline_helper")) - newstatements.append( - ForStatement( - variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], - range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), - stop=Expression(ConstantLiteral(send_amount, ScalarType.i32)), - step=None)], - body=[ - receive0, - bin_op, - receive1, - bin_op, - receive2, - bin_op, - receive3, - bin_op - ], - ) - ) - elif pipelined_send != [] and pipelined_receive == []: - send = self.create_send_statement(stmt, pipelined_send, 0) - newstatements.append( - ForStatement( - variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], - range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), - stop=Expression(ConstantLiteral(send_amount, ScalarType.i32)), - step=None)], - body=[ - send - ], - ) - ) - else: - raise ValueError(f"No pipelined send or receive found for position {current_position}.") - - - - elif stream_name in self.snake_streams: - if not (current_position[0] == origin[0] and current_position[2] == origin[1]): - # everything but the starting point receives first - - # get receive stream - receive_stream = None - for con in connections: - for detailed_con in con[5]: - if (current_position[0] >= detailed_con[0] and current_position[1] <= detailed_con[1] - and current_position[2] >= detailed_con[2] and 
current_position[3] <= detailed_con[3] - and ((detailed_con[4] == -1 and not current_position[1] == detailed_con[1]) - or (detailed_con[4] == 1 and not current_position[0] == detailed_con[0]) - or (detailed_con[4] == 0 and detailed_con[8] == 'receiver') - or (con[6] == True and detailed_con[8] == 'receiver'))): - receive_stream = con - break - - if not receive_stream == None: - break - - if operation_id == "S_SUM": - current_op = '+' - elif operation_id == "S_PROD": - current_op = '*' - else: - raise NotImplementedError("Currently only S_SUM and S_PROD are supported.") - - # change receive statement - - # not pipelined - if not con[6]: - bin_op = self.create_binary_operation(stmt, current_op, self.versioning.current_version("reduce_receive")) - newstatements.append( - ForeachStatement( - variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], - parameter_range=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), - stop=Expression(ConstantLiteral(1, ScalarType.i32)), - step=None)], - stream_variable=TypedIdentifier(dtype=receive_stream[2].dtype, - identifier=self.versioning.next_version("reduce_receive")), - receive_stream=ReceiveGenerator(stream_name=receive_stream[0]), - body=[ - bin_op - ], - completion_name=None - ) - ) - - # pipelined root - elif (current_position[0] == root[0] and current_position[2] == root[1]): - newstatements.append( - AssignmentStatement( - destination=self.versioning.next_version("pipeline_helper"), - source=Expression( - ConstantLiteral(0, ScalarType.i32) - ) - ) - ) - - receive0 = self.create_receive_statement(stmt, receive_stream, 0) - bin_op = self.create_binary_operation(stmt, current_op, self.versioning.current_version("pipeline_helper")) - newstatements.append( - ForStatement( - variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], - range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, 
ScalarType.i32)), - stop=Expression(ConstantLiteral(send_amount, ScalarType.i32)), - step=None)], - body=[ - receive0, - bin_op - ], - ) - ) - - if not (current_position[0] == root[0] and current_position[2] == root[1]): - # only root does not send - - # get send stream - send_stream = None - for con in connections: - for detailed_con in con[5]: - if (current_position[0] >= detailed_con[0] and current_position[1] <= detailed_con[1] - and current_position[2] >= detailed_con[2] and current_position[3] <= detailed_con[3] - and ((detailed_con[4] == 1 and not current_position[1] == detailed_con[1]) - or (detailed_con[4] == -1 and not current_position[0] == detailed_con[0]) - or (detailed_con[4] == 0 and detailed_con[8] == 'sender') - or (con[6] == True and detailed_con[8] == 'sender'))): - send_stream = con - break - - if not send_stream == None: - break - - # not pipelined - if not con[6]: - newstatements.append( - SendStatement( - local_array=stmt.local_array, - stream_name=send_stream[0], - completion_name=None - ) - ) - - # pipelined origin - elif (current_position[0] == origin[0] and current_position[2] == origin[1]): - send0 = self.create_send_statement(stmt, send_stream, 0) - newstatements.append( - ForStatement( - variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], - range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), - stop=Expression(ConstantLiteral(send_amount, ScalarType.i32)), - step=None)], - body=[ - send0 - ], - ) - ) - - - # pipelined - else: - newstatements.append( - AssignmentStatement( - destination=self.versioning.next_version("pipeline_helper"), - source=Expression( - ConstantLiteral(0, ScalarType.i32) - ) - ) - ) - - send0 = self.create_send_statement(stmt, send_stream, 0) - receive0 = self.create_receive_statement(stmt, receive_stream, 0) - bin_op = self.create_binary_operation(stmt, current_op, self.versioning.current_version("pipeline_helper")) - 
newstatements.append( - ForStatement( - variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], - range_expression=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), - stop=Expression(ConstantLiteral(send_amount, ScalarType.i32)), - step=None)], - body=[ - receive0, - bin_op, - send0 - ], - ) - ) - - # add receive + calculation + send here - for new_statement in newstatements: - statements.append(new_statement) - - else: - statements.append(stmt) - - finalbody.append(ComputeBlock(elem.variables, elem.subgrid, statements)) + new_stmts = self.replace_stmt(stmt, elem, ReduceStatement) + print(new_stmts) + print('*'*50) + print('*'*50) + for nstmt in new_stmts: + statements.append(nstmt) + print(statements) + print('*'*50) + finalbody.append(ComputeBlock(elem.variables, elem.subgrid, statements)) else: - finalbody.append(elem) + finalbody.append(elem) self.body = finalbody From eb834966be190f122e58639b18f7c1029bc4f604 Mon Sep 17 00:00:00 2001 From: Niklas Roemer Date: Thu, 13 Feb 2025 16:23:46 +0100 Subject: [PATCH 17/27] add tests and comments - start finishing --- irspec/docs/collective/collective.md | 79 +- irspec/docs/collective/layouts.md | 9 + irspec/mkdocs.yml | 1 + samples/collective/hard_reduce_1.ref_tile | 25 + samples/collective/hard_reduce_1.sptl | 32 + .../collective/medium_reduce_grid_1.ref_tile | 13 + samples/collective/medium_reduce_grid_1.sptl | 32 + samples/collective/simple_bcast.ref_tile | 3 + samples/collective/simple_bcast.sptl | 18 + .../collective/simple_reduce_grid_1.ref_tile | 5 + samples/collective/simple_reduce_grid_1.sptl | 20 + .../simple_reduce_grid_pipelined_1.ref_tile | 25 + .../simple_reduce_grid_pipelined_1.sptl | 20 + .../simple_reduce_looped.sptl} | 4 +- .../collective/simple_reduce_snake_1.ref_tile | 15 + samples/collective/simple_reduce_snake_1.sptl | 20 + .../simple_reduce_snake_pipelined_1.ref_tile | 25 + .../simple_reduce_snake_pipelined_1.sptl | 20 + 
samples/spatial/bcast.sptl | 45 - samples/spatial/reduce.sptl | 37 - samples/spatial/simple_reduce_after.sptl | 31 - samples/spatial/simple_reduce_four.sptl | 22 - .../spatial/simple_reduce_intermediate.sptl | 22 - samples/spatial/simple_reduce_three.sptl | 21 - .../optimizations/optimization_pass.py | 9 +- .../optimizations/spatial_broadcast.py | 243 ++++++ .../optimizations/spatial_reduce.py | 781 +++++++++--------- spatialstencil/syntax/spatial_ir/irnodes.py | 33 +- .../syntax/spatial_ir/language.lark | 4 +- .../syntax/spatial_ir/lark_to_ir.py | 2 + tests/test_collective_ir_parser.py | 78 ++ tests/test_spatial_ir_parser.py | 45 +- 32 files changed, 1124 insertions(+), 615 deletions(-) create mode 100644 irspec/docs/collective/layouts.md create mode 100644 samples/collective/hard_reduce_1.ref_tile create mode 100644 samples/collective/hard_reduce_1.sptl create mode 100644 samples/collective/medium_reduce_grid_1.ref_tile create mode 100644 samples/collective/medium_reduce_grid_1.sptl create mode 100644 samples/collective/simple_bcast.ref_tile create mode 100644 samples/collective/simple_bcast.sptl create mode 100644 samples/collective/simple_reduce_grid_1.ref_tile create mode 100644 samples/collective/simple_reduce_grid_1.sptl create mode 100644 samples/collective/simple_reduce_grid_pipelined_1.ref_tile create mode 100644 samples/collective/simple_reduce_grid_pipelined_1.sptl rename samples/{spatial/simple_reduce.sptl => collective/simple_reduce_looped.sptl} (87%) create mode 100644 samples/collective/simple_reduce_snake_1.ref_tile create mode 100644 samples/collective/simple_reduce_snake_1.sptl create mode 100644 samples/collective/simple_reduce_snake_pipelined_1.ref_tile create mode 100644 samples/collective/simple_reduce_snake_pipelined_1.sptl delete mode 100644 samples/spatial/bcast.sptl delete mode 100644 samples/spatial/reduce.sptl delete mode 100644 samples/spatial/simple_reduce_after.sptl delete mode 100644 samples/spatial/simple_reduce_four.sptl delete 
mode 100644 samples/spatial/simple_reduce_intermediate.sptl delete mode 100644 samples/spatial/simple_reduce_three.sptl create mode 100644 spatialstencil/optimizations/spatial_broadcast.py create mode 100644 tests/test_collective_ir_parser.py diff --git a/irspec/docs/collective/collective.md b/irspec/docs/collective/collective.md index e06c2643..05dc7441 100644 --- a/irspec/docs/collective/collective.md +++ b/irspec/docs/collective/collective.md @@ -8,27 +8,29 @@ The goal of this document is to give an overview of the key concepts present in ### Streams The stream class of the Spatial IR is extended with `multistream`, for a scalar type ``. -If `hops = auto` the routing is optimized while using at most `#channel` channels. For implementation details on the used number of channels see the `Channel Usage` section. - -In addition to `hops` and `channel` the operation `op` can be defined for certain collective communication pattern, i.e. reduce. The options for the operation `op` are: - -- CL_MAX (returns the maximum element) -- CL_MIN (returns the minimum element) -- CL_SUM (returns the sum of all elements) -- CL_PRODUCT (returns the product of all elements) +MultiStreams take a name and a root in (x,y) coordinates as arguments. Additionally a Broadcast or Reduce can be defined. ### Collective Functions Collective Communication functions can be called inside the compute block. For further implementation details see the specific collective definition. ## Broadcast -A broadcast is defined with the standard send and receive framework provided by the Spatial IR. It is differentiated from the single point to point communication by using a `multistream` instead of a standard stream. +A broadcast is defined with the standard send and receive framework provided by the Spatial IR. It is differentiated from the single point to point communication by using a `multistream` instead of a standard stream. 
This mimics the support for broadcast communication found in many spatial architectures. + +In the dataflow block a broadcast is defined in the following way: +``` +multistream name = broadcast_stream(root_x, root_y) { + channels = auto + } +``` +where (root_x, root_y) defines the sender. The name is passed as the stream argument in the compute blocks. With `channels`, a specific channel can be targeted for the communication on architectures that support it. In almost all situations `auto` should lead to optimal results. Sending data in a broadcast that is defined via the multistream `bcast` can therefore be defined as: ```rust compute i16 variable, i16 variable in subgrid_expression { - send(a, bcast) + send(data, bcast) } ``` +where data is the data being sent. ???+ example "Example: Simple Broadcast" ```rust @@ -37,16 +39,69 @@ compute i16 variable, i16 variable in subgrid_expression { } compute i16 i, i16 j in [0, 0] { - await receive(a, a_in) await send(a, bcast) } ``` where `i`, `j` are `i16` variables that are bound to the coordinates of the PEs in the subgrid and `bcast` is a multistream. + This can be generated with the following code: + ```rust + dataflow i16 i, i16 j in [0:N, 0] { + multistream bcast = broadcast_stream(0, 0) { + channels = auto + } + } + compute i16 i, i16 j in [0:N, 0] { + await broadcast(a, bcast); + } + ``` + +In the future the functionality could be extended with an optional send-receive routing (like in the reduce case) for devices that do not support broadcast communication. ## Reduce +Most architectures do not support Reduce operations. Therefore we translate reduces to simple send-receive communication. + +``` +NOTE: We currently only support reduce in an N-by-N grid +that cannot be defined partially or in multiple rounds. 
+``` +In the dataflow block a reduce is defined in the following way: +``` +multistream name = reduce_stream(root_x, root_y) { + graph = auto, + op = CL_SUM, + pipelined = true + } +``` +where (root_x, root_y) defines the receiver. The name is passed as the stream argument in the compute blocks. `graph` chooses the layout the communication follows. Further details on the different layouts available can be found in the [Layouts section](layouts.md). `op` defines which operation to use for the reduce. The currently supported operations are listed below. `pipelined` can be either `true` or `false` and defines, when sending arrays, whether the whole array is received by the next processing element (`pipelined = false`) or each element of the array is sent on before the next element is received (`pipelined = true`). -## Channel Usage \ No newline at end of file +The options for the operation `op` are: + +- CL_SUM (returns the sum of all elements) +- CL_PRODUCT (returns the product of all elements) + +At the moment parameters are not allowed in the range of the coordinate grid, i.e. +```rust +dataflow i16 i, i16 j in [0:N , 0:N] {...} +``` +is not allowed. + +In the compute blocks a reduce can then be used with the following line: +```rust +await reduce(data, name) +``` +where data is the element/array to reduce on. 
+ +???+ example "Example: Simple Reduce" + ```rust + result + ``` + + can be generated from: + + ```rust + input + ``` \ No newline at end of file diff --git a/irspec/docs/collective/layouts.md b/irspec/docs/collective/layouts.md new file mode 100644 index 00000000..1112eb3c --- /dev/null +++ b/irspec/docs/collective/layouts.md @@ -0,0 +1,9 @@ +# Layouts + +## Usage + +## Definitions + +### Snake + +### Grid \ No newline at end of file diff --git a/irspec/mkdocs.yml b/irspec/mkdocs.yml index 13e29f20..eb82843b 100644 --- a/irspec/mkdocs.yml +++ b/irspec/mkdocs.yml @@ -16,6 +16,7 @@ nav: - Collective IR: - Design Goals: collective/design_goals.md - Specification: collective/collective.md + - Layouts: collective/layouts.md - Dataflow Task IR: dataflowtask/dataflowtask.md markdown_extensions: diff --git a/samples/collective/hard_reduce_1.ref_tile b/samples/collective/hard_reduce_1.ref_tile new file mode 100644 index 00000000..9a0dafdb --- /dev/null +++ b/samples/collective/hard_reduce_1.ref_tile @@ -0,0 +1,25 @@ +[0:1 , 0:1] +[0:1 , 1:2] +[0:1 , 2:3] +[0:1 , 3:4] +[0:1 , 4:5] +[1:2 , 0:1] +[1:2 , 1:2] +[1:2 , 2:3] +[1:2 , 3:4] +[1:2 , 4:5] +[2:3 , 0:1] +[2:3 , 1:2] +[2:3 , 2:3] +[2:3 , 3:4] +[2:3 , 4:5] +[3:4 , 0:1] +[3:4 , 1:2] +[3:4 , 2:3] +[3:4 , 3:4] +[3:4 , 4:5] +[4:5 , 0:1] +[4:5 , 1:2] +[4:5 , 2:3] +[4:5 , 3:4] +[4:5 , 4:5] \ No newline at end of file diff --git a/samples/collective/hard_reduce_1.sptl b/samples/collective/hard_reduce_1.sptl new file mode 100644 index 00000000..08f03198 --- /dev/null +++ b/samples/collective/hard_reduce_1.sptl @@ -0,0 +1,32 @@ + +kernel @add() { + + place i16 i, i16 j in [0:5, 0:5] { + i16[100] a + } + + dataflow i16 i, i16 j in [0:5, 0:5] { + multistream red = reduce_stream(0, 0) { + graph = grid, + op = CL_SUM, + pipelined = false + } + multistream red1 = reduce_stream(2, 2) { + graph = grid, + op = CL_SUM, + pipelined = true + } + multistream red2 = reduce_stream(4, 4) { + graph = snake, + op = CL_SUM, + pipelined = false 
+ } + } + + compute i16 i, i16 j in [0:5, 0:5] { + a[0] = 1 + await reduce(a, red) + await reduce(a, red1) + await reduce(a, red2) + } +} \ No newline at end of file diff --git a/samples/collective/medium_reduce_grid_1.ref_tile b/samples/collective/medium_reduce_grid_1.ref_tile new file mode 100644 index 00000000..4e4df0da --- /dev/null +++ b/samples/collective/medium_reduce_grid_1.ref_tile @@ -0,0 +1,13 @@ +[0:1 , 0:1] +[2:3 , 2:3] +[4:5 , 4:5] +[0:1 , 4:5] +[0:1 , 1:4] +[1:2 , 0:5] +[3:4 , 0:5] +[2:3 , 0:1] +[2:3 , 1:2] +[2:3 , 3:4] +[2:3 , 4:5] +[4:5 , 0:1] +[4:5 , 1:4] \ No newline at end of file diff --git a/samples/collective/medium_reduce_grid_1.sptl b/samples/collective/medium_reduce_grid_1.sptl new file mode 100644 index 00000000..245cc183 --- /dev/null +++ b/samples/collective/medium_reduce_grid_1.sptl @@ -0,0 +1,32 @@ + +kernel @add() { + + place i16 i, i16 j in [0:5, 0:5] { + i16[100] a + } + + dataflow i16 i, i16 j in [0:5, 0:5] { + multistream red = reduce_stream(0, 0) { + graph = grid, + op = CL_SUM, + pipelined = false + } + multistream red1 = reduce_stream(2, 2) { + graph = grid, + op = CL_SUM, + pipelined = false + } + multistream red2 = reduce_stream(4, 4) { + graph = grid, + op = CL_SUM, + pipelined = false + } + } + + compute i16 i, i16 j in [0:5, 0:5] { + a[0] = 1 + await reduce(a, red) + await reduce(a, red1) + await reduce(a, red2) + } +} \ No newline at end of file diff --git a/samples/collective/simple_bcast.ref_tile b/samples/collective/simple_bcast.ref_tile new file mode 100644 index 00000000..a9533f2a --- /dev/null +++ b/samples/collective/simple_bcast.ref_tile @@ -0,0 +1,3 @@ +[0:1 , 0:1] +[0:1 , 1:N] +[1:N , 0:N] \ No newline at end of file diff --git a/samples/collective/simple_bcast.sptl b/samples/collective/simple_bcast.sptl new file mode 100644 index 00000000..dcd4faa6 --- /dev/null +++ b/samples/collective/simple_bcast.sptl @@ -0,0 +1,18 @@ + +kernel @add(stream readonly a_in, stream[N, N] writeonly out) { + + place u16 i, u16 j 
in [0:N, 0:N] { + f32[K] a; + } + + dataflow i16 i, i16 j in [0:N, 0:N] { + multistream bcast = broadcast_stream(0, 0) { + channels = auto + } + } + + compute i16 i, i16 j in [0:N, 0:N] { + await broadcast(a, bcast); + } + +} diff --git a/samples/collective/simple_reduce_grid_1.ref_tile b/samples/collective/simple_reduce_grid_1.ref_tile new file mode 100644 index 00000000..53ade9be --- /dev/null +++ b/samples/collective/simple_reduce_grid_1.ref_tile @@ -0,0 +1,5 @@ +[0:1 , 0:1] +[0:1 , 1:4] +[0:1 , 4:5] +[1:4 , 0:5] +[4:5 , 0:5] \ No newline at end of file diff --git a/samples/collective/simple_reduce_grid_1.sptl b/samples/collective/simple_reduce_grid_1.sptl new file mode 100644 index 00000000..b5717995 --- /dev/null +++ b/samples/collective/simple_reduce_grid_1.sptl @@ -0,0 +1,20 @@ + +kernel @add() { + + place i16 i, i16 j in [0:5, 0:5] { + i16[100] a + } + + dataflow i16 i, i16 j in [0:5, 0:5] { + multistream red = reduce_stream(0, 0) { + graph = grid, + op = CL_SUM, + pipelined = false + } + } + + compute i16 i, i16 j in [0:5, 0:5] { + a[0] = 1 + await reduce(a, red) + } +} \ No newline at end of file diff --git a/samples/collective/simple_reduce_grid_pipelined_1.ref_tile b/samples/collective/simple_reduce_grid_pipelined_1.ref_tile new file mode 100644 index 00000000..9a0dafdb --- /dev/null +++ b/samples/collective/simple_reduce_grid_pipelined_1.ref_tile @@ -0,0 +1,25 @@ +[0:1 , 0:1] +[0:1 , 1:2] +[0:1 , 2:3] +[0:1 , 3:4] +[0:1 , 4:5] +[1:2 , 0:1] +[1:2 , 1:2] +[1:2 , 2:3] +[1:2 , 3:4] +[1:2 , 4:5] +[2:3 , 0:1] +[2:3 , 1:2] +[2:3 , 2:3] +[2:3 , 3:4] +[2:3 , 4:5] +[3:4 , 0:1] +[3:4 , 1:2] +[3:4 , 2:3] +[3:4 , 3:4] +[3:4 , 4:5] +[4:5 , 0:1] +[4:5 , 1:2] +[4:5 , 2:3] +[4:5 , 3:4] +[4:5 , 4:5] \ No newline at end of file diff --git a/samples/collective/simple_reduce_grid_pipelined_1.sptl b/samples/collective/simple_reduce_grid_pipelined_1.sptl new file mode 100644 index 00000000..2ff97a85 --- /dev/null +++ b/samples/collective/simple_reduce_grid_pipelined_1.sptl 
@@ -0,0 +1,20 @@ + +kernel @add() { + + place i16 i, i16 j in [0:5, 0:5] { + i16[100] a + } + + dataflow i16 i, i16 j in [0:5, 0:5] { + multistream red = reduce_stream(0, 0) { + graph = grid, + op = CL_SUM, + pipelined = true + } + } + + compute i16 i, i16 j in [0:5, 0:5] { + a[0] = 1 + await reduce(a, red) + } +} \ No newline at end of file diff --git a/samples/spatial/simple_reduce.sptl b/samples/collective/simple_reduce_looped.sptl similarity index 87% rename from samples/spatial/simple_reduce.sptl rename to samples/collective/simple_reduce_looped.sptl index 377eb4d0..722e459a 100644 --- a/samples/spatial/simple_reduce.sptl +++ b/samples/collective/simple_reduce_looped.sptl @@ -6,9 +6,9 @@ kernel @add() { } dataflow i16 i, i16 j in [0:5, 0:5] { - multistream red = reduce(0, 0) { + multistream red = reduce_stream(0, 0) { graph = grid, - op = S_SUM, + op = CL_SUM, pipelined = true } } diff --git a/samples/collective/simple_reduce_snake_1.ref_tile b/samples/collective/simple_reduce_snake_1.ref_tile new file mode 100644 index 00000000..f7745597 --- /dev/null +++ b/samples/collective/simple_reduce_snake_1.ref_tile @@ -0,0 +1,15 @@ +[0:1 , 0:1] +[0:1 , 1:2] +[0:1 , 2:3] +[0:1 , 3:4] +[0:1 , 4:5] +[4:5 , 0:1] +[4:5 , 1:2] +[4:5 , 2:3] +[4:5 , 3:4] +[4:5 , 4:5] +[1:4 , 0:1] +[1:4 , 1:2] +[1:4 , 2:3] +[1:4 , 3:4] +[1:4 , 4:5] \ No newline at end of file diff --git a/samples/collective/simple_reduce_snake_1.sptl b/samples/collective/simple_reduce_snake_1.sptl new file mode 100644 index 00000000..ea884cf1 --- /dev/null +++ b/samples/collective/simple_reduce_snake_1.sptl @@ -0,0 +1,20 @@ + +kernel @add() { + + place i16 i, i16 j in [0:5, 0:5] { + i16[100] a + } + + dataflow i16 i, i16 j in [0:5, 0:5] { + multistream red = reduce_stream(0, 0) { + graph = snake, + op = CL_SUM, + pipelined = false + } + } + + compute i16 i, i16 j in [0:5, 0:5] { + a[0] = 1 + await reduce(a, red) + } +} \ No newline at end of file diff --git 
a/samples/collective/simple_reduce_snake_pipelined_1.ref_tile b/samples/collective/simple_reduce_snake_pipelined_1.ref_tile new file mode 100644 index 00000000..9a0dafdb --- /dev/null +++ b/samples/collective/simple_reduce_snake_pipelined_1.ref_tile @@ -0,0 +1,25 @@ +[0:1 , 0:1] +[0:1 , 1:2] +[0:1 , 2:3] +[0:1 , 3:4] +[0:1 , 4:5] +[1:2 , 0:1] +[1:2 , 1:2] +[1:2 , 2:3] +[1:2 , 3:4] +[1:2 , 4:5] +[2:3 , 0:1] +[2:3 , 1:2] +[2:3 , 2:3] +[2:3 , 3:4] +[2:3 , 4:5] +[3:4 , 0:1] +[3:4 , 1:2] +[3:4 , 2:3] +[3:4 , 3:4] +[3:4 , 4:5] +[4:5 , 0:1] +[4:5 , 1:2] +[4:5 , 2:3] +[4:5 , 3:4] +[4:5 , 4:5] \ No newline at end of file diff --git a/samples/collective/simple_reduce_snake_pipelined_1.sptl b/samples/collective/simple_reduce_snake_pipelined_1.sptl new file mode 100644 index 00000000..cc6d722a --- /dev/null +++ b/samples/collective/simple_reduce_snake_pipelined_1.sptl @@ -0,0 +1,20 @@ + +kernel @add() { + + place i16 i, i16 j in [0:5, 0:5] { + i16[100] a + } + + dataflow i16 i, i16 j in [0:5, 0:5] { + multistream red = reduce_stream(0, 0) { + graph = snake, + op = CL_SUM, + pipelined = true + } + } + + compute i16 i, i16 j in [0:5, 0:5] { + a[0] = 1 + await reduce(a, red) + } +} \ No newline at end of file diff --git a/samples/spatial/bcast.sptl b/samples/spatial/bcast.sptl deleted file mode 100644 index 36aa8286..00000000 --- a/samples/spatial/bcast.sptl +++ /dev/null @@ -1,45 +0,0 @@ - -kernel @add(stream readonly a_in, stream[N, N] writeonly out) { - - place u16 i, u16 j in [0:N, 0:N] { - f32[K] a; - } - - dataflow i16 i, i16 j in [0:N, 0:N] { - multistream bcast = broadcast_stream(0, 0) { - channels = auto - } - } - - compute i16 i, i16 j in [1:N, 1:N] { - await receive(a, bcast) - await send(a, out[i, j]) - } - - compute i16 i, i16 j in [0, 0] { - await receive(a, a_in) - await send(a, bcast) - await send(a, out[i, j]) - } - - compute i16 i, i16 j in [0, 1:N] { - await foreach i32 k, f32 x in [0:K], receive(bcast) { - a[k] = x + 1 - } - await send(a, out[i, j]) - } - - 
compute i16 i, i16 j in [1:N, 0] { - await receive(a, bcast) - await send(a, out[i, j]) - } - - - - - - compute i16 i, i16 j in [0:N, 0:N] { - broadcast(a, bcast) - } - -} diff --git a/samples/spatial/reduce.sptl b/samples/spatial/reduce.sptl deleted file mode 100644 index 296d8d1b..00000000 --- a/samples/spatial/reduce.sptl +++ /dev/null @@ -1,37 +0,0 @@ - -kernel @add(stream[N, N] readonly a_in, stream writeonly out) { - - place u16 i, u16 j in [0:N, 0:N] { - f32[K] a; - } - - dataflow i16 i, i16 j in [0:N, 0:N] { - multistream red = reduce(0, 0) { - channels = auto - graph = snake - op = sum - } - } - - compute i16 i, i16 j in [1:N, 1:N] { - await receive(a, a_in) - await reduce(a, red) - } - - compute i16 i, i16 j in [0, 0] { - await receive(a, a_in) - await reduce(a, red) - await send(a, out) - } - - compute i16 i, i16 j in [0, 1:N] { - await receive(a, a_in) - await reduce(a, red) - } - - compute i16 i, i16 j in [1:N, 0] { - await receive(a, a_in) - await reduce(a, red) - } - -} diff --git a/samples/spatial/simple_reduce_after.sptl b/samples/spatial/simple_reduce_after.sptl deleted file mode 100644 index 09dca704..00000000 --- a/samples/spatial/simple_reduce_after.sptl +++ /dev/null @@ -1,31 +0,0 @@ -## look at this idea - -kernel @add() { - - place i16 i, i16 j in [0:1, 0:2] { - i16[1] a; - i16[1] tmp; - } - - dataflow i16 i, i16 j in [0:1, 0:2] { - stream reduce0 = relative_stream(0, 1) { - hops = [(0, 1)], - channel = auto - } - } - - # -> stream westwards = relative_stream(-1, 0); - - compute i16 i, i16 j in [0, 0] { - a[0] = 1; - await foreach i32 reduce_runner, i16 reduce_receive in [0:1], receive(reduce0) { - tmp[0] = a[reduce_runner] + reduce_receive; - await send(tmp) - } - } - - compute i16 i, i16 j in [0, 1] { - a[0] = 1; - await send(a, reduce0); - } -} \ No newline at end of file diff --git a/samples/spatial/simple_reduce_four.sptl b/samples/spatial/simple_reduce_four.sptl deleted file mode 100644 index 37672e90..00000000 --- 
a/samples/spatial/simple_reduce_four.sptl +++ /dev/null @@ -1,22 +0,0 @@ - -kernel @add() { - - place i16 i, i16 j in [0:1, 0:4] { - i16[1] a - i16[100] b - } - - dataflow i16 i, i16 j in [0:1, 0:4] { - multistream red = reduce(0, 0) { - channels = auto, - graph = 1, - op = 2 - } - } - - compute i16 i, i16 j in [0:1, 0:4] { - a[0] = 1 - await reduce(a, red) - } - -} \ No newline at end of file diff --git a/samples/spatial/simple_reduce_intermediate.sptl b/samples/spatial/simple_reduce_intermediate.sptl deleted file mode 100644 index 1367e63e..00000000 --- a/samples/spatial/simple_reduce_intermediate.sptl +++ /dev/null @@ -1,22 +0,0 @@ - -kernel @add(stream[1, 2]) { - - dataflow i16 i, i16 j in [0:1, 0:2] { - multistream red = reduce(0, 0) { - channels = auto, - graph = 1, - op = 2 - } - } - - compute i16 i, i16 j in [0, 0] { - i16 a = 1 - await reduce(a, red) - } - - compute i16 i, i16 j in [0, 1] { - i16 a = 1 - await reduce(a, red) - } - -} \ No newline at end of file diff --git a/samples/spatial/simple_reduce_three.sptl b/samples/spatial/simple_reduce_three.sptl deleted file mode 100644 index 5337281e..00000000 --- a/samples/spatial/simple_reduce_three.sptl +++ /dev/null @@ -1,21 +0,0 @@ - -kernel @add() { - - place i16 i, i16 j in [0:1, 0:3] { - i16[1] a - } - - dataflow i16 i, i16 j in [0:1, 0:3] { - multistream red = reduce(0, 0) { - channels = auto, - graph = 1, - op = 2 - } - } - - compute i16 i, i16 j in [0:1, 0:3] { - a[0] = 1 - await reduce(a, red) - } - -} \ No newline at end of file diff --git a/spatialstencil/optimizations/optimization_pass.py b/spatialstencil/optimizations/optimization_pass.py index b32b3276..1cda1e3a 100644 --- a/spatialstencil/optimizations/optimization_pass.py +++ b/spatialstencil/optimizations/optimization_pass.py @@ -1,4 +1,5 @@ from spatialstencil.optimizations.spatial_reduce import ReduceOptimizer +from spatialstencil.optimizations.spatial_broadcast import BroadcastOptimizer @@ -6,6 +7,8 @@ def optimization_pass(program): """ 
Runs the spatial optimizations on the program. """ - reduce_optimizer = ReduceOptimizer(program) - out = reduce_optimizer.reduce_subroutine() - return out \ No newline at end of file + broadcast_optimizer = BroadcastOptimizer(program) + pass_1 = broadcast_optimizer.broadcast_subroutine() + reduce_optimizer = ReduceOptimizer(pass_1) + pass_2 = reduce_optimizer.reduce_subroutine() + return pass_2 \ No newline at end of file diff --git a/spatialstencil/optimizations/spatial_broadcast.py b/spatialstencil/optimizations/spatial_broadcast.py new file mode 100644 index 00000000..967ad3da --- /dev/null +++ b/spatialstencil/optimizations/spatial_broadcast.py @@ -0,0 +1,243 @@ +from spatialstencil.syntax.spatial_ir.irnodes import Kernel, DataflowBlock, MulStreamDeclaration, BroadcastRoutingDeclaration, ComputeBlock, ConstantLiteral, SubgridExpression, RangeExpression, Expression, ScalarType, ForeachStatement, ForStatement, MapStatement, AsyncBlock, TernaryOperator, BroadcastStatement, SendStatement, ReceiveStatement, ArraySlice +import spatialstencil.syntax.spatial_ir.irnodes as spa +from spatialstencil.lowering.versioning import Versioning + +class BroadcastOptimizer(): + roots: list[list[int]] = [] + broadcast_operations: dict[str, list[int]] = {} + + def __init__(self, kernel: Kernel) -> None: + self.name = kernel.name + self.parameters = kernel.parameters + self.arguments = kernel.arguments + self.body = kernel.body + self.versioning = Versioning[spa.Identifier](spa.Identifier) + self.roots = [] + self.broadcast_operations = {} + return None + + ## + # Replace the broadcast statements in the compute blocks and change the tiling of the compute statemtents accordingly + # Entry Function + ## + def broadcast_subroutine(self) -> Kernel: + self.find_roots() + if self.roots != []: + self.fix_subgrid() + self.replace_broadcast() + return Kernel(name=self.name, parameters=self.parameters, arguments=self.arguments, body=self.body) + + + ## + # Function to aggregate the roots of 
all broadcast operations + ## + def find_roots(self) -> None: + for elem in self.body: + if isinstance(elem, DataflowBlock): + for stmt in elem.statements: + if isinstance(stmt, MulStreamDeclaration) and isinstance(stmt.routing, BroadcastRoutingDeclaration): + self.roots.append([stmt.x.value.value, stmt.x.value.value + 1, stmt.y.value.value, stmt.y.value.value + 1]) + self.broadcast_operations[stmt.stream_name.name] = [stmt.x.value.value, stmt.y.value.value] + return None + + + ## + # Updates the compute block tiling for the new communication patterns + ## + def fix_subgrid(self) -> None: + newbody = [] + for elem in self.body: + if isinstance(elem, ComputeBlock): + x_start = elem.subgrid.x_range.start.value.value + x_stop = elem.subgrid.x_range.stop.value.value if isinstance(elem.subgrid.x_range.stop.value, ConstantLiteral) else None + y_start = elem.subgrid.y_range.start.value.value + y_stop = elem.subgrid.y_range.stop.value.value if isinstance(elem.subgrid.y_range.stop.value, ConstantLiteral) else None + + ## fix to deal with parameters + x_literal = True if x_stop is None else False + y_literal = True if y_stop is None else False + if x_literal: + x_stop = 9999999999999 + if y_literal: + y_stop = 9999999999999 + grid = [[[x_start, x_stop], [y_start, y_stop]]] + + for com_grid in self.roots: + to_remove = [] + for sub_grid in grid: + if com_grid[0] > sub_grid[0][0] and com_grid[0] < sub_grid[0][1]: + # print("left") + sub_x_start = sub_grid[0][0] + sub_x_stop = sub_grid[0][1] + sub_y_start = sub_grid[1][0] + sub_y_stop = sub_grid[1][1] + grid.append([[sub_x_start, com_grid[0]], [sub_y_start, sub_y_stop]]) + grid.append([[com_grid[0], sub_x_stop], [sub_y_start, sub_y_stop]]) + to_remove.append(sub_grid) + elif com_grid[1] > sub_grid[0][0] and com_grid[1] < sub_grid[0][1]: + # print("right") + sub_x_start = sub_grid[0][0] + sub_x_stop = sub_grid[0][1] + sub_y_start = sub_grid[1][0] + sub_y_stop = sub_grid[1][1] + grid.append([[sub_x_start, com_grid[1]], 
[sub_y_start, sub_y_stop]]) + grid.append([[com_grid[1], sub_x_stop], [sub_y_start, sub_y_stop]]) + to_remove.append(sub_grid) + elif com_grid[2] > sub_grid[1][0] and com_grid[2] < sub_grid[1][1] and com_grid[0] <= sub_grid[0][0] and com_grid[1] >= sub_grid[0][1]: + # print("top") + sub_x_start = sub_grid[0][0] + sub_x_stop = sub_grid[0][1] + sub_y_start = sub_grid[1][0] + sub_y_stop = sub_grid[1][1] + grid.append([[sub_x_start, sub_x_stop], [sub_y_start, com_grid[2]]]) + grid.append([[sub_x_start, sub_x_stop], [com_grid[2], sub_y_stop]]) + to_remove.append(sub_grid) + elif com_grid[3] > sub_grid[1][0] and com_grid[3] < sub_grid[1][1] and com_grid[0] <= sub_grid[0][0] and com_grid[1] >= sub_grid[0][1]: + # print("bottom") + sub_x_start = sub_grid[0][0] + sub_x_stop = sub_grid[0][1] + sub_y_start = sub_grid[1][0] + sub_y_stop = sub_grid[1][1] + grid.append([[sub_x_start, sub_x_stop], [sub_y_start, com_grid[3]]]) + grid.append([[sub_x_start, sub_x_stop], [com_grid[3], sub_y_stop]]) + to_remove.append(sub_grid) + # delete old unused + for rmv in to_remove: + grid.remove(rmv) + + for sub_grid in grid: + if x_literal or y_literal: + if sub_grid[0][1] == 9999999999999: + sub_grid[0][1] = elem.subgrid.x_range.stop.value + else: + sub_grid[0][1] = ConstantLiteral(value=sub_grid[0][1], dtype=elem.subgrid.x_range.start.value.dtype) + if sub_grid[1][1] == 9999999999999: + sub_grid[1][1] = elem.subgrid.y_range.stop.value + else: + sub_grid[1][1] = ConstantLiteral(value=sub_grid[1][1], dtype=elem.subgrid.y_range.start.value.dtype) + + if not x_literal and not y_literal: + for com_grid in grid: + newbody.append( + ComputeBlock( + elem.variables, + SubgridExpression( + RangeExpression( + start=Expression(ConstantLiteral(com_grid[0][0], ScalarType.i32)), + stop=Expression(ConstantLiteral(com_grid[0][1], ScalarType.i32)) + ), + RangeExpression( + start=Expression(ConstantLiteral(com_grid[1][0], ScalarType.i32)), + stop=Expression(ConstantLiteral(com_grid[1][1], ScalarType.i32)) + ) 
+ ), + elem.statements + ) + ) + else: + for com_grid in grid: + newbody.append( + ComputeBlock( + elem.variables, + SubgridExpression( + RangeExpression( + start=Expression(ConstantLiteral(com_grid[0][0], ScalarType.i32)), + stop=Expression(com_grid[0][1]) + ), + RangeExpression( + start=Expression(ConstantLiteral(com_grid[1][0], ScalarType.i32)), + stop=Expression(com_grid[1][1]) + ) + ), + elem.statements + ) + ) + + else: + newbody.append(elem) + + self.body = newbody + return None + + + + ## + # Replace the broadcast statements with send and receive statements + ## + def _replace_broadcast(self, stmt, elem) -> list[Expression]: + x_start = elem.subgrid.x_range.start.value.value + x_stop = elem.subgrid.x_range.stop.value.value if isinstance(elem.subgrid.x_range.stop.value, ConstantLiteral) else None + y_start = elem.subgrid.y_range.start.value.value + y_stop = elem.subgrid.y_range.stop.value.value if isinstance(elem.subgrid.y_range.stop.value, ConstantLiteral) else None + + ## fix to deal with parameters + x_literal = True if x_stop is None else False + y_literal = True if y_stop is None else False + if x_literal: + x_stop = 9999999999999 + if y_literal: + y_stop = 9999999999999 + + name = stmt.stream_name.name + root = self.broadcast_operations[name] + + if x_start == root[0] and y_start == root[1] and x_stop == root[0] + 1 and y_stop == root[1] + 1: + send = SendStatement( + local_array=stmt.local_array, + stream_name=stmt.stream_name, + completion_name=None + ) + return [send] + else: + receive = ReceiveStatement( + local_array=stmt.local_array, + stream_name=stmt.stream_name, + completion_name=None + ) + return [receive] + + + ## + # Function to recursively go through the compute blocks and find all the occurernces of the broadcast statements + ## + def replace_stmt(self, stmt, elem, to_replace) -> list[Expression]: + input_stmt = stmt + if isinstance(stmt, to_replace): + if to_replace == BroadcastStatement: + return self._replace_broadcast(stmt, elem) + + 
# all of these use body + elif isinstance(stmt, ForeachStatement) or isinstance(stmt, ForStatement) or isinstance(stmt, MapStatement) or isinstance(stmt, AsyncBlock): + new_body = [] + for body_stmt in stmt.body: + replaced_stmts = self.replace_stmt(body_stmt, elem, to_replace) + for replaced_stmt in replaced_stmts: + new_body.append(replaced_stmt) + input_stmt.body = new_body + + # uses if_true and if_false + elif isinstance(stmt, TernaryOperator): + print("TODO: TernaryOperator") + print(stmt) + + return [input_stmt] + + + ## + # Changes the occurences of broadcast statements in the compute blocks + ## + def replace_broadcast(self) -> None: + finalbody = [] + for elem in self.body: + if isinstance(elem, ComputeBlock): + statements = [] + for stmt in elem.statements: + new_stmts = self.replace_stmt(stmt, elem, BroadcastStatement) + for nstmt in new_stmts: + statements.append(nstmt) + finalbody.append(ComputeBlock(elem.variables, elem.subgrid, statements)) + else: + finalbody.append(elem) + + self.body = finalbody + return None \ No newline at end of file diff --git a/spatialstencil/optimizations/spatial_reduce.py b/spatialstencil/optimizations/spatial_reduce.py index 391e32fe..e4d14c37 100644 --- a/spatialstencil/optimizations/spatial_reduce.py +++ b/spatialstencil/optimizations/spatial_reduce.py @@ -24,16 +24,29 @@ def __init__(self, kernel: Kernel) -> None: self.arguments = kernel.arguments self.body = kernel.body self.versioning = Versioning[spa.Identifier](spa.Identifier) + self._communication_patterns = None + self.reduce_operations = {} + self.grid_streams = {} + self.snake_streams = {} + self.pipelined = {} return None + + ## + # Replace the reduce statements and change the tiling of the compute statemtents accordingly + # Entry Function + ## def reduce_subroutine(self) -> Kernel: self.change_data_blocks() - self.fix_subgrid() - self.change_compute_blocks() + if self.reduce_operations != {}: + self.fix_subgrid() + self.change_compute_blocks() return 
Kernel(name=self.name, parameters=self.parameters, arguments=self.arguments, body=self.body) - + ## + # Template Send Statement + ## def create_send_statement(self, stmt, pipelined_send, index) -> SendStatement: send = SendStatement( local_array=ArraySlice( @@ -45,6 +58,9 @@ def create_send_statement(self, stmt, pipelined_send, index) -> SendStatement: ) return send + ## + # Template Receive Statement + ## def create_receive_statement(self, stmt, pipelined_receive, index) -> ReceiveStatement: receive = ReceiveStatement( local_array=self.versioning.current_version("pipeline_helper"), @@ -54,6 +70,9 @@ def create_receive_statement(self, stmt, pipelined_receive, index) -> ReceiveSta return receive + ## + # Template Binary Operation + ## def create_binary_operation(self, stmt, current_op, rhs) -> AssignmentStatement: bin_op = AssignmentStatement( destination=ArraySlice( @@ -78,14 +97,11 @@ def create_binary_operation(self, stmt, current_op, rhs) -> AssignmentStatement: return bin_op - - #######recursively replace body -##### replace_bodypart(to_replace, replace_with) return bodyß -## get body -## for all nodes that have a body replace them with the same function call -## if searched object in body return the new object -## return object - + ## + # Function that replaces the reduce statement in the compute statement + # stmt is the reduce statement that is getting replaced + # elem is the compute statement that is getting changed to receive context information + ## def replace_reduce(self, stmt, elem) -> list[Expression]: current_position = [elem.subgrid.x_range.start.value.value, @@ -118,12 +134,12 @@ def replace_reduce(self, stmt, elem) -> list[Expression]: else: raise ValueError(f"Stream name {stream_name} not found in grid_streams or snake_streams.") - if operation_id == "S_SUM": + if operation_id == "CL_SUM": current_op = '+' - elif operation_id == "S_PROD": + elif operation_id == "CL_PROD": current_op = '*' else: - raise NotImplementedError("Currently only S_SUM 
and S_PROD are supported.") + raise NotImplementedError("Currently only CL_SUM and CL_PROD are supported.") if stream_name in self.grid_streams: pipelined_send = [] @@ -173,14 +189,10 @@ def replace_reduce(self, stmt, elem) -> list[Expression]: else: - print(current_position) - print(root) for con_list in connections: - #print(con_list) for con in con_list[1]: if (current_position[0] >= con[0] and current_position[1] <= con[1] and current_position[2] >= con[2] and current_position[3] <= con[3]): - print(con) if con[8] == 'sender': pipelined_send.append(con_list[0]) elif con[8] == 'receiver': @@ -406,12 +418,12 @@ def replace_reduce(self, stmt, elem) -> list[Expression]: if not receive_stream == None: break - if operation_id == "S_SUM": + if operation_id == "CL_SUM": current_op = '+' - elif operation_id == "S_PROD": + elif operation_id == "CL_PROD": current_op = '*' else: - raise NotImplementedError("Currently only S_SUM and S_PROD are supported.") + raise NotImplementedError("Currently only CL_SUM and CL_PROD are supported.") # change receive statement @@ -537,10 +549,12 @@ def replace_reduce(self, stmt, elem) -> list[Expression]: + ## + # Function to recursively go through the compute blocks and find all the occurernces of the reduce statements + ## def replace_stmt(self, stmt, elem, to_replace) -> list[Expression]: input_stmt = stmt if isinstance(stmt, to_replace): - print("directly found") if to_replace == ReduceStatement: return self.replace_reduce(stmt, elem) @@ -555,402 +569,419 @@ def replace_stmt(self, stmt, elem, to_replace) -> list[Expression]: # uses if_true and if_false elif isinstance(stmt, TernaryOperator): - print("TernaryOperator") + print("TODO: TernaryOperator") print(stmt) return [input_stmt] - - #exit() - def create_communication_patterns(self, x_start, x_stop, y_start, y_stop, x, y, name, graph, pipelined) -> None: - if x < x_start or x >= x_stop or y < y_start or y >= y_stop: - if x == x_stop or y == y_stop: - raise ValueError(f"The 
communication point (x, y) = ({x}, {y}) is not within the subgrid" + - f"[x_start, x_stop, y_start, y_stop] = [{x_start}, {x_stop}, {y_start}, {y_stop}] for the operation {name}." + - f" Remember that the stop value is exclusive.") - raise ValueError(f"The communication point (x, y) = ({x}, {y}) is not within the subgrid" + - f"[x_start, x_stop, y_start, y_stop] = [{x_start}, {x_stop}, {y_start}, {y_stop}] for the operation {name}.") + + ## + # Defines the snake communication pattern + ## + def snake_communication_pattern(self, x_start, x_stop, y_start, y_stop, x, y, name, pipelined) -> None: communication = [] - self.pipelined.update({name : False}) # not implemented yet - mode = graph - if mode == 'snake': - if y == y_start: - if (y_stop - 1 - y_start) % 2 == 0: - - # horizontal movement - if pipelined and x_stop - x_start > 2: - if (x_stop - x_start) % 2 != 0: - communication.append([x_start, x_stop - 1, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) - communication.append([x_start + 1, x_stop, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) - communication.append([x_start, x_stop - 1, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) - communication.append([x_start + 1, x_stop, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) - else: - communication.append([x_start, x_stop, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) - communication.append([x_start + 1, x_stop - 1, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) - communication.append([x_start, x_stop, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) - communication.append([x_start + 1, x_stop - 1, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) + if y == y_start: + if (y_stop - 1 - y_start) % 2 == 0: + + # horizontal movement + if pipelined and x_stop - x_start > 2: + if (x_stop - x_start) % 2 != 0: + communication.append([x_start, x_stop - 1, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start + 
1, x_stop, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start, x_stop - 1, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) + communication.append([x_start + 1, x_stop, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) else: - if x_stop - x_start > 1: - communication.append([x_start, x_stop, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 1]) - communication.append([x_start, x_stop, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 1]) - - # vertical movement - # not dependent on pipelined as if we have a column it's already pipelined - if x == x_start: - # print('upper left corner odd') - if y_stop - y_start > 2: - communication.append([x_start, x_start + 1, y_start + 1, y_stop , 0, -1, 1, 1]) - if y_stop - y_start > 1: - communication.append([x_stop - 1, x_stop, y_start, y_stop - 1, 0, -1, 1, 1]) - if x == x_stop - 1: - # print('upper right corner odd') - if y_stop - y_start > 2: - communication.append([x_stop - 1, x_stop, y_start + 1, y_stop, 0, -1, 1, 1]) - if y_stop - y_start > 1: - communication.append([x_start, x_start + 1, y_start, y_stop - 1, 0, -1, 1, 1]) + communication.append([x_start, x_stop, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start + 1, x_stop - 1, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start, x_stop, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) + communication.append([x_start + 1, x_stop - 1, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) else: + if x_stop - x_start > 1: + communication.append([x_start, x_stop, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 1]) + communication.append([x_start, x_stop, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 1]) - # horizontal movement - if pipelined and x_stop - x_start > 2: - if (x_stop - x_start) % 2 != 0: - communication.append([x_start, x_stop - 1, y_start, y_stop - 1, -1 if x == x_start else 1, 0, 1, 2]) - 
communication.append([x_start + 1, x_stop, y_start, y_stop - 1, -1 if x == x_start else 1, 0, 1, 2]) - communication.append([x_start, x_stop - 1, y_start + 1, y_stop, 1 if x == x_start else -1, 0, 1, 2]) - communication.append([x_start + 1, x_stop, y_start + 1, y_stop, 1 if x == x_start else -1, 0, 1, 2]) - else: - communication.append([x_start, x_stop, y_start, y_stop - 1, -1 if x == x_start else 1, 0, 1, 2]) - communication.append([x_start + 1, x_stop - 1, y_start, y_stop - 1, -1 if x == x_start else 1, 0, 1, 2]) - communication.append([x_start, x_stop, y_start + 1, y_stop, 1 if x == x_start else -1, 0, 1, 2]) - communication.append([x_start + 1, x_stop - 1, y_start + 1, y_stop, 1 if x == x_start else -1, 0, 1, 2]) - else: - if x_stop - x_start > 1: - communication.append([x_start, x_stop, y_start, y_stop - 1, -1 if x == x_start else 1, 0, 1, 2]) - communication.append([x_start, x_stop, y_start + 1, y_stop, 1 if x == x_start else -1, 0, 1, 2]) - - # vertical movement - # not dependent on pipelined as if we have a column it's already pipelined - if x == x_start: - # print('upper left corner even') - if y_stop - y_start > 2: - communication.append([x_start, x_start + 1, y_start + 1, y_stop - 1, 0, -1, 1, 1]) - if y_stop - y_start > 1: - communication.append([x_stop - 1, x_stop, y_start, y_stop, 0, -1, 1, 1]) - if x == x_stop - 1: - # print('upper right corner even') - if y_stop - y_start > 2: - communication.append([x_stop - 1, x_stop, y_start + 1, y_stop - 1, 0, -1, 1, 1]) - if y_stop - y_start > 1: - communication.append([x_start, x_start + 1, y_start, y_stop, 0, -1, 1, 1]) + # vertical movement + # not dependent on pipelined as if we have a column it's already pipelined + if x == x_start: + # print('upper left corner odd') + if y_stop - y_start > 2: + communication.append([x_start, x_start + 1, y_start + 1, y_stop , 0, -1, 1, 1]) + if y_stop - y_start > 1: + communication.append([x_stop - 1, x_stop, y_start, y_stop - 1, 0, -1, 1, 1]) + if x == x_stop - 1: + # 
print('upper right corner odd') + if y_stop - y_start > 2: + communication.append([x_stop - 1, x_stop, y_start + 1, y_stop, 0, -1, 1, 1]) + if y_stop - y_start > 1: + communication.append([x_start, x_start + 1, y_start, y_stop - 1, 0, -1, 1, 1]) + else: - elif y == y_stop - 1: - if (y_stop - 1 - y_start) % 2 == 0: - - # horizontal movement - if pipelined and x_stop - x_start > 2: - if (x_stop - x_start) % 2 != 0: - communication.append([x_start, x_stop - 1, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) - communication.append([x_start + 1, x_stop, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) - communication.append([x_start, x_stop - 1, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) - communication.append([x_start + 1, x_stop, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) - else: - communication.append([x_start, x_stop, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) - communication.append([x_start + 1, x_stop - 1, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) - communication.append([x_start, x_stop, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) - communication.append([x_start + 1, x_stop - 1, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) + # horizontal movement + if pipelined and x_stop - x_start > 2: + if (x_stop - x_start) % 2 != 0: + communication.append([x_start, x_stop - 1, y_start, y_stop - 1, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start + 1, x_stop, y_start, y_stop - 1, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start, x_stop - 1, y_start + 1, y_stop, 1 if x == x_start else -1, 0, 1, 2]) + communication.append([x_start + 1, x_stop, y_start + 1, y_stop, 1 if x == x_start else -1, 0, 1, 2]) else: - if x_stop - x_start > 1: - communication.append([x_start, x_stop, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) - communication.append([x_start, x_stop, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) - - # 
vertical movement - # not dependent on pipelined as if we have a column it's already pipelined - if x == x_start: - # print('lower left corner odd') - if y_stop - y_start > 2: - communication.append([x_start, x_start + 1, y_start, y_stop - 1, 0, 1, 1, 1]) - if y_stop - y_start > 1: - communication.append([x_stop - 1, x_stop, y_start + 1, y_stop, 0, 1, 1, 1]) - if x == x_stop - 1: - # print('lower right corner odd') - if y_stop - y_start > 2: - communication.append([x_stop - 1, x_stop, y_start, y_stop - 1, 0, 1, 1, 1]) - if y_stop - y_start > 1: - communication.append([x_start, x_start + 1, y_start + 1, y_stop, 0, 1, 1, 1]) + communication.append([x_start, x_stop, y_start, y_stop - 1, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start + 1, x_stop - 1, y_start, y_stop - 1, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start, x_stop, y_start + 1, y_stop, 1 if x == x_start else -1, 0, 1, 2]) + communication.append([x_start + 1, x_stop - 1, y_start + 1, y_stop, 1 if x == x_start else -1, 0, 1, 2]) else: + if x_stop - x_start > 1: + communication.append([x_start, x_stop, y_start, y_stop - 1, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start, x_stop, y_start + 1, y_stop, 1 if x == x_start else -1, 0, 1, 2]) - # horizontal movement - if pipelined and x_stop - x_start > 1: - if (x_stop - x_start) % 2 != 0: - communication.append([x_start, x_stop - 1, y_start + 1, y_stop, -1 if x == x_start else 1, 0, 1, 2]) - communication.append([x_start + 1, x_stop, y_start + 1, y_stop, -1 if x == x_start else 1, 0, 1, 2]) - communication.append([x_start, x_stop - 1, y_start, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) - communication.append([x_start + 1, x_stop, y_start, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) - else: - communication.append([x_start, x_stop, y_start + 1, y_stop, -1 if x == x_start else 1, 0, 1, 2]) - communication.append([x_start + 1, x_stop - 1, y_start + 1, y_stop, -1 if x == x_start else 1, 0, 1, 2]) 
- communication.append([x_start, x_stop, y_start, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) - communication.append([x_start + 1, x_stop - 1, y_start, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) + # vertical movement + # not dependent on pipelined as if we have a column it's already pipelined + if x == x_start: + # print('upper left corner even') + if y_stop - y_start > 2: + communication.append([x_start, x_start + 1, y_start + 1, y_stop - 1, 0, -1, 1, 1]) + if y_stop - y_start > 1: + communication.append([x_stop - 1, x_stop, y_start, y_stop, 0, -1, 1, 1]) + if x == x_stop - 1: + # print('upper right corner even') + if y_stop - y_start > 2: + communication.append([x_stop - 1, x_stop, y_start + 1, y_stop - 1, 0, -1, 1, 1]) + if y_stop - y_start > 1: + communication.append([x_start, x_start + 1, y_start, y_stop, 0, -1, 1, 1]) - else: - communication.append([x_start, x_stop, y_start + 1, y_stop, -1 if x == x_start else 1, 0, 1, 2]) - communication.append([x_start, x_stop, y_start, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) + elif y == y_stop - 1: + if (y_stop - 1 - y_start) % 2 == 0: - # vertical movement - # not dependent on pipelined as if we have a column it's already pipelined - if x == x_start: - # print('lower left corner even') - if y_stop - y_start > 2: - communication.append([x_start, x_start + 1, y_start + 1, y_stop - 1, 0, 1, 1, 1]) - if y_stop - y_start > 1: - communication.append([x_stop - 1, x_stop, y_start, y_stop, 0, 1, 1, 1]) - if x == x_stop - 1: - # print('lower right corner even') - if y_stop - y_start > 2: - communication.append([x_stop - 1, x_stop, y_start + 1, y_stop - 1, 0, 1, 1, 1]) - if y_stop - y_start > 1: - communication.append([x_start, x_start + 1, y_start, y_stop, 0, 1, 1, 1]) + # horizontal movement + if pipelined and x_stop - x_start > 2: + if (x_stop - x_start) % 2 != 0: + communication.append([x_start, x_stop - 1, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start + 1, x_stop, 
y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start, x_stop - 1, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) + communication.append([x_start + 1, x_stop, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) + else: + communication.append([x_start, x_stop, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start + 1, x_stop - 1, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start, x_stop, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) + communication.append([x_start + 1, x_stop - 1, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) + else: + if x_stop - x_start > 1: + communication.append([x_start, x_stop, y_start, y_stop, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start, x_stop, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) + + # vertical movement + # not dependent on pipelined as if we have a column it's already pipelined + if x == x_start: + # print('lower left corner odd') + if y_stop - y_start > 2: + communication.append([x_start, x_start + 1, y_start, y_stop - 1, 0, 1, 1, 1]) + if y_stop - y_start > 1: + communication.append([x_stop - 1, x_stop, y_start + 1, y_stop, 0, 1, 1, 1]) + if x == x_stop - 1: + # print('lower right corner odd') + if y_stop - y_start > 2: + communication.append([x_stop - 1, x_stop, y_start, y_stop - 1, 0, 1, 1, 1]) + if y_stop - y_start > 1: + communication.append([x_start, x_start + 1, y_start + 1, y_stop, 0, 1, 1, 1]) else: - raise NotImplementedError("Only the corners are implemented for 'snake'") - - self.snake_streams.update({name: communication}) - elif mode == 'grid': - # TODO add steps for pipelined communication - if x == x_start: # horizontal movement - if x_start == x_stop - 1: - # print('no horizontal movement needed') - pass - elif x_stop - x_start == 2: - # print('right to left') - communication.append([x_start, x_stop, y_start, y_stop, 
-1, 0, 1, 1]) - else: - if pipelined: - if (x_stop - x_start) % 2 == 0: - communication.append([x_start, x_stop, y_start, y_stop, -1, 0, 1, 1]) - communication.append([x_start + 1, x_stop - 1, y_start, y_stop, -1, 0, 1, 1]) - else: - communication.append([x_start, x_stop - 1, y_start, y_stop, -1, 0, 1, 1]) - communication.append([x_start + 1, x_stop, y_start, y_stop, -1, 0, 1, 1]) + if pipelined and x_stop - x_start > 1: + if (x_stop - x_start) % 2 != 0: + communication.append([x_start, x_stop - 1, y_start + 1, y_stop, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start + 1, x_stop, y_start + 1, y_stop, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start, x_stop - 1, y_start, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) + communication.append([x_start + 1, x_stop, y_start, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) else: - communication.append([x_start, x_stop, y_start, y_stop, -1, 0, 1, 1]) - # TODO add steps for pipelined communication from here + communication.append([x_start, x_stop, y_start + 1, y_stop, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start + 1, x_stop - 1, y_start + 1, y_stop, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start, x_stop, y_start, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) + communication.append([x_start + 1, x_stop - 1, y_start, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) + + else: + communication.append([x_start, x_stop, y_start + 1, y_stop, -1 if x == x_start else 1, 0, 1, 2]) + communication.append([x_start, x_stop, y_start, y_stop - 1, 1 if x == x_start else -1, 0, 1, 2]) # vertical movement - if y_start == y_stop - 1: - # print('no vertical movement needed') - pass - elif y == y_start: - # print('upper left corner') - if not pipelined or y_stop - y_start <= 2: + # not dependent on pipelined as if we have a column it's already pipelined + if x == x_start: + # print('lower left corner even') + if y_stop - y_start > 2: + 
communication.append([x_start, x_start + 1, y_start + 1, y_stop - 1, 0, 1, 1, 1]) + if y_stop - y_start > 1: + communication.append([x_stop - 1, x_stop, y_start, y_stop, 0, 1, 1, 1]) + if x == x_stop - 1: + # print('lower right corner even') + if y_stop - y_start > 2: + communication.append([x_stop - 1, x_stop, y_start + 1, y_stop - 1, 0, 1, 1, 1]) + if y_stop - y_start > 1: + communication.append([x_start, x_start + 1, y_start, y_stop, 0, 1, 1, 1]) + else: + raise NotImplementedError("Only the corners are implemented for 'snake'") + + self.snake_streams.update({name: communication}) + + + ## + # Defines the grid communication pattern + ## + def grid_communication_pattern(self, x_start, x_stop, y_start, y_stop, x, y, name, pipelined) -> None: + communication = [] + if x == x_start: + # horizontal movement + if x_start == x_stop - 1: + # print('no horizontal movement needed') + pass + elif x_stop - x_start == 2: + # print('right to left') + communication.append([x_start, x_stop, y_start, y_stop, -1, 0, 1, 1]) + else: + if pipelined: + if (x_stop - x_start) % 2 == 0: + communication.append([x_start, x_stop, y_start, y_stop, -1, 0, 1, 1]) + communication.append([x_start + 1, x_stop - 1, y_start, y_stop, -1, 0, 1, 1]) + else: + communication.append([x_start, x_stop - 1, y_start, y_stop, -1, 0, 1, 1]) + communication.append([x_start + 1, x_stop, y_start, y_stop, -1, 0, 1, 1]) + else: + communication.append([x_start, x_stop, y_start, y_stop, -1, 0, 1, 1]) + # TODO add steps for pipelined communication from here + + # vertical movement + if y_start == y_stop - 1: + # print('no vertical movement needed') + pass + elif y == y_start: + # print('upper left corner') + if not pipelined or y_stop - y_start <= 2: + communication.append([x_start, x_start + 1, y_start, y_stop, 0, -1, 1, 1]) + else: + if (y_stop - y_start) % 2 == 0: communication.append([x_start, x_start + 1, y_start, y_stop, 0, -1, 1, 1]) + communication.append([x_start, x_start + 1, y_start + 1, y_stop - 1, 0, -1, 
1, 1]) else: - if (y_stop - y_start) % 2 == 0: - communication.append([x_start, x_start + 1, y_start, y_stop, 0, -1, 1, 1]) - communication.append([x_start, x_start + 1, y_start + 1, y_stop - 1, 0, -1, 1, 1]) - else: - communication.append([x_start, x_start + 1, y_start, y_stop - 1, 0, -1, 1, 1]) - communication.append([x_start, x_start + 1, y_start + 1, y_stop, 0, -1, 1, 1]) - elif y == y_stop - 1: - # print('lower left corner') - if not pipelined or y_stop - y_start <= 2: + communication.append([x_start, x_start + 1, y_start, y_stop - 1, 0, -1, 1, 1]) + communication.append([x_start, x_start + 1, y_start + 1, y_stop, 0, -1, 1, 1]) + elif y == y_stop - 1: + # print('lower left corner') + if not pipelined or y_stop - y_start <= 2: + communication.append([x_start, x_start + 1, y_start, y_stop, 0, 1, 1, 1]) + else: + if (y_stop - y_start) % 2 == 0: communication.append([x_start, x_start + 1, y_start, y_stop, 0, 1, 1, 1]) + communication.append([x_start, x_start + 1, y_start + 1, y_stop - 1, 0, 1, 1, 1]) else: - if (y_stop - y_start) % 2 == 0: - communication.append([x_start, x_start + 1, y_start, y_stop, 0, 1, 1, 1]) - communication.append([x_start, x_start + 1, y_start + 1, y_stop - 1, 0, 1, 1, 1]) - else: - communication.append([x_start, x_start + 1, y_start, y_stop - 1, 0, 1, 1, 1]) - communication.append([x_start, x_start + 1, y_start + 1, y_stop, 0, 1, 1, 1]) + communication.append([x_start, x_start + 1, y_start, y_stop - 1, 0, 1, 1, 1]) + communication.append([x_start, x_start + 1, y_start + 1, y_stop, 0, 1, 1, 1]) + else: + # print('left edge') + if not pipelined: + communication.append([x_start, x_start + 1, y_start, y + 1, 0, 1, 1, 1]) + communication.append([x_start, x_start + 1, y, y_stop, 0, -1, 1, 1]) else: - # print('left edge') - if not pipelined: - communication.append([x_start, x_start + 1, y_start, y + 1, 0, 1, 1, 1]) - communication.append([x_start, x_start + 1, y, y_stop, 0, -1, 1, 1]) - else: - # upper part - if (y - y_start) >= 2: # y is 
inclusive while y_stop is exclusive - if (y - y_start) % 2 == 0: - communication.append([x_start, x_start + 1, y_start, y, 0, 1, 1, 1]) - communication.append([x_start, x_start + 1, y_start + 1, y + 1, 0, 1, 1, 1]) - else: - communication.append([x_start, x_start + 1, y_start, y + 1, 0, 1, 1, 1]) - communication.append([x_start, x_start + 1, y_start + 1, y, 0, 1, 1, 1]) + # upper part + if (y - y_start) >= 2: # y is inclusive while y_stop is exclusive + if (y - y_start) % 2 == 0: + communication.append([x_start, x_start + 1, y_start, y, 0, 1, 1, 1]) + communication.append([x_start, x_start + 1, y_start + 1, y + 1, 0, 1, 1, 1]) else: communication.append([x_start, x_start + 1, y_start, y + 1, 0, 1, 1, 1]) + communication.append([x_start, x_start + 1, y_start + 1, y, 0, 1, 1, 1]) + else: + communication.append([x_start, x_start + 1, y_start, y + 1, 0, 1, 1, 1]) - # lower part - if (y_stop - y) > 2: - if (y_stop - y) % 2 == 0: - communication.append([x_start, x_start + 1, y, y_stop, 0, -1, 1, 1]) - communication.append([x_start, x_start + 1, y + 1, y_stop - 1, 0, -1, 1, 1]) - else: - communication.append([x_start, x_start + 1, y, y_stop - 1, 0, -1, 1, 1]) - communication.append([x_start, x_start + 1, y + 1, y_stop, 0, -1, 1, 1]) - else: + # lower part + if (y_stop - y) > 2: + if (y_stop - y) % 2 == 0: communication.append([x_start, x_start + 1, y, y_stop, 0, -1, 1, 1]) - - - - elif x == x_stop - 1: - # horizontal movement - if x_start == x_stop - 1: - # print('no horizontal movement needed') - pass - elif x_stop - x_start == 2: - # print('left to right') - communication.append([x_start, x_stop, y_start, y_stop, 1, 0, 1, 1]) - else: - # print('left to right') - if pipelined: - if (x_stop - x_start) % 2 == 0: - communication.append([x_start, x_stop, y_start, y_stop, 1, 0, 1, 1]) - communication.append([x_start + 1, x_stop - 1, y_start, y_stop, 1, 0, 1, 1]) + communication.append([x_start, x_start + 1, y + 1, y_stop - 1, 0, -1, 1, 1]) else: - 
communication.append([x_start, x_stop - 1, y_start, y_stop, 1, 0, 1, 1]) - communication.append([x_start + 1, x_stop, y_start, y_stop, 1, 0, 1, 1]) + communication.append([x_start, x_start + 1, y, y_stop - 1, 0, -1, 1, 1]) + communication.append([x_start, x_start + 1, y + 1, y_stop, 0, -1, 1, 1]) else: + communication.append([x_start, x_start + 1, y, y_stop, 0, -1, 1, 1]) + + + + elif x == x_stop - 1: + # horizontal movement + if x_start == x_stop - 1: + # print('no horizontal movement needed') + pass + elif x_stop - x_start == 2: + # print('left to right') + communication.append([x_start, x_stop, y_start, y_stop, 1, 0, 1, 1]) + else: + # print('left to right') + if pipelined: + if (x_stop - x_start) % 2 == 0: communication.append([x_start, x_stop, y_start, y_stop, 1, 0, 1, 1]) + communication.append([x_start + 1, x_stop - 1, y_start, y_stop, 1, 0, 1, 1]) + else: + communication.append([x_start, x_stop - 1, y_start, y_stop, 1, 0, 1, 1]) + communication.append([x_start + 1, x_stop, y_start, y_stop, 1, 0, 1, 1]) + else: + communication.append([x_start, x_stop, y_start, y_stop, 1, 0, 1, 1]) - # vertical movement - if y_start == y_stop - 1: - # print('no vertical movement needed') - pass - elif y == y_start: - # print('upper right corner') - if not pipelined or y_stop - y_start <= 2: + # vertical movement + if y_start == y_stop - 1: + # print('no vertical movement needed') + pass + elif y == y_start: + # print('upper right corner') + if not pipelined or y_stop - y_start <= 2: + communication.append([x_stop - 1, x_stop, y_start, y_stop, 0, -1, 1, 1]) + else: + if (y_stop - y_start) % 2 == 0: communication.append([x_stop - 1, x_stop, y_start, y_stop, 0, -1, 1, 1]) + communication.append([x_stop - 1, x_stop, y_start + 1, y_stop - 1, 0, -1, 1, 1]) else: - if (y_stop - y_start) % 2 == 0: - communication.append([x_stop - 1, x_stop, y_start, y_stop, 0, -1, 1, 1]) - communication.append([x_stop - 1, x_stop, y_start + 1, y_stop - 1, 0, -1, 1, 1]) - else: - 
communication.append([x_stop - 1, x_stop, y_start, y_stop - 1, 0, -1, 1, 1]) - communication.append([x_stop - 1, x_stop, y_start + 1, y_stop, 0, -1, 1, 1]) - elif y == y_stop - 1: - # print('lower right corner') - if not pipelined or y_stop - y_start <= 2: + communication.append([x_stop - 1, x_stop, y_start, y_stop - 1, 0, -1, 1, 1]) + communication.append([x_stop - 1, x_stop, y_start + 1, y_stop, 0, -1, 1, 1]) + elif y == y_stop - 1: + # print('lower right corner') + if not pipelined or y_stop - y_start <= 2: + communication.append([x_stop - 1, x_stop, y_start, y_stop, 0, 1, 1, 1]) + else: + if (y_stop - y_start) % 2 == 0: communication.append([x_stop - 1, x_stop, y_start, y_stop, 0, 1, 1, 1]) + communication.append([x_stop - 1, x_stop, y_start + 1, y_stop - 1, 0, 1, 1, 1]) else: - if (y_stop - y_start) % 2 == 0: - communication.append([x_stop - 1, x_stop, y_start, y_stop, 0, 1, 1, 1]) - communication.append([x_stop - 1, x_stop, y_start + 1, y_stop - 1, 0, 1, 1, 1]) - else: - communication.append([x_stop - 1, x_stop, y_start, y_stop - 1, 0, 1, 1, 1]) - communication.append([x_stop - 1, x_stop, y_start + 1, y_stop, 0, 1, 1, 1]) + communication.append([x_stop - 1, x_stop, y_start, y_stop - 1, 0, 1, 1, 1]) + communication.append([x_stop - 1, x_stop, y_start + 1, y_stop, 0, 1, 1, 1]) + else: + # print('right edge') + if not pipelined: + communication.append([x_stop - 1, x_stop, y_start, y + 1, 0, 1, 1, 1]) + communication.append([x_stop - 1, x_stop, y, y_stop, 0, -1, 1, 1]) else: - # print('right edge') - if not pipelined: - communication.append([x_stop - 1, x_stop, y_start, y + 1, 0, 1, 1, 1]) - communication.append([x_stop - 1, x_stop, y, y_stop, 0, -1, 1, 1]) - else: - # upper part - if (y - y_start) >= 2: # y is inclusive while y_stop is exclusive - if (y - y_start) % 2 == 0: - communication.append([x_stop - 1, x_stop, y_start, y, 0, 1, 1, 1]) - communication.append([x_stop - 1, x_stop, y_start + 1, y + 1, 0, 1, 1, 1]) - else: - communication.append([x_stop - 1, 
x_stop, y_start, y + 1, 0, 1, 1, 1]) - communication.append([x_stop - 1, x_stop, y_start + 1, y, 0, 1, 1, 1]) + # upper part + if (y - y_start) >= 2: # y is inclusive while y_stop is exclusive + if (y - y_start) % 2 == 0: + communication.append([x_stop - 1, x_stop, y_start, y, 0, 1, 1, 1]) + communication.append([x_stop - 1, x_stop, y_start + 1, y + 1, 0, 1, 1, 1]) else: communication.append([x_stop - 1, x_stop, y_start, y + 1, 0, 1, 1, 1]) + communication.append([x_stop - 1, x_stop, y_start + 1, y, 0, 1, 1, 1]) + else: + communication.append([x_stop - 1, x_stop, y_start, y + 1, 0, 1, 1, 1]) - # lower part - if (y_stop - y) > 2: - if (y_stop - y) % 2 == 0: - communication.append([x_stop - 1, x_stop, y, y_stop, 0, -1, 1, 1]) - communication.append([x_stop - 1, x_stop, y + 1, y_stop - 1, 0, -1, 1, 1]) - else: - communication.append([x_stop - 1, x_stop, y, y_stop - 1, 0, -1, 1, 1]) - communication.append([x_stop - 1, x_stop, y + 1, y_stop, 0, -1, 1, 1]) - else: + # lower part + if (y_stop - y) > 2: + if (y_stop - y) % 2 == 0: communication.append([x_stop - 1, x_stop, y, y_stop, 0, -1, 1, 1]) + communication.append([x_stop - 1, x_stop, y + 1, y_stop - 1, 0, -1, 1, 1]) + else: + communication.append([x_stop - 1, x_stop, y, y_stop - 1, 0, -1, 1, 1]) + communication.append([x_stop - 1, x_stop, y + 1, y_stop, 0, -1, 1, 1]) + else: + communication.append([x_stop - 1, x_stop, y, y_stop, 0, -1, 1, 1]) + else: + # horizontal movement + # print('middle') + if not pipelined: + communication.append([x_start, x + 1, y_start, y_stop, 1, 0, 1, 1]) # left to middle + communication.append([x, x_stop, y_start, y_stop, -1, 0, 1, 1]) # right to middle else: - # horizontal movement - # print('middle') - if not pipelined: - communication.append([x_start, x + 1, y_start, y_stop, 1, 0, 1, 1]) # left to middle - communication.append([x, x_stop, y_start, y_stop, -1, 0, 1, 1]) # right to middle - else: - # left - if (x - x_start) >= 2: # x is inclusive while x_stop is exclusive - if (x - 
x_start) % 2 == 0: - communication.append([x_start, x, y_start, y_stop, 1, 0, 1, 1]) - communication.append([x_start + 1, x + 1, y_start, y_stop, 1, 0, 1, 1]) - else: - communication.append([x_start, x + 1, y_start, y_stop, 1, 0, 1, 1]) - communication.append([x_start + 1, x, y_start, y_stop, 1, 0, 1, 1]) + # left + if (x - x_start) >= 2: # x is inclusive while x_stop is exclusive + if (x - x_start) % 2 == 0: + communication.append([x_start, x, y_start, y_stop, 1, 0, 1, 1]) + communication.append([x_start + 1, x + 1, y_start, y_stop, 1, 0, 1, 1]) else: communication.append([x_start, x + 1, y_start, y_stop, 1, 0, 1, 1]) + communication.append([x_start + 1, x, y_start, y_stop, 1, 0, 1, 1]) + else: + communication.append([x_start, x + 1, y_start, y_stop, 1, 0, 1, 1]) - # right - if (x_stop - x) > 2: - if (x_stop - x) % 2 == 0: - communication.append([x, x_stop, y_start, y_stop, -1, 0, 1, 1]) - communication.append([x + 1, x_stop - 1, y_start, y_stop, -1, 0, 1, 1]) - else: - communication.append([x, x_stop - 1, y_start, y_stop, -1, 0, 1, 1]) - communication.append([x + 1, x_stop, y_start, y_stop, -1, 0, 1, 1]) - else: + # right + if (x_stop - x) > 2: + if (x_stop - x) % 2 == 0: communication.append([x, x_stop, y_start, y_stop, -1, 0, 1, 1]) - - # vertical movement - if y_start == y_stop - 1: - # print('no vertical movement needed') - pass - elif y == y_start: - # print('upper edge') - if not pipelined or y_stop - y_start <= 2: + communication.append([x + 1, x_stop - 1, y_start, y_stop, -1, 0, 1, 1]) + else: + communication.append([x, x_stop - 1, y_start, y_stop, -1, 0, 1, 1]) + communication.append([x + 1, x_stop, y_start, y_stop, -1, 0, 1, 1]) + else: + communication.append([x, x_stop, y_start, y_stop, -1, 0, 1, 1]) + + # vertical movement + if y_start == y_stop - 1: + # print('no vertical movement needed') + pass + elif y == y_start: + # print('upper edge') + if not pipelined or y_stop - y_start <= 2: + communication.append([x, x + 1, y_start, y_stop, 0, -1, 1, 1]) + 
else: + if (y_stop - y_start) % 2 == 0: communication.append([x, x + 1, y_start, y_stop, 0, -1, 1, 1]) + communication.append([x, x + 1, y_start + 1, y_stop - 1, 0, -1, 1, 1]) else: - if (y_stop - y_start) % 2 == 0: - communication.append([x, x + 1, y_start, y_stop, 0, -1, 1, 1]) - communication.append([x, x + 1, y_start + 1, y_stop - 1, 0, -1, 1, 1]) - else: - communication.append([x, x + 1, y_start, y_stop - 1, 0, -1, 1, 1]) - communication.append([x, x + 1, y_start + 1, y_stop, 0, -1, 1, 1]) - elif y == y_stop - 1: - # print('lower edge') - if not pipelined or y_stop - y_start <= 2: + communication.append([x, x + 1, y_start, y_stop - 1, 0, -1, 1, 1]) + communication.append([x, x + 1, y_start + 1, y_stop, 0, -1, 1, 1]) + elif y == y_stop - 1: + # print('lower edge') + if not pipelined or y_stop - y_start <= 2: + communication.append([x, x + 1, y_start, y_stop, 0, 1, 1, 1]) + else: + if (y_stop - y_start) % 2 == 0: communication.append([x, x + 1, y_start, y_stop, 0, 1, 1, 1]) + communication.append([x, x + 1, y_start + 1, y_stop - 1, 0, 1, 1, 1]) else: - if (y_stop - y_start) % 2 == 0: - communication.append([x, x + 1, y_start, y_stop, 0, 1, 1, 1]) - communication.append([x, x + 1, y_start + 1, y_stop - 1, 0, 1, 1, 1]) - else: - communication.append([x, x + 1, y_start, y_stop - 1, 0, 1, 1, 1]) - communication.append([x, x + 1, y_start + 1, y_stop, 0, 1, 1, 1]) + communication.append([x, x + 1, y_start, y_stop - 1, 0, 1, 1, 1]) + communication.append([x, x + 1, y_start + 1, y_stop, 0, 1, 1, 1]) + else: + # print('center') + if not pipelined: + communication.append([x, x + 1, y_start, y + 1, 0, 1, 1, 1]) + communication.append([x, x + 1, y, y_stop, 0, -1, 1, 1]) else: - # print('center') - if not pipelined: - communication.append([x, x + 1, y_start, y + 1, 0, 1, 1, 1]) - communication.append([x, x + 1, y, y_stop, 0, -1, 1, 1]) - else: - # upper part - if (y - y_start) >= 2: # y is inclusive while y_stop is exclusive - if (y - y_start) % 2 == 0: - 
communication.append([x, x + 1, y_start, y, 0, 1, 1, 1]) - communication.append([x, x + 1, y_start + 1, y + 1, 0, 1, 1, 1]) - else: - communication.append([x, x + 1, y_start, y + 1, 0, 1, 1, 1]) - communication.append([x, x + 1, y_start + 1, y, 0, 1, 1, 1]) + # upper part + if (y - y_start) >= 2: # y is inclusive while y_stop is exclusive + if (y - y_start) % 2 == 0: + communication.append([x, x + 1, y_start, y, 0, 1, 1, 1]) + communication.append([x, x + 1, y_start + 1, y + 1, 0, 1, 1, 1]) else: communication.append([x, x + 1, y_start, y + 1, 0, 1, 1, 1]) + communication.append([x, x + 1, y_start + 1, y, 0, 1, 1, 1]) + else: + communication.append([x, x + 1, y_start, y + 1, 0, 1, 1, 1]) - # lower part - if (y_stop - y) > 2: - if (y_stop - y) % 2 == 0: - communication.append([x, x + 1, y, y_stop, 0, -1, 1, 1]) - communication.append([x, x + 1, y + 1, y_stop - 1, 0, -1, 1, 1]) - else: - communication.append([x, x + 1, y, y_stop - 1, 0, -1, 1, 1]) - communication.append([x, x + 1, y + 1, y_stop, 0, -1, 1, 1]) - else: + # lower part + if (y_stop - y) > 2: + if (y_stop - y) % 2 == 0: communication.append([x, x + 1, y, y_stop, 0, -1, 1, 1]) - - self.grid_streams.update({name : communication}) + communication.append([x, x + 1, y + 1, y_stop - 1, 0, -1, 1, 1]) + else: + communication.append([x, x + 1, y, y_stop - 1, 0, -1, 1, 1]) + communication.append([x, x + 1, y + 1, y_stop, 0, -1, 1, 1]) + else: + communication.append([x, x + 1, y, y_stop, 0, -1, 1, 1]) + + self.grid_streams.update({name : communication}) + + + ## + # Creates the communication patterns for the reduce operation (snake or grid) + ## + def create_communication_patterns(self, x_start, x_stop, y_start, y_stop, x, y, name, graph, pipelined) -> None: + if x < x_start or x >= x_stop or y < y_start or y >= y_stop: + if x == x_stop or y == y_stop: + raise ValueError(f"The communication point (x, y) = ({x}, {y}) is not within the subgrid" + + f"[x_start, x_stop, y_start, y_stop] = [{x_start}, {x_stop}, 
{y_start}, {y_stop}] for the operation {name}." + + f" Remember that the stop value is exclusive.") + raise ValueError(f"The communication point (x, y) = ({x}, {y}) is not within the subgrid" + + f"[x_start, x_stop, y_start, y_stop] = [{x_start}, {x_stop}, {y_start}, {y_stop}] for the operation {name}.") + communication = [] + self.pipelined.update({name : False}) + mode = graph if graph != 'auto' else 'snake' + if mode == 'snake': + self.snake_communication_pattern(x_start, x_stop, y_start, y_stop, x, y, name, pipelined) + + elif mode == 'grid': + self.grid_communication_pattern(x_start, x_stop, y_start, y_stop, x, y, name, pipelined) else: raise NotImplementedError(f"Communication mode '{mode}' is not implemented.") @@ -958,6 +989,9 @@ def create_communication_patterns(self, x_start, x_stop, y_start, y_stop, x, y, return None + ## + # Updates the datablocks with the new communication patterns + ## def change_data_blocks(self) -> None: newbody = [] self.reduce_operations = {} @@ -971,17 +1005,17 @@ def change_data_blocks(self) -> None: elem.subgrid.x_range.stop.value.value, elem.subgrid.y_range.start.value.value, elem.subgrid.y_range.stop.value.value, - stmt.dx.value.value, - stmt.dy.value.value, + stmt.x.value.value, + stmt.y.value.value, stmt.stream_name.name, stmt.routing.graph, stmt.routing.pipelined) - self.reduce_operations.update({stmt.stream_name.name: [{'op': stmt.routing.op}, [stmt.dx.value.value, stmt.dy.value.value], + self.reduce_operations.update({stmt.stream_name.name: [{'op': stmt.routing.op}, [stmt.x.value.value, stmt.y.value.value], [elem.subgrid.x_range.start.value.value, elem.subgrid.x_range.stop.value.value], [elem.subgrid.y_range.start.value.value, elem.subgrid.y_range.stop.value.value], - [stmt.dx.value.value if (elem.subgrid.y_range.stop.value.value - elem.subgrid.y_range.start.value.value) % 2 == 0 else (elem.subgrid.x_range.stop.value.value - stmt.dx.value.value - 1), - elem.subgrid.y_range.stop.value.value - 1 if 
elem.subgrid.y_range.start.value.value == stmt.dy.value.value else elem.subgrid.y_range.start.value.value], + [stmt.x.value.value if (elem.subgrid.y_range.stop.value.value - elem.subgrid.y_range.start.value.value) % 2 == 0 else (elem.subgrid.x_range.stop.value.value - stmt.x.value.value - 1), + elem.subgrid.y_range.stop.value.value - 1 if elem.subgrid.y_range.start.value.value == stmt.y.value.value else elem.subgrid.y_range.start.value.value], None, None]}) new_grid_streams = [] @@ -1101,7 +1135,7 @@ def change_data_blocks(self) -> None: else: olddataflobblock.append(stmt) - if olddataflobblock != []: # not tested + if olddataflobblock != []: newbody.append(DataflowBlock(variables=elem.variables, subgrid=elem.subgrid, statements=olddataflobblock)) for newdataflobblock in newdataflobblocks: newbody.append( @@ -1137,6 +1171,9 @@ def change_data_blocks(self) -> None: self.body = newbody + ## + # Updates the compute block tiling for the new communication patterns + ## def fix_subgrid(self) -> None: newbody = [] @@ -1146,10 +1183,8 @@ def fix_subgrid(self) -> None: if isinstance(elem, ComputeBlock): x_start = elem.subgrid.x_range.start.value.value x_stop = elem.subgrid.x_range.stop.value.value - x_step = elem.subgrid.x_range.step.value.value if elem.subgrid.x_range.step is not None else None y_start = elem.subgrid.y_range.start.value.value y_stop = elem.subgrid.y_range.stop.value.value - y_step = elem.subgrid.y_range.step.value.value if elem.subgrid.y_range.step is not None else None grid = [[[x_start, x_stop], [y_start, y_stop]]] for stmt in elem.statements: @@ -1325,20 +1360,18 @@ def fix_subgrid(self) -> None: return None + ## + # Changes the occurences of reduce statements in the compute blocks + ## def change_compute_blocks(self) -> None: finalbody = [] for elem in self.body: if isinstance(elem, ComputeBlock): statements = [] - for stmt in elem.statements: # walk operators in baseclass + for stmt in elem.statements: new_stmts = self.replace_stmt(stmt, elem, 
ReduceStatement) - print(new_stmts) - print('*'*50) - print('*'*50) for nstmt in new_stmts: statements.append(nstmt) - print(statements) - print('*'*50) finalbody.append(ComputeBlock(elem.variables, elem.subgrid, statements)) else: finalbody.append(elem) diff --git a/spatialstencil/syntax/spatial_ir/irnodes.py b/spatialstencil/syntax/spatial_ir/irnodes.py index 8d04233b..d3f0230a 100644 --- a/spatialstencil/syntax/spatial_ir/irnodes.py +++ b/spatialstencil/syntax/spatial_ir/irnodes.py @@ -486,15 +486,15 @@ class MulStreamDeclaration(SpatialNode): """ dtype: MultiStreamType stream_name: Identifier - dx: Expression - dy: Expression + x: Expression + y: Expression routing: Optional[Union[BroadcastRoutingDeclaration, ReduceRoutingDeclaration]] = None def validate(self) -> None: assert isinstance(self.dtype, MultiStreamType) assert isinstance(self.stream_name, Identifier) - assert isinstance(self.dx, Expression) - assert isinstance(self.dy, Expression) + assert isinstance(self.x, Expression) + assert isinstance(self.y, Expression) if self.routing: assert isinstance(self.routing, Union[BroadcastRoutingDeclaration, ReduceRoutingDeclaration]) @@ -504,9 +504,9 @@ def as_ir(self, indent: int = 0) -> str: if self.routing: routing_str = f" {{\n{self.routing.as_ir(indent + 1)}\n{' ' * indent}}}" if isinstance(self.routing, ReduceRoutingDeclaration): - return f'{indent_str}multistream<{self.dtype.dtype.as_ir()}> {self.stream_name.as_ir()} = reduce({self.dx.as_ir()}, {self.dy.as_ir()}){routing_str}' + return f'{indent_str}multistream<{self.dtype.dtype.as_ir()}> {self.stream_name.as_ir()} = reduce_stream({self.x.as_ir()}, {self.y.as_ir()}){routing_str}' elif isinstance(self.routing, BroadcastRoutingDeclaration): - return f'{indent_str}multistream<{self.dtype.dtype.as_ir()}> {self.stream_name.as_ir()} = broadcast({self.dx.as_ir()}, {self.dy.as_ir()}){routing_str}' + return f'{indent_str}multistream<{self.dtype.dtype.as_ir()}> {self.stream_name.as_ir()} = 
broadcast_stream({self.x.as_ir()}, {self.y.as_ir()}){routing_str}' else: raise ValueError("Invalid routing declaration") @@ -612,6 +612,27 @@ def as_ir(self, indent: int = 0) -> str: return f'{indent_str}await receive({self.local_array.as_ir()}, {self.stream_name.as_ir()})' +@dataclass +class BroadcastStatement(Statement): + """ + Branch statement for sending data asynchronously through a stream. + """ + local_array: Union[Identifier, ArraySlice] + stream_name: Union[Identifier, ArraySlice] + completion_name: Optional[Completion] = None + + def validate(self) -> None: + assert isinstance(self.local_array, (Identifier, ArraySlice)) + assert isinstance(self.stream_name, (Identifier, ArraySlice)) + if self.completion_name: + assert isinstance(self.completion_name, Completion) + + def as_ir(self, indent: int = 0) -> str: + indent_str = ' ' * indent + if self.completion_name: + return f'{indent_str}{self.completion_name.as_ir()} = broadcast({self.local_array.as_ir()}, {self.stream_name.as_ir()})' + return f'{indent_str}await broadcast({self.local_array.as_ir()}, {self.stream_name.as_ir()})' + @dataclass class ReduceStatement(Statement): """ diff --git a/spatialstencil/syntax/spatial_ir/language.lark b/spatialstencil/syntax/spatial_ir/language.lark index c052cc87..3bc7fcd2 100644 --- a/spatialstencil/syntax/spatial_ir/language.lark +++ b/spatialstencil/syntax/spatial_ir/language.lark @@ -123,8 +123,8 @@ field_declaration : builtin_type identifier (";")? //(";" | NEWLINE) stream_declaration : classic_stream | mul_stream classic_stream : "stream" "<" scalar_type ">" identifier "=" "relative_stream" "(" value_expr "," value_expr ")" ("{" routing "}")? (";")? mul_stream : bcast | red -bcast : "multistream" "<" scalar_type ">" identifier "=" "broadcast" "(" value_expr "," value_expr ")" ("{" broadcast_routing "}")? (";")? -red : "multistream" "<" scalar_type ">" identifier "=" "reduce" "(" value_expr "," value_expr ")" ("{" reduce_routing "}")? (";")? 
+bcast : "multistream" "<" scalar_type ">" identifier "=" "broadcast_stream" "(" value_expr "," value_expr ")" ("{" broadcast_routing "}")? (";")? +red : "multistream" "<" scalar_type ">" identifier "=" "reduce_stream" "(" value_expr "," value_expr ")" ("{" reduce_routing "}")? (";")? vars : identifier ("," identifier)* typed_var : scalar_type identifier typed_vars : typed_var ("," typed_var)* diff --git a/spatialstencil/syntax/spatial_ir/lark_to_ir.py b/spatialstencil/syntax/spatial_ir/lark_to_ir.py index 6a5c51e5..bf8aa010 100644 --- a/spatialstencil/syntax/spatial_ir/lark_to_ir.py +++ b/spatialstencil/syntax/spatial_ir/lark_to_ir.py @@ -128,6 +128,8 @@ def function_call(self, args, meta=None): return irnodes.SendStatement(*arguments, completion_name=completion) elif func == 'receive': return irnodes.ReceiveStatement(*arguments, completion_name=completion) + elif func == 'broadcast': + return irnodes.BroadcastStatement(*arguments, completion_name=completion) elif func == 'reduce': return irnodes.ReduceStatement(*arguments, completion_name=completion) raise SyntaxError(f'Unrecognized free function call to "{func}"') diff --git a/tests/test_collective_ir_parser.py b/tests/test_collective_ir_parser.py new file mode 100644 index 00000000..b6afa8e5 --- /dev/null +++ b/tests/test_collective_ir_parser.py @@ -0,0 +1,78 @@ +from spatialstencil.syntax.spatial_ir import parser +from spatialstencil.optimizations.optimization_pass import optimization_pass +import os +from spatialstencil.optimizations.spatial_reduce import ReduceOptimizer +from spatialstencil.optimizations.spatial_broadcast import BroadcastOptimizer + + + +def _load_ref_file(file) -> list[str]: + # change the file extension to .ref + file = file[:-5] + '.ref_tile' + # read the file and return the lines as a list without the newline character + with open(file, 'r') as f: + return [line.strip() for line in f.readlines()] + +def _tiling_test(file): + """ + Tests a roundtrip IR->parse->IR->parse->IR for 
differences. + + :param file: + :return: + """ + program = parser.parse_file(file) + program_optimized = optimization_pass(program) + ir_1 = program_optimized.as_ir() + #print(ir_1) + + ir_ref = _load_ref_file(file) + count_ref = 0 + for line in ir_ref: + assert ("compute i16 i, i16 j in " + line) in ir_1 + count_ref += 1 + count = 0 + for line in ir_1.splitlines(): + if "compute i16 i, i16 j in " in line: + count += 1 + assert count == count_ref + + + + +def test_simple_bcast(): + file = os.path.join(os.path.dirname(__file__), '..', 'samples', 'collective', 'simple_bcast.sptl') + _tiling_test(file) + +def test_simple_reduce_grid_pipelined_1(): + file = os.path.join(os.path.dirname(__file__), '..', 'samples', 'collective', 'simple_reduce_grid_pipelined_1.sptl') + _tiling_test(file) + +def test_simple_reduce_grid_1(): + file = os.path.join(os.path.dirname(__file__), '..', 'samples', 'collective', 'simple_reduce_grid_1.sptl') + _tiling_test(file) + +def test_simple_reduce_snake_pipelined_1(): + file = os.path.join(os.path.dirname(__file__), '..', 'samples', 'collective', 'simple_reduce_snake_pipelined_1.sptl') + _tiling_test(file) + +def test_simple_reduce_snake_1(): + file = os.path.join(os.path.dirname(__file__), '..', 'samples', 'collective', 'simple_reduce_snake_1.sptl') + _tiling_test(file) + +def test_medium_reduce_grid_1(): + file = os.path.join(os.path.dirname(__file__), '..', 'samples', 'collective', 'medium_reduce_grid_1.sptl') + _tiling_test(file) + +def test_hard_reduce_1(): + file = os.path.join(os.path.dirname(__file__), '..', 'samples', 'collective', 'hard_reduce_1.sptl') + _tiling_test(file) + + +if __name__ == '__main__': + test_simple_bcast() + test_simple_reduce_grid_pipelined_1() + test_simple_reduce_grid_1() + test_simple_reduce_snake_pipelined_1() + test_simple_reduce_snake_1() + test_medium_reduce_grid_1() + #test_hard_reduce_1() \ No newline at end of file diff --git a/tests/test_spatial_ir_parser.py b/tests/test_spatial_ir_parser.py index 
8b2e6c38..d82b36e9 100644 --- a/tests/test_spatial_ir_parser.py +++ b/tests/test_spatial_ir_parser.py @@ -96,8 +96,6 @@ def _rountrip_test(file): program = parser.parse_file(file) program_optimized = optimization_pass(program) ir_1 = program_optimized.as_ir() - print(ir_1) - exit() program2 = parser.parse_string(ir_1) ir_2 = program2.as_ir() assert ir_1 == ir_2 @@ -112,41 +110,12 @@ def test_spatial_roundtrip_two_phase_split(): _rountrip_test(file) -def test_spatial_bcast(): - file = os.path.join(os.path.dirname(__file__), '..', 'samples', 'spatial', 'bcast.sptl') - _rountrip_test(file) - - -def test_simple_reduce(): - file = os.path.join(os.path.dirname(__file__), '..', 'samples', 'spatial', 'simple_reduce.sptl') - _rountrip_test(file) - - -def test_simple_reduce_reference(): - file = os.path.join(os.path.dirname(__file__), '..', 'samples', 'spatial', 'simple_reduce_after.sptl') - _rountrip_test(file) - - -def test_simple_reduce_three(): - file = os.path.join(os.path.dirname(__file__), '..', 'samples', 'spatial', 'simple_reduce_three.sptl') - _rountrip_test(file) - - -def test_simple_reduce_four(): - file = os.path.join(os.path.dirname(__file__), '..', 'samples', 'spatial', 'simple_reduce_four.sptl') - _rountrip_test(file) - if __name__ == '__main__': - # test_spatial_roundtrip_laplacian() - # test_spatial_visitor() - # test_spatial_roundtrip_two_phase() - # test_spatial_roundtrip_two_phase_unrouted() - # test_spatial_roundtrip_two_phase_split() - # test_spatial_roundtrip_forward() - # test_spatial_roundtrip_backward() - # test_spatial_bcast() - test_simple_reduce() - # test_simple_reduce_reference() - # test_simple_reduce_three() - # test_simple_reduce_four() \ No newline at end of file + test_spatial_roundtrip_laplacian() + test_spatial_visitor() + test_spatial_roundtrip_two_phase() + test_spatial_roundtrip_two_phase_unrouted() + test_spatial_roundtrip_two_phase_split() + test_spatial_roundtrip_forward() + test_spatial_roundtrip_backward() \ No newline at end of 
file From f27b846570eea0c75e4168300a8090c95e928a77 Mon Sep 17 00:00:00 2001 From: Niklas Roemer Date: Thu, 13 Feb 2025 16:50:40 +0100 Subject: [PATCH 18/27] test pipeline --- tests/test_collective_ir_parser.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/test_collective_ir_parser.py b/tests/test_collective_ir_parser.py index b6afa8e5..bf068d7e 100644 --- a/tests/test_collective_ir_parser.py +++ b/tests/test_collective_ir_parser.py @@ -1,8 +1,6 @@ from spatialstencil.syntax.spatial_ir import parser from spatialstencil.optimizations.optimization_pass import optimization_pass import os -from spatialstencil.optimizations.spatial_reduce import ReduceOptimizer -from spatialstencil.optimizations.spatial_broadcast import BroadcastOptimizer @@ -63,9 +61,9 @@ def test_medium_reduce_grid_1(): file = os.path.join(os.path.dirname(__file__), '..', 'samples', 'collective', 'medium_reduce_grid_1.sptl') _tiling_test(file) -def test_hard_reduce_1(): - file = os.path.join(os.path.dirname(__file__), '..', 'samples', 'collective', 'hard_reduce_1.sptl') - _tiling_test(file) +# def test_hard_reduce_1(): +# file = os.path.join(os.path.dirname(__file__), '..', 'samples', 'collective', 'hard_reduce_1.sptl') +# _tiling_test(file) if __name__ == '__main__': From 419a4e6ff1e4233dd6590c2649289948017f054b Mon Sep 17 00:00:00 2001 From: Niklas Roemer Date: Thu, 13 Feb 2025 16:58:29 +0100 Subject: [PATCH 19/27] fix versioning numbering --- spatialstencil/lowering/versioning.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spatialstencil/lowering/versioning.py b/spatialstencil/lowering/versioning.py index fb870ba8..8c93ac5e 100644 --- a/spatialstencil/lowering/versioning.py +++ b/spatialstencil/lowering/versioning.py @@ -27,4 +27,4 @@ def current_version(self, name: str) -> T: """ Gets the current version of a variable name. 
""" - return self.cls(name, self._var_counter[name] - 1) + return self.cls(name, self._var_counter[name]) From f771804349865f9561003d830226b51ef1d512ba Mon Sep 17 00:00:00 2001 From: Niklas Roemer Date: Thu, 13 Feb 2025 17:51:33 +0100 Subject: [PATCH 20/27] fix using the correct grid for pipelined reductions --- .../optimizations/spatial_reduce.py | 76 +++++++++---------- tests/test_collective_ir_parser.py | 8 +- 2 files changed, 41 insertions(+), 43 deletions(-) diff --git a/spatialstencil/optimizations/spatial_reduce.py b/spatialstencil/optimizations/spatial_reduce.py index e4d14c37..3e1cabec 100644 --- a/spatialstencil/optimizations/spatial_reduce.py +++ b/spatialstencil/optimizations/spatial_reduce.py @@ -1277,17 +1277,15 @@ def fix_subgrid(self) -> None: # delete old unused for rmv in to_remove: grid.remove(rmv) - else: #pipelined new_grid = [] - for i in range(grid[0][0][0], grid[0][0][1]): - for j in range(grid[0][1][0], grid[0][1][1]): + for i in range(x_start, x_stop): + for j in range(y_start, y_stop): new_grid.append([[i, i + 1], [j, j + 1]]) grid = new_grid - # needs to be tested in combination with grid_streams if self.snake_streams != {}: new_grid = [] @@ -1297,43 +1295,43 @@ def fix_subgrid(self) -> None: for name in self.snake_streams: complete_grid = [self.reduce_operations[name][2], self.reduce_operations[name][3]] pipelined = self.snake_streams[name][0][6] - break + - if not pipelined: - - list_grid = [[x] for x in grid] - - for com_grid in list_grid: - to_remove = [] - for com in com_grid: - if com[0][0] == complete_grid[0][0] and com[0][1] != complete_grid[0][0] + 1: - # print("left") - com_grid.append([[complete_grid[0][0], complete_grid[0][0] + 1], [com[1][0], com[1][1]]]) - com_grid.append([[complete_grid[0][0] + 1, com[0][1]], [com[1][0], com[1][1]]]) - to_remove.append(com) - elif com[0][1] == complete_grid[0][1] and com[0][0] != complete_grid[0][1] - 1: - # print("right") - com_grid.append([[complete_grid[0][1] - 1, complete_grid[0][1]], 
[com[1][0], com[1][1]]]) - com_grid.append([[com[0][0], complete_grid[0][1] - 1], [com[1][0], com[1][1]]]) - to_remove.append(com) - elif com[1][1] - com[1][0] != 1: - # print('multiple rows') - for i in range(com[1][0], com[1][1]): - com_grid.append([[com[0][0], com[0][1]], [i, i + 1]]) - to_remove.append(com) - - for rmv in to_remove: - com_grid.remove(rmv) - - for com in com_grid: - new_grid.append(com) - - else: - for i in range(grid[0][0][0], grid[0][0][1]): - for j in range(grid[0][1][0], grid[0][1][1]): - new_grid.append([[i, i + 1], [j, j + 1]]) + if not pipelined: + + list_grid = [[x] for x in grid] + + for com_grid in list_grid: + to_remove = [] + for com in com_grid: + if com[0][0] == complete_grid[0][0] and com[0][1] != complete_grid[0][0] + 1: + # print("left") + com_grid.append([[complete_grid[0][0], complete_grid[0][0] + 1], [com[1][0], com[1][1]]]) + com_grid.append([[complete_grid[0][0] + 1, com[0][1]], [com[1][0], com[1][1]]]) + to_remove.append(com) + elif com[0][1] == complete_grid[0][1] and com[0][0] != complete_grid[0][1] - 1: + # print("right") + com_grid.append([[complete_grid[0][1] - 1, complete_grid[0][1]], [com[1][0], com[1][1]]]) + com_grid.append([[com[0][0], complete_grid[0][1] - 1], [com[1][0], com[1][1]]]) + to_remove.append(com) + elif com[1][1] - com[1][0] != 1: + # print('multiple rows') + for i in range(com[1][0], com[1][1]): + com_grid.append([[com[0][0], com[0][1]], [i, i + 1]]) + to_remove.append(com) + + for rmv in to_remove: + com_grid.remove(rmv) + + for com in com_grid: + new_grid.append(com) + + else: + for i in range(x_start, x_stop): + for j in range(y_start, y_stop): + new_grid.append([[i, i + 1], [j, j + 1]]) - grid = new_grid + grid = new_grid for com_grid in grid: diff --git a/tests/test_collective_ir_parser.py b/tests/test_collective_ir_parser.py index bf068d7e..e9c1286e 100644 --- a/tests/test_collective_ir_parser.py +++ b/tests/test_collective_ir_parser.py @@ -61,9 +61,9 @@ def test_medium_reduce_grid_1(): file = 
os.path.join(os.path.dirname(__file__), '..', 'samples', 'collective', 'medium_reduce_grid_1.sptl') _tiling_test(file) -# def test_hard_reduce_1(): -# file = os.path.join(os.path.dirname(__file__), '..', 'samples', 'collective', 'hard_reduce_1.sptl') -# _tiling_test(file) +def test_hard_reduce_1(): + file = os.path.join(os.path.dirname(__file__), '..', 'samples', 'collective', 'hard_reduce_1.sptl') + _tiling_test(file) if __name__ == '__main__': @@ -73,4 +73,4 @@ def test_medium_reduce_grid_1(): test_simple_reduce_snake_pipelined_1() test_simple_reduce_snake_1() test_medium_reduce_grid_1() - #test_hard_reduce_1() \ No newline at end of file + test_hard_reduce_1() \ No newline at end of file From b349ec599ebf375edd7c8e30cceb4a18763de438 Mon Sep 17 00:00:00 2001 From: Niklas Roemer Date: Thu, 13 Feb 2025 18:24:11 +0100 Subject: [PATCH 21/27] rename graph -> algorithm, fix receiving all elements in non pipelined case --- samples/collective/hard_reduce_1.sptl | 6 +++--- samples/collective/medium_reduce_grid_1.sptl | 6 +++--- samples/collective/simple_reduce_grid_1.sptl | 2 +- samples/collective/simple_reduce_grid_pipelined_1.sptl | 2 +- samples/collective/simple_reduce_looped.sptl | 2 +- samples/collective/simple_reduce_snake_1.sptl | 2 +- .../collective/simple_reduce_snake_pipelined_1.sptl | 2 +- spatialstencil/optimizations/spatial_reduce.py | 10 +++++----- spatialstencil/syntax/spatial_ir/irnodes.py | 10 +++++----- spatialstencil/syntax/spatial_ir/language.lark | 2 +- 10 files changed, 22 insertions(+), 22 deletions(-) diff --git a/samples/collective/hard_reduce_1.sptl b/samples/collective/hard_reduce_1.sptl index 08f03198..da2f608f 100644 --- a/samples/collective/hard_reduce_1.sptl +++ b/samples/collective/hard_reduce_1.sptl @@ -7,17 +7,17 @@ kernel @add() { dataflow i16 i, i16 j in [0:5, 0:5] { multistream red = reduce_stream(0, 0) { - graph = grid, + algorithm = grid, op = CL_SUM, pipelined = false } multistream red1 = reduce_stream(2, 2) { - graph = grid, +
algorithm = grid, op = CL_SUM, pipelined = true } multistream red2 = reduce_stream(4, 4) { - graph = snake, + algorithm = snake, op = CL_SUM, pipelined = false } diff --git a/samples/collective/medium_reduce_grid_1.sptl b/samples/collective/medium_reduce_grid_1.sptl index 245cc183..9abf7f34 100644 --- a/samples/collective/medium_reduce_grid_1.sptl +++ b/samples/collective/medium_reduce_grid_1.sptl @@ -7,17 +7,17 @@ kernel @add() { dataflow i16 i, i16 j in [0:5, 0:5] { multistream red = reduce_stream(0, 0) { - graph = grid, + algorithm = grid, op = CL_SUM, pipelined = false } multistream red1 = reduce_stream(2, 2) { - graph = grid, + algorithm = grid, op = CL_SUM, pipelined = false } multistream red2 = reduce_stream(4, 4) { - graph = grid, + algorithm = grid, op = CL_SUM, pipelined = false } diff --git a/samples/collective/simple_reduce_grid_1.sptl b/samples/collective/simple_reduce_grid_1.sptl index b5717995..15d4ca62 100644 --- a/samples/collective/simple_reduce_grid_1.sptl +++ b/samples/collective/simple_reduce_grid_1.sptl @@ -7,7 +7,7 @@ kernel @add() { dataflow i16 i, i16 j in [0:5, 0:5] { multistream red = reduce_stream(0, 0) { - graph = grid, + algorithm = grid, op = CL_SUM, pipelined = false } diff --git a/samples/collective/simple_reduce_grid_pipelined_1.sptl b/samples/collective/simple_reduce_grid_pipelined_1.sptl index 2ff97a85..a64eb84b 100644 --- a/samples/collective/simple_reduce_grid_pipelined_1.sptl +++ b/samples/collective/simple_reduce_grid_pipelined_1.sptl @@ -7,7 +7,7 @@ kernel @add() { dataflow i16 i, i16 j in [0:5, 0:5] { multistream red = reduce_stream(0, 0) { - graph = grid, + algorithm = grid, op = CL_SUM, pipelined = true } diff --git a/samples/collective/simple_reduce_looped.sptl b/samples/collective/simple_reduce_looped.sptl index 722e459a..6bcdede0 100644 --- a/samples/collective/simple_reduce_looped.sptl +++ b/samples/collective/simple_reduce_looped.sptl @@ -7,7 +7,7 @@ kernel @add() { dataflow i16 i, i16 j in [0:5, 0:5] { multistream 
red = reduce_stream(0, 0) { - graph = grid, + algorithm = grid, op = CL_SUM, pipelined = true } diff --git a/samples/collective/simple_reduce_snake_1.sptl b/samples/collective/simple_reduce_snake_1.sptl index ea884cf1..b81c445d 100644 --- a/samples/collective/simple_reduce_snake_1.sptl +++ b/samples/collective/simple_reduce_snake_1.sptl @@ -7,7 +7,7 @@ kernel @add() { dataflow i16 i, i16 j in [0:5, 0:5] { multistream red = reduce_stream(0, 0) { - graph = snake, + algorithm = snake, op = CL_SUM, pipelined = false } diff --git a/samples/collective/simple_reduce_snake_pipelined_1.sptl b/samples/collective/simple_reduce_snake_pipelined_1.sptl index cc6d722a..7006db6d 100644 --- a/samples/collective/simple_reduce_snake_pipelined_1.sptl +++ b/samples/collective/simple_reduce_snake_pipelined_1.sptl @@ -7,7 +7,7 @@ kernel @add() { dataflow i16 i, i16 j in [0:5, 0:5] { multistream red = reduce_stream(0, 0) { - graph = snake, + algorithm = snake, op = CL_SUM, pipelined = true } diff --git a/spatialstencil/optimizations/spatial_reduce.py b/spatialstencil/optimizations/spatial_reduce.py index 3e1cabec..cbc3d7a3 100644 --- a/spatialstencil/optimizations/spatial_reduce.py +++ b/spatialstencil/optimizations/spatial_reduce.py @@ -162,7 +162,7 @@ def replace_reduce(self, stmt, elem) -> list[Expression]: ForeachStatement( variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], parameter_range=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), - stop=Expression(ConstantLiteral(1, ScalarType.i32)), + stop=Expression(ConstantLiteral(send_amount, ScalarType.i32)), step=None)], stream_variable=TypedIdentifier(dtype=con[2].dtype, identifier=self.versioning.next_version("reduce_receive")), @@ -434,7 +434,7 @@ def replace_reduce(self, stmt, elem) -> list[Expression]: ForeachStatement( variables=[TypedIdentifier(dtype=ScalarType.i32, identifier=self.versioning.next_version("reduce_runner"))], 
parameter_range=[RangeExpression(start=Expression(ConstantLiteral(0, ScalarType.i32)), - stop=Expression(ConstantLiteral(1, ScalarType.i32)), + stop=Expression(ConstantLiteral(send_amount, ScalarType.i32)), step=None)], stream_variable=TypedIdentifier(dtype=receive_stream[2].dtype, identifier=self.versioning.next_version("reduce_receive")), @@ -966,7 +966,7 @@ def grid_communication_pattern(self, x_start, x_stop, y_start, y_stop, x, y, nam ## # Creates the communication patterns for the reduce operation (snake or grid) ## - def create_communication_patterns(self, x_start, x_stop, y_start, y_stop, x, y, name, graph, pipelined) -> None: + def create_communication_patterns(self, x_start, x_stop, y_start, y_stop, x, y, name, algorithm, pipelined) -> None: if x < x_start or x >= x_stop or y < y_start or y >= y_stop: if x == x_stop or y == y_stop: raise ValueError(f"The communication point (x, y) = ({x}, {y}) is not within the subgrid" + @@ -976,7 +976,7 @@ def create_communication_patterns(self, x_start, x_stop, y_start, y_stop, x, y, f"[x_start, x_stop, y_start, y_stop] = [{x_start}, {x_stop}, {y_start}, {y_stop}] for the operation {name}.") communication = [] self.pipelined.update({name : False}) - mode = graph if graph != 'auto' else 'snake' + mode = algorithm if algorithm != 'auto' else 'snake' if mode == 'snake': self.snake_communication_pattern(x_start, x_stop, y_start, y_stop, x, y, name, pipelined) @@ -1008,7 +1008,7 @@ def change_data_blocks(self) -> None: stmt.x.value.value, stmt.y.value.value, stmt.stream_name.name, - stmt.routing.graph, + stmt.routing.algorithm, stmt.routing.pipelined) self.reduce_operations.update({stmt.stream_name.name: [{'op': stmt.routing.op}, [stmt.x.value.value, stmt.y.value.value], diff --git a/spatialstencil/syntax/spatial_ir/irnodes.py b/spatialstencil/syntax/spatial_ir/irnodes.py index d3f0230a..0637c638 100644 --- a/spatialstencil/syntax/spatial_ir/irnodes.py +++ b/spatialstencil/syntax/spatial_ir/irnodes.py @@ -435,19 +435,19 @@ 
class ReduceRoutingDeclaration(SpatialNode): """ A routing declaration for a reduce, optionally specifying hops and channel. """ - graph: str = '' + algorithm: str = '' op: str = '' pipelined: bool = False def validate(self) -> None: - assert isinstance(self.graph, str) + assert isinstance(self.algorithm, str) assert isinstance(self.op, str) assert isinstance(self.pipelined, bool) def as_ir(self, indent: int = 0) -> str: indent_str = ' ' * indent - return f"{indent_str}graph = {self.graph},\n{indent_str}op = {self.op},\n{indent_str}pipelined = {self.pipelined}" + return f"{indent_str}algorithm = {self.algorithm},\n{indent_str}op = {self.op},\n{indent_str}pipelined = {self.pipelined}" @dataclass @@ -522,11 +522,11 @@ class DataflowBlock(SpatialNode): """ variables: list[TypedIdentifier] subgrid: SubgridExpression - statements: Union[list[RelativeStreamDeclaration], list[MulStreamDeclaration]] + statements: list[Union[list[RelativeStreamDeclaration], list[MulStreamDeclaration]]] def validate(self) -> None: assert all(isinstance(var, TypedIdentifier) for var in self.variables) - assert all(isinstance(stmt, RelativeStreamDeclaration) for stmt in self.statements) or all(isinstance(stmt, MulStreamDeclaration) for stmt in self.statements) + assert all(isinstance(stmt, RelativeStreamDeclaration) or isinstance(stmt, MulStreamDeclaration) for stmt in self.statements) assert len(self.variables) == 2 def as_ir(self, indent: int = 0) -> str: diff --git a/spatialstencil/syntax/spatial_ir/language.lark b/spatialstencil/syntax/spatial_ir/language.lark index 3bc7fcd2..8673c1b8 100644 --- a/spatialstencil/syntax/spatial_ir/language.lark +++ b/spatialstencil/syntax/spatial_ir/language.lark @@ -117,7 +117,7 @@ hop : "(" posneg_integer_literal "," posneg_integer_literal ")" // 2D at the mo hops : "[" hop ("," hop)* "]" routing : "hops" "=" (auto | hops) "," "channel" "=" (auto | integer_literal) broadcast_routing : "channels" "=" (auto | integer_literal) -reduce_routing : "graph" "=" 
(auto | constant_literal) "," "op" "=" constant_literal "," "pipelined" "=" bool_literal +reduce_routing : "algorithm" "=" (auto | constant_literal) "," "op" "=" constant_literal "," "pipelined" "=" bool_literal field_declaration : builtin_type identifier (";")? //(";" | NEWLINE) stream_declaration : classic_stream | mul_stream From 59ecb369601e98005cf52acc2797076e93766b58 Mon Sep 17 00:00:00 2001 From: Niklas Roemer Date: Thu, 13 Feb 2025 21:43:45 +0100 Subject: [PATCH 22/27] fix missing reduces in subgrid + add dataflow test --- samples/collective/hard_reduce_1.ref_tile | 3 +- samples/collective/hard_reduce_2.ref_tile | 16 +++++++ samples/collective/hard_reduce_2.sptl | 44 +++++++++++++++++++ samples/collective/hard_reduce_3.ref_tile | 8 ++++ samples/collective/hard_reduce_3.sptl | 44 +++++++++++++++++++ .../collective/medium_reduce_grid_1.ref_tile | 3 +- samples/collective/simple_bcast.ref_tile | 3 +- .../collective/simple_reduce_grid_1.ref_tile | 3 +- .../simple_reduce_grid_pipelined_1.ref_tile | 3 +- .../collective/simple_reduce_looped.ref_tile | 26 +++++++++++ samples/collective/simple_reduce_looped.sptl | 10 +---- .../collective/simple_reduce_snake_1.ref_tile | 3 +- .../simple_reduce_snake_pipelined_1.ref_tile | 3 +- .../optimizations/spatial_reduce.py | 32 +++++++------- tests/test_collective_ir_parser.py | 25 ++++++++++- 15 files changed, 194 insertions(+), 32 deletions(-) create mode 100644 samples/collective/hard_reduce_2.ref_tile create mode 100644 samples/collective/hard_reduce_2.sptl create mode 100644 samples/collective/hard_reduce_3.ref_tile create mode 100644 samples/collective/hard_reduce_3.sptl create mode 100644 samples/collective/simple_reduce_looped.ref_tile diff --git a/samples/collective/hard_reduce_1.ref_tile b/samples/collective/hard_reduce_1.ref_tile index 9a0dafdb..ef96d70d 100644 --- a/samples/collective/hard_reduce_1.ref_tile +++ b/samples/collective/hard_reduce_1.ref_tile @@ -22,4 +22,5 @@ [4:5 , 1:2] [4:5 , 2:3] [4:5 , 3:4] -[4:5 , 
4:5] \ No newline at end of file +[4:5 , 4:5] +14 \ No newline at end of file diff --git a/samples/collective/hard_reduce_2.ref_tile b/samples/collective/hard_reduce_2.ref_tile new file mode 100644 index 00000000..1a7319c3 --- /dev/null +++ b/samples/collective/hard_reduce_2.ref_tile @@ -0,0 +1,16 @@ +[0:1 , 0:1] +[0:1 , 4:5] +[4:5 , 0:1] +[4:5 , 4:5] +[0:1 , 1:2] +[0:1 , 2:3] +[0:1 , 3:4] +[4:5 , 1:2] +[4:5 , 2:3] +[4:5 , 3:4] +[1:4 , 0:1] +[1:4 , 1:2] +[1:4 , 2:3] +[1:4 , 3:4] +[1:4 , 4:5] +8 \ No newline at end of file diff --git a/samples/collective/hard_reduce_2.sptl b/samples/collective/hard_reduce_2.sptl new file mode 100644 index 00000000..d165aa51 --- /dev/null +++ b/samples/collective/hard_reduce_2.sptl @@ -0,0 +1,44 @@ + +kernel @add() { + + place i16 i, i16 j in [0:5, 0:5] { + i16[100] a + } + + dataflow i16 i, i16 j in [0:5, 0:5] { + multistream red = reduce_stream(0, 0) { + algorithm = grid, + op = CL_SUM, + pipelined = false + } + multistream red2 = reduce_stream(0, 4) { + algorithm = grid, + op = CL_SUM, + pipelined = false + } + multistream red3 = reduce_stream(4, 0) { + algorithm = snake, + op = CL_SUM, + pipelined = false + } + } + + compute i16 i, i16 j in [0:5, 0:5] { + a[0] = 1 + for i32 test1 in [0:999] { + a[0] = 100 + await reduce(a, red) + for i32 test2 in [0:25] { + await reduce(a, red2) + await reduce(a, red) + for i32 test3 in [0:30] { + for i32 test4 in [0:35] { + for i32 test5 in [0:40] { + await reduce(a, red3) + } + } + } + } + } + } +} \ No newline at end of file diff --git a/samples/collective/hard_reduce_3.ref_tile b/samples/collective/hard_reduce_3.ref_tile new file mode 100644 index 00000000..ba06f1a4 --- /dev/null +++ b/samples/collective/hard_reduce_3.ref_tile @@ -0,0 +1,8 @@ +[0:1 , 0:1] +[0:1 , 4:5] +[4:5 , 0:1] +[4:5 , 4:5] +[0:1 , 1:4] +[4:5 , 1:4] +[1:4 , 0:5] +6 \ No newline at end of file diff --git a/samples/collective/hard_reduce_3.sptl b/samples/collective/hard_reduce_3.sptl new file mode 100644 index 
00000000..8a57ea51 --- /dev/null +++ b/samples/collective/hard_reduce_3.sptl @@ -0,0 +1,44 @@ + +kernel @add() { + + place i16 i, i16 j in [0:5, 0:5] { + i16[100] a + } + + dataflow i16 i, i16 j in [0:5, 0:5] { + multistream red = reduce_stream(0, 0) { + algorithm = grid, + op = CL_SUM, + pipelined = false + } + multistream red2 = reduce_stream(0, 4) { + algorithm = grid, + op = CL_SUM, + pipelined = false + } + multistream red3 = reduce_stream(4, 0) { + algorithm = grid, + op = CL_SUM, + pipelined = false + } + } + + compute i16 i, i16 j in [0:5, 0:5] { + a[0] = 1 + for i32 test1 in [0:999] { + a[0] = 100 + await reduce(a, red) + for i32 test2 in [0:25] { + await reduce(a, red2) + await reduce(a, red) + for i32 test3 in [0:30] { + for i32 test4 in [0:35] { + for i32 test5 in [0:40] { + await reduce(a, red3) + } + } + } + } + } + } +} \ No newline at end of file diff --git a/samples/collective/medium_reduce_grid_1.ref_tile b/samples/collective/medium_reduce_grid_1.ref_tile index 4e4df0da..66d2a222 100644 --- a/samples/collective/medium_reduce_grid_1.ref_tile +++ b/samples/collective/medium_reduce_grid_1.ref_tile @@ -10,4 +10,5 @@ [2:3 , 3:4] [2:3 , 4:5] [4:5 , 0:1] -[4:5 , 1:4] \ No newline at end of file +[4:5 , 1:4] +8 \ No newline at end of file diff --git a/samples/collective/simple_bcast.ref_tile b/samples/collective/simple_bcast.ref_tile index a9533f2a..8d20c461 100644 --- a/samples/collective/simple_bcast.ref_tile +++ b/samples/collective/simple_bcast.ref_tile @@ -1,3 +1,4 @@ [0:1 , 0:1] [0:1 , 1:N] -[1:N , 0:N] \ No newline at end of file +[1:N , 0:N] +1 \ No newline at end of file diff --git a/samples/collective/simple_reduce_grid_1.ref_tile b/samples/collective/simple_reduce_grid_1.ref_tile index 53ade9be..ff6a7b52 100644 --- a/samples/collective/simple_reduce_grid_1.ref_tile +++ b/samples/collective/simple_reduce_grid_1.ref_tile @@ -2,4 +2,5 @@ [0:1 , 1:4] [0:1 , 4:5] [1:4 , 0:5] -[4:5 , 0:5] \ No newline at end of file +[4:5 , 0:5] +2 \ No newline at 
end of file diff --git a/samples/collective/simple_reduce_grid_pipelined_1.ref_tile b/samples/collective/simple_reduce_grid_pipelined_1.ref_tile index 9a0dafdb..f1ce4840 100644 --- a/samples/collective/simple_reduce_grid_pipelined_1.ref_tile +++ b/samples/collective/simple_reduce_grid_pipelined_1.ref_tile @@ -22,4 +22,5 @@ [4:5 , 1:2] [4:5 , 2:3] [4:5 , 3:4] -[4:5 , 4:5] \ No newline at end of file +[4:5 , 4:5] +4 \ No newline at end of file diff --git a/samples/collective/simple_reduce_looped.ref_tile b/samples/collective/simple_reduce_looped.ref_tile new file mode 100644 index 00000000..f1ce4840 --- /dev/null +++ b/samples/collective/simple_reduce_looped.ref_tile @@ -0,0 +1,26 @@ +[0:1 , 0:1] +[0:1 , 1:2] +[0:1 , 2:3] +[0:1 , 3:4] +[0:1 , 4:5] +[1:2 , 0:1] +[1:2 , 1:2] +[1:2 , 2:3] +[1:2 , 3:4] +[1:2 , 4:5] +[2:3 , 0:1] +[2:3 , 1:2] +[2:3 , 2:3] +[2:3 , 3:4] +[2:3 , 4:5] +[3:4 , 0:1] +[3:4 , 1:2] +[3:4 , 2:3] +[3:4 , 3:4] +[3:4 , 4:5] +[4:5 , 0:1] +[4:5 , 1:2] +[4:5 , 2:3] +[4:5 , 3:4] +[4:5 , 4:5] +4 \ No newline at end of file diff --git a/samples/collective/simple_reduce_looped.sptl b/samples/collective/simple_reduce_looped.sptl index 6bcdede0..151cf0a6 100644 --- a/samples/collective/simple_reduce_looped.sptl +++ b/samples/collective/simple_reduce_looped.sptl @@ -16,14 +16,8 @@ kernel @add() { compute i16 i, i16 j in [0:5, 0:5] { a[0] = 1 for i32 test1 in [0:999] { - for i32 test2 in [0:999] { - for i32 test3 in [0:999] { - a[0] = 100 - for i32 test4 in [0:999] { - await reduce(a, red) - } - } - } + a[0] = 100 + await reduce(a, red) } } } \ No newline at end of file diff --git a/samples/collective/simple_reduce_snake_1.ref_tile b/samples/collective/simple_reduce_snake_1.ref_tile index f7745597..99a429c3 100644 --- a/samples/collective/simple_reduce_snake_1.ref_tile +++ b/samples/collective/simple_reduce_snake_1.ref_tile @@ -12,4 +12,5 @@ [1:4 , 1:2] [1:4 , 2:3] [1:4 , 3:4] -[1:4 , 4:5] \ No newline at end of file +[1:4 , 4:5] +4 \ No newline at end of file 
diff --git a/samples/collective/simple_reduce_snake_pipelined_1.ref_tile b/samples/collective/simple_reduce_snake_pipelined_1.ref_tile index 9a0dafdb..4bf29eac 100644 --- a/samples/collective/simple_reduce_snake_pipelined_1.ref_tile +++ b/samples/collective/simple_reduce_snake_pipelined_1.ref_tile @@ -22,4 +22,5 @@ [4:5 , 1:2] [4:5 , 2:3] [4:5 , 3:4] -[4:5 , 4:5] \ No newline at end of file +[4:5 , 4:5] +6 \ No newline at end of file diff --git a/spatialstencil/optimizations/spatial_reduce.py b/spatialstencil/optimizations/spatial_reduce.py index cbc3d7a3..6a9dda66 100644 --- a/spatialstencil/optimizations/spatial_reduce.py +++ b/spatialstencil/optimizations/spatial_reduce.py @@ -1177,8 +1177,6 @@ def change_data_blocks(self) -> None: def fix_subgrid(self) -> None: newbody = [] - # change the outer loops to go through everything for each reduce and in that loop change the subgrids for the compute blocks - for elem in self.body: if isinstance(elem, ComputeBlock): x_start = elem.subgrid.x_range.start.value.value @@ -1188,21 +1186,23 @@ def fix_subgrid(self) -> None: grid = [[[x_start, x_stop], [y_start, y_stop]]] for stmt in elem.statements: - red_stmt = None + red_stmt = [] nodes = [stmt] - found = False - while len(nodes) > 0 and not found: - for intermediate_stmt in nodes[0].iter_child_nodes(): - if not isinstance(intermediate_stmt, types.GeneratorType): - nodes.append(intermediate_stmt) - if isinstance(intermediate_stmt, ReduceStatement): # only finds one reduce statement - found = True - red_stmt = intermediate_stmt - nodes.pop(0) - - if red_stmt is not None or isinstance(stmt, ReduceStatement): - if red_stmt is not None: - stmt = red_stmt + while len(nodes) > 0: + for intermediate_stmt in nodes: + if isinstance(intermediate_stmt, ForeachStatement) or isinstance(intermediate_stmt, ForStatement) or isinstance(intermediate_stmt, MapStatement) or isinstance(intermediate_stmt, AsyncBlock): + for element in intermediate_stmt.body: + nodes.append(element) + if 
isinstance(intermediate_stmt, TernaryOperator): + pass + if isinstance(intermediate_stmt, ReduceStatement): + red_stmt.append(intermediate_stmt) + nodes.remove(intermediate_stmt) + + if isinstance(stmt, ReduceStatement): + red_stmt.append(stmt) + + for stmt in red_stmt: stream_name = stmt.stream_name.name if self.reduce_operations[stream_name][5] == None: diff --git a/tests/test_collective_ir_parser.py b/tests/test_collective_ir_parser.py index e9c1286e..4ace22dd 100644 --- a/tests/test_collective_ir_parser.py +++ b/tests/test_collective_ir_parser.py @@ -24,6 +24,9 @@ def _tiling_test(file): #print(ir_1) ir_ref = _load_ref_file(file) + num_dataflow = ir_ref[-1] + ir_ref = ir_ref[:-1] + count_ref = 0 for line in ir_ref: assert ("compute i16 i, i16 j in " + line) in ir_1 @@ -33,6 +36,11 @@ def _tiling_test(file): if "compute i16 i, i16 j in " in line: count += 1 assert count == count_ref + count_dataflow = 0 + for line in ir_1.splitlines(): + if "dataflow i16 i, i16 j in" in line: + count_dataflow += 1 + assert count_dataflow == int(num_dataflow) @@ -57,6 +65,10 @@ def test_simple_reduce_snake_1(): file = os.path.join(os.path.dirname(__file__), '..', 'samples', 'collective', 'simple_reduce_snake_1.sptl') _tiling_test(file) +def test_simple_reduce_looped(): + file = os.path.join(os.path.dirname(__file__), '..', 'samples', 'collective', 'simple_reduce_looped.sptl') + _tiling_test(file) + def test_medium_reduce_grid_1(): file = os.path.join(os.path.dirname(__file__), '..', 'samples', 'collective', 'medium_reduce_grid_1.sptl') _tiling_test(file) @@ -65,6 +77,14 @@ def test_hard_reduce_1(): file = os.path.join(os.path.dirname(__file__), '..', 'samples', 'collective', 'hard_reduce_1.sptl') _tiling_test(file) +def test_hard_reduce_2(): + file = os.path.join(os.path.dirname(__file__), '..', 'samples', 'collective', 'hard_reduce_2.sptl') + _tiling_test(file) + +def test_hard_reduce_3(): + file = os.path.join(os.path.dirname(__file__), '..', 'samples', 'collective', 
'hard_reduce_3.sptl') + _tiling_test(file) + if __name__ == '__main__': test_simple_bcast() @@ -72,5 +92,8 @@ def test_hard_reduce_1(): test_simple_reduce_grid_1() test_simple_reduce_snake_pipelined_1() test_simple_reduce_snake_1() + test_simple_reduce_looped() test_medium_reduce_grid_1() - test_hard_reduce_1() \ No newline at end of file + test_hard_reduce_1() + test_hard_reduce_2() + test_hard_reduce_3() \ No newline at end of file From 2f2ebd3422072ce6efa4b83cb6a8b83d954148c4 Mon Sep 17 00:00:00 2001 From: Niklas Roemer Date: Thu, 13 Feb 2025 23:36:58 +0100 Subject: [PATCH 23/27] adapt imports --- .../optimizations/spatial_broadcast.py | 15 +++++-------- .../optimizations/spatial_reduce.py | 22 ++++++------------- 2 files changed, 12 insertions(+), 25 deletions(-) diff --git a/spatialstencil/optimizations/spatial_broadcast.py b/spatialstencil/optimizations/spatial_broadcast.py index 967ad3da..d28eae02 100644 --- a/spatialstencil/optimizations/spatial_broadcast.py +++ b/spatialstencil/optimizations/spatial_broadcast.py @@ -1,5 +1,6 @@ -from spatialstencil.syntax.spatial_ir.irnodes import Kernel, DataflowBlock, MulStreamDeclaration, BroadcastRoutingDeclaration, ComputeBlock, ConstantLiteral, SubgridExpression, RangeExpression, Expression, ScalarType, ForeachStatement, ForStatement, MapStatement, AsyncBlock, TernaryOperator, BroadcastStatement, SendStatement, ReceiveStatement, ArraySlice -import spatialstencil.syntax.spatial_ir.irnodes as spa +from spatialstencil.syntax.spatial_ir.irnodes import (Kernel, DataflowBlock, MulStreamDeclaration, BroadcastRoutingDeclaration, ComputeBlock, ConstantLiteral, + SubgridExpression, RangeExpression, Expression, ScalarType, ForeachStatement, ForStatement, MapStatement, + AsyncBlock, BroadcastStatement, SendStatement, ReceiveStatement, Identifier) from spatialstencil.lowering.versioning import Versioning class BroadcastOptimizer(): @@ -11,7 +12,7 @@ def __init__(self, kernel: Kernel) -> None: self.parameters = kernel.parameters 
self.arguments = kernel.arguments self.body = kernel.body - self.versioning = Versioning[spa.Identifier](spa.Identifier) + self.versioning = Versioning[Identifier](Identifier) self.roots = [] self.broadcast_operations = {} return None @@ -180,7 +181,7 @@ def _replace_broadcast(self, stmt, elem) -> list[Expression]: name = stmt.stream_name.name root = self.broadcast_operations[name] - + if x_start == root[0] and y_start == root[1] and x_stop == root[0] + 1 and y_stop == root[1] + 1: send = SendStatement( local_array=stmt.local_array, @@ -214,12 +215,6 @@ def replace_stmt(self, stmt, elem, to_replace) -> list[Expression]: for replaced_stmt in replaced_stmts: new_body.append(replaced_stmt) input_stmt.body = new_body - - # uses if_true and if_false - elif isinstance(stmt, TernaryOperator): - print("TODO: TernaryOperator") - print(stmt) - return [input_stmt] diff --git a/spatialstencil/optimizations/spatial_reduce.py b/spatialstencil/optimizations/spatial_reduce.py index 6a9dda66..a0ce140c 100644 --- a/spatialstencil/optimizations/spatial_reduce.py +++ b/spatialstencil/optimizations/spatial_reduce.py @@ -1,9 +1,9 @@ -from spatialstencil.syntax.spatial_ir.irnodes import Kernel, ComputeBlock, ReduceStatement, Expression, SubgridExpression, RangeExpression, ConstantLiteral, ScalarType, DataflowBlock, MulStreamDeclaration, ReduceRoutingDeclaration, RoutingDeclaration, RoutingHop, StreamType, Identifier, TypedIdentifier, ForeachStatement, ArraySlice, BinaryOperator, SendStatement, ReceiveGenerator, AssignmentStatement, RelativeStreamDeclaration, PlaceBlock, Phase, Parameter, KernelArgument, ReceiveStatement, ForStatement,FieldDeclaration,ArrayType, MapStatement, AsyncBlock, TernaryOperator -from typing import Union, Tuple, Optional, Literal -import spatialstencil.syntax.spatial_ir.irnodes as spa +from spatialstencil.syntax.spatial_ir.irnodes import (Kernel, ComputeBlock, ReduceStatement, Expression, SubgridExpression, RangeExpression, ConstantLiteral, ScalarType, + 
DataflowBlock, MulStreamDeclaration, ReduceRoutingDeclaration, StreamType, TypedIdentifier, ForeachStatement, ArraySlice, + BinaryOperator, SendStatement, ReceiveGenerator, AssignmentStatement, RelativeStreamDeclaration, PlaceBlock, Phase, + Parameter, KernelArgument, ReceiveStatement, ForStatement, FieldDeclaration, ArrayType, MapStatement, AsyncBlock, Identifier) +from typing import Union, Optional, Literal from spatialstencil.lowering.versioning import Versioning -import types -# TODO from spatialstencil.syntax.spatial_ir.grid_geometry import Rectangle class ReduceOptimizer(): @@ -23,7 +23,7 @@ def __init__(self, kernel: Kernel) -> None: self.parameters = kernel.parameters self.arguments = kernel.arguments self.body = kernel.body - self.versioning = Versioning[spa.Identifier](spa.Identifier) + self.versioning = Versioning[Identifier](Identifier) self._communication_patterns = None self.reduce_operations = {} self.grid_streams = {} @@ -566,12 +566,6 @@ def replace_stmt(self, stmt, elem, to_replace) -> list[Expression]: for replaced_stmt in replaced_stmts: new_body.append(replaced_stmt) input_stmt.body = new_body - - # uses if_true and if_false - elif isinstance(stmt, TernaryOperator): - print("TODO: TernaryOperator") - print(stmt) - return [input_stmt] @@ -1193,9 +1187,7 @@ def fix_subgrid(self) -> None: if isinstance(intermediate_stmt, ForeachStatement) or isinstance(intermediate_stmt, ForStatement) or isinstance(intermediate_stmt, MapStatement) or isinstance(intermediate_stmt, AsyncBlock): for element in intermediate_stmt.body: nodes.append(element) - if isinstance(intermediate_stmt, TernaryOperator): - pass - if isinstance(intermediate_stmt, ReduceStatement): + elif isinstance(intermediate_stmt, ReduceStatement): red_stmt.append(intermediate_stmt) nodes.remove(intermediate_stmt) From 344378ea52af4287f41613263b3afb36207f8ea0 Mon Sep 17 00:00:00 2001 From: Niklas Roemer Date: Fri, 14 Feb 2025 16:01:18 +0100 Subject: [PATCH 24/27] add layout documentation --- 
irspec/docs/collective/.$base.drawio.bkp | 683 ++++++++++++++++++ irspec/docs/collective/base.drawio | 683 ++++++++++++++++++ irspec/docs/collective/grid.drawio.svg | 4 + irspec/docs/collective/layouts.md | 24 +- irspec/docs/collective/snake.drawio.svg | 4 + .../optimizations/spatial_reduce.py | 2 +- 6 files changed, 1398 insertions(+), 2 deletions(-) create mode 100644 irspec/docs/collective/.$base.drawio.bkp create mode 100644 irspec/docs/collective/base.drawio create mode 100644 irspec/docs/collective/grid.drawio.svg create mode 100644 irspec/docs/collective/snake.drawio.svg diff --git a/irspec/docs/collective/.$base.drawio.bkp b/irspec/docs/collective/.$base.drawio.bkp new file mode 100644 index 00000000..99ca9f27 --- /dev/null +++ b/irspec/docs/collective/.$base.drawio.bkp @@ -0,0 +1,683 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/irspec/docs/collective/base.drawio b/irspec/docs/collective/base.drawio new file mode 100644 index 00000000..83fe4757 --- /dev/null +++ b/irspec/docs/collective/base.drawio @@ -0,0 +1,683 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/irspec/docs/collective/grid.drawio.svg b/irspec/docs/collective/grid.drawio.svg new file mode 100644 index 00000000..eba57855 --- /dev/null +++ b/irspec/docs/collective/grid.drawio.svg @@ -0,0 +1,4 @@ + + + +
*
\ No newline at end of file diff --git a/irspec/docs/collective/layouts.md b/irspec/docs/collective/layouts.md index 1112eb3c..88e6468a 100644 --- a/irspec/docs/collective/layouts.md +++ b/irspec/docs/collective/layouts.md @@ -1,9 +1,31 @@ # Layouts +When using Collective Reduce functions, two different communication schemas / layouts can be used. + ## Usage +To choose the layout, the `algorithm` flag can be set to +``` +algorithm = grid +or +algorithm = snake +or +algorithm = auto +``` +A schematic example for both the snake and grid layout can be found below. Currently `auto` chooses the grid algorithm. The snake algorithm can only be chosen if the root of the reduce is in one of the 4 corners of the communication grid the reduce is defined on. For large arrays snake will maximize throughput while for short arrays grid will minimize latency. + ## Definitions +Below are schematics to understand the logic of the snake and grid pattern. The root of the reduce is marked with a star `*`. + ### Snake -### Grid \ No newline at end of file +The snake pattern currently only works with the root in one of the four corners. It then puts all the PEs on a string favoring horizontal communication. + +![Schematic of the snake layout](snake.drawio.svg) + +### Grid + +The grid pattern works with the root in any PE. The first reduction is horizontal and the second one is vertical. + +![Schematic of the grid layout](grid.drawio.svg) \ No newline at end of file diff --git a/irspec/docs/collective/snake.drawio.svg b/irspec/docs/collective/snake.drawio.svg new file mode 100644 index 00000000..08473e95 --- /dev/null +++ b/irspec/docs/collective/snake.drawio.svg @@ -0,0 +1,4 @@ + + + +
*
\ No newline at end of file diff --git a/spatialstencil/optimizations/spatial_reduce.py b/spatialstencil/optimizations/spatial_reduce.py index a0ce140c..2de39831 100644 --- a/spatialstencil/optimizations/spatial_reduce.py +++ b/spatialstencil/optimizations/spatial_reduce.py @@ -970,7 +970,7 @@ def create_communication_patterns(self, x_start, x_stop, y_start, y_stop, x, y, f"[x_start, x_stop, y_start, y_stop] = [{x_start}, {x_stop}, {y_start}, {y_stop}] for the operation {name}.") communication = [] self.pipelined.update({name : False}) - mode = algorithm if algorithm != 'auto' else 'snake' + mode = algorithm if algorithm != 'auto' else 'grid' if mode == 'snake': self.snake_communication_pattern(x_start, x_stop, y_start, y_stop, x, y, name, pipelined) From 0643c1d644f43677ba45b18af216ea4619115a3c Mon Sep 17 00:00:00 2001 From: Niklas Roemer Date: Fri, 14 Feb 2025 17:27:06 +0100 Subject: [PATCH 25/27] small changes to increase readability --- .../optimizations/spatial_reduce.py | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/spatialstencil/optimizations/spatial_reduce.py b/spatialstencil/optimizations/spatial_reduce.py index 2de39831..4a7fe6ed 100644 --- a/spatialstencil/optimizations/spatial_reduce.py +++ b/spatialstencil/optimizations/spatial_reduce.py @@ -12,7 +12,7 @@ class ReduceOptimizer(): arguments: list[KernelArgument] body: list[PlaceBlock | DataflowBlock | ComputeBlock | Phase] _communication_patterns: Optional[dict[str, dict[tuple[int, int], list[list[list[int]]]]]] = None - reduce_operations: dict[str, dict[str, Union[int, Literal['OP_SUM'], list[int]]]] = {} # needs to be adapted + reduce_operations: dict[str, dict[str, Union[int, Literal['OP_SUM'], list[int]]]] = {} grid_streams: dict[str, list[list]] = {} snake_streams: dict[str, list[list]] = {} pipelined: dict[str, bool] = {} @@ -115,6 +115,8 @@ def replace_reduce(self, stmt, elem) -> list[Expression]: origin = self.reduce_operations[stmt.stream_name.name][4]
complete_grid = [self.reduce_operations[stmt.stream_name.name][2], self.reduce_operations[stmt.stream_name.name][3]] send_identifier = self.reduce_operations[stmt.stream_name.name][5] + + # send_amount is the length of the array that is being sent - needed for the custom for loop send_amount = self.reduce_operations[stmt.stream_name.name][6] if send_amount == None: for elem in self.body: @@ -189,6 +191,7 @@ def replace_reduce(self, stmt, elem) -> list[Expression]: else: + # pipelined for con_list in connections: for con in con_list[1]: if (current_position[0] >= con[0] and current_position[1] <= con[1] @@ -199,6 +202,7 @@ def replace_reduce(self, stmt, elem) -> list[Expression]: pipelined_receive.append(con_list[0]) if pipelined_send != [] and pipelined_receive != []: + # receive first then send newstatements.append( AssignmentStatement( destination=self.versioning.next_version("pipeline_helper"), @@ -294,6 +298,7 @@ def replace_reduce(self, stmt, elem) -> list[Expression]: ) ) elif pipelined_send == [] and pipelined_receive != []: + # receive only newstatements.append( AssignmentStatement( destination=self.versioning.next_version("pipeline_helper"), @@ -381,6 +386,7 @@ def replace_reduce(self, stmt, elem) -> list[Expression]: ) ) elif pipelined_send != [] and pipelined_receive == []: + # send only send = self.create_send_statement(stmt, pipelined_send, 0) newstatements.append( ForStatement( @@ -576,7 +582,7 @@ def replace_stmt(self, stmt, elem, to_replace) -> list[Expression]: def snake_communication_pattern(self, x_start, x_stop, y_start, y_stop, x, y, name, pipelined) -> None: communication = [] if y == y_start: - if (y_stop - 1 - y_start) % 2 == 0: + if (y_stop - y_start) % 2 != 0: # horizontal movement if pipelined and x_stop - x_start > 2: @@ -596,7 +602,7 @@ def snake_communication_pattern(self, x_start, x_stop, y_start, y_stop, x, y, na communication.append([x_start, x_stop, y_start + 1, y_stop - 1, 1 if x == x_start else -1, 0, 1, 1]) # vertical movement - 
# not dependent on pipelined as if we have a column it's already pipelined + # not dependent on pipelined as if we have a column it's already pipelined through the left and right edge being the same edge if x == x_start: # print('upper left corner odd') if y_stop - y_start > 2: @@ -644,7 +650,7 @@ def snake_communication_pattern(self, x_start, x_stop, y_start, y_stop, x, y, na communication.append([x_start, x_start + 1, y_start, y_stop, 0, -1, 1, 1]) elif y == y_stop - 1: - if (y_stop - 1 - y_start) % 2 == 0: + if (y_stop - y_start) % 2 != 0: # horizontal movement if pipelined and x_stop - x_start > 2: @@ -1020,6 +1026,7 @@ def change_data_blocks(self) -> None: elif stmt.stream_name.name in self.snake_streams: current_grid_streams = self.snake_streams[stmt.stream_name.name] + # create intermediate datastructure to express all communication for com in current_grid_streams: newdataflobblocks.append([[com[0], com[1]], [com[2], com[3]], RelativeStreamDeclaration( @@ -1182,6 +1189,8 @@ def fix_subgrid(self) -> None: for stmt in elem.statements: red_stmt = [] nodes = [stmt] + + # get all the reduce statements while len(nodes) > 0: for intermediate_stmt in nodes: if isinstance(intermediate_stmt, ForeachStatement) or isinstance(intermediate_stmt, ForStatement) or isinstance(intermediate_stmt, MapStatement) or isinstance(intermediate_stmt, AsyncBlock): @@ -1202,7 +1211,6 @@ def fix_subgrid(self) -> None: self.reduce_operations[stream_name][5] = tst break - # test if stream_name is in grid_streams if stmt.stream_name.name in self.grid_streams: connections = self.grid_streams[stream_name] @@ -1225,8 +1233,7 @@ def fix_subgrid(self) -> None: reduce_connections.append(send) reduce_connections.append([root[0], root[0] + 1, root[1], root[1] + 1]) - - # needs to be tested properly + for com_grid in reduce_connections: to_remove = [] for sub_grid in grid: @@ -1277,8 +1284,6 @@ def fix_subgrid(self) -> None: new_grid.append([[i, i + 1], [j, j + 1]]) grid = new_grid - - # needs to be 
tested in combination with grid_streams if self.snake_streams != {}: new_grid = [] complete_grid = [] From 35031958938023deedeb0539cea1e1d951ae7dad Mon Sep 17 00:00:00 2001 From: Niklas Roemer Date: Fri, 14 Feb 2025 17:34:26 +0100 Subject: [PATCH 26/27] add to documentation --- irspec/docs/collective/collective.md | 145 ++++++++++++++++++++++++++- 1 file changed, 141 insertions(+), 4 deletions(-) diff --git a/irspec/docs/collective/collective.md b/irspec/docs/collective/collective.md index 05dc7441..41d3ce5d 100644 --- a/irspec/docs/collective/collective.md +++ b/irspec/docs/collective/collective.md @@ -95,13 +95,150 @@ await reduce(data, name) ``` where data is the element/array to reduce on. -???+ example "Example: Simple Broadcast" +???+ example "Example: Simple Reduce with snake communication" ```rust - result + kernel @add() { + place i16 i, i16 j in [0:5 , 0:5] { + i16[100] a + } + dataflow i16 i, i16 j in [0:5:1 , 0:5:1] { + stream reduce = relative_stream(-1, 0) + } + dataflow i16 i, i16 j in [0:5:1 , 1:4:1] { + stream reduce#1 = relative_stream(1, 0) + } + dataflow i16 i, i16 j in [0:1:1 , 1:5:1] { + stream reduce#2 = relative_stream(0, -1) + } + dataflow i16 i, i16 j in [4:5:1 , 0:4:1] { + stream reduce#3 = relative_stream(0, -1) + } + compute i16 i, i16 j in [0:1 , 0:1] { + a[0] = 1 + await foreach i32 reduce_runner, i16 reduce_receive in [0:100], receive(reduce#1) { + a[reduce_runner] = (a[reduce_runner] + reduce_receive) + } + } + compute i16 i, i16 j in [0:1 , 1:2] { + a[0] = 1 + await foreach i32 reduce_runner#1, i16 reduce_receive#1 in [0:100], receive(reduce#1) { + a[reduce_runner#1] = (a[reduce_runner#1] + reduce_receive#1) + } + await send(a, reduce#2) + } + compute i16 i, i16 j in [0:1 , 2:3] { + a[0] = 1 + await foreach i32 reduce_runner#2, i16 reduce_receive#2 in [0:100], receive(reduce#1) { + a[reduce_runner#2] = (a[reduce_runner#2] + reduce_receive#2) + } + await send(a, reduce#2) + } + compute i16 i, i16 j in [0:1 , 3:4] { + a[0] = 1 +
await foreach i32 reduce_runner#3, i16 reduce_receive#3 in [0:100], receive(reduce#1) { + a[reduce_runner#3] = (a[reduce_runner#3] + reduce_receive#3) + } + await send(a, reduce#2) + } + compute i16 i, i16 j in [0:1 , 4:5] { + a[0] = 1 + await foreach i32 reduce_runner#4, i16 reduce_receive#4 in [0:100], receive(reduce#1) { + a[reduce_runner#4] = (a[reduce_runner#4] + reduce_receive#4) + } + await send(a, reduce#3) + } + compute i16 i, i16 j in [4:5 , 0:1] { + a[0] = 1 + await foreach i32 reduce_runner#5, i16 reduce_receive#5 in [0:100], receive(reduce#4) { + a[reduce_runner#5] = (a[reduce_runner#5] + reduce_receive#5) + } + await send(a, reduce#1) + } + compute i16 i, i16 j in [4:5 , 1:2] { + a[0] = 1 + await foreach i32 reduce_runner#6, i16 reduce_receive#6 in [0:100], receive(reduce#2) { + a[reduce_runner#6] = (a[reduce_runner#6] + reduce_receive#6) + } + await send(a, reduce#1) + } + compute i16 i, i16 j in [4:5 , 2:3] { + a[0] = 1 + await foreach i32 reduce_runner#7, i16 reduce_receive#7 in [0:100], receive(reduce#2) { + a[reduce_runner#7] = (a[reduce_runner#7] + reduce_receive#7) + } + await send(a, reduce#1) + } + compute i16 i, i16 j in [4:5 , 3:4] { + a[0] = 1 + await foreach i32 reduce_runner#8, i16 reduce_receive#8 in [0:100], receive(reduce#2) { + a[reduce_runner#8] = (a[reduce_runner#8] + reduce_receive#8) + } + await send(a, reduce#1) + } + compute i16 i, i16 j in [4:5 , 4:5] { + a[0] = 1 + await send(a, reduce#1) + } + compute i16 i, i16 j in [1:4 , 0:1] { + a[0] = 1 + await foreach i32 reduce_runner#9, i16 reduce_receive#9 in [0:100], receive(reduce#1) { + a[reduce_runner#9] = (a[reduce_runner#9] + reduce_receive#9) + } + await send(a, reduce#1) + } + compute i16 i, i16 j in [1:4 , 1:2] { + a[0] = 1 + await foreach i32 reduce_runner#10, i16 reduce_receive#10 in [0:100], receive(reduce#1) { + a[reduce_runner#10] = (a[reduce_runner#10] + reduce_receive#10) + } + await send(a, reduce#1) + } + compute i16 i, i16 j in [1:4 , 2:3] { + a[0] = 1 + await 
foreach i32 reduce_runner#11, i16 reduce_receive#11 in [0:100], receive(reduce#1) { + a[reduce_runner#11] = (a[reduce_runner#11] + reduce_receive#11) + } + await send(a, reduce#1) + } + compute i16 i, i16 j in [1:4 , 3:4] { + a[0] = 1 + await foreach i32 reduce_runner#12, i16 reduce_receive#12 in [0:100], receive(reduce#1) { + a[reduce_runner#12] = (a[reduce_runner#12] + reduce_receive#12) + } + await send(a, reduce#1) + } + compute i16 i, i16 j in [1:4 , 4:5] { + a[0] = 1 + await foreach i32 reduce_runner#13, i16 reduce_receive#13 in [0:100], receive(reduce#1) { + a[reduce_runner#13] = (a[reduce_runner#13] + reduce_receive#13) + } + await send(a, reduce#1) + } + } ``` can be generated from: ```rust - input - ``` \ No newline at end of file + kernel @add() { + + place i16 i, i16 j in [0:5, 0:5] { + i16[100] a + } + + dataflow i16 i, i16 j in [0:5, 0:5] { + multistream red = reduce_stream(0, 0) { + algorithm = snake, + op = CL_SUM, + pipelined = false + } + } + + compute i16 i, i16 j in [0:5, 0:5] { + a[0] = 1 + await reduce(a, red) + } + } + ``` + + The layout for this example can be found as the snake example in the [Layouts section](layouts.md). 
\ No newline at end of file From 6051cf5ba14cbec036f5cac9ce77471cb86ff79e Mon Sep 17 00:00:00 2001 From: Niklas Roemer Date: Fri, 14 Feb 2025 18:19:13 +0100 Subject: [PATCH 27/27] add additional test cases --- .../collective/simple_reduce_grid_2.ref_tile | 8 ++++++ samples/collective/simple_reduce_grid_2.sptl | 20 ++++++++++++++ .../simple_reduce_grid_pipelined_2.ref_tile | 26 +++++++++++++++++++ .../simple_reduce_grid_pipelined_2.sptl | 20 ++++++++++++++ tests/test_collective_ir_parser.py | 11 +++++++- 5 files changed, 84 insertions(+), 1 deletion(-) create mode 100644 samples/collective/simple_reduce_grid_2.ref_tile create mode 100644 samples/collective/simple_reduce_grid_2.sptl create mode 100644 samples/collective/simple_reduce_grid_pipelined_2.ref_tile create mode 100644 samples/collective/simple_reduce_grid_pipelined_2.sptl diff --git a/samples/collective/simple_reduce_grid_2.ref_tile b/samples/collective/simple_reduce_grid_2.ref_tile new file mode 100644 index 00000000..548a6420 --- /dev/null +++ b/samples/collective/simple_reduce_grid_2.ref_tile @@ -0,0 +1,8 @@ +[0:1 , 0:5] +[1:2 , 0:1] +[1:2 , 1:3] +[1:2 , 3:4] +[1:2 , 4:5] +[2:4 , 0:5] +[4:5 , 0:5] +4 \ No newline at end of file diff --git a/samples/collective/simple_reduce_grid_2.sptl b/samples/collective/simple_reduce_grid_2.sptl new file mode 100644 index 00000000..e8486149 --- /dev/null +++ b/samples/collective/simple_reduce_grid_2.sptl @@ -0,0 +1,20 @@ + +kernel @add() { + + place i16 i, i16 j in [0:5, 0:5] { + i16[100] a + } + + dataflow i16 i, i16 j in [0:5, 0:5] { + multistream red = reduce_stream(1, 3) { + algorithm = grid, + op = CL_SUM, + pipelined = false + } + } + + compute i16 i, i16 j in [0:5, 0:5] { + a[0] = 1 + await reduce(a, red) + } +} \ No newline at end of file diff --git a/samples/collective/simple_reduce_grid_pipelined_2.ref_tile b/samples/collective/simple_reduce_grid_pipelined_2.ref_tile new file mode 100644 index 00000000..4bf29eac --- /dev/null +++ 
b/samples/collective/simple_reduce_grid_pipelined_2.ref_tile @@ -0,0 +1,26 @@ +[0:1 , 0:1] +[0:1 , 1:2] +[0:1 , 2:3] +[0:1 , 3:4] +[0:1 , 4:5] +[1:2 , 0:1] +[1:2 , 1:2] +[1:2 , 2:3] +[1:2 , 3:4] +[1:2 , 4:5] +[2:3 , 0:1] +[2:3 , 1:2] +[2:3 , 2:3] +[2:3 , 3:4] +[2:3 , 4:5] +[3:4 , 0:1] +[3:4 , 1:2] +[3:4 , 2:3] +[3:4 , 3:4] +[3:4 , 4:5] +[4:5 , 0:1] +[4:5 , 1:2] +[4:5 , 2:3] +[4:5 , 3:4] +[4:5 , 4:5] +6 \ No newline at end of file diff --git a/samples/collective/simple_reduce_grid_pipelined_2.sptl b/samples/collective/simple_reduce_grid_pipelined_2.sptl new file mode 100644 index 00000000..85eab140 --- /dev/null +++ b/samples/collective/simple_reduce_grid_pipelined_2.sptl @@ -0,0 +1,20 @@ + +kernel @add() { + + place i16 i, i16 j in [0:5, 0:5] { + i16[100] a + } + + dataflow i16 i, i16 j in [0:5, 0:5] { + multistream red = reduce_stream(1, 3) { + algorithm = grid, + op = CL_SUM, + pipelined = true + } + } + + compute i16 i, i16 j in [0:5, 0:5] { + a[0] = 1 + await reduce(a, red) + } +} \ No newline at end of file diff --git a/tests/test_collective_ir_parser.py b/tests/test_collective_ir_parser.py index 4ace22dd..88b0b79c 100644 --- a/tests/test_collective_ir_parser.py +++ b/tests/test_collective_ir_parser.py @@ -21,7 +21,6 @@ def _tiling_test(file): program = parser.parse_file(file) program_optimized = optimization_pass(program) ir_1 = program_optimized.as_ir() - #print(ir_1) ir_ref = _load_ref_file(file) num_dataflow = ir_ref[-1] @@ -53,10 +52,18 @@ def test_simple_reduce_grid_pipelined_1(): file = os.path.join(os.path.dirname(__file__), '..', 'samples', 'collective', 'simple_reduce_grid_pipelined_1.sptl') _tiling_test(file) +def test_simple_reduce_grid_pipelined_2(): + file = os.path.join(os.path.dirname(__file__), '..', 'samples', 'collective', 'simple_reduce_grid_pipelined_2.sptl') + _tiling_test(file) + def test_simple_reduce_grid_1(): file = os.path.join(os.path.dirname(__file__), '..', 'samples', 'collective', 'simple_reduce_grid_1.sptl') _tiling_test(file) 
+def test_simple_reduce_grid_2(): + file = os.path.join(os.path.dirname(__file__), '..', 'samples', 'collective', 'simple_reduce_grid_2.sptl') + _tiling_test(file) + def test_simple_reduce_snake_pipelined_1(): file = os.path.join(os.path.dirname(__file__), '..', 'samples', 'collective', 'simple_reduce_snake_pipelined_1.sptl') _tiling_test(file) @@ -89,7 +96,9 @@ def test_hard_reduce_3(): if __name__ == '__main__': test_simple_bcast() test_simple_reduce_grid_pipelined_1() + test_simple_reduce_grid_pipelined_2() test_simple_reduce_grid_1() + test_simple_reduce_grid_2() test_simple_reduce_snake_pipelined_1() test_simple_reduce_snake_1() test_simple_reduce_looped()