From eecee073a311b1ad4aad4bd24b97fdfc2caf3d03 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Fri, 13 Mar 2026 23:32:23 +0000
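Subject: [PATCH 01/51] Add XeGPU softmax example built on linalg.softmax

Introduce examples/xegpu/softmax.py: an XeGPUSoftmax workload whose
payload calls linalg.softmax along dimension 1, a NumPy reference check,
and a benchmark CLI. The transform schedule is still an empty
placeholder.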

---
 examples/xegpu/softmax.py | 336 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 336 insertions(+)
 create mode 100644 examples/xegpu/softmax.py

diff --git a/examples/xegpu/softmax.py b/examples/xegpu/softmax.py
new file mode 100644
index 00000000..bb90f812
--- /dev/null
+++ b/examples/xegpu/softmax.py
@@ -0,0 +1,336 @@
+# RUN: %PYTHON %s --dump-kernel=xegpu-wg | FileCheck %s
+# CHECK: module attributes {gpu.container_module} {
+
+"""
+XeGPU softmax benchmark.
+"""
+
+import argparse
+import ctypes
+from typing import Optional
+from functools import cached_property
+
+import numpy as np
+from mlir import ir
+from mlir.execution_engine import ExecutionEngine
+from mlir.dialects import linalg, gpu, bufferization, arith, tensor, func
+
+from lighthouse.workload import benchmark
+from lighthouse.utils.memref import to_ctype as memref_to_ctype
+from lighthouse.utils.numpy import numpy_to_ctype
+from lighthouse.utils.mlir import func_cif
+from lighthouse.ingress.mlir_gen import get_mlir_elem_type
+from lighthouse.ingress.mlir_gen.gpu_utils import emit_gpu_util_funcs, emit_buf_to_tensor
+
+from xegpu_workload import XeGPUWorkload
+
+
+def softmax_complexity(M: int, N: int, nbytes: int):
+    """
+    Complexity of softmax operation.
+    
+    For each row:
+    - O(N) to find max
+    - O(N) to compute exp(x - max) and sum
+    - O(N) to normalize
+    Total: 3*N operations per row, but with transcendental (exp) operations
+    """
+    # Approximation: 5 FLOPs per element (max, sub, exp, sum, div)
+    # exp is expensive but we count it as ~1 FLOP for simplicity
+    flop_count = M * N * 5
+    memory_reads = M * N * nbytes  # read input
+    memory_writes = M * N * nbytes  # write output
+    return flop_count, memory_reads, memory_writes
+
+
+class XeGPUSoftmax(XeGPUWorkload):
+    """
+    Softmax workload on XeGPU.
+
+    Computes softmax along the last dimension (rows):
+    output[i, j] = exp(input[i, j] - max_i) / sum_i(exp(input[i, j] - max_i))
+
+    where max_i and sum_i are computed over row i.
+    """
+
+    def __init__(
+        self,
+        M: int,
+        N: int,
+        dtype: str = "f32",
+    ):
+        super().__init__()
+        self.M = M
+        self.N = N
+        self.shape = (M, N)
+        assert dtype == "f32", "Only f32 type is supported for softmax"
+        self.dtype_str = dtype
+        type_str_to_numpy = {
+            "f16": np.float16,
+            "f32": np.float32,
+        }
+        self.dtype = type_str_to_numpy[dtype]
+
+    @cached_property
+    def _initial_host_arrays(self) -> tuple[np.ndarray]:
+        """Generate initial values on host with numpy."""
+        np.random.seed(42)
+        # Use values in range [-0.5, 0.5] to avoid numerical issues
+        input_arr = np.random.uniform(-0.5, 0.5, self.shape).astype(self.dtype)
+        return (input_arr,)
+
+    @cached_property
+    def _reference_solution(self) -> np.ndarray:
+        """Compute reference solution on host with numpy."""
+        (input_arr,) = self._initial_host_arrays
+        # Use float32 for computation
+        x = input_arr.astype(np.float32)
+        # Compute softmax along axis 1 (each row independently)
+        # Numerically stable version: subtract max before exp
+        max_vals = np.max(x, axis=1, keepdims=True)
+        exp_vals = np.exp(x - max_vals)
+        sum_vals = np.sum(exp_vals, axis=1, keepdims=True)
+        output = exp_vals / sum_vals
+        return output.astype(self.dtype)
+
+    def _get_input_arrays(
+        self, execution_engine: ExecutionEngine
+    ) -> list[ctypes.Structure]:
+        # Allocate device memory for input and output
+        input_gpu = self._allocate_array(
+            "input", self.shape, self.dtype_str, execution_engine
+        )
+        output_gpu = self._allocate_array(
+            "output", self.shape, self.dtype_str, execution_engine
+        )
+
+        # Copy input to device
+        (input_host,) = self._initial_host_arrays
+        copy_fn = f"gpu_copy_2d_{self.dtype_str}"
+        execution_engine.invoke(
+            copy_fn, numpy_to_ctype(input_host), memref_to_ctype(input_gpu)
+        )
+
+        # Return memrefs: [output, input]
+        return [output_gpu, input_gpu]
+
+    def check_correctness(
+        self, execution_engine: ExecutionEngine, verbose: int = 0
+    ) -> bool:
+        # Copy result from device to host
+        output_gpu = self.gpu_memrefs[("output", self.dtype_str)]
+        output_host = np.zeros(self.shape, dtype=self.dtype)
+        execution_engine.invoke(
+            f"gpu_copy_2d_{self.dtype_str}",
+            memref_to_ctype(output_gpu),
+            numpy_to_ctype(output_host),
+        )
+
+        output_ref = self._reference_solution
+        output_computed = output_host.astype(np.float32)
+        
+        if verbose > 1:
+            print("Reference solution (first 5 rows):")
+            print(output_ref[:5])
+            print("Computed solution (first 5 rows):")
+            print(output_computed[:5])
+
+        # Check row sums are close to 1.0
+        row_sums = np.sum(output_computed, axis=1)
+        sums_ok = np.allclose(row_sums, 1.0, rtol=1e-5, atol=1e-6)
+        
+        # Check values match reference
+        values_ok = np.allclose(output_computed, output_ref, rtol=1e-4, atol=1e-6)
+        
+        success = sums_ok and values_ok
+
+        if verbose:
+            if success:
+                print("PASSED")
+            else:
+                print("FAILED!")
+                if not sums_ok:
+                    print(f"  Row sums check failed. Min: {row_sums.min():.6f}, Max: {row_sums.max():.6f}")
+                if not values_ok:
+                    max_diff = np.abs(output_computed - output_ref).max()
+                    print(f"  Values mismatch. Max abs diff: {max_diff:.6e}")
+        return success
+
+    def get_complexity(self) -> tuple[int, int, int]:
+        nbytes = np.dtype(self.dtype).itemsize
+        return softmax_complexity(self.M, self.N, nbytes)
+
+    def payload_module(self) -> ir.Module:
+        """Generate MLIR module for softmax payload."""
+        mod = ir.Module.create()
+        dtype = get_mlir_elem_type(self.dtype_str)
+        memref_t = ir.MemRefType.get(self.shape, dtype)
+        
+        with ir.InsertionPoint(mod.body):
+            # Function signature: payload(output, input)
+            @func_cif(memref_t, memref_t, name=self.payload_function_name)
+            def payload(output, input_arg):
+                # Convert memrefs to tensors
+                output_tensor = emit_buf_to_tensor(output, restrict=True, writable=True)
+                input_tensor = emit_buf_to_tensor(input_arg, restrict=True)
+                
+                # Create intermediate buffer for softmax (used internally by linalg.softmax)
+                # This stores the sum of exp values
+                M, N = self.shape
+                softmax_buf_type = ir.MemRefType.get((M,N), dtype)
+                softmax_buf = gpu.alloc(softmax_buf_type, None, [], [], [])
+                softmax_buf_tensor = emit_buf_to_tensor(softmax_buf, restrict=True, writable=True)
+                
+                # Compute softmax along dimension 1 (rows)
+                # linalg.softmax performs: exp(x - max(x)) / sum(exp(x - max(x)))
+                result = linalg.softmax(
+                    (input_tensor.type,), input_tensor, softmax_buf_tensor, dimension=1
+                )
+                
+                # Materialize result back to output memref
+                bufferization.materialize_in_destination(
+                    None, result, output, restrict=True, writable=True
+                )
+                
+                # Cleanup
+                gpu.dealloc(None, [], softmax_buf)
+
+            # Emit utility functions for GPU memory management
+            emit_gpu_util_funcs(dtype, rank=2)
+
+        return mod
+
+    def schedule_module(
+        self, stop_at_stage: Optional[str] = None, parameters: Optional[dict] = None
+    ) -> ir.Module:
+        """
+        Generate transform schedule for softmax.
+        
+        For now, returns an empty schedule. In the future, this would contain
+        tiling, vectorization, and XeGPU-specific lowering transformations.
+        """
+        # TODO: Implement proper transform schedule
+        # For now, create a minimal schedule that just applies bufferization
+        mod = ir.Module.create()
+        with ir.InsertionPoint(mod.body):
+            from mlir.dialects import transform
+            
+            # Create a simple transform sequence
+            @func_cif(name="__transform_main")
+            def transform_main():
+                # Empty transform - just identity
+                # In a full implementation, this would tile, vectorize,
+                # and lower to XeGPU operations
+                pass
+        
+        return mod
+
+    def shared_libs(self) -> list[str]:
+        return ["libmlir_levelzero_runtime.so"]
+
+
+def parse_cli():
+    parser = argparse.ArgumentParser(
+        description="Softmax using MLIR XeGPU",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    parser.add_argument(
+        "--sizes",
+        type=int,
+        nargs=2,
+        default=[1024, 512],
+        help="M,N matrix sizes (MxN)",
+    )
+    parser.add_argument(
+        "--wg-tile",
+        type=int,
+        nargs=2,
+        default=[64, 32],
+        help="Workgroup tile size M,N.",
+    )
+    parser.add_argument(
+        "--nruns",
+        type=int,
+        default=1000,
+        help="Number of runs to average the execution time.",
+    )
+    parser.add_argument(
+        "--nwarmup",
+        type=int,
+        default=20,
+        help="Number of warm-up iterations before benchmarking.",
+    )
+    parser.add_argument(
+        "--check-result",
+        action="store_true",
+        help="Check the result of the softmax computation.",
+    )
+    parser.add_argument(
+        "--dump-kernel",
+        type=str,
+        choices=[
+            "initial",
+            "tiled",
+            "vectorized",
+            "bufferized",
+            "xegpu-initial",
+            "xegpu-wg",
+            "final",
+        ],
+        help="Dump kernel IR at different stages of lowering and exit without "
+        "executing the kernel.",
+    )
+    parser.add_argument(
+        "--dump-schedule",
+        action="store_true",
+        help="Dump transform schedule.",
+    )
+    args = parser.parse_args()
+    return args
+
+
+if __name__ == "__main__":
+    args = parse_cli()
+
+    params = {
+        "wg_m": args.wg_tile[0],
+        "wg_n": args.wg_tile[1],
+    }
+
+    M, N = args.sizes
+    dtype = "f32"
+
+    with ir.Context(), ir.Location.unknown():
+        wload = XeGPUSoftmax(M=M, N=N, dtype=dtype)
+
+        if args.dump_kernel or args.dump_schedule:
+            wload.lower_payload(
+                dump_payload=args.dump_kernel,
+                dump_schedule=args.dump_schedule,
+                schedule_parameters=params,
+            )
+        else:
+            times = benchmark(
+                wload,
+                nruns=args.nruns,
+                nwarmup=args.nwarmup,
+                schedule_parameters=params,
+                check_correctness=args.check_result,
+                verbose=1,
+            )
+            times *= 1e6  # convert to microseconds
+            elapsed = np.mean(times)
+            flop_count = wload.get_complexity()[0]
+            gflops = flop_count / (elapsed * 1e-6) / 1e9
+
+            def list2str(a):
+                return ",".join(map(str, a))
+
+            parts = [
+                f"sizes={list2str(args.sizes)}",
+                f"dt={dtype}",
+                f"wg-tile={list2str(args.wg_tile)}",
+                f"time(us): {elapsed:.2f}",
+                f"GFLOPS: {gflops:.2f}",
+            ]
+            print(" ".join(parts))

From f991027da04fd09f164f10b771ea470b9b577519 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Mon, 16 Mar 2026 22:46:40 +0000
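Subject: [PATCH 02/51] Decompose softmax payload into linalg.generic ops

Replace the linalg.softmax call and its gpu.alloc scratch buffer with
five linalg.generic ops (row max, subtract, exp, row sum, divide).
Rename schedule_module to schedule_modules, which now returns a list
that starts with the bench wrapper schedule.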

---
 examples/xegpu/softmax.py | 93 ++++++++++++++++++++++++++++++++-------
 1 file changed, 76 insertions(+), 17 deletions(-)

diff --git a/examples/xegpu/softmax.py b/examples/xegpu/softmax.py
index bb90f812..ab61f1e5 100644
--- a/examples/xegpu/softmax.py
+++ b/examples/xegpu/softmax.py
@@ -13,9 +13,9 @@
 import numpy as np
 from mlir import ir
 from mlir.execution_engine import ExecutionEngine
-from mlir.dialects import linalg, gpu, bufferization, arith, tensor, func
+from mlir.dialects import linalg, gpu, bufferization, arith, tensor, func, math
 
-from lighthouse.workload import benchmark
+from lighthouse.workload import benchmark, get_bench_wrapper_schedule
 from lighthouse.utils.memref import to_ctype as memref_to_ctype
 from lighthouse.utils.numpy import numpy_to_ctype
 from lighthouse.utils.mlir import func_cif
@@ -174,35 +174,94 @@ def payload(output, input_arg):
                 output_tensor = emit_buf_to_tensor(output, restrict=True, writable=True)
                 input_tensor = emit_buf_to_tensor(input_arg, restrict=True)
                 
-                # Create intermediate buffer for softmax (used internally by linalg.softmax)
-                # This stores the sum of exp values
                 M, N = self.shape
-                softmax_buf_type = ir.MemRefType.get((M,N), dtype)
-                softmax_buf = gpu.alloc(softmax_buf_type, None, [], [], [])
-                softmax_buf_tensor = emit_buf_to_tensor(softmax_buf, restrict=True, writable=True)
                 
-                # Compute softmax along dimension 1 (rows)
-                # linalg.softmax performs: exp(x - max(x)) / sum(exp(x - max(x)))
-                result = linalg.softmax(
-                    (input_tensor.type,), input_tensor, softmax_buf_tensor, dimension=1
+                # Define affine maps for indexing
+                # #map = affine_map<(d0, d1) -> (d0, d1)>  (identity 2D)
+                # #map1 = affine_map<(d0, d1) -> (d0)>     (broadcast/reduce along d1)
+                d0 = ir.AffineDimExpr.get(0)
+                d1 = ir.AffineDimExpr.get(1)
+                map_2d = ir.AffineMap.get(2, 0, [d0, d1])
+                map_1d = ir.AffineMap.get(2, 0, [d0])
+                
+                # Step 1: Find max - linalg.generic reduction
+                neg_inf = arith.constant(dtype, float('-inf'))
+                max_init = tensor.empty((M,), dtype)
+                max_filled = linalg.fill(neg_inf, outs=[max_init])
+                
+                @linalg.generic(
+                    [input_tensor],  # inputs
+                    [max_filled],  # outputs
+                    [map_2d, map_1d],  # indexing_maps
+                    [linalg.IteratorType.parallel, linalg.IteratorType.reduction],  # iterator_types
+                )
+                def row_max(in_val, acc):
+                    return arith.maximumf(in_val, acc)
+                
+                # Step 2: Subtract max (broadcast) - linalg.generic elementwise
+                output_init = tensor.empty((M, N), dtype)
+                
+                @linalg.generic(
+                    [input_tensor, row_max],  # inputs
+                    [output_init],  # outputs
+                    [map_2d, map_1d, map_2d],  # indexing_maps
+                    [linalg.IteratorType.parallel, linalg.IteratorType.parallel],  # iterator_types
+                )
+                def shifted(in_val, max_val, out):
+                    return arith.subf(in_val, max_val)
+                
+                # Step 3: Compute exp - linalg.generic elementwise
+                @linalg.generic(
+                    [shifted],  # inputs
+                    [output_init],  # outputs
+                    [map_2d, map_2d],  # indexing_maps
+                    [linalg.IteratorType.parallel, linalg.IteratorType.parallel],  # iterator_types
+                )
+                def exp_vals(in_val, out):
+                    return math.exp(in_val)
+                
+                # Step 4: Sum exp values - linalg.generic reduction
+                # Create collapsed tensor for sum init
+                # sum_init_2d = tensor.empty((M, 1), dtype)
+                sum_init = tensor.empty((M,), dtype)
+                # sum_init = tensor.CollapseShapeOp(sum_init_2d, [[0, 1]])
+
+                
+                zero = arith.constant(dtype, 0.0)
+                sum_filled = linalg.fill(zero, outs=[sum_init])
+                
+                @linalg.generic(
+                    [exp_vals],  # inputs
+                    [sum_filled],  # outputs
+                    [map_2d, map_1d],  # indexing_maps
+                    [linalg.IteratorType.parallel, linalg.IteratorType.reduction],  # iterator_types
                 )
+                def row_sum(in_val, acc):
+                    return arith.addf(in_val, acc)
+                
+                # Step 5: Divide by sum (broadcast) - linalg.generic elementwise
+                @linalg.generic(
+                    [exp_vals, row_sum],  # inputs
+                    [output_init],  # outputs
+                    [map_2d, map_1d, map_2d],  # indexing_maps
+                    [linalg.IteratorType.parallel, linalg.IteratorType.parallel],  # iterator_types
+                )
+                def result(exp_val, sum_val, out):
+                    return arith.divf(exp_val, sum_val)
                 
                 # Materialize result back to output memref
                 bufferization.materialize_in_destination(
                     None, result, output, restrict=True, writable=True
                 )
-                
-                # Cleanup
-                gpu.dealloc(None, [], softmax_buf)
 
             # Emit utility functions for GPU memory management
             emit_gpu_util_funcs(dtype, rank=2)
 
         return mod
 
-    def schedule_module(
+    def schedule_modules(
         self, stop_at_stage: Optional[str] = None, parameters: Optional[dict] = None
-    ) -> ir.Module:
+    ) -> list[ir.Module]:
         """
         Generate transform schedule for softmax.
         
@@ -223,7 +282,7 @@ def transform_main():
                 # and lower to XeGPU operations
                 pass
         
-        return mod
+        return [get_bench_wrapper_schedule(self), mod]
 
     def shared_libs(self) -> list[str]:
         return ["libmlir_levelzero_runtime.so"]

From 22415bb54f34727337b66392f4585f09110d2b6a Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Mon, 16 Mar 2026 22:48:34 +0000
Subject: [PATCH 03/51] Reword commented-out CollapseShapeOp note in softmax

---
 examples/xegpu/softmax.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/xegpu/softmax.py b/examples/xegpu/softmax.py
index ab61f1e5..dff8bd99 100644
--- a/examples/xegpu/softmax.py
+++ b/examples/xegpu/softmax.py
@@ -224,7 +224,7 @@ def exp_vals(in_val, out):
                 # Create collapsed tensor for sum init
                 # sum_init_2d = tensor.empty((M, 1), dtype)
                 sum_init = tensor.empty((M,), dtype)
-                # sum_init = tensor.CollapseShapeOp(sum_init_2d, [[0, 1]])
+                # tensor.CollapseShapeOp(sum_init, sum_init_2d, [[0, 1]])
 
                 
                 zero = arith.constant(dtype, 0.0)

From cb9ead174134465610ef797dbb33fbee0196e1cd Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Tue, 17 Mar 2026 22:05:47 +0000
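Subject: [PATCH 04/51] Add named-sequence schedule tiling the softmax divide

Build __transform_main as a transform.named_sequence that matches the
five linalg.generic ops, splits the handle, and tiles the last one with
tile_using_forall (static tile sizes 64x64). schedule_modules no longer
returns the bench wrapper schedule.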

---
 examples/xegpu/softmax.py | 64 +++++++++++++++++++++++++++++++++------
 1 file changed, 55 insertions(+), 9 deletions(-)

diff --git a/examples/xegpu/softmax.py b/examples/xegpu/softmax.py
index dff8bd99..97abd49a 100644
--- a/examples/xegpu/softmax.py
+++ b/examples/xegpu/softmax.py
@@ -269,20 +269,66 @@ def schedule_modules(
         tiling, vectorization, and XeGPU-specific lowering transformations.
         """
         # TODO: Implement proper transform schedule
-        # For now, create a minimal schedule that just applies bufferization
+        # For now, create a minimal schedule that prints the last linalg operation
         mod = ir.Module.create()
+        mod.operation.attributes["transform.with_named_sequence"] = ir.UnitAttr.get()
+        
         with ir.InsertionPoint(mod.body):
             from mlir.dialects import transform
+            from mlir.dialects.transform import structured
+            
+            # Create a transform sequence with proper signature
+            named_sequence = transform.named_sequence(
+                "__transform_main",
+                [transform.AnyOpType.get()],  # input: module
+                [],  # no outputs
+                arg_attrs=[{"transform.readonly": ir.UnitAttr.get()}]
+            )
             
-            # Create a simple transform sequence
-            @func_cif(name="__transform_main")
-            def transform_main():
-                # Empty transform - just identity
-                # In a full implementation, this would tile, vectorize,
-                # and lower to XeGPU operations
-                pass
+            with ir.InsertionPoint(named_sequence.body):
+                # Get the input module (bodyTarget)
+                payload_mod = named_sequence.bodyTarget
+                
+                # Match all linalg.generic operations
+                # We have 5 generic ops in softmax: max, sub, exp, sum, div
+                generic_ops = structured.structured_match(
+                    transform.AnyOpType.get(),
+                    payload_mod,
+                    ops=["linalg.generic"]
+                )
+                
+                # Split the handle into individual operation handles
+                # For softmax, we have 5 operations
+                anytype = transform.AnyOpType.get()
+                split_ops = transform.split_handle(
+                    (anytype, anytype, anytype, anytype, anytype),  # 5 result types
+                    generic_ops
+                )
+                
+                # The last operation (index 4) is the division
+                last_op = split_ops[-1]
+
+                # Print the last operation before tiling
+                # transform.print_(target=last_op, name="last_linalg_generic_before_tiling")
+
+                # Tile the last operation using tile_using_forall
+                # Tile sizes: [64, 64] for the two parallel dimensions (M, N)
+                tiled_op, for_op = structured.structured_tile_using_forall(
+                    anytype, anytype,
+                    last_op,
+                    num_threads=[],
+                    tile_sizes=[],
+                    static_tile_sizes=[64, 64],
+                )
+
+                # Print the tiled operation
+                # transform.print_(target=tiled_op, name="tiled_linalg_generic")
+                # transform.print_(target=for_op, name="forall_op")
+
+                # Required: yield to end the transform sequence
+                transform.yield_()
         
-        return [get_bench_wrapper_schedule(self), mod]
+        return [mod]
 
     def shared_libs(self) -> list[str]:
         return ["libmlir_levelzero_runtime.so"]

From ac39be33c3b90e6eb77c16237611fd33d4490695 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Wed, 18 Mar 2026 22:41:32 +0000
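Subject: [PATCH 05/51] Fuse, vectorize, bufferize and map softmax to GPU

Tile only the row dimension (64) and fuse the four producer generics
into the forall. Then vectorize, one-shot bufferize, convert scf.forall
to a parallel loop and lower it with gpu-map-parallel-loops and
convert-parallel-loops-to-gpu. Add a match_and_split helper and hoist
the transform imports to module scope.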

---
 examples/xegpu/softmax.py | 86 +++++++++++++++++++++++++++++++++++----
 1 file changed, 78 insertions(+), 8 deletions(-)

diff --git a/examples/xegpu/softmax.py b/examples/xegpu/softmax.py
index 97abd49a..6d6f87eb 100644
--- a/examples/xegpu/softmax.py
+++ b/examples/xegpu/softmax.py
@@ -14,6 +14,8 @@
 from mlir import ir
 from mlir.execution_engine import ExecutionEngine
 from mlir.dialects import linalg, gpu, bufferization, arith, tensor, func, math
+from mlir.dialects import transform
+from mlir.dialects.transform import structured, loop
 
 from lighthouse.workload import benchmark, get_bench_wrapper_schedule
 from lighthouse.utils.memref import to_ctype as memref_to_ctype
@@ -21,9 +23,25 @@
 from lighthouse.utils.mlir import func_cif
 from lighthouse.ingress.mlir_gen import get_mlir_elem_type
 from lighthouse.ingress.mlir_gen.gpu_utils import emit_gpu_util_funcs, emit_buf_to_tensor
+from lighthouse.pipeline.helper import (
+    apply_registered_pass,
+    canonicalize,
+    match,
+)
+from mlir.dialects.transform import bufferization as transform_bufferization
+from mlir.dialects.bufferization import LayoutMapOption
 
 from xegpu_workload import XeGPUWorkload
 
+def match_and_split(*args, nhandles=1, **kwargs):
+    """Helper function that splits matched handles."""
+    matched = match(*args, **kwargs)
+    anytype = transform.AnyOpType.get()
+    matched_ops = transform.split_handle((anytype,) * nhandles, matched)
+    if nhandles == 1:
+        matched_ops = [matched_ops]
+    return matched_ops
+
 
 def softmax_complexity(M: int, N: int, nbytes: int):
     """
@@ -274,8 +292,7 @@ def schedule_modules(
         mod.operation.attributes["transform.with_named_sequence"] = ir.UnitAttr.get()
         
         with ir.InsertionPoint(mod.body):
-            from mlir.dialects import transform
-            from mlir.dialects.transform import structured
+
             
             # Create a transform sequence with proper signature
             named_sequence = transform.named_sequence(
@@ -305,8 +322,11 @@ def schedule_modules(
                     generic_ops
                 )
                 
-                # The last operation (index 4) is the division
-                last_op = split_ops[-1]
+                # Reverse split_ops to have operations in reverse order
+                split_ops = list(reversed(split_ops))
+                
+                # The first operation (after reversal) is the division - this is the consumer
+                last_op = split_ops[0]
 
                 # Print the last operation before tiling
                 # transform.print_(target=last_op, name="last_linalg_generic_before_tiling")
@@ -318,12 +338,62 @@ def schedule_modules(
                     last_op,
                     num_threads=[],
                     tile_sizes=[],
-                    static_tile_sizes=[64, 64],
+                    static_tile_sizes=(64,),
                 )
 
-                # Print the tiled operation
-                # transform.print_(target=tiled_op, name="tiled_linalg_generic")
-                # transform.print_(target=for_op, name="forall_op")
+                # Fuse the producer operations into the forall loop
+                # Iterate through remaining operations (already in reverse order)
+                current_forall = for_op
+                for producer_op in split_ops[1:]:
+                    fused_op, current_forall = structured.structured_fuse_into_containing_op(
+                        anytype, anytype,
+                        producer_op,
+                        current_forall
+                    )
+                    
+                func = transform.get_parent_op(
+                    anytype,
+                    current_forall,
+                    op_name="func.func",
+                    deduplicate=True,
+                )
+                transform.apply_cse(func)
+                canonicalize(func)
+                func = apply_registered_pass(func, "eliminate-empty-tensors")
+                func = structured.VectorizeChildrenAndApplyPatternsOp(
+                    func,
+                    fold_type_extensions_into_contract=True,
+                ).result
+                identity_layout = LayoutMapOption.IdentityLayoutMap
+                payload_mod = transform.get_parent_op(
+                    anytype,
+                    func,
+                    op_name="builtin.module",
+                    deduplicate=True,
+                )
+                payload_mod = transform_bufferization.OneShotBufferizeOp(
+                    payload_mod,
+                    allow_return_allocs_from_loops=True,
+                    bufferize_function_boundaries=True,
+                    function_boundary_type_conversion=identity_layout,
+                ).result
+                payload_mod = apply_registered_pass(payload_mod, "fold-memref-alias-ops")
+                transform.apply_cse(payload_mod)
+                canonicalize(payload_mod)
+                
+                # convert forall to parallel
+                wg_loops = match_and_split(payload_mod, ops={"scf.forall"})
+                for wg_loop in wg_loops:
+                    wg_loop = loop.loop_forall_to_parallel([anytype], wg_loop)
+                func = transform.get_parent_op(anytype, wg_loop)
+
+                # convert to scf.parallel to gpu.launch
+                func = apply_registered_pass(func, "gpu-map-parallel-loops")
+                func = apply_registered_pass(func, "convert-parallel-loops-to-gpu")
+                func = apply_registered_pass(func, "lower-affine")
+                transform.apply_cse(func)
+                canonicalize(func)
+                
 
                 # Required: yield to end the transform sequence
                 transform.yield_()

From 51d494e23aa34e3166210e080aa23663e98924c2 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Fri, 20 Mar 2026 17:10:46 +0000
Subject: [PATCH 06/51] Parameterize tile rows, set launch threads, outline kernel

---
 examples/xegpu/softmax.py | 67 +++++++++++++++++++++++++++++++++------
 1 file changed, 57 insertions(+), 10 deletions(-)

diff --git a/examples/xegpu/softmax.py b/examples/xegpu/softmax.py
index 6d6f87eb..be29cdd0 100644
--- a/examples/xegpu/softmax.py
+++ b/examples/xegpu/softmax.py
@@ -15,7 +15,7 @@
 from mlir.execution_engine import ExecutionEngine
 from mlir.dialects import linalg, gpu, bufferization, arith, tensor, func, math
 from mlir.dialects import transform
-from mlir.dialects.transform import structured, loop
+from mlir.dialects.transform import structured, loop, xegpu
 
 from lighthouse.workload import benchmark, get_bench_wrapper_schedule
 from lighthouse.utils.memref import to_ctype as memref_to_ctype
@@ -338,7 +338,7 @@ def schedule_modules(
                     last_op,
                     num_threads=[],
                     tile_sizes=[],
-                    static_tile_sizes=(64,),
+                    static_tile_sizes=(parameters["wg_rows"],),
                 )
 
                 # Fuse the producer operations into the forall loop
@@ -393,6 +393,39 @@ def schedule_modules(
                 func = apply_registered_pass(func, "lower-affine")
                 transform.apply_cse(func)
                 canonicalize(func)
+                # set the number of threads for the gpu.launch operation
+                launch_op = match_and_split(func, ops={"gpu.launch"})
+                num_threads = parameters["sg_rows"] * parameters["subgroup_size"]
+                xegpu.set_gpu_launch_threads(launch_op[0], threads=[num_threads, 1, 1])
+                
+                # outline gpu func
+                func = apply_registered_pass(func, "lower-affine")
+                canonicalize(func)
+                func = apply_registered_pass(func, "gpu-launch-sink-index-computations")
+                payload_mod = transform.get_parent_op(
+                    anytype,
+                    func,
+                    op_name="builtin.module",
+                    deduplicate=True,
+                )
+                payload_mod = apply_registered_pass(payload_mod, "gpu-kernel-outlining")
+                # transform.PrintOp(target=payload_mod, name="before_gpu_outlining")
+                # transform.apply_cse(payload_mod)
+
+                # set xevm target
+                # payload_mod = apply_registered_pass(
+                #     payload_mod,
+                #     "xevm-attach-target",
+                #     options={"O": "3", "chip": "bmg"},
+                # )
+
+                # # convert vector to xegpu
+                # gpu_mod_ops = match_and_split(payload_mod, ops={"gpu.module"})
+                # for gpu_mod in gpu_mod_ops:
+                #     gpu_func = match(gpu_mod, ops={"gpu.func"})
+                #     gpu_func = apply_registered_pass(gpu_func, "convert-vector-to-xegpu")
+                #     transform.apply_cse(gpu_func)
+                
                 
 
                 # Required: yield to end the transform sequence
@@ -413,15 +446,26 @@ def parse_cli():
         "--sizes",
         type=int,
         nargs=2,
-        default=[1024, 512],
+        default=[1024, 64],
         help="M,N matrix sizes (MxN)",
     )
     parser.add_argument(
-        "--wg-tile",
+        "--wg-rows",
         type=int,
-        nargs=2,
-        default=[64, 32],
-        help="Workgroup tile size M,N.",
+        default=64,
+        help="Number of rows per workgroup.",
+    )
+    parser.add_argument(
+        "--sg-rows",
+        type=int,
+        default=8,
+        help="Number of rows per subgroup.",
+    )
+    parser.add_argument(
+        "--subgroup-size",
+        type=int,
+        default=16,
+        help="Subgroup size.",
     )
     parser.add_argument(
         "--nruns",
@@ -468,8 +512,9 @@ def parse_cli():
     args = parse_cli()
 
     params = {
-        "wg_m": args.wg_tile[0],
-        "wg_n": args.wg_tile[1],
+        "wg_rows": args.wg_rows,
+        "sg_rows": args.sg_rows,
+        "subgroup_size": args.subgroup_size,
     }
 
     M, N = args.sizes
@@ -504,7 +549,9 @@ def list2str(a):
             parts = [
                 f"sizes={list2str(args.sizes)}",
                 f"dt={dtype}",
-                f"wg-tile={list2str(args.wg_tile)}",
+                f"wg-rows={args.wg_rows}",
+                f"sg-rows={args.sg_rows}",
+                f"subgroup-size={args.subgroup_size}",
                 f"time(us): {elapsed:.2f}",
                 f"GFLOPS: {gflops:.2f}",
             ]

From 7ac8852412f06100c2cf188c37c2f18254e38c83 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Fri, 20 Mar 2026 21:37:04 +0000
Subject: [PATCH 07/51] softmax: attach xevm target and lower through xegpu

---
 examples/xegpu/softmax.py | 37 ++++++++++++++++++++++++-------------
 1 file changed, 24 insertions(+), 13 deletions(-)

diff --git a/examples/xegpu/softmax.py b/examples/xegpu/softmax.py
index be29cdd0..56b43c21 100644
--- a/examples/xegpu/softmax.py
+++ b/examples/xegpu/softmax.py
@@ -408,23 +408,34 @@ def schedule_modules(
                     op_name="builtin.module",
                     deduplicate=True,
                 )
-                payload_mod = apply_registered_pass(payload_mod, "gpu-kernel-outlining")
+                # payload = match(payload_mod, ops={"func.func"})
                 # transform.PrintOp(target=payload_mod, name="before_gpu_outlining")
-                # transform.apply_cse(payload_mod)
+                payload_mod = apply_registered_pass(payload_mod, "gpu-kernel-outlining")
+                transform.apply_cse(payload_mod)
 
                 # set xevm target
-                # payload_mod = apply_registered_pass(
-                #     payload_mod,
-                #     "xevm-attach-target",
-                #     options={"O": "3", "chip": "bmg"},
-                # )
-
-                # # convert vector to xegpu
-                # gpu_mod_ops = match_and_split(payload_mod, ops={"gpu.module"})
+                payload_mod = apply_registered_pass(
+                    payload_mod,
+                    "xevm-attach-target",
+                    options={"O": "3", "chip": "bmg"},
+                )
+
+                # convert vector to xegpu
+                gpu_mod = match_and_split(payload_mod, ops={"gpu.module"})
                 # for gpu_mod in gpu_mod_ops:
-                #     gpu_func = match(gpu_mod, ops={"gpu.func"})
-                #     gpu_func = apply_registered_pass(gpu_func, "convert-vector-to-xegpu")
-                #     transform.apply_cse(gpu_func)
+                gpu_func = match(gpu_mod[0], ops={"gpu.func"})
+                gpu_func = apply_registered_pass(gpu_func, "convert-vector-to-xegpu")
+                transform.apply_cse(gpu_func)
+                
+                # Set layout attributes for xegpu.store_nd operations
+                store_ops = match_and_split(gpu_func, ops={"xegpu.store_nd"}, nhandles=1)
+                # for store_op in store_ops:
+                xegpu.set_op_layout_attr(store_ops[0], sg_layout=[8, 1], sg_data=[8, 64])
+                
+                payload_mod = apply_registered_pass(
+                    payload_mod, "gpu-lower-to-xevm-pipeline", options={"xegpu-op-level": "workgroup"}
+                )
+                
                 
                 
 

From d65bf9fe47d2c95c5fdc986177e604ec4c0b6f82 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Fri, 20 Mar 2026 22:58:52 +0000
Subject: [PATCH 08/51] softmax: match linalg.fill, rework bufferization (WIP)

---
 examples/xegpu/softmax.py | 88 ++++++++++++++++++++++++++++++---------
 1 file changed, 68 insertions(+), 20 deletions(-)

diff --git a/examples/xegpu/softmax.py b/examples/xegpu/softmax.py
index 56b43c21..2debc8c8 100644
--- a/examples/xegpu/softmax.py
+++ b/examples/xegpu/softmax.py
@@ -311,14 +311,14 @@ def schedule_modules(
                 generic_ops = structured.structured_match(
                     transform.AnyOpType.get(),
                     payload_mod,
-                    ops=["linalg.generic"]
+                    ops=["linalg.generic", "linalg.fill"]
                 )
                 
                 # Split the handle into individual operation handles
                 # For softmax, we have 5 operations
                 anytype = transform.AnyOpType.get()
                 split_ops = transform.split_handle(
-                    (anytype, anytype, anytype, anytype, anytype),  # 5 result types
+                    (anytype, anytype, anytype, anytype, anytype, anytype, anytype),  # 7 result types
                     generic_ops
                 )
                 
@@ -350,20 +350,23 @@ def schedule_modules(
                         producer_op,
                         current_forall
                     )
-                    
-                func = transform.get_parent_op(
-                    anytype,
-                    current_forall,
-                    op_name="func.func",
-                    deduplicate=True,
-                )
-                transform.apply_cse(func)
-                canonicalize(func)
-                func = apply_registered_pass(func, "eliminate-empty-tensors")
+                transform.annotate(current_forall, "gpu_loop")
+                
+                transform.apply_cse(payload_mod)
+                # canonicalize(payload_mod)
+                
+                # Vectorize and bufferize sequence
+                func = match(payload_mod, ops={"func.func"})
                 func = structured.VectorizeChildrenAndApplyPatternsOp(
                     func,
                     fold_type_extensions_into_contract=True,
                 ).result
+                loops = match_and_split(payload_mod, ops={"scf.forall"})
+                loop.loop_hoist_loop_invariant_subsets(loops[0])
+                transform.apply_cse(payload_mod)
+                canonicalize(payload_mod)
+                # transform.PrintOp(target=payload_mod, name="vectorize")
+                
                 identity_layout = LayoutMapOption.IdentityLayoutMap
                 payload_mod = transform.get_parent_op(
                     anytype,
@@ -377,28 +380,73 @@ def schedule_modules(
                     bufferize_function_boundaries=True,
                     function_boundary_type_conversion=identity_layout,
                 ).result
+                # payload_mod = transform_bufferization.OneShotBufferizeOp(
+                #     payload_mod,
+                #     allow_return_allocs_from_loops=False,
+                #     bufferize_function_boundaries=True,
+                #     function_boundary_type_conversion=identity_layout,
+                # ).result
                 payload_mod = apply_registered_pass(payload_mod, "fold-memref-alias-ops")
+                payload_mod = apply_registered_pass(payload_mod, "drop-equivalent-buffer-results")
+                payload_mod = apply_registered_pass(
+                    payload_mod,
+                    "buffer-results-to-out-params",
+                    options={
+                        "add-result-attr": "true",
+                        "hoist-dynamic-allocs": "true",
+                        "hoist-static-allocs": "true",
+                        "modify-public-functions": "true"
+                    }
+                )
                 transform.apply_cse(payload_mod)
                 canonicalize(payload_mod)
+                # # # transform.PrintOp(target=payload_mod, name="bufferize")
+                
+                # # func = match(payload_mod, ops={"func.func"})
+                gpu_loop = match(payload_mod, op_attrs={"gpu_loop": ir.UnitAttr.get()})
+                # # gpu_loop = transform.split_handle(anytype, gpu_loop)
+                gpu_loop = loop.loop_forall_to_parallel([anytype], gpu_loop)
+                
+                # # func = apply_registered_pass(payload_mod, "eliminate-empty-tensors")
+                # # func = structured.VectorizeChildrenAndApplyPatternsOp(
+                # #     func,
+                # #     fold_type_extensions_into_contract=True,
+                # # ).result
+                # # identity_layout = LayoutMapOption.IdentityLayoutMap
+                payload_mod = transform.get_parent_op(
+                    anytype,
+                    gpu_loop,
+                    op_name="func.func",
+                    deduplicate=True,
+                )
+                # payload_mod = transform_bufferization.OneShotBufferizeOp(
+                #     payload_mod,
+                #     allow_return_allocs_from_loops=True,
+                #     bufferize_function_boundaries=True,
+                #     function_boundary_type_conversion=identity_layout,
+                # ).result
+                # payload_mod = apply_registered_pass(payload_mod, "fold-memref-alias-ops")
+                # transform.apply_cse(payload_mod)
+                # canonicalize(payload_mod)
                 
-                # convert forall to parallel
-                wg_loops = match_and_split(payload_mod, ops={"scf.forall"})
-                for wg_loop in wg_loops:
-                    wg_loop = loop.loop_forall_to_parallel([anytype], wg_loop)
-                func = transform.get_parent_op(anytype, wg_loop)
+                # # convert forall to parallel
+                # wg_loops = match_and_split(payload_mod, ops={"scf.forall"})
+                # for wg_loop in wg_loops:
+                #     wg_loop = loop.loop_forall_to_parallel([anytype], wg_loop)
+                # func = transform.get_parent_op(anytype, wg_loop)
 
                 # convert to scf.parallel to gpu.launch
-                func = apply_registered_pass(func, "gpu-map-parallel-loops")
+                func = apply_registered_pass(payload_mod, "gpu-map-parallel-loops")
                 func = apply_registered_pass(func, "convert-parallel-loops-to-gpu")
                 func = apply_registered_pass(func, "lower-affine")
                 transform.apply_cse(func)
                 canonicalize(func)
-                # set the number of threads for the gpu.launch operation
+                # # set the number of threads for the gpu.launch operation
                 launch_op = match_and_split(func, ops={"gpu.launch"})
                 num_threads = parameters["sg_rows"] * parameters["subgroup_size"]
                 xegpu.set_gpu_launch_threads(launch_op[0], threads=[num_threads, 1, 1])
                 
-                # outline gpu func
+                # # outline gpu func
                 func = apply_registered_pass(func, "lower-affine")
                 canonicalize(func)
                 func = apply_registered_pass(func, "gpu-launch-sink-index-computations")

From 0bf3eb32a7fa9fbd66b0a7a238fb302607c2896a Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Tue, 24 Mar 2026 20:32:41 +0000
Subject: [PATCH 09/51] softmax: simplify schedule, add bench wrapper schedule

---
 examples/xegpu/softmax.py | 138 ++++++++++++--------------------------
 1 file changed, 42 insertions(+), 96 deletions(-)

diff --git a/examples/xegpu/softmax.py b/examples/xegpu/softmax.py
index 2debc8c8..e95286b5 100644
--- a/examples/xegpu/softmax.py
+++ b/examples/xegpu/softmax.py
@@ -17,6 +17,7 @@
 from mlir.dialects import transform
 from mlir.dialects.transform import structured, loop, xegpu
 
+from lighthouse import dialects as lh_dialects
 from lighthouse.workload import benchmark, get_bench_wrapper_schedule
 from lighthouse.utils.memref import to_ctype as memref_to_ctype
 from lighthouse.utils.numpy import numpy_to_ctype
@@ -54,7 +55,6 @@ def softmax_complexity(M: int, N: int, nbytes: int):
     Total: 3*N operations per row, but with transcendental (exp) operations
     """
     # Approximation: 5 FLOPs per element (max, sub, exp, sum, div)
-    # exp is expensive but we count it as ~1 FLOP for simplicity
     flop_count = M * N * 5
     memory_reads = M * N * nbytes  # read input
     memory_writes = M * N * nbytes  # write output
@@ -303,8 +303,15 @@ def schedule_modules(
             )
             
             with ir.InsertionPoint(named_sequence.body):
-                # Get the input module (bodyTarget)
-                payload_mod = named_sequence.bodyTarget
+                # match the payload module
+                anytype = transform.AnyOpType.get()
+                func = match(named_sequence.bodyTarget, ops={"func.func"})
+                payload_mod = transform.get_parent_op(
+                    anytype,
+                    func,
+                    op_name="builtin.module",
+                    deduplicate=True,
+                )
                 
                 # Match all linalg.generic operations
                 # We have 5 generic ops in softmax: max, sub, exp, sum, div
@@ -315,7 +322,7 @@ def schedule_modules(
                 )
                 
                 # Split the handle into individual operation handles
-                # For softmax, we have 5 operations
+                # For softmax, we have 7 operations
                 anytype = transform.AnyOpType.get()
                 split_ops = transform.split_handle(
                     (anytype, anytype, anytype, anytype, anytype, anytype, anytype),  # 7 result types
@@ -350,117 +357,59 @@ def schedule_modules(
                         producer_op,
                         current_forall
                     )
-                transform.annotate(current_forall, "gpu_loop")
-                
-                transform.apply_cse(payload_mod)
-                # canonicalize(payload_mod)
+                    
+                func = transform.get_parent_op(
+                    anytype,
+                    current_forall,
+                    op_name="func.func",
+                    deduplicate=True,
+                )
+                transform.apply_cse(func)
+                canonicalize(func)
                 
-                # Vectorize and bufferize sequence
-                func = match(payload_mod, ops={"func.func"})
                 func = structured.VectorizeChildrenAndApplyPatternsOp(
                     func,
                     fold_type_extensions_into_contract=True,
                 ).result
-                loops = match_and_split(payload_mod, ops={"scf.forall"})
-                loop.loop_hoist_loop_invariant_subsets(loops[0])
-                transform.apply_cse(payload_mod)
-                canonicalize(payload_mod)
-                # transform.PrintOp(target=payload_mod, name="vectorize")
-                
+                transform.apply_cse(func)
+                canonicalize(func)
+                payload_mod = apply_registered_pass(payload_mod, "eliminate-empty-tensors")
                 identity_layout = LayoutMapOption.IdentityLayoutMap
-                payload_mod = transform.get_parent_op(
-                    anytype,
-                    func,
-                    op_name="builtin.module",
-                    deduplicate=True,
-                )
                 payload_mod = transform_bufferization.OneShotBufferizeOp(
                     payload_mod,
                     allow_return_allocs_from_loops=True,
                     bufferize_function_boundaries=True,
                     function_boundary_type_conversion=identity_layout,
                 ).result
-                # payload_mod = transform_bufferization.OneShotBufferizeOp(
-                #     payload_mod,
-                #     allow_return_allocs_from_loops=False,
-                #     bufferize_function_boundaries=True,
-                #     function_boundary_type_conversion=identity_layout,
-                # ).result
+                # fold memref.subviews into vector.transfer_read/write ops
                 payload_mod = apply_registered_pass(payload_mod, "fold-memref-alias-ops")
-                payload_mod = apply_registered_pass(payload_mod, "drop-equivalent-buffer-results")
-                payload_mod = apply_registered_pass(
-                    payload_mod,
-                    "buffer-results-to-out-params",
-                    options={
-                        "add-result-attr": "true",
-                        "hoist-dynamic-allocs": "true",
-                        "hoist-static-allocs": "true",
-                        "modify-public-functions": "true"
-                    }
-                )
                 transform.apply_cse(payload_mod)
                 canonicalize(payload_mod)
-                # # # transform.PrintOp(target=payload_mod, name="bufferize")
-                
-                # # func = match(payload_mod, ops={"func.func"})
-                gpu_loop = match(payload_mod, op_attrs={"gpu_loop": ir.UnitAttr.get()})
-                # # gpu_loop = transform.split_handle(anytype, gpu_loop)
-                gpu_loop = loop.loop_forall_to_parallel([anytype], gpu_loop)
                 
-                # # func = apply_registered_pass(payload_mod, "eliminate-empty-tensors")
-                # # func = structured.VectorizeChildrenAndApplyPatternsOp(
-                # #     func,
-                # #     fold_type_extensions_into_contract=True,
-                # # ).result
-                # # identity_layout = LayoutMapOption.IdentityLayoutMap
-                payload_mod = transform.get_parent_op(
-                    anytype,
-                    gpu_loop,
-                    op_name="func.func",
-                    deduplicate=True,
-                )
-                # payload_mod = transform_bufferization.OneShotBufferizeOp(
-                #     payload_mod,
-                #     allow_return_allocs_from_loops=True,
-                #     bufferize_function_boundaries=True,
-                #     function_boundary_type_conversion=identity_layout,
-                # ).result
-                # payload_mod = apply_registered_pass(payload_mod, "fold-memref-alias-ops")
-                # transform.apply_cse(payload_mod)
-                # canonicalize(payload_mod)
-                
-                # # convert forall to parallel
-                # wg_loops = match_and_split(payload_mod, ops={"scf.forall"})
-                # for wg_loop in wg_loops:
-                #     wg_loop = loop.loop_forall_to_parallel([anytype], wg_loop)
-                # func = transform.get_parent_op(anytype, wg_loop)
-
+                # convert forall to parallel
+                wg_loops = match_and_split(payload_mod, ops={"scf.forall"})
+                for wg_loop in wg_loops:
+                    wg_loop = loop.loop_forall_to_parallel([anytype], wg_loop)
+                func = transform.get_parent_op(anytype, wg_loop)
                 # convert to scf.parallel to gpu.launch
-                func = apply_registered_pass(payload_mod, "gpu-map-parallel-loops")
+                func = apply_registered_pass(func, "gpu-map-parallel-loops")
                 func = apply_registered_pass(func, "convert-parallel-loops-to-gpu")
                 func = apply_registered_pass(func, "lower-affine")
                 transform.apply_cse(func)
                 canonicalize(func)
-                # # set the number of threads for the gpu.launch operation
+                
+                # set the number of threads for the gpu.launch operation
                 launch_op = match_and_split(func, ops={"gpu.launch"})
                 num_threads = parameters["sg_rows"] * parameters["subgroup_size"]
                 xegpu.set_gpu_launch_threads(launch_op[0], threads=[num_threads, 1, 1])
                 
-                # # outline gpu func
+                # outline gpu func
                 func = apply_registered_pass(func, "lower-affine")
                 canonicalize(func)
                 func = apply_registered_pass(func, "gpu-launch-sink-index-computations")
-                payload_mod = transform.get_parent_op(
-                    anytype,
-                    func,
-                    op_name="builtin.module",
-                    deduplicate=True,
-                )
-                # payload = match(payload_mod, ops={"func.func"})
-                # transform.PrintOp(target=payload_mod, name="before_gpu_outlining")
                 payload_mod = apply_registered_pass(payload_mod, "gpu-kernel-outlining")
                 transform.apply_cse(payload_mod)
-
+                
                 # set xevm target
                 payload_mod = apply_registered_pass(
                     payload_mod,
@@ -469,12 +418,12 @@ def schedule_modules(
                 )
 
                 # convert vector to xegpu
-                gpu_mod = match_and_split(payload_mod, ops={"gpu.module"})
-                # for gpu_mod in gpu_mod_ops:
-                gpu_func = match(gpu_mod[0], ops={"gpu.func"})
-                gpu_func = apply_registered_pass(gpu_func, "convert-vector-to-xegpu")
-                transform.apply_cse(gpu_func)
-                
+                gpu_mod_ops = match_and_split(payload_mod, ops={"gpu.module"})
+                for gpu_mod in gpu_mod_ops:
+                    gpu_func = match(gpu_mod, ops={"gpu.func"})
+                    gpu_func = apply_registered_pass(gpu_func, "convert-vector-to-xegpu")
+                    transform.apply_cse(gpu_func)
+                    
                 # Set layout attributes for xegpu.store_nd operations
                 store_ops = match_and_split(gpu_func, ops={"xegpu.store_nd"}, nhandles=1)
                 # for store_op in store_ops:
@@ -483,14 +432,10 @@ def schedule_modules(
                 payload_mod = apply_registered_pass(
                     payload_mod, "gpu-lower-to-xevm-pipeline", options={"xegpu-op-level": "workgroup"}
                 )
-                
-                
-                
-
                 # Required: yield to end the transform sequence
                 transform.yield_()
         
-        return [mod]
+        return [get_bench_wrapper_schedule(self), mod]
 
     def shared_libs(self) -> list[str]:
         return ["libmlir_levelzero_runtime.so"]
@@ -580,6 +525,7 @@ def parse_cli():
     dtype = "f32"
 
     with ir.Context(), ir.Location.unknown():
+        lh_dialects.register_and_load()
         wload = XeGPUSoftmax(M=M, N=N, dtype=dtype)
 
         if args.dump_kernel or args.dump_schedule:

From fabd656c1267437174b7c7b9ed472c0b76556ee9 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Tue, 24 Mar 2026 21:48:44 +0000
Subject: [PATCH 10/51] softmax: move payload and schedule into lighthouse

---
 examples/xegpu/softmax.py                     | 288 +-----------------
 .../ingress/mlir_gen/gpu_softmax_payload.py   | 121 ++++++++
 lighthouse/schedule/xegpu/softmax_schedule.py | 195 ++++++++++++
 3 files changed, 332 insertions(+), 272 deletions(-)
 create mode 100644 lighthouse/ingress/mlir_gen/gpu_softmax_payload.py
 create mode 100644 lighthouse/schedule/xegpu/softmax_schedule.py

diff --git a/examples/xegpu/softmax.py b/examples/xegpu/softmax.py
index e95286b5..474eb1a3 100644
--- a/examples/xegpu/softmax.py
+++ b/examples/xegpu/softmax.py
@@ -13,36 +13,17 @@
 import numpy as np
 from mlir import ir
 from mlir.execution_engine import ExecutionEngine
-from mlir.dialects import linalg, gpu, bufferization, arith, tensor, func, math
-from mlir.dialects import transform
-from mlir.dialects.transform import structured, loop, xegpu
 
 from lighthouse import dialects as lh_dialects
 from lighthouse.workload import benchmark, get_bench_wrapper_schedule
 from lighthouse.utils.memref import to_ctype as memref_to_ctype
 from lighthouse.utils.numpy import numpy_to_ctype
-from lighthouse.utils.mlir import func_cif
 from lighthouse.ingress.mlir_gen import get_mlir_elem_type
-from lighthouse.ingress.mlir_gen.gpu_utils import emit_gpu_util_funcs, emit_buf_to_tensor
-from lighthouse.pipeline.helper import (
-    apply_registered_pass,
-    canonicalize,
-    match,
-)
-from mlir.dialects.transform import bufferization as transform_bufferization
-from mlir.dialects.bufferization import LayoutMapOption
+from lighthouse.ingress.mlir_gen.gpu_softmax_payload import generate_gpu_softmax_payload
+from lighthouse.schedule.xegpu.softmax_schedule import get_softmax_schedule_module
 
 from xegpu_workload import XeGPUWorkload
 
-def match_and_split(*args, nhandles=1, **kwargs):
-    """Helper function that splits matched handles."""
-    matched = match(*args, **kwargs)
-    anytype = transform.AnyOpType.get()
-    matched_ops = transform.split_handle((anytype,) * nhandles, matched)
-    if nhandles == 1:
-        matched_ops = [matched_ops]
-    return matched_ops
-
 
 def softmax_complexity(M: int, N: int, nbytes: int):
     """
@@ -180,262 +161,25 @@ def get_complexity(self) -> tuple[int, int, int]:
 
     def payload_module(self) -> ir.Module:
         """Generate MLIR module for softmax payload."""
-        mod = ir.Module.create()
         dtype = get_mlir_elem_type(self.dtype_str)
-        memref_t = ir.MemRefType.get(self.shape, dtype)
-        
-        with ir.InsertionPoint(mod.body):
-            # Function signature: payload(output, input)
-            @func_cif(memref_t, memref_t, name=self.payload_function_name)
-            def payload(output, input_arg):
-                # Convert memrefs to tensors
-                output_tensor = emit_buf_to_tensor(output, restrict=True, writable=True)
-                input_tensor = emit_buf_to_tensor(input_arg, restrict=True)
-                
-                M, N = self.shape
-                
-                # Define affine maps for indexing
-                # #map = affine_map<(d0, d1) -> (d0, d1)>  (identity 2D)
-                # #map1 = affine_map<(d0, d1) -> (d0)>     (broadcast/reduce along d1)
-                d0 = ir.AffineDimExpr.get(0)
-                d1 = ir.AffineDimExpr.get(1)
-                map_2d = ir.AffineMap.get(2, 0, [d0, d1])
-                map_1d = ir.AffineMap.get(2, 0, [d0])
-                
-                # Step 1: Find max - linalg.generic reduction
-                neg_inf = arith.constant(dtype, float('-inf'))
-                max_init = tensor.empty((M,), dtype)
-                max_filled = linalg.fill(neg_inf, outs=[max_init])
-                
-                @linalg.generic(
-                    [input_tensor],  # inputs
-                    [max_filled],  # outputs
-                    [map_2d, map_1d],  # indexing_maps
-                    [linalg.IteratorType.parallel, linalg.IteratorType.reduction],  # iterator_types
-                )
-                def row_max(in_val, acc):
-                    return arith.maximumf(in_val, acc)
-                
-                # Step 2: Subtract max (broadcast) - linalg.generic elementwise
-                output_init = tensor.empty((M, N), dtype)
-                
-                @linalg.generic(
-                    [input_tensor, row_max],  # inputs
-                    [output_init],  # outputs
-                    [map_2d, map_1d, map_2d],  # indexing_maps
-                    [linalg.IteratorType.parallel, linalg.IteratorType.parallel],  # iterator_types
-                )
-                def shifted(in_val, max_val, out):
-                    return arith.subf(in_val, max_val)
-                
-                # Step 3: Compute exp - linalg.generic elementwise
-                @linalg.generic(
-                    [shifted],  # inputs
-                    [output_init],  # outputs
-                    [map_2d, map_2d],  # indexing_maps
-                    [linalg.IteratorType.parallel, linalg.IteratorType.parallel],  # iterator_types
-                )
-                def exp_vals(in_val, out):
-                    return math.exp(in_val)
-                
-                # Step 4: Sum exp values - linalg.generic reduction
-                # Create collapsed tensor for sum init
-                # sum_init_2d = tensor.empty((M, 1), dtype)
-                sum_init = tensor.empty((M,), dtype)
-                # tensor.CollapseShapeOp(sum_init, sum_init_2d, [[0, 1]])
-
-                
-                zero = arith.constant(dtype, 0.0)
-                sum_filled = linalg.fill(zero, outs=[sum_init])
-                
-                @linalg.generic(
-                    [exp_vals],  # inputs
-                    [sum_filled],  # outputs
-                    [map_2d, map_1d],  # indexing_maps
-                    [linalg.IteratorType.parallel, linalg.IteratorType.reduction],  # iterator_types
-                )
-                def row_sum(in_val, acc):
-                    return arith.addf(in_val, acc)
-                
-                # Step 5: Divide by sum (broadcast) - linalg.generic elementwise
-                @linalg.generic(
-                    [exp_vals, row_sum],  # inputs
-                    [output_init],  # outputs
-                    [map_2d, map_1d, map_2d],  # indexing_maps
-                    [linalg.IteratorType.parallel, linalg.IteratorType.parallel],  # iterator_types
-                )
-                def result(exp_val, sum_val, out):
-                    return arith.divf(exp_val, sum_val)
-                
-                # Materialize result back to output memref
-                bufferization.materialize_in_destination(
-                    None, result, output, restrict=True, writable=True
-                )
-
-            # Emit utility functions for GPU memory management
-            emit_gpu_util_funcs(dtype, rank=2)
-
-        return mod
+        return generate_gpu_softmax_payload(
+            func_name=self.payload_function_name,
+            M=self.M,
+            N=self.N,
+            dtype=dtype,
+        )
 
     def schedule_modules(
         self, stop_at_stage: Optional[str] = None, parameters: Optional[dict] = None
     ) -> list[ir.Module]:
-        """
-        Generate transform schedule for softmax.
-        
-        For now, returns an empty schedule. In the future, this would contain
-        tiling, vectorization, and XeGPU-specific lowering transformations.
-        """
-        # TODO: Implement proper transform schedule
-        # For now, create a minimal schedule that prints the last linalg operation
-        mod = ir.Module.create()
-        mod.operation.attributes["transform.with_named_sequence"] = ir.UnitAttr.get()
-        
-        with ir.InsertionPoint(mod.body):
-
-            
-            # Create a transform sequence with proper signature
-            named_sequence = transform.named_sequence(
-                "__transform_main",
-                [transform.AnyOpType.get()],  # input: module
-                [],  # no outputs
-                arg_attrs=[{"transform.readonly": ir.UnitAttr.get()}]
-            )
-            
-            with ir.InsertionPoint(named_sequence.body):
-                # match the payload module
-                anytype = transform.AnyOpType.get()
-                func = match(named_sequence.bodyTarget, ops={"func.func"})
-                payload_mod = transform.get_parent_op(
-                    anytype,
-                    func,
-                    op_name="builtin.module",
-                    deduplicate=True,
-                )
-                
-                # Match all linalg.generic operations
-                # We have 5 generic ops in softmax: max, sub, exp, sum, div
-                generic_ops = structured.structured_match(
-                    transform.AnyOpType.get(),
-                    payload_mod,
-                    ops=["linalg.generic", "linalg.fill"]
-                )
-                
-                # Split the handle into individual operation handles
-                # For softmax, we have 7 operations
-                anytype = transform.AnyOpType.get()
-                split_ops = transform.split_handle(
-                    (anytype, anytype, anytype, anytype, anytype, anytype, anytype),  # 7 result types
-                    generic_ops
-                )
-                
-                # Reverse split_ops to have operations in reverse order
-                split_ops = list(reversed(split_ops))
-                
-                # The first operation (after reversal) is the division - this is the consumer
-                last_op = split_ops[0]
-
-                # Print the last operation before tiling
-                # transform.print_(target=last_op, name="last_linalg_generic_before_tiling")
-
-                # Tile the last operation using tile_using_forall
-                # Tile sizes: [64, 64] for the two parallel dimensions (M, N)
-                tiled_op, for_op = structured.structured_tile_using_forall(
-                    anytype, anytype,
-                    last_op,
-                    num_threads=[],
-                    tile_sizes=[],
-                    static_tile_sizes=(parameters["wg_rows"],),
-                )
-
-                # Fuse the producer operations into the forall loop
-                # Iterate through remaining operations (already in reverse order)
-                current_forall = for_op
-                for producer_op in split_ops[1:]:
-                    fused_op, current_forall = structured.structured_fuse_into_containing_op(
-                        anytype, anytype,
-                        producer_op,
-                        current_forall
-                    )
-                    
-                func = transform.get_parent_op(
-                    anytype,
-                    current_forall,
-                    op_name="func.func",
-                    deduplicate=True,
-                )
-                transform.apply_cse(func)
-                canonicalize(func)
-                
-                func = structured.VectorizeChildrenAndApplyPatternsOp(
-                    func,
-                    fold_type_extensions_into_contract=True,
-                ).result
-                transform.apply_cse(func)
-                canonicalize(func)
-                payload_mod = apply_registered_pass(payload_mod, "eliminate-empty-tensors")
-                identity_layout = LayoutMapOption.IdentityLayoutMap
-                payload_mod = transform_bufferization.OneShotBufferizeOp(
-                    payload_mod,
-                    allow_return_allocs_from_loops=True,
-                    bufferize_function_boundaries=True,
-                    function_boundary_type_conversion=identity_layout,
-                ).result
-                # fold memref.subviews into vector.transfer_read/write ops
-                payload_mod = apply_registered_pass(payload_mod, "fold-memref-alias-ops")
-                transform.apply_cse(payload_mod)
-                canonicalize(payload_mod)
-                
-                # convert forall to parallel
-                wg_loops = match_and_split(payload_mod, ops={"scf.forall"})
-                for wg_loop in wg_loops:
-                    wg_loop = loop.loop_forall_to_parallel([anytype], wg_loop)
-                func = transform.get_parent_op(anytype, wg_loop)
-                # convert to scf.parallel to gpu.launch
-                func = apply_registered_pass(func, "gpu-map-parallel-loops")
-                func = apply_registered_pass(func, "convert-parallel-loops-to-gpu")
-                func = apply_registered_pass(func, "lower-affine")
-                transform.apply_cse(func)
-                canonicalize(func)
-                
-                # set the number of threads for the gpu.launch operation
-                launch_op = match_and_split(func, ops={"gpu.launch"})
-                num_threads = parameters["sg_rows"] * parameters["subgroup_size"]
-                xegpu.set_gpu_launch_threads(launch_op[0], threads=[num_threads, 1, 1])
-                
-                # outline gpu func
-                func = apply_registered_pass(func, "lower-affine")
-                canonicalize(func)
-                func = apply_registered_pass(func, "gpu-launch-sink-index-computations")
-                payload_mod = apply_registered_pass(payload_mod, "gpu-kernel-outlining")
-                transform.apply_cse(payload_mod)
-                
-                # set xevm target
-                payload_mod = apply_registered_pass(
-                    payload_mod,
-                    "xevm-attach-target",
-                    options={"O": "3", "chip": "bmg"},
-                )
-
-                # convert vector to xegpu
-                gpu_mod_ops = match_and_split(payload_mod, ops={"gpu.module"})
-                for gpu_mod in gpu_mod_ops:
-                    gpu_func = match(gpu_mod, ops={"gpu.func"})
-                    gpu_func = apply_registered_pass(gpu_func, "convert-vector-to-xegpu")
-                    transform.apply_cse(gpu_func)
-                    
-                # Set layout attributes for xegpu.store_nd operations
-                store_ops = match_and_split(gpu_func, ops={"xegpu.store_nd"}, nhandles=1)
-                # for store_op in store_ops:
-                xegpu.set_op_layout_attr(store_ops[0], sg_layout=[8, 1], sg_data=[8, 64])
-                
-                payload_mod = apply_registered_pass(
-                    payload_mod, "gpu-lower-to-xevm-pipeline", options={"xegpu-op-level": "workgroup"}
-                )
-                # Required: yield to end the transform sequence
-                transform.yield_()
-        
-        return [get_bench_wrapper_schedule(self), mod]
+        """Generate transform schedule for softmax."""
+        return [
+            get_bench_wrapper_schedule(self),
+            get_softmax_schedule_module(
+                stop_at_stage=stop_at_stage,
+                parameters=parameters,
+            ),
+        ]
 
     def shared_libs(self) -> list[str]:
         return ["libmlir_levelzero_runtime.so"]
diff --git a/lighthouse/ingress/mlir_gen/gpu_softmax_payload.py b/lighthouse/ingress/mlir_gen/gpu_softmax_payload.py
new file mode 100644
index 00000000..26fd9042
--- /dev/null
+++ b/lighthouse/ingress/mlir_gen/gpu_softmax_payload.py
@@ -0,0 +1,121 @@
+"""Generate MLIR payload for GPU softmax operation."""
+
+from mlir import ir
+from mlir.dialects import linalg, bufferization, arith, tensor, func, math
+
+from lighthouse.utils.mlir import func_cif
+from lighthouse.ingress.mlir_gen.gpu_utils import emit_gpu_util_funcs, emit_buf_to_tensor
+
+
+def generate_gpu_softmax_payload(
+    func_name: str,
+    M: int,
+    N: int,
+    dtype: ir.Type,
+) -> ir.Module:
+    """
+    Generate MLIR module for softmax payload.
+    
+    Computes softmax along the last dimension (rows):
+    output[i, j] = exp(input[i, j] - max_i) / sum_i(exp(input[i, j] - max_i))
+    
+    where max_i and sum_i are computed over row i.
+    
+    Args:
+        func_name: Name of the payload function
+        M: Number of rows
+        N: Number of columns  
+        dtype: MLIR element type (e.g., F32Type)
+        
+    Returns:
+        MLIR module containing the softmax payload function
+    """
+    mod = ir.Module.create()
+    shape = (M, N)
+    memref_t = ir.MemRefType.get(shape, dtype)
+    
+    with ir.InsertionPoint(mod.body):
+        # Function signature: payload(output, input)
+        @func_cif(memref_t, memref_t, name=func_name)
+        def payload(output, input_arg):
+            # Convert memrefs to tensors
+            output_tensor = emit_buf_to_tensor(output, restrict=True, writable=True)
+            input_tensor = emit_buf_to_tensor(input_arg, restrict=True)
+            
+            # Define affine maps for indexing
+            # #map = affine_map<(d0, d1) -> (d0, d1)>  (identity 2D)
+            # #map1 = affine_map<(d0, d1) -> (d0)>     (broadcast/reduce along d1)
+            d0 = ir.AffineDimExpr.get(0)
+            d1 = ir.AffineDimExpr.get(1)
+            map_2d = ir.AffineMap.get(2, 0, [d0, d1])
+            map_1d = ir.AffineMap.get(2, 0, [d0])
+            
+            # Step 1: Find max - linalg.generic reduction
+            neg_inf = arith.constant(dtype, float('-inf'))
+            max_init = tensor.empty((M,), dtype)
+            max_filled = linalg.fill(neg_inf, outs=[max_init])
+            
+            @linalg.generic(
+                [input_tensor],  # inputs
+                [max_filled],  # outputs
+                [map_2d, map_1d],  # indexing_maps
+                [linalg.IteratorType.parallel, linalg.IteratorType.reduction],  # iterator_types
+            )
+            def row_max(in_val, acc):
+                return arith.maximumf(in_val, acc)
+            
+            # Step 2: Subtract max (broadcast) - linalg.generic elementwise
+            output_init = tensor.empty((M, N), dtype)
+            
+            @linalg.generic(
+                [input_tensor, row_max],  # inputs
+                [output_init],  # outputs
+                [map_2d, map_1d, map_2d],  # indexing_maps
+                [linalg.IteratorType.parallel, linalg.IteratorType.parallel],  # iterator_types
+            )
+            def shifted(in_val, max_val, out):
+                return arith.subf(in_val, max_val)
+            
+            # Step 3: Compute exp - linalg.generic elementwise
+            @linalg.generic(
+                [shifted],  # inputs
+                [output_init],  # outputs
+                [map_2d, map_2d],  # indexing_maps
+                [linalg.IteratorType.parallel, linalg.IteratorType.parallel],  # iterator_types
+            )
+            def exp_vals(in_val, out):
+                return math.exp(in_val)
+            
+            # Step 4: Sum exp values - linalg.generic reduction
+            sum_init = tensor.empty((M,), dtype)
+            zero = arith.constant(dtype, 0.0)
+            sum_filled = linalg.fill(zero, outs=[sum_init])
+            
+            @linalg.generic(
+                [exp_vals],  # inputs
+                [sum_filled],  # outputs
+                [map_2d, map_1d],  # indexing_maps
+                [linalg.IteratorType.parallel, linalg.IteratorType.reduction],  # iterator_types
+            )
+            def row_sum(in_val, acc):
+                return arith.addf(in_val, acc)
+            
+            # Step 5: Divide by sum (broadcast) - linalg.generic elementwise
+            @linalg.generic(
+                [exp_vals, row_sum],  # inputs
+                [output_init],  # outputs
+                [map_2d, map_1d, map_2d],  # indexing_maps
+                [linalg.IteratorType.parallel, linalg.IteratorType.parallel],  # iterator_types
+            )
+            def result(exp_val, sum_val, out):
+                return arith.divf(exp_val, sum_val)
+            
+            # Materialize result back to output memref
+            bufferization.materialize_in_destination(
+                None, result, output, restrict=True, writable=True
+            )
+
+        # Emit utility functions for GPU memory management
+        emit_gpu_util_funcs(dtype, rank=2)
+
+    return mod
diff --git a/lighthouse/schedule/xegpu/softmax_schedule.py b/lighthouse/schedule/xegpu/softmax_schedule.py
new file mode 100644
index 00000000..e01f9af6
--- /dev/null
+++ b/lighthouse/schedule/xegpu/softmax_schedule.py
@@ -0,0 +1,195 @@
+"""Generate MLIR transform schedule for XeGPU softmax operation."""
+
+from typing import Optional
+
+from mlir import ir
+from mlir.dialects import transform
+from mlir.dialects.transform import structured, loop, xegpu
+from mlir.dialects.transform import bufferization as transform_bufferization
+from mlir.dialects.bufferization import LayoutMapOption
+
+from lighthouse.pipeline.helper import (
+    apply_registered_pass,
+    canonicalize,
+    match,
+)
+
+
+def match_and_split(*args, nhandles=1, **kwargs):
+    """Helper function that splits matched handles."""
+    matched = match(*args, **kwargs)
+    anytype = transform.AnyOpType.get()
+    matched_ops = transform.split_handle((anytype,) * nhandles, matched)
+    if nhandles == 1:
+        matched_ops = [matched_ops]
+    return matched_ops
+
+
+def get_softmax_schedule_module(
+    stop_at_stage: Optional[str] = None,
+    parameters: Optional[dict] = None,
+) -> ir.Module:
+    """
+    Generate transform schedule for softmax operation.
+    
+    The schedule performs the following transformations:
+    1. Tile the consumer operation (division) using forall
+    2. Fuse producer operations into the forall loop
+    3. Vectorize operations
+    4. Bufferize tensors
+    5. Convert to GPU dialect
+    6. Lower to XeGPU operations
+    
+    Args:
+        stop_at_stage: Optional stage name to stop early (for debugging)
+        parameters: Dictionary with scheduling parameters:
+            - wg_rows: Number of rows per workgroup
+            - sg_rows: Number of rows per subgroup  
+            - subgroup_size: Size of subgroup
+            
+    Returns:
+        MLIR module containing the transform schedule
+    """
+    assert parameters is not None, "Schedule parameters must be provided"
+    
+    mod = ir.Module.create()
+    mod.operation.attributes["transform.with_named_sequence"] = ir.UnitAttr.get()
+    
+    with ir.InsertionPoint(mod.body):
+        # Create a transform sequence with proper signature
+        named_sequence = transform.named_sequence(
+            "__transform_main",
+            [transform.AnyOpType.get()],  # input: module
+            [],  # no outputs
+            arg_attrs=[{"transform.readonly": ir.UnitAttr.get()}]
+        )
+        
+        with ir.InsertionPoint(named_sequence.body):
+            # match the payload module
+            anytype = transform.AnyOpType.get()
+            func = match(named_sequence.bodyTarget, ops={"func.func"})
+            payload_mod = transform.get_parent_op(
+                anytype,
+                func,
+                op_name="builtin.module",
+                deduplicate=True,
+            )
+            
+            # Match all linalg.generic and linalg.fill operations
+            # We have 7 operations in softmax: 
+            # fill(max_init), max, sub, exp, fill(sum_init), sum, div
+            generic_ops = structured.structured_match(
+                transform.AnyOpType.get(),
+                payload_mod,
+                ops=["linalg.generic", "linalg.fill"]
+            )
+            
+            # Split the handle into individual operation handles
+            anytype = transform.AnyOpType.get()
+            split_ops = transform.split_handle(
+                (anytype, anytype, anytype, anytype, anytype, anytype, anytype),  # 7 result types
+                generic_ops
+            )
+            
+            # Reverse split_ops to have operations in reverse order
+            split_ops = list(reversed(split_ops))
+            
+            # The first operation (after reversal) is the division - this is the consumer
+            last_op = split_ops[0]
+
+            # Tile the last operation using tile_using_forall
+            tiled_op, for_op = structured.structured_tile_using_forall(
+                anytype, anytype,
+                last_op,
+                num_threads=[],
+                tile_sizes=[],
+                static_tile_sizes=(parameters["wg_rows"],),
+            )
+
+            # Fuse the producer operations into the forall loop
+            # Iterate through remaining operations (already in reverse order)
+            current_forall = for_op
+            for producer_op in split_ops[1:]:
+                fused_op, current_forall = structured.structured_fuse_into_containing_op(
+                    anytype, anytype,
+                    producer_op,
+                    current_forall
+                )
+                
+            func = transform.get_parent_op(
+                anytype,
+                current_forall,
+                op_name="func.func",
+                deduplicate=True,
+            )
+            transform.apply_cse(func)
+            canonicalize(func)
+            
+            func = structured.VectorizeChildrenAndApplyPatternsOp(
+                func,
+                fold_type_extensions_into_contract=True,
+            ).result
+            transform.apply_cse(func)
+            canonicalize(func)
+            payload_mod = apply_registered_pass(payload_mod, "eliminate-empty-tensors")
+            identity_layout = LayoutMapOption.IdentityLayoutMap
+            payload_mod = transform_bufferization.OneShotBufferizeOp(
+                payload_mod,
+                allow_return_allocs_from_loops=True,
+                bufferize_function_boundaries=True,
+                function_boundary_type_conversion=identity_layout,
+            ).result
+            # fold memref.subviews into vector.transfer_read/write ops
+            payload_mod = apply_registered_pass(payload_mod, "fold-memref-alias-ops")
+            transform.apply_cse(payload_mod)
+            canonicalize(payload_mod)
+            
+            # convert forall to parallel
+            wg_loops = match_and_split(payload_mod, ops={"scf.forall"})
+            for wg_loop in wg_loops:
+                wg_loop = loop.loop_forall_to_parallel([anytype], wg_loop)
+            func = transform.get_parent_op(anytype, wg_loop)
+            # convert scf.parallel to gpu.launch
+            func = apply_registered_pass(func, "gpu-map-parallel-loops")
+            func = apply_registered_pass(func, "convert-parallel-loops-to-gpu")
+            func = apply_registered_pass(func, "lower-affine")
+            transform.apply_cse(func)
+            canonicalize(func)
+            
+            # set the number of threads for the gpu.launch operation
+            launch_op = match_and_split(func, ops={"gpu.launch"})
+            num_threads = parameters["sg_rows"] * parameters["subgroup_size"]
+            xegpu.set_gpu_launch_threads(launch_op[0], threads=[num_threads, 1, 1])
+            
+            # outline gpu func
+            func = apply_registered_pass(func, "lower-affine")
+            canonicalize(func)
+            func = apply_registered_pass(func, "gpu-launch-sink-index-computations")
+            payload_mod = apply_registered_pass(payload_mod, "gpu-kernel-outlining")
+            transform.apply_cse(payload_mod)
+            
+            # set xevm target
+            payload_mod = apply_registered_pass(
+                payload_mod,
+                "xevm-attach-target",
+                options={"O": "3", "chip": "bmg"},
+            )
+
+            # convert vector to xegpu
+            gpu_mod_ops = match_and_split(payload_mod, ops={"gpu.module"})
+            for gpu_mod in gpu_mod_ops:
+                gpu_func = match(gpu_mod, ops={"gpu.func"})
+                gpu_func = apply_registered_pass(gpu_func, "convert-vector-to-xegpu")
+                transform.apply_cse(gpu_func)
+                
+            # Set layout attributes for xegpu.store_nd operations
+            store_ops = match_and_split(gpu_func, ops={"xegpu.store_nd"}, nhandles=1)
+            xegpu.set_op_layout_attr(store_ops[0], sg_layout=[8, 1], sg_data=[8, 64])
+            
+            payload_mod = apply_registered_pass(
+                payload_mod, "gpu-lower-to-xevm-pipeline", options={"xegpu-op-level": "workgroup"}
+            )
+            # Required: yield to end the transform sequence
+            transform.yield_()
+    
+    return mod

From 1e63d7de4e88047d2251643e5649a477fd0a7c1b Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Tue, 24 Mar 2026 22:15:39 +0000
Subject: [PATCH 11/51] save working version

---
 lighthouse/pipeline/helper.py                 |  16 +
 lighthouse/schedule/xegpu/mlp_schedule.py     |  18 +-
 lighthouse/schedule/xegpu/softmax_schedule.py | 307 +++++++++++-------
 3 files changed, 203 insertions(+), 138 deletions(-)

diff --git a/lighthouse/pipeline/helper.py b/lighthouse/pipeline/helper.py
index 213b45df..9c4820d5 100644
--- a/lighthouse/pipeline/helper.py
+++ b/lighthouse/pipeline/helper.py
@@ -35,3 +35,19 @@ def cleanup_func(target):
     func = structured.MatchOp.match_op_names(target, ["func.func"]).result
     transform.apply_cse(func)
     canonicalize(func)
+
+
+class PipelineInterrupt(Exception):
+    """Exception to signal early termination of the transform schedule."""
+
+    pass
+
+
+def match_and_split(*args, nhandles=1, **kwargs):
+    """Helper function that splits matched handles."""
+    matched = match(*args, **kwargs)
+    anytype = transform.AnyOpType.get()
+    matched_ops = transform.split_handle((anytype,) * nhandles, matched)
+    if nhandles == 1:
+        matched_ops = [matched_ops]
+    return matched_ops
diff --git a/lighthouse/schedule/xegpu/mlp_schedule.py b/lighthouse/schedule/xegpu/mlp_schedule.py
index e9fa7909..94cbdf01 100644
--- a/lighthouse/schedule/xegpu/mlp_schedule.py
+++ b/lighthouse/schedule/xegpu/mlp_schedule.py
@@ -11,6 +11,8 @@
     apply_registered_pass,
     canonicalize,
     match,
+    match_and_split,
+    PipelineInterrupt,
 )
 
 from lighthouse.dialects import smt_ext, transform_smt_ext as td_smt_ext
@@ -33,22 +35,6 @@
 MIN_NB_THREADS = 16
 
 
-class PipelineInterrupt(Exception):
-    """Exception to signal early termination of the transform schedule."""
-
-    pass
-
-
-def match_and_split(*args, nhandles=1, **kwargs):
-    """Helper function that splits matched handles."""
-    matched = match(*args, **kwargs)
-    anytype = transform.AnyOpType.get()
-    matched_ops = transform.split_handle((anytype,) * nhandles, matched)
-    if nhandles == 1:
-        matched_ops = [matched_ops]
-    return matched_ops
-
-
 @KnobValue.ast_rewrite(in_exprs=True)
 def params_with_constraints_imposed(
     params: dict[str, int | None], knob_name_prefix=""
diff --git a/lighthouse/schedule/xegpu/softmax_schedule.py b/lighthouse/schedule/xegpu/softmax_schedule.py
index e01f9af6..98acd4e6 100644
--- a/lighthouse/schedule/xegpu/softmax_schedule.py
+++ b/lighthouse/schedule/xegpu/softmax_schedule.py
@@ -12,19 +12,11 @@
     apply_registered_pass,
     canonicalize,
     match,
+    match_and_split,
+    PipelineInterrupt,
 )
 
 
-def match_and_split(*args, nhandles=1, **kwargs):
-    """Helper function that splits matched handles."""
-    matched = match(*args, **kwargs)
-    anytype = transform.AnyOpType.get()
-    matched_ops = transform.split_handle((anytype,) * nhandles, matched)
-    if nhandles == 1:
-        matched_ops = [matched_ops]
-    return matched_ops
-
-
 def get_softmax_schedule_module(
     stop_at_stage: Optional[str] = None,
     parameters: Optional[dict] = None,
@@ -75,121 +67,192 @@ def get_softmax_schedule_module(
                 deduplicate=True,
             )
             
-            # Match all linalg.generic and linalg.fill operations
-            # We have 7 operations in softmax: 
-            # fill(max_init), max, sub, exp, fill(sum_init), sum, div
-            generic_ops = structured.structured_match(
-                transform.AnyOpType.get(),
+            xegpu_softmax_transform_schedule(
                 payload_mod,
-                ops=["linalg.generic", "linalg.fill"]
-            )
-            
-            # Split the handle into individual operation handles
-            anytype = transform.AnyOpType.get()
-            split_ops = transform.split_handle(
-                (anytype, anytype, anytype, anytype, anytype, anytype, anytype),  # 7 result types
-                generic_ops
-            )
-            
-            # Reverse split_ops to have operations in reverse order
-            split_ops = list(reversed(split_ops))
-            
-            # The first operation (after reversal) is the division - this is the consumer
-            last_op = split_ops[0]
-
-            # Tile the last operation using tile_using_forall
-            tiled_op, for_op = structured.structured_tile_using_forall(
-                anytype, anytype,
-                last_op,
-                num_threads=[],
-                tile_sizes=[],
-                static_tile_sizes=(parameters["wg_rows"],),
+                parameters=parameters,
+                stop_at_stage=stop_at_stage or "",
             )
+    
+    return mod
 
-            # Fuse the producer operations into the forall loop
-            # Iterate through remaining operations (already in reverse order)
-            current_forall = for_op
-            for producer_op in split_ops[1:]:
-                fused_op, current_forall = structured.structured_fuse_into_containing_op(
-                    anytype, anytype,
-                    producer_op,
-                    current_forall
-                )
-                
-            func = transform.get_parent_op(
-                anytype,
-                current_forall,
-                op_name="func.func",
-                deduplicate=True,
-            )
-            transform.apply_cse(func)
-            canonicalize(func)
-            
-            func = structured.VectorizeChildrenAndApplyPatternsOp(
-                func,
-                fold_type_extensions_into_contract=True,
-            ).result
-            transform.apply_cse(func)
-            canonicalize(func)
-            payload_mod = apply_registered_pass(payload_mod, "eliminate-empty-tensors")
-            identity_layout = LayoutMapOption.IdentityLayoutMap
-            payload_mod = transform_bufferization.OneShotBufferizeOp(
-                payload_mod,
-                allow_return_allocs_from_loops=True,
-                bufferize_function_boundaries=True,
-                function_boundary_type_conversion=identity_layout,
-            ).result
-            # fold memref.subviews into vector.transfer_read/write ops
-            payload_mod = apply_registered_pass(payload_mod, "fold-memref-alias-ops")
-            transform.apply_cse(payload_mod)
-            canonicalize(payload_mod)
-            
-            # convert forall to parallel
-            wg_loops = match_and_split(payload_mod, ops={"scf.forall"})
-            for wg_loop in wg_loops:
-                wg_loop = loop.loop_forall_to_parallel([anytype], wg_loop)
-            func = transform.get_parent_op(anytype, wg_loop)
-            # convert scf.parallel to gpu.launch
-            func = apply_registered_pass(func, "gpu-map-parallel-loops")
-            func = apply_registered_pass(func, "convert-parallel-loops-to-gpu")
-            func = apply_registered_pass(func, "lower-affine")
-            transform.apply_cse(func)
-            canonicalize(func)
-            
-            # set the number of threads for the gpu.launch operation
-            launch_op = match_and_split(func, ops={"gpu.launch"})
-            num_threads = parameters["sg_rows"] * parameters["subgroup_size"]
-            xegpu.set_gpu_launch_threads(launch_op[0], threads=[num_threads, 1, 1])
-            
-            # outline gpu func
-            func = apply_registered_pass(func, "lower-affine")
-            canonicalize(func)
-            func = apply_registered_pass(func, "gpu-launch-sink-index-computations")
-            payload_mod = apply_registered_pass(payload_mod, "gpu-kernel-outlining")
-            transform.apply_cse(payload_mod)
-            
-            # set xevm target
-            payload_mod = apply_registered_pass(
-                payload_mod,
-                "xevm-attach-target",
-                options={"O": "3", "chip": "bmg"},
-            )
 
-            # convert vector to xegpu
-            gpu_mod_ops = match_and_split(payload_mod, ops={"gpu.module"})
-            for gpu_mod in gpu_mod_ops:
-                gpu_func = match(gpu_mod, ops={"gpu.func"})
-                gpu_func = apply_registered_pass(gpu_func, "convert-vector-to-xegpu")
-                transform.apply_cse(gpu_func)
-                
-            # Set layout attributes for xegpu.store_nd operations
-            store_ops = match_and_split(gpu_func, ops={"xegpu.store_nd"}, nhandles=1)
-            xegpu.set_op_layout_attr(store_ops[0], sg_layout=[8, 1], sg_data=[8, 64])
-            
-            payload_mod = apply_registered_pass(
-                payload_mod, "gpu-lower-to-xevm-pipeline", options={"xegpu-op-level": "workgroup"}
-            )
-            # Required: yield to end the transform sequence
-            transform.yield_()
+def xegpu_softmax_transform_schedule(
+    mod: ir.Value[transform.AnyOpType],
+    parameters: dict,
+    stop_at_stage: str = "",
+):
+    """Transform schedule for softmax payload."""
+    try:
+        mod = bundle_xegpu_softmax_schedule(
+            mod,
+            parameters=parameters,
+            stop_at_stage=stop_at_stage,
+        )
+
+        mod = bundle_xegpu_to_binary(
+            mod,
+            stop_at_stage=stop_at_stage,
+        )
+    except PipelineInterrupt:
+        pass
+    finally:
+        transform.yield_()
+
+
+def bundle_xegpu_softmax_schedule(
+    mod: ir.Value[transform.AnyOpType],
+    parameters: dict,
+    stop_at_stage: str = "",
+) -> ir.Value[transform.AnyOpType]:
+    """Schedule for lowering softmax payload to xegpu wg level."""
     
+    if stop_at_stage == "initial":
+        raise PipelineInterrupt()
+    
+    anytype = transform.AnyOpType.get()
+    
+    # Match all linalg.generic and linalg.fill operations
+    # We have 7 operations in softmax: 
+    # fill(max_init), max, sub, exp, fill(sum_init), sum, div
+    generic_ops = structured.structured_match(
+        transform.AnyOpType.get(),
+        mod,
+        ops=["linalg.generic", "linalg.fill"]
+    )
+    
+    # Split the handle into individual operation handles
+    split_ops = transform.split_handle(
+        (anytype, anytype, anytype, anytype, anytype, anytype, anytype),  # 7 result types
+        generic_ops
+    )
+    
+    # Reverse split_ops to have operations in reverse order
+    split_ops = list(reversed(split_ops))
+    
+    # The first operation (after reversal) is the division - this is the consumer
+    last_op = split_ops[0]
+
+    # Tile the last operation using tile_using_forall
+    tiled_op, for_op = structured.structured_tile_using_forall(
+        anytype, anytype,
+        last_op,
+        num_threads=[],
+        tile_sizes=[],
+        static_tile_sizes=(parameters["wg_rows"],),
+    )
+
+    # Fuse the producer operations into the forall loop
+    # Iterate through remaining operations (already in reverse order)
+    current_forall = for_op
+    for producer_op in split_ops[1:]:
+        fused_op, current_forall = structured.structured_fuse_into_containing_op(
+            anytype, anytype,
+            producer_op,
+            current_forall
+        )
+        
+    func = transform.get_parent_op(
+        anytype,
+        current_forall,
+        op_name="func.func",
+        deduplicate=True,
+    )
+    transform.apply_cse(func)
+    canonicalize(func)
+    
+    if stop_at_stage == "tiled":
+        raise PipelineInterrupt()
+    
+    # vectorize
+    func = structured.VectorizeChildrenAndApplyPatternsOp(
+        func,
+        fold_type_extensions_into_contract=True,
+    ).result
+    transform.apply_cse(func)
+    canonicalize(func)
+    
+    if stop_at_stage == "vectorized":
+        raise PipelineInterrupt()
+    
+    # bufferize
+    mod = apply_registered_pass(mod, "eliminate-empty-tensors")
+    identity_layout = LayoutMapOption.IdentityLayoutMap
+    mod = transform_bufferization.OneShotBufferizeOp(
+        mod,
+        allow_return_allocs_from_loops=True,
+        bufferize_function_boundaries=True,
+        function_boundary_type_conversion=identity_layout,
+    ).result
+    # fold memref.subviews into vector.transfer_read/write ops
+    mod = apply_registered_pass(mod, "fold-memref-alias-ops")
+    transform.apply_cse(mod)
+    canonicalize(mod)
+    
+    if stop_at_stage == "bufferized":
+        raise PipelineInterrupt()
+    
+    # convert forall to parallel
+    wg_loops = match_and_split(mod, ops={"scf.forall"})
+    for wg_loop in wg_loops:
+        wg_loop = loop.loop_forall_to_parallel([anytype], wg_loop)
+    func = transform.get_parent_op(anytype, wg_loop)
+    
+    # convert scf.parallel to gpu.launch
+    func = apply_registered_pass(func, "gpu-map-parallel-loops")
+    func = apply_registered_pass(func, "convert-parallel-loops-to-gpu")
+    func = apply_registered_pass(func, "lower-affine")
+    transform.apply_cse(func)
+    canonicalize(func)
+    
+    # set the number of threads for the gpu.launch operation
+    launch_op = match_and_split(func, ops={"gpu.launch"})
+    num_threads = parameters["sg_rows"] * parameters["subgroup_size"]
+    xegpu.set_gpu_launch_threads(launch_op[0], threads=[num_threads, 1, 1])
+    
+    # outline gpu func
+    func = apply_registered_pass(func, "lower-affine")
+    canonicalize(func)
+    func = apply_registered_pass(func, "gpu-launch-sink-index-computations")
+    mod = apply_registered_pass(mod, "gpu-kernel-outlining")
+    transform.apply_cse(mod)
+    
+    # set xevm target
+    mod = apply_registered_pass(
+        mod,
+        "xevm-attach-target",
+        options={"O": "3", "chip": "bmg"},
+    )
+
+    # convert vector to xegpu
+    gpu_mod_ops = match_and_split(mod, ops={"gpu.module"})
+    for gpu_mod in gpu_mod_ops:
+        gpu_func = match(gpu_mod, ops={"gpu.func"})
+        gpu_func = apply_registered_pass(gpu_func, "convert-vector-to-xegpu")
+        transform.apply_cse(gpu_func)
+    
+    if stop_at_stage == "xegpu-initial":
+        raise PipelineInterrupt()
+    
+    # Set layout attributes for xegpu.store_nd operations
+    store_ops = match_and_split(gpu_func, ops={"xegpu.store_nd"}, nhandles=1)
+    xegpu.set_op_layout_attr(store_ops[0], sg_layout=[8, 1], sg_data=[8, 64])
+    
+    if stop_at_stage == "xegpu-wg":
+        raise PipelineInterrupt()
+    
+    return mod
+
+
+def bundle_xegpu_to_binary(
+    mod: ir.Value, stop_at_stage: str = ""
+) -> ir.Value[transform.AnyOpType]:
+    """Schedule for lowering xegpu wg level to binary."""
+    # upstream xegpu/xevm pipeline is payload independent.
+    mod = apply_registered_pass(
+        mod, "gpu-lower-to-xevm-pipeline", options={"xegpu-op-level": "workgroup"}
+    )
+    
+    if stop_at_stage == "final":
+        raise PipelineInterrupt()
+
     return mod

From 64b5d73f52813059ad51293e028c0c1f2cabd459 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Tue, 24 Mar 2026 22:47:05 +0000
Subject: [PATCH 12/51] softmax schedule: derive launch threads and store_nd layout from parameters

---
 examples/xegpu/softmax.py                     |  1 +
 lighthouse/schedule/xegpu/softmax_schedule.py | 10 +++++++---
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/examples/xegpu/softmax.py b/examples/xegpu/softmax.py
index 474eb1a3..c31af47a 100644
--- a/examples/xegpu/softmax.py
+++ b/examples/xegpu/softmax.py
@@ -260,6 +260,7 @@ def parse_cli():
     args = parse_cli()
 
     params = {
+        "sizes": args.sizes,
         "wg_rows": args.wg_rows,
         "sg_rows": args.sg_rows,
         "subgroup_size": args.subgroup_size,
diff --git a/lighthouse/schedule/xegpu/softmax_schedule.py b/lighthouse/schedule/xegpu/softmax_schedule.py
index 98acd4e6..26bc5f3e 100644
--- a/lighthouse/schedule/xegpu/softmax_schedule.py
+++ b/lighthouse/schedule/xegpu/softmax_schedule.py
@@ -206,7 +206,8 @@ def bundle_xegpu_softmax_schedule(
     
     # set the number of threads for the gpu.launch operation
     launch_op = match_and_split(func, ops={"gpu.launch"})
-    num_threads = parameters["sg_rows"] * parameters["subgroup_size"]
+    num_subgroups = parameters["wg_rows"] // parameters["sg_rows"]
+    num_threads = num_subgroups * parameters["subgroup_size"]
     xegpu.set_gpu_launch_threads(launch_op[0], threads=[num_threads, 1, 1])
     
     # outline gpu func
@@ -233,9 +234,12 @@ def bundle_xegpu_softmax_schedule(
     if stop_at_stage == "xegpu-initial":
         raise PipelineInterrupt()
     
-    # Set layout attributes for xegpu.store_nd operations
+    # Set layout attributes for xegpu.store_nd operations.
+    # FIXME: currently ecah subgroup is handling the entire row.
     store_ops = match_and_split(gpu_func, ops={"xegpu.store_nd"}, nhandles=1)
-    xegpu.set_op_layout_attr(store_ops[0], sg_layout=[8, 1], sg_data=[8, 64])
+    sg_layout = [parameters["sg_rows"], 1]
+    sg_data = [parameters["sg_rows"], parameters["sizes"][1]]
+    xegpu.set_op_layout_attr(store_ops[0], sg_layout=sg_layout, sg_data=sg_data)
     
     if stop_at_stage == "xegpu-wg":
         raise PipelineInterrupt()

From 108f2c09e8ee111b6b96f97553bc9bacead0013e Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Wed, 25 Mar 2026 20:48:48 +0000
Subject: [PATCH 13/51] Import bundle_xegpu_to_binary from shared xegpu helper in mlp and softmax schedules

---
 lighthouse/schedule/xegpu/mlp_schedule.py     | 13 +------------
 lighthouse/schedule/xegpu/softmax_schedule.py | 17 ++---------------
 2 files changed, 3 insertions(+), 27 deletions(-)

diff --git a/lighthouse/schedule/xegpu/mlp_schedule.py b/lighthouse/schedule/xegpu/mlp_schedule.py
index 94cbdf01..ab11a75c 100644
--- a/lighthouse/schedule/xegpu/mlp_schedule.py
+++ b/lighthouse/schedule/xegpu/mlp_schedule.py
@@ -14,6 +14,7 @@
     match_and_split,
     PipelineInterrupt,
 )
+from lighthouse.schedule.xegpu.helper import bundle_xegpu_to_binary
 
 from lighthouse.dialects import smt_ext, transform_smt_ext as td_smt_ext
 from lighthouse.dialects.transform_tune_ext import knob, KnobValue
@@ -600,15 +601,3 @@ def annotate_ab_load(tile, layout_load, layout_dpas):
 
     canonicalize(gpu_func)
     transform.apply_cse(gpu_func)
-
-
-def bundle_xegpu_to_binary(
-    mod: ir.Value, stop_at_stage: str = ""
-) -> ir.Value[transform.AnyOpType]:
-    """Schedule for lowering xegpu wg level to binary."""
-    # upstream xegpu/xevm pipeline is payload independent.
-    mod = apply_registered_pass(
-        mod, "gpu-lower-to-xevm-pipeline", options={"xegpu-op-level": "workgroup"}
-    )
-
-    return mod
diff --git a/lighthouse/schedule/xegpu/softmax_schedule.py b/lighthouse/schedule/xegpu/softmax_schedule.py
index 26bc5f3e..9e5226b9 100644
--- a/lighthouse/schedule/xegpu/softmax_schedule.py
+++ b/lighthouse/schedule/xegpu/softmax_schedule.py
@@ -15,6 +15,7 @@
     match_and_split,
     PipelineInterrupt,
 )
+from lighthouse.schedule.xegpu.helper import bundle_xegpu_to_binary
 
 
 def get_softmax_schedule_module(
@@ -38,6 +39,7 @@ def get_softmax_schedule_module(
             - wg_rows: Number of rows per workgroup
             - sg_rows: Number of rows per subgroup  
             - subgroup_size: Size of subgroup
+            - sizes: Tuple with the sizes of the input tensors (e.g. (M, N))
             
     Returns:
         MLIR module containing the transform schedule
@@ -245,18 +247,3 @@ def bundle_xegpu_softmax_schedule(
         raise PipelineInterrupt()
     
     return mod
-
-
-def bundle_xegpu_to_binary(
-    mod: ir.Value, stop_at_stage: str = ""
-) -> ir.Value[transform.AnyOpType]:
-    """Schedule for lowering xegpu wg level to binary."""
-    # upstream xegpu/xevm pipeline is payload independent.
-    mod = apply_registered_pass(
-        mod, "gpu-lower-to-xevm-pipeline", options={"xegpu-op-level": "workgroup"}
-    )
-    
-    if stop_at_stage == "final":
-        raise PipelineInterrupt()
-
-    return mod

From a7e1e6c7535c5dd085c14ebea39ae13ac25eeb69 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Wed, 25 Mar 2026 20:58:29 +0000
Subject: [PATCH 14/51] Add lighthouse/schedule/xegpu/helper.py with bundle_xegpu_to_binary

---
 lighthouse/schedule/xegpu/helper.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 lighthouse/schedule/xegpu/helper.py

diff --git a/lighthouse/schedule/xegpu/helper.py b/lighthouse/schedule/xegpu/helper.py
new file mode 100644
index 00000000..1452301e
--- /dev/null
+++ b/lighthouse/schedule/xegpu/helper.py
@@ -0,0 +1,21 @@
+"""Helper functions for XeGPU scheduling."""
+
+from mlir import ir
+from mlir.dialects import transform
+
+from lighthouse.pipeline.helper import apply_registered_pass, PipelineInterrupt
+
+
+def bundle_xegpu_to_binary(
+    mod: ir.Value, stop_at_stage: str = ""
+) -> ir.Value[transform.AnyOpType]:
+    """Schedule for lowering xegpu wg level to binary."""
+    # upstream xegpu/xevm pipeline is payload independent.
+    mod = apply_registered_pass(
+        mod, "gpu-lower-to-xevm-pipeline", options={"xegpu-op-level": "workgroup"}
+    )
+    
+    if stop_at_stage == "final":
+        raise PipelineInterrupt()
+
+    return mod

From df53caa54ff2c066393f3fca9479203cc0ef471a Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Wed, 25 Mar 2026 21:09:29 +0000
Subject: [PATCH 15/51] Fix pre-commit lint and formatting issues

---
 examples/xegpu/softmax.py                     | 12 +--
 .../ingress/mlir_gen/gpu_softmax_payload.py   | 68 +++++++++------
 lighthouse/schedule/xegpu/helper.py           |  2 +-
 lighthouse/schedule/xegpu/softmax_schedule.py | 87 ++++++++++---------
 4 files changed, 97 insertions(+), 72 deletions(-)

diff --git a/examples/xegpu/softmax.py b/examples/xegpu/softmax.py
index c31af47a..e3cf5840 100644
--- a/examples/xegpu/softmax.py
+++ b/examples/xegpu/softmax.py
@@ -28,7 +28,7 @@
 def softmax_complexity(M: int, N: int, nbytes: int):
     """
     Complexity of softmax operation.
-    
+
     For each row:
     - O(N) to find max
     - O(N) to compute exp(x - max) and sum
@@ -127,7 +127,7 @@ def check_correctness(
 
         output_ref = self._reference_solution
         output_computed = output_host.astype(np.float32)
-        
+
         if verbose > 1:
             print("Reference solution (first 5 rows):")
             print(output_ref[:5])
@@ -137,10 +137,10 @@ def check_correctness(
         # Check row sums are close to 1.0
         row_sums = np.sum(output_computed, axis=1)
         sums_ok = np.allclose(row_sums, 1.0, rtol=1e-5, atol=1e-6)
-        
+
         # Check values match reference
         values_ok = np.allclose(output_computed, output_ref, rtol=1e-4, atol=1e-6)
-        
+
         success = sums_ok and values_ok
 
         if verbose:
@@ -149,7 +149,9 @@ def check_correctness(
             else:
                 print("FAILED!")
                 if not sums_ok:
-                    print(f"  Row sums check failed. Min: {row_sums.min():.6f}, Max: {row_sums.max():.6f}")
+                    print(
+                        f"  Row sums check failed. Min: {row_sums.min():.6f}, Max: {row_sums.max():.6f}"
+                    )
                 if not values_ok:
                     max_diff = np.abs(output_computed - output_ref).max()
                     print(f"  Values mismatch. Max abs diff: {max_diff:.6e}")
diff --git a/lighthouse/ingress/mlir_gen/gpu_softmax_payload.py b/lighthouse/ingress/mlir_gen/gpu_softmax_payload.py
index 26fd9042..4568448e 100644
--- a/lighthouse/ingress/mlir_gen/gpu_softmax_payload.py
+++ b/lighthouse/ingress/mlir_gen/gpu_softmax_payload.py
@@ -1,10 +1,13 @@
 """Generate MLIR payload for GPU softmax operation."""
 
 from mlir import ir
-from mlir.dialects import linalg, bufferization, arith, tensor, func, math
+from mlir.dialects import linalg, bufferization, arith, tensor, math
 
 from lighthouse.utils.mlir import func_cif
-from lighthouse.ingress.mlir_gen.gpu_utils import emit_gpu_util_funcs, emit_buf_to_tensor
+from lighthouse.ingress.mlir_gen.gpu_utils import (
+    emit_gpu_util_funcs,
+    emit_buf_to_tensor,
+)
 
 
 def generate_gpu_softmax_payload(
@@ -15,33 +18,33 @@ def generate_gpu_softmax_payload(
 ) -> ir.Module:
     """
     Generate MLIR module for softmax payload.
-    
+
     Computes softmax along the last dimension (rows):
     output[i, j] = exp(input[i, j] - max_i) / sum_i(exp(input[i, j] - max_i))
-    
+
     where max_i and sum_i are computed over row i.
-    
+
     Args:
         func_name: Name of the payload function
         M: Number of rows
-        N: Number of columns  
+        N: Number of columns
         dtype: MLIR element type (e.g., F32Type)
-        
+
     Returns:
         MLIR module containing the softmax payload function
     """
     mod = ir.Module.create()
     shape = (M, N)
     memref_t = ir.MemRefType.get(shape, dtype)
-    
+
     with ir.InsertionPoint(mod.body):
         # Function signature: payload(output, input)
         @func_cif(memref_t, memref_t, name=func_name)
         def payload(output, input_arg):
             # Convert memrefs to tensors
-            output_tensor = emit_buf_to_tensor(output, restrict=True, writable=True)
+            emit_buf_to_tensor(output, restrict=True, writable=True)
             input_tensor = emit_buf_to_tensor(input_arg, restrict=True)
-            
+
             # Define affine maps for indexing
             # #map = affine_map<(d0, d1) -> (d0, d1)>  (identity 2D)
             # #map1 = affine_map<(d0, d1) -> (d0)>     (broadcast/reduce along d1)
@@ -49,67 +52,82 @@ def payload(output, input_arg):
             d1 = ir.AffineDimExpr.get(1)
             map_2d = ir.AffineMap.get(2, 0, [d0, d1])
             map_1d = ir.AffineMap.get(2, 0, [d0])
-            
+
             # Step 1: Find max - linalg.generic reduction
-            neg_inf = arith.constant(dtype, float('-inf'))
+            neg_inf = arith.constant(dtype, float("-inf"))
             max_init = tensor.empty((M,), dtype)
             max_filled = linalg.fill(neg_inf, outs=[max_init])
-            
+
             @linalg.generic(
                 [input_tensor],  # inputs
                 [max_filled],  # outputs
                 [map_2d, map_1d],  # indexing_maps
-                [linalg.IteratorType.parallel, linalg.IteratorType.reduction],  # iterator_types
+                [
+                    linalg.IteratorType.parallel,
+                    linalg.IteratorType.reduction,
+                ],  # iterator_types
             )
             def row_max(in_val, acc):
                 return arith.maximumf(in_val, acc)
-            
+
             # Step 2: Subtract max (broadcast) - linalg.generic elementwise
             output_init = tensor.empty((M, N), dtype)
-            
+
             @linalg.generic(
                 [input_tensor, row_max],  # inputs
                 [output_init],  # outputs
                 [map_2d, map_1d, map_2d],  # indexing_maps
-                [linalg.IteratorType.parallel, linalg.IteratorType.parallel],  # iterator_types
+                [
+                    linalg.IteratorType.parallel,
+                    linalg.IteratorType.parallel,
+                ],  # iterator_types
             )
             def shifted(in_val, max_val, out):
                 return arith.subf(in_val, max_val)
-            
+
             # Step 3: Compute exp - linalg.generic elementwise
             @linalg.generic(
                 [shifted],  # inputs
                 [output_init],  # outputs
                 [map_2d, map_2d],  # indexing_maps
-                [linalg.IteratorType.parallel, linalg.IteratorType.parallel],  # iterator_types
+                [
+                    linalg.IteratorType.parallel,
+                    linalg.IteratorType.parallel,
+                ],  # iterator_types
             )
             def exp_vals(in_val, out):
                 return math.exp(in_val)
-            
+
             # Step 4: Sum exp values - linalg.generic reduction
             sum_init = tensor.empty((M,), dtype)
             zero = arith.constant(dtype, 0.0)
             sum_filled = linalg.fill(zero, outs=[sum_init])
-            
+
             @linalg.generic(
                 [exp_vals],  # inputs
                 [sum_filled],  # outputs
                 [map_2d, map_1d],  # indexing_maps
-                [linalg.IteratorType.parallel, linalg.IteratorType.reduction],  # iterator_types
+                [
+                    linalg.IteratorType.parallel,
+                    linalg.IteratorType.reduction,
+                ],  # iterator_types
             )
             def row_sum(in_val, acc):
                 return arith.addf(in_val, acc)
-            
+
             # Step 5: Divide by sum (broadcast) - linalg.generic elementwise
             @linalg.generic(
                 [exp_vals, row_sum],  # inputs
                 [output_init],  # outputs
                 [map_2d, map_1d, map_2d],  # indexing_maps
-                [linalg.IteratorType.parallel, linalg.IteratorType.parallel],  # iterator_types
+                [
+                    linalg.IteratorType.parallel,
+                    linalg.IteratorType.parallel,
+                ],  # iterator_types
             )
             def result(exp_val, sum_val, out):
                 return arith.divf(exp_val, sum_val)
-            
+
             # Materialize result back to output memref
             bufferization.materialize_in_destination(
                 None, result, output, restrict=True, writable=True
diff --git a/lighthouse/schedule/xegpu/helper.py b/lighthouse/schedule/xegpu/helper.py
index 1452301e..0c1d93a7 100644
--- a/lighthouse/schedule/xegpu/helper.py
+++ b/lighthouse/schedule/xegpu/helper.py
@@ -14,7 +14,7 @@ def bundle_xegpu_to_binary(
     mod = apply_registered_pass(
         mod, "gpu-lower-to-xevm-pipeline", options={"xegpu-op-level": "workgroup"}
     )
-    
+
     if stop_at_stage == "final":
         raise PipelineInterrupt()
 
diff --git a/lighthouse/schedule/xegpu/softmax_schedule.py b/lighthouse/schedule/xegpu/softmax_schedule.py
index 9e5226b9..d9701b84 100644
--- a/lighthouse/schedule/xegpu/softmax_schedule.py
+++ b/lighthouse/schedule/xegpu/softmax_schedule.py
@@ -24,7 +24,7 @@ def get_softmax_schedule_module(
 ) -> ir.Module:
     """
     Generate transform schedule for softmax operation.
-    
+
     The schedule performs the following transformations:
     1. Tile the consumer operation (division) using forall
     2. Fuse producer operations into the forall loop
@@ -32,32 +32,32 @@ def get_softmax_schedule_module(
     4. Bufferize tensors
     5. Convert to GPU dialect
     6. Lower to XeGPU operations
-    
+
     Args:
         stop_at_stage: Optional stage name to stop early (for debugging)
         parameters: Dictionary with scheduling parameters:
             - wg_rows: Number of rows per workgroup
-            - sg_rows: Number of rows per subgroup  
+            - sg_rows: Number of rows per subgroup
             - subgroup_size: Size of subgroup
             - sizes: Tuple with the sizes of the input tensors (e.g. (M, N))
-            
+
     Returns:
         MLIR module containing the transform schedule
     """
     assert parameters is not None, "Schedule parameters must be provided"
-    
+
     mod = ir.Module.create()
     mod.operation.attributes["transform.with_named_sequence"] = ir.UnitAttr.get()
-    
+
     with ir.InsertionPoint(mod.body):
         # Create a transform sequence with proper signature
         named_sequence = transform.named_sequence(
             "__transform_main",
             [transform.AnyOpType.get()],  # input: module
             [],  # no outputs
-            arg_attrs=[{"transform.readonly": ir.UnitAttr.get()}]
+            arg_attrs=[{"transform.readonly": ir.UnitAttr.get()}],
         )
-        
+
         with ir.InsertionPoint(named_sequence.body):
             # match the payload module
             anytype = transform.AnyOpType.get()
@@ -68,13 +68,13 @@ def get_softmax_schedule_module(
                 op_name="builtin.module",
                 deduplicate=True,
             )
-            
+
             xegpu_softmax_transform_schedule(
                 payload_mod,
                 parameters=parameters,
                 stop_at_stage=stop_at_stage or "",
             )
-    
+
     return mod
 
 
@@ -107,36 +107,43 @@ def bundle_xegpu_softmax_schedule(
     stop_at_stage: str = "",
 ) -> ir.Value[transform.AnyOpType]:
     """Schedule for lowering softmax payload to xegpu wg level."""
-    
+
     if stop_at_stage == "initial":
         raise PipelineInterrupt()
-    
+
     anytype = transform.AnyOpType.get()
-    
+
     # Match all linalg.generic and linalg.fill operations
-    # We have 7 operations in softmax: 
+    # We have 7 operations in softmax:
     # fill(max_init), max, sub, exp, fill(sum_init), sum, div
     generic_ops = structured.structured_match(
-        transform.AnyOpType.get(),
-        mod,
-        ops=["linalg.generic", "linalg.fill"]
+        transform.AnyOpType.get(), mod, ops=["linalg.generic", "linalg.fill"]
     )
-    
+
     # Split the handle into individual operation handles
     split_ops = transform.split_handle(
-        (anytype, anytype, anytype, anytype, anytype, anytype, anytype),  # 7 result types
-        generic_ops
+        (
+            anytype,
+            anytype,
+            anytype,
+            anytype,
+            anytype,
+            anytype,
+            anytype,
+        ),  # 7 result types
+        generic_ops,
     )
-    
+
     # Reverse split_ops to have operations in reverse order
     split_ops = list(reversed(split_ops))
-    
+
     # The first operation (after reversal) is the division - this is the consumer
     last_op = split_ops[0]
 
     # Tile the last operation using tile_using_forall
     tiled_op, for_op = structured.structured_tile_using_forall(
-        anytype, anytype,
+        anytype,
+        anytype,
         last_op,
         num_threads=[],
         tile_sizes=[],
@@ -148,11 +155,9 @@ def bundle_xegpu_softmax_schedule(
     current_forall = for_op
     for producer_op in split_ops[1:]:
         fused_op, current_forall = structured.structured_fuse_into_containing_op(
-            anytype, anytype,
-            producer_op,
-            current_forall
+            anytype, anytype, producer_op, current_forall
         )
-        
+
     func = transform.get_parent_op(
         anytype,
         current_forall,
@@ -161,10 +166,10 @@ def bundle_xegpu_softmax_schedule(
     )
     transform.apply_cse(func)
     canonicalize(func)
-    
+
     if stop_at_stage == "tiled":
         raise PipelineInterrupt()
-    
+
     # vectorize
     func = structured.VectorizeChildrenAndApplyPatternsOp(
         func,
@@ -172,10 +177,10 @@ def bundle_xegpu_softmax_schedule(
     ).result
     transform.apply_cse(func)
     canonicalize(func)
-    
+
     if stop_at_stage == "vectorized":
         raise PipelineInterrupt()
-    
+
     # bufferize
     mod = apply_registered_pass(mod, "eliminate-empty-tensors")
     identity_layout = LayoutMapOption.IdentityLayoutMap
@@ -189,36 +194,36 @@ def bundle_xegpu_softmax_schedule(
     mod = apply_registered_pass(mod, "fold-memref-alias-ops")
     transform.apply_cse(mod)
     canonicalize(mod)
-    
+
     if stop_at_stage == "bufferized":
         raise PipelineInterrupt()
-    
+
     # convert forall to parallel
     wg_loops = match_and_split(mod, ops={"scf.forall"})
     for wg_loop in wg_loops:
         wg_loop = loop.loop_forall_to_parallel([anytype], wg_loop)
     func = transform.get_parent_op(anytype, wg_loop)
-    
+
     # convert scf.parallel to gpu.launch
     func = apply_registered_pass(func, "gpu-map-parallel-loops")
     func = apply_registered_pass(func, "convert-parallel-loops-to-gpu")
     func = apply_registered_pass(func, "lower-affine")
     transform.apply_cse(func)
     canonicalize(func)
-    
+
     # set the number of threads for the gpu.launch operation
     launch_op = match_and_split(func, ops={"gpu.launch"})
     num_subgroups = parameters["wg_rows"] // parameters["sg_rows"]
     num_threads = num_subgroups * parameters["subgroup_size"]
     xegpu.set_gpu_launch_threads(launch_op[0], threads=[num_threads, 1, 1])
-    
+
     # outline gpu func
     func = apply_registered_pass(func, "lower-affine")
     canonicalize(func)
     func = apply_registered_pass(func, "gpu-launch-sink-index-computations")
     mod = apply_registered_pass(mod, "gpu-kernel-outlining")
     transform.apply_cse(mod)
-    
+
     # set xevm target
     mod = apply_registered_pass(
         mod,
@@ -232,18 +237,18 @@ def bundle_xegpu_softmax_schedule(
         gpu_func = match(gpu_mod, ops={"gpu.func"})
         gpu_func = apply_registered_pass(gpu_func, "convert-vector-to-xegpu")
         transform.apply_cse(gpu_func)
-    
+
     if stop_at_stage == "xegpu-initial":
         raise PipelineInterrupt()
-    
+
     # Set layout attributes for xegpu.store_nd operations.
     # FIXME: currently ecah subgroup is handling the entire row.
     store_ops = match_and_split(gpu_func, ops={"xegpu.store_nd"}, nhandles=1)
     sg_layout = [parameters["sg_rows"], 1]
     sg_data = [parameters["sg_rows"], parameters["sizes"][1]]
     xegpu.set_op_layout_attr(store_ops[0], sg_layout=sg_layout, sg_data=sg_data)
-    
+
     if stop_at_stage == "xegpu-wg":
         raise PipelineInterrupt()
-    
+
     return mod

From 9bcc6538adc23d8cb2f9186bfb24cb581919c68d Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Fri, 27 Mar 2026 22:36:53 +0000
Subject: [PATCH 16/51] use linalg.softmax

---
 .../ingress/mlir_gen/gpu_softmax_payload.py   | 90 ++-----------------
 lighthouse/schedule/xegpu/softmax_schedule.py | 60 ++++---------
 2 files changed, 27 insertions(+), 123 deletions(-)

diff --git a/lighthouse/ingress/mlir_gen/gpu_softmax_payload.py b/lighthouse/ingress/mlir_gen/gpu_softmax_payload.py
index 4568448e..05dc148a 100644
--- a/lighthouse/ingress/mlir_gen/gpu_softmax_payload.py
+++ b/lighthouse/ingress/mlir_gen/gpu_softmax_payload.py
@@ -1,7 +1,7 @@
 """Generate MLIR payload for GPU softmax operation."""
 
 from mlir import ir
-from mlir.dialects import linalg, bufferization, arith, tensor, math
+from mlir.dialects import linalg, bufferization, tensor
 
 from lighthouse.utils.mlir import func_cif
 from lighthouse.ingress.mlir_gen.gpu_utils import (
@@ -45,88 +45,16 @@ def payload(output, input_arg):
             emit_buf_to_tensor(output, restrict=True, writable=True)
             input_tensor = emit_buf_to_tensor(input_arg, restrict=True)
 
-            # Define affine maps for indexing
-            # #map = affine_map<(d0, d1) -> (d0, d1)>  (identity 2D)
-            # #map1 = affine_map<(d0, d1) -> (d0)>     (broadcast/reduce along d1)
-            d0 = ir.AffineDimExpr.get(0)
-            d1 = ir.AffineDimExpr.get(1)
-            map_2d = ir.AffineMap.get(2, 0, [d0, d1])
-            map_1d = ir.AffineMap.get(2, 0, [d0])
+            # Create the (uninitialized) output tensor; tensor.empty does not zero-fill
+            output_init = tensor.empty(shape, dtype)
 
-            # Step 1: Find max - linalg.generic reduction
-            neg_inf = arith.constant(dtype, float("-inf"))
-            max_init = tensor.empty((M,), dtype)
-            max_filled = linalg.fill(neg_inf, outs=[max_init])
-
-            @linalg.generic(
-                [input_tensor],  # inputs
-                [max_filled],  # outputs
-                [map_2d, map_1d],  # indexing_maps
-                [
-                    linalg.IteratorType.parallel,
-                    linalg.IteratorType.reduction,
-                ],  # iterator_types
-            )
-            def row_max(in_val, acc):
-                return arith.maximumf(in_val, acc)
-
-            # Step 2: Subtract max (broadcast) - linalg.generic elementwise
-            output_init = tensor.empty((M, N), dtype)
-
-            @linalg.generic(
-                [input_tensor, row_max],  # inputs
-                [output_init],  # outputs
-                [map_2d, map_1d, map_2d],  # indexing_maps
-                [
-                    linalg.IteratorType.parallel,
-                    linalg.IteratorType.parallel,
-                ],  # iterator_types
-            )
-            def shifted(in_val, max_val, out):
-                return arith.subf(in_val, max_val)
-
-            # Step 3: Compute exp - linalg.generic elementwise
-            @linalg.generic(
-                [shifted],  # inputs
-                [output_init],  # outputs
-                [map_2d, map_2d],  # indexing_maps
-                [
-                    linalg.IteratorType.parallel,
-                    linalg.IteratorType.parallel,
-                ],  # iterator_types
-            )
-            def exp_vals(in_val, out):
-                return math.exp(in_val)
-
-            # Step 4: Sum exp values - linalg.generic reduction
-            sum_init = tensor.empty((M,), dtype)
-            zero = arith.constant(dtype, 0.0)
-            sum_filled = linalg.fill(zero, outs=[sum_init])
-
-            @linalg.generic(
-                [exp_vals],  # inputs
-                [sum_filled],  # outputs
-                [map_2d, map_1d],  # indexing_maps
-                [
-                    linalg.IteratorType.parallel,
-                    linalg.IteratorType.reduction,
-                ],  # iterator_types
-            )
-            def row_sum(in_val, acc):
-                return arith.addf(in_val, acc)
-
-            # Step 5: Divide by sum (broadcast) - linalg.generic elementwise
-            @linalg.generic(
-                [exp_vals, row_sum],  # inputs
-                [output_init],  # outputs
-                [map_2d, map_1d, map_2d],  # indexing_maps
-                [
-                    linalg.IteratorType.parallel,
-                    linalg.IteratorType.parallel,
-                ],  # iterator_types
+            # Apply softmax along dimension 1 (last dimension)
+            result = linalg.softmax(
+                result=[ir.RankedTensorType.get(shape, dtype)],
+                input=input_tensor,
+                output=output_init,
+                dimension=1,
             )
-            def result(exp_val, sum_val, out):
-                return arith.divf(exp_val, sum_val)
 
             # Materialize result back to output memref
             bufferization.materialize_in_destination(
diff --git a/lighthouse/schedule/xegpu/softmax_schedule.py b/lighthouse/schedule/xegpu/softmax_schedule.py
index d9701b84..35c3ab4c 100644
--- a/lighthouse/schedule/xegpu/softmax_schedule.py
+++ b/lighthouse/schedule/xegpu/softmax_schedule.py
@@ -26,12 +26,11 @@ def get_softmax_schedule_module(
     Generate transform schedule for softmax operation.
 
     The schedule performs the following transformations:
-    1. Tile the consumer operation (division) using forall
-    2. Fuse producer operations into the forall loop
-    3. Vectorize operations
-    4. Bufferize tensors
-    5. Convert to GPU dialect
-    6. Lower to XeGPU operations
+    1. Tile the linalg.softmax operation using forall
+    2. Vectorize operations
+    3. Bufferize tensors
+    4. Convert to GPU dialect
+    5. Lower to XeGPU operations
 
     Args:
         stop_at_stage: Optional stage name to stop early (for debugging)
@@ -113,57 +112,34 @@ def bundle_xegpu_softmax_schedule(
 
     anytype = transform.AnyOpType.get()
 
-    # Match all linalg.generic and linalg.fill operations
-    # We have 7 operations in softmax:
-    # fill(max_init), max, sub, exp, fill(sum_init), sum, div
-    generic_ops = structured.structured_match(
-        transform.AnyOpType.get(), mod, ops=["linalg.generic", "linalg.fill"]
+    # Match linalg.softmax operation
+    # We have only 1 operation: linalg.softmax
+    softmax_op = structured.structured_match(
+        transform.AnyOpType.get(), mod, ops=["linalg.softmax"]
     )
 
-    # Split the handle into individual operation handles
-    split_ops = transform.split_handle(
-        (
-            anytype,
-            anytype,
-            anytype,
-            anytype,
-            anytype,
-            anytype,
-            anytype,
-        ),  # 7 result types
-        generic_ops,
-    )
-
-    # Reverse split_ops to have operations in reverse order
-    split_ops = list(reversed(split_ops))
-
-    # The first operation (after reversal) is the division - this is the consumer
-    last_op = split_ops[0]
-
-    # Tile the last operation using tile_using_forall
+    # Tile the softmax operation using tile_using_forall
     tiled_op, for_op = structured.structured_tile_using_forall(
         anytype,
         anytype,
-        last_op,
+        softmax_op,
         num_threads=[],
         tile_sizes=[],
         static_tile_sizes=(parameters["wg_rows"],),
     )
 
-    # Fuse the producer operations into the forall loop
-    # Iterate through remaining operations (already in reverse order)
-    current_forall = for_op
-    for producer_op in split_ops[1:]:
-        fused_op, current_forall = structured.structured_fuse_into_containing_op(
-            anytype, anytype, producer_op, current_forall
-        )
-
     func = transform.get_parent_op(
         anytype,
-        current_forall,
+        for_op,
         op_name="func.func",
         deduplicate=True,
     )
+    # Decompose softmax into linalg.generic operations
+    softmax_ops = structured.structured_match(
+        transform.AnyOpType.get(), func, ops=["linalg.softmax"]
+    )
+    structured.structured_decompose_interface(anytype, softmax_ops)
+
     transform.apply_cse(func)
     canonicalize(func)
 

From 3f5cbceacfca0cf15ea47bab5e29e1b62de48b4e Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Mon, 30 Mar 2026 19:28:26 +0000
Subject: [PATCH 17/51] save work

---
 examples/xegpu/softmax.py                     | 17 +++++++++--
 lighthouse/schedule/xegpu/softmax_schedule.py | 29 +++++++++++++++++++
 2 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/examples/xegpu/softmax.py b/examples/xegpu/softmax.py
index e3cf5840..27a58ec2 100644
--- a/examples/xegpu/softmax.py
+++ b/examples/xegpu/softmax.py
@@ -217,6 +217,12 @@ def parse_cli():
         default=16,
         help="Subgroup size.",
     )
+    parser.add_argument(
+        "--reduction-step-size",
+        type=int,
+        default=16,
+        help="Step size for reduction loop tiling (optional).",
+    )
     parser.add_argument(
         "--nruns",
         type=int,
@@ -266,6 +272,7 @@ def parse_cli():
         "wg_rows": args.wg_rows,
         "sg_rows": args.sg_rows,
         "subgroup_size": args.subgroup_size,
+        "reduction_step_size": args.reduction_step_size,
     }
 
     M, N = args.sizes
@@ -304,7 +311,13 @@ def list2str(a):
                 f"wg-rows={args.wg_rows}",
                 f"sg-rows={args.sg_rows}",
                 f"subgroup-size={args.subgroup_size}",
-                f"time(us): {elapsed:.2f}",
-                f"GFLOPS: {gflops:.2f}",
             ]
+            if args.reduction_step_size is not None:
+                parts.append(f"reduction-step-size={args.reduction_step_size}")
+            parts.extend(
+                [
+                    f"time(us): {elapsed:.2f}",
+                    f"GFLOPS: {gflops:.2f}",
+                ]
+            )
             print(" ".join(parts))
diff --git a/lighthouse/schedule/xegpu/softmax_schedule.py b/lighthouse/schedule/xegpu/softmax_schedule.py
index 35c3ab4c..11410612 100644
--- a/lighthouse/schedule/xegpu/softmax_schedule.py
+++ b/lighthouse/schedule/xegpu/softmax_schedule.py
@@ -39,6 +39,7 @@ def get_softmax_schedule_module(
             - sg_rows: Number of rows per subgroup
             - subgroup_size: Size of subgroup
             - sizes: Tuple with the sizes of the input tensors (e.g. (M, N))
+            - reduction_step_size: Step size for tiling the reduction loops (required key)
 
     Returns:
         MLIR module containing the transform schedule
@@ -140,6 +141,34 @@ def bundle_xegpu_softmax_schedule(
     )
     structured.structured_decompose_interface(anytype, softmax_ops)
 
+    linalg_ops = match_and_split(
+        func, ops={"linalg.generic", "linalg.fill"}, nhandles=6
+    )
+    init_max_reduction = linalg_ops[0]
+    max_reduction = linalg_ops[1]
+    max_center_and_exp_op = linalg_ops[2]
+    init_sum_reduction = linalg_ops[3]
+    sum_reduction = linalg_ops[4]
+    div_op = linalg_ops[5]
+
+    reduction_step_size = parameters["reduction_step_size"]
+
+    # Tile the max reduction using TileReductionUsingFor
+    _, _, _, for_op = structured.structured_tile_reduction_using_for(
+        [anytype],
+        anytype,
+        anytype,
+        anytype,
+        target=max_reduction,
+        tile_sizes=[0, reduction_step_size],
+    )
+
+    # Fuse the init_max_reduction into the for loop
+    # fused_init, new_for_loop = structured.structured_fuse_into_containing_op(
+    #     anytype, anytype, init_max_reduction, for_op
+    # )
+    transform.PrintOp(target=init_max_reduction)
+
     transform.apply_cse(func)
     canonicalize(func)
 

From 6204d6c2a59cb8d816f87b2fc811d96163a613a3 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Mon, 30 Mar 2026 22:21:34 +0000
Subject: [PATCH 18/51] add inner dim tiling

---
 lighthouse/schedule/xegpu/softmax_schedule.py | 32 +++++++++----------
 1 file changed, 15 insertions(+), 17 deletions(-)

diff --git a/lighthouse/schedule/xegpu/softmax_schedule.py b/lighthouse/schedule/xegpu/softmax_schedule.py
index 11410612..9bda77fc 100644
--- a/lighthouse/schedule/xegpu/softmax_schedule.py
+++ b/lighthouse/schedule/xegpu/softmax_schedule.py
@@ -144,30 +144,28 @@ def bundle_xegpu_softmax_schedule(
     linalg_ops = match_and_split(
         func, ops={"linalg.generic", "linalg.fill"}, nhandles=6
     )
-    init_max_reduction = linalg_ops[0]
     max_reduction = linalg_ops[1]
     max_center_and_exp_op = linalg_ops[2]
-    init_sum_reduction = linalg_ops[3]
     sum_reduction = linalg_ops[4]
     div_op = linalg_ops[5]
 
     reduction_step_size = parameters["reduction_step_size"]
 
-    # Tile the max reduction using TileReductionUsingFor
-    _, _, _, for_op = structured.structured_tile_reduction_using_for(
-        [anytype],
-        anytype,
-        anytype,
-        anytype,
-        target=max_reduction,
-        tile_sizes=[0, reduction_step_size],
-    )
-
-    # Fuse the init_max_reduction into the for loop
-    # fused_init, new_for_loop = structured.structured_fuse_into_containing_op(
-    #     anytype, anytype, init_max_reduction, for_op
-    # )
-    transform.PrintOp(target=init_max_reduction)
+    # Tile all reduction ops using the same step size
+    reduction_ops = [max_reduction, sum_reduction]
+    for reduction_op in reduction_ops:
+        structured.structured_tile_reduction_using_for(
+            [anytype],
+            anytype,
+            anytype,
+            anytype,
+            target=reduction_op,
+            tile_sizes=[0, reduction_step_size],
+        )
+    # Tile elementwise ops to match the reduction tile size
+    elementwise_ops = [max_center_and_exp_op, div_op]
+    for elementwise_op in elementwise_ops:
+        structured.TileUsingForOp(elementwise_op, sizes=[0, reduction_step_size])
 
     transform.apply_cse(func)
     canonicalize(func)

From 1feb0d48d94e6cc46fb6cbf7c01dc9825e43f73a Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Wed, 1 Apr 2026 22:09:26 +0000
Subject: [PATCH 19/51] save fused version

---
 examples/xegpu/softmax.py                     |  1 +
 lighthouse/schedule/xegpu/softmax_schedule.py | 76 +++++++++++++++----
 2 files changed, 62 insertions(+), 15 deletions(-)

diff --git a/examples/xegpu/softmax.py b/examples/xegpu/softmax.py
index 324d6e0c..8049f0f4 100644
--- a/examples/xegpu/softmax.py
+++ b/examples/xegpu/softmax.py
@@ -212,6 +212,7 @@ def parse_cli():
             "tiled",
             "vectorized",
             "bufferized",
+            "gpu-outlining",
             "xegpu-initial",
             "xegpu-wg",
             "final",
diff --git a/lighthouse/schedule/xegpu/softmax_schedule.py b/lighthouse/schedule/xegpu/softmax_schedule.py
index 361dfdfe..b42b6479 100644
--- a/lighthouse/schedule/xegpu/softmax_schedule.py
+++ b/lighthouse/schedule/xegpu/softmax_schedule.py
@@ -140,6 +140,7 @@ def bundle_xegpu_softmax_schedule(
         transform.AnyOpType.get(), func, ops=["linalg.softmax"]
     )
     structured.structured_decompose_interface(anytype, softmax_ops)
+    # transform.print_(target=func, name="After structured_decompose_interface")
 
     linalg_ops = match_and_split(
         func, ops={"linalg.generic", "linalg.fill"}, nhandles=6
@@ -151,22 +152,60 @@ def bundle_xegpu_softmax_schedule(
 
     reduction_step_size = parameters["reduction_step_size"]
 
-    # Tile all reduction ops using the same step size
-    reduction_ops = [max_reduction, sum_reduction]
-    for reduction_op in reduction_ops:
-        structured.structured_tile_reduction_using_for(
-            [anytype],
-            anytype,
-            anytype,
-            anytype,
-            target=reduction_op,
-            tile_sizes=[0, reduction_step_size],
-        )
-    # Tile elementwise ops to match the reduction tile size
-    elementwise_ops = [max_center_and_exp_op, div_op]
-    for elementwise_op in elementwise_ops:
-        structured.TileUsingForOp(elementwise_op, sizes=[0, reduction_step_size])
+    # Tile the division op and fuse the sub+exp producer into it
+    _, div_loop = structured.TileUsingForOp(
+        div_op, sizes=[0, reduction_step_size]
+    ).results
+
+    # Fuse max_center_and_exp_op into the div loop
+    _, fused_loop = structured.structured_fuse_into_containing_op(
+        anytype,
+        anytype,
+        producer_op=max_center_and_exp_op,
+        containing_op=div_loop,
+    )
+
+    # Tile the sum reduction and fuse the sub+exp producer into it
+    _, _, _, sum_loop = structured.structured_tile_reduction_using_for(
+        [anytype],
+        anytype,
+        anytype,
+        anytype,
+        target=sum_reduction,
+        tile_sizes=[0, reduction_step_size],
+    )
+
+    func = transform.get_parent_op(
+        anytype,
+        fused_loop,
+        op_name="func.func",
+        deduplicate=True,
+    )
 
+    # Re-match and split linalg generic ops, there are 5 at this point
+    linalg_ops = match_and_split(func, ops={"linalg.generic"}, nhandles=5)
+    max_center_and_exp_op = linalg_ops[1]
+
+    # Fuse max_center_and_exp_op into the sum reduction loop
+    _, fused_sum_loop = structured.structured_fuse_into_containing_op(
+        anytype,
+        anytype,
+        producer_op=max_center_and_exp_op,
+        containing_op=sum_loop,
+    )
+
+    # Tile the max reduction.
+    max_reduction = linalg_ops[0]
+    structured.structured_tile_reduction_using_for(
+        [anytype],
+        anytype,
+        anytype,
+        anytype,
+        target=max_reduction,
+        tile_sizes=[0, reduction_step_size],
+    )
+
+    # Cleanup after tiling and fusion
     transform.apply_cse(func)
     canonicalize(func)
 
@@ -227,6 +266,9 @@ def bundle_xegpu_softmax_schedule(
     mod = apply_registered_pass(mod, "gpu-kernel-outlining")
     transform.apply_cse(mod)
 
+    if stop_at_stage == "gpu-outlining":
+        raise PipelineInterrupt()
+
     # set xevm target
     mod = apply_registered_pass(
         mod,
@@ -241,6 +283,10 @@ def bundle_xegpu_softmax_schedule(
         gpu_func = apply_registered_pass(gpu_func, "convert-vector-to-xegpu")
         transform.apply_cse(gpu_func)
 
+    # Cleanup.
+    transform.apply_cse(mod)
+    canonicalize(mod)
+
     if stop_at_stage == "xegpu-initial":
         raise PipelineInterrupt()
 

From a28cf4a1035cf12956aa9922ff0fbd3a46e51314 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Wed, 1 Apr 2026 22:30:40 +0000
Subject: [PATCH 20/51] save work

---
 lighthouse/schedule/xegpu/softmax_schedule.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/lighthouse/schedule/xegpu/softmax_schedule.py b/lighthouse/schedule/xegpu/softmax_schedule.py
index b42b6479..18186b89 100644
--- a/lighthouse/schedule/xegpu/softmax_schedule.py
+++ b/lighthouse/schedule/xegpu/softmax_schedule.py
@@ -237,6 +237,17 @@ def bundle_xegpu_softmax_schedule(
     transform.apply_cse(mod)
     canonicalize(mod)
 
+    # promote memref.alloc to memref.alloca in payload function
+    func = match(mod, ops={"func.func"})
+    func = apply_registered_pass(
+        func,
+        "promote-buffers-to-stack",
+        options={
+            "max-alloc-size-in-bytes": "8192",
+            "max-rank-of-allocated-memref": "2",
+        },
+    )
+
     if stop_at_stage == "bufferized":
         raise PipelineInterrupt()
 

From 79e2f737caae45431f207744187a7119c5504b12 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Fri, 3 Apr 2026 19:16:35 +0000
Subject: [PATCH 21/51] save work

---
 lighthouse/schedule/xegpu/softmax_schedule.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lighthouse/schedule/xegpu/softmax_schedule.py b/lighthouse/schedule/xegpu/softmax_schedule.py
index 18186b89..bda5f819 100644
--- a/lighthouse/schedule/xegpu/softmax_schedule.py
+++ b/lighthouse/schedule/xegpu/softmax_schedule.py
@@ -303,10 +303,10 @@ def bundle_xegpu_softmax_schedule(
 
     # Set layout attributes for xegpu.store_nd operations.
     # FIXME: currently ecah subgroup is handling the entire row.
-    store_ops = match_and_split(gpu_func, ops={"xegpu.store_nd"}, nhandles=1)
+    store_ops = match_and_split(gpu_func, ops={"xegpu.store_nd"}, nhandles=5)
     sg_layout = [parameters["sg_rows"], 1]
-    sg_data = [parameters["sg_rows"], parameters["sizes"][1]]
-    xegpu.set_anchor_layout(store_ops[0], sg_layout=sg_layout, sg_data=sg_data)
+    sg_data = [parameters["sg_rows"], parameters["reduction_step_size"]]
+    xegpu.set_anchor_layout(store_ops[-1], sg_layout=sg_layout, sg_data=sg_data)
 
     if stop_at_stage == "xegpu-wg":
         raise PipelineInterrupt()

From 55c175c0f69c511f6d753b0f7c55b5480d5303b3 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Fri, 3 Apr 2026 22:24:15 +0000
Subject: [PATCH 22/51] save work

---
 docs/softmax_lowering.md | 402 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 402 insertions(+)
 create mode 100644 docs/softmax_lowering.md

diff --git a/docs/softmax_lowering.md b/docs/softmax_lowering.md
new file mode 100644
index 00000000..f67779ae
--- /dev/null
+++ b/docs/softmax_lowering.md
@@ -0,0 +1,402 @@
+# Linalg softmax lowering in XeGPU pipeline
+
+## Overview
+
+The lowering process consists of seven stages:
+1. **initial** - High-level tensor operations
+2. **tiled-softmax** - Tiled softmax operations
+3. **decomposed** - Decomposition into constituent operations
+4. **vectorized** - Vector operations
+5. **bufferized** - Memory-based representation
+6. **xegpu-initial** - GPU kernel with XeGPU operations
+7. **xegpu-wg** - Work-group optimized XeGPU
+
+---
+
+## Stage 1: Initial
+
+**Code:**
+```mlir
+func.func @payload(%arg0: memref<1024x64xf32>, %arg1: memref<1024x64xf32>) {
+  // ...
+  %2 = tensor.empty() : tensor<1024x64xf32>
+  %3 = linalg.softmax dimension(1) ins(%1 : tensor<1024x64xf32>) 
+                                  outs(%2 : tensor<1024x64xf32>) -> tensor<1024x64xf32>
+  // ...
+  return
+}
+```
+---
+
+## Stage 2: Tiled Softmax
+
+**Key Characteristics:**
+- Work distribution via `scf.forall` (16 parallel iterations)
+- Each tile processes 64x64 elements
+
+**Code:**
+```mlir
+func.func @payload(%arg0: memref<1024x64xf32>, %arg1: memref<1024x64xf32>) {
+  // ...
+  %3 = scf.forall (%arg2) in (16) shared_outs(%arg3 = %2) -> (tensor<1024x64xf32>) {
+    %4 = affine.apply affine_map<(d0) -> (d0 * 64)>(%arg2)
+    // Extract 64x64 input slice
+    %extracted_slice = tensor.extract_slice ...
+    // Extract 64x64 output slice
+    %extracted_slice_0 = tensor.extract_slice ...
+    // Apply softmax to the tile
+    %5 = linalg.softmax dimension(1) ins(%extracted_slice : tensor<64x64xf32>) 
+                                     outs(%extracted_slice_0 : tensor<64x64xf32>) -> tensor<64x64xf32>
+    scf.forall.in_parallel {
+      tensor.parallel_insert_slice %5 into %arg3[%4, %c0] [64, 64] [1, 1] : 
+        tensor<64x64xf32> into tensor<1024x64xf32>
+    }
+  }
+  // ...  
+  return
+}
+```
+
+---
+
+## Stage 3: Decomposed
+
+**Key Characteristics:**
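+- Softmax decomposed into 4 constituent `linalg.generic` ops (max, sub+exp, sum, divide) plus 2 `linalg.fill` ops that initialize the reduction accumulators
+- Uses the `structured.structured_decompose_interface` transform op, which applies the decomposition implemented by `linalg.softmax`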
+
+**Code:**
+```mlir
+func.func @payload(%arg0: memref<1024x64xf32>, %arg1: memref<1024x64xf32>) {
+  %cst = arith.constant 0.000000e+00 : f32
+  %cst_0 = arith.constant 0xFFC00000 : f32  // NaN: neutral start value for the maxnumf reduction
+  %0 = bufferization.to_tensor %arg1 restrict : memref<1024x64xf32> to tensor<1024x64xf32>
+  %1 = tensor.empty() : tensor<1024x64xf32>
+  
+  %2 = scf.forall (%arg2) in (16) shared_outs(%arg3 = %1) -> (tensor<1024x64xf32>) {
+    %3 = affine.apply #map(%arg2)  // %3 = %arg2 * 64
+    %extracted_slice = tensor.extract_slice %0[%3, 0] [64, 64] [1, 1] : 
+      tensor<1024x64xf32> to tensor<64x64xf32>
+    
+    // Step 1: Find max along dimension 1
+    %4 = tensor.empty() : tensor<64xf32>
+    %5 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<64xf32>) -> tensor<64xf32>
+    %6 = linalg.generic {indexing_maps = [#map1, #map2], 
+                         iterator_types = ["parallel", "reduction"]} 
+         ins(%extracted_slice : tensor<64x64xf32>) outs(%5 : tensor<64xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      %11 = arith.maxnumf %in, %out : f32
+      linalg.yield %11 : f32
+    } -> tensor<64xf32>
+    
+    // Step 2: Subtract max and exponentiate
+    %7 = linalg.generic {indexing_maps = [#map1, #map2, #map1], 
+                         iterator_types = ["parallel", "parallel"]} 
+         ins(%extracted_slice, %6 : tensor<64x64xf32>, tensor<64xf32>) 
+         outs(%extracted_slice_1 : tensor<64x64xf32>) {
+    ^bb0(%in: f32, %in_2: f32, %out: f32):
+      %11 = arith.subf %in, %in_2 : f32
+      %12 = math.exp %11 : f32
+      linalg.yield %12 : f32
+    } -> tensor<64x64xf32>
+    
+    // Step 3: Sum exponentials
+    %8 = linalg.fill ins(%cst : f32) outs(%4 : tensor<64xf32>) -> tensor<64xf32>
+    %9 = linalg.generic {indexing_maps = [#map1, #map2], 
+                         iterator_types = ["parallel", "reduction"]} 
+         ins(%7 : tensor<64x64xf32>) outs(%8 : tensor<64xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      %11 = arith.addf %in, %out : f32
+      linalg.yield %11 : f32
+    } -> tensor<64xf32>
+    
+    // Step 4: Normalize by sum
+    %10 = linalg.generic {indexing_maps = [#map1, #map2, #map1], 
+                          iterator_types = ["parallel", "parallel"]} 
+          ins(%7, %9 : tensor<64x64xf32>, tensor<64xf32>) 
+          outs(%extracted_slice_1 : tensor<64x64xf32>) {
+    ^bb0(%in: f32, %in_2: f32, %out: f32):
+      %11 = arith.divf %in, %in_2 : f32
+      linalg.yield %11 : f32
+    } -> tensor<64x64xf32>
+    
+    scf.forall.in_parallel {
+      tensor.parallel_insert_slice %10 into %arg3[%3, 0] [64, 64] [1, 1] : 
+        tensor<64x64xf32> into tensor<1024x64xf32>
+    }
+  }
+  return
+}
+```
+
+**What Happens:**
+- The 1024x64 input is divided into 16 tiles of 64x64 each
+- Softmax algorithm made explicit:
+  1. **Max reduction**: Find maximum value per row (for numerical stability)
+  2. **Exp**: Compute exp(x - max) for each element
+  3. **Sum reduction**: Sum exponentials per row
+  4. **Normalize**: Divide each element by its row sum
+- Each tile is processed independently, enabling parallelization
+- Results are inserted back into the output tensor
+
+---
+
+## Stage 4: Vectorized
+
+**Key Characteristics:**
+- `linalg.generic` operations replaced with vector operations
+- SIMD-friendly representation using `vector<64x64xf32>`
+- Explicit vector multi-reductions
+- Vector transfers for reading/writing data
+
+**Code:**
+```mlir
+func.func @payload(%arg0: memref<1024x64xf32>, %arg1: memref<1024x64xf32>) {
+  %cst = arith.constant dense<0.000000e+00> : vector<64xf32>
+  %0 = ub.poison : f32
+  %cst_0 = arith.constant dense<0xFFC00000> : vector<64xf32>
+  %c0 = arith.constant 0 : index
+  %1 = bufferization.to_tensor %arg1 restrict : memref<1024x64xf32> to tensor<1024x64xf32>
+  %2 = tensor.empty() : tensor<1024x64xf32>
+  
+  %3 = scf.forall (%arg2) in (16) shared_outs(%arg3 = %2) -> (tensor<1024x64xf32>) {
+    %4 = affine.apply #map(%arg2)  // %4 = %arg2 * 64
+    %extracted_slice = tensor.extract_slice %arg3[%4, 0] [64, 64] [1, 1]
+    
+    // Vector read: Load 64x64 tile
+    %5 = vector.transfer_read %1[%4, %c0], %0 {in_bounds = [true, true]} : 
+      tensor<1024x64xf32>, vector<64x64xf32>
+    
+    // Max reduction: Reduce dimension 1 -> vector<64xf32>
+    %6 = vector.multi_reduction <maxnumf>, %5, %cst_0 [1] : 
+      vector<64x64xf32> to vector<64xf32>
+    
+    // Broadcast max values back to 64x64 and transpose
+    %7 = vector.broadcast %6 : vector<64xf32> to vector<64x64xf32>
+    %8 = vector.transpose %7, [1, 0] : vector<64x64xf32> to vector<64x64xf32>
+    
+    // Subtract max and exponentiate
+    %9 = arith.subf %5, %8 : vector<64x64xf32>
+    %10 = math.exp %9 : vector<64x64xf32>
+    
+    // Sum reduction: Reduce dimension 1 -> vector<64xf32>
+    %11 = vector.multi_reduction <add>, %10, %cst [1] : 
+      vector<64x64xf32> to vector<64xf32>
+    
+    // Broadcast sums back to 64x64 and transpose
+    %12 = vector.broadcast %11 : vector<64xf32> to vector<64x64xf32>
+    %13 = vector.transpose %12, [1, 0] : vector<64x64xf32> to vector<64x64xf32>
+    
+    // Normalize
+    %14 = arith.divf %10, %13 : vector<64x64xf32>
+    
+    // Vector write
+    %15 = vector.transfer_write %14, %extracted_slice[%c0, %c0] {in_bounds = [true, true]} : 
+      vector<64x64xf32>, tensor<64x64xf32>
+    
+    scf.forall.in_parallel {
+      tensor.parallel_insert_slice %15 into %arg3[%4, 0] [64, 64] [1, 1]
+    }
+  }
+  return
+}
+```
+
+**What Happens:**
+- Linalg operations converted to vector dialect operations
+- `vector.transfer_read` loads entire 64x64 tile at once
+- `vector.multi_reduction` performs SIMD reductions (max and sum)
+- `vector.broadcast` and `vector.transpose` handle dimension alignment
+- All arithmetic operations work on vectors, enabling SIMD execution
+- `vector.transfer_write` stores results back
+
+---
+
+## Stage 5: Bufferized
+
+**Key Characteristics:**
+- Tensors eliminated, working directly with memrefs
+- Vector operations read/write directly from/to memory
+- No more tensor extract/insert operations
+- Simplified control flow
+
+**Code:**
+```mlir
+func.func @payload(%arg0: memref<1024x64xf32>, %arg1: memref<1024x64xf32>) {
+  %cst = arith.constant dense<0.000000e+00> : vector<64xf32>
+  %0 = ub.poison : f32
+  %cst_0 = arith.constant dense<0xFFC00000> : vector<64xf32>
+  %c0 = arith.constant 0 : index
+  
+  scf.forall (%arg2) in (16) {
+    %1 = affine.apply #map(%arg2)  // %1 = %arg2 * 64
+    
+    // Direct memref read
+    %2 = vector.transfer_read %arg1[%1, %c0], %0 {in_bounds = [true, true]} : 
+      memref<1024x64xf32>, vector<64x64xf32>
+    
+    // Max reduction
+    %3 = vector.multi_reduction <maxnumf>, %2, %cst_0 [1] : 
+      vector<64x64xf32> to vector<64xf32>
+    %4 = vector.broadcast %3 : vector<64xf32> to vector<64x64xf32>
+    %5 = vector.transpose %4, [1, 0] : vector<64x64xf32> to vector<64x64xf32>
+    
+    // Subtract and exp
+    %6 = arith.subf %2, %5 : vector<64x64xf32>
+    %7 = math.exp %6 : vector<64x64xf32>
+    
+    // Sum reduction
+    %8 = vector.multi_reduction <add>, %7, %cst [1] : 
+      vector<64x64xf32> to vector<64xf32>
+    %9 = vector.broadcast %8 : vector<64xf32> to vector<64x64xf32>
+    %10 = vector.transpose %9, [1, 0] : vector<64x64xf32> to vector<64x64xf32>
+    
+    // Normalize
+    %11 = arith.divf %7, %10 : vector<64x64xf32>
+    
+    // Direct memref write
+    vector.transfer_write %11, %arg0[%1, %c0] {in_bounds = [true, true]} : 
+      vector<64x64xf32>, memref<1024x64xf32>
+  }
+  return
+}
+```
+
+**What Happens:**
+- All tensor operations converted to memref-based operations
+- `scf.forall` no longer uses `shared_outs`, simplified to pure side effects
+- Vector transfers work directly on input/output memrefs
+- Memory layout is now explicit
+- This representation is ready for GPU kernel extraction
+
+---
+
+## Stage 6: XeGPU-Initial
+
+**Key Characteristics:**
+- GPU kernel separated from host code
+- `gpu.launch_func` invocation with grid/block dimensions
+- XeGPU tensor descriptors for memory access
+- Block-based load/store operations
+
+**Code:**
+
+**Host Side:**
+```mlir
+func.func @payload(%arg0: memref<1024x64xf32>, %arg1: memref<1024x64xf32>) {
+  %c1 = arith.constant 1 : index
+  %c16 = arith.constant 16 : index
+  %c128 = arith.constant 128 : index
+  gpu.launch_func @payload_kernel::@payload_kernel 
+    blocks in (%c16, %c1, %c1) 
+    threads in (%c128, %c1, %c1)
+    args(%arg1 : memref<1024x64xf32>, %arg0 : memref<1024x64xf32>)
+  return
+}
+```
+
+**GPU Kernel:**
+```mlir
+gpu.module @payload_kernel [#xevm.target<O = 3>] {
+  gpu.func @payload_kernel(%arg0: memref<1024x64xf32>, %arg1: memref<1024x64xf32>) kernel 
+    attributes {known_block_size = array<i32: 128, 1, 1>, 
+                known_grid_size = array<i32: 16, 1, 1>} {
+    %cst = arith.constant dense<0.000000e+00> : vector<64xf32>
+    %cst_0 = arith.constant dense<0xFFC00000> : vector<64xf32>
+    %c64 = arith.constant 64 : index
+    %block_id_x = gpu.block_id x
+    %0 = arith.muli %block_id_x, %c64 overflow<nsw> : index
+    
+    // Create XeGPU tensor descriptor for load
+    %1 = xegpu.create_nd_tdesc %arg0 : memref<1024x64xf32> -> 
+      !xegpu.tensor_desc<64x64xf32, #xegpu.block_tdesc_attr<boundary_check = false>>
+    
+    // XeGPU block load
+    %2 = xegpu.load_nd %1[%0, 0] : 
+      !xegpu.tensor_desc<64x64xf32, #xegpu.block_tdesc_attr<boundary_check = false>> -> 
+      vector<64x64xf32>
+    
+    // Same compute operations as before
+    %3 = vector.multi_reduction <maxnumf>, %2, %cst_0 [1] : 
+      vector<64x64xf32> to vector<64xf32>
+    %4 = vector.broadcast %3 : vector<64xf32> to vector<64x64xf32>
+    %5 = vector.transpose %4, [1, 0] : vector<64x64xf32> to vector<64x64xf32>
+    %6 = arith.subf %2, %5 : vector<64x64xf32>
+    %7 = math.exp %6 : vector<64x64xf32>
+    %8 = vector.multi_reduction <add>, %7, %cst [1] : 
+      vector<64x64xf32> to vector<64xf32>
+    %9 = vector.broadcast %8 : vector<64xf32> to vector<64x64xf32>
+    %10 = vector.transpose %9, [1, 0] : vector<64x64xf32> to vector<64x64xf32>
+    %11 = arith.divf %7, %10 : vector<64x64xf32>
+    
+    // Create XeGPU tensor descriptor for store
+    %12 = xegpu.create_nd_tdesc %arg1 : memref<1024x64xf32> -> 
+      !xegpu.tensor_desc<64x64xf32, #xegpu.block_tdesc_attr<boundary_check = false>>
+    
+    // XeGPU block store
+    xegpu.store_nd %11, %12[%0, 0] : 
+      vector<64x64xf32>, 
+      !xegpu.tensor_desc<64x64xf32, #xegpu.block_tdesc_attr<boundary_check = false>>
+    
+    gpu.return
+  }
+}
+```
+
+**What Happens:**
+- Host function now calls `gpu.launch_func` with 16 blocks, 128 threads per block
+- Separate `gpu.module` contains the kernel code
+- `gpu.block_id x` replaces the loop iterator
+- `xegpu.create_nd_tdesc` creates tensor descriptors for memory regions
+- `xegpu.load_nd` performs hardware-optimized block loads
+- `xegpu.store_nd` performs hardware-optimized block stores
+- XeGPU operations map directly to Intel GPU instructions
+- Boundary checking disabled for performance (sizes known at compile time)
+
+---
+
+## Stage 7: XeGPU-WG (Work-Group Optimized)
+
+**Key Characteristics:**
+- Additional layout hints for work-group optimization
+- Sub-group layout specification: `sg_layout` and `sg_data`
+- Optimized memory access patterns for Intel XeGPU
+
+**Code (differences from xegpu-initial):**
+```mlir
+// Store operation now includes layout hints
+xegpu.store_nd %11, %12[%0, 0] 
+  <{layout = #xegpu.layout<sg_layout = [8, 1], sg_data = [8, 64]>}> : 
+  vector<64x64xf32>, 
+  !xegpu.tensor_desc<64x64xf32, #xegpu.block_tdesc_attr<boundary_check = false>>
+```
+
+**What Happens:**
+- The `layout` attribute provides explicit sub-group (SG) tiling information:
+  - `sg_layout = [8, 1]`: Data is distributed across 8 sub-groups in the first dimension
+  - `sg_data = [8, 64]`: Each sub-group handles an 8x64 slice of data
+- This layout specification:
+  - Matches the 64x64 total data size (8 sub-groups × 8 rows = 64 rows, 64 columns)
+  - Optimizes coalesced memory accesses
+  - Enables efficient SIMD execution within each sub-group
+  - Aligns with Intel GPU hardware execution model (128 threads = 8 sub-groups of 16 threads)
+- The layout is applied to the store operation to optimize write patterns
+- Load operation remains unchanged as reads are typically more flexible
+
+---
+
+## Summary
+
+The lowering pipeline progressively transforms abstract operations into hardware-specific instructions:
+
+| Stage | Abstraction Level | Key Operations |
+|-------|------------------|----------------|
+| **initial** | High-level ML | `linalg.softmax` on full tensor |
+| **tiled-softmax** | Tiled high-level | `linalg.softmax` on tiles, `scf.forall` |
+| **decomposed** | Tiled computation | `linalg.generic` with reductions |
+| **vectorized** | Vector operations | `vector.multi_reduction`, `vector.transfer_read/write` |
+| **bufferized** | Memory-based | Direct memref operations with vectors |
+| **xegpu-initial** | GPU-specific | `xegpu.load_nd`, `xegpu.store_nd`, `gpu.launch_func` |
+| **xegpu-wg** | Hardware-optimized | Layout hints for sub-group optimization |
+
+Each stage maintains the same computational semantics while providing increasingly detailed control over execution and memory access patterns, ultimately targeting efficient execution on Intel XeGPU hardware.

From bf3a8c66a058cf4f7e8443d7247f2494437672a1 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Fri, 3 Apr 2026 22:51:42 +0000
Subject: [PATCH 23/51] save work

---
 docs/softmax_lowering.md | 158 ++++++++-------------------------------
 1 file changed, 31 insertions(+), 127 deletions(-)

diff --git a/docs/softmax_lowering.md b/docs/softmax_lowering.md
index f67779ae..c5608e37 100644
--- a/docs/softmax_lowering.md
+++ b/docs/softmax_lowering.md
@@ -1,7 +1,10 @@
-# Linalg softmax lowering in XeGPU pipeline
+# Linalg softmax lowering to XeGPU (Currently supported in lighthouse)
 
 ## Overview
 
+**Assumptions:**
+Softmax dimension size is small (64 in this example). 
+
 The lowering process consists of seven stages:
 1. **initial** - High-level tensor operations
 2. **tiled-softmax** - Tiled softmax operations
@@ -30,7 +33,7 @@ func.func @payload(%arg0: memref<1024x64xf32>, %arg1: memref<1024x64xf32>) {
 
 ## Stage 2: Tiled Softmax
 
-**Key Characteristics:**
+**Notes**
 - Work distribution via `scf.forall` (16 parallel iterations)
 - Each tile processes 64x64 elements
 
@@ -61,63 +64,45 @@ func.func @payload(%arg0: memref<1024x64xf32>, %arg1: memref<1024x64xf32>) {
 
 ## Stage 3: Decomposed
 
-**Key Characteristics:**
+**Notes**
 - Softmax decomposed into 4 constituent `linalg.generic` ops : max, sub+exp, sum, divide
 - Uses `structured.structured_decompose_interface` implemented by `linalg.softmax`
 
 **Code:**
 ```mlir
 func.func @payload(%arg0: memref<1024x64xf32>, %arg1: memref<1024x64xf32>) {
-  %cst = arith.constant 0.000000e+00 : f32
-  %cst_0 = arith.constant 0xFFC00000 : f32  // -inf for max reduction
-  %0 = bufferization.to_tensor %arg1 restrict : memref<1024x64xf32> to tensor<1024x64xf32>
-  %1 = tensor.empty() : tensor<1024x64xf32>
+  // ...
   
   %2 = scf.forall (%arg2) in (16) shared_outs(%arg3 = %1) -> (tensor<1024x64xf32>) {
     %3 = affine.apply #map(%arg2)  // %3 = %arg2 * 64
-    %extracted_slice = tensor.extract_slice %0[%3, 0] [64, 64] [1, 1] : 
-      tensor<1024x64xf32> to tensor<64x64xf32>
+    %extracted_slice = tensor.extract_slice ...
     
     // Step 1: Find max along dimension 1
     %4 = tensor.empty() : tensor<64xf32>
     %5 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<64xf32>) -> tensor<64xf32>
-    %6 = linalg.generic {indexing_maps = [#map1, #map2], 
-                         iterator_types = ["parallel", "reduction"]} 
-         ins(%extracted_slice : tensor<64x64xf32>) outs(%5 : tensor<64xf32>) {
-    ^bb0(%in: f32, %out: f32):
+    %6 = linalg.generic // ...
       %11 = arith.maxnumf %in, %out : f32
-      linalg.yield %11 : f32
+      // ...
     } -> tensor<64xf32>
     
     // Step 2: Subtract max and exponentiate
-    %7 = linalg.generic {indexing_maps = [#map1, #map2, #map1], 
-                         iterator_types = ["parallel", "parallel"]} 
-         ins(%extracted_slice, %6 : tensor<64x64xf32>, tensor<64xf32>) 
-         outs(%extracted_slice_1 : tensor<64x64xf32>) {
-    ^bb0(%in: f32, %in_2: f32, %out: f32):
+    %7 = linalg.generic // ...
       %11 = arith.subf %in, %in_2 : f32
       %12 = math.exp %11 : f32
-      linalg.yield %12 : f32
+      // ...
     } -> tensor<64x64xf32>
     
     // Step 3: Sum exponentials
     %8 = linalg.fill ins(%cst : f32) outs(%4 : tensor<64xf32>) -> tensor<64xf32>
-    %9 = linalg.generic {indexing_maps = [#map1, #map2], 
-                         iterator_types = ["parallel", "reduction"]} 
-         ins(%7 : tensor<64x64xf32>) outs(%8 : tensor<64xf32>) {
-    ^bb0(%in: f32, %out: f32):
+    %9 = linalg.generic // ...
       %11 = arith.addf %in, %out : f32
-      linalg.yield %11 : f32
+      // ...
     } -> tensor<64xf32>
     
     // Step 4: Normalize by sum
-    %10 = linalg.generic {indexing_maps = [#map1, #map2, #map1], 
-                          iterator_types = ["parallel", "parallel"]} 
-          ins(%7, %9 : tensor<64x64xf32>, tensor<64xf32>) 
-          outs(%extracted_slice_1 : tensor<64x64xf32>) {
-    ^bb0(%in: f32, %in_2: f32, %out: f32):
+    %10 = linalg.generic // ...
       %11 = arith.divf %in, %in_2 : f32
-      linalg.yield %11 : f32
+      // ...
     } -> tensor<64x64xf32>
     
     scf.forall.in_parallel {
@@ -129,39 +114,21 @@ func.func @payload(%arg0: memref<1024x64xf32>, %arg1: memref<1024x64xf32>) {
 }
 ```
 
-**What Happens:**
-- The 1024x64 input is divided into 16 tiles of 64x64 each
-- Softmax algorithm made explicit:
-  1. **Max reduction**: Find maximum value per row (for numerical stability)
-  2. **Exp**: Compute exp(x - max) for each element
-  3. **Sum reduction**: Sum exponentials per row
-  4. **Normalize**: Divide each element by its row sum
-- Each tile is processed independently, enabling parallelization
-- Results are inserted back into the output tensor
-
 ---
 
 ## Stage 4: Vectorized
 
-**Key Characteristics:**
+**Notes**
 - `linalg.generic` operations replaced with vector operations
-- SIMD-friendly representation using `vector<64x64xf32>`
-- Explicit vector multi-reductions
 - Vector transfers for reading/writing data
 
 **Code:**
 ```mlir
 func.func @payload(%arg0: memref<1024x64xf32>, %arg1: memref<1024x64xf32>) {
-  %cst = arith.constant dense<0.000000e+00> : vector<64xf32>
-  %0 = ub.poison : f32
-  %cst_0 = arith.constant dense<0xFFC00000> : vector<64xf32>
-  %c0 = arith.constant 0 : index
-  %1 = bufferization.to_tensor %arg1 restrict : memref<1024x64xf32> to tensor<1024x64xf32>
-  %2 = tensor.empty() : tensor<1024x64xf32>
-  
+  // ...  
   %3 = scf.forall (%arg2) in (16) shared_outs(%arg3 = %2) -> (tensor<1024x64xf32>) {
     %4 = affine.apply #map(%arg2)  // %4 = %arg2 * 64
-    %extracted_slice = tensor.extract_slice %arg3[%4, 0] [64, 64] [1, 1]
+    %extracted_slice = tensor.extract_slice ..
     
     // Vector read: Load 64x64 tile
     %5 = vector.transfer_read %1[%4, %c0], %0 {in_bounds = [true, true]} : 
@@ -201,32 +168,17 @@ func.func @payload(%arg0: memref<1024x64xf32>, %arg1: memref<1024x64xf32>) {
   return
 }
 ```
-
-**What Happens:**
-- Linalg operations converted to vector dialect operations
-- `vector.transfer_read` loads entire 64x64 tile at once
-- `vector.multi_reduction` performs SIMD reductions (max and sum)
-- `vector.broadcast` and `vector.transpose` handle dimension alignment
-- All arithmetic operations work on vectors, enabling SIMD execution
-- `vector.transfer_write` stores results back
-
 ---
 
 ## Stage 5: Bufferized
 
-**Key Characteristics:**
+**Notes**
 - Tensors eliminated, working directly with memrefs
-- Vector operations read/write directly from/to memory
-- No more tensor extract/insert operations
-- Simplified control flow
 
 **Code:**
 ```mlir
 func.func @payload(%arg0: memref<1024x64xf32>, %arg1: memref<1024x64xf32>) {
-  %cst = arith.constant dense<0.000000e+00> : vector<64xf32>
-  %0 = ub.poison : f32
-  %cst_0 = arith.constant dense<0xFFC00000> : vector<64xf32>
-  %c0 = arith.constant 0 : index
+  // ...
   
   scf.forall (%arg2) in (16) {
     %1 = affine.apply #map(%arg2)  // %1 = %arg2 * 64
@@ -262,31 +214,21 @@ func.func @payload(%arg0: memref<1024x64xf32>, %arg1: memref<1024x64xf32>) {
 }
 ```
 
-**What Happens:**
-- All tensor operations converted to memref-based operations
-- `scf.forall` no longer uses `shared_outs`, simplified to pure side effects
-- Vector transfers work directly on input/output memrefs
-- Memory layout is now explicit
-- This representation is ready for GPU kernel extraction
-
 ---
 
 ## Stage 6: XeGPU-Initial
 
-**Key Characteristics:**
-- GPU kernel separated from host code
+**Notes**
+- GPU kernel separated from host code (GPU kernel outlining)
 - `gpu.launch_func` invocation with grid/block dimensions
-- XeGPU tensor descriptors for memory access
-- Block-based load/store operations
+- Vector transfers are lowered to XeGPU ops using the `convert-vector-to-xegpu` pass
 
 **Code:**
 
 **Host Side:**
 ```mlir
 func.func @payload(%arg0: memref<1024x64xf32>, %arg1: memref<1024x64xf32>) {
-  %c1 = arith.constant 1 : index
-  %c16 = arith.constant 16 : index
-  %c128 = arith.constant 128 : index
+  // ...
   gpu.launch_func @payload_kernel::@payload_kernel 
     blocks in (%c16, %c1, %c1) 
     threads in (%c128, %c1, %c1)
@@ -301,9 +243,7 @@ gpu.module @payload_kernel [#xevm.target<O = 3>] {
   gpu.func @payload_kernel(%arg0: memref<1024x64xf32>, %arg1: memref<1024x64xf32>) kernel 
     attributes {known_block_size = array<i32: 128, 1, 1>, 
                 known_grid_size = array<i32: 16, 1, 1>} {
-    %cst = arith.constant dense<0.000000e+00> : vector<64xf32>
-    %cst_0 = arith.constant dense<0xFFC00000> : vector<64xf32>
-    %c64 = arith.constant 64 : index
+    // ...
     %block_id_x = gpu.block_id x
     %0 = arith.muli %block_id_x, %c64 overflow<nsw> : index
     
@@ -343,24 +283,14 @@ gpu.module @payload_kernel [#xevm.target<O = 3>] {
 }
 ```
 
-**What Happens:**
-- Host function now calls `gpu.launch_func` with 16 blocks, 128 threads per block
-- Separate `gpu.module` contains the kernel code
-- `gpu.block_id x` replaces the loop iterator
-- `xegpu.create_nd_tdesc` creates tensor descriptors for memory regions
-- `xegpu.load_nd` performs hardware-optimized block loads
-- `xegpu.store_nd` performs hardware-optimized block stores
-- XeGPU operations map directly to Intel GPU instructions
-- Boundary checking disabled for performance (sizes known at compile time)
-
 ---
 
 ## Stage 7: XeGPU-WG (Work-Group Optimized)
 
-**Key Characteristics:**
-- Additional layout hints for work-group optimization
-- Sub-group layout specification: `sg_layout` and `sg_data`
-- Optimized memory access patterns for Intel XeGPU
+**Notes**
+- Sets the layout for anchor XeGPU ops. Each work-group consists of an [8, 1] grid
+  of subgroups, each processing an 8x64 slice of the softmax tile.
+- Only sets the layout for `store_nd`; layout propagation does the rest.
 
 **Code (differences from xegpu-initial):**
 ```mlir
@@ -371,32 +301,6 @@ xegpu.store_nd %11, %12[%0, 0]
   !xegpu.tensor_desc<64x64xf32, #xegpu.block_tdesc_attr<boundary_check = false>>
 ```
 
-**What Happens:**
-- The `layout` attribute provides explicit sub-group (SG) tiling information:
-  - `sg_layout = [8, 1]`: Data is distributed across 8 sub-groups in the first dimension
-  - `sg_data = [8, 64]`: Each sub-group handles an 8x64 slice of data
-- This layout specification:
-  - Matches the 64x64 total data size (8 sub-groups × 8 rows = 64 rows, 64 columns)
-  - Optimizes coalesced memory accesses
-  - Enables efficient SIMD execution within each sub-group
-  - Aligns with Intel GPU hardware execution model (128 threads = 8 sub-groups of 16 threads)
-- The layout is applied to the store operation to optimize write patterns
-- Load operation remains unchanged as reads are typically more flexible
-
 ---
 
-## Summary
-
-The lowering pipeline progressively transforms abstract operations into hardware-specific instructions:
-
-| Stage | Abstraction Level | Key Operations |
-|-------|------------------|----------------|
-| **initial** | High-level ML | `linalg.softmax` on full tensor |
-| **tiled-softmax** | Tiled high-level | `linalg.softmax` on tiles, `scf.forall` |
-| **decomposed** | Tiled computation | `linalg.generic` with reductions |
-| **vectorized** | Vector operations | `vector.multi_reduction`, `vector.transfer_read/write` |
-| **bufferized** | Memory-based | Direct memref operations with vectors |
-| **xegpu-initial** | GPU-specific | `xegpu.load_nd`, `xegpu.store_nd`, `gpu.launch_func` |
-| **xegpu-wg** | Hardware-optimized | Layout hints for sub-group optimization |
-
-Each stage maintains the same computational semantics while providing increasingly detailed control over execution and memory access patterns, ultimately targeting efficient execution on Intel XeGPU hardware.
+# Supporting larger Softmax dimension sizes.

From b083887154b083d184430c0c87baf887d155dcca Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Fri, 3 Apr 2026 23:03:15 +0000
Subject: [PATCH 24/51] save work

---
 examples/xegpu/softmax.py                     |  8 --
 lighthouse/schedule/xegpu/softmax_schedule.py | 90 +------------------
 2 files changed, 3 insertions(+), 95 deletions(-)

diff --git a/examples/xegpu/softmax.py b/examples/xegpu/softmax.py
index 8049f0f4..afca86e3 100644
--- a/examples/xegpu/softmax.py
+++ b/examples/xegpu/softmax.py
@@ -181,12 +181,6 @@ def parse_cli():
         default=16,
         help="Subgroup size.",
     )
-    parser.add_argument(
-        "--reduction-step-size",
-        type=int,
-        default=16,
-        help="Step size for reduction loop tiling (optional).",
-    )
     parser.add_argument(
         "--nruns",
         type=int,
@@ -212,7 +206,6 @@ def parse_cli():
             "tiled",
             "vectorized",
             "bufferized",
-            "gpu-outlining",
             "xegpu-initial",
             "xegpu-wg",
             "final",
@@ -244,7 +237,6 @@ def parse_cli():
         "wg_rows": args.wg_rows,
         "sg_rows": args.sg_rows,
         "subgroup_size": args.subgroup_size,
-        "reduction_step_size": args.reduction_step_size,
     }
 
     M, N = args.sizes
diff --git a/lighthouse/schedule/xegpu/softmax_schedule.py b/lighthouse/schedule/xegpu/softmax_schedule.py
index bda5f819..0907bc5a 100644
--- a/lighthouse/schedule/xegpu/softmax_schedule.py
+++ b/lighthouse/schedule/xegpu/softmax_schedule.py
@@ -39,7 +39,6 @@ def get_softmax_schedule_module(
             - sg_rows: Number of rows per subgroup
             - subgroup_size: Size of subgroup
             - sizes: Tuple with the sizes of the input tensors (e.g. (M, N))
-            - reduction_step_size: Optional step size for tiling reduction loops
 
     Returns:
         MLIR module containing the transform schedule
@@ -140,72 +139,7 @@ def bundle_xegpu_softmax_schedule(
         transform.AnyOpType.get(), func, ops=["linalg.softmax"]
     )
     structured.structured_decompose_interface(anytype, softmax_ops)
-    # transform.print_(target=func, name="After structured_decompose_interface")
 
-    linalg_ops = match_and_split(
-        func, ops={"linalg.generic", "linalg.fill"}, nhandles=6
-    )
-    max_reduction = linalg_ops[1]
-    max_center_and_exp_op = linalg_ops[2]
-    sum_reduction = linalg_ops[4]
-    div_op = linalg_ops[5]
-
-    reduction_step_size = parameters["reduction_step_size"]
-
-    # Tile the division op and fuse the sub+exp producer into it
-    _, div_loop = structured.TileUsingForOp(
-        div_op, sizes=[0, reduction_step_size]
-    ).results
-
-    # Fuse max_center_and_exp_op into the div loop
-    _, fused_loop = structured.structured_fuse_into_containing_op(
-        anytype,
-        anytype,
-        producer_op=max_center_and_exp_op,
-        containing_op=div_loop,
-    )
-
-    # Tile the sum reduction and fuse the sub+exp producer into it
-    _, _, _, sum_loop = structured.structured_tile_reduction_using_for(
-        [anytype],
-        anytype,
-        anytype,
-        anytype,
-        target=sum_reduction,
-        tile_sizes=[0, reduction_step_size],
-    )
-
-    func = transform.get_parent_op(
-        anytype,
-        fused_loop,
-        op_name="func.func",
-        deduplicate=True,
-    )
-
-    # Re-match and split linalg generic ops, there are 5 at this point
-    linalg_ops = match_and_split(func, ops={"linalg.generic"}, nhandles=5)
-    max_center_and_exp_op = linalg_ops[1]
-
-    # Fuse max_center_and_exp_op into the sum reduction loop
-    _, fused_sum_loop = structured.structured_fuse_into_containing_op(
-        anytype,
-        anytype,
-        producer_op=max_center_and_exp_op,
-        containing_op=sum_loop,
-    )
-
-    # Tile the max reduction.
-    max_reduction = linalg_ops[0]
-    structured.structured_tile_reduction_using_for(
-        [anytype],
-        anytype,
-        anytype,
-        anytype,
-        target=max_reduction,
-        tile_sizes=[0, reduction_step_size],
-    )
-
-    # Cleanup after tiling and fusion
     transform.apply_cse(func)
     canonicalize(func)
 
@@ -237,17 +171,6 @@ def bundle_xegpu_softmax_schedule(
     transform.apply_cse(mod)
     canonicalize(mod)
 
-    # promote memref.alloc to memref.alloca in payload function
-    func = match(mod, ops={"func.func"})
-    func = apply_registered_pass(
-        func,
-        "promote-buffers-to-stack",
-        options={
-            "max-alloc-size-in-bytes": "8192",
-            "max-rank-of-allocated-memref": "2",
-        },
-    )
-
     if stop_at_stage == "bufferized":
         raise PipelineInterrupt()
 
@@ -277,9 +200,6 @@ def bundle_xegpu_softmax_schedule(
     mod = apply_registered_pass(mod, "gpu-kernel-outlining")
     transform.apply_cse(mod)
 
-    if stop_at_stage == "gpu-outlining":
-        raise PipelineInterrupt()
-
     # set xevm target
     mod = apply_registered_pass(
         mod,
@@ -294,19 +214,15 @@ def bundle_xegpu_softmax_schedule(
         gpu_func = apply_registered_pass(gpu_func, "convert-vector-to-xegpu")
         transform.apply_cse(gpu_func)
 
-    # Cleanup.
-    transform.apply_cse(mod)
-    canonicalize(mod)
-
     if stop_at_stage == "xegpu-initial":
         raise PipelineInterrupt()
 
     # Set layout attributes for xegpu.store_nd operations.
     # FIXME: currently ecah subgroup is handling the entire row.
-    store_ops = match_and_split(gpu_func, ops={"xegpu.store_nd"}, nhandles=5)
+    store_ops = match_and_split(gpu_func, ops={"xegpu.store_nd"}, nhandles=1)
     sg_layout = [parameters["sg_rows"], 1]
-    sg_data = [parameters["sg_rows"], parameters["reduction_step_size"]]
-    xegpu.set_anchor_layout(store_ops[-1], sg_layout=sg_layout, sg_data=sg_data)
+    sg_data = [parameters["sg_rows"], parameters["sizes"][1]]
+    xegpu.set_anchor_layout(store_ops[0], sg_layout=sg_layout, sg_data=sg_data)
 
     if stop_at_stage == "xegpu-wg":
         raise PipelineInterrupt()

From c02b66b6b95e34e1dfdda31508e97ec50d507a03 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Fri, 3 Apr 2026 23:16:12 +0000
Subject: [PATCH 25/51] fused version

---
 examples/xegpu/softmax.py                     |  8 ++
 lighthouse/schedule/xegpu/softmax_schedule.py | 90 ++++++++++++++++++-
 2 files changed, 95 insertions(+), 3 deletions(-)

diff --git a/examples/xegpu/softmax.py b/examples/xegpu/softmax.py
index afca86e3..8049f0f4 100644
--- a/examples/xegpu/softmax.py
+++ b/examples/xegpu/softmax.py
@@ -181,6 +181,12 @@ def parse_cli():
         default=16,
         help="Subgroup size.",
     )
+    parser.add_argument(
+        "--reduction-step-size",
+        type=int,
+        default=16,
+        help="Step size for reduction loop tiling (optional).",
+    )
     parser.add_argument(
         "--nruns",
         type=int,
@@ -206,6 +212,7 @@ def parse_cli():
             "tiled",
             "vectorized",
             "bufferized",
+            "gpu-outlining",
             "xegpu-initial",
             "xegpu-wg",
             "final",
@@ -237,6 +244,7 @@ def parse_cli():
         "wg_rows": args.wg_rows,
         "sg_rows": args.sg_rows,
         "subgroup_size": args.subgroup_size,
+        "reduction_step_size": args.reduction_step_size,
     }
 
     M, N = args.sizes
diff --git a/lighthouse/schedule/xegpu/softmax_schedule.py b/lighthouse/schedule/xegpu/softmax_schedule.py
index 0907bc5a..bda5f819 100644
--- a/lighthouse/schedule/xegpu/softmax_schedule.py
+++ b/lighthouse/schedule/xegpu/softmax_schedule.py
@@ -39,6 +39,7 @@ def get_softmax_schedule_module(
             - sg_rows: Number of rows per subgroup
             - subgroup_size: Size of subgroup
             - sizes: Tuple with the sizes of the input tensors (e.g. (M, N))
+            - reduction_step_size: Optional step size for tiling reduction loops
 
     Returns:
         MLIR module containing the transform schedule
@@ -139,7 +140,72 @@ def bundle_xegpu_softmax_schedule(
         transform.AnyOpType.get(), func, ops=["linalg.softmax"]
     )
     structured.structured_decompose_interface(anytype, softmax_ops)
+    # transform.print_(target=func, name="After structured_decompose_interface")
 
+    linalg_ops = match_and_split(
+        func, ops={"linalg.generic", "linalg.fill"}, nhandles=6
+    )
+    max_reduction = linalg_ops[1]
+    max_center_and_exp_op = linalg_ops[2]
+    sum_reduction = linalg_ops[4]
+    div_op = linalg_ops[5]
+
+    reduction_step_size = parameters["reduction_step_size"]
+
+    # Tile the division op and fuse the sub+exp producer into it
+    _, div_loop = structured.TileUsingForOp(
+        div_op, sizes=[0, reduction_step_size]
+    ).results
+
+    # Fuse max_center_and_exp_op into the div loop
+    _, fused_loop = structured.structured_fuse_into_containing_op(
+        anytype,
+        anytype,
+        producer_op=max_center_and_exp_op,
+        containing_op=div_loop,
+    )
+
+    # Tile the sum reduction and fuse the sub+exp producer into it
+    _, _, _, sum_loop = structured.structured_tile_reduction_using_for(
+        [anytype],
+        anytype,
+        anytype,
+        anytype,
+        target=sum_reduction,
+        tile_sizes=[0, reduction_step_size],
+    )
+
+    func = transform.get_parent_op(
+        anytype,
+        fused_loop,
+        op_name="func.func",
+        deduplicate=True,
+    )
+
+    # Re-match and split linalg generic ops, there are 5 at this point
+    linalg_ops = match_and_split(func, ops={"linalg.generic"}, nhandles=5)
+    max_center_and_exp_op = linalg_ops[1]
+
+    # Fuse max_center_and_exp_op into the sum reduction loop
+    _, fused_sum_loop = structured.structured_fuse_into_containing_op(
+        anytype,
+        anytype,
+        producer_op=max_center_and_exp_op,
+        containing_op=sum_loop,
+    )
+
+    # Tile the max reduction.
+    max_reduction = linalg_ops[0]
+    structured.structured_tile_reduction_using_for(
+        [anytype],
+        anytype,
+        anytype,
+        anytype,
+        target=max_reduction,
+        tile_sizes=[0, reduction_step_size],
+    )
+
+    # Cleanup after tiling and fusion
     transform.apply_cse(func)
     canonicalize(func)
 
@@ -171,6 +237,17 @@ def bundle_xegpu_softmax_schedule(
     transform.apply_cse(mod)
     canonicalize(mod)
 
+    # promote memref.alloc to memref.alloca in payload function
+    func = match(mod, ops={"func.func"})
+    func = apply_registered_pass(
+        func,
+        "promote-buffers-to-stack",
+        options={
+            "max-alloc-size-in-bytes": "8192",
+            "max-rank-of-allocated-memref": "2",
+        },
+    )
+
     if stop_at_stage == "bufferized":
         raise PipelineInterrupt()
 
@@ -200,6 +277,9 @@ def bundle_xegpu_softmax_schedule(
     mod = apply_registered_pass(mod, "gpu-kernel-outlining")
     transform.apply_cse(mod)
 
+    if stop_at_stage == "gpu-outlining":
+        raise PipelineInterrupt()
+
     # set xevm target
     mod = apply_registered_pass(
         mod,
@@ -214,15 +294,19 @@ def bundle_xegpu_softmax_schedule(
         gpu_func = apply_registered_pass(gpu_func, "convert-vector-to-xegpu")
         transform.apply_cse(gpu_func)
 
+    # Cleanup.
+    transform.apply_cse(mod)
+    canonicalize(mod)
+
     if stop_at_stage == "xegpu-initial":
         raise PipelineInterrupt()
 
     # Set layout attributes for xegpu.store_nd operations.
     # FIXME: currently ecah subgroup is handling the entire row.
-    store_ops = match_and_split(gpu_func, ops={"xegpu.store_nd"}, nhandles=1)
+    store_ops = match_and_split(gpu_func, ops={"xegpu.store_nd"}, nhandles=5)
     sg_layout = [parameters["sg_rows"], 1]
-    sg_data = [parameters["sg_rows"], parameters["sizes"][1]]
-    xegpu.set_anchor_layout(store_ops[0], sg_layout=sg_layout, sg_data=sg_data)
+    sg_data = [parameters["sg_rows"], parameters["reduction_step_size"]]
+    xegpu.set_anchor_layout(store_ops[-1], sg_layout=sg_layout, sg_data=sg_data)
 
     if stop_at_stage == "xegpu-wg":
         raise PipelineInterrupt()

From bce6260e57741e5439efa90c9640aff19f90c8f0 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Fri, 3 Apr 2026 23:35:41 +0000
Subject: [PATCH 26/51] tiled reduction doc

---
 docs/softmax_lowering.md | 331 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 330 insertions(+), 1 deletion(-)

diff --git a/docs/softmax_lowering.md b/docs/softmax_lowering.md
index c5608e37..a31e6f4c 100644
--- a/docs/softmax_lowering.md
+++ b/docs/softmax_lowering.md
@@ -303,4 +303,333 @@ xegpu.store_nd %11, %12[%0, 0]
 
 ---
 
-# Supporting larger Softmax dimension sizes.
+# Supporting larger Softmax dimension sizes
+
+When the softmax dimension is larger than what can fit efficiently in registers, additional tiling and fusion transformations are applied to the reduction dimension. This section shows the intermediate stages between "decomposed" and "vectorized".
+
+**Approach:** Tile reductions along dimension 1 (step size = 16) and fuse producers into consumers to enable streaming computation.
+
+---
+
+## Decomposed → Tiled: Stage A - Tile div op
+
+**Notes:**
+- Tile the division operation with step size 16 along dimension 1
+- Creates an `scf.for` loop that iterates over the 64 columns (dimension 1) in chunks of 16
+
+**Key Changes:**
+```mlir
+// Before: Single division linalg.generic over 64x64
+%11 = linalg.generic {...} ins(%8, %10 : tensor<64x64xf32>, tensor<64xf32>) 
+      outs(%extracted_slice_0 : tensor<64x64xf32>) { ... } -> tensor<64x64xf32>
+
+// After: Division tiled into 64x16 chunks
+%11 = scf.for %arg4 = %c0_2 to %c64 step %c16 iter_args(%arg5 = %extracted_slice_0) -> (tensor<64x64xf32>) {
+  %extracted_slice_3 = tensor.extract_slice %8[0, %arg4] [64, 16] [1, 1]
+  %12 = linalg.generic {...} ins(%extracted_slice_3, %extracted_slice_4 : tensor<64x16xf32>, tensor<64xf32>) 
+        outs(%extracted_slice_5 : tensor<64x16xf32>) { ... } -> tensor<64x16xf32>
+  %inserted_slice = tensor.insert_slice %12 into %arg5[0, %arg4] [64, 16] [1, 1]
+  scf.yield %inserted_slice
+}
+```
+
+---
+
+## Stage B - Fuse sub+exp into div loop
+
+**Notes:**
+- Fuse the `sub+exp` producer (max_center_and_exp_op) into the div loop
+- Recomputes exp values on-the-fly instead of materializing full 64x64 tensor
+
+**Key Changes:**
+```mlir
+%11 = scf.for %arg4 = %c0_2 to %c64 step %c16 iter_args(%arg5 = %extracted_slice_0) -> (tensor<64x64xf32>) {
+  %extracted_slice_3 = tensor.extract_slice %extracted_slice[0, %arg4] [64, 16] [1, 1]
+  
+  // Fused: sub+exp computed per 16-element chunk
+  %12 = linalg.generic {...} ins(%extracted_slice_3, %extracted_slice_4 : tensor<64x16xf32>, tensor<64xf32>) 
+        outs(%extracted_slice_5 : tensor<64x16xf32>) {
+    ^bb0(%in: f32, %in_8: f32, %out: f32):
+      %14 = arith.subf %in, %in_8 : f32
+      %15 = math.exp %14 : f32
+      linalg.yield %15 : f32
+  } -> tensor<64x16xf32>
+  
+  // Division operation
+  %13 = linalg.generic {...} ins(%12, %extracted_slice_6 : tensor<64x16xf32>, tensor<64xf32>) 
+        outs(%extracted_slice_7 : tensor<64x16xf32>) { ... } -> tensor<64x16xf32>
+  // ...
+}
+```
+
+---
+
+## Stage C - Tile sum reduction
+
+**Notes:**
+- Tile the sum reduction using `structured_tile_reduction_using_for`
+- Creates intermediate accumulator tensor (64x16)
+- Final reduction via `linalg.reduce` over dimension 1
+
+**Key Changes:**
+```mlir
+// Tiled sum reduction with intermediate accumulator
+%10 = tensor.empty() : tensor<64x16xf32>
+%11 = linalg.fill ins(%cst_2 : f32) outs(%10 : tensor<64x16xf32>) -> tensor<64x16xf32>
+
+%12 = scf.for %arg4 = %c0_3 to %c64 step %c16 iter_args(%arg5 = %11) -> (tensor<64x16xf32>) {
+  %extracted_slice_7 = tensor.extract_slice %8[0, %arg4] [64, 16] [1, 1]
+  %14 = linalg.generic {...} ins(%extracted_slice_7 : tensor<64x16xf32>) 
+        outs(%extracted_slice_8 : tensor<64x16xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      %15 = arith.addf %in, %out : f32
+      linalg.yield %15 : f32
+  } -> tensor<64x16xf32>
+  // ...
+}
+
+// Final reduction to 64xf32
+%reduced = linalg.reduce ins(%12 : tensor<64x16xf32>) outs(%9 : tensor<64xf32>) dimensions = [1] 
+  (%in: f32, %init: f32) {
+    %14 = arith.addf %in, %init : f32
+    linalg.yield %14 : f32
+  }
+```
+
+---
+
+## Stage D - Fuse sub+exp into sum loop
+
+**Notes:**
+- Fuse `sub+exp` into the sum reduction loop
+- Stream computation: compute exp and accumulate in same loop
+
+**Key Changes:**
+```mlir
+%12 = scf.for %arg4 = %c0_3 to %c64 step %c16 iter_args(%arg5 = %11) -> (tensor<64x16xf32>) {
+  %extracted_slice_7 = tensor.extract_slice %extracted_slice[0, %arg4] [64, 16] [1, 1]
+  
+  // Fused: sub+exp
+  %14 = linalg.generic {...} ins(%extracted_slice_7, %extracted_slice_8 : tensor<64x16xf32>, tensor<64xf32>) 
+        outs(%extracted_slice_9 : tensor<64x16xf32>) {
+    ^bb0(%in: f32, %in_11: f32, %out: f32):
+      %16 = arith.subf %in, %in_11 : f32
+      %17 = math.exp %16 : f32
+      linalg.yield %17 : f32
+  } -> tensor<64x16xf32>
+  
+  // Accumulate sum
+  %15 = linalg.generic {...} ins(%14 : tensor<64x16xf32>) 
+        outs(%extracted_slice_10 : tensor<64x16xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      %16 = arith.addf %in, %out : f32
+      linalg.yield %16 : f32
+  } -> tensor<64x16xf32>
+  // ...
+}
+```
+
+---
+
+## Stage E - Tile max reduction
+
+**Notes:**
+- Tile max reduction similar to sum reduction
+- Creates 64x16 intermediate accumulator
+- Final reduction via `linalg.reduce` with maxnumf
+
+**Key Changes:**
+```mlir
+// Tiled max reduction
+%7 = tensor.empty() : tensor<64x16xf32>
+%8 = linalg.fill ins(%cst_1 : f32) outs(%7 : tensor<64x16xf32>) -> tensor<64x16xf32>
+
+%9 = scf.for %arg4 = %c0_2 to %c64 step %c16 iter_args(%arg5 = %8) -> (tensor<64x16xf32>) {
+  %extracted_slice_12 = tensor.extract_slice %extracted_slice[0, %arg4] [64, 16] [1, 1]
+  %16 = linalg.generic {...} ins(%extracted_slice_12 : tensor<64x16xf32>) 
+        outs(%extracted_slice_13 : tensor<64x16xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      %17 = arith.maxnumf %in, %out : f32
+      linalg.yield %17 : f32
+  } -> tensor<64x16xf32>
+  // ...
+}
+
+// Final max reduction
+%reduced = linalg.reduce ins(%9 : tensor<64x16xf32>) outs(%6 : tensor<64xf32>) dimensions = [1] 
+  (%in: f32, %init: f32) {
+    %16 = arith.maxnumf %in, %init : f32
+    linalg.yield %16 : f32
+  }
+```
+
+**Result:** Now all three major computations (max, sum, div) are tiled and operate on 64x16 chunks, with exp computation fused into both sum and div loops.
+
+---
+
+## Stage F - Vectorization
+
+**Notes:**
+- Convert tiled linalg operations to vector operations
+- `scf.for` loops remain but operate on vectors
+- Vector size: 64x16 for tiled operations
+
+**Code:**
+```mlir
+func.func @payload(%arg0: memref<1024x64xf32>, %arg1: memref<1024x64xf32>) {
+  // ...
+  %3 = scf.forall (%arg2) in (16) shared_outs(%arg3 = %2) -> (tensor<1024x64xf32>) {
+    // ...
+    
+    // Vectorized max reduction loop
+    %6 = vector.transfer_write %cst_1, %5[%c0, %c0] : vector<64x16xf32>, tensor<64x16xf32>
+    %7 = scf.for %arg4 = %c0 to %c64 step %c16 iter_args(%arg5 = %6) -> (tensor<64x16xf32>) {
+      %15 = vector.transfer_read %1[%4, %arg4], %0 : tensor<1024x64xf32>, vector<64x16xf32>
+      %16 = vector.transfer_read %arg5[%c0, %c0], %0 : tensor<64x16xf32>, vector<64x16xf32>
+      %17 = arith.maxnumf %15, %16 : vector<64x16xf32>
+      %18 = vector.transfer_write %17, %arg5[%c0, %c0] : vector<64x16xf32>, tensor<64x16xf32>
+      scf.yield %18 : tensor<64x16xf32>
+    }
+    %8 = vector.transfer_read %7[%c0, %c0], %0 : tensor<64x16xf32>, vector<64x16xf32>
+    %9 = vector.multi_reduction <maxnumf>, %8, %cst_2 [1] : vector<64x16xf32> to vector<64xf32>
+    
+    // Vectorized sum reduction loop with fused sub+exp
+    %11 = scf.for %arg4 = %c0 to %c64 step %c16 iter_args(%arg5 = %10) -> (tensor<64x16xf32>) {
+      %15 = vector.transfer_read %1[%4, %arg4], %0 : tensor<1024x64xf32>, vector<64x16xf32>
+      %16 = vector.broadcast %9 : vector<64xf32> to vector<16x64xf32>
+      %17 = vector.transpose %16, [1, 0] : vector<16x64xf32> to vector<64x16xf32>
+      %18 = arith.subf %15, %17 : vector<64x16xf32>
+      %19 = math.exp %18 : vector<64x16xf32>
+      %20 = vector.transfer_read %arg5[%c0, %c0], %0 : tensor<64x16xf32>, vector<64x16xf32>
+      %21 = arith.addf %19, %20 : vector<64x16xf32>
+      %22 = vector.transfer_write %21, %arg5[%c0, %c0] : vector<64x16xf32>, tensor<64x16xf32>
+      scf.yield %22 : tensor<64x16xf32>
+    }
+    %12 = vector.transfer_read %11[%c0, %c0], %0 : tensor<64x16xf32>, vector<64x16xf32>
+    %13 = vector.multi_reduction <add>, %12, %cst_0 [1] : vector<64x16xf32> to vector<64xf32>
+    
+    // Vectorized div loop with fused sub+exp
+    %14 = scf.for %arg4 = %c0 to %c64 step %c16 iter_args(%arg5 = %extracted_slice) -> (tensor<64x64xf32>) {
+      %15 = vector.transfer_read %1[%4, %arg4], %0 : tensor<1024x64xf32>, vector<64x16xf32>
+      %16 = vector.broadcast %9 : vector<64xf32> to vector<16x64xf32>
+      %17 = vector.transpose %16, [1, 0] : vector<16x64xf32> to vector<64x16xf32>
+      %18 = arith.subf %15, %17 : vector<64x16xf32>
+      %19 = math.exp %18 : vector<64x16xf32>
+      %20 = vector.broadcast %13 : vector<64xf32> to vector<16x64xf32>
+      %21 = vector.transpose %20, [1, 0] : vector<16x64xf32> to vector<64x16xf32>
+      %22 = arith.divf %19, %21 : vector<64x16xf32>
+      %23 = vector.transfer_write %22, %arg5[%c0, %arg4] : vector<64x16xf32>, tensor<64x64xf32>
+      scf.yield %23 : tensor<64x64xf32>
+    }
+  }
+  // ...
+}
+```
+
+---
+
+## Stage G - Bufferization
+
+**Notes:**
+- Convert tensors to memrefs
+- Allocate a heap buffer for the 64x16 accumulator with `memref.alloc()` (it is promoted to the stack in the next stage)
+
+**Code:**
+```mlir
+func.func @payload(%arg0: memref<1024x64xf32>, %arg1: memref<1024x64xf32>) {
+  // ...
+  scf.forall (%arg2) in (16) {
+    %1 = affine.apply #map(%arg2)
+    %subview = memref.subview %arg0[%1, 0] [64, 64] [1, 1]
+    
+    // Allocate accumulator buffer
+    %alloc = memref.alloc() {alignment = 64 : i64} : memref<64x16xf32>
+    
+    // Max reduction loop
+    vector.transfer_write %cst_1, %alloc[%c0, %c0] : vector<64x16xf32>, memref<64x16xf32>
+    scf.for %arg3 = %c0 to %c64 step %c16 {
+      %6 = vector.transfer_read %arg1[%1, %arg3], %0 : memref<1024x64xf32>, vector<64x16xf32>
+      %7 = vector.transfer_read %alloc[%c0, %c0], %0 : memref<64x16xf32>, vector<64x16xf32>
+      %8 = arith.maxnumf %6, %7 : vector<64x16xf32>
+      vector.transfer_write %8, %alloc[%c0, %c0] : vector<64x16xf32>, memref<64x16xf32>
+    }
+    %2 = vector.transfer_read %alloc[%c0, %c0], %0 : memref<64x16xf32>, vector<64x16xf32>
+    %3 = vector.multi_reduction <maxnumf>, %2, %cst_2 [1] : vector<64x16xf32> to vector<64xf32>
+    
+    // Sum reduction loop (reuses %alloc)
+    // ...
+    
+    // Div loop (writes to %subview)
+    // ...
+  }
+}
+```
+
+---
+
+## Stage H - Promote buffers to stack
+
+**Notes:**
+- Convert `memref.alloc()` to `memref.alloca()` for stack allocation
+- Reduces memory allocation overhead
+
+**Code:**
+```mlir
+scf.forall (%arg2) in (16) {
+  %1 = affine.apply #map(%arg2)
+  %subview = memref.subview %arg0[%1, 0] [64, 64] [1, 1]
+  
+  // Stack allocation instead of heap
+  %alloca = memref.alloca() {alignment = 64 : i64} : memref<64x16xf32>
+  
+  // ... same operations using %alloca ...
+}
+```
+
+---
+
+## Stage I - GPU outlining
+
+**Notes:**
+- Convert `scf.forall` to `scf.parallel`, then to `gpu.launch`
+- Extract GPU kernel into separate `gpu.module`
+- Set thread count: 128 threads = (64 rows / 8 sg_rows) × 16 subgroup_size
+
+**Host Side:**
+```mlir
+func.func @payload(%arg0: memref<1024x64xf32>, %arg1: memref<1024x64xf32>) {
+  %c16 = arith.constant 16 : index
+  %c1 = arith.constant 1 : index
+  %c128 = arith.constant 128 : index
+  gpu.launch_func @payload_kernel::@payload_kernel 
+    blocks in (%c16, %c1, %c1) 
+    threads in (%c128, %c1, %c1)
+    args(%arg0 : memref<1024x64xf32>, %arg1 : memref<1024x64xf32>)
+  return
+}
+```
+
+**GPU Kernel:**
+```mlir
+gpu.module @payload_kernel {
+  gpu.func @payload_kernel(%arg0: memref<1024x64xf32>, %arg1: memref<1024x64xf32>) kernel 
+    attributes {known_block_size = array<i32: 128, 1, 1>, 
+                known_grid_size = array<i32: 16, 1, 1>} {
+    %block_id_x = gpu.block_id x
+    %1 = arith.muli %block_id_x, %c64 overflow<nsw> : index
+    %subview = memref.subview %arg0[%1, 0] [64, 64] [1, 1]
+    %alloca = memref.alloca() {alignment = 64 : i64} : memref<64x16xf32>
+    
+    // Three sequential loops (max, sum, div), each with the structure below
+    scf.for %arg2 = %c0 to %c64 step %c16 {
+      // Max: accumulate max values
+      // Sum: compute & accumulate exp(x - max)
+      // Div: compute exp(x - max) / sum
+    }
+    
+    gpu.return
+  }
+}
+```
+
+**Summary:** At this stage, the kernel processes 64x16 chunks in streaming fashion through three sequential loops, minimizing memory footprint.

From 2df0777727704d1e3e14f2b6d66911908fedab27 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Fri, 3 Apr 2026 23:36:42 +0000
Subject: [PATCH 27/51] tiled reduction doc

---
 lighthouse/schedule/xegpu/softmax_schedule.py | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/lighthouse/schedule/xegpu/softmax_schedule.py b/lighthouse/schedule/xegpu/softmax_schedule.py
index bda5f819..0248919f 100644
--- a/lighthouse/schedule/xegpu/softmax_schedule.py
+++ b/lighthouse/schedule/xegpu/softmax_schedule.py
@@ -140,7 +140,7 @@ def bundle_xegpu_softmax_schedule(
         transform.AnyOpType.get(), func, ops=["linalg.softmax"]
     )
     structured.structured_decompose_interface(anytype, softmax_ops)
-    # transform.print_(target=func, name="After structured_decompose_interface")
+    transform.print_(target=func, name="Aftemr structured_decompose_interface")
 
     linalg_ops = match_and_split(
         func, ops={"linalg.generic", "linalg.fill"}, nhandles=6
@@ -156,6 +156,8 @@ def bundle_xegpu_softmax_schedule(
     _, div_loop = structured.TileUsingForOp(
         div_op, sizes=[0, reduction_step_size]
     ).results
+    
+    transform.print_(target=func, name="After tiling div op")
 
     # Fuse max_center_and_exp_op into the div loop
     _, fused_loop = structured.structured_fuse_into_containing_op(
@@ -164,6 +166,8 @@ def bundle_xegpu_softmax_schedule(
         producer_op=max_center_and_exp_op,
         containing_op=div_loop,
     )
+    transform.print_(target=func, name="After fusing max_center_and_exp_op into div loop")
+
 
     # Tile the sum reduction and fuse the sub+exp producer into it
     _, _, _, sum_loop = structured.structured_tile_reduction_using_for(
@@ -174,6 +178,8 @@ def bundle_xegpu_softmax_schedule(
         target=sum_reduction,
         tile_sizes=[0, reduction_step_size],
     )
+    
+    transform.print_(target=func, name="After tiling sum reduction")
 
     func = transform.get_parent_op(
         anytype,
@@ -193,6 +199,7 @@ def bundle_xegpu_softmax_schedule(
         producer_op=max_center_and_exp_op,
         containing_op=sum_loop,
     )
+    transform.print_(target=func, name="After fusing max_center_and_exp_op into sum loop")
 
     # Tile the max reduction.
     max_reduction = linalg_ops[0]
@@ -204,6 +211,8 @@ def bundle_xegpu_softmax_schedule(
         target=max_reduction,
         tile_sizes=[0, reduction_step_size],
     )
+    transform.print_(target=func, name="After tiling max reduction")
+
 
     # Cleanup after tiling and fusion
     transform.apply_cse(func)
@@ -219,6 +228,8 @@ def bundle_xegpu_softmax_schedule(
     ).result
     transform.apply_cse(func)
     canonicalize(func)
+    
+    transform.print_(target=func, name="After vectorization")
 
     if stop_at_stage == "vectorized":
         raise PipelineInterrupt()
@@ -236,6 +247,8 @@ def bundle_xegpu_softmax_schedule(
     mod = apply_registered_pass(mod, "fold-memref-alias-ops")
     transform.apply_cse(mod)
     canonicalize(mod)
+    
+    transform.print_(target=mod, name="After bufferization")
 
     # promote memref.alloc to memref.alloca in payload function
     func = match(mod, ops={"func.func"})
@@ -247,6 +260,8 @@ def bundle_xegpu_softmax_schedule(
             "max-rank-of-allocated-memref": "2",
         },
     )
+    
+    transform.print_(target=func, name="After promoting buffers to stack")
 
     if stop_at_stage == "bufferized":
         raise PipelineInterrupt()
@@ -276,6 +291,8 @@ def bundle_xegpu_softmax_schedule(
     func = apply_registered_pass(func, "gpu-launch-sink-index-computations")
     mod = apply_registered_pass(mod, "gpu-kernel-outlining")
     transform.apply_cse(mod)
+    
+    transform.print_(target=mod, name="After GPU outlining")
 
     if stop_at_stage == "gpu-outlining":
         raise PipelineInterrupt()

From 56687b7571d3160474dea5dfd4503c307a258dad Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Fri, 3 Apr 2026 23:37:03 +0000
Subject: [PATCH 28/51] tiled reduction doc

---
 lighthouse/schedule/xegpu/softmax_schedule.py | 22 ++++++++++---------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/lighthouse/schedule/xegpu/softmax_schedule.py b/lighthouse/schedule/xegpu/softmax_schedule.py
index 0248919f..7912a45b 100644
--- a/lighthouse/schedule/xegpu/softmax_schedule.py
+++ b/lighthouse/schedule/xegpu/softmax_schedule.py
@@ -156,7 +156,7 @@ def bundle_xegpu_softmax_schedule(
     _, div_loop = structured.TileUsingForOp(
         div_op, sizes=[0, reduction_step_size]
     ).results
-    
+
     transform.print_(target=func, name="After tiling div op")
 
     # Fuse max_center_and_exp_op into the div loop
@@ -166,8 +166,9 @@ def bundle_xegpu_softmax_schedule(
         producer_op=max_center_and_exp_op,
         containing_op=div_loop,
     )
-    transform.print_(target=func, name="After fusing max_center_and_exp_op into div loop")
-
+    transform.print_(
+        target=func, name="After fusing max_center_and_exp_op into div loop"
+    )
 
     # Tile the sum reduction and fuse the sub+exp producer into it
     _, _, _, sum_loop = structured.structured_tile_reduction_using_for(
@@ -178,7 +179,7 @@ def bundle_xegpu_softmax_schedule(
         target=sum_reduction,
         tile_sizes=[0, reduction_step_size],
     )
-    
+
     transform.print_(target=func, name="After tiling sum reduction")
 
     func = transform.get_parent_op(
@@ -199,7 +200,9 @@ def bundle_xegpu_softmax_schedule(
         producer_op=max_center_and_exp_op,
         containing_op=sum_loop,
     )
-    transform.print_(target=func, name="After fusing max_center_and_exp_op into sum loop")
+    transform.print_(
+        target=func, name="After fusing max_center_and_exp_op into sum loop"
+    )
 
     # Tile the max reduction.
     max_reduction = linalg_ops[0]
@@ -213,7 +216,6 @@ def bundle_xegpu_softmax_schedule(
     )
     transform.print_(target=func, name="After tiling max reduction")
 
-
     # Cleanup after tiling and fusion
     transform.apply_cse(func)
     canonicalize(func)
@@ -228,7 +230,7 @@ def bundle_xegpu_softmax_schedule(
     ).result
     transform.apply_cse(func)
     canonicalize(func)
-    
+
     transform.print_(target=func, name="After vectorization")
 
     if stop_at_stage == "vectorized":
@@ -247,7 +249,7 @@ def bundle_xegpu_softmax_schedule(
     mod = apply_registered_pass(mod, "fold-memref-alias-ops")
     transform.apply_cse(mod)
     canonicalize(mod)
-    
+
     transform.print_(target=mod, name="After bufferization")
 
     # promote memref.alloc to memref.alloca in payload function
@@ -260,7 +262,7 @@ def bundle_xegpu_softmax_schedule(
             "max-rank-of-allocated-memref": "2",
         },
     )
-    
+
     transform.print_(target=func, name="After promoting buffers to stack")
 
     if stop_at_stage == "bufferized":
@@ -291,7 +293,7 @@ def bundle_xegpu_softmax_schedule(
     func = apply_registered_pass(func, "gpu-launch-sink-index-computations")
     mod = apply_registered_pass(mod, "gpu-kernel-outlining")
     transform.apply_cse(mod)
-    
+
     transform.print_(target=mod, name="After GPU outlining")
 
     if stop_at_stage == "gpu-outlining":

From d2d4c49f70c10328a3e57da2b6b359f2539ad5df Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Wed, 15 Apr 2026 23:50:39 +0000
Subject: [PATCH 29/51] save work

---
 .../transform/transform_ext/__init__.py       |   2 +
 .../transform_ext/ops/update_address_space.py | 102 ++++++++++++++++++
 2 files changed, 104 insertions(+)
 create mode 100644 lighthouse/dialects/transform/transform_ext/ops/update_address_space.py

diff --git a/lighthouse/dialects/transform/transform_ext/__init__.py b/lighthouse/dialects/transform/transform_ext/__init__.py
index aba08bf0..fb705805 100644
--- a/lighthouse/dialects/transform/transform_ext/__init__.py
+++ b/lighthouse/dialects/transform/transform_ext/__init__.py
@@ -9,6 +9,7 @@
 from .ops.extract_handle import extract_handle
 from .ops.get_tileable_consumers import get_tileable_consumers
 from .ops.get_tiling_sizes import get_tiling_sizes
+from .ops.update_address_space import update_address_space
 
 __all__ = [
     "TransformExtensionDialect",
@@ -22,4 +23,5 @@
     "register_and_load",
     "replace",
     "wrap_in_benching_func",
+    "update_address_space",
 ]
diff --git a/lighthouse/dialects/transform/transform_ext/ops/update_address_space.py b/lighthouse/dialects/transform/transform_ext/ops/update_address_space.py
new file mode 100644
index 00000000..e0752f34
--- /dev/null
+++ b/lighthouse/dialects/transform/transform_ext/ops/update_address_space.py
@@ -0,0 +1,102 @@
+from mlir import ir
+from mlir.dialects import ext, transform, memref
+from mlir.dialects.transform import DiagnosedSilenceableFailure
+
+from lighthouse.dialects.transform.transform_ext import TransformExtensionDialect
+
+
+class UpdateAddressSpace(TransformExtensionDialect.Operation, name="update_address_space"):
+    """Update the address space of a memref allocation operation.
+
+    Takes a target memref allocation operation and updates its address space
+    to the provided value.
+    """
+
+    target: ext.Operand[transform.AnyOpType]
+    address_space: ir.IntegerAttr
+    updated_op: ext.Result[transform.AnyOpType[()]] = ext.result(infer_type=True)
+
+    @classmethod
+    def attach_interface_impls(cls, ctx=None):
+        cls.TransformOpInterfaceModel.attach(cls.OPERATION_NAME, context=ctx)
+        cls.MemoryEffectsOpInterfaceModel.attach(cls.OPERATION_NAME, context=ctx)
+
+    class TransformOpInterfaceModel(transform.TransformOpInterface):
+        @staticmethod
+        def apply(
+            op: "UpdateAddressSpace",
+            rewriter: transform.TransformRewriter,
+            results: transform.TransformResults,
+            state: transform.TransformState,
+        ) -> DiagnosedSilenceableFailure:
+            # Get the target operations to transform
+            target_ops = state.get_payload_ops(op.target)
+
+            # Get the address space value from the attribute
+            address_space_value = ir.IntegerAttr(op.address_space).value
+
+            new_ops = []
+
+            for target_op in target_ops:
+                # Verify this is a memref.alloca operation
+                if target_op.OPERATION_NAME != "memref.alloca":
+                    return DiagnosedSilenceableFailure.emit_silenceable_error(
+                        f"Expected memref.alloca operation, got {target_op.OPERATION_NAME}"
+                    )
+
+                # Get the current result type (should be a MemRefType)
+                old_result_type = target_op.results[0].type
+
+                memref_type = ir.MemRefType(old_result_type)
+
+                # Create a new memref type with the specified address space
+                new_memref_type = ir.MemRefType.get(
+                    memref_type.shape,
+                    memref_type.element_type,
+                    layout=memref_type.layout,
+                    memory_space=ir.Attribute.parse(f"{address_space_value}")
+                )
+                print(new_memref_type)
+
+                # Replace the operation with a new one that has the updated type
+                with ir.InsertionPoint(target_op):
+
+                    # Get the operands from the original alloca (dynamic sizes and symbols)
+                    dynamic_sizes = list(target_op.operands[:target_op.attributes["operandSegmentSizes"][0]])
+                    symbol_operands = list(target_op.operands[target_op.attributes["operandSegmentSizes"][0]:])
+
+                    # Create a new alloca with the updated type
+                    new_alloca = memref.alloca(new_memref_type, dynamic_sizes, symbol_operands)
+                    print(new_alloca)
+
+                    # Replace all uses of the old operation with the new one
+                    # rewriter.replace_all_uses_with(target_op.results[0], new_alloca.results[0])
+
+                    # Erase the old operation
+                    rewriter.replace_op(target_op, [new_alloca])
+
+                    new_ops.append(new_alloca.owner)
+
+            # Set the results to the new operations
+            results.set_ops(op.updated_op, new_ops)
+            return DiagnosedSilenceableFailure.Success
+
+        @staticmethod
+        def allow_repeated_handle_operands(_op: "UpdateAddressSpace") -> bool:
+            return False
+
+    class MemoryEffectsOpInterfaceModel(ir.MemoryEffectsOpInterface):
+        @staticmethod
+        def get_effects(op: ir.Operation, effects):
+            transform.consumes_handle(op.op_operands[:1], effects)
+            transform.produces_handle(op.results, effects)
+            transform.modifies_payload(effects)
+
+
+def update_address_space(
+    target: ir.Value,
+    address_space: int | ir.IntegerAttr,
+) -> ir.Value:
+    if not isinstance(address_space, ir.IntegerAttr):
+        address_space = ir.IntegerAttr.get(ir.IntegerType.get_signless(64), address_space)
+    return UpdateAddressSpace(target, address_space=address_space).updated_op

From 32a345a87913e8f6a4ad982752dbd1fa234a03e2 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Thu, 16 Apr 2026 23:16:02 +0000
Subject: [PATCH 30/51] save work

---
 .../transform/transform_ext/__init__.py       |  2 +-
 .../transform_ext/ops/update_address_space.py | 41 ++++++++++---------
 2 files changed, 23 insertions(+), 20 deletions(-)

diff --git a/lighthouse/dialects/transform/transform_ext/__init__.py b/lighthouse/dialects/transform/transform_ext/__init__.py
index fb705805..997522a2 100644
--- a/lighthouse/dialects/transform/transform_ext/__init__.py
+++ b/lighthouse/dialects/transform/transform_ext/__init__.py
@@ -22,6 +22,6 @@
     "param_cmp_eq",
     "register_and_load",
     "replace",
-    "wrap_in_benching_func",
     "update_address_space",
+    "wrap_in_benching_func",
 ]
diff --git a/lighthouse/dialects/transform/transform_ext/ops/update_address_space.py b/lighthouse/dialects/transform/transform_ext/ops/update_address_space.py
index e0752f34..09313647 100644
--- a/lighthouse/dialects/transform/transform_ext/ops/update_address_space.py
+++ b/lighthouse/dialects/transform/transform_ext/ops/update_address_space.py
@@ -5,7 +5,9 @@
 from lighthouse.dialects.transform.transform_ext import TransformExtensionDialect
 
 
-class UpdateAddressSpace(TransformExtensionDialect.Operation, name="update_address_space"):
+class UpdateAddressSpace(
+    TransformExtensionDialect.Operation, name="update_address_space"
+):
     """Update the address space of a memref allocation operation.
 
     Takes a target memref allocation operation and updates its address space
@@ -31,10 +33,8 @@ def apply(
         ) -> DiagnosedSilenceableFailure:
             # Get the target operations to transform
             target_ops = state.get_payload_ops(op.target)
-
             # Get the address space value from the attribute
             address_space_value = ir.IntegerAttr(op.address_space).value
-
             new_ops = []
 
             for target_op in target_ops:
@@ -46,35 +46,36 @@ def apply(
 
                 # Get the current result type (should be a MemRefType)
                 old_result_type = target_op.results[0].type
-
                 memref_type = ir.MemRefType(old_result_type)
-
                 # Create a new memref type with the specified address space
                 new_memref_type = ir.MemRefType.get(
                     memref_type.shape,
                     memref_type.element_type,
                     layout=memref_type.layout,
-                    memory_space=ir.Attribute.parse(f"{address_space_value}")
+                    memory_space=ir.Attribute.parse(f"{address_space_value}"),
                 )
-                print(new_memref_type)
 
                 # Replace the operation with a new one that has the updated type
                 with ir.InsertionPoint(target_op):
-
                     # Get the operands from the original alloca (dynamic sizes and symbols)
-                    dynamic_sizes = list(target_op.operands[:target_op.attributes["operandSegmentSizes"][0]])
-                    symbol_operands = list(target_op.operands[target_op.attributes["operandSegmentSizes"][0]:])
-
+                    dynamic_sizes = list(
+                        target_op.operands[
+                            : target_op.attributes["operandSegmentSizes"][0]
+                        ]
+                    )
+                    symbol_operands = list(
+                        target_op.operands[
+                            target_op.attributes["operandSegmentSizes"][0] :
+                        ]
+                    )
                     # Create a new alloca with the updated type
-                    new_alloca = memref.alloca(new_memref_type, dynamic_sizes, symbol_operands)
-                    print(new_alloca)
-
+                    new_alloca = memref.alloca(
+                        new_memref_type, dynamic_sizes, symbol_operands
+                    )
                     # Replace all uses of the old operation with the new one
-                    # rewriter.replace_all_uses_with(target_op.results[0], new_alloca.results[0])
-
-                    # Erase the old operation
+                    # FIXME: This won't handle operations that consume the memref type and
+                    # return a new memref (such as subview).
                     rewriter.replace_op(target_op, [new_alloca])
-
                     new_ops.append(new_alloca.owner)
 
             # Set the results to the new operations
@@ -98,5 +99,7 @@ def update_address_space(
     address_space: int | ir.IntegerAttr,
 ) -> ir.Value:
     if not isinstance(address_space, ir.IntegerAttr):
-        address_space = ir.IntegerAttr.get(ir.IntegerType.get_signless(64), address_space)
+        address_space = ir.IntegerAttr.get(
+            ir.IntegerType.get_signless(64), address_space
+        )
     return UpdateAddressSpace(target, address_space=address_space).updated_op

From eacc9d876840c7c2d27597fbfd97d605e6ac4319 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Fri, 17 Apr 2026 00:10:42 +0000
Subject: [PATCH 31/51] save work

---
 .../transform_ext/ops/update_address_space.py | 73 +++++++++----------
 lighthouse/schedule/xegpu/softmax_schedule.py | 31 ++------
 2 files changed, 42 insertions(+), 62 deletions(-)

diff --git a/lighthouse/dialects/transform/transform_ext/ops/update_address_space.py b/lighthouse/dialects/transform/transform_ext/ops/update_address_space.py
index 09313647..2c40bfce 100644
--- a/lighthouse/dialects/transform/transform_ext/ops/update_address_space.py
+++ b/lighthouse/dialects/transform/transform_ext/ops/update_address_space.py
@@ -32,51 +32,46 @@ def apply(
             state: transform.TransformState,
         ) -> DiagnosedSilenceableFailure:
             # Get the target operations to transform
-            target_ops = state.get_payload_ops(op.target)
+            target_op = state.get_payload_ops(op.target)[0]
             # Get the address space value from the attribute
             address_space_value = ir.IntegerAttr(op.address_space).value
             new_ops = []
 
-            for target_op in target_ops:
-                # Verify this is a memref.alloca operation
-                if target_op.OPERATION_NAME != "memref.alloca":
-                    return DiagnosedSilenceableFailure.emit_silenceable_error(
-                        f"Expected memref.alloca operation, got {target_op.OPERATION_NAME}"
-                    )
-
-                # Get the current result type (should be a MemRefType)
-                old_result_type = target_op.results[0].type
-                memref_type = ir.MemRefType(old_result_type)
-                # Create a new memref type with the specified address space
-                new_memref_type = ir.MemRefType.get(
-                    memref_type.shape,
-                    memref_type.element_type,
-                    layout=memref_type.layout,
-                    memory_space=ir.Attribute.parse(f"{address_space_value}"),
+            # Verify this is a memref.alloca operation
+            if target_op.OPERATION_NAME != "memref.alloca":
+                return DiagnosedSilenceableFailure.emit_silenceable_error(
+                    f"Expected memref.alloca operation, got {target_op.OPERATION_NAME}"
                 )
 
-                # Replace the operation with a new one that has the updated type
-                with ir.InsertionPoint(target_op):
-                    # Get the operands from the original alloca (dynamic sizes and symbols)
-                    dynamic_sizes = list(
-                        target_op.operands[
-                            : target_op.attributes["operandSegmentSizes"][0]
-                        ]
-                    )
-                    symbol_operands = list(
-                        target_op.operands[
-                            target_op.attributes["operandSegmentSizes"][0] :
-                        ]
-                    )
-                    # Create a new alloca with the updated type
-                    new_alloca = memref.alloca(
-                        new_memref_type, dynamic_sizes, symbol_operands
-                    )
-                    # Replace all uses of the old operation with the new one
-                    # FIXME: This won't handle operations that consume the memref type and
-                    # return a new memref (such as subview).
-                    rewriter.replace_op(target_op, [new_alloca])
-                    new_ops.append(new_alloca.owner)
+            # Get the current result type (should be a MemRefType)
+            old_result_type = target_op.results[0].type
+            memref_type = ir.MemRefType(old_result_type)
+            # Create a new memref type with the specified address space
+            new_memref_type = ir.MemRefType.get(
+                memref_type.shape,
+                memref_type.element_type,
+                layout=memref_type.layout,
+                memory_space=ir.Attribute.parse(f"{address_space_value}"),
+            )
+
+            # Replace the operation with a new one that has the updated type
+            with ir.InsertionPoint(target_op):
+                # Get the operands from the original alloca (dynamic sizes and symbols)
+                dynamic_sizes = list(
+                    target_op.operands[: target_op.attributes["operandSegmentSizes"][0]]
+                )
+                symbol_operands = list(
+                    target_op.operands[target_op.attributes["operandSegmentSizes"][0] :]
+                )
+                # Create a new alloca with the updated type
+                new_alloca = memref.alloca(
+                    new_memref_type, dynamic_sizes, symbol_operands
+                )
+                # Replace all uses of the old operation with the new one
+                # FIXME: This won't handle operations that consume the memref type and
+                # return a new memref (such as subview).
+                rewriter.replace_op(target_op, [new_alloca])
+                new_ops.append(new_alloca.owner)
 
             # Set the results to the new operations
             results.set_ops(op.updated_op, new_ops)
diff --git a/lighthouse/schedule/xegpu/softmax_schedule.py b/lighthouse/schedule/xegpu/softmax_schedule.py
index 7912a45b..8876d052 100644
--- a/lighthouse/schedule/xegpu/softmax_schedule.py
+++ b/lighthouse/schedule/xegpu/softmax_schedule.py
@@ -16,6 +16,7 @@
     PipelineInterrupt,
 )
 from lighthouse.schedule.xegpu.helper import bundle_xegpu_to_binary
+from lighthouse.dialects.transform import transform_ext
 
 
 def get_softmax_schedule_module(
@@ -140,7 +141,6 @@ def bundle_xegpu_softmax_schedule(
         transform.AnyOpType.get(), func, ops=["linalg.softmax"]
     )
     structured.structured_decompose_interface(anytype, softmax_ops)
-    transform.print_(target=func, name="Aftemr structured_decompose_interface")
 
     linalg_ops = match_and_split(
         func, ops={"linalg.generic", "linalg.fill"}, nhandles=6
@@ -157,8 +157,6 @@ def bundle_xegpu_softmax_schedule(
         div_op, sizes=[0, reduction_step_size]
     ).results
 
-    transform.print_(target=func, name="After tiling div op")
-
     # Fuse max_center_and_exp_op into the div loop
     _, fused_loop = structured.structured_fuse_into_containing_op(
         anytype,
@@ -166,9 +164,6 @@ def bundle_xegpu_softmax_schedule(
         producer_op=max_center_and_exp_op,
         containing_op=div_loop,
     )
-    transform.print_(
-        target=func, name="After fusing max_center_and_exp_op into div loop"
-    )
 
     # Tile the sum reduction and fuse the sub+exp producer into it
     _, _, _, sum_loop = structured.structured_tile_reduction_using_for(
@@ -180,8 +175,6 @@ def bundle_xegpu_softmax_schedule(
         tile_sizes=[0, reduction_step_size],
     )
 
-    transform.print_(target=func, name="After tiling sum reduction")
-
     func = transform.get_parent_op(
         anytype,
         fused_loop,
@@ -200,9 +193,6 @@ def bundle_xegpu_softmax_schedule(
         producer_op=max_center_and_exp_op,
         containing_op=sum_loop,
     )
-    transform.print_(
-        target=func, name="After fusing max_center_and_exp_op into sum loop"
-    )
 
     # Tile the max reduction.
     max_reduction = linalg_ops[0]
@@ -214,7 +204,6 @@ def bundle_xegpu_softmax_schedule(
         target=max_reduction,
         tile_sizes=[0, reduction_step_size],
     )
-    transform.print_(target=func, name="After tiling max reduction")
 
     # Cleanup after tiling and fusion
     transform.apply_cse(func)
@@ -231,8 +220,6 @@ def bundle_xegpu_softmax_schedule(
     transform.apply_cse(func)
     canonicalize(func)
 
-    transform.print_(target=func, name="After vectorization")
-
     if stop_at_stage == "vectorized":
         raise PipelineInterrupt()
 
@@ -250,8 +237,6 @@ def bundle_xegpu_softmax_schedule(
     transform.apply_cse(mod)
     canonicalize(mod)
 
-    transform.print_(target=mod, name="After bufferization")
-
     # promote memref.alloc to memref.alloca in payload function
     func = match(mod, ops={"func.func"})
     func = apply_registered_pass(
@@ -263,8 +248,6 @@ def bundle_xegpu_softmax_schedule(
         },
     )
 
-    transform.print_(target=func, name="After promoting buffers to stack")
-
     if stop_at_stage == "bufferized":
         raise PipelineInterrupt()
 
@@ -294,8 +277,6 @@ def bundle_xegpu_softmax_schedule(
     mod = apply_registered_pass(mod, "gpu-kernel-outlining")
     transform.apply_cse(mod)
 
-    transform.print_(target=mod, name="After GPU outlining")
-
     if stop_at_stage == "gpu-outlining":
         raise PipelineInterrupt()
 
@@ -306,12 +287,16 @@ def bundle_xegpu_softmax_schedule(
         options={"O": "3", "chip": "bmg"},
     )
 
-    # convert vector to xegpu
+    # for each gpu function in the gpu module, change memref.alloca address
+    # space to 3 (SLM) and convert vector to xegpu.
     gpu_mod_ops = match_and_split(mod, ops={"gpu.module"})
     for gpu_mod in gpu_mod_ops:
         gpu_func = match(gpu_mod, ops={"gpu.func"})
-        gpu_func = apply_registered_pass(gpu_func, "convert-vector-to-xegpu")
-        transform.apply_cse(gpu_func)
+        allocas = match_and_split(gpu_func, ops={"memref.alloca"})
+        for alloca in allocas:
+            transform_ext.update_address_space(alloca, address_space=3)
+        # gpu_func = apply_registered_pass(gpu_func, "convert-vector-to-xegpu")
+        # transform.apply_cse(gpu_func)
 
     # Cleanup.
     transform.apply_cse(mod)

From 1313477172c7442e322c6bbb1cbd614c415e8172 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Mon, 20 Apr 2026 20:34:32 +0000
Subject: [PATCH 32/51] Re-enable vector-to-xegpu; set store_matrix layouts

---
 examples/xegpu/softmax.py                         |  4 +++-
 .../transform_ext/ops/update_address_space.py     |  2 +-
 lighthouse/schedule/xegpu/softmax_schedule.py     | 15 +++++++++------
 3 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/examples/xegpu/softmax.py b/examples/xegpu/softmax.py
index cc130297..f75613d0 100644
--- a/examples/xegpu/softmax.py
+++ b/examples/xegpu/softmax.py
@@ -155,7 +155,7 @@ def parse_cli():
         "--sizes",
         type=int,
         nargs=2,
-        default=[1024, 64],
+        default=[1024, 512],
         help="M,N matrix sizes (MxN)",
     )
     parser.add_argument(
@@ -290,6 +290,8 @@ def parse_cli():
                 )
                 if not success:
                     raise ValueError("Result mismatch!")
+                else:
+                    print("Result is correct. Proceeding to benchmark...")
 
             times = runner.benchmark(
                 host_input_buffers=wload._initial_host_arrays,
diff --git a/lighthouse/dialects/transform/transform_ext/ops/update_address_space.py b/lighthouse/dialects/transform/transform_ext/ops/update_address_space.py
index 2c40bfce..8d0b6041 100644
--- a/lighthouse/dialects/transform/transform_ext/ops/update_address_space.py
+++ b/lighthouse/dialects/transform/transform_ext/ops/update_address_space.py
@@ -16,7 +16,7 @@ class UpdateAddressSpace(
 
     target: ext.Operand[transform.AnyOpType]
     address_space: ir.IntegerAttr
-    updated_op: ext.Result[transform.AnyOpType[()]] = ext.result(infer_type=True)
+    updated_op: ext.Result[transform.AnyOpType[()]] = ext.infer_result()
 
     @classmethod
     def attach_interface_impls(cls, ctx=None):
diff --git a/lighthouse/schedule/xegpu/softmax_schedule.py b/lighthouse/schedule/xegpu/softmax_schedule.py
index 8876d052..862677df 100644
--- a/lighthouse/schedule/xegpu/softmax_schedule.py
+++ b/lighthouse/schedule/xegpu/softmax_schedule.py
@@ -295,8 +295,8 @@ def bundle_xegpu_softmax_schedule(
         allocas = match_and_split(gpu_func, ops={"memref.alloca"})
         for alloca in allocas:
             transform_ext.update_address_space(alloca, address_space=3)
-        # gpu_func = apply_registered_pass(gpu_func, "convert-vector-to-xegpu")
-        # transform.apply_cse(gpu_func)
+        gpu_func = apply_registered_pass(gpu_func, "convert-vector-to-xegpu")
+        transform.apply_cse(gpu_func)
 
     # Cleanup.
     transform.apply_cse(mod)
@@ -305,12 +305,15 @@ def bundle_xegpu_softmax_schedule(
     if stop_at_stage == "xegpu-initial":
         raise PipelineInterrupt()
 
-    # Set layout attributes for xegpu.store_nd operations.
-    # FIXME: currently ecah subgroup is handling the entire row.
-    store_ops = match_and_split(gpu_func, ops={"xegpu.store_nd"}, nhandles=5)
+    # Set layout attributes for xegpu.store_nd and xegpu.store_matrix ops.
+    store_nd_ops = match_and_split(gpu_func, ops={"xegpu.store_nd"}, nhandles=1)
+    store_matrix_ops = match_and_split(gpu_func, ops={"xegpu.store_matrix"}, nhandles=4)
     sg_layout = [parameters["sg_rows"], 1]
     sg_data = [parameters["sg_rows"], parameters["reduction_step_size"]]
-    xegpu.set_anchor_layout(store_ops[-1], sg_layout=sg_layout, sg_data=sg_data)
+    for store_op in store_nd_ops:
+        xegpu.set_anchor_layout(store_op, sg_layout=sg_layout, sg_data=sg_data)
+    for store_op in store_matrix_ops:
+        xegpu.set_anchor_layout(store_op, sg_layout=sg_layout, sg_data=sg_data)
 
     if stop_at_stage == "xegpu-wg":
         raise PipelineInterrupt()

From f1857aabfd159b7ab30d9a3b5fb3217efff781c9 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Mon, 20 Apr 2026 20:36:54 +0000
Subject: [PATCH 33/51] Remove docs/softmax_lowering.md

---
 docs/softmax_lowering.md | 635 ---------------------------------------
 1 file changed, 635 deletions(-)
 delete mode 100644 docs/softmax_lowering.md

diff --git a/docs/softmax_lowering.md b/docs/softmax_lowering.md
deleted file mode 100644
index a31e6f4c..00000000
--- a/docs/softmax_lowering.md
+++ /dev/null
@@ -1,635 +0,0 @@
-# Linalg softmax lowering to XeGPU (Currently supported in lighthouse)
-
-## Overview
-
-**Assumptions:**
-Softmax dimension size is small (64 in this example). 
-
-The lowering process consists of seven stages:
-1. **initial** - High-level tensor operations
-2. **tiled-softmax** - Tiled softmax operations
-3. **decomposed** - Decomposition into constituent operations
-4. **vectorized** - Vector operations
-5. **bufferized** - Memory-based representation
-6. **xegpu-initial** - GPU kernel with XeGPU operations
-7. **xegpu-wg** - Work-group optimized XeGPU
-
----
-
-## Stage 1: Initial
-
-**Code:**
-```mlir
-func.func @payload(%arg0: memref<1024x64xf32>, %arg1: memref<1024x64xf32>) {
-  // ...
-  %2 = tensor.empty() : tensor<1024x64xf32>
-  %3 = linalg.softmax dimension(1) ins(%1 : tensor<1024x64xf32>) 
-                                  outs(%2 : tensor<1024x64xf32>) -> tensor<1024x64xf32>
-  // ...
-  return
-}
-```
----
-
-## Stage 2: Tiled Softmax
-
-**Notes**
-- Work distribution via `scf.forall` (16 parallel iterations)
-- Each tile processes 64x64 elements
-
-**Code:**
-```mlir
-func.func @payload(%arg0: memref<1024x64xf32>, %arg1: memref<1024x64xf32>) {
-  // ...
-  %3 = scf.forall (%arg2) in (16) shared_outs(%arg3 = %2) -> (tensor<1024x64xf32>) {
-    %4 = affine.apply affine_map<(d0) -> (d0 * 64)>(%arg2)
-    // Extract 64x64 input slice
-    %extracted_slice = tensor.extract_slice ...
-    // Extract 64x64 output slice
-    %extracted_slice_0 = tensor.extract_slice ...
-    // Apply softmax to the tile
-    %5 = linalg.softmax dimension(1) ins(%extracted_slice : tensor<64x64xf32>) 
-                                     outs(%extracted_slice_0 : tensor<64x64xf32>) -> tensor<64x64xf32>
-    scf.forall.in_parallel {
-      tensor.parallel_insert_slice %5 into %arg3[%4, %c0] [64, 64] [1, 1] : 
-        tensor<64x64xf32> into tensor<1024x64xf32>
-    }
-  }
-  // ...  
-  return
-}
-```
-
----
-
-## Stage 3: Decomposed
-
-**Notes**
-- Softmax decomposed into 4 constituent `linalg.generic` ops : max, sub+exp, sum, divide
-- Uses `structured.structured_decompose_interface` implemented by `linalg.softmax`
-
-**Code:**
-```mlir
-func.func @payload(%arg0: memref<1024x64xf32>, %arg1: memref<1024x64xf32>) {
-  // ...
-  
-  %2 = scf.forall (%arg2) in (16) shared_outs(%arg3 = %1) -> (tensor<1024x64xf32>) {
-    %3 = affine.apply #map(%arg2)  // %3 = %arg2 * 64
-    %extracted_slice = tensor.extract_slice ...
-    
-    // Step 1: Find max along dimension 1
-    %4 = tensor.empty() : tensor<64xf32>
-    %5 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<64xf32>) -> tensor<64xf32>
-    %6 = linalg.generic // ...
-      %11 = arith.maxnumf %in, %out : f32
-      // ...
-    } -> tensor<64xf32>
-    
-    // Step 2: Subtract max and exponentiate
-    %7 = linalg.generic // ...
-      %11 = arith.subf %in, %in_2 : f32
-      %12 = math.exp %11 : f32
-      // ...
-    } -> tensor<64x64xf32>
-    
-    // Step 3: Sum exponentials
-    %8 = linalg.fill ins(%cst : f32) outs(%4 : tensor<64xf32>) -> tensor<64xf32>
-    %9 = linalg.generic // ...
-      %11 = arith.addf %in, %out : f32
-      // ...
-    } -> tensor<64xf32>
-    
-    // Step 4: Normalize by sum
-    %10 = linalg.generic // ...
-      %11 = arith.divf %in, %in_2 : f32
-      // ...
-    } -> tensor<64x64xf32>
-    
-    scf.forall.in_parallel {
-      tensor.parallel_insert_slice %10 into %arg3[%3, 0] [64, 64] [1, 1] : 
-        tensor<64x64xf32> into tensor<1024x64xf32>
-    }
-  }
-  return
-}
-```
-
----
-
-## Stage 4: Vectorized
-
-**Notes**
-- `linalg.generic` operations replaced with vector operations
-- Vector transfers for reading/writing data
-
-**Code:**
-```mlir
-func.func @payload(%arg0: memref<1024x64xf32>, %arg1: memref<1024x64xf32>) {
-  // ...  
-  %3 = scf.forall (%arg2) in (16) shared_outs(%arg3 = %2) -> (tensor<1024x64xf32>) {
-    %4 = affine.apply #map(%arg2)  // %4 = %arg2 * 64
-    %extracted_slice = tensor.extract_slice ..
-    
-    // Vector read: Load 64x64 tile
-    %5 = vector.transfer_read %1[%4, %c0], %0 {in_bounds = [true, true]} : 
-      tensor<1024x64xf32>, vector<64x64xf32>
-    
-    // Max reduction: Reduce dimension 1 -> vector<64xf32>
-    %6 = vector.multi_reduction <maxnumf>, %5, %cst_0 [1] : 
-      vector<64x64xf32> to vector<64xf32>
-    
-    // Broadcast max values back to 64x64 and transpose
-    %7 = vector.broadcast %6 : vector<64xf32> to vector<64x64xf32>
-    %8 = vector.transpose %7, [1, 0] : vector<64x64xf32> to vector<64x64xf32>
-    
-    // Subtract max and exponentiate
-    %9 = arith.subf %5, %8 : vector<64x64xf32>
-    %10 = math.exp %9 : vector<64x64xf32>
-    
-    // Sum reduction: Reduce dimension 1 -> vector<64xf32>
-    %11 = vector.multi_reduction <add>, %10, %cst [1] : 
-      vector<64x64xf32> to vector<64xf32>
-    
-    // Broadcast sums back to 64x64 and transpose
-    %12 = vector.broadcast %11 : vector<64xf32> to vector<64x64xf32>
-    %13 = vector.transpose %12, [1, 0] : vector<64x64xf32> to vector<64x64xf32>
-    
-    // Normalize
-    %14 = arith.divf %10, %13 : vector<64x64xf32>
-    
-    // Vector write
-    %15 = vector.transfer_write %14, %extracted_slice[%c0, %c0] {in_bounds = [true, true]} : 
-      vector<64x64xf32>, tensor<64x64xf32>
-    
-    scf.forall.in_parallel {
-      tensor.parallel_insert_slice %15 into %arg3[%4, 0] [64, 64] [1, 1]
-    }
-  }
-  return
-}
-```
----
-
-## Stage 5: Bufferized
-
-**Notes**
-- Tensors eliminated, working directly with memrefs
-
-**Code:**
-```mlir
-func.func @payload(%arg0: memref<1024x64xf32>, %arg1: memref<1024x64xf32>) {
-  // ...
-  
-  scf.forall (%arg2) in (16) {
-    %1 = affine.apply #map(%arg2)  // %1 = %arg2 * 64
-    
-    // Direct memref read
-    %2 = vector.transfer_read %arg1[%1, %c0], %0 {in_bounds = [true, true]} : 
-      memref<1024x64xf32>, vector<64x64xf32>
-    
-    // Max reduction
-    %3 = vector.multi_reduction <maxnumf>, %2, %cst_0 [1] : 
-      vector<64x64xf32> to vector<64xf32>
-    %4 = vector.broadcast %3 : vector<64xf32> to vector<64x64xf32>
-    %5 = vector.transpose %4, [1, 0] : vector<64x64xf32> to vector<64x64xf32>
-    
-    // Subtract and exp
-    %6 = arith.subf %2, %5 : vector<64x64xf32>
-    %7 = math.exp %6 : vector<64x64xf32>
-    
-    // Sum reduction
-    %8 = vector.multi_reduction <add>, %7, %cst [1] : 
-      vector<64x64xf32> to vector<64xf32>
-    %9 = vector.broadcast %8 : vector<64xf32> to vector<64x64xf32>
-    %10 = vector.transpose %9, [1, 0] : vector<64x64xf32> to vector<64x64xf32>
-    
-    // Normalize
-    %11 = arith.divf %7, %10 : vector<64x64xf32>
-    
-    // Direct memref write
-    vector.transfer_write %11, %arg0[%1, %c0] {in_bounds = [true, true]} : 
-      vector<64x64xf32>, memref<1024x64xf32>
-  }
-  return
-}
-```
-
----
-
-## Stage 6: XeGPU-Initial
-
-**Notes**
-- GPU kernel separated from host code (Gpu Outlining)
-- `gpu.launch_func` invocation with grid/block dimensions
-- Use `vector-to-xegpu`
-
-**Code:**
-
-**Host Side:**
-```mlir
-func.func @payload(%arg0: memref<1024x64xf32>, %arg1: memref<1024x64xf32>) {
-  // ...
-  gpu.launch_func @payload_kernel::@payload_kernel 
-    blocks in (%c16, %c1, %c1) 
-    threads in (%c128, %c1, %c1)
-    args(%arg1 : memref<1024x64xf32>, %arg0 : memref<1024x64xf32>)
-  return
-}
-```
-
-**GPU Kernel:**
-```mlir
-gpu.module @payload_kernel [#xevm.target<O = 3>] {
-  gpu.func @payload_kernel(%arg0: memref<1024x64xf32>, %arg1: memref<1024x64xf32>) kernel 
-    attributes {known_block_size = array<i32: 128, 1, 1>, 
-                known_grid_size = array<i32: 16, 1, 1>} {
-    // ...
-    %block_id_x = gpu.block_id x
-    %0 = arith.muli %block_id_x, %c64 overflow<nsw> : index
-    
-    // Create XeGPU tensor descriptor for load
-    %1 = xegpu.create_nd_tdesc %arg0 : memref<1024x64xf32> -> 
-      !xegpu.tensor_desc<64x64xf32, #xegpu.block_tdesc_attr<boundary_check = false>>
-    
-    // XeGPU block load
-    %2 = xegpu.load_nd %1[%0, 0] : 
-      !xegpu.tensor_desc<64x64xf32, #xegpu.block_tdesc_attr<boundary_check = false>> -> 
-      vector<64x64xf32>
-    
-    // Same compute operations as before
-    %3 = vector.multi_reduction <maxnumf>, %2, %cst_0 [1] : 
-      vector<64x64xf32> to vector<64xf32>
-    %4 = vector.broadcast %3 : vector<64xf32> to vector<64x64xf32>
-    %5 = vector.transpose %4, [1, 0] : vector<64x64xf32> to vector<64x64xf32>
-    %6 = arith.subf %2, %5 : vector<64x64xf32>
-    %7 = math.exp %6 : vector<64x64xf32>
-    %8 = vector.multi_reduction <add>, %7, %cst [1] : 
-      vector<64x64xf32> to vector<64xf32>
-    %9 = vector.broadcast %8 : vector<64xf32> to vector<64x64xf32>
-    %10 = vector.transpose %9, [1, 0] : vector<64x64xf32> to vector<64x64xf32>
-    %11 = arith.divf %7, %10 : vector<64x64xf32>
-    
-    // Create XeGPU tensor descriptor for store
-    %12 = xegpu.create_nd_tdesc %arg1 : memref<1024x64xf32> -> 
-      !xegpu.tensor_desc<64x64xf32, #xegpu.block_tdesc_attr<boundary_check = false>>
-    
-    // XeGPU block store
-    xegpu.store_nd %11, %12[%0, 0] : 
-      vector<64x64xf32>, 
-      !xegpu.tensor_desc<64x64xf32, #xegpu.block_tdesc_attr<boundary_check = false>>
-    
-    gpu.return
-  }
-}
-```
-
----
-
-## Stage 7: XeGPU-WG (Work-Group Optimized)
-
-**Notes**
-- Sets the layout for anchor xegpu ops. Each Wg consistes of [8, 1] subgroups
-  doing 8x64 softmax slice. 
-- Only sets the layotu for `store_nd`. Layout propagation does the rest.  
-
-**Code (differences from xegpu-initial):**
-```mlir
-// Store operation now includes layout hints
-xegpu.store_nd %11, %12[%0, 0] 
-  <{layout = #xegpu.layout<sg_layout = [8, 1], sg_data = [8, 64]>}> : 
-  vector<64x64xf32>, 
-  !xegpu.tensor_desc<64x64xf32, #xegpu.block_tdesc_attr<boundary_check = false>>
-```
-
----
-
-# Supporting larger Softmax dimension sizes
-
-When the softmax dimension is larger than what can fit efficiently in registers, additional tiling and fusion transformations are applied to the reduction dimension. This section shows the intermediate stages between "decomposed" and "vectorized".
-
-**Approach:** Tile reductions along dimension 1 (step size = 16) and fuse producers into consumers to enable streaming computation.
-
----
-
-## Decomposed → Tiled: Stage A - Tile div op
-
-**Notes:**
-- Tile the division operation with step size 16 along dimension 1
-- Creates `scf.for` loop iterating over 64 elements in chunks of 16
-
-**Key Changes:**
-```mlir
-// Before: Single division linalg.generic over 64x64
-%11 = linalg.generic {...} ins(%8, %10 : tensor<64x64xf32>, tensor<64xf32>) 
-      outs(%extracted_slice_0 : tensor<64x64xf32>) { ... } -> tensor<64x64xf32>
-
-// After: Division tiled into 64x16 chunks
-%11 = scf.for %arg4 = %c0_2 to %c64 step %c16 iter_args(%arg5 = %extracted_slice_0) -> (tensor<64x64xf32>) {
-  %extracted_slice_3 = tensor.extract_slice %8[0, %arg4] [64, 16] [1, 1]
-  %12 = linalg.generic {...} ins(%extracted_slice_3, %extracted_slice_4 : tensor<64x16xf32>, tensor<64xf32>) 
-        outs(%extracted_slice_5 : tensor<64x16xf32>) { ... } -> tensor<64x16xf32>
-  %inserted_slice = tensor.insert_slice %12 into %arg5[0, %arg4] [64, 16] [1, 1]
-  scf.yield %inserted_slice
-}
-```
-
----
-
-## Stage B - Fuse sub+exp into div loop
-
-**Notes:**
-- Fuse the `sub+exp` producer (max_center_and_exp_op) into the div loop
-- Recomputes exp values on-the-fly instead of materializing full 64x64 tensor
-
-**Key Changes:**
-```mlir
-%11 = scf.for %arg4 = %c0_2 to %c64 step %c16 iter_args(%arg5 = %extracted_slice_0) -> (tensor<64x64xf32>) {
-  %extracted_slice_3 = tensor.extract_slice %extracted_slice[0, %arg4] [64, 16] [1, 1]
-  
-  // Fused: sub+exp computed per 16-element chunk
-  %12 = linalg.generic {...} ins(%extracted_slice_3, %extracted_slice_4 : tensor<64x16xf32>, tensor<64xf32>) 
-        outs(%extracted_slice_5 : tensor<64x16xf32>) {
-    ^bb0(%in: f32, %in_8: f32, %out: f32):
-      %14 = arith.subf %in, %in_8 : f32
-      %15 = math.exp %14 : f32
-      linalg.yield %15 : f32
-  } -> tensor<64x16xf32>
-  
-  // Division operation
-  %13 = linalg.generic {...} ins(%12, %extracted_slice_6 : tensor<64x16xf32>, tensor<64xf32>) 
-        outs(%extracted_slice_7 : tensor<64x16xf32>) { ... } -> tensor<64x16xf32>
-  // ...
-}
-```
-
----
-
-## Stage C - Tile sum reduction
-
-**Notes:**
-- Tile the sum reduction using `structured_tile_reduction_using_for`
-- Creates intermediate accumulator tensor (64x16)
-- Final reduction via `linalg.reduce` over dimension 1
-
-**Key Changes:**
-```mlir
-// Tiled sum reduction with intermediate accumulator
-%10 = tensor.empty() : tensor<64x16xf32>
-%11 = linalg.fill ins(%cst_2 : f32) outs(%10 : tensor<64x16xf32>) -> tensor<64x16xf32>
-
-%12 = scf.for %arg4 = %c0_3 to %c64 step %c16 iter_args(%arg5 = %11) -> (tensor<64x16xf32>) {
-  %extracted_slice_7 = tensor.extract_slice %8[0, %arg4] [64, 16] [1, 1]
-  %14 = linalg.generic {...} ins(%extracted_slice_7 : tensor<64x16xf32>) 
-        outs(%extracted_slice_8 : tensor<64x16xf32>) {
-    ^bb0(%in: f32, %out: f32):
-      %15 = arith.addf %in, %out : f32
-      linalg.yield %15 : f32
-  } -> tensor<64x16xf32>
-  // ...
-}
-
-// Final reduction to 64xf32
-%reduced = linalg.reduce ins(%12 : tensor<64x16xf32>) outs(%9 : tensor<64xf32>) dimensions = [1] 
-  (%in: f32, %init: f32) {
-    %14 = arith.addf %in, %init : f32
-    linalg.yield %14 : f32
-  }
-```
-
----
-
-## Stage D - Fuse sub+exp into sum loop
-
-**Notes:**
-- Fuse `sub+exp` into the sum reduction loop
-- Stream computation: compute exp and accumulate in same loop
-
-**Key Changes:**
-```mlir
-%12 = scf.for %arg4 = %c0_3 to %c64 step %c16 iter_args(%arg5 = %11) -> (tensor<64x16xf32>) {
-  %extracted_slice_7 = tensor.extract_slice %extracted_slice[0, %arg4] [64, 16] [1, 1]
-  
-  // Fused: sub+exp
-  %14 = linalg.generic {...} ins(%extracted_slice_7, %extracted_slice_8 : tensor<64x16xf32>, tensor<64xf32>) 
-        outs(%extracted_slice_9 : tensor<64x16xf32>) {
-    ^bb0(%in: f32, %in_11: f32, %out: f32):
-      %16 = arith.subf %in, %in_11 : f32
-      %17 = math.exp %16 : f32
-      linalg.yield %17 : f32
-  } -> tensor<64x16xf32>
-  
-  // Accumulate sum
-  %15 = linalg.generic {...} ins(%14 : tensor<64x16xf32>) 
-        outs(%extracted_slice_10 : tensor<64x16xf32>) {
-    ^bb0(%in: f32, %out: f32):
-      %16 = arith.addf %in, %out : f32
-      linalg.yield %16 : f32
-  } -> tensor<64x16xf32>
-  // ...
-}
-```
-
----
-
-## Stage E - Tile max reduction
-
-**Notes:**
-- Tile max reduction similar to sum reduction
-- Creates 64x16 intermediate accumulator
-- Final reduction via `linalg.reduce` with maxnumf
-
-**Key Changes:**
-```mlir
-// Tiled max reduction
-%7 = tensor.empty() : tensor<64x16xf32>
-%8 = linalg.fill ins(%cst_1 : f32) outs(%7 : tensor<64x16xf32>) -> tensor<64x16xf32>
-
-%9 = scf.for %arg4 = %c0_2 to %c64 step %c16 iter_args(%arg5 = %8) -> (tensor<64x16xf32>) {
-  %extracted_slice_12 = tensor.extract_slice %extracted_slice[0, %arg4] [64, 16] [1, 1]
-  %16 = linalg.generic {...} ins(%extracted_slice_12 : tensor<64x16xf32>) 
-        outs(%extracted_slice_13 : tensor<64x16xf32>) {
-    ^bb0(%in: f32, %out: f32):
-      %17 = arith.maxnumf %in, %out : f32
-      linalg.yield %17 : f32
-  } -> tensor<64x16xf32>
-  // ...
-}
-
-// Final max reduction
-%reduced = linalg.reduce ins(%9 : tensor<64x16xf32>) outs(%6 : tensor<64xf32>) dimensions = [1] 
-  (%in: f32, %init: f32) {
-    %16 = arith.maxnumf %in, %init : f32
-    linalg.yield %16 : f32
-  }
-```
-
-**Result:** Now all three major computations (max, sum, div) are tiled and operate on 64x16 chunks, with exp computation fused into both sum and div loops.
-
----
-
-## Stage F - Vectorization
-
-**Notes:**
-- Convert tiled linalg operations to vector operations
-- `scf.for` loops remain but operate on vectors
-- Vector size: 64x16 for tiled operations
-
-**Code:**
-```mlir
-func.func @payload(%arg0: memref<1024x64xf32>, %arg1: memref<1024x64xf32>) {
-  // ...
-  %3 = scf.forall (%arg2) in (16) shared_outs(%arg3 = %2) -> (tensor<1024x64xf32>) {
-    // ...
-    
-    // Vectorized max reduction loop
-    %6 = vector.transfer_write %cst_1, %5[%c0, %c0] : vector<64x16xf32>, tensor<64x16xf32>
-    %7 = scf.for %arg4 = %c0 to %c64 step %c16 iter_args(%arg5 = %6) -> (tensor<64x16xf32>) {
-      %15 = vector.transfer_read %1[%4, %arg4], %0 : tensor<1024x64xf32>, vector<64x16xf32>
-      %16 = vector.transfer_read %arg5[%c0, %c0], %0 : tensor<64x16xf32>, vector<64x16xf32>
-      %17 = arith.maxnumf %15, %16 : vector<64x16xf32>
-      %18 = vector.transfer_write %17, %arg5[%c0, %c0] : vector<64x16xf32>, tensor<64x16xf32>
-      scf.yield %18 : tensor<64x16xf32>
-    }
-    %8 = vector.transfer_read %7[%c0, %c0], %0 : tensor<64x16xf32>, vector<64x16xf32>
-    %9 = vector.multi_reduction <maxnumf>, %8, %cst_2 [1] : vector<64x16xf32> to vector<64xf32>
-    
-    // Vectorized sum reduction loop with fused sub+exp
-    %11 = scf.for %arg4 = %c0 to %c64 step %c16 iter_args(%arg5 = %10) -> (tensor<64x16xf32>) {
-      %15 = vector.transfer_read %1[%4, %arg4], %0 : tensor<1024x64xf32>, vector<64x16xf32>
-      %16 = vector.broadcast %9 : vector<64xf32> to vector<16x64xf32>
-      %17 = vector.transpose %16, [1, 0] : vector<16x64xf32> to vector<64x16xf32>
-      %18 = arith.subf %15, %17 : vector<64x16xf32>
-      %19 = math.exp %18 : vector<64x16xf32>
-      %20 = vector.transfer_read %arg5[%c0, %c0], %0 : tensor<64x16xf32>, vector<64x16xf32>
-      %21 = arith.addf %19, %20 : vector<64x16xf32>
-      %22 = vector.transfer_write %21, %arg5[%c0, %c0] : vector<64x16xf32>, tensor<64x16xf32>
-      scf.yield %22 : tensor<64x16xf32>
-    }
-    %12 = vector.transfer_read %11[%c0, %c0], %0 : tensor<64x16xf32>, vector<64x16xf32>
-    %13 = vector.multi_reduction <add>, %12, %cst_0 [1] : vector<64x16xf32> to vector<64xf32>
-    
-    // Vectorized div loop with fused sub+exp
-    %14 = scf.for %arg4 = %c0 to %c64 step %c16 iter_args(%arg5 = %extracted_slice) -> (tensor<64x64xf32>) {
-      %15 = vector.transfer_read %1[%4, %arg4], %0 : tensor<1024x64xf32>, vector<64x16xf32>
-      %16 = vector.broadcast %9 : vector<64xf32> to vector<16x64xf32>
-      %17 = vector.transpose %16, [1, 0] : vector<16x64xf32> to vector<64x16xf32>
-      %18 = arith.subf %15, %17 : vector<64x16xf32>
-      %19 = math.exp %18 : vector<64x16xf32>
-      %20 = vector.broadcast %13 : vector<64xf32> to vector<16x64xf32>
-      %21 = vector.transpose %20, [1, 0] : vector<16x64xf32> to vector<64x16xf32>
-      %22 = arith.divf %19, %21 : vector<64x16xf32>
-      %23 = vector.transfer_write %22, %arg5[%c0, %arg4] : vector<64x16xf32>, tensor<64x64xf32>
-      scf.yield %23 : tensor<64x64xf32>
-    }
-  }
-  // ...
-}
-```
-
----
-
-## Stage G - Bufferization
-
-**Notes:**
-- Convert tensors to memrefs
-- Allocate stack buffer for 64x16 accumulator: `memref.alloc()`
-
-**Code:**
-```mlir
-func.func @payload(%arg0: memref<1024x64xf32>, %arg1: memref<1024x64xf32>) {
-  // ...
-  scf.forall (%arg2) in (16) {
-    %1 = affine.apply #map(%arg2)
-    %subview = memref.subview %arg0[%1, 0] [64, 64] [1, 1]
-    
-    // Allocate accumulator buffer
-    %alloc = memref.alloc() {alignment = 64 : i64} : memref<64x16xf32>
-    
-    // Max reduction loop
-    vector.transfer_write %cst_1, %alloc[%c0, %c0] : vector<64x16xf32>, memref<64x16xf32>
-    scf.for %arg3 = %c0 to %c64 step %c16 {
-      %6 = vector.transfer_read %arg1[%1, %arg3], %0 : memref<1024x64xf32>, vector<64x16xf32>
-      %7 = vector.transfer_read %alloc[%c0, %c0], %0 : memref<64x16xf32>, vector<64x16xf32>
-      %8 = arith.maxnumf %6, %7 : vector<64x16xf32>
-      vector.transfer_write %8, %alloc[%c0, %c0] : vector<64x16xf32>, memref<64x16xf32>
-    }
-    %2 = vector.transfer_read %alloc[%c0, %c0], %0 : memref<64x16xf32>, vector<64x16xf32>
-    %3 = vector.multi_reduction <maxnumf>, %2, %cst_2 [1] : vector<64x16xf32> to vector<64xf32>
-    
-    // Sum reduction loop (reuses %alloc)
-    // ...
-    
-    // Div loop (writes to %subview)
-    // ...
-  }
-}
-```
-
----
-
-## Stage H - Promote buffers to stack
-
-**Notes:**
-- Convert `memref.alloc()` to `memref.alloca()` for stack allocation
-- Reduces memory allocation overhead
-
-**Code:**
-```mlir
-scf.forall (%arg2) in (16) {
-  %1 = affine.apply #map(%arg2)
-  %subview = memref.subview %arg0[%1, 0] [64, 64] [1, 1]
-  
-  // Stack allocation instead of heap
-  %alloca = memref.alloca() {alignment = 64 : i64} : memref<64x16xf32>
-  
-  // ... same operations using %alloca ...
-}
-```
-
----
-
-## Stage I - GPU outlining
-
-**Notes:**
-- Convert `scf.forall` to `scf.parallel`, then to `gpu.launch`
-- Extract GPU kernel into separate `gpu.module`
-- Set thread count: 128 threads = (64 rows / 8 sg_rows) × 16 subgroup_size
-
-**Host Side:**
-```mlir
-func.func @payload(%arg0: memref<1024x64xf32>, %arg1: memref<1024x64xf32>) {
-  %c16 = arith.constant 16 : index
-  %c1 = arith.constant 1 : index
-  %c128 = arith.constant 128 : index
-  gpu.launch_func @payload_kernel::@payload_kernel 
-    blocks in (%c16, %c1, %c1) 
-    threads in (%c128, %c1, %c1)
-    args(%arg0 : memref<1024x64xf32>, %arg1 : memref<1024x64xf32>)
-  return
-}
-```
-
-**GPU Kernel:**
-```mlir
-gpu.module @payload_kernel {
-  gpu.func @payload_kernel(%arg0: memref<1024x64xf32>, %arg1: memref<1024x64xf32>) kernel 
-    attributes {known_block_size = array<i32: 128, 1, 1>, 
-                known_grid_size = array<i32: 16, 1, 1>} {
-    %block_id_x = gpu.block_id x
-    %1 = arith.muli %block_id_x, %c64 overflow<nsw> : index
-    %subview = memref.subview %arg0[%1, 0] [64, 64] [1, 1]
-    %alloca = memref.alloca() {alignment = 64 : i64} : memref<64x16xf32>
-    
-    // Three reduction loops (max, sum, div) with same structure
-    scf.for %arg2 = %c0 to %c64 step %c16 {
-      // Max: accumulate max values
-      // Sum: compute & accumulate exp(x - max)
-      // Div: compute exp(x - max) / sum
-    }
-    
-    gpu.return
-  }
-}
-```
-
-**Summary:** At this stage, the kernel processes 64x16 chunks in streaming fashion through three sequential loops, minimizing memory footprint.

From 240cf084338baf9f30ebe2315d09a489170530d9 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Mon, 20 Apr 2026 22:27:53 +0000
Subject: [PATCH 34/51] add initial version

---
 examples/xegpu/fused_attention.py             | 339 ++++++++++++++++++
 .../mlir_gen/gpu_fused_attention_payload.py   |  69 ++++
 .../xegpu/fused_attention_schedule.py         | 164 +++++++++
 3 files changed, 572 insertions(+)
 create mode 100644 examples/xegpu/fused_attention.py
 create mode 100644 lighthouse/ingress/mlir_gen/gpu_fused_attention_payload.py
 create mode 100644 lighthouse/schedule/xegpu/fused_attention_schedule.py

diff --git a/examples/xegpu/fused_attention.py b/examples/xegpu/fused_attention.py
new file mode 100644
index 00000000..5aa84e2a
--- /dev/null
+++ b/examples/xegpu/fused_attention.py
@@ -0,0 +1,339 @@
+# RUN: %PYTHON %s --dump-kernel=xegpu-wg | FileCheck %s
+# CHECK: module attributes {gpu.container_module} {
+
+"""
+XeGPU fused attention benchmark.
+"""
+
+import argparse
+from typing import Optional
+from functools import cached_property
+
+import numpy as np
+from mlir import ir
+
+from lighthouse import dialects as lh_dialects
+from lighthouse.execution.runner import Runner
+from lighthouse.pipeline.driver import TransformDriver
+from lighthouse.execution import GPUMemoryManager
+from lighthouse.utils.numpy import mlir_to_numpy_dtype
+from lighthouse.ingress.mlir_gen import get_mlir_elem_type
+from lighthouse.ingress.mlir_gen.gpu_fused_attention_payload import generate_gpu_fused_attention_payload
+from lighthouse.schedule.xegpu.fused_attention_schedule import get_fused_attention_schedule_module
+
+
+def fused_attention_complexity(Z: int, H: int, n_ctx: int, n_head: int, nbytes: int):
+    """
+    Return (flop_count, memory_reads, memory_writes) for fused attention.
+
+    For each of the Z * H (batch, head) pairs:
+    - Q @ K^T: n_ctx^2 * n_head multiply-adds = 2 * n_ctx^2 * n_head FLOPs
+    - Attention @ V: n_ctx^2 * n_head multiply-adds = 2 * n_ctx^2 * n_head FLOPs
+    - Softmax: O(n_ctx^2) operations, ignored as a lower-order term
+    Byte counts assume Q, K, V are each read once and the output written once.
+    """
+    # Two matmuls, each costing 2 FLOPs (multiply + add) per inner-product term.
+    flop_count = Z * H * 4 * n_ctx * n_ctx * n_head
+    # Memory: read Q, K, V and write output
+    memory_reads = 3 * Z * H * n_ctx * n_head * nbytes
+    memory_writes = Z * H * n_ctx * n_head * nbytes
+    return flop_count, memory_reads, memory_writes
+
+
+def check_correctness(
+    Q: np.ndarray, K: np.ndarray, V: np.ndarray, output_arr: np.ndarray, verbose: int = 0
+) -> bool:
+    """
+    Check correctness of fused attention output.
+
+    Reference implementation:
+    - scores = Q @ K^T / sqrt(n_head)
+    - attention_weights = softmax(scores, dim=-1)
+    - output = attention_weights @ V
+    """
+    # Use float32 for computation
+    Q_f32 = Q.astype(np.float32)
+    K_f32 = K.astype(np.float32)
+    V_f32 = V.astype(np.float32)
+
+    Z, H, n_ctx, n_head = Q.shape
+    scale = 1.0 / np.sqrt(n_head)
+
+    output_ref = np.zeros_like(Q_f32)
+
+    # Compute reference for each batch and head
+    for z in range(Z):
+        for h in range(H):
+            # scores = Q @ K^T / sqrt(n_head)
+            scores = Q_f32[z, h] @ K_f32[z, h].T * scale
+
+            # softmax along last dimension
+            max_vals = np.max(scores, axis=1, keepdims=True)
+            exp_vals = np.exp(scores - max_vals)
+            sum_vals = np.sum(exp_vals, axis=1, keepdims=True)
+            attention_weights = exp_vals / sum_vals
+
+            # output = attention_weights @ V
+            output_ref[z, h] = attention_weights @ V_f32[z, h]
+
+    output = output_arr.astype(np.float32)
+
+    if verbose > 1:
+        print("Reference solution (first batch, first head, first 5 rows):")
+        print(output_ref[0, 0, :5])
+        print("Computed solution (first batch, first head, first 5 rows):")
+        print(output[0, 0, :5])
+
+    # Check values match reference
+    values_ok = np.allclose(output, output_ref, rtol=1e-3, atol=1e-4)
+
+    success = values_ok
+
+    if verbose:
+        if success:
+            print("PASSED")
+        else:
+            print("FAILED!")
+            if not values_ok:
+                max_diff = np.abs(output - output_ref).max()
+                print(f"  Values mismatch. Max abs diff: {max_diff:.6e}")
+    return success
+
+
+class XeGPUFusedAttention:
+    """
+    Fused attention workload on XeGPU.
+
+    Computes fused attention:
+    output = softmax(Q @ K^T / sqrt(n_head)) @ V
+
+    All Q, K, V matrices have shape (Z, H, n_ctx, n_head) where:
+    - Z: batch size
+    - H: number of heads
+    - n_ctx: context length
+    - n_head: head dimension
+    """
+
+    def __init__(
+        self,
+        Z: int,
+        H: int,
+        n_ctx: int,
+        n_head: int,
+        dtype: str = "f32",
+    ):
+        self.Z = Z
+        self.H = H
+        self.n_ctx = n_ctx
+        self.n_head = n_head
+        self.shape = (Z, H, n_ctx, n_head)
+        assert dtype == "f32", "Only f32 type is supported for fused attention"
+        self.elem_type = get_mlir_elem_type(dtype)
+        self.dtype = mlir_to_numpy_dtype(self.elem_type)
+        self.memory_manager_class = GPUMemoryManager
+        self.payload_function_name = "payload"
+
+    @cached_property
+    def _initial_host_arrays(self) -> tuple[np.ndarray]:
+        """Generate initial values on host with numpy."""
+        np.random.seed(42)
+        # Q, K, V: uniform in [-0.5, 0.5); return order matches payload(output, Q, K, V)
+        Q = np.random.uniform(-0.5, 0.5, self.shape).astype(self.dtype)
+        K = np.random.uniform(-0.5, 0.5, self.shape).astype(self.dtype)
+        V = np.random.uniform(-0.5, 0.5, self.shape).astype(self.dtype)
+        output_arr = np.zeros(self.shape, dtype=self.dtype)
+        return (output_arr, Q, K, V)
+
+    def get_complexity(self) -> tuple[int, int, int]:
+        nbytes = np.dtype(self.dtype).itemsize
+        return fused_attention_complexity(self.Z, self.H, self.n_ctx, self.n_head, nbytes)
+
+    def payload_module(self) -> ir.Module:
+        """Generate MLIR module for fused attention payload."""
+        return generate_gpu_fused_attention_payload(
+            func_name=self.payload_function_name,
+            Z=self.Z,
+            H=self.H,
+            n_ctx=self.n_ctx,
+            n_head=self.n_head,
+            dtype=self.elem_type,
+        )
+
+    def schedule_modules(
+        self, stop_at_stage: Optional[str] = None, parameters: Optional[dict] = None
+    ) -> list[ir.Module]:
+        """Generate transform schedule for fused attention."""
+        return [
+            Runner.get_bench_wrapper_schedule(self.payload_function_name),
+            get_fused_attention_schedule_module(
+                stop_at_stage=stop_at_stage,
+                parameters=parameters,
+            ),
+        ]
+
+    def shared_libs(self) -> list[str]:
+        return ["libmlir_levelzero_runtime.so"]
+
+
+def parse_cli():
+    parser = argparse.ArgumentParser(
+        description="Fused Attention using MLIR XeGPU",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    parser.add_argument(
+        "--batch-size",
+        type=int,
+        default=2,
+        help="Batch size (Z)",
+    )
+    parser.add_argument(
+        "--num-heads",
+        type=int,
+        default=8,
+        help="Number of attention heads (H)",
+    )
+    parser.add_argument(
+        "--n-ctx",
+        type=int,
+        default=512,
+        help="Context length (sequence length)",
+    )
+    parser.add_argument(
+        "--n-head",
+        type=int,
+        default=64,
+        help="Head dimension",
+    )
+    parser.add_argument(
+        "--nruns",
+        type=int,
+        default=1000,
+        help="Number of runs to average the execution time.",
+    )
+    parser.add_argument(
+        "--nwarmup",
+        type=int,
+        default=20,
+        help="Number of warm-up iterations before benchmarking.",
+    )
+    parser.add_argument(
+        "--check-result",
+        action="store_true",
+        help="Check the result of the fused attention computation.",
+    )
+    parser.add_argument(
+        "--dump-kernel",
+        type=str,
+        choices=[
+            "initial",
+            "tiled",
+            "vectorized",
+            "bufferized",
+            "gpu-outlining",
+            "xegpu-initial",
+            "xegpu-wg",
+            "final",
+        ],
+        help="Dump kernel IR at different stages of lowering and exit without "
+        "executing the kernel.",
+    )
+    parser.add_argument(
+        "--dump-schedule",
+        action="store_true",
+        help="Dump transform schedule.",
+    )
+    parser.add_argument(
+        "--verbose",
+        "-v",
+        action="count",
+        default=0,
+        help="Increase output verbosity (e.g. print reference and computed solutions).",
+    )
+    args = parser.parse_args()
+    return args
+
+
+if __name__ == "__main__":
+    args = parse_cli()
+
+    params = {
+        "batch_size": args.batch_size,
+        "num_heads": args.num_heads,
+        "n_ctx": args.n_ctx,
+        "n_head": args.n_head,
+    }
+
+    Z = args.batch_size
+    H = args.num_heads
+    n_ctx = args.n_ctx
+    n_head = args.n_head
+    dtype = "f32"
+
+    with ir.Context(), ir.Location.unknown():
+        lh_dialects.register_and_load()
+        wload = XeGPUFusedAttention(Z=Z, H=H, n_ctx=n_ctx, n_head=n_head, dtype=dtype)
+
+        if args.dump_kernel or args.dump_schedule:
+            pipeline = TransformDriver(
+                wload.schedule_modules(
+                    stop_at_stage=args.dump_kernel, parameters=params
+                )
+            )
+            payload = pipeline.apply(wload.payload_module())
+            if args.dump_kernel:
+                print(payload)
+            if args.dump_schedule:
+                for schedule_module in wload.schedule_modules(parameters=params):
+                    print(schedule_module)
+        else:
+            pipeline = TransformDriver(wload.schedule_modules(parameters=params))
+            payload = pipeline.apply(wload.payload_module())
+            runner = Runner(
+                payload,
+                mem_manager_cls=wload.memory_manager_class,
+                shared_libs=wload.shared_libs(),
+            )
+            if args.check_result:
+                # Setup callback function to copy result from device to host.
+                result_host_copy, argument_access_callback = (
+                    Runner.get_gpu_argument_access_callback(wload.shape, wload.dtype)
+                )
+
+                # Execute kernel once.
+                runner.execute(
+                    host_input_buffers=wload._initial_host_arrays,
+                    payload_function_name=wload.payload_function_name,
+                    argument_access_callback=argument_access_callback,
+                )
+
+                # Compute reference solution on host.
+                Q, K, V = wload._initial_host_arrays[1:4]
+                success = check_correctness(
+                    Q, K, V,
+                    result_host_copy,
+                    verbose=args.verbose,
+                )
+                if not success:
+                    raise ValueError("Result mismatch!")
+                else:
+                    print("Result is correct. Proceeding to benchmark...")
+
+            times = runner.benchmark(
+                host_input_buffers=wload._initial_host_arrays,
+                nruns=args.nruns,
+                nwarmup=args.nwarmup,
+            )
+            times *= 1e6  # convert to microseconds
+            elapsed = np.mean(times)
+            flop_count = wload.get_complexity()[0]
+            gflops = flop_count / (elapsed * 1e-6) / 1e9
+
+            print(
+                f"batch-size={Z} "
+                f"num-heads={H} "
+                f"n-ctx={n_ctx} "
+                f"n-head={n_head} "
+                f"dt={dtype} "
+                f"time(us): {elapsed:.2f} "
+                f"GFLOPS: {gflops:.2f} "
+            )
diff --git a/lighthouse/ingress/mlir_gen/gpu_fused_attention_payload.py b/lighthouse/ingress/mlir_gen/gpu_fused_attention_payload.py
new file mode 100644
index 00000000..788aa175
--- /dev/null
+++ b/lighthouse/ingress/mlir_gen/gpu_fused_attention_payload.py
@@ -0,0 +1,69 @@
+"""Generate MLIR payload for GPU fused attention operation."""
+
+from mlir import ir
+from mlir.dialects import linalg, bufferization, tensor, arith
+
+from lighthouse.utils.mlir import func_cif
+from lighthouse.ingress.mlir_gen.gpu_utils import emit_gpu_util_funcs
+from lighthouse.ingress.mlir_gen.utils import emit_buf_to_tensor
+
+
+def generate_gpu_fused_attention_payload(
+    func_name: str,
+    Z: int,
+    H: int,
+    n_ctx: int,
+    n_head: int,
+    dtype: ir.Type,
+) -> ir.Module:
+    """
+    Generate MLIR module for fused attention payload.
+
+    Computes fused attention:
+    output = softmax(Q @ K^T / sqrt(n_head)) @ V
+
+    Args:
+        func_name: Name of the payload function
+        Z: Batch size
+        H: Number of attention heads
+        n_ctx: Context length (sequence length)
+        n_head: Head dimension
+        dtype: MLIR element type (e.g., F32Type)
+
+    Returns:
+        MLIR module containing the fused attention payload function
+    """
+    mod = ir.Module.create()
+    shape = (Z, H, n_ctx, n_head)
+    memref_t = ir.MemRefType.get(shape, dtype)
+
+    with ir.InsertionPoint(mod.body):
+        # Function signature: payload(output, Q, K, V)
+        @func_cif(memref_t, memref_t, memref_t, memref_t, name=func_name)
+        def payload(output, Q_arg, K_arg, V_arg):
+            # Convert memrefs to tensors
+            emit_buf_to_tensor(output, restrict=True, writable=True)
+            Q_tensor = emit_buf_to_tensor(Q_arg, restrict=True)
+            K_tensor = emit_buf_to_tensor(K_arg, restrict=True)
+            V_tensor = emit_buf_to_tensor(V_arg, restrict=True)
+
+            # TODO: Implement fused attention computation
+            # This will involve:
+            # 1. Q @ K^T (batch matmul with transpose)
+            # 2. Scale by 1/sqrt(n_head)
+            # 3. Softmax along last dimension
+            # 4. Result @ V (batch matmul)
+
+            # Placeholder: create empty output tensor
+            output_init = tensor.empty(shape, dtype)
+            result = output_init
+
+            # Materialize result back to output memref
+            bufferization.materialize_in_destination(
+                None, result, output, restrict=True, writable=True
+            )
+
+        # Emit utility functions for GPU memory management
+        emit_gpu_util_funcs(dtype, rank=4)
+
+    return mod
diff --git a/lighthouse/schedule/xegpu/fused_attention_schedule.py b/lighthouse/schedule/xegpu/fused_attention_schedule.py
new file mode 100644
index 00000000..a05e2ca9
--- /dev/null
+++ b/lighthouse/schedule/xegpu/fused_attention_schedule.py
@@ -0,0 +1,164 @@
+"""Generate MLIR transform schedule for XeGPU fused attention operation."""
+
+from typing import Optional
+
+from mlir import ir
+from mlir.dialects import transform
+from mlir.dialects.transform import structured, loop, xegpu
+from mlir.dialects.transform import bufferization as transform_bufferization
+from mlir.dialects.bufferization import LayoutMapOption
+
+from lighthouse.pipeline.helper import (
+    apply_registered_pass,
+    canonicalize,
+    match,
+    match_and_split,
+    PipelineInterrupt,
+)
+from lighthouse.schedule.xegpu.helper import bundle_xegpu_to_binary
+from lighthouse.dialects.transform import transform_ext
+
+
+def get_fused_attention_schedule_module(
+    stop_at_stage: Optional[str] = None,
+    parameters: Optional[dict] = None,
+) -> ir.Module:
+    """
+    Generate transform schedule for fused attention operation.
+
+    The schedule performs the following transformations:
+    1. Tile the fused attention operation
+    2. Vectorize operations
+    3. Bufferize tensors
+    4. Convert to GPU dialect
+    5. Lower to XeGPU operations
+
+    Args:
+        stop_at_stage: Optional stage name to stop early (for debugging)
+        parameters: Dictionary with scheduling parameters:
+            - batch_size: Batch size (Z)
+            - num_heads: Number of attention heads (H)
+            - n_ctx: Context length
+            - n_head: Head dimension
+
+    Returns:
+        MLIR module containing the transform schedule
+    """
+    assert parameters is not None, "Schedule parameters must be provided"
+
+    mod = ir.Module.create()
+    mod.operation.attributes["transform.with_named_sequence"] = ir.UnitAttr.get()
+
+    with ir.InsertionPoint(mod.body):
+        # Create a transform sequence with proper signature
+        named_sequence = transform.named_sequence(
+            "__transform_main",
+            [transform.AnyOpType.get()],  # input: module
+            [],  # no outputs
+            arg_attrs=[{"transform.readonly": ir.UnitAttr.get()}],
+        )
+
+        with ir.InsertionPoint(named_sequence.body):
+            # match the payload module
+            anytype = transform.AnyOpType.get()
+            func = match(named_sequence.bodyTarget, ops={"func.func"})
+            payload_mod = transform.get_parent_op(
+                anytype,
+                func,
+                op_name="builtin.module",
+                deduplicate=True,
+            )
+
+            xegpu_fused_attention_transform_schedule(
+                payload_mod,
+                parameters=parameters,
+                stop_at_stage=stop_at_stage or "",
+            )
+
+    return mod
+
+
+def xegpu_fused_attention_transform_schedule(
+    mod: ir.Value[transform.AnyOpType],
+    parameters: dict,
+    stop_at_stage: str = "",
+):
+    """Transform schedule for fused attention payload."""
+    try:
+        mod = bundle_xegpu_fused_attention_schedule(
+            mod,
+            parameters=parameters,
+            stop_at_stage=stop_at_stage,
+        )
+
+        mod = bundle_xegpu_to_binary(
+            mod,
+            stop_at_stage=stop_at_stage,
+        )
+    except PipelineInterrupt:
+        pass
+    finally:
+        transform.yield_()
+
+
+def bundle_xegpu_fused_attention_schedule(
+    mod: ir.Value[transform.AnyOpType],
+    parameters: dict,
+    stop_at_stage: str = "",
+) -> ir.Value[transform.AnyOpType]:
+    """Schedule for lowering fused attention payload to xegpu wg level."""
+
+    if stop_at_stage == "initial":
+        raise PipelineInterrupt()
+
+    anytype = transform.AnyOpType.get()
+
+    # TODO: Implement tiling, fusion, and lowering for fused attention
+    # This will involve:
+    # 1. Matching and tiling matmul operations (Q @ K^T)
+    # 2. Fusing softmax operation
+    # 3. Tiling second matmul (attention @ V)
+    # 4. Vectorization
+    # 5. Bufferization
+    # 6. GPU outlining
+    # 7. XeGPU lowering
+
+    func = match(mod, ops={"func.func"})
+
+    if stop_at_stage == "tiled":
+        raise PipelineInterrupt()
+
+    # vectorize (placeholder)
+    # func = structured.VectorizeChildrenAndApplyPatternsOp(
+    #     func,
+    #     fold_type_extensions_into_contract=True,
+    # ).result
+    transform.apply_cse(func)
+    canonicalize(func)
+
+    if stop_at_stage == "vectorized":
+        raise PipelineInterrupt()
+
+    # bufferize (placeholder)
+    # mod = apply_registered_pass(mod, "eliminate-empty-tensors")
+    # identity_layout = LayoutMapOption.IdentityLayoutMap
+    # mod = transform_bufferization.OneShotBufferizeOp(
+    #     mod,
+    #     allow_return_allocs_from_loops=True,
+    #     bufferize_function_boundaries=True,
+    #     function_boundary_type_conversion=identity_layout,
+    # ).result
+
+    if stop_at_stage == "bufferized":
+        raise PipelineInterrupt()
+
+    if stop_at_stage == "gpu-outlining":
+        raise PipelineInterrupt()
+
+    if stop_at_stage == "xegpu-initial":
+        raise PipelineInterrupt()
+
+    if stop_at_stage == "xegpu-wg":
+        raise PipelineInterrupt()
+
+    return mod

From 3262e4a58ee3e7756b3098033815f3a719041407 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Mon, 20 Apr 2026 22:28:04 +0000
Subject: [PATCH 35/51] add initial version

---
 examples/xegpu/fused_attention.py             | 22 ++++++++++++++-----
 .../mlir_gen/gpu_fused_attention_payload.py   |  2 +-
 .../xegpu/fused_attention_schedule.py         |  6 -----
 3 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/examples/xegpu/fused_attention.py b/examples/xegpu/fused_attention.py
index 5aa84e2a..c42faf0e 100644
--- a/examples/xegpu/fused_attention.py
+++ b/examples/xegpu/fused_attention.py
@@ -18,8 +18,12 @@
 from lighthouse.execution import GPUMemoryManager
 from lighthouse.utils.numpy import mlir_to_numpy_dtype
 from lighthouse.ingress.mlir_gen import get_mlir_elem_type
-from lighthouse.ingress.mlir_gen.gpu_fused_attention_payload import generate_gpu_fused_attention_payload
-from lighthouse.schedule.xegpu.fused_attention_schedule import get_fused_attention_schedule_module
+from lighthouse.ingress.mlir_gen.gpu_fused_attention_payload import (
+    generate_gpu_fused_attention_payload,
+)
+from lighthouse.schedule.xegpu.fused_attention_schedule import (
+    get_fused_attention_schedule_module,
+)
 
 
 def fused_attention_complexity(Z: int, H: int, n_ctx: int, n_head: int, nbytes: int):
@@ -41,7 +45,11 @@ def fused_attention_complexity(Z: int, H: int, n_ctx: int, n_head: int, nbytes:
 
 
 def check_correctness(
-    Q: np.ndarray, K: np.ndarray, V: np.ndarray, output_arr: np.ndarray, verbose: int = 0
+    Q: np.ndarray,
+    K: np.ndarray,
+    V: np.ndarray,
+    output_arr: np.ndarray,
+    verbose: int = 0,
 ) -> bool:
     """
     Check correctness of fused attention output.
@@ -146,7 +154,9 @@ def _initial_host_arrays(self) -> tuple[np.ndarray]:
 
     def get_complexity(self) -> tuple[int, int, int]:
         nbytes = np.dtype(self.dtype).itemsize
-        return fused_attention_complexity(self.Z, self.H, self.n_ctx, self.n_head, nbytes)
+        return fused_attention_complexity(
+            self.Z, self.H, self.n_ctx, self.n_head, nbytes
+        )
 
     def payload_module(self) -> ir.Module:
         """Generate MLIR module for fused attention payload."""
@@ -309,7 +319,9 @@ def parse_cli():
                 # Compute reference solution on host.
                 Q, K, V = wload._initial_host_arrays[1:4]
                 success = check_correctness(
-                    Q, K, V,
+                    Q,
+                    K,
+                    V,
                     result_host_copy,
                     verbose=args.verbose,
                 )
diff --git a/lighthouse/ingress/mlir_gen/gpu_fused_attention_payload.py b/lighthouse/ingress/mlir_gen/gpu_fused_attention_payload.py
index 788aa175..73046604 100644
--- a/lighthouse/ingress/mlir_gen/gpu_fused_attention_payload.py
+++ b/lighthouse/ingress/mlir_gen/gpu_fused_attention_payload.py
@@ -1,7 +1,7 @@
 """Generate MLIR payload for GPU fused attention operation."""
 
 from mlir import ir
-from mlir.dialects import linalg, bufferization, tensor, arith
+from mlir.dialects import bufferization, tensor
 
 from lighthouse.utils.mlir import func_cif
 from lighthouse.ingress.mlir_gen.gpu_utils import emit_gpu_util_funcs
diff --git a/lighthouse/schedule/xegpu/fused_attention_schedule.py b/lighthouse/schedule/xegpu/fused_attention_schedule.py
index a05e2ca9..5fa47770 100644
--- a/lighthouse/schedule/xegpu/fused_attention_schedule.py
+++ b/lighthouse/schedule/xegpu/fused_attention_schedule.py
@@ -4,19 +4,13 @@
 
 from mlir import ir
 from mlir.dialects import transform
-from mlir.dialects.transform import structured, loop, xegpu
-from mlir.dialects.transform import bufferization as transform_bufferization
-from mlir.dialects.bufferization import LayoutMapOption
 
 from lighthouse.pipeline.helper import (
-    apply_registered_pass,
     canonicalize,
     match,
-    match_and_split,
     PipelineInterrupt,
 )
 from lighthouse.schedule.xegpu.helper import bundle_xegpu_to_binary
-from lighthouse.dialects.transform import transform_ext
 
 
 def get_fused_attention_schedule_module(

From 2135de3537ed55cca6da480ec38fcfa82cf83dd7 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Tue, 21 Apr 2026 20:59:59 +0000
Subject: [PATCH 36/51] payload done

---
 .../mlir_gen/gpu_fused_attention_payload.py   | 90 ++++++++++++++++---
 1 file changed, 80 insertions(+), 10 deletions(-)

diff --git a/lighthouse/ingress/mlir_gen/gpu_fused_attention_payload.py b/lighthouse/ingress/mlir_gen/gpu_fused_attention_payload.py
index 73046604..87818d08 100644
--- a/lighthouse/ingress/mlir_gen/gpu_fused_attention_payload.py
+++ b/lighthouse/ingress/mlir_gen/gpu_fused_attention_payload.py
@@ -1,7 +1,9 @@
 """Generate MLIR payload for GPU fused attention operation."""
 
+import math
+
 from mlir import ir
-from mlir.dialects import bufferization, tensor
+from mlir.dialects import arith, bufferization, linalg, tensor
 
 from lighthouse.utils.mlir import func_cif
 from lighthouse.ingress.mlir_gen.gpu_utils import emit_gpu_util_funcs
@@ -47,16 +49,84 @@ def payload(output, Q_arg, K_arg, V_arg):
             K_tensor = emit_buf_to_tensor(K_arg, restrict=True)
             V_tensor = emit_buf_to_tensor(V_arg, restrict=True)
 
-            # TODO: Implement fused attention computation
-            # This will involve:
-            # 1. Q @ K^T (batch matmul with transpose)
-            # 2. Scale by 1/sqrt(n_head)
-            # 3. Softmax along last dimension
-            # 4. Result @ V (batch matmul)
+            # Collapse (Z, H, n_ctx, n_head) into 2D (Z*H*n_ctx, n_head).
+            # NOTE(review): softmax then spans all Z*H*n_ctx keys, not one (z, h) slice - confirm.
+            collapsed_dim = Z * H * n_ctx
+            collapsed_shape_2d = (collapsed_dim, n_head)
+
+            Q_2d = tensor.collapse_shape(
+                ir.RankedTensorType.get(collapsed_shape_2d, dtype),
+                Q_tensor,
+                reassociation=[[0, 1, 2], [3]],
+            )
+            K_2d = tensor.collapse_shape(
+                ir.RankedTensorType.get(collapsed_shape_2d, dtype),
+                K_tensor,
+                reassociation=[[0, 1, 2], [3]],
+            )
+            V_2d = tensor.collapse_shape(
+                ir.RankedTensorType.get(collapsed_shape_2d, dtype),
+                V_tensor,
+                reassociation=[[0, 1, 2], [3]],
+            )
+
+            # Step 1: Transpose K to get K^T
+            # Permute from (collapsed_dim, n_head) to (n_head, collapsed_dim)
+            kt_shape_2d = (n_head, collapsed_dim)
+            kt_init = tensor.empty(kt_shape_2d, dtype)
+            K_transposed = linalg.transpose(K_2d, outs=[kt_init], permutation=[1, 0])
+
+            # Step 2: Compute Q @ K^T
+            # Q: (collapsed_dim, n_head) @ K^T: (n_head, collapsed_dim)
+            # Result: (collapsed_dim, collapsed_dim)
+            qkt_shape_2d = (collapsed_dim, collapsed_dim)
+            qkt_init = tensor.empty(qkt_shape_2d, dtype)
+            # Initialize with zeros for matmul accumulation
+            zero = arith.constant(dtype, 0.0)
+            qkt_init_filled = linalg.fill(zero, outs=[qkt_init])
+
+            # Matmul: Q @ K^T
+            qkt = linalg.matmul(Q_2d, K_transposed, outs=[qkt_init_filled])
+
+            # Step 3: Scale by 1/sqrt(n_head)
+            scale_factor = 1.0 / math.sqrt(n_head)
+            scale_const = arith.constant(dtype, scale_factor)
 
-            # Placeholder: create empty output tensor
-            output_init = tensor.empty(shape, dtype)
-            result = output_init
+            # Create a tensor filled with the scale factor
+            scale_tensor_init = tensor.empty(qkt_shape_2d, dtype)
+            scale_tensor = linalg.fill(scale_const, outs=[scale_tensor_init])
+
+            # Elementwise multiply qkt with scale tensor
+            scaled_qkt_init = tensor.empty(qkt_shape_2d, dtype)
+            scaled_qkt = linalg.mul(qkt, scale_tensor, outs=[scaled_qkt_init])
+
+            # Step 4: Apply softmax along the last dimension (dim=1 in 2D)
+            softmax_init = tensor.empty(qkt_shape_2d, dtype)
+            attention_weights = linalg.softmax(
+                result=[ir.RankedTensorType.get(qkt_shape_2d, dtype)],
+                input=scaled_qkt,
+                output=softmax_init,
+                dimension=1,
+            )
+
+            # Step 5: Multiply attention weights by V
+            # attention_weights: (collapsed_dim, collapsed_dim) @ V: (collapsed_dim, n_head)
+            # Result: (collapsed_dim, n_head)
+            output_2d_init = tensor.empty(collapsed_shape_2d, dtype)
+            output_2d_init_filled = linalg.fill(zero, outs=[output_2d_init])
+
+            result_2d = linalg.matmul(
+                attention_weights, V_2d, outs=[output_2d_init_filled]
+            )
+
+            # Expand back to 4D: (Z*H*n_ctx, n_head) -> (Z, H, n_ctx, n_head)
+            result = tensor.expand_shape(
+                ir.RankedTensorType.get(shape, dtype),
+                result_2d,
+                reassociation=[[0, 1, 2], [3]],
+                output_shape=[],
+                static_output_shape=shape,
+            )
 
             # Materialize result back to output memref
             bufferization.materialize_in_destination(

From 361e069c7f079dd781c96b7331d97f465bd62446 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Tue, 21 Apr 2026 21:51:32 +0000
Subject: [PATCH 37/51] tiled last matmul

---
 examples/xegpu/fused_attention.py             |  7 ++++
 .../xegpu/fused_attention_schedule.py         | 42 +++++++++++++------
 2 files changed, 37 insertions(+), 12 deletions(-)

diff --git a/examples/xegpu/fused_attention.py b/examples/xegpu/fused_attention.py
index c42faf0e..213915f0 100644
--- a/examples/xegpu/fused_attention.py
+++ b/examples/xegpu/fused_attention.py
@@ -214,6 +214,12 @@ def parse_cli():
         default=64,
         help="Head dimension",
     )
+    parser.add_argument(
+        "--wg-tile-size",
+        type=int,
+        default=64,
+        help="Workgroup tile size for the collapsed batch dimension (Z*H*n_ctx)",
+    )
     parser.add_argument(
         "--nruns",
         type=int,
@@ -271,6 +277,7 @@ def parse_cli():
         "num_heads": args.num_heads,
         "n_ctx": args.n_ctx,
         "n_head": args.n_head,
+        "wg_tile_size": args.wg_tile_size,
     }
 
     Z = args.batch_size
diff --git a/lighthouse/schedule/xegpu/fused_attention_schedule.py b/lighthouse/schedule/xegpu/fused_attention_schedule.py
index 5fa47770..d66efa24 100644
--- a/lighthouse/schedule/xegpu/fused_attention_schedule.py
+++ b/lighthouse/schedule/xegpu/fused_attention_schedule.py
@@ -4,10 +4,12 @@
 
 from mlir import ir
 from mlir.dialects import transform
+from mlir.dialects.transform import structured
 
 from lighthouse.pipeline.helper import (
     canonicalize,
     match,
+    match_and_split,
     PipelineInterrupt,
 )
 from lighthouse.schedule.xegpu.helper import bundle_xegpu_to_binary
@@ -34,6 +36,7 @@ def get_fused_attention_schedule_module(
             - num_heads: Number of attention heads (H)
             - n_ctx: Context length
             - n_head: Head dimension
+            - wg_tile_size: Workgroup tile size for the collapsed batch dimension (Z*H*n_ctx)
 
     Returns:
         MLIR module containing the transform schedule
@@ -106,18 +109,33 @@ def bundle_xegpu_fused_attention_schedule(
         raise PipelineInterrupt()
 
     anytype = transform.AnyOpType.get()
-
-    # TODO: Implement tiling, fusion, and lowering for fused attention
-    # This will involve:
-    # 1. Matching and tiling matmul operations (Q @ K^T)
-    # 2. Fusing softmax operation
-    # 3. Tiling second matmul (attention @ V)
-    # 4. Vectorization
-    # 5. Bufferization
-    # 6. GPU outlining
-    # 7. XeGPU lowering
-
-    func = match(mod, ops={"func.func"})
+    # Match all matmul operations - there should be 2:
+    # 1. Q @ K^T
+    # 2. attention_weights @ V
+    matmul_ops = match_and_split(mod, ops={"linalg.matmul"}, nhandles=2)
+
+    # Get the last matmul (attention_weights @ V)
+    last_matmul = matmul_ops[1]
+    func = transform.get_parent_op(
+        anytype,
+        last_matmul,
+        op_name="func.func",
+        deduplicate=True,
+    )
+
+    # Tile the last matmul in the batch dimension using tile_using_forall
+    # Batch dimension is the first dimension (collapsed_dim = Z * H * n_ctx)
+    # Extract workgroup tile size from parameters
+    wg_tile_size = parameters["wg_tile_size"]
+
+    tiled_matmul, forall_loop = structured.structured_tile_using_forall(
+        anytype,
+        anytype,
+        last_matmul,
+        num_threads=[],
+        tile_sizes=[],
+        static_tile_sizes=(wg_tile_size, 0),
+    )
 
     if stop_at_stage == "tiled":
         raise PipelineInterrupt()

From 4d0827ec0b4e533a07ffe13fff42f7c9447722e8 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Tue, 21 Apr 2026 23:59:37 +0000
Subject: [PATCH 38/51] change to batch matmul

---
 .../mlir_gen/gpu_fused_attention_payload.py   | 82 +++++++++----------
 1 file changed, 41 insertions(+), 41 deletions(-)

diff --git a/lighthouse/ingress/mlir_gen/gpu_fused_attention_payload.py b/lighthouse/ingress/mlir_gen/gpu_fused_attention_payload.py
index 87818d08..c2f3d4ec 100644
--- a/lighthouse/ingress/mlir_gen/gpu_fused_attention_payload.py
+++ b/lighthouse/ingress/mlir_gen/gpu_fused_attention_payload.py
@@ -49,81 +49,81 @@ def payload(output, Q_arg, K_arg, V_arg):
             K_tensor = emit_buf_to_tensor(K_arg, restrict=True)
             V_tensor = emit_buf_to_tensor(V_arg, restrict=True)
 
-            # Collapse first 3 dimensions (Z, H, n_ctx) into a single dimension
-            # From (Z, H, n_ctx, n_head) to (Z*H*n_ctx, n_head)
-            collapsed_dim = Z * H * n_ctx
-            collapsed_shape_2d = (collapsed_dim, n_head)
+            # Collapse first 2 dimensions (Z, H) into a batch dimension
+            # From (Z, H, n_ctx, n_head) to (Z*H, n_ctx, n_head)
+            batch_dim = Z * H
+            collapsed_shape_3d = (batch_dim, n_ctx, n_head)
 
-            Q_2d = tensor.collapse_shape(
-                ir.RankedTensorType.get(collapsed_shape_2d, dtype),
+            Q_3d = tensor.collapse_shape(
+                ir.RankedTensorType.get(collapsed_shape_3d, dtype),
                 Q_tensor,
-                reassociation=[[0, 1, 2], [3]],
+                reassociation=[[0, 1], [2], [3]],
             )
-            K_2d = tensor.collapse_shape(
-                ir.RankedTensorType.get(collapsed_shape_2d, dtype),
+            K_3d = tensor.collapse_shape(
+                ir.RankedTensorType.get(collapsed_shape_3d, dtype),
                 K_tensor,
-                reassociation=[[0, 1, 2], [3]],
+                reassociation=[[0, 1], [2], [3]],
             )
-            V_2d = tensor.collapse_shape(
-                ir.RankedTensorType.get(collapsed_shape_2d, dtype),
+            V_3d = tensor.collapse_shape(
+                ir.RankedTensorType.get(collapsed_shape_3d, dtype),
                 V_tensor,
-                reassociation=[[0, 1, 2], [3]],
+                reassociation=[[0, 1], [2], [3]],
             )
 
             # Step 1: Transpose K to get K^T
-            # Permute from (collapsed_dim, n_head) to (n_head, collapsed_dim)
-            kt_shape_2d = (n_head, collapsed_dim)
-            kt_init = tensor.empty(kt_shape_2d, dtype)
-            K_transposed = linalg.transpose(K_2d, outs=[kt_init], permutation=[1, 0])
-
-            # Step 2: Compute Q @ K^T
-            # Q: (collapsed_dim, n_head) @ K^T: (n_head, collapsed_dim)
-            # Result: (collapsed_dim, collapsed_dim)
-            qkt_shape_2d = (collapsed_dim, collapsed_dim)
-            qkt_init = tensor.empty(qkt_shape_2d, dtype)
+            # Permute from (batch_dim, n_ctx, n_head) to (batch_dim, n_head, n_ctx)
+            kt_shape_3d = (batch_dim, n_head, n_ctx)
+            kt_init = tensor.empty(kt_shape_3d, dtype)
+            K_transposed = linalg.transpose(K_3d, outs=[kt_init], permutation=[0, 2, 1])
+
+            # Step 2: Compute Q @ K^T using batch_matmul
+            # Q: (batch_dim, n_ctx, n_head) @ K^T: (batch_dim, n_head, n_ctx)
+            # Result: (batch_dim, n_ctx, n_ctx)
+            qkt_shape_3d = (batch_dim, n_ctx, n_ctx)
+            qkt_init = tensor.empty(qkt_shape_3d, dtype)
             # Initialize with zeros for matmul accumulation
             zero = arith.constant(dtype, 0.0)
             qkt_init_filled = linalg.fill(zero, outs=[qkt_init])
 
-            # Matmul: Q @ K^T
-            qkt = linalg.matmul(Q_2d, K_transposed, outs=[qkt_init_filled])
+            # Batch matmul: Q @ K^T
+            qkt = linalg.batch_matmul(Q_3d, K_transposed, outs=[qkt_init_filled])
 
             # Step 3: Scale by 1/sqrt(n_head)
             scale_factor = 1.0 / math.sqrt(n_head)
             scale_const = arith.constant(dtype, scale_factor)
 
             # Create a tensor filled with the scale factor
-            scale_tensor_init = tensor.empty(qkt_shape_2d, dtype)
+            scale_tensor_init = tensor.empty(qkt_shape_3d, dtype)
             scale_tensor = linalg.fill(scale_const, outs=[scale_tensor_init])
 
             # Elementwise multiply qkt with scale tensor
-            scaled_qkt_init = tensor.empty(qkt_shape_2d, dtype)
+            scaled_qkt_init = tensor.empty(qkt_shape_3d, dtype)
             scaled_qkt = linalg.mul(qkt, scale_tensor, outs=[scaled_qkt_init])
 
-            # Step 4: Apply softmax along the last dimension (dim=1 in 2D)
-            softmax_init = tensor.empty(qkt_shape_2d, dtype)
+            # Step 4: Apply softmax along the last dimension (dim=2 in 3D)
+            softmax_init = tensor.empty(qkt_shape_3d, dtype)
             attention_weights = linalg.softmax(
-                result=[ir.RankedTensorType.get(qkt_shape_2d, dtype)],
+                result=[ir.RankedTensorType.get(qkt_shape_3d, dtype)],
                 input=scaled_qkt,
                 output=softmax_init,
-                dimension=1,
+                dimension=2,
             )
 
-            # Step 5: Multiply attention weights by V
-            # attention_weights: (collapsed_dim, collapsed_dim) @ V: (collapsed_dim, n_head)
-            # Result: (collapsed_dim, n_head)
-            output_2d_init = tensor.empty(collapsed_shape_2d, dtype)
-            output_2d_init_filled = linalg.fill(zero, outs=[output_2d_init])
+            # Step 5: Multiply attention weights by V using batch_matmul
+            # attention_weights: (batch_dim, n_ctx, n_ctx) @ V: (batch_dim, n_ctx, n_head)
+            # Result: (batch_dim, n_ctx, n_head)
+            output_3d_init = tensor.empty(collapsed_shape_3d, dtype)
+            output_3d_init_filled = linalg.fill(zero, outs=[output_3d_init])
 
-            result_2d = linalg.matmul(
-                attention_weights, V_2d, outs=[output_2d_init_filled]
+            result_3d = linalg.batch_matmul(
+                attention_weights, V_3d, outs=[output_3d_init_filled]
             )
 
-            # Expand back to 4D: (Z*H*n_ctx, n_head) -> (Z, H, n_ctx, n_head)
+            # Expand back to 4D: (Z*H, n_ctx, n_head) -> (Z, H, n_ctx, n_head)
             result = tensor.expand_shape(
                 ir.RankedTensorType.get(shape, dtype),
-                result_2d,
-                reassociation=[[0, 1, 2], [3]],
+                result_3d,
+                reassociation=[[0, 1], [2], [3]],
                 output_shape=[],
                 static_output_shape=shape,
             )

From e379b683225f6943ae0b9645c0b3a5aed4bec420 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Wed, 22 Apr 2026 16:26:23 +0000
Subject: [PATCH 39/51] save work

---
 examples/xegpu/fused_attention.py             |   3 +-
 .../xegpu/fused_attention_schedule.py         | 109 ++++++++++++++++--
 2 files changed, 104 insertions(+), 8 deletions(-)

diff --git a/examples/xegpu/fused_attention.py b/examples/xegpu/fused_attention.py
index 213915f0..da61b3f7 100644
--- a/examples/xegpu/fused_attention.py
+++ b/examples/xegpu/fused_attention.py
@@ -242,7 +242,8 @@ def parse_cli():
         type=str,
         choices=[
             "initial",
-            "tiled",
+            "outer-tiled",
+            "inner-tiled",
             "vectorized",
             "bufferized",
             "gpu-outlining",
diff --git a/lighthouse/schedule/xegpu/fused_attention_schedule.py b/lighthouse/schedule/xegpu/fused_attention_schedule.py
index d66efa24..6ea15081 100644
--- a/lighthouse/schedule/xegpu/fused_attention_schedule.py
+++ b/lighthouse/schedule/xegpu/fused_attention_schedule.py
@@ -109,6 +109,7 @@ def bundle_xegpu_fused_attention_schedule(
         raise PipelineInterrupt()
 
     anytype = transform.AnyOpType.get()
+    anyvalue = transform.AnyValueType.get()
     # Match all matmul operations - there should be 2:
     # 1. Q @ K^T
     # 2. attention_weights @ V
@@ -137,17 +138,111 @@ def bundle_xegpu_fused_attention_schedule(
         static_tile_sizes=(wg_tile_size, 0),
     )
 
-    if stop_at_stage == "tiled":
-        raise PipelineInterrupt()
+    # Fuse the softmax producer into forall
+    softmax_ops = match_and_split(func, ops={"linalg.softmax"}, nhandles=1)
+    softmax_op = softmax_ops[0]
+    fused_softmax_op, forall_loop = structured.structured_fuse_into_containing_op(
+        anytype,
+        anytype,
+        producer_op=softmax_op,
+        containing_op=forall_loop,
+    )
+    transform.apply_cse(func)
+    canonicalize(func)
 
-    # vectorize (placeholder)
-    # func = structured.VectorizeChildrenAndApplyPatternsOp(
-    #     func,
-    #     fold_type_extensions_into_contract=True,
-    # ).result
+    # Fuse linalg.mul (scaling) into forall
+    mul_ops = match_and_split(func, ops={"linalg.mul"}, nhandles=1)
+    mul_op = mul_ops[0]
+    _, forall_loop = structured.structured_fuse_into_containing_op(
+        anytype,
+        anytype,
+        producer_op=mul_op,
+        containing_op=forall_loop,
+    )
+    transform.apply_cse(func)
+    canonicalize(func)
+
+    # Fuse the first matmul (Q @ K^T) into forall
+    matmul_ops = match_and_split(
+        func, ops={"linalg.matmul"}, nhandles=2
+    )  # Two matmuls are present.
+    first_matmul = matmul_ops[0]
+    _, forall_loop = structured.structured_fuse_into_containing_op(
+        anytype,
+        anytype,
+        producer_op=first_matmul,
+        containing_op=forall_loop,
+    )
+    transform.apply_cse(func)
+    canonicalize(func)
+
+    # Fuse linalg.transpose (K transpose) into forall
+    transpose_ops = match_and_split(func, ops={"linalg.transpose"}, nhandles=1)
+    transpose_op = transpose_ops[0]
+    _, forall_loop = structured.structured_fuse_into_containing_op(
+        anytype,
+        anytype,
+        producer_op=transpose_op,
+        containing_op=forall_loop,
+    )
     transform.apply_cse(func)
     canonicalize(func)
 
+    # At this point all of the key operations are fused into the forall loop.
+    # Remaining linalg.fill ops can be fused trivially.
+    fill_ops = match_and_split(func, ops={"linalg.fill"}, nhandles=3)
+    for fill_op in fill_ops:
+        _, forall_loop = structured.structured_fuse_into_containing_op(
+            anytype,
+            anytype,
+            producer_op=fill_op,
+            containing_op=forall_loop,
+        )
+        transform.apply_cse(func)
+        canonicalize(func)
+
+    # tensor.empty() holding the result of transpose can be fused.
+    transpose_op = match_and_split(func, ops={"linalg.transpose"}, nhandles=1)[0]
+    transpose_init = transform.get_producer_of_operand(
+        anytype, transpose_op, operand_number=1
+    )
+    _, forall_loop = structured.structured_fuse_into_containing_op(
+        anytype,
+        anytype,
+        producer_op=transpose_init,
+        containing_op=forall_loop,
+    )
+    transform.apply_cse(func)
+    canonicalize(func)
+
+    # tensor.empty() ops holding the result of the softmax can also be fused.
+    softmax_op = match_and_split(func, ops={"linalg.softmax"}, nhandles=1)[0]
+    softmax_init = transform.get_producer_of_operand(
+        anytype, softmax_op, operand_number=1
+    )
+    _, forall_loop = structured.structured_fuse_into_containing_op(
+        anytype,
+        anytype,
+        producer_op=softmax_init,
+        containing_op=forall_loop,
+    )
+    transform.apply_cse(func)
+    canonicalize(func)
+
+    if stop_at_stage == "outer-tiled":
+        raise PipelineInterrupt()
+
+    # # vectorize (placeholder)
+    # # func = structured.VectorizeChildrenAndApplyPatternsOp(
+    # #     func,
+    # #     fold_type_extensions_into_contract=True,
+    # # ).result
+    # transform.apply_cse(func)
+    # canonicalize(func)
+
+    if stop_at_stage == "inner-tiled":
+        raise PipelineInterrupt()
+
     if stop_at_stage == "vectorized":
         raise PipelineInterrupt()
 

From 06e7edefa4213bd656a5d881cb8b105459b027b9 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Wed, 22 Apr 2026 20:12:39 +0000
Subject: [PATCH 40/51] save initial softmax doc

---
 .../softmax_lowering_flow.md                  | 608 ++++++++++++++++++
 1 file changed, 608 insertions(+)
 create mode 100644 reduction_tiling_docs/softmax_lowering_flow.md

diff --git a/reduction_tiling_docs/softmax_lowering_flow.md b/reduction_tiling_docs/softmax_lowering_flow.md
new file mode 100644
index 00000000..bccf3aa0
--- /dev/null
+++ b/reduction_tiling_docs/softmax_lowering_flow.md
@@ -0,0 +1,608 @@
+# Softmax Lowering Flow: IR Transformation Stages
+
+- **Input Shape**: `1024x512xf32` (1024 rows, 512 columns)
+- **Softmax Dimension**: dim=1 (along the 512-element rows)
+
+---
+
+## Stage 1: Initial IR
+
+Single high-level `linalg.softmax` operation on the full tensor.
+
+```mlir
+func.func @payload(%arg0: memref<1024x512xf32>, %arg1: memref<1024x512xf32>) {
+  %1 = bufferization.to_tensor %arg1 : tensor<1024x512xf32>
+
+  // Single softmax op over entire tensor
+  %3 = linalg.softmax dimension(1) ins(%1 : tensor<1024x512xf32>)
+                                   outs(%2 : tensor<1024x512xf32>) -> tensor<1024x512xf32>
+
+  bufferization.materialize_in_destination %3 in %arg0
+}
+```
+
+---
+
+## Stage 2: After Tiling Parallel Dim
+
+Parallel dimension (rows) tiled into 16 chunks of 64 rows each. Introduces `scf.forall` for parallel execution.
+
+```mlir
+func.func @payload(%arg0: memref<1024x512xf32>, %arg1: memref<1024x512xf32>) {
+  // Parallel loop over 16 tiles (1024 / 64 = 16)
+  %3 = scf.forall (%arg2) in (16) shared_outs(%arg3 = %2) -> (tensor<1024x512xf32>) {
+    %4 = affine.apply affine_map<(d0) -> (d0 * 64)>(%arg2)
+    %slice = tensor.extract_slice %1[%4, 0] [64, 512] [1, 1]
+
+    // Softmax on 64x512 slice
+    %5 = linalg.softmax dimension(1) ins(%slice : tensor<64x512xf32>)
+                                     outs(%slice_0 : tensor<64x512xf32>) -> tensor<64x512xf32>
+
+    scf.forall.in_parallel {
+      tensor.parallel_insert_slice %5 into %arg3[%4, 0] [64, 512] [1, 1]
+    }
+  }
+}
+```
+
+---
+
+## Stage 3: After Decomposing Softmax
+
+Softmax decomposed into 4 operations: max reduction → center+exp → sum reduction → division.
+
+```mlir
+func.func @payload(%arg0: memref<1024x512xf32>, %arg1: memref<1024x512xf32>) {
+  %3 = scf.forall (%arg2) in (16) shared_outs(%arg3 = %2) -> (tensor<1024x512xf32>) {
+    %slice = tensor.extract_slice %1[%4, 0] [64, 512] [1, 1]
+
+    // 1. Max reduction: (64,512) -> (64,)
+    %7 = linalg.generic {indexing_maps = [map<(d0,d1) -> (d0,d1)>, map<(d0,d1) -> (d0)>],
+                         iterator_types = ["parallel", "reduction"]}
+         ins(%slice : tensor<64x512xf32>) outs(%6 : tensor<64xf32>) {
+      ^bb0(%in: f32, %out: f32):
+        %12 = arith.maxnumf %in, %out : f32
+        linalg.yield %12 : f32
+    } -> tensor<64xf32>
+
+    // 2. Center and exp: (64,512) -> (64,512)
+    %8 = linalg.generic {indexing_maps = [map<(d0,d1) -> (d0,d1)>, map<(d0,d1) -> (d0)>, map<(d0,d1) -> (d0,d1)>],
+                         iterator_types = ["parallel", "parallel"]}
+         ins(%slice, %7 : tensor<64x512xf32>, tensor<64xf32>) outs(%slice_0 : tensor<64x512xf32>) {
+      ^bb0(%in: f32, %in_2: f32, %out: f32):
+        %12 = arith.subf %in, %in_2 : f32
+        %13 = math.exp %12 : f32
+        linalg.yield %13 : f32
+    } -> tensor<64x512xf32>
+
+    // 3. Sum reduction: (64,512) -> (64,)
+    %10 = linalg.generic {indexing_maps = [map<(d0,d1) -> (d0,d1)>, map<(d0,d1) -> (d0)>],
+                          iterator_types = ["parallel", "reduction"]}
+          ins(%8 : tensor<64x512xf32>) outs(%9 : tensor<64xf32>) {
+      ^bb0(%in: f32, %out: f32):
+        %12 = arith.addf %in, %out : f32
+        linalg.yield %12 : f32
+    } -> tensor<64xf32>
+
+    // 4. Division: (64,512) -> (64,512)
+    %11 = linalg.generic {indexing_maps = [map<(d0,d1) -> (d0,d1)>, map<(d0,d1) -> (d0)>, map<(d0,d1) -> (d0,d1)>],
+                          iterator_types = ["parallel", "parallel"]}
+          ins(%8, %10 : tensor<64x512xf32>, tensor<64xf32>) outs(%slice_0 : tensor<64x512xf32>) {
+      ^bb0(%in: f32, %in_2: f32, %out: f32):
+        %12 = arith.divf %in, %in_2 : f32
+        linalg.yield %12 : f32
+    } -> tensor<64x512xf32>
+
+    scf.forall.in_parallel {
+      tensor.parallel_insert_slice %11 into %arg3[%4, 0] [64, 512] [1, 1]
+    }
+  }
+}
+```
+
+**Loop structure:**
+- **Outer**: `scf.forall` with 16 parallel iterations
+- Each iteration performs 4 sequential linalg ops
+
+**Key operations:**
+1. **Max reduction**: Reduces from `(64, 512)` to `(64,)` using `maxnumf`
+2. **Center+exp**: Element-wise subtract max and apply exp
+3. **Sum reduction**: Reduces from `(64, 512)` to `(64,)` using `addf`
+4. **Division**: Element-wise divide by sum
+
+---
+
+## Stage 4: After Tiling Division
+
+Division operation tiled along dimension 1 into chunks of 16 columns.
+
+```mlir
+func.func @payload(%arg0: memref<1024x512xf32>, %arg1: memref<1024x512xf32>) {
+  %2 = scf.forall (%arg2) in (16) shared_outs(%arg3 = %1) -> (tensor<1024x512xf32>) {
+    %slice = tensor.extract_slice %0[%3, 0] [64, 512] [1, 1]
+
+    // Max reduction (64,512) -> (64,)
+    %6 = linalg.generic {iterator_types = ["parallel", "reduction"]}
+         ins(%slice) outs(%5) { maxnumf } -> tensor<64xf32>
+
+    // Center and exp (64,512) -> (64,512)
+    %7 = linalg.generic {iterator_types = ["parallel", "parallel"]}
+         ins(%slice, %6) outs(%slice_1) { subf, exp } -> tensor<64x512xf32>
+
+    // Sum reduction (64,512) -> (64,)
+    %9 = linalg.generic {iterator_types = ["parallel", "reduction"]}
+         ins(%7) outs(%8) { addf } -> tensor<64xf32>
+
+    // Division tiled over columns: loop from 0 to 512 step 16
+    %10 = scf.for %arg4 = %c0 to %c512 step %c16 iter_args(%arg5 = %slice_1) -> (tensor<64x512xf32>) {
+      %slice_2 = tensor.extract_slice %7[0, %arg4] [64, 16] [1, 1]
+      %slice_3 = tensor.extract_slice %arg5[0, %arg4] [64, 16] [1, 1]
+
+      // Division on 64x16 tile
+      %11 = linalg.generic {iterator_types = ["parallel", "parallel"]}
+            ins(%slice_2, %9 : tensor<64x16xf32>, tensor<64xf32>) outs(%slice_3 : tensor<64x16xf32>) {
+        ^bb0(%in: f32, %in_4: f32, %out: f32):
+          %12 = arith.divf %in, %in_4 : f32
+          linalg.yield %12 : f32
+      } -> tensor<64x16xf32>
+
+      %inserted = tensor.insert_slice %11 into %arg5[0, %arg4] [64, 16] [1, 1]
+      scf.yield %inserted : tensor<64x512xf32>
+    }
+
+    scf.forall.in_parallel {
+      tensor.parallel_insert_slice %10 into %arg3[%3, 0] [64, 512] [1, 1]
+    }
+  }
+}
+```
+
+**Loop structure:**
+- **Outer**: `scf.forall` with 16 parallel iterations (64-row tiles)
+- **Inner**: `scf.for` with 32 sequential iterations (512/16 = 32 column tiles)
+
+**Key change:** Division now operates on `64x16` tiles instead of full `64x512`
+
+---
+
+## Stage 5: After Fusing Center+Exp into Division Loop
+
+The center-and-exp computation is fused into the division loop to recompute values on-the-fly.
+
+```mlir
+func.func @payload(%arg0: memref<1024x512xf32>, %arg1: memref<1024x512xf32>) {
+  %2 = scf.forall (%arg2) in (16) shared_outs(%arg3 = %1) -> (tensor<1024x512xf32>) {
+    %slice = tensor.extract_slice %0[%3, 0] [64, 512] [1, 1]
+
+    // Max reduction (64,512) -> (64,)
+    %6 = linalg.generic {iterator_types = ["parallel", "reduction"]}
+         ins(%slice) outs(%5) { maxnumf } -> tensor<64xf32>
+
+    // Center and exp (still materialized for sum reduction)
+    %7 = linalg.generic {iterator_types = ["parallel", "parallel"]}
+         ins(%slice, %6) outs(%slice_1) { subf, exp } -> tensor<64x512xf32>
+
+    // Sum reduction (64,512) -> (64,)
+    %9 = linalg.generic {iterator_types = ["parallel", "reduction"]}
+         ins(%7) outs(%8) { addf } -> tensor<64xf32>
+
+    // Division loop with fused center+exp+div
+    %10 = scf.for %arg4 = %c0 to %c512 step %c16 iter_args(%arg5 = %slice_1) -> (tensor<64x512xf32>) {
+      %slice_2 = tensor.extract_slice %slice[0, %arg4] [64, 16] [1, 1]  // from original input
+      %slice_3 = tensor.extract_slice %arg5[0, %arg4] [64, 16] [1, 1]
+
+      // Fused center+exp on 64x16 tile
+      %11 = linalg.generic {iterator_types = ["parallel", "parallel"]}
+            ins(%slice_2, %6 : tensor<64x16xf32>, tensor<64xf32>) outs(%slice_3 : tensor<64x16xf32>) {
+        ^bb0(%in: f32, %in_4: f32, %out: f32):
+          %13 = arith.subf %in, %in_4 : f32
+          %14 = math.exp %13 : f32
+          linalg.yield %14 : f32
+      } -> tensor<64x16xf32>
+
+      // Division on 64x16 tile
+      %12 = linalg.generic {iterator_types = ["parallel", "parallel"]}
+            ins(%11, %9 : tensor<64x16xf32>, tensor<64xf32>) outs(%slice_3 : tensor<64x16xf32>) {
+        ^bb0(%in: f32, %in_4: f32, %out: f32):
+          %13 = arith.divf %in, %in_4 : f32
+          linalg.yield %13 : f32
+      } -> tensor<64x16xf32>
+
+      %inserted = tensor.insert_slice %12 into %arg5[0, %arg4] [64, 16] [1, 1]
+      scf.yield %inserted : tensor<64x512xf32>
+    }
+
+    scf.forall.in_parallel {
+      tensor.parallel_insert_slice %10 into %arg3[%3, 0] [64, 512] [1, 1]
+    }
+  }
+}
+```
+
+**Loop structure:**
+- **Outer**: `scf.forall` with 16 parallel iterations
+- **Inner**: `scf.for` with 32 sequential iterations
+
+**Key change:** Inside the division loop, center+exp is recomputed on `64x16` tiles from the original input, so the division no longer reads the materialized `64x512` exp tensor. That tensor is still produced at this stage because the sum reduction consumes it.
+
+---
+
+## Stage 6: After Tiling Sum Reduction
+
+Sum reduction tiled into chunks of 16 columns, introducing partial sums followed by a final reduction.
+
+```mlir
+func.func @payload(%arg0: memref<1024x512xf32>, %arg1: memref<1024x512xf32>) {
+  %2 = scf.forall (%arg2) in (16) shared_outs(%arg3 = %1) -> (tensor<1024x512xf32>) {
+    %slice = tensor.extract_slice %0[%3, 0] [64, 512] [1, 1]
+
+    // Max reduction (64,512) -> (64,)
+    %6 = linalg.generic {iterator_types = ["parallel", "reduction"]}
+         ins(%slice) outs(%5) { maxnumf } -> tensor<64xf32>
+
+    // Center and exp (64,512) -> (64,512)
+    %7 = linalg.generic {iterator_types = ["parallel", "parallel"]}
+         ins(%slice, %6) outs(%slice_1) { subf, exp } -> tensor<64x512xf32>
+
+    // Tiled sum reduction: accumulate into 64x16 buffer
+    %11 = scf.for %arg4 = %c0 to %c512 step %c16 iter_args(%arg5 = %10) -> (tensor<64x16xf32>) {
+      %slice_2 = tensor.extract_slice %7[0, %arg4] [64, 16] [1, 1]
+
+      // Accumulate sums
+      %13 = linalg.generic {iterator_types = ["parallel", "parallel"]}
+            ins(%slice_2 : tensor<64x16xf32>) outs(%arg5 : tensor<64x16xf32>) {
+        ^bb0(%in: f32, %out: f32):
+          %14 = arith.addf %in, %out : f32
+          linalg.yield %14 : f32
+      } -> tensor<64x16xf32>
+
+      scf.yield %13 : tensor<64x16xf32>
+    }
+
+    // Final reduction: (64,16) -> (64,)
+    %reduced = linalg.reduce ins(%11 : tensor<64x16xf32>) outs(%8 : tensor<64xf32>) dimensions = [1] {
+      (%in: f32, %init: f32) {
+        %13 = arith.addf %in, %init : f32
+        linalg.yield %13 : f32
+      }
+    }
+
+    // Division loop (same as before)
+    %12 = scf.for %arg4 = %c0 to %c512 step %c16 iter_args(%arg5 = %slice_1) -> (tensor<64x512xf32>) {
+      // ... fused center+exp+div ...
+    }
+
+    scf.forall.in_parallel {
+      tensor.parallel_insert_slice %12 into %arg3[%3, 0] [64, 512] [1, 1]
+    }
+  }
+}
+```
+
+**Loop structure:**
+- **Outer**: `scf.forall` with 16 parallel iterations
+- **Sum reduction loop**: `scf.for` with 32 iterations, accumulating into `64x16`
+- **Final reduction**: `linalg.reduce` from `(64, 16)` to `(64,)`
+- **Division loop**: `scf.for` with 32 iterations
+
+**Key change:** Sum reduction split into partial accumulation (loop) + final reduction (linalg.reduce)
+
+---
+
+## Stage 7: After Fusing Center+Exp into Sum Reduction Loop
+
+The center-and-exp computation is now fused into the sum reduction loop as well.
+
+```mlir
+func.func @payload(%arg0: memref<1024x512xf32>, %arg1: memref<1024x512xf32>) {
+  %2 = scf.forall (%arg2) in (16) shared_outs(%arg3 = %1) -> (tensor<1024x512xf32>) {
+    %slice = tensor.extract_slice %0[%3, 0] [64, 512] [1, 1]
+
+    // Max reduction (64,512) -> (64,)
+    %6 = linalg.generic {iterator_types = ["parallel", "reduction"]}
+         ins(%slice) outs(%5) { maxnumf } -> tensor<64xf32>
+
+    // Sum reduction loop with fused center+exp
+    %10 = scf.for %arg4 = %c0 to %c512 step %c16 iter_args(%arg5 = %9) -> (tensor<64x16xf32>) {
+      %slice_2 = tensor.extract_slice %slice[0, %arg4] [64, 16] [1, 1]
+
+      // Fused center+exp
+      %12 = linalg.generic {iterator_types = ["parallel", "parallel"]}
+            ins(%slice_2, %6 : tensor<64x16xf32>, tensor<64xf32>) outs(%slice_3 : tensor<64x16xf32>) {
+        ^bb0(%in: f32, %in_4: f32, %out: f32):
+          %14 = arith.subf %in, %in_4 : f32
+          %15 = math.exp %14 : f32
+          linalg.yield %15 : f32
+      } -> tensor<64x16xf32>
+
+      // Accumulate into sum buffer
+      %13 = linalg.generic {iterator_types = ["parallel", "parallel"]}
+            ins(%12 : tensor<64x16xf32>) outs(%arg5 : tensor<64x16xf32>) {
+        ^bb0(%in: f32, %out: f32):
+          %14 = arith.addf %in, %out : f32
+          linalg.yield %14 : f32
+      } -> tensor<64x16xf32>
+
+      scf.yield %13 : tensor<64x16xf32>
+    }
+
+    // Final reduction: (64,16) -> (64,)
+    %reduced = linalg.reduce ins(%10 : tensor<64x16xf32>) outs(%7 : tensor<64xf32>) dimensions = [1] {
+      (%in: f32, %init: f32) {
+        %12 = arith.addf %in, %init : f32
+        linalg.yield %12 : f32
+      }
+    }
+
+    // Division loop with fused center+exp+div
+    %11 = scf.for %arg4 = %c0 to %c512 step %c16 iter_args(%arg5 = %slice_1) -> (tensor<64x512xf32>) {
+      %slice_2 = tensor.extract_slice %slice[0, %arg4] [64, 16] [1, 1]
+
+      // Fused center+exp
+      %12 = linalg.generic {iterator_types = ["parallel", "parallel"]}
+            ins(%slice_2, %6 : tensor<64x16xf32>, tensor<64xf32>) outs(%slice_3 : tensor<64x16xf32>) {
+        ^bb0(%in: f32, %in_4: f32, %out: f32):
+          %14 = arith.subf %in, %in_4 : f32
+          %15 = math.exp %14 : f32
+          linalg.yield %15 : f32
+      } -> tensor<64x16xf32>
+
+      // Division
+      %13 = linalg.generic {iterator_types = ["parallel", "parallel"]}
+            ins(%12, %reduced : tensor<64x16xf32>, tensor<64xf32>) outs(%slice_3 : tensor<64x16xf32>) {
+        ^bb0(%in: f32, %in_4: f32, %out: f32):
+          %14 = arith.divf %in, %in_4 : f32
+          linalg.yield %14 : f32
+      } -> tensor<64x16xf32>
+
+      %inserted = tensor.insert_slice %13 into %arg5[0, %arg4] [64, 16] [1, 1]
+      scf.yield %inserted : tensor<64x512xf32>
+    }
+
+    scf.forall.in_parallel {
+      tensor.parallel_insert_slice %11 into %arg3[%3, 0] [64, 512] [1, 1]
+    }
+  }
+}
+```
+
+**Loop structure:**
+- **Outer**: `scf.forall` with 16 parallel iterations
+- **Sum reduction loop**: `scf.for` with 32 iterations (fused center+exp+accumulate)
+- **Final reduction**: `linalg.reduce`
+- **Division loop**: `scf.for` with 32 iterations (fused center+exp+div)
+
+**Key change:** Center+exp is now computed twice (once for the sum, once for the division), so the intermediate `64x512` exp tensor no longer needs to be stored.
+
+---
+
+## Stage 8: After Tiling Max Reduction
+
+Max reduction also tiled into 16-column chunks with partial max followed by final reduction.
+
+```mlir
+func.func @payload(%arg0: memref<1024x512xf32>, %arg1: memref<1024x512xf32>) {
+  %2 = scf.forall (%arg2) in (16) shared_outs(%arg3 = %1) -> (tensor<1024x512xf32>) {
+    %slice = tensor.extract_slice %0[%3, 0] [64, 512] [1, 1]
+
+    // Tiled max reduction: accumulate into 64x16 buffer
+    %8 = scf.for %arg4 = %c0 to %c512 step %c16 iter_args(%arg5 = %7) -> (tensor<64x16xf32>) {
+      %slice_7 = tensor.extract_slice %slice[0, %arg4] [64, 16] [1, 1]
+
+      // Max accumulation
+      %14 = linalg.generic {iterator_types = ["parallel", "parallel"]}
+            ins(%slice_7 : tensor<64x16xf32>) outs(%slice_8 : tensor<64x16xf32>) {
+        ^bb0(%in: f32, %out: f32):
+          %15 = arith.maxnumf %in, %out : f32
+          linalg.yield %15 : f32
+      } -> tensor<64x16xf32>
+
+      %inserted = tensor.insert_slice %14 into %arg5[0, 0] [64, 16] [1, 1]
+      scf.yield %inserted : tensor<64x16xf32>
+    }
+
+    // Final max reduction: (64,16) -> (64,)
+    %reduced = linalg.reduce ins(%8 : tensor<64x16xf32>) outs(%5 : tensor<64xf32>) dimensions = [1] {
+      (%in: f32, %init: f32) {
+        %14 = arith.maxnumf %in, %init : f32
+        linalg.yield %14 : f32
+      }
+    }
+
+    // Sum reduction loop with fused center+exp
+    %12 = scf.for %arg4 = %c0 to %c512 step %c16 iter_args(%arg5 = %11) -> (tensor<64x16xf32>) {
+      %slice_7 = tensor.extract_slice %slice[0, %arg4] [64, 16] [1, 1]
+
+      // Fused center+exp using reduced max
+      %14 = linalg.generic {iterator_types = ["parallel", "parallel"]}
+            ins(%slice_7, %reduced : tensor<64x16xf32>, tensor<64xf32>) outs(%slice_8 : tensor<64x16xf32>) {
+        ^bb0(%in: f32, %in_9: f32, %out: f32):
+          %16 = arith.subf %in, %in_9 : f32
+          %17 = math.exp %16 : f32
+          linalg.yield %17 : f32
+      } -> tensor<64x16xf32>
+
+      // Sum accumulation
+      %15 = linalg.generic {iterator_types = ["parallel", "parallel"]}
+            ins(%14 : tensor<64x16xf32>) outs(%arg5 : tensor<64x16xf32>) {
+        ^bb0(%in: f32, %out: f32):
+          %16 = arith.addf %in, %out : f32
+          linalg.yield %16 : f32
+      } -> tensor<64x16xf32>
+
+      scf.yield %15 : tensor<64x16xf32>
+    }
+
+    // Final sum reduction: (64,16) -> (64,)
+    %reduced_6 = linalg.reduce ins(%12 : tensor<64x16xf32>) outs(%9 : tensor<64xf32>) dimensions = [1] {
+      (%in: f32, %init: f32) {
+        %14 = arith.addf %in, %init : f32
+        linalg.yield %14 : f32
+      }
+    }
+
+    // Division loop with fused center+exp+div
+    %13 = scf.for %arg4 = %c0 to %c512 step %c16 iter_args(%arg5 = %slice_1) -> (tensor<64x512xf32>) {
+      %slice_7 = tensor.extract_slice %slice[0, %arg4] [64, 16] [1, 1]
+
+      // Fused center+exp
+      %14 = linalg.generic {iterator_types = ["parallel", "parallel"]}
+            ins(%slice_7, %reduced : tensor<64x16xf32>, tensor<64xf32>) outs(%slice_8 : tensor<64x16xf32>) {
+        ^bb0(%in: f32, %in_9: f32, %out: f32):
+          %16 = arith.subf %in, %in_9 : f32
+          %17 = math.exp %16 : f32
+          linalg.yield %17 : f32
+      } -> tensor<64x16xf32>
+
+      // Division
+      %15 = linalg.generic {iterator_types = ["parallel", "parallel"]}
+            ins(%14, %reduced_6 : tensor<64x16xf32>, tensor<64xf32>) outs(%slice_8 : tensor<64x16xf32>) {
+        ^bb0(%in: f32, %in_9: f32, %out: f32):
+          %16 = arith.divf %in, %in_9 : f32
+          linalg.yield %16 : f32
+      } -> tensor<64x16xf32>
+
+      %inserted = tensor.insert_slice %15 into %arg5[0, %arg4] [64, 16] [1, 1]
+      scf.yield %inserted : tensor<64x512xf32>
+    }
+
+    scf.forall.in_parallel {
+      tensor.parallel_insert_slice %13 into %arg3[%3, 0] [64, 512] [1, 1]
+    }
+  }
+}
+```
+
+**Loop structure:**
+- **Outer**: `scf.forall` with 16 parallel iterations (64-row tiles)
+- **Max reduction loop**: `scf.for` with 32 iterations → `linalg.reduce`
+- **Sum reduction loop**: `scf.for` with 32 iterations → `linalg.reduce`
+- **Division loop**: `scf.for` with 32 iterations
+
+**Key change:** All three stages (max, sum, div) now use tiled loops operating on `64x16` chunks.
+
+---
+
+## Stage 9: Final Vectorized XeGPU Version
+
+This is the result after vectorization, bufferization, and conversion to XeGPU operations. It uses shared local memory (SLM) for partial reductions.
+
+```mlir
+gpu.module @payload_kernel {
+  gpu.func @payload_kernel(%arg0: memref<1024x512xf32>, %arg1: memref<1024x512xf32>) kernel {
+    %block_id_x = gpu.block_id x
+    %0 = arith.muli %block_id_x, %c64 : index
+    %subview = memref.subview %arg0[%0, 0] [64, 512] [1, 1]
+
+    // Allocate SLM buffer for partial reductions
+    %alloca = memref.alloca() : memref<64x16xf32, 3>
+    %1 = xegpu.create_mem_desc %alloca : !xegpu.mem_desc<64x16xf32>
+
+    // Max reduction loop
+    xegpu.store_matrix %cst_2, %1[0, 0]  // init with -inf
+    scf.for %arg2 = %c0 to %c512 step %c16 {
+      // Load 64x16 tile from global memory
+      %6 = xegpu.create_nd_tdesc %arg1 : !xegpu.tensor_desc<64x16xf32>
+      %7 = xegpu.load_nd %6[%0, %arg2] : vector<64x16xf32>
+
+      // Load partial max from SLM, compute max, store back
+      %8 = xegpu.load_matrix %1[0, 0] : vector<64x16xf32>
+      %9 = arith.maxnumf %7, %8 : vector<64x16xf32>
+      xegpu.store_matrix %9, %1[0, 0]
+    }
+
+    // Final max reduction across 16 columns
+    %2 = xegpu.load_matrix %1[0, 0] : vector<64x16xf32>
+    %3 = vector.multi_reduction <maxnumf>, %2, %cst_1 [1] : vector<64x16xf32> to vector<64xf32>
+
+    // Sum reduction loop
+    xegpu.store_matrix %cst_0, %1[0, 0]  // init with 0.0
+    scf.for %arg2 = %c0 to %c512 step %c16 {
+      // Load 64x16 tile
+      %6 = xegpu.create_nd_tdesc %arg1 : !xegpu.tensor_desc<64x16xf32>
+      %7 = xegpu.load_nd %6[%0, %arg2] : vector<64x16xf32>
+
+      // Fused center+exp
+      %8 = vector.broadcast %3 : vector<64xf32> to vector<16x64xf32>
+      %9 = vector.transpose %8, [1, 0] : vector<64x16xf32>
+      %10 = arith.subf %7, %9 : vector<64x16xf32>
+      %11 = math.exp %10 : vector<64x16xf32>
+
+      // Accumulate sum in SLM
+      %12 = xegpu.load_matrix %1[0, 0] : vector<64x16xf32>
+      %13 = arith.addf %11, %12 : vector<64x16xf32>
+      xegpu.store_matrix %13, %1[0, 0]
+    }
+
+    // Final sum reduction across 16 columns
+    %4 = xegpu.load_matrix %1[0, 0] : vector<64x16xf32>
+    %5 = vector.multi_reduction <add>, %4, %cst [1] : vector<64x16xf32> to vector<64xf32>
+
+    // Division loop
+    scf.for %arg2 = %c0 to %c512 step %c16 {
+      // Load 64x16 tile
+      %6 = xegpu.create_nd_tdesc %arg1 : !xegpu.tensor_desc<64x16xf32>
+      %7 = xegpu.load_nd %6[%0, %arg2] : vector<64x16xf32>
+
+      // Fused center+exp
+      %8 = vector.broadcast %3 : vector<64xf32> to vector<16x64xf32>
+      %9 = vector.transpose %8, [1, 0] : vector<64x16xf32>
+      %10 = arith.subf %7, %9 : vector<64x16xf32>
+      %11 = math.exp %10 : vector<64x16xf32>
+
+      // Division
+      %12 = vector.broadcast %5 : vector<64xf32> to vector<16x64xf32>
+      %13 = vector.transpose %12, [1, 0] : vector<64x16xf32>
+      %14 = arith.divf %11, %13 : vector<64x16xf32>
+
+      // Store result to global memory
+      %18 = xegpu.create_nd_tdesc %intptr : !xegpu.tensor_desc<64x16xf32>
+      xegpu.store_nd %14, %18[0, %arg2]
+    }
+
+    gpu.return
+  }
+}
+```
+
+**Loop structure:**
+- **Grid**: 16 blocks (one per 64-row tile)
+- **Per block**:
+  - **Max reduction loop**: 32 iterations (512/16)
+  - **Final max reduction**: `vector.multi_reduction`
+  - **Sum reduction loop**: 32 iterations (512/16)
+  - **Final sum reduction**: `vector.multi_reduction`
+  - **Division loop**: 32 iterations (512/16)
+
+**Key transformations:**
+- Linalg operations → vectorized operations on `vector<64x16xf32>`
+- Tensor buffers → SLM allocation (`memref<64x16xf32, 3>`)
+- Memory operations → XeGPU load/store operations (`xegpu.load_nd`, `xegpu.store_nd`, `xegpu.load_matrix`, `xegpu.store_matrix`)
+- GPU kernel launch with 16 blocks × 128 threads
+
+---
+
+## Summary of Transformations
+
+| Stage | Key Transformation | Loop Structure |
+|-------|-------------------|----------------|
+| 1 | Initial high-level softmax | No loops |
+| 2 | Tile parallel dimension | `scf.forall(16)` |
+| 3 | Decompose softmax | `scf.forall(16)` + 4 sequential ops |
+| 4 | Tile division | `scf.forall(16)` → `scf.for(32)` |
+| 5 | Fuse into division loop | Recompute center+exp in div loop |
+| 6 | Tile sum reduction | Add sum loop + final reduction |
+| 7 | Fuse into sum loop | Recompute center+exp in sum loop |
+| 8 | Tile max reduction | Add max loop + final reduction |
+| 9 | Vectorize + XeGPU | GPU kernel with SLM and vector ops |
+
+**Final computation pattern per GPU block:**
+1. **Max reduction**: 32-iteration loop with SLM accumulation → final reduction
+2. **Sum reduction**: 32-iteration loop (fused center+exp) with SLM accumulation → final reduction
+3. **Division**: 32-iteration loop (fused center+exp+div) writing to global memory
+
+This progressive lowering enables efficient GPU execution with:
+- Parallelism across 64-row tiles
+- SLM for partial reduction storage
+- Recomputation of center+exp to reduce memory traffic
+- Vectorized 64x16 tile operations

From 05e3e07f677e1d5d253e8c94b8d262eca9df2dd0 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Wed, 22 Apr 2026 20:14:27 +0000
Subject: [PATCH 41/51] save work

---
 lighthouse/schedule/xegpu/softmax_schedule.py | 41 ++++++++++++++++++-
 1 file changed, 40 insertions(+), 1 deletion(-)

diff --git a/lighthouse/schedule/xegpu/softmax_schedule.py b/lighthouse/schedule/xegpu/softmax_schedule.py
index 862677df..b406b5fc 100644
--- a/lighthouse/schedule/xegpu/softmax_schedule.py
+++ b/lighthouse/schedule/xegpu/softmax_schedule.py
@@ -102,6 +102,22 @@ def xegpu_softmax_transform_schedule(
         transform.yield_()
 
 
+def match_and_print_parent_function(op, msg):
+    """Get the parent function of an operation and print it.
+
+    Args:
+        op: The operation whose parent function to find
+        msg: Label to use when printing the function
+    """
+    anytype = transform.AnyOpType.get()
+    func = transform.get_parent_op(
+        anytype,
+        op,
+        op_name="func.func",
+        deduplicate=True,
+    )
+    transform.print_(target=func, name=msg)
+
 def bundle_xegpu_softmax_schedule(
     mod: ir.Value[transform.AnyOpType],
     parameters: dict,
@@ -120,6 +136,8 @@ def bundle_xegpu_softmax_schedule(
         transform.AnyOpType.get(), mod, ops=["linalg.softmax"]
     )
 
+    match_and_print_parent_function(softmax_op, "initial")
+
     # Tile the softmax operation using tile_using_forall
     tiled_op, for_op = structured.structured_tile_using_forall(
         anytype,
@@ -129,6 +147,7 @@ def bundle_xegpu_softmax_schedule(
         tile_sizes=[],
         static_tile_sizes=(parameters["wg_rows"],),
     )
+    match_and_print_parent_function(for_op, "after tiling parallel dim")
 
     func = transform.get_parent_op(
         anytype,
@@ -142,9 +161,12 @@ def bundle_xegpu_softmax_schedule(
     )
     structured.structured_decompose_interface(anytype, softmax_ops)
 
+
+
     linalg_ops = match_and_split(
         func, ops={"linalg.generic", "linalg.fill"}, nhandles=6
     )
+    match_and_print_parent_function(linalg_ops[0], "after decomposing softmax")
     max_reduction = linalg_ops[1]
     max_center_and_exp_op = linalg_ops[2]
     sum_reduction = linalg_ops[4]
@@ -156,6 +178,10 @@ def bundle_xegpu_softmax_schedule(
     _, div_loop = structured.TileUsingForOp(
         div_op, sizes=[0, reduction_step_size]
     ).results
+    # Cleanup after tiling and fusion
+    transform.apply_cse(func)
+    canonicalize(func)
+    match_and_print_parent_function(div_loop, "after tiling div")
 
     # Fuse max_center_and_exp_op into the div loop
     _, fused_loop = structured.structured_fuse_into_containing_op(
@@ -164,6 +190,10 @@ def bundle_xegpu_softmax_schedule(
         producer_op=max_center_and_exp_op,
         containing_op=div_loop,
     )
+    # Cleanup after tiling and fusion
+    transform.apply_cse(func)
+    canonicalize(func)
+    match_and_print_parent_function(fused_loop, "after fusing max_center_and_exp into div loop")
 
     # Tile the sum reduction and fuse the sub+exp producer into it
     _, _, _, sum_loop = structured.structured_tile_reduction_using_for(
@@ -174,6 +204,10 @@ def bundle_xegpu_softmax_schedule(
         target=sum_reduction,
         tile_sizes=[0, reduction_step_size],
     )
+    # Cleanup after tiling and fusion
+    transform.apply_cse(func)
+    canonicalize(func)
+    match_and_print_parent_function(sum_loop, "after tiling sum reduction")
 
     func = transform.get_parent_op(
         anytype,
@@ -193,10 +227,14 @@ def bundle_xegpu_softmax_schedule(
         producer_op=max_center_and_exp_op,
         containing_op=sum_loop,
     )
+    # Cleanup after tiling and fusion
+    transform.apply_cse(func)
+    canonicalize(func)
+    match_and_print_parent_function(fused_sum_loop, "after fusing max_center_and_exp into sum reduction loop")
 
     # Tile the max reduction.
     max_reduction = linalg_ops[0]
-    structured.structured_tile_reduction_using_for(
+    _, _, _, max_loop = structured.structured_tile_reduction_using_for(
         [anytype],
         anytype,
         anytype,
@@ -204,6 +242,7 @@ def bundle_xegpu_softmax_schedule(
         target=max_reduction,
         tile_sizes=[0, reduction_step_size],
     )
+    match_and_print_parent_function(max_loop, "after tiling max reduction")
 
     # Cleanup after tiling and fusion
     transform.apply_cse(func)

From 028b27d36d6da38031652cf6ffff94acae913c66 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Wed, 22 Apr 2026 20:23:28 +0000
Subject: [PATCH 42/51] save work

---
 .../softmax_lowering_flow.md                  | 68 +------------------
 1 file changed, 1 insertion(+), 67 deletions(-)

diff --git a/reduction_tiling_docs/softmax_lowering_flow.md b/reduction_tiling_docs/softmax_lowering_flow.md
index bccf3aa0..4996e3d7 100644
--- a/reduction_tiling_docs/softmax_lowering_flow.md
+++ b/reduction_tiling_docs/softmax_lowering_flow.md
@@ -100,16 +100,6 @@ func.func @payload(%arg0: memref<1024x512xf32>, %arg1: memref<1024x512xf32>) {
 }
 ```
 
-**Loop structure:**
-- **Outer**: `scf.forall` with 16 parallel iterations
-- Each iteration performs 4 sequential linalg ops
-
-**Key operations:**
-1. **Max reduction**: Reduces from `(64, 512)` to `(64,)` using `maxnumf`
-2. **Center+exp**: Element-wise subtract max and apply exp
-3. **Sum reduction**: Reduces from `(64, 512)` to `(64,)` using `addf`
-4. **Division**: Element-wise divide by sum
-
 ---
 
 ## Stage 4: After Tiling Division
@@ -157,12 +147,6 @@ func.func @payload(%arg0: memref<1024x512xf32>, %arg1: memref<1024x512xf32>) {
 }
 ```
 
-**Loop structure:**
-- **Outer**: `scf.forall` with 16 parallel iterations (64-row tiles)
-- **Inner**: `scf.for` with 32 sequential iterations (512/16 = 32 column tiles)
-
-**Key change:** Division now operates on `64x16` tiles instead of full `64x512`
-
 ---
 
 ## Stage 5: After Fusing Max+Center+Exp into Division Loop
@@ -219,11 +203,6 @@ func.func @payload(%arg0: memref<1024x512xf32>, %arg1: memref<1024x512xf32>) {
 }
 ```
 
-**Loop structure:**
-- **Outer**: `scf.forall` with 16 parallel iterations
-- **Inner**: `scf.for` with 32 sequential iterations
-
-**Key change:** Inside the division loop, center+exp is recomputed on `64x16` tiles from the original input, avoiding the need to store the full `64x512` exp tensor.
 
 ---
 
@@ -279,14 +258,6 @@ func.func @payload(%arg0: memref<1024x512xf32>, %arg1: memref<1024x512xf32>) {
 }
 ```
 
-**Loop structure:**
-- **Outer**: `scf.forall` with 16 parallel iterations
-- **Sum reduction loop**: `scf.for` with 32 iterations, accumulating into `64x16`
-- **Final reduction**: `linalg.reduce` from `(64, 16)` to `(64,)`
-- **Division loop**: `scf.for` with 32 iterations
-
-**Key change:** Sum reduction split into partial accumulation (loop) + final reduction (linalg.reduce)
-
 ---
 
 ## Stage 7: After Fusing Max+Center+Exp into Sum Reduction Loop
@@ -366,14 +337,6 @@ func.func @payload(%arg0: memref<1024x512xf32>, %arg1: memref<1024x512xf32>) {
 }
 ```
 
-**Loop structure:**
-- **Outer**: `scf.forall` with 16 parallel iterations
-- **Sum reduction loop**: `scf.for` with 32 iterations (fused center+exp+accumulate)
-- **Final reduction**: `linalg.reduce`
-- **Division loop**: `scf.for` with 32 iterations (fused center+exp+div)
-
-**Key change:** Center+exp is recomputed twice (once for sum, once for division) to avoid storing intermediate `64x512` tensor.
-
 ---
 
 ## Stage 8: After Tiling Max Reduction
@@ -473,14 +436,6 @@ func.func @payload(%arg0: memref<1024x512xf32>, %arg1: memref<1024x512xf32>) {
 }
 ```
 
-**Loop structure:**
-- **Outer**: `scf.forall` with 16 parallel iterations (64-row tiles)
-- **Max reduction loop**: `scf.for` with 32 iterations → `linalg.reduce`
-- **Sum reduction loop**: `scf.for` with 32 iterations → `linalg.reduce`
-- **Division loop**: `scf.for` with 32 iterations
-
-**Key change:** All three stages (max, sum, div) now use tiled loops operating on `64x16` chunks.
-
 ---
 
 ## Stage 9: Final Vectorized XeGPU Version
@@ -565,21 +520,6 @@ gpu.module @payload_kernel {
 }
 ```
 
-**Loop structure:**
-- **Grid**: 16 blocks (one per 64-row tile)
-- **Per block**:
-  - **Max reduction loop**: 32 iterations (512/16)
-  - **Final max reduction**: `vector.multi_reduction`
-  - **Sum reduction loop**: 32 iterations (512/16)
-  - **Final sum reduction**: `vector.multi_reduction`
-  - **Division loop**: 32 iterations (512/16)
-
-**Key transformations:**
-- Linalg operations → vectorized operations on `vector<64x16xf32>`
-- Tensor buffers → SLM allocation (`memref<64x16xf32, 3>`)
-- Memory operations → XeGPU load/store operations (`xegpu.load_nd`, `xegpu.store_nd`, `xegpu.load_matrix`, `xegpu.store_matrix`)
-- GPU kernel launch with 16 blocks × 128 threads
-
 ---
 
 ## Summary of Transformations
@@ -599,10 +539,4 @@ gpu.module @payload_kernel {
 **Final computation pattern per GPU block:**
 1. **Max reduction**: 32-iteration loop with SLM accumulation → final reduction
 2. **Sum reduction**: 32-iteration loop (fused center+exp) with SLM accumulation → final reduction
-3. **Division**: 32-iteration loop (fused center+exp+div) writing to global memory
-
-This progressive lowering enables efficient GPU execution with:
-- Parallelism across 64-row tiles
-- SLM for partial reduction storage
-- Recomputation of center+exp to reduce memory traffic
-- Vectorized 64x16 tile operations
+3. **Division**: 32-iteration loop (fused center+exp+div) writing to global memory
\ No newline at end of file

From cd93df60b53a0a47307e00e33d23645849ab6eb3 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Thu, 23 Apr 2026 02:10:36 +0000
Subject: [PATCH 43/51] add optimization note

---
 .../softmax_lowering_flow.md                  | 192 +++++++++++++++++-
 1 file changed, 191 insertions(+), 1 deletion(-)

diff --git a/reduction_tiling_docs/softmax_lowering_flow.md b/reduction_tiling_docs/softmax_lowering_flow.md
index 4996e3d7..c6d8ad50 100644
--- a/reduction_tiling_docs/softmax_lowering_flow.md
+++ b/reduction_tiling_docs/softmax_lowering_flow.md
@@ -539,4 +539,194 @@ gpu.module @payload_kernel {
 **Final computation pattern per GPU block:**
 1. **Max reduction**: 32-iteration loop with SLM accumulation → final reduction
 2. **Sum reduction**: 32-iteration loop (fused center+exp) with SLM accumulation → final reduction
-3. **Division**: 32-iteration loop (fused center+exp+div) writing to global memory
\ No newline at end of file
+3. **Division**: 32-iteration loop (fused center+exp+div) writing to global memory
+
+---
+
+## Optimization: Fusing Max and Sum Reduction Loops
+
+After Stage 8, we can apply an additional optimization to fuse the max reduction loop and sum reduction loop into a single loop. This reduces the number of loops from 3 to 2.
+
+### Key Insight
+
+The optimization leverages the **online softmax algorithm**, which allows us to incrementally update both the global maximum and the global sum as we process each tile of the reduction dimension. For each 16-column tile:
+
+1. Compute the **local max** for the tile
+2. Update the **global max** using the local max
+3. Compute the **local centered sum** using exp(x - local_max)
+4. **Rescale** the global sum by exp(global_max_old - global_max_new)
+5. **Add** the local sum, rescaled by exp(local_max - global_max_new), to the global sum
+
+This maintains numerical stability while processing tiles incrementally, since we adjust previous sums by the correction factor when we discover a new maximum.
+
+### Before: Separate Max and Sum Loops (3 loops total)
+
+```mlir
+func.func @payload(%arg0: memref<1024x512xf32>, %arg1: memref<1024x512xf32>) {
+  %2 = scf.forall (%arg2) in (16) shared_outs(%arg3 = %1) -> (tensor<1024x512xf32>) {
+    %slice = tensor.extract_slice %0[%3, 0] [64, 512] [1, 1]
+
+    // Loop 1: Max reduction
+    %8 = scf.for %arg4 = %c0 to %c512 step %c16 iter_args(%arg5 = %7) -> (tensor<64x16xf32>) {
+      %slice_7 = tensor.extract_slice %slice[0, %arg4] [64, 16] [1, 1]
+
+      %14 = linalg.generic {iterator_types = ["parallel", "parallel"]}
+            ins(%slice_7 : tensor<64x16xf32>) outs(%slice_8 : tensor<64x16xf32>) {
+        ^bb0(%in: f32, %out: f32):
+          %15 = arith.maxnumf %in, %out : f32
+          linalg.yield %15 : f32
+      } -> tensor<64x16xf32>
+
+      scf.yield %14 : tensor<64x16xf32>
+    }
+    %reduced = linalg.reduce ins(%8) outs(%5) dimensions = [1] { maxnumf }
+
+    // Loop 2: Sum reduction with center+exp
+    %12 = scf.for %arg4 = %c0 to %c512 step %c16 iter_args(%arg5 = %11) -> (tensor<64x16xf32>) {
+      %slice_7 = tensor.extract_slice %slice[0, %arg4] [64, 16] [1, 1]
+
+      // Center+exp using global max
+      %14 = linalg.generic {iterator_types = ["parallel", "parallel"]}
+            ins(%slice_7, %reduced : tensor<64x16xf32>, tensor<64xf32>) outs(%slice_8) {
+        ^bb0(%in: f32, %in_9: f32, %out: f32):
+          %16 = arith.subf %in, %in_9 : f32
+          %17 = math.exp %16 : f32
+          linalg.yield %17 : f32
+      } -> tensor<64x16xf32>
+
+      // Accumulate sum
+      %15 = linalg.generic {iterator_types = ["parallel", "parallel"]}
+            ins(%14) outs(%arg5) {
+        ^bb0(%in: f32, %out: f32):
+          %16 = arith.addf %in, %out : f32
+          linalg.yield %16 : f32
+      } -> tensor<64x16xf32>
+
+      scf.yield %15 : tensor<64x16xf32>
+    }
+    %reduced_6 = linalg.reduce ins(%12) outs(%9) dimensions = [1] { addf }
+
+    // Loop 3: Division with center+exp+div
+    %13 = scf.for %arg4 = %c0 to %c512 step %c16 iter_args(%arg5 = %slice_1) -> (tensor<64x512xf32>) {
+      // ... fused center+exp+div ...
+    }
+  }
+}
+```
+
+### After: Fused Max+Sum Loop (2 loops total)
+
+```mlir
+func.func @payload(%arg0: memref<1024x512xf32>, %arg1: memref<1024x512xf32>) {
+  %2 = scf.forall (%arg2) in (16) shared_outs(%arg3 = %1) -> (tensor<1024x512xf32>) {
+    %slice = tensor.extract_slice %0[%3, 0] [64, 512] [1, 1]
+
+    // Loop 1: Fused max+sum reduction (online softmax)
+    %fused = scf.for %arg4 = %c0 to %c512 step %c16
+             iter_args(%global_max = %init_max, %global_sum_buffer = %7)
+             -> (tensor<64xf32>, tensor<64x16xf32>) {
+      %slice_7 = tensor.extract_slice %slice[0, %arg4] [64, 16] [1, 1]
+
+      // Step 1: Compute local max for this tile
+      %local_max = linalg.reduce ins(%slice_7 : tensor<64x16xf32>) outs(%5 : tensor<64xf32>) dimensions = [1] {
+        (%in: f32, %init: f32) {
+          %max = arith.maxnumf %in, %init : f32
+          linalg.yield %max : f32
+        }
+      }
+
+      // Step 2: Update global max
+      %new_global_max = linalg.generic {iterator_types = ["parallel"]}
+            ins(%global_max, %local_max : tensor<64xf32>, tensor<64xf32>) outs(%out_max : tensor<64xf32>) {
+        ^bb0(%old_max: f32, %curr_max: f32, %out: f32):
+          %updated = arith.maxnumf %old_max, %curr_max : f32
+          linalg.yield %updated : f32
+      } -> tensor<64xf32>
+
+      // Step 3: Compute local centered sum: exp(x - local_max)
+      %local_exp = linalg.generic {iterator_types = ["parallel", "parallel"]}
+            ins(%slice_7, %local_max : tensor<64x16xf32>, tensor<64xf32>) outs(%slice_8 : tensor<64x16xf32>) {
+        ^bb0(%in: f32, %max: f32, %out: f32):
+          %centered = arith.subf %in, %max : f32
+          %exp_val = math.exp %centered : f32
+          linalg.yield %exp_val : f32
+      } -> tensor<64x16xf32>
+
+      // Reduce to get tile sum
+      %local_sum_buffer = linalg.reduce ins(%local_exp : tensor<64x16xf32>) outs(%9 : tensor<64x16xf32>) dimensions = [1] {
+        (%in: f32, %init: f32) {
+          %sum = arith.addf %in, %init : f32
+          linalg.yield %sum : f32
+        }
+      }
+
+      // Step 4: Rescale global sum by exp(old_max - new_max) and add local sum
+      %updated_global_sum = linalg.generic {iterator_types = ["parallel", "parallel"]}
+            ins(%global_sum_buffer, %global_max, %new_global_max, %local_sum_buffer :
+                tensor<64x16xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64x16xf32>)
+            outs(%out_sum : tensor<64x16xf32>) {
+        ^bb0(%old_sum: f32, %old_max: f32, %new_max: f32, %local: f32, %out: f32):
+          // Correction factor: exp(old_max - new_max)
+          %max_diff = arith.subf %old_max, %new_max : f32
+          %scale = math.exp %max_diff : f32
+          %rescaled_sum = arith.mulf %old_sum, %scale : f32
+
+          // Add local sum (already centered on local_max, need to rescale)
+          %local_scale_diff = arith.subf %local_max, %new_max : f32
+          %local_scale = math.exp %local_scale_diff : f32
+          %rescaled_local = arith.mulf %local, %local_scale : f32
+
+          %updated = arith.addf %rescaled_sum, %rescaled_local : f32
+          linalg.yield %updated : f32
+      } -> tensor<64x16xf32>
+
+      scf.yield %new_global_max, %updated_global_sum : tensor<64xf32>, tensor<64x16xf32>
+    }
+
+    // Extract final results
+    %final_max = %fused#0 : tensor<64xf32>
+    %final_sum_buffer = %fused#1 : tensor<64x16xf32>
+    %final_sum = linalg.reduce ins(%final_sum_buffer) outs(%9) dimensions = [1] { addf }
+
+    // Loop 2: Division with center+exp+div
+    %13 = scf.for %arg4 = %c0 to %c512 step %c16 iter_args(%arg5 = %slice_1) -> (tensor<64x512xf32>) {
+      %slice_7 = tensor.extract_slice %slice[0, %arg4] [64, 16] [1, 1]
+
+      // Center+exp
+      %14 = linalg.generic {iterator_types = ["parallel", "parallel"]}
+            ins(%slice_7, %final_max : tensor<64x16xf32>, tensor<64xf32>) outs(%slice_8) {
+        ^bb0(%in: f32, %max: f32, %out: f32):
+          %centered = arith.subf %in, %max : f32
+          %exp_val = math.exp %centered : f32
+          linalg.yield %exp_val : f32
+      } -> tensor<64x16xf32>
+
+      // Division
+      %15 = linalg.generic {iterator_types = ["parallel", "parallel"]}
+            ins(%14, %final_sum : tensor<64x16xf32>, tensor<64xf32>) outs(%slice_8) {
+        ^bb0(%exp_val: f32, %sum: f32, %out: f32):
+          %result = arith.divf %exp_val, %sum : f32
+          linalg.yield %result : f32
+      } -> tensor<64x16xf32>
+
+      %inserted = tensor.insert_slice %15 into %arg5[0, %arg4] [64, 16] [1, 1]
+      scf.yield %inserted : tensor<64x512xf32>
+    }
+  }
+}
+```
+
+### Benefits
+
+1. **Reduced loop count**: 3 loops → 2 loops (fused max+sum, division)
+2. **Better memory locality**: Input data is read only twice instead of three times
+3. **Lower latency**: One fewer synchronization point between reduction phases
+4. **Same numerical stability**: Uses the online softmax algorithm which maintains stability through incremental rescaling
+
+### Trade-offs
+
+- **Increased per-iteration complexity**: Each iteration of the fused loop performs more operations (max update, sum rescaling, correction factors)
+- **More register pressure**: Need to carry both `global_max` and `global_sum_buffer` across iterations
+- **Additional exp operations**: Computing correction factors requires exp(old_max - new_max) and exp(local_max - new_max) per tile
+
+This optimization is particularly valuable for GPU implementations where memory bandwidth is the bottleneck, as reducing the number of passes over the input data can significantly improve performance despite the increased computational complexity per iteration.

From 9bbb99c982c1e3a59bcfaa4608e1ca7f195b24ab Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Thu, 23 Apr 2026 02:19:03 +0000
Subject: [PATCH 44/51] save work

---
 reduction_tiling_docs/softmax_lowering_flow.md | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/reduction_tiling_docs/softmax_lowering_flow.md b/reduction_tiling_docs/softmax_lowering_flow.md
index c6d8ad50..45dd0249 100644
--- a/reduction_tiling_docs/softmax_lowering_flow.md
+++ b/reduction_tiling_docs/softmax_lowering_flow.md
@@ -720,13 +720,5 @@ func.func @payload(%arg0: memref<1024x512xf32>, %arg1: memref<1024x512xf32>) {
 
 1. **Reduced loop count**: 3 loops → 2 loops (fused max+sum, division)
 2. **Better memory locality**: Input data is read only twice instead of three times
-3. **Lower latency**: One fewer synchronization point between reduction phases
-4. **Same numerical stability**: Uses the online softmax algorithm which maintains stability through incremental rescaling
-
-### Trade-offs
-
-- **Increased per-iteration complexity**: Each iteration of the fused loop performs more operations (max update, sum rescaling, correction factors)
-- **More register pressure**: Need to carry both `global_max` and `global_sum_buffer` across iterations
-- **Additional exp operations**: Computing correction factors requires exp(old_max - new_max) and exp(local_max - new_max) per tile
 
 This optimization is particularly valuable for GPU implementations where memory bandwidth is the bottleneck, as reducing the number of passes over the input data can significantly improve performance despite the increased computational complexity per iteration.

From 8637269866ad578fb68bb9611b9e6cf80ec836af Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Thu, 23 Apr 2026 04:54:26 +0000
Subject: [PATCH 45/51] add attention doc

---
 .../fused_attention_tiling.md                 | 53 +++++++++++++++++++
 1 file changed, 53 insertions(+)
 create mode 100644 reduction_tiling_docs/fused_attention_tiling.md

diff --git a/reduction_tiling_docs/fused_attention_tiling.md b/reduction_tiling_docs/fused_attention_tiling.md
new file mode 100644
index 00000000..445b692a
--- /dev/null
+++ b/reduction_tiling_docs/fused_attention_tiling.md
@@ -0,0 +1,53 @@
+## Attention Tiling
+
+# Linalg level implementation
+
+Input sizes:
+- Q: 4096x64
+- K: 4096x64
+- V: 4096x64
+
+```
+func.func @attention(%Q : memref<4096x64xf32>, %K: memref<4096x64xf32>,
+%V: memref<4096x64xf32>, %out: memref<4096x64xf32>) {
+  ...
+  // Transpose K
+  %k_transpose = linalg.transpose ... -> tensor<64x4096xf32>
+
+  // QK^T
+  %QKT = linalg.matmul ins(%q, %k_transpose : tensor<4096x64xf32>, tensor<64x4096xf32>)
+                       outs(%empty_NxN : tensor<4096x4096xf32>) -> tensor<4096x4096xf32>
+
+  // Fill with -inf
+  %t_minf = linalg.fill ins(%cst_minus_inf : f32) outs(%empty_N : tensor<4096xf32>) -> tensor<4096xf32>
+
+  // Max reduce along rows
+  %max = linalg.reduce ins(%QKT : tensor<4096x4096xf32>) ... %m = arith.maximumf %in, %init : f32 -> tensor<4096xf32>
+
+  // Broadcast max
+  %maxb = linalg.broadcast ins(%max: tensor<4096xf32>) outs(%empty_NxN : tensor<4096x4096xf32>) dimensions = [1] -> tensor<4096x4096xf32>
+
+  // Subtract
+  %sub = linalg.elemwise_binary {fun = #linalg.binary_fn<sub>} ... -> tensor<4096x4096xf32>
+
+  // Exp
+  %exp = linalg.elemwise_unary {fun = #linalg.unary_fn<exp>} ... -> tensor<4096x4096xf32>
+
+  // Fill with zeros
+  %t_zeros = linalg.fill ins(%c0f : f32) outs(%empty_N : tensor<4096xf32>) -> tensor<4096xf32>
+
+  // Sum reduce along rows
+  %sum = linalg.reduce ... %s = arith.addf %in, %init : f32 ... -> tensor<4096xf32>
+
+  // Broadcast sum and div
+  %sums = linalg.broadcast ... -> tensor<4096x4096xf32>
+  %p = linalg.elemwise_binary {fun = #linalg.binary_fn<div>}
+       ins(%exp, %sums : tensor<4096x4096xf32>, tensor<4096x4096xf32>) ... -> tensor<4096x4096xf32>
+
+  // Final matmul
+  %o = linalg.matmul ins(%p, %v : tensor<4096x4096xf32>, tensor<4096x64xf32>) ... -> tensor<4096x64xf32>
+  ...
+}
+```
+
+# Stage 1: Tile the last matmul in K dim.

From 037193b2ebf52dc28a0fd4f173d97116c26a7ad5 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Thu, 23 Apr 2026 05:13:46 +0000
Subject: [PATCH 46/51] add optimization note

---
 .../fused_attention_tiling.md                 | 193 +++++++++++++++++-
 1 file changed, 190 insertions(+), 3 deletions(-)

diff --git a/reduction_tiling_docs/fused_attention_tiling.md b/reduction_tiling_docs/fused_attention_tiling.md
index 445b692a..8ac7657e 100644
--- a/reduction_tiling_docs/fused_attention_tiling.md
+++ b/reduction_tiling_docs/fused_attention_tiling.md
@@ -1,6 +1,6 @@
-## Attention Tiling
+# Attention Tiling
 
-# Linalg level implementation
+## Linalg level implementation
 
 Input sizes:
 - Q: 4096x64
@@ -50,4 +50,191 @@ func.func @attention(%Q : memref<4096x64xf32>, %K: memref<4096x64xf32>,
 }
 ```
 
-# Stage 1: Tile the last matmul in K dim.
+---
+
+## Stage 1: Tile the last matmul in K dim (tile size = 16)
+
+After tiling the final matmul `%o = linalg.matmul ins(%p, %v)` along the K dimension with tile size 16:
+
+```
+func.func @attention(%Q : memref<4096x64xf32>, %K: memref<4096x64xf32>,
+%V: memref<4096x64xf32>, %out: memref<4096x64xf32>) {
+  ...
+  // Compute p = Softmax(Q @ K^T)
+  // ...
+
+  // Final matmul TILED in K dimension (4096 / 16 = 256 tiles)
+  // Loop over K dimension: k = 0 to 4096 step 16
+  %c0 = arith.constant 0 : index
+  %c4096 = arith.constant 4096 : index
+  %c16 = arith.constant 16 : index
+
+  // Initialize output with zeros: 4096x64
+  %o_init = linalg.fill ins(%c0f : f32) outs(%empty_out : tensor<4096x64xf32>) -> tensor<4096x64xf32>
+
+  %o = scf.for %k = %c0 to %c4096 step %c16 iter_args(%o_acc = %o_init) -> (tensor<4096x64xf32>) {
+    // Extract slice from %p: 4096x16 (from columns [k:k+16])
+    %p_slice = tensor.extract_slice %p[0, %k][4096, 16][1, 1] -> tensor<4096x16xf32>
+
+    // Extract slice from %v: 16x64 (from rows [k:k+16])
+    %v_slice = tensor.extract_slice %v[%k, 0][16, 64][1, 1] -> tensor<16x64xf32>
+
+    // Partial matmul: (4096x16) @ (16x64) -> 4096x64
+    %partial = linalg.matmul ins(%p_slice, %v_slice : tensor<4096x16xf32>, tensor<16x64xf32>)
+                             outs(%empty_partial : tensor<4096x64xf32>) -> tensor<4096x64xf32>
+
+    // Accumulate: 4096x64 + 4096x64 -> 4096x64
+    %o_new = linalg.elemwise_binary {fun = #linalg.binary_fn<add>}
+             ins(%o_acc, %partial : tensor<4096x64xf32>, tensor<4096x64xf32>) ... -> tensor<4096x64xf32>
+
+    scf.yield %o_new : tensor<4096x64xf32>
+  }
+  ...
+}
+```
+
+## Stage 2: Tile and fuse the softmax computation (tile size = 16)
+
+After tiling the softmax computation in the reduction dimension with tile size 16 and fusing operations, following the pattern from the softmax lowering flow:
+
+```
+func.func @attention(%Q : memref<4096x64xf32>, %K: memref<4096x64xf32>,
+%V: memref<4096x64xf32>, %out: memref<4096x64xf32>) {
+  ...
+  // === First matmul: Q @ K^T ===
+  // Transpose K: 4096x64 -> 64x4096
+  %k_transpose = linalg.transpose ... -> tensor<64x4096xf32>
+
+  // QK^T: (4096x64) @ (64x4096) -> 4096x4096
+  %QKT = linalg.matmul ins(%q, %k_transpose : tensor<4096x64xf32>, tensor<64x4096xf32>)
+                       outs(%empty_NxN : tensor<4096x4096xf32>) -> tensor<4096x4096xf32>
+
+
+  // === Tiled and fused softmax computation ===
+  // Tile size = 16, number of tiles = 4096 / 16 = 256
+  %c0 = arith.constant 0 : index
+  %c4096 = arith.constant 4096 : index
+  %c16 = arith.constant 16 : index
+
+  // Initialize max buffer with -inf: 4096x16
+  %max_buffer_init = linalg.fill ins(%cst_minus_inf : f32) outs(%empty_max_buf : tensor<4096x16xf32>) -> tensor<4096x16xf32>
+
+  // Loop 1: Max reduction (4096 / 16 = 256 iterations)
+  %max_buffer = scf.for %k = %c0 to %c4096 step %c16 iter_args(%max_acc = %max_buffer_init) -> (tensor<4096x16xf32>) {
+    // Extract slice from QKT: 4096x16
+    %QKT_slice = tensor.extract_slice %QKT[0, %k][4096, 16][1, 1] -> tensor<4096x16xf32>
+
+    // Max accumulation: 4096x16
+    %max_new = linalg.generic {iterator_types = ["parallel", "parallel"]}
+               ins(%QKT_slice : tensor<4096x16xf32>) outs(%max_acc : tensor<4096x16xf32>) {
+      ^bb0(%in: f32, %out: f32):
+        %max_val = arith.maxnumf %in, %out : f32
+        linalg.yield %max_val : f32
+    } -> tensor<4096x16xf32>
+
+    scf.yield %max_new : tensor<4096x16xf32>
+  }
+
+  // Final max reduction: 4096x16 -> 4096
+  %max = linalg.reduce ins(%max_buffer : tensor<4096x16xf32>) outs(%empty_N : tensor<4096xf32>) dimensions = [1] {
+    (%in: f32, %init: f32) {
+      %m = arith.maxnumf %in, %init : f32
+      linalg.yield %m : f32
+    }
+  } -> tensor<4096xf32>
+
+
+  // Initialize sum buffer with zeros: 4096x16
+  %sum_buffer_init = linalg.fill ins(%c0f : f32) outs(%empty_sum_buf : tensor<4096x16xf32>) -> tensor<4096x16xf32>
+
+  // Loop 2: Sum reduction with fused center+exp (256 iterations)
+  %sum_buffer = scf.for %k = %c0 to %c4096 step %c16 iter_args(%sum_acc = %sum_buffer_init) -> (tensor<4096x16xf32>) {
+    // Extract slice from QKT: 4096x16
+    %QKT_slice = tensor.extract_slice %QKT[0, %k][4096, 16][1, 1] -> tensor<4096x16xf32>
+
+    // Fused center+exp: 4096x16
+    %exp_slice = linalg.generic {iterator_types = ["parallel", "parallel"]}
+                 ins(%QKT_slice, %max : tensor<4096x16xf32>, tensor<4096xf32>) outs(%empty_slice : tensor<4096x16xf32>) {
+      ^bb0(%in: f32, %max_val: f32, %out: f32):
+        %centered = arith.subf %in, %max_val : f32
+        %exp_val = math.exp %centered : f32
+        linalg.yield %exp_val : f32
+    } -> tensor<4096x16xf32>
+
+    // Sum accumulation: 4096x16
+    %sum_new = linalg.generic {iterator_types = ["parallel", "parallel"]}
+               ins(%exp_slice : tensor<4096x16xf32>) outs(%sum_acc : tensor<4096x16xf32>) {
+      ^bb0(%in: f32, %out: f32):
+        %sum_val = arith.addf %in, %out : f32
+        linalg.yield %sum_val : f32
+    } -> tensor<4096x16xf32>
+
+    scf.yield %sum_new : tensor<4096x16xf32>
+  }
+
+  // Final sum reduction: 4096x16 -> 4096
+  %sum = linalg.reduce ins(%sum_buffer : tensor<4096x16xf32>) outs(%empty_N : tensor<4096xf32>) dimensions = [1] {
+    (%in: f32, %init: f32) {
+      %s = arith.addf %in, %init : f32
+      linalg.yield %s : f32
+    }
+  } -> tensor<4096xf32>
+
+
+  // Initialize output buffer for softmax: 4096x4096
+  %p_init = linalg.fill ins(%c0f : f32) outs(%empty_NxN : tensor<4096x4096xf32>) -> tensor<4096x4096xf32>
+
+  // Loop 3: Division with fused center+exp+div (256 iterations)
+  %p = scf.for %k = %c0 to %c4096 step %c16 iter_args(%p_acc = %p_init) -> (tensor<4096x4096xf32>) {
+    // Extract slice from QKT: 4096x16
+    %QKT_slice = tensor.extract_slice %QKT[0, %k][4096, 16][1, 1] -> tensor<4096x16xf32>
+
+    // Fused center+exp: 4096x16
+    %exp_slice = linalg.generic {iterator_types = ["parallel", "parallel"]}
+                 ins(%QKT_slice, %max : tensor<4096x16xf32>, tensor<4096xf32>) outs(%empty_slice : tensor<4096x16xf32>) {
+      ^bb0(%in: f32, %max_val: f32, %out: f32):
+        %centered = arith.subf %in, %max_val : f32
+        %exp_val = math.exp %centered : f32
+        linalg.yield %exp_val : f32
+    } -> tensor<4096x16xf32>
+
+    // Division: 4096x16
+    %p_slice = linalg.generic {iterator_types = ["parallel", "parallel"]}
+               ins(%exp_slice, %sum : tensor<4096x16xf32>, tensor<4096xf32>) outs(%empty_slice : tensor<4096x16xf32>) {
+      ^bb0(%exp_val: f32, %sum_val: f32, %out: f32):
+        %result = arith.divf %exp_val, %sum_val : f32
+        linalg.yield %result : f32
+    } -> tensor<4096x16xf32>
+
+    // Insert slice back: 4096x16 -> 4096x4096
+    %p_new = tensor.insert_slice %p_slice into %p_acc[0, %k][4096, 16][1, 1] -> tensor<4096x4096xf32>
+
+    scf.yield %p_new : tensor<4096x4096xf32>
+  }
+  // Result: %p contains softmax(Q @ K^T) with shape 4096x4096
+
+
+  // === Final matmul TILED in K dimension (256 iterations) ===
+  // Initialize output with zeros: 4096x64
+  %o_init = linalg.fill ins(%c0f : f32) outs(%empty_out : tensor<4096x64xf32>) -> tensor<4096x64xf32>
+
+  %o = scf.for %k = %c0 to %c4096 step %c16 iter_args(%o_acc = %o_init) -> (tensor<4096x64xf32>) {
+    // Extract slice from %p: 4096x16
+    %p_slice = tensor.extract_slice %p[0, %k][4096, 16][1, 1] -> tensor<4096x16xf32>
+
+    // Extract slice from %v: 16x64
+    %v_slice = tensor.extract_slice %v[%k, 0][16, 64][1, 1] -> tensor<16x64xf32>
+
+    // Partial matmul: (4096x16) @ (16x64) -> 4096x64
+    %partial = linalg.matmul ins(%p_slice, %v_slice : tensor<4096x16xf32>, tensor<16x64xf32>)
+                             outs(%empty_partial : tensor<4096x64xf32>) -> tensor<4096x64xf32>
+
+    // Accumulate: 4096x64 + 4096x64 -> 4096x64
+    %o_new = linalg.elemwise_binary {fun = #linalg.binary_fn<add>}
+             ins(%o_acc, %partial : tensor<4096x64xf32>, tensor<4096x64xf32>) ... -> tensor<4096x64xf32>
+
+    scf.yield %o_new : tensor<4096x64xf32>
+  }
+  ...
+}
+```

From 32133cf387feab4bf23d62781a3f23354900bf53 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Thu, 23 Apr 2026 05:16:10 +0000
Subject: [PATCH 47/51] tag code fences in fused attention tiling doc as mlir

---
 reduction_tiling_docs/fused_attention_tiling.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/reduction_tiling_docs/fused_attention_tiling.md b/reduction_tiling_docs/fused_attention_tiling.md
index 8ac7657e..f392f6fe 100644
--- a/reduction_tiling_docs/fused_attention_tiling.md
+++ b/reduction_tiling_docs/fused_attention_tiling.md
@@ -7,7 +7,7 @@ Input sizes:
 - K: 4096x64
 - V: 4096x64
 
-```
+```mlir
 func.func @attention(%Q : memref<4096x64xf32>, %K: memref<4096x64xf32>,
 %V: memref<4096x64xf32>, %out: memref<4096x64xf32>) {
   ...
@@ -56,7 +56,7 @@ func.func @attention(%Q : memref<4096x64xf32>, %K: memref<4096x64xf32>,
 
 After tiling the final matmul `%o = linalg.matmul ins(%p, %v)` along the K dimension with tile size 16:
 
-```
+```mlir
 func.func @attention(%Q : memref<4096x64xf32>, %K: memref<4096x64xf32>,
 %V: memref<4096x64xf32>, %out: memref<4096x64xf32>) {
   ...
@@ -97,7 +97,7 @@ func.func @attention(%Q : memref<4096x64xf32>, %K: memref<4096x64xf32>,
 
 After tiling the softmax computation in the reduction dimension with tile size 16 and fusing operations, following the pattern from the softmax lowering flow:
 
-```
+```mlir
 func.func @attention(%Q : memref<4096x64xf32>, %K: memref<4096x64xf32>,
 %V: memref<4096x64xf32>, %out: memref<4096x64xf32>) {
   ...

From 3583e8133e5e2eca216434ded2d21f0608aced14 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Thu, 23 Apr 2026 14:00:39 +0000
Subject: [PATCH 48/51] use 64x64 Q/out shapes in fused attention tiling doc

---
 .../fused_attention_tiling.md                 | 172 +++++++++---------
 1 file changed, 86 insertions(+), 86 deletions(-)

diff --git a/reduction_tiling_docs/fused_attention_tiling.md b/reduction_tiling_docs/fused_attention_tiling.md
index f392f6fe..24375a16 100644
--- a/reduction_tiling_docs/fused_attention_tiling.md
+++ b/reduction_tiling_docs/fused_attention_tiling.md
@@ -3,49 +3,49 @@
 ## Linalg level implementation
 
 Input sizes:
-- Q: 4096x64
+- Q: 64x64
 - K: 4096x64
 - V: 4096x64
 
 ```mlir
-func.func @attention(%Q : memref<4096x64xf32>, %K: memref<4096x64xf32>,
-%V: memref<4096x64xf32>, %out: memref<4096x64xf32>) {
+func.func @attention(%Q : memref<64x64xf32>, %K: memref<4096x64xf32>,
+%V: memref<4096x64xf32>, %out: memref<64x64xf32>) {
   ...
   // Transpose K
   %k_transpose = linalg.transpose ... -> tensor<64x4096xf32>
 
   // QK^T
-  %QKT = linalg.matmul ins(%q, %k_transpose : tensor<4096x64xf32>, tensor<64x4096xf32>)
-                       outs(%empty_NxN : tensor<4096x4096xf32>) -> tensor<4096x4096xf32>
+  %QKT = linalg.matmul ins(%q, %k_transpose : tensor<64x64xf32>, tensor<64x4096xf32>)
+                       outs(%empty_NxN : tensor<64x4096xf32>) -> tensor<64x4096xf32>
 
   // Fill with -inf
-  %t_minf = linalg.fill ins(%cst_minus_inf : f32) outs(%empty_N : tensor<4096xf32>) -> tensor<4096xf32>
+  %t_minf = linalg.fill ins(%cst_minus_inf : f32) outs(%empty_N : tensor<64xf32>) -> tensor<64xf32>
 
   // Max reduce along rows
-  %max = linalg.reduce ins(%QKT : tensor<4096x4096xf32>) ... %m = arith.maximumf %in, %init : f32 -> tensor<4096xf32>
+  %max = linalg.reduce ins(%QKT : tensor<64x4096xf32>) ... %m = arith.maxnumf %in, %init : f32 -> tensor<64xf32>
 
   // Broadcast max
-  %maxb = linalg.broadcast ins(%max: tensor<4096xf32>) outs(%empty_NxN : tensor<4096x4096xf32>) dimensions = [1] -> tensor<4096x4096xf32>
+  %maxb = linalg.broadcast ins(%max: tensor<64xf32>) outs(%empty_NxN : tensor<64x4096xf32>) dimensions = [1] -> tensor<64x4096xf32>
 
   // Subtract
-  %sub = linalg.elemwise_binary {fun = #linalg.binary_fn<sub>} ... -> tensor<4096x4096xf32>
+  %sub = linalg.elemwise_binary {fun = #linalg.binary_fn<sub>} ... -> tensor<64x4096xf32>
 
   // Exp
-  %exp = linalg.elemwise_unary {fun = #linalg.unary_fn<exp>} ... -> tensor<4096x4096xf32>
+  %exp = linalg.elemwise_unary {fun = #linalg.unary_fn<exp>} ... -> tensor<64x4096xf32>
 
   // Fill with zeros
-  %t_zeros = linalg.fill ins(%c0f : f32) outs(%empty_N : tensor<4096xf32>) -> tensor<4096xf32>
+  %t_zeros = linalg.fill ins(%c0f : f32) outs(%empty_N : tensor<64xf32>) -> tensor<64xf32>
 
   // Sum reduce along rows
-  %sum = linalg.reduce ... %s = arith.addf %in, %init : f32 ... -> tensor<4096xf32>
+  %sum = linalg.reduce ... %s = arith.addf %in, %init : f32 ... -> tensor<64xf32>
 
   // Broadcast sum and div
-  %sums = linalg.broadcast ... -> tensor<4096x4096xf32>
+  %sums = linalg.broadcast ... -> tensor<64x4096xf32>
   %p = linalg.elemwise_binary {fun = #linalg.binary_fn<div>}
-       ins(%exp, %sums : tensor<4096x4096xf32>, tensor<4096x4096xf32>) ... -> tensor<4096x4096xf32>
+       ins(%exp, %sums : tensor<64x4096xf32>, tensor<64x4096xf32>) ... -> tensor<64x4096xf32>
 
   // Final matmul
-  %o = linalg.matmul ins(%p, %v : tensor<4096x4096xf32>, tensor<4096x64xf32>) ... -> tensor<4096x64xf32>
+  %o = linalg.matmul ins(%p, %v : tensor<64x4096xf32>, tensor<4096x64xf32>) ... -> tensor<64x64xf32>
   ...
 }
 ```
@@ -57,8 +57,8 @@ func.func @attention(%Q : memref<4096x64xf32>, %K: memref<4096x64xf32>,
 After tiling the final matmul `%o = linalg.matmul ins(%p, %v)` along the K dimension with tile size 16:
 
 ```mlir
-func.func @attention(%Q : memref<4096x64xf32>, %K: memref<4096x64xf32>,
-%V: memref<4096x64xf32>, %out: memref<4096x64xf32>) {
+func.func @attention(%Q : memref<64x64xf32>, %K: memref<4096x64xf32>,
+%V: memref<4096x64xf32>, %out: memref<64x64xf32>) {
   ...
   // Compute p = Softmax(Q @ K^T)
   // ...
@@ -69,25 +69,25 @@ func.func @attention(%Q : memref<4096x64xf32>, %K: memref<4096x64xf32>,
   %c4096 = arith.constant 4096 : index
   %c16 = arith.constant 16 : index
 
-  // Initialize output with zeros: 4096x64
-  %o_init = linalg.fill ins(%c0f : f32) outs(%empty_out : tensor<4096x64xf32>) -> tensor<4096x64xf32>
+  // Initialize output with zeros: 64x64
+  %o_init = linalg.fill ins(%c0f : f32) outs(%empty_out : tensor<64x64xf32>) -> tensor<64x64xf32>
 
-  %o = scf.for %k = %c0 to %c4096 step %c16 iter_args(%o_acc = %o_init) -> (tensor<4096x64xf32>) {
-    // Extract slice from %p: 4096x16 (from columns [k:k+16])
-    %p_slice = tensor.extract_slice %p[0, %k][4096, 16][1, 1] -> tensor<4096x16xf32>
+  %o = scf.for %k = %c0 to %c4096 step %c16 iter_args(%o_acc = %o_init) -> (tensor<64x64xf32>) {
+    // Extract slice from %p: 64x16 (from columns [k:k+16])
+    %p_slice = tensor.extract_slice %p[0, %k][64, 16][1, 1] -> tensor<64x16xf32>
 
     // Extract slice from %v: 16x64 (from rows [k:k+16])
     %v_slice = tensor.extract_slice %v[%k, 0][16, 64][1, 1] -> tensor<16x64xf32>
 
-    // Partial matmul: (4096x16) @ (16x64) -> 4096x64
-    %partial = linalg.matmul ins(%p_slice, %v_slice : tensor<4096x16xf32>, tensor<16x64xf32>)
-                             outs(%empty_partial : tensor<4096x64xf32>) -> tensor<4096x64xf32>
+    // Partial matmul: (64x16) @ (16x64) -> 64x64
+    %partial = linalg.matmul ins(%p_slice, %v_slice : tensor<64x16xf32>, tensor<16x64xf32>)
+                             outs(%empty_partial : tensor<64x64xf32>) -> tensor<64x64xf32>
 
-    // Accumulate: 4096x64 + 4096x64 -> 4096x64
+    // Accumulate: 64x64 + 64x64 -> 64x64
     %o_new = linalg.elemwise_binary {fun = #linalg.binary_fn<add>}
-             ins(%o_acc, %partial : tensor<4096x64xf32>, tensor<4096x64xf32>) ... -> tensor<4096x64xf32>
+             ins(%o_acc, %partial : tensor<64x64xf32>, tensor<64x64xf32>) ... -> tensor<64x64xf32>
 
-    scf.yield %o_new : tensor<4096x64xf32>
+    scf.yield %o_new : tensor<64x64xf32>
   }
   ...
 }
@@ -98,16 +98,16 @@ func.func @attention(%Q : memref<4096x64xf32>, %K: memref<4096x64xf32>,
 After tiling the softmax computation in the reduction dimension with tile size 16 and fusing operations, following the pattern from the softmax lowering flow:
 
 ```mlir
-func.func @attention(%Q : memref<4096x64xf32>, %K: memref<4096x64xf32>,
-%V: memref<4096x64xf32>, %out: memref<4096x64xf32>) {
+func.func @attention(%Q : memref<64x64xf32>, %K: memref<4096x64xf32>,
+%V: memref<4096x64xf32>, %out: memref<64x64xf32>) {
   ...
   // === First matmul: Q @ K^T ===
   // Transpose K: 4096x64 -> 64x4096
   %k_transpose = linalg.transpose ... -> tensor<64x4096xf32>
 
-  // QK^T: (4096x64) @ (64x4096) -> 4096x4096
-  %QKT = linalg.matmul ins(%q, %k_transpose : tensor<4096x64xf32>, tensor<64x4096xf32>)
-                       outs(%empty_NxN : tensor<4096x4096xf32>) -> tensor<4096x4096xf32>
+  // QK^T: (64x64) @ (64x4096) -> 64x4096
+  %QKT = linalg.matmul ins(%q, %k_transpose : tensor<64x64xf32>, tensor<64x4096xf32>)
+                       outs(%empty_NxN : tensor<64x4096xf32>) -> tensor<64x4096xf32>
 
 
   // === Tiled and fused softmax computation ===
@@ -116,124 +116,124 @@ func.func @attention(%Q : memref<4096x64xf32>, %K: memref<4096x64xf32>,
   %c4096 = arith.constant 4096 : index
   %c16 = arith.constant 16 : index
 
-  // Initialize max buffer with -inf: 4096x16
-  %max_buffer_init = linalg.fill ins(%cst_minus_inf : f32) outs(%empty_max_buf : tensor<4096x16xf32>) -> tensor<4096x16xf32>
+  // Initialize max buffer with -inf: 64x16
+  %max_buffer_init = linalg.fill ins(%cst_minus_inf : f32) outs(%empty_max_buf : tensor<64x16xf32>) -> tensor<64x16xf32>
 
   // Loop 1: Max reduction (4096 / 16 = 256 iterations)
-  %max_buffer = scf.for %k = %c0 to %c4096 step %c16 iter_args(%max_acc = %max_buffer_init) -> (tensor<4096x16xf32>) {
-    // Extract slice from QKT: 4096x16
-    %QKT_slice = tensor.extract_slice %QKT[0, %k][4096, 16][1, 1] -> tensor<4096x16xf32>
+  %max_buffer = scf.for %k = %c0 to %c4096 step %c16 iter_args(%max_acc = %max_buffer_init) -> (tensor<64x16xf32>) {
+    // Extract slice from QKT: 64x16
+    %QKT_slice = tensor.extract_slice %QKT[0, %k][64, 16][1, 1] -> tensor<64x16xf32>
 
-    // Max accumulation: 4096x16
+    // Max accumulation: 64x16
     %max_new = linalg.generic {iterator_types = ["parallel", "parallel"]}
-               ins(%QKT_slice : tensor<4096x16xf32>) outs(%max_acc : tensor<4096x16xf32>) {
+               ins(%QKT_slice : tensor<64x16xf32>) outs(%max_acc : tensor<64x16xf32>) {
       ^bb0(%in: f32, %out: f32):
         %max_val = arith.maxnumf %in, %out : f32
         linalg.yield %max_val : f32
-    } -> tensor<4096x16xf32>
+    } -> tensor<64x16xf32>
 
-    scf.yield %max_new : tensor<4096x16xf32>
+    scf.yield %max_new : tensor<64x16xf32>
   }
 
-  // Final max reduction: 4096x16 -> 4096
-  %max = linalg.reduce ins(%max_buffer : tensor<4096x16xf32>) outs(%empty_N : tensor<4096xf32>) dimensions = [1] {
+  // Final max reduction: 64x16 -> 64
+  %max = linalg.reduce ins(%max_buffer : tensor<64x16xf32>) outs(%empty_N : tensor<64xf32>) dimensions = [1] {
     (%in: f32, %init: f32) {
       %m = arith.maxnumf %in, %init : f32
       linalg.yield %m : f32
     }
-  } -> tensor<4096xf32>
+  } -> tensor<64xf32>
 
 
-  // Initialize sum buffer with zeros: 4096x16
-  %sum_buffer_init = linalg.fill ins(%c0f : f32) outs(%empty_sum_buf : tensor<4096x16xf32>) -> tensor<4096x16xf32>
+  // Initialize sum buffer with zeros: 64x16
+  %sum_buffer_init = linalg.fill ins(%c0f : f32) outs(%empty_sum_buf : tensor<64x16xf32>) -> tensor<64x16xf32>
 
   // Loop 2: Sum reduction with fused center+exp (256 iterations)
-  %sum_buffer = scf.for %k = %c0 to %c4096 step %c16 iter_args(%sum_acc = %sum_buffer_init) -> (tensor<4096x16xf32>) {
-    // Extract slice from QKT: 4096x16
-    %QKT_slice = tensor.extract_slice %QKT[0, %k][4096, 16][1, 1] -> tensor<4096x16xf32>
+  %sum_buffer = scf.for %k = %c0 to %c4096 step %c16 iter_args(%sum_acc = %sum_buffer_init) -> (tensor<64x16xf32>) {
+    // Extract slice from QKT: 64x16
+    %QKT_slice = tensor.extract_slice %QKT[0, %k][64, 16][1, 1] -> tensor<64x16xf32>
 
-    // Fused center+exp: 4096x16
+    // Fused center+exp: 64x16
     %exp_slice = linalg.generic {iterator_types = ["parallel", "parallel"]}
-                 ins(%QKT_slice, %max : tensor<4096x16xf32>, tensor<4096xf32>) outs(%empty_slice : tensor<4096x16xf32>) {
+                 ins(%QKT_slice, %max : tensor<64x16xf32>, tensor<64xf32>) outs(%empty_slice : tensor<64x16xf32>) {
       ^bb0(%in: f32, %max_val: f32, %out: f32):
         %centered = arith.subf %in, %max_val : f32
         %exp_val = math.exp %centered : f32
         linalg.yield %exp_val : f32
-    } -> tensor<4096x16xf32>
+    } -> tensor<64x16xf32>
 
-    // Sum accumulation: 4096x16
+    // Sum accumulation: 64x16
     %sum_new = linalg.generic {iterator_types = ["parallel", "parallel"]}
-               ins(%exp_slice : tensor<4096x16xf32>) outs(%sum_acc : tensor<4096x16xf32>) {
+               ins(%exp_slice : tensor<64x16xf32>) outs(%sum_acc : tensor<64x16xf32>) {
       ^bb0(%in: f32, %out: f32):
         %sum_val = arith.addf %in, %out : f32
         linalg.yield %sum_val : f32
-    } -> tensor<4096x16xf32>
+    } -> tensor<64x16xf32>
 
-    scf.yield %sum_new : tensor<4096x16xf32>
+    scf.yield %sum_new : tensor<64x16xf32>
   }
 
-  // Final sum reduction: 4096x16 -> 4096
-  %sum = linalg.reduce ins(%sum_buffer : tensor<4096x16xf32>) outs(%empty_N : tensor<4096xf32>) dimensions = [1] {
+  // Final sum reduction: 64x16 -> 64
+  %sum = linalg.reduce ins(%sum_buffer : tensor<64x16xf32>) outs(%empty_N : tensor<64xf32>) dimensions = [1] {
     (%in: f32, %init: f32) {
       %s = arith.addf %in, %init : f32
       linalg.yield %s : f32
     }
-  } -> tensor<4096xf32>
+  } -> tensor<64xf32>
 
 
-  // Initialize output buffer for softmax: 4096x4096
-  %p_init = linalg.fill ins(%c0f : f32) outs(%empty_NxN : tensor<4096x4096xf32>) -> tensor<4096x4096xf32>
+  // Initialize output buffer for softmax: 64x4096
+  %p_init = linalg.fill ins(%c0f : f32) outs(%empty_NxN : tensor<64x4096xf32>) -> tensor<64x4096xf32>
 
   // Loop 3: Division with fused center+exp+div (256 iterations)
-  %p = scf.for %k = %c0 to %c4096 step %c16 iter_args(%p_acc = %p_init) -> (tensor<4096x4096xf32>) {
-    // Extract slice from QKT: 4096x16
-    %QKT_slice = tensor.extract_slice %QKT[0, %k][4096, 16][1, 1] -> tensor<4096x16xf32>
+  %p = scf.for %k = %c0 to %c4096 step %c16 iter_args(%p_acc = %p_init) -> (tensor<64x4096xf32>) {
+    // Extract slice from QKT: 64x16
+    %QKT_slice = tensor.extract_slice %QKT[0, %k][64, 16][1, 1] -> tensor<64x16xf32>
 
-    // Fused center+exp: 4096x16
+    // Fused center+exp: 64x16
     %exp_slice = linalg.generic {iterator_types = ["parallel", "parallel"]}
-                 ins(%QKT_slice, %max : tensor<4096x16xf32>, tensor<4096xf32>) outs(%empty_slice : tensor<4096x16xf32>) {
+                 ins(%QKT_slice, %max : tensor<64x16xf32>, tensor<64xf32>) outs(%empty_slice : tensor<64x16xf32>) {
       ^bb0(%in: f32, %max_val: f32, %out: f32):
         %centered = arith.subf %in, %max_val : f32
         %exp_val = math.exp %centered : f32
         linalg.yield %exp_val : f32
-    } -> tensor<4096x16xf32>
+    } -> tensor<64x16xf32>
 
-    // Division: 4096x16
+    // Division: 64x16
     %p_slice = linalg.generic {iterator_types = ["parallel", "parallel"]}
-               ins(%exp_slice, %sum : tensor<4096x16xf32>, tensor<4096xf32>) outs(%empty_slice : tensor<4096x16xf32>) {
+               ins(%exp_slice, %sum : tensor<64x16xf32>, tensor<64xf32>) outs(%empty_slice : tensor<64x16xf32>) {
       ^bb0(%exp_val: f32, %sum_val: f32, %out: f32):
         %result = arith.divf %exp_val, %sum_val : f32
         linalg.yield %result : f32
-    } -> tensor<4096x16xf32>
+    } -> tensor<64x16xf32>
 
-    // Insert slice back: 4096x16 -> 4096x4096
-    %p_new = tensor.insert_slice %p_slice into %p_acc[0, %k][4096, 16][1, 1] -> tensor<4096x4096xf32>
+    // Insert slice back: 64x16 -> 64x4096
+    %p_new = tensor.insert_slice %p_slice into %p_acc[0, %k][64, 16][1, 1] -> tensor<64x4096xf32>
 
-    scf.yield %p_new : tensor<4096x4096xf32>
+    scf.yield %p_new : tensor<64x4096xf32>
   }
-  // Result: %p contains softmax(Q @ K^T) with shape 4096x4096
+  // Result: %p contains softmax(Q @ K^T) with shape 64x4096
 
 
   // === Final matmul TILED in K dimension (256 iterations) ===
-  // Initialize output with zeros: 4096x64
-  %o_init = linalg.fill ins(%c0f : f32) outs(%empty_out : tensor<4096x64xf32>) -> tensor<4096x64xf32>
+  // Initialize output with zeros: 64x64
+  %o_init = linalg.fill ins(%c0f : f32) outs(%empty_out : tensor<64x64xf32>) -> tensor<64x64xf32>
 
-  %o = scf.for %k = %c0 to %c4096 step %c16 iter_args(%o_acc = %o_init) -> (tensor<4096x64xf32>) {
-    // Extract slice from %p: 4096x16
-    %p_slice = tensor.extract_slice %p[0, %k][4096, 16][1, 1] -> tensor<4096x16xf32>
+  %o = scf.for %k = %c0 to %c4096 step %c16 iter_args(%o_acc = %o_init) -> (tensor<64x64xf32>) {
+    // Extract slice from %p: 64x16
+    %p_slice = tensor.extract_slice %p[0, %k][64, 16][1, 1] -> tensor<64x16xf32>
 
     // Extract slice from %v: 16x64
     %v_slice = tensor.extract_slice %v[%k, 0][16, 64][1, 1] -> tensor<16x64xf32>
 
-    // Partial matmul: (4096x16) @ (16x64) -> 4096x64
-    %partial = linalg.matmul ins(%p_slice, %v_slice : tensor<4096x16xf32>, tensor<16x64xf32>)
-                             outs(%empty_partial : tensor<4096x64xf32>) -> tensor<4096x64xf32>
+    // Partial matmul: (64x16) @ (16x64) -> 64x64
+    %partial = linalg.matmul ins(%p_slice, %v_slice : tensor<64x16xf32>, tensor<16x64xf32>)
+                             outs(%empty_partial : tensor<64x64xf32>) -> tensor<64x64xf32>
 
-    // Accumulate: 4096x64 + 4096x64 -> 4096x64
+    // Accumulate: 64x64 + 64x64 -> 64x64
     %o_new = linalg.elemwise_binary {fun = #linalg.binary_fn<add>}
-             ins(%o_acc, %partial : tensor<4096x64xf32>, tensor<4096x64xf32>) ... -> tensor<4096x64xf32>
+             ins(%o_acc, %partial : tensor<64x64xf32>, tensor<64x64xf32>) ... -> tensor<64x64xf32>
 
-    scf.yield %o_new : tensor<4096x64xf32>
+    scf.yield %o_new : tensor<64x64xf32>
   }
   ...
 }

From 06cf4f2ee2958a457332c06bec75a87613bbfcc8 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Thu, 23 Apr 2026 21:26:59 +0000
Subject: [PATCH 49/51] save work

---
 examples/xegpu/fused_attention.py             |   4 +-
 .../mlir_gen/gpu_fused_attention_payload.py   |  98 +++----
 .../xegpu/fused_attention_schedule.py         | 243 +++++++++---------
 3 files changed, 179 insertions(+), 166 deletions(-)

diff --git a/examples/xegpu/fused_attention.py b/examples/xegpu/fused_attention.py
index da61b3f7..2bf59882 100644
--- a/examples/xegpu/fused_attention.py
+++ b/examples/xegpu/fused_attention.py
@@ -193,13 +193,13 @@ def parse_cli():
     parser.add_argument(
         "--batch-size",
         type=int,
-        default=2,
+        default=1,
         help="Batch size (Z)",
     )
     parser.add_argument(
         "--num-heads",
         type=int,
-        default=8,
+        default=1,
         help="Number of attention heads (H)",
     )
     parser.add_argument(
diff --git a/lighthouse/ingress/mlir_gen/gpu_fused_attention_payload.py b/lighthouse/ingress/mlir_gen/gpu_fused_attention_payload.py
index c2f3d4ec..07bc3665 100644
--- a/lighthouse/ingress/mlir_gen/gpu_fused_attention_payload.py
+++ b/lighthouse/ingress/mlir_gen/gpu_fused_attention_payload.py
@@ -49,81 +49,81 @@ def payload(output, Q_arg, K_arg, V_arg):
             K_tensor = emit_buf_to_tensor(K_arg, restrict=True)
             V_tensor = emit_buf_to_tensor(V_arg, restrict=True)
 
-            # Collapse first 2 dimensions (Z, H) into a batch dimension
-            # From (Z, H, n_ctx, n_head) to (Z*H, n_ctx, n_head)
-            batch_dim = Z * H
-            collapsed_shape_3d = (batch_dim, n_ctx, n_head)
+            # Collapse (Z, H, n_ctx, n_head) to (Z*H*n_ctx, n_head) so plain 2D matmuls can be used.
+            # NOTE(review): rows from different batches/heads now share one softmax; assumes Z*H == 1 - confirm.
+            batch_dim = Z * H * n_ctx
+            collapsed_shape_2d = (batch_dim, n_head)
 
-            Q_3d = tensor.collapse_shape(
-                ir.RankedTensorType.get(collapsed_shape_3d, dtype),
+            Q_2d = tensor.collapse_shape(
+                ir.RankedTensorType.get(collapsed_shape_2d, dtype),
                 Q_tensor,
-                reassociation=[[0, 1], [2], [3]],
+                reassociation=[[0, 1, 2], [3]],
             )
-            K_3d = tensor.collapse_shape(
-                ir.RankedTensorType.get(collapsed_shape_3d, dtype),
+            K_2d = tensor.collapse_shape(
+                ir.RankedTensorType.get(collapsed_shape_2d, dtype),
                 K_tensor,
-                reassociation=[[0, 1], [2], [3]],
+                reassociation=[[0, 1, 2], [3]],
             )
-            V_3d = tensor.collapse_shape(
-                ir.RankedTensorType.get(collapsed_shape_3d, dtype),
+            V_2d = tensor.collapse_shape(
+                ir.RankedTensorType.get(collapsed_shape_2d, dtype),
                 V_tensor,
-                reassociation=[[0, 1], [2], [3]],
+                reassociation=[[0, 1, 2], [3]],
             )
 
             # Step 1: Transpose K to get K^T
-            # Permute from (batch_dim, n_ctx, n_head) to (batch_dim, n_head, n_ctx)
-            kt_shape_3d = (batch_dim, n_head, n_ctx)
-            kt_init = tensor.empty(kt_shape_3d, dtype)
-            K_transposed = linalg.transpose(K_3d, outs=[kt_init], permutation=[0, 2, 1])
-
-            # Step 2: Compute Q @ K^T using batch_matmul
-            # Q: (batch_dim, n_ctx, n_head) @ K^T: (batch_dim, n_head, n_ctx)
-            # Result: (batch_dim, n_ctx, n_ctx)
-            qkt_shape_3d = (batch_dim, n_ctx, n_ctx)
-            qkt_init = tensor.empty(qkt_shape_3d, dtype)
+            # Transpose from (batch_dim, n_head) to (n_head, batch_dim)
+            kt_shape_2d = (n_head, batch_dim)
+            kt_init = tensor.empty(kt_shape_2d, dtype)
+            K_transposed = linalg.transpose(K_2d, outs=[kt_init], permutation=[1, 0])
+
+            # Step 2: Compute Q @ K^T using matmul
+            # Q: (batch_dim, n_head) @ K^T: (n_head, batch_dim)
+            # Result: (batch_dim, batch_dim)
+            qkt_shape_2d = (batch_dim, batch_dim)
+            qkt_init = tensor.empty(qkt_shape_2d, dtype)
             # Initialize with zeros for matmul accumulation
             zero = arith.constant(dtype, 0.0)
             qkt_init_filled = linalg.fill(zero, outs=[qkt_init])
 
-            # Batch matmul: Q @ K^T
-            qkt = linalg.batch_matmul(Q_3d, K_transposed, outs=[qkt_init_filled])
+            # Matmul: Q @ K^T
+            qkt = linalg.matmul(Q_2d, K_transposed, outs=[qkt_init_filled])
 
-            # Step 3: Scale by 1/sqrt(n_head)
-            scale_factor = 1.0 / math.sqrt(n_head)
-            scale_const = arith.constant(dtype, scale_factor)
+            # # Step 3 (disabled: softmax below consumes unscaled qkt): Scale by 1/sqrt(n_head)
+            # scale_factor = 1.0 / math.sqrt(n_head)
+            # scale_const = arith.constant(dtype, scale_factor)
 
-            # Create a tensor filled with the scale factor
-            scale_tensor_init = tensor.empty(qkt_shape_3d, dtype)
-            scale_tensor = linalg.fill(scale_const, outs=[scale_tensor_init])
+            # # Create a tensor filled with the scale factor
+            # scale_tensor_init = tensor.empty(qkt_shape_2d, dtype)
+            # scale_tensor = linalg.fill(scale_const, outs=[scale_tensor_init])
 
-            # Elementwise multiply qkt with scale tensor
-            scaled_qkt_init = tensor.empty(qkt_shape_3d, dtype)
-            scaled_qkt = linalg.mul(qkt, scale_tensor, outs=[scaled_qkt_init])
+            # # Elementwise multiply qkt with scale tensor
+            # scaled_qkt_init = tensor.empty(qkt_shape_2d, dtype)
+            # scaled_qkt = linalg.mul(qkt, scale_tensor, outs=[scaled_qkt_init])
 
-            # Step 4: Apply softmax along the last dimension (dim=2 in 3D)
-            softmax_init = tensor.empty(qkt_shape_3d, dtype)
+            # Step 4: Apply softmax along the last dimension (dim=1 in 2D)
+            softmax_init = tensor.empty(qkt_shape_2d, dtype)
             attention_weights = linalg.softmax(
-                result=[ir.RankedTensorType.get(qkt_shape_3d, dtype)],
-                input=scaled_qkt,
+                result=[ir.RankedTensorType.get(qkt_shape_2d, dtype)],
+                input=qkt,
                 output=softmax_init,
-                dimension=2,
+                dimension=1,
             )
 
-            # Step 5: Multiply attention weights by V using batch_matmul
-            # attention_weights: (batch_dim, n_ctx, n_ctx) @ V: (batch_dim, n_ctx, n_head)
-            # Result: (batch_dim, n_ctx, n_head)
-            output_3d_init = tensor.empty(collapsed_shape_3d, dtype)
-            output_3d_init_filled = linalg.fill(zero, outs=[output_3d_init])
+            # Step 5: Multiply attention weights by V using matmul
+            # attention_weights: (batch_dim, batch_dim) @ V: (batch_dim, n_head)
+            # Result: (batch_dim, n_head)
+            output_2d_init = tensor.empty(collapsed_shape_2d, dtype)
+            output_2d_init_filled = linalg.fill(zero, outs=[output_2d_init])
 
-            result_3d = linalg.batch_matmul(
-                attention_weights, V_3d, outs=[output_3d_init_filled]
+            result_2d = linalg.matmul(
+                attention_weights, V_2d, outs=[output_2d_init_filled]
             )
 
-            # Expand back to 4D: (Z*H, n_ctx, n_head) -> (Z, H, n_ctx, n_head)
+            # Expand back to 4D: (Z*H*n_ctx, n_head) -> (Z, H, n_ctx, n_head)
             result = tensor.expand_shape(
                 ir.RankedTensorType.get(shape, dtype),
-                result_3d,
-                reassociation=[[0, 1], [2], [3]],
+                result_2d,
+                reassociation=[[0, 1, 2], [3]],
                 output_shape=[],
                 static_output_shape=shape,
             )
diff --git a/lighthouse/schedule/xegpu/fused_attention_schedule.py b/lighthouse/schedule/xegpu/fused_attention_schedule.py
index 6ea15081..b629cdfb 100644
--- a/lighthouse/schedule/xegpu/fused_attention_schedule.py
+++ b/lighthouse/schedule/xegpu/fused_attention_schedule.py
@@ -110,133 +110,146 @@ def bundle_xegpu_fused_attention_schedule(
 
     anytype = transform.AnyOpType.get()
     anyvalue = transform.AnyValueType.get()
-    # Match all matmul operations - there should be 2:
-    # 1. Q @ K^T
-    # 2. attention_weights @ V
-    matmul_ops = match_and_split(mod, ops={"linalg.matmul"}, nhandles=2)
+    # # Match all matmul operations - there should be 2:
+    # # 1. Q @ K^T
+    # # 2. attention_weights @ V
+    # matmul_ops = match_and_split(mod, ops={"linalg.batch_matmul"}, nhandles=2)
+
+    # # Get the last matmul (attention_weights @ V)
+    # last_matmul = matmul_ops[1]
+    # func = transform.get_parent_op(
+    #     anytype,
+    #     last_matmul,
+    #     op_name="func.func",
+    #     deduplicate=True,
+    # )
+
+    # # Tile the last matmul in the batch dimension using tile_using_forall
+    # # Batch dimension is the first dimension (collapsed_dim = Z * H * n_ctx)
+    # # Extract workgroup tile size from parameters
+    # wg_tile_size = parameters["wg_tile_size"]
+
+    # tiled_matmul, forall_loop = structured.structured_tile_using_forall(
+    #     anytype,
+    #     anytype,
+    #     last_matmul,
+    #     num_threads=[],
+    #     tile_sizes=[],
+    #     static_tile_sizes=(1, wg_tile_size, 0),
+    # )
+
+    # # Fuse the softmax producer into forall
+    # softmax_ops = match_and_split(func, ops={"linalg.softmax"}, nhandles=1)
+    # softmax_op = softmax_ops[0]
+    # fused_softmax_op, forall_loop = structured.structured_fuse_into_containing_op(
+    #     anytype,
+    #     anytype,
+    #     producer_op=softmax_op,
+    #     containing_op=forall_loop,
+    # )
+    # transform.apply_cse(func)
+    # canonicalize(func)
 
-    # Get the last matmul (attention_weights @ V)
-    last_matmul = matmul_ops[1]
-    func = transform.get_parent_op(
-        anytype,
-        last_matmul,
-        op_name="func.func",
-        deduplicate=True,
-    )
+    # # Fuse linalg.mul (scaling) into forall
+    # mul_ops = match_and_split(func, ops={"linalg.mul"}, nhandles=1)
+    # mul_op = mul_ops[0]
+    # _, forall_loop = structured.structured_fuse_into_containing_op(
+    #     anytype,
+    #     anytype,
+    #     producer_op=mul_op,
+    #     containing_op=forall_loop,
+    # )
+    # transform.apply_cse(func)
+    # canonicalize(func)
 
-    # Tile the last matmul in the batch dimension using tile_using_forall
-    # Batch dimension is the first dimension (collapsed_dim = Z * H * n_ctx)
-    # Extract workgroup tile size from parameters
-    wg_tile_size = parameters["wg_tile_size"]
+    # # Fuse the first matmul (Q @ K^T) into forall
+    # matmul_ops = match_and_split(
+    #     func, ops={"linalg.batch_matmul"}, nhandles=2
+    # )  # Two matmuls are present.
+    # first_matmul = matmul_ops[0]
+    # _, forall_loop = structured.structured_fuse_into_containing_op(
+    #     anytype,
+    #     anytype,
+    #     producer_op=first_matmul,
+    #     containing_op=forall_loop,
+    # )
+    # transform.apply_cse(func)
+    # canonicalize(func)
 
-    tiled_matmul, forall_loop = structured.structured_tile_using_forall(
-        anytype,
-        anytype,
-        last_matmul,
-        num_threads=[],
-        tile_sizes=[],
-        static_tile_sizes=(wg_tile_size, 0),
-    )
+    # # Fuse linalg.transpose (K transpose) into forall
+    # transpose_ops = match_and_split(func, ops={"linalg.transpose"}, nhandles=1)
+    # transpose_op = transpose_ops[0]
+    # _, forall_loop = structured.structured_fuse_into_containing_op(
+    #     anytype,
+    #     anytype,
+    #     producer_op=transpose_op,
+    #     containing_op=forall_loop,
+    # )
+    # transform.apply_cse(func)
+    # canonicalize(func)
 
-    # Fuse the softmax producer into forall
-    softmax_ops = match_and_split(func, ops={"linalg.softmax"}, nhandles=1)
-    softmax_op = softmax_ops[0]
-    fused_softmax_op, forall_loop = structured.structured_fuse_into_containing_op(
-        anytype,
-        anytype,
-        producer_op=softmax_op,
-        containing_op=forall_loop,
-    )
-    transform.apply_cse(func)
-    canonicalize(func)
+    # # At this point all of the key operations are fused into the forall loop.
+    # # Remaining linalg.fill ops can be fused trivially.
+    # fill_ops = match_and_split(func, ops={"linalg.fill"}, nhandles=3)
+    # for fill_op in fill_ops:
+    #     _, forall_loop = structured.structured_fuse_into_containing_op(
+    #         anytype,
+    #         anytype,
+    #         producer_op=fill_op,
+    #         containing_op=forall_loop,
+    #     )
+    #     transform.apply_cse(func)
+    #     canonicalize(func)
+
+    # # tensor.empty() holding the result of transpose can be fused.
+    # transpose_op = match_and_split(func, ops={"linalg.transpose"}, nhandles=1)[0]
+    # transpose_init = transform.get_producer_of_operand(
+    #     anytype, transpose_op, operand_number=1
+    # )
+    # _, forall_loop = structured.structured_fuse_into_containing_op(
+    #     anytype,
+    #     anytype,
+    #     producer_op=transpose_init,
+    #     containing_op=forall_loop,
+    # )
+    # transform.apply_cse(func)
+    # canonicalize(func)
 
-    # Fuse linalg.mul (scaling) into forall
-    mul_ops = match_and_split(func, ops={"linalg.mul"}, nhandles=1)
-    mul_op = mul_ops[0]
-    _, forall_loop = structured.structured_fuse_into_containing_op(
-        anytype,
-        anytype,
-        producer_op=mul_op,
-        containing_op=forall_loop,
-    )
-    transform.apply_cse(func)
-    canonicalize(func)
-
-    # Fuse the first matmul (Q @ K^T) into forall
-    matmul_ops = match_and_split(
-        func, ops={"linalg.matmul"}, nhandles=2
-    )  # Two matmuls are present.
-    first_matmul = matmul_ops[0]
-    _, forall_loop = structured.structured_fuse_into_containing_op(
-        anytype,
-        anytype,
-        producer_op=first_matmul,
-        containing_op=forall_loop,
-    )
-    transform.apply_cse(func)
-    canonicalize(func)
+    # # tensor.empty() ops holding the result of the softmax can also be fused.
+    # softmax_op = match_and_split(func, ops={"linalg.softmax"}, nhandles=1)[0]
+    # softmax_init = transform.get_producer_of_operand(
+    #     anytype, softmax_op, operand_number=1
+    # )
+    # _, forall_loop = structured.structured_fuse_into_containing_op(
+    #     anytype,
+    #     anytype,
+    #     producer_op=softmax_init,
+    #     containing_op=forall_loop,
+    # )
+    # transform.apply_cse(func)
+    # canonicalize(func)
 
-    # Fuse linalg.transpose (K transpose) into forall
-    transpose_ops = match_and_split(func, ops={"linalg.transpose"}, nhandles=1)
-    transpose_op = transpose_ops[0]
-    _, forall_loop = structured.structured_fuse_into_containing_op(
-        anytype,
-        anytype,
-        producer_op=transpose_op,
-        containing_op=forall_loop,
-    )
-    transform.apply_cse(func)
-    canonicalize(func)
-
-    # At this point all of the key operations are fused into the forall loop.
-    # Remaining linalg.fill ops can be fused trivially.
-    fill_ops = match_and_split(func, ops={"linalg.fill"}, nhandles=3)
-    for fill_op in fill_ops:
-        _, forall_loop = structured.structured_fuse_into_containing_op(
-            anytype,
-            anytype,
-            producer_op=fill_op,
-            containing_op=forall_loop,
-        )
-        transform.apply_cse(func)
-        canonicalize(func)
 
-    # tensor.empty() holding the result of transpose can be fused.
-    transpose_op = match_and_split(func, ops={"linalg.transpose"}, nhandles=1)[0]
-    transpose_init = transform.get_producer_of_operand(
-        anytype, transpose_op, operand_number=1
-    )
-    _, forall_loop = structured.structured_fuse_into_containing_op(
-        anytype,
-        anytype,
-        producer_op=transpose_init,
-        containing_op=forall_loop,
-    )
-    transform.apply_cse(func)
-    canonicalize(func)
+    if stop_at_stage == "outer-tiled":
+        raise PipelineInterrupt()
 
-    # tensor.empty() ops holding the result of the softmax can also be fused.
-    softmax_op = match_and_split(func, ops={"linalg.softmax"}, nhandles=1)[0]
-    softmax_init = transform.get_producer_of_operand(
-        anytype, softmax_op, operand_number=1
-    )
-    _, forall_loop = structured.structured_fuse_into_containing_op(
+    # Match the last matmul (attention_weights @ V)
+    # There should be 2 matmuls: Q @ K^T and attention_weights @ V
+    matmul_ops = match_and_split(mod, ops={"linalg.matmul"}, nhandles=2)
+    last_matmul = matmul_ops[1]
+
+    # Tile the last matmul in the K dimension only (reduction dimension)
+    # Matmul shape: (512, 512) @ (512, 64)
+    # Tile sizes: [M, N, K] = [0, 0, 32] - only tile the K dimension
+    _, _, _, sum_loop = structured.structured_tile_reduction_using_for(
+        [anytype],
+        anytype,
         anytype,
         anytype,
-        producer_op=softmax_init,
-        containing_op=forall_loop,
+        target=last_matmul,
+        tile_sizes=[0, 0, 32],
     )
-    transform.apply_cse(func)
-    canonicalize(func)
-
-    if stop_at_stage == "outer-tiled":
-        raise PipelineInterrupt()
 
-    # # vectorize (placeholder)
-    # # func = structured.VectorizeChildrenAndApplyPatternsOp(
-    # #     func,
-    # #     fold_type_extensions_into_contract=True,
-    # # ).result
     # transform.apply_cse(func)
     # canonicalize(func)
 

From 1caf9ce26b085531a625ca8d8c2dce7b47cfeb91 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Thu, 23 Apr 2026 21:27:08 +0000
Subject: [PATCH 50/51] save work

---
 lighthouse/ingress/mlir_gen/gpu_fused_attention_payload.py | 2 --
 lighthouse/schedule/xegpu/fused_attention_schedule.py      | 2 --
 2 files changed, 4 deletions(-)

diff --git a/lighthouse/ingress/mlir_gen/gpu_fused_attention_payload.py b/lighthouse/ingress/mlir_gen/gpu_fused_attention_payload.py
index 07bc3665..d4a80856 100644
--- a/lighthouse/ingress/mlir_gen/gpu_fused_attention_payload.py
+++ b/lighthouse/ingress/mlir_gen/gpu_fused_attention_payload.py
@@ -1,7 +1,5 @@
 """Generate MLIR payload for GPU fused attention operation."""
 
-import math
-
 from mlir import ir
 from mlir.dialects import arith, bufferization, linalg, tensor
 
diff --git a/lighthouse/schedule/xegpu/fused_attention_schedule.py b/lighthouse/schedule/xegpu/fused_attention_schedule.py
index b629cdfb..6217d835 100644
--- a/lighthouse/schedule/xegpu/fused_attention_schedule.py
+++ b/lighthouse/schedule/xegpu/fused_attention_schedule.py
@@ -7,7 +7,6 @@
 from mlir.dialects.transform import structured
 
 from lighthouse.pipeline.helper import (
-    canonicalize,
     match,
     match_and_split,
     PipelineInterrupt,
@@ -229,7 +228,6 @@ def bundle_xegpu_fused_attention_schedule(
     # transform.apply_cse(func)
     # canonicalize(func)
 
-
     if stop_at_stage == "outer-tiled":
         raise PipelineInterrupt()
 

From d1e3c3f831a4cb7fb363914fbfb25c3fa92e038a Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge@intel.com>
Date: Fri, 24 Apr 2026 22:03:58 +0000
Subject: [PATCH 51/51] add pdf slices

---
 .../softmax_lowering_flow.pdf                 | Bin 0 -> 211002 bytes
 .../softmax_lowering_flow.tex                 | 567 ++++++++++++++++++
 2 files changed, 567 insertions(+)
 create mode 100644 reduction_tiling_docs/softmax_lowering_flow.pdf
 create mode 100644 reduction_tiling_docs/softmax_lowering_flow.tex

diff --git a/reduction_tiling_docs/softmax_lowering_flow.pdf b/reduction_tiling_docs/softmax_lowering_flow.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..ed96b0bbc56ed9e1bb11a065f78d7899a59dd377
GIT binary patch
literal 211002
zcmb4}Q<N>uwyn#nY<rb$+qP}nwr$(CZQC|hd6lhO|Jmo^w6^!dzBA@SW{Zc2=#eqT
zmwhC$oRA1LJq<G?@!Znz8YCkdJ{`WTfdwQN7bLBev5l#d89pmL8~(q3khCJ^R!+ta
z__QKc`cB3|#)h^=#*jQbkd96c#`@NfZW~vs)3%EA(7nCA1EWuM@F$$u@#qEz4b7}W
z`5slq3mfL!MYku0C(GlSsxU)Ckcf#XT;B$qPqVKTAt+`qb7$SS<)Y`d5c7Lo$~)?G
zJCs8po04iSC$dp7E0hm)mR+}X+WkCE_G1)8Ys};@BC&B$CH{+|btB{!$&v_f&aP0~
zNZM?Barl{up}+JJHN_@^su-h3v6(rW$Tbd<m*hPEID?(cWj7i-)LAxQ6?g{8ISrR8
zW_+>9raH>hk=RD1ZRo1vR1iF-#1ZSs88@x#Q-H@n3!7V!-F{|uOV1<2!cbTediVJC
z<vBE^W6Xt)MN-J8h2x8IE64<>8M{E>-;rhr!b?vUm^f~Ui-T%QFq_qa!pn&F-Uv%x
z0R&ZX$&n{AohDm&XGw&nDrdA<$I!=RNk*j;EOtUFxq!aCms_0`X>%NSO;ZY{CeN?L
z9#P4NvbVYqA*IK5*R64#>dRF*V`SktBYl&g&8~c>g1<kbj(;!oZ0Nisil%LmIXvKj
z7i=WhfMWvd8mNa(O#rET%$;JR^BtzO#s$zP<w4P`Q9KWcF3h||j7<u1g;6CwpztSG
zxu|i^H79d}QaFZft&)M1>qkVAmOtE7znPy+hg~<$>l_le7bB%`stK->bKI;{?JB(X
z5G1Mdd!20>ckMnKWj?-uq3Isz`^G*$-my0OFs#rx`cS92Y_2lqQL&FI8jumBxYMV2
zRm_o8#tkV}6YJUUC=85sz?A=zd93#ZikJRVHN{q05qv1Lz&G+$X}m@^>Uq!D{(%=d
zSRG#I`X1gGL<B|pc=cto)6MQ&47~yhd`~=7#Fg!ZUdF{f@ImR7Gi8(Vd@=ld7S61H
zcV=R7aO9skd9`f9oL9OE{NW<8_%c2U6>@0ABW=>4ZS^<B*sW7%OLyA}EOGtQz}cPk
zlYy7>Ew<$O{_iio*{)<rV;iIYeXxJ7{y7w^bpL)j=;@i*|NGM+X^X`Q-*cgQ(*{Hl
zGPs%m=s>Q4_DJ61CFy8q{EFa4%p3pl;Uh@XPU)`5Dvi|22cxD1d-6JMCq`laJL$!^
z<Vj>e4BeJret?58n^}O9G(4b9GxC0Y9w{FUi<o`>o9$|7J9v#0M^^k5|1B53VGi~<
zO@Hr39Nn7Orx>PM&ACyN-gAJG1UbOS!0w<>FJ2R&=t*9<s<mFZYA$F6K(#L3A?iW_
zJA55M5{&XK*R0W%6WJ}*Nu&H`)P4iq5FV$AUU>Dk;|@O(9fs^Api8PU5Ia~XKVDBC
z6AJY^N7Ocv4Jss~h@P6K7l@D6(2{^KucXh?*e?^1C^8;iF(1AB9U$x_@c|ja#Jxf-
z>rFjQr&!7OZw)+T$)2oR{t}7U&=kgmff2rUsaCI4$>*vu<|a!^^(Yq!9wC>aHyA}n
z@WXYe132l!KJqb+YKTNK{gP~WI~mR}*BW^SKF`X)B7fjSq_9Tu@NuNgL2vkQ?w@a3
zz^7fxgm1<7?WciOEB#|3@@n6Ka0)1$YZP~$p@O^kbg1z?fx-e)AoIz#*S%&DzZFf}
zb1zV?fQAueiFS8n7-OBNDvDqux(S74KSN3mHVvz!*9s4LvgPT51937+d-X@?eQN*b
zeY0Boge)ch1&k`84f2WmpL0TdUn#+F6n?P4hCT9r2-zSXEqhZ?X|}d|Sn2U{k<r`0
zg8j!!b8gUF*OQNeeUfCnl$^M8sl@Hvh)zX(IF~{jn`R<3uFQDHUja*IxqjClABmlU
zJEp>6o-Hn}f33Iq2<Cov_ZWle%hqubvt)F(I%T@E4lGG;s9<N!mn*%SqYlE$up5>1
zgv_qAY7Q*T9g)=ZK>Pm5=KcM#Ba=4jY1-lQ#*NX}*Wo2g))S#GW`AMSgrEgDD{$Fh
zU;fp4EW~Gfi;Jf($~N=(_zb2;snJBU7FG%!%G2xMwV+|`;&nh{Gj4%}anohWiiw2?
z<<D+h4SDiwqaFK#Vuc(8b;kyhGL4VU@@o<^>1+O?BYL!0v_DU&0({!~9~;iq{8zVc
zK^>!5MDONqpG%f!y^@__94$1X0;ZXpP$wEqHJe$yjzw9Fjog}!$3I^do#_r#EoOZ0
zV>#N+wq%;7c(S#vnUR;vgU1~mt!jm0bj2o&rGNBUBW61`v9wNAT29Agx4F}>ELt3s
z2DG2uyg81Y>=rOEv%n(js*iX&_$Hv!yBu;K9iSE68&3u6D_)b-y9O-Dxg`}hkbxtQ
z)~B49k70B4A~wB`CHe^m=DN}wDuFY2tI~Y1sz|9j`>vb8=eeJ=q)yBku;(N9a_yTA
zb72UTw;HR^>GNYex$HP#yl_dp`3--y;LOe$8HRb5U6C~`WX^w02GhO28o6+4Q8A%W
z<LzsK<#;Z3K1*cj-^STSg<L~TZ9v58ShowzJDQ5N2eUT^nkt(ZOMardaUOGshXNb<
z&sVVAd#6pr35vYp<~Mut2=fHdiqD6RBkyy6-;uv9C_<8d0{8#A_Wt)^Mo-81AEFr}
z8{>Z+&D7NYiDo{}wR?w<r#0?)fNu(Uun9oIhVRqkf%uaR0608JEU(X>AzBWq3miO0
zB-C=iB&mWkqv`v0CS)@vQH`ylUZrTKpl_OTY>OhyB9tW2=0P`?HWM{5JeTyJbv+0n
z&C&58+tX__XHH?09xE(6q$Si7<mvuPXj0<adDcy?Rt;Po#-g5<>j3y-6mElG215>$
zUWC?rfcG?PGJX@Bpq&Qkdm)z|BRqA5a6@OzmYI2pPMXMKX!Op5x1t{B@ByZhc?iNO
zKJqJj$n%tPHEDIySywMXvB>j=Q;}~`>OG^?tr3)sFvVs`4li7Nek~9(pbY?c3^|ru
zLl5}o4?rBo5f(<3e((?ENAM7v3vo!DW}-dyT)4^_-BUqUidC^s0hKUuAk;D?Q<+Xh
z*G?<Ee~Pa3Nuw>XvKrAt$qfCBy14xfwx!;lGaD2~ThEPdh~2?rbLL-VSJ(reoc=zI
zi|Jh#&eaL-nKL-vv$;PB(%wQ@-%j8ACiH)K%G`a=`?Pl6$cEHq$vd3l!57>ZR5xp>
zNf`KV@<lR==jZMBBl3$$yEN`dK!7$ct0dooJl$wfDw4I|je)yov8E$Om~|a!AcGWD
z>*1U25XO=UM@W$yn1pdFlU#P}Ih|+s?_7JjM#GLXZAGR9vPZH8Yj$KDbLM^&$>pgR
zrLDP+yS3)}vEKNA&8lJOKgY;Q+@&6W+-apwm21Rlx6sn#cVv}N2S*DcJfqN@D}rsC
z1ScJnZ>jG=@01f^S6`74?;%HRgM;MoX^i)%7$=`n9wgGcRHiuT>-46O7S8ud@>gQz
z@UUdk+E$~G>C|^9UG`?-;pCp0c@<Y1ov7N`4KgP}vNQ%{>o!E5TQTNHi#8{;Ali!f
z(Jyx{dnrX)FyhfJ40SHfm)<|ej<**s--h2g;n?+qT)}L5Qmb2&%ayy_zgo29Z6{aM
zoHSl|(0@91<W0xO;@ih=wv(XthuhNYMx!<U@$qg}kLfLX-0XB;cI(>Mc)ZAzr}k#<
z#<Kl$p}ax+oOwa!S5w17i%M<Y(V-jLXSXUed_Q@!>c|Bv*%lOu>DCyW8Dw_zVs!-0
zyHF(I{XCy$f7kXagwo0j@A3f2G&RjLcYvsXs9LMx34u=r;t0?lTA=Y2Rp8*{9X#jb
zNE{rHbqJczYytQV4?mcMzRSIbyyL`kwk#sbVlV%u>nR8x(mT%yBg*~M?+LDC&`m7^
zl`;JO27?#Ng0JGKtcBS!_PD$vd5~j>=eSL$pYN9*bI9CWkF@@*1<|%%aj_b{JhVBC
z8i>G|q7WPIa!g^gef7{jtZ~=~jRl(9<7k<PC*G#H74PU1L(9d9K*3QNN#TZNoq{LE
z=2;+}9>RpOZ3`;2XxYklvZzU2=&Nt2ecd-N*17)o?#gBHmv5MN#B5K1zfMfDmyXSQ
z*HW2QvsIF1<5kB=Rp#1`m136S^6b*t=?;U#*f~YZ(C-T458Tlws0tA4ir6i?w*!<3
zuxpAV<!cDc-P}V~wP40&VWM!cgImoJTj|Asy}cKb7J`;E64mdZn9OrhF%!^jI5acL
z6LHKor6hKEII%bpuAC;68WHf?q$bX~R1#}^bShqab6&vx`Jzt;a*kQ=5^|Tw5v9fo
zAL3MIka3JmuGiKxRV9m+->8)G)hCtHpK=Icezs?z327~ae-<OI50M&)rK+tDlML-R
zy8t#lA8p{>YQK~p7$u`q3t8TmuLn^m9QrL7do%y0lyE{EsB0Lh>+px~u8P-X+9z3b
zsY#*TzZ)lXB{PZhh%WO;KGX{3+JwLzg>(OX+}+ZqFZ~a3o00xM#BFvurvEx_tN)8a
z-+w67q7r5o4w${k43VgtsKr(W9^G;rl|nLxXz;A}ddhBGTfA~@p+X#L$<MT1@Q%>*
zWjapM-er)@({X?`2k58i@(rf5R4{4Kn@f=gnMNXD<$A51KMdv+(^a90V79dBXI3=U
z=&UqqxZ!bDo6pR_nDyvMoFf{QcN;EmkE6LXET=dF1W16|t`QE>=%&#Y!*KC+L&Xs0
z-O2dt=BMw0eR|CB)Fr~LEn2EX<{e(HFwt=iED?wDHYtX}b9DkUEscF-{b8w#xKYZ(
zVAPkauzrJ(#hgD+!))d(dF8(9#%Pw}Nh(m9;I9t_w#LYUJO$i2=2G=YEBTciQVNin
zAd=j`!Us+Txn3Ngf17ORD&PCM;Gj{l_C`aVAe+zq3RSj}kGRIx!2q*R*0ZSbotE74
zse(Dc!e+Y9J%<fE6}3tk*PW^K?W4-_=Vc=c#3yuWZzw8+cl-~?MngLW>2LmL7RVkJ
za4waSoh*u3Uo&{T(@5Kz??&vWUBdBi$@gu8{ah)1b5Zf~UPHuo+NS0dsK`vUJr=#g
zA_cpj=v^Qn7VYy-^aiu2te&_=c^P!9mRc!F6crG=HjksQ6gop$B_Ql;L4lTHijLo@
z<ccgBUzCE^EK#~-Ib`~i9oW0uYyg8cp0owE5y)OBwvQ^vn7TNjst%kIKQh<29>toA
z@&MD-9Y|UgREKe>yz^nK)2p;<^~X&kZrUc-5kOw%Fm7O>GS5Dk(DP1`QB9@z^=QX@
z0HTXtoXu*}%wb?0Jre``4Wvn;rFx=TXcHbUyWZU*9MJKjk&6T7TU$zF`eI$NncOW@
z!+(U5Ws3A^mqtdXYvt&l_>E?W-?pD24$9LQpsm$7dEn*j%!mn{+!V}(0}w^-gHtO4
zrPO|4z>qy<WAOM6NLC|9Ra3G!&t&17yA^FRIqt>s;&l6$T#*IZ_g2F_3O$V9e=+d*
zRwDoc42V$%4(ySw%oa?!qirT|ppt^eVGzyH>Y**DAuL)Y@$8g?C2dOOUJa2&MWq$4
zkvCPI6>qe3x!#5zei-}8s|qWsreZVUQgSz}mS&HUo>T_E5>Ure*CINn^jZ|wT`a@R
zX!pZIitemC7_{EX3+mzy$#hljk)}j)EM@H`W*HAyOa@>lM+$&P@|!wnE!URN?P#(Q
z9!MzS%x}(2#Ln9!-yU=4@+$lfWzVrfFpV=R!C=v&f7B`P-;Dk=?a9V&DwvVK>q}|&
z9G$9ewq-Hu%aqMF>fIEx%+$5Yc;83M907qIddXn(d_CdovleTffx|`&CWtdaeMI~E
zq7Dotsc3=uhxp{K$4*;$PG@V!bD*&4=HL$E4nlYSN6h1|J#lt-WEK5$mZuSS8AoQM
z&`t;wD5aJ#9gB+7zLk=NZ}Yr@4<_F|Vx8l?CM)?{*ZY}Lt6wwh=oAUsHHq3B4>7dC
zWzP$qdL;qVPUv>AlE%tX;*7<2Wh+7|`$cwGr332Iit2qnXFzwzM+VV;OMJ+bd<erJ
zMMPTSZ4EC{4Cl4=oZWmNGY6BzkB3USEk*d|;v+CRX4;P5nOUq2Of{x6wsaniKDK4s
zO}ed@ay!qrvL+V@*iLLKERW6js_2>#PGiC?SVUHJ!-UDaitaMU^vR=2ItuPW#(MRR
zJ#OJ}(pf39drVB95V#`0b^SNq%u_60%#ic(B(b7)sJ5{zSS44fIg^ULKN8cOL4M)H
zERQRk>{TjSz7SO-F~!Y#5~aF|k&&q?;r_06BO<7^G*NPtJ^n>eOGoK2l`rr<NFJLl
z`);77`5|>FOV6kHZp=qYDnnYNm=IEgPcH7`!J(svQMLSeP~3ltL+dGc6_e^b2^*ct
z^EHmm8xROPdOmZGnwLaXT)xP0H#c9OxrRrE%fb|2FG(RQE+eKwMBl(AOYS2s?j_?O
z#ei${x6DiUj#6QLE*UtfXi#PbpJ@zYJc5W%vL@_aWoQw0*)s@uXD$!09FBrxiEG?b
zUvkv*FA|K1?ann!bLR6!3eLp)&)8br|1<Uu>CKzwodVeA4Ue}?jo41DWsr(Jo?X(Q
z;)qYt-HD0{UMs_E!UQ2j>$5)(`pV_TFYGG-*n4iA6f%%Od4ad_NRE7oU|tOD<B}Rc
z&W8$E_$XxWn)4k0n<M4z{YJCXo>EGd>%hb4W(!@2RaB2PzO2~GZ^MUGy|OCG9k?X2
zKZ)xW;gUV>=~hfx^EW3{=S@DccU_s!XtmQDS|OMN>g-_aSO)by{2rSSUCxcwH>@l9
zWW_axY&t@f2R;`LSl895ZYXfMZ2^)v(Sw<lV6<*8U8&=3so77d8)-{GwJpXUDgD2o
zdwgn}Qn7;U>ay|1;c9=*^(jPC<`fItwpo*<YfHh9g7t#vjB)CAeVPWw@AtHG>8xu6
zXm3tUYW&WA16^LF!i<Kv^G(SS$?LnN)a5qp*?0_X6Ah}jB^Rk)A1X|VW}l*dJi4Bk
zicsaR_&<HS<@l*zF!||^X8$39|KI$r<Zfq-Pb+I+q3C1{Nh^a-Pe=EU%j)3hgwMwG
zbNRm>EIkX;|C+G>g$Jwoul#;pyGP%C>A>cNDgr=Sz9^W8+5%^dgQuhk6V#@FKUw{L
z{XPv_OcsL#-U0;C4}{u2iQ{v>XLvHWU=+=K1e2Jrhs`NC-*yT^jKqdHK|;c!Bt_w-
zC#}x{JHUc^h<{~eg$=S+h_mh0G6I?h(3kHExzh%c74QTNLOz#lKo>>QB9p{9yUy1M
zMb1({fUq&o{L2zOC0PjUsSXqtEo{InR?>+MH+JoSBtHa)F1nj`1ek%{!fu@1h)|9=
z5h94@hKlVF=Mu__3#|#5p|1x%Dz=VRh_8=FUOulWR4gB&%?X4XRT~hq*iwc>fQbM-
zejYR$+;9x^3b4SAKqL%^DZ(rO5pG@~gQ#F9JxuRCYLDME3LvPu9)OuM(Tbd;Q5;r&
zc84egB~H2rB(YE&VO}sb$-vxU#BSY;L5K<{AkBE7uWxh;eUAYCriBiFPK~Q$z}|nN
z4pUH@D$4IFn2i-QR!hK;AB_nyKpV&pgwX(fOoB8`7}X3&7=0_jbDf}iPJk$Y3QpW4
z4sHoPB;9IkAw=CMaq2f>7ItwcNFlO2W-(fTSen{t@?31uHWbe|?F;_Byb%Ifg1W@|
zdWcbuLO)r;l#*#=(%gQQlxKQ%Od(gagXwWk@5OcF7DPjGWmLOxgM=1>Cw<~AR5?2v
z-FEny&a{kaiKltaheKmqhSX?Fap|A+#y{hlG6=m7D;k~z!vYcU$k#>alD8`!l+Gy!
zp^qto<X;+(7sV$RK&1*&MQs||(yFJD<*Ss%trydylv9%AHbre}<QMfO+<S|iJ~{eo
zRqQO=^G5bGF}T=F+h@LbBPH+74>ns)v?hzgZXInSK*;?ioVdEPHh<6#E!C^NCogV~
zjUK#QEN6^p@BBCMJ;lMPL$133cf6RoS(NYV%>ZAp%hZ}7?b)(t@(!h3sOdu+REle}
zmLr-RH0o7!D)W*(4v~k+BNUNU{%=dV|C8wUz{0XUKAju+^>s4-;JwJ5J54)!zvJk+
z^SRW0rjym&|KNLqSREP6Za7+9MYoAsC1qk^oj|<sCI-*J<Z8N`InSHuS~zUPvV7*6
z`}^6v<m*r2#)5_wzhraUy`@DZX*c2aem}qKLN~@IcMnJcI>F&L=rUXcVChOoGCfaI
zi!Y2dJn-`{I@cKdwPr77Bwux@XzcLdC0TXqxE4$}b!r`wAP|s&ySZ2q>id=rG&%Lj
z@|=(mi0!4eR6P1d5W13Jf~8_!wh)ne*$kziLgvwSGn62F{PLI)NRjWRt)A$54GspN
zeWf;0DT_^)*<}i~cr_8na>7CV@?Vo><W&t2eRFUZ_i=j{eQ*~yqI8R})-bWPVeDH}
zKy_-&qSR{RjdAQ-mR28jhMc<H&FrYC(xBEY2i|)t&6rG^s(l2AFj1g{JJD{8ObpvJ
zKQ27&F5fnTm}>a83Uu3#CLYk9f)_J}Zm%|t*ywmwM!0ri4P7AJ$7eH^Z&xPW#qN}>
z@27XXF>}o)y_+$lAXaJHnwptb{ESMo7pW!u=iZEIS)M4G5s^isqN~VZHy-8;E?3?i
z8l7w^k;wY_qK>l|KD4)WClG}*<*M1|lg7wIdI6BARM8SnoL!o~Bx|nVq7AMPqEnls
z*gEV1sxk0MgdonOWuN@na1#gt4ncYR&=X7n1t7xV5r`<n6cS2FWn}X51qDM)@G;&5
z*j(^2wi!CSS$1vSzyG=Mw}TEc#K#8SM+YBu6=XA5PZPEx>tIIpw|sR|soi%xF!p2^
zU$r#aBVV#C+Bbi71~&JRn6zXdr#=3yh&!k-cBh>|kUQ5hqu4M)VhPajma}|Sg=^}}
zQkko+J$0@vFQI^3OeP_pR6r&yA5nlTOdg?tEX;%T7e?M5SU&xY`!|J+ZW!b>l>EN;
zZweoL3SfDRun9gTumT47<m;2kFy~C)>oqx-NG0R!oS`qjtveCK3OBg?Gf`gdh;TaZ
z5uu0E#gtgHbqTPsl?;n3i|OX4X4vMK<{Hzkl+dnoa@vZ~uCko_0*|@Kb8*@V(eARG
z#{!SF$aC|bp6;iBtQ$U=5`Eb*h-)bJ+jjw(kDj<hy0}FDEa)R6d{SU`jIr@&L7DSj
zOe-N7mZVRWBctqCuK}eVKA9T;?U#F7T3u7>Z{T$1Ea3l;-<Vnd{RNeQf$jezfoa-q
zv7r7Ki*r5ZEu7HTXu#TfuAS8Id3;Fk)L0-RZdsT&Q8=;LkF(eN`C<}=<1J(6hr=y=
zF-3%Ti|zMuk8^(<1v7JFPIr>~llvc`?92$0@g*WcD234*r>{EN**u=j7FatGOEO}}
zYBDxwr+F?u*Ql&BW7M!nVA!s^F}mKW5VR2UgftebSVVO*3PlYw5%}~VOqK6<M`T|Y
z)CQUhVRx`h%YGrRfY(t%8!EYX<}h7n6uSD9Buk5CK`+3t;o}%c2<-FLmO9O>n*f{T
z3@$!WL<7<eC<B}n=x6M0a>c42)$I(imGRO~Ja;{|urX}JF%ks%Z$biF97Nge069&$
zg{Z+V0!a=e`^&@=f?p%+P3<!9j0DWSiJq4>{vAh@o>$g?AYmww*5}%Q60i;${@Bx-
zc!{5S3-tQ5=BmEnf-%2{n!mY}6Zw@eKLb485&!rmoxWV3&4J)`NEm2fbo3I=Av%{c
z^U8?;QSvPRVJMQfZ=5B4dpyo6eITI8b@q*hap_Ad_T_Yimjcym3K2;uLG?)}P}EMR
zVmZ;IJA}PF0~bRD2m|B1!HAx^W3yo~!_Qw{jC$q{`c$U`naoyy9|nHQ?qih97Ny2d
zED2s`#SR)UVmoxAoIv1^kcMy&Pf_`2H7IeJ6VS8w>NvwSfnbS2E6_YJU7%TZ+6j-l
zR|0zunjNy%oqngcMz5yu6NSq_IOr2ww|F?0^n2LVo&I?nXtvV30ubF{!8gI&%jiYh
zL@xIsjyFY3K8J}D61%@qV(O(nJrPo7V6B#qZq&WnO}COM;t{33AN^fhQY>xo`EJMT
zD;GkTlDEhAueH=pF>+shfqF^IXKtyetxuAf*KcN98Cv{4lw=q*J;v#b#rr>ko(`^0
z?%isTHJbKa0O-qKhX*#^7wRRS`rREnTU04_p52{WKCr;DHMD<OOyMG9OKo$Jd}Nnd
zc_A+xK^kJ*{ri5a+?hZ$i()K_FXht)!(X{0_XPl4IL;<r9lP{zbPgvpuj(g3r(V$6
zd3(CJx_58mcmno=7~BX~_Q}Yh4~}v$>m{AV26V%*a>2e$lEnpm=En%CTu`H5bv_PG
zqi2}}u`>eOxb|Y8NWxF*;HN|g&d~Pi{Ov<Mt!V52d?vrNS6mL2Qj|j`51k<K+S>b@
zDu8lKVa4|J+=D>M^W2w*NWS^&{U)B}<mRt+*^}Jm=a}^3m9du%;9VG71o%jf0Qmix
zA;`txr!Y7yqINW&SnaQuN&(2TVSt<UlCe^F?m|d93dg#qzJ*P9a0(6~`l>gXa`Io?
z4pMN28X2&RVT@;ouX2P6psH8qEA7X{6~d;<>!ckQ@L&~v1QUgTpX5rm)cLM<8J1E$
zr)NbVYWz_Nzk7G7QbK@+C9+ZyqZmdkd8ncbuV^EQ=WrI77d(qj#A;6j1WuM}P$en7
zrNKO6oVX!?71QXhZuR=yg3k|j`8vUdYbm}MMVvooLo#B%^G%9xbq#Zo8-LNsv8#l+
z^F+DBZbFz~(9Z;?g$nP4KS7sS*)DdQ)y6pnKVQTk1x!jFdm?qY?*>!a(&D>k!Kvo8
z2C@wSt{Jec=l~suO(LYW=Ei}2wdcmb#f5F&+yI`)d&znv{azOP7DVA<An!zWV|ZvL
z(Ms=_n?+yF|8ZtH9zrvcLwusW720t7_>R?L1$70+u+~z{b_raMaVwged627b5q>>W
zc7@h4!TaonO1d=;v$DGV6xw?XQ<g+a;W|5Y;CT_Ypc4*j!3z+C4nFC31fdt{4u<ub
zK4I^pe@Bb^<@&i|mP0#yQJXnNb;Zv9l#RV%tF`bLM4S_*LNa6$esmC5_|akQA01Yl
zY^m7QDjN7qqAYfQWP19PYgE`>T_b(H0QlJ(j}lPtEm}c9<+4r!8KtMmxrrP-LC@9Z
z=A50RKDwkmw<o2t6kfyWAd}Ixr~D$`-eK5rRg+-ro}m=oSK*;xP=5XD5nSBAt@peJ
zox*@j%VOU2t1D=ZZgOH63Ngs4e(RlRn>6-MyQZ?q_R9>As#w856?GeBqX%5=2jF=1
z$&@l9Wi=d)`6ZPza*YcWiiWvL<gj{q4ps9lQx0;1;0IAxun3-<3BS6&gB~ay?*0nB
zfX$%eU@}L(mU;fqKdu%*d>@U8fczVj#4v~620`f+4qZw&5C9C2&C1cs$P$%>;$_F3
ztpqxP-fTGMsy>ZaL8+z@qmbjkR8U&<<7e2g+IrcD1z_|tY|D<(f()_AbFW<b3h$o{
z-FOz9mhk?0s*Y-Zx`8U&Mh%kWtaT-0hoL;FL^pjOX9Qctu066gU*q!i9o?9W^1)|S
zMi2h(yf(lA++P7!&Rp1xC)J?bloL;i7vQT{R;YxOQEN2DlUT0ERSzU68)m8*!|G)j
zl#TzLQXfbIKPJNl<<+BfxnKAL{Z^diJTq$;Fq|T}Kniz2RT3cYqN6)j<fD#c!Y*H8
zgEIr)jKGa$&QK6)fg<7*ZEb*#+s8I6m$;ppFVxt<kn-Cin+MB$;Y_d@GF!(m^Ryfo
zy%f8m>53u2jzULrm3Tt7MW$|I$1t?<_1@?rGuMh-vZ+SXfI-N*IX#+5-8#eqMI{q+
za|qEM;1F3*nngMTiAMSjFM2L{%W%ZXCl#q$`})enu7FrdH5j);<LHdENH;#`ceel}
zUDl#tj=i`vbhV~AqmcFGtVl-GXTC;oy{=&<dfhKN8Tut7Phukaiis15{FP%ze-dt<
zx$?Pv6qj(B3RWq41#?|usxPXe<Ok)32PVlTUcL4nsbWnh={cHhD623daY3W6+Sz0_
z(vjulC}??2MiWA5)aai9lnX~#&frQfq7|W;cL>b+768Ts^>5q_>-_tBs~lchRXnp~
z*N!f4z97$t7f=+qsLsDED49fW)5vla$7yWeeGk@}wQL8vv^V~S=~S}tT(|22(uoo^
zhrS~4jZl2R005X-Q1}4d0QsHx0Lsw_eo@}Io0NUh&)eR9f42PxfcmMy{@Yhr3{3Pa
z|Fy%f@dHpTi2ndo;Z|a@doQ|~Kkh>l-;8A93pEV1VYdw0cBEmgqr;cCX$S%Fm4v0_
z7Ryg@Hm5k`h;+t|rbtZ1i%RWw5o7cH_-1nd8Ocs9zp1JzA$YFiU<(B<^5E~~G=FLB
zlcv4)k`R<OmlxHOy6w9MhU(H#`&{{;d!z;5t^4ob3O|*Jb%P$Bccm<xCTb4=f8y@N
z;cv*xyK=i^x*Yei)*S|rq#21hN~S}pLY-X{+lWTiwZ-xR5}n|AXEr*H0VR|`-Xul$
zYwdb4rlYWjP6=y0)O}T;lbXGff{hY9*P;FDn0on6L+suAjZL}9GQoaIa%yBoWHSoe
zVo{hnp5yh2ema~w<PV29at9+~3qZC}0yJ+bH)<Y+%b5#ORSXeD6pdO!!gJ(}#Gbw-
z4>fG8U&<q?OjW$*HN7)ZnO*5%t&c01Zcj`iaD;}B&_q`vE4sSL(QGxSqye?_4Cv}2
zhP?8Bz&8@w-DQOW{#qk@S;4E7MRj8+_q<ogaLJ>Io_q^KzQ1FP^D4fVz7F7F3>b)n
z=l7bzm(w;ehu8;-Pca_W+Z&0i&C%J$^#==dW0(d304B?el$@}i4$>1l9}Fdn3Wd$y
z65kK)CnrkX1_78)TB4hIiF)Of$_vS0ngp@I8|!>z*kRu8`1k0xdcly2t4`TZ^jG1_
z>Vs)BCLd1CBK;yFj49bA!MOgO$*(oNg2t^U80d+eRlGwp|1!`{oU(7m&#=lh07%ao
zP<0TD&Dxoy9#6#0NF`RmmOaP$h24)8-!|L=LkgM|RFCJM7b{vWq*<{P(F$tMkMUla
zvPV~{J6X7Jvm27P%+=#v!@_YbCBDa|KD7|>xvOf5#!I2lqa=c?>Z-SwQ7#}hm%cx|
zY<=6~)!O;m#+ECtFX8MCOV{!7Wz*`$mqJ~eQhEHxu<T3!)%~??%VvgGtJ}wB$`euN
zgAAtwj16nlzK@HII3hg`g^eVdFn_iv>G<i(i$ts4t!<rn$EICl%RoMvwURm-vaNOU
z(O}W(D8JvKaT9FhW3_bJHkSEZyX?oT+^or2$sCO!irDP^*6N3)8y2b)gdw@`Hb$XH
zZV7=Fh?>)9?&+nEqPuMm2DPcshPwFtrLrZkd;t&f8{S~hS-$pu{x86b9$kPxE0inr
z?+4RRa9HVmVQ}0mzGpr%<iX7>c`|G;zu0A=Mt<9Jr2>g;&`sHUU|>gFq=HosGY{C(
z;(l~o>1d1)My|oad=C*66ARe}4;38>jjT$l&!Tk>{{V(MifA)FUr7IHGO@}X@W5N4
zJ_k{9Ix@9^HI6#%tXV2xiFpd)4Y1-^)vjFi*@@g)Q$D<KnlK)OZsHCcD2T`i2Ol0s
z$PR*^Tl64*8m*1HhQ&NyuH`3N=i{K+#&0mf%`Z_wm`z{RhVrM}!d#<9Qn@*&D!uyx
z%lm<X5IyJd3nEDM#mPIcG73Be^%Tm3s<N&!_Wm|IrL^Ain-&&Td4Yl^+-a0k!BN<9
zKw4M7fk|RH{<Z)+VRvb!8{2wLZa+esfkDsn(Xt*3d6^~n&tP-6BZ*}iyQPQds;p_~
zPZ*Jwv)B+!md_d^MQL79-f=D2yu35D(2`m=<KI(`hWduep`KwYGx_8kk*L3p*^~RJ
zsMB^^_eqzm%Kqd`$SUpRchfh?Fm-BAhQ*?@w&}dW_@e{KEA<pHtkFSH>p+i8_{R6z
z7tBMPqR5$sRnX0-@@GgMc)RV0Go2=y{t5ljKwLx+hEx1U1DS*$4c7iggZ<`uq26)m
zI2e(qt)Kbjy#LJagh!OuyhrAr`E>&#rlM($Lu$x-X|<++>6Up(KQP*77zYfASyV$H
zEY33w$c-&DeK9wouApoC(}X!nA2TQ0mVJO8CPm7_yWD|S+=B9@3Y`c60=Og~&q2dc
zf>k_&k2V+UI~f?fyxDopclUD7y;O_wHCAN<Qf&nC$<YbXV`GIDhW?UHLP8PBynP~s
z{5yfcC#LS>%EL%+$=46Boc~w3>X3F90sNyX8RHf!%BmI*hWYYQd%VQMJH{uWG>4CW
zu|A|*=T72%<7&|28;bq)Q*s-Z%bp+hIrx-2<9F;#)!KY>3SnZ)Msy^bR(fx(C*Gsb
zheP$Tyrd<J(Ha(37DTR;3;^q8yf(x;_gz`oIHjX=a@U#A#6;f(eZ%0jiilD|VOS3k
zk2>ui;AuKMJ{RaGa8N;|;11P?yl{sCf)?EP-)R+TuI?BG{bMumh4~>@^_1&N(D6Uo
zv;Iqa=zc5XEEz0Q@P1k3q*dRsftz~e&c9U9#K+L0ul@1?KpsD8&tnx(761@}yqlK0
z9TXR!p<E(09Pk0hV}Xef!if0Ss38UN#n=?==J2s6=7aS@vZ>goqJ3b6ZHePWGqIg%
z*OSP2#Mm52W%IEw=I4aVhEBo<V}ZeIsjQq&Ei$Tq@r4_vCxjI>R3J(3*@ib_cqfdU
z9sRcERI%R#Jl-rkF^{;)F6|aVr<M@+fIocX%Vo#Ey3#ni@t<I&_AzTLz6+U%J#<D)
z^hZlmThNclD@=);xt&m>*HeDII+cmbCMMR`O6jm)A8V5s>#|znI|3`27Yp<+r(7B8
zODfDnmzK%Pf-d`99N9ssu&=1AlM9YU2rl?TweXlNJc4qPvSliKe37ELO>991B5b;q
zElL|*nwTO(3X?5^$)yDt0c$+&4b84}<amHZ;am1P>kT<qg2+O9#sc5o!i{M@*V1Vq
z-E@V$EfmP*??57mqtT$^M+A9AiT3;?h<E5x9hDAywx}{0^&S?=`Mb`La9*LEu1Oy<
z_cTGaN~5#UdXo3A#(Q&J<7V*ZhVe^!%>JnpqqWCC&gpBSB&fvDWH$!QBHvNRlvxpf
z>RANyys?85($xpZL^|~HHNynj#DVbq9>eC!V<pb~J58u1`^MIEeNo)hwuv6!z&WF<
z8dmTu^8OK}H@a;WhZav<wO*b9T^sl(Zuqm%<AqA^?5}5U+}!Tf3bo^6p6TutFw28(
z6|qmPiYUu!S^&4dDi;s*M6BrLA)l2C`ztM0yD2fd%-+Mo-t!l|k>7_$Pg|8M38S&9
zZrPgUGt(BzH7!n!n6zrw+D$q`nhTH07KH<pNmpG!AWB7o9;38T;XCni^equ+HMfw@
zJ76cly?Wifz*VYXfd39~40QkDGkOLVdWQcR;54^wH`x$<XLa*MkDsdUGL`&1*Eg?#
zhPky8yPAdJ*<>v7TNa1Tnx9v9r{s#rHXEqtH+KwL`9zn(KGGSpXcZ7Bc~GlfuA;29
z*nb)?a43qzqy(ipcs1pW_AL|T-`O1-m^b~^h>L>X8m9+mhV|neTg($}1;1xr`Eh25
zD*%U2f{`nLG{{XO&kimf?OT>x*8st()H$_3O8b82La9P1He%*%Zu~F7IsHR$wGpku
z>zdeSO7n3Vuj@Yq$N3Mz`I41ZTx_5~@d?I+t%k1sQbDLeWP1i>Md7v{wHL+Cfridr
z@YnYVZEh{f;SUW^RnRaqA)8ZbPG+|7yHd!h#+F(Hl<ZFql=3H}oCoh?XK7y>4#*wW
z3>kEHJfRpgEBCq<)&t2F@LfPl+kizKozY3&7u8sC)wsZGTp+P6?zgtIgxOkU8pSjj
z$v_F)fT{ZSRc<+G6UhST;hWSm!l?cPNe4C~aD3+kga|1mbK4}K`=*OBdAK<b<+v5o
zzSRCMM1C4&Oz2H{>U<rz#uyL~fynnAB|1<$wS+zdOH`Ydip~a>6yP7U2!y6VKA&bI
z1OYFLVqn;j<;RO5FDE2b!Ug=j6^fTG0$ZD96d?*Crr8`49<mGmyvP>W7rqD$uUQ}!
zDT`2wat-<LNpj1qMWjtXO8%=cfGZfoX?hKok~bPVE{ZLq#+qTLOGe*ndI1`@4rrhx
zR@U$i&Hi(sktN-*8GpkH&+s3F%RLIQVgICQC6etj_oZqW@H#l9+XvW*7@~f%W~bj1
zikb!oF|<gMC{h^K;X)a5H?~rt-qDx&AB5|D<ge`R=6YaayB7Kf;iL?PzwRH`D_0|u
zXp<3!AGy)nOe)|JolM;9y;Zv?YgbzFOiz%=rOIh%07X)3XM?GAb515>$6AkhO(!2O
z&q{TBdER{Pk3Ehxd#8G*a!)FYub*l628<uwzV_dyf0wg3-JCZ&opSBPV4Ffx9w(8J
zXsHgort8d<R1P0bVwzGavlpvN+oZZCVoXa-I5cNf@lKOm|DN!=&w5>W?y)q5-1?PO
z9+<Aev|M-p{)oxqOOG7@VG)K3Z33hS5WLV5$M1%)p?Z*+IpBx%7m3bJ{z{?F6;I9n
z*X+&<9k@KaCrw1jm}nvJ(bbvV7~&q9C;W$7OZ46xY>YrzKkx%Ac(B6~izzB(Dd1k9
z^1RI~Qn7>_g;ZI9w!GSJaOav4CpF9MRz6LrM7@XElKzLqGws8DIZl(~R13id6ZIGr
zYATf!p8<z#0e%d%WD%yk-q$)4WTNE_@W6TjzWPG^DtaAqI|NA7+8NRmz+y4vL#xP!
zNY$>)wT$F%o?HNL5)Q&Qfrpq=3o@c4<l_gg17v7n4^gKTC}6?1reopK!N1_kYrTug
z)7Ckkt?27dyyz9XU!4ij&EMFcLYdtD0V9U)nP}RXXsFCeiV~!mp+stvcc^6)xdvDb
zIQF<@+?H%RZFVYYndSEDX_aKx{q7b_s(G?FoJFu*TELLBCw!p$kOL^kF!Hm#gZoav
zF>Md5*Jjv-C&~is@`4ZCi*}RA7Fv3>XPV)d<BXrEgqsh)1RpbfR2%9`eu&bn<|m`O
zRaq~pb~T`$J6lp#XV}5bZ?H27o6aBPaTl|s6@%WthxWdZN_yOhU7=aj&hMaWkY;Ri
z&>wJ*O!u>YiQtFuAve>O!LUqu#TNFfy&}D0kz!`~`2gi$6im$CU+ZK*9eA@{5`nSu
zrN8D6kGUlD%YDi$BwJM_9-We6K0zQe;a2vttkiw3qysh#kLkSs^jq#GTU~CPoRZe8
z{?l)1>gNPBG*_pbEbXU7)ZDLXu?znxkkuj+hb~dg%wxn+K?preB@J4aZ=h!A2FgJ=
z1JjJQ)KmPV3avyF#J^@vST=$Oz(LAE3<&@P2^(b=@}A(Iv-%IunL7m(@M^5-I@|t*
zW5Pv&I8AA0_B-?N&(jYx;9B9qy)beaUAkOQu9}G&rxt0^g$+4>o~$u4{A3X2olpIN
z&0DokdiAu3%Bi$11Ebncy|S<7YY7&YE5`?<)e8{Fi9QZ^nU+~Pd=txr?iwq$8*Hqy
zJb>;31E=hz!5$VLbT5y7Z=+V^^>U?}@+z=5At&6!o?_?&_LdLZg?pQq{BUuXd>cbF
zk0eB|g>rgbz&?Yi`e`KMQI^+oqSse%Amhf27?-?mE}kz(WqZ>*1HQ<39AGfP$5FqZ
zSsL=i`22%d{D<{#7YsJ~z?9vw@Aql7MnbWpE5>pZ(_<~Yh{5(+cXHO`Ev~H4tVmU=
zTXhL>l=p=fw56B``9CUr8e^~M+7<q=VZdo!L{zwhSk6iOj})T+NHO*^2=_lyME*+(
z+;OMlwn$NNy*t_?kjP!*(uXWAZ6_Cr1h1udUJgibKHD&#-yV_={$zdN{a;?5r5W?{
zt}%rz@a;Yv=;f=@od(el3)tHrqa6MYB7%=L8B_ofU8h?DM>KDGMLWXR5zG$rj!sA_
z=O7apaLg{PM*zXbHYxZq-vaU8xP@I-5pO4JDN1{l(Lmp2+CkO@j}-Z_gF0WhD0<eW
ztqjsG#F1)R=OoqDnMq7)=;=$@HnHg5jZ8o%MZ>tl1z=1oW$zq)CCEl}4Ie^I-;+*h
z*FH>T*`GT1{zGprP9zK^2_&>g&G7B~=5z|Sa_14u3}nZrBti*DWTe0r_s1_Yck^t@
z=zC`SgL2>bpzLyFzf_fgc;wzgHg;6637L_71oqoR)Utb|S|1p;=OuYvq6IIQ-U
zP$6=WcIDm$2YDY><SHd@yMi_ajy9h3igY&rMldh5{Hz)J?q|&~&DP`K{YHSpJ}`Ij
z#Iu=%)J&-??r9|MTSQmx22+f^7#2Eh*X&_?8HJ#N4?TUVLLn|dJ%SG*+h6*awC*x2
zT9&=h9XF}B=rKBjqmCfHQ+6*XoruXmSAW<fq2e#70^>chiz!#cOvmS?>u!+9OqrHs
zOygycCzPbZKS$jzikhN){hYVxb1XkB#7AD!vM{ip_}X6ohI&cN=N=m2BKrZPhRc@Z
zbe%#7^9b(XXg7<0G-Ip60JO(&z?ZH{EBcFYoVjI#^iHTuh~;hn4ab8lk7y@Jtxn)Q
z)JF2YbZAQvH7>LZYL##po3WraA<Xm#<kwea2^iald;Y+a6PmRFj@>;NC%P$vchh;b
zpS^WXk&<CoZv2-1wt0BHAw_IC(8@W53J+s`0*25Or*Y_sWrMc6xeuFn4AT{bRvP?s
z!t}L?m+m8v#NwKKEWPmN-jdaAs#R?t%*xvrAGcivv5(ygtx(Z5G*I2;$$nzC9M_Zc
z`OFl#Zs1I#H+nzqYwft}1<~X)BpaUdXU@0%DVDCAdq?5q@#CpK*4KjPs$F|lb+USJ
z&#+c6^BZ{1S+e>+Kqw>Ae{d05nEyY@u)4N~Z3w=zrE`S~oQ|)#N`9zq1>L|cwA)0s
zo{{+Wm)eCc4OpA5n?7%_p#;M0Dc12lo49iT`Q-?&NTzIz*s#PJ-c@Fgi=f-D#y69D
z&rot^#QzjxBmODGE>Biv=yrK7pJjd`mQ=LJX(`j<RcbFh_ULS=SnF3`FBNKW(z89O
z6Son`A(~4wYioOz)95r;KLG%VGpQ)|s?<!TP9WA`ZS;9BqMri<GXot#PNOT!)UYmh
zo*!gim>VynMn4o@$m=<UkwXOYj+R<wjHpoZIU?iwrq;tu5qv=AO;MIW>|DLkM<S;<
zK}Rpd=zEP8KL=3<LbDSJpqHV7%O#G4jfngGMaub&8e;BGw4Vwn>rVh~i=j8AjIk>k
zFxnw`-qH9pfhcWX;dP;m50Wn6nS@rhfs5!E-Ay(q$gyzEw7P3j;=C>uxXv<S)@EZD
z1rrt3!U$iCCHDPZaJg1b%?apr%oJE);`D~(2Ac;lyX^>s=p-w@WgN=)IX#zoFr8^@
zF_hlG#`aALaT!}m^qsr0JB{iiiH53@wD<N5P4E=lqM=x_O|T-j-8SM+grAom?E*4V
zoS4~?;ixBPQ7+t-6Tn54EFdfq#swtM1`C%i0!gSMH%<UVr5PRIA7UDGVU8`FH>3|1
zrdlOc`NN@x*I-AVEcMK+;t_I_b}%I<8j#B~OorT3A502<j74N0SEz^EyW|hnW*q^)
zs{HCK4;R~B?%90zbz0Lp&HP;}y&nL}92U3}%)5+Uv`pf1-R8Ne5(eB1uIcswcIt<s
zEH~u~2E)-ialzl1VkmPQP&Dh45H5o2v6|fi*?p<5gZ_L)yJ~D7Kh}5lyOAmY3`bZX
z5<d0r=ybix7u*Cdb#-J122fH^((svND7VHgzrUF{e%rgSBWSI@og?ZgeGYmtdfF2y
z?$V@0md1WPO}iY2uQrZeIW@ZZbbQ#oKdqg*x_RFh*3F3ZU8Z3nOob(PWzY$?S&IhE
zA{6+9&-zQM+oDdc7xp%~ZE(F~E+(GcJ9JuXO7U9fel<U}`e<A6yl$MlOWItdP9ON3
zWmdOVGWXAiO7HI4>hGd)|2E4765aPgcT<WZ{tHg-hFaqV-_wZ)&9=`02z;T$0_yqg
zuM%r_UH$C02cSKvygc$OvkTxpYq$3r`u4u_;Ou*WychHZL)NnajE@5sJ&xz^BnWUi
zFL3&F6wU@3mdKnUW<0Rv=i3;q35-=7Qu1dxi3okL(479$4$Rb*Ybi(m)qG>WDmzJ0
z9ZFG|D3?zA1YQmhRUQ^*BDafn5m9A9#PJIIS17n>0Bs?*@4RDIdlot*aYyl~7H5)o
z)A{ERQ`6VUF~|gS3L{mk8jry%7b@aGn1=^@Jz_`49m3=~KD3kmz>;b=*t@r4<+n;8
z?TPW*u0Ha`w^^C~=e1)><u~zNIEN<~;IW~5_KA1)$%s36Sh-cFh7TAh$|HADgHO1#
zJZ*f#&|6ip+p$BT0~E7Lji&SQBX(4&{W;{Zq%2%LU?r>Hz&U6Vwmtj7-5ac=tgqAd
zEfDnWxNg^|5At<lk&&7Om2U|oO?px_1h$elVR%AtP46I?%ICb?V8!kUmFlD95z_J_
zG|<GTNA+6*f+4;bYYN@(uJgdwmX-1XW|_l0o?(u%V!+4u#_wC#oSPZbD%<)V`!|tn
z7mnslA_*_Pb`12<TzDbEu%S1U75>n2TYCod>Rv%lLp3nMKG8nz=;DXNj~z$H;3l}y
zXd%b7AA<l6N2ESJ>c`Ve;4y6%Q6R`ITQh~1<5{t@sv?^gNS*c$mFIf?vAs@uX4U_0
zrH+x#tYUy9#Wt_<YrtBOiL=?_98rYB^cZ}?PU^-j3_}dGD;PGprlTH$NApPP160wY
ztITu@unSJ$tf;E>j=rg$lv&vY*6q9GO>hHDfTk!?0RjxTJeXy?#sOZl!p4vX4xX|L
z=bEx%J752WOPYkUH&bb1_S^Re$t$RL`ciZEtxA~BMr?$VY%(@>S=UG2JpS3l&P&G>
z%haDuY;pQ$6Ja)skAf~(OrCFFRGS87OE-wqCZMSgC{yNQAyBKN7Xs(uwWP9oHtcEq
z#XqaB;9RD2gv-pRvOmo)F5zg9Q=lbdj@crQGSGF}6kO1UQQ0vq)(GCSjx~(Jy$VpW
z`T1Ve2IUbG+lt9D-eC*f{{Gm~v);LlTkW**vRSo^;AH?8VS(=1${@DYajtGf48b*%
zORcA&Xs4m6YPtHK_FfSSdm4V3s?bRyq(q=0&J++AMu;n~ICLZCFeKFZUruz|<gSa!
zq``_{tLWPy35!H+TEvgWh$=Q4dQMD(=t=_-H$lfG(<3HC4tNP(H(cc2IWNXsL~xdv
zgUr$I%+h-qA@1~%?(~x)H6}?&laQ*7u!U=d(IJ>mX5KRn+!{a2K;sDXyQ_a1O@$Rl
zf|yqsi}c^NnzvC=>m7U(6v#F7&aryX`PiwQ&iEv~RiRb$9qEXKR9-u}nwDR8IV^eK
zY!!^ylrwN%m)PP@s<`^`&8n{&lgpck8z4Mf!1j^(u5wr}m-VgnF;avkT;{K22rqjU
z!{Q>{L_l>&B)+gwILn?n#x<<4t0ZPgp>t%-xN_%cx*>@#Dg0*YW{@}pJ4W6@Q+%Xa
zqlu$Eb&L60Il~mTaif{aHoaRIoI8bB_<zshXIYi4W>gX3-%fO}cE9XTe}R!l8jW&G
zCfIpsCG;6j^TqxSX>(|2jjmZM1t^fFO6=NpG{D<YSs7QTyG`$+{7#J3zx5)tnfv_u
zd|3}(!9OI33<oa;V?;HZ)A8Uk&XDxi3W!LBB0)2+trnx80QRq^D~^ZK@(Pu~ar~*2
zFB==RYK`cqsbp*i$bZKftvI#{hDJdYZ6RwY{*t$lDU1iZDxXy;;ZGhTS?k9LN`zrf
z+e`?MQ)fCs$ak6;1%jDzi!GgkppBDED{|;RB7n)E7L$&H=PKiC{k^I6-5&d~;!T>2
z{W>fN{v9w3dEu(&D|YHwH9NQ;G#u(^<8uMbPaQ@led{m8e`oS5lJP3WI1T<$7vBZS
zHqLCzz|5<CX#1UKi<2Aa|55f1(3L&ix_4~bcE?V~wvCQ$+fF*R?T&4BY#SZh=H2~&
z&;9QC&Uwcj=VoUlE6EyJyK2of>ZxDN`4kyee)GhM8&WZWKI>c_>~TZ#38<7Ys>~By
z|7hs<YbjO(z*u#Rk^k;;>>vQC8!G2M=8OJan|^bAyf|=FY>c;mCkv|tc0gJ2i!LJ7
zuWv3FsnY1jj+g=gCmq&A(3^1lbxb1ze=LECeHP?))evjkAo|SinESQMIcVwZ6V<rO
z6!#(GXn9|!Pt8Uyk^5`kJO-03IAd^nJrpAe8fCT^2>el&QjVjt+>2v`MS)@8K52HA
zv`JFhb;L3uMQUUaG3S~B!JC*glF_6`_e`Wh`nDPLOi993L(CKZqmSAnlp!Ot99Re8
z(+dNF=&O(3`1)_39%ya!g<-#m&x=Es=~92G-${{g7>Z-pOnH?pXRDq&R^4yXhbmZ+
z7nU8)>hv#Ak)x;A7dr!wW@Eptk18L3>&<k0fxrY2ZTuU_3J|CNec_v(i|v2RO?3dN
z?_4O|Z|e7oTbfMX{2=Ehb(z3{yw=v+%z|<82T{Oh$kK9w9}n8J?5ozAbmfvJ%q$W>
z4Xca$%aw+zbsE+o?1Z>$@7{uL)o_4UuD=j&Ihdjns}6O4nhn9x<mO<eS$6k&GUH-n
z$Zmzl`^tGj_2a`_vjwb`a4EsT*w4O(X`DXtfCLGvjMd#;Wv81;n+8ZEiGIyRZwv2K
z`Dk(-?$ez%hfq%egJ_^OAr`UZ)H9d6k~a4}FRj(akVFG#UYIBZCi492={xFdr)Eu{
zjM8goABn$9-!%S?%Z#$u7R#}kMYWYi)=6$rd))M0<jqmc`%zE^d0NB*vRTMe(*H@Y
zj`^2LAP2X=F&_Skx+3kf`k;H*)buioDB)Die;`3AkjdvAgH^FXjCdImOu8Y*KXJ=A
zyJDGdw=L<j&St{7#E+Tn8!_hwm28i{AI3Bpj<H#d*nzFVMq?|S&mD0Bn{>b2LXFS!
zirKZ}2E!p3+Cn4L^;0LMPa5od;g{6+<0F4zYU;_P^9JZu^`zs!diCKSz4}3NSmxU8
zKn4wEUN)c8+j{RfujYgRCq>sJ@x&4rR;*Me5)ltr`hL10ZxAu~4MU?T<Zm)g{deiR
zGq+e#L<+F<?LIVC`9k-{`TR#vr=D5!A*Cd;C3baRqo8#DTBd2maC$B|jD09t$g(Zd
zR@YQsP2mg7-=**B=;^uL>G}6TvrWUxLh!As$5F_<G088flADx2Ug?6qbFxqMB9VKN
zU3>nd?!zc7-8SNq2$Tlro<zrcJnB40G);zFr0h^e9G1`!PM;cEu#;cuJB{t@zl`Jy
zZ{&)BBGD&VMbE!F+uU!Y{*kL~ZJ^-lD(pJ`6RcLYwwW!7XBT0ycjM(do`$-*AW<2e
z?fa&;jct0q_s%SBdbaBMgAem0D%FQA@(mXsme^YFkU0Y;P2JvZcduCd+#5aa<dqz_
zj3S4_P)^3=b|o^%wgXrO0vHhcVbuG^?0U9GvGa)I>nHDLYx5#_Bn#}EBIb{2@W{uA
zzTr!V$aehJ{SQBnTt2S_?DrjDpRaINz$tp5+RJ)v++!g=(aC<k3B&k?{z6K)x<nwA
z>zwZGRZE`i+v+Zyq~r0ze<yAg-gcaAob6ub6WC(GbN|Z!%;)gg+V*te%<lnZQSbsP
zI_gWmMV8<P^CzGSwhhaPQ1<6cZxEqDJ%Z@WcJUQ;mV%d^Y@i-x3>BX@4E+N)b?IIP
zNEe)M9MI&ZF0IEXt&rk#XrCg^gP|?p!AatWoGu}+{3Yjfkqr^%E*@xBh*xvWrmG_d
zAA<5j$*~E4j8E&%)e<lJ{mBvL0AidgMP6geunh+mT5-_Wm8%J*{q+`O{N*3XDx>>|
zbN(b-^{EY?lf8W67p7SC1<+y-J=2=ypUWeZC+FB;DlzEJIZrM*xL3qP{q+}8(7!T+
z2H~YNfRypo_=|Y-eYTVW9bXh*L~*{USGC`c;$y|$0VJ!@U&)$j4ICnSnZN%x6;OW5
z2>ZCDn|sTjn0{bV`&|5nYg8(<c<2_hVkbp%HKSK>kt2ah^W>E~%5o8~P+R%xGbN?k
zBgQBBW10w%&vsP7fXxIU<Z1+sN>aw>Ze`U)qqWC|aLQ5{q=lo<;}<OLUjLiYs*>Np
z=j<Wc^WxsJWisj6-;sqeriVCO3@PFvRRa>gB!6&J-}x4G4R?SU2e=y~$AZ=b>u4L4
zgCYL$vNhfu0rzg~A%CEZJprJa#paGG7bGnA6Vcw_Kxl?>VxrN>wT|1{t{5whd|4N+
zH=ek@WxwIP6ukhn&RE^!cZu-;cyj2YsvsTXx<2*Fp5a>-rFKq1cXi7s1p0YHy!=@+
zYB+y3h=H*Vb*du?kJtzBC4T*nUv4Vw?Sd;FUJ3_#KnJ{lS;l{8){r>hTyDhSq&|cD
zEW;5dyqI>kC&)3gpPa0})^K|5L4_4&H9Y+)1NqffkL4m(L&(?azjvH7R$Mr;F4X>b
z<oeb$j`KpM{m;7#^<iHAeJz!{VW2YoVP5v9?aE2*`xD!(<eduyu&O@31Ld?6)=mTZ
zDen%m;z`-YK2E!#^UoY-=OLlIZNyJ}77PWye$VSS?aFN<pO6E`cj@ze*b<@3x(&mZ
zb$6_dYN3gb;F~9k&<}+kd_>c@zO*Mz!?sDPSjc&HB>^Nq#+m_p@!fY2=(`3!pDukA
z4qjEh!hy0!Y(T*Ih~+jY<tmcq8Xo~B2V6^^oKvL01<V$?>lp<@{?Z25#gsj!_uQ4+
zCIRb4;RkC!uD@nD56!7!HS`R8_6pOn8ZS1T*4UBeKkU!*c9rC4f9afmkFp%Z2ZT{8
zAdJvk0AaK`MU;nAxyib#tGc8H(n^6s;;?W)KW_eHg1tvz6cYI!NBu@a?XGes+l;&F
zsgZny4}XZyc8K33pD9qD$C~E0fSNOlJ;%1R-E?pC{gSG#eTu^ksR@S<$3mg+%^LmG
zE!w(3Dn}5IKsc?6?Yfu{In*cGtozapAJExe$s*r5nZ&+u#bT24)Oi~KN1;4nRg0l3
zPK+gLhKECDwPHu;z_eogvyc6JQZH1RD|9f5{I@lnbo9ZEby3~#7kN&asN$e9wiLj!
z_|Tl$uNe(XV^p?xxPI!;H7nZw+o9LkT&)&DxxDUO1jB5Tf_C1J<EaWm3aO)jhPw}M
z+RejMf#{$gbc$Tdni;39;Qf;eM|S6}XxNGEJcQ&Q#Nodt#RDMSUFN2a;<zl~@6=7A
z0f~iJ013Hf7+Wf&Tp1_Ln(Nt@L;_E*1<@$3Vn@DnNV%XXGZm?b;bZOLAJ8rpa!zE)
z<_J)KQje=N3!^xzBC6km>?89bSPGPo0IBUf#0zpS8hL%#S2rr0f6yHx5>*(|l#2SC
zsLP>I3Q$pOEFErSV~W{V=S8nHq=xdiy}@dGU%e{7GG?!8eN(HduN|i*TlgC10-^Rk
zCBoDKkuA@>ogfvh$^bf;u4!5=zkF$gLTd#(SQ*Y`=S-_#<e;UAPUs`GY0n?QTRrne
z<z3aLPN)4!-wIqgO2w0Jkc&x1-PiCN#n%|gz_i8|sY&7~2n$mEzRsvV2Dz^-?%2b#
z;wNX07NPA#Y@v%)VTcExOEyz*db>R%u?l7QUr8C}m?JeWeBdZaoo(5dL>kRWPDJr3
zbi&{y!I=D8#rJw%aG~M>Fnj5`Y)~y*VXKdLbqCB|gy9dhOW>Xp`4OOp2vB6wDN1e|
z7U2rlbO6aW>(YRSA*eiFUg-`n)5Ej6t^@*s>@RwA(6C<`0MXx}X_(w7zyzhIu1qQ=
zB$pAyZ2YCO3yAhYK(t#o0-`;-S&VOV1`zFzl^<fTYg`XJO6bnc$sgv-=W<IvFv`0{
zO*DtsYSRd$u7+)mYUwa=_ULa*G}n~Gg=V`?aeZ6dZfL&DWXx`4h1{OVuQqXy!PA(U
zJ=4>^#VuT~%J=0v!8l$yxdmqWA^f7bFk+Ps==@^*;D1)8XTOQXR~l4OS$-XbeSzY_
z&4>SYUCYGA^`CU@|E1)Ui;3eum3*@N&yB@0W$kdq;X<!o(RrFvH;IpeSp1QR2wl9)
zgGi?GtNsL8!Un{vz@Qb&e);HCQw`eh3Mg1hz+=?1H8gaPh3!w>2C!zqg72nDY6)pm
ziHiM7YQR^gnT)|zRoq1@Lyw}2a_ALDG3-;#D%eb>%(DN^5~@r=KPrcASoJIsHv@)g
zs3H`wwII>@M+Je5385u{Y_Ob`jC|)2Pf}$q*dR^?HT<lIT4i5~sdh7GmMK7$Dsm^I
zAFW|0BQQPz_j`PS8m=T7?z%(?8nZQh8awSkxZ$T^DNS*(0e!R*bS*YJ`QW=|x{_*U
zq5=Y3CYuubfF`O*`Al`XV*;yzPAZFxeu%hqgL1NCw5wO)2D6S5XtF*CQvgYyY5WY6
zxSCjLMoW^EP$iZxZH7lbI_aYGA3acEFPox6E!zOJ9HB7D#9EJRRx@sNNoiqYB%3Pa
z&_4pWrWOWecstQ0P|bs=hf7drt;Fx?$rG(wv_#=Ps^O_bDw!+g5_G+VC@5?!%0eX=
zH+|^Y!BA4>$7P$kW$+-Rmo)5wQ5NvJn4!Bs+%p4kf#AwmLPD&6FwTK$3L&RMP%dZ>
z(b3u{F&1Wsq{uV^wLDkvJ!*&R1LE<I1tI59xF~1(uvsty5JXVQ$!CC{)-;cMv+@U&
zU?7SkG+cL(lt`Q<`a1^+u<NW*e+Ho$ckaXs)oALjvoK3En@i&l<&kC>fyLqs_f>Ef
zxsD|!CQ5$YZnv<U`ek=#$i9~Te6eKL>k`iSma~MJ_&IpKx%+YJ_-HHfVQ`$fy7+KE
zx(k{O30H*I5rri+g-_wc6!81N`7=gNu8hC%6P(wRHQ<?ipX6Yfgkbsz)|`uQ2{a1s
ztgl_w?pKS=X2}kn*S9S(yTlfHhPKZA+k|mxdWMe9^V|CIae9WDgC8>bAL0z~uJW%9
zPkz;*d$~17HIyHTgSVE=c%8`fueq;BAM<zg^Ve<$^qo4h9!V?2@>I?ve39`Tc`2?x
zXrg`+*u6jO?H&Zan%!^Soop}Hl>9my<ngiNd(R@you3%!6K|@5b#LXcbV#x-dq{^v
zJt54^x;Oi{JJD<SCc!LB`|VPpD;2T8mDny?vJLie=6pBEu@v!<nXH)J#wfl``hBr(
zWjbbwoF^VvSPIi-bYn2iyC>9`6AU&ASE==9MHo|&OP#H-@(!|BCLWQf)Fm39y|Bvn
zN$x|M?HXd7ymnjoSvi|*{$S$k5q4&D1njLm?v>fg<rtW-5m)r`wd5a!I0#SH5xdzf
zE|St3mJdYv7IWa721Zb79^V=f44~B9Rnn}4mHfqTi5<znIgQ_Dih#mI_#|SA6gnCs
z;4J-U8z3V92^!kA4LQYF?HaB+8svVWz=4jP^RJ3_gN7Jxv;sLY40=~JoGEhSrs)#y
zduNkq&w;!1XD^GNmZ%s*t&=<p7@l{?&%568nB#sW<GAg)vs_0m<m-Q{IeX{nwMJuw
zfScGSw|hLC>V}(wg}x>YJsw>>xe8tSe!}=PWzZcpRtLQi@>TlsXv?azXkm^dexg}3
zl8sVZIxzISDNm^U=m#8bMipKz?fdLKE3iX8UTR^UfMU^Nrkb%l!(;Gp7k?g{51hI#
z`ee#lz6Z<F@B?L{nFZ}yD`de(Hi$VaQBC0tY8G-@p_#$EEGeWO$r-xgbreP&ELV*y
zr<T&o|ImdW9-#0rPHZE9&fGULNOYvNR*DIcbv90nmlb2jUN!!E(2dVtUeBF*?tTGU
zH~It45wq!hlmg|{hCRM%t;P`bk3Ftjg|Lu|L>U6~B5^*KGizAP;mj=F_Y7fir(<*a
znzqiv+cm8mAL16<RqY#g&&<xrVNgnw;6x8z6i(`45K2#4`vSl>m*7NXIWbP`KgNF#
zy0JOy^y)8QC_8k$1UOeQB@AD|Bno(d;#RIsA%OHDL0pM;&TVegx~jIm>xYAea_WbV
zqoz{Q%j@K{@;G?x-u^xiO~rh1_ZJZ-YP-rXwIiA}0#5Lx6;O%=mso=$BaMp^U<Mie
zd*ID2fU<WfP`}soB}_D$>nof<xQmN&Lr1==E%m!4Dcp`4d0UC3yI5oc^}qhcTRP%7
z<-eW!65dT*P8zr2F8dCN=YmxK-^VrPe_N=)2^j7E`S_n$6#(+wMv(oU^aSyc{2`_?
zKR%)63aDl<pzIObo+o8N#Zyr8>yt;=6Lx04x>y=E$f+X|MpWr}^L(-BQ#<%CC#IHT
z%yoUr+#h0Dx*gE%(d|Frcsu9Bm<m<NqYZ?fEqQBfr*-XQUk%hLWEPP~Wt%<~Y+OIu
ztx_ITg*#&tJ$ZWyu;P3asmrkyO1gfTx|!wvYN)DPL=ZSu>dQQp4pHV)&ymD+_dlZN
zjQMLP=NMuTh?{hldFtZwh-2pYgN^LCwOtpnGKP_D?>fvI;3rpe2?XzD+)aOnwIbR|
zbKF)HY;xb2P08~RKJ}ZmjGL+Is`mcU!Z;xxO@<nB5(HQaBMXKKxLevi#zUhdj2>Pc
zn5URv(0ITfX$8$u9kUa|j!O=6GTSU=UTtvJt(<feZ78u*VA0hX8V^r2U3bp)L(c2N
z$aO8{f;G*?eyZOmj}1Zt+s>6pn#1Me>Sw*lFvmIBHTMFXT*mCF{4Q63tGn%XVw}Dw
zCb*q87y)6)Y8FBFOTHM7B8Jz~7gfN=P163C_FKnl|C6%;bphFOj}fv5ebirDB0*Yv
zTzmUdrMQ`E{W@WZXve>{L~ws?iT)y+Bdzp}MCRDwxXXEI(>8I&npO;K;C?m6+L`+p
zXa4bD61*Pi(1t&gH6NJv8qd3cJ$g=8aBLF{Hee(BI!UDeV92tx%uh`_BHD}bhF04!
zZgo!S+wN>a<<z3|mqbrHUZ;9|%{8#688s8Fo9Frfm1hm9!$`LTZhgbMNwix_J*aNX
z;o+v^3}~T+4l`QMIk#sXDuE2-0&WAC)!b6en*0_wur{Te)t5^ipd9F!fvGpDsrZ>?
z^Yw6h{<57c?g9dizIb3m&Z~^mC?Kq5E9By#{A004$B(y1L2B{rWpe)9p{{*c`-dI_
zq1sK~o*`FH#N^<|9t|JHqhYfa-{$5pWJ9d{)0b~Jpd@iP)?udf%<1F|FiLr16K)o#
zZdRv^FSa>9VB*E^Z0}9Ba6aYg9DAibJ>H_*yAxkiEz)w;B>kE9K7L&%&+4eB4BUjo
zC}!M|0~-sHGsdIU&PW5Z%Na(IYrpajaKYLJ@9-S|h3o!E<4I&Mz<FD3d26*{C$LqA
zhuG1GNF;Z(4m7!h4*`|uX$l!AMj!Z1BWOqz=+T_3ad<aI1YHX42^2)Efp^EyyO$9y
z3VSX4;)exOsQSGA&;gv?wRsu-FH81{Y0ZXj!}Tdo6(W2d9dbl@D70Cz5ol~gek8-j
zOz#li=w6C@!Jg?y@e_A6M@Q_Y89J+T!?#eg<C{vGRMmG-btW^WCR1mOyx<~_sz1kH
zM1kDERTh^;dK9M<)_!f&SY501(_G7i=IU|k@&|bp-Vp28MEDZH#5(vegf1UdmR$jn
zEjZ<MThuto`!Cv6s#+@dfVr!~<`L+D*ht#IGdLG~O4)IUfGbL}K8kykEie?9eZk$e
zmivWr&<MZ#Z#2IkQ|_6y^u!<tf`#Yno~u7kx*@iQBCHwP@B$y_C9dqa_DbTJa<u(K
zm1Mwvvlvtl#DUjecQCid!$~;PCxU;Pb?79sLNdnAT1!|nCUh+Fp^AM3c13_dEX<C7
zunS9G0Y5a>R5{a65^K6q7=T$nvNlxtmLRWIhze!?eF;Go&D}Pn0f>yRAi`i;!o_$Y
zlmFr_tKOF=muvmvDFnrfG+V8#EkL<!_t+^w7xf3bGw3Y1uLx4&>`BX5Pu|T_tJ>zo
zw5xH%P@O%DXiVU*KU7FmIY2*9vz5Q#o3@dEAo~=2id*vQr!|RJBFQXjLiQJ5-$nhV
zlt`K^Aa>N7k!UosAlVm$ZI0oh9HVU=q-x#)U>=N(BkKxqz|y1kCN!8zpbIBMlY1WI
zP!>>?{cH&#_Y2%Au%@j5eu?#wmzlLTvLrrge3XGMnphD=p++uXY3|G^x1s!1ha|T$
zh!P1<F=BJU76~4v5MV*?B!ZHm!*aZx;OroHf!(B7CD`;BLQ^^->g2eG1|#JBU~|0+
zr>iA&4f2J5<gRi)H0uQe9n%k5Mf}uL_z2pRx}fVk1Qc`B?C_Lzj6Q2-91pX7cp@>+
zbM&x2vWC8?QScpR?+9zM89uIrGYf>0kLOE5I%p5yfPj^!DvhDB15VM})hIr+s(N@y
zrSMyvY;&UHADK{`N=w1!ArOo-Y1ghns;x&rAG~cHS)UF8`oL}(&<Fm5a0S2@zS#4R
zqIQ)XTUb_?*AxTJ=2<sRjwI7wpSMVi?fUi$sr!wX`;Dm!Im&OQtXS^z+>RN``vN^N
z;MY3ws2uwn<pK~Fl$O(F9}*YfP3j_7ka|LY3aXg#r$e14u}Qm~QsmJfR>v<)_@dJe
zi<k(f?TPwI19wSh7oUGDaKT+9LtILFRn+Zqa4uX?Gx8&MUCg}1_;+z+K+hZr^`sC-
zZmww*8PqHpSyd<ln)WDs(4P+1MCunsAWdVMbcVtD{={HT6+r`Y2N|O}X+ZcBb)5}3
znk%C?!@qz`Q9=^-PZDhZLA<Tn&GSN1r&&jg0sYJiedbHnajAxLUPOTbQ#$2y*zvi!
zl<Ut~(e6T5XMQV7zdZJ2?tA0|1SihD{g@CXQX8hoDrW#~OMm3bv@9jR9bIw(0hS$J
zV^^I{w?7U7W9c5H_atW$cQ%uhX(6uz)<)A7jh)eWQkVsuI3ee)7rDtZ;X-F6TTLMo
zMwXqXQxxS@*_}a+yv&2w!L;xZqgs5a69~fD-m@s^#vCEHFmKZ@A-SL$)ifmw=VUw`
zVS|@#0+EC!NG=C$^UiZBxC$@8=KKgNv*VnOEn9*j&3FZ~J40PNkB-JV-x`|XP=I&1
zf74{&9xy?u!G1DfDh;7x>j5yO$7M6C0^pPpiq%QzU>0o4b%-EiUEl<xr3mvNayDkl
z^tgNp53O*e18l$=DUVMx1&w@!^;}G@04JcH#L5_hC8gedQz*&y)x4-r_Nsx+VP4ue
z(e)IpKOKRsofATF(?L~pkjRAYTZ~}mYjI@``k8Pw^kDqZcFC3FI=kT8_pp1cUVL(B
z+rs{)={Jk%BC0!II+1eXC@~z?U_NbB_<Rv|NiTV#tWgE-d38)0QmisfZ6(P?P#T?c
zhDy#=@7cQJqS8$UUOwy6(#Sbw{GagD#NAho%Ul*Qi*RN;y`s=IGEzok->0Fg>`>NL
zdQR_q&hGt4U&~oi5u!;jOyTz7m{<fpms)*R%%`$87Wi$_W~vR=9{JXEj&36@`8IWw
zraLVN!Jp?-fQo~YOzo@Wo=vCtSkzM_JC;lBf)H`MVO<;3Z4GlloO<uyDlb_-jNE1A
zc32an*}au!Wg+m@`th6afAULL04Z4u?UhbFjhw~2GH`FxrdD(okdoAVIVm(0n?|OB
z*J2br!+zFD3=m#Ftz^7@_y$<tQa=3?-c`|oRwZmuYfOYP3&Wy4^i+*k>h*iEz>twF
zBp7BB`3w`ozJ4xcXw-mD?qUhSVhL>N_zJzqBE8o&Kk9IP7@psjAMQEiA4$8;19w~X
z-NDkU9_1la^rx|+IMpYB%W2Iq#K~6or^1#3onDuQgV<oo8T9Y?Y1w&49CM4lkkS$R
z`j`(IOLLxf42-x&%$Wu&W#hxLmeup-P*_?A=p>{h!<y2}H9NNZf4S*UhkpH(ZGWkG
zo<H`QYk4|h;eAibKO$$d>!BKEeFImkIq7Mx6xsxh%+2H8etRODEqg|t9bS1L`P^`k
z_K7Tuk}v4rjW^tDNFc4rTloU->~LN9@7kN?-`AKh|IgP-0XAWHZO8yK`g?K#*wy?O
zU)o(f5M$8i!zSCvIwY|K8-ax7geaHV5kGm=x>3_<nQ56sBW~DAge+UvJ1e!DUyy{m
zn;kK3U*Gp|dq8}_-+cpdwk!(MW-Ahh7>T`~+~+rQy0$FuH+|C?Hx;lf+K1gXO&6y-
zv^2NjL!JPxKZj;2ppgvU$=QMuEUwxgK2}RFt<N@XA&FGVcVwTm!({nf{(surGh`Wz
zqimUD^X&4Rt@*wkH!{!Yih^whU<evDKsZ(|4AtEM$yyoz@<iVQS_stxV`<B6fX$1)
z4lU1CV4kK+gIn^6zqAF~mF`Rht~}Of5(GjAD+j&+v|Y?I%;P*SoE)kL%v_8!xKHGd
z<W9X`P4i5<<!DVG$~{e6njdMmT^d=W?4ElMuClI3etl3jb{}7NiW~j#gl)0TKBwQ@
z#|8To@QRgZ=8Y9lxvBE-RWqiD#~+^9dliqJmBw^SIhrK^n?8I+MrN*CuyYaKyEm|q
zF+aeuy%_$@rOxd=R%wMWmdp>~f}<g%=A>UqM6%jXoYGO-eOZg17NV}#rZ-hNIWRaB
z@+Z2G<wE9zniP_PwvB4HUJxJ8^4NYxk_TC^b1q-~6kDLjVABW`QgdKHOo?65o6DtW
z#pou$6SSbR;|bdg?F#<*P1V4PQzF!XfePX-hBic-b<#Uq%s1OgfVqO{o*QCsa+|FY
zzB6hE`WvzJ+Vgivzw}`SUfduqQ&`8S$5lwA16E%pjdt|=AAclryjgr{)r&v(m&ymA
zz0o8&z1=ED5h#tUJ&DXw6=*c2lDAQciTG%YSgax-oGS2qv+T)Gbu4VtLN|WUKw8(D
zM?QmvPD8TZ=uaHV7()!oDbjD>UgG3*N2)=Jg<Wm!y&<Cq(+bFgG&h*(gYGzR>?;GS
z*v4SaFoH&}Cy|w(Q7S6ZEtFw^&y&`)tLE5#?CqWp%42Nm)XuqLw)_CX1l5(XOiM|h
z`Jv;M`k^<aPJa&~wT(+vb*`7w)8+1h;lKnk2M*SK>3D#5RWqf&^$@G@`U(1jfBxYu
zUo#0_7bD~89M9L;&#xN>MO)}4WDJsqUzj?ywB_2xtX7zjLC{#(4tnp6a3?-8HtCpO
zDV1OOCF%#pN9@f}$|+iMLVKdvMJ{GYbFsmge>1s^RPZlc?_MP3sMEfh(6!lb(@D*l
z33QF{(fBacvivw5AcVXjfm*fflAQxIGi`+zlGo?owFdDS6G$QRz4LA($27idGf$YA
zv!A}G&v!$suZ~}FBu_>*c+$1Q583TG=C%o^e}pq){5Idc44<9Y98|d5$d*zY-^BPZ
z?#>nve9%~zX-dJ4hzVqwAc`pe5`Ap-1t**T*nV=i9T?SwFeXnzRN+HDDG>JAFm6(5
z{=((0SFzM@Jvi>^5`hthM(o70v@KUrA)Qtyx}M4H;yKLV+Byyrk1{1-2+cUUQK{D=
zXgQ{Ix&ni#7+&MD<$MI;sbR6<tL!z$e>+fUFzO-JJXqm4p^RRpMpKDpI~xEi6Denc
zL|;>CRYkL9`%cl?oqro|ne!6|u4L*`UIo2rXMqI#hLVO~>4g*bH|WD}MO<+ksw7A$
zPtf8Hg-<1(u0rN!t<oo64pCRx%Xz_V{<u=#yxVjs=bojZ^Cd;kD(bGKICFiATIK2<
z9zD%Pr`Grs=3RG4?3Z;XsR@RYA*FI7_S*qLNLn77Uyay*!e_06f94-X+tX-PAvYgW
z;!Z&W-f0ud0lXkk5`+YLv;tau4iq`WPz3be@7~s+&iWjW&EpL~7wFOfx?mB|1-eau
zF8BimW7+qerwzpIN`Rgc+nW;QB7&;9Wa7?s9QxO&Ok@_#M856uYfN=bVaqxUPGnXc
zeV`Z|uqO_y?*=NXK6`UI>05qJ6IXp6o+idWUkP$aVL{=3{V0-pCuGWywP0H4q?RTB
z#a0Zsxv#nZ+}uzjG+2bH1eObxj5GYA$R%2;S+Qi~!gLxyi^`dkD^qCbUPlMKK)@G`
zCgn@f!uN1Ce}~WVJOcha>`SA0u>!bxv4E<Wg};(mP?KN;=d1vCwivgs*!LM_qWyeF
zR96Aw$q;1}0oces80cNOuJuJ=SzTCP7Y0JP$Sbm19gV$*7W|qg)Lppmhmn2Q*|c+4
z$r;uw|IC9p4WTK&5Ws3zIcRUNw*8BPqci+235aSUg#%-Bhqq2g*44=5>bZnS{yc*v
zDHH27<4@8^=EdbX{5K`WjU8)8I#RI)D@{1IE3J}*#r70HtKIDXX^9z;63e(xos#e`
zRKbJ;$@<RLc*@b+NYA7De720W=oIr03zwsev1yfR<u2I$im@hJgL=ha1Dl~z*)-9H
zz?u%n>Y1%s$;9}6H}hzyvFX8z+4(=lagakcMna5+tlAfbWgfo(kC9aqp!ZT-|6bEZ
zyDG>Z5nV{aG&)MHL(oH6^d&N)_EExq=Y}phQYz!|mb#C4Vh|`MrgzT6A8Un<3tX7x
z?Vc6fsdI>4a9UR^g8t{?QcK<GQ;-@89%=zyV>D6~dp(xLl1Pc6s+Po0Ta<G7ye!MI
z%`-)vfMN$z!QMHZP;&A}v6zDRxCGauCQ%4swbC*$2v`o@q+N>fSHgtM&`z(XN4{($
z>=DE(JkxS<-!zJp7^EuNA5pC1bLASzv^KM{1xwIL%J5AC5s))6r=4bU;}=d^uN_(Q
z3HK^^UUK7??9e=w3NBjX)Jx5oaX?zDJxFFLZ^Z)16E(|{BvuVd6#)oW`2OhT<~zM6
zUQh5-CN^oy7}6iev`vAMn}?<1;c6xHVi^agDwR^r&Bqk8=U`*yp&7gr`fPVn1RY{E
zpo#er#Mt@CMA8AKFHG^}2?X$-lEQ1%X<9|A2dJd!ZkHOiRKrP@PK2Z)-?%YIC+Cc4
z)X4d@oD)MI{`ps%t$l>st{}|Bths(VP^^BhbxGxq=SG0Kc8fvNK^0O}+r{3az$P1R
zwz!(kd~fp4*xw(^+d8ve?CPfcC{*CmR+)A0E-x4nSALmtOnKftog5C>OS3rNs$334
zZalfta8vW0O0)>Xy*GN$i(h*#eFP&63@%~e?`nqa_NhvWZ8u2dkS@4?!^S^aBc&Ru
zULh`@nQqM4sL8P{#Gr|=l;eG7l42TqVG9!Iiuu1xBMu9GU-JyqfCL53~{UO3f!
zug2)a!$c>uVG5Anzkyp}*(7RKgf`2bnh>HcOLAa1F26?P2x`bjg!@u!rdNw~a0{K+
z-2Aie;21JX_bIfj?u;)_*&{u^Kw<|yAGq7ci&J8=ZcIeVnz<*G=V3)_Qo%Ol*<}oB
zsU|}=H}wOXn)TDK8Qbt}%4jR%@`lx&!k%_sH)2{GL`^P3qPYu$^K$>$@a}db8;_8K
zVNb1?ik;y=fyGvLh%{PC_Q|!E*)JEOfz+>0=M%CX=9j|1QKT&Y&bEe)>;IwZS=(-t
z{jY5e5)4{g9oxVg`O3ur$U2CJ^j-@q{M?>2{BK)BO7owhUtWKGYqCkRokVElTow7F
zs0aYw+t2V}9CwH9e<q(Oz0at3sCQln9Ni0}4aI(m;S2;{Pkxv<x!ih}&Y5{J8)V2~
zS!BGOnR1_nm1`|=WMA{C;T*jCn+iOL78aQjr9W1$Zsc^@$+?cPqx#1c2Xai>!K-;}
zyHfzR7V&lKa{yD>qOSdhhPsjWLyvVjGx11c{;UL5aCj-&G0!Eq96OAE0bo$??F4I{
zMtXN=Ll4>oB@fP}9$g#ddvOLW&t<5<gpQ*obHQ!h#Z8lL{a23)*7y6TfR}=3`O9NZ
zGa*ji#})sf7AQ?BOgl$I0!b8-GwCw}IMln$c6pd`xv0|mWHKkn=JTw=ma)f^*V?n1
zyh`LgNcensc&;ryu(tkX!Cran5a!rdo<!OlCO$7E=ZiPl-=W_z()%K?dHoWmpq=xa
zzOsYC7Q?8X*GR9QjVdNjhB0#&BVcXnTE1eLSH1`&KQGtB??U9cf{ViwLO&zK_S+Sf
z{<j?Mr<U&fD@U<~EF1sIQH8&9)OXvT_pcnyV!e}qI^p#(PILmhi?oVICb9+xg_qry
zxXRp!OiIj5CvsI;W$iJ}((S_3zcK^W8<1m;@)Bf9<JhO(D6l$oK7Em$0-mHAo@qcF
zTAzkrfcM7gLsb)6dAyND_+)<4d#yq|<kXJyX`D9t>rqb`QmihxjUID9%J4DlsAef&
zosE;#>y;clQ12s00ARt7P+%?~mgHet2TF!z;ju$2IZYyAnpOzg44etDIe+zNTZan5
zivBG64dyW);mlwV)B3k~Y&32$kC#Xe%<Hp;W0xIgjYo`eaTNtepmvxQ<MCcd{ok8&
z^5BeZT)Kh-a4OX-2}LC}h6PGxTeL7K;<{{R9&Ha-cZrnSns|qS$<16nmX#r@_}-!e
z^%KE|?*$&KzCNB5jkbhx(}C9837y1k#2%8jY?5xChgr5?F0ZHWtCH`u5E4=XI@Y@V
z(-j{;CR~eq7x<%)2|Wp&S2W_F0_gcB<9YBDOE(u+MELpcRM96KNn3EYddjZ>b9U4|
zEL5PJQ+H+9KAXza^2I#Kw<=Otv)w$r4HXC1BsOpG3k#u^G_TdmD-dzZWNe@M>*@*<
zRF$XhR+6XBOknoi=+{v9(*FHr4u;cyQ;=Qs@X8Rc!u`7vt;{GOp;$&}iM3dk1*$pJ
zHZ5%0$Avh!Xgb+GtnA=??V$l<b*?^45>QW^WK!Xj-(L;)3y#~cw*7U*<Di6MLKsxy
z%ZAG+W&z_WN;KOya>ZyL$Ke`%KnBOCZ(;XQ2A3|E|2@{CPY;%5_tkJl@}m#n1V4Za
z1PrYy68ygXx8~`epiu;nUSWxL2+~P2@Qm{2j2_I9ePMA-mBOv>F+xY;FxgB+x=OEx
z)M-ada<NBH1DXRLw8k8I&%;|FQbX84Q)EP+-P)BlwMC^@_npMNS5!V-I_m-rX{?Fu
zM9|lQQKlN;rf3&*)Xyc#u28iMN>EU|QwySzz+xBXb$FJpjVp7N_Kjur^0(J)a4Iou
zTn}z};2I+3zK4kC$iE4q6XDzZED2K~vr5)RZMNg0UY0wQy+oD0Wap1$9y$bH`4~7y
z@UvJh{Pqlq<c{~8znB>yyj!rsUwRO3^$>5rh+b?Jo`9Rs^0w2JAr3Vi*h5}7jSvk<
zDyUm6de$6^%MI61x(gnuEQ2oR>>3vCU1R{TW-$i1IM`yxT08stM|Ric<;jLMVYb`s
z5q^)jcB2q}U=7H34zQ@EY&?({MGi!WGYy80+ls?);@L;LmotC*g3UGHx0M`@kIUuH
z_Rb;V?`MH6%WKTjC)fEASvEWmT}%Ydm?}Wm0QZeOmhBt7IosnXo^~I=xv--WjWJ{i
zr}BafbYo;37skSkjF9p6`BB`K^9G8;C8~%EUz8aN5)htookdy8Nv@@4wciWTTbV7w
ztUWO}HYickVu{aBau_YaA*ju_O>C{;<bem$VLP~ul~x~fgW$<GEbH;iT4q!PC!3RE
zxYUyhsptwxwsVwp-b%VPy*6inxDhXkZT-Bqh`kyMUz*{&L5@4EVw$oYCI?PUDSr%m
zt`4fP>A_p>-von7Ls7{M==Y|({?I@1xZF#z%u~e^R|`@hi&(_dglE{<W(`>!QH~>&
z^d*WA_MhD&?j0k2Y!^N25s+82tZYzZ)Z{x()@sNT`C)&~R1vfX=nq6ZD>WQEtMkMU
zLvYY!ql?!RSa&2ig&geJ!J*vibD>6l)VqTH*#WW7e2bF<^MJ6k*iv^g!}y<S#F&C0
znBWRNNCc`pTC<JP1=NwJ?7s&R_JBl=SO9M!TvG$R^E~nvO%mHLg1}$!;=}+^VWxa)
z$wMGk@d6ki-FfvZHOnCvRwx@>|6W@buLkPyW-P3JLZhkUk3sCpb>0R1)iD^O4XAJ4
zJwt~!epd5$|0~2G#(NgY{Ti3wPDHdjSZiRw%uzbMa&Jg#+0Ma1ovDF;s)A{@p0E}>
z_2Ag1-|9?@I|06jho>+QG{YH3yfW3aPaU0ZVIFpLwzOWG*?z<23-o&i6J`#KF?YG!
z&|mmc{0ElYc#p&OX~JX;fbCUc6(|1yCZu=-R%&B^aNdl!01q@yr45Q4LCT(sGcj3%
zn3x*FR#eQBjDFsy5o3-Xf^FE&Denwtc8z_j#9C>5zxw@n-JYGjOnCdBmasCDOT!|~
z-Izt1eVb?-lts0_E`jHpG<{qRc|?l$(^y^8=3pPL({WbMQ%cOgS$^&o7Znv3VbPM4
z(~*nzP&<eB5|kTNKy3&wptv*5(FP1#qNB79u}8Rk!z#tHe58+1uC%4(1zvD{7Rp+^
zB%o1QSv_Ev{7|+@Qp%gNB1ItBTHM45FgO8^HgHh%>v)e0Do(p>_sUlECG*IfZ|(qo
zrPxM{ZV|-CN8B5XHewLd=qswmz48p#dSwvHaPhCx3ZYjp%KW{F9tCh8bR+GzJWSsB
zL7CpeAoAX$8$J^-T07?w9@VsE5`GZJ;>$nR@JjNVMos#%qhwWzDOE3kF~Q@g`!_Dq
z7*p{ni8`VpZW}-^GrL!+8SGco$?h(}3{i(>^t|SA!79mYd;71T$GYerys$~ee+H4;
zQu*~AWr*h<;PBq?e?N3!<$3<Xy1sVdxJ4Cu@OcjrxNEry`<}ct$j~C)&8uev$Xq<p
zhQp~@^%|+4#pJ*6=r+_k9UGVcQ}jz|L$a7wJXEOKO`LFCzYmLD{Ci#g5w|GV8>pk_
z+*_<13*1xpOr#2n4kYuOCgE%;Np0IynMTWjB$7!20i2;|p9!NUPUKds5z^RAH>zAj
z?-GQK)XyM3_kKO11p(ZDBRKzUxd<D_|9l6#|ARmW1z>ONUwp9~5CrRiz3;R*L>YtQ
z$#HAXo5;-Y!CtP)Y1miK(@)YZ9GB$$k9!YmM`J5@)m6P4-QAXNF73>`_x=%_;8O0T
ze+8%9U%|OK&~f>A{-9P!_-NekENobGLU7qWS)k|IRNsw@a%EH~Im%E5fu`pyWeGt&
zzvXiF*1)j2s`i(FqWuQ}?RNf$fIj_4_JL=JWgzatS>~mS+ar#)=O(=5jDvF`3KqVB
z<<K9*g9T!+?M+ael#%!Mw6k8?er3T?8%o;379;LI<sxYC%Ek2tINJsnJ_Q+)VM>g}
zc*eMB=9>02c@WR^2V92+X#|n1Jm4cqQfP@mKtlRzBR@oE=sx#Xbl`e;8nQ}gmb&G^
zB})P3%6|bUS*@dr$*D2ztDMt^k?C6V<iD~Ht^|zW7@7Gd1eR=RUa!;-sA33zOYS_4
zz{yQuy(AjT<%LWb@k2vrq*6UQiR|2%SWccCWD;8bPHtb}{t~A?_lx~o;Cy8Y+fxcF
zjZEJ5i61m#lV1(k(GCu<qn&@Ua(o~-JR$~c@Io2KSyd8IUh`VTyBGW`U`xA1>A5jd
z;-c;HL>GGGiEq`SV*a)0aB!>-@xC>8R7#QrrhzLcCYxQ;oQ3Xo0^2n(+5?jRQk-ta
z0L5u>=$-N6LJvGz_It(&ZYXp5zXH%BhAsdAt;X2Lx@bTo8nZZ8(s-pV(*pp|ccXJ!
z%;KB<C5zsF0B9w5ig8#X6FV<bBQ#YqZSh!>2z7Yu*osb*ND$W|j_=0*2B1y<5}ar^
z@G`lG=DU4qli4G1k=X@0x;K{)TmJ%}E?0HFBH+ZZGl~8ec-SjI?i=tgDnQAZd*G}S
z0(<P_k~puDDk_K}lT31oB`e-lje+oq>YFrny`SC=kMq+Q_<H~1paO=%Jvi)^6wa6&
zn<ig3-stXM0dw+6y2?1?S5r0gx+c)=<H<AiW=GdMAN<qmTSn4XuWm_S5-*u|=;s@+
zX4ltUy@XvM3z47Te8zs#c4%Gk3xacxdJHghk^@Nrvr;QEQ0g2O{=_9%i-~IRO-3pJ
zQOH5AJz^0J`VZI)1If-2`v2%721(!7>Bxc~sh3U<NW#gdJSY>#1u)eq+*NM4&K1>%
z$O&ouCCdRJyCz-o#aCK7jwd`^OXnBF56L3NqU-*j`THqKqU5*U<RSP&4XMLO<g`$$
zzxM!`YZx#Xdhh@+*DwIoY;Xqzp4Pce;ZE`{Puz$RvKz8ae;Ag%wB<^vaX$f0xC3N;
zH0j&47i8;B_PGWQu#>>aUBtgnW@OD@sMV?_>Yq`qc)~Qt1XjiNEh*K)qgngd87BGE
z4GRm=Zr#WE0_<VJ(DsQ<{t$zENO*LHq1Ey^luQa^U{$Hr<5@0Av6KC3OrZE6GtK>{
z>FfpoHkVJUOj9J`7e|v*4Y^jvY{;r?!nDAn3IJM4#iINko&W<x&&pMOno2WWj`j%T
zLXS&K;mPR_mHM?dyIJ|18dRz>;GftcL5-b}c;o*<b96S8S;jL~_ayv+dAo5b@#jd4
z5aQug{FvJ7@<}}ehL=i=D^SDP!|E`ZPtw3V>PS)QefnhxID5SNJxac^FO*|`dGBUY
z{FAOixbCT`5W_zWT)y0F-I0$hfb(9Dw6)2~C{uoo+Mb`q*umWX&gT-gLKPMc;@xq2
ztz*mTG8pt9TqHmu_XCi!jugQ7UTp#(r&boT@~)7S3JSbS?=Zg|OR^)o_g1;E$9%`X
zjc+Rvyd3F#IZn^5m!Fch97A(HjF;=l?fqdTAi%uaZyr-dY@F+JWg7!8&r{ju>!aVK
zeD9{DhT#Qjzp!rhnpCyINHS+xMlM{{0d!908GyJt+04qb*8w_*z-nD5+f>Qi|8^pr
zc)^cC$glW5Vh;KyAnb<qJM9L}Ti~s@Z8Wc<E+iS}^)n!hsBMBhB<*3mX9Io%`68B+
zgL3TS(7N(=HgR<#9w2=8!j@lR`1$drigv3PHYpBFs$<lM%ak!`27s?GG649R3&4<L
z|G?Ld>AY4Ds_`iNf=zvLo={#G*%+b3xCDBeMp2R=UTUd;5s>guSXz^^_((J547G93
z5&l209dApJTj9o_3P(`&QhnD;O%au(Vl0|~b|t#dXwqqF-zP%<0~{}|AAniGjFU4o
zFYo4PJHF$WH@S5UhIOqA*Oht|yuV!}W0WRiAl+tQc`FyS+`K}sPK^@dN>Uh>gzCHD
zg0TA_`#jJ=+n+y_?gTij9~S|5_10fx4+t%-t-r|r;U8Y@?E;jN2X86PY_;#!+P=+L
zM|b&3+KLMxl#?d0NL3R6lqkIcK#84yQR3QPl*r~~#=vQY0mgGnw!L?rMiCQ*RE4Jr
zAdc~X)9@^2WiwV#sY-DyA1epF4jXw=NaX68=Rd(_q+&xj8}_NdD~iOr<mRl|zZIjH
zKk5#{P`G4LVi9YVdNR$O8_N$?WL;ux7I^?1drtFd7@R9jbw``uvhrSg_PJ3Ozk=ov
zRc_PnA`#T@b2<`NVKbhoLkSS_4PVc06<<GFPEvs9p2o#dr&aLrLuWbgJtEwcN!Qvr
z9b&8|N(=T4zGM?mziL|oQ3m>MIkXf$+k5JHqQuVBzjt3VP7iOr9`_oX-*RK;FJ91s
zXFPmIO{dA03(`#}G*<_3wV0+bQ>C^kiNox5K52!pX%u}>=ig6S4HxnGEpKuObwOqa
zF9V>(Zb_%^;n$8lR&fas{a!qsPB(?>aw}rwS!_*gxXXORo}v4uTkw3}2<>|XmNnU&
ztD~Q0X3hA}-SiL#H<#HC2&w$UJ-!DmADrywC+9fIlsA+2Ld{zK{26Z=?Lebe%m$#W
z*t7q-6N!ZYsV5S&QPQ<q&=upLJ;xPBrj|eq7D*B3CtcoKL6Ob0eghhmoJf)2SW}#{
zgE2zWez;N3FVR!XA}XOulR74E7uFktm>ab>(Bh+&g_OZhs|&^YrRhMed?7yg;5nKL
z%<-)R!{SqqgePfb_ge^;RUE37Jr>D*J_Bk_vzk|IlJ8dC+BZ2gS?j&{#Q?cw;xB<0
ztFfA&w=GG2MepKcepIJ%T?an_j_%gXzHgm!uv!MkOQ<qYMpTW?P&Icp3scV#z&A7*
zi{$C)#VW&7Im3WL1n(|;m*4m_^GY1L`4r)gxC<c?Ui1pmbro0gXD%$d9_-%$0d~Yc
z<MHmMH{6OvqPCxh#f9YuToprvINxlu7mu7#-T;uET*Wd?zsr}UbIwf%8%glr-Vu+s
z9sdo^WM==*yS@Kki#C}4PtgV&Gvog}`SqWojV{1GU3FdO2%|4xQ9v(c^TN1bO^7z|
zfPK24qKcG=zS+E9y|or36<H|vArpb>3P3aw-aUuzPhA_B%!&n&K^RWf$|#|!QPpRw
zOV#y@jj{DnRFKrBYt`R{aI)fRM2I#euK24kvH~-M>qyCD0TW4vOE*bNZD0+cPC+on
zfmuTkU5nD-k!7jW%|3(bws4JO#r4%e=uC6Y_lndfgj$=Clm`bgM5qYrI#NU*vS7|l
zxRxhi+=T&aWjcd2GeL!74plO#z?9So4r}}l-;s#eQDf{RiwwC?D$z(B{H0*MNUthI
z%igId6yD3Fpac|1)(?fqrX&@40$bUqT9Opf*P}oK6}2G4r7;1VjxDbA&7=t$6gYr3
zyHS8kbz>vV5&7E65E?{Zd8~^3CtLs^eMhlw6y{dNuvR)ms{Xi9id@@St5B)U83!ae
zg*Hb9sva3tnyVnpT_rOrRDvxq4dj<8sEEZvAQaBH)>s-s1&{~^V<&Z7dO2}%QJPl+
zjG{=L=(VbGlSv4J+(M*Dq@o_YZ&FpSP@`2@Z7)<6vlQ(L-3VLvU{N+nFdhm>5tTf`
zAQbE_-l+y^JQkyXpl6z5p7G$I^@+;HINoY?KV+#n)pg&h290-~HLj`tHjYMN<GFVI
zNncIW2&bWeG&up;P$HPG<q=AS&SFlijx|_Zyz6sQ7{Y2F)I==X!diBg^T3Oq-4QE~
zE^W%0EvMJhx+l-)AJdz5+#Fi<<-0}EUKU*Xu&Xf4f%%_&v(Ihkp}B|ZR_!xbUOaaB
zb|bA6m!u#Q`LXE^(?;L9^8ZAI9*y6T!%rV)Vm9#G>P{|kjEjDx5b&N}b0Nhpi5D43
zlRZUhZTytrri~Bw&-2cfIMkKqLWp0o+AO9ZB`LoY4<0Z`-(SE4>q|#F`B^pZ#l%G$
zkFwK#8J^T#h8G4!QZFqlDxeXoM2ZgF$6QaY^xAo)>~xG1U`HFJO+ojVpHf&<B1wo8
zOc%ejZtI6eQjECr%pqbZAQDrbm;WQ+sC_TMpNg<p%&^R7!<2#<<J~@KLWF+hu=xx-
zqeb^F=i9HMFXLOgY7d^3m*l0?*FL+8s)+TMhw>;EY=Yb@IJ1GDt5<hcM7(-@=|8%<
zJB%iMM%PV1WNDUUx5jTyCU;*iZAmf2HdP5Se%{`YUJt(H(5w4RsS&>TieUyeQ_sk=
zzQ2#xm}hxK8%@W$l+NvLW^$yuwWfxaj&qDJuCA{xW;wI$A6R$g)L@Wh4qR>3<mOhH
zopIf`7vRdJ{(R6ND>WZ4*mjLf#}Mb7ToO6B2)IzR;PIdTb71M=!Y=N;bFxYee{}a`
z)&9;p#9&r)h+jsxbT{~lA$td99nkfoTl>ePoj4m~&Z~p34~tj5x4aR}%+8}xM^3`z
zLp|uOWpnH1QloNOl|L*tIy;t;lwIElSmF(9S7Y5Nd%LV{!}@M>yA7^(p!V>$m?LE7
z=*GGjb-Wr*U5CzHcb6bHkJbat#xR1FOVlxq%Dv_4>8~#t0a|^tjf<7YLaZlmim6mJ
zjIfOYG;Ni&_=s}E`l}`Tok_<8;zYP~;}J4p3T)=lILS~M=6$~LIGVTdIPPujk1c2O
z(c1`_mpmE1Sz5OwYMfJklSo@6L^|b)@sLx;{C1r(dd!UR*u&exws&)S{(l{E={j{D
zy4wVq)qH19*^Jb7!QCu*BUSFMwdNzrCP|b_cOENJrpRVKNs=s;`ED{tjVM;SG9CQ5
zNbt7xmb|t%<0;JVMQ6xg&(*nAOGp;|!Rxg>z4g@a^pUk;+A?V4a`s(FM?8El;=jC3
zcs=QEL)1YKCG3uiI4HW9PNF64WIT=La1*!E?uQ0uaofK-VtlT;%sfebK7C1jKHlIb
zb5GzOy7juQ)UVza-#(AOleEWN6Xvh>)LB=ag#&*4uIt=+>^^&wutz>D9g~X3z@^(R
zf5l-uhYK5%xYc^)xo1c9^}Fof;0sQ+|5O$D9~(U~v;IF+{b@<ZZm^?tPuC!UV9n^y
z!)+syjvB-p18-YTPoA-5<zSlu#V+U6HE7ZT3co6-l$2T)Hc8Oe&twUuP7|IZRNo^9
zG3|8OI$zeYx7%P^VOqOE;q4j~C(IWmjM9+0+d3?(rwOjmUyT996Z6odvQAGGtE#-|
zG$uat&{szJ@rTguz>}EK2^j&#YaOOuUXRsW>s~GCKw!x*TdZG}wIdxvEEb@M*Bju&
z$O;~%y*1YvfDN5r8$C9%c!v`H7iDi571y_I`{EWn5Zob1aEIU!+}+*X-95OwLvVN3
z0KpxCySqEQ#ed)P-ns4UciX-NAL>K3)oPA{IaiI|$FFO%Wx{aITNn5(p%fb9dYcv@
z5<TpFnUp2DzIF13R!6CXV_pFCvaGw^K=MdTG)X#yPlZo-_iH%rOpmf~g$V)6aL^ya
z69=F?yMTX7aV9{#PsvwG4A9Hqk-=<&GUNfhjBao%?|DCUcUJN8bh$v3C+L1ihpz!P
zu{$~c+l+|&oQV0c%*=FS<u4-30EaJc(4_{xX_u9dwJz~T*DgjjQRky#8%-bG3xH~E
zDV+nJ{tVc+D0qZ=y12ojkrd3Nk+Qv~32ATod+vCL^xi#4AM4C`)u~@%wN!^GN{+LO
zg=jyWtB(ZrlYVJNc(9kJ!~|w1<cJD{yVPucL4Y^RDet~(Tzt$<l#7u7vlDMc(Ux_>
z|C*hcR;HJox{18;Nw<h)Zb^gHpGdH}mKZo3^MHHvW)5YsB<*X)NC<EfjP0cfHtv+6
z=^DeXj$jV1^kQ7=u+FvI=!5^5ov@Cuakxrud7ZA~N!>Q(zgy(F2NmLo%tlLkgL|s9
zHI#Xq@u4o0_p(1!+=N(5C&letwy+1<7tS7p6rU%lIO?go$by3(4kA|NB;ZpT1}<I$
zNrE<n9r~CfR)5)dLxtyt%H%u(jwYmmlW`$q*%Qp&MTa%b-Wd^wBf^7=RSi$xJpWkm
zNu}<u7ElZz+-Ff<RK6o)hfvtYIkuVcq)J}s80dlw0lI`C3ia9xZBjiqLuJPCMFu_{
z?9Y4TWw{ox%1e7&tu;v;5h@5Xte?}uNA}d7#~uglVkYSDrMz{;7r2($&zG{t+18o}
z_*(C>&Z-XH>;ciIb2ZBbOavsxMzoW_^vswjw|TPmb)*L)K?rw$d7Ur#?(|@H_JUU%
z9$ON-5S}?D&@Ix|=miv>VSr*!hGAByf=glPOudo5omrWpne*MW9M01b0Xos3pFZ}x
z9+&|L8PVqhOfsZUHh=a4e)UD~g;<!bsBO_!H`IW>p*xn#JLB&CWYYY%xlq5~dULf7
z&8Du6u)kGeY@qI4NKxc;>)B0~-uYlZcWqH>Q$X;q#d>l~zB}ajFigIu?=15#deqDN
zdf^mFK1Y~iuX={igtARZ_DDU?NmEF56w+*dD~-<MJUDxEN=oaDx+e$qZrJCeT&~(b
zJ_<Y^#)WtJbNr>*)_45*;M#9JKNGj2ZKKS0+zHgcg1pUFVRS6BhjVQtx4ravSZ=?i
zdELp?mgue`V-wA{H7e^Vt+)zmc0mhdR$ixJf->^mJ7ZI`4>N}`_ZD*w)(YrXqz@(~
zeyzuTH;k&-07N@4Ww`E1><)yP#mQgwIO1kDqH6|wm#6D3o4@m+Q&dP1jws^!Ih%8z
zX@<T{x951cbo#~kS0>l?2c-0#L75%L?LfyZmtzzUqTOBaVK;!qqs^XEEwB_8`5waW
ztXflu8ZFx##*)FdsH;Zx@f4Tlm}|5~r&&x>v?fo*oKH|zd1>iX2<lPz8;<|!V_tr}
zv*bgZl-^sWeSpiU%WW|@l{Cd?V}m>tUy~qetb8_?C5a{nQ#CKlfRY?ZI6{?}aKMg-
z!!vd7i=87x7t?1EQSKuMwZZN+-=X}2M7kdTo3{JF4nw!(6O0`_PDoZCW)gD@TE#m&
ziCRg`m~fBoqp70%l7G2)6nMnaPBX4K57=N5`qx3K_nl_d9{yd`=bBwG+EOsIz!VjR
z`ZrKRg}m3)6b!Ox6xJ{#DFr%_&s-TkT!FmweBiX|L7EV-nrM8t{O*cT_9X)XggP<s
z58YWXgsmx$d{<tIdUBz)7Soh*j66S4e8o;TFJY>)+-Q!8a|Vl}e1Yx@3VmZaGnn~H
zyjq)o008MAA%xkpd;Tp3RsH-4|1J;`Nqy}CQN(-ga}OTbm-V;WuGt>567UCaUQt2b
zc%e6S5iaOnu&CT0$b@|q5GRiCdQ(Gu?86@5jucwt(uSuDr9h&PdDgwbh`5{1cJ}BT
zIxe7nPI!!@I-6(TpRu1;SlRsaq=ObL@fr}!p3pbJ9pvpodDBSnrkcHDKatt220UqU
zgPZvIX?;Mmj)SYQJfV-}ZKz$Nho42YjWk1-<;*DWnVTu^cnpJ1c%&I~%+8JdeqY7$
zy>)pT)LfnyVFm5C0*2T#z5GL3R>sOZl_{di;`<%u+=><;Q?1q*{aUHGl-XN`Lj+{1
z6%z*3OaztX+pexb($3tdn`dzDs!b23VdN&-Fq4Eh;{<*B`~>EFwAZCcIedwpivm9M
zXpac<f4Ld|*2?2AJdiusKmz~w04Ae>J0!KB;-t?#;;0eO#MbYCDlB&n@V0a)+sZ=D
z$JW8uiO>edjAE;~><|~<K`$}4=AP#1h^@u!@vMJaI_yuKDlx(JHxQV0QsWk)vA`f1
zi*vSACNi<`j5lrccL>YfLu7*)PlbHG+vD1s=tIys+JuTmF;?bWo(!mPsm}#*_<A>3
zs7zp~I_N+ds^X-@aB!u7u*OuhyYE`uL#f<km<eKDw;+(A_J)W+HLAJGBbHF=EKW4}
z@(2mtGSYUq{Ym&&2bMZ9l-RO2^GE=-fB4|Qpcc-;?X;+@;QnV*1njdWk0%Vr=+DCY
zof~y<&}B~4FXg)O>7^hiMCO9F4AYj#QDao_ec0;-4{ALe{jxJAD5OHUbE9RncrTax
zh`-lbz*`<x{0PcXGezHBYkLP+h!|AK;xY?$$V%B=asc}ml{ce+)dSE_iOh<X3hGhV
zOcr`@9|V-3EaneE>umxF8!&*Cj$i>LIs{5o>kX9X6e!W(){hbyyQs66@P|dab#KIp
z67+2>$hdWb&~={8Itdi>ICLPJzWu(PES$cc-}N3MFDS!Ph;qDgruHOKeMWIHu$r@`
zo{?9v9{3vi)bTe+6uMDA#2EejchYm=I#-1=ARxZ5mK$h>(ivbRnQ5?Pjl;~HXu&Yc
ztmR7mTA{g@*wbh*Fprh)c*C&zOX=}H@9UTu{(Ix#{|)eF`_I9<>JmWyDRL*!7$Eb7
zpQT=p&O6QJ(;#dj?ZgsBw%lU&J!O{mZKQZw|IUOaW=6KymX}t~=austWq*700w=1t
zLDwc#m-qa4k5aEv?=83}1e3CZ1+&7uJtcUXPhX|W#8+{hli^=RUIvLq_cn{U2ag#^
zpUQ0nM8!#l9N@uQ=~9)01xKVVEPdLZyPe}}ETNEvIH5+o4T`V)AS*S|P_mv6Ai<6{
zSbn1ii|H{`(*0#>^axJtsqXywRgEpq*D27f*_Zcb<~JsI8zgf=v3&=nfiC)HDZl+#
z!S+r4(GVXhhwNxTaM)t!V7`k8i35D%j=D#)6->_d)Nv>ZvO~``mv1Vl5L6|q=P>G+
z(z$Pf>$kZKn8N-_KBvWdFc<$Ms>seUe_RDjnM<(Ju~1S(>D<wH#VHf@W4gNON~>Q)
zR{8gcPqCH}x~T^$IYMmVZ?C!KXMt;(@ZRC0$iE|ovrinL@E$bxheUILn>p(r$cK9K
zUv4h?F{HK<Yg}z#qv8I3NKtgBCL=*U_Mb-3$!uPkgweDxk27Kr!&T8}w>GG!F9b;P
z1pO4Dw)gxM;BcA8*|nZLLPd~;mx+gL@Ue*QuSuVYW4TQA4EcR)(P=Q#_W!(k)euXB
zrDExC4QIW|+m?FJ5w6KVw*-e9w!p-hSL-HF_Uf4@J-1&y6N65O>L03yUv4@fwaUSN
z16XG;Asn4_{W^)aBNOHfgH4c1WnPQmq4rQk2~n+adx=>ul$o4rC`KM(&aCVKD#-<d
zHA?cV8UA+7;C|9VRkM?2jN*uRu#}h~HZ+uajqV{;&`}xw`@*Wgkgb*(lBI15V8l<N
z6Oz0%J>3)o2;h-&v{euu2r}7)<pBkKHBIb1x&HCslTzKrGy%B2c=z4BctksQuFy11
zIV)dCe-zrF<~7K&P-91l@Yh}2uRLw)-`~n0Hnkii7+@@na<I?!P(wu-vDSt!oYZ>Q
zJJUuUuVrS6tgiA~v_?C=2QPWVd18xdXJ_`^t$4XOJ6$>^e;s5LW0S>OB;%SdAQBZs
zE+tf)>L|MFRFT#LK{rzzoQe}oa-F^`*oN(oC(E++y0^c$30^8_nAi@eK1hwuEST3X
zvs0uG#rs2U+Jk?N`-A`a41MUV#7m;$N4}?voAa3?c}~9OJ^BG=Te-o7-*M%iA<1+C
zI;fEf>V+>~1bRwDp8(h*Jja0QSm&H9lQLvw<oBDC-#g5vvNu+j45o0Btv`I{4W#1f
z+5diGm_tWHjgLb<*!{6z{NoOjpY-?;=RgcXz)$p6Ga)g&1iDm&9uY1c+#wB9g&_^g
zg>jZ~dmyXANK?WSPJ$Ii_S5`k5|;)HS9?EI&!toX#X~GbBYCTqOry!F%|zZacq99g
zwO~$U602@=Q7vKNuFwpt71S;%Iy4h4`8t=ZbcPT@^%+8gi=jd7J$FOAeV*<!(B=6x
z+LK{l`pj)}I4N$9-XP{JRWWY=%kchn8<q_IKF{t`73LVM6XUGCKqR+Dx-88lrC2rp
z$D(Ei&GESfdJgICG#+%K<$nnGcUUcZKx0-Ya^RWAQbA``6o@N$UR_zE<lL&(9Sd6!
z91MmxHUO8i@L`<2T8ie@XuAV0cih15t6q&?)i!U-z6ILsCw4Ox<Wj@DRdeX>SlN4r
z-oWPdox|*KQ*1;o*4Z)7eznTc+x^G={cPcLy8X1hVxyRURgn)*=W$DSfZ56|l`zw_
zOt8A~+uyo|QJbGKddE<pfXn_Ul4%Q?rHWlLA*JgI#}DxCP=+4JiyE3AqMVQXAb#R|
zVANXsW#UqD&!F`b%l)^Ad1u%3&zPu}&W(R)KotV$IuXIc(#A|OINS9uU%p2nFCtH_
zGE!6kSQp<UM>J<$(SXSH6qwY4_vNsdB178srD5@|C$2{$D}lzAg1u<~NX1~<>mEpZ
z!0eZmht4y}Xnvt@>o;Ef*nRm)Rwt#HV<((g=BZi+6Z5rYWIRDAI1r}X^ILYJY26j@
zA_O7KuID&xd0|tMiSfBfC5(5LG)n4Q3P~>iTvxtdl;ZsYI|r{xnm`JjjN`3yUMTVZ
z@3oLQV6Dc@pG7RGsw34LmmG?uq5@IUyj=@?+%@3(uQ*kzjp$Gv?e*s~a&84-#B?N4
z-2HL=VnmsOC~j13eyB|<2u?lq-K{qz<uL!7<BjRxwJlhg|F57AI4vL0hni^n^WPWG
zeS8G!IWByYU>M6bE#0<;94NYrQPgb0?*LX_!d_*1SY19@GaLfO#X0GP2H?tP<j&VW
z&Q9(vd~UTfvNf`O1;bquB@2zq+Z0C`u&hgCYa%~ygz{?ordMi))2~$bG;3Kq?h*+=
zI&m!ea`5+8%6h{8fpjK)Ksv8>Bphnznihd{qs?-2ATZ-k@&IZjp3}t&yC8Q?wXm;t
zE>8aN@ok>lLPq;FNrmOozz}^KKD2R2N}TM&{%*oJ$1*kOn1gHU+Ap5!FkAHQ7ORx6
z56sa9UpUH{=$(%cBd$HPJnsK89eo6RgVqZ8rLmJ_EP$;3;u55m{8zxYBoOdz1F1V2
zA91e#52Um2A4q4jV(vX7W1e&_@6jhoE9mgYasI?hveeti`KRY{Aa>QvR>LGB*dxTj
zk)CO(l5g7D@bytCwZEo<9mVsICLqtq@cAdB$Yj*$BR2|cu#4P_6_D_)hbwit+3(`C
zCGe4L6o7WuWkSlEbeH~5Jmppts^EK+SV7IC3Xt%<{*mzQ0wjFH{wv{I+vX$To9`px
z+Z*cXpM-CQkA!cYPQ#@CO8B<ofczYJ9(rHL9R5daCIT2b8%>*lB@cH2eezzuWa$$9
zV9FyEBDllNt+9N@Q}Iq>C&aQx)4n!tcgyTi6>0nK`a29Jorgyvomb`<BYSsj#pDy$
z`5h%x&d~qI=yaQw@>bLDe=L}?>qX%T{~d9j&Oy0B`cJk|Msk?TFYyjl6=a-*n$!k}
z5DstJ^TfUPbmaQ0(?<)L{A+;G83iypjZS>JxZJM8g?2xcxH-6ef~qXGYzE0Ro7Y@p
zJ}1XGK9;$j`CxQTNP`MXy*amKRdJ0-(qg!TKg(=hc2(+tU3+?8b7|1wpMk%-lpfd=
zb$8v3nd3FX|2?_s+g)Dxy(~MBt`m5x4dFfXlAt$KzB2|K@K!1IDWS;mN+!et((~bM
zk5aBzPBm}1#Ud$*L25LW=K@vH6I}{l_sWX01w$6iy*m&EdNW}%&+w&-2uJ(26+XRI
zcXpCcNK=1)w}Va856jf8f)fajc1I52e^wQg5B;GTRPR;iAHjav2@dsEZi#X73ca+_
zUDrfpEb!-sJ6TEt#q|^94&IF4{W%VS@XYDueEKNpHePes5-4E@{CL6y-xDRsjVaj<
zRcF35=#k+#YKf#KPH5=-qt*;1OFgrO#B>j4mQ*-aAyaefcCPa%74=N8;b|T&mAYmr
z)$8wF&R~D4>d-6G$v5o0?4;^@&asAQQe1mt*r{myc6|yr2Tm3=^#}z@T$2Q?_Py6x
zcFxCBN4V3^jSiOCH61;7deo$b`0<N}v*MUX%Q%+%`6h)3P&++#lObZ;I=dH2(o$!!
z+9Vx(Pxyx>K<4*)MDn1twOp{0Na?OCCQplR?!d%^`d<UCSJ=bLVZU4`6)qcTaH=hR
z`}yPQ?<Y;U{pNpF-Ly=HfB<eI=2=_QLkVSUb@Q9}-X0V*fc@DEdmi%H+Vdg^Vxcj0
zh~y#cg7s+fL_=NjEXE<^rhOM;`>O6r3<Y(6Xs5>^;+hBiwA3WzuU~Ex<Bf7uGp^m^
z4%~8g*Ja-&6!ja=O8cN4T6VnjNcG&`JO{pnn2Jv)EFYx$#5VJRV%85*ec^g?`Rt9w
zP5kIB3>+0ONAEQh${@>tC6R*MbCXR^x~}J{O~@^XJv#0C_{yT$8}H$H6?)?Ue<mq!
zleu;>k4Lg*umYd4P4|)hP3#rSeCGFeTbG{m5%x7w4<{2u2XunDk#Y2TpKuK8il_aI
zH0|>))qO~W+$kvlqRcFyQ!9y|Js`)r`&d1TrZa_j{iKrxkY>H`LHu00C;O)LjQv`0
z%jpk%tlC3dO%f<DIdDeFfevyiVSLK36VkzWOM|GG!A$JmhDzxeZfH3@x^WNFFB21k
ziF=@R|71jDwkOQWm1%Eo0Uwx&MvyeMFXH&kKkQDMam=|jE&v9f{v4O+?hxr62aV4U
z^h>_a;6;;iY_>|>B%8AFv@Gc6WpxTX8Bk?3Ol<mf{^0z(HC%-7#es-oM$oCf`;N9z
z@*t&6mL;_}$ZTEYQ#K2~$FTx>I9BK^&o|Jl3Grq`HCT&ljLf<em6^|#KQ|LK6oKj=
z0o4~?1*%^RR3H8l2ClSciJI9Zt)&r_3o1snjdGWB_N-J;P*e$1n8kQu3I#vzQUt?Q
zz&+|_sjt-9$h?|yPBqm$TUD?uheb8@2=@E}>xyNXK>e+Zb)0mbT8TA_q14I~U?oHX
zRcyG#ekE<=^7kLEX{5oP{w4-_114(Z|ATHiwrF5G4YOhaZ+i?&fceNRY9;ur%qdlW
zEv|u=N0OhzQB=ETS!OHva(0ovntC8^UT2trdcKjUKS0^QgKt%wqo&;6;1A}UG!sxr
zv!}R3PAQ;}xdb1D?2{H`(R{3##+x5H${f`7?-vr0f2YSg>mu|Dx)~hDnxdI*$~*$o
z?rwo~i?(`Yd2wVIuIsSDOQ7HoFRxq{WE84k1Be8U!JVo>owE7qrkx_Pz<1=c`s}s=
zKVH%JSHi6Gk87QF#l}xO1x0rc98~M&<bJJc=I6vHU?6W5aQ}J3GEFzc9KB1n5&V!3
zVyr#B&0u63*I_VmF*oog?{Ph-1R&QDTb1yPG3UZQ1rE$Fuu^3-m!4*y-no?lBv85_
zs&*E^S@&arDrs{Xu5?hr()yFu8DbZpO5!(}BXpF`1Efvb57H+3`H*7s2D<ytaK)+=
z{eo$5W$=lh$GVKFtKMrRm=O%jjO-dj9q(t7De$iQhb4|Fb%W=Y9gpa0H}37dTSbQy
zOS6YbGVdIR=+o9dwbOTLVZO!sD)61^>y*jfVJ`f3HvYI%s%^#KVgyBi!AYW|Q{F49
z1jKdfMdbjI?785EMn7x0Ue&XX;*G@kA^moEPeo+c_?1O@314@Z3R76mUUrwFcuA4~
zWW7-pz#G*Bc%$kTGKB&L6_XOo)$aWXBzT@w)GYi1?{m4;x`9ir6d6S3W9|TCRO`N<
ziunY4Rmg+O-yVRBA~gn*E>#nR^yDK2;OXUE9Y21_eOrFC<MZguIRB&gvg%o*eU*IH
z5Jvf9>5vSp7H#~eo>e=NPI;%U)5TwoTXhnXDURE;eFeHzsmM%%1@6mLo{C5j{i-(n
z70Fk#7(5r;!2%xYxexrOA~}&_Hg+TgHV^aBu%f%MVr8=9xIzVB3Fhu{K>qyxrIPDg
zNP0HCO;4R7!?2Nd-Czl6;Z=M-yqrZWP$#KcI^|rc`Gc=%(<ljm)A^wc@C<q~rvEa1
z$2NHkc{#e6b!%ODncFP6KJL95tU?SR|GsHMTa-j^6@877gH?8c#8(8RxJ&g0b_3$O
z@CH%ui8xdyT$+1#Ui1D*A?b4L-`uJGZw`+CYQ(TJv;D^o4K=k7tR^s!mNJO0Ua`*u
zc0=?dKF{nEY2!LB39{iLN={#mA^Gb)8OQg!Z*F92>MYiN*ps*!n@s>z6L;?<$?onL
zR;LVd7VL<QZ&MOS3A-YNxe%?l?ir<x<MAT!u>VLNQyE<t+u3JiUd+TwbH1+hf`_p}
zc6<`PxKmgB2z6NAV=#Cop6+C4NOK+xh7`SBPY0{kQ?E0D^#Wx3f7ybagUtMK>W?{|
zyQO&uix$eM{G+BTWcfIIXUYCGhsKo-hAnScN$!SNPCN@o!6x}PPS}EH^CvNOgqeIi
z!%)+*J5w4kh4!N)A;1_Ke2I}AeG;sF%(e1{un;;fSlw6pXP&qkB_RZBkR?fvNzF*C
zmLj)~y0iqkiOgrBMYO&6j_x^iQ8025g~I6Bg7f-$pbgWwEH3yLYM=TBd<c_|QM-b#
z(VF=6SxT*J`<oN$0~&WgyvgiayrXmuLgq0bs_EL~={5;(*wNcLU~~CQ5%Nra(O7s_
zgL*#a7<!j@Ue({smeey75i9BX5jy(Gp<=gp>notCv2(<t;hMTuiz3VCua-B-`};4M
zm)ZNk+<iELDGzsIk>z>43{xocZa!wvbeG|vAb=UPSG^=+x!M%it)EnacO2WZN0|W~
z>n!>R7()pwpOykR@<lhl2m&~=VI(F<W|FYn*0qR*W1<VFwUv(%G}Y)!y|ou3?Jj0z
zn>|<cm|N>?2auMG0U0<6t$&T6Wj-0CChQEYh;2Y<myzO*S5R_n8b;j0LvVpM#7R|L
zQBNuU7BMK#R@d*weW#8V?#m7rD?7_N#g^(>UVx*}uJPundH1xle^QRWOl*wPTJa6Q
zsZ3SWW%~BFpQVj@JUL)luBxhl=A{h3J7w#3%-%7YR7Ep_e0$Lf_s!vg26aMs0jY1-
zl_&frdgbDtI|r${Sx2}OqV_;bt9|*`TCm37^iR#9yR#5yy4?=`Ab8wVA}Y*?xwKMT
z=v6wCsM1Gl-GrJEHvU@LcFqU}=lbIQ5rIra<(#(p;2`$l?6QKj%l^oZj|vdc7FX1P
zAZw=Tgv>`Mni!PKVW1lx>3N-0Y!RiC#k73H31&frrw_gw3iQS^VBA_M*amgX=*DO!
z*VyL>bHqvwie+g^BUhSpSNcvhGTM~@<OW>X3>jhA7^=)H!MQwGicZyx;n>u#2<%UN
zOqx)r*?#?k3$D00G7W_lv-6^hHipm5lk7Oo?kW*|7^!Omn}uvYU5EE%Kc=`6mpsSU
zVd5rqaj1mF&Wo(Jt#6bph4x~*`>%S2cHrfeuD;XxhLiYm&risy!8wQ`AQUEwv^WXR
zkulx0^;F(}Ir(9K+vWNq3_Q||h{f+(*INkSd|uif-nl#dC9XZ6xYlAlR5oys(S8@a
zl+mut@}1cYScEjY2)!%@_0FII49mEDYh2mkaBqbwyWiklN~QIm+`S#yvT^B4JqjLp
z$PSvXvu{GD91S-1W|U%>VL0IoJ2^~+ip+gc-)PMO6&}OH!GC>&XfO((4Ye-uQks&h
zui&wk#4;R2XdO5>+4c~$y1P^wL%hADy&Lj&g8;{ix}60=Y1-H|4WxXpeHuWchY(D5
zSY2klMQKwr*%}8CiUybwh%P=WXE>urlT<fFlyC#`B0U)QBGru&=)E5SgR`kXRM;JR
zjZ!^zF?Sv-3~@a<#VTZ<!{6_=g6N9sR8`K<k;=LwQ8<E;Qyiwi%5j;?((tgEwNB#k
zUSXg(am!dt69$uERInASz)3ilyz=u`@fvvLS}bv4$Q9*tRN>H;xmF=|`bgl`((=C#
zKBUFQugN_Vahybn21*l9ZHH5OIr8ayTEN_iCh!YZLwYCBp6WNPe^c9v*C+3)V&Hs!
zU6W0J^;W7AeC-VPs;$EB;R`Wnc;@c3r2=naC3y9gE_w+WXnD6zB#v-bZf)90KFq1#
zRZW=bIQ9jdnol|-BGp>y-P%E8bP}5B_@=Y_Rj7pciTR2q^p0<P25Q{^@%Qhhzo^D@
z-%-o65}!mT)LH)KPN@5CG?ARR&$~nQj+<2U{rWRiT}3&+PQ3l8v{&S{JD~eO6uTE2
zQinVDjpwOsL1jbX+#}}i{`O+r{L4+nm6S3Jb@=&atl8=2rkYGok<C~by_c0l5pKZ6
zHzQe;hbH9>C%wR4pQqa&lnp;UtH?L1#3?;7<-{=@V4z`5TAo39ZfS{8Tq+JXmZR<V
zw)E5+DhH$M^Ct`VWqN7M&_s+KPP`&XMlKEO-(ISdd&N#j4$7$Tgk3IITgk(E?9p~J
z_^d%o_Fcl+&E7<jwkEO1$&qFXSrQ_^w~c1xZ9y>Mc^70h=qgWfkjdeaoWfVX%X>>(
zaO2W^adoDn!YhyRE?<RX57B748;a$%r(Is*W}TtdBO!I~&Rf0BRuhDJ#VJkt1@b@7
zVJ23_|7LXgKRPLy|C^JNjp08&r_})`B^xr}q)ajJSlo^2qw9u=iRR?4=SO~?WWC^T
z2<jH4f~=h`eDAbTCLuFh(j~Gfg>3bW+?3x*AKbjyHbeg%pHJw=j;2bKO=3x^hm=kc
zHg6(M*7Jpmr(3xc_BD^h8J$4r>m7d@GIva^H^iTVNj5mUEA$}I40M_tS;h?mvdZ$+
z&v0<6eBJ11@`MBcZdz(*G7xR3$ajF!&+rQx<KyrWW)32*pg)jLKSAi){HHvh|8|_9
zbwB6*31unq1lx&ZJl8`Sv(fZkaw}1ombBM*B9T=1bW>FR$9*qp5U4Vjm{Rv|G`wI`
z<kCNgf5>t``0Nt%WR^N)BZiA|zACX-MCo5%C{1+;KY+R0qZ?HD#Ua_s1<CWG%@!V*
zf2NO1pyvifraX?ZAi9`lO{@+06!DX>Hw;Vxk>AX97!23WkI^%SFi_PTgm_n8fP_}g
zP8!FAs=Q81_5!AgK3}%s`xRf29Edsu2Puru5dU&cp{Wpud<h|0%EmAeOK)na$OB7`
zQcqy42+9yCrjOK*{Bgxicu1LsuY>`h`f3d*j6-ftTe1Ao<g#3hgFo&fA0p(Bdf>!c
zM9R{#`C@z}`v)(uVoFCj%~F{5Hkv1#jDMh$Vv7GE;a^FObt^dR%h0!mun24sJ`LwQ
zr*|RdyVG74>w>99V<tZNWqJkDPhO2y0Qww(iyWBl)vpQj6&b>p42BMRyoKYtMe7Lf
zI&FzZvwB67l*{GWg4O#=-|3Ol47a+Cby9jV;>f&~)4Iz<>mc2-=<B-DOoytr%^?|M
z$6UkClKM0HOmH#385sjMcR?M~5pA(YYp15dePE|s<HPISoW4h!cFl7VK9}hQ8)7`#
za>TBoWUWoz8G1lUgaqmLW?E?CmN}^nQX%BnzF9Pgz&?6>Crx}v&>s8c&qG$Fm&_bs
z>#B|(UzOBVuEYuppOo3wxMP=E6=eiTn-GhpqF`7L=ld0QU=ZP5X`%69L^tAhV4^aV
z@0bD{^5;lyA(#RL%jW_{Lm?K9P7RPz2;LKicw6?~Ha8X)l%wlA*8Rs$@~!%Ur=trz
zT%G5&94EeX(b)LjIKG<Ny*{}P5;bs~Ho<1-|7CNF*}h($8@sfI#4%tu)ZStBX@}@!
zU%S5Di|*eeS(mvH5;{46bmDz|`BWKQp>5?Q`PT)mVy(l?!AVHt2)}B*oDIVUV?hG0
zjdlWK`P9CSxXa_^?Dg)j-?>H$^&VZyrBmDLnUq7u{pGh-+jW6U`YbO<yGN&*jt(^n
zG$9X1(V6|Dw}eXez}4f{TrFtOoO8?4njZ9d4YRMyu}k{#FXbSux#*Gf<^KFzk0U`^
zhI$l?&jk5Vhozuf#x<?W`;~I4mcHR50n2(f3NFyX?rbVq?kwwzrmU%B#^H;tb&IJ+
zb!+9DMtvi}$p~~f+N|xCuKgEbn~8X9bFRzPNHlGIJ!qKeweTxWz1&|sy7gUlZJD9D
ze!@$>jWq1_r<llftDzwok~WSbaf5F)7ZI}(aEwAVLZIUWgeJtgZ9Mq#EPn1eZOz(o
z4Q{kn8a}OhTsczbfQ?nN8Fe?9$EjOOxg98HGw#Ush=5<f0bgEIvGI(-%npqvxecy<
zZd;o?59;z?qi{Z_b3U*7*KZ+7>0zijXI@%V@xPwBI)xOi$DdQD-x>#~G2o1St#cD_
z7c}mu*<wcNxC~l+h=v^ei+bYP&%hbKO(v2Swo@D*i)&4s);CxZ8a0)QOT&3&C=`+&
zdW7j_eMa<M*o1K;O3fLU?mM$q@NiVF)u6|iUHLRjcE{qRj!8rn^NkAi>sZ$5z=ZR*
zN!z6ZtHx-W|3x8TJ0*+_TrW-*8<&mi+C}@OTi<ms20R_6cFJ^mB>T5W^nEq6QE>xh
z?7Hcc#DQ{V<6jKlBHuHoVfu7z;jKJmei0#q%%9+rG-RVkzHb4J%PkV`NDP7}hwIhb
zbwj@tYUFrGR`u7|$MLjL<ajK7MnOsA(9NHlKl;LhF%ammwVB&ZU8gR#60|)F)S}9p
zC4WQ}4E>4a!s3V{ClMK@7Ad3?c>}}xv`+dz&m?Ac*8g@U{g?TYgNgM&K65m!0FfT`
z?Mx>(`l?~%;<cZT_r_PkcLR>O#)F*@0p6rTI?~n3<g~!d=M@2n$E{Fl`_dxj?gEP(
zmiI>_W0R?mh?34~jkVs*nQ!5ru(9*pKvsF!yaLl^`9Ox{@-JikJ>k%iol8L2nB&i_
zQne1N`EYG(^IujoWb?to0ddLN2-=?WFto4*Lh3W4$<XKN0nkqA2)?qU8A@a4kv3PI
zHGvi)cwB7bvPcx=C^;&{1I1VF+}7I6&~%;>-_mLA7;mg)!8!yb#VB5`?Zx*qtde~9
zBwSmye{s?SLWZpU)JNLbdXyTqo?4$Or48nDlZMBF>c4~xKg%F67CT|~D$rIjw_w$S
z!XRQ;6)47jRDM@-5Cfi(heBf4imqRs9LHzWQ)nSnv)Tjx#R|pIPhh216#AX^vPPa_
zIyXTBPsb7Ul{jYf|57r%94DEUA^|0X`sui=7GuzPOX%X43Dghco0ktILvrs8P%_f?
zDc%}I042i`P%<u6SO6u%67OSrv@4o)J@_tsv3K85&rbMfrH>%Fyhd>ui_wydew6wC
zw?Sf1P(aDRpr`FvtDnmV@RgUKnSRB+(JMitO8otYg!uRKL!2Ac|B4$k8=*Y-CvL1r
z`*+;z7|bPKg4?x5?_vLUC@<a#%Q2)l;#tOw0JDH3fd;m*glH0888SkwB#QRC_{%3J
zL@T^8lZ#MvlzA)9tgk*<19Uvc<PHQh7v7CBJRp?huisJ!G=k5KpmxRBUPt8RPGrpS
zzT@_I#HL(2J;&|Xq7Jk|`?HQq^gEQu(<FV;4`7_nPtv7-j^6E<e)0L7q3rW)zc6Fk
z&P18>JIpwV_^vA(m&=Vx?oq&eEkhHe2R#u1BZp}$E&h5>$-~p<&e@Lrvu4xT0kW>*
z^NsDm(#UaLne0oIp!HJ!%iCVeva{3U{?<xz`rUfC3GaKHirT8ynkGRfk6Wii9?N4k
z7y3L7VhZtfN1$A792-+1OjB+$ul5R^@&zr?QgrUO0hiqq{Iv%!3ZLsumEqs|6*nhO
zT!#8KL6%jraqC`Sb5-l8Ck~o#uiFsHzC+|-Na)S*=VRSX{>pY%qCJDQ=)IS>iF&_(
z-eY0@Eve=BTeeWo5n?hCKnJ06U_qNsJAFS2zJsvpcW+}spgt3FT2a(Xvh<xEv;maQ
z9V>(K-0wyU<X}krKxIFb6S6p-L!yAu&a!((ZQn$&X!h?U7iJv1K|33s@xl1Pfpo~y
zWEs7rMVwQ>mFm=%tkNMl;sOtvqTi1Mk$@Yu3v>0}j3efSH7(mvCsJ3I;mfeMt)b0P
zS<xvhGPdP>ODYTPv55TBi4Z2yb9yb^C(s!ONt;3e0imgJBok!;h3qVYR?Hy3uaYTu
zrm}i<Hb)OOEUr%4gYSUj?L9ts;E^|>JmH<?Ni1VEz~8*u(}Oowf6u5MjHe##D`A)&
zvdyCPyyqw7jU^39UU--wY6BX-RPC!fE#W)#!UeUV#41uwVQZtjSfL?tnH2*?COgJi
z_Rk@4iHtSEyg#`;WPvy-!8I|}M_c4R)s1YJb`S7&lq@#H4*Z?{l+M05g;5=SxHaUY
z@KYq)n%V0)EW=Fea|WNVLmbH}`i>sQsKd7Mak4_+!Fn^FUtwGDAXts*O=*3#4buUD
z#jZUQo@IPF*YH<+#gxp}5<Z|AG{iOy_;$^vfw+L%=uJ1xUyl-J6Y>te^wuQ+TzAwg
z&Fy%NQJ?OYj{Ep}uU$I#Gz9vQr7ofbVTzxRB%{!@K)^B~f=!p;#032h3EwP^cj6cU
z11iDnE~Gd-y~X2IsC*zdcL7T>TF#0dC?A3`g<44upjgCuA^~~{wvII{kX-9zux_9j
z{c2czSwkSM%y|ctce?=^Qc3qzb&gBZ-;YU2+j$6fq0t`p>k`3Kt`?aK`Q+-*_-dw}
zdo)@2;RwSTe!k#KQ8dR=cH#X*E=kl#Z#Z=%iX*5YMh*MT$d{L^#@pQ5)1sZF*Nhq`
zhM{K)Q}H6^Yxy1t>kG27!!%=WcSl_4i0xkg?P`XMU;2-8hPuWmlDVDew4*Fh!2-d7
zf8{nMm>ESj-G%#J-VT1S0NG^2Se1jtclN-dI#y(B<>^%Nvv*SOasWT=m;}~2T`tyV
zN@9pRO(Kt^6yE+9oR1<x?$OZ`V76RsCLoky7jWe3VXf+L(PS2tb#Wjhx7j`-5I9vR
zZ`Q&jJ9y42=^*0MTuzPU%n|Uw7rdGu0BDZ31KE3qt?`X$v51T^$^5}$*2p2*?Lvlx
z@>ym;13PGkhD<bwg!Tb4ZmCxFqvg2ofOj|O4aC&_T{r-kjlc))r-b_=Cq=sJ)RUda
zLo1QKOVmN*r}sA?e@Lna;Vp3d0=_RxhV1#ZjFvTW>gQ<Q=hPqMj6cZZb%>ur<#a{d
z#6;-oY$+|<g1V0^1uz@%ZN#7H*H&1w*#W^JT^e9xL^EQU&E~@QQ{9k%H2QUshAD%w
z!)r1~<<q!>RF@vezbsnXEn>=lbzI@fFU`c;`Z-9JHp)_nk$WH~7Yzso6Vg<wOM<ht
zj*Qb35+VBNwe`YmvFaYO0PXc`n`NV-v9{*zbES0@jt*YF3&NRp?*8nb^FJr)zxVze
zwvB?Xd#F0L1w-XwcNM|QP!x)!P;s5<rtGV6^w~!~E@(Z{T3a9t7w0((6dT<s=E&(I
z8B*;y1!)sw9HT3Q-Nf{YqaqSxrj-8J_c$kb82e#c%nNb9M9^|b`xPdgX7kUf!POw=
zvg^-VmCgFx>u%KKrY|y=<^3vKl=wTZV9J{<Kktf0nN)|jw`N;M{kP!l(iUgGe7chR
zBiEc@pt>m|OP!|1ndTPxC^CkXW2<7O$}Lo3BqfZ^Qjx0~)Ch<Xrl`PD8D@pA;}n^P
zQH>ru1#tG3ox)M}*}U3($(Tx9bz6SFQ0}c84#r`mbcI5veqod0U^6vXvb+P;I+^`(
z`Tfi1<8^}`WQ+YA_0uBNt_|-etGmZJtit<MYY$Mv4CPNrLED2+%Nf3Cfx}b;G05gx
z-9RCb=wnJIK@|_ku&p}u#bbqe17g#3{m9=KGnK_`T4InyZU}02sK2kaj&%BOE+$nu
zxW>u!sVOYcYVWjln_R*clzG}b(ppj7C+<Ify;LeKbiJbdg7e$eDRnIQd*;0EZr2zo
z1j+3DSU9j^IZ0vchA?2o#&Y@TfA+X30AS0A$x`6Y;pqDBflo5^MW9xXeBa|s9<ERH
zJ8lRR7pMv`OC0M`9Q0G(%1+R)uR6B2&$iVG(5Smk(S7zh^aWE&J{6)0+~ThY<y_J<
zYZgx9HQRR-Xw{kbClE*C|5ni7(kW4@D(LQfk7z@vMpoB*gSy59UHrKYKS9cnsMB+c
z+@P5e!7M%iyerG{DJjw^`~(Hi+k@8)EbyPZ?|;27BFyxuc+bb^OBcDZ%rn*mhr}PX
z^0R*rb5j7ZXO1qe0}y)CUd#AzFUx&QnB1pS`!V?WOQT-}Bhwdw{eh`$N`^7=pV#K6
z!j6CrzZ(a@88>!I<;Y;}tQ+I}*!;XdVwrC`WrJoG>{9uR`rus6LM`0r^r~+Hg<({5
zD*ec7>Sm}a^fM_@K_ps^08=Vx2^9;0^c!L%)y)hv^X<oe%KexED_C3^xv;tS9o~s+
z2z-~o3Kud2{f*8b&}c3M9sAvsXu$RQB>u)IsY_hpmjI~a5K-{cRR|{2uFt1m_N7M*
zrDN`lv(0KqWZzhh0nt-oB>)gTg`=5`r%~R^q=qUX<J6p){=C1Y3O|S?rOLA(csvHB
zz?WZO=yUlmMpbXK>Ej1fAFJq7xles|%cC;)i8$mP^wN;{pEvR>3=IGCMxKF#;Xl8T
zS6j9QnnzyuDi;;sNY$^s877m7onn|lv6m^~*o6uI*b{fh;fhJedwZD#OPn1U11gW0
zA2xk5ZfposACsVcavf4c-M)zD;XeczSker$eDQqr_<;lgS6000^nA8&0iDZrXu(QB
zvB6Tj)kaf`t#be9Jc#Ns8M!!<o;?Y(WC_3SqHaY*vj0wr>ofv(TPoMSH+}Y<&)2`|
z{h%LK|7(D1Qpz^Ko7=?Z?-UO1Axl|UPQLRe)X6A%r=d-xQvm-mA?zEBnws6zf>ch-
zg)VBHQcd=%9{o9sG>?Ja$sAGg$?ASB#r-RRxE$4P&Qh;mBfK=^4T#P@mx{aWOn_*g
z0x&-XC_Pw&5XwQC<o;$gLw3!@>Yeo&NmO_EWc*i@H{?E`bMv7P&QX<;@w0h0_4Df{
zzpUC4gR2q7)l8wM8GMb~)%{LZNuMsY)Y7-VIiNnkeBcIIeB#Dn>-8MCWd!^;rO+(r
zVYuE>gdLtPU)rE9vpNCCEx~T}2X0Vj$>z(acVjq-@8aUG772e0a;pIN%gS%CCeZKX
zvHsuy`~_0L{N@8UNdJKww7%~l$g(i1CC%hQ6)-Ondr~77|0j8-u|k(`idn?`zV3@y
z+?*uxx5Asn!3c$P7sNO3#ZVSaQU!ZDQ>48J;vfz9F-@6o>l3Ke5v0Lcx5n)+Cw+gr
z)4#j+io*c61rzCAqsPhKWo!;yG=JQiCOCX4itNPoN+?aWH~(a<6qAqS70BRcqDgNw
zi1q5NtH*to9#9?UqyL(tRBNVmMwa$5JGH9-%ue-X?@pOLoa;}QrkgB#15(c%9I1AV
zH6Zoa+c|oc4n2x%4AENqZUkF=MSmEDbG93!4Zn6f@5`A{n*Op`08U@Xe(Lyx0r%cI
z{~<ccu|J15+p&PIu&)bxfx@LfJ7B-7%gQ-*JUHGCt$jQAo_zaJW<mCIW?6Xha`}oa
z*7GgcnE{6&=r?#|ioeq_KON<^kuRoy*Iiz*(DW2IgMacLcz~MUMK@Wvm>Ee)CZy3D
zL@ASA{R0M~oi9m&2g2=(Jb0}H5&h|!0%8)CahEDkn8_bVh!kc4@l=tSUDyqlKyRHr
zp4M-SI0i2ngkH(=XGl*naZ)pn7+X?Ik;@$W*3LAGT~V5-USF=BIGC2SP#WoTWscX)
zPg5-(U@FU6r%IDQYrtaZ*&`mFD)Na7t6Q;p|BC7s*QGT|P;5~TVVD!E?S!)`TqTn>
zAzs+62`PdYLE0G~Tk*1d2B8&Gpd@TU5mAhB#3?Av&#?Vl`+giM1qFua51l`&40-mA
zDS~EUpmRlv=PWxl6M;ttr4y{ukktaudE*C2dq-4Q?0<g{Ni~W?u?Qa<Rx{1+8CIJv
zb`Ir4`q+aIbn{zP+j3bSDso6{(qZ(i_ev?SYlvUq1-&C&dI-E^(ISUEScLJ^*b9`m
z|9X0c()f^c4p0a8VI+PBPvRT&p(o#bD!K~%N6?uw5~LNiF7Z|xl52qGu--%l-!E0h
ztzuAj@H8nN!_}#`)QM^WCRlbsaqD^Ksq!Of$<p*Tu}TrFq`yza3~NyN#f`UanYe<C
z*!<K4G7e#N6dtp7vP<yARwgJpZ$oBj9E<NDb2;qSzNZb$Bp^cb<AH2+h9@+^iP3DZ
zzo7+CGlLDs(Aw>x=_5g)6~4wl{3Yls+k!o$6qS*<{Z_h9v)e;I)9Ki{t1NF&k64$P
z%NnwnXY?GbhV#BN$?h9)H>i$dJlv<XChE@dLytA5Zw`Y~b5-t@JMnrT_9RIhp4Wu(
zr94Nu+fKHZJy!^he3$h%Em!LWeJ3W0Zgk<pbPB+obB`1=!KAp0%}{Q1%-x-rGpOIY
z8JA7x=m4soVGT?c^?|dS`jp>h47h}14?K#RaiusAvywSeoR_r!LXBpY?$;tUp8egE
z_Lg=~g?+Q|^KH;>CBpFj=DC>R%)!_hn{9bqqeP-geQygppT)){cxiO`$H?8T70$b1
zCGc1GY*4XV@#Vl$$3HW+nTa!R78+ed(OF2o$-X+5Y1rI{tEwO%lGJYNJ!81-$4~kr
zXs)$xt8G|obk}yjX?ZvFNDu6DQjL5~(*~>c;x$fjIdc-k8^n63<$E#;!oMLXjU5nP
z5WVxsInNeyAwyA>s@^tuI<4Pc5YgDzK3T^F6?0cjzuE*;Wmfu!_n1K=5(TWicys0y
zczD8n7ceMiAzL8;Ne!myD0ZLS57<_y%aK}Wp7xmVO}4d@LwV>|-!L)?>_zI=0#$jS
zdRianJFS$rg~z_REaSh8B^>^W+%$<T>2bYjsOj{Zy@_+FbQP>!@s|T5)^n6Y%Q%@T
zdP#gW%I!=4vB|gdU(Sy3_$rreoBUi3Z#5!WIniV}8)dMMc`^h_Ij`7NX8jbc#}=n)
z*D6&*h^Q-)7j5FZV{I_(1YcpgJI_TX8f#kRUqM)lkOXp;Sxh+&CTenrA|%5wwjXmC
zbB-!V<V+`!C#BFjSd>p60t+6I&L7du|IJ~}#P)9vb7sc>+_9muY`rY}ahO}KQ$Xn7
zjjwh#h~hCzg%8MwgNfmzh1LEH%OjB!TdF!A)pq@CH7J(G%!i;VF%CR9z9*BBB?rb0
znm95uVq!&9@eBM^Bvz6~u2dXOA$s<=9M8(6e$qD>MuX)I`&QdUYB1G5OJybcz%W3E
zElL{$6*v*P!5Flzm=>`!w`gJSoG0-W-d|KT|Br^hg!bond#jS5$5CT61h{zE4fI(|
zr9a31TqGtFw!dOYUsMk}a74Smn<x~by&)IX&tuE`u_RnLuQYPnhS9^tSi;Pdq@T_+
zY?GEZ(No=G2)~~h19%9Mkc?jfh8GVg5J$7dy~N;-VeQl`8-<{;fw7>46eyHPAeen3
z14TmBG@bIA<Gtx(EKVZ34P_Le8UCHyp9HJ~e#<T@V6hzvWgnJzjfx#de>W4=NH@Y>
ztGJ%>H(}1;8?i=vy)QZ0NC6?ouA*lw4v(PqCf+lx4{2E@SNGrb`u4D-mgCJ>S!iv4
z`6K=Tyruc}Wj(yQica*6cWELCILKkc@yL?iV9A#na)3S@c6!RMr968y*yc&uGP&+%
zp|$#{4Db&_@u>%*Cni;pIoahN3eE5PX0bJKRda~M@&*vA5h`2eGnDn~`EUxe)rhas
zIXgS04KqC&AnP*jEx{oL^5!YWJqgW{x!4MIUQy)PO{jc(q{ER~O!(0p<qh@gJ^2-;
zdKuep%P(#o-gcE1d#)NAT1_?<zMnXLYA~K^r!FTOA<8^YTg`{%@tz(X*fOv2>th``
z$2D%$vksuc%&Q9ZKNqXh%tPyQVQp)K%cRKoB#TU6@LvBY^XO={VEuZo2^WLOzjqq>
z`n<c?=WzH3L*Aiwh0pgJ6)Z+~s<C0q*NOQ4no?z^EVAb=V}~9mc^li&xZTkU!>%Pf
z)`s*-Whu3D#Jc1Wau+vG=rw|*i%4|5rtYpF-@yEu-pAJ$GBS%45C6+V*kYt&*vq0?
zMG8|DSHcU*y-V0Xv}G;Au2Wye<PQMYcwH;Cu>;g#){7#cP~@cLCn;HIs{vF#wsZ$Q
z$E$iM?zGtjZLg`5Xg7xy48-Sx`-Gy=i<*1!(&Y|}!hLRVIKs`8pjOjzmu$!dpOr+Z
zmL-2)0aBIS-8oN<cFL7DNyOX266JIMBf5r#rQSaXzP2Jz&*Qm2RDZSW;nyecHeNg}
zAVWf_E?VcWI;o+zflq@$;b&Q_xqRsz0~wl4U!HBgfK-a9yBK*118sNHWV{fB;KfIG
z)6(*YM(W8AJytkD)Vaj&{MqLZ?G1itCmgmnYWiURrAP48X%|-E_)6-Lq5jW_@L*?Z
zeX70a$vElK=NI9$ilD1sA<ShUTeuSs2e3(EEfz;81$qbFe2HST7q;C3_FjQpX1z!4
ze_8t9_%F}gsk(1qtwIcTG0wtScH-Bi)KWd0w4<3hp}$g_xah(vL(vC1(j$WpEY`sC
ztVZ=@=-Qe&`}5mHZ2N6PSFO(?7@h{2;Yhwm5r(y2vNXPYKlu!H6TNj=)ul%>Y`T@r
z_t0or`7*P0IKh@gYLJ6hj8sx&8{{W}(x<xxr2##~h0a{i=houj1aq`?TMBv=16uK!
zG?lD>O~%cH{{;@q@8`<|tFp&kYGAn7fWD>A_jCzLXiei-i9?*vIbT+y$t>~R<I?Gr
z9}~}|?~kffQ#Ou_IJo$p!YA2PInBbkvik-luIi$}r@{kLmbhJo3j{{wtfR4fX>;?%
z*(Yo85HYy=&-C^ojqEX&`@8!+V&Zyk$#LR(ZODvJK38Sq+S035vBnf1+l?cL>k~)@
z`#vs+;qHIww3v-yxBF%n4K?S&r_wdb<4fg5VubBHa>JdYZGEhQVy2|%)C?~Db<!hB
zHslR7JRBtd-|X@Kn;YQ2TzM=2JpP}@e-Ij~s%8W1@!ktnf7`|Q`cJ$H4g}Sie@sJ^
zMDvij#ZZcWA(U$r>XG*GGPxrrt8v!fH}!+6ZvLK;nQ3Dk9LJHn(*(-x$_V^ug9o9O
z2;~+KYL@sjNU)-(B1lIedNrB!p#A*V;Zx%Ur_I(Wt;AkxSaFx~GxD&Ic}V<6z`Q@1
zAyBFWSTM40k#U3h0cy)m@J6hK{-=+rku`QfxsRz4Ex$;)AZKn8azk5x@(R++!J(X0
ze8~E1MkD@Bf-PWZL_(Pmfs%_iITgAcMg}wiORx39X4zrX9qt%cQgSswtn0Z##bex1
za2$>Zo|-_9Pm7%dSKapxYXa@dY&~Dxg`qE{oKO!5$bjvTD*<d}KTZEO^_gXEiHp*P
zHkm*z<%Nd_fAMV$CL^>$p@jN0+tuy;WxaPs5s_6U1NPnc@c|zbHg`-#y6C&NLZhu^
zV0_@m1|J>4>W#WbS|8BTK`!RMtwq^vMK*cCm|6I?yXYdd1--N-{q7Uj^X9>S)n(D8
zM(a6-n#3ti^~x>2Q`h~k4L*N~$blFZ)3kU-R97=^70_A!+2GxD{JE(U1&qYgxZ(oq
zWs(z8d*Qwn8>)fT(-iSdIf^VLtAz^<nSVtQkrlU3-L1bE?_R6oYzdZ@hin+V*A@3H
zl#MJ&T^PDHDg<Gak$YQY-R?7iHNiV|z99W3#y)z<$mWyvn!MI9hFb)==KQP)?)gWo
zJ<hDl=uO+0E34g%Zls_<h6nhb)~g{pN@yc71W7)^Ue0GbKt#3nBe6vlBNSnPJwsif
z*Yi;gKQh?oD_VuKhx__+Lc0N_uRbK!{?FI_>z)~mX&fEVv7twJFiknJW{`9<;qjuK
z)r(Ul;LFQf8gsbQt0jc~>_vZQ`lfYT@qT{IhWmptBX-@U)nCaYGu4b!<!1W6h2tqq
z{Nu9IgTBYI=Sf^$pC<uTNo-NUU~K=8@*Q6RbpN)Tt!p&}s|{MBV3RRJ?M->aQzSn}
zrY&bov)uy3-;357L38HzIBPxMSTlle{~yZUIk>WR+ZT>Iwr$(CjgD=nW7|&09ox3e
zPCB-2+sU{3t+Vg9@7<?vom;bN*8FG9^^93#%`u*_epoz%|B+hS@zDPEk#DycV5+MQ
zz6*>JSsj=Zjpd6az9C<6VYdP{>_ZxeCR^aeOOUQMmIv9-vlL1r`&5_&9wxK*o5H8R
zuSB|hus^Q%o|3;A{<V{2yP{vv1F++-;qTY94P`jHF!Ei25lstpwReX<Xve)CGSu01
z*Ji+QVf|e3Tr~N{s~-q@XboFEk>95Cu)(&7%pC>8t|qiS-oF05qTG&r{9uI8&Fvsw
zo<gqkn3(Vf{eWw?j=pTl&Z>J)csxwhu@7X#g#)DSjwcGOrrO%5lH3?&yC!XgInlHL
zO-w=ju%ZsC!Q-gL3{7|^&VWDbyJ=VZDsV5pJ~a$|<L}*LHzA_u)V|d4)~UGZM#84u
zyp>hZm|Vs_hCaR-NJvNhrjEK8tdMQyx8vum<InTfS_Gn4EX|ekDW2&xVV*?z_oT!x
zK2_(gOixu`J_4Pi$lthySi?;6^3(TuM%{0MzftwNp$nj7$w%A!^(+_(@{nNVg?0HV
zwIJ+8#(~q(nCu$zl`n5r`y(>x1Rf0oaDP^x=|+q7`UL}z1uuQb7;GzJ1g?_68VEjW
z*7XVj4av{LLEN|q<EgYUVAf*XvSSKQ<a^m|GydKg9kHnl@w-RY0v2I1IJs7BQOEr5
z6N1>Ds-(HH*1c;7Q|7qii*P4)Rpt94g+yr3zLPG<C}WO~0W|H@=0q78RQl~LhwJ&7
z(Q)gme(Ni26T)fF&*fkf`d7s!Y+n&4HNwWOZ@>fHW7KiIUnwXovNDn}^tVlK9V{Ri
z<)Hj%$1qg`2H0z3nenNpp!W6?8sJs!H<(fc0(M3!{IV~95FipxSxhhBTzo{JsggPV
z3j3KX4t65xVlNg&4^()=Y7~jOSykv_3vk^*5U^*=M5e7RSp(|e9!}`_4%H^jB+Jfa
z_Qa?m#~J7Ri$mnK5`DY!lWN!?)>z^NYUhnncVLp=6Gp2#{3isImmb2n?{G$L0;&I5
zO<39g-E`#tiY;UNH*6W_|9nT0K5mC2fhhL$9;rp1kyd?L?F$qLEI@1?j~QAdhAaXj
zP7K=zZ>i|K`|e{07Zq(Y0zt$?Xox`AVII2Ca`pY;Oj5Q?IX4O0p)v`e7$WvNi8CKn
znO6pzO8t#jDLGiV%v2~XLZm4&L?cjEc~C4#mWqWwvZ-n?Vy2Y3j=l6qR0oHDOxgl$
zBU5Ubdm=>CYCw!kDoCDWCX>AESBt+qOyYd-p_rr=RcknETAmKFm{LUnYJrOqvlv>1
z3gw}V3-u89F>54Uh*Wtgh9Z+H=g&TB{#c~si;eTDx$9<&bgMd?byY9uy|F&6Rlf<l
zX=uEPLQbg|sTrDV*Z|Qebf_OezFNd|wLcM&Y+)BtsJ=BFhA<b1StUD0E84}>4~dK9
zMoXi>$+nS}?#lhdcE%)5?p32uGCZcNL}h{@LOJc%rTk8thfSRvJxy+cMsE-qA{_%h
z28ZFoD9^Utg%)3hgrzo|*vHUbmPSKQy^xO{EZ&qJ3uZ->-^R!$juC0lgqceluO<E4
zNJI|F7%jS>UyVYgVi8<<A$<s9AP_VZxtftP=ohkZA8=Venz;IO@TX#sNHy;arY$2e
zJ~mUYa1gK<GnSkI)&P!r3h{n&S^*gdH7#U-5vL&#(OUYsW?geQy%Sr{aafyb8s(+N
zGOkFX*dND!(w`QFQtU8)!i==)Okg#e%E-`enp+!K@ABC>Y&I!}sW*|BRAialXSun!
z((aG^da{(h?D_R8$v;*)w$ukc6qfFm5<h2*JlM3mVSlyk<bT!qEeiOqP{9dcBz_K^
zJBl$d_49Z|OtZmOXvm}*{uBxGh>%7&*$;kYInAM;#F!VzyULdP>hxvP)nz!$-<k{h
zhVp*?Tj~VU(w>6O1z5#`g2J{(Jlz>sXc^N|chr`H1o!d0(7AcDGON)4CoVY$6(Olc
z4u?2mf}s&!ztqJnIY)%{Hx`~lvmX9d=jRqCWDyLfE;rH73Tu43<`3yJ>5rDJRWxlx
z_K(*J*dMb3yj&+!ujX?CX6lNb?e45y-ab#U#+TBYw5S?c9gbd*-g$d-XD<sI9dd!O
z%_ItKG%ZKH7+X)yb??^goeNJW$Cg2#FsFVm4;;8C?crirBM_Cm$%j{)oo%fmx~BvT
zo3ttBKT1+rBG1UDN-7p?xF|HYKCgWr?rqp~C&I(NC4M?H>B9x>&&yNlUChh#E(d=M
zyV5@3*wC4(z}!%7PM!Sxs4ok)<$+tWUlh=%r=K!|BtGZmD*E((^cBM(Am4qtU$6oB
zx?8a6Ztr6kaJHN2(d^XLo3snP8^1n2T3L9wpV{Sud+ArIY50);PDhA=LP=k8mod@n
z1~W(I?2rp>9Yb`*VQ*s2b7v{h;gZ%xtEa}W#8s=Z(Qd*pG=5xkhPFggt*%qks^`#k
z>bdYxezwrGIrNF{5P>I;o^e6SoO0i|m!NuCrz~2DnrG+s7~YZOn#pwpgM;z5{+rm9
zc_UqF6@0}3bcgTiQlo8_9Y;d}mg_PjrUR?*Fofi4Sn6`Onu4MF%_Scas<b!D<kE=h
zo8xS|M$5|Y@QB3$*u+X*stg}?^mE~{Z(K3xxYI2X45ScidANtHT8Ig@cGSfEH-d$X
z%;Tf9Bn{DjZeti_7B<pMN7}tiQr1{+xqRD5Vkz%NIE{K-I~igr0nX)x+~}+$frlE?
zxG?F);!b}oVzDNe3b@QzN1~e-8@2|jWge9~qxHxc7RJv_Z|d=pV&{=04v}NQB~E9y
zC3&!MEk^=L-tfsuVxu{kcSplX{$4m5;MQF+okx;%EPh0O2Itp#_tPqUeKV`8(T0ug
zorFFA)4L#Jj_3e^^#7lhQ2y(Ps>aY|XxDS@`V||lxjIVgv}0_T;!69c{_5@p5X+VP
z<s>cdzanJ*d!difFxbh{aIWDN_ve<2`2|t`QjT1cpD1wxqv278{5KR47twQ6`hL6f
zuB1Hf?+6ku2U3dI{lUEIlK<m4(m&>bW`5~!yTTzx;X-C!dCPIRlqdIQHR+cSA!knb
zElEz2<_%|Ziy-#|lJK83ij9-=Kc`yEY#je{tJZ(Rnw^c)jk_sBT*^2sfe0bA4t6U5
zDDz9MQ~pb?@3SnIOKzM1NtmwNYEG*xbh!ToHQN!z*?gbBK70f1c0jX715oDaI%mY0
z3RFpA427Q$Kl9sopRbLkGj!7!<<$Q=W96hz6vwTJYi>D*JUL9~9vCZ%AnLOMj0(u+
zHrXm3`ce<K#kR~~2wBS(GY<XaFmH=)WLHyKuGgDlzX={>fpiIT^vO-1Io#!OJtK;H
zZa9lo0L*0Q7<mQ(UadKVC3b6z9M*y`bj<oSP6Qfat<)B6Ho>OPjnOFh+M+)UPUi`J
z9Rk2+9Lp>x|7iVD3k9S!$u|jsdS~3?-Z9D{$YEE5m`Kw_{+0(K5>_3xmAhWGA0M!v
z*#MYh(W{<!$-_yNOcOi+tO{mHg^vJUtqHUDI8_g8m5oJJ0IPzt%#b(qqvifT?kymi
z9c3qH>ZV8|Zo8z{E(Q=1W~iT}X($#7hqu|mU;}TIJI#dG&x|pL&)3g#F|EoNHPv5w
z&~G2{`%^_96E-_b&VPAH)qdlow;IRRbQtV5MszK<$C>G<V62@@c|wwS_hwzm$YAqo
z+R0byxd>BD%xz?1_)tU}C8?E;Q3iSrbj%<&+EoD&#cqXfs@Y;zzvpBT8ALVNz9u-u
zI*q5F-m<Gg!~k9e0bYzhrj=Xaiqp<457gVhNe9q_mg=#7=&3U{AAI2P8j=Oe;piVs
z=pR1FOpTf0=YwmyzMX^qbmZ<-*62ig*4t6c_EynL$(Z%NJ!ahj(-TFezo(;H0dSM#
z5WtLA36sxdlD#6j|8Z-zihyA8qa{c}|7<^Q9J?6NRY<&{Zwf@ONjHsM?&0HlKIDlR
zQ(qlB+((AAYqcu`$<mz3%;d;H-_q0B#FuVq?cvm+w#3*9$tVe{Sj#970b+weTZ?13
zoUGkiA$fF;BCtb9wQuY4;(YhM^-V=339ItU?De4WBGhmJg`h^WHuNO~%^u{`Xk=x2
z-rx-*%XHoX1+8H0MtD47`7yb(@^<XedlQ^}Rvc9^zzuqva?)yF>IqH=f5ZUnJ^juP
zM5j1uBQz}(xrlU_J3tELRgXXB&|0E|#u^#BUVH$&w7XXO(KsAkH94+Ig}K*o!no%=
zjN#5apQ7mk!<JO3o$MnvZY?$gASGd;iuM|j=L)^}IFqOikS}Uk24Tb_>R8y5^!M7;
z<m0>?+MjSH9YfXY(@?IhrYp!S-46A@rz-Lrcmy1vq#S8%%7+BgdDn64c*2Q@c*GtI
zQNqF^jnTzI;5B!Y#D3v6vyxc6Ghmv0DuiMmeJR#!eX86ia`6NK%nOK*4u0Ovg2VOc
z6?*T#;~{>ho6Mb$dec#lvbJd(eid6n{*7ja_9$NIeSS=hk!rwen@f}Q1d4C;t<D_`
zwXQW#nYdb1r;T%nxj83n#~n1(9pwlA4rlC?W(}%_7VQ^kIhPJ}6%UGg%xk|$<NU0S
zA3~7y$BAOW{MOIPgjdoJ()P(bfFVKZ{8Av-w;Kl-r24CMK6mRYP_!<X=%82VGeMbe
z_ix&rgA^Rk`|xK98h)_edjVMiu5o<GJ;HXt!#5aUb&&aWzQHcrjB$LA5PMN>7C%77
zO~9U^-OcLe9kTsmW^oPIjyA-1X;IRf1@0L~#hJ0J)+Iv|7O<hj1k#pS{)+muvZJf9
z>_ouV8~j}IWuAXtMIfut^@{Yp^_<8h1X+wlyf^KHg`dwohsP}|92!R)`<#ZcGxksi
zC9#K)NDK>Xd8c@lg>=o-=Uo~+dcq_EpR!^6vxIq@Q5(_dAi{WDp(I$kb&i(_Lz4EL
z^=%x{<SrwU)y@aB&MGD@H^YX(`QW>VEu0+UC?kUoYb6^pIoK|GE7oa=#upMG=$^q>
zvPBZ8s{!;YCc8R%USQC+{#)*3;T2SK`v#4EvE}KJa>N}$53GLZxS7CaqCo#<L?KBW
z63Ce$>}FA19ucUCon=D{XL?K%en1jX+yjz89FPPaoI!z<8W~~zRje&~pshxmt~ioh
zEU&ia!^bjkuL@D|_RPU+vY#d(2pT=Tug3mtA1Z|dDuqKTg>XWQNf6RuC6XOKyIcb<
z(9d!@9<y)}$`e^5%^{fwN{C}nyb0CldZ5~WClvz3(@)8gW6YQ#^Kx;nbRPF$+`N!J
z?A@Zhds#NwvY)DJyZgECks;#Zw=Lr$OJYi9WFM?6a~}Wfpxwg6n2*_0L9n8M!rtMH
z5!E(oTsqjJrclY;KY|C@zyRQ5F@}_yRcwlYl;-`L(#_bxhtQrviSbTDjKl`cOrAK5
zz1zz7`5x|&;*O!s_t6h3m0!y;vpPl}%tbEV%VsIc+^kGz&DypYq!&ml_CGUzk^Dw9
zDOM1}rg#;s)XGM2PArpB2Y7Eja_R;muGcj;h3r##eFE-7>o=T|Af?{d#cjT4=TG}5
zNnpg2i-(*77w-Z4%v{4SmfuGn1(E3>woV!6Iygdmk+7_F98v3cW8TX<h!9)wZ^?pC
zM%0Q~M9Oxl^DH5*1;qkO;kd2e@#tNGiWol3v&Z=eL*+Wok#@7v6S<k5rK|6`OVM6$
zO@-e~eLSx*yS7m9<(Bl-J*Pd*8p*Nr0&=l8O$wC;a3WPL-BCA4t<(PA)A&S)0!y^#
zWsA-UtgAYrvXA^T2R;W8ZQb|-{S1K!M3QKD(NE-lj>UhNdC!tZEFc%z6@Y9r3^P(i
zPRY+0tj9_c@n7Y6Fy}&<{On`)(_*Qxm~0hPY~#bSOaiz=P`Am=$Pw?u0&G~ZC*Ccm
z3Q_&YKVL(0NgHAh_hG;m5Xnhmu=^D&rA2a&$?M!8{$@HG*nw#QZQ`P+NVq-}zp<p(
z(s&53(YPuDhNF@rk=iz1@l$Rm4(ZFhWFUR;qf9HN039%iGecbqP4w2g-mihN7NNxX
zhKU}f=rB^)MT#5-(bS~~{Db1uZ|&KiFZBbW6;lz59|FW?@>aVXgHVB=P&cB5>fMBw
zQK}!n4qbL*96ct0b;OXrkZJJ05Q1=AO$RbAl#;*1>f*$kPkL%;Q4{@aP(}$DPE__u
zvvN`fI1_k~)*D&H56|5oqi>+{Q}1V8vWK=c+#qz7oK4ugi!hvZ`wrN=D>EVa@u}Ma
zg9loHB*u%|8c?bgAt^d1w(<lymgk8#+z6tPvtFPi;X`<=P>96TyeZbNhLjvrFf74B
z6B{g$AEW{qCnS2mRd$yD84KF_^kTq&#v1=6>^JHXLZQP)l+~RSdy{g^00Xh&2taOW
zPXd0*<9Wvv4L%UyO0|#j%JfA>;_s6QNt{@r4RQzWKN0;9yT4~z)*QX~cp_BZKAivy
zd5$5>KM5^Az_|3&j>YmTq0N7}?_fBYmO%Yyf631D?=z79ny9ldGX2kI9Dn=ER-}Kp
z434u0Hy>nz0*zu$g7$(LslL*n11-%?5U#c(dV8~P4|womk*m5F)sni7;MjzyVp4IO
z1LmOvd7X_f>(hrLEbT5P_9pi4AasrK(uibY0{{x<%Hd6W8<$td-LxAIa&daKq7GYJ
zPR2#C+gtW*^9i)|fJjh^+rPLBNdPVb?UK2EdPA012A}`*UoJyUjNiVDG1O8lcdK1y
z9=@UmiC~TWKcY-Ncg<X0XE}}aC(slkR}gg5JcCKW{Q?Xn&XX%<a2DMmC41BxpaqZ;
zz;qmNb6_=(Q%q5Gv|fyxNdlN}ZaQ9W?&kAE=Aw|G3((+JyV3S~pw2nBuyxaNFmlMT
zuqKgof!Fdt!`{g^K62Mue$pR%l{^iiD!AqLPu%RHS^XcS47sb><cB}t%cB`iGP-9p
z{Bq(KT}a@}&Z1}RFKgrAJ;ghaBv)hP{Jd2kcbDZeA-f;{<c>yX3n(2zrvF~=p&;(t
z<BAlxK(c7X%qe!i`EdKoX28Y$N(AUKNhEwBTyQjmY|VuXVUe|e@?}X*^>=Bh6n~H&
zmb!i(E|U`G=E=W73>PhBG^aZuDq59^aq9>21IFw{DVQFj!S49_^|Ex)8bV|;kc<Ff
zCOm*J)6w{R)N(>Q7Llu{CVLNQre5b$?iEK`)r*jFuvVxOqwTbE>0@!)sl$G1+1~~;
zWW<+wlb?P^Q{oPb*GMeb7DvDMcTUMo)Dpm6Ndl%b=OPh7aO@vtrlcLOG$D^!sjvEZ
z)VIX!SiIQnF3CU7UT-OX=?u-^0?h%^Oh?K^9UAoEaAV7{T7i9k2*$mNUcSgVt3K~H
z>MCa3F_Z<P&?bn*&iy{@9|x=>#xRuc4hP^6b(B`@fw|`@U+fpWZN3~E+I)HSA*<Jx
zj@BEe6k49#Y9@gdn$}w~9!uWtz;Q1Lvip6$iX%Vw>|DIu@Tz-ydR8hKdW2bczV3G>
zm^{Q&bp5`7XlnMO4BpXxQDY1WDF|WfBr4U~RtsZxb`eK$-tdBgzA3VQdwY&xSGT{A
zyB69Ge9r>(nzbL{ptsqVm_41`K#2Gy9YBk5eN;g3qa}}ok_;g%Xg)u?QT+Kd5kC4k
z=Sr`!#Zs&HuKg41CKW{G%j#xU@RITWi-+(Gq!79~S7DbMn&494SZ-li34mXRT0;OJ
z>qrMPx$jJzsC0(hPU+TH3Y@`M$hgodj8{?HioTrsn9yXNf%`k2Nj|m{BOwlrA4e}B
zV-&PCb>73b(Bp4qKN-_n#_vLsuS*$fYCG{m0wjJhUR3nKy=|zB_(3OT7`eP(F_pgv
z01&8Oe-Wtq00im^)?eu(&Z)4>qhV{vLt;bFBLe^2{H|c-=D0HYRkY^HTHlQ(Fvl0Y
zmo_AtEAIB&M~LVVk9Oe~&TXR2-c`8V+DCNhzw$>OUIC^SJ-2atP?)>AoG;;1N}clc
z35I2LgHYamh3B%{h!1HYt-lxAK4A+|Ugz?}%X-K(i5@EMQBJtS05DP4G6pOr2x8N3
zb@dm28%=9Iw5l6GqlpY?G(qp*oH&DN+}sa<&!#lC+`E;g<P|#P#CJjW%?xlE5JgGC
z2VZF_-<Da|9hfm~_4B)!XbX_GX|~gcl|y|$sU+{9*ZA=`kPDy^UG79em264>b|)qP
zK0!QyPk`hF@fZ6(JS;2IZ&MTmu=NDw{`~VY0LF^<R{`lJT}TK`Lqd3Z!yOB^RCooB
zQ&|s#L4@*61${mAHV-V+5J6AQZ@#f6k)eeXC3h?Ab>aS6zs1_Q7m*8V&6f|i$mx3J
z+Yf_o9{<N^i?iPwGrY27VS2=EOYH-gFk@&bEt=yG`)DwGt?y91nDD|_M%`?S40Z3^
zc+1W^WmiR30>&TsXK@-mL$45LJqA8L8t!wCP&p`z(y|WyVp%-0hzpzc)U&NDxZ$d&
zFf`7I>*`$qotQ`QNXSA+Muz9MDM@MtfeoZ9^GbcQ3k<xQ*T)+E-Fi%MtG$apVpTe1
zt|ZxG%txgK`t9Dv9toHqp^BT&lv766#aIx|li!?~mU*y;q@Zc|RV6k6RF)3Fv-`~h
zpt5xRrLtuF6&or5O9|ANC_#RPI-83B6W`1D`!lp*k8Lsuu;`K3#1adR19vTJTC$Xq
z`1$e4GtP9vaw={#%gj8`I%z1ayo|vzPk^RRObyd7a$$APV*CQnWk3av161JIfSMTv
zY`{+Jdr(sr)%saQWAZgFYuTjcLxgAtLwu=s3Pk4|pJzEDik?aK1$i=dS6csm#N$m~
zKq76E1!GewQ&Xvr8JIM9QQv(2qgQxYmSM;6Yf1G-sc}KVe6b?3DBHhUVyYg%qodTU
z0m%92QqxGaVgzFEX2O5pJO}exBEmjDoUgsq*|G7AdN9p2P3vDL9`qL!9A+E#ClX65
z%~n|N!-fBuBPGlFD)K{6g{hZOKOdJi<1Ee`1&=~`f$I%Fe-RnbZfOD9En%6`8M(qM
z*uU(CtX(=^C3UuAKGBO@1#xV=djNzyNUtD+Q<;Ub1lP*(6LFm+2t!J^s(RD*3*)d9
zzaH;VhL*#$jb;y*-j&wuT1O~0a<_hChd*9~;gZ&@WI9!3oV#Mn&?o3Yrm_amzQ6j(
zPs&)zBqgarEQ-uyF9X_8P(T}MVE~|5s4)foO_X7b&mUB<6td0)y;b$!kCD$l0`<(W
zsM5>}+qX%kStc*kHT;HOnXR`n7H)||Q=4`{_N-Pz7HDXF?swf~wRTeY^(07)shtgK
zEj?&`71JrTIg;-Yn`CpMS;zhcBwGWB_vmlDt5eQ!_#wz<?QOgCYv5mO7a>2cXo0F!
zM_cNv@UYh1PB>}GroxI^c}(hjhhI$>0smFGEW$HphM80{kqvV!dvFfAQAMD;WOs#?
z9=riU=|&i$-uH^JBbGA%OMS5Dq~)9`p8O3!<OlI%SvpSxNFeZ@F?PF>fG*v!wBKFR
zqQ9v;I@RN?#m3k*CiV{SKCw(p!8m_b=CIEj4}i&9mr-fCxDXJ6wd3@zat}KoxAu_T
zVjfG7^9`Vq%Hhs@FQxuPEC2N~6_cK(XD<fvA?HS3l=~X(2ff;jMCU>KNl!<W8ek>I
z#Qp<@{n*=3XDh|o5lYHG9EmtFXvL)zJ1)@&y1g;=SV_v{IBB<Yby9!zjBtdZiG3oC
zfG;j-dg0^mwD*T;D*vhXt{oKQ)l3eLgk10_jS$?kECp~AXur4VDIb<<F%?M`ILr$p
zqJKtLy0?0Tuij)wvvO~X)Im#higP&RnkmY2eK*7vi>QRp-!!$Jrhv^)Svhr5#TKoz
zi;vV!m;*)mN)=Ak+%)5Q_a|9U*Mv6@dN@l^j~THD!)0u>?Kj!rz{e7AQj!nFZW#j*
zzsxiXa!h0advaXsAC<&qK$PN1r;Cg)iyy|X!72r+VwZNcz8M!zo2L4+c+fJ1LkwBY
z=-!#1W%!u}$N3Toa^5^E`(NjKh1bS?aCy~^;sCn9%HLY_0jNbQe{0e5-&)j^^U*&}
z9zG`Q@?Hx*Rx9Eh@i6Thzq-d4XNduDwcA^OOKCZoWOUQ$wkBl9l+1ahmDa{y5_>qz
zwX9udosTFc!p$ppG&*KFEodrJ{MO$TU@~;072^9!S)m7bQ|+MyDT-S0En$>5!|#c4
z?&mDYX6NcX@_>${bSqd6$PJUdMC~o+zcU<smnY>DZ)%bSM4tkPeo+~)!s~xlXiYbK
z6?$Vv1Q!pWW|2xrl?v6gUrre^M_VBSo)yIofU$wR2q-J&|CE&yHUH??g`!#J=+n9!
zST_DgK)55_;sTbPW`7xb&(5~*ag}vxpTuLMD5;uzmz3|dNMLQEifmI?w5A3jJq`Ov
z%R4z`T(=@JC4{(dMMaT%_kYq#BUf{f-x!7nR(t=OodL^#GkIp>{C}7{Yij>kV@LI!
zs@cvaH%=6ed;;;(s57;g;bZ-+jg4AePzd2IWovHz^)bnXO0|-bxa_zFr{RC?DCx`X
zruSzBZ|^(~rCSN66ns9%zdvoZep<R-`Aisc1Rbq|&!eO1xMLw?x{}_xY-KmrHes?u
z4Qu4Izpd6H#xd4P@CH1wh&q=xslkSo?`2TF`LG^dkWlK1^(|v3LE#Tt(arim>q81j
zuCrR8o?-en_Je2NW_@By>nxQTh93CkB4sb0VR_&H!B|=6Q>+{?9F=J~g(bWOk`+{v
zHo2@&!36hCII$MYUg2Z^=+@k7#>}760cBKWEsICRT&m7we74?>By*T=Hr#|%rQoBe
zRP~hBz`?!6lpT`Sn&*x>^rzW0uoNmGD|3?J+#xT84E9a%3BA7o3aun?CpaswZyKxj
z4XnXcEsKI%C~^3ldrja8t|TX0HDltq>QN|tCC6BqyahKolulr_zt_D1JAqqvXjdFC
ze26_O`5=rfws3^`p@K)hl>SdI`P&}L9!*;B2~>2qq+hQ*za*^<tC@5#*9Uny@F^q`
zf<fV)AfWQUmvZdMY6kj`QIe?mfxX)6NM%vp>?0u=3HX{MQRdqx(8MDetleT`#%zUu
zDD4s}BWEBP$s{4Y{j$ftp$B;4YS)E|&w{%JJ{n9?D!2Ac%U)Q#DCPT`h|YZH$ha}%
z3v9}6ar;YZW;gauV*g@ZVTfhR;L1iHGiy=c^4F2yp$N&V&b@+`xh<5xs}qfUFI<S<
zf!quyrJb~;q}l=Kv%-Q|^Gd5W7fCmWY9nGK)p=M!0Xw}n@KVEl*-C23J-wIBV0N}8
z2s$JjoD6>6-tOPa#@`k(lXY~0e_PV2Qx2SrHb~_-r+#~IRi%Syqm^@Ir?h>n#ho%|
zfbcJAKt5sjVj<X9oVZFg+~lc#vEbMKa0jJhYVG#^NP^_C)9TUhH>D{eU~CIL-Zv1~
zQ37fY6<SK-&h+MBFhrtw)HXmwUTx~DuIB0ps9Pes5M1m_Q&A%*pBeRE?~A1(ZHXfa
ze(=WW%iGKe+3wqO{uV6E)p;=xBFqlt{X>;?X(vUQj8fHI39=wRgs_P-ZY^%fGZjFy
znd@Jkkc+`uUa{Q!$m4E>E?)gv$v*BR7BzfYlw(nCOj-I(Ee{E9%r(WzB1sEq24gb`
zf+Y&M^=_c#yP?Y{ZAs9&BPEr9v%mAqvZ`Iy*e0wa<x$n`C*rRJPWQSeWrl_=h&J^V
zOzE@_v4dZf<MSz_Sc+y|18YBp-KvAQgt6aIM;QwprA=MJkwXiE4?d;0SH2<s98j<8
zEOL}T$S)Qk_fODl;NeYwI;%8t`0@BrFC8?)Q*jR0#8Xiz<7%OHQjn7b6`PyOuhEJh
z_idgt&ZAIgiUgv%us=EE!G#!jsR3Viirc=cStOu<tGWT{xqy6&btQQ9s*JDX(UzL3
zI9FH6E2I|N7;D!!6fZ+}2D0Q9JQ-U$1Ij9uuAQ(w@6kn0ViEtr!8l5J+?RjT;QsNZ
zE6?@qx!ok})t%sjv<IM^2|u#>Xi*i3JXCwXC=B0~Z^K{gsBHeYj-BxVf<|_>u8Hly
zI-VaUf@9XFik>wu=_~>qc9X+IiyI>7{0!B!)e^WHkBB&il;l{&xQYSVe@BrZqG(y#
z>C27Z80yt{hJgSle&xQ5W97#CH)!7b8HL*&63S!y%1QU1@ODS5Xp;+A-T8O(=G+P!
zDsOzr6^m3h!ekK8nK_J#(Ye_pE{gr!L}HwpLgSa=%%zSxe;PU#eUWtDL7T#~ff<Wj
zbN%4!5+>Q>URwfTl26mFaH2OqyEx|UzVuSh6~8V4^xs4Y#pxZ5jn5c{;4xG^h$r}x
z!*A5;-#2Sif>ZRVD%S8ZdFnkIR6>47f~YrxsK+POqqR~)(Cb0(V7jF)uf%peu;>_^
z2CU9W5YzK6K|+O2A9B`6(A9$K#|8Iq?YYD)6&w-rK6uUtMWJH8l^fmtX5bZ<_9l2*
zhk;F^Bzg)_aw+Vr1ZZE;&qrQXl$g&psr%y&?=qmWy6x$GRgTQhv2+cK(BEVmVu=#k
z*s!70a0hUtAtC#*kBcdldt&g^*x`#WOc16bcj6Ps>9Y4AY|OkHHusyW7O`NNsJOz#
zuJ!L;-2k73(Z7UHH_kdpx83AT(0uB7BF_G-!4*Sds8DtP4)IvL;2e837R6wF9vhl?
zV+9n$P>7n`mA)ZRX&*U;zEG=1Jn`(sTi)O6-mFtp_hQR;w3lB#qP}OjpaLV5L~^`s
z#90Lr)DULbgc;Nu@cY2(*$_7bf#v#7y-o~8;<S&(t`GqW0hRqT7lA5^;<3AH%R^y3
z)^`soXbRr>_U&@mP91pfG`dg@RsOK;gsePMXsB^zi+H(D*p!NV7==F3fdzMdm|(AZ
z6V`i7PT8PHlXCv8FnxHl-Y<7f&Ct9xt}#6Z?e8-8A@DQV+5`O82<wJT4F64y{%`TH
zEdLKRT3y>|kM(bjM*J~(KDaQE@P`Ccm(nzaeaA92Y@4&}oBr&eWy{+cSNp1z?Ar7O
z-Jj_+@x^%Od!HjK<3?f3F1($f_+CkaPH-r3UPFg|tb8eCl>BLOiSe0*nLKqI;{?K|
z#Az^FwPQtRK4)plecz<|ut(CRRoHZXq3ByoU*l@hvGN%@Y`hCGP-vk+<>Xwi!nk!}
zG?n;W)J0S3I<?Ux69e^Sar^uWZZUNW=OP8_*mZ?5x^_OOTo!JR=pBW-eam#}?*Z&|
zd4{wD_}9eIu%t4gqBd$BPNG;)CwTW6uiyB(ORCf}vNl7L2BW3iy#s*7GNn_jV77_&
zero{Bf)m4&0L$z38=XQT<`Y2SA@?<TMm{*~no*-Kw~k#_clNHKKH>lnv2fwxn>KPM
z3@4)czn5zEEN_?<L{^KHp?{S?7(<WN-U-%LV*petS7wxAW7YLeMtjMb+n;YTyJqo^
zF7a!iVxO9hElYsc#Qd-`hE6KTZv+~L3O@Z({)LFuqWAude8>~Y@BvELuenSEMlQ@z
zhZ^<$$0IQW11l$*mLTPlD~hxtG+5hqV2B6AqvEdyQpxv>u%+#z&jeaY$x6&aQVaj)
zWOAVSt(_xH!5uQ-Vs!^mG7%0T1&JTfkKM-y>8{Gi#$vJv!=on9=-g}r4Nu<aq`iM6
zky%_Pp6&l^+eE(Mt)Jau!#nZ9Oyod(y);#)owb0ky1XfZ%8oktkRh2#|LGFN<~N+x
zMHj_a;Jg6#I=~|%e%O>phL)dp&<NO;E!Ujkuc(c>kD#=U9%w<&V9fc_)x(kdlF@E#
zKY388;P;@&3=m~vh5B*6H6d<(w2qnMAT(nG1)OaVO9`3*6=()?&!2u-ZpB~ByviHx
zm!r!Ieh9>^)9d_41Rco-7*oR`k{tDMKq9d#PqTU9eb2o=IGR3#iDc&E*AC?t>Vve5
znDfjzyNexAW)@3RNxYJ-b?w5#g?m3&oHFOI`@G%ZgX>CkStR>HCtbSMxMrj6QH8hl
zNv4T4D-r4}BC;un&`-t@|G}X4={G3WdW+RbxWGk!nS6q%g^9vHSGTjbz;jQ}g@2Ye
zZ>XX?^{RKPSZ09yhK!0(MlOFW?m?|5|Davt`>w#0h`6YH!BP;%{Kh|(KDN1R<7B;-
z4}b)+5*|8E8UIYHcUs{H(mS|zpnFfDty%NFBmZi}pZ!L#jk`LSc-o(GQH?z-LN#ya
z2vRBL;Fb_V6;wSAZxqI7wWF!T-BE3eI5CdAJ1qK?576ep348^W&m~+v^2(}Zzx>j%
zP2cc@sn|!YbtfS6BOjCNUN+^QfgoZ&-7RzxYH!kZ!IXm3;i17{v0Y>s%;JrF)6upt
zCrMpfkDuYMY(7D9BBjBU`mXuLudz~JXE|LRgId20UOH4z$}lnXWSn^PM!EI*ngQdV
z-&H5$=CP<%I8>qsmAM4d!QX|Oc$|vipFCg?M=da?k3{dZS;dy%!Ifl;yK0phTAiD7
z=D+!Z4H|L~BX7h)m36Q*Y0N#E-VUZFcENi#cBDj$XOszv?*vb$>_)Nl<vU4WW1b$P
zf_<<er@MY>bH^!4uS&bq#Y`yB)svY;@#bIn2^3T;hwPqbqIRR;XM8o#U3$S6fT2X`
zeTZvPW?Rax<09EQf0mqLq<sM%9{VUA_NLdkh5D0B{O1O5CL)i-JMY+(;1rotDc^V<
zFdd!DK*30W#LG9zRL%IM6%q9;_TqvItwbt{#cT$tOI_V|3p4(7yz$9++E>s_SAANW
zQx?gH=2GuP1c*JB5dyP<9-smKy@vmMU$e3QyZ^=i6><9SYKENubAH4Bre;VMHb4Z0
z34E(m62=Z`MX-v^U)qd_DODt>wtBzy6vL{VP)v#5tPIl!k`gvfw#5q{w$ESXhz_0P
zRQ!v&5*Z_ajx^p-DHYj<WTI6*Mp|hsy_EUYGPm49A7+TKY`h$ZnN%#sCS5JEjSUrh
zC;|EanNsaenb?7mBP7BYLQiT?X9llDle6Gw#7_12P${^zN_a&ZWpXQ;^&I68WICIo
z(Lh;zFNiI*%9sr-3YqG2xp3Kc>rASq-fdda0HG#!)JPX~SOI9NQ79O@CdmY{e1Gha
zfpuglI3SzjO|UNPQn1=_I6TH|dJdgJSbezfg_>?KVCu6Il8AJ0YB<Qy(Fi)Fx`Rfx
zy@X=ciBe?RI6D3^C3!#5hkN{8RUHHjBC*4*VZRf50wqJUlrteWvdv49iQV_Jq9Sz^
z{Zg!p&o5THWK}Eg!yOi9gZ4Dcry*$^i!a~qW>#s^Q4+_e1j&VTsDj#OGH)_0M+3H<
zc@8u+j-L$OP;O$6(SqnK7L<eg#;47cIXI79j;4trB6w?ooMhB=I6C{&W{~N}EgX`F
zc>edsx!`#iEE81{Y48A5Ss7TkyF5r6(b(P)M(Jo+by#@Si9i?vCY)>_F6sn9rVA#u
zJMz(kWiWoQ-+zEt3Ly1N#6?kG(76nYEJ6{`??Ri9SB7(cHH{W&@H}suxo0e(53a5_
zMo21y6VT$qZ~T_x$ozV6l^@s}GHSaFZE)+U;#d+`r`2;CNIW?8>~4qDXx?#r_H}3C
z?K)<${iN^MDS25)r~f(S15`A<woTT^PYNT=fIV}}l$AMcFFtU0@RAT}em@blLcmdD
zc9wAvc-L}&tQ6J*%2dW@bSi3eCS-;i@2-JgcmI4IUN}v$mUf91@8^Waf#KD&{;DkS
zT;)@jwEXgrXCuzc%g=>18H&-cb+jbt+3n5T;pyWxBJ~!Syd5j{dg}oF9=11^G4!yp
z0X;Cof9H5EGfFbz)voiwrR}r-hNWwQ1M(p{FxB%O37V^*!On^3Nh_z->2-H_3vX-b
z-tOI?lhu`_J3(dJ(V1BV@WKvs_j=iQd$?<Ou;DAAGvUaj$2gNq$TU(x`r#TG4{&21
za#ePHFv6)!b#UKJy7jCx?d}MGYquTSX6w|UtC>86BpT)ADcbS=^p#7^kslaesqpyr
z81>*?*F7YpTGcW1>H1>l!q&5qshd$waJ0nHKr#N*m`0b0?*kz;xzg{j0_?!1y?!~n
zN%cpqw+MF*-I(R9#lYmK{ie~_$GT#T&W~rVg}vRh<#$!<gdN^rOJ?4<u&jOUGxvp?
z@;%j&3hmDEpbL};l{nC$JC35|Fk+IY2=_Q2V#6Frw#$!QX_NhSE`2RMT>A#uvhC;P
z#Q61Z_+H+HL3V3UZz<>IpdAr|Z&Sk?;oHakPE)ZFkB{4ou}}mlQsR==(Z{l^k7Y^U
zjAmcIKQ$DDOI0HzlBd5ZyO!8ca|Ydx2HS>_DTC-N@x=!Y{FLCo3I=5nl6-VhL5lx{
zzYoQz8XWwTAw|~}tH%{)rtF#MGFb@ax_(QjIeYECd7BN%?q!RRNCwB#g*Qx(ikR>&
z$Ze=hn`rbxjVeR8hc-Ui{Ovof)s#YeM{NVumPFvOJdO8H!bi3_jGu%PMS@OL(>l<s
zsUQ<YVosJbI$Er$=(aR;=NT<kF?K7qjx=@++AiAbYAvxC14vSc*`%X^2O6Z11TkPz
z5+p*iGm?mDq@$!l<WS5~Kcxa?k@qVS2p=~+E5WeV9>d8bU(e~nyI}b{!RlHL)+BsW
zL1u~sovC{1>6VMYI~omA(a&cCb+u~u>i#*%{&8=f+9F)3xT)eRZXvWxApGWM)!OmT
zm+f1ShU3P7{Z0h?_pkKUMJA2$gJEvI_#zbI|3F(B>A76}<NI=cr0cdoJC<XK4)?T3
zTYjrBL0nBV$QB&d>-*l`^!GBNl^kh_bT?g8C(&eNJT?`Nfk)4^^W1&qrhd<KWc(tW
zHA*)90C2{QG5al1pm|%UNj|}BnIhF7n{GNwtN*>>9Zoyy+44W@6FUdjf1pj72$@(o
z|5r6)CS>DcVfq&v0)QR(*Cio4C*Z^XLmFO;Gx+zUja7E47-vc*f5`#Z4LstFD%b%a
zkw3kR`_mi5$kZ`}k}b2d(1HGqg~85}GCs#=UXvbEUo{UtXPRdo**;rKx?cKAX67?&
z+bXqU%Yv#%_NWBm1e8#Uv&u~1z`%mJhysX|lB1J$kRn_nU(G1>I3n?vj9@yq0*KO*
zl$b8YDi8&B{#uZn+j(IuK%`(m5z>K!qyd8dgou<k9Ebr)V3a|8hRr~9zP%z~xTo@?
z)xq`;LjC7>iX1MVpWybwtw4lOP)wU!J3!zVl*kak1Hf7aNucuw#l&Feu#AF&28u~j
z0?PK%Jh_V12_PVPdV22$7;e5`vwA5}7~9|?TtN4Emx_fH^XS*sCjRWp;IHPazQV5l
zd7ytj7{Y-2;Sa#Yg@HLB%rU{B-Fl#I{MUrz{WDI1KwaegV`)%7F*NQNfEc!?4uJ&U
zbN!0mP(EmoU_W^<VFL(tbo%TGFxT-7zBz@0IDcn*QQD-`1H!a>gMoB)l~^AP;_G8H
zubqI~b~`ihFAu~A@@Cn|d08it{6#aCwS{;1ZY>_5Z)w-832s*rZ0``zuXGvQ^Qsa`
zl=m~k=LzZAsa4)S7z6j?X$&5srRj|=v?{E^9|`yH5Li|HGpLOT>2u8tK@3>bA34w{
zKOd+R2j~>e0r6S2cX|Zvbq(s(v^%Yj;3Co<e0d`p_#tdX=m&qWg<CBJs=u(O;K$6@
z$@5VpA{fYWKOv<*+&n%6#k<=h4$J&y&BMeCiQy&?%r@MIAkcQrk-ncA2W*2_Z<fy|
z+?UTFNO~lB<QO@xpZI60iW1TeoZuZ4H0U!JnP>pe&)RTCL0MVE?XM}(c7x>{@lO&B
zJi9O;$@hh;3Gt7Oh9d%G`%fiEjP0DZMkJ2;5D?C<31^vHNJPgwkdK^~PoDiRi93DG
zPxaz2H;Hom$;qCNOh4dH0)b`l!}Ggu4kEe;^D(fEh#W@#UmWvbuc@w0xy_6Ssb7!f
z1-%@OivDXnU)Cgsm*tE<N~`E$!5yBXnEX~^d@U!iAi_?G*A08@Fn|akA>SFcVtNVX
zIXJ@K;OP81ayedK%%#vc!3^JIECPx1ff$jY9*H`+4l|M=f#BMe3hCzGIO2gp-tECo
z$-(SdvITnK14!RpigyM5{X4><#;;)D!5_pw98o?Czy?7hI%igYV6V@Ad3|UvE0%Jg
z3Uji>`b&mzsfDrSs8O#!>}q2rz4Uh*-DXB1@P*jiJWEBU_N&GtF~{+$Nxm!#u{RX<
zdyooDct>p7lnzDNco#1@U%K5qmwjGKqwMS>%1s@;%Y~%diGx7!i@d1z$Z!bk8Bm-<
z{Amt5=d#1!Eo}ElD$&!m+GN`p(iaemt~st!3%5elMUA^2!Wq(pz=*9sS~ZECsoNC3
zVcK<4IrrGY=i~kqj{kT`f7wyN7yhbp&jV?E2{h<m`E_MemY4hH_3Cwh=p4N)ftnL~
zRe-jnOfV~-8wGu+7gJBV+@@FYTN4)tq9Bu(nN!rlIot&}mjD#~{UDM6O{EzXsdpbZ
zL17s3osX?-TV0^Dp5qEXnc8y2YMaSGV!efxMYO>qC85<DjCD&H3Ca39%&#f3Lnx&B
zV|qwGDs+nZEGZI?EfJn2X@|6JqNGepzw5EP&Y;9yZPb)y5m*;xRXkuF3MYE9$|d=}
zDQ_7)`zW+up6;Gq=fn{-nUS1LJp%aJR?c-ILj^i!`HItBi~?E%jT5cwJjEF&bPtgm
z&}mZiIX(UnZdpI&vMWMUmAereyO0f@?(*!Hi=X$gR#`Mv&ls;*ukY8i7w`RNuUe&W
z^ceVLdLClM^^}56*=yR_7nm5sb@9`=H;C-}rE;;guh4;&Xv~KsCT1Qq?lTB=vQqIK
z?5?Dh;@m@Mr89I+e@*^GLY--aNb3R%-guSXOJ3utqx^!!Nq7tu$?|mPc`?Tk%br@>
zF1M@ZvPO7N--o6eaDNP^bL}K?KOIA1e}v(tm)nSR6iP33V-JaIIj)@D9_}m<P@u#T
z*2bk1FC+LKw}=^LO~$nvjsbfNCJ&Jt`v*;3&oR5Y4;|*5`8hr4jMf=y{<dv3NZ!;Q
zALVkG;rSGyrP}eHiHCYLL92WdTi{&%iO&P^vv~GMPivEpl-}WDgUUC6`o{>cL!L8p
zXF&~5RDE7|y_=f0olK?$g1P&x+}E{tQ?HAm#j|u;MxB)IoIuBBOx~^bj5^L0)JmeU
zID+z5%HZwWSwc_-^Q4-wtTsa^Qo@g|MRB|R3f?HXARoH*X7~JS%7?oHCaLc=V(GmX
zXlNh6We9JtXE(~f3oH5ZGdF%%v0jc*)xnNRukhG6#UfP%;J8AOZhQXtWxgd7Hab^*
zDQ4oR%q*i|EIw4S^{^MQeM>NRaS~UE-1${JmO`~K0q)2&)m$ils;Jwu*p}}Jt1565
z#a!BF_$Qqh-Z+%O?`uDd-k(uo<;8s%26TLihrNo|h4|pn+<1bDU*gHKq3kNj$v{i`
zkGl`=73>n-h^vpIeaM<c*9-{FTK|p?`uWmu(a3!h&FGsFQpxoC)4cGd<+kQllb_Z$
z2Gd22&W3j>N%gw~8TDyqoLNF6t=$x;xYnjE_9~iYqh0gH#Z#89#L*u_qTflw;Nxq5
z;0d>q+V8v^VA^HU#%d?e)#xp!c;9wRLwO!UBOg`b&anraOK;_*Bl>@9g|M1k^epxA
zJ}#2y2$^2t=U8$CQ^*%)DkpWD@jK9f$YEbU#U0)7T0;1S!zbzSQZ<%;?Wd{oUj~}{
z)~o!yi|6bLe9<W$8eHx>fAsmFamI==*j`FIWQ&!vEu`EVDW_n+;gWxe6FDi~f@RpL
zQP-O>nd|m*GB<EY-0Sj2<tq*IR4Rp+YcDBc%D}vTety6Co{H_hSSts-2GPJ^24taE
z7_K)tMOKHWZ`nrne9hV9t@(&cSo{Sp;Hg;foz@QN=w&cb^nFKa)m4PK3?bQC+msi*
zKO*wmIwmZY?Xz7!u(A;s+ZYhixjhOOZBSzI8;UTM*nra;dOFRTIoXI;9OtwHM!U{%
zjuogQn#Y`wpc4&X5B1DMCPZDD6#b4-$Mrk~tt2e)dS1X(jHQQmyPesdL&WPNcaHWg
zP6|y50%wS`Eu46ApsS2cRG(D8)y4T#_fm;_N_sX0rSsF`qM7fs0cM=vz4ERcO~NMY
zG$RE<CPR{}<P9P#%Q*)cvF9XUDf6Yc(#O_tsW(U)oYj-i>_P)71&zvq_~qmUBS9!f
z=IX~a-_Cb<aFDG_w2KukDyCf}O)&G+l1cZukqO69cZtRfF#hM8P)KyaV|gu15g{MV
zQ#22Bg0FIMiXO62_X1vO+!X#-Mq#Ib%W;LrLR8QDUsLB!w5qVLFlti$w^Bcc{-_?=
z9ooVmKeu{uvRcnXDtLqlJdE7ILY{u^eJ{H7(v(R%RU1p3@{*3^0&r>g@Yea%)S_5h
z7mD0|kdC6=+1+Sp;AEHHKRnHi0d?}J9XznV)zKm@fWR4{O885>=kmHMxen7dkb#}W
zT0$ti9OcD~)$he$FCH(tO7W~Bt2kCYrqRu=Bg#ki1I<?fUA0y>iAW+T)IETX&Y3Du
zFT!RM*`%nx6@l3RM9#UXTuYkzAds7Xwx(ulSAr2;E%#V;o%AY6BICtPi+>CCGx#A}
z7g?ghU(=kTm8`mDQ?2Q<&|suLl@MQH0;HXB?j2i;bJ|oA<4}C}Qxx*xxsVzAV&YKl
zTr;r2!E&|8zJ)WJPIN7Kl%criI`dr(+sV{db4RkUO2ZZ~_-z`wx`TH$A)fb;Ck%{@
ze6x>33sOG4d<|X+QI!gGfro)@jO#9~W=5z8-g@NerG#T)-+rh>%sTlT-hy#$J8Bt}
zXCvd`y|v94XH+b^@H#C;IjblU{_|ZX+{(kwrvPd**kPvB^rXY4AFuy;(P49p&rA0g
zq*SuQygGTK+Aqos6B8}b7a5wGD<@5f6KUunO)A`2wKnFXBkX>q*05+EYUnKH7uBGI
z4=wJ3?QA4`<GDC=nHpPnb1dCnm325KXDQHpWXnUQWHp}`xe28_U$jM8pPYkI3nQiM
zw{mL_y~H3%q3UcR@k5JZhXB)VG8?=pYNT_!TUU>qUXD5<WS|Q1<~-{^5jzOx-+pZZ
zBUwc2q6dDecM}v%FvTK5y;ElBiFFL~czHg+0Bic-c_g2c6um0>rVK_h!r9qAcKOSR
z&m!ON#lNwuXu3*ThMJP9jdpby@O%uKcj*dLlt3BV9hfyq0YOGA|3cYh!zB*Olw%LF
zNXL=$zi&-d7D_(SWOGE=ZL-|T$M9@Xufw}EdnzrNsO1{4a$EAY%AS$m+3FBio|IY2
zKb|u-DE_rQnRzkwy#=hHh{0MpF}F)!KNJj3R6t1|o`Bq=fl&w#qs7Q2$|1eFJ_|$p
zVOzBRR_M}XwzUB(!s-+>s29%)b0Dn5&1iE@rnpoojSf#Ql<iZoqVIlVI9xpAmocV4
zZA!-}l&ogd=m2cVTj`mv^lEcgg;4;zg=vVajrzT+yVWixa#wMQq-6uCKD>0{6H>26
z4%afHD~`mk=X8;7@bKq`$}Yx2OPkV}YLZ@cDQbC45@uJQ_*SlCNBhwh+1lltL}V;K
zw=^}fdRNwJSz?FiVrhidq+ZdTDVm=n={ly&Ec-TdvY9z@h9wFN*(((CKX54|Y)eh4
zOz&HL#8~KRy`n~EpqbF@rhYAjbT-|21oLcNHCC;}Zx{J6JJfz>j}Ds~u?uh<>C?dq
z!Ze;@ZzXC%Y8`3sS`|INp`J^5D(kp(j>k*=i1jvsOi7Ooy;6cUoW4DMg8&KYl<@j3
zK_b=p=hfIXLi<6T8ZYJKV5yDqkF?qphq&d3aso{Uy64-gz8WbxRwkp3Hpyh?X34##
zPnd^N?3^EpG0;h~j#}Lc|59^S^_d@oIyL%jx$+Fdg%OF1+r4qPdP88?$T!VK!<PDF
zjlGA%5x%t{y8DI(=69r5G8EK@I7K@3v2fgFXGRLsOQ<+92D*rfw&drv=E=@bW`^)P
z<76J~M2xE~IEXUCx(&Vz%n#*-O(lgorAOuAG1=%q9uh-bta3MS1E!gb7v#`$ero>a
z5uMtF1Ov}6aNn>FsTOR-wXvW6$#savEDWe>_0`MAF-|zr;@-3fvu@K4gM4r$eFy>#
zydM0y$DlXFK9@3yQmb=Ekc2^t3qCKm2Ez1pURn7mghQ0z0{&wzABobC0~bvPOY`qV
z@&R3^Q)3u}{g1PIXL60>`e+>3Q-3(<)@o79(xm&vaD|!UACOv3RP_#mmDc$4&mXBT
z!~4<$7rI=4_oa65ppfV4#$0|NYH12o=X)5#=MJDOnf5p85b)l6X8(VTol}!2z?x;t
zwsFd~?K)-Kwr$(CZQHhO+qSDuPee~lMBj&b$cOv`8JW5Ew^u*v6ar;GZTw#u&o~=w
z6MQUo30o|>Q#{1c#-!r+`Kq)1Q&HZByv80D1(QC~Qs9lUkC|bFri`6pqfAxhK9_e6
z!zps2OW=Ag2b`E8%#k00(ei+x%9$X}<{8$k3X=n1izB|%%9dcN_fGLVu%b7NWfkEC
z<z;at)`v4$yhqeDhIiNkG2V^w5rQ2&%btsIRB7b2@12ybb?NfQygwCnX?Wp_p6p{H
zq~~pof@)MEN_RR<H>`o{51;v>A30HD8_pbC8XDJ&_J@ei`xLrz+m^SY=^ap)frhJ(
zfW?)i>R9PhZMbE`ftRH#C;8A8A1hKJ6fc6B@EgJCOl0gK`B)#sE5l6vGU?p5k|$^A
zr(<I{-~Y;pO%R}^;hUUo+vSDG68M_%I!L&Y+PZu0pu9)0UU4L`eerBx_HUkue?NWz
zX<zmS5E`)t4%R{!8ZRh%JxT<aWDROTFq;qa<f}_)WR<p=xMGh^egbL$(7U%Nxk_YQ
zo4u<AG6`y6y_x!lY1bX_1UAhuvt087pQJ@v6<_Qf@;5UR(fP3Ghlq~ai`$PcQdHGr
za)=*n-;FfSIf>^iV&Riv0?GN>L7i`E2&c)fT`Q=Xku07)dm)Ds6s($%ad}z4**V&_
zmO8?p_T<uBpZydbo_6b52NRtm0n)zcl`YYlx1%}DS;!;_%{NOa2!a!gs^mm;4nv9Y
zu79WKt=OkO+qys58M_AW<*9cs=tDud@oxTB8~#Wrn7&&m&5#qcv)9qrE03v4phD_U
zlMYfm$)dPPCCs$a7iCTRmzn6#sjnP{T~{vr;ZE_D`7%k!9Uk*M6YaAu>nV(ipVISh
z*tAF<xqL7BRL4cSJY@&AIS-szNO<(?h){bFmd-@;$v^qN?4uuhK&Aa`YE>Q=YC@55
zSNB9`y~nklgmUZrY44F0VjWj~G#{^jlQ}JgtR9UGnB7C}F)E_|1{#AdY+F?`taS})
z@4#`|AvNDOUw46xW2qtq)rm&74G4<00F|08-0cXYI`^yMVuvEhEOzb|l{<<Y{vbXD
zu@b)CWZ)RHx}Vx#muS2N6vGQw3th8mhYcH4QlXrjr8j{fNXDFbZL`Z}Z%C=-BX~y<
zW%{vc)fBdCTvVXB^eu5<{fMfc<8Jr*KHFR(Wi2()=Rp(`GMkuZT6mo!FbUGP(l+?E
zc5t7v*wC)Gmv|&1X*|?Sqq=7ifX#K<K#w4CSKlGk#*z=F+CI#2sgr}g(HljH(OnE@
zw=zB)e|1lxoZuv`teq959e<fuO6;Ddj^w1fFLY?K7RRg|-l~pVcz6(7<{pgB`r0G^
z^idQWlT+4=G~(qEEMWWR@90KWX>r}Y>bs$^AM!8g&tx%BD55N*nY)x}$NNxx-e~r0
z+PfsLZ4<wmFwywFFR>UjUB22oo!xmnKdww;hNq}h`h<&YGp)bJ-cbOHKL;K(F_fqz
zp1qy^cntcNb)S?<0B&4}wGTX*h_>2bq^?$q;NHIf;yh-~Mp%A_<UD>aRISrlemlds
zO#+m;H`ARvz-+cQJC(Jk;|BVf;__q_sKWf`%X*8|BizqMx|Z-EQ;CA_r8As|I_)U<
z(`m?=OG(1_i;({34;TNMvWuxwG-d}{H(?)?-T<V@r+_d$-7HYaia|`>d%gnb7M-Q+
zC{{mHLS-EY^+wvUulafuw><o`>$xP?LFrlZOUZ9PlQ`ys)ZDZd(x`p;#WUxGP-IQ9
zLZMoPXjj2$vmjXg=ZbeLv}BN`ykS5+Q4OvijtcD&N>-3PypNNe(5S#N1#;=Y$hNqd
zftA!&LSau{C0UO+x>K7|I1N_^0t68*a!I2-!5RLntA93lP1ltr&dF+X4J#lVj3WfD
z5P<5aDLoj^Y<Jdsbzc1pU5Fr1E9R^TW~chZ^umJI;;8y3-CyO?>?d%YrC8XSyOi!G
z7Y(CW@0uHZ;2u9sopV`HkGb`Q+5H3<u>zB$9Tl7OZdS23_Z%ucgA1v`#-E_3tqL6N
zqcpL%o(WnSk_-E$O5KNvkf?SU54uxY+56(>6JVeL5yS+)RLnuCc!L(WH@v!y5y`5k
z-T;eHk--*5kjJsmBo1(C>VM`9Ig21tKcgAW1e)C}t7yfni2Ju@mj?p^#}qY@^+Ay7
zfi?F;NcwV%d?oiVRa`q8%mtOkk+L|AbZ+DZ7WzVQI7k}>KuH>9BuOGNSls=%o<8@#
z>`ZVc%{Agf5jTXsxCHUG?aI6IQtcT&rYKaU1S%s;c5%1hqa6*D>D|#$Oy8l7I3dbF
zwY<9JWD8iEwbFQqaG-HMoxej@{ptL4z>1KQ%2Ud*p%<ss2gPtxft$<zXj8!3T0$51
zxv05!`V?EO;`~${y{EdP+38H-S6BReV_A=gF!+|Zn-tWaTfBpOJ|`J#8otr)U+R16
zs-unqB5Q`caSJ^PpDLvl5>=+Pzk1s6?04o{Gn(_UBPfB`qae(d%?3ZDWeLfZ18xkd
zgNGD+M8|3E=>4^#Kze{TB%xf+=ko`8l_ZybtqC%(FN{Q)UPzhla`7ny%zD=JUP;;f
zl#=7J23F7h#U%-+uFuUNrD54}KDR5?7(uApJPOusEematc%h`DuF|xvYH(4gS1GrS
zp$HFE>0E>!(u!>uO-{}+X9jqv{?e@P`+n<I4j{rq?O?EIO8c~C1gCyA`@%-nRxT(K
z&|U{yvO)S#?I)Y@mZ5W8@^Q2m5j|#bD6J+vKh!YGgL<<ccQ@3o>mCke<*l1)F}wV>
zmcF$)90^E;W-RRrs1FJoyatE+YpslioJZ%0`-oJbiRZ)aOI@8cbC3facMmORl^pqG
zX<k&rG{mJWQ++ApJ!2dU`zF&=)!R*=9tf?}u5}F>*<y3RjEnJ~JU6#F3+~=~JABfU
z(idmCDIJ~lS2OBpvn7@6%TRXwXL)OexV+{U#Aij17$fYY*@vlJYkX4CA6Qrt4&Hbj
zO$ReaI{68^jf5aSNdx~_nk&&ZQ+j9LIF#-Y#@}}>Cl;NWB~N8D!0+VmYq{VF0ePIM
zw-4``X2aRkf{2iR;dj-1sf`^py4Y}+(49Vb;BjZN5`icyQn$Z3pyyBrj@AY3Ej_b4
z#uhfWg-b40LQZsy-EQwpnmKjrY}=HOv7J~7$ypw~f!$<Y6z2lvW?qEwCUffIh}3u9
zkf!Ej4KCIVY)?R;<<nu>B(oE7YgL~%ZLx62tw*I9<EcZdpN7DOb4chBTm`Qm3nI|P
z>IO(Hm#JtT#tFq}g6A+m`00V1P?L=%kH)7KoVTywIiJ{vD6A84hV1SWaKjTRPxF1j
z#%60@5r1fbXHFD?e9v}0s1+F+9>V>03=S3G(%Y|o3%DEYy6orkqmDGgeOvWK@#UG7
z+F-UvN!{JIwFBcd2}B;R;!nu)!{_5ImzvtA_7^rwwnMAr7qr*370+^@{Ou6tHLHUs
zZ)e5U)K-{J<wMX@oU!D#wJH&<A);Ky1`hEHlMiQp6s4aF7lTNfL7#ej_QQo39F-P|
z$le>?^<utHzm;&cn#Ngw{QU;ED;Z<Ixh%)l;pdeMXnL4GuT+$)(yoU=g<(@(LSs1P
zCorO^k}yQ91cH!q*#~;U1dv*UQgRh;k$NCl4c0Ob>Aggsl`XX&l4SEt=b=4bO1zsg
z@#C&>suSZc?7@Ym#hV)#LoO)?LK`X&-x_T)PlII!JD|?;HWXzJeF0XcC3OA+R%8F)
zo&g4YMg}&v|Dk7$_#8}3O#id{f3X?^Ju4&o|1VZ+1!w+qfyP?(ia^YXNmCHB^Y;m@
z+cmJCoM|Eoi`e5<i0r=?fl#cOd?0=pt;Z+nX_jY#=dW+|t!86Ny6LUys_X5mcG@{h
z6Gw5Ceu|zu2FlF%;4lqToV@bF{Qf?eo#Uguz2oBn1Jfn{Ud)~^s@s%@o+=vz4CM#7
z@CYhQ0FiM6K6^qrR3K0iVhjK-FF<S_d2Adx8=GHhrsff!P+BE9KiUyoJy@9oV3NLE
z9X4(w%+&fMs(~fADA>oWY#%a}3P5aF7{aLT$v@Lo9n}yN2xwWro{Jutkq}iJ5kIyG
zDBxiC&*U(&Js10yIts+o-Q6A#N4tH$mNkhQ2LLWY8xBA86_|@NxF)a<Dy$qZhySl4
zHaad7|0vwSOREUB3GgdGdjO!AKhY2rjPu_O{Ja%CRRBhAe$8_Z{%uT{@64|+TOjbg
z+#3M5Hn$(=*Vtz|QG_$y5qv7Dv_vL|3sC<JAS?VBJ-?1GN7^>BEMR~DjbFlmKihg@
zo^f0d2M`THNIw%eAaSV+AbxQqKZ<#o$y7s_HzUVj_Fu}xcd6J$xyN}cjPzt=^e8tW
zuT*^MVN`2S>uz*=zB8+UP>+5eU(nY0a+aC8!D>WV{vC2*&_GqhpYV_r1D`aFpo4%w
zTU%R#f`kBd&;VEF#^7J5f}@j|U#SVFsTk_PwXsZ7n0kTKAXm`!fJD7_UR(S(dH`Ff
zfG^MQiubz^v7z94u=N4J)cq|8<U6*vVhn31wo!ioRl~^o(y%%FxdCo;bA5f&2}7r_
zz?`2xq<=4y{2e+XGBi4_e#_r=tDBgokoN+kCdMa6!4XhU03jkEfV_S;qro5^WU+kJ
zC<Uzb0OETag<H<~Fgt&lKxBW^5mowpKNSKAOwnQ79z2(%+d$i@<9ywIa>0M-5`TL+
zevglTc`tr56SxQF@972-eKCIVfW{yX&Ys~xIl3U&9T1iD$DI9s=#=rkUOy@VstF)U
zf0rry1;&Eo0yYRkGZI5{BQt+(IfkTh4<K7s{Q9b|elp^FhdX*%XAprvE&I0cducI?
z5ZPXT&*Cgij{UmCw&DukQ=y`^3Vz|Jz?%cqe>K>200jWXmQjy_N29aIbpQd6=80Yx
zu&&-$>4E*U<S_o21M#PvLD%`WAAEDF3jhMhU8J7b{$s}zUm^nlc~`w+Vb}mkztJFo
zeW$&M3o_5T#8E~TUm{%qc`LR<-~blCB3}S`Gqyof!mql-0Z))0(Q)l+KO%$xdCR?G
z0OgGV9E6&0c#wR*bm57Mwx7D)pc=n`?g0FEw-UYt=pYVIzQ}Rrn_qDLQk!50KbwMk
zsaPR)--7_+_21Ws<*nb(lzzWO_=^pn*~AT6Ty&;f^iS|$3SjTRgjAnQWJc>Uy#x-c
z{=RF3=r|Cc*Er+CJp#UE@t7RHDn!=V@wEU;h*rSAG{^6vAFnSzR3a%SS7COagP08o
zd{Bo1CEuD<chLUZzh#mD;;w&pEPpwLa;h_l#-?xJ(Z~Eha1h<~0rp*L_V}=3q%dv(
zKL-KMf$u#D;~0KDhPJBoYJ63qukt?e5Y|y+cjM5{mmcB6qAs4ng<PgS>E3jyOoQ!&
zT_Ao|kHfY`fA@NGxailAt%BLOU>lL38)~5~_-f+|UiP%`jxQ0Hc)I^`AB;x22z`$P
zzvx5{<R@kPPKVn$q*RT>WeX)UbWn0yi2Z5SYKjXD408fUXN5GUhs-cwUxridZp$F=
z2b@Wi4c+2Vx|c_(b!;)}`apR-pOl47CovKcYx=7P|7|`S-DBHPW+=aye5!g-VJp)Z
z5vL7yRZhkEVIE;14d*oBun8X2GopH9fz_)g&pKec!9i4gg<qeRag*Fbw#SEA;0IpI
zRaeAHjf!;r=lr0%=~B*pC46IA9wUD8;XAF+ZqMq4!y^EXi0FnD);QlfZ54hQ<AFQu
zV#Ia)*s~}VVIR2gWe)pARuPif=daM{2&q?bW`$gY-@jx%dyVWg)id`nf%<&c3YJ!C
z@d^*tK3xxj5+`z(;NC9x-(dE4nM1la`?DSUX7LlW#V|R;_$IoDw^!39!~~&5SBS?`
zsB>heQ2P+Os1>rL;G6q{reGD!f=TQyiix{}iu)vtX6GAk*Obw#9^3bJ5F)9~WuvlM
z<|z6~d%tV;++hFma(GRU0(2E0x&BV_mJ^QUWMAzl6F?Kp5BGoLk2Z(j7Xpl2;>5{n
z`y-P)h4w35`JGFdZPDRhof#)=U+2?An;{H;<hc<_UO(*5t37Vbj285`+D@7`tk44f
z8LyJA(j}4z(fxY_aGnUM<!nrNt-C{nqp5XWv(8^>)r|PIkGwONoJkAT7&e%e*EvBN
zxiXNGpjy_X*h8C7-X<(gmc&e$gsrPPhqC-V4Dbs#ZThDd*j~|vbdSk8irvf#6bQf8
zmBD=q!0Boyd#}c&ay)>O?SPe3O^9Ku8jrgam)-#Hy)YRgUPgu#={sr=<Rv#==5}Cw
zx+@GP2|Kk=z}=&6?4E!uRrT~ED`Ro+RflLoX9PBW9!#Ejhh;Kntgz~L?_P7GEX@7$
zV%Xqe>ncYS(AwE|g|#QGx#iHdTOR&B(y^r9Z_GpGwewtVMMCAf1F3rfa_9S-lzlpQ
z@YnnVt@VOyRM6rGjqx1zd4+l1x(pq|Z!dOLlLf(z9@R;v_v0E(jxqFx_@Jx^&pDk$
z{nzBBq@uFmc|Bzjfz{q+c&zH>LgO!6uZJT9T{tsrxUg~KyLM%*k05xsvbu9zZ2#K!
z(Q^3)b2hU4;Z+*{jz_0Df3X@wQ)Q@n6>4{dQOqbM+hY}%I(_;39ld&~w-~$0+Xc-x
zCP)S2>P*kqrLRAD4#}!}rY|le(6n#3<<7;<O|Pgy6v5jOZE7MRbHn;l-lcFq`}eV1
zybh_1est?;Ti4i7mYzG!lzMi`nC;FCMImj^ox6R+;c(l-^2js(njX&PoGR`NbE>8n
zqeK*Sl*^}t>Zv-IVB2BWTY(cVm4hfWifw$D1Z0}qg=8gT#bvyefF*_paT=AgQeLIo
z?omh8tw=e>;bvmktU+ui7g!~Ha?&*m^Hne6C6f3s39%w7JxNT)zOqt#+~2gUFCQL|
zf3K!Tf-qjmgqTyF;4_3UkQA&(T-%;TJHU@fubqjXmh!X}^;CAVR!qVEc|)~wR!T65
z2=>ZFr{}B)4*oD2`Brr>3v;~TPq{j<vfInnpc~)bb;t(o9ZL``*XoV`Qqq@<tX{C7
z5UNS_Yt&rB4IyyopuWmfKfN$?%gmZPC}qk;XcQjRVYEE>#-Ak}Fglg?o(o2I@)uN2
zn6tU68)9#c;$cMN=M-><;xpFCwEI>LQ`wl(->Q=7#mZDFm6_3hLI0X`>bAw{Isr^d
zZ^wJ9BVr}Lh%i+^Z=<Fd@%Krel;p2RJH{)xq}imI>won@0G|%JS<E*5KawUyJ#Pt~
zL?=V`VDKrgta)%@4URtHX!4X!pnNx7qv)7F#kDA-L9qj291w+vtMBxQu$C25itk=q
zO$dTU`Bqd-@QFjf&kepd3U`euug4Tl-RzA~-ni3z8(!vTE~R|JwF{;op3fZNLIt;6
zR~`&TV<p@HlWnhD=*s<Umw-zOv(RiLvC;ZeON;VbA^-CH^@+kC5-c5&$Ctr1af=Ml
zk)cU$NW?p&hzsa+12ks5Yv7y*LU7b3r`In%B+dV@{Nsp(JAB3rfKpA$^BZfq5M)(w
zzPXwGNxF|RU6QCUbb&kbxC}Y=tQW22&zbd==jkwa#GcxeLomR7<p%KLPjl!#D+!<B
zOYZ}I%wNvTFO-~O+-=eP>YCknjrX+{?M|QmhLd^{KHLd|t5exUx2hndkmtcu#ilq}
zf;ZN(rWkz3`ZzlzFl3%yeOi*B8zJ2inEk4Q*f$A<wbYE#jv0omd=($|$MFq8Y03oP
zuq`e6znvTRV>=$*5y=E^MQr+gthouMtl>JmYk~0iLq7G+xuf2M1mTWJ)=9i$1c!Eu
zfkK1l%vlB8WJ{KxYLAJXcn6dmV7+85VL81Obh+ab79C|wO7)LGZ!K9Sg#8`yFc_36
zQgY|{vq7tMvy3wKnpWN!B{HhqK<>Zw3DPz@r(Z60mK0icyRhIQAZAN=8{Z<l)V`5{
z7jH^O(9Ir<*d&e9L*J9jDcRq%@_REF`{I<5ta{B7<&2dKDS+8bg-lMc*F@ImCM$mA
zn8VMa++rYND69v`ZTvJfqV@doFYz@#^cDfAi+7x2*VpyXsyrGkIP%<n|9q6Q@rGOn
z{nAl7Lvr?QXc7wXp?@|E>mtYr_}MEtkrnrXQpccr<zBRtQ{vDH?9DiMkv+v=EW+%2
zy>P`g;-t>)`?vI|Hejd=Syo(+6%l(j7)gx8TYPd&s35e6I#G6g*(<cUgf3XWlGyO-
zFu4zz1`)~~dwc0RR5yK%v1lQW84re2hPbP_ly45l)<P`XfA`X<DR$;>J5mK7$x|kX
z)0Q~GYRR8h7`Nr0tf3NpZB~aeh*pT*Lkfj?6_LG*9$({}yW6{tyWBCHJXcts*<b6$
zGGY5?MM!!-zdo5sT=A{WxxkXIT2O(&zXA(PMd7QTj9Dj+Q&NXPmH9Hz2T5vTr2h$Z
zpe7>Cv>az9G;zn{65ktpvjsoPTS!|&aOXCkr?{M5Zf$}wb9OtQ)5QY*aeGxHk?Pwx
zHdHR}Y%2ZeK;3!E-m<s~k_I)UAL~<EssU8rDq0bfx8pE^5pzLmP_$aqsY(g1fhC6B
z!)dbR<ZDcP_kqiWu#v6V+*8`4js~P@3ZO+T%7qCJPViZuFeWN`k71-(4Cq*+7X_AT
zLY3)^V_wV9VCQPE^(mYfMmQ19%Ln*w6$n-l$wEW1`vN6{^B_FCxHyU%PHH(JlIO&j
zouaf1oLYN%C3#J&n6Sj=d`wC<IQi2X7w5S*CcvwxE{Nx68e4k$?p<;T{sog!=WC{-
zLFKtEUYd=d3Kg0$0FZT&(lsFj9M~Po-6XiQZN>>?p4fZ7NDOBio2`gz^c457@O5ih
ztf|ex476g!WHD+<e_D!nbVH-kYCK*c-IaCwD7;bYZd13d<As|{=bQKbgdgz8I}+YK
zp*YR6w*2mAvg(eEFK~`86DcKR(FxA<q08DClsY`e157z%)ph3a#7&l#mH{lo*ZV-j
zCacDUO<O|l@8~N;|3jxJ$nEfCooxxQCKvBp`&trRUin0i?L7;@Ta48Y36KZUKzSPy
z1C5h;Ki4I%&%Yvzwbo_+`hPwdcWY8}SiC;!czu21j%n-$HQAzGpR@p6cbyxnjyp8V
z6g+<XrF_rqS`mt~MOOzTVsL2b9Ql_I5PJ$w%<g)29sUH~A4A)=yEG=uy$uK8Uv8GA
zW~c~{_q;mg9{dg48dzL<vpg_``8p%~cz0|q?2}EO<!&r-zmZ`Bk}G-QTSTO(F%uEj
z!Nd?c5O&7~i}Z%dIC!|~i}eI<imS}<s~#&EVDJ$#tNNYO1pUZ;Oote{e($HLGg#`P
zR}^tTrm$eCn-#&srM=$snB{EuBWp9t9LVDEN$`d@sM+l{=m?wrWcVse5m%(SPZ_0N
z^5w7T<{|cC?liPdMb-gzBV5&}>u5_3g{^FdOt%+3)4;M_wcYY5{u@5P^3#{nkcA^L
zY1OFoN%Ou^N5TUQ3UIS}nwW<b2StGD-qydjwCl9X`(~w&mJ>YFuprn*nhjXWeFRDa
zah>=Z=Y2Hu9wbJw<GWezo{Y~nQLKG1*Fv%+GGB!gp{&EGuJb#1Rp-L>g<O4DnxKN4
zLzWtHJuUrY)lnTfo4R-3)Q<x;`<%*8*r6lpjEv;AX}DjJ*pU7ynj#vFGyJd$Q#Sg6
z$n5{YP|c+8wmC;uExQz82iO3pJ~IlsR6#eTM^a0}#4#}<;m46Ymk<#h2oSop^Bpu>
zC9tG$8)<g!nrOsM7&NRJ7%*Br`6zDvWa_k<dHf+fR7u6i#9^^Ao>;8<&=aQUoZhnS
z9MAvlv53>N#B2uMST_NB%3|>L_EM;LAhCayw6sP|FoxYAodc?259I(sV`t(0zCizz
zoiC85Z<12O8V5?iIs2+-LglYj{Fg=O(0Bln*<+hox<z^H;9(!g$$6`Mv^{Ci)Ipaf
z?$g_l8}8=OR1ODMSnz#+`v@78I_lKS{Y+2dL^!GL`pBB`+WE!k#lryp>zbH3CE0ml
zWR2lTtR^A88AJS(1GANgFBKis97D~iNRe3DyEEHV&d+XOOb4J{0}0CC6RXogU@k4T
zR!3s$QN^#kaIaN^3(p>x!jz?hV&=?2&$!MSV(0AhWc2Usa+)&WQ!Gi60T#(tcRMgQ
z$c`*#uG&L9x34IQ+BoHSN}~mLX1wx+KFpp}=~!N`PccT3%Lg&Zz^P-4A*pxJXyZmV
zsCpHSCz?G<tinw@ihs`Fs%1u~H9DEryG5K&WoR*&+N+kw63S(b(g%5+7#_k)v3(ra
z;eDW4H4hfxl(q4EM$4k+G+pXP_pbJPVxsxhRHo}O`VgkqSJ~!y*2B;+bOgs;*leWb
z!^}Zj-AnpEfra{Q7vTBSIa{+FUHYeggtk09jiXZ~Oa6do2H0|_j;Tp5V7-<-2C{zx
zOHaXl-k0CJ2qV3Ol$}VsGMl%<wQRYFjl#KtRL8bn0Goj>+We-@6FPy=*E)IE6+8Xh
zW{tT1KB#4oNm4Z(nix}rIo@>>h{+ScRddq{O+y(gV1%xW>gYGsN5L~%>uIJsTcFZ{
z3|Jr<<dX`{@s*e|NC45K<7Kh#F777>2M4QbT#rn0y4rvabbyO!n{6gQj)fw9v4?7R
zkyY2&)bj7nh|J`n8WbA?o~;U9OuK6-r`n@T^tc+Uq$*j>QuI-=gr3gb1COCt<y$bj
z4$A4lNLMO5)zHl8pZ8z6=hG1T)MLSX9r%+HtI!Hh>4s;=5ysT#$C!DA`fvM2uS(nk
zL+jx-Pp>cmJhZ)_UWZVtfbOZpq9idNl{D0G^RAhq?A35DOwHzO`bM5}TB@%okPv)q
zGc`z92vj-rotSJXC&toB>erbJM_3CrCc?v2G8SM5I1*Mz5K$iwUuD4=Fiw`MsBE!H
zoC(yCTkY(b@-6p!T2E0wuzq@`+>!19{7ff_XL&=8Mm0gkxNgtJ-NWl{{oB#gT$ezm
z!s2UXp=GG5(2AV&sIR){DqH&7E4hR6HR56&IRyEM;MW-+=-db^0)9ssen_nA#PqU$
zDTJ^YEfk}%y2h^xA$wQl5&p{iG|YbD_ZGaS<bhh1(yp$z_L*LAY*+q!vvzlJJ#yM3
zQ~NKb45Y0bPjL%s37O6-9b#lp;7BzPe6W#8W8EFn7Hb`i0Gs8G8Fd{W$M%9VpxbGR
zw1sGrl5u&#q)eWU7-)B+3MJVu^wPerL!C^H3!~@t=BSCDbr?Dy?cBqhRHVgdp6WL)
zcyXPCQ84(JIKWFOSu*7Q^Z>~fr0@-6aM8>G5Xe+#q&8dnL*x#3R_13}IYsrh_40m{
z{7CPcHm?#~i}dgSwZ?({Zyxv!bp&-kW(t2iAuR-MUN)KjX~e8;xZXv;&6`WfGldEF
zi>a+C&-AIF$P1T>FZ-7<SVdCCiGnuR08@2})l{n54ILjPT(|af-7$nqf4M!JSWa_=
zOM;m2i6O=VCa9>v49Im>>SVN3SbA8F;k_B;(fP#(b<k!Vl*Ur4=N)t#qqLEu?5Hgt
z{i5@y^}9Enw(8QQ#Qx~?g`1`UL%rVQiaOk8N7_v-W0TR)aWwyC4$@fW0RqMRtW|?W
zw&<O@DR~u9eL^p;U4k+P{Cl+*M@yNnTrH_Am(+?_k-J%AnD>kuPzz;L`n!1P@fdWo
zV3*IQ?adg&UfI+o$&e2%vh^Mk)-yeI@4e(eoHM}tK8Zm3YyWumi|qwc-8)UJ56lhl
zk$gvIEwd!*A1V9NOHDdO<jTz{ByL&tK;NlX9&hc5b<Q;{?myA?jZ{451J05CsI|6i
zsMmM(l|qA@lF_4bI}B9>rF6|V;!OyaY?xkBpNh=oJgC^Hp>N*>N9%uSo!k*^BH%-l
zY{yF<r?qD@VbvRlbB*q8k*d=Ze56g<#suoE7c9n{d+awHn<QkL+&r*En(LRMF>}P5
zx>gMQEVyecbHUxi?W1%&XO36aW<-d!+6%}`-gtm#JamN$6#j4$P+UgSnzDs)teO_@
z+DQ3$<MQGVJH(3u+oX>hl@*Ayte@YyqGc*5XjD|2_Ie}NPNH&!sW5P=JLak{%BC|@
zYIx!%DWP-w_q<0uAUg)}RxV)wSva{pU8lm@q#hYOX#88*I&_QtsxT@Q^4z@1OC~JS
z(CjOfr(gLU$J-doi`MU~njDNKsYJUW%q<%uH#Pgd6-WE21{wMuv<aQsC<Ywb%;w}q
zrJ))0>2cg_-7`M;G{IsppDYCJjpV=OkBd-JGh?xfwZ-NQ<`8f1)yf_*?;FqgV^pFw
zGfFEPYaU^;kW6WiJ5S>yEib`-5^sLaec#R^PtjgSJj0VXE_si;ojHOZ@hRW*r_Ub^
zaT_ID<QTLhAFjM<S~5|dS2QXp4K;5qlML|#C-NrqVDhg;dcncLi*{=^-)%%%;dIU3
zFn53QC7@noX5^mh?aYux6**)8YYn<-r0%`5D!LLu!X<w>hdA~_guZxfrHVIg=mb|3
zvsd`-bx&|h5_k8ag$eQg_~$(Qu<-U&@BS67ke|qm{vs0@+#b;?b)YBNx}wL|t3Y7F
z)yJFsMbs&aWOvmt4}KzI{?My7TI4_x`3alyOuI6)TG^Gn>+2ufH=danmwP4lL`rl*
znu2kZ%;QF~*)NY)74wuSre}XcY+z5yNd>X;ygRs2q8zL7j~@wr^mxpL-7C*|AXFE~
z-}%AR;dy}I4?0&}mJIW4FDv+=!&uW3PI=awlP+9^*;7@@Hw@|GGi#$z68pL~Et=4z
zpCUtEhzW;!7;<culIiL3yAmiIAO4Rw9&}cpdp%NkU^?gfe=S1Ha{BQ~V=-^_FwxM?
zQmcA(s4Cj>+kVP0(_GR*!K5BUa^;1rcK(T<#Hd`i4T4BWE$01ptvUuI8~9)1$0#~{
z8T2(=k2W&xoKhKo*Y`?ItWNv!-)9{Z^&)0*Jkg&nx*E6>*snrj0-c-dB0CCv#H9#_
zT)tam#J;<%hAuu2O;WKGdNIu`00av*Lldk=nQAHg(qr`-%=d2sZP(s6A5i9_4myoW
z7TtFW4;n!r3qzm@3lR<p(t4Pm6Nm+G6ANaTnp)qCDg;R_IRBbm+x^@=DWwV6a8M3o
z)#Q#wPq9&!S9-tCVP7{Y!gD!<jbv#(uwN$BXq~&NNo4E>Z?%TgFPQ2G%a2WejtqJH
zIbM=%IO}aSTGGv@R|s6@lhzK4hCRu8{P*u}Qt%`h?`tohU0@j#Nu<@3ZN1ry`1CR3
zSY}2VsW@P*$v9Y@C>pJDB7gcAjT?aulYV0wEt<${pL@pE%gum%HyV8xC&7!lTldN@
zDZ2i74{~^zdL=TJCX#`mdln3D2*wOm+~X<0-X0CuL#yi@I7=HTpspVg{y2UdJV?NR
zTEx-*q4DFmB8|!0DmA#8P!q~rBrnA36(F2iO8jwwNa#kwI>m<xAS-M*Xgv0zv2Hwz
z<iQMXs8FPxL0*bA(N~Y2`8k(uuLM9weP-NI)KqshEz@oyT?;g^darzXs5#i`F*v#g
ztk!Seu0Y;7NNph~S^{!zwtJLZ>B!qf4~3oRmLw}lI4?S0e;b{m^poTem$tg<KUJp4
z%kE8N(2fXSy?yNffB1!umfv+Ofk?-3-vh@#<w#x6Ub5csA+^qaFsN5!9Bo|`)6qUN
z>Sw!7Iy9q;*$kmuEDSfh)8w$F1mvkQAkdQnQcSSXHn<7jrfU!bIN!=BV*3$8GUO+{
z4FepAvc3!$GFp;Ev^}7OMBb?(sHkY-<|a??q?MX-JDa*Aq5YBMii{R0h!58}6J$Mi
zOY}$Rl9zdKH<35SuC#B^d%f1tM7FUT_C_6}L1y-hTTCu3ve}4FzN3z2uvIj=O1u*P
z2R98du$x`eCP0UHNaIHeg)eB&%#O2QUs72h3isk9rhG?OD(F_}n6a_I%f4Z~TGcn}
z10xx%zQds&A>^QB1}v2o=E`|WqNDK7^3b&xy6CT$&*?NT?rm9{EC`_K_g;#+liABe
zYiF6m4`n8Bh*SHW#>v<$%fj%01=g(2-EQ4%t#zRql!}7OYKnObEr`?QpB@vtbB1^(
zKBuFWmySq(na7<%*=1=-wh`Nfrwa+4GXxkdTq~W_gs$NzhhE9yX&e^MHn1Jc*ACU(
z&u<`UN8vF?qQQdw`qq2;!hY5I?tr|o^Wdc`mYK6d3Rlz2(U6^x4LTRE+eFt$;~q<Q
zT`4sw?MjvVmvVF4!?p<Kf0yH^K-%T|LG@D7J>tZz$?xDlhZ>z0U*Q|~vlV#-vTx-5
z;C>f?6>%R5^o9>yiNz?KpU+|yXsxmvx};Gis{Jy#kt!0*_qm{VOE%sL6)G7po<`>+
z8;{#zh4<<!N^H_8fF7i12(j;|kv-HCLStj~%<*$fbw<!Q%{Ol5yK^V~+nMnr>9q_v
zI8Z9&uGyxdKF~@R83wW7m5p6RyjKJF^QaSaA0S)Tcp4#oXsa;I+*|3~(2AqyVs-8o
zN6bZZz5RbnChYWDfp)qwD?yn~t2_Z4*9Ab7ZZc=FEH-dv2w`DZb}0E&kTu!5I)Kzy
zKj?IHxg`P^mBAl(NuQ1JaLg)Ycv=niFHYDUc9r#vDjYLF&>R+JM#$B-<kAWrOfK|v
z=9d$fJ*f)~JX}=kI3U>&KhZ)7&2XR+LA)b{;B2_s44?4IVa9JsveCpjyeP=IIph>6
ztvJ-7?&nLZ)9TGDF;ezCcml&NhjBJ-pq1^%Mryau%*DNkJi4t`^qt4Kl&f1RJ3`1a
zyo3xl?haXA$}j{{lN4nkv^8%<>T)?FM!^Xq=oXa>O0PzED`_9atI@1Y%~zfyCo~N}
z!~d2iAWBi9&@*_1_?>Zi8zR5N-6fX-hG{tz2GWVUZ-tQhox9qm*IugYgq-pcip%aQ
zAL*MgH?z*1GaBrP@lI_X?o!gcChMkpFILeP5_zEm+MhE(h@JR;zN@9RcCYu?-O|jn
zz<Ts%8zQDl=e(rF#a41W&lcPj&~f5LMyT$VNa83)B2GJt=2)?0vyN(kUR3M3_g1w@
z<rN-=Hzx016No)E@M*nPvj;@+XU%nE<BdKaVYv%{xo6r4*3iM8Qyth$UK9_k@+HMy
z+kthTY$2E|E@{i?ljj<=unjnGZRq_xX=|2U7oCN2b}Kaw%kt<-HyOaEgI2|_m}SP_
zz2_}-QWuVb4+igooK{NsU*^ivO~ugTL4os75MB?i*FhJGDQp;7z@9+!P<aGaH;w)D
zE2qj48W_eUYmAfjCmJ_cPt8!At!b9CJqhk0b$)P24o8^aD5Xjp>%QVcAJva#ch<%u
z(HH`wLw=Rqd6$Y=uJN1<ic5iwv~O4@;y4M-Rn{^Uo+FDlZmY>+An3#(F118%0uv!(
z4hjzE;;`Ie61(cX)dm?=mA@fES*2FINQl{Z1zyMQj>vF_JZN3Bp<gF<p&;=6rc5HD
zzC^UUaC$XL58K@?bLUc7iC5DJdu96Xx(!JzET7xW?z4zHuG(~RxZYeBj=XMRU+aRW
zMP-DqTd;~i>25BJjppqwP&eNFF}7&Gy@ttFBr{$NN@&O(6OSERtd(nS^o{i?W|lo}
zM$c4HIk^}Zco6bG0Gdf|VxyLO<(JOmtJ@4%UMs%-T87nEbfld)7sx9!C<^8l9wB3H
zq0_(J%9+e!iuYP(j!;I4Gsr=g{z<7F{1e_olT+CCr8uz_Vm^3O1vKz^KS5JX-M$~0
z80oN@Xb8NRf%o?I%>N$TU1uPeDPT2e8)SU5EnNk0wNO00Tt6j6ottZ9_sKTSHE%+q
zaynfAc3A*gam`}fv?4h9Rf}D~3ud}}Dm=om8~Qn9WSvr7kC9v+s1<R-P^&k>6m`(7
zWfh*Pz=Egg!tGlmZaQ_{VuOp|8InMo^MV<~*Ug#s8aKB16g+mISfo=3PKBy7e?Nc+
zKS}Nh+g6Op1|I#C4M(h9AtV}P1Wz1QlBrSDkm$O+uH(rP8w#wT(YKVrPNQled1G!P
zs+PTGEQlDeR18Lvp~jvZWREzm8FVahn_bO{H@{5<ZK1BAA7;4vWhNp#{G2iO4)7J9
z;^GKDqcXH2J0GT?WL9A3g&lgCeuhV~(NW=J+Wwqj%bBbFuOmt{Fs(c!`ovOF@!3Vm
z41DpV+7tphx84Z7B`$O>)tfC71?9lwMuJ8LP_QJ*5T4-iIEi9@gqAk)=N)}u>MIz>
zF$7{l{d6=o$mz5GsUW2D4H;Xb+N89WCE@0Dcz$h%wWXKW5oZI8Q?`d$L#vSh<=S_%
zns2OrV#>91aKTUfCb-0yLDrQT``OvFXIyR1X(&7EUc@+1;8viIqOZrxibui(C0hy4
zcw_31UfE;fBzN(E-^+G?<4R#Jo|@ha<;m*uz6Gt~<Qg9?^Q?=9j3RQ0c?)Nv+^{+B
zDO0dl1iu^y${W~L^ERCy>MozpSK^V>ImI@Gt(o2)2}$)A_^#7V-(q95jMdYPb}}7f
z&!hweHnL&!t1C#&v{dDVc4IbGl%%5#^^MJ)xRg2G6b9xaNb_C_q5){$+Qqd$+Phg!
zDQ%+nWsjk=CKuQJJHpUs%p!S2nY9gctUnX&fKhGXY11B40J$}4VP^nS&+5pnpB@62
zdBosaH;E=(t1i(VHG1pSWzp`d_`0b4sx9hE-9U$REcP~dUD<TN{0uE5ZGuPFO+YKS
zQ33nmr(hjAkmM2nC<$2h?!!2v+2J@trK)YtW;yJIL~w(@`-SDn>k`0wF~k<`$G!h5
zPdlQpa>}G=C$J};Tu6XoL}b7X(E`%g+dzS_ZI2uuTFZ-PNN4AIW<gst_SSy>M3)o-
zh&!HYPQh>1OlSpuezUDiaxr}>sYICMs`cqpu;tQuMh77cInjevQKR=lEySzv)+Uc+
z0k{fe*(Cc+`pNf!yT0-|rjkc}s)=TYEcT6q>9|dO8|(@t_s#x_*$&#7L7htI{smsq
zZH(uR78{ua<{)kIJCJy4o{+T^-nH3Fm>t153r6maV9)3QZq?2uyBzL|vV$XnIEyXw
znzavVH&agvR2;f*DIY0$eE4I~Xa8hYCFn{Ow>8dqG5`75djx40I-BVF4U^hJTF60i
zZJluS5O@h|9X)5k`Wv98&ZvSnDK&JH+e&D`gn4zn>np&&F|cZJc5g4feIcIOb93*!
ze}v_o=$`0Y^_yn(;8`BEnaQ^|XF>vAg;D)g=sTPRSG>h2x0&^Q0f!2gv9ji9$6^=a
z?`2;)C}VWoXTKp1@&t0iinEY`DXhk+F_3Bk*VGG@WygcTUagl*u6*joFs|lnCLpxc
z2ochN#e{k;RI`Pbx|p5dqwd~;x?7yLz6>jJ<olbEMTnhm`e?2w(=xcIn5VlWmDHel
z)|=tD09DXQW=*6S$@I%&XlM}7S1yKNIg3t~`-By?FKSH52n3Zwi$PFGsG{=QsPuDP
zYB`OR%RNE`T0j7&BKO<U@|u+r`mqBXB^W#^d0~KhDs2d1jt=!1**xQn^c3<S^ddTj
z{H*Kwr<wpQJdO0St=Y)-zoKu)KS-85#LJ~Sqc9h2jPq3NqXM0@Tz%chqI!v>XHv>3
zAi>NvkMfblJoSxZrNJTf)?HXigs-K!R^r&l@UG2F;pWMg9G<n?cgsGCA>OeNL{9T5
zV0}577Sg?^I0Oxjn!~1^3#}C>FEQTt071Y4xmU-=;TS(|r>6vu&Fr5}?X*H~qLIUu
z-Gf5{7bTtWq9t@SFK(2!(>O!EWr`PcEKD<J&E}%&F+FeoXAqk~rQLlbZbNHi3oi78
z-3o7I)V)gb&gEWLozV1c5i!wV#!!XQsLM=@A-r9SRqWYYm+e!WcsEz#UfIjPfN(`n
z7XK;x`M(7#|0-3O8UC+QmJy$oo`L;8Wj|Rt*jfKi^b;S7PQ={G$=Cs(PQ*&z$ymtP
z(ALNpikBD4(aFJB-x|toBia>QDRFawH2_~21tDHWX>TrZuIN>#I8Y3dfS+7kjF1FH
zz$0;|(h2cgyGRTbp*U<oAc9%=Je#iUbjS1Nzl&9<EJ-{~a?*JkUmv?JVMnh&!M+|^
z#IVz#ko=Fqq5X&fA~rTI^bi0*kdOfULB=`QVfNKb*LHmz;0!UafJF(3e}MDDLImpE
za*;qH!7Ydo0is1`0O}9`s3`$^$N+%=K?Lycd;P_lK=>gKz*<4gpaGcUATUC*{vw7y
z2n}B2)V+EC=m53lZ2{D^z47$uo&&g}V_?Ar;{$9UQ~^(c_7b5S{A*Sb+^4TP<&&@z
z=g_Mshs2-T-qr><2ted7xHfKk2e`4T0_MjH@z3AZYw?SW2joORU(1~b8RGy5fwP;s
z^#fdhyMz-M1aJ#w`UCY>?~WoCp@p6qpqHzUUmH3vGJwD*u-OOr4Zs%;8~_dAw{k1@
zDpJsHsMZJqCBjKS03pXNh^G(k1_t=7!VFT6r_KuiV&DY`!hQt*-*L_cv4v?62>xY(
z10)x}3e10p{F4q3EIcT$76kxy<HbO9R{-;qmW<dEA%>2Q0TfBldnOAR6C!YbdIS08
z*e(N6#5HhfI|&z4*W8IQ_<{&!7Ub8-8&FR5-M^=O^!u_tSO<U@x5h)jfCw;!3h?O1
z2|&k6|0LA!1KN*n^994JLrx5(7m5db0^Az7?>p8_kHCfj5a|Q*c>gim$4BEpkD&)0
zY5>Se@5vtUwYz+8leTS(?fYxi2Hy84s1y$apyTb!(%9cLjRtA|>fQFOmI|zbyxd}E
z_4u3Vp*H{uN(|l)y*CcuPgf2E1RyYI7~u5|I@fol9p3-<{E*LK8PP?oPq;qP=N|mo
zb=JmLtnXQrV9U=JZBgK$92(f}JM$b~fDoVj2l~tR<g+&E7jK%b;;UBb_clfemxJp!
z!topIi_bm`dTad=9!PduRILl9Dc+3%{|DOw=(AeeI*sNle$rQTxgMeG&k^E_o5W*a
zFev{(@3&nP)E2ObCtgf2=U2(JZSQ>WUe-Runl~|+&wW_GsXzZO>R=_zMUZcl-rd$0
zPJrHB!r)KG7&LYW`ZC|AoCtq>3;G2z3L+@~oLLwd$Iruk3Pc}t)W4%}>JYsXkRK2P
zDN+`2&-4XwjbQ!=9i4^{4&Z>~3aH)g_aQ6*kUxqE<nV6tF?KpHcFxR;tyeAJ_Q>`#
z^+ul_9wW)i3kEV7#;FR*inlDz)ZtVYD~WJNih~~Y2yd#Dc-VykUUPaB)5t{1ETt^U
zH9(mfN7ETa3%=oB>dlizrE%E}>h`{|ylSm+eJCpXRvRn&b;GoW$)($WFI_wVs8R^A
zBXW;d5=0q0y{g(f6>!w~wzq<lceK-S0j+Xt5yE1{+`N)}X0E^xjcw<xAlAC+oJ9+1
zJ^z>LGg8v2Pw;54&M3QyD?@=M^vM3^SjmycyIXN7oBA5@2g^n_#itJ3usoT>Gd($|
zHVIqySchwz)<ILY@V-I5o1oQZ^VuMU>pV}wbW-2ma{GW)RdACB{1W6})j8NrTrd`k
zaAjP;%=o|COZF6$K2gPOeE9AthHKHl^3GJ(*T6v$B2v*g2g3#fv+IKxllu0Tw<|t+
zu1nUF`uRbKEY0X8Ia{aRKuIrULK%9p`#EFVs@D~eL+5%nQ|U`vfQU56^svI83|M2R
z(jnl?FRAk2AoA@!3S*H~zh+&}8kDqa_x+$*hgMAu6m~MLNg@P!=J>Yu3yRyOFH_M>
zy}eB=GYjJ)!kKMjpV`hW!yUoOQBd{0N2Jv&!UyXc<mW}PA`m~C%XbY+H$9YYe#N!y
z)I{j){LZT4Achu@VNMRZ7?-c}9cQ9U$e^+9h;O3Um47rK$Q%I2K?ydd?dV5Z{BU2z
zb3SK_7D_PPzUMKQQF^??2Wx=#A<tjYJHMk|1b0KH7HvnXqeK|X{^A|~t+4oY12VG1
zVHL;g%~r~Rr`SudauvG{3ms>jny+;+4X2++VVwEn+QPBEiLc!6?Xec~l1uPKDrysR
zVRdavvubU|dL~|l&6k`W`*ar9neV(;q1D(S^cov@-$2{sBa>rnh4R54N`x(ii=H6Y
za8bpsa-K)wba@hyC0&~&cN?!LF}+^fBz!facUdU_2QLY94PBSW9CWjZ*~Bb~zpGmF
zPeFtgu48LwCa9I*G>1S8BwvLT<c&M-TH>yly&ioT{KPS_&(+^R&O<z%M(joq{+Q9K
zvCg!tVEd8EO)7}*>vk2iO7b)<cNTbp87>Oec@SDTI^MqTCNdUng0;PKYLPQ<rzS<X
zT;n2XtqpiBUm}X20zR=d34bH~YcPiCCHvPsM7Km&vjC5yg=g5r4a$CA6vn^j?1#b_
z@{yrZx!YvT7etP%(0`A+T}GL|ckFcWZyAOU2)D8DX@V^_)U6OrIZ;C^HpXRUl{&4v
z_>OHgT{~xF7gX5UX6yDo+QL9~2ja1>waZMHXP455>2AKKsY>FHnb{@C`l`LU<JDhj
zF~cldqtB4oCBDn-<q{*9#L!;$4}Gef&N8$OvzOpSrxQzwu4G_mJemHeaM50&DU}~U
ze`N!5NVaC-YkbFImcl8*?Cyeldf)Z-i$#t>dVVlS8V^j4+&%L*;)vg++H%l!4>X0W
za{S%g82glB8b<$sI*obhU7eX2``ArWt74dX--`UIxy@l^q!Yj$$bhrlA_ljK4`!*Z
zqi<bYbjS6zY&$_tLk`f+p`V5|>u{X4okYcEH+4N)6lvs6>eEJ~c3J259b%q;M2l7e
zD-oGvkY8>1H%=ZNQFv|9ByXZPmz6qYhC*N3^wnMf?cf5*JSKnf2*ct&`BIq*oLwiy
zF8zio=~_I1=Rn_$vuT+vWQovLy&iX*rDa)7O>se~C6>@Wp_^YRp9AXRHMP;$=$UMG
zHX|v0cF};!@g6fZzKQ(zD9(u&{xLeF^;<OO-RIC%o=t2`F&ZqLAlOIQC!NqE06vKL
zjOjAnqj*Xww5t#NRL;;j1d33ema6F`JCNQgt~yt@&nbMyqb?5=1ulITd51T3$H+ZY
z#{2k$L;Xf?e6gm4L+AY4?zB-o?K4_K<ocPLfH!Aj#*PgU&p!d^en1m--BpNSnq_HK
z&!p0vmqDmt7Sx*=n18oAis}qZS6C<g4**(t%lk{He@hn1fneusR_|on=yY0*xVJo0
zWW`y})xOB%M@Z~$0TiF-N4698#->&?I~fU&H+Geg^K7&7=vT;$6j_8YP6*F_DiQHO
zXgOEovf^pX1NZsL%OHUVNwTt$R<JI~=Qq_8dGs0(syr*z?GZ6NCko_{eq|Qri5B8f
zJDETzLuhbA;FG*rz@06{w6qGBv4Cn>=(Ur$v<WW)Es7~RA18?P?WhO`;Nb-Oq_9hE
z4-PGF)%RQaRBwW>%H-6WK=^b>oq5VTz}dVXQFhcrnPRf;vh`vOYjs*mN1!QBy~sGK
z=Q%XMHNq*;UzQV{p}TZb53;GU&d)CpWMW@;N}@lQ#6zrS*8O1Qtzz0*CIc)VvX@Er
zq>Q9xKFmOq%V5v|!;mDNAxm`C-Kza0b?7Z`nBHI(NJt#M4am=S*^~MNkHobOujO6^
zC16!2`|KG_;!Y8Cd<>OOLLOdaj?|wySMjRGcbvgz`KB~%2PsyHDfbh|(VYLhLr6L;
zI;3P`axok@HWq!f#S5{d?}^!*vrX+dfz0`=u-S7EY@&~|r^ZV6H6+H)9a=-x@b7qA
zF-CgR8mnjcs)r^bm)FBf4QJt3$2_b>t@96n6EG3%^Hy`Wl9%nL7dhL7$Q+Q}lf?0F
z;dHBG5i*xOR#w)syEHh%4aUIN-LZGmu}R2`;j3kxySJgV+S(r!LyzP4HR7Tr3mIz?
z|Harj1PP-AYqo9Mwr%&{wr$(CZQHhO+qP}{-POdKh*`|CqN27{6`A>+1M;;R{-(Nm
z<C${5(Hn3d$~?LPS>rBIHFn(UDS>r)?II(AYuclocX6TptfOXoeP)rqH1@F>qktIk
zMB0N;^XoVT<(<racm<d5-twEh;UA}thC_=jAlbl6#=psWbus;7EO@1O<aJ$z!&WF$
zgpqlNO>JxgL$7@t>Tw=x1Y<E`M3#aL2`%k>^Ff|Cx=CE8R$fz>dQm61EUtbR&o_LM
z*INJt7grBP#f^pRWfC~lIsgn9ni4UpSvEwJOP0CI(RXkYa*V%=?6OY%5ewB?B{;fh
z((X_Y!8XJQyLW2xRGGTj6eqM=X3J?t0m9XPUfJ3pA@P~pzlLr4y`Hk1yb_tF64*W+
z$8Q7-4IwmM0`sq7U-N9%;!2IpV(I{7Lk8sX%2}93bvpxIu}!Q-UwLv3ral=|R6Zfb
zE%M0`Q7&GGNSPN;Q}RypL}~B}FVFP?rO|ty9MoLId#e8GRUJ+FP|}mhf4m|SV=6sZ
zP9&up-xDvQ;2Og&7V6!R#6Op?hQM$~mCbKfPT5nluffZPG=Clcjmv@(OGnh($Hq3j
zN1z~BC<*}s?@6UlFu)d_f$b?`NiK!b${pcb9K|@G;gvjYOX6A9D)Owd7<o3A_E7Vu
zK;C6i?q>PBlCg?=qC(&I>^+6dDnn|xc{^IA_gil&ZxfFtrg<U$vEgcHTw63{Izm^p
zMWn#{%P;u{qKJSoCo+v6)hS^t=FWGf-FmB#*u#u*B`w#OqP88Bb*nqgyD30Wq2)eP
zuX{G^I<hRMX*UsIhI&t=V)NMSP0Xvh23-q>)E_0d4FXSD>?<b^6V>q^CxVIhyIZHM
zo(G>Fk$R=A)tNsLSLFD8v988zr3V;$7@9seAnn8|Iofn3FB(V}8`b&cufIB-)r1S|
z<2aGuJ4i)IyQ^`-Z9Ef-L@iSfsQZ%mDR77jy|xr7MUt1=%u>V4@A5~qpBhsW@z5K<
zGZTGM<sbWldIC$x+ZXpK*y#f}tQmzXz?mGC_`(a8>q#O9Y6}HA(Cvm6^M}xI$-2f5
zse30gclglI#cPFOWpLj6rCOEXuNt3hucol0USCRY!L?e-ZL{~my+_Pe1y2v6ESV`Y
zk{3Z(L$}L*fhQdTwzQ{Dj;<R&$Bp2y+t?#0s^(C|DSWc`b^Hu~=+8tXakP$*jPCDQ
z7@e;?IYVQj+Xth?c>m+INuTRU3qsZm1m6hL+6~<7zS0u*)uqS9P7!6=LLI*BYLCa`
z>-SUxAB@Ef8v9;wYfnhxjyu1mYOcYXg2O$T#K}{GDDok%^8I-$vV?8Few{ZQHtJe?
z$Scth`Q??da<i8bW9lz`7~dtCDwkZ}-#+C`w~}=PvO?^$S${(w??sqz7T^mvW^hrV
z-qas~Glh^5MWAk&CbCs`6u(oXxh;;oiYvy6c0vy(0?2q5quQu(PI?CcIi3SI=!d*4
zHJ)L*bwOOodV8!D?O#+vna`d$Q@Wl<xC12x-pAzp>6CNTxS@y7)r8eRFj@vWb|zk}
z5H6^bT}{0R4gHdu5uZDJhW8-^=n1SiJ@wdR3Ogx;HI(LTK@~JfV~6^(v7`Yt@*8+*
zl^E&N8~>ZPC))XuVPeDNUz4hrt}jAZ!|3|^u(3#No~u6Av3boQ<BY)IaW!bl{A&MI
zr^BS3E(y0^T)KmMxuuPh1^ggYYM$!cENjz>l?91&(Kc(1sk57tfmXu%Y?M>mYIvy5
z7_(&{#YXaVQH8HZYQm=pVt`@vzbFy-q~c6{jrl7ojkDEEDhk?}L(GAnz@MBghQ;m)
zR$rR;3o`=Zdh%6KDh@{9%jFA<_G4D5My_OkY5eI9M2w%>EOUb*0qK+=E*RGJZ`YnK
z-LJA?@k7MJXISp#e!CorV^;}nRb&dr@2Ozz`I};{JrUBzgf7EGHGKZ9O3uAeZPSTX
z?W*`zweJLMts{O~ulB}YNH-Ct$w{JK%$uk`;BquQJh0(hd1vdoa4NpOGRwba#*=&6
zq~RA~Pz#6G9cq5B)r*ZKk4q6Fw>VGS`878?@c4!SNm8}Xx)0baXGoN-&)cfpfMlgp
z@Ck@Vv*Z3K7f=Pf`$+pEq_wIBVAvCSBwfXrg5J%uD{aTO=5$=IbjA=wXO|COvMkd3
z4uPgybGbz;x3zkOcSsd`t=Ft`qMYp+g-Xsuw6d2D*)7hv|8lM-7rFk0^FUD?&@BB7
zwKHLj7hRu8M|Ky1OE4yGl+Z$SWw|vj8A&Q6Nq*5md|WTqEQ@b$nZ%I;eF@92A&fol
zXM0#U=R4M(ek^0}F6e7iwx;GYBU3joOK`X}a<Bb!EEK2#ik2q;MmQlUB&!zAq5Ql~
zGU*{j<>j&JCGYmkDF>GW&uvtrQvV)1H?&O#(%W29aI}@i$^}a#TDSwVhbh0y?V)C5
z_97XwLU0SV7UPoJUR042B|279ib<+)sky!`<4ng^(%?WDn`t^+n#LYkUk555?wnyf
z2Mcxri^yg2=2?ISmZ0DMqa?K`E01TysaNEu<cf~LXpq}1m~wRW9wRlU+sdp2Fq&A_
zlK59@uo*{%D(n~k<e{W0P-?B2+bL@8y|HqbC9P)jrK#{~uNq6romHRiz)cDB8>5pL
zvHzh!K4S<&wjfmdF*19qUSbJoPYIjn1cCAFrIop3+L5g8EQLfL2pZHf;VV+1UvqM!
zZj2K?ySIUpyciK9YB(4K=H3NOT>y<;d8_NynAiHQvarFle+fl9@G(&%me4F5?xtn1
z{wsqiDF~mtrMQV#-P3`kUOaF3ePA0%*4i_2BC}-;Enf<fHWt?(EO@80NJ_*S(tPD8
zPeahW+S(N_Q?Pd-Y&Tfy6BvA9(s8Q`Dm!(a$%rgY-tc%0%SYaWyQ@KeX)-QaW_N`N
zIA2{7YJHUXkj-j2&F#)S5;;?ZGuwuvfIW=*z^ZfBQxH>sGT3=uZLEIW$@>gH_!;K5
zdXjZ3YjQh0hH;GTW~FGKb6dvKH6<!|Igr9|WlmH{Jbb;9+iV_}<DdD<eZLgbzDUbZ
zS%K$WzIhl1d9et+roP<tddfuvu`qNV6tF?Ni~L;e+E`pIjNTaz9&S*d#11l)^{Kw!
zMn$5`8XAti9+f(V-gqwPu1;J`zDQFc$LAl7P}pG>DB=jJS|7@GMc<pCjbBA8{U^};
zBeX}dQ`obm2KvM&yU{@TH<&5ufLeu0#HI`>c?<pG%ip+KI5o+8^=S|xNs1_yTAU@5
zPJW}HGQTW>!zlRCF>l!+^~sZanatkdt*+)xslIFG|GOa^=^;0D<-Lffns7rO9j+(R
zHp7R!LJI4koUV?L1e*x^-C6wan<|nZb>sUYwoGx*{R$O5NvNvJfekVKiNwy$*3#vw
z<BFbpA0eE*82c$xPk?v{!9}cGO*htQL+SQ~y_23;&Yy%tFxW;yd|#KvH9TQ3h{(Y!
z&CNgdOn0Yr)2j-Yw*uF*9$R#+j<Nq<@uhf4EWubyX<8w1^FOko7CG{A52>2mk3j|A
zss}vO1@13Q+x!%H<(-8O)ihS$QGP`c_sQKy$3ZZ^PV|^*hh)oQBAWCyajqVhFuWu%
zbzPfdpbCsnGOaHgG;bCy)4u-cvh98JB(u`A96=MOwmC)gA!(9ZUqyyk?;lqGHxpYB
zjhu#a!pH^)E|Cb4w`=~6*@(FTM&{+0TBH`|R6}(xdB>6Vd|37ONATDU3gzxMsyuDw
zQ~)<?Xj(;6-;uXhMFk}DY({e|M$lahTDS6v&-`j$4T1(I_HHNHj=<y)=f0i$T}b-f
zQG9wA<&e`(&1{6-@Vf14gNjR!t^GVlExq-e!R(1+G*_F(VL{UCs3_BeVAzo$iw?or
z=S*a#8wTYOnOoo@-o5?cFEblnt2t_@e~t0)VeG`E@Z}gsvABTUz;cx3mLd_=$=Jh~
zPd09|9AA<%`UyVIswGN1=!*1{pLRM!y=u4Q{c$e!Gy`p3t@vcAKTyb^>dG99nm(+I
zb4^f2m>&f6xo5rCu!nca@5gh0a{ysk^PaA>Qp)Njcll@QABH2inmN+FQ9eH1g0SJ@
zbe9XKMpI|Ui}Cb*<Ja05#u~!VB3c-E;dw+0ZH>DuBy!XJ?CG+4f1r<qZ%t~Aw6Quw
z!D))KWQS2f9DE=4xp+A5RMFSe@1x}bO+~^r%-dmkFYMU-0|-;&F1?8enRc>cLDN_o
zcZf85?DxnMo|0RFTR->D)2?jAh&BV5E5T3nXcU@CWN3%`YVOV}jcNQV-D54V<+y!H
zk(1R0QlBqenl#|tKH`S`gJ@m8c)9S?q-j!Hr~i17t5wgJ$Cr^@dyOBT-0IR*h1LmI
zM<YEBy-=RIjRpz>@_VqVS1S(Yq!3l%q!I0xte!|$x?}Co-Sv_G&)^cPa@Ni;4PoGU
zbiPP2w)ajeOCRHFmgyN+Ob1$rSrD~GjF1+tSJ<{5Oo2~fRh!J}**LgSY_i>|OBK6q
zB;Gx46?SV=*uNZD&(gRs@3_9=)ZjpNx;$)Q+(w%798Pkxyb5P&DVE-X#Th-5zAbE7
zPc4bUbb`Q4g7BcMdnnpqUqYQN;@{@Bv)B~X#{`-uY%Q!@;)r<0`O^FmUijJ=G6NKm
z4W9;6Jkx+FTOt~bbYJX>ow5})P0xXRncN_*8M}x)_p1|QeV}jT@x+Al6lRgo|3DvU
zNx3dxoNo#?uJF9iCi7=K6Su1Y(u!2#IrZ?lq`Xhh{Mu6*0rereKb6Hn0(JM+aMvjZ
zj*~cWK%?-}Kb+8A9KxHkX<~BPpp!(OaO#?6{3yZOmpC-)cMh@)uLzs0(=;8OV+AHb
zMkEc4{V~hpgrtUiSly!UV7q}`UVG^CzwS5=N(qRMOgT@u-LFgbD&bqGxb$@{v5>w`
z;~_M^D7)99Sb)ViPsR;FMADpx%w>BrlOruKjA^#qApcao_zrs-xARMKKomVe5q-iD
z$y+dhUBkDaVC)2y9a-5^JGaoO|79Of?BA{GXrv!f(`z)<&$lRD(K(PH%1>H{!?RO}
z&|Rvmu!#zri1j&B?gGT+O~tFE{CUqvYSg<N$4@1)5D_s-ryH5|MCqFAg%696EUcJ~
zN%9dkbyK!lnq215)-9-dqpVuwNEM%}8Z>JZ&5**=%!aShOkf3}hF{_D8OXEY<!O;#
zlQ~jufta>IMaq79?$UxVh}k~fm7+?;$m3(+S;0Kx@+leZt$(dTBPaV>p|Kb7;v$B*
z$Vk6<Vr^#qB~eTV1?d>gE9-KPI_J5t^qmynI4DF|2(pl_SbxU#)GY<yrq%9@@%$>r
z%Y5cPqIsp36H7#@mi<j&E3~?Dq22xg#wo-V|GzqT|6AH4ZDMQYZ2lj;g_-kz(OVdq
zIGO&>^8doOurM>Q|Nr7!-azGXtQSF{j&2dOL-%iQ@wRtN`~|_>py3381_;~1fk4{!
zv1r=C+#qfd+LL_T*f;L}s%~j3^<}p%Zp*sL@(Yxgj1%e|8v!P{0(LYw)Hgr@6(=(@
zajpSeUFlR^U1d?QuxR7EfbaPoQLu3HtFXr*Y2MR9lBwsnafy=H+`=%ZxB~#8@ZbR8
z=m5a$2*C3QxVin4a&-3jLuG3N0W6&4;!y=MQ1kaez&Z=&#W^{@+5@evcL;g@J|Obj
zi~;N+AnqCerf>-kAe=!pGJ^moPj6cV#2|`FI)DY1CXhg!-#+pXo(EfRZ)g8I_@5J@
z`ZU(ITDF$05M~;HKE8`q0Qelt`2|E1&^HZ6p3x2Dw~Iv;1<OB`cKilEq%^6eE3JbX
z0RWN1pC&+`Jv<709qJs)zaE(7Bn6;yR^Xn0RPqml0r+<Z2LPOU%x~yt>&uM*;azX`
zG&NN=cJ@>sA7LF#AJ!27NZBMseWzz91MpP8YA^H@&Q=z|&q>ZHKBxn9cwes~3)q-+
z8Spe7>Q5&xtAu)dV<%<>_V~U_v~|ybz$O(?OM;V=3uxfhmZI+|39JK%meA+T;Op&Z
zGbpFK-}g^&m7qT5<&LSECwrrLfB+pnzx%n*hJYxUp9wpk+dnKPCnqll8o(J4poi9G
z!x!27g9G@l<oKO7bQxj)?D`m{J~SB+20#Ot&`<t*dzv=@z)cNa-+yl}@=t^U2M2&U
zzJ$9U$Q&4`=sN-q5xC}WApXq_)B_l1gYak!Q2+1eX-{r69<s?1i2MDU@ox>eN#b0>
zYU+aVr|bBSij*{`8_<`g`@gcLES{^szdwL~oEQMYeV@VdwBX-=;0QI3`W9q?{XA<z
zn@_pX+&?_PPro~G;C(-%3JyUqbcp;v<}=Z2p6k?jJ)FP1vL8LVzrWCLx8%Py&c8m)
z2~Q4=Zz;LIxPia8-EG9<{U<x<@qN#B;Z%VQ$VRCBKdVaUZ!@Qq0969`vA?PmH})`^
zAdvOZgT5G$j)}nT098u@H>oYZ(-ZuT*E<;}fPer?S=;=*HqidTf!SX_=lGY#=CEye
z_Hp@NDbQf+xj)58f7+aC-#Sckc>4bBm6dJq5L_myY#tr}ez<WyO~9T%)=>a9)%qmv
zVNd`{dI0^K<H*M^84nMFq$jOs_(RZoiC;l)0JaMLaQuF<A8<&3b>rWt_jN$)Mn42_
zf0<kOLy-EOkDzt{>qkF?V7UwUz<bI+K?J@Oeu%-cPrYII1EsGZ0vN3SHU*#H55ej!
zegxw!TmA?jx@Pe~4wk;?k^GMhf58Qv(fkld(5L>0@WN*oCjcM){}Sw-{!%%(ejomR
z8m(3R1jWPr{fGn}-_5Fjw|4eC(c#DWzQqPXE1({u;Z9@yL;{v=e!zvitN#MG`v}kB
ze{H_OAMJlabNv$c-)Y~6#-6seCx%|=*}?;6^bnBtUgaj@M>jt?wtxowGz5(O9-4%?
zEdS7hn`aK_AucZTN(AXJw({h85Y!lqaeo(uBV9g+;2c~(@mWc{&iF0G8O+Anan}LO
zf6B(TasE;}f90+^2;&Z5I;3U&a-*;nJDP&Rw)7qdNB&l@;KWP(ntA#V=b!3S?**oJ
z2*7{hfP;_WNss9oJGMH3{xTuNM{jjy{Wt}<_8-uLS>~STAu9H+>e52p|3pS1A@82h
zgP%v=<ZR-_dj;0~%74!q)jivVGLL@iuHgS`4-fo(vVjBj1f)_#ImxC98Mw9>+_<qK
zk?7KJ8$noM<1Rpc{wVO=Ha~gzwn~O)I4aR4nyThOiY+dqO%juxeM?}--H!Iebn=7M
zmG(}!ge6|=CGr?e;P>`jw*n?=aUyO>-<OEk8RU^wCK+U64q`5nUd-}_JiXZ8sNA6a
zT-t9jBX9t507=@`Qz%JnACWk6#p)YAQ&zBV_g!Ecj2yLNm3o;^Sjl|{MgWfNNw$t5
z?R;5dj+%LYJs6!I3HbeI84obDW4$|r&?e&11D(uHfDigths00C(jmAY`M52fLhg_=
z`z&-4<1L)dB??_Hf7hcpC;Jo}6DnfeupC6nw`d{FS0Ha0D?)UN?wn|HkJO;=GirQi
z8$^w*ZXHu6GlGYDc&kHDb~R5!)ztsshtBQ9-QNL4Zv~(fK4ja|n1%s3);{#q2?hLz
zxMYU~=%Ri~=fLa7I^$=Qyox>*HwWj|)%376i-*J*sH{-AoNlB>$mt@XF23oaa(+mV
zn%r`m%XHK0f15AuT#kajpU+4Lop~mKy9wK(02KP8_@+*era$Xr6(i1NkE}2_k|nE}
z6(}l6Ak(=w;peEHdq>NF{9hsB1n$Z$`0ya^oJM?fq0@51{p>6DER$p6_-H8WpsgW$
z0d4o46?RB0<girh@S=B|?clwNFlw|a?f$xX+B^wup85t$g}ROz@JerHE9lb>A4ZSa
z#g!2!d1ZNV+Zw>U=zYunCxJMvL*-%=ia*7WrWpCp-!eVA+E$-~&;u=hcY3pxb2`oz
zimo0|=Oplkmp|J|y|H;~y+9eY+QZFB>k#L>J0~I#-kvW(+%%uj2nr7f%E8^>l92U|
z6GwyG&!$ORp%4d6{<9(yuno^rQZdx{19|<xI?Gkml%#}(!?>*TeQ~Uw+w>~v9BVLn
z=Qi;#s6+%TxIv3}`;pY<Sbx@5h)tU)=VI4G*(~6pxpwQi%Cx<_ulb~b@An{q{1aok
zix#aEu5M<e#3!y(?ML;$7vXB)Qvc-XtGM_G1U|r(usjj0ymekqM!^-z<mKy?pPvy;
z3l!SXYLlC#UFHM?#DO@?`M;!)zcAgAu8=rSAMMm4-g));Q__ZsKHR%!giaeeUL$z+
zD@cv@LOXJCZNCL+2q^!OF;oesQ>D~7RcOck&i&!a)`=!EZG?p1KjZ3y7oR;~>u$$s
z9!O*nB+1H1@jXIH#l|H5PR7>X?I2!5Q^i!Oa$PRrM8Pj)sb@(M=4g^!fK#2FWQEx5
zURkau6$89J4+_=|E$~-Vl#++33B3^Cdi*Hj51VHgogEQz*YV+>olss0tC(qndE41=
zrPeJcI~uHTXQyNrmbF=4`T;WdU_td(j7>Aeu|b-Jsucdc^8Yk}Ijw4aj%xz;slK`{
zp4u!82(GSt$qlFCqT2k|s9`<iea#KNT#CmEinn~O$~ul4)O-*~&eerpK?1TRX3lNX
zy(Z7=KZ5x2l2=BY{1vhMXao%<9E{q9O(;n)kLWF^mQnIOB>jWX;a0I957pLO@m?_=
zms8jU<Xc?~mYbFGC@4ttwC?f|$Q5!C;C-H-EKR?Pm<B#xGu0ZY;!6Nqt1mPR&MAk+
z0?R+b3Qd$19GwFlATb=tH=%iYDy$;%)x`l1<Ko;<#KL8i9HBGJj>BQRf-WPb`K%js
zs3IH_mB&H-zy4L7(wKZ*zhqn{73Yfq!O^2iBWz^f`OFsYmLpinJ%jfJq=2i+`xcR>
zRW*Sq-wzJoaxO>{r6ukf)yZ@ddDozvE4lGq){$a(T;k{Q)BZIpN}Rg<H&WWCH^t8*
zyICyPf#F}f7ZN1QgMao@1_X_5C<HPzI#9B+sFO<0gmRL7NcEf^2Ngm{s8?CCu$XS_
z4IRIOV;4CGk#wz~rzocdc3cd!|JYs)Rm`Wm9ED|3$-!M0cIpt=X9KJI$RM*GYwGnR
z{h1UfA>wF49a66rnIo|W{vnS}=>Qd49t2--s^+XH{X>08B!+s;c$)5u1cI$#(<f=a
zzg->R$G^X=ZQ28lwl<>nra*WuVAZpQ2i+*2Hk&Pqbv=8(pbP1{4M%EVpW37l)mw|D
zLU?yN6yxg03GK({53v46`N+lyGSe9N29z_QcJ;6BUQ(OOtHeA10Qd5PNo?BN>GD&>
zz7EY!7C5HvHi&4H4iX3_lC1@4qSKNB`&cgK(8M-YTi&%*7#vX|bwiDU6jMKuWN5AO
zKD(RJ%P^fPqsn3wQ63&12V7CF>f8vB*~X(O@Sa#&Rh?dRD$x)Kp3%$ph*X|fkJNFC
z?t#fgre!+L&7CTtK1nY{k4JB}_7qY&pn^(Eg70MLzt+bi({a+lYfolEEaLBUr*mK2
z(L-?nYULxx(6RD2N^NFpc+H{G&rKkAl>AzJ8+jrQI^r|t?fdn8Pu4c=t(4<?*zloR
z?a)Iamr=i4tZ8lG$l5mvS7wnN{m0L{IQ1y7zDulz=u^Cm?Hy0hlMeYfQtVU3$B`Ta
ztPhg=Hp|MeODtF*)G;hBCadh~n=tcgb@X@vyWea0#mGS3?T83zdjf{|m7h>uUTQff
zM!{{}kE(Mc0(01I%jPgVg2DApr%^<?zBrT020o6aT1iuq#Ch|m@`rnPyn>-}eFjQD
ztELJMi_&J*N{;#o>VHnv7Dt(5g)4BC3(cA=Hk-i#k^%8Q+^84>9dy*~I(CbOd=k%&
zqT8h~mpNsdYJCEwRqjdtf~AAvKjy%8_FQ17fnKdUCxXj4aIT8Z;<;|M-d&e9gEWp=
zzNAL}>f8^<p(E>18!>MMbG1^p6e>EXq4L=V{I7h>3=h<X&m0@1;ZhY*(4!`Y%*T3?
zfU3qNie^3uc05l=9vjP4=Q;@ZNXCJMqbeKUJQ<ut>r96IUH8InTevl+`q90LRCN-J
zv;#og|Ktt`K0k}D(T17;p>~+OQ;~JtwIvnlK|o*>@K1>{<W3i5YF>#w$e!0$Go2-u
zWbXKNTeWKl9uH*2JZip~oPM&&zw3voN&ax9^hN(g)adug9fA<@0%;@z<kw$nu;&yR
z2kKCv<<O;mLvSChQC|+(AHG^o25#^#h5Yevgom2a;i=4IIB;2fN&`HqhkPn_z)R4}
zea`U1OkH1CM@37b+H-6{&NN$}8TfjjEE{5%`RyC9(>`zrf(=!7=qJL??(Ui+HzX7Z
zWw^tjiUZG(KfqQBzb#n^_El!gQX)upp8^5Ay5`N=V>3=#U4-CoU<Dv_Rn}oOio75#
zd7Z}QAp&$K-CmM`K1!^c;#^}u-bE}sTzc2jP6YWa_3tyIieK+``EK&c16FM*QY+LP
zLs&Bu+XBMqu3<F;`<-2FhlNboIx{7UzHaZ%SLbr9@NVtWGO+WCr=c6K0+D8Sz<?Ub
zOzLI5HYaI+l=)3|gqEhR{B`#>Ord5?<Xo~C9xAju{?-XZq9A+W3!)vpO8i8^+2r|7
z>kyPf7Z6sAsb5gB>IAQ#R&=<ym8GuN3Smkz>KkmXu!Pp&h_rh|$xIXGrsd8V?B1^s
zE8kxQud?$#uL=%JJLFoMMSI$ot=Kv-RZ2JHx9B{!g^;=U(;6YnEXh2y^q4GB3p78&
zLNwhWh#1R97w_e-nLt|Asa$74_4&&jb+={VJsc{G{4#~<`=}>k)A-hSs)Wh$XlHI_
zn(G@#XfBnqFCT|8<4u4P6O*0dc<DOta6k29ctEPLEM7lISb$GC(kaS?r!nPCqm-lR
zDXpmVz?LDoNnrwQwxa&!Anb=v0;yyv$;BqF0}PLU&)JxKAIR{`2Sg(-@U9Z>u9MP2
z&Yd%IzXRp&nVZ4ghRIf(a0TLFqp_RBT{ea-vA~KQCIcjf#nwE)bp$(vVJkkV*erY9
zV>r1bS!Z#YlGQmZz>Isum<O5URj{vr>dur#AdW>EM{|_CEMq?S1aq=+t<}fApPXws
zn7nR$6d+Zns5`Go@IGO|?9`VnQEBF`CUDTlAjA75;sZZl9YBzrY;Ek#i>x7%_RWZ2
zM`rxseNKloHz2z0njiWWk?_3Tuy#j1W%x}f<Klgkb=EmRgzjO3k`5nt<wPf;mVzav
z&@TS$yd|$Z>;74qs-I6B_>8iL!G8Gy=TEb3Ewj|7EJ|QX<0Wpyh(Mhz0&490hJjN7
zAkK?0P#T8jDSA-Ma^47q?PrrjSUg~Szdo=W#Eu#aarkd}Fj)BT(+k+W&%9L_17u*f
zs+?*AS)AS(40lRC%FBgP6tMc@;FN+Pm4uR=YNT^J<FySc#!}yJrKOJhm9b$Ph7g<c
zz~*|=vSW22pxA^rj7w5xx)Ywn*Q8TfEYA}En`twzx4#e?uhmKmGa%4L^*3vb4hatV
zHU47W%cb4Mg@jt83_rhh@-1~HEg<k6oK*^QnIIHrdGE2tg?^AL*FV(vTM2P!dSR_}
z5hgQ&4&K21yDQWw>q2y}qoInU$o|IWpEUd3pQYK@i|R)`WRR|QOj2c{E0Fd)-Mqct
zAkpH>Vp-laSfeor_8MZ~8~I)$Cjv)NDzQJ1XN@`X9c_Alm0?LuhBIY30Y5y(bY2|=
zcWRTV^TM>lYhZZoNM2vVIyL$?=$(#(a|HA<b)vw3aWj`X4gZBGKY-_puWb!6q#dxN
z3vL|KQRWPRf9}O2CXubb1FSGSbsNXo={(zuMkhB(&p_UQZWO1v+n|}ubNv4M>^KT(
z5yZ~#&GSzuq%)q&g%??tRTgDkgj)%ET}&a8XCjCdxY>U1_<8P_ERUPnkmGd>%(H27
z|MheSi#m?q)T=^@!Iqd({X5!P#8^C6Tty5z<Ne*?g|0?MUy`OG9-?VTE*d^16r1|$
z;AJ(<(7I-5(WavnY5=**nBsoW$tzJ=jVEDINIn>eqNkQG^rX|7Rja%&aV6tXY8TGT
z^zcF@`MgR4v6TDu)xbk#?~VcZ_LFn?Ny5(bKt1NE*&&>gEi%*VWQ#`;HK-4wrg2NB
zHq!OJ#e&}4WpTZ1nmsr;d0z*UBmHv$`EoToEb!4qY+;x!Qv&wcZ86N`A8CnM%t&Y)
zNTU#kT8j1MqV$o2{Q^4<(RtWMg!7?qq~b8e$rxv3&N;C067_PPZv-5<s5cC!Dj{;h
z>lG+FgoHtp5-EN3W#lTkxN=Hk3t9T7`mTu26|v9J@N-bZk{HPEg_Q=FIB!%`eV*xa
zXo3_%6Sn?UXYL(x=}(l7i348qD;cjpDPKeD_!H4MA7%Zs<`HKV{TemP5DaV?CELe^
zIELSV?L(P^a>wS$p$#iG3omfO;ezdGJhKYC?pX;o$LF@xE@IdSX6bH>nLv6wO9WNx
z(kg4BtWS=5nP$Sdq>P8&znn0lb*P9E&&8>xq(@zAZk(7fcO9)Y(evh~o#7GUr^W*+
zt2yhea|O*4u`Fwk(`5AxGB(j1{aKHF=8*Utz~#;B*k!huw)>dweW2Vk=0rzJM`4=$
zs=ePP_qfrV7cKqPz%a`&)XxUry#>zg(~z2<5aALS7$elDw6wvhTnZ8n*B1<9#DM<o
z*h;I_r*E^4+vA^q(FxeLSQw6m<fDjhlRET{ORcIzEz&_d=*svv>Jl7V{f_ITMo^LB
zWyU0pJbD`K2L{B#k;KOi1YWZ=j<;1bSiDeOo9k}np34Q#ntIuw@p}Uz^x>HlVVuN!
zWwSOuTfpFci?;EH5+<esGdm)z(eE^<8&9azqfy7XC%pR>Ey74i7eZx0Dux%vBS-QE
z0AlYRhWSTi3P^fO+%fC9*8%I9^zud7tHacWHk6(=6|~vZ=>&?WV`j3temrwkgWd`;
z6UE?uw;PUSZmAq@aq9-UZUNwS+6t(R#KQY{pR0Nmv(S#VCm6oYurT{jnT`otgZSyK
z1Y`w6NsUSg4wwTisOL!r)7HlJ)n#$LE=Hx+@BR(iJ<1Z4v{8k$UWwUVep!FbU0wI%
z!L~M!oXaJdfj1P?u_?_NZ@dx6cfiY-+W~X+1#8O&+{q4D3s>)G7x}f67n3A+ZrtI|
zr;G2dL9LZm=oQyPm=;Yyo>E#!3oP_%UoKQECZINT*nCeT0!unXca7!D(|mMwb+pM&
z>1niO&5@E_sSYH!ha6P@sFZYs8vGz5O8nB3Z_PH&-evoDC49Y=%RHLcqtw5J_`X7A
zB?QG0MezBPn7>|l=wGc{I#%4vjv+!9Uf*nN?sv>Ueo49V*ku%Ce6<$YT+Cm{Kl6<&
zd5W1NOjX<G3^rz>4<HGaCWlWTNMkBsIeuI6S3EVg3#>6wBD`Cd&ZpytER&14r-`z`
ziUQhm>Q6zQ+!i;x*6q#I>@v0>>vHw~`@QH<kaSpXLx?e&=-W1V?(OwEda(eLqcj|=
z9ZmTj^H5$e<b5^k&Q<xG6|#l=olLmXH7q)Ld{$(_Q040Th7R5i*Q8mtr<^*Wfo*)D
zXvaP1XOe9?kxU`-j*Pu?I>zB%YZUiBAo)H=4QI-k-BA%CsYr*mZkU*dGqIVBx^&c9
z2@a+N%2b+MQ^7tmn_I<&){&Cpo`lq8&cW=nbzb3*UV1j00HfDJOMo4gT-WTOFJv$c
z0cW=(>90ZyF9S8l>u1L#C{T_$BNqD+EE{O9R*Iz+WbTPy{~4oO3@DmjVECfzc9QiU
zAu4TpZ6-&yKF!Pj>`>a4TdJwFQCkW+2|uK_l}$-8RJATucEG$$%=9Noi7%!6T5>oF
z2Db)fautc~Toz3kEsViY!5nUebSK%At!6g<3nc;7d;!H0qFpk-Z7^_*S+iS(d|rSd
z$1XZ$ney<X%@{__&cc-h#c7gX;j~{Lj!7jvUUyodfg*ZZ;+s$0MMX3JQ$FPXaTva*
z7)nAfDIoL6hVJ9`0sVmA;#l&hu~--#*lJTFYIR*WbPsAT_?ADV^4f+JD4)V4F<*h%
zWaXF5v|8|);%E>pOFhSK(%#ausoegvdXPHA8T>Xl;XEW95NK$qVr?o%$>Ki=z%fzf
zY=WPxbf(Bmmg+wLep8|ul=-Z#Z*F>~*Xa|$I#96MK5+|fW~VN?M~1_*OOla%*hQXR
z>@gw8TOLML;$_fWHFu60c_TdEUq@U-rr4@+2veHUkR#IDUnXw#5vX&x;*n@Qr;|yB
z(DGEMdgR1JG{7s+M0OR@J>Ru1+P2)Sx-RXG{(IS!6u3KE$;fA0qc_1i_~M>;7Ehg>
zsR;mIZh#1?r=v9@hE}V{>57Swu%pW)UceT+Cx#9f))HDwb0g^sMZuXg*q($EHsRD%
z9$o|l>51I^y%%TeB6?@bEHcn*XO><X*$CV{`#RXbGEsR3IkoeeL0jJ1^&R`gnMYk`
z{tuLDC+X?DTKZwFJtJm0&)Zc0r_V$r7e&}M4erhw<APvfw>RzcVH>m=7`fd8EoCvW
zD~0bQS2w4#8$t2UcmQ<O^ikVGfYL#zG9Q1kRG1Fv4nUwt>?zW(0SO&S;W!kk0<SEV
zvXdy*(W^HJ=iJ^Vy-U<NQ(;kR?+3-!I&o0qdawXBJk?*|@Q8bM?o3^Y`?+OGXWc|Q
zu#$CM!0VQGyX8?oWouu3H_lpOHvBV$@8Unx5dm1G%IzXu(AmOxI0q;3Qd^y6v}S7`
z?wQy8APLPfPii&TcYGtzG_Ytr38@1d@;$Uk2YF$k2sbPGf7C>i8zp!rcRjrH4_>e$
za>->l-X41>LunEsWsdPDy1A0UdizbKZ{ry$L$W%H3=uSv@5l07aOhlZIm8z-W8L^o
zxVX#%S#0df*GZV=YXtA&1@(j!*)$G^mpOf@$qL4eU?QA{Rv~Am4gW7NYN7LLLobhz
zNbdW0evhrJK`i@hIX)Ro)`A9<r&xF^n9xH?3&3JT7`A`*rpQZ0&WEj(J)PJ{P)EM=
zNcw6~Sf_)>wuwx(aiA$=<7-=0$6JDR(m2t7&U9;u|7wvltq4g%*gbLT=Mb!@%=~NM
zi3d%D#lt4#)$|sPLvO5LR?ZVA4MPKk({Xk?4DE(RY@G49(IN_W+CnIW_GpygsMGwS
z$MW0awwc_#cWPEtNrJDG$v1+{<V41|S27CmlqRuJy0A0q-FW4__*OUh74>Qi<-sYC
zdwRK-p>ktQPs}pe@2E~WZxPIU83xE$>=-y{=R$mza~vnV$hm*>ij@!z$1r<qn>yse
zHB(x!YON^fg}oARo!fSTcW#M(ntwW3(nxf1n?gcZeBG!>r})orI|51C9!`)nR#_U?
z0h^f69w5$;#=s&C2ngc62MiHNHOiUtH)zAfte&uuQ2(0`z*SQVz!$#0S3&Ej5d;>J
zR$9MWk~K$1?M3-#qPEUMEtCNS6M?9`!;XB;DG3W7c}|R<L=4`W{um5&k_#W2x&IPw
zF(l1mTzIsyAYr@9_Dy7fhGV{F^sva(KSMs_e5^nf>gk?>rO3}R2#U&y@13Sm(r)fR
zWClo*{lbNl_Zj;%^U>@Er4`e7%;O3rUG`O|N=F79tJat$kD=En#|d~M$atw$PpM`1
zJa-v-K-R0DvXbkUX6|ayMYOl-rq<Tp_%_*Jh$GuWe%<34EY{zO)Is(~+QLWL1kVvF
z%efDW#k_(oH@1ZuF~csA29**M4;^v#b-CF(#n;$+G-XG}hrRphyu)@}SLFO_F^R%G
z!CmBE>X-2RZA_KkBH*3L*2}g;&dLgwAXf8l0npRzQ(pL5UFii-Yp390gm9by8HyB~
z{v<Ny$e7uHeVthH7xVMK8wUU(z!m!#B}Sy2iS4Dp?^}3gQI290c4AK1Qw7M&hKO<f
zo?NdjqLMSyfy_n}Hs|`wAW;jpGN~#d-DQ<zTA|qlgC|XF2FtuAXf##aXkrVj$N;d-
zO8o9t@(^*Lg{d~-zu@Ku@?q;UPG_Y3>Jvnq>u+j89LtY-6x(OZE#M`o)}EKzEOlKB
z!)PCLsqM(5D@T%*@2Z?Vzr$=TH;LiDdHeLJVkZR)J*Fm?(j$mgzmwNdVTsM&j$JK-
zzQ~Ip7GNysHTWPAnTLhkH}Th@g{BGb2f*ApO9&ZK^AFF=-@RTv{%nZ!LBnz>-ec?k
ztg{Zg@9?4`+cHU(qDMpxwNSFDiX@gz5!Oq~5&v<EAwYda9pLsyD)6Ev;_nz?ozoAi
z=YCR#LYca$)+90!;!4SqgeN90=wS)y5;q2Ww$>|C7BZe{J_v&idZK&|6tHCW4eZe_
zw454n5)=@lDnIix99kEdWdyAd57Q+es)5wTl=q?Ymw%-@zV#<Rm*Rx0j*i=<m8oGX
z#A7u+f==H1<Jjohg|{#0b!8Kn`-5mux{n%Qs4j`>&PT>Kkw7NaC}TK@r^*?ivO#e6
zvhNy7J`_Sos4Ne=DW8#IUr-ec{A9;e(0AL5gHw*H)YxjJR^)tqG^X;HyKT6Q+3H_Z
z5<i}kA532TV{eT7pktav5VJr&=kaKu0b!;v=L_sDX8k*r6tsztS|^*cW?D&Er@{y|
z&S@~Frr956wRf({%R_~cogDp%ab@gh%iW3%c^<l=_t?QXA{eAxEp6^+`;8aR=%H}a
zI(FRZ<y_Y11i}R4if$HnF{BkYUmEZ26GXafYDnD_W<r72nqOAC<~FJlFiK^L%ba-`
zqc1IFmtD+I2HzyaEoFEcPp4`QBAm{Pq;2f9<KvUN*mKB~Z?v@A+0O1do8>fCGEY6d
zGcU!h+i!Qw{N3*U<C5F5)L%=d(!2_3)L+pIbSBq=@iSbpET-#`;@ow}i;Y7{%V+CX
za>Tx<GQ7dBkuNSpz~u-uy_|Tr?-LHF7E|2`(Ora*y`Nt`il!O$m5}hiG9uW*^DRz?
zPB<NCszr7NgPXZe85QN;nD#o2D_A;iINGU{OP<E?;rJ6T0T336vqi17=td3GtFMK*
zuq$%{KhzTt%u%YQpFa44QE=m&WJnghr0|ag@3Tr?{p3?b88}Q`-MPTVl2Kaib?O#w
zE$x=_Y3|0VjZWMea)*ogD*q{3y4*2vI@Q{itwZg&jT-@+ura7wrLV#la<6G0rdL+r
zf*(Cv1bG@zxD2jB->*dg>W+EXOHdui2?SC<a^=i*!sB-=GXfq9i5#WN^?fEkbjxls
zgGZO!uq8R=jRGA~4^yKmFzMQ~rM#9E`7>&zX$2rRNgG>pNAd55SY8v6-S-&oD{Cl=
zflqu~%?wK$E1*(<952ovNjx;wj$jhC!2|&I*~ilKK`e7nkGZ-BC^;-=h6z~_Q!_;O
zI~-HSCc1V?H&!!W(6y)|+RD^Lw#ud?cWLO2x`&fr#jZQm$(m7WGDu&2af@U)x~GXa
z$_U?w76e2<Oz;lCg<mOtPzODQW#at~3LTExOSs#)mK%k}Xm^zH{uuMMF{2W#p68fi
z%%Osf8O|7PMi`tB)6+iB)N{-voT~8Di2HT*8Ig}b*?4Y!1vzIKEe)_j7X2Pyua5Sg
zV;>NY0$;M`W=Z>2uhOL5ZXscFZ;EaZ?MHJUlM@T@kSMANeJ6ZAFD1gACAbXTI9Ick
zJa-07NOq_4%qYtgRD7RKz5RT-Ok6i~3|~boZL0ET{Jb~^Q1D<LrNaY()XSHqpgPyq
zI|qp#D<-cqtQ1sTq&DmVE}Jo6^&Sovp98LzRV>AES+649#$>`h*5sns*Fr}7jx0Cd
zLEdWuC72Z5ER2s6$0^@l!!I83rcBIyh*FS&mg0Qy5nr-vIr4q#s;?W#lw%LI+TF0(
zyYhHb=tVtUE4k0*|GC>=Z;{N}WbUc7(fs1P4az#umRzPS%P4jU?x56HKKwR<ZlB9+
zhL&02H*E}lxXg~L8_$_i`B;BiY4dUP4AQjH2QU`SsFk+~xEM7jrut5BF4feJdOhIH
zI8mMaM~FIMK=meR1Dp3}Cd^lQL*<Jy@bgz<sZNIC>dC?4QfADte<5e&O2F4I`jI>l
z4~Llkc?gi=v?7>KFb+7ftz8FkY5sM-v0Wv7SJl=MN{8HnJz>dI?U?ia`%!AdF3_Gz
z3nBB>DQpHg+%;5n*K_;4I%6?<=DOE6dcN6T)7$&`Dza7VCt2g!D=xau8{Ny?13}=J
zKBl5Z%Tk8H2$)^E*5zHgJY~`OFGH_2u&%dSkNL=+CHvq}8OW;}J{B2PDyXF|W_ZrM
zReicHn>GP|kS^jZW|5m$ZZEd%_WF4*D`uHXIz<;6i_x%`YdQZ50UuZ4-+&B*ukK>X
zo`X{k>Bar;R;i#+{DrhNz`(VhTZmc#=Xb2Q9a;+JK;r^<ryR$c#SqxVAmMl)?z6c~
zi~+`Z{U4=-_q<WEbZB{-cJz+0259tiCeYZz280x`ff(LP1L1VH_<%?@HVmKat)eJ2
zBfp*N-GM4L@xj=0?Ks_R1)=TFJ+0ZwNx2-fUP`$bb+lz3EsF6^hzAXhheOP}p4h|5
zNqTpK#+4-2g7m|6&WC%ba^xOU-If{!^d9K)i!6L=0h<6@jEpgdbU@vK!VBcBNndxt
z<@KUBic_wKvk3DBihP&`w!FSOR<SLYH4KYbA|4@d9B9j`<3kIoa%#}EHJ0NXkuolg
zLV!;!WJOQETN<~5`)*EFXmx=$E&TWtC^imh8J}UFyLO!KBdtd2DnxcxNmg&sI7T9i
z(x>@iQOetmO5Uih*xb{iPTXuq@y{%VMA7`wq0`2_r`6JMRz7S2^pqg8Vp<SxXXH5E
zOlFoTez{?}KNLP520ZlwN*?fh>~gRec=J;&QWFu&bLGf2$LcP+^o0kQ{B@hVcL}4)
z6CkzYA}2PC;!yCu7>w`Hv64c<5VlmRDNd<MY>B<P8%ncEkEi8-V^}IT-p+kDC)yv)
zlAJik-E{pPvJdy+QU_sGt0<6d#{aC48+kA&c#b$Mxw~s%hH6%shjk8En@5Of!P$>t
zl4(A1g5H)ZunCujtgmF-%fBhg*r4|;^bmEOZ)mUvNM%Ztvt8f`_&y-E?5_k(_O6b!
zIq#e5L%LmgN}Ta~OwaG{UEY2NuO4u1O(%wlb<!3nrw!KB;aPYkH<uqoB_>-hb|?x{
zG2*_C-T)dZL*-s8U+}=@N$>Y~*Ld#9dS#h=6wlUpd2>t8mN(s)a!}72BDeT(HfRlt
z$>%`nqW^_9-#gyw+Yh=A*Jf=Juxc^-SZHPVpvp&~M}+I$JOIP3o=BNI?~um0hOY0<
zc>Y!^cM^x|Y78N)CQ|5Mc%>zezu7XizwkLWq1Qudm6GB|!8M3{nXIfo)2uW>pTcIC
zVZxnO_ldXFFF@1%wu`TG)8NlU-oxoGfgX}I0HC_MTTiULV=FNhy%LowLXewHaLf$d
z#eN@O5MApsZf;kiOC4O>KnR9t^Dz>O+0r|(`^Te#@#0`0NLJWoN@f_}VF`~<IXvHs
z=<A*U^1#jSbb)TvV|{AB8BxVax$}7=-x@rU%HzWMh;Gz0defJGFiUPtttZ*2h_}`8
zY}5J`EqiGVu93HB2FKGdLPlN^kVa-m3kU&TI=(aZPwurfJc1e*q4rgrAkq&JtNS$-
zNDTbrUc(^0hf6EF2wzIEp!(8L219bXRKAK5416VczdS?OxH@o@FtL=Ml0i@LLLKRb
zUxH>(plY3^aWi=twzhIgNL@F05vQq<P#urFq-uM^fG5OAp?Nz|yA6$&EHC6iZ*AK!
z2D-Ht-h=PCiA#IR8<|f4G74s!9#q##(MQ6LT=1RfsXLzO6^z~Wq(M;VE5yL1Pbsv9
ze9|B2ioZ2bRp{d)U-AOx_g-|_w9B<HE+0^$;#pi|!>6!}bCU%yIe;V{t|hyj%J{e#
zGA}~b^!p*b>7t085;?y#_oLcXWt674c*`#h`h>~WEwBw-`E1Lx9mdT!?b^BCP2YK8
zL&+0?oNTUgY@hWW$n*fWt#U-nBNA=cHRWVUmV|$=-pc*Zk3>(idDFRtvWf=yARTs_
zkS$KO_>OjdNEr(^1plMEbxW{8RhW6wYzZnzBYTdse@c2uKX18)J|QflH5qOo)a#6X
zuxL(A<#Q~v=-`S4?LJnc8f)Luxu$>yH($=T*G`O)x<D7-Z0MVvWUvbLGQ!<5U2gBW
zSPhT0T!SexdX;%z8k>~^w#Q*z7AgKs;?FWp-G#U5jL5{#5K=0z0}O93nf`A~zKTUI
ze?sR-1b~_-5S~O-nWe5GKj7mh{P8_#>s^aqpva_fCFb`E5Fnd_7(XSW$asWILl~n~
z?pF!WL69-XlTy^O&vaqD9tBOhH<A2DT7a_xAq7C9#w6mh*ks*TL<pM4c#@Rs3gqD;
zbHK6mN<=c~NPi$Fjwx{63$OzwK}EDrx9#>qY{|?QaxJ0eXC(W`Df1bcW-lwmH2&&m
z#pctduj<!Hx9O@;z;y|*I@IKh5cJv`P)(ymc|z#r*%H=;u*|q}&bZa^N8?xFb}$tN
zTryPj>PWQ+bt@G%aewyLc?|n&mpS3fCYqyT0}@AXnH}~Em7Dqwh`Uh@)OUj$-D7us
z;0XpZ&t6y!9r{R$@r|aTm&H7sxd(L(jesy>v#mxnaxH;Mf-W(HdeP4#Z`&TTRany)
zXFH49<SYb9NBsd)N8WI1k>~_d39d{uTd%yH1H<eXDO<E+<N36c%EF)098nUQS{0fH
z=<IQV!DDYo;(hC6;*6VIc^TvPI``KwF679jJ)?P$50s~^{|WV2&?bmgRg(C&b$X&u
zEvG67$Lg}O;=CMnw2og*T0+O2=W}P!c>SMS0X8a9!1uIyKeNF*2d8nLvKKGQN`x3n
z&UK4lq6rQ?T(OsKFXQgF05w)572$VgG~u|=YmSS#eDtXpo)&XGk>9uCu5+n;_P?<n
z!`7&8Y+z541$hznpW^J5(l?v8Qfz6(TOPvYrt<ZFkV&h3SVsKrXeKKfYJ3ulh!iUx
zn=xkXD)NfwAdVEveiK(Efu!hwo;@1wJ;$2z90zZkf#zG1KST-V3^&(hGx8vX?tV<7
z_n{pe-_V4e^$eg?ww&CRZihD}ZEF_KLJ)YAi_sfi4$4ix(k;b$-N}n8Uybcd1PBQ$
zGo@+@#(sw$ZLOHIiYdkOL&9*!Odt_HPUSBdETmfr-VW-4YuA_fdp*#Rum){O_Lcua
zuy0%91v;)?GeA_5oPR1)J;}*JpQ3s`q<W9i2r{rM<wR(&?i@#Hs3_#ebwsG==79Ga
z|05BYDfW8$0Rh_yb?F=5`se`puwQV<2*=-hTPV8NK$nHYW<=g*U#7?k#%4x|$6uVd
zH51cdPm_i<4jx$|-8sI({^G=kU`XTaE`SWhyBozUJRe*~C2+hoWB7FRAfF~BGfuU-
z*yZcsvoLQdr>s4OwCbMAnyPe+RP<0&THq^q9rzY7iir4eEINXGU9Pb?3{fSx>4l~B
zy0-R)VXJzhvu#n+8)}d7d2~iB7=(R&o@iqEB^)gSS;QdR+Kd@`TALQM!bk)zv%&so
zZ|(^0Am2eMto#<z60-0&{ZzO501i3@XjCk=7RDD6VTk23^V~ohNz4KXgTT{~NZCT-
zM?JNaOFw;E?@OfHU5)$x6GgjmL!t6$5Ij1qxZF-gqdJ|XEg{cjH{*hJ`U#?EpWCHk
z(oK1-)}pUl5{r@vPR8!@urV*%$KAh^@uf6O#z{!qwpux<9k3&`ee310csl`}!lcHn
z<0oBkksj<98+lVXNGH-mguB}3rK~QZRPVBm#nI015zX!~{vXE9Ax0EvUDj>ewr$(C
zx!bmF+qP}nz1y~Jn|I%roaDXa4BliVHCwZl|6lc0d80~70tc0fMecf{mMX<?H8j$U
zi|EbrG54vYP`>9@p7fHc9@IQ647&x|yaGiwumJ?%c8>oGGj$}xWVgAgOXLiEMZ>hr
zd!=wB6*KIF@2Vv&3qYQ2uAK43Y_~g=!M*veU_v$NZ8-<AM~hR29YeVkyH(Qo;|p<2
z=ofrB3ShXcvO2!hFtuWUcrWctj-m#A@)@_!)y$ZuIv-{uucTXhipk+&8WKrx?bfx-
zmF<K7v>5V`6B|~!EWt~_LzL4gar0Eztr{IFNY&ap8e5nDFJ^1-#k~@Uo&!$8cMF>F
zu9K5W6tX8Bher>4f2>MkCeNwi(J&L;rmR3OX|7IbwdroMaQp_ci+IQG{U*b4Dd2O0
zAjdV>_8SbNWi49UQQX4BI9me~1>SQ%d7DR`KEw)GbFH#n)qkD_U`-PF1T3JMTaYex
zpt1rG{|a+_XPs)LTO|lQ!bG{ntEM^<>M_0Ou$Z1=jRi$j_=v)Sh1zVfmXYp=1wgY+
zum~2F`p(;X_pbRYt1-@LTF4!sQ*}d$AbDZbw;?hNpxZ<&Cs#i5$({7o{-`nf@!*-*
zbOz6eVNT3T^>&mS#FLb4eaY}BjuPlrdL4{C$lA*2Cgrn4VrdBw7SQewDisEE6U_;a
znt1$l&L;KK$!%{?mHSFRFDIm~LoNs~Iq9KQ&2>s-q)~RmC(7&2ZZgdJ)H+81ir2Cx
z)Wuw^<pe*95=Nr;8)wE|gfmBG1n{4ti$md!DdcuyCQuQjwwTvcP_6pvL>;D62M}G;
zR+^z84*3+o48fE%7j5e8WtYDy`>>~jMG}R36E_<{&c(JqlRs8}`i$bBdezhVjM#~M
zUcu?*uH0mj!i88&?sJc1;J9hz-K>Wxp2pEYcQ|r77qAy_=^b-3h(<LGsQ`**3sQFQ
zc^MeLC9$^WPu>5P0nz+bQ_-CtD#dA(ic84JIU5>mqj(J0uzXp2u0_OAD7)LcHx@;;
zy%936h^P;z^bRkbrOt9gAD*YnauI81=~<BWP`-WC(kG_d>=7Em#KAu*%+KNVu17pW
zN_)w|2^?E5IeIkU*C?>{e;wwvpTSm7fJ0g|`Myzt;_%Es+{@|0Ug^@!^Bz66&Wa`8
z?<WkOm^sPE>8!B&2I<jtY4eqbB+g>WyOQr`^){Xv@e@~~xDY(_F4x~VI;;r{L?WGI
z%UiuQ>{3w=sh*qm$;4N@ehHa9Z;(6`K>iG3k)Hy(Nlo_GDU<Iu`WJZy+Ha(j1A^<0
z-M2&a8x);c(`f<4D<m&oK+p8FY{1?XAe~ORaBk6K&MwHEMg7WAJ8e@%O~;9B&C>wy
zZv-C@!8Q0Jqm%eu-3i(*Ca6Y}vOSYMH0>_QkCo1UVE}=Bk<-s}E)(}AT1wPy)@M^H
z8_fdQbZ1cQmRqW~6W>`BBp>@1OSgt`xhCB(OCn-QTDN~+Z~tNP4OY?9ibv1l16Eux
z(i^iv`8R=BuS{>iWe$FMh)pgEK>p8myVkyJ7fpzk7*Qi8x8K%9D00J|=k|S&;z(Ay
z#N({)4VG8Srn!;+!!Sp9CQCqUwdD;l5tiqb*wOI9ffopuh;F18G9Yi&1wLwHZZ}fK
za<qqQb!dUfNG2Oo&|@oq1x4hP<7!Z`&gRbImvdJ|->v-s0i?&I=wyua>Q@z80@Sl+
zbQ@<}3uR>epe>tq$mQi1fQtHDhM}!VXPNdt_BFKaLyG=gn_NTw?)2rRY=~jw>#&Z+
zQA;9SR&lvu*JRShMKwKYDE<Kr*i20w!R4rvbSohcFo)mT6)AplgVSZ99=fji>B9C}
zVnibfo1&Za{Ot8F=JvyVy?@Jd!<wjzMBm+tS*ODyu1K@V+Xz?sVU+2a|Ll@m#}M7?
zLAX~D+(XGLpz2I$XyU{M#xXZFBa6;7)nhW(qA3d;SQs4mBszSJ9Zq`h9!@<W-t!rz
zdqH9Li^c_ZSM#`M$6VRrw)@eB^wU9=sP9PH&;89`-nF0W(R|UIraKFw)+KQTsZN=F
zI;OTaEm-Bmi7N4;fu#IdinwVEfEonDoz|F%Bmu^BK~p{q)b513d!@UwJM65vuOTkT
zzalyzwT^kg-;xzNf9Qrslw`$pMP)}!n;VTfdUaB2-MWw+c!|MH=g`IQ;`5vMj{MU>
zmZ2S=5|6<x^|qu|{K_?G+9IP7Uuf_?$F0bH_6{n)A|oGMN^WgjGOh~a@2qB{_y76b
z?ofA`SNNcK5(ckZuI)t-mAq9mpToPnL7&0lI)HXbtl%_YdL6gq>Aw;epY5QDS3zpx
zF@lAoW<yBub?>rL`8J@-C`SEy&@o`wj%Z@nlc2027x2`-R8+8Larf1RFq~S)ThWeY
zUe~DZ4dIr}6NflalaBU`^6Aw^W>iU^3m)YLt6yAYu2KdUvQ-3wwN_CG-MW|)<J>RD
zY+Pm@Edr}k>vlh5ouwngteX#+EJ?A9D-a~)v<#RE=G)nXVsht8Bp8|hLfbUg^})>+
zeJp1*|A8n^Up=3g_hvTct^M!=P<jRe{BLX_I{^cMouMTZ56^!RK>xQHo|ExEiiv=c
zk&T1%Kl1-!3z;}MnE!v-!WK{kolP{_NH6<9;X{Ni-u5oAFfgpLhS3~=fo(z%_?!4W
z!ggu<)xBCkBt85N^3$E)s-3FZjh?Y5#?@@AMa2rM1}P1$j3DEjegri+HaJ56<j7Ps
za&-TopsbjnAn2G_xlObKfR8ww7&-K#3)4#ww2%Kq3n3r^CW-?P$_pZqG4juD0QQc+
z>>nT<9-tlU0opk@Kz@zRj;KK5!93TrfK=81BIW+XV`JqcCpEQI)wIBZWS;WZ0f<49
z0hHwv@K64$WQP=wuc`q>fKdPxvKEM)i$)Uw_fJg<p@lg3)+fXewI)C?BXo6@mzQVD
zFT&1Bs(Q8}L-Q}A9R|yXbOi43^rr#zQvfRm!WHyWHWDrZHs4(9{4SYuYz%Y<(iH?K
z?;ltJ0&xxMY!9LptOYQ4^-m3C5}3vW`I*#srw2ys-<<}uZ*=%UzOlRI2U-vObq3SY
z;Nau}9@uI9W96Sx0}PQ*Ry757A6O6I|7VgPCg2}fsNk0mKDPpB4#)b|<V+x-CIk!+
z%=DF&mlaY~!#o@_J-xCIAA98&9MGx>W=(l~bOr$=yfybzF2AvYY7BVY#s0Y)W(Dv1
z0P^_*uDKpaTl14|aDFpY3*74B08T0KYXgj0@SUm|L;#=%2L}fThX*hY3h>ZaZ}3YT
z(Avg)CQo{bZ%6^|r*B4X=8u{THom(81@bNY=mPBN4}hTV7T<T<hx!{c$H57h+KPtY
z&pEz<7X8A_r2^CV0WCbcySxJZXB-fhUk+dlzt3+cx&Ybq9K`kR$L8l0vB}EnFQtVg
z^RMhXeo1k05_*4HN*r|m#K<Hh!Vx;a-6Ish*DrT8*WAl36NhgXm48h(K%k#0w9TR)
z)yta~2*&RYJXycrn378%934deo7YSXhdU=tKK#t@sm1T&@vq(ZZ}@{>&f{;5#LD)-
zzC7=tKG-il@7(;#z^5Jb*<Pn`zI6Tph%V^eZ)PvxcS{EqfwZ>U*uQdk0slFZcmQqg
zuSE%Mc1>;i_>xZA;lYzSk9X<X$FWk*wLrlOIonphEm{CIIk?#0dBN4mK|D{tcEPX*
zc~po_-rpQ0FzbEe_pwZe`v*Y(+FAiS7PvE-e*(yxv*6Pl^5MfN8t}^c4>`mVC_u?6
ze7&Qi=;xu#yF1_&l3)0bkT-zq-`#&iA>A|fKccY2k5~|(Yl=TS54fN16Wal3?bw&t
z7NBd;4rDt23tN!6{7Y;u-SU?J&|KzuHypaZ_8nW$$nsl^XYTwrCO_KQm)M+ddlzCt
z@D`s0VjAu%CZFqZ7eXs)gdd`l;R;O-Zcg|k?kKLwu<Z*y*b(fzHO%zeO2HRf08-<d
z%^#`0@%b%n4BwL6ZS^03CtLjsTF`9$??C1Jf)<)|`GVyv82PSEL7zZ|$tm?aFIeXK
z2KLCGk^3zPMLYb`hJ`(W`Bw196;SJgytn_Mb@`^x<(aF@!QI;aL9`bR>T2f}WWVV9
z9Dw9)_G>r}UiiEo{saf|@c$VBw7dc9LH6So(Ej4ad9pMJTWb&eSynJ#MvyQ2ca^Ta
zA9@Iz{INcQW|`UDc<77oI%A!=yPe;_xu3T$$X_dT+AZCJZ~re3eIfK`{<qm%e262^
zMgi??r$z+0mMVCgzPc2GXX9;LVXfV(Xqma&sJ)Q6+0*w83SzUrvRx9HnretK#Z~ml
zA@bAj3B6oB_^(VCGmv`HNxuLA3Fn)sJZ97Q9}delu!Z%`L=Cxn@^M?EoYG4KV@%8<
z48{_^%&#b;zbRgIA71mba&5f81H}Hs`A2A3R52(>)Crp&Am}ynqFs=fE9`@@LoTdS
z_N!`h8E=s2HrQ_D>v+OnLtdK<bWZLrE(~RT^f{l60u^PUbyPTU7`x&LRBj{|{^<ma
z;`d3|Fg+x<H7c4yBJXMM=D`yl>PE6b>YZh*2BA#>W3DiG;~3(^NQR!XGXhWSD8*pu
zF3TBI*&krtA^I8uPA1H`bzDaP4X-TQ1HwbL{Pig>$x=29CsYUZcf6c4_!UMZNx>MG
z?moM|V5ub;hsb*oX3=CO($8J@HP%vzW;F4f385P|X&%~Br#kb;(bN^F9tXFIi`~@A
z7F%J*uiSr*uoc^xTBSMy7#X_K%&O+EY`5zcYEW@~5EqL2o<@P5<sQ38%?4bp+L1+J
z3=S28z`tK|ZyR1$>wfgas7&m7a7;j5jwE{DA;`JVdbLEA&Hp}@uc`cMrNQ1i&h`6x
zum>37HuZnl$i?i6aPX0>8*C%j4)w~f24}T=Ru7^tDEBiCk~Lp5_e)KEV#RP~ZAVB$
zIc-Ejr-s2FN7JdrzDc$hQH)VV0?w4`RkmA9{Tv8B4VMRg=w7WV@iuX<dMp)sA|bdd
zcGRoG)4mxPMy!E8P?MV8%I1BNhCR;CvFaG+yo<u^;_v%-1<uC7<yn_|k>H`{t+n@1
zC3Rpi#+&gAM-y&p9MN^KE>E(@_0az_H8oYoYZjqkl!>qNXkE;dBC&65H}t`I#?v+O
zepeTtrcMn_Qn-=~lFpZPV=C*EuTFnqVE$Rj1e#ryTXXy({I05o;xVyQDR89`^TZFu
z<9f4xSk;bIKG`_$$vax!T1e2^h?FuOb&$=)O#0_Ag2adGiB0oDf3QeH&}K>^IUNi#
z&>eaJvql`J=qah_!EeLd);mpEt6Xaq>-5o(le!&lhlmvHwd(4bi~J5L<a#?!ciq3f
zrjTNMuM1oS_qQrBTr}$eZVH&ujVU`{|C7>l?WNa5&{jt*IA&MR=sQ8(_YBplCR<6~
zojV99`Jo=m<ib2~CfJKnlhN8hwy=i7WcGxWO(W8iFWWoP-6)8A#c%n#&;uG^1X?7X
zo=RnQW6Ci%XP}>$V}XF(bXLO$$Dc5uPfo0o!z?0ZJg5IJ>g*t_J%}zUI`UgQK-PK-
zO%VeQ?8AlWbBfSzoIbF>M??RGRs|ZhNAmt`M*%@!wQ>8W@vpkjWjFT*2c?Vw^k$9R
z<G>gLho|pIn-Z2SEb{>llWxNyPA!dYj|Vp@OVm&S4qS$DUH^A@_Hs%qCr#tej&4+>
z>F!XWZEEVy-qgW(_(bbExJaO^AX+^PGPTyF#;XX@mc)d}O@8_78`<HIGUBX~xo{zr
zheD3LK|_GkCOUYCs`9Vun|@391tQn|yX45zTW$iT9odr+f5dImU{<gk0-OD~e0F^-
zj?cF-i?{RImb#qj<?|YVUbJj{ke8J!QAns-BC9#VFWYdjh(Xg%<4*J%dWArO_q^+u
zbJ8v1z`Q6*M3G*ruwnW<%DT+njb0>19qnZcGkL<()T+8@<K~bG->qL~nyGQ8{hS%I
zDy6Shv*FVB&HK0(ZyXLC$H4mU_8@%YsC6_O(mY@{7CPg~H`}7U4AY<wN25Vu%B;^5
zwZ|4*5pQm5QC&du<Xy{*4l&nygg_v8b@j8F^CnKUOz!u_8y^j16!L;`VOE%Y{M4HX
zpDQ!pPna@2Ay$kySwJIq0uf0~du9AY@8GkY6~huraivKvD`SG1yIt=0I^|duPVUYo
ztwO0`A4l;Klwvtl4V)~H_bPfy0ISNqv`M>==l4K^$13VGDrpIg5>`terJ6%SH_~yG
zxu_nAG%WQ#?z`y5ajhNVw9_MUkZ;FL9$9x(C;*@gcFpl4zg;?C)^>+tvWLy1BlTfS
zeUW74Zt;f{{HZ4O%_G<O`uxwZQ-D1BOySjl90rEU`6n7lEsN_j^qgPWi&H_6xBkhV
z&oBMIOgs$0+(W_SUQs7wXYh0Udi>p(TtbnCGfJhfip{F>&?T3B10~>0aBTDW<jqli
zwR?{x=n6k3Sj?#O4{yntgxNbKr0`9Fre4#2WbS6B&P~%T5>n$i*|yNhC!OAg{*3A2
zWI(<pTI3l&ogX>(kV4INq{Bf^Xpf9T2uH`=1(F9rM&t1O7ChC8m=yk&$ush6W<Y)<
zXdZ78C_`7j)k2)OTaUR`$F2cu!*6MGhk>{#>jqq-=(>`&?dU;Ywa2^1llZrGgy&^-
z7p8=x>`~}GV7IDA9?kaiK=trLqg7E(R;R6M&zUTBlH=0-{Qx?F=;JnSTiV4l4|<}Z
z)B8gP`Msf-#qyyP7OUMu+b7<d!3d^lwRlxN5?20w{t&$MNH0LM4EYWND-iyvJc8;v
zO%L=5l;A7gf+>)2*wP;K(2DF|=i3A_c6O&28$#cc)Q?nsP`O~hTK3+=B_$1dA5UM8
z;g}g@_S@5QMKQc(miaX;?SiW=T11c0cl=nADtL0hk>?#9>sYS12UlqI60hg)9gt4T
zE;$UzT#px-o`5{E1P4ns>|6=R5Y-Z=k4q+m@z4Zwbcy2;KTIv>PNcV|+ksm=jHU+b
z>-;!y$uzyx$*s$AYOzG${zx~n$cwfp-k#h=+;z`)pN7OPR@a454ZIMqqzx_=wy4Z6
z;r)8d4S%@ehz+TNYIakRNN(i^O2E&Z-t3g{BZ(dIim9K6qrt06QR{E4*|6H?&bkY{
zaX}t6@IE3FA2KYLV%I<mKeHc8Bbj2HT+5AuaKyuTYbLhkYNE<Dw&0|AmcXZcf_kIC
zMI8CS+(^+R{XdzNI1X7?OEwN*lQrr+9ho769<5^|-G^fOP?Slf1O>(b{sLw#RsyVO
zTUKcQjG#a(`zBv!C^-SDoqlfYY`h&|B==sLV|I&BLe-9~mIX^lJqSY68|WT&He-6J
zC)Bv2k_q-$$mphN9@qYt;YdV&+Y{d!wvm;s=$G-#ikP{n<BJ+2VwW(vg4*O%7efmd
zf!^s|*jw%;(3aIiS~V|XF4+lPw|f_Hzwy4dK!=U)3Qkg|S!~G536SyO#5g==931w$
zf1`XzV%LV$Y%lS87kHmk=-mcxh8w!fTWFIW*Y-g>Sc5xLPna7tIFKcl^$-Ch84#7R
z5zHU&3G`~@GR%Q_QK_aoZvJJiK-O)2l$)9lR8(TxSw5BZzG@t>Nq0nSS;Gy8=Ne>o
zQ?Q|B8cOdr_3yE`Oe0qm?38cO)t%=`W+||mtG{(x3e`2ciR39dnV_vVPZYMuh;hzK
zB612Rc5+>_>izo8Ia_jmT-$FW`r)4nYL_oZ_%vhcRzuX$eQP8jtZJtWyvA{K$;Yy3
z=pmQTaTMlB>XFSAaEA115!ox->b*8$=V_<I+NqODavsik4@b()qlv`*u>d~A)cL}=
zt3ERFIL0GR4mbp|zufh7UlR7^RK`yr9pQwEtejy<YhaW?pC(L9K$wHA{K<q8(W?wO
zbVs?GdSk=E%)<Bs72fV_t{TNQ%-EdlSvC?gsT)>!KJ~MF|2?H3)HXhIG(kAMRfWJ!
z+Cs@}rpy;WKVrUR8)7&_s-6mU!<y8Y-j$|obczVzi<gz7ueJj!<5VHWe@iu)Bn6B!
zq*EDA_H?^UBt`H{DVJs*O}pc8ytL2^BHl;~)j+KwMXI`cL{$uzE;7Mg6=c8NZNH2C
z>SynLQ%;$?2wC_yM!Ri4SI4QFc_dSY0(D(Ny;A1B=8S;UNm8?h?rEev{_$LD-*+P)
zHR2dTH06mLKVsA40DBF0S42W#K?EIG`xAp~yVSY4$=!@h3-pgGbz8d1r(Gzeu2mB;
z+e&!SHrUIRf7GMe+nOjH&t*5R^j3(}{;PSJ&R=y&aR&P_#V2;%GUyyHYu%w+{i6fX
z?<ww(7Qybc%e{1>xY*)3ynlPH`>OV)I%Odu1W1_DElFRchb{o3#PQ)8ueV=?q*psc
z8vlH0Dc78Z@{XeNYCs^N77hfhb~6#3HAbCO6ocH;lZx2TVBZ%Yh7cpSc74B1otGIp
zv$<F;NMuhoU%S3>%L*#Tl!KI{j`qNEiQ97>qpelIMu~|HlI>KE(?#VBmrqyP4?bPw
zm#L;l8uJV16deawQ`2+TR+G}r=>j;j8_0*0S!Qq>NvPb?WVmMrw4#|LEQO)9!G!tw
zKT+cG!Az?BeniM9KC3u%2}Z$`S_T|XXjZx!f+IRE_>><np^+YRuN##m0Pq8DQ1?w=
z9XFA!hEs0fHeN_6h$Vs9kHjerI9k)vRI8cP2Bkp&TOp5j?Ja{*U0=PYK}U#|C_9sC
zpKq3HvHa)VRcD_`pP7dtp4YRd7!4h!rdo03B@hx9RP$+iF*TKEMkG3>PT0G<Z?B|v
z-jKdavPfoIzLE>rw&e_}Ueu<Ib}eUzLK2}`S){sM({Oe9iP}VDNFGh68<OgU$l*!J
z9tauhk#|hTB;)yKl{UDhgli_|Vibhcd%6X<q-u_DCYmZ*^@@z73ny!&on5b+y?Lt)
zh8sXEDj(|3-&>)!mQjRnEH$}FW=RV`BymYIm4)D=EclETnD7%Gu#JUhAK=MH$3}On
zp}{=;9M$oHdU42l%7aQv-*)^TF)I3pd4H?^UevI6l-Z&FO$3+3Ka+$`-0z%?U}Sm}
z9}ezan3H%(2yjR%LU$o!!w88vKvxZqJ)1PLZsU@qPUhNH2FzunnPA+sIK>Gu(_=R`
zKefqG2m>!art-8`T7v%;h_!?D#a2%qGG4Y^hfP$X6gd|90KwkDG!Dg+%suxRyLTcO
zaknkNLoM1J6Z(4`#Nw*N7uqcEzFf9unGwHw*d!k&JvL(PT^Lc=A)g+l2$oD(D7|%2
z26M{!#tkgPsE;>ou(CnRXbq;BW5wrV-4aRun%c%F9vOd#9i-p=&6fbT+QK``>dPAY
zwZ{4*3a8V^&%L-xAB3ns`^x#rTqPFV(Y`OP>H56ju6?nV;PN5woDK<l=|GwCK2+5a
zCuVDQYi5FIac5>Mmc)_{&2ihP06l{q!Fw^q2j4=3g`VQ()UD7j$=6DyIdmP&%WHvN
zD5q0sTz)s#R@C6O`!(lwZ{8--*v}okzyr2>P?f#Q+9(CS80wjzPmNl3mV4!`CgE(m
z^C6g=Hsg{VI$hz?qBBV}{ws=)?y0a>+pQ>L)5`nt7hISp-OQI`x@D!nq|>2wC;5Ak
zXwjFM6w!Kl@57Cp8DE4DXDkBm{l)Uo=aP+jo8%G_SL;igTXx+ng%GogoF>3Wa=$xK
z!=)k<>o1HuOYC8EJ<WbwVLR*5G*8kQV;*zH?z}m;m+P8BS|N{e(h<~BZM3t#WPyn~
zy=bf|o(oMhnjKi;GsxYEB7s<%`r7Tj)IL388Kb(7(l&BT9dcKVh40V4EPwPL^BpAJ
z8#<NGqi4>y%^|0omAc{d_Twfuv;YvC%`72(q}-so`+kZhbz3Ev-I7t;3XC1eqfeaY
zfQs^NA}O#eITD-g@P^k`=Ol72(^!-a>qCp<WRnwp@S?2GKH{BqwTQllk9nN$@Vw<)
zYMBD3cOcgKE-<D;V*-gq`j0BBbg0Q(y1A!|#zZrn!cB_BMIlwW;LNnY5DjMAxxg+2
zGUtKT2nS4!h$pm9MRp4kn_VXK5Pj`O7p^Ys{31{v$A%gCbsGb7F4pLbzan&9FLZe$
zGYfT05z<~(^7nkk9<JudC0C^0j=LDOwcFbS*XsuOYmf~gBiBW+HoK67c3xKe7XSYB
zamCUfLb$+CKe15@IW0bBu{6N-LI)yeyu>7Vn49hXO-eR9k3g^TF3WYnZ>9#APfCBv
zYP4xdUjzQ6nn+a*+sQ}Py#dQWpy<3Jt4)g;>z}5k-~cIp)v@9`|C^*=dpdw88lt0$
z4`$z;F;znJLlYW`aNNIW?mAX5c)0`i-7YdV&)B%_$XC?XBd>7yCxxK0$yfD|l$^N1
zo2-~lJFFNo4(rg~6I=HWZ1?(A*tRo$hX63|`e7Z<w()-x{TQt1bRci1n}^4OWE&nh
z;EvJ|%PQu$G<aBxrY{<oH=J-zi#1E)ZWQuIu0f_>uh4G0uOZ1iQRTOSo%IemN{dEt
zFh`|w!NjF}>U%lPH}?67gU;S{>k_(DX#eJ9;9UpQu;)P>thU&^TI|fb&%=>Z_ZdB$
zO2|(y+%#29UW!?L^Mw7iE(&`o*$86zRw<S`a+LL{g94J+kS>4<5IEW@0H4?zPOXbG
z5+0j@d7Po=MF6GVr|(R&(8N5IQ^p}tub4((Gc^p}F*R=~<(|yVyNBi#r+C?`Z)8W1
zPIL%diPD|JZ_;{#t*_f9=Au4UI(*j!qiRTv*`)(X$ns=fU%4G+hx84N>p^qd8uTCg
zT;n?)j!#xFDboM^o~8&J7EqF5wVP~9+?y0qyZ=nW!U^TI0gJzhhqMh3MQZmDzxw#v
z&v5Yn^GOw@pP8Q#vcMAS1Z+`j=np@S-8IKrg}-C%-Q;V3KNBNw->x22F^Bqxt7_iS
zz8sGzAc}_kgU<kdT>VA{<np5j%-hA3ULoyIUcU2OL4`x;5hX(8z}t*)pJqXyTRL^_
z6AjgAddbRGhOQk7Zz?KleGcKzkkAC#2FH5?%-Hz%OsvbJV{GrC$Ly3<mj_L({8rg+
zQZL?g&tlFMnDQWt`amMqX@^g<4Uz%v8uo|EvR~1BiQ{&X5$SZ=Qj;N{T3Z4T^=Mh(
z+}|Bmv6IPh@!$-B9zs&tXPx2mqF75*u7Dr8uPLyo{Z5sJ6IIM8lVJPltXN+d>C1(T
z7@erJ*>x`*EeNvG?tSS&WtoQU(VJoSIFwp5L5P5tt&igK-IXf$*m6~x*(qBr>d!b5
zc5cRtpmqAXUM;l-$z2>&Ne!c;ls}z_8!^u}^@2Bb#Nh6ManUBy6lvr2zFT3H&c?Cu
zzn;e*FD)=GU?zP0dhbKupv!L6j^sbqsSxACCH7zUc!@8z=@F#?9Q-mFL)*M?=O&Ar
zcQq{cpso*~+UYU0Z&k>+l1m9C>ive&j-&%47{nDB?41uuFL@cbDk!rhu>mf={^{SN
zR)o6=ay=A^-Cu2wnvMsFEO!COqUcpDwhLVe=>4r%bPZH&ViJuXj%Y{Hw#7<b6$ayY
zgKDMfEH%f}el*ph)N7E5IeS|(`?HwM9@NHnHBuXUc6%i+8X*ymHRp{7s$O_AHj@Qg
zl5~1JoM9=-A+8at7rI#De>)5HgQss(WEbUhhB1s9oVp>KVo6DkD|i8Q?!XQ{58l{y
z4=C@$KRanMU;<?zm$))0$G=j<4<@`Ifsu`&kz>={TR@3pO<oQ=<b@j^AwX}5kUX?)
zfGubqBMhol%zCZBZx%0&WJbXzMbWz=Lca(3XH7g;G8E#MkjDE2^I;4*d!nJDcA-<$
zqVH_im`1VmR9(TB_Q|Jr1}LQ<LJmz}QB^k}5X$ZFhc8H~!N~c^@7G%>BDmo^xzOiE
zXumOHa`KvTAtOH-+%W^&HWwQf$Dyfbr9h2N^}&tqr7{$z<ZA}fn2L9owj$kN`YYr2
zzN_x-b#>fj+!-KILtn2uDL7WJ-h<@>cUaa~W)_a1s|l>>?pmiZsZQZuLJqSc2Isbl
zE6-yD>i3_lQzJ`c&IF23;-nefXvFh^Aw@YKXF)V$^lylTHW#9*oSa@#w=Qhjl-1zZ
z)@ms>VS70@(HRQb^h~+9y5UBa>cp==wm>PLI)%Q()e2$j^!BOw=(7zfavJrvpjXi8
z2XV$Y`xpo;m8r3*z306Zee`jucG;3S3*+wARqXj}(jeuFGOas>Cxl;<vy?10loHAQ
zK&Qd{9+>dOb_)Q%V#-Yms45!9cSRX^CX5VQRbj2zCE47wB_%>mHjzt=S&}J60DHWs
zXRUBcIQUQV%F(I0FWEjJRb^bB31y_Qg-kvrFg&%jce7KcqegM-(0lsXpjHX+AmXGM
z-lbbp=$U7-m|k1Bfrj?yO^UB@7dW@&!7nezkTQ<Fb7HzkL1y9uW<?#@@d8E*JwjBN
z-tG<=C3n+ozFPP`FY@;bZk7LQG&{R74-d%5%b{MUwZc1f1OdASwPl%h>hk6AqoY*a
zLWISKsoNQuhaDNL>_SeQBzV4;sBNVRtA#jwdkSt#1C*zZSGt(HhHT?`F7a}m&+cU<
z4BL{3KsPUQwA0M(zyU`a3H78=qEatDeQ;nAlMEG^7$QXKJDS%`nXkFK43ksby4aju
z=^UpQr)rlQyJ42+39(<-Z#l5BDNS8Xv@=IasaAda8J>2&#Z3WD@`=t)LiCFQ55#uE
zdka1Z32os*V6$;G%f3Nx;`w$I!Xvw}A`w4Up9LR$vr|cyEt*)EkZodSf!m?UK|X^C
z1(c9k%oIc{Xt%<@t62p+=E4ULgHCECg8Aw_$(ysQIwXUYIgzAA>UK9?9k9DEz;ej=
zJtJ%Sb{va;Z*{0(6PUwSt?+bjPUhz?&D){Sfz_5WK3F5P)J#Yb0QbSX8LluFtSHGq
zC-xVV<eb-nYV%@S4Xg!6uYt&tL-N5ETllH;d*6-}{lGpS?TLJYTz;A@)WEMXX`7?q
zhvzsAI_-mHBc<_t%Y0!rmybe95jTXw>;%5w>O)f-np_&;!<$JU<C5^s+Z)O!iK<af
z7(3ay`evn+Xt$SafH&&xph->0Pmh7e!?{I4htI<jJ{er7e&Osw!OC3t(K^80kHlyZ
z$Ix+xWpB&2^q}TM5<=A9T0*M)i5@khrlt|)n~+CE$d^%j-~I<3N*j(`DuKine3ne7
z<FZqd>SbTg!T_rF@jYvrn7VboY%!H`e31TLReg47zqNwFj}8O!b_X*51Dc;`DNDS5
zE+vBi$DmCicfk-;uK+J{nu{`b%`k#F4(v4A-Y0H8z`W-xOF~s-FIa&;xFbwtTQ${C
zQl*>=k@HIXZ3M^dp%_=*3eGDovnIx_YypcR>5wm_t8U?;3<W5PS>+9YuUR(9$baT_
zuMoeJVT$O=NUj8{?OYs>Me%x}4}cLlW1rZBFMPZ+u|`X1ixc)?)RgIxz9bp#{{8G8
zu0*p;vk?$ik@Aj`y}M4xxpK91W~|d>0o&g$)F#&s%oHhK6Srq8O^Hk{WuM=LN`2c9
z@T=x}C<PvNM%MzVm5ho!^MyKONttx&3U)={2o*8rJ-5Pg;y0Ink`O9U9qd11>)&>2
z(+YYP)w^$G`DWLA<t%w8mjSRB2V+6i0%5i*ydt<bE|y=_T^@7-jxS#6^w>F>GX=9u
z9yT8zhMD*0T4s_W8q2hF0m-=rFh@4gpuv?X^>ki;qL~x?`M{1a6pFeZRQ8n`p|J@Y
zN0lM@Wa&)JDTBBXLm7O)N#%YVzxEL!GTEiCvZ-ciwCpy$D9#IFo`6CDomsfgI85(g
zQJI8Li+G=+kub7W?A$OD1BuX1?iDj(y!zKk;uv))UrXq7vVbvN(&5wPwGxvI={%Kq
z>-l`H>>%HL7<BsZlcV5qwNW1w-|yfjzz!E<Sqtq&WnH8&T<2k0BoV-x5g1EmM->T;
zvWb$`pAq@|;56>tm8_`*n;4ka-m_;pYDLwOw1CZ+O_WAUoLElvQ^HnrKvmh;j%2|3
zd~ZvCiNr)bViBik51x{9M<<lPR$CFdx(r^%jT%R<SCic;0%bMwP0ujnNE50J0@Scg
z9dB}nYx0mHD*ew#+_ERx^Ik2GrMKkJ^Y@^ag8dM`Ej(s4wgt|4V(2D$GKAm?1(LW!
zV`vqF{ybr?2(r9Ta*pBPp*TX{uDCx0Vp|f&eaDe$YzGBy%984#9^2Irr!UKQjeZQp
z5@)znI4B-8%-8ELW$NJ#M(og6v^v+IiV#bHey2#rUV-ZRdkn=!b3`nNT2OTp<H5go
ziy1-xoG5dn7P;$<iWL^%Azlq?mV_1kg}5_-_Lw8z$5v)8T*25UmbuW}=sL#od?w&<
zfygk`d=JT`X58BqFuL};O1}&}g##E#<E@iZK4!djc#{XabO<85_8&M6zb!r%1wXJc
zb=iCjnJ^4Bxmt0Us<Gy%@~xGj7pAJ@vnz9UKwAHlTh$vr{}&MQ?E1zm2yUZ<V|-m9
zctXf014ggx_~fpg=PuoIeXfMXK_XTj6P2te!}f599e(CYGNG?J#a9&W9NHb+e%RzY
z!bJZQvh6iVhiSF3OOjT+<w?2D5Vxx5OSEs@MvdzxpR6TBDD<xDGC~r?dz57kd<pA0
z!yev`ie;l(LvKly7w1$M4Qf!@S8Z5c-&kU{5tp0Wq{gFs!qYI_!wE>S!#hrh3w#6R
z38ptK=yecBWHJ4@(JZyC!&wz0#E5B%1D9XiV~PFFyxna*h4FoN@_70QO30kz^CRrW
zk%OIVcl330(L5d=&6)Vd?2b~(4GD4>22H8+t<jrH-!d*zl1;TbU-;*FraxiFUR@-w
zNT0?8Ea_-k+}|xUBaeAV3c52jvBVxgYsYP=lVsf%@Hqv<D6{^^7=_I~e;tr;3+HN+
z8XbF>IIWhQLWm3O{Kg7PjcKq^sXhplY4?Ir5h}?d?qLypXNCPSdJ<>nFQ0tEta$>^
znle^eT)&2@p8mxy`$54%QVpVBNAflXZ&<dY5r02L8sF_3B0V0aSdI(p<{}yanPwK_
zo;~wr>*iT;FSAkNqy3H4WwoRwM@r+MbFOL6Sz3bWz!_V6vZlG#DVy91;EEo!W&tN?
zU<)?e0fv)>4OE?BGqC4@r|R_G=BffX14}tEpp!P(ZP&|IXbVnn`hD$I{*0rkf4u%w
zom=6g`%!Ga2iFmNelS`m8#I4tTpqp@lg`Y@oBB56?zSYK`aZvm=&e}TB`ThXd;g2M
zGoxSmm8Swv)8u$K)Kez^pj>FaH42oe8YYRK6LUraAdo^y>9zq?8@mR=CLQlK!%6q4
zhY{|*yD*3)(yqt>ycq{sFm(kZaQ68|4%b>op0frdTGi?tIpkh1OX3BKabiQZHPmjq
zH@v@1Qcf3<zh6(p&g{U?5M2oe#@L{;Bks(-TDxgq2jJrlJlgD3li{Db;z*_$Xu{#2
z;U-ai=9Zm2Nf?dT80^q(Ne{w#WJ-41QLY_Qu%S*zyd<RY2Pg@xmx2|(8oek}-;3do
zY;Ah&iouw*$UBeY(I+z)#+m6McuwAWF#~`gL{+B*f~f}|3dQgalrOBXk5Az>lSzk>
z@yqPGS8#Xz^fW)3;Isp~p6Hz$Q%QqzhMvR}PlM1)i<G{k*RSqP50dtIc3&UPwF&#Q
zHx_Y3>rE+Ng=~&-```L)_OONp&vbLa={iNpnGgTFnjyQvCK(f}I!dY^Wy+UxH0>kg
zBMxDP^oxtMYVMvPhR^^t@Le0FY!M{f&O^mP1S7Q>;?|+wklwY|1u?wXS&J)-lS}n}
zK78s@HXhN;fY!vG#anY_Q#AAOO6|J2a|gE08n~q$y+O2Bs!^CBXS}`)IbulJF{EtS
zuquUk#c%r*3D3oiuL=>upd;VwB!JG@^|vq?4o>8-AV9hfU?UBUY4j{<D+!*%BHEd2
zddMNQ7yq)KPf+-z2t6t;w4MkG-Yl-2m9}rY86upoRHe_-K;jmtQe52N<R-pUB|~`>
zso1eGoQQ#jp&v^G21bLqqtiRh!iV@d$O&GqFG-I`67Q&WY%_lSOr-V+jw^l-aS4Sa
z6~wm5JR$=Fj2jsn#eLcUkya+g{*mk(A1wqE4YK%Ml<wM|aMHqD&j4;|Pt0rM-er$B
zhd*PkYC5xK)C2+ZyWBbKmE@^+3;L%ci=ffp3WX562$z3_JIgRk?*#s^2YdqnPfh9s
zta3!qB}iAg3hAcY=FVN#ea_y2rnx(8$z@-^$7EN!sI8&yJ796Cx>`gwFak4z*5ORj
zBUN2i?W?h7WQ@$$eCsIR4wDPPV`bHHVTXhjHXrbbQ78K8yfAa3AR4Rj42v3dq9|ew
z51a}|<0~<MbIXBXIzS=t5VqU-)w@l|M+Q8<nRYTDA(40;NY5-$r{rUd<%FsyGQJ6m
zi8?bw#!fMEF3ZoHd08XI<U3NdES<tc(7(&*!#V9FkGivCYEwZEMwJ~{u1Yu%)jVT-
z4OR<z?GB8j#qeAhoqq;i#XK2_=b_R{=>0tj#Bo5t3=JV%NQDx0mw;HllDIE-_83j+
z7*mia(&@m8wXEVs3wfWnGl6u~P}|LHn7~L$7eX>WhY`@SdfLVIBoIThGMQBjLp{8d
z?P6=~gaNRB;;XlpO2xGawCo@AekSEbtjMYp^GvgXY{a$n+zfm)KAm$TA#gQV1}V<X
z46eOQ_yqovJCFi(M~iTbp1MyZN5_?>dy0w;6Z4WRdi`PxuH~1ajEbFWa&w>GNy@@W
zi$0NJd#GoYX@DwUWAZ60CNeeFJy9zD-3Ov?!rEbH^aNM;ZwhkOS3=AD$d5_$(-=K~
zqAXgPB<i|Z+`=T=3u2iT=0m@u+v8C&NYM^K@|)!I&p{z#_R=IF$O^>{;AG>2mkDgL
zj|5xWgWVp^4Z3{4?z5Q#E8MhpolQ)mi5OZI8Gygv(}~FSp&tvMuE-WLD5r0`e5u5?
zgai)N<|B?e^sZ#?nU%3MwdF+fw3_&Q1TDo^%u*g+jbQ-Ip@B5Sm({67jTA9ji5gz{
z<6EPu%Tz5q_3bYxe9>4gn`T1SP8YV`0XRV!l>w=laVrQW85&VyEEwY*6O;@xfjvAi
zG{gyGv1_N=AC?5VB`87z<6u?@YWi9v_6d9&#FV=#z_jG9x}_w*r~}ps+Cq)I#2~hS
z9@}j3Od31*eQXpZ2Fp#<41u2@pr11R8T9V;dWY=F?Ogh1<)UT2P0ROg?Aim+1LM0U
zKiZZ+HC-9VqohVYm1or1wsI#0O5<LNM6kCRbV>48;4TtYv_)8KlGKhxI1;uWS9@MU
zLsKZv9?qACI+Ja%^b=Fxk%Ak(VIRxAqD~ktC@g+#DFRqC4KKbNi5p>(Hqx`yc&w??
z83WD|vI`TDf3fhb%G-huhvHC3D1no5o*IH7=f3+K(gmyg*3a`E$oP2_NSE*liP8zr
z8IKU>*EutmtAk{DjwfS_v*A)ivW?|~h&yZ8@i`_Bg=_Qp2&An&4NAR;AZ1&VjFT%<
zi*odc?Mib#(uJ+TQ5c6N2KX?#$veoRHLNB1)Cl>p$FpHYWKz4d8osL3|8$X}$Q>^C
zvp!I!qqmj3BPTLjC!OGPoPJ7cJt7E-It6Uqg?D1PeHViIO>g@_%z|<RTr=26L<x7`
z74Tt7s@CJFrr1%C#kObx#gn9sDg1M{$aMYcT;d%QFU-$A^luB>&X&KPH=bUlxY%5^
z#*L7y32pI<T`Wg9^y;;uUODETk;u*^cwfTc$;h^Eem&dW0cD~EqaGI&7OD}fVeHTV
zPV>YWjK6-A;&GZP&Pf<#Qt}yGRslAoVeAbbFyZT{x0Te#O1r2l@mzE;K9;#sKst+f
z<9qmiHv=U(Mm;e2tXsAhU6DL14M!SxVPo`S@noOA-WOZ*cBK{2Lu^5=+kIN&YPY3a
zI;#>2r2~L}6w6%LpEiO7uA}|c`80e$5(ki)PVydOz%PgJ&-a2mCsTmzsJdtHLIF$P
zO%s0Ri<jQP#PXf&VNV>q8Cj$~=0KL2G0xX1lg6-^WuUG`?~`sFn3`)%hi)jD0yqU#
zY2}7KK3g~4Jk^{Yw<rh80`2vS)TvUEKf6ishBHqw@<5qA_)y&Yy)u8Pg1Q{-(X&B~
z1F(C)!Rr%5zppkXHs}X&@s(z!+|&vFI`{>1(L=5aZFGKRN4-&(T^p&+*k5;TBgU?2
zNO2;sIUxC1>V2tfkm{srFVXWavjT``pNMv$UU`DQp%i9x<@4SYF9&s=k0mKqNYoC)
zf$t_uu%Aog#c`;4WuF&UOYj$ZK^UM_mDpa((su6H&vq5fcepJi9^ZjXtVHk}9$w<F
z3)Sy9xjI2>w>(%3OaQN?_&M^Ela2JQ<*YZ$1;MgO)0D;(;PMLa!T)M-@6>89Ef-(d
zeTZ&=&j;QxU&DrS{wvKI1_Sc1mvE>#dj$LEV37nzI2ju-u0&LYv@cmq<B&^_e0jS0
zMsJ8(mj5<)aYs{;TOE=tAU0+-!~JwoelU1S-|E-?q0Ih>BkltBIha2s!+a7mUpmlT
z>mlRJeSH$O_maBD{c-bw5oB+qkgF<AJ@8`B$U1_Te7RnTX5DCyb&(d6^rUwPWuOey
z8&huVWOa^gGm)Hk$nMmSmhS3trl7{2{4CX=5dK>%%#~sjK=IWZT_oG=VGr{XOIWwM
zVOlh69TOLk$a(^BxdwbZVq<Lhsmcco*cAdMBwzVXab#OWti1Ai*bp2Rv0>UT?xAd`
z1;3y<^6H>S#cXZOB&ho$$@eoe7`+e9t&Btj2Qdssnx>JH;X840VA$8_X$rI!8loq0
znQ^vMK22J=t<u@SAUo7X>I~^4;*0c$**3^8-6H<-SM}WUSe2Bj5KT;qnYcD6IYO@@
z*pN`>Ib)o<BkrWvQLE7KaOjBM`?2=6Q8sjzm1g;rxN$F%Pls@P!&6?VcraulZm7Ci
z0<I2pD1t6}B>UR^yD2MJNve0cN4#!!d6mscCw9u>V^5~{itpS1jtH6(e4MzOYNG&|
zoQYmWKy;A1n%XR6t)rrf(!J~;z?jy(K><YV10b6F@ni4tT&Luk)B;zevt!tL(&&Y^
zGRL`+=5?i>hrrk=Ts4jvh4$KttF)4d?KVIQ(UpH)jweUrC73E#qhHSRao;_?o*#^=
zTG!DnD)`ArwPX$PFv?S!0PK9B9L)>zfZCQ>^zR~RgnIe53lB{Zk(DUy3Gn{70K3dc
zB_{(A`J%w4|73h{T^vB8r6^LJ^eh#t#qQx}Pb+rU=kyvu7f{!UC<d@^B5pFEuT-EG
zWSYFl78Ab#L^UbPa<B~v{Rx6fTn%FNtk4|}5z0PHm2e*h6B2Fp_$`uKkF*OwOsmHa
zN6Ht(I1xV)<o$G>pFA_uL&INQ7)Tm+&ja;oVFUiu%&WM3XcS_o%$K1DH|hLAyBGmh
z&#NGrcZ4&Ud`?2?yjQsNW7%7xn4T^MRR-{C(XOZVvk+#>&9HM3bdFCX$hG%@!Al-R
zO8*%&!B<|=+4JibX7(zVHDCe}g!l`q#v3o2kr8#XcP}!~Sd7zW+#6O<`YP_+){j*0
zT;%{6B<r~Ky-c-LKtr9zEH@(Ei8}m3Y>C~Ifnxt&XtU)6_%K-*Ed8%=*Ox;~5#c7z
zWtv+$cha0t4f=Xlfd#_W3?xT=M`1jK_*+s*ek~SJ2W{sx)Uw>sK)zKe@qjG=BFND;
zxJ0MYfnj>+s0(5!eQS}i`NEQ|kH3+Bol@0;sF@s(#ACVjg`Sq7#m!*x-%n0{0ertz
z68{@7!}(u$8AcXnj{g8MOayE!3~c{V|2Hnf&dkpFe`py3D0(pqYiAQj0(voP17{Nv
z6C*og6DU4DC?{t}69XG4_l+1gaFryhMOxi+Q2{P`3A+Mf(PX`1VS!>WB!Xm+#JOU~
zZ~|RVae-oDCDC)yOlOc{iMmLCBGC=}XYbRm(=N9fFZC6vo%JTOoeZ-V9;=Y7p?(bw
z$jIiP3SvYI14cweP65%?wKp;XDk?%EDk{0bX&a!i_Q9V($#z)-3KpD*_)mQhR1}!N
z0mDiJnAEZ;5rB={+JAf%0U0eh88JB(RCrY6#7|hb$X~!p1bPmXIaI*r1PF}GU~SOz
zo2Wo;E`$2I_gUh8vsw7?zY`M>-*Di`TYo6pc|icrgc#%)*srj!32Xfs7x3VQ-hQY-
z^8%5<4(a~c^}W2J1h(dgB$A7W`VjVD$2b7R7+7GAL4$z$u)v-OZ0q@RBI50V$>i*x
z@V79|{qIBb{{W!bhincZm|F*v13?J`uB8EwYH|7Za>Lw*RX>IifPA^I`KO1k?VkTc
z{-i*}zTv_&w260i2qW~`2NUlBxP<_ITyPO#;720>2nzTi64siNu;Cqm26PEs)d%mU
z2m`o~3Ihb%+||9zy->7sgV6?tcKui)K2gKIBrhW<pcZFg!vqK>eO1b%#s;n%#ciWM
znKjZu6tfTB(hmW~`g46*0$rTL0TJ)v<ucGN_fcqsAK_0Sh5{Cgh>F@pMglUx1DqJ$
zf_$Xx46ow8Y`$ZUy8f%-iggCZsh8~s+Jj@bf{(z*IsgS`ppe(_-~Dy{+9X0m2F|zl
zg82jP6ih7f^EWoc0M5_)>Q2!fzh4NYikDH~Y4!A$oVvO@Un02bmj7w@5fl^a-2XNU
z>gW8;F0YYk0sIa*H5Dj$q(lUeQPILdKuik;;>`^U7~-prN#r+TS%`xGaGihIFz%cm
z)$5xSptmnI2>;EEj+aEOg~9(8KS4WEDq_Q^Z}_LL<cIs{xAab5^GBcP_cm~eS6A1s
z>)rSB7oL3>_U7Uf9gtc}9>x{WIbn@e;ER13=Ecmy#XzCGY}3!Hg#qD;h=LGD_m@wM
zLksr?I0&ns@kP#W=>Z)4cY{ft1IW~0WXMmK+P^-%{LJsYzRKh_u!n0-L-NOUpg!yI
zR=tw&Dq&8anv9MVRzQIwoF3A3Ttgb_4teN?2<7Badl*0hBw+ZDFo3lepg%B@=z~>Z
zBNG6`1N4VYgh3gQ<L72F3aG%%+9lvVlvDp#&_9>@59al5!+}QYtlgtN>V{Wgz0Q;!
ztij}O-1n!L|A6<O7ZQB(gqMf7>?(X#Ty;uDFX%d0BwCslo(T5a*!aOa8Qw>yg^t7m
z#*+eT=$bzm60GTT9z0y<vov?l?&D*Pywq>Xk*0KD;x%5P9`YP@w?|%G#L=*k#*R3t
zl0J<>J)cYQDGMus{ekTB)(<o?rNV|qj^FPd(4L3AS7!aN?hX<o34_rQiW3&^(dZ^K
z11-Vl>}|!7C4!p89+~C=VYByzdkx|fvh&g8z0_z8=iiTyJ({<j%A~!=yVu^7y7zBb
z#Mi(!vDpE^cw(Jpnj5g@ICv%%!YPSw;R~%56oC(v$tm+bfD8-;bB0d02Q%Z}gW|c<
zKOELI1=)I{my!ui0U0)(pOL<U7b?8k<d!`)uN=E%kUBdn1A-Q+$r9R*$?F{0Yt-{t
z&#kn!rVVU#FJu2ULMWEJw10*vJFHZ2#Z0bhvueneOE`2pfy%I;N$rl!A)Wt<q>Z>J
zlIGw&^m)@8ldm}s<bfPNh{WE$Y}}5dvWvC*1U@(RshO9USk;i9-aQ?PKZ1^5HT(4W
z@%<=saLT}5Gx!CxMoGtgw=SC0ryT`;2At?6@J-6pY1Xps;&x0_(I$eu$K8-|3d$m%
z%&bBvf1xw&XvOg6X*o0O%$7-G?tOFUxsN^$eI3EknbBR(5?Pd>hT2Wi_tl$c+8;^y
zNN>%1VFd}T*W=u-!ELYWurVh`v}@hk>6H#PYI-PFoqPg01F!pt(HyUqBwv314`b&L
zoC_0Z+t{{k+c>dpJ6~+uww)8(wr$(?iJiRv4sO*OyullGS6B6{tE+eKwVH=w6qnhl
zD6J)u!U8IEanxV?UpfHZ9e$e0T)xrL6C$mC7z|+%Ps94QsW-Bp*%Nhe=1x&#ZOn1<
zT2C8NAH1|lFPvXbS=g3rCDLtLM~@-P*j;)j+%VDFFmjoeAv51$1oZd|vz#Hwm6w8T
zrZ}{mN7t`@u0^>}x#DONdLgJBgroRvTJ@f!<1WIk9q+ic8vY@M^8E63;Q6L<hKj!<
z>vH1-!U8wP?KY05M{qwWc}pT@yB^%`_fthtwkM25(uV(@k!yGUWMx7+*ARFsPhjsM
z$mAj>Wa%4Gdm>f}Xb#|4AvWSf<+{dlh9$knDRBA7yl2yEK-G4xnd5g))c-MUaN9oq
z6U3fV>sa>RvpgSiMzh=VLha-|88-Ss8t_|)akv<StZcTr86tMi?T-&rvW>EQ-<*G2
zn4jp4JTk$E-GmnS(z@R;Y0`CQQrkVW^)zg%7;HY-Vj1;vG|S)ZEnL?{!3CE)-O9<S
zh^@zi>&J%3Q!9zE)1Pha?6_A|c{~eYNE_)(F{5~yvx_RNVVwkBl9?g&suG)ghd&es
zpE~n(XCXR1EqqU3?f;hBXt4@A&9&ep1Yu7Q*_Kh+6$!f`IBCdHP;efEYd0tnOuJDe
zNOG`ErP*(*b>fwMNPy)Q{V$n1o(GOE>TlV~UEDSgcCzK3CAz(n55tj}oG0?*Uv&1#
zK7wXjX9=ylTPKLZDR&q4_B^2B7Ld8Z6yvP}tQ0R&ib}Id5S`Tz#E!n25a64fnL3qM
zmqKV_!IC@*0wW+l3TvsKTLc#;wO((8zpN_hN<89Kk7*EnqUkO5kV)(`g1rp<0jULE
z|A6!3ADd}@38q(QaWyWUjLlc?^SV!KmP=N0S~H=w@rv`%UiWPk*z%_>``35;;ydKe
z%orRPQ1jWn)r3dayj|UCG06MbSl!|bFranvi?j@?SnP#LURG*t^D}fVRu`%zCWlV%
z5R|)UUt}y$Canbs4(ij2HEr|$oGP5R+UQC7H)Y-x*X!LGi5uiwc-(W;I}tF`7ViQI
z)pF2n8SkBy%PWv^94M7TKF)+zC&Q=fULF2pA3P09g9|Qi)HCDo$?_NuvLdvttDLK2
zXtd0h1y<=K_mXv?9uVXC$B)Ae(-Gvuq<fFzl-SE3`3@CFJ)TbJzs&;5By!+_;7v$r
z3<D9vXr7CA2`*x5h)G1A(WXyv;qEjik1YHNnig#MXjdte`2wi)l2@|{%!M&DJ@`qI
z%j?`6lQw!d38+RaWz18kBNL0d8$EYDVrl`dchfztc6UdA-;SYk6DfCeaBngKn=mot
zsxb;U7pW7PO-N6`aE`Pt<oAoO2rX=@vB$TjP%|FTa=!K;tC7R?+k6+-bq{U&KgQ<`
z6kgbB>c(OXLC^E#n@gYfOa`U}%(ZYEO4EIuQr~=G%o{eMnZ<2qa|c~kx59JU+AG2F
z_WVMfXQN$l=O^lXc#|3I^%0T~*LR+2LlguBT^Z|s?d0eVE}Ilu>vwZ>9mkUncIE}Z
zI~ELHibBM=qwrA+6bN$hK7z3ehz*cpC$ADc&NVSqlO7)dPkOg-M_a2X(z4u1xB_a9
zUVx}#_P_sx%=+9b!X4y}=&AnGI<76Kbo)>iT#*Pht_AW3u?jg=9s}%{d`Pv^ljY^%
zm9x_H2Q=LoUqKUQL*gG`1NE@u+`JUNqqwKF74deZ<vP0}krLO_+M*P`dU`|#g#T=)
zOE<f0FQA}L50zQ5jiVmSQK+~DgZ%f4WkSCstoUYg%fz94TKf(34tlp^;x=0j(U>#+
z(Z14c8m}6999_Q#zOVFw;bw#>q_HJb=rq`5u1a*3>d;qAY6;hE<=~KzT5HoLmzmR#
z>_vTI2V6aA)J=<C&hkc>^fbZ+3n}PK?8y+&CT2`C*|>G=X>w)swxB>4YD??`xulo8
z0DNT$iAc<0uS=)qU<#sk%XAVU)2)~11CA00NI!f&@{&L9zfPA`dwTxBUX$i~|K!-{
zy-HF$2fcBpmYgl*l@%De#k4*CzVXG(-Lq@sU|Lx8vS8_{uF~qtXzxILm#X0gIB%`?
zv5#a918eXW)s5ka62`Kkb13$_Tud)3^I8Vo6t{ctsLn+C`94ic-8Yqli|15MrP$_N
z+m4?nRlwnoSx^4EfXFi*)*cQtjsy7(XGwd=ud^(Uvw32d%R2vz6*;?Kn!`l#aN@Iv
zdT@+h3SAGuYr1<9=vophnO^5kD2Bb6a!+rIHR2%Z8`WeY!0YM4*Sbm4;5cK>=ut5X
zFv&)3lG{uSNSLF1D+GK#JJp%@!01y-4hqNa^Base3Tk5GFk`?xw5;N@C;5Dj{9U|)
zk+2I}$ehEJq^N#eAIQnQ_54;RE0$2rb~3#T_ZNxPV@iF>n>uUgz+%bMCxU62VdyjV
z^SSavmu2dDg1~gae8Iw}?Bgk;G9jWB_n+drBiBETm&{XD1Gh^QMX?h=$o;s*_DM=x
z=5rcJ+`q06D7!(*zJ-ITuya%zkRqFnu}+}XpV6-)Y$q`(bv2#v$i)=}Yzzd>^<m`S
z_C9)C$0o}M>iTEkPC)IJ%=N>D*VSVkFhpiuMl=N48g1vZ3YEeHFZhTIkw>PsYcxk8
z*!dp<E39c0ap!|X77FKgjv7N7x`uv<rl*pobxY`EqqFVMUE5npDtPg6k{w#_JlL3D
z+bD{0K{Jd@8VuhRvQ9DJr5tSAFTF`8=vXo9!M{q}9xI~h_S9LbH&&6+_OI0q&DkVP
zeyT1mgMdlcxU1+9gy?S2M(_#5{h)2%rv^zE5c9ZhGVB;pl4Pf1Q;uj+xi(eOuBR~=
z7d{5{)j1?-N}J7y3^HC$l>1SYN7jg@?fVr$&_z<ub&44I6tAe$iU0L@Et!9Ru**ac
zl~?zQ;rf%E_l|mjlOC~cOltY)9i}1cje&$~{}S;ZeUi^QL05E2!lV0S)oQkq^NY2t
zke>4Qvlj3C6Z#zDZjP#eS^sYM2Z-MO1hS2Zf6|p#%XjhR>UsJoL!IYv_w@MjskT{G
zv?PZHcHf5pUyWMsvB+!lb|^MIqpAn+7#%!pD=c!Cd*yHoFGVI18^9{vW@%^2_Su$$
zRQW*7f#~>4R(l2J;F?S$qQ{n8V1IopOMb!F^BA?&Q9(f0SJ{cINroEQ3~pJCZ!^ln
z#Xwcl=8CnT4z``5rpk8}Tw)WIe-k3g*ah&}>I8012oB!xrQ$EekR?3|_~Q=G*y0B7
z`4I8*IFDTwBl2$w9#`k1wV{Le0E)Bn(WC@@6{5sjq@_VjaG~gj(j7KKxP2Xqg*S#4
zw$OattV^7>&wM?XOv<D5nsj5eup;^xET5*aY2CaEk|E^-fi5)?^0e_?&wHcaf>k{S
z#o&m<+h#elyBIU%SIh|6e_?!7<{mVC^n?IiUVgq97ku%1>ugny9zlgN*eY$B<Se%X
z*!<rS?#26k7Fpo^0=1jBNr57~mdYk6{@Gw+BJvsxc-8AMb51PYioQ##B3OQJf=4Ua
zh`DuFy|~RE*CW}dd4h0>Lv?AWv!|ldRLTeEuCprTn7W+HEV;{vCjx4>;cI_>I7HmF
z)(4SQ#3YaVVkdQH+Z2^KH>qA`p+`;l_uM^Ti3p_mEo4wvjQc^?O^(3$Dt+c=EI6GD
zcGDP2B(hiTgpp$PkP1Rx{RaMPDOR9oxHXa(tu#hi++v459@+g#`lh{2L-+69cUjDF
zlj<!~N%Obi+l*!$`D8zhzXy&EW#aIWPdV|IzB>=SQm1!3YhfuZIN8UQM|dEHr0wAG
zhoe^3^eqEaiqhF2O(lIx4&Wp;Q$`pxf5?+!DJLdUlUg&b2w|WBp2pc@cvSd%dUk)Q
zi5;HU`8PGPGApF+e3x42ols2TUMdLh@jR~1)G`IN?IBSh=I($U^$lOd8knMfUab$s
zb-fDDt%Ia&Nv~xE=jf#GsVdBG=Tr~WnxoRmuA%n?6M{0YHP6x?te1nbFHWKCG9BP_
zxem};KKNGvS*hpyjq%^7S}!|*gIeP=qDtaJs*v;H%EfUbg*DNUg_h1I;ZtBDZ~^<P
z(To4OvfnvFM4<*(hHYHhDhGbx>m<%Q0_npw>rFF4fR1r-KP~-(X;+_L<!|2JEVjeq
z$6L@DxJrC9*VA~c;BfwW(z;Cs-kYl=jb(&m!MIQ^jqFj|{OQ$=KuE@&rLf54SvNnl
za@8|5)Y0?4M2}DmKU=vkE!fvCW7vk#9o^hKE=m(S_1gNvIqD)M*wN;V|H!MGQLC%9
z$KrHg6es*Tf^r$-<LgWs?+lEyl|l;HA(|Q;sxv)kpuGfMIAA&vbD<j1OK+B}SJ?Uy
z33>MZ;PaWd#`0I_m~FU<Tzc>}5<L~-IWq&S?6eO-MQNF!8muUKyGm(07i7_?E-RUo
zYFbYp4!k9|ShI#%ql-eZUvIWckAJPyFf>j^RvSpnK$k=a2o`M~j*i!~2-x)@GvC)z
zP98dz7%)%jWL*(oF0gth97#5^j0rM#G$jiYGuU7)roF$(U}nXMntv0V@<LdcL$DoX
zD4_iP)}q97S_y_`rb^059G9W5d^OTI&t!(f6>?eC;@W9K*A{rPn1)YhG+B!)eJ_^o
z-&5qd&~?vMar<PtB&Rjg5(`7h`XI_)+d7o4w_8;A@vq8h)tno>jsi>Z68j;Bah8nU
z-TY~7hDNB&z6Z5`$I%(3jPYMuHc0H6q@0X+zvb5CT<_+0N}#}G3b3ZcBvFO)66t0r
zuJi*Z1QO+=QO+93&4YQ&?u3`@b>p7ueMu4aSwQ(otbf)ag9fglhS9@<J*j!ZogM}m
z{Y(k?TAfdb);_72lhb;Nf%ISmw-+6)>Oh>ic<0qxo+QCPc?4M|PfT1AB)qtIuH`;J
zw5hmdvR^K5iAjw!{^3IIvb3pXkn(H1b|cQi+{Rr)PurEk6kW|q+xqXT<r)U?l#D7g
zw>|Qdz$sG{=GYvQ&lYVi%(YmelwSCu2I1JPvgzv?EEuOBc)r643amJYBhwp>Ao%P$
zA#wgfF>a(a5g{Bgxj3$5P2QD^Ke%bK+WVJM1`-~rX6zeO&-(thfJbMDWQ@4Npu`qN
z7N6+EhI-;KdaiNp0}jOvVB<d=QyU8#IiD?Y+Lln-c;m0DRiC-T1t-iW$H_O52Ho!5
z!Tj{PS}+=g2-K?oj1A~$L`(_hu~?h`d^se|IS(oL7iv2V9<J~*wql2_Dn2YS&M2i*
zmUsr^0B0%9ykS4MyjGyNg0SG*a#BMBo>j_+`=0jzsgBA6^<!XJ)q=DoC}cN^=lII%
z@y5!PNfyQrHZWv)R;D=|UW6|UAH~gc7T70re_szmk7mIpc{Ok~f?Prxl1tT(oG!CZ
zfMdGZjC@m3Hy+Z=O(W#yz3c;;3piGtS|?sIpFe-L+fZc~aVS<Y?`H6ixn$hi-@SV}
zvT0;^7o00C)OdA8Z}6Tp^PRltq`BM`^9NmvSX1S3OBGo+3pxHZ)7rmgrofkFQhvAG
zHtkD17%b~S*s6dyALh?V<F>@Gy<@Qub6YBRZKEDSe0_HQDM9=iXC%*3c=?7$u1k_i
zy<EFQQ!()thd_YxUfN4-VV$WY968uETtZQ&m`>yyR69g==H0F>W+52BT+R=#EQbcM
zVI*q@<gj<9jRuspIXD1ryjjgcCSYpW4ER$fzO^u4Y6m`8axBx@`93eo!UawP_bY$s
z0V-Kxr8?Fvdf6MNkp!rsQhtjHJ+XWOc4KO358X0z5k2}Lc?^3z%G@Iz7MjgF9{akY
ze9c;(Ok3jIC%t)Ux-OGX)A5=mkbB!x3JzC2TLN^Z0$Dr5p_Pa@7l7vQaY6d<ZO@4Y
zPW<BkWTTCyK4;S`Z`3ONi9X8b*<*uE>CFpw8tg0_M+8@3eWx?Ip{INb5V>m#uj<h3
zn*PTN<~--ILbXqdwp?CUA_V+~ZmPH`#^sf2i_J{1MEpk+D$ir?PKTPuN&m<<*L?I_
zq!XQcpoE*qIFRH`#hdc~-irw@W!+8ksJUG8jZ-kC1Qbe&3X6$w<RWxaR=*2fo)(|9
zfX_!qQHnbFiKJTSg4%M|)qJq1<?1YdI-ai~*uRs!f?1LfR=)Jc^)&6F-8S{*MhMD@
zk=1{T1jR);Dhp0(#%V-bXZos?UUjD8rW>7(MS6&fVfut_LQcK}In5d5oJpl##Ufva
zE$IuQC7QO;u2CHpF0vcB&7ds5?ii{wZaF>+BDPJbk!eZa{Scd0hSs{=xs(dQmUHik
zfk6usE8owcpnb`$=u)*>E@e=rYIhDmW`@oj-D^arZc?5_TQY5_4+p>+rH{zN8R<fC
zVKh|elI=oWJ+a)SfbrKia$s#kyf~)tXjU+pZYXHt?MY<(i|5|e)z8Eek!H?kvD#vW
zz_8+G^xY_FQAJgi>siZbhw~)Q8}<N!k6dQX?iYEJV@Q5BlJ>Pm<R{$;CSvAWYOhj&
zoMSnGyUU!VnNe2g4oO`!fK30421N6bI4NI@`%M!OSKFIv=v$@rn`7i~80c)xZ@vY3
z7T#qoVjLy<Q+GSeuy|UEYqfeC0p*8;rq+Z|eHPd#Y`KyzA}O}m?3e|+4LOj$%#klq
zx(@^DDYYgue_}p-14D~kP%1Uj?VqvK?2!H1ONj18>P-96D~Y6!FDIR<(h><JP<^&U
zPP)@6XHu`Q%PT^;oM0sRUX><4%+ou^2tPMGMMhw?v%E)+3y53B?845Uer^xsyh3vZ
ze(VB|l)C1VH8x|rQg}mxG<wh9y)q`w|LYDC8_vEJ1gZ1|s<N6e^3(6^?Kj_OkzSTz
zxPI~IRS~93j&S<w|3O-<3W4I-QH5J&mI)76IrB^QGqXu_|MxfZSt|pzQog`jL!v1l
z##&XsIp1YC{lU9=k9op14o5V?X}XE1=ot^)UCl_NSK<1m)YP{v&H9!-w839WUtpcL
z+qoRaNg-Ewl`Ody7|9yJ!$!*sWOKZY?AL<2T&cy_S{*#^6R39BER1(%aYMYMbn>tB
z2pnU`k|vowpiZ#CbAV^-{zMxe+|@L-OETd{lb!;@Zz6_l7`Hgjl};Dgpng}%v;&ay
zV}6=h)W1VCme83L%EhWE44abHc7}tQInYQ1RV}2AD_ztSG$dI=Ui5tbu1@7%iB!U%
zXYdjGuioCS(syMA44^*);s(;5Q^|3|x$-(e`B;AGZ&z&A#Kb&3)t%kJ+q!_!5B=Bv
zV8yBS9mB&CvoDID>B*7sE5$MTYH@R9@*#?fVJidzaPXCC7>g9USen+7sEe&Vc+D~z
zgEzT_Mp@Ra!|{3%@|Bg*$$(70ddlss*leaBMU8VuAw>RNp7R*0{W3Nu-)#tLJ-Cv-
z(vEyOnWOP=4{)OCEiP*O%&_(~q8j@=`O!L??RaPO6y&dThS*95h)Z+N)}3cy1RW{J
z59r25-Z$`;UI4-trFO#6rLCD72nmL4SqYZk16hYe346z?H79VUPqi|UO!4@A^2hwH
zS2p`h&p`4V$Q@7cV&E=Aws=1zevudjq;9o_4O;wq{0i*koOX=3kw03NTAO4sFUYsm
zS&f-<z@vN@1j8uh1N|;Jb0G{Y-DN7*TNqh1aS(2x5}v#zR3i{_b)M+jdu0LqZDJ#r
z140w_iz(n2Kbw8e34CAXc=kc}o%ii`$B_Yg#}7Vo5Y9Ie!U9=Rne^JaccL|^QS&Ll
zG^BoAVBGQ#?ty$gWG-AjC@&&bU>OX|$GJ;`_Lt?4lkunVh#@9fDGJYfirr@ETLf+@
zUl4rIrkUS_MkGvY|Bon@oB96{rLr(FvHfp&`akGFPA<;>9sb{0D)WEnLAL*2^kAy1
zD&9ECfAZb|_BX?UAh=tM?c9AK2rN?v#Dx7@q-}CASd{IZogMae9S>9YJBrud*D+pa
z1Xcqz*R{INDjX{(ay4;eR9w=85cuIl<niG_SP$B|N}3OZaS%fzBNJmIBeBv_bx>aI
zKtJz<3idxQ2!^;&cZAILH$q`?a~#YfV`qw<xKMvaCt!d~L4mkE19iLU=z!EQF!FxE
zHrF3PhT(DtD1baE0%G97jss-BS4UT82yD!6-*PSl1%aDKU4V!X5!bT&(m+M4g<cFz
zM4<lc&M)AZe&;N#&%^w+GJ%8O?)+ZEX#P5li)-zko@QWRkPTZ!kmYzW4|1^wVlb3&
z4uBa4Hob#m0sf-F{cC6e|Ix-pVQLkS#l3!DpI#YTQ50Rk5B1Nc9t1a-2w^Tdhlqm6
zyWiO_pt2e7&jJS6JFn*LT>xi*e;g!`DacRPj$oC)Em+uB%liEK`sn5=(ji1x2C(da
zI#EDj2wrkg)e&w0@$j+6--6WOnMW@XfJl96!Gd$Y!67^_3dtnU&>7Be#r(iJfz8!<
z|1nVOhZ^}~ooybXHlFdr>gXm8n4`;|pArQ)8$=dA0Z-0T|GIS)<0Fv!x0zDIh{%gN
zW!W5?%oSooUdn{#bwQ;$isL;F)&$ak;Ly;}>`+voM$iD=*qKa!awX>`@b43_9~Qy2
zg9{^T6Ido+2JmBO4sd>dVs9=io?xK37kMIuctQMMx$!?l{a`lNP)s11fyCo|?*1IX
zvcIQi4E2y*ATFAD*+@`^@6YeA>ATMPnd^V5xBbF@T^FXcqfi<bmEnFdzUy>Umq*d}
z2gbp{m>Qk_BQVnO@c}W|VFaA{-xZre{YXp@T9*))ascJ^feYp@`-}4aE<rc{!olG0
z_q{6Dd)^u%1-!TqT{Sw?V+sBO|M<6k8_4_^?EF}#|Hg;?8pxr@tE>Bq&Hqpy|3)!2
zL+EsTqdsmA$;Zt-64^iUp(p&puYi5?j}~&MQ~X{fyRrG?!5~LuuJlPjyCnd11<9uL
z$Bmx)bI$C!JsXe$z{3JkZ1DQ~^00wO6GBh?ede6aZQj39xcJ;p4H)JhvA*4<9U~fI
zC+a*+PWHjDHn(s)i3JUwSRH}*0_K7(L7cv;(t)RCk#NV($OFy}`G89~UdH?EQj<aZ
z=^mz@I2}PU#Qae?1GKIU4vm4CWWFef0<<1D8-O!P{84!Wv`#ts0SVu7bJ)jzsE7et
z_Xb{evgRdw!!U;G@BhuJ`Y$;{wCp|Q2z?tI9)K|6{*<2K#P}^a3mo}hta-9#e+AC5
z2mbT}fJ^)CH!MFOf1HATq5M_>FOUMTY`>Tr`QTYy8(D&aRehv+w*IyL>bCEY0#mIY
zp!u#kzr-h6uY@|^N#>7`0y0hCxu04YHL1x%gg=1j9l|%dl^;Z()n~xJM_=xrz&V!O
z|F+4T_1|xjO&5$i*DAgGZ^_aP<5wL5hg)Ntzu&^I7a(uzQ}Ao84-sh`-86=e)7E7A
z44`zd=J6;OG9Kz^ok94+b$^Oad*o0nkYtkn);^#eW2JrBeXSAMA^$g|S1dZHd`Ir%
zVY*|?oLnmv_Y<8obGQ~eQ)&5W3H`~mdW@ZUgw2ayHO)MTirPNV7c&J#ic0(nmKe2(
zLCxw3kqsTcsez%+u~*Vz;*FDrt<CQ<i6&fPy6E0SZ=_+iT*Ef{=n!3Qw9~ctPCUcz
zA0X5eM(gKP=NK!UC1xe>XIE@mKB;wHfn`=ehf)<dF5}RrXV~~U!VD*R!vYZ-d=_#-
zlR!LaeGrmDPt4iwxx#0oHVTiLCe_voAh{4jfRA$1^y;+10d0^xy`eb1d6&WmF}9^1
zT=TGwKzI^@XIS+{7A6|#Z8dre#$etUwiCVKljrfTIWQ08&W&E%Ldw|n#JXUhR!241
zVRAdEINo@V77pLwgd*0NN75NtvrRd&^(~P?Cn{l9H2sH2&Zo!17Q}>P5x<lYS)Vs8
z89K+WZ#Ga#f0M8bvI~gR(sINqZ+>&IM}6>0<VnekT6jR`El&(@z<XCq$1LlGsePNA
zlU>V~(Pa7RP?$>w5L+~ZvP0%l^obfwWmt$+cp&U@h)Fk}&Ac5J=D9KcW2u05FoUtJ
zHVu}CZXJ@nS27G+IbG>xei+&N)r%w788==h&|Cd7bgtDnBV;mJfzrjbFlAak_Ro3j
znkpiD%`WgqcR6=0iKZ8F;-y28;94KE6u%icLAWtuHaUPW<feMlF@3_<;FcU4{dPCo
zm&x&AuowOvArbO1u^Tgdi89@$O6JER5!X+T1BSME0*{91M>EO8_83>4*Yx#G?i|d_
zljE%v>mj0jeTP)iO)rT;-iG&5gQWe2EOVmibD54fUj%72?C;Q0^c23DRvNLX3n^fp
zYH)=rI2p7OAKPyyqYTqm1IpRJ*yit-6mC9_8Ii)o;p$5W0A>O=l^&G)O-Hj}*`cUQ
z5xCFR*a`mAg7Y`ZPsBT5hNgB-(u#U&s_=Ry$}NR!R}^o}6vF`vTw3+_Y<~-PINtT5
zY;0sKAK5-h+NZd@CyA_dt7X;3=eES;yu5=YPMBk2*K>_z0n?w*y1-y|^r@4R?3O(d
z-{2uLYz2SHNfQ%`UTzcjFYMkam^FfH87rJ&?V?{}uz<FdBA<6xf4Pv?{D^Gvv;3ay
zHR>;s5W&%oLV7+W9I`rq8NG)ZByL*VqUA_in54u_&&U!=u<tgN=9v462w+J=QG8Ut
zfi%(wq^m{Y2w;4&8}v^P)9_{;&zfyL;x-)JA&JP@n*>B;A`nzf@5LZb(xzgb8S%Vb
zH+MqPDX?0aWrTYjSPMqa;i+%ki)YrkxAkn=z{x*18@R*78y>*bMdc5*GkmQ3)R7;5
zzPZ8yQxi>$!srle^_xHN7W-p63KaZ8B2r(geYy*{H!A7Vrwh?h84D+OT8Pk~Z%KR(
z-c&dW;ksGd#1|i~gd;c(5qRt9Z0FWQ5C{yjq)f=nBLsf>+<!oowLyF)%tC~I{j^v$
z=$G+^-5w7&8a~x-Q-y}+1^Hh4(R8UufprBZbyLq3e|szlUHqq>ZICMlb_s$3UxVp;
zN<W3dAKpqzi9h*=g`B$$vfE!}p=7{FF33EyNm;?Q3u2_-Xc^>CVhBU8vjayz0;X^Y
zw1=4_U>y(c-XDM>?49y@kcG{}j3`YSRfsKvJ|z1G`PiMhv}K#2h%c?|0S)&%tRT9k
z<M}UnQu#`6C2zn%ki`Uc-e=I9tPM7`GNsl>q78+y!fXwtTxlJ)krm+<{0daw+I`L{
zhet!az#vlw*z8!fbe?jrIRJs|W3;9v?Tq^SBmt~7v`+9-$v8s6G{OY$rjg83_YTfJ
z!=1?tXLOB$N`Rwn2W8LXty`h3Ww2$-rq>T!`eL59C`w_X7+);74t%TsM7n-MV4=`#
zUG7UPKNdG|UEf}Rw|C9Cfl+tOgTE$E85_S-u{?kTRF*cbE<iM|vg@EDj&S>+!L%#%
zLId6Xngz3O!Js^z8-*!m!0Iuf2NaiL?1dFTI*{QJBYf1F{vb3p+pEo98*}FZk2Snk
z)o>*6F>s-@otHU9g9FV>qQ_b?F!;Q|19-i{=j^fmyRkO8vL#fRXpx(qo_`BHqtQsC
zeLOUj7c`({cD`W+5xXKlN8&P?s|8~&&lWL}GqdO;2B$OBa->k!QuiX5uTsfNG!-SU
zZ!hN!8D04}jG&};ypx3@Dte1c?bt#v1#J@%kgx;JkK{k&4vX~<l-~jgV}C1FnIfmY
zfUlQ_x9mHI;*Q5ete`Mbz@s91fX0DnmV(?qf*C6z+&0bd^YrWh2<>boWnK=wC^RO-
zuhLx9zl_n_|8`B2IQ^>UX7RE}P7kAP{!6{^z!%~(LVi@t<(YXBY%pU0MIb|mnR!6r
zke_qH@^fBbwOm^uHE@Zk;}8;kScSSdoE^2}OH*n#a=zttmA3uC<-Jygzz+-%29N9K
zLduX&kAY>RM74vLt%5BZ9^`wbs4&ywiW1qRUVdRYz7=hKgqrMETiTOTCiiD6B4{+8
zI8uBOKlifIVZz>KwXYt?<Si#Jx}T0vd#EA_TZ0BG2$jUyG^|r$$b}>UQ2Jj5<^H9F
z56@!n9lx@4+H#QGw9OMQK7}yO#9{DClRDphHB^DR*l(72<$2xjo9b}-;==OQOU|Op
z(mIzb7Ab7su!S+bpt2PDe3Od2DFNA;d5+K99ZCMsargKVBY#ELrtx_<#+dv;pW&_n
zZS0ay0)}#zKiGivrqM9G82<;(uDY0!iRjn(+RjKc5cz%NYH@iE>An1(M2Nlq`w)+M
z?6iTtz(D0?#P_kr{FgjbDzuYyY`H;lwvuw9!Z_qsO@eS!e<d8_o`&{x-q-Ao>q^|r
zR3~T79(I2KFd7EZbAtuE>+RL=YQl|!>|<q~7Jgo2S*2CYZo=x}Gku;K$=ciULkN+P
z5_QKMT?_&Cnae_jpn*!bd7q?3N>Yy-h6DFnAH8BgydZh7Xq04-mc}CuYo<RxtWTJQ
zk3B*Js#2-MC4fd~5|e9|EMhAIdc<=E?;m7JfK0%j#Z%FwE+LQ>PaPdyv9Tl<Q;3w5
ztn|6$&XQiwKVx}?7*)y*_+E$gsj9}=$Qlf4y}#wtbgO{!OB>%7ExexsF;KAo+`s7m
zkczmmHp#qiAFLyY#P-eoS*SKCn)p_NDU|i4He377&rX>|-v}7EFCVgoBc3o*eHZVx
zBjL>nWhQ^Jv<#=&=x%5jS$~LrapU_^M{}%+^0{d{+#}JzI`^J}o}1s)SgkXvFX&T|
zUH2dkTrQ?gnp-s-=VKTFW!j<A&8+YNqX8rFCxf0M{N;0bmT*9v%C$K&8gejPmJCXp
z-xdPu@Uy$Y#wVsp7Qt2hpqS=xLwO&LqI$S7bmg4na8mw==GC6=skkTzEEUw_i-VBj
zq`ZZkFy~A5Qz6TR{*XdH5WUl-^z@pRyGX`l&y&2ixS^+y2QM=?<i9`_#JHlQT{^+s
zn^LHEro+VyrGOu*yBj#p-=MgoQ8nl?6giA_5j1sb&W#zq+xf&-LiN%r(G)qcU|JoS
zeY24cO@69yg6{4wE0Ucv1g;dqfUytWn{;$K8{q_`kX8|SB={;Mn@c+KOIoTbe5{)>
zk{b|+4UpvtJ%1zseZiPvBb*_CYNj?Imb{6^&L23uF_3;VxrdLvI&#HznO;NQM5GKW
zENY-6k1AQQKFWEeZ5L0^t0CXT37*B~pPrcCo>tbqg|UxHK{xxt^66DSu9<IAA&}Im
ze1+hNVvU;Xgj(b@zxvhWgz21kU-y9FPDHOe7^t14DDYf6I}WPkd&R&d(I9syJbqVB
zCB7b;-oB8Ez$wy7Eps^6f2Tdot$oP|BJEMr{7G~mQ*GAE5LUa?DKwh5K_T0$ODWx8
zxX13hPVa45KJho{$MHZ{9|VjRbSeESa6h+uY)E@{a*Iw*V*J(XcKhZfWd?BW%rbdC
zhS4{3_-{g$Zx5dIPw`Le^Jxrif20VfThz9AchIy{2#J=h5#&@=en?9JdTygnr@I3V
zbJ52Q_gw6xMT4lcE{E?27B-I%>>saPZM@$3+9J^;GmW>+w!%W6tFY@&-WK1Z7zS2E
zg;devD)UYxyd&+1c`W&q^UZ4ibLvc|8b+R;4$atx<2kc*4k}+emBvmOP3Q5MoLAN`
z57z8Jwnxe)K7`fcQ`)1kmyhg(vi1{EXQK~@A(N_907+opm@Fzc#;>c{K+vkIyT#>A
zlfA=9iMm>C;vJ}?PjdDLPZjww=3Nm*RnO~qB4hR;$e~Bls`hpy5x;bd2DCv4?@(p0
zwNYt9g1Rg0>niBdY!W#ms(d+#2kjzycZ9eY@XPlBw|z67wvrv-lnV+2#}<!jaEmNQ
zThhjFa43@{_SiVnJ8-_l!`>3$%{-WzS%#_iA|r#=SgJb;0$)J;T~gH}bpQ^%P80M(
z7g|%g8KupZNF-%t)yv|yex<yTX?B=qTYq1&lwar?p`2;>Hj7@jf#eaqe?7z~%hd3-
z=Ye{^S!^Ifz`y*m@qRPo{F{7(WFhqf=Y_%tnxfyWVNSY>Y<5L`$0%!s0$*52>saHJ
z|HeLN;mNMgyB~7q&vi9Qjy^c6geh?lJ_j+fvvj@WGTNcrD}&*(Plg_f3l-E(+7fsX
z6xazY8BIRwactI=cHBH9^Jr8gNVTya>H{ij^-M|%VOv!18BZUSo4Dei`__tT54;lG
zZb&w4R*y2}R!jdq(f0joA7%M1P2pN~{|RE=j1Vt~eVaV;D|bbFb#(9|EHHOdb$Q(h
z2^9nKZRt=*HEp~2Q&HmBbl+gk^Fk3F(miuX&ch6p&_w3MERhh=d-6OVT)0rVOyC-u
zI)M9uvzcvp5ZEz}2oiIa3B!5YprE{-H3H<s$f`D+_*(QlUE_|_=PT$N=#`PUQ(A%m
zOZaYg$>t<owRY0b>S>U`h_k^ia-|yK16z%cs&TSkrVd$(uZTY0<yQyJ==R5PF#N&1
z3;jlEUn=SwIdcgw7JmJQVj7JN(w6w^O(>5Wi1=Lma{rr??sTc}AP;efV^$6QkqbEx
z@fEde7c8MB`?|I@hfu!rO-yti1ai?#cyGaX`5mwwnvw|1^AWQ{mSQd`7adhL)9ej#
z6UT-&u;dkkuq%XC|340a1AKU7#K(sA=IU>`Ej`x9h1`&yv44xND{Z?vv=^54w=;X$
z9G<I<Tykvf@n9G^@}tLI$l`{FVQdn`Yl~1II3M+~eA69SH)z=64NCMyq_k6v-+(>W
z@Hu}Lm*q2chT;XE=-#$OyA@iw&EPs*`qiL36yEx;iNEXgym=T~(RyXe0xPX`!V1>^
zVT^a&#!e*@i6<T%t=QmoF=FDhL$5BF-R`sacC-8a6o|8X-WFxbaN71<m`K<hdVWSc
zg;mQqUEyY<sXj+qh3%oSW+KSa{*YfH28TR%TSJAYi_yp!Jk1rQIBulNCT}b5p?ibH
zCU?Z;zOo{B))gpJ<cH9Iutws)$-bDux~{+oV?N>UQ^X{-)1k=4=4$ps;&#yMIyKSg
z8x}C3*?GLg6c}{;TQzM?=f{U(aC=do!uUibJ{qW>S~(Rzc;n-lK6+NeJi0Qzv=L#*
z>=gA4mvB)z@K#B<;qQ9!pDq`H5M-Qo4?zu)Lv{4U*96<MQS`5=!%uBFZg8C25oOc7
z-{PRQ!xr-rdsVqtm2LwNzSmoeYjxE>W#hj)CZxy#ZSnSgdynoy<kv7Aq~V^HRP6LN
z>V+trkDNns$p`l?@O|=-8{80ZFtS*awvRX6D5+_yXVim1jF<AUK!RL<fnbG;j&beQ
ze?lwCtT|{#F05HSlN)KC;@<Itu_Z81QVJF<z}jykTW`rQ8$E7r`X<cFcljv@jh(i5
zr_`G4X`nK)r964wr}dbou4VoO40(wC$&maqCpOy9Cdy$|yJw?yKc+ehciWZCA#cKu
z2YSWUaH?o*!dBIy-uCz_;&5aK3;zIOL{nn1$Fermjh;4pfXjv!vo1RmB7&FNAOFU4
zI1K6WX0UCR$Smn+4AI>}Dh7Xm=SCqtEL-g(b1zoC(XdHoDLr~}6&F?g4mL%{vdPb3
zeA9|S44Gw@+a%YhS`B9xF+zi9nFOg`Hp;}X!<Y72(d!nU`6?8nd!0H`rKi|HUxzLr
z(^aLu!XvfUSS(Jh1CB=Cn?6v!%URn%a+RzE&EJRzs>I4=h=nYikXfHrieXn8?fIjC
zqtUchme!lkU3*(R2rw{X^Rs<kP*In%_yRIh^<f@Y%s0dwmpS*O+@e8p!;<$z5~z9g
z*x_%0fzcgZZa*9Wbe`CdyJYDXB#FFU*n&fmAf53hsmtVveL5airp?rd>9?}0_HrzL
z;wfOB{bfE5%J}a#y5v~D)*Lz+r@9QZ(f@^x?ywY^a{-b#amZZx$x|^eVO6rI`0$`Y
zc+c<)$4J9=1SiV@xd0j*)v>~vs2$p<sHk9KP(j;7S7(ikju6yW>$bE5E-js0<JXjT
z*ET=#g)j3k1yi|(T?@{k(V;!jHMUUIO`nT;m0pT1-yQ|-UQWJ?44m#{f1t~ZpoFFD
zB#09}AgNsep>i<MU}!mYThI}H1GndnCS`e7IteGL5H6LUvFZ9$*mc45?>|td>wT5{
zO)lH@>dKM(Ba{ueoo}zp`*9K=`TUnf42F&%IIScr&40c+pPpnYOm)2}(yRXMn4j<Q
z&`cfEkNs*JBRbz3pY!n#)WDR^Txt{t&F+lWGCIkYm>hXwa4RP0!~u!6lLAXW8NtfS
zF&@=^`epHZ?6<X)G?<c;typomqYv-;hD#@x<{Lo{YouJq<Vaaw$w2$5It?*GS;X+-
zpMV+ESXIACS4Ow8Jognd<9GOnD|k&jVU$<0=g~hxd#s?nH|rgrMVyZ}{00=I)f@K<
zAcD!G5qx4r??xIJH7Am9GMv3QfrffzUp^uK<fGkvPDD$tcL%j&^w*mGlblVNek!_f
zT;KPf4v5C&OYDhKq7zNMb{%r1XsgYy=%=%xBW6vPeg3PTxUg3ZEeh#Os#Fet5XZ_n
zqW-ho-}-~<8vK!c1XJ6(9c)6f88haT5zP8*Pn)e1hM+LJsKE6)_Pn?Amw7K@-bSmg
z!+|Ou%%W3jkeNJjc`zS2hdZATankZ2rdvP-+<u@o*9jWnO7#fS2HsTu?8q3){LEpe
z9f_2X>tJb2H~B_J(SH878(JqS&sl26#C6irIzl01Rv#m*QQ^+hb9;9|2lQ$#C&JU_
ztQyAMnjul%mD-EvGtl8P5ShLHqJ%^%ji}41K=6wGCv_yNxmem7_F**975#pSYJ>xw
zysIHs@^aUt^RN<(qu~<nRXu&tg>h*JAEl#ujkXb0h%vB=Fc9{1)k+<B^vs0dxb%Z9
zU(9=N(Vt#HW1YhGBtQ;f!plUWT2(!-P#Zh&+4cR@2NK%Q;nJWVPg-Dy_8e$=2@C_a
z>1-MOaP6{;u*XEy1HTJJXrM1dN9XpJefRb9+j_`{U9tKEJ9n@d52}%nB#(fAT#wW4
zHYr*B)l{k5RPZAGi{5~}pe$QT^lq&3NX8h$MO(|_qEb#|on%vb9lw&>zNO$QFn}Su
z!L1yjCR^&up|soLE=8Qbzjch=n2(l?dUdXPX!aN;F0$GY8!zQnrMt-{oMG6zadU>c
zs8<)PWlBBwhgz<J5*e)^+L;Lxgeh<R&B=QdS9+nzwtmj|KVFgaiw|#m;UHV+OSL+w
zs$Z!TzYh18aA-1hRYBVCMMbsJcG<1yXA3MyOyI+`1gq^w$_?m>`U<ES?XV+O1@4SA
z&!0%PaAToBRbVZHZ;a*$sV<tTm3e7zQ*vK-gmt{Io6k8DN}Z7+YfaY%P(To-fTiu(
zf@+PO*I0I)RsFHtG(hA6C$KB^k1WbZR(q&G$U2EGWm8iv^+E2*jxXYE%nVav*y^){
z+8|+2%=)zWvxZJJ1eV-SDVgA!RIlMn^F%nQ=S0d}lCM8NQjETyU{JWDQ~R$C!;@O=
zAk@metDBuzqWvr~1NJ}Ft$wA#UZboxk8Xh{At>suLGcM-G4qZWmH_<{Cm><mlelu^
ziA85!^%DF%VP&j%m;aFcp#a|r<JB#cA&!fXn8EcKU=qzqaBf;s)+&kX(W}yAimH{D
zIS5nI=GrZn85xbnHxokmH?5*pvq7A!^SR00<Q|s4H&Yaq!$hl4xYPu4w4(R{eahi$
zlEU>h@z=cXNoC7O+lIsb?~nAE#`d_liDZ|tUumpujSwFqu9Rvq<L^;vmhKSXSo<l@
z9j-KfvBI-1UJX{DBAwehsoZd=)}o_`vFXUKaMp~@J)Gu-(3LokB0)<Qz~q)Ja)EhP
zyIJ(E@?8v{Rs$X_jhaqK&YIJK+t{{tdFE+$eE9C`RhMuS2sKoZZ*9Wo0?Y*ykdVH#
ziH0%B+}2A@S9F}E?fO7#mpBtHJ6$nXw}aV<N$rp_lUR&+k3*v^A2&hK9V{4-kirY%
zey$l?bUTg1gdOdmn!|4SxsZe|6b1T(p))6UEJ`Ju|1q77Mil3(UH1P?X2-zG-KY@?
zuLNsBJ(pjn=B-9&He+J5xX}2{c(PosChb(<Uwq_B2ws#&m=1W5dC<8#Fh#K;M5pAl
zcgQ@T6p)*TpmPP_r2|g7Dc~zhPbOzYyKTay$E>Fo38`6GCS?m;F;}GI=XR<8-KwBc
zw!kHeP^a&AZvs*D>$pMgIxsTBKCHOF*2&~GJ6>Jm3|tS)?2Oj$_!c|T|3QfPXO4zU
z@AcaMcY0w;)HI<ev6qx`U(OOsGSW(*kmN{)p?;7PFCl2r)6xzT>f3S&yDg8?L?Be1
zJ9^QuotW=uT8c(ffwN++`9|lzigOZd31b<H^;8A!mB@BY>Z;EXykfbEA*`xynbI1j
z#ao5UNSq4D@T0SU%bUyEB;L2_n@{DJlx1%B?jnwCv~=4u${$lj1#T2kG-0d7CPqR%
zXAe2MITR`=JNu6NIZD@j=XUXu);(0*m+-AGFLpU~`YF;J_`0r3e>v;PnwraJu4Srb
z2u$(wz*e&1sBSPu(z|mO5br1cY&%Y1`k{at`*S3V5%>xIl*F?|Bf6<QQ@jCK%Ckb$
z(yKW$-_*y6w3aF?eklBcA$WOiJM=_hYLf!-O?uC0c2miL(O!SxQOBxY%$G~(^5Axk
za_zQ_1aRw9E{CD)$Ccx)AZ3qA6p_wsgTe)bKy$0-5l;CY+1{-Yoa_5(^j%KrC*WE0
z#YgKvF(<UUK2Is{&~OdONv3Y=gRDAQiztRePZj}Zu|HgUi|L;YB-f#CvUw}i<e>0_
znM(iIz}FO$Sqb#rLi&5hypcP?%SIQ|Gs|KJV_auMl1}T$FE+(7|2uUF_x2zDvum(=
z5b-Qi4B9K{=C&1KJNSLh{OGGck?lh_SWS7ol{RZ@F85S)P<E}C_xYJ1$AXute56Ln
z;Put-y@>*h(it|5Zr1*5os84&1a4Ybw6E?EY!jo076`?SG|VsCD`*>0NaP4(Z;qtK
z(49Ve91@p;b^<dJT#HstomZ~8f*rY`e>n_CC$uI`4qh=xP5f;o-7jvyUxdSJ{NBy|
z5704ttA^9qqf0lHUpZ2HiWlt_5&$qHD{A!coV2sOYp#f(!Ihpv<{x<yRg;p8FAc<B
zI5_!>y2n~z?vcB=5IaXZ+Kk2X`lBds5XeXS9q#oVkYv^-@7IvulTnq)E5l(CGH*4}
zvl3qxa!Z{38k*bgjfg{N$$(dGVM3PeBtmbgfsX4jW-wwV)m;^;@*8kxeHBiyeclnF
z1c<H##j(4NKoISq%-t)stCJBEVG$>6^r?C}PJAF)>An&RKcDMi-0D1w_B}(?gHIq|
z8xLrG9*BxaUCq-h$szpEgQ+VD%604RF^CfLz^lLL`7@T0#Gf-Iw$1)xAVdbr<$^^<
zfU3kPgI1D0UcG&=3$*tnCob&Ux9KgiIr*M^Tu=^uFJ>fVF8$$-{WvDl5H=GSEyBcf
z|CFNjbftrl<bjb}T>iZvD|?-$PA7z>UlOxvUkic^GDv9RKdb28QBHF=trZ(WSlK#c
z4(wm;Lu5ue_eKSYm^*ldS{}X}gey$eI6-toUVdn=CflJXeL4O#l60nXH(}mf(IQ4L
z(rrGy4u#%!UQ{Zeo?*EUJA~Li<?Q3=N4!1RsVtq@SQi7ftIr!ivW*b|+F&GU5dG`I
z)Q%HFpUJ*(ZjKz^|8^OD;)N>~X@r9|dOUP?NQOS<o|0+Q_e(~M!`RC0)#{<i&XZD1
z&JRQ@kBcyu7Nz8+UHypdCvZ0=Sq$Bzlm4-}wqZ#gzOMP!jbI=gj1Y~ORknu=v7fGi
zjx@{GPC<ESGijQlPyKEzF<VA$^NvKWF?rZS9<bI`KGIl_I|CfCs45=H{AR^Plhqbz
z+S9|9M1PgXBWs=Fs(PxQjYQ&+DzC10sW9IC)le6cT}*DR#4}Q7;7{$*QmMzZ@D^pO
z3JY(TM%^?QJ)7bycZ6WmRgzflomKq0TAX{&9P2$%B#1djhNi@E7fx2I;F|h(euApD
zM1Uc)o}#JqFCX6oe(f$<!(94ru0=)J=%0)V3tJ;^BB9MI868Dc`_HTlj`4v6sTAW+
z+eJF-h~?+MqpkeB&EkjKAn5@^9)_`B_>}tVL^?dH0e80`kFY(xkm{N{egaimMvd{x
zv~rwK5S(oMwtEK?0rO}Ns@Hcjt%8n!;|anjbFg{eMfb{)-;JlkSvsotZ2TAU<=DNU
zY~}%H=MwJZ1b+x&*QF3<LZS^>^^|t9wdt4@vp-ZnONb&yA~nKTwsc~+z}V%0NnS|f
zRjN}xuSsn9lTcOOvmko%Is4_4f*QJl_TjnJIq32rq>4V!^2PvwQS%e_ySutX2k0x*
z+rxm6s=qBvTS3QWUEcF}CTX|D**Rk`aP7|UKaMi}&t<oUnBpzFyg~1&CeBrmt|57?
zF!o2#9e;QFCnfyy=8;PGKd2xEwzyW6kJ($|LELv%`Ksg@a<E_3QdQ2T(S)LRn~m^^
zgI;AAdYU%(+$z9MC<yW5J)w9X|AlMvWI!8@P|l4ye)W!n`QW?pzr}2AQ9~Ua6&U4X
zUnGU7Sr%spUcvYibmT@?5m}jv{abNSEZ{@0791cOns9%}N~N;eKuzm26+$klQAg1@
zMWPM8A(a+ImKyPNUK$%e`uw+mFxYeLxC>24d;Ql>`{Ra*$=<U?`$baRv>0aQ)uAe;
zN%1rV{x@V9{o<-?P(CGs|BdolrcmHH>>(_Ju#!E>Vm4-L<2EcVVh6^h3$_(_k}m&N
z8PTX!g~U1|v$!+sNZ3lV@<cEbH<e|=@R7=6b;3Uu1(gL;f(`oVun#t!?>{t%B%P6e
zemOw-EwD=<xM8tC_oxCDH1G?ab~=W0VXQN<^6ES8xN}tlB^roaonKP7=qK3ttE6xT
z(_|Pc>}E9{VkU$b9Mz+)nM9*eJuT%Pp$e@G)VD-Cc-Bic0(<*B{<bzd0`F3DuFgq`
z(*jHX4u*glUpW+snyxuEVQY~JwEWqk)hi9ip4a`s0PE8Nc#B~v)x!gZ3-3iRqqX}F
z57?iv?QUbw9g^dl@Lm1$w??w6JLyLXxvb&vI-P*sxxzGzag!ehBzxkEzN}2O=+2E(
zEZi$b`9(wi;<?w`(}D2BGN?)LYTohUv>KQ6CzxpdI_Ea0gUO%GcIe=lXm#J6L)pX$
zIT+Lex$T2g)DQzLt^wi)_Rifbgw4Z~cp#0^O;5FPU<_aUilOZ}B9)w;&rn1ksI%gq
zRerL=Z^B*=dHXA&?YMEUBzro`y)kKhZDlcF1&`87ytZs9Pg*^1*rFZklFEM>2sHGC
zYk>KzGV$7Pi_BW`FPd*9<cMY}sDBoUe@);jQMFjnX9hu)ax0LCKRQ0@fm;S`(#fIr
z1k_IEXZ_0isQaL4Y_7IQ(zilLPb#F`=qc1EWpTO?tSs|M9>o@mUxNQ}EXa?!*@fB!
zkVJY=LJt{~l7zh^8Qt@?9Sy^Yn446HFBp24nnt=H5d7Dvn5(gwn~|Af=?zY*bEHg2
zkV%3mI2>qv6U+&c6r4@|@VG0XXuZBuMQM3u%(0B&rTIG7U4!I2ykonzVb*$nuf00x
z>p{jdFRrz2iuhZ*_Bvd;;G;UJ)Wrb(<7Sw`?mv{zWT08@*||RL%eo=$N+muW;Cd}C
zNDz915WnZ;TP!$)gK%}Gy56&B*(jb8(TnZEnmO9_Ual9kaw&@g4htS|X^}{A0{}l|
zCSu)Dq#}FuqjWHuzB|<a_lDx^xcb)m(s*?I&^QzCWu$xb1DmWhkV`%I<|3-6B{>J@
zZUh|TrJ@vi)kZ^`1!s;r<zA3CU7xq1C2-Yt15QrxF8qHOJEs^?q6XczZQHhOyHDFT
zPTRI^+qP}nwr%&E<Yw;7!~ZakwO=c#q;}S~)_&2SI?yqF{;#)=80>^tM3vz^vH-E)
z!4AN#vH=-5F_p)J#x;$ia%F1BRo=*4%F30jFg4afH7~J?+vgIRO01|(iNKc8ml$Y~
z5;$8@SZ>8IN^o9Uqz$REVRR!GC$lodA#c53?d9l3T-phq^a6-9EI1{zC1|t6TiU^j
z6zoKmS4k9p)P@su9A<)Iy6)mF7oUDX=8_1q0=E0&5pcj@n_z>!2N_tf!a1d>l>&N-
zs?}E~V8Co5@8^Wxc4QB?Ut-br&_%JP+#stzpQDYl-6=6HDEY1trD{N9C>$8$P4?)I
zK@bI>jjc->#n34#wok@w=dE;{P#l3KXo{3EFsrJh7)x)!3L}9}*y^0mC#A`grmL~N
zN(-yxUAF9xcYdwE=?`q26are3XdsOOdUdT6UbF{CDV!9L>xYLci;OEpu`h)zsE9^N
zuqU5SPTB&vX&dm1p{;B2`S*z*a;1vyJ=ndrGhP&aKZQb*S2Vlay7PR$(=ti<vFrdk
zy3<i1&5fnm@R@d2ZanRBY6xT>gE%ScUS=kM-I%!|iJLn%V1R{y&*#bXi!Sns*lkXw
ziv9?~CN$$Tbr(OdZ`dGa7j1*si}sG|=T`XS`|S=-Izh84rF7^vhzg_BFwUbg3nm^i
z^uD8DF!Of9OYO`>f6n$6vbe#8BGA4RYUTfWADbbv8lT=RA1)!IXw<c%T)pF#S_DVg
zV#O?=?r+BH8m+R{jbd}+hO599X!}v`v3{WvnbES9M1<cUi?E=_uL<$UZNhMC+odwZ
zB8hs>B@WPpQDJ-_Fdi}%e-Qo&zLyJ<dBbsAw0*T$8VK&WrHlhfv|Gcio2#5kTrR4o
z4Xe`1QobcO=I*K^j`LRMVgi9O<Y(O<NzZfV82ZG3ka}QtN2Obg+e4?+#rA?ZH9whC
zIkt!)UeD6_Q}B^eE;iFJV;8kh=u($tV!W>2;iTY?y|#n{$nAKmi+Y?>mu{_x!k-{`
zCf_)|Syn2Mw&`{f?%W|~W>|9k(05$PwNxS$*XbFVk4btj32}^8#7@OKeL&_|)WJa4
z^H%sa*smE^Y8F#ym~}ENC%&cMgbS%kSMvi*IsPLJhl<{k0g&}Ko=}Ul5&$J|-T9^6
zy%^cWVR<kCyFE37OmIKidb<EG=Ycudqt0EOtWrKERE|-1bgy>mExt~b!8K1?l&^;O
zkP^r%QvVMEoU4xd&ah=_az^3!{&%TWH$e)|A5#%D4g6qh^|ABv$tyv$q5W|DOf|z1
z6PfYlly4OVrY&n>E^EIu%OQF5hXa_y>I4F_P%?=)AFB{j#Cf{Jq^S=VWGxzcF=`^6
zlRdRFk2FC*{D;?sR5_iv0Y%jJl<{0u9=Dk?@5QK)cLI`aT2+j7pv>!U0%GDjB$xJt
zk35e9J3@Psml;Ul=xf*cJyiY5FVo7!PHh9sc+J2g58-<T&<shJ&~G7XBT>FWT@RW*
zcx{4D0uR%#B(220a_9`xM$*4F?NHG#p%7jHYE*YNX>PgnxM_*1S!>RnR76uA;nm6c
zo3}D#$7=b5cPX)fO%Fk>A!K4r@`wolK`eJuXhSXYI{^{0uJ8%tHE(F!>q4p<gvA5$
zJ(JZbD4(==?&G&-K|D2lAPh5sY)FVQx_N)iLOG1`jP;HGs-c@d)#LsPH;P~|)xu6#
z{o9imqsf)%?`rNc4|iH+2wz@2Hw`k?IruAHagrqJi(McC;f~;&(zjY1To!C$X04m{
z94y&=+hM>$Ahb97Jf|VeSgXZEd-k7zG7l)Pa#4x9uMtnrFy>eGUOhZ|*VWX@W@Q=k
zuxv`MeyzPk>&^TrLX)=eon`m|;icufKs9njwW0iE#O!~43F(+y2Co^m#5J6L?i-G<
z0+-OPSeS8b3KhsVKBYyIGp5N!7#atNo}=T7(-2&t!|7*><}xf8!h!ABzwC$&Ps_Tb
z#{YBk_tHozmF>C<zT;@h*_49>e^K=De$%tlYX!tDN-YUGW6Rp{j^k$5r)ZCEgYy&}
zB`kT&k#+shOK!k_J}v~0Uw+k5nkNpTGS+`Ijoj*}rm<E*tse!>_Y(N&>mI|=PmMA{
zyM9k6b{=WK>`KP+Eul7o)aR3FY<>kpOvRPcuTmgWewsc)Xz2upCY~Fmx}OpP+$y(_
zG;I%%UO6}P=vdlfi?h2#J4;RaX8{e{!~)<eTSAo^Hs2%_q+YUubj;&BBagznZXs{(
zDl*vso}F>FA{%_(V#yo)4}X(Cg=K@4NFWNIGN^Cq^_vV*^^>q}ciFzx!P6`56VpkW
zROZegf2MwYM=Est3YVM+AkFG0sm4EBhu*?{igN=e%_W-SuriTHV>n`<(fy%x%$6E8
zq@GACz-yi~e%NXYSiB2F()2sQxFlXsV}{aSC@>aur59Seff3%$KbHGD_Rw;eDy{Fa
zm-m&MGQkfxo%B#?Y$l@RY#vDVgBi_>CyX(01i~x^oG2L{a?OB`v4*1^+dVO0r<;Cd
zi!!^H8iKDVRA%%&O*R>asS1J%s*6vZJO`WXDS>KIq4IE!J#g(~mqrj6qa7%@9s&G&
zc|da&WteAk?)sh9%V!}=if<#KI7yF~Qpfy}OKXVW_7SE`2}&M9xzJW$W!BBnq*v4@
zH&CQLk2dha!R6+XBo}*=UT7*spNDbl)R@9xi*g01T`TYCVm{9?<@giR+iJ>=M{>c6
zIJRup^M&V33sM0kSG#)an8p}O{yQxd@%#1gYY&p4A2)R?*GQ)%I8-f{L#5STweDWp
zv4aJSm`;z%4}|x^Ntqw^g*6Ui-g>DBVY~69ZW1NeR?esQc%d=6C=lLEeAi48%FP&M
z<eYRQ?Qkh((z4{jLj!|_qM{pM1-U4D{lj-EnkN{KIOV+-CHNX;6`k&}S6hU5sGoFb
zG0|b?Ym`&LCy#|^3uLis@!VEUX^O}DBmn{l-(i~)fB)jlPNqVq$x<}d2dSMMOibvM
zJ-CY6;uxyZ>@QUsa3C()<moN20-yow2lswfb`Q`N8sAApoW}F}w-iO4zpd2iMC>66
zlA8U?qyT-t8|6*t@;B5G-+Qd=&!mMk6TK<aeaRl)ciMhr5t;?}$N7+3;|o+UUMta!
z3k@j==SWYdvVC-<vmy(nJ%nKownBxDl_eZCE2ni<B_Bw80dpsE($@=}BVKHW9;`Yf
zPuk)7Cl99-j0^VgPawiND{W~43*1h+QIY+^Um}$S{-?przXc3|t)V3p56^#ZVi*Y+
z*%+Ar`-zEwos;qZn+X5+UWS8{jp_gF9&80wU%W+Uqa$GLzy=Frv381BqOh<SXVizl
zX4F(90RgdyR}?|8I2Xf$JdcC3DCXWdZ}t8Cb?&}xzTDt@<$UEl<(zfQHNyt4jN$9;
zqFn$36D0ccr4JZ~@<%~QPY)IbC_rEUh#(HZFYu5M=NRO36L60qsJ(qK9-`b=0cBf%
zg`!Dz94pw=k_b`28J?U#9RmU_5hzmHUqFBmLE?d)Xh9aL%=lmZ2GkM?U^yNljoVKc
z@%-QrtkqQ@ANu<l;wZoYxIZD`z^ixA|6zuKonr|LaH&a*voC8NHGf4;z?%dGPWb(c
z95giw80wgal;Z68m{kAziq6lev3GPD{3)Oi$IpYoH7%2T3hk}ZA}^(x`KopY1OTTm
z6wdCIpXWn@J1Mmp8zzj6qd%V^h?{E!*Z|rQxL^|ac^MC&Sw}yNe+>SI;12M23ztA0
z;LEjxztER7fY=9IOE_P`4BaXS9>)N_DUiG0ZzB#Ih{6^He-6vwGt&I}C`M#d%TseO
zyXx;;(5}@#_U6dhf8`AA*Ln{30?bvg9lC8W+lQ6%jT+V?HyN=dLc+`p1Lzp;t(lLs
z4I3t6#|^)~cjlE5=sfuD6MLf{vBjkqtNz&zkwu_ShgaWH$~R$-tdF0~69b9@3do;i
zPJuy<U<&(!;H`2<7$1Gjmi#Rhb33><7kUBIJk$o{1im#epC7>IW+jJ#U}75h<o4Be
z)E5x|25e&>pCS)-3NHrWD-ri^$ojYcJd$0YC-Ag@IcR`?o?dNl?$kU{qbO09*H8X0
zpGkjc2|OMJ*8R`&<E}0!Xc7Bdtezsy7(EFTP=MgUfq=b&WZthY^11rpul884;WC1Y
zMBWkprKs(ap4H2j5}39x93=Z)Z+lam`SXCE_HWoXq67+vXs`coF6r+-{h<7LZ^PH_
z@K3LFfEx$rH<{h{_}ec+RrbCdpReNF!?QrXxFB*k9>mbEVzj<5OdZ)2+Rgq~uP$p6
zs|U#u;)}V)V_@(P!@xG7oo!k*91f`Sn=q^I8my0O7BUXtqF~2A2O-lL0>YWE*BN6V
zpO_xWjk(1?1>v01({8~81jWM3Y8zyD1Yq^{R+TrvS)mxQLLNQ_Z)yv}?XDph6cj-m
zg%bqAb{_v6+&S2rbB$qvfZ)4c$*zPp5=GfJdjbL&)WvV~91~L054Qh&0ZPy}G!!7{
z^$+NL{>yIwk$_;Rub`8{TW=o7pSg2Agl>paUjohffUtk3I!C<(T#cK(c^H2Xp5al1
zMm>H*e%D#bLNl>H;NEF%o??E2ef88C(%503(y+vZiiL3}g|X$Nk1@O6)WD7Fd`oh$
zpzq<k8vBHuDd4r}MlQ{Y<gHKyN;ditphrz<L01>2+QWf+ljUhp`G`ipXyTkb(K6fa
zGmWC=L3)wJTI>@tv}~giP}7PZwzzE08ARh-_9oXiT+bZic=l;*GiB~Nq|@OM)qGd-
zmX{PWa)10X6c+3}CE-=97QgLOp5juNt;RWD&K;f#$55(S*kIF3m<77jAkT%=Y+qqC
zxRtDw>pINFE^e|JP|_Qm*b5UNY{K`av<@b#cxJ0v9Qd(OBR;iSaPi4ibhS@ddvrg~
zfJOpl5{IEM@K#nomeR}oG>ctxa9(lD`Mhlh5RG_Z*sETpo*BoR?P<dLoxz{VT<~3K
z{=pvNgHggI413S(gif7T@0Q{*S4UR3+4*+L>5+sojZbBO6@2~K8i%`1k7@FNp~hbN
z6~|g>j6%eo19IY*)eu?}6R%;X@m(GG7bo)3C8=8Uq-#pBuqkhNC02H9aw=89ns$t2
zCH2;6jT#2(UR=R+{!@d81`=2c-)R6nx7jm;@&k(Ao<%z1JAZ)eQZ9CSO@ps%XsF7U
z!rRzaH&6f@!;rr@@@tp3fhy|SbJ0y}urSKtL6`~D-;Tcg*~NuXsv()#n;`Hoi;k9G
zI^89ZzB57Jb_U~SU+thSj{hP(8ChoCPmu$MMP=D>nS_z>)>v_>Y?GaBpS9E_mcb;5
z3zwL+c}tZhKf5`ERr#K<afeI!R;2BZp;&mGr#mHcN+_7LKV**>d8Uk2w_L5!RO!r5
zj7bUQLUDWh0S21OK8f&5w5kw~t{@07BXdgnJ+HVf((D^2Lc>p$ys5CQOruo!cgW+W
z8d&@^RJMxhzM7p2EYGb`YZzs0Fj%<U3>qQr(Y-fSJIOqw73cIJ<sHLb>VN@Cm5O@{
z?KO9GWan@EY{(b}0FI7&w}=}3Xr-gl*@LBdTUHXN{BT&@1dv9kZHvAg8|dh5P2xG4
z+%<LX4NF`k-Ky_zKO;D3CtXVFylvUkGn+it91yVlN*Is=W0JIv5X@PPE9Wq@W%!u2
z8;e}ThY(br^^aJt+$)9tm5V@=*N9!ctruifA(a=h5~GMJ2l3b~o?83^K2JDEb7Rqt
z=+K&bo@wRDsC`tr!ME7U3_5AQs=zHx)H-Q~u=;uv`RJWFX7s79-^h6WiK=TcG=^p|
zltzQ=X!*<RxohW^FbW(Vl(wH3Ml=eBF)3`Tn@xY)vvPe-+MLmXbl{#DYN~+6xc%2a
zoul`gK*OiMk)XIGrZjR&J-mW}gD5snzGEw3@~%KXkWRlvyf4f$omG5J@b!AV`JLt%
zfsGE-6zLa5l#hXrYiE+0o{E8YddB-fNjoH&DbojnU3YWV*g~VUhqtj)(J$^Na3YTF
zHKPCZ-ZTV*EOuZ@w1EJsHcV;g02)gt@b8l1VLoajn&4a-A$ka{GJkKKfJeyB%@`ls
zQ;KrTk#q{=<F#7)qD98D_?p3mB`fhVb0=>^3z>c9ys)A_T=oxqG|TB=*z)&f*NUl0
zU}b<4WN$<7(+r&#dS>)6?XD@OM6n)4R?qi6@+9PQDMEMhm({31%VMI0%YzJ&NqU&J
zJ96N{N5r;SqAW&^&?(s%p)?}@6?XC$9K3L>*=zclC_#bLuqofksBNPQ$6)z=$p`b~
zS1#QiOr1%kWSeByxUMg_-n-no7aJ9yUBa7#KaU3!v9LS)n>%~Cd0du{n#zvo1~ief
z6m*<JteYwC;CxXXXXEkXiZp;gcPJQj!pMqlj`i_D=ZtLBM&mImq9tN(@O;^9`x<lI
z>@+>N!zn8i!cyTS9F>?<MixJQT8tN@c1d#^SB+{4{gKBjX*@9wRs|3wba-23im{Z^
zkz=rA4OdDab}fB<t+!G&s0u35tPNbne%@&Pl1^E9+hRSr`?OY@jdqG(B{!Wtf9!R=
znQPG<CC%R}trfoD`A&F_)vS8JfBk8UZElFL^`18>{B*dS4(|365jIHxMwOEn<x-nA
zbBV2bW4FWTgk@)enCh%vk3rq_rx*YR?e>B<GMBSsE_68&AB=rk-($ez)>=G93W!4O
zj7Lo!EAS>ZkOKY)=ua2EPSnF@4EujSUSjGwE3r10^Ay0p?m`GWcGv3F>*5dTsZ+B>
zsLNnvK+;u$;KrI5VD1->c6i?x;mKCh&*<XQ@|CKkk2PnS7@Su136l|f=<YIv>o+!r
zH-50~GG*jX+T!=Rk*t%GleZ7_4HGSm8G;p~!1Cl@k=59IQWM~;_Z0(>p?4+Cqngl0
z6|a7dTA|-qv=tT&YCi1Bf)Lo*8258-D5wz9YcD@GJZi)}9!+z6yzb2aWZ<)EQ`X|6
z$KQb0?{I&ylfmAF<<2n{LlrF0m685p>nX&m%>YcO#f!NW-By-Kst(g_XK-bn&SEx(
z5U%llXshi*Gl<R!KssAFH;)H3;TYwx>TIMm{u4(`<ouLOz2RaZCrf)B{==(5r#5C?
zM_sIkqAuCnCfo`pj;s3nXag5WEX@2Zl3kshj*+fb>NUfYmy%rMlx~RfQZrQ}H~S_q
z6P0u-mi)D}6*uOioo!L<(ea*4-^Rf}Pe_yuj^i;1;A-}+)Ym&d@bSaW?kUs*P1gkv
z$BBcdu!UQh6iIolp**cXR-~+&p0$sAa96&@mYnY<a^$X3?|<#8keI$PRh_!jbO#Fa
z@wWx<3&F!A_~hzsYcNXob)ug$BjQudcB)koJ%JD(y*g7+mENWBOTG81*CbF*Ig0d6
z;hwCgdd}CO9DNk%Ys@;oL(lS?{Hl)y6PS^Jb|BI^5|C9n>Yt@4tT-=0z+c?ML-1yy
zrMVUy|8^IXH%zMvb2fU=RMI1AaCK$bx(7W@4StGu#=qH1I!CLi`jo>(Z$hV*_d6+L
z8L7%I<g&v+o5%8^7YpM=fY;N=V>iimNjMoE@HW?#zEZj^;u$f16O*=D*n@0%l)AM4
zL9Nkd{<CHHY|#VMOBj9-pP(q>Sb2DEPnDDMW&W93v-xlLwrpErT|$E5WEp;?R7BcP
zM-N_bF~tJ21l{}jXP(=;t!TU9_r^W@a??8roxEeNOnQ~cYYHjAV#5nK1r=dC19MO>
z6@@Y3N{e;KPp;kl4zk!%JqQ%LBtAih4Gi$#w(YK-v{Wvm7dHyag<3_hXtHk)yY>VF
z?|t-e@EdXnV3Fz%mFcyt2EwwT2JD~_uUpdU$Q|!(-x1NwdNNZaetQ=cdO4RNN)4Yu
z)WWCi<g<mD22*k*Smiuw(Yp$M3ItFcS!-H4YTv?e9v)aN^$yzW?M!yWL6@sXzJ?;V
z5fR|lvxoco6}L9ic#wK`(vF-%X(Ed|-FeiFvFnt(dH<pp29cbkPPPu9!*iI{`Z6oo
zb*@DMge`VN4WL9Qyzc48G+%gI)lWuA7TcHe+~(+SOq7v=FJ$v7{4v2ba+gL<V);fv
zR&dKoUyd_|OBeGCin#6O;V~#OZ~xvgkJScx`CjT=_WM*QJF%EUF6|S8d+;gkI9vF6
z2jXBU&GPY{ezJ4#dp@|=EJe~1sG4sbTB#rt5XAv;oUpn|%m~_v7_F|9Q`LDZ4y-K&
zj)aYds`}CR-K+i)7@4SnS1<RPk8AgD8s(M0HbimJYRTC2K;$0LV&i`9Xi3m{GG6l6
zLqwFKwsqkE4qSs~);*1uscUq?5W0^~7`yxCtbK1lq`fCXTgl~%kCGtq`NSYX0Ca(C
z96FP<DOu>7xUE|AwoTzAXcZLh@=~hX`1K|obGR*<DdrOv#*P`NcOyC8<-BUCi`qaQ
ze2@!bPZBx|8NO))jRB$T=-@)1)cW6D2N{lmFR4<>*eS5?FiGZqPr%)7Hlgkd)Ue*Y
zyFkoJKG*87a4Gi&4`{hO>)r2+iP?)h^H@_=qHR*P9o~<6A>|XSK4eQat`_o|L{_DQ
z?>Z5-rerrBMo`43c4C`Q%urFK<40m-!8M~OWW+(@!dd=0Cm7J+Q#n*ix<C57E4HT!
zD*R(Qo_Y1bBr3I5%hWfRS*tj}aKY4nvl3^Z(<Yq61I`5#mNt~B2e9OG^n0|%2Gxap
z(VNG6sP$Fux?|%X;?Ds4Ul|+kGO5uNuXEP->?SGYtMRS4)xYvxCKpus@Yk#Bn)$&b
zsxTu@=6VY7ONl-i_Ec|*Zj+EILNL{xFON3&t4C5f0quYh?h>_(#vY9?*@wK3*^x%R
zy(ZRUPt)OLNkvhhuRZ93LRuVFM$-6fr~sw3fexp}9eUronAFV937mWpHYWF-?Z0`C
zt&IOnL5wS24x{UWf{n|JxPf`G#Y?5$Gzc<@ffG14Ud<=Yys30mH3oKBtpfP?F5u#v
zi~#Dp4ZmHn?~r}NrM`}O>{yeE*OfBo<fA!MEPQE*CJsD93p%k^E0xLPwD7ns%Q6mu
z-r6X?GeIM~yQfy~TcLJ$b-t^)5&T-ylD*75*=LMR?J3v8y^LeCVYdW!CCk_M&?(%~
zik%!_g~Yas7;tpO6~$8aU+3=l1tzejH6iuR3;Y>14qAm~rv!m9qS6zClwZIANe+K{
zSx`DAa$6G7ls~!Hl3OYq;YpaGj*M!f1#brO&MBHQoe%mr7K95CO61)AQ%i?)S$D~R
z@9n+f%D2LX-4%{b>eKlwC8+20S~+XhpnXjLYR)k#Mp)(^y|_1F?^t87c(FLHoDgLS
z243y>xk13j<RT|a^+-1`Vud@~41TX{Wvt1)OOJq99!iZytJ;u15#8SM^{PJuf)R4=
zC2P&@j`g8ACCNAd-QK{ch-_>oX+5kIE)UBP{Fv<f#(|i{$}8VyMtMuW{1H8BltJh~
ze6w}WR+P+6=B)7sy!$wut_UCcR<C*69Sf`za^%S@r{b0z^%fHs>liD}1BBeI=RFVM
z_Y8}xWBUd2RYsMcU{c$^C{%&Ba5xLafFQGiTXAZK{_zes?<j#4xgXqCI;8Xy#KMKf
z|8NxTgGgB;-JeuC_WEF&^(MWkCe7P9`?ka^kQ@JLEON)7WMzC3*g}-kHqAO2Q_jvU
zMQC}cCFp1{wpmydm~E!0#$JLp!n8I}?JZA#zeSK6v2a7*tyYHtX{R30J6T8cdorQt
z<h_Almqq?`PT5dzrr+UBHpyelbwV8GrpdiUA0R_|c|;Un6cn<OkaYWFkzl7j21I^|
zs%A}*EVxJ7vO2n);JpNpMZas_NNkpdhaLh=O!cr~i$Ux{I+6FTY7s@;ikR-8B^D;7
zlOz8)N@yGE%IKibS2$cRd=dz3=A-_!<G!e&JV6Wh*M7^#N0bn!L4I>X`QmfeD-(XH
zT1EhicRYqLmB06O<i;rjN-Q+g>B$q^<uHu!Iyp(MxX&ShZK$z7I!aEHQE$yuxA=XV
zVq+@9;fsfYFZslR7shRdRde5|2+GG6TziVKC{)I&Sxjaq_vND3)eP*_=O1fZH<Csx
z3)h-xY#WaLt&2~nuk`Q~=<i$xx=*mB@#rHsTTL>>J3YdEWZ+3hhfcM)GSLnLiwc=m
zHBFLc0S~0%WEIw`_Lcj1<84RjIhpvbI?={`>`|oHy=wCp(#~TB>b&~A(YG)czrnb+
zF|Q4f#_X%0EpLZ>ANZf~lN`v{`Csd=QI&u>1TLhOnn@7&!t#2oI~`KjSnTfg2%?<m
zmO@dkYjl713#s?cd)}q-hg<|BhwH+Mm)u47@GNB2t8;xI;KK;S=03T;p2`kLKS8#<
zcLP|W)5vzv_Y;cXMBqv(INKB88V^moqJ!3=;x&$2?It`%etU-lJrRh4+p8GB-32-O
zMtPn-{O2=<H<$EXC{L?mRpMqp{fUh^T@Q6ZZhoR-FRO6q(fmBEcabspS>@26IQ{0N
zy{}+KDHBjtl84+fF9xQG?S@g5e1&a~e0DoLVOhz10)zi&3t97bhG=ccMoy0JKr*Xb
z?A4;*KNi!jrc1Xc$;5^DtbbT%FznODhuc&BR77s&bW!vIW-78>qG<7kI2Oc*Zh%3^
zCi{wYV`!V(318ca@tsa3y@-C(uu`wFkDhxrw$`O5xnPyCpIqN8USJ1F|J}sVnG=tm
z+Dmu^c4}OS>e#K$f)7x!>h;gkT|m+xE&v#Z2+LN@WRsb(5O_G+_bnUUQ2V8!5OXL4
zDhp;qEh}fiftApv@FnmSFo_PO>QvPF@w*L|^yJ!(kHho%`Wm8+#2S9hr=a%r1nD=`
zu?x<x&kB;TL-d$1Yqucg{oOK{#lDuf9*nrYj27=Rtyq<jKT+Q#`K<;%z9@+X+yhpi
z-TiE?qU??7Mp6!(aRQH5+tZo(DOhOM7Y(7JPtAx()1}#_e9G}FwK_FpmIs*#^Ezf`
zG23EHp*oVzs&|R;Mf2r#pY>K-?dtK-t%(x2fx=rOnss1+twd}Q-)N9(AB!IUg@ji_
z-77`OILUBhdDms;jMSwn-TlyeYm=Aoa9eIe4bNr$0<$JG6nRrh%fu5!Ea_o*z1##V
z8lEm{n8~gqmr;8Cy_Tr(0M#oCQSFK~v>A~5TiPgy-OON!7mHU&;;|D;zu^(!Y;&pT
z<yf$+;I`<w>PA7m3M951q{0hTt3#-eCTv%uc{BAfM8L`#u2+O<eGV^H^2J<=$}7cP
zYHi0=Z>d*oH`K}<1?*5Ftr<sSHCYGG+|?9=dF*?`rmfKyo1z9gxuV-i!pEXdoDUQC
zeXNPn5NnZ+0Drc=@9<DX;x%jNwK2@Yqz_q@H%DEXvWS+rhq(Q!KB7KE-6%665r)FT
z=%Oq_AWm<1Xzy3R=7)b=10=@s)pqWEg>9u#__H{LQd__3?z%_xw89o?VnDquV)JA6
z&C~e$D#$0lx)FZGTnM&Cnb^oY-B5`4O|#DARTheCNgR>~8;>`34h%+k5=fHXuIB^G
z%Q=q{{dF`gUk)&(x1COGQGpH146lSPUmkInxgO!E2M=8AK9*w_a}?hk3!7dhIi_zk
z9W9;U`@!>I|KX~b2)e2&5rwy&aEdh4LG80pk8Rqvr9?ADH_lPV{&vBx?!MO+r5*NF
zJB+f-BCANxZhcy$g>UehdFIA-Qf!~oy0=BiM@b+S_Y2&|>ta*-%UP8=rzh25kkpTd
z$VJ|ccWpkW^xM#ZCz8P~e5C>CJdPPrIdT7tXX?Fc%rImP(R9=ugTO)1q6!!FlO(j?
zbaK$LEsJs<wQ|K3OSOkka|@eq|ByUV!g$NJv3h^g9<1t0qI`545zOrbxspu%d*|i`
z@LEWDmW+t(H@KD1hIVp!xK$=}XAZW}dh&vPUGvR;pIFb-eMSxNt*oUsf?pDEhEgr5
zEShYJU`MqSb$xW3D17aeeD4gV&5ml?pu{X`=HHV~K)ZFcIIX>N*(CaN-VN`uUz?t{
zIjy~Mf{?b`5t)s$Rt;X#^6f~2OcxbKsy~k9hT#ys0~om=9R1zV?egw|%m|~Tw!Pw}
zEY={6gw7t&NUl(``i0QxsJ9(N8EuX$_lJXsUbYH>Nuz%+V6qqk%PO}9q=-Lm`ev?q
zoysM0`YTzaXXZGBFQ1Lp<>9fWtbMQ#*?4FuZ{HiJ%{<idWQ+}HSg|vwjNuCa9Ob{5
zgF7a0O*t#R_71h7rlS%bJ*VX`W1Dq9Z$DkREF@nrWL~B3Bu?l(hEc$UwjAH$BN5##
zQ}~zQ$Gl3A?EDKVzw?+9My@w^h#X+mbA_ll9E?REJTD_7o8Neu_bL~6rB}rM_87md
zKb8#co^)Vgfhkq?(0cJ}KH6(8<^<pPb1A8z#ikF-*g!N|#^ueJo(cug7?Ts#<wa{B
z%SxLKH%ojVMf#?n7;yKFwmtiY;m<Z%Uk_e=VHthrf?FnoMub()tpv{%vEfFbZ|W6Z
z!{<FM8#K}FQQHM7TFKWNbxxia#coUr(iSx$yy9uB`u}i8i|0n5)|TDrNagxmw6_{e
zF>>gdb>?&_>ALBMCcacxwX8Z175)`tZb?=O*N(w9+^}DjpNh*LCxcWJt5IuM1Af`F
zWJyBrWopRPNnx&U`MBzIGz*`~>hKaRe$f4}3<n9jqb=;@9q=||M>p`PuUq5($1Dwe
z3MZ!79tgErZ*hW5d1Ez&x^jccOV&gtiyH+{BXa;T>yqF(O@c0RsF}vcVCX3}*n^LO
zpCV6mjg&;hk+IEa&-mp13avgo7Ugen-Olj75hN%5$P)Zy`im5vW)RakNkz7B2%O>k
zA_)czhd$j05ow#5=x(@w!;Ut8o7*#kUll?od7XZ%N{&$Q`0wGnT?QoeOq9CgZ59|$
zhv_tSr!&qN4(kf*_>2iHct+%kXXI09x@+kyTa0Rn`2El`A-6qqbZ@$buQrO&n8Atv
z`$W_yW1A^e^|VZxQza+ppE8^+t15~ot>dbNP-j`gILxt^>2n!*vV$<6rt|7sKsRU*
z$S$Vy*KC^HeeDTUTtc1q5>r}XxBzpFzWK;wJk2dZRQxf%6PmiD+Uyd{CRlqnXz6tA
zE50r!w<eKvj+;8^`ti{{aMK9uJ8kBqgp$BqDUlNqxOoa*Y{0@>8CHY6xM%L|yjq}0
zMuEi>TfDd{pf&j2%ot}YQ#0L`tsaE&NwpbzLw)R69kc{Y8_E!)wfR{k!bkw8$pF3w
zikO17{_8#W-Bel~LX`CMyqz{9sG@)PEC$_XvSgl><+6#2ZgJ@yd562%bV~`IdvI}y
z%lOr`);+UGYOK+t5Rn+>5pN1c?-;+(T{xLM29kjt5;u40fSJM*b7bmiUO*x_1BCYH
zG2Z)N6oDbNiK#_EE2NP|jjG7Ke^I1dk7R=ylL4?~l3sScYw>kD0HIR7i_y=;5`t+Z
z+Cb*paMPI|4Yd?$-C7A%ciS{Xc4L^6iPRH8zb+)xmZZ^ctxO_EhBZrv5@}0nmKP;<
zn<MR=PpJ9y9a-btC%4Dq+i+v%q_xgpzO{3Mdqc-#i!}sBBnf%sZHir9{NM~TDF9lp
z?r04j8{q{Vng`Y<cZuTfF@&}QllSX<uzer7MIeyvvs@bOjwLQWc1&?WiLbqAejOKZ
zZnj>^<x>~}zkqPyQ)DS>;UG;u#I)JqXRbQGHl(-eUJ*;CgU=;m6C%ZD3_kS8IYPUp
z^vr=%P2u6?gwZ}zLf#-_#`O4v!Sl=aS*Kc~I+P--M-O1l$i>@ikJLyHL;0G{j$P)c
zla3`Aj+0YtVUtMaLO_LMc*!!)S<NdO)C3%b1|<F(l4}DG7Z1e9KvCVDk{hc9r-$Rq
z(vVc2=X&J}6R8RyUtqB4z?(MmTg6?E;;oc1dWA4s*8v#DG==ZGmrN2{+hP+<!XGrA
zl^-{tMu<BLv0~`C%I)s=>x$A?qC*=(t#KN5IgrNW)vr>HI$IjxpU-LDpNOw|v!yDO
zbsN<-<(B#F)CV%Jn3%t1$NQt4dPBu93Dv1P5fVNu!90c;v=l{;&vXr{)e7u(bQZA2
z|1z-XqM7-P0$Ft~sk%4`PS|V~b|sY7BtnbTl(4Lck9R&Nq8r90YMk2^iq9hLC}ejU
z50HL_&{s;y80l=3$3PUnGXV!}nk|=bkT$K|T8Z92{Y5*Co|`{G0n)agMrUlOfWhhm
zcHA+C1KzJGGUZj}7N;RIr~g!800Begxcp+mW839UV6}$;2D0fHevM4f9mPNiuLxXe
zeI*q<5#jYwLwOh3m_P091S;aF3HrL2Gn&$O9_vMZjV<iY)o1s7MSL9{;8G0s9+<hi
zTFyq#{t8*`XT?;)xyjgUQ|!BdvmVJ5T78J+OF4l5rr3%sU1d#apNP0ul0ymvZw3`O
ze23Cktm9*{#@gyX@qHSx?$Q8{LbUsvxH@wD_sJ$J4HIde-s>>_Rk3#}a)#&f4afZ2
zdWe`zXNVLmqSu{WRmX}cX|M{T92&m@=aXq<@<@c<p$i?dkcDYtUG+)7MTo==0>w9Y
z|I)x=D5y{G5tGH)z@b`U#h2QJ7YH_(S4I%4lNaE*yRJ7#uZxmr9y>n^6CNwn8oL}5
z;qpb<IU$t|xY;D|UX8cTLY1;$8Ke4sxF1x~;y!x2D|TE3vz<=$;cHHD<@jyJlPky9
z-PL6cRmr1^MWL4oMOG;C3eGll!e_rzLUzr11blbi0ZR*_bmz1FHBHXR*z{AddP<s-
z20d*Nm}Ir#AV9GPF!LayJL)~D%h6da*RnJN>$^PGuB#yroWU_)CMWJ<$Av(qaOaIv
zjL)W>Y5#OEp2Ni5tf*-3rxFQnhFg+m!V_(Y$jrMm608b0man*>_mD-^GdR#|^zvYt
zd>x&sD?jFUHL8_EutdBXA1is)Zdv`ajy91mMcoiK-tEP_#~hiX3j;au5K&k&$gsq^
zlMQq|3-ZyU5P82X1jgchYqKT&WiIz+i%Zm+T;}>`WmmGf21sN*T}wn5M<A-cBpjtt
z%5QB9W=Y$e@g$ikjuX=q{j4v8#Ue5*WF02G+j&xj=tjPg$@+Z-`WX{39F#^VV7hk%
z);R9no#QptA_oippEYJ#S3_x#8f2arXF=YJ=X|3BVniPY*blz|o|4+u|B1X|{ZHf#
z3j^!_kT-11tpAa`VP|IG_&<|3PN2$3e^==OB-j%o;I|osLl+3P38xvD`w*BJU~mJR
zAgCy^kroI^VIzVhB&4OODYAk1KR$eJe|=7?oM$qcbKGBcR%<?cYC^L$R{iwwLK=cA
z^Wrdk>Hs95{wy&u{7D25ATR(#kO+&*-bV1Z5Ba?&H)anh+(x7*TKW~DDA-4sN|uKV
zXa};)kMrvYWd=Y4>oZi;x2HsaK={Xs{DKwCDf%G|Ap-(8fdDw8Al=A~)r2`b3ld-9
zGN^g~_<*+MZ2_n&DrVifauM7Di4*4`kmP~(bM)!da0wC4_yIyi_!)Wqs03Wr0D~P;
z{Xgyoz}_BRAphE=?Hzy~WCU<7v~zGi#}4Dp%0vMB0O+?d1AvfQ|IE+sms;PiVbD!*
z&TemFKgw?){zi=wPXYr6R@)gIW{Cw5SB>L=zV1X1fNyVhA3z`Q&UfN_{6_`C|0^5D
zUr2$j2B8QZ$O3+iFl*m0CZYsj1bYx%Ag)eNAgq!5f(ritQjkN~x)z{UJQRTH!2{sP
za*+SCU3eZsEvnOxd*FRFCZKmh8@G`OsU*z7-k+H9CaCvBNhoiBuF~-p@yoGYP9h0?
z;nsE%&Od1VTNC(XpUON)q|Lo=Iq65gMmy;D?P?!G00jg{(vJk74IAJ!gbU)wQvd7_
z>K7N}*Fo?4U-2`-0raY7#Sfg|YVTj8&*ug~p2JVjvk&;|$8~QPjSvH-58)4tK6I-%
z0pxGF&vD4+w~5|1Kh{2MT42o?IG8}M&#!x9mmvmBu+z&s_|InqXfvwv6pNGFZ`D_B
zUI5Sp_T3@4o_&(uKXAscf1tpx4Fls}@{>>p;$W5_&o^N?&V@qYq1L???&3C`_czY(
zGFGU~-*<IEOh*A7&hC5A2|A7f!s8j>*Z1+;b>cVrq_^UWKmBJHx!jF|`!~k%8}yC;
z00BLyaTb3F)tQ$;jcHCuCmis7_Yd-$-o|pkAOarUFRsd<K+O?BY*TLm3%~jqJ^6qR
z=ss-dIT6F3&|Z$m)ZMexjVwfoHo6Ji`^!(Dubxuh(Hask=Z=q%Ha)~gR|uctJYRMd
zF(HLIJw;?BRA2&gmZ8wQi-KAjg+6$%Vo8jxPxfR$K|m5H$7CRFS`2`-{JEfB8&nu1
zP!PmkFJBo(fw_BpB?u4*k6*UmWz-9QF+IpTcJ>;vkIkQI7mh7#IMOUEalwC>Or>yU
zEcG#F_nR8n&B2cZpAW$hf1Fux?78BParfI40r7XESpCEjcU?rXQD#fPrv1s*P|ohd
zB{wS{QRtVe^2+t?8XsmaPZwLpo~u)&Nmdmg_2^MBP@*zn2ekC73yeFrKh0Fnq+j+_
zZsW(baR55_Gc~IHbDin+%FBe>c?L@>Ni=)!nbFSL3r0=Y&7!@jw|zLgMnrR%)PkJJ
zeA}jn(A!jHfLDHJty##y_H|5}zVoa&^``Yc29(Kpj+PXW8|nyidl7#l6mM^_;>Y?<
zsbUf1+{AvJoRk7iEX*z8zqBOJ2=c2N(LW|y%4-7LMWomL&&eB4U&BB_R3I3;#*T?k
zmCgInN#JLaI3{v^!5B*{b1x591(e!J+@`}OTaf89;LsFeqt+~Ih8Gq_iF^~+=n323
z<rJ)(sp+!&HB>~DNs@@fO*QNpViPI(V7M@Kww33N5u)kDiV1w5^-43#uymnKMIX6h
z1Dy5SEx4(Wx%WumV#OKh2u!;hFr4vB(kshrVsCF#n_3qj8u_LJb4NtYN~;EFlC^O3
z{X&mgy?wagYuilo62uA)<6nw5L$7VUpJKx;g;|S3?-}=!0i^RY_iGzMVy|IKmDmsX
z78QAX^fyhuroMLF!jF%WEOBZ)m+An?@;)O-W~y|+z0s1E$-wdSe4V0s0<FOX?RTIv
z>&Y|V8uK?D;HRX7ArlWknAc}zKqD8y&;ZoG3LjNa>V{j!P4V(8l4#h1`k`hsRboFW
z=R-PJJ?cJ<euOn}+?&qra9=!5FQ)^RLc>oF7v|nKEu$Q-k*BZ&D!W<CK8o65brKH6
z4jFLy-wDfCY-iy-)g5@GkGj;J5NTv2*^^B!=z$A#55>m*4IaL^Fi@7eXwtr;USHua
z__eAbga6`_^mc38YZ$rfTM+g>vMqsG+X7S?mCq`qs<xP!3ZPqNunzNgKB6xII{X`b
z2Ki~IMLFoj<_xFK|K4{(DUVz~znSw9+~RXf)$JKM2BWl%1@RAZ3y!e0Q~`zV(sUSk
zKDaQ|RCZMS8h(=Y{^BTL4QKk!3CVJHu`(U+=dI%uMWP}GZ8B}1@%Gx<F*-RHdUCLk
zpjKWjR8Ar<v5k||zA{oPzqdKE!A|oUWA%gX9?D~dQe5Bq4WPITMY++t9}t_rQ*+M+
zk&64=^U$_DS`<E+fpOfCiH4KB_9b82VT;Ip^$q^+W#EYc8yEs#z-glvCEzGGdV>2W
z<@YonElJGN#jS0|)i8*pMRYkh<sFz!_2Heq>mm2>-v#?t3D4aG+TOpiZVV~Fp0&wq
zwg{F!p`1?gr;6a2jc2E-f~>wLi8lrtX3AR}DJiUvV*y4`&G>rH=Vc12x!>U}F}Cii
zHk9~~2hsG@G1ww`MPASg5y-6@K0B55K8iN-)p+BlFl?`#{BoPUO={PqFFS9Q#NzcW
zE^7<y5wm>ZgImQHU3w@z#6p~YM$0pJhv=?2EBKpqQjbHomOB>e8!Q~Fmi5|fwIo_Z
z+a>j4jxw$nr4}jh$+le>$Ru*!vk1EBRF66*<nU+nUHrosWzYuDs4en7vP@Wc)SbLq
zL-BTl#Nqdj)eIHq#<EpQX}!W+#z9h|3abqAd8Wjr)D%`S<Gsm!ey-Xqh(Gn^#(1%V
zqCO9NmHQRBlTp6?BYDw1YI52Z%Bjdk?rY3>q%h>)ocw2Gu4V?bwJ6nJ(~o3mq2=Z&
zmX!yt;6HUQ3WiRA2ib-!q5d?^XIe(Bp`oc^EZM<EwV_`NBQJ;`!-)g|;1sJ5bd~WN
z0#ewJ<F74o59g5}V=S*;F3nQ;5r0kAF5BWFW7EOWmZ8x3d`gR7k20*GZ=r11v8#*I
zkW>mjuC9jLESNVQW@S{zT|fMJMpSITRwFr`UJMi_$5EI^-w>LAF$$n>_dL;|^05OV
z?lL~6hKds9URcVPZv;=a^FSQNY~Xl8M+$c!>(h?u_KSxY98bujS2<ns7xTh)2~J1X
z{I~X>!n`d`&{Xd@z%ZXSe52>6e|kvRemw@9U2?c>MOenhJKlO^;|kV{MkX)}^7nCi
z6uM49Dn058M%wu;eEOvLA1E1=BKxb*|IX@R9#39xGCL2fb&227)?H3ANz8aeJg};_
z#Gl5hb6!c-^I5CG88vXO_vC`nM>mn_3TauEDTI*o$2^Vm`~C>uxR+gew{Stn>^v2h
z)npv~Fn4w1Zr(y8w3ZKzjr64bY=qf3ryT}HMzCQJ@BbJA3LguUP}X9!(kQLU`Bbim
zROWmHwNzDdH?403xSS-^jq|B*X7}k1MN~(xl$?fdoys?Fqe)ll1gN05(U75jRl5U3
zNiV$=p4ORG;pQXUmPp80<C4;-Dg#FbCS}IhPYuXOcUYL%pQU%QPP^}RFP$cg+ClLv
z^1}Ph+Fp=*b3#k1JM14E?7cCd8_oUs_)aZ4*VfV-fkD?D)>VvLJAiR)JB;f>lq<@A
zaoY+4!oGElW||X+gmfs??7Ff0<WfGvend@@G@U$XBfNjY)id~xSjB2?6l=k@_|nQy
z+I*_<Kuw)%;?NBqPg+;}DV*$RGrzo+Mif6OP5XcpOsC+DY63U4RGEk_jyS^zl4x6D
z%EiIzOrk?GdsKL`A~%v&hHs6tra5k+iX5lD<qk77I{Egv|69oLLpvLfU~;o6V;#W<
zru=;}G(yJOwI{OL9{{Z|HF7fg7-7ZjmKbUVSFO&AP2)hsV;q^$q#f@7wAFAa%6N6C
zYEUQ_sBw%<2=1uW;n0Qxk1oA95mtI9GHrhZdTC3OXhA#7t8=LlP0>k)-0@^^1g)Bc
z@pvU1nG~n^2fVV^Yn}Tvz9wqjwOb^o(fFb*N;Ni+I_Y%@gp%)L)q~k@Zovb34nEqJ
z`^Z}Vkm(0-IqGKA5D4nPy0f(3a|Fkg_!F@Ky&4<~$vB17Lhn>cB9OtYNjQt0;)^eI
z6DB{PnB1JrB`aXEN#2$PzmxL;6lrr;-B=7)w6SmK9p3B@lHZNZQ|6`;LxKZos!i9n
zNpgv&)4jBJZW(CK2lohjkJmPqx2rfI)M-{`yMn-NL!gx%m>v@Bv%c#-2{r0Bruj0E
z5lOlxTCj!8z>+FLm*sm0arj$`)2?_5>uw#p328Wl6W)yF2#My>2i0&B0yX@u_A7Sw
zjx8^zG3^ON?L}c|D;`YyBh2<Ze40q{(h5L>rJGx$Et<Cscr!Mm&xg|W^Bj~FGU#A)
zV>m|^mNv3$q+z;^P`83@j1Xwh+QWFfQx#HJmC@*h$qZKttwpi=&{X`v;X6&`0cEfU
z5YJ@B$tu~QBl`EFN(>)3HYIjW9&YgGR=np<xn2kmUV5ny@8Mk3Y!)KKAMc2MEmnPh
zpWzMiqq4QRo%jTB$GtOlT<h0Tyz?ER%#W-&)bAF|gSxUFXm+oPDB3cIUcN{+M80@n
z&kffT^?6Mz2Pu-PGV8+0JUV-ce>XlXuful9-V#-sL!mN-gS|hZlR;lzV*$LMjam-W
z&E0>zX1-})nLQ6+1nJ~n%x-(s4UHBOfp;+?nG}84&kP=o)wxo}k%aL|M7GH-W9+`#
z4YnOYqJXvbZXHG58A5Y`E3=LTA>VVng=ZQ}ZJ17!wAV`ArahB51v`I?4MK;4jOC`D
z+Un`g6a(uL$|%Cp36Q`Zc|cxLL$9Ak`6dvEH@CYU<C(L;?wpl%xV$=h^{XN(Eu%DQ
zxH8&WqNsfq7O4#?V^0@chwf|KAIzJQy>c1kM~WJP4y^+sm1CnMK_zdk1RFfE-bR*B
z++?x8{HpA)JrJ{_Z`=LfD^?qb3j@DVma~oI*CigRYoUcVgE#*^_Gs4X-(#2viOfC~
z+4crcYB0SXKbbG>nV4>oXc?;FpQ%qn1Q^r*Tq(%XrW$tJYMhrXF&1-@AXQ1<1{C{<
z-!41Io7RjX&Ro7`?rS7l@19vZa}eI6I-;I78uolmG0{V8Z>6f3a7e%g54^57Thx0m
zd?f5Xs>jBnGR;GSC>F6MFmUv)bbPlIiazHQEi5o+t?4XQkD->P5>TEq3x-`hMyoZk
z+HA=~yac3*H6tsi%4D|D-LG@0bFS*1(U@p$1k{E)DNV%%HDMxZF(h^br=T&?$a(jy
z0~R_QUU(|Mz687Gj%s>5i+*(o@p%HoGhw>2)$Tg`Rxey;Yo2$PZ28MZj<0U4lEz4b
z8B4Ne)bml;b~C>&>OD^simsAaTy4|KBn&%|>qwws6uHw(tu7>kea0|$GhzCaYmRlD
zG3ofL6hB^txjmy@)_1T1<~%u4<6FAyi&<Vz(7~gPEKK3k%U_ZZEvjrjM~5^RIx*?E
zp1)z0x_E45XW561XIrY#!C>V)v1eA#j**CUYTJm~u-raVi$Ga|j;gqIC(O&`KDz(u
z*sdU58|;_?J<Z4Zd%x#>Bw^LIUpR`4^jsl8BjU1kvza31v&pb6z-gDh!K)`<qzc(R
zj-8!C1p7BN!YYwwg8TZF%Fub*2W&fI&&d8PNfMJTAuW<^8n%CYP6Mx^#FL`F<z0C^
zf&GJ5WObTkt;6Owv)ML)wO3yK$%!jBC}%flQPiwK!ua&0-WE9>t#C$ct=>J0_V2(-
zjHP^M@cit>(;Qv7a27HArTDoddv>u4tvAqkFnyvx+-p4>h_im`rGYfdR$NrQD<9)4
z6PET(^yU)D8@b+<wfSG1*R~CS7Ix0_SfXw!Wa>4J@!%Ei9-X*Px+8)6SSj$aEn{0G
zi^L!ZypM~JKTyeF1Y`NRVhlz6_%;|^)hQ||JBu7>(}f`jYZH%UWyxPK3%r(mo$@|J
zr~Fy?OH$%*`IBouy3Eq1Xb{$6SGMqZE-_k;hzGdifa3(CuaYg62J-MQ9)F@L#1@Cl
zwnf_IXCZ2XaiIT-Tk<pZvT?kBsW-4U9eQS9GCik-&|bN@wj8eWQf9@2v}wSf%yibO
z!-AqVlq?08JI%|(?a(C*x!h^@t{S*J%>nrZ7JOV<-z9An<{4{y%m~2?mk8f6ZljSy
zC&ZF4$w#C(x)b$BJ}U}K-~su^P1u;eMxya6*1R>nhP-Pnbt`LSF61<7T1&IBKEB_1
zFtC427AgJA8BeBqTsH_b)@@LwVZ-w$sUWwwnk7-x0Z?FbqvN}|^fUHrm7RaW6h#}N
zBzTK1rjoEFe1njT6vt~Qyr>GoC&%1x;EGBf+!Cx)k7f>9pGJk+s(owgeuUJZ5l`LK
zSz8r6mvTTh*@d2@A8%oV%dqEPyKwnbgucdbytN5o)H_oP$rf>$zWt51e&)L%J{AB-
z_HLa0Axs~8TA*W%c!3r)=npz-HmT_T%qbb=&N%tDhSED0QoFyT*sV5#{u}Ef>I9*o
z+FHGv;+s%b5BEuu;#6K!-MzE@SiIy~m}(+ww(19;Vs{jv^g;<>D(n{WUyPkoj3+?X
zr<>C@r)_iEwr$(CZQGu<ZT;J}ZQIsPHrf5&WZ#RutlLVZs&bxlp5JSUMJkV{v%2$T
zF9k>AZY6=djXUN*#Rst`Jx=@mRK`a8P0-lB!qqs`Dhp9>tDk!$NVZ#MSKB=ZX;!b~
zE#|vNzRj{ve7VO1Gg^JfIrHO^mmxCV;2-ywSZ~q#6~d<@n9R_MWNz|u$C=fy;S$rj
zup;$>y%@(_t)#l!FhVe^Cy;S>$r_eCQdyh;f21*)AGUU+WJj`hK;2p?EU3w^VLL4a
z3*PrS7FtOu7l(7oOI(N$xVMTco0-Oi8D$pjKBS!`Uv~Kn_O*xcpo2`$V@o(Ev4diZ
zP7%YTZB)j{@F~^$EfsF0b2^9lG(q<m<_+0Iz0ckk#Tv+{hMv^Ek;N8U8B;zprx_8&
za|^FxM|&`>Ke3UsIMZ-XiFI`jA?i_HGSv7}%)+eC@r_Se8OM88o!xErpjlI@@sPhW
zTB8X-v*NbSN^L>u%GDwRAd;hPahOFT2_vBpJyCvzk!4^w{ECG-a!nDlh-=W}d(asq
zvgu0L7R)%H#(t+29>?f>tK`U20rm(GM!I@FV=ML|{Z`!HmzzYp<o>-On*t3<WwZE)
zxG0~Bc`2@dNz<j#a!am7xG6P{j6CxMBk|GGec!<fn%Xc`lhes+Urj5QGDWKuka!Sx
zRYeWI`-`5n;HW=`aXd4u7VcSzj+ZN|s8f~0#8Uy%W_W*zC!Qj+CQb{@SK(rYb5vrB
z@Ci;?elsw<(9J3SbvvAl2_dsR6}6NT!ZcTwUA)`b7Y=dH<QWcO{elSlp-Kw21Hd{p
zE(?P@WUx}J<e0RRVih@_&)luy(AoZnO-<wGtv|5sJZuGuBwDV_SKlVuZh2XdWdAqC
z*X<RaRasobQT!##ffB%YvPEZXl?v&*orV?TY3r$5fx5`OM<J5ezv=w*3%&VC_yqMt
z2Oh>{8Jd_}@sZv4G7%BfmF@7L%hgqhNxs#|TPp~>@1Jpw2^xMQS~?bfZih(3Q>NH(
z>0CBT&Z1>)wf5}Kj*SKuNvA%)P3eWin#yj1hog-hz6-0y@e9v;Zn%;wPY7CPUTsJK
z`<GE`pBUERXX`hDKKk4~M<bytmdq*2EaDCC1Gj~yjIEJzsL>C^!<?mdcwn?b6(uVh
zvm#@~FU#I$3oKz-On6cq2WQ&|3+8pSE`|8VP#o%otga~&x)3Ygql-x~HU%Q7WMW}v
zypGV$T(KttE-7=d<1tJ!>eyD+q_R9C^1fIqOZ`7x_=gojUBI9Ehoni4dOSvzWK#Xx
z^Cxx)W2Amj<cIEz05x6mKu&MHsd<K@KyKbgMEoSeGaABts-l%cN<A;q2zg{?wb;@n
z!?Oo@)~U`KnOYLpeY?Nw>v?P(y_ux=))aCo&yE}5KK5>=?rLvHK_<hLt4-D1Vd*o^
zHe_O7G_O)XV?LVmK0VBAfXNR!9Z&@ygcRM-u-!CMmDP7CE*8d*pq;1%pJ4bV2|;Op
zwaUH;(bxWjH^<~V)d05@W({tYHMZmm>L}Vzi0%30`2Hj`yasMnk6!o5AF>RQEh2*r
zYP*5g4Gi7n#ez9l>MU%S80pXZ;f9y2U-$rB>xVs~AH$D}x-T^RK|DC{uay>zOO*G|
z8ZZu<zrS*h<s>~EeFDyy>_SR~24j@&7rS3%+Rvs^8+cjB^L#B?8lGp~vxBu(*5bN8
z`Fm)KTvi`8{|Sl1F0u^Vu+tar#MZ>C_i@9R&=0q977`rWgv2`-9k}8e+}ZDTb!EVo
zEi$Qmn@ZYMc<$jzgQ!uY%&~dk7&>p>J|C1p$g#r4lK+%lP_KKi(GPS;lE|F8uA9e~
zH{@9ASkk3yFAZ53W<XMrd@{fnrLS4lG+M<BCo=u)1MVF-4R?m>i@$!m-N>zR8>=U#
z$m;6?9sV*&z2=fs1J#KfX^E6zD_PE-jb=B^$!K<IrGYrI>l*d6pUNElI!NCDELa;t
zInn=)k^XX@j6&BbdgN*-3Zt@q>^>35URKWC$ud~zTV<_!ZW+4FY9K@)an=8hs14^c
ziusroTDa_iZ;&<JL?+>?=9rjyBk;&7GT^#^&pI)1*^}yk*De)`2x$GSp)%Q8`=WNc
zjr&s5FyjHFg_3%Cb?EmwmVOpSB3ZR>$}C~Oy^v_<=p21uTi%^>8Gs>h84@6M;;O{~
zWK=AFcxAyi|2+Xr?G1>SFw<_!fu)`6{a$FgFj<R_d-5I!mgHSFP)M7m*DN{-&d^dz
zpGY_ia<z0UJ-GmHF5qjn6q71HaG|tcQg#}ReCNipIXU03GWtTk7l~5$-lJcrB3LG`
zdx%ND68fe`ddQ7aP7jy1QS`Q|UkhA)i=lMXJ;iSl(5e9z@St^ef^{jFpDUDvv`wq0
z<)Ts|9eCc}9SlwGQ_*~03_>5UWyO0fabsfGXTfVsIpnLY-XJz?WV|Gr<yQDLuM>mB
z94C%C-MKKbPJOTA;2wRGZ9X?DHFSKp0RBq;9_faFkgLZfCmoB_Xs6+1)uOTtvt3>h
ztZ@<G1Sm%{sW>=$;~2V;Qd5P0?M5<8PfWm(B!$J0oVurJXPZj2P;S8W9n>3&zx!OX
zt+6ye&i9xpctGv+^0+a7&A|p!=13sDT@$IMLIch@EDxXdT0bn-7%QlrCtIDDjgCT3
z5sd>xvgU)DKuz4GO43sr)wENBAB?45D_OvKq)N<167TRx8l+m;-`on5=}^C({;rO9
ze`)GpQC+%yh;~ClU|b>J15P{j@Nre_EJzZ3U#>uA&G&KZ5Xc7W8Tmj0yQ--K2`Xv#
zkWB(Jz^^ytdWEd#&8fFLoYq3QEkcbQ9y+kOrwRNR7)C$CDr&25+Kvo$h?%Vq(*ig=
zxSbnwveUB>pZT(%N<x#9KH@hZ8L&I$<M_&<Gs^%Db-T4Rt+dv{H>g<Ey5hZcZ`4bl
zmGFk&zEYdEgZ`vu+hvf#Aw)X0fw~f`IasmvTnO1VzGw)HR4-O8r|4%Hq%E4Cyhv2f
zRh?W7XUN6N1WqTDD`E`%K5hl!yQ^O<k@8S_8dnTC+oPmV!3`k!hLEeq@cC~Hl<mJV
z&_93vgMt2G{J-Iy|7jm!V`OLg|7M`>;L6HZ>#XxcG0t$}ZsJmIZc^}|u&_+MuyFH~
zTQLN<^E#5y;%t!O6fw#E(C;ztfRpT(&mYa5R?}*pkM`?}nv?cxo<>%tpjh74D%?r*
zBCsH$wl|O0LE!t$_mWR<AYNVpJYHU498*(J1`v>U>=6T2koJK-hO$>bhQloZR!EW(
z{UCm176W<zObD=EddOW2^b-uElLJu5+q;M_nc(&)P*iK~K35<aJbx@$=)ePpi2-e&
z%|7^8NaNfdJ78;mSD>EsbjTGuH~%V_QC!%j01##y#8?6hW6{5GV1E%n0|g-S{DSqp
z_QUzxA|LKOJv{DK+j>3pui+IhwSnjv`jP%Z1O{#L2)O#|(SkNvYxDax4+jmvGT4I+
z{9ryBJVVlByCDKW@%Qp0qQ*Mk`L73T0*_t-`KZA8w@^dAhgIE$^@6`zIRSlgfBrzf
zq`lPg>z~@0VG|%8?^}XC0Sn;*J?le4{;e+R3%V7x2e$s}+7r%-$PRmEcVGb;y1GgP
z)g#pkOfhy2VjYkBy@Zc*3hye?>#?gpT8ro57b0NWNVF)6cf1c0NVLn|bFKu`8RS3e
z^RoAKVV{%FKwq}AL+!<|IQ^}L?TS~^6)Ins1tG4oM-PwO|HgI<BnTAzIv5-h@(N7A
z1~iI&I{HB!5D-Uw+Iq(peM#6mgLVvEC)NTC4YUyyugC6dk1dA?w08{*efuFdz^BIk
z`3c0ufCyX%&f%ZW{u3J;Eb#2NJo?}U;RQPR7Jq*Lar^cDc`*%A&cKC<y8mGJenWh`
zq{y;7mwNJ9dF1!|=um$fh|d<75vVnt0pC9_PY;uThzJbw)eHw0`&Q58Gpv?s5$_k`
zhZ3D*-Z$Oa13g^b7Yo6qzu)kDAYlp&NZps#soY!etN$p>&yU7+kIuIr)eqW{AKJU0
zTG1hJz-LS27vU$LD>eqh;0gUN#dbW%Lsawr_(q^Fy9&nBnN!Qa%iez^)J8>Qm7r7r
z{oG%fneE324>00q&!7!;3D>XyOza=)r~ClH_Q_(0$beb|8WiNmV-T<F?fqjS1pj;X
z_Zw;eaq-&*nBep*-vgCk07IwV02u)ZY@LH!>s_BfE{h{6*zJLsY%%WWM{7E87q>hn
z$QcO#GbT`7So^^zr@DXu5dA#N@zY6Le9O+iZCC+^ee(Z-LF5O)Lj+qz%Xq<fjwABN
zyS^Tc_^d`Z4)Lq?i{pblgag^9q5qfnW*`e^pu^_Uu&BF<Kf=hy>Q#Uu15p@^NE1c>
ziNjLYkYcuZSaoGdG5p6lm5QhY<FlMY)|RX*4=@I&E2l=u|Ca*Zb|S|SBi6gy{Q#(-
zF-U<!I?`C!4#h`yzL>O*RgBF{hEcZ-4&w}E)90!DwFUxUMdk`=51g!v^ruLPc;EQJ
z3tk~F>koqsk*G`_@-RwgmYFAJ8J6sJJ}<<G-h|7LqVLlhYupT$j+R#z7jrtHyKd38
z9<?@#fZ*G#S`48djYoPSGo@NJT5LZNxihI=Lh7iv#y)aiL#!!n!?#b1c^Q+1`fTBv
zfE4d*z;mM;EnW;mKKR%t;K>QAG6yH9w#yyWW^682+vT^k<7Bfdg<4@oo)tJD_}8hi
zGtlI4?owW6V_n0&l@i=yN~(I5S$<$@UI~S5)j>njD>%(O31PtizV0~G1^=n`$YqS0
zZC(+ucv-rHv+`UMW;QF&FrLGmw+9Dg&7XX^0Nv~6?klXTDDDu(oY}-_F!D@iGkka)
zlI99%ohhW>4iwPD=Svs{-Kv*RZ%~Z^p5Sk%oH;z>$fkb`rIezfL<7M>ehEkY!s?{R
z(0q{@8ZZemg}Rf-mZLJ)t5$~T@gn>^PnJQ3HTYufa{ibbc&z|p@bwJ31*pYLjcB<p
z?E+a%JqPD8ja@d;tg;LriP%jY7~J6DSjgB_N(gxZX~z!@P$fB<$wgls@rnCs@LKz9
z?-g!g9EU~DS?5A&vw6D5wP{_G=axCXC@=iBU2f8`OF6bvJ1r$?DIDJ)h&CXNuSf;U
zhkt7(oHrMGAjFQTnP9bk0k}QWrs+AnP1Ta(-GFs!Ub9&FxPIxLk(B}wkm>6{W1wu-
z%)uHWJqV4&{DD^P{UDNzHe*`CA*us95^!j;>st#DudIzn!_8#kv5O+K{4BZj^jwl_
z6rER)8>nQxgy|BchEhQOx-KVv{RXX#Xt~wG*HE|n8fzTTfiYsU3_+Z7oupV8!8XA_
zvJn<3fqJ-!ly*jyW4Q)vsRmoeUu;tARi6K}e;w;Kqri{Fse5+ykbigUzqPFa{I;4~
zI5(j#$xXvEYbKv8(DgNw*qRQWrT!xE!YUI8t~eR}iIk<>d9G?Q{o~Js97yhG1WZAz
zab+|BUwGG)7Z&2UE~j{CiHEnq_Dfr+$ig4zLLr)*B{jfvDUXIqhsYK3q}RmYJY}7j
z=?=C6!4TUN8*roOJxBa*5R%T4I(qKomdbHSIPhGC+;>+KZTuXt7p2Yjb49P`3~CL=
zN0lP<k1o&&ch6Op*GHNmj!_La9Qm+lA;-W{i(V-4-5zp@;)i1OVKJ%laNWlx7JpzV
zoslX!U677!OF4x6tMI8jahp`VuP3I;@K(D;@0FA1wI~PXR#xtY3q|lz8x5uKNwcO(
z&0a}MqnlYFtfi=Q&(#Z8TTk9=PLokKE8!~KxtN0L!i0MI$rVNYhT@&aXIWIRX?ju0
zE5i&^3OUG)&RTXw!+XH<07#@W(_8-lgjY)Xs&puv(u<L?5(Gbub;4M5uZ~tN|C-Hn
zuOKR+6c!p}lsuXo<Ug85QjZ9xhm(I3wI5c%^Kt8IWe|vzs8ef<-YYZEuJ0NZmidT*
z`HzdWGXH7P$>OPks?C%$eqMlO@_qecFxlL<8}kIvOVH6>zI+jt(nO%*?C9~XQQl9C
zuO~`jbE!kVK6{#T;M3{PQ2FgO0#wXcJ}sY9qQT>3nTzoxN%x1bB;U?*Jr}Qx%xom?
z5Aap~z=ZlA-oGH$u;8ZeKe*h>O_w=o;Jgy8+L|9z!={U^>}ZmwKAYzCa|+R5WN2y_
z^66%4Aw~$!BfDzuUC|gA*tw&$w$Q9K{)Xj+2V2|K`D?41M<d5#n+Wc9!G!*gI}~qu
zsu!G!9gITfm_bcQAXlS%2&y&0w$xiSAfJ<WVh)tnQj62LGmf-_f_@g1gozqMn=WwV
zEf@#g-E64<*^p`H6IX7?G~yqBQQb9B3Ci<w{Tb*{z0NxZm0!nTYmOWfbmdjWuD+Cp
z4SQZ!HrCR<mY8wcE5OU!Ac7{O%!m!{m*8tI<o>7{XLx$vVTmL?2c?;Fj{WVdQ+v}R
zQ3SAc!vI`+5-ZG`rvg^Enhx59$&aHigiz&{s}e5{s04}`8rtB*9SXRQXL6AFmS7so
zbSLVcR8rGSjp6Sgs#Dw<FPBQ=U`Aq6PR}kGeJpJ-e%5$Kc;|6zU(GFQ2V<K%evIf@
z+`M3}wSZA|&jo-UG`#r_9qC;z=e-=Y)9P<E8?`C|h~T{E##YiM9D3`cjwMzHOcu}6
zao|!C+u@&H(lBV>E;ID%Ht#tw)B;L)ErJemNWnQ??8U2?cZRk#oF<qJ?~t$!wiOz6
zk7bwTbW{i)C&VJ5E{j&qWyI$sRt05*1|Vv3nxnI1^zf-^dDfC(%%ykSY_+dO)@Qj)
zwKDRWP}L2|4uw*>ySW^pE`%fgb)tv8b_VyG?%=6#mn?p*3TUFJo+gRy=OxzdOUQj@
zJZKm?0B`YE7&0;V))@Sz)o?nsPiQ@kOua;`M~!u@1R0N2Urw-Fe_7;QGf_r|@1^DG
z=jx*xZ8blpPbQV~#?2Z(>(Lr%S+HgHYe6d1)`dfYQfMt@^*!yp*41~RdDF?5i)+5W
zQxA?H`<q;yR=`)yyU$cifAcY%YO?b?#<!8-=h-~O%ULL{9SUH3Pl?yrGAN&M=4aIz
zkGmaRH5_Z{-6_$&NY<IO{#}9%d{&+(-%iae#t8WwDrb&97kVM=#M})OK!q%MCFLdG
zm90iZd1|Ztx+YRLfzli8p!Ag|l>Ta-<_pHA@`NoB<P{N7=tJ)+687uMORDJ7m-7wN
zcm(`~4Z9uIZ+x}LncC}wfKM{RIpNXc%th>(hfyzjI_{Uk{Y5|*Xt)`!L{J72rU`>)
z>;Y{3D&fnR$I?j%L#gI~(GxYYU1>jj?uT~z0b6#<rwa_6S=(0y2O@aPOoyp?VBZQY
z=cfFs$%+TtMH1!0HJmi!3liTAoUe6&xIIl+!E7OH=V+s$x_3ujh<7pS0s?dK2LK*c
zNdbr60GFZ>Q<l+bQ*$RMZbWL>O8=w)D(GCd1K!G^rXA-qmE@I>8-`KA1H~uosSQhX
zZia5>yQ|xw&kM3XqqkAt*L`q6-7;rY!{AA>6-^pNy!qh$-I5{K78M32H=643pA{$5
zUzCaIJM!xiMJ7RDk5Te01-HjjAi+sxsS~rbxwnGI+L9uV-lO=QdR@k|^>jH{C=1$X
z%g1!5P6N6*^qDltu%dwX-=l-D!#^X1_qZlD7j#S+7il4chDSLaLekl>Oy!`Q;#aOW
z^Z0pL!(+t;j26I4L6#}dhyim{R?Pbc>1V!?nq@PEcA61TBVZNBi6^#BpE_xnbV<@D
zaUr%ohT)OtV=2PAmfs_rb3m{v+>PL}R2H)2wsyInLvscF;d45+bQm=35mcjG@Le^i
z5)%RE-8?XJ@{tO?d(ZVSujKHW)b(Nm;sa}jumVTi^LFyLh_C;X${5wvoX-Z+X}1Ig
z$KHqQfc}k%75mMvL{5A|uN2=2A(z%wRTKli?tw7`G%R(YPlYwSGWH^-s7EQqA2H!m
zJY0M_&13U$=B#<@w-j@50QX0i<k8|H@s*Lc$Qfk!=2R@EGM*?FJz@<pnh8BXb{1xK
zIxy<uEkcu|N}{C3mzw9bbm=VMQu?p{S}NU{Z!J#F+!0fW65~(-e1kB#MD_{#ynjVE
zHy?epCN&6Q-vx3pO4!==@Q8<_h8GuySi_OZVAhr`TvDkK*duoBN+~{@(a@|2I=O7l
zZO?ptX?mxgPhaz2K6L``_N@?shyL!iNp6d)n9?VR&6?x)8;8TR?ttx;acMr7vH?kW
zG7k{w4RX9LDH~dHKfbj>=G+wMs>&Hz=!Bv&52G1=-?&_(s?J1+B)1h9Cjw2mrXU$p
zk2jANdc3-7trEQ`ktI@MzpLV8r~$1s=GnEfNw8qpomcXM=!CBEZJFHDY^FEwf8Xi6
zENxl@?kwf1pv3!Yf!z80nKH=glEeUtaDGsOtj4Me5fZNUV>ki*DBro#yppqtS<{Mm
zcInBgV2@Ns_zCy*J(6Xuzp&7K=tHzp7vVoiF%f<H(HD3f78M7a`*{!(DcR-xpP7GC
z$wOwm)pcTb<r-jLf`+tzS9!`$ICrW7F6&Y)k0{uDwg*R(`^9i+@cpig=Y(Oq5t$Pd
zsUK5xRoUSa7dxPOR%~E~3S%Ceh+A1qk_@%ck5;|5nf_dBUlY57ZbY^VcpyM_gsZMy
z4@k-1u3PrhQ^5CMmB^CtG<ofnrb}~b&i`yuyJt8uMTLh&ESfv5BtR#4{VPEqUQQa{
zz1F3oexaK0sJ>RG)+ym_vFjXrU+#D6Z>xk-^9Z*-i$q?RR+&3*OgSp*u0(lFVbGhy
zDsFlvmx#<&`%X?ElfDQdNI0L=`J%U%jeOL<N^)o1OpU<0a_q1|M0I76SaDAJcnpsV
zxSp6I8`~pz#<~mg;9q!8N73pK?h20O^08X@t?7DYx3sG`A3TKHbS`Il4AmxP)<x$_
zrO~CO#eoC%wtOvmsMUrvcy7Kw8~2CJoCY5wgIYLcvah8|?4;om5z2XkX^tiFSH~bh
z)}Uy?sLSW?5RNU571BAFL6yzl%Nw4h?nw`?bZt!yquV-Nm6q@LoI9xHi3*4=V?aU&
zERZZl#;aDe?&TJ^wAbdWR%;pj4mIb@P|)gEW*VFQCMYd@|CX->ENo(ip0FN{%`0+w
z()Qdb$iozi48U%mn*2T3OB4QebDmz+IXC7vJ(i=Uq+tHaU8s8&wHE_#RQfI~3}&XX
z5^aGX9xR+$8%cWwjcqL^lT$rMvbP&e`W%`*rJTaWh;I0GJ`oZ8#C>*6zD8+{c2}ZC
z&nly0Zu6#;*~+OB`nBFA81_UXQ`$&**KV)NK$$XATuDKbhpiydnQQbWZPvuMs9~m>
zcnN!kjh7fT*rf@eQdZTQcd+&@Su)-Zt>LtEK|~{o^ReVV?&LM{^LzPDT6<hv-PrY7
zj!WnnkNkwOtkQAIvXWi~_IzQ460B1N$xDSmvax_--@sVY?&V$-273U3LdcxvVMFkw
z2VGK&`Eq12TLCY|K-s5ao}mRqVk=2<liU}{!IDo7GOQ)`LM9c;0q}K91rvJ<8_TH(
zb$a#Xalm&yW}(Ts1UvsUt$Ltk(s0-yKOFSwG{`Spq!YXl$OI?H8QXx3xYkv3YtAr9
zb=eLQG*@}~5C%6Nnbt91J@0QXOk5NPB4NeoFAmHK=Afb@fdv9RjSH=*32yD(jSo5F
z<Oo+na2@y-4z|9$Tb%jiZW)!f>K*2>NWN5rxZaadGeTqEFWyO985Za|0@PM=;0_9n
zadN6NQ@LSzgKlc^!#oET|4}IF-`gHr&{nl~VB9kZf;h3@&`3Gxzinbvm>pG7Qkqm(
zx&Xy!{}@kjm~oG08Rq@k7dvNspou&pHNA7?<#nA&I0rw)V=N`%AL7)Z9=iN?wqG9_
z>zR&6(~0K3!@#-*v?V1cBXF$bj=~zJ&%c0ftQ#ObU9*iC4H(O!-lx-vw7n=JsK&wb
z$e&HTr_OwM9`NqVrPG&DO2zW({OUq4;hpbR;%WAJQX&76v%~pS{7#3^)<@9;AzhTx
z3lXzUN;lytQHpa67p}$Jq`t0PB_&9Rm|`iD1)pqO*Jk{KQ2wEHQ32si;o0ePosXE1
zg&c!#--qnXMnE;})GEK+ZY6a@32_Lke0h{SjEh7SV;B&wli743E+c?TcT-7q6)E*X
zsF;S#_ypHX?l1Hz3vy3)Op0I8BvKTZ6Vt<yhN|LR7@M)!5Xzc8o0@op9RKe(Lt~}B
zsEM}Zzs6DBk8uYMMO<v(h^L^GQ!GcUE;D~AKE#eDaxT~G%23~UX(;mbi+0*bXZ^vQ
zm=Yg02kF?2Za67*Rl!4%mrW$5mszIgUEl`XiL`xl#&A>-XLbxlAu&`z8lL?OG2A9+
zh2gl%K|CR9EGh?6dzmAmCpo@xQD(MA9O-j?g7AgEj&TumJz{vVBcC(a@3yg55*y`l
zz!N@<H|gvt)I}Vq6|<b~hJQ?KN>pAR@Hn5fmk^ZEaqQ(BEEdaUj?P%zQri7;^UQ@%
zQ3Vf{E4>q4ir`Ryf=6inPIWO!?sQQ|u>|d>Y(9a?WJOjUT6J?kD4CVU%_u~CoTu3Q
zux=~&l4pfQnGkyq^_9KrtolWvPeW#shbKP=O_1I3+q@lBShz*`>ypB`BAVjr#f--4
z-Pt3t&0mRu+fbIX66rwQAInZsdVLfChJx%sw)L8DAI^a&Mfyewt=9Y!{6pG>e(j@O
zkoY4kkz?*f?g(Y{xmtfI(3x?ID#9KKv+~|l$lF@~+W~}kJs1nz%4+MW%=m5Ei@rL}
zBZf6z^3J>j+a@0_Cx@Zdx3b(^1c3SL$))eKoKbWlZb1)s(>MHXX@eTRUZU-f<TqwB
z1l|21JM}=^$<YXWP^;0kmEXT?8Zx{gIwzUwN2?7AH8ip`Iklc7v4f|I2`7(ml_y_i
zzu2>sYqt&ykC=&qs3O)sH`28q_@0s$11amvP7La|xs0J#6eZ#@Vy~^@>V+t<`<}jL
zfPB4j76!gwbo!d!@WA=Q3{+;AiX1~W$_AltgV{vf-6umlm(EF}+>_o_c&wPKj3vr|
zrlU=tLR^D-u=1zxUaCtM0Il^rf!u24U)j<akvFn=-mocw;fa30B864e%qPL^ngLDE
z{E82!6o`hv%=yQF);dQnQ!_Iy$P2fFgLjV)v$@s}>u_t=^X&7S$~4adj$Yo_p$C~W
zqTf@+LzjGLPVQv_%Z7eiv|a&8Xk5MWjm2W@Vr$X54t|DQ5Z?wITaUqyZ}Ds`5hLuQ
z6rayNu^F`@uz~)a>?!+De0g~GM|S9xjaoQh{i@#S7PZ(8=LyBgbeE-aWUP#jGH9`4
z*9}m@e*ayQmikbuNV$5_*!HE_jIw^wgaLp;JWqLHL2~ce;u<W8o5vjlJSIA&ORk@H
zEc9GV&KPJ{1s|^F>b^-ft(aB3Px7f~54tnCB3_;AUROnIPz6ygKp1*_2{O>4%=2Nz
z%=UaU_ex`XPXb0RD%iXm0iPX-34(vuK*^Pml4+a=IO?<^=$W|8iu--lcXXdg9|me{
zl5Ihkjx2I`gWGy7ZlUAFC61zwBq9Xc@S2{YGs1+oUdopT`qS&SHm<Dx41v<r*u*FF
z!X^}vaYP4y5xUi3*{_}o)xW}~>^1Z{XDvedE$(liiU#WH9K>Seue<4$B;h%kfB4?)
z0=MX}11nWZYuK`F)6>c%lHlNq<3%EtW-1!9=xlZI@a!g{1;$p`QVbQ{9ZT*2qQ=$-
z8|wldm@OPGSuvfNK}v>@{bF0irLbnF<bCuvoz{S6Rk$|2_H;e2Em}*CVlY<?oDLP$
zXC^#W%~ZBqPa>GEokWEiGAyRrd#q{Tgj$mzD?CLO`V-^sTO?WKns?SS%D#`0&kv4r
zgglTrS4kJS5Y;#*c-Y_w1u7rLJ`&#GUPSXSgXS+UeD6ODFJ5*^KL)lcKvku<b0j*n
zVF$Nev37r*m}-l_l)y1qvY7B^CN~T+z^eMUkS?PT#ALUJv|&NYxa!^~<fkZ4pi;K|
z^G)iBnt`u#Vtb076cAy%$DMTh1yZsS(id)e^E?H!GBtNwyV`&;2~Bpyas@tJRDR7l
zI9f0j+;hNMH)E)kwrgNX`U~EhC^S|Wdc$mcJ~)uc)%Lw_m$o1eTD{|vCju%rhIqKD
zl3#;pjS8}Z+}@KWkr7DHQ_d(O0^?H1H7M6&XA{SeL8g#XAa>XmU$L_@5FYDnBKstv
z^uke@DtYAaaHS{cfqN+UULZY>7|~%QOtrkJ7(J7TBzw3-SYm}B^3l{H3kG<Bd$y6K
zb5?ez=J(<pWL$n}$9nOYOSI&5V5_2Upw=$UBe$d?a4l`TBHH>`jEaY)+7RMRjSYw*
zWxZHHjEoJaC)eg()pvL<W6@Oa!8)zhkP7sa8n_Orz9G`=OI>ZT(X>ud?2Wy7E>h(Q
zrnq%h5VaRC*<9DnR|}{(X~s0;brU+1-%`7gizW)Uj>yEYga2ze^?L|vZ{S%sVETDW
z%kHfV$IM?sk4d`vz_;89@WEN;Q&iq^p^vhYD8!SBOjtS8-lcu5jJ%80CG%d%M(=4@
zJh?GOk4`(C?$+?~K_ON%XlhErSfEF}<pTs>=l?eh$?@MXBoiCs|6oWKj{i}j|0jlI
zWoQ2XAH{TmP)^!vp`9OuoEL~h_CyjqRq>>s7l8cB2!$vKB5~TS2Alw2fC`*IML;CO
zDNg^-hz?Fk)U9SmbCUg$_cH8mXB>UF;WX9!tm!rL7Lv6!D=hvy*4KZQztCQgh@g%}
zUS(%c5exEHR6wu30uWm(CGtu1D-3Ac6cQ#4a!lfv9+(O&NaTQ#r94h<X`Cpq%Vtgh
zks&{lga#6&1_LGq^skA}sPN%YkfpzP;_UMHV2c8Ui0VMCRO?IFK}~MGd#!IP#PJBj
z2&f4O3CN#0@XD?N`tb}D&}9fi90c08T>XEi`B9i*BYHi43c>QaK;ciQsA(?G&Y%K2
z^aaeLyFq#gW>F%XfUt<`Afq8b;6GUyZs8Zf@67%n`eU1)`wxEi$;`U4%TfFS9t52h
z#{>x(GHWZ`g@i@y;g}WHLNMQr<o_bP_#%V^`N@To2P3(^arA5SLlZ3UB^NSaXo{jn
zJ&F-t1HFo$hWr~4iU)ZTf&wO}aH}U=#)KX-6XwaEFg*+5-hVTl&ktupOn}O_yK`5^
z06UKs25yir-6M+nt@6un{G6hVYJCVipx*$v=Uqw25Ru`t-V5fZU4tBT9p&i5%^*y;
z@5&br*v$=%RfuqF6KUP@UmhLgE&e240H~zD2?^<ti9aM8K|jnf!DnSxc`Nn4oBSb0
zbYAby3fdKP(*Q!it=Kx@Egz5`%_1o9pur~6H{gfsb_R`K7z|RZ-@X?D(u)N8Ht*aW
z)8NG{rpFt09tINlpqU&5?C1USW9%0_Jv-LX<rn_@%?BFmlA4OrT+fr)zTb$l@~keu
z9w8%xytt?!2n7)lEpT#TBJlf9V>BV^TPxhHpJTIvtNGu_JlngG$J$t)Z)pFUT8`I%
zpDGuK7&Tgu(HHy#tsv4~hR?LopVmdc($5{#@5=EXsl%VG_|VSAhZ}~So7W$t&<=u}
z?jPja0+*3JxW9438$eGx$}ofY3+KeU;%;d_v6csa-H9m*FYgS|k&z$*hJJR%tA5SH
z0J912>%;qz{DY7mCM;$o&<5edM1I{`w;KAlenYq4fgF1~`08DBeqo62AD>QoE~v0@
zKSxXoP!a^FK>?niz3v<l`!^eEV0Pl{{THo?0sRE1SYdL&`(}?2>-miSdMnVspgRc^
zu2=OacJWgTt}$V^4SK-+cb@ewc3edXx)2$9AHlH5FpgiTs_x1ZGlrB-H-OGg=%?l)
zQgalL*>lIt0Pvn^Oe|ts%GU!EH7~`#WxK;MGj5HLR@t#08*HcGbETF%BZ=j2wB^^3
z$FF#{ZJNb4g9|gR!iwe#Fv{5Qe{cQmLLdyi<!wo4he@+U(W29%Vz(Qn(;BM+*6|O4
zP`)guX$u?Yn6e74PrcOEv%jIOF5kCch<A2VvT4kB>Q^`Z%$GF6S*U*jA>lpvy0n&a
zRWr+6mCd#F3Nh9IqeaPygZGlvQ)EN=DzwhwqydhL91J**N*0DMIiW-1b`qYQezpjg
z{_0f0Nv8a`AD9yJ!g9g79F(MqKQ33mC2ac%s-9MB{H2y1Xd#TXQs`(PLEf0fS?S$e
zw37@pm3Zgdz5fn=7t{H$?|NJnhuWb+IY!}_am4$);>FU$iN!{_F;JKs%nmALtoNMi
z6FC!U`lMjX_jp^ePF`G96cet(42WWgDH|<3Hn4aF%8{ndv300AHzVpkrfszY?`S+x
z`AFnk;#QQ~UYZJV!9V`pIv>kG^nCn^wdFfc-NHH`T8AoUy-Odk&kWUIi((MI<GELL
zygEXI>?yCjp1IgOKlpID!LOF^bk*h=r!i7>a_6#PUpfYe0aTW^b_VxEJE$++o)n8~
z7cp*ru8JS-Oq+8t<Po8ky>9(8k)K<x9^gjH4Xu}K6npje*Zua0d><>+a+*1yfVlSH
zr%$FaI^m0iuF33%BC@Exhi|e|DxtKDJyar`0t7vHh`*jI%n1l!xPzTOu+Yd=P8s-X
z1;&^ebyBx6GlY^15-Kz)3FvwXrPoP2`#pYXRhxBI_^9N0-PEh#*EA@Lc~h?gb+cV`
zi*SciVZoeC_A89Y;nUBOBUaSTb@{j1<>#IhxK4)QIzyTaHd21h5JNJBYbUuUGQat@
z4bn)M$R+U$YOmVZk&jST^;sGM;SjcR)cAhxuA?dQHdnbkTxuf~LN>vPN6WNBl<Qks
z=1)VaJd9aB0U5JoRAtq#Ee8KBn*#SZo6PTS-<`DyD+-il#BTo;W;yc}1;+X+xiDaq
zCLLj(S3p5NAV75t;;=?P7QG0eD1?;wLC_E{gEmIyS$IcL(${FQum7ghN?%p2%-D4o
z)}*Hmq6f|)<q>q%UeN<FVUh9VADiP<4&J8Jw_n)WAWYEp&z>7R|7>{3+VV0o2C+~U
zw9S;%Zb|9r5vF>B{ri_oc>P*lA0vkWA2OWXxTNX%&!lxb4a&Db^FTJO{8OWfJ~!r}
zfBSVAtw5BkDwJpDx{%yHOc6Jv$L8Lj-1gJ75p^um=JkVLPfwx5!=W~~@K2=h3YqHa
zaHSS=R<lMWf;~ErN)>yTwx^Ck?Z<*Eq?z&DIdO|B_qh{&8x$GVz+ztnG>pRE>rWTB
ztgLM+*1QTe2_N-0D7Zg~BgVk(%*6|Gt5In>dURoDY4MWjaBVQi6vD(C^a7VbQ^`2*
zq=*C;?;gFxTSg#-L`D*mDwT4K*=AclR=hgoC0AaT_X!}|u&h){?X}&qAH73vcC5(l
z4jL>7IQ2)p&!eW1kyg;ifUeuN0<>gGqImw*%<wl?JPwRu?E8?>770hg(E__V(h$>}
z{`Gm2jd}IEb~hoiPBM9r>6|R#FCjP6pT5DFg0FQoL?T+jH36C9OrXD08W0BZQUn4S
zcuL1h-N@?GIrZypCeW3;AbAFkNTd1LuZDG%SCUsbGAj$nY-lV5aHKcNk8`j*Hv^;X
z0ag6xof0Fs2sxIY(5cK&;W3IGuvUSpe7S0_WsTut&8uYctksKgMI+NY-)AE{RxHDQ
ze4@b1csjdjHI4Iwelu%Ki7@@oHcTs@36w5F35PKhT$;UE(;IoYyo9{On(Bz+ukSH9
zXPPCYvB1ul)S)yTq+Yw3p{;midPC{jT$9XON3b^OMU*_YAoL}VOnDm^i>Djj2hZIG
zavR#&wr|O2Lc&nWm(&XA)gsSXfa@79%Nbp1==&~5{hLg9A0%8hmdUQ`=LK|i5fJ_I
zC7xn=U3Xbz0+Vev-tCNv#C;cW^gruTRkdVP9Q~dh>b;jb5!3F(Stz`tc)!J_@>_tC
z3tjd$*QTrwNX<v<<iQy<y^K6;VY!|@v{JyRtgh(>|15L)a>DC=*0bX$n0g~lswx1U
zhE1Z#_m(Q@vYJN)lSRx&pKR5?_nn|}hpiHEPWZ_2tMSTYQt`6`VGv41D`;Bt=Ivc%
zo~DyeF3T7x+-0%zz}+A;L<Dz^XMK{c)y-K3(Pu{Q@rN9^mC|tH54{i8RIPU~?UM(N
z_=r79VoWGY?9Y-}eM+&H`$Wd`nEBSx$K4#39HR!6&aAu|(?N>{Xxb|QXV7NC6S}Lw
z9{Zr`qW{9)GjN&x3IjZj2Es+$x4!5Zlj{WLx0#RHTM+4pW{8F^uv6w>$E*CkYmzXE
z?-osd*%UQPPB4Vb3A@Z$F`=w$u-3+4bZXf?k+fVCZ;a`Vq4en*Fq)_GYXUu)dPPj1
zT^?3bB+30@NsK^R(?pW)f)G_+!8NJc;NBw3<IO4d<@By|-wnN$hBng)G_@Ws_b;)q
zrL&KJz;1Pl=$g<#{6YR|;{NpT$h6+jd3l%{N=uQt*?wKLw6B|HwrAiz!DYlidB2Wi
z=CDRWp2P$oZq~Fw!5B!yVC`xVs3P7mbd3?8YxiO`9*h%IOxX*)aI1y54J$D<MRDBw
z;*x1_nOMZp8DKhgC^S@_yaw2r3}nSW0`nAbFV)!$7Dw4Vs0d}^6x>C?$#Qx{Y(-N$
zvQ(f&s+!z#!41U=Q*cT$bF_uSOJKJltn}-3xGQf-ZP1x!qnvr_z^Tl*i@fOHTcWAn
zE)h!qJ6B%g@foh?42+<jZh$q4rCrLMsZ{{XxOQ!jdZ{WKdsM<A<D?O`{AajB++@kz
zi;QB%=R9J_4%`P?3CmXJ0NdP9&E7>4<6BC=0YhGug<hsBjG|-s<pbgY(KBjJ7mqzg
zKD9In!n1JkfOgtwHc!-KQ<cpaM-%bpy$fLqE8&xcb@Ic-%{Itff!eTc+$or!pl~nt
z3W-YC$!p?xHM|_)bS!#Hslc?sS;-xJ^l(YQTZo<9gp_`2H_6#mWK2d8K4h|%+)JRC
zl+0DI7B<k`qW0mGNz?RqUU!&Qc8c0fW8>i`rYa}m_FscIBG^+$m|Ysc2Ib;q&0Ep;
z9>w*;g3KXny*_nf7?4k^sObl~GO!h`qdqEd^n?sfCsXIQP8YE^Vgkr=#g}LB#CgUK
zQr4nlo~;H9f3`h=8l&@%nX86HFegpG7jsO-;{&mT(0TJ&5U{7u^u4ylakM{Sl&ZAM
z1lKyTm)kEl6&Lpp`L|HYow`8JcPMd3hp9bB?t$!8>vFtIlzuI_*4g)6gZ`@&DKco%
z8YRMW7;r|m@}VT&Shnp0RR2+)w~nr276J}0Q$1Hzb5BM41wRL4p7~C`F^^m{+z)O_
zg>Nfs9h87)@9)VgSmMWB{X-Z&_xg8<4TSAB@PHyk?2iwhZ^;?hJ`yMa(vtkNF{Say
z;wzLHD3gGXyMd@?l-l4$)8hMBF$l=Y5*Lr!dcVB39+XALsyuXyZtXPs7+pB8QJPK9
zIrGx?8;15-oD}sGBh+MUIe{d8X}`O%QvSWFn<xErRVhW&K4363!S}X+(SnXF_YxXD
zph{@0DftR!E{m#pw}Pu5`-_aP>yuH?k#kZnVi1);O||4D>7u6-!#qhX<~{kG!SF7P
z2esjZ2Bfzv)ZrEn(^tN(2i`;5-yt=}4mS5*J53a^Dx-DpzN!t?p!vJ+8LUKnv0<UO
zL!)Tk0IM4PgZ3rwwbVlEfKnd0DRO$#@pj|=Y-$|%gY#VxXCHW2Xu1$fPc5n)*C<bi
zG#W9<6=umhn4YVD^|v4)fQW-ai^QT1`R^5*C$(0%_=o=HZRFOXv_?`FTI)g*9?6#E
zdcCxC@=&D^y_S_1)Z&#oOs}U<Y)(p>vaw&@v!n*y5XqBsFhbH|apdw?3WpW1kB(;|
z-S!VF%uy1Jdi+n>z<|Y^noB=bHl4Jc%Ln%E>U&g{<}k|p6CgxUn&0C|-8mDlj_Fr5
zn5-L}6L#W$`U|a!uLKt|9w_Hhi)_Q#GY4WX^!47kmzOkaMhq$rxF5iXaH2-tF~ub5
zi)CWZE^tM1<LxU?)%ea+)y%$^*65Q}T!K~FCT=h85i(D-`9pXO#lxv`TH9BaJ`Pe&
zl;H$Bx$6S7Hu0S;ni@d_XY~{#VOqP`&CY9TS#blXf1t_YMx)a(jwpG=u=Cy>+?`P{
z^Rj9Xfa!co<~p>QwM`e>vY(rL{{jS~j;!|?D&GlXGS^GbWrx_#L|A6H+g7pA!#Ocu
z2t|~l@|s0y!!oLUVEsh><6ZJ4yj>_u3FU@SM)G4a6rEMvxct1oXr3RGtEgV`|HPbu
zJ!*zjyJc44u5VnXo_Mt*jRzO^S0YuXLoOhWN(hCre-YAN@QBJDj<tsskH>U6qVQd_
zAG<qqCQO88(M>+0A?kjNAre~|Zym4+)gwO5(I{6H@XbJajKvWd>XK903FvfoqD!)y
z>B*t;f*jf9m+a}HZ0o$2xL&mv{KTV;rDTGJOe?=^dJ)DX+FgRz(k!lAWVO3KGs36@
z@S=xw9!hl{_H-boB=CgTzQ(Gg{JQplx6!mbQk@Q4Aqkl*J|>HFkYqj$$v?}UWT)K^
z({CDklp&P-71svIC?~<E!c%U3hPLm#b$R}ykXxc82vMY=YDQkk+s*Q%wE5@j0I@c{
zt+{%?Sfg{XO(jxD?9a5=7~FI&fRY|I%$#<%nRl%{Usw*2`$JME@C_Bm2I6GyP?=dM
z-35s0lU5YFaN{<HvfWBc`qlYXfic)DUqTuyS$42+J4JI1Q{M&27#R%O7xA+YV5tN3
zmx$w<6!%{w#sLi*r3F>lj-06G=BLd^?Dj<19C(Cnvi08eNU6)mi*OA6^!qh2w&|h&
zL#ci->jyD!9sh_us?(mO)b>MCdffx4RaHovP1%~)+L#rTp!4tXfV%YX=kdj6dXDi)
z;N*@$2{&<R#Gqf53?1Zzh`_~<OS6jK9zR$JT04^$gZ`h>Eveb$E-(52#A(UUkijOe
znyz3WcGRr_?eBYn!uydI0D??l(%(y^v)(pQ<9Wfaq}^9FT1?HAZx>EXs+>SFFDb9r
zcqmVM0I760W9%K7*(2dYB;*Xv9IwwbMG2R<B1078rjQOx@a!4RwtH#4K^^|eFiZ#8
z><usF%}!)Gs(1U2zO&>cuna{BOuNjtitO2$hP&%zBSMm(E}M;eP%6;s>+yK-<H1k1
z_-n$XV#CT>1*=I(ic)0vZ>JdCp;#t#hrr|HF&vE#n}$N+5cBhxjTDRwKJDX&)%%Z+
zMAq?sFVGMYJxRvClL}dnbLadu+;~|~+lUm%u)grr2;YMzEzSml$lqB5!Ro+mo@3t1
z;&L*Hn9cSr6<-1LPJsm|*JY1$){2*8XKdOR+XusO+KG0&c$zlD70hGmL?h&GA~ol5
zx&!yp(cP6?cDl(?k9dCgW&`7o(}}c!FOiGBafnk~2x>tSqG32dR%I^}aX}YbtkiKT
z<L?WH%mp(U|N5(_vUau`UlEhYZ6}&p-M+xZmONE6Bnu-MKQN;M8FQe}8@Q{_meagW
z6Y$5~*G#$pb{y2F^7p_JG!K<zDYJAq5y??pElk~E3<t%=Bdp{E=75|^ttEwUh^vky
z#nMFxw>QL(4CUO2@U5M*oB*1HzL4#?*Su%=qn#c6kgG`lK8Igg&24+dlN2M6Q!Xp$
zyNBk~nl1Lw(3K@9Ro*QU^BS*={Hve_o6__sUPwf~^=^yI{^?_cs(BdxKoUUwpgk$K
zK1j%w<44q7Rpz9yVVOTYp%m<c=(qiP^?eU)N-DQBP@F~B@IUz*JAA$wgdZk?DNjn1
zXhlluDZrK1d2%@>Tz}B+=%rn<rYZ@l#@m_rw}(-T!KB@8f~LFnuaa7uZl}4>RLOKe
zJ0OGtYKSyP<Oqzb2fHC;9haHv@e5?7RGfLEo}^vcvam5w*m@cGZ;pP3M6(%MJiKO!
z@~TyhTNOzwSge}|nLlvu7l`o-zvs2;4HaFCWgOITY=sL`@~qj9c{FrV9%!(v=~7F=
z;Ulrw4f-)Wy@+ZP)CSJX%bN_A!OF!e=-k-%@2K`)t-K!V@=J{KvU)DH6h5lj#Muz>
zYS3g&R96`~YO2527ctAuM)p^sQ&S+m#xOkrY~&NDqj*yapTRRJ%GtXz3$C^RruOnf
zN@y#H7y#=Ei@{w1fXiG}#mfZq@5=2$qfzE}<Bt}_mglk8W0=g=;|u%^W*nH@`N)&>
z$|x!Q27DRdYU@BtG}z6Idv~xHv3p0;Cx7%J5o0o{mm{ex%EPtgw0J}^BBcLG(>cd|
z3&o8bvS;4QdPOP`?I!1RO}m0n(lrrE{_xiB(6l@mM7x4LULiCRg&_t9=}0c*)oX{h
zxlQMvx%cs51L|z5OXB>%SN7xR^0#O%`V0B0p_GlavY^(k5EN|Q*soHC$w#vCbHTMq
ztCsxjjgR^2G%C1jkzx(ckXR1}NfIZtgqvc}&6L968CA6RD6WZoOcKyVRl8}bo9SFu
zm1@BR8#Rkr%lB%E3Ydm)s(T*+<etDZ=ucy*>L7&4jqPRcmE{Zjl~2Nr)I+#&6ty<+
z^tQiOpSyV}(z{BXb7ieAAAvNc156p*qxP;&Pxqil!&~b_ih<FUlKE~~?6UwBmfGzc
z*N6;E%}KediFB@(DMf+fr5E*uO3De){P`OixpjO#oOVrjoy^!aI(aeQjn=C}&ouSx
zDUSsw_y~LhhKh39qnjqz6&gz}IwQF5hr~!t{7FTGleqHL2c-tGGx$J3<K{Fm>*)RQ
zBX^-512K+!4(f@VmM-1#9$!^1CJXzU1Bifssc*NLPsK0Tpj+UTeR`d21sPy%!o7;Z
zZl6mx_?3th4id8JIMLluUSBR2*oA~<PcqBuIUScJ5ZZ?(xjoO=1CW2{dvduyjIt*x
zTbACZZH*e`Om8fAJp3O=p{e1|Mx02il=1Xhi8puh|J{b!f_}BdyZehdA$kgg&M{H+
zRW&}S(S<g--RPm+ZK(&Y%ut2=irS``QZq^p2}beh!V*ggTRasnMzOLYZj*Gz>^YZx
z&|6Dor!m>|TdQ{aObA<06}ZrcU&gQL+^_7?M#55YeLqb*ni(1MUq`A$8C}^fuc`9O
z83=q`G~;GXiB-}`p*s^7M$?7tOyZpc-EP*m_Uark^hIaRbH$Cdz9ore<2MgFLQmp5
z5F+Np^!2lD5?hU_5EX2(_o;59ZnjdH9o`piJ`AZPwP=YKXu4;ug-XZjKlaupCyG+C
zZ-1bMpo{j~k~e)`NUi*@%z;1tnDm=zL~`wR!-!E=(C%ULsP(;{B`w#e^Ad%qP;ria
z)*19jw2K}H<dX?=jQuT^_kJnGNE4xv_fsI=AN{t9AX51_Jkt42e_v#u$wwAq<pkdD
z|JC3?VAh>WtKq^Z@G^;X1%tsTw>IwpvYm=cd{h(mTr#DVhfNYnt$sGa!)zA-{cjYu
zGxL0*+-B9b9djsL<7}j%=Xjdjj!0M@{{(<<BMkl^;;jLLUq6m`UU+d{0*6l=0mFuD
z){aVotS25ac#qikXL177n|ie|?a7EFx&PDrGS@5DLVLT@TSr4iV+?3h7pWRx$!Tvx
z_e0S6vKR8AgjLCPeY)Agt;+PcF-f?^6&4qW-k}b17|ZL#GH?9+VyU0T(z<|EuK8@%
zW&i829gam~Q}SkvXCFlmZ4xx&O+~0mxxjQ0QyQ-Vw`=0Vm+RFJNU%%j`Tyl+Yz&P5
zg`53hVr2LqZpO&M$j<tImj4OP7+G0ZS^xI{A2hwFg|)MZBLTgrwSlvVu!)hKu?aLU
zFSL`hqltkHwEKF@3#dKr`8x!(Sj<*Bb&Cyo%vI4|B-VziQD?nPY5mr!W%lr~%**!4
zW$w?*)Fzh|nbrRSK|sF0J-^3%CuBql%5*};HikfP8*4{8CVED0fQYP$3KJuMk&%U-
zk&zjej7-Jc(GvI{9F|NCXzyTdW6k}Cgor)R!0{auHE?_vl(n%2NI6*om{<Tz?A%No
z+>DF>W=2M?{|MUHa|1*ToXw2^vh)Bc8*88gESZRnt((2MshQ)upa1*>P#RGIn7Ft&
zX#Y|N2w4H`&5aDK0kQ^;W<aZVk46TT0A(8^bD*Q!|CpfUHFI>d<z`@TadDwHuyUZc
zu{Y(Xq6N5^JDLHMfDS-=XP`0Qcgg@c11sP^snNrd0aVP)9sXu0+n6}I7}x^=?|`Mb
z5zyM<UB$`T7-$c8_YP2&lmW=w0<HfRmib$N7VuAT08I2u|26lI_HRSx)_*A*7#Z1E
z*&0~8nOmCzOw28T0C{m4dPi4BT7ZGI@ozx`O9z{GeFJ9$b4vrmcY(h`HvotWDFO`M
zGyEq%2P1oPTSo_a2Xo8cIWqh<^Im2#Yhw``D=VP2qXX>k_(aX^fky9jcVqacxnHbp
zT&z9*Lz$Rc8=L%2!`R7|LDkya&Iu?f`j5;z1p6;$3UmapF)}i8vM>XHb^xHOkr~5p
z2P$s1z`vMGzv1^BygY1eYyl?kX#l;<O@QxTupSNu&Om^py%W&O<4?x_Qm{-+0Aq6_
zM}Q&F)Z7~OU+eEM(ByCWz5n*+t^jSu_wi!_F#i7h_m}Sbgc;jdTe|%#{#T0`gj8hJ
zG}LJRN%_A_VPP9rfCn8bfR34k1;EC_0bu812YCIj6$JzHf1>z9Q_|YR2Eg@qu<up+
zPr%OqOaSFS>p=zhUt4lE?{f<TQ2smSI*e?LM(=-^{yz`<Un>9qX8gC6|JO19zY&Q$
zSz7+Zr~F&s|A%j2Wp3&AkHq`jIyt`YfUM2?CRqPpQFY+oiz^E>Hg~f6zg$U2gZEt!
zvNpB+Zz0Vc#LZoS#tP<+MrMD_#^12&?>)0Lw+1TMIGF#wEC4ztM#lfcdtWZ2FYnjG
z;e90kVgcWG=YKtlSsU3H|6Vd?Hg<r4y}f}OEaUqmF|)A&Jec0M(HQ9Z*AN33=&fxW
z-&Fwb*?9p>Z0uowx0H(mz+mtj`kP{XryPvT&5g|Mjhw9hi?cHS#@~x#?(pTmAp37&
z$A5J>0St!!rI?sm-bs4{BcLVF#PJW9?H~B>!T7H-)8FtHpyU6@bFusv|33;`00yJ~
zQf%+OjchF6i}>G&S$`u|R{!SqySfa<e~|Akf&W@yf2aP(1N(2jozweB{;Tp{29tkP
z-eWT{cmBi1Z?=t-{T~YNBBuXZeCM0~ejb2-io6GE_HWql<uP-!H3M4zA@Pox|3SWY
z?8_hId+Ps&$o`(A<?ng<m;YX<e;dj4ei#`3waxZ!%lduJ{@}l7ZS!w9@5(m+!+uY}
z_Fwk9G28bC#QJ}F#>(^$`oA4}zdH=JK>PQH;tx^wzc6!~fBVAvUNT!tr++hGc`uNi
zlZ_+L*zk{5FukLHRFUbuM*pg?{icA<e-x4JUEJaQp#Im(doT``1`cL_n0Sxn-^kui
z6@#OhJ@Ak2z9;MGV)KW>yA7v*2lCzLUvH;_k&XSI6?^ZG^B?4UMP2?_hj*i{e~|B9
z-TolotL^@8%<q=mf%bn#@IObu$jSb_+>U?U>GuoqANsFn7YK9(8o@5j+Zgc#TGR%%
z-&YIay3p-T@Xe6zs;5xVc`Vs?I6WdlB~n#o_;1+X3MCHrAg}I-QQinH6TWyHx70(0
zw8ks8J$XLs#VSqjw!tn=VGK;>9}Cs><HO+5sR-_QzSw!H`F#Oz0qv9`%eHgkL{!NA
zc<<6D;ab;Ux-u3#zpJ=c#V!r^T>679QZ+)yuV|Jm$MD-67BQqF9X>QQ>bWb@?9J^A
zYS!c%p;Rmltk+2-OM!>hCUexoocmh23bR8OHYv6i7C!h5>ew{7hwwp?6y}0Qabd&;
z-$EuM6+OttSU;iXXeZGxZfzM!R7^%o`z0=kF;cI#ItD+DUU-;{U1fb+>RQ`l#04xG
z6Bie8iCp>S!oJUYLm@DCq4qS!vd+;^a&8`bTT2i(n#VWN$G>VPwcSUwO}1fRx6xOd
zgx5r0LhQWIWFhgc_c*&e^DWX#NNYTLEhbNFXI%GOl{*H?#7><MVWIL9Q<0EA=BG7s
zkPXM))O=*sfvMtZGX#Or>88>i#$FazU7@$T*n4IDl{2Q5!YTda?&5;i;AWXHSt!Y7
zh}uMGVHEpI3%wi+F*S@fo+B%Nez;V+hWnmGP-?}vQgFIW`~-AdHe~;<Tdy?d$ry#i
zl&1Fan6K%8|2Pa{R_j}cflqT&vN~Rv+zndTCpJxn<sDwF+JXFo{G5>fcu)*nvGD@!
z(FyYGNJ+RJ)`PjbTZD+8kV!~bX0~5UnnLBYD>BG1grA2;E~~_Q1Wc7urAz8`zmCW7
zD$gkRH+Q`uMVA-w-IQ)?kKy&kFX>j3c&ChM&2(!mx{bIFRYlZ>)C~HmlZ$G{&K!cI
zlcP55w$AYobK$iGDR@12eUIJf?435anYW<VdzpyLULO5K*sx~37y0?<e!wMqct*Bq
zhT7Gtc@ASwvoaZNjQ?a6f!==iV|k3Ni3g)-mnXYm*?!jHHy+(1hN|yoo|9Ab_eDxV
z5bML$(zD=J8J4%7SIN~^eKEt=20aLxa?f9^#QSj8&dTt=)hBo2F3+V7v&4u?drM*j
z7jOt-sBj3a%09^}AcoQNM6Ja{d>WxK57wy3<ngMFyU#@1v*_2<xDi`$DkE&qV;gL<
zU5OEvY%ko(2~!4OJeOp1*eZ5JvN9TheoCF4V?B4$K0$EN?C4EO%iR6O+^+37^<#J>
zX*n;VK10ClIyJ1>Fk|bLdI=wl)ECxxpg=#26rd0UAC&%;+Yg!8Kl`GDDyqa&b^2_e
z<T%0v6Y`<#qi93E<2~}MadJ;lHeY#q2HW^myAKU+WB#cHM>pBAx!_we#`eb{6e?`c
z>CjOX29PPcP#o3ODe=RayO<WH1V^z+{~t|jxq=AOxxSc8*u8nR;-8fSlIXrAe!J``
zw8IF5`kX=Z6ahxfWa@Ia&)k*6%21e{)y&c<ShlTG&EdXc;C?jCgm*~wwTXT+_58{&
zT+G#9lcIH~^zMi)RNTmi&)VWU^LED^<h&8uL5vF%eB|@}*a3^LVV0zBNJdz<TXQXU
z)Ywl1llh#I)uZtE-J^@br(zS+6S7X*oF`uM3qpAUN(zsX=(*ZaM3=Hx<vnF!It8cb
z+r{WK?sgsX<V4q=7r~u)g|F~jvCT|mYwWL&pCJOelTWn@vmoJ)m)<B6V=96XLFGNB
z#Z$ia8f>Mj5BWXaPx7#>W#lVZ3C#}CWUDQVxUY@m#ibiz5QbpsvwT)Tgw0ZLh9$?T
zuHNZBDv#B@Q&LJV6)6yb_*}*I!bNf|^lOVoU{Ls&bbob_>PRrImb-{jonns`JOt!b
zxL=|a9-GcSZcHU7IB^a`)7%#VDNva9%Qx`ieR5=<i83+bbf=59CC$yp68$rNl<Cj<
z1R(X4i4`*xkXEUW_;At`NYdLg2o(HTC>dg#3UajEvIibv$}AGhN{E8&I$4_nbmw!Q
zF*G76d{+)`e8e&6?aWeoKip~JW?ep6`WuuMyNoT#RjS&K7wK12TyLkza@9co)I*ap
z{bA@rw4Dq;*6z^fH=^<*0i;5-3*69um|NYBNY}@FAjhRGUjOYNzXSp*f?~Hvf79$U
zWDmTGO0;B|^{1~u%b$(8C>`T*mr-}U+4`t!xAQxGDZaN0YXFL30^86HL`pu0GEJtI
zz1y;{*{Yq><;4sq@{b=>V9E5!Pw+efDY2ycx#>kt%*Um<aywl=_0xTH-WGeFWp&kK
zBM9A%&R%ML4omB!z0NE$-nL|;FdQ{pv!hM=bW0DAkFP%9?B9Zsle50l>XW$q0A=Si
zPS7-1GcTI}_Wios^yzTcm;oG&9?IsIH+5e}66#j#cchU{Ae!y{Xag+MX|0zHlCLz#
z31!lmhY-$`gX|8zz5oCPKC@xe3me%J^)dq$1hx3{kL4oDr)8#2=PX%Jjps!nix?s`
zh^v4vY<x!p<ms9@C*}l8E?2>$o|wg0--4Y*s=^h3?mOZTHS3Xw`r-lGQc&_lU1hKC
z1381ed1CZboOwbSj`}po6E;UnM+wlC6$rio?r8dZM4qoc6$aFm<Hq}{a4`^2RP>IL
zcxHC73$LLB)QsTScBj6rHNI8+`+;)kgQ_t7ECYl?(VHAdeZE}sj*Z$Zo{{Hl_Ke_H
zEw%%TuTP7dP#cNrDR_3GGW{LxQ68(m^lQ=Lhm2hka6K0S5hSGE+!>lO4n9$Q*CGx4
zx&)HCC_9nQ8JWKs6+NJZJQf-Snh(v(vEflAW4n&hd#6T4_7QdK;LFG=OF-aBs)|ki
zvxv19D}R&LoBdF=N1Hunx2o0~msn}m{H#RfmQyS%*E0X6z74kB_i+aQ{4MW8MAz-4
zXyqW!3==NGg>r!|jSG6GAz@b&2}7}GdKYz`r=V0D+AbK(0%4giU!)s6ptww+O--L^
zK`jqG%gEbteScsf$_^P&=gpGde#P%}CRuIdKjBb4F*@`a?`?!C1EFXFLkqEPmP49K
zonVSc+b>jKA1Hf;Dqeb1TZJ!4k#mBi;d)y}V&y;ycauP(obGwu*gz=uL+I%WBd%~C
z*+#UH@6k)}EH~16#d8<V-wP0I(()A|T>Jj8DQJ)fbWPgDH57>1=O>;(PXv7%hSKZ`
zIy=pn$NYMzQfMk%Zwc{nDUwU|Mf^yrN`M$u+g7&+Zu!HA?SxsxVt4jLS6uN$uqqg*
z@8n#(d(Qs)cbQ;;I^RdIc+6*R0_kv`%A3a=^*6FHjfB7(2-3w6ZcM{rF58`Y@z2I-
z!V?7ZQc9Gd?AmlH)W2AXG9stlG(q$!WE);6!Qm>PtA35hek@ZEA{j7=^;ID@!HXb<
z5CGmKziJeb$0>d96B|5+(=BF}@jVLe17U}NzYs8^;v^qKXAm|(HVL>*iYLNsWejaN
zcSLTI<?Rm#*I)#7?_k;>WhX^%@_O=9=iOkk*frngVBdcs{mxng1g);G1MHW`?9w_<
z<fu3)9@w;SuU+z8c;PpQ)sGfa?kJCpjx*B7cUDC)ty94nKVgIikX(&OLXPva`6}pQ
zqIL{7T(n>)fp)$oJ|85uN`liS5Dy8Ye~b6K-7yr-nw%JVUf}dxU6Gi~TwgoW;c5K@
z4{JTJ1!=?_x>4mys4)SFmd!*Yz`cpSh0>C=i;Uq&LeRU3AwbqLeV1q&7^d6_)OJtH
z0*-?Oc>zW?%&3wdMNi<tE$AMDpac6(<OA8AZ`_^oV+0KYV8Z)h%w6k^2G;cLS5V3A
zyY}%A5G7BJAW@^SqnBIs_)(z!5SWyYl#M@JgJA7ylAZ?XQ|0$m+)VdjEpUGb!#M3s
zMA3!2S3=dD5|^x0`Qmz>ZeePe`6=EQnx2vywtM@mECY`|VhSvoEZ^3yv-FxD!>648
z^P#&d?Xt(`xN#$0`}A#go64r(hL)S0q4jddPw^_NA}^B*fJAwoDZ6byUCswmG`Hp^
z;ZPeHdjww(K9jD-a#9L__<g?@^-TcT19dne>SO7nqp?FSM!E>7oV!tDTUybRKbwU2
zxX)rwb0!dQ#2k)4Yg~qh3L3VfYv|xk(K*DuJ87UUR9~9P&x%C@`vO+T=Cr{lMWAx|
zSP}mz6djbaSaYih!@Bw7)TGCA4V@oS`~4$yrsLZWFNNom1jjz~+#)I0x<t-}p&2E-
zl`VQq9gh)xr?oe2^iW*FhN5e-!xonA(CFtNjvk#kdu#t-IVr-1qu21s%hZ=_@rp0y
zwQzMGF2BoLbS8J;RBkNq(bPk!199f297<&8)RM!gO7ByWuDUm&N~JC|)h<TUIqOfl
zn7UsejY*_rm@ouB%GM7Fd?fBqH!K=c>_)^p``%(=)HF4dWnbS>1;=Y@=^c5N9LuIz
zB|XXKr@VWidu$9V7@F^^2gNq`$zjv~>1Gvu;E3UJyET05iyGdV)1K*1pM;m3@cC0@
zFA7%6#HoZ`!H23&J$cEE77pz6Nd}LY&k*izbjJ4MSgr^rxH%nTgpa5kQXA9r7x#e+
zGhKJ}%HK+MD6(*H-94~djtv_MW{kcl7(#mw_<=0KGUn9u^xhYayW{Ri&QuTuBB%s3
zhbxtm&d?SR_!plrWFH8F<ezN&k&C53a(36H4nE7mxTxM`oUkS@F89%9Y?>E_XzFjY
zPy-irHEAO4u4QBMshhrex8ioWueS28yZNc^iA)IwCu`;f<7SEKwvKLCIeiPBV1M$G
zKg1TJS21kRLYZsvPD9aS2~*3hKgG_5tjA?lIwX$9$H=dpYB)OILPSI;u?dz+j#BR?
z??i9$RM(<>2t1nlBm*-nxEC_PVJzR9w>J`Kryd?`5POMTwOWCh$?bqi2IWjZO)Seh
z21$_OcDS%@!6hfD^C4w`Ez)dm4T(-CKJ4MA41wV+dN*yAsk}7>C;IU7P-#Ua5hjk<
zY%m26{%a5u1|Vc7IAg?z0{hB9YXD%mk5@cXe#`MdoHJqQtMWrIiC7Ug!NqpMA{{+`
z&P;>pu8xP|rg-W@{pUhEB;HBNvn!v+ZZ?T7xIiE$vR;hA#Ai5MLOmGcWE#sd6=Dwp
zbM|UIXL~jI6^z_qXwVoF$fW%&|5CGn4O50x*S4H$x#?d6Gpfu2m!=}8Xo`MiXe73<
zUYe3n(%SwH{h_qqXYPbj&Xg3!2%!|dnDgGLtAOst=f@yy9_xoXPRNJGDGa1pjz!<9
zcu?yws=$pCkPQhh?`F$o!`BLYU`$B1#eVzsDsx_+piy6gu)O{tnqVLYxyx_hhic!>
zx)47ni2Y#vy)?NyVc)rm4XqCBs5OHPh~6h^qD0eV#HU~m_U2Hwq7lo9|C%;CWn~ra
zo3_FuR#7E$)Tf*A$+JQ^zNfk-qzOQBGt>#yS#*9sieF!}yI<YaHh@%WxsP~Li~Eqe
z0~oq1z6?}mN5pst+dBv5pLTZ=v4HwQ3u~&FBBww=E<@IMVW{M&7^F;u`iEwfO@f)A
zr_Y*Z6}HX3o@*59(KseB<uFqT%_Dkj6yKli7zHd;XUCQmyDLyDj+P`VFDUIAQxoo6
z%KgGzj5hpeF&t8Y2IkV?q}N{rj)lj#fyGjw*J;(u2hMP?;iGtQUFT$Sy`2ktK_Lkv
z2a(&hEinAi&Z6`pl?rww>NsttmL{_?M+kLr8h^?@ce|~Go&3{Of(X~VmHh|LUJm<O
z)GLVxGVYK|Df`z(-gU|{Q;E+#ef4$TRjvw$ke_{}x7TN}F}pW4A2owSN8^d_n-fUU
z1WcMZ2$Gl6HoQ<;;xtBo+8l)Lvpi=|Y}2R&0X;cxyt7>Ixx~N6%|njF0}|1kV8_l<
z63lm?%|Kwe<Q!43@#BKH{M($&jrJVIe@gkfz#3_JHg=<ZAQc(Xo+dC4TL7L1!2KY;
z19__5{64^g!{R9v>n^^f$$F~}wWy5uX(UBzt0?o3%KHtCEThO!yAwrTp!cGm2R%#4
zC1j&zP^^M2Pk=a6eb>{MXRcC`{WKCRK?96J$2jaj`r+Eai#Y<@Ls5vz1xM=05lUk-
z3f-K=DMAUA1xzK~jxE;Elfe7<F^>0;`KgNo+fc|W$S*4ur$9zyR-S$Ti2<1cOrM)@
za`vJFtiH28wwRz56#atJG(r-T^hSa<@oh|2-_1+ns4xR_M1aaDZuP_ZRZ`j;$Cfl4
z5vTA-N0cS#F#Re#n9U8rBu2xWQ>ZhnbW@<Rz&N88Zz+EJrR?WS8!@o0`LhHBUX4&=
z1E4g*XYawyCSMvDBE;{TyjBt|&XI1Wj9mOh^~3~BhlRg%Du5H^l8x}n4k*LIk}6FR
zbF<Eus7t_uh8#i|rM555mZY8)=65HIcDCmou!^oA)AlwypB@!M>r$QFFZR>>1I~e=
zOBbxs`o)7|z!Jl|lvaT<HHs<&*K>3N_#W!5aSWKj9MCrJaM@iEk?Y*Al?gYDKZxzq
za2?Voltn7{!1#K8xQ)O+Y0o~*e&!{jD`X<AW|K)+mqkI5q~m|Zm`jj61z`#?CdojR
z;yNWym5c@tBB=pP_M1j+G#XL238R406jz|HimO{22g`a*Drq{U8Y+M{`Byln?HdW`
znaX}))3WKaSws7YSDaNB@WUiK7~C6k<CjDa|LpT(!|`}1Z5DENdCHFImtkEnG0u-o
z0^liGi~0tkat!*mkVQ!Q6WfQ(%;w<5kB*}-lnmN^f;*l(B83|ux(4>_J98R02!ZO&
zv|#Nb;b9;k@));Y6;Hlu>KE_pOX&$$Z4#Ij&DvcYe&mEF;u)sDoE*gMr0(mKk-aM)
zJljt<>`W+>mBvms@G!KIPM6%Xo>mRCX~uP#GsVATiR+$=^o&*dZig>`a$ZL9Sc0?%
zs{A-_BRQ|8(+9PpYd)`HIustMt+%1opm$;sj8~cWn!uJ{RZJnr>4GF1N(MzE#_b#d
z8#Y252;MwXdRF4I6>m`Jior9f2_L0jX9a&(Gcf>+ygWNAAvSc_G`HiY?ZeTs9>MP|
zPG{Nci2zQGQ5Mvg4lLR{6{5ET_()Hb9-7{=8eUh+A3s!OQGjntaT{5bvK0E8FvH5l
zHE^?th{W*7Bab)}^ZH;6)d>l3@paCI7{twB4SAqFKFW@^g*D!id&%rsJAEtk!75-~
z$x_oE+oYk-)eAB_DkgQjEsJ3~qHpvXo*$g)Hky8}^7vWC%bV_ea<|&NfidPje;va6
z2saLLku)@ww+6yBW)HcJ)!;{*Vd(GJm^zX|>UKC)I-jF<0?<`Y!s?m-G0UH~w!;XX
ziE`sZsE{ul*Q8F3uO%`(kRB$Vd8F*;3v<j^cj!B35RKXDKQZ6c3c0HsSi?4vct_{O
z@zo!Cb^v3Et~;F?vsqH4GTMV_s3lk^EM-HhzwRegCAPhMETsfA9FBDgD*Fy0T6=UZ
zm-!Cch9QdRi`-o;ApwPpOb-GA=q@d4tp#!7A&9zjotx~0p^0;POa22MIqh8VC#cHc
z0OaY$b_s2Nlb~OH=;ZMw3SF+v_2iPMSZS$m$NACbe3VF|$=L_)-2U2LfHcyhNB((;
zY2EsqeMw8_N;ie;rDx-a8Epb^e@B#7_i2}(Jkw?%Mg?3+Rfihz%A6;t9^YaS=e}u{
zOR;`IcW61MAhoAScT9T;+2f!Jq}s3|50_CNQ=CQEk>QA^s;D!)^=<MCUbu=Kk8LET
z5AAAyGXaDmi>1F%Z^I&0W6F-)`g3#ORoz(NSf!_c!<P~@P!vLRqOr8NqV?hOYTE-&
zf0jX<e$B={wvJi*oa)dz%$@Z)S=OVMODw*>32hn=Y>abyO?bzVcUhg}xK7<+1%@Tg
zs|VR{&s%p=PUvOYm8cN@mr24o+3SzgzQOxym!2g&1mBa;<KEFa+Qm-A`JIS*_1^OE
zQqa+?4>4t;Xatprn7EFSh|FibV0B|(g`?7|o25kc_VN(83}N6#@B{d1!(2+7E2>5}
z8ONX1bao9ImRPe?(3$-tXo|1i#DD67Uysu+rS0nal(=Vm8$>Xr@1=`O5ADnlNU2sV
zJ21ddESGg*gVyw%^v8b*>BP_;lqHem+$g+^qblYl>fTvJl+~fb)<M_bfmKZ7L*uW%
zva3=37Jf`RYriQX&);9&u^r)vD?9(f5Ai5@RT`#NF-Bf+(~!WSyw!C(M`l(b|E*k5
zWF7ufGB#&f{7Q8=NYRksmBFkq#I}+&CT<5|rdMvInH)OTu4Hmi!GK-pzQc{LGE1P6
zb<ac>ni+5M`8JcNZHKq<iCZJRk@t!pQIF)~#?v-HfcdtP>;uWsc$hGTT0CpY+0PbX
z;}~K#cK%1+P3`AR&-uClF^D#aL77p?`1QxTgP4@tTKr*fomhO3EI2^=OE=#JN*tGL
z?BEG(ftjaPv*E%(sa37|T4GR<P6;UAmVwC*_zQ{iie`tL!d14BaBw_TGCo&GQTi5I
z95@-0%7m~9Qr1sWs^#a|%gC$KoC3)HA^=j2Q+T=QmT;%2P#U=X(3=dmNcsz^Cc;`^
z@&?P?`#T+&_$|A)@i+&f_2egaZHzh;@GO}h^B9BqQWpGEwls@Lu@DWJ?ws(xw+7Cq
zk>Fq`!fU<jukG2_t@(3HKsC$f=D79It;0zllxS5swa__IE26F5uX5sKjIj=cqbQ=A
zE0IvZPgbE*{riR{LXe26=yA67Zt@v0Vz1>?x)_nC=948V>CJe4p_I-w4zEe(S6U^3
zb|}PU49h-99_^L8Xuj0-&axp^=5CvA`1oJSVUJ;n=NZ+nWQStclQ7yic~Q|dM82v~
zHDQC~m6w>t?xbP0@mz}!Kc1g@@8WI=SgR{%yQ#>%*-2(Z+N5_P1nJjvb4JPG_u~X1
z*7$ttS3s2$#n!BpIZ!OC&r1J=aSp{8K2KAG6Vc<p_wycD9=#EHW8wQj9QmQB?dOun
z*8~jz^N|iH0TSn(Z}EIj{h!K;%bFOvl9)9E^sCRd;)qADR<-VKn%=^1ky3p}d3t?c
z^*U^r&fgwz$k~{q1A_{fDN8Hi4X19TYII&s-I|uR%}|^to@ho;AeM^so(6Pv^d22<
zaPV$@Wl=CFLR}rCzycvxu`FAz#*ZJWtP0%q_HR2l?|9^6_;*E@IiL=}(nt%!$>o=e
zWWEcwWs@m5%@#Q%=FJCF%=IJ7>czl-z&X)AzUo;r+=+hW^Y{vrvUWh92M@#+S0Q7)
zB)b<J?`hfy{OFx)a|{KDY{zjRUi{@tCvbJxEyutZ%bLvo+9rcWp$t=c3Aaxu!*B-e
zM9}DGn!6b7K4A&+5eqe(Qcd)$zdj9GwerzgI$Be)|4>MphfQv(ufBk*^v+_f@UL45
zvi;tCuj?Mv)~nUJyKTu(C*Iqij(xd}9^!j4gO_STp87KVc^DogF7_vD@*^W>aFI66
zu%Y*{F8uczIdw-+0V+#Bs&m}|ix8!H_ft>8eMBGoIrl@(*zYJiP<o@!B1iR!tKwMK
zHzOKp?`dDQe~`@X>ah80yOATtr05px)?V{%785LkS2A+e$qlJ7GoEx@8g}*1KiomE
zhin$rS@qog$XAmiQTWkM^P08qq6arv`NRDS`(DlHRE(1lv<SpTo5v|UotbT7!0uZ6
z=mXNQ^4GYO-kT03TFA0N-uYoI=W)zjEs%z{jbpnS_eNC9U||#P3ccbfA!kKlWU+2m
zIfJx7b?=xXaXD5|OjiS04jKrKt%dx@4$a;@8q?UEg2FJ19f--NaSjC>1m5HUJW7j$
zkmL*o7385R8_+4t(JWQQaI1^vddYT{Nss5~Q5^vKkp`Ncm$|~XaRbVd?#to~w`g`~
zIY8_AO8l-bmz2@S4C;j3cR!uA+b3?WVH5%?NYYg<kCheu36_eh`X4Quanc9$M6fVU
z-t?t5rrpJwUoI?*bY*p69TsV|*E8pAq{JBdZ3MYKA&?CeBD&;x>E0%yhNxH(fD@@^
znRYE5$5nqZTd_jy9`FZIR+4Z(i|H!8hI|qjwl#_Z^b?Q=VrPC~ZPX+++zgWUZrQ5T
zI(v!x3B`!Pe7(6VtBl?{3Gw#ExtSK<u+-IKL%K>VJyJbkYe7K%UAMX5yXgKeKCc7c
znexo*W|pN5);BIyR&cN%?ISf}lV#3<dHuY3P-U~tQ4RZr@fji~_lW(b4Tg7_Z2A7a
z<?@7b*_&XCEi%h!g6j2i4=+L%O9M06+4B-Ogpt&QWIu$JcD)C6AM@UReRb#98>1*{
zep<bAn9BFfhFAP`wWXYJ_3&dtD+m}8x(LlXve;@IWay}-G%S7P_)Cl6)x0h~LLw(k
zZtGWKQq<>MU02j>XDB4uT($GYy9SnQwN;n#IK3=W)Y_;b$Wt(4m1q}F8}X%;tgBN*
z#@_24P?%-wOoyt{gzKN}FtfkTj3?lUTMgG$%hQH}KpvD#6(W|6EF?1YV~U9?^H(al
zW-hO+cOVnDDOrHrB1u<vj9ouNmBbs>HdRG3?Q#4C=JLTF?g9s8vFEpNkW**Vd9lRb
zk|wXS=jqhRkbsO97$(x?+f*&>*0es`;>_KlEbb?5Dsv#i4vOE>qD(w4pvK>P8nguZ
zY*t8fMnD*w-a2~;yqFQ8-{e&%4_-?n37<G@hH6^MU2U56T}kEal6uZ^<vu2{0AfRi
z&%F*>k0#=*BhT-N<@VvVW})q<@u!8jvU+alV;8VNb?FjosRC|3k&MRw3~i}i;}UP$
zyBPmiA5R-=A9Z?2!n)1vV{n&Y{YL(XUEik5jB;9dYK}={s#fka3OmujLytDscyx;O
zp>;~}S9H_qCzT6+vTSRRj8w)r5h>RuhVeM0UV!<NYGq-fbHG%t_ovBN7|;uPV^s(l
zOLL5_IrIzTAD1g`)krhNeA#W^%kJ;bSPs#{1Q10nXe*kYHudLE$>wAgR)HMX>0q40
zSiRkf0jMtrdb;r+J(b&?XjH8x?>2-cni1q-ReNmiRmVON@V6(>w}-$KkE*I0R9^K{
zbc2IiwuXA5iBr6tbn<wYFIyR7O(^cH0ZObxxk%WZMJHw8z9Pznxg=HHKEiRY8RG6m
z{ru!|<+DStxV=&f$VES!)*Fd#PN)NZT?~>HJD}KE{z=41BqCHS-c@OLrvnGKk7&!B
zk$2qXwRI(&SAha{WNnkT{&m0x$DKRl9x*NK>n%y&8_x+ZiPHd+ctu~y&DPdA_3-mF
zqg>Xf4j`winqt?JKQ#5o!ddzET#zaKP$@OLw5&-Hl4MWnKx4YL2xG?R*0=7{Aq7#i
zEZAez4-0Y>d)Vf2c*4fhcl%=BNu4(ud?LqBCg@^aSa;Q%a83pHEeyHnpH4@MMElo%
zq$+Z2=|yw36{C^j5e|y~Y!{7A8+`P~H&o83`w3reAJ|DSmP?=0BLl5w+}-$gQIn*t
zbm(DxE<$VeiwcWP45kuG5(_nvZ47!*friCDh)Cq)*Bw`6kes3m!M3Mga_5?GabvD=
zLbE0$LKJ=>c7=!uMGbnrd(EP!4};lU#}1lB-Fm_NP+%k<gq$>q=FbJYlU>&u@g=s~
zS;jJ-1`IjXl#*8Q4cQ$+ECQNGyqd|htXP<K0n8~PV8>Vxm++S7Ld{W}_5`jFB;BVM
zzBbx8d`h?eEF<~iD~Rc}m+W=ULw2leCdTU&;5<R@^zdEZbl=8(FZVuHF`^3Ed{n5O
zxK%y<(Y|T-LYMO$R%Qdunm69KQfQ;5|1^f0Db5PBYfk1x;ZO~KPsZB~f=woh?`Wn7
z_9d!}QSt>DnGtEt;B)^7tmVRnE#cx}cym=+-kg(<<nyGRQzvf6!Ui|Q$Mtqarf3Y#
zcFWkKC$P(y;Qi-0ZE!P?l5ph+Xhwqy*Bw(+gaG{sB~gZH8|YMTu@5~{Qw}{%-jWPy
z8yW_il|d0)k>(QPD}^)cDvDQuS91??LQ)gkW7%<#D~GP1tHHpkzfcfWUtF9<3oxE3
z^0dF4f7C)?BUbnPMX13$qmX5aRq3=~?2CP2kA$+wi#ltr50mf(r+tC-z{4^!9P*Bm
z!0Eu|b?;$n=QV*}P(8bT-2{Ygczu0QKP;ab!`0Qw-z0AtgavA?Hp=XBixh`&wzT;`
zCxQ0t!{&X!+@hO&Y21S_FE^Ho<7Vy>siU%c4HqBbgt%HqAttX-4QATS>5FY3D2cqd
zD1RP8(B?CDOiP>r?;B2B0a~5@lQ&zI>bO={?_;CDFx~ELy-VI(J_s2f@nP7*LG3b0
zM&xpP`i|H!k%N5r*Ca}NDx<wwkFT!Nw!NdiUxJD)ggeeN(<Fk?OAg9sjV@~_#;qfH
zF;!fB&SNVw7+;q1bOMQ_h@F>Kog|9gfo_PyyiTv!5IHZaA^xQ%!Y<r~s(J>QW-kWD
z4w;g7LNO>?6s`^=P7S4i?d()!Ueh~Qx&<P?<LRuY-M~DN105I&w_sY`05^->hmULn
zY)h`SW#;KVU$E)v=xOzPa}T3Tcns@3;3|m|jyL6{rk_6~yEA+wJwSTqR2pkh6D+NU
z;<6(qIC?g@pu4R7G5Ab1*%mBdwxK8AJSbsjSuoP2E7l6T;F?RpD+pm}1Yj4H_Ek8l
zE5g$v<VxNLRW-A~4=%NG%T!H+j~(scAIC=o4u(y9xno<t8(qDuRrF&OwxF^#IF1c3
z7XHNzLMG|h?|2?dWLH>*tL6Iwz`Vr?B|2vfnlW0giiTqug-)f`J|{@iV9h;#c{V>Q
zhD<2Cu*6EN=;f#90-aEw5f;gdGiBylMM{*h_=?`rcS`Noh+1K140U+R=|JMuh_{VS
zZ?cIet4KrGEpLz2mX;^-L9&M@s;7op;rTI~nK9h%SNJhiABRWIvX8h00rd~6IqG2-
zmE6EjZZ@ztd?Pm7?(P1<-fL(la1`KOFhw9YXFBHx`A5j+b<UokTe-BvOUYDwFGx%w
z1F^R4IhdMlpLZAOtfV623mvg<_MJ<U`RG&v9Y0F2uli~$YgP8#xSZXrv1%YkE2}`I
z*aRzm3<0c0C_zr;?E44N@zO!p^+y^%6w~~)j|2EeJp^nh-|!lF&K{+6oy1j{LYs_(
zeJu%^^M@E}!;Te<o#%?Gesa8*DV}30@DcDD*Yywcp1R+opsf>?<&v3)<~t`f-$w|&
zI&&jCgdqVp;?PPBkhY6}E7i+OActj5Lb?Y-sdpeZwHGEqMoV#yK#qb89ui8I53Ge!
zxc4DfXFZ{VTn)NqV&ZuX4w#Q1Ef=xchK+B=z=Mbb76w<rVnvzP(V6w%`dvXr?no>W
zErYm$8H2p7rWqefUlVv5h9fBITa|{wawJAsFLYM8s!09x{Mot2OU<*G!pC`lc9ZN>
z&`AR-Y!~eWvr9f17lQnH!S2w?SdlCoG0RHs!!pc)m^3n;=={h(F0CXeJ{li2D7a&%
z{fn3COYU@^zcSXi{g`yM`rD~(<4m|$5#RAB`O$}RRaC3V8rb`jX4W(dxdy3TD;k5R
zAd?bkVNI(8aGsI9y(DQ(HkCxb5yUM7ds;r+T;Mb(&L?JB$KkFHT$|KX@*!w`mHCty
z1~P3`W1|6!&u@H!YK)l1>DE(eT4@pB!OzAW5nue!qA>eqTe(MO*o2jbp{GQKMIVEI
zrUu}RHy$Nj7CyveE2BW?dW&%jd&iwH#)2swlEr+DJNMWTDm{1_E%X?5VZGtM!gp3L
z<ADh~TmHnmIP&<Fof>dMAuoLc>ue<?$GI?hvIk1Rcpjznaaqxb24#cfk_hIhl=|Zu
zwFOYg!AiEZ1!)-YDcfQ%(=heklKSS0*H85{W~e?qYadZ;$%<xq?^*p6QS14O@|u+B
zWCi1ITV-HJdV)|w;gP-Qtod`tqc);e=Fo{u^J{1Fv|0Tv(P!14+LQQ$6?+N1#WI!C
zEs3pi_wcwr3tv3+#K+|f&5=7$3m4%OIe33>hI{2lB_eZ$jm?ZtwD!S-T1EXbIg*5j
z#@Hy-xneIPGWnL%gJXZ%)mn5sl&#Pyb|<f1k!bWYxuqT%qLxx|cVyA_YGpY|my_^g
z<FixoIFIhn*w5i&1*{rocNiN#;`x%p2Jys7b+9j_VW&=~AYDuCog5dlE2nCh^f#Ra
zmSRdM?x07t0o|UxHQpEds&T}v%4+_Wy=WXV@j=vzDJTPTh=M+@e3rJ8=32CQaNT~+
zzD%cBVTBQ<avv}Pv|HThQrPWxa5d&NDlhd<%I(d<Qmls_qFZKq6(1~W(XfscW<J!z
zvx`~fZIw6C*c<Eqij#5#DOBNu6q?QqDBZzVy}B8TB#x1%E}oLzC{qJ}LDb{n$8mK#
zN98XbT3yYZS|ph--ATq{MTZM2zb4<2#=6A#;mi@WP|_NX<XltRFV!*!7!4bmvJ|4T
zY`8Rl#uRN?CyogLcT-38?JWs-nWny7ACl|!nq=g_iI_>>V9k|6oNFdKkCuqUoNkUn
zDpWc!DM~Bp*%@?F{?3k}UtpEG+_r#nD=HA3N>k7_d{`Er4)VdqBN%FjLwM?V1Vh#!
zF^HIIvhbLSZ!D@^Vx)*t*DNf(Qw?GF`l#b%2X~m_*BWHhP{Fs|eUt7=zVgD^0w$yf
z7y}zvOh_TkW~zp~A9299U*uH}!osI?+2K8P1yr5F8mq^ShHPG|viYQyTdp<<sB5b{
zSN_=Ws2;5TY1aW1Ro}rUcDexKs78m<G#Nyw)ZZx#jXs;3YCUva)8tb=QagJJEKPM0
zd434;dsh9$u)bAA@~U+_AT01Rv9xn2vw73IE$wJ;K&uW=rl(m(ZPQLspMcxrhSBT|
zI)$>5IWg%t2;$JW63u5TP=ac7f4MBQlFmP=xpkT`AC8Q310+v8t7;dP@3(ClKB8oh
zcf)*ythEZzQ9&hsfONo6rp=Cwz?!?BdE!OgA3q>$nqJ%<18v#+F;ktehUIbzz2FeH
z4nMs~L8>&{F0#OW!o_Cnc07~8Yd?^^+vjC{M;qA!-h;PuP*31`5<>okdXiu+V)X~)
zc01o^9AGQt8cYudGfRwr7a2)R7<oli&!{9ym94UjS1XcvYUq9%{w4FXir0>F)itVQ
zTxyS|_m<#DI2+d$*mqk3<q92U>$Pa#{#0g|tX-<Q2FW&5U*+~JuTp&;3)Tj{3x`a<
z_yw~njC-10*B$voe--J(X*aVEfg-XZ$ymb^eo2Z0<Towa$f;>hCm)<)>Z4St8|rEN
zNE5s|@@EE-S&DHVkVt#h3mFFTF(+j@wU8Ra-DP!$>g+}^H{n8WVU=u|Sm-({!5n+I
zLWU5emD;fn=c=A6f<4jM<H9zHm^LF8=!0?^xjYyc4i}^Qu6?egOlxQa>Nzr7Z+y=>
zByE?~_sq{Xy?!E;W~F1+%Fp+4sb?h35ZxG&vvA@TUEMjJ))o+IT|bi+#ePGfM)bQN
z!>>!hu(J#oHAHi(`T{Dbj)NQ$tvYI$1!f?tYC#`DetXnm>qAFg98dm&5WNo?1mP3S
z#R@5*nl-KB_h1?+PZBWnfLCA%K*Epk)4*!mar^;R+o>1G97+J{My#b(*V-|jQplrl
z3BQ4q1>}ts3PGldWJL?jmdl^a&Ss~+YTJD!ldhNxwQlO|$@P1V$MfqI0w-UNhKDnk
zhD3{cv7+(d!_yXurq;QL@6y+_Au{$#zK%kLM+6yjU8)fOOgj;K2ySb+?2DKAiA=Nr
zUxH-^KEF#Foq52C*hlcVu;zwx6g^Xm{_tm#TyzCJ4$E+B*?@j=SVE^5?cLX20Xew}
z45?~coZ^&|I?PF}<vuSY(!hv-#eN(S7VPgXc%8AP2ym_B#KX@j-w%2@aZn9+J20A{
z-9A@q`ZLw-^?D`b5j?T08HxzomB$OD`C@6jbY!nO7{}aXU8C{;*f|L<f7^Vm^B8-M
zzVJR%R)W(I9vc55k?M@xs*dn>U`UQYeS@?ybw6urqsh|lx@(@<&cUl{J$tMhG@IDd
z8m1NR8i_pcc~WH?&HvR(#I{Uc2$?bz<k;Nc$M;n7TSD@Y&u0-jB+kvMPYS<?CLWjM
z2Hp88vV+)DIh2UQHzJ>@4oETKP>%r3E^0<xO<QqGK|}TV*ELzwrwsFbjoI_(&LRs+
z1?)!FOCTe?-J`C%w7j+qE+@al1k88$yiR3oBnpj3%^9BWN02zL<%Q9as`07J2(vI^
zBFp8J&(vY|1QitX=lAZ;_)+o3RpR=Bx7u5?4JKLp!YQjfo&k*<wUw;*)m9U*h{bgv
z8Y?I<dUZwZUaywLi_ok~lG+v{@kU)>)^HecNg?`4#f3<j(5+R2mK*Z-70hZs8r!+O
z4)u}3G{s~eT<D)lSFAr>@1JlCvf^tKXO`>PQRi7#jMB{S-+8($DZCcj_4sf(IM$`^
zolJb8-J+8s`G$+Y5{60nx+%hA%RkRLl66w%hi??;r{8ioCc}*o*UtD|_EnC7v~FVh
zq?mW1RD>^iWr4)+ROJ@ST4&&bMJU{aA5+pMaE;Z>)uM5vigGd(+_)9XA_Y~d(+hvH
zr>uv9RVPbRu5V#`KZef$5O@J~q@0R{k*mM|)@+8Ae>*z?*5<M?I9QWCr4}pqI(*2N
zmLrvQUDz)!_AqP#Zm2;}*ZykJXJ@LaTkU+8$_yddlCih8IoK4&8~%AoN2CmaQ{Ry+
zBKKs!gCgB}R>WL>rmG#<au~WH*ttY?G`X3)rMTJyHG-3{nMrPL4&O)l_7$7_d)q+-
zv)U;PvbT}&a&OD{SL*A|Q%kW~leqz{wSH|ODeA=oDeY+QwqYOEh*C~tFRNR8{b2Co
z335ftvwaIdA0BzW>CqwiGFdj&u$}2gHI^|+sqLBrBnj|W8<jfKhLlxJ5ap`xXxJ2<
zTA?Alma#*=JFPPinKY%=?E6wm34VYB9PuX7^3KzQB(NnfJNW(%q@kGat)Gg!s~it3
z>%Yteq%W)E?hK?G$*FOpXon5D)E|=|ZcOyx@@|){d|mQ)8Iwe_W_4A*==bTCOxB>o
zW3U`lypgYx`B0EVxWn5W5)FS!E2us9f>PjfDc?ig7B%nOhOre9<oIQ#@Z|?2g~|7m
zNX3WhvUtNYYw~U%^vP1&Egg$vxx}kYpFs*E_3w|r%9V!m?vLTic7v~R+dkBD%6o>l
zHTW>x(``e7g6T$3JJK&zEqt~T?g^}5I~kn!x)OG9chZHi^i5vBkt$MhLqXdwm~UWN
zc>bFE(c1VFwB9%Ztb4KQtDC&DCe`8<Q}Bh&frr3|j>{$T{f~@D;pgnIC6nUkxDP%{
z#orZ$);ddap9umMqF=(<_*+~se3>v0pu)>3BS5QJG)zh9oKi$_iz&1#Tn9rIJwYYl
z=*h9lL$k~LD-NzKj~9Jsr$~_wEGL0c$>_5+d7t82LOgVsF)SLmpsB()av5q}TLUBr
zJ;96?2wM`A*g-)f$7q+v7M^}oOb|vfd@GGhpD86eY(Y&wHlOH8pTXGpmKY^W@*r*d
zvaeKns3VVuxCu!C)wf|gD4V_zbJOe)I;ps0kN9GO76|Rne8w|FJ<s7-m!F!v+feM|
zu^Ksaq8=oh-!XfTZ#hKM0_hcPq5_Hfg?691m$)8dw)XyLeDq3%%8ySp9@f-+Tz|M_
z+2>6Zf<tKpjA`DaABE{*IHO+;*HUUq{y=^XCd<%1HPe3QWjI#$Fqs&&)U~ljNkzc#
z;~@NPqFPcrMpw9~KL)EG7gZtOkuT#$<Uv)9nE!;psUC^Qhzf5a_Emh|EfwNXXaP(Y
z4}#|DdX#_G_L*h`OYINoW{vYFt-e!Z-H3VtL7?KIn|x@`EYuILcugL4y_B0O_A{J$
z9QYXsoxKeGCzd+|oINrHx9vNm1260qW6^V}S&yMi_-~hEQv1rSP{Pau%BFs+pW$z@
z_r(PxuNyq^=W#<k1R+9&X#~5=E}u-uT0A}%xZ;#*4jY<S<p<uS`!#)|o`ysk_bGq&
zpH030?r`YP-AxpIq^DAR^MP(=4MR@Cy={XpB|tLwUW`fKh7+B{(T}CJ0F0b`MZCDc
zvr+$bjiH8aD0z}G`l!QPq6F8AjO~@bm?7&XU1#It_jEh#*AVUT3UURGBp1!Ls%L(>
z0qe+!8QpE8Y>S(olqjp@xr@X@)P?%Neb^SZ5|VBV{m1wJ>;Um^4`;gv!zl9uAY^X=
z?cZD-f~%yMa==~KEH*)rF(Hy@;g<w|1+Cr|sB80jJ=CSczTu&s-AxB>lxCi05w
zL&=L79)S?|Ws{%IsV#i+!Wk7`z8Hn1g9H=O!Emys)SlhJMUTs7<R`2*qs2P0RunQ`
z6C7wK2$;**c@yttS~n6pYV?|Zyp05OCdrAhD}$1-2;=fqSBg?r2H^;rk)uedG?773
ziSn<zF0>&I(vKxEkPq7)bF{x4id(~`dPZs$=d!PrFjry-q6h8cOBq}vRCT&r8N0OD
zB5JZ((a_F{*vVKdH7P1h4`zn;SAW9>igazoX^cLcUOBKTNH?ejJVf@z0w!Ncwjj+w
z&S|2ZkZ07h5-4bHOd?4$B5y3y3L$HN=buBY`a#$^*P)axw5L4%ftwa~fVLl>ZyWMz
zcDWUyreMLv)l*LHY_W6Eiliwe{1H-`96TeCH_((V(G}wI)pz(-UbKggtqs%8R%8$P
z^co|vRjbm?8%uI<N;@`O=hZJJql>U9Ls54sL-s=g@>Iu8VGpoT4@`yZi&R%UzvahJ
z&)|KE4fu9^M!~@jmb)puXqIigKIUD~hAeDEDWQS${j_eFy#|ll?;D=}1lBV)X^_f?
zbK;NSb#wY@gcE5r9iLc4qq6iWIr`!}zg8mFSv4sP2XVOT={C`$V#n&G1XnVMm7b&B
zrSC&&G<&#7^P*WgBDKZzXb^y2L0S|%K&%&nu?cQYVd*j$dMr<)HQhGR(J!7^K7+4i
zn94MSC{OE%h(5Z65uUq=*SA5?w94XReDFCfaeRgz={a-{mgAqhb`BSy8I+7;yStSa
zOp}G(5_&M34fNOhmMlH_Qw{YZdNnzvBaJ(y9~zHH_Nx*2b<89_1vko}iQHPN?hTln
z@;&?pd2UqBm+|S^)TRQ%mGwECtJK2BT@)%myZa?UUP0w`{|I?(X8sQlQ<kZv?n)=B
zT{o^5`vY@2dMYGr;C0pYoxZnL$%YI&jTpUOzd8AK1g>MIJrm268J2v7`<_&hp+7;P
z$dee)g5%O{Aq6|@iW(@><935-GYtA_1$TlNHuX$;d=F6qG^IUaA(9x}^4TO8&ia@S
z;l6BXRp#ZAJUrNV3&2hGWP#Wqnxx<2921`CNXjiFhLswc3hiLv7TaVm$+gOwr!40+
zh!quP^(%J|@eC#=JL{)Y!oXYCrPev)x4v(Q0&5`s0&(oz{3II^MrZgDt|ZlO;$e=^
z<GDH;${^hXZecOm2FtkO;pfWc?%ONkXwdTb-K^>Apukjn33kX9nBM>wS782dDI+!M
z>}K$T>|Sd1kPKUao(c!~R$00Fe%EV0rJtOKHzj=(jnAl&m4`kGFfn?Hr*AV2ox4p~
z0*NAYK72n;v9;#=@?BLDciBoBNV&b%zDGh*DwZVvy+vWu2MpTJ0g0#=)EUj|3?yA>
zgpf&i2S4Usi|*A`3?WTsE(}%J2!Qh_YfzN@T3BcEbFqb(Cyw9_J#b029u5dt+<iM@
zsRN=MoblTIjTv9gC19@=c=OW=Yk%HwMIrM-*KFF}Ygg7rDtzI#hCsB2{Ukup=cg@Q
zs3Oc)xH&4ELN$ov#R3m2<c??4PcvAD`GZ-BR4g+aDM!agOJ4fa<%|kF<HPltwkPpQ
zJ7TVoa#=ORU|tie;;Y`nfNmKDA0~=QO&yhb*Shr}GlR4ifelLs$in#g$9)Y|YMz|B
zpjXWXAw}N#7w9np|Kezg0l}-(2?-!3c;hT<TI}frUjR0;%V#K_VS-vi(9mz_;S9Mx
zX*nHZA;YhsCKp`Rj6W^=9wJl`aM^=SMw5T7=M9p&4SR>V9@E|k;T+p={M1P$9K2PW
zN5zvyw%c)(IU!20x-BF(f+elk;mTCJrBAqxpWf-ne!<ct1=AzI9o?(wOB_a36_c}C
zT1(dX<O$wRASzz2ykooRb!uw|n#2Uj-bKIls<XZVSM0rIa2-psE-IERwwTGXn3>UH
zW@eTw28)@QnbBghn3<WGnVF@x?3sCIN6fx)=e;<8?u`?%R>tZs=<HH{%F4>B%v*W0
z)x7WAW<|5#W(or(FH1$I==_v(P7BV?Pr&c1^BS;$yMR9jUpOto1<FFGS}~~5f!{W9
z<^T>IgPH<Yjr?7>1VH=)0&9J9XfCdQMMR({pl4+GOSl9UW;*tNmHjnZ0xJ_e!+)IZ
z*x?`9jtSSYf6<9POy>AH9C(X{fWp$jj&_bt5)iJ5iiwH?Vl?{_tWjqm5fO+6yC0so
zjk`>IRNi@5RMtIsp1(A|G~+v=X(4u`Gj}89hI{8d1%P=^15rn(Cn6#OfrUZ=`E>iA
zo<hj}0^PM{Cwwu0LIM}+EcgJ;@%HzgVr3AbdqzhDhWy#W2IT(@NML(f0BM?!fDZ#I
z{uw)%PaG7bt3uYoI}wNv7dR*zY6%rl8c+0RIj%a^Wu*>K5-Ka7sJOWAMFTtE_-9{#
zb)PLDYPx0kZD-^u=u;pd!m@0{Ujw|FHr!Q$c^UcO;A^X^V4aa<z+ltLfoTYv5JD@!
zw}fO+)2RAzk4g-D*r!lW3aLPZoSmKUq0bV)0*l#eouW{i!ka*KzMXQcBs4Y_P)Ih&
zxIpqVV25n{n_fQ5A0U^&?@W*c#00mrKfKDk*nPXbu>H!*5F@2^8zE7wV5)pL_<%Tu
zQ$b9BFyRBiHoSv_ab_d3-s`H+%>sXS0e#Hiz{sX5f&|)7d@SOLM@2gJ7ygu;4G%cc
z^SZZRI3{jm0q&po1uRnd^ZT&O=TLvuwc9np%LyAj8-MK8lZ!Yw6f4V{QdkRgQhq_U
z*(LDY!beE^FGQ~mqiDVm!N5ZP`NRQ0h$cY2Ys1o)DnOtJq=VLb)>l$+JlfJwq~2pv
zn27j4;db{04+B721b|HQVR{Encu_yNK0|*3F3V9i_5Rw622J|f@PJL<yI*-T_8KGm
z!<+mv&{F_tv+_X4Yo8UiR=5+F>mBZcTc6*>NQxU=Xr1@#hXgrolntb_7|5egpY<UH
zAjvUe^A>aGL6((6*CemmYfv%PDg<!!BjrjO%UeeE2|l#V+Y*9RC$ERu7pq_xV4JtJ
z17kk=?71<C!;jJX4U!Mu*pKV$j_cPBMtLV&o5%8lXPC>6!>&kK$fyoPaT<8UT`$mZ
z0<W`Dv|g;^?Z*vuW~N7D@ZHUN_z!b@lmb}CAk`CrDP<?G>fHt$JHdCaWs0tH#QBc;
zZ>o|wK*B^_cz`2W6i{Iyz^(pP^FvtM?`Z)*{J?A=9tI#bcKk0u-w{Q-b-u(P0z<9a
zfWK1SUufsylz9<CeSGd41`lanO4#sg72CSFx#;ufTP0NXuT_95eet<h@}V|CwS%>K
z#TG;uSYi#=w!Lv1Bht{K5xqic8kqr2j4rYZX}cKWEho3l;7T=Uzh1U1>w+;0U9f1s
zeb@V3#M?Y2$;K$K#{FunQt{lQ1tRR}Cz7_VGQJ-5rneZ8e0F@QHck_p-^{LPVx>iK
zK8GK=59wRPa?3K(#~kd55^*JhDV+L*E?H%w(Vm&#78JJM5jG+CG?(#xd)FbI!TvE-
zjXN@F(u7u=cw)p*jqTZe+CHb34`YshK-fgU4gJO6PB*?s!jn@h!+5!r?n2=q+zoWN
zJ;1phDVj5FTfO;>sDO28j)4d#)^r=!gAb`Gv`&fh8YJ!;1p80l5Q_B==fW`_L+Z44
zo0#<&dww&X5E<f%%oR0x9@DYdN_qwSS&!xC*!+BWIFuiFFqjXIz$tQz;~6mzJN?su
zIrqesTYo>jSXzk*ohHb(YsAVLxMrl$SBe^vwS5rR*fbgZ4r%(>)45AqFd}!{1*h+j
z+A#ydz5~e@lgKgDhZwDdc+`C_zM{J-+ubErlgJgZ1~daSb<yy}Rhq-n+!e5M%Y3wu
z_*7wsXcyuNm~UMBQ?Tp<@rAhQ&7}tlGVV}&ca|);vb*6kJDbF>-Z$6Whmh?*blogc
z#yjaf)=Rkilf-A**Vj<R^j<tIkQ4lw(?t9qz^kohs2``NYaO*gsnIxglZQ?^>O^mS
z4w9{T4GHcfP&S;C&;n4%v~6=`lLLgR+QOVSc_&D9K*Do0;KpkAR$m^QL{!izjI2|2
zHiJ^6=E;68RFYzFA&;C~gcLM?wi*?;wMZv@JHR1R%<`9%XpTt74w~wUj7yInb002c
zF%0|C!>d2=avv-XNh&vFp|0*Lgs2S%K_jp$TVht=Gxi)4yX~vVGj;&d-yUy8gBUQ+
z%)5rUr+2*n;@Etoy!OtvbHO0afs@rpe;AmFv?F=Vn59f0P25QAq&8G0dEhF5d^1j|
z!B$}zMLHVm`leR$32*-B8>3e8qF2l)ADY{IT^UjmWFCyY4jBxy`A9p%a#4ZJSNl+f
zOoQG;U16$(YLo$4WX-M_D0E@SXWnWoVgzf!Z`NYkYr|1r`leB|%F(5;R0VI1sx}!E
zF0JU*6wpbWSmdhvb55qYH&tqLeZYrnhAdcOAKeZ$8YwU8IT~U44D&u^DvvIaA7o?I
z1F!5f`>t9>Sj;psl^ellykO^8w5GR5sTW56tnABm(MJie1&a{`!^&(8ibEUy;j810
z{Uf*G%TJHgmr*OH%LDXGJCf-r%H^L=dZXvx>dXsemj=>kUVw8Zj+$Iw_ZT>T$i?7u
znMfHq89cId__16-M0-m`)}lK&^v)H=NZ!MI8&3cOXArzgUSOfc7En@N)YgSga{k6U
zF;%KMf0<!Dsk8sFD!Je7IR4>N?4kAiD=^vxc?R_xfiAdd3Gd?l&&Z;bKFFu7LPI@S
z1m@=Uxcsy`cDiyl8B@|Mc0xuA`fjg@#uAKBz1ZzNGkp)H+=XEJ6ImsH*sJ?Jyi2dI
zsT@jhZ8Ip#*U~~%?B9%D`Q;apTZrLipdBgu#6cKwa(-PHPz{4@*h&iKS4jmrqkM`l
z6Fa?Jqqr83hs)!1_zXTL$%98y%;#Nkl*!K|&){37;a~DaSZhQctvBN0@;!Uj{);x7
zDE&~FfSAuS_m!-H$qA{Z#EP6Dc<|)+7!vk)Ch1#g1L}=up!Ku+0-#ks)+AjGtR&-k
ztYYTxG;mNuu55S}Opz^&@oxI=ORR44O;OLq?%dj|Dd*Lk{#nFW*yl=~m^o!tX3THl
z>rg0U%9l6T!d2QP({m&IbuCCE6my@C;`~-!@RAp4_ZH$#lrH`2*7PMmy)Awlm|@N_
zNilYK=Y>k<Oc_tI?_tObGxm^0H(zU2gW>Ngo~Zcgi?m%K=VI;UA5$FUU4;9XZ?~5E
ziMU{7#%jS(+wCZp+c?_VSwVG$2h}SG0n@9?7JcSHezUkhC|B};G45sK*7t6vQZ&@_
z+m!ASE6$b@{j`FjXgxvzw!}r8_;C(dOj3f0zsR<I5My7P28rv1PG!mdeUG$*U~gn;
z?fDJcS;=UAMmf^}j!}<-{BW(%<D(+mK0<rMnp{6wxZY{123hZt^#eiGBz+;$9A#-P
zr?s$~`?l)HRuVsM$}@gi<RG<M4SNA5Uhl4TB8CO?^^N|_`y()ZgLH2TObNfDKk9{*
zMMZ0ktfi^(L!7R-+jzmEE>l|x_od*}gg`XG{^t=(m((Bv&ee1``~{9lLwZjay9iAy
zKZ!+a(nwrJ+p+aU?GBn;*VNgAl+~Er8}d4;L;m7WXTvWwU^}wRmhpT8gWc<5@<N~+
z2cp&#zA6G59MYH5SR33WiM{oF4)1a0`I$za^Jx=OnzjWVp61HKm}#gM1mbof_RgzD
z?j@H!;kCbj4$x)}haxXkOs%v(JZ*&bnNf9rZ9Rax8gYNNi`R!HYVt_#adqss<T2A@
z^t>Y$E9zO-B9$9?8ovpiA2C#i`hKnmNW6EFK(zM$Qri%Q@a0^#)pN=GF=oznRq`JE
zMGkxM@Q`15Rzb$H?JODcVzXEKm*0NlB(+o_98KW-WE<_d(EHh$=;r0Kj3%G$d}hE`
zW5nSu5tX}Zin*<07+ZOo8v)(&@)-?K_Tpx7vC#XneLtwmuhi)<Fj1(TJp`%@lMIPJ
zh*4u!m@h63I^R37vX*38oS%}B*=h&oUsSP3Zo>5}zKARD_&?-8p&Ak)@W~klAb$&n
zI#1)eiayDX^dK{!oK(F(6NkW+>+JbpQQxR4cM|<&pfE=h|K5s~*yYPF%ls4BVI<+I
zo?>J4dJxl|Rk;FLFiHod*w)g>CxVa>PCI1`gf*vY1n3G4I)jzY3*B?+O17e!E<Qvx
zgSrkSwI+vS{r=X7Hp@uFXScxWZB^?dWB5B~n&3_W`F?GB{C-&enMcFvbN=A!=i(BB
z8);Yl;DEt}vHgh{!_l!Y-sfY@vbB|5GDabal0ouJfBlv*SHTB}X5cTw^hDxEf#EuV
zy&g19SDZuSf?SPrGT}%-L*^mqqt9R3eL`sQF;e+QDpOp95V|y`Eg=^+I-jPlOF@X#
zIy=KAS|VjmTJc2`O~?_m(#N}7ge4zBv_EyB(;2D{N5(+&FI&Q4594>;fS)Kul|)}t
zr7^YxZJMzwqn!uvO1EWq1y%*wj1X#^IEU{jh}2B?RsLkdE=$f+YI8v<;(@g=yA!b;
zC;iz-qxzOYd~$TESL)Wj^`;0f_l6W}?WQNYrl0Na@Tnhf9osk(CyNRFEQ9N8RC#VL
zAJa@5v0DE=wH?Q?K8MFw(ENSCjAh8tRAmzlOZf`uD9r7u&-wUPTTZgAw3{`mhNcfL
z1W`*t$W?rFESRCYdVJp4h>Bswp|BhZX}vD>GH3BY`YCrskj9MC=P{{U2X)WSl3|pT
z?WScC-npEaP;y|pN5E~S(So7tneU07@&X|vrN!D=dLti4t5*V$&SbsEi`Ru$vT0jC
zK$psqv^6sNJAo7(RlLoOUl%DUo@Sa;SqXI!qP684>95aWkH_*%HN81t`#J%Fcf5+^
zGy2%D;qg{@RtYs%5ID7TN!`g0#ld;LCnXK~%sTMgw`_S^I}PalsZ|zzUG{)b&v2-$
z@#6`y@as?g<VpH%Y0mPz^sTARz~D1@OeQIkMY>Gpm5;=rK~(5*DpqODMpS6r<15z6
zLS8VHEHH#G`r{R;si^fcO*g9r%x~aD0Z9(aFQb7;D4v|q56IsK^!T-?fC`(Im_DiE
z#EW^ce&3l#qH2~EMiZ~%>!u!(cIztskdD$=td&d__w(P~j%edKylXaCuhF3vt9Wf@
z{Fs*&fcbPj;eh?+*rep4av!SDS(1sAVaix$PPZOngh^PP@UG&~BV66L$&Ehm=9e9$
z@SH6_4@J^uEfC>~A0qYFyf_cC6+`E1He0JeV0Zpz-;RR1?5rvLy+WEc)+@i58qY>`
zVoEFQRH80pR9BXxD%0a=uq!t@q&D<=7iN=_Gg<7;a#YcLuHqNdX5T3<<(e{<Q)+{;
zvSw|A4C^G_Z#5ja@2Sj7D)LLvUU_@^_m9_6^jz10`pBVp=M72X!#IJd(j?=n<kFhX
zKY8zL_<iH#hz(<3p!HGqhU=%lwMi#v&GH&4>7byS_h)_&2`;2oI>F(VJA91yYZuDH
zVBu2jV;9<~tgLBPvp@N|m$tx`j8I^y$`$T2DX&1)B#qPWQ(3pa!#qwf8fs1?OR=hu
zP7v{OEm8i}hG1j0=w;1NmoMf5LdD%MZBaS6LNfGMjW8Z^vSP+m#!o5~&_u4<y$pc>
ziFMvdiJc@3HY5%Y+4T$j=mwT=N|m{6I8D;ZoxRPU2{96@NVPBd?V`EF2#SpFy233B
z*Y1~F8Og;)k3~e?8I^9Cp3?Tv^wL%&doJ#dNoeA#_)`pd?|x<{VY4BWE~uSm>cB6G
z1g8fJe@=l5Yh6);O8An2FN;a-&0_;gwN(?r3y~wvN%L*VX?<22ki{P5Et*<**-fsk
z5E=}E<9Iec{Lqp|;(hVPXVXZlL<uXkjd^Qjw~d+s2f6N+x_S#0(Ix6+%@Yc_LM?Ok
zncemdDH7AIdjec!@l#-hQBBsGh4~F*htWyV+*aY~ioQ;5710kcHfEhY1Vv}pFNd}~
zS=yRFeu|y$YB<8p*cUhakgywENL7AUJ!aDBKYG-_B-uh4ZJ>i?6J5`xvwU}Xuyd;#
zhi>(99*XmdSi4+<48KNNPd1izt{cgC8d$V1n1EOo8Tsz(@y5@N1dFUlXZT$z#_mfe
zh_|v_L?a^V$V!s0Q7gL;lsW$Jv)q}k?)5#ao#KR1>dA}(ap*AvWUyJ{kz;?{-J>U}
ze!uT@Zkq@F0Z0@r-<*QQ<_9*8mzHjGAz*y)Y?scRGo3-QQ=#yF<xjh>$QWc~DfED|
zV<-FlvmoscrZ4HT245Z)C%OA7$5#yU98^x7Z{{qPkGD6@H0$StdJSag7LmUvCkm@3
zM+#kwj^4X`wsaFDl7UJK)Svd{`Ch*v7ZnQQpG3@UWFiyags+VLaP2`JCzL`LxGo)k
z;G7?3Axo&K{4U6fz`C!YH}=}ln5ROLL1a1UrZ_e3T!^N7KSY#cU8h#5lQv;|L0kh=
z{WZ_Uo{?dcc>Qn+#dEJIj8+J7C8VjIVt+}MlF!gVpx`XBl}SDhDUHhd!^e-O!^y>)
zKVgt=e#>A8uq*l4MUwaYmyN;Z_7~0*?W2s7#r7Mdedp8odK+e?Pnq{W0y0!XA>w^s
zk0Axk7OZt(P>jY7`>Z!tuI#NO9($T|Ga*bqb7ijI;toNPWVOe8NUOib&bdQI?QRkt
z_xZ`s#()?TTCFqC@h&fCV_k*!9g8p@6FjZ|SXNK*zw<^MOEI;sy$K(8*n;^E4)4+s
z7C1q?d}32}G$zV_<oO+bBg(#+Lkk)Bz<FeVQevq5j6=cwL@)Q=APB-TPQZ|sY=&?v
z$klHVbDsk0RHPWqI*`Qo$~p>u3vT)0wwppD^ahn_nfFRp7M%!pyG51v$9<{tLyz!<
zrt0c*ia>>@s{YfbO0iISyKe7kmqqt>*Dom$MOokZW;XFa4=AUfuA81hyseGi?;Z5G
zOynXTlEy<iHTDeHz>#xl6hmZ|As|J=D0z5@y*Kc2NP?G#PkM305Y(Mx&(`Rz<i45Q
zI*%7|=|YbX>D)PeE#U0q-}BHNaf*Zuqnb#`s!DH0Te<IYZxmz*&j2Qj!ixs1h82~o
zE7}2^)e~2HJ91OU{2L3mFcyNVF|2U+c2Mxj!HzS(tZMvw$^ypcB6fqN#68kztR`BS
zXC&F8EG<og3>_u8sC$g5XTbEtz4R#gdDPr-lpCC0Mr4tA3Z~(gmG+E$HB*R$I-sct
zeNeC1nS<1_`_~;84a*Zv-iyh^1QL>!=S07XRQ0FmMLXT-myJ6<j|3<5<7zsk@F!`(
z--;hLXQaehEN*|xscQ9BSY9AR2<)x!8wWdy(&Jt<*c`yynVC4%U)F*fg?`Y4<fONm
zfBU*7zs&SpP%RjCz<{s%tMhBMF;7x-M2Iwb?Ac;sA-CbDiBanb0^T3F%<IWK>pcNY
zP(P9#e=UhV$ITkFu21VRTJ>X}6+<?XdXB=_=57y*r}V;Ga+qMqo=THdEo;GA=GVyT
z^K#Y%Apbz=N-xhZJCr~FrPus8O6?wzlY?(ucgoU{j_%m9ciYvOaI(uUR2eBwLf<%v
z=7Pj}3pHaHWD^G?@fr1el`3KBX!{Mem-2uU`6p635|~`SWjyY9cswGHUZcHi`Z6;|
z#cS=*ZtUPgX|~$Kx^3v{kBzBZti;)0QJHXQE1B^`zGc$C@<4z3AK@ScTm<bJgX+gL
zd4?{ob^$zeUmF#>51+~rP>bu{rLi!pE8bdiQtHFWb4_PXR_ZcE>3<ad9D*ICvUcax
zFNiF0=o!QXI}tN&E#f?~kZeu@@et2x<8|@xdpKzi`lXnA{`HxqqtgsZ;@0Ti?KAQ1
z5t%gUIOQ<e7u!?vqClfDm`Z#Jkly^co8o@=%8M;U^s>2SlijY46~&<b-4s5(jQJfd
z;jagF#ylnPq1jy?g|!zg2CD;_xD)$Byy)hGK~$coK@Q~{k{FY>?D&^?PyCG+!N){?
zSVrb#`;q!2`$ly=mPGilDTrY!O=bz)`GAA+JJPf3MH868fn4fmUo8k0yqoy5tr&}(
z(ZCxrHBy<XIo)&}Tl%w1yGc$37a`jO^DaLB5(z>4u4a5owqmvsuumZK_Hr&qnlqQV
z$dR~OK8ArB{M~S|-d%4rd;9EkUdO({l7aGq`jy_6YGHselKGWI1w23>NNEeX=c{Wi
zLD0H3ZhiCmH9^FxM&d%!UHLDC-lrUQjzF!Uc&y0fE<>?=M!M?2<Zp@KA-?uWr!5Vx
z_u){NP@fDM1<w_!2qhe%-R`S0p?!*}JZdJJh<rI+Uuf@%>blQ}4DC8|0yq)!*(5I7
zUeuL~nbt*8z=*Dj+sd85tvO;h`n>vgpiuhOv%^l$bETP|7Dv%DRc6m|_|PD+KSv;H
z^vzl<ndB{fo;n$tl1drT9l)Z&gP>;~a==Q?@r#)ZNen1Nc+-OCEjMlt&N)k+5$+D;
z;9F8sCW3q=5#p-c37Ia9w|nI!vvQTA*sg^*19{HUWmIOte8L+L>fPuMB6oy(d^lil
zO+}o(!J=A=sAY~c4rYuq2r8s~N)=mf?d8cf&8N^rHK<rUZfd$4*EQmP0TDOt*wN>S
zsWf+?pDUxQ(33t6FrSq4xzaq9GsyjhkCNsWWhkrJRsvrEj3R}gcV2V9ib#eWuWM(d
zU@elo594An^R(GUoqD95Y>s!U`)LI2w7mb!(c%h%bSv_}PBzXyY8%H6c>ajr+=KR7
z@T<*4eLu$G$sT3H9%*f31`*gz+HMtUfW-n?3ztDWbomTAZSJ79rAMu6#RfeXh&-)3
z`K*S!mFWvVHKB~ZK}6|gn%x5KwZPL*K^V|HbWCH-SP2Vy-f9po{IicWuQt4Do#s!+
z;iNbk0i484><h`Jf)EJ!rAM<Y9_FY%2wVp=Nfvsg%t>1Cy+e0wZb8>4eR|lT`77KB
z4DH%1sw4w}LQjg@3C(2iGX#0)#NaH$@f`d)nEvrm%}YzF1Fx2XD1%0iiNQ9RuxW@0
zv1SOz{7qxD=swhuGIgT#8!nbKc!K7ER!te1boAhYl>A-&^x%wFJr>w5Hz9%X@HV1m
zB1m^Zf?OftiKaefxLiWH%%4bSjO_8~Y*V^g?(%{!fvztSs;?79xMy+0JE-aVN*S7r
z46m+lSyZ_TBwfC1wEH!QoR>hGX_Vs3gz|nKfls%V$8Uv43*SPQkjENZ8>@L)#o|q$
z^IBjwBMd7ZCuHyyQxD?<*DT4xm19RIj-jWN>kepN>YY*arjX<Ms|BuheQwxlWk$(;
zGTOzMJzZB7>wm7U`Cq2nPZv`ym-|S?QHFn?+L6xtp}|ts5Ja^E(po*#UtF+^o$Wjt
zPP+*T7m`2og+Xhp64O$bIwA}zi>O;m4#@?Y=)x&L;pUN4d~NMpa%50cjuJFq4fn>;
zulJ?ngWh8l?Q*I#SvxY)sJCXlJwd1Hkh5yvsCkr>;|kEzU8|;BVmW!lH>e@nVs~F=
zdL3ph3stc!Q%mVxLlIl`)H-}5y9$<Es}9SJq#RlEB3Nfi8$*+&uQgG$Qklqft$;18
zUd$v=CpS8dW@3vgchhcouHv6yl91dz44o!xM_%+~n~@X>PH!@fdWw<?zB|aEk+iC7
zXg|UYaxib!FGkc`(8eeypX$ay7>4eNlkEAGE7Hb-!zST?+|c7^aSzEU#lktZ93w!O
z&HgHWpdujCvZbX|`@xZH3$M!~p5|vx=9s6!LJH>A)F-}(f3_w7k9TpK0dqHh3bo0?
zugzUuS;)mq-2R~12RA>??Xe;~Zeq2-DXQKh!9HVlZ1K4pQ$giGp#;B-eU;ql;;8UZ
z!S0iQ%)q+1u=pEEH|-Lx(>~N#v}f}Ab3H*^!cyKFsm_<c2x`J_H`y2&4_8xH!Hx>P
zL3#YxUuE1=G(}#SZM9dR9@*cM?MkJuI`IZnc;<4_!f);vC(KSdaz)+|f|s}^{?6CR
z{BOQi0w#8*fA~-d{`Vp0E#suD^XL!*FJ7T92<2U~lcU^~ph9~2hxJxULX_J`?umt<
zAuyv^K0LYcMCX*?`J+tMF*`uxhAJQ0vPz%9JYPqbAb&Yp6F;VH4D_c-exrVYc6fl!
z=8IUm4`@qMKhH6k8IwMBSmIW@k{+?B)K%n0E8mhow*Iu<QwaHZYXt5kc%Joqy_~Ro
zK*kflHW!Mwp8{8t#lIi=?rk=r-cfa#Vs8biQO@ERVDL+uPF1hV^jy>22!i+2|Gbq>
zza)Om#!g0U>A|P(MPr4(ZsuTFLIKTv(QqXt?dEY8+*;U&{4PMsOTD((V}nVB4?EmV
ztsf=bQiSv4q#6W`EB&g1eHBYLhBIhaVjg;VpR5b#4!kw~CcHo>#wB~st0`yO0WV))
zuRz}npKIKtLVvXHNBJuU7Hx#q!k$xx8ayt#{ciBK#%}U9<*tA7mx*Alv-Df=Yu|15
z>sYpL+MlhX?YA*pq)yM55k&@lRSB#^kavC01+s=V@tKD<y+XJi2U=pgKDw`=E%L~|
zS+mdE;cZcM+qMeu5;<T+z6pH;%op_a(^ZIj57kBwrCNCG^%qQpPR(uC$-?)07Hq8X
zJBZKMJj4vG!`JW|(ygd%2*u$Z-YkzHN>?YS-ydaqW;C|mK*1z&g8oi?SpHpnSlAf<
zbMc9nvEHUb?7MhESr;t0=c~?$<_m~EeT`xJu909~ry?rJibO1j1pjt_l0qn{bO+>`
z(psJMrm%m$aAIX#a=&T3m4{+&WlHODWPEhtfxfzCWa+}*V(jizVK;Z{<cw75p>e)S
zc0OZSZ+U#xmDV8T(yVA@_x8Q!JkM$o<R$p!$HNch%oAHhJEz9kqG~H+q}7W%=j)0r
zxNeochLX;D-U|HVdKaW)Iq@SH7lnc|OlbzN8w)J@tZUqp!sE)AUSJRi@vst_ffMBS
zBonW%ldUf}!avqo23S-p`(DR7aQfeo6?njIlG^P&*4%?%te0L})(TFf;q9O>zq91p
ziFD+5_#M10g*;6Rw5F`{a*{IfP>L(pCtx>w96fv7Z28v3V^`CJN^Pbdue2m{zz+<i
zI+tp8>zJaY>6RxfaQOS)E~BH#1{Zt7+t5N&P<aprCK3+_z*0lt7&~gr4kWWk&Z1<$
zHe6K788t$Cr1h59h4WjK^gJ^VTk?zK+fTwQ^ZQ1M?Qkq)R4;U<itnVN-9Zx#$1^=M
za!7!rM(Rxpn*4evT7b4MK|F@G6UX({G2>t{ly$IR0f}ID48<EQW*p2rB!a6*kzMoF
zmZx>4>lmFhkUfajQ5&uD9;N7W_3<3e$|WU@f{=Q$)q#6*%+FB~i3esjoSe2kVi%v@
zl=+E%Ve?e5AlgjHRw*|+Zfa~IU~EEUY=SmA-a9%@JUWhHY(ix;$T#CMltr%)$Ect{
zr$CuftTZ}nZfq`SZ2raA9DQ`Qe{_~)be1NiSc_i4BDuJ3bhdDG)_8Pwe{>dYlqq49
zNoka6Zj=cwMFMNhd21n12i^UhuEHv>p!K#bi-T>(Cb1Kx82bmdo7M_H`n!#`G$U4~
zcIYPWEor#R-^o7f-^f1uKXP>t{3ri96{UX5e%pbHGeW{LXdgbY4>aM@r@xB6m@HJ#
z`^z0Ra+xwWqEzO_XzLY>{lkeP=af=I1M=PAQX2P<r1oENrw`{SYioO2E@t03ximDE
z%(=LEs~6fS8=Q0>mPj^!-Cb@Drqy$4Q*hRJl3>2bHTd}QoW8HUXT(b>!0;^T9Ph|B
zAj4m~apT|ua^mds-J=@D-wxa#G+SgPJMX4*>aHoPE$D{ZGqs89&B(3hSe3q)1fAv3
zAjK*<vTJS}s<E8&M7O?iA=EGZ3ZhY`S-Ks=%?x^vW8(QXlhj`0vF2IzQaQakb#KRk
zOKnJpSFT)E1F14K_R(rVl?L#!M%=u+)JRUZTs99^)EvxI{CcZ+tD1_vLDfL(qo}dG
zJCn4_s^JY^owa?!K`7Ws0EOLV1{P-H%T5jEjt1fjy(A}rtOCmZG{^=ti`WE-;@#aR
z?@Zw6{jFIO(VPKDG$|0MBxh3Dr>hBsPS90S>k+nFzskTNMhpb^69(W&i>|4|R2W@)
zzoxMNRyYV9LJwv>Hgcf-F-~;Q7Oc4;7i^Lk&@oGT%iWc$$<0;sh*!(|F{}zqzU^Ri
zY#wD2Ue=fU1T=`N(E*Fzbr3i>nh^aG`-~{NBea10RPZhEeYD<#K>Z8LNY&?de~As&
zfjZF*RuSBz7c~Eub}N1KA|4WW^Wnwlx?RU~pUPHDr00Yy`c>tWhlK6b9q{eCfe;x-
z;<+qaFO@@V1=_FlZkmr6&H^}u-bJ$#WTv9UNs5z+`<Z?%;>GY2J`<HrAPcJ}UECI*
zU#A%!3zRM;CVX``;60Cs9}8wSljgf<Gax&{VLelXE-8^&I}S&3X+M2@Kh0U0CZqMZ
zxc@{=sgA6Q=xAa6aypw4EySztzq8-BBJueR()P@+_wRtf_U}MoWup7fm4MQ3AfUAV
z3kXi4AgBU0H-xr}^%QgFmC?yo#G<kQ9q@2+^~HJV9tev!D$C@wYxT`}O;U00e3aOI
z_jK&`tch}r_^pW>{KpS9jm)S*%*=>lN#^l;nbSIpkqFF1kLK}tDQ+&+>t=F<gW>S9
z3$bPQ*@z-te5Vg9PnltZ4FC`_PI_cn{{aN5qZJq9u+#d%)m)vzlaqQE{p}cgt3N8>
zGt0BCYfN`xUZcmF;+hb`3cnTsap|#LWBBS1mtO}yeQ6V(bOVmH)_!}jQX8j+ybf8?
zJA!l`M-(%2V~{N!3C2Y%2Qx>y1b$b#b@~ZQE0OabH}|HCw;E9@<1Yt8bVvQZ4hfb<
z&olK*YxPfbUH=3Eoz&firU0eWOH8UNMb-|b;uMAI@T5LwUMF=uxMW}|w7mutOxPC+
zRU9e<J=<K(u5H~ET?VnKV3=Fk!yL2)t+EO^eg=Hh1h4uvAx;Ez)&40}aXe6o5<y^u
zPZpb)TR3dFxNJ6(i+4?@r}F|uDARYu$hP4K{l)mSbmOmVeQX~=ymp~0SkB!ph=k(;
zoqpUyV5&&y&;B=^cd&~t(ZxG#2n>G8{0rGOgG4#aAel4j1GekV`)m%7?5$7-Qr|9V
z2UWHncgY>o)7g-c-5Wg$vR48<$&auYH~s0{gJu<ckmV;3)1${l1!WvUfX0c5q`{=)
z<8(}ep|1ln=}CE*h%x87WP-DDWCS?Yv?!!GY%|3lSILg-^8~W6nQxlpVkRO&J**VB
z?epR%B67>f4&~4<BD0>jieFh|N*c%F_8VNLw=FV%y{3}lK64$VOvFXHXF8nT<RfR1
zazD6C!(@?`yJ!BocbP7ph#LnKo!{icWEI7~a20ePkSFlS9G$wFoF`p)9yzC7#%gD^
zU9URc3pq@byB%Kk&P6n}4;J0~E1mCTUSD;9<|Pq5!lXHr#Qz;=*#GYU4g3Fq=6^u*
z-wv8Nw35uf0S!If{}rH_AZ@Ytz&JMU?((P%^4oJ~9s3@!Hk+(*J>uZ8#v&uhWjAg~
zXkGG)yIT-nl9pr9L9xCvt)wMbk5d%J-%v>lJ}KsfChDH9%K2sqVZYJp-m*%w<|r-W
zH0XSW+#RZrR?Dcl`APSIb@+62-<<;&R*?>`Ou3A~ZDrEcYaI_R&C}#k+~tD{lVnXb
zx}>ID(rY=faD@e*MAfcce<W8Gys-E^O~SO#idP<4V$a?^fI=Ny0#lvJ0Hv|7-g`(^
zLC*?CPtW!nc$l9N6_s>6B|zgKOOlXd+4|r?bI)!!hs`lli0<d*UQg7wm>z6_jId!z
z3>oN(Ig)J>>cMe+Ne31A1qvDYsf8ygV@>@qUZ6SCkSOx1i|uX)ygvoM2;<=6QbgKT
z15}0$fv8sv%DIX(1xR9BAj<}?F%y^*fx!{yfZuqM;t&f3UX55$rPE&hgl!yq0w?I4
zAP9Y{H3aG%{D}f5i>n|q4p{^w4xynE#bITV_N$fwL^@!xBHES0M&KCJ7kL`f>@bAp
zb7#u8-jL1^yK5LYIsW!Z#{J%)K;k@($4^&`w_>72EfRfVq~i*ovar7bbjzWAo-q$A
z#dVWh^+bfYyHz~kS3D{>_wn}H6=N1@!p6QBDnQEs6pp(4SX`pcO}-#Nx7@f)#{iU#
zN2bH}P5uFojO*4-e!@iDJdX^=oy#;p4~+wgwr=9>vWgTRxeBHa$TPGqM&GX2XmPEP
z*QZWORWe(~d4kWY9T9Ld8=L7D9v){itA;&R1$o{rb#LXtynywwiwFJ=H2<D`kbsrx
zKVRZ;{9jDZ?y6Qw6ieF#^qXS{_{E~@0ZTldV6<odH9vEA)0R=3vl=6AJvi;VJuCh_
zKWk@)U}Xi&&)^F&Juwtg%+t^C7ws1kEf}j-Zt2~=A8(Dv;-}epm5dHP^NOW4G+&{d
z_E_nRRf@SBzMSEAk*+AT;RArf?^PbuR^ZX4TR+N^n4<AG^BXt_lB@Rr1Wu<UzT|}O
zqC<RRX<7+qslJ9O>>uD90DyyVuUO^$wvMoWxp=y)+hjO{&bfrJ)-vOQE}7Ll?BICQ
zZ^av08+S5u)yRt2?Odf^J$(9I#tojA1+u45hpnwjdm}1Pj1?`v{5$0gK5M}d%UIf{
z#PrI=8Zu=cvl0aRPvwi}!shzL?va(ULZ!Oof?-DCAPtLXbeQH8qn3nJRFOY*ZL7iq
z`a<k}Bgl7%Sm+DQq+m{vx~5ZgvoJUyud?F%x`W9Xm!>YJhyl3^ELAZ)I2Uo5kBPti
zX0X#{C%xXnL2IoBOoqJZLY!nTJr72aXKN?h%?`@WEB^Z+d4<L1rvrX}HW*-pdtzi=
zG0j8danJ=u-2?u@$Q`ahSaonMziz@X|6^!pX$#x|mz|WIN!;B_qIA~JT^$5E0XuO;
zv$l5IqPo{x_X)mM*v|#|7Ng$C-IuuXGT_pp+=4a&9ADtLr`SQ*;)2;ev&l(&<`7a0
zqPzB|KTbv)DnjxSdIs`*I&%0?%y!i4La}9^mp>5!?O`=LdLS$W0MC7sT*yR(JP&JV
zO>VYGMjNN+{!RY%30LtphfJ!@fmqmt%k;H<ro-V)z6k&!*Dll56LHe+nYXJ4Vzhwy
z8Q{k9fmjis2!N}mi8vbfO!@PhcxV84p1BHYM>3jp+FuHGEW0gST^@%O7M572xSP&%
zdXQCE_pd4NSH37WMY)X+czq7f4Be!4U9Xt`JK!+<owlL>Z?J1Dl}hHc3F+5FqqB)|
zo-Y=5L@uZQ3pa1hOaHH3GxzUyjVI<G++;0EEF@wxR=qf6l&85mSM53~;Eg9>s%^ym
za;e0Y=>iz|W3MUk1MQXIS*PQ7dOy=XLB5pZ!6)8v`z%qsC>mKUS<l=#u5i&`4YD@`
zv=&d_Ly>yWwfzKG_#5q3CFevTX9RSJe=L0A^A@h<FJ0SXBD~2v%}Whkc0+}gPBr+=
z2u0U~$vi(2j*j;;*1V>T5B?Z7+5a+Z67OL>Eu#XZIvQNQwt1w(##vE`sx2uO@>)vF
zSf>3zrefErHj*m?nNiHzM^dj|iJI+|f5Oqp303Zr4@4Wm0Jo~9)@7JgOkNRCRmG75
z7&MqLVaR*XXpkddablsun%O}D2z8C;`i>FMu}?pLzP6mvEW0#zGDP;vUt_4s<3c-&
zs)9~H9dv#^>+~_`u9z?~X+YyBh^`<_2{v%2lYVpdvE3ga?|<@ojZs*g??fK=gR(^f
z!8;Wr>5OfhBu$5`FzFoklLTNS8uJ%)Gem$O+NU|x%k+7U$eVt~fgH}kEg?2*B4025
zcIW|8nc|(@-k8qao-<1DHI`cmv4vz{QvWT^{7mTdXxG3^e+OYK*IWrO8<NkqL^iqU
zuL6RK(X_YW>{mI2LqFj?1zur2kfx8$<!z@!ZWOx@gvGM3QJ+_TrU0xOcPqtH`#hv9
z>;iWy6eA)2sAlIMucHTIdkrq~eU_O_9b<8509~`Uz;!fvAT|LoT>$ao46;Zw9=VPH
z2+@AzGTXb!uLHmcplASSv(F;c0w|i*19DUznc`a)agzm3`wsU9iGdaWhTG@!goBD&
znOrxUHNha#3aimODV{pgGCTZ-<R~u!3~ux;UL@_ZoWDcOzn|U$sGI-eRs$AphR!wd
z1DjK9qBl;qbj3Et88r+Wg!a+i7_@X6#*mSt((N?{(<m&Xs-_@k!cbUn`_2Fh!Zfan
z@W&v8YD!5xM!W4JdE5w8>+bh{>=kJq*i=d1pN&aha$X#;@O_UYR8lIecoyHvnK<j%
zv!o$GF~cECQA<bHoqoM%HHh>SZn(DEt@FzKQ8lFiRg+a;a8y4Q^cPk0@!tcQ+W)_x
z;r$=b`~@^gx6h})K|`DCX7w-72>lZ@@lhYR7;PwBI#bN0vwsH~rhiv8>~#O%K(oSu
z*m|N;6$EtD#mq}APXh~@ZwShM3o3PaSrOU0`zhE+upZ&*A*$9Q%`!$Xe9oqclM^TK
zN0ukgxO<0JB;$=xsLoX*uZ&hY^G1Ep_IbtHj{_|nnGJ(e11~LZPgLF_j|U8y?RFPa
z{Qj+5_OHC*Yb#Ar3uRGp0`9o|>ApPM@8s{CCr$%DzM4!NY@IQMGluLIZQUlu@@S%e
zLh<tTWqCs`+Nr|tfW8wFPw~E6WPQQyXG6PzL8b4Ab;IoUY`*+Fp{qhh{hY(8)qdEp
z@|C;l>FUinv?b`uNQWv(|DBQ=PiBVG_EmG#OVT4f@Jea*LyVf5I};(O0SZe1tu*0q
zN1`No|D%0Pi2lQ-Ut@gcg%Db5Bh<z5g$EB|g+_YiiWyCUTt&ZWsVtQ3-mBbfKqFJ9
zqRDYMLF#$RRjxd=V47Sx(0dt#3|J9t5y=~T6Uey(coP_GJr_7T)7&KtvI?S#uoHN4
z7AT=k9Rp~#lVKA`mYb+h-4J7zPP4~Pq*z`vry-u4CJ~i^Vtn$a9-+TdD|jzeVD0RR
z=w(&5E+vFH&}0t{P$M%Cr1W?--{E%E;iR$zHS#8~bJo|}<W)w5)UVn6RcbTdt~ukW
z5VKG`7Fe=h%4!RN@32ysE$LM9tWyhn+B40|D~eX%8Zu}CC)INz6^T`WXp70M$oIdf
zpd|TQlkLY@cgm};!(b9i<k~RBMyl6S>u+n8(-*E*iSK^emn9Opicy`FEAur&q-BF_
zwiwd(?pC)A=TNr~Z%bv5H_T2g7kGk+B;T-?e4@SpXKmYe_DP-)_eslXpVqC^%N%MW
zH!c_=4y=m>rcY^vHk=AIC=pc06nx^!17q!B6dyLZaUHdKd>6$StU5#88ddZ08cN03
zRZOqc3kbca?59H2X~9aT&PoE6Ibnlf9r3_DUXj3^{k57T`Mt?n;wtr;a@eF?d~}<p
z9RyND-39cfIZXL7%AlNytugijtaW!$#o<Y7j6>E_H)<;!=c+o<PCe3=#%hdSYXELw
zQ{iJDleQRsLpI^<!tuU?b?Kvh^!-8UzN2C3W8LWEh2w>H@bn|Q=EHCN9f9P9H-71Z
z(C{OO<fVh&C9$w|OcdY>WqxnkA#ZNOUUNyEjuW^*Z9{SX5=g(E`K#yMTkBTDdh710
zZy`$Rinl`b81L^u{Wre#|6@V@;Ck<lw=Uho7B}dDC2dW9kY^M_cLhmeI>Kwq0snGU
zDyx$p|4B8<qjp0t(15<<67hzzHc9Wa2nO>C);wT37r%LCFU+$=3kft0e<P}>(G5fA
zzR!+)zhip)im45`XQJ<QV0OR^L#OWze{_0y=gG!7OqbBGT0MlPJ>S>t{$fQ7<t7TE
zwScP)KX>xBJXxvXV&|&rZ2K3kjzM+2-Pgplv65hekA+mH*1e~_RE;9drF{Hy^tlpM
ziVS@tsGro{NPI%x)WE7h@ZJyDKzMwc)L1~r#fc5hKo&sQAUHr$aI$Kp0l<84RG{!c
zb6@mJgaY~DdYHMK<0<-3T_sYs_0<UTwqYZ%&;(TbH26mK*(BN`zz|@d>-DoCfVfXV
zyfr{=Hg|Dg{Au9B;4SNB8G!M;AJhb*hPZFC)j+Vo(wO0Xq92!;@kML`;LP030YG+T
zjM@x@J@a#+8OP~K@%*vt@S4}kkv>#8>anPhFZg9_>UN?&kDZXM*khnR&l3by!CqT5
ze+@CT$d{v^Mk_=l<8xRB!*HeFGIim<+M(cw2;ufoBtu;J+Gn>Uo`RE=p$`1aV;)#6
zO%W7>C|RP2@7PSf<{pUOxCSJ|u!uwtekwz0#5|^iyT{s3SxwVdPL<SGrNl&x3n}G8
zD`zX@$WNe;pI{WXVdToocW0>_I1Rjr^7nyV(f&N1j{2BX7Qzq65Q3%r^YPt$RQ{^2
zv}@_wh5hnZq=8NC>OxL?+bqA-h{sTG;!8bBC3#1#`+P|+)%iB1jZJ<6D_v!aDfkE5
zyttnq<&$)$M5KpM(#BjF=pN)f#`xf8d<*iTJ&c3xd7K%?RU25IJL}A=*yBq$rKJyT
zlMlDJw~LWeFCC)~x3sqlt^1D3+7G|ccLa(T-ow)m-I@=<ls9~VdoR<P59~M-v*DF+
zhmj-Yoh(kg+?Cp?j3V+jvRm~j2_IH{jGZ%n&QpX%A+K8`?F}vWw?&i>Q2UT0xxWMT
z-|cu{{b$gD|H$jxWd+zU({_XUd&5I?KMQayMW`WmwB2I8Ilw4HwrLg(;pc;Yd$|i2
zG;TQtzQyQBaoHi=KOZ=;Dm=c|HM#m?V(kRj@L*)Jf6yLwetuowoL-KdRbFn%odFMU
zc3GEZ;4ZZ-?Q1NABV@KZ&K=3Ggxqn9twi;I27j)4`SIeWoG~DGxwJK%-rqmQ7QK;n
zwJWxf$%t>L;|Ck(x#Vf8{?Oa|<zN*OS|#ls)&O?ul_Fq_)OMhM?oA1S)4N$zRGD?o
z`K|kh<NX8hP|JkguY+>V`YG43ed~7{j2D^NN5nCm=jHo|7u^S)f+s+%mlJ6;fYaBK
zrL0~=XKLc3l~;r1!>J$p;H|b+a=QVlxum{cu%YsM#}9lKb^CU`lbY3U^K%~=UkP_?
z(toBDU4L!WPxzi&hk`eP0<vm=@7)6kVpf2V-#G{ko8KwhDO;)mx8Yclbyci$XR@J_
zaOI5p!W5Z7ECW1(81&2quL`OSrU@i1013|qhjF6GX;9&tDxfbF;S7${^a<n1CHX5T
zPQUEnCq{EGYYQI^Ay`{rj;3%QT0;zMreL;9bZ@yLra}G;21srq&HV0EwP4E5k?1g-
z*$MkyvR?2*bS!4#Y&DGVq(OvdIJ?l!7;it%Q%e30P0_)GNfo<|ezyTo2x+uOXX_EK
z$Ize`AIt=Tj(Fk@MM=+M@TUT#Yl(v0q~Sr@0>B552h1j!{?h(h4s|z=Vn}3cNQ8P&
zgnCE>H!jMRpXb=$eU|*`C*!po?T|<t5q=%uLxNvNi0>%PbL{Ir`}N6`?a37E$&~oX
zRR6k<%F1&KKhO*1b*n1MY9z1uwrzv5@t2KOXRc{_+gV-3837EwX7CI|U}X}+GfbtF
z|KBbj{>?TAdOC*xILwBnvXu1-8)DmuiYWr{w4X)nc90?lgy;|iK;lh(`?PUG#Nc{s
z2v>q$U#`a%_l;xxB8sNmTQMUu9Uc}Y22$|vCv25HQ1>dGSF?(E#+N%-f6q-<9{w>m
z#o4xLb9+^OPrrXN;7WKn6MUevlF^C1=~?W^2y(uAUM25cI7`-&Eo$`&{TO!|7?TcN
zx^a1NT8?A{J}5dnqGtDS?U3xh&a2MP_S$w>Js+?xc%|?c4V+{9^@6+ndv=;&=lS)a
z1~=%ayT~1)MWDoP@xEvT_3fdB2?30^9bd-wfv){DtS>kHL!33L=qQ8DlYF1IkPtA}
ztZV%crCxL8!iTs1%p@*Hm9&SbBi}#!@i0b{cq8N1`UFsA`o)Lp43(a|y*qj{17?iv
z{rZHxHJW7$BSeusALry<gcGzX8#GU$O3w<ztRwn_K6_XYnBN%4z%^sKATh9XWe5|b
z1EKFI#OC`uy`w_~6dS}L;1HznN~C+yAQs8)FX=j0Oo!$2Litg9lt3`mF_Mcx=)-K>
ziUk6(AoPpW{Z;)}_{202YoQuu(`(31_(nfhi`UZ;1X(Ns9m9j#Y1Z?L@`*;f=u_L2
zS|Lj)bVRCzuC%^vq#fBYkxLh{b+Kb9e&&LXVgai&Tohp>X#uY<KJ&*lfPozzE~nfK
zq<@;cGz_c%rW;-yoOrU012)mUz~<6r60RQGD7H<~R0l6Y?SjCuo0MuVU#1rhEH;vt
ztvcXzUAYw5T%*UyCb56?iBU%8o%$tPWQiUEvX83;KH0^!JM~4NBlIz!A4sEruoA}_
z!>%MX3FZ}bP6>ihg$rNMg(FgkHZ-ZzCr#ZRZ`rBhWK+jhiz6xY*cH+$!2rKTK}$}K
zinbBB;kX<4iE_WQdbEpU6ZHTrD|BMKtKukJl3?^@CKr8d6~x?ZAB|3Mj4@})eBZcB
zXCXbvrYG5*>!8=_F!F4(OeMTatrF9(c8i-enE5BAKO|&N%hVL;SW8zF??u%&=^lhK
z{-{8=_(3yrlY&iyrx>N57EJ?@r>kGiirQ|2ajRAh!qO8>N{7a4=Wg>U_r;Qpf0{L{
zedXQXXX_vyeWJ9nPkQw?Q;uXBzDj{k57=q;aR_=br^|ZgeK`fU%~qwYRd!CO>sC>t
zY#mqMlA4FUs8N38un4fOQFhL%>;9oe`FdQ4N^F#`S@TY2=$Roz{F90(XP5}d0WC(i
zCKxPDh`14OW1I*nm5Qj#<nt%OFj3RmV1Jr0QB|YQpH8TV$_I(2m0b?Y+i``hFfOE9
zkE|>A-npcXBBZiFZ;ARTojLng(0lZ}_)y*s6?UF2{Yvpqwh`9gSz-T1e=`2n(v`Kd
zHgq&FvLhg~F*H^*Ql+M+VWOj9U?zvA<#%*2wYDQ8r&Tm__-;g|$;iqA__Au!(a|aV
zCf2`+@o(brn^^uPdcTSDZ=y-Z1o+a?N&Y5(s{Z)>%jh?;`%TP#6RY3E<Tv?Oec|61
z-+z<;YQ*u^3Sz%6<o|j{@%P1_KL0%aPoMU`@BLx!Pd9%`|BJ&v4F6%^PwAf)|6<_J
zyZx!~r?)@M{o&f4oL0e6-`vQ+0Wc<lc1C&*fYJHusQh`=KaKuy^rw;PUm5)~0)HO+
zr+?GGmI?g6_|xQ14}W<5ufh5ka|XX_{TZD<wf?*%fG-Inmmk)4hW2EDKieP9%&e`x
z={Xn?kbUD|pkrX6V`5-ppl7CMWKjpykg_)XUl%Ic>DkyA84}P6>wUL3f~J*K6jG%T
zcKrTbU(d?wx11=Nn%NToKL3yxDgtFAJA1(62pDPTndq6>8CaMZsTpWk{v}9&21cp`
zl4edu1OQGLX}=lio9S7R6Ij^jIhfiT=@B^nvpow9BMm_8czAw`?C;o9EG&Pu-V|MI
zj0k9D^vx9<ETL(o30MHpr-aSy>>UVL=ve<1h>C!biT$5jU;p<Tr_a<i6tJ6+JP%Y3
z>1WaA4{4O-inozs(`^8XajD8BIr-CL?_!x{?CJaSn>yBt{S_jt+i~l6R8uDE;=8+r
z@<hWGtO*8VDO~}uLfklYdJELJ^{o{80x)FBpiW}O6k}*3zAz(U1%!ccX6$)aW1I+B
zD#PfO@J~lSeYd7H6p4|?u@q4#W*5vzxFA$bl2Lp(G)R<y@WK^SCb8#T3d;mcvgoNz
zjcHSq<#Tg=So1=`d3?6;2v8gYp?4U3p!pz=<fcZ4Re@0~bJ(2)DpN)nu(C0Z>;#|`
z37GL>JJUgFV$^1tVZH6LT!OFSp@E2RpeKQ5u+3O?^GW0MVjc)kAfA9K=wuK<uyA3f
zqlJ5XL5e{P!Smq0>-*S9#8hu`r9^~2c=qUO^1<2gt0v~``-@1Kmc+o5Pv!muaY_e!
zNb#+fhE5HU1kp<lg^in?fGp8N^auV4XTIw~N^K5J%>F6f*ZnkG3<}!_g#<4;W0*{r
zuR8A$5%~QFwEdtY5@)D6$;#sT`lgB3I!gFOxbV&r#0IVmP3VwIVD<>l75Z!PMMl_T
zP1gp>8z$$%@O^qf89H6Td^38{pzAWU14r8%m!E(q8-~Xo=gP`!JpRE#A2<^(tT4Jv
zmeuA3;f#4m$g8{h7gu<m=80bu1CTpI5AO0QZdn-`3_fjouRYfIDGuBPpI7?tJMppC
z;DfU8CX=q(u57s2+lF4i(e!byIkLt=hoDLs+j(B--8?x)Ou!{5SRcEbnG9A|!toEo
zx0#(dgWG9NLfuv7%fqYHoa|5<|DVRrJF2NA&f|(mlimfSH-QKN0)$Yc1OX8gB+^7$
zKuYK!9Z~63nn>>udI=B<AX1e`?=4gz^b(NH;_jY3Z{K_S-k#4NcYbr`ckj%(_nbMu
zx!>Om((!SWzRy+Wb06`|EXG+1k^?GcMd1L9LAaZtSclV{<|(Z^inV*!b(=1?dcj9Z
z_r6ekTjG`<&Q9GnV71-lUAACL{qExYg6HC*<|xpNP52vLM98R^y}sOlh0h1&JC<oJ
zo|0+y{dt$3e4Rp}mXGF-u7alYZYy<LI8Gd^S4YbV04T5Z4GIy<Nx8{Gwk&JFO_J>L
z1oZ|~8h5|aR)_9&4ZA%+Za$^5dK2ZYuEGO<xtY)4`(D|NB7{<q;o!&$Qv(BaKQ!t%
zYQ2{28LwVGl$4LWubFGYl6ueLo-}t{^5JG(ZWNvGW8WfOLsB}B`W3QDb<5ON;#@u!
zx;1AfUgqA5eK{Eq8YJ>D`$AP^!hR5&wFBpJ`$(Jh_p)$?^q1T=iwaweWjuG=OrDh3
zt31E$jO$~#?`ko?iZaDKN>+|5$du&tePZu&K&vujMHt=p=w4lgcxrS9FEYx@YN#@=
ziPyy(>^crf5;9UFe|1mTBC&0!+A&XVWclQV{W_$4!e8@^b8>bWXq{ctvRYRYdAY4s
zZmTZtt+BEp3@5?hw|B9KR`qw)BFp2usu0gOrJL$5L}S6PM8!ZOVkaXiN}vsWhlFxI
z#=)u^Ofc@)3p+7B`U=`9c-WKHT`Y7r1u8DepnIDyQ>N#T*{K@O<w}45=A{OY;pFZf
z7Mdjxg%+XBqsfb1D8ARX$ZAsiRXIJeguV@kRu*sT6N*bTZRBpE6*W>LvebAQA3v0w
zn0Q+LWpw{pOvQ>IO_6?Ft{6114#QhM{@i>X;N9#H?OIS1%x+`*BX#8@Zp3$8FPY);
z46i}P&O+G=zkr>1&9noRb;aC8XVXenCLNJ)a|H>y&%<^}9(@F9+V{j*^M>nWYVt5O
z>LCZXlT<@`+)YBNlj2p#@F<KeReK$w^lFM)4O(zsA?!RQd}5%5v~FlOB_5@kp{y!y
zOBsD;@U8&#*7-R7+H(AtGRYnF{M-8l^bQuX>StfVBW}=+X=S_5=9b6H?8|CeWF^Ll
zXi8VU){t#~IFU^)I7nnoiJlc8G?VtKnJxvN!^=^s#vJb&^RE|?<I0BM*{x4l-Y)9)
z_iM3enohLS!4hSd5L%YPXL?Bb@lrMbJXoUVv%bA7qmhV<-bgpCfcVsMa)ax~giWTw
zb1P>FR-J)cNfm=p9Ggwp>A;Kbnf_WTZk1NC=5+B4pY_d{z^Xj}ay*%w(e^Iu1QBn=
zGGs6y$iAW@uJQCI6o~$u3jEy1J5&Xi_}rMj)T$I{;|>-n-f@SSG`}8aJyiiK3tzqd
z<mgRBz0M*2;8-?`LJZ{CkVe|pa^-dT4R^YVz@a|GP;G-~fd3w&TB96ftxA)GxnO|!
zxzt#JhQX$O+WgvAq1*835<t$f=Xjs(C?$5`<n$%SF3)*t^IXwsRPz$}0(0_Nkv-;~
zpgFjC_PXdf`bX&LjSKNZ*tY1xBx&>Fy9Dk|{mf@OrHW1!5i}gzT+Kgu6n-u|YH>Pz
zpm1BQb-r6<n4j<!mfWex?=n&{@@q}O%@Su(?%^1+cE*SE1rOIxQn0&|{ZQchgU*Ck
zfeY9ps&j#h(vC)^HRr&pi__sXI_E%qjc|4%VS<Zs4%Ltrj7xEa<;YaOi)m;~sVO02
zQNS*5$)k2i)_TKlEb!#?8b(%rL%ch}3Pv)<ptVvTVfv}uNQ8b_SjzS(Rq-_QrI_?_
z@6nBO@sJUqBJ#~B<UB{X&B81Hu5bW8^a#|>dPd=On%BkJ^SYAO{aJSvD+RU3i%wJE
z%_vb=u$lZJBTeUp5dXr*K#c4;7~+4v`onNHc70oi93pG2H+0l_(m6Az_zm*n$QqY)
z{FP%W%h}>Mb`m(C>etMK^Jtou^}9I!xxLZ(;;j=_so{O4EA`fV=*QTF$xqr%ZdIuG
zWDXCy(Fte(1njlH3zc+faC39FaO5lZ$&#PY%qcO4`$?1?bjKZ21qN5i+HMHYYubXS
zV4^d?;D=Gk%)GVmyF+L)d-z#xY-p(O7SEFmTSYWjzek>5$+4e6RE7JKC28i%cXBmK
zav}{FQl}26fxWQIjQru@D)0CN%?Uy0npNq;pTL%9bs+Z%+1U$(5el{|7jSgG|Bc-c
zZ@kPUR#omrL{A{#YLiT4el2h;v%I!P9+IWifgL1kVvnLO9c^eGpS2z%>8}$X8SkGt
z-+Uxl(`rwqeIukPwRL8}<WRr?l_90px_VHSl<C<MGvl94KIPppJ%Ed{PxQsyl>XF7
zD!qJ!Qe?&xC!BA9R#6lWns@y|((&H$zampoECC;qN}ffID@Xa*p-<4egKj|h2XkP?
z%tg8~Y_vz%g{kpS&Tn@7gGYRhXvgUG+GfjKrSC~ITgE1v$Kh}-0Kwo7zB6xgJiN$V
z8UreH<y-q4>mjOw<}K=ZTZ-Gxb=1E%VB+_pJC~a5#hxuqGXr8;%AU6k%NC~@AeN$K
zL&(E_*!p>QH^3SADYXL&X{)eVXvk9v^iz6?Ieuv*-OkTFu9{yOMp&K%=3SeNTR)CV
zWh&<9o>sL|zW$bh;ch)eS2<O&_r5Jb&4as%+AzIE80YR9j~)9#T9_Z*Te6HWm<*sS
zw%CQu@e0|vPHr*FSnhRotF>!(9Hwb0i#k&<Qr_i>l(uVABXYb_Ty@HmsoJUTtnnrs
zx_cs{szWgwWvuabDn55(9N0Tj<BVFKQt%ZBd1<l62D}_yT`rVmI|h#4axPi#l1e4!
zUH^$JA}wMV)*L;B3*mq$8hOrV(!bXDb72o5`bvvJl(oXi;}L#mvdtoeV}+d!s0F84
zf7;&S+^DE0uOn|mbPB7ic>P}|WD4EaNl1lG%qqxR5gDG#g~z1q*na<&pMb1+iVW?|
z(aaD+_;#gzV=w>yLa(VvCz4$<RO2YAY3xDo?x;DxKldvDXxuXCWSlM6K4zE6=0$Wt
zpH_y)H=FRtfE4DPd<WXV<gpPCR|xAo8Uo;nPC^Q(P0Yh%ToqhjUXu|{Ai$b0snw!#
z>gfh{=vOk=V^55WYW>6~tz?1wi!v8d=%}oz$OjJf0P~QFdOqm0_opzr`^AC}DkB)$
zO7P-}Y$jCJ6k_{5S*caR<vs~b(TE>roOk?98H{W6>f0zShBr694S8-=X<+w3{iDF)
z6Ri0a=7EpkV8U-|sCKh))Kl)9)-g|~DhK6AKT>yCisPYcZU5OF{H$R^SZiAoNVJdv
z_tGHA;`2kmigQ90fMPp-`^8o1wEfK&S3>Ozo(EHS*OSk?QdrA7E5xR-mbv(h8t%*I
z^^NM20@4$wVAt9gBQpU}$RNg;lG_C!J?k!A<(w^qzVb|paS|{6cF9Png@0yC@+@QR
z9UqS?d6zk~1{$=2uu^iR$kJC~{bhdo%0UkL`pUrdRy%_mO?~ztkq8gr62Eq_C;Cl-
zat;VxD3{?aRbWX;;X^N$W?F^^;4KsjJ=6stcDE5z5+_KOge3V=AX#cyl@_TGi5p;7
zQ8W@@z$fhE6PNUobe<|)=3fIkd+R1$QNmg00;Wb+#cpm4gfD4dNotYt$)P%X)#%|k
z(QR|&?Dyeq{qz#0JQmD!@)So?wCD%<EE40)w%}CM{@IP5{byF>oV)MkZy{5iUWv!1
zibqeuk%7N_li?P7IKBROO?CMF5>9;pGURbXK9TNVypcNM{wk*j02w;ekbiZ6I?%qk
z_wY6)+0E^SBBsYyR%uAjdg`&^3?3Rs57vIJ(^V;M=$m1LYBUDcK%=Bv=E#$pZ!)cB
zrBU}WS}@_36}3y&ip{Zrbo=unougEMlXT!UGkS)?zgxZdYfxyQ3%4c{)o`?edvO|z
zaY{=_JtY&>v-N@hE|Q5p;WP$vN^k-%D|MZm+%GGC>tl3(dssVNTECY=ep{-X#&SxM
zU@;kSFi1rVBngsMmXwr~0LcJBO5&0rkP=7*tibv2O)j6K1$VS@f6fWK47~qKtM>2F
z=&ezWuuP4cUv^3=A-Hv4feiP9{MxN=Y4QpF*kAq}RE$0@eckE!c?~6Me7j0eCpf$z
zNQs_^kV;`5N<%|L#m#NXMoK9Y(ERbVcq{LubT#1I`%}Cw%b9B&(V3)Hr-7#J#5<Kj
zOL7S@I@`l`ThH&dr>`Z|z1=@;$*eR+T~!l{uuX{pV!mi$zB*u9Ofao}B43n6zFLU1
z$Ru0b<ag8DOBg0hwhX?8`06P7OLkG&Vc!&j6)NjsDQlN4>m+h$qn9$5s5cX?H-~td
z33%opJ`kT&ix<7k*zh}XXu7WVyt&RZ8!;C2<GBDjigJU=mn77M@SY2KrwhT>9Iy7A
zWY4Tf&z#(yS?)V?LX?Y=2UyQ8VeKk9*1mw;QFII|I@}l?4M9ixqGO4@!tQxR33x@=
zc*QVzg+K9%v<Lq<^M-V@3Wx>U{nR%Z-Y!A^KgeILT2FeBk$vX}?&p(<?|!A|RWdCL
zzBiA9|C{J)GGsl6lDhu?M!|!-2+2|JPm!H!JRRxZr!Rjc=<z>eI3y3=Xu?W0n_bbZ
zDAWfTR^FA|XR&$&|3}&%iY~b?>TD1aAdgF(wnr4_4=~@D%?R^SLU7Ej<d`-QOa~jL
zT@KSpfFd+qa^LjbSYTShZ!jQ0F$D%8q#~Wa#{)A5jhF#O%;lxbcnYgX)2gpSs(6ct
z1tyXworykGvHJG0Tb)#K5ckb-QqGCR%nHWL$!O2Yr?=fs?*Ol3aQmHzy;`qVH_&1C
z(NV(a2>ZW#8#XdogQFFf@~B&j1N?ggqB~@jJ4CdCO=$J({|i)$VlU1(3)P!e>DfGA
zbGs;Z#%T+1`N;(_NBkRv-@m3eotRJRGPY+&P=5C$$zOlN9dU^!q~brXzsrAu^7}}7
zfO7+)BHH31G!RM-<y?}ZfAr6|Kh$2vFZd(e5UQ++=bDj9oJ-(8c+e<t($hqcK;LjH
z)8vFbR2Af=rzwBk2|mO3Zur*GthpUei(Y2#g><SE^G>lVlXHbotAug2QCpAz-!T8#
zBmhbmWi{Fxf9J*s%CRdG=R}W@{Y|q!+=R64v3AyJ2_iu2bi<cI2+w3e67+@``nwl@
z>eL_16;AC%Jnq*dX$XS+qm<wo^m0&hN`G+(Yfup4vV2Js^%c-ye`~52CyjmnOgdy3
z>|b5km>xWy^7=yxac`L8RYnUMMoZF;YWi-mpe1Ln4gPA{(key?x8v3mL^nt#Y~T50
zRq89e+U@<u6eWbItEaBo4Y5AO4SCVoQ&2t`JU#Zh(RpkAVBv}^TgS(Ag%cDafJEAv
z-eM(YfpDf(BmJvwdW&Itt2g?NJCh6PyE%pNr|lq<P5|3CTvniM8n*2gig0sIfqj;X
zeGZs8dpmOuY&y$lItQA%1-XSkSn=p<z+roEdt0bq^!;1CJZp;fqBdsyTFugDGkD*@
ztq09cfn5J%y<1EV#45n$mi3J%?QDCPiYDv~dz{WkV07S%MF(R*?@<E>YB(bOVkfdb
z74v%d!mc#Q+xcfHqE3j)G@!8@w%ihW;aCdt-p;#)sFR{PKSY&}EVV2*YkO}y9><A>
zxKN!-pK?s7D}b!<X(}BGEaGdGDn9`nHQOC=I}|GXGZgPneNX6v@R!H!^q1x<5*Xq)
zZgNGMGB%3%7~*SI;QigX@1TKqV$0;^P8a1bG%kp#v0q&OI$658o4dMuxn9nW;$mQ+
Oq&OKLA5=$;>^}ekJdUIQ

literal 0
HcmV?d00001

diff --git a/reduction_tiling_docs/softmax_lowering_flow.tex b/reduction_tiling_docs/softmax_lowering_flow.tex
new file mode 100644
index 00000000..bc378c29
--- /dev/null
+++ b/reduction_tiling_docs/softmax_lowering_flow.tex
@@ -0,0 +1,567 @@
+\documentclass[aspectratio=169]{beamer}
+\usepackage[utf8]{inputenc}
+\usepackage{listings}
+\usepackage{xcolor}
+\usepackage{amsmath}
+\usepackage{booktabs}
+
+\usetheme{default}
+\usecolortheme{default}
+
+% MLIR syntax highlighting - simplified for slides
+\lstdefinelanguage{MLIR}{
+  morekeywords={func, memref, tensor, linalg, scf, forall, for, arith, math, gpu, xegpu, vector},
+  sensitive=true,
+  morecomment=[l]{//},
+  morestring=[b]",
+}
+
+\lstset{
+  language=MLIR,
+  basicstyle=\ttfamily\small,
+  keywordstyle=\color{blue}\bfseries,
+  commentstyle=\color{gray}\itshape,
+  stringstyle=\color{red},
+  numbers=none,
+  backgroundcolor=\color{white},
+  showspaces=false,
+  showstringspaces=false,
+  showtabs=false,
+  frame=single,
+  tabsize=2,
+  breaklines=true,
+  breakatwhitespace=false,
+  escapeinside={\%*}{*)}
+}
+
+\title{Softmax Lowering Flow: IR Transformation Stages}
+\author{}
+\date{}
+
+\begin{document}
+
+\begin{frame}
+\titlepage
+\end{frame}
+
+\begin{frame}{Problem Setup}
+\textbf{Input Shape}: $1024 \times 512$ tensor of \texttt{f32} (1024 rows, 512 columns)
+
+\textbf{Softmax Dimension}: dim=1 (along the 512-element rows)
+
+\vspace{0.5cm}
+
+\textbf{Softmax Formula}:
+\[\text{softmax}(x_i) = \frac{e^{x_i - \max_k x_k}}{\sum_j e^{x_j - \max_k x_k}}\]
+\end{frame}
+
+\begin{frame}[fragile]{Stage 1: Initial IR}
+Single high-level \texttt{linalg.softmax} operation on the full tensor.
+
+\begin{lstlisting}
+// Pseudo code:
+output = softmax(input[1024x512], dim=1)
+\end{lstlisting}
+
+\vspace{0.3cm}
+
+\textbf{Key Points}:
+\begin{itemize}
+\item Single operation on entire tensor
+\item No parallelism or tiling yet
+\end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]{Stage 2: After Tiling Parallel Dimension}
+Parallel dimension (rows) tiled into 16 chunks of 64 rows each.
+
+\begin{lstlisting}
+// Pseudo code:
+parallel for tile_id in [0..16):
+  row_offset = tile_id * 64
+  slice = input[row_offset:row_offset+64, 0:512]
+  output_slice = softmax(slice, dim=1)
+  output[row_offset:row_offset+64, :] = output_slice
+\end{lstlisting}
+
+\vspace{0.3cm}
+
+\textbf{Key Points}:
+\begin{itemize}
+\item 16 parallel tiles: $1024 / 64 = 16$
+\item Each tile: $64 \times 512$
+\item Introduces \texttt{scf.forall} for parallel execution
+\end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]{Stage 3: After Decomposing Softmax}
+Softmax decomposed into 4 operations:
+\textbf{max} $\rightarrow$ \textbf{center+exp} $\rightarrow$ \textbf{sum} $\rightarrow$ \textbf{division}
+
+\begin{lstlisting}
+// Pseudo code:
+parallel for tile_id in [0..16):
+  slice = input[tile_id*64:(tile_id+1)*64, :]
+
+  // Step 1: Max reduction (64,512) -> (64,)
+  max_vals = reduce_max(slice, dim=1)
+
+  // Step 2: Center and exp (64,512) -> (64,512)
+  exp_vals = exp(slice - max_vals)
+
+  // Step 3: Sum reduction (64,512) -> (64,)
+  sum_vals = reduce_sum(exp_vals, dim=1)
+
+  // Step 4: Division (64,512) -> (64,512)
+  output_slice = exp_vals / sum_vals
+\end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]{Stage 4: After Tiling Division}
+Division operation tiled along dimension 1 into chunks of 16 columns.
+
+\begin{lstlisting}
+// Pseudo code:
+parallel for tile_id in [0..16):
+  slice = input[tile_id*64:(tile_id+1)*64, :]
+
+  max_vals = reduce_max(slice, dim=1)      // (64,512)->(64,)
+  exp_vals = exp(slice - max_vals)         // (64,512)->(64,512)
+  sum_vals = reduce_sum(exp_vals, dim=1)   // (64,512)->(64,)
+
+  // Division tiled: 32 iterations (512/16 = 32)
+  for col_offset in [0:512:16]:
+    exp_tile = exp_vals[:, col_offset:col_offset+16]
+    output[:, col_offset:col_offset+16] = exp_tile / sum_vals
+\end{lstlisting}
+
+\vspace{0.2cm}
+\textbf{Key}: Division loop operates on $64 \times 16$ tiles
+\end{frame}
+
+\begin{frame}[fragile]{Stage 5: Fusing Center+Exp into Division Loop}
+Recompute center-and-exp on-the-fly in division loop.
+
+\begin{lstlisting}
+// Pseudo code:
+parallel for tile_id in [0..16):
+  slice = input[tile_id*64:(tile_id+1)*64, :]
+
+  max_vals = reduce_max(slice, dim=1)      // (64,)
+
+  // Still materialized for sum reduction
+  exp_vals = exp(slice - max_vals)         // (64,512)
+  sum_vals = reduce_sum(exp_vals, dim=1)   // (64,)
+
+  // Division loop: recompute exp on-the-fly
+  for col_offset in [0:512:16]:
+    input_tile = slice[:, col_offset:col_offset+16]
+    exp_tile = exp(input_tile - max_vals)  // Recomputed
+    output[:, col_offset:col_offset+16] = exp_tile / sum_vals
+\end{lstlisting}
+
+\vspace{0.2cm}
+\textbf{Benefit}: The division loop no longer reads the materialized exp buffer; the buffer itself is only eliminated once the sum loop is fused as well (Stage 7)
+\end{frame}
+
+\begin{frame}[fragile]{Stage 6: After Tiling Sum Reduction}
+Sum reduction tiled into 16-column chunks with partial sums.
+
+\begin{lstlisting}
+// Pseudo code:
+parallel for tile_id in [0..16):
+  slice = input[tile_id*64:(tile_id+1)*64, :]
+
+  max_vals = reduce_max(slice, dim=1)      // (64,)
+  exp_vals = exp(slice - max_vals)         // (64,512)
+
+  // Tiled sum reduction: accumulate into buffer (64,16)
+  sum_buffer = zeros(64, 16)
+  for col_offset in [0:512:16]:
+    exp_tile = exp_vals[:, col_offset:col_offset+16]
+    sum_buffer += exp_tile  // Accumulate
+
+  sum_vals = reduce_sum(sum_buffer, dim=1)  // (64,16)->(64,)
+
+  // Division loop (same as before)
+  for col_offset in [0:512:16]: ...
+\end{lstlisting}
+
+\vspace{0.2cm}
+\textbf{Key}: Sum uses partial accumulation buffer
+\end{frame}
+
+\begin{frame}[fragile]{Stage 7: Fusing into Sum Reduction Loop}
+Fuse center-and-exp into sum reduction loop as well.
+
+\begin{lstlisting}
+// Pseudo code:
+parallel for tile_id in [0..16):
+  slice = input[tile_id*64:(tile_id+1)*64, :]
+
+  max_vals = reduce_max(slice, dim=1)      // (64,)
+
+  // Sum reduction with fused center+exp
+  sum_buffer = zeros(64, 16)
+  for col_offset in [0:512:16]:
+    input_tile = slice[:, col_offset:col_offset+16]
+    exp_tile = exp(input_tile - max_vals)  // Fused
+    sum_buffer += exp_tile
+
+  sum_vals = reduce_sum(sum_buffer, dim=1)  // (64,)
+
+  // Division loop with fused center+exp+div
+  for col_offset in [0:512:16]:
+    input_tile = slice[:, col_offset:col_offset+16]
+    exp_tile = exp(input_tile - max_vals)  // Recomputed
+    output[:, col_offset:col_offset+16] = exp_tile / sum_vals
+\end{lstlisting}
+\end{frame}
+
+\section{Stage 8: After Tiling Max Reduction}
+
+The max reduction is also tiled into 16-column chunks: partial maxima are accumulated in a $64 \times 16$ buffer, followed by a final reduction to one value per row.
+
+\begin{lstlisting}
+func.func @payload(%arg0: memref<1024x512xf32>, %arg1: memref<1024x512xf32>) {
+  %2 = scf.forall (%arg2) in (16) shared_outs(%arg3 = %1) -> (tensor<1024x512xf32>) {
+    %slice = tensor.extract_slice %0[%3, 0] [64, 512] [1, 1]
+
+    // Tiled max reduction: accumulate into 64x16 buffer
+    %8 = scf.for %arg4 = %c0 to %c512 step %c16 iter_args(%arg5 = %7) -> (tensor<64x16xf32>) {
+      %slice_7 = tensor.extract_slice %slice[0, %arg4] [64, 16] [1, 1]
+
+      // Max accumulation
+      %14 = linalg.generic {iterator_types = ["parallel", "parallel"]}
+            ins(%slice_7 : tensor<64x16xf32>) outs(%slice_8 : tensor<64x16xf32>) {
+        ^bb0(%in: f32, %out: f32):
+          %15 = arith.maxnumf %in, %out : f32
+          linalg.yield %15 : f32
+      } -> tensor<64x16xf32>
+
+      %inserted = tensor.insert_slice %14 into %arg5[0, 0] [64, 16] [1, 1]
+      scf.yield %inserted : tensor<64x16xf32>
+    }
+
+    // Final max reduction: (64,16) -> (64,)
+    %reduced = linalg.reduce ins(%8 : tensor<64x16xf32>) outs(%5 : tensor<64xf32>) dimensions = [1] {
+      (%in: f32, %init: f32) {
+        %14 = arith.maxnumf %in, %init : f32
+        linalg.yield %14 : f32
+      }
+    }
+
+    // Sum reduction loop with fused center+exp
+    %12 = scf.for %arg4 = %c0 to %c512 step %c16 iter_args(%arg5 = %11) -> (tensor<64x16xf32>) {
+      %slice_7 = tensor.extract_slice %slice[0, %arg4] [64, 16] [1, 1]
+
+      // Fused center+exp using reduced max
+      %14 = linalg.generic {iterator_types = ["parallel", "parallel"]}
+            ins(%slice_7, %reduced : tensor<64x16xf32>, tensor<64xf32>) outs(%slice_8 : tensor<64x16xf32>) {
+        ^bb0(%in: f32, %in_9: f32, %out: f32):
+          %16 = arith.subf %in, %in_9 : f32
+          %17 = math.exp %16 : f32
+          linalg.yield %17 : f32
+      } -> tensor<64x16xf32>
+
+      // Sum accumulation
+      %15 = linalg.generic {iterator_types = ["parallel", "parallel"]}
+            ins(%14 : tensor<64x16xf32>) outs(%arg5 : tensor<64x16xf32>) {
+        ^bb0(%in: f32, %out: f32):
+          %16 = arith.addf %in, %out : f32
+          linalg.yield %16 : f32
+      } -> tensor<64x16xf32>
+
+      scf.yield %15 : tensor<64x16xf32>
+    }
+
+    // Final sum reduction: (64,16) -> (64,)
+    %reduced_6 = linalg.reduce ins(%12 : tensor<64x16xf32>) outs(%9 : tensor<64xf32>) dimensions = [1] {
+      (%in: f32, %init: f32) {
+        %14 = arith.addf %in, %init : f32
+        linalg.yield %14 : f32
+      }
+    }
+
+    // Division loop with fused center+exp+div
+    %13 = scf.for %arg4 = %c0 to %c512 step %c16 iter_args(%arg5 = %slice_1) -> (tensor<64x512xf32>) {
+      %slice_7 = tensor.extract_slice %slice[0, %arg4] [64, 16] [1, 1]
+
+      // Fused center+exp
+      %14 = linalg.generic {iterator_types = ["parallel", "parallel"]}
+            ins(%slice_7, %reduced : tensor<64x16xf32>, tensor<64xf32>) outs(%slice_8 : tensor<64x16xf32>) {
+        ^bb0(%in: f32, %in_9: f32, %out: f32):
+          %16 = arith.subf %in, %in_9 : f32
+          %17 = math.exp %16 : f32
+          linalg.yield %17 : f32
+      } -> tensor<64x16xf32>
+
+      // Division
+      %15 = linalg.generic {iterator_types = ["parallel", "parallel"]}
+            ins(%14, %reduced_6 : tensor<64x16xf32>, tensor<64xf32>) outs(%slice_8 : tensor<64x16xf32>) {
+        ^bb0(%in: f32, %in_9: f32, %out: f32):
+          %16 = arith.divf %in, %in_9 : f32
+          linalg.yield %16 : f32
+      } -> tensor<64x16xf32>
+
+      %inserted = tensor.insert_slice %15 into %arg5[0, %arg4] [64, 16] [1, 1]
+      scf.yield %inserted : tensor<64x512xf32>
+    }
+
+    scf.forall.in_parallel {
+      tensor.parallel_insert_slice %13 into %arg3[%3, 0] [64, 512] [1, 1]
+    }
+  }
+}
+\end{lstlisting}
+
+\begin{frame}[fragile]{Stage 8: After Tiling Max Reduction (Part 1)}
+Max reduction tiled with partial buffers, creating 3 loops total.
+
+\begin{lstlisting}
+// Pseudo code (continued from Stage 7):
+parallel for tile_id in [0..16):
+  slice = input[tile_id*64:(tile_id+1)*64, :]
+
+  // Loop 1: Tiled max reduction
+  max_buffer = fill(-inf, 64, 16)
+  for col_offset in [0:512:16]:
+    input_tile = slice[:, col_offset:col_offset+16]
+    max_buffer = max(max_buffer, input_tile)
+  max_vals = reduce_max(max_buffer, dim=1)  // (64,)
+
+  // Loop 2: Sum reduction with fused center+exp
+  sum_buffer = zeros(64, 16)
+  for col_offset in [0:512:16]:
+    input_tile = slice[:, col_offset:col_offset+16]
+    exp_tile = exp(input_tile - max_vals)
+    sum_buffer += exp_tile
+  sum_vals = reduce_sum(sum_buffer, dim=1)  // (64,)
+\end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]{Stage 8: After Tiling Max Reduction (Part 2)}
+\begin{lstlisting}
+  // Loop 3: Division with fused center+exp+div
+  for col_offset in [0:512:16]:
+    input_tile = slice[:, col_offset:col_offset+16]
+    exp_tile = exp(input_tile - max_vals)
+    output[:, col_offset:col_offset+16] = exp_tile / sum_vals
+\end{lstlisting}
+
+\vspace{0.3cm}
+
+\textbf{Key Points}:
+\begin{itemize}
+\item All 3 operations (max, sum, div) are now tiled
+\item 3 separate loops over columns (32 iterations each)
+\item Each loop processes $64 \times 16$ tiles
+\end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]{Stage 9: Final Vectorized XeGPU Version}
+After vectorization, bufferization, and XeGPU lowering.
+
+\begin{lstlisting}
+// Pseudo code:
+gpu.kernel:
+  block_id = get_block_id()
+  slice = input[block_id*64:(block_id+1)*64, :]
+
+  // Allocate SLM buffer (64x16) for partial reductions
+  slm_buffer = alloc_shared_memory(64, 16)
+
+  // Loop 1: Max reduction (32 iterations)
+  slm_buffer = fill(-inf)
+  for col_offset in [0:512:16]:
+    tile = load_vector(slice[:, col_offset:col_offset+16])
+    slm_buffer = max(slm_buffer, tile)  // Update in SLM
+  max_vals = reduce_across_cols(slm_buffer)
+
+  // Loop 2: Sum reduction (32 iterations)
+  slm_buffer = zeros()
+  for col_offset in [0:512:16]:
+    tile = load_vector(slice[:, col_offset:col_offset+16])
+    exp_tile = exp(tile - max_vals)
+    slm_buffer += exp_tile  // Accumulate in SLM
+  sum_vals = reduce_across_cols(slm_buffer)
+\end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]{Stage 9: Final Vectorized XeGPU Version (cont'd)}
+\begin{lstlisting}
+  // Loop 3: Division (32 iterations)
+  for col_offset in [0:512:16]:
+    tile = load_vector(slice[:, col_offset:col_offset+16])
+    exp_tile = exp(tile - max_vals)
+    result = exp_tile / sum_vals
+    store_vector(output[:, col_offset:col_offset+16], result)
+\end{lstlisting}
+
+\vspace{0.3cm}
+
+\textbf{Key Features}:
+\begin{itemize}
+\item Uses vector operations on $64 \times 16$ tiles
+\item Shared Local Memory (SLM) for partial reductions
+\item XeGPU dialect for Intel GPU operations
+\end{itemize}
+\end{frame}
+
+\begin{frame}{Summary of Transformations}
+\begin{table}
+\centering
+\small
+\begin{tabular}{@{}rll@{}}
+\toprule
+\textbf{Stage} & \textbf{Key Transformation} & \textbf{Loop Structure} \\
+\midrule
+1 & Initial high-level softmax & No loops \\
+2 & Tile parallel dimension & \texttt{forall(16)} \\
+3 & Decompose softmax & \texttt{forall(16)} + 4 ops \\
+4 & Tile division & \texttt{forall(16)} $\rightarrow$ \texttt{for(32)} \\
+5 & Fuse into division loop & Recompute center+exp \\
+6 & Tile sum reduction & Add sum loop \\
+7 & Fuse into sum loop & Recompute center+exp \\
+8 & Tile max reduction & 3 loops total \\
+9 & Vectorize + XeGPU & GPU with SLM \\
+\bottomrule
+\end{tabular}
+\end{table}
+
+\vspace{0.3cm}
+
+\textbf{Final pattern per GPU block}: 3 loops of 32 iterations each
+\begin{enumerate}
+\item Max reduction $\rightarrow$ SLM $\rightarrow$ final reduction
+\item Sum reduction (fused center+exp) $\rightarrow$ SLM $\rightarrow$ final reduction
+\item Division (fused center+exp+div) $\rightarrow$ global memory
+\end{enumerate}
+\end{frame}
+
+\begin{frame}{Optimization: Fusing Max and Sum Loops}
+After Stage 8, we can fuse max and sum loops into one.
+
+\textbf{Result}: 3 loops $\rightarrow$ 2 loops
+
+\vspace{0.5cm}
+
+\textbf{Key Insight}: \emph{Online Softmax Algorithm}
+
+Incrementally update both global max and sum as we process each tile:
+
+\begin{enumerate}
+\item Compute \textbf{local max} for the tile
+\item Update \textbf{global max} = $\max(\text{old\_max}, \text{local\_max})$
+\item Compute \textbf{local sum} = $\sum \exp(x - \text{local\_max})$
+\item \textbf{Rescale} global sum by $\exp(\text{old\_max} - \text{new\_max})$
+\item \textbf{Add} the local sum, rescaled by $\exp(\text{local\_max} - \text{new\_max})$, to the global sum
+\end{enumerate}
+
+\vspace{0.3cm}
+
+Maintains numerical stability while reducing memory traffic!
+\end{frame}
+
+\begin{frame}[fragile]{Before Fusion: 3 Separate Loops}
+\begin{lstlisting}
+// Pseudo code:
+parallel for tile_id in [0..16):
+  slice = input[tile_id*64:(tile_id+1)*64, :]
+
+  // Loop 1: Max reduction
+  max_buffer = fill(-inf, 64, 16)
+  for col_offset in [0:512:16]:
+    tile = slice[:, col_offset:col_offset+16]
+    max_buffer = max(max_buffer, tile)
+  max_vals = reduce_max(max_buffer, dim=1)
+
+  // Loop 2: Sum reduction
+  sum_buffer = zeros(64, 16)
+  for col_offset in [0:512:16]:
+    tile = slice[:, col_offset:col_offset+16]
+    exp_tile = exp(tile - max_vals)  // Uses final max
+    sum_buffer += exp_tile
+  sum_vals = reduce_sum(sum_buffer, dim=1)
+
+  // Loop 3: Division
+  for col_offset in [0:512:16]: ...
+\end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]{After Fusion: 2 Loops (Online Softmax)}
+\begin{lstlisting}
+// Pseudo code:
+parallel for tile_id in [0..16):
+  slice = input[tile_id*64:(tile_id+1)*64, :]
+
+  // Loop 1: Fused max+sum (online softmax algorithm)
+  global_max = fill(-inf, 64)
+  global_sum = zeros(64)
+
+  for col_offset in [0:512:16]:
+    tile = slice[:, col_offset:col_offset+16]
+
+    // Update max incrementally
+    local_max = reduce_max(tile, dim=1)
+    new_max = max(global_max, local_max)
+
+    // Compute local sum centered on local_max
+    local_sum = sum(exp(tile - local_max), dim=1)
+
+    // Rescale and accumulate sum
+    correction = exp(global_max - new_max)
+    local_correction = exp(local_max - new_max)
+    global_sum = global_sum * correction + local_sum * local_correction
+
+    global_max = new_max
+\end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]{After Fusion: 2 Loops (cont'd)}
+\begin{lstlisting}
+  // Loop 2: Division (same as before)
+  for col_offset in [0:512:16]:
+    tile = slice[:, col_offset:col_offset+16]
+    exp_tile = exp(tile - global_max)
+    output[:, col_offset:col_offset+16] = exp_tile / global_sum
+\end{lstlisting}
+
+\vspace{0.5cm}
+
+\textbf{Benefits}:
+\begin{itemize}
+\item Reduced loop count: 3 $\rightarrow$ 2 loops
+\item Less memory traffic: input is read twice instead of three times
+\item Same numerical stability (still uses max-centering)
+\end{itemize}
+
+\vspace{0.3cm}
+
+\textbf{Trade-off}: More computation per iteration (exponentials for rescaling)
+\end{frame}
+
+\begin{frame}{Summary}
+\textbf{Softmax Lowering Journey}: 9 transformation stages
+
+\begin{enumerate}
+\item Start: High-level \texttt{linalg.softmax} operation
+\item Decompose into: max $\rightarrow$ center+exp $\rightarrow$ sum $\rightarrow$ div
+\item Tile parallel dimension (16 workgroups)
+\item Tile reduction dimension (32 iterations per loop)
+\item Fuse operations to reduce memory footprint
+\item Lower to GPU with vectorization and SLM
+\item \textbf{Optional}: Fuse max+sum loops (online softmax)
+\end{enumerate}
+
+\vspace{0.5cm}
+
+\textbf{Key Techniques}:
+\begin{itemize}
+\item Tiling for parallelism and memory hierarchy
+\item Fusion for memory efficiency (recomputation)
+\item Online softmax for bandwidth optimization
+\end{itemize}
+\end{frame}
+
+\end{document}