Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
800 changes: 400 additions & 400 deletions samples/benchmarks/vertical_advection_128_128_80.sptl

Large diffs are not rendered by default.

800 changes: 400 additions & 400 deletions samples/benchmarks/vertical_advection_16_16_4.sptl

Large diffs are not rendered by default.

800 changes: 400 additions & 400 deletions samples/benchmarks/vertical_advection_4_4_4.sptl

Large diffs are not rendered by default.

13 changes: 12 additions & 1 deletion spatialstencil/lowering/stencil_to_spatial_compute_fwbw.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,18 @@ def visit_Subscript(self, node: sast.Subscript):
assert node.subscript[0] == 0
assert node.subscript[1] == 0
z_offset = node.subscript[2]
array = self.placement.get_storage(node.value)
if z_offset != 0:
# For non-zero k-offsets in a FORWARD/BACKWARD stencil, the access
# targets the *accumulated* (final) field value at the neighbouring
# k-level — i.e. the value after every assignment at that level has
# completed. Local SSA intermediates (e.g. fresh_d before Thomas
# elimination) only hold the correct value at the *current* k; using
# them at k±1 yields wrong (often uninitialized) results.
array = self.placement.get_accumulated_storage(node.value.name)
if array is None:
array = self.placement.get_storage(node.value)
else:
array = self.placement.get_storage(node.value)
if isinstance(array[1], spa.ArrayType):
if z_offset == 0:
access = self.iteration_variable.identifier
Expand Down
18 changes: 18 additions & 0 deletions spatialstencil/lowering/stencil_to_spatial_place.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,24 @@ def get_shift(self) -> tuple[int, int, int]:
"""
return self.domain_shift

def get_accumulated_storage(self,
name: str,
offset: sast.Offset = sast.Offset.zero()) -> tuple[spa.Identifier, spa.ArrayType | spa.ScalarType] | None:
"""Return program-scope (accumulated) storage for a variable by name.

Unlike get_storage(), this bypasses SSA-specific storage and resolves
through the program scope only, returning the storage associated with
the field's accumulated value — i.e. the final value after all
assignments at a given k-level have completed. This is the correct
target for stencil accesses with a non-zero k-offset inside a
FORWARD or BACKWARD computation body.
"""
if name in self._program_scope_fields:
identifier = self._program_scope_fields[name]
if offset in self._storage_map[identifier]:
return self._storage_map[identifier][offset]
return None

def get_storage(self,
identifier: sast.Identifier,
offset: sast.Offset = sast.Offset.zero()) -> tuple[spa.Identifier, spa.ArrayType | spa.ScalarType] | None:
Expand Down
49 changes: 49 additions & 0 deletions tests/csl_runtime/test_vadv.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#!/bin/sh

set -e

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

BNAME=vadv_sptl
# Compile the spatial stencil program
sptlc "$SCRIPT_DIR/../../samples/benchmarks/vertical_advection_4_4_4.sptl" $BNAME $*

python <<EOF
import numpy as np
dtr_stage = np.random.rand(1).astype(np.float32)
u_pos = np.random.rand(4, 4, 4).astype(np.float32)
u_stage = np.random.rand(4, 4, 5).astype(np.float32)
utens = np.random.rand(4, 4, 4).astype(np.float32)
utens_stage = np.random.rand(4, 4, 4).astype(np.float32)
wcon = np.random.rand(5, 4, 4).astype(np.float32)
# Save the arrays to .npy files
np.save('dtr_stage.npy', dtr_stage)
np.save('u_pos.npy', u_pos)
np.save('u_stage.npy', u_stage)
np.save('utens.npy', utens)
np.save('utens_stage.npy', utens_stage)
np.save('wcon.npy', wcon)
EOF

# Run the compiled program with the Python runtime and the simulator
timeout -s 9 120 cs_python "$SCRIPT_DIR/../../spatialstencil/runtime/runtime.py" --benchmark $BNAME dtr_stage.npy u_pos.npy u_stage.npy utens.npy utens_stage.npy wcon.npy

# Check if the output file matches the expected output
python $SCRIPT_DIR/vertical_advection.py dtr_stage.npy u_pos.npy u_stage.npy utens.npy utens_stage.npy wcon.npy -o expected_out.npy
python <<EOF
import numpy as np
# Load the arrays
ref = np.load('expected_out.npy')
output = np.load('OUT___kernel_out_0.npy')
# Check if the output is correct
if not np.allclose(output, ref, atol=1e-6, rtol=1e-5):
print("Test failed: Output does not match expected result.")
exit(1)
else:
print("Test passed: Output matches expected result.")
EOF

# Clean up generated files
rm -rf $BNAME
rm -f OUT___kernel_out_0.npy expected_out.npy
rm -f dtr_stage.npy u_pos.npy u_stage.npy utens.npy utens_stage.npy wcon.npy
94 changes: 94 additions & 0 deletions tests/csl_runtime/vertical_advection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
import argparse
import numpy as np
import numpy.typing as npt

# Adapted from NPBench: https://github.com/spcl/npbench/blob/main/npbench/benchmarks/weather_stencils/vadv/vadv_numpy.py

# Sample constants
BET_M = 0.5
BET_P = 0.5


# Adapted from https://github.com/GridTools/gt4py/blob/1caca893034a18d5df1522ed251486659f846589/tests/test_integration/stencil_definitions.py#L111
def vadv(utens_stage, u_stage, wcon, u_pos, utens, dtr_stage):
I, J, K = utens_stage.shape[0], utens_stage.shape[1], utens_stage.shape[2]
ccol = np.ndarray((I, J, K), dtype=utens_stage.dtype)
dcol = np.ndarray((I, J, K), dtype=utens_stage.dtype)
data_col = np.ndarray((I, J), dtype=utens_stage.dtype)

for k in range(1):
gcv = 0.25 * (wcon[1:, :, k + 1] + wcon[:-1, :, k + 1])
cs = gcv * BET_M

ccol[:, :, k] = gcv * BET_P
bcol = dtr_stage - ccol[:, :, k]

# update the d column
correction_term = -cs * (u_stage[:, :, k + 1] - u_stage[:, :, k])
dcol[:, :, k] = (dtr_stage * u_pos[:, :, k] + utens[:, :, k] + utens_stage[:, :, k] + correction_term)

# Thomas forward
divided = 1.0 / bcol
ccol[:, :, k] = ccol[:, :, k] * divided
dcol[:, :, k] = dcol[:, :, k] * divided

for k in range(1, K - 1):
gav = -0.25 * (wcon[1:, :, k] + wcon[:-1, :, k])
gcv = 0.25 * (wcon[1:, :, k + 1] + wcon[:-1, :, k + 1])

as_ = gav * BET_M
cs = gcv * BET_M

acol = gav * BET_P
ccol[:, :, k] = gcv * BET_P
bcol = dtr_stage - acol - ccol[:, :, k]

# update the d column
correction_term = -as_ * (u_stage[:, :, k - 1] - u_stage[:, :, k]) - cs * (
u_stage[:, :, k + 1] - u_stage[:, :, k])
dcol[:, :, k] = (dtr_stage * u_pos[:, :, k] + utens[:, :, k] + utens_stage[:, :, k] + correction_term)

# Thomas forward
divided = 1.0 / (bcol - ccol[:, :, k - 1] * acol)
ccol[:, :, k] = ccol[:, :, k] * divided
dcol[:, :, k] = (dcol[:, :, k] - (dcol[:, :, k - 1]) * acol) * divided

for k in range(K - 1, K):
gav = -0.25 * (wcon[1:, :, k] + wcon[:-1, :, k])
as_ = gav * BET_M
acol = gav * BET_P
bcol = dtr_stage - acol

# update the d column
correction_term = -as_ * (u_stage[:, :, k - 1] - u_stage[:, :, k])
dcol[:, :, k] = (dtr_stage * u_pos[:, :, k] + utens[:, :, k] + utens_stage[:, :, k] + correction_term)

# Thomas forward
divided = 1.0 / (bcol - ccol[:, :, k - 1] * acol)
dcol[:, :, k] = (dcol[:, :, k] - (dcol[:, :, k - 1]) * acol) * divided

for k in range(K - 1, K - 2, -1):
datacol = dcol[:, :, k]
data_col[:] = datacol
utens_stage[:, :, k] = dtr_stage * (datacol - u_pos[:, :, k])

for k in range(K - 2, -1, -1):
datacol = dcol[:, :, k] - ccol[:, :, k] * data_col[:, :]
data_col[:] = datacol
utens_stage[:, :, k] = dtr_stage * (datacol - u_pos[:, :, k])

return utens_stage


if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Vertical Advection Stencil")
parser.add_argument("inputs", nargs='+', type=str, help="Input file paths")
parser.add_argument("--output", "-o", type=str, default='vadv_out.npy', help="Output file path")
args = parser.parse_args()

inputs = [np.load(input_file) for input_file in args.inputs]
inputs[0] = inputs[0].item() # dtr_stage
dtr_stage, u_pos, u_stage, utens, utens_stage, wcon = inputs

result = vadv(utens_stage, u_stage, wcon, u_pos, utens, dtr_stage)
np.save(args.output, result)
36 changes: 36 additions & 0 deletions tests/stencil_ir/test_lowering_stencil_to_spatial.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import re
import unittest
from pathlib import Path
from typing import Tuple

import pytest

from spatialstencil.cli.gt4py_to_spatial import lower_function, lower_gt4py_to_sptl
from spatialstencil.lowering import gt4py_to_stencil_ir
from spatialstencil.lowering.stencil_to_spatial_routing import ChannelStrategy
from spatialstencil.lowering.stencil_to_spatial_compute import HorizontalStencilTransformer
from spatialstencil.lowering.stencil_to_spatial_dataflow import ProgramDataflow
Expand Down Expand Up @@ -228,6 +230,39 @@ def test_vadv():
assert subgrids_dont_overlap(spatial_program)


def test_fwbw_koffset_uses_accumulated_storage():
"""Regression: k±1 accesses in FORWARD/BACKWARD stencil bodies must use
accumulated (program-scope) storage, not SSA-versioned intermediates.

The Thomas forward sweep in vertical_advection needs dcol[k-1] equal to
the Thomas-eliminated value from the previous k-iteration. The bug was
that visit_Subscript used the SSA-specific local array (e.g.
dcol_0_0_0#3) instead of the accumulated array (dcol_0_0_0), yielding
uninitialised reads for the first k in each interval.
"""
from spatialstencil.syntax.gt4py import parser as gt4py_parser

gtfuncs = gt4py_parser.parse_file(str(
Path(__file__).parent / Path('../../samples/stencils.py')))
program = gtfuncs['vertical_advection']
irprogram = gt4py_to_stencil_ir.lower_gt4py_to_stencil_ir(program, domain=(4, 4, 4))
type_inference.infer_field_extents(irprogram)
type_inference.infer_field_domains(irprogram)
kernel = lower_stencil_to_spatial(irprogram)
ir = kernel.as_ir()

# Detect the bug: an SSA-versioned array name (name#N) directly followed
# by an array index containing a k-offset (e.g. [(k#3 - 1)]).
# Accumulated storage is printed without a version suffix, so any match
# here means a local intermediate is being read at a neighbouring k-level.
bad = re.findall(r'\w+#\d+\[\([^)]*-\s*\d+\)\]', ir)
assert not bad, (
"k-offset accesses to SSA-versioned (non-accumulated) storage found; "
"these should use the accumulated field instead:\n "
+ "\n ".join(bad)
)


def test_gt4py_integration():
from spatialstencil.syntax.gt4py import parser as gt4py_parser

Expand All @@ -247,4 +282,5 @@ def test_gt4py_integration():
test_vertical_stencil_finishes()
test_scalar_arguments()
test_vadv()
test_fwbw_koffset_uses_accumulated_storage()
test_gt4py_integration()
Loading