From 3fdf0fe0ab7e813f3956b3b7d8c5b4eefefec7a2 Mon Sep 17 00:00:00 2001
From: Alex Kaszynski <akascap@gmail.com>
Date: Fri, 19 Jun 2026 12:51:22 -0600
Subject: [PATCH] perf(data-interp): default-on threading for cell<->point data
 interpolation

Wrap the per-output-element vtkSMPTools::For loops of vtkCellDataToPointData
(5 dispatch sites over numPts) and vtkPointDataToCellData (2 sites over numCells)
in fvtk::RunSafeFilterParallel so they thread under the audited 4-thread-capped
default policy instead of the Sequential backend.

Each output element is an independent average of read-only input: a point
gathers from its incident cells (via the deterministically-built cell links) and
a cell gathers from its points, each summed in a fixed link/point order and
written to its own pre-sized slot. No shared scatter, no cross-element state =>
bit-identical under any thread count and to serial stock VTK. Only the For
compute loops are wrapped; the cell-links build is left outside the scope.

Adds a >THRESHOLD (sphere 700x700 ~490k pts / ~980k cells) determinism case
parametrized over cd2pd/pd2cd. Byte-exactness vs stock is covered by the
existing op_cell2point / op_point2cell / op_point2cell_ugrid bitexact cases.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 Filters/Core/vtkCellDataToPointData.cxx | 11 ++--
 Filters/Core/vtkPointDataToCellData.cxx |  6 +-
 tests/bitexact/test_smp_determinism.py  | 82 +++++++++++++++++++++++++
 3 files changed, 92 insertions(+), 7 deletions(-)
diff --git a/Filters/Core/vtkCellDataToPointData.cxx b/Filters/Core/vtkCellDataToPointData.cxx
index 3b116ea9..abd9695b 100644
--- a/Filters/Core/vtkCellDataToPointData.cxx
+++ b/Filters/Core/vtkCellDataToPointData.cxx
@@ -11,6 +11,7 @@
 #include "vtkCellTypeUtilities.h"
 #include "vtkDataArrayRange.h"
 #include "vtkDataSet.h"
+#include "vtkFVTKSMPDefaults.h" // fvtk: opt into default multithreading (bit-exact)
 #include "vtkIdList.h"
 #include "vtkInformation.h"
 #include "vtkInformationVector.h"
@@ -81,13 +82,13 @@ void FastUnstructuredDataACL(
   if (auto staticCellLinks = vtkStaticCellLinks::SafeDownCast(links))
   {
     UnstructuredDataCD2PD<vtkStaticCellLinks> cd2pd(numPts, cfl, pd, staticCellLinks);
-    vtkSMPTools::For(0, numPts, cd2pd);
+    fvtk::RunSafeFilterParallel([&]() { vtkSMPTools::For(0, numPts, cd2pd); });
   }
   else // vtkCellLinks
   {
     auto cellLinks = vtkCellLinks::SafeDownCast(links);
     UnstructuredDataCD2PD<vtkCellLinks> cd2pd(numPts, cfl, pd, cellLinks);
-    vtkSMPTools::For(0, numPts, cd2pd);
+    fvtk::RunSafeFilterParallel([&]() { vtkSMPTools::For(0, numPts, cd2pd); });
   }
 }
 
@@ -108,7 +109,7 @@ void FastUnstructuredDataSCLT(
     TCellLinks cellLinks;
     cellLinks.BuildLinks(input);
     UnstructuredDataCD2PD<TCellLinks> cd2pd(numberOfPoints, cfl, pd, &cellLinks);
-    vtkSMPTools::For(0, numberOfPoints, cd2pd);
+    fvtk::RunSafeFilterParallel([&]() { vtkSMPTools::For(0, numberOfPoints, cd2pd); });
   }
 #ifdef VTK_USE_64BIT_IDS
   else if (linksType == vtkAbstractCellLinks::STATIC_CELL_LINKS_UINT)
@@ -117,7 +118,7 @@ void FastUnstructuredDataSCLT(
     TCellLinks cellLinks;
     cellLinks.BuildLinks(input);
     UnstructuredDataCD2PD<TCellLinks> cd2pd(numberOfPoints, cfl, pd, &cellLinks);
-    vtkSMPTools::For(0, numberOfPoints, cd2pd);
+    fvtk::RunSafeFilterParallel([&]() { vtkSMPTools::For(0, numberOfPoints, cd2pd); });
   }
 #endif
   else
@@ -126,7 +127,7 @@ void FastUnstructuredDataSCLT(
     TCellLinks cellLinks;
     cellLinks.BuildLinks(input);
     UnstructuredDataCD2PD<TCellLinks> cd2pd(numberOfPoints, cfl, pd, &cellLinks);
-    vtkSMPTools::For(0, numberOfPoints, cd2pd);
+    fvtk::RunSafeFilterParallel([&]() { vtkSMPTools::For(0, numberOfPoints, cd2pd); });
   }
 }
 
diff --git a/Filters/Core/vtkPointDataToCellData.cxx b/Filters/Core/vtkPointDataToCellData.cxx
index 5eb810e7..1c3a59f3 100644
--- a/Filters/Core/vtkPointDataToCellData.cxx
+++ b/Filters/Core/vtkPointDataToCellData.cxx
@@ -8,6 +8,7 @@
 #include "vtkCellData.h"
 #include "vtkDataArray.h"
 #include "vtkDataSet.h"
+#include "vtkFVTKSMPDefaults.h" // fvtk: opt into default multithreading (bit-exact)
 #include "vtkIdList.h"
 #include "vtkInformation.h"
 #include "vtkInformationVector.h"
@@ -338,7 +339,8 @@ struct PointDataToCellDataCategoricalWorker
     vtkPointDataToCellData* filter)
   {
     PointDataToCellDataCategoricalFunctor<ArrayType> pd2cd(input, inPD, outCD, scalars, filter);
-    vtkSMPTools::For(0, input->GetNumberOfCells(), pd2cd);
+    fvtk::RunSafeFilterParallel(
+      [&]() { vtkSMPTools::For(0, input->GetNumberOfCells(), pd2cd); });
   }
 };
 } // End anonymous namespace
@@ -504,7 +506,7 @@ int vtkPointDataToCellData::RequestData(
   {
     // Thread the process
     PointDataToCellDataFunctor pd2cd(input, inPD, outCD, this);
-    vtkSMPTools::For(0, numCells, pd2cd);
+    fvtk::RunSafeFilterParallel([&]() { vtkSMPTools::For(0, numCells, pd2cd); });
   }
   // Create a threaded fast path for categorical data.
   else
diff --git a/tests/bitexact/test_smp_determinism.py b/tests/bitexact/test_smp_determinism.py
index 1859e9a6..f541bc85 100644
--- a/tests/bitexact/test_smp_determinism.py
+++ b/tests/bitexact/test_smp_determinism.py
@@ -251,3 +251,85 @@ def test_large_transform_threaded_path_is_deterministic(
         f"large {kind} transform digest at {nthreads} threads ({got[:16]}) != "
         f"1-thread reference ({ref[:16]}) -- threading is non-deterministic"
     )
+
+
+# A large-mesh script that forces the threaded cell<->point data-interpolation
+# paths: vtkCellDataToPointData (For over numPts, each point averages its incident
+# cells via read-only cell links) and vtkPointDataToCellData (For over numCells,
+# each cell averages its points). Both parallelize with grain = vtkSMPTools::
+# THRESHOLD, so per-suite cases (<10k) run serial; this drives ~490k pts / ~980k
+# cells so the multithreaded branch is actually exercised. Each output element is
+# an independent average of read-only input => bit-exact under any thread count.
+# argv[1] selects "cd2pd" or "pd2cd".
+_LARGE_INTERP_SCRIPT = r"""
+import os, sys, hashlib
+sys.path = [p for p in sys.path if p not in ("", os.getcwd())]
+import numpy as np
+from vtkmodules.util.numpy_support import numpy_to_vtk, vtk_to_numpy
+from vtkmodules.vtkFiltersSources import vtkSphereSource
+from vtkmodules.vtkFiltersCore import (
+    vtkTriangleFilter, vtkCellDataToPointData, vtkPointDataToCellData)
+
+kind = sys.argv[1]
+s = vtkSphereSource(); s.SetThetaResolution(700); s.SetPhiResolution(700)
+t = vtkTriangleFilter(); t.SetInputConnection(s.GetOutputPort()); t.Update()
+poly = t.GetOutput()
+npts = poly.GetNumberOfPoints(); ncells = poly.GetNumberOfCells()
+assert npts > 100000 and ncells > 100000, (npts, ncells)
+
+if kind == "cd2pd":
+    # non-trivial per-cell scalar (float64) -> averaged onto points
+    poly.GetCellData().SetScalars(
+        numpy_to_vtk(np.linspace(-1, 1, ncells).astype(np.float64), deep=1))
+    f = vtkCellDataToPointData(); f.SetInputData(poly); f.Update()
+    arr = f.GetOutput().GetPointData().GetScalars()
+elif kind == "pd2cd":
+    poly.GetPointData().SetScalars(
+        numpy_to_vtk(np.linspace(-1, 1, npts).astype(np.float64), deep=1))
+    f = vtkPointDataToCellData(); f.SetInputData(poly); f.Update()
+    arr = f.GetOutput().GetCellData().GetScalars()
+else:
+    raise SystemExit("unknown kind " + kind)
+
+h = hashlib.sha256()
+h.update(np.ascontiguousarray(vtk_to_numpy(arr)).tobytes())
+print(h.hexdigest())
+"""
+
+INTERP_KINDS = ["cd2pd", "pd2cd"]
+
+
+@pytest.fixture(scope="module")
+def large_interp_digests(tmp_path_factory):
+    fvtk_py = os.environ.get("BITEXACT_FVTK_PY")
+    if not fvtk_py:
+        pytest.skip("BITEXACT_FVTK_PY not set.")
+    script = tmp_path_factory.mktemp("large_interp") / "large_interp.py"
+    script.write_text(_LARGE_INTERP_SCRIPT)
+    digests = {k: {} for k in INTERP_KINDS}
+    for kind in INTERP_KINDS:
+        for n in THREAD_COUNTS:
+            proc = subprocess.run(
+                [fvtk_py, str(script), kind],
+                env=_env(os.environ.get("BITEXACT_FVTK_LDLP", ""), n),
+                capture_output=True,
+                text=True,
+            )
+            assert proc.returncode == 0, f"{kind} @ {n} threads:\n{proc.stderr}"
+            digests[kind][n] = proc.stdout.strip().splitlines()[-1]
+    return digests
+
+
+@pytest.mark.parametrize("kind", INTERP_KINDS)
+@pytest.mark.parametrize("nthreads", [n for n in THREAD_COUNTS if n != 1])
+def test_large_interp_threaded_path_is_deterministic(
+    large_interp_digests, kind, nthreads
+):
+    """The >THRESHOLD cell<->point data-interpolation paths (genuinely
+    multithreaded) hash identically at 1 vs N threads."""
+    ref = large_interp_digests[kind][1]
+    got = large_interp_digests[kind][nthreads]
+    assert got == ref, (
+        f"large {kind} digest at {nthreads} threads ({got[:16]}) != "
+        f"1-thread reference ({ref[:16]}) -- threading is non-deterministic"
+    )