Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 22 additions & 9 deletions Common/Transforms/vtkAbstractTransform.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include "vtkDataArray.h"
#include "vtkDebugLeaks.h"
#include "vtkDoubleArray.h"
#include "vtkFVTKSMPDefaults.h" // fvtk: opt into default multithreading (bit-exact)
#include "vtkFloatArray.h"
#include "vtkIndent.h"
#include "vtkLinearTransform.h"
Expand Down Expand Up @@ -283,16 +284,23 @@ void vtkAbstractTransform::TransformPoints(vtkPoints* inPts, vtkPoints* outPts)
const AbsTransformAOSReader inReader(inPts->GetData());
const AbsTransformAOSWriter outWriter(outPts->GetData());

vtkSMPTools::For(0, n,
[&](vtkIdType ptId, vtkIdType endPtId)
// fvtk: per-point-independent writes to a pre-sized output (InternalTransformPoint
// is a read-only function of the post-Update transform state) => bit-exact under
// any thread count; run under the default-threading policy.
fvtk::RunSafeFilterParallel(
[&]()
{
double point[3];
for (; ptId < endPtId; ++ptId)
{
inReader.Read(ptId, point);
this->InternalTransformPoint(point, point);
outWriter.Write(m + ptId, point);
}
vtkSMPTools::For(0, n,
[&](vtkIdType ptId, vtkIdType endPtId)
{
double point[3];
for (; ptId < endPtId; ++ptId)
{
inReader.Read(ptId, point);
this->InternalTransformPoint(point, point);
outWriter.Write(m + ptId, point);
}
});
});
}

Expand Down Expand Up @@ -353,6 +361,10 @@ void vtkAbstractTransform::TransformPointsNormalsVectors(vtkPoints* inPts, vtkPo
}
}

// fvtk: per-point-independent writes to pre-sized outputs (InternalTransform-
// Derivative is a read-only function of the post-Update transform state) =>
// bit-exact under any thread count; run under the default-threading policy.
fvtk::RunSafeFilterParallel([&]() {
vtkSMPTools::For(0, n,
[&](vtkIdType ptId, vtkIdType endPtId)
{
Expand Down Expand Up @@ -389,6 +401,7 @@ void vtkAbstractTransform::TransformPointsNormalsVectors(vtkPoints* inPts, vtkPo
}
}
});
}); // fvtk: end RunSafeFilterParallel
}

//------------------------------------------------------------------------------
Expand Down
29 changes: 20 additions & 9 deletions Common/Transforms/vtkHomogeneousTransform.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

#include "vtkDataArray.h"
#include "vtkDoubleArray.h"
#include "vtkFVTKSMPDefaults.h" // fvtk: opt into default multithreading (bit-exact)
#include "vtkFloatArray.h"
#include "vtkMath.h"
#include "vtkMatrix4x4.h"
Expand Down Expand Up @@ -235,16 +236,22 @@ void vtkHomogeneousTransform::TransformPoints(vtkPoints* inPts, vtkPoints* outPt
const HomogTransformAOSReader inReader(inPts->GetData());
const HomogTransformAOSWriter outWriter(outPts->GetData());

vtkSMPTools::For(0, n, vtkSMPTools::THRESHOLD,
[&](vtkIdType ptId, vtkIdType endPtId)
// fvtk: per-point-independent writes to a pre-sized output => bit-exact under
// any thread count; run under the default-threading policy.
fvtk::RunSafeFilterParallel(
[&]()
{
double point[3];
for (; ptId < endPtId; ++ptId)
{
inReader.Read(ptId, point);
vtkHomogeneousTransformPoint(M, point, point);
outWriter.Write(m + ptId, point);
}
vtkSMPTools::For(0, n, vtkSMPTools::THRESHOLD,
[&](vtkIdType ptId, vtkIdType endPtId)
{
double point[3];
for (; ptId < endPtId; ++ptId)
{
inReader.Read(ptId, point);
vtkHomogeneousTransformPoint(M, point, point);
outWriter.Write(m + ptId, point);
}
});
});
}

Expand Down Expand Up @@ -308,6 +315,9 @@ void vtkHomogeneousTransform::TransformPointsNormalsVectors(vtkPoints* inPts, vt
}
}

// fvtk: per-point-independent writes to pre-sized outputs => bit-exact under
// any thread count; run under the default-threading policy.
fvtk::RunSafeFilterParallel([&]() {
vtkSMPTools::For(0, n, vtkSMPTools::THRESHOLD,
[&](vtkIdType ptId, vtkIdType endPtId)
{
Expand Down Expand Up @@ -355,6 +365,7 @@ void vtkHomogeneousTransform::TransformPointsNormalsVectors(vtkPoints* inPts, vt
}
}
});
}); // fvtk: end RunSafeFilterParallel
}

//------------------------------------------------------------------------------
Expand Down
115 changes: 65 additions & 50 deletions Common/Transforms/vtkLinearTransform.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include "vtkArrayDispatch.h"
#include "vtkArrayDispatchDataSetArrayList.h"
#include "vtkDataArray.h"
#include "vtkFVTKSMPDefaults.h" // fvtk: opt into default multithreading (bit-exact)
#include "vtkMath.h"
#include "vtkMatrix4x4.h"
#include "vtkPoints.h"
Expand Down Expand Up @@ -274,23 +275,29 @@ void vtkLinearTransform::TransformPoints(vtkPoints* inPts, vtkPoints* outPts)
vtkDataArray* outArray = outPts->GetData();
outArray->WriteVoidPointer(3 * m, 3 * n);

vtkLinearTransformPointsWorker worker;
if (!vtkArrayDispatch::Dispatch2ByArray<vtkArrayDispatch::AOSPointArrays,
vtkArrayDispatch::AOSPointArrays>::Execute(inArray, outArray, worker, m, matrix))
{
// for anything that isn't float or double
vtkSMPTools::For(0, n, vtkSMPTools::THRESHOLD,
[&](vtkIdType ptId, vtkIdType endPtId)
// fvtk: run the (pre-sized, per-tuple-independent => bit-exact under any thread
// count) transform under the default-threading policy.
fvtk::RunSafeFilterParallel(
[&]()
{
vtkLinearTransformPointsWorker worker;
if (!vtkArrayDispatch::Dispatch2ByArray<vtkArrayDispatch::AOSPointArrays,
vtkArrayDispatch::AOSPointArrays>::Execute(inArray, outArray, worker, m, matrix))
{
double point[3];
for (; ptId < endPtId; ++ptId)
{
inPts->GetPoint(ptId, point);
vtkLinearTransformPoint(matrix, point, point);
outPts->SetPoint(m + ptId, point);
}
});
}
// for anything that isn't float or double
vtkSMPTools::For(0, n, vtkSMPTools::THRESHOLD,
[&](vtkIdType ptId, vtkIdType endPtId)
{
double point[3];
for (; ptId < endPtId; ++ptId)
{
inPts->GetPoint(ptId, point);
vtkLinearTransformPoint(matrix, point, point);
outPts->SetPoint(m + ptId, point);
}
});
}
});
}

//------------------------------------------------------------------------------
Expand All @@ -310,25 +317,29 @@ void vtkLinearTransform::TransformNormals(vtkDataArray* inNms, vtkDataArray* out
// operate directly on the memory to avoid GetTuple()/SetPoint() calls.
outNms->WriteVoidPointer(3 * m, 3 * n);

vtkLinearTransformNormalsWorker worker;
if (!vtkArrayDispatch::Dispatch2ByArray<vtkArrayDispatch::AOSPointArrays,
vtkArrayDispatch::AOSPointArrays>::Execute(inNms, outNms, worker, m, matrix))
{
// for anything that isn't float or double
vtkSMPTools::For(0, n, vtkSMPTools::THRESHOLD,
[&](vtkIdType ptId, vtkIdType endPtId)
fvtk::RunSafeFilterParallel(
[&]()
{
vtkLinearTransformNormalsWorker worker;
if (!vtkArrayDispatch::Dispatch2ByArray<vtkArrayDispatch::AOSPointArrays,
vtkArrayDispatch::AOSPointArrays>::Execute(inNms, outNms, worker, m, matrix))
{
double norm[3];
for (; ptId < endPtId; ++ptId)
{
inNms->GetTuple(ptId, norm);
// use TransformVector because matrix is already transposed & inverted
vtkLinearTransformVector(matrix, norm, norm);
vtkMath::Normalize(norm);
outNms->SetTuple(m + ptId, norm);
}
});
}
// for anything that isn't float or double
vtkSMPTools::For(0, n, vtkSMPTools::THRESHOLD,
[&](vtkIdType ptId, vtkIdType endPtId)
{
double norm[3];
for (; ptId < endPtId; ++ptId)
{
inNms->GetTuple(ptId, norm);
// use TransformVector because matrix is already transposed & inverted
vtkLinearTransformVector(matrix, norm, norm);
vtkMath::Normalize(norm);
outNms->SetTuple(m + ptId, norm);
}
});
}
});
}

//------------------------------------------------------------------------------
Expand All @@ -344,22 +355,26 @@ void vtkLinearTransform::TransformVectors(vtkDataArray* inVrs, vtkDataArray* out
// operate directly on the memory to avoid GetTuple()/SetTuple() calls.
outVrs->WriteVoidPointer(3 * m, 3 * n);

vtkLinearTransformVectorsWorker worker;
if (!vtkArrayDispatch::Dispatch2ByArray<vtkArrayDispatch::AOSPointArrays,
vtkArrayDispatch::AOSPointArrays>::Execute(inVrs, outVrs, worker, m, matrix))
{
// for anything that isn't float or double
vtkSMPTools::For(0, n, vtkSMPTools::THRESHOLD,
[&](vtkIdType ptId, vtkIdType endPtId)
fvtk::RunSafeFilterParallel(
[&]()
{
vtkLinearTransformVectorsWorker worker;
if (!vtkArrayDispatch::Dispatch2ByArray<vtkArrayDispatch::AOSPointArrays,
vtkArrayDispatch::AOSPointArrays>::Execute(inVrs, outVrs, worker, m, matrix))
{
double vec[3];
for (; ptId < endPtId; ++ptId)
{
inVrs->GetTuple(ptId, vec);
vtkLinearTransformVector(matrix, vec, vec);
outVrs->SetTuple(m + ptId, vec);
}
});
}
// for anything that isn't float or double
vtkSMPTools::For(0, n, vtkSMPTools::THRESHOLD,
[&](vtkIdType ptId, vtkIdType endPtId)
{
double vec[3];
for (; ptId < endPtId; ++ptId)
{
inVrs->GetTuple(ptId, vec);
vtkLinearTransformVector(matrix, vec, vec);
outVrs->SetTuple(m + ptId, vec);
}
});
}
});
}
VTK_ABI_NAMESPACE_END
105 changes: 105 additions & 0 deletions tests/bitexact/test_smp_determinism.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,3 +146,108 @@ def test_large_warp_threaded_path_is_deterministic(large_warp_digests, nthreads)
f"large warp output digest at {nthreads} threads ({got[:16]}) != "
f"1-thread reference ({ref[:16]}) -- threading is non-deterministic"
)


# A large-mesh script that forces the threaded transform paths across the WHOLE
# transform hierarchy: vtkLinearTransform (linear), vtkHomogeneousTransform
# (perspective), and vtkAbstractTransform's generic per-point loop (thin-plate
# spline). All parallelize with grain = vtkSMPTools::THRESHOLD, so per-suite cases
# (<10k points) run serial regardless of thread count; this drives >THRESHOLD
# points so the multithreaded branch is actually exercised. The transform carries
# normals + vectors through (TransformAllInputVectorsOn -> TransformPointsNormals-
# Vectors), and we hash transformed points AND normals so determinism across
# thread counts is checked directly. The transform kind is argv[1].
_LARGE_TRANSFORM_SCRIPT = r"""
import os, sys, hashlib
sys.path = [p for p in sys.path if p not in ("", os.getcwd())]
import numpy as np
from vtkmodules.util.numpy_support import vtk_to_numpy
from vtkmodules.vtkCommonCore import vtkPoints
from vtkmodules.vtkCommonTransforms import vtkTransform, vtkPerspectiveTransform, \
vtkThinPlateSplineTransform
from vtkmodules.vtkFiltersGeneral import vtkTransformFilter
from vtkmodules.vtkFiltersSources import vtkSphereSource
from vtkmodules.vtkFiltersCore import vtkTriangleFilter, vtkPolyDataNormals

kind = sys.argv[1]
s = vtkSphereSource(); s.SetThetaResolution(700); s.SetPhiResolution(700)
t = vtkTriangleFilter(); t.SetInputConnection(s.GetOutputPort()); t.Update()
nf = vtkPolyDataNormals(); nf.SetInputData(t.GetOutput())
nf.ComputePointNormalsOn(); nf.SplittingOff(); nf.Update()
poly = nf.GetOutput(); n = poly.GetNumberOfPoints()
assert n > 100000, n # ensure we exceed the transform THRESHOLD grain

if kind == "linear":
xf = vtkTransform()
xf.Translate(1.25, -3.5, 7.0); xf.RotateWXYZ(37.0, 0.3, 0.6, 0.8)
xf.Scale(1.7, 0.4, 2.3)
elif kind == "perspective":
xf = vtkPerspectiveTransform()
xf.Translate(1.25, -3.5, 7.0); xf.RotateWXYZ(37.0, 0.3, 0.6, 0.8)
xf.Scale(1.7, 0.4, 2.3)
m = xf.GetMatrix() # non-affine bottom row -> non-trivial homogeneous w-divide
m.SetElement(3, 0, 0.05); m.SetElement(3, 1, -0.03); m.SetElement(3, 2, 0.02)
xf.SetMatrix(m)
elif kind == "tps":
src = vtkPoints(); dst = vtkPoints()
cage = [(-1.,-1.,-1.),(1.,-1.,-1.),(-1.,1.,-1.),(1.,1.,-1.),
(-1.,-1.,1.),(1.,-1.,1.),(-1.,1.,1.),(1.,1.,1.)]
for i, (x, y, z) in enumerate(cage):
src.InsertNextPoint(x, y, z)
dst.InsertNextPoint(x + 0.10*((i % 3)-1), y - 0.07*(i % 2),
z + 0.05*(((i+1) % 3)-1))
xf = vtkThinPlateSplineTransform()
xf.SetSourceLandmarks(src); xf.SetTargetLandmarks(dst); xf.SetBasisToR()
else:
raise SystemExit("unknown kind " + kind)

tf = vtkTransformFilter()
tf.SetTransform(xf); tf.TransformAllInputVectorsOn(); tf.SetInputData(poly)
tf.Update()
out = tf.GetOutput()
h = hashlib.sha256()
h.update(np.ascontiguousarray(vtk_to_numpy(out.GetPoints().GetData())).tobytes())
h.update(np.ascontiguousarray(vtk_to_numpy(out.GetPointData().GetNormals())).tobytes())
print(h.hexdigest())
"""

# linear -> vtkLinearTransform; perspective -> vtkHomogeneousTransform;
# tps -> vtkAbstractTransform generic per-point loop. One per threaded family.
TRANSFORM_KINDS = ["linear", "perspective", "tps"]


@pytest.fixture(scope="module")
def large_transform_digests(tmp_path_factory):
fvtk_py = os.environ.get("BITEXACT_FVTK_PY")
if not fvtk_py:
pytest.skip("BITEXACT_FVTK_PY not set.")
script = tmp_path_factory.mktemp("large_transform") / "large_transform.py"
script.write_text(_LARGE_TRANSFORM_SCRIPT)
# digests[kind][nthreads] = hexdigest
digests = {k: {} for k in TRANSFORM_KINDS}
for kind in TRANSFORM_KINDS:
for n in THREAD_COUNTS:
proc = subprocess.run(
[fvtk_py, str(script), kind],
env=_env(os.environ.get("BITEXACT_FVTK_LDLP", ""), n),
capture_output=True,
text=True,
)
assert proc.returncode == 0, f"{kind} @ {n} threads:\n{proc.stderr}"
digests[kind][n] = proc.stdout.strip().splitlines()[-1]
return digests


@pytest.mark.parametrize("kind", TRANSFORM_KINDS)
@pytest.mark.parametrize("nthreads", [n for n in THREAD_COUNTS if n != 1])
def test_large_transform_threaded_path_is_deterministic(
large_transform_digests, kind, nthreads
):
"""The >THRESHOLD transform paths (linear / homogeneous / abstract, all
genuinely multithreaded) hash identically at 1 vs N threads."""
ref = large_transform_digests[kind][1]
got = large_transform_digests[kind][nthreads]
assert got == ref, (
f"large {kind} transform digest at {nthreads} threads ({got[:16]}) != "
f"1-thread reference ({ref[:16]}) -- threading is non-deterministic"
)
Loading