Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions Common/Core/vtkFVTKSMPDefaults.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,25 @@ bool DefaultThreadingDisabledByEnv()
}
}

//------------------------------------------------------------------------------
// Opt-in NON-EXACT fast mode. Default OFF: filters whose threaded path is not
// byte-exact (e.g. order-relaxed topology emission) stay serial unless the user
// opts in. Read live from the FVTK_FAST env var (which fvtk.EnableFast() sets),
// so it can be toggled at runtime. Truthy: 1/on/true/yes (any case).
bool FastModeEnabled()
{
  // Re-read the environment on every call (no caching) so the mode can be
  // toggled while the process is running.
  const char* value = std::getenv("FVTK_FAST");
  if (value == nullptr || value[0] == '\0')
  {
    // Unset or empty: fast mode stays off (the default).
    return false;
  }

  // Case-insensitive comparison against an all-lowercase keyword. The fold is
  // ASCII-only and locale-independent on purpose: the accepted keywords are
  // plain ASCII, and this keeps the result in agreement with the Python-side
  // check, which lowercases the value before comparing. Mixed-case spellings
  // such as "Yes" or "tRuE" are therefore accepted too.
  const auto matchesIgnoringCase = [](const char* text, const char* lowercase) {
    while (*text != '\0' && *lowercase != '\0')
    {
      const char folded =
        (*text >= 'A' && *text <= 'Z') ? static_cast<char>(*text - 'A' + 'a') : *text;
      if (folded != *lowercase)
      {
        return false;
      }
      ++text;
      ++lowercase;
    }
    // At least one string is exhausted here; they match only if both are.
    return *text == *lowercase;
  };

  static const char* const truthyValues[] = { "1", "on", "true", "yes" };
  for (const char* truthy : truthyValues)
  {
    if (matchesIgnoringCase(value, truthy))
    {
      return true;
    }
  }
  return false;
}

//------------------------------------------------------------------------------
// Precedence (first match wins):
// 1. opt-out env FVTK_SMP_DEFAULT=0/off/serial -> stay Sequential (serial).
Expand Down
25 changes: 25 additions & 0 deletions Common/Core/vtkFVTKSMPDefaults.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,31 @@ inline void RunSafeFilterParallel(Body&& body)
vtkSMPTools::LocalScope(GetSafeFilterThreadingConfig(), std::forward<Body>(body));
}

/**
* True when the opt-in NON-EXACT fast mode is enabled (env FVTK_FAST, set by the
* Python fvtk.EnableFast()). Default OFF. Read live so it can be toggled at
* runtime. Filters whose threaded path is not byte-exact gate on this.
*/
VTKCOMMONCORE_EXPORT bool FastModeEnabled();

/**
 * Fast-mode-gated counterpart of RunSafeFilterParallel().
 *
 * If FastModeEnabled() returns true, @p body is handed to
 * RunSafeFilterParallel(). Otherwise (the default) @p body is simply invoked
 * directly on the calling thread, without going through
 * RunSafeFilterParallel().
 *
 * Intended for parallel regions whose output is NOT byte-exact (e.g.
 * order-relaxed topology emission whose cell order depends on thread
 * scheduling); use RunSafeFilterParallel() for regions that are byte-exact.
 */
template <typename Body>
inline void RunFastFilterParallel(Body&& body)
{
  if (FastModeEnabled())
  {
    // Opted in: run through the safe-filter parallel path.
    RunSafeFilterParallel(std::forward<Body>(body));
  }
  else
  {
    // Default: plain direct call.
    body();
  }
}

VTK_ABI_NAMESPACE_END
} // namespace fvtk

Expand Down
5 changes: 3 additions & 2 deletions Filters/Core/vtk3DLinearGridPlaneCutter.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "vtkPlane.h"
#include "vtkPointData.h"
#include "vtkPolyData.h"
#include "vtkFVTKSMPDefaults.h"
#include "vtkSMPThreadLocalObject.h"
#include "vtkSMPTools.h"
#include "vtkStaticEdgeLocatorTemplate.h"
Expand Down Expand Up @@ -50,7 +51,7 @@ vtkCxxSetObjectMacro(vtk3DLinearGridPlaneCutter, Plane, vtkPlane);
{ \
if (!_seq) \
{ \
vtkSMPTools::For(0, _num, _op); \
fvtk::RunFastFilterParallel([&]() { vtkSMPTools::For(0, _num, _op); }); \
} \
else \
{ \
Expand All @@ -63,7 +64,7 @@ vtkCxxSetObjectMacro(vtk3DLinearGridPlaneCutter, Plane, vtkPlane);
{ \
if (!_seq) \
{ \
vtkSMPTools::For(0, _num, _op); \
fvtk::RunFastFilterParallel([&]() { vtkSMPTools::For(0, _num, _op); }); \
} \
else \
{ \
Expand Down
31 changes: 16 additions & 15 deletions Filters/Core/vtkContour3DLinearGrid.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "vtkObjectFactory.h"
#include "vtkPointData.h"
#include "vtkPolyData.h"
#include "vtkFVTKSMPDefaults.h"
#include "vtkSMPTools.h"
#include "vtkSmartPointer.h"
#include "vtkSpanSpace.h"
Expand Down Expand Up @@ -59,7 +60,7 @@ vtkCxxSetObjectMacro(vtkContour3DLinearGrid, ScalarTree, vtkScalarTree);
{ \
if (!_seq) \
{ \
vtkSMPTools::For(0, _num, _op); \
fvtk::RunFastFilterParallel([&]() { vtkSMPTools::For(0, _num, _op); }); \
} \
else \
{ \
Expand All @@ -72,7 +73,7 @@ vtkCxxSetObjectMacro(vtkContour3DLinearGrid, ScalarTree, vtkScalarTree);
{ \
if (!_seq) \
{ \
vtkSMPTools::For(0, _num, _op); \
fvtk::RunFastFilterParallel([&]() { vtkSMPTools::For(0, _num, _op); }); \
} \
else \
{ \
Expand Down Expand Up @@ -255,11 +256,11 @@ struct ContourCellsBase
// Copy points output to VTK structures. Only point coordinates are
// copied for now; later we'll define the triangle topology.
ProducePoints producePts(localPts, localPtOffsets, this->NewPts);
EXECUTE_SMPFOR(this->Filter->GetSequentialProcessing(), this->NumThreadsUsed, producePts);
EXECUTE_SMPFOR((this->Filter->GetSequentialProcessing() || this->Filter->GetComputeNormals()), this->NumThreadsUsed, producePts);

// Now produce the output triangles (topology) for this contour in parallel
ProduceTriangles produceTris(this->TotalTris, this->NewPolys);
EXECUTE_SMPFOR(this->Filter->GetSequentialProcessing(), this->NumTris, produceTris);
EXECUTE_SMPFOR((this->Filter->GetSequentialProcessing() || this->Filter->GetComputeNormals()), this->NumTris, produceTris);
} // Reduce
}; // ContourCellsBase

Expand Down Expand Up @@ -452,14 +453,14 @@ struct ProcessFastPathWorker
TContourCellsST contour(
filter, inPts, outPts, scalars, cellIter, isoValue, st, tris, totalPts, totalTris);
EXECUTE_REDUCED_SMPFOR(
filter->GetSequentialProcessing(), contour.NumBatches, contour, numThreads);
(filter->GetSequentialProcessing() || filter->GetComputeNormals()), contour.NumBatches, contour, numThreads);
}
else
{
using TContourCells = ContourCells<TInputPointsArray, TOutputPointsArray, TScalarsArray>;
TContourCells contour(
filter, inPts, outPts, scalars, cellIter, isoValue, tris, totalPts, totalTris);
EXECUTE_REDUCED_SMPFOR(filter->GetSequentialProcessing(), numCells, contour, numThreads);
EXECUTE_REDUCED_SMPFOR((filter->GetSequentialProcessing() || filter->GetComputeNormals()), numCells, contour, numThreads);
}
}
};
Expand Down Expand Up @@ -619,7 +620,7 @@ struct ExtractEdgesBase
this->Edges =
new EdgeTuple<IDType, EdgeDataType<IDType>>[3 * this->NumTris]; // three edges per triangle
ProduceEdges<IDType> produceEdges(localEdges, localTriOffsets, this->Edges, this->Filter);
EXECUTE_SMPFOR(this->Filter->GetSequentialProcessing(), this->NumThreadsUsed, produceEdges);
EXECUTE_SMPFOR((this->Filter->GetSequentialProcessing() || this->Filter->GetComputeNormals()), this->NumThreadsUsed, produceEdges);
} // Reduce
}; // ExtractEdgesBase

Expand Down Expand Up @@ -818,7 +819,7 @@ struct ExtractEdgesWorker
TExtractEdgesST extractEdges(
filter, scalars, cellIter, isoValue, st, newPolys, totalTris, originalCellIds);
EXECUTE_REDUCED_SMPFOR(
filter->GetSequentialProcessing(), extractEdges.NumBatches, extractEdges, numThreads);
(filter->GetSequentialProcessing() || filter->GetComputeNormals()), extractEdges.NumBatches, extractEdges, numThreads);
numTris = extractEdges.NumTris;
mergeEdges = extractEdges.Edges;
}
Expand All @@ -827,7 +828,7 @@ struct ExtractEdgesWorker
using TExtractEdges = ExtractEdges<TIds, TScalarArray>;
TExtractEdges extractEdges(
filter, scalars, cellIter, isoValue, newPolys, totalTris, originalCellIds);
EXECUTE_REDUCED_SMPFOR(filter->GetSequentialProcessing(), numCells, extractEdges, numThreads);
EXECUTE_REDUCED_SMPFOR((filter->GetSequentialProcessing() || filter->GetComputeNormals()), numCells, extractEdges, numThreads);
numTris = extractEdges.NumTris;
mergeEdges = extractEdges.Edges;
}
Expand Down Expand Up @@ -983,7 +984,7 @@ struct ProduceMergedPointsWorker
{
ProduceMergedPoints<TInputPointsArray, TOutputPointsArray, TIds> produceMergedPoints(
filter, inputPointsArray, outputPointsArray, mergeArray, offsets, totalPoints);
EXECUTE_SMPFOR(filter->GetSequentialProcessing(), numPts, produceMergedPoints);
EXECUTE_SMPFOR((filter->GetSequentialProcessing() || filter->GetComputeNormals()), numPts, produceMergedPoints);
}
};

Expand Down Expand Up @@ -1121,7 +1122,7 @@ int ProcessMerged(vtkContour3DLinearGrid* filter, vtkPoints* inPts, vtkPoints* o
// Generate triangles.
ProduceMergedTriangles<TIds> produceTris(
mergeEdges, offsets, numTris, newPolys, totalPts, totalTris, filter);
EXECUTE_SMPFOR(filter->GetSequentialProcessing(), numPts, produceTris);
EXECUTE_SMPFOR((filter->GetSequentialProcessing() || filter->GetComputeNormals()), numPts, produceTris);
numThreads = nt;

// Generate points (one per unique edge)
Expand Down Expand Up @@ -1159,7 +1160,7 @@ int ProcessMerged(vtkContour3DLinearGrid* filter, vtkPoints* inPts, vtkPoints* o
pointArrays->Realloc(totalPts + numPts);
}
ProducePointAttributes<TIds> interpolate(mergeEdges, offsets, pointArrays, totalPts, filter);
EXECUTE_SMPFOR(filter->GetSequentialProcessing(), numPts, interpolate);
EXECUTE_SMPFOR((filter->GetSequentialProcessing() || filter->GetComputeNormals()), numPts, interpolate);

// interpolate cell data
if (totalTris <= 0) // first contour value generating output
Expand All @@ -1172,7 +1173,7 @@ int ProcessMerged(vtkContour3DLinearGrid* filter, vtkPoints* inPts, vtkPoints* o
cellArrays->Realloc(totalTris + numTris);
}
ProduceCellAttributes<TIds> interpolateCell(originalCellIds, cellArrays, totalTris, filter);
EXECUTE_SMPFOR(filter->GetSequentialProcessing(), numTris, interpolateCell);
EXECUTE_SMPFOR((filter->GetSequentialProcessing() || filter->GetComputeNormals()), numTris, interpolateCell);
}

// Clean up
Expand Down Expand Up @@ -1246,7 +1247,7 @@ vtkSmartPointer<vtkFloatArray> GenerateTriNormals(

// Execute functor over all triangles
ComputeCellNormals computeNormals(pts, tris, n, filter);
EXECUTE_SMPFOR(filter->GetSequentialProcessing(), numTris, computeNormals);
EXECUTE_SMPFOR((filter->GetSequentialProcessing() || filter->GetComputeNormals()), numTris, computeNormals);

return cellNormals;
}
Expand Down Expand Up @@ -1331,7 +1332,7 @@ void GeneratePointNormals(vtkPoints* pts, vtkCellArray* tris, vtkFloatArray* cel

// Process all points, averaging normals
AverageNormals<TId> average(&links, triN, ptN, filter);
EXECUTE_SMPFOR(filter->GetSequentialProcessing(), numPts, average);
EXECUTE_SMPFOR((filter->GetSequentialProcessing() || filter->GetComputeNormals()), numPts, average);

// Clean up and get out
pd->SetNormals(ptNormals);
Expand Down
27 changes: 27 additions & 0 deletions Wrapping/Python/fvtk/__init__.py.in
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,33 @@ def _load_fvtk_static():
__all__ = [
@_fvtk_all@
]
#------------------------------------------------------------------------------
# Opt-in NON-EXACT fast mode (fvtk extension).
#
# Some fvtk filters have a multithreaded fast path whose output is NOT byte-exact
# vs stock VTK -- e.g. order-relaxed topology emission, where the SAME cells are
# produced but their ORDER (and thus the raw connectivity bytes) depends on thread
# scheduling. These stay OFF by default so fvtk is a byte-exact drop-in. Call
# EnableFast() to opt in; points / point-data / the cell set stay correct, only
# cell ORDER becomes non-deterministic. Backed by the FVTK_FAST env var, which the
# native filters read live, so this can be toggled at runtime.
import os as _os

def EnableFast():
    """Switch on the opt-in, non-exact multithreaded fast paths.

    Sets the ``FVTK_FAST`` environment variable of this process to ``"1"``.
    """
    environment = _os.environ
    environment["FVTK_FAST"] = "1"

def DisableFast():
    """Switch the non-exact fast paths back off (the default state).

    Removes ``FVTK_FAST`` from this process's environment; does nothing when
    the variable is not currently set.
    """
    if "FVTK_FAST" in _os.environ:
        del _os.environ["FVTK_FAST"]

def IsFastEnabled():
    """Report whether the non-exact fast mode is currently switched on.

    Reads ``FVTK_FAST`` from the environment (treated as empty when unset) and
    returns True when its lowercased value is one of 1 / on / true / yes.
    """
    raw_value = _os.getenv("FVTK_FAST", "")
    truthy_values = ("1", "on", "true", "yes")
    return raw_value.lower() in truthy_values

# Expose the fast-mode toggles for `from fvtk import *` and discoverability.
__all__ += ["EnableFast", "DisableFast", "IsFastEnabled"]

#------------------------------------------------------------------------------
# get the version
__version__ = "@VTK_MAJOR_VERSION@.@VTK_MINOR_VERSION@.@VTK_BUILD_VERSION@"
Expand Down
110 changes: 107 additions & 3 deletions tests/bitexact/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,102 @@ def _ulp_distance(x, y):
return int(np.abs(xi - yi).max()) if xi.size else 0


def compare_case(stock_dir, fvtk_dir, key):
# Tags in their canonical vtkPolyData cell-data global-index order. Cell data is
# indexed across all cells as verts, then lines, then polys, then strips; the
# canonical sort must preserve this partition and only reorder WITHIN each group.
_POLY_TAGS = ("verts", "lines", "polys", "strips")


def _cell_records(arrays):
"""Reconstruct a per-cell canonical sort key list + the global cell order.

Returns (keys, perm) where ``keys`` is the list of canonical per-cell keys in
GLOBAL cell-data index order, and ``perm`` is an argsort (a permutation of
range(numCells)) that orders cells canonically while preserving the VTK group
partition. Cells are keyed by (group_rank, connectivity-tuple) for polydata or
(cell_type, connectivity-tuple) for unstructured grids -- connectivity is point
IDs, which are directly comparable because points stay strictly identical.
Returns None if the array set has no recognizable topology.
"""
names = set(arrays.files) if hasattr(arrays, "files") else set(arrays)

def conn_off(tag):
ck, ok = (f"conn:{tag}", f"off:{tag}") if tag else ("conn", "off")
if ck in names and ok in names:
return np.asarray(arrays[ck]).astype(np.int64), np.asarray(arrays[ok]).astype(np.int64)
return None, None

keys = []
# Unstructured grid: single conn/off plus celltypes.
if "conn" in names and "off" in names:
conn, off = conn_off(None)
ctypes = np.asarray(arrays["celltypes"]).astype(np.int64) if "celltypes" in names else None
for i in range(len(off) - 1):
cell = tuple(conn[off[i]:off[i + 1]].tolist())
rank = int(ctypes[i]) if ctypes is not None else 0
keys.append((rank, len(cell), cell))
else:
# PolyData: grouped verts|lines|polys|strips.
any_topo = False
for rank, tag in enumerate(_POLY_TAGS):
conn, off = conn_off(tag)
if conn is None:
continue
any_topo = True
for i in range(len(off) - 1):
cell = tuple(conn[off[i]:off[i + 1]].tolist())
keys.append((rank, len(cell), cell))
if not any_topo:
return None
# Stable argsort: preserve group partition (rank leads the key), order within.
perm = sorted(range(len(keys)), key=lambda i: keys[i])
return keys, perm


def _compare_order_relaxed(a, b):
    """Compare two array sets as meshes whose cell ORDER is allowed to differ.

    ``a`` and ``b`` must expose ``.files`` and item access by array name. Only
    names present on both sides are examined:

    * ``points`` and ``pd:*`` arrays must match strictly (shape, dtype, values);
    * cells are put into canonical order via ``_cell_records`` and their key
      sequences must match;
    * each ``cd:*`` array is reordered by the same canonical permutation before
      comparison; integer arrays are compared after widening to int64, others
      must match in dtype and values.

    Returns ``(ok, report)`` where ``report`` maps each examined array name (and
    ``"__cells__"``) to a small result dict.
    """
    report = {}
    all_equal = True
    shared = sorted(set(a.files) & set(b.files))

    # Pass 1: points and point data, compared strictly.
    for name in shared:
        if name != "points" and not name.startswith("pd:"):
            continue
        lhs, rhs = a[name], b[name]
        same = bool(lhs.shape == rhs.shape and lhs.dtype == rhs.dtype and np.array_equal(lhs, rhs))
        report[name] = {"equal": same, "mode": "strict", "dtype": str(lhs.dtype)}
        all_equal = all_equal and same

    # Pass 2: canonicalized cell keys.
    records_a = _cell_records(a)
    records_b = _cell_records(b)
    if records_a is None or records_b is None:
        report["__cells__"] = {"equal": False, "reason": "no topology to canonicalize"}
        return False, report
    keys_a, perm_a = records_a
    keys_b, perm_b = records_b
    cells_match = bool(
        len(keys_a) == len(keys_b)
        and all(keys_a[i] == keys_b[j] for i, j in zip(perm_a, perm_b))
    )
    report["__cells__"] = {"equal": cells_match, "mode": "order-relaxed", "ncells": len(keys_a)}
    all_equal = all_equal and cells_match

    # Pass 3: cell data, reordered so each value travels with its cell.
    for name in shared:
        if not name.startswith("cd:"):
            continue
        lhs, rhs = a[name], b[name]
        shape_mismatch = (
            lhs.shape[0] != len(perm_a)
            or rhs.shape[0] != len(perm_b)
            or lhs.shape[1:] != rhs.shape[1:]
        )
        if shape_mismatch:
            report[name] = {"equal": False, "mode": "order-relaxed", "reason": "shape"}
            all_equal = False
            continue
        lhs_sorted, rhs_sorted = lhs[perm_a], rhs[perm_b]
        both_integer = lhs_sorted.dtype.kind in "iu" and rhs_sorted.dtype.kind in "iu"
        if both_integer:
            # Integer width may differ between the two sides; compare as int64.
            same = bool(np.array_equal(lhs_sorted.astype(np.int64), rhs_sorted.astype(np.int64)))
        else:
            same = bool(lhs_sorted.dtype == rhs_sorted.dtype and np.array_equal(lhs_sorted, rhs_sorted))
        report[name] = {"equal": same, "mode": "order-relaxed", "dtype": str(lhs.dtype)}
        all_equal = all_equal and same

    return all_equal, report


def compare_case(stock_dir, fvtk_dir, key, order_relaxed=False):
"""Return (ok: bool, detail: dict) for a single case key."""
sp = os.path.join(stock_dir, key + ".npz")
fp = os.path.join(fvtk_dir, key + ".npz")
Expand All @@ -48,6 +143,12 @@ def compare_case(stock_dir, fvtk_dir, key):
"only_stock": sorted(names_a - names_b),
"only_fvtk": sorted(names_b - names_a),
}
if order_relaxed:
# Order-relaxed mesh equality: same points/point-data (strict) and the
# same multiset of cells carrying their cell-data, but cell ORDER may
# differ (e.g. thread-batched topology emission). See _compare_order_relaxed.
ok, per = _compare_order_relaxed(a, b)
return ok, {"arrays": per, "order_relaxed": True}
per_array = {}
ok = True
for name in sorted(names_a):
Expand Down Expand Up @@ -117,6 +218,9 @@ def compare_all(stock_dir, fvtk_dir):
"group": cs.get("group"),
}
continue
ok, detail = compare_case(stock_dir, fvtk_dir, key)
cases[key] = {"ok": ok, "detail": detail, "group": cs.get("group")}
# A case is order-relaxed if EITHER manifest marks it so (both should agree).
order_relaxed = bool(cs.get("order_relaxed") or cf.get("order_relaxed"))
ok, detail = compare_case(stock_dir, fvtk_dir, key, order_relaxed=order_relaxed)
cases[key] = {"ok": ok, "detail": detail, "group": cs.get("group"),
"order_relaxed": order_relaxed}
return {"provenance": prov, "cases": cases, "keys": keys}
Loading
Loading