pyvista · akaszynski · Jun 20, 2026 · Jun 20, 2026 · Jun 20, 2026
diff --git a/Common/Core/vtkFVTKSMPDefaults.cxx b/Common/Core/vtkFVTKSMPDefaults.cxx
@@ -29,6 +29,25 @@ bool DefaultThreadingDisabledByEnv()
 }
 }
 
+//------------------------------------------------------------------------------
+// Opt-in NON-EXACT fast mode. Default OFF: filters whose threaded path is not
+// byte-exact (e.g. order-relaxed topology emission) stay serial unless the user
+// opts in. Read live from the FVTK_FAST env var (which fvtk.EnableFast() sets),
+// so it can be toggled at runtime. Truthy: 1/on/true/yes (any ASCII case).
+bool FastModeEnabled()
+{
+  const char* v = std::getenv("FVTK_FAST");
+  // Fold ASCII A-Z to a-z into a small buffer (no locale). The longest truthy
+  // word has 4 chars, so copying at most 5 is enough to reject anything longer.
+  char low[8] = {};
+  for (int i = 0; v && v[i] != '\0' && i < 5; ++i)
+  {
+    low[i] = (v[i] >= 'A' && v[i] <= 'Z') ? static_cast<char>(v[i] - 'A' + 'a') : v[i];
+  }
+  return std::strcmp(low, "1") == 0 || std::strcmp(low, "on") == 0 ||
+    std::strcmp(low, "true") == 0 || std::strcmp(low, "yes") == 0;
+}
+
 //------------------------------------------------------------------------------
 // Precedence (first match wins):
 //   1. opt-out env FVTK_SMP_DEFAULT=0/off/serial  -> stay Sequential (serial).

diff --git a/Common/Core/vtkFVTKSMPDefaults.h b/Common/Core/vtkFVTKSMPDefaults.h
@@ -79,6 +79,31 @@ inline void RunSafeFilterParallel(Body&& body)
   vtkSMPTools::LocalScope(GetSafeFilterThreadingConfig(), std::forward<Body>(body));
 }
 
+/**
+ * True when the opt-in NON-EXACT fast mode is enabled (env FVTK_FAST, set by the
+ * Python fvtk.EnableFast()). Default OFF. Read live so it can be toggled at
+ * runtime. Filters whose threaded path is not byte-exact gate on this.
+ */
+VTKCOMMONCORE_EXPORT bool FastModeEnabled();
+
+/**
+ * Gate for parallel regions whose output is NOT byte-exact vs stock (e.g.
+ * order-relaxed topology emission whose cell order depends on thread
+ * scheduling). With fast mode off (the default) @p body is invoked directly,
+ * outside RunSafeFilterParallel(), so the filter stays byte-exact; only when
+ * FastModeEnabled() is @p body handed to RunSafeFilterParallel() to thread it.
+ */
+template <typename Body>
+inline void RunFastFilterParallel(Body&& body)
+{
+  if (FastModeEnabled())
+  {
+    RunSafeFilterParallel(std::forward<Body>(body));
+    return;
+  }
+  body();
+}
+
 VTK_ABI_NAMESPACE_END
 } // namespace fvtk
 

diff --git a/Filters/Core/vtk3DLinearGridPlaneCutter.cxx b/Filters/Core/vtk3DLinearGridPlaneCutter.cxx
@@ -22,6 +22,7 @@
 #include "vtkPlane.h"
 #include "vtkPointData.h"
 #include "vtkPolyData.h"
+#include "vtkFVTKSMPDefaults.h"
 #include "vtkSMPThreadLocalObject.h"
 #include "vtkSMPTools.h"
 #include "vtkStaticEdgeLocatorTemplate.h"
@@ -50,7 +51,7 @@ vtkCxxSetObjectMacro(vtk3DLinearGridPlaneCutter, Plane, vtkPlane);
   {                                                                                                \
     if (!_seq)                                                                                     \
     {                                                                                              \
-      vtkSMPTools::For(0, _num, _op);                                                              \
+      fvtk::RunFastFilterParallel([&]() { vtkSMPTools::For(0, _num, _op); });                                                              \
     }                                                                                              \
     else                                                                                           \
     {                                                                                              \
@@ -63,7 +64,7 @@ vtkCxxSetObjectMacro(vtk3DLinearGridPlaneCutter, Plane, vtkPlane);
   {                                                                                                \
     if (!_seq)                                                                                     \
     {                                                                                              \
-      vtkSMPTools::For(0, _num, _op);                                                              \
+      fvtk::RunFastFilterParallel([&]() { vtkSMPTools::For(0, _num, _op); });                                                              \
     }                                                                                              \
     else                                                                                           \
     {                                                                                              \

diff --git a/Filters/Core/vtkContour3DLinearGrid.cxx b/Filters/Core/vtkContour3DLinearGrid.cxx
@@ -23,6 +23,7 @@
 #include "vtkObjectFactory.h"
 #include "vtkPointData.h"
 #include "vtkPolyData.h"
+#include "vtkFVTKSMPDefaults.h"
 #include "vtkSMPTools.h"
 #include "vtkSmartPointer.h"
 #include "vtkSpanSpace.h"
@@ -59,7 +60,7 @@ vtkCxxSetObjectMacro(vtkContour3DLinearGrid, ScalarTree, vtkScalarTree);
   {                                                                                                \
     if (!_seq)                                                                                     \
     {                                                                                              \
-      vtkSMPTools::For(0, _num, _op);                                                              \
+      fvtk::RunFastFilterParallel([&]() { vtkSMPTools::For(0, _num, _op); });                                                              \
     }                                                                                              \
     else                                                                                           \
     {                                                                                              \
@@ -72,7 +73,7 @@ vtkCxxSetObjectMacro(vtkContour3DLinearGrid, ScalarTree, vtkScalarTree);
   {                                                                                                \
     if (!_seq)                                                                                     \
     {                                                                                              \
-      vtkSMPTools::For(0, _num, _op);                                                              \
+      fvtk::RunFastFilterParallel([&]() { vtkSMPTools::For(0, _num, _op); });                                                              \
     }                                                                                              \
     else                                                                                           \
     {                                                                                              \
@@ -255,11 +256,11 @@ struct ContourCellsBase
     // Copy points output to VTK structures. Only point coordinates are
     // copied for now; later we'll define the triangle topology.
     ProducePoints producePts(localPts, localPtOffsets, this->NewPts);
-    EXECUTE_SMPFOR(this->Filter->GetSequentialProcessing(), this->NumThreadsUsed, producePts);
+    EXECUTE_SMPFOR((this->Filter->GetSequentialProcessing() || this->Filter->GetComputeNormals()), this->NumThreadsUsed, producePts);
 
     // Now produce the output triangles (topology) for this contour n parallel
     ProduceTriangles produceTris(this->TotalTris, this->NewPolys);
-    EXECUTE_SMPFOR(this->Filter->GetSequentialProcessing(), this->NumTris, produceTris);
+    EXECUTE_SMPFOR((this->Filter->GetSequentialProcessing() || this->Filter->GetComputeNormals()), this->NumTris, produceTris);
   } // Reduce
 };  // ContourCellsBase
 
@@ -452,14 +453,14 @@ struct ProcessFastPathWorker
       TContourCellsST contour(
         filter, inPts, outPts, scalars, cellIter, isoValue, st, tris, totalPts, totalTris);
       EXECUTE_REDUCED_SMPFOR(
-        filter->GetSequentialProcessing(), contour.NumBatches, contour, numThreads);
+        (filter->GetSequentialProcessing() || filter->GetComputeNormals()), contour.NumBatches, contour, numThreads);
     }
     else
     {
       using TContourCells = ContourCells<TInputPointsArray, TOutputPointsArray, TScalarsArray>;
       TContourCells contour(
         filter, inPts, outPts, scalars, cellIter, isoValue, tris, totalPts, totalTris);
-      EXECUTE_REDUCED_SMPFOR(filter->GetSequentialProcessing(), numCells, contour, numThreads);
+      EXECUTE_REDUCED_SMPFOR((filter->GetSequentialProcessing() || filter->GetComputeNormals()), numCells, contour, numThreads);
     }
   }
 };
@@ -619,7 +620,7 @@ struct ExtractEdgesBase
     this->Edges =
       new EdgeTuple<IDType, EdgeDataType<IDType>>[3 * this->NumTris]; // three edges per triangle
     ProduceEdges<IDType> produceEdges(localEdges, localTriOffsets, this->Edges, this->Filter);
-    EXECUTE_SMPFOR(this->Filter->GetSequentialProcessing(), this->NumThreadsUsed, produceEdges);
+    EXECUTE_SMPFOR((this->Filter->GetSequentialProcessing() || this->Filter->GetComputeNormals()), this->NumThreadsUsed, produceEdges);
   } // Reduce
 };  // ExtractEdgesBase
 
@@ -818,7 +819,7 @@ struct ExtractEdgesWorker
       TExtractEdgesST extractEdges(
         filter, scalars, cellIter, isoValue, st, newPolys, totalTris, originalCellIds);
       EXECUTE_REDUCED_SMPFOR(
-        filter->GetSequentialProcessing(), extractEdges.NumBatches, extractEdges, numThreads);
+        (filter->GetSequentialProcessing() || filter->GetComputeNormals()), extractEdges.NumBatches, extractEdges, numThreads);
       numTris = extractEdges.NumTris;
       mergeEdges = extractEdges.Edges;
     }
@@ -827,7 +828,7 @@ struct ExtractEdgesWorker
       using TExtractEdges = ExtractEdges<TIds, TScalarArray>;
       TExtractEdges extractEdges(
         filter, scalars, cellIter, isoValue, newPolys, totalTris, originalCellIds);
-      EXECUTE_REDUCED_SMPFOR(filter->GetSequentialProcessing(), numCells, extractEdges, numThreads);
+      EXECUTE_REDUCED_SMPFOR((filter->GetSequentialProcessing() || filter->GetComputeNormals()), numCells, extractEdges, numThreads);
       numTris = extractEdges.NumTris;
       mergeEdges = extractEdges.Edges;
     }
@@ -983,7 +984,7 @@ struct ProduceMergedPointsWorker
   {
     ProduceMergedPoints<TInputPointsArray, TOutputPointsArray, TIds> produceMergedPoints(
       filter, inputPointsArray, outputPointsArray, mergeArray, offsets, totalPoints);
-    EXECUTE_SMPFOR(filter->GetSequentialProcessing(), numPts, produceMergedPoints);
+    EXECUTE_SMPFOR((filter->GetSequentialProcessing() || filter->GetComputeNormals()), numPts, produceMergedPoints);
   }
 };
 
@@ -1121,7 +1122,7 @@ int ProcessMerged(vtkContour3DLinearGrid* filter, vtkPoints* inPts, vtkPoints* o
   // Generate triangles.
   ProduceMergedTriangles<TIds> produceTris(
     mergeEdges, offsets, numTris, newPolys, totalPts, totalTris, filter);
-  EXECUTE_SMPFOR(filter->GetSequentialProcessing(), numPts, produceTris);
+  EXECUTE_SMPFOR((filter->GetSequentialProcessing() || filter->GetComputeNormals()), numPts, produceTris);
   numThreads = nt;
 
   // Generate points (one per unique edge)
@@ -1159,7 +1160,7 @@ int ProcessMerged(vtkContour3DLinearGrid* filter, vtkPoints* inPts, vtkPoints* o
       pointArrays->Realloc(totalPts + numPts);
     }
     ProducePointAttributes<TIds> interpolate(mergeEdges, offsets, pointArrays, totalPts, filter);
-    EXECUTE_SMPFOR(filter->GetSequentialProcessing(), numPts, interpolate);
+    EXECUTE_SMPFOR((filter->GetSequentialProcessing() || filter->GetComputeNormals()), numPts, interpolate);
 
     // interpolate cell data
     if (totalTris <= 0) // first contour value generating output
@@ -1172,7 +1173,7 @@ int ProcessMerged(vtkContour3DLinearGrid* filter, vtkPoints* inPts, vtkPoints* o
       cellArrays->Realloc(totalTris + numTris);
     }
     ProduceCellAttributes<TIds> interpolateCell(originalCellIds, cellArrays, totalTris, filter);
-    EXECUTE_SMPFOR(filter->GetSequentialProcessing(), numTris, interpolateCell);
+    EXECUTE_SMPFOR((filter->GetSequentialProcessing() || filter->GetComputeNormals()), numTris, interpolateCell);
   }
 
   // Clean up
@@ -1246,7 +1247,7 @@ vtkSmartPointer<vtkFloatArray> GenerateTriNormals(
 
   // Execute functor over all triangles
   ComputeCellNormals computeNormals(pts, tris, n, filter);
-  EXECUTE_SMPFOR(filter->GetSequentialProcessing(), numTris, computeNormals);
+  EXECUTE_SMPFOR((filter->GetSequentialProcessing() || filter->GetComputeNormals()), numTris, computeNormals);
 
   return cellNormals;
 }
@@ -1331,7 +1332,7 @@ void GeneratePointNormals(vtkPoints* pts, vtkCellArray* tris, vtkFloatArray* cel
 
   // Process all points, averaging normals
   AverageNormals<TId> average(&links, triN, ptN, filter);
-  EXECUTE_SMPFOR(filter->GetSequentialProcessing(), numPts, average);
+  EXECUTE_SMPFOR((filter->GetSequentialProcessing() || filter->GetComputeNormals()), numPts, average);
 
   // Clean up and get out
   pd->SetNormals(ptNormals);

diff --git a/Wrapping/Python/fvtk/__init__.py.in b/Wrapping/Python/fvtk/__init__.py.in
@@ -111,6 +111,33 @@ def _load_fvtk_static():
 __all__ = [
     @_fvtk_all@
 ]
+#------------------------------------------------------------------------------
+# Opt-in NON-EXACT fast mode (fvtk extension).
+#
+# Some fvtk filters have a multithreaded fast path whose output is NOT byte-exact
+# vs stock VTK -- e.g. order-relaxed topology emission, where the SAME cells are
+# produced but their ORDER (and thus the raw connectivity bytes) depends on thread
+# scheduling. These stay OFF by default so fvtk is a byte-exact drop-in. Call
+# EnableFast() to opt in; points / point-data / the cell set stay correct, only
+# cell ORDER becomes non-deterministic. Backed by the FVTK_FAST env var, which the
+# native filters read live, so this can be toggled at runtime.
+import os as _os
+
+def EnableFast():
+    """Set FVTK_FAST=1, opting in to the non-exact multithreaded fast paths."""
+    _os.environ.update(FVTK_FAST="1")
+
+def DisableFast():
+    """Remove FVTK_FAST from the environment (no-op if unset): fast mode off."""
+    _os.environ.pop("FVTK_FAST", None)
+
+def IsFastEnabled():
+    """Report whether FVTK_FAST holds 1/on/true/yes (compared lower-cased)."""
+    return _os.getenv("FVTK_FAST", "").lower() in ("1", "on", "true", "yes")
+
+# Expose the fast-mode toggles for `from fvtk import *` and discoverability.
+__all__ += ["EnableFast", "DisableFast", "IsFastEnabled"]
+
 #------------------------------------------------------------------------------
 # get the version
 __version__ = "@VTK_MAJOR_VERSION@.@VTK_MINOR_VERSION@.@VTK_BUILD_VERSION@"

diff --git a/tests/bitexact/compare.py b/tests/bitexact/compare.py
@@ -33,7 +33,102 @@ def _ulp_distance(x, y):
     return int(np.abs(xi - yi).max()) if xi.size else 0
 
 
-def compare_case(stock_dir, fvtk_dir, key):
+# Tags in their canonical vtkPolyData cell-data global-index order. Cell data is
+# indexed across all cells as verts, then lines, then polys, then strips; the
+# canonical sort must preserve this partition and only reorder WITHIN each group.
+_POLY_TAGS = ("verts", "lines", "polys", "strips")
+
+
+def _cell_records(arrays):
+    """Build canonical per-cell sort keys and the permutation that sorts them.
+
+    ``arrays`` holds either unstructured-grid topology (``conn``/``off`` and
+    optionally ``celltypes``) or polydata topology (``conn:<tag>``/``off:<tag>``
+    for the tags in ``_POLY_TAGS``), read by name.
+
+    Returns ``(keys, perm)``. ``keys[i]`` is ``(rank, npts, point_ids)`` for cell
+    ``i`` in GLOBAL cell-data index order, where ``rank`` is the cell type (0 if
+    absent) or the polydata group index; ``perm`` is the stable argsort of
+    ``keys``. Returns None when no polydata group is present either.
+    """
+    present = set(arrays.files) if hasattr(arrays, "files") else set(arrays)
+
+    def as_i64(name):
+        return np.asarray(arrays[name]).astype(np.int64)
+
+    def cells_of(conn_name, off_name):
+        # One tuple of point ids per cell; None when either array is absent.
+        if conn_name not in present or off_name not in present:
+            return None
+        conn, off = as_i64(conn_name), as_i64(off_name)
+        return [tuple(conn[off[i]:off[i + 1]].tolist()) for i in range(len(off) - 1)]
+
+    if "conn" in present and "off" in present:
+        # Unstructured grid: one conn/off pair, ranked by cell type when given.
+        cells = cells_of("conn", "off")
+        ctypes = as_i64("celltypes") if "celltypes" in present else None
+        ranks = [0 if ctypes is None else int(ctypes[i]) for i in range(len(cells))]
+        keys = [(rank, len(cell), cell) for rank, cell in zip(ranks, cells)]
+    else:
+        # PolyData: verts|lines|polys|strips, ranked by group position.
+        keys, found = [], False
+        for rank, tag in enumerate(_POLY_TAGS):
+            cells = cells_of(f"conn:{tag}", f"off:{tag}")
+            if cells is None:
+                continue
+            found = True
+            keys.extend((rank, len(cell), cell) for cell in cells)
+        if not found:
+            return None
+    # sorted() is stable and rank leads each key, so groups keep their partition.
+    perm = sorted(range(len(keys)), key=keys.__getitem__)
+    return keys, perm
+
+
+def _compare_order_relaxed(a, b):
+    """Compare two array sets as meshes, ignoring the order cells appear in.
+
+    ``points`` and ``pd:*`` arrays must match exactly (shape, dtype, values).
+    Cells are matched after the canonical sort from ``_cell_records``; every
+    ``cd:*`` array is permuted likewise, and integer cell-data is compared as
+    int64 so width differences are tolerated. Returns (ok, per_array_detail).
+    """
+    shared = sorted(set(a.files) & set(b.files))
+    report = {}
+    # 1) points + point-data: strict, index-aligned comparison.
+    for name in (n for n in shared if n == "points" or n.startswith("pd:")):
+        va, vb = a[name], b[name]
+        same = bool(va.shape == vb.shape and va.dtype == vb.dtype and np.array_equal(va, vb))
+        report[name] = {"equal": same, "mode": "strict", "dtype": str(va.dtype)}
+    all_ok = all(entry["equal"] for entry in report.values())
+    # 2) cells: canonicalize both sides and compare the sorted key sequences.
+    rec_a, rec_b = _cell_records(a), _cell_records(b)
+    if rec_a is None or rec_b is None:
+        report["__cells__"] = {"equal": False, "reason": "no topology to canonicalize"}
+        return False, report
+    (keys_a, order_a), (keys_b, order_b) = rec_a, rec_b
+    cells_same = len(keys_a) == len(keys_b) and all(
+        keys_a[i] == keys_b[j] for i, j in zip(order_a, order_b))
+    report["__cells__"] = {"equal": cells_same, "mode": "order-relaxed", "ncells": len(keys_a)}
+    all_ok = all_ok and cells_same
+    # 3) cell-data: values travel with their cell, so permute before comparing.
+    for name in (n for n in shared if n.startswith("cd:")):
+        va, vb = a[name], b[name]
+        if va.shape[0] != len(order_a) or vb.shape[0] != len(order_b) or va.shape[1:] != vb.shape[1:]:
+            report[name] = {"equal": False, "mode": "order-relaxed", "reason": "shape"}
+            all_ok = False
+            continue
+        sa, sb = va[order_a], vb[order_b]
+        if sa.dtype.kind in "iu" and sb.dtype.kind in "iu":
+            same = bool(np.array_equal(sa.astype(np.int64), sb.astype(np.int64)))
+        else:
+            same = bool(sa.dtype == sb.dtype and np.array_equal(sa, sb))
+        report[name] = {"equal": same, "mode": "order-relaxed", "dtype": str(va.dtype)}
+        all_ok = all_ok and same
+    return all_ok, report
+
+
+def compare_case(stock_dir, fvtk_dir, key, order_relaxed=False):
     """Return (ok: bool, detail: dict) for a single case key."""
     sp = os.path.join(stock_dir, key + ".npz")
     fp = os.path.join(fvtk_dir, key + ".npz")
@@ -48,6 +143,12 @@ def compare_case(stock_dir, fvtk_dir, key):
             "only_stock": sorted(names_a - names_b),
             "only_fvtk": sorted(names_b - names_a),
         }
+    if order_relaxed:
+        # Order-relaxed mesh equality: same points/point-data (strict) and the
+        # same multiset of cells carrying their cell-data, but cell ORDER may
+        # differ (e.g. thread-batched topology emission). See _compare_order_relaxed.
+        ok, per = _compare_order_relaxed(a, b)
+        return ok, {"arrays": per, "order_relaxed": True}
     per_array = {}
     ok = True
     for name in sorted(names_a):
@@ -117,6 +218,9 @@ def compare_all(stock_dir, fvtk_dir):
                 "group": cs.get("group"),
             }
             continue
-        ok, detail = compare_case(stock_dir, fvtk_dir, key)
-        cases[key] = {"ok": ok, "detail": detail, "group": cs.get("group")}
+        # A case is order-relaxed if EITHER manifest marks it so (both should agree).
+        order_relaxed = bool(cs.get("order_relaxed") or cf.get("order_relaxed"))
+        ok, detail = compare_case(stock_dir, fvtk_dir, key, order_relaxed=order_relaxed)
+        cases[key] = {"ok": ok, "detail": detail, "group": cs.get("group"),
+                      "order_relaxed": order_relaxed}
     return {"provenance": prov, "cases": cases, "keys": keys}