diff --git a/Common/Core/vtkFVTKSMPDefaults.cxx b/Common/Core/vtkFVTKSMPDefaults.cxx index 98565728..36ae050c 100644 --- a/Common/Core/vtkFVTKSMPDefaults.cxx +++ b/Common/Core/vtkFVTKSMPDefaults.cxx @@ -29,6 +29,35 @@ bool DefaultThreadingDisabledByEnv() } } +//------------------------------------------------------------------------------ +// Opt-in NON-EXACT fast mode. Default OFF: filters whose threaded path is not +// byte-exact (e.g. order-relaxed topology emission) stay serial unless the user +// opts in. Read live from the FVTK_FAST env var (which fvtk.EnableFast() sets), +// so it can be toggled at runtime. Truthy: 1/on/true/yes (any case). +bool FastModeEnabled() +{ + const char* v = std::getenv("FVTK_FAST"); + if (!v || v[0] == '\0') + { + return false; + } + // Fold ASCII upper-case to lower-case before comparing so every casing ("Yes", + // "tRuE", ...) is accepted, as the Python-side IsFastEnabled() does via lower(). + const auto matches = [v](const char* lowered) { + const char* p = v; + for (; *p != '\0' && *lowered != '\0'; ++p, ++lowered) + { + const char c = (*p >= 'A' && *p <= 'Z') ? static_cast<char>(*p - 'A' + 'a') : *p; + if (c != *lowered) + { + return false; + } + } + return *p == '\0' && *lowered == '\0'; + }; + return matches("1") || matches("on") || matches("true") || matches("yes"); +} + //------------------------------------------------------------------------------ // Precedence (first match wins): // 1. opt-out env FVTK_SMP_DEFAULT=0/off/serial -> stay Sequential (serial). diff --git a/Common/Core/vtkFVTKSMPDefaults.h b/Common/Core/vtkFVTKSMPDefaults.h index a16c5327..e1c00365 100644 --- a/Common/Core/vtkFVTKSMPDefaults.h +++ b/Common/Core/vtkFVTKSMPDefaults.h @@ -79,6 +79,31 @@ inline void RunSafeFilterParallel(Body&& body) } vtkSMPTools::LocalScope(GetSafeFilterThreadingConfig(), std::forward<Body>(body)); } +/** + * True when the opt-in NON-EXACT fast mode is enabled (env FVTK_FAST, set by the + * Python fvtk.EnableFast()). Default OFF. Read live so it can be toggled at + * runtime. Filters whose threaded path is not byte-exact gate on this. + */ +VTKCOMMONCORE_EXPORT bool FastModeEnabled(); + +/** + * Like RunSafeFilterParallel(), but ONLY threads when FastModeEnabled(). 
When + * fast mode is off (the default), @p body runs serially so the filter stays + * byte-exact vs stock. Use this -- not RunSafeFilterParallel() -- for parallel + * regions whose output is NOT byte-exact (e.g. order-relaxed topology emission + * whose cell order depends on thread scheduling). + */ +template <typename Body> +inline void RunFastFilterParallel(Body&& body) +{ + if (!FastModeEnabled()) + { + body(); + return; + } + RunSafeFilterParallel(std::forward<Body>(body)); +} + VTK_ABI_NAMESPACE_END } // namespace fvtk diff --git a/Filters/Core/vtk3DLinearGridPlaneCutter.cxx b/Filters/Core/vtk3DLinearGridPlaneCutter.cxx index 4501dae0..ba21ba91 100644 --- a/Filters/Core/vtk3DLinearGridPlaneCutter.cxx +++ b/Filters/Core/vtk3DLinearGridPlaneCutter.cxx @@ -22,6 +22,7 @@ #include "vtkPlane.h" #include "vtkPointData.h" #include "vtkPolyData.h" +#include "vtkFVTKSMPDefaults.h" #include "vtkSMPThreadLocalObject.h" #include "vtkSMPTools.h" #include "vtkStaticEdgeLocatorTemplate.h" @@ -50,7 +51,7 @@ vtkCxxSetObjectMacro(vtk3DLinearGridPlaneCutter, Plane, vtkPlane); { \ if (!_seq) \ { \ - vtkSMPTools::For(0, _num, _op); \ + fvtk::RunFastFilterParallel([&]() { vtkSMPTools::For(0, _num, _op); }); \ } \ else \ { \ @@ -63,7 +64,7 @@ vtkCxxSetObjectMacro(vtk3DLinearGridPlaneCutter, Plane, vtkPlane); { \ if (!_seq) \ { \ - vtkSMPTools::For(0, _num, _op); \ + fvtk::RunFastFilterParallel([&]() { vtkSMPTools::For(0, _num, _op); }); \ } \ else \ { \ diff --git a/Filters/Core/vtkContour3DLinearGrid.cxx b/Filters/Core/vtkContour3DLinearGrid.cxx index a7c4f887..e7e4cb2f 100644 --- a/Filters/Core/vtkContour3DLinearGrid.cxx +++ b/Filters/Core/vtkContour3DLinearGrid.cxx @@ -23,6 +23,7 @@ #include "vtkObjectFactory.h" #include "vtkPointData.h" #include "vtkPolyData.h" +#include "vtkFVTKSMPDefaults.h" #include "vtkSMPTools.h" #include "vtkSmartPointer.h" #include "vtkSpanSpace.h" @@ -59,7 +60,7 @@ vtkCxxSetObjectMacro(vtkContour3DLinearGrid, ScalarTree, vtkScalarTree); { \ if (!_seq) \ { \ - 
vtkSMPTools::For(0, _num, _op); \ + fvtk::RunFastFilterParallel([&]() { vtkSMPTools::For(0, _num, _op); }); \ } \ else \ { \ @@ -72,7 +73,7 @@ vtkCxxSetObjectMacro(vtkContour3DLinearGrid, ScalarTree, vtkScalarTree); { \ if (!_seq) \ { \ - vtkSMPTools::For(0, _num, _op); \ + fvtk::RunFastFilterParallel([&]() { vtkSMPTools::For(0, _num, _op); }); \ } \ else \ { \ @@ -255,11 +256,11 @@ struct ContourCellsBase // Copy points output to VTK structures. Only point coordinates are // copied for now; later we'll define the triangle topology. ProducePoints producePts(localPts, localPtOffsets, this->NewPts); - EXECUTE_SMPFOR(this->Filter->GetSequentialProcessing(), this->NumThreadsUsed, producePts); + EXECUTE_SMPFOR((this->Filter->GetSequentialProcessing() || this->Filter->GetComputeNormals()), this->NumThreadsUsed, producePts); // Now produce the output triangles (topology) for this contour n parallel ProduceTriangles produceTris(this->TotalTris, this->NewPolys); - EXECUTE_SMPFOR(this->Filter->GetSequentialProcessing(), this->NumTris, produceTris); + EXECUTE_SMPFOR((this->Filter->GetSequentialProcessing() || this->Filter->GetComputeNormals()), this->NumTris, produceTris); } // Reduce }; // ContourCellsBase @@ -452,14 +453,14 @@ struct ProcessFastPathWorker TContourCellsST contour( filter, inPts, outPts, scalars, cellIter, isoValue, st, tris, totalPts, totalTris); EXECUTE_REDUCED_SMPFOR( - filter->GetSequentialProcessing(), contour.NumBatches, contour, numThreads); + (filter->GetSequentialProcessing() || filter->GetComputeNormals()), contour.NumBatches, contour, numThreads); } else { using TContourCells = ContourCells; TContourCells contour( filter, inPts, outPts, scalars, cellIter, isoValue, tris, totalPts, totalTris); - EXECUTE_REDUCED_SMPFOR(filter->GetSequentialProcessing(), numCells, contour, numThreads); + EXECUTE_REDUCED_SMPFOR((filter->GetSequentialProcessing() || filter->GetComputeNormals()), numCells, contour, numThreads); } } }; @@ -619,7 +620,7 @@ struct 
ExtractEdgesBase this->Edges = new EdgeTuple>[3 * this->NumTris]; // three edges per triangle ProduceEdges produceEdges(localEdges, localTriOffsets, this->Edges, this->Filter); - EXECUTE_SMPFOR(this->Filter->GetSequentialProcessing(), this->NumThreadsUsed, produceEdges); + EXECUTE_SMPFOR((this->Filter->GetSequentialProcessing() || this->Filter->GetComputeNormals()), this->NumThreadsUsed, produceEdges); } // Reduce }; // ExtractEdgesBase @@ -818,7 +819,7 @@ struct ExtractEdgesWorker TExtractEdgesST extractEdges( filter, scalars, cellIter, isoValue, st, newPolys, totalTris, originalCellIds); EXECUTE_REDUCED_SMPFOR( - filter->GetSequentialProcessing(), extractEdges.NumBatches, extractEdges, numThreads); + (filter->GetSequentialProcessing() || filter->GetComputeNormals()), extractEdges.NumBatches, extractEdges, numThreads); numTris = extractEdges.NumTris; mergeEdges = extractEdges.Edges; } @@ -827,7 +828,7 @@ struct ExtractEdgesWorker using TExtractEdges = ExtractEdges; TExtractEdges extractEdges( filter, scalars, cellIter, isoValue, newPolys, totalTris, originalCellIds); - EXECUTE_REDUCED_SMPFOR(filter->GetSequentialProcessing(), numCells, extractEdges, numThreads); + EXECUTE_REDUCED_SMPFOR((filter->GetSequentialProcessing() || filter->GetComputeNormals()), numCells, extractEdges, numThreads); numTris = extractEdges.NumTris; mergeEdges = extractEdges.Edges; } @@ -983,7 +984,7 @@ struct ProduceMergedPointsWorker { ProduceMergedPoints produceMergedPoints( filter, inputPointsArray, outputPointsArray, mergeArray, offsets, totalPoints); - EXECUTE_SMPFOR(filter->GetSequentialProcessing(), numPts, produceMergedPoints); + EXECUTE_SMPFOR((filter->GetSequentialProcessing() || filter->GetComputeNormals()), numPts, produceMergedPoints); } }; @@ -1121,7 +1122,7 @@ int ProcessMerged(vtkContour3DLinearGrid* filter, vtkPoints* inPts, vtkPoints* o // Generate triangles. 
ProduceMergedTriangles produceTris( mergeEdges, offsets, numTris, newPolys, totalPts, totalTris, filter); - EXECUTE_SMPFOR(filter->GetSequentialProcessing(), numPts, produceTris); + EXECUTE_SMPFOR((filter->GetSequentialProcessing() || filter->GetComputeNormals()), numPts, produceTris); numThreads = nt; // Generate points (one per unique edge) @@ -1159,7 +1160,7 @@ int ProcessMerged(vtkContour3DLinearGrid* filter, vtkPoints* inPts, vtkPoints* o pointArrays->Realloc(totalPts + numPts); } ProducePointAttributes interpolate(mergeEdges, offsets, pointArrays, totalPts, filter); - EXECUTE_SMPFOR(filter->GetSequentialProcessing(), numPts, interpolate); + EXECUTE_SMPFOR((filter->GetSequentialProcessing() || filter->GetComputeNormals()), numPts, interpolate); // interpolate cell data if (totalTris <= 0) // first contour value generating output @@ -1172,7 +1173,7 @@ int ProcessMerged(vtkContour3DLinearGrid* filter, vtkPoints* inPts, vtkPoints* o cellArrays->Realloc(totalTris + numTris); } ProduceCellAttributes interpolateCell(originalCellIds, cellArrays, totalTris, filter); - EXECUTE_SMPFOR(filter->GetSequentialProcessing(), numTris, interpolateCell); + EXECUTE_SMPFOR((filter->GetSequentialProcessing() || filter->GetComputeNormals()), numTris, interpolateCell); } // Clean up @@ -1246,7 +1247,7 @@ vtkSmartPointer GenerateTriNormals( // Execute functor over all triangles ComputeCellNormals computeNormals(pts, tris, n, filter); - EXECUTE_SMPFOR(filter->GetSequentialProcessing(), numTris, computeNormals); + EXECUTE_SMPFOR((filter->GetSequentialProcessing() || filter->GetComputeNormals()), numTris, computeNormals); return cellNormals; } @@ -1331,7 +1332,7 @@ void GeneratePointNormals(vtkPoints* pts, vtkCellArray* tris, vtkFloatArray* cel // Process all points, averaging normals AverageNormals average(&links, triN, ptN, filter); - EXECUTE_SMPFOR(filter->GetSequentialProcessing(), numPts, average); + EXECUTE_SMPFOR((filter->GetSequentialProcessing() || filter->GetComputeNormals()), 
numPts, average); // Clean up and get out pd->SetNormals(ptNormals); diff --git a/Wrapping/Python/fvtk/__init__.py.in b/Wrapping/Python/fvtk/__init__.py.in index f5ebba89..c4736207 100644 --- a/Wrapping/Python/fvtk/__init__.py.in +++ b/Wrapping/Python/fvtk/__init__.py.in @@ -111,6 +111,33 @@ def _load_fvtk_static(): __all__ = [ @_fvtk_all@ ] +#------------------------------------------------------------------------------ +# Opt-in NON-EXACT fast mode (fvtk extension). +# +# Some fvtk filters have a multithreaded fast path whose output is NOT byte-exact +# vs stock VTK -- e.g. order-relaxed topology emission, where the SAME cells are +# produced but their ORDER (and thus the raw connectivity bytes) depends on thread +# scheduling. These stay OFF by default so fvtk is a byte-exact drop-in. Call +# EnableFast() to opt in; points / point-data / the cell set stay correct, only +# cell ORDER becomes non-deterministic. Backed by the FVTK_FAST env var, which the +# native filters read live, so this can be toggled at runtime. +import os as _os + +def EnableFast(): + """Opt in to fvtk's non-exact multithreaded fast paths (see module docs).""" + _os.environ["FVTK_FAST"] = "1" + +def DisableFast(): + """Turn the non-exact fast paths back off (the default).""" + _os.environ.pop("FVTK_FAST", None) + +def IsFastEnabled(): + """True if the non-exact fast mode is currently enabled.""" + return _os.environ.get("FVTK_FAST", "").lower() in ("1", "on", "true", "yes") + +# Expose the fast-mode toggles for `from fvtk import *` and discoverability. 
+__all__ += ["EnableFast", "DisableFast", "IsFastEnabled"] + #------------------------------------------------------------------------------ # get the version __version__ = "@VTK_MAJOR_VERSION@.@VTK_MINOR_VERSION@.@VTK_BUILD_VERSION@" diff --git a/tests/bitexact/compare.py b/tests/bitexact/compare.py index b71584c7..6530ba58 100644 --- a/tests/bitexact/compare.py +++ b/tests/bitexact/compare.py @@ -33,7 +33,102 @@ def _ulp_distance(x, y): return int(np.abs(xi - yi).max()) if xi.size else 0 -def compare_case(stock_dir, fvtk_dir, key): +# Tags in their canonical vtkPolyData cell-data global-index order. Cell data is +# indexed across all cells as verts, then lines, then polys, then strips; the +# canonical sort must preserve this partition and only reorder WITHIN each group. +_POLY_TAGS = ("verts", "lines", "polys", "strips") + + +def _cell_records(arrays): + """Reconstruct a per-cell canonical sort key list + the global cell order. + + Returns (keys, perm) where ``keys`` is the list of canonical per-cell keys in + GLOBAL cell-data index order, and ``perm`` is an argsort (a permutation of + range(numCells)) that orders cells canonically while preserving the VTK group + partition. Cells are keyed by (group_rank, cell_size, connectivity-tuple) for polydata or + (cell_type, cell_size, connectivity-tuple) for unstructured grids -- connectivity is point + IDs, which are directly comparable because points stay strictly identical. + Returns None if the array set has no recognizable topology. + """ + names = set(arrays.files) if hasattr(arrays, "files") else set(arrays) + + def conn_off(tag): + ck, ok = (f"conn:{tag}", f"off:{tag}") if tag else ("conn", "off") + if ck in names and ok in names: + return np.asarray(arrays[ck]).astype(np.int64), np.asarray(arrays[ok]).astype(np.int64) + return None, None + + keys = [] + # Unstructured grid: single conn/off plus celltypes. 
+ if "conn" in names and "off" in names: + conn, off = conn_off(None) + ctypes = np.asarray(arrays["celltypes"]).astype(np.int64) if "celltypes" in names else None + for i in range(len(off) - 1): + cell = tuple(conn[off[i]:off[i + 1]].tolist()) + rank = int(ctypes[i]) if ctypes is not None else 0 + keys.append((rank, len(cell), cell)) + else: + # PolyData: grouped verts|lines|polys|strips. + any_topo = False + for rank, tag in enumerate(_POLY_TAGS): + conn, off = conn_off(tag) + if conn is None: + continue + any_topo = True + for i in range(len(off) - 1): + cell = tuple(conn[off[i]:off[i + 1]].tolist()) + keys.append((rank, len(cell), cell)) + if not any_topo: + return None + # Stable argsort: preserve group partition (rank leads the key), order within. + perm = sorted(range(len(keys)), key=lambda i: keys[i]) + return keys, perm + + +def _compare_order_relaxed(a, b): + """Order-invariant mesh equality: points + point-data strict; cells compared + as a multiset carrying their cell-data. Returns (ok, per_array_detail).""" + per = {} + ok = True + names = sorted(set(a.files) & set(b.files)) + # 1) points + point-data: STRICT (points stay identical, so pd indices align). + for name in names: + if name == "points" or name.startswith("pd:"): + x, y = a[name], b[name] + eq = bool(x.shape == y.shape and x.dtype == y.dtype and np.array_equal(x, y)) + per[name] = {"equal": eq, "mode": "strict", "dtype": str(x.dtype)} + ok &= eq + # 2) cells: canonicalize both sides, compare keys (connectivity multiset). 
+ ra, rb = _cell_records(a), _cell_records(b) + if ra is None or rb is None: + per["__cells__"] = {"equal": False, "reason": "no topology to canonicalize"} + return False, per + ka, pa = ra + kb, pb = rb + keys_eq = bool(len(ka) == len(kb) and [ka[i] for i in pa] == [kb[i] for i in pb]) + per["__cells__"] = {"equal": keys_eq, "mode": "order-relaxed", "ncells": len(ka)} + ok &= keys_eq + # 3) cell-data: reorder each cd:* array by the canonical perm, compare (values + # travel with their cell). Width-relaxed for integer cell-data. + for name in names: + if not name.startswith("cd:"): + continue + x, y = a[name], b[name] + if x.shape[0] != len(pa) or y.shape[0] != len(pb) or x.shape[1:] != y.shape[1:]: + per[name] = {"equal": False, "mode": "order-relaxed", "reason": "shape"} + ok = False + continue + xs, ys = x[pa], y[pb] + if xs.dtype.kind in "iu" and ys.dtype.kind in "iu": + eq = bool(np.array_equal(xs.astype(np.int64), ys.astype(np.int64))) + else: + eq = bool(xs.dtype == ys.dtype and np.array_equal(xs, ys)) + per[name] = {"equal": eq, "mode": "order-relaxed", "dtype": str(x.dtype)} + ok &= eq + return ok, per + + +def compare_case(stock_dir, fvtk_dir, key, order_relaxed=False): """Return (ok: bool, detail: dict) for a single case key.""" sp = os.path.join(stock_dir, key + ".npz") fp = os.path.join(fvtk_dir, key + ".npz") @@ -48,6 +143,12 @@ def compare_case(stock_dir, fvtk_dir, key): "only_stock": sorted(names_a - names_b), "only_fvtk": sorted(names_b - names_a), } + if order_relaxed: + # Order-relaxed mesh equality: same points/point-data (strict) and the + # same multiset of cells carrying their cell-data, but cell ORDER may + # differ (e.g. thread-batched topology emission). See _compare_order_relaxed. 
+ ok, per = _compare_order_relaxed(a, b) + return ok, {"arrays": per, "order_relaxed": True} per_array = {} ok = True for name in sorted(names_a): @@ -117,6 +218,9 @@ def compare_all(stock_dir, fvtk_dir): "group": cs.get("group"), } continue - ok, detail = compare_case(stock_dir, fvtk_dir, key) - cases[key] = {"ok": ok, "detail": detail, "group": cs.get("group")} + # A case is order-relaxed if EITHER manifest marks it so (both should agree). + order_relaxed = bool(cs.get("order_relaxed") or cf.get("order_relaxed")) + ok, detail = compare_case(stock_dir, fvtk_dir, key, order_relaxed=order_relaxed) + cases[key] = {"ok": ok, "detail": detail, "group": cs.get("group"), + "order_relaxed": order_relaxed} return {"provenance": prov, "cases": cases, "keys": keys} diff --git a/tests/bitexact/ops.py b/tests/bitexact/ops.py index bf1a886b..da94bfe3 100644 --- a/tests/bitexact/ops.py +++ b/tests/bitexact/ops.py @@ -1881,6 +1881,51 @@ def op_cutter(dtype, size): return cut.GetOutput() +def op_cutter_linear(dtype, size): + # Plane cut of a LARGE linear hex unstructured grid with triangle generation + # ON (the default). vtkCutter routes this to vtk3DLinearGridPlaneCutter -- the + # threaded fast path that fvtk runs under the OPT-IN non-exact fast mode. + # + # Fast mode is gated by the FVTK_FAST env var (the fvtk.EnableFast() Python + # API just sets this). We set it here so the fvtk side actually threads; stock + # VTK ignores the variable, so it still produces the sequential reference. The + # mesh is sized so the parallel vtkSMPTools::For batch-splits, so the threaded + # triangle emission reorders cells relative to the sequential reference. Output + # points + interpolated point scalars + the (constant) plane normal are + # thread-INVARIANT; only cell EMISSION ORDER differs. Hence this op is compared + # ORDER-RELAXED: same points/point-data (strict) and the same multiset of + # triangles carrying their cell-data, cell order negotiable. 
+ os.environ["FVTK_FAST"] = "1" # fvtk: opt in to the threaded cutter; stock: ignored + p = vtkPlane() + c = (size - 1) / 2.0 + p.SetOrigin(c, c, c) + p.SetNormal(1, 1, 0) + cut = vtkCutter() + cut.SetInputData(make_hex_ugrid(size, dtype)) + cut.SetCutFunction(p) + cut.SetValue(0, 0.0) # GenerateTriangles ON (default) -> linear-grid fast path + cut.Update() + return cut.GetOutput() + + +def op_contour_linear(dtype, size): + # Isocontour of a LARGE linear hex unstructured grid with ComputeNormals OFF. + # vtkContourFilter routes a linear UG to vtkContour3DLinearGrid -- the threaded + # fast path fvtk runs under the OPT-IN non-exact fast mode (FVTK_FAST, set by + # fvtk.EnableFast()). With ComputeNormals OFF the merge path produces + # thread-INVARIANT points + interpolated point scalars; only triangle EMISSION + # ORDER differs, so the case is compared ORDER-RELAXED. ComputeNormals ON is + # NOT order-relaxable (normal averaging is reduction-order-dependent) and the + # filter keeps it serial / byte-exact -- this op deliberately leaves it off. + os.environ["FVTK_FAST"] = "1" # fvtk: opt in to the threaded contour; stock: ignored + c = vtkContourFilter() + c.SetInputData(make_hex_ugrid(size, dtype)) + c.SetComputeNormals(0) + c.SetValue(0, 0.25 * (size ** 2)) + c.Update() + return c.GetOutput() + + def op_cutter_polydata(dtype, size): # vtkCutter on a vtkPolyData (triangle sphere) with GenerateTriangles OFF. 
# A polydata input that is NOT eligible for the plane-cutter fast path routes @@ -2356,6 +2401,12 @@ def op_ply_roundtrip_ascii(dtype, size): "tube_vec": dict(fn=op_tube_vec, group="filter", dtypes=["float32", "float64"], sizes=[16, 32]), "gradient": dict(fn=op_gradient, group="filter", dtypes=["float32", "float64"], sizes=[16, 24]), "cutter": dict(fn=op_cutter, group="filter", dtypes=["float64"], sizes=[8, 12]), + # Large linear-grid plane cut: threaded vtk3DLinearGridPlaneCutter, ORDER-RELAXED + # (same points/point-data + same triangle multiset; cell order may permute). + "cutter_linear": dict(fn=op_cutter_linear, group="filter", dtypes=["float32", "float64"], sizes=[30, 40], order_relaxed=True), + # Large linear-grid isocontour (ComputeNormals OFF): threaded vtkContour3DLinearGrid, + # ORDER-RELAXED. Normals-ON stays serial/byte-exact (reduction-order-dependent). + "contour_linear": dict(fn=op_contour_linear, group="filter", dtypes=["float32", "float64"], sizes=[30, 40], order_relaxed=True), "cutter_polydata": dict(fn=op_cutter_polydata, group="filter", dtypes=["float64"], sizes=[12, 20]), "cutter_polydata_bycell": dict(fn=op_cutter_polydata_bycell, group="filter", dtypes=["float64"], sizes=[12, 20]), "cellcenters": dict(fn=op_cellcenters, group="filter", dtypes=["float32", "float64"], sizes=[8, 12]), diff --git a/tests/bitexact/run_ops.py b/tests/bitexact/run_ops.py index e5838262..a27e549b 100644 --- a/tests/bitexact/run_ops.py +++ b/tests/bitexact/run_ops.py @@ -69,6 +69,7 @@ def main(): "dtype": dtype_name, "size": size, "group": ops.OPS[op_name]["group"], + "order_relaxed": bool(ops.OPS[op_name].get("order_relaxed", False)), "n_arrays": len(arrays), "sha256": array_sha(arrays), "arrays": {k: list(np.asarray(v).shape) for k, v in arrays.items()}, diff --git a/tests/bitexact/test_bitexact.py b/tests/bitexact/test_bitexact.py index cb6376db..d0350d03 100644 --- a/tests/bitexact/test_bitexact.py +++ b/tests/bitexact/test_bitexact.py @@ -45,13 +45,17 @@ def 
_assert_case(results, case_key): detail = case["detail"] # Build a focused failure message listing the non-equal arrays + ULP. msg = [f"BIT DIFFERENCE in {case_key}:"] + if detail.get("order_relaxed"): + msg.append(" (order-relaxed mesh comparison)") if "arrays" in detail: for name, info in detail["arrays"].items(): - if not info["equal"]: + if not info.get("equal", True): msg.append( - f" array {name}: equal=False dtype={info['dtype']} " - f"shape_stock={info['shape_stock']} " - f"shape_fvtk={info['shape_fvtk']} ulp={info['ulp']}" + f" array {name}: equal=False mode={info.get('mode', 'strict')} " + f"dtype={info.get('dtype', '?')} " + f"shape_stock={info.get('shape_stock', '?')} " + f"shape_fvtk={info.get('shape_fvtk', '?')} " + f"ulp={info.get('ulp')} reason={info.get('reason', '')}" ) else: msg.append(f" {detail}") diff --git a/tests/bitexact/test_smp_determinism.py b/tests/bitexact/test_smp_determinism.py index f541bc85..0fe6c5df 100644 --- a/tests/bitexact/test_smp_determinism.py +++ b/tests/bitexact/test_smp_determinism.py @@ -26,7 +26,11 @@ # Ops whose filters opt into fvtk default-on threading. Exercising any of these # at >1 thread must produce byte-identical output to the 1-thread run. -THREADED_OPS = ["warp", "warpvector", "normals", "elevation"] +# cutter_linear and contour_linear are ORDER-RELAXED (threaded vtk3DLinearGridPlaneCutter +# and vtkContour3DLinearGrid): their cell emission order varies with thread count, so +# compare_all compares them order-relaxed (same points/point-data + same triangle multiset). +# For these two ops the assertion below checks thread-count invariance of the MESH, not the byte layout. +THREADED_OPS = ["warp", "warpvector", "normals", "elevation", "cutter_linear", "contour_linear"] THREAD_COUNTS = [1, 4, 8]