diff --git a/src/wmtk/Scheduler.cpp b/src/wmtk/Scheduler.cpp
index 3695080a15..594e267a20 100644
--- a/src/wmtk/Scheduler.cpp
+++ b/src/wmtk/Scheduler.cpp
@@ -3,20 +3,14 @@
 #include <wmtk/attribute/TypedAttributeHandle.hpp>
 #include <wmtk/simplex/k_ring.hpp>
 #include <wmtk/simplex/link.hpp>
+#include <wmtk/simplex/link_single_dimension_iterable.hpp>
 #include <wmtk/simplex/utils/tuple_vector_to_homogeneous_simplex_vector.hpp>
 #include <wmtk/utils/Logger.hpp>
 #include <wmtk/utils/random_seed.hpp>
+#include <wmtk/utils/tbb_parallel_for.hpp>
 
 #include <polysolve/Utils.hpp>
 
-#ifdef __GNUC__
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wredundant-decls"
-#endif
-#include <tbb/parallel_for.h>
-#ifdef __GNUC__
-#pragma GCC diagnostic pop
-#endif
 
 // #include <tbb/task_arena.h>
 #include <atomic>
@@ -374,6 +368,136 @@ SchedulerStats Scheduler::run_operation_on_all_coloring(
     return res;
 }
 
+int64_t Scheduler::color_vertices(attribute::MeshAttributeHandle& color_handle)
+{
+    if (color_handle.primitive_type() != PrimitiveType::Vertex) {
+        log_and_throw_error("Color handle must be of primitive type vertex.");
+    }
+    if (color_handle.held_type() != attribute::MeshAttributeHandle::HeldType::Int64) {
+        log_and_throw_error("Color handle must be of type int64_t.");
+    }
+    if (color_handle.dimension() != 1) {
+        log_and_throw_error("Color handle must be of dimension 1.");
+    }
+
+    Mesh& m = color_handle.mesh();
+
+    auto acc = m.create_accessor<int64_t>(color_handle);
+
+    const auto vertices = m.get_all(PrimitiveType::Vertex);
+    for (const Tuple& t : vertices) {
+        acc.scalar_attribute(t) = -1;
+    }
+
+    int64_t max_color = -1;
+
+    for (const Tuple& t : vertices) {
+        const simplex::Simplex v(m, PrimitiveType::Vertex, t);
+        auto link_vertices = simplex::link_single_dimension_iterable(m, v, PrimitiveType::Vertex);
+
+        std::vector<int64_t> neighbor_colors;
+
+        for (const Tuple& neighbor_tuple : link_vertices) {
+            // max_neighbor = std::max(acc.const_scalar_attribute(neighbor_tuple), max_neighbor);
+            const int64_t c = acc.const_scalar_attribute(neighbor_tuple);
+            if (c >= 0) {
+                neighbor_colors.emplace_back(c);
+            }
+        }
+        const int64_t t_color = first_available_color(neighbor_colors);
+
+        acc.scalar_attribute(t) = t_color;
+        max_color = std::max(max_color, t_color);
+    }
+
+
+    logger().info("{} vertices with {} different colors", vertices.size(), max_color + 1);
+
+    return max_color + 1;
+}
+
+SchedulerStats Scheduler::run_operation_on_all_with_coloring(
+    operations::Operation& op,
+    attribute::MeshAttributeHandle& color_handle,
+    int64_t num_colors,
+    bool parallel_execution)
+{
+    if (&op.mesh() != &color_handle.mesh()) {
+        log_and_throw_error("Operation and color handle do not belong to the same mesh!");
+    }
+    if (color_handle.primitive_type() != PrimitiveType::Vertex) {
+        log_and_throw_error("Color handle must be of primitive type vertex.");
+    }
+    if (color_handle.held_type() != attribute::MeshAttributeHandle::HeldType::Int64) {
+        log_and_throw_error("Color handle must be of type int64_t.");
+    }
+    if (color_handle.dimension() != 1) {
+        log_and_throw_error("Color handle must be of dimension 1.");
+    }
+
+    if (num_colors < 0) {
+        num_colors = color_vertices(color_handle);
+    }
+
+    Mesh& m = op.mesh();
+
+    auto color_acc = m.create_const_accessor<int64_t>(color_handle);
+
+    const auto vertices = m.get_all(op.primitive_type());
+
+    std::vector<std::vector<simplex::Simplex>> colored_vertices;
+    colored_vertices.resize(num_colors);
+
+    for (int64_t color = 0; color < num_colors; ++color) {
+        colored_vertices[color].reserve(vertices.size() / num_colors);
+    }
+
+    for (const Tuple& t : vertices) {
+        colored_vertices[color_acc.const_scalar_attribute(t)].emplace_back(
+            m,
+            op.primitive_type(),
+            t);
+    }
+
+    SchedulerStats res;
+
+    for (auto& one_color_vertices : colored_vertices) {
+        std::atomic_int suc_cnt = 0;
+        std::atomic_int fail_cnt = 0;
+
+        if (parallel_execution) {
+            tbb::parallel_for(
+                tbb::blocked_range<int64_t>(0, one_color_vertices.size()),
+                [&](tbb::blocked_range<int64_t> r) {
+                    for (int64_t k = r.begin(); k < r.end(); ++k) {
+                        auto mods = op(one_color_vertices[k]);
+                        if (mods.empty()) {
+                            fail_cnt++;
+                        } else {
+                            suc_cnt++;
+                        }
+                    }
+                });
+        } else {
+            for (const simplex::Simplex& s : one_color_vertices) {
+                auto mods = op(s);
+                if (mods.empty()) {
+                    fail_cnt++;
+                } else {
+                    suc_cnt++;
+                }
+            }
+        }
+
+        res.m_num_op_success = suc_cnt;
+        res.m_num_op_fail = fail_cnt;
+    }
+
+    m_stats += res;
+
+    return res;
+}
+
 void Scheduler::log(size_t total)
 {
     log(m_stats, total);
diff --git a/src/wmtk/Scheduler.hpp b/src/wmtk/Scheduler.hpp
index 6f22a1c2ea..963ca0709f 100644
--- a/src/wmtk/Scheduler.hpp
+++ b/src/wmtk/Scheduler.hpp
@@ -99,6 +99,31 @@ class Scheduler
         operations::Operation& op,
         const TypedAttributeHandle<int64_t>& color_handle);
 
+    /**
+     * @brief Add vertex colors to perform vertex optimization in parallel.
+     *
+     * @param color_handle A vertex int64_t scalar attribute representing the color.
+     * @return number of colors
+     */
+    int64_t color_vertices(attribute::MeshAttributeHandle& color_handle);
+
+    /**
+     * @brief Run op on all vertices in parallel using the coloring.
+     *
+     * Potential race conditions!!!
+     * Attribute transfer applies changes to the entire closed star, which leads to race conditions
+     * for any transfer to edges or vertices. Do not use this function in that case!!!
+     *
+     * @param op The operation, must be of primitive type vertex.
+     * @param color_handle The attribute holding the vertex int64_t scalar coloring scheme.
+     * @param num_colors The number of different colors. If negative, the coloring is initialized.
+     */
+    SchedulerStats run_operation_on_all_with_coloring(
+        operations::Operation& op,
+        attribute::MeshAttributeHandle& color_handle,
+        int64_t num_colors = -1,
+        bool parallel_execution = true);
+
     const SchedulerStats& stats() const { return m_stats; }
 
     void set_update_frequency(std::optional<size_t>&& freq = {});
diff --git a/src/wmtk/operations/attribute_update/AttributeTransferStrategyBase.cpp b/src/wmtk/operations/attribute_update/AttributeTransferStrategyBase.cpp
index de3629d6b8..d21fb22ee6 100644
--- a/src/wmtk/operations/attribute_update/AttributeTransferStrategyBase.cpp
+++ b/src/wmtk/operations/attribute_update/AttributeTransferStrategyBase.cpp
@@ -2,6 +2,7 @@
 #include "AttributeTransferStrategyBase.hpp"
 #include <wmtk/Mesh.hpp>
 #include <wmtk/simplex/neighbors_single_dimension.hpp>
+#include <wmtk/utils/tbb_parallel_for.hpp>
 
 #include <wmtk/simplex/utils/unique_homogeneous_simplices.hpp>
 
@@ -16,13 +17,23 @@ const Mesh& AttributeTransferStrategyBase::mesh() const
     return const_cast<const Mesh&>(const_cast<AttributeTransferStrategyBase*>(this)->mesh());
 }
 
-void AttributeTransferStrategyBase::run_on_all() const
+void AttributeTransferStrategyBase::run_on_all(bool parallel) const
 {
     const PrimitiveType pt = m_handle.primitive_type();
     auto tuples = m_handle.mesh().get_all(pt);
 
-    for (const Tuple& t : tuples) {
-        run(simplex::Simplex(m_handle.mesh(), pt, t));
+    if (parallel) {
+        tbb::parallel_for(
+            tbb::blocked_range<int64_t>(0, tuples.size()),
+            [&](tbb::blocked_range<int64_t> r) {
+                for (int64_t k = r.begin(); k < r.end(); ++k) {
+                    run(simplex::Simplex(m_handle.mesh(), pt, tuples[k]));
+                }
+            });
+    } else {
+        for (const Tuple& t : tuples) {
+            run(simplex::Simplex(m_handle.mesh(), pt, t));
+        }
     }
 }
 
diff --git a/src/wmtk/operations/attribute_update/AttributeTransferStrategyBase.hpp b/src/wmtk/operations/attribute_update/AttributeTransferStrategyBase.hpp
index babd79f7e0..5586ef6d33 100644
--- a/src/wmtk/operations/attribute_update/AttributeTransferStrategyBase.hpp
+++ b/src/wmtk/operations/attribute_update/AttributeTransferStrategyBase.hpp
@@ -71,7 +71,7 @@ class AttributeTransferStrategyBase : public AttributeTransferEdge
 
     // runs the transfer on every simplex - good for initializing an attribute that will be
     // managed by transfer
-    void run_on_all() const;
+    void run_on_all(bool parallel = false) const;
 
 private:
     attribute::MeshAttributeHandle m_handle;
diff --git a/src/wmtk/utils/CMakeLists.txt b/src/wmtk/utils/CMakeLists.txt
index 046c41ee4b..3076255dc4 100644
--- a/src/wmtk/utils/CMakeLists.txt
+++ b/src/wmtk/utils/CMakeLists.txt
@@ -21,6 +21,8 @@ set(SRC_FILES
     orient.hpp
     orient.cpp
 
+    tbb_parallel_for.hpp
+
     TupleInspector.cpp
     triangle_areas.hpp
     triangle_areas.cpp
diff --git a/src/wmtk/utils/tbb_parallel_for.hpp b/src/wmtk/utils/tbb_parallel_for.hpp
new file mode 100644
index 0000000000..6218cda536
--- /dev/null
+++ b/src/wmtk/utils/tbb_parallel_for.hpp
@@ -0,0 +1,10 @@
+#pragma once
+
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wredundant-decls"
+#endif
+#include <tbb/parallel_for.h>
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
\ No newline at end of file
diff --git a/tests/operations/vertex_optimization.cpp b/tests/operations/vertex_optimization.cpp
index 12774d7995..8a086a760a 100644
--- a/tests/operations/vertex_optimization.cpp
+++ b/tests/operations/vertex_optimization.cpp
@@ -1,11 +1,8 @@
 #include <catch2/catch_test_macros.hpp>
-#include <wmtk/Primitive.hpp>
 #include <wmtk/Scheduler.hpp>
 #include <wmtk/function/LocalNeighborsSumFunction.hpp>
 #include <wmtk/function/PerSimplexAutodiffFunction.hpp>
 #include <wmtk/function/simplex/AMIPS.hpp>
-#include <wmtk/function/simplex/TriangleAMIPS.hpp>
-#include <wmtk/function/utils/amips.hpp>
 #include <wmtk/invariants/SimplexInversionInvariant.hpp>
 #include <wmtk/operations/OptimizationSmoothing.hpp>
 #include <wmtk/utils/Logger.hpp>
@@ -56,6 +53,8 @@ class SquareDistance : public PerSimplexAutodiffFunction
 } // namespace wmtk::function
 TEST_CASE("vertex_optimization_Newton_Method")
 {
+    opt_logger().set_level(spdlog::level::off);
+
     DEBUG_TriMesh mesh = single_2d_nonequilateral_triangle_with_positions();
     auto handler = mesh.get_attribute_handle<double>("vertices", PrimitiveType::Vertex);
 
@@ -99,6 +98,8 @@ TEST_CASE("vertex_optimization_Newton_Method")
 
 TEST_CASE("vertex_optimization_tet_amips")
 {
+    opt_logger().set_level(spdlog::level::off);
+
     TetMesh mesh = three_incident_tets_with_positions();
     auto handle = mesh.get_attribute_handle<double>("vertices", PrimitiveType::Vertex);
     function::AMIPS amips(mesh, handle);
@@ -127,9 +128,10 @@ TEST_CASE("vertex_optimization_tet_amips")
     }
 }
 
-
 TEST_CASE("vertex_optimization_Gradient_Descent")
 {
+    opt_logger().set_level(spdlog::level::off);
+
     DEBUG_TriMesh mesh = single_2d_nonequilateral_triangle_with_positions();
     auto handle = mesh.get_attribute_handle<double>("vertices", PrimitiveType::Vertex);
 
@@ -181,3 +183,102 @@ TEST_CASE("vertex_optimization_Gradient_Descent")
     // CHECK((uv0 - uv1).norm() - (uv0 - uv2).norm() < 1e-6);
     // CHECK((uv1 - uv2).norm() - (uv0 - uv2).norm() < 1e-6);
 }
+
+TEST_CASE("vertex_optimization_with_coloring", "[operations][parallel]")
+{
+    opt_logger().set_level(spdlog::level::off);
+    logger().set_level(spdlog::level::off);
+
+    auto run = [](DEBUG_TriMesh& m, bool parallel_execution) {
+        auto color_handle =
+            m.register_attribute<int64_t>("coloring", PrimitiveType::Vertex, 1, false, -1);
+
+        Scheduler scheduler;
+        const int64_t max_colors = scheduler.color_vertices(color_handle);
+
+        auto acc = m.create_const_accessor<int64_t>(color_handle);
+
+        CHECK(acc.const_scalar_attribute(m.vertex_tuple_from_id(0)) == 0);
+        CHECK(acc.const_scalar_attribute(m.vertex_tuple_from_id(1)) == 1);
+        CHECK(acc.const_scalar_attribute(m.vertex_tuple_from_id(2)) == 2);
+
+        auto pos_handle = m.get_attribute_handle<double>("vertices", PrimitiveType::Vertex);
+
+        function::AMIPS per_tri_amips(m, pos_handle);
+        auto energy =
+            std::make_shared<function::LocalNeighborsSumFunction>(m, pos_handle, per_tri_amips);
+
+        OptimizationSmoothing op(energy);
+        op.add_invariant(
+            std::make_shared<SimplexInversionInvariant<double>>(m, pos_handle.as<double>()));
+
+        auto stats = scheduler.run_operation_on_all_with_coloring(
+            op,
+            color_handle,
+            max_colors,
+            parallel_execution);
+        REQUIRE(stats.number_of_successful_operations() > 0);
+    };
+
+    SECTION("single_triangle_coloring")
+    {
+        DEBUG_TriMesh mesh_1 = single_2d_nonequilateral_triangle_with_positions();
+        DEBUG_TriMesh mesh_2 = single_2d_nonequilateral_triangle_with_positions();
+
+        run(mesh_1, false); // sequential
+        run(mesh_2, true); // parallel
+
+        {
+            auto color_handle =
+                mesh_1.get_attribute_handle<int64_t>("coloring", PrimitiveType::Vertex);
+
+            Scheduler scheduler;
+            const int64_t max_colors = scheduler.color_vertices(color_handle);
+
+            auto acc = mesh_1.create_const_accessor<int64_t>(color_handle);
+
+            CHECK(acc.const_scalar_attribute(mesh_1.vertex_tuple_from_id(0)) == 0);
+            CHECK(acc.const_scalar_attribute(mesh_1.vertex_tuple_from_id(1)) == 1);
+            CHECK(acc.const_scalar_attribute(mesh_1.vertex_tuple_from_id(2)) == 2);
+        }
+
+        auto pos_handle_1 = mesh_1.get_attribute_handle<double>("vertices", PrimitiveType::Vertex);
+        auto pos_handle_2 = mesh_2.get_attribute_handle<double>("vertices", PrimitiveType::Vertex);
+        const auto pos_acc_1 = mesh_1.create_const_accessor<double>(pos_handle_1);
+        const auto pos_acc_2 = mesh_2.create_const_accessor<double>(pos_handle_2);
+
+        const auto vertices = mesh_1.get_all(PrimitiveType::Vertex);
+        /*
+         * This is a bit hacky because I am using the same tuples in two different meshes. But as
+         * they are exactly the same, it is fine here.
+         */
+        for (const Tuple& t : vertices) {
+            const auto p1 = pos_acc_1.const_vector_attribute(t);
+            const auto p2 = pos_acc_2.const_vector_attribute(t);
+            CHECK(p1 == p2);
+        }
+    }
+    SECTION("edge_region_coloring")
+    {
+        DEBUG_TriMesh mesh = edge_region();
+
+        auto color_handle =
+            mesh.register_attribute<int64_t>("coloring", PrimitiveType::Vertex, 1, false, -1);
+
+        Scheduler scheduler;
+        scheduler.color_vertices(color_handle);
+
+        auto acc = mesh.create_const_accessor<int64_t>(color_handle);
+
+        CHECK(acc.const_scalar_attribute(mesh.vertex_tuple_from_id(0)) == 0);
+        CHECK(acc.const_scalar_attribute(mesh.vertex_tuple_from_id(1)) == 1);
+        CHECK(acc.const_scalar_attribute(mesh.vertex_tuple_from_id(2)) == 0);
+        CHECK(acc.const_scalar_attribute(mesh.vertex_tuple_from_id(3)) == 1);
+        CHECK(acc.const_scalar_attribute(mesh.vertex_tuple_from_id(4)) == 2);
+        CHECK(acc.const_scalar_attribute(mesh.vertex_tuple_from_id(5)) == 3);
+        CHECK(acc.const_scalar_attribute(mesh.vertex_tuple_from_id(6)) == 1);
+        CHECK(acc.const_scalar_attribute(mesh.vertex_tuple_from_id(7)) == 0);
+        CHECK(acc.const_scalar_attribute(mesh.vertex_tuple_from_id(8)) == 1);
+        CHECK(acc.const_scalar_attribute(mesh.vertex_tuple_from_id(9)) == 0);
+    }
+}
\ No newline at end of file