diff --git a/CMakeLists.txt b/CMakeLists.txt index 3024757..7d2325e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -44,6 +44,7 @@ add_library(linalgcpp src/solvers.cpp src/sparsematrix.cpp src/timer.cpp + src/utilities.cpp src/vector.cpp src/vectorview.cpp ) @@ -58,10 +59,6 @@ target_include_directories(linalgcpp ${CMAKE_CURRENT_SOURCE_DIR}/src ) -add_subdirectory(tests) -add_subdirectory(modules) -add_subdirectory(examples) - target_link_libraries(linalgcpp PRIVATE ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES} ) @@ -109,5 +106,8 @@ export(EXPORT linalgcpp-targets FILE ${CMAKE_CURRENT_BINARY_DIR}/linalgcppTargets.cmake NAMESPACE linalgcpp:: ) - export(PACKAGE linalgcpp) + +add_subdirectory(tests) +add_subdirectory(modules) +add_subdirectory(examples) diff --git a/config/extern_build.sh b/config/extern_build.sh index bb3fc6b..7c812ec 100644 --- a/config/extern_build.sh +++ b/config/extern_build.sh @@ -42,6 +42,7 @@ CC=mpicc CXX=mpic++ cmake \ -DLINALGCPP_ENABLE_MPI=Yes \ -DLINALGCPP_ENABLE_METIS=Yes \ -DLINALGCPP_ENABLE_SUITESPARSE=Yes \ + -DLINALGCPP_ENABLE_GRAPHTOOLS=Yes \ -DHypre_INC_DIR=$HYPRE_DIR/include \ -DHypre_LIB_DIR=$HYPRE_DIR/lib \ -DMETIS_DIR=$METIS_DIR \ diff --git a/config/gelever.sh b/config/gelever.sh index e5d1ab7..448fff2 100644 --- a/config/gelever.sh +++ b/config/gelever.sh @@ -11,6 +11,7 @@ CXX=mpic++ CC=mpicc cmake .. \ -DLINALGCPP_ENABLE_MPI=Yes \ -DLINALGCPP_ENABLE_METIS=Yes \ -DLINALGCPP_ENABLE_SUITESPARSE=Yes \ + -DLINALGCPP_ENABLE_GRAPHTOOLS=Yes \ -DHypre_DIR=${HYPRE_DIR} \ -DSUITESPARSE_INCLUDE_DIR_HINTS=${SUITESPARSE_DIR}/include \ -DSUITESPARSE_LIBRARY_DIR_HINTS=${SUITESPARSE_DIR}/lib diff --git a/include/coomatrix.hpp b/include/coomatrix.hpp index b6264c4..0c71858 100644 --- a/include/coomatrix.hpp +++ b/include/coomatrix.hpp @@ -246,8 +246,11 @@ class CooMatrix : public Operator */ void EliminateZeros(double tolerance = 0); - private: + /*! 
@brief Obtain the current size from maximum elements + @returns (rows, cols) current size + */ std::tuple FindSize() const; + private: bool size_set_; @@ -707,8 +710,8 @@ std::tuple CooMatrix::FindSize() const return std::tuple {rows_, cols_}; } - int rows = 0; - int cols = 0; + int rows = -1; + int cols = -1; for (const auto& entry : entries_) { diff --git a/include/densematrix.hpp b/include/densematrix.hpp index b1d36e4..064e588 100644 --- a/include/densematrix.hpp +++ b/include/densematrix.hpp @@ -498,6 +498,19 @@ class DenseMatrix : public Operator @param dense dense matrix that holds the submatrix */ void AddSubMatrix(const std::vector& rows, std::vector& cols, const DenseMatrix& dense); + /*! @brief Get a non-contigious submatrix, given the rows and cols + @param rows rows to get + @param cols cols to get + @param dense dense matrix that holds the submatrix + */ + void GetSubMatrix(const std::vector& rows, std::vector& cols, DenseMatrix& dense) const; + + /*! @brief Get a non-contigious submatrix, given the rows and cols + @param rows rows to get + @param cols cols to get + @returns dense dense matrix that holds the submatrix + */ + DenseMatrix GetSubMatrix(const std::vector& rows, std::vector& cols) const; /*! @brief Compute singular values and vectors A = U * S * VT Where S is returned and A is replaced with VT diff --git a/include/eigensolver.hpp b/include/eigensolver.hpp index 24ccaed..1eb89b2 100644 --- a/include/eigensolver.hpp +++ b/include/eigensolver.hpp @@ -77,6 +77,13 @@ class EigenSolver */ void Solve(const DenseMatrix& mat, double rel_tol, int max_evect, EigenPair& eigen_pair); + + /*! 
@brief Compute full spectrum of eigen pairs + + @param mat Dense matrix to solve for + @param EigenPair pair of eigenvalues and eigenvectors + */ + void Solve(const DenseMatrix& mat, EigenPair& eigen_pair); private: diff --git a/include/utilities.hpp b/include/utilities.hpp index 4c23f4b..eff2bed 100644 --- a/include/utilities.hpp +++ b/include/utilities.hpp @@ -5,6 +5,7 @@ #include #include "sparsematrix.hpp" +#include "coomatrix.hpp" #if __cplusplus > 201103L using std::make_unique; @@ -19,6 +20,85 @@ std::unique_ptr make_unique(Ts&& ... params) namespace linalgcpp { +/** @brief Set a global marker with given local indices + Such that marker[global_index] = local_index and + all other entries are -1 + + @param marker global marker to set + @param indices local indices +*/ +void SetMarker(std::vector& marker, const std::vector& indices); + +/** @brief Clear a global marker with given local indices + Such that marker[global_index] = -1 + + @param marker global marker to clear + @param indices local indices +*/ +void ClearMarker(std::vector& marker, const std::vector& indices); + + +/** @brief Sparse identity of given size + @param size square size of identity + @return identity matrix +*/ +template +SparseMatrix SparseIdentity(int size); + +/** @brief Construct an rectangular identity matrix (as a SparseMatrix) + @param rows number of row + @param cols number of columns + @param row_offset offset row where diagonal identity starts + @param col_offset offset column where diagonal identity starts +*/ +template +SparseMatrix SparseIdentity(int rows, int cols, int row_offset = 0, int col_offset = 0); + +/** @brief Adds two sparse matrices C = alpha * A + beta * B + Nonzero entries do not have to match between A and B + + @param alpha scale for A + @param A A matrix + @param beta scale for B + @param B B matrix + @returns C such that C = alpha * A + beta * B +*/ +template +SparseMatrix Add(double alpha, const SparseMatrix& A, + double beta, const SparseMatrix& B); + 
+/** @brief Adds two sparse matrices C = A + B + Nonzero entries do not have to match between A and B + + @param A A matrix + @param B B matrix + @returns C such that C = A + B +*/ +template +SparseMatrix Add(const SparseMatrix& A, + const SparseMatrix& B); + +/** @brief Change the type of sparse matrix through copy + + @param input Input matrix + @param output Output matrix of desired output type + @returns C such that C = A + B +*/ +template +SparseMatrix Duplicate(const SparseMatrix& input); + +/** @brief Check if each entry is finite + + @param x collection to check + @throws if any element is not finite +*/ +template +void VerifyFinite(const T& x); + +/// +/// Inline implementations: +/// + /*! @brief Throw if false in debug mode only */ inline void linalgcpp_assert(bool expression, const std::string& message = "linalgcpp assertion failed") @@ -63,23 +143,7 @@ void linalgcpp_verify(F&& lambda, const std::string& message = "linalgcpp verifi } } -/** @brief Sparse identity of given size - @param size square size of identity - @return identity matrix -*/ template -SparseMatrix SparseIdentity(int size); - -/** @brief Construct an rectangular identity matrix (as a SparseMatrix) - @param rows number of row - @param cols number of columns - @param row_offset offset row where diagonal identity starts - @param col_offset offset column where diagonal identity starts -*/ -template -SparseMatrix SparseIdentity(int rows, int cols, int row_offset = 0, int col_offset = 0); - -template SparseMatrix SparseIdentity(int size) { assert(size >= 0); @@ -87,7 +151,7 @@ SparseMatrix SparseIdentity(int size) return SparseMatrix(std::vector(size, (T)1.0)); } -template +template SparseMatrix SparseIdentity(int rows, int cols, int row_offset, int col_offset) { assert(rows >= 0); @@ -113,6 +177,68 @@ SparseMatrix SparseIdentity(int rows, int cols, int row_offset, int col_offse return SparseMatrix(std::move(indptr), std::move(indices), std::move(data), rows, cols); } +template 
+SparseMatrix Add(const SparseMatrix& A, + const SparseMatrix& B) +{ + return Add(1.0, A, 1.0, B); +} + +template +SparseMatrix Add(double alpha, const SparseMatrix& A, + double beta, const SparseMatrix& B) +{ + assert(A.Rows() == B.Rows()); + assert(A.Cols() == B.Cols()); + + CooMatrix coo(A.Rows(), A.Cols()); + + auto add_mat = [&coo](double scale, const SparseMatrix & mat) + { + const auto& indptr = mat.GetIndptr(); + const auto& indices = mat.GetIndices(); + const auto& data = mat.GetData(); + + int rows = mat.Rows(); + + for (int i = 0; i < rows; ++i) + { + for (int j = indptr[i]; j < indptr[i + 1]; ++j) + { + coo.Add(i, indices[j], scale * data[j]); + } + } + }; + + add_mat(alpha, A); + add_mat(beta, B); + + return coo.ToSparse(); +} + +template +SparseMatrix Duplicate(const SparseMatrix& input) +{ + auto indptr = input.GetIndptr(); + auto indices = input.GetIndices(); + std::vector data(input.GetData().size()); + + const auto& input_data = input.GetData(); + std::copy(std::begin(input_data), std::end(input_data), std::begin(data)); + + return SparseMatrix(std::move(indptr), std::move(indices), std::move(data), + input.Rows(), input.Cols()); +} + +template +void VerifyFinite(const T& x) +{ + for (auto&& x_i : x) + { + linalgcpp_verify(std::isfinite(x_i), "X is not finite!"); + } +} + } //namespace linalgcpp #endif // UTILITIES_HPP__ diff --git a/modules/CMakeLists.txt b/modules/CMakeLists.txt index 48f3aab..b9332c2 100644 --- a/modules/CMakeLists.txt +++ b/modules/CMakeLists.txt @@ -10,4 +10,6 @@ if (LINALGCPP_ENABLE_METIS) add_subdirectory(partition) endif(LINALGCPP_ENABLE_METIS) - +if (LINALGCPP_ENABLE_GRAPHTOOLS) + add_subdirectory(graphtools) +endif(LINALGCPP_ENABLE_GRAPHTOOLS) diff --git a/modules/graphtools/CMakeLists.txt b/modules/graphtools/CMakeLists.txt new file mode 100644 index 0000000..ea154fe --- /dev/null +++ b/modules/graphtools/CMakeLists.txt @@ -0,0 +1,98 @@ +############################################################### +# MIT License +# 
+# Copyright (c) 2018 Pablo Arias +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +# OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+# +############################################################### + +############################################################### +# Using https://github.com/pabloariasal/modern-cmake-sample +# Modified by gelever on April 1st, 2018 # +############################################################### + +cmake_minimum_required(VERSION 3.5) + +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules") + +add_library(graphtools + src/graph.cpp + src/graph_mis.cpp + src/graph_partition.cpp + src/graph_topology.cpp + src/graph_utilities.cpp +) + +add_dependencies(graphtools parlinalgcpp) +add_dependencies(graphtools partition) +add_dependencies(graphtools sparsesolve) + +target_compile_options(graphtools PRIVATE $<$:-Wall -O2>) +target_compile_options(graphtools PRIVATE $<$:-Wall -O2>) + +target_link_libraries(graphtools + PUBLIC parlinalgcpp partition sparsesolve +) + +target_include_directories(graphtools + PUBLIC + $ + $ +) + +include(GNUInstallDirs) +set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/libgraphtools) + +install(TARGETS graphtools + EXPORT graphtools-targets + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} +) + +install(DIRECTORY include/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) + +install(EXPORT graphtools-targets + FILE graphtoolsTargets.cmake + NAMESPACE linalgcpp:: + DESTINATION ${INSTALL_CONFIGDIR} +) + +include(CMakePackageConfigHelpers) +write_basic_package_version_file( + ${CMAKE_CURRENT_BINARY_DIR}/graphtoolsConfigVersion.cmake + VERSION ${PROJECT_VERSION} + COMPATIBILITY AnyNewerVersion +) + +configure_package_config_file(${CMAKE_CURRENT_LIST_DIR}/cmake/graphtoolsConfig.cmake.in + ${CMAKE_CURRENT_BINARY_DIR}/graphtoolsConfig.cmake + INSTALL_DESTINATION ${INSTALL_CONFIGDIR} +) + +install(FILES + ${CMAKE_CURRENT_BINARY_DIR}/graphtoolsConfig.cmake + ${CMAKE_CURRENT_BINARY_DIR}/graphtoolsConfigVersion.cmake + DESTINATION ${INSTALL_CONFIGDIR} +) + +export(EXPORT graphtools-targets FILE 
${CMAKE_CURRENT_BINARY_DIR}/graphtoolsTargets.cmake NAMESPACE linalgcpp::) + +export(PACKAGE graphtools) + +add_subdirectory(tests) diff --git a/modules/graphtools/cmake/graphtoolsConfig.cmake.in b/modules/graphtools/cmake/graphtoolsConfig.cmake.in new file mode 100644 index 0000000..680f391 --- /dev/null +++ b/modules/graphtools/cmake/graphtoolsConfig.cmake.in @@ -0,0 +1,14 @@ +get_filename_component(graphtools_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) +include(CMakeFindDependencyMacro) + +list(APPEND CMAKE_MODULE_PATH "${graphtools_CMAKE_DIR}") + +find_package(linalgcpp REQUIRED) + +LIST(REMOVE_AT CMAKE_MODULE_PATH -1) + +if(NOT TARGET linalgcpp::graphtools) + include("${graphtools_CMAKE_DIR}/graphtoolsTargets.cmake") +endif() + +set(graphtools_LIBRARIES linalgcpp::graphtools) diff --git a/modules/graphtools/include/graph.hpp b/modules/graphtools/include/graph.hpp new file mode 100644 index 0000000..f79170f --- /dev/null +++ b/modules/graphtools/include/graph.hpp @@ -0,0 +1,324 @@ +/*BHEADER********************************************************************** + * + * Copyright (c) 2018, Lawrence Livermore National Security, LLC. + * Produced at the Lawrence Livermore National Laboratory. + * LLNL-CODE-745247. All Rights reserved. See file COPYRIGHT for details. + * + * This file is part of smoothG. For more information and source code + * availability, see https://www.github.com/llnl/smoothG. + * + * smoothG is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License (as published by the Free + * Software Foundation) version 2.1 dated February 1999. 
+ * + ***********************************************************************EHEADER*/ + +/** @file + + @brief Graph class +*/ + +#ifndef GRAPH_HPP +#define GRAPH_HPP + +#include "graph_utilities.hpp" + +namespace linalgcpp +{ + +/** + @brief Container for input graph information +*/ +template // W block type +struct Graph +{ + /** @brief Default Constructor */ + Graph() = default; + + /** + @brief Distribute a graph to the communicator. + + Generally we read a global graph on one processor, and then distribute + it. This constructor handles that process. + + @param comm the communicator over which to distribute the graph + @param vertex_edge_global describes the entire global graph, unsigned + @param part_global partition of the global vertices + */ + Graph(MPI_Comm comm, const SparseMatrix& vertex_edge_global, + const std::vector& part_global, + const std::vector& weight_global = {}, + const SparseMatrix& W_block_global = {}); + + /** + @brief Accepts an already distributed graph. + Computes vertex and edge maps from local info, + these are necessarily the same as the original maps! 
+ + @param vertex_edge_local local vertex edge relationship + @param edge_true_edge edge to true edge relationship + @param part_local partition of the local vertices + */ + Graph(SparseMatrix vertex_edge_local, ParMatrix edge_true_edge, + std::vector part_local, + std::vector weight_local = {}, + SparseMatrix W_block_local = {}); + + /** @brief Default Destructor */ + ~Graph() noexcept = default; + + /** @brief Copy Constructor */ + Graph(const Graph& other) noexcept; + + /** @brief Move Constructor */ + Graph(Graph&& other) noexcept; + + /** @brief Assignment Operator */ + Graph& operator=(Graph other) noexcept; + + /** @brief Swap two graphs */ + template + friend void swap(Graph& lhs, Graph& rhs) noexcept; + + // Local to global maps + std::vector vertex_map_; + + // Local partition of vertices + std::vector part_local_; + + // Graph relationships + SparseMatrix vertex_edge_local_; + ParMatrix edge_true_edge_; + ParMatrix edge_edge_; + + // I'm not sure this is `graph` information? + // Graph Weight / Block + std::vector weight_local_; + SparseMatrix W_local_; + + int global_vertices_; + int global_edges_; + +private: + template + void MakeLocalWeight(const std::vector& edge_map = {}, + const std::vector& global_weight = {}); + + template + void MakeLocalW(const SparseMatrix& W_global); +}; + +template +Graph::Graph(MPI_Comm comm, + const SparseMatrix& vertex_edge_global, + const std::vector& part_global, + const std::vector& weight_global, + const SparseMatrix& W_block_global) + : global_vertices_(vertex_edge_global.Rows()), + global_edges_(vertex_edge_global.Cols()) +{ + assert(static_cast(part_global.size()) == vertex_edge_global.Rows()); + + int myid; + MPI_Comm_rank(comm, &myid); + + SparseMatrix agg_vert = MakeSetEntity(part_global); + SparseMatrix proc_agg = MakeProcAgg(comm, agg_vert, vertex_edge_global); + + SparseMatrix proc_vert = proc_agg.Mult(agg_vert); + SparseMatrix proc_edge = proc_vert.Mult(vertex_edge_global); + + proc_edge.SortIndices(); + + 
vertex_map_ = proc_vert.GetIndices(myid); + std::vector edge_map = proc_edge.GetIndices(myid); + + vertex_edge_local_ = vertex_edge_global.GetSubMatrix(vertex_map_, edge_map); + vertex_edge_local_ = 1.0; + + int nvertices_local = proc_vert.RowSize(myid); + part_local_.resize(nvertices_local); + + for (int i = 0; i < nvertices_local; ++i) + { + part_local_[i] = part_global[vertex_map_[i]]; + } + + ShiftPartition(part_local_); + + edge_true_edge_ = MakeEdgeTrueEdge(comm, proc_edge, edge_map); + + ParMatrix edge_true_edge_T = edge_true_edge_.Transpose(); + edge_edge_ = edge_true_edge_.Mult(edge_true_edge_T); + + MakeLocalWeight(edge_map, weight_global); + MakeLocalW(W_block_global); +} + +template +template +void Graph::MakeLocalWeight(const std::vector& edge_map, + const std::vector& global_weight) +{ + int num_edges = vertex_edge_local_.Cols(); + + weight_local_.resize(num_edges); + + if (static_cast(global_weight.size()) == edge_true_edge_.GlobalCols()) + { + assert(static_cast(edge_map.size()) == num_edges); + + for (int i = 0; i < num_edges; ++i) + { + assert(std::fabs(global_weight[edge_map[i]]) > 1e-14); + weight_local_[i] = std::abs(global_weight[edge_map[i]]); + } + } + else + { + std::fill(std::begin(weight_local_), std::end(weight_local_), (W)1.0); + } + + const auto& edge_offd = edge_edge_.GetOffd(); + + assert(edge_offd.Rows() == num_edges); + + for (int i = 0; i < num_edges; ++i) + { + if (edge_offd.RowSize(i)) + { + weight_local_[i] *= (W)2.0; + } + } +} + +template +template +void Graph::MakeLocalW(const SparseMatrix& W_global) +{ + if (W_global.Rows() > 0) + { + W_local_ = W_global.GetSubMatrix(vertex_map_, vertex_map_); + W_local_ *= (W)-1.0; + } +} + +template +Graph::Graph(SparseMatrix vertex_edge_local, ParMatrix edge_true_edge, + std::vector part_local, + std::vector weight_local, + SparseMatrix W_block_local) + : part_local_(std::move(part_local)), + vertex_edge_local_(std::move(vertex_edge_local)), + edge_true_edge_(std::move(edge_true_edge)), 
+ edge_edge_(edge_true_edge_.Mult(edge_true_edge_.Transpose())), + weight_local_(std::move(weight_local)), + W_local_(std::move(W_block_local)), + global_edges_(edge_true_edge_.GlobalCols()) +{ + int num_vertices = vertex_edge_local_.Rows(); + int num_edges = vertex_edge_local_.Cols(); + + MPI_Comm comm = edge_true_edge_.GetComm(); + + auto vertex_starts = linalgcpp::GenerateOffsets(comm, num_vertices); + + global_vertices_ = vertex_starts.back(); + + vertex_map_.resize(num_vertices); + std::iota(std::begin(vertex_map_), std::end(vertex_map_), vertex_starts[0]); + + if (static_cast(weight_local_.size()) != num_edges) + { + MakeLocalWeight(); + } + + W_local_ *= (V)-1.0; +} + +template +Graph::Graph(const Graph& other) noexcept + : vertex_map_(other.vertex_map_), + part_local_(other.part_local_), + vertex_edge_local_(other.vertex_edge_local_), + edge_true_edge_(other.edge_true_edge_), + edge_edge_(other.edge_edge_), + weight_local_(other.weight_local_), + W_local_(other.W_local_), + global_vertices_(other.global_vertices_), + global_edges_(other.global_edges_) +{ + +} + +template +Graph::Graph(Graph&& other) noexcept +{ + swap(*this, other); +} + +template +Graph& Graph::operator=(Graph other) noexcept +{ + swap(*this, other); + + return *this; +} + +template +void swap(Graph& lhs, Graph& rhs) noexcept +{ + std::swap(lhs.vertex_map_, rhs.vertex_map_); + std::swap(lhs.part_local_, rhs.part_local_); + + swap(lhs.vertex_edge_local_, rhs.vertex_edge_local_); + swap(lhs.edge_true_edge_, rhs.edge_true_edge_); + swap(lhs.edge_edge_, rhs.edge_edge_); + + swap(lhs.weight_local_, rhs.weight_local_); + swap(lhs.W_local_, rhs.W_local_); + + std::swap(lhs.global_vertices_, rhs.global_vertices_); + std::swap(lhs.global_edges_, rhs.global_edges_); +} + +template +T GetVertexVector(const G& graph, const T& global_vect) +{ + return GetSubVector(global_vect, graph.vertex_map_); +} + +template +void WriteVertexVector(const G& graph, const T& vect, const std::string& filename) +{ + 
WriteVector(graph.edge_true_edge_.GetComm(), vect, filename, + graph.global_vertices_, graph.vertex_map_); +} + +template +Vector ReadVertexVector(const G& graph, const std::string& filename) +{ + return ReadVector(filename, graph.vertex_map_); +} + +template +BlockVector ReadVertexBlockVector(const G& graph, const std::string& filename) +{ + int num_vertices = graph.vertex_edge_local_.Rows(); + int num_edges = graph.vertex_edge_local_.Cols(); + + BlockVector vect({0, num_edges, num_edges + num_vertices}); + + vect.GetBlock(0) = 0.0; + vect.GetBlock(1) = ReadVertexVector(graph, filename); + + return vect; +} + + +} // namespace linalgcpp + +#endif /* GRAPH_HPP */ + diff --git a/modules/graphtools/include/graph_mis.hpp b/modules/graphtools/include/graph_mis.hpp new file mode 100644 index 0000000..feff974 --- /dev/null +++ b/modules/graphtools/include/graph_mis.hpp @@ -0,0 +1,178 @@ + +/** @file + + @brief Implementations of distributed MIS algorithm +*/ + +#ifndef MIS_HPP +#define MIS_HPP + +#include "graph.hpp" +#include "graph_utilities.hpp" + + +namespace linalgcpp +{ + + +/** @brief Generate a partition of dof using minimum intersection algorithm. + * Serial implementation. + * + * @param set_dof set to dof relationship + * @param dof_set dof to set relationship + * @returns partition vector indicating which MIS each dof belongs to + */ +template +std::vector GenerateMIS(const SparseMatrix& set_dof, + const SparseMatrix& dof_set); + +/** @brief Select MISs that contain at least one dof on the current processor. 
+ * + * @param mis_dof distributed mis to dof relationship + * @returns mis_dof with off-processor MIS removed + */ +ParMatrix SelectMIS(const ParMatrix& mis_dof); + +/** @brief Generate a MIS to dof relationship + * + * @param agg_dof agglomerate to dof relationship + * @returns mis_dof mis to dof relationship + */ +ParMatrix MakeMISDof(const ParMatrix& agg_dof); + +/** @brief Generate local face to mis relationship, + * where a face is any mis which belongs to more than + * one aggregate + * + * @param mis_agg mis to aggregate relationship + * @returns face_mis face to mis relationship + */ +template +SparseMatrix MakeFaceMIS(const SparseMatrix& mis_agg); + +/////////////////////// +/// Implementations /// +/////////////////////// + +template +std::vector GenerateMIS(const SparseMatrix& set_dof, + const SparseMatrix& dof_set) +{ + const auto& set_dof_I = set_dof.GetIndptr(); + const auto& set_dof_J = set_dof.GetIndices(); + + const auto& dof_set_I = dof_set.GetIndptr(); + const auto& dof_set_J = dof_set.GetIndices(); + + int num_dofs = dof_set.Rows(); + int num_mises = 0; + + std::vector distributed(num_dofs, false); + std::vector mises(num_dofs, 0); + + // Loop over all DoFs and build MISes. + for (int i = 0; i < num_dofs; ++i) + { + // If DoF i is already assigned. + if (distributed[i]) + { + continue; + } + + // Loop over all AEs that contain DoF i. + for (int j = dof_set_I[i]; j < dof_set_I[i+1]; ++j) + { + // Loop over all DoFs in these AEs. + for (int k = set_dof_I[dof_set_J[j]]; + k < set_dof_I[dof_set_J[j]+1]; ++k) + { + if (!distributed[set_dof_J[k]]) + { + ++mises[set_dof_J[k]]; // Count the current AE. + } + } + } + + // Loop over all AEs that contain DoF i. + for (int j = dof_set_I[i]; j < dof_set_I[i+1]; ++j) + { + // Loop over all DoFs in these AEs. + for (int k = set_dof_I[dof_set_J[j]]; + k < set_dof_I[dof_set_J[j]+1]; ++k) + { + if (distributed[set_dof_J[k]]) + { + continue; + } + // If the DoF should be in the same MIS as DoF i. 
+ if (mises[set_dof_J[k]] == dof_set_I[i+1] - + dof_set_I[i] && + mises[set_dof_J[k]] == dof_set_I[set_dof_J[k]+1] - + dof_set_I[set_dof_J[k]]) + { + mises[set_dof_J[k]] = num_mises; + distributed[set_dof_J[k]] = true; + } else + { + mises[set_dof_J[k]] = 0; + } + } + } + + ++num_mises; + } + + return mises; +} + +template +SparseMatrix MakeFaceMIS(const SparseMatrix& mis_agg) +{ + std::vector indptr(1, 0); + std::vector indices; + + int num_mis = mis_agg.Rows(); + + for (int i = 0; i < num_mis; ++i) + { + if (mis_agg.RowSize(i) > 1) + { + indptr.push_back(indptr.size()); + indices.push_back(i); + } + } + + int num_faces = indices.size(); + std::vector data(num_faces, 1.0); + + return SparseMatrix(std::move(indptr), std::move(indices), std::move(data), + num_faces, num_mis); +} + +template +SparseMatrix MakeFaceMIS(const ParMatrix& mis_agg) +{ + std::vector indptr(1, 0); + std::vector indices; + + int num_mis = mis_agg.Rows(); + + for (int i = 0; i < num_mis; ++i) + { + if (mis_agg.RowSize(i) > 1) + { + indptr.push_back(indptr.size()); + indices.push_back(i); + } + } + + int num_faces = indices.size(); + std::vector data(num_faces, 1.0); + + return SparseMatrix(std::move(indptr), std::move(indices), std::move(data), + num_faces, num_mis); +} + +} //namespace linalgcpp + +#endif // MIS_HPP diff --git a/modules/graphtools/include/graph_partition.hpp b/modules/graphtools/include/graph_partition.hpp new file mode 100644 index 0000000..24d7f50 --- /dev/null +++ b/modules/graphtools/include/graph_partition.hpp @@ -0,0 +1,27 @@ +#ifndef PARPARTITION_HPP +#define PARPARTITION_HPP + +#include "graph_utilities.hpp" + +namespace linalgcpp +{ + +ParMatrix ParPartition(const ParMatrix& A, int num_parts); +ParMatrix UnDistributer(const ParMatrix& A); + +void ParNormalizeRows(ParMatrix& A); + +void SortColumnMap(std::vector& col_map, std::vector& indices); + +double ComputeT(const linalgcpp::SparseMatrix& A); +double ComputeT(const ParMatrix& A); + +double CalcQ(const 
linalgcpp::SparseMatrix& A, + const linalgcpp::SparseMatrix& P); +double CalcQ(const ParMatrix& A, const ParMatrix& PT); + +} // namespace linalgcpp + +#endif // PARPARTITION_HPP + + diff --git a/modules/graphtools/include/graph_topology.hpp b/modules/graphtools/include/graph_topology.hpp new file mode 100644 index 0000000..cda54fe --- /dev/null +++ b/modules/graphtools/include/graph_topology.hpp @@ -0,0 +1,535 @@ +/*BHEADER********************************************************************** + * + * Copyright (c) 2018, Lawrence Livermore National Security, LLC. + * Produced at the Lawrence Livermore National Laboratory. + * LLNL-CODE-745247. All Rights reserved. See file COPYRIGHT for details. + * + * This file is part of smoothG. For more information and source code + * availability, see https://www.github.com/llnl/smoothG. + * + * smoothG is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License (as published by the Free + * Software Foundation) version 2.1 dated February 1999. + * + ***********************************************************************EHEADER*/ + +/** @file + + @brief GraphTopology class +*/ + +#ifndef GRAPHTOPOLOGY_HPP +#define GRAPHTOPOLOGY_HPP + +#include "graph.hpp" +#include "graph_mis.hpp" +#include "graph_utilities.hpp" + +namespace linalgcpp +{ + +/** + @brief Class to represent the topology of a graph as it is coarsened. + + Mostly a container for a bunch of topology tables. 
+*/ + +template +class GraphTopology +{ +public: + /** @brief Default Constructor */ + GraphTopology() = default; + + /** + @brief Build agglomerated topology relation tables of a given graph + + @param graph Distrubted graph information + */ + GraphTopology(const Graph& graph); + + /** + @brief Build agglomerated topology relation tables of the coarse level + graph in a given GraphTopology object + + @param finer_graph_topology finer level graph topology + @param coarsening_factor intended number of vertices in an aggregate + */ + GraphTopology(const GraphTopology& fine_topology, double coarsening_factor); + + /** + @brief Build agglomerated topology relation tables of a given graph + Using already distributed information + + @param vertex_edge local vertex to edge relationship + @param partition local vertex partition + @param edge_true_edge edge to true edge relationship + */ + GraphTopology(const SparseMatrix& vertex_edge, + const std::vector& partition, + ParMatrix edge_true_edge); + + /** @brief Default Destructor */ + ~GraphTopology() noexcept = default; + + /** @brief Copy Constructor */ + GraphTopology(const GraphTopology& other) noexcept; + + /** @brief Move Constructor */ + GraphTopology(GraphTopology&& other) noexcept; + + /** @brief Assignment Operator */ + GraphTopology& operator=(GraphTopology other) noexcept; + + /** @brief Swap two topologies */ + template + friend void swap(GraphTopology& lhs, GraphTopology& rhs) noexcept; + + int NumAggs() const { return agg_vertex_local_.Rows(); } + int NumVertices() const { return agg_vertex_local_.Cols(); } + int NumEdges() const { return agg_edge_local_.Cols(); } + int NumFaces() const { return agg_face_local_.Cols(); } + + int GlobalNumAggs() const { return agg_ext_vertex_.GlobalRows(); } + int GlobalNumVertices() const { return agg_ext_vertex_.GlobalCols(); } + int GlobalNumEdges() const { return face_edge_.GlobalCols(); } + int GlobalNumFaces() const { return face_edge_.GlobalRows(); } + + // Local 
topology + SparseMatrix agg_vertex_local_; // Aggregate to vertex, not exteneded + SparseMatrix agg_edge_local_; // Aggregate to edge, not extended + SparseMatrix face_agg_local_; // Face to aggregate + SparseMatrix agg_face_local_; // Aggregate to face + + // Global topology + ParMatrix face_true_face_; // Face to true face + ParMatrix face_edge_; // Face to false edge + ParMatrix agg_ext_vertex_; // Aggregate to extended vertex + ParMatrix agg_ext_edge_; // Aggregate to extended edge + ParMatrix edge_true_edge_; // Edge to true edge + +private: + void Init(const SparseMatrix& vertex_edge, + const std::vector& partition, + ParMatrix edge_true_edge); + + SparseMatrix MakeFaceIntAgg(const ParMatrix& agg_agg); + + SparseMatrix MakeFaceEdge(const ParMatrix& agg_agg, + const ParMatrix& edge_edge, + const SparseMatrix& agg_edge_ext, + const SparseMatrix& face_int_agg_edge); + + SparseMatrix ExtendFaceAgg(const ParMatrix& agg_agg, + const SparseMatrix& face_int_agg); + +}; + +template +GraphTopology::GraphTopology(const Graph& graph) +{ + const auto& vertex_edge = graph.vertex_edge_local_; + const auto& part = graph.part_local_; + + Init(vertex_edge, part, graph.edge_true_edge_); +} + +template +GraphTopology::GraphTopology(const GraphTopology& fine_topology, + double coarsening_factor) +{ + const auto& vertex_edge = fine_topology.agg_face_local_; + const auto& part = PartitionAAT(vertex_edge, coarsening_factor, 1.2); + + Init(vertex_edge, part, fine_topology.face_true_face_); +} + +template +GraphTopology::GraphTopology(const SparseMatrix& vertex_edge, + const std::vector& partition, + ParMatrix edge_true_edge) +{ + const auto true_edge_edge = edge_true_edge.Transpose(); + const auto edge_edge = edge_true_edge.Mult(true_edge_edge); + + Init(vertex_edge, partition, std::move(edge_true_edge)); +} + +template +SparseMatrix RemoveEmptyRows(const SparseMatrix& mat) +{ + CooMatrix coo; + + int num_rows = mat.Rows(); + + std::vector indptr(1, 0); + std::vector indices = 
mat.GetIndices(); + std::vector data = mat.GetData(); + + for (int i = 0; i < num_rows; ++i) + { + int row_size = mat.RowSize(i); + + if (mat.RowSize(i) > 0) + { + indptr.push_back(indptr.back() + row_size); + } + } + + linalgcpp_verify(indptr.back() == indices.size()); + + int new_rows = indptr.size() - 1; + int new_cols = mat.Cols(); + + return SparseMatrix(std::move(indptr), std::move(indices), std::move(data), + new_rows, new_cols); +} + +template +bool has_empty_row(const SparseMatrix& mat) +{ + for (int i = 0; i < mat.Rows(); ++i) + { + if (mat.RowSize(i) == 0) + { + return true; + } + } + return false; +} + +inline +bool has_empty_row(const ParMatrix& mat) +{ + for (int i = 0; i < mat.Rows(); ++i) + { + if (mat.RowSize(i) == 0) + { + return true; + } + } + return false; +} + + +template +void GraphTopology::Init(const SparseMatrix& vertex_edge, + const std::vector& partition, + ParMatrix edge_true_edge) +{ + auto edge_edge = edge_true_edge.Mult(edge_true_edge.Transpose()); + edge_true_edge_ = std::move(edge_true_edge); + + MPI_Comm comm = edge_true_edge_.GetComm(); + + agg_vertex_local_ = MakeSetEntity(partition); + + int num_vertices = vertex_edge.Rows(); + int num_edges = vertex_edge.Cols(); + int num_aggs = agg_vertex_local_.Rows(); + + auto starts = linalgcpp::GenerateOffsets(comm, {num_vertices, num_edges, num_aggs}); + + const auto& vertex_starts = starts[0]; + const auto& edge_starts = starts[1]; + const auto& agg_starts = starts[2]; + + ParMatrix vertex_edge_d(comm, vertex_starts, edge_starts, Duplicate(vertex_edge)); + ParMatrix vertex_true_edge = RemoveLargeEntries(vertex_edge_d.Mult(edge_true_edge_)); + ParMatrix true_edge_vertex = vertex_true_edge.Transpose(); + ParMatrix true_edge_edge = edge_true_edge_.Transpose(); + + ParMatrix agg_vertex_d(comm, Duplicate(agg_vertex_local_)); + ParMatrix agg_true_edge_d = agg_vertex_d.Mult(vertex_true_edge); + + SparseMatrix agg_edge_ext = agg_vertex_local_.Mult(vertex_edge); + agg_edge_ext.SortIndices(); + + 
//agg_edge_local_ = RemoveLargeEntries(agg_edge_ext, 1.5); + //auto edge_vertex = vertex_edge.Transpose(); + //agg_edge_local_ = RemoveLowDegree(agg_edge_ext, edge_vertex); + agg_edge_local_ = Duplicate(RemoveLowDegree(agg_true_edge_d, true_edge_vertex).Mult(true_edge_edge).GetDiag()); + + agg_edge_local_ = 1.0; + + SparseMatrix edge_ext_agg = agg_edge_ext.template Transpose(); + + //ParMatrix edge_agg_d(comm, edge_starts, agg_starts, std::move(edge_ext_agg)); + ParMatrix edge_agg_d(comm, edge_starts, agg_starts, (edge_ext_agg)); + ParMatrix agg_edge_d = edge_agg_d.Transpose(); + + ParMatrix edge_agg_ext = edge_edge.Mult(edge_agg_d); + ParMatrix agg_agg = agg_edge_d.Mult(edge_agg_ext); + + const auto& edge_v_edge = true_edge_vertex.Mult(vertex_true_edge); + const auto& agg_r = MakeExtPermutation(agg_agg); + const auto& edge_r = MakeExtPermutation(edge_v_edge); + const auto& edge_r_T = edge_r.Transpose(); + const auto& agg_edge = agg_edge_d.Mult(edge_true_edge_); + + // HYPRE kindly adds explicit zeros on diagonal + auto agg_edge_r = RemoveLargeEntries(agg_r.Mult(agg_edge).Mult(edge_r_T), 0.5); + + auto local_agg_edge = agg_edge_r.GetDiag(); + auto local_edge_agg = agg_edge_r.GetDiag().Transpose(); + + auto mis = GenerateMIS(local_agg_edge, local_edge_agg); + auto mis_dof = MakeSetEntity(mis); + + auto mis_agg_f = mis_dof.Mult(local_edge_agg); + auto agg_mis_f = mis_agg_f.Transpose(); + + auto mis_agg_ur = ParMatrix(comm, mis_agg_f).Mult(agg_r); + auto face_mis = MakeFaceMIS(mis_agg_f); + + //auto face_agg = face_mis.Mult(mis_agg_f); + auto face_dof = face_mis.Mult(mis_dof); + + ParMatrix face_dof_par(comm, std::move(face_dof)); + //ParMatrix face_agg_par(comm, std::move(face_agg)); + + ParMatrix face_dof_ur = RemoveLargeEntries(face_dof_par.Mult(edge_r), 0.5); + //ParMatrix face_agg_ur = RemoveLargeEntries(face_agg_par.Mult(agg_r), 0.5); + + face_dof_par = 1.0; + //face_agg_par = 1.0; + + auto face_edge = RemoveEmptyRows(face_dof_ur.Mult(true_edge_edge).GetDiag()); 
+ auto face_agg_local = face_edge.Mult(edge_ext_agg); + face_edge_ = ParMatrix(comm, std::move(face_edge)); + + auto face_face = (face_edge_.Mult(edge_edge).Mult(face_edge_.Transpose())); + face_true_face_ = MakeEntityTrueEntity(face_face); + face_true_face_ = 1.0; + + //face_agg_local_ = RemoveEmptyRows(Duplicate(face_agg_ur.GetDiag())); + face_agg_local_ = Duplicate(face_agg_local); + face_agg_local_ = 1.0; + + agg_face_local_ = face_agg_local_.Transpose(); + + agg_ext_vertex_ = agg_edge.Mult(true_edge_vertex); + agg_ext_vertex_ = 1.0; + vertex_true_edge = 1.0; + + //ParMatrix agg_ext_edge_ext = agg_ext_vertex_.Mult(vertex_true_edge); + //agg_ext_edge_ = RemoveLowDegree(agg_ext_edge_ext, true_edge_vertex); + + agg_ext_edge_ = agg_true_edge_d; +} + +template +GraphTopology::GraphTopology(const GraphTopology& other) noexcept + : agg_vertex_local_(other.agg_vertex_local_), + agg_edge_local_(other.agg_edge_local_), + face_agg_local_(other.face_agg_local_), + agg_face_local_(other.agg_face_local_), + face_true_face_(other.face_true_face_), + face_edge_(other.face_edge_), + agg_ext_vertex_(other.agg_ext_vertex_), + agg_ext_edge_(other.agg_ext_edge_), + edge_true_edge_(other.edge_true_edge_) +{ + +} + +template +GraphTopology::GraphTopology(GraphTopology&& other) noexcept +{ + swap(*this, other); +} + +template +GraphTopology& GraphTopology::operator=(GraphTopology other) noexcept +{ + swap(*this, other); + + return *this; +} + +template +void swap(GraphTopology& lhs, GraphTopology& rhs) noexcept +{ + swap(lhs.agg_vertex_local_, rhs.agg_vertex_local_); + swap(lhs.agg_edge_local_, rhs.agg_edge_local_); + swap(lhs.face_agg_local_, rhs.face_agg_local_); + swap(lhs.agg_face_local_, rhs.agg_face_local_); + + swap(lhs.face_true_face_, rhs.face_true_face_); + swap(lhs.face_edge_, rhs.face_edge_); + swap(lhs.agg_ext_vertex_, rhs.agg_ext_vertex_); + swap(lhs.agg_ext_edge_, rhs.agg_ext_edge_); + swap(lhs.edge_true_edge_, rhs.edge_true_edge_); +} + +template +SparseMatrix 
GraphTopology::MakeFaceIntAgg(const ParMatrix& agg_agg) +{ + const auto& agg_agg_diag = agg_agg.GetDiag(); + + int num_aggs = agg_agg_diag.Rows(); + int num_faces = agg_agg_diag.nnz() - agg_agg_diag.Rows(); + + assert(num_faces % 2 == 0); + num_faces /= 2; + + std::vector indptr(num_faces + 1); + std::vector indices(num_faces * 2); + std::vector data(num_faces * 2, 1); + + indptr[0] = 0; + + const auto& agg_indptr = agg_agg_diag.GetIndptr(); + const auto& agg_indices = agg_agg_diag.GetIndices(); + int rows = agg_agg_diag.Rows(); + int count = 0; + + for (int i = 0; i < rows; ++i) + { + for (int j = agg_indptr[i]; j < agg_indptr[i + 1]; ++j) + { + if (agg_indices[j] > i) + { + indices[count * 2] = i; + indices[count * 2 + 1] = agg_indices[j]; + + count++; + + indptr[count] = count * 2; + } + } + } + + assert(count == num_faces); + + return SparseMatrix(std::move(indptr), std::move(indices), std::move(data), + num_faces, num_aggs); +} + +template +SparseMatrix GraphTopology::MakeFaceEdge(const ParMatrix& agg_agg, + const ParMatrix& edge_ext_agg, + const SparseMatrix& agg_edge_ext, + const SparseMatrix& face_int_agg_edge) +{ + const auto& agg_agg_offd = agg_agg.GetOffd(); + const auto& edge_ext_agg_offd = edge_ext_agg.GetOffd(); + + int num_aggs = agg_agg_offd.Rows(); + int num_edges = face_int_agg_edge.Cols(); + int num_faces_int = face_int_agg_edge.Rows(); + int num_faces = num_faces_int + agg_agg_offd.nnz(); + + for (int i = 0; i < 3; ++i) + { + if (agg_agg.GetMyId() == i) + { + printf("PRocs: %d\n", i); + agg_agg.GetDiag().ToDense().Print("agg agg diag", std::cout, 4, 0); + agg_agg.GetOffd().ToDense().Print("agg agg offd", std::cout, 4, 0); + } + MPI_Barrier(agg_agg.GetComm()); + } + printf("%d Num Faces: %d\n", agg_agg.GetMyId(), num_faces); + + std::vector indptr; + std::vector indices; + + indptr.reserve(num_faces + 1); + + const auto& ext_indptr = face_int_agg_edge.GetIndptr(); + const auto& ext_indices = face_int_agg_edge.GetIndices(); + const auto& ext_data 
= face_int_agg_edge.GetData(); + + indptr.push_back(0); + + for (int i = 0; i < num_faces_int; i++) + { + for (int j = ext_indptr[i]; j < ext_indptr[i + 1]; j++) + { + if (ext_data[j] > 1.5) + { + indices.push_back(ext_indices[j]); + } + } + + indptr.push_back(indices.size()); + } + + const auto& agg_edge_indptr = agg_edge_ext.GetIndptr(); + const auto& agg_edge_indices = agg_edge_ext.GetIndices(); + + const auto& agg_offd_indptr = agg_agg_offd.GetIndptr(); + const auto& agg_offd_indices = agg_agg_offd.GetIndices(); + const auto& agg_colmap = agg_agg.GetColMap(); + + const auto& edge_offd_indptr = edge_ext_agg_offd.GetIndptr(); + const auto& edge_offd_indices = edge_ext_agg_offd.GetIndices(); + const auto& edge_colmap = edge_ext_agg.GetColMap(); + + for (int i = 0; i < num_aggs; ++i) + { + for (int j = agg_offd_indptr[i]; j < agg_offd_indptr[i + 1]; ++j) + { + int shared = agg_colmap[agg_offd_indices[j]]; + + for (int k = agg_edge_indptr[i]; k < agg_edge_indptr[i + 1]; ++k) + { + int edge = agg_edge_indices[k]; + + if (edge_ext_agg_offd.RowSize(edge) > 0) + { + int edge_loc = edge_offd_indices[edge_offd_indptr[edge]]; + + if (edge_colmap[edge_loc] == shared) + { + indices.push_back(edge); + } + } + } + + indptr.push_back(indices.size()); + } + } + + assert(static_cast(indptr.size()) == num_faces + 1); + + std::vector data(indices.size(), 1.0); + + return SparseMatrix(std::move(indptr), std::move(indices), std::move(data), + num_faces, num_edges); +} + +template +SparseMatrix GraphTopology::ExtendFaceAgg(const ParMatrix& agg_agg, + const SparseMatrix& face_int_agg) +{ + const auto& agg_agg_offd = agg_agg.GetOffd(); + + int num_aggs = agg_agg.Rows(); + + std::vector indptr(face_int_agg.GetIndptr()); + std::vector indices(face_int_agg.GetIndices()); + + const auto& agg_offd_indptr = agg_agg_offd.GetIndptr(); + + for (int i = 0; i < num_aggs; ++i) + { + for (int j = agg_offd_indptr[i]; j < agg_offd_indptr[i + 1]; ++j) + { + indices.push_back(i); + 
indptr.push_back(indices.size()); + } + } + + int num_faces = indptr.size() - 1; + + std::vector data(indices.size(), 1.0); + + return SparseMatrix(std::move(indptr), std::move(indices), std::move(data), + num_faces, num_aggs); +} + +} // namespace linalgcpp + +#endif /* GRAPHTOPOLOGY_HPP */ diff --git a/modules/graphtools/include/graph_utilities.hpp b/modules/graphtools/include/graph_utilities.hpp new file mode 100644 index 0000000..21922c2 --- /dev/null +++ b/modules/graphtools/include/graph_utilities.hpp @@ -0,0 +1,594 @@ +#ifndef GRAPH_UTILITIES_HPP +#define GRAPH_UTILITIES_HPP + +#include "partition.hpp" +#include "parlinalgcpp.hpp" + +namespace linalgcpp +{ + +/** @brief Creates relationship entity_set + + @param partition partition of entities into sets + @returns entity_set +*/ +template +SparseMatrix MakeEntitySet(std::vector partition); + +/** @brief Creates relationship set_entity + + @param partition partition of entities into sets + @returns set_entity +*/ +template +SparseMatrix MakeSetEntity(std::vector partition); + +/** @brief Remove entries below the tolerance completely + + @param mat matrix from with to remove entries + @param tol remove entries below this tolerance + @returns matrix without entries below tolerance +*/ +template +SparseMatrix RemoveLargeEntries(const SparseMatrix& mat, double tol = 0.0); + +/** @brief Remove entries below the tolerance completely + + @param mat matrix from with to remove entries + @param tol remove entries below this tolerance + @returns matrix without entries below tolerance +*/ +ParMatrix RemoveLargeEntries(const ParMatrix& mat, double tol = 0.0); + +/** @brief Partitions matrix = A * A^T + + @param A matrix to partition + @param coarsening_factor determine number of parts to partition into + @returns partitioning of A * A^T +*/ +template +std::vector PartitionAAT(const SparseMatrix& A, double coarsening_factor, + double ubal = 1.0, bool contig = true); + +/** @brief Create entity to true entity relationship + 
@param entity_entity entity to entity relationship on false dofs + @return entity_true_entity entity to true entity +*/ +ParMatrix MakeEntityTrueEntity(const ParMatrix& entity_entity); + +/** @brief Read serial vector from file and extract local portion + + @param filename name of vector file + @param local_to_global set of local indices to extract + @returns local vector +*/ +template +Vector ReadVector(const std::string& filename, + const std::vector& local_to_global); + +/** @brief Write a serial vector to file, combining local vectors from all processors + + @param vect vector to write + @param filename name of vector file + @param global_size global size of vector + @param local_to_global map of local indices to global indices +*/ +template > +void WriteVector(MPI_Comm comm, const T& vect, const std::string& filename, int global_size, + const std::vector& local_to_global); + +/** + @brief Extract a subvector from a vector + + @param global_vect global vector from which to extract + @param map indices to extract + @returns subvector +*/ +template > +T GetSubVector(const T& global_vect, const std::vector& map); + + +/// SparseMatrix triple product +template +SparseMatrix Mult(const SparseMatrix& R, const SparseMatrix& A, const SparseMatrix& P); + +/// ParMatrix triple product +ParMatrix Mult(const ParMatrix& R, const ParMatrix& A, const ParMatrix& P); + +/// Nonzero density of a matrix +template +double Density(const SparseMatrix& A); + +/** @brief Distribute sets to processors +*/ +template +SparseMatrix MakeProcAgg(MPI_Comm comm, const SparseMatrix& agg_vertex, + const SparseMatrix& vertex_edge); + +/** @brief Create an edge to true edge relationship + @param comm MPI Communicator + @param proc_edge processor edge relationship + @param edge_map local to global edge map + @returns global edge to true edge +*/ +template +ParMatrix MakeEdgeTrueEdge(MPI_Comm comm, const SparseMatrix& proc_edge, + const std::vector& edge_map); + +/// Shifts partition such that 
indices are in [0, num_parts] +template +void ShiftPartition(std::vector& partition); + +/// Duplicate SparseMatrix to change its template type +template +SparseMatrix Duplicate(const SparseMatrix& input); + +/// Remove entries which are of less degree than the dof +template +SparseMatrix RemoveLowDegree(const SparseMatrix& agg_dof, + const SparseMatrix& dof_degree); + +ParMatrix RemoveLowDegree(const ParMatrix& agg_dof, const ParMatrix& dof_degree); + +/** @brief Create an extension permutation P such that + * the diagonal block of PT*A*P will contain + * entities from surrounding processors + * + * @param entity_entity entity to entity relationship + * @returns ParMatrix P containing the extension permutation + */ +ParMatrix MakeExtPermutation(const ParMatrix& entity_entity); + +/** @brief Check if an operator is symmetric by checking its transformation of random vectors + * @param A Operator to check + * @param verbose output transformation values + * @returns bool true if symmetric, false otherwise + */ +bool CheckSymmetric(const linalgcpp::Operator& A, bool verbose = false); + +/** @brief Check if a distributed operator is symmetric by checking its transformation of random vectors + * @param A Operator to check + * @param verbose output transformation values + * @returns bool true if symmetric, false otherwise + */ +bool CheckSymmetric(const linalgcpp::ParOperator& A, bool verbose = false); + +/** @brief Broadcast local vertex data to other connected processors + * @param comm_pkg HYPRE Communication package containing send/recv info + * @param verbose output transformation values + * @returns vector containing data from other processors + */ +template +std::vector Broadcast(const linalgcpp::ParCommPkg& comm_pkg, + const std::vector& send_vect); + +//////////////////////////////// +// Templated Implementations // +//////////////////////////////// + +template +SparseMatrix Mult(const SparseMatrix& R, const SparseMatrix& A, const SparseMatrix& P) +{ + return 
R.Mult(A).Mult(P); +} + +template +SparseMatrix MakeEntitySet(std::vector partition) +{ + if (partition.size() == 0) + { + return SparseMatrix(); + } + + const int num_parts = *std::max_element(std::begin(partition), std::end(partition)) + 1; + const int num_vert = partition.size(); + + std::vector indptr(num_vert + 1); + std::vector data(num_vert, 1.0); + + std::iota(std::begin(indptr), std::end(indptr), 0); + + return SparseMatrix(std::move(indptr), std::move(partition), std::move(data), + num_vert, num_parts); +} + +template +SparseMatrix MakeSetEntity(std::vector partition) +{ + return MakeEntitySet(std::move(partition)).Transpose(); +} + +template +SparseMatrix RemoveLargeEntries(const SparseMatrix& mat, double tol) +{ + int rows = mat.Rows(); + int cols = mat.Cols(); + + const auto& mat_indptr = mat.GetIndptr(); + const auto& mat_indices = mat.GetIndices(); + const auto& mat_data = mat.GetData(); + + std::vector indptr(rows + 1); + std::vector indices; + std::vector data; + + indices.reserve(mat.nnz()); + data.reserve(mat.nnz()); + + for (int i = 0; i < rows; ++i) + { + indptr[i] = indices.size(); + + for (int j = mat_indptr[i]; j < mat_indptr[i + 1]; ++j) + { + if (std::fabs(mat_data[j]) > tol) + { + indices.push_back(mat_indices[j]); + data.push_back(mat_data[j]); + } + } + } + + indptr[rows] = indices.size(); + + return SparseMatrix(std::move(indptr), std::move(indices), std::move(data), + rows, cols); +} + +template +std::vector PartitionAAT(const SparseMatrix& A, double coarsening_factor, + double ubal, bool contig) +{ + SparseMatrix A_T = A.Transpose(); + SparseMatrix AA_T = A.Mult(A_T); + + int num_parts = std::max(1.0, (A.Rows() / coarsening_factor) + 0.5); + + return Partition(AA_T, num_parts, ubal, contig); +} + +template +Vector ReadVector(const std::string& filename, + const std::vector& local_to_global) +{ + std::vector global_vect = linalgcpp::ReadText(filename); + + int local_size = local_to_global.size(); + + Vector local_vect(local_size); + + 
for (int i = 0; i < local_size; ++i) + { + local_vect[i] = global_vect[local_to_global[i]]; + } + + return local_vect; +} + +template +void WriteVector(MPI_Comm comm, const T& vect, const std::string& filename, int global_size, + const std::vector& local_to_global) +{ + assert(global_size > 0); + assert(vect.size() <= global_size); + + int myid; + int num_procs; + MPI_Comm_size(comm, &num_procs); + MPI_Comm_rank(comm, &myid); + + std::vector global_global(global_size, 0.0); + std::vector global_local(global_size, 0.0); + + int local_size = local_to_global.size(); + + for (int i = 0; i < local_size; ++i) + { + global_local[local_to_global[i]] = vect[i]; + } + + MPI_Scan(global_local.data(), global_global.data(), global_size, + MPI_DOUBLE, MPI_SUM, comm); + + if (myid == num_procs - 1) + { + linalgcpp::WriteText(global_global, filename); + } +} + +template +T GetSubVector(const T& global_vect, const std::vector& map) +{ + int size = map.size(); + + T local_vect(size); + + for (int i = 0; i < size; ++i) + { + local_vect[i] = global_vect[map[i]]; + } + + return local_vect; +} + +template +double Density(const SparseMatrix& A) +{ + + double denom = A.Rows() * (double) A.Cols(); + return A.nnz() / denom; +} + +template +SparseMatrix MakeProcAgg(MPI_Comm comm, const SparseMatrix& agg_vertex, + const SparseMatrix& vertex_edge) +{ + int num_procs; + int num_aggs = agg_vertex.Rows(); + + MPI_Comm_size(comm, &num_procs); + + if (num_procs == 0) + { + std::vector trivial_partition(num_aggs, 0); + return MakeSetEntity(std::move(trivial_partition)); + } + + SparseMatrix agg_edge = agg_vertex.Mult(vertex_edge); + SparseMatrix agg_agg = agg_edge.Mult(agg_edge.Transpose()); + + // Metis doesn't behave well w/ very dense sparse partition + // so we partition by hand if aggregates are densely connected + const double density = Density(agg_agg); + const double density_tol = 0.7; + + std::vector partition; + + if (density < density_tol) + { + double ubal = 1.0; + partition = 
Partition(agg_agg, num_procs, ubal); + } + else + { + partition.reserve(num_aggs); + + int num_each = num_aggs / num_procs; + int num_left = num_aggs % num_procs; + + for (int proc = 0; proc < num_procs; ++proc) + { + int local_num = num_each + (proc < num_left ? 1 : 0); + + for (int i = 0; i < local_num; ++i) + { + partition.push_back(proc); + } + } + + assert(static_cast(partition.size()) == num_aggs); + } + + SparseMatrix proc_agg = MakeSetEntity(std::move(partition)); + + assert(proc_agg.Cols() == num_aggs); + assert(proc_agg.Rows() == num_procs); + + return proc_agg; +} + +template +ParMatrix MakeEdgeTrueEdge(MPI_Comm comm, const SparseMatrix& proc_edge, + const std::vector& edge_map) +{ + int myid; + int num_procs; + + MPI_Comm_size(comm, &num_procs); + MPI_Comm_rank(comm, &myid); + + SparseMatrix edge_proc = proc_edge.Transpose(); + + int num_edges_local = proc_edge.RowSize(myid); + int num_tedges_global = proc_edge.Cols(); + + std::vector tedge_counter(num_procs + 1, 0); + + for (int i = 0; i < num_tedges_global; ++i) + { + tedge_counter[edge_proc.GetIndices(i)[0] + 1]++; + } + + int num_tedges_local = tedge_counter[myid + 1]; + int num_edge_diff = num_edges_local - num_tedges_local; + std::partial_sum(std::begin(tedge_counter), std::end(tedge_counter), + std::begin(tedge_counter)); + + assert(tedge_counter.back() == static_cast(num_tedges_global)); + + std::vector edge_perm(num_tedges_global); + + for (int i = 0; i < num_tedges_global; ++i) + { + edge_perm[i] = tedge_counter[edge_proc.GetIndices(i)[0]]++; + } + + for (int i = num_procs - 1; i > 0; i--) + { + tedge_counter[i] = tedge_counter[i - 1]; + } + tedge_counter[0] = 0; + + std::vector diag_indptr(num_edges_local + 1); + std::vector diag_indices(num_tedges_local); + std::vector diag_data(num_tedges_local, 1.0); + + std::vector offd_indptr(num_edges_local + 1); + std::vector offd_indices(num_edge_diff); + std::vector offd_data(num_edge_diff, 1.0); + std::vector col_map(num_edge_diff); + std::vector> 
offd_map(num_edge_diff); + + diag_indptr[0] = 0; + offd_indptr[0] = 0; + + int tedge_begin = tedge_counter[myid]; + int tedge_end = tedge_counter[myid + 1]; + + int diag_counter = 0; + int offd_counter = 0; + + for (int i = 0; i < num_edges_local; ++i) + { + int tedge = edge_perm[edge_map[i]]; + + if ((tedge >= tedge_begin) && (tedge < tedge_end)) + { + diag_indices[diag_counter++] = tedge - tedge_begin; + } + else + { + offd_map[offd_counter].first = tedge; + offd_map[offd_counter].second = offd_counter; + offd_counter++; + } + + diag_indptr[i + 1] = diag_counter; + offd_indptr[i + 1] = offd_counter; + } + + assert(offd_counter == static_cast(num_edge_diff)); + + auto compare = [] (const std::pair& lhs, + const std::pair& rhs) + { + return lhs.first < rhs.first; + }; + + std::sort(std::begin(offd_map), std::end(offd_map), compare); + + for (int i = 0; i < offd_counter; ++i) + { + offd_indices[offd_map[i].second] = i; + col_map[i] = offd_map[i].first; + } + + auto starts = linalgcpp::GenerateOffsets(comm, {num_edges_local, num_tedges_local}); + + SparseMatrix diag(std::move(diag_indptr), std::move(diag_indices), std::move(diag_data), + num_edges_local, num_tedges_local); + + SparseMatrix offd(std::move(offd_indptr), std::move(offd_indices), std::move(offd_data), + num_edges_local, num_edge_diff); + + return ParMatrix(comm, starts[0], starts[1], + std::move(diag), std::move(offd), + std::move(col_map)); +} + +template +void ShiftPartition(std::vector& partition) +{ + int min_part = *std::min_element(std::begin(partition), std::end(partition)); + + for (auto& i : partition) + { + i -= min_part; + } + + linalgcpp::RemoveEmpty(partition); +} + +template +std::vector Broadcast(const linalgcpp::ParCommPkg& comm_pkg, + const std::vector& send_vect) +{ + int num_send = comm_pkg.num_sends_; + int num_recv = comm_pkg.num_recvs_; + + std::vector send_buffer(comm_pkg.send_map_starts_.back(), 0); + std::vector recv_buffer(comm_pkg.recv_vec_starts_.back(), -1.0); + + std::vector 
requests (num_send + num_recv); + MPI_Datatype datatype = std::is_same::value ? MPI_DOUBLE : MPI_INT; + + int j = 0; + + for (int i = 0; i < comm_pkg.num_recvs_; i++) + { + int ip = comm_pkg.recv_procs_[i]; + auto vec_start = comm_pkg.recv_vec_starts_[i]; + auto vec_len = comm_pkg.recv_vec_starts_[i + 1] - vec_start; + MPI_Irecv(&recv_buffer[vec_start], vec_len, datatype, ip, 0, comm_pkg.comm_, &requests[j++]); + } + + for (int i = 0; i < comm_pkg.num_sends_; i++) + { + auto vec_start = comm_pkg.send_map_starts_[i]; + auto vec_len = comm_pkg.send_map_starts_[i + 1] - vec_start; + + for (int k = vec_start; k < vec_start + vec_len; ++k) + { + send_buffer[k] = send_vect[comm_pkg.send_map_elmts_[k]]; + } + } + + for (int i = 0; i < comm_pkg.num_sends_; i++) + { + auto vec_start = comm_pkg.send_map_starts_[i]; + auto vec_len = comm_pkg.send_map_starts_[i + 1] - vec_start; + auto ip = comm_pkg.send_procs_[i]; + MPI_Isend(&send_buffer[vec_start], vec_len, datatype, ip, 0, comm_pkg.comm_, &requests[j++]); + } + + std::vector header_status(num_send + num_recv); + MPI_Waitall(num_send + num_recv, requests.data(), header_status.data()); + + return recv_buffer; +} + +template +SparseMatrix RemoveLowDegree(const SparseMatrix& agg_dof, + const SparseMatrix& dof_degree) +{ + int rows = agg_dof.Rows(); + int cols = agg_dof.Cols(); + + const auto& mat_indptr = agg_dof.GetIndptr(); + const auto& mat_indices = agg_dof.GetIndices(); + const auto& mat_data = agg_dof.GetData(); + + std::vector indptr(rows + 1); + std::vector indices; + std::vector data; + + indices.reserve(agg_dof.nnz()); + data.reserve(agg_dof.nnz()); + + double tol = 1e-4; + + for (int i = 0; i < rows; ++i) + { + indptr[i] = indices.size(); + + for (int j = mat_indptr[i]; j < mat_indptr[i + 1]; ++j) + { + double degree = dof_degree.RowSize(mat_indices[j]); + + if (std::fabs(degree - mat_data[j]) <= tol) + { + indices.push_back(mat_indices[j]); + data.push_back(mat_data[j]); + } + } + } + + indptr[rows] = 
indices.size(); + + return SparseMatrix(std::move(indptr), std::move(indices), std::move(data), + rows, cols); +} + +} // namespace linalgcpp + +#endif // GRAPH_UTILITIES_HPP diff --git a/modules/graphtools/include/graphtools.hpp b/modules/graphtools/include/graphtools.hpp new file mode 100644 index 0000000..71cf695 --- /dev/null +++ b/modules/graphtools/include/graphtools.hpp @@ -0,0 +1,5 @@ +#include "graph.hpp" +#include "graph_mis.hpp" +#include "graph_partition.hpp" +#include "graph_topology.hpp" +#include "graph_utilities.hpp" diff --git a/modules/graphtools/src/graph.cpp b/modules/graphtools/src/graph.cpp new file mode 100644 index 0000000..ee35bc2 --- /dev/null +++ b/modules/graphtools/src/graph.cpp @@ -0,0 +1,27 @@ +/*BHEADER********************************************************************** + * + * Copyright (c) 2018, Lawrence Livermore National Security, LLC. + * Produced at the Lawrence Livermore National Laboratory. + * LLNL-CODE-745247. All Rights reserved. See file COPYRIGHT for details. + * + * This file is part of smoothG. For more information and source code + * availability, see https://www.github.com/llnl/smoothG. + * + * smoothG is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License (as published by the Free + * Software Foundation) version 2.1 dated February 1999. 
+ * + ***********************************************************************EHEADER*/ + +/** @file + + @brief Contains Graph class +*/ + +#include "graph.hpp" + +namespace linalgcpp +{ + +} // namespace gauss + diff --git a/modules/graphtools/src/graph_mis.cpp b/modules/graphtools/src/graph_mis.cpp new file mode 100644 index 0000000..58211ce --- /dev/null +++ b/modules/graphtools/src/graph_mis.cpp @@ -0,0 +1,67 @@ +#include "graph_mis.hpp" + +namespace linalgcpp +{ + +ParMatrix SelectMIS(const ParMatrix& mis_dof) +{ + const auto& mis_dof_diag = mis_dof.GetDiag(); + + int num_mis = mis_dof.Rows(); + + std::vector indptr(1, 0); + std::vector indices; + + for (int i = 0; i < num_mis; ++i) + { + if (mis_dof_diag.RowSize(i) > 0) + { + indptr.push_back(indptr.size()); + indices.push_back(i); + } + } + + std::vector data(indices.size(), 1.0); + + int num_selected = indices.size(); + + SparseMatrix selector(std::move(indptr), std::move(indices), std::move(data), + num_selected, num_mis); + + ParMatrix selector_par(mis_dof.GetComm(), std::move(selector)); + + return RemoveLargeEntries(selector_par.Mult(mis_dof), 0.0); +} + +ParMatrix MakeMISDof(const ParMatrix& agg_dof) +{ + // Redistribute neighbor aggregate info + ParMatrix dof_agg = agg_dof.Transpose(); + ParMatrix agg_agg = agg_dof.Mult(dof_agg); + ParMatrix dof_dof = dof_agg.Mult(agg_dof); + + ParMatrix agg_r = MakeExtPermutation(agg_agg); + ParMatrix dof_r = MakeExtPermutation(dof_dof); + ParMatrix dof_r_T = dof_r.Transpose(); + + ParMatrix agg_dof_r = agg_r.Mult(agg_dof).Mult(dof_r_T); + + SparseMatrix agg_dof_ext = agg_dof_r.GetDiag(); + agg_dof_ext.EliminateZeros(); + + SparseMatrix dof_agg_ext = agg_dof_ext.Transpose(); + + // Run serial MIS algorithm on redistributed, local agg_dof + auto mis = GenerateMIS(agg_dof_ext, dof_agg_ext); + SparseMatrix mis_dof = MakeSetEntity(std::move(mis)); + + // Obtain true_mis, since local mis may now be duplicated + ParMatrix mis_dof_par(agg_dof.GetComm(), std::move(mis_dof)); 
+ ParMatrix mis_dof_r = mis_dof_par.Mult(dof_r); + ParMatrix true_mis_true_dof = SelectMIS(mis_dof_r); + + return RemoveLargeEntries(true_mis_true_dof, 0.0); +} + + +} // namespace linalgcpp diff --git a/modules/graphtools/src/graph_partition.cpp b/modules/graphtools/src/graph_partition.cpp new file mode 100644 index 0000000..4d2b670 --- /dev/null +++ b/modules/graphtools/src/graph_partition.cpp @@ -0,0 +1,550 @@ +#include "graph_partition.hpp" + +namespace linalgcpp +{ + +double ComputeT(const linalgcpp::SparseMatrix& A) +{ + const auto& data = A.GetData(); + + return std::accumulate(std::begin(data), std::end(data), 0.0); +} + +ParMatrix ParPartition(const ParMatrix& A_fine, int num_parts) +{ + MPI_Comm comm = A_fine.GetComm(); + int myid = A_fine.GetMyId(); + + double T = ComputeT(A_fine); + + int num_vertices = A_fine.Rows(); + + std::vector alpha(num_vertices, 0); + { + const auto& diag_indptr = A_fine.GetDiag().GetIndptr(); + const auto& diag_indices = A_fine.GetDiag().GetIndices(); + const auto& diag_data = A_fine.GetDiag().GetData(); + + const auto& offd_indptr = A_fine.GetOffd().GetIndptr(); + const auto& offd_indices = A_fine.GetOffd().GetIndices(); + const auto& offd_data = A_fine.GetOffd().GetData(); + + for (int i = 0; i < num_vertices; ++i) + { + double diag_total = std::accumulate(std::begin(diag_data) + diag_indptr[i], + std::begin(diag_data) + diag_indptr[i + 1], 0.0); + + double offd_total = std::accumulate(std::begin(offd_data) + offd_indptr[i], + std::begin(offd_data) + offd_indptr[i + 1], 0.0); + + alpha[i] = (diag_total + offd_total) / T; + } + } + + ParMatrix A_i(A_fine); + ParMatrix PT(comm, A_fine.GetRowStarts(), SparseIdentity(num_vertices)); + + int iter = 0; + int global_vertices = A_i.GlobalRows(); + int global_vertices_prev = -1; + + while (A_i.GlobalRows() > num_parts && global_vertices != global_vertices_prev) + { + std::vector local_indices(A_i.Rows()); + std::iota(std::begin(local_indices), std::end(local_indices), 
A_i.GetRowStarts()[0]); + + auto comm_pkg = A_i.MakeCommPkg(); + + auto global_indices = Broadcast(comm_pkg, local_indices); + auto alpha_offd = Broadcast(comm_pkg, alpha); + + //if (myid == 0) + //{ + // std::cout << std::setprecision(4) << "0 Alpha diag: " << alpha; + // std::cout << std::setprecision(4) << "0 Alpha offd: " << alpha_offd; + //} + //MPI_Barrier(comm); + + //if (myid == 1) + //{ + // std::cout << std::setprecision(4) << "1 Alpha diag: " << alpha; + // std::cout << std::setprecision(4) << "1 Alpha offd: " << alpha_offd; + //} + //MPI_Barrier(comm); + + int num_vertices = A_i.Rows(); + + const auto& diag_indptr = A_i.GetDiag().GetIndptr(); + const auto& diag_indices = A_i.GetDiag().GetIndices(); + const auto& diag_data = A_i.GetDiag().GetData(); + + const auto& offd_indptr = A_i.GetOffd().GetIndptr(); + const auto& offd_indices = A_i.GetOffd().GetIndices(); + const auto& offd_data = A_i.GetOffd().GetData(); + const auto& col_map = A_i.GetColMap(); + + std::vector indices(num_vertices, -1); + + std::vector P_diag_indptr(1, 0); + std::vector P_diag_indices; + + std::vector P_offd_indptr(1, 0); + std::vector P_offd_indices; + + std::vector P_col_map; + + double is_selected = 1.0; + double not_selected = -1.0; + + std::vector selected_local(A_i.Rows(), not_selected); + int num_iter = 2; + + for (int iter = 0; iter < num_iter; ++iter) + { + bool last_iter = (iter == num_iter - 1); + + auto selected_offd = Broadcast(comm_pkg, selected_local); + + for (int i = 0; i < num_vertices; ++i) + { + //double score = -std::numeric_limits::infinity(); + double score = 0.0; + int index = -1; + + double alpha_i = alpha[i]; + + if (selected_local[i] == is_selected) + { + continue; + } + + for (int k = diag_indptr[i]; k < diag_indptr[i + 1]; ++k) + { + int local_col = diag_indices[k]; + + if (selected_local[local_col] == not_selected && local_col !=i) + { + double a_ij = diag_data[k]; + double alpha_j = alpha[local_col]; + double q_ij = 2.0 * (a_ij / T - alpha_i * 
alpha_j); + + if (q_ij > score) + { + score = q_ij; + index = A_i.GetColStarts()[0] + local_col; + } + else if (q_ij == score) + { + index = -1; + } + } + } + + for (int k = offd_indptr[i]; k < offd_indptr[i + 1]; ++k) + { + int local_col = offd_indices[k]; + int global_col = col_map[local_col]; + + double a_ij = offd_data[k]; + auto it = std::lower_bound(std::begin(global_indices), std::end(global_indices), global_col); + assert(*it == global_col); + + int alpha_index = it - std::begin(global_indices); + + if (selected_offd[alpha_index] == is_selected) + { + continue; + } + + double alpha_j = alpha_offd[alpha_index]; + double q_ij = 2.0 * (a_ij / T - alpha_i * alpha_j); + + if (q_ij > score) + { + score = q_ij; + index = global_col; + } + else if (q_ij == score) + { + index = -1; + } + } + + int global_i = i + A_i.GetColStarts()[0]; + //printf("%d - global_i: %d Score %d %.8f \n", myid, global_i, index, score); + + indices[i] = index; + } + + auto indices_offd = Broadcast(comm_pkg, indices); + + for (int i = 0; i < num_vertices; ++i) + { + int local_i = i; + int global_j = indices[i]; + int global_i = A_i.GetColStarts()[0] + i; + + if (global_j != -1) + { + if (!(global_j < A_i.GetColStarts()[0] || global_j >= A_i.GetColStarts()[1])) + { + //inside diag + int local_j = global_j - A_i.GetColStarts()[0]; + int global_k = indices[local_j]; + + if (global_i == global_k) + { + if (global_i < global_j) + { + //printf("%d : Merge Inside %d %d\n", myid, global_i, global_j); + // merge 2 inside + if (selected_local[local_i] == not_selected) + { + assert(selected_local[local_j] == not_selected); + + P_diag_indices.push_back(local_i); + P_diag_indices.push_back(local_j); + + P_diag_indptr.push_back(P_diag_indices.size()); + P_offd_indptr.push_back(P_offd_indices.size()); + + selected_local[local_i] = is_selected; + selected_local[local_j] = is_selected; + } + } + } + else + { + //printf("%d : Identity, Point Inside %d %d %d\n", myid, global_i, global_j, global_k); + if 
(last_iter && selected_local[local_i] == not_selected) + { + P_diag_indices.push_back(local_i); + + P_diag_indptr.push_back(P_diag_indices.size()); + P_offd_indptr.push_back(P_offd_indices.size()); + } + } + } + else + { + //outside diag + auto it = std::lower_bound(std::begin(global_indices), std::end(global_indices), global_j); + int global_k_index = it - std::begin(global_indices); + + int global_k = indices_offd[global_k_index]; + + if (global_i == global_k) + { + // Processors own merges greater than their offsets + if (global_i < global_j) + { + if (selected_local[local_i] == not_selected) + { + auto it = std::lower_bound(std::begin(global_indices), std::end(global_indices), global_j); + int global_j_index = it - std::begin(global_indices); + assert(selected_offd[global_j_index] == not_selected); + + //printf("%d : Merge Offd %d %d\n", myid, global_i, global_j); + int local_j = P_offd_indices.size(); + + P_diag_indices.push_back(local_i); + + P_offd_indices.push_back(local_j); + P_col_map.push_back(global_j); + + P_diag_indptr.push_back(P_diag_indices.size()); + P_offd_indptr.push_back(P_offd_indices.size()); + + selected_local[local_i] = is_selected; + } + } + else + { + selected_local[local_i] = is_selected; + //printf("%d : Do Nothing Offd %d %d\n", myid, global_i, global_j); + // Do nothing (hopefully) + } + } + else + { + //printf("%d : Identity, Point offd %d \n", myid, global_i); + if (last_iter && selected_local[local_i] == not_selected) + { + P_diag_indices.push_back(local_i); + + P_diag_indptr.push_back(P_diag_indices.size()); + P_offd_indptr.push_back(P_offd_indices.size()); + } + } + } + } + else + { + //printf("%d : Index negative 1 %d \n", myid, global_i); + if (last_iter && selected_local[local_i] == not_selected) + { + P_diag_indices.push_back(local_i); + + P_diag_indptr.push_back(P_diag_indices.size()); + P_offd_indptr.push_back(P_offd_indices.size()); + } + } + } + } + + + SortColumnMap(P_col_map, P_offd_indices); + std::vector 
P_diag_data(P_diag_indices.size(), 1.0); + std::vector P_offd_data(P_offd_indices.size(), 1.0); + + linalgcpp_parverify(comm, P_diag_indptr.size() == P_offd_indptr.size()); + int diag_rows = P_diag_indptr.size() - 1; + + linalgcpp::SparseMatrix P_diag(std::move(P_diag_indptr), std::move(P_diag_indices), + std::move(P_diag_data), + diag_rows, num_vertices); + + linalgcpp::SparseMatrix P_offd(std::move(P_offd_indptr), std::move(P_offd_indices), + std::move(P_offd_data), + diag_rows, P_col_map.size()); + + auto starts = linalgcpp::GenerateOffsets(comm, P_diag.Rows()); + + ParMatrix PT_i(comm, starts, A_i.GetColStarts(), + std::move(P_diag), std::move(P_offd), P_col_map); + ParMatrix P_i = PT_i.Transpose(); + + //ParPrint(myid, printf("PT Iter: %d\n", iter)); iter++; + //PrintDense(PT_i); + + + PT = PT_i.Mult(PT); + A_i = PT_i.Mult(A_i).Mult(P_i); + + + bool check_Q = false; + double Q; + + if (check_Q) + { + Q = CalcQ(A_fine, PT); + + } + //A_i = hypre_test(A_i, P_i); + global_vertices_prev = global_vertices; + global_vertices = A_i.GlobalRows(); + + VectorView alpha_view(alpha); + + Vector alpha_i = PT_i.Mult(alpha_view); + + alpha = std::vector(std::begin(alpha_i), std::end(alpha_i)); + + //int local_size = A_i.Rows(); + //int global_min = 0; + //int global_max = 0; + //MPI_Allreduce(&local_size, &global_min, 1, MPI_INT, MPI_MIN, comm); + //MPI_Allreduce(&local_size, &global_max, 1, MPI_INT, MPI_MAX, comm); + + bool verbose = false; + if (A_i.GetMyId() == 0 && verbose) + { + //std::cout << "Global Vertices :" << A_i.GlobalRows() << "\n"; + //std::cout << "Global Max Procs: " << global_max << " "; + //std::cout << "Global Min Procs: " << global_min << " "; + std::cout << "PT_i: " << PT_i.GlobalRows() << " "; + std::cout << "Coarsening Factor: " << PT_i.GlobalRows() / (double) PT_i.GlobalCols() << " "; + + if (check_Q) + { + std::cout << "Q_i: " << Q << " "; + } + + std::cout << "\n"; + } + } // while + + return PT; + //return RemoveLargeEntries(PT, 0.5); // Somehow 
explicit zeros are introduced -.- +} + +double CalcQ(const linalgcpp::SparseMatrix& A, const linalgcpp::SparseMatrix& P) +{ + assert(A.Cols() == P.Rows()); + assert(CheckSymmetric(A)); + + double T = 0.0; + + int N = A.Rows(); + int M = P.Cols(); + + std::vector d(M, 0.0); + std::vector out(M, 0.0); + + const auto& indptr = A.GetIndptr(); + const auto& indices = A.GetIndices(); + const auto& data = A.GetData(); + + const auto& agg = P.GetIndices(); + + for (int i = 0; i < N; ++i) + { + for (int j = indptr[i]; j < indptr[i + 1]; ++j) + { + int col = indices[j]; + double val = data[j]; + + int A = agg[i]; + int B = agg[col]; + + if (A == B) + { + d[A] += val; + } + else + { + out[A] += val; + } + + T += val; + } + } + + double Q = 0.0; + + for (int i = 0; i < M; ++i) + { + double alpha_i = (d[i] + out[i]) / T; + + Q += d[i] / T - alpha_i * alpha_i; + } + + return Q; +} + +double CalcQ(const ParMatrix& A, const ParMatrix& PT) +{ + ParMatrix P = PT.Transpose(); + auto I_A = UnDistributer(A); + auto I_A_T = I_A.Transpose(); + auto I_P = UnDistributer(PT.Mult(P)); + + auto A_0 = I_A_T.Mult(A).Mult(I_A).GetDiag(); + auto P_0 = I_A_T.Mult(P).Mult(I_P).GetDiag(); + + double Q; + if (A.GetMyId() == 0) + { + Q = CalcQ(A_0, P_0); + } + + MPI_Bcast(&Q, 1, MPI_DOUBLE, 0, A.GetComm()); + + return Q; +} + +void SortColumnMap(std::vector& col_map, std::vector& indices) +{ + using linalgcpp::operator<<; + + const int col_map_size = col_map.size(); + + std::vector permutation(col_map_size); + std::iota(std::begin(permutation), std::end(permutation), 0); + + auto compare = [&](int i, int j) + { + return col_map[i] < col_map[j]; + }; + + std::sort(std::begin(permutation), std::end(permutation), compare); + + std::vector col_index(col_map_size); + + for (int i = 0; i < col_map_size; ++i) + { + col_index[i] = indices[permutation[i]]; + } + std::swap(indices, col_index); + + for (int i = 0; i < col_map_size; ++i) + { + col_index[i] = col_map[permutation[i]]; + } + + std::swap(col_map, 
col_index); +} + +ParMatrix UnDistributer(const ParMatrix& A) +{ + MPI_Comm comm = A.GetComm(); + int myid = A.GetMyId(); + + int diag_cols = (myid == 0) ? A.GlobalCols() : 0; + int diag_rows = A.Rows(); + + auto starts = linalgcpp::GenerateOffsets(comm, diag_cols); + + linalgcpp::SparseMatrix diag; + linalgcpp::SparseMatrix offd; + std::vector col_map; + + if (myid == 0) + { + diag = SparseIdentity(diag_rows, diag_cols); + offd = linalgcpp::SparseMatrix(diag_rows, 0); + } + else + { + int offd_start = A.GetRowStarts()[0]; + int offd_cols = A.Rows(); + + diag = linalgcpp::SparseMatrix(diag_rows, 0); + offd = SparseIdentity(diag_rows); + + col_map.resize(offd_cols); + std::iota(std::begin(col_map), std::end(col_map), offd_start); + } + + return ParMatrix(comm, A.GetRowStarts(), starts, diag, offd, col_map); +} + +double ComputeT(const ParMatrix& A) +{ + double diag_T = ComputeT(A.GetDiag()); + double offd_T = ComputeT(A.GetOffd()); + + double local_total = diag_T + offd_T; + double global_total = 0.0; + + MPI_Allreduce(&local_total, &global_total, 1, MPI_DOUBLE, MPI_SUM, A.GetComm()); + + return global_total; +} + +void ParNormalizeRows(ParMatrix& PT) +{ + auto& diag_indptr = PT.GetDiag().GetIndptr(); + auto& diag_data = PT.GetDiag().GetData(); + auto& offd_indptr = PT.GetOffd().GetIndptr(); + auto& offd_data = PT.GetOffd().GetData(); + + int num_rows = PT.Rows(); + + for (int i = 0; i < num_rows; ++i) + { + double val = 1.0 / std::sqrt(PT.RowSize(i)); + + for (int j = diag_indptr[i]; j < diag_indptr[i + 1]; ++j) + { + diag_data[j] = val; + } + + for (int j = offd_indptr[i]; j < offd_indptr[i + 1]; ++j) + { + offd_data[j] = val; + } + } +} + +} // namespace linalgcpp diff --git a/modules/graphtools/src/graph_topology.cpp b/modules/graphtools/src/graph_topology.cpp new file mode 100644 index 0000000..c89f61d --- /dev/null +++ b/modules/graphtools/src/graph_topology.cpp @@ -0,0 +1,26 @@ +/*BHEADER********************************************************************** + 
* + * Copyright (c) 2018, Lawrence Livermore National Security, LLC. + * Produced at the Lawrence Livermore National Laboratory. + * LLNL-CODE-745247. All Rights reserved. See file COPYRIGHT for details. + * + * This file is part of smoothG. For more information and source code + * availability, see https://www.github.com/llnl/smoothG. + * + * smoothG is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License (as published by the Free + * Software Foundation) version 2.1 dated February 1999. + * + ***********************************************************************EHEADER*/ + +/** @file + + @brief GraphTopology class +*/ + +#include "graph_topology.hpp" + +namespace linalgcpp +{ + +} // namespace linalgcpp diff --git a/modules/graphtools/src/graph_utilities.cpp b/modules/graphtools/src/graph_utilities.cpp new file mode 100644 index 0000000..8930a4f --- /dev/null +++ b/modules/graphtools/src/graph_utilities.cpp @@ -0,0 +1,303 @@ +#include "graph_utilities.hpp" + +namespace linalgcpp +{ + +ParMatrix RemoveLargeEntries(const ParMatrix& mat, double tol) +{ + int num_rows = mat.Rows(); + + const auto& diag_ext = mat.GetDiag(); + const auto& offd_ext = mat.GetOffd(); + const auto& colmap_ext = mat.GetColMap(); + + const auto& offd_indptr = offd_ext.GetIndptr(); + const auto& offd_indices = offd_ext.GetIndices(); + const auto& offd_data = offd_ext.GetData(); + const int num_offd = offd_ext.Cols(); + + std::vector indptr(num_rows + 1); + std::vector offd_marker(num_offd, -1); + + int offd_nnz = 0; + + for (int i = 0; i < num_rows; ++i) + { + indptr[i] = offd_nnz; + + for (int j = offd_indptr[i]; j < offd_indptr[i + 1]; ++j) + { + if (std::fabs(offd_data[j]) > tol) + { + offd_marker[offd_indices[j]] = 1; + offd_nnz++; + } + } + } + + indptr[num_rows] = offd_nnz; + + int offd_num_cols = std::count_if(std::begin(offd_marker), std::end(offd_marker), + [](int i) { return i > 0; }); + + std::vector 
col_map(offd_num_cols); + int count = 0; + + for (int i = 0; i < num_offd; ++i) + { + if (offd_marker[i] > 0) + { + offd_marker[i] = count; + col_map[count] = colmap_ext[i]; + + count++; + } + } + + assert(count == offd_num_cols); + + std::vector indices(offd_nnz); + std::vector data(offd_nnz); + + count = 0; + + for (int i = 0; i < num_rows; i++) + { + for (int j = offd_indptr[i]; j < offd_indptr[i + 1]; ++j) + { + if (std::fabs(offd_data[j]) > tol) + { + indices[count] = offd_marker[offd_indices[j]]; + data[count] = offd_data[j]; + count++; + } + } + } + + assert(count == offd_nnz); + + SparseMatrix diag = RemoveLargeEntries(diag_ext, tol); + SparseMatrix offd(std::move(indptr), std::move(indices), std::move(data), + num_rows, offd_num_cols); + + return ParMatrix(mat.GetComm(), mat.GetRowStarts(), mat.GetColStarts(), + std::move(diag), std::move(offd), std::move(col_map)); +} + +ParMatrix MakeEntityTrueEntity(const ParMatrix& entity_entity) +{ + const auto& offd = entity_entity.GetOffd(); + + const auto& offd_indptr = offd.GetIndptr(); + const auto& offd_indices = offd.GetIndices(); + const auto& offd_colmap = entity_entity.GetColMap(); + + HYPRE_Int last_row = entity_entity.GetColStarts()[1]; + + int num_entities = entity_entity.Rows(); + std::vector select_indptr(num_entities + 1); + + int num_true_entities = 0; + + for (int i = 0; i < num_entities; ++i) + { + select_indptr[i] = num_true_entities; + + int row_size = offd.RowSize(i); + + if (row_size == 0) + { + num_true_entities++; + } + else + { + bool owner = true; + + int start = offd_indptr[i]; + int end = offd_indptr[i + 1]; + + for (int j = start; j < end; ++j) + { + if (offd_colmap[offd_indices[j]] < last_row) + { + owner = false; + break; + } + } + + if (owner) + { + num_true_entities++; + } + } + } + + select_indptr[num_entities] = num_true_entities; + + std::vector select_indices(num_true_entities); + std::iota(std::begin(select_indices), std::end(select_indices), 0); + + std::vector 
select_data(num_true_entities, 1.0); + + SparseMatrix select(std::move(select_indptr), std::move(select_indices), std::move(select_data), + num_entities, num_true_entities); + + MPI_Comm comm = entity_entity.GetComm(); + auto true_starts = linalgcpp::GenerateOffsets(comm, num_true_entities); + + ParMatrix select_d(comm, entity_entity.GetRowStarts(), true_starts, std::move(select)); + + return entity_entity.Mult(select_d); +} + + +ParMatrix Mult(const ParMatrix& R, const ParMatrix& A, const ParMatrix& P) +{ + return R.Mult(A).Mult(P); +} + +ParMatrix MakeExtPermutation(const ParMatrix& parmat) +{ + MPI_Comm comm = parmat.GetComm(); + + const auto& diag = parmat.GetDiag(); + const auto& offd = parmat.GetOffd(); + const auto& colmap = parmat.GetColMap(); + + int num_diag = diag.Cols(); + int num_offd = offd.Cols(); + int num_ext = num_diag + num_offd; + + const auto& mat_starts = parmat.GetColStarts(); + auto ext_starts = linalgcpp::GenerateOffsets(comm, num_ext); + + SparseMatrix perm_diag = SparseIdentity(num_ext, num_diag); + SparseMatrix perm_offd = SparseIdentity(num_ext, num_offd, num_diag); + + return ParMatrix(comm, ext_starts, mat_starts, + std::move(perm_diag), std::move(perm_offd), + colmap); +} + +bool CheckSymmetric(const linalgcpp::Operator& A, bool verbose) +{ + const double tol = 1e-8; + + Vector v(A.Rows()); + Vector u(A.Rows()); + + Randomize(u, -1.0, 1.0); + Randomize(v, -1.0, 1.0); + + SubAvg(u); + SubAvg(v); + + auto vAu = v.Mult(A.Mult(u)); + auto uAv = u.Mult(A.Mult(v)); + auto diff = std::fabs((vAu - uAv) / vAu); + + if (verbose && diff >= tol) + { + std::cout << "vAu: " << vAu << "\n"; + std::cout << "uAv: " << uAv << "\n"; + std::cout << "|vAu - uAv| / |vAu|: " << diff << "\n"; + } + + return diff < tol; +} + +bool CheckSymmetric(const linalgcpp::ParOperator& A, bool verbose) +{ + const double tol = 1e-8; + + Vector v(A.Rows()); + Vector u(A.Rows()); + + auto comm = A.GetComm(); + auto myid = A.GetMyId(); + + Randomize(u, -1.0, 1.0, myid + 
1); + Randomize(v, -1.0, 1.0, myid + 1); + + auto vAu = ParMult(comm, v, A.Mult(u)); + auto uAv = ParMult(comm, u, A.Mult(v)); + auto diff = std::fabs((vAu - uAv) / vAu); + + if (verbose && diff >= tol) + { + ParPrint(myid, std::cout << "vAu: " << vAu << "\n"); + ParPrint(myid, std::cout << "uAv: " << uAv << "\n"); + ParPrint(myid, std::cout << "|vAu - uAv| / |vAu|: " << diff << "\n"); + } + + return diff < tol; +} + +ParMatrix RemoveLowDegree(const ParMatrix& agg_dof, const ParMatrix& dof_degree) +{ + int num_dofs = dof_degree.Rows(); + int num_aggs = agg_dof.Rows(); + + std::vector row_sizes(num_dofs); + + for (int i = 0; i < num_dofs; ++i) + { + row_sizes[i] = dof_degree.RowSize(i); + } + + auto comm_pkg = agg_dof.MakeCommPkg(); + auto offproc_rowsizes = Broadcast(comm_pkg, row_sizes); + + CooMatrix coo_diag; + CooMatrix coo_offd; + + const auto& diag_indptr = agg_dof.GetDiag().GetIndptr(); + const auto& diag_indices = agg_dof.GetDiag().GetIndices(); + const auto& diag_data = agg_dof.GetDiag().GetData(); + const auto& offd_indptr = agg_dof.GetOffd().GetIndptr(); + const auto& offd_indices = agg_dof.GetOffd().GetIndices(); + const auto& offd_data = agg_dof.GetOffd().GetData(); + const auto& colmap = agg_dof.GetColMap(); + + double tol = 1e-8; + + for (int i = 0; i < num_aggs; ++i) + { + for (int j = diag_indptr[i]; j < diag_indptr[i + 1]; ++j) + { + int dof = diag_indices[j]; + double val = diag_data[j]; + + // Warning(@gelever): Disregards edges of degree one no matter what! + if (std::fabs(val) >= 1.5 && std::fabs(val - row_sizes[dof]) <= tol) + { + coo_diag.Add(i, dof, val); + } + } + + for (int j = offd_indptr[i]; j < offd_indptr[i + 1]; ++j) + { + int dof = offd_indices[j]; + double val = offd_data[j]; + + // Warning(@gelever): Disregards edges of degree one no matter what! 
+ if (std::fabs(val) >= 1.5 && std::fabs(val - offproc_rowsizes[dof]) <= tol) + { + coo_offd.Add(i, dof, val); + } + } + } + + coo_diag.SetSize(agg_dof.GetDiag().Rows(), agg_dof.GetDiag().Cols()); + coo_offd.SetSize(agg_dof.GetOffd().Rows(), agg_dof.GetOffd().Cols()); + + auto diag = coo_diag.ToSparse(); + auto offd = coo_offd.ToSparse(); + + return ParMatrix(agg_dof.GetComm(), agg_dof.GetRowStarts(), agg_dof.GetColStarts(), + std::move(diag), std::move(offd), colmap); +} + + + +} // namespace linalgcpp diff --git a/modules/graphtools/tests/CMakeLists.txt b/modules/graphtools/tests/CMakeLists.txt new file mode 100644 index 0000000..3c2e2bf --- /dev/null +++ b/modules/graphtools/tests/CMakeLists.txt @@ -0,0 +1,11 @@ +add_executable(test_mis test_mis.cpp) +target_link_libraries(test_mis graphtools) + +add_executable(test_parpart test_parpart.cpp) +target_link_libraries(test_parpart graphtools) + +add_executable(smooth_parpart smooth_parpart.cpp) +target_link_libraries(smooth_parpart graphtools) + +add_test(test_mis test_mis) +add_test(test_parpart test_parpart) diff --git a/modules/graphtools/tests/smooth_parpart.cpp b/modules/graphtools/tests/smooth_parpart.cpp new file mode 100644 index 0000000..f896bf4 --- /dev/null +++ b/modules/graphtools/tests/smooth_parpart.cpp @@ -0,0 +1,137 @@ +#include "graphtools.hpp" +#include "test_input_graphs.hpp" + +using namespace linalgcpp; + +ParMatrix AggReDistributer(const ParMatrix& agg_vertex); +Vector SmoothVector(const ParMatrix& A, int max_iter); +void CheckPartition(const ParMatrix& agg_vertex, const std::string& label = "Partition"); +void VerifyRowSizeOne(const ParMatrix& A); + +int main(int argc, char** argv) +{ + MpiSession mpi(argc, argv); + + auto vertex_vertex = ReadMTX("sts4098.mtx"); + linalgcpp_verify(CheckSymmetric(vertex_vertex), "Input not Symmetric!"); + + auto proc_part = Partition(vertex_vertex, mpi.num_procs, 1.0, false); + ParMatrix A = linalgcpp::ParSplit(mpi.comm, vertex_vertex, proc_part); + + auto 
smooth_vector = SmoothVector(A, 10); + + ParMatrix W(mpi.comm, SparseMatrix(smooth_vector.data())); + + A = W.Mult(A).Mult(W); + + ParMatrix PT = ParPartition(A, 2); + ParMatrix P = PT.Transpose(); + + ParMatrix redist = AggReDistributer(PT); + ParMatrix redist_T = redist.Transpose(); + ParMatrix PT_r = PT.Mult(redist_T); + + CheckPartition(PT, "PT"); + CheckPartition(PT_r, "PT_r"); + CheckPartition(redist_T, "redist_T"); + CheckPartition(redist, "redist"); +} + +ParMatrix AggReDistributer(const ParMatrix& agg_vertex) +{ + linalgcpp::linalgcpp_verify(agg_vertex.nnz() == agg_vertex.GlobalCols(), + "Agg_Vertex is not a proper pattern!"); + + MPI_Comm comm = agg_vertex.GetComm(); + + int diag_cols = agg_vertex.GetDiag().Cols(); + int offd_cols = agg_vertex.GetOffd().Cols(); + + int diag_rows = agg_vertex.GetDiag().nnz(); + int offd_rows = agg_vertex.GetOffd().nnz(); + int total_rows = diag_rows + offd_rows; + + auto& vertices = agg_vertex.GetDiag().GetIndices(); + int num_vertices = vertices.size(); + + auto diag = SparseIdentity(total_rows, diag_cols, 0, diag_cols - num_vertices); + auto offd = SparseIdentity(total_rows, offd_cols, diag_rows); + std::vector col_map = agg_vertex.GetColMap(); + + auto& indices = diag.GetIndices(); + std::copy(std::begin(vertices), std::end(vertices), std::begin(indices)); + + auto starts = linalgcpp::GenerateOffsets(comm, total_rows); + + return ParMatrix(comm, std::move(starts), agg_vertex.GetColStarts(), + std::move(diag), std::move(offd), std::move(col_map)); +} + +Vector SmoothVector(const ParMatrix& A, int max_iter) +{ + int num_rows = A.Rows(); + + if (max_iter <= 0) + { + Vector w(num_rows, 1.0); + w /= A.ParNorm(w); + + return w; + } + + Vector w(num_rows); + Vector zero(num_rows, 0); + + int seed = A.GetMyId() + 1; + Randomize(w, -1.0, 1.0, seed); + + ParSmoother smoother(A, linalgcpp::SmoothType::L1_GS, max_iter); + smoother.Mult(zero, w); + + VerifyFinite(w); + + Vector Aw(num_rows, 0.0); + A.Mult(w, Aw); + double w_norm = 
ParMult(A.GetComm(), w, Aw); + + if (std::fabs(w_norm) < 1e-12) + { + throw std::runtime_error("initial W is sufficiently smooth!" + std::to_string(w_norm)); + } + + w /= std::sqrt(w_norm); + + VerifyFinite(w); + + assert(std::fabs(1.0 - A.ParNorm(w)) < 1e-9); + + return w; +} + +void VerifyRowSizeOne(const ParMatrix& A) +{ + int num_rows = A.Rows(); + int is_bad = 0; + + for (int i = 0; i < num_rows; ++i) + { + int diag_row_size = A.GetDiag().RowSize(i); + int offd_row_size = A.GetOffd().RowSize(i); + int row_size = diag_row_size + offd_row_size; + + is_bad += (row_size != 1); + } + + linalgcpp_parverify(A.GetComm(), is_bad == 0, + std::to_string(A.GetMyId()) + " Row Size Not One! : " + std::to_string(is_bad) ); +} + +void CheckPartition(const ParMatrix& agg_vertex, const std::string& label) +{ + ParMatrix vertex_agg = RemoveLargeEntries(agg_vertex.Transpose()); + + ParPrint(agg_vertex.GetMyId(), printf("Checking %s\t(%d %d, %d)\t", + label.c_str(), agg_vertex.GlobalRows(), agg_vertex.GlobalCols(), agg_vertex.nnz() )); + VerifyRowSizeOne(vertex_agg); + ParPrint(agg_vertex.GetMyId(), printf("Good Partition\n")); +} diff --git a/modules/graphtools/tests/test_input_graphs.hpp b/modules/graphtools/tests/test_input_graphs.hpp new file mode 100644 index 0000000..bcaa43f --- /dev/null +++ b/modules/graphtools/tests/test_input_graphs.hpp @@ -0,0 +1,98 @@ +#pragma once + + +template +struct InputGraph +{ + linalgcpp::SparseMatrix vertex_edge; + std::vector partition; + int max_procs; +}; + +template +void AddEdge(linalgcpp::CooMatrix& coo, const std::vector& vertices) +{ + int edge_num = std::get<0>(coo.FindSize()); + + for (auto&& vertex : vertices) + { + coo.Add(edge_num, vertex, (T)1.0); + } +} + +// +// *--*\ /*--* +// | | * | | +// *--*/ \*--* +// +// +template +InputGraph TestGraph0() +{ + linalgcpp::CooMatrix edge_vertex; + + AddEdge(edge_vertex, {0, 1}); + AddEdge(edge_vertex, {0, 2}); + AddEdge(edge_vertex, {1, 3}); + AddEdge(edge_vertex, {1, 4}); + 
AddEdge(edge_vertex, {2, 3}); + AddEdge(edge_vertex, {3, 4}); + AddEdge(edge_vertex, {4, 5}); + AddEdge(edge_vertex, {4, 7}); + AddEdge(edge_vertex, {5, 6}); + AddEdge(edge_vertex, {5, 7}); + AddEdge(edge_vertex, {6, 8}); + AddEdge(edge_vertex, {7, 8}); + + std::vector part{0, 0, 0, 0, 0, + 1, 1, 1, 1}; + + int max_procs = 2; + + return {edge_vertex.ToSparse().Transpose(), part, max_procs}; +} + +// +// +// * * +// |\ /| +// *-*--*-* +// \/ // 3 vertex edge in middle +// * +// / \ +// *---* +// +template +InputGraph TestGraph1() +{ + linalgcpp::CooMatrix edge_vertex; + + AddEdge(edge_vertex, {0, 1}); + AddEdge(edge_vertex, {0, 2}); + AddEdge(edge_vertex, {1, 2}); + AddEdge(edge_vertex, {3, 4}); + AddEdge(edge_vertex, {3, 5}); + AddEdge(edge_vertex, {4, 5}); + AddEdge(edge_vertex, {6, 7}); + AddEdge(edge_vertex, {6, 8}); + AddEdge(edge_vertex, {7, 8}); + AddEdge(edge_vertex, {2, 3, 6}); + + std::vector part{0, 0, 0, + 1, 1, 1, + 2, 2, 2}; + int max_procs = 3; + + return {edge_vertex.ToSparse().Transpose(), part, max_procs}; +} + +template +std::vector> GetAllGraphs() +{ + return + { + TestGraph0(), + TestGraph1() + }; +} + diff --git a/modules/graphtools/tests/test_mis.cpp b/modules/graphtools/tests/test_mis.cpp new file mode 100644 index 0000000..6ad0bf0 --- /dev/null +++ b/modules/graphtools/tests/test_mis.cpp @@ -0,0 +1,56 @@ +#include "graphtools.hpp" +#include "test_input_graphs.hpp" + +using namespace linalgcpp; + +template +void CreateGraph(const MpiSession& mpi, const InputGraph& graph_info) +{ + const auto& vertex_edge = graph_info.vertex_edge; + const auto& part = graph_info.partition; + const auto& max_procs = graph_info.max_procs; + + if (mpi.num_procs > max_procs) + { + return; + } + + Graph graph(mpi.comm, vertex_edge, part); + GraphTopology gt(graph); + + for (int i = 0; i < mpi.num_procs; ++i) + { + if (mpi.myid == i) + { + std::cout << "Processor: " << i << "\n"; + std::cout << "Type: " << typeid(T).name() << "\n"; + 
graph.vertex_edge_local_.ToDense().Print("VE:", std::cout, 4, 0); + gt.face_agg_local_.ToDense().Print("face_agg :", std::cout, 4, 0); + gt.face_edge_.GetDiag().ToDense().Print("face_edge diag:", std::cout, 4, 0); + gt.face_edge_.GetOffd().ToDense().Print("face_edge offd:", std::cout, 4, 0); + gt.face_true_face_.GetDiag().ToDense().Print("face_true_face_ diag:", std::cout, 4, 0); + gt.face_true_face_.GetOffd().ToDense().Print("face_true_face_ offd:", std::cout, 4, 0); + std::cout.flush(); + } + + MPI_Barrier(mpi.comm); + } +} + +template +void TestAllGraphs(const MpiSession& mpi) +{ + for (const auto& graph_info_i : GetAllGraphs()) + { + CreateGraph(mpi, graph_info_i); + } +} + +int main(int argc, char** argv) +{ + MpiSession mpi(argc, argv); + + // Test both types of relationships + TestAllGraphs(mpi); + TestAllGraphs(mpi); +} diff --git a/modules/graphtools/tests/test_parpart.cpp b/modules/graphtools/tests/test_parpart.cpp new file mode 100644 index 0000000..b9ca9eb --- /dev/null +++ b/modules/graphtools/tests/test_parpart.cpp @@ -0,0 +1,23 @@ +#include "graphtools.hpp" +#include "test_input_graphs.hpp" + +using namespace linalgcpp; + +int main(int argc, char** argv) +{ + MpiSession mpi(argc, argv); + + auto graph_input = TestGraph0(); + + auto ve = graph_input.vertex_edge; + auto ev = ve.Transpose(); + auto vv = ve.Mult(ev); + + vv.ToDense().Print("vv:", std::cout, 4, 0); + + ParMatrix A(mpi.comm, vv); + A.GetDiag().ToDense().Print("A:", std::cout, 4, 0); + + ParMatrix PT = ParPartition(A, 2); + PT.GetDiag().ToDense().Print("P:", std::cout, 4, 0); +} diff --git a/modules/parlinalgcpp/include/parmatrix.hpp b/modules/parlinalgcpp/include/parmatrix.hpp index 4dedb46..51d61ab 100644 --- a/modules/parlinalgcpp/include/parmatrix.hpp +++ b/modules/parlinalgcpp/include/parmatrix.hpp @@ -289,7 +289,7 @@ class ParMatrix: public ParOperator diagonal and off diagonal blocks. @param row row to to get size of */ - int RowSize(int row); + int RowSize(int row) const; /*! 
@brief Remove entries less than tolerance @param tol tolerance diff --git a/modules/parlinalgcpp/include/paroperator.hpp b/modules/parlinalgcpp/include/paroperator.hpp index c069285..988feff 100644 --- a/modules/parlinalgcpp/include/paroperator.hpp +++ b/modules/parlinalgcpp/include/paroperator.hpp @@ -13,6 +13,7 @@ namespace linalgcpp { +double ParMult(MPI_Comm, const VectorView& x, const VectorView& y); /*! @brief Base class for distributed operators. @@ -90,9 +91,16 @@ class ParOperator: public linalgcpp::Operator /* @brief Access the MPI processor id */ virtual int GetMyId() const; + /* @brief Access the number of MPI processors */ + virtual int GetNumProcs() const; + + /* @brief Compute the operator norm of the vector, x^T A x */ + virtual double ParNorm(const VectorView& x) const; + protected: MPI_Comm comm_; int myid_; + int num_procs_; std::vector row_starts_; std::vector col_starts_; diff --git a/modules/parlinalgcpp/include/parprecond.hpp b/modules/parlinalgcpp/include/parprecond.hpp new file mode 100644 index 0000000..5addece --- /dev/null +++ b/modules/parlinalgcpp/include/parprecond.hpp @@ -0,0 +1,47 @@ +/*! @file */ + +#ifndef PARPRECOND_HPP__ +#define PARPRECOND_HPP__ + +#include "parmatrix.hpp" + +namespace linalgcpp +{ + +class ParBlockDiagComp : public ParOperator +{ + public: + ParBlockDiagComp() = default; + ParBlockDiagComp(const ParMatrix& A, const ParMatrix& agg_vertex, int num_steps=1); + + ~ParBlockDiagComp() = default; + + using Operator::Mult; + + /*! 
@brief Apply the action of the solver + @param input input vector x + @param output output vector y + */ + void Mult(const linalgcpp::VectorView& input, + linalgcpp::VectorView output) const; + + double Anorm(const linalgcpp::VectorView& x) const; + + private: + ParMatrix MakeRedistributer(const ParMatrix& agg_vertex); + + ParMatrix A_r_; + ParMatrix redist_; + + BlockDiagOperator block_op_; + std::vector> solvers_; + + int num_steps_; + + mutable Vector x_; + mutable Vector b_; +}; + +} //namespace linalgcpp + +#endif // PARPRECOND_HPP diff --git a/modules/parlinalgcpp/include/parutilities.hpp b/modules/parlinalgcpp/include/parutilities.hpp index a6b39a0..27bb62f 100644 --- a/modules/parlinalgcpp/include/parutilities.hpp +++ b/modules/parlinalgcpp/include/parutilities.hpp @@ -144,6 +144,11 @@ struct MpiSession MPI_Comm_rank(comm, &myid); } + /// Do not allow initializing mpi multiple times + MpiSession(const MpiSession& other) = delete; + MpiSession(MpiSession&& other) = delete; + MpiSession& operator=(const MpiSession& other) = delete; + /** @brief Destructor */ ~MpiSession() { MPI_Finalize(); } diff --git a/modules/parlinalgcpp/src/parmatrix.cpp b/modules/parlinalgcpp/src/parmatrix.cpp index c8ab11d..09ad975 100644 --- a/modules/parlinalgcpp/src/parmatrix.cpp +++ b/modules/parlinalgcpp/src/parmatrix.cpp @@ -325,13 +325,23 @@ ParMatrix ParMatrix::operator-() const void ParMatrix::Print(const std::string& label, std::ostream& out) const { - out << label << "\n"; + int num_procs = GetNumProcs(); - diag_.Print("Diag:", out); - offd_.Print("Offd:", out); + for (int i = 0; i < num_procs; ++i) + { + if (GetMyId() == i) + { + out << label << "\n"; + + diag_.Print("Diag:", out); + offd_.Print("Offd:", out); - using linalgcpp::operator<<; - out << "ColMap:" << col_map_; + using linalgcpp::operator<<; + out << "ColMap:" << col_map_; + out.flush(); + } + MPI_Barrier(GetComm()); + } } void ParMatrix::AddDiag(double diag_val) @@ -397,7 +407,7 @@ void 
ParMatrix::EliminateRow(int index) offd_.EliminateRow(index); } -int ParMatrix::RowSize(int row) +int ParMatrix::RowSize(int row) const { return diag_.RowSize(row) + offd_.RowSize(row); } diff --git a/modules/parlinalgcpp/src/paroperator.cpp b/modules/parlinalgcpp/src/paroperator.cpp index 1cc8195..ac5bac7 100644 --- a/modules/parlinalgcpp/src/paroperator.cpp +++ b/modules/parlinalgcpp/src/paroperator.cpp @@ -6,7 +6,7 @@ namespace linalgcpp { ParOperator::ParOperator() - : comm_(0), myid_(-1), x_(nullptr), b_(nullptr) + : comm_(0), myid_(-1), num_procs_(0), x_(nullptr), b_(nullptr) { } @@ -14,6 +14,7 @@ ParOperator::ParOperator() ParOperator::ParOperator(MPI_Comm comm) : comm_(comm), x_(nullptr), b_(nullptr) { + MPI_Comm_size(comm_, &num_procs_); MPI_Comm_rank(comm_, &myid_); } @@ -36,8 +37,9 @@ ParOperator::ParOperator(MPI_Comm comm, std::vector row_starts, ParOperator::ParOperator(const ParOperator& other) : linalgcpp::Operator(other), comm_(other.comm_), - myid_(other.myid_), row_starts_(other.row_starts_), - col_starts_(other.col_starts_), x_(nullptr), b_(nullptr) + myid_(other.myid_), num_procs_(other.num_procs_), + row_starts_(other.row_starts_), col_starts_(other.col_starts_), + x_(nullptr), b_(nullptr) { if (other.comm_ > 0 && myid_ >= 0) { @@ -47,6 +49,7 @@ ParOperator::ParOperator(const ParOperator& other) void ParOperator::HypreCreate() { + MPI_Comm_size(comm_, &num_procs_); MPI_Comm_rank(comm_, &myid_); InitVector(x_, row_starts_); @@ -92,6 +95,7 @@ void swap(ParOperator& lhs, ParOperator& rhs) noexcept static_cast(rhs)); std::swap(lhs.comm_, rhs.comm_); std::swap(lhs.myid_, rhs.myid_); + std::swap(lhs.num_procs_, rhs.num_procs_); std::swap(lhs.row_starts_, rhs.row_starts_); std::swap(lhs.col_starts_, rhs.col_starts_); std::swap(lhs.x_, rhs.x_); @@ -151,5 +155,22 @@ int ParOperator::GetMyId() const return myid_; } +int ParOperator::GetNumProcs() const +{ + return num_procs_; +} + +double ParOperator::ParNorm(const VectorView& x) const +{ + MPI_Comm comm 
= GetComm(); + + Vector Ax(x.size()); + this->Mult(x, Ax); + + double xAx = linalgcpp::ParMult(comm, x, Ax); + + return std::sqrt(xAx); +} + } // namespace linalgcpp diff --git a/modules/parlinalgcpp/src/parprecond.cpp b/modules/parlinalgcpp/src/parprecond.cpp new file mode 100644 index 0000000..e39b08c --- /dev/null +++ b/modules/parlinalgcpp/src/parprecond.cpp @@ -0,0 +1,151 @@ +/*! @file */ + +#include "parprecond.hpp" + +namespace linalgcpp +{ + +ParBlockDiagComp::ParBlockDiagComp(const ParMatrix& A_in, const ParMatrix& agg_vertex, int num_steps) + : ParOperator(A_in), redist_(MakeRedistributer(agg_vertex)), num_steps_(num_steps), + x_(redist_.Rows()), b_(redist_.Rows()) +{ + //linalgcpp_verify(CheckSymmetric(A), "A not Symmetric!"); + + ParMatrix redist_T = redist_.Transpose(); + ParMatrix agg_vertex_r = agg_vertex.Mult(redist_T); + agg_vertex_r = 1.0; + + ParMatrix vertex_agg_r = agg_vertex_r.Transpose(); + A_r_ = redist_.Mult(A_in).Mult(redist_T); + + linalgcpp_parverify(agg_vertex_r.GetComm(), agg_vertex_r.GetOffd().nnz() == 0, + "agg vertex not proper pattern!"); + + block_op_ = BlockDiagOperator(agg_vertex_r.GetDiag().GetIndptr()); + solvers_.resize(agg_vertex_r.Rows()); + + std::vector marker(redist_.Rows(), -1); + int num_aggs = agg_vertex_r.Rows(); + int num_vertices = A_r_.Rows(); + + linalgcpp_verify(A_r_.Rows() == A_r_.Cols(), "A not square!"); + linalgcpp_verify(A_r_.Cols() == agg_vertex_r.Cols(), "A does not match agg_vertex!!"); + + // Diagonal compensation + Vector A_diag(A_r_.Rows(), 0.0); + const auto& diag_indptr = A_r_.GetDiag().GetIndptr(); + const auto& diag_indices = A_r_.GetDiag().GetIndices(); + const auto& diag_data = A_r_.GetDiag().GetData(); + + const auto& offd_indptr = A_r_.GetOffd().GetIndptr(); + const auto& offd_indices = A_r_.GetOffd().GetIndices(); + const auto& offd_data = A_r_.GetOffd().GetData(); + + const auto& col_map = A_r_.GetColMap(); + const auto& part = vertex_agg_r.GetDiag().GetIndices(); + + const auto& A_ii = 
A_r_.GetDiag().GetDiag(); + + auto comm_pkg = A_r_.MakeCommPkg(); + auto off_proc_diag = Broadcast(comm_pkg, A_r_.GetDiag().GetDiag()); + + + for (int row = 0; row < num_vertices; ++row) + { + for (int j = diag_indptr[row]; j < diag_indptr[row + 1]; ++j) + { + int col = diag_indices[j]; + + if (col > row) + { + int agg_i = part[row]; + int agg_j = part[col]; + + if (agg_i != agg_j) + { + double tau = std::sqrt(A_ii[row]) / std::sqrt(A_ii[col]); + A_diag[row] += std::fabs(diag_data[j]) * tau; + A_diag[col] += std::fabs(diag_data[j]) / tau; + } + } + } + + for (int j = offd_indptr[row]; j < offd_indptr[row + 1]; ++j) + { + double tau = std::sqrt(A_ii[row]) / std::sqrt(off_proc_diag[offd_indices[j]]); + A_diag[row] += std::fabs(offd_data[j]) * tau; + } + } + + Vector comp; + + for (int i = 0; i < num_aggs; ++i) + { + auto indices = agg_vertex_r.GetDiag().GetIndices(i); + auto A_agg = A_r_.GetDiag().GetSubMatrix(indices, indices, marker); + A_diag.GetSubVector(indices, comp); + + A_agg.AddDiag(comp.data()); + + DenseMatrix A_dense(A_agg.Rows(), A_agg.Cols()); + A_agg.ToDense(A_dense); + A_dense.Invert(); + + solvers_[i] = make_unique(std::move(A_dense)); + block_op_.SetBlock(i, i, solvers_[i]); + } +} + +ParMatrix ParBlockDiagComp::MakeRedistributer(const ParMatrix& agg_vertex) +{ + linalgcpp::linalgcpp_parverify(agg_vertex.nnz() == agg_vertex.GlobalCols(), + "Agg_Vertex is not a proper pattern!"); + + MPI_Comm comm = agg_vertex.GetComm(); + + int diag_cols = agg_vertex.GetDiag().Cols(); + int offd_cols = agg_vertex.GetOffd().Cols(); + + int diag_rows = agg_vertex.GetDiag().nnz(); + int offd_rows = agg_vertex.GetOffd().nnz(); + int total_rows = diag_rows + offd_rows; + + auto& vertices = agg_vertex.GetDiag().GetIndices(); + int num_vertices = vertices.size(); + + SparseMatrix diag = SparseIdentity(total_rows, diag_cols, 0, diag_cols - num_vertices); + SparseMatrix offd = SparseIdentity(total_rows, offd_cols, diag_rows); + std::vector col_map = agg_vertex.GetColMap(); + 
+ auto& indices = diag.GetIndices(); + std::copy(std::begin(vertices), std::end(vertices), std::begin(indices)); + + auto starts = linalgcpp::GenerateOffsets(comm, total_rows); + + return ParMatrix(comm, std::move(starts), agg_vertex.GetColStarts(), + std::move(diag), std::move(offd), std::move(col_map)); +} + +void ParBlockDiagComp::Mult(const linalgcpp::VectorView& input, + linalgcpp::VectorView output) const +{ + redist_.Mult(input, b_); + redist_.Mult(output, x_); + + Vector Ax(A_r_.Rows(), 0.0); + Vector MAx(A_r_.Rows(), 0.0); + + for (int i = 0; i < num_steps_; ++i) + { + A_r_.Mult(x_, Ax); + auto bAx = b_; + bAx -= Ax; + + block_op_.Mult(bAx, MAx); + x_ += MAx; + } + + redist_.MultAT(x_, output); +} + +} // namespace linalgcpp diff --git a/modules/sparsesolver/src/sparsesolve.cpp b/modules/sparsesolver/src/sparsesolve.cpp index 64f4aa2..66f1bff 100644 --- a/modules/sparsesolver/src/sparsesolve.cpp +++ b/modules/sparsesolver/src/sparsesolve.cpp @@ -73,7 +73,7 @@ void SparseSolver::Init() int num_status = umfpack_di_numeric(indptr, indices, data, symbolic, &numeric_, control_.data(), info_.data()); - assert(num_status == 0); + linalgcpp_assert(num_status == 0, "SparseSolve Numeric Error!"); umfpack_di_free_symbolic(&symbolic); } @@ -111,7 +111,7 @@ void SparseSolver::Mult(const linalgcpp::VectorView& input, umfpack_di_report_info(control_.data(), info_.data()); - assert(status == 0); + linalgcpp_assert(status == 0, "SparseSolve Solve Error!"); } void SparseSolver::MultAT(const linalgcpp::VectorView& input, diff --git a/src/densematrix.cpp b/src/densematrix.cpp index 947f6c8..9d971fa 100644 --- a/src/densematrix.cpp +++ b/src/densematrix.cpp @@ -566,6 +566,31 @@ void DenseMatrix::AddSubMatrix(const std::vector& rows, std::vector& c } } +void DenseMatrix::GetSubMatrix(const std::vector& rows, std::vector& cols, DenseMatrix& dense) const +{ + int num_rows = rows.size(); + int num_cols = cols.size(); + + dense.SetSize(num_rows, num_cols); + dense = 0.0; + + for 
(int j = 0; j < num_cols; ++j) + { + for (int i = 0; i < num_rows; ++i) + { + dense(i, j) = (*this)(rows[i], cols[j]); + } + } +} + +DenseMatrix DenseMatrix::GetSubMatrix(const std::vector& rows, std::vector& cols) const +{ + DenseMatrix dense; + GetSubMatrix(rows, cols, dense); + + return dense; +} + std::vector DenseMatrix::SVD(DenseMatrix& U) const { U = *this; diff --git a/src/utilities.cpp b/src/utilities.cpp index 4aa4232..ec28fbe 100644 --- a/src/utilities.cpp +++ b/src/utilities.cpp @@ -3,5 +3,28 @@ namespace linalgcpp { +void SetMarker(std::vector& marker, const std::vector& indices) +{ + const int size = indices.size(); + + for (int i = 0; i < size; ++i) + { + assert(indices[i] < static_cast(marker.size())); + + marker[indices[i]] = i; + } +} + +void ClearMarker(std::vector& marker, const std::vector& indices) +{ + const int size = indices.size(); + + for (int i = 0; i < size; ++i) + { + assert(indices[i] < static_cast(marker.size())); + + marker[indices[i]] = -1; + } +} } // namespace linalgcpp diff --git a/tests/test_linalg.cpp b/tests/test_linalg.cpp index f3c55e8..224b6a0 100644 --- a/tests/test_linalg.cpp +++ b/tests/test_linalg.cpp @@ -384,6 +384,19 @@ void test_sparse() A_elim.PrintDense("A elim row/col 0"); } } + + // Test Adding SparseMatrix + { + SparseMatrix<> B = A.Transpose(); + A.PrintDense("A"); + B.PrintDense("B"); + + SparseMatrix<> C = Add(A, B); + C.PrintDense("C = A + B"); + + SparseMatrix<> D = Add(1.5, A, 0.5, B); + D.PrintDense("D = 1.5A + 0.5B"); + } } void test_coo()