diff --git a/README.md b/README.md
index 379b83954..2a7afcf6b 100644
--- a/README.md
+++ b/README.md
@@ -74,7 +74,7 @@ There are many options for HydraGNN; the dataset and model type are particularly
 important:
  - `["Verbosity"]["level"]`: `0`, `1`, `2`, `3`, `4`
  - `["Dataset"]["name"]`: `CuAu_32atoms`, `FePt_32atoms`, `FeSi_1024atoms`
- - `["NeuralNetwork"]["Architecture"]["model_type"]`: `PNA`, `MFC`, `GIN`, `GAT`, `CGCNN`, `SchNet`
+ - `["NeuralNetwork"]["Architecture"]["model_type"]`: `PNA`, `MFC`, `GIN`, `GAT`, `CGCNN`, `SchNet`, `DimeNet`
 
 ### Citations
 "HydraGNN: Distributed PyTorch implementation of multi-headed graph convolutional neural networks", Copyright ID#: 81929619
diff --git a/examples/csce/train_gap.py b/examples/csce/train_gap.py
index 360647b4f..ca8520cef 100644
--- a/examples/csce/train_gap.py
+++ b/examples/csce/train_gap.py
@@ -322,7 +322,11 @@ def __getitem__(self, idx):
         % (len(trainset), len(valset), len(testset))
     )
 
-    (train_loader, val_loader, test_loader,) = hydragnn.preprocess.create_dataloaders(
+    (
+        train_loader,
+        val_loader,
+        test_loader,
+    ) = hydragnn.preprocess.create_dataloaders(
         trainset, valset, testset, config["NeuralNetwork"]["Training"]["batch_size"]
     )
 
diff --git a/examples/eam/eam.py b/examples/eam/eam.py
index 64a8b804e..ea8c902d5 100644
--- a/examples/eam/eam.py
+++ b/examples/eam/eam.py
@@ -165,7 +165,11 @@ def info(*args, logtype="info", sep=" "):
         % (len(trainset), len(valset), len(testset))
     )
 
-    (train_loader, val_loader, test_loader,) = hydragnn.preprocess.create_dataloaders(
+    (
+        train_loader,
+        val_loader,
+        test_loader,
+    ) = hydragnn.preprocess.create_dataloaders(
         trainset, valset, testset, config["NeuralNetwork"]["Training"]["batch_size"]
     )
     timer.stop()
diff --git a/examples/ising_model/create_configurations.py b/examples/ising_model/create_configurations.py
index 16ac2c07b..140f7a354 100644
--- a/examples/ising_model/create_configurations.py
+++ b/examples/ising_model/create_configurations.py
@@ -8,7 +8,6 @@
 
 
 def write_to_file(total_energy, atomic_features, count_config, dir):
-
     numpy_string_total_value = np.array2string(total_energy)
 
     filetxt = numpy_string_total_value
@@ -40,7 +39,7 @@ def E_dimensionless(config, L, spin_function, scale_spin):
                 spin[x, y, z] = spin_function(config[x, y, z])
 
     count_pos = 0
-    number_nodes = L ** 3
+    number_nodes = L**3
     positions = np.zeros((number_nodes, 3))
     atomic_features = np.zeros((number_nodes, 5))
     for x in range(L):
@@ -76,18 +75,16 @@ def E_dimensionless(config, L, spin_function, scale_spin):
 def create_dataset(
     L, histogram_cutoff, dir, spin_function=lambda x: x, scale_spin=False
 ):
-
     count_config = 0
 
-    for num_downs in tqdm(range(0, L ** 3)):
-
-        primal_configuration = np.ones((L ** 3,))
+    for num_downs in tqdm(range(0, L**3)):
+        primal_configuration = np.ones((L**3,))
         for down in range(0, num_downs):
             primal_configuration[down] = -1.0
 
         # If the current composition has a total number of possible configurations above
         # the hard cutoff threshold, a random configurational subset is picked
-        if scipy.special.binom(L ** 3, num_downs) > histogram_cutoff:
+        if scipy.special.binom(L**3, num_downs) > histogram_cutoff:
             for num_config in range(0, histogram_cutoff):
                 config = np.random.permutation(primal_configuration)
                 config = np.reshape(config, (L, L, L))
@@ -115,7 +112,6 @@ def create_dataset(
 
 
 if __name__ == "__main__":
-
     dir = os.path.join(os.path.dirname(__file__), "../../dataset/ising_model")
     if os.path.exists(dir):
         shutil.rmtree(dir)
diff --git a/examples/ising_model/train_ising.py b/examples/ising_model/train_ising.py
index b4390d9ea..919db604b 100644
--- a/examples/ising_model/train_ising.py
+++ b/examples/ising_model/train_ising.py
@@ -43,7 +43,6 @@
 
 
 def write_to_file(total_energy, atomic_features, count_config, dir, prefix):
-
     numpy_string_total_value = np.array2string(total_energy)
 
     filetxt = numpy_string_total_value
@@ -67,7 +66,7 @@ def create_dataset_mpi(
     comm_size = comm.Get_size()
 
     count_config = 0
-    rx = list(nsplit(range(0, L ** 3), comm_size))[rank]
+    rx = list(nsplit(range(0, L**3), comm_size))[rank]
     info("rx", rx.start, rx.stop)
 
     for num_downs in iterate_tqdm(
@@ -75,13 +74,13 @@ def create_dataset_mpi(
     ):
         prefix = "output_%d_" % num_downs
 
-        primal_configuration = np.ones((L ** 3,))
+        primal_configuration = np.ones((L**3,))
         for down in range(0, num_downs):
             primal_configuration[down] = -1.0
 
         # If the current composition has a total number of possible configurations above
         # the hard cutoff threshold, a random configurational subset is picked
-        if scipy.special.binom(L ** 3, num_downs) > histogram_cutoff:
+        if scipy.special.binom(L**3, num_downs) > histogram_cutoff:
             for num_config in range(0, histogram_cutoff):
                 config = np.random.permutation(primal_configuration)
                 config = np.reshape(config, (L, L, L))
@@ -288,7 +287,11 @@ def info(*args, logtype="info", sep=" "):
         % (len(trainset), len(valset), len(testset))
     )
 
-    (train_loader, val_loader, test_loader,) = hydragnn.preprocess.create_dataloaders(
+    (
+        train_loader,
+        val_loader,
+        test_loader,
+    ) = hydragnn.preprocess.create_dataloaders(
         trainset, valset, testset, config["NeuralNetwork"]["Training"]["batch_size"]
     )
     timer.stop()
diff --git a/examples/lsms/lsms.py b/examples/lsms/lsms.py
index 8d6654e1b..282c740b4 100644
--- a/examples/lsms/lsms.py
+++ b/examples/lsms/lsms.py
@@ -164,7 +164,11 @@ def info(*args, logtype="info", sep=" "):
         % (len(trainset), len(valset), len(testset))
     )
 
-    (train_loader, val_loader, test_loader,) = hydragnn.preprocess.create_dataloaders(
+    (
+        train_loader,
+        val_loader,
+        test_loader,
+    ) = hydragnn.preprocess.create_dataloaders(
         trainset, valset, testset, config["NeuralNetwork"]["Training"]["batch_size"]
     )
     timer.stop()
diff --git a/examples/md17/md17.py b/examples/md17/md17.py
index 6e38a3b3d..bfcd80a38 100644
--- a/examples/md17/md17.py
+++ b/examples/md17/md17.py
@@ -11,6 +11,7 @@
 
 import hydragnn
 
+
 # Update each sample prior to loading.
 def md17_pre_transform(data):
     # Set descriptor as element type.
@@ -68,7 +69,11 @@ def md17_pre_filter(data):
 train, val, test = hydragnn.preprocess.split_dataset(
     dataset, config["NeuralNetwork"]["Training"]["perc_train"], False
 )
-(train_loader, val_loader, test_loader,) = hydragnn.preprocess.create_dataloaders(
+(
+    train_loader,
+    val_loader,
+    test_loader,
+) = hydragnn.preprocess.create_dataloaders(
     train, val, test, config["NeuralNetwork"]["Training"]["batch_size"]
 )
 
diff --git a/examples/ogb/train_gap.py b/examples/ogb/train_gap.py
index 47e360094..1214b66bc 100644
--- a/examples/ogb/train_gap.py
+++ b/examples/ogb/train_gap.py
@@ -334,7 +334,11 @@ def __getitem__(self, idx):
         % (len(trainset), len(valset), len(testset))
     )
 
-    (train_loader, val_loader, test_loader,) = hydragnn.preprocess.create_dataloaders(
+    (
+        train_loader,
+        val_loader,
+        test_loader,
+    ) = hydragnn.preprocess.create_dataloaders(
         trainset, valset, testset, config["NeuralNetwork"]["Training"]["batch_size"]
     )
 
diff --git a/examples/qm9/qm9.py b/examples/qm9/qm9.py
index cd2943a29..e4f80b01b 100644
--- a/examples/qm9/qm9.py
+++ b/examples/qm9/qm9.py
@@ -11,6 +11,7 @@
 
 import hydragnn
 
+
 # Update each sample prior to loading.
 def qm9_pre_transform(data):
     # Set descriptor as element type.
@@ -59,7 +60,11 @@ def qm9_pre_filter(data):
 train, val, test = hydragnn.preprocess.split_dataset(
     dataset, config["NeuralNetwork"]["Training"]["perc_train"], False
 )
-(train_loader, val_loader, test_loader,) = hydragnn.preprocess.create_dataloaders(
+(
+    train_loader,
+    val_loader,
+    test_loader,
+) = hydragnn.preprocess.create_dataloaders(
     train, val, test, config["NeuralNetwork"]["Training"]["batch_size"]
 )
 
diff --git a/hydragnn/models/DIMEStack.py b/hydragnn/models/DIMEStack.py
new file mode 100644
index 000000000..88d177a99
--- /dev/null
+++ b/hydragnn/models/DIMEStack.py
@@ -0,0 +1,172 @@
+"""
+DimeNet
+========
+Directional message passing neural network
+for molecular graphs. The convolutional
+layer uses spherical and radial basis
+functions to perform message passing.
+
+In particular this message passing layer
+relies on the angle formed by the triplet
+of incoming and outgoing messages.
+
+The three key components of this network are
+outlined below: the convolutional network
+that is used for the message passing, the
+triplet function that generates to/from
+information for angular values, and finally
+the radial basis embedding that is used to
+include radial basis information.
+
+"""
+from typing import Callable, Tuple
+from torch_geometric.typing import SparseTensor
+
+import torch
+from torch import Tensor
+from torch.nn import SiLU
+
+from torch_geometric.nn import Linear, Sequential
+from torch_geometric.nn.models.dimenet import (
+    BesselBasisLayer,
+    InteractionBlock,
+    SphericalBasisLayer,
+    OutputBlock,
+)
+from .Base import Base
+
+
+class DIMEStack(Base):
+    """
+    DimeNet stack: derives distances, angles and triplet (to/from) indices
+    from the graph and feeds radial/spherical bases to the conv layers.
+    """
+
+    def __init__(
+        self,
+        num_bilinear,
+        num_radial,
+        num_spherical,
+        radius,
+        envelope_exponent,
+        num_before_skip,
+        num_after_skip,
+        *args,
+        **kwargs
+    ):
+        # Set before super().__init__() - presumably Base reads them via get_conv().
+        self.num_bilinear = num_bilinear
+        self.num_radial = num_radial
+        self.num_spherical = num_spherical
+        self.num_before_skip = num_before_skip
+        self.num_after_skip = num_after_skip
+        self.radius = radius
+
+        super().__init__(*args, **kwargs)
+
+        self.rbf = BesselBasisLayer(num_radial, radius, envelope_exponent)
+        self.sbf = SphericalBasisLayer(
+            num_spherical, num_radial, radius, envelope_exponent
+        )
+
+    def get_conv(self, input_dim, output_dim):
+        """Return a Sequential of EmbeddingBlock, InteractionBlock and OutputBlock."""
+        # NOTE(review): emb emits input_dim channels; inter expects hidden_dim?
+        emb = EmbeddingBlock(self.num_radial, input_dim, act=SiLU())
+        inter = InteractionBlock(
+            hidden_channels=self.hidden_dim,
+            num_bilinear=self.num_bilinear,
+            num_spherical=self.num_spherical,
+            num_radial=self.num_radial,
+            num_before_skip=self.num_before_skip,
+            num_after_skip=self.num_after_skip,
+            act=SiLU(),
+        )
+        dec = OutputBlock(self.num_radial, self.hidden_dim, output_dim, 1, SiLU())
+        return Sequential('x, rbf, sbf, i, j, idx_kj, idx_ji', [
+            (emb, 'x, rbf, i, j -> x1'),
+            (inter, 'x1, rbf, sbf, idx_kj, idx_ji -> x2'),
+            (dec, 'x2, rbf, i -> c'),
+        ])
+
+    def _conv_args(self, data):
+        """Compute the rbf/sbf bases and edge/triplet indices named in get_conv()."""
+        assert (
+            data.pos is not None
+        ), "DimeNet requires node positions (data.pos) to be set."
+        i, j, idx_i, idx_j, idx_k, idx_kj, idx_ji = triplets(
+            data.edge_index, num_nodes=data.x.size(0)
+        )
+        dist = (data.pos[i] - data.pos[j]).pow(2).sum(dim=-1).sqrt()
+
+        # Calculate angles.
+        pos_i = data.pos[idx_i]
+        pos_ji, pos_ki = data.pos[idx_j] - pos_i, data.pos[idx_k] - pos_i
+        a = (pos_ji * pos_ki).sum(dim=-1)
+        # Explicit dim: the default is the first size-3 dim, wrong with 3 triplets.
+        b = torch.cross(pos_ji, pos_ki, dim=-1).norm(dim=-1)
+        angle = torch.atan2(b, a)
+
+        rbf = self.rbf(dist)
+        sbf = self.sbf(dist, angle, idx_kj)
+        return {
+            "rbf": rbf, "sbf": sbf, "i": i, "j": j, "idx_kj": idx_kj, "idx_ji": idx_ji
+        }
+
+
+"""
+Triplets
+---------
+Generates to/from edge_indices for
+angle generating purposes.
+
+"""
+
+
+def triplets(
+    edge_index: Tensor,
+    num_nodes: int,
+) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]:
+    """Return edge endpoints (i, j) plus node/edge indices of k->j->i triplets."""
+    src, dst = edge_index  # j->i
+    edge_ids = torch.arange(src.size(0), device=src.device)
+    adjacency = SparseTensor(
+        row=dst, col=src, value=edge_ids, sparse_sizes=(num_nodes, num_nodes)
+    )
+    per_edge = adjacency[src]
+    counts = per_edge.set_value(None).sum(dim=1).to(torch.long)
+
+    # Node indices (k->j->i) for triplets.
+    targets = dst.repeat_interleave(counts)
+    sources = src.repeat_interleave(counts)
+    neighbours = per_edge.storage.col()
+    keep = targets != neighbours  # Remove i == k triplets.
+    idx_i, idx_j, idx_k = targets[keep], sources[keep], neighbours[keep]
+
+    # Edge indices (k->j, j->i) for triplets.
+    idx_kj = per_edge.storage.value()[keep]
+    idx_ji = per_edge.storage.row()[keep]
+
+    return dst, src, idx_i, idx_j, idx_k, idx_kj, idx_ji
+
+
+class EmbeddingBlock(torch.nn.Module):
+    """Embed each edge from its endpoint features and radial basis values.
+
+    No atomic embedding table here: atomic embeddings are handled by HydraGNN.
+    """
+
+    def __init__(self, num_radial: int, hidden_channels: int, act: Callable):
+        super().__init__()
+        self.act = act
+        self.lin_rbf = Linear(num_radial, hidden_channels)
+        self.lin = Linear(3 * hidden_channels, hidden_channels)
+        self.reset_parameters()
+
+    def reset_parameters(self):
+        for layer in (self.lin_rbf, self.lin):
+            layer.reset_parameters()
+
+    def forward(self, x: Tensor, rbf: Tensor, i: Tensor, j: Tensor) -> Tensor:
+        radial = self.act(self.lin_rbf(rbf))
+        return self.act(self.lin(torch.cat([x[i], x[j], radial], dim=-1)))
\ No newline at end of file
diff --git a/hydragnn/models/PNAStack.py b/hydragnn/models/PNAStack.py
index 427363f1d..a7f5353cb 100644
--- a/hydragnn/models/PNAStack.py
+++ b/hydragnn/models/PNAStack.py
@@ -24,7 +24,6 @@ def __init__(
         *args,
         **kwargs,
     ):
-
         self.aggregators = ["mean", "min", "max", "std"]
         self.scalers = [
             "identity",
diff --git a/hydragnn/models/create.py b/hydragnn/models/create.py
index 806f93199..3d4f966bc 100644
--- a/hydragnn/models/create.py
+++ b/hydragnn/models/create.py
@@ -20,6 +20,7 @@
 from hydragnn.models.CGCNNStack import CGCNNStack
 from hydragnn.models.SAGEStack import SAGEStack
 from hydragnn.models.SCFStack import SCFStack
+from hydragnn.models.DIMEStack import DIMEStack
 
 from hydragnn.utils.distributed import get_device
 from hydragnn.utils.print_utils import print_distributed
@@ -47,6 +48,12 @@ def create_model_config(
         config["Architecture"]["max_neighbours"],
         config["Architecture"]["edge_dim"],
         config["Architecture"]["pna_deg"],
+        config["Architecture"]["num_before_skip"],
+        config["Architecture"]["num_after_skip"],
+        config["Architecture"]["num_bilinear"],
+        config["Architecture"]["num_radial"],
+        config["Architecture"]["num_spherical"],
+        config["Architecture"]["envelope_exponent"],
         config["Architecture"]["num_gaussians"],
         config["Architecture"]["num_filters"],
         config["Architecture"]["radius"],
@@ -72,6 +79,12 @@ def create_model(
     max_neighbours: int = None,
     edge_dim: int = None,
     pna_deg: torch.tensor = None,
+    num_before_skip: int = None,
+    num_after_skip: int = None,
+    num_bilinear: int = None,
+    num_radial: int = None,
+    num_spherical: int = None,
+    envelope_exponent: int = None,
     num_gaussians: int = None,
     num_filters: int = None,
     radius: float = None,
@@ -206,6 +219,37 @@ def create_model(
             num_nodes=num_nodes,
         )
 
+    elif model_type == "DimeNet":
+        assert num_bilinear is not None, "DimeNet requires num_bilinear input."
+        assert num_radial is not None, "DimeNet requires num_radial input."
+        assert num_spherical is not None, "DimeNet requires num_spherical input."
+        assert (
+            envelope_exponent is not None
+        ), "DimeNet requires envelope_exponent input."
+        assert num_before_skip is not None, "DimeNet requires num_before_skip input."
+        assert num_after_skip is not None, "DimeNet requires num_after_skip input."
+        assert radius is not None, "DimeNet requires radius input."
+        model = DIMEStack(
+            num_bilinear,
+            num_radial,
+            num_spherical,
+            radius,
+            envelope_exponent,
+            num_before_skip,
+            num_after_skip,
+            input_dim,
+            hidden_dim,
+            output_dim,
+            output_type,
+            output_heads,
+            loss_function_type,
+            loss_weights=task_weights,
+            freeze_conv=freeze_conv,
+            initial_bias=initial_bias,
+            num_conv_layers=num_conv_layers,
+            num_nodes=num_nodes,
+        )
+
     else:
         raise ValueError("Unknown model_type: {0}".format(model_type))
 
diff --git a/hydragnn/postprocess/visualizer.py b/hydragnn/postprocess/visualizer.py
index 83ee4701f..d43498fb8 100644
--- a/hydragnn/postprocess/visualizer.py
+++ b/hydragnn/postprocess/visualizer.py
@@ -116,7 +116,6 @@ def __scatter_impl(
         y_label=None,
         xylim_equal=False,
     ):
-
         ax.scatter(x, y, s=s, edgecolor="b", marker=marker, facecolor="none")
 
         ax.set_title(title + ", number of samples =" + str(len(x)))
@@ -179,10 +178,10 @@ def create_plot_global_analysis(
             vsum_pred = []
             for isamp in range(nshape[0]):
                 vlen_true.append(
-                    sqrt(sum([comp ** 2 for comp in true_values[isamp][:]]))
+                    sqrt(sum([comp**2 for comp in true_values[isamp][:]]))
                 )
                 vlen_pred.append(
-                    sqrt(sum([comp ** 2 for comp in predicted_values[isamp][:]]))
+                    sqrt(sum([comp**2 for comp in predicted_values[isamp][:]]))
                 )
                 vsum_true.append(sum(true_values[isamp][:]))
                 vsum_pred.append(sum(predicted_values[isamp][:]))
diff --git a/hydragnn/preprocess/cfg_raw_dataset_loader.py b/hydragnn/preprocess/cfg_raw_dataset_loader.py
index b5043abb1..977707b61 100644
--- a/hydragnn/preprocess/cfg_raw_dataset_loader.py
+++ b/hydragnn/preprocess/cfg_raw_dataset_loader.py
@@ -55,7 +55,6 @@ def __transform_CFG_input_to_data_object_base(self, filepath):
         """
 
         if filepath.endswith(".cfg"):
-
             data_object = self.__transform_ASE_object_to_data_object(filepath)
 
             return data_object
@@ -64,7 +63,6 @@ def __transform_CFG_input_to_data_object_base(self, filepath):
             return None
 
     def __transform_ASE_object_to_data_object(self, filepath):
-
         # FIXME:
         #  this still assumes bulk modulus is specific to the CFG format.
         #  To deal with multiple files across formats, one should generalize this function
diff --git a/hydragnn/preprocess/compositional_data_splitting.py b/hydragnn/preprocess/compositional_data_splitting.py
index 574c10dcf..89fdc5655 100644
--- a/hydragnn/preprocess/compositional_data_splitting.py
+++ b/hydragnn/preprocess/compositional_data_splitting.py
@@ -14,6 +14,7 @@
 import torch
 import sklearn
 
+
 # function to return key for any value
 def get_keys(dictionary, val):
     keys = []
diff --git a/hydragnn/preprocess/load_data.py b/hydragnn/preprocess/load_data.py
index 27533fea4..642ee4712 100644
--- a/hydragnn/preprocess/load_data.py
+++ b/hydragnn/preprocess/load_data.py
@@ -225,7 +225,6 @@ def dataset_loading_and_splitting(config: {}):
 
 def create_dataloaders(trainset, valset, testset, batch_size):
     if dist.is_initialized():
-
         train_sampler = torch.utils.data.distributed.DistributedSampler(trainset)
         val_sampler = torch.utils.data.distributed.DistributedSampler(valset)
         test_sampler = torch.utils.data.distributed.DistributedSampler(testset)
@@ -267,7 +266,6 @@ def create_dataloaders(trainset, valset, testset, batch_size):
         )
 
     else:
-
         train_loader = DataLoader(trainset, batch_size=batch_size, shuffle=True)
         val_loader = DataLoader(
             valset,
diff --git a/hydragnn/preprocess/raw_dataset_loader.py b/hydragnn/preprocess/raw_dataset_loader.py
index c0443bf2a..9be45ebe3 100644
--- a/hydragnn/preprocess/raw_dataset_loader.py
+++ b/hydragnn/preprocess/raw_dataset_loader.py
@@ -192,7 +192,6 @@ def scale_features_by_num_nodes(self, dataset):
         return dataset
 
     def normalize_dataset(self):
-
         """Performs the normalization on Data objects and returns the normalized dataset."""
         num_node_features = len(self.node_feature_dim)
         num_graph_features = len(self.graph_feature_dim)
diff --git a/hydragnn/preprocess/serialized_dataset_loader.py b/hydragnn/preprocess/serialized_dataset_loader.py
index 6d3028ea4..be9d8f94f 100644
--- a/hydragnn/preprocess/serialized_dataset_loader.py
+++ b/hydragnn/preprocess/serialized_dataset_loader.py
@@ -238,7 +238,7 @@ def __stratified_sampling(self, dataset: [Data], subsample_percentage: float):
             frequencies = sorted(frequencies[frequencies > 0].tolist())
             category = 0
             for index, frequency in enumerate(frequencies):
-                category += frequency * (100 ** index)
+                category += frequency * (100**index)
             dataset_categories.append(category)
 
         subsample_indices = []
diff --git a/hydragnn/preprocess/utils.py b/hydragnn/preprocess/utils.py
index bf41d7246..ddc6cca77 100644
--- a/hydragnn/preprocess/utils.py
+++ b/hydragnn/preprocess/utils.py
@@ -17,6 +17,7 @@
 import ase.neighborlist
 import os
 
+
 ## This function can be slow if dataset is too large. Use with caution.
 ## Recommend to use check_if_graph_size_variable_dist
 def check_if_graph_size_variable(train_loader, val_loader, test_loader):
diff --git a/hydragnn/run_prediction.py b/hydragnn/run_prediction.py
index 0d997e085..2ff7aecf6 100755
--- a/hydragnn/run_prediction.py
+++ b/hydragnn/run_prediction.py
@@ -31,7 +31,6 @@ def run_prediction(config):
 
 @run_prediction.register
 def _(config_file: str):
-
     with open(config_file, "r") as f:
         config = json.load(f)
 
@@ -40,7 +39,6 @@ def _(config_file: str):
 
 @run_prediction.register
 def _(config: dict):
-
     try:
         os.environ["SERIALIZED_DATA_PATH"]
     except:
diff --git a/hydragnn/run_training.py b/hydragnn/run_training.py
index ade8d1681..c7f2aa863 100644
--- a/hydragnn/run_training.py
+++ b/hydragnn/run_training.py
@@ -46,7 +46,6 @@ def run_training(config):
 
 @run_training.register
 def _(config_file: str):
-
     with open(config_file, "r") as f:
         config = json.load(f)
 
@@ -55,7 +54,6 @@ def _(config_file: str):
 
 @run_training.register
 def _(config: dict):
-
     try:
         os.environ["SERIALIZED_DATA_PATH"]
     except:
diff --git a/hydragnn/train/train_validate_test.py b/hydragnn/train/train_validate_test.py
index 3e165ec68..0cc83d38f 100644
--- a/hydragnn/train/train_validate_test.py
+++ b/hydragnn/train/train_validate_test.py
@@ -373,7 +373,6 @@ def train(
 
 @torch.no_grad()
 def validate(loader, model, verbosity, reduce_ranks=True):
-
     total_error = torch.tensor(0.0, device=get_device())
     tasks_error = torch.zeros(model.module.num_heads, device=get_device())
     num_samples_local = 0
@@ -398,7 +397,6 @@ def validate(loader, model, verbosity, reduce_ranks=True):
 
 @torch.no_grad()
 def test(loader, model, verbosity, reduce_ranks=True, return_samples=True):
-
     total_error = torch.tensor(0.0, device=get_device())
     tasks_error = torch.zeros(model.module.num_heads, device=get_device())
     num_samples_local = 0
diff --git a/hydragnn/utils/abstractrawdataset.py b/hydragnn/utils/abstractrawdataset.py
index edce58e56..f6aafdf2b 100644
--- a/hydragnn/utils/abstractrawdataset.py
+++ b/hydragnn/utils/abstractrawdataset.py
@@ -189,7 +189,6 @@ def __load_raw_data(self):
         self.__normalize_dataset()
 
     def __normalize_dataset(self):
-
         """Performs the normalization on Data objects and returns the normalized dataset."""
         num_node_features = len(self.node_feature_dim)
         num_graph_features = len(self.graph_feature_dim)
@@ -434,7 +433,7 @@ def stratified_sampling(dataset: [Data], subsample_percentage: float, verbosity=
         frequencies = sorted(frequencies[frequencies > 0].tolist())
         category = 0
         for index, frequency in enumerate(frequencies):
-            category += frequency * (100 ** index)
+            category += frequency * (100**index)
         dataset_categories.append(category)
 
     subsample_indices = []
diff --git a/hydragnn/utils/atomicdescriptors.py b/hydragnn/utils/atomicdescriptors.py
index 7c4d95035..b4b2d986b 100644
--- a/hydragnn/utils/atomicdescriptors.py
+++ b/hydragnn/utils/atomicdescriptors.py
@@ -124,7 +124,6 @@ def get_period(self, num_classes=-1):
         return torch.Tensor(period).reshape(len(self.element_types), -1)
 
     def __propertynormalize__(self, prop_list, prop_name):
-
         None_elements = [
             ele for ele, item in zip(self.element_types, prop_list) if item is None
         ]
@@ -138,7 +137,6 @@ def __propertynormalize__(self, prop_list, prop_name):
         return [(item - minval) / (maxval - minval) for item in prop_list]
 
     def __realtocategorical__(self, prop_tensor, num_classes=10):
-
         delval = (prop_tensor.max() - prop_tensor.min()) / num_classes
         categories = torch.minimum(
             (prop_tensor - prop_tensor.min()) / delval, torch.tensor([num_classes - 1])
diff --git a/hydragnn/utils/cfgdataset.py b/hydragnn/utils/cfgdataset.py
index 5e7c59e7d..eecfd00e3 100644
--- a/hydragnn/utils/cfgdataset.py
+++ b/hydragnn/utils/cfgdataset.py
@@ -30,7 +30,6 @@ def __transform_CFG_input_to_data_object_base(self, filepath):
         """
 
         if filepath.endswith(".cfg"):
-
             data_object = self.__transform_ASE_object_to_data_object(filepath)
 
             return data_object
@@ -39,7 +38,6 @@ def __transform_CFG_input_to_data_object_base(self, filepath):
             return None
 
     def __transform_ASE_object_to_data_object(self, filepath):
-
         # FIXME:
         #  this still assumes bulk modulus is specific to the CFG format.
         #  To deal with multiple files across formats, one should generalize this function
diff --git a/hydragnn/utils/config_utils.py b/hydragnn/utils/config_utils.py
index bafbf9a8e..5de7edbc3 100644
--- a/hydragnn/utils/config_utils.py
+++ b/hydragnn/utils/config_utils.py
@@ -59,10 +59,24 @@ def update_config(config, train_loader, val_loader, test_loader):
 
     if "radius" not in config["NeuralNetwork"]["Architecture"]:
         config["NeuralNetwork"]["Architecture"]["radius"] = None
+    # SchNet
     if "num_gaussians" not in config["NeuralNetwork"]["Architecture"]:
         config["NeuralNetwork"]["Architecture"]["num_gaussians"] = None
     if "num_filters" not in config["NeuralNetwork"]["Architecture"]:
         config["NeuralNetwork"]["Architecture"]["num_filters"] = None
+    # DimeNet
+    if "num_before_skip" not in config["NeuralNetwork"]["Architecture"]:
+        config["NeuralNetwork"]["Architecture"]["num_before_skip"] = None
+    if "num_after_skip" not in config["NeuralNetwork"]["Architecture"]:
+        config["NeuralNetwork"]["Architecture"]["num_after_skip"] = None
+    if "num_bilinear" not in config["NeuralNetwork"]["Architecture"]:
+        config["NeuralNetwork"]["Architecture"]["num_bilinear"] = None
+    if "num_radial" not in config["NeuralNetwork"]["Architecture"]:
+        config["NeuralNetwork"]["Architecture"]["num_radial"] = None
+    if "num_spherical" not in config["NeuralNetwork"]["Architecture"]:
+        config["NeuralNetwork"]["Architecture"]["num_spherical"] = None
+    if "envelope_exponent" not in config["NeuralNetwork"]["Architecture"]:
+        config["NeuralNetwork"]["Architecture"]["envelope_exponent"] = None
 
     config["NeuralNetwork"]["Architecture"] = update_config_edge_dim(
         config["NeuralNetwork"]["Architecture"]
diff --git a/hydragnn/utils/distributed.py b/hydragnn/utils/distributed.py
index 69117dcd8..5c16bedd6 100644
--- a/hydragnn/utils/distributed.py
+++ b/hydragnn/utils/distributed.py
@@ -163,14 +163,12 @@ def setup_ddp():
 
 
 def get_device_list():
-
     available_gpus = [i for i in range(torch.cuda.device_count())]
 
     return available_gpus
 
 
 def get_device_name(use_gpu=True, rank_per_model=1, verbosity_level=0):
-
     available_gpus = get_device_list()
     if not use_gpu or not available_gpus:
         print_distributed(verbosity_level, "Using CPU")
@@ -203,12 +201,10 @@ def get_device_name(use_gpu=True, rank_per_model=1, verbosity_level=0):
 
 
 def get_device_from_name(name: str):
-
     return torch.device(name)
 
 
 def get_device(use_gpu=True, rank_per_model=1, verbosity_level=0):
-
     name = get_device_name(use_gpu, rank_per_model, verbosity_level)
     return get_device_from_name(name)
 
diff --git a/hydragnn/utils/smiles_utils.py b/hydragnn/utils/smiles_utils.py
index 30e32a719..dca24908e 100644
--- a/hydragnn/utils/smiles_utils.py
+++ b/hydragnn/utils/smiles_utils.py
@@ -33,7 +33,6 @@ def get_node_attribute_name(types):
 
 
 def generate_graphdata_from_smilestr(simlestr, ytarget, types, var_config=None):
-
     ps = Chem.SmilesParserParams()
     ps.removeHs = False
 
diff --git a/hydragnn/utils/time_utils.py b/hydragnn/utils/time_utils.py
index f30bb9b11..1653ad319 100644
--- a/hydragnn/utils/time_utils.py
+++ b/hydragnn/utils/time_utils.py
@@ -93,7 +93,6 @@ def reset(self):
 
 
 def print_timers(verbosity):
-
     world_size, world_rank = get_comm_size_and_rank()
 
     # With proper lever of verbosity >=1, the local timers will have different values per process
diff --git a/hydragnn/utils/xyzdataset.py b/hydragnn/utils/xyzdataset.py
index b7c89be30..612e8df80 100644
--- a/hydragnn/utils/xyzdataset.py
+++ b/hydragnn/utils/xyzdataset.py
@@ -31,7 +31,6 @@ def __transform_XYZ_input_to_data_object_base(self, filepath):
         """
 
         if filepath.endswith(".xyz"):
-
             data_object = self.__transform_XYZ_ASE_object_to_data_object(filepath)
 
             return data_object
@@ -40,7 +39,6 @@ def __transform_XYZ_input_to_data_object_base(self, filepath):
             return None
 
     def __transform_XYZ_ASE_object_to_data_object(self, filepath):
-
         # FIXME:
         #  this still assumes bulk modulus is specific to the XYZ format.
 
diff --git a/setup.py b/setup.py
index ff0727869..e1580a0c4 100644
--- a/setup.py
+++ b/setup.py
@@ -3,6 +3,7 @@
 
 # Note: setup() has access to cmd arguments of the setup.py script via sys.argv
 
+
 # Utility function to read the README file.
 def read(fname):
     return open(os.path.join(os.path.dirname(__file__), fname)).read()
diff --git a/tests/deterministic_graph_data.py b/tests/deterministic_graph_data.py
index a383cc75d..0a8d95a38 100755
--- a/tests/deterministic_graph_data.py
+++ b/tests/deterministic_graph_data.py
@@ -130,8 +130,8 @@ def create_configuration(
         knn.fit(positions, node_feature)
         node_output_x = torch.Tensor(knn.predict(positions))
 
-    node_output_x_square = node_output_x ** 2 + node_feature
-    node_output_x_cube = node_output_x ** 3
+    node_output_x_square = node_output_x**2 + node_feature
+    node_output_x_cube = node_output_x**3
 
     updated_table = torch.cat(
         (
diff --git a/tests/inputs/ci.json b/tests/inputs/ci.json
index 3a141cb74..4d9040428 100644
--- a/tests/inputs/ci.json
+++ b/tests/inputs/ci.json
@@ -30,6 +30,12 @@
             "max_neighbours": 100,
             "num_gaussians": 50,
             "num_filters": 126,
+            "num_before_skip": 1,
+            "num_after_skip": 1,
+            "num_bilinear": 2,
+            "num_radial": 2,
+            "num_spherical": 2,
+            "envelope_exponent": 5,
             "periodic_boundary_conditions": false,
             "hidden_dim": 8,
             "num_conv_layers": 2,
diff --git a/tests/inputs/ci_multihead.json b/tests/inputs/ci_multihead.json
index aeb89f267..51f3d7993 100644
--- a/tests/inputs/ci_multihead.json
+++ b/tests/inputs/ci_multihead.json
@@ -28,6 +28,12 @@
             "max_neighbours": 100,
             "num_gaussians": 50,
             "num_filters": 126,
+            "num_before_skip": 1,
+            "num_after_skip": 1,
+            "num_bilinear": 2,
+            "num_radial": 2,
+            "num_spherical": 2,
+            "envelope_exponent": 5,
             "periodic_boundary_conditions": false,
             "hidden_dim": 8,
             "num_conv_layers": 2,
diff --git a/tests/test_config.py b/tests/test_config.py
index bac3caf7c..ff51a9b6c 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -16,7 +16,6 @@
 @pytest.mark.parametrize("config_file", ["lsms/lsms.json"])
 @pytest.mark.mpi_skip()
 def pytest_config(config_file):
-
     config_file = os.path.join("examples", config_file)
     with open(config_file, "r") as f:
         config = json.load(f)
diff --git a/tests/test_enthalpy.py b/tests/test_enthalpy.py
index 4fd7ac04c..63dc9f8a2 100644
--- a/tests/test_enthalpy.py
+++ b/tests/test_enthalpy.py
@@ -19,7 +19,6 @@
 
 
 def unittest_formation_enthalpy():
-
     dir = "dataset/unit_test_enthalpy"
     if not os.path.exists(dir):
         os.makedirs(dir)
diff --git a/tests/test_graphs.py b/tests/test_graphs.py
index 75ce792df..78f17c5d1 100755
--- a/tests/test_graphs.py
+++ b/tests/test_graphs.py
@@ -131,6 +131,7 @@ def unittest_train_model(model_type, ci_input, use_lengths, overwrite_data=False
         "GAT": [0.60, 0.70],
         "CGCNN": [0.50, 0.40],
         "SchNet": [0.20, 0.20],
+        "DimeNet": [0.20, 0.20],
     }
     if use_lengths and ("vector" not in ci_input):
         thresholds["CGCNN"] = [0.175, 0.175]
@@ -173,7 +174,8 @@ def unittest_train_model(model_type, ci_input, use_lengths, overwrite_data=False
 
 # Test across all models with both single/multihead
 @pytest.mark.parametrize(
-    "model_type", ["SAGE", "GIN", "GAT", "MFC", "PNA", "CGCNN", "SchNet"]
+    "model_type",
+    ["SAGE", "GIN", "GAT", "MFC", "PNA", "CGCNN", "SchNet", "DimeNet"],
 )
 @pytest.mark.parametrize("ci_input", ["ci.json", "ci_multihead.json"])
 def pytest_train_model(model_type, ci_input, overwrite_data=False):
@@ -181,7 +183,7 @@ def pytest_train_model(model_type, ci_input, overwrite_data=False):
 
 
 # Test only models
-@pytest.mark.parametrize("model_type", ["PNA", "CGCNN", "SchNet"])
+@pytest.mark.parametrize("model_type", ["PNA", "CGCNN", "SchNet", "DimeNet"])
 def pytest_train_model_lengths(model_type, overwrite_data=False):
     unittest_train_model(model_type, "ci.json", True, overwrite_data)
 
diff --git a/utils/lsms/compositional_histogram_cutoff.py b/utils/lsms/compositional_histogram_cutoff.py
index 9bae71f7a..749422a54 100644
--- a/utils/lsms/compositional_histogram_cutoff.py
+++ b/utils/lsms/compositional_histogram_cutoff.py
@@ -41,7 +41,6 @@ def compositional_histogram_cutoff(
     comp_final = []
     comp_all = np.zeros([num_bins])
     for filename in tqdm(os.listdir(dir)):
-
         path = os.path.join(dir, filename)
         # This is LSMS specific - it assumes only one header line and only atoms following.
         atoms = np.loadtxt(path, skiprows=1)
diff --git a/utils/lsms/convert_total_energy_to_formation_gibbs.py b/utils/lsms/convert_total_energy_to_formation_gibbs.py
index 620eeafbf..b2e6617f6 100644
--- a/utils/lsms/convert_total_energy_to_formation_gibbs.py
+++ b/utils/lsms/convert_total_energy_to_formation_gibbs.py
@@ -51,7 +51,6 @@ def convert_raw_data_energy_to_gibbs(
     # Search for the configurations with pure elements and store their total energy
     all_files = os.listdir(dir)
     for filename in tqdm(all_files):
-
         path = os.path.join(dir, filename)
         total_energy, txt = read_file(path)
         atoms = np.loadtxt(txt[1:])
@@ -73,7 +72,6 @@ def convert_raw_data_energy_to_gibbs(
     # compute thermodynamic entropy
     # compute formation gibbs energy using formation enthalpy and thermodynamic entropy
     for fn, filename in enumerate(tqdm(all_files)):
-
         path = os.path.join(dir, filename)
         total_energy_txt, txt = read_file(path)
         atoms = np.loadtxt(txt[1:])
@@ -143,7 +141,6 @@ def convert_raw_data_energy_to_gibbs(
 def compute_formation_enthalpy(
     path, elements_list, pure_elements_energy, total_energy, atoms
 ):
-
     # FIXME: this currently works only for binary alloys
 
     elements, counts = np.unique(atoms[:, 0], return_counts=True)