diff --git a/include/ParallelPriotityQueue/QNetwork.hpp b/include/ParallelPriotityQueue/QNetwork.hpp
index 36af2db..6bfdefb 100644
--- a/include/ParallelPriotityQueue/QNetwork.hpp
+++ b/include/ParallelPriotityQueue/QNetwork.hpp
@@ -21,6 +21,7 @@ limitations under the License.
 #include <algorithm>
 #include <array>
 #include <iostream>
+#include <numeric>
 
 namespace spapq {
 
@@ -71,6 +72,7 @@ struct QNetwork {
     constexpr bool isStronglyConnected() const;
     constexpr bool isValidQNetwork() const;
 
+    constexpr std::size_t gcdBatchSize() const;
     constexpr std::size_t maxBatchSize() const;
     constexpr std::size_t maxPortNum() const;
 
@@ -505,4 +507,15 @@ inline constexpr std::size_t QNetwork<workers, channels>::target(std::size_t cha
     return tgt;
 }
 
+/**
+ * @brief Computes the greatest common divisor of all entries of batchSize_.
+ * @return The GCD; 0 when batchSize_ holds no entries or every entry is 0.
+ */
+template <std::size_t workers, std::size_t channels>
+constexpr std::size_t QNetwork<workers, channels>::gcdBatchSize() const {
+    std::size_t divisor = 0U;        // std::gcd(0, x) == x, so 0 is a neutral start value
+    for (const std::size_t size : batchSize_) { divisor = std::gcd(divisor, size); }
+    return divisor;
+}
+
 }        // end namespace spapq
diff --git a/include/ParallelPriotityQueue/SpapQueueWorker.hpp b/include/ParallelPriotityQueue/SpapQueueWorker.hpp
index 86122bc..7830cb1 100644
--- a/include/ParallelPriotityQueue/SpapQueueWorker.hpp
+++ b/include/ParallelPriotityQueue/SpapQueueWorker.hpp
@@ -292,6 +292,19 @@ inline void WorkerResource<GlobalQType, LocalQType, numPorts>::pushOutBufferSelf
     }
 
     bufferTail_ += numElements;
+
+    // Realign outBuffer
+    if constexpr (GlobalQType::netw_.gcdBatchSize() > 1U) {
+        if (numElements % GlobalQType::netw_.gcdBatchSize() != 0U) [[unlikely]] {
+            if (bufferTail_ == bufferHead_) [[likely]] {        // should always be the case
+                const std::size_t residue = bufferTail_ % GlobalQType::netw_.gcdBatchSize();
+                const std::size_t shift = (residue == 0U) ? 0U : GlobalQType::netw_.gcdBatchSize() - residue;
+
+                bufferTail_ += shift;
+                bufferHead_ += shift;
+            }
+        }
+    }
 }
 
 /**
diff --git a/tests/QNetwork.cpp b/tests/QNetwork.cpp
index e2e8d63..3d0c4e2 100644
--- a/tests/QNetwork.cpp
+++ b/tests/QNetwork.cpp
@@ -30,17 +30,18 @@ using namespace spapq;
 
 TEST(QNetworkTest, Constructors1) {
     constexpr QNetwork<4, 4> netw(
-        {0, 1, 2, 3, 4}, {1, 2, 3, 0}, {11, 12, 13, 14}, {10, 9, 8, 7}, {1, 2, 3, 4});
+        {0, 1, 2, 3, 4}, {1, 2, 3, 0}, {11, 12, 13, 14}, {10, 9, 8, 7}, {2, 4, 6, 8});
     EXPECT_EQ(netw.numWorkers_, 4);
     EXPECT_EQ(netw.numChannels_, 4);
     for (std::size_t i = 0; i < 5; ++i) { EXPECT_EQ(netw.vertexPointer_[i], i); }
     for (std::size_t i = 0; i < 4; ++i) { EXPECT_EQ(netw.logicalCore_[i], i + 11U); }
     for (std::size_t i = 0; i < 4; ++i) { EXPECT_EQ(netw.edgeTargets_[i], (i + 1) % 4); }
     for (std::size_t i = 0; i < 4; ++i) { EXPECT_EQ(netw.multiplicities_[i], 10U - i); }
-    for (std::size_t i = 0; i < 4; ++i) { EXPECT_EQ(netw.batchSize_[i], (i + 1)); }
+    for (std::size_t i = 0; i < 4; ++i) { EXPECT_EQ(netw.batchSize_[i], 2U * (i + 1)); }
 
     EXPECT_EQ(netw.enqueueFrequency_, 16U);
-    EXPECT_EQ(netw.maxBatchSize(), 4U);
+    EXPECT_EQ(netw.gcdBatchSize(), 2U);
+    EXPECT_EQ(netw.maxBatchSize(), 8U);
     EXPECT_TRUE(netw.hasHomogeneousInPorts());
     EXPECT_TRUE(netw.hasHomogeneousOutPorts());
     EXPECT_TRUE(netw.hasHomogeneousPorts());
@@ -63,6 +64,7 @@ TEST(QNetworkTest, Constructors2) {
     for (std::size_t i = 0; i < 4; ++i) { EXPECT_EQ(netw.batchSize_[i], 1); }
 
     EXPECT_TRUE(netw.hasSeparateLogicalCores());
+    EXPECT_EQ(netw.gcdBatchSize(), 1U);
 }
 
 TEST(QNetworkTest, Ports1) {
@@ -394,7 +396,8 @@ TEST(QNetworkTest, Connectivity) {
 TEST(QNetworkTest, SrcTgt) {
     constexpr QNetwork<10, 30> netw1 = PETERSEN_GRAPH;
     for (std::size_t worker = 0U; worker < netw1.numWorkers_; ++worker) {
-        for (std::size_t channel = netw1.vertexPointer_[worker]; channel < netw1.vertexPointer_[worker + 1U]; ++channel) {
+        for (std::size_t channel = netw1.vertexPointer_[worker]; channel < netw1.vertexPointer_[worker + 1U];
+             ++channel) {
             EXPECT_EQ(netw1.source(channel), worker);
             EXPECT_EQ(netw1.target(channel), netw1.edgeTargets_[channel]);
         }
@@ -403,7 +406,8 @@ TEST(QNetworkTest, SrcTgt) {
 
     constexpr auto netw2 = FULLY_CONNECTED_GRAPH<9U>();
     for (std::size_t worker = 0U; worker < netw2.numWorkers_; ++worker) {
-        for (std::size_t channel = netw2.vertexPointer_[worker]; channel < netw2.vertexPointer_[worker + 1U]; ++channel) {
+        for (std::size_t channel = netw2.vertexPointer_[worker]; channel < netw2.vertexPointer_[worker + 1U];
+             ++channel) {
             EXPECT_EQ(netw2.source(channel), worker);
             if (channel % 9U == 0U) {
                 EXPECT_EQ(netw2.target(channel), worker);