From 6b71dc120c3e91c951014086b08400680caec0f0 Mon Sep 17 00:00:00 2001
From: shaia <shaia@outlook.com>
Date: Sun, 8 Mar 2026 06:09:23 +0200
Subject: [PATCH 01/20] feat: Add energy equation solver with Boussinesq
 buoyancy coupling

Implement temperature advection-diffusion (ROADMAP 2.1) with explicit
Euler time integration, heat source callbacks, and Boussinesq buoyancy
body force. Integrated into all three CPU solvers (explicit Euler,
projection, RK2) with thermal diffusion CFL constraint. Disabled by
default (alpha=0) for full backward compatibility.

Validated with de Vahl Davis differentially heated cavity (Ra=1000).
---
 CMakeLists.txt                                |  10 +
 lib/CMakeLists.txt                            |   3 +
 lib/include/cfd/solvers/energy_solver.h       |  67 +++
 .../cfd/solvers/navier_stokes_solver.h        |  24 ++
 lib/src/solvers/energy/cpu/energy_solver.c    | 124 ++++++
 .../navier_stokes/cpu/solver_explicit_euler.c |  38 +-
 .../navier_stokes/cpu/solver_projection.c     |  15 +
 .../solvers/navier_stokes/cpu/solver_rk2.c    |  22 +-
 tests/solvers/energy/test_energy_solver.c     | 380 ++++++++++++++++++
 tests/validation/test_natural_convection.c    | 230 +++++++++++
 10 files changed, 906 insertions(+), 7 deletions(-)
 create mode 100644 lib/include/cfd/solvers/energy_solver.h
 create mode 100644 lib/src/solvers/energy/cpu/energy_solver.c
 create mode 100644 tests/solvers/energy/test_energy_solver.c
 create mode 100644 tests/validation/test_natural_convection.c

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 51788204..e715bd38 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -297,6 +297,10 @@ if(BUILD_TESTS)
     add_executable(test_taylor_green_3d tests/validation/test_taylor_green_3d.c)
     add_executable(test_quiescent_3d tests/validation/test_quiescent_3d.c)
 
+    # Energy equation tests
+    add_executable(test_energy_solver tests/solvers/energy/test_energy_solver.c)
+    add_executable(test_natural_convection tests/validation/test_natural_convection.c)
+
     # Mathematical accuracy tests
     add_executable(test_finite_differences tests/math/test_finite_differences.c)
     add_executable(test_finite_differences_3d tests/math/test_finite_differences_3d.c)
@@ -398,6 +402,8 @@ if(BUILD_TESTS)
     target_link_libraries(test_quiescent_3d PRIVATE CFD::Library unity)
     target_link_libraries(test_poiseuille_flow PRIVATE CFD::Library unity)
     target_link_libraries(test_poiseuille_3d PRIVATE CFD::Library unity)
+    target_link_libraries(test_energy_solver PRIVATE CFD::Library unity)
+    target_link_libraries(test_natural_convection PRIVATE CFD::Library unity)
     target_link_libraries(test_finite_differences PRIVATE unity $<$<NOT:$<PLATFORM_ID:Windows>>:m>)
     target_include_directories(test_finite_differences PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/lib/include)
     target_link_libraries(test_finite_differences_3d PRIVATE unity $<$<NOT:$<PLATFORM_ID:Windows>>:m>)
@@ -519,6 +525,10 @@ if(BUILD_TESTS)
     add_test(NAME PoiseuilleFlowTest COMMAND test_poiseuille_flow)
     add_test(NAME Poiseuille3DTest COMMAND test_poiseuille_3d)
 
+    # Energy equation tests
+    add_test(NAME EnergySolverTest COMMAND test_energy_solver)
+    add_test(NAME NaturalConvectionTest COMMAND test_natural_convection)
+
     # Mathematical accuracy tests
     add_test(NAME FiniteDifferencesTest COMMAND test_finite_differences)
     add_test(NAME FiniteDifferences3DTest COMMAND test_finite_differences_3d)
diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt
index 1035649e..2ce3c208 100644
--- a/lib/CMakeLists.txt
+++ b/lib/CMakeLists.txt
@@ -118,6 +118,8 @@ set(CFD_SCALAR_SOURCES
     src/solvers/navier_stokes/cpu/solver_explicit_euler.c
     src/solvers/navier_stokes/cpu/solver_rk2.c
     src/solvers/navier_stokes/cpu/solver_projection.c
+    # Energy equation solver - scalar
+    src/solvers/energy/cpu/energy_solver.c
     # Linear solvers - scalar implementations only
     src/solvers/linear/cpu/linear_solver_jacobi.c
     src/solvers/linear/cpu/linear_solver_sor.c
@@ -574,6 +576,7 @@ set(CFD_LIBRARY_HEADERS
     ${CMAKE_CURRENT_SOURCE_DIR}/include/cfd/core/cpu_features.h
     ${CMAKE_CURRENT_SOURCE_DIR}/include/cfd/solvers/navier_stokes_solver.h
     ${CMAKE_CURRENT_SOURCE_DIR}/include/cfd/solvers/poisson_solver.h
+    ${CMAKE_CURRENT_SOURCE_DIR}/include/cfd/solvers/energy_solver.h
     ${CMAKE_CURRENT_SOURCE_DIR}/include/cfd/boundary/boundary_conditions.h
     ${CMAKE_CURRENT_SOURCE_DIR}/include/cfd/io/vtk_output.h
     ${CMAKE_CURRENT_SOURCE_DIR}/include/cfd/io/csv_output.h
diff --git a/lib/include/cfd/solvers/energy_solver.h b/lib/include/cfd/solvers/energy_solver.h
new file mode 100644
index 00000000..c98abbce
--- /dev/null
+++ b/lib/include/cfd/solvers/energy_solver.h
@@ -0,0 +1,67 @@
+/**
+ * @file energy_solver.h
+ * @brief Energy equation solver for temperature advection-diffusion
+ *
+ * Solves the energy equation for incompressible flow:
+ *   dT/dt + u*nabla(T) = alpha * nabla^2(T) + Q
+ *
+ * where alpha = k/(rho*cp) is thermal diffusivity and Q is a heat source.
+ *
+ * Boussinesq buoyancy coupling adds a body force to the momentum equations:
+ *   F_buoy = -rho_0 * beta * (T - T_ref) * g
+ *
+ * The energy equation is solved as a post-step after velocity advancement,
+ * using the updated velocity field for advection.
+ */
+
+#ifndef CFD_ENERGY_SOLVER_H
+#define CFD_ENERGY_SOLVER_H
+
+#include "cfd/cfd_export.h"
+#include "cfd/core/cfd_status.h"
+#include "cfd/core/grid.h"
+#include "cfd/solvers/navier_stokes_solver.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Advance the temperature field by one explicit Euler step.
+ *
+ * Computes: T^{n+1} = T^n + dt * (-u*nabla(T) + alpha*nabla^2(T) + Q)
+ *
+ * Only active when params->alpha > 0. Returns immediately otherwise.
+ *
+ * @param field  Flow field (T is updated in-place, u/v/w are read-only)
+ * @param grid   Computational grid
+ * @param params Solver parameters (alpha, heat_source_func, dt)
+ * @param dt     Time step size
+ * @return CFD_SUCCESS, or CFD_ERROR_DIVERGED if NaN detected in T
+ */
+CFD_LIBRARY_EXPORT cfd_status_t energy_step_explicit(flow_field* field, const grid* grid,
+                                                      const ns_solver_params_t* params,
+                                                      double dt);
+
+/**
+ * Compute Boussinesq buoyancy source terms from the temperature field.
+ *
+ * Adds buoyancy force: source_{u,v,w} += -beta * (T[idx] - T_ref) * g_{x,y,z}
+ *
+ * Only active when params->beta != 0. Does nothing otherwise.
+ *
+ * @param T_local   Temperature at the current grid point
+ * @param params    Solver parameters (beta, T_ref, gravity)
+ * @param source_u  Output: added u-momentum buoyancy source
+ * @param source_v  Output: added v-momentum buoyancy source
+ * @param source_w  Output: added w-momentum buoyancy source
+ */
+CFD_LIBRARY_EXPORT void energy_compute_buoyancy(double T_local, const ns_solver_params_t* params,
+                                                 double* source_u, double* source_v,
+                                                 double* source_w);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* CFD_ENERGY_SOLVER_H */
diff --git a/lib/include/cfd/solvers/navier_stokes_solver.h b/lib/include/cfd/solvers/navier_stokes_solver.h
index 1f983653..6455afab 100644
--- a/lib/include/cfd/solvers/navier_stokes_solver.h
+++ b/lib/include/cfd/solvers/navier_stokes_solver.h
@@ -78,6 +78,18 @@ typedef void (*ns_source_func_t)(double x, double y, double z, double t,
                                   double* source_u, double* source_v,
                                   double* source_w);
 
+/**
+ * Custom heat source callback function
+ * @param x X coordinate
+ * @param y Y coordinate
+ * @param z Z coordinate (0.0 for 2D)
+ * @param t Physical time
+ * @param context User-provided context pointer
+ * @return Heat source term Q at the given point
+ */
+typedef double (*ns_heat_source_func_t)(double x, double y, double z, double t,
+                                         void* context);
+
 /**
  * Navier-Stokes solver parameters
  */
@@ -99,6 +111,16 @@ typedef struct {
     /* Custom source term callback (NULL = use default) */
     ns_source_func_t source_func;  /**< Custom source term function pointer */
     void* source_context;           /**< User context for source_func */
+
+    /* Energy equation parameters (alpha > 0 enables energy equation) */
+    double alpha;           /**< Thermal diffusivity k/(rho*cp) [m^2/s], 0 = disabled */
+    double beta;            /**< Thermal expansion coefficient [1/K] (Boussinesq) */
+    double T_ref;           /**< Reference temperature [K] for Boussinesq */
+    double gravity[3];      /**< Gravity vector (gx, gy, gz) [m/s^2] */
+
+    /* Custom heat source callback (NULL = no heat source) */
+    ns_heat_source_func_t heat_source_func;  /**< Heat source function pointer */
+    void* heat_source_context;                /**< User context for heat_source_func */
 } ns_solver_params_t;
 
 
@@ -144,6 +166,7 @@ typedef struct {
     double residual;         /**< Final residual norm */
     double max_velocity;     /**< Maximum velocity magnitude */
     double max_pressure;     /**< Maximum pressure */
+    double max_temperature;  /**< Maximum temperature (when energy equation active) */
     double cfl_number;       /**< Actual CFL number used */
     double elapsed_time_ms;  /**< Wall clock time for solve */
     cfd_status_t status;     /**< Status of the solve */
@@ -338,6 +361,7 @@ static inline ns_solver_stats_t ns_solver_stats_default(void) {
     stats.residual = 0.0;
     stats.max_velocity = 0.0;
     stats.max_pressure = 0.0;
+    stats.max_temperature = 0.0;
     stats.cfl_number = 0.0;
     stats.elapsed_time_ms = 0.0;
     stats.status = CFD_SUCCESS;
diff --git a/lib/src/solvers/energy/cpu/energy_solver.c b/lib/src/solvers/energy/cpu/energy_solver.c
new file mode 100644
index 00000000..1687f9e6
--- /dev/null
+++ b/lib/src/solvers/energy/cpu/energy_solver.c
@@ -0,0 +1,124 @@
+/**
+ * @file energy_solver.c
+ * @brief Scalar CPU implementation of the energy equation solver
+ *
+ * Solves: dT/dt + u*nabla(T) = alpha * nabla^2(T) + Q
+ * using explicit Euler time integration and central differences.
+ *
+ * Branch-free 3D: when nz==1, stride_z=0 and inv_2dz/inv_dz2=0.0 cause all
+ * z-terms to vanish, producing identical results to a 2D code path.
+ */
+
+#include "cfd/solvers/energy_solver.h"
+
+#include "cfd/core/indexing.h"
+#include "cfd/core/memory.h"
+
+#include <math.h>
+#include <stdio.h>
+#include <string.h>
+
+cfd_status_t energy_step_explicit(flow_field* field, const grid* grid,
+                                   const ns_solver_params_t* params,
+                                   double dt) {
+    /* Skip when energy equation is disabled */
+    if (params->alpha <= 0.0) {
+        return CFD_SUCCESS;
+    }
+
+    size_t nx = field->nx;
+    size_t ny = field->ny;
+    size_t nz = field->nz;
+    size_t plane = nx * ny;
+    size_t total = plane * nz;
+    double alpha = params->alpha;
+
+    /* Branch-free 3D constants */
+    size_t stride_z = (nz > 1) ? plane : 0;
+    size_t k_start  = (nz > 1) ? 1 : 0;
+    size_t k_end    = (nz > 1) ? (nz - 1) : 1;
+    double inv_2dz  = (nz > 1 && grid->dz) ? 1.0 / (2.0 * grid->dz[0]) : 0.0;
+    double inv_dz2  = (nz > 1 && grid->dz) ? 1.0 / (grid->dz[0] * grid->dz[0]) : 0.0;
+
+    /* Allocate temporary array for new temperature */
+    double* T_new = (double*)cfd_calloc(total, sizeof(double));
+    if (!T_new) {
+        return CFD_ERROR_NOMEM;
+    }
+    memcpy(T_new, field->T, total * sizeof(double));
+
+    for (size_t k = k_start; k < k_end; k++) {
+        for (size_t j = 1; j < ny - 1; j++) {
+            for (size_t i = 1; i < nx - 1; i++) {
+                size_t idx = k * stride_z + IDX_2D(i, j, nx);
+
+                double T_c = field->T[idx];
+                double u_c = field->u[idx];
+                double v_c = field->v[idx];
+                double w_c = field->w[idx];
+
+                double dx = grid->dx[i];
+                double dy = grid->dy[j];
+
+                /* Advection: u * dT/dx + v * dT/dy + w * dT/dz */
+                double dT_dx = (field->T[idx + 1] - field->T[idx - 1]) / (2.0 * dx);
+                double dT_dy = (field->T[idx + nx] - field->T[idx - nx]) / (2.0 * dy);
+                double dT_dz = (field->T[idx + stride_z] - field->T[idx - stride_z]) * inv_2dz;
+
+                double advection = u_c * dT_dx + v_c * dT_dy + w_c * dT_dz;
+
+                /* Diffusion: alpha * (d2T/dx2 + d2T/dy2 + d2T/dz2) */
+                double d2T_dx2 = (field->T[idx + 1] - 2.0 * T_c + field->T[idx - 1]) / (dx * dx);
+                double d2T_dy2 = (field->T[idx + nx] - 2.0 * T_c + field->T[idx - nx]) / (dy * dy);
+                double d2T_dz2 = (field->T[idx + stride_z] - 2.0 * T_c +
+                                   field->T[idx - stride_z]) * inv_dz2;
+
+                double diffusion = alpha * (d2T_dx2 + d2T_dy2 + d2T_dz2);
+
+                /* Heat source term */
+                double Q = 0.0;
+                if (params->heat_source_func) {
+                    double x = grid->x[i];
+                    double y = grid->y[j];
+                    double z = (nz > 1 && grid->z) ? grid->z[k] : 0.0;
+                    Q = params->heat_source_func(x, y, z, 0.0,
+                                                  params->heat_source_context);
+                }
+
+                /* Explicit Euler update */
+                double dT = dt * (-advection + diffusion + Q);
+                T_new[idx] = T_c + dT;
+            }
+        }
+    }
+
+    /* Check for NaN/Inf */
+    for (size_t n = 0; n < total; n++) {
+        if (!isfinite(T_new[n])) {
+            size_t ni = n % nx;
+            size_t nj = (n / nx) % ny;
+            fprintf(stderr, "energy_solver: NaN/Inf at idx=%zu (i=%zu,j=%zu) T_new=%e field->T=%e\n",
+                    n, ni, nj, T_new[n], field->T[n]);
+            cfd_free(T_new);
+            return CFD_ERROR_DIVERGED;
+        }
+    }
+
+    memcpy(field->T, T_new, total * sizeof(double));
+    cfd_free(T_new);
+
+    return CFD_SUCCESS;
+}
+
+void energy_compute_buoyancy(double T_local, const ns_solver_params_t* params,
+                              double* source_u, double* source_v,
+                              double* source_w) {
+    if (params->beta == 0.0) {
+        return;
+    }
+
+    double dT = T_local - params->T_ref;
+    *source_u += -params->beta * dT * params->gravity[0];
+    *source_v += -params->beta * dT * params->gravity[1];
+    *source_w += -params->beta * dT * params->gravity[2];
+}
diff --git a/lib/src/solvers/navier_stokes/cpu/solver_explicit_euler.c b/lib/src/solvers/navier_stokes/cpu/solver_explicit_euler.c
index 0429cf6d..801d3887 100644
--- a/lib/src/solvers/navier_stokes/cpu/solver_explicit_euler.c
+++ b/lib/src/solvers/navier_stokes/cpu/solver_explicit_euler.c
@@ -6,6 +6,7 @@
 #include "cfd/core/memory.h"
 
 
+#include "cfd/solvers/energy_solver.h"
 #include "cfd/solvers/navier_stokes_solver.h"
 
 #include "../boundary_copy_utils.h"
@@ -64,7 +65,13 @@ ns_solver_params_t ns_solver_params_default(void) {
                             .source_amplitude_u = DEFAULT_SOURCE_AMPLITUDE_U,
                             .source_amplitude_v = DEFAULT_SOURCE_AMPLITUDE_V,
                             .source_decay_rate = DEFAULT_SOURCE_DECAY_RATE,
-                            .pressure_coupling = DEFAULT_PRESSURE_COUPLING};
+                            .pressure_coupling = DEFAULT_PRESSURE_COUPLING,
+                            .alpha = 0.0,
+                            .beta = 0.0,
+                            .T_ref = 0.0,
+                            .gravity = {0.0, 0.0, 0.0},
+                            .heat_source_func = NULL,
+                            .heat_source_context = NULL};
     return params;
 }
 flow_field* flow_field_create(size_t nx, size_t ny, size_t nz) {
@@ -202,11 +209,21 @@ void compute_time_step(flow_field* field, const grid* grid, ns_solver_params_t*
     // Compute time step based on CFL condition with safety factor
     double dt_cfl = params->cfl * dmin / max_speed;
 
+    // Thermal diffusion stability constraint: dt < dx^2 / (2 * alpha * ndim)
+    double dt_thermal = dt_cfl;
+    if (params->alpha > 0.0) {
+        int ndim = (grid->nz > 1) ? 3 : 2;
+        dt_thermal = (dmin * dmin) / (2.0 * params->alpha * ndim);
+        dt_thermal *= params->cfl;  // Apply safety factor
+    }
+
+    double dt_stable = min_double(dt_cfl, dt_thermal);
+
     // Limit time step to reasonable bounds
     double dt_max = DT_MAX_LIMIT;  // Maximum allowed time step
     double dt_min = DT_MIN_LIMIT;  // Minimum allowed time step
 
-    params->dt = max_double(dt_min, min_double(dt_max, dt_cfl));
+    params->dt = max_double(dt_min, min_double(dt_max, dt_stable));
 }
 
 void apply_boundary_conditions(flow_field* field, const grid* grid) {
@@ -458,6 +475,10 @@ cfd_status_t explicit_euler_impl(flow_field* field, const grid* grid, const ns_s
                     compute_source_terms(x, y, z, iter, conservative_dt, params,
                                          &source_u, &source_v, &source_w);
 
+                    /* Boussinesq buoyancy source */
+                    energy_compute_buoyancy(field->T[idx], params,
+                                            &source_u, &source_v, &source_w);
+
                     /* u-momentum */
                     double du = conservative_dt *
                         (-u_c * du_dx - v_c * du_dy - w_c * du_dz
@@ -495,7 +516,6 @@ cfd_status_t explicit_euler_impl(flow_field* field, const grid* grid, const ns_s
                     p_new[idx] = field->p[idx] + dp;
 
                     rho_new[idx] = field->rho[idx];
-                    t_new[idx] = field->T[idx];
                 }
             }
         }
@@ -506,7 +526,17 @@ cfd_status_t explicit_euler_impl(flow_field* field, const grid* grid, const ns_s
         memcpy(field->w, w_new, bytes);
         memcpy(field->p, p_new, bytes);
         memcpy(field->rho, rho_new, bytes);
-        memcpy(field->T, t_new, bytes);
+
+        /* Energy equation: advance temperature using updated velocity */
+        {
+            cfd_status_t energy_status = energy_step_explicit(field, grid, params,
+                                                               conservative_dt);
+            if (energy_status != CFD_SUCCESS) {
+                cfd_free(u_new); cfd_free(v_new); cfd_free(w_new);
+                cfd_free(p_new); cfd_free(rho_new); cfd_free(t_new);
+                return energy_status;
+            }
+        }
 
         /* Save caller BCs, apply periodic BCs, restore caller BCs.
          * Use _3d helper for 6-face copy when nz > 1. */
diff --git a/lib/src/solvers/navier_stokes/cpu/solver_projection.c b/lib/src/solvers/navier_stokes/cpu/solver_projection.c
index f7bf085e..fcba6ae6 100644
--- a/lib/src/solvers/navier_stokes/cpu/solver_projection.c
+++ b/lib/src/solvers/navier_stokes/cpu/solver_projection.c
@@ -21,6 +21,7 @@
 #include "cfd/core/grid.h"
 #include "cfd/core/indexing.h"
 #include "cfd/core/memory.h"
+#include "cfd/solvers/energy_solver.h"
 #include "cfd/solvers/navier_stokes_solver.h"
 #include "cfd/solvers/poisson_solver.h"
 
@@ -161,6 +162,10 @@ cfd_status_t solve_projection_method(flow_field* field, const grid* grid,
                     compute_source_terms(x_coord, y_coord, z_coord, iter, dt, params,
                                          &source_u, &source_v, &source_w);
 
+                    /* Boussinesq buoyancy source */
+                    energy_compute_buoyancy(field->T[idx], params,
+                                            &source_u, &source_v, &source_w);
+
                     /* Intermediate velocity (without pressure gradient) */
                     u_star[idx] = u + dt * (-conv_u + visc_u + source_u);
                     v_star[idx] = v + dt * (-conv_v + visc_v + source_v);
@@ -241,6 +246,16 @@ cfd_status_t solve_projection_method(flow_field* field, const grid* grid,
         /* Update pressure */
         memcpy(field->p, p_new, bytes);
 
+        /* Energy equation: advance temperature after velocity correction */
+        {
+            cfd_status_t energy_status = energy_step_explicit(field, grid, params, dt);
+            if (energy_status != CFD_SUCCESS) {
+                cfd_free(u_star); cfd_free(v_star); cfd_free(w_star);
+                cfd_free(p_new); cfd_free(p_temp); cfd_free(rhs);
+                return energy_status;
+            }
+        }
+
         /* Restore caller-set boundary conditions */
         copy_boundary_velocities_3d(field->u, field->v, field->w,
                                     u_star, v_star, w_star, nx, ny, nz);
diff --git a/lib/src/solvers/navier_stokes/cpu/solver_rk2.c b/lib/src/solvers/navier_stokes/cpu/solver_rk2.c
index c18cfa8c..db5e16ab 100644
--- a/lib/src/solvers/navier_stokes/cpu/solver_rk2.c
+++ b/lib/src/solvers/navier_stokes/cpu/solver_rk2.c
@@ -20,6 +20,7 @@
 #include "cfd/core/grid.h"
 #include "cfd/core/indexing.h"
 #include "cfd/core/memory.h"
+#include "cfd/solvers/energy_solver.h"
 #include "cfd/solvers/navier_stokes_solver.h"
 
 #include <math.h>
@@ -55,7 +56,7 @@
  * during intermediate RK stages).
  */
 static void compute_rhs(const double* u, const double* v, const double* w,
-                         const double* p, const double* rho,
+                         const double* p, const double* rho, const double* T,
                          double* rhs_u, double* rhs_v, double* rhs_w, double* rhs_p,
                          const grid* grid, const ns_solver_params_t* params,
                          size_t nx, size_t ny, size_t nz,
@@ -163,6 +164,12 @@ static void compute_rhs(const double* u, const double* v, const double* w,
                 compute_source_terms(grid->x[i], grid->y[j], z_coord, iter, dt,
                                      params, &source_u, &source_v, &source_w);
 
+                /* Boussinesq buoyancy source */
+                if (T) {
+                    energy_compute_buoyancy(T[idx], params,
+                                            &source_u, &source_v, &source_w);
+                }
+
                 /* RHS for u-momentum */
                 rhs_u[idx] = -u[idx] * du_dx - v[idx] * du_dy - w[idx] * du_dz
                              - dp_dx / rho[idx]
@@ -268,7 +275,7 @@ cfd_status_t rk2_impl(flow_field* field, const grid* grid,
         memset(k1_w, 0, bytes);
         memset(k1_p, 0, bytes);
 
-        compute_rhs(field->u, field->v, field->w, field->p, field->rho,
+        compute_rhs(field->u, field->v, field->w, field->p, field->rho, field->T,
                      k1_u, k1_v, k1_w, k1_p,
                      grid, params, nx, ny, nz,
                      stride_z, k_start, k_end, inv_2dz, inv_dz2,
@@ -298,7 +305,7 @@ cfd_status_t rk2_impl(flow_field* field, const grid* grid,
         memset(k2_w, 0, bytes);
         memset(k2_p, 0, bytes);
 
-        compute_rhs(field->u, field->v, field->w, field->p, field->rho,
+        compute_rhs(field->u, field->v, field->w, field->p, field->rho, field->T,
                      k2_u, k2_v, k2_w, k2_p,
                      grid, params, nx, ny, nz,
                      stride_z, k_start, k_end, inv_2dz, inv_dz2,
@@ -317,6 +324,15 @@ cfd_status_t rk2_impl(flow_field* field, const grid* grid,
             field->w[n] = fmax(-MAX_VELOCITY_LIMIT, fmin(MAX_VELOCITY_LIMIT, field->w[n]));
         }
 
+        /* Energy equation: advance temperature after RK2 velocity update */
+        {
+            cfd_status_t energy_status = energy_step_explicit(field, grid, params, dt);
+            if (energy_status != CFD_SUCCESS) {
+                status = energy_status;
+                goto cleanup;
+            }
+        }
+
         /* Apply BCs to final state only (after the full RK2 step).
          * This updates ghost cells for the next step's k1 evaluation. */
         apply_boundary_conditions(field, grid);
diff --git a/tests/solvers/energy/test_energy_solver.c b/tests/solvers/energy/test_energy_solver.c
new file mode 100644
index 00000000..9c88890d
--- /dev/null
+++ b/tests/solvers/energy/test_energy_solver.c
@@ -0,0 +1,380 @@
+/**
+ * @file test_energy_solver.c
+ * @brief Unit tests for the energy equation solver
+ *
+ * Tests:
+ * 1. Pure diffusion: 1D Gaussian temperature profile in quiescent flow
+ *    decays according to the analytical heat equation solution.
+ * 2. Pure advection: uniform flow carries temperature profile downstream.
+ * 3. Energy equation disabled: alpha=0 should leave temperature unchanged.
+ * 4. Buoyancy source: verify energy_compute_buoyancy produces correct forces.
+ * 5. Backward compatibility: existing solvers work when alpha=0.
+ */
+
+#include "cfd/core/cfd_init.h"
+#include "cfd/core/cfd_status.h"
+#include "cfd/core/grid.h"
+#include "cfd/core/memory.h"
+#include "cfd/solvers/energy_solver.h"
+#include "cfd/solvers/navier_stokes_solver.h"
+#include "unity.h"
+
+#include <math.h>
+#include <stdio.h>
+#include <string.h>
+
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
+
+/* ============================================================================
+ * TEST SETUP / TEARDOWN
+ * ============================================================================ */
+
+void setUp(void) { cfd_init(); }
+void tearDown(void) { cfd_finalize(); }
+
+/* ============================================================================
+ * TEST 1: Pure Diffusion
+ *
+ * Initialize a Gaussian temperature pulse in quiescent flow (u=v=w=0).
+ * After time t, the analytical solution is:
+ *   T(x,y,t) = T0 / (1 + 4*alpha*t/sigma^2) * exp(-r^2 / (sigma^2 + 4*alpha*t))
+ * where sigma is the initial width and r^2 = (x-x0)^2 + (y-y0)^2.
+ *
+ * We verify the peak temperature decays and the profile broadens.
+ * ============================================================================ */
+
+#define DIFF_NX 33
+#define DIFF_NY 33
+#define DIFF_ALPHA 0.01
+#define DIFF_DT 0.0001
+#define DIFF_STEPS 100
+#define DIFF_SIGMA 0.1
+#define DIFF_T0 10.0
+
+static void test_pure_diffusion(void) {
+    grid* g = grid_create(DIFF_NX, DIFF_NY, 1, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0);
+    TEST_ASSERT_NOT_NULL(g);
+    grid_initialize_uniform(g);
+
+    flow_field* field = flow_field_create(DIFF_NX, DIFF_NY, 1);
+    TEST_ASSERT_NOT_NULL(field);
+
+    /* Initialize: zero velocity, Gaussian temperature pulse at center */
+    double cx = 0.5, cy = 0.5;
+    for (size_t j = 0; j < DIFF_NY; j++) {
+        for (size_t i = 0; i < DIFF_NX; i++) {
+            size_t idx = j * DIFF_NX + i;
+            field->u[idx] = 0.0;
+            field->v[idx] = 0.0;
+            field->w[idx] = 0.0;
+            field->p[idx] = 1.0;
+            field->rho[idx] = 1.0;
+
+            double x = g->x[i];
+            double y = g->y[j];
+            double r2 = (x - cx) * (x - cx) + (y - cy) * (y - cy);
+            field->T[idx] = DIFF_T0 * exp(-r2 / (DIFF_SIGMA * DIFF_SIGMA));
+        }
+    }
+
+    /* Record initial peak temperature */
+    double T_peak_initial = 0.0;
+    for (size_t n = 0; n < (size_t)(DIFF_NX * DIFF_NY); n++) {
+        if (field->T[n] > T_peak_initial) T_peak_initial = field->T[n];
+    }
+
+    ns_solver_params_t params = ns_solver_params_default();
+    params.alpha = DIFF_ALPHA;
+
+    /* Run diffusion steps */
+    for (int step = 0; step < DIFF_STEPS; step++) {
+        cfd_status_t status = energy_step_explicit(field, g, &params, DIFF_DT);
+        if (status != CFD_SUCCESS) {
+            printf("  DIVERGED at step %d, status=%d\n", step, status);
+            /* Print min/max T for debugging */
+            double tmin = 1e30, tmax = -1e30;
+            for (size_t n = 0; n < (size_t)(DIFF_NX * DIFF_NY); n++) {
+                if (field->T[n] < tmin) tmin = field->T[n];
+                if (field->T[n] > tmax) tmax = field->T[n];
+                if (!isfinite(field->T[n])) {
+                    printf("  NaN/Inf at index %zu\n", n);
+                    break;
+                }
+            }
+            printf("  T range: [%e, %e]\n", tmin, tmax);
+        }
+        TEST_ASSERT_EQUAL(CFD_SUCCESS, status);
+    }
+
+    /* Check peak temperature has decreased (diffusion spreads the pulse) */
+    double T_peak_final = 0.0;
+    for (size_t n = 0; n < (size_t)(DIFF_NX * DIFF_NY); n++) {
+        if (field->T[n] > T_peak_final) T_peak_final = field->T[n];
+    }
+
+    printf("  Diffusion test: T_peak initial=%.4f, final=%.4f\n",
+           T_peak_initial, T_peak_final);
+
+    TEST_ASSERT_TRUE_MESSAGE(T_peak_final < T_peak_initial,
+                              "Peak temperature should decrease due to diffusion");
+    TEST_ASSERT_TRUE_MESSAGE(T_peak_final > 0.0,
+                              "Temperature should remain positive");
+
+    /* Verify analytical decay rate (approximate).
+     * Analytical peak: T0 / (1 + 4*alpha*t/sigma^2)
+     * t = DIFF_STEPS * DIFF_DT = 0.01 */
+    double t_final = DIFF_STEPS * DIFF_DT;
+    double analytical_peak = DIFF_T0 / (1.0 + 4.0 * DIFF_ALPHA * t_final /
+                                         (DIFF_SIGMA * DIFF_SIGMA));
+    double rel_error = fabs(T_peak_final - analytical_peak) / analytical_peak;
+    printf("  Analytical peak=%.4f, numerical=%.4f, rel_error=%.4f\n",
+           analytical_peak, T_peak_final, rel_error);
+
+    /* Allow 20% error due to boundary effects and discretization */
+    TEST_ASSERT_TRUE_MESSAGE(rel_error < 0.20,
+                              "Peak temperature should match analytical within 20%");
+
+    flow_field_destroy(field);
+    grid_destroy(g);
+}
+
+/* ============================================================================
+ * TEST 2: Pure Advection
+ *
+ * Uniform horizontal flow (u=1, v=0) with a temperature step.
+ * After advection, the temperature profile should shift downstream.
+ * ============================================================================ */
+
+#define ADV_NX 41
+#define ADV_NY 5
+#define ADV_U  1.0
+#define ADV_DT 0.001
+#define ADV_STEPS 50
+
+static void test_pure_advection(void) {
+    grid* g = grid_create(ADV_NX, ADV_NY, 1, 0.0, 2.0, 0.0, 0.5, 0.0, 0.0);
+    TEST_ASSERT_NOT_NULL(g);
+    grid_initialize_uniform(g);
+
+    flow_field* field = flow_field_create(ADV_NX, ADV_NY, 1);
+    TEST_ASSERT_NOT_NULL(field);
+
+    /* Initialize: uniform rightward flow, temperature hot on left half */
+    for (size_t j = 0; j < ADV_NY; j++) {
+        for (size_t i = 0; i < ADV_NX; i++) {
+            size_t idx = j * ADV_NX + i;
+            field->u[idx] = ADV_U;
+            field->v[idx] = 0.0;
+            field->w[idx] = 0.0;
+            field->p[idx] = 1.0;
+            field->rho[idx] = 1.0;
+            /* Smooth tanh profile centered at x=0.5 */
+            double x = g->x[i];
+            field->T[idx] = 0.5 * (1.0 - tanh(20.0 * (x - 0.5)));
+        }
+    }
+
+    /* Measure initial center of mass of temperature */
+    double sum_Tx_initial = 0.0, sum_T_initial = 0.0;
+    size_t j_mid = ADV_NY / 2;
+    for (size_t i = 0; i < ADV_NX; i++) {
+        size_t idx = j_mid * ADV_NX + i;
+        sum_Tx_initial += field->T[idx] * g->x[i];
+        sum_T_initial += field->T[idx];
+    }
+    double com_initial = sum_Tx_initial / sum_T_initial;
+
+    ns_solver_params_t params = ns_solver_params_default();
+    params.alpha = 0.0001;  /* Small diffusivity to stabilize */
+
+    for (int step = 0; step < ADV_STEPS; step++) {
+        cfd_status_t status = energy_step_explicit(field, g, &params, ADV_DT);
+        TEST_ASSERT_EQUAL(CFD_SUCCESS, status);
+    }
+
+    /* Measure final center of mass */
+    double sum_Tx_final = 0.0, sum_T_final = 0.0;
+    for (size_t i = 0; i < ADV_NX; i++) {
+        size_t idx = j_mid * ADV_NX + i;
+        sum_Tx_final += field->T[idx] * g->x[i];
+        sum_T_final += field->T[idx];
+    }
+    double com_final = sum_Tx_final / sum_T_final;
+
+    double expected_shift = ADV_U * ADV_STEPS * ADV_DT;
+    double actual_shift = com_final - com_initial;
+
+    printf("  Advection test: expected shift=%.4f, actual=%.4f\n",
+           expected_shift, actual_shift);
+
+    /* Advection should move the profile rightward */
+    TEST_ASSERT_TRUE_MESSAGE(actual_shift > 0.0,
+                              "Temperature profile should shift downstream");
+    /* Allow 50% tolerance on shift magnitude (central differencing has
+     * numerical diffusion and dispersion) */
+    TEST_ASSERT_TRUE_MESSAGE(actual_shift > expected_shift * 0.3,
+                              "Shift should be at least 30% of expected");
+
+    flow_field_destroy(field);
+    grid_destroy(g);
+}
+
+/* ============================================================================
+ * TEST 3: Energy Equation Disabled
+ *
+ * When alpha=0, energy_step_explicit should be a no-op.
+ * ============================================================================ */
+
+static void test_energy_disabled(void) {
+    grid* g = grid_create(9, 9, 1, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0);
+    TEST_ASSERT_NOT_NULL(g);
+    grid_initialize_uniform(g);
+
+    flow_field* field = flow_field_create(9, 9, 1);
+    TEST_ASSERT_NOT_NULL(field);
+
+    /* Set non-uniform temperature */
+    for (size_t n = 0; n < 81; n++) {
+        field->u[n] = 1.0;
+        field->v[n] = 0.5;
+        field->rho[n] = 1.0;
+        field->T[n] = 300.0 + (double)n;
+    }
+
+    /* Save original temperature */
+    double T_orig[81];
+    memcpy(T_orig, field->T, 81 * sizeof(double));
+
+    ns_solver_params_t params = ns_solver_params_default();
+    params.alpha = 0.0;  /* Disabled */
+
+    cfd_status_t status = energy_step_explicit(field, g, &params, 0.001);
+    TEST_ASSERT_EQUAL(CFD_SUCCESS, status);
+
+    /* Temperature should be unchanged */
+    for (size_t n = 0; n < 81; n++) {
+        TEST_ASSERT_DOUBLE_WITHIN(1e-15, T_orig[n], field->T[n]);
+    }
+
+    flow_field_destroy(field);
+    grid_destroy(g);
+}
+
+/* ============================================================================
+ * TEST 4: Buoyancy Source Computation
+ * ============================================================================ */
+
+static void test_buoyancy_source(void) {
+    ns_solver_params_t params = ns_solver_params_default();
+    params.beta = 0.001;    /* Thermal expansion coefficient */
+    params.T_ref = 300.0;   /* Reference temperature */
+    params.gravity[0] = 0.0;
+    params.gravity[1] = -9.81;
+    params.gravity[2] = 0.0;
+
+    double source_u = 0.0, source_v = 0.0, source_w = 0.0;
+
+    /* Hot fluid (T > T_ref): should produce upward force (positive v for g_y < 0) */
+    energy_compute_buoyancy(310.0, &params, &source_u, &source_v, &source_w);
+
+    /* F = -beta * (T - T_ref) * g
+     * source_v = -0.001 * (310 - 300) * (-9.81) = +0.0981 */
+    printf("  Buoyancy: source_u=%.6f, source_v=%.6f, source_w=%.6f\n",
+           source_u, source_v, source_w);
+
+    TEST_ASSERT_DOUBLE_WITHIN(1e-10, 0.0, source_u);
+    TEST_ASSERT_DOUBLE_WITHIN(1e-6, 0.0981, source_v);
+    TEST_ASSERT_DOUBLE_WITHIN(1e-10, 0.0, source_w);
+
+    /* Cold fluid (T < T_ref): should produce downward force */
+    source_u = 0.0; source_v = 0.0; source_w = 0.0;
+    energy_compute_buoyancy(290.0, &params, &source_u, &source_v, &source_w);
+
+    /* source_v = -0.001 * (290 - 300) * (-9.81) = -0.0981 */
+    TEST_ASSERT_DOUBLE_WITHIN(1e-6, -0.0981, source_v);
+
+    /* Zero beta: no buoyancy */
+    params.beta = 0.0;
+    source_u = 0.0; source_v = 0.0; source_w = 0.0;
+    energy_compute_buoyancy(500.0, &params, &source_u, &source_v, &source_w);
+    TEST_ASSERT_DOUBLE_WITHIN(1e-15, 0.0, source_u);
+    TEST_ASSERT_DOUBLE_WITHIN(1e-15, 0.0, source_v);
+    TEST_ASSERT_DOUBLE_WITHIN(1e-15, 0.0, source_w);
+}
+
+/* ============================================================================
+ * TEST 5: Backward Compatibility
+ *
+ * Existing solvers should work identically when alpha=0 (default).
+ * Run a projection step with default params and verify no NaN/crash.
+ * ============================================================================ */
+
+static void test_backward_compatibility(void) {
+    grid* g = grid_create(17, 17, 1, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0);
+    TEST_ASSERT_NOT_NULL(g);
+    grid_initialize_uniform(g);
+
+    flow_field* field = flow_field_create(17, 17, 1);
+    TEST_ASSERT_NOT_NULL(field);
+
+    /* Initialize with quiescent flow */
+    for (size_t j = 0; j < 17; j++) {
+        for (size_t i = 0; i < 17; i++) {
+            size_t idx = j * 17 + i;
+            field->u[idx] = 0.0;
+            field->v[idx] = 0.0;
+            field->w[idx] = 0.0;
+            field->p[idx] = 1.0;
+            field->rho[idx] = 1.0;
+            field->T[idx] = 300.0;
+        }
+    }
+
+    ns_solver_params_t params = ns_solver_params_default();
+    params.dt = 0.001;
+    params.max_iter = 1;
+    params.source_func = NULL;
+    params.source_amplitude_u = 0.0;
+    params.source_amplitude_v = 0.0;
+    /* alpha=0 by default, so energy equation is disabled */
+
+    ns_solver_registry_t* registry = cfd_registry_create();
+    TEST_ASSERT_NOT_NULL(registry);
+    cfd_registry_register_defaults(registry);
+
+    ns_solver_t* solver = cfd_solver_create(registry, NS_SOLVER_TYPE_PROJECTION);
+    TEST_ASSERT_NOT_NULL(solver);
+
+    cfd_status_t init_status = solver_init(solver, g, &params);
+    TEST_ASSERT_EQUAL(CFD_SUCCESS, init_status);
+
+    ns_solver_stats_t stats = ns_solver_stats_default();
+    cfd_status_t step_status = solver_step(solver, field, g, &params, &stats);
+    TEST_ASSERT_EQUAL(CFD_SUCCESS, step_status);
+
+    /* Temperature should remain at 300 (energy equation disabled) */
+    for (size_t n = 0; n < 17 * 17; n++) {
+        TEST_ASSERT_DOUBLE_WITHIN(1e-10, 300.0, field->T[n]);
+    }
+
+    solver_destroy(solver);
+    cfd_registry_destroy(registry);
+    flow_field_destroy(field);
+    grid_destroy(g);
+}
+
+/* ============================================================================
+ * MAIN
+ * ============================================================================ */
+
+int main(void) {
+    UNITY_BEGIN();
+    RUN_TEST(test_pure_diffusion);
+    RUN_TEST(test_pure_advection);
+    RUN_TEST(test_energy_disabled);
+    RUN_TEST(test_buoyancy_source);
+    RUN_TEST(test_backward_compatibility);
+    return UNITY_END();
+}
diff --git a/tests/validation/test_natural_convection.c b/tests/validation/test_natural_convection.c
new file mode 100644
index 00000000..668cbdfc
--- /dev/null
+++ b/tests/validation/test_natural_convection.c
@@ -0,0 +1,230 @@
+/**
+ * @file test_natural_convection.c
+ * @brief Natural convection validation test (differentially heated cavity)
+ *
+ * Tests the coupled energy equation + Boussinesq buoyancy using the
+ * de Vahl Davis (1983) benchmark: a square cavity with:
+ *   - Hot left wall (T = T_hot)
+ *   - Cold right wall (T = T_cold)
+ *   - Insulated (adiabatic) top and bottom walls
+ *   - No-slip velocity BCs on all walls
+ *
+ * At steady state, natural convection creates a clockwise circulation
+ * (for gravity pointing downward). The Rayleigh number is:
+ *   Ra = g * beta * dT * L^3 / (nu * alpha)
+ *
+ * For Ra = 1000, the benchmark Nusselt number is Nu ~ 1.118 (de Vahl Davis).
+ *
+ * This test runs a short simulation and verifies:
+ * 1. Flow develops in the correct direction (buoyancy-driven)
+ * 2. Temperature gradient is established between hot and cold walls
+ * 3. Maximum velocity is physically reasonable
+ * 4. No divergence (NaN/Inf)
+ */
+
+#include "cfd/core/cfd_init.h"
+#include "cfd/core/cfd_status.h"
+#include "cfd/core/grid.h"
+#include "cfd/core/memory.h"
+#include "cfd/solvers/navier_stokes_solver.h"
+#include "unity.h"
+
+#include <math.h>
+#include <stdio.h>
+
+/* ============================================================================
+ * CONSTANTS
+ * ============================================================================ */
+
+/* Domain: unit square cavity */
+#define NC_NX 21
+#define NC_NY 21
+#define NC_LENGTH 1.0
+
+/* Physical parameters for Ra = 1000 */
+#define NC_T_HOT    310.0
+#define NC_T_COLD   290.0
+#define NC_T_REF    300.0   /* (T_hot + T_cold) / 2 */
+#define NC_DT_TEMP  (NC_T_HOT - NC_T_COLD)  /* 20 K */
+#define NC_BETA     0.003333  /* 1/T_ref [1/K] */
+#define NC_G        9.81
+
+/* Derived: Ra = g * beta * dT * L^3 / (nu * alpha)
+ * For Ra = 1000 with the above beta, dT, L=1:
+ *   nu * alpha = g * beta * dT * L^3 / Ra = 9.81 * 0.003333 * 20 / 1000 = 6.54e-4
+ * Choose Pr = nu/alpha = 0.71 (air)
+ *   alpha = sqrt(6.54e-4 / 0.71) = 0.0303
+ *   nu = 0.71 * alpha = 0.0215 */
+#define NC_ALPHA    0.0303
+#define NC_NU       0.0215
+
+/* Time stepping */
+#define NC_DT       0.0005
+#define NC_STEPS    500
+
+/* ============================================================================
+ * TEST SETUP / TEARDOWN
+ * ============================================================================ */
+
+void setUp(void) { cfd_init(); }
+void tearDown(void) { cfd_finalize(); }
+
+/* ============================================================================
+ * Apply thermal boundary conditions for differentially heated cavity
+ * ============================================================================ */
+
+static void apply_cavity_bcs(flow_field* field, size_t nx, size_t ny) {
+    for (size_t j = 0; j < ny; j++) {
+        /* Left wall: hot */
+        field->T[j * nx + 0] = NC_T_HOT;
+        /* Right wall: cold */
+        field->T[j * nx + (nx - 1)] = NC_T_COLD;
+
+        /* No-slip on left and right walls */
+        field->u[j * nx + 0] = 0.0;
+        field->v[j * nx + 0] = 0.0;
+        field->u[j * nx + (nx - 1)] = 0.0;
+        field->v[j * nx + (nx - 1)] = 0.0;
+    }
+
+    for (size_t i = 0; i < nx; i++) {
+        /* Bottom wall: adiabatic (Neumann dT/dy=0 via copy from interior) */
+        field->T[0 * nx + i] = field->T[1 * nx + i];
+        /* Top wall: adiabatic */
+        field->T[(ny - 1) * nx + i] = field->T[(ny - 2) * nx + i];
+
+        /* No-slip on top and bottom walls */
+        field->u[0 * nx + i] = 0.0;
+        field->v[0 * nx + i] = 0.0;
+        field->u[(ny - 1) * nx + i] = 0.0;
+        field->v[(ny - 1) * nx + i] = 0.0;
+    }
+}
+
+/* ============================================================================
+ * TEST: Buoyancy-Driven Flow Development
+ * ============================================================================ */
+
+static void test_natural_convection_development(void) {
+    grid* g = grid_create(NC_NX, NC_NY, 1, 0.0, NC_LENGTH, 0.0, NC_LENGTH, 0.0, 0.0);
+    TEST_ASSERT_NOT_NULL(g);
+    grid_initialize_uniform(g);
+
+    flow_field* field = flow_field_create(NC_NX, NC_NY, 1);
+    TEST_ASSERT_NOT_NULL(field);
+
+    /* Initialize: quiescent flow, linear temperature profile */
+    for (size_t j = 0; j < NC_NY; j++) {
+        for (size_t i = 0; i < NC_NX; i++) {
+            size_t idx = j * NC_NX + i;
+            field->u[idx] = 0.0;
+            field->v[idx] = 0.0;
+            field->w[idx] = 0.0;
+            field->p[idx] = 1.0;
+            field->rho[idx] = 1.0;
+            /* Linear temperature from hot (left) to cold (right) */
+            double x = g->x[i];
+            field->T[idx] = NC_T_HOT - NC_DT_TEMP * (x / NC_LENGTH);
+        }
+    }
+
+    /* Set up solver params with energy equation and Boussinesq */
+    ns_solver_params_t params = ns_solver_params_default();
+    params.dt = NC_DT;
+    params.mu = NC_NU;
+    params.alpha = NC_ALPHA;
+    params.beta = NC_BETA;
+    params.T_ref = NC_T_REF;
+    params.gravity[0] = 0.0;
+    params.gravity[1] = -NC_G;
+    params.gravity[2] = 0.0;
+    params.max_iter = 1;
+    params.source_amplitude_u = 0.0;
+    params.source_amplitude_v = 0.0;
+
+    /* Use projection solver */
+    ns_solver_registry_t* registry = cfd_registry_create();
+    TEST_ASSERT_NOT_NULL(registry);
+    cfd_registry_register_defaults(registry);
+
+    ns_solver_t* solver = cfd_solver_create(registry, NS_SOLVER_TYPE_PROJECTION);
+    TEST_ASSERT_NOT_NULL(solver);
+
+    cfd_status_t init_status = solver_init(solver, g, &params);
+    TEST_ASSERT_EQUAL(CFD_SUCCESS, init_status);
+
+    /* Run simulation */
+    ns_solver_stats_t stats = ns_solver_stats_default();
+    for (int step = 0; step < NC_STEPS; step++) {
+        /* Apply thermal BCs before each step */
+        apply_cavity_bcs(field, NC_NX, NC_NY);
+
+        cfd_status_t status = solver_step(solver, field, g, &params, &stats);
+        TEST_ASSERT_EQUAL_MESSAGE(CFD_SUCCESS, status,
+                                   "Solver step should succeed");
+
+        /* Re-apply thermal BCs after step */
+        apply_cavity_bcs(field, NC_NX, NC_NY);
+    }
+
+    /* ---- Verify results ---- */
+
+    /* 1. Check that flow has developed (max velocity > 0) */
+    double max_vel = 0.0;
+    for (size_t n = 0; n < (size_t)(NC_NX * NC_NY); n++) {
+        double vel = sqrt(field->u[n] * field->u[n] + field->v[n] * field->v[n]);
+        if (vel > max_vel) max_vel = vel;
+    }
+    printf("  Max velocity: %.6f\n", max_vel);
+    TEST_ASSERT_TRUE_MESSAGE(max_vel > 1e-6,
+                              "Buoyancy should drive flow (max_vel > 0)");
+
+    /* 2. Check vertical velocity direction near hot wall (should be upward = positive v)
+     * and near cold wall (should be downward = negative v) */
+    size_t j_mid = NC_NY / 2;
+    double v_near_hot = field->v[j_mid * NC_NX + 2];   /* Near left (hot) wall */
+    double v_near_cold = field->v[j_mid * NC_NX + (NC_NX - 3)]; /* Near right (cold) wall */
+
+    printf("  v near hot wall: %.6f, v near cold wall: %.6f\n",
+           v_near_hot, v_near_cold);
+
+    TEST_ASSERT_TRUE_MESSAGE(v_near_hot > 0.0,
+                              "Hot fluid should rise (positive v near hot wall)");
+    TEST_ASSERT_TRUE_MESSAGE(v_near_cold < 0.0,
+                              "Cold fluid should sink (negative v near cold wall)");
+
+    /* 3. Check temperature gradient is maintained */
+    double T_left = field->T[j_mid * NC_NX + 0];
+    double T_right = field->T[j_mid * NC_NX + (NC_NX - 1)];
+    printf("  T_left=%.2f, T_right=%.2f\n", T_left, T_right);
+
+    TEST_ASSERT_DOUBLE_WITHIN(0.1, NC_T_HOT, T_left);
+    TEST_ASSERT_DOUBLE_WITHIN(0.1, NC_T_COLD, T_right);
+
+    /* 4. Check no NaN/Inf in any field */
+    for (size_t n = 0; n < (size_t)(NC_NX * NC_NY); n++) {
+        TEST_ASSERT_TRUE(isfinite(field->u[n]));
+        TEST_ASSERT_TRUE(isfinite(field->v[n]));
+        TEST_ASSERT_TRUE(isfinite(field->p[n]));
+        TEST_ASSERT_TRUE(isfinite(field->T[n]));
+    }
+
+    /* 5. Max velocity should be physically reasonable (not exploded) */
+    TEST_ASSERT_TRUE_MESSAGE(max_vel < 10.0,
+                              "Max velocity should be physically reasonable");
+
+    solver_destroy(solver);
+    cfd_registry_destroy(registry);
+    flow_field_destroy(field);
+    grid_destroy(g);
+}
+
+/* ============================================================================
+ * MAIN
+ * ============================================================================ */
+
+int main(void) {
+    UNITY_BEGIN();
+    RUN_TEST(test_natural_convection_development);
+    return UNITY_END();
+}

From 6829103f3a725e6e551b6c59dffe80244ca0b5a8 Mon Sep 17 00:00:00 2001
From: shaia <shaia@outlook.com>
Date: Sun, 8 Mar 2026 07:19:18 +0200
Subject: [PATCH 02/20] Remove dead t_new allocation from explicit Euler solver
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The energy equation was refactored to use energy_step_explicit() but the
old t_new temporary array was left behind — allocated, copied, and freed
without ever being written to with computed values.
---
 .../navier_stokes/cpu/solver_explicit_euler.c        | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/lib/src/solvers/navier_stokes/cpu/solver_explicit_euler.c b/lib/src/solvers/navier_stokes/cpu/solver_explicit_euler.c
index 801d3887..46ccc37c 100644
--- a/lib/src/solvers/navier_stokes/cpu/solver_explicit_euler.c
+++ b/lib/src/solvers/navier_stokes/cpu/solver_explicit_euler.c
@@ -367,11 +367,10 @@ cfd_status_t explicit_euler_impl(flow_field* field, const grid* grid, const ns_s
     double* w_new = (double*)cfd_calloc(total, sizeof(double));
     double* p_new = (double*)cfd_calloc(total, sizeof(double));
     double* rho_new = (double*)cfd_calloc(total, sizeof(double));
-    double* t_new = (double*)cfd_calloc(total, sizeof(double));
 
-    if (!u_new || !v_new || !w_new || !p_new || !rho_new || !t_new) {
+    if (!u_new || !v_new || !w_new || !p_new || !rho_new) {
         cfd_free(u_new); cfd_free(v_new); cfd_free(w_new);
-        cfd_free(p_new); cfd_free(rho_new); cfd_free(t_new);
+        cfd_free(p_new); cfd_free(rho_new);
         return CFD_ERROR_NOMEM;
     }
 
@@ -380,7 +379,6 @@ cfd_status_t explicit_euler_impl(flow_field* field, const grid* grid, const ns_s
     memcpy(w_new, field->w, bytes);
     memcpy(p_new, field->p, bytes);
     memcpy(rho_new, field->rho, bytes);
-    memcpy(t_new, field->T, bytes);
 
     double conservative_dt = fmin(params->dt, DT_CONSERVATIVE_LIMIT);
 
@@ -533,7 +531,7 @@ cfd_status_t explicit_euler_impl(flow_field* field, const grid* grid, const ns_s
                                                                conservative_dt);
             if (energy_status != CFD_SUCCESS) {
                 cfd_free(u_new); cfd_free(v_new); cfd_free(w_new);
-                cfd_free(p_new); cfd_free(rho_new); cfd_free(t_new);
+                cfd_free(p_new); cfd_free(rho_new);
                 return energy_status;
             }
         }
@@ -557,7 +555,7 @@ cfd_status_t explicit_euler_impl(flow_field* field, const grid* grid, const ns_s
         }
         if (has_nan) {
             cfd_free(u_new); cfd_free(v_new); cfd_free(w_new);
-            cfd_free(p_new); cfd_free(rho_new); cfd_free(t_new);
+            cfd_free(p_new); cfd_free(rho_new);
             cfd_set_error(CFD_ERROR_DIVERGED,
                           "NaN/Inf detected in explicit_euler step");
             return CFD_ERROR_DIVERGED;
@@ -565,7 +563,7 @@ cfd_status_t explicit_euler_impl(flow_field* field, const grid* grid, const ns_s
     }
 
     cfd_free(u_new); cfd_free(v_new); cfd_free(w_new);
-    cfd_free(p_new); cfd_free(rho_new); cfd_free(t_new);
+    cfd_free(p_new); cfd_free(rho_new);
 
     return CFD_SUCCESS;
 }

From 603d0140bc87dad43da71fcb476d27f23f7a7591 Mon Sep 17 00:00:00 2001
From: shaia <shaia@outlook.com>
Date: Sun, 8 Mar 2026 07:26:43 +0200
Subject: [PATCH 03/20] Pass physical time to heat_source_func in energy solver

The heat source callback received hardcoded 0.0 for the time argument,
making time-varying heat sources silently incorrect. Add a time parameter
to energy_step_explicit and pass iter*dt from all three NS solvers,
matching how compute_source_terms already handles momentum sources.
---
 lib/include/cfd/solvers/energy_solver.h                   | 3 ++-
 lib/src/solvers/energy/cpu/energy_solver.c                | 4 ++--
 lib/src/solvers/navier_stokes/cpu/solver_explicit_euler.c | 3 ++-
 lib/src/solvers/navier_stokes/cpu/solver_projection.c     | 3 ++-
 lib/src/solvers/navier_stokes/cpu/solver_rk2.c            | 3 ++-
 tests/solvers/energy/test_energy_solver.c                 | 7 ++++---
 6 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/lib/include/cfd/solvers/energy_solver.h b/lib/include/cfd/solvers/energy_solver.h
index c98abbce..94dbcf12 100644
--- a/lib/include/cfd/solvers/energy_solver.h
+++ b/lib/include/cfd/solvers/energy_solver.h
@@ -37,11 +37,12 @@ extern "C" {
  * @param grid   Computational grid
  * @param params Solver parameters (alpha, heat_source_func, dt)
  * @param dt     Time step size
+ * @param time   Current physical time (passed to heat_source_func)
  * @return CFD_SUCCESS, or CFD_ERROR_DIVERGED if NaN detected in T
  */
 CFD_LIBRARY_EXPORT cfd_status_t energy_step_explicit(flow_field* field, const grid* grid,
                                                       const ns_solver_params_t* params,
-                                                      double dt);
+                                                      double dt, double time);
 
 /**
  * Compute Boussinesq buoyancy source terms from the temperature field.
diff --git a/lib/src/solvers/energy/cpu/energy_solver.c b/lib/src/solvers/energy/cpu/energy_solver.c
index 1687f9e6..998b55d1 100644
--- a/lib/src/solvers/energy/cpu/energy_solver.c
+++ b/lib/src/solvers/energy/cpu/energy_solver.c
@@ -20,7 +20,7 @@
 
 cfd_status_t energy_step_explicit(flow_field* field, const grid* grid,
                                    const ns_solver_params_t* params,
-                                   double dt) {
+                                   double dt, double time) {
     /* Skip when energy equation is disabled */
     if (params->alpha <= 0.0) {
         return CFD_SUCCESS;
@@ -81,7 +81,7 @@ cfd_status_t energy_step_explicit(flow_field* field, const grid* grid,
                     double x = grid->x[i];
                     double y = grid->y[j];
                     double z = (nz > 1 && grid->z) ? grid->z[k] : 0.0;
-                    Q = params->heat_source_func(x, y, z, 0.0,
+                    Q = params->heat_source_func(x, y, z, time,
                                                   params->heat_source_context);
                 }
 
diff --git a/lib/src/solvers/navier_stokes/cpu/solver_explicit_euler.c b/lib/src/solvers/navier_stokes/cpu/solver_explicit_euler.c
index 46ccc37c..77d315bc 100644
--- a/lib/src/solvers/navier_stokes/cpu/solver_explicit_euler.c
+++ b/lib/src/solvers/navier_stokes/cpu/solver_explicit_euler.c
@@ -528,7 +528,8 @@ cfd_status_t explicit_euler_impl(flow_field* field, const grid* grid, const ns_s
         /* Energy equation: advance temperature using updated velocity */
         {
             cfd_status_t energy_status = energy_step_explicit(field, grid, params,
-                                                               conservative_dt);
+                                                               conservative_dt,
+                                                               iter * conservative_dt);
             if (energy_status != CFD_SUCCESS) {
                 cfd_free(u_new); cfd_free(v_new); cfd_free(w_new);
                 cfd_free(p_new); cfd_free(rho_new);
diff --git a/lib/src/solvers/navier_stokes/cpu/solver_projection.c b/lib/src/solvers/navier_stokes/cpu/solver_projection.c
index fcba6ae6..013b4e5c 100644
--- a/lib/src/solvers/navier_stokes/cpu/solver_projection.c
+++ b/lib/src/solvers/navier_stokes/cpu/solver_projection.c
@@ -248,7 +248,8 @@ cfd_status_t solve_projection_method(flow_field* field, const grid* grid,
 
         /* Energy equation: advance temperature after velocity correction */
         {
-            cfd_status_t energy_status = energy_step_explicit(field, grid, params, dt);
+            cfd_status_t energy_status = energy_step_explicit(field, grid, params, dt,
+                                                               iter * dt);
             if (energy_status != CFD_SUCCESS) {
                 cfd_free(u_star); cfd_free(v_star); cfd_free(w_star);
                 cfd_free(p_new); cfd_free(p_temp); cfd_free(rhs);
diff --git a/lib/src/solvers/navier_stokes/cpu/solver_rk2.c b/lib/src/solvers/navier_stokes/cpu/solver_rk2.c
index db5e16ab..c672083b 100644
--- a/lib/src/solvers/navier_stokes/cpu/solver_rk2.c
+++ b/lib/src/solvers/navier_stokes/cpu/solver_rk2.c
@@ -326,7 +326,8 @@ cfd_status_t rk2_impl(flow_field* field, const grid* grid,
 
         /* Energy equation: advance temperature after RK2 velocity update */
         {
-            cfd_status_t energy_status = energy_step_explicit(field, grid, params, dt);
+            cfd_status_t energy_status = energy_step_explicit(field, grid, params, dt,
+                                                               iter * dt);
             if (energy_status != CFD_SUCCESS) {
                 status = energy_status;
                 goto cleanup;
diff --git a/tests/solvers/energy/test_energy_solver.c b/tests/solvers/energy/test_energy_solver.c
index 9c88890d..5a5ff4b5 100644
--- a/tests/solvers/energy/test_energy_solver.c
+++ b/tests/solvers/energy/test_energy_solver.c
@@ -90,7 +90,7 @@ static void test_pure_diffusion(void) {
 
     /* Run diffusion steps */
     for (int step = 0; step < DIFF_STEPS; step++) {
-        cfd_status_t status = energy_step_explicit(field, g, &params, DIFF_DT);
+        cfd_status_t status = energy_step_explicit(field, g, &params, DIFF_DT, step * DIFF_DT);
         if (status != CFD_SUCCESS) {
             printf("  DIVERGED at step %d, status=%d\n", step, status);
             /* Print min/max T for debugging */
@@ -190,7 +190,8 @@ static void test_pure_advection(void) {
     params.alpha = 0.0001;  /* Small diffusivity to stabilize */
 
     for (int step = 0; step < ADV_STEPS; step++) {
-        cfd_status_t status = energy_step_explicit(field, g, &params, ADV_DT);
+        cfd_status_t status = energy_step_explicit(field, g, &params, ADV_DT,
+                                                     step * ADV_DT);
         TEST_ASSERT_EQUAL(CFD_SUCCESS, status);
     }
 
@@ -250,7 +251,7 @@ static void test_energy_disabled(void) {
     ns_solver_params_t params = ns_solver_params_default();
     params.alpha = 0.0;  /* Disabled */
 
-    cfd_status_t status = energy_step_explicit(field, g, &params, 0.001);
+    cfd_status_t status = energy_step_explicit(field, g, &params, 0.001, 0.0);
     TEST_ASSERT_EQUAL(CFD_SUCCESS, status);
 
     /* Temperature should be unchanged */

From b1f51a1ee9dd2f04d8b491a2b92e59d13bfc1469 Mon Sep 17 00:00:00 2001
From: shaia <shaia@outlook.com>
Date: Sun, 8 Mar 2026 07:39:29 +0200
Subject: [PATCH 04/20] Populate max_temperature in solver stats after each
 step

The field was added to ns_solver_stats_t but never written, always
reporting 0.0. Now computed from field->T in all solver wrappers
(solver_registry.c and avx2 rk2) alongside max_velocity/max_pressure.
---
 lib/src/api/solver_registry.c                 | 29 +++++++++++++++++++
 .../navier_stokes/avx2/solver_rk2_avx2.c      | 16 ++++++++++
 2 files changed, 45 insertions(+)

diff --git a/lib/src/api/solver_registry.c b/lib/src/api/solver_registry.c
index f8cf2571..59d6b131 100644
--- a/lib/src/api/solver_registry.c
+++ b/lib/src/api/solver_registry.c
@@ -24,6 +24,19 @@
 #include <omp.h>
 #endif
 
+/* Compute max |T| for stats reporting */
+static double compute_max_temperature(const flow_field* field) {
+    double max_t = 0.0;
+    if (field->T) {
+        size_t n = field->nx * field->ny * field->nz;
+        for (size_t i = 0; i < n; i++) {
+            double at = fabs(field->T[i]);
+            if (at > max_t) max_t = at;
+        }
+    }
+    return max_t;
+}
+
 // Forward declarations for internal solver implementations
 // These are not part of the public API
 cfd_status_t explicit_euler_impl(flow_field* field, const grid* grid, const ns_solver_params_t* params);
@@ -530,6 +543,7 @@ static cfd_status_t explicit_euler_step(ns_solver_t* solver, flow_field* field,
         }
         stats->max_velocity = max_vel;
         stats->max_pressure = max_p;
+        stats->max_temperature = compute_max_temperature(field);
     }
 
     return CFD_SUCCESS;
@@ -561,6 +575,7 @@ static cfd_status_t explicit_euler_solve(ns_solver_t* solver, flow_field* field,
         }
         stats->max_velocity = max_vel;
         stats->max_pressure = max_p;
+        stats->max_temperature = compute_max_temperature(field);
     }
 
     return CFD_SUCCESS;
@@ -617,6 +632,7 @@ static cfd_status_t rk2_step(ns_solver_t* solver, flow_field* field, const grid*
         }
         stats->max_velocity = max_vel;
         stats->max_pressure = max_p;
+        stats->max_temperature = compute_max_temperature(field);
     }
 
     return status;
@@ -643,6 +659,7 @@ static cfd_status_t rk2_solve(ns_solver_t* solver, flow_field* field, const grid
         }
         stats->max_velocity = max_vel;
         stats->max_pressure = max_p;
+        stats->max_temperature = compute_max_temperature(field);
     }
 
     return status;
@@ -721,6 +738,7 @@ static cfd_status_t explicit_euler_simd_solve(ns_solver_t* solver, flow_field* f
         }
         stats->max_velocity = max_vel;
         stats->max_pressure = max_p;
+        stats->max_temperature = compute_max_temperature(field);
     }
     return CFD_SUCCESS;
 }
@@ -805,6 +823,7 @@ static cfd_status_t projection_step(ns_solver_t* solver, flow_field* field, cons
         }
         stats->max_velocity = max_vel;
         stats->max_pressure = max_p;
+        stats->max_temperature = compute_max_temperature(field);
     }
     return CFD_SUCCESS;
 }
@@ -837,6 +856,7 @@ static cfd_status_t projection_solve(ns_solver_t* solver, flow_field* field, con
         }
         stats->max_velocity = max_vel;
         stats->max_pressure = max_p;
+        stats->max_temperature = compute_max_temperature(field);
     }
     return CFD_SUCCESS;
 }
@@ -894,6 +914,7 @@ static cfd_status_t projection_simd_solve(ns_solver_t* solver, flow_field* field
         }
         stats->max_velocity = max_vel;
         stats->max_pressure = max_p;
+        stats->max_temperature = compute_max_temperature(field);
     }
     return CFD_SUCCESS;
 }
@@ -1002,6 +1023,7 @@ static cfd_status_t gpu_euler_step(ns_solver_t* solver, flow_field* field, const
                     }
                     stats->max_velocity = max_vel;
                     stats->max_pressure = max_p;
+                    stats->max_temperature = compute_max_temperature(field);
                 }
                 return CFD_SUCCESS;
             }
@@ -1046,6 +1068,7 @@ static cfd_status_t gpu_euler_solve(ns_solver_t* solver, flow_field* field, cons
             }
             stats->max_velocity = max_vel;
             stats->max_pressure = max_p;
+            stats->max_temperature = compute_max_temperature(field);
         }
         return CFD_SUCCESS;
     }
@@ -1172,6 +1195,7 @@ static cfd_status_t explicit_euler_omp_step(ns_solver_t* solver, flow_field* fie
         }
         stats->max_velocity = max_vel;
         stats->max_pressure = max_p;
+        stats->max_temperature = compute_max_temperature(field);
     }
     return CFD_SUCCESS;
 }
@@ -1199,6 +1223,7 @@ static cfd_status_t explicit_euler_omp_solve(ns_solver_t* solver, flow_field* fi
         }
         stats->max_velocity = max_vel;
         stats->max_pressure = max_p;
+        stats->max_temperature = compute_max_temperature(field);
     }
     return CFD_SUCCESS;
 }
@@ -1261,6 +1286,7 @@ static cfd_status_t rk2_omp_step(ns_solver_t* solver, flow_field* field, const g
         }
         stats->max_velocity = max_vel;
         stats->max_pressure = max_p;
+        stats->max_temperature = compute_max_temperature(field);
     }
     return status;
 }
@@ -1294,6 +1320,7 @@ static cfd_status_t rk2_omp_solve(ns_solver_t* solver, flow_field* field, const
         }
         stats->max_velocity = max_vel;
         stats->max_pressure = max_p;
+        stats->max_temperature = compute_max_temperature(field);
     }
     return status;
 }
@@ -1385,6 +1412,7 @@ static cfd_status_t projection_omp_step(ns_solver_t* solver, flow_field* field,
         }
         stats->max_velocity = max_vel;
         stats->max_pressure = max_p;
+        stats->max_temperature = compute_max_temperature(field);
     }
     return CFD_SUCCESS;
 }
@@ -1415,6 +1443,7 @@ static cfd_status_t projection_omp_solve(ns_solver_t* solver, flow_field* field,
         }
         stats->max_velocity = max_vel;
         stats->max_pressure = max_p;
+        stats->max_temperature = compute_max_temperature(field);
     }
     return CFD_SUCCESS;
 }
diff --git a/lib/src/solvers/navier_stokes/avx2/solver_rk2_avx2.c b/lib/src/solvers/navier_stokes/avx2/solver_rk2_avx2.c
index dfbb64b8..ffd5ab68 100644
--- a/lib/src/solvers/navier_stokes/avx2/solver_rk2_avx2.c
+++ b/lib/src/solvers/navier_stokes/avx2/solver_rk2_avx2.c
@@ -801,6 +801,14 @@ cfd_status_t rk2_avx2_step(ns_solver_t* solver, flow_field* field, const grid* g
         }
         stats->max_velocity = max_vel;
         stats->max_pressure = max_p;
+        double max_t = 0.0;
+        if (field->T) {
+            for (ks = 0; ks < n_s; ks++) {
+                double at = fabs(field->T[ks]);
+                if (at > max_t) max_t = at;
+            }
+        }
+        stats->max_temperature = max_t;
     }
 
     return status;
@@ -848,6 +856,14 @@ cfd_status_t rk2_avx2_solve(ns_solver_t* solver, flow_field* field, const grid*
         }
         stats->max_velocity = max_vel;
         stats->max_pressure = max_p;
+        double max_t = 0.0;
+        if (field->T) {
+            for (ks = 0; ks < n_s; ks++) {
+                double at = fabs(field->T[ks]);
+                if (at > max_t) max_t = at;
+            }
+        }
+        stats->max_temperature = max_t;
     }
     return status;
 #endif

From 7cc828a5fa4d3c2a6bfc7c9c45b43d47663f5cbc Mon Sep 17 00:00:00 2001
From: shaia <shaia@outlook.com>
Date: Sun, 8 Mar 2026 07:40:45 +0200
Subject: [PATCH 05/20] Add test for heat_source_func in energy solver

Exercises the heat source callback path with a uniform Q=1.0 source on
a quiescent flow. Verifies the interior temperature rises by Q*N*dt,
confirming the callback is invoked and the returned value is applied.
---
 tests/solvers/energy/test_energy_solver.c | 70 +++++++++++++++++++++++
 1 file changed, 70 insertions(+)

diff --git a/tests/solvers/energy/test_energy_solver.c b/tests/solvers/energy/test_energy_solver.c
index 5a5ff4b5..a91b3675 100644
--- a/tests/solvers/energy/test_energy_solver.c
+++ b/tests/solvers/energy/test_energy_solver.c
@@ -366,6 +366,75 @@ static void test_backward_compatibility(void) {
     grid_destroy(g);
 }
 
+/* ============================================================================
+ * TEST 6: Heat Source Function
+ *
+ * Apply a uniform heat source Q=1.0 to a quiescent flow with zero initial
+ * temperature. After N steps, T should increase by approximately Q*N*dt
+ * in the interior (no advection, diffusion of uniform field is zero).
+ * ============================================================================ */
+
+#define HS_NX 17
+#define HS_NY 17
+#define HS_DT 0.001
+#define HS_STEPS 50
+#define HS_Q 1.0
+
+static double uniform_heat_source(double x, double y, double z, double t,
+                                   void* context) {
+    (void)x; (void)y; (void)z; (void)t; (void)context;
+    return HS_Q;
+}
+
+static void test_heat_source(void) {
+    grid* g = grid_create(HS_NX, HS_NY, 1, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0);
+    TEST_ASSERT_NOT_NULL(g);
+    grid_initialize_uniform(g);
+
+    flow_field* field = flow_field_create(HS_NX, HS_NY, 1);
+    TEST_ASSERT_NOT_NULL(field);
+
+    /* Quiescent flow, uniform T=0, rho=1 */
+    for (size_t n = 0; n < (size_t)(HS_NX * HS_NY); n++) {
+        field->u[n] = 0.0;
+        field->v[n] = 0.0;
+        field->w[n] = 0.0;
+        field->p[n] = 1.0;
+        field->rho[n] = 1.0;
+        field->T[n] = 0.0;
+    }
+
+    ns_solver_params_t params = ns_solver_params_default();
+    params.alpha = 0.01;  /* Need alpha > 0 to enable energy equation */
+    params.heat_source_func = uniform_heat_source;
+    params.heat_source_context = NULL;
+
+    for (int step = 0; step < HS_STEPS; step++) {
+        cfd_status_t status = energy_step_explicit(field, g, &params, HS_DT,
+                                                    step * HS_DT);
+        TEST_ASSERT_EQUAL(CFD_SUCCESS, status);
+    }
+
+    /* Interior points should have T ≈ Q * N * dt = 1.0 * 50 * 0.001 = 0.05
+     * (boundary points stay at 0 since the stencil doesn't update them) */
+    double expected = HS_Q * HS_STEPS * HS_DT;
+    size_t ci = HS_NX / 2;
+    size_t cj = HS_NY / 2;
+    double T_center = field->T[cj * HS_NX + ci];
+
+    printf("  Heat source test: T_center=%.6f, expected≈%.6f\n",
+           T_center, expected);
+
+    /* Uniform field has zero Laplacian, so diffusion doesn't contribute.
+     * The only driver is Q. Allow 10% tolerance for boundary effects. */
+    TEST_ASSERT_TRUE_MESSAGE(T_center > 0.0,
+                              "Temperature should increase with heat source");
+    TEST_ASSERT_DOUBLE_WITHIN(expected * 0.10, expected, T_center);
+
+    flow_field_destroy(field);
+    grid_destroy(g);
+}
+
 /* ============================================================================
  * MAIN
  * ============================================================================ */
@@ -377,5 +446,6 @@ int main(void) {
     RUN_TEST(test_energy_disabled);
     RUN_TEST(test_buoyancy_source);
     RUN_TEST(test_backward_compatibility);
+    RUN_TEST(test_heat_source);
     return UNITY_END();
 }

From c346a9507495c7aaadb360da7a53e64bfe231f31 Mon Sep 17 00:00:00 2001
From: shaia <shaia@outlook.com>
Date: Fri, 3 Apr 2026 18:44:09 +0300
Subject: [PATCH 06/20] Use plain max T (not fabs) and merge into OMP-parallel
 loop

max_temperature used fabs which misreports when T contains negative
perturbations. Switch to plain max to match the documented semantics.
Also merge the temperature scan into the existing velocity/pressure
reduction loop to avoid a second sequential pass over the array.
---
 lib/src/api/solver_registry.c                 |  8 +++----
 .../navier_stokes/avx2/solver_rk2_avx2.c      | 24 +++++--------------
 2 files changed, 10 insertions(+), 22 deletions(-)

diff --git a/lib/src/api/solver_registry.c b/lib/src/api/solver_registry.c
index 59d6b131..d2bc7843 100644
--- a/lib/src/api/solver_registry.c
+++ b/lib/src/api/solver_registry.c
@@ -24,14 +24,14 @@
 #include <omp.h>
 #endif
 
-/* Compute max |T| for stats reporting */
+/* Compute max T for stats reporting */
 static double compute_max_temperature(const flow_field* field) {
     double max_t = 0.0;
     if (field->T) {
         size_t n = field->nx * field->ny * field->nz;
-        for (size_t i = 0; i < n; i++) {
-            double at = fabs(field->T[i]);
-            if (at > max_t) max_t = at;
+        max_t = field->T[0];
+        for (size_t i = 1; i < n; i++) {
+            if (field->T[i] > max_t) max_t = field->T[i];
         }
     }
     return max_t;
diff --git a/lib/src/solvers/navier_stokes/avx2/solver_rk2_avx2.c b/lib/src/solvers/navier_stokes/avx2/solver_rk2_avx2.c
index ffd5ab68..209a99cf 100644
--- a/lib/src/solvers/navier_stokes/avx2/solver_rk2_avx2.c
+++ b/lib/src/solvers/navier_stokes/avx2/solver_rk2_avx2.c
@@ -785,11 +785,11 @@ cfd_status_t rk2_avx2_step(ns_solver_t* solver, flow_field* field, const grid* g
 
     if (stats) {
         stats->iterations = 1;
-        double max_vel = 0.0, max_p = 0.0;
+        double max_vel = 0.0, max_p = 0.0, max_t = 0.0;
         ptrdiff_t n_s = (ptrdiff_t)(field->nx * field->ny * field->nz);
         ptrdiff_t ks;
 #if defined(_OPENMP) && (_OPENMP >= 201107)
-        #pragma omp parallel for reduction(max: max_vel, max_p) schedule(static)
+        #pragma omp parallel for reduction(max: max_vel, max_p, max_t) schedule(static)
 #endif
         for (ks = 0; ks < n_s; ks++) {
             double vel = sqrt(field->u[ks] * field->u[ks] +
@@ -798,16 +798,10 @@ cfd_status_t rk2_avx2_step(ns_solver_t* solver, flow_field* field, const grid* g
             if (vel > max_vel) max_vel = vel;
             double ap = fabs(field->p[ks]);
             if (ap > max_p) max_p = ap;
+            if (field->T && field->T[ks] > max_t) max_t = field->T[ks];
         }
         stats->max_velocity = max_vel;
         stats->max_pressure = max_p;
-        double max_t = 0.0;
-        if (field->T) {
-            for (ks = 0; ks < n_s; ks++) {
-                double at = fabs(field->T[ks]);
-                if (at > max_t) max_t = at;
-            }
-        }
         stats->max_temperature = max_t;
     }
 
@@ -840,11 +834,11 @@ cfd_status_t rk2_avx2_solve(ns_solver_t* solver, flow_field* field, const grid*
 
     if (stats) {
         stats->iterations = params->max_iter;
-        double max_vel = 0.0, max_p = 0.0;
+        double max_vel = 0.0, max_p = 0.0, max_t = 0.0;
         ptrdiff_t n_s = (ptrdiff_t)(field->nx * field->ny * field->nz);
         ptrdiff_t ks;
 #if defined(_OPENMP) && (_OPENMP >= 201107)
-        #pragma omp parallel for reduction(max: max_vel, max_p) schedule(static)
+        #pragma omp parallel for reduction(max: max_vel, max_p, max_t) schedule(static)
 #endif
         for (ks = 0; ks < n_s; ks++) {
             double vel = sqrt(field->u[ks] * field->u[ks] +
@@ -853,16 +847,10 @@ cfd_status_t rk2_avx2_solve(ns_solver_t* solver, flow_field* field, const grid*
             if (vel > max_vel) max_vel = vel;
             double ap = fabs(field->p[ks]);
             if (ap > max_p) max_p = ap;
+            if (field->T && field->T[ks] > max_t) max_t = field->T[ks];
         }
         stats->max_velocity = max_vel;
         stats->max_pressure = max_p;
-        double max_t = 0.0;
-        if (field->T) {
-            for (ks = 0; ks < n_s; ks++) {
-                double at = fabs(field->T[ks]);
-                if (at > max_t) max_t = at;
-            }
-        }
         stats->max_temperature = max_t;
     }
     return status;

From 4e2b6c4673516ba89a37c2486b3166d6e7c16de6 Mon Sep 17 00:00:00 2001
From: shaia <shaia@outlook.com>
Date: Fri, 3 Apr 2026 18:47:00 +0300
Subject: [PATCH 07/20] Replace magic number 81 with named constants in energy
 disabled test

Use DIS_NX, DIS_NY, DIS_TOTAL for clarity and maintainability,
matching the pattern used by other tests in this file.
---
 tests/solvers/energy/test_energy_solver.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/tests/solvers/energy/test_energy_solver.c b/tests/solvers/energy/test_energy_solver.c
index a91b3675..418b1573 100644
--- a/tests/solvers/energy/test_energy_solver.c
+++ b/tests/solvers/energy/test_energy_solver.c
@@ -228,16 +228,20 @@ static void test_pure_advection(void) {
  * When alpha=0, energy_step_explicit should be a no-op.
  * ============================================================================ */
 
+#define DIS_NX 9
+#define DIS_NY 9
+#define DIS_TOTAL ((size_t)(DIS_NX) * DIS_NY)
+
 static void test_energy_disabled(void) {
-    grid* g = grid_create(9, 9, 1, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0);
+    grid* g = grid_create(DIS_NX, DIS_NY, 1, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0);
     TEST_ASSERT_NOT_NULL(g);
     grid_initialize_uniform(g);
 
-    flow_field* field = flow_field_create(9, 9, 1);
+    flow_field* field = flow_field_create(DIS_NX, DIS_NY, 1);
     TEST_ASSERT_NOT_NULL(field);
 
     /* Set non-uniform temperature */
-    for (size_t n = 0; n < 81; n++) {
+    for (size_t n = 0; n < DIS_TOTAL; n++) {
         field->u[n] = 1.0;
         field->v[n] = 0.5;
         field->rho[n] = 1.0;
@@ -245,8 +249,8 @@ static void test_energy_disabled(void) {
     }
 
     /* Save original temperature */
-    double T_orig[81];
-    memcpy(T_orig, field->T, 81 * sizeof(double));
+    double T_orig[DIS_TOTAL];
+    memcpy(T_orig, field->T, DIS_TOTAL * sizeof(double));
 
     ns_solver_params_t params = ns_solver_params_default();
     params.alpha = 0.0;  /* Disabled */
@@ -255,7 +259,7 @@ static void test_energy_disabled(void) {
     TEST_ASSERT_EQUAL(CFD_SUCCESS, status);
 
     /* Temperature should be unchanged */
-    for (size_t n = 0; n < 81; n++) {
+    for (size_t n = 0; n < DIS_TOTAL; n++) {
         TEST_ASSERT_DOUBLE_WITHIN(1e-15, T_orig[n], field->T[n]);
     }
 

From 2d36f774b9fa40a89f41ee39892f8f6913150f75 Mon Sep 17 00:00:00 2001
From: shaia <shaia@outlook.com>
Date: Fri, 3 Apr 2026 19:07:29 +0300
Subject: [PATCH 08/20] Guard OMP/AVX2 backends against unsupported energy
 params

OMP and SIMD backends don't call energy_step_explicit or
energy_compute_buoyancy, so alpha>0 or beta!=0 would silently produce
wrong results. Now return CFD_ERROR_UNSUPPORTED with a clear message.
Also document the limitation on the params struct.
---
 .../cfd/solvers/navier_stokes_solver.h        |  6 ++-
 lib/src/api/solver_registry.c                 | 41 ++++++++++++++++++-
 .../navier_stokes/avx2/solver_rk2_avx2.c      | 10 +++++
 3 files changed, 54 insertions(+), 3 deletions(-)

diff --git a/lib/include/cfd/solvers/navier_stokes_solver.h b/lib/include/cfd/solvers/navier_stokes_solver.h
index 6455afab..9df26c47 100644
--- a/lib/include/cfd/solvers/navier_stokes_solver.h
+++ b/lib/include/cfd/solvers/navier_stokes_solver.h
@@ -112,7 +112,11 @@ typedef struct {
     ns_source_func_t source_func;  /**< Custom source term function pointer */
     void* source_context;           /**< User context for source_func */
 
-    /* Energy equation parameters (alpha > 0 enables energy equation) */
+    /* Energy equation parameters (alpha > 0 enables energy equation).
+     * NOTE: Energy coupling (temperature advection/diffusion and Boussinesq
+     * buoyancy) is currently only implemented in scalar CPU backends
+     * (explicit_euler, projection, rk2). OMP, AVX2, and GPU backends will
+     * return CFD_ERROR_UNSUPPORTED if alpha > 0 or beta != 0. */
     double alpha;           /**< Thermal diffusivity k/(rho*cp) [m^2/s], 0 = disabled */
     double beta;            /**< Thermal expansion coefficient [1/K] (Boussinesq) */
     double T_ref;           /**< Reference temperature [K] for Boussinesq */
diff --git a/lib/src/api/solver_registry.c b/lib/src/api/solver_registry.c
index d2bc7843..27c4ee2a 100644
--- a/lib/src/api/solver_registry.c
+++ b/lib/src/api/solver_registry.c
@@ -37,6 +37,17 @@ static double compute_max_temperature(const flow_field* field) {
     return max_t;
 }
 
+/* Check if energy equation params are set on a backend that doesn't support them */
+static cfd_status_t check_energy_unsupported(const ns_solver_params_t* params) {
+    if (params->alpha > 0.0 || params->beta != 0.0) {
+        cfd_set_error(CFD_ERROR_UNSUPPORTED,
+                      "Energy equation (alpha/beta) not supported by this backend; "
+                      "use a scalar CPU solver (explicit_euler, projection, rk2)");
+        return CFD_ERROR_UNSUPPORTED;
+    }
+    return CFD_SUCCESS;
+}
+
 // Forward declarations for internal solver implementations
 // These are not part of the public API
 cfd_status_t explicit_euler_impl(flow_field* field, const grid* grid, const ns_solver_params_t* params);
@@ -710,11 +721,21 @@ static ns_solver_t* create_rk2_optimized_solver(void) {
     return s;
 }
 
+static cfd_status_t explicit_euler_simd_step_guarded(ns_solver_t* solver, flow_field* field,
+                                                      const grid* grid, const ns_solver_params_t* params,
+                                                      ns_solver_stats_t* stats) {
+    cfd_status_t rc = check_energy_unsupported(params);
+    if (rc != CFD_SUCCESS) return rc;
+    return explicit_euler_simd_step(solver, field, grid, params, stats);
+}
+
 static cfd_status_t explicit_euler_simd_solve(ns_solver_t* solver, flow_field* field, const grid* grid,
                                               const ns_solver_params_t* params, ns_solver_stats_t* stats) {
     if (!solver || !field || !grid || !params) {
         return CFD_ERROR_INVALID;
     }
+    cfd_status_t rc = check_energy_unsupported(params);
+    if (rc != CFD_SUCCESS) return rc;
 
     for (int i = 0; i < params->max_iter; i++) {
         cfd_status_t status = explicit_euler_simd_step(solver, field, grid, params, NULL);
@@ -757,7 +778,7 @@ static ns_solver_t* create_explicit_euler_optimized_solver(void) {
 
     s->init = explicit_euler_simd_init;
     s->destroy = explicit_euler_simd_destroy;
-    s->step = explicit_euler_simd_step;
+    s->step = explicit_euler_simd_step_guarded;
     s->solve = explicit_euler_simd_solve;
     s->apply_boundary = NULL;
     s->compute_dt = NULL;
@@ -883,11 +904,21 @@ static ns_solver_t* create_projection_solver(void) {
     return s;
 }
 
+static cfd_status_t projection_simd_step_guarded(ns_solver_t* solver, flow_field* field,
+                                                   const grid* grid, const ns_solver_params_t* params,
+                                                   ns_solver_stats_t* stats) {
+    cfd_status_t rc = check_energy_unsupported(params);
+    if (rc != CFD_SUCCESS) return rc;
+    return projection_simd_step(solver, field, grid, params, stats);
+}
+
 static cfd_status_t projection_simd_solve(ns_solver_t* solver, flow_field* field, const grid* grid,
                                           const ns_solver_params_t* params, ns_solver_stats_t* stats) {
     if (!solver || !field || !grid || !params) {
         return CFD_ERROR_INVALID;
     }
+    cfd_status_t rc = check_energy_unsupported(params);
+    if (rc != CFD_SUCCESS) return rc;
 
     // Use the step function which utilizes the persistent context
     for (int i = 0; i < params->max_iter; i++) {
@@ -933,7 +964,7 @@ static ns_solver_t* create_projection_optimized_solver(void) {
 
     s->init = projection_simd_init;
     s->destroy = projection_simd_destroy;
-    s->step = projection_simd_step;
+    s->step = projection_simd_step_guarded;
     s->solve = projection_simd_solve;
     s->apply_boundary = NULL;
     s->compute_dt = NULL;
@@ -1175,6 +1206,8 @@ static cfd_status_t explicit_euler_omp_step(ns_solver_t* solver, flow_field* fie
     if (field->nx < 3 || field->ny < 3) {
         return CFD_ERROR_INVALID;
     }
+    cfd_status_t rc = check_energy_unsupported(params);
+    if (rc != CFD_SUCCESS) return rc;
 
     ns_solver_params_t step_params = *params;
     step_params.max_iter = 1;
@@ -1260,6 +1293,8 @@ static cfd_status_t rk2_omp_step(ns_solver_t* solver, flow_field* field, const g
     if (field->nx < 3 || field->ny < 3) {
         return CFD_ERROR_INVALID;
     }
+    cfd_status_t rc = check_energy_unsupported(params);
+    if (rc != CFD_SUCCESS) return rc;
 
     ns_solver_params_t step_params = *params;
     step_params.max_iter = 1;
@@ -1383,6 +1418,8 @@ static cfd_status_t projection_omp_step(ns_solver_t* solver, flow_field* field,
     if (field->nx < 3 || field->ny < 3) {
         return CFD_ERROR_INVALID;
     }
+    cfd_status_t rc = check_energy_unsupported(params);
+    if (rc != CFD_SUCCESS) return rc;
 
     ns_solver_params_t step_params = *params;
     step_params.max_iter = 1;
diff --git a/lib/src/solvers/navier_stokes/avx2/solver_rk2_avx2.c b/lib/src/solvers/navier_stokes/avx2/solver_rk2_avx2.c
index 209a99cf..86d2b84e 100644
--- a/lib/src/solvers/navier_stokes/avx2/solver_rk2_avx2.c
+++ b/lib/src/solvers/navier_stokes/avx2/solver_rk2_avx2.c
@@ -769,6 +769,11 @@ cfd_status_t rk2_avx2_step(ns_solver_t* solver, flow_field* field, const grid* g
     if (!solver || !solver->context || !field || !g || !params) {
         return CFD_ERROR_INVALID;
     }
+    if (params->alpha > 0.0 || params->beta != 0.0) {
+        cfd_set_error(CFD_ERROR_UNSUPPORTED,
+                      "Energy equation not supported by AVX2 backend");
+        return CFD_ERROR_UNSUPPORTED;
+    }
     if (field->nx < 3 || field->ny < 3 || (field->nz > 1 && field->nz < 3)) {
         return CFD_ERROR_INVALID;
     }
@@ -819,6 +824,11 @@ cfd_status_t rk2_avx2_solve(ns_solver_t* solver, flow_field* field, const grid*
     if (!solver || !solver->context || !field || !g || !params) {
         return CFD_ERROR_INVALID;
     }
+    if (params->alpha > 0.0 || params->beta != 0.0) {
+        cfd_set_error(CFD_ERROR_UNSUPPORTED,
+                      "Energy equation not supported by AVX2 backend");
+        return CFD_ERROR_UNSUPPORTED;
+    }
     if (field->nx < 3 || field->ny < 3 || (field->nz > 1 && field->nz < 3)) {
         return CFD_ERROR_INVALID;
     }

From d29f06faeacfc6982dec3cbb13accf53c36781d4 Mon Sep 17 00:00:00 2001
From: shaia <shaia@outlook.com>
Date: Fri, 3 Apr 2026 21:23:58 +0300
Subject: [PATCH 09/20] Validate uniform dz spacing in energy solver for 3D
 grids

The energy solver uses grid->dz[0] as a constant for all z-derivatives.
Reject non-uniform dz grids early with CFD_ERROR_UNSUPPORTED rather than
silently computing incorrect z-derivatives on stretched grids.
---
 lib/src/solvers/energy/cpu/energy_solver.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/lib/src/solvers/energy/cpu/energy_solver.c b/lib/src/solvers/energy/cpu/energy_solver.c
index 998b55d1..ddd7c0d6 100644
--- a/lib/src/solvers/energy/cpu/energy_solver.c
+++ b/lib/src/solvers/energy/cpu/energy_solver.c
@@ -33,6 +33,24 @@ cfd_status_t energy_step_explicit(flow_field* field, const grid* grid,
     size_t total = plane * nz;
     double alpha = params->alpha;
 
+    /* Validate uniform dz for 3D (energy solver uses constant z-spacing) */
+    if (nz > 1) {
+        if (!grid->dz) {
+            cfd_set_error(CFD_ERROR_INVALID,
+                          "energy_solver: missing dz for 3D energy solve");
+            return CFD_ERROR_INVALID;
+        }
+        const double dz0 = grid->dz[0];
+        const double tol = 1e-12 * fmax(1.0, fabs(dz0));
+        for (size_t k = 1; k < nz; k++) {
+            if (fabs(grid->dz[k] - dz0) > tol) {
+                cfd_set_error(CFD_ERROR_UNSUPPORTED,
+                              "energy_solver: non-uniform dz not supported");
+                return CFD_ERROR_UNSUPPORTED;
+            }
+        }
+    }
+
     /* Branch-free 3D constants */
     size_t stride_z = (nz > 1) ? plane : 0;
     size_t k_start  = (nz > 1) ? 1 : 0;

From 3e464746637dfa01282bd071c7aa4e567f54266c Mon Sep 17 00:00:00 2001
From: shaia <shaia@outlook.com>
Date: Fri, 3 Apr 2026 21:35:40 +0300
Subject: [PATCH 10/20] Use cfd_set_error instead of fprintf for NaN detection
 in energy solver

Consistent with other solvers that set a structured error message via
cfd_set_error when returning CFD_ERROR_DIVERGED. Also remove now-unused
stdio.h include.
---
 lib/src/solvers/energy/cpu/energy_solver.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/lib/src/solvers/energy/cpu/energy_solver.c b/lib/src/solvers/energy/cpu/energy_solver.c
index ddd7c0d6..0a447730 100644
--- a/lib/src/solvers/energy/cpu/energy_solver.c
+++ b/lib/src/solvers/energy/cpu/energy_solver.c
@@ -15,7 +15,6 @@
 #include "cfd/core/memory.h"
 
 #include <math.h>
-#include <stdio.h>
 #include <string.h>
 
 cfd_status_t energy_step_explicit(flow_field* field, const grid* grid,
@@ -113,10 +112,8 @@ cfd_status_t energy_step_explicit(flow_field* field, const grid* grid,
     /* Check for NaN/Inf */
     for (size_t n = 0; n < total; n++) {
         if (!isfinite(T_new[n])) {
-            size_t ni = n % nx;
-            size_t nj = (n / nx) % ny;
-            fprintf(stderr, "energy_solver: NaN/Inf at idx=%zu (i=%zu,j=%zu) T_new=%e field->T=%e\n",
-                    n, ni, nj, T_new[n], field->T[n]);
+            cfd_set_error(CFD_ERROR_DIVERGED,
+                          "NaN/Inf detected in energy_step_explicit");
             cfd_free(T_new);
             return CFD_ERROR_DIVERGED;
         }

From 7309a50987b0d29710a2dce59839d2df6efb6950 Mon Sep 17 00:00:00 2001
From: shaia <shaia@outlook.com>
Date: Fri, 3 Apr 2026 21:52:36 +0300
Subject: [PATCH 11/20] Guard GPU backends against unsupported energy equation
 params

GPU Euler and projection solvers don't implement energy coupling.
Return CFD_ERROR_UNSUPPORTED when alpha>0 or beta!=0 to prevent
silently ignoring temperature advection/diffusion and buoyancy.
---
 lib/src/api/solver_registry.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/lib/src/api/solver_registry.c b/lib/src/api/solver_registry.c
index 27c4ee2a..c396d71a 100644
--- a/lib/src/api/solver_registry.c
+++ b/lib/src/api/solver_registry.c
@@ -1026,6 +1026,8 @@ static cfd_status_t gpu_euler_step(ns_solver_t* solver, flow_field* field, const
     if (field->nx < 3 || field->ny < 3) {
         return CFD_ERROR_INVALID;
     }
+    cfd_status_t rc = check_energy_unsupported(params);
+    if (rc != CFD_SUCCESS) return rc;
 
     ns_solver_params_t step_params = *params;
     step_params.max_iter = 1;
@@ -1077,6 +1079,8 @@ static cfd_status_t gpu_euler_solve(ns_solver_t* solver, flow_field* field, cons
     if (field->nx < 3 || field->ny < 3) {
         return CFD_ERROR_INVALID;
     }
+    cfd_status_t rc = check_energy_unsupported(params);
+    if (rc != CFD_SUCCESS) return rc;
 
     if (ctx && ctx->use_gpu && ctx->gpu_ctx) {
         // Use full GPU solver
@@ -1146,6 +1150,8 @@ static cfd_status_t gpu_projection_step(ns_solver_t* solver, flow_field* field,
                                         const ns_solver_params_t* params, ns_solver_stats_t* stats) {
     (void)solver;
     (void)stats;
+    cfd_status_t rc = check_energy_unsupported(params);
+    if (rc != CFD_SUCCESS) return rc;
     ns_solver_params_t step_params = *params;
     step_params.max_iter = 1;
     /* Override thresholds to allow single-step GPU execution on small grids */
@@ -1159,6 +1165,8 @@ static cfd_status_t gpu_projection_solve(ns_solver_t* solver, flow_field* field,
                                          const ns_solver_params_t* params, ns_solver_stats_t* stats) {
     (void)solver;
     (void)stats;
+    cfd_status_t rc = check_energy_unsupported(params);
+    if (rc != CFD_SUCCESS) return rc;
     /* Override thresholds to allow GPU execution on small grids */
     gpu_config_t cfg = gpu_config_default();
     cfg.min_grid_size = 1;

From 4da24f48b3a54f3fcd0515a94d96009dc0c02ebc Mon Sep 17 00:00:00 2001
From: shaia <shaia@outlook.com>
Date: Sat, 4 Apr 2026 08:07:23 +0300
Subject: [PATCH 12/20] Init max_t from field->T[0] in AVX2 rk2 stats reduction

max_t=0.0 incorrectly reports 0 when all temperatures are negative
(e.g. perturbation fields). Initialize from the first element so the
OMP max-reduction produces the correct result for any temperature range.
---
 lib/src/solvers/navier_stokes/avx2/solver_rk2_avx2.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/lib/src/solvers/navier_stokes/avx2/solver_rk2_avx2.c b/lib/src/solvers/navier_stokes/avx2/solver_rk2_avx2.c
index 86d2b84e..b7ea59f7 100644
--- a/lib/src/solvers/navier_stokes/avx2/solver_rk2_avx2.c
+++ b/lib/src/solvers/navier_stokes/avx2/solver_rk2_avx2.c
@@ -790,8 +790,9 @@ cfd_status_t rk2_avx2_step(ns_solver_t* solver, flow_field* field, const grid* g
 
     if (stats) {
         stats->iterations = 1;
-        double max_vel = 0.0, max_p = 0.0, max_t = 0.0;
+        double max_vel = 0.0, max_p = 0.0;
         ptrdiff_t n_s = (ptrdiff_t)(field->nx * field->ny * field->nz);
+        double max_t = (field->T && n_s > 0) ? field->T[0] : 0.0;
         ptrdiff_t ks;
 #if defined(_OPENMP) && (_OPENMP >= 201107)
         #pragma omp parallel for reduction(max: max_vel, max_p, max_t) schedule(static)
@@ -844,8 +845,9 @@ cfd_status_t rk2_avx2_solve(ns_solver_t* solver, flow_field* field, const grid*
 
     if (stats) {
         stats->iterations = params->max_iter;
-        double max_vel = 0.0, max_p = 0.0, max_t = 0.0;
+        double max_vel = 0.0, max_p = 0.0;
         ptrdiff_t n_s = (ptrdiff_t)(field->nx * field->ny * field->nz);
+        double max_t = (field->T && n_s > 0) ? field->T[0] : 0.0;
         ptrdiff_t ks;
 #if defined(_OPENMP) && (_OPENMP >= 201107)
         #pragma omp parallel for reduction(max: max_vel, max_p, max_t) schedule(static)

From 872590ee27449e3ff95cc66b25315368257dbc05 Mon Sep 17 00:00:00 2001
From: shaia <shaia@outlook.com>
Date: Sat, 4 Apr 2026 08:48:02 +0300
Subject: [PATCH 13/20] Reuse T_new workspace across energy solver calls
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

energy_step_explicit allocated and freed a full T_new buffer on every
call, adding heap pressure in long simulations. Add an internal
workspace-aware variant that accepts a caller-owned buffer. The three
scalar CPU solvers now allocate T_energy_ws once before the iteration
loop and pass it through, eliminating per-step malloc/free overhead.
The public API is unchanged — it wraps the workspace variant with NULL.
---
 lib/src/solvers/energy/cpu/energy_solver.c    | 35 ++++++++++++++-----
 .../solvers/energy/energy_solver_internal.h   | 31 ++++++++++++++++
 .../navier_stokes/cpu/solver_explicit_euler.c | 20 ++++++-----
 .../navier_stokes/cpu/solver_projection.c     | 14 ++++++--
 .../solvers/navier_stokes/cpu/solver_rk2.c    | 13 +++++--
 5 files changed, 90 insertions(+), 23 deletions(-)
 create mode 100644 lib/src/solvers/energy/energy_solver_internal.h

diff --git a/lib/src/solvers/energy/cpu/energy_solver.c b/lib/src/solvers/energy/cpu/energy_solver.c
index 0a447730..efe0bd5a 100644
--- a/lib/src/solvers/energy/cpu/energy_solver.c
+++ b/lib/src/solvers/energy/cpu/energy_solver.c
@@ -10,6 +10,7 @@
  */
 
 #include "cfd/solvers/energy_solver.h"
+#include "../energy_solver_internal.h"
 
 #include "cfd/core/indexing.h"
 #include "cfd/core/memory.h"
@@ -17,9 +18,11 @@
 #include <math.h>
 #include <string.h>
 
-cfd_status_t energy_step_explicit(flow_field* field, const grid* grid,
-                                   const ns_solver_params_t* params,
-                                   double dt, double time) {
+cfd_status_t energy_step_explicit_with_workspace(
+    flow_field* field, const grid* grid,
+    const ns_solver_params_t* params,
+    double dt, double time,
+    double* T_workspace, size_t workspace_size) {
     /* Skip when energy equation is disabled */
     if (params->alpha <= 0.0) {
         return CFD_SUCCESS;
@@ -57,10 +60,17 @@ cfd_status_t energy_step_explicit(flow_field* field, const grid* grid,
     double inv_2dz  = (nz > 1 && grid->dz) ? 1.0 / (2.0 * grid->dz[0]) : 0.0;
     double inv_dz2  = (nz > 1 && grid->dz) ? 1.0 / (grid->dz[0] * grid->dz[0]) : 0.0;
 
-    /* Allocate temporary array for new temperature */
-    double* T_new = (double*)cfd_calloc(total, sizeof(double));
-    if (!T_new) {
-        return CFD_ERROR_NOMEM;
+    /* Use caller's workspace or allocate internally */
+    int owns_buffer = 0;
+    double* T_new;
+    if (T_workspace && workspace_size >= total) {
+        T_new = T_workspace;
+    } else {
+        T_new = (double*)cfd_calloc(total, sizeof(double));
+        if (!T_new) {
+            return CFD_ERROR_NOMEM;
+        }
+        owns_buffer = 1;
     }
     memcpy(T_new, field->T, total * sizeof(double));
 
@@ -114,17 +124,24 @@ cfd_status_t energy_step_explicit(flow_field* field, const grid* grid,
         if (!isfinite(T_new[n])) {
             cfd_set_error(CFD_ERROR_DIVERGED,
                           "NaN/Inf detected in energy_step_explicit");
-            cfd_free(T_new);
+            if (owns_buffer) cfd_free(T_new);
             return CFD_ERROR_DIVERGED;
         }
     }
 
     memcpy(field->T, T_new, total * sizeof(double));
-    cfd_free(T_new);
+    if (owns_buffer) cfd_free(T_new);
 
     return CFD_SUCCESS;
 }
 
+cfd_status_t energy_step_explicit(flow_field* field, const grid* grid,
+                                   const ns_solver_params_t* params,
+                                   double dt, double time) {
+    return energy_step_explicit_with_workspace(field, grid, params,
+                                                dt, time, NULL, 0);
+}
+
 void energy_compute_buoyancy(double T_local, const ns_solver_params_t* params,
                               double* source_u, double* source_v,
                               double* source_w) {
diff --git a/lib/src/solvers/energy/energy_solver_internal.h b/lib/src/solvers/energy/energy_solver_internal.h
new file mode 100644
index 00000000..e0a254f4
--- /dev/null
+++ b/lib/src/solvers/energy/energy_solver_internal.h
@@ -0,0 +1,31 @@
+/**
+ * @file energy_solver_internal.h
+ * @brief Internal workspace-aware energy solver interface
+ *
+ * Not part of the public API. Used by scalar CPU NS solvers to avoid
+ * per-step allocation of the T_new scratch buffer.
+ */
+
+#ifndef CFD_ENERGY_SOLVER_INTERNAL_H
+#define CFD_ENERGY_SOLVER_INTERNAL_H
+
+#include "cfd/core/cfd_status.h"
+#include "cfd/core/grid.h"
+#include "cfd/solvers/navier_stokes_solver.h"
+
+#include <stddef.h>
+
+/**
+ * Workspace-aware energy step.
+ *
+ * When T_workspace is non-NULL and workspace_size >= nx*ny*nz, uses it
+ * as scratch instead of allocating. Caller owns the buffer lifetime.
+ * When T_workspace is NULL, allocates internally (same as public API).
+ */
+cfd_status_t energy_step_explicit_with_workspace(
+    flow_field* field, const grid* grid,
+    const ns_solver_params_t* params,
+    double dt, double time,
+    double* T_workspace, size_t workspace_size);
+
+#endif /* CFD_ENERGY_SOLVER_INTERNAL_H */
diff --git a/lib/src/solvers/navier_stokes/cpu/solver_explicit_euler.c b/lib/src/solvers/navier_stokes/cpu/solver_explicit_euler.c
index 77d315bc..beb5a6a0 100644
--- a/lib/src/solvers/navier_stokes/cpu/solver_explicit_euler.c
+++ b/lib/src/solvers/navier_stokes/cpu/solver_explicit_euler.c
@@ -9,6 +9,7 @@
 #include "cfd/solvers/energy_solver.h"
 #include "cfd/solvers/navier_stokes_solver.h"
 
+#include "../../energy/energy_solver_internal.h"
 #include "../boundary_copy_utils.h"
 
 #include <math.h>
@@ -367,10 +368,13 @@ cfd_status_t explicit_euler_impl(flow_field* field, const grid* grid, const ns_s
     double* w_new = (double*)cfd_calloc(total, sizeof(double));
     double* p_new = (double*)cfd_calloc(total, sizeof(double));
     double* rho_new = (double*)cfd_calloc(total, sizeof(double));
+    double* T_energy_ws = (params->alpha > 0.0)
+        ? (double*)cfd_calloc(total, sizeof(double)) : NULL;
 
-    if (!u_new || !v_new || !w_new || !p_new || !rho_new) {
+    if (!u_new || !v_new || !w_new || !p_new || !rho_new ||
+        (params->alpha > 0.0 && !T_energy_ws)) {
         cfd_free(u_new); cfd_free(v_new); cfd_free(w_new);
-        cfd_free(p_new); cfd_free(rho_new);
+        cfd_free(p_new); cfd_free(rho_new); cfd_free(T_energy_ws);
         return CFD_ERROR_NOMEM;
     }
 
@@ -527,12 +531,12 @@ cfd_status_t explicit_euler_impl(flow_field* field, const grid* grid, const ns_s
 
         /* Energy equation: advance temperature using updated velocity */
         {
-            cfd_status_t energy_status = energy_step_explicit(field, grid, params,
-                                                               conservative_dt,
-                                                               iter * conservative_dt);
+            cfd_status_t energy_status = energy_step_explicit_with_workspace(
+                field, grid, params, conservative_dt,
+                iter * conservative_dt, T_energy_ws, total);
             if (energy_status != CFD_SUCCESS) {
                 cfd_free(u_new); cfd_free(v_new); cfd_free(w_new);
-                cfd_free(p_new); cfd_free(rho_new);
+                cfd_free(p_new); cfd_free(rho_new); cfd_free(T_energy_ws);
                 return energy_status;
             }
         }
@@ -556,7 +560,7 @@ cfd_status_t explicit_euler_impl(flow_field* field, const grid* grid, const ns_s
         }
         if (has_nan) {
             cfd_free(u_new); cfd_free(v_new); cfd_free(w_new);
-            cfd_free(p_new); cfd_free(rho_new);
+            cfd_free(p_new); cfd_free(rho_new); cfd_free(T_energy_ws);
             cfd_set_error(CFD_ERROR_DIVERGED,
                           "NaN/Inf detected in explicit_euler step");
             return CFD_ERROR_DIVERGED;
@@ -564,7 +568,7 @@ cfd_status_t explicit_euler_impl(flow_field* field, const grid* grid, const ns_s
     }
 
     cfd_free(u_new); cfd_free(v_new); cfd_free(w_new);
-    cfd_free(p_new); cfd_free(rho_new);
+    cfd_free(p_new); cfd_free(rho_new); cfd_free(T_energy_ws);
 
     return CFD_SUCCESS;
 }
diff --git a/lib/src/solvers/navier_stokes/cpu/solver_projection.c b/lib/src/solvers/navier_stokes/cpu/solver_projection.c
index 013b4e5c..8e36ed45 100644
--- a/lib/src/solvers/navier_stokes/cpu/solver_projection.c
+++ b/lib/src/solvers/navier_stokes/cpu/solver_projection.c
@@ -25,6 +25,7 @@
 #include "cfd/solvers/navier_stokes_solver.h"
 #include "cfd/solvers/poisson_solver.h"
 
+#include "../../energy/energy_solver_internal.h"
 #include "../boundary_copy_utils.h"
 
 #include <math.h>
@@ -89,10 +90,14 @@ cfd_status_t solve_projection_method(flow_field* field, const grid* grid,
     double* p_new  = (double*)cfd_calloc(total, sizeof(double));
     double* p_temp = (double*)cfd_calloc(total, sizeof(double));
     double* rhs    = (double*)cfd_calloc(total, sizeof(double));
+    double* T_energy_ws = (params->alpha > 0.0)
+        ? (double*)cfd_calloc(total, sizeof(double)) : NULL;
 
-    if (!u_star || !v_star || !w_star || !p_new || !p_temp || !rhs) {
+    if (!u_star || !v_star || !w_star || !p_new || !p_temp || !rhs ||
+        (params->alpha > 0.0 && !T_energy_ws)) {
         cfd_free(u_star); cfd_free(v_star); cfd_free(w_star);
         cfd_free(p_new); cfd_free(p_temp); cfd_free(rhs);
+        cfd_free(T_energy_ws);
         return CFD_ERROR_NOMEM;
     }
 
@@ -248,11 +253,12 @@ cfd_status_t solve_projection_method(flow_field* field, const grid* grid,
 
         /* Energy equation: advance temperature after velocity correction */
         {
-            cfd_status_t energy_status = energy_step_explicit(field, grid, params, dt,
-                                                               iter * dt);
+            cfd_status_t energy_status = energy_step_explicit_with_workspace(
+                field, grid, params, dt, iter * dt, T_energy_ws, total);
             if (energy_status != CFD_SUCCESS) {
                 cfd_free(u_star); cfd_free(v_star); cfd_free(w_star);
                 cfd_free(p_new); cfd_free(p_temp); cfd_free(rhs);
+                cfd_free(T_energy_ws);
                 return energy_status;
             }
         }
@@ -267,6 +273,7 @@ cfd_status_t solve_projection_method(flow_field* field, const grid* grid,
                 !isfinite(field->w[n]) || !isfinite(field->p[n])) {
                 cfd_free(u_star); cfd_free(v_star); cfd_free(w_star);
                 cfd_free(p_new); cfd_free(p_temp); cfd_free(rhs);
+                cfd_free(T_energy_ws);
                 return CFD_ERROR_DIVERGED;
             }
         }
@@ -274,6 +281,7 @@ cfd_status_t solve_projection_method(flow_field* field, const grid* grid,
 
     cfd_free(u_star); cfd_free(v_star); cfd_free(w_star);
     cfd_free(p_new); cfd_free(p_temp); cfd_free(rhs);
+    cfd_free(T_energy_ws);
 
     return CFD_SUCCESS;
 }
diff --git a/lib/src/solvers/navier_stokes/cpu/solver_rk2.c b/lib/src/solvers/navier_stokes/cpu/solver_rk2.c
index c672083b..d96ae4b6 100644
--- a/lib/src/solvers/navier_stokes/cpu/solver_rk2.c
+++ b/lib/src/solvers/navier_stokes/cpu/solver_rk2.c
@@ -23,6 +23,8 @@
 #include "cfd/solvers/energy_solver.h"
 #include "cfd/solvers/navier_stokes_solver.h"
 
+#include "../../energy/energy_solver_internal.h"
+
 #include <math.h>
 #include <string.h>
 
@@ -249,13 +251,17 @@ cfd_status_t rk2_impl(flow_field* field, const grid* grid,
     double* v0 = (double*)cfd_calloc(total, sizeof(double));
     double* w0 = (double*)cfd_calloc(total, sizeof(double));
     double* p0 = (double*)cfd_calloc(total, sizeof(double));
+    double* T_energy_ws = (params->alpha > 0.0)
+        ? (double*)cfd_calloc(total, sizeof(double)) : NULL;
 
     if (!k1_u || !k1_v || !k1_w || !k1_p ||
         !k2_u || !k2_v || !k2_w || !k2_p ||
-        !u0 || !v0 || !w0 || !p0) {
+        !u0 || !v0 || !w0 || !p0 ||
+        (params->alpha > 0.0 && !T_energy_ws)) {
         cfd_free(k1_u); cfd_free(k1_v); cfd_free(k1_w); cfd_free(k1_p);
         cfd_free(k2_u); cfd_free(k2_v); cfd_free(k2_w); cfd_free(k2_p);
         cfd_free(u0); cfd_free(v0); cfd_free(w0); cfd_free(p0);
+        cfd_free(T_energy_ws);
         return CFD_ERROR_NOMEM;
     }
 
@@ -326,8 +332,8 @@ cfd_status_t rk2_impl(flow_field* field, const grid* grid,
 
         /* Energy equation: advance temperature after RK2 velocity update */
         {
-            cfd_status_t energy_status = energy_step_explicit(field, grid, params, dt,
-                                                               iter * dt);
+            cfd_status_t energy_status = energy_step_explicit_with_workspace(
+                field, grid, params, dt, iter * dt, T_energy_ws, total);
             if (energy_status != CFD_SUCCESS) {
                 status = energy_status;
                 goto cleanup;
@@ -352,6 +358,7 @@ cfd_status_t rk2_impl(flow_field* field, const grid* grid,
     cfd_free(k1_u); cfd_free(k1_v); cfd_free(k1_w); cfd_free(k1_p);
     cfd_free(k2_u); cfd_free(k2_v); cfd_free(k2_w); cfd_free(k2_p);
     cfd_free(u0); cfd_free(v0); cfd_free(w0); cfd_free(p0);
+    cfd_free(T_energy_ws);
 
     return status;
 }

From 49fff5fb59a85bf8dcaff0c8906384b8d2a5a337 Mon Sep 17 00:00:00 2001
From: shaia <shaia@outlook.com>
Date: Sat, 4 Apr 2026 11:02:03 +0300
Subject: [PATCH 14/20] Fix OOB read in energy solver dz validation loop

grid->dz is allocated with nz-1 elements (cell spacings), but the
validation loop iterated k < nz, reading one past the end. Limit to
k < nz - 1.
---
 lib/src/solvers/energy/cpu/energy_solver.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/src/solvers/energy/cpu/energy_solver.c b/lib/src/solvers/energy/cpu/energy_solver.c
index efe0bd5a..ff611f35 100644
--- a/lib/src/solvers/energy/cpu/energy_solver.c
+++ b/lib/src/solvers/energy/cpu/energy_solver.c
@@ -44,7 +44,7 @@ cfd_status_t energy_step_explicit_with_workspace(
         }
         const double dz0 = grid->dz[0];
         const double tol = 1e-12 * fmax(1.0, fabs(dz0));
-        for (size_t k = 1; k < nz; k++) {
+        for (size_t k = 1; k < nz - 1; k++) {
             if (fabs(grid->dz[k] - dz0) > tol) {
                 cfd_set_error(CFD_ERROR_UNSUPPORTED,
                               "energy_solver: non-uniform dz not supported");

From c6de2a5b2ff2d581cd0ec53080bf099b07a84b19 Mon Sep 17 00:00:00 2001
From: shaia <shaia@outlook.com>
Date: Sat, 4 Apr 2026 11:07:35 +0300
Subject: [PATCH 15/20] Preserve thermal BCs across apply_boundary_conditions
 calls

The generic periodic BC application overwrites caller-specified thermal
boundary values. Save and restore field->T around the call in explicit
Euler and RK2 solvers, reusing the energy workspace buffer.
---
 .../navier_stokes/cpu/solver_explicit_euler.c        | 10 +++++++++-
 lib/src/solvers/navier_stokes/cpu/solver_rk2.c       | 12 ++++++++++--
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/lib/src/solvers/navier_stokes/cpu/solver_explicit_euler.c b/lib/src/solvers/navier_stokes/cpu/solver_explicit_euler.c
index beb5a6a0..ec4d26d2 100644
--- a/lib/src/solvers/navier_stokes/cpu/solver_explicit_euler.c
+++ b/lib/src/solvers/navier_stokes/cpu/solver_explicit_euler.c
@@ -542,12 +542,20 @@ cfd_status_t explicit_euler_impl(flow_field* field, const grid* grid, const ns_s
         }
 
         /* Save caller BCs, apply periodic BCs, restore caller BCs.
-         * Use _3d helper for 6-face copy when nz > 1. */
+         * Use _3d helper for 6-face copy when nz > 1.
+         * Preserve temperature so caller-specified thermal BCs are not
+         * overwritten by the generic periodic BC application. */
         copy_boundary_velocities_3d(u_new, v_new, w_new,
                                     field->u, field->v, field->w, nx, ny, nz);
+        if (field->T && T_energy_ws) {
+            memcpy(T_energy_ws, field->T, bytes);
+        }
         apply_boundary_conditions(field, grid);
         copy_boundary_velocities_3d(field->u, field->v, field->w,
                                     u_new, v_new, w_new, nx, ny, nz);
+        if (field->T && T_energy_ws) {
+            memcpy(field->T, T_energy_ws, bytes);
+        }
 
         /* NaN/Inf check */
         int has_nan = 0;
diff --git a/lib/src/solvers/navier_stokes/cpu/solver_rk2.c b/lib/src/solvers/navier_stokes/cpu/solver_rk2.c
index d96ae4b6..4119d76a 100644
--- a/lib/src/solvers/navier_stokes/cpu/solver_rk2.c
+++ b/lib/src/solvers/navier_stokes/cpu/solver_rk2.c
@@ -341,8 +341,16 @@ cfd_status_t rk2_impl(flow_field* field, const grid* grid,
         }
 
         /* Apply BCs to final state only (after the full RK2 step).
-         * This updates ghost cells for the next step's k1 evaluation. */
-        apply_boundary_conditions(field, grid);
+         * This updates ghost cells for the next step's k1 evaluation.
+         * Preserve temperature so caller-specified thermal BCs are not
+         * overwritten by the generic periodic BC application. */
+        if (field->T && T_energy_ws) {
+            memcpy(T_energy_ws, field->T, bytes);
+            apply_boundary_conditions(field, grid);
+            memcpy(field->T, T_energy_ws, bytes);
+        } else {
+            apply_boundary_conditions(field, grid);
+        }
 
         /* NaN / Inf check */
         for (size_t n = 0; n < total; n++) {

From 80a772622a1b2a907238004d3454b3b294c109e8 Mon Sep 17 00:00:00 2001
From: shaia <shaia@outlook.com>
Date: Sat, 4 Apr 2026 11:09:37 +0300
Subject: [PATCH 16/20] Fix Boussinesq doc: acceleration not force (no rho_0
 factor)

The implementation computes -beta*(T-T_ref)*g as a per-unit-mass
acceleration term, not a force. Update the header doc to match.
---
 lib/include/cfd/solvers/energy_solver.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/include/cfd/solvers/energy_solver.h b/lib/include/cfd/solvers/energy_solver.h
index 94dbcf12..d632435c 100644
--- a/lib/include/cfd/solvers/energy_solver.h
+++ b/lib/include/cfd/solvers/energy_solver.h
@@ -7,8 +7,8 @@
  *
  * where alpha = k/(rho*cp) is thermal diffusivity and Q is a heat source.
  *
- * Boussinesq buoyancy coupling adds a body force to the momentum equations:
- *   F_buoy = -rho_0 * beta * (T - T_ref) * g
+ * Boussinesq buoyancy coupling adds a buoyancy acceleration to the momentum equations:
+ *   a_buoy = -beta * (T - T_ref) * g
  *
  * The energy equation is solved as a post-step after velocity advancement,
  * using the updated velocity field for advection.

From 84e7cf46f578f3d048113a3849bc869f21748a94 Mon Sep 17 00:00:00 2001
From: shaia <shaia@outlook.com>
Date: Sat, 4 Apr 2026 11:33:05 +0300
Subject: [PATCH 17/20] Allocate T workspace for buoyancy-only coupling
 (beta!=0, alpha==0)

The T_energy_ws buffer was only allocated when alpha>0, so buoyancy-only
runs (beta!=0, alpha==0) had no buffer to save/restore temperature
across apply_boundary_conditions. Extend the allocation condition to
cover both alpha>0 and beta!=0 in all three scalar CPU solvers.
---
 lib/src/solvers/navier_stokes/cpu/solver_explicit_euler.c | 5 +++--
 lib/src/solvers/navier_stokes/cpu/solver_projection.c     | 5 +++--
 lib/src/solvers/navier_stokes/cpu/solver_rk2.c            | 5 +++--
 3 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/lib/src/solvers/navier_stokes/cpu/solver_explicit_euler.c b/lib/src/solvers/navier_stokes/cpu/solver_explicit_euler.c
index ec4d26d2..0049068b 100644
--- a/lib/src/solvers/navier_stokes/cpu/solver_explicit_euler.c
+++ b/lib/src/solvers/navier_stokes/cpu/solver_explicit_euler.c
@@ -368,11 +368,12 @@ cfd_status_t explicit_euler_impl(flow_field* field, const grid* grid, const ns_s
     double* w_new = (double*)cfd_calloc(total, sizeof(double));
     double* p_new = (double*)cfd_calloc(total, sizeof(double));
     double* rho_new = (double*)cfd_calloc(total, sizeof(double));
-    double* T_energy_ws = (params->alpha > 0.0)
+    int needs_T_ws = (params->alpha > 0.0 || params->beta != 0.0);
+    double* T_energy_ws = needs_T_ws
         ? (double*)cfd_calloc(total, sizeof(double)) : NULL;
 
     if (!u_new || !v_new || !w_new || !p_new || !rho_new ||
-        (params->alpha > 0.0 && !T_energy_ws)) {
+        (needs_T_ws && !T_energy_ws)) {
         cfd_free(u_new); cfd_free(v_new); cfd_free(w_new);
         cfd_free(p_new); cfd_free(rho_new); cfd_free(T_energy_ws);
         return CFD_ERROR_NOMEM;
diff --git a/lib/src/solvers/navier_stokes/cpu/solver_projection.c b/lib/src/solvers/navier_stokes/cpu/solver_projection.c
index 8e36ed45..d73fa47d 100644
--- a/lib/src/solvers/navier_stokes/cpu/solver_projection.c
+++ b/lib/src/solvers/navier_stokes/cpu/solver_projection.c
@@ -90,11 +90,12 @@ cfd_status_t solve_projection_method(flow_field* field, const grid* grid,
     double* p_new  = (double*)cfd_calloc(total, sizeof(double));
     double* p_temp = (double*)cfd_calloc(total, sizeof(double));
     double* rhs    = (double*)cfd_calloc(total, sizeof(double));
-    double* T_energy_ws = (params->alpha > 0.0)
+    int needs_T_ws = (params->alpha > 0.0 || params->beta != 0.0);
+    double* T_energy_ws = needs_T_ws
         ? (double*)cfd_calloc(total, sizeof(double)) : NULL;
 
     if (!u_star || !v_star || !w_star || !p_new || !p_temp || !rhs ||
-        (params->alpha > 0.0 && !T_energy_ws)) {
+        (needs_T_ws && !T_energy_ws)) {
         cfd_free(u_star); cfd_free(v_star); cfd_free(w_star);
         cfd_free(p_new); cfd_free(p_temp); cfd_free(rhs);
         cfd_free(T_energy_ws);
diff --git a/lib/src/solvers/navier_stokes/cpu/solver_rk2.c b/lib/src/solvers/navier_stokes/cpu/solver_rk2.c
index 4119d76a..58d5efc6 100644
--- a/lib/src/solvers/navier_stokes/cpu/solver_rk2.c
+++ b/lib/src/solvers/navier_stokes/cpu/solver_rk2.c
@@ -251,13 +251,14 @@ cfd_status_t rk2_impl(flow_field* field, const grid* grid,
     double* v0 = (double*)cfd_calloc(total, sizeof(double));
     double* w0 = (double*)cfd_calloc(total, sizeof(double));
     double* p0 = (double*)cfd_calloc(total, sizeof(double));
-    double* T_energy_ws = (params->alpha > 0.0)
+    int needs_T_ws = (params->alpha > 0.0 || params->beta != 0.0);
+    double* T_energy_ws = needs_T_ws
         ? (double*)cfd_calloc(total, sizeof(double)) : NULL;
 
     if (!k1_u || !k1_v || !k1_w || !k1_p ||
         !k2_u || !k2_v || !k2_w || !k2_p ||
         !u0 || !v0 || !w0 || !p0 ||
-        (params->alpha > 0.0 && !T_energy_ws)) {
+        (needs_T_ws && !T_energy_ws)) {
         cfd_free(k1_u); cfd_free(k1_v); cfd_free(k1_w); cfd_free(k1_p);
         cfd_free(k2_u); cfd_free(k2_v); cfd_free(k2_w); cfd_free(k2_p);
         cfd_free(u0); cfd_free(v0); cfd_free(w0); cfd_free(p0);

From 3ad6aa542ac6c9cbd45eb6a15f414a27f7cc5ef7 Mon Sep 17 00:00:00 2001
From: shaia <shaia@outlook.com>
Date: Sat, 4 Apr 2026 11:35:19 +0300
Subject: [PATCH 18/20] Add input validation to
 energy_step_explicit_with_workspace

Validate field, grid, params, field->T, grid->dx/dy, and minimum grid
size before any dereference. Prevents crashes when the public API is
called with NULL arguments or degenerate grids.
---
 lib/src/solvers/energy/cpu/energy_solver.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/lib/src/solvers/energy/cpu/energy_solver.c b/lib/src/solvers/energy/cpu/energy_solver.c
index ff611f35..869282a7 100644
--- a/lib/src/solvers/energy/cpu/energy_solver.c
+++ b/lib/src/solvers/energy/cpu/energy_solver.c
@@ -23,6 +23,17 @@ cfd_status_t energy_step_explicit_with_workspace(
     const ns_solver_params_t* params,
     double dt, double time,
     double* T_workspace, size_t workspace_size) {
+    if (!field || !grid || !params) {
+        cfd_set_error(CFD_ERROR_INVALID,
+                      "energy_solver: field, grid, and params must be non-NULL");
+        return CFD_ERROR_INVALID;
+    }
+    if (!field->T) {
+        cfd_set_error(CFD_ERROR_INVALID,
+                      "energy_solver: missing temperature field");
+        return CFD_ERROR_INVALID;
+    }
+
     /* Skip when energy equation is disabled */
     if (params->alpha <= 0.0) {
         return CFD_SUCCESS;
@@ -31,6 +42,12 @@ cfd_status_t energy_step_explicit_with_workspace(
     size_t nx = field->nx;
     size_t ny = field->ny;
     size_t nz = field->nz;
+
+    if (!grid->dx || !grid->dy || nx < 3 || ny < 3) {
+        cfd_set_error(CFD_ERROR_INVALID,
+                      "energy_solver: grid too small or missing dx/dy");
+        return CFD_ERROR_INVALID;
+    }
     size_t plane = nx * ny;
     size_t total = plane * nz;
     double alpha = params->alpha;

From 6e86c77e1c15ace39f547d76a04a5fb763a6b92e Mon Sep 17 00:00:00 2001
From: shaia <shaia@outlook.com>
Date: Sat, 4 Apr 2026 11:56:08 +0300
Subject: [PATCH 19/20] Validate uniform dx/dy and use precomputed inverse
 spacing constants

The central-difference stencil assumes uniform grid spacing but was
using per-cell dx[i]/dy[j] values that are only correct on uniform
grids. Reject non-uniform dx/dy early (like dz) and switch the inner
loop to precomputed inv_2dx/inv_2dy/inv_dx2/inv_dy2 constants.
---
 lib/src/solvers/energy/cpu/energy_solver.c | 43 +++++++++++++++++-----
 1 file changed, 33 insertions(+), 10 deletions(-)

diff --git a/lib/src/solvers/energy/cpu/energy_solver.c b/lib/src/solvers/energy/cpu/energy_solver.c
index 869282a7..154fd94c 100644
--- a/lib/src/solvers/energy/cpu/energy_solver.c
+++ b/lib/src/solvers/energy/cpu/energy_solver.c
@@ -52,7 +52,27 @@ cfd_status_t energy_step_explicit_with_workspace(
     size_t total = plane * nz;
     double alpha = params->alpha;
 
-    /* Validate uniform dz for 3D (energy solver uses constant z-spacing) */
+    /* Validate uniform spacing (central-difference stencil assumes it) */
+    const double dx0 = grid->dx[0];
+    const double dy0 = grid->dy[0];
+    {
+        const double tol_x = 1e-12 * fmax(1.0, fabs(dx0));
+        for (size_t i = 1; i < nx - 1; i++) {
+            if (fabs(grid->dx[i] - dx0) > tol_x) {
+                cfd_set_error(CFD_ERROR_UNSUPPORTED,
+                              "energy_solver: non-uniform dx not supported");
+                return CFD_ERROR_UNSUPPORTED;
+            }
+        }
+        const double tol_y = 1e-12 * fmax(1.0, fabs(dy0));
+        for (size_t j = 1; j < ny - 1; j++) {
+            if (fabs(grid->dy[j] - dy0) > tol_y) {
+                cfd_set_error(CFD_ERROR_UNSUPPORTED,
+                              "energy_solver: non-uniform dy not supported");
+                return CFD_ERROR_UNSUPPORTED;
+            }
+        }
+    }
     if (nz > 1) {
         if (!grid->dz) {
             cfd_set_error(CFD_ERROR_INVALID,
@@ -60,9 +80,9 @@ cfd_status_t energy_step_explicit_with_workspace(
             return CFD_ERROR_INVALID;
         }
         const double dz0 = grid->dz[0];
-        const double tol = 1e-12 * fmax(1.0, fabs(dz0));
+        const double tol_z = 1e-12 * fmax(1.0, fabs(dz0));
         for (size_t k = 1; k < nz - 1; k++) {
-            if (fabs(grid->dz[k] - dz0) > tol) {
+            if (fabs(grid->dz[k] - dz0) > tol_z) {
                 cfd_set_error(CFD_ERROR_UNSUPPORTED,
                               "energy_solver: non-uniform dz not supported");
                 return CFD_ERROR_UNSUPPORTED;
@@ -70,6 +90,12 @@ cfd_status_t energy_step_explicit_with_workspace(
         }
     }
 
+    /* Precomputed constants for uniform-grid stencil */
+    double inv_2dx = 1.0 / (2.0 * dx0);
+    double inv_2dy = 1.0 / (2.0 * dy0);
+    double inv_dx2 = 1.0 / (dx0 * dx0);
+    double inv_dy2 = 1.0 / (dy0 * dy0);
+
     /* Branch-free 3D constants */
     size_t stride_z = (nz > 1) ? plane : 0;
     size_t k_start  = (nz > 1) ? 1 : 0;
@@ -101,19 +127,16 @@ cfd_status_t energy_step_explicit_with_workspace(
                 double v_c = field->v[idx];
                 double w_c = field->w[idx];
 
-                double dx = grid->dx[i];
-                double dy = grid->dy[j];
-
                 /* Advection: u * dT/dx + v * dT/dy + w * dT/dz */
-                double dT_dx = (field->T[idx + 1] - field->T[idx - 1]) / (2.0 * dx);
-                double dT_dy = (field->T[idx + nx] - field->T[idx - nx]) / (2.0 * dy);
+                double dT_dx = (field->T[idx + 1] - field->T[idx - 1]) * inv_2dx;
+                double dT_dy = (field->T[idx + nx] - field->T[idx - nx]) * inv_2dy;
                 double dT_dz = (field->T[idx + stride_z] - field->T[idx - stride_z]) * inv_2dz;
 
                 double advection = u_c * dT_dx + v_c * dT_dy + w_c * dT_dz;
 
                 /* Diffusion: alpha * (d2T/dx2 + d2T/dy2 + d2T/dz2) */
-                double d2T_dx2 = (field->T[idx + 1] - 2.0 * T_c + field->T[idx - 1]) / (dx * dx);
-                double d2T_dy2 = (field->T[idx + nx] - 2.0 * T_c + field->T[idx - nx]) / (dy * dy);
+                double d2T_dx2 = (field->T[idx + 1] - 2.0 * T_c + field->T[idx - 1]) * inv_dx2;
+                double d2T_dy2 = (field->T[idx + nx] - 2.0 * T_c + field->T[idx - nx]) * inv_dy2;
                 double d2T_dz2 = (field->T[idx + stride_z] - 2.0 * T_c +
                                    field->T[idx - stride_z]) * inv_dz2;
 

From bac421a9ec6842eedc9b474745fdc942260d69bd Mon Sep 17 00:00:00 2001
From: shaia <shaia@outlook.com>
Date: Sat, 4 Apr 2026 18:59:07 +0300
Subject: [PATCH 20/20] Fix TSan SEGV: match Poisson BC backend to solver
 backend

The TSan CI crashed with a SEGV during process exit because
BC_BACKEND_AUTO selected OMP boundary conditions for a scalar
Poisson solver, spawning libomp threads whose cleanup conflicted
with TSan's shadow memory tracking.

Two fixes:
- Propagate CFD_ENABLE_OPENMP to library consumers (tests) via
  INTERFACE compile definition so compile-time guards work correctly
- Dispatch Poisson solver BCs through the solver's own backend
  instead of global AUTO, preventing backend mismatch
---
 lib/CMakeLists.txt                     |  7 +++++++
 lib/src/solvers/linear/linear_solver.c | 17 +++++++++++++++--
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt
index 2ce3c208..2dc2c13c 100644
--- a/lib/CMakeLists.txt
+++ b/lib/CMakeLists.txt
@@ -532,6 +532,13 @@ else()
     )
 endif()
 
+# Propagate CFD_ENABLE_OPENMP to consumers (tests, examples)
+# The directory-level add_compile_definitions() only affects targets in lib/,
+# but test executables in the root CMakeLists.txt need this for compile-time guards.
+if(OpenMP_C_FOUND)
+    target_compile_definitions(cfd_library INTERFACE CFD_ENABLE_OPENMP)
+endif()
+
 # Symbol Visibility and Export Header (needed for both builds)
 include(GenerateExportHeader)
 # For INTERFACE libraries, we can't generate export header from it, use cfd_core instead
diff --git a/lib/src/solvers/linear/linear_solver.c b/lib/src/solvers/linear/linear_solver.c
index e65b8578..35ef551a 100644
--- a/lib/src/solvers/linear/linear_solver.c
+++ b/lib/src/solvers/linear/linear_solver.c
@@ -356,9 +356,22 @@ void poisson_solver_apply_bc(
                plane_size * sizeof(double));
     }
 
-    /* Apply 2D Neumann BCs on each z-plane (x/y faces) */
+    /* Apply 2D Neumann BCs on each z-plane using solver's own backend.
+     * This avoids BC_BACKEND_AUTO selecting a different backend (e.g. OMP)
+     * than the solver itself, which could spawn unexpected threads. */
     for (size_t k = 0; k < nz; k++) {
-        bc_apply_scalar(x + k * plane_size, nx, ny, BC_TYPE_NEUMANN);
+        double* plane = x + k * plane_size;
+        switch (solver->backend) {
+            case POISSON_BACKEND_OMP:
+                bc_apply_scalar_omp(plane, nx, ny, BC_TYPE_NEUMANN);
+                break;
+            case POISSON_BACKEND_SIMD:
+                bc_apply_scalar_simd(plane, nx, ny, BC_TYPE_NEUMANN);
+                break;
+            default:
+                bc_apply_scalar_cpu(plane, nx, ny, BC_TYPE_NEUMANN);
+                break;
+        }
     }
 }