diff --git a/memory_leak_report.md b/memory_leak_report.md new file mode 100644 index 00000000..e14495b8 --- /dev/null +++ b/memory_leak_report.md @@ -0,0 +1,48 @@ +# Memory Leak Investigation Report + +This report documents the potential sources of memory leaks found during deep static analysis of the CONQUEST codebase. These leaks can occur during Molecular Dynamics (MD) calculations and grow significantly faster when Multi-Site Support Functions (MSSF) are used, leading to steadily increasing memory usage. + +--- + +## 1. Continuous Leak: Unfreed `InfoMat` Read Matrices + +When CONQUEST reads saved or checkpointed matrices from the file system, it invokes `grab_matrix2` (located in `src/store_matrix_module.f90`). This subroutine dynamically allocates an array of `InfoMat` derived types and, inside each element, allocates several deeply nested arrays (`alpha_i`, `idglob_i`, `jmax_i`, `beta_j_i`, `rvec_Pij`, `data_Lold`, etc.) whose sizes are proportional to the number of atoms per MPI process and the number of spins. + +While `store_matrix_module` provides a `deallocate_InfoMatrixFile` subroutine to cleanly free this memory, several core loops inside CONQUEST invoke `grab_matrix2` without ever calling the deallocation routine, causing large chunks of memory to be orphaned per step per MPI rank. + +### Detailed Findings: + +**1. File:** `src/move_atoms.module.f90` (The Primary MD Leak Candidate) +- **Location:** Inside `update_pos_and_matrices` (approx. Lines 5120-5160) +- **Reason:** Called on **every MD step**, `update_pos_and_matrices` reads matrix components (`L`, `K`, `S`) across MPI ranks to propagate them forward. The `InfoMat` array structures are dynamically allocated for the local atoms managed by that rank. +- **Why MSSF made it worse:** When MSSF is active (`flag_SFcoeff`), an additional matrix structure (`SFcoeff`) is allocated and read. The additional memory growth seen with MSSF therefore corresponds directly to this extra `InfoMat` block being leaked on top of the standard MD baseline. 
+- **Risk Level:** **Critical**. This continuous leak scales directly with the number of **atoms per MPI process** and the number of MD iterations. + +**2. File:** `src/XLBOMD_module.f90` +- **Location:** Inside `grab_XXvelS` and `grab_Xhistories` (approx. Lines 539-550, 621) +- **Reason:** `grab_matrix2` is called repeatedly to load the `X`, `Xvel`, and `S` matrices into `InfoMat`, but `deallocate_InfoMatrixFile` and `free_InfoMatGlobal` are never invoked. +- **Risk Level:** **High** (if XL-BOMD is running). Similar to the main MD loop, this could cause a continuous leak per XLBOMD propagation. + +**3. File:** `src/S_matrix_module.f90` +- **Location:** Inside `Iter_Hott_InvS` (approx. Line 879) +- **Reason:** When `flag_readT` or `restart_T` is true, the code reads inverse S-matrix components `T` using `grab_matrix2`. Memory is never deallocated. +- **Risk Level:** **Moderate/High** (depending on how often `Iter_Hott_InvS` hits the read condition). + +**4. File:** `src/initialisation_module.f90` +- **Location:** Inside `initial_H` (approx. Lines 1226, 1253, 1273, 1279) +- **Reason:** During the initial matrix startup routines, `grab_matrix2` is called up to four times to load `SFcoeff`, `T`, `L`, and `K` without being followed by `deallocate_InfoMatrixFile`. +- **Risk Level:** **Low** (flat cost per run since this is startup code only). + +--- + +## 2. General Initialization Orphaned Arrays + +**5. File:** `src/cdft_module.f90` +- **Location:** Inside `init_cdft` +- **Reason:** Several Fortran arrays, and matrices created via `allocate_temp_matrix`, are allocated (`matWc`, `cDFT_Vc`, `cDFT_W`, `flag_cdft_atom`, `bwgrid`, and `matHzero`) when cDFT conditions are active. The module completely lacks an accompanying deallocation routine (`end_cdft`) to gracefully tear them down at runtime closure in `src/main.f90`. +- **Risk Level:** **Low** (one-time allocation at program start). 
+ +--- + +## Conclusion +The most critical memory accumulation observed, and its acceleration under MSSF, are most likely driven by the missing `deallocate_InfoMatrixFile` calls after `grab_matrix2` is invoked (especially inside `update_pos_and_matrices` within `src/move_atoms.module.f90`). Adding the missing deallocation of the `InfoMat` structures in these modules should resolve the memory leaks. \ No newline at end of file diff --git a/src/S_matrix_module.f90 b/src/S_matrix_module.f90 index 845b29fe..c37123cf 100644 --- a/src/S_matrix_module.f90 +++ b/src/S_matrix_module.f90 @@ -760,7 +760,8 @@ subroutine Iter_Hott_InvS(output_level, n_L_iterations, tolerance,n_atoms,& use timer_module, ONLY: cq_timer,start_timer,stop_print_timer,WITH_LEVEL use input_module, ONLY: leqi use store_matrix, ONLY: dump_matrix2, grab_matrix2, InfoMatrixFile, & - matrix_store_global, grab_InfoMatGlobal, set_atom_coord_diff + matrix_store_global, grab_InfoMatGlobal, set_atom_coord_diff, & + deallocate_InfoMatrixFile, free_InfoMatGlobal use UpdateInfo, ONLY: Matrix_CommRebuild use io_module, ONLY: return_prefix @@ -878,6 +879,8 @@ subroutine Iter_Hott_InvS(output_level, n_L_iterations, tolerance,n_atoms,& call grab_matrix2('T',inode,nfile,Info,InfoGlob,index=0,n_matrix=nspin_SF) call my_barrier() call Matrix_CommRebuild(InfoGlob,Info,Trange,T_trans,matT,nfile,symm,n_matrix=nspin_SF) + call deallocate_InfoMatrixFile(nfile, Info) + call free_InfoMatGlobal(InfoGlob) endif ! 
and evaluate the current value of the functional and its gradient @@ -1444,6 +1447,7 @@ subroutine get_S_analytic(blipL_co, blipR_co, blip_grad, matS, matT, dataM12, da end do end do deallocate(work1,work2,work3,work4,work5,work6, STAT=stat) + deallocate(temp,temp2) if(stat/=0) call cq_abort("Error deallocating arrays for onsite S blip elements: ",blip_info(specj)%FullArraySize,this_nsfL) return end subroutine get_S_analytic diff --git a/src/XLBOMD_module.f90 b/src/XLBOMD_module.f90 index 125fc87a..393d438b 100644 --- a/src/XLBOMD_module.f90 +++ b/src/XLBOMD_module.f90 @@ -523,7 +523,7 @@ subroutine grab_XXvelS(range,trans,InfoGlob) use GenComms, ONLY: inode, ionode use matrix_data, ONLY: Srange use mult_module, ONLY: matS,S_trans,matXL,matXLvel - use store_matrix, ONLY: grab_matrix2, InfoMatrixFile, matrix_store_global + use store_matrix, ONLY: grab_matrix2, InfoMatrixFile, matrix_store_global, deallocate_InfoMatrixFile use UpdateInfo, ONLY: Matrix_CommRebuild implicit none @@ -551,6 +551,8 @@ subroutine grab_XXvelS(range,trans,InfoGlob) call Matrix_CommRebuild(InfoGlob,InfoMat,Srange,S_trans,matS,nfile,symm,n_matrix=nspin_SF) endif + call deallocate_InfoMatrixFile(nfile, InfoMat) + return end subroutine grab_XXvelS !!*** @@ -587,7 +589,7 @@ subroutine grab_Xhistories(range,trans,InfoGlob) use GenComms, ONLY: inode, ionode use matrix_data, ONLY: LSrange,Lrange use mult_module, ONLY: LS_trans,L_trans, matXL_store, maxiter_Dissipation - use store_matrix, ONLY: grab_matrix2, InfoMatrixFile, matrix_store_global + use store_matrix, ONLY: grab_matrix2, InfoMatrixFile, matrix_store_global, deallocate_InfoMatrixFile use UpdateInfo, ONLY: Matrix_CommRebuild !db use global_module, ONLY: io_lun @@ -620,6 +622,7 @@ subroutine grab_Xhistories(range,trans,InfoGlob) do istep = 1, maxiters+1 call grab_matrix2('X',inode,nfile,InfoMat,InfoGlob,index=istep,n_matrix=nspin) call Matrix_CommRebuild(InfoGlob,InfoMat,range,trans,matXL_store(istep,:),nfile,n_matrix=nspin) + call 
deallocate_InfoMatrixFile(nfile, InfoMat) enddo ! ----- 2019/Nov/13: (comment by TM) ----- diff --git a/src/force_module.f90 b/src/force_module.f90 index 6c432db0..78eb4636 100644 --- a/src/force_module.f90 +++ b/src/force_module.f90 @@ -853,6 +853,8 @@ subroutine force(fixed_potential, vary_mu, n_cg_L_iterations, & if (stat /= 0) & call cq_abort("Error deallocating forces: ", ni_in_cell) call reg_dealloc_mem(area_moveatoms, 5 * 3 * ni_in_cell, type_dbl) + if(allocated(cdft_force)) deallocate(cdft_force) + if(allocated(NA_force)) deallocate(NA_force) end if deallocate(density_total, STAT=stat) if (stat /= 0) call cq_abort("force: Error dealloc mem") diff --git a/src/initialisation_module.f90 b/src/initialisation_module.f90 index 6ede3b9b..3885ac4b 100644 --- a/src/initialisation_module.f90 +++ b/src/initialisation_module.f90 @@ -1133,7 +1133,7 @@ subroutine initial_H(start, start_L, find_chdens, fixed_potential, & use DFT_D2, only: dispersion_D2 use matrix_data, ONLY: Lrange,Trange,LSrange,SFcoeff_range,Hrange use store_matrix, ONLY: matrix_store_global, grab_InfoMatGlobal, grab_matrix2, & - InfoMatrixFile, set_atom_coord_diff + InfoMatrixFile, set_atom_coord_diff, deallocate_InfoMatrixFile, free_InfoMatGlobal use UpdateInfo, ONLY: make_glob2node,Matrix_CommRebuild, Report_UpdateMatrix use XLBOMD_module, ONLY: grab_XXvelS,grab_Xhistories use support_spec_format, only: read_option @@ -1226,6 +1226,7 @@ subroutine initial_H(start, start_L, find_chdens, fixed_potential, & call grab_matrix2('SFcoeff',inode,nfile,Info,InfoGlob,index=index_MatrixFile,n_matrix=nspin_SF) call my_barrier() call Matrix_CommRebuild(InfoGlob,Info,SFcoeff_range,SFcoeff_trans,matSFcoeff,nfile,n_matrix=nspin_SF) + call deallocate_InfoMatrixFile(nfile, Info) ! Added DRB 2017/04/10 to fix issue 26: transpose required before transformation can occur ! 
Transpose @@ -1253,6 +1254,7 @@ subroutine initial_H(start, start_L, find_chdens, fixed_potential, & call grab_matrix2('T',inode,nfile,Info,InfoGlob,index=index_MatrixFile,n_matrix=nspin_SF) call my_barrier() call Matrix_CommRebuild(InfoGlob,Info,Trange,T_trans,matT,nfile,symm,n_matrix=nspin_SF) + call deallocate_InfoMatrixFile(nfile, Info) endif if (flag_LFD .and. .not.read_option) then ! Spao was already made in sub:initial_SFcoeff @@ -1273,12 +1275,14 @@ subroutine initial_H(start, start_L, find_chdens, fixed_potential, & call grab_matrix2('L',inode,nfile,Info,InfoGlob,index=index_MatrixFile,n_matrix=nspin) call my_barrier() call Matrix_CommRebuild(InfoGlob,Info,Lrange,L_trans,matL,nfile,symm,n_matrix=nspin) + call deallocate_InfoMatrixFile(nfile, Info) if (inode == ionode .and. iprint_init + min_layer > 2) & write(io_lun, fmt='(4x,a)') trim(prefix)//' grabbed L matrix' else call grab_matrix2('K',inode,nfile,Info,InfoGlob,index=index_MatrixFile,n_matrix=nspin) call my_barrier() call Matrix_CommRebuild(InfoGlob,Info,Hrange,H_trans,matK,nfile,n_matrix=nspin) + call deallocate_InfoMatrixFile(nfile, Info) if (inode == ionode .and. 
iprint_init + min_layer > 2) & write(io_lun, fmt='(4x,a)') trim(prefix)//' grabbed K matrix' !DEBUG call Report_UpdateMatrix("Kmat") @@ -1294,6 +1298,7 @@ subroutine initial_H(start, start_L, find_chdens, fixed_potential, & if (flag_dissipation) call grab_Xhistories(Lrange,L_trans,InfoGlob) endif endif + call free_InfoMatGlobal(InfoGlob) !!$ !!$ !!$ diff --git a/src/move_atoms.module.f90 b/src/move_atoms.module.f90 index 764104b1..d02b376e 100644 --- a/src/move_atoms.module.f90 +++ b/src/move_atoms.module.f90 @@ -1911,6 +1911,7 @@ subroutine backtrack_linemin_full(config, direction, cell_ref, enthalpy_in, enth iter, en_conv * enthalpy_out, en_units(energy_units) end if + deallocate(config_start) call stop_timer(tmr_std_moveatoms) return end subroutine backtrack_linemin_full @@ -4983,7 +4984,7 @@ subroutine update_pos_and_matrices(update_method, velocity) matrix_scale, matrix_transpose, matSFcoeff_tran use matrix_data, only: Lrange, Hrange, Srange, SFcoeff_range use store_matrix, only: matrix_store_global, InfoMatrixFile, grab_InfoMatGlobal, grab_matrix2, & - set_atom_coord_diff + set_atom_coord_diff, deallocate_InfoMatrixFile, free_InfoMatGlobal use UpdateInfo, only: Matrix_CommRebuild, Report_UpdateMatrix use memory_module, only: reg_alloc_mem, type_dbl, reg_dealloc_mem @@ -5159,6 +5160,12 @@ subroutine update_pos_and_matrices(update_method, velocity) enddo endif + ! Deallocate InfoMat and InfoGlob + if (flag_L .or. flag_K .or. flag_S .or. flag_SFcoeff) then + call deallocate_InfoMatrixFile(nfile, InfoMat) + end if + call free_InfoMatGlobal(InfoGlob) + !Switch off Debugging ! flag_debug_move_atoms = .false. 
diff --git a/src/store_matrix_module.f90 b/src/store_matrix_module.f90 index 811998d0..22f5d74a 100644 --- a/src/store_matrix_module.f90 +++ b/src/store_matrix_module.f90 @@ -903,8 +903,9 @@ subroutine free_InfoMatGlobal(mat_glob) integer :: istat - deallocate(mat_glob%atom_veloc, mat_glob%atom_coord, mat_glob%glob_to_node, STAT=istat) - if(istat /= 0) call cq_abort('Error : deallocation in free_InfoMatGlobal1',istat) + if (allocated(mat_glob%atom_veloc)) deallocate(mat_glob%atom_veloc, STAT=istat) + if (allocated(mat_glob%atom_coord)) deallocate(mat_glob%atom_coord, STAT=istat) + if (allocated(mat_glob%glob_to_node)) deallocate(mat_glob%glob_to_node, STAT=istat) return end subroutine free_InfoMatGlobal !!*** @@ -1460,26 +1461,46 @@ subroutine deallocate_InfoMatrixFile(nfile,InfoMat) if (associated(InfoMat)) then do ifile = 1, nfile - deallocate (InfoMat(ifile)%alpha_i, STAT=stat_alloc) - if (stat_alloc/=0) call cq_abort('Error deallocating alpha_i:') - deallocate (InfoMat(ifile)%idglob_i, STAT=stat_alloc) - if (stat_alloc/=0) call cq_abort('Error deallocating idglob_i:') - deallocate (InfoMat(ifile)%jmax_i, STAT=stat_alloc) - if (stat_alloc/=0) call cq_abort('Error deallocating jmax_i:') - deallocate (InfoMat(ifile)%jbeta_max_i, STAT=stat_alloc) - if (stat_alloc/=0) call cq_abort('Error deallocating jbeta_max_i:') - deallocate (InfoMat(ifile)%ibeg_Pij, STAT=stat_alloc) - if (stat_alloc/=0) call cq_abort('Error deallocating ibeg_Pij:') - deallocate (InfoMat(ifile)%ibeg_dataL, STAT=stat_alloc) - if (stat_alloc/=0) call cq_abort('Error deallocating ibeg_dataL:') - deallocate (InfoMat(ifile)%beta_j_i, STAT=stat_alloc) - if (stat_alloc/=0) call cq_abort('Error deallocating beta_j_i:') - deallocate (InfoMat(ifile)%idglob_j, STAT=stat_alloc) - if (stat_alloc/=0) call cq_abort('Error deallocating idglob_j:') - deallocate (InfoMat(ifile)%rvec_Pij, STAT=stat_alloc) - if (stat_alloc/=0) call cq_abort('Error deallocating rvec_Pij:') - deallocate (InfoMat(ifile)%data_Lold, 
STAT=stat_alloc) - if (stat_alloc/=0) call cq_abort('Error deallocating data_Lold:') + if (associated(InfoMat(ifile)%alpha_i)) then + deallocate (InfoMat(ifile)%alpha_i, STAT=stat_alloc) + if (stat_alloc/=0) call cq_abort('Error deallocating alpha_i:') + end if + if (associated(InfoMat(ifile)%idglob_i)) then + deallocate (InfoMat(ifile)%idglob_i, STAT=stat_alloc) + if (stat_alloc/=0) call cq_abort('Error deallocating idglob_i:') + end if + if (associated(InfoMat(ifile)%jmax_i)) then + deallocate (InfoMat(ifile)%jmax_i, STAT=stat_alloc) + if (stat_alloc/=0) call cq_abort('Error deallocating jmax_i:') + end if + if (associated(InfoMat(ifile)%jbeta_max_i)) then + deallocate (InfoMat(ifile)%jbeta_max_i, STAT=stat_alloc) + if (stat_alloc/=0) call cq_abort('Error deallocating jbeta_max_i:') + end if + if (associated(InfoMat(ifile)%ibeg_Pij)) then + deallocate (InfoMat(ifile)%ibeg_Pij, STAT=stat_alloc) + if (stat_alloc/=0) call cq_abort('Error deallocating ibeg_Pij:') + end if + if (associated(InfoMat(ifile)%ibeg_dataL)) then + deallocate (InfoMat(ifile)%ibeg_dataL, STAT=stat_alloc) + if (stat_alloc/=0) call cq_abort('Error deallocating ibeg_dataL:') + end if + if (associated(InfoMat(ifile)%beta_j_i)) then + deallocate (InfoMat(ifile)%beta_j_i, STAT=stat_alloc) + if (stat_alloc/=0) call cq_abort('Error deallocating beta_j_i:') + end if + if (associated(InfoMat(ifile)%idglob_j)) then + deallocate (InfoMat(ifile)%idglob_j, STAT=stat_alloc) + if (stat_alloc/=0) call cq_abort('Error deallocating idglob_j:') + end if + if (associated(InfoMat(ifile)%rvec_Pij)) then + deallocate (InfoMat(ifile)%rvec_Pij, STAT=stat_alloc) + if (stat_alloc/=0) call cq_abort('Error deallocating rvec_Pij:') + end if + if (associated(InfoMat(ifile)%data_Lold)) then + deallocate (InfoMat(ifile)%data_Lold, STAT=stat_alloc) + if (stat_alloc/=0) call cq_abort('Error deallocating data_Lold:') + end if enddo deallocate (InfoMat, STAT=stat_alloc) if (stat_alloc/=0) call cq_abort('Error deallocating 
InfoMat:', nfile) diff --git a/src/system/system.ubuntu.make b/src/system/system.ubuntu.make index df55cae1..e6f2aa9a 100644 --- a/src/system/system.ubuntu.make +++ b/src/system/system.ubuntu.make @@ -27,7 +27,7 @@ SCALAPACK = -lscalapack-openmpi #XC_LIB = -lxcf90 -lxc XC_LIBRARY = LibXC_v5 XC_LIB = -lxcf03 -lxc -XC_COMPFLAGS = -I${HOME}/local/include -I/usr/local/include +XC_COMPFLAGS = -I${HOME}/local/include -I/usr/local/include -I/usr/include # Set FFT library FFT_LIB=-lfftw3