Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
f6be44f
Started a new branch to work on OpenMP.
haraldkl May 16, 2018
e67f624
Merged updated polynomial base exchange into the openmp branch
haraldkl May 16, 2018
48bfb6e
Added OpenMP directives to the FPT.
haraldkl May 18, 2018
544896f
Reimplementation of OpenMP statements
Jul 5, 2018
8f40ed5
params%b and params%u were removed from params. They were put in their …
Oct 30, 2018
21f5a44
params%b and params%u were removed from params for parallel implement…
Oct 30, 2018
c47a0dc
Backed out last changes on openmp branch and merged with default branch
Dec 19, 2019
2f1b355
Removed all OMP statements.
Jan 7, 2020
637b01c
Introduced OpenMP parallelisation in m2n projection for fpt and l2p.
Feb 13, 2020
2440e69
Introduced OpenMP parallelisation in n2m projection for fpt and for c…
Mar 4, 2020
e9c7782
Updated copyright notice
Mar 4, 2020
18aace7
Merged changes from default into openmp branch
Mar 5, 2020
02293a0
Introduced OpenMP to derivative computation in ply_leg_diff_module + …
Mar 5, 2020
3ccea4f
Moved the OpenMP statements from the vectorization- to the standard-s…
Mar 10, 2020
389fd73
Merged changes from default into openmp branch
Mar 10, 2020
3d0f702
Removed some OpenMP statements in ply_oversample_module that caused u…
Mar 11, 2020
80fc671
Merged changes from default into openmp branch
Mar 24, 2020
b2b1296
Added a performance test for the l2p transformation.
Jul 9, 2020
9890348
Removed mval from private OMP variables in ply_l2p_module
Jul 15, 2020
176a3d2
Adaptation to the changed precice module.
Jul 17, 2020
67c7b1e
Reverted changes of Rev 453 and replaced unnecessary loop with array …
Jul 20, 2020
997bd97
Moved the OpenMP statement in ply_l2p_projection to the inner loop fo…
Sep 2, 2020
0e32b07
Implemented OpenMP in ply_leg_diff_module
Sep 2, 2020
73fea2c
Merged main developments into the openmp branch.
Sep 10, 2020
fd53c9d
Rearranged L2-projection and OMP-statements on OpenMP-branch for test…
Oct 19, 2020
968689b
Merged open dead head.
haraldkl Dec 10, 2021
0018a83
Merged open head back into branch.
haraldkl Dec 10, 2021
9b0898c
Merge branch 'branch/default' into branch/feature/openmp
haraldkl Nov 14, 2025
a9e5763
Adapt l2p_3D_performance test to changed tem_start interface
haraldkl Nov 14, 2025
27dc67c
Adapt fpt_3D_performance_test to changed tem_start interface
haraldkl Nov 14, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
Polynomials Library
===================

*This is the OpenMP implementation branch*

This project is a supporting library for [TreElM](https://bitbucket.org/apesteam/treelm).
It does not work on its own, but rather needs to be included in
other projects, which also include TreElM.
Expand Down
617 changes: 617 additions & 0 deletions source/fpt/ply_chebPoint_module.f90

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions source/fpt/ply_legFpt_2D_module.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ contains

allocate(alph(n**2))


! original layout (n = 3):
! 1 2 3
! 4 5 6
Expand Down
2 changes: 2 additions & 0 deletions source/fpt/ply_legFpt_3D_module.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ contains
& pntVal = pntVal )
! <<<<< Z-Direction <<<<< !


end subroutine ply_legToPnt_3D_singVar
! ------------------------------------------------------------------------ !

Expand Down Expand Up @@ -266,6 +267,7 @@ contains
& pntVal = alph )
! <<<<< X-Direction <<<<< !


end subroutine ply_pntToLeg_3D_singVar
! ------------------------------------------------------------------------ !

Expand Down
23 changes: 23 additions & 0 deletions source/fpt/ply_legFpt_module.f90
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
! Copyright (c) 2013-2014, 2017 Peter Vitt <peter.vitt2@uni-siegen.de>
! Copyright (c) 2013-2014 Verena Krupp
! Copyright (c) 2016 Langhammer Kay <kay.langhammer@student.uni-siegen.de>
! Copyright (c) 2020 Daniel Fleischer <daniel.fleischer@student.uni-siegen.de>
!
! Parts of this file were written by Jens Zudrop and Harald Klimach
! for German Research School for Simulation Sciences GmbH.
Expand Down Expand Up @@ -344,8 +345,10 @@ subroutine ply_legToPnt_single( fpt, legCoeffs, pntVal, nIndeps )
integer :: n
! -------------------------------------------------------------------- !

!$OMP PARALLEL DEFAULT(SHARED), PRIVATE(n, iDof, cheb)
n = fpt%legToChebParams%n

!$OMP DO
do iDof = 1, nIndeps*n, n
call ply_fpt_single( alph = legCoeffs(iDof:iDof+n-1), &
& gam = cheb, &
Expand All @@ -360,6 +363,9 @@ subroutine ply_legToPnt_single( fpt, legCoeffs, pntVal, nIndeps )
& cheb, &
& pntVal(iDof:iDof+n-1) )
end do
!$OMP END DO

!$OMP END PARALLEL

end subroutine ply_legToPnt_single
! ------------------------------------------------------------------------ !
Expand Down Expand Up @@ -417,8 +423,10 @@ subroutine ply_legToPnt_lobatto_single( fpt, legCoeffs, pntVal, nIndeps )
integer :: n
! -------------------------------------------------------------------- !

!$OMP PARALLEL DEFAULT(SHARED), PRIVATE(n, iDof, cheb)
n = fpt%legToChebParams%n

!$OMP DO
do iDof = 1, nIndeps*n, n
call ply_fpt_single( alph = legCoeffs(iDof:iDof+n-1), &
& gam = cheb, &
Expand All @@ -432,6 +440,9 @@ subroutine ply_legToPnt_lobatto_single( fpt, legCoeffs, pntVal, nIndeps )
& cheb, &
& pntVal(iDof:iDof+n-1) )
end do
!$OMP END DO

!$OMP END PARALLEL

end subroutine ply_legToPnt_lobatto_single
! ------------------------------------------------------------------------ !
Expand Down Expand Up @@ -489,9 +500,12 @@ subroutine ply_pntToLeg_single( fpt, pntVal, legCoeffs, nIndeps )
integer :: n
! -------------------------------------------------------------------- !

!$OMP PARALLEL DEFAULT(SHARED), PRIVATE(n, iDof, cheb)
n = fpt%legToChebParams%n

normFactor = 1.0_rk / real(n,kind=rk)

!$OMP DO
do iDof = 1, nIndeps*n, n
call fftw_execute_r2r( fpt%planPntToCheb, &
& pntVal(iDof:iDof+n-1), &
Expand All @@ -506,6 +520,9 @@ subroutine ply_pntToLeg_single( fpt, pntVal, legCoeffs, nIndeps )
& alph = cheb, &
& params = fpt%ChebToLegParams )
end do
!$OMP END DO

!$OMP END PARALLEL

end subroutine ply_pntToLeg_single
! ------------------------------------------------------------------------ !
Expand Down Expand Up @@ -567,9 +584,12 @@ subroutine ply_pntToLeg_lobatto_single( fpt, pntVal, legCoeffs, nIndeps )
integer :: n
! -------------------------------------------------------------------- !

!$OMP PARALLEL DEFAULT(SHARED), PRIVATE(n, iDof, cheb)
n = fpt%legToChebParams%n

normFactor = 0.5_rk / real(n-1,kind=rk)

!$OMP DO
do iDof = 1, nIndeps*n, n
call fftw_execute_r2r( fpt%planPntToCheb, &
& pntVal(iDof:iDof+n-1), &
Expand All @@ -584,6 +604,9 @@ subroutine ply_pntToLeg_lobatto_single( fpt, pntVal, legCoeffs, nIndeps )
& alph = cheb, &
& params = fpt%ChebToLegParams )
end do
!$OMP END DO

!$OMP END PARALLEL

end subroutine ply_pntToLeg_lobatto_single
! ------------------------------------------------------------------------ !
Expand Down
2 changes: 1 addition & 1 deletion source/fpt/ply_polyBaseExc_module.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -1113,7 +1113,7 @@ contains
!> Convert strip of coefficients of a modal representation in terms of
!! Legendre polynomials to modal coefficients in terms of Chebyshev
!! polynomials.
subroutine ply_fpt_single( alph, gam, params )
subroutine ply_fpt_single( alph, gam, params)
! -------------------------------------------------------------------- !
!> The parameters of the fast polynomial transformation.
type(ply_trafo_params_type), intent(inout) :: params
Expand Down
5 changes: 5 additions & 0 deletions source/ply_LegPolyProjection_module.f90
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ subroutine ply_QPolyProjection( subsamp, dofReduction, tree, meshData, &
real(kind=rk), allocatable :: newWorkDat(:)
integer :: nChildDofs, oneDof
! -------------------------------------------------------------------- !

if (subsamp%projectionType.ne.ply_QLegendrePoly_prp) then
call tem_abort( 'ERROR in ply_QPolyProjection: subsampling is ' &
& // 'only implemented for Q-Legendre-Polynomials' )
Expand Down Expand Up @@ -281,6 +282,7 @@ subroutine ply_initQLegProjCoeff( doftype, nDofs, ndims, nChilds, &
real(kind=rk), allocatable :: projCoeffOneDim(:,:,:)
real(kind=rk) :: dimexp
! -------------------------------------------------------------------- !

select case(dofType)
case(ply_QLegendrePoly_prp)
allocate(projection%projCoeff(nDofs, nChildDofs, nChilds))
Expand Down Expand Up @@ -352,6 +354,7 @@ subroutine ply_initQLegProjCoeff( doftype, nDofs, ndims, nChilds, &
& // 'for Q-Legendre polynomials' )
end select
deallocate(projCoeffOneDim)

end subroutine ply_initQLegProjCoeff
! ************************************************************************ !

Expand Down Expand Up @@ -595,6 +598,7 @@ subroutine ply_subsampleData( tree, meshData, nDofs, nChildDofs, &
integer :: oneDof, noChilds, childpos
real(kind=rk), allocatable :: childData(:)
! -------------------------------------------------------------------- !

nChilds = 2**ndims
nElems = tree%nElems
nElemsToRefine = count(new_refine_tree)
Expand Down Expand Up @@ -794,6 +798,7 @@ subroutine ply_projDataToChild( parentData, nParentDofs, nChildDofs, &
integer :: childDof_pos, parentDof_pos
real(kind=rk) :: projCoeff
! -------------------------------------------------------------------- !

childData(:) = 0.0_rk

childLoop: do iChild = 1, nChilds
Expand Down
1 change: 1 addition & 0 deletions source/ply_fxt_module.f90
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,7 @@ subroutine ply_fxt_n2m_2D( fxt, nodal_data, modal_data, oversamp_degree )
& modal_data = nodal_data(lb:msq:oversamp_degree+1) )
end do
modal_data = nodal_data

end subroutine ply_fxt_n2m_2D
! ************************************************************************ !

Expand Down
70 changes: 65 additions & 5 deletions source/ply_l2p_module.f90
Original file line number Diff line number Diff line change
Expand Up @@ -245,37 +245,97 @@ subroutine ply_l2_projection( nDofs, nIndeps, projected, original, matrix )
! integer, parameter :: vlen = nIndeps
! -------------------------------------------------------------------- !

! Original version (for reference)
!! if (nDofs > 1) then
!!
!! do iStrip=1,nIndeps,vlen
!!
!! ! Calculate the upper bound of the current strip
!! strip_ub = iStrip-1 + min(vlen, nIndeps-iStrip+1)
!!
!! do iRow = 1, nDofs
!!
!! do iCell = iStrip, strip_ub
!! projected(iCell, iRow) = 0.0_rk
!! end do
!!
!! do iCol = 1, nDofs
!! mval = matrix(iCol,iRow)
!! do iCell = iStrip, strip_ub
!! ! on SX-ACE, this can be identified as matrix multiplication
!! ! which results in VERY HIGH performance
!! projected(iCell, iRow) = projected(iCell, iRow) &
!! & + mval * original(iCol, iCell)
!! end do ! iCell
!! end do ! iCol = 1, nCols
!! end do ! iRow = 1, nRows
!! end do ! iStrip
!!
!! else
!!
!! projected = matrix(nDofs,1) * original
!!
!! end if

if (nDofs > 1) then

do iStrip=0,nIndeps-1,vlen
!$OMP PARALLEL DO DEFAULT(SHARED), &
!$OMP PRIVATE(iStrip, iRow, iCell, iCol, mval)
do iStrip=1,nIndeps,vlen

! Calculate the upper bound of the current strip
strip_ub = min(iStrip + vlen, nIndeps) - iStrip
strip_ub = iStrip-1 + min(vlen, nIndeps-iStrip+1)

do iRow = 1, nDofs

do iCell = iStrip+1, iStrip+strip_ub
do iCell = iStrip, strip_ub
projected(iCell, iRow) = 0.0_rk
end do

do iCol = 1, nDofs
mval = matrix(iCol,iRow)
do iCell = iStrip+1, iStrip+strip_ub
do iCell = iStrip, strip_ub
! on SX-ACE, this can be identified as matrix multiplication
! which results in VERY HIGH performance
projected(iCell, iRow) = projected(iCell, iRow) &
& + mval * original(iCol, iCell)
end do ! iCell
end do ! iCol = 1, nCols

end do ! iRow = 1, nRows
end do ! iStrip
!$OMP END PARALLEL DO

else

projected = matrix(nDofs,1) * original

end if


! test-version of the loop (will be removed later)
!! if (nDofs > 1) then
!!
!! projected(:, :) = 0.0_rk
!!
!! !$OMP PARALLEL DO COLLAPSE(2) DEFAULT(SHARED), &
!! !$OMP PRIVATE(iStrip, iRow, iCell, iCol, mval)
!! do iRow = 1, nDofs
!! do iCol = 1, nDofs
!! mval = matrix(iCol,iRow)
!! do iStrip=1,nIndeps
!! projected(iStrip, iRow) = projected(iStrip, iRow) &
!! & + mval * original(iCol, iStrip)
!! end do
!! end do
!! end do
!! !$OMP END PARALLEL DO
!!
!! else
!!
!! projected = matrix(nDofs,1) * original
!!
!! end if

end subroutine ply_l2_projection
! ************************************************************************ !

Expand Down
Loading