From f6be44fd7232bc81c912415996a40922abe9c59f Mon Sep 17 00:00:00 2001 From: Harald Klimach Date: Wed, 16 May 2018 13:06:25 +0200 Subject: [PATCH 01/21] Started a new branch to work on OpenMP. --HG-- branch : feature/openmp --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 0f403ee..4e6d623 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,8 @@ Polynomials Library =================== +*This is the OpenMP implementation branch* + This project is a supporting library for [TreElM](https://bitbucket.org/apesteam/treelm). It does not work on its own, but rather needs to be included in other projects, which also include TreElM. From 48bfb6eefca7616594789ef9261f9eb699612d4f Mon Sep 17 00:00:00 2001 From: Harald Klimach Date: Fri, 18 May 2018 18:35:39 +0200 Subject: [PATCH 02/21] Added OpenMP directives to the FPT. Also added a performance utest to check the implementation. Note: this first implementation with OpenMP breaks for polynomial degrees of 255 and higher (the algorithm starts to use approximated blocks then). This is probably due to the usage of memory allocated in the initialization for the FPT, that is shared between threads. There is no problem when using OMP_NUM_THREADS=1 for the ply_fpt_3D_performance_test. --HG-- branch : feature/openmp --- source/fpt/ply_legFpt_module.f90 | 12 +++ .../with_fftw/ply_fpt_3D_performance_test.f90 | 98 +++++++++++++++++++ 2 files changed, 110 insertions(+) create mode 100644 utests/with_fftw/ply_fpt_3D_performance_test.f90 diff --git a/source/fpt/ply_legFpt_module.f90 b/source/fpt/ply_legFpt_module.f90 index 42f70dd..8d0678f 100644 --- a/source/fpt/ply_legFpt_module.f90 +++ b/source/fpt/ply_legFpt_module.f90 @@ -227,10 +227,12 @@ subroutine ply_legToPnt( fpt, legCoeffs, pntVal, nIndeps ) integer :: n ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(n, iDof, cheb) n = fpt%legToChebParams%n if (.not. fpt%use_lobatto_points) then + !$OMP DO do iDof = 1, nIndeps*n, n call ply_fpt_single( alph = legCoeffs(iDof:iDof+n-1), & & gam = cheb, & @@ -245,9 +247,11 @@ subroutine ply_legToPnt( fpt, legCoeffs, pntVal, nIndeps ) & cheb, & & pntVal(iDof:iDof+n-1) ) end do + !$OMP END DO else + !$OMP DO do iDof = 1, nIndeps*n, n call ply_fpt_single( alph = legCoeffs(iDof:iDof+n-1), & & gam = cheb, & @@ -261,8 +265,10 @@ subroutine ply_legToPnt( fpt, legCoeffs, pntVal, nIndeps ) & cheb, & & pntVal(iDof:iDof+n-1) ) end do + !$OMP END DO end if ! lobattoPoints + !$OMP END PARALLEL end subroutine ply_legToPnt ! ************************************************************************ ! @@ -284,11 +290,13 @@ subroutine ply_pntToLeg( fpt, pntVal, legCoeffs, nIndeps ) integer :: n ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(n, iDof, cheb, normFactor) n = fpt%legToChebParams%n if (.not. fpt%use_lobatto_Points) then normFactor = 1.0_rk / real(n,kind=rk) + !$OMP DO do iDof = 1, nIndeps*n, n call fftw_execute_r2r( fpt%planPntToCheb, & & pntVal(iDof:iDof+n-1), & @@ -303,10 +311,12 @@ subroutine ply_pntToLeg( fpt, pntVal, legCoeffs, nIndeps ) & alph = cheb, & & params = fpt%ChebToLegParams ) end do + !$OMP END DO else normFactor = 0.5_rk / real(n-1,kind=rk) + !$OMP DO do iDof = 1, nIndeps*n, n call fftw_execute_r2r( fpt%planPntToCheb, & & pntVal(iDof:iDof+n-1), & @@ -321,8 +331,10 @@ subroutine ply_pntToLeg( fpt, pntVal, legCoeffs, nIndeps ) & alph = cheb, & & params = fpt%ChebToLegParams ) end do + !$OMP END DO end if ! lobattoPoints + !$OMP END PARALLEL end subroutine ply_pntToLeg ! ************************************************************************ ! diff --git a/utests/with_fftw/ply_fpt_3D_performance_test.f90 b/utests/with_fftw/ply_fpt_3D_performance_test.f90 new file mode 100644 index 0000000..d6ad78b --- /dev/null +++ b/utests/with_fftw/ply_fpt_3D_performance_test.f90 @@ -0,0 +1,98 @@ +!> Unit test to check functionallity of fast polynomial transformations. +!! \author{Jens Zudrop} +program ply_fpt_3D_performance_test + use mpi, only: mpi_wtime + use env_module, only: rk, fin_env + use tem_logging_module, only: logUnit, tem_logging_init_primary + use tem_general_module, only: tem_general_type, tem_start + use ply_legFpt_module, only: ply_legFpt_type, ply_init_legFPT + use ply_legFpt_3D_module, only: ply_legToPnt_3D, & + & ply_pntToLeg_3D + + implicit none + + integer :: iPower + integer, parameter :: maxpower = 8 + real(kind=rk) :: res, newRes + type(tem_general_type) :: general + + ! Init the Treelm environment, needed to init the log Unit + call tem_start(codeName = 'FPT 3D Performance Test', & + & version = '1', & + & general = general ) + call tem_logging_init_primary( level = 1, & + & rank = general%proc%rank ) + + res = 0.0_rk + do iPower = 1,maxpower + call ply_check_legToPnt_3D(iPower, newRes) + if (newRes.gt.res) then + res = newRes + end if + end do + + write(*,*) 'Maximal deviation:', res + if (res < 1.e-08) then + write(logUnit(1),*) 'PASSED' + end if + + call fin_env() + +contains + + subroutine ply_check_legToPnt_3D(power,res) + integer, intent(in) :: power + real(kind=rk) :: res + integer :: maxPolyDegree, iVar, nVars + real(kind=rk), allocatable :: legCoeffs(:,:), legCoeffsIn(:,:) + real(kind=rk), allocatable :: pntVal(:,:), legVal(:,:) + type(ply_legFpt_type) :: fpt + real(kind=rk) :: starttime, stoptime + + ! Define the maximal polynomial degree we want to calculate the + ! bases exchange for. + maxPolyDegree = 2**power-1 ! maxPolyDegree+1 has to be a power of 2 + nVars = 3 + write(logUnit(10),*) '------------------------------------' // & + & ' Number of Legendre coefficients (per dim): ', maxPolyDegree+1 + write(logUnit(10),*) '------------------------------------' // & + & ' Number of Legendre coefficients (total): ',(maxPolyDegree+1)**3 + + ! Create the Legendre expansion coefficients + allocate(legCoeffs((maxPolyDegree+1)**3,nVars)) + allocate(legCoeffsIn((maxPolyDegree+1)**3,nVars)) + do iVar = 1, nVars + legCoeffs(:,iVar) = real(iVar, rk) + end do + + ! Init the FPT + call ply_init_legFpt( maxPolyDegree = maxPolyDegree, & + & nIndeps = (maxpolydegree+1)**2, & + & fpt = fpt ) + + ! now transform to the Chebyshev nodes + allocate(pntVal( (maxPolyDegree+1)**3, nVars )) + legCoeffsIn = legCoeffs + starttime = MPI_Wtime() + call ply_legToPnt_3D( fpt = fpt, & + & legCoeffs = legCoeffsIn, & + & pntVal = pntVal, & + & nVars = nVars ) + stoptime = MPI_Wtime() + write(*,*) 'Time for degree ', maxpolydegree, ' trafo: ', stoptime - starttime + + ! now transform back to Legendre coefficients + allocate(legVal( (maxPolyDegree+1)**3,nVars )) + starttime = MPI_Wtime() + call ply_pntToLeg_3D( fpt = fpt, & + & pntVal = pntVal, & + & legCoeffs = legVal, & + & nVars = nVars ) + stoptime = MPI_Wtime() + write(*,*) 'Time for degree ', maxpolydegree, ' inverse: ', stoptime - starttime + + res = maxval(abs(legVal(:,:) - legCoeffs(:,:))) + + end subroutine ply_check_legToPnt_3D + +end program ply_fpt_3D_performance_test From 544896fa445d989ecb7c78ac37ae90c35ccbc36b Mon Sep 17 00:00:00 2001 From: Robin Weihe Date: Thu, 5 Jul 2018 15:33:13 +0200 Subject: [PATCH 03/21] reimplemantation of openmp statements --HG-- branch : feature/openmp --- source/fpt/ply_chebPoint_module.f90 | 74 +++++++++++++++++++++++ source/fpt/ply_legFpt_2D_module.fpp | 16 ++++- source/fpt/ply_legFpt_3D_module.fpp | 2 + source/fpt/ply_polyBaseExc_module.fpp | 2 +- source/ply_LegPolyProjection_module.f90 | 28 +++++++++ source/ply_fxt_module.f90 | 48 +++++++++++++-- source/ply_l2p_module.f90 | 6 +- source/ply_leg_diff_module.fpp | 26 +++++++- source/ply_legser_module.f90 | 7 +++ source/ply_modg_basis_module.fpp | 16 +++++ source/ply_nodes_module.f90 | 14 ++--- source/ply_poly_project_module.fpp | 46 ++++++++++++-- source/ply_poly_transformation_module.f90 | 25 ++++++++ source/ply_sampling_adaptive_module.f90 | 6 +- source/ply_sampling_module.fpp | 6 ++ source/ply_sampling_varsys_module.f90 | 14 +++++ source/ply_space_integration_module.f90 | 48 +++++++++++++++ source/ply_split_element_module.f90 | 9 ++- source/ply_split_legendre_module.f90 | 2 +- source/ply_transfer_module.fpp | 2 + 20 files changed, 371 insertions(+), 26 deletions(-) diff --git a/source/fpt/ply_chebPoint_module.f90 b/source/fpt/ply_chebPoint_module.f90 index 0a40640..01dc4f0 100644 --- a/source/fpt/ply_chebPoint_module.f90 +++ b/source/fpt/ply_chebPoint_module.f90 @@ -39,12 +39,17 @@ subroutine ply_chebPoint_1D( nPoints, chebPnt1D ) ! -------------------------------------------------------------------- ! integer :: iPoint ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iPoint) allocate(chebPnt1D(nPoints)) + !$OMP DO do iPoint = 1, nPoints chebPnt1D(iPoint) = -1.0_rk & & * cos( PI / nPoints * ( (iPoint - 1.0_rk) + 1.0_rk / 2.0_rk ) ) end do + !$OMP END DO + + !$OMP END PARALLEL end subroutine ply_chebPoint_1D ! ************************************************************************ ! @@ -65,11 +70,16 @@ subroutine ply_lobattoChebPoint_1D( nPoints, chebPnt1D ) ! -------------------------------------------------------------------- ! integer :: iPoint ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iPoint) allocate(chebPnt1D(nPoints)) + !$OMP DO do iPoint = 1, nPoints chebPnt1D(iPoint) = cos( ( iPoint - 1.0_rk ) * PI / ( nPoints - 1.0_rk ) ) end do + !$OMP END DO + + !$OMP END PARALLEL end subroutine ply_lobattoChebPoint_1D ! ************************************************************************ ! @@ -86,12 +96,14 @@ subroutine create_volume_cheb_points_cube(num_intp_per_direction, points) real(kind=rk), allocatable :: chebPnt1D(:) integer :: nquadpoints ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(k, j, i, pointNumber) nquadpoints = num_intp_per_direction**3 allocate(points(nquadpoints,3)) call ply_chebPoint_1D( num_intp_per_direction, chebPnt1D ) pointNumber = 1 + !$OMP DO do k = 1, num_intp_per_direction do j = 1, num_intp_per_direction do i = 1, num_intp_per_direction @@ -104,6 +116,9 @@ subroutine create_volume_cheb_points_cube(num_intp_per_direction, points) end do end do end do + !$OMP END DO + + !$OMP END PARALLEL end subroutine create_volume_cheb_points_cube ! ************************************************************************ ! @@ -121,12 +136,14 @@ subroutine create_volume_lobattocheb_points_cube( num_intp_per_direction, & real(kind=rk), allocatable :: chebPnt1D(:) integer :: nquadpoints ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(k, j, i, pointNumber) nquadpoints = num_intp_per_direction**3 allocate(points(nquadpoints,3)) call ply_lobattoChebPoint_1D( num_intp_per_direction, chebPnt1D ) pointNumber = 1 + !$OMP DO do k = 1, num_intp_per_direction do j = 1, num_intp_per_direction do i = 1, num_intp_per_direction @@ -139,6 +156,9 @@ subroutine create_volume_lobattocheb_points_cube( num_intp_per_direction, & end do end do end do + !$OMP END DO + + !$OMP END PARALLEL end subroutine create_volume_lobattocheb_points_cube ! ************************************************************************ ! @@ -156,12 +176,16 @@ subroutine create_volume_cheb_points_cube_2d( num_intp_per_direction, points ) real(kind=rk), allocatable :: chebPnt1D(:) integer :: nquadpoints ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(j, i, pointNumber) + + nquadpoints = num_intp_per_direction**2 allocate(points(nquadpoints,3)) call ply_chebPoint_1D( num_intp_per_direction, chebPnt1D ) pointNumber = 1 + !$OMP DO do j = 1, num_intp_per_direction do i = 1, num_intp_per_direction ! here we build all possible combinations of the one-dimensional @@ -172,6 +196,9 @@ subroutine create_volume_cheb_points_cube_2d( num_intp_per_direction, points ) pointNumber = pointNumber + 1 end do end do + !$OMP END DO + + !$OMP END PARALLEL end subroutine create_volume_cheb_points_cube_2d ! ************************************************************************ ! @@ -191,12 +218,14 @@ subroutine create_volume_lobattocheb_points_cube_2d(num_intp_per_direction, poin real(kind=rk), allocatable :: chebPnt1D(:) integer :: nquadpoints ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(j, i, pointNumber) nquadpoints = num_intp_per_direction**2 allocate(points(nquadpoints,3)) call ply_lobattoChebPoint_1D( num_intp_per_direction, chebPnt1D ) pointNumber = 1 + !$OMP DO do j = 1, num_intp_per_direction do i = 1, num_intp_per_direction ! here we build all possible combinations of the one-dimensional @@ -207,6 +236,9 @@ subroutine create_volume_lobattocheb_points_cube_2d(num_intp_per_direction, poin pointNumber = pointNumber + 1 end do end do + !$OMP END DO + + !$OMP END PARALLEL end subroutine create_volume_lobattocheb_points_cube_2d ! ************************************************************************ ! @@ -222,11 +254,13 @@ subroutine create_volume_cheb_points_cube_1d( num_intp_per_direction, points ) integer :: pointNumber real(kind=rk), allocatable :: chebPnt1D(:) ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(i, pointNumber) allocate(points(num_intp_per_direction,3)) call ply_chebPoint_1D( num_intp_per_direction, chebPnt1D ) pointNumber = 1 + !$OMP DO do i = 1, num_intp_per_direction ! here we build all possible combinations of the one-dimensional ! points to get the three dimensional values. @@ -235,6 +269,9 @@ subroutine create_volume_cheb_points_cube_1d( num_intp_per_direction, points ) points(pointNumber, 3) = 0.0_rk pointNumber = pointNumber + 1 end do + !$OMP END DO + + !$OMP END PARALLEL end subroutine create_volume_cheb_points_cube_1d ! ************************************************************************ ! @@ -253,11 +290,13 @@ subroutine create_volume_lobattocheb_points_cube_1d( num_intp_per_direction, & integer :: pointNumber real(kind=rk), allocatable :: chebPnt1D(:) ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(i, pointNumber) allocate(points(num_intp_per_direction,3)) call ply_lobattoChebPoint_1D( num_intp_per_direction, chebPnt1D ) pointNumber = 1 + !$OMP DO do i = 1, num_intp_per_direction ! here we build all possible combinations of the one-dimensional ! points to get the three dimensional values. @@ -266,6 +305,9 @@ subroutine create_volume_lobattocheb_points_cube_1d( num_intp_per_direction, & points(pointNumber, 3) = 0.0_rk pointNumber = pointNumber + 1 end do + !$OMP END DO + + !$OMP END PARALLEL end subroutine create_volume_lobattocheb_points_cube_1d ! ************************************************************************ ! @@ -291,6 +333,7 @@ subroutine create_surface_cheb_points_cube( num_intp_per_direction, points, & real(kind=rk), allocatable :: chebPnt1D(:) integer :: nquadpoints ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(j, i, pointNumber) nquadpoints = num_intp_per_direction**2 allocate(points(nquadpoints,3)) @@ -301,6 +344,7 @@ subroutine create_surface_cheb_points_cube( num_intp_per_direction, points, & select case(dir) case(1) ! face in x direction, x coord is fixed + !$OMP DO do j = 1, num_intp_per_direction do i = 1, num_intp_per_direction points(pointNumber, 1) = (-1.0_rk)**align @@ -309,7 +353,9 @@ subroutine create_surface_cheb_points_cube( num_intp_per_direction, points, & pointNumber = pointNumber + 1 end do end do + !$OMP END DO case(2) ! face in y direction, y coord is fixed + !$OMP DO do j = 1, num_intp_per_direction do i = 1, num_intp_per_direction points(pointNumber, 1) = chebPnt1D(i) @@ -318,7 +364,9 @@ subroutine create_surface_cheb_points_cube( num_intp_per_direction, points, & pointNumber = pointNumber + 1 end do end do + !$OMP END DO case(3) ! face in z direction, z coord is fixes + !$OMP DO do j = 1, num_intp_per_direction do i = 1, num_intp_per_direction points(pointNumber , 1) = chebPnt1D(i) @@ -327,11 +375,14 @@ subroutine create_surface_cheb_points_cube( num_intp_per_direction, points, & pointNumber = pointNumber + 1 end do end do + !$OMP END DO case default call tem_abort( 'ERROR in create_surface_cheb_points_cube: unknown ' & & // 'face direction' ) end select + !$OMP END PARALLEL + end subroutine create_surface_cheb_points_cube ! ************************************************************************ ! @@ -356,6 +407,7 @@ subroutine create_surface_lobattocheb_points_cube( num_intp_per_direction, & real(kind=rk), allocatable :: chebPnt1D(:) integer :: nquadpoints ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(j, i, pointNumber) nquadpoints = num_intp_per_direction**2 allocate(points(nquadpoints,3)) @@ -366,6 +418,7 @@ subroutine create_surface_lobattocheb_points_cube( num_intp_per_direction, & select case(dir) case(1) ! face in x direction, x coord is fixed + !$OMP DO do j = 1, num_intp_per_direction do i = 1, num_intp_per_direction points(pointNumber, 1) = (-1.0_rk)**align @@ -374,7 +427,9 @@ subroutine create_surface_lobattocheb_points_cube( num_intp_per_direction, & pointNumber = pointNumber + 1 end do end do + !$OMP END DO case(2) ! face in y direction, y coord is fixed + !$OMP DO do j = 1, num_intp_per_direction do i = 1, num_intp_per_direction points(pointNumber, 1) = chebPnt1D(i) @@ -383,7 +438,9 @@ subroutine create_surface_lobattocheb_points_cube( num_intp_per_direction, & pointNumber = pointNumber + 1 end do end do + !$OMP END DO case(3) ! face in z direction, z coord is fixes + !$OMP DO do j = 1, num_intp_per_direction do i = 1, num_intp_per_direction points(pointNumber, 1) = chebPnt1D(i) @@ -392,11 +449,14 @@ subroutine create_surface_lobattocheb_points_cube( num_intp_per_direction, & pointNumber = pointNumber + 1 end do end do + !$OMP END DO case default call tem_abort( 'ERROR in create_surface_lobattocheb_points_cube:' & & // ' unknown face direction' ) end select + !$OMP END PARALLEL + end subroutine create_surface_lobattocheb_points_cube ! ************************************************************************ ! @@ -420,6 +480,7 @@ subroutine create_surface_cheb_points_cube_2d( num_intp_per_direction, & real(kind=rk), allocatable :: chebPnt1D(:) integer :: nquadpoints ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(i, pointNumber) nquadpoints = num_intp_per_direction allocate(points(nquadpoints,3)) @@ -430,24 +491,30 @@ subroutine create_surface_cheb_points_cube_2d( num_intp_per_direction, & select case(dir) case(1) ! face in x direction, x coord is fixed + !$OMP DO do i = 1, num_intp_per_direction points(pointNumber, 1) = (-1.0_rk)**align points(pointNumber, 2) = chebPnt1D(i) points(pointNumber, 3) = 0.0_rk pointNumber = pointNumber + 1 end do + !$OMP END DO case(2) ! face in y direction, y coord is fixed + !$OMP DO do i = 1, num_intp_per_direction points(pointNumber, 1) = chebPnt1D(i) points(pointNumber, 2) = (-1.0_rk)**align points(pointNumber, 3) = 0.0_rk pointNumber = pointNumber + 1 end do + !$OMP END DO case default call tem_abort( 'ERROR in create_surface_cheb_points_cube_2d:' & & // ' unknown face direction' ) end select + !$OMP END PARALLEL + end subroutine create_surface_cheb_points_cube_2d ! ************************************************************************ ! @@ -471,6 +538,7 @@ subroutine create_surface_lobattocheb_points_cube_2d( & real(kind=rk), allocatable :: chebPnt1D(:) integer :: nquadpoints ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(i, pointNumber) nquadpoints = num_intp_per_direction allocate(points(nquadpoints,3)) @@ -481,24 +549,30 @@ subroutine create_surface_lobattocheb_points_cube_2d( & select case(dir) case(1) ! face in x direction, x coord is fixed + !$OMP DO do i = 1, num_intp_per_direction points(pointNumber, 1) = (-1.0_rk)**align points(pointNumber, 2) = chebPnt1D(i) points(pointNumber, 3) = 0.0_rk pointNumber = pointNumber + 1 end do + !$OMP END DO case(2) ! face in y direction, y coord is fixed + !$OMP DO do i = 1, num_intp_per_direction points(pointNumber, 1) = chebPnt1D(i) points(pointNumber, 2) = (-1.0_rk)**align points(pointNumber, 3) = 0.0_rk pointNumber = pointNumber + 1 end do + !$OMP END DO case default call tem_abort( 'ERROR in create_surface_cheb_points_cube_2d:' & & // ' unknown face direction' ) end select + !$OMP END PARALLEL + end subroutine create_surface_lobattocheb_points_cube_2d ! ************************************************************************ ! diff --git a/source/fpt/ply_legFpt_2D_module.fpp b/source/fpt/ply_legFpt_2D_module.fpp index 6ba78c3..2f62583 100644 --- a/source/fpt/ply_legFpt_2D_module.fpp +++ b/source/fpt/ply_legFpt_2D_module.fpp @@ -54,6 +54,7 @@ contains real(kind=rk), dimension(:), allocatable :: alph real(kind=rk), dimension(:), allocatable :: gam ! --------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iStrip, iAlph, nIndeps) striplen = fpt%legToChebParams%striplen n = fpt%legToChebParams%n @@ -69,6 +70,7 @@ contains ! 1 4 7 ! 2 5 8 ! 3 6 9 + !$OMP DO yStripLoop: do iStrip = 1, n, striplen ! iAlph is the index of the first element in a line for the transformation ! in y-direction. @@ -88,8 +90,10 @@ contains pntVal((iStrip-1)*n+1 : (iStrip+nIndeps-1)*n) = gam(1:nIndeps*n) end do yStripLoop + !$OMP END DO - ! x-direction + ! x-direction + !$OMP DO xStripLoop: do iStrip = 1, n, striplen do iAlph = iStrip, min(iStrip+striplen-1, n) alph((iAlph-iStrip)*n+1:(iAlph-iStrip+1)*n) = pntVal(iAlph::n) !ztrafo @@ -106,6 +110,9 @@ contains pntVal((iStrip-1)*n+1 : (iStrip+nIndeps-1)*n) = gam(1:nIndeps*n) end do xStripLoop + !$OMP END DO + + !$OMP END PARALLEL end subroutine ply_legToPnt_2D_singVar ! ************************************************************************ ! @@ -160,6 +167,7 @@ contains real(kind=rk), dimension(:), allocatable :: alph real(kind=rk), dimension(:), allocatable :: gam ! --------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(nIndeps, iStrip, iAlph) striplen = fpt%chebToLegParams%striplen n = fpt%legToChebParams%n @@ -168,6 +176,7 @@ contains allocate(alph(min(striplen, n)*n)) allocate(gam(min(striplen, n)*n)) + !$OMP DO yStripLoop: do iStrip = 1, n, striplen do iAlph = iStrip, min(iStrip+striplen-1, n) !y_Trafo alph((iAlph-iStrip)*n+1:(iAlph-iStrip+1)*n) = pntVal(iAlph::n) @@ -185,8 +194,10 @@ contains legCoeffs((iStrip-1)*n+1 : (iStrip+nIndeps-1)*n) = gam(1:nIndeps*n) end do yStripLoop ! iStrip + !$OMP END DO ! x-direction + !$OMP DO xStripLoop: do iStrip = 1,n,striplen do iAlph = iStrip, min(iStrip+striplen-1, n) !ztrafo @@ -204,6 +215,9 @@ contains legCoeffs((iStrip-1)*n+1 : (iStrip+nIndeps-1)*n) = gam(1:nIndeps*n) end do xStripLoop + !$OMP END DO + + !$OMP END PARALLEL end subroutine ply_pntToLeg_2D_singVar ! ************************************************************************ ! diff --git a/source/fpt/ply_legFpt_3D_module.fpp b/source/fpt/ply_legFpt_3D_module.fpp index d20ad88..4acfeef 100644 --- a/source/fpt/ply_legFpt_3D_module.fpp +++ b/source/fpt/ply_legFpt_3D_module.fpp @@ -158,6 +158,7 @@ contains end do xStripLoop + end subroutine ply_legToPnt_3D_singVar ! ************************************************************************ ! @@ -318,6 +319,7 @@ contains end do xStripLoop + end subroutine ply_pntToLeg_3D_singVar ! ************************************************************************ ! diff --git a/source/fpt/ply_polyBaseExc_module.fpp b/source/fpt/ply_polyBaseExc_module.fpp index cea7281..496a912 100644 --- a/source/fpt/ply_polyBaseExc_module.fpp +++ b/source/fpt/ply_polyBaseExc_module.fpp @@ -1048,7 +1048,7 @@ contains !> Convert strip of coefficients of a modal representation in terms of !! Legendre polynomials to modal coefficients in terms of Chebyshev !! polynomials. - subroutine ply_fpt_single( alph, gam, params ) + subroutine ply_fpt_single( alph, gam, params) ! -------------------------------------------------------------------- ! !> The parameters of the fast polynomial transformation. type(ply_trafo_params_type), intent(inout) :: params diff --git a/source/ply_LegPolyProjection_module.f90 b/source/ply_LegPolyProjection_module.f90 index 175bdfe..a36db61 100644 --- a/source/ply_LegPolyProjection_module.f90 +++ b/source/ply_LegPolyProjection_module.f90 @@ -139,6 +139,9 @@ subroutine ply_QPolyProjection( subsamp, dofReduction, tree, meshData, & real(kind=rk), allocatable :: newWorkDat(:) integer :: nChildDofs, oneDof ! -------------------------------------------------------------------- ! + + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iVar, nDofs, nComponents, nChilds, nChildDofs, workDat) + if (subsamp%projectionType.ne.ply_QLegendrePoly_prp) then call tem_abort( 'ERROR in ply_QPolyProjection: subsampling is ' & & // 'only implemented for Q-Legendre-Polynomials' ) @@ -148,6 +151,7 @@ subroutine ply_QPolyProjection( subsamp, dofReduction, tree, meshData, & allocate(newVarDofs(nVars)) allocate(newMeshData(nVars)) + !$OMP DO varLoop: do iVar=1,nVars nDofs = varDofs(iVar) nComponents = varcomps(iVar) @@ -221,6 +225,9 @@ subroutine ply_QPolyProjection( subsamp, dofReduction, tree, meshData, & deallocate(projection_oneDof%projCoeff) end do varLoop + !$OMP END DO + + !$OMP END PARALLEL end subroutine ply_QPolyProjection ! ************************************************************************ ! @@ -262,6 +269,9 @@ subroutine ply_initQLegProjCoeff( doftype, nDofs, ndims, nChilds, & real(kind=rk), allocatable :: projCoeffOneDim(:,:,:) real(kind=rk) :: dimexp ! -------------------------------------------------------------------- ! + + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iChild, iParentDof, iChildDof, xShift, yShift, zShift) + select case(dofType) case(ply_QLegendrePoly_prp) allocate(projection%projCoeff(nDofs, nChildDofs, nChilds)) @@ -274,6 +284,7 @@ subroutine ply_initQLegProjCoeff( doftype, nDofs, ndims, nChilds, & projCoeffOneDim = ply_QLegOneDimCoeff( nint(nDofs**dimexp), & & nint(nChildDofs**dimexp) ) + !$OMP DO ! Loop over the children of this element childLoop: do iChild = 1, nChilds @@ -326,12 +337,14 @@ subroutine ply_initQLegProjCoeff( doftype, nDofs, ndims, nChilds, & end do childDofLoop end do parentDofLoop end do childLoop + !$OMP END DO case default call tem_abort( 'ERROR in ply_initProjCoeff: initialization of ' & & // 'projection coefficients for subsampling is implemented only ' & & // 'for Q-Legendre polynomials' ) end select + !$OMP END PARALLEL deallocate(projCoeffOneDim) end subroutine ply_initQLegProjCoeff ! ************************************************************************ ! @@ -576,6 +589,9 @@ subroutine ply_subsampleData( tree, meshData, nDofs, nChildDofs, & integer :: oneDof, noChilds, childpos real(kind=rk), allocatable :: childData(:) ! -------------------------------------------------------------------- ! + + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iParentElem, iElem, lowElemIndex, upElemIndex, iChild, lowChildIndex, upChildIndex) + nChilds = 2**ndims nElems = tree%nElems nElemsToRefine = count(new_refine_tree) @@ -592,6 +608,7 @@ subroutine ply_subsampleData( tree, meshData, nDofs, nChildDofs, & if (subsamp%sampling_lvl > 1) then + !$OMP DO elementLoop: do iParentElem=1,nParentElems ! Check if the parent cell was already refined... if (refine_tree(iParentElem)) then @@ -683,9 +700,11 @@ subroutine ply_subsampleData( tree, meshData, nDofs, nChildDofs, & end if end do elementLoop + !$OMP END DO else + !$OMP DO elemLoop: do iElem=1,nElems if (new_refine_tree(iElem)) then allocate(childData(nChildDofs*nChilds*nComponents)) @@ -736,8 +755,11 @@ subroutine ply_subsampleData( tree, meshData, nDofs, nChildDofs, & deallocate(childData) end if end do elemLoop + !$OMP END DO end if + !$OMP END PARALLEL + end subroutine ply_subsampleData ! ************************************************************************ ! @@ -775,8 +797,11 @@ subroutine ply_projDataToChild( parentData, nParentDofs, nChildDofs, & integer :: childDof_pos, parentDof_pos real(kind=rk) :: projCoeff ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iChild, iParentDof, iChildDof, iComp, projCoeff, childDof_pos, parentDof_pos) + childData(:) = 0.0_rk + !$OMP DO childLoop: do iChild = 1, nChilds parentDofLoop: do iParentDof = 1, nParentDofs childDofLoop: do iChildDof = 1, nChildDofs @@ -796,6 +821,9 @@ subroutine ply_projDataToChild( parentData, nParentDofs, nChildDofs, & end do childDofLoop end do parentDofLoop end do childLoop + !$OMP END DO + + !$OMP END PARALLEL end subroutine ply_projDataToChild ! ************************************************************************ ! diff --git a/source/ply_fxt_module.f90 b/source/ply_fxt_module.f90 index e35f65f..616c31c 100644 --- a/source/ply_fxt_module.f90 +++ b/source/ply_fxt_module.f90 @@ -75,7 +75,7 @@ subroutine ply_init_fxt( fxt, header, degree, nDims, nodes, faces ) leg1D_at_gauss = legendre_1D(gaussp1D, degree) select case(nDims) - case(3) + case(3) call fxtf_flptld_init( flpt = fxt%flpt, & & degree = degree, & & nPoints = degree+1, & @@ -114,13 +114,13 @@ subroutine ply_init_fxt( fxt, header, degree, nDims, nodes, faces ) end do end do - case(2) + case(2) call fxtf_flptld_init( flpt = fxt%flpt, & & degree = degree, & & nPoints = degree+1, & - & prec = header%prec ) - - ! Fill up the nodes and the face with gauss legendre points + & prec = header%prec ) + + ! Fill up the nodes and the face with gauss legendre points allocate( nodes(nPoints**2, 3) ) do iPoint=1,nPoints lb = (iPoint-1)*nPoints + 1 @@ -145,7 +145,7 @@ subroutine ply_init_fxt( fxt, header, degree, nDims, nodes, faces ) end do end do - case(1) + case(1) call fxtf_flptld_init( flpt = fxt%flpt, & & degree = degree, & & nPoints = degree+1, & @@ -211,10 +211,12 @@ subroutine ply_fxt_m2n_2D( fxt, modal_data, nodal_data, oversamp_degree ) ! -------------------------------------------------------------------- ! integer :: ub, lb, iLine, iColumn, nModesPerDim, msq ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iLine, lb, ub, iColumn) nModesPerDim = (oversamp_degree+1) msq = nModesPerDim*nModesPerDim + !$OMP DO do iLine = 1, oversamp_degree+1 lb = (iLine-1) * (oversamp_degree+1) + 1 ub = lb + oversamp_degree @@ -222,15 +224,20 @@ subroutine ply_fxt_m2n_2D( fxt, modal_data, nodal_data, oversamp_degree ) & modal_data = modal_data(lb:ub), & & nodal_data = nodal_data(lb:ub) ) end do + !$OMP END DO + !$OMP DO do iColumn = 1, oversamp_degree+1 lb = iColumn call fxtf_flptld_m2n( flpt = fxt%flpt, & & modal_data = nodal_data(lb:msq:oversamp_degree+1), & & nodal_data = modal_data(lb:msq:oversamp_degree+1) ) end do + !$OMP END DO nodal_data = modal_data + !$OMP END PARALLEL + end subroutine ply_fxt_m2n_2D ! ************************************************************************ ! @@ -251,6 +258,8 @@ subroutine ply_fxt_m2n_3D( fxt, modal_data, nodal_data, oversamp_degree ) real(kind=rk), pointer :: tmp_in(:), tmp_out(:) ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iLine, lb, ub, tmp_in, tmp_out, iColumn) + nModesPerDim = (oversamp_degree+1) msq = nModesPerDim*nModesPerDim nTotalDofs = (oversamp_degree+1)**3 @@ -259,6 +268,7 @@ subroutine ply_fxt_m2n_3D( fxt, modal_data, nodal_data, oversamp_degree ) tmp_in = -42 tmp_out = -42 + !$OMP DO ! The loop for msq stripes for independent x Dir evaluations do iLine = 1, msq lb = (iLine-1) * (oversamp_degree+1) + 1 @@ -269,7 +279,9 @@ subroutine ply_fxt_m2n_3D( fxt, modal_data, nodal_data, oversamp_degree ) & nodal_data = tmp_out ) nodal_data(lb:ub) = tmp_out end do + !$OMP END DO + !$OMP DO ! The loop for msq stripes for independent y Dir evaluations do iColumn = 1, msq lb = int( (iColumn-1 ) / nModesPerDim ) * msq & @@ -280,15 +292,20 @@ subroutine ply_fxt_m2n_3D( fxt, modal_data, nodal_data, oversamp_degree ) & modal_data = nodal_data(lb:ub:nModesPerDim), & & nodal_data = modal_data(lb:ub:nModesPerDim) ) end do + !$OMP END DO ! The loop for msq stripes for independent z Dir evaluations ub = nTotalDofs + !$OMP DO do iColumn = 1, msq lb = iColumn call fxtf_flptld_m2n( flpt = fxt%flpt, & & modal_data = modal_data(lb:ub:msq), & & nodal_data = nodal_data(lb:ub:msq) ) end do + !$OMP END DO + + !$OMP END PARALLEL end subroutine ply_fxt_m2n_3D ! ************************************************************************ ! @@ -333,10 +350,12 @@ subroutine ply_fxt_n2m_2D( fxt, nodal_data, modal_data, oversamp_degree ) ! -------------------------------------------------------------------- ! integer :: ub, lb, iLine, iColumn, nModesPerDim, msq ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iLine, lb, ub, iColumn) nModesPerDim = (oversamp_degree+1) msq = nModesPerDim*nModesPerDim + !$OMP DO do iLine = 1, oversamp_degree+1 lb = (iLine-1) * (oversamp_degree+1) + 1 ub = lb + oversamp_degree @@ -344,14 +363,21 @@ subroutine ply_fxt_n2m_2D( fxt, nodal_data, modal_data, oversamp_degree ) & nodal_data = nodal_data(lb:ub), & & modal_data = modal_data(lb:ub) ) end do + !$OMP END DO + + !$OMP DO do iColumn = 1, oversamp_degree+1 lb = iColumn call fxtf_flptld_n2m( flpt = fxt%flpt, & & nodal_data = modal_data(lb:msq:oversamp_degree+1), & & modal_data = nodal_data(lb:msq:oversamp_degree+1) ) end do + !$OMP END DO modal_data = nodal_data + + !$OMP END PARALLEL + end subroutine ply_fxt_n2m_2D ! ************************************************************************ ! @@ -370,10 +396,13 @@ subroutine ply_fxt_n2m_3D( fxt, nodal_data, modal_data, oversamp_degree ) integer :: ub, lb, iLine, iColumn, nModesPerDim, msq, ntotalDofs ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iLine, iColumn, ub, lb) + nModesPerDim = (oversamp_degree+1) msq = nModesPerDim*nModesPerDim nTotalDofs = (oversamp_degree+1)**3 + !$OMP DO ! The loop for msq stripes for independent x Dir evaluations do iLine = 1, msq lb = (iLine-1) * (oversamp_degree+1) + 1 @@ -382,7 +411,9 @@ subroutine ply_fxt_n2m_3D( fxt, nodal_data, modal_data, oversamp_degree ) & nodal_data = nodal_data(lb:ub), & & modal_data = modal_data(lb:ub) ) end do + !$OMP END DO + !$OMP DO ! The loop for msq stripes for independent y Dir evaluations do iColumn = 1, msq lb = int( (iColumn-1) / nModesPerDim ) * msq & @@ -393,15 +424,20 @@ subroutine ply_fxt_n2m_3D( fxt, nodal_data, modal_data, oversamp_degree ) & nodal_data = modal_data(lb:ub:nModesPerDim), & & modal_data = nodal_data(lb:ub:nModesPerDim) ) end do + !$OMP END DO ! The loop for msq stripes for independent z Dir evaluations ub = nTotalDofs + !$OMP DO do iColumn = 1, msq lb = iColumn call fxtf_flptld_n2m( flpt = fxt%flpt, & & nodal_data = nodal_data(lb:ub:msq), & & modal_data = modal_data(lb:ub:msq) ) end do + !$OMP END DO + + !$OMP END PARALLEL end subroutine ply_fxt_n2m_3D ! ************************************************************************ ! diff --git a/source/ply_l2p_module.f90 b/source/ply_l2p_module.f90 index 8d25e8c..8e1ce65 100644 --- a/source/ply_l2p_module.f90 +++ b/source/ply_l2p_module.f90 @@ -241,8 +241,10 @@ subroutine ply_l2_projection( nDofs, nIndeps, projected, original, matrix ) ! integer, parameter :: vlen = nIndeps ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iStrip, iRow, iCell, iCol) if (nDofs > 1) then + !$OMP DO do iStrip=0,nIndeps-1,vlen ! Calculate the upper bound of the current strip @@ -265,6 +267,7 @@ subroutine ply_l2_projection( nDofs, nIndeps, projected, original, matrix ) end do ! iRow = 1, nRows end do ! iStrip + !$OMP END DO else @@ -272,9 +275,10 @@ subroutine ply_l2_projection( nDofs, nIndeps, projected, original, matrix ) end if + !$OMP END PARALLEL end subroutine ply_l2_projection ! ************************************************************************ ! - + ! ************************************************************************ ! !> Transformation between modal and nodal values in 1D via L2 projection. diff --git a/source/ply_leg_diff_module.fpp b/source/ply_leg_diff_module.fpp index 573e17b..4034bc0 100644 --- a/source/ply_leg_diff_module.fpp +++ b/source/ply_leg_diff_module.fpp @@ -46,6 +46,8 @@ contains integer :: leg(3), iDeg, iDeg1, iDeg2, iDeg3, DV(3) ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iDeg1, dofPosPrev, iDeg2, iDeg3, iVar,leg, dofPos, iDeg) + if (present(dirVec)) then DV = dirvec else @@ -58,6 +60,7 @@ contains endif endif + !$OMP DO do iDeg = 1, (mpd+1)**2 iDeg1 = (iDeg-1)/(mpd+1) + 1 !! do IDeg1 = 1, mPd+1 iDeg2 = iDeg - (iDeg1-1)*(mpd+1) !! do IDeg2 = 1, mPd=1 !! iDeg2 = mod(iDeg-1,mpd+1)+1 @@ -116,7 +119,9 @@ contains end do end do end do + !$OMP END DO + !$OMP DO ! Scale the results due to the Jacobians of the mappings do dofpos=1,(mpd+1)**3 ideg3 = (dofpos-1)/(mpd+1)**2 + 1 @@ -128,6 +133,7 @@ contains & * (2.0_rk/elemLength) & & * (2.0_rk*leg(iDir) - 1.0_rk) end do + !$OMP END DO !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! Uncollapsed version of the scaling ! @@ -147,6 +153,8 @@ contains !! end do !! end do + !$OMP END PARALLEL + end subroutine calcDiff_leg_normal ! ************************************************************************ ! @@ -178,6 +186,8 @@ contains integer :: leg(2), iDeg1, iDeg2, DV(2) ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iDeg1, iDeg2, leg, dofPos, dofPosPrev, iVar) + if (present(dirVec)) then DV = dirvec else @@ -188,6 +198,7 @@ contains endif endif + !$OMP DO do iDeg1 = 1, mPd+1 iDeg2 = mPd+1 leg = (/iDeg1, iDeg2/) @@ -244,7 +255,9 @@ contains end do end do end do + !$OMP END DO + !$OMP DO ! Scale the results due to the Jacobians of the mappings do iDeg1 = 1, mPd+1 do iDeg2 = 1, mPd+1 @@ -256,6 +269,9 @@ contains & * (2.0_rk*leg(iDir) - 1.0_rk) end do end do + !$OMP END DO + + !$OMP END PARALLEL end subroutine calcDiff_leg_2d_normal ! ************************************************************************ ! @@ -336,7 +352,7 @@ contains & elemLength = elemLength, & & dirvec = dirvec(:,iDir), & & iDir = iDir ) - enddo + end do endif end subroutine calcDiff_leg_2d @@ -361,6 +377,8 @@ contains integer :: dofPos, dofPosPrev, dofPos2Prev ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(dofPos, iDegX, dofPosPrev, dofPos2Prev) + ! Build the derivative in x direction dofPos = 1 + maxPolyDegree legCoeffsDiff(dofPos,:) = 0.0_rk @@ -368,6 +386,7 @@ contains dofPosPrev = dofPos dofPos = 1 + (maxPolyDegree-1) legCoeffsDiff(dofPos,:) = legCoeffs(dofPosPrev,:) + !$OMP DO do iDegX = maxPolyDegree-1, 1, -1 dofPos = 1 + (iDegX-1) dofPosPrev = 1 + (iDegX) @@ -375,14 +394,19 @@ contains legCoeffsDiff(dofPos,:) = legCoeffsDiff(dofPos2Prev,:) & & + legCoeffs(dofPosPrev,:) end do + !$OMP END DO end if + !$OMP DO do iDegX = 1, maxPolyDegree+1 dofPos = 1 + (iDegX-1) legCoeffsDiff(dofPos,:) = legCoeffsDiff(dofPos,:) & & * (2.0_rk/elemLength) & & * (2.0_rk*iDegX - 1.0_rk) end do + !$OMP END DO + + !$OMP END PARALLEL end subroutine calcDiff_leg_1d ! ************************************************************************ ! diff --git a/source/ply_legser_module.f90 b/source/ply_legser_module.f90 index 0649a2a..dc3ad15 100644 --- a/source/ply_legser_module.f90 +++ b/source/ply_legser_module.f90 @@ -41,19 +41,23 @@ subroutine legser(A, B, n) real(kind=rk) :: ak, al, bb, c, d integer :: k, l, ll ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(k, d, c, l, ll, al, bb, ak) ak = 0.0_rk ! Calculation of the first Legendre coefficient b(1) = 0.5_rk * a(1) + !$OMP DO do k=3,n,2 ak = ak + 2.0_rk b(1) = b(1) - a(k)/(ak*ak - 1.0_rk) end do + !$OMP END DO c = 2.0_rk / 3.0_rk al = 0.0_rk ! Start main loop (remaining Legendre coefficients) + !$OMP DO do l=2,n ! Calculation of the Lth coefficient ll = l+2 @@ -73,6 +77,9 @@ subroutine legser(A, B, n) & / ( (al+al+3.0_rk)*(al+al+2.0_rk) ) b(l) = (al+0.5_rk)*bb end do + !$OMP END DO + + !$OMP END PARALLEL end subroutine legser ! ************************************************************************ ! diff --git a/source/ply_modg_basis_module.fpp b/source/ply_modg_basis_module.fpp index 526a7e0..acd25c3 100644 --- a/source/ply_modg_basis_module.fpp +++ b/source/ply_modg_basis_module.fpp @@ -159,6 +159,8 @@ contains integer :: iFunc, jFunc ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(jFunc, iFunc) + allocate(GaussPoints(nPoints)) allocate(GaussPoints_left(nPoints)) allocate(GaussPoints_right(nPoints)) @@ -198,6 +200,7 @@ contains allocate( integral%anz_anzShift(1:nFunc, 1:nFunc, 2)) + !$OMP DO !loop over anzatz functions do jFunc = 1, nFunc do iFunc = 1, nFunc @@ -219,6 +222,9 @@ contains end do end do + !$OMP END DO + + !$OMP END PARALLEL end subroutine init_modg_covolumeCoeffs ! ************************************************************************ ! @@ -287,6 +293,7 @@ contains end do end do + end subroutine init_modg_multilevelCoeffs ! ************************************************************************ ! @@ -417,6 +424,8 @@ contains real(kind=rk) :: n_q ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iAns, iAnsX, iAnsY, iAnsZ, n_q, ansPos) + ! allocate the output array select case(basisType) case(Q_space) @@ -442,6 +451,7 @@ contains polyValX(2,:) = coords(:,1) polyValY(2,:) = coords(:,2) polyValZ(2,:) = coords(:,3) + !$OMP DO ! ... higher order polynomials are build recursively do iAns = 3, maxPolyDegree+1 n_q = 1.0_rk / real(iAns-1,kind=rk) @@ -467,11 +477,13 @@ contains & * polyValZ(iAns-2,:) ) & & *n_q end do + !$OMP END DO end if ! Now, build the complete point value. select case(basisType) case(Q_space) + !$OMP DO do iAnsX = 1, maxPolyDegree+1 do iAnsY = 1, maxPolyDegree+1 do iAnsZ = 1, maxPolyDegree+1 @@ -483,19 +495,23 @@ contains end do end do end do + !$OMP END DO case(P_space) iAnsX = 1 iAnsY = 1 iAnsZ = 1 ?? copy :: getDofsPTens(maxPolyDegree, ansPosMax) + !$OMP DO do ansPos = 1, ansPosMax polyVal(ansPos, :) = polyValX(iAnsX,:) & & * polyValY(iAnsY,:) & & * polyValZ(iAnsZ,:) ?? copy :: nextModgCoeffPTens(iAnsX, iAnsY, iAnsZ) end do + !$OMP END DO end select + !$OMP END PARALLEL end subroutine evalLegendreTensPoly ! ************************************************************************ ! diff --git a/source/ply_nodes_module.f90 b/source/ply_nodes_module.f90 index 691edee..13d431d 100644 --- a/source/ply_nodes_module.f90 +++ b/source/ply_nodes_module.f90 @@ -5,13 +5,13 @@ module ply_nodes_module use tem_aux_module, only: tem_abort - use ply_space_integration_module, only: ply_create_surface_gauss_points_cube, & - & ply_create_surface_gauss_points_cube_2d,& - & ply_create_surface_gauss_points_cube_1d,& - & ply_create_volume_gauss_points_cube, & - & ply_create_volume_gauss_points_cube_2d, & - & ply_create_volume_gauss_points_cube_1d - + use ply_space_integration_module, only: ply_create_surface_gauss_points_cube, & + & ply_create_surface_gauss_points_cube_2d,& + & ply_create_surface_gauss_points_cube_1d,& + & ply_create_volume_gauss_points_cube, & + & ply_create_volume_gauss_points_cube_2d, & + & ply_create_volume_gauss_points_cube_1d + use ply_chebPoint_module, only: create_volume_cheb_points_cube, & & create_volume_cheb_points_cube_2d, & & create_volume_cheb_points_cube_1d, & diff --git a/source/ply_poly_project_module.fpp b/source/ply_poly_project_module.fpp index 98b8ddf..65658b4 100644 --- a/source/ply_poly_project_module.fpp +++ b/source/ply_poly_project_module.fpp @@ -123,7 +123,7 @@ module ply_poly_project_module !> projection header consits of general information like which kind !! of projection is used - + !> In the body datatype, there is for each dimension the main data !! for the projection method stored type(ply_prj_body_type) :: body_1d @@ -359,7 +359,7 @@ contains me%body_3d%oversamp_dofs = (oversampling_order)**3 me%body_2d%oversamp_dofs = (oversampling_order)**2 me%body_1d%oversamp_dofs = oversampling_order - + select case (trim(proj_init%header%kind)) case('fpt') ! Fill fpt datatype @@ -500,6 +500,8 @@ contains !--------------------------------------------------------------------------! integer :: iVar !--------------------------------------------------------------------------! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iVar) + select case(trim(me%kind)) case ('l2p') @@ -509,23 +511,29 @@ contains ! additional summation select case(dim) case (1) + !$OMP DO do iVar = 1, nVars call ply_l2p_trafo_1D( trafo = me%body_1D%l2p%leg2node, & & projected = nodal_data(:,iVar), & & original = modal_data(:,iVar) ) end do + !$OMP END DO case (2) + !$OMP DO do iVar = 1, nVars call ply_l2p_trafo_2D( trafo = me%body_2D%l2p%leg2node, & & projected = nodal_data(:,iVar), & & original = modal_data(:,iVar) ) end do + !$OMP END DO case (3) + !$OMP DO do iVar = 1, nVars call ply_l2p_trafo_3D( trafo = me%body_3D%l2p%leg2node, & & projected = nodal_data(:,iVar), & & original = modal_data(:,iVar) ) end do + !$OMP END DO end select case ('fpt') @@ -542,40 +550,50 @@ contains & legCoeffs = modal_data, & & nVars = nVars ) case (1) + !$OMP DO do iVar = 1,nVars call ply_LegToPnt( fpt = me%body_1d%fpt, & & pntVal = nodal_data(:,iVar), & & legCoeffs = modal_data(:,iVar), & & nIndeps = 1 ) end do + !$OMP END DO end select case ('fxt') select case (dim) case (3) + !$OMP DO do iVar = 1,nVars call ply_fxt_m2n_3D( fxt = me%body_3d%fxt, & & modal_data = modal_data(:,iVar), & & nodal_data = nodal_data(:,iVar), & & oversamp_degree = me%oversamp_degree ) end do + !$OMP END DO case (2) + !$OMP DO do iVar = 1,nVars call ply_fxt_m2n_2D( fxt = me%body_2d%fxt, & & modal_data = modal_data(:,iVar), & & nodal_data = nodal_data(:,iVar), & & oversamp_degree = me%oversamp_degree ) end do + !$OMP END DO case (1) + !$OMP DO do iVar = 1,nVars call ply_fxt_m2n_1D( fxt = me%body_1d%fxt, & & modal_data = modal_data(:,iVar), & & nodal_data = nodal_data(:,iVar) ) end do + !$OMP END DO end select end select + !$OMP END PARALLEL + end subroutine ply_poly_project_m2n_multivar !****************************************************************************! @@ -594,28 +612,36 @@ contains !--------------------------------------------------------------------------! integer :: iVar !--------------------------------------------------------------------------! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iVar) + select case(trim(me%kind)) case ('l2p') select case (dim) case (1) + !$OMP DO do iVar = 1, nVars call ply_l2p_trafo_1D( trafo = me%body_1D%l2p%node2leg, & & projected = modal_data(:,iVar), & & original = nodal_data(:,iVar) ) end do + !$OMP END DO case (2) + !$OMP DO do iVar = 1, nVars call ply_l2p_trafo_2D( trafo = me%body_2D%l2p%node2leg, & & projected = modal_data(:,iVar), & & original = nodal_data(:,iVar) ) end do + !$OMP END DO case (3) + !$OMP DO do iVar = 1, nVars call ply_l2p_trafo_3D( trafo = me%body_3D%l2p%node2leg, & & projected = modal_data(:,iVar), & & original = nodal_data(:,iVar) ) end do + !$OMP END DO end select case ('fpt') @@ -632,47 +658,57 @@ contains & pntVal = nodal_data, & & legCoeffs = modal_data ) case (1) + !$OMP DO do iVar = 1,nVars call ply_pntToLeg( fpt = me%body_1d%fpt, & & nIndeps = 1, & & pntVal = nodal_data(:,iVar), & & legCoeffs = modal_data(:,iVar) ) end do + !$OMP END DO end select case ('fxt') select case (dim) case (3) - do iVar = 1, nVars + !$OMP DO + do iVar = 1, nVars call ply_fxt_n2m_3D( & & fxt = me%body_3d%fxt, & & nodal_data = nodal_data(:,iVar), & & modal_data = modal_data(:,iVar), & & oversamp_degree = me%oversamp_degree ) end do + !$OMP END DO case (2) - do iVar = 1, nVars + !$OMP DO + do iVar = 1, nVars call ply_fxt_n2m_2D( & & fxt = me%body_2d%fxt, & & nodal_data = nodal_data(:,iVar), & & modal_data = modal_data(:,iVar), & & oversamp_degree = me%oversamp_degree ) end do + !$OMP END DO case (1) - do iVar = 1, nVars + !$OMP DO + do iVar = 1, nVars call ply_fxt_n2m_1D( & & fxt = me%body_1d%fxt, & & nodal_data = nodal_data(:,iVar), & & modal_data = modal_data(:,iVar) ) end do + !$OMP END DO end select case default write(logUnit(1),*) 'ERROR in projection nodal to modal' end select + !$OMP END PARALLEL + end subroutine ply_poly_project_n2m_multivar !***************************************************************************! diff --git a/source/ply_poly_transformation_module.f90 b/source/ply_poly_transformation_module.f90 index c727fd5..a6e7a1e 100644 --- a/source/ply_poly_transformation_module.f90 +++ b/source/ply_poly_transformation_module.f90 @@ -101,11 +101,13 @@ subroutine ply_Poly_Transformation( subsamp, dofReduction, mesh, meshData, & integer :: nVars, nDofs, nComponents, nChildDofs integer :: iVar ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iVar, nComponents, nDofs, workData, nChildDofs) nVars = size(varDofs) allocate(newVarDofs(nVars)) allocate(newMeshData(nVars)) + !$OMP DO varLoop: do iVar = 1, nVars nComponents = varComps(iVar) nDofs = vardofs(iVar) @@ -145,6 +147,10 @@ subroutine ply_Poly_Transformation( subsamp, dofReduction, mesh, meshData, & deallocate(workData) end do varLoop + !$OMP END DO + + !$OMP END PARALLEL + end subroutine ply_Poly_Transformation ! ************************************************************************ ! @@ -193,6 +199,9 @@ subroutine ply_subsampleData( mesh, meshData, nDofs, nChildDofs, & real(kind=rk), allocatable :: transform_matrix(:,:) real(kind=rk), allocatable :: childData(:) ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iParentElem, iChild, iElem) + + nChilds = 2**nDims max_modes = nint(real(nDofs, kind=rk)**(1.0_rk/real(nDims, kind=rk))) @@ -218,6 +227,7 @@ subroutine ply_subsampleData( mesh, meshData, nDofs, nChildDofs, & if (subsamp%sampling_lvl > 1) then + !$OMP DO elementLoop: do iParentElem=1,nParentElems ! Check if the parent cell was already refined... if (refine_tree(iParentElem)) then @@ -304,7 +314,9 @@ subroutine ply_subsampleData( mesh, meshData, nDofs, nChildDofs, & deallocate(childData) end if end do elementLoop + !$OMP END DO else + !$OMP DO elemLoop: do iElem=1,nElems if (new_refine_tree(iElem)) then ! Create lower and upper indices for all data of iElem in meshData. @@ -355,10 +367,13 @@ subroutine ply_subsampleData( mesh, meshData, nDofs, nChildDofs, & deallocate(childData) end if end do elemLoop + !$OMP END DO end if deallocate(transform_matrix) + !$OMP END PARALLEL + end subroutine ply_subsampleData ! ************************************************************************ ! @@ -402,6 +417,7 @@ subroutine ply_projDataToChild( parentData, nParentDofs, nChildDofs, & real(kind=rk), allocatable :: temp_data(:) real(kind=rk), allocatable :: childData_prev(:) ! -------------------------------------------------------------------- ! + parent_modes = nint(real(nParentDofs,kind=rk) & & **(1/real(nDimensions,kind=rk))) @@ -708,6 +724,7 @@ subroutine ply_projDataToChild( parentData, nParentDofs, nChildDofs, & deallocate(childData_prev) deallocate(temp_Data) + end subroutine ply_projDataToChild ! ************************************************************************ ! @@ -746,6 +763,8 @@ subroutine ply_transform_matrix(max_modes, v) integer :: m, orig real(kind=rk) :: shifting, scaling ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(orig, m) + ! transformation matrix looks like this: ! [1.0 -- -- shift=0.5 ] @@ -768,6 +787,7 @@ subroutine ply_transform_matrix(max_modes, v) v(2,2) = scaling if (max_modes > 2) then + !$OMP DO do orig = 3,max_modes v(1,orig) = ply_beta(orig-1) * v(1,orig-2) & & + ply_alpha(orig-1) * shifting * v(1,orig-1) & @@ -785,12 +805,14 @@ subroutine ply_transform_matrix(max_modes, v) end if end do end do + !$OMP END DO end if ! Due to the symmetry of the problem (the left subinterval has just ! the shifting with a changed sign), we can fill the other half of ! the matrix by copying the already computed values accordingly with ! a change in sign, as needed (alternatingly). + !$OMP DO do m = 1 , max_modes do orig = 1, m-1 if (mod((m+orig),2) /= 0) then @@ -800,8 +822,11 @@ subroutine ply_transform_matrix(max_modes, v) end if end do end do + !$OMP END DO end if + !$OMP END PARALLEL + end subroutine ply_transform_matrix ! ************************************************************************ ! diff --git a/source/ply_sampling_adaptive_module.f90 b/source/ply_sampling_adaptive_module.f90 index a7cdbd4..04bdeb6 100644 --- a/source/ply_sampling_adaptive_module.f90 +++ b/source/ply_sampling_adaptive_module.f90 @@ -115,7 +115,7 @@ module ply_sampling_adaptive_module type sampled_method_data_type type(realarray_type), allocatable :: component(:) end type sampled_method_data_type - + contains @@ -446,7 +446,7 @@ subroutine ply_sample_adaptive( me, ndims, orig_mesh, orig_bcs, varsys, & elemlevel = tem_levelOf(curmesh%treeID(iElem)) reached_limit(iElem) = lastrefine & & .or. ( (me%AbsUpperBoundLevel > 0) & - & .and. (elemlevel >= me%AbsUpperBoundLevel - 1) ) + & .and. (elemlevel >= me%AbsUpperBoundLevel - 1) ) ! Now get the spectral variation (sum of absolute values of all ! higher modes). @@ -906,6 +906,7 @@ pure function sum_abs_mode(iMode, degree, nDims, dat) result(modesum) integer :: nModes integer :: jMode, kMode ! -------------------------------------------------------------------- ! + nModes = (degree+1) select case(nDims) @@ -930,6 +931,7 @@ pure function sum_abs_mode(iMode, degree, nDims, dat) result(modesum) end do end do end select + end function sum_abs_mode end module ply_sampling_adaptive_module diff --git a/source/ply_sampling_module.fpp b/source/ply_sampling_module.fpp index aebb914..d9651b3 100644 --- a/source/ply_sampling_module.fpp +++ b/source/ply_sampling_module.fpp @@ -652,15 +652,21 @@ contains integer :: iElem integer :: nComps ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iElem) + nComps = fun%nComponents datlen = tree%nElems * nComps call c_f_pointer(fun%method_data, p) + !$OMP DO do iElem=1,n res(1+(iElem-1)*nComps:iElem*nComps) & & = p%dat(1+(elempos(iElem)-1)*nComps:elempos(iElem)*nComps) end do + !$OMP END DO + + !$OMP END PARALLEL end subroutine get_sampled_element ! ************************************************************************ ! diff --git a/source/ply_sampling_varsys_module.f90 b/source/ply_sampling_varsys_module.f90 index cd9d63f..2473da5 100644 --- a/source/ply_sampling_varsys_module.f90 +++ b/source/ply_sampling_varsys_module.f90 @@ -83,6 +83,7 @@ subroutine ply_sampling_varsys_for_track( varsys, trackInst, mesh, nDims, & integer, allocatable :: elempos(:) real(kind=rk), allocatable :: elemdat(:) ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iVar, iComponent, iElem, iTotComp) nVars = trackInst%varmap%varPos%nVals @@ -114,12 +115,14 @@ subroutine ply_sampling_varsys_for_track( varsys, trackInst, mesh, nDims, & nComponents = varsys%method%val(varpos)%nComponents nDofs = (var_degree(iVar)+1)**nDims + !$OMP DO do iComponent=1,nComponents iTotComp = iScalar+iComponent-1 call ply_sampling_var_allocate( var = var(iTotComp), & & nElems = nElems, & & datalen = nDofs*nElems ) end do + !$OMP END DO isScalar: if (nComponents == 1) then @@ -141,6 +144,7 @@ subroutine ply_sampling_varsys_for_track( varsys, trackInst, mesh, nDims, & ! To avoid overly large memory consumption, we do this element by ! element. allocate(elemDat(nComponents*ndofs)) + !$OMP DO do iElem=1,nElems call varSys%method%val(varpos)%get_element( & & varSys = varSys, & @@ -156,6 +160,7 @@ subroutine ply_sampling_varsys_for_track( varsys, trackInst, mesh, nDims, & & = elemdat(iComponent::nComponents) end do end do + !$OMP END DO deallocate(elemDat) end if isScalar @@ -165,6 +170,7 @@ subroutine ply_sampling_varsys_for_track( varsys, trackInst, mesh, nDims, & ! polynomial degree in each element. ! This is redundant, but convenient. lastScalar = iScalar+nComponents-1 + !$OMP DO do iComponent=0,nComponents-1 var(iScalar+iComponent)%first(1) = 1 do iElem=1,nElems @@ -173,11 +179,14 @@ subroutine ply_sampling_varsys_for_track( varsys, trackInst, mesh, nDims, & var(iScalar+iComponent)%degree(iElem) = var_degree(iVar) end do end do + !$OMP END DO iScalar = iScalar + nComponents end do variables + !$OMP END PARALLEL + end subroutine ply_sampling_varsys_for_track ! ------------------------------------------------------------------------ ! ! ------------------------------------------------------------------------ ! @@ -298,6 +307,7 @@ subroutine ply_sampling_var_compute_elemdev(var, threshold, min_mean) integer :: nElems integer :: ndofs ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iElem, ndofs, absmean, variation) if (allocated(var%deviates)) deallocate(var%deviates) var%nDeviating = 0 @@ -305,6 +315,7 @@ subroutine ply_sampling_var_compute_elemdev(var, threshold, min_mean) if (allocated(var%first)) then nElems = size(var%first)-1 allocate(var%deviates(nElems)) + !$OMP DO do iElem=1,nElems ndofs = var%first(iElem+1) - var%first(iElem) - 1 absmean = max( abs(var%dat(var%first(iElem))), min_mean ) @@ -312,8 +323,11 @@ subroutine ply_sampling_var_compute_elemdev(var, threshold, min_mean) var%deviates(iElem) = (variation > threshold*absmean) if (var%deviates(iElem)) var%nDeviating = var%nDeviating + 1 end do + !$OMP END DO end if + !$OMP END PARALLEL + end subroutine ply_sampling_var_compute_elemdev ! ------------------------------------------------------------------------ ! ! ------------------------------------------------------------------------ ! diff --git a/source/ply_space_integration_module.f90 b/source/ply_space_integration_module.f90 index 8d7ae83..fc5bb7f 100644 --- a/source/ply_space_integration_module.f90 +++ b/source/ply_space_integration_module.f90 @@ -115,6 +115,7 @@ subroutine ply_create_volume_gauss_points_cube( num_intp_per_direction, & real(kind=rk), allocatable :: weights1D(:) integer :: numQuadPoints ! ---------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(k, j, i, pointNumber) numQuadPoints = num_intp_per_direction**3 allocate(points(numQuadPoints,3)) @@ -129,6 +130,7 @@ subroutine ply_create_volume_gauss_points_cube( num_intp_per_direction, & & nIntP = num_intp_per_direction ) pointNumber = 1 + !$OMP DO do k = 1, num_intp_per_direction do j = 1, num_intp_per_direction do i = 1, num_intp_per_direction @@ -144,6 +146,10 @@ subroutine ply_create_volume_gauss_points_cube( num_intp_per_direction, & end do end do end do + !$OMP END DO + + !$OMP END PARALLEL + end subroutine ply_create_volume_gauss_points_cube ! *********************************************************************** ! @@ -166,6 +172,7 @@ subroutine ply_create_volume_gauss_points_cube_2d( num_intp_per_direction, & real(kind=rk), allocatable :: weights1D(:) integer :: numQuadPoints ! ---------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(j, i, pointNumber) numQuadPoints = num_intp_per_direction**2 allocate(points(numQuadPoints,3)) @@ -180,6 +187,7 @@ subroutine ply_create_volume_gauss_points_cube_2d( num_intp_per_direction, & & nIntP = num_intp_per_direction ) pointNumber = 1 + !$OMP DO do j = 1, num_intp_per_direction do i = 1, num_intp_per_direction !> here we build all possible combinations of the one-dimensional @@ -192,6 +200,10 @@ subroutine ply_create_volume_gauss_points_cube_2d( num_intp_per_direction, & pointNumber = pointNumber + 1 end do end do + !$OMP END DO + + !$OMP END PARALLEL + end subroutine ply_create_volume_gauss_points_cube_2d ! *********************************************************************** ! @@ -213,6 +225,7 @@ subroutine ply_create_volume_gauss_points_cube_1d( num_intp_per_direction, & real(kind=rk), allocatable :: gaussp1D(:) real(kind=rk), allocatable :: weights1D(:) ! ---------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(i) allocate(points(num_intp_per_direction,3)) allocate(weights(num_intp_per_direction)) @@ -226,6 +239,7 @@ subroutine ply_create_volume_gauss_points_cube_1d( num_intp_per_direction, & & nIntP = num_intp_per_direction ) pointNumber = 1 + !$OMP DO do i = 1, num_intp_per_direction !> here we build all possible combinations of the one-dimensional !! quadrature points to get the three dimensional values. @@ -236,6 +250,10 @@ subroutine ply_create_volume_gauss_points_cube_1d( num_intp_per_direction, & weights(PointNumber) = weights1D(i) pointNumber = pointNumber + 1 end do + !$OMP END DO + + !$OMP END PARALLEL + end subroutine ply_create_volume_gauss_points_cube_1d ! *********************************************************************** ! @@ -265,6 +283,8 @@ subroutine ply_create_surface_gauss_points_cube( num_intp_per_direction, & real(kind=rk), allocatable :: weights1D(:) integer :: nquadPoints ! ---------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(j, i) + nQuadPoints = num_intp_per_direction**2 allocate(points(nQuadPoints,3)) @@ -282,6 +302,7 @@ subroutine ply_create_surface_gauss_points_cube( num_intp_per_direction, & select case(dir) case(1) ! face in x direction, x coord is fixed + !$OMP DO do j = 1, num_intp_per_direction do i = 1, num_intp_per_direction !> here we build all possible combinations of the one-dimensional @@ -293,8 +314,10 @@ subroutine ply_create_surface_gauss_points_cube( num_intp_per_direction, & pointNumber = pointNumber + 1 end do end do + !$OMP END DO case(2) ! face in y direction, y coord is fixed + !$OMP DO do j = 1, num_intp_per_direction do i = 1, num_intp_per_direction !> here we build all possible combinations of the one-dimensional @@ -306,8 +329,10 @@ subroutine ply_create_surface_gauss_points_cube( num_intp_per_direction, & pointNumber = pointNumber + 1 end do end do + !$OMP END DO case(3) ! face in z direction, z coord is fixed + !$OMP DO do j = 1, num_intp_per_direction do i = 1, num_intp_per_direction !> here we build all possible combinations of the one-dimensional @@ -319,6 +344,7 @@ subroutine ply_create_surface_gauss_points_cube( num_intp_per_direction, & pointNumber = pointNumber + 1 end do end do + !$OMP END DO case default call tem_abort( 'ERROR in create_surface_gauss_points_cube:' & @@ -326,6 +352,8 @@ subroutine ply_create_surface_gauss_points_cube( num_intp_per_direction, & end select + !$OMP END PARALLEL + end subroutine ply_create_surface_gauss_points_cube ! *********************************************************************** ! @@ -355,6 +383,8 @@ subroutine ply_create_surface_gauss_points_cube_2d(num_intp_per_direction, & real(kind=rk), allocatable :: weights1D(:) integer :: nQuadPoints ! ---------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(i) + ! The number of quadrature points on the boundary of a 2d volume is the ! number of quad points in one direction nQuadPoints = num_intp_per_direction @@ -374,6 +404,7 @@ subroutine ply_create_surface_gauss_points_cube_2d(num_intp_per_direction, & select case(dir) case(1) ! face in x direction, x coord is fixed + !$OMP DO do i = 1, num_intp_per_direction !> here we build all possible combinations of the one-dimensional !! quadrature points for 2d case to get the three dimensional values. @@ -383,8 +414,10 @@ subroutine ply_create_surface_gauss_points_cube_2d(num_intp_per_direction, & weights(PointNumber) = weights1D(i) pointNumber = pointNumber + 1 end do + !$OMP END DO case(2) ! face in y direction, y coord is fixed + !$OMP DO do i = 1, num_intp_per_direction !> here we build all possible combinations of the one-dimensional !! quadrature points in 2d case to get the three dimensional values. @@ -394,6 +427,7 @@ subroutine ply_create_surface_gauss_points_cube_2d(num_intp_per_direction, & weights(PointNumber) = weights1D(i) pointNumber = pointNumber + 1 end do + !$OMP END DO case default call tem_abort( 'ERROR in create_surface_gauss_points_cube_2d:' & @@ -401,6 +435,8 @@ subroutine ply_create_surface_gauss_points_cube_2d(num_intp_per_direction, & end select + !$OMP END PARALLEL + end subroutine ply_create_surface_gauss_points_cube_2d ! *********************************************************************** ! @@ -467,6 +503,8 @@ subroutine ply_create_gauss_points_1d( num_intp_per_direction, & real(kind=rk), allocatable :: gaussp1D(:) real(kind=rk), allocatable :: weights1D(:) ! ---------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(j) + numQuadPoints = num_intp_per_direction allocate(points(numQuadPoints,1)) @@ -481,6 +519,7 @@ subroutine ply_create_gauss_points_1d( num_intp_per_direction, & & nIntP = num_intp_per_direction ) pointNumber = 1 + !$OMP DO do j = 1, num_intp_per_direction !> here we build all possible combinations of the one-dimensional !! quadrature points to get the three dimensional values. @@ -488,6 +527,10 @@ subroutine ply_create_gauss_points_1d( num_intp_per_direction, & weights(PointNumber) = weights1D(j) pointNumber = pointNumber + 1 end do + !$OMP END DO + + !$OMP END PARALLEL + end subroutine ply_create_gauss_points_1d ! *********************************************************************** ! @@ -515,12 +558,14 @@ subroutine ply_gaussLegPoints( x1, x2, x, w, nIntP ) real(kind=rk) :: EPS integer :: m, i, j ! ---------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(i, p1, p2, p3, pp, z1, z) EPS= 1.0 / (10.0**(PRECISION(1.0_rk)-2) ) m = (nIntP+1)/2 xm = 0.5*(x2+x1) xl = 0.5*(x2-x1) + !$OMP DO do i = 1, m z = cos(PI*((i-1)+0.75_rk)/(nIntP+0.5_rk)) @@ -545,6 +590,9 @@ subroutine ply_gaussLegPoints( x1, x2, x, w, nIntP ) w(nIntp-i+1) = w(i) end do + !$OMP END DO + + !$OMP END PARALLEL end subroutine ply_gaussLegPoints ! *********************************************************************** ! diff --git a/source/ply_split_element_module.f90 b/source/ply_split_element_module.f90 index a02c1b8..5fd0468 100644 --- a/source/ply_split_element_module.f90 +++ b/source/ply_split_element_module.f90 @@ -126,7 +126,7 @@ end subroutine ply_split_element_init !! indices would most likely be better. !! Maybe, using explicit shaped arrays and therby allowing more dimensions !! in the input, while keeping the interface to two dimensions for all - !! cases (the normal direction and all independent degrees of freedom). + !! cases (the normal direction and all independent degrees of freedom). !! For vectorization on x86 it also is necessary to have a stride-1 access !! only in reading and writing. !! The rotation of data might not be the best option because of this. @@ -219,6 +219,7 @@ subroutine ply_split_element_singleD( nDims, inLen, outLen, parent_data, & integer :: nParents integer :: parentpos, childpos ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iDir, parentMode, iParent, childMode, indep) nParents = size(parent_data,2) @@ -232,10 +233,13 @@ subroutine ply_split_element_singleD( nDims, inLen, outLen, parent_data, & ! The number of independent modes (in normal directions) is given ! by the product of the length in all directions, except the last one. nIndeps = 1 + !$OMP DO do iDir=1,nDims-1 nIndeps = nIndeps*inLen(iDir) end do + !$OMP END DO + !$OMP DO oldmodes: do parentMode=1,inLen(nDims) ! Maximal number modes to compute, as this is a triangular matrix ! it is limited by the diagonal (parentMode). However, it may be @@ -266,6 +270,9 @@ subroutine ply_split_element_singleD( nDims, inLen, outLen, parent_data, & end do elemloop end do oldmodes + !$OMP END DO + + !$OMP END PARALLEL end subroutine ply_split_element_singleD ! ======================================================================== ! diff --git a/source/ply_split_legendre_module.f90 b/source/ply_split_legendre_module.f90 index 2b7ffda..3af9284 100644 --- a/source/ply_split_legendre_module.f90 +++ b/source/ply_split_legendre_module.f90 @@ -346,7 +346,7 @@ subroutine ply_split_legendre_test(success) write(*,*) 'Beta check failed' RETURN end if - + ! Expected properties for alpha_frac: ! * It should be the same as dividing the corresponding values of alpha ! * If both modes are identic, it should be one diff --git a/source/ply_transfer_module.fpp b/source/ply_transfer_module.fpp index 35a2a99..5f87f8a 100644 --- a/source/ply_transfer_module.fpp +++ b/source/ply_transfer_module.fpp @@ -123,12 +123,14 @@ contains ispace_oq: if (inspace == Q_Space) then + !$OMP DO ! Both, output and input are Q Polynomials do out_Y=0,minord-1 out_off = out_Y*(outdegree+1) in_off = out_Y*(indegree+1) outdat(out_off+1:out_off+minord) = indat(in_off+1:in_off+minord) end do + !$OMP END DO else ispace_oq From 8f40ed583de53c43ecd581580ee291554d9ad6d1 Mon Sep 17 00:00:00 2001 From: Robin Weihe Date: Tue, 30 Oct 2018 09:38:11 +0100 Subject: [PATCH 04/21] rams%b and params%u were removed from params. They were put in their own datatype. The modules where params is used were updated. ply_legFpt_module, ply_l2p_module, ply_leg_diff_module and ply_oversample_module were OpenMP parallelized. --HG-- branch : feature/openmp --- source/ply_oversample_module.fpp | 48 +++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/source/ply_oversample_module.fpp b/source/ply_oversample_module.fpp index aa8780b..38aab0b 100644 --- a/source/ply_oversample_module.fpp +++ b/source/ply_oversample_module.fpp @@ -155,7 +155,9 @@ contains nScalars = size(modalCoeffs,2) maxorders = 3*(poly_proj%min_degree + 1)-2 + !$OMP WORKSHARE modalCoeffs = 0.0_rk + !$OMP END WORKSHARE if (poly_proj%basisType == Q_Space) then posQ: if (present(ensure_positivity)) then @@ -163,6 +165,7 @@ contains varQ: do iVar=1,nScalars if (ensure_positivity(iVar)) then ordersum = 0.0_rk + !$OMP SINGLE do dof = 1, mpd1_cube iDegZ = (dof-1)/mpd1_square + 1 iDegY = (dof-1-(iDegZ-1)*mpd1_square)/mpd1+1 @@ -178,9 +181,11 @@ contains EXIT end if end do + !$OMP END SINGLE end if end do varQ do iVar=1,nScalars + !$OMP DO do dof = 1, mpd1_cube iDegZ = (dof-1)/mpd1_square + 1 iDegY = (dof-1-(iDegZ-1)*mpd1_square)/mpd1+1 @@ -193,8 +198,10 @@ contains modalCoeffs(dofOverSamp,iVar) = state(dof,iVar) end if end do + !$OMP END DO end do else posQ + !$OMP DO do dof = 1, mpd1_cube iDegZ = (dof-1)/mpd1_square + 1 iDegY = (dof-1-(iDegZ-1)*mpd1_square)/mpd1+1 @@ -206,6 +213,7 @@ contains modalCoeffs(dofOverSamp,iVar) = state(dof,iVar) end do end do + !$OMP END DO end if posQ else !P_Space @@ -218,6 +226,7 @@ contains iDegY = 1 iDegZ = 1 ordersum = 0.0_rk + !$OMP SINGLE do idof = 1, poly_proj%body_3d%min_dofs ?? copy :: posOfModgCoeffPTens(iDegX, iDegY, iDegZ, dof) iOrd = iDegX+iDegY+iDegZ-2 @@ -232,12 +241,14 @@ contains EXIT end if end do + !$OMP END SINGLE end if end do varP do iVar=1,nScalars iDegX = 1 iDegY = 1 iDegZ = 1 + !$OMP SINGLE do idof = 1, poly_proj%body_3d%min_dofs iOrd = iDegX+iDegY+iDegZ-2 if (iOrd > ord_lim) EXIT @@ -248,11 +259,13 @@ contains modalCoeffs(dofOverSamp,iVar) = state(dof,iVar) ?? copy :: nextModgCoeffPTens(iDegX, iDegY, iDegZ) end do + !$OMP END SINGLE end do else posP iDegX = 1 iDegY = 1 iDegZ = 1 + !$OMP SINGLE do idof = 1, poly_proj%body_3d%min_dofs ?? copy :: posOfModgCoeffPTens(iDegX, iDegY, iDegZ, dof) dofOverSamp = iDegX + ( iDegY-1 & @@ -261,6 +274,7 @@ contains modalCoeffs(dofOverSamp,:) = state(dof,:) ?? copy :: nextModgCoeffPTens(iDegX, iDegY, iDegZ) end do + !$OMP END SINGLE end if posP end if @@ -302,6 +316,7 @@ contains nScalars = size(modalCoeffs,2) if (poly_proj%basisType == Q_Space) then + !$OMP DO do iVar=1,nScalars do dof = 1, mpd1_cube iDegZ = (dof-1)/mpd1_square + 1 @@ -313,12 +328,14 @@ contains state(dof,iVar) = modalCoeffs(dofOverSamp,iVar) end do end do + !$OMP END DO else !P_Space iDegX = 1 iDegY = 1 iDegZ = 1 + !$OMP SINGLE do idof = 1, poly_proj%body_3d%min_dofs ?? copy :: posOfModgCoeffPTens(iDegX, iDegY, iDegZ, dof) dofOverSamp = iDegX + ( iDegY-1 & @@ -327,6 +344,7 @@ contains state(dof,:) = modalCoeffs(dofOverSamp,:) ?? copy :: nextModgCoeffPTens(iDegX, iDegY, iDegZ) end do + !$OMP END SINGLE end if end subroutine ply_convertFromoversample_3d @@ -390,7 +408,9 @@ contains mpd1_square = mpd1**2 ! Initialize oversampled space correct to 0 + !$OMP WORKSHARE modalCoeffs(:,:) = 0.0_rk + !$OMP END WORKSAHRE if (poly_proj%basisType == Q_Space) then posQ: if (present(ensure_positivity)) then @@ -398,6 +418,7 @@ contains varQ: do iVar=1,nPVars if (ensure_positivity(iVar)) then ordersum = 0.0_rk + !$OMP SINGLE do dof = 1, mpd1_square iDegX = mod(dof-1,mpd1)+1 iDegY = (dof-1)/mpd1+1 @@ -412,9 +433,11 @@ contains EXIT end if end do + !$OMP END SINGLE end if end do varQ do iVar=1,nPVars + !$OMP DO do dof = 1, mpd1_square iDegX = mod(dof-1,mpd1)+1 iDegY = (dof-1)/mpd1+1 @@ -424,14 +447,17 @@ contains modalCoeffs(dofOverSamp,iVar) = state(dof,iVar) end if end do + !$OMP END DO end do else posQ + !$OMP DO do dof = 1, mpd1_square iDegX = mod(dof-1,mpd1)+1 iDegY = (dof-1)/mpd1+1 dofOverSamp = 1 + (iDegX-1) + (iDegY-1)*(oversamp_degree+1) modalCoeffs(dofOverSamp,1:nPVars) = state(dof,1:nPVars) end do + !$OMP END DO end if posQ else !P_Space @@ -440,6 +466,7 @@ contains ord_lim = maxorders varP: do iVar=1,nPVars if (ensure_positivity(iVar)) then + !$OMP SINGLE iDegX = 1 iDegY = 1 ordersum = 0.0_rk @@ -457,9 +484,11 @@ contains EXIT end if end do + !$OMP END SINGLE end if end do varP do iVar=1,nPVars + !$OMP SINGLE iDegX = 1 iDegY = 1 do idof = 1, poly_proj%body_2d%min_dofs @@ -470,8 +499,10 @@ contains modalCoeffs(dofOverSamp,1:nPVars) = state(dof,1:nPVars) ?? copy :: nextModgCoeffPTens2D(iDegX, iDegY) end do + !$OMP END SINGLE end do else posP + !$OMP SINGLE iDegX = 1 iDegY = 1 do idof = 1, poly_proj%body_2d%min_dofs @@ -480,6 +511,7 @@ contains modalCoeffs(dofOverSamp,1:nPVars) = state(dof,1:nPVars) ?? copy :: nextModgCoeffPTens2D(iDegX, iDegY) end do + !$OMP END SINGLE end if posP end if @@ -530,15 +562,18 @@ contains if (poly_proj%basisType == Q_Space) then + !$OMP DO do dof = 1, mpd1_square iDegX = mod(dof-1,mpd1)+1 iDegY = (dof-1)/mpd1+1 dofOverSamp = 1 + (iDegX-1) + (iDegY-1)*(oversamp_degree+1) state(dof,1:nPVars) = modalCoeffs(dofOverSamp,1:nPVars) end do + !$OMP END DO else !P_Space - + + !$OMP SINGLE iDegX = 1 iDegY = 1 iDegZ = 0 ! not used in posOfModgCoeffPTens_2D, nextModgCoeffPTens @@ -548,6 +583,7 @@ contains state(dof,1:nPVars) = modalCoeffs(dofOverSamp,1:nPVars) ?? copy :: nextModgCoeffPTens2D(iDegX, iDegY) end do + !$OMP END SINGLE end if @@ -597,13 +633,16 @@ contains nPVars = (poly_proj%maxPolyDegree+1)*nVars ! Initialize oversampled space correct to 0 + !$OMP WORKSHARE ModalCoeffs(:,:) = 0.0_rk + !$OMP END WORKSHARE if (present(ensure_positivity)) then ord_lim = poly_proj%min_degree+1 do iVar = 1,nVars if (ensure_positivity(iVar)) then varSum = 0.0_rk + !$OMP SINGLE do iPoint=2,ord_lim varSum = varSum + abs(state(iPoint,iVar)) if (varSum >= state(1,iVar)) then @@ -611,19 +650,24 @@ contains EXIT end if end do + !$OMP END SINGLE end if end do do iVar=1,nVars + !$OMP DO do iPoint=1,ord_lim ModalCoeffs(iPoint,iVar) = state(iPoint,iVar) end do + !$OMP END DO end do else + !$OMP DO do iVP = 1,nPVars iVar = (iVP-1)/(poly_proj%min_degree+1) + 1 iPoint = iVP - (iVar-1)*(poly_proj%min_degree+1) ModalCoeffs(iPoint,iVar) = state(iPoint,iVar) end do + !$OMP END DO end if end subroutine ply_convert2oversample_1d @@ -659,11 +703,13 @@ contains nPVars = (poly_proj%maxPolyDegree+1)*size(state,2) end if + !$OMP DO do iVP = 1,nPVars iVar = (iVP-1)/(poly_proj%min_degree+1) + 1 iPoint = iVP - (iVar-1)*(poly_proj%min_degree+1) state(iPoint,iVar) = modalCoeffs(iPoint,iVar) end do + !$OMP END DO end subroutine ply_convertFromoversample_1d ! ************************************************************************ ! From 21f5a440f74c82df5eadbd6b2013eeb96775667f Mon Sep 17 00:00:00 2001 From: Robin Weihe Date: Tue, 30 Oct 2018 15:13:26 +0100 Subject: [PATCH 05/21] params%b and params%u were removed from params for parallel implementation. They were put in their own Datatype. The modules using params were updated. Open MP implementation in ply_legfpt_module, ply_l2p_module,ply_leg_diff_module and ply_oversample_module --HG-- branch : feature/openmp --- source/fpt/ply_chebPoint_module.f90 | 73 -------------- source/fpt/ply_legFpt_2D_module.fpp | 44 ++++----- source/fpt/ply_legFpt_3D_module.fpp | 35 ++++--- source/fpt/ply_legFpt_module.f90 | 51 +++++++--- source/fpt/ply_polyBaseExc_module.fpp | 98 ++++++++++++------- source/ply_LegPolyProjection_module.f90 | 23 +---- source/ply_fxt_module.f90 | 36 +------ source/ply_l2p_module.f90 | 2 +- source/ply_leg_diff_module.fpp | 16 ++- source/ply_legser_module.f90 | 6 -- source/ply_modg_basis_module.fpp | 16 +-- source/ply_oversample_module.fpp | 46 +++++++++ source/ply_poly_project_module.fpp | 67 +++++-------- source/ply_poly_transformation_module.f90 | 19 ---- source/ply_sampling_module.fpp | 4 - source/ply_sampling_varsys_module.f90 | 15 +-- source/ply_space_integration_module.f90 | 35 ------- source/ply_split_element_module.f90 | 7 +- source/ply_transfer_module.fpp | 2 - .../ply_fpt_2D_lobattoNodes_test.fpp | 10 +- .../ply_fpt_2D_singVar_lobattoNodes_test.fpp | 10 +- utests/with_fftw/ply_fpt_2D_singVar_test.fpp | 10 +- utests/with_fftw/ply_fpt_2D_test.fpp | 9 +- .../ply_fpt_3D_lobattoNodes_test.fpp | 9 +- .../with_fftw/ply_fpt_3D_performance_test.f90 | 33 ++++--- utests/with_fftw/ply_fpt_3D_test.fpp | 9 +- ...fpt_ifpt_2D_multiVar_lobattoNodes_test.f90 | 39 ++++---- .../ply_fpt_ifpt_2D_multiVar_test.f90 | 35 ++++--- ..._fpt_ifpt_2D_singVar_lobattoNodes_test.f90 | 15 ++- .../ply_fpt_ifpt_2D_singVar_test.f90 | 35 ++++--- ...fpt_ifpt_3D_multiVar_lobattoNodes_test.f90 | 35 ++++--- .../ply_fpt_ifpt_3D_multiVar_test.f90 | 33 ++++--- ..._fpt_ifpt_3D_singVar_lobattoNodes_test.f90 | 35 ++++--- .../ply_fpt_ifpt_3D_singVar_test.f90 | 35 ++++--- utests/with_fftw/ply_fpt_ifpt_test.f90 | 24 +++-- .../with_fftw/ply_fpt_lobattoNodes_test.f90 | 32 +++--- utests/with_fftw/ply_fpt_test.f90 | 8 +- .../ply_ifpt_3D_singVar_lobattoNodes_test.fpp | 10 +- utests/with_fftw/ply_ifpt_3D_singVar_test.fpp | 10 +- .../with_fftw/ply_ifpt_lobattoNodes_test.f90 | 32 +++--- utests/with_fftw/ply_ifpt_test.f90 | 33 ++++--- 41 files changed, 525 insertions(+), 571 deletions(-) diff --git a/source/fpt/ply_chebPoint_module.f90 b/source/fpt/ply_chebPoint_module.f90 index 01dc4f0..c9a4b1c 100644 --- a/source/fpt/ply_chebPoint_module.f90 +++ b/source/fpt/ply_chebPoint_module.f90 @@ -39,17 +39,11 @@ subroutine ply_chebPoint_1D( nPoints, chebPnt1D ) ! -------------------------------------------------------------------- ! integer :: iPoint ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iPoint) - allocate(chebPnt1D(nPoints)) - !$OMP DO do iPoint = 1, nPoints chebPnt1D(iPoint) = -1.0_rk & & * cos( PI / nPoints * ( (iPoint - 1.0_rk) + 1.0_rk / 2.0_rk ) ) end do - !$OMP END DO - - !$OMP END PARALLEL end subroutine ply_chebPoint_1D ! ************************************************************************ ! @@ -70,16 +64,11 @@ subroutine ply_lobattoChebPoint_1D( nPoints, chebPnt1D ) ! -------------------------------------------------------------------- ! integer :: iPoint ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iPoint) allocate(chebPnt1D(nPoints)) - !$OMP DO do iPoint = 1, nPoints chebPnt1D(iPoint) = cos( ( iPoint - 1.0_rk ) * PI / ( nPoints - 1.0_rk ) ) end do - !$OMP END DO - - !$OMP END PARALLEL end subroutine ply_lobattoChebPoint_1D ! ************************************************************************ ! @@ -96,14 +85,12 @@ subroutine create_volume_cheb_points_cube(num_intp_per_direction, points) real(kind=rk), allocatable :: chebPnt1D(:) integer :: nquadpoints ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(k, j, i, pointNumber) nquadpoints = num_intp_per_direction**3 allocate(points(nquadpoints,3)) call ply_chebPoint_1D( num_intp_per_direction, chebPnt1D ) pointNumber = 1 - !$OMP DO do k = 1, num_intp_per_direction do j = 1, num_intp_per_direction do i = 1, num_intp_per_direction @@ -116,9 +103,6 @@ subroutine create_volume_cheb_points_cube(num_intp_per_direction, points) end do end do end do - !$OMP END DO - - !$OMP END PARALLEL end subroutine create_volume_cheb_points_cube ! ************************************************************************ ! @@ -136,14 +120,12 @@ subroutine create_volume_lobattocheb_points_cube( num_intp_per_direction, & real(kind=rk), allocatable :: chebPnt1D(:) integer :: nquadpoints ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(k, j, i, pointNumber) nquadpoints = num_intp_per_direction**3 allocate(points(nquadpoints,3)) call ply_lobattoChebPoint_1D( num_intp_per_direction, chebPnt1D ) pointNumber = 1 - !$OMP DO do k = 1, num_intp_per_direction do j = 1, num_intp_per_direction do i = 1, num_intp_per_direction @@ -156,9 +138,6 @@ subroutine create_volume_lobattocheb_points_cube( num_intp_per_direction, & end do end do end do - !$OMP END DO - - !$OMP END PARALLEL end subroutine create_volume_lobattocheb_points_cube ! ************************************************************************ ! @@ -176,8 +155,6 @@ subroutine create_volume_cheb_points_cube_2d( num_intp_per_direction, points ) real(kind=rk), allocatable :: chebPnt1D(:) integer :: nquadpoints ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(j, i, pointNumber) - nquadpoints = num_intp_per_direction**2 allocate(points(nquadpoints,3)) @@ -185,7 +162,6 @@ subroutine create_volume_cheb_points_cube_2d( num_intp_per_direction, points ) call ply_chebPoint_1D( num_intp_per_direction, chebPnt1D ) pointNumber = 1 - !$OMP DO do j = 1, num_intp_per_direction do i = 1, num_intp_per_direction ! here we build all possible combinations of the one-dimensional @@ -196,9 +172,6 @@ subroutine create_volume_cheb_points_cube_2d( num_intp_per_direction, points ) pointNumber = pointNumber + 1 end do end do - !$OMP END DO - - !$OMP END PARALLEL end subroutine create_volume_cheb_points_cube_2d ! ************************************************************************ ! @@ -218,14 +191,12 @@ subroutine create_volume_lobattocheb_points_cube_2d(num_intp_per_direction, poin real(kind=rk), allocatable :: chebPnt1D(:) integer :: nquadpoints ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(j, i, pointNumber) nquadpoints = num_intp_per_direction**2 allocate(points(nquadpoints,3)) call ply_lobattoChebPoint_1D( num_intp_per_direction, chebPnt1D ) pointNumber = 1 - !$OMP DO do j = 1, num_intp_per_direction do i = 1, num_intp_per_direction ! here we build all possible combinations of the one-dimensional @@ -236,9 +207,6 @@ subroutine create_volume_lobattocheb_points_cube_2d(num_intp_per_direction, poin pointNumber = pointNumber + 1 end do end do - !$OMP END DO - - !$OMP END PARALLEL end subroutine create_volume_lobattocheb_points_cube_2d ! ************************************************************************ ! @@ -254,13 +222,11 @@ subroutine create_volume_cheb_points_cube_1d( num_intp_per_direction, points ) integer :: pointNumber real(kind=rk), allocatable :: chebPnt1D(:) ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(i, pointNumber) allocate(points(num_intp_per_direction,3)) call ply_chebPoint_1D( num_intp_per_direction, chebPnt1D ) pointNumber = 1 - !$OMP DO do i = 1, num_intp_per_direction ! here we build all possible combinations of the one-dimensional ! points to get the three dimensional values. @@ -269,9 +235,6 @@ subroutine create_volume_cheb_points_cube_1d( num_intp_per_direction, points ) points(pointNumber, 3) = 0.0_rk pointNumber = pointNumber + 1 end do - !$OMP END DO - - !$OMP END PARALLEL end subroutine create_volume_cheb_points_cube_1d ! ************************************************************************ ! @@ -290,13 +253,11 @@ subroutine create_volume_lobattocheb_points_cube_1d( num_intp_per_direction, & integer :: pointNumber real(kind=rk), allocatable :: chebPnt1D(:) ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(i, pointNumber) allocate(points(num_intp_per_direction,3)) call ply_lobattoChebPoint_1D( num_intp_per_direction, chebPnt1D ) pointNumber = 1 - !$OMP DO do i = 1, num_intp_per_direction ! here we build all possible combinations of the one-dimensional ! points to get the three dimensional values. @@ -305,9 +266,6 @@ subroutine create_volume_lobattocheb_points_cube_1d( num_intp_per_direction, & points(pointNumber, 3) = 0.0_rk pointNumber = pointNumber + 1 end do - !$OMP END DO - - !$OMP END PARALLEL end subroutine create_volume_lobattocheb_points_cube_1d ! ************************************************************************ ! @@ -333,7 +291,6 @@ subroutine create_surface_cheb_points_cube( num_intp_per_direction, points, & real(kind=rk), allocatable :: chebPnt1D(:) integer :: nquadpoints ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(j, i, pointNumber) nquadpoints = num_intp_per_direction**2 allocate(points(nquadpoints,3)) @@ -344,7 +301,6 @@ subroutine create_surface_cheb_points_cube( num_intp_per_direction, points, & select case(dir) case(1) ! face in x direction, x coord is fixed - !$OMP DO do j = 1, num_intp_per_direction do i = 1, num_intp_per_direction points(pointNumber, 1) = (-1.0_rk)**align @@ -353,9 +309,7 @@ subroutine create_surface_cheb_points_cube( num_intp_per_direction, points, & pointNumber = pointNumber + 1 end do end do - !$OMP END DO case(2) ! face in y direction, y coord is fixed - !$OMP DO do j = 1, num_intp_per_direction do i = 1, num_intp_per_direction points(pointNumber, 1) = chebPnt1D(i) @@ -364,9 +318,7 @@ subroutine create_surface_cheb_points_cube( num_intp_per_direction, points, & pointNumber = pointNumber + 1 end do end do - !$OMP END DO case(3) ! face in z direction, z coord is fixes - !$OMP DO do j = 1, num_intp_per_direction do i = 1, num_intp_per_direction points(pointNumber , 1) = chebPnt1D(i) @@ -375,14 +327,11 @@ subroutine create_surface_cheb_points_cube( num_intp_per_direction, points, & pointNumber = pointNumber + 1 end do end do - !$OMP END DO case default call tem_abort( 'ERROR in create_surface_cheb_points_cube: unknown ' & & // 'face direction' ) end select - !$OMP END PARALLEL - end subroutine create_surface_cheb_points_cube ! ************************************************************************ ! @@ -407,7 +356,6 @@ subroutine create_surface_lobattocheb_points_cube( num_intp_per_direction, & real(kind=rk), allocatable :: chebPnt1D(:) integer :: nquadpoints ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(j, i, pointNumber) nquadpoints = num_intp_per_direction**2 allocate(points(nquadpoints,3)) @@ -418,7 +366,6 @@ subroutine create_surface_lobattocheb_points_cube( num_intp_per_direction, & select case(dir) case(1) ! face in x direction, x coord is fixed - !$OMP DO do j = 1, num_intp_per_direction do i = 1, num_intp_per_direction points(pointNumber, 1) = (-1.0_rk)**align @@ -427,9 +374,7 @@ subroutine create_surface_lobattocheb_points_cube( num_intp_per_direction, & pointNumber = pointNumber + 1 end do end do - !$OMP END DO case(2) ! face in y direction, y coord is fixed - !$OMP DO do j = 1, num_intp_per_direction do i = 1, num_intp_per_direction points(pointNumber, 1) = chebPnt1D(i) @@ -438,9 +383,7 @@ subroutine create_surface_lobattocheb_points_cube( num_intp_per_direction, & pointNumber = pointNumber + 1 end do end do - !$OMP END DO case(3) ! face in z direction, z coord is fixes - !$OMP DO do j = 1, num_intp_per_direction do i = 1, num_intp_per_direction points(pointNumber, 1) = chebPnt1D(i) @@ -449,14 +392,11 @@ subroutine create_surface_lobattocheb_points_cube( num_intp_per_direction, & pointNumber = pointNumber + 1 end do end do - !$OMP END DO case default call tem_abort( 'ERROR in create_surface_lobattocheb_points_cube:' & & // ' unknown face direction' ) end select - !$OMP END PARALLEL - end subroutine create_surface_lobattocheb_points_cube ! ************************************************************************ ! @@ -480,7 +420,6 @@ subroutine create_surface_cheb_points_cube_2d( num_intp_per_direction, & real(kind=rk), allocatable :: chebPnt1D(:) integer :: nquadpoints ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(i, pointNumber) nquadpoints = num_intp_per_direction allocate(points(nquadpoints,3)) @@ -491,29 +430,24 @@ subroutine create_surface_cheb_points_cube_2d( num_intp_per_direction, & select case(dir) case(1) ! face in x direction, x coord is fixed - !$OMP DO do i = 1, num_intp_per_direction points(pointNumber, 1) = (-1.0_rk)**align points(pointNumber, 2) = chebPnt1D(i) points(pointNumber, 3) = 0.0_rk pointNumber = pointNumber + 1 end do - !$OMP END DO case(2) ! face in y direction, y coord is fixed - !$OMP DO do i = 1, num_intp_per_direction points(pointNumber, 1) = chebPnt1D(i) points(pointNumber, 2) = (-1.0_rk)**align points(pointNumber, 3) = 0.0_rk pointNumber = pointNumber + 1 end do - !$OMP END DO case default call tem_abort( 'ERROR in create_surface_cheb_points_cube_2d:' & & // ' unknown face direction' ) end select - !$OMP END PARALLEL end subroutine create_surface_cheb_points_cube_2d ! ************************************************************************ ! @@ -538,7 +472,6 @@ subroutine create_surface_lobattocheb_points_cube_2d( & real(kind=rk), allocatable :: chebPnt1D(:) integer :: nquadpoints ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(i, pointNumber) nquadpoints = num_intp_per_direction allocate(points(nquadpoints,3)) @@ -549,30 +482,24 @@ subroutine create_surface_lobattocheb_points_cube_2d( & select case(dir) case(1) ! face in x direction, x coord is fixed - !$OMP DO do i = 1, num_intp_per_direction points(pointNumber, 1) = (-1.0_rk)**align points(pointNumber, 2) = chebPnt1D(i) points(pointNumber, 3) = 0.0_rk pointNumber = pointNumber + 1 end do - !$OMP END DO case(2) ! face in y direction, y coord is fixed - !$OMP DO do i = 1, num_intp_per_direction points(pointNumber, 1) = chebPnt1D(i) points(pointNumber, 2) = (-1.0_rk)**align points(pointNumber, 3) = 0.0_rk pointNumber = pointNumber + 1 end do - !$OMP END DO case default call tem_abort( 'ERROR in create_surface_cheb_points_cube_2d:' & & // ' unknown face direction' ) end select - !$OMP END PARALLEL - end subroutine create_surface_lobattocheb_points_cube_2d ! ************************************************************************ ! diff --git a/source/fpt/ply_legFpt_2D_module.fpp b/source/fpt/ply_legFpt_2D_module.fpp index 2f62583..81343c8 100644 --- a/source/fpt/ply_legFpt_2D_module.fpp +++ b/source/fpt/ply_legFpt_2D_module.fpp @@ -12,6 +12,7 @@ module ply_legFpt_2D_module use ply_legFpt_module, only: ply_legFpt_type, & & ply_legToPnt, & & ply_pntToLeg, & + & ply_legFpt_bu_type, & & assignment(=) implicit none @@ -37,10 +38,11 @@ contains ! ************************************************************************ ! !> Subroutine to transform Legendre expansion to point values !! at Chebyshev nodes. - subroutine ply_legToPnt_2D_singVar( fpt, legCoeffs, pntVal ) + subroutine ply_legToPnt_2D_singVar( fpt, legCoeffs, pntVal, bu ) ! --------------------------------------------------------------------- ! !> The FPT parameters. type(ply_legFpt_type), intent(inout) :: fpt + type(ply_legFpt_bu_type), intent(inout) :: bu !> The Legendre coefficients to convert to point values (Chebyshev nodes). !! \attention Although this array serves as input only, it is modified !! inside of this routine by the underlying FPT algorithm. So, when @@ -54,8 +56,6 @@ contains real(kind=rk), dimension(:), allocatable :: alph real(kind=rk), dimension(:), allocatable :: gam ! --------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iStrip, iAlph, nIndeps) - striplen = fpt%legToChebParams%striplen n = fpt%legToChebParams%n @@ -70,7 +70,7 @@ contains ! 1 4 7 ! 2 5 8 ! 3 6 9 - !$OMP DO + yStripLoop: do iStrip = 1, n, striplen ! iAlph is the index of the first element in a line for the transformation ! in y-direction. @@ -84,16 +84,15 @@ contains call ply_legToPnt( fpt = fpt, & & nIndeps = nIndeps, & & legCoeffs = alph, & - & pntVal = gam ) + & pntVal = gam, & + & bu = bu ) ! Write gam to pntVal array pntVal((iStrip-1)*n+1 : (iStrip+nIndeps-1)*n) = gam(1:nIndeps*n) end do yStripLoop - !$OMP END DO ! x-direction - !$OMP DO xStripLoop: do iStrip = 1, n, striplen do iAlph = iStrip, min(iStrip+striplen-1, n) alph((iAlph-iStrip)*n+1:(iAlph-iStrip+1)*n) = pntVal(iAlph::n) !ztrafo @@ -105,14 +104,13 @@ contains call ply_legToPnt( fpt = fpt, & & nIndeps = nIndeps, & & legCoeffs = alph, & - & pntVal = gam ) + & pntVal = gam, & + & bu = bu ) pntVal((iStrip-1)*n+1 : (iStrip+nIndeps-1)*n) = gam(1:nIndeps*n) end do xStripLoop - !$OMP END DO - !$OMP END PARALLEL end subroutine ply_legToPnt_2D_singVar ! ************************************************************************ ! @@ -121,10 +119,11 @@ contains ! ************************************************************************ ! !> Subroutine to transform Legendre expansion to point values !! at Chebyshev nodes. - subroutine ply_legToPnt_2D_multVar( fpt, legCoeffs, pntVal, nVars ) + subroutine ply_legToPnt_2D_multVar( fpt, legCoeffs, pntVal, nVars, bu ) ! --------------------------------------------------------------------- ! !> The FPT parameters. type(ply_legFpt_type), intent(inout) :: fpt + type(ply_legFpt_bu_type), intent(inout) :: bu !> The Legendre coefficients to convert to point values (Chebyshev nodes). !! \attention Although this array serves as input only, it is modified !! inside of this routine by the underlying FPT algorithm. So, when @@ -140,7 +139,7 @@ contains ! --------------------------------------------------------------------- ! do iVar = 1, nVars - call ply_legToPnt_2D(fpt, legCoeffs(:,iVar), pntVal(:,iVar)) + call ply_legToPnt_2D(fpt, legCoeffs(:,iVar), pntVal(:,iVar), bu) end do end subroutine ply_legToPnt_2D_multVar @@ -150,10 +149,11 @@ contains ! ************************************************************************ ! !> Subroutine to transform Legendre expansion to point values !! at Chebyshev nodes. - subroutine ply_pntToLeg_2D_singVar( fpt, pntVal, legCoeffs ) + subroutine ply_pntToLeg_2D_singVar( fpt, pntVal, legCoeffs, bu ) ! --------------------------------------------------------------------- ! !> Parameters of the Fast Polynomial transformation. type(ply_legFpt_type), intent(inout) :: fpt + type(ply_legFpt_bu_type), intent(inout) :: bu !> The point values to transform to 2D modal Legendre expansion. !! \attention Although this array serves as input only, it is modified !! inside of this routine by the underlying DCT algorithm. So, when @@ -167,7 +167,6 @@ contains real(kind=rk), dimension(:), allocatable :: alph real(kind=rk), dimension(:), allocatable :: gam ! --------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(nIndeps, iStrip, iAlph) striplen = fpt%chebToLegParams%striplen n = fpt%legToChebParams%n @@ -176,7 +175,6 @@ contains allocate(alph(min(striplen, n)*n)) allocate(gam(min(striplen, n)*n)) - !$OMP DO yStripLoop: do iStrip = 1, n, striplen do iAlph = iStrip, min(iStrip+striplen-1, n) !y_Trafo alph((iAlph-iStrip)*n+1:(iAlph-iStrip+1)*n) = pntVal(iAlph::n) @@ -188,16 +186,16 @@ contains call ply_pntToLeg( fpt = fpt, & & nIndeps = nIndeps, & & legCoeffs = gam, & - & pntVal = alph ) + & pntVal = alph, & + & bu = bu ) ! temp -> pntVal (stride-1 writing) legCoeffs((iStrip-1)*n+1 : (iStrip+nIndeps-1)*n) = gam(1:nIndeps*n) end do yStripLoop ! iStrip - !$OMP END DO + ! x-direction - !$OMP DO xStripLoop: do iStrip = 1,n,striplen do iAlph = iStrip, min(iStrip+striplen-1, n) !ztrafo @@ -210,14 +208,13 @@ contains call ply_pntToLeg( fpt = fpt, & & nIndeps = nIndeps, & & legCoeffs = gam, & - & pntVal = alph ) + & pntVal = alph, & + & bu = bu ) legCoeffs((iStrip-1)*n+1 : (iStrip+nIndeps-1)*n) = gam(1:nIndeps*n) end do xStripLoop - !$OMP END DO - !$OMP END PARALLEL end subroutine ply_pntToLeg_2D_singVar ! ************************************************************************ ! @@ -226,10 +223,11 @@ contains ! ************************************************************************ ! !> Subroutine to transform Legendre expansion to point values !! at Chebyshev nodes. - subroutine ply_pntToLeg_2D_multVar( fpt, pntVal, legCoeffs, nVars ) + subroutine ply_pntToLeg_2D_multVar( fpt, pntVal, legCoeffs, nVars, bu ) ! --------------------------------------------------------------------- ! !> Parameters of the Fast Polynomial transformation. type(ply_legFpt_type), intent(inout) :: fpt + type(ply_legFpt_bu_type), intent(inout) :: bu !> The point values to transform to 2D modal Legendre expansion. !! \attention Although this array serves as input only, it is modified !! inside of this routine by the underlying DCT algorithm. So, when @@ -245,7 +243,7 @@ contains ! --------------------------------------------------------------------- ! do iVar = 1, nVars - call ply_pntToLeg_2D(fpt, pntVal(:,iVar), legCoeffs(:,iVar)) + call ply_pntToLeg_2D(fpt, pntVal(:,iVar), legCoeffs(:,iVar), bu) end do end subroutine ply_pntToLeg_2D_multVar diff --git a/source/fpt/ply_legFpt_3D_module.fpp b/source/fpt/ply_legFpt_3D_module.fpp index 4acfeef..79daf4a 100644 --- a/source/fpt/ply_legFpt_3D_module.fpp +++ b/source/fpt/ply_legFpt_3D_module.fpp @@ -8,6 +8,7 @@ module ply_legFpt_3D_module use env_module, only: rk use ply_legFpt_module, only: ply_legFpt_type, & & ply_legToPnt, & + & ply_legFpt_bu_type, & & ply_PntToLeg implicit none @@ -31,10 +32,11 @@ contains ! ************************************************************************ ! - subroutine ply_legToPnt_3D_singvar( fpt, legCoeffs, pntVal ) + subroutine ply_legToPnt_3D_singvar( fpt, legCoeffs, pntVal, bu ) ! --------------------------------------------------------------------- ! !> The FPT parameters. type(ply_legFpt_type), intent(inout) :: fpt + type(ply_legFpt_bu_type), intent(inout) :: bu !> The Legendre coefficients to convert to point values (Chebyshev nodes). !! \attention Although this array serves as input only, it is modified !! inside of this routine by the underlying FPT algorithm. So, when @@ -115,7 +117,8 @@ contains call ply_legToPnt( fpt = fpt, & & nIndeps = nIndeps, & & legCoeffs = alph, & - & pntVal = gam ) + & pntVal = gam, & + & bu = bu ) pntVal((iStrip-1)*n+1 : (iStrip+nIndeps-1)*n) = gam(1:nIndeps*n) @@ -134,7 +137,8 @@ contains call ply_legToPnt( fpt = fpt, & & nIndeps = nIndeps, & & legCoeffs = alph, & - & pntVal = gam ) + & pntVal = gam, & + & bu = bu ) legCoeffs((iStrip-1)*n+1 : (iStrip+nIndeps-1)*n) = gam(1:nIndeps*n) @@ -152,7 +156,8 @@ contains call ply_legToPnt( fpt = fpt, & & nIndeps = nIndeps, & & legCoeffs = alph, & - & pntVal = gam ) + & pntVal = gam, & + & bu = bu ) pntVal((iStrip-1)*n+1 : (iStrip+nIndeps-1)*n) = gam(1:nIndeps*n) @@ -167,7 +172,7 @@ contains !> Subroutine to transform Legendre expansion to point values !! at Chebyshev nodes. !!VK: no multivar fashion of this routine is used anymore - subroutine ply_legToPnt_3D_multVar( fpt, legCoeffs, pntVal, nVars ) + subroutine ply_legToPnt_3D_multVar( fpt, legCoeffs, pntVal, nVars, bu ) ! --------------------------------------------------------------------- ! !> The Legendre coefficients to convert to point values (Chebyshev nodes). !! \attention Although this array serves as input only, it is modified @@ -176,6 +181,7 @@ contains !! be modified. real(kind=rk), intent(inout) :: legCoeffs(:,:) type(ply_legFpt_type), intent(inout) :: fpt + type(ply_legFpt_bu_type), intent(inout) :: bu real(kind=rk), intent(inout) :: pntVal(:,:) integer, intent(in) :: nVars ! --------------------------------------------------------------------- ! @@ -183,7 +189,7 @@ contains ! --------------------------------------------------------------------- ! do iVar = 1, nVars - call ply_legToPnt_3D( fpt, legCoeffs(:,iVar), pntVal(:,iVar) ) + call ply_legToPnt_3D( fpt, legCoeffs(:,iVar), pntVal(:,iVar), bu ) end do end subroutine ply_legToPnt_3D_multVar @@ -193,9 +199,10 @@ contains ! ************************************************************************ ! !> Subroutine to transform Legendre expansion to point values !! at Chebyshev nodes. - subroutine ply_pntToLeg_3D_multVar( fpt, pntVal, legCoeffs, nVars ) + subroutine ply_pntToLeg_3D_multVar( fpt, pntVal, legCoeffs, nVars, bu ) ! --------------------------------------------------------------------- ! type(ply_legFpt_type), intent(inout) :: fpt + type(ply_legFpt_bu_type), intent(inout) :: bu !> The point values to transform to 3D modal Legendre expansion. !! \attention Although this array serves as input only, it is modified !! inside of this routine by the underlying DCT algorithm. So, when @@ -209,7 +216,7 @@ contains ! --------------------------------------------------------------------- ! do iVar = 1, nVars - call ply_pntToLeg_3D( fpt, pntVal(:,iVar), legCoeffs(:,iVar) ) + call ply_pntToLeg_3D( fpt, pntVal(:,iVar), legCoeffs(:,iVar), bu ) end do end subroutine ply_pntToLeg_3D_multVar @@ -219,9 +226,10 @@ contains ! ************************************************************************ ! !> Subroutine to transform Legendre expansion to point values !! at Chebyshev nodes. - subroutine ply_pntToLeg_3D_singVar( fpt, pntVal, legCoeffs ) + subroutine ply_pntToLeg_3D_singVar( fpt, pntVal, legCoeffs, bu ) ! --------------------------------------------------------------------- ! type(ply_legFpt_type), intent(inout) :: fpt + type(ply_legFpt_bu_type), intent(inout) :: bu !> The point values to transform to 3D modal Legendre expansion. !! \attention Although this array serves as input only, it is modified !! inside of this routine by the underlying DCT algorithm. So, when @@ -268,7 +276,8 @@ contains call ply_pntToLeg( fpt = fpt, & & nIndeps = nIndeps, & & legCoeffs = gam, & - & pntVal = alph ) + & pntVal = alph, & + & bu = bu ) legCoeffs((iStrip-1)*n+1 : (iStrip+nIndeps-1)*n) = gam(1:nIndeps*n) @@ -290,7 +299,8 @@ contains call ply_pntToLeg( fpt = fpt, & & nIndeps = nIndeps, & & legCoeffs = gam, & - & pntVal = alph ) + & pntVal = alph, & + & bu = bu ) ! todo: fft on temp ! temp -> pntVal (stride-1 writing) @@ -310,7 +320,8 @@ contains call ply_pntToLeg( fpt = fpt, & & nIndeps = nIndeps, & & legCoeffs = gam, & - & pntVal = alph ) + & pntVal = alph, & + & bu = bu ) ! todo: fft on temp ! temp -> pntVal (stride-1 writing) diff --git a/source/fpt/ply_legFpt_module.f90 b/source/fpt/ply_legFpt_module.f90 index 8d0678f..8593a19 100644 --- a/source/fpt/ply_legFpt_module.f90 +++ b/source/fpt/ply_legFpt_module.f90 @@ -11,6 +11,7 @@ module ply_legFpt_module & ply_fpt_single, & & ply_legToCheb_param, & & ply_chebToLeg_param, & + & ply_bu_type, & & assignment(=) use fftw_wrap @@ -47,11 +48,18 @@ module ply_legFpt_module logical :: use_lobatto_points end type ply_legFpt_type + type ply_legFpt_bu_type + type(ply_bu_type) :: legToCheb_bu + type(ply_bu_type) :: chebToLeg_bu + end type ply_legFpt_bu_type + interface assignment(=) module procedure Copy_fpt + module procedure Copy_fpt_bu end interface public :: ply_legFpt_type, ply_init_legFpt, ply_legToPnt, ply_pntToLeg + public :: ply_legFpt_bu_type public :: assignment(=) @@ -77,18 +85,31 @@ end subroutine Copy_fpt ! ************************************************************************ ! + ! ************************************************************************ ! + subroutine Copy_fpt_bu(left, right) + ! -------------------------------------------------------------------- ! + type(ply_legFpt_bu_type), intent(out) :: left + type(ply_legFpt_bu_type), intent(in) :: right + ! -------------------------------------------------------------------- ! + left%legToCheb_bu = right%legToCheb_bu + left%chebToLeg_bu = right%chebToLeg_bu + end subroutine Copy_fpt_bu + ! ************************************************************************ ! + + ! ************************************************************************ ! !> Subroutine to initialize the fast polynomial transformation !! for Legendre expansion. subroutine ply_init_legFpt( maxPolyDegree, nIndeps, fpt, blocksize, & & approx_terms, striplen, lobattoPoints, & - & subblockingWidth, fft_flags ) + & subblockingWidth, fft_flags, bu ) ! -------------------------------------------------------------------- ! integer, intent(in) :: maxPolyDegree !> Number of independent values that can be computed simultaneously. integer, intent(in) :: nIndeps type(ply_legFpt_type), intent(inout) :: fpt + type(ply_legFpt_bu_type), intent(inout) :: bu !> Smallest block that is approximated by approx_terms coefficients. !! @@ -158,7 +179,8 @@ subroutine ply_init_legFpt( maxPolyDegree, nIndeps, fpt, blocksize, & & blocksize = blocksize, & & approx_terms = approx_terms, & & striplen = maxstriplen, & - & subblockingWidth = subblockingWidth ) + & subblockingWidth = subblockingWidth, & + & bu = bu%legToCheb_bu ) ! Init the fast Chebyshev to Legendre transformation. call ply_fpt_init( n = maxPolyDegree+1, & @@ -167,7 +189,8 @@ subroutine ply_init_legFpt( maxPolyDegree, nIndeps, fpt, blocksize, & & blocksize = blocksize, & & approx_terms = approx_terms, & & striplen = maxstriplen, & - & subblockingWidth = subblockingWidth ) + & subblockingWidth = subblockingWidth, & + & bu = bu%chebToLeg_bu ) ! Create the buffers for the intermediate arrays n = fpt%legToChebParams%n @@ -215,10 +238,11 @@ end subroutine ply_init_legFpt ! ************************************************************************ ! !> Subroutine to transform Legendre expansion to point values !! at Chebyshev nodes. - subroutine ply_legToPnt( fpt, legCoeffs, pntVal, nIndeps ) + subroutine ply_legToPnt( fpt, legCoeffs, pntVal, nIndeps, bu ) ! -------------------------------------------------------------------- ! real(kind=rk), intent(inout) :: legCoeffs(:) type(ply_legFpt_type), intent(inout) :: fpt + type(ply_legFpt_bu_type), intent(inout) :: bu real(kind=rk), intent(inout) :: pntVal(:) integer, intent(in) :: nIndeps ! -------------------------------------------------------------------- ! @@ -227,7 +251,7 @@ subroutine ply_legToPnt( fpt, legCoeffs, pntVal, nIndeps ) integer :: n ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(n, iDof, cheb) + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(n, iDof, cheb, bu) n = fpt%legToChebParams%n if (.not. fpt%use_lobatto_points) then @@ -236,7 +260,8 @@ subroutine ply_legToPnt( fpt, legCoeffs, pntVal, nIndeps ) do iDof = 1, nIndeps*n, n call ply_fpt_single( alph = legCoeffs(iDof:iDof+n-1), & & gam = cheb, & - & params = fpt%legToChebParams ) + & params = fpt%legToChebParams, & + & bu = bu%legToCheb_bu ) ! Normalize the coefficients of the Chebyshev polynomials due ! to the unnormalized version of DCT in the FFTW. @@ -255,7 +280,8 @@ subroutine ply_legToPnt( fpt, legCoeffs, pntVal, nIndeps ) do iDof = 1, nIndeps*n, n call ply_fpt_single( alph = legCoeffs(iDof:iDof+n-1), & & gam = cheb, & - & params = fpt%legToChebParams ) + & params = fpt%legToChebParams, & + & bu = bu%legToCheb_bu ) ! Normalize the coefficients of the Chebyshev polynomials due ! to the unnormalized version of DCT in the FFTW. @@ -277,9 +303,10 @@ end subroutine ply_legToPnt ! ************************************************************************ ! !> Subroutine to transform Legendre expansion to point values !! at Chebyshev nodes. - subroutine ply_pntToLeg( fpt, pntVal, legCoeffs, nIndeps ) + subroutine ply_pntToLeg( fpt, pntVal, legCoeffs, nIndeps, bu ) ! -------------------------------------------------------------------- ! type(ply_legFpt_type), intent(inout) :: fpt + type(ply_legFpt_bu_type), intent(inout) :: bu real(kind=rk), intent(inout) :: pntVal(:) real(kind=rk), intent(inout) :: legCoeffs(:) integer, intent(in) :: nIndeps @@ -290,7 +317,7 @@ subroutine ply_pntToLeg( fpt, pntVal, legCoeffs, nIndeps ) integer :: n ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(n, iDof, cheb, normFactor) + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(n, iDof, cheb, normFactor, bu) n = fpt%legToChebParams%n if (.not. fpt%use_lobatto_Points) then @@ -309,7 +336,8 @@ subroutine ply_pntToLeg( fpt, pntVal, legCoeffs, nIndeps ) call ply_fpt_single( gam = legCoeffs(iDof:iDof+n-1), & & alph = cheb, & - & params = fpt%ChebToLegParams ) + & params = fpt%ChebToLegParams, & + & bu = bu%chebToLeg_bu ) end do !$OMP END DO @@ -329,7 +357,8 @@ subroutine ply_pntToLeg( fpt, pntVal, legCoeffs, nIndeps ) call ply_fpt_single( gam = legCoeffs(iDof:iDof+n-1), & & alph = cheb, & - & params = fpt%ChebToLegParams ) + & params = fpt%ChebToLegParams, & + & bu = bu%chebToLeg_bu ) end do !$OMP END DO diff --git a/source/fpt/ply_polyBaseExc_module.fpp b/source/fpt/ply_polyBaseExc_module.fpp index 496a912..c967880 100644 --- a/source/fpt/ply_polyBaseExc_module.fpp +++ b/source/fpt/ply_polyBaseExc_module.fpp @@ -81,9 +81,15 @@ module ply_polyBaseExc_module type(ply_matrixExpCoeffOddEven_type), allocatable :: col(:) end type ply_subvector_type - type ply_trafo_params_type + type ply_bu_type !> Lagrange polynomials evaluated at the Chebyshev points on [0,+1]. - type(ply_sub_vec), allocatable :: u(:,:) + type(ply_sub_vec), allocatable :: u(:,:) + + !> Conversion data structure used for fpt. + type(ply_subvector_type), allocatable :: b(:) + end type ply_bu_type + + type ply_trafo_params_type !> The array to store the diagonals of the matrix in. !! @@ -138,12 +144,11 @@ module ply_polyBaseExc_module !> The transformation type integer :: trafo - !> Conversion data structure used for fpt. - type(ply_subvector_type), allocatable :: b(:) end type ply_trafo_params_type interface assignment(=) module procedure Copy_trafo_params + module procedure Copy_bu end interface integer, parameter :: ply_legToCheb_param = 1 @@ -153,7 +158,7 @@ module ply_polyBaseExc_module public :: ply_fpt_exec_striped public :: ply_fpt_exec public :: ply_fpt_single - public :: ply_trafo_params_type + public :: ply_trafo_params_type, ply_bu_type public :: ply_legToCheb_param, ply_chebToLeg_param public :: ply_lambda public :: assignment(=) @@ -198,22 +203,38 @@ contains left%diag = right%diag allocate( left%adapter(right%s, (right%s+mod(right%s,2)/2), right%nBlocks) ) left%adapter = right%adapter - if(allocated(left%u))then - deallocate(left%u) - end if - allocate(left%u(1:right%h,0:right%k-1)) - left%u = right%u +! if(allocated(left%u))then +! deallocate(left%u) +! end if +! allocate(left%u(1:right%h,0:right%k-1)) +! left%u = right%u end subroutine Copy_trafo_params ! ************************************************************************ ! + ! ************************************************************************ ! + subroutine Copy_bu( left, right ) + ! -------------------------------------------------------------------- ! + type(ply_bu_type), intent(out) :: left + type(ply_bu_type), intent(in) :: right + ! --------------------------------------------------------------------- ! + if(allocated(left%u))then + deallocate(left%u) + end if + left%u = right%u + end subroutine Copy_bu + ! ************************************************************************ ! + + + ! ************************************************************************ ! subroutine ply_fpt_init( n, params, trafo, blocksize, approx_terms, & - & striplen, subblockingWidth ) + & striplen, subblockingWidth, bu ) ! -------------------------------------------------------------------- ! integer, intent(in) :: n type(ply_trafo_params_type), intent(inout) :: params + type(ply_bu_type), intent(inout) :: bu integer, intent(in) :: trafo !> Smallest block that is approximated by approx_terms coefficients. @@ -571,8 +592,8 @@ contains end if - allocate(params%u(0:h,0:k-1)) - params%u(0:h,0:k-1) = u(0:h,0:k-1) + allocate(bu%u(0:h,0:k-1)) + bu%u(0:h,0:k-1) = u(0:h,0:k-1) allocate(params%sub(0:h)) params%sub(0:h) = sub(0:h) params%n = n @@ -581,12 +602,12 @@ contains params%h = h ! Allocate the coefficients array for conversion - allocate(params%b(0:params%h)) + allocate(bu%b(0:params%h)) do l=0, h nRows = (params%nBlocks - 1) / (2**l) - 1 - allocate(params%b(l)%col(2:nRows+1)) + allocate(bu%b(l)%col(2:nRows+1)) do j=2,nRows+1 - allocate(params%b(l)%col(j)%coeff(0:k-1,0:1)) + allocate(bu%b(l)%col(j)%coeff(0:k-1,0:1)) end do end do @@ -912,7 +933,7 @@ contains !> Convert strip of coefficients of a modal representation in terms of !! Legendre polynomials to modal coefficients in terms of Chebyshev !! polynomials. - subroutine ply_fpt_exec( alph, gam, params, nIndeps ) + subroutine ply_fpt_exec( alph, gam, params, nIndeps, bu ) ! -------------------------------------------------------------------- ! !> Number of values that can be computed independently. integer, intent(in) :: nIndeps @@ -930,6 +951,7 @@ contains !> The parameters of the fast polynomial transformation. type(ply_trafo_params_type), intent(inout) :: params + type(ply_bu_type), intent(inout) :: bu ! -------------------------------------------------------------------- ! integer :: j, r, i, l, k, h, n, s, m integer :: iFun, indep @@ -965,13 +987,13 @@ contains row_rem = mod(n-remainder, rowsize) + remainder + iFun blockColLoop: do j = 2, nRows+1, 1+mod(nRows,2) do r = 0, k-1 - params%b(l)%col(j)%coeff(r,0) = 0.0_rk - params%b(l)%col(j)%coeff(r,1) = 0.0_rk + bu%b(l)%col(j)%coeff(r,0) = 0.0_rk + bu%b(l)%col(j)%coeff(r,1) = 0.0_rk do m = 0, rowsize-1 odd = mod(row_rem + m + (j-1)*rowsize,2) - params%b(l)%col(j)%coeff(r,odd) & - & = params%b(l)%col(j)%coeff(r,odd) & - & + params%u(l,r)%dat(m) & + bu%b(l)%col(j)%coeff(r,odd) & + & = bu%b(l)%col(j)%coeff(r,odd) & + & + bu%u(l,r)%dat(m) & & * alph(row_rem + m + (j-1)*rowsize + 1) end do end do @@ -988,7 +1010,7 @@ contains gam(iVal) = gam(iVal) & & + params%sub(l)%subRow(i)%subCol(j)%rowDat(m)& & %coeff(r) & - & * params%b(l)%col(j)%coeff(r,odd) + & * bu%b(l)%col(j)%coeff(r,odd) end do ! r end do ! m end do ! j @@ -1048,10 +1070,11 @@ contains !> Convert strip of coefficients of a modal representation in terms of !! Legendre polynomials to modal coefficients in terms of Chebyshev !! polynomials. - subroutine ply_fpt_single( alph, gam, params) + subroutine ply_fpt_single( alph, gam, params, bu ) ! -------------------------------------------------------------------- ! !> The parameters of the fast polynomial transformation. type(ply_trafo_params_type), intent(inout) :: params + type(ply_bu_type), intent(inout) :: bu !> Modal coefficients of the Legendre expansion. !! Size has to be: params%n @@ -1093,13 +1116,13 @@ contains row_rem = mod(n-remainder, rowsize) + remainder blockColLoop: do j = 2, nRows+1, 1+mod(nRows,2) do r = 0, k-1 - params%b(l)%col(j)%coeff(r,0) = 0.0_rk - params%b(l)%col(j)%coeff(r,1) = 0.0_rk + bu%b(l)%col(j)%coeff(r,0) = 0.0_rk + bu%b(l)%col(j)%coeff(r,1) = 0.0_rk do m = 0, rowsize-1 odd = mod(row_rem + m + (j-1)*rowsize,2) - params%b(l)%col(j)%coeff(r,odd) & - & = params%b(l)%col(j)%coeff(r,odd) & - & + params%u(l,r)%dat(m) & + bu%b(l)%col(j)%coeff(r,odd) & + & = bu%b(l)%col(j)%coeff(r,odd) & + & + bu%u(l,r)%dat(m) & & * alph(row_rem + m + (j-1)*rowsize + 1) end do end do @@ -1116,7 +1139,7 @@ contains gam(iVal) = gam(iVal) & & + params%sub(l)%subRow(i)%subCol(j)%rowDat(m)& & %coeff(r) & - & * params%b(l)%col(j)%coeff(r,odd) + & * bu%b(l)%col(j)%coeff(r,odd) end do ! r end do ! m end do ! j @@ -1173,13 +1196,14 @@ contains ! ************************************************************************ ! !> Convert coefficients of a modal representation in terms of Legendre !! polynomials to modal coefficients in terms of Chebyshev polynomials. - subroutine ply_fpt_exec_striped( nIndeps, alph, gam, params ) + subroutine ply_fpt_exec_striped( nIndeps, alph, gam, params, bu ) ! -------------------------------------------------------------------- ! !> Number of values that can be computed independently. integer, intent(in) :: nIndeps !> The parameters of the fast polynomial transformation. type(ply_trafo_params_type), intent(inout) :: params + type(ply_bu_type), intent(inout) :: bu !> Modal coefficients of the Legendre expansion. !! Size has to be: (1:params%n*indeps,nVars) @@ -1236,13 +1260,13 @@ contains blockColLoop: do j = 2, nRows+1, 1+mod(nRows,2) do r = 0, k-1 - params%b(l)%col(j)%coeff(r,0) = 0.0_rk - params%b(l)%col(j)%coeff(r,1) = 0.0_rk + bu%b(l)%col(j)%coeff(r,0) = 0.0_rk + bu%b(l)%col(j)%coeff(r,1) = 0.0_rk do m = 0, rowsize-1 odd = mod(row_rem + m + (j-1)*rowsize,2) - params%b(l)%col(j)%coeff(r,odd) & - & = params%b(l)%col(j)%coeff(r,odd) & - & + params%u(l,r)%dat(m) & + bu%b(l)%col(j)%coeff(r,odd) & + & = bu%b(l)%col(j)%coeff(r,odd) & + & + bu%u(l,r)%dat(m) & & * alph(row_rem + m + (j-1)*rowsize + 1) end do end do @@ -1259,7 +1283,7 @@ contains gam(iVal) = gam(iVal) & & + params%sub(l)%subRow(i)%subCol(j)%rowDat(m)& & %coeff(r) & - & * params%b(l)%col(j)%coeff(r,odd) + & * bu%b(l)%col(j)%coeff(r,odd) end do ! r end do ! m end do ! j diff --git a/source/ply_LegPolyProjection_module.f90 b/source/ply_LegPolyProjection_module.f90 index a36db61..e293d5b 100644 --- a/source/ply_LegPolyProjection_module.f90 +++ b/source/ply_LegPolyProjection_module.f90 @@ -140,8 +140,6 @@ subroutine ply_QPolyProjection( subsamp, dofReduction, tree, meshData, & integer :: nChildDofs, oneDof ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iVar, nDofs, nComponents, nChilds, nChildDofs, workDat) - if (subsamp%projectionType.ne.ply_QLegendrePoly_prp) then call tem_abort( 'ERROR in ply_QPolyProjection: subsampling is ' & & // 'only implemented for Q-Legendre-Polynomials' ) @@ -151,7 +149,6 @@ subroutine ply_QPolyProjection( subsamp, dofReduction, tree, meshData, & allocate(newVarDofs(nVars)) allocate(newMeshData(nVars)) - !$OMP DO varLoop: do iVar=1,nVars nDofs = varDofs(iVar) nComponents = varcomps(iVar) @@ -225,9 +222,7 @@ subroutine ply_QPolyProjection( subsamp, dofReduction, tree, meshData, & deallocate(projection_oneDof%projCoeff) end do varLoop - !$OMP END DO - !$OMP END PARALLEL end subroutine ply_QPolyProjection ! ************************************************************************ ! @@ -270,8 +265,6 @@ subroutine ply_initQLegProjCoeff( doftype, nDofs, ndims, nChilds, & real(kind=rk) :: dimexp ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iChild, iParentDof, iChildDof, xShift, yShift, zShift) - select case(dofType) case(ply_QLegendrePoly_prp) allocate(projection%projCoeff(nDofs, nChildDofs, nChilds)) @@ -284,7 +277,6 @@ subroutine ply_initQLegProjCoeff( doftype, nDofs, ndims, nChilds, & projCoeffOneDim = ply_QLegOneDimCoeff( nint(nDofs**dimexp), & & nint(nChildDofs**dimexp) ) - !$OMP DO ! Loop over the children of this element childLoop: do iChild = 1, nChilds @@ -337,14 +329,14 @@ subroutine ply_initQLegProjCoeff( doftype, nDofs, ndims, nChilds, & end do childDofLoop end do parentDofLoop end do childLoop - !$OMP END DO + case default call tem_abort( 'ERROR in ply_initProjCoeff: initialization of ' & & // 'projection coefficients for subsampling is implemented only ' & & // 'for Q-Legendre polynomials' ) end select - !$OMP END PARALLEL + deallocate(projCoeffOneDim) end subroutine ply_initQLegProjCoeff ! ************************************************************************ ! @@ -590,8 +582,6 @@ subroutine ply_subsampleData( tree, meshData, nDofs, nChildDofs, & real(kind=rk), allocatable :: childData(:) ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iParentElem, iElem, lowElemIndex, upElemIndex, iChild, lowChildIndex, upChildIndex) - nChilds = 2**ndims nElems = tree%nElems nElemsToRefine = count(new_refine_tree) @@ -608,7 +598,6 @@ subroutine ply_subsampleData( tree, meshData, nDofs, nChildDofs, & if (subsamp%sampling_lvl > 1) then - !$OMP DO elementLoop: do iParentElem=1,nParentElems ! Check if the parent cell was already refined... if (refine_tree(iParentElem)) then @@ -700,11 +689,9 @@ subroutine ply_subsampleData( tree, meshData, nDofs, nChildDofs, & end if end do elementLoop - !$OMP END DO else - !$OMP DO elemLoop: do iElem=1,nElems if (new_refine_tree(iElem)) then allocate(childData(nChildDofs*nChilds*nComponents)) @@ -755,10 +742,8 @@ subroutine ply_subsampleData( tree, meshData, nDofs, nChildDofs, & deallocate(childData) end if end do elemLoop - !$OMP END DO end if - !$OMP END PARALLEL end subroutine ply_subsampleData ! ************************************************************************ ! @@ -797,11 +782,9 @@ subroutine ply_projDataToChild( parentData, nParentDofs, nChildDofs, & integer :: childDof_pos, parentDof_pos real(kind=rk) :: projCoeff ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iChild, iParentDof, iChildDof, iComp, projCoeff, childDof_pos, parentDof_pos) childData(:) = 0.0_rk - !$OMP DO childLoop: do iChild = 1, nChilds parentDofLoop: do iParentDof = 1, nParentDofs childDofLoop: do iChildDof = 1, nChildDofs @@ -821,9 +804,7 @@ subroutine ply_projDataToChild( parentData, nParentDofs, nChildDofs, & end do childDofLoop end do parentDofLoop end do childLoop - !$OMP END DO - !$OMP END PARALLEL end subroutine ply_projDataToChild ! ************************************************************************ ! diff --git a/source/ply_fxt_module.f90 b/source/ply_fxt_module.f90 index 616c31c..b01a393 100644 --- a/source/ply_fxt_module.f90 +++ b/source/ply_fxt_module.f90 @@ -211,12 +211,10 @@ subroutine ply_fxt_m2n_2D( fxt, modal_data, nodal_data, oversamp_degree ) ! -------------------------------------------------------------------- ! integer :: ub, lb, iLine, iColumn, nModesPerDim, msq ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iLine, lb, ub, iColumn) nModesPerDim = (oversamp_degree+1) msq = nModesPerDim*nModesPerDim - !$OMP DO do iLine = 1, oversamp_degree+1 lb = (iLine-1) * (oversamp_degree+1) + 1 ub = lb + oversamp_degree @@ -224,19 +222,15 @@ subroutine ply_fxt_m2n_2D( fxt, modal_data, nodal_data, oversamp_degree ) & modal_data = modal_data(lb:ub), & & nodal_data = nodal_data(lb:ub) ) end do - !$OMP END DO - !$OMP DO do iColumn = 1, oversamp_degree+1 lb = iColumn call fxtf_flptld_m2n( flpt = fxt%flpt, & & modal_data = nodal_data(lb:msq:oversamp_degree+1), & & nodal_data = modal_data(lb:msq:oversamp_degree+1) ) end do - !$OMP END DO nodal_data = modal_data - !$OMP END PARALLEL end subroutine ply_fxt_m2n_2D ! ************************************************************************ ! @@ -258,8 +252,6 @@ subroutine ply_fxt_m2n_3D( fxt, modal_data, nodal_data, oversamp_degree ) real(kind=rk), pointer :: tmp_in(:), tmp_out(:) ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iLine, lb, ub, tmp_in, tmp_out, iColumn) - nModesPerDim = (oversamp_degree+1) msq = nModesPerDim*nModesPerDim nTotalDofs = (oversamp_degree+1)**3 @@ -268,7 +260,6 @@ subroutine ply_fxt_m2n_3D( fxt, modal_data, nodal_data, oversamp_degree ) tmp_in = -42 tmp_out = -42 - !$OMP DO ! The loop for msq stripes for independent x Dir evaluations do iLine = 1, msq lb = (iLine-1) * (oversamp_degree+1) + 1 @@ -279,9 +270,8 @@ subroutine ply_fxt_m2n_3D( fxt, modal_data, nodal_data, oversamp_degree ) & nodal_data = tmp_out ) nodal_data(lb:ub) = tmp_out end do - !$OMP END DO - !$OMP DO + ! The loop for msq stripes for independent y Dir evaluations do iColumn = 1, msq lb = int( (iColumn-1 ) / nModesPerDim ) * msq & @@ -292,20 +282,17 @@ subroutine ply_fxt_m2n_3D( fxt, modal_data, nodal_data, oversamp_degree ) & modal_data = nodal_data(lb:ub:nModesPerDim), & & nodal_data = modal_data(lb:ub:nModesPerDim) ) end do - !$OMP END DO + ! The loop for msq stripes for independent z Dir evaluations ub = nTotalDofs - !$OMP DO do iColumn = 1, msq lb = iColumn call fxtf_flptld_m2n( flpt = fxt%flpt, & & modal_data = modal_data(lb:ub:msq), & & nodal_data = nodal_data(lb:ub:msq) ) end do - !$OMP END DO - !$OMP END PARALLEL end subroutine ply_fxt_m2n_3D ! ************************************************************************ ! @@ -350,12 +337,9 @@ subroutine ply_fxt_n2m_2D( fxt, nodal_data, modal_data, oversamp_degree ) ! -------------------------------------------------------------------- ! integer :: ub, lb, iLine, iColumn, nModesPerDim, msq ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iLine, lb, ub, iColumn) - nModesPerDim = (oversamp_degree+1) msq = nModesPerDim*nModesPerDim - !$OMP DO do iLine = 1, oversamp_degree+1 lb = (iLine-1) * (oversamp_degree+1) + 1 ub = lb + oversamp_degree @@ -363,20 +347,16 @@ subroutine ply_fxt_n2m_2D( fxt, nodal_data, modal_data, oversamp_degree ) & nodal_data = nodal_data(lb:ub), & & modal_data = modal_data(lb:ub) ) end do - !$OMP END DO - !$OMP DO do iColumn = 1, oversamp_degree+1 lb = iColumn call fxtf_flptld_n2m( flpt = fxt%flpt, & & nodal_data = modal_data(lb:msq:oversamp_degree+1), & & modal_data = nodal_data(lb:msq:oversamp_degree+1) ) end do - !$OMP END DO modal_data = nodal_data - !$OMP END PARALLEL end subroutine ply_fxt_n2m_2D ! ************************************************************************ ! @@ -396,13 +376,10 @@ subroutine ply_fxt_n2m_3D( fxt, nodal_data, modal_data, oversamp_degree ) integer :: ub, lb, iLine, iColumn, nModesPerDim, msq, ntotalDofs ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iLine, iColumn, ub, lb) - nModesPerDim = (oversamp_degree+1) msq = nModesPerDim*nModesPerDim nTotalDofs = (oversamp_degree+1)**3 - !$OMP DO ! The loop for msq stripes for independent x Dir evaluations do iLine = 1, msq lb = (iLine-1) * (oversamp_degree+1) + 1 @@ -411,9 +388,8 @@ subroutine ply_fxt_n2m_3D( fxt, nodal_data, modal_data, oversamp_degree ) & nodal_data = nodal_data(lb:ub), & & modal_data = modal_data(lb:ub) ) end do - !$OMP END DO - !$OMP DO + ! The loop for msq stripes for independent y Dir evaluations do iColumn = 1, msq lb = int( (iColumn-1) / nModesPerDim ) * msq & @@ -424,20 +400,16 @@ subroutine ply_fxt_n2m_3D( fxt, nodal_data, modal_data, oversamp_degree ) & nodal_data = modal_data(lb:ub:nModesPerDim), & & modal_data = nodal_data(lb:ub:nModesPerDim) ) end do - !$OMP END DO + ! The loop for msq stripes for independent z Dir evaluations ub = nTotalDofs - !$OMP DO do iColumn = 1, msq lb = iColumn call fxtf_flptld_n2m( flpt = fxt%flpt, & & nodal_data = nodal_data(lb:ub:msq), & & modal_data = modal_data(lb:ub:msq) ) end do - !$OMP END DO - - !$OMP END PARALLEL end subroutine ply_fxt_n2m_3D ! ************************************************************************ ! diff --git a/source/ply_l2p_module.f90 b/source/ply_l2p_module.f90 index 8e1ce65..f9547ef 100644 --- a/source/ply_l2p_module.f90 +++ b/source/ply_l2p_module.f90 @@ -241,7 +241,7 @@ subroutine ply_l2_projection( nDofs, nIndeps, projected, original, matrix ) ! integer, parameter :: vlen = nIndeps ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iStrip, iRow, iCell, iCol) + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iStrip, iRow, iCell, iCol, strip_ub, mval) if (nDofs > 1) then !$OMP DO diff --git a/source/ply_leg_diff_module.fpp b/source/ply_leg_diff_module.fpp index 4034bc0..5fa6e07 100644 --- a/source/ply_leg_diff_module.fpp +++ b/source/ply_leg_diff_module.fpp @@ -45,8 +45,7 @@ contains integer :: dofPos, dofPosPrev, dofPos2Prev integer :: leg(3), iDeg, iDeg1, iDeg2, iDeg3, DV(3) ! -------------------------------------------------------------------- ! - - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iDeg1, dofPosPrev, iDeg2, iDeg3, iVar,leg, dofPos, iDeg) + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iDeg1, iDeg2, iDeg3, iDeg, iVar, leg, dofPos) if (present(dirVec)) then DV = dirvec @@ -121,8 +120,8 @@ contains end do !$OMP END DO - !$OMP DO ! Scale the results due to the Jacobians of the mappings + !$OMP DO do dofpos=1,(mpd+1)**3 ideg3 = (dofpos-1)/(mpd+1)**2 + 1 iDeg = dofpos - (ideg3-1)*(mpd+1)**2 @@ -135,6 +134,7 @@ contains end do !$OMP END DO + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! Uncollapsed version of the scaling ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! @@ -152,7 +152,6 @@ contains !! end do !! end do !! end do - !$OMP END PARALLEL end subroutine calcDiff_leg_normal @@ -185,7 +184,6 @@ contains integer :: dofPos, dofPosPrev, dofPos2Prev integer :: leg(2), iDeg1, iDeg2, DV(2) ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iDeg1, iDeg2, leg, dofPos, dofPosPrev, iVar) if (present(dirVec)) then @@ -257,8 +255,9 @@ contains end do !$OMP END DO - !$OMP DO + ! Scale the results due to the Jacobians of the mappings + !$OMP DO do iDeg1 = 1, mPd+1 do iDeg2 = 1, mPd+1 leg = (/iDeg1, iDeg2/) @@ -270,9 +269,9 @@ contains end do end do !$OMP END DO - !$OMP END PARALLEL + end subroutine calcDiff_leg_2d_normal ! ************************************************************************ ! @@ -376,7 +375,6 @@ contains integer :: iDegX integer :: dofPos, dofPosPrev, dofPos2Prev ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(dofPos, iDegX, dofPosPrev, dofPos2Prev) ! Build the derivative in x direction @@ -405,9 +403,9 @@ contains & * (2.0_rk*iDegX - 1.0_rk) end do !$OMP END DO - !$OMP END PARALLEL + end subroutine calcDiff_leg_1d ! ************************************************************************ ! diff --git a/source/ply_legser_module.f90 b/source/ply_legser_module.f90 index dc3ad15..b75fd3c 100644 --- a/source/ply_legser_module.f90 +++ b/source/ply_legser_module.f90 @@ -41,23 +41,19 @@ subroutine legser(A, B, n) real(kind=rk) :: ak, al, bb, c, d integer :: k, l, ll ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(k, d, c, l, ll, al, bb, ak) ak = 0.0_rk ! Calculation of the first Legendre coefficient b(1) = 0.5_rk * a(1) - !$OMP DO do k=3,n,2 ak = ak + 2.0_rk b(1) = b(1) - a(k)/(ak*ak - 1.0_rk) end do - !$OMP END DO c = 2.0_rk / 3.0_rk al = 0.0_rk ! Start main loop (remaining Legendre coefficients) - !$OMP DO do l=2,n ! Calculation of the Lth coefficient ll = l+2 @@ -77,9 +73,7 @@ subroutine legser(A, B, n) & / ( (al+al+3.0_rk)*(al+al+2.0_rk) ) b(l) = (al+0.5_rk)*bb end do - !$OMP END DO - !$OMP END PARALLEL end subroutine legser ! ************************************************************************ ! diff --git a/source/ply_modg_basis_module.fpp b/source/ply_modg_basis_module.fpp index acd25c3..c6be9ef 100644 --- a/source/ply_modg_basis_module.fpp +++ b/source/ply_modg_basis_module.fpp @@ -159,8 +159,6 @@ contains integer :: iFunc, jFunc ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(jFunc, iFunc) - allocate(GaussPoints(nPoints)) allocate(GaussPoints_left(nPoints)) allocate(GaussPoints_right(nPoints)) @@ -200,7 +198,6 @@ contains allocate( integral%anz_anzShift(1:nFunc, 1:nFunc, 2)) - !$OMP DO !loop over anzatz functions do jFunc = 1, nFunc do iFunc = 1, nFunc @@ -222,9 +219,7 @@ contains end do end do - !$OMP END DO - !$OMP END PARALLEL end subroutine init_modg_covolumeCoeffs ! ************************************************************************ ! @@ -424,8 +419,6 @@ contains real(kind=rk) :: n_q ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iAns, iAnsX, iAnsY, iAnsZ, n_q, ansPos) - ! allocate the output array select case(basisType) case(Q_space) @@ -451,7 +444,7 @@ contains polyValX(2,:) = coords(:,1) polyValY(2,:) = coords(:,2) polyValZ(2,:) = coords(:,3) - !$OMP DO + ! ... higher order polynomials are build recursively do iAns = 3, maxPolyDegree+1 n_q = 1.0_rk / real(iAns-1,kind=rk) @@ -477,13 +470,11 @@ contains & * polyValZ(iAns-2,:) ) & & *n_q end do - !$OMP END DO end if ! Now, build the complete point value. select case(basisType) case(Q_space) - !$OMP DO do iAnsX = 1, maxPolyDegree+1 do iAnsY = 1, maxPolyDegree+1 do iAnsZ = 1, maxPolyDegree+1 @@ -495,7 +486,7 @@ contains end do end do end do - !$OMP END DO + case(P_space) iAnsX = 1 iAnsY = 1 @@ -508,10 +499,9 @@ contains & * polyValZ(iAnsZ,:) ?? copy :: nextModgCoeffPTens(iAnsX, iAnsY, iAnsZ) end do - !$OMP END DO + end select - !$OMP END PARALLEL end subroutine evalLegendreTensPoly ! ************************************************************************ ! diff --git a/source/ply_oversample_module.fpp b/source/ply_oversample_module.fpp index aa8780b..ea26745 100644 --- a/source/ply_oversample_module.fpp +++ b/source/ply_oversample_module.fpp @@ -155,7 +155,9 @@ contains nScalars = size(modalCoeffs,2) maxorders = 3*(poly_proj%min_degree + 1)-2 + !$OMP WORKSHARE modalCoeffs = 0.0_rk + !$OMP END WORKSHARE if (poly_proj%basisType == Q_Space) then posQ: if (present(ensure_positivity)) then @@ -163,6 +165,7 @@ contains varQ: do iVar=1,nScalars if (ensure_positivity(iVar)) then ordersum = 0.0_rk + !$OMP SINGLE do dof = 1, mpd1_cube iDegZ = (dof-1)/mpd1_square + 1 iDegY = (dof-1-(iDegZ-1)*mpd1_square)/mpd1+1 @@ -178,9 +181,11 @@ contains EXIT end if end do + !$OMP END SINGLE end if end do varQ do iVar=1,nScalars + !$OMP DO do dof = 1, mpd1_cube iDegZ = (dof-1)/mpd1_square + 1 iDegY = (dof-1-(iDegZ-1)*mpd1_square)/mpd1+1 @@ -193,8 +198,10 @@ contains modalCoeffs(dofOverSamp,iVar) = state(dof,iVar) end if end do + !$OMP END DO end do else posQ + !$OMP DO do dof = 1, mpd1_cube iDegZ = (dof-1)/mpd1_square + 1 iDegY = (dof-1-(iDegZ-1)*mpd1_square)/mpd1+1 @@ -206,6 +213,7 @@ contains modalCoeffs(dofOverSamp,iVar) = state(dof,iVar) end do end do + !$OMP END DO end if posQ else !P_Space @@ -218,6 +226,7 @@ contains iDegY = 1 iDegZ = 1 ordersum = 0.0_rk + !$OMP SINGLE do idof = 1, poly_proj%body_3d%min_dofs ?? copy :: posOfModgCoeffPTens(iDegX, iDegY, iDegZ, dof) iOrd = iDegX+iDegY+iDegZ-2 @@ -232,12 +241,14 @@ contains EXIT end if end do + !$OMP END SINGLE end if end do varP do iVar=1,nScalars iDegX = 1 iDegY = 1 iDegZ = 1 + !$OMP SINGLE do idof = 1, poly_proj%body_3d%min_dofs iOrd = iDegX+iDegY+iDegZ-2 if (iOrd > ord_lim) EXIT @@ -248,11 +259,13 @@ contains modalCoeffs(dofOverSamp,iVar) = state(dof,iVar) ?? copy :: nextModgCoeffPTens(iDegX, iDegY, iDegZ) end do + !$OMP END SINGLE end do else posP iDegX = 1 iDegY = 1 iDegZ = 1 + !$OMP SINGLE do idof = 1, poly_proj%body_3d%min_dofs ?? copy :: posOfModgCoeffPTens(iDegX, iDegY, iDegZ, dof) dofOverSamp = iDegX + ( iDegY-1 & @@ -261,6 +274,7 @@ contains modalCoeffs(dofOverSamp,:) = state(dof,:) ?? copy :: nextModgCoeffPTens(iDegX, iDegY, iDegZ) end do + !$OMP END SINGLE end if posP end if @@ -302,6 +316,7 @@ contains nScalars = size(modalCoeffs,2) if (poly_proj%basisType == Q_Space) then + !$OMP DO do iVar=1,nScalars do dof = 1, mpd1_cube iDegZ = (dof-1)/mpd1_square + 1 @@ -313,12 +328,14 @@ contains state(dof,iVar) = modalCoeffs(dofOverSamp,iVar) end do end do + !$OMP END DO else !P_Space iDegX = 1 iDegY = 1 iDegZ = 1 + !$OMP SINGLE do idof = 1, poly_proj%body_3d%min_dofs ?? copy :: posOfModgCoeffPTens(iDegX, iDegY, iDegZ, dof) dofOverSamp = iDegX + ( iDegY-1 & @@ -327,6 +344,7 @@ contains state(dof,:) = modalCoeffs(dofOverSamp,:) ?? copy :: nextModgCoeffPTens(iDegX, iDegY, iDegZ) end do + !$OMP END SINGLE end if end subroutine ply_convertFromoversample_3d @@ -382,7 +400,9 @@ contains else nPVars = size(state,2) end if + !$OMP WORKSHARE maxorders = 2*(poly_proj%min_degree + 1)-1 + !$OMP END WORKSHARE ! Information for the oversampling loop oversamp_degree = poly_proj%oversamp_degree @@ -398,6 +418,7 @@ contains varQ: do iVar=1,nPVars if (ensure_positivity(iVar)) then ordersum = 0.0_rk + !$OMP SINGLE do dof = 1, mpd1_square iDegX = mod(dof-1,mpd1)+1 iDegY = (dof-1)/mpd1+1 @@ -412,9 +433,11 @@ contains EXIT end if end do + !$OMP END SINGLE end if end do varQ do iVar=1,nPVars + !$OMP DO do dof = 1, mpd1_square iDegX = mod(dof-1,mpd1)+1 iDegY = (dof-1)/mpd1+1 @@ -424,14 +447,17 @@ contains modalCoeffs(dofOverSamp,iVar) = state(dof,iVar) end if end do + !$OMP END DO end do else posQ + !$OMP DO do dof = 1, mpd1_square iDegX = mod(dof-1,mpd1)+1 iDegY = (dof-1)/mpd1+1 dofOverSamp = 1 + (iDegX-1) + (iDegY-1)*(oversamp_degree+1) modalCoeffs(dofOverSamp,1:nPVars) = state(dof,1:nPVars) end do + !$OMP END DO end if posQ else !P_Space @@ -440,6 +466,7 @@ contains ord_lim = maxorders varP: do iVar=1,nPVars if (ensure_positivity(iVar)) then + !$OMP SINGLE iDegX = 1 iDegY = 1 ordersum = 0.0_rk @@ -457,9 +484,11 @@ contains EXIT end if end do + !$OMP END SINGLE end if end do varP do iVar=1,nPVars + !$OMP SINGLE iDegX = 1 iDegY = 1 do idof = 1, poly_proj%body_2d%min_dofs @@ -470,8 +499,10 @@ contains modalCoeffs(dofOverSamp,1:nPVars) = state(dof,1:nPVars) ?? copy :: nextModgCoeffPTens2D(iDegX, iDegY) end do + !$OMP END SINGLE end do else posP + !$OMP SINGLE iDegX = 1 iDegY = 1 do idof = 1, poly_proj%body_2d%min_dofs @@ -480,6 +511,7 @@ contains modalCoeffs(dofOverSamp,1:nPVars) = state(dof,1:nPVars) ?? copy :: nextModgCoeffPTens2D(iDegX, iDegY) end do + !$OMP END SINGLE end if posP end if @@ -530,15 +562,18 @@ contains if (poly_proj%basisType == Q_Space) then + !$OMP DO do dof = 1, mpd1_square iDegX = mod(dof-1,mpd1)+1 iDegY = (dof-1)/mpd1+1 dofOverSamp = 1 + (iDegX-1) + (iDegY-1)*(oversamp_degree+1) state(dof,1:nPVars) = modalCoeffs(dofOverSamp,1:nPVars) end do + !$OMP END DO else !P_Space + !$OMP SINGLE iDegX = 1 iDegY = 1 iDegZ = 0 ! not used in posOfModgCoeffPTens_2D, nextModgCoeffPTens @@ -548,6 +583,7 @@ contains state(dof,1:nPVars) = modalCoeffs(dofOverSamp,1:nPVars) ?? copy :: nextModgCoeffPTens2D(iDegX, iDegY) end do + !$OMP END SINGLE end if @@ -597,13 +633,16 @@ contains nPVars = (poly_proj%maxPolyDegree+1)*nVars ! Initialize oversampled space correct to 0 + !$OMP WORKSHARE ModalCoeffs(:,:) = 0.0_rk + !$OMP END WORKSHARE if (present(ensure_positivity)) then ord_lim = poly_proj%min_degree+1 do iVar = 1,nVars if (ensure_positivity(iVar)) then varSum = 0.0_rk + !$OMP SINGLE do iPoint=2,ord_lim varSum = varSum + abs(state(iPoint,iVar)) if (varSum >= state(1,iVar)) then @@ -611,19 +650,24 @@ contains EXIT end if end do + !$OMP END SINGLE end if end do do iVar=1,nVars + !$OMP DO do iPoint=1,ord_lim ModalCoeffs(iPoint,iVar) = state(iPoint,iVar) end do + !$OMP END DO end do else + !$OMP DO do iVP = 1,nPVars iVar = (iVP-1)/(poly_proj%min_degree+1) + 1 iPoint = iVP - (iVar-1)*(poly_proj%min_degree+1) ModalCoeffs(iPoint,iVar) = state(iPoint,iVar) end do + !$OMP END DO end if end subroutine ply_convert2oversample_1d @@ -659,11 +703,13 @@ contains nPVars = (poly_proj%maxPolyDegree+1)*size(state,2) end if + !$OMP DO do iVP = 1,nPVars iVar = (iVP-1)/(poly_proj%min_degree+1) + 1 iPoint = iVP - (iVar-1)*(poly_proj%min_degree+1) state(iPoint,iVar) = modalCoeffs(iPoint,iVar) end do + !$OMP END DO end subroutine ply_convertFromoversample_1d ! ************************************************************************ ! diff --git a/source/ply_poly_project_module.fpp b/source/ply_poly_project_module.fpp index 65658b4..fde5519 100644 --- a/source/ply_poly_project_module.fpp +++ b/source/ply_poly_project_module.fpp @@ -17,6 +17,7 @@ module ply_poly_project_module & ply_prj_init_type use ply_LegFpt_module, only: ply_legFpt_type, & + & ply_legFpt_bu_type, & & ply_init_legFpt, & & ply_legToPnt, & & ply_PntToLeg, & @@ -58,6 +59,7 @@ module ply_poly_project_module !! of nonlinear equations. It is used if fpt is choses as projection !! method in the lua file type(ply_legFpt_type) :: fpt + type(ply_legFpt_bu_type) :: bu !> The Legendre Polynomial type for the Fast Orthogonal Function !! Transform via fxtpack. It is used if 'fxt' is chosen as projection !! method in the lua file @@ -195,6 +197,7 @@ contains !------------------------------------------------------------------------! left%fpt = right%fpt + left%bu = right%bu left%l2p = right%l2p left%fxt = right%fxt left%nodes = right%nodes @@ -376,7 +379,8 @@ contains & blocksize = proj_init%header%fpt_header%blocksize, & & approx_terms = proj_init%header%fpt_header%approx_terms, & & striplen = proj_init%header%fpt_header%striplen, & - & subblockingWidth = proj_init%header%fpt_header%subblockingWidth ) + & subblockingWidth = proj_init%header%fpt_header%subblockingWidth,& + & bu = me%body_1d%bu ) !> Initialization/Create of the volume quadrature nodes and the !! quadrature points on the face call init_cheb_nodes_1d( & @@ -394,7 +398,8 @@ contains & blocksize = proj_init%header%fpt_header%blocksize, & & approx_terms = proj_init%header%fpt_header%approx_terms, & & striplen = proj_init%header%fpt_header%striplen, & - & subblockingWidth = proj_init%header%fpt_header%subblockingWidth ) + & subblockingWidth = proj_init%header%fpt_header%subblockingWidth,& + & bu = me%body_2d%bu ) call init_cheb_nodes_2d( & & me = proj_init%header%fpt_header%nodes_header, & & nodes = me%body_2d%nodes, & @@ -411,7 +416,8 @@ contains & blocksize = proj_init%header%fpt_header%blocksize, & & approx_terms = proj_init%header%fpt_header%approx_terms, & & striplen = proj_init%header%fpt_header%striplen, & - & subblockingWidth = proj_init%header%fpt_header%subblockingWidth ) + & subblockingWidth = proj_init%header%fpt_header%subblockingWidth,& + & bu = me%body_3d%bu ) call init_cheb_nodes( & & me = proj_init%header%fpt_header%nodes_header, & & nodes = me%body_3d%nodes, & @@ -500,8 +506,6 @@ contains !--------------------------------------------------------------------------! integer :: iVar !--------------------------------------------------------------------------! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iVar) - select case(trim(me%kind)) case ('l2p') @@ -511,29 +515,26 @@ contains ! additional summation select case(dim) case (1) - !$OMP DO do iVar = 1, nVars call ply_l2p_trafo_1D( trafo = me%body_1D%l2p%leg2node, & & projected = nodal_data(:,iVar), & & original = modal_data(:,iVar) ) end do - !$OMP END DO + case (2) - !$OMP DO do iVar = 1, nVars call ply_l2p_trafo_2D( trafo = me%body_2D%l2p%leg2node, & & projected = nodal_data(:,iVar), & & original = modal_data(:,iVar) ) end do - !$OMP END DO + case (3) - !$OMP DO do iVar = 1, nVars call ply_l2p_trafo_3D( trafo = me%body_3D%l2p%leg2node, & & projected = nodal_data(:,iVar), & & original = modal_data(:,iVar) ) end do - !$OMP END DO + end select case ('fpt') @@ -543,56 +544,50 @@ contains call ply_LegToPnt_3D( fpt = me%body_3d%fpt, & & pntVal = nodal_data, & & legCoeffs = modal_data, & - & nVars = nVars ) + & nVars = nVars, & + & bu = me%body_3d%bu ) case (2) call ply_LegToPnt_2D( fpt = me%body_2d%fpt, & & pntVal = nodal_data, & & legCoeffs = modal_data, & - & nVars = nVars ) + & nVars = nVars, & + & bu = me%body_2d%bu ) case (1) - !$OMP DO do iVar = 1,nVars call ply_LegToPnt( fpt = me%body_1d%fpt, & & pntVal = nodal_data(:,iVar), & & legCoeffs = modal_data(:,iVar), & - & nIndeps = 1 ) + & nIndeps = 1, & + & bu = me%body_1d%bu ) end do - !$OMP END DO end select case ('fxt') select case (dim) case (3) - !$OMP DO do iVar = 1,nVars call ply_fxt_m2n_3D( fxt = me%body_3d%fxt, & & modal_data = modal_data(:,iVar), & & nodal_data = nodal_data(:,iVar), & & oversamp_degree = me%oversamp_degree ) end do - !$OMP END DO case (2) - !$OMP DO do iVar = 1,nVars call ply_fxt_m2n_2D( fxt = me%body_2d%fxt, & & modal_data = modal_data(:,iVar), & & nodal_data = nodal_data(:,iVar), & & oversamp_degree = me%oversamp_degree ) end do - !$OMP END DO case (1) - !$OMP DO do iVar = 1,nVars call ply_fxt_m2n_1D( fxt = me%body_1d%fxt, & & modal_data = modal_data(:,iVar), & & nodal_data = nodal_data(:,iVar) ) end do - !$OMP END DO end select end select - !$OMP END PARALLEL end subroutine ply_poly_project_m2n_multivar !****************************************************************************! @@ -612,36 +607,28 @@ contains !--------------------------------------------------------------------------! integer :: iVar !--------------------------------------------------------------------------! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iVar) - select case(trim(me%kind)) case ('l2p') select case (dim) case (1) - !$OMP DO do iVar = 1, nVars call ply_l2p_trafo_1D( trafo = me%body_1D%l2p%node2leg, & & projected = modal_data(:,iVar), & & original = nodal_data(:,iVar) ) end do - !$OMP END DO case (2) - !$OMP DO do iVar = 1, nVars call ply_l2p_trafo_2D( trafo = me%body_2D%l2p%node2leg, & & projected = modal_data(:,iVar), & & original = nodal_data(:,iVar) ) end do - !$OMP END DO case (3) - !$OMP DO do iVar = 1, nVars call ply_l2p_trafo_3D( trafo = me%body_3D%l2p%node2leg, & & projected = modal_data(:,iVar), & & original = nodal_data(:,iVar) ) end do - !$OMP END DO end select case ('fpt') @@ -651,27 +638,27 @@ contains call ply_pntToLeg_3D( fpt = me%body_3d%fpt, & & nVars = nVars, & & pntVal = nodal_data, & - & legCoeffs = modal_data ) + & legCoeffs = modal_data, & + & bu = me%body_3d%bu ) case (2) call ply_pntToLeg_2D( fpt = me%body_2d%fpt, & & nVars = nVars, & & pntVal = nodal_data, & - & legCoeffs = modal_data ) + & legCoeffs = modal_data, & + & bu = me%body_2d%bu ) case (1) - !$OMP DO do iVar = 1,nVars call ply_pntToLeg( fpt = me%body_1d%fpt, & & nIndeps = 1, & & pntVal = nodal_data(:,iVar), & - & legCoeffs = modal_data(:,iVar) ) + & legCoeffs = modal_data(:,iVar), & + & bu = me%body_1d%bu ) end do - !$OMP END DO end select case ('fxt') select case (dim) case (3) - !$OMP DO do iVar = 1, nVars call ply_fxt_n2m_3D( & & fxt = me%body_3d%fxt, & @@ -679,10 +666,8 @@ contains & modal_data = modal_data(:,iVar), & & oversamp_degree = me%oversamp_degree ) end do - !$OMP END DO case (2) - !$OMP DO do iVar = 1, nVars call ply_fxt_n2m_2D( & & fxt = me%body_2d%fxt, & @@ -690,24 +675,20 @@ contains & modal_data = modal_data(:,iVar), & & oversamp_degree = me%oversamp_degree ) end do - !$OMP END DO case (1) - !$OMP DO do iVar = 1, nVars call ply_fxt_n2m_1D( & & fxt = me%body_1d%fxt, & & nodal_data = nodal_data(:,iVar), & & modal_data = modal_data(:,iVar) ) end do - !$OMP END DO end select case default write(logUnit(1),*) 'ERROR in projection nodal to modal' end select - !$OMP END PARALLEL end subroutine ply_poly_project_n2m_multivar !***************************************************************************! diff --git a/source/ply_poly_transformation_module.f90 b/source/ply_poly_transformation_module.f90 index a6e7a1e..f457789 100644 --- a/source/ply_poly_transformation_module.f90 +++ b/source/ply_poly_transformation_module.f90 @@ -101,13 +101,11 @@ subroutine ply_Poly_Transformation( subsamp, dofReduction, mesh, meshData, & integer :: nVars, nDofs, nComponents, nChildDofs integer :: iVar ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iVar, nComponents, nDofs, workData, nChildDofs) nVars = size(varDofs) allocate(newVarDofs(nVars)) allocate(newMeshData(nVars)) - !$OMP DO varLoop: do iVar = 1, nVars nComponents = varComps(iVar) nDofs = vardofs(iVar) @@ -147,9 +145,6 @@ subroutine ply_Poly_Transformation( subsamp, dofReduction, mesh, meshData, & deallocate(workData) end do varLoop - !$OMP END DO - - !$OMP END PARALLEL end subroutine ply_Poly_Transformation @@ -199,8 +194,6 @@ subroutine ply_subsampleData( mesh, meshData, nDofs, nChildDofs, & real(kind=rk), allocatable :: transform_matrix(:,:) real(kind=rk), allocatable :: childData(:) ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iParentElem, iChild, iElem) - nChilds = 2**nDims @@ -227,7 +220,6 @@ subroutine ply_subsampleData( mesh, meshData, nDofs, nChildDofs, & if (subsamp%sampling_lvl > 1) then - !$OMP DO elementLoop: do iParentElem=1,nParentElems ! Check if the parent cell was already refined... if (refine_tree(iParentElem)) then @@ -314,9 +306,7 @@ subroutine ply_subsampleData( mesh, meshData, nDofs, nChildDofs, & deallocate(childData) end if end do elementLoop - !$OMP END DO else - !$OMP DO elemLoop: do iElem=1,nElems if (new_refine_tree(iElem)) then ! Create lower and upper indices for all data of iElem in meshData. @@ -367,12 +357,10 @@ subroutine ply_subsampleData( mesh, meshData, nDofs, nChildDofs, & deallocate(childData) end if end do elemLoop - !$OMP END DO end if deallocate(transform_matrix) - !$OMP END PARALLEL end subroutine ply_subsampleData ! ************************************************************************ ! @@ -763,8 +751,6 @@ subroutine ply_transform_matrix(max_modes, v) integer :: m, orig real(kind=rk) :: shifting, scaling ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(orig, m) - ! transformation matrix looks like this: ! [1.0 -- -- shift=0.5 ] @@ -787,7 +773,6 @@ subroutine ply_transform_matrix(max_modes, v) v(2,2) = scaling if (max_modes > 2) then - !$OMP DO do orig = 3,max_modes v(1,orig) = ply_beta(orig-1) * v(1,orig-2) & & + ply_alpha(orig-1) * shifting * v(1,orig-1) & @@ -805,14 +790,12 @@ subroutine ply_transform_matrix(max_modes, v) end if end do end do - !$OMP END DO end if ! Due to the symmetry of the problem (the left subinterval has just ! the shifting with a changed sign), we can fill the other half of ! the matrix by copying the already computed values accordingly with ! a change in sign, as needed (alternatingly). - !$OMP DO do m = 1 , max_modes do orig = 1, m-1 if (mod((m+orig),2) /= 0) then @@ -822,10 +805,8 @@ subroutine ply_transform_matrix(max_modes, v) end if end do end do - !$OMP END DO end if - !$OMP END PARALLEL end subroutine ply_transform_matrix ! ************************************************************************ ! diff --git a/source/ply_sampling_module.fpp b/source/ply_sampling_module.fpp index d9651b3..f72f2b7 100644 --- a/source/ply_sampling_module.fpp +++ b/source/ply_sampling_module.fpp @@ -652,21 +652,17 @@ contains integer :: iElem integer :: nComps ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iElem) nComps = fun%nComponents datlen = tree%nElems * nComps call c_f_pointer(fun%method_data, p) - !$OMP DO do iElem=1,n res(1+(iElem-1)*nComps:iElem*nComps) & & = p%dat(1+(elempos(iElem)-1)*nComps:elempos(iElem)*nComps) end do - !$OMP END DO - !$OMP END PARALLEL end subroutine get_sampled_element ! ************************************************************************ ! diff --git a/source/ply_sampling_varsys_module.f90 b/source/ply_sampling_varsys_module.f90 index 2473da5..aed8a40 100644 --- a/source/ply_sampling_varsys_module.f90 +++ b/source/ply_sampling_varsys_module.f90 @@ -83,7 +83,6 @@ subroutine ply_sampling_varsys_for_track( varsys, trackInst, mesh, nDims, & integer, allocatable :: elempos(:) real(kind=rk), allocatable :: elemdat(:) ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iVar, iComponent, iElem, iTotComp) nVars = trackInst%varmap%varPos%nVals @@ -115,14 +114,12 @@ subroutine ply_sampling_varsys_for_track( varsys, trackInst, mesh, nDims, & nComponents = varsys%method%val(varpos)%nComponents nDofs = (var_degree(iVar)+1)**nDims - !$OMP DO do iComponent=1,nComponents iTotComp = iScalar+iComponent-1 call ply_sampling_var_allocate( var = var(iTotComp), & & nElems = nElems, & & datalen = nDofs*nElems ) end do - !$OMP END DO isScalar: if (nComponents == 1) then @@ -144,7 +141,7 @@ subroutine ply_sampling_varsys_for_track( varsys, trackInst, mesh, nDims, & ! To avoid overly large memory consumption, we do this element by ! element. allocate(elemDat(nComponents*ndofs)) - !$OMP DO + do iElem=1,nElems call varSys%method%val(varpos)%get_element( & & varSys = varSys, & @@ -160,7 +157,7 @@ subroutine ply_sampling_varsys_for_track( varsys, trackInst, mesh, nDims, & & = elemdat(iComponent::nComponents) end do end do - !$OMP END DO + deallocate(elemDat) end if isScalar @@ -170,7 +167,6 @@ subroutine ply_sampling_varsys_for_track( varsys, trackInst, mesh, nDims, & ! polynomial degree in each element. ! This is redundant, but convenient. lastScalar = iScalar+nComponents-1 - !$OMP DO do iComponent=0,nComponents-1 var(iScalar+iComponent)%first(1) = 1 do iElem=1,nElems @@ -179,13 +175,11 @@ subroutine ply_sampling_varsys_for_track( varsys, trackInst, mesh, nDims, & var(iScalar+iComponent)%degree(iElem) = var_degree(iVar) end do end do - !$OMP END DO iScalar = iScalar + nComponents end do variables - !$OMP END PARALLEL end subroutine ply_sampling_varsys_for_track ! ------------------------------------------------------------------------ ! @@ -307,7 +301,6 @@ subroutine ply_sampling_var_compute_elemdev(var, threshold, min_mean) integer :: nElems integer :: ndofs ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iElem, ndofs, absmean, variation) if (allocated(var%deviates)) deallocate(var%deviates) var%nDeviating = 0 @@ -315,7 +308,7 @@ subroutine ply_sampling_var_compute_elemdev(var, threshold, min_mean) if (allocated(var%first)) then nElems = size(var%first)-1 allocate(var%deviates(nElems)) - !$OMP DO + do iElem=1,nElems ndofs = var%first(iElem+1) - var%first(iElem) - 1 absmean = max( abs(var%dat(var%first(iElem))), min_mean ) @@ -323,10 +316,8 @@ subroutine ply_sampling_var_compute_elemdev(var, threshold, min_mean) var%deviates(iElem) = (variation > threshold*absmean) if (var%deviates(iElem)) var%nDeviating = var%nDeviating + 1 end do - !$OMP END DO end if - !$OMP END PARALLEL end subroutine ply_sampling_var_compute_elemdev ! ------------------------------------------------------------------------ ! diff --git a/source/ply_space_integration_module.f90 b/source/ply_space_integration_module.f90 index fc5bb7f..fa6f1e0 100644 --- a/source/ply_space_integration_module.f90 +++ b/source/ply_space_integration_module.f90 @@ -115,7 +115,6 @@ subroutine ply_create_volume_gauss_points_cube( num_intp_per_direction, & real(kind=rk), allocatable :: weights1D(:) integer :: numQuadPoints ! ---------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(k, j, i, pointNumber) numQuadPoints = num_intp_per_direction**3 allocate(points(numQuadPoints,3)) @@ -130,7 +129,6 @@ subroutine ply_create_volume_gauss_points_cube( num_intp_per_direction, & & nIntP = num_intp_per_direction ) pointNumber = 1 - !$OMP DO do k = 1, num_intp_per_direction do j = 1, num_intp_per_direction do i = 1, num_intp_per_direction @@ -146,9 +144,6 @@ subroutine ply_create_volume_gauss_points_cube( num_intp_per_direction, & end do end do end do - !$OMP END DO - - !$OMP END PARALLEL end subroutine ply_create_volume_gauss_points_cube ! *********************************************************************** ! @@ -172,7 +167,6 @@ subroutine ply_create_volume_gauss_points_cube_2d( num_intp_per_direction, & real(kind=rk), allocatable :: weights1D(:) integer :: numQuadPoints ! ---------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(j, i, pointNumber) numQuadPoints = num_intp_per_direction**2 allocate(points(numQuadPoints,3)) @@ -187,7 +181,6 @@ subroutine ply_create_volume_gauss_points_cube_2d( num_intp_per_direction, & & nIntP = num_intp_per_direction ) pointNumber = 1 - !$OMP DO do j = 1, num_intp_per_direction do i = 1, num_intp_per_direction !> here we build all possible combinations of the one-dimensional @@ -200,9 +193,7 @@ subroutine ply_create_volume_gauss_points_cube_2d( num_intp_per_direction, & pointNumber = pointNumber + 1 end do end do - !$OMP END DO - !$OMP END PARALLEL end subroutine ply_create_volume_gauss_points_cube_2d ! *********************************************************************** ! @@ -225,7 +216,6 @@ subroutine ply_create_volume_gauss_points_cube_1d( num_intp_per_direction, & real(kind=rk), allocatable :: gaussp1D(:) real(kind=rk), allocatable :: weights1D(:) ! ---------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(i) allocate(points(num_intp_per_direction,3)) allocate(weights(num_intp_per_direction)) @@ -239,7 +229,6 @@ subroutine ply_create_volume_gauss_points_cube_1d( num_intp_per_direction, & & nIntP = num_intp_per_direction ) pointNumber = 1 - !$OMP DO do i = 1, num_intp_per_direction !> here we build all possible combinations of the one-dimensional !! quadrature points to get the three dimensional values. @@ -250,9 +239,7 @@ subroutine ply_create_volume_gauss_points_cube_1d( num_intp_per_direction, & weights(PointNumber) = weights1D(i) pointNumber = pointNumber + 1 end do - !$OMP END DO - !$OMP END PARALLEL end subroutine ply_create_volume_gauss_points_cube_1d ! *********************************************************************** ! @@ -283,7 +270,6 @@ subroutine ply_create_surface_gauss_points_cube( num_intp_per_direction, & real(kind=rk), allocatable :: weights1D(:) integer :: nquadPoints ! ---------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(j, i) nQuadPoints = num_intp_per_direction**2 @@ -302,7 +288,6 @@ subroutine ply_create_surface_gauss_points_cube( num_intp_per_direction, & select case(dir) case(1) ! face in x direction, x coord is fixed - !$OMP DO do j = 1, num_intp_per_direction do i = 1, num_intp_per_direction !> here we build all possible combinations of the one-dimensional @@ -314,10 +299,8 @@ subroutine ply_create_surface_gauss_points_cube( num_intp_per_direction, & pointNumber = pointNumber + 1 end do end do - !$OMP END DO case(2) ! face in y direction, y coord is fixed - !$OMP DO do j = 1, num_intp_per_direction do i = 1, num_intp_per_direction !> here we build all possible combinations of the one-dimensional @@ -329,10 +312,8 @@ subroutine ply_create_surface_gauss_points_cube( num_intp_per_direction, & pointNumber = pointNumber + 1 end do end do - !$OMP END DO case(3) ! face in z direction, z coord is fixed - !$OMP DO do j = 1, num_intp_per_direction do i = 1, num_intp_per_direction !> here we build all possible combinations of the one-dimensional @@ -344,7 +325,6 @@ subroutine ply_create_surface_gauss_points_cube( num_intp_per_direction, & pointNumber = pointNumber + 1 end do end do - !$OMP END DO case default call tem_abort( 'ERROR in create_surface_gauss_points_cube:' & @@ -352,7 +332,6 @@ subroutine ply_create_surface_gauss_points_cube( num_intp_per_direction, & end select - !$OMP END PARALLEL end subroutine ply_create_surface_gauss_points_cube ! *********************************************************************** ! @@ -383,7 +362,6 @@ subroutine ply_create_surface_gauss_points_cube_2d(num_intp_per_direction, & real(kind=rk), allocatable :: weights1D(:) integer :: nQuadPoints ! ---------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(i) ! The number of quadrature points on the boundary of a 2d volume is the ! number of quad points in one direction @@ -404,7 +382,6 @@ subroutine ply_create_surface_gauss_points_cube_2d(num_intp_per_direction, & select case(dir) case(1) ! face in x direction, x coord is fixed - !$OMP DO do i = 1, num_intp_per_direction !> here we build all possible combinations of the one-dimensional !! quadrature points for 2d case to get the three dimensional values. @@ -414,10 +391,8 @@ subroutine ply_create_surface_gauss_points_cube_2d(num_intp_per_direction, & weights(PointNumber) = weights1D(i) pointNumber = pointNumber + 1 end do - !$OMP END DO case(2) ! face in y direction, y coord is fixed - !$OMP DO do i = 1, num_intp_per_direction !> here we build all possible combinations of the one-dimensional !! quadrature points in 2d case to get the three dimensional values. @@ -427,7 +402,6 @@ subroutine ply_create_surface_gauss_points_cube_2d(num_intp_per_direction, & weights(PointNumber) = weights1D(i) pointNumber = pointNumber + 1 end do - !$OMP END DO case default call tem_abort( 'ERROR in create_surface_gauss_points_cube_2d:' & @@ -435,7 +409,6 @@ subroutine ply_create_surface_gauss_points_cube_2d(num_intp_per_direction, & end select - !$OMP END PARALLEL end subroutine ply_create_surface_gauss_points_cube_2d ! *********************************************************************** ! @@ -503,7 +476,6 @@ subroutine ply_create_gauss_points_1d( num_intp_per_direction, & real(kind=rk), allocatable :: gaussp1D(:) real(kind=rk), allocatable :: weights1D(:) ! ---------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(j) numQuadPoints = num_intp_per_direction @@ -519,7 +491,6 @@ subroutine ply_create_gauss_points_1d( num_intp_per_direction, & & nIntP = num_intp_per_direction ) pointNumber = 1 - !$OMP DO do j = 1, num_intp_per_direction !> here we build all possible combinations of the one-dimensional !! quadrature points to get the three dimensional values. @@ -527,9 +498,7 @@ subroutine ply_create_gauss_points_1d( num_intp_per_direction, & weights(PointNumber) = weights1D(j) pointNumber = pointNumber + 1 end do - !$OMP END DO - !$OMP END PARALLEL end subroutine ply_create_gauss_points_1d ! *********************************************************************** ! @@ -558,14 +527,12 @@ subroutine ply_gaussLegPoints( x1, x2, x, w, nIntP ) real(kind=rk) :: EPS integer :: m, i, j ! ---------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(i, p1, p2, p3, pp, z1, z) EPS= 1.0 / (10.0**(PRECISION(1.0_rk)-2) ) m = (nIntP+1)/2 xm = 0.5*(x2+x1) xl = 0.5*(x2-x1) - !$OMP DO do i = 1, m z = cos(PI*((i-1)+0.75_rk)/(nIntP+0.5_rk)) @@ -590,9 +557,7 @@ subroutine ply_gaussLegPoints( x1, x2, x, w, nIntP ) w(nIntp-i+1) = w(i) end do - !$OMP END DO - !$OMP END PARALLEL end subroutine ply_gaussLegPoints ! *********************************************************************** ! diff --git a/source/ply_split_element_module.f90 b/source/ply_split_element_module.f90 index 5fd0468..5af6213 100644 --- a/source/ply_split_element_module.f90 +++ b/source/ply_split_element_module.f90 @@ -219,7 +219,6 @@ subroutine ply_split_element_singleD( nDims, inLen, outLen, parent_data, & integer :: nParents integer :: parentpos, childpos ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iDir, parentMode, iParent, childMode, indep) nParents = size(parent_data,2) @@ -233,13 +232,11 @@ subroutine ply_split_element_singleD( nDims, inLen, outLen, parent_data, & ! The number of independent modes (in normal directions) is given ! by the product of the length in all directions, except the last one. nIndeps = 1 - !$OMP DO do iDir=1,nDims-1 nIndeps = nIndeps*inLen(iDir) end do - !$OMP END DO - !$OMP DO + oldmodes: do parentMode=1,inLen(nDims) ! Maximal number modes to compute, as this is a triangular matrix ! it is limited by the diagonal (parentMode). However, it may be @@ -270,9 +267,7 @@ subroutine ply_split_element_singleD( nDims, inLen, outLen, parent_data, & end do elemloop end do oldmodes - !$OMP END DO - !$OMP END PARALLEL end subroutine ply_split_element_singleD ! ======================================================================== ! diff --git a/source/ply_transfer_module.fpp b/source/ply_transfer_module.fpp index 5f87f8a..35a2a99 100644 --- a/source/ply_transfer_module.fpp +++ b/source/ply_transfer_module.fpp @@ -123,14 +123,12 @@ contains ispace_oq: if (inspace == Q_Space) then - !$OMP DO ! Both, output and input are Q Polynomials do out_Y=0,minord-1 out_off = out_Y*(outdegree+1) in_off = out_Y*(indegree+1) outdat(out_off+1:out_off+minord) = indat(in_off+1:in_off+minord) end do - !$OMP END DO else ispace_oq diff --git a/utests/with_fftw/ply_fpt_2D_lobattoNodes_test.fpp b/utests/with_fftw/ply_fpt_2D_lobattoNodes_test.fpp index 9f94bbe..3f35a6f 100644 --- a/utests/with_fftw/ply_fpt_2D_lobattoNodes_test.fpp +++ b/utests/with_fftw/ply_fpt_2D_lobattoNodes_test.fpp @@ -7,7 +7,8 @@ program ply_fpt_2D_lobattoNodes_test use tem_param_module, only: PI use tem_logging_module, only: logUnit use tem_general_module, only: tem_general_type, tem_start - use ply_legFpt_module, only: ply_legFpt_type, ply_init_legFPT + use ply_legFpt_module, only: ply_legFpt_type, ply_init_legFPT, & + & ply_legFpt_bu_type use ply_legFpt_2D_module, only: ply_legToPnt_2D use ply_modg_basis_module, only: evalLegendreTensPoly use ply_dof_module, only: Q_space @@ -52,6 +53,7 @@ contains real(kind=rk), allocatable :: legValChebPnt(:,:) real(kind=rk) :: rfac type(ply_legFpt_type) :: fpt + type(ply_legFpt_bu_type) :: bu integer, allocatable :: rand_seed(:) integer :: nSeeds @@ -122,7 +124,8 @@ contains call ply_init_legFpt( maxPolyDegree = maxPolyDegree, & & nIndeps = maxPolyDegree+1, & & fpt = fpt, & - & lobattoPoints = .true. ) + & lobattoPoints = .true., & + & bu = bu ) ! now transform to the Chebyshev nodes allocate(pntVal( (maxPolyDegree+1)**2, nVars )) @@ -130,7 +133,8 @@ contains call ply_legToPnt_2D( fpt = fpt, & & legCoeffs = legCoeffs, & & pntVal = pntVal, & - & nVars = nVars ) + & nVars = nVars, & + & bu = bu ) write(logUnit(10),*) 'Finished' !!do iPoint = 1, (maxPolyDegree+1)**3 diff --git a/utests/with_fftw/ply_fpt_2D_singVar_lobattoNodes_test.fpp b/utests/with_fftw/ply_fpt_2D_singVar_lobattoNodes_test.fpp index a424ff8..a99696b 100644 --- a/utests/with_fftw/ply_fpt_2D_singVar_lobattoNodes_test.fpp +++ b/utests/with_fftw/ply_fpt_2D_singVar_lobattoNodes_test.fpp @@ -6,7 +6,8 @@ program ply_fpt_2D_singVar_lobattoNodes_test use tem_param_module, only: PI use tem_logging_module, only: logUnit use tem_general_module, only: tem_general_type, tem_start - use ply_legFpt_module, only: ply_legFpt_type, ply_init_legFPT + use ply_legFpt_module, only: ply_legFpt_type, ply_init_legFPT, & + & ply_legFpt_bu_type use ply_legFpt_2D_module, only: ply_legToPnt_2D use ply_modg_basis_module, only: evalLegendreTensPoly use ply_dof_module, only: Q_space @@ -49,6 +50,7 @@ contains real(kind=rk), allocatable :: legValChebPnt(:,:) real(kind=rk) :: rfac type(ply_legFpt_type) :: fpt + type(ply_legFpt_bu_type) :: bu integer, allocatable :: rand_seed(:) integer :: nSeeds @@ -114,12 +116,14 @@ contains call ply_init_legFpt( maxPolyDegree = maxPolyDegree, & & nIndeps = maxpolydegree+1, & & fpt = fpt, & - & lobattoPoints = .true. ) + & lobattoPoints = .true., & + & bu = bu ) ! now transform to the Chebyshev nodes allocate(pntVal( (maxPolyDegree+1)**2 )) write(logUnit(10),*) 'Calculating FPT ...' - call ply_legToPnt_2D( fpt = fpt, legCoeffs = legCoeffs, pntVal = pntVal ) + call ply_legToPnt_2D( fpt = fpt, legCoeffs = legCoeffs, pntVal = pntVal, & + & bu = bu ) write(logUnit(10),*) 'Finished' !!do iPoint = 1, (maxPolyDegree+1)**3 diff --git a/utests/with_fftw/ply_fpt_2D_singVar_test.fpp b/utests/with_fftw/ply_fpt_2D_singVar_test.fpp index 1a5b550..811b053 100644 --- a/utests/with_fftw/ply_fpt_2D_singVar_test.fpp +++ b/utests/with_fftw/ply_fpt_2D_singVar_test.fpp @@ -6,7 +6,8 @@ program ply_fpt_2D_singVar_test use tem_param_module, only: PI use tem_logging_module, only: logUnit use tem_general_module, only: tem_general_type, tem_start - use ply_legFpt_module, only: ply_legFpt_type, ply_init_legFPT + use ply_legFpt_module, only: ply_legFpt_type, ply_init_legFPT, & + & ply_legFpt_bu_type use ply_legFpt_2D_module, only: ply_legToPnt_2D use ply_modg_basis_module, only: evalLegendreTensPoly use ply_dof_module, only: Q_Space @@ -49,6 +50,7 @@ contains real(kind=rk), allocatable :: legValChebPnt(:,:) real(kind=rk) :: rfac type(ply_legFpt_type) :: fpt + type(ply_legFpt_bu_type) :: bu integer, allocatable :: rand_seed(:) integer :: nSeeds @@ -114,12 +116,14 @@ contains ! Init the FPT call ply_init_legFpt( maxPolyDegree = maxPolyDegree, & & nIndeps = maxPolyDegree+1, & - & fpt = fpt ) + & fpt = fpt, & + & bu = bu ) ! now transform to the Chebyshev nodes allocate(pntVal( (maxPolyDegree+1)**2 )) write(logUnit(1),*) 'Calculating FPT ...' - call ply_legToPnt_2D( fpt = fpt, legCoeffs = legCoeffs, pntVal = pntVal ) + call ply_legToPnt_2D( fpt = fpt, legCoeffs = legCoeffs, pntVal = pntVal, & + & bu = bu ) write(logUnit(1),*) 'Finished' !!do iPoint = 1, (maxPolyDegree+1)**3 diff --git a/utests/with_fftw/ply_fpt_2D_test.fpp b/utests/with_fftw/ply_fpt_2D_test.fpp index 2c88472..84f6338 100644 --- a/utests/with_fftw/ply_fpt_2D_test.fpp +++ b/utests/with_fftw/ply_fpt_2D_test.fpp @@ -6,7 +6,8 @@ program ply_fpt_2D_test use tem_param_module, only: PI use tem_general_module, only: tem_general_type, tem_start use tem_logging_module, only: logUnit - use ply_legFpt_module, only: ply_legFpt_type, ply_init_legFPT + use ply_legFpt_module, only: ply_legFpt_type, ply_init_legFPT, & + & ply_legFpt_bu_type use ply_legFpt_2D_module, only: ply_legToPnt_2D use ply_modg_basis_module, only: evalLegendreTensPoly use ply_dof_module, only: Q_space @@ -49,6 +50,7 @@ contains real(kind=rk), allocatable :: legValChebPnt(:,:) real(kind=rk) :: rfac type(ply_legFpt_type) :: fpt + type(ply_legFpt_bu_type) :: bu integer, allocatable :: rand_seed(:) integer :: nSeeds @@ -119,13 +121,14 @@ contains ! Init the FPT call ply_init_legFpt( maxPolyDegree = maxPolyDegree, & & nIndeps = maxPolyDegree+1, & - & fpt = fpt ) + & fpt = fpt, & + & bu = bu ) ! now transform to the Chebyshev nodes allocate(pntVal( (maxPolyDegree+1)**2, nVars )) write(logUnit(10),*) 'Calculating FPT ...' call ply_legToPnt_2D( fpt = fpt, legCoeffs = legCoeffs, pntVal = pntVal, & - & nVars = nVars ) + & nVars = nVars, bu = bu ) write(logUnit(10),*) 'Finished' !!do iPoint = 1, (maxPolyDegree+1)**3 diff --git a/utests/with_fftw/ply_fpt_3D_lobattoNodes_test.fpp b/utests/with_fftw/ply_fpt_3D_lobattoNodes_test.fpp index 33acdc9..7bd95bd 100644 --- a/utests/with_fftw/ply_fpt_3D_lobattoNodes_test.fpp +++ b/utests/with_fftw/ply_fpt_3D_lobattoNodes_test.fpp @@ -6,7 +6,8 @@ program ply_fpt_3D_lobattoNodes_test use tem_param_module, only: PI use tem_logging_module, only: logUnit use tem_general_module, only: tem_general_type, tem_start - use ply_legFpt_module, only: ply_legFpt_type, ply_init_legFPT + use ply_legFpt_module, only: ply_legFpt_type, ply_init_legFPT, & + & ply_legFpt_bu_type use ply_legFpt_3D_module, only: ply_legToPnt_3D use ply_modg_basis_module, only: evalLegendreTensPoly use ply_dof_module, only: Q_space @@ -49,6 +50,7 @@ contains real(kind=rk), allocatable :: legValChebPnt(:,:) real(kind=rk) :: rfac type(ply_legFpt_type) :: fpt + type(ply_legFpt_bu_type) :: bu integer, allocatable :: rand_seed(:) integer :: nSeeds @@ -124,13 +126,14 @@ contains call ply_init_legFpt( maxPolyDegree = maxPolyDegree, & & nIndeps = (maxpolydegree+1)**2, & & fpt = fpt, & - & lobattoPoints = .true. ) + & lobattoPoints = .true., & + & bu = bu ) ! now transform to the Chebyshev nodes allocate(pntVal( (maxPolyDegree+1)**3, nVars )) write(logUnit(10),*) 'Calculating FPT ...' call ply_legToPnt_3D( fpt = fpt, legCoeffs = legCoeffs, pntVal = pntVal, & - & nVars = nVars ) + & nVars = nVars, bu = bu ) write(logUnit(10),*) 'Finished' !!do iPoint = 1, (maxPolyDegree+1)**3 diff --git a/utests/with_fftw/ply_fpt_3D_performance_test.f90 b/utests/with_fftw/ply_fpt_3D_performance_test.f90 index d6ad78b..74687fe 100644 --- a/utests/with_fftw/ply_fpt_3D_performance_test.f90 +++ b/utests/with_fftw/ply_fpt_3D_performance_test.f90 @@ -5,7 +5,8 @@ program ply_fpt_3D_performance_test use env_module, only: rk, fin_env use tem_logging_module, only: logUnit, tem_logging_init_primary use tem_general_module, only: tem_general_type, tem_start - use ply_legFpt_module, only: ply_legFpt_type, ply_init_legFPT + use ply_legFpt_module, only: ply_legFpt_type, ply_init_legFPT, & + & ply_legFpt_bu_type use ply_legFpt_3D_module, only: ply_legToPnt_3D, & & ply_pntToLeg_3D @@ -34,7 +35,7 @@ program ply_fpt_3D_performance_test write(*,*) 'Maximal deviation:', res if (res < 1.e-08) then write(logUnit(1),*) 'PASSED' - end if + end if call fin_env() @@ -47,6 +48,7 @@ subroutine ply_check_legToPnt_3D(power,res) real(kind=rk), allocatable :: legCoeffs(:,:), legCoeffsIn(:,:) real(kind=rk), allocatable :: pntVal(:,:), legVal(:,:) type(ply_legFpt_type) :: fpt + type(ply_legFpt_bu_type) :: bu real(kind=rk) :: starttime, stoptime ! Define the maximal polynomial degree we want to calculate the @@ -57,40 +59,43 @@ subroutine ply_check_legToPnt_3D(power,res) & ' Number of Legendre coefficients (per dim): ', maxPolyDegree+1 write(logUnit(10),*) '------------------------------------' // & & ' Number of Legendre coefficients (total): ',(maxPolyDegree+1)**3 - + ! Create the Legendre expansion coefficients - allocate(legCoeffs((maxPolyDegree+1)**3,nVars)) - allocate(legCoeffsIn((maxPolyDegree+1)**3,nVars)) + allocate(legCoeffs((maxPolyDegree+1)**3,nVars)) + allocate(legCoeffsIn((maxPolyDegree+1)**3,nVars)) do iVar = 1, nVars legCoeffs(:,iVar) = real(iVar, rk) end do - - ! Init the FPT + + ! Init the FPT call ply_init_legFpt( maxPolyDegree = maxPolyDegree, & & nIndeps = (maxpolydegree+1)**2, & - & fpt = fpt ) - + & fpt = fpt, & + & bu = bu ) + ! now transform to the Chebyshev nodes - allocate(pntVal( (maxPolyDegree+1)**3, nVars )) + allocate(pntVal( (maxPolyDegree+1)**3, nVars )) legCoeffsIn = legCoeffs starttime = MPI_Wtime() call ply_legToPnt_3D( fpt = fpt, & & legCoeffs = legCoeffsIn, & & pntVal = pntVal, & - & nVars = nVars ) + & nVars = nVars, & + & bu = bu ) stoptime = MPI_Wtime() write(*,*) 'Time for degree ', maxpolydegree, ' trafo: ', stoptime - starttime ! now transform back to Legendre coefficients - allocate(legVal( (maxPolyDegree+1)**3,nVars )) + allocate(legVal( (maxPolyDegree+1)**3,nVars )) starttime = MPI_Wtime() call ply_pntToLeg_3D( fpt = fpt, & & pntVal = pntVal, & & legCoeffs = legVal, & - & nVars = nVars ) + & nVars = nVars, & + & bu = bu ) stoptime = MPI_Wtime() write(*,*) 'Time for degree ', maxpolydegree, ' inverse: ', stoptime - starttime - + res = maxval(abs(legVal(:,:) - legCoeffs(:,:))) end subroutine ply_check_legToPnt_3D diff --git a/utests/with_fftw/ply_fpt_3D_test.fpp b/utests/with_fftw/ply_fpt_3D_test.fpp index 0858c9c..4fdcbb6 100644 --- a/utests/with_fftw/ply_fpt_3D_test.fpp +++ b/utests/with_fftw/ply_fpt_3D_test.fpp @@ -6,7 +6,8 @@ program ply_fpt_3D_test use tem_param_module, only: PI use tem_logging_module, only: logUnit use tem_general_module, only: tem_general_type, tem_start - use ply_legFpt_module, only: ply_legFpt_type, ply_init_legFPT + use ply_legFpt_module, only: ply_legFpt_type, ply_init_legFPT, & + & ply_legFpt_bu_type use ply_legFpt_3D_module, only: ply_legToPnt_3D use ply_modg_basis_module, only: evalLegendreTensPoly use ply_dof_module, only: Q_space @@ -49,6 +50,7 @@ contains real(kind=rk), allocatable :: legValChebPnt(:,:) real(kind=rk) :: rfac type(ply_legFpt_type) :: fpt + type(ply_legFpt_bu_type) :: bu integer, allocatable :: rand_seed(:) integer :: nSeeds @@ -124,13 +126,14 @@ contains ! Init the FPT call ply_init_legFpt( maxPolyDegree = maxPolyDegree, & & nIndeps = (maxpolydegree+1)**2, & - & fpt = fpt ) + & fpt = fpt, & + & bu = bu ) ! now transform to the Chebyshev nodes allocate(pntVal( (maxPolyDegree+1)**3, nVars )) write(logUnit(10),*) 'Calculating FPT ...' call ply_legToPnt_3D( fpt = fpt, legCoeffs = legCoeffs, pntVal = pntVal, & - & nVars = nVars ) + & nVars = nVars, bu = bu ) write(logUnit(10),*) 'Finished' !!do iPoint = 1, (maxPolyDegree+1)**3 diff --git a/utests/with_fftw/ply_fpt_ifpt_2D_multiVar_lobattoNodes_test.f90 b/utests/with_fftw/ply_fpt_ifpt_2D_multiVar_lobattoNodes_test.f90 index be0920d..c3aa6df 100644 --- a/utests/with_fftw/ply_fpt_ifpt_2D_multiVar_lobattoNodes_test.f90 +++ b/utests/with_fftw/ply_fpt_ifpt_2D_multiVar_lobattoNodes_test.f90 @@ -2,9 +2,10 @@ !! \author{Jens Zudrop} program ply_fpt_ifpt_2D_multiVar_lobattoNodes_test use env_module, only: rk, fin_env - use tem_logging_module, only: logUnit + use tem_logging_module, only: logUnit use tem_general_module, only: tem_general_type, tem_start - use ply_legFpt_module, only: ply_legFpt_type, ply_init_legFPT + use ply_legFpt_module, only: ply_legFpt_type, ply_init_legFPT, & + & ply_legFpt_bu_type use ply_legFpt_2D_module, only: ply_legToPnt_2D, & & ply_pntToLeg_2D @@ -18,7 +19,7 @@ program ply_fpt_ifpt_2D_multiVar_lobattoNodes_test call tem_start(codeName = 'Ateles unit test', & & version = 'utest', & & general = general ) - + res = 0.0_rk do iPower = 1,6 call ply_check_legToPnt_2D(iPower, newRes) @@ -29,8 +30,8 @@ program ply_fpt_ifpt_2D_multiVar_lobattoNodes_test if(res.lt.1e-08) then write(logUnit(1),*) 'PASSED' - end if - + end if + call fin_env() contains @@ -43,6 +44,7 @@ subroutine ply_check_legToPnt_2D(power, res) real(kind=rk), allocatable :: legCoeffs(:,:), legCoeffsIn(:,:) real(kind=rk), allocatable :: pntVal(:,:), legVal(:,:) type(ply_legFpt_type) :: fpt + type(ply_legFpt_bu_type) :: bu ! Define the maximal polynomial degree we want to calculate the ! bases exchange for. @@ -52,35 +54,36 @@ subroutine ply_check_legToPnt_2D(power, res) & ' Number of Legendre coefficients (per dim): ', maxPolyDegree+1 write(logUnit(10),*) '------------------------------------' // & & ' Number of Legendre coefficients (total): ',(maxPolyDegree+1)**2 - + ! Create the Legendre expansion coefficients - allocate(legCoeffs((maxPolyDegree+1)**2, nVars)) - allocate(legCoeffsIn((maxPolyDegree+1)**2, nVars)) + allocate(legCoeffs((maxPolyDegree+1)**2, nVars)) + allocate(legCoeffsIn((maxPolyDegree+1)**2, nVars)) do iVar = 1, nVars legCoeffs(:,iVar) = real(iVar,rk) end do - - ! Init the FPT + + ! Init the FPT call ply_init_legFpt( maxPolyDegree = maxPolyDegree, & & fpt = fpt, & & nIndeps = maxPolyDegree+1, & - & lobattoPoints = .true. ) - + & lobattoPoints = .true., & + & bu = bu ) + ! now transform to the Chebyshev nodes - allocate(pntVal( (maxPolyDegree+1)**2,nVars )) + allocate(pntVal( (maxPolyDegree+1)**2,nVars )) legCoeffsIn = legCoeffs ! Duplicate input vector to make sure that it is not modified in the trafo write(logUnit(10),*) 'Calculating FPT ...' call ply_legToPnt_2D( fpt = fpt, legCoeffs = legCoeffsIn, & - & pntVal = pntVal, nVars = nVars ) + & pntVal = pntVal, nVars = nVars, bu = bu ) write(logUnit(10),*) 'Finished' ! now transform back to Legendre coefficients - allocate(legVal( (maxPolyDegree+1)**2,nVars )) + allocate(legVal( (maxPolyDegree+1)**2,nVars )) write(logUnit(10),*) 'Calculating inverse FPT ...' call ply_pntToLeg_2D( fpt = fpt, pntVal = pntVal, & - & legCoeffs = legVal, nVars = nVars ) + & legCoeffs = legVal, nVars = nVars, bu = bu ) write(logUnit(10),*) 'Finished' - + !!do iDof = 1, (maxPolyDegree+1)**2 !! write(*,*) 'Leg coeff ', iDof, ' has error: ', legVal(iDof) - legCoeffs(iDof) !!end do @@ -93,7 +96,7 @@ subroutine ply_check_legToPnt_2D(power, res) maxErr = maxloc(abs(legVal(:,iVar) - legCoeffs(:,iVar)), 1) write(*,*) 'Ref. sol ', legCoeffs(maxErr,iVar), ' alg delivers: ', legVal(maxErr,iVar) end do - + res = maxval(abs(legVal(:,:) - legCoeffs(:,:))) end subroutine diff --git a/utests/with_fftw/ply_fpt_ifpt_2D_multiVar_test.f90 b/utests/with_fftw/ply_fpt_ifpt_2D_multiVar_test.f90 index 38478c2..18b5a72 100644 --- a/utests/with_fftw/ply_fpt_ifpt_2D_multiVar_test.f90 +++ b/utests/with_fftw/ply_fpt_ifpt_2D_multiVar_test.f90 @@ -4,7 +4,8 @@ program ply_fpt_ifpt_2D_multiVar_test use env_module, only: rk, fin_env use tem_logging_module, only: logUnit use tem_general_module, only: tem_general_type, tem_start - use ply_legFpt_module, only: ply_legFpt_type, ply_init_legFPT + use ply_legFpt_module, only: ply_legFpt_type, ply_init_legFPT, & + & ply_legFpt_bu_type use ply_legFpt_2D_module, only: ply_legToPnt_2D, & & ply_pntToLeg_2D @@ -18,7 +19,7 @@ program ply_fpt_ifpt_2D_multiVar_test call tem_start(codeName = 'Ateles unit test', & & version = 'utest', & & general = general ) - + res = 0.0_rk do iPower = 1,6 call ply_check_legToPnt_2D(iPower, newRes) @@ -29,7 +30,7 @@ program ply_fpt_ifpt_2D_multiVar_test if(res.lt.1e-08) then write(logUnit(1),*) 'PASSED' - end if + end if call fin_env() @@ -43,6 +44,7 @@ subroutine ply_check_legToPnt_2D(power, res) real(kind=rk), allocatable :: legCoeffs(:,:), legCoeffsIn(:,:) real(kind=rk), allocatable :: pntVal(:,:), legVal(:,:) type(ply_legFpt_type) :: fpt + type(ply_legFpt_bu_type) :: bu ! Define the maximal polynomial degree we want to calculate the ! bases exchange for. @@ -52,34 +54,35 @@ subroutine ply_check_legToPnt_2D(power, res) & ' Number of Legendre coefficients (per dim): ', maxPolyDegree+1 write(logUnit(10),*) '------------------------------------' // & & ' Number of Legendre coefficients (total): ',(maxPolyDegree+1)**2 - + ! Create the Legendre expansion coefficients - allocate(legCoeffs((maxPolyDegree+1)**2, nVars)) - allocate(legCoeffsIn((maxPolyDegree+1)**2, nVars)) + allocate(legCoeffs((maxPolyDegree+1)**2, nVars)) + allocate(legCoeffsIn((maxPolyDegree+1)**2, nVars)) do iVar = 1, nVars legCoeffs(:,iVar) = real(iVar,rk) end do - - ! Init the FPT + + ! Init the FPT call ply_init_legFpt( maxPolyDegree = maxPolyDegree, & & fpt = fpt, & - & nIndeps = maxPolyDegree+1 ) - + & nIndeps = maxPolyDegree+1, & + & bu = bu ) + ! now transform to the Chebyshev nodes - allocate(pntVal( (maxPolyDegree+1)**2,nVars )) + allocate(pntVal( (maxPolyDegree+1)**2,nVars )) legCoeffsIn = legCoeffs ! Duplicate input vector to make sure that it is not modified in the trafo write(logUnit(10),*) 'Calculating FPT ...' call ply_legToPnt_2D( fpt = fpt, legCoeffs = legCoeffsIn, & - & pntVal = pntVal, nVars = nVars ) + & pntVal = pntVal, nVars = nVars, bu = bu ) write(logUnit(10),*) 'Finished' ! now transform back to Legendre coefficients - allocate(legVal( (maxPolyDegree+1)**2,nVars )) + allocate(legVal( (maxPolyDegree+1)**2,nVars )) write(logUnit(10),*) 'Calculating inverse FPT ...' call ply_pntToLeg_2D( fpt = fpt, pntVal = pntVal, & - & legCoeffs = legVal, nVars = nVars ) + & legCoeffs = legVal, nVars = nVars, bu = bu ) write(logUnit(10),*) 'Finished' - + !!do iDof = 1, (maxPolyDegree+1)**2 !! write(*,*) 'Leg coeff ', iDof, ' has error: ', legVal(iDof) - legCoeffs(iDof) !!end do @@ -92,7 +95,7 @@ subroutine ply_check_legToPnt_2D(power, res) maxErr = maxloc(abs(legVal(:,iVar) - legCoeffs(:,iVar)), 1) write(*,*) 'Ref. sol ', legCoeffs(maxErr,iVar), ' alg delivers: ', legVal(maxErr,iVar) end do - + res = maxval(abs(legVal(:,:) - legCoeffs(:,:))) end subroutine diff --git a/utests/with_fftw/ply_fpt_ifpt_2D_singVar_lobattoNodes_test.f90 b/utests/with_fftw/ply_fpt_ifpt_2D_singVar_lobattoNodes_test.f90 index 6f4bcdf..23adb70 100644 --- a/utests/with_fftw/ply_fpt_ifpt_2D_singVar_lobattoNodes_test.f90 +++ b/utests/with_fftw/ply_fpt_ifpt_2D_singVar_lobattoNodes_test.f90 @@ -4,7 +4,8 @@ program ply_fpt_ifpt_2D_singVar_lobattoNodes_test use env_module, only: rk, fin_env use tem_logging_module, only: logUnit use tem_general_module, only: tem_general_type, tem_start - use ply_legFpt_module, only: ply_legFpt_type, ply_init_legFPT + use ply_legFpt_module, only: ply_legFpt_type, ply_init_legFPT, & + & ply_legFpt_bu_type use ply_legFpt_2D_module, only: ply_legToPnt_2D, & & ply_pntToLeg_2D @@ -29,7 +30,7 @@ program ply_fpt_ifpt_2D_singVar_lobattoNodes_test if (res < 1e-08) then write(logUnit(1),*) 'PASSED' - end if + end if call fin_env() @@ -44,6 +45,7 @@ subroutine ply_check_legToPnt_2D(power, res) real(kind=rk), allocatable :: legCoeffs(:), legCoeffsIn(:) real(kind=rk), allocatable :: pntVal(:), legVal(:) type(ply_legFpt_type) :: fpt + type(ply_legFpt_bu_type) :: bu ! Define the maximal polynomial degree we want to calculate the ! bases exchange for. @@ -62,19 +64,22 @@ subroutine ply_check_legToPnt_2D(power, res) call ply_init_legFpt( maxPolyDegree = maxPolyDegree, & & fpt = fpt, & & nIndeps = maxPolyDegree+1, & - & lobattoPoints = .true. ) + & lobattoPoints = .true., & + & bu = bu ) ! now transform to the Chebyshev nodes allocate(pntVal( (maxPolyDegree+1)**2)) legCoeffsIn = legCoeffs ! Duplicate input vector to make sure that it is not modified in the trafo write(logUnit(10),*) 'Calculating FPT ...' - call ply_legToPnt_2D( fpt = fpt, legCoeffs = legCoeffsIn, pntVal = pntVal ) + call ply_legToPnt_2D( fpt = fpt, legCoeffs = legCoeffsIn, pntVal = pntVal, & + & bu = bu ) write(logUnit(10),*) 'Finished' ! now transform back to Legendre coefficients allocate(legVal( (maxPolyDegree+1)**2 )) write(logUnit(10),*) 'Calculating inverse FPT ...' - call ply_pntToLeg_2D( fpt = fpt, pntVal = pntVal, legCoeffs = legVal ) + call ply_pntToLeg_2D( fpt = fpt, pntVal = pntVal, legCoeffs = legVal, & + & bu = bu ) write(logUnit(10),*) 'Finished' !!do iDof = 1, (maxPolyDegree+1)**2 diff --git a/utests/with_fftw/ply_fpt_ifpt_2D_singVar_test.f90 b/utests/with_fftw/ply_fpt_ifpt_2D_singVar_test.f90 index be0ec3a..b7f2990 100644 --- a/utests/with_fftw/ply_fpt_ifpt_2D_singVar_test.f90 +++ b/utests/with_fftw/ply_fpt_ifpt_2D_singVar_test.f90 @@ -4,7 +4,8 @@ program ply_fpt_ifpt_2D_singVar_test use env_module, only: rk, fin_env use tem_logging_module, only: logUnit use tem_general_module, only: tem_general_type, tem_start - use ply_legFpt_module, only: ply_legFpt_type, ply_init_legFPT + use ply_legFpt_module, only: ply_legFpt_type, ply_init_legFPT, & + & ply_legFpt_bu_type use ply_legFpt_2D_module, only: ply_legToPnt_2D, ply_pntToLeg_2D implicit none @@ -28,7 +29,7 @@ program ply_fpt_ifpt_2D_singVar_test if(res.lt.1e-08) then write(logUnit(1),*) 'PASSED' - end if + end if call fin_env() @@ -41,7 +42,8 @@ subroutine ply_check_legToPnt_2D(power, res) real(kind=rk), allocatable :: legCoeffs(:), legCoeffsIn(:) real(kind=rk), allocatable :: pntVal(:), legVal(:) type(ply_legFpt_type) :: fpt - + type(ply_legFpt_bu_type) :: bu + ! Define the maximal polynomial degree we want to calculate the ! bases exchange for. maxPolyDegree = 2**power-1 ! maxPolyDegree+1 has to be a power of 2 @@ -49,30 +51,31 @@ subroutine ply_check_legToPnt_2D(power, res) & ' Number of Legendre coefficients (per dim): ', maxPolyDegree+1 write(logUnit(10),*) '------------------------------------' // & & ' Number of Legendre coefficients (total): ',(maxPolyDegree+1)**2 - + ! Create the Legendre expansion coefficients - allocate(legCoeffs((maxPolyDegree+1)**2)) - allocate(legCoeffsIn((maxPolyDegree+1)**2)) + allocate(legCoeffs((maxPolyDegree+1)**2)) + allocate(legCoeffsIn((maxPolyDegree+1)**2)) legCoeffs(:) = real(1,rk) - - ! Init the FPT + + ! Init the FPT call ply_init_legFpt( maxPolyDegree = maxPolyDegree, & & fpt = fpt, & - & nIndeps = maxPolyDegree+1 ) - + & nIndeps = maxPolyDegree+1,& + & bu = bu ) + ! now transform to the Chebyshev nodes - allocate(pntVal( (maxPolyDegree+1)**2)) + allocate(pntVal( (maxPolyDegree+1)**2)) legCoeffsIn = legCoeffs ! Duplicate input vector to make sure that it is not modified in the trafo write(logUnit(10),*) 'Calculating FPT ...' - call ply_legToPnt_2D( fpt = fpt, legCoeffs = legCoeffsIn, pntVal = pntVal ) + call ply_legToPnt_2D( fpt = fpt, legCoeffs = legCoeffsIn, pntVal = pntVal, bu = bu ) write(logUnit(10),*) 'Finished' ! now transform back to Legendre coefficients - allocate(legVal( (maxPolyDegree+1)**2 )) + allocate(legVal( (maxPolyDegree+1)**2 )) write(logUnit(10),*) 'Calculating inverse FPT ...' - call ply_pntToLeg_2D( fpt = fpt, pntVal = pntVal, legCoeffs = legVal ) + call ply_pntToLeg_2D( fpt = fpt, pntVal = pntVal, legCoeffs = legVal, bu = bu ) write(logUnit(10),*) 'Finished' - + !!do iDof = 1, (maxPolyDegree+1)**2 !! write(*,*) 'Leg coeff ', iDof, ' has error: ', legVal(iDof) - legCoeffs(iDof) !!end do @@ -83,7 +86,7 @@ subroutine ply_check_legToPnt_2D(power, res) & ' has largest error of: ' ,maxval(abs(legVal(:) - legCoeffs(:))) maxErr = maxloc(abs(legVal(:) - legCoeffs(:)), 1) write(logUnit(10),*) 'Ref. sol ', legCoeffs(maxErr), ' alg delivers: ', legVal(maxErr) - + res = maxval(abs(legVal(:) - legCoeffs(:))) end subroutine diff --git a/utests/with_fftw/ply_fpt_ifpt_3D_multiVar_lobattoNodes_test.f90 b/utests/with_fftw/ply_fpt_ifpt_3D_multiVar_lobattoNodes_test.f90 index 9137aaa..bc043e8 100644 --- a/utests/with_fftw/ply_fpt_ifpt_3D_multiVar_lobattoNodes_test.f90 +++ b/utests/with_fftw/ply_fpt_ifpt_3D_multiVar_lobattoNodes_test.f90 @@ -4,7 +4,8 @@ program ply_fpt_ifpt_3D_multiVar_lobattoNodes_test use env_module, only: rk, fin_env use tem_logging_module, only: logUnit use tem_general_module, only: tem_general_type, tem_start - use ply_legFpt_module, only: ply_legFpt_type, ply_init_legFPT + use ply_legFpt_module, only: ply_legFpt_type, ply_init_legFPT, & + & ply_legFpt_bu_type use ply_legFpt_3D_module, only: ply_legToPnt_3D, & & ply_pntToLeg_3D @@ -29,7 +30,7 @@ program ply_fpt_ifpt_3D_multiVar_lobattoNodes_test if(res.lt.1e-08) then write(logUnit(1),*) 'PASSED' - end if + end if call fin_env() @@ -42,7 +43,8 @@ subroutine ply_check_legToPnt_3D(power,res) real(kind=rk), allocatable :: legCoeffs(:,:), legCoeffsIn(:,:) real(kind=rk), allocatable :: pntVal(:,:), legVal(:,:) type(ply_legFpt_type) :: fpt - + type(ply_legFpt_bu_type) :: bu + ! Define the maximal polynomial degree we want to calculate the ! bases exchange for. maxPolyDegree = 2**power-1 ! maxPolyDegree+1 has to be a power of 2 @@ -51,39 +53,40 @@ subroutine ply_check_legToPnt_3D(power,res) & ' Number of Legendre coefficients (per dim): ', maxPolyDegree+1 write(logUnit(10),*) '------------------------------------' // & & ' Number of Legendre coefficients (total): ',(maxPolyDegree+1)**3 - + ! Create the Legendre expansion coefficients - allocate(legCoeffs((maxPolyDegree+1)**3,nVars)) - allocate(legCoeffsIn((maxPolyDegree+1)**3,nVars)) + allocate(legCoeffs((maxPolyDegree+1)**3,nVars)) + allocate(legCoeffsIn((maxPolyDegree+1)**3,nVars)) do iVar = 1, nVars legCoeffs(:,iVar) = real(iVar, rk) end do - - ! Init the FPT + + ! Init the FPT call ply_init_legFpt( maxPolyDegree = maxPolyDegree, & & nIndeps = (maxPolyDegree+1)**2, & & fpt = fpt, & - & lobattoPoints = .true. ) - + & lobattoPoints = .true., & + & bu = bu ) + ! now transform to the Chebyshev nodes - allocate(pntVal( (maxPolyDegree+1)**3, nVars )) + allocate(pntVal( (maxPolyDegree+1)**3, nVars )) legCoeffsIn = legCoeffs write(logUnit(10),*) 'Calculating FPT ...' call ply_legToPnt_3D( fpt = fpt, legCoeffs = legCoeffsIn, & - & pntVal = pntVal, nVars = nVars ) + & pntVal = pntVal, nVars = nVars, bu = bu ) write(logUnit(10),*) 'Finished' ! now transform back to Legendre coefficients - allocate(legVal( (maxPolyDegree+1)**3,nVars )) + allocate(legVal( (maxPolyDegree+1)**3,nVars )) write(logUnit(10),*) 'Calculating inverse FPT ...' call ply_pntToLeg_3D( fpt = fpt, pntVal = pntVal, & - & legCoeffs = legVal, nVars = nVars ) + & legCoeffs = legVal, nVars = nVars, bu = bu ) write(logUnit(10),*) 'Finished' - + ! Write out the coefficient with the largest absolute error do iVar = 1, nVars - do iDof = 1, size(legVal(:,iVar)) + do iDof = 1, size(legVal(:,iVar)) write(logUnit(10),*) legVal(iDof,iVar), legCoeffs(iDof,iVar) end do write(logUnit(10),*) 'For var ', iVar, & diff --git a/utests/with_fftw/ply_fpt_ifpt_3D_multiVar_test.f90 b/utests/with_fftw/ply_fpt_ifpt_3D_multiVar_test.f90 index ee39504..ae817c3 100644 --- a/utests/with_fftw/ply_fpt_ifpt_3D_multiVar_test.f90 +++ b/utests/with_fftw/ply_fpt_ifpt_3D_multiVar_test.f90 @@ -4,7 +4,8 @@ program ply_fpt_ifpt_3D_multiVar_test use env_module, only: rk, fin_env use tem_logging_module, only: logUnit use tem_general_module, only: tem_general_type, tem_start - use ply_legFpt_module, only: ply_legFpt_type, ply_init_legFPT + use ply_legFpt_module, only: ply_legFpt_type, ply_init_legFPT, & + & ply_legFpt_bu_type use ply_legFpt_3D_module, only: ply_legToPnt_3D, & & ply_pntToLeg_3D @@ -29,7 +30,7 @@ program ply_fpt_ifpt_3D_multiVar_test if(res.lt.1e-08) then write(logUnit(1),*) 'PASSED' - end if + end if call fin_env() @@ -42,6 +43,7 @@ subroutine ply_check_legToPnt_3D(power,res) real(kind=rk), allocatable :: legCoeffs(:,:), legCoeffsIn(:,:) real(kind=rk), allocatable :: pntVal(:,:), legVal(:,:) type(ply_legFpt_type) :: fpt + type(ply_legFpt_bu_type) :: bu ! Define the maximal polynomial degree we want to calculate the ! bases exchange for. @@ -51,38 +53,41 @@ subroutine ply_check_legToPnt_3D(power,res) & ' Number of Legendre coefficients (per dim): ', maxPolyDegree+1 write(logUnit(10),*) '------------------------------------' // & & ' Number of Legendre coefficients (total): ',(maxPolyDegree+1)**3 - + ! Create the Legendre expansion coefficients - allocate(legCoeffs((maxPolyDegree+1)**3,nVars)) - allocate(legCoeffsIn((maxPolyDegree+1)**3,nVars)) + allocate(legCoeffs((maxPolyDegree+1)**3,nVars)) + allocate(legCoeffsIn((maxPolyDegree+1)**3,nVars)) do iVar = 1, nVars legCoeffs(:,iVar) = real(iVar, rk) end do - - ! Init the FPT + + ! Init the FPT call ply_init_legFpt( maxPolyDegree = maxPolyDegree, & & nIndeps = (maxpolydegree+1)**2, & - & fpt = fpt ) - + & fpt = fpt, & + & bu = bu ) + ! now transform to the Chebyshev nodes - allocate(pntVal( (maxPolyDegree+1)**3, nVars )) + allocate(pntVal( (maxPolyDegree+1)**3, nVars )) legCoeffsIn = legCoeffs write(logUnit(10),*) 'Calculating FPT ...' call ply_legToPnt_3D( fpt = fpt, & & legCoeffs = legCoeffsIn, & & pntVal = pntVal, & - & nVars = nVars ) + & nVars = nVars, & + & bu = bu ) write(logUnit(10),*) 'Finished' ! now transform back to Legendre coefficients - allocate(legVal( (maxPolyDegree+1)**3,nVars )) + allocate(legVal( (maxPolyDegree+1)**3,nVars )) write(logUnit(10),*) 'Calculating inverse FPT ...' call ply_pntToLeg_3D( fpt = fpt, & & pntVal = pntVal, & & legCoeffs = legVal, & - & nVars = nVars ) + & nVars = nVars, & + & bu = bu ) write(logUnit(10),*) 'Finished' - + ! Write out the coefficient with the largest absolute error do iVar = 1, nVars diff --git a/utests/with_fftw/ply_fpt_ifpt_3D_singVar_lobattoNodes_test.f90 b/utests/with_fftw/ply_fpt_ifpt_3D_singVar_lobattoNodes_test.f90 index 78cd332..eeec7a2 100644 --- a/utests/with_fftw/ply_fpt_ifpt_3D_singVar_lobattoNodes_test.f90 +++ b/utests/with_fftw/ply_fpt_ifpt_3D_singVar_lobattoNodes_test.f90 @@ -4,7 +4,8 @@ program ply_fpt_ifpt_3D_singVar_lobattoNodes_test use env_module, only: rk, fin_env use tem_general_module, only: tem_general_type, tem_start use tem_logging_module, only: logUnit - use ply_legFpt_module, only: ply_legFpt_type, ply_init_legFPT + use ply_legFpt_module, only: ply_legFpt_type, ply_init_legFPT, & + & ply_legFpt_bu_type use ply_legFpt_3D_module, only: ply_legToPnt_3D, & & ply_pntToLeg_3D @@ -18,7 +19,7 @@ program ply_fpt_ifpt_3D_singVar_lobattoNodes_test call tem_start(codeName = 'Ateles unit test', & & version = 'utest', & & general = general ) - + res = 0.0_rk do iPower = 1,3 call ply_check_legToPnt_3D(iPower, newRes) @@ -29,7 +30,7 @@ program ply_fpt_ifpt_3D_singVar_lobattoNodes_test if(res.lt.1e-08) then write(logUnit(1),*) 'PASSED' - end if + end if call fin_env() @@ -44,6 +45,7 @@ subroutine ply_check_legToPnt_3D(power,res) real(kind=rk), allocatable :: legCoeffs(:,:), legCoeffsIn(:,:) real(kind=rk), allocatable :: pntVal(:,:), legVal(:,:) type(ply_legFpt_type) :: fpt + type(ply_legFpt_bu_type) :: bu ! Define the maximal polynomial degree we want to calculate the ! bases exchange for. @@ -53,43 +55,44 @@ subroutine ply_check_legToPnt_3D(power,res) & ' Number of Legendre coefficients (per dim): ', maxPolyDegree+1 write(logUnit(10),*) '------------------------------------' // & & ' Number of Legendre coefficients (total): ',(maxPolyDegree+1)**3 - + ! Create the Legendre expansion coefficients - allocate(legCoeffs((maxPolyDegree+1)**3,nVars)) - allocate(legCoeffsIn((maxPolyDegree+1)**3,nVars)) + allocate(legCoeffs((maxPolyDegree+1)**3,nVars)) + allocate(legCoeffsIn((maxPolyDegree+1)**3,nVars)) do iVar = 1, nVars legCoeffs(:,iVar) = real(iVar, rk) end do - - ! Init the FPT + + ! Init the FPT call ply_init_legFpt( maxPolyDegree = maxPolyDegree, & & nIndeps = (maxPolyDegree+1)**2, & & fpt = fpt, & - & lobattoPoints = .true. ) - + & lobattoPoints = .true., & + & bu = bu ) + ! now transform to the Chebyshev nodes - allocate(pntVal( (maxPolyDegree+1)**3, nVars )) + allocate(pntVal( (maxPolyDegree+1)**3, nVars )) legCoeffsIn = legCoeffs write(logUnit(10),*) 'Calculating FPT ...' do iVar=1,nVars call ply_legToPnt_3D( fpt = fpt, legCoeffs = legCoeffsIn(:,iVar), & - & pntVal = pntVal(:,iVar) ) + & pntVal = pntVal(:,iVar), bu = bu ) end do write(logUnit(10),*) 'Finished' ! now transform back to Legendre coefficients - allocate(legVal( (maxPolyDegree+1)**3,nVars )) + allocate(legVal( (maxPolyDegree+1)**3,nVars )) write(logUnit(10),*) 'Calculating inverse FPT ...' do iVar=1,nVars call ply_pntToLeg_3D( fpt = fpt, pntVal = pntVal(:,iVar), & - & legCoeffs = legVal(:,iVar) ) + & legCoeffs = legVal(:,iVar), bu = bu ) end do write(logUnit(10),*) 'Finished' - + ! Write out the coefficient with the largest absolute error do iVar = 1, nVars - do iDof = 1, size(legVal(:,iVar)) + do iDof = 1, size(legVal(:,iVar)) write(logUnit(10),*) legVal(iDof,iVar), legCoeffs(iDof,iVar) end do write(logUnit(10),*) 'For var ', iVar, & diff --git a/utests/with_fftw/ply_fpt_ifpt_3D_singVar_test.f90 b/utests/with_fftw/ply_fpt_ifpt_3D_singVar_test.f90 index 4306662..5a72a18 100644 --- a/utests/with_fftw/ply_fpt_ifpt_3D_singVar_test.f90 +++ b/utests/with_fftw/ply_fpt_ifpt_3D_singVar_test.f90 @@ -4,7 +4,8 @@ program ply_fpt_ifpt_3D_singVar_test use env_module, only: rk, fin_env use tem_logging_module, only: logUnit use tem_general_module, only: tem_general_type, tem_start - use ply_legFpt_module, only: ply_legFpt_type, ply_init_legFPT + use ply_legFpt_module, only: ply_legFpt_type, ply_init_legFPT, & + & ply_legFpt_bu_type use ply_legFpt_3D_module, only: ply_legToPnt_3D, ply_pntToLeg_3D implicit none @@ -17,7 +18,7 @@ program ply_fpt_ifpt_3D_singVar_test call tem_start(codeName = 'Ateles unit test', & & version = 'utest', & & general = general ) - + res = 0.0_rk do iPower = 1,3 call ply_check_legToPnt_3D(iPower, newRes) @@ -28,7 +29,7 @@ program ply_fpt_ifpt_3D_singVar_test if(res.lt.1e-08) then write(logUnit(1),*) 'PASSED' - end if + end if call fin_env() @@ -41,6 +42,7 @@ subroutine ply_check_legToPnt_3D(power,res) real(kind=rk), allocatable :: legCoeffs(:,:), legCoeffsIn(:,:) real(kind=rk), allocatable :: pntVal(:,:), legVal(:,:) type(ply_legFpt_type) :: fpt + type(ply_legFpt_bu_type) :: bu ! Define the maximal polynomial degree we want to calculate the ! bases exchange for. @@ -50,40 +52,43 @@ subroutine ply_check_legToPnt_3D(power,res) & // ' Number of Legendre coefficients (per dim): ', maxPolyDegree+1 write(logUnit(10),*) '------------------------------------' & & // ' Number of Legendre coefficients (total): ', (maxPolyDegree+1)**3 - + ! Create the Legendre expansion coefficients - allocate(legCoeffs((maxPolyDegree+1)**3,nVars)) - allocate(legCoeffsIn((maxPolyDegree+1)**3,nVars)) + allocate(legCoeffs((maxPolyDegree+1)**3,nVars)) + allocate(legCoeffsIn((maxPolyDegree+1)**3,nVars)) do iVar = 1, nVars legCoeffs(:,iVar) = real(iVar, rk) end do - - ! Init the FPT + + ! Init the FPT call ply_init_legFpt( maxPolyDegree = maxPolyDegree, & & nIndeps = (maxPolyDegree+1)**2, & - & fpt = fpt ) - + & fpt = fpt, & + & bu = bu ) + ! now transform to the Chebyshev nodes - allocate(pntVal( (maxPolyDegree+1)**3, nVars )) + allocate(pntVal( (maxPolyDegree+1)**3, nVars )) legCoeffsIn = legCoeffs write(logUnit(10),*) 'Calculating FPT ...' do iVar=1,nVars call ply_legToPnt_3D( fpt = fpt, & & legCoeffs = legCoeffsIn(:,iVar), & - & pntVal = pntVal(:,iVar) ) + & pntVal = pntVal(:,iVar), & + & bu = bu ) end do write(logUnit(10),*) 'Finished' ! now transform back to Legendre coefficients - allocate(legVal( (maxPolyDegree+1)**3,nVars )) + allocate(legVal( (maxPolyDegree+1)**3,nVars )) write(logUnit(10),*) 'Calculating inverse FPT ...' do iVar=1,nVars call ply_pntToLeg_3D( fpt = fpt, & & pntVal = pntVal(:,iVar), & - & legCoeffs = legVal(:,iVar) ) + & legCoeffs = legVal(:,iVar), & + & bu = bu ) end do write(logUnit(10),*) 'Finished' - + ! Write out the coefficient with the largest absolute error do iVar = 1, nVars diff --git a/utests/with_fftw/ply_fpt_ifpt_test.f90 b/utests/with_fftw/ply_fpt_ifpt_test.f90 index b6ae7fe..2aabefb 100644 --- a/utests/with_fftw/ply_fpt_ifpt_test.f90 +++ b/utests/with_fftw/ply_fpt_ifpt_test.f90 @@ -5,7 +5,7 @@ program ply_fpt_ifpt_test use tem_logging_module, only: logUnit use tem_aux_module, only: tem_abort use ply_legFpt_module, only: ply_init_legFpt, ply_legFpt_type, & - & ply_legToPnt, ply_pntToLeg + & ply_legToPnt, ply_pntToLeg, ply_legFpt_bu_type use ply_modg_basis_module, only: legendre_1D use tem_general_module, only: tem_general_type, tem_start @@ -30,7 +30,7 @@ program ply_fpt_ifpt_test if(res.lt.1e-08) then write(logUnit(1),*) 'PASSED' - end if + end if call fin_env() @@ -42,33 +42,37 @@ subroutine check_fwd_bwd(power, res) integer :: maxPolyDegree real(kind=rk), allocatable :: legCoeffs(:), pntVal(:), legVal(:) type(ply_legFpt_type) :: fpt + type(ply_legFpt_bu_type) :: bu ! Define the maximal polynomial degree we want to calculate the ! bases exchange for. maxPolyDegree = 2**power-1 ! maxPolyDegree+1 has to be a power of 2 write(logUnit(10),*) '------- Number of Legendre coefficients: ', maxPolyDegree+1 - + ! Create the Legendre expansion coefficients - allocate(legCoeffs(1:maxPolyDegree+1)) - allocate(legVal(1:maxPolyDegree+1)) + allocate(legCoeffs(1:maxPolyDegree+1)) + allocate(legVal(1:maxPolyDegree+1)) legCoeffs(:) = 1.0_rk legVal = legCoeffs - ! Init the FPT + ! Init the FPT call ply_init_legFpt( maxPolyDegree = maxPolyDegree, & & nIndeps = 1, & - & fpt = fpt ) + & fpt = fpt, & + & bu = bu ) ! now transform to the Chebyshev nodes - allocate(pntVal(1:maxPolyDegree+1)) + allocate(pntVal(1:maxPolyDegree+1)) write(logUnit(10),*) 'Calculating FPT ...' - call ply_legToPnt( fpt = fpt, legCoeffs = legVal, pntVal = pntVal, nIndeps = 1 ) + call ply_legToPnt( fpt = fpt, legCoeffs = legVal, pntVal = pntVal, & + nIndeps = 1, bu = bu ) write(logUnit(10),*) 'Finished' ! now transform to the Legendre coefficients write(logUnit(10),*) 'Calculating inverse FPT ...' - call ply_pntToLeg( fpt = fpt, pntVal = pntVal, legCoeffs = legVal, nIndeps = 1 ) + call ply_pntToLeg( fpt = fpt, pntVal = pntVal, legCoeffs = legVal, & + & nIndeps = 1, bu = bu ) write(logUnit(10),*) 'Finished' ! Write out the polynomial coefficient with the largest absolute error diff --git a/utests/with_fftw/ply_fpt_lobattoNodes_test.f90 b/utests/with_fftw/ply_fpt_lobattoNodes_test.f90 index 61ea95c..d1fef55 100644 --- a/utests/with_fftw/ply_fpt_lobattoNodes_test.f90 +++ b/utests/with_fftw/ply_fpt_lobattoNodes_test.f90 @@ -7,7 +7,7 @@ program ply_fpt_lobattoNodes_test use tem_logging_module, only: logUnit use tem_aux_module, only: tem_abort use ply_legFpt_module, only: ply_init_legFpt, ply_legFpt_type, & - & ply_legToPnt + & ply_legToPnt, ply_legFpt_bu_type use ply_modg_basis_module, only: legendre_1D use tem_general_module, only: tem_general_type, tem_start @@ -32,7 +32,7 @@ program ply_fpt_lobattoNodes_test if(res.lt.1e-08) then write(logUnit(1),*) 'PASSED' - end if + end if call fin_env() contains @@ -46,23 +46,24 @@ subroutine ply_check_legToPnt(power, res) real(kind=rk), allocatable :: chebPnt(:) real(kind=rk), allocatable :: legValChebPnt(:,:) type(ply_legFpt_type) :: fpt - + type(ply_legFpt_bu_type) :: bu + ! Define the maximal polynomial degree we want to calculate the ! bases exchange for. maxPolyDegree = 2**power-1 ! maxPolyDegree+1 has to be a power of 2 write(logUnit(10),*) '------- Number of Legendre coefficients: ', maxPolyDegree+1 - + ! Create the Legendre expansion coefficients - allocate(legCoeffs(1:maxPolyDegree+1)) + allocate(legCoeffs(1:maxPolyDegree+1)) legCoeffs(:) = 1.0_rk - + ! Create the Chebyshev nodes on the interval [-1,+1] allocate(chebPnt(maxPolyDegree+1)) do iPoint = 1, maxPolyDegree+1 chebPnt(iPoint) = cos((iPoint-1.0_rk)*PI/maxPolyDegree); !write(*,*) 'Lobatto-Chebyshev-Point', iPoint, chebPnt(iPoint) end do - + ! define the reference results for the point values (Chebyshev nodes) allocate( legValChebPnt(maxPolyDegree+1,maxPolyDegree+1) ) legValChebPnt(:,:) = legendre_1D(chebPnt, maxPolyDegree) @@ -73,23 +74,24 @@ subroutine ply_check_legToPnt(power, res) refVal(:) = refVal(:) + legValChebPnt(iPoly,:) * legCoeffs(iPoly) end do write(logUnit(10),*) 'Finished' - - ! Init the FPT + + ! Init the FPT call ply_init_legFpt( maxPolyDegree = maxPolyDegree, & & nIndeps = 1, & & fpt = fpt, & - & lobattoPoints = .true. ) - + & lobattoPoints = .true., & + & bu = bu ) + ! now transform to the Chebyshev nodes - allocate(pntVal(1:maxPolyDegree+1)) + allocate(pntVal(1:maxPolyDegree+1)) write(logUnit(10),*) 'Calculating FPT ...' call ply_legToPnt( fpt = fpt, legCoeffs = legCoeffs, pntVal = pntVal, & - & nIndeps = 1 ) + & nIndeps = 1, bu = bu ) write(logUnit(10),*) 'Finished' - + !!do iPoint = 1, maxPolyDegree+1 !! write(*,*) 'Point: ', chebPnt(iPoint), & - !! & ' FPT: ', pntVal(iPoint), & + !! & ' FPT: ', pntVal(iPoint), & !! & ' Ref.: ', refVal(iPoint), & !! & ' error: ', pntVal(iPoint)-refVal(iPoint) !!end do diff --git a/utests/with_fftw/ply_fpt_test.f90 b/utests/with_fftw/ply_fpt_test.f90 index 12985b9..e6d1f51 100644 --- a/utests/with_fftw/ply_fpt_test.f90 +++ b/utests/with_fftw/ply_fpt_test.f90 @@ -6,7 +6,7 @@ program ply_fpt_test use tem_logging_module, only: logUnit use ply_legFpt_module, only: ply_init_legFpt, & & ply_legFpt_type, & - & ply_legToPnt + & ply_legToPnt, ply_legFpt_bu_type use ply_modg_basis_module, only: legendre_1D use tem_general_module, only: tem_general_type, tem_start @@ -58,6 +58,7 @@ subroutine ply_check_legToPnt(blocksize, maxpolydegree, res) real(kind=rk), allocatable :: chebPnt(:) real(kind=rk), allocatable :: legValChebPnt(:,:) type(ply_legFpt_type) :: fpt + type(ply_legFpt_bu_type) :: bu ! Define the maximal polynomial degree we want to calculate the ! bases exchange for. @@ -89,13 +90,14 @@ subroutine ply_check_legToPnt(blocksize, maxpolydegree, res) call ply_init_legFpt( maxPolyDegree = maxPolyDegree, & & nIndeps = 1, & & fpt = fpt, & - & blocksize = blocksize ) + & blocksize = blocksize, & + & bu = bu ) ! now transform to the Chebyshev nodes allocate(pntVal(1:maxPolyDegree+1)) write(logUnit(10),*) 'Calculating FPT ...' call ply_legToPnt( fpt = fpt, legCoeffs = legCoeffs, & - & pntVal = pntVal, nIndeps = 1 ) + & pntVal = pntVal, nIndeps = 1, bu = bu ) write(logUnit(10),*) 'Finished' !!do iPoint = 1, maxPolyDegree+1 diff --git a/utests/with_fftw/ply_ifpt_3D_singVar_lobattoNodes_test.fpp b/utests/with_fftw/ply_ifpt_3D_singVar_lobattoNodes_test.fpp index 63cfefe..644a17b 100644 --- a/utests/with_fftw/ply_ifpt_3D_singVar_lobattoNodes_test.fpp +++ b/utests/with_fftw/ply_ifpt_3D_singVar_lobattoNodes_test.fpp @@ -6,7 +6,8 @@ program ply_ifpt_3D_singVar_lobattoNodes_test use tem_param_module, only: PI use tem_logging_module, only: logUnit use tem_aux_module, only: tem_abort - use ply_legFpt_module, only: ply_legFpt_type, ply_init_legFPT + use ply_legFpt_module, only: ply_legFpt_type, ply_init_legFPT, & + & ply_legFpt_bu_type use ply_legFpt_3D_module, only: ply_pntToLeg_3D use ply_modg_basis_module, only: evalLegendreTensPoly use ply_dof_module, only: Q_space @@ -48,6 +49,7 @@ contains real(kind=rk), allocatable :: legValChebPnt(:,:) real(kind=rk) :: rfac type(ply_legFpt_type) :: fpt + type(ply_legFpt_bu_type) :: bu integer, allocatable :: rand_seed(:) integer :: nSeeds @@ -119,11 +121,13 @@ contains call ply_init_legFpt( maxPolyDegree = maxPolyDegree, & & nIndeps = (maxPolyDegree+1)**2, & & fpt = fpt, & - & lobattoPoints = .true. ) + & lobattoPoints = .true., & + & bu = bu) ! now transform to the Chebyshev nodes write(logUnit(10),*) 'Calculating FPT ...' - call ply_pntToLeg_3D( fpt = fpt, pntVal = pntVal, legCoeffs = legCoeffs ) + call ply_pntToLeg_3D( fpt = fpt, pntVal = pntVal, legCoeffs = legCoeffs, & + & bu = bu ) write(logUnit(10),*) 'Finished' !!do iPoint = 1, (maxPolyDegree+1)**3 diff --git a/utests/with_fftw/ply_ifpt_3D_singVar_test.fpp b/utests/with_fftw/ply_ifpt_3D_singVar_test.fpp index ed82cc8..adbc246 100644 --- a/utests/with_fftw/ply_ifpt_3D_singVar_test.fpp +++ b/utests/with_fftw/ply_ifpt_3D_singVar_test.fpp @@ -7,7 +7,8 @@ program ply_ifpt_3D_singVar_test use tem_logging_module, only: logUnit use tem_aux_module, only: tem_abort use tem_general_module, only: tem_general_type, tem_start - use ply_legFpt_module, only: ply_legFpt_type, ply_init_legFPT + use ply_legFpt_module, only: ply_legFpt_type, ply_init_legFPT, & + & ply_legFpt_bu_type use ply_legFpt_3D_module, only: ply_pntToLeg_3D use ply_modg_basis_module, only: legendre_1D @@ -48,6 +49,7 @@ contains real(kind=rk), allocatable :: legValChebPnt(:,:) real(kind=rk) :: rfac type(ply_legFpt_type) :: fpt + type(ply_legFpt_bu_type) :: bu integer, allocatable :: rand_seed(:) integer :: nSeeds @@ -113,11 +115,13 @@ contains ! Init the FPT call ply_init_legFpt( maxPolyDegree = maxPolyDegree, & & nIndeps = (maxPolyDegree+1)**2, & - & fpt = fpt ) + & fpt = fpt, & + & bu = bu ) ! now transform to the Chebyshev nodes write(logUnit(10),*) 'Calculating FPT ...' - call ply_pntToLeg_3D( fpt = fpt, pntVal = pntVal, legCoeffs = legCoeffs ) + call ply_pntToLeg_3D( fpt = fpt, pntVal = pntVal, legCoeffs = legCoeffs, & + & bu = bu ) write(logUnit(10),*) 'Finished' !!do iPoint = 1, (maxPolyDegree+1)**3 diff --git a/utests/with_fftw/ply_ifpt_lobattoNodes_test.f90 b/utests/with_fftw/ply_ifpt_lobattoNodes_test.f90 index 8cc601f..f3d8240 100644 --- a/utests/with_fftw/ply_ifpt_lobattoNodes_test.f90 +++ b/utests/with_fftw/ply_ifpt_lobattoNodes_test.f90 @@ -8,7 +8,7 @@ program ply_ifpt_lobattoNodes_test use tem_aux_module, only: tem_abort use tem_general_module, only: tem_general_type, tem_start use ply_legFpt_module, only: ply_init_legFpt, ply_legFpt_type, & - & ply_pntToLeg + & ply_pntToLeg, ply_legFpt_bu_type use ply_modg_basis_module, only: legendre_1D implicit none @@ -32,7 +32,7 @@ program ply_ifpt_lobattoNodes_test if(res.lt.1e-08) then write(logUnit(1),*) 'PASSED' - end if + end if call fin_env() contains @@ -46,23 +46,24 @@ subroutine ply_check_pntToLeg(power, res) real(kind=rk), allocatable :: chebPnt(:) real(kind=rk), allocatable :: legValChebPnt(:,:) type(ply_legFpt_type) :: fpt - + type(ply_legFpt_bu_type) :: bu + ! Define the maximal polynomial degree we want to calculate the ! bases exchange for. maxPolyDegree = 2**power-1 ! maxPolyDegree+1 has to be a power of 2 write(logUnit(10),*) '------- Number of Legendre coefficients: ', maxPolyDegree+1 - + ! Create the Legendre expansion coefficients - allocate(legCoeffs(1:maxPolyDegree+1)) + allocate(legCoeffs(1:maxPolyDegree+1)) legCoeffs(:) = 1.0_rk - + ! Create the Chebyshev nodes on the interval [-1,+1] allocate(chebPnt(maxPolyDegree+1)) do iPoint = 1, maxPolyDegree+1 chebPnt(iPoint) = cos((iPoint-1.0_rk)*PI/maxPolyDegree); !write(*,*) 'Cehbyshev point', iPoint, ' is at: ', chebPnt(iPoint) end do - + ! define the point values (Lobatto-Chebyshev-nodes) allocate( legValChebPnt(maxPolyDegree+1,maxPolyDegree+1) ) legValChebPnt(:,:) = legendre_1D(chebPnt, maxPolyDegree) @@ -73,23 +74,24 @@ subroutine ply_check_pntToLeg(power, res) pntVal(:) = pntVal(:) + legValChebPnt(iPoly,:) * legCoeffs(iPoly) end do write(logUnit(10),*) 'Finished' - - ! Init the FPT + + ! Init the FPT call ply_init_legFpt( maxPolyDegree = maxPolyDegree, & & nIndeps = 1, & & fpt = fpt, & - & lobattoPoints = .true. ) - + & lobattoPoints = .true., & + & bu = bu ) + ! now transform to the Legendre coefficients - allocate(legVal(1:maxPolyDegree+1)) + allocate(legVal(1:maxPolyDegree+1)) write(logUnit(10),*) 'Calculating inverse FPT ...' call ply_pntToLeg( fpt = fpt, pntVal = pntVal, legCoeffs = legVal, & - & nIndeps = 1 ) + & nIndeps = 1, bu = bu ) write(logUnit(10),*) 'Finished' - + !!do iPoly = 1, maxPolyDegree+1 !! write(*,*) 'Poly degree: ', iPoly, & - !! & ' iFPT: ', legVal(iPoly), & + !! & ' iFPT: ', legVal(iPoly), & !! & ' Ref.: ', legCoeffs(iPoly), & !! & ' error: ', legVal(iPoly)-legCoeffs(iPoly) !!end do diff --git a/utests/with_fftw/ply_ifpt_test.f90 b/utests/with_fftw/ply_ifpt_test.f90 index 334c42a..7971dcd 100644 --- a/utests/with_fftw/ply_ifpt_test.f90 +++ b/utests/with_fftw/ply_ifpt_test.f90 @@ -7,7 +7,7 @@ program ply_ifpt_test use tem_general_module, only: tem_general_type, tem_start use ply_legFpt_module, only: ply_init_legFpt, & & ply_legFpt_type, & - & ply_pntToLeg + & ply_pntToLeg, ply_legFpt_bu_type use ply_modg_basis_module, only: legendre_1D implicit none @@ -31,7 +31,7 @@ program ply_ifpt_test if(res.lt.1e-08) then write(logUnit(1),*) 'PASSED' - end if + end if call fin_env() contains @@ -45,23 +45,24 @@ subroutine ply_check_pntToLeg(power, res) real(kind=rk), allocatable :: chebPnt(:) real(kind=rk), allocatable :: legValChebPnt(:,:) type(ply_legFpt_type) :: fpt - + type(ply_legFpt_bu_type) :: bu + ! Define the maximal polynomial degree we want to calculate the ! bases exchange for. maxPolyDegree = 2**power-1 ! maxPolyDegree+1 has to be a power of 2 write(logUnit(10),*) '------- Number of Legendre coefficients: ', maxPolyDegree+1 - + ! Create the Legendre expansion coefficients - allocate(legCoeffs(1:maxPolyDegree+1)) + allocate(legCoeffs(1:maxPolyDegree+1)) legCoeffs(:) = 1.0_rk - + ! Create the Chebyshev nodes on the interval [-1,+1] allocate(chebPnt(maxPolyDegree+1)) do iPoint = 1, maxPolyDegree+1 chebPnt(iPoint) = (-1.0_rk) * cos(PI/(maxPolyDegree+1)*((iPoint-1.0_rk)+1.0_rk/2.0_rk)) !write(*,*) 'Cehbyshev point', iPoint, ' is at: ', chebPnt(iPoint) end do - + ! define the point values (Chebyshev nodes) allocate( legValChebPnt(maxPolyDegree+1,maxPolyDegree+1) ) legValChebPnt(:,:) = legendre_1D(chebPnt, maxPolyDegree) @@ -72,21 +73,23 @@ subroutine ply_check_pntToLeg(power, res) pntVal(:) = pntVal(:) + legValChebPnt(iPoly,:) * legCoeffs(iPoly) end do write(logUnit(10),*) 'Finished' - - ! Init the FPT + + ! Init the FPT call ply_init_legFpt( maxPolyDegree = maxPolyDegree, & & nIndeps = 1, & - & fpt = fpt ) - + & fpt = fpt, & + & bu = bu ) + ! now transform to the Legendre coefficients - allocate(legVal(1:maxPolyDegree+1)) + allocate(legVal(1:maxPolyDegree+1)) write(logUnit(10),*) 'Calculating inverse FPT ...' - call ply_pntToLeg( fpt = fpt, pntVal = pntVal, legCoeffs = legVal, nIndeps=1 ) + call ply_pntToLeg( fpt = fpt, pntVal = pntVal, legCoeffs = legVal, & + & nIndeps=1, bu = bu ) write(logUnit(10),*) 'Finished' - + !!do iPoly = 1, maxPolyDegree+1 !! write(*,*) 'Poly degree: ', iPoly, & - !! & ' iFPT: ', legVal(iPoly), & + !! & ' iFPT: ', legVal(iPoly), & !! & ' Ref.: ', legCoeffs(iPoly), & !! & ' error: ', legVal(iPoly)-legCoeffs(iPoly) !!end do From 2f1b355303591f77e7e031f4247ea268ea246e48 Mon Sep 17 00:00:00 2001 From: Daniel Fleischer Date: Tue, 7 Jan 2020 13:04:57 +0100 Subject: [PATCH 06/21] Removed all OMP statements. --HG-- branch : feature/openmp --- source/fpt/ply_chebPoint_module.f90 | 89 ++++------------------- source/fpt/ply_legFpt_2D_module.fpp | 16 +--- source/fpt/ply_legFpt_module.f90 | 12 --- source/ply_LegPolyProjection_module.f90 | 25 +------ source/ply_fxt_module.f90 | 35 --------- source/ply_l2p_module.f90 | 4 - source/ply_leg_diff_module.fpp | 24 ------ source/ply_legser_module.f90 | 7 -- source/ply_modg_basis_module.fpp | 15 ---- source/ply_poly_project_module.fpp | 36 --------- source/ply_poly_transformation_module.f90 | 22 ------ source/ply_sampling_module.fpp | 5 -- source/ply_sampling_varsys_module.f90 | 15 +--- source/ply_space_integration_module.f90 | 43 +---------- source/ply_split_element_module.f90 | 7 -- source/ply_transfer_module.fpp | 2 - 16 files changed, 21 insertions(+), 336 deletions(-) diff --git a/source/fpt/ply_chebPoint_module.f90 b/source/fpt/ply_chebPoint_module.f90 index 510bab0..4261984 100644 --- a/source/fpt/ply_chebPoint_module.f90 +++ b/source/fpt/ply_chebPoint_module.f90 @@ -63,17 +63,13 @@ subroutine ply_chebPoint_1D( nPoints, chebPnt1D ) ! -------------------------------------------------------------------- ! integer :: iPoint ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iPoint) allocate(chebPnt1D(nPoints)) - !$OMP DO + do iPoint = 1, nPoints chebPnt1D(iPoint) = -1.0_rk & & * cos( PI / nPoints * ( (iPoint - 1.0_rk) + 1.0_rk / 2.0_rk ) ) end do - !$OMP END DO - - !$OMP END PARALLEL end subroutine ply_chebPoint_1D ! ************************************************************************ ! @@ -94,16 +90,12 @@ subroutine ply_lobattoChebPoint_1D( nPoints, chebPnt1D ) ! -------------------------------------------------------------------- ! integer :: iPoint ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iPoint) allocate(chebPnt1D(nPoints)) - !$OMP DO + do iPoint = 1, nPoints chebPnt1D(iPoint) = cos( ( iPoint - 1.0_rk ) * PI / ( nPoints - 1.0_rk ) ) end do - !$OMP END DO - - !$OMP END PARALLEL end subroutine ply_lobattoChebPoint_1D ! ************************************************************************ ! @@ -120,14 +112,13 @@ subroutine create_volume_cheb_points_cube(num_intp_per_direction, points) real(kind=rk), allocatable :: chebPnt1D(:) integer :: nquadpoints ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(k, j, i, pointNumber) + nquadpoints = num_intp_per_direction**3 + allocate(points(nquadpoints,3)) call ply_chebPoint_1D( num_intp_per_direction, chebPnt1D ) - pointNumber = 1 - !$OMP DO do k = 1, num_intp_per_direction do j = 1, num_intp_per_direction do i = 1, num_intp_per_direction @@ -140,9 +131,6 @@ subroutine create_volume_cheb_points_cube(num_intp_per_direction, points) end do end do end do - !$OMP END DO - - !$OMP END PARALLEL end subroutine create_volume_cheb_points_cube ! ************************************************************************ ! @@ -160,14 +148,14 @@ subroutine create_volume_lobattocheb_points_cube( num_intp_per_direction, & real(kind=rk), allocatable :: chebPnt1D(:) integer :: nquadpoints ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(k, j, i, pointNumber) + nquadpoints = num_intp_per_direction**3 + allocate(points(nquadpoints,3)) call ply_lobattoChebPoint_1D( num_intp_per_direction, chebPnt1D ) pointNumber = 1 - !$OMP DO do k = 1, num_intp_per_direction do j = 1, num_intp_per_direction do i = 1, num_intp_per_direction @@ -180,9 +168,6 @@ subroutine create_volume_lobattocheb_points_cube( num_intp_per_direction, & end do end do end do - !$OMP END DO - - !$OMP END PARALLEL end subroutine create_volume_lobattocheb_points_cube ! ************************************************************************ ! @@ -200,16 +185,14 @@ subroutine create_volume_cheb_points_cube_2d( num_intp_per_direction, points ) real(kind=rk), allocatable :: chebPnt1D(:) integer :: nquadpoints ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(j, i, pointNumber) - nquadpoints = num_intp_per_direction**2 + allocate(points(nquadpoints,3)) call ply_chebPoint_1D( num_intp_per_direction, chebPnt1D ) pointNumber = 1 - !$OMP DO do j = 1, num_intp_per_direction do i = 1, num_intp_per_direction ! here we build all possible combinations of the one-dimensional @@ -220,9 +203,6 @@ subroutine create_volume_cheb_points_cube_2d( num_intp_per_direction, points ) pointNumber = pointNumber + 1 end do end do - !$OMP END DO - - !$OMP END PARALLEL end subroutine create_volume_cheb_points_cube_2d ! ************************************************************************ ! @@ -242,14 +222,14 @@ subroutine create_volume_lobattocheb_points_cube_2d(num_intp_per_direction, poin real(kind=rk), allocatable :: chebPnt1D(:) integer :: nquadpoints ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(j, i, pointNumber) + nquadpoints = num_intp_per_direction**2 + allocate(points(nquadpoints,3)) call ply_lobattoChebPoint_1D( num_intp_per_direction, chebPnt1D ) pointNumber = 1 - !$OMP DO do j = 1, num_intp_per_direction do i = 1, num_intp_per_direction ! here we build all possible combinations of the one-dimensional @@ -260,9 +240,6 @@ subroutine create_volume_lobattocheb_points_cube_2d(num_intp_per_direction, poin pointNumber = pointNumber + 1 end do end do - !$OMP END DO - - !$OMP END PARALLEL end subroutine create_volume_lobattocheb_points_cube_2d ! ************************************************************************ ! @@ -278,13 +255,12 @@ subroutine create_volume_cheb_points_cube_1d( num_intp_per_direction, points ) integer :: pointNumber real(kind=rk), allocatable :: chebPnt1D(:) ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(i, pointNumber) allocate(points(num_intp_per_direction,3)) + call ply_chebPoint_1D( num_intp_per_direction, chebPnt1D ) pointNumber = 1 - !$OMP DO do i = 1, num_intp_per_direction ! here we build all possible combinations of the one-dimensional ! points to get the three dimensional values. @@ -293,9 +269,6 @@ subroutine create_volume_cheb_points_cube_1d( num_intp_per_direction, points ) points(pointNumber, 3) = 0.0_rk pointNumber = pointNumber + 1 end do - !$OMP END DO - - !$OMP END PARALLEL end subroutine create_volume_cheb_points_cube_1d ! ************************************************************************ ! @@ -314,13 +287,12 @@ subroutine create_volume_lobattocheb_points_cube_1d( num_intp_per_direction, & integer :: pointNumber real(kind=rk), allocatable :: chebPnt1D(:) ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(i, pointNumber) allocate(points(num_intp_per_direction,3)) + call ply_lobattoChebPoint_1D( num_intp_per_direction, chebPnt1D ) pointNumber = 1 - !$OMP DO do i = 1, num_intp_per_direction ! here we build all possible combinations of the one-dimensional ! points to get the three dimensional values. @@ -329,9 +301,6 @@ subroutine create_volume_lobattocheb_points_cube_1d( num_intp_per_direction, & points(pointNumber, 3) = 0.0_rk pointNumber = pointNumber + 1 end do - !$OMP END DO - - !$OMP END PARALLEL end subroutine create_volume_lobattocheb_points_cube_1d ! ************************************************************************ ! @@ -357,9 +326,9 @@ subroutine create_surface_cheb_points_cube( num_intp_per_direction, points, & real(kind=rk), allocatable :: chebPnt1D(:) integer :: nquadpoints ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(j, i, pointNumber) nquadpoints = num_intp_per_direction**2 + allocate(points(nquadpoints,3)) call ply_chebPoint_1D( num_intp_per_direction, chebPnt1D ) @@ -368,7 +337,6 @@ subroutine create_surface_cheb_points_cube( num_intp_per_direction, points, & select case(dir) case(1) ! face in x direction, x coord is fixed - !$OMP DO do j = 1, num_intp_per_direction do i = 1, num_intp_per_direction points(pointNumber, 1) = (-1.0_rk)**align @@ -377,9 +345,7 @@ subroutine create_surface_cheb_points_cube( num_intp_per_direction, points, & pointNumber = pointNumber + 1 end do end do - !$OMP END DO case(2) ! face in y direction, y coord is fixed - !$OMP DO do j = 1, num_intp_per_direction do i = 1, num_intp_per_direction points(pointNumber, 1) = chebPnt1D(i) @@ -388,9 +354,7 @@ subroutine create_surface_cheb_points_cube( num_intp_per_direction, points, & pointNumber = pointNumber + 1 end do end do - !$OMP END DO case(3) ! face in z direction, z coord is fixes - !$OMP DO do j = 1, num_intp_per_direction do i = 1, num_intp_per_direction points(pointNumber , 1) = chebPnt1D(i) @@ -399,14 +363,11 @@ subroutine create_surface_cheb_points_cube( num_intp_per_direction, points, & pointNumber = pointNumber + 1 end do end do - !$OMP END DO case default call tem_abort( 'ERROR in create_surface_cheb_points_cube: unknown ' & & // 'face direction' ) end select - !$OMP END PARALLEL - end subroutine create_surface_cheb_points_cube ! ************************************************************************ ! @@ -431,9 +392,9 @@ subroutine create_surface_lobattocheb_points_cube( num_intp_per_direction, & real(kind=rk), allocatable :: chebPnt1D(:) integer :: nquadpoints ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(j, i, pointNumber) nquadpoints = num_intp_per_direction**2 + allocate(points(nquadpoints,3)) call ply_lobattoChebPoint_1D( num_intp_per_direction, chebPnt1D ) @@ -442,7 +403,6 @@ subroutine create_surface_lobattocheb_points_cube( num_intp_per_direction, & select case(dir) case(1) ! face in x direction, x coord is fixed - !$OMP DO do j = 1, num_intp_per_direction do i = 1, num_intp_per_direction points(pointNumber, 1) = (-1.0_rk)**align @@ -451,9 +411,7 @@ subroutine create_surface_lobattocheb_points_cube( num_intp_per_direction, & pointNumber = pointNumber + 1 end do end do - !$OMP END DO case(2) ! face in y direction, y coord is fixed - !$OMP DO do j = 1, num_intp_per_direction do i = 1, num_intp_per_direction points(pointNumber, 1) = chebPnt1D(i) @@ -462,9 +420,7 @@ subroutine create_surface_lobattocheb_points_cube( num_intp_per_direction, & pointNumber = pointNumber + 1 end do end do - !$OMP END DO case(3) ! face in z direction, z coord is fixes - !$OMP DO do j = 1, num_intp_per_direction do i = 1, num_intp_per_direction points(pointNumber, 1) = chebPnt1D(i) @@ -473,14 +429,11 @@ subroutine create_surface_lobattocheb_points_cube( num_intp_per_direction, & pointNumber = pointNumber + 1 end do end do - !$OMP END DO case default call tem_abort( 'ERROR in create_surface_lobattocheb_points_cube:' & & // ' unknown face direction' ) end select - !$OMP END PARALLEL - end subroutine create_surface_lobattocheb_points_cube ! ************************************************************************ ! @@ -504,9 +457,9 @@ subroutine create_surface_cheb_points_cube_2d( num_intp_per_direction, & real(kind=rk), allocatable :: chebPnt1D(:) integer :: nquadpoints ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(i, pointNumber) nquadpoints = num_intp_per_direction + allocate(points(nquadpoints,3)) call ply_chebPoint_1D( num_intp_per_direction, chebPnt1D ) @@ -515,30 +468,24 @@ subroutine create_surface_cheb_points_cube_2d( num_intp_per_direction, & select case(dir) case(1) ! face in x direction, x coord is fixed - !$OMP DO do i = 1, num_intp_per_direction points(pointNumber, 1) = (-1.0_rk)**align points(pointNumber, 2) = chebPnt1D(i) points(pointNumber, 3) = 0.0_rk pointNumber = pointNumber + 1 end do - !$OMP END DO case(2) ! face in y direction, y coord is fixed - !$OMP DO do i = 1, num_intp_per_direction points(pointNumber, 1) = chebPnt1D(i) points(pointNumber, 2) = (-1.0_rk)**align points(pointNumber, 3) = 0.0_rk pointNumber = pointNumber + 1 end do - !$OMP END DO case default call tem_abort( 'ERROR in create_surface_cheb_points_cube_2d:' & & // ' unknown face direction' ) end select - !$OMP END PARALLEL - end subroutine create_surface_cheb_points_cube_2d ! ************************************************************************ ! @@ -562,9 +509,9 @@ subroutine create_surface_lobattocheb_points_cube_2d( & real(kind=rk), allocatable :: chebPnt1D(:) integer :: nquadpoints ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(i, pointNumber) nquadpoints = num_intp_per_direction + allocate(points(nquadpoints,3)) call ply_lobattoChebPoint_1D( num_intp_per_direction, chebPnt1D ) @@ -573,30 +520,24 @@ subroutine create_surface_lobattocheb_points_cube_2d( & select case(dir) case(1) ! face in x direction, x coord is fixed - !$OMP DO do i = 1, num_intp_per_direction points(pointNumber, 1) = (-1.0_rk)**align points(pointNumber, 2) = chebPnt1D(i) points(pointNumber, 3) = 0.0_rk pointNumber = pointNumber + 1 end do - !$OMP END DO case(2) ! face in y direction, y coord is fixed - !$OMP DO do i = 1, num_intp_per_direction points(pointNumber, 1) = chebPnt1D(i) points(pointNumber, 2) = (-1.0_rk)**align points(pointNumber, 3) = 0.0_rk pointNumber = pointNumber + 1 end do - !$OMP END DO case default call tem_abort( 'ERROR in create_surface_cheb_points_cube_2d:' & & // ' unknown face direction' ) end select - !$OMP END PARALLEL - end subroutine create_surface_lobattocheb_points_cube_2d ! ************************************************************************ ! diff --git a/source/fpt/ply_legFpt_2D_module.fpp b/source/fpt/ply_legFpt_2D_module.fpp index 9293b76..ef9629d 100644 --- a/source/fpt/ply_legFpt_2D_module.fpp +++ b/source/fpt/ply_legFpt_2D_module.fpp @@ -80,13 +80,13 @@ contains real(kind=rk), dimension(:), allocatable :: alph real(kind=rk), dimension(:), allocatable :: gam ! --------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iStrip, iAlph, nIndeps) striplen = fpt%legToChebParams%striplen n = fpt%legToChebParams%n allocate(alph(min(striplen, n)*n)) allocate(gam(min(striplen, n)*n)) + ! original layout (n = 3): ! 1 2 3 ! 4 5 6 @@ -96,7 +96,7 @@ contains ! 1 4 7 ! 2 5 8 ! 3 6 9 - !$OMP DO + yStripLoop: do iStrip = 1, n, striplen ! iAlph is the index of the first element in a line for the transformation ! in y-direction. @@ -116,10 +116,8 @@ contains pntVal((iStrip-1)*n+1 : (iStrip+nIndeps-1)*n) = gam(1:nIndeps*n) end do yStripLoop - !$OMP END DO ! x-direction - !$OMP DO xStripLoop: do iStrip = 1, n, striplen do iAlph = iStrip, min(iStrip+striplen-1, n) alph((iAlph-iStrip)*n+1:(iAlph-iStrip+1)*n) = pntVal(iAlph::n) !ztrafo @@ -136,9 +134,6 @@ contains pntVal((iStrip-1)*n+1 : (iStrip+nIndeps-1)*n) = gam(1:nIndeps*n) end do xStripLoop - !$OMP END DO - - !$OMP END PARALLEL end subroutine ply_legToPnt_2D_singVar ! ************************************************************************ ! @@ -193,7 +188,6 @@ contains real(kind=rk), dimension(:), allocatable :: alph real(kind=rk), dimension(:), allocatable :: gam ! --------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(nIndeps, iStrip, iAlph) striplen = fpt%chebToLegParams%striplen n = fpt%legToChebParams%n @@ -202,7 +196,6 @@ contains allocate(alph(min(striplen, n)*n)) allocate(gam(min(striplen, n)*n)) - !$OMP DO yStripLoop: do iStrip = 1, n, striplen do iAlph = iStrip, min(iStrip+striplen-1, n) !y_Trafo alph((iAlph-iStrip)*n+1:(iAlph-iStrip+1)*n) = pntVal(iAlph::n) @@ -220,10 +213,8 @@ contains legCoeffs((iStrip-1)*n+1 : (iStrip+nIndeps-1)*n) = gam(1:nIndeps*n) end do yStripLoop ! iStrip - !$OMP END DO ! x-direction - !$OMP DO xStripLoop: do iStrip = 1,n,striplen do iAlph = iStrip, min(iStrip+striplen-1, n) !ztrafo @@ -241,9 +232,6 @@ contains legCoeffs((iStrip-1)*n+1 : (iStrip+nIndeps-1)*n) = gam(1:nIndeps*n) end do xStripLoop - !$OMP END DO - - !$OMP END PARALLEL end subroutine ply_pntToLeg_2D_singVar ! ************************************************************************ ! diff --git a/source/fpt/ply_legFpt_module.f90 b/source/fpt/ply_legFpt_module.f90 index 0518646..55d238d 100644 --- a/source/fpt/ply_legFpt_module.f90 +++ b/source/fpt/ply_legFpt_module.f90 @@ -276,12 +276,10 @@ subroutine ply_legToPnt( fpt, legCoeffs, pntVal, nIndeps ) integer :: n ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(n, iDof, cheb) n = fpt%legToChebParams%n if (.not. fpt%use_lobatto_points) then - !$OMP DO do iDof = 1, nIndeps*n, n call ply_fpt_single( alph = legCoeffs(iDof:iDof+n-1), & & gam = cheb, & @@ -296,11 +294,9 @@ subroutine ply_legToPnt( fpt, legCoeffs, pntVal, nIndeps ) & cheb, & & pntVal(iDof:iDof+n-1) ) end do - !$OMP END DO else - !$OMP DO do iDof = 1, nIndeps*n, n call ply_fpt_single( alph = legCoeffs(iDof:iDof+n-1), & & gam = cheb, & @@ -314,10 +310,8 @@ subroutine ply_legToPnt( fpt, legCoeffs, pntVal, nIndeps ) & cheb, & & pntVal(iDof:iDof+n-1) ) end do - !$OMP END DO end if ! lobattoPoints - !$OMP END PARALLEL end subroutine ply_legToPnt ! ************************************************************************ ! @@ -339,13 +333,11 @@ subroutine ply_pntToLeg( fpt, pntVal, legCoeffs, nIndeps ) integer :: n ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(n, iDof, cheb, normFactor) n = fpt%legToChebParams%n if (.not. fpt%use_lobatto_Points) then normFactor = 1.0_rk / real(n,kind=rk) - !$OMP DO do iDof = 1, nIndeps*n, n call fftw_execute_r2r( fpt%planPntToCheb, & & pntVal(iDof:iDof+n-1), & @@ -360,12 +352,10 @@ subroutine ply_pntToLeg( fpt, pntVal, legCoeffs, nIndeps ) & alph = cheb, & & params = fpt%ChebToLegParams ) end do - !$OMP END DO else normFactor = 0.5_rk / real(n-1,kind=rk) - !$OMP DO do iDof = 1, nIndeps*n, n call fftw_execute_r2r( fpt%planPntToCheb, & & pntVal(iDof:iDof+n-1), & @@ -380,10 +370,8 @@ subroutine ply_pntToLeg( fpt, pntVal, legCoeffs, nIndeps ) & alph = cheb, & & params = fpt%ChebToLegParams ) end do - !$OMP END DO end if ! lobattoPoints - !$OMP END PARALLEL end subroutine ply_pntToLeg ! ************************************************************************ ! diff --git a/source/ply_LegPolyProjection_module.f90 b/source/ply_LegPolyProjection_module.f90 index 815bf4a..6a8341a 100644 --- a/source/ply_LegPolyProjection_module.f90 +++ b/source/ply_LegPolyProjection_module.f90 @@ -159,8 +159,6 @@ subroutine ply_QPolyProjection( subsamp, dofReduction, tree, meshData, & integer :: nChildDofs, oneDof ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iVar, nDofs, nComponents, nChilds, nChildDofs, workDat) - if (subsamp%projectionType.ne.ply_QLegendrePoly_prp) then call tem_abort( 'ERROR in ply_QPolyProjection: subsampling is ' & & // 'only implemented for Q-Legendre-Polynomials' ) @@ -170,7 +168,6 @@ subroutine ply_QPolyProjection( subsamp, dofReduction, tree, meshData, & allocate(newVarDofs(nVars)) allocate(newMeshData(nVars)) - !$OMP DO varLoop: do iVar=1,nVars nDofs = varDofs(iVar) nComponents = varcomps(iVar) @@ -244,9 +241,6 @@ subroutine ply_QPolyProjection( subsamp, dofReduction, tree, meshData, & deallocate(projection_oneDof%projCoeff) end do varLoop - !$OMP END DO - - !$OMP END PARALLEL end subroutine ply_QPolyProjection ! ************************************************************************ ! @@ -289,8 +283,6 @@ subroutine ply_initQLegProjCoeff( doftype, nDofs, ndims, nChilds, & real(kind=rk) :: dimexp ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iChild, iParentDof, iChildDof, xShift, yShift, zShift) - select case(dofType) case(ply_QLegendrePoly_prp) allocate(projection%projCoeff(nDofs, nChildDofs, nChilds)) @@ -303,7 +295,6 @@ subroutine ply_initQLegProjCoeff( doftype, nDofs, ndims, nChilds, & projCoeffOneDim = ply_QLegOneDimCoeff( nint(nDofs**dimexp), & & nint(nChildDofs**dimexp) ) - !$OMP DO ! Loop over the children of this element childLoop: do iChild = 1, nChilds @@ -356,15 +347,14 @@ subroutine ply_initQLegProjCoeff( doftype, nDofs, ndims, nChilds, & end do childDofLoop end do parentDofLoop end do childLoop - !$OMP END DO case default call tem_abort( 'ERROR in ply_initProjCoeff: initialization of ' & & // 'projection coefficients for subsampling is implemented only ' & & // 'for Q-Legendre polynomials' ) end select - !$OMP END PARALLEL deallocate(projCoeffOneDim) + end subroutine ply_initQLegProjCoeff ! ************************************************************************ ! @@ -609,8 +599,6 @@ subroutine ply_subsampleData( tree, meshData, nDofs, nChildDofs, & real(kind=rk), allocatable :: childData(:) ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iParentElem, iElem, lowElemIndex, upElemIndex, iChild, lowChildIndex, upChildIndex) - nChilds = 2**ndims nElems = tree%nElems nElemsToRefine = count(new_refine_tree) @@ -627,7 +615,6 @@ subroutine ply_subsampleData( tree, meshData, nDofs, nChildDofs, & if (subsamp%sampling_lvl > 1) then - !$OMP DO elementLoop: do iParentElem=1,nParentElems ! Check if the parent cell was already refined... if (refine_tree(iParentElem)) then @@ -719,11 +706,9 @@ subroutine ply_subsampleData( tree, meshData, nDofs, nChildDofs, & end if end do elementLoop - !$OMP END DO else - !$OMP DO elemLoop: do iElem=1,nElems if (new_refine_tree(iElem)) then allocate(childData(nChildDofs*nChilds*nComponents)) @@ -774,11 +759,8 @@ subroutine ply_subsampleData( tree, meshData, nDofs, nChildDofs, & deallocate(childData) end if end do elemLoop - !$OMP END DO end if - !$OMP END PARALLEL - end subroutine ply_subsampleData ! ************************************************************************ ! @@ -816,11 +798,9 @@ subroutine ply_projDataToChild( parentData, nParentDofs, nChildDofs, & integer :: childDof_pos, parentDof_pos real(kind=rk) :: projCoeff ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iChild, iParentDof, iChildDof, iComp, projCoeff, childDof_pos, parentDof_pos) childData(:) = 0.0_rk - !$OMP DO childLoop: do iChild = 1, nChilds parentDofLoop: do iParentDof = 1, nParentDofs childDofLoop: do iChildDof = 1, nChildDofs @@ -840,9 +820,6 @@ subroutine ply_projDataToChild( parentData, nParentDofs, nChildDofs, & end do childDofLoop end do parentDofLoop end do childLoop - !$OMP END DO - - !$OMP END PARALLEL end subroutine ply_projDataToChild ! ************************************************************************ ! diff --git a/source/ply_fxt_module.f90 b/source/ply_fxt_module.f90 index 3272c71..6173bd0 100644 --- a/source/ply_fxt_module.f90 +++ b/source/ply_fxt_module.f90 @@ -232,12 +232,10 @@ subroutine ply_fxt_m2n_2D( fxt, modal_data, nodal_data, oversamp_degree ) ! -------------------------------------------------------------------- ! integer :: ub, lb, iLine, iColumn, nModesPerDim, msq ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iLine, lb, ub, iColumn) nModesPerDim = (oversamp_degree+1) msq = nModesPerDim*nModesPerDim - !$OMP DO do iLine = 1, oversamp_degree+1 lb = (iLine-1) * (oversamp_degree+1) + 1 ub = lb + oversamp_degree @@ -245,20 +243,15 @@ subroutine ply_fxt_m2n_2D( fxt, modal_data, nodal_data, oversamp_degree ) & modal_data = modal_data(lb:ub), & & nodal_data = nodal_data(lb:ub) ) end do - !$OMP END DO - !$OMP DO do iColumn = 1, oversamp_degree+1 lb = iColumn call fxtf_flptld_m2n( flpt = fxt%flpt, & & modal_data = nodal_data(lb:msq:oversamp_degree+1), & & nodal_data = modal_data(lb:msq:oversamp_degree+1) ) end do - !$OMP END DO nodal_data = modal_data - !$OMP END PARALLEL - end subroutine ply_fxt_m2n_2D ! ************************************************************************ ! @@ -279,8 +272,6 @@ subroutine ply_fxt_m2n_3D( fxt, modal_data, nodal_data, oversamp_degree ) real(kind=rk), pointer :: tmp_in(:), tmp_out(:) ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iLine, lb, ub, tmp_in, tmp_out, iColumn) - nModesPerDim = (oversamp_degree+1) msq = nModesPerDim*nModesPerDim nTotalDofs = (oversamp_degree+1)**3 @@ -289,7 +280,6 @@ subroutine ply_fxt_m2n_3D( fxt, modal_data, nodal_data, oversamp_degree ) tmp_in = -42 tmp_out = -42 - !$OMP DO ! The loop for msq stripes for independent x Dir evaluations do iLine = 1, msq lb = (iLine-1) * (oversamp_degree+1) + 1 @@ -300,9 +290,7 @@ subroutine ply_fxt_m2n_3D( fxt, modal_data, nodal_data, oversamp_degree ) & nodal_data = tmp_out ) nodal_data(lb:ub) = tmp_out end do - !$OMP END DO - !$OMP DO ! The loop for msq stripes for independent y Dir evaluations do iColumn = 1, msq lb = int( (iColumn-1 ) / nModesPerDim ) * msq & @@ -313,20 +301,15 @@ subroutine ply_fxt_m2n_3D( fxt, modal_data, nodal_data, oversamp_degree ) & modal_data = nodal_data(lb:ub:nModesPerDim), & & nodal_data = modal_data(lb:ub:nModesPerDim) ) end do - !$OMP END DO ! The loop for msq stripes for independent z Dir evaluations ub = nTotalDofs - !$OMP DO do iColumn = 1, msq lb = iColumn call fxtf_flptld_m2n( flpt = fxt%flpt, & & modal_data = modal_data(lb:ub:msq), & & nodal_data = nodal_data(lb:ub:msq) ) end do - !$OMP END DO - - !$OMP END PARALLEL end subroutine ply_fxt_m2n_3D ! ************************************************************************ ! @@ -371,12 +354,10 @@ subroutine ply_fxt_n2m_2D( fxt, nodal_data, modal_data, oversamp_degree ) ! -------------------------------------------------------------------- ! integer :: ub, lb, iLine, iColumn, nModesPerDim, msq ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iLine, lb, ub, iColumn) nModesPerDim = (oversamp_degree+1) msq = nModesPerDim*nModesPerDim - !$OMP DO do iLine = 1, oversamp_degree+1 lb = (iLine-1) * (oversamp_degree+1) + 1 ub = lb + oversamp_degree @@ -384,21 +365,15 @@ subroutine ply_fxt_n2m_2D( fxt, nodal_data, modal_data, oversamp_degree ) & nodal_data = nodal_data(lb:ub), & & modal_data = modal_data(lb:ub) ) end do - !$OMP END DO - - !$OMP DO do iColumn = 1, oversamp_degree+1 lb = iColumn call fxtf_flptld_n2m( flpt = fxt%flpt, & & nodal_data = modal_data(lb:msq:oversamp_degree+1), & & modal_data = nodal_data(lb:msq:oversamp_degree+1) ) end do - !$OMP END DO modal_data = nodal_data - !$OMP END PARALLEL - end subroutine ply_fxt_n2m_2D ! ************************************************************************ ! @@ -417,13 +392,10 @@ subroutine ply_fxt_n2m_3D( fxt, nodal_data, modal_data, oversamp_degree ) integer :: ub, lb, iLine, iColumn, nModesPerDim, msq, ntotalDofs ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iLine, iColumn, ub, lb) - nModesPerDim = (oversamp_degree+1) msq = nModesPerDim*nModesPerDim nTotalDofs = (oversamp_degree+1)**3 - !$OMP DO ! The loop for msq stripes for independent x Dir evaluations do iLine = 1, msq lb = (iLine-1) * (oversamp_degree+1) + 1 @@ -432,9 +404,7 @@ subroutine ply_fxt_n2m_3D( fxt, nodal_data, modal_data, oversamp_degree ) & nodal_data = nodal_data(lb:ub), & & modal_data = modal_data(lb:ub) ) end do - !$OMP END DO - !$OMP DO ! The loop for msq stripes for independent y Dir evaluations do iColumn = 1, msq lb = int( (iColumn-1) / nModesPerDim ) * msq & @@ -445,20 +415,15 @@ subroutine ply_fxt_n2m_3D( fxt, nodal_data, modal_data, oversamp_degree ) & nodal_data = modal_data(lb:ub:nModesPerDim), & & modal_data = nodal_data(lb:ub:nModesPerDim) ) end do - !$OMP END DO ! The loop for msq stripes for independent z Dir evaluations ub = nTotalDofs - !$OMP DO do iColumn = 1, msq lb = iColumn call fxtf_flptld_n2m( flpt = fxt%flpt, & & nodal_data = nodal_data(lb:ub:msq), & & modal_data = modal_data(lb:ub:msq) ) end do - !$OMP END DO - - !$OMP END PARALLEL end subroutine ply_fxt_n2m_3D ! ************************************************************************ ! diff --git a/source/ply_l2p_module.f90 b/source/ply_l2p_module.f90 index e87e6d6..cbbe120 100644 --- a/source/ply_l2p_module.f90 +++ b/source/ply_l2p_module.f90 @@ -271,10 +271,8 @@ subroutine ply_l2_projection( nDofs, nIndeps, projected, original, matrix ) ! integer, parameter :: vlen = nIndeps ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iStrip, iRow, iCell, iCol) if (nDofs > 1) then - !$OMP DO do iStrip=0,nIndeps-1,vlen ! Calculate the upper bound of the current strip @@ -297,7 +295,6 @@ subroutine ply_l2_projection( nDofs, nIndeps, projected, original, matrix ) end do ! iRow = 1, nRows end do ! iStrip - !$OMP END DO else @@ -305,7 +302,6 @@ subroutine ply_l2_projection( nDofs, nIndeps, projected, original, matrix ) end if - !$OMP END PARALLEL end subroutine ply_l2_projection ! ************************************************************************ ! diff --git a/source/ply_leg_diff_module.fpp b/source/ply_leg_diff_module.fpp index c64985c..18e4b97 100644 --- a/source/ply_leg_diff_module.fpp +++ b/source/ply_leg_diff_module.fpp @@ -67,8 +67,6 @@ contains integer :: leg(3), iDeg, iDeg1, iDeg2, iDeg3, DV(3) ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iDeg1, dofPosPrev, iDeg2, iDeg3, iVar,leg, dofPos, iDeg) - if (present(dirVec)) then DV = dirvec else @@ -81,7 +79,6 @@ contains endif endif - !$OMP DO do iDeg = 1, (mpd+1)**2 iDeg1 = (iDeg-1)/(mpd+1) + 1 !! do IDeg1 = 1, mPd+1 iDeg2 = iDeg - (iDeg1-1)*(mpd+1) !! do IDeg2 = 1, mPd=1 !! iDeg2 = mod(iDeg-1,mpd+1)+1 @@ -140,9 +137,7 @@ contains end do end do end do - !$OMP END DO - !$OMP DO ! Scale the results due to the Jacobians of the mappings do dofpos=1,(mpd+1)**3 ideg3 = (dofpos-1)/(mpd+1)**2 + 1 @@ -154,7 +149,6 @@ contains & * (2.0_rk/elemLength) & & * (2.0_rk*leg(iDir) - 1.0_rk) end do - !$OMP END DO !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! Uncollapsed version of the scaling ! @@ -174,8 +168,6 @@ contains !! end do !! end do - !$OMP END PARALLEL - end subroutine calcDiff_leg_normal ! ************************************************************************ ! @@ -543,8 +535,6 @@ contains integer :: leg(2), iDeg1, iDeg2, DV(2) ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iDeg1, iDeg2, leg, dofPos, dofPosPrev, iVar) - if (present(dirVec)) then DV = dirvec else @@ -555,7 +545,6 @@ contains endif endif - !$OMP DO do iDeg1 = 1, mPd+1 iDeg2 = mPd+1 leg = (/iDeg1, iDeg2/) @@ -612,9 +601,7 @@ contains end do end do end do - !$OMP END DO - !$OMP DO ! Scale the results due to the Jacobians of the mappings do iDeg1 = 1, mPd+1 do iDeg2 = 1, mPd+1 @@ -626,9 +613,6 @@ contains & * (2.0_rk*leg(iDir) - 1.0_rk) end do end do - !$OMP END DO - - !$OMP END PARALLEL end subroutine calcDiff_leg_2d_normal ! ************************************************************************ ! @@ -735,8 +719,6 @@ contains integer :: dofPos, dofPosPrev, dofPos2Prev ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(dofPos, iDegX, dofPosPrev, dofPos2Prev) - ! Build the derivative in x direction dofPos = 1 + maxPolyDegree legCoeffsDiff(dofPos,:) = 0.0_rk @@ -744,7 +726,6 @@ contains dofPosPrev = dofPos dofPos = 1 + (maxPolyDegree-1) legCoeffsDiff(dofPos,:) = legCoeffs(dofPosPrev,:) - !$OMP DO do iDegX = maxPolyDegree-1, 1, -1 dofPos = 1 + (iDegX-1) dofPosPrev = 1 + (iDegX) @@ -752,19 +733,14 @@ contains legCoeffsDiff(dofPos,:) = legCoeffsDiff(dofPos2Prev,:) & & + legCoeffs(dofPosPrev,:) end do - !$OMP END DO end if - !$OMP DO do iDegX = 1, maxPolyDegree+1 dofPos = 1 + (iDegX-1) legCoeffsDiff(dofPos,:) = legCoeffsDiff(dofPos,:) & & * (2.0_rk/elemLength) & & * (2.0_rk*iDegX - 1.0_rk) end do - !$OMP END DO - - !$OMP END PARALLEL end subroutine calcDiff_leg_1d ! ************************************************************************ ! diff --git a/source/ply_legser_module.f90 b/source/ply_legser_module.f90 index dc3ad15..0649a2a 100644 --- a/source/ply_legser_module.f90 +++ b/source/ply_legser_module.f90 @@ -41,23 +41,19 @@ subroutine legser(A, B, n) real(kind=rk) :: ak, al, bb, c, d integer :: k, l, ll ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(k, d, c, l, ll, al, bb, ak) ak = 0.0_rk ! Calculation of the first Legendre coefficient b(1) = 0.5_rk * a(1) - !$OMP DO do k=3,n,2 ak = ak + 2.0_rk b(1) = b(1) - a(k)/(ak*ak - 1.0_rk) end do - !$OMP END DO c = 2.0_rk / 3.0_rk al = 0.0_rk ! Start main loop (remaining Legendre coefficients) - !$OMP DO do l=2,n ! Calculation of the Lth coefficient ll = l+2 @@ -77,9 +73,6 @@ subroutine legser(A, B, n) & / ( (al+al+3.0_rk)*(al+al+2.0_rk) ) b(l) = (al+0.5_rk)*bb end do - !$OMP END DO - - !$OMP END PARALLEL end subroutine legser ! ************************************************************************ ! diff --git a/source/ply_modg_basis_module.fpp b/source/ply_modg_basis_module.fpp index 0ecdccb..5812570 100644 --- a/source/ply_modg_basis_module.fpp +++ b/source/ply_modg_basis_module.fpp @@ -192,8 +192,6 @@ contains integer :: iFunc, jFunc ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(jFunc, iFunc) - allocate(GaussPoints(nPoints)) allocate(GaussPoints_left(nPoints)) allocate(GaussPoints_right(nPoints)) @@ -233,7 +231,6 @@ contains allocate( integral%anz_anzShift(1:nFunc, 1:nFunc, 2)) - !$OMP DO !loop over anzatz functions do jFunc = 1, nFunc do iFunc = 1, nFunc @@ -255,9 +252,6 @@ contains end do end do - !$OMP END DO - - !$OMP END PARALLEL end subroutine init_modg_covolumeCoeffs ! ************************************************************************ ! @@ -457,7 +451,6 @@ contains real(kind=rk) :: n_q ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iAns, iAnsX, iAnsY, iAnsZ, n_q, ansPos) ! allocate the output array select case(basisType) @@ -484,7 +477,6 @@ contains polyValX(2,:) = coords(:,1) polyValY(2,:) = coords(:,2) polyValZ(2,:) = coords(:,3) - !$OMP DO ! ... higher order polynomials are build recursively do iAns = 3, maxPolyDegree+1 n_q = 1.0_rk / real(iAns-1,kind=rk) @@ -510,13 +502,11 @@ contains & * polyValZ(iAns-2,:) ) & & *n_q end do - !$OMP END DO end if ! Now, build the complete point value. select case(basisType) case(Q_space) - !$OMP DO do iAnsX = 1, maxPolyDegree+1 do iAnsY = 1, maxPolyDegree+1 do iAnsZ = 1, maxPolyDegree+1 @@ -528,24 +518,19 @@ contains end do end do end do - !$OMP END DO case(P_space) iAnsX = 1 iAnsY = 1 iAnsZ = 1 ?? copy :: getDofsPTens(maxPolyDegree, ansPosMax) - !$OMP DO do ansPos = 1, ansPosMax polyVal(ansPos, :) = polyValX(iAnsX,:) & & * polyValY(iAnsY,:) & & * polyValZ(iAnsZ,:) ?? copy :: nextModgCoeffPTens(iAnsX, iAnsY, iAnsZ) end do - !$OMP END DO end select - !$OMP END PARALLEL - end subroutine evalLegendreTensPoly ! ************************************************************************ ! diff --git a/source/ply_poly_project_module.fpp b/source/ply_poly_project_module.fpp index 293f2e2..5a1effe 100644 --- a/source/ply_poly_project_module.fpp +++ b/source/ply_poly_project_module.fpp @@ -525,8 +525,6 @@ contains !--------------------------------------------------------------------------! integer :: iVar !--------------------------------------------------------------------------! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iVar) - select case(trim(me%kind)) case ('l2p') @@ -536,29 +534,23 @@ contains ! additional summation select case(dim) case (1) - !$OMP DO do iVar = 1, nVars call ply_l2p_trafo_1D( trafo = me%body_1D%l2p%leg2node, & & projected = nodal_data(:,iVar), & & original = modal_data(:,iVar) ) end do - !$OMP END DO case (2) - !$OMP DO do iVar = 1, nVars call ply_l2p_trafo_2D( trafo = me%body_2D%l2p%leg2node, & & projected = nodal_data(:,iVar), & & original = modal_data(:,iVar) ) end do - !$OMP END DO case (3) - !$OMP DO do iVar = 1, nVars call ply_l2p_trafo_3D( trafo = me%body_3D%l2p%leg2node, & & projected = nodal_data(:,iVar), & & original = modal_data(:,iVar) ) end do - !$OMP END DO end select case ('fpt') @@ -575,50 +567,40 @@ contains & legCoeffs = modal_data, & & nVars = nVars ) case (1) - !$OMP DO do iVar = 1,nVars call ply_LegToPnt( fpt = me%body_1d%fpt, & & pntVal = nodal_data(:,iVar), & & legCoeffs = modal_data(:,iVar), & & nIndeps = 1 ) end do - !$OMP END DO end select case ('fxt') select case (dim) case (3) - !$OMP DO do iVar = 1,nVars call ply_fxt_m2n_3D( fxt = me%body_3d%fxt, & & modal_data = modal_data(:,iVar), & & nodal_data = nodal_data(:,iVar), & & oversamp_degree = me%oversamp_degree ) end do - !$OMP END DO case (2) - !$OMP DO do iVar = 1,nVars call ply_fxt_m2n_2D( fxt = me%body_2d%fxt, & & modal_data = modal_data(:,iVar), & & nodal_data = nodal_data(:,iVar), & & oversamp_degree = me%oversamp_degree ) end do - !$OMP END DO case (1) - !$OMP DO do iVar = 1,nVars call ply_fxt_m2n_1D( fxt = me%body_1d%fxt, & & modal_data = modal_data(:,iVar), & & nodal_data = nodal_data(:,iVar) ) end do - !$OMP END DO end select end select - !$OMP END PARALLEL - end subroutine ply_poly_project_m2n_multivar !****************************************************************************! @@ -637,36 +619,28 @@ contains !--------------------------------------------------------------------------! integer :: iVar !--------------------------------------------------------------------------! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iVar) - select case(trim(me%kind)) case ('l2p') select case (dim) case (1) - !$OMP DO do iVar = 1, nVars call ply_l2p_trafo_1D( trafo = me%body_1D%l2p%node2leg, & & projected = modal_data(:,iVar), & & original = nodal_data(:,iVar) ) end do - !$OMP END DO case (2) - !$OMP DO do iVar = 1, nVars call ply_l2p_trafo_2D( trafo = me%body_2D%l2p%node2leg, & & projected = modal_data(:,iVar), & & original = nodal_data(:,iVar) ) end do - !$OMP END DO case (3) - !$OMP DO do iVar = 1, nVars call ply_l2p_trafo_3D( trafo = me%body_3D%l2p%node2leg, & & projected = modal_data(:,iVar), & & original = nodal_data(:,iVar) ) end do - !$OMP END DO end select case ('fpt') @@ -683,20 +657,17 @@ contains & pntVal = nodal_data, & & legCoeffs = modal_data ) case (1) - !$OMP DO do iVar = 1,nVars call ply_pntToLeg( fpt = me%body_1d%fpt, & & nIndeps = 1, & & pntVal = nodal_data(:,iVar), & & legCoeffs = modal_data(:,iVar) ) end do - !$OMP END DO end select case ('fxt') select case (dim) case (3) - !$OMP DO do iVar = 1, nVars call ply_fxt_n2m_3D( & & fxt = me%body_3d%fxt, & @@ -704,10 +675,8 @@ contains & modal_data = modal_data(:,iVar), & & oversamp_degree = me%oversamp_degree ) end do - !$OMP END DO case (2) - !$OMP DO do iVar = 1, nVars call ply_fxt_n2m_2D( & & fxt = me%body_2d%fxt, & @@ -715,25 +684,20 @@ contains & modal_data = modal_data(:,iVar), & & oversamp_degree = me%oversamp_degree ) end do - !$OMP END DO case (1) - !$OMP DO do iVar = 1, nVars call ply_fxt_n2m_1D( & & fxt = me%body_1d%fxt, & & nodal_data = nodal_data(:,iVar), & & modal_data = modal_data(:,iVar) ) end do - !$OMP END DO end select case default write(logUnit(1),*) 'ERROR in projection nodal to modal' end select - !$OMP END PARALLEL - end subroutine ply_poly_project_n2m_multivar !***************************************************************************! diff --git a/source/ply_poly_transformation_module.f90 b/source/ply_poly_transformation_module.f90 index 46e946d..4817581 100644 --- a/source/ply_poly_transformation_module.f90 +++ b/source/ply_poly_transformation_module.f90 @@ -120,13 +120,11 @@ subroutine ply_Poly_Transformation( subsamp, dofReduction, mesh, meshData, & integer :: nVars, nDofs, nComponents, nChildDofs integer :: iVar ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iVar, nComponents, nDofs, workData, nChildDofs) nVars = size(varDofs) allocate(newVarDofs(nVars)) allocate(newMeshData(nVars)) - !$OMP DO varLoop: do iVar = 1, nVars nComponents = varComps(iVar) nDofs = vardofs(iVar) @@ -166,10 +164,6 @@ subroutine ply_Poly_Transformation( subsamp, dofReduction, mesh, meshData, & deallocate(workData) end do varLoop - !$OMP END DO - - !$OMP END PARALLEL - end subroutine ply_Poly_Transformation ! ************************************************************************ ! @@ -218,8 +212,6 @@ subroutine ply_subsampleData( mesh, meshData, nDofs, nChildDofs, & real(kind=rk), allocatable :: transform_matrix(:,:) real(kind=rk), allocatable :: childData(:) ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iParentElem, iChild, iElem) - nChilds = 2**nDims @@ -246,7 +238,6 @@ subroutine ply_subsampleData( mesh, meshData, nDofs, nChildDofs, & if (subsamp%sampling_lvl > 1) then - !$OMP DO elementLoop: do iParentElem=1,nParentElems ! Check if the parent cell was already refined... if (refine_tree(iParentElem)) then @@ -333,9 +324,7 @@ subroutine ply_subsampleData( mesh, meshData, nDofs, nChildDofs, & deallocate(childData) end if end do elementLoop - !$OMP END DO else - !$OMP DO elemLoop: do iElem=1,nElems if (new_refine_tree(iElem)) then ! Create lower and upper indices for all data of iElem in meshData. @@ -386,13 +375,10 @@ subroutine ply_subsampleData( mesh, meshData, nDofs, nChildDofs, & deallocate(childData) end if end do elemLoop - !$OMP END DO end if deallocate(transform_matrix) - !$OMP END PARALLEL - end subroutine ply_subsampleData ! ************************************************************************ ! @@ -782,8 +768,6 @@ subroutine ply_transform_matrix(max_modes, v) integer :: m, orig real(kind=rk) :: shifting, scaling ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(orig, m) - ! transformation matrix looks like this: ! [1.0 -- -- shift=0.5 ] @@ -806,7 +790,6 @@ subroutine ply_transform_matrix(max_modes, v) v(2,2) = scaling if (max_modes > 2) then - !$OMP DO do orig = 3,max_modes v(1,orig) = ply_beta(orig-1) * v(1,orig-2) & & + ply_alpha(orig-1) * shifting * v(1,orig-1) & @@ -824,14 +807,12 @@ subroutine ply_transform_matrix(max_modes, v) end if end do end do - !$OMP END DO end if ! Due to the symmetry of the problem (the left subinterval has just ! the shifting with a changed sign), we can fill the other half of ! the matrix by copying the already computed values accordingly with ! a change in sign, as needed (alternatingly). - !$OMP DO do m = 1 , max_modes do orig = 1, m-1 if (mod((m+orig),2) /= 0) then @@ -841,11 +822,8 @@ subroutine ply_transform_matrix(max_modes, v) end if end do end do - !$OMP END DO end if - !$OMP END PARALLEL - end subroutine ply_transform_matrix ! ************************************************************************ ! diff --git a/source/ply_sampling_module.fpp b/source/ply_sampling_module.fpp index 0165589..c75cb77 100644 --- a/source/ply_sampling_module.fpp +++ b/source/ply_sampling_module.fpp @@ -680,21 +680,16 @@ contains integer :: iElem integer :: nComps ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iElem) nComps = fun%nComponents datlen = tree%nElems * nComps call c_f_pointer(fun%method_data, p) - !$OMP DO do iElem=1,n res(1+(iElem-1)*nComps:iElem*nComps) & & = p%dat(1+(elempos(iElem)-1)*nComps:elempos(iElem)*nComps) end do - !$OMP END DO - - !$OMP END PARALLEL end subroutine get_sampled_element ! ************************************************************************ ! diff --git a/source/ply_sampling_varsys_module.f90 b/source/ply_sampling_varsys_module.f90 index d225a41..1eb9bca 100644 --- a/source/ply_sampling_varsys_module.f90 +++ b/source/ply_sampling_varsys_module.f90 @@ -108,7 +108,6 @@ subroutine ply_sampling_varsys_for_track( varsys, trackInst, mesh, nDims, & integer, allocatable :: elempos(:) real(kind=rk), allocatable :: elemdat(:) ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iVar, iComponent, iElem, iTotComp) nVars = trackInst%varmap%varPos%nVals @@ -146,7 +145,6 @@ subroutine ply_sampling_varsys_for_track( varsys, trackInst, mesh, nDims, & variables: do iVar=1,nVars varpos = trackInst%varmap%varPos%val(iVar) nComponents = varsys%method%val(varpos)%nComponents - !$OMP DO do iComponent=1,nComponents iTotComp = iScalar+iComponent-1 call ply_sampling_var_allocate( var = var(iTotComp), & @@ -154,7 +152,6 @@ subroutine ply_sampling_varsys_for_track( varsys, trackInst, mesh, nDims, & & datalen = total_dofs ) var(iTotComp)%first(1) = 1 end do - !$OMP END DO ! Varying polynomial degree for elements is possible. ! Need to copy data element by element. @@ -166,7 +163,6 @@ subroutine ply_sampling_varsys_for_track( varsys, trackInst, mesh, nDims, & upper_bound = lower_bound-1 + nDofs allocate(elemDat(nComponents*nDofs)) - !$OMP DO call varSys%method%val(varpos)%get_element( & & varSys = varSys, & & elempos = elempos(iElem:iElem), & @@ -175,6 +171,7 @@ subroutine ply_sampling_varsys_for_track( varsys, trackInst, mesh, nDims, & & nElems = 1, & & nDofs = nDofs, & & res = elemdat ) + do iComponent=1,nComponents iTotComp = iScalar+iComponent-1 var(iTotComp)%dat(lower_bound:upper_bound) & @@ -182,21 +179,16 @@ subroutine ply_sampling_varsys_for_track( varsys, trackInst, mesh, nDims, & var(iTotComp)%first(iElem+1) = upper_bound+1 var(iTotComp)%degree(iElem) = lvl_degree(iLevel) end do - !$OMP END DO lower_bound = upper_bound + 1 deallocate(elemDat) - !$OMP DO end do - !$OMP END DO iScalar = iScalar + nComponents end do variables - !$OMP END PARALLEL - end subroutine ply_sampling_varsys_for_track ! ------------------------------------------------------------------------ ! ! ------------------------------------------------------------------------ ! @@ -317,7 +309,6 @@ subroutine ply_sampling_var_compute_elemdev(var, threshold, min_mean) integer :: nElems integer :: ndofs ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iElem, ndofs, absmean, variation) if (allocated(var%deviates)) deallocate(var%deviates) var%nDeviating = 0 @@ -325,7 +316,6 @@ subroutine ply_sampling_var_compute_elemdev(var, threshold, min_mean) if (allocated(var%first)) then nElems = size(var%first)-1 allocate(var%deviates(nElems)) - !$OMP DO do iElem=1,nElems ndofs = var%first(iElem+1) - var%first(iElem) - 1 absmean = max( abs(var%dat(var%first(iElem))), min_mean ) @@ -333,11 +323,8 @@ subroutine ply_sampling_var_compute_elemdev(var, threshold, min_mean) var%deviates(iElem) = (variation > threshold*absmean) if (var%deviates(iElem)) var%nDeviating = var%nDeviating + 1 end do - !$OMP END DO end if - !$OMP END PARALLEL - end subroutine ply_sampling_var_compute_elemdev ! ------------------------------------------------------------------------ ! ! ------------------------------------------------------------------------ ! diff --git a/source/ply_space_integration_module.f90 b/source/ply_space_integration_module.f90 index fb5eede..b076296 100644 --- a/source/ply_space_integration_module.f90 +++ b/source/ply_space_integration_module.f90 @@ -144,9 +144,9 @@ subroutine ply_create_volume_gauss_points_cube( num_intp_per_direction, & real(kind=rk), allocatable :: weights1D(:) integer :: numQuadPoints ! ---------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(k, j, i, pointNumber) numQuadPoints = num_intp_per_direction**3 + allocate(points(numQuadPoints,3)) allocate(weights(numQuadPoints)) allocate(gaussp1D(num_intp_per_direction)) @@ -159,7 +159,6 @@ subroutine ply_create_volume_gauss_points_cube( num_intp_per_direction, & & nIntP = num_intp_per_direction ) pointNumber = 1 - !$OMP DO do k = 1, num_intp_per_direction do j = 1, num_intp_per_direction do i = 1, num_intp_per_direction @@ -175,9 +174,6 @@ subroutine ply_create_volume_gauss_points_cube( num_intp_per_direction, & end do end do end do - !$OMP END DO - - !$OMP END PARALLEL end subroutine ply_create_volume_gauss_points_cube ! *********************************************************************** ! @@ -201,9 +197,9 @@ subroutine ply_create_volume_gauss_points_cube_2d( num_intp_per_direction, & real(kind=rk), allocatable :: weights1D(:) integer :: numQuadPoints ! ---------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(j, i, pointNumber) numQuadPoints = num_intp_per_direction**2 + allocate(points(numQuadPoints,3)) allocate(weights(numQuadPoints)) allocate(gaussp1D(num_intp_per_direction)) @@ -216,7 +212,6 @@ subroutine ply_create_volume_gauss_points_cube_2d( num_intp_per_direction, & & nIntP = num_intp_per_direction ) pointNumber = 1 - !$OMP DO do j = 1, num_intp_per_direction do i = 1, num_intp_per_direction !> here we build all possible combinations of the one-dimensional @@ -229,9 +224,6 @@ subroutine ply_create_volume_gauss_points_cube_2d( num_intp_per_direction, & pointNumber = pointNumber + 1 end do end do - !$OMP END DO - - !$OMP END PARALLEL end subroutine ply_create_volume_gauss_points_cube_2d ! *********************************************************************** ! @@ -254,7 +246,6 @@ subroutine ply_create_volume_gauss_points_cube_1d( num_intp_per_direction, & real(kind=rk), allocatable :: gaussp1D(:) real(kind=rk), allocatable :: weights1D(:) ! ---------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(i) allocate(points(num_intp_per_direction,3)) allocate(weights(num_intp_per_direction)) @@ -268,7 +259,6 @@ subroutine ply_create_volume_gauss_points_cube_1d( num_intp_per_direction, & & nIntP = num_intp_per_direction ) pointNumber = 1 - !$OMP DO do i = 1, num_intp_per_direction !> here we build all possible combinations of the one-dimensional !! quadrature points to get the three dimensional values. @@ -279,9 +269,6 @@ subroutine ply_create_volume_gauss_points_cube_1d( num_intp_per_direction, & weights(PointNumber) = weights1D(i) pointNumber = pointNumber + 1 end do - !$OMP END DO - - !$OMP END PARALLEL end subroutine ply_create_volume_gauss_points_cube_1d ! *********************************************************************** ! @@ -312,7 +299,6 @@ subroutine ply_create_surface_gauss_points_cube( num_intp_per_direction, & real(kind=rk), allocatable :: weights1D(:) integer :: nquadPoints ! ---------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(j, i) nQuadPoints = num_intp_per_direction**2 @@ -331,7 +317,6 @@ subroutine ply_create_surface_gauss_points_cube( num_intp_per_direction, & select case(dir) case(1) ! face in x direction, x coord is fixed - !$OMP DO do j = 1, num_intp_per_direction do i = 1, num_intp_per_direction !> here we build all possible combinations of the one-dimensional @@ -343,10 +328,8 @@ subroutine ply_create_surface_gauss_points_cube( num_intp_per_direction, & pointNumber = pointNumber + 1 end do end do - !$OMP END DO case(2) ! face in y direction, y coord is fixed - !$OMP DO do j = 1, num_intp_per_direction do i = 1, num_intp_per_direction !> here we build all possible combinations of the one-dimensional @@ -358,10 +341,8 @@ subroutine ply_create_surface_gauss_points_cube( num_intp_per_direction, & pointNumber = pointNumber + 1 end do end do - !$OMP END DO case(3) ! face in z direction, z coord is fixed - !$OMP DO do j = 1, num_intp_per_direction do i = 1, num_intp_per_direction !> here we build all possible combinations of the one-dimensional @@ -373,7 +354,6 @@ subroutine ply_create_surface_gauss_points_cube( num_intp_per_direction, & pointNumber = pointNumber + 1 end do end do - !$OMP END DO case default call tem_abort( 'ERROR in create_surface_gauss_points_cube:' & @@ -381,8 +361,6 @@ subroutine ply_create_surface_gauss_points_cube( num_intp_per_direction, & end select - !$OMP END PARALLEL - end subroutine ply_create_surface_gauss_points_cube ! *********************************************************************** ! @@ -412,7 +390,6 @@ subroutine ply_create_surface_gauss_points_cube_2d(num_intp_per_direction, & real(kind=rk), allocatable :: weights1D(:) integer :: nQuadPoints ! ---------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(i) ! The number of quadrature points on the boundary of a 2d volume is the ! number of quad points in one direction @@ -433,7 +410,6 @@ subroutine ply_create_surface_gauss_points_cube_2d(num_intp_per_direction, & select case(dir) case(1) ! face in x direction, x coord is fixed - !$OMP DO do i = 1, num_intp_per_direction !> here we build all possible combinations of the one-dimensional !! quadrature points for 2d case to get the three dimensional values. @@ -443,10 +419,8 @@ subroutine ply_create_surface_gauss_points_cube_2d(num_intp_per_direction, & weights(PointNumber) = weights1D(i) pointNumber = pointNumber + 1 end do - !$OMP END DO case(2) ! face in y direction, y coord is fixed - !$OMP DO do i = 1, num_intp_per_direction !> here we build all possible combinations of the one-dimensional !! quadrature points in 2d case to get the three dimensional values. @@ -456,7 +430,6 @@ subroutine ply_create_surface_gauss_points_cube_2d(num_intp_per_direction, & weights(PointNumber) = weights1D(i) pointNumber = pointNumber + 1 end do - !$OMP END DO case default call tem_abort( 'ERROR in create_surface_gauss_points_cube_2d:' & @@ -464,8 +437,6 @@ subroutine ply_create_surface_gauss_points_cube_2d(num_intp_per_direction, & end select - !$OMP END PARALLEL - end subroutine ply_create_surface_gauss_points_cube_2d ! *********************************************************************** ! @@ -532,7 +503,6 @@ subroutine ply_create_gauss_points_1d( num_intp_per_direction, & real(kind=rk), allocatable :: gaussp1D(:) real(kind=rk), allocatable :: weights1D(:) ! ---------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(j) numQuadPoints = num_intp_per_direction @@ -548,7 +518,6 @@ subroutine ply_create_gauss_points_1d( num_intp_per_direction, & & nIntP = num_intp_per_direction ) pointNumber = 1 - !$OMP DO do j = 1, num_intp_per_direction !> here we build all possible combinations of the one-dimensional !! quadrature points to get the three dimensional values. @@ -556,9 +525,6 @@ subroutine ply_create_gauss_points_1d( num_intp_per_direction, & weights(PointNumber) = weights1D(j) pointNumber = pointNumber + 1 end do - !$OMP END DO - - !$OMP END PARALLEL end subroutine ply_create_gauss_points_1d ! *********************************************************************** ! @@ -587,14 +553,12 @@ subroutine ply_gaussLegPoints( x1, x2, x, w, nIntP ) real(kind=rk) :: EPS integer :: m, i, j ! ---------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(i, p1, p2, p3, pp, z1, z) EPS= 1.0 / (10.0**(PRECISION(1.0_rk)-2) ) m = (nIntP+1)/2 xm = 0.5*(x2+x1) xl = 0.5*(x2-x1) - !$OMP DO do i = 1, m z = cos(PI*((i-1)+0.75_rk)/(nIntP+0.5_rk)) @@ -619,9 +583,6 @@ subroutine ply_gaussLegPoints( x1, x2, x, w, nIntP ) w(nIntp-i+1) = w(i) end do - !$OMP END DO - - !$OMP END PARALLEL end subroutine ply_gaussLegPoints ! *********************************************************************** ! diff --git a/source/ply_split_element_module.f90 b/source/ply_split_element_module.f90 index 8670c5f..e3e325d 100644 --- a/source/ply_split_element_module.f90 +++ b/source/ply_split_element_module.f90 @@ -252,7 +252,6 @@ subroutine ply_split_element_singleD( nDims, inLen, outLen, parent_data, & integer :: nParents integer :: parentpos, childpos ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(iDir, parentMode, iParent, childMode, indep) nParents = size(parent_data,2) @@ -266,18 +265,15 @@ subroutine ply_split_element_singleD( nDims, inLen, outLen, parent_data, & ! The number of independent modes (in normal directions) is given ! by the product of the length in all directions, except the last one. nIndeps = 1 - !$OMP DO do iDir=1,nDims-1 nIndeps = nIndeps*inLen(iDir) end do - !$OMP END DO if (ignore) then maxcol = min(outLen(1), inLen(nDims)) else maxcol = inLen(nDims) end if - !$OMP DO oldmodes: do parentMode=1,maxcol ! Maximal number modes to compute, as this is a triangular matrix ! it is limited by the diagonal (parentMode). However, it may be @@ -308,9 +304,6 @@ subroutine ply_split_element_singleD( nDims, inLen, outLen, parent_data, & end do elemloop end do oldmodes - !$OMP END DO - - !$OMP END PARALLEL end subroutine ply_split_element_singleD ! ======================================================================== ! diff --git a/source/ply_transfer_module.fpp b/source/ply_transfer_module.fpp index 980eaed..123a6fb 100644 --- a/source/ply_transfer_module.fpp +++ b/source/ply_transfer_module.fpp @@ -144,14 +144,12 @@ contains ispace_oq: if (inspace == Q_Space) then - !$OMP DO ! Both, output and input are Q Polynomials do out_Y=0,minord-1 out_off = out_Y*(outdegree+1) in_off = out_Y*(indegree+1) outdat(out_off+1:out_off+minord) = indat(in_off+1:in_off+minord) end do - !$OMP END DO else ispace_oq From 637b01c516ec1f97e57345ff43d794defa874794 Mon Sep 17 00:00:00 2001 From: Daniel Fleischer Date: Thu, 13 Feb 2020 13:44:36 +0100 Subject: [PATCH 07/21] Introduced OpenMP parallelisation in m2n projection for fpt and l2p. --HG-- branch : feature/openmp --- source/fpt/ply_chebPoint_module.f90 | 1 + source/fpt/ply_legFpt_module.f90 | 6 ++++++ source/ply_l2p_module.f90 | 7 +++++++ 3 files changed, 14 insertions(+) diff --git a/source/fpt/ply_chebPoint_module.f90 b/source/fpt/ply_chebPoint_module.f90 index 4261984..0b8b8b1 100644 --- a/source/fpt/ply_chebPoint_module.f90 +++ b/source/fpt/ply_chebPoint_module.f90 @@ -119,6 +119,7 @@ subroutine create_volume_cheb_points_cube(num_intp_per_direction, points) call ply_chebPoint_1D( num_intp_per_direction, chebPnt1D ) + pointNumber = 1 do k = 1, num_intp_per_direction do j = 1, num_intp_per_direction do i = 1, num_intp_per_direction diff --git a/source/fpt/ply_legFpt_module.f90 b/source/fpt/ply_legFpt_module.f90 index 55d238d..2ab1cd1 100644 --- a/source/fpt/ply_legFpt_module.f90 +++ b/source/fpt/ply_legFpt_module.f90 @@ -276,10 +276,12 @@ subroutine ply_legToPnt( fpt, legCoeffs, pntVal, nIndeps ) integer :: n ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(n, iDof, cheb) n = fpt%legToChebParams%n if (.not. fpt%use_lobatto_points) then + !$OMP DO do iDof = 1, nIndeps*n, n call ply_fpt_single( alph = legCoeffs(iDof:iDof+n-1), & & gam = cheb, & @@ -294,9 +296,11 @@ subroutine ply_legToPnt( fpt, legCoeffs, pntVal, nIndeps ) & cheb, & & pntVal(iDof:iDof+n-1) ) end do + !$OMP END DO else + !$OMP DO do iDof = 1, nIndeps*n, n call ply_fpt_single( alph = legCoeffs(iDof:iDof+n-1), & & gam = cheb, & @@ -310,8 +314,10 @@ subroutine ply_legToPnt( fpt, legCoeffs, pntVal, nIndeps ) & cheb, & & pntVal(iDof:iDof+n-1) ) end do + !$OMP END DO end if ! lobattoPoints + !$OMP END PARALLEL end subroutine ply_legToPnt ! ************************************************************************ ! diff --git a/source/ply_l2p_module.f90 b/source/ply_l2p_module.f90 index cbbe120..e5075f2 100644 --- a/source/ply_l2p_module.f90 +++ b/source/ply_l2p_module.f90 @@ -271,8 +271,11 @@ subroutine ply_l2_projection( nDofs, nIndeps, projected, original, matrix ) ! integer, parameter :: vlen = nIndeps ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), & + !$OMP PRIVATE(iStrip, iRow, iCell, iCol, strip_ub, mval) if (nDofs > 1) then + !$OMP DO do iStrip=0,nIndeps-1,vlen ! Calculate the upper bound of the current strip @@ -295,12 +298,16 @@ subroutine ply_l2_projection( nDofs, nIndeps, projected, original, matrix ) end do ! iRow = 1, nRows end do ! iStrip + !$OMP END DO else + !$OMP SINGLE projected = matrix(nDofs,1) * original + !$OMP END SINGLE end if + !$OMP END PARALLEL end subroutine ply_l2_projection ! ************************************************************************ ! From 2440e691654090178a68e4901d430741addbd79a Mon Sep 17 00:00:00 2001 From: Daniel Fleischer Date: Wed, 4 Mar 2020 13:17:27 +0100 Subject: [PATCH 08/21] Introduced OpenMP parallelisation in n2m projection for fpt and for conversion to and from oversampled representation. --HG-- branch : feature/openmp --- source/fpt/ply_legFpt_module.f90 | 6 ++++++ source/ply_oversample_module.fpp | 28 ++++++++++++++++++++-------- source/ply_poly_project_module.fpp | 5 ++--- 3 files changed, 28 insertions(+), 11 deletions(-) diff --git a/source/fpt/ply_legFpt_module.f90 b/source/fpt/ply_legFpt_module.f90 index 2ab1cd1..65b0c76 100644 --- a/source/fpt/ply_legFpt_module.f90 +++ b/source/fpt/ply_legFpt_module.f90 @@ -339,11 +339,13 @@ subroutine ply_pntToLeg( fpt, pntVal, legCoeffs, nIndeps ) integer :: n ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(n, iDof, cheb) n = fpt%legToChebParams%n if (.not. fpt%use_lobatto_Points) then normFactor = 1.0_rk / real(n,kind=rk) + !$OMP DO do iDof = 1, nIndeps*n, n call fftw_execute_r2r( fpt%planPntToCheb, & & pntVal(iDof:iDof+n-1), & @@ -358,10 +360,12 @@ subroutine ply_pntToLeg( fpt, pntVal, legCoeffs, nIndeps ) & alph = cheb, & & params = fpt%ChebToLegParams ) end do + !$OMP END DO else normFactor = 0.5_rk / real(n-1,kind=rk) + !$OMP DO do iDof = 1, nIndeps*n, n call fftw_execute_r2r( fpt%planPntToCheb, & & pntVal(iDof:iDof+n-1), & @@ -376,8 +380,10 @@ subroutine ply_pntToLeg( fpt, pntVal, legCoeffs, nIndeps ) & alph = cheb, & & params = fpt%ChebToLegParams ) end do + !$OMP END DO end if ! lobattoPoints + !$OMP END PARALLEL end subroutine ply_pntToLeg ! ************************************************************************ ! diff --git a/source/ply_oversample_module.fpp b/source/ply_oversample_module.fpp index 191a6d2..1e20794 100644 --- a/source/ply_oversample_module.fpp +++ b/source/ply_oversample_module.fpp @@ -169,6 +169,7 @@ contains integer :: maxorders integer :: ord_lim ! -------------------------------------------------------------------- ! + ! Information for the oversampling loop oversamp_degree = poly_proj%oversamp_degree mpd1 = poly_proj%min_degree + 1 @@ -184,6 +185,7 @@ contains varQ: do iVar=1,nScalars if (ensure_positivity(iVar)) then ordersum = 0.0_rk + !$OMP PARALLEL DO PRIVATE(dof, iDegZ, iDegY, iDegX, iOrd) do dof = 1, mpd1_cube iDegZ = (dof-1)/mpd1_square + 1 iDegY = (dof-1-(iDegZ-1)*mpd1_square)/mpd1+1 @@ -191,6 +193,7 @@ contains iOrd = iDegX+iDegY+iDegZ-2 ordersum(iOrd) = ordersum(iOrd) + abs(state(dof,iVar)) end do + !$OMP END PARALLEL DO varsum = 0.0_rk do iOrd=2,ord_lim varsum = varsum + ordersum(iOrd) @@ -202,6 +205,7 @@ contains end if end do varQ do iVar=1,nScalars + !$OMP PARALLEL DO PRIVATE(dof, iDegZ, iDegY, iDegX, iOrd, dofOverSamp) do dof = 1, mpd1_cube iDegZ = (dof-1)/mpd1_square + 1 iDegY = (dof-1-(iDegZ-1)*mpd1_square)/mpd1+1 @@ -214,22 +218,25 @@ contains modalCoeffs(dofOverSamp,iVar) = state(dof,iVar) end if end do + !$OMP END PARALLEL DO end do else posQ if (oversamp_degree == poly_proj%min_degree) then modalCoeffs = state else modalCoeffs = 0.0_rk - do dof = 1, mpd1_cube - iDegZ = (dof-1)/mpd1_square + 1 - iDegY = (dof-1-(iDegZ-1)*mpd1_square)/mpd1+1 - iDegX = mod(dof-1,mpd1)+1 - dofOverSamp = iDegX + ( iDegY-1 & - & + (iDegZ-1)*(oversamp_degree+1) & - & ) * (oversamp_degree+1) - do iVar=1,nScalars + do iVar=1,nScalars + !$OMP PARALLEL DO PRIVATE(dof, iDegZ, iDegY, iDegX, dofOverSamp) + do dof = 1, mpd1_cube + iDegZ = (dof-1)/mpd1_square + 1 + iDegY = (dof-1-(iDegZ-1)*mpd1_square)/mpd1+1 + iDegX = mod(dof-1,mpd1)+1 + dofOverSamp = iDegX + ( iDegY-1 & + & + (iDegZ-1)*(oversamp_degree+1) & + & ) * (oversamp_degree+1) modalCoeffs(dofOverSamp,iVar) = state(dof,iVar) end do + !$OMP END PARALLEL DO end do end if end if posQ @@ -327,10 +334,13 @@ contains mpd1_cube = mpd1**3 nScalars = size(modalCoeffs,2) + !$OMP PARALLEL DEFAULT(SHARED), & + !$OMP PRIVATE(iVar,dof,iDegZ,iDegY,iDegX,dofOverSamp) if (poly_proj%basisType == Q_Space) then if (oversamp_degree == poly_proj%min_degree) then state = modalCoeffs else + !$OMP DO do iVar=1,nScalars do dof = 1, mpd1_cube iDegZ = (dof-1)/mpd1_square + 1 @@ -342,6 +352,7 @@ contains state(dof,iVar) = modalCoeffs(dofOverSamp,iVar) end do end do + !$OMP END DO end if else !P_Space @@ -358,6 +369,7 @@ contains ?? copy :: nextModgCoeffPTens(iDegX, iDegY, iDegZ) end do end if + !$OMP END PARALLEL end subroutine ply_convertFromoversample_3d ! ************************************************************************ ! diff --git a/source/ply_poly_project_module.fpp b/source/ply_poly_project_module.fpp index 5a1effe..882c897 100644 --- a/source/ply_poly_project_module.fpp +++ b/source/ply_poly_project_module.fpp @@ -554,7 +554,6 @@ contains end select case ('fpt') - select case (dim) case (3) call ply_LegToPnt_3D( fpt = me%body_3d%fpt, & @@ -582,14 +581,14 @@ contains call ply_fxt_m2n_3D( fxt = me%body_3d%fxt, & & modal_data = modal_data(:,iVar), & & nodal_data = nodal_data(:,iVar), & - & oversamp_degree = me%oversamp_degree ) + & oversamp_degree = me%oversamp_degree ) end do case (2) do iVar = 1,nVars call ply_fxt_m2n_2D( fxt = me%body_2d%fxt, & & modal_data = modal_data(:,iVar), & & nodal_data = nodal_data(:,iVar), & - & oversamp_degree = me%oversamp_degree ) + & oversamp_degree = me%oversamp_degree ) end do case (1) From e9c7782d4b2e42310f238e7513e2f33f29ad7e39 Mon Sep 17 00:00:00 2001 From: Daniel Fleischer Date: Wed, 4 Mar 2020 14:18:09 +0100 Subject: [PATCH 09/21] Updated copyright notice --HG-- branch : feature/openmp --- source/fpt/ply_legFpt_module.f90 | 1 + source/ply_oversample_module.fpp | 1 + 2 files changed, 2 insertions(+) diff --git a/source/fpt/ply_legFpt_module.f90 b/source/fpt/ply_legFpt_module.f90 index 65b0c76..f97e5a2 100644 --- a/source/fpt/ply_legFpt_module.f90 +++ b/source/fpt/ply_legFpt_module.f90 @@ -3,6 +3,7 @@ ! Copyright (c) 2013-2014, 2017 Peter Vitt ! Copyright (c) 2013-2014 Verena Krupp ! Copyright (c) 2016 Langhammer Kay +! Copyright (c) 2020 Daniel Fleischer ! ! Parts of this file were written by Jens Zudrop and Harald Klimach ! for German Research School for Simulation Sciences GmbH. diff --git a/source/ply_oversample_module.fpp b/source/ply_oversample_module.fpp index 1e20794..267b851 100644 --- a/source/ply_oversample_module.fpp +++ b/source/ply_oversample_module.fpp @@ -4,6 +4,7 @@ ! Copyright (c) 2014, 2017-2019 Harald Klimach ! Copyright (c) 2014 Verena Krupp ! Copyright (c) 2016 Tobias Girresser +! Copyright (c) 2020 Daniel Fleischer ! ! Parts of this file were written by Jens Zudrop, Nikhil Anand, Harald Klimach, ! Verena Krupp, Peter Vitt, and Tobias Girresser for University of Siegen. From 02293a0562ec826ef2c6a9c32e699f78c55557c9 Mon Sep 17 00:00:00 2001 From: Daniel Fleischer Date: Thu, 5 Mar 2020 14:27:48 +0100 Subject: [PATCH 10/21] Introduced OpenMP to derivative computation in ply_leg_diff_module + formatting --HG-- branch : feature/openmp --- source/fpt/ply_legFpt_module.f90 | 2 +- source/ply_leg_diff_module.fpp | 33 ++++++++++++++++++++++++++++++++ source/ply_oversample_module.fpp | 4 +++- 3 files changed, 37 insertions(+), 2 deletions(-) diff --git a/source/fpt/ply_legFpt_module.f90 b/source/fpt/ply_legFpt_module.f90 index 567d904..0120037 100644 --- a/source/fpt/ply_legFpt_module.f90 +++ b/source/fpt/ply_legFpt_module.f90 @@ -500,7 +500,7 @@ subroutine ply_pntToLeg_single( fpt, pntVal, legCoeffs, nIndeps ) integer :: n ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(n,iDof, cheb) + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(n, iDof, cheb) n = fpt%legToChebParams%n normFactor = 1.0_rk / real(n,kind=rk) diff --git a/source/ply_leg_diff_module.fpp b/source/ply_leg_diff_module.fpp index 590824f..c4e07b9 100644 --- a/source/ply_leg_diff_module.fpp +++ b/source/ply_leg_diff_module.fpp @@ -329,12 +329,16 @@ contains real(kind=rk) :: derivative((mpd+1)**2,mpd+1) ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), & + !$OMP PRIVATE(iDeg3, iDeg2, iDeg1, iDeg, iVar, dofPos, dofPosPrev) + varloop: do iVar = 1, nVars derivative(:,mpd+1) = 0.0_rk iDeg1 = mpd + !$OMP DO do iDeg = 1, (mpd+1)**2 iDeg3 = (iDeg-1)/(mpd+1) + 1 iDeg2 = iDeg - (iDeg3-1)*(mpd+1) @@ -342,7 +346,9 @@ contains ?? copy :: posOfModgCoeffQTens(iDeg1+1, iDeg2, iDeg3, mPd, dofpos ) derivative(iDeg, mpd) = legCoeffs(dofpos,iVar) end do + !$OMP END DO + !$OMP DO do iDeg1 = mPd-1, 1, -1 !NEC$ ivdep @@ -356,8 +362,10 @@ contains & + legCoeffs(dofposprev, iVar) end do end do + !$OMP END DO ! Scale the results due to the Jacobians of the mappings + !$OMP DO do dofpos=1,(mpd+1)**3 ideg3 = (dofpos-1)/(mpd+1)**2 + 1 iDeg = dofpos - (ideg3-1)*(mpd+1)**2 @@ -367,9 +375,12 @@ contains & * (2.0_rk/elemLength) & & * (2.0_rk*iDeg1 - 1.0_rk) end do + !$OMP END DO end do varloop + !$OMP END PARALLEL + end subroutine calcDiff_leg_x_vec ! ************************************************************************ ! @@ -397,12 +408,16 @@ contains real(kind=rk) :: derivative((mpd+1)**2,mpd+1) ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), & + !$OMP PRIVATE(iDeg3, iDeg2, iDeg1, iDeg, iVar, dofPos, dofPosPrev) + varloop: do iVar = 1, nVars derivative(:,mpd+1) = 0.0_rk iDeg2 = mpd + !$OMP DO do iDeg = 1, (mpd+1)**2 iDeg3 = (iDeg-1)/(mpd+1) + 1 iDeg1 = iDeg - (iDeg3-1)*(mpd+1) @@ -410,7 +425,9 @@ contains ?? copy :: posOfModgCoeffQTens(iDeg1, iDeg2+1, iDeg3, mPd, dofpos ) derivative(iDeg, mpd) = legCoeffs(dofpos,iVar) end do + !$OMP END DO + !$OMP DO do iDeg2 = mPd-1, 1, -1 !NEC$ ivdep @@ -424,8 +441,10 @@ contains & + legCoeffs(dofposprev, iVar) end do end do + !$OMP END DO ! Scale the results due to the Jacobians of the mappings + !$OMP DO do dofpos=1,(mpd+1)**3 ideg3 = (dofpos-1)/(mpd+1)**2 + 1 iDeg = dofpos - (ideg3-1)*(mpd+1)**2 @@ -435,9 +454,12 @@ contains & * (2.0_rk/elemLength) & & * (2.0_rk*iDeg2 - 1.0_rk) end do + !$OMP END DO end do varloop + !$OMP END PARALLEL + end subroutine calcDiff_leg_y_vec ! ************************************************************************ ! @@ -465,12 +487,16 @@ contains real(kind=rk) :: derivative((mpd+1)**2,mpd+1) ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), & + !$OMP PRIVATE(iDeg3, iDeg2, iDeg1, iDeg, iVar, dofPos, dofPosPrev) + varloop: do iVar = 1, nVars derivative(:,mpd+1) = 0.0_rk iDeg3 = mpd + !$OMP DO do iDeg = 1, (mpd+1)**2 iDeg2 = (iDeg-1)/(mpd+1) + 1 iDeg1 = iDeg - (iDeg2-1)*(mpd+1) @@ -478,7 +504,9 @@ contains ?? copy :: posOfModgCoeffQTens(iDeg1, iDeg2, iDeg3+1, mPd, dofpos ) derivative(iDeg, mpd) = legCoeffs(dofpos,iVar) end do + !$OMP END DO + !$OMP DO do iDeg3 = mPd-1, 1, -1 !NEC$ ivdep @@ -492,8 +520,10 @@ contains & + legCoeffs(dofposprev, iVar) end do end do + !$OMP END DO ! Scale the results due to the Jacobians of the mappings + !$OMP DO do dofpos=1,(mpd+1)**3 ideg3 = (dofpos-1)/(mpd+1)**2 + 1 ideg = dofpos - (ideg3-1)*(mpd+1)**2 @@ -501,9 +531,12 @@ contains & * (2.0_rk/elemLength) & & * (2.0_rk*iDeg3 - 1.0_rk) end do + !$OMP END DO end do varloop + !$OMP END PARALLEL + end subroutine calcDiff_leg_z_vec ! ************************************************************************ ! diff --git a/source/ply_oversample_module.fpp b/source/ply_oversample_module.fpp index d742002..402aa9b 100644 --- a/source/ply_oversample_module.fpp +++ b/source/ply_oversample_module.fpp @@ -340,7 +340,7 @@ contains nScalars = size(modalCoeffs,2) !$OMP PARALLEL DEFAULT(SHARED), & - !$OMP PRIVATE(iVar,dof,iDegZ,iDegY,iDegX,dofOverSamp) + !$OMP PRIVATE(iVar, dof, idof, iDegZ, iDegY, iDegX, dofOverSamp) if (poly_proj%basisType == Q_Space) then if (oversamp_degree == poly_proj%min_degree) then state = modalCoeffs @@ -365,6 +365,7 @@ contains iDegX = 1 iDegY = 1 iDegZ = 1 + !$OMP DO do idof = 1, poly_proj%body_3d%min_dofs ?? copy :: posOfModgCoeffPTens(iDegX, iDegY, iDegZ, dof) dofOverSamp = iDegX + ( iDegY-1 & @@ -373,6 +374,7 @@ contains state(dof,:) = modalCoeffs(dofOverSamp,:) ?? copy :: nextModgCoeffPTens(iDegX, iDegY, iDegZ) end do + !$OMP END DO end if !$OMP END PARALLEL From 3ccea4f7d3c34e8e0886e557ce88078b6e472622 Mon Sep 17 00:00:00 2001 From: Daniel Fleischer Date: Tue, 10 Mar 2020 12:54:09 +0100 Subject: [PATCH 11/21] Moved the OpenMP statements from the vectorization- to the standard-subroutines in ply_leg_diff_module --HG-- branch : feature/openmp --- source/ply_leg_diff_module.fpp | 44 ++++++++-------------------------- 1 file changed, 10 insertions(+), 34 deletions(-) diff --git a/source/ply_leg_diff_module.fpp b/source/ply_leg_diff_module.fpp index c4e07b9..72a1d6d 100644 --- a/source/ply_leg_diff_module.fpp +++ b/source/ply_leg_diff_module.fpp @@ -67,6 +67,10 @@ contains integer :: leg(3), iDeg, iDeg1, iDeg2, iDeg3, DV(3) ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), & + !$OMP PRIVATE(iVar,dofPos,dofPosPrev,dofPos2Prev,iDeg,iDeg1,iDeg2,iDeg3), & + !$OMP PRIVATE(leg,DV) + if (present(dirVec)) then DV = dirvec else @@ -79,6 +83,7 @@ contains endif endif + !$OMP DO do iDeg = 1, (mpd+1)**2 iDeg1 = (iDeg-1)/(mpd+1) + 1 !! do IDeg1 = 1, mPd+1 iDeg2 = iDeg - (iDeg1-1)*(mpd+1) !! do IDeg2 = 1, mPd=1 !! iDeg2 = mod(iDeg-1,mpd+1)+1 @@ -137,8 +142,10 @@ contains end do end do end do + !$OMP END DO ! Scale the results due to the Jacobians of the mappings + !$OMP DO do dofpos=1,(mpd+1)**3 ideg3 = (dofpos-1)/(mpd+1)**2 + 1 iDeg = dofpos - (ideg3-1)*(mpd+1)**2 @@ -149,6 +156,9 @@ contains & * (2.0_rk/elemLength) & & * (2.0_rk*leg(iDir) - 1.0_rk) end do + !$OMP END DO + + !$OMP END PARALLEL !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! Uncollapsed version of the scaling ! @@ -284,7 +294,6 @@ contains end do varloop - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! Uncollapsed version of the scaling ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! @@ -329,16 +338,12 @@ contains real(kind=rk) :: derivative((mpd+1)**2,mpd+1) ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), & - !$OMP PRIVATE(iDeg3, iDeg2, iDeg1, iDeg, iVar, dofPos, dofPosPrev) - varloop: do iVar = 1, nVars derivative(:,mpd+1) = 0.0_rk iDeg1 = mpd - !$OMP DO do iDeg = 1, (mpd+1)**2 iDeg3 = (iDeg-1)/(mpd+1) + 1 iDeg2 = iDeg - (iDeg3-1)*(mpd+1) @@ -346,9 +351,7 @@ contains ?? copy :: posOfModgCoeffQTens(iDeg1+1, iDeg2, iDeg3, mPd, dofpos ) derivative(iDeg, mpd) = legCoeffs(dofpos,iVar) end do - !$OMP END DO - !$OMP DO do iDeg1 = mPd-1, 1, -1 !NEC$ ivdep @@ -362,10 +365,8 @@ contains & + legCoeffs(dofposprev, iVar) end do end do - !$OMP END DO ! Scale the results due to the Jacobians of the mappings - !$OMP DO do dofpos=1,(mpd+1)**3 ideg3 = (dofpos-1)/(mpd+1)**2 + 1 iDeg = dofpos - (ideg3-1)*(mpd+1)**2 @@ -375,12 +376,9 @@ contains & * (2.0_rk/elemLength) & & * (2.0_rk*iDeg1 - 1.0_rk) end do - !$OMP END DO end do varloop - !$OMP END PARALLEL - end subroutine calcDiff_leg_x_vec ! ************************************************************************ ! @@ -408,16 +406,12 @@ contains real(kind=rk) :: derivative((mpd+1)**2,mpd+1) ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), & - !$OMP PRIVATE(iDeg3, iDeg2, iDeg1, iDeg, iVar, dofPos, dofPosPrev) - varloop: do iVar = 1, nVars derivative(:,mpd+1) = 0.0_rk iDeg2 = mpd - !$OMP DO do iDeg = 1, (mpd+1)**2 iDeg3 = (iDeg-1)/(mpd+1) + 1 iDeg1 = iDeg - (iDeg3-1)*(mpd+1) @@ -425,9 +419,7 @@ contains ?? copy :: posOfModgCoeffQTens(iDeg1, iDeg2+1, iDeg3, mPd, dofpos ) derivative(iDeg, mpd) = legCoeffs(dofpos,iVar) end do - !$OMP END DO - !$OMP DO do iDeg2 = mPd-1, 1, -1 !NEC$ ivdep @@ -441,10 +433,8 @@ contains & + legCoeffs(dofposprev, iVar) end do end do - !$OMP END DO ! Scale the results due to the Jacobians of the mappings - !$OMP DO do dofpos=1,(mpd+1)**3 ideg3 = (dofpos-1)/(mpd+1)**2 + 1 iDeg = dofpos - (ideg3-1)*(mpd+1)**2 @@ -454,12 +444,9 @@ contains & * (2.0_rk/elemLength) & & * (2.0_rk*iDeg2 - 1.0_rk) end do - !$OMP END DO end do varloop - !$OMP END PARALLEL - end subroutine calcDiff_leg_y_vec ! ************************************************************************ ! @@ -487,16 +474,12 @@ contains real(kind=rk) :: derivative((mpd+1)**2,mpd+1) ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), & - !$OMP PRIVATE(iDeg3, iDeg2, iDeg1, iDeg, iVar, dofPos, dofPosPrev) - varloop: do iVar = 1, nVars derivative(:,mpd+1) = 0.0_rk iDeg3 = mpd - !$OMP DO do iDeg = 1, (mpd+1)**2 iDeg2 = (iDeg-1)/(mpd+1) + 1 iDeg1 = iDeg - (iDeg2-1)*(mpd+1) @@ -504,9 +487,7 @@ contains ?? copy :: posOfModgCoeffQTens(iDeg1, iDeg2, iDeg3+1, mPd, dofpos ) derivative(iDeg, mpd) = legCoeffs(dofpos,iVar) end do - !$OMP END DO - !$OMP DO do iDeg3 = mPd-1, 1, -1 !NEC$ ivdep @@ -520,10 +501,8 @@ contains & + legCoeffs(dofposprev, iVar) end do end do - !$OMP END DO ! Scale the results due to the Jacobians of the mappings - !$OMP DO do dofpos=1,(mpd+1)**3 ideg3 = (dofpos-1)/(mpd+1)**2 + 1 ideg = dofpos - (ideg3-1)*(mpd+1)**2 @@ -531,12 +510,9 @@ contains & * (2.0_rk/elemLength) & & * (2.0_rk*iDeg3 - 1.0_rk) end do - !$OMP END DO end do varloop - !$OMP END PARALLEL - end subroutine calcDiff_leg_z_vec ! ************************************************************************ ! From 3d0f7023fd6a8a6aa29f6a3d7e6cc1590d9f5433 Mon Sep 17 00:00:00 2001 From: Daniel Fleischer Date: Wed, 11 Mar 2020 11:58:54 +0100 Subject: [PATCH 12/21] Removed some OpenMP statements in ply_oversample_module that caused utests to fail --HG-- branch : feature/openmp --- source/ply_oversample_module.fpp | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/source/ply_oversample_module.fpp b/source/ply_oversample_module.fpp index 402aa9b..871c534 100644 --- a/source/ply_oversample_module.fpp +++ b/source/ply_oversample_module.fpp @@ -226,18 +226,16 @@ contains modalCoeffs = state else modalCoeffs = 0.0_rk - do iVar=1,nScalars - !$OMP PARALLEL DO PRIVATE(dof, iDegZ, iDegY, iDegX, dofOverSamp) - do dof = 1, mpd1_cube - iDegZ = (dof-1)/mpd1_square + 1 - iDegY = (dof-1-(iDegZ-1)*mpd1_square)/mpd1+1 - iDegX = mod(dof-1,mpd1)+1 - dofOverSamp = iDegX + ( iDegY-1 & - & + (iDegZ-1)*(oversamp_degree+1) & - & ) * (oversamp_degree+1) + do dof = 1, mpd1_cube + iDegZ = (dof-1)/mpd1_square + 1 + iDegY = (dof-1-(iDegZ-1)*mpd1_square)/mpd1+1 + iDegX = mod(dof-1,mpd1)+1 + dofOverSamp = iDegX + ( iDegY-1 & + & + (iDegZ-1)*(oversamp_degree+1) & + & ) * (oversamp_degree+1) + do iVar=1,nScalars modalCoeffs(dofOverSamp,iVar) = state(dof,iVar) end do - !$OMP END PARALLEL DO end do end if end if posQ @@ -289,7 +287,6 @@ contains iDegX = 1 iDegY = 1 iDegZ = 1 - !$OMP PARALLEL DO PRIVATE(idof, iDegZ, iDegY, iDegX, iOrd) do idof = 1, poly_proj%body_3d%min_dofs ?? copy :: posOfModgCoeffPTens(iDegX, iDegY, iDegZ, dof) dofOverSamp = iDegX + ( iDegY-1 & @@ -298,7 +295,6 @@ contains modalCoeffs(dofOverSamp,:) = state(dof,:) ?? copy :: nextModgCoeffPTens(iDegX, iDegY, iDegZ) end do - !$OMP END PARALLEL DO end if posP end if From b2b129665e14e28791cdc52c450fadd156be254e Mon Sep 17 00:00:00 2001 From: Harald Klimach Date: Thu, 9 Jul 2020 20:55:07 +0200 Subject: [PATCH 13/21] Added a performance test for the l2p transformation. --HG-- branch : feature/openmp --- utests/ply_l2p_3D_performance_test.f90 | 131 +++++++++++++++++++++++++ 1 file changed, 131 insertions(+) create mode 100644 utests/ply_l2p_3D_performance_test.f90 diff --git a/utests/ply_l2p_3D_performance_test.f90 b/utests/ply_l2p_3D_performance_test.f90 new file mode 100644 index 0000000..571fa43 --- /dev/null +++ b/utests/ply_l2p_3D_performance_test.f90 @@ -0,0 +1,131 @@ +! Copyright (c) 2012, 2014 Jens Zudrop +! Copyright (c) 2012-2016, 2018-2020 Harald Klimach +! Copyright (c) 2013-2014 Peter Vitt +! Copyright (c) 2013-2014 Verena Krupp +! Copyright (c) 2014 Nikhil Anand +! +! Parts of this file were written by Jens Zudrop and Harald Klimach for +! German Research School for Simulation Sciences GmbH. +! +! Parts of this file were written by Harald Klimach, Peter Vitt, Verena Krupp, +! and Nikhil Anand for University of Siegen. +! +! Permission to use, copy, modify, and distribute this software for any +! purpose with or without fee is hereby granted, provided that the above +! copyright notice and this permission notice appear in all copies. +! +! THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES +! WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +! MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR +! ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +! WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +! ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +! OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +! **************************************************************************** ! + +!> Unit test to check performance of l2p transformations. +program ply_l2p_3D_performance_test + use mpi, only: mpi_wtime + use env_module, only: rk, fin_env + use tem_logging_module, only: logUnit, tem_logging_init_primary + use tem_general_module, only: tem_general_type, tem_start + use ply_l2p_header_module, only: ply_l2p_header_type, ply_l2p_header_define + use ply_l2p_module, only: ply_l2p_type, ply_init_l2p, ply_l2p_trafo_3D + + !mpi!nprocs = 1 + + implicit none + + integer :: iPower + integer, parameter :: maxpower = 7 + real(kind=rk) :: res, newRes + type(tem_general_type) :: general + + ! Init the Treelm environment, needed to init the log Unit + call tem_start(codeName = 'L2P 3D Performance Test', & + & version = '1', & + & general = general ) + call tem_logging_init_primary( level = 1, & + & rank = general%proc%rank ) + + res = 0.0_rk + do iPower = 1,maxpower + call ply_check_legToPnt_3D(iPower, newRes) + write(*,*) 'deviation:', newres + if (newRes.gt.res) then + res = newRes + end if + end do + + write(*,*) 'Maximal deviation:', res + if (res < 1.e-08) then + write(logUnit(1),*) 'PASSED' + end if + + call fin_env() + +contains + + subroutine ply_check_legToPnt_3D(power,res) + integer, intent(in) :: power + real(kind=rk) :: res + integer :: maxPolyDegree, iVar, nVars + real(kind=rk), allocatable :: legCoeffs(:,:), legCoeffsIn(:,:) + real(kind=rk), allocatable :: pntVal(:,:), legVal(:,:) + type(ply_l2p_header_type) :: header + type(ply_l2p_type) :: trafo + real(kind=rk) :: starttime, stoptime + + ! Define the maximal polynomial degree we want to calculate the + ! bases exchange for. + maxPolyDegree = 2**power-1 ! maxPolyDegree+1 has to be a power of 2 + nVars = 3 + write(logUnit(10),*) '------------------------------------' // & + & ' Number of Legendre coefficients (per dim): ', maxPolyDegree+1 + write(logUnit(10),*) '------------------------------------' // & + & ' Number of Legendre coefficients (total): ',(maxPolyDegree+1)**3 + + ! Create the Legendre expansion coefficients + allocate(legCoeffs((maxPolyDegree+1)**3,nVars)) + allocate(legCoeffsIn((maxPolyDegree+1)**3,nVars)) + do iVar = 1, nVars + legCoeffs(:,iVar) = real(iVar, rk) + end do + + ! Init the L2 Projection + call ply_l2p_header_define( me = header, & + & nodes_kind = 'chebyshev' ) + call ply_init_l2p( degree = maxPolyDegree, & + & l2p = trafo, & + & header = header ) + + ! now transform to the Chebyshev nodes + allocate(pntVal( (maxPolyDegree+1)**3, nVars )) + legCoeffsIn = legCoeffs + starttime = MPI_Wtime() + do iVar = 1, nVars + call ply_l2p_trafo_3D( trafo = trafo%leg2node, & + & original = legCoeffsIn(:,iVar), & + & projected = pntVal(:,iVar) ) + end do + stoptime = MPI_Wtime() + write(*,*) 'Time for degree ', maxpolydegree, ' trafo: ', & + & stoptime - starttime + + ! now transform back to Legendre coefficients + allocate(legVal( (maxPolyDegree+1)**3,nVars )) + starttime = MPI_Wtime() + do iVar = 1, nVars + call ply_l2p_trafo_3D( trafo = trafo%node2leg, & + & projected = legVal(:,iVar), & + & original = pntVal(:,iVar) ) + end do + stoptime = MPI_Wtime() + write(*,*) 'Time for degree ', maxpolydegree, ' inverse: ', & + & stoptime - starttime + + res = maxval(abs(legVal(:,:) - legCoeffs(:,:))) + + end subroutine ply_check_legToPnt_3D + +end program ply_l2p_3D_performance_test From 98903487ff58f74555ad284517b6b6df41ae73c7 Mon Sep 17 00:00:00 2001 From: Daniel Fleischer Date: Wed, 15 Jul 2020 12:13:18 +0200 Subject: [PATCH 14/21] Removed mval from private OMP variables in ply_l2p_module --HG-- branch : feature/openmp --- source/ply_l2p_module.f90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/ply_l2p_module.f90 b/source/ply_l2p_module.f90 index fef8b81..7dc5291 100644 --- a/source/ply_l2p_module.f90 +++ b/source/ply_l2p_module.f90 @@ -246,7 +246,7 @@ subroutine ply_l2_projection( nDofs, nIndeps, projected, original, matrix ) ! -------------------------------------------------------------------- ! !$OMP PARALLEL DEFAULT(SHARED), & - !$OMP PRIVATE(iStrip, iRow, iCell, iCol, strip_ub, mval) + !$OMP PRIVATE(iStrip, iRow, iCell, iCol, strip_ub) if (nDofs > 1) then !$OMP DO From 176a3d2e5d971d2f28921bc27e78596488aacb8d Mon Sep 17 00:00:00 2001 From: Harald Klimach Date: Fri, 17 Jul 2020 21:12:25 +0200 Subject: [PATCH 15/21] Adaption to the change precice module. --HG-- branch : feature/openmp --- source/ply_poly_project_module.fpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/ply_poly_project_module.fpp b/source/ply_poly_project_module.fpp index 6acdd45..5747681 100644 --- a/source/ply_poly_project_module.fpp +++ b/source/ply_poly_project_module.fpp @@ -121,7 +121,7 @@ module ply_poly_project_module & ply_fxt_n2m_3D, & & ply_fxt_n2m_2D use tem_precice_module, only: precice_available, & - & precice + & precice_handle implicit none From 67c7b1e55d9725ee2c1328ba650c75440c71bf86 Mon Sep 17 00:00:00 2001 From: Daniel Fleischer Date: Mon, 20 Jul 2020 13:48:13 +0200 Subject: [PATCH 16/21] Reverted changes of Rev 453 and replaced unnecessary loop with array operation --HG-- branch : feature/openmp --- source/ply_l2p_module.f90 | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/source/ply_l2p_module.f90 b/source/ply_l2p_module.f90 index 7dc5291..e9f9f5b 100644 --- a/source/ply_l2p_module.f90 +++ b/source/ply_l2p_module.f90 @@ -245,8 +245,9 @@ subroutine ply_l2_projection( nDofs, nIndeps, projected, original, matrix ) ! integer, parameter :: vlen = nIndeps ! -------------------------------------------------------------------- ! - !$OMP PARALLEL DEFAULT(SHARED), & - !$OMP PRIVATE(iStrip, iRow, iCell, iCol, strip_ub) + !$OMP PARALLEL if (nIndeps > 1024), DEFAULT(SHARED) & + !$OMP PRIVATE(iStrip, iRow, iCell, iCol, strip_ub, mval) + if (nDofs > 1) then !$OMP DO @@ -257,9 +258,7 @@ subroutine ply_l2_projection( nDofs, nIndeps, projected, original, matrix ) do iRow = 1, nDofs - do iCell = iStrip+1, iStrip+strip_ub - projected(iCell, iRow) = 0.0_rk - end do + projected(iStrip+1:iStrip+strip_ub, iRow) = 0.0_rk do iCol = 1, nDofs mval = matrix(iCol,iRow) do iCell = iStrip+1, iStrip+strip_ub From 997bd9732203f1ab4e8b843c5d47c558a2551abe Mon Sep 17 00:00:00 2001 From: Daniel Fleischer Date: Wed, 2 Sep 2020 11:39:08 +0200 Subject: [PATCH 17/21] Moved the OpenMP statement in ply_l2p_projection to the inner loop for better performance --HG-- branch : feature/openmp --- source/ply_l2p_module.f90 | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/source/ply_l2p_module.f90 b/source/ply_l2p_module.f90 index e9f9f5b..108d42e 100644 --- a/source/ply_l2p_module.f90 +++ b/source/ply_l2p_module.f90 @@ -245,20 +245,22 @@ subroutine ply_l2_projection( nDofs, nIndeps, projected, original, matrix ) ! integer, parameter :: vlen = nIndeps ! -------------------------------------------------------------------- ! - !$OMP PARALLEL if (nIndeps > 1024), DEFAULT(SHARED) & + !$OMP PARALLEL IF (nIndeps > 256) & + !$OMP DEFAULT(SHARED) & !$OMP PRIVATE(iStrip, iRow, iCell, iCol, strip_ub, mval) if (nDofs > 1) then - !$OMP DO do iStrip=0,nIndeps-1,vlen ! Calculate the upper bound of the current strip strip_ub = min(iStrip + vlen, nIndeps) - iStrip + !$OMP DO do iRow = 1, nDofs projected(iStrip+1:iStrip+strip_ub, iRow) = 0.0_rk + do iCol = 1, nDofs mval = matrix(iCol,iRow) do iCell = iStrip+1, iStrip+strip_ub @@ -270,8 +272,8 @@ subroutine ply_l2_projection( nDofs, nIndeps, projected, original, matrix ) end do ! iCol = 1, nCols end do ! iRow = 1, nRows + !$OMP END DO end do ! iStrip - !$OMP END DO else From 0e32b07d6daa18ed66441afb991b1ec679d52211 Mon Sep 17 00:00:00 2001 From: Daniel Fleischer Date: Wed, 2 Sep 2020 12:05:26 +0200 Subject: [PATCH 18/21] Implemented OpenMP in ply_leg_diff_module --HG-- branch : feature/openmp --- source/ply_leg_diff_module.fpp | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/source/ply_leg_diff_module.fpp b/source/ply_leg_diff_module.fpp index 2f51f79..b0d1567 100644 --- a/source/ply_leg_diff_module.fpp +++ b/source/ply_leg_diff_module.fpp @@ -339,6 +339,9 @@ contains integer :: iDeg, iDeg1, iDeg2, iDeg3 real(kind=rk) :: derivative((mpd+1)**2,mpd+1) ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), & + !$OMP PRIVATE( iVar, dofPos, dofPosPrev, iDeg, iDeg1, iDeg2, iDeg3 ) & + !$OMP PRIVATE( derivative ) varloop: do iVar = 1, nVars @@ -346,6 +349,7 @@ contains iDeg1 = mpd + !$OMP DO do iDeg = 1, (mpd+1)**2 iDeg3 = (iDeg-1)/(mpd+1) + 1 iDeg2 = iDeg - (iDeg3-1)*(mpd+1) @@ -353,7 +357,9 @@ contains ?? copy :: posOfModgCoeffQTens(iDeg1+1, iDeg2, iDeg3, mPd, dofpos ) derivative(iDeg, mpd) = legCoeffs(dofpos,iVar) end do + !$OMP END DO + !$OMP DO do iDeg1 = mPd-1, 1, -1 !NEC$ ivdep @@ -367,8 +373,10 @@ contains & + legCoeffs(dofposprev, iVar) end do end do + !$OMP END DO ! Scale the results due to the Jacobians of the mappings + !$OMP DO do dofpos=1,(mpd+1)**3 ideg3 = (dofpos-1)/(mpd+1)**2 + 1 iDeg = dofpos - (ideg3-1)*(mpd+1)**2 @@ -378,8 +386,10 @@ contains & * (2.0_rk/elemLength) & & * (2.0_rk*iDeg1 - 1.0_rk) end do + !$OMP END DO end do varloop + !$OMP END PARALLEL end subroutine ply_calcDiff_leg_x_vec ! ************************************************************************ ! @@ -407,6 +417,9 @@ contains integer :: iDeg, iDeg1, iDeg2, iDeg3 real(kind=rk) :: derivative((mpd+1)**2,mpd+1) ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), & + !$OMP PRIVATE( iVar, dofPos, dofPosPrev, iDeg, iDeg1, iDeg2, iDeg3) & + !$OMP PRIVATE( derivative) varloop: do iVar = 1, nVars @@ -414,6 +427,7 @@ contains iDeg2 = mpd + !$OMP DO do iDeg = 1, (mpd+1)**2 iDeg3 = (iDeg-1)/(mpd+1) + 1 iDeg1 = iDeg - (iDeg3-1)*(mpd+1) @@ -421,7 +435,9 @@ contains ?? copy :: posOfModgCoeffQTens(iDeg1, iDeg2+1, iDeg3, mPd, dofpos ) derivative(iDeg, mpd) = legCoeffs(dofpos,iVar) end do + !$OMP END DO + !$OMP DO do iDeg2 = mPd-1, 1, -1 !NEC$ ivdep @@ -435,8 +451,10 @@ contains & + legCoeffs(dofposprev, iVar) end do end do + !$OMP END DO ! Scale the results due to the Jacobians of the mappings + !$OMP DO do dofpos=1,(mpd+1)**3 ideg3 = (dofpos-1)/(mpd+1)**2 + 1 iDeg = dofpos - (ideg3-1)*(mpd+1)**2 @@ -446,8 +464,10 @@ contains & * (2.0_rk/elemLength) & & * (2.0_rk*iDeg2 - 1.0_rk) end do + !$OMP END DO end do varloop + !$OMP END PARALLEL end subroutine ply_calcDiff_leg_y_vec ! ************************************************************************ ! @@ -476,12 +496,16 @@ contains real(kind=rk) :: derivative((mpd+1)**2,mpd+1) ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), & + !$OMP PRIVATE( iVar, dofPos, dofPosPrev, iDeg, iDeg1, iDeg2, iDeg3) & + !$OMP PRIVATE( derivative) varloop: do iVar = 1, nVars derivative(:,mpd+1) = 0.0_rk iDeg3 = mpd + !$OMP DO do iDeg = 1, (mpd+1)**2 iDeg2 = (iDeg-1)/(mpd+1) + 1 iDeg1 = iDeg - (iDeg2-1)*(mpd+1) @@ -489,7 +513,9 @@ contains ?? copy :: posOfModgCoeffQTens(iDeg1, iDeg2, iDeg3+1, mPd, dofpos ) derivative(iDeg, mpd) = legCoeffs(dofpos,iVar) end do + !$OMP END DO + !$OMP DO do iDeg3 = mPd-1, 1, -1 !NEC$ ivdep @@ -503,8 +529,10 @@ contains & + legCoeffs(dofposprev, iVar) end do end do + !$OMP END DO ! Scale the results due to the Jacobians of the mappings + !$OMP DO do dofpos=1,(mpd+1)**3 ideg3 = (dofpos-1)/(mpd+1)**2 + 1 ideg = dofpos - (ideg3-1)*(mpd+1)**2 @@ -512,8 +540,10 @@ contains & * (2.0_rk/elemLength) & & * (2.0_rk*iDeg3 - 1.0_rk) end do + !$OMP END DO end do varloop + !$OMP END PARALLEL end subroutine ply_calcDiff_leg_z_vec ! ************************************************************************ ! From fd53c9d1b723836fa7d901c83cf5efeaca8b250b Mon Sep 17 00:00:00 2001 From: Daniel Fleischer Date: Mon, 19 Oct 2020 10:57:15 +0200 Subject: [PATCH 19/21] Rearranged L2-projection and OMP-statements on OpenMP-branch for test purposes. --HG-- branch : feature/openmp --- source/ply_l2p_module.f90 | 78 ++++++++++++++++++++++++++++++++------- 1 file changed, 65 insertions(+), 13 deletions(-) diff --git a/source/ply_l2p_module.f90 b/source/ply_l2p_module.f90 index 108d42e..409bb7e 100644 --- a/source/ply_l2p_module.f90 +++ b/source/ply_l2p_module.f90 @@ -245,44 +245,96 @@ subroutine ply_l2_projection( nDofs, nIndeps, projected, original, matrix ) ! integer, parameter :: vlen = nIndeps ! -------------------------------------------------------------------- ! - !$OMP PARALLEL IF (nIndeps > 256) & - !$OMP DEFAULT(SHARED) & - !$OMP PRIVATE(iStrip, iRow, iCell, iCol, strip_ub, mval) +! Original version (for reference) +!! if (nDofs > 1) then +!! +!! do iStrip=1,nIndeps,vlen +!! +!! ! Calculate the upper bound of the current strip +!! strip_ub = iStrip-1 + min(vlen, nIndeps-iStrip+1) +!! +!! do iRow = 1, nDofs +!! +!! do iCell = iStrip, strip_ub +!! projected(iCell, iRow) = 0.0_rk +!! end do +!! +!! do iCol = 1, nDofs +!! mval = matrix(iCol,iRow) +!! do iCell = iStrip, strip_ub +!! ! on SX-ACE, this can be identified as matrix multiplication +!! ! which results in VERY HIGH performance +!! projected(iCell, iRow) = projected(iCell, iRow) & +!! & + mval * original(iCol, iCell) +!! end do ! iCell +!! end do ! iCol = 1, nCols +!! end do ! iRow = 1, nRows +!! end do ! iStrip +!! +!! else +!! +!! projected = matrix(nDofs,1) * original +!! +!! end if if (nDofs > 1) then - do iStrip=0,nIndeps-1,vlen + !$OMP PARALLEL DO DEFAULT(SHARED), & + !$OMP PRIVATE(iStrip, iRow, iCell, iCol, mval) + do iStrip=1,nIndeps,vlen ! Calculate the upper bound of the current strip - strip_ub = min(iStrip + vlen, nIndeps) - iStrip + strip_ub = iStrip-1 + min(vlen, nIndeps-iStrip+1) - !$OMP DO do iRow = 1, nDofs - projected(iStrip+1:iStrip+strip_ub, iRow) = 0.0_rk + do iCell = iStrip, strip_ub + projected(iCell, iRow) = 0.0_rk + end do do iCol = 1, nDofs mval = matrix(iCol,iRow) - do iCell = iStrip+1, iStrip+strip_ub + do iCell = iStrip, strip_ub ! on SX-ACE, this can be identified as matrix multiplication ! which results in VERY HIGH performance projected(iCell, iRow) = projected(iCell, iRow) & & + mval * original(iCol, iCell) end do ! iCell end do ! iCol = 1, nCols - end do ! iRow = 1, nRows - !$OMP END DO end do ! iStrip + !$OMP END PARALLEL DO else - !$OMP SINGLE projected = matrix(nDofs,1) * original - !$OMP END SINGLE end if - !$OMP END PARALLEL + + +! test-version of the loop (will be removed later) +!! if (nDofs > 1) then +!! +!! projected(:, :) = 0.0_rk +!! +!! !$OMP PARALLEL DO COLLAPSE(2) DEFAULT(SHARED), & +!! !$OMP PRIVATE(iStrip, iRow, iCell, iCol, mval) +!! do iRow = 1, nDofs +!! do iCol = 1, nDofs +!! mval = matrix(iCol,iRow) +!! do iStrip=1,nIndeps +!! projected(iStrip, iRow) = projected(iStrip, iRow) & +!! & + mval * original(iCol, iStrip) +!! end do +!! end do +!! end do +!! !$OMP END PARALLEL DO +!! +!! else +!! +!! projected = matrix(nDofs,1) * original +!! +!! end if end subroutine ply_l2_projection ! ************************************************************************ ! From a9e57632bc585a957370b366c75bdbf7e4cc924b Mon Sep 17 00:00:00 2001 From: Harald Klimach Date: Fri, 14 Nov 2025 12:20:14 +0100 Subject: [PATCH 20/21] Adapt l2p_3D_performance test to changed tem_start interface --- utests/ply_l2p_3D_performance_test.f90 | 1 - 1 file changed, 1 deletion(-) diff --git a/utests/ply_l2p_3D_performance_test.f90 b/utests/ply_l2p_3D_performance_test.f90 index 571fa43..27e714d 100644 --- a/utests/ply_l2p_3D_performance_test.f90 +++ b/utests/ply_l2p_3D_performance_test.f90 @@ -43,7 +43,6 @@ program ply_l2p_3D_performance_test ! Init the Treelm environment, needed to init the log Unit call tem_start(codeName = 'L2P 3D Performance Test', & - & version = '1', & & general = general ) call tem_logging_init_primary( level = 1, & & rank = general%proc%rank ) From 27dc67c25fcf3c0c0eaf2efad59c9bdc7f9702cf Mon Sep 17 00:00:00 2001 From: Harald Klimach Date: Fri, 14 Nov 2025 12:39:02 +0100 Subject: [PATCH 21/21] Adapt fpt_3D_performance_test to changed tem_start interface --- utests/with_fftw/ply_fpt_3D_performance_test.f90 | 1 - 1 file changed, 1 deletion(-) diff --git a/utests/with_fftw/ply_fpt_3D_performance_test.f90 b/utests/with_fftw/ply_fpt_3D_performance_test.f90 index 1fd6c7e..1555f82 100644 --- a/utests/with_fftw/ply_fpt_3D_performance_test.f90 +++ b/utests/with_fftw/ply_fpt_3D_performance_test.f90 @@ -46,7 +46,6 @@ program ply_fpt_3D_performance_test ! Init the Treelm environment, needed to init the log Unit call tem_start(codeName = 'FPT 3D Performance Test', & - & version = '1', & & general = general ) call tem_logging_init_primary( level = 1, & & rank = general%proc%rank )