diff --git a/README.md b/README.md index 8f897a8..93830a3 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,8 @@ Polynomials Library =================== +*This is the OpenMP implementation branch* + This project is a supporting library for [TreElM](https://bitbucket.org/apesteam/treelm). It does not work on its own, but rather needs to be included in other projects, which also include TreElM. diff --git a/source/fpt/ply_chebPoint_module.f90 b/source/fpt/ply_chebPoint_module.f90 new file mode 100644 index 0000000..0b8b8b1 --- /dev/null +++ b/source/fpt/ply_chebPoint_module.f90 @@ -0,0 +1,617 @@ +! Copyright (c) 2012-2014 Jens Zudrop +! Copyright (c) 2013-2014, 2017 Peter Vitt +! Copyright (c) 2013-2014 Verena Krupp +! Copyright (c) 2014 Harald Klimach +! Copyright (c) 2016 Daniel PetrĂ³ +! +! Parts of this file were written by Jens Zudrop for German Research School for +! Simulation Sciences GmbH. +! +! Parts of this file were written by Verena Krupp, Harald Klimach, Peter Vitt +! and Daniel PetrĂ³ for University of Siegen. +! +! Permission to use, copy, modify, and distribute this software for any +! purpose with or without fee is hereby granted, provided that the above +! copyright notice and this permission notice appear in all copies. +! +! THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES +! WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +! MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR +! ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +! WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +! ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +! OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +! **************************************************************************** ! +!> Routines and datatypes related to Chebyshev points +!! are located in this module. +!! \author{Jens Zudrop} +module ply_chebPoint_module + use env_module, only: rk + use tem_param_module, only: PI + use tem_aux_module, only: tem_abort + use tem_logging_module, only: logUnit + + implicit none + + private + + public :: create_volume_cheb_points_cube + public :: create_surface_cheb_points_cube + public :: create_volume_cheb_points_cube_1d + public :: create_surface_cheb_points_cube_1d + public :: create_volume_lobattocheb_points_cube_1d + public :: create_surface_lobattocheb_points_cube_1d + public :: create_volume_cheb_points_cube_2d + public :: create_surface_cheb_points_cube_2d + public :: create_volume_lobattocheb_points_cube_2d + public :: create_surface_lobattocheb_points_cube_2d + public :: create_volume_lobattocheb_points_cube + public :: create_surface_lobattocheb_points_cube + + +contains + + + ! ************************************************************************ ! + !> Generates a given number of Chebyshev points on the unit interval [-1;+1]. + subroutine ply_chebPoint_1D( nPoints, chebPnt1D ) + ! -------------------------------------------------------------------- ! + !> The number of points to generate + integer, intent(in) :: nPoints + !> Array with 1D Chebyshev points on the interval [-1,+1] + real(kind=rk), intent(out), allocatable :: chebPnt1D(:) + ! -------------------------------------------------------------------- ! + integer :: iPoint + ! -------------------------------------------------------------------- ! + + allocate(chebPnt1D(nPoints)) + + do iPoint = 1, nPoints + chebPnt1D(iPoint) = -1.0_rk & + & * cos( PI / nPoints * ( (iPoint - 1.0_rk) + 1.0_rk / 2.0_rk ) ) + end do + + end subroutine ply_chebPoint_1D + ! ************************************************************************ ! + + + ! ************************************************************************ ! + !> Generates a given number of Lobatto-Chebyshev points on the unit interval + !! [-1;+1]. + !! + !! The Lobatto-Chebyshev points are ordered from +1 to -1. The fpt + !! have to be adapted accordingly. + subroutine ply_lobattoChebPoint_1D( nPoints, chebPnt1D ) + ! -------------------------------------------------------------------- ! + !> The number of points to generate + integer, intent(in) :: nPoints + !> Array with 1D Chebyshev points on the interval [-1,+1] + real(kind=rk), intent(out), allocatable :: chebPnt1D(:) + ! -------------------------------------------------------------------- ! + integer :: iPoint + ! -------------------------------------------------------------------- ! + + allocate(chebPnt1D(nPoints)) + + do iPoint = 1, nPoints + chebPnt1D(iPoint) = cos( ( iPoint - 1.0_rk ) * PI / ( nPoints - 1.0_rk ) ) + end do + + end subroutine ply_lobattoChebPoint_1D + ! ************************************************************************ ! + + + ! ************************************************************************ ! + subroutine create_volume_cheb_points_cube(num_intp_per_direction, points) + ! -------------------------------------------------------------------- ! + integer, intent(in) :: num_intp_per_direction + real(kind=rk),allocatable, intent(inout) :: points(:,:) + ! -------------------------------------------------------------------- ! + integer :: i, j, k ! loop indices + integer :: pointNumber + real(kind=rk), allocatable :: chebPnt1D(:) + integer :: nquadpoints + ! -------------------------------------------------------------------- ! + + nquadpoints = num_intp_per_direction**3 + + allocate(points(nquadpoints,3)) + + call ply_chebPoint_1D( num_intp_per_direction, chebPnt1D ) + + pointNumber = 1 + do k = 1, num_intp_per_direction + do j = 1, num_intp_per_direction + do i = 1, num_intp_per_direction + ! here we build all possible combinations of the one-dimensional + ! points to get the three dimensional values. + points(pointNumber, 1) = chebPnt1D(i) + points(pointNumber, 2) = chebPnt1D(j) + points(pointNumber, 3) = chebPnt1D(k) + pointNumber = pointNumber + 1 + end do + end do + end do + + end subroutine create_volume_cheb_points_cube + ! ************************************************************************ ! + + + ! ************************************************************************ ! + subroutine create_volume_lobattocheb_points_cube( num_intp_per_direction, & + & points ) + ! -------------------------------------------------------------------- ! + integer, intent(in) :: num_intp_per_direction + real(kind=rk),allocatable, intent(inout) :: points(:,:) + ! -------------------------------------------------------------------- ! + integer :: i, j, k ! loop indices + integer :: pointNumber + real(kind=rk), allocatable :: chebPnt1D(:) + integer :: nquadpoints + ! -------------------------------------------------------------------- ! + + nquadpoints = num_intp_per_direction**3 + + allocate(points(nquadpoints,3)) + + call ply_lobattoChebPoint_1D( num_intp_per_direction, chebPnt1D ) + + pointNumber = 1 + do k = 1, num_intp_per_direction + do j = 1, num_intp_per_direction + do i = 1, num_intp_per_direction + ! here we build all possible combinations of the one-dimensional + ! points to get the three dimensional values. + points(pointNumber, 1) = chebPnt1D(i) + points(pointNumber, 2) = chebPnt1D(j) + points(pointNumber, 3) = chebPnt1D(k) + pointNumber = pointNumber + 1 + end do + end do + end do + + end subroutine create_volume_lobattocheb_points_cube + ! ************************************************************************ ! + + + ! ************************************************************************ ! + subroutine create_volume_cheb_points_cube_2d( num_intp_per_direction, points ) + ! -------------------------------------------------------------------- ! + integer, intent(in) :: num_intp_per_direction + real(kind=rk),allocatable, intent(inout) :: points(:,:) + ! -------------------------------------------------------------------- ! + !> loop indices + integer :: i, j + integer :: pointNumber + real(kind=rk), allocatable :: chebPnt1D(:) + integer :: nquadpoints + ! -------------------------------------------------------------------- ! + + nquadpoints = num_intp_per_direction**2 + + allocate(points(nquadpoints,3)) + + call ply_chebPoint_1D( num_intp_per_direction, chebPnt1D ) + + pointNumber = 1 + do j = 1, num_intp_per_direction + do i = 1, num_intp_per_direction + ! here we build all possible combinations of the one-dimensional + ! points to get the three dimensional values. + points(pointNumber, 1) = chebPnt1D(i) + points(pointNumber, 2) = chebPnt1D(j) + points(pointNumber, 3) = 0.0_rk + pointNumber = pointNumber + 1 + end do + end do + + end subroutine create_volume_cheb_points_cube_2d + ! ************************************************************************ ! + + + ! ************************************************************************ ! + !> Creates Lobatto-Chebyshev points (with 3 coordinates) but + !! for 2D kernels. + subroutine create_volume_lobattocheb_points_cube_2d(num_intp_per_direction, points) + ! -------------------------------------------------------------------- ! + integer, intent(in) :: num_intp_per_direction + real(kind=rk),allocatable, intent(inout) :: points(:,:) + ! -------------------------------------------------------------------- ! + !> loop indices + integer :: i, j + integer :: pointNumber + real(kind=rk), allocatable :: chebPnt1D(:) + integer :: nquadpoints + ! -------------------------------------------------------------------- ! + + nquadpoints = num_intp_per_direction**2 + + allocate(points(nquadpoints,3)) + + call ply_lobattoChebPoint_1D( num_intp_per_direction, chebPnt1D ) + + pointNumber = 1 + do j = 1, num_intp_per_direction + do i = 1, num_intp_per_direction + ! here we build all possible combinations of the one-dimensional + ! points to get the three dimensional values. + points(pointNumber, 1) = chebPnt1D(i) + points(pointNumber, 2) = chebPnt1D(j) + points(pointNumber, 3) = 0.0_rk + pointNumber = pointNumber + 1 + end do + end do + + end subroutine create_volume_lobattocheb_points_cube_2d + ! ************************************************************************ ! + + + ! ************************************************************************ ! + subroutine create_volume_cheb_points_cube_1d( num_intp_per_direction, points ) + ! -------------------------------------------------------------------- ! + integer, intent(in) :: num_intp_per_direction + real(kind=rk), allocatable, intent(inout) :: points(:,:) + ! -------------------------------------------------------------------- ! + integer :: i ! loop indices + integer :: pointNumber + real(kind=rk), allocatable :: chebPnt1D(:) + ! -------------------------------------------------------------------- ! + + allocate(points(num_intp_per_direction,3)) + + call ply_chebPoint_1D( num_intp_per_direction, chebPnt1D ) + + pointNumber = 1 + do i = 1, num_intp_per_direction + ! here we build all possible combinations of the one-dimensional + ! points to get the three dimensional values. + points(pointNumber, 1) = chebPnt1D(i) + points(pointNumber, 2) = 0.0_rk + points(pointNumber, 3) = 0.0_rk + pointNumber = pointNumber + 1 + end do + + end subroutine create_volume_cheb_points_cube_1d + ! ************************************************************************ ! + + + ! ************************************************************************ ! + !> Creates Lobatto-Chebyshev points (with 3 coordinates) but + !! for 1D kernels. + subroutine create_volume_lobattocheb_points_cube_1d( num_intp_per_direction, & + & points ) + ! -------------------------------------------------------------------- ! + integer, intent(in) :: num_intp_per_direction + real(kind=rk),allocatable, intent(inout) :: points(:,:) + ! -------------------------------------------------------------------- ! + integer :: i ! loop indices + integer :: pointNumber + real(kind=rk), allocatable :: chebPnt1D(:) + ! -------------------------------------------------------------------- ! + + allocate(points(num_intp_per_direction,3)) + + call ply_lobattoChebPoint_1D( num_intp_per_direction, chebPnt1D ) + + pointNumber = 1 + do i = 1, num_intp_per_direction + ! here we build all possible combinations of the one-dimensional + ! points to get the three dimensional values. + points(pointNumber, 1) = chebPnt1D(i) + points(pointNumber, 2) = 0.0_rk + points(pointNumber, 3) = 0.0_rk + pointNumber = pointNumber + 1 + end do + + end subroutine create_volume_lobattocheb_points_cube_1d + ! ************************************************************************ ! + + + ! ************************************************************************ ! + !> Create Chebyshev nodes on a specific face of the reference element. + subroutine create_surface_cheb_points_cube( num_intp_per_direction, points, & + & dir, align ) + ! -------------------------------------------------------------------- ! + integer, intent(in) :: num_intp_per_direction + real(kind=rk),allocatable, intent(inout) :: points(:,:) + !> The spatial direction of the face. \n + !! 1 -> x direction \n + !! 2 -> y direction \n + !! 3 -> z direction + integer :: dir + !> Left or right face of the reference element + integer :: align + ! -------------------------------------------------------------------- ! + integer :: i, j ! loop indices + integer :: pointNumber + real(kind=rk), allocatable :: chebPnt1D(:) + integer :: nquadpoints + ! -------------------------------------------------------------------- ! + + nquadpoints = num_intp_per_direction**2 + + allocate(points(nquadpoints,3)) + + call ply_chebPoint_1D( num_intp_per_direction, chebPnt1D ) + + pointNumber = 1 + + select case(dir) + case(1) ! face in x direction, x coord is fixed + do j = 1, num_intp_per_direction + do i = 1, num_intp_per_direction + points(pointNumber, 1) = (-1.0_rk)**align + points(pointNumber, 2) = chebPnt1D(i) + points(pointNumber, 3) = chebPnt1D(j) + pointNumber = pointNumber + 1 + end do + end do + case(2) ! face in y direction, y coord is fixed + do j = 1, num_intp_per_direction + do i = 1, num_intp_per_direction + points(pointNumber, 1) = chebPnt1D(i) + points(pointNumber, 2) = (-1.0_rk)**align + points(pointNumber, 3) = chebPnt1D(j) + pointNumber = pointNumber + 1 + end do + end do + case(3) ! face in z direction, z coord is fixes + do j = 1, num_intp_per_direction + do i = 1, num_intp_per_direction + points(pointNumber , 1) = chebPnt1D(i) + points(pointNumber , 2) = chebPnt1D(j) + points(pointNumber , 3) = (-1.0_rk)**align + pointNumber = pointNumber + 1 + end do + end do + case default + call tem_abort( 'ERROR in create_surface_cheb_points_cube: unknown ' & + & // 'face direction' ) + end select + + end subroutine create_surface_cheb_points_cube + ! ************************************************************************ ! + + + ! ************************************************************************ ! + !> Create Lobatto-Chebyshev nodes on a specific face of the reference element. + subroutine create_surface_lobattocheb_points_cube( num_intp_per_direction, & + & points, dir, align ) + ! -------------------------------------------------------------------- ! + integer, intent(in) :: num_intp_per_direction + real(kind=rk),allocatable, intent(inout) :: points(:,:) + !> The spatial direction of the face. \n + !! 1 -> x direction \n + !! 2 -> y direction \n + !! 3 -> z direction + integer :: dir + !> Left or right face of the reference element + integer :: align + ! -------------------------------------------------------------------- ! + integer :: i, j ! loop indices + integer :: pointNumber + real(kind=rk), allocatable :: chebPnt1D(:) + integer :: nquadpoints + ! -------------------------------------------------------------------- ! + + nquadpoints = num_intp_per_direction**2 + + allocate(points(nquadpoints,3)) + + call ply_lobattoChebPoint_1D( num_intp_per_direction, chebPnt1D ) + + pointNumber = 1 + + select case(dir) + case(1) ! face in x direction, x coord is fixed + do j = 1, num_intp_per_direction + do i = 1, num_intp_per_direction + points(pointNumber, 1) = (-1.0_rk)**align + points(pointNumber, 2) = chebPnt1D(i) + points(pointNumber, 3) = chebPnt1D(j) + pointNumber = pointNumber + 1 + end do + end do + case(2) ! face in y direction, y coord is fixed + do j = 1, num_intp_per_direction + do i = 1, num_intp_per_direction + points(pointNumber, 1) = chebPnt1D(i) + points(pointNumber, 2) = (-1.0_rk)**align + points(pointNumber, 3) = chebPnt1D(j) + pointNumber = pointNumber + 1 + end do + end do + case(3) ! face in z direction, z coord is fixes + do j = 1, num_intp_per_direction + do i = 1, num_intp_per_direction + points(pointNumber, 1) = chebPnt1D(i) + points(pointNumber, 2) = chebPnt1D(j) + points(pointNumber, 3) = (-1.0_rk)**align + pointNumber = pointNumber + 1 + end do + end do + case default + call tem_abort( 'ERROR in create_surface_lobattocheb_points_cube:' & + & // ' unknown face direction' ) + end select + + end subroutine create_surface_lobattocheb_points_cube + ! ************************************************************************ ! + + + ! ************************************************************************ ! + !> Create Chebyshev nodes on a specific face of the reference element. + subroutine create_surface_cheb_points_cube_2d( num_intp_per_direction, & + & points, dir, align ) + ! -------------------------------------------------------------------- ! + integer, intent(in) :: num_intp_per_direction + real(kind=rk),allocatable, intent(inout) :: points(:,:) + !> The spatial direction of the face. \n + !! 1 -> x direction \n + !! 2 -> y direction \n + integer :: dir + !> Left or right face of the reference element + integer :: align + ! -------------------------------------------------------------------- ! + integer :: i + integer :: pointNumber + real(kind=rk), allocatable :: chebPnt1D(:) + integer :: nquadpoints + ! -------------------------------------------------------------------- ! + + nquadpoints = num_intp_per_direction + + allocate(points(nquadpoints,3)) + + call ply_chebPoint_1D( num_intp_per_direction, chebPnt1D ) + + pointNumber = 1 + + select case(dir) + case(1) ! face in x direction, x coord is fixed + do i = 1, num_intp_per_direction + points(pointNumber, 1) = (-1.0_rk)**align + points(pointNumber, 2) = chebPnt1D(i) + points(pointNumber, 3) = 0.0_rk + pointNumber = pointNumber + 1 + end do + case(2) ! face in y direction, y coord is fixed + do i = 1, num_intp_per_direction + points(pointNumber, 1) = chebPnt1D(i) + points(pointNumber, 2) = (-1.0_rk)**align + points(pointNumber, 3) = 0.0_rk + pointNumber = pointNumber + 1 + end do + case default + call tem_abort( 'ERROR in create_surface_cheb_points_cube_2d:' & + & // ' unknown face direction' ) + end select + + end subroutine create_surface_cheb_points_cube_2d + ! ************************************************************************ ! + + + ! ************************************************************************ ! + !> Create Lobatto-Chebyshev nodes on a specific face of the reference element. + subroutine create_surface_lobattocheb_points_cube_2d( & + & num_intp_per_direction, points, dir, align ) + ! -------------------------------------------------------------------- ! + integer, intent(in) :: num_intp_per_direction + real(kind=rk),allocatable, intent(inout) :: points(:,:) + !> The spatial direction of the face. \n + !! 1 -> x direction \n + !! 2 -> y direction \n + integer :: dir + !> Left or right face of the reference element + integer :: align + ! -------------------------------------------------------------------- ! + integer :: i + integer :: pointNumber + real(kind=rk), allocatable :: chebPnt1D(:) + integer :: nquadpoints + ! -------------------------------------------------------------------- ! + + nquadpoints = num_intp_per_direction + + allocate(points(nquadpoints,3)) + + call ply_lobattoChebPoint_1D( num_intp_per_direction, chebPnt1D ) + + pointNumber = 1 + + select case(dir) + case(1) ! face in x direction, x coord is fixed + do i = 1, num_intp_per_direction + points(pointNumber, 1) = (-1.0_rk)**align + points(pointNumber, 2) = chebPnt1D(i) + points(pointNumber, 3) = 0.0_rk + pointNumber = pointNumber + 1 + end do + case(2) ! face in y direction, y coord is fixed + do i = 1, num_intp_per_direction + points(pointNumber, 1) = chebPnt1D(i) + points(pointNumber, 2) = (-1.0_rk)**align + points(pointNumber, 3) = 0.0_rk + pointNumber = pointNumber + 1 + end do + case default + call tem_abort( 'ERROR in create_surface_cheb_points_cube_2d:' & + & // ' unknown face direction' ) + end select + + end subroutine create_surface_lobattocheb_points_cube_2d + ! ************************************************************************ ! + + + ! ************************************************************************ ! + !> Create Chebyshev nodes on a specific face of the reference element. + subroutine create_surface_cheb_points_cube_1d( points, dir, align ) + ! -------------------------------------------------------------------- ! + real(kind=rk),allocatable, intent(inout) :: points(:,:) + !> The spatial direction of the face. \n + !! 1 -> x direction \n + !! 2 -> y direction \n + integer :: dir + !> Left or right face of the reference element + integer :: align + ! -------------------------------------------------------------------- ! + integer :: pointNumber + integer :: nquadpoints + ! -------------------------------------------------------------------- ! + + nquadpoints = 1 + allocate(points(nquadpoints,3)) + + pointNumber = 1 + + select case(dir) + case(1) ! face in x direction, x coord is fixed + points(pointNumber, 1) = (-1.0_rk)**align + points(pointNumber, 2) = 0.0_rk + points(pointNumber, 3) = 0.0_rk + case default + call tem_abort( 'ERROR in create_surface_cheb_points_cube_1d:' & + & // ' unknown face direction' ) + end select + + end subroutine create_surface_cheb_points_cube_1d + ! ************************************************************************ ! + + + ! ************************************************************************ ! + !> Create Lobatto-Chebyshev nodes on a specific face of the reference element. + subroutine create_surface_lobattocheb_points_cube_1d( points, dir, align ) + ! -------------------------------------------------------------------- ! + real(kind=rk),allocatable, intent(inout) :: points(:,:) + !> The spatial direction of the face. \n + !! 1 -> x direction \n + !! 2 -> y direction \n + integer :: dir + !> Left or right face of the reference element + integer :: align + ! -------------------------------------------------------------------- ! + integer :: pointNumber + integer :: nquadpoints + ! -------------------------------------------------------------------- ! + + nquadpoints = 1 + allocate(points(nquadpoints,3)) + + pointNumber = 1 + + select case(dir) + case(1) ! face in x direction, x coord is fixed + points(pointNumber, 1) = (-1.0_rk)**align + points(pointNumber, 2) = 0.0_rk + points(pointNumber, 3) = 0.0_rk + case default + call tem_abort( 'ERROR in create_surface_cheb_points_cube_1d:' & + & // ' unknown face direction' ) + end select + + end subroutine create_surface_lobattocheb_points_cube_1d + ! ************************************************************************ ! + + +end module ply_chebPoint_module + diff --git a/source/fpt/ply_legFpt_2D_module.fpp b/source/fpt/ply_legFpt_2D_module.fpp index 747b2be..2be0ba9 100644 --- a/source/fpt/ply_legFpt_2D_module.fpp +++ b/source/fpt/ply_legFpt_2D_module.fpp @@ -83,6 +83,7 @@ contains allocate(alph(n**2)) + ! original layout (n = 3): ! 1 2 3 ! 4 5 6 diff --git a/source/fpt/ply_legFpt_3D_module.fpp b/source/fpt/ply_legFpt_3D_module.fpp index fab0d60..dd29a43 100644 --- a/source/fpt/ply_legFpt_3D_module.fpp +++ b/source/fpt/ply_legFpt_3D_module.fpp @@ -145,6 +145,7 @@ contains & pntVal = pntVal ) ! <<<<< Z-Direction <<<<< ! + end subroutine ply_legToPnt_3D_singVar ! ------------------------------------------------------------------------ ! @@ -266,6 +267,7 @@ contains & pntVal = alph ) ! <<<<< X-Direction <<<<< ! + end subroutine ply_pntToLeg_3D_singVar ! ------------------------------------------------------------------------ ! diff --git a/source/fpt/ply_legFpt_module.f90 b/source/fpt/ply_legFpt_module.f90 index 2f97f49..0120037 100644 --- a/source/fpt/ply_legFpt_module.f90 +++ b/source/fpt/ply_legFpt_module.f90 @@ -3,6 +3,7 @@ ! Copyright (c) 2013-2014, 2017 Peter Vitt ! Copyright (c) 2013-2014 Verena Krupp ! Copyright (c) 2016 Langhammer Kay +! Copyright (c) 2020 Daniel Fleischer ! ! Parts of this file were written by Jens Zudrop and Harald Klimach ! for German Research School for Simulation Sciences GmbH. @@ -344,8 +345,10 @@ subroutine ply_legToPnt_single( fpt, legCoeffs, pntVal, nIndeps ) integer :: n ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(n, iDof, cheb) n = fpt%legToChebParams%n + !$OMP DO do iDof = 1, nIndeps*n, n call ply_fpt_single( alph = legCoeffs(iDof:iDof+n-1), & & gam = cheb, & @@ -360,6 +363,9 @@ subroutine ply_legToPnt_single( fpt, legCoeffs, pntVal, nIndeps ) & cheb, & & pntVal(iDof:iDof+n-1) ) end do + !$OMP END DO + + !$OMP END PARALLEL end subroutine ply_legToPnt_single ! ------------------------------------------------------------------------ ! @@ -417,8 +423,10 @@ subroutine ply_legToPnt_lobatto_single( fpt, legCoeffs, pntVal, nIndeps ) integer :: n ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(n, iDof, cheb) n = fpt%legToChebParams%n + !$OMP DO do iDof = 1, nIndeps*n, n call ply_fpt_single( alph = legCoeffs(iDof:iDof+n-1), & & gam = cheb, & @@ -432,6 +440,9 @@ subroutine ply_legToPnt_lobatto_single( fpt, legCoeffs, pntVal, nIndeps ) & cheb, & & pntVal(iDof:iDof+n-1) ) end do + !$OMP END DO + + !$OMP END PARALLEL end subroutine ply_legToPnt_lobatto_single ! ------------------------------------------------------------------------ ! @@ -489,9 +500,12 @@ subroutine ply_pntToLeg_single( fpt, pntVal, legCoeffs, nIndeps ) integer :: n ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(n, iDof, cheb) n = fpt%legToChebParams%n normFactor = 1.0_rk / real(n,kind=rk) + + !$OMP DO do iDof = 1, nIndeps*n, n call fftw_execute_r2r( fpt%planPntToCheb, & & pntVal(iDof:iDof+n-1), & @@ -506,6 +520,9 @@ subroutine ply_pntToLeg_single( fpt, pntVal, legCoeffs, nIndeps ) & alph = cheb, & & params = fpt%ChebToLegParams ) end do + !$OMP END DO + + !$OMP END PARALLEL end subroutine ply_pntToLeg_single ! ------------------------------------------------------------------------ ! @@ -567,9 +584,12 @@ subroutine ply_pntToLeg_lobatto_single( fpt, pntVal, legCoeffs, nIndeps ) integer :: n ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(n, iDof, cheb) n = fpt%legToChebParams%n normFactor = 0.5_rk / real(n-1,kind=rk) + + !$OMP DO do iDof = 1, nIndeps*n, n call fftw_execute_r2r( fpt%planPntToCheb, & & pntVal(iDof:iDof+n-1), & @@ -584,6 +604,9 @@ subroutine ply_pntToLeg_lobatto_single( fpt, pntVal, legCoeffs, nIndeps ) & alph = cheb, & & params = fpt%ChebToLegParams ) end do + !$OMP END DO + + !$OMP END PARALLEL end subroutine ply_pntToLeg_lobatto_single ! ------------------------------------------------------------------------ ! diff --git a/source/fpt/ply_polyBaseExc_module.fpp b/source/fpt/ply_polyBaseExc_module.fpp index 1376b21..fc9e21e 100644 --- a/source/fpt/ply_polyBaseExc_module.fpp +++ b/source/fpt/ply_polyBaseExc_module.fpp @@ -1113,7 +1113,7 @@ contains !> Convert strip of coefficients of a modal representation in terms of !! Legendre polynomials to modal coefficients in terms of Chebyshev !! polynomials. - subroutine ply_fpt_single( alph, gam, params ) + subroutine ply_fpt_single( alph, gam, params) ! -------------------------------------------------------------------- ! !> The parameters of the fast polynomial transformation. type(ply_trafo_params_type), intent(inout) :: params diff --git a/source/ply_LegPolyProjection_module.f90 b/source/ply_LegPolyProjection_module.f90 index 1f41f65..4438637 100644 --- a/source/ply_LegPolyProjection_module.f90 +++ b/source/ply_LegPolyProjection_module.f90 @@ -158,6 +158,7 @@ subroutine ply_QPolyProjection( subsamp, dofReduction, tree, meshData, & real(kind=rk), allocatable :: newWorkDat(:) integer :: nChildDofs, oneDof ! -------------------------------------------------------------------- ! + if (subsamp%projectionType.ne.ply_QLegendrePoly_prp) then call tem_abort( 'ERROR in ply_QPolyProjection: subsampling is ' & & // 'only implemented for Q-Legendre-Polynomials' ) @@ -281,6 +282,7 @@ subroutine ply_initQLegProjCoeff( doftype, nDofs, ndims, nChilds, & real(kind=rk), allocatable :: projCoeffOneDim(:,:,:) real(kind=rk) :: dimexp ! -------------------------------------------------------------------- ! + select case(dofType) case(ply_QLegendrePoly_prp) allocate(projection%projCoeff(nDofs, nChildDofs, nChilds)) @@ -352,6 +354,7 @@ subroutine ply_initQLegProjCoeff( doftype, nDofs, ndims, nChilds, & & // 'for Q-Legendre polynomials' ) end select deallocate(projCoeffOneDim) + end subroutine ply_initQLegProjCoeff ! ************************************************************************ ! @@ -595,6 +598,7 @@ subroutine ply_subsampleData( tree, meshData, nDofs, nChildDofs, & integer :: oneDof, noChilds, childpos real(kind=rk), allocatable :: childData(:) ! -------------------------------------------------------------------- ! + nChilds = 2**ndims nElems = tree%nElems nElemsToRefine = count(new_refine_tree) @@ -794,6 +798,7 @@ subroutine ply_projDataToChild( parentData, nParentDofs, nChildDofs, & integer :: childDof_pos, parentDof_pos real(kind=rk) :: projCoeff ! -------------------------------------------------------------------- ! + childData(:) = 0.0_rk childLoop: do iChild = 1, nChilds diff --git a/source/ply_fxt_module.f90 b/source/ply_fxt_module.f90 index 4bfe466..e9257fb 100644 --- a/source/ply_fxt_module.f90 +++ b/source/ply_fxt_module.f90 @@ -255,6 +255,7 @@ subroutine ply_fxt_n2m_2D( fxt, nodal_data, modal_data, oversamp_degree ) & modal_data = nodal_data(lb:msq:oversamp_degree+1) ) end do modal_data = nodal_data + end subroutine ply_fxt_n2m_2D ! ************************************************************************ ! diff --git a/source/ply_l2p_module.f90 b/source/ply_l2p_module.f90 index c2fb01f..409bb7e 100644 --- a/source/ply_l2p_module.f90 +++ b/source/ply_l2p_module.f90 @@ -245,30 +245,65 @@ subroutine ply_l2_projection( nDofs, nIndeps, projected, original, matrix ) ! integer, parameter :: vlen = nIndeps ! -------------------------------------------------------------------- ! +! Original version (for reference) +!! if (nDofs > 1) then +!! +!! do iStrip=1,nIndeps,vlen +!! +!! ! Calculate the upper bound of the current strip +!! strip_ub = iStrip-1 + min(vlen, nIndeps-iStrip+1) +!! +!! do iRow = 1, nDofs +!! +!! do iCell = iStrip, strip_ub +!! projected(iCell, iRow) = 0.0_rk +!! end do +!! +!! do iCol = 1, nDofs +!! mval = matrix(iCol,iRow) +!! do iCell = iStrip, strip_ub +!! ! on SX-ACE, this can be identified as matrix multiplication +!! ! which results in VERY HIGH performance +!! projected(iCell, iRow) = projected(iCell, iRow) & +!! & + mval * original(iCol, iCell) +!! end do ! iCell +!! end do ! iCol = 1, nCols +!! end do ! iRow = 1, nRows +!! end do ! iStrip +!! +!! else +!! +!! projected = matrix(nDofs,1) * original +!! +!! end if + if (nDofs > 1) then - do iStrip=0,nIndeps-1,vlen + !$OMP PARALLEL DO DEFAULT(SHARED), & + !$OMP PRIVATE(iStrip, iRow, iCell, iCol, mval) + do iStrip=1,nIndeps,vlen ! Calculate the upper bound of the current strip - strip_ub = min(iStrip + vlen, nIndeps) - iStrip + strip_ub = iStrip-1 + min(vlen, nIndeps-iStrip+1) do iRow = 1, nDofs - do iCell = iStrip+1, iStrip+strip_ub + do iCell = iStrip, strip_ub projected(iCell, iRow) = 0.0_rk end do + do iCol = 1, nDofs mval = matrix(iCol,iRow) - do iCell = iStrip+1, iStrip+strip_ub + do iCell = iStrip, strip_ub ! on SX-ACE, this can be identified as matrix multiplication ! which results in VERY HIGH performance projected(iCell, iRow) = projected(iCell, iRow) & & + mval * original(iCol, iCell) end do ! iCell end do ! iCol = 1, nCols - end do ! iRow = 1, nRows end do ! iStrip + !$OMP END PARALLEL DO else @@ -276,6 +311,31 @@ subroutine ply_l2_projection( nDofs, nIndeps, projected, original, matrix ) end if + +! test-version of the loop (will be removed later) +!! if (nDofs > 1) then +!! +!! projected(:, :) = 0.0_rk +!! +!! !$OMP PARALLEL DO COLLAPSE(2) DEFAULT(SHARED), & +!! !$OMP PRIVATE(iStrip, iRow, iCell, iCol, mval) +!! do iRow = 1, nDofs +!! do iCol = 1, nDofs +!! mval = matrix(iCol,iRow) +!! do iStrip=1,nIndeps +!! projected(iStrip, iRow) = projected(iStrip, iRow) & +!! & + mval * original(iCol, iStrip) +!! end do +!! end do +!! end do +!! !$OMP END PARALLEL DO +!! +!! else +!! +!! projected = matrix(nDofs,1) * original +!! +!! end if + end subroutine ply_l2_projection ! ************************************************************************ ! diff --git a/source/ply_leg_diff_module.fpp b/source/ply_leg_diff_module.fpp index 5bcb804..8f0a3e4 100644 --- a/source/ply_leg_diff_module.fpp +++ b/source/ply_leg_diff_module.fpp @@ -71,6 +71,10 @@ contains integer :: leg(3), iDeg, iDeg1, iDeg2, iDeg3, DV(3) ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), & + !$OMP PRIVATE(iVar,dofPos,dofPosPrev,dofPos2Prev,iDeg,iDeg1,iDeg2,iDeg3), & + !$OMP PRIVATE(leg,DV) + if (present(dirVec)) then DV = dirvec else @@ -83,6 +87,7 @@ contains endif endif + !$OMP DO do iDeg = 1, (mpd+1)**2 iDeg1 = (iDeg-1)/(mpd+1) + 1 !! do IDeg1 = 1, mPd+1 iDeg2 = iDeg - (iDeg1-1)*(mpd+1) !! do IDeg2 = 1, mPd=1 !! iDeg2 = mod(iDeg-1,mpd+1)+1 @@ -141,8 +146,10 @@ contains end do end do end do + !$OMP END DO ! Scale the results due to the Jacobians of the mappings + !$OMP DO do dofpos=1,(mpd+1)**3 ideg3 = (dofpos-1)/(mpd+1)**2 + 1 iDeg = dofpos - (ideg3-1)*(mpd+1)**2 @@ -153,6 +160,9 @@ contains & * (2.0_rk/elemLength) & & * (2.0_rk*leg(iDir) - 1.0_rk) end do + !$OMP END DO + + !$OMP END PARALLEL !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! Uncollapsed version of the scaling ! @@ -198,6 +208,9 @@ contains integer :: iDeg, iDeg1, iDeg2, iDeg3 real(kind=rk) :: derivative((mpd+1)**2,mpd+1) ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), & + !$OMP PRIVATE( iVar, dofPos, dofPosPrev, iDeg, iDeg1, iDeg2, iDeg3 ) & + !$OMP PRIVATE( derivative ) varloop: do iVar = 1, nVars @@ -205,6 +218,7 @@ contains iDeg1 = mpd + !$OMP DO do iDeg = 1, (mpd+1)**2 iDeg3 = (iDeg-1)/(mpd+1) + 1 iDeg2 = iDeg - (iDeg3-1)*(mpd+1) @@ -212,7 +226,9 @@ contains ?? copy :: posOfModgCoeffQTens(iDeg1+1, iDeg2, iDeg3, mPd, dofpos ) derivative(iDeg, mpd) = legCoeffs(dofpos,iVar) end do + !$OMP END DO + !$OMP DO do iDeg1 = mPd-1, 1, -1 !NEC$ ivdep @@ -226,8 +242,10 @@ contains & + legCoeffs(dofposprev, iVar) end do end do + !$OMP END DO ! Scale the results due to the Jacobians of the mappings + !$OMP DO do dofpos=1,(mpd+1)**3 ideg3 = (dofpos-1)/(mpd+1)**2 + 1 iDeg = dofpos - (ideg3-1)*(mpd+1)**2 @@ -237,8 +255,10 @@ contains & * (2.0_rk/elemLength) & & * (2.0_rk*iDeg1 - 1.0_rk) end do + !$OMP END DO end do varloop + !$OMP END PARALLEL end subroutine ply_calcDiff_leg_x_vec ! ************************************************************************ ! @@ -266,6 +286,9 @@ contains integer :: iDeg, iDeg1, iDeg2, iDeg3 real(kind=rk) :: derivative((mpd+1)**2,mpd+1) ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), & + !$OMP PRIVATE( iVar, dofPos, dofPosPrev, iDeg, iDeg1, iDeg2, iDeg3) & + !$OMP PRIVATE( derivative) varloop: do iVar = 1, nVars @@ -273,6 +296,7 @@ contains iDeg2 = mpd + !$OMP DO do iDeg = 1, (mpd+1)**2 iDeg3 = (iDeg-1)/(mpd+1) + 1 iDeg1 = iDeg - (iDeg3-1)*(mpd+1) @@ -280,7 +304,9 @@ contains ?? copy :: posOfModgCoeffQTens(iDeg1, iDeg2+1, iDeg3, mPd, dofpos ) derivative(iDeg, mpd) = legCoeffs(dofpos,iVar) end do + !$OMP END DO + !$OMP DO do iDeg2 = mPd-1, 1, -1 !NEC$ ivdep @@ -294,8 +320,10 @@ contains & + legCoeffs(dofposprev, iVar) end do end do + !$OMP END DO ! Scale the results due to the Jacobians of the mappings + !$OMP DO do dofpos=1,(mpd+1)**3 ideg3 = (dofpos-1)/(mpd+1)**2 + 1 iDeg = dofpos - (ideg3-1)*(mpd+1)**2 @@ -305,8 +333,10 @@ contains & * (2.0_rk/elemLength) & & * (2.0_rk*iDeg2 - 1.0_rk) end do + !$OMP END DO end do varloop + !$OMP END PARALLEL end subroutine ply_calcDiff_leg_y_vec ! ************************************************************************ ! @@ -335,12 +365,16 @@ contains real(kind=rk) :: derivative((mpd+1)**2,mpd+1) ! -------------------------------------------------------------------- ! + !$OMP PARALLEL DEFAULT(SHARED), & + !$OMP PRIVATE( iVar, dofPos, dofPosPrev, iDeg, iDeg1, iDeg2, iDeg3) & + !$OMP PRIVATE( derivative) varloop: do iVar = 1, nVars derivative(:,mpd+1) = 0.0_rk iDeg3 = mpd + !$OMP DO do iDeg = 1, (mpd+1)**2 iDeg2 = (iDeg-1)/(mpd+1) + 1 iDeg1 = iDeg - (iDeg2-1)*(mpd+1) @@ -348,7 +382,9 @@ contains ?? copy :: posOfModgCoeffQTens(iDeg1, iDeg2, iDeg3+1, mPd, dofpos ) derivative(iDeg, mpd) = legCoeffs(dofpos,iVar) end do + !$OMP END DO + !$OMP DO do iDeg3 = mPd-1, 1, -1 !NEC$ ivdep @@ -362,8 +398,10 @@ contains & + legCoeffs(dofposprev, iVar) end do end do + !$OMP END DO ! Scale the results due to the Jacobians of the mappings + !$OMP DO do dofpos=1,(mpd+1)**3 ideg3 = (dofpos-1)/(mpd+1)**2 + 1 ideg = dofpos - (ideg3-1)*(mpd+1)**2 @@ -371,8 +409,10 @@ contains & * (2.0_rk/elemLength) & & * (2.0_rk*iDeg3 - 1.0_rk) end do + !$OMP END DO end do varloop + !$OMP END PARALLEL end subroutine ply_calcDiff_leg_z_vec ! ************************************************************************ ! @@ -565,7 +605,7 @@ contains & elemLength = elemLength, & & dirvec = dirvec(:,iDir), & & iDir = iDir ) - enddo + end do endif end subroutine ply_calcDiff_leg_2d diff --git a/source/ply_modg_basis_module.fpp b/source/ply_modg_basis_module.fpp index fef34be..d684aec 100644 --- a/source/ply_modg_basis_module.fpp +++ b/source/ply_modg_basis_module.fpp @@ -454,6 +454,7 @@ contains real(kind=rk) :: n_q ! -------------------------------------------------------------------- ! + ! allocate the output array select case(basisType) case(Q_space) @@ -533,7 +534,6 @@ contains end do end select - end subroutine ply_evalLegendreTensPoly ! ************************************************************************ ! diff --git a/source/ply_oversample_module.fpp b/source/ply_oversample_module.fpp index 6935e9d..6c90724 100644 --- a/source/ply_oversample_module.fpp +++ b/source/ply_oversample_module.fpp @@ -4,6 +4,7 @@ ! Copyright (c) 2014, 2017-2019 Harald Klimach ! Copyright (c) 2014 Verena Krupp ! Copyright (c) 2016 Tobias Girresser +! Copyright (c) 2020 Daniel Fleischer ! ! Parts of this file were written by Jens Zudrop, Nikhil Anand, Harald Klimach, ! Verena Krupp, Peter Vitt, and Tobias Girresser for University of Siegen. @@ -169,6 +170,7 @@ contains integer :: maxorders integer :: ord_lim ! -------------------------------------------------------------------- ! + ! Information for the oversampling loop oversamp_degree = poly_proj%oversamp_degree mpd1 = poly_proj%min_degree + 1 @@ -184,6 +186,7 @@ contains varQ: do iVar=1,nScalars if (ensure_positivity(iVar)) then ordersum = 0.0_rk + !$OMP PARALLEL DO PRIVATE(dof, iDegZ, iDegY, iDegX, iOrd) do dof = 1, mpd1_cube iDegZ = (dof-1)/mpd1_square + 1 iDegY = (dof-1-(iDegZ-1)*mpd1_square)/mpd1+1 @@ -191,6 +194,7 @@ contains iOrd = iDegX+iDegY+iDegZ-2 ordersum(iOrd) = ordersum(iOrd) + abs(state(dof,iVar)) end do + !$OMP END PARALLEL DO varsum = 0.0_rk do iOrd=2,ord_lim varsum = varsum + ordersum(iOrd) @@ -202,6 +206,7 @@ contains end if end do varQ do iVar=1,nScalars + !$OMP PARALLEL DO PRIVATE(dof, iDegZ, iDegY, iDegX, iOrd, dofOverSamp) do dof = 1, mpd1_cube iDegZ = (dof-1)/mpd1_square + 1 iDegY = (dof-1-(iDegZ-1)*mpd1_square)/mpd1+1 @@ -214,6 +219,7 @@ contains modalCoeffs(dofOverSamp,iVar) = state(dof,iVar) end if end do + !$OMP END PARALLEL DO end do else posQ if (oversamp_degree == poly_proj%min_degree) then @@ -244,12 +250,14 @@ contains iDegY = 1 iDegZ = 1 ordersum = 0.0_rk + !$OMP PARALLEL DO PRIVATE(idof, iDegZ, iDegY, iDegX, iOrd) do idof = 1, poly_proj%body_3d%min_dofs ?? copy :: posOfModgCoeffPTens(iDegX, iDegY, iDegZ, dof) iOrd = iDegX+iDegY+iDegZ-2 ordersum(iOrd) = ordersum(iOrd) + abs(state(dof,iVar)) ?? copy :: nextModgCoeffPTens(iDegX, iDegY, iDegZ) end do + !$OMP END PARALLEL DO varsum = 0.0_rk do iOrd=2,ord_lim varsum = varsum + ordersum(iOrd) @@ -328,10 +336,13 @@ contains mpd1_cube = mpd1**3 nScalars = size(modalCoeffs,2) + !$OMP PARALLEL DEFAULT(SHARED), & + !$OMP PRIVATE(iVar, dof, idof, iDegZ, iDegY, iDegX, dofOverSamp) if (poly_proj%basisType == Q_Space) then if (oversamp_degree == poly_proj%min_degree) then state = modalCoeffs else + !$OMP DO do iVar=1,nScalars do dof = 1, mpd1_cube iDegZ = (dof-1)/mpd1_square + 1 @@ -343,6 +354,7 @@ contains state(dof,iVar) = modalCoeffs(dofOverSamp,iVar) end do end do + !$OMP END DO end if else !P_Space @@ -350,7 +362,9 @@ contains iDegX = 1 iDegY = 1 iDegZ = 1 + p_mindofs = min(poly_proj%body_3d%min_dofs, (mpd1*(mpd1+1)*(mpd1+2))/6) + !$OMP DO do idof = 1, p_mindofs ?? copy :: posOfModgCoeffPTens(iDegX, iDegY, iDegZ, dof) dofOverSamp = iDegX + ( iDegY-1 & @@ -359,7 +373,9 @@ contains state(dof,:) = modalCoeffs(dofOverSamp,:) ?? copy :: nextModgCoeffPTens(iDegX, iDegY, iDegZ) end do + !$OMP END DO end if + !$OMP END PARALLEL end subroutine ply_convertFromoversample_3d ! ************************************************************************ ! diff --git a/source/ply_poly_project_module.fpp b/source/ply_poly_project_module.fpp index b5eb963..8fb0618 100644 --- a/source/ply_poly_project_module.fpp +++ b/source/ply_poly_project_module.fpp @@ -599,7 +599,6 @@ contains end select case ('fpt') - select case (dim) case (3) call ply_LegToPnt_3D( fpt = me%body_3d%fpt, & diff --git a/source/ply_poly_transformation_module.f90 b/source/ply_poly_transformation_module.f90 index 3a1ff49..46bf8dc 100644 --- a/source/ply_poly_transformation_module.f90 +++ b/source/ply_poly_transformation_module.f90 @@ -212,6 +212,7 @@ subroutine ply_subsampleData( mesh, meshData, nDofs, nChildDofs, & real(kind=rk), allocatable :: transform_matrix(:,:) real(kind=rk), allocatable :: childData(:) ! -------------------------------------------------------------------- ! + nChilds = 2**nDims max_modes = nint(real(nDofs, kind=rk)**(1.0_rk/real(nDims, kind=rk))) @@ -421,6 +422,7 @@ subroutine ply_projDataToChild( parentData, nParentDofs, nChildDofs, & real(kind=rk), allocatable :: temp_data(:) real(kind=rk), allocatable :: childData_prev(:) ! -------------------------------------------------------------------- ! + parent_modes = nint(real(nParentDofs,kind=rk) & & **(1/real(nDimensions,kind=rk))) @@ -751,6 +753,7 @@ subroutine splitz() end do end subroutine splitz + end subroutine ply_projDataToChild ! ************************************************************************ ! diff --git a/source/ply_sampling_module.fpp b/source/ply_sampling_module.fpp index 8f44666..03caa3d 100644 --- a/source/ply_sampling_module.fpp +++ b/source/ply_sampling_module.fpp @@ -681,6 +681,7 @@ contains integer :: iElem integer :: nComps ! -------------------------------------------------------------------- ! + nComps = fun%nComponents call c_f_pointer(fun%method_data, p) diff --git a/source/ply_sampling_varsys_module.f90 b/source/ply_sampling_varsys_module.f90 index 38c12da..177f38f 100644 --- a/source/ply_sampling_varsys_module.f90 +++ b/source/ply_sampling_varsys_module.f90 @@ -163,6 +163,7 @@ subroutine ply_sampling_varsys_for_track( varsys, trackInst, mesh, nDims, & & nElems = 1, & & nDofs = nDofs, & & res = elemdat ) + do iComponent=1,nComponents iTotComp = iScalar+iComponent-1 var(iTotComp)%dat(lower_bound:upper_bound) & diff --git a/source/ply_space_integration_module.f90 b/source/ply_space_integration_module.f90 index b5edb08..f6e2712 100644 --- a/source/ply_space_integration_module.f90 +++ b/source/ply_space_integration_module.f90 @@ -44,6 +44,15 @@ module ply_space_integration_module ! ------------------------------------------------------------------------ ! !> Create Gauss-Legendre integration points and weights for one-dimensional + + + + + + + + + !! integration on the interval [x1,x2]. subroutine ply_gaussLegPoints( x1, x2, x, w, nIntP ) ! -------------------------------------------------------------------- ! diff --git a/utests/ply_l2p_3D_performance_test.f90 b/utests/ply_l2p_3D_performance_test.f90 new file mode 100644 index 0000000..27e714d --- /dev/null +++ b/utests/ply_l2p_3D_performance_test.f90 @@ -0,0 +1,130 @@ +! Copyright (c) 2012, 2014 Jens Zudrop +! Copyright (c) 2012-2016, 2018-2020 Harald Klimach +! Copyright (c) 2013-2014 Peter Vitt +! Copyright (c) 2013-2014 Verena Krupp +! Copyright (c) 2014 Nikhil Anand +! +! Parts of this file were written by Jens Zudrop and Harald Klimach for +! German Research School for Simulation Sciences GmbH. +! +! Parts of this file were written by Harald Klimach, Peter Vitt, Verena Krupp, +! and Nikhil Anand for University of Siegen. +! +! Permission to use, copy, modify, and distribute this software for any +! purpose with or without fee is hereby granted, provided that the above +! copyright notice and this permission notice appear in all copies. +! +! THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES +! WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +! MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR +! ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +! WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +! ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +! OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +! **************************************************************************** ! + +!> Unit test to check performance of l2p transformations. +program ply_l2p_3D_performance_test + use mpi, only: mpi_wtime + use env_module, only: rk, fin_env + use tem_logging_module, only: logUnit, tem_logging_init_primary + use tem_general_module, only: tem_general_type, tem_start + use ply_l2p_header_module, only: ply_l2p_header_type, ply_l2p_header_define + use ply_l2p_module, only: ply_l2p_type, ply_init_l2p, ply_l2p_trafo_3D + + !mpi!nprocs = 1 + + implicit none + + integer :: iPower + integer, parameter :: maxpower = 7 + real(kind=rk) :: res, newRes + type(tem_general_type) :: general + + ! Init the Treelm environment, needed to init the log Unit + call tem_start(codeName = 'L2P 3D Performance Test', & + & general = general ) + call tem_logging_init_primary( level = 1, & + & rank = general%proc%rank ) + + res = 0.0_rk + do iPower = 1,maxpower + call ply_check_legToPnt_3D(iPower, newRes) + write(*,*) 'deviation:', newres + if (newRes.gt.res) then + res = newRes + end if + end do + + write(*,*) 'Maximal deviation:', res + if (res < 1.e-08) then + write(logUnit(1),*) 'PASSED' + end if + + call fin_env() + +contains + + subroutine ply_check_legToPnt_3D(power,res) + integer, intent(in) :: power + real(kind=rk) :: res + integer :: maxPolyDegree, iVar, nVars + real(kind=rk), allocatable :: legCoeffs(:,:), legCoeffsIn(:,:) + real(kind=rk), allocatable :: pntVal(:,:), legVal(:,:) + type(ply_l2p_header_type) :: header + type(ply_l2p_type) :: trafo + real(kind=rk) :: starttime, stoptime + + ! Define the maximal polynomial degree we want to calculate the + ! bases exchange for. + maxPolyDegree = 2**power-1 ! maxPolyDegree+1 has to be a power of 2 + nVars = 3 + write(logUnit(10),*) '------------------------------------' // & + & ' Number of Legendre coefficients (per dim): ', maxPolyDegree+1 + write(logUnit(10),*) '------------------------------------' // & + & ' Number of Legendre coefficients (total): ',(maxPolyDegree+1)**3 + + ! Create the Legendre expansion coefficients + allocate(legCoeffs((maxPolyDegree+1)**3,nVars)) + allocate(legCoeffsIn((maxPolyDegree+1)**3,nVars)) + do iVar = 1, nVars + legCoeffs(:,iVar) = real(iVar, rk) + end do + + ! Init the L2 Projection + call ply_l2p_header_define( me = header, & + & nodes_kind = 'chebyshev' ) + call ply_init_l2p( degree = maxPolyDegree, & + & l2p = trafo, & + & header = header ) + + ! now transform to the Chebyshev nodes + allocate(pntVal( (maxPolyDegree+1)**3, nVars )) + legCoeffsIn = legCoeffs + starttime = MPI_Wtime() + do iVar = 1, nVars + call ply_l2p_trafo_3D( trafo = trafo%leg2node, & + & original = legCoeffsIn(:,iVar), & + & projected = pntVal(:,iVar) ) + end do + stoptime = MPI_Wtime() + write(*,*) 'Time for degree ', maxpolydegree, ' trafo: ', & + & stoptime - starttime + + ! now transform back to Legendre coefficients + allocate(legVal( (maxPolyDegree+1)**3,nVars )) + starttime = MPI_Wtime() + do iVar = 1, nVars + call ply_l2p_trafo_3D( trafo = trafo%node2leg, & + & projected = legVal(:,iVar), & + & original = pntVal(:,iVar) ) + end do + stoptime = MPI_Wtime() + write(*,*) 'Time for degree ', maxpolydegree, ' inverse: ', & + & stoptime - starttime + + res = maxval(abs(legVal(:,:) - legCoeffs(:,:))) + + end subroutine ply_check_legToPnt_3D + +end program ply_l2p_3D_performance_test diff --git a/utests/with_fftw/ply_fpt_3D_performance_test.f90 b/utests/with_fftw/ply_fpt_3D_performance_test.f90 new file mode 100644 index 0000000..1555f82 --- /dev/null +++ b/utests/with_fftw/ply_fpt_3D_performance_test.f90 @@ -0,0 +1,128 @@ +! Copyright (c) 2012, 2014 Jens Zudrop +! Copyright (c) 2012-2016, 2018-2020 Harald Klimach +! Copyright (c) 2013-2014 Peter Vitt +! Copyright (c) 2013-2014 Verena Krupp +! Copyright (c) 2014 Nikhil Anand +! +! Parts of this file were written by Jens Zudrop and Harald Klimach for +! German Research School for Simulation Sciences GmbH. +! +! Parts of this file were written by Harald Klimach, Peter Vitt, Verena Krupp, +! and Nikhil Anand for University of Siegen. +! +! Permission to use, copy, modify, and distribute this software for any +! purpose with or without fee is hereby granted, provided that the above +! copyright notice and this permission notice appear in all copies. +! +! THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES +! WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +! MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR +! ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +! WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +! ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +! OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +! **************************************************************************** ! + +!> Unit test to check functionallity of fast polynomial transformations. +!! \author{Jens Zudrop} +program ply_fpt_3D_performance_test + use mpi, only: mpi_wtime + use env_module, only: rk, fin_env + use tem_logging_module, only: logUnit, tem_logging_init_primary + use tem_general_module, only: tem_general_type, tem_start + use ply_fpt_header_module, only: ply_fpt_header_type, ply_fpt_header_define + use ply_legFpt_module, only: ply_legFpt_type, ply_init_legFPT + use ply_legFpt_3D_module, only: ply_legToPnt_3D, & + & ply_pntToLeg_3D + + !mpi!nprocs = 1 + + implicit none + + integer :: iPower + integer, parameter :: maxpower = 8 + real(kind=rk) :: res, newRes + type(tem_general_type) :: general + + ! Init the Treelm environment, needed to init the log Unit + call tem_start(codeName = 'FPT 3D Performance Test', & + & general = general ) + call tem_logging_init_primary( level = 1, & + & rank = general%proc%rank ) + + res = 0.0_rk + do iPower = 1,maxpower + call ply_check_legToPnt_3D(iPower, newRes) + if (newRes.gt.res) then + res = newRes + end if + end do + + write(*,*) 'Maximal deviation:', res + if (res < 1.e-08) then + write(logUnit(1),*) 'PASSED' + end if + + call fin_env() + +contains + + subroutine ply_check_legToPnt_3D(power,res) + integer, intent(in) :: power + real(kind=rk) :: res + integer :: maxPolyDegree, iVar, nVars + real(kind=rk), allocatable :: legCoeffs(:,:), legCoeffsIn(:,:) + real(kind=rk), allocatable :: pntVal(:,:), legVal(:,:) + type(ply_fpt_header_type) :: header + type(ply_legFpt_type) :: fpt + real(kind=rk) :: starttime, stoptime + + ! Define the maximal polynomial degree we want to calculate the + ! bases exchange for. + maxPolyDegree = 2**power-1 ! maxPolyDegree+1 has to be a power of 2 + nVars = 3 + write(logUnit(10),*) '------------------------------------' // & + & ' Number of Legendre coefficients (per dim): ', maxPolyDegree+1 + write(logUnit(10),*) '------------------------------------' // & + & ' Number of Legendre coefficients (total): ',(maxPolyDegree+1)**3 + + ! Create the Legendre expansion coefficients + allocate(legCoeffs((maxPolyDegree+1)**3,nVars)) + allocate(legCoeffsIn((maxPolyDegree+1)**3,nVars)) + do iVar = 1, nVars + legCoeffs(:,iVar) = real(iVar, rk) + end do + + ! Init the FPT + call ply_fpt_header_define( me = header ) + call ply_init_legFpt( maxPolyDegree = maxPolyDegree, & + & nIndeps = (maxpolydegree+1)**2, & + & fpt = fpt, & + & header = header ) + + ! now transform to the Chebyshev nodes + allocate(pntVal( (maxPolyDegree+1)**3, nVars )) + legCoeffsIn = legCoeffs + starttime = MPI_Wtime() + call ply_legToPnt_3D( fpt = fpt, & + & legCoeffs = legCoeffsIn, & + & pntVal = pntVal, & + & nVars = nVars ) + stoptime = MPI_Wtime() + write(*,*) 'Time for degree ', maxpolydegree, ' trafo: ', stoptime - starttime + + ! now transform back to Legendre coefficients + allocate(legVal( (maxPolyDegree+1)**3,nVars )) + starttime = MPI_Wtime() + call ply_pntToLeg_3D( fpt = fpt, & + & pntVal = pntVal, & + & legCoeffs = legVal, & + & nVars = nVars ) + stoptime = MPI_Wtime() + write(*,*) 'Time for degree ', maxpolydegree, ' inverse: ', stoptime - starttime + + res = maxval(abs(legVal(:,:) - legCoeffs(:,:))) + + end subroutine ply_check_legToPnt_3D + +end program ply_fpt_3D_performance_test