diff --git a/README.md b/README.md
index 8f897a8..93830a3 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,8 @@
 Polynomials Library
 ===================
 
+*This is the OpenMP implementation branch*
+
 This project is a supporting library for [TreElM](https://bitbucket.org/apesteam/treelm).
 It does not work on its own, but rather needs to be included in
 other projects, which also include TreElM.
diff --git a/source/fpt/ply_chebPoint_module.f90 b/source/fpt/ply_chebPoint_module.f90
new file mode 100644
index 0000000..0b8b8b1
--- /dev/null
+++ b/source/fpt/ply_chebPoint_module.f90
@@ -0,0 +1,617 @@
+! Copyright (c) 2012-2014 Jens Zudrop <j.zudrop@grs-sim.de>
+! Copyright (c) 2013-2014, 2017 Peter Vitt <peter.vitt2@uni-siegen.de>
+! Copyright (c) 2013-2014 Verena Krupp
+! Copyright (c) 2014 Harald Klimach <harald.klimach@uni-siegen.de>
+! Copyright (c) 2016 Daniel Petró <daniel.petro@student.uni-siegen.de>
+!
+! Parts of this file were written by Jens Zudrop for German Research School for
+! Simulation Sciences GmbH.
+!
+! Parts of this file were written by Verena Krupp, Harald Klimach, Peter Vitt
+! and Daniel Petró for University of Siegen.
+!
+! Permission to use, copy, modify, and distribute this software for any
+! purpose with or without fee is hereby granted, provided that the above
+! copyright notice and this permission notice appear in all copies.
+!
+! THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
+! WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+! MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
+! ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+! WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+! ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+! OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+! **************************************************************************** !
+!> Routines and datatypes related to Chebyshev points
+!! are located in this module.
+!! \author{Jens Zudrop}
+module ply_chebPoint_module
+  use env_module,         only: rk
+  use tem_param_module,   only: PI
+  use tem_aux_module,     only: tem_abort
+  use tem_logging_module, only: logUnit
+
+  implicit none
+
+  private
+
+  public :: create_volume_cheb_points_cube
+  public :: create_surface_cheb_points_cube
+  public :: create_volume_cheb_points_cube_1d
+  public :: create_surface_cheb_points_cube_1d
+  public :: create_volume_lobattocheb_points_cube_1d
+  public :: create_surface_lobattocheb_points_cube_1d
+  public :: create_volume_cheb_points_cube_2d
+  public :: create_surface_cheb_points_cube_2d
+  public :: create_volume_lobattocheb_points_cube_2d
+  public :: create_surface_lobattocheb_points_cube_2d
+  public :: create_volume_lobattocheb_points_cube
+  public :: create_surface_lobattocheb_points_cube
+
+
+contains
+
+
+  ! ************************************************************************ !
+  !> Generates a given number of Chebyshev points on the unit interval [-1;+1].
+  subroutine ply_chebPoint_1D( nPoints, chebPnt1D )
+    ! -------------------------------------------------------------------- !
+    !> The number of points to generate
+    integer, intent(in) :: nPoints
+    !> Array with 1D Chebyshev points on the interval [-1,+1]
+    real(kind=rk), intent(out), allocatable :: chebPnt1D(:)
+    ! -------------------------------------------------------------------- !
+    integer :: iPoint
+    ! -------------------------------------------------------------------- !
+
+    allocate(chebPnt1D(nPoints))
+
+    do iPoint = 1, nPoints
+      chebPnt1D(iPoint) = -1.0_rk &
+        & * cos( PI / nPoints * ( (iPoint - 1.0_rk) + 1.0_rk / 2.0_rk ) )
+    end do
+
+  end subroutine ply_chebPoint_1D
+  ! ************************************************************************ !
+
+
+  ! ************************************************************************ !
+  !> Generates a given number of Lobatto-Chebyshev points on the unit interval
+  !! [-1;+1].
+  !!
+  !! The Lobatto-Chebyshev points are ordered from +1 to -1. The fpt
+  !! have to be adapted accordingly.
+  subroutine ply_lobattoChebPoint_1D( nPoints, chebPnt1D )
+    ! -------------------------------------------------------------------- !
+    !> The number of points to generate
+    integer, intent(in) :: nPoints
+    !> Array with 1D Chebyshev points on the interval [-1,+1]
+    real(kind=rk), intent(out), allocatable :: chebPnt1D(:)
+    ! -------------------------------------------------------------------- !
+    integer :: iPoint
+    ! -------------------------------------------------------------------- !
+
+    allocate(chebPnt1D(nPoints))
+
+    do iPoint = 1, nPoints
+      chebPnt1D(iPoint) = cos( ( iPoint - 1.0_rk ) * PI / ( nPoints - 1.0_rk ) )
+    end do
+
+  end subroutine ply_lobattoChebPoint_1D
+  ! ************************************************************************ !
+
+
+  ! ************************************************************************ !
+  subroutine create_volume_cheb_points_cube(num_intp_per_direction, points)
+    ! -------------------------------------------------------------------- !
+    integer, intent(in) :: num_intp_per_direction
+    real(kind=rk),allocatable, intent(inout) :: points(:,:)
+    ! -------------------------------------------------------------------- !
+    integer :: i, j, k ! loop indices
+    integer :: pointNumber
+    real(kind=rk), allocatable :: chebPnt1D(:)
+    integer :: nquadpoints
+    ! -------------------------------------------------------------------- !
+
+    nquadpoints = num_intp_per_direction**3
+
+    allocate(points(nquadpoints,3))
+
+    call ply_chebPoint_1D( num_intp_per_direction, chebPnt1D )
+
+    pointNumber = 1
+    do k = 1, num_intp_per_direction
+      do j = 1, num_intp_per_direction
+        do i = 1, num_intp_per_direction
+          ! here we build all possible combinations of the one-dimensional
+          ! points to get the three dimensional values.
+          points(pointNumber, 1) = chebPnt1D(i)
+          points(pointNumber, 2) = chebPnt1D(j)
+          points(pointNumber, 3) = chebPnt1D(k)
+          pointNumber = pointNumber + 1
+        end do
+      end do
+    end do
+
+  end subroutine create_volume_cheb_points_cube
+  ! ************************************************************************ !
+
+
+  ! ************************************************************************ !
+  subroutine create_volume_lobattocheb_points_cube( num_intp_per_direction, &
+    &                                               points                  )
+    ! -------------------------------------------------------------------- !
+    integer, intent(in) :: num_intp_per_direction
+    real(kind=rk),allocatable, intent(inout) :: points(:,:)
+    ! -------------------------------------------------------------------- !
+    integer :: i, j, k ! loop indices
+    integer :: pointNumber
+    real(kind=rk), allocatable :: chebPnt1D(:)
+    integer :: nquadpoints
+    ! -------------------------------------------------------------------- !
+
+    nquadpoints = num_intp_per_direction**3
+
+    allocate(points(nquadpoints,3))
+
+    call ply_lobattoChebPoint_1D( num_intp_per_direction, chebPnt1D )
+
+    pointNumber = 1
+    do k = 1, num_intp_per_direction
+      do j = 1, num_intp_per_direction
+        do i = 1, num_intp_per_direction
+          ! here we build all possible combinations of the one-dimensional
+          ! points to get the three dimensional values.
+          points(pointNumber, 1) = chebPnt1D(i)
+          points(pointNumber, 2) = chebPnt1D(j)
+          points(pointNumber, 3) = chebPnt1D(k)
+          pointNumber = pointNumber + 1
+        end do
+      end do
+    end do
+
+  end subroutine create_volume_lobattocheb_points_cube
+  ! ************************************************************************ !
+
+
+  ! ************************************************************************ !
+  subroutine create_volume_cheb_points_cube_2d( num_intp_per_direction, points )
+    ! -------------------------------------------------------------------- !
+    integer, intent(in) :: num_intp_per_direction
+    real(kind=rk),allocatable, intent(inout) :: points(:,:)
+    ! -------------------------------------------------------------------- !
+    !> loop indices
+    integer :: i, j
+    integer :: pointNumber
+    real(kind=rk), allocatable :: chebPnt1D(:)
+    integer :: nquadpoints
+    ! -------------------------------------------------------------------- !
+
+    nquadpoints = num_intp_per_direction**2
+
+    allocate(points(nquadpoints,3))
+
+    call ply_chebPoint_1D( num_intp_per_direction, chebPnt1D )
+
+    pointNumber = 1
+    do j = 1, num_intp_per_direction
+      do i = 1, num_intp_per_direction
+        ! here we build all possible combinations of the one-dimensional
+        ! points to get the three dimensional values.
+        points(pointNumber, 1) = chebPnt1D(i)
+        points(pointNumber, 2) = chebPnt1D(j)
+        points(pointNumber, 3) = 0.0_rk
+        pointNumber = pointNumber + 1
+      end do
+    end do
+
+  end subroutine create_volume_cheb_points_cube_2d
+  ! ************************************************************************ !
+
+
+  ! ************************************************************************ !
+  !> Creates Lobatto-Chebyshev points (with 3 coordinates) but
+  !! for 2D kernels.
+  subroutine create_volume_lobattocheb_points_cube_2d(num_intp_per_direction, points)
+    ! -------------------------------------------------------------------- !
+    integer, intent(in) :: num_intp_per_direction
+    real(kind=rk),allocatable, intent(inout) :: points(:,:)
+    ! -------------------------------------------------------------------- !
+    !> loop indices
+    integer :: i, j
+    integer :: pointNumber
+    real(kind=rk), allocatable :: chebPnt1D(:)
+    integer :: nquadpoints
+    ! -------------------------------------------------------------------- !
+
+    nquadpoints = num_intp_per_direction**2
+
+    allocate(points(nquadpoints,3))
+
+    call ply_lobattoChebPoint_1D( num_intp_per_direction, chebPnt1D )
+
+    pointNumber = 1
+    do j = 1, num_intp_per_direction
+      do i = 1, num_intp_per_direction
+        ! here we build all possible combinations of the one-dimensional
+        ! points to get the three dimensional values.
+        points(pointNumber, 1) = chebPnt1D(i)
+        points(pointNumber, 2) = chebPnt1D(j)
+        points(pointNumber, 3) = 0.0_rk
+        pointNumber = pointNumber + 1
+      end do
+    end do
+
+  end subroutine create_volume_lobattocheb_points_cube_2d
+  ! ************************************************************************ !
+
+
+  ! ************************************************************************ !
+  subroutine create_volume_cheb_points_cube_1d( num_intp_per_direction, points )
+    ! -------------------------------------------------------------------- !
+    integer, intent(in) :: num_intp_per_direction
+    real(kind=rk), allocatable, intent(inout) :: points(:,:)
+    ! -------------------------------------------------------------------- !
+    integer :: i ! loop indices
+    integer :: pointNumber
+    real(kind=rk), allocatable :: chebPnt1D(:)
+    ! -------------------------------------------------------------------- !
+
+    allocate(points(num_intp_per_direction,3))
+
+    call ply_chebPoint_1D( num_intp_per_direction, chebPnt1D )
+
+    pointNumber = 1
+    do i = 1, num_intp_per_direction
+      ! here we build all possible combinations of the one-dimensional
+      ! points to get the three dimensional values.
+      points(pointNumber, 1) = chebPnt1D(i)
+      points(pointNumber, 2) = 0.0_rk
+      points(pointNumber, 3) = 0.0_rk
+      pointNumber = pointNumber + 1
+    end do
+
+  end subroutine create_volume_cheb_points_cube_1d
+  ! ************************************************************************ !
+
+
+  ! ************************************************************************ !
+  !> Creates Lobatto-Chebyshev points (with 3 coordinates) but
+  !! for 1D kernels.
+  subroutine create_volume_lobattocheb_points_cube_1d( num_intp_per_direction, &
+    &                                                  points                  )
+    ! -------------------------------------------------------------------- !
+    integer, intent(in) :: num_intp_per_direction
+    real(kind=rk),allocatable, intent(inout) :: points(:,:)
+    ! -------------------------------------------------------------------- !
+    integer :: i ! loop indices
+    integer :: pointNumber
+    real(kind=rk), allocatable :: chebPnt1D(:)
+    ! -------------------------------------------------------------------- !
+
+    allocate(points(num_intp_per_direction,3))
+
+    call ply_lobattoChebPoint_1D( num_intp_per_direction, chebPnt1D )
+
+    pointNumber = 1
+    do i = 1, num_intp_per_direction
+      ! here we build all possible combinations of the one-dimensional
+      ! points to get the three dimensional values.
+      points(pointNumber, 1) = chebPnt1D(i)
+      points(pointNumber, 2) = 0.0_rk
+      points(pointNumber, 3) = 0.0_rk
+      pointNumber = pointNumber + 1
+    end do
+
+  end subroutine create_volume_lobattocheb_points_cube_1d
+  ! ************************************************************************ !
+
+
+  ! ************************************************************************ !
+  !> Create Chebyshev nodes on a specific face of the reference element.
+  subroutine create_surface_cheb_points_cube( num_intp_per_direction, points, &
+    &                                         dir, align                      )
+    ! -------------------------------------------------------------------- !
+    integer, intent(in) :: num_intp_per_direction
+    real(kind=rk),allocatable, intent(inout) :: points(:,:)
+    !> The spatial direction of the face. \n
+    !! 1 -> x direction \n
+    !! 2 -> y direction \n
+    !! 3 -> z direction
+    integer :: dir
+    !> Left or right face of the reference element
+    integer :: align
+    ! -------------------------------------------------------------------- !
+    integer :: i, j ! loop indices
+    integer :: pointNumber
+    real(kind=rk), allocatable :: chebPnt1D(:)
+    integer :: nquadpoints
+    ! -------------------------------------------------------------------- !
+
+    nquadpoints = num_intp_per_direction**2
+
+    allocate(points(nquadpoints,3))
+
+    call ply_chebPoint_1D( num_intp_per_direction, chebPnt1D )
+
+    pointNumber = 1
+
+    select case(dir)
+    case(1) ! face in x direction, x coord is fixed
+      do j = 1, num_intp_per_direction
+        do i = 1, num_intp_per_direction
+          points(pointNumber, 1) = (-1.0_rk)**align
+          points(pointNumber, 2) = chebPnt1D(i)
+          points(pointNumber, 3) = chebPnt1D(j)
+          pointNumber = pointNumber + 1
+        end do
+      end do
+    case(2) ! face in y direction, y coord is fixed
+      do j = 1, num_intp_per_direction
+        do i = 1, num_intp_per_direction
+          points(pointNumber, 1) = chebPnt1D(i)
+          points(pointNumber, 2) = (-1.0_rk)**align
+          points(pointNumber, 3) = chebPnt1D(j)
+          pointNumber = pointNumber + 1
+        end do
+      end do
+    case(3) ! face in z direction, z coord is fixes
+      do j = 1, num_intp_per_direction
+        do i = 1, num_intp_per_direction
+          points(pointNumber , 1) = chebPnt1D(i)
+          points(pointNumber , 2) = chebPnt1D(j)
+          points(pointNumber , 3) = (-1.0_rk)**align
+          pointNumber = pointNumber + 1
+        end do
+      end do
+    case default
+      call tem_abort( 'ERROR in create_surface_cheb_points_cube: unknown ' &
+        & // 'face direction'                                              )
+    end select
+
+  end subroutine create_surface_cheb_points_cube
+  ! ************************************************************************ !
+
+
+  ! ************************************************************************ !
+  !> Create Lobatto-Chebyshev nodes on a specific face of the reference element.
+  subroutine create_surface_lobattocheb_points_cube( num_intp_per_direction, &
+    &                                                points, dir, align      )
+    ! -------------------------------------------------------------------- !
+    integer, intent(in) :: num_intp_per_direction
+    real(kind=rk),allocatable, intent(inout) :: points(:,:)
+    !> The spatial direction of the face. \n
+    !! 1 -> x direction \n
+    !! 2 -> y direction \n
+    !! 3 -> z direction
+    integer :: dir
+    !> Left or right face of the reference element
+    integer :: align
+    ! -------------------------------------------------------------------- !
+    integer :: i, j ! loop indices
+    integer :: pointNumber
+    real(kind=rk), allocatable :: chebPnt1D(:)
+    integer :: nquadpoints
+    ! -------------------------------------------------------------------- !
+
+    nquadpoints = num_intp_per_direction**2
+
+    allocate(points(nquadpoints,3))
+
+    call ply_lobattoChebPoint_1D( num_intp_per_direction, chebPnt1D )
+
+    pointNumber = 1
+
+    select case(dir)
+    case(1) ! face in x direction, x coord is fixed
+      do j = 1, num_intp_per_direction
+        do i = 1, num_intp_per_direction
+          points(pointNumber, 1) = (-1.0_rk)**align
+          points(pointNumber, 2) = chebPnt1D(i)
+          points(pointNumber, 3) = chebPnt1D(j)
+          pointNumber = pointNumber + 1
+        end do
+      end do
+    case(2) ! face in y direction, y coord is fixed
+      do j = 1, num_intp_per_direction
+        do i = 1, num_intp_per_direction
+          points(pointNumber, 1) = chebPnt1D(i)
+          points(pointNumber, 2) = (-1.0_rk)**align
+          points(pointNumber, 3) = chebPnt1D(j)
+          pointNumber = pointNumber + 1
+        end do
+      end do
+    case(3) ! face in z direction, z coord is fixes
+      do j = 1, num_intp_per_direction
+        do i = 1, num_intp_per_direction
+          points(pointNumber, 1) = chebPnt1D(i)
+          points(pointNumber, 2) = chebPnt1D(j)
+          points(pointNumber, 3) = (-1.0_rk)**align
+          pointNumber = pointNumber + 1
+        end do
+      end do
+    case default
+      call tem_abort( 'ERROR in create_surface_lobattocheb_points_cube:' &
+        & // ' unknown face direction'                                   )
+    end select
+
+  end subroutine create_surface_lobattocheb_points_cube
+  ! ************************************************************************ !
+
+
+  ! ************************************************************************ !
+  !> Create Chebyshev nodes on a specific face of the reference element.
+  subroutine create_surface_cheb_points_cube_2d( num_intp_per_direction, &
+    &                                            points, dir, align      )
+    ! -------------------------------------------------------------------- !
+    integer, intent(in) :: num_intp_per_direction
+    real(kind=rk),allocatable, intent(inout) :: points(:,:)
+    !> The spatial direction of the face. \n
+    !! 1 -> x direction \n
+    !! 2 -> y direction \n
+    integer :: dir
+    !> Left or right face of the reference element
+    integer :: align
+    ! -------------------------------------------------------------------- !
+    integer :: i
+    integer :: pointNumber
+    real(kind=rk), allocatable :: chebPnt1D(:)
+    integer :: nquadpoints
+    ! -------------------------------------------------------------------- !
+
+    nquadpoints = num_intp_per_direction
+
+    allocate(points(nquadpoints,3))
+
+    call ply_chebPoint_1D( num_intp_per_direction, chebPnt1D )
+
+    pointNumber = 1
+
+    select case(dir)
+    case(1) ! face in x direction, x coord is fixed
+      do i = 1, num_intp_per_direction
+        points(pointNumber, 1) = (-1.0_rk)**align
+        points(pointNumber, 2) = chebPnt1D(i)
+        points(pointNumber, 3) = 0.0_rk
+        pointNumber = pointNumber + 1
+      end do
+    case(2) ! face in y direction, y coord is fixed
+      do i = 1, num_intp_per_direction
+        points(pointNumber, 1) = chebPnt1D(i)
+        points(pointNumber, 2) = (-1.0_rk)**align
+        points(pointNumber, 3) = 0.0_rk
+        pointNumber = pointNumber + 1
+      end do
+    case default
+      call tem_abort( 'ERROR in create_surface_cheb_points_cube_2d:' &
+        & // ' unknown face direction'                               )
+    end select
+
+  end subroutine create_surface_cheb_points_cube_2d
+  ! ************************************************************************ !
+
+
+  ! ************************************************************************ !
+  !> Create Lobatto-Chebyshev nodes on a specific face of the reference element.
+  subroutine create_surface_lobattocheb_points_cube_2d( &
+    & num_intp_per_direction, points, dir, align        )
+    ! -------------------------------------------------------------------- !
+    integer, intent(in) :: num_intp_per_direction
+    real(kind=rk),allocatable, intent(inout) :: points(:,:)
+    !> The spatial direction of the face. \n
+    !! 1 -> x direction \n
+    !! 2 -> y direction \n
+    integer :: dir
+    !> Left or right face of the reference element
+    integer :: align
+    ! -------------------------------------------------------------------- !
+    integer :: i
+    integer :: pointNumber
+    real(kind=rk), allocatable :: chebPnt1D(:)
+    integer :: nquadpoints
+    ! -------------------------------------------------------------------- !
+
+    nquadpoints = num_intp_per_direction
+
+    allocate(points(nquadpoints,3))
+
+    call ply_lobattoChebPoint_1D( num_intp_per_direction, chebPnt1D )
+
+    pointNumber = 1
+
+    select case(dir)
+    case(1) ! face in x direction, x coord is fixed
+      do i = 1, num_intp_per_direction
+        points(pointNumber, 1) = (-1.0_rk)**align
+        points(pointNumber, 2) = chebPnt1D(i)
+        points(pointNumber, 3) = 0.0_rk
+        pointNumber = pointNumber + 1
+      end do
+    case(2) ! face in y direction, y coord is fixed
+      do i = 1, num_intp_per_direction
+        points(pointNumber, 1) = chebPnt1D(i)
+        points(pointNumber, 2) = (-1.0_rk)**align
+        points(pointNumber, 3) = 0.0_rk
+        pointNumber = pointNumber + 1
+      end do
+    case default
+      call tem_abort( 'ERROR in create_surface_cheb_points_cube_2d:' &
+        & // ' unknown face direction'                               )
+    end select
+
+  end subroutine create_surface_lobattocheb_points_cube_2d
+  ! ************************************************************************ !
+
+
+  ! ************************************************************************ !
+  !> Create Chebyshev nodes on a specific face of the reference element.
+  subroutine create_surface_cheb_points_cube_1d( points, dir, align )
+    ! -------------------------------------------------------------------- !
+    real(kind=rk),allocatable, intent(inout) :: points(:,:)
+    !> The spatial direction of the face. \n
+    !! 1 -> x direction \n
+    !! 2 -> y direction \n
+    integer :: dir
+    !> Left or right face of the reference element
+    integer :: align
+    ! -------------------------------------------------------------------- !
+    integer :: pointNumber
+    integer :: nquadpoints
+    ! -------------------------------------------------------------------- !
+
+    nquadpoints = 1
+    allocate(points(nquadpoints,3))
+
+    pointNumber = 1
+
+    select case(dir)
+    case(1) ! face in x direction, x coord is fixed
+      points(pointNumber, 1) = (-1.0_rk)**align
+      points(pointNumber, 2) = 0.0_rk
+      points(pointNumber, 3) = 0.0_rk
+    case default
+      call tem_abort( 'ERROR in create_surface_cheb_points_cube_1d:' &
+        & // ' unknown face direction'                               )
+    end select
+
+  end subroutine create_surface_cheb_points_cube_1d
+  ! ************************************************************************ !
+
+
+  ! ************************************************************************ !
+  !> Create Lobatto-Chebyshev nodes on a specific face of the reference element.
+  subroutine create_surface_lobattocheb_points_cube_1d( points, dir, align )
+    ! -------------------------------------------------------------------- !
+    real(kind=rk),allocatable, intent(inout) :: points(:,:)
+    !> The spatial direction of the face. \n
+    !! 1 -> x direction \n
+    !! 2 -> y direction \n
+    integer :: dir
+    !> Left or right face of the reference element
+    integer :: align
+    ! -------------------------------------------------------------------- !
+    integer :: pointNumber
+    integer :: nquadpoints
+    ! -------------------------------------------------------------------- !
+
+    nquadpoints = 1
+    allocate(points(nquadpoints,3))
+
+    pointNumber = 1
+
+    select case(dir)
+    case(1) ! face in x direction, x coord is fixed
+      points(pointNumber, 1) = (-1.0_rk)**align
+      points(pointNumber, 2) = 0.0_rk
+      points(pointNumber, 3) = 0.0_rk
+    case default
+      call tem_abort( 'ERROR in create_surface_cheb_points_cube_1d:' &
+        & // ' unknown face direction'                               )
+    end select
+
+  end subroutine create_surface_lobattocheb_points_cube_1d
+  ! ************************************************************************ !
+
+
+end module ply_chebPoint_module
+
diff --git a/source/fpt/ply_legFpt_2D_module.fpp b/source/fpt/ply_legFpt_2D_module.fpp
index 747b2be..2be0ba9 100644
--- a/source/fpt/ply_legFpt_2D_module.fpp
+++ b/source/fpt/ply_legFpt_2D_module.fpp
@@ -83,6 +83,7 @@ contains
 
     allocate(alph(n**2))
 
+
     ! original layout (n = 3):
     !  1  2  3
     !  4  5  6
diff --git a/source/fpt/ply_legFpt_3D_module.fpp b/source/fpt/ply_legFpt_3D_module.fpp
index fab0d60..dd29a43 100644
--- a/source/fpt/ply_legFpt_3D_module.fpp
+++ b/source/fpt/ply_legFpt_3D_module.fpp
@@ -145,6 +145,7 @@ contains
       &                pntVal    = pntVal   )
     ! <<<<< Z-Direction <<<<< !
 
+
   end subroutine ply_legToPnt_3D_singVar
   ! ------------------------------------------------------------------------ !
 
@@ -266,6 +267,7 @@ contains
       &                pntVal    = alph       )
     ! <<<<< X-Direction <<<<< !
 
+
   end subroutine ply_pntToLeg_3D_singVar
   ! ------------------------------------------------------------------------ !
 
diff --git a/source/fpt/ply_legFpt_module.f90 b/source/fpt/ply_legFpt_module.f90
index 2f97f49..0120037 100644
--- a/source/fpt/ply_legFpt_module.f90
+++ b/source/fpt/ply_legFpt_module.f90
@@ -3,6 +3,7 @@
 ! Copyright (c) 2013-2014, 2017 Peter Vitt <peter.vitt2@uni-siegen.de>
 ! Copyright (c) 2013-2014 Verena Krupp
 ! Copyright (c) 2016 Langhammer Kay <kay.langhammer@student.uni-siegen.de>
+! Copyright (c) 2020 Daniel Fleischer <daniel.fleischer@student.uni-siegen.de>
 !
 ! Parts of this file were written by Jens Zudrop and Harald Klimach
 ! for German Research School for Simulation Sciences GmbH.
@@ -344,8 +345,10 @@ subroutine ply_legToPnt_single( fpt, legCoeffs, pntVal, nIndeps )
     integer :: n
     ! -------------------------------------------------------------------- !
 
+    !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(n, iDof, cheb)
     n = fpt%legToChebParams%n
 
+    !$OMP DO
     do iDof = 1, nIndeps*n, n
       call ply_fpt_single( alph   = legCoeffs(iDof:iDof+n-1), &
         &                  gam    = cheb,                     &
@@ -360,6 +363,9 @@ subroutine ply_legToPnt_single( fpt, legCoeffs, pntVal, nIndeps )
         &                    cheb,                 &
         &                    pntVal(iDof:iDof+n-1) )
     end do
+    !$OMP END DO
+    
+    !$OMP END PARALLEL
 
   end subroutine ply_legToPnt_single
   ! ------------------------------------------------------------------------ !
@@ -417,8 +423,10 @@ subroutine ply_legToPnt_lobatto_single( fpt, legCoeffs, pntVal, nIndeps )
     integer :: n
     ! -------------------------------------------------------------------- !
 
+    !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(n, iDof, cheb)
     n = fpt%legToChebParams%n
 
+    !$OMP DO
     do iDof = 1, nIndeps*n, n
       call ply_fpt_single( alph   = legCoeffs(iDof:iDof+n-1), &
         &                  gam    = cheb,                     &
@@ -432,6 +440,9 @@ subroutine ply_legToPnt_lobatto_single( fpt, legCoeffs, pntVal, nIndeps )
         &                    cheb,                 &
         &                    pntVal(iDof:iDof+n-1) )
     end do
+    !$OMP END DO
+    
+    !$OMP END PARALLEL
 
   end subroutine ply_legToPnt_lobatto_single
   ! ------------------------------------------------------------------------ !
@@ -489,9 +500,12 @@ subroutine ply_pntToLeg_single( fpt, pntVal, legCoeffs, nIndeps )
     integer :: n
     ! -------------------------------------------------------------------- !
 
+    !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(n, iDof, cheb)
     n = fpt%legToChebParams%n
 
     normFactor = 1.0_rk / real(n,kind=rk)
+
+    !$OMP DO
     do iDof = 1, nIndeps*n, n
       call fftw_execute_r2r( fpt%planPntToCheb,     &
         &                    pntVal(iDof:iDof+n-1), &
@@ -506,6 +520,9 @@ subroutine ply_pntToLeg_single( fpt, pntVal, legCoeffs, nIndeps )
         &                  alph   = cheb,                     &
         &                  params = fpt%ChebToLegParams       )
     end do
+    !$OMP END DO
+
+    !$OMP END PARALLEL
 
   end subroutine ply_pntToLeg_single
   ! ------------------------------------------------------------------------ !
@@ -567,9 +584,12 @@ subroutine ply_pntToLeg_lobatto_single( fpt, pntVal, legCoeffs, nIndeps )
     integer :: n
     ! -------------------------------------------------------------------- !
 
+    !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(n, iDof, cheb)
     n = fpt%legToChebParams%n
 
     normFactor = 0.5_rk / real(n-1,kind=rk)
+
+    !$OMP DO
     do iDof = 1, nIndeps*n, n
       call fftw_execute_r2r( fpt%planPntToCheb,     &
         &                    pntVal(iDof:iDof+n-1), &
@@ -584,6 +604,9 @@ subroutine ply_pntToLeg_lobatto_single( fpt, pntVal, legCoeffs, nIndeps )
         &                  alph   = cheb,                     &
         &                  params = fpt%ChebToLegParams       )
     end do
+    !$OMP END DO
+    
+    !$OMP END PARALLEL
 
   end subroutine ply_pntToLeg_lobatto_single
   ! ------------------------------------------------------------------------ !
diff --git a/source/fpt/ply_polyBaseExc_module.fpp b/source/fpt/ply_polyBaseExc_module.fpp
index 1376b21..fc9e21e 100644
--- a/source/fpt/ply_polyBaseExc_module.fpp
+++ b/source/fpt/ply_polyBaseExc_module.fpp
@@ -1113,7 +1113,7 @@ contains
   !> Convert strip of coefficients of a modal representation in terms of
   !! Legendre polynomials to modal coefficients in terms of Chebyshev
   !! polynomials.
-  subroutine ply_fpt_single( alph, gam, params )
+  subroutine ply_fpt_single( alph, gam, params)
     ! -------------------------------------------------------------------- !
     !> The parameters of the fast polynomial transformation.
     type(ply_trafo_params_type), intent(inout) :: params
diff --git a/source/ply_LegPolyProjection_module.f90 b/source/ply_LegPolyProjection_module.f90
index 1f41f65..4438637 100644
--- a/source/ply_LegPolyProjection_module.f90
+++ b/source/ply_LegPolyProjection_module.f90
@@ -158,6 +158,7 @@ subroutine ply_QPolyProjection( subsamp, dofReduction, tree, meshData,   &
     real(kind=rk), allocatable :: newWorkDat(:)
     integer :: nChildDofs, oneDof
     ! -------------------------------------------------------------------- !
+
     if (subsamp%projectionType.ne.ply_QLegendrePoly_prp) then
       call tem_abort( 'ERROR in ply_QPolyProjection: subsampling is ' &
         & // 'only implemented for Q-Legendre-Polynomials'            )
@@ -281,6 +282,7 @@ subroutine ply_initQLegProjCoeff( doftype, nDofs, ndims, nChilds, &
     real(kind=rk), allocatable :: projCoeffOneDim(:,:,:)
     real(kind=rk) :: dimexp
     ! -------------------------------------------------------------------- !
+
     select case(dofType)
     case(ply_QLegendrePoly_prp)
       allocate(projection%projCoeff(nDofs, nChildDofs, nChilds))
@@ -352,6 +354,7 @@ subroutine ply_initQLegProjCoeff( doftype, nDofs, ndims, nChilds, &
         & // 'for Q-Legendre polynomials'                                   )
     end select
     deallocate(projCoeffOneDim)
+
   end subroutine ply_initQLegProjCoeff
   ! ************************************************************************ !
 
@@ -595,6 +598,7 @@ subroutine ply_subsampleData( tree, meshData, nDofs, nChildDofs,          &
     integer :: oneDof, noChilds, childpos
     real(kind=rk), allocatable :: childData(:)
     ! -------------------------------------------------------------------- !
+
     nChilds = 2**ndims
     nElems = tree%nElems
     nElemsToRefine = count(new_refine_tree)
@@ -794,6 +798,7 @@ subroutine ply_projDataToChild( parentData, nParentDofs, nChildDofs,        &
     integer :: childDof_pos, parentDof_pos
     real(kind=rk) :: projCoeff
     ! -------------------------------------------------------------------- !
+
     childData(:) = 0.0_rk
 
     childLoop: do iChild = 1, nChilds
diff --git a/source/ply_fxt_module.f90 b/source/ply_fxt_module.f90
index 4bfe466..e9257fb 100644
--- a/source/ply_fxt_module.f90
+++ b/source/ply_fxt_module.f90
@@ -255,6 +255,7 @@ subroutine ply_fxt_n2m_2D( fxt, nodal_data, modal_data, oversamp_degree )
         &                   modal_data = nodal_data(lb:msq:oversamp_degree+1)  )
     end do
     modal_data = nodal_data
+
   end subroutine ply_fxt_n2m_2D
   ! ************************************************************************ !
 
diff --git a/source/ply_l2p_module.f90 b/source/ply_l2p_module.f90
index c2fb01f..409bb7e 100644
--- a/source/ply_l2p_module.f90
+++ b/source/ply_l2p_module.f90
@@ -245,30 +245,65 @@ subroutine ply_l2_projection( nDofs, nIndeps, projected, original, matrix )
     ! integer, parameter :: vlen = nIndeps
     ! -------------------------------------------------------------------- !
 
+! Original version (for reference)
+!!    if (nDofs > 1) then
+!!
+!!      do iStrip=1,nIndeps,vlen
+!!
+!!        ! Calculate the upper bound of the current strip
+!!        strip_ub = iStrip-1 + min(vlen, nIndeps-iStrip+1)
+!!
+!!        do iRow = 1, nDofs
+!!
+!!          do iCell = iStrip, strip_ub
+!!            projected(iCell, iRow) = 0.0_rk
+!!          end do
+!!
+!!          do iCol = 1, nDofs
+!!            mval =  matrix(iCol,iRow)
+!!            do iCell = iStrip, strip_ub
+!!              ! on SX-ACE, this can be identified as matrix multiplication
+!!              ! which results in VERY HIGH performance
+!!              projected(iCell, iRow) = projected(iCell, iRow) &
+!!                &                   + mval * original(iCol, iCell)
+!!            end do ! iCell
+!!          end do ! iCol = 1, nCols
+!!        end do ! iRow = 1, nRows
+!!      end do ! iStrip
+!!
+!!    else
+!!
+!!      projected = matrix(nDofs,1) * original
+!!
+!!    end if
+
     if (nDofs > 1) then
 
-      do iStrip=0,nIndeps-1,vlen
+      !$OMP PARALLEL DO DEFAULT(SHARED), &
+      !$OMP PRIVATE(iStrip, iRow, iCell, iCol, mval)
+      do iStrip=1,nIndeps,vlen
 
         ! Calculate the upper bound of the current strip
-        strip_ub = min(iStrip + vlen, nIndeps) - iStrip
+        strip_ub = iStrip-1 + min(vlen, nIndeps-iStrip+1)
 
         do iRow = 1, nDofs
 
-          do iCell = iStrip+1, iStrip+strip_ub
+          do iCell = iStrip, strip_ub
             projected(iCell, iRow) = 0.0_rk
           end do
+
           do iCol = 1, nDofs
             mval =  matrix(iCol,iRow)
-            do iCell = iStrip+1, iStrip+strip_ub
+            do iCell = iStrip, strip_ub
               ! on SX-ACE, this can be identified as matrix multiplication
               ! which results in VERY HIGH performance
               projected(iCell, iRow) = projected(iCell, iRow) &
                 &                   + mval * original(iCol, iCell)
             end do ! iCell
           end do ! iCol = 1, nCols
-
         end do ! iRow = 1, nRows
       end do ! iStrip
+      !$OMP END PARALLEL DO
 
     else
 
@@ -276,6 +311,31 @@ subroutine ply_l2_projection( nDofs, nIndeps, projected, original, matrix )
 
     end if
 
+
+! test-version of the loop (will be removed later)
+!!    if (nDofs > 1) then
+!!
+!!      projected(:, :) = 0.0_rk
+!!
+!!      !$OMP PARALLEL DO COLLAPSE(2) DEFAULT(SHARED), &
+!!      !$OMP PRIVATE(iStrip, iRow, iCell, iCol, mval)
+!!      do iRow = 1, nDofs
+!!        do iCol = 1, nDofs
+!!          mval =  matrix(iCol,iRow)
+!!          do iStrip=1,nIndeps
+!!            projected(iStrip, iRow) = projected(iStrip, iRow) &
+!!              &                   + mval * original(iCol, iStrip)
+!!          end do
+!!        end do
+!!      end do
+!!      !$OMP END PARALLEL DO
+!!
+!!    else
+!!
+!!      projected = matrix(nDofs,1) * original
+!!
+!!    end if
+
   end subroutine ply_l2_projection
   ! ************************************************************************ !
 
diff --git a/source/ply_leg_diff_module.fpp b/source/ply_leg_diff_module.fpp
index 5bcb804..8f0a3e4 100644
--- a/source/ply_leg_diff_module.fpp
+++ b/source/ply_leg_diff_module.fpp
@@ -71,6 +71,10 @@ contains
     integer :: leg(3), iDeg, iDeg1, iDeg2, iDeg3, DV(3)
     ! -------------------------------------------------------------------- !
 
+    !$OMP PARALLEL DEFAULT(SHARED), &
+    !$OMP PRIVATE(iVar,dofPos,dofPosPrev,dofPos2Prev,iDeg,iDeg1,iDeg2,iDeg3), &
+    !$OMP PRIVATE(leg,DV)
+
     if (present(dirVec)) then
       DV = dirvec
     else
@@ -83,6 +87,7 @@ contains
       endif
     endif
 
+    !$OMP DO
     do iDeg = 1, (mpd+1)**2
       iDeg1 = (iDeg-1)/(mpd+1) + 1      !! do IDeg1 = 1, mPd+1
       iDeg2 = iDeg - (iDeg1-1)*(mpd+1)  !! do IDeg2 = 1, mPd=1   !! iDeg2 = mod(iDeg-1,mpd+1)+1
@@ -141,8 +146,10 @@ contains
         end do
       end do
     end do
+    !$OMP END DO
 
     ! Scale the results due to the Jacobians of the mappings
+    !$OMP DO
     do dofpos=1,(mpd+1)**3
       ideg3 = (dofpos-1)/(mpd+1)**2 + 1
       iDeg = dofpos - (ideg3-1)*(mpd+1)**2
@@ -153,6 +160,9 @@ contains
         &                         * (2.0_rk/elemLength)         &
         &                         * (2.0_rk*leg(iDir) - 1.0_rk)
     end do
+    !$OMP END DO
+
+    !$OMP END PARALLEL
 
    !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    ! Uncollapsed version of the scaling !
@@ -198,6 +208,9 @@ contains
     integer :: iDeg, iDeg1, iDeg2, iDeg3
     real(kind=rk) :: derivative((mpd+1)**2,mpd+1)
     ! -------------------------------------------------------------------- !
+    !$OMP PARALLEL DEFAULT(SHARED), &
+    !$OMP PRIVATE( iVar, dofPos, dofPosPrev, iDeg, iDeg1, iDeg2, iDeg3 ) &
+    !$OMP PRIVATE( derivative )
 
     varloop: do iVar = 1, nVars
 
@@ -205,6 +218,7 @@ contains
 
       iDeg1 = mpd
 
+      !$OMP DO
       do iDeg = 1, (mpd+1)**2
         iDeg3 = (iDeg-1)/(mpd+1) + 1
         iDeg2 = iDeg - (iDeg3-1)*(mpd+1)
@@ -212,7 +226,9 @@ contains
 ?? copy :: posOfModgCoeffQTens(iDeg1+1, iDeg2, iDeg3, mPd, dofpos )
         derivative(iDeg, mpd) = legCoeffs(dofpos,iVar)
       end do
+      !$OMP END DO
 
+      !$OMP DO
       do iDeg1 = mPd-1, 1, -1
 
         !NEC$ ivdep
@@ -226,8 +242,10 @@ contains
             &                       + legCoeffs(dofposprev, iVar)
         end do
       end do
+      !$OMP END DO
 
       ! Scale the results due to the Jacobians of the mappings
+      !$OMP DO
       do dofpos=1,(mpd+1)**3
         ideg3 = (dofpos-1)/(mpd+1)**2 + 1
         iDeg = dofpos - (ideg3-1)*(mpd+1)**2
@@ -237,8 +255,10 @@ contains
           &                          * (2.0_rk/elemLength)  &
           &                          * (2.0_rk*iDeg1 - 1.0_rk)
       end do
+      !$OMP END DO
 
     end do varloop
+    !$OMP END PARALLEL
 
   end subroutine ply_calcDiff_leg_x_vec
   ! ************************************************************************ !
@@ -266,6 +286,9 @@ contains
     integer :: iDeg, iDeg1, iDeg2, iDeg3
     real(kind=rk) :: derivative((mpd+1)**2,mpd+1)
     ! -------------------------------------------------------------------- !
+    !$OMP PARALLEL DEFAULT(SHARED), &
+    !$OMP PRIVATE( iVar, dofPos, dofPosPrev, iDeg, iDeg1, iDeg2, iDeg3) &
+    !$OMP PRIVATE( derivative)
 
     varloop: do iVar = 1, nVars
 
@@ -273,6 +296,7 @@ contains
 
       iDeg2 = mpd
 
+      !$OMP DO
       do iDeg = 1, (mpd+1)**2
         iDeg3 = (iDeg-1)/(mpd+1) + 1
         iDeg1 = iDeg - (iDeg3-1)*(mpd+1)
@@ -280,7 +304,9 @@ contains
 ?? copy :: posOfModgCoeffQTens(iDeg1, iDeg2+1, iDeg3, mPd, dofpos )
         derivative(iDeg, mpd) = legCoeffs(dofpos,iVar)
       end do
+      !$OMP END DO
 
+      !$OMP DO
       do iDeg2 = mPd-1, 1, -1
 
         !NEC$ ivdep
@@ -294,8 +320,10 @@ contains
             &                       + legCoeffs(dofposprev, iVar)
         end do
       end do
+      !$OMP END DO
 
       ! Scale the results due to the Jacobians of the mappings
+      !$OMP DO
       do dofpos=1,(mpd+1)**3
         ideg3 = (dofpos-1)/(mpd+1)**2 + 1
         iDeg = dofpos - (ideg3-1)*(mpd+1)**2
@@ -305,8 +333,10 @@ contains
           &                          * (2.0_rk/elemLength)  &
           &                          * (2.0_rk*iDeg2 - 1.0_rk)
       end do
+      !$OMP END DO
 
     end do varloop
+    !$OMP END PARALLEL
 
   end subroutine ply_calcDiff_leg_y_vec
   ! ************************************************************************ !
@@ -335,12 +365,16 @@ contains
     real(kind=rk) :: derivative((mpd+1)**2,mpd+1)
     ! -------------------------------------------------------------------- !
 
+    !$OMP PARALLEL DEFAULT(SHARED), &
+    !$OMP PRIVATE( iVar, dofPos, dofPosPrev, iDeg, iDeg1, iDeg2, iDeg3) &
+    !$OMP PRIVATE( derivative)
     varloop: do iVar = 1, nVars
 
       derivative(:,mpd+1) = 0.0_rk
 
       iDeg3 = mpd
 
+      !$OMP DO
       do iDeg = 1, (mpd+1)**2
         iDeg2 = (iDeg-1)/(mpd+1) + 1
         iDeg1 = iDeg - (iDeg2-1)*(mpd+1)
@@ -348,7 +382,9 @@ contains
 ?? copy :: posOfModgCoeffQTens(iDeg1, iDeg2, iDeg3+1, mPd, dofpos )
         derivative(iDeg, mpd) = legCoeffs(dofpos,iVar)
       end do
+      !$OMP END DO
 
+      !$OMP DO
       do iDeg3 = mPd-1, 1, -1
 
         !NEC$ ivdep
@@ -362,8 +398,10 @@ contains
             &                       + legCoeffs(dofposprev, iVar)
         end do
       end do
+      !$OMP END DO
 
       ! Scale the results due to the Jacobians of the mappings
+      !$OMP DO
       do dofpos=1,(mpd+1)**3
         ideg3 = (dofpos-1)/(mpd+1)**2 + 1
         ideg = dofpos - (ideg3-1)*(mpd+1)**2
@@ -371,8 +409,10 @@ contains
           &                          * (2.0_rk/elemLength)  &
           &                          * (2.0_rk*iDeg3 - 1.0_rk)
       end do
+      !$OMP END DO
 
     end do varloop
+    !$OMP END PARALLEL
 
   end subroutine ply_calcDiff_leg_z_vec
   ! ************************************************************************ !
@@ -565,7 +605,7 @@ contains
           &    elemLength    = elemLength,              &
           &    dirvec        = dirvec(:,iDir),          &
           &    iDir          = iDir                     )
-      enddo
+      end do
     endif
 
   end subroutine ply_calcDiff_leg_2d
diff --git a/source/ply_modg_basis_module.fpp b/source/ply_modg_basis_module.fpp
index fef34be..d684aec 100644
--- a/source/ply_modg_basis_module.fpp
+++ b/source/ply_modg_basis_module.fpp
@@ -454,6 +454,7 @@ contains
     real(kind=rk) :: n_q
     ! -------------------------------------------------------------------- !
 
+
     ! allocate the output array
     select case(basisType)
       case(Q_space)
@@ -533,7 +534,6 @@ contains
         end do
     end select
 
-
   end subroutine ply_evalLegendreTensPoly
   ! ************************************************************************ !
 
diff --git a/source/ply_oversample_module.fpp b/source/ply_oversample_module.fpp
index 6935e9d..6c90724 100644
--- a/source/ply_oversample_module.fpp
+++ b/source/ply_oversample_module.fpp
@@ -4,6 +4,7 @@
 ! Copyright (c) 2014, 2017-2019 Harald Klimach <harald.klimach@uni-siegen.de>
 ! Copyright (c) 2014 Verena Krupp
 ! Copyright (c) 2016 Tobias Girresser <tobias.girresser@student.uni-siegen.de>
+! Copyright (c) 2020 Daniel Fleischer <daniel.fleischer@student.uni-siegen.de>
 !
 ! Parts of this file were written by Jens Zudrop, Nikhil Anand, Harald Klimach,
 ! Verena Krupp, Peter Vitt, and Tobias Girresser for University of Siegen.
@@ -169,6 +170,7 @@ contains
     integer :: maxorders
     integer :: ord_lim
     ! -------------------------------------------------------------------- !
+
     ! Information for the oversampling loop
     oversamp_degree = poly_proj%oversamp_degree
     mpd1 = poly_proj%min_degree + 1
@@ -184,6 +186,7 @@ contains
         varQ: do iVar=1,nScalars
           if (ensure_positivity(iVar)) then
             ordersum = 0.0_rk
+            !$OMP PARALLEL DO PRIVATE(dof, iDegZ, iDegY, iDegX, iOrd)
             do dof = 1, mpd1_cube
               iDegZ = (dof-1)/mpd1_square + 1
               iDegY = (dof-1-(iDegZ-1)*mpd1_square)/mpd1+1
@@ -191,6 +194,7 @@ contains
               iOrd = iDegX+iDegY+iDegZ-2
               ordersum(iOrd) = ordersum(iOrd) + abs(state(dof,iVar))
             end do
+            !$OMP END PARALLEL DO
             varsum = 0.0_rk
             do iOrd=2,ord_lim
               varsum = varsum + ordersum(iOrd)
@@ -202,6 +206,7 @@ contains
           end if
         end do varQ
         do iVar=1,nScalars
+          !$OMP PARALLEL DO PRIVATE(dof, iDegZ, iDegY, iDegX, iOrd, dofOverSamp)
           do dof = 1, mpd1_cube
             iDegZ = (dof-1)/mpd1_square + 1
             iDegY = (dof-1-(iDegZ-1)*mpd1_square)/mpd1+1
@@ -214,6 +219,7 @@ contains
               modalCoeffs(dofOverSamp,iVar) = state(dof,iVar)
             end if
           end do
+          !$OMP END PARALLEL DO
         end do
       else posQ
         if (oversamp_degree == poly_proj%min_degree) then
@@ -244,12 +250,14 @@ contains
             iDegY = 1
             iDegZ = 1
             ordersum = 0.0_rk
+            !$OMP PARALLEL DO PRIVATE(idof, iDegZ, iDegY, iDegX, iOrd)
             do idof = 1, poly_proj%body_3d%min_dofs
 ?? copy :: posOfModgCoeffPTens(iDegX, iDegY, iDegZ, dof)
               iOrd = iDegX+iDegY+iDegZ-2
               ordersum(iOrd) = ordersum(iOrd) + abs(state(dof,iVar))
 ?? copy :: nextModgCoeffPTens(iDegX, iDegY, iDegZ)
             end do
+            !$OMP END PARALLEL DO
             varsum = 0.0_rk
             do iOrd=2,ord_lim
               varsum = varsum + ordersum(iOrd)
@@ -328,10 +336,13 @@ contains
     mpd1_cube = mpd1**3
     nScalars = size(modalCoeffs,2)
 
+    !$OMP PARALLEL DEFAULT(SHARED), &
+    !$OMP PRIVATE(iVar, dof, idof, iDegZ, iDegY, iDegX, dofOverSamp)
     if (poly_proj%basisType == Q_Space) then
       if (oversamp_degree == poly_proj%min_degree) then
         state = modalCoeffs
       else
+        !$OMP DO
         do iVar=1,nScalars
           do dof = 1, mpd1_cube
             iDegZ = (dof-1)/mpd1_square + 1
@@ -343,6 +354,7 @@ contains
             state(dof,iVar) = modalCoeffs(dofOverSamp,iVar)
           end do
         end do
+        !$OMP END DO
       end if
 
     else !P_Space
@@ -350,7 +362,9 @@ contains
       iDegX = 1
       iDegY = 1
       iDegZ = 1
+
       p_mindofs = min(poly_proj%body_3d%min_dofs, (mpd1*(mpd1+1)*(mpd1+2))/6)
+      !$OMP DO
       do idof = 1, p_mindofs
 ?? copy :: posOfModgCoeffPTens(iDegX, iDegY, iDegZ, dof)
         dofOverSamp = iDegX + ( iDegY-1  &
@@ -359,7 +373,9 @@ contains
         state(dof,:) = modalCoeffs(dofOverSamp,:)
 ?? copy :: nextModgCoeffPTens(iDegX, iDegY, iDegZ)
       end do
+      !$OMP END DO
     end if
+    !$OMP END PARALLEL
 
   end subroutine ply_convertFromoversample_3d
   ! ************************************************************************ !
diff --git a/source/ply_poly_project_module.fpp b/source/ply_poly_project_module.fpp
index b5eb963..8fb0618 100644
--- a/source/ply_poly_project_module.fpp
+++ b/source/ply_poly_project_module.fpp
@@ -599,7 +599,6 @@ contains
       end select
 
     case ('fpt')
-
       select case (dim)
       case (3)
         call ply_LegToPnt_3D( fpt       = me%body_3d%fpt, &
diff --git a/source/ply_poly_transformation_module.f90 b/source/ply_poly_transformation_module.f90
index 3a1ff49..46bf8dc 100644
--- a/source/ply_poly_transformation_module.f90
+++ b/source/ply_poly_transformation_module.f90
@@ -212,6 +212,7 @@ subroutine ply_subsampleData( mesh, meshData, nDofs, nChildDofs,         &
     real(kind=rk), allocatable :: transform_matrix(:,:)
     real(kind=rk), allocatable :: childData(:)
     ! -------------------------------------------------------------------- !
+
     nChilds = 2**nDims
 
     max_modes = nint(real(nDofs, kind=rk)**(1.0_rk/real(nDims, kind=rk)))
@@ -421,6 +422,7 @@ subroutine ply_projDataToChild( parentData, nParentDofs, nChildDofs, &
     real(kind=rk), allocatable :: temp_data(:)
     real(kind=rk), allocatable :: childData_prev(:)
     ! -------------------------------------------------------------------- !
+
     parent_modes = nint(real(nParentDofs,kind=rk)        &
       &                 **(1/real(nDimensions,kind=rk)))
 
@@ -751,6 +753,7 @@ subroutine splitz()
       end do
     end subroutine splitz
 
+
   end subroutine ply_projDataToChild
   ! ************************************************************************ !
 
diff --git a/source/ply_sampling_module.fpp b/source/ply_sampling_module.fpp
index 8f44666..03caa3d 100644
--- a/source/ply_sampling_module.fpp
+++ b/source/ply_sampling_module.fpp
@@ -681,6 +681,7 @@ contains
     integer :: iElem
     integer :: nComps
     ! -------------------------------------------------------------------- !
+
     nComps = fun%nComponents
 
     call c_f_pointer(fun%method_data, p)
diff --git a/source/ply_sampling_varsys_module.f90 b/source/ply_sampling_varsys_module.f90
index 38c12da..177f38f 100644
--- a/source/ply_sampling_varsys_module.f90
+++ b/source/ply_sampling_varsys_module.f90
@@ -163,6 +163,7 @@ subroutine ply_sampling_varsys_for_track( varsys, trackInst, mesh, nDims, &
           &    nElems  = 1,                         &
           &    nDofs   = nDofs,                     &
           &    res     = elemdat                    )
+
         do iComponent=1,nComponents
           iTotComp = iScalar+iComponent-1
           var(iTotComp)%dat(lower_bound:upper_bound) &
diff --git a/source/ply_space_integration_module.f90 b/source/ply_space_integration_module.f90
index b5edb08..f6e2712 100644
--- a/source/ply_space_integration_module.f90
+++ b/source/ply_space_integration_module.f90
@@ -44,6 +44,15 @@ module ply_space_integration_module
 
   ! ------------------------------------------------------------------------ !
   !> Create Gauss-Legendre integration points and weights for one-dimensional
+
+
+
+
+
+
+
+
+
   !! integration on the interval [x1,x2].
   subroutine ply_gaussLegPoints( x1, x2, x, w, nIntP )
     ! -------------------------------------------------------------------- !
diff --git a/utests/ply_l2p_3D_performance_test.f90 b/utests/ply_l2p_3D_performance_test.f90
new file mode 100644
index 0000000..27e714d
--- /dev/null
+++ b/utests/ply_l2p_3D_performance_test.f90
@@ -0,0 +1,130 @@
+! Copyright (c) 2012, 2014 Jens Zudrop <j.zudrop@grs-sim.de>
+! Copyright (c) 2012-2016, 2018-2020 Harald Klimach <harald.klimach@uni-siegen.de>
+! Copyright (c) 2013-2014 Peter Vitt <peter.vitt2@uni-siegen.de>
+! Copyright (c) 2013-2014 Verena Krupp
+! Copyright (c) 2014 Nikhil Anand <nikhil.anand@uni-siegen.de>
+!
+! Parts of this file were written by Jens Zudrop and Harald Klimach for
+! German Research School for Simulation Sciences GmbH.
+!
+! Parts of this file were written by Harald Klimach, Peter Vitt, Verena Krupp,
+! and Nikhil Anand for University of Siegen.
+!
+! Permission to use, copy, modify, and distribute this software for any
+! purpose with or without fee is hereby granted, provided that the above
+! copyright notice and this permission notice appear in all copies.
+!
+! THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
+! WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+! MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
+! ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+! WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+! ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+! OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+! **************************************************************************** !
+
+!> Unit test to check performance of l2p transformations.
+program ply_l2p_3D_performance_test
+  use mpi, only: mpi_wtime
+  use env_module,            only: rk, fin_env
+  use tem_logging_module,    only: logUnit, tem_logging_init_primary
+  use tem_general_module,    only: tem_general_type, tem_start
+  use ply_l2p_header_module, only: ply_l2p_header_type, ply_l2p_header_define
+  use ply_l2p_module,        only: ply_l2p_type, ply_init_l2p, ply_l2p_trafo_3D
+
+  !mpi!nprocs = 1
+
+  implicit none
+
+  integer :: iPower
+  integer, parameter :: maxpower = 7
+  real(kind=rk) :: res, newRes
+  type(tem_general_type) :: general
+
+  ! Init the Treelm environment, needed to init the log Unit
+  call tem_start(codeName = 'L2P 3D Performance Test', &
+    &            general  = general                    )
+  call tem_logging_init_primary( level = 1,               &
+    &                            rank = general%proc%rank )
+
+  res = 0.0_rk
+  do iPower = 1,maxpower
+    call ply_check_legToPnt_3D(iPower, newRes)
+    write(*,*) 'deviation:', newres
+    if (newRes.gt.res) then
+      res = newRes
+    end if
+  end do
+
+  write(*,*) 'Maximal deviation:', res
+  if (res <  1.e-08) then
+    write(logUnit(1),*) 'PASSED'
+  end if
+
+  call fin_env()
+
+contains
+
+  subroutine ply_check_legToPnt_3D(power,res)
+    integer, intent(in) :: power
+    real(kind=rk) :: res
+    integer :: maxPolyDegree, iVar, nVars
+    real(kind=rk), allocatable :: legCoeffs(:,:), legCoeffsIn(:,:)
+    real(kind=rk), allocatable :: pntVal(:,:), legVal(:,:)
+    type(ply_l2p_header_type) :: header
+    type(ply_l2p_type) :: trafo
+    real(kind=rk) :: starttime, stoptime
+
+    ! Define the maximal polynomial degree we want to calculate the
+    ! bases exchange for.
+    maxPolyDegree =  2**power-1  ! maxPolyDegree+1 has to be a power of 2
+    nVars = 3
+    write(logUnit(10),*) '------------------------------------' // &
+      & ' Number of Legendre coefficients (per dim): ', maxPolyDegree+1
+    write(logUnit(10),*) '------------------------------------' // &
+      & ' Number of Legendre coefficients (total): ',(maxPolyDegree+1)**3
+
+    ! Create the Legendre expansion coefficients
+    allocate(legCoeffs((maxPolyDegree+1)**3,nVars))
+    allocate(legCoeffsIn((maxPolyDegree+1)**3,nVars))
+    do iVar = 1, nVars
+      legCoeffs(:,iVar) = real(iVar, rk)
+    end do
+
+    ! Init the L2 Projection
+    call ply_l2p_header_define( me         = header,     &
+      &                         nodes_kind = 'chebyshev' )
+    call ply_init_l2p( degree = maxPolyDegree, &
+      &                l2p    = trafo,         &
+      &                header = header         )
+
+    ! now transform to the Chebyshev nodes
+    allocate(pntVal( (maxPolyDegree+1)**3, nVars ))
+    legCoeffsIn = legCoeffs
+    starttime = MPI_Wtime()
+    do iVar = 1, nVars
+      call ply_l2p_trafo_3D( trafo     = trafo%leg2node,      &
+        &                    original  = legCoeffsIn(:,iVar), &
+        &                    projected = pntVal(:,iVar)       )
+    end do
+    stoptime = MPI_Wtime()
+    write(*,*) 'Time for degree ', maxpolydegree, ' trafo:   ', &
+      &        stoptime - starttime
+
+    ! now transform back to Legendre coefficients
+    allocate(legVal( (maxPolyDegree+1)**3,nVars ))
+    starttime = MPI_Wtime()
+    do iVar = 1, nVars
+      call ply_l2p_trafo_3D( trafo     = trafo%node2leg, &
+        &                    projected = legVal(:,iVar), &
+        &                    original  = pntVal(:,iVar)  )
+    end do
+    stoptime = MPI_Wtime()
+    write(*,*) 'Time for degree ', maxpolydegree, ' inverse: ', &
+      &        stoptime - starttime
+  
+    res = maxval(abs(legVal(:,:) - legCoeffs(:,:)))
+
+  end subroutine ply_check_legToPnt_3D
+
+end program ply_l2p_3D_performance_test
diff --git a/utests/with_fftw/ply_fpt_3D_performance_test.f90 b/utests/with_fftw/ply_fpt_3D_performance_test.f90
new file mode 100644
index 0000000..1555f82
--- /dev/null
+++ b/utests/with_fftw/ply_fpt_3D_performance_test.f90
@@ -0,0 +1,128 @@
+! Copyright (c) 2012, 2014 Jens Zudrop <j.zudrop@grs-sim.de>
+! Copyright (c) 2012-2016, 2018-2020 Harald Klimach <harald.klimach@uni-siegen.de>
+! Copyright (c) 2013-2014 Peter Vitt <peter.vitt2@uni-siegen.de>
+! Copyright (c) 2013-2014 Verena Krupp
+! Copyright (c) 2014 Nikhil Anand <nikhil.anand@uni-siegen.de>
+!
+! Parts of this file were written by Jens Zudrop and Harald Klimach for
+! German Research School for Simulation Sciences GmbH.
+!
+! Parts of this file were written by Harald Klimach, Peter Vitt, Verena Krupp,
+! and Nikhil Anand for University of Siegen.
+!
+! Permission to use, copy, modify, and distribute this software for any
+! purpose with or without fee is hereby granted, provided that the above
+! copyright notice and this permission notice appear in all copies.
+!
+! THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
+! WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+! MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
+! ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+! WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+! ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+! OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+! **************************************************************************** !
+
+!> Unit test to check functionallity of fast polynomial transformations.
+!! \author{Jens Zudrop}
+program ply_fpt_3D_performance_test
+  use mpi, only: mpi_wtime
+  use env_module,            only: rk, fin_env
+  use tem_logging_module,    only: logUnit, tem_logging_init_primary
+  use tem_general_module,    only: tem_general_type, tem_start
+  use ply_fpt_header_module, only: ply_fpt_header_type, ply_fpt_header_define
+  use ply_legFpt_module,     only: ply_legFpt_type, ply_init_legFPT
+  use ply_legFpt_3D_module,  only: ply_legToPnt_3D, &
+    &                              ply_pntToLeg_3D
+
+  !mpi!nprocs = 1
+
+  implicit none
+
+  integer :: iPower
+  integer, parameter :: maxpower = 8
+  real(kind=rk) :: res, newRes
+  type(tem_general_type) :: general
+
+  ! Init the Treelm environment, needed to init the log Unit
+  call tem_start(codeName = 'FPT 3D Performance Test', &
+    &            general  = general                    )
+  call tem_logging_init_primary( level = 1,               &
+    &                            rank = general%proc%rank )
+
+  res = 0.0_rk
+  do iPower = 1,maxpower
+    call ply_check_legToPnt_3D(iPower, newRes)
+    if (newRes.gt.res) then
+      res = newRes
+    end if
+  end do
+
+  write(*,*) 'Maximal deviation:', res
+  if (res <  1.e-08) then
+    write(logUnit(1),*) 'PASSED'
+  end if
+
+  call fin_env()
+
+contains
+
+  subroutine ply_check_legToPnt_3D(power,res)
+    integer, intent(in) :: power
+    real(kind=rk) :: res
+    integer :: maxPolyDegree, iVar, nVars
+    real(kind=rk), allocatable :: legCoeffs(:,:), legCoeffsIn(:,:)
+    real(kind=rk), allocatable :: pntVal(:,:), legVal(:,:)
+    type(ply_fpt_header_type) :: header
+    type(ply_legFpt_type) :: fpt
+    real(kind=rk) :: starttime, stoptime
+
+    ! Define the maximal polynomial degree we want to calculate the
+    ! bases exchange for.
+    maxPolyDegree =  2**power-1  ! maxPolyDegree+1 has to be a power of 2
+    nVars = 3
+    write(logUnit(10),*) '------------------------------------' // &
+      & ' Number of Legendre coefficients (per dim): ', maxPolyDegree+1
+    write(logUnit(10),*) '------------------------------------' // &
+      & ' Number of Legendre coefficients (total): ',(maxPolyDegree+1)**3
+
+    ! Create the Legendre expansion coefficients
+    allocate(legCoeffs((maxPolyDegree+1)**3,nVars))
+    allocate(legCoeffsIn((maxPolyDegree+1)**3,nVars))
+    do iVar = 1, nVars
+      legCoeffs(:,iVar) = real(iVar, rk)
+    end do
+
+    ! Init the FPT
+    call ply_fpt_header_define( me = header )
+    call ply_init_legFpt( maxPolyDegree = maxPolyDegree,        &
+      &                   nIndeps       = (maxpolydegree+1)**2, &
+      &                   fpt           = fpt,                  &
+      &                   header        = header                )
+
+    ! now transform to the Chebyshev nodes
+    allocate(pntVal( (maxPolyDegree+1)**3, nVars ))
+    legCoeffsIn = legCoeffs
+    starttime = MPI_Wtime()
+    call ply_legToPnt_3D( fpt       = fpt,         &
+      &                   legCoeffs = legCoeffsIn, &
+      &                   pntVal    = pntVal,      &
+      &                   nVars     = nVars        )
+    stoptime = MPI_Wtime()
+    write(*,*) 'Time for degree ', maxpolydegree, ' trafo:   ', stoptime - starttime
+
+    ! now transform back to Legendre coefficients
+    allocate(legVal( (maxPolyDegree+1)**3,nVars ))
+    starttime = MPI_Wtime()
+    call ply_pntToLeg_3D( fpt       = fpt,    &
+      &                   pntVal    = pntVal, &
+      &                   legCoeffs = legVal, &
+      &                   nVars     = nVars   )
+    stoptime = MPI_Wtime()
+    write(*,*) 'Time for degree ', maxpolydegree, ' inverse: ', stoptime - starttime
+  
+    res = maxval(abs(legVal(:,:) - legCoeffs(:,:)))
+
+  end subroutine ply_check_legToPnt_3D
+
+end program ply_fpt_3D_performance_test