From 36544af79317646f3c2438e51451d49036f0c960 Mon Sep 17 00:00:00 2001 From: Grzegorz Kowal Date: Fri, 25 Nov 2022 16:06:15 -0300 Subject: [PATCH 1/2] FORCING: Parallelize get_vcoefs() using OpenMP. Signed-off-by: Grzegorz Kowal --- sources/forcing.F90 | 56 ++++++++++++++++++++++++++------------------- 1 file changed, 32 insertions(+), 24 deletions(-) diff --git a/sources/forcing.F90 b/sources/forcing.F90 index 7404c23..7f2c125 100644 --- a/sources/forcing.F90 +++ b/sources/forcing.F90 @@ -1699,42 +1699,48 @@ module forcing ! subroutine get_vcoefs() - use blocks , only : block_data, list_data + use blocks , only : block_data, data_blocks, get_dblocks #ifdef MPI use mpitools, only : reduce_sum #endif /* MPI */ implicit none + integer :: m, n, nt + type(block_data), pointer :: pdata + complex(kind=8), dimension(:,:,:), allocatable :: vc + +!$ integer :: omp_get_num_threads, omp_get_thread_num + !------------------------------------------------------------------------------- ! -! reset vcoefs -! - vcoefs(:,:) = cmplx(0.0d+00, 0.0d+00, kind=8) + nt = 0 +!$omp parallel +!$ nt = omp_get_num_threads() - 1 +!$omp end parallel + allocate(vc(nmodes, NDIMS, 0:nt)) -! assign pdata with the first block on the data block list -! - pdata => list_data + vc(:,:,:) = cmplx(0.0d+00, 0.0d+00, kind=8) -! iterate over all data blocks -! - do while (associated(pdata)) + n = get_dblocks() +!$omp parallel default(shared) private(pdata,nt) +!$ nt = omp_get_thread_num() +!$omp do + do m = 1, n + pdata => data_blocks(m)%ptr -! get contribution of velocity coefficients from the current block -! - call get_vcoefs_block(pdata) + call get_vcoefs_block(pdata, vc(:,:,nt)) + end do +!$omp end do +!$omp end parallel -! assign pdata to the next block -! - pdata => pdata%next + vcoefs = sum(vc, 3) - end do ! over data blocks + deallocate(vc) #ifdef MPI -! reduce velocity coefficients over all processes -! call reduce_sum(vcoefs) #endif /* MPI */ @@ -1752,10 +1758,11 @@ module forcing ! Arguments: ! ! pdata - a pointer to the data block; +! vc - an array for the velocity Fourier coefficients; ! !=============================================================================== ! - subroutine get_vcoefs_block(pdata) + subroutine get_vcoefs_block(pdata, vc) use blocks , only : block_data use constants , only : pi2 @@ -1771,7 +1778,8 @@ module forcing implicit none - type(block_data), pointer, intent(inout) :: pdata + type(block_data), pointer , intent(inout) :: pdata + complex(kind=8), dimension(:,:), intent(inout) :: vc integer :: i, j, k, l real(kind=8) :: cs, sn, dvol @@ -1837,10 +1845,10 @@ module forcing cf = cmplx(cs, sn, kind=8) * dvol - vcoefs(l,1) = vcoefs(l,1) + pdata%q(ivx,i,j,k) * cf - vcoefs(l,2) = vcoefs(l,2) + pdata%q(ivy,i,j,k) * cf + vc(l,1) = vc(l,1) + pdata%q(ivx,i,j,k) * cf + vc(l,2) = vc(l,2) + pdata%q(ivy,i,j,k) * cf #if NDIMS == 3 - vcoefs(l,3) = vcoefs(l,3) + pdata%q(ivz,i,j,k) * cf + vc(l,3) = vc(l,3) + pdata%q(ivz,i,j,k) * cf #endif /* NDIMS == 3 */ end do From 89be37c8b51639d46a244da30f18a65f29353bbd Mon Sep 17 00:00:00 2001 From: Grzegorz Kowal Date: Fri, 25 Nov 2022 16:07:04 -0300 Subject: [PATCH 2/2] FORCING: Parallelize inject_fmodes() using OpenMP. Signed-off-by: Grzegorz Kowal --- sources/forcing.F90 | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/sources/forcing.F90 b/sources/forcing.F90 index 7f2c125..e19b09b 100644 --- a/sources/forcing.F90 +++ b/sources/forcing.F90 @@ -1420,33 +1420,28 @@ module forcing ! subroutine inject_fmodes(dt) - use blocks, only : block_data, list_data + use blocks, only : block_data, data_blocks, get_dblocks implicit none real(kind=8), intent(in) :: dt + integer :: m, n + type(block_data), pointer :: pdata !------------------------------------------------------------------------------- ! -! assign pdata with the first block on the data block list -! - pdata => list_data + n = get_dblocks() -! iterate over all data blocks -! - do while (associated(pdata)) +!$omp parallel do default(shared) private(pdata) + do m = 1, n + pdata => data_blocks(m)%ptr -! inject eddy into the current block -! call inject_fmodes_block(pdata, dt) -! assign pdata to the next block -! - pdata => pdata%next - - end do ! over data blocks + end do +!$omp end parallel do !------------------------------------------------------------------------------- !