From 36544af79317646f3c2438e51451d49036f0c960 Mon Sep 17 00:00:00 2001 From: Grzegorz Kowal Date: Fri, 25 Nov 2022 16:06:15 -0300 Subject: [PATCH 1/5] FORCING: Parallelize get_vcoefs() using OpenMP. Signed-off-by: Grzegorz Kowal --- sources/forcing.F90 | 56 ++++++++++++++++++++++++++------------------- 1 file changed, 32 insertions(+), 24 deletions(-) diff --git a/sources/forcing.F90 b/sources/forcing.F90 index 7404c23..7f2c125 100644 --- a/sources/forcing.F90 +++ b/sources/forcing.F90 @@ -1699,42 +1699,48 @@ module forcing ! subroutine get_vcoefs() - use blocks , only : block_data, list_data + use blocks , only : block_data, data_blocks, get_dblocks #ifdef MPI use mpitools, only : reduce_sum #endif /* MPI */ implicit none + integer :: m, n, nt + type(block_data), pointer :: pdata + complex(kind=8), dimension(:,:,:), allocatable :: vc + +!$ integer :: omp_get_num_threads, omp_get_thread_num + !------------------------------------------------------------------------------- ! -! reset vcoefs -! - vcoefs(:,:) = cmplx(0.0d+00, 0.0d+00, kind=8) + nt = 0 +!$omp parallel +!$ nt = omp_get_num_threads() - 1 +!$omp end parallel + allocate(vc(nmodes, NDIMS, 0:nt)) -! assign pdata with the first block on the data block list -! - pdata => list_data + vc(:,:,:) = cmplx(0.0d+00, 0.0d+00, kind=8) -! iterate over all data blocks -! - do while (associated(pdata)) + n = get_dblocks() +!$omp parallel default(shared) private(pdata,nt) +!$ nt = omp_get_thread_num() +!$omp do + do m = 1, n + pdata => data_blocks(m)%ptr -! get contribution of velocity coefficients from the current block -! - call get_vcoefs_block(pdata) + call get_vcoefs_block(pdata, vc(:,:,nt)) + end do +!$omp end do +!$omp end parallel -! assign pdata to the next block -! - pdata => pdata%next + vcoefs = sum(vc, 3) - end do ! over data blocks + deallocate(vc) #ifdef MPI -! reduce velocity coefficients over all processes -! call reduce_sum(vcoefs) #endif /* MPI */ @@ -1752,10 +1758,11 @@ module forcing ! Arguments: ! ! pdata - a pointer to the data block; +! vc - an array for the velocity Fourier coefficients; ! !=============================================================================== ! - subroutine get_vcoefs_block(pdata) + subroutine get_vcoefs_block(pdata, vc) use blocks , only : block_data use constants , only : pi2 @@ -1771,7 +1778,8 @@ module forcing implicit none - type(block_data), pointer, intent(inout) :: pdata + type(block_data), pointer , intent(inout) :: pdata + complex(kind=8), dimension(:,:), intent(inout) :: vc integer :: i, j, k, l real(kind=8) :: cs, sn, dvol @@ -1837,10 +1845,10 @@ module forcing cf = cmplx(cs, sn, kind=8) * dvol - vcoefs(l,1) = vcoefs(l,1) + pdata%q(ivx,i,j,k) * cf - vcoefs(l,2) = vcoefs(l,2) + pdata%q(ivy,i,j,k) * cf + vc(l,1) = vc(l,1) + pdata%q(ivx,i,j,k) * cf + vc(l,2) = vc(l,2) + pdata%q(ivy,i,j,k) * cf #if NDIMS == 3 - vcoefs(l,3) = vcoefs(l,3) + pdata%q(ivz,i,j,k) * cf + vc(l,3) = vc(l,3) + pdata%q(ivz,i,j,k) * cf #endif /* NDIMS == 3 */ end do From 89be37c8b51639d46a244da30f18a65f29353bbd Mon Sep 17 00:00:00 2001 From: Grzegorz Kowal Date: Fri, 25 Nov 2022 16:07:04 -0300 Subject: [PATCH 2/5] FORCING: Parallelize inject_fmodes() using OpenMP. Signed-off-by: Grzegorz Kowal --- sources/forcing.F90 | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/sources/forcing.F90 b/sources/forcing.F90 index 7f2c125..e19b09b 100644 --- a/sources/forcing.F90 +++ b/sources/forcing.F90 @@ -1420,33 +1420,28 @@ module forcing ! subroutine inject_fmodes(dt) - use blocks, only : block_data, list_data + use blocks, only : block_data, data_blocks, get_dblocks implicit none real(kind=8), intent(in) :: dt + integer :: m, n + type(block_data), pointer :: pdata !------------------------------------------------------------------------------- ! -! assign pdata with the first block on the data block list -! - pdata => list_data + n = get_dblocks() -! iterate over all data blocks -! - do while (associated(pdata)) +!$omp parallel do default(shared) private(pdata) + do m = 1, n + pdata => data_blocks(m)%ptr -! inject eddy into the current block -! call inject_fmodes_block(pdata, dt) -! assign pdata to the next block -! - pdata => pdata%next - - end do ! over data blocks + end do +!$omp end parallel do !------------------------------------------------------------------------------- ! From 9ee8c0a2ae530a6090ef5e4c5f10b347912cedc1 Mon Sep 17 00:00:00 2001 From: Grzegorz Kowal Date: Fri, 25 Nov 2022 18:10:12 -0300 Subject: [PATCH 3/5] FORCING: Slightly rewrite the parallelization of get_vcoefs(). Signed-off-by: Grzegorz Kowal --- sources/forcing.F90 | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/sources/forcing.F90 b/sources/forcing.F90 index e19b09b..820411d 100644 --- a/sources/forcing.F90 +++ b/sources/forcing.F90 @@ -1695,45 +1695,50 @@ module forcing subroutine get_vcoefs() use blocks , only : block_data, data_blocks, get_dblocks + use helpers , only : print_message #ifdef MPI use mpitools, only : reduce_sum #endif /* MPI */ implicit none - integer :: m, n, nt + integer :: m, n, status type(block_data), pointer :: pdata complex(kind=8), dimension(:,:,:), allocatable :: vc -!$ integer :: omp_get_num_threads, omp_get_thread_num + character(len=*), parameter :: loc = 'FORCING:get_vcoefs()' !------------------------------------------------------------------------------- ! - nt = 0 -!$omp parallel -!$ nt = omp_get_num_threads() - 1 -!$omp end parallel - allocate(vc(nmodes, NDIMS, 0:nt)) + n = get_dblocks() + + allocate(vc(nmodes, NDIMS, n), stat=status) + if (status /= 0) then + call print_message(loc, & + "Could not allocate memory for the Fourier coefficients!") + go to 100 + end if vc(:,:,:) = cmplx(0.0d+00, 0.0d+00, kind=8) - n = get_dblocks() -!$omp parallel default(shared) private(pdata,nt) -!$ nt = omp_get_thread_num() -!$omp do +!$omp parallel do default(shared) private(pdata) do m = 1, n pdata => data_blocks(m)%ptr - call get_vcoefs_block(pdata, vc(:,:,nt)) + call get_vcoefs_block(pdata, vc(:,:,m)) end do -!$omp end do -!$omp end parallel +!$omp end parallel do vcoefs = sum(vc, 3) - deallocate(vc) + deallocate(vc, stat=status) + if (status /= 0) & + call print_message(loc, & + "Could not release memory of the Fourier coefficients!") + + 100 continue #ifdef MPI call reduce_sum(vcoefs) From b28a42427c76a38366abd96a7c15c0ff39f4c9a4 Mon Sep 17 00:00:00 2001 From: Grzegorz Kowal Date: Fri, 25 Nov 2022 18:13:22 -0300 Subject: [PATCH 4/5] FORCING: Parallelize inject_eddy() using OpenMP. Signed-off-by: Grzegorz Kowal --- sources/forcing.F90 | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/sources/forcing.F90 b/sources/forcing.F90 index 820411d..e93e7c3 100644 --- a/sources/forcing.F90 +++ b/sources/forcing.F90 @@ -1212,33 +1212,28 @@ module forcing ! subroutine inject_eddy(xp, ap) - use blocks, only : block_data, list_data + use blocks, only : block_data, data_blocks, get_dblocks implicit none real(kind=8), dimension(3), intent(in) :: xp, ap + integer :: m, n + type(block_data), pointer :: pdata !------------------------------------------------------------------------------- ! -! assign pdata with the first block on the data block list -! - pdata => list_data + n = get_dblocks() -! iterate over all data blocks -! - do while (associated(pdata)) +!$omp parallel do default(shared) private(pdata) + do m = 1, n + pdata => data_blocks(m)%ptr -! inject eddy into the current block -! call inject_eddy_block(pdata, xp(:), ap(:)) -! assign pdata to the next block -! - pdata => pdata%next - - end do ! over data blocks + end do +!$omp end parallel do !------------------------------------------------------------------------------- ! From 0a20337054e7942f1965280891f8ad19ec0c5d3a Mon Sep 17 00:00:00 2001 From: Grzegorz Kowal Date: Fri, 25 Nov 2022 18:16:02 -0300 Subject: [PATCH 5/5] EVOLUTION: Remove debug check for NaNs. Signed-off-by: Grzegorz Kowal --- sources/evolution.F90 | 73 ------------------------------------------- 1 file changed, 73 deletions(-) diff --git a/sources/evolution.F90 b/sources/evolution.F90 index 1bc0460..a7245fa 100644 --- a/sources/evolution.F90 +++ b/sources/evolution.F90 @@ -1135,12 +1135,6 @@ module evolution end if ! toplev > 1 -#ifdef DEBUG -! check variables for NaNs -! - call check_variables() -#endif /* DEBUG */ - ! error entry point ! 100 continue @@ -4319,73 +4313,6 @@ module evolution !------------------------------------------------------------------------------- ! end subroutine update_errors_max -#ifdef DEBUG -! -!=============================================================================== -! -! subroutine CHECK_VARIABLES: -! -------------------------- -! -! Subroutine iterates over all data blocks and converts the conservative -! variables to their primitive representation. -! -! -!=============================================================================== -! - subroutine check_variables() - - use coordinates , only : nn => bcells - use equations , only : nv, pvars, cvars - use ieee_arithmetic, only : ieee_is_nan - - use blocks , only : block_meta - use blocks , only : block_data, list_data - - implicit none - - integer :: i, j, k, p - - type(block_meta), pointer :: pmeta - type(block_data), pointer :: pdata - -!------------------------------------------------------------------------------- -! -#if NDIMS == 2 - k = 1 -#endif /* NDIMS == 2 */ - - pdata => list_data - - do while (associated(pdata)) - - pmeta => pdata%meta - -#if NDIMS == 3 - do k = 1, nn -#endif /* NDIMS == 3 */ - do j = 1, nn - do i = 1, nn - do p = 1, nv - if (ieee_is_nan(pdata%u(p,i,j,k))) then - print *, 'U NaN:', cvars(p), pdata%meta%id, i, j, k - end if - if (ieee_is_nan(pdata%q(p,i,j,k))) then - print *, 'Q NaN:', pvars(p), pdata%meta%id, i, j, k - end if - end do ! p = 1, nv - end do ! i = 1, nn - end do ! j = 1, nn -#if NDIMS == 3 - end do ! k = 1, nn -#endif /* NDIMS == 3 */ - - pdata => pdata%next - end do - -!------------------------------------------------------------------------------- -! - end subroutine check_variables -#endif /* DEBUG */ !=============================================================================== !