Tests were made to find the best way to formulate the computation of the parallel gradient.
- This full explicit loop over all indices does not provide the best computation time for the gradients
SUBROUTINE compute_gradients_z
IMPLICIT NONE
INTEGER :: eo, p_int, j_int, ia,ip,ij,iky,ikx,iz,izi
COMPLEX(dp), DIMENSION(local_nz+ngz) :: f_in
COMPLEX(dp), DIMENSION(local_nz) :: f_out
! Compute z first derivative
DO iz=1,local_nz+ngz
izi = iz+ngz/2
DO ikx=1,local_nkx
DO iky=1,local_nky
DO ij=1,local_nj+ngj
DO ip=1,local_np+ngp
DO ia = 1,local_na
ddz_napj(ia,ip,ij,iky,ikx,iz) = inv_deltaz *(&
+onetwelfth*nadiab_moments(ia,ip,ij,iky,ikx,izi-2)&
-twothird*nadiab_moments(ia,ip,ij,iky,ikx,izi-1)&
+twothird*nadiab_moments(ia,ip,ij,iky,ikx,izi+1)&
-onetwelfth*nadiab_moments(ia,ip,ij,iky,ikx,izi-2)&
)
ddzND_Napj(ia,ip,ij,iky,ikx,iz) = inv_deltaz**4 *(&
+1._dp*moments(ia,ip,ij,iky,ikx,izi-2,updatetlevel)&
-4._dp*moments(ia,ip,ij,iky,ikx,izi-1,updatetlevel)&
+6._dp*moments(ia,ip,ij,iky,ikx,izi ,updatetlevel)&
-4._dp*moments(ia,ip,ij,iky,ikx,izi+1,updatetlevel)&
+1._dp*moments(ia,ip,ij,iky,ikx,izi-2,updatetlevel)&
)
ENDDO
ENDDO
ENDDO
ENDDO
ENDDO
ENDDO
END SUBROUTINE compute_gradients_z
-
The best version actually tested is a version that feeds z-slices to a subroutine. The gain in performances is light (about 8%) on a dekstop machine. The differences may be larger in Marconi for example.
SUBROUTINE compute_gradients_z IMPLICIT NONE INTEGER :: eo, p_int, j_int, ia,ip,ij,iky,ikx,iz COMPLEX(dp), DIMENSION(local_nz+ngz) :: f_in COMPLEX(dp), DIMENSION(local_nz) :: f_out DO ikx = 1,local_nkx DO iky = 1,local_nky DO ij = 1,local_nj+ngj DO ip = 1,local_np+ngp DO ia = 1,local_na IF(nzgrid .GT. 1) THEN p_int = parray(ip+ngp/2) eo = MODULO(p_int,2)+1 ! Indicates if we are on even or odd z grid ELSE eo = 0 ENDIF ! Compute z first derivative f_in = nadiab_moments(ia,ip,ij,iky,ikx,:) CALL grad_z(eo,local_nz,ngz,inv_deltaz,f_in,f_out) ddz_napj(ia,ip,ij,iky,ikx,:) = f_out(:) ! Parallel numerical diffusion IF (HDz_h) THEN f_in = nadiab_moments(ia,ip,ij,iky,ikx,:) ELSE f_in = moments(ia,ip,ij,iky,ikx,:,updatetlevel) ENDIF CALL grad_z4(local_nz,ngz,inv_deltaz,f_in,f_out) ! get output DO iz = 1,local_nz ddzND_Napj(ia,ip,ij,iky,ikx,iz) = f_out(iz) ENDDO ENDDO ENDDO ENDDO ENDDO ENDDO END SUBROUTINE compute_gradients_z