diff --git a/Makefile b/Makefile
index 347e6fd381723b6e6219d5e0562a5f3a6e55a88b..659e365b48c7eb81621cac0b5a29b56a18a26749 100644
--- a/Makefile
+++ b/Makefile
@@ -4,35 +4,42 @@ include local/make.inc
 # Standard version with optimized compilation (ifort compiler)
 all: F90 = mpif90
 all: F90FLAGS = -O3 -xHOST
-all: EXEC = $(BINDIR)/gyacomo
+all: EXEC = $(BINDIR)/gyacomo23_dp
 all: dirs src/srcinfo.h
 all: compile
 
+# Single precision version
+sp: F90 = mpif90
+sp: F90FLAGS = -DSINGLE_PRECISION -O3 -xHOST
+sp: EXEC = $(BINDIR)/gyacomo23_sp
+sp: dirs src/srcinfo.h
+sp: compile
+
 # Fast compilation
 fast: F90 = mpif90
 fast: F90FLAGS = -fast
-fast: EXEC = $(BINDIR)/gyacomo_fast
+fast: EXEC = $(BINDIR)/gyacomo23_fast
 fast: dirs src/srcinfo.h
 fast: compile
 
 # Debug version with all flags
 debug: F90 = mpif90
 debug: F90FLAGS = -C -g -traceback -ftrapuv -warn all -debug all
-debug: EXEC = $(BINDIR)/gyacomo_debug
+debug: EXEC = $(BINDIR)/gyacomo23_debug
 debug: dirs src/srcinfo.h
 debug: compile
 
 # For compiling on marconi
 marconi: F90 = mpiifort
 marconi: F90FLAGS = -O3 -xHOST
-marconi: EXEC = $(BINDIR)/gyacomo
+marconi: EXEC = $(BINDIR)/gyacomo23_dp
 marconi: dirs src/srcinfo.h
 marconi: compile
 
 # For compiling on daint
 daint: F90 = ftn
 daint: F90FLAGS = -O3
-daint: EXEC = $(BINDIR)/gyacomo
+daint: EXEC = $(BINDIR)/gyacomo23_dp
 daint: dirs src/srcinfo.h
 daint: compile
 
@@ -40,7 +47,7 @@ daint: compile
 gopt: F90 = mpif90
 gopt: F90FLAGS = -O3 -std=legacy -ffree-line-length-0
 gopt: EXTMOD = -J $(MODDIR)
-gopt: EXEC = $(BINDIR)/gyacomo
+gopt: EXEC = $(BINDIR)/gyacomo23_dp
 gopt: dirs src/srcinfo.h
 gopt: compile
 
@@ -48,7 +55,7 @@ gopt: compile
 gdebug: F90 = mpif90
 gdebug: F90FLAGS = -C -g -std=legacy -ffree-line-length-0
 gdebug: EXTMOD = -J $(MODDIR)
-gdebug: EXEC = $(BINDIR)/gyacomo_debug
+gdebug: EXEC = $(BINDIR)/gyacomo23_debug
 gdebug: dirs src/srcinfo.h
 gdebug: compile
 
diff --git a/local/make.inc b/local/make.inc
index fa747c0395c4390b219ea80d626024de53936c60..8be74cc8fb29cf9f7dc7c317301caccf477f616c 100644
--- a/local/make.inc
+++ b/local/make.inc
@@ -71,6 +71,8 @@ LIBS += -lfm
 # Add FFTW3 local lib
 ifdef FFTW3DIR
     LIBS += -lfftw3 -lfftw3_mpi
+    # single_precision fftw
+    LIBS += -lfftw3f -lfftw3f_mpi
     LDIRS += -L$(FFTW3DIR)/lib
     IDIRS += -I$(FFTW3DIR)/include
 endif
diff --git a/src/diagnose.F90 b/src/diagnose.F90
index 3cf0cb9e3ace35a204b5d523240b5794c4e1627e..9527819e52c08427197145e1369421f28937d344 100644
--- a/src/diagnose.F90
+++ b/src/diagnose.F90
@@ -132,7 +132,7 @@ SUBROUTINE diagnose_full(kstep)
       CALL putarr(fidres, "/data/grid/coordkx", kxarray_full, "kx*rho_s0", ionode=0)
       CALL putarr(fidres, "/data/grid/coordky", kyarray_full, "ky*rho_s0", ionode=0)
       CALL putarr(fidres, "/data/grid/coordz", zarray_full, "z/R", ionode=0)
-      CALL putarr(fidres, "/data/grid/coorxp" , parray_full, "p", ionode=0)
+      CALL putarr(fidres, "/data/grid/coordp" , parray_full, "p", ionode=0)
       CALL putarr(fidres, "/data/grid/coordj" , jarray_full, "j", ionode=0)
       ! Metric info
       CALL creatg(fidres, "/data/metric", "Metric data")
@@ -274,22 +274,22 @@ SUBROUTINE diagnose_0d
   CHARACTER :: letter_a
   INTEGER :: ia
   ! Time measurement data
-  CALL append(fidres, "/profiler/Tc_rhs", chrono_mrhs%ttot,ionode=0)
-  CALL append(fidres, "/profiler/Tc_adv_field", chrono_advf%ttot,ionode=0)
-  CALL append(fidres, "/profiler/Tc_clos", chrono_clos%ttot,ionode=0)
-  CALL append(fidres, "/profiler/Tc_ghost", chrono_ghst%ttot,ionode=0)
-  CALL append(fidres, "/profiler/Tc_coll", chrono_coll%ttot,ionode=0)
-  CALL append(fidres, "/profiler/Tc_poisson", chrono_pois%ttot,ionode=0)
-  CALL append(fidres, "/profiler/Tc_Sapj", chrono_sapj%ttot,ionode=0)
-  CALL append(fidres, "/profiler/Tc_checkfield",chrono_chck%ttot,ionode=0)
-  CALL append(fidres, "/profiler/Tc_diag", chrono_diag%ttot,ionode=0)
-  CALL append(fidres, "/profiler/Tc_grad", chrono_grad%ttot,ionode=0)
-  CALL append(fidres, "/profiler/Tc_nadiab", chrono_napj%ttot,ionode=0)
-  CALL append(fidres, "/profiler/Tc_step", chrono_step%ttot,ionode=0)
-  CALL append(fidres, "/profiler/time", time,ionode=0)
+  CALL append(fidres, "/profiler/Tc_rhs", REAL(chrono_mrhs%ttot,dp),ionode=0)
+  CALL append(fidres, "/profiler/Tc_adv_field", REAL(chrono_advf%ttot,dp),ionode=0)
+  CALL append(fidres, "/profiler/Tc_clos", REAL(chrono_clos%ttot,dp),ionode=0)
+  CALL append(fidres, "/profiler/Tc_ghost", REAL(chrono_ghst%ttot,dp),ionode=0)
+  CALL append(fidres, "/profiler/Tc_coll", REAL(chrono_coll%ttot,dp),ionode=0)
+  CALL append(fidres, "/profiler/Tc_poisson", REAL(chrono_pois%ttot,dp),ionode=0)
+  CALL append(fidres, "/profiler/Tc_Sapj", REAL(chrono_sapj%ttot,dp),ionode=0)
+  CALL append(fidres, "/profiler/Tc_checkfield",REAL(chrono_chck%ttot,dp),ionode=0)
+  CALL append(fidres, "/profiler/Tc_diag", REAL(chrono_diag%ttot,dp),ionode=0)
+  CALL append(fidres, "/profiler/Tc_grad", REAL(chrono_grad%ttot,dp),ionode=0)
+  CALL append(fidres, "/profiler/Tc_nadiab", REAL(chrono_napj%ttot,dp),ionode=0)
+  CALL append(fidres, "/profiler/Tc_step", REAL(chrono_step%ttot,dp),ionode=0)
+  CALL append(fidres, "/profiler/time", REAL(time,dp),ionode=0)
   ! Processing data
-  CALL append(fidres, "/data/var0d/time", time,ionode=0)
-  CALL append(fidres, "/data/var0d/cstep", real(cstep,xp),ionode=0)
+  CALL append(fidres, "/data/var0d/time", REAL(time,dp),ionode=0)
+  CALL append(fidres, "/data/var0d/cstep", real(cstep,dp),ionode=0)
   CALL getatt(fidres, "/data/var0d/", "frames",iframe0d)
   iframe0d=iframe0d+1
   CALL attach(fidres,"/data/var0d/" , "frames", iframe0d)
@@ -298,15 +298,15 @@ SUBROUTINE diagnose_0d
       CALL compute_radial_transport
       DO ia=1,Na
         letter_a = name(ia)(1:1)
-        CALL append(fidres, "/data/var0d/gflux_x"//letter_a,gflux_x(ia),ionode=0)
-        CALL append(fidres, "/data/var0d/pflux_x"//letter_a,pflux_x(ia),ionode=0)
+        CALL append(fidres, "/data/var0d/gflux_x"//letter_a,REAL(gflux_x(ia),dp),ionode=0)
+        CALL append(fidres, "/data/var0d/pflux_x"//letter_a,REAL(pflux_x(ia),dp),ionode=0)
       ENDDO
    ENDIF
    IF (write_hf) THEN
      CALL compute_radial_heatflux
      DO ia=1,Na
        letter_a = name(ia)(1:1)
-       CALL append(fidres, "/data/var0d/hflux_x"//letter_a,hflux_x(ia),ionode=0)
+       CALL append(fidres, "/data/var0d/hflux_x"//letter_a,REAL(hflux_x(ia),dp),ionode=0)
      ENDDO
    ENDIF
 END SUBROUTINE diagnose_0d
@@ -334,8 +334,8 @@ SUBROUTINE diagnose_3d
   COMPLEX(xp), DIMENSION(local_nky,local_nkx,local_nz) :: fmom
   COMPLEX(xp), DIMENSION(local_np, local_nj, local_nz) :: Napjz_
   ! add current time, cstep and frame
-  CALL append(fidres, "/data/var3d/time", time,ionode=0)
-  CALL append(fidres, "/data/var3d/cstep", real(cstep,xp),ionode=0)
+  CALL append(fidres, "/data/var3d/time", REAL(time,dp),ionode=0)
+  CALL append(fidres, "/data/var3d/cstep", real(cstep,dp),ionode=0)
   CALL getatt(fidres, "/data/var3d/", "frames",iframe3d)
   iframe3d=iframe3d+1
   CALL attach(fidres,"/data/var3d/" , "frames", iframe3d)
@@ -434,11 +434,11 @@ SUBROUTINE diagnose_5d
                         ngp, ngj, ngz, total_na
   USE time_integration, ONLY: updatetlevel, ntimelevel
   USE diagnostics_par
-  USE prec_const, ONLY: xp
+  USE prec_const, ONLY: xp,dp
   IMPLICIT NONE

-  CALL append(fidres, "/data/var5d/time", time,ionode=0)
-  CALL append(fidres, "/data/var5d/cstep", real(cstep,xp),ionode=0)
+  CALL append(fidres, "/data/var5d/time", REAL(time,dp),ionode=0)
+  CALL append(fidres, "/data/var5d/cstep", REAL(cstep,dp),ionode=0)
   CALL getatt(fidres, "/data/var5d/", "frames",iframe5d)
   iframe5d=iframe5d+1
   CALL attach(fidres,"/data/var5d/" , "frames", iframe5d)
diff --git a/src/fourier_mod.F90 b/src/fourier_mod.F90
index 28acf05fc33417b874ce908f53e0a010e8540699..594a33c1d105688de72950776fd3177775d01aab 100644
--- a/src/fourier_mod.F90
+++ b/src/fourier_mod.F90
@@ -10,9 +10,8 @@ MODULE fourier
   PRIVATE
   PUBLIC :: init_grid_distr_and_plans, poisson_bracket_and_sum, finalize_plans
-
-  real(C_DOUBLE), pointer, PUBLIC :: real_data_f(:,:), real_data_g(:,:), bracket_sum_r(:,:)
-  complex(C_DOUBLE_complex), pointer, PUBLIC :: cmpx_data_f(:,:), cmpx_data_g(:,:), bracket_sum_c(:,:)
+  real (c_xp_r), pointer, PUBLIC :: real_data_f(:,:), real_data_g(:,:), bracket_sum_r(:,:)
+  complex(c_xp_c), pointer, PUBLIC :: cmpx_data_f(:,:), cmpx_data_g(:,:), bracket_sum_c(:,:)
   type(C_PTR) :: cdatar_f, cdatar_g, cdatar_c
   type(C_PTR) :: cdatac_f, cdatac_g, cdatac_c
   type(C_PTR) , PUBLIC :: planf, planb
@@ -34,12 +33,22 @@ MODULE fourier
     NY_halved = NY_/2 + 1
     !! Complex arrays F, G
     ! Compute the room to allocate
+#ifdef SINGLE_PRECISION
+    alloc_local_1 = fftwf_mpi_local_size_2d(NY_halved, NX_, communicator, local_nky_ptr, local_nky_ptr_offset)
+#else
     alloc_local_1 = fftw_mpi_local_size_2d(NY_halved, NX_, communicator, local_nky_ptr, local_nky_ptr_offset)
-    ! Initalize pointers to this room
+#endif
+    ! Initalize pointers to this room
+#ifdef SINGLE_PRECISION
+    cdatac_f = fftwf_alloc_complex(alloc_local_1)
+    cdatac_g = fftwf_alloc_complex(alloc_local_1)
+    cdatac_c = fftwf_alloc_complex(alloc_local_1)
+#else
     cdatac_f = fftw_alloc_complex(alloc_local_1)
     cdatac_g = fftw_alloc_complex(alloc_local_1)
     cdatac_c = fftw_alloc_complex(alloc_local_1)
-    ! Initalize the arrays with the rooms pointed
+#endif
+    ! Initalize the arrays with the rooms pointed
     call c_f_pointer(cdatac_f, cmpx_data_f, [NX_ ,local_nky_ptr])
     call c_f_pointer(cdatac_g, cmpx_data_g, [NX_ ,local_nky_ptr])
     call c_f_pointer(cdatac_c, bracket_sum_c, [NX_ ,local_nky_ptr])
@@ -48,17 +57,29 @@ MODULE fourier
     ! Compute the room to allocate
     alloc_local_2 = fftw_mpi_local_size_2d(NX_, NY_halved, communicator, local_nkx_ptr, local_nkx_ptr_offset)
     ! Initalize pointers to this room
+#ifdef SINGLE_PRECISION
+    cdatar_f = fftwf_alloc_real(2*alloc_local_2)
+    cdatar_g = fftwf_alloc_real(2*alloc_local_2)
+    cdatar_c = fftwf_alloc_real(2*alloc_local_2)
+#else
     cdatar_f = fftw_alloc_real(2*alloc_local_2)
     cdatar_g = fftw_alloc_real(2*alloc_local_2)
     cdatar_c = fftw_alloc_real(2*alloc_local_2)
+#endif
+    ! Initalize the arrays with the rooms pointed
     call c_f_pointer(cdatar_f, real_data_f, [2*(NY_/2 + 1),local_nkx_ptr])
     call c_f_pointer(cdatar_g, real_data_g, [2*(NY_/2 + 1),local_nkx_ptr])
     call c_f_pointer(cdatar_c, bracket_sum_r, [2*(NY_/2 + 1),local_nkx_ptr])
     ! Plan Creation (out-of-place forward and backward FFT)
+#ifdef SINGLE_PRECISION
+    planf = fftwf_mpi_plan_dft_r2c_2D(NX_, NY_, real_data_f, cmpx_data_f, communicator, ior(FFTW_MEASURE, FFTW_MPI_TRANSPOSED_OUT))
+    planb = fftwf_mpi_plan_dft_c2r_2D(NX_, NY_, cmpx_data_f, real_data_f, communicator, ior(FFTW_MEASURE, FFTW_MPI_TRANSPOSED_IN))
+#else
     planf = fftw_mpi_plan_dft_r2c_2D(NX_, NY_, real_data_f, cmpx_data_f, communicator, ior(FFTW_MEASURE, FFTW_MPI_TRANSPOSED_OUT))
     planb = fftw_mpi_plan_dft_c2r_2D(NX_, NY_, cmpx_data_f, real_data_f, communicator, ior(FFTW_MEASURE, FFTW_MPI_TRANSPOSED_IN))
+#endif
     if ((.not. c_associated(planf)) .OR. (.not. c_associated(planb))) then
       ERROR STOP '>> ERROR << plan creation error!!'
@@ -75,9 +96,9 @@ MODULE fourier
     REAL(xp), INTENT(IN) :: inv_Nx, inv_Ny
     REAL(xp), DIMENSION(local_nky_ptr), INTENT(IN) :: ky_, AA_y
     REAL(xp), DIMENSION(local_nkx_ptr), INTENT(IN) :: kx_, AA_x
-    COMPLEX(C_DOUBLE_COMPLEX), DIMENSION(local_nky_ptr,local_nkx_ptr), &
+    COMPLEX(c_xp_c), DIMENSION(local_nky_ptr,local_nkx_ptr), &
                                INTENT(IN) :: F_(:,:), G_(:,:)
-    real(C_DOUBLE), pointer, INTENT(INOUT) :: sum_real_(:,:)
+    real(c_xp_r), pointer, INTENT(INOUT) :: sum_real_(:,:)
     INTEGER :: ikx,iky
     ! First term df/dx x dg/dy
     DO ikx = 1,local_nkx_ptr
@@ -86,8 +107,14 @@ MODULE fourier
         cmpx_data_g(ikx,iky) = imagu*ky_(iky)*G_(iky,ikx)*AA_x(ikx)*AA_y(iky)
       ENDDO
     ENDDO
+
+#ifdef SINGLE_PRECISION
+    call fftwf_mpi_execute_dft_c2r(planb, cmpx_data_f, real_data_f)
+    call fftwf_mpi_execute_dft_c2r(planb, cmpx_data_g, real_data_g)
+#else
     call fftw_mpi_execute_dft_c2r(planb, cmpx_data_f, real_data_f)
     call fftw_mpi_execute_dft_c2r(planb, cmpx_data_g, real_data_g)
+#endif
     sum_real_ = sum_real_ + real_data_f*real_data_g*inv_Ny*inv_Nx
     ! Second term -df/dy x dg/dx
     DO ikx = 1,local_nkx_ptr
@@ -98,8 +125,13 @@ MODULE fourier
                                imagu*kx_(ikx)*G_(iky,ikx)*AA_x(ikx)*AA_y(iky)
       ENDDO
     ENDDO
+#ifdef SINGLE_PRECISION
+    call fftwf_mpi_execute_dft_c2r(planb, cmpx_data_f, real_data_f)
+    call fftwf_mpi_execute_dft_c2r(planb, cmpx_data_g, real_data_g)
+#else
     call fftw_mpi_execute_dft_c2r(planb, cmpx_data_f, real_data_f)
     call fftw_mpi_execute_dft_c2r(planb, cmpx_data_g, real_data_g)
+#endif
     sum_real_ = sum_real_ - real_data_f*real_data_g*inv_Ny*inv_Nx
 END SUBROUTINE poisson_bracket_and_sum
diff --git a/src/ghosts_mod.F90 b/src/ghosts_mod.F90
index 9a2279b8160acc6cc3573d545dbe15282e02fef8..217b19140e11e77d1896df9c6835910ed693813e 100644
--- a/src/ghosts_mod.F90
+++ b/src/ghosts_mod.F90
@@ -1,6 +1,6 @@
 module ghosts
 USE mpi
-USE prec_const, ONLY: xp
+USE prec_const, ONLY: xp, mpi_xp_c
 IMPLICIT NONE
 INTEGER :: status(MPI_STATUS_SIZE), source, dest, count, ipg
@@ -61,22 +61,22 @@ SUBROUTINE update_ghosts_p_mom
    ! count = local_na*(local_nj+ngj)*local_nky*local_nkx*(local_nz+ngz) ! Number of elements to send
    !!!!!!!!!!! Send ghost to right neighbour !!!!!!!!!!!!!!!!!!!!!!
    ! DO ig = 1,ngp/2
-   !   CALL mpi_sendrecv(moments(:,last-(ig-1),:,:,:,:,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_R, 14+ig, &
-   !                     moments(:,first-ig ,:,:,:,:,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_L, 14+ig, &
+   !   CALL mpi_sendrecv(moments(:,last-(ig-1),:,:,:,:,updatetlevel), count, mpi_xp_c, nbr_R, 14+ig, &
+   !                     moments(:,first-ig ,:,:,:,:,updatetlevel), count, mpi_xp_c, nbr_L, 14+ig, &
    !                     comm0, status, ierr)
    ! ENDDO
    !!!!!!!!!!! Send ghost to left neighbour !!!!!!!!!!!!!!!!!!!!!!
    ! DO ig = 1,ngp/2
-   !   CALL mpi_sendrecv(moments(:,first+(ig-1),:,:,:,:,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_L, 16+ig, &
-   !                     moments(:,last + ig ,:,:,:,:,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_R, 16+ig, &
+   !   CALL mpi_sendrecv(moments(:,first+(ig-1),:,:,:,:,updatetlevel), count, mpi_xp_c, nbr_L, 16+ig, &
+   !                     moments(:,last + ig ,:,:,:,:,updatetlevel), count, mpi_xp_c, nbr_R, 16+ig, &
    !                     comm0, status, ierr)
    ! ENDDO
    count = (ngp/2)*local_na*(local_nj+ngj)*local_nky*local_nkx*(local_nz+ngz) ! Number of elements to send
-   CALL mpi_sendrecv(moments(:,(last-(ngp/2-1)):(last),:,:,:,:,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_R, 14, &
-                     moments(:,(first-ngp/2):(first-1),:,:,:,:,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_L, 14, &
+   CALL mpi_sendrecv(moments(:,(last-(ngp/2-1)):(last),:,:,:,:,updatetlevel), count, mpi_xp_c, nbr_R, 14, &
+                     moments(:,(first-ngp/2):(first-1),:,:,:,:,updatetlevel), count, mpi_xp_c, nbr_L, 14, &
                      comm0, status, ierr)
-   CALL mpi_sendrecv(moments(:,(first):(first+(ngp/2-1)),:,:,:,:,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_L, 16, &
-                     moments(:,(last+1):(last+ngp/2) ,:,:,:,:,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_R, 16, &
+   CALL mpi_sendrecv(moments(:,(first):(first+(ngp/2-1)),:,:,:,:,updatetlevel), count, mpi_xp_c, nbr_L, 16, &
+                     moments(:,(last+1):(last+ngp/2) ,:,:,:,:,updatetlevel), count, mpi_xp_c, nbr_R, 16, &
                      comm0, status, ierr)
 END SUBROUTINE update_ghosts_p_mom
@@ -112,14 +112,14 @@ SUBROUTINE update_ghosts_z_mom
    !!!!!!!!!!! Send ghost to up neighbour !!!!!!!!!!!!!!!!!!!!!!
    ! Send the last local moment to fill the -1 neighbour ghost
    DO ig=1,ngz/2
-     CALL mpi_sendrecv(moments(:,:,:,:,:,last-(ig-1),updatetlevel),count,MPI_DOUBLE_COMPLEX,nbr_U,24+ig, & ! Send to Up the last
-                       buff_pjxy_zBC(:,:,:,:,:,-ig),count,MPI_DOUBLE_COMPLEX,nbr_D,24+ig, & ! Recieve from Down the first-1
+     CALL mpi_sendrecv(moments(:,:,:,:,:,last-(ig-1),updatetlevel),count,mpi_xp_c,nbr_U,24+ig, & ! Send to Up the last
+                       buff_pjxy_zBC(:,:,:,:,:,-ig),count,mpi_xp_c,nbr_D,24+ig, & ! Recieve from Down the first-1
                        comm0, status, ierr)
    ENDDO
    !!!!!!!!!!! Send ghost to down neighbour !!!!!!!!!!!!!!!!!!!!!!
    DO ig=1,ngz/2
-     CALL mpi_sendrecv(moments(:,:,:,:,:,first+(ig-1),updatetlevel),count,MPI_DOUBLE_COMPLEX,nbr_D,26+ig, & ! Send to Up the last
-                       buff_pjxy_zBC(:,:,:,:,:,ig),count,MPI_DOUBLE_COMPLEX,nbr_U,26+ig, & ! Recieve from Down the first-1
+     CALL mpi_sendrecv(moments(:,:,:,:,:,first+(ig-1),updatetlevel),count,mpi_xp_c,nbr_D,26+ig, & ! Send to Up the last
+                       buff_pjxy_zBC(:,:,:,:,:,ig),count,mpi_xp_c,nbr_U,26+ig, & ! Recieve from Down the first-1
                        comm0, status, ierr)
    ENDDO
  ELSE !No parallel (just copy)
@@ -174,14 +174,14 @@ SUBROUTINE update_ghosts_z_3D(field)
    CALL MPI_BARRIER(MPI_COMM_WORLD,ierr)
    !!!!!!!!!!! Send ghost to up neighbour !!!!!!!!!!!!!!!!!!!!!!
    DO ig = 1,ngz/2
-     CALL mpi_sendrecv( field(:,:,last-(ig-1)), count, MPI_DOUBLE_COMPLEX, nbr_U, 30+ig, & ! Send to Up the last
-                        buff_xy_zBC(:,:,-ig), count, MPI_DOUBLE_COMPLEX, nbr_D, 30+ig, & ! Receive from Down the first-1
+     CALL mpi_sendrecv( field(:,:,last-(ig-1)), count, mpi_xp_c, nbr_U, 30+ig, & ! Send to Up the last
+                        buff_xy_zBC(:,:,-ig), count, mpi_xp_c, nbr_D, 30+ig, & ! Receive from Down the first-1
                         comm0, status, ierr)
    ENDDO
    !!!!!!!!!!! Send ghost to down neighbour !!!!!!!!!!!!!!!!!!!!!!
    DO ig = 1,ngz/2
-     CALL mpi_sendrecv( field(:,:,first+(ig-1)), count, MPI_DOUBLE_COMPLEX, nbr_D, 32+ig, & ! Send to Down the first
-                        buff_xy_zBC(:,:,ig), count, MPI_DOUBLE_COMPLEX, nbr_U, 32+ig, & ! Recieve from Up the last+1
+     CALL mpi_sendrecv( field(:,:,first+(ig-1)), count, mpi_xp_c, nbr_D, 32+ig, & ! Send to Down the first
+                        buff_xy_zBC(:,:,ig), count, mpi_xp_c, nbr_U, 32+ig, & ! Recieve from Up the last+1
                         comm0, status, ierr)
    ENDDO
  ELSE
diff --git a/src/nonlinear_mod.F90 b/src/nonlinear_mod.F90
index 733524028426595ed8439dba3fc63ef5e91e61e1..3c783e2475796c56db644b1c0ad07e9b71081805 100644
--- a/src/nonlinear_mod.F90
+++ b/src/nonlinear_mod.F90
@@ -116,7 +116,11 @@ SUBROUTINE compute_nonlinear
       ENDIF
     ENDDO
     ! Put the real nonlinear product into k-space
+#ifdef SINGLE_PRECISION
+    call fftwf_mpi_execute_dft_r2c(planf, bracket_sum_r, bracket_sum_c)
+#else
     call fftw_mpi_execute_dft_r2c(planf, bracket_sum_r, bracket_sum_c)
+#endif
     ! Retrieve convolution in input format and apply anti aliasing
     DO ikx = 1,local_nkx_ptr
       DO iky = 1,local_nky_ptr
diff --git a/src/parallel_mod.F90 b/src/parallel_mod.F90
index 0dbb02cb9f2edfed1abc3d53796a098500a4f3d5..e132fd04c4787c82b3fa9b6c64c88ebf10f0a08a 100644
--- a/src/parallel_mod.F90
+++ b/src/parallel_mod.F90
@@ -1,5 +1,5 @@
 MODULE parallel
-  use prec_const, ONLY : xp
+  use prec_const, ONLY : xp, mpi_xp_c
   USE mpi
   IMPLICIT NONE
   ! Auxiliary variables
@@ -255,15 +255,15 @@ CONTAINS
       DO iz = 1,nz_loc
         ! fill a buffer to contain a slice of data at constant kx and z
         buffer_yl_zc(1:nky_loc) = field_loc(1:nky_loc,ix,iz)
-        CALL MPI_GATHERV(buffer_yl_zc, snd_y, MPI_DOUBLE_COMPLEX, &
-                         buffer_yt_zc, rcv_y, dsp_y, MPI_DOUBLE_COMPLEX, &
+        CALL MPI_GATHERV(buffer_yl_zc, snd_y, mpi_xp_c, &
+                         buffer_yt_zc, rcv_y, dsp_y, mpi_xp_c, &
                          root_ky, comm_ky, ierr)
         buffer_yt_zl(1:nky_tot,iz) = buffer_yt_zc(1:nky_tot)
       ENDDO
       ! send the full line on y contained by root_ky
       IF(rank_ky .EQ. root_ky) THEN
-        CALL MPI_GATHERV(buffer_yt_zl, snd_z, MPI_DOUBLE_COMPLEX, &
-                         buffer_yt_zt, rcv_zy, dsp_zy, MPI_DOUBLE_COMPLEX, &
+        CALL MPI_GATHERV(buffer_yt_zl, snd_z, mpi_xp_c, &
+                         buffer_yt_zt, rcv_zy, dsp_zy, mpi_xp_c, &
                          root_z, comm_z, ierr)
       ENDIF
       ! ID 0 (the one who output) rebuild the whole array
@@ -294,15 +294,15 @@ CONTAINS
       DO iz = 1,nz_loc
         ! fill a buffer to contain a slice of data at constant j and z
         buffer_pl_zc(1:np_loc) = field_loc(1:np_loc,ij,iz)
-        CALL MPI_GATHERV(buffer_pl_zc, snd_p, MPI_DOUBLE_COMPLEX, &
-                         buffer_pt_zc, rcv_p, dsp_p, MPI_DOUBLE_COMPLEX, &
+        CALL MPI_GATHERV(buffer_pl_zc, snd_p, mpi_xp_c, &
+                         buffer_pt_zc, rcv_p, dsp_p, mpi_xp_c, &
                          root_p, comm_p, ierr)
         buffer_pt_zl(1:np_tot,iz) = buffer_pt_zc(1:np_tot)
       ENDDO
       ! send the full line on y contained by root_p
       IF(rank_p .EQ. root_p) THEN
-        CALL MPI_GATHERV(buffer_pt_zl, snd_z, MPI_DOUBLE_COMPLEX, &
-                         buffer_pt_zt, rcv_zp, dsp_zp, MPI_DOUBLE_COMPLEX, &
+        CALL MPI_GATHERV(buffer_pt_zl, snd_z, mpi_xp_c, &
+                         buffer_pt_zt, rcv_zp, dsp_zp, mpi_xp_c, &
                          root_z, comm_z, ierr)
       ENDIF
       ! ID 0 (the one who output) rebuild the whole array
@@ -337,23 +337,23 @@ CONTAINS
         y: DO iy = 1,nky_loc
           ! fill a buffer to contain a slice of p data at constant j, ky, kx and z
           buffer_pl_cy_zc(1:np_loc) = field_loc(1:np_loc,ij,iy,ix,iz)
-          CALL MPI_GATHERV(buffer_pl_cy_zc, snd_p, MPI_DOUBLE_COMPLEX, &
-                           buffer_pt_cy_zc, rcv_p, dsp_p, MPI_DOUBLE_COMPLEX, &
+          CALL MPI_GATHERV(buffer_pl_cy_zc, snd_p, mpi_xp_c, &
+                           buffer_pt_cy_zc, rcv_p, dsp_p, mpi_xp_c, &
                            root_p, comm_p, ierr)
           buffer_pt_yl_zc(1:np_tot,iy) = buffer_pt_cy_zc(1:np_tot)
         ENDDO y
         ! send the full line on p contained by root_p
         IF(rank_p .EQ. 0) THEN
-          CALL MPI_GATHERV(buffer_pt_yl_zc, snd_y, MPI_DOUBLE_COMPLEX, &
-                           buffer_pt_yt_zc, rcv_yp, dsp_yp, MPI_DOUBLE_COMPLEX, &
+          CALL MPI_GATHERV(buffer_pt_yl_zc, snd_y, mpi_xp_c, &
+                           buffer_pt_yt_zc, rcv_yp, dsp_yp, mpi_xp_c, &
                            root_ky, comm_ky, ierr)
           buffer_pt_yt_zl(1:np_tot,1:nky_tot,iz) = buffer_pt_yt_zc(1:np_tot,1:nky_tot)
         ENDIF
       ENDDO z
       ! send the full line on y contained by root_kyas
       IF(rank_ky .EQ. 0) THEN
-        CALL MPI_GATHERV(buffer_pt_yt_zl, snd_z, MPI_DOUBLE_COMPLEX, &
-                         buffer_pt_yt_zt, rcv_zyp, dsp_zyp, MPI_DOUBLE_COMPLEX, &
+        CALL MPI_GATHERV(buffer_pt_yt_zl, snd_z, mpi_xp_c, &
+                         buffer_pt_yt_zt, rcv_zyp, dsp_zyp, mpi_xp_c, &
                          root_z, comm_z, ierr)
       ENDIF
       ! ID 0 (the one who ouptut) rebuild the whole array
@@ -390,11 +390,11 @@ CONTAINS
       ! Send it to all the other processes
       DO i_ = 0,num_procs_p-1
         IF (i_ .NE. world_rank) &
-          CALL MPI_SEND(buffer, count, MPI_DOUBLE_COMPLEX, i_, 0, comm_p, ierr)
+          CALL MPI_SEND(buffer, count, mpi_xp_c, i_, 0, comm_p, ierr)
       ENDDO
     ELSE
       ! Recieve buffer from root
-      CALL MPI_RECV(buffer, count, MPI_DOUBLE_COMPLEX, root, 0, comm_p, MPI_STATUS_IGNORE, ierr)
+      CALL MPI_RECV(buffer, count, mpi_xp_c, root, 0, comm_p, MPI_STATUS_IGNORE, ierr)
       ! Write it in phi
       DO i3 = 1,n3
         DO i2 = 1,n2
@@ -426,11 +426,11 @@ CONTAINS
       ! Send it to all the other processes
       DO i_ = 0,num_procs_z-1
         IF (i_ .NE. world_rank) &
-          CALL MPI_SEND(buffer, count, MPI_DOUBLE_COMPLEX, i_, 0, comm_z, ierr)
+          CALL MPI_SEND(buffer, count, mpi_xp_c, i_, 0, comm_z, ierr)
       ENDDO
     ELSE
       ! Recieve buffer from root
-      CALL MPI_RECV(buffer, count, MPI_DOUBLE_COMPLEX, root, 0, comm_z, MPI_STATUS_IGNORE, ierr)
+      CALL MPI_RECV(buffer, count, mpi_xp_c, root, 0, comm_z, MPI_STATUS_IGNORE, ierr)
       ! Write it in phi
       v = buffer
     ENDIF
@@ -447,14 +447,14 @@ CONTAINS
     last = np + ng/2
     !!!!!!!!!!! Send ghost to right neighbour !!!!!!!!!!!!!!!!!!!!!!
     DO ig = 1,ng/2
-      CALL mpi_sendrecv(f(last-(ig-1)), 1, MPI_DOUBLE_COMPLEX, nbr_R, 14+ig, &
-                        f(first-ig), 1, MPI_DOUBLE_COMPLEX, nbr_L, 14+ig, &
+      CALL mpi_sendrecv(f(last-(ig-1)), 1, mpi_xp_c, nbr_R, 14+ig, &
+                        f(first-ig), 1, mpi_xp_c, nbr_L, 14+ig, &
                         comm0, MPI_STATUS_IGNORE, ierr)
     ENDDO
     !!!!!!!!!!! Send ghost to left neighbour !!!!!!!!!!!!!!!!!!!!!!
     DO ig = 1,ng/2
-      CALL mpi_sendrecv(f(first+(ig-1)), 1, MPI_DOUBLE_COMPLEX, nbr_L, 16+ig, &
-                        f(last+ig), 1, MPI_DOUBLE_COMPLEX, nbr_R, 16+ig, &
+      CALL mpi_sendrecv(f(first+(ig-1)), 1, mpi_xp_c, nbr_L, 16+ig, &
+                        f(last+ig), 1, mpi_xp_c, nbr_R, 16+ig, &
                         comm0, MPI_STATUS_IGNORE, ierr)
     ENDDO
 END SUBROUTINE exchange_ghosts_1D
diff --git a/src/prec_const_mod.F90 b/src/prec_const_mod.F90
index af0ac57c497487a05eed54411dcd1f09a0ad48b2..ae3bfdf31103400741e3fbf4e7730562b6e0c384 100644
--- a/src/prec_const_mod.F90
+++ b/src/prec_const_mod.F90
@@ -9,18 +9,30 @@ MODULE prec_const
   use, intrinsic :: iso_c_binding

   ! Define single and double precision
-  ! INTEGER, PARAMETER :: sp = REAL32 !Single precision
-  ! INTEGER, PARAMETER :: dp = REAL64 !Double precision
-  ! INTEGER, private :: dp_r, dp_p !Range and Aprecision of doubles
-  ! INTEGER, private :: sp_r, sp_p !Range and precision of singles
-  ! INTEGER, private :: MPI_SP !Single precision for MPI
-  ! INTEGER, private :: MPI_DP !Double precision in MPI
-  ! INTEGER, private :: MPI_SUM_DP !Sum reduction operation for DP datatype
-  ! INTEGER, private :: MPI_MAX_DP !Max reduction operation for DP datatype
-  ! INTEGER, private :: MPI_MIN_DP !Min reduction operation for DP datatype
+  INTEGER, PARAMETER :: sp = REAL32 !Single precision
+  INTEGER, PARAMETER :: dp = REAL64 !Double precision
+  INTEGER, private :: dp_r, dp_p !Range and Aprecision of doubles
+  INTEGER, private :: sp_r, sp_p !Range and precision of singles
+  INTEGER, private :: MPI_SP !Single precision for MPI
+  INTEGER, private :: MPI_DP !Double precision in MPI
+  INTEGER, private :: MPI_SUM_DP !Sum reduction operation for DP datatype
+  INTEGER, private :: MPI_MAX_DP !Max reduction operation for DP datatype
+  INTEGER, private :: MPI_MIN_DP !Min reduction operation for DP datatype
   ! Define a generic precision parameter for the entire program
-  INTEGER, PARAMETER :: xp = REAL64!real(xp), enforced through the code
+#ifdef SINGLE_PRECISION
+  INTEGER, PARAMETER :: xp = REAL32
+  INTEGER, PARAMETER :: c_xp_c = C_FLOAT_COMPLEX
+  INTEGER, PARAMETER :: c_xp_r = C_FLOAT
+  INTEGER, PARAMETER :: mpi_xp_c = MPI_COMPLEX
+#else
+  INTEGER, PARAMETER :: xp = REAL64
+  INTEGER, PARAMETER :: c_xp_c = C_DOUBLE_COMPLEX
+  INTEGER, PARAMETER :: c_xp_r = C_DOUBLE
+  INTEGER, PARAMETER :: mpi_xp_c = MPI_DOUBLE_COMPLEX
+
+#endif
+
   ! Auxiliary variables (unused)
   INTEGER, private :: xp_r, xp_p !Range and precision of single
   INTEGER, private :: MPI_XP !Double precision in MPI
   INTEGER, private :: MPI_SUM_XP !Sum reduction operation for xp datatype
diff --git a/wk/fast_analysis.m b/wk/fast_analysis.m
index 34d685b3601ae55c1c8e35df0e1d2b9ee4802c2c..10b931f35726afbf755b5aff570276f43148f36f 100644
--- a/wk/fast_analysis.m
+++ b/wk/fast_analysis.m
@@ -10,7 +10,8 @@
 PARTITION = '/misc/gyacomo23_outputs/';
 % resdir = 'paper_2_GYAC23/CBC/Full_NL_7x4x192x96x32_nu_0.05_muxy_1.0_muz_2.0';
 %% tests
-resdir = 'paper_2_GYAC23/precision_study/5x3x128x64x24';
+% resdir = 'paper_2_GYAC23/precision_study/5x3x128x64x24_xp';
+resdir = 'paper_2_GYAC23/precision_study/5x3x128x64x24_sp';
 % resdir = 'paper_2_GYAC23/precision_study/5x3x128x64x24_Lx_180';
 %%
 J0 = 00; J1 = 10;