diff --git a/Makefile b/Makefile index 1da9fec824ad10f8f75e39df1e17ccf1548fa1c1..cc3344bef4b1d6d31a920a7b00d1c89efbeab759 100644 --- a/Makefile +++ b/Makefile @@ -48,7 +48,7 @@ FOBJ=$(OBJDIR)/advance_field.o $(OBJDIR)/array_mod.o $(OBJDIR)/auxval.o $(OBJDIR $(OBJDIR)/coeff_mod.o $(OBJDIR)/compute_Sapj.o $(OBJDIR)/control.o $(OBJDIR)/fourier_mod.o \ $(OBJDIR)/diagnose.o $(OBJDIR)/diagnostics_par_mod.o $(OBJDIR)/endrun.o $(OBJDIR)/fields_mod.o \ $(OBJDIR)/inital.o $(OBJDIR)/initial_par_mod.o $(OBJDIR)/main.o $(OBJDIR)/memory.o \ -$(OBJDIR)/model_mod.o $(OBJDIR)/mkl_dfti.o $(OBJDIR)/moments_eq_rhs.o $(OBJDIR)/poisson.o \ +$(OBJDIR)/model_mod.o $(OBJDIR)/moments_eq_rhs.o $(OBJDIR)/poisson.o \ $(OBJDIR)/ppexit.o $(OBJDIR)/ppinit.o $(OBJDIR)/prec_const_mod.o $(OBJDIR)/readinputs.o \ $(OBJDIR)/grid_mod.o $(OBJDIR)/stepon.o $(OBJDIR)/tesend.o $(OBJDIR)/time_integration_mod.o \ $(OBJDIR)/utility_mod.o @@ -62,7 +62,7 @@ $(OBJDIR)/utility_mod.o $(OBJDIR)/array_mod.o : src/array_mod.F90 $(OBJDIR)/prec_const_mod.o $(F90) -c $(F90FLAGS) $(FPPFLAGS) $(EXTMOD) $(EXTINC) src/array_mod.F90 -o $@ - $(OBJDIR)/auxval.o : src/auxval.F90 $(OBJDIR)/memory.o $(OBJDIR)/model_mod.o $(OBJDIR)/grid_mod.o + $(OBJDIR)/auxval.o : src/auxval.F90 $(OBJDIR)/fourier_mod.o $(OBJDIR)/memory.o $(OBJDIR)/model_mod.o $(OBJDIR)/grid_mod.o $(F90) -c $(F90FLAGS) $(FPPFLAGS) $(EXTMOD) $(EXTINC) src/auxval.F90 -o $@ $(OBJDIR)/basic_mod.o : src/basic_mod.F90 $(OBJDIR)/prec_const_mod.o diff --git a/local/dirs.inc b/local/dirs.inc index 26854325c8f4153f8382245a36320ceeae0e0479..e5471f07bc17e63fa0f3fe8beadefa574ce266b1 100644 --- a/local/dirs.inc +++ b/local/dirs.inc @@ -6,7 +6,6 @@ OBJDIR = $(PREFIX)/obj LIBDIR = $(PREFIX)/lib MODDIR = $(PREFIX)/mod FMDIR = $(PREFIX)/FM -#FFTW3DIR = /usr/local/fftw3 FFTW3DIR = /home/ahoffman/lib/fftw-3.3.8 CHCKPTDIR = $(PREFIX)/checkpoint FUTILS = /home/ahoffman/lib/futils_1.4/src diff --git a/local/make.inc b/local/make.inc index 4a88e7762f9d7751e5ddac2a8ed88cd90ff64e88..d293ec87c0e91ec2212f7933cb84b87c1c22e8b8 100644 --- a/local/make.inc +++ b/local/make.inc @@ -1,39 +1,11 @@ ################################################################ # -# Section I: Preprocessor options -# -################################################################ - -#0/1 = turn pardiso off/on -PARDISO=0 - -#0/1 = turn MUMPS off/on -ifdef MUMPS - USEMUMPS=1 -else - USEMUMPS=0 -endif - -#0/1 = turn parallel MG solver off/on -USEPARMG=0 - -#0/1 = turn OpenMP compilation off/on -USEOPENMP=0 - -#0/1 = turn verification off/on -USEVERIFICATION=0 - -#Please set precompiler flags here -#FPPFLAGS=-DPARDISO=$(PARDISO) -DMUMPS=$(USEMUMPS) -DPARMG=$(USEPARMG) -DVERIFICATION=$(USEVERIFICATION) - -################################################################ -# -# Section II: Compiler options +# Section I: Compiler options # ################################################################ #Default optimization level (O=optimized, g=debug) -OPTLEVEL = O +OPTLEVEL = g F90FLAGS = CFLAGS = @@ -78,111 +50,25 @@ endif ################################################################ # -# Section III: Libraries and where to find them +# Section II: Libraries and where to find them # ################################################################ IDIRS := -I$(SPC_LOCAL)/include/$(OPTLEVEL) -#IDIRS := -I$(FUTILS)/include/$(OPTLEVEL) -I$(HDF5)/include -I$(ZLIB)/include -#LIBS := -lbsplines -lfutils -lpppack -lpputils2 \ -# -lhdf5_fortran -lhdf5 -lz -ldl -lpthread LIBS := -lfutils -lhdf5_fortran -lhdf5 -lz -ldl -lpthread LDIRS := -L$(SPC_LOCAL)/lib/$(OPTLEVEL) -L$(HDF5)/lib -#LDIRS := -L$(FUTILS)/lib -L$(HDF5)/lib -L$(ZLIB)/lib # Add Multiple-Precision Library LIBS += -lfm -# -# setup of MKL library -# working well with intel compiler and intelmpi -# MKL_LIB has to be set in .bashrc -# -ifdef MKL_LIB - - # common libraries - LIBS += -lmkl_scalapack_lp64 -lmkl_core -lmkl_blacs_intelmpi_lp64 -lm - - # compiler dependent libraries - ifeq ($(COMPTYPE), i) #intel - LIBS += -lmkl_intel_lp64 - endif - ifeq ($(COMPTYPE), g) #gnu - LIBS += -lmkl_gf_lp64 - endif - - # different library depending on threaded or sequential - ifeq ($(USEOPENMP), 1) - LIBS += -lmkl_intel_thread - else - LIBS += -lmkl_sequential - endif - - # MKL needs intel omp library when compiled with gnu - ifeq ($(USEOPENMP), 1) - ifeq ($(COMPTYPE), g) #gnu - LIBS += -liomp5 - endif - endif - - LDIRS += -L$(MKL_LIB) - IDIRS += -I$(MKL_LIB)/../../include - -endif ifdef FFTW3DIR - LIBS += -lfftw3 + LIBS += -lfftw3 -lfftw3_mpi LDIRS += -L$(FFTW3DIR)/lib64 IDIRS += -I$(FFTW3DIR)/include endif -# -# Add mumps libraries if preprocessor set -# -ifeq ($(USEMUMPS),1) - LIBS += -lzmumps -ldmumps -lmumps_common -lpord - IDIRS += -I$(MUMPS)/include - LDIRS += -L$(MUMPS)/lib - ifdef PARMETIS - LIBS += -lparmetis -lmetis - LDIRS += -L$(PARMETIS)/lib -L$(METIS)/lib - endif - ifdef SCOTCH - LIBS += -lesmumps -lscotch -lscotcherr - LDIRS += -L$(SCOTCH)/lib - endif -endif - -# -# Add parallel multigrid libraries if preprocessor set -# -#ifeq ($(USEPARMG),1) -# LIBS += -lparmgrid -#endif - -################################################################ -# -# Section IV: Set up compiler and compiler flags -# -################################################################ - -CC = $(SPC_MPICC) -FC = $(SPC_MPIF90) -F90 = $(SPC_MPIF90) -GBS_EXEC = $(BINDIR)/gbs_$(PLAT)_$(COMP) -LDFLAGS = $(F90FLAGS) - -# -# Intel + Dora + OpenMP needs dynamic linking -# -ifeq ($(PLAT), dora) - ifeq ($(COMPTYPE), i) #intel - ifeq ($(USEOPENMP),1) - LDFLAGS += -dynamic - endif - endif -endif ################################################################ # diff --git a/src/auxval.F90 b/src/auxval.F90 index 952057927bebc2c2c3128e1b450429d2c2373393..50efa00a92690ac936f8667c480a49e5c358fcd9 100644 --- a/src/auxval.F90 +++ b/src/auxval.F90 @@ -4,7 +4,7 @@ subroutine auxval USE basic USE grid USE array - ! use mumps_bsplines, only: putrow_mumps => putrow, updtmat_mumps => updtmat, factor_mumps => factor, bsolve_mumps => bsolve ! from BSPLINES + USE fourier, ONLY: initialize_FFT use prec_const IMPLICIT NONE @@ -12,15 +12,13 @@ subroutine auxval IF (my_id .EQ. 0) WRITE(*,*) '=== Set auxiliary values ===' CALL set_krgrid - CALL mpi_barrier(MPI_COMM_WORLD, ierr) - CALL set_kzgrid - CALL mpi_barrier(MPI_COMM_WORLD, ierr) + CALL set_kzgrid ! Distributed dimension CALL set_pj - CALL mpi_barrier(MPI_COMM_WORLD, ierr) CALL memory ! Allocate memory for global arrays - CALL mpi_barrier(MPI_COMM_WORLD, ierr) + + CALL initialize_FFT ! intialization of FFTW plans END SUBROUTINE auxval diff --git a/src/basic_mod.F90 b/src/basic_mod.F90 index 7499fd80b1b12ea76f1d958c78bf613d6930dfa7..e71b4d03b2e5001927fb57b172ea5237dc367078 100644 --- a/src/basic_mod.F90 +++ b/src/basic_mod.F90 @@ -1,6 +1,6 @@ MODULE basic ! Basic module for time dependent problems - + use, intrinsic :: iso_c_binding use prec_const IMPLICIT none @@ -20,8 +20,9 @@ MODULE basic INTEGER :: ierr ! flag for MPI error INTEGER :: my_id ! identification number of current process INTEGER :: num_procs ! number of MPI processes - INTEGER :: grp_world ! world communicator - INTEGER :: comm_world + integer(C_INTPTR_T) :: alloc_local ! total local data allocation size (mpi process) + integer(C_INTPTR_T) :: local_nkr ! local size of parallelized kz direction + integer(C_INTPTR_T) :: local_kr_start ! local start of parallelized kz direction INTEGER :: iframe1d ! counting the number of times 1d datasets are outputed (for diagnose) INTEGER :: iframe2d ! counting the number of times 2d datasets are outputed (for diagnose) diff --git a/src/compute_Sapj.F90 b/src/compute_Sapj.F90 index 43693ae25c512c215ab5d0c5d9d63d29a71c18ca..b4d02513667ab91c92554c6c62f5bc8d89a95336 100644 --- a/src/compute_Sapj.F90 +++ b/src/compute_Sapj.F90 @@ -13,9 +13,7 @@ SUBROUTINE compute_Sapj USE time_integration!, ONLY : updatetlevel IMPLICIT NONE - COMPLEX(dp), DIMENSION(Nkr,Nkz) :: F_, G_, CONV - ! COMPLEX(dp), DIMENSION(ips_e:ipe_e, ijs_e:ije_e, Nkr, Nkz) :: Sepj - ! COMPLEX(dp), DIMENSION(ips_i:ipe_i, ijs_i:ije_i, Nkr, Nkz) :: Sipj + COMPLEX(dp), DIMENSION(ikrs:ikre,ikzs:ikze) :: F_, G_, CONV INTEGER :: in, is REAL(dp):: kr, kz, kerneln, be_2, bi_2, factn REAL(dp):: sigmae2_taue_o2, sigmai2_taui_o2 @@ -65,7 +63,12 @@ SUBROUTINE compute_Sapj ENDDO krloope1 CALL convolve_2D_F2F( F_, G_, CONV ) ! Convolve Fourier to Fourier - Sepj(ip,ij,:,:) = Sepj(ip,ij,:,:) + CONV ! Add it to Sepj + + DO ikr = ikrs,ikre ! Loop over kr + DO ikz = ikzs,ikze ! Loop over kz + Sepj(ip,ij,ikr,ikz) = Sepj(ip,ij,ikr,ikz) + CONV(ikr,ikz) ! Add it to Sepj + ENDDO + ENDDO IF ( NON_LIN ) THEN ! Fully non linear term ikz phi * ikr Napj krloope2: DO ikr = ikrs,ikre ! Loop over kr @@ -88,7 +91,13 @@ SUBROUTINE compute_Sapj ENDDO krloope2 CALL convolve_2D_F2F( F_, G_, CONV ) - Sepj(ip,ij,:,:) = Sepj(ip,ij,:,:) - CONV ! substract it to Sepj + + DO ikr = ikrs,ikre + DO ikz = ikzs,ikze + Sepj(ip,ij,ikr,ikz) = Sepj(ip,ij,ikr,ikz) - CONV(ikr,ikz) ! substract it to Sepj + ENDDO + ENDDO + ENDIF IF ( in + 1 .LE. jmaxe+1 ) THEN @@ -141,7 +150,11 @@ SUBROUTINE compute_Sapj ENDDO krloopi1 CALL convolve_2D_F2F( F_, G_, CONV ) ! Convolve Fourier to Fourier - Sipj(ip,ij,:,:) = Sipj(ip,ij,:,:) + CONV ! Add it to Sipj + DO ikr = ikrs,ikre + DO ikz = ikzs,ikze + Sipj(ip,ij,ikr,ikz) = Sipj(ip,ij,ikr,ikz) + CONV(ikr,ikz) ! add it to Sipj + ENDDO + ENDDO krloopi2: DO ikr = ikrs,ikre ! Loop over kr kzloopi2: DO ikz = ikzs,ikze ! Loop over kz @@ -171,7 +184,11 @@ SUBROUTINE compute_Sapj ENDDO krloopi2 CALL convolve_2D_F2F( F_, G_, CONV ) ! Convolve and back to Fourier - Sipj(ip,ij,:,:) = Sipj(ip,ij,:,:) - CONV! substract it to Sipj + DO ikr = ikrs,ikre + DO ikz = ikzs,ikze + Sipj(ip,ij,ikr,ikz) = Sipj(ip,ij,ikr,ikz) - CONV(ikr,ikz) ! substract it to Sepj + ENDDO + ENDDO IF ( in + 1 .LE. jmaxi+1 ) THEN factn = real(in,dp) * factn ! factorial(n+1) diff --git a/src/control.F90 b/src/control.F90 index 3a78b912e722324fbac8a08fbdba987f473a9ac8..b2c710a27dfa6cc3c0462f32f448e1db12d1c5b1 100644 --- a/src/control.F90 +++ b/src/control.F90 @@ -9,52 +9,52 @@ SUBROUTINE control !________________________________________________________________________________ ! 1. Prologue ! 1.1 Initialize the parallel environment - IF (my_id .EQ. 0) WRITE(*,*) 'Initialize MPI...' + IF (my_id .EQ. 1) WRITE(*,*) 'Initialize MPI...' CALL ppinit - IF (my_id .EQ. 0) WRITE(*,'(a/)') '...MPI initialized' + IF (my_id .EQ. 1) WRITE(*,'(a/)') '...MPI initialized' CALL daytim('Start at ') ! 1.2 Define data specific to run - IF (my_id .EQ. 0) WRITE(*,*) 'Load basic data...' + IF (my_id .EQ. 1) WRITE(*,*) 'Load basic data...' CALL basic_data - IF (my_id .EQ. 0) WRITE(*,'(a/)') '...basic data loaded.' + IF (my_id .EQ. 1) WRITE(*,'(a/)') '...basic data loaded.' - CALL mpi_barrier(MPI_COMM_WORLD, ierr) + ! CALL mpi_barrier(MPI_COMM_WORLD, ierr) ! 1.3 Read input parameters from input file - IF (my_id .EQ. 0) WRITE(*,*) 'Read input parameters...' + IF (my_id .EQ. 1) WRITE(*,*) 'Read input parameters...' CALL readinputs - IF (my_id .EQ. 0) WRITE(*,'(a/)') '...input parameters read' + IF (my_id .EQ. 1) WRITE(*,'(a/)') '...input parameters read' - CALL mpi_barrier(MPI_COMM_WORLD, ierr) + ! CALL mpi_barrier(MPI_COMM_WORLD, ierr) ! 1.4 Set auxiliary values (allocate arrays, set grid, ...) - IF (my_id .EQ. 0) WRITE(*,*) 'Calculate auxval...' + IF (my_id .EQ. 1) WRITE(*,*) 'Calculate auxval...' CALL auxval - IF (my_id .EQ. 0) WRITE(*,'(a/)') '...auxval calculated' + IF (my_id .EQ. 1) WRITE(*,'(a/)') '...auxval calculated' - CALL mpi_barrier(MPI_COMM_WORLD, ierr) + ! CALL mpi_barrier(MPI_COMM_WORLD, ierr) ! 1.5 Initial conditions - IF (my_id .EQ. 0) WRITE(*,*) 'Create initial state...' + IF (my_id .EQ. 1) WRITE(*,*) 'Create initial state...' CALL inital - IF (my_id .EQ. 0) WRITE(*,'(a/)') '...initial state created' + IF (my_id .EQ. 1) WRITE(*,'(a/)') '...initial state created' - CALL mpi_barrier(MPI_COMM_WORLD, ierr) + ! CALL mpi_barrier(MPI_COMM_WORLD, ierr) ! 1.6 Initial diagnostics - IF (my_id .EQ. 0) WRITE(*,*) 'Initial diagnostics...' + IF (my_id .EQ. 1) WRITE(*,*) 'Initial diagnostics...' CALL diagnose(0) - IF (my_id .EQ. 0) WRITE(*,'(a/)') '...initial diagnostics done' + IF (my_id .EQ. 1) WRITE(*,'(a/)') '...initial diagnostics done' - CALL mpi_barrier(MPI_COMM_WORLD, ierr) + ! CALL mpi_barrier(MPI_COMM_WORLD, ierr) CALL FLUSH(stdout) !________________________________________________________________________________ - IF (my_id .EQ. 0) WRITE(*,*) 'Time integration loop..' + IF (my_id .EQ. 1) WRITE(*,*) 'Time integration loop..' !________________________________________________________________________________ ! 2. Main loop DO @@ -63,7 +63,7 @@ CALL cpu_time(t0_step) ! Measuring time step = step + 1 cstep = cstep + 1 - CALL mpi_barrier(MPI_COMM_WORLD, ierr) + ! CALL mpi_barrier(MPI_COMM_WORLD, ierr) CALL stepon CALL mpi_barrier(MPI_COMM_WORLD, ierr) @@ -76,18 +76,18 @@ CALL cpu_time(t0_diag) ! Measuring time CALL diagnose(step) CALL cpu_time(t1_diag); tc_diag = tc_diag + (t1_diag - t0_diag) - CALL mpi_barrier(MPI_COMM_WORLD, ierr) + ! CALL mpi_barrier(MPI_COMM_WORLD, ierr) CALL cpu_time(t1_step); tc_step = tc_step + (t1_step - t0_step) END DO - IF (my_id .EQ. 0) WRITE(*,'(a/)') '...time integration done' + IF (my_id .EQ. 1) WRITE(*,'(a/)') '...time integration done' !________________________________________________________________________________ ! 9. Epilogue CALL diagnose(-1) CALL endrun - IF (my_id .EQ. 0) CALL daytim('Done at ') + IF (my_id .EQ. 1) CALL daytim('Done at ') CALL ppexit diff --git a/src/fourier_mod.F90 b/src/fourier_mod.F90 index a3dcca39e15f6e07e37f33d60e644e013d1c87df..03978ff3e10b9244179a6cd7afe7a202f5067330 100644 --- a/src/fourier_mod.F90 +++ b/src/fourier_mod.F90 @@ -2,6 +2,7 @@ MODULE fourier USE prec_const USE grid + USE basic use, intrinsic :: iso_c_binding implicit none @@ -9,184 +10,109 @@ MODULE fourier INCLUDE 'fftw3-mpi.f03' PRIVATE - PUBLIC :: fft_r2cc - PUBLIC :: ifft_cc2r - PUBLIC :: fft2_r2cc - PUBLIC :: ifft2_cc2r + type(C_PTR) :: planf, planb, real_ptr, cmpx_ptr + real(C_DOUBLE), pointer :: real_data_1(:,:), real_data_2(:,:) + complex(C_DOUBLE_complex), pointer :: cmpx_data(:,:) + integer (C_INTPTR_T) :: nx, ny, nh + + PUBLIC :: initialize_FFT, finalize_FFT PUBLIC :: convolve_2D_F2F CONTAINS - SUBROUTINE fft_r2cc(fx_in, Fk_out) - - IMPLICIT NONE - REAL(dp), DIMENSION(Nr), INTENT(IN) :: fx_in - COMPLEX(dp), DIMENSION(Nkr), INTENT(OUT):: Fk_out - integer*8 plan - - CALL dfftw_plan_dft_r2c_1d(plan,Nr,fx_in,Fk_out,FFTW_FORWARD,FFTW_ESTIMATE) - CALL dfftw_execute_dft_r2c(plan, fx_in, Fk_out) - CALL dfftw_destroy_plan(plan) - - END SUBROUTINE fft_r2cc - - SUBROUTINE ifft_cc2r(Fk_in, fx_out) - - IMPLICIT NONE - COMPLEX(dp), DIMENSION(Nkr), INTENT(IN) :: Fk_in - REAL(dp), DIMENSION(Nr), INTENT(OUT):: fx_out - integer*8 plan - - CALL dfftw_plan_dft_c2r_1d(plan,Nr,Fk_in,fx_out,FFTW_BACKWARD,FFTW_ESTIMATE) - CALL dfftw_execute_dft_c2r(plan, Fk_in, fx_out) - CALL dfftw_destroy_plan(plan) - fx_out = fx_out/Nr - - END SUBROUTINE ifft_cc2r - - - - SUBROUTINE fft2_r2cc( ffx_in, FFk_out ) - + SUBROUTINE initialize_FFT IMPLICIT NONE - REAL(dp), DIMENSION(Nr,Nz), INTENT(IN) :: ffx_in - COMPLEX(dp), DIMENSION(Nkr,Nkz), INTENT(OUT):: FFk_out - integer*8 plan + nx = Nr; ny = Nz; nh = ( nx / 2 ) + 1 - !!! 2D Forward FFT ________________________! - CALL dfftw_plan_dft_r2c_2d(plan,Nr,Nz,ffx_in,FFk_out,FFTW_FORWARD,FFTW_ESTIMATE) - CALL dfftw_execute_dft_r2c(plan,ffx_in,FFk_out) - CALL dfftw_destroy_plan(plan) + IF ( my_id .EQ. 1)write(*,*) 'Initialize FFT..' + CALL fftw_mpi_init - END SUBROUTINE fft2_r2cc + IF ( my_id .EQ. 1)write(*,*) 'distribution of the array along kr..' + alloc_local = fftw_mpi_local_size_2d(nh, ny, MPI_COMM_WORLD, local_nkr, local_kr_start) + write(*,*) 'local_nkr', local_nkr, 'local_kr_start', local_kr_start + real_ptr = fftw_alloc_real(2 * alloc_local) + cmpx_ptr = fftw_alloc_complex(alloc_local) + call c_f_pointer(real_ptr, real_data_1, [2*local_nkr, nx]) + call c_f_pointer(real_ptr, real_data_2, [2*local_nkr, nx]) + call c_f_pointer(cmpx_ptr, cmpx_data, [ local_nkr, nx]) - SUBROUTINE ifft2_cc2r( FFk_in, ffx_out ) + IF ( my_id .EQ. 1)write(*,*) 'setting FFT iFFT 2D plans..' + ! Backward FFT plan + planb = fftw_mpi_plan_dft_c2r_2d(ny, nx, cmpx_data, real_data_1, MPI_COMM_WORLD, & + FFTW_PATIENT) + ! Forward FFT plan + planf = fftw_mpi_plan_dft_r2c_2d(ny, nx, real_data_1, cmpx_data, MPI_COMM_WORLD, & + FFTW_PATIENT) + END SUBROUTINE initialize_FFT + SUBROUTINE finalize_FFT IMPLICIT NONE - COMPLEX(dp), DIMENSION(Nkr,Nkz), INTENT(IN) :: FFk_in - REAL(dp), DIMENSION(Nr,Nz), INTENT(OUT) :: ffx_out - COMPLEX(dp), DIMENSION(Nkr,Nkz) :: tmp_c - integer*8 plan - tmp_c = FFk_in - !!! 2D Backward FFT ________________________! - CALL dfftw_plan_dft_c2r_2d(plan,Nr,Nz,tmp_c,ffx_out,FFTW_BACKWARD,FFTW_ESTIMATE) - CALL dfftw_execute_dft_c2r(plan,tmp_c,ffx_out) - CALL dfftw_destroy_plan(plan) - ffx_out = ffx_out/Nr/Nz + IF ( my_id .EQ. 0)write(*,*) 'finalize FFTW' + CALL dfftw_destroy_plan(planf) + CALL dfftw_destroy_plan(planb) + call fftw_mpi_cleanup + call fftw_free(real_ptr) + call fftw_free(cmpx_ptr) - END SUBROUTINE ifft2_cc2r + END SUBROUTINE finalize_FFT !!! Convolution 2D Fourier to Fourier - ! - Compute the convolution using the convolution theorem and MKL + ! - Compute the convolution using the convolution theorem SUBROUTINE convolve_2D_F2F( F_2D, G_2D, C_2D ) - USE basic - USE grid, ONLY : AA_r, AA_z, Lr, Lz IMPLICIT NONE - COMPLEX(dp), DIMENSION(Nkr,Nkz), INTENT(IN) :: F_2D, G_2D ! input fields - COMPLEX(dp), DIMENSION(Nkr,Nkz), INTENT(OUT) :: C_2D ! output convolutioned field - REAL(dp), DIMENSION(Nr,Nz) :: ff, gg ! iFFT of input fields - REAL(dp), DIMENSION(Nr,Nz) :: ffgg ! will receive the product of f*g in Real - INTEGER :: ikr, ikz - REAL :: a_r - + COMPLEX(dp), DIMENSION(ikrs:ikre,ikzs:ikze), INTENT(IN) :: F_2D, G_2D ! input fields + COMPLEX(dp), DIMENSION(ikrs:ikre,ikzs:ikze), INTENT(OUT) :: C_2D ! output convolutioned field + + ! initialize data to some function my_function(i,j) + do ikr = ikrs,ikre + do ikz = ikzs,ikze + cmpx_data(ikr-local_kr_start, ikz) = F_2D(ikr, ikz) + end do + end do + CALL mpi_barrier(MPI_COMM_WORLD, ierr) + + ! 2D backward Fourier transform + ! write(*,*) my_id, 'iFFT(F) ..' + call fftw_mpi_execute_dft_c2r(planb, cmpx_data, real_data_1) + + ! initialize data to some function my_function(i,j) + do ikr = ikrs,ikre + do ikz = ikzs,ikze + cmpx_data(ikr-local_kr_start, ikz) = G_2D(ikr, ikz) + end do + end do ! 2D inverse Fourier transform - IF ( num_procs .EQ. 1 ) THEN - CALL ifft2_cc2r(F_2D,ff) - CALL ifft2_cc2r(G_2D,gg) - ELSE - CALL ifft2_cc2r_mpi(F_2D,ff) - CALL ifft2_cc2r_mpi(G_2D,gg) - ENDIF + ! write(*,*) my_id, 'iFFT(G) ..' + call fftw_mpi_execute_dft_c2r(planb, cmpx_data, real_data_2) ! Product in physical space - ffgg = ff * gg; + ! write(*,*) 'multply..' + real_data_1 = real_data_1 * real_data_2 ! 2D Fourier tranform - IF ( num_procs .EQ. 1 ) THEN - CALL fft2_r2cc(ffgg,C_2D) - ELSE - CALL fft2_r2cc_mpi(ffgg,C_2D) - ENDIF + ! write(*,*) my_id, 'FFT(f*g) ..' + call fftw_mpi_execute_dft_r2c(planf, real_data_1, cmpx_data) + + ! COPY TO OUTPUT ARRAY + ! write(*,*) my_id, 'out ..' + do ikr = ikrs,ikre + do ikz = ikzs,ikze + cmpx_data(ikr-local_kr_start,ikz) = C_2D(ikr,ikz) + end do + end do ! Anti aliasing (2/3 rule) DO ikr = ikrs,ikre - a_r = AA_r(ikr) DO ikz = ikzs,ikze - C_2D(ikr,ikz) = C_2D(ikr,ikz) * a_r * AA_z(ikz) + C_2D(ikr,ikz) = C_2D(ikr,ikz) * AA_r(ikr) * AA_z(ikz) ENDDO ENDDO END SUBROUTINE convolve_2D_F2F -!! MPI routines -SUBROUTINE fft2_r2cc_mpi( ffx_in, FFk_out ) - - IMPLICIT NONE - REAL(dp), DIMENSION(Nr,Nz), INTENT(IN) :: ffx_in - COMPLEX(dp), DIMENSION(Nkr,Nkz), INTENT(OUT):: FFk_out - REAL(dp), DIMENSION(Nkr,Nkz) :: tmp_r - type(C_PTR) :: plan - integer(C_INTPTR_T) :: L - integer(C_INTPTR_T) :: M - - ! L = Nr; M = Nz - ! - ! tmp_r = ffx_in - ! !!! 2D Forward FFT ________________________! - ! plan = fftw_mpi_plan_dft_r2c_2d(L, M, tmp_r, FFk_out, MPI_COMM_WORLD, FFTW_ESTIMATE) - ! - ! if ((.not. c_associated(plan))) then - ! write(*,*) "plan creation error!!" - ! stop - ! end if - ! - ! CALL fftw_mpi_execute_dft_r2c(plan,tmp_r,FFk_out) - ! CALL fftw_destroy_plan(plan) - -END SUBROUTINE fft2_r2cc_mpi - - - -SUBROUTINE ifft2_cc2r_mpi( FFk_in, ffx_out ) - - IMPLICIT NONE - COMPLEX(dp), DIMENSION(Nkr,Nkz), INTENT(IN) :: FFk_in - REAL(dp), DIMENSION(Nr,Nz), INTENT(OUT) :: ffx_out - COMPLEX(dp), DIMENSION(Nkr,Nkz) :: tmp_c - type(C_PTR) :: plan - integer(C_INTPTR_T) :: L - integer(C_INTPTR_T) :: M - - ! L = Nr; M = Nz - ! - ! tmp_c = FFk_in - ! !!! 2D Backward FFT ________________________! - ! plan = fftw_mpi_plan_dft_c2r_2d(L, M, tmp_c, ffx_out, MPI_COMM_WORLD, FFTW_ESTIMATE) - ! - ! if ((.not. c_associated(plan))) then - ! write(*,*) "plan creation error!!" - ! stop - ! end if - ! - ! CALL fftw_mpi_execute_dft_c2r(plan,tmp_c,ffx_out) - ! CALL fftw_destroy_plan(plan) - ! - ! ffx_out = ffx_out/Nr/Nz - -END SUBROUTINE ifft2_cc2r_mpi - -! Empty set/free routines to switch easily with MKL DFTI -SUBROUTINE set_descriptors - -END SUBROUTINE set_descriptors - -SUBROUTINE free_descriptors - -END SUBROUTINE free_descriptors - END MODULE fourier diff --git a/src/grid_mod.F90 b/src/grid_mod.F90 index 12f9adf0fda718ca175a388e44636263fca32bda..a3c0286bdb3cca69e0b2ff41c8cfe9882c9ca72d 100644 --- a/src/grid_mod.F90 +++ b/src/grid_mod.F90 @@ -85,7 +85,7 @@ CONTAINS DO ij = ijs_e,ije_e; jarray_e(ij) = ij-1; END DO DO ij = ijs_i,ije_i; jarray_i(ij) = ij-1; END DO maxj = MAX(jmaxi, jmaxe) - + END SUBROUTINE set_pj SUBROUTINE set_krgrid @@ -95,16 +95,11 @@ CONTAINS Nkr = Nr/2+1 ! Defined only on positive kr since fields are real ! ! Start and END indices of grid - IF ( Nkr .GT. 1 ) THEN - ikrs = my_id * (Nkr-1)/num_procs + 1 - ikre = (my_id+1) * (Nkr-1)/num_procs - ELSE - ikrs = 1; ikre = Nkr - ENDIF + ikrs = my_id * Nkr/num_procs + 1 + ikre = (my_id+1) * Nkr/num_procs + + IF(my_id .EQ. num_procs) ikre = Nkr - IF (my_id .EQ. num_procs-1) THEN - ikre = Nkr - ENDIF WRITE(*,*) 'ID = ',my_id,' ikrs = ', ikrs, ' ikre = ', ikre ! Grid spacings @@ -144,8 +139,6 @@ CONTAINS ! Start and END indices of grid ikzs = 1 ikze = Nkz - ! ikzs = my_id * Nz/num_procs + 1 - ! ikze = (my_id+1) * Nz/num_procs WRITE(*,*) 'ID = ',my_id,' ikzs = ', ikzs, ' ikze = ', ikze ! Grid spacings deltakz = 2._dp*PI/Lz @@ -154,12 +147,11 @@ CONTAINS ALLOCATE(kzarray(ikzs:ikze)) DO ikz = ikzs,ikze kzarray(ikz) = deltakz*(MODULO(ikz-1,Nkz/2)-Nkz/2*FLOOR(2.*real(ikz-1)/real(Nkz))) - if (kzarray(ikz) .EQ. 0) THEN - ikz_0 = ikz - ENDIF + if (kzarray(ikz) .EQ. 0) ikz_0 = ikz + if (ikz .EQ. Nz/2+1) kzarray(ikz) = -kzarray(ikz) END DO - kzarray(Nz/2+1) = -kzarray(Nz/2+1) + IF (my_id .EQ. 1) WRITE(*,*) 'ID = ',my_id,' kz = ',kzarray ! Orszag 2/3 filter two_third_kzmax = 2._dp/3._dp*deltakz*(Nkz/2); ALLOCATE(AA_z(ikzs:ikze)) @@ -173,7 +165,7 @@ CONTAINS ! Put kz0RT to the nearest grid point on kz ikz0KH = NINT(kr0KH/deltakr)+1 - kr0KH = kzarray(ikz0KH) + IF ( (ikz0KH .GE. ikzs) .AND. (ikz0KH .LE. ikze) ) kr0KH = kzarray(ikz0KH) END SUBROUTINE set_kzgrid diff --git a/src/inital.F90 b/src/inital.F90 index 79d006893a1698b1b7420fc1ad6b4936456155a5..7f50dbb8096918d19ab8d7f0619780f55b26b628 100644 --- a/src/inital.F90 +++ b/src/inital.F90 @@ -23,14 +23,13 @@ SUBROUTINE inital CALL init_moments ENDIF !!!!!! Set phi !!!!!! - ! WRITE(*,*) 'Init phi' + IF (my_id .EQ. 1) WRITE(*,*) 'Init phi' CALL poisson !!!!!! Set Sepj, Sipj and dnjs coeff table !!!!!! IF ( NON_LIN .OR. (A0KH .NE. 0)) THEN; - ! WRITE(*,*) 'Init Sapj' + IF (my_id .EQ. 1) WRITE(*,*) 'Init Sapj' CALL compute_Sapj - ! WRITE(*,*) 'Building Dnjs table' CALL build_dnjs_table ENDIF @@ -105,12 +104,12 @@ SUBROUTINE init_moments END DO END DO - ! IF ( ikrs .EQ. 1 ) THEN + IF ( ikrs .EQ. 1 ) THEN DO ikz=2,Nkz/2 !symmetry at kr = 0 CALL RANDOM_NUMBER(noise) moments_e( ip,ij,1,ikz, :) = moments_e( ip,ij,1,Nkz+2-ikz, :) END DO - ! ENDIF + ENDIF END DO END DO @@ -124,12 +123,12 @@ SUBROUTINE init_moments END DO END DO - ! IF ( ikrs .EQ. 1 ) THEN + IF ( ikrs .EQ. 1 ) THEN DO ikz=2,Nkz/2 !symmetry at kr = 0 CALL RANDOM_NUMBER(noise) moments_i( ip,ij,1,ikz, :) = moments_i( ip,ij,1,Nkz+2-ikz, :) END DO - ! ENDIF + ENDIF END DO END DO diff --git a/src/poisson.F90 b/src/poisson.F90 index 0c8db4f38e03e027a1ccdbf84f9e87fcc74e2ad2..fbc1a3eba03cfbb65f7e092fc23db20d7ec0e6b3 100644 --- a/src/poisson.F90 +++ b/src/poisson.F90 @@ -93,7 +93,7 @@ SUBROUTINE poisson END DO ! Cancel origin singularity -phi(ikr_0,ikz_0) = 0 +IF ((ikr_0 .GE. ikrs) .AND. (ikr_0 .LE. ikre)) phi(ikr_0,ikz_0) = 0 ! Execution time end CALL cpu_time(t1_poisson) diff --git a/src/ppexit.F90 b/src/ppexit.F90 index 0c0ddb715171c699cecd39a99b312d2968d4edfd..9995c91e9b0ac9291e6dfba9012c249f7b1c5902 100644 --- a/src/ppexit.F90 +++ b/src/ppexit.F90 @@ -2,12 +2,14 @@ SUBROUTINE ppexit ! Exit parallel environment USE basic + USE fourier, ONLY : finalize_FFT use prec_const IMPLICIT NONE - + + CALL finalize_FFT CALL mpi_barrier(MPI_COMM_WORLD, ierr) CALL mpi_finalize(ierr) - + END SUBROUTINE ppexit diff --git a/src/ppinit.F90 b/src/ppinit.F90 index c80bf8779406baaa02d373f19e2d61368a1af2f2..57d7dc390c31b9d156351ebd6c573d0bbf389b8b 100644 --- a/src/ppinit.F90 +++ b/src/ppinit.F90 @@ -14,11 +14,5 @@ SUBROUTINE ppinit CALL MPI_COMM_RANK (MPI_COMM_WORLD, my_id, ierr) CALL MPI_COMM_SIZE (MPI_COMM_WORLD, num_procs, ierr) - CALL MPI_COMM_GROUP(MPI_COMM_WORLD,grp_world,ierr) - ! CALL MPI_COMM_CREATE_GROUP(MPI_COMM_WORLD,grp_world,0,comm_self,ierr) - - ! IF (NON_LIN .AND. (num_procs .GT. 1)) THEN - ! CALL fftw_mpi_init - ! ENDIF - + END SUBROUTINE ppinit diff --git a/src/srcinfo.h b/src/srcinfo.h index e6f1a84e19462bbd3c431f5511f11cbe78806a08..d9f601e55ad0d1c4261bf844a62604ac06f30ee8 100644 --- a/src/srcinfo.h +++ b/src/srcinfo.h @@ -3,8 +3,8 @@ character(len=40) BRANCH character(len=20) AUTHOR character(len=40) EXECDATE character(len=40) HOST -parameter (VERSION='8775464-dirty') -parameter (BRANCH='master') +parameter (VERSION='003c315-dirty') +parameter (BRANCH='MPI') parameter (AUTHOR='ahoffman') -parameter (EXECDATE='Mon Nov 9 13:42:43 CET 2020') +parameter (EXECDATE='Fri Nov 20 14:14:05 CET 2020') parameter (HOST ='spcpc606') diff --git a/src/srcinfo/srcinfo.h b/src/srcinfo/srcinfo.h index e6f1a84e19462bbd3c431f5511f11cbe78806a08..d9f601e55ad0d1c4261bf844a62604ac06f30ee8 100644 --- a/src/srcinfo/srcinfo.h +++ b/src/srcinfo/srcinfo.h @@ -3,8 +3,8 @@ character(len=40) BRANCH character(len=20) AUTHOR character(len=40) EXECDATE character(len=40) HOST -parameter (VERSION='8775464-dirty') -parameter (BRANCH='master') +parameter (VERSION='003c315-dirty') +parameter (BRANCH='MPI') parameter (AUTHOR='ahoffman') -parameter (EXECDATE='Mon Nov 9 13:42:43 CET 2020') +parameter (EXECDATE='Fri Nov 20 14:14:05 CET 2020') parameter (HOST ='spcpc606') diff --git a/src/stepon.F90 b/src/stepon.F90 index 4826a63f5c2660394fb4abcb0089fd12fda3f24c..7f16f1adaab6cf1c521bdb9c3b2d8ff0598dfdfd 100644 --- a/src/stepon.F90 +++ b/src/stepon.F90 @@ -17,8 +17,11 @@ SUBROUTINE stepon DO num_step=1,ntimelevel ! eg RK4 compute successively k1, k2, k3, k4 ! Compute right hand side of moments hierarchy equation CALL moments_eq_rhs + CALL mpi_barrier(MPI_COMM_WORLD, ierr) + ! Advance from updatetlevel to updatetlevel+1 (according to num. scheme) CALL advance_time_level + CALL mpi_barrier(MPI_COMM_WORLD, ierr) ! Update the moments with the hierarchy RHS (step by step) @@ -34,18 +37,27 @@ SUBROUTINE stepon CALL advance_field(moments_i(ip,ij,:,:,:),moments_rhs_i(ip,ij,:,:,:)) ENDDO ENDDO + CALL mpi_barrier(MPI_COMM_WORLD, ierr) + ! Execution time end CALL cpu_time(t1_adv_field) tc_adv_field = tc_adv_field + (t1_adv_field - t0_adv_field) + CALL mpi_barrier(MPI_COMM_WORLD, ierr) ! Update electrostatic potential CALL poisson + CALL mpi_barrier(MPI_COMM_WORLD, ierr) + ! Update nonlinear term IF ( NON_LIN .OR. (A0KH .NE. 0) ) THEN CALL compute_Sapj ENDIF + CALL mpi_barrier(MPI_COMM_WORLD, ierr) + !(The two routines above are called in inital for t=0) CALL checkfield_all() + CALL mpi_barrier(MPI_COMM_WORLD, ierr) + END DO CONTAINS @@ -54,7 +66,7 @@ SUBROUTINE stepon ! Execution time start CALL cpu_time(t0_checkfield) - CALL enforce_symetry() ! Enforcing symmetry on kr = 0 + IF ( ikrs .EQ. 1 ) CALL enforce_symetry() ! Enforcing symmetry on kr = 0 IF(.NOT.nlend) THEN nlend=nlend .or. checkfield(phi,' phi') diff --git a/wk/analysis_2D.m b/wk/analysis_2D.m index 71fab610aa2f413372230d25a89d04f554b4ccb9..20d1a9152edbc68fdaa44e170a7f495d25c5a3b6 100644 --- a/wk/analysis_2D.m +++ b/wk/analysis_2D.m @@ -234,7 +234,7 @@ save_figure end %% -t0 = 50; +t0 = 0; skip_ = 1; DELAY = 0.01*skip_; FRAMES = floor(t0/(Ts2D(2)-Ts2D(1)))+1:skip_:numel(Ts2D); diff --git a/wk/flux_results.m b/wk/flux_results.m index 1cefbb8a6ad67388cc1cceb283e656008485ea45..d1c37d7fe1421c1473535a6ecdc5aafe2b4f4712 100644 --- a/wk/flux_results.m +++ b/wk/flux_results.m @@ -54,7 +54,7 @@ end end grid on; title('$\nu = 0.01$') legend('Mix. Length, Ricci 2006','$P=2$, $J=1$','$P=3$, $J=2$','$P=4$, $J=2$','$P=5$, $J=3$') -FIGNAME = [SIMDIR,'flux_study_nu_1e-2.png']; +FIGNAME = [SIMDIR,'flux_study_nu_1e-2.png']; ylim([1e-4, 10]) saveas(fig,FIGNAME); disp(['Figure saved @ : ',FIGNAME]) diff --git a/wk/fort.90 b/wk/fort.90 index d823444bba59b837ec95babcacf2150103d978cf..412c03cac1350770f03669d8484a2b92377a29a5 100644 --- a/wk/fort.90 +++ b/wk/fort.90 @@ -1,42 +1,42 @@ &BASIC nrun = 100000000 - dt = 0.004 - tmax = 400 - RESTART = .true. + dt = 0.01 + tmax = 1 + RESTART = .false. / &GRID - pmaxe =5 - jmaxe = 3 - pmaxi = 5 - jmaxi = 3 - Nr = 256 - Lr = 66 - Nz = 256 - Lz = 66 + pmaxe =2 + jmaxe = 1 + pmaxi = 2 + jmaxi = 1 + Nr = 64 + Lr = 20 + Nz = 64 + Lz = 20 kpar = 0 CANCEL_ODD_P = .false. / &OUTPUT_PAR - nsave_0d = 250 + nsave_0d = 1 nsave_1d = 0 - nsave_2d = 250 - nsave_5d = 2500 + nsave_2d = 1 + nsave_5d = 1 write_Na00 = .true. write_moments = .true. write_phi = .true. write_non_lin = .true. write_doubleprecision = .true. - resfile0 = '../results/ZP/256x128_L_66_Pe_5_Je_3_Pi_5_Ji_3_nB_0.5_nN_1_nu_1e-01_FC_mu_5e-04/outputs' - rstfile0 = '../results/ZP/256x128_L_66_Pe_5_Je_3_Pi_5_Ji_3_nB_0.5_nN_1_nu_1e-01_FC_mu_5e-04/checkpoint' + resfile0 = '../results/test_parallel/64x32_L_20_Pe_2_Je_1_Pi_2_Ji_1_nB_0.5_nN_1_nu_1e-02_DG_mu_5e-06/outputs' + rstfile0 = '../results/test_parallel/64x32_L_20_Pe_2_Je_1_Pi_2_Ji_1_nB_0.5_nN_1_nu_1e-02_DG_mu_5e-06/checkpoint' job2load = 0 / &MODEL_PAR ! Collisionality - CO = -1 + CO = -2 DK = .false. NON_LIN = .true. - mu = 0.0005 - nu = 0.1 + mu = 5e-06 + nu = 0.01 tau_e = 1 tau_i = 1 sigma_e = 0.023338 @@ -55,9 +55,9 @@ initback_moments =0 initnoise_moments =1e-05 iseed =42 - selfmat_file ='../iCa/self_Coll_GKE_0_GKI_0_ESELF_1_ISELF_1_Pmaxe_5_Jmaxe_3_Pmaxi_5_Jmaxi_3_pamaxx_10.h5' - eimat_file ='../iCa/ei_Coll_GKE_0_GKI_0_ETEST_1_EBACK_1_Pmaxe_5_Jmaxe_3_Pmaxi_5_Jmaxi_3_pamaxx_10_tau_1.0000_mu_0.0233.h5' - iemat_file ='../iCa/ie_Coll_GKE_0_GKI_0_ITEST_1_IBACK_1_Pmaxe_5_Jmaxe_3_Pmaxi_5_Jmaxi_3_pamaxx_10_tau_1.0000_mu_0.0233.h5' + selfmat_file ='../iCa/self_Coll_GKE_0_GKI_0_ESELF_1_ISELF_1_Pmaxe_2_Jmaxe_1_Pmaxi_2_Jmaxi_1_pamaxx_10.h5' + eimat_file ='../iCa/ei_Coll_GKE_0_GKI_0_ETEST_1_EBACK_1_Pmaxe_2_Jmaxe_1_Pmaxi_2_Jmaxi_1_pamaxx_10_tau_1.0000_mu_0.0233.h5' + iemat_file ='../iCa/ie_Coll_GKE_0_GKI_0_ITEST_1_IBACK_1_Pmaxe_2_Jmaxe_1_Pmaxi_2_Jmaxi_1_pamaxx_10_tau_1.0000_mu_0.0233.h5' / &TIME_INTEGRATION_PAR numerical_scheme='RK4'