From 0d926a200cfc25e4e8772c93b962425313b65b07 Mon Sep 17 00:00:00 2001 From: Antoine Cyril David Hoffmann <ahoffman@spcpc606.epfl.ch> Date: Tue, 13 Apr 2021 16:13:48 +0200 Subject: [PATCH] Update intrinsic comput. time profiler --- matlab/profiler.m | 19 +++++++++++-------- src/basic_mod.F90 | 6 +++--- src/closure_mod.F90 | 12 ++++++++---- src/collision_mod.F90 | 7 +++++++ src/diagnose.F90 | 14 +++++++++----- src/ghosts_mod.F90 | 8 ++++---- src/poisson.F90 | 13 +++---------- 7 files changed, 45 insertions(+), 34 deletions(-) diff --git a/matlab/profiler.m b/matlab/profiler.m index 2b5cc433..37d363ba 100644 --- a/matlab/profiler.m +++ b/matlab/profiler.m @@ -7,27 +7,30 @@ DT_SIM = h5readatt(filename,'/data/input','dt'); rhs_Tc = h5read(filename,'/profiler/Tc_rhs'); adv_field_Tc = h5read(filename,'/profiler/Tc_adv_field'); +ghost_Tc = h5read(filename,'/profiler/Tc_ghost'); +clos_Tc = h5read(filename,'/profiler/Tc_clos'); +coll_Tc = h5read(filename,'/profiler/Tc_coll'); poisson_Tc = h5read(filename,'/profiler/Tc_poisson'); Sapj_Tc = h5read(filename,'/profiler/Tc_Sapj'); -diag_Tc = h5read(filename,'/profiler/Tc_diag'); -comm_Tc = h5read(filename,'/profiler/Tc_comm'); checkfield_Tc= h5read(filename,'/profiler/Tc_checkfield'); +diag_Tc = h5read(filename,'/profiler/Tc_diag'); step_Tc = h5read(filename,'/profiler/Tc_step'); Ts0D = h5read(filename,'/profiler/time'); -missing_Tc = step_Tc - rhs_Tc - adv_field_Tc - comm_Tc -... - poisson_Tc - Sapj_Tc -diag_Tc -checkfield_Tc; +missing_Tc = step_Tc - rhs_Tc - adv_field_Tc - ghost_Tc -clos_Tc ... + -coll_Tc -poisson_Tc -Sapj_Tc -checkfield_Tc -diag_Tc; total_Tc = step_Tc; -TIME_PER_FCT = [diff(rhs_Tc); diff(adv_field_Tc); diff(poisson_Tc); diff(comm_Tc);... - diff(Sapj_Tc); diff(checkfield_Tc); diff(diag_Tc); diff(missing_Tc)]; -TIME_PER_FCT = reshape(TIME_PER_FCT,[numel(TIME_PER_FCT)/8,8]); +TIME_PER_FCT = [diff(rhs_Tc); diff(adv_field_Tc); diff(ghost_Tc);... + diff(clos_Tc); diff(coll_Tc); diff(poisson_Tc); diff(Sapj_Tc); ... + diff(checkfield_Tc); diff(diag_Tc); diff(missing_Tc)]; +TIME_PER_FCT = reshape(TIME_PER_FCT,[numel(TIME_PER_FCT)/10,10]); TIME_PER_STEP = sum(TIME_PER_FCT,2); TIME_PER_CPU = trapz(Ts0D(2:end),TIME_PER_STEP); %% Plots -TIME_PER_FCT = [diff(rhs_Tc); diff(adv_field_Tc); diff(poisson_Tc); diff(comm_Tc);... +TIME_PER_FCT = [diff(rhs_Tc); diff(adv_field_Tc); diff(poisson_Tc); diff(ghost_Tc);... diff(Sapj_Tc); diff(checkfield_Tc); diff(diag_Tc); diff(missing_Tc)]; TIME_PER_FCT = reshape(TIME_PER_FCT,[numel(TIME_PER_FCT)/8,8]); fig = figure; diff --git a/src/basic_mod.F90 b/src/basic_mod.F90 index add5d775..c6a33252 100644 --- a/src/basic_mod.F90 +++ b/src/basic_mod.F90 @@ -43,9 +43,9 @@ MODULE basic ! To measure computation time real :: start, finish - real(dp) :: t0_rhs, t0_adv_field, t0_poisson, t0_Sapj, t0_diag, t0_checkfield, t0_step, t0_comm - real(dp) :: t1_rhs, t1_adv_field, t1_poisson, t1_Sapj, t1_diag, t1_checkfield, t1_step, t1_comm - real(dp) :: tc_rhs, tc_adv_field, tc_poisson, tc_Sapj, tc_diag, tc_checkfield, tc_step, tc_comm + real(dp) :: t0_rhs, t0_adv_field, t0_poisson, t0_Sapj, t0_diag, t0_checkfield, t0_step, t0_clos, t0_ghost, t0_coll + real(dp) :: t1_rhs, t1_adv_field, t1_poisson, t1_Sapj, t1_diag, t1_checkfield, t1_step, t1_clos, t1_ghost, t1_coll + real(dp) :: tc_rhs, tc_adv_field, tc_poisson, tc_Sapj, tc_diag, tc_checkfield, tc_step, tc_clos, tc_ghost, tc_coll real(dp):: maxruntime = 1e9 ! Maximum simulation CPU time INTERFACE allocate_array diff --git a/src/closure_mod.F90 b/src/closure_mod.F90 index ab6343cd..92363210 100644 --- a/src/closure_mod.F90 +++ b/src/closure_mod.F90 @@ -26,6 +26,8 @@ SUBROUTINE apply_closure_model sqpp1p_e = SQRT((pmaxe_dp+1)*(pmaxe_dp)) sqpp1p_i = SQRT((pmaxi_dp+1)*(pmaxi_dp)) + CALL cpu_time(t0_clos) + ! Negative out of bounds indices are put to zero (analytically correct) DO ikr = ikrs,ikre DO ikz = ikzs,ikze @@ -74,7 +76,7 @@ SUBROUTINE apply_closure_model moments_i(ipeg_i ,ij,ikr,ikz,:) = 0._dp ENDDO kernel_i(ijeg_i,ikr,ikz) = 0._dp - + ENDDO ENDDO @@ -140,7 +142,7 @@ SUBROUTINE apply_closure_model ! Copy closure : P+2 <- P, P+1 <- P-1, J+1 <- J DO ikr = ikrs,ikre DO ikz = ikzs,ikze - + DO ip = ipsg_e,ipeg_e ! J ghost is J+1, so we put moment J to J+1 moments_e(ip,ijeg_e,ikr,ikz,:) = moments_e(ip,ije_e,ikr,ikz,:) @@ -158,7 +160,7 @@ SUBROUTINE apply_closure_model moments_i(ipeg_i-1,ij,ikr,ikz,:) = moments_i(ipe_i-1,ij,ikr,ikz,:) moments_i(ipeg_i ,ij,ikr,ikz,:) = moments_i(ipe_i ,ij,ikr,ikz,:) ENDDO - + ENDDO ENDDO @@ -166,6 +168,8 @@ SUBROUTINE apply_closure_model if(my_id .EQ. 0) write(*,*) '! Closure scheme not found !' ENDIF - + + CALL cpu_time(t1_clos) + tc_clos = tc_clos + (t1_clos - t0_clos) END SUBROUTINE apply_closure_model END module closure diff --git a/src/collision_mod.F90 b/src/collision_mod.F90 index a2f09bb7..84e0b947 100644 --- a/src/collision_mod.F90 +++ b/src/collision_mod.F90 @@ -224,6 +224,10 @@ CONTAINS COMPLEX(dp), DIMENSION(ips_i:ipe_i) :: TColl_distr_i COMPLEX(dp) :: TColl INTEGER :: ikrs_C, ikre_C, ikzs_C, ikze_C + + ! Execution time start + CALL cpu_time(t0_coll) + IF (ABS(CO) .GE. 2) THEN !compute only if COSOlver matrices are used DO ikr = ikrs,ikre @@ -268,6 +272,9 @@ CONTAINS ENDDO ENDIF + ! Execution time end + CALL cpu_time(t1_coll) + tc_coll = tc_coll + (t1_coll - t0_coll) END SUBROUTINE compute_TColl !******************************************************************************! diff --git a/src/diagnose.F90 b/src/diagnose.F90 index e8def621..51c95bc4 100644 --- a/src/diagnose.F90 +++ b/src/diagnose.F90 @@ -54,11 +54,13 @@ SUBROUTINE diagnose(kstep) CALL creatg(fidres, "/profiler", "performance analysis") CALL creatd(fidres, 0, dims, "/profiler/Tc_rhs", "cumulative rhs computation time") CALL creatd(fidres, 0, dims, "/profiler/Tc_adv_field", "cumulative adv. fields computation time") - CALL creatd(fidres, 0, dims, "/profiler/Tc_comm", "cumulative communication time") + CALL creatd(fidres, 0, dims, "/profiler/Tc_clos", "cumulative closure computation time") + CALL creatd(fidres, 0, dims, "/profiler/Tc_ghost", "cumulative communication time") + CALL creatd(fidres, 0, dims, "/profiler/Tc_coll", "cumulative collision computation time") CALL creatd(fidres, 0, dims, "/profiler/Tc_poisson", "cumulative poisson computation time") CALL creatd(fidres, 0, dims, "/profiler/Tc_Sapj", "cumulative Sapj computation time") - CALL creatd(fidres, 0, dims, "/profiler/Tc_diag", "cumulative sym computation time") CALL creatd(fidres, 0, dims, "/profiler/Tc_checkfield", "cumulative checkfield computation time") + CALL creatd(fidres, 0, dims, "/profiler/Tc_diag", "cumulative sym computation time") CALL creatd(fidres, 0, dims, "/profiler/Tc_step", "cumulative total step computation time") CALL creatd(fidres, 0, dims, "/profiler/time", "current simulation time") @@ -266,7 +268,7 @@ SUBROUTINE diagnose(kstep) CALL closef(fidres) END IF - + CALL cpu_time(t1_diag); tc_diag = tc_diag + (t1_diag - t0_diag) END SUBROUTINE diagnose @@ -284,11 +286,13 @@ SUBROUTINE diagnose_0d ! Time measurement data CALL append(fidres, "/profiler/Tc_rhs", tc_rhs,ionode=0) CALL append(fidres, "/profiler/Tc_adv_field", tc_adv_field,ionode=0) + CALL append(fidres, "/profiler/Tc_clos", tc_clos,ionode=0) + CALL append(fidres, "/profiler/Tc_ghost", tc_ghost,ionode=0) + CALL append(fidres, "/profiler/Tc_coll", tc_coll,ionode=0) CALL append(fidres, "/profiler/Tc_poisson", tc_poisson,ionode=0) CALL append(fidres, "/profiler/Tc_Sapj", tc_Sapj,ionode=0) - CALL append(fidres, "/profiler/Tc_diag", tc_diag,ionode=0) CALL append(fidres, "/profiler/Tc_checkfield",tc_checkfield,ionode=0) - CALL append(fidres, "/profiler/Tc_comm", tc_comm,ionode=0) + CALL append(fidres, "/profiler/Tc_diag", tc_diag,ionode=0) CALL append(fidres, "/profiler/Tc_step", tc_step,ionode=0) CALL append(fidres, "/profiler/time", time,ionode=0) ! Processing data diff --git a/src/ghosts_mod.F90 b/src/ghosts_mod.F90 index b3a32d21..416b7a3c 100644 --- a/src/ghosts_mod.F90 +++ b/src/ghosts_mod.F90 @@ -13,7 +13,7 @@ PUBLIC :: update_ghosts CONTAINS SUBROUTINE update_ghosts - CALL cpu_time(t0_comm) + CALL cpu_time(t0_ghost) IF (num_procs_p .GT. 1) THEN ! Do it only if we share the p CALL MPI_BARRIER(MPI_COMM_WORLD,ierr) @@ -22,9 +22,9 @@ SUBROUTINE update_ghosts CALL MPI_BARRIER(MPI_COMM_WORLD,ierr) CALL update_ghosts_p_i ENDIF - - CALL cpu_time(t1_comm) - tc_comm = tc_comm + (t1_comm - t0_comm) + + CALL cpu_time(t1_ghost) + tc_ghost = tc_ghost + (t1_ghost - t0_ghost) END SUBROUTINE update_ghosts diff --git a/src/poisson.F90 b/src/poisson.F90 index c220d812..9aff5dae 100644 --- a/src/poisson.F90 +++ b/src/poisson.F90 @@ -65,14 +65,8 @@ SUBROUTINE poisson ! Cancel origin singularity IF ((ikr_0 .GE. ikrs) .AND. (ikr_0 .LE. ikre)) phi(ikr_0,ikz_0) = 0 - ! Execution time end - CALL cpu_time(t1_poisson) - tc_poisson = tc_poisson + (t1_poisson - t0_poisson) - ENDIF - CALL cpu_time(t0_comm) - root_bcast = 0 ! Proc zero computes phi for every p !!!!! This is a manual way to do MPI_BCAST !!!!!!!!!!! @@ -105,9 +99,8 @@ SUBROUTINE poisson ENDIF ENDIF - CALL cpu_time(t1_comm) - tc_comm = tc_comm + (t1_comm - t0_comm) - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - + ! Execution time end + CALL cpu_time(t1_poisson) + tc_poisson = tc_poisson + (t1_poisson - t0_poisson) !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! END SUBROUTINE poisson -- GitLab