From 0d926a200cfc25e4e8772c93b962425313b65b07 Mon Sep 17 00:00:00 2001
From: Antoine Cyril David Hoffmann <ahoffman@spcpc606.epfl.ch>
Date: Tue, 13 Apr 2021 16:13:48 +0200
Subject: [PATCH] Update intrinsic computation time profiler

---
 matlab/profiler.m     | 19 +++++++++++--------
 src/basic_mod.F90     |  6 +++---
 src/closure_mod.F90   | 12 ++++++++----
 src/collision_mod.F90 |  7 +++++++
 src/diagnose.F90      | 14 +++++++++-----
 src/ghosts_mod.F90    |  8 ++++----
 src/poisson.F90       | 13 +++----------
 7 files changed, 45 insertions(+), 34 deletions(-)

diff --git a/matlab/profiler.m b/matlab/profiler.m
index 2b5cc433..37d363ba 100644
--- a/matlab/profiler.m
+++ b/matlab/profiler.m
@@ -7,27 +7,30 @@ DT_SIM    = h5readatt(filename,'/data/input','dt');
 
 rhs_Tc       = h5read(filename,'/profiler/Tc_rhs');
 adv_field_Tc = h5read(filename,'/profiler/Tc_adv_field');
+ghost_Tc      = h5read(filename,'/profiler/Tc_ghost');
+clos_Tc      = h5read(filename,'/profiler/Tc_clos');
+coll_Tc      = h5read(filename,'/profiler/Tc_coll');
 poisson_Tc   = h5read(filename,'/profiler/Tc_poisson');
 Sapj_Tc      = h5read(filename,'/profiler/Tc_Sapj');
-diag_Tc      = h5read(filename,'/profiler/Tc_diag');
-comm_Tc      = h5read(filename,'/profiler/Tc_comm');
 checkfield_Tc= h5read(filename,'/profiler/Tc_checkfield');
+diag_Tc      = h5read(filename,'/profiler/Tc_diag');
 step_Tc      = h5read(filename,'/profiler/Tc_step');
 Ts0D         = h5read(filename,'/profiler/time');
 
-missing_Tc   = step_Tc - rhs_Tc - adv_field_Tc - comm_Tc -...
-               poisson_Tc - Sapj_Tc -diag_Tc -checkfield_Tc;
+missing_Tc   = step_Tc - rhs_Tc - adv_field_Tc - ghost_Tc -clos_Tc ...
+              -coll_Tc -poisson_Tc -Sapj_Tc -checkfield_Tc -diag_Tc;
 total_Tc     = step_Tc;
 
-TIME_PER_FCT = [diff(rhs_Tc); diff(adv_field_Tc); diff(poisson_Tc); diff(comm_Tc);...
-    diff(Sapj_Tc); diff(checkfield_Tc); diff(diag_Tc); diff(missing_Tc)];
-TIME_PER_FCT = reshape(TIME_PER_FCT,[numel(TIME_PER_FCT)/8,8]);
+TIME_PER_FCT = [diff(rhs_Tc); diff(adv_field_Tc); diff(ghost_Tc);...
+    diff(clos_Tc); diff(coll_Tc); diff(poisson_Tc); diff(Sapj_Tc); ...
+    diff(checkfield_Tc); diff(diag_Tc); diff(missing_Tc)];
+TIME_PER_FCT = reshape(TIME_PER_FCT,[numel(TIME_PER_FCT)/10,10]);
 
 TIME_PER_STEP = sum(TIME_PER_FCT,2);
 TIME_PER_CPU  = trapz(Ts0D(2:end),TIME_PER_STEP);
 
 %% Plots
-TIME_PER_FCT = [diff(rhs_Tc); diff(adv_field_Tc); diff(poisson_Tc); diff(comm_Tc);...
+TIME_PER_FCT = [diff(rhs_Tc); diff(adv_field_Tc); diff(poisson_Tc); diff(ghost_Tc);... % NOTE(review): clos_Tc and coll_Tc are not part of this plotted array although missing_Tc subtracts them - confirm this is intended
     diff(Sapj_Tc); diff(checkfield_Tc); diff(diag_Tc); diff(missing_Tc)];
 TIME_PER_FCT = reshape(TIME_PER_FCT,[numel(TIME_PER_FCT)/8,8]);
 fig = figure;
diff --git a/src/basic_mod.F90 b/src/basic_mod.F90
index add5d775..c6a33252 100644
--- a/src/basic_mod.F90
+++ b/src/basic_mod.F90
@@ -43,9 +43,11 @@ MODULE basic
 
   ! To measure computation time
   real :: start, finish
-  real(dp) :: t0_rhs, t0_adv_field, t0_poisson, t0_Sapj, t0_diag, t0_checkfield, t0_step, t0_comm
-  real(dp) :: t1_rhs, t1_adv_field, t1_poisson, t1_Sapj, t1_diag, t1_checkfield, t1_step, t1_comm
-  real(dp) :: tc_rhs, tc_adv_field, tc_poisson, tc_Sapj, tc_diag, tc_checkfield, tc_step, tc_comm
+  real(dp) :: t0_rhs, t0_adv_field, t0_poisson, t0_Sapj, t0_diag, t0_checkfield, t0_step, t0_clos, t0_ghost, t0_coll
+  real(dp) :: t1_rhs, t1_adv_field, t1_poisson, t1_Sapj, t1_diag, t1_checkfield, t1_step, t1_clos, t1_ghost, t1_coll
+  ! Cumulative timers are updated as tc = tc + (t1 - t0), so they must start from zero
+  real(dp) :: tc_rhs = 0._dp, tc_adv_field = 0._dp, tc_poisson = 0._dp, tc_Sapj = 0._dp, tc_diag = 0._dp, &
+              tc_checkfield = 0._dp, tc_step = 0._dp, tc_clos = 0._dp, tc_ghost = 0._dp, tc_coll = 0._dp
   real(dp):: maxruntime = 1e9 ! Maximum simulation CPU time
 
   INTERFACE allocate_array
diff --git a/src/closure_mod.F90 b/src/closure_mod.F90
index ab6343cd..92363210 100644
--- a/src/closure_mod.F90
+++ b/src/closure_mod.F90
@@ -26,6 +26,8 @@ SUBROUTINE apply_closure_model
   sqpp1p_e   = SQRT((pmaxe_dp+1)*(pmaxe_dp))
   sqpp1p_i   = SQRT((pmaxi_dp+1)*(pmaxi_dp))
 
+  CALL cpu_time(t0_clos) ! Execution time start
+
   ! Negative out of bounds indices are put to zero (analytically correct)
     DO ikr = ikrs,ikre
       DO ikz = ikzs,ikze
@@ -74,7 +76,7 @@ SUBROUTINE apply_closure_model
             moments_i(ipeg_i  ,ij,ikr,ikz,:) = 0._dp
           ENDDO
           kernel_i(ijeg_i,ikr,ikz)      = 0._dp
-          
+
         ENDDO
       ENDDO
 
@@ -140,7 +142,7 @@ SUBROUTINE apply_closure_model
         ! Copy closure : P+2 <- P, P+1 <- P-1, J+1 <- J
         DO ikr = ikrs,ikre
           DO ikz = ikzs,ikze
-  
+
             DO ip = ipsg_e,ipeg_e
               ! J ghost is J+1, so we put moment J to J+1
               moments_e(ip,ijeg_e,ikr,ikz,:) = moments_e(ip,ije_e,ikr,ikz,:)
@@ -158,7 +160,7 @@ SUBROUTINE apply_closure_model
               moments_i(ipeg_i-1,ij,ikr,ikz,:) = moments_i(ipe_i-1,ij,ikr,ikz,:)
               moments_i(ipeg_i  ,ij,ikr,ikz,:) = moments_i(ipe_i  ,ij,ikr,ikz,:)
             ENDDO
-            
+
           ENDDO
         ENDDO
 
@@ -166,6 +168,8 @@ SUBROUTINE apply_closure_model
       if(my_id .EQ. 0) write(*,*) '! Closure scheme not found !'
 
     ENDIF
 
+    CALL cpu_time(t1_clos) ! Execution time end
+    tc_clos = tc_clos + (t1_clos - t0_clos) ! Accumulate the time spent in this call
   END SUBROUTINE apply_closure_model
 END module closure
diff --git a/src/collision_mod.F90 b/src/collision_mod.F90
index a2f09bb7..84e0b947 100644
--- a/src/collision_mod.F90
+++ b/src/collision_mod.F90
@@ -224,6 +224,10 @@ CONTAINS
     COMPLEX(dp), DIMENSION(ips_i:ipe_i) :: TColl_distr_i
     COMPLEX(dp) :: TColl
     INTEGER :: ikrs_C, ikre_C, ikzs_C, ikze_C
+
+    ! Execution time start
+    CALL cpu_time(t0_coll)
+
     IF (ABS(CO) .GE. 2) THEN !compute only if COSOlver matrices are used
 
       DO ikr = ikrs,ikre
@@ -268,6 +272,9 @@ CONTAINS
       ENDDO
     ENDIF
 
+    ! Execution time end
+    CALL cpu_time(t1_coll)
+    tc_coll = tc_coll + (t1_coll - t0_coll)
   END SUBROUTINE compute_TColl
 
   !******************************************************************************!
diff --git a/src/diagnose.F90 b/src/diagnose.F90
index e8def621..51c95bc4 100644
--- a/src/diagnose.F90
+++ b/src/diagnose.F90
@@ -54,11 +54,13 @@ SUBROUTINE diagnose(kstep)
      CALL creatg(fidres, "/profiler", "performance analysis")
      CALL creatd(fidres, 0, dims, "/profiler/Tc_rhs",        "cumulative rhs computation time")
      CALL creatd(fidres, 0, dims, "/profiler/Tc_adv_field",  "cumulative adv. fields computation time")
-     CALL creatd(fidres, 0, dims, "/profiler/Tc_comm",       "cumulative communication time")
+     CALL creatd(fidres, 0, dims, "/profiler/Tc_clos",       "cumulative closure computation time")
+     CALL creatd(fidres, 0, dims, "/profiler/Tc_ghost",      "cumulative ghosts communication time")
+     CALL creatd(fidres, 0, dims, "/profiler/Tc_coll",       "cumulative collision computation time")
      CALL creatd(fidres, 0, dims, "/profiler/Tc_poisson",    "cumulative poisson computation time")
      CALL creatd(fidres, 0, dims, "/profiler/Tc_Sapj",       "cumulative Sapj computation time")
-     CALL creatd(fidres, 0, dims, "/profiler/Tc_diag",       "cumulative sym computation time")
      CALL creatd(fidres, 0, dims, "/profiler/Tc_checkfield", "cumulative checkfield computation time")
+     CALL creatd(fidres, 0, dims, "/profiler/Tc_diag",       "cumulative diagnostics computation time")
      CALL creatd(fidres, 0, dims, "/profiler/Tc_step",       "cumulative total step computation time")
      CALL creatd(fidres, 0, dims, "/profiler/time",          "current simulation time")
 
@@ -266,7 +268,7 @@ SUBROUTINE diagnose(kstep)
      CALL closef(fidres)
 
   END IF
-  
+
   CALL cpu_time(t1_diag); tc_diag = tc_diag + (t1_diag - t0_diag)
 
 END SUBROUTINE diagnose
@@ -284,11 +286,13 @@ SUBROUTINE diagnose_0d
   ! Time measurement data
   CALL append(fidres, "/profiler/Tc_rhs",              tc_rhs,ionode=0)
   CALL append(fidres, "/profiler/Tc_adv_field",  tc_adv_field,ionode=0)
+  CALL append(fidres, "/profiler/Tc_clos",            tc_clos,ionode=0)
+  CALL append(fidres, "/profiler/Tc_ghost",          tc_ghost,ionode=0)
+  CALL append(fidres, "/profiler/Tc_coll",            tc_coll,ionode=0)
   CALL append(fidres, "/profiler/Tc_poisson",      tc_poisson,ionode=0)
   CALL append(fidres, "/profiler/Tc_Sapj",            tc_Sapj,ionode=0)
-  CALL append(fidres, "/profiler/Tc_diag",            tc_diag,ionode=0)
   CALL append(fidres, "/profiler/Tc_checkfield",tc_checkfield,ionode=0)
-  CALL append(fidres, "/profiler/Tc_comm",            tc_comm,ionode=0)
+  CALL append(fidres, "/profiler/Tc_diag",            tc_diag,ionode=0)
   CALL append(fidres, "/profiler/Tc_step",            tc_step,ionode=0)
   CALL append(fidres, "/profiler/time",                  time,ionode=0)
   ! Processing data
diff --git a/src/ghosts_mod.F90 b/src/ghosts_mod.F90
index b3a32d21..416b7a3c 100644
--- a/src/ghosts_mod.F90
+++ b/src/ghosts_mod.F90
@@ -13,7 +13,7 @@ PUBLIC :: update_ghosts
 CONTAINS
 
 SUBROUTINE update_ghosts
-    CALL cpu_time(t0_comm)
+    CALL cpu_time(t0_ghost)
 
     IF (num_procs_p .GT. 1) THEN ! Do it only if we share the p
         CALL MPI_BARRIER(MPI_COMM_WORLD,ierr)
@@ -22,9 +22,9 @@ SUBROUTINE update_ghosts
         CALL MPI_BARRIER(MPI_COMM_WORLD,ierr)
         CALL update_ghosts_p_i
     ENDIF
-    
-    CALL cpu_time(t1_comm)
-    tc_comm = tc_comm + (t1_comm - t0_comm)
+
+    CALL cpu_time(t1_ghost)
+    tc_ghost = tc_ghost + (t1_ghost - t0_ghost)
 END SUBROUTINE update_ghosts
 
 
diff --git a/src/poisson.F90 b/src/poisson.F90
index c220d812..9aff5dae 100644
--- a/src/poisson.F90
+++ b/src/poisson.F90
@@ -65,14 +65,8 @@ SUBROUTINE poisson
     ! Cancel origin singularity
     IF ((ikr_0 .GE. ikrs) .AND. (ikr_0 .LE. ikre)) phi(ikr_0,ikz_0) = 0
 
-    ! Execution time end
-    CALL cpu_time(t1_poisson)
-    tc_poisson = tc_poisson + (t1_poisson - t0_poisson)
-
   ENDIF
 
-  CALL cpu_time(t0_comm)
-
   root_bcast = 0 ! Proc zero computes phi for every p
 
   !!!!! This is a manual way to do MPI_BCAST !!!!!!!!!!!
@@ -105,9 +99,8 @@ SUBROUTINE poisson
     ENDIF
   ENDIF
 
-  CALL cpu_time(t1_comm)
-  tc_comm = tc_comm + (t1_comm - t0_comm)
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
+  ! Execution time end
+  CALL cpu_time(t1_poisson)
+  tc_poisson = tc_poisson + (t1_poisson - t0_poisson)
   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 END SUBROUTINE poisson
-- 
GitLab