From 5931c32a90061910d9c325ddc16d3dd62f7af98b Mon Sep 17 00:00:00 2001
From: Antoine Cyril David Hoffmann <ahoffman@spcpc606.epfl.ch>
Date: Wed, 25 May 2022 17:42:44 +0200
Subject: [PATCH] implementing gatherv output

---
 src/array_mod.F90                |   3 +-
 src/collision_mod.F90            |   4 +-
 src/diag_headers/diagnose_full.h |   7 +
 src/diagnose.F90                 | 503 ++++++++++++++++++++++++++++++-
 src/grid_mod.F90                 |  20 +-
 src/memory.F90                   |   4 +
 src/parallel_mod.F90             | 144 +++++++--
 src/ppinit.F90                   |  16 +-
 wk/analysis_3D.m                 |   8 +-
 wk/analysis_header.m             |   4 +-
 wk/debug_script.m                |  18 ++
 11 files changed, 679 insertions(+), 52 deletions(-)
 create mode 100644 wk/debug_script.m

diff --git a/src/array_mod.F90 b/src/array_mod.F90
index b423cc2a..ef3d3a40 100644
--- a/src/array_mod.F90
+++ b/src/array_mod.F90
@@ -67,13 +67,14 @@ MODULE array
   ! moments
   COMPLEX(dp), DIMENSION(:,:,:,:,:), ALLOCATABLE :: moments_e_full
   COMPLEX(dp), DIMENSION(:,:,:,:,:), ALLOCATABLE :: moments_i_full
-  
+
   ! ES potential
   COMPLEX(dp), DIMENSION(:,:,:), ALLOCATABLE :: phi_full
 
   ! Gyrocenter density for electron and ions (ikx,iky,iz)
   COMPLEX(dp), DIMENSION(:,:,:), ALLOCATABLE :: Ne00
   COMPLEX(dp), DIMENSION(:,:,:), ALLOCATABLE :: Ni00
+  COMPLEX(dp), DIMENSION(:,:,:), ALLOCATABLE :: Ni00_full
 
   ! Kinetic spectrum sum_kx,ky(|Napj(z)|^2), (ip,ij,iz)
   REAL(dp), DIMENSION(:,:,:), ALLOCATABLE :: Nepjz
diff --git a/src/collision_mod.F90 b/src/collision_mod.F90
index ea43b1d3..f2ca2956 100644
--- a/src/collision_mod.F90
+++ b/src/collision_mod.F90
@@ -499,7 +499,7 @@ CONTAINS
                   ! Sum up all the sub collision terms on root 0
                   CALL MPI_REDUCE(local_sum_e, buffer_e, total_np_e, MPI_DOUBLE_COMPLEX, MPI_SUM, 0, comm_p, ierr)
                   ! distribute the sum over the process among p
-                  CALL MPI_SCATTERV(buffer_e, counts_np_e, displs_np_e, MPI_DOUBLE_COMPLEX,&
+                  CALL MPI_SCATTERV(buffer_e, rcv_p_e, dsp_p_e, MPI_DOUBLE_COMPLEX,&
                                     TColl_distr_e, local_np_e, MPI_DOUBLE_COMPLEX,&
                                     0, comm_p, ierr)
                 ELSE
@@ -522,7 +522,7 @@ CONTAINS
               CALL MPI_REDUCE(local_sum_i, buffer_i, total_np_i, MPI_DOUBLE_COMPLEX, MPI_SUM, 0, comm_p, ierr)
               ! buffer contains the entire collision term along p, we scatter it between
               ! the other processes (use of scatterv since Pmax/Np is not an integer)
-              CALL MPI_SCATTERV(buffer_i, counts_np_i, displs_np_i, MPI_DOUBLE_COMPLEX,&
+              CALL MPI_SCATTERV(buffer_i, rcv_p_i, dsp_p_i, MPI_DOUBLE_COMPLEX,&
                                 TColl_distr_i, local_np_i, MPI_DOUBLE_COMPLEX, &
                                 0, comm_p, ierr)
             ELSE
diff --git a/src/diag_headers/diagnose_full.h b/src/diag_headers/diagnose_full.h
index 93b45ca0..091ff4b0 100644
--- a/src/diag_headers/diagnose_full.h
+++ b/src/diag_headers/diagnose_full.h
@@ -115,9 +115,11 @@ IF ((kstep .EQ. 0)) THEN
     IF(KIN_E)&
     CALL creatg(fidres, "/data/var3d/Ne00", "Ne00")
     CALL creatg(fidres, "/data/var3d/Ni00", "Ni00")
+    CALL creatg(fidres, "/data/var3d/Ni00_gatherv", "Ni00_gatherv")
     IF(KIN_E)&
     CALL creatg(fidres, "/data/var3d/Nepjz", "Nepjz")
     CALL creatg(fidres, "/data/var3d/Nipjz", "Nipjz")
+    CALL creatg(fidres, "/data/var3d/Nipjz_full", "Nipjz")
    ENDIF
 
    IF (write_dens) THEN
@@ -317,6 +319,11 @@ SUBROUTINE diagnose_3d
     ENDIF
     CALL write_field3d_kykxz(Ni00(ikys:ikye,ikxs:ikxe,izs:ize), 'Ni00')
 
+    WRITE(dset_name, "(A, '/', i6.6)") "/data/var3d/Ni00_gatherv", iframe3d
+    CALL gather_xyz(Ni00(ikys:ikye,1:Nkx,izs:ize),Ni00_full(1:Nky,1:Nkx,1:Nz))
+    CALL putarr(fidres, dset_name, Ni00_full(1:Nky,1:Nkx,1:Nz), ionode=0)
+    CALL attach(fidres, dset_name, "time", time)
+
     CALL compute_Napjz_spectrum
     IF(KIN_E) &
     CALL write_field3d_pjz_e(Nepjz(ips_e:ipe_e,ijs_e:ije_e,izs:ize), 'Nepjz')
diff --git a/src/diagnose.F90 b/src/diagnose.F90
index 4296d0eb..729b0e50 100644
--- a/src/diagnose.F90
+++ b/src/diagnose.F90
@@ -26,12 +26,6 @@ SUBROUTINE diagnose(kstep)
   END IF
   !! Specific diagnostic calls
   CALL diagnose_full(kstep)
-  ! IF(nsave_5d .GT. 0) CALL diagnose_moments(kstep)
-  ! IF(nsave_3d .GT. 0) CALL diagnose_momspectrum(kstep)
-  ! IF(nsave_3d .GT. 0) CALL diagnose_fields(kstep)
-  ! IF(nsave_0d .GT. 0) CALL diagnose_profiler(kstep)
-  ! IF(nsave_0d .GT. 0) CALL diagnose_gridgeom(kstep)
-  ! IF(nsave_0d .GT. 0) CALL diagnose_timetraces(kstep)
 
   CALL cpu_time(t1_diag); tc_diag = tc_diag + (t1_diag - t0_diag)
 
@@ -106,5 +100,498 @@ SUBROUTINE init_outfile(comm,file0,file,fid)
   CALL putfile(fid, TRIM(str), TRIM(input_fname),ionode=0)
 END SUBROUTINE init_outfile
 
-!! Auxiliary routines hidden in headers
-INCLUDE 'diag_headers/diagnose_full.h'
+SUBROUTINE diagnose_full(kstep)
+USE basic
+USE grid
+USE diagnostics_par
+USE futils, ONLY: creatf, creatg, creatd, closef, putarr, putfile, attach, openf, putarrnd
+USE array
+USE model
+USE initial_par
+USE fields
+USE time_integration
+USE parallel
+USE prec_const
+USE collision, ONLY: coll_outputinputs
+USE geometry
+IMPLICIT NONE
+
+INTEGER, INTENT(in) :: kstep
+INTEGER, parameter  :: BUFSIZE = 2
+INTEGER :: rank = 0
+INTEGER :: dims(1) = (/0/)
+INTEGER :: cp_counter = 0
+CHARACTER(len=256) :: str,test_
+!____________________________________________________________________________
+!                   1.   Initial diagnostics
+
+IF ((kstep .EQ. 0)) THEN
+  CALL init_outfile(comm0,   resfile0,resfile,fidres)
+
+  ! Profiler time measurement
+  CALL creatg(fidres, "/profiler", "performance analysis")
+  CALL creatd(fidres, 0, dims, "/profiler/Tc_rhs",        "cumulative rhs computation time")
+  CALL creatd(fidres, 0, dims, "/profiler/Tc_adv_field",  "cumulative adv. fields computation time")
+  CALL creatd(fidres, 0, dims, "/profiler/Tc_clos",       "cumulative closure computation time")
+  CALL creatd(fidres, 0, dims, "/profiler/Tc_ghost",       "cumulative communication time")
+  CALL creatd(fidres, 0, dims, "/profiler/Tc_coll",       "cumulative collision computation time")
+  CALL creatd(fidres, 0, dims, "/profiler/Tc_poisson",    "cumulative poisson computation time")
+  CALL creatd(fidres, 0, dims, "/profiler/Tc_Sapj",       "cumulative Sapj computation time")
+  CALL creatd(fidres, 0, dims, "/profiler/Tc_checkfield", "cumulative checkfield computation time")
+  CALL creatd(fidres, 0, dims, "/profiler/Tc_diag",       "cumulative sym computation time")
+  CALL creatd(fidres, 0, dims, "/profiler/Tc_process",    "cumulative process computation time")
+  CALL creatd(fidres, 0, dims, "/profiler/Tc_step",       "cumulative total step computation time")
+  CALL creatd(fidres, 0, dims, "/profiler/time",          "current simulation time")
+
+  ! Grid info
+  CALL creatg(fidres, "/data/grid", "Grid data")
+  CALL putarr(fidres, "/data/grid/coordkx",   kxarray_full,  "kx*rho_s0", ionode=0)
+  CALL putarr(fidres, "/data/grid/coordky",   kyarray_full,  "ky*rho_s0", ionode=0)
+  CALL putarr(fidres, "/data/grid/coordz",    zarray_full,   "z/R", ionode=0)
+  CALL putarr(fidres, "/data/grid/coordp_e" , parray_e_full, "p_e", ionode=0)
+  CALL putarr(fidres, "/data/grid/coordj_e" , jarray_e_full, "j_e", ionode=0)
+  CALL putarr(fidres, "/data/grid/coordp_i" , parray_i_full, "p_i", ionode=0)
+  CALL putarr(fidres, "/data/grid/coordj_i" , jarray_i_full, "j_i", ionode=0)
+
+  ! Metric info
+  CALL   creatg(fidres, "/data/metric", "Metric data")
+  CALL putarrnd(fidres, "/data/metric/gxx",            gxx(izs:ize,0:1), (/1, 1, 1/))
+  CALL putarrnd(fidres, "/data/metric/gxy",            gxy(izs:ize,0:1), (/1, 1, 1/))
+  CALL putarrnd(fidres, "/data/metric/gyy",            gyy(izs:ize,0:1), (/1, 1, 1/))
+  CALL putarrnd(fidres, "/data/metric/gyz",            gyz(izs:ize,0:1), (/1, 1, 1/))
+  CALL putarrnd(fidres, "/data/metric/gzz",            gzz(izs:ize,0:1), (/1, 1, 1/))
+  CALL putarrnd(fidres, "/data/metric/hatR",          hatR(izs:ize,0:1), (/1, 1, 1/))
+  CALL putarrnd(fidres, "/data/metric/hatZ",          hatZ(izs:ize,0:1), (/1, 1, 1/))
+  CALL putarrnd(fidres, "/data/metric/hatB",          hatB(izs:ize,0:1), (/1, 1, 1/))
+  CALL putarrnd(fidres, "/data/metric/gradxB",      gradxB(izs:ize,0:1), (/1, 1, 1/))
+  CALL putarrnd(fidres, "/data/metric/gradyB",      gradyB(izs:ize,0:1), (/1, 1, 1/))
+  CALL putarrnd(fidres, "/data/metric/gradzB",      gradzB(izs:ize,0:1), (/1, 1, 1/))
+  CALL putarrnd(fidres, "/data/metric/Jacobian",    Jacobian(izs:ize,0:1), (/1, 1, 1/))
+  CALL putarrnd(fidres, "/data/metric/gradz_coeff", gradz_coeff(izs:ize,0:1), (/1, 1, 1/))
+  CALL putarrnd(fidres, "/data/metric/Ckxky",       Ckxky(ikys:ikye,ikxs:ikxe,izs:ize,0:1), (/1, 1, 3/))
+  CALL putarrnd(fidres, "/data/metric/kernel_i",    kernel_i(ijs_i:ije_i,ikys:ikye,ikxs:ikxe,izs:ize,0:1), (/ 1, 2, 4/))
+
+  !  var0d group (gyro transport)
+  IF (nsave_0d .GT. 0) THEN
+   CALL creatg(fidres, "/data/var0d", "0d profiles")
+   CALL creatd(fidres, rank, dims,  "/data/var0d/time",     "Time t*c_s/R")
+   CALL creatd(fidres, rank, dims, "/data/var0d/cstep", "iteration number")
+
+   IF (write_gamma) THEN
+     CALL creatd(fidres, rank, dims, "/data/var0d/gflux_ri", "Radial gyro ion transport")
+     CALL creatd(fidres, rank, dims, "/data/var0d/pflux_ri", "Radial part ion transport")
+   ENDIF
+   IF (write_hf) THEN
+     CALL creatd(fidres, rank, dims, "/data/var0d/hflux_x", "Radial part ion heat flux")
+   ENDIF
+   IF (cstep==0) THEN
+     iframe0d=0
+   ENDIF
+   CALL attach(fidres,"/data/var0d/" , "frames", iframe0d)
+  END IF
+
+
+  !  var2d group (??)
+  IF (nsave_2d .GT. 0) THEN
+   CALL creatg(fidres, "/data/var2d", "2d profiles")
+   CALL creatd(fidres, rank, dims,  "/data/var2d/time",     "Time t*c_s/R")
+   CALL creatd(fidres, rank, dims, "/data/var2d/cstep", "iteration number")
+   IF (cstep==0) THEN
+     iframe2d=0
+   ENDIF
+   CALL attach(fidres,"/data/var2d/" , "frames", iframe2d)
+  END IF
+
+  !  var3d group (electro. pot., Ni00 moment)
+  IF (nsave_3d .GT. 0) THEN
+   CALL creatg(fidres, "/data/var3d", "3d profiles")
+   CALL creatd(fidres, rank, dims,  "/data/var3d/time",     "Time t*c_s/R")
+   CALL creatd(fidres, rank, dims, "/data/var3d/cstep", "iteration number")
+
+   IF (write_phi) CALL creatg(fidres, "/data/var3d/phi", "phi")
+   IF (write_phi) THEN
+    CALL creatg(fidres, "/data/var3d/phi_gatherv", "phi_gatherv")
+   ENDIF
+
+   IF (write_Na00) THEN
+    IF(KIN_E)&
+    CALL creatg(fidres, "/data/var3d/Ne00", "Ne00")
+    CALL creatg(fidres, "/data/var3d/Ni00", "Ni00")
+    CALL creatg(fidres, "/data/var3d/Ni00_gatherv", "Ni00_gatherv")
+    IF(KIN_E)&
+    CALL creatg(fidres, "/data/var3d/Nepjz", "Nepjz")
+    CALL creatg(fidres, "/data/var3d/Nipjz", "Nipjz")
+    CALL creatg(fidres, "/data/var3d/Nipjz_gatherv", "Nipjz_gatherv")
+   ENDIF
+
+   IF (write_dens) THEN
+     IF(KIN_E)&
+    CALL creatg(fidres, "/data/var3d/dens_e", "dens_e")
+    CALL creatg(fidres, "/data/var3d/dens_i", "dens_i")
+   ENDIF
+
+   IF (write_fvel) THEN
+     IF(KIN_E) THEN
+     CALL creatg(fidres, "/data/var3d/upar_e", "upar_e")
+     CALL creatg(fidres, "/data/var3d/uper_e", "uper_e")
+     ENDIF
+     CALL creatg(fidres, "/data/var3d/upar_i", "upar_i")
+     CALL creatg(fidres, "/data/var3d/uper_i", "uper_i")
+   ENDIF
+
+   IF (write_temp) THEN
+     IF(KIN_E) THEN
+     CALL creatg(fidres, "/data/var3d/Tper_e", "Tper_e")
+     CALL creatg(fidres, "/data/var3d/Tpar_e", "Tpar_e")
+     CALL creatg(fidres, "/data/var3d/temp_e", "temp_e")
+     ENDIF
+     CALL creatg(fidres, "/data/var3d/Tper_i", "Tper_i")
+     CALL creatg(fidres, "/data/var3d/Tpar_i", "Tpar_i")
+     CALL creatg(fidres, "/data/var3d/temp_i", "temp_i")
+   ENDIF
+
+   IF (cstep==0) THEN
+     iframe3d=0
+   ENDIF
+   CALL attach(fidres,"/data/var3d/" , "frames", iframe3d)
+  END IF
+
+  !  var5d group (moments)
+  IF (nsave_5d .GT. 0) THEN
+    CALL creatg(fidres, "/data/var5d", "5d profiles")
+    CALL creatd(fidres, rank, dims,  "/data/var5d/time",     "Time t*c_s/R")
+    CALL creatd(fidres, rank, dims, "/data/var5d/cstep", "iteration number")
+
+    IF (write_Napj) THEN
+      IF(KIN_E)&
+     CALL creatg(fidres, "/data/var5d/moments_e", "moments_e")
+     CALL creatg(fidres, "/data/var5d/moments_i", "moments_i")
+    ENDIF
+
+    IF (write_Sapj) THEN
+      IF(KIN_E)&
+      CALL creatg(fidres, "/data/var5d/Sepj", "Sepj")
+      CALL creatg(fidres, "/data/var5d/Sipj", "Sipj")
+    ENDIF
+
+    IF (cstep==0) THEN
+     iframe5d=0
+    END IF
+    CALL attach(fidres,"/data/var5d/" , "frames", iframe5d)
+  END IF
+ENDIF
+
+!_____________________________________________________________________________
+!                   2.   Periodic diagnostics
+!
+IF (kstep .GE. 0) THEN
+
+   !                       2.1   0d history arrays
+   IF (nsave_0d .GT. 0) THEN
+      IF ( MOD(cstep, nsave_0d) == 0 ) THEN
+         CALL diagnose_0d
+      END IF
+   END IF
+
+   !                       2.2   1d profiles
+   ! empty in our case
+
+   !                       2.3   2d profiles
+   ! empty in our case
+
+
+   !                       2.3   2d profiles
+   IF (nsave_3d .GT. 0) THEN
+      IF (MOD(cstep, nsave_3d) == 0) THEN
+        CALL diagnose_3d
+      END IF
+   END IF
+
+   !                       2.4   3d profiles
+   IF (nsave_5d .GT. 0 .AND. cstep .GT. 0) THEN
+      IF (MOD(cstep, nsave_5d) == 0) THEN
+         CALL diagnose_5d
+      END IF
+   END IF
+
+!_____________________________________________________________________________
+!                   3.   Final diagnostics
+
+ELSEIF (kstep .EQ. -1) THEN
+   CALL attach(fidres, "/data/input","cpu_time",finish-start)
+
+   ! make a checkpoint at last timestep if not crashed
+   IF(.NOT. crashed) THEN
+     IF(my_id .EQ. 0) write(*,*) 'Saving last state'
+     IF (nsave_5d .GT. 0) &
+     CALL diagnose_5d
+   ENDIF
+
+   !   Close all diagnostic files
+   CALL mpi_barrier(MPI_COMM_WORLD, ierr)
+   CALL closef(fidres)
+
+END IF
+
+END SUBROUTINE diagnose_full
+
+!!-------------- Auxiliary routines -----------------!!
+SUBROUTINE diagnose_0d
+
+  USE basic
+  USE futils, ONLY: append, attach, getatt
+  USE diagnostics_par
+  USE prec_const
+  USE processing
+  USE model, ONLY: KIN_E
+
+  IMPLICIT NONE
+  ! Time measurement data
+  CALL append(fidres, "/profiler/Tc_rhs",              tc_rhs,ionode=0)
+  CALL append(fidres, "/profiler/Tc_adv_field",  tc_adv_field,ionode=0)
+  CALL append(fidres, "/profiler/Tc_clos",            tc_clos,ionode=0)
+  CALL append(fidres, "/profiler/Tc_ghost",          tc_ghost,ionode=0)
+  CALL append(fidres, "/profiler/Tc_coll",            tc_coll,ionode=0)
+  CALL append(fidres, "/profiler/Tc_poisson",      tc_poisson,ionode=0)
+  CALL append(fidres, "/profiler/Tc_Sapj",            tc_Sapj,ionode=0)
+  CALL append(fidres, "/profiler/Tc_checkfield",tc_checkfield,ionode=0)
+  CALL append(fidres, "/profiler/Tc_diag",            tc_diag,ionode=0)
+  CALL append(fidres, "/profiler/Tc_process",      tc_process,ionode=0)
+  CALL append(fidres, "/profiler/Tc_step",            tc_step,ionode=0)
+  CALL append(fidres, "/profiler/time",                  time,ionode=0)
+  ! Processing data
+  CALL append(fidres,  "/data/var0d/time",           time,ionode=0)
+  CALL append(fidres, "/data/var0d/cstep", real(cstep,dp),ionode=0)
+  CALL getatt(fidres,      "/data/var0d/",       "frames",iframe2d)
+  iframe0d=iframe0d+1
+  CALL attach(fidres,"/data/var0d/" , "frames", iframe0d)
+  ! Ion transport data
+  IF (write_gamma) THEN
+    CALL compute_radial_ion_transport
+    CALL append(fidres, "/data/var0d/gflux_ri",gflux_ri,ionode=0)
+    CALL append(fidres, "/data/var0d/pflux_ri",pflux_ri,ionode=0)
+  ENDIF
+  IF (write_hf) THEN
+    CALL compute_radial_heatflux
+    CALL append(fidres, "/data/var0d/hflux_x",hflux_x,ionode=0)
+  ENDIF
+END SUBROUTINE diagnose_0d
+
+SUBROUTINE diagnose_3d
+
+  USE basic
+  USE futils, ONLY: append, getatt, attach, putarrnd, putarr
+  USE fields
+  USE array
+  USE grid, ONLY: ikxs,ikxe, ikys,ikye, Nkx, Nky, local_nkx, ikx, iky, ips_e, ips_i
+  USE time_integration
+  USE diagnostics_par
+  USE prec_const
+  USE processing
+  USE parallel, ONLY : gather_xyz
+  USE model, ONLY: KIN_E
+
+  IMPLICIT NONE
+
+  INTEGER        :: i_, root, world_rank, world_size
+  CHARACTER(256) :: dset_name
+  CALL append(fidres,  "/data/var3d/time",           time,ionode=0)
+  CALL append(fidres, "/data/var3d/cstep", real(cstep,dp),ionode=0)
+  CALL getatt(fidres,      "/data/var3d/",       "frames",iframe3d)
+  iframe3d=iframe3d+1
+  CALL attach(fidres,"/data/var3d/" , "frames", iframe3d)
+
+  IF (write_phi) CALL write_field3d_kykxz(phi (ikys:ikye,ikxs:ikxe,izs:ize), 'phi')
+  IF (write_phi) THEN
+    CALL gather_xyz(phi(ikys:ikye,1:Nkx,izs:ize),phi_full(1:Nky,1:Nkx,1:Nz))
+    WRITE(dset_name, "(A, '/', i6.6)") "/data/var3d/phi_gatherv", iframe3d
+    CALL putarr(fidres, dset_name, phi_full(1:Nky,1:Nkx,1:Nz), ionode=0)
+    CALL attach(fidres, dset_name, "time", time)
+  ENDIF
+
+  IF (write_Na00) THEN
+    IF(KIN_E)THEN
+    IF (ips_e .EQ. 1) &
+      Ne00(ikys:ikye,ikxs:ikxe,izs:ize) = moments_e(ips_e,1,ikys:ikye,ikxs:ikxe,izs:ize,updatetlevel)
+    CALL write_field3d_kykxz(Ne00(ikys:ikye,ikxs:ikxe,izs:ize), 'Ne00')
+    ENDIF
+    IF (ips_i .EQ. 1) &
+      Ni00(ikys:ikye,ikxs:ikxe,izs:ize) = moments_i(ips_i,1,ikys:ikye,ikxs:ikxe,izs:ize,updatetlevel)
+    CALL write_field3d_kykxz(Ni00(ikys:ikye,ikxs:ikxe,izs:ize), 'Ni00')
+
+    IF (ips_i .EQ. 1) &
+      CALL gather_xyz(Ni00(ikys:ikye,1:Nkx,izs:ize),Ni00_full(1:Nky,1:Nkx,1:Nz))
+    WRITE(dset_name, "(A, '/', i6.6)") "/data/var3d/Ni00_gatherv", iframe3d
+    CALL putarr(fidres, dset_name, Ni00_full(1:Nky,1:Nkx,1:Nz), ionode=0)
+    CALL attach(fidres, dset_name, "time", time)
+
+    ! CALL compute_Napjz_spectrum
+    ! IF(KIN_E) &
+    ! CALL write_field3d_pjz_e(Nepjz(ips_e:ipe_e,ijs_e:ije_e,izs:ize), 'Nepjz')
+    ! CALL write_field3d_pjz_i(Nipjz(ips_i:ipe_i,ijs_i:ije_i,izs:ize), 'Nipjz')
+  ENDIF
+
+  !! Fuid moments
+  IF (write_dens .OR. write_fvel .OR. write_temp) &
+  CALL compute_fluid_moments
+
+  IF (write_dens) THEN
+    IF(KIN_E)&
+    CALL write_field3d_kykxz(dens_e(ikys:ikye,ikxs:ikxe,izs:ize), 'dens_e')
+    CALL write_field3d_kykxz(dens_i(ikys:ikye,ikxs:ikxe,izs:ize), 'dens_i')
+  ENDIF
+
+  IF (write_fvel) THEN
+    IF(KIN_E)&
+    CALL write_field3d_kykxz(upar_e(ikys:ikye,ikxs:ikxe,izs:ize), 'upar_e')
+    CALL write_field3d_kykxz(upar_i(ikys:ikye,ikxs:ikxe,izs:ize), 'upar_i')
+    IF(KIN_E)&
+    CALL write_field3d_kykxz(uper_e(ikys:ikye,ikxs:ikxe,izs:ize), 'uper_e')
+    CALL write_field3d_kykxz(uper_i(ikys:ikye,ikxs:ikxe,izs:ize), 'uper_i')
+  ENDIF
+
+  IF (write_temp) THEN
+    IF(KIN_E)&
+    CALL write_field3d_kykxz(Tpar_e(ikys:ikye,ikxs:ikxe,izs:ize), 'Tpar_e')
+    CALL write_field3d_kykxz(Tpar_i(ikys:ikye,ikxs:ikxe,izs:ize), 'Tpar_i')
+    IF(KIN_E)&
+    CALL write_field3d_kykxz(Tper_e(ikys:ikye,ikxs:ikxe,izs:ize), 'Tper_e')
+    CALL write_field3d_kykxz(Tper_i(ikys:ikye,ikxs:ikxe,izs:ize), 'Tper_i')
+    IF(KIN_E)&
+    CALL write_field3d_kykxz(temp_e(ikys:ikye,ikxs:ikxe,izs:ize), 'temp_e')
+    CALL write_field3d_kykxz(temp_i(ikys:ikye,ikxs:ikxe,izs:ize), 'temp_i')
+  ENDIF
+
+  CONTAINS
+
+  SUBROUTINE write_field3d_kykxz(field, text)
+    IMPLICIT NONE
+    COMPLEX(dp), DIMENSION(ikys:ikye,ikxs:ikxe, izs:ize), INTENT(IN) :: field
+    CHARACTER(*), INTENT(IN) :: text
+    CHARACTER(256) :: dset_name
+    WRITE(dset_name, "(A, '/', A, '/', i6.6)") "/data/var3d", TRIM(text), iframe3d
+    IF (num_procs .EQ. 1) THEN ! no data distribution
+      CALL putarr(fidres, dset_name, field(ikys:ikye,ikxs:ikxe, izs:ize), ionode=0)
+    ELSE
+      CALL putarrnd(fidres, dset_name, field(ikys:ikye,ikxs:ikxe, izs:ize),  (/1, 1, 3/))
+    ENDIF
+    CALL attach(fidres, dset_name, "time", time)
+  END SUBROUTINE write_field3d_kykxz
+
+  SUBROUTINE write_field3d_pjz_i(field, text)
+    IMPLICIT NONE
+    REAL(dp), DIMENSION(ips_i:ipe_i,ijs_i:ije_i,izs:ize), INTENT(IN) :: field
+    CHARACTER(*), INTENT(IN) :: text
+    CHARACTER(LEN=50) :: dset_name
+    WRITE(dset_name, "(A, '/', A, '/', i6.6)") "/data/var3d", TRIM(text), iframe3d
+    IF (num_procs .EQ. 1) THEN ! no data distribution
+      CALL putarr(fidres, dset_name, field(ips_i:ipe_i,ijs_i:ije_i,izs:ize), ionode=0)
+    ELSE
+      CALL putarrnd(fidres, dset_name, field(ips_i:ipe_i,ijs_i:ije_i,izs:ize),  (/1, 0, 3/))
+    ENDIF
+    CALL attach(fidres, dset_name, "time", time)
+  END SUBROUTINE write_field3d_pjz_i
+
+  SUBROUTINE write_field3d_pjz_e(field, text)
+    IMPLICIT NONE
+    REAL(dp), DIMENSION(ips_e:ipe_e,ijs_e:ije_e,izs:ize), INTENT(IN) :: field
+    CHARACTER(*), INTENT(IN) :: text
+    CHARACTER(LEN=50) :: dset_name
+    WRITE(dset_name, "(A, '/', A, '/', i6.6)") "/data/var3d", TRIM(text), iframe3d
+    IF (num_procs .EQ. 1) THEN ! no data distribution
+      CALL putarr(fidres, dset_name, field(ips_e:ipe_e,ijs_e:ije_e,izs:ize), ionode=0)
+    ELSE
+      CALL putarrnd(fidres, dset_name, field(ips_e:ipe_e,ijs_e:ije_e,izs:ize),  (/1, 0, 3/))
+    ENDIF
+    CALL attach(fidres, dset_name, "time", time)
+  END SUBROUTINE write_field3d_pjz_e
+
+END SUBROUTINE diagnose_3d
+
+SUBROUTINE diagnose_5d
+
+  USE basic
+  USE futils, ONLY: append, getatt, attach, putarrnd
+  USE fields
+  USE array!, ONLY: Sepj, Sipj
+  USE grid, ONLY: ips_e,ipe_e, ips_i, ipe_i, &
+                 ijs_e,ije_e, ijs_i, ije_i, &
+                 ikxs,ikxe,ikys,ikye,izs,ize
+  USE time_integration
+  USE diagnostics_par
+  USE prec_const
+  USE model, ONLY: KIN_E
+  IMPLICIT NONE
+
+  CALL append(fidres,  "/data/var5d/time",           time,ionode=0)
+  CALL append(fidres, "/data/var5d/cstep", real(cstep,dp),ionode=0)
+  CALL getatt(fidres,      "/data/var5d/",       "frames",iframe5d)
+  iframe5d=iframe5d+1
+  CALL attach(fidres,"/data/var5d/" , "frames", iframe5d)
+
+  IF (write_Napj) THEN
+   IF(KIN_E)&
+  CALL write_field5d_e(moments_e(ips_e:ipe_e,ijs_e:ije_e,ikys:ikye,ikxs:ikxe,izs:ize,updatetlevel), 'moments_e')
+  CALL write_field5d_i(moments_i(ips_i:ipe_i,ijs_i:ije_i,ikys:ikye,ikxs:ikxe,izs:ize,updatetlevel), 'moments_i')
+  ENDIF
+
+  IF (write_Sapj) THEN
+   IF(KIN_E)&
+   CALL write_field5d_e(Sepj(ips_e:ipe_e,ijs_e:ije_e,ikys:ikye,ikxs:ikxe,izs:ize), 'Sepj')
+   CALL write_field5d_i(Sipj(ips_i:ipe_i,ijs_i:ije_i,ikys:ikye,ikxs:ikxe,izs:ize), 'Sipj')
+  ENDIF
+
+  CONTAINS
+
+  SUBROUTINE write_field5d_e(field, text)
+    USE futils, ONLY: attach, putarr, putarrnd
+    USE grid,   ONLY: ips_e,ipe_e, ijs_e,ije_e, ikxs,ikxe, ikys,ikye, izs,ize
+    USE prec_const
+    IMPLICIT NONE
+
+    COMPLEX(dp), DIMENSION(ips_e:ipe_e,ijs_e:ije_e,ikys:ikye,ikxs:ikxe,izs:ize), INTENT(IN) :: field
+    CHARACTER(*), INTENT(IN) :: text
+
+    CHARACTER(LEN=50) :: dset_name
+
+    WRITE(dset_name, "(A, '/', A, '/', i6.6)") "/data/var5d", TRIM(text), iframe5d
+    IF (num_procs .EQ. 1) THEN
+     CALL putarr(fidres, dset_name, field(ips_e:ipe_e,ijs_e:ije_e,ikys:ikye,ikxs:ikxe,izs:ize), ionode=0)
+    ELSE
+     CALL putarrnd(fidres, dset_name, field(ips_e:ipe_e,ijs_e:ije_e,ikys:ikye,ikxs:ikxe,izs:ize),  (/1,3,5/))
+    ENDIF
+    CALL attach(fidres, dset_name, 'cstep', cstep)
+    CALL attach(fidres, dset_name, 'time', time)
+    CALL attach(fidres, dset_name, 'jobnum', jobnum)
+    CALL attach(fidres, dset_name, 'dt', dt)
+    CALL attach(fidres, dset_name, 'iframe2d', iframe2d)
+    CALL attach(fidres, dset_name, 'iframe5d', iframe5d)
+
+  END SUBROUTINE write_field5d_e
+
+  SUBROUTINE write_field5d_i(field, text)
+    USE futils, ONLY: attach, putarr, putarrnd
+    USE grid, ONLY: ips_i,ipe_i, ijs_i,ije_i, ikxs,ikxe, ikys,ikye, izs,ize
+    USE prec_const
+    IMPLICIT NONE
+
+    COMPLEX(dp), DIMENSION(ips_i:ipe_i,ijs_i:ije_i,ikys:ikye,ikxs:ikxe,izs:ize), INTENT(IN) :: field
+    CHARACTER(*), INTENT(IN) :: text
+
+    CHARACTER(LEN=50) :: dset_name
+
+    WRITE(dset_name, "(A, '/', A, '/', i6.6)") "/data/var5d", TRIM(text), iframe5d
+    IF (num_procs .EQ. 1) THEN
+      CALL putarr(fidres, dset_name, field(ips_i:ipe_i,ijs_i:ije_i,ikys:ikye,ikxs:ikxe,izs:ize), ionode=0)
+    ELSE
+      CALL putarrnd(fidres, dset_name, field(ips_i:ipe_i,ijs_i:ije_i,ikys:ikye,ikxs:ikxe,izs:ize),  (/1,3,5/))
+    ENDIF
+    CALL attach(fidres, dset_name, 'cstep', cstep)
+    CALL attach(fidres, dset_name, 'time', time)
+    CALL attach(fidres, dset_name, 'jobnum', jobnum)
+    CALL attach(fidres, dset_name, 'dt', dt)
+    CALL attach(fidres, dset_name, 'iframe2d', iframe2d)
+    CALL attach(fidres, dset_name, 'iframe5d', iframe5d)
+
+  END SUBROUTINE write_field5d_i
+END SUBROUTINE diagnose_5d
diff --git a/src/grid_mod.F90 b/src/grid_mod.F90
index 139cd0ab..62154f9d 100644
--- a/src/grid_mod.F90
+++ b/src/grid_mod.F90
@@ -58,8 +58,8 @@ MODULE grid
   INTEGER,             PUBLIC :: local_np_e, local_np_i
   INTEGER,             PUBLIC :: total_np_e, total_np_i
   integer(C_INTPTR_T), PUBLIC :: local_np_e_offset, local_np_i_offset
-  INTEGER, DIMENSION(:), ALLOCATABLE, PUBLIC :: counts_np_e, counts_np_i
-  INTEGER, DIMENSION(:), ALLOCATABLE, PUBLIC :: displs_np_e, displs_np_i
+  INTEGER, DIMENSION(:), ALLOCATABLE, PUBLIC :: rcv_p_e, rcv_p_i
+  INTEGER, DIMENSION(:), ALLOCATABLE, PUBLIC :: dsp_p_e, dsp_p_i
   ! "" for z
   INTEGER,             PUBLIC :: local_nz
   INTEGER,             PUBLIC :: total_nz
@@ -179,17 +179,17 @@ CONTAINS
     ipgs_e = ips_e - 2/deltape; ipge_e = ipe_e + 2/deltape;
     ipgs_i = ips_i - 2/deltapi; ipge_i = ipe_i + 2/deltapi;
     ! List of shift and local numbers between the different processes (used in scatterv and gatherv)
-    ALLOCATE(counts_np_e (1:num_procs_p))
-    ALLOCATE(counts_np_i (1:num_procs_p))
-    ALLOCATE(displs_np_e (1:num_procs_p))
-    ALLOCATE(displs_np_i (1:num_procs_p))
+    ALLOCATE(rcv_p_e (1:num_procs_p))
+    ALLOCATE(rcv_p_i (1:num_procs_p))
+    ALLOCATE(dsp_p_e (1:num_procs_p))
+    ALLOCATE(dsp_p_i (1:num_procs_p))
     DO in = 0,num_procs_p-1
       CALL decomp1D(total_np_e, num_procs_p, in, istart, iend)
-      counts_np_e(in+1) = iend-istart+1
-      displs_np_e(in+1) = istart-1
+      rcv_p_e(in+1) = iend-istart+1
+      dsp_p_e(in+1) = istart-1
       CALL decomp1D(total_np_i, num_procs_p, in, istart, iend)
-      counts_np_i(in+1) = iend-istart+1
-      displs_np_i(in+1) = istart-1
+      rcv_p_i(in+1) = iend-istart+1
+      dsp_p_i(in+1) = istart-1
     ENDDO
 
     ! local grid computation
diff --git a/src/memory.F90 b/src/memory.F90
index 6fbaafde..3c313f53 100644
--- a/src/memory.F90
+++ b/src/memory.F90
@@ -58,6 +58,7 @@ SUBROUTINE memory
 
   !Ions arrays
   CALL allocate_array(             Ni00, ikys,ikye, ikxs,ikxe, izs,ize)
+  CALL allocate_array(        Ni00_full,     1,Nky,     1,Nkx,    1,Nz)
   CALL allocate_array(           dens_i, ikys,ikye, ikxs,ikxe, izs,ize)
   CALL allocate_array(           upar_i, ikys,ikye, ikxs,ikxe, izs,ize)
   CALL allocate_array(           uper_i, ikys,ikye, ikxs,ikxe, izs,ize)
@@ -125,4 +126,7 @@ SUBROUTINE memory
       ENDIF
       CALL allocate_array(  Ciipj, 1,(pmaxi+1)*(jmaxi+1), 1,(pmaxi+1)*(jmaxi+1), 1,1, 1,1, 1,1)
  ENDIF
+
+ !____________DIAGNOSTIC PURPOSE ONLY ARRAYS_______________
+
 END SUBROUTINE memory
diff --git a/src/parallel_mod.F90 b/src/parallel_mod.F90
index 1598bf33..52233774 100644
--- a/src/parallel_mod.F90
+++ b/src/parallel_mod.F90
@@ -1,18 +1,25 @@
 MODULE parallel
   USE basic
-  USE grid, ONLY: Nkx,Nky,Nz, ikys,ikye, izs,ize, local_nky, local_nz
+  USE grid, ONLY: Nkx,Nky,Nz, ikys,ikye, izs,ize, local_nky, local_nz, &
+                  local_np_e, local_np_i, total_np_e, total_np_i, &
+                  pmaxi, pmaxe, ips_e, ipe_e, ips_i, ipe_i, &
+                  jmaxi, jmaxe, ijs_e, ije_e, ijs_i, ije_i, &
+                  rcv_p_e, rcv_p_i, dsp_p_e, dsp_p_i
   use prec_const
   IMPLICIT NONE
   ! recieve and displacement counts for gatherv
-  INTEGER, DIMENSION(:), ALLOCATABLE :: rcv_z, rcv_y, dsp_z, dsp_y
+  INTEGER, DIMENSION(:), ALLOCATABLE :: rcv_y, dsp_y, rcv_zy, dsp_zy
+  INTEGER, DIMENSION(:), ALLOCATABLE :: rcv_zp_e, dsp_zp_e
+  INTEGER, DIMENSION(:), ALLOCATABLE :: rcv_zp_i, dsp_zp_i
 
   PUBLIC :: manual_0D_bcast, manual_3D_bcast, init_parallel_var, &
-            gather_xyz!, gather_pjz, gather_pjxyz
+            gather_xyz, gather_pjz_e, gather_pjz_i!, gather_pjxyz
 
 CONTAINS
 
   SUBROUTINE init_parallel_var
     INTEGER :: i_
+    !!!!!! XYZ gather variables
     !! Y reduction at constant x and z
     ! number of points recieved and displacement for the y reduction
     ALLOCATE(rcv_y(0:num_procs_ky-1),dsp_y(0:num_procs_ky-1)) !Displacement sizes for balance diagnostic
@@ -23,18 +30,44 @@ CONTAINS
     DO i_=1,num_procs_ky-1
        dsp_y(i_) =dsp_y(i_-1) + rcv_y(i_-1)
     END DO
-    print*, rcv_y, dsp_y
     !! Z reduction for full slices of y data but constant x
     ! number of points recieved and displacement for the z reduction
-    ALLOCATE(rcv_z(0:num_procs_z-1),dsp_z(0:num_procs_z-1)) !Displacement sizes for balance diagnostic
+    ALLOCATE(rcv_zy(0:num_procs_z-1),dsp_zy(0:num_procs_z-1)) !Displacement sizes for balance diagnostic
     ! all processes share their local number of points
-    CALL MPI_ALLGATHER(local_nz*Nky,1,MPI_INTEGER,rcv_z,1,MPI_INTEGER,comm_z,ierr)
+    CALL MPI_ALLGATHER(local_nz*Nky,1,MPI_INTEGER,rcv_zy,1,MPI_INTEGER,comm_z,ierr)
     ! the displacement array can be build from each local_np as
-    dsp_z(0)=0
+    dsp_zy(0)=0
     DO i_=1,num_procs_z-1
-       dsp_z(i_) =dsp_z(i_-1) + rcv_z(i_-1)
+       dsp_zy(i_) =dsp_zy(i_-1) + rcv_zy(i_-1)
     END DO
-    print*, rcv_z, dsp_z
+    print*, rcv_zy, dsp_zy
+
+    !!!!! PJZ gather variables
+    ! ELECTRONS
+    ! P variables are already defined in grid module (see rcv_p_a, dsp_p_a)
+    !! Z reduction for full slices of p data but constant j
+    ! number of points recieved and displacement for the z reduction
+    ALLOCATE(rcv_zp_e(0:num_procs_z-1),dsp_zp_e(0:num_procs_z-1)) !Displacement sizes for balance diagnostic
+    ! all processes share their local number of points
+    CALL MPI_ALLGATHER(local_nz*total_np_e,1,MPI_INTEGER,rcv_zp_e,1,MPI_INTEGER,comm_z,ierr)
+    ! the displacement array can be build from each local_np as
+    dsp_zp_e(0)=0
+    DO i_=1,num_procs_z-1
+       dsp_zp_e(i_) =dsp_zp_e(i_-1) + dsp_zp_e(i_-1)
+    END DO
+    ! IONS
+    ! P variables are already defined in grid module (see rcv_p_a, dsp_p_a)
+    !! Z reduction for full slices of p data but constant j
+    ! number of points recieved and displacement for the z reduction
+    ALLOCATE(rcv_zp_i(0:num_procs_z-1),dsp_zp_i(0:num_procs_z-1)) !Displacement sizes for balance diagnostic
+    ! all processes share their local number of points
+    CALL MPI_ALLGATHER(local_nz*total_np_i,1,MPI_INTEGER,rcv_zp_i,1,MPI_INTEGER,comm_z,ierr)
+    ! the displacement array can be build from each local_np as
+    dsp_zp_i(0)=0
+    DO i_=1,num_procs_z-1
+       dsp_zp_i(i_) =dsp_zp_i(i_-1) + dsp_zp_i(i_-1)
+    END DO
+
   END SUBROUTINE init_parallel_var
 
   !!!!! Gather a field in spatial coordinates on rank 0 !!!!!
@@ -45,26 +78,28 @@ CONTAINS
     COMPLEX(dp), DIMENSION(   1:Nky )           :: buffer_fy_cz !full  y, constant z
     COMPLEX(dp), DIMENSION(   1:Nky,  izs:ize ) :: buffer_fy_lz !full  y, local z
     COMPLEX(dp), DIMENSION(   1:Nky,    1:Nz  ) :: buffer_fy_fz !full  y, full  z
-    INTEGER :: send_count, root_p, root_z, root_ky, ix, iz
+    INTEGER :: snd_y, snd_z, root_p, root_z, root_ky, ix, iz
+
+    snd_y  = local_nky    ! Number of points to send along y (per z)
+    snd_z  = Nky*local_nz ! Number of points to send along z (full y)
 
     root_p = 0; root_z = 0; root_ky = 0
     IF(rank_p .EQ. root_p) THEN
       DO ix = 1,Nkx
         DO iz = izs,ize
           ! fill a buffer to contain a slice of data at constant kx and z
-          print*,iz
           buffer_ly_cz(ikys:ikye) = field_sub(ikys:ikye,ix,iz)
-          CALL MPI_GATHERV(buffer_ly_cz,local_nky,MPI_DOUBLE_COMPLEX, &
+          CALL MPI_GATHERV(buffer_ly_cz, snd_y,        MPI_DOUBLE_COMPLEX, &
                            buffer_fy_cz, rcv_y, dsp_y, MPI_DOUBLE_COMPLEX, &
-                           root_ky, comm_ky)
+                           root_ky, comm_ky, ierr)
           buffer_fy_lz(1:Nky,iz) = buffer_fy_cz(1:Nky)
         ENDDO
 
-        ! send the full line on y contained by root_ky
+        ! send the full line on y contained by root_kyas
         IF(rank_ky .EQ. 0) THEN
-          CALL MPI_GATHERV(buffer_fy_lz,Nky*local_nz,MPI_DOUBLE_COMPLEX, &
-                           buffer_fy_fz, rcv_z, dsp_z, MPI_DOUBLE_COMPLEX, &
-                           root_z, comm_z)
+          CALL MPI_GATHERV(buffer_fy_lz, snd_z,        MPI_DOUBLE_COMPLEX, &
+                           buffer_fy_fz, rcv_zy, dsp_zy, MPI_DOUBLE_COMPLEX, &
+                           root_z, comm_z, ierr)
         ENDIF
         ! ID 0 (the one who output) rebuild the whole array
         IF(my_id .EQ. 0) &
@@ -74,6 +109,81 @@ CONTAINS
   END SUBROUTINE gather_xyz
 
   !!!!! Gather a field in kinetic + z coordinates on rank 0 !!!!!
+  SUBROUTINE gather_pjz_e(field_sub,field_full)
+    COMPLEX(dp), DIMENSION(ips_e:ipe_e, ijs_e:ije_e, izs:ize), INTENT(IN)    :: field_sub
+    COMPLEX(dp), DIMENSION(   1:pmaxe+1,  1:jmaxe+1,   1:Nz),  INTENT(INOUT) :: field_full
+    COMPLEX(dp), DIMENSION(ips_e:ipe_e)         :: buffer_lp_cz !local p, constant z
+    COMPLEX(dp), DIMENSION(   1:pmaxe+1 )       :: buffer_fp_cz !full  p, constant z
+    COMPLEX(dp), DIMENSION(   1:pmaxe+1,  izs:ize ) :: buffer_fp_lz !full  p, local z
+    COMPLEX(dp), DIMENSION(   1:pmaxe+1,    1:Nz  ) :: buffer_fp_fz !full  p, full  z
+    INTEGER :: snd_p, snd_z, root_p, root_z, root_ky, ij, iz, Npe
+
+    Npe    = pmaxe+1      ! total number of hermite moments
+    snd_p  = local_np_e   ! Number of points to send along y (per z)
+    snd_z  = Npe*local_nz ! Number of points to send along z (full y)
+
+    root_p = 0; root_z = 0; root_ky = 0
+    IF(rank_ky .EQ. root_ky) THEN
+      DO ij = 1,jmaxe+1
+        DO iz = izs,ize
+          ! fill a buffer to contain a slice of data at constant kx and z
+          buffer_lp_cz(ips_e:ipe_e) = field_sub(ips_e:ipe_e,ij,iz)
+          CALL MPI_GATHERV(buffer_lp_cz, snd_p,           MPI_DOUBLE_COMPLEX, &
+                           buffer_fp_cz, rcv_p_e, dsp_p_e, MPI_DOUBLE_COMPLEX, &
+                           root_p, comm_p, ierr)
+          buffer_fp_lz(1:Npe,iz) = buffer_fp_cz(1:Npe)
+        ENDDO
+
+        ! send the full line on y contained by root_kyas
+        IF(rank_p .EQ. 0) THEN
+          CALL MPI_GATHERV(buffer_fp_lz, snd_z,              MPI_DOUBLE_COMPLEX, &
+                           buffer_fp_fz, rcv_zp_e, dsp_zp_e, MPI_DOUBLE_COMPLEX, &
+                           root_z, comm_z, ierr)
+        ENDIF
+        ! ID 0 (the one who output) rebuild the whole array
+        IF(my_id .EQ. 0) &
+          field_full(1:Npe,ij,1:Nz) = buffer_fp_fz(1:Npe,1:Nz)
+      ENDDO
+    ENDIF
+  END SUBROUTINE gather_pjz_e
+
+  SUBROUTINE gather_pjz_i(field_sub,field_full)
+    COMPLEX(dp), DIMENSION(ips_i:ipe_i, ijs_i:ije_i, izs:ize), INTENT(IN)    :: field_sub
+    COMPLEX(dp), DIMENSION(   1:pmaxi+1,  1:jmaxi+1,   1:Nz),  INTENT(INOUT) :: field_full
+    COMPLEX(dp), DIMENSION(ips_i:ipe_i)         :: buffer_lp_cz !local p, constant z
+    COMPLEX(dp), DIMENSION(   1:pmaxi+1 )       :: buffer_fp_cz !full  p, constant z
+    COMPLEX(dp), DIMENSION(   1:pmaxi+1,  izs:ize ) :: buffer_fp_lz !full  p, local z
+    COMPLEX(dp), DIMENSION(   1:pmaxi+1,    1:Nz  ) :: buffer_fp_fz !full  p, full  z
+    INTEGER :: snd_p, snd_z, root_p, root_z, root_ky, ij, iz, Npi
+
+    Npi    = pmaxi+1      ! total number of hermite moments
+    snd_p  = local_np_i   ! Number of points to send along y (per z)
+    snd_z  = Npi*local_nz ! Number of points to send along z (full y)
+
+    root_p = 0; root_z = 0; root_ky = 0
+    IF(rank_ky .EQ. root_ky) THEN
+      DO ij = 1,jmaxi+1
+        DO iz = izs,ize
+          ! fill a buffer to contain a slice of data at constant kx and z
+          buffer_lp_cz(ips_i:ipe_i) = field_sub(ips_i:ipe_i,ij,iz)
+          CALL MPI_GATHERV(buffer_lp_cz, snd_p,            MPI_DOUBLE_COMPLEX, &
+                           buffer_fp_cz, rcv_p_i, dsp_p_i, MPI_DOUBLE_COMPLEX, &
+                           root_p, comm_p, ierr)
+          buffer_fp_lz(1:Npi,iz) = buffer_fp_cz(1:Npi)
+        ENDDO
+
+        ! send the full line on y contained by root_kyas
+        IF(rank_p .EQ. 0) THEN
+          CALL MPI_GATHERV(buffer_fp_lz, snd_z,              MPI_DOUBLE_COMPLEX, &
+                           buffer_fp_fz, rcv_zp_i, dsp_zp_i, MPI_DOUBLE_COMPLEX, &
+                           root_z, comm_z, ierr)
+        ENDIF
+        ! ID 0 (the one who output) rebuild the whole array
+        IF(my_id .EQ. 0) &
+          field_full(1:Npi,ij,1:Nz) = buffer_fp_fz(1:Npi,1:Nz)
+      ENDDO
+    ENDIF
+  END SUBROUTINE gather_pjz_i
 
   !!!!! Gather a field in kinetic + spatial coordinates on rank 0 !!!!!
 
diff --git a/src/ppinit.F90 b/src/ppinit.F90
index f103cdd9..1863dcf9 100644
--- a/src/ppinit.F90
+++ b/src/ppinit.F90
@@ -50,7 +50,7 @@ SUBROUTINE ppinit
   periods(3)=.TRUE.
 
   CALL MPI_CART_CREATE(MPI_COMM_WORLD, ndims, dims, periods, reorder, comm0, ierr)
-  CALL MPI_COMM_GROUP(comm0,group0)
+  CALL MPI_COMM_GROUP(comm0,group0, ierr)
   CALL MPI_COMM_RANK(comm0, rank_0,  ierr)
   CALL MPI_CART_COORDS(comm0,rank_0,ndims,coords,ierr)
 
@@ -81,12 +81,12 @@ SUBROUTINE ppinit
   CALL MPI_CART_SHIFT(comm0, 2, 1, nbr_D, nbr_U, ierr) !down   up    neighbours
 
   ! Create the communicator for groups used in gatherv
-  CALL MPI_COMM_GROUP(comm0,group_ky0)
-  ALLOCATE(rank2include(0:num_procs_ky))
-  DO r_ = 0,rank_0
-    IF(rank_y .EQ. 0) &
-      rank2exclude
-  ENDDO
-  CALL MPI_COMM_CREATE_GROUPE(comm0, group_p0, comm_ky0)
+  ! CALL MPI_COMM_GROUP(comm0,group_ky0)
+  ! ALLOCATE(rank2include(0:num_procs_ky))
+  ! DO r_ = 0,rank_0
+  !   IF(rank_y .EQ. 0) &
+  !     rank2exclude
+  ! ENDDO
+  ! CALL MPI_COMM_CREATE_GROUPE(comm0, group_p0, comm_ky0)
 
 END SUBROUTINE ppinit
diff --git a/wk/analysis_3D.m b/wk/analysis_3D.m
index 84d0bd3a..248577ed 100644
--- a/wk/analysis_3D.m
+++ b/wk/analysis_3D.m
@@ -53,7 +53,7 @@ options.PLAN      = 'xy';
 % options.PLAN      = 'sx';
 options.COMP      = 9;
 % options.TIME      = dat.Ts5D;
-options.TIME      = 920:1:1250;
+options.TIME      = 920:1:1100;
 data.EPS          = 0.1;
 data.a = data.EPS * 2000;
 create_film(data,options,'.gif')
@@ -71,11 +71,11 @@ options.NAME      = 'N_i^{00}';
 % options.NAME      = 'T_i';
 % options.NAME      = '\Gamma_x';
 % options.NAME      = 'k^2n_e';
-options.PLAN      = 'xy';
+options.PLAN      = 'xz';
 % options.NAME      = 'f_i';
 % options.PLAN      = 'sx';
-options.COMP      = 'avg';
-options.TIME      = [900 923 927 990];
+options.COMP      = 9;
+options.TIME      = [900 1000 1100];
 data.a = data.EPS * 2e3;
 fig = photomaton(data,options);
 save_figure(data,fig)
diff --git a/wk/analysis_header.m b/wk/analysis_header.m
index 28d53d9f..4be2eb2f 100644
--- a/wk/analysis_header.m
+++ b/wk/analysis_header.m
@@ -5,9 +5,9 @@ helazdir = '/home/ahoffman/HeLaZ/';
 outfile ='';
 outfile ='';
 outfile ='';
-% outfile ='shearless_cyclone/128x128x16xdmax_6_L_120_CBC_1.0';
+outfile ='shearless_cyclone/128x128x16xdmax_6_L_120_CBC_1.0';
 % outfile ='shearless_cyclone/128x128x16xdmax_L_120_CBC_1.0';
-outfile ='shearless_cyclone/linear_CBC_100_D_16';
+% outfile ='shearless_cyclone/linear_CBC_100_D_16';
 % outfile ='quick_run/CLOS_1_64x64_5x3_L_120_kN_2.0_kT_0.5_nu_1e-01_SGGK';
 % outfile ='pedestal/64x64x16x2x1_L_300_LnT_20_nu_0.1';
 % outfile ='quick_run/32x32x16_5x3_L_300_q0_2.5_e_0.18_kN_20_kT_20_nu_1e-01_DGGK';
diff --git a/wk/debug_script.m b/wk/debug_script.m
new file mode 100644
index 00000000..3954d020
--- /dev/null
+++ b/wk/debug_script.m
@@ -0,0 +1,18 @@
+system('cd ../results/debug/test_diag; mpirun -np 8 ./helaz3_dbg 2 2 2; cd $HOME/HeLaZ/wk');
+
+
+filename = '../results/debug/test_diag/outputs_00.h5';
+
+f_        = h5read(filename,'/data/var3d/phi/000004/');
+f_gatherv = h5read(filename,'/data/var3d/phi_gatherv/000004/');
+f_        = f_.real        + 1i*f_.imaginary;
+f_gatherv = f_gatherv.real + 1i*f_gatherv.imaginary;
+err = sum(sum(sum(abs(f_-f_gatherv))))/sum(sum(sum(abs(f_))));
+disp(['error_phi = ',sprintf('%2.2e',err)]);    
+
+f_        = h5read(filename,'/data/var3d/Ni00/000004/');
+f_gatherv = h5read(filename,'/data/var3d/Ni00_gatherv/000004/');
+f_        = f_.real        + 1i*f_.imaginary;
+f_gatherv = f_gatherv.real + 1i*f_gatherv.imaginary;
+err = sum(sum(sum(abs(f_-f_gatherv))))/sum(sum(sum(abs(f_))));
+disp(['error_Ni00 = ',sprintf('%2.2e',err)]);    
-- 
GitLab