From c636d794bde18b6c5f8716779aba81d880009b2f Mon Sep 17 00:00:00 2001
From: Antoine Cyril David Hoffmann <ahoffman@spcpc606.epfl.ch>
Date: Fri, 8 Apr 2022 13:16:27 +0200
Subject: [PATCH] Working 3D parallel version

---
 src/auxval.F90             |   9 +-
 src/basic_mod.F90          |   7 +-
 src/calculus_mod.F90       | 131 +++++++++++++++--------------
 src/closure_mod.F90        |   2 +-
 src/control.F90            |   1 -
 src/diagnose.F90           |  36 ++++----
 src/geometry_mod.F90       |   8 +-
 src/ghosts_mod.F90         | 163 ++++++++++++++++++++-----------------
 src/grid_mod.F90           |  17 ++--
 src/inital.F90             |   8 +-
 src/memory.F90             |   2 +-
 src/moments_eq_rhs_mod.F90 | 120 +++++++++++++--------------
 src/nonlinear_mod.F90      |   4 +-
 src/numerics_mod.F90       |  15 ++--
 src/poisson.F90            |  34 ++++----
 src/ppexit.F90             |   4 +-
 src/ppinit.F90             |  13 +--
 src/processing_mod.F90     |  64 +++++++--------
 src/srcinfo.h              |   6 +-
 src/srcinfo/srcinfo.h      |   6 +-
 src/stepon.F90             |  48 ++++++-----
 src/utility_mod.F90        |  34 +++++++-
 wk/analysis_3D.m           |   2 +-
 wk/analysis_header.m       |   2 -
 24 files changed, 395 insertions(+), 341 deletions(-)

diff --git a/src/auxval.F90 b/src/auxval.F90
index e826615f..cebb78c0 100644
--- a/src/auxval.F90
+++ b/src/auxval.F90
@@ -51,10 +51,11 @@ subroutine auxval
     IF (my_id .EQ. i_) THEN
       IF (my_id .EQ. 0) WRITE(*,*) ''
       IF (my_id .EQ. 0) WRITE(*,*) '--------- Parallel environement ----------'
-      IF (my_id .EQ. 0) WRITE(*,'(A9,I3,A10,I3,A10,I3)') 'n_procs= ', num_procs, ', num_procs_p   = ', num_procs_p, ', num_procs_kx   = ', num_procs_kx
+      IF (my_id .EQ. 0) WRITE(*,'(A,I3)') 'n_procs        = ', num_procs
+      IF (my_id .EQ. 0) WRITE(*,'(A,I3,A,I3,A,I3)') 'num_procs_p   = ', num_procs_p, ', num_procs_kx   = ', num_procs_kx, ', num_procs_z   = ', num_procs_z
       IF (my_id .EQ. 0) WRITE(*,*) ''
-      WRITE(*,'(A9,I3,A10,I3,A10,I3)')&
-       'my_id  = ', my_id, ', rank_p  = ', rank_p, ', rank_kx  = ', rank_kx
+      WRITE(*,'(A,I3,A,I3,A,I3,A,I3)')&
+       'my_id  = ', my_id, ', rank_p  = ', rank_p, ', rank_kx  = ', rank_kx, ', rank_z  = ', rank_z
        WRITE(*,'(A22,I3,A11,I3)')&
        '              ips_e = ', ips_e, ', ipe_e  = ', ipe_e
        WRITE(*,'(A22,I3,A11,I3)')&
@@ -69,8 +70,6 @@ subroutine auxval
        '              ikys  = ', ikys , ', ikye   = ', ikye
        WRITE(*,'(A22,I3,A11,I3)')&
        '              izs   = ', izs  , ', ize    = ', ize
-       ! write(*,*) 'local kx =', kxarray
-       ! write(*,*) 'local ky =', kyarray
       IF (my_id .NE. num_procs-1) WRITE (*,*) ''
       IF (my_id .EQ. num_procs-1) WRITE(*,*) '------------------------------------------'
     ENDIF
diff --git a/src/basic_mod.F90 b/src/basic_mod.F90
index 75e2a6d4..b22a5f23 100644
--- a/src/basic_mod.F90
+++ b/src/basic_mod.F90
@@ -12,7 +12,10 @@ MODULE basic
   real(dp) :: time   = 0           ! Current simulation time (Init from restart file)
 
   INTEGER :: comm0                 ! Default communicator with a topology
-  INTEGER :: comm_p, comm_kx, comm_z! Communicators for 1-dim cartesian subgrids of comm0
+  INTEGER :: rank_0                ! Ranks in comm0
+  ! Communicators for 1-dim cartesian subgrids of comm0
+  INTEGER :: comm_p, comm_kx, comm_z
+  INTEGER :: rank_p, rank_kx, rank_z ! Ranks in comm_p, comm_kx, comm_z
   INTEGER :: commr_p0              ! Communicators along kx for only rank 0 on p
 
   INTEGER :: jobnum  = 0           ! Job number
@@ -27,8 +30,6 @@ MODULE basic
   INTEGER :: num_procs_p           ! Number of processes in p
   INTEGER :: num_procs_kx          ! Number of processes in r
   INTEGER :: num_procs_z           ! Number of processes in z
-  INTEGER :: rank_0                ! Ranks in comm0
-  INTEGER :: rank_p, rank_kx, rank_z! Ranks in comm_p, comm_kx, comm_z
   INTEGER :: nbr_L, nbr_R          ! Left and right neighbours (along p)
   INTEGER :: nbr_T, nbr_B          ! Top and bottom neighbours (along kx)
   INTEGER :: nbr_U, nbr_D          ! Upstream and downstream neighbours (along z)
diff --git a/src/calculus_mod.F90 b/src/calculus_mod.F90
index c06c9ca4..76e22a86 100644
--- a/src/calculus_mod.F90
+++ b/src/calculus_mod.F90
@@ -3,18 +3,20 @@ MODULE calculus
   USE basic
   USE prec_const
   USE grid
+  USE utility
   IMPLICIT NONE
   REAL(dp), dimension(-2:2) :: dz_usu = &
    (/  onetwelfth, -twothird, &
                        0._dp, &
          twothird, -onetwelfth /) ! fd4 centered stencil
-  REAL(dp), dimension(-2:2) :: dz4_usu = &
-  (/  1._dp, -4._dp, 6._dp, -4._dp, 1._dp /)* 0.0625 ! 4th derivative, 2nd order (for parallel hypdiff)
   REAL(dp), dimension(-2:1) :: dz_o2e = &
    (/ onetwentyfourth,-nineeighths, nineeighths,-onetwentyfourth /) ! fd4 odd to even stencil
   REAL(dp), dimension(-1:2) :: dz_e2o = &
    (/ onetwentyfourth,-nineeighths, nineeighths,-onetwentyfourth /) ! fd4 odd to even stencil
-
+  REAL(dp), dimension(-2:2) :: dz2_usu = &
+  (/-1.0_dp/12.0_dp, 4.0_dp/3.0_dp, -5.0_dp/2.0_dp, 4.0_dp/3.0_dp, -1.0_dp/12.0_dp /) ! 2nd derivative, 4th order (for parallel hypdiff)
+  REAL(dp), dimension(-2:2) :: dz4_usu = &
+  (/  1._dp, -4._dp, 6._dp, -4._dp, 1._dp /)* 0.0625 ! 4th derivative, 2nd order (for parallel hypdiff)
   PUBLIC :: simpson_rule_z, interp_z, grad_z, grad_z4
 
 CONTAINS
@@ -26,8 +28,8 @@ SUBROUTINE grad_z(target,f,ddzf)
   ! not staggered : the derivative results must be on the same grid as the field
   !     staggered : the derivative is computed from a grid to the other
   INTEGER, INTENT(IN) :: target
-  COMPLEX(dp),dimension( izgs:izge ), intent(in)  :: f
-  COMPLEX(dp),dimension(  izs:ize ), intent(out) :: ddzf
+  COMPLEX(dp),dimension(izgs:izge), intent(in)  :: f
+  COMPLEX(dp),dimension( izs:ize ), intent(out) :: ddzf
   IF(Nz .GT. 3) THEN ! Cannot apply four points stencil on less than four points grid
     IF(SG) THEN
       IF(TARGET .EQ. 0) THEN
@@ -40,6 +42,7 @@ SUBROUTINE grad_z(target,f,ddzf)
        ddzf(iz) = dz_usu(-2)*f(iz-2) + dz_usu(-1)*f(iz-1) &
                  +dz_usu( 1)*f(iz+1) + dz_usu( 2)*f(iz+2)
       ENDDO
+      ddzf(:) = ddzf(:) * inv_deltaz
     ENDIF
   ELSE
     ddzf = 0._dp
@@ -52,7 +55,7 @@ CONTAINS
     COMPLEX(dp),dimension( izs:ize ), intent(out) :: ddzfe !
     DO iz = izs,ize
      ddzfe(iz) = dz_o2e(-2)*fo(iz-2) + dz_o2e(-1)*fo(iz-1) &
-               +dz_o2e( 0)*fo(iz  ) + dz_o2e( 1)*fo(iz+1)
+                +dz_o2e( 0)*fo(iz  ) + dz_o2e( 1)*fo(iz+1)
     ENDDO
     ddzfe(:) = ddzfe(:) * inv_deltaz
   END SUBROUTINE grad_z_o2e
@@ -70,6 +73,24 @@ CONTAINS
   END SUBROUTINE grad_z_e2o
 END SUBROUTINE grad_z
 
+SUBROUTINE grad_z2(f,ddz2f)
+  implicit none
+  ! Compute the fourth order second derivative for periodic boundary conditions
+  COMPLEX(dp),dimension(izgs:izge), intent(in)  :: f
+  COMPLEX(dp),dimension( izs:ize ), intent(out) :: ddz2f
+  IF(Nz .GT. 3) THEN ! Cannot apply four points stencil on less than four points grid
+      DO iz = izs,ize
+       ddz2f(iz) = dz2_usu(-2)*f(iz-2) + dz2_usu(-1)*f(iz-1) &
+                  +dz2_usu( 0)*f(iz  )&
+                  +dz2_usu( 1)*f(iz+1) + dz2_usu( 2)*f(iz+2)
+      ENDDO
+  ELSE
+    ddz2f = 0._dp
+  ENDIF
+  ddz2f = ddz2f * inv_deltaz**2
+END SUBROUTINE grad_z2
+
+
 SUBROUTINE grad_z4(f,ddz4f)
   implicit none
   ! Compute the second order fourth derivative for periodic boundary condition
@@ -102,7 +123,7 @@ SUBROUTINE interp_z(target,f_in,f_out)
       CALL interp_e2o_z(f_in,f_out)
     ENDIF
   ELSE ! No staggered grid -> identity
-    f_out(:) = f_in(:)
+    f_out(izs:ize) = f_in(izs:ize)
   ENDIF
 CONTAINS
   SUBROUTINE interp_o2e_z(fo,fe)
@@ -122,7 +143,7 @@ CONTAINS
    COMPLEX(dp),dimension(izgs:izge), intent(in)  :: fe
    COMPLEX(dp),dimension( izs:ize ), intent(out) :: fo
    ! 4th order interp
-   DO iz = 1,Nz
+   DO iz = izs,ize
     fo(iz) = onesixteenth * (-fe(iz-1) + 9._dp*(fe(iz) + fe(iz+1)) - fe(iz+2))
    ENDDO
   END SUBROUTINE interp_e2o_z
@@ -133,70 +154,46 @@ SUBROUTINE simpson_rule_z(f,intf)
  ! integrate f(z) over z using the simpon's rule. Assume periodic boundary conditions (f(ize+1) = f(izs))
  !from molix BJ Frei
  implicit none
- complex(dp),dimension(izgs:izge), intent(in) :: f
+ complex(dp),dimension(izs:ize), intent(in) :: f
  COMPLEX(dp), intent(out) :: intf
- COMPLEX(dp) :: buff_
- IF(Nz .GT. 1) THEN
+ COMPLEX(dp)              :: buffer, local_int
+ INTEGER :: root, i_
+
+ IF(Nz .EQ. 1) THEN !2D zpinch simulations
+   intf = f(izs)
+
+ ELSE !3D fluxtube
    IF(mod(Nz,2) .ne. 0 ) THEN
       ERROR STOP 'Simpson rule: Nz must be an even number  !!!!'
    ENDIF
-   buff_ = 0._dp
-   DO iz = izs, Nz/2
-      IF(iz .eq. Nz/2) THEN ! ... iz = ize
-         buff_ = buff_ + (f(izs) + 4._dp*f(ize) + f(ize-1 ))
-      ELSE
-         buff_ = buff_ + (f(2*iz+1) + 4._dp*f(2*iz) + f(2*iz-1 ))
-      ENDIF
+   ! Build the local sum using the weights of the composite Simpson's rule
+   local_int = 0._dp
+   DO iz = izs,ize
+      local_int = local_int + zweights_SR(iz)*f(iz)
    ENDDO
-   intf = buff_*deltaz/3._dp
- ELSE
-   intf = f(izs)
- ENDIF
-END SUBROUTINE simpson_rule_z
+   buffer = local_int
+   root = 0
+   ! Gather the local sums manually onto rank_z = 0 and add them up
+   intf = 0._dp
+   IF (num_procs_z .GT. 1) THEN
+       !! Everyone sends its local_sum to root = 0
+       IF (rank_z .NE. root) THEN
+           CALL MPI_SEND(buffer, 1 , MPI_DOUBLE_COMPLEX, root, 5678, comm_z, ierr)
+       ELSE
+           ! Receive from all the other processes
+           DO i_ = 0,num_procs_z-1
+               IF (i_ .NE. rank_z) &
+                   CALL MPI_RECV(buffer, 1 , MPI_DOUBLE_COMPLEX, i_, 5678, comm_z, MPI_STATUS_IGNORE, ierr)
+               intf = intf + buffer ! on i_ = rank_z, buffer still holds the root's own local sum
+           ENDDO
+       ENDIF
+       CALL manual_0D_bcast(intf)
+   ELSE
+     intf = local_int
+   ENDIF
+   intf = onethird*deltaz*intf
+ ENDIF
 
-! SUBROUTINE simpson_rule_z(f,intf)
-!  ! integrate f(z) over z using the simpon's rule. Assume periodic boundary conditions (f(ize+1) = f(izs))
-!  !from molix BJ Frei
-!  implicit none
-!  complex(dp),dimension(izs:ize), intent(in) :: f
-!  COMPLEX(dp), intent(out) :: intf
-!  COMPLEX(dp)              :: buffer(1:2), local_int
-!  INTEGER :: root, i_
-!
-!  IF(Nz .EQ. 1) THEN !2D zpinch simulations
-!    intf = f(izs)
-!
-!  ELSE !3D fluxtube
-!    IF(mod(Nz,2) .ne. 0 ) THEN
-!       ERROR STOP 'Simpson rule: Nz must be an even number  !!!!'
-!    ENDIF
-!    ! Buil local sum using the weights of composite Simpson's rule
-!    local_int = 0._dp
-!    DO iz = izs,ize
-!       local_int = local_int + zweights_SR(iz)*f(iz)
-!    ENDDO
-!
-!    buffer(2) = local_int
-!    root = 0
-!    !Gather manually among the rank_z=0 processes and perform the sum
-!    intf = 0._dp
-!    IF (num_procs_z .GT. 1) THEN
-!        !! Everyone sends its local_sum to root = 0
-!        IF (rank_z .NE. root) THEN
-!            CALL MPI_SEND(buffer, 2 , MPI_DOUBLE_COMPLEX, root, 5678, comm_z, ierr)
-!        ELSE
-!            ! Recieve from all the other processes
-!            DO i_ = 0,num_procs_z-1
-!                IF (i_ .NE. rank_kx) &
-!                    CALL MPI_RECV(buffer, 2 , MPI_DOUBLE_COMPLEX, i_, 5678, comm_z, MPI_STATUS_IGNORE, ierr)
-!                    intf = intf + buffer(2)
-!            ENDDO
-!        ENDIF
-!    ELSE
-!      intf = local_int
-!    ENDIF
-!    intf = onethird*deltaz*intf
-!   ENDIF
-! END SUBROUTINE simpson_rule_z
+END SUBROUTINE simpson_rule_z
 
 END MODULE calculus
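
Side note on the manual gather in simpson_rule_z above: summing the per-rank
Simpson contributions and broadcasting the total is functionally an all-reduce
over comm_z. A minimal self-contained sketch of the equivalent collective
(program and variable names are illustrative, not part of the code base):

PROGRAM zsum_sketch
  USE mpi
  IMPLICIT NONE
  INTEGER, PARAMETER :: dp = KIND(1.d0)
  INTEGER     :: ierr, rank_z
  COMPLEX(dp) :: local_int, intf
  CALL MPI_INIT(ierr)
  ! Stand-in for comm_z: every rank owns one partial Simpson sum
  CALL MPI_COMM_RANK(MPI_COMM_WORLD, rank_z, ierr)
  local_int = CMPLX(rank_z, 0, KIND=dp)
  ! Sum all partial integrals and give every rank the total
  CALL MPI_ALLREDUCE(local_int, intf, 1, MPI_DOUBLE_COMPLEX, MPI_SUM, MPI_COMM_WORLD, ierr)
  IF (rank_z .EQ. 0) WRITE(*,*) 'z-integrated value =', intf
  CALL MPI_FINALIZE(ierr)
END PROGRAM zsum_sketch
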
diff --git a/src/closure_mod.F90 b/src/closure_mod.F90
index 40be81ee..e0e66585 100644
--- a/src/closure_mod.F90
+++ b/src/closure_mod.F90
@@ -132,7 +132,7 @@ SUBROUTINE ghosts_lower_truncation
   IF(ijs_i .EQ. 1) THEN
     ! applies only for the process that has lowest j index
     DO ip = ipgs_i,ipge_i
-      moments_i(ip,ije_i-1,ikxs:ikxe,ikys:ikye,izgs:izge,updatetlevel) = 0._dp
+      moments_i(ip,ijs_i-1,ikxs:ikxe,ikys:ikye,izgs:izge,updatetlevel) = 0._dp
     ENDDO
   ENDIF
   ! applies only for the process that has lowest p index
diff --git a/src/control.F90 b/src/control.F90
index a757a0c8..5c2571da 100644
--- a/src/control.F90
+++ b/src/control.F90
@@ -57,7 +57,6 @@ SUBROUTINE control
 
      step  = step  + 1
      cstep = cstep + 1
-
      CALL stepon
 
      time  = time  + dt
diff --git a/src/diagnose.F90 b/src/diagnose.F90
index 84eb57d3..f3efd5ff 100644
--- a/src/diagnose.F90
+++ b/src/diagnose.F90
@@ -73,21 +73,21 @@ SUBROUTINE diagnose(kstep)
 
      ! Metric info
      CALL   creatg(fidres, "/data/metric", "Metric data")
-     CALL putarrnd(fidres, "/data/metric/gxx",            gxx(izs:ize,0:1), (/1, 1/))
-     CALL putarrnd(fidres, "/data/metric/gxy",            gxy(izs:ize,0:1), (/1, 1/))
-     CALL putarrnd(fidres, "/data/metric/gyy",            gyy(izs:ize,0:1), (/1, 1/))
-     CALL putarrnd(fidres, "/data/metric/gyz",            gyz(izs:ize,0:1), (/1, 1/))
-     CALL putarrnd(fidres, "/data/metric/gzz",            gzz(izs:ize,0:1), (/1, 1/))
-     CALL putarrnd(fidres, "/data/metric/hatR",          hatR(izs:ize,0:1), (/1, 1/))
-     CALL putarrnd(fidres, "/data/metric/hatZ",          hatZ(izs:ize,0:1), (/1, 1/))
-     CALL putarrnd(fidres, "/data/metric/hatB",          hatB(izs:ize,0:1), (/1, 1/))
-     CALL putarrnd(fidres, "/data/metric/gradxB",      gradxB(izs:ize,0:1), (/1, 1/))
-     CALL putarrnd(fidres, "/data/metric/gradyB",      gradyB(izs:ize,0:1), (/1, 1/))
-     CALL putarrnd(fidres, "/data/metric/gradzB",      gradzB(izs:ize,0:1), (/1, 1/))
-     CALL putarrnd(fidres, "/data/metric/Jacobian",    Jacobian(izs:ize,0:1), (/1, 1/))
-     CALL putarrnd(fidres, "/data/metric/gradz_coeff", gradz_coeff(izs:ize,0:1), (/1, 1/))
-     CALL putarrnd(fidres, "/data/metric/Ckxky",       Ckxky(ikxs:ikxe,ikys:ikye,izs:ize,0:1), (/1, 3/))
-     CALL putarrnd(fidres, "/data/metric/kernel_i",    kernel_i(ijs_i:ije_i,ikxs:ikxe,ikys:ikye,izs:ize,0:1), (/1, 3/))
+     CALL putarrnd(fidres, "/data/metric/gxx",            gxx(izs:ize,0:1), (/1, 1, 1/))
+     CALL putarrnd(fidres, "/data/metric/gxy",            gxy(izs:ize,0:1), (/1, 1, 1/))
+     CALL putarrnd(fidres, "/data/metric/gyy",            gyy(izs:ize,0:1), (/1, 1, 1/))
+     CALL putarrnd(fidres, "/data/metric/gyz",            gyz(izs:ize,0:1), (/1, 1, 1/))
+     CALL putarrnd(fidres, "/data/metric/gzz",            gzz(izs:ize,0:1), (/1, 1, 1/))
+     CALL putarrnd(fidres, "/data/metric/hatR",          hatR(izs:ize,0:1), (/1, 1, 1/))
+     CALL putarrnd(fidres, "/data/metric/hatZ",          hatZ(izs:ize,0:1), (/1, 1, 1/))
+     CALL putarrnd(fidres, "/data/metric/hatB",          hatB(izs:ize,0:1), (/1, 1, 1/))
+     CALL putarrnd(fidres, "/data/metric/gradxB",      gradxB(izs:ize,0:1), (/1, 1, 1/))
+     CALL putarrnd(fidres, "/data/metric/gradyB",      gradyB(izs:ize,0:1), (/1, 1, 1/))
+     CALL putarrnd(fidres, "/data/metric/gradzB",      gradzB(izs:ize,0:1), (/1, 1, 1/))
+     CALL putarrnd(fidres, "/data/metric/Jacobian",    Jacobian(izs:ize,0:1), (/1, 1, 1/))
+     CALL putarrnd(fidres, "/data/metric/gradz_coeff", gradz_coeff(izs:ize,0:1), (/1, 1, 1/))
+     CALL putarrnd(fidres, "/data/metric/Ckxky",       Ckxky(ikxs:ikxe,ikys:ikye,izs:ize,0:1), (/1, 1, 3/))
+     CALL putarrnd(fidres, "/data/metric/kernel_i",    kernel_i(ijs_i:ije_i,ikxs:ikxe,ikys:ikye,izs:ize,0:1), (/ 1, 2, 4/))
 
      !  var0d group (gyro transport)
      IF (nsave_0d .GT. 0) THEN
@@ -311,12 +311,14 @@ SUBROUTINE diagnose(kstep)
      ! make a checkpoint at last timestep if not crashed
      IF(.NOT. crashed) THEN
        IF(my_id .EQ. 0) write(*,*) 'Saving last state'
+       IF (nsave_5d .GT. 0) &
        CALL diagnose_5d
      ENDIF
      ! Display computational time cost
      IF (my_id .EQ. 0) CALL display_h_min_s(finish-start)
 
      !   Close all diagnostic files
+     CALL mpi_barrier(MPI_COMM_WORLD, ierr)
      CALL closef(fidres)
 
   END IF
@@ -510,7 +512,7 @@ CONTAINS
     IF (num_procs .EQ. 1) THEN ! no data distribution
       CALL putarr(fidres, dset_name, field(ikxs:ikxe, ikys:ikye, izs:ize), ionode=0)
     ELSE
-      CALL putarrnd(fidres, dset_name, field(ikxs:ikxe, ikys:ikye, izs:ize),  (/1, 3/))
+      CALL putarrnd(fidres, dset_name, field(ikxs:ikxe, ikys:ikye, izs:ize),  (/1, 1, 3/))
     ENDIF
     CALL attach(fidres, dset_name, "time", time)
 
@@ -593,7 +595,7 @@ SUBROUTINE diagnose_5d
       IF (num_procs .EQ. 1) THEN
         CALL putarr(fidres, dset_name, field(ips_i:ipe_i,ijs_i:ije_i,ikxs:ikxe,ikys:ikye,izs:ize), ionode=0)
       ELSE
-        CALL putarrnd(fidres, dset_name, field(ips_i:ipe_i,ijs_i:ije_i,ikxs:ikxe,ikys:ikye,izs:ize),  (/1,3/))
+        CALL putarrnd(fidres, dset_name, field(ips_i:ipe_i,ijs_i:ije_i,ikxs:ikxe,ikys:ikye,izs:ize),  (/1,3,5/))
       ENDIF
      CALL attach(fidres, dset_name, 'cstep', cstep)
      CALL attach(fidres, dset_name, 'time', time)
diff --git a/src/geometry_mod.F90 b/src/geometry_mod.F90
index 460529a4..ebf1a642 100644
--- a/src/geometry_mod.F90
+++ b/src/geometry_mod.F90
@@ -20,7 +20,7 @@ contains
     ! evalute metrix, elementwo_third_kpmaxts, jacobian and gradient
     implicit none
     REAL(dp) :: kx,ky
-    COMPLEX(dp), DIMENSION(izgs:izge) :: integrant
+    COMPLEX(dp), DIMENSION(izs:ize) :: integrant
     INTEGER :: fid
     !
     IF( (Ny .EQ. 1) .AND. (Nz .EQ. 1)) THEN !1D perp linear run
@@ -47,19 +47,19 @@ contains
        ENDDO
     ENDDO
     ENDDO
+
     two_third_kpmax = 2._dp/3._dp * MAXVAL(kparray)
     !
     ! Compute the inverse z integrated Jacobian (useful for flux averaging)
-    integrant = Jacobian(izgs:izge,0) ! Convert into complex array
+    integrant = Jacobian(izs:ize,0) ! Convert into complex array
     CALL simpson_rule_z(integrant,iInt_Jacobian)
     iInt_Jacobian = 1._dp/iInt_Jacobian ! reverse it
-
   END SUBROUTINE eval_magnetic_geometry
   !
   !--------------------------------------------------------------------------------
   !
 
-  subroutine eval_salphaB_geometry
+  SUBROUTINE eval_salphaB_geometry
   ! evaluate s-alpha geometry model
   implicit none
   REAL(dp) :: z, kx, ky, alpha_MHD
diff --git a/src/ghosts_mod.F90 b/src/ghosts_mod.F90
index a50e0fc3..fee016af 100644
--- a/src/ghosts_mod.F90
+++ b/src/ghosts_mod.F90
@@ -24,7 +24,6 @@ SUBROUTINE update_ghosts_p_moments
         CALL update_ghosts_p_i
     ENDIF
 
-    CALL update_ghosts_z_moments
     CALL cpu_time(t1_ghost)
     tc_ghost = tc_ghost + (t1_ghost - t0_ghost)
 END SUBROUTINE update_ghosts_p_moments
@@ -45,25 +44,26 @@ END SUBROUTINE update_ghosts_p_moments
 SUBROUTINE update_ghosts_p_e
 
     IMPLICIT NONE
+
     count = (ijge_e-ijgs_e+1)*(ikxe-ikxs+1)*(ikye-ikys+1)*(izge-izgs+1)
 
     !!!!!!!!!!! Send ghost to right neighbour !!!!!!!!!!!!!!!!!!!!!!
     ! Send the last local moment to fill the -1 neighbour ghost
-    CALL mpi_sendrecv(moments_e(ipe_e  ,ijgs_e:ijge_e,ikxs:ikxe,ikys:ikye,izgs:izge,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_R, 10, & ! Send to right
-                      moments_e(ips_e-1,ijgs_e:ijge_e,ikxs:ikxe,ikys:ikye,izgs:izge,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_L, 10, & ! Recieve from left
+    CALL mpi_sendrecv(moments_e(ipe_e  ,:,:,:,:,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_R, 10, & ! Send to right
+                      moments_e(ips_e-1,:,:,:,:,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_L, 10, & ! Receive from left
                       comm0, status, ierr)
     IF (deltape .EQ. 1) & ! If we have odd Hermite degrees we need a 2nd order stencil
-    CALL mpi_sendrecv(moments_e(ipe_e-1,ijgs_e:ijge_e,ikxs:ikxe,ikys:ikye,izgs:izge,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_R, 11, & ! Send to right
-                      moments_e(ips_e-2,ijgs_e:ijge_e,ikxs:ikxe,ikys:ikye,izgs:izge,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_L, 11, & ! Recieve from left
+    CALL mpi_sendrecv(moments_e(ipe_e-1,:,:,:,:,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_R, 11, & ! Send to right
+                      moments_e(ips_e-2,:,:,:,:,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_L, 11, & ! Receive from left
                       comm0, status, ierr)
 
     !!!!!!!!!!! Send ghost to left neighbour !!!!!!!!!!!!!!!!!!!!!!
-    CALL mpi_sendrecv(moments_e(ips_e  ,ijgs_e:ijge_e,ikxs:ikxe,ikys:ikye,izgs:izge,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_L, 12, & ! Send to left
-                      moments_e(ipe_e+1,ijgs_e:ijge_e,ikxs:ikxe,ikys:ikye,izgs:izge,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_R, 12, & ! Recieve from right
+    CALL mpi_sendrecv(moments_e(ips_e  ,:,:,:,:,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_L, 12, & ! Send to left
+                      moments_e(ipe_e+1,:,:,:,:,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_R, 12, & ! Receive from right
                       comm0, status, ierr)
     IF (deltape .EQ. 1) & ! If we have odd Hermite degrees we need a 2nd order stencil
-    CALL mpi_sendrecv(moments_e(ips_e+1,ijgs_e:ijge_e,ikxs:ikxe,ikys:ikye,izgs:izge,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_L, 13, & ! Send to left
-                      moments_e(ipe_e+2,ijgs_e:ijge_e,ikxs:ikxe,ikys:ikye,izgs:izge,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_R, 13, & ! Recieve from right
+    CALL mpi_sendrecv(moments_e(ips_e+1,:,:,:,:,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_L, 13, & ! Send to left
+                      moments_e(ipe_e+2,:,:,:,:,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_R, 13, & ! Receive from right
                       comm0, status, ierr)
 
 END SUBROUTINE update_ghosts_p_e
@@ -76,33 +76,35 @@ SUBROUTINE update_ghosts_p_i
     count = (ijge_i-ijgs_i+1)*(ikxe-ikxs+1)*(ikye-ikys+1)*(izge-izgs+1) ! Number of elements sent
 
     !!!!!!!!!!! Send ghost to right neighbour !!!!!!!!!!!!!!!!!!!!!!
-    CALL mpi_sendrecv(moments_i(ipe_i  ,ijgs_i:ijge_i,ikxs:ikxe,ikys:ikye,izgs:izge,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_R, 14, &
-                      moments_i(ips_i-1,ijgs_i:ijge_i,ikxs:ikxe,ikys:ikye,izgs:izge,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_L, 14, &
+    CALL mpi_sendrecv(moments_i(ipe_i  ,:,:,:,:,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_R, 14, &
+                      moments_i(ips_i-1,:,:,:,:,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_L, 14, &
                       comm0, status, ierr)
     IF (deltapi .EQ. 1) & ! If we have odd Hermite degrees we need a 2nd order stencil
-    CALL mpi_sendrecv(moments_i(ipe_i-1,ijgs_i:ijge_i,ikxs:ikxe,ikys:ikye,izgs:izge,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_R, 15, &
-                      moments_i(ips_i-2,ijgs_i:ijge_i,ikxs:ikxe,ikys:ikye,izgs:izge,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_L, 15, &
+    CALL mpi_sendrecv(moments_i(ipe_i-1,:,:,:,:,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_R, 15, &
+                      moments_i(ips_i-2,:,:,:,:,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_L, 15, &
                       comm0, status, ierr)
 
     !!!!!!!!!!! Send ghost to left neighbour !!!!!!!!!!!!!!!!!!!!!!
-    CALL mpi_cart_shift(comm0, 0, -1, source , dest , ierr)
-    CALL mpi_sendrecv(moments_i(ips_i  ,ijgs_i:ijge_i,ikxs:ikxe,ikys:ikye,izgs:izge,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_L, 16, &
-                      moments_i(ipe_i+1,ijgs_i:ijge_i,ikxs:ikxe,ikys:ikye,izgs:izge,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_R, 16, &
+    CALL mpi_sendrecv(moments_i(ips_i  ,:,:,:,:,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_L, 16, &
+                      moments_i(ipe_i+1,:,:,:,:,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_R, 16, &
                       comm0, status, ierr)
     IF (deltapi .EQ. 1) & ! If we have odd Hermite degrees we need a 2nd order stencil
-    CALL mpi_sendrecv(moments_i(ips_i+1,ijgs_i:ijge_i,ikxs:ikxe,ikys:ikye,izgs:izge,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_L, 17, &
-                      moments_i(ipe_i+2,ijgs_i:ijge_i,ikxs:ikxe,ikys:ikye,izgs:izge,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_R, 17, &
+    CALL mpi_sendrecv(moments_i(ips_i+1,:,:,:,:,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_L, 17, &
+                      moments_i(ipe_i+2,:,:,:,:,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_R, 17, &
                       comm0, status, ierr)
 
 END SUBROUTINE update_ghosts_p_i
 
 SUBROUTINE update_ghosts_z_moments
   IMPLICIT NONE
-
+  CALL cpu_time(t0_ghost)
+  IF(Nz .GT. 1) THEN
     IF(KIN_E) &
     CALL update_ghosts_z_e
     CALL update_ghosts_z_i
-
+  ENDIF
+  CALL cpu_time(t1_ghost)
+  tc_ghost = tc_ghost + (t1_ghost - t0_ghost)
 END SUBROUTINE update_ghosts_z_moments
 
 !Communicate z+1, z+2 moments to left neighboor and z-1, z-2 moments to right one
@@ -123,29 +125,34 @@ SUBROUTINE update_ghosts_z_e
 
   CALL MPI_BARRIER(MPI_COMM_WORLD,ierr)
   IF (num_procs_z .GT. 1) THEN
+
     count = (ipge_e-ipgs_e+1)*(ijge_e-ijgs_e+1)*(ikxe-ikxs+1)*(ikye-ikys+1)
 
-    !!!!!!!!!!! Send ghost to right neighbour !!!!!!!!!!!!!!!!!!!!!!
+    !!!!!!!!!!! Send ghost to up neighbour !!!!!!!!!!!!!!!!!!!!!!
     ! Send the last local moment to fill the -1 neighbour ghost
-    CALL mpi_sendrecv(moments_e(ipgs_e:ipge_e,ijgs_e:ijge_e,ikxs:ikxe,ikys:ikye,ize  ,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_U, 20, & ! Send to right
-                      moments_e(ipgs_e:ipge_e,ijgs_e:ijge_e,ikxs:ikxe,ikys:ikye,ize-1,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_D, 20, & ! Recieve from left
+    CALL mpi_sendrecv(moments_e(:,:,:,:,ize  ,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_U, 20, & ! Send to Up the last
+                      moments_e(:,:,:,:,izs-1,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_D, 20, & ! Receive from Down the first-1
                       comm0, status, ierr)
-    CALL mpi_sendrecv(moments_e(ipgs_e:ipge_e,ijgs_e:ijge_e,ikxs:ikxe,ikys:ikye,ize-1,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_U, 21, & ! Send to right
-                      moments_e(ipgs_e:ipge_e,ijgs_e:ijge_e,ikxs:ikxe,ikys:ikye,ize-2,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_D, 21, & ! Recieve from left
+    CALL mpi_sendrecv(moments_e(:,:,:,:,ize-1,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_U, 21, & ! Send to Up the last-1
+                      moments_e(:,:,:,:,izs-2,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_D, 21, & ! Receive from Down the first-2
                       comm0, status, ierr)
 
-    !!!!!!!!!!! Send ghost to left neighbour !!!!!!!!!!!!!!!!!!!!!!
-    CALL mpi_sendrecv(moments_e(ipgs_e:ipge_e,ijgs_e:ijge_e,ikxs:ikxe,ikys:ikye,izs  ,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_D, 22, & ! Send to left
-                      moments_e(ipgs_e:ipge_e,ijgs_e:ijge_e,ikxs:ikxe,ikys:ikye,izs+1,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_U, 22, & ! Recieve from right
+    !!!!!!!!!!! Send ghost to down neighbour !!!!!!!!!!!!!!!!!!!!!!
+    CALL mpi_sendrecv(moments_e(:,:,:,:,izs  ,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_D, 22, & ! Send to Down the first
+                      moments_e(:,:,:,:,ize+1,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_U, 22, & ! Receive from Up the last+1
                       comm0, status, ierr)
-    CALL mpi_sendrecv(moments_e(ipgs_e:ipge_e,ijgs_e:ijge_e,ikxs:ikxe,ikys:ikye,izs+1,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_D, 23, & ! Send to left
-                      moments_e(ipgs_e:ipge_e,ijgs_e:ijge_e,ikxs:ikxe,ikys:ikye,izs+2,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_U, 23, & ! Recieve from right
+    CALL mpi_sendrecv(moments_e(:,:,:,:,izs+1,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_D, 23, & ! Send to Down the first+1
+                      moments_e(:,:,:,:,ize+2,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_U, 23, & ! Receive from Up the last+2
                       comm0, status, ierr)
   ELSE ! still need to perform periodic boundary conditions
-      moments_e(:,:,:,:,   0,updatetlevel) = moments_e(:,:,:,:,  Nz,updatetlevel)
-      moments_e(:,:,:,:,  -1,updatetlevel) = moments_e(:,:,:,:,Nz-1,updatetlevel)
-      moments_e(:,:,:,:,Nz+1,updatetlevel) = moments_e(:,:,:,:,   1,updatetlevel)
-      moments_e(:,:,:,:,Nz+2,updatetlevel) = moments_e(:,:,:,:,   2,updatetlevel)
+    ! first-1 gets last
+    moments_e(:,:,:,:,izs-1,updatetlevel) = moments_e(:,:,:,:,ize  ,updatetlevel)
+    ! first-2 gets last-1
+    moments_e(:,:,:,:,izs-2,updatetlevel) = moments_e(:,:,:,:,ize-1,updatetlevel)
+    ! last+1 gets first
+    moments_e(:,:,:,:,ize+1,updatetlevel) = moments_e(:,:,:,:,izs  ,updatetlevel)
+    ! last+2 gets first+1
+    moments_e(:,:,:,:,ize+2,updatetlevel) = moments_e(:,:,:,:,izs+1,updatetlevel)
   ENDIF
 
 END SUBROUTINE update_ghosts_z_e
@@ -155,62 +162,70 @@ SUBROUTINE update_ghosts_z_i
   IMPLICIT NONE
   CALL MPI_BARRIER(MPI_COMM_WORLD,ierr)
   IF (num_procs_z .GT. 1) THEN
+
     count = (ipge_i-ipgs_i+1)*(ijge_i-ijgs_i+1)*(ikxe-ikxs+1)*(ikye-ikys+1)
 
-    !!!!!!!!!!! Send ghost to right neighbour !!!!!!!!!!!!!!!!!!!!!!
+    !!!!!!!!!!! Send ghost to up neighbour !!!!!!!!!!!!!!!!!!!!!!
     ! Send the last local moment to fill the -1 neighbour ghost
-    CALL mpi_sendrecv(moments_i(ipgs_i:ipge_i,ijgs_i:ijge_i,ikxs:ikxe,ikys:ikye,ize  ,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_U, 30, & ! Send to right
-                      moments_i(ipgs_i:ipge_i,ijgs_i:ijge_i,ikxs:ikxe,ikys:ikye,ize-1,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_D, 30, & ! Recieve from left
+    CALL mpi_sendrecv(moments_i(:,:,:,:,ize  ,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_U, 30, & ! Send to Up the last
+                      moments_i(:,:,:,:,izs-1,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_D, 30, & ! Receive from Down the first-1
                       comm0, status, ierr)
-    CALL mpi_sendrecv(moments_i(ipgs_i:ipge_i,ijgs_i:ijge_i,ikxs:ikxe,ikys:ikye,ize-1,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_U, 31, & ! Send to right
-                      moments_i(ipgs_i:ipge_i,ijgs_i:ijge_i,ikxs:ikxe,ikys:ikye,ize-2,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_D, 31, & ! Recieve from left
+    CALL mpi_sendrecv(moments_i(:,:,:,:,ize-1,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_U, 31, & ! Send to Up the last-1
+                      moments_i(:,:,:,:,izs-2,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_D, 31, & ! Receive from Down the first-2
                       comm0, status, ierr)
 
-    !!!!!!!!!!! Send ghost to left neighbour !!!!!!!!!!!!!!!!!!!!!!
-    CALL mpi_sendrecv(moments_i(ipgs_i:ipge_i,ijgs_i:ijge_i,ikxs:ikxe,ikys:ikye,izs  ,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_D, 32, & ! Send to left
-                      moments_i(ipgs_i:ipge_i,ijgs_i:ijge_i,ikxs:ikxe,ikys:ikye,izs+1,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_U, 32, & ! Recieve from right
+    !!!!!!!!!!! Send ghost to down neighbour !!!!!!!!!!!!!!!!!!!!!!
+    CALL mpi_sendrecv(moments_i(:,:,:,:,izs  ,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_D, 32, & ! Send to Down the first
+                      moments_i(:,:,:,:,ize+1,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_U, 32, & ! Receive from Up the last+1
                       comm0, status, ierr)
-    CALL mpi_sendrecv(moments_i(ipgs_i:ipge_i,ijgs_i:ijge_i,ikxs:ikxe,ikys:ikye,izs+1,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_D, 33, & ! Send to left
-                      moments_i(ipgs_i:ipge_i,ijgs_i:ijge_i,ikxs:ikxe,ikys:ikye,izs+2,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_U, 33, & ! Recieve from right
+    CALL mpi_sendrecv(moments_i(:,:,:,:,izs+1,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_D, 33, & ! Send to Down the first+1
+                      moments_i(:,:,:,:,ize+2,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_U, 33, & ! Receive from Up the last+2
                       comm0, status, ierr)
   ELSE ! still need to perform periodic boundary conditions
-    moments_i(:,:,:,:,   0,updatetlevel) = moments_i(:,:,:,:,  Nz,updatetlevel)
-    moments_i(:,:,:,:,  -1,updatetlevel) = moments_i(:,:,:,:,Nz-1,updatetlevel)
-    moments_i(:,:,:,:,Nz+1,updatetlevel) = moments_i(:,:,:,:,   1,updatetlevel)
-    moments_i(:,:,:,:,Nz+2,updatetlevel) = moments_i(:,:,:,:,   2,updatetlevel)
+    moments_i(:,:,:,:,izs-1,updatetlevel) = moments_i(:,:,:,:,ize  ,updatetlevel)
+
+    moments_i(:,:,:,:,izs-2,updatetlevel) = moments_i(:,:,:,:,ize-1,updatetlevel)
+
+    moments_i(:,:,:,:,ize+1,updatetlevel) = moments_i(:,:,:,:,izs  ,updatetlevel)
+
+    moments_i(:,:,:,:,ize+2,updatetlevel) = moments_i(:,:,:,:,izs+1,updatetlevel)
   ENDIF
 END SUBROUTINE update_ghosts_z_i
 
 SUBROUTINE update_ghosts_z_phi
 
   IMPLICIT NONE
-  CALL MPI_BARRIER(MPI_COMM_WORLD,ierr)
-  IF (num_procs_z .GT. 1) THEN
-    count = (ikxe-ikxs+1)*(ikye-ikys+1)
-
-    !!!!!!!!!!! Send ghost to right neighbour !!!!!!!!!!!!!!!!!!!!!!
-    ! Send the last local moment to fill the -1 neighbour ghost
-    CALL mpi_sendrecv(phi(ikxs:ikxe,ikys:ikye,  ize), count, MPI_DOUBLE_COMPLEX, nbr_U, 40, & ! Send to right
-                      phi(ikxs:ikxe,ikys:ikye,ize-1), count, MPI_DOUBLE_COMPLEX, nbr_D, 40, & ! Recieve from left
-                      comm0, status, ierr)
-    CALL mpi_sendrecv(phi(ikxs:ikxe,ikys:ikye,ize-1), count, MPI_DOUBLE_COMPLEX, nbr_U, 41, & ! Send to right
-                      phi(ikxs:ikxe,ikys:ikye,ize-2), count, MPI_DOUBLE_COMPLEX, nbr_D, 41, & ! Recieve from left
-                      comm0, status, ierr)
-                      !!!!!!!!!!! Send ghost to left neighbour !!!!!!!!!!!!!!!!!!!!!!
-
-    CALL mpi_sendrecv(phi(ikxs:ikxe,ikys:ikye,  izs), count, MPI_DOUBLE_COMPLEX, nbr_U, 42, & ! Send to right
-                      phi(ikxs:ikxe,ikys:ikye,izs+1), count, MPI_DOUBLE_COMPLEX, nbr_D, 42, & ! Recieve from left
-                      comm0, status, ierr)
-    CALL mpi_sendrecv(phi(ikxs:ikxe,ikys:ikye,izs+1), count, MPI_DOUBLE_COMPLEX, nbr_U, 43, & ! Send to right
-                      phi(ikxs:ikxe,ikys:ikye,izs+2), count, MPI_DOUBLE_COMPLEX, nbr_D, 43, & ! Recieve from left
-                      comm0, status, ierr)
-
-  ELSE ! still need to perform periodic boundary conditions
-    phi(:,:,   0) = phi(:,:,   Nz)
-    phi(:,:,  -1) = phi(:,:, Nz-1)
-    phi(:,:,Nz+1) = phi(:,:,    1)
-    phi(:,:,Nz+2) = phi(:,:,    2)
+  CALL cpu_time(t0_ghost)
+  IF(Nz .GT. 1) THEN
+    CALL MPI_BARRIER(MPI_COMM_WORLD,ierr)
+    IF (num_procs_z .GT. 1) THEN
+      count = (ikxe-ikxs+1) * (ikye-ikys+1)
+
+      !!!!!!!!!!! Send ghost to up neighbour !!!!!!!!!!!!!!!!!!!!!!
+      ! Send the last local moment to fill the -1 neighbour ghost
+      CALL mpi_sendrecv(phi(:,:,ize  ), count, MPI_DOUBLE_COMPLEX, nbr_U, 40, & ! Send to Up the last
+                        phi(:,:,izs-1), count, MPI_DOUBLE_COMPLEX, nbr_D, 40, & ! Receive from Down the first-1
+                        comm0, status, ierr)
+      CALL mpi_sendrecv(phi(:,:,ize-1), count, MPI_DOUBLE_COMPLEX, nbr_U, 41, & ! Send to Up the last-1
+                        phi(:,:,izs-2), count, MPI_DOUBLE_COMPLEX, nbr_D, 41, & ! Receive from Down the first-2
+                        comm0, status, ierr)
+      !!!!!!!!!!! Send ghost to down neighbour !!!!!!!!!!!!!!!!!!!!!!
+      CALL mpi_sendrecv(phi(:,:,izs  ), count, MPI_DOUBLE_COMPLEX, nbr_D, 42, & ! Send to Down the first
+                        phi(:,:,ize+1), count, MPI_DOUBLE_COMPLEX, nbr_U, 42, & ! Receive from Up the last+1
+                        comm0, status, ierr)
+      CALL mpi_sendrecv(phi(:,:,izs+1), count, MPI_DOUBLE_COMPLEX, nbr_D, 43, & ! Send to Down the first+1
+                        phi(:,:,ize+2), count, MPI_DOUBLE_COMPLEX, nbr_U, 43, & ! Receive from Up the last+2
+                        comm0, status, ierr)
+
+    ELSE ! still need to perform periodic boundary conditions
+      phi(:,:,izs-1) = phi(:,:,ize)
+      phi(:,:,izs-2) = phi(:,:,ize-1)
+      phi(:,:,ize+1) = phi(:,:,izs)
+      phi(:,:,ize+2) = phi(:,:,izs+1)
+    ENDIF
   ENDIF
+  CALL cpu_time(t1_ghost)
+  tc_ghost = tc_ghost + (t1_ghost - t0_ghost)
 END SUBROUTINE update_ghosts_z_phi
 
 END MODULE ghosts
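
All the z ghost exchanges above follow one ring pattern: send the last interior
plane(s) up and receive the lower ghost(s) from below, then the mirror exchange
downwards, with the periodic wrap handled by the cartesian topology. A
stripped-down sketch of a single periodic exchange on a 1D array (the
communicator setup here is illustrative; the code itself reuses comm0 and the
nbr_D/nbr_U neighbours from ppinit):

PROGRAM ghost_ring_sketch
  USE mpi
  IMPLICIT NONE
  INTEGER, PARAMETER :: nloc = 4
  INTEGER :: ierr, rank, nproc, nbr_D, nbr_U, comm_ring
  INTEGER :: status(MPI_STATUS_SIZE)
  REAL(8) :: f(0:nloc+1)              ! one ghost cell on each side
  CALL MPI_INIT(ierr)
  CALL MPI_COMM_SIZE(MPI_COMM_WORLD, nproc, ierr)
  ! Periodic 1D cartesian communicator, like the z direction of comm0
  CALL MPI_CART_CREATE(MPI_COMM_WORLD, 1, (/nproc/), (/.TRUE./), .FALSE., comm_ring, ierr)
  CALL MPI_COMM_RANK(comm_ring, rank, ierr)
  CALL MPI_CART_SHIFT(comm_ring, 0, 1, nbr_D, nbr_U, ierr)
  f(:) = REAL(rank, 8)
  ! Send last interior cell up, receive lower ghost from the down neighbour
  CALL MPI_SENDRECV(f(nloc), 1, MPI_DOUBLE_PRECISION, nbr_U, 0, &
                    f(0),    1, MPI_DOUBLE_PRECISION, nbr_D, 0, &
                    comm_ring, status, ierr)
  ! Mirror: send first interior cell down, receive upper ghost from up
  CALL MPI_SENDRECV(f(1),      1, MPI_DOUBLE_PRECISION, nbr_D, 1, &
                    f(nloc+1), 1, MPI_DOUBLE_PRECISION, nbr_U, 1, &
                    comm_ring, status, ierr)
  CALL MPI_FINALIZE(ierr)
END PROGRAM ghost_ring_sketch
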
diff --git a/src/grid_mod.F90 b/src/grid_mod.F90
index 4f67a86e..782699c6 100644
--- a/src/grid_mod.F90
+++ b/src/grid_mod.F90
@@ -46,7 +46,7 @@ MODULE grid
   REAL(dp), DIMENSION(:),   ALLOCATABLE, PUBLIC :: zarray_full
   ! local z weights for computing simpson rule
   INTEGER,  DIMENSION(:),   ALLOCATABLE, PUBLIC :: zweights_SR
-  REAL(dp), PUBLIC, PROTECTED  ::  deltax,  deltay, deltaz, inv_deltaz
+  REAL(dp), PUBLIC, PROTECTED  ::  deltax,  deltay, deltaz, inv_deltaz, diff_dz_coeff
   INTEGER,  PUBLIC, PROTECTED  ::  ixs,  ixe,  iys,  iye,  izs,  ize
   INTEGER,  PUBLIC, PROTECTED  ::  izgs, izge ! ghosts
   LOGICAL,  PUBLIC, PROTECTED  ::  SG = .true.! shifted grid flag
@@ -67,6 +67,8 @@ MODULE grid
   integer(C_INTPTR_T), PUBLIC :: local_nz_offset
   INTEGER, DIMENSION(:), ALLOCATABLE, PUBLIC :: counts_nz
   INTEGER, DIMENSION(:), ALLOCATABLE, PUBLIC :: displs_nz
+  ! "" for j (not parallelized)
+  INTEGER,             PUBLIC :: local_nj_e, local_nj_i
   ! Grids containing position in fourier space
   REAL(dp), DIMENSION(:),     ALLOCATABLE, PUBLIC :: kxarray, kxarray_full
   REAL(dp), DIMENSION(:),     ALLOCATABLE, PUBLIC :: kyarray, kyarray_full
@@ -257,6 +259,9 @@ CONTAINS
     ! Ghosts boundaries
     ijgs_e = ijs_e - 1; ijge_e = ije_e + 1;
     ijgs_i = ijs_i - 1; ijge_i = ije_i + 1;
+    ! Local number of j indices (ghosts included)
+    local_nj_e = ijge_e - ijgs_e + 1
+    local_nj_i = ijge_i - ijgs_i + 1
     ALLOCATE(jarray_e(ijgs_e:ijge_e))
     ALLOCATE(jarray_i(ijgs_i:ijge_i))
     DO ij = ijgs_e,ijge_e; jarray_e(ij) = ij-1; END DO
@@ -290,7 +295,6 @@ CONTAINS
      kxarray_full(ikx) = REAL(ikx-1,dp) * deltakx
     END DO
     !! Parallel distribution
-    ! Start and END indices of grid
     ikxs = local_nkx_offset + 1
     ikxe = ikxs + local_nkx - 1
     ALLOCATE(kxarray(ikxs:ikxe))
@@ -337,6 +341,7 @@ CONTAINS
     ! Start and END indices of grid
     ikys = 1
     ikye = Nky
+    local_nky = ikye - ikys + 1
     ALLOCATE(kyarray(ikys:ikye))
     IF (Ny .EQ. 1) THEN ! "cancel" y dimension
       deltaky         = 1._dp
@@ -371,7 +376,7 @@ CONTAINS
       END DO
       ! Build the full grids on process 0 to diagnose it without comm
       ! ky
-      DO iky = 1,Nky
+      DO iky = ikys,ikye
         kyarray_full(iky) = deltaky*(MODULO(iky-1,Nky/2)-Nky/2*FLOOR(2.*real(iky-1)/real(Nky)))
         IF (iky .EQ. Ny/2+1) kyarray_full(iky) = -kyarray_full(iky)
       END DO
@@ -400,8 +405,9 @@ CONTAINS
     ! Length of the flux tube (in ballooning angle)
     Lz         = 2_dp*pi*Npol
     ! Z stepping (#interval = #points since periodic)
-    deltaz     = Lz/REAL(Nz,dp)
-    inv_deltaz = 1._dp/deltaz
+    deltaz        = Lz/REAL(Nz,dp)
+    inv_deltaz    = 1._dp/deltaz
+    diff_dz_coeff = (deltaz/2._dp)**2
     IF (SG) THEN
       grid_shift = deltaz/2._dp
     ELSE
@@ -409,6 +415,7 @@ CONTAINS
     ENDIF
     ! Build the full grids on process 0 to diagnose it without comm
     ALLOCATE(zarray_full(1:Nz))
+    IF (Nz .EQ. 1) Npol = 0
     DO iz = 1,total_nz
       zarray_full(iz) = REAL(iz-1,dp)*deltaz - PI*REAL(Npol,dp)
     END DO
diff --git a/src/inital.F90 b/src/inital.F90
index d527d74f..ac0222c1 100644
--- a/src/inital.F90
+++ b/src/inital.F90
@@ -69,6 +69,7 @@ SUBROUTINE inital
   IF (my_id .EQ. 0) WRITE(*,*) 'Ghosts communication'
   CALL update_ghosts_p_moments
   CALL update_ghosts_z_moments
+  CALL update_ghosts_z_phi
   !! End of phi and moments initialization
 
   ! Save (kx,0) and (0,ky) modes for num exp
@@ -77,7 +78,8 @@ SUBROUTINE inital
   CALL play_with_modes
 
   ! Load the COSOlver collision operator coefficients
-  IF(cosolver_coll) CALL load_COSOlver_mat
+  IF(cosolver_coll) &
+  CALL load_COSOlver_mat
 
   !! Preparing auxiliary arrays at initial state
   ! particle density, fluid velocity and temperature (used in diagnose)
@@ -306,9 +308,9 @@ SUBROUTINE init_phi
     !symmetry at kx = 0 to keep real inverse transform
     IF ( contains_kx0 ) THEN
       DO iky=2,Nky/2
-        phi(ikx_0,iky,:) = phi(ikx_0,Nky+2-iky,:)
+        phi(ikx_0,iky,izs:ize) = phi(ikx_0,Nky+2-iky,izs:ize)
       END DO
-      phi(ikx_0,Ny/2,:) = REAL(phi(ikx_0,Ny/2,:)) !origin must be real
+      phi(ikx_0,Ny/2,izs:ize) = REAL(phi(ikx_0,Ny/2,izs:ize)) !origin must be real
     ENDIF
 
     !**** ensure no previous moments initialization
diff --git a/src/memory.F90 b/src/memory.F90
index 24be2ae9..ad8a1ea1 100644
--- a/src/memory.F90
+++ b/src/memory.F90
@@ -16,7 +16,7 @@ SUBROUTINE memory
   CALL allocate_array(           phi, ikxs,ikxe, ikys,ikye, izgs,izge)
   CALL allocate_array(        phi_ZF, ikxs,ikxe, izs,ize)
   CALL allocate_array(        phi_EM, ikys,ikye, izs,ize)
-  CALL allocate_array(inv_poisson_op, ikxs,ikxe, ikys,ikye, izgs,izge)
+  CALL allocate_array(inv_poisson_op, ikxs,ikxe, ikys,ikye, izs,ize)
 
   !Electrons arrays
   IF(KIN_E) THEN
diff --git a/src/moments_eq_rhs_mod.F90 b/src/moments_eq_rhs_mod.F90
index 10c765b3..87380472 100644
--- a/src/moments_eq_rhs_mod.F90
+++ b/src/moments_eq_rhs_mod.F90
@@ -1,7 +1,14 @@
 MODULE moments_eq_rhs
   IMPLICIT NONE
-  PUBLIC :: moments_eq_rhs_e, moments_eq_rhs_i
+  PUBLIC :: compute_moments_eq_rhs
 CONTAINS
+
+SUBROUTINE compute_moments_eq_rhs
+  USE model, only: KIN_E
+  IMPLICIT NONE
+  IF(KIN_E) CALL moments_eq_rhs_e
+  CALL moments_eq_rhs_i
+END SUBROUTINE compute_moments_eq_rhs
 !_____________________________________________________________________________!
 !_____________________________________________________________________________!
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
@@ -19,7 +26,7 @@ SUBROUTINE moments_eq_rhs_e
   USE prec_const
   USE collision
   use geometry
-  USE calculus, ONLY : interp_z, grad_z, grad_z4
+  USE calculus, ONLY : interp_z, grad_z, grad_z2
   IMPLICIT NONE
 
   INTEGER     :: p_int, j_int ! loops indices and polynom. degrees
@@ -31,7 +38,7 @@ SUBROUTINE moments_eq_rhs_e
   COMPLEX(dp) :: i_ky
   ! To store derivatives and odd-even z grid interpolations
   COMPLEX(dp), DIMENSION(izs:ize) :: ddznepp1j, ddznepm1j, &
-              nepp1j, nepp1jm1, nepm1j, nepm1jm1, nepm1jp1, ddz4Nepj
+              nepp1j, nepp1jm1, nepm1j, nepm1jm1, nepm1jp1, ddz2Nepj
 
    ! Measuring execution time
   CALL cpu_time(t0_rhs)
@@ -58,7 +65,7 @@ SUBROUTINE moments_eq_rhs_e
         CALL interp_z(eo,nadiab_moments_e(ip-1,ij+1,ikx,iky,izgs:izge),  nepm1jp1(izs:ize))
         CALL interp_z(eo,nadiab_moments_e(ip-1,ij-1,ikx,iky,izgs:izge),  nepm1jm1(izs:ize))
         ! Parallel hyperdiffusion
-        CALL  grad_z4(moments_e(ip,ij,ikx,iky,:,updatetlevel),   ddz4Nepj(:))
+        CALL  grad_z2(moments_e(ip,ij,ikx,iky,izgs:izge,updatetlevel),   ddz2Nepj(izs:ize))
 
         zloope : DO  iz = izs,ize
           ! kperp
@@ -77,32 +84,29 @@ SUBROUTINE moments_eq_rhs_e
           Tnepjp1 = xnepjp1(ij) * nadiab_moments_e(ip,ij+1,ikx,iky,iz)
           ! term propto n_e^{p,j-1}
           Tnepjm1 = xnepjm1(ij) * nadiab_moments_e(ip,ij-1,ikx,iky,iz)
-          ! Parallel dynamic
-          IF(Nz .GT. 1) THEN
-            ! ddz derivative for Landau damping term
-            Tpar = xnepp1j(ip) * ddznepp1j(iz) + xnepm1j(ip) * ddznepm1j(iz)
-            ! Mirror terms (trapping)
-            Tnepp1j   = ynepp1j  (ip,ij) * nepp1j  (iz)
-            Tnepp1jm1 = ynepp1jm1(ip,ij) * nepp1jm1(iz)
-            Tnepm1j   = ynepm1j  (ip,ij) * nepm1j  (iz)
-            Tnepm1jm1 = ynepm1jm1(ip,ij) * nepm1jm1(iz)
-            ! Trapping terms
-            Unepm1j   = znepm1j  (ip,ij) * nepm1j  (iz)
-            Unepm1jp1 = znepm1jp1(ip,ij) * nepm1jp1(iz)
-            Unepm1jm1 = znepm1jm1(ip,ij) * nepm1jm1(iz)
+          ! ddz derivative for Landau damping term
+          Tpar = xnepp1j(ip) * ddznepp1j(iz) + xnepm1j(ip) * ddznepm1j(iz)
+          ! Mirror terms (trapping)
+          Tnepp1j   = ynepp1j  (ip,ij) * nepp1j  (iz)
+          Tnepp1jm1 = ynepp1jm1(ip,ij) * nepp1jm1(iz)
+          Tnepm1j   = ynepm1j  (ip,ij) * nepm1j  (iz)
+          Tnepm1jm1 = ynepm1jm1(ip,ij) * nepm1jm1(iz)
+          ! Trapping terms
+          Unepm1j   = znepm1j  (ip,ij) * nepm1j  (iz)
+          Unepm1jp1 = znepm1jp1(ip,ij) * nepm1jp1(iz)
+          Unepm1jm1 = znepm1jm1(ip,ij) * nepm1jm1(iz)
 
-            Tmir = Tnepp1j + Tnepp1jm1 + Tnepm1j + Tnepm1jm1 + Unepm1j + Unepm1jp1 + Unepm1jm1
-          ENDIF
+          Tmir = Tnepp1j + Tnepp1jm1 + Tnepm1j + Tnepm1jm1 + Unepm1j + Unepm1jp1 + Unepm1jm1
           !! Electrical potential term
           IF ( p_int .LE. 2 ) THEN ! kronecker p0 p1 p2
-          Tphi = phi(ikx,iky,iz) * (xphij(ip,ij)*kernel_e(ij,ikx,iky,iz,eo) &
-                   + xphijp1(ip,ij)*kernel_e(ij+1,ikx,iky,iz,eo) &
-                   + xphijm1(ip,ij)*kernel_e(ij-1,ikx,iky,iz,eo) )
+            Tphi = (xphij  (ip,ij)*kernel_e(ij  ,ikx,iky,iz,eo) &
+                  + xphijp1(ip,ij)*kernel_e(ij+1,ikx,iky,iz,eo) &
+                  + xphijm1(ip,ij)*kernel_e(ij-1,ikx,iky,iz,eo))*phi(ikx,iky,iz)
           ELSE
             Tphi = 0._dp
           ENDIF
 
-          !! Sum of all linear terms (the sign is inverted to match RHS)
+          !! Sum of all RHS terms
           moments_rhs_e(ip,ij,ikx,iky,iz,updatetlevel) = &
               ! Perpendicular magnetic gradient/curvature effects
               - imagu*Ckxky(ikx,iky,iz,eo)*hatR(iz,eo)* (Tnepj + Tnepp2j + Tnepm2j + Tnepjp1 + Tnepjm1)&
@@ -112,18 +116,14 @@ SUBROUTINE moments_eq_rhs_e
               - gradzB(iz,eo)* Tmir  *gradz_coeff(iz,eo) &
               ! Drives (density + temperature gradients)
               - i_ky * Tphi &
-              ! Electrostatic background gradients
-              - i_ky * K_E * moments_e(ip,ij,ikx,iky,iz,updatetlevel) &
               ! Numerical perpendicular hyperdiffusion (totally artificial, for stability purpose)
               - (mu_x*kx**4 + mu_y*ky**4)*moments_e(ip,ij,ikx,iky,iz,updatetlevel) &
-              ! Numerical parallel hyperdiffusion "- (mu_z*kz**4)"
-              - mu_z * ddz4Nepj(iz) &
+              ! Numerical parallel diffusion, acts as "-mu_z*(kz*deltaz/2)**2" on Fourier modes
+              + mu_z * diff_dz_coeff * ddz2Nepj(iz) &
               ! Collision term
-              + TColl_e(ip,ij,ikx,iky,iz)
-
-          !! Adding non linearity
-            moments_rhs_e(ip,ij,ikx,iky,iz,updatetlevel) = &
-              moments_rhs_e(ip,ij,ikx,iky,iz,updatetlevel) - Sepj(ip,ij,ikx,iky,iz)
+              + TColl_e(ip,ij,ikx,iky,iz) &
+              ! Nonlinear term
+              - Sepj(ip,ij,ikx,iky,iz)
 
          END DO zloope
         END DO kyloope
@@ -152,7 +152,7 @@ SUBROUTINE moments_eq_rhs_i
   USE model
   USE prec_const
   USE collision
-  USE calculus, ONLY : interp_z, grad_z, grad_z4
+  USE calculus, ONLY : interp_z, grad_z, grad_z2
   IMPLICIT NONE
 
   INTEGER     :: p_int, j_int ! loops indices and polynom. degrees
@@ -164,7 +164,7 @@ SUBROUTINE moments_eq_rhs_i
   COMPLEX(dp) :: i_ky
   ! To store derivatives and odd-even z grid interpolations
   COMPLEX(dp), DIMENSION(izs:ize) :: ddznipp1j, ddznipm1j, &
-              nipp1j, nipp1jm1, nipm1j, nipm1jm1, nipm1jp1, ddz4Nipj
+              nipp1j, nipp1jm1, nipm1j, nipm1jm1, nipm1jp1, ddz2Nipj
   ! Measuring execution time
   CALL cpu_time(t0_rhs)
 
@@ -190,7 +190,7 @@ SUBROUTINE moments_eq_rhs_i
         CALL interp_z(eo,nadiab_moments_i(ip-1,ij+1,ikx,iky,izgs:izge), nipm1jp1(izs:ize))
         CALL interp_z(eo,nadiab_moments_i(ip-1,ij-1,ikx,iky,izgs:izge), nipm1jm1(izs:ize))
         ! for hyperdiffusion
-        CALL  grad_z4(moments_i(ip,ij,ikx,iky,:,updatetlevel),  ddz4Nipj(:))
+        CALL  grad_z2(moments_i(ip,ij,ikx,iky,izgs:izge,updatetlevel),  ddz2Nipj(:))
 
         zloopi : DO  iz = izs,ize
           kperp2= kparray(ikx,iky,iz,eo)**2
@@ -211,52 +211,48 @@ SUBROUTINE moments_eq_rhs_i
           ! Tperp
           Tperp = Tnipj + Tnipp2j + Tnipm2j + Tnipjp1 + Tnipjm1
           ! Parallel dynamic
-          IF(Nz .GT. 1) THEN
-            ! ddz derivative for Landau damping term
-            Tpar = xnipp1j(ip) * ddznipp1j(iz) + xnipm1j(ip) * ddznipm1j(iz)
-            ! Mirror terms
-            Tnipp1j   = ynipp1j  (ip,ij) * nipp1j  (iz)
-            Tnipp1jm1 = ynipp1jm1(ip,ij) * nipp1jm1(iz)
-            Tnipm1j   = ynipm1j  (ip,ij) * nipm1j  (iz)
-            Tnipm1jm1 = ynipm1jm1(ip,ij) * nipm1jm1(iz)
-            ! Trapping terms
-            Unipm1j   = znipm1j  (ip,ij) * nipm1j  (iz)
-            Unipm1jp1 = znipm1jp1(ip,ij) * nipm1jp1(iz)
-            Unipm1jm1 = znipm1jm1(ip,ij) * nipm1jm1(iz)
+          ! ddz derivative for Landau damping term
+          Tpar = xnipp1j(ip) * ddznipp1j(iz) + xnipm1j(ip) * ddznipm1j(iz)
+          ! Mirror terms
+          Tnipp1j   = ynipp1j  (ip,ij) * nipp1j  (iz)
+          Tnipp1jm1 = ynipp1jm1(ip,ij) * nipp1jm1(iz)
+          Tnipm1j   = ynipm1j  (ip,ij) * nipm1j  (iz)
+          Tnipm1jm1 = ynipm1jm1(ip,ij) * nipm1jm1(iz)
+          ! Trapping terms
+          Unipm1j   = znipm1j  (ip,ij) * nipm1j  (iz)
+          Unipm1jp1 = znipm1jp1(ip,ij) * nipm1jp1(iz)
+          Unipm1jm1 = znipm1jm1(ip,ij) * nipm1jm1(iz)
+
+          Tmir = Tnipp1j + Tnipp1jm1 + Tnipm1j + Tnipm1jm1 + Unipm1j + Unipm1jp1 + Unipm1jm1
 
-            Tmir = Tnipp1j + Tnipp1jm1 + Tnipm1j + Tnipm1jm1 + Unipm1j + Unipm1jp1 + Unipm1jm1
-          ENDIF
           !! Electrical potential term
           IF ( p_int .LE. 2 ) THEN ! kronecker p0 p1 p2
-            Tphi = phi(ikx,iky,iz) * (xphij(ip,ij)*kernel_i(ij,ikx,iky,iz,eo) &
-                     + xphijp1(ip,ij)*kernel_i(ij+1,ikx,iky,iz,eo) &
-                     + xphijm1(ip,ij)*kernel_i(ij-1,ikx,iky,iz,eo) )
+            Tphi = (xphij  (ip,ij)*kernel_i(ij  ,ikx,iky,iz,eo) &
+                  + xphijp1(ip,ij)*kernel_i(ij+1,ikx,iky,iz,eo) &
+                  + xphijm1(ip,ij)*kernel_i(ij-1,ikx,iky,iz,eo))*phi(ikx,iky,iz)
           ELSE
             Tphi = 0._dp
           ENDIF
 
-          !! Sum of all linear terms (the sign is inverted to match RHS)
+          !! Sum of all RHS terms
           moments_rhs_i(ip,ij,ikx,iky,iz,updatetlevel) = &
               ! Perpendicular magnetic gradient/curvature effects
               - imagu*Ckxky(ikx,iky,iz,eo)*hatR(iz,eo) * Tperp &
-              ! Parallel coupling (Landau Damping)
+              ! Parallel coupling (Landau damping)
               - gradz_coeff(iz,eo) * Tpar &
               ! Mirror term (parallel magnetic gradient)
               - gradzB(iz,eo) * gradz_coeff(iz,eo) * Tmir &
               ! Drives (density + temperature gradients)
               - i_ky * Tphi &
-              ! Electrostatic background gradients
-              - i_ky * K_E * moments_i(ip,ij,ikx,iky,iz,updatetlevel) &
               ! Numerical hyperdiffusion (totally artificial, for stability purpose)
               - (mu_x*kx**4 + mu_y*ky**4)*moments_i(ip,ij,ikx,iky,iz,updatetlevel) &
-              ! Numerical parallel hyperdiffusion "- (mu_z*kz**4)"
-              - mu_z * ddz4Nipj(iz) &
+              ! Numerical parallel diffusion, acts as "-mu_z*(kz*deltaz/2)**2" on Fourier modes
+              + mu_z * diff_dz_coeff * ddz2Nipj(iz) &
               ! Collision term
-              + TColl_i(ip,ij,ikx,iky,iz)
+              + TColl_i(ip,ij,ikx,iky,iz)&
+              ! Nonlinear term
+              - Sipj(ip,ij,ikx,iky,iz)
 
-          !! Adding non linearity
-           moments_rhs_i(ip,ij,ikx,iky,iz,updatetlevel) = &
-             moments_rhs_i(ip,ij,ikx,iky,iz,updatetlevel) - Sipj(ip,ij,ikx,iky,iz)
           END DO zloopi
         END DO kyloopi
       END DO kxloopi
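
On the hyperdiffusion change above: the removed term -mu_z*ddz4 applied a fourth
z derivative, while the new term adds +mu_z*(deltaz/2)**2 * d2/dz2, which on a
Fourier mode exp(i*kz*z) evaluates to -mu_z*(kz*deltaz/2)**2, i.e. pure damping
for mu_z > 0. A small serial sketch checking the sign and accuracy of the
dz2_usu stencil on a sine wave (grid size and names are illustrative):

PROGRAM dz2_stencil_sketch
  IMPLICIT NONE
  INTEGER, PARAMETER :: Nz = 64
  REAL(8), PARAMETER :: pi = ACOS(-1.d0)
  ! FD4 centered second-derivative stencil, as in calculus_mod
  REAL(8), DIMENSION(-2:2), PARAMETER :: dz2_usu = &
    (/ -1.d0/12.d0, 4.d0/3.d0, -5.d0/2.d0, 4.d0/3.d0, -1.d0/12.d0 /)
  REAL(8) :: f(-1:Nz+2), d2f(1:Nz), dz, err
  INTEGER :: iz
  dz = 2.d0*pi/REAL(Nz,8)
  DO iz = -1, Nz+2
    f(iz) = SIN(REAL(iz-1,8)*dz)      ! periodic sample of sin(z), ghosts included
  END DO
  DO iz = 1, Nz
    d2f(iz) = SUM(dz2_usu * f(iz-2:iz+2)) / dz**2
  END DO
  ! The FD4 second derivative of sin(z) should be close to -sin(z)
  err = 0.d0
  DO iz = 1, Nz
    err = MAX(err, ABS(d2f(iz) + SIN(REAL(iz-1,8)*dz)))
  END DO
  WRITE(*,*) 'max error of FD4 d2/dz2 vs -sin(z):', err
END PROGRAM dz2_stencil_sketch
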
diff --git a/src/nonlinear_mod.F90 b/src/nonlinear_mod.F90
index b327b44b..c70de47a 100644
--- a/src/nonlinear_mod.F90
+++ b/src/nonlinear_mod.F90
@@ -248,7 +248,7 @@ zloopi: DO iz = izs,ize
             Gx_cmpx(ikx,iky) = 0._dp
             IF(iky .EQ. iky_0) THEN
               Fx_cmpx(ikx,iky) = imagu*kx* phi(ikx,iky,iz) * kerneln
-              smax = MIN( (in-1)+(ij-1), jmaxe );
+              smax = MIN( (in-1)+(ij-1), jmaxi );
               DO is = 1, smax+1 ! sum truncation on number of moments
                 Gx_cmpx(ikx,iky) = Gx_cmpx(ikx,iky) + &
                   dnjs(in,ij,is) * moments_i(ip,is,ikx,iky,iz,updatetlevel)
@@ -377,7 +377,7 @@ zloopi: DO iz = izs,ize
           kyloopi: DO iky = ikys,ikye ! Loop over ky
             ! Zonal terms (=0 for all ky not 0)
             Fx_cmpx(ikx,iky) = imagu*kx* phi(ikx,iky,iz) * kerneln
-            smax = MIN( (in-1)+(ij-1), jmaxe );
+            smax = MIN( (in-1)+(ij-1), jmaxi );
             DO is = 1, smax+1 ! sum truncation on number of moments
               Gx_cmpx(ikx,iky) = Gx_cmpx(ikx,iky) + &
                 dnjs(in,ij,is) * moments_i(ip,is,ikx,iky,iz,updatetlevel)
diff --git a/src/numerics_mod.F90 b/src/numerics_mod.F90
index 4d3f5af8..d8e2b178 100644
--- a/src/numerics_mod.F90
+++ b/src/numerics_mod.F90
@@ -90,7 +90,7 @@ END SUBROUTINE evaluate_kernels
 !******************************************************************************!
 
 !******************************************************************************!
-!!!!!!! Evaluate polarisation operator for Poisson equation
+!!!!!!! Evaluate inverse polarisation operator for Poisson equation
 !******************************************************************************!
 SUBROUTINE evaluate_poisson_op
   USE basic
@@ -101,20 +101,21 @@ SUBROUTINE evaluate_poisson_op
   REAL(dp)    :: pol_i, pol_e     ! (Z_a^2/tau_a (1-sum_n kernel_na^2))
   INTEGER     :: ini,ine
 
+  ! This term has no staggered grid dependence. It is evaluated on the
+  ! even z grid since Poisson uses only the p=0 moments and phi.
   kxloop: DO ikx = ikxs,ikxe
   kyloop: DO iky = ikys,ikye
-  zloop:  DO iz  = izgs,izge
-  ! This term is evalued on the even z grid since poisson uses only p=0 and phi
+  zloop:  DO iz  = izs,ize
   IF( (kxarray(ikx).EQ.0._dp) .AND. (kyarray(iky).EQ.0._dp) ) THEN
       inv_poisson_op(ikx, iky, iz) =  0._dp
-    ELSE
+  ELSE
     !!!!!!!!!!!!!!!!! Ion contribution
     ! loop over n only if the max polynomial degree
     pol_i = 0._dp
     DO ini=1,jmaxi+1
       pol_i = pol_i  + qi2_taui*kernel_i(ini,ikx,iky,iz,0)**2 ! ... sum recursively ...
     END DO
-    !!!!!!!!!!!!! Electron contribution\
+    !!!!!!!!!!!!! Electron contribution
     pol_e = 0._dp
     ! Kinetic model
     IF (KIN_E) THEN
@@ -267,12 +268,12 @@ SUBROUTINE save_EM_ZF_modes
   IF(contains_kx0) THEN
     IF(KIN_E) &
     moments_e_EM(ips_e:ipe_e,ijs_e:ije_e,ikys:ikye,izs:ize) = moments_e(ips_e:ipe_e,ijs_e:ije_e,ikx_0,ikys:ikye,izs:ize,updatetlevel)
-    moments_i_EM(ips_e:ipe_e,ijs_e:ije_e,ikys:ikye,izs:ize) = moments_i(ips_i:ipe_i,ijs_i:ije_i,ikx_0,ikys:ikye,izs:ize,updatetlevel)
+    moments_i_EM(ips_i:ipe_i,ijs_i:ije_i,ikys:ikye,izs:ize) = moments_i(ips_i:ipe_i,ijs_i:ije_i,ikx_0,ikys:ikye,izs:ize,updatetlevel)
     phi_EM(ikys:ikye,izs:ize) = phi(ikx_0,ikys:ikye,izs:ize)
   ELSE
     IF(KIN_E) &
     moments_e_EM(ips_e:ipe_e,ijs_e:ije_e,ikys:ikye,izs:ize) = 0._dp
-    moments_i_EM(ips_e:ipe_e,ijs_e:ije_e,ikys:ikye,izs:ize) = 0._dp
+    moments_i_EM(ips_i:ipe_i,ijs_i:ije_i,ikys:ikye,izs:ize) = 0._dp
     phi_EM(ikys:ikye,izs:ize) = 0._dp
   ENDIF
 END SUBROUTINE
diff --git a/src/poisson.F90 b/src/poisson.F90
index 18eac749..c30d9d12 100644
--- a/src/poisson.F90
+++ b/src/poisson.F90
@@ -6,7 +6,6 @@ SUBROUTINE poisson
   USE array
   USE fields
   USE grid
-  USE ghosts, ONLY: update_ghosts_z_phi
   USE calculus, ONLY : simpson_rule_z
   USE utility,  ONLY : manual_3D_bcast
   use model, ONLY : qe2_taue, qi2_taui, q_e, q_i, lambdaD, KIN_E
@@ -19,37 +18,42 @@ SUBROUTINE poisson
   COMPLEX(dp) :: fa_phi, intf_   ! current flux averaged phi
   INTEGER     :: count !! mpi integer to broadcast the electric potential at the end
   COMPLEX(dp) :: buffer(ikxs:ikxe,ikys:ikye)
-  COMPLEX(dp), DIMENSION(izgs:izge) :: rho_i, rho_e     ! charge density q_a n_a
+  COMPLEX(dp), DIMENSION(izs:ize) :: rho_i, rho_e     ! charge density q_a n_a
 
-  !! Poisson can be solved only for process containing ip=1
+  ! Execution time start
+  CALL cpu_time(t0_poisson)
+  !! Poisson can be solved only for process containing p=0
   IF ( (ips_e .EQ. ip0_e) .AND. (ips_i .EQ. ip0_i) ) THEN
-    ! Execution time start
-    CALL cpu_time(t0_poisson)
+
 
     kxloop: DO ikx = ikxs,ikxe
       kyloop: DO iky = ikys,ikye
         !!!! Compute ion gyro charge density
         rho_i = 0._dp
         DO ini=1,jmaxi+1
-          rho_i(izs:ize) = rho_i(izs:ize) &
-           +q_i*kernel_i(ini,ikx,iky,izs:ize,0)*moments_i(ip0_i,ini,ikx,iky,izs:ize,updatetlevel)
+          DO iz = izs,ize
+          rho_i(iz) = rho_i(iz) &
+           +q_i*kernel_i(ini,ikx,iky,iz,0)*moments_i(ip0_i,ini,ikx,iky,iz,updatetlevel)
+          ENDDO
         END DO
 
         !!!! Compute electron gyro charge density
         rho_e = 0._dp
         IF (KIN_E) THEN ! Kinetic electrons
           DO ine=1,jmaxe+1
-            rho_e(izs:ize) = rho_e(izs:ize) &
-             +q_e*kernel_e(ine,ikx,iky,izs:ize,0)*moments_e(ip0_e,ine, ikx,iky,izs:ize,updatetlevel)
+            DO iz = izs,ize
+              rho_e(iz) = rho_e(iz) &
+                +q_e*kernel_e(ine,ikx,iky,iz,0)*moments_e(ip0_e,ine,ikx,iky,iz,updatetlevel)
+            ENDDO
           END DO
         ELSE  ! Adiabatic electrons
           ! Adiabatic charge density (linked to flux averaged phi)
           fa_phi = 0._dp
           IF(kyarray(iky).EQ.0._dp) THEN ! take ky=0 mode (average)
            DO ini=1,jmaxi+1 ! sum over FLR contributions
-                rho_e(izgs:izge) = Jacobian(izgs:izge,0)*moments_i(ip0_i,ini,ikx,iky,izgs:izge,updatetlevel)&
-                           *kernel_i(ini,ikx,iky,izgs:izge,0)*(inv_poisson_op(ikx,iky,izgs:izge)-1._dp)
-                call simpson_rule_z(rho_e,intf_) ! integrate over z
+                rho_e(izs:ize) = Jacobian(izs:ize,0)*moments_i(ip0_i,ini,ikx,iky,izs:ize,updatetlevel)&
+                           *kernel_i(ini,ikx,iky,izs:ize,0)*(inv_poisson_op(ikx,iky,izs:ize)-1._dp)
+                call simpson_rule_z(rho_e(izs:ize),intf_) ! integrate over z
                 fa_phi = fa_phi + intf_
             ENDDO
             rho_e(izs:ize) = fa_phi*iInt_Jacobian !Normalize by 1/int(Jxyz)dz
@@ -57,12 +61,14 @@ SUBROUTINE poisson
         ENDIF
 
         !!!!!!!!!!!!!!! Inverting the poisson equation !!!!!!!!!!!!!!!!!!!!!!!!!!
-        phi(ikx, iky, izs:ize) =  (rho_e(izs:ize) + rho_i(izs:ize))*inv_poisson_op(ikx,iky,izs:ize)
+        DO iz = izs,ize
+          phi(ikx, iky, iz) = (rho_e(iz) + rho_i(iz))*inv_poisson_op(ikx,iky,iz)
+        ENDDO
       END DO kyloop
     END DO kxloop
 
     ! Cancel origin singularity
-    IF (contains_kx0 .AND. contains_ky0) phi(ikx_0,iky_0,izs:ize) = 0._dp
+    IF (contains_kx0 .AND. contains_ky0) phi(ikx_0,iky_0,:) = 0._dp
 
   ENDIF
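For the adiabatic-electron branch above, the flux-surface average can be sketched as follows, reusing the helper names from the hunk (simpson_rule_z, Jacobian, iInt_Jacobian) on a generic local field f (a minimal sketch, not the literal code):

    ! <f> = int( J(z) f(z) dz ) / int( J(z) dz )
    COMPLEX(dp), DIMENSION(izs:ize) :: integrand
    COMPLEX(dp) :: intf_, f_avg
    integrand(izs:ize) = Jacobian(izs:ize,0)*f(izs:ize) ! Jacobian-weighted field
    CALL simpson_rule_z(integrand(izs:ize), intf_)      ! integrate along z
    f_avg = intf_*iInt_Jacobian                         ! normalize by int(J)dz

With z distributed, simpson_rule_z presumably reduces the partial integrals over comm_z, which is why the integrand no longer needs the ghost range izgs:izge.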
 
diff --git a/src/ppexit.F90 b/src/ppexit.F90
index b6b04cd9..0e557085 100644
--- a/src/ppexit.F90
+++ b/src/ppexit.F90
@@ -9,7 +9,7 @@ SUBROUTINE ppexit
 
 
   CALL finalize_plans
-  CALL mpi_barrier(MPI_COMM_WORLD, ierr)
-  CALL mpi_finalize(ierr)
+  CALL MPI_BARRIER(MPI_COMM_WORLD, ierr)
+  CALL MPI_FINALIZE(ierr)
 
 END SUBROUTINE ppexit
diff --git a/src/ppinit.F90 b/src/ppinit.F90
index a51d666a..b403fdba 100644
--- a/src/ppinit.F90
+++ b/src/ppinit.F90
@@ -55,18 +55,19 @@ SUBROUTINE ppinit
   CALL MPI_CART_COORDS(comm0,rank_0,ndims,coords,ierr)
 
   !
-  !  Partitions 2-dim cartesian topology of comm0 into 1-dim cartesian subgrids
+  !  Partitions 3-dim cartesian topology of comm0 into 1-dim cartesian subgrids
   !
-  CALL MPI_CART_SUB (comm0, (/.TRUE.,.FALSE.,.FALSE./), comm_p, ierr)
+  CALL MPI_CART_SUB (comm0, (/.TRUE.,.FALSE.,.FALSE./),  comm_p, ierr)
   CALL MPI_CART_SUB (comm0, (/.FALSE.,.TRUE.,.FALSE./), comm_kx, ierr)
   CALL MPI_CART_SUB (comm0, (/.FALSE.,.FALSE.,.TRUE./),  comm_z, ierr)
-  ! Find id inside the sub communicators
+  ! Find id inside the 1d-sub communicators
   CALL MPI_COMM_RANK(comm_p,  rank_p,  ierr)
   CALL MPI_COMM_RANK(comm_kx, rank_kx, ierr)
   CALL MPI_COMM_RANK(comm_z,  rank_z,  ierr)
+
   ! Find neighbours
-  CALL MPI_CART_SHIFT(comm0, 0, 1, nbr_L, nbr_R, ierr)
-  CALL MPI_CART_SHIFT(comm0, 1, 1, nbr_B, nbr_T, ierr)
-  CALL MPI_CART_SHIFT(comm0, 2, 1, nbr_D, nbr_U, ierr)
+  CALL MPI_CART_SHIFT(comm0, 0, 1, nbr_L, nbr_R, ierr) !left   right neighbours
+  CALL MPI_CART_SHIFT(comm0, 1, 1, nbr_B, nbr_T, ierr) !bottom top   neighbours
+  CALL MPI_CART_SHIFT(comm0, 2, 1, nbr_D, nbr_U, ierr) !down   up    neighbours
 
 END SUBROUTINE ppinit
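The MPI_CART_SUB calls above assume that comm0 was created as a 3-D Cartesian communicator earlier in ppinit, outside this hunk. A minimal sketch of that step, with the periodicity flags as assumptions (the z direction may well be periodic in the actual code):

    INTEGER :: ndims, dims(3)
    LOGICAL :: periods(3), reorder
    ndims   = 3
    dims    = (/num_procs_p, num_procs_kx, num_procs_z/) ! p, kx, z decomposition
    periods = (/.FALSE., .FALSE., .FALSE./)              ! assumed non-periodic
    reorder = .TRUE.
    CALL MPI_CART_CREATE(MPI_COMM_WORLD, ndims, dims, periods, reorder, comm0, ierr)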
diff --git a/src/processing_mod.F90 b/src/processing_mod.F90
index 06430661..0a4878f4 100644
--- a/src/processing_mod.F90
+++ b/src/processing_mod.F90
@@ -169,39 +169,39 @@ SUBROUTINE compute_nadiab_moments
   implicit none
 
   ! Electron non adiab moments
-  DO ikx = ikxs,ikxe
-  DO iky = ikys,ikye
-  DO iz  = izgs,izge
-  IF(KIN_E) THEN
-  DO ip=ipgs_e,ipge_e
-    IF(parray_e(ip) .EQ. 0) THEN
-      DO ij=ijgs_e,ijge_e
-        nadiab_moments_e(ip,ij,ikx,iky,iz) = moments_e(ip,ij,ikx,iky,iz,updatetlevel) &
-                                  + qe_taue*kernel_e(ij,ikx,iky,iz,0)*phi(ikx,iky,iz)
-      ENDDO
-    ELSE
-      DO ij=ijgs_e,ijge_e
-        nadiab_moments_e(ip,ij,ikx,iky,iz) = moments_e(ip,ij,ikx,iky,iz,updatetlevel)
-      ENDDO
-    ENDIF
-  ENDDO
-  ENDIF
-  ! Ions non adiab moments
-  DO ip=ipgs_i,ipge_i
-    IF(parray_i(ip) .EQ. 0) THEN
-      DO ij=ijgs_i,ijge_i
-        nadiab_moments_i(ip,ij,ikx,iky,iz) = moments_i(ip,ij,ikx,iky,iz,updatetlevel) &
-                                  + qi_taui*kernel_i(ij,ikx,iky,iz,0)*phi(ikx,iky,iz)
-      ENDDO
-    ELSE
-      DO ij=ijgs_i,ijge_i
-        nadiab_moments_i(ip,ij,ikx,iky,iz) = moments_i(ip,ij,ikx,iky,iz,updatetlevel)
+  xloop: DO ikx = ikxs,ikxe
+  yloop: DO iky = ikys,ikye
+  zloop: DO iz  = izgs,izge
+    IF(KIN_E) THEN
+      DO ip=ipgs_e,ipge_e
+        IF(parray_e(ip) .EQ. 0) THEN
+          DO ij=ijgs_e,ijge_e
+            nadiab_moments_e(ip,ij,ikx,iky,iz) = moments_e(ip,ij,ikx,iky,iz,updatetlevel) &
+                                      + qe_taue*kernel_e(ij,ikx,iky,iz,0)*phi(ikx,iky,iz)
+          ENDDO
+        ELSE
+          DO ij=ijgs_e,ijge_e
+            nadiab_moments_e(ip,ij,ikx,iky,iz) = moments_e(ip,ij,ikx,iky,iz,updatetlevel)
+          ENDDO
+        ENDIF
       ENDDO
     ENDIF
-  ENDDO
-  ENDDO
-  ENDDO
-  ENDDO
+    ! Ions non adiab moments
+    DO ip=ipgs_i,ipge_i
+      IF(parray_i(ip) .EQ. 0) THEN
+        DO ij=ijgs_i,ijge_i
+          nadiab_moments_i(ip,ij,ikx,iky,iz) = moments_i(ip,ij,ikx,iky,iz,updatetlevel) &
+                                    + qi_taui*kernel_i(ij,ikx,iky,iz,0)*phi(ikx,iky,iz)
+        ENDDO
+      ELSE
+        DO ij=ijgs_i,ijge_i
+          nadiab_moments_i(ip,ij,ikx,iky,iz) = moments_i(ip,ij,ikx,iky,iz,updatetlevel)
+        ENDDO
+      ENDIF
+    ENDDO
+  ENDDO zloop
+  ENDDO yloop
+  ENDDO xloop
   !
 END SUBROUTINE compute_nadiab_moments
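In formula form, reading qe_taue and qi_taui as q_a/tau_a, the loops above compute for each species a

    nadiab_moments_a(p,j) = moments_a(p,j) + (q_a/tau_a)*kernel_a(j)*phi   if p = 0,
    nadiab_moments_a(p,j) = moments_a(p,j)                                 otherwise,

i.e. only the p = 0 Hermite moment carries the phi correction, which is why the parray(ip) test selects the branch.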
 
@@ -264,7 +264,7 @@ SUBROUTINE compute_uperp
             uperp = 0._dp
             DO ij = ijs_i, ije_i
               uperp = uperp + kernel_i(ij,ikx,iky,iz,0) *&
-               0.5_dp*(nadiab_moments_i(ip0_i,ij,ikx,iky,iz) - nadiab_moments_i(ip0_e,ij-1,ikx,iky,iz))
+               0.5_dp*(nadiab_moments_i(ip0_i,ij,ikx,iky,iz) - nadiab_moments_i(ip0_i,ij-1,ikx,iky,iz))
              ENDDO
             uper_i(ikx,iky,iz) = uperp
           ENDDO
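Written out, the corrected sum is

    uperp_i = SUM_j kernel_i(j) * ( nadiab_i(p=0,j) - nadiab_i(p=0,j-1) ) / 2,

so both terms of the difference must be ion moments; the old ip0_e index mixed the electron and ion stacks, which is the bug fixed here.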
diff --git a/src/srcinfo.h b/src/srcinfo.h
index e4739288..23acbcb4 100644
--- a/src/srcinfo.h
+++ b/src/srcinfo.h
@@ -3,8 +3,8 @@ character(len=40) BRANCH
 character(len=20) AUTHOR
 character(len=40) EXECDATE
 character(len=40) HOST
-parameter (VERSION='aa401ea-dirty')
-parameter (BRANCH='master')
+parameter (VERSION='5e48668-dirty')
+parameter (BRANCH='3D_parallel')
 parameter (AUTHOR='ahoffman')
-parameter (EXECDATE='Tue Apr 5 17:06:15 CEST 2022')
+parameter (EXECDATE='Fri Apr 8 11:33:03 CEST 2022')
 parameter (HOST ='spcpc606')
diff --git a/src/srcinfo/srcinfo.h b/src/srcinfo/srcinfo.h
index e4739288..23acbcb4 100644
--- a/src/srcinfo/srcinfo.h
+++ b/src/srcinfo/srcinfo.h
@@ -3,8 +3,8 @@ character(len=40) BRANCH
 character(len=20) AUTHOR
 character(len=40) EXECDATE
 character(len=40) HOST
-parameter (VERSION='aa401ea-dirty')
-parameter (BRANCH='master')
+parameter (VERSION='5e48668-dirty')
+parameter (BRANCH='3D_parallel')
 parameter (AUTHOR='ahoffman')
-parameter (EXECDATE='Tue Apr 5 17:06:15 CEST 2022')
+parameter (EXECDATE='Fri Apr 8 11:33:03 CEST 2022')
 parameter (HOST ='spcpc606')
diff --git a/src/stepon.F90 b/src/stepon.F90
index 5aef1d2e..c518a430 100644
--- a/src/stepon.F90
+++ b/src/stepon.F90
@@ -23,7 +23,7 @@ SUBROUTINE stepon
    !----- BEFORE: All fields are updated for step = n
       ! Compute right hand side from current fields
       ! N_rhs(N_n, nadia_n, phi_n, S_n, Tcoll_n)
-      CALL compute_RHS
+      CALL assemble_RHS
 
       ! ---- step n -> n+1 transition
       ! Advance from updatetlevel to updatetlevel+1 (according to num. scheme)
@@ -57,10 +57,10 @@ SUBROUTINE stepon
 
    CONTAINS
      !!!! Basic structure to simplify stepon
-     SUBROUTINE compute_RHS
-       USE moments_eq_rhs, ONLY: moments_eq_rhs_e,moments_eq_rhs_i
-       USE collision, ONLY: compute_TColl
-       USE nonlinear, ONLY: compute_Sapj
+     SUBROUTINE assemble_RHS
+       USE moments_eq_rhs, ONLY: compute_moments_eq_rhs
+       USE collision,      ONLY: compute_TColl
+       USE nonlinear,      ONLY: compute_Sapj
        IMPLICIT NONE
          ! compute auxiliary non adiabatic moments arrays
          CALL compute_nadiab_moments
@@ -68,11 +68,9 @@ SUBROUTINE stepon
          CALL compute_Sapj
          ! compute collision term ("if coll, if nu >0" is included inside)
          CALL compute_TColl
-         ! compute the moments equation rhs for electrons and ions
-         IF (KIN_E) &
-         CALL moments_eq_rhs_e
-         CALL moments_eq_rhs_i
-     END SUBROUTINE compute_RHS
+         ! compute the moments equation rhs
+         CALL compute_moments_eq_rhs
+     END SUBROUTINE assemble_RHS
 
       SUBROUTINE checkfield_all ! Check all the fields for inf or nan
         ! Execution time start
@@ -85,14 +83,14 @@ SUBROUTINE stepon
         IF(.NOT.nlend) THEN
            mlend=mlend .or. checkfield(phi,' phi')
            IF(KIN_E) THEN
-           DO ip=ips_e,ipe_e
-             DO ij=ijs_e,ije_e
+           DO ip=ipgs_e,ipge_e
+             DO ij=ijgs_e,ijge_e
               mlend=mlend .or. checkfield(moments_e(ip,ij,:,:,:,updatetlevel),' moments_e')
              ENDDO
            ENDDO
            ENDIF
-           DO ip=ips_i,ipe_i
-             DO ij=ijs_i,ije_i
+           DO ip=ipgs_i,ipge_i
+             DO ij=ijgs_i,ijge_i
               mlend=mlend .or. checkfield(moments_i(ip,ij,:,:,:,updatetlevel),' moments_i')
              ENDDO
            ENDDO
@@ -106,11 +104,11 @@ SUBROUTINE stepon
 
       SUBROUTINE anti_aliasing
         IF(KIN_E)THEN
-        DO ip=ips_e,ipe_e
-          DO ij=ijs_e,ije_e
+        DO ip=ipgs_e,ipge_e
+          DO ij=ijgs_e,ijge_e
             DO ikx=ikxs,ikxe
               DO iky=ikys,ikye
-                DO iz=izs,ize
+                DO iz=izgs,izge
                   moments_e( ip,ij,ikx,iky,iz,:) = AA_x(ikx)* AA_y(iky) * moments_e( ip,ij,ikx,iky,iz,:)
                 END DO
               END DO
@@ -118,11 +116,11 @@ SUBROUTINE stepon
           END DO
         END DO
         ENDIF
-        DO ip=ips_i,ipe_i
+        DO ip=ipgs_i,ipge_i
           DO ij=ijs_i,ije_i
             DO ikx=ikxs,ikxe
               DO iky=ikys,ikye
-                DO iz=izs,ize
+                DO iz=izgs,izge
                   moments_i( ip,ij,ikx,iky,iz,:) = AA_x(ikx)* AA_y(iky) * moments_i( ip,ij,ikx,iky,iz,:)
                 END DO
               END DO
@@ -135,8 +133,8 @@ SUBROUTINE stepon
         IF ( contains_kx0 ) THEN
           ! Electron moments
           IF(KIN_E) THEN
-          DO ip=ips_e,ipe_e
-            DO ij=ijs_e,ije_e
+          DO ip=ipgs_e,ipge_e
+            DO ij=ijgs_e,ijge_e
               DO iz=izs,ize
                 DO iky=2,Nky/2 !symmetry at kx = 0
                   moments_e( ip,ij,ikx_0,iky,iz, :) = CONJG(moments_e( ip,ij,ikx_0,Nky+2-iky,iz, :))
@@ -148,8 +146,8 @@ SUBROUTINE stepon
           END DO
           ENDIF
           ! Ion moments
-          DO ip=ips_i,ipe_i
-            DO ij=ijs_i,ije_i
+          DO ip=ipgs_i,ipge_i
+            DO ij=ijgs_i,ijge_i
               DO iz=izs,ize
                 DO iky=2,Nky/2 !symmetry at kx = 0
                   moments_i( ip,ij,ikx_0,iky,iz, :) = CONJG(moments_i( ip,ij,ikx_0,Nky+2-iky,iz, :))
@@ -161,10 +159,10 @@ SUBROUTINE stepon
           END DO
           ! Phi
           DO iky=2,Nky/2 !symmetry at kx = 0
-            phi(ikx_0,iky,:) = phi(ikx_0,Nky+2-iky,:)
+            phi(ikx_0,iky,izs:ize) = phi(ikx_0,Nky+2-iky,izs:ize)
           END DO
           ! must be real at origin
-          phi(ikx_0,iky_0,:) = REAL(phi(ikx_0,iky_0,:))
+          phi(ikx_0,iky_0,izs:ize) = REAL(phi(ikx_0,iky_0,izs:ize))
         ENDIF
       END SUBROUTINE enforce_symmetry
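The symmetry enforced above is the reality condition of a real-space field stored in Fourier space, applied along the kx = 0 line:

    moments(kx=0, iky) = CONJG( moments(kx=0, Nky+2-iky) ),

together with forcing the (kx,ky) = (0,0) mode of phi to be purely real. Restricting the phi updates to izs:ize keeps each process writing only its local z slice.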
 
diff --git a/src/utility_mod.F90 b/src/utility_mod.F90
index 577a130f..bd3ab13b 100644
--- a/src/utility_mod.F90
+++ b/src/utility_mod.F90
@@ -4,7 +4,7 @@ MODULE utility
 
   use prec_const
   IMPLICIT NONE
-  PUBLIC :: manual_2D_bcast, manual_3D_bcast
+  PUBLIC :: manual_0D_bcast, manual_2D_bcast, manual_3D_bcast
 
 CONTAINS
 
@@ -151,4 +151,36 @@ SUBROUTINE manual_3D_bcast(field_)
   ENDIF
 END SUBROUTINE manual_3D_bcast
 
+!!!!! This is a manual way to do MPI_BCAST !!!!!!!!!!!
+SUBROUTINE manual_0D_bcast(v)
+USE grid
+IMPLICIT NONE
+COMPLEX(dp), INTENT(INOUT) :: v
+COMPLEX(dp) :: buffer
+INTEGER     :: i_, root, world_rank, world_size, count
+root = 0;
+count = 1;
+
+CALL MPI_COMM_RANK(comm_z,world_rank,ierr)
+CALL MPI_COMM_SIZE(comm_z,world_size,ierr)
+IF (world_size .GT. 1) THEN
+  !! Broadcast the scalar to the other processes along z (communicator comm_z)
+  IF (world_rank .EQ. root) THEN
+    ! Fill the buffer
+    buffer = v
+    ! Send it to all the other processes
+    DO i_ = 0,num_procs_z-1
+      IF (i_ .NE. world_rank) &
+      CALL MPI_SEND(buffer, count, MPI_DOUBLE_COMPLEX, i_, 0, comm_z, ierr)
+    ENDDO
+  ELSE
+    ! Receive buffer from root
+    CALL MPI_RECV(buffer, count, MPI_DOUBLE_COMPLEX, root, 0, comm_z, MPI_STATUS_IGNORE, ierr)
+    ! Write it into the output scalar
+    v = buffer
+  ENDIF
+ENDIF
+END SUBROUTINE manual_0D_bcast
+
+
 END MODULE utility
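For reference, the hand-rolled send/receive in manual_0D_bcast is functionally a one-element broadcast; a minimal equivalent using the collective, with the same communicator and datatype, would be

    CALL MPI_BCAST(v, 1, MPI_DOUBLE_COMPLEX, 0, comm_z, ierr)

Keeping the manual version is presumably a deliberate choice, for symmetry with manual_2D_bcast and manual_3D_bcast.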
diff --git a/wk/analysis_3D.m b/wk/analysis_3D.m
index 7c194520..a6f008af 100644
--- a/wk/analysis_3D.m
+++ b/wk/analysis_3D.m
@@ -51,7 +51,7 @@ options.PLAN      = 'yz';
 % options.PLAN      = 'sx';
 options.COMP      = 1;
 % options.TIME      = dat.Ts5D;
-options.TIME      = 0:0.5:400;
+options.TIME      = 0:0.5:100;
 data.EPS          = 0.1;
 data.a = data.EPS * 2000;
 create_film(data,options,'.gif')
diff --git a/wk/analysis_header.m b/wk/analysis_header.m
index 18b07606..a2017024 100644
--- a/wk/analysis_header.m
+++ b/wk/analysis_header.m
@@ -5,6 +5,4 @@ outfile ='linear_shearless_cyclone/dbg';
 % outfile ='debug/test_zpar/';
 % outfile ='shearless_cyclone/128x128x16x4x2_L_120_CTC_1.0_colless/';
 % outfile ='shearless_cyclone/128x128x16x4x2_L_120_CTC_0.8/';
-
-
 analysis_3D
\ No newline at end of file
-- 
GitLab