diff --git a/src/advance_field_mod.F90 b/src/advance_field_mod.F90
index 5bed8086f17341e4da2537a7b3b7e8f1e70a0bac..1818f8fb8e40bc4ac60ff58af2eb91fe807d1cd6 100644
--- a/src/advance_field_mod.F90
+++ b/src/advance_field_mod.F90
@@ -6,10 +6,7 @@ implicit none
 CONTAINS
 
   SUBROUTINE advance_time_level
-
-    USE basic
-    USE time_integration
-    use prec_const
+    USE time_integration, ONLY :set_updatetlevel, updatetlevel, ntimelevel
     IMPLICIT NONE
     CALL set_updatetlevel(mod(updatetlevel,ntimelevel)+1)
   END SUBROUTINE advance_time_level
@@ -38,9 +35,8 @@ CONTAINS
       DO ip    =1,local_np
         ipi = ip+ngp/2
       DO ia    =1,local_na
-        IF((CLOS .NE. 1) .OR. (parray(ipi)+2*jarray(iji) .LE. dmax))&
         moments(ia,ipi,iji,iky,ikx,izi,1) = moments(ia,ipi,iji,iky,ikx,izi,1) &
-                + dt*b_E(istage)*moments_rhs(ia,ip,ij,iky,ikx,iz,istage)
+               + dt*b_E(istage)*moments_rhs(ia,ip,ij,iky,ikx,iz,istage)
       END DO
       END DO
       END DO
diff --git a/src/calculus_mod.F90 b/src/calculus_mod.F90
index ac649cf1e74a7c9b2f774dda39f4b1a2efb7947d..f36ce8c20c7b8f56f9091ac0504e088678448a92 100644
--- a/src/calculus_mod.F90
+++ b/src/calculus_mod.F90
@@ -171,49 +171,47 @@ CONTAINS
 END SUBROUTINE interp_z
 
 SUBROUTINE simpson_rule_z(local_nz,dz,f,intf)
- ! integrate f(z) over z using the simpon's rule. Assume periodic boundary conditions (f(ize+1) = f(izs))
- !from molix BJ Frei
- USE prec_const, ONLY: dp, onethird
- USE parallel,   ONLY: num_procs_z, rank_z, comm_z, manual_0D_bcast
- USE mpi
- implicit none
- INTEGER, INTENT(IN) :: local_nz
- REAL(dp),INTENT(IN) :: dz
- complex(dp),dimension(local_nz), intent(in) :: f
- COMPLEX(dp), intent(out) :: intf
- COMPLEX(dp)              :: buffer, local_int
- INTEGER :: root, i_, iz, ierr
-
- ! Buil local sum using the weights of composite Simpson's rule
- local_int = 0._dp
- DO iz = 1,local_nz
-   IF(MODULO(iz,2) .EQ. 1) THEN ! odd iz
-     local_int = local_int + 2._dp*onethird*dz*f(iz)
-   ELSE ! even iz
-     local_int = local_int + 4._dp*onethird*dz*f(iz)
-   ENDIF
- ENDDO
- buffer = local_int
- root = 0
- !Gather manually among the rank_z=0 processes and perform the sum
- intf = 0._dp
- IF (num_procs_z .GT. 1) THEN
-     !! Everyone sends its local_sum to root = 0
-     IF (rank_z .NE. root) THEN
-         CALL MPI_SEND(buffer, 1 , MPI_DOUBLE_COMPLEX, root, 5678, comm_z, ierr)
-     ELSE
-         ! Recieve from all the other processes
-         DO i_ = 0,num_procs_z-1
-             IF (i_ .NE. rank_z) &
-                 CALL MPI_RECV(buffer, 1 , MPI_DOUBLE_COMPLEX, i_, 5678, comm_z, MPI_STATUS_IGNORE, ierr)
-                 intf = intf + buffer
-         ENDDO
-     ENDIF
-     CALL manual_0D_bcast(intf)
- ELSE
-   intf = local_int
- ENDIF
-
+  ! integrate f(z) over z using Simpson's rule. Assume periodic boundary conditions (f(ize+1) = f(izs))
+  !from molix BJ Frei
+  USE prec_const, ONLY: dp, onethird
+  USE parallel,   ONLY: num_procs_z, rank_z, comm_z, manual_0D_bcast
+  USE mpi
+  implicit none
+  INTEGER, INTENT(IN) :: local_nz
+  REAL(dp),INTENT(IN) :: dz
+  complex(dp),dimension(local_nz), intent(in) :: f
+  COMPLEX(dp), intent(out) :: intf
+  COMPLEX(dp)              :: buffer, local_int
+  INTEGER :: root, i_, iz, ierr
+  ! Build the local sum using the weights of the composite Simpson's rule
+  local_int = 0._dp
+  DO iz = 1,local_nz
+    IF(MODULO(iz,2) .EQ. 1) THEN ! odd iz
+      local_int = local_int + 2._dp*onethird*dz*f(iz)
+    ELSE ! even iz
+      local_int = local_int + 4._dp*onethird*dz*f(iz)
+    ENDIF
+  ENDDO
+  buffer = local_int
+  root = 0
+  !Gather manually among the rank_z=0 processes and perform the sum
+  intf = 0._dp
+  IF (num_procs_z .GT. 1) THEN
+      !! Everyone sends its local_sum to root = 0
+      IF (rank_z .NE. root) THEN
+          CALL MPI_SEND(buffer, 1 , MPI_DOUBLE_COMPLEX, root, 5678, comm_z, ierr)
+      ELSE
+          ! Receive from all the other processes
+          DO i_ = 0,num_procs_z-1
+              IF (i_ .NE. rank_z) &
+                  CALL MPI_RECV(buffer, 1 , MPI_DOUBLE_COMPLEX, i_, 5678, comm_z, MPI_STATUS_IGNORE, ierr)
+                  intf = intf + buffer
+          ENDDO
+      ENDIF
+      CALL manual_0D_bcast(intf)
+  ELSE
+    intf = local_int
+  ENDIF
 END SUBROUTINE simpson_rule_z
 
 END MODULE calculus
diff --git a/src/collision_mod.F90 b/src/collision_mod.F90
index 702defea94904d65440698b9f98ebfdc97cf23cf..b4ed42cbd375090d1fd5bd0b899d9edc79decb79 100644
--- a/src/collision_mod.F90
+++ b/src/collision_mod.F90
@@ -98,7 +98,7 @@ CONTAINS
         CASE ('DG')
           CALL compute_dougherty
         CASE ('SG','LR','LD')
-          CALL compute_cosolver_coll(GK_CO,INTERSPECIES)
+          CALL compute_cosolver_coll(GK_CO)
         CASE ('none','hypcoll','dvpar4')
           Capj = 0._dp
         CASE DEFAULT
@@ -171,9 +171,9 @@ CONTAINS
   !! Doughtery drift-kinetic collision operator (species like)
   !******************************************************************************!
   SUBROUTINE Dougherty_DK
-    USE grid, ONLY: ias,iae, ips,ipe, ijs,ije, parray, jarray, &
-                    ip0, ip1, ip2, &
-                    ikys,ikye, ikxs,ikxe, izs,ize
+    USE grid, ONLY: local_na, local_np, local_nj, parray, jarray, ngp, ngj, &
+                    ip0, ip1, ip2, ij0, ij1, &
+                    local_nky, local_nkx, local_nz, ngz
     USE species,          ONLY: nu_ab
     USE time_integration, ONLY: updatetlevel
     USE array,            ONLY: Capj
@@ -181,37 +181,46 @@ CONTAINS
     USE prec_const,       ONLY: dp, SQRT2, twothird
     IMPLICIT NONE
     COMPLEX(dp) :: Tmp
-    INTEGER     :: iz,ikx,iky,ij,ip,ia
+    INTEGER     :: iz,ikx,iky,ij,ip,ia, ipi,iji,izi
     REAL(dp)    :: j_dp, p_dp
-    DO ia = ias,iae
-    DO iz = izs,ize; DO iky = ikys,ikye; DO ikx = ikxs,ikxe
-    DO ij = ijs,ije; DO ip = ips,ipe
+    DO iz = 1,local_nz
+      izi = iz + ngz/2
+      DO ikx = 1,local_nkx
+        DO iky = 1,local_nky 
+          DO ij = 1,local_nj
+            iji = ij + ngj/2 
+            DO ip = 1,local_np
+              ipi = ip + ngp/2
+              DO ia = 1,local_na
                 !** Auxiliary variables **
-                p_dp      = REAL(parray(ip),dp)
-                j_dp      = REAL(jarray(ij),dp)
+                p_dp      = REAL(parray(ipi),dp)
+                j_dp      = REAL(jarray(iji),dp)
                 !** Assembling collison operator **
-                Tmp = -(p_dp + 2._dp*j_dp)*moments(ia,ip,ij,iky,ikx,iz,updatetlevel)
+                Tmp = -(p_dp + 2._dp*j_dp)*moments(ia,ipi,iji,iky,ikx,izi,updatetlevel)
                 IF( (p_dp .EQ. 1._dp) .AND. (j_dp .EQ. 0._dp)) THEN !Ce10
-                  Tmp = Tmp +  moments(ia,ip1,1,iky,ikx,iz,updatetlevel)
+                  Tmp = Tmp +  moments(ia,ip1,ij0,iky,ikx,izi,updatetlevel) ! Ce10: u_par term, (p,j)=(1,0) slot, ghosted z index
                 ELSEIF( (p_dp .EQ. 2._dp) .AND. (j_dp .EQ. 0._dp)) THEN ! Ce20
-                  Tmp = Tmp +       twothird*moments(ia,ip2,1,iky,ikx,iz,updatetlevel) &
-                            - SQRT2*twothird*moments(ia,ip0,2,iky,ikx,iz,updatetlevel)
+                  Tmp = Tmp +       twothird*moments(ia,ip2,ij0,iky,ikx,izi,updatetlevel) &
+                            - SQRT2*twothird*moments(ia,ip0,ij1,iky,ikx,izi,updatetlevel)
                 ELSEIF( (p_dp .EQ. 0._dp) .AND. (j_dp .EQ. 1._dp)) THEN ! Ce01
-                  Tmp = Tmp +  2._dp*twothird*moments(ia,ip0,2,iky,ikx,iz,updatetlevel) &
-                             - SQRT2*twothird*moments(ia,ip2,1,iky,ikx,iz,updatetlevel)
+                  Tmp = Tmp +  2._dp*twothird*moments(ia,ip0,ij1,iky,ikx,izi,updatetlevel) &
+                              -SQRT2*twothird*moments(ia,ip2,ij0,iky,ikx,izi,updatetlevel)
                 ENDIF
                 Capj(ia,ip,ij,iky,ikx,iz) = nu_ab(ia,ia) * Tmp
-    ENDDO;ENDDO
-    ENDDO;ENDDO;ENDDO
+              ENDDO
+            ENDDO
+          ENDDO
+        ENDDO
+      ENDDO
     ENDDO
   END SUBROUTINE Dougherty_DK
   !******************************************************************************!
   !! Doughtery gyrokinetic collision operator (species like)
   !******************************************************************************!
   SUBROUTINE Dougherty_GK
-    USE grid, ONLY: ias,iae, ips,ipe, ijs,ije, parray, jarray, &
-                    ip0, ip1, ip2, jmax, &
-                    ikys,ikye, ikxs,ikxe, izs,ize, kparray
+    USE grid, ONLY: local_na, local_np, local_nj, parray, jarray, ngp, ngj, &
+                    ip0, ip1, ip2, &
+                    local_nky, local_nkx, local_nz, ngz, kparray, nzgrid
     USE species,          ONLY: sigma2_tau_o2, sqrt_sigma2_tau_o2, nu_ab
     USE array,            ONLY: Capj, nadiab_moments, kernel
     USE prec_const,       ONLY: dp, SQRT2, twothird
@@ -220,61 +229,70 @@ CONTAINS
     COMPLEX(dp) :: dens_,upar_,uperp_,Tpar_,Tperp_,Temp_
     COMPLEX(dp) :: nadiab_moment_0j, Tmp
     REAL(dp)    :: Knp0, Knp1, Knm1, kp
-    INTEGER     :: iz,ikx,iky,ij,ip,ia,eo,in
     REAL(dp)    :: n_dp, j_dp, p_dp, ba, ba_2
-    DO ia = ias,iae
-    DO iz = izs,ize; DO iky = ikys, ikye; DO ikx = ikxs, ikxe;
-    DO ij = ijs,ije; DO ip = ips,ipe;
+    INTEGER     :: iz,ikx,iky,ij,ip,ia,eo,in, ipi,iji,izi,ini
+    DO iz = 1,local_nz
+      izi = iz + ngz/2
+      DO ikx = 1,local_nkx
+        DO iky = 1,local_nky 
+          DO ij = 1,local_nj
+            iji = ij + ngj/2 
+            DO ip = 1,local_np
+              ipi = ip + ngp/2
+              DO ia = 1,local_na
     !** Auxiliary variables **
-    p_dp      = REAL(parray(ip),dp)
-    j_dp      = REAL(jarray(ij),dp)
-    eo        = MODULO(parray(ip),2)
-    kp        = kparray(iky,ikx,iz,eo)
+    p_dp      = REAL(parray(ipi),dp)
+    j_dp      = REAL(jarray(iji),dp)
+    eo        = MIN(nzgrid,MODULO(parray(ipi),2)+1)
+    kp        = kparray(iky,ikx,izi,eo)
     ba_2      = kp**2 * sigma2_tau_o2(ia) ! this is (l_a/2)^2
     ba        = 2_dp*kp * sqrt_sigma2_tau_o2(ia)  ! this is l_a
     !** Assembling collison operator **
     ! Velocity-space diffusion (similar to Lenard Bernstein)
     ! -nu (p + 2j + b^2/2) Napj
-    Tmp = -(p_dp + 2._dp*j_dp + 2._dp*ba_2)*nadiab_moments(ia,ip,ij,iky,ikx,iz)
+    Tmp = -(p_dp + 2._dp*j_dp + 2._dp*ba_2)*nadiab_moments(ia,ipi,iji,iky,ikx,izi)
       !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Non zero term for p = 0 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
     IF( p_dp .EQ. 0 ) THEN ! Kronecker p0
         !** build required fluid moments **
         dens_  = 0._dp
         upar_  = 0._dp; uperp_ = 0._dp
         Tpar_  = 0._dp; Tperp_ = 0._dp
-        DO in = 1,jmax+1
-          n_dp = REAL(in-1,dp)
+        DO in = 1,local_nj
+          ini = in + ngj/2
+          n_dp = REAL(jarray(ini),dp)
           ! Store the kernels for sparing readings
-          Knp0 =  kernel(ia,in  ,iky,ikx,iz,eo)
-          Knp1 =  kernel(ia,in+1,iky,ikx,iz,eo)
-          Knm1 =  kernel(ia,in-1,iky,ikx,iz,eo)
+          Knp0 =  kernel(ia,ini  ,iky,ikx,izi,eo)
+          Knp1 =  kernel(ia,ini+1,iky,ikx,izi,eo)
+          Knm1 =  kernel(ia,ini-1,iky,ikx,izi,eo)
           ! Nonadiabatic moments (only different from moments when p=0)
-          nadiab_moment_0j   = nadiab_moments(ia,ip0,in,iky,ikx,iz)
+          nadiab_moment_0j   = nadiab_moments(ia,ip0,ini,iky,ikx,izi)
           ! Density
           dens_  = dens_  + Knp0 * nadiab_moment_0j
           ! Perpendicular velocity
           uperp_ = uperp_ + ba*0.5_dp*(Knp0 - Knm1) * nadiab_moment_0j
           ! Parallel temperature
-          Tpar_  = Tpar_  + Knp0 * (SQRT2*nadiab_moments(ia,ip2,in,iky,ikx,iz) + nadiab_moment_0j)
+          Tpar_  = Tpar_  + Knp0 * (SQRT2*nadiab_moments(ia,ip2,ini,iky,ikx,izi) + nadiab_moment_0j)
           ! Perpendicular temperature
           Tperp_ = Tperp_ + ((2._dp*n_dp+1._dp)*Knp0 - (n_dp+1._dp)*Knp1 - n_dp*Knm1)*nadiab_moment_0j
         ENDDO
       Temp_  = (Tpar_ + 2._dp*Tperp_)/3._dp - dens_
       ! Add energy restoring term
-      Tmp = Tmp + Temp_* 4._dp *  j_dp          * kernel(ia,ij  ,iky,ikx,iz,eo)
-      Tmp = Tmp - Temp_* 2._dp * (j_dp + 1._dp) * kernel(ia,ij+1,iky,ikx,iz,eo)
-      Tmp = Tmp - Temp_* 2._dp *  j_dp          * kernel(ia,ij-1,iky,ikx,iz,eo)
-      Tmp = Tmp + uperp_*ba* (kernel(ia,ij,iky,ikx,iz,eo) - kernel(ia,ij-1,iky,ikx,iz,eo))
+      Tmp = Tmp + Temp_* 4._dp *  j_dp          * kernel(ia,iji  ,iky,ikx,izi,eo)
+      Tmp = Tmp - Temp_* 2._dp * (j_dp + 1._dp) * kernel(ia,iji+1,iky,ikx,izi,eo)
+      Tmp = Tmp - Temp_* 2._dp *  j_dp          * kernel(ia,iji-1,iky,ikx,izi,eo)
+      Tmp = Tmp + uperp_*ba* (kernel(ia,iji,iky,ikx,izi,eo) - kernel(ia,iji-1,iky,ikx,izi,eo))
       !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
       !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Non zero term for p = 1 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
     ELSEIF( p_dp .eq. 1 ) THEN ! kronecker p1
       !** build required fluid moments **
       upar_  = 0._dp
-      DO in = 1,jmax+1
+      DO in = 1,local_nj
+        ini = in + ngj/2
+        n_dp = REAL(jarray(ini),dp)
         ! Parallel velocity
-        upar_  = upar_  + kernel(ia,in,iky,ikx,iz,eo) * nadiab_moments(ia,ip1,in,iky,ikx,iz)
+        upar_  = upar_  + kernel(ia,ini,iky,ikx,izi,eo) * nadiab_moments(ia,ip1,ini,iky,ikx,izi)
       ENDDO
-      Tmp = Tmp + upar_*kernel(ia,ij,iky,ikx,iz,eo)
+      Tmp = Tmp + upar_*kernel(ia,iji,iky,ikx,izi,eo)
       !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
       !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Non zero term for p = 2 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
     ELSEIF( p_dp .eq. 2 ) THEN ! kronecker p2
@@ -282,23 +300,24 @@ CONTAINS
       dens_  = 0._dp
       upar_  = 0._dp; uperp_ = 0._dp
       Tpar_  = 0._dp; Tperp_ = 0._dp
-      DO in = 1,jmax+1
-        n_dp = REAL(in-1,dp)
+      DO in = 1,local_nj
+        ini = in + ngj/2
+        n_dp = REAL(jarray(ini),dp)
         ! Store the kernels for sparing readings
-        Knp0 =  kernel(ia,in  ,iky,ikx,iz,eo)
-        Knp1 =  kernel(ia,in+1,iky,ikx,iz,eo)
-        Knm1 =  kernel(ia,in-1,iky,ikx,iz,eo)
+        Knp0 =  kernel(ia,ini  ,iky,ikx,izi,eo)
+        Knp1 =  kernel(ia,ini+1,iky,ikx,izi,eo)
+        Knm1 =  kernel(ia,ini-1,iky,ikx,izi,eo)
         ! Nonadiabatic moments (only different from moments when p=0)
-        nadiab_moment_0j = nadiab_moments(ia,ip0,in,iky,ikx,iz)
+        nadiab_moment_0j = nadiab_moments(ia,ip0,ini,iky,ikx,izi)
         ! Density
         dens_     = dens_     + Knp0 * nadiab_moment_0j
         ! Parallel temperature
-        Tpar_  = Tpar_  + Knp0 * (SQRT2*nadiab_moments(ia,ip2,in,iky,ikx,iz) + nadiab_moment_0j)
+        Tpar_  = Tpar_  + Knp0 * (SQRT2*nadiab_moments(ia,ip2,ini,iky,ikx,izi) + nadiab_moment_0j)
         ! Perpendicular temperature
         Tperp_ = Tperp_ + ((2._dp*n_dp+1._dp)*Knp0 - (n_dp+1._dp)*Knp1 - n_dp*Knm1)*nadiab_moment_0j
       ENDDO
       Temp_  = (Tpar_ + 2._dp*Tperp_)/3._dp - dens_
-      Tmp = Tmp + Temp_*SQRT2*kernel(ia,ij,iky,ikx,iz,eo)
+      Tmp = Tmp + Temp_*SQRT2*kernel(ia,iji,iky,ikx,izi,eo)
       !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
     ENDIF
     ! Multiply by collision parameter
diff --git a/src/control.F90 b/src/control.F90
index df0bee295cc4b78935b6d53d0ef7b0a391dc8798..20b0239b72e6b12894dbfc72f56515b7cb9b5021 100644
--- a/src/control.F90
+++ b/src/control.F90
@@ -18,13 +18,13 @@ SUBROUTINE control
 
 
   CALL daytim('Start at ')
-
+
   !                   1.2     Define data specific to run
   CALL speak( 'Load basic data...')
   CALL basic_data
   ! CALL mpi_barrier(MPI_COMM_WORLD, ierr)
   CALL speak('...basic data loaded.')
-
+
   !                   1.3   Read input parameters from input file
   CALL speak('Read input parameters...')
   CALL readinputs
@@ -36,13 +36,13 @@ SUBROUTINE control
   CALL auxval
   ! CALL mpi_barrier(MPI_COMM_WORLD, ierr)
   CALL speak('...auxval calculated')
-
+
   !                   1.5     Initial conditions
   CALL speak( 'Create initial state...')
   CALL inital
   ! CALL mpi_barrier(MPI_COMM_WORLD, ierr)
   CALL speak('...initial state created')
-
+
   !                   1.6     Initial diagnostics
   CALL speak( 'Initial diagnostics...')
   CALL cpu_time(t_init_diag_0) ! Measure the time of the init diag
diff --git a/src/cosolver_interface_mod.F90 b/src/cosolver_interface_mod.F90
index 6d8de067ca8acd4efa1ee04fc5a12c5ac2962dbc..a96bceaf804cf47d47b88fceb92a29f952107251 100644
--- a/src/cosolver_interface_mod.F90
+++ b/src/cosolver_interface_mod.F90
@@ -9,31 +9,34 @@ CONTAINS
   !******************************************************************************!
   !! compute the collision terms in a (Np x Nj x Nkx x Nky) matrix all at once
   !******************************************************************************!
-  SUBROUTINE compute_cosolver_coll(GK_CO,INTERSPECIES)
+  SUBROUTINE compute_cosolver_coll(GK_CO)
     USE parallel,    ONLY: num_procs_p, comm_p,dsp_p,rcv_p
     USE grid,        ONLY: &
-      ias,iae, &
-      ips,ipe,parray,&
-      local_np,total_np,&
-      ijs,ije,jarray,jmax, dmax,&
-      ikxs,ikxe,ikys,ikye,izs,ize, bar
-    USE array,       ONLY: Capj, Caa, Cab_T, Cab_F, nadiab_moments
+      local_na, &
+      local_np, total_np, parray, ngp,&
+      total_nj,jarray, ngj,&
+      local_nkx, local_nky, local_nz, ngz, bar
+    USE array,       ONLY: Capj
     USE MPI
-    USE model,       ONLY: Na, CLOS
-    USE species,     ONLY: nu_ab
     IMPLICIT NONE
-    LOGICAL, INTENT(IN) :: GK_CO, INTERSPECIES
-    COMPLEX(dp), DIMENSION(1:total_np)   :: local_sum, buffer
-    COMPLEX(dp), DIMENSION(ips:ipe)      :: TColl_distr
+    LOGICAL, INTENT(IN) :: GK_CO
+    COMPLEX(dp), DIMENSION(total_np)    :: local_sum, buffer
+    COMPLEX(dp), DIMENSION(local_np)    :: TColl_distr
     COMPLEX(dp) :: Tmp_
-    INTEGER :: iz,ikx,iky,ij,ip,ia,ib,iq,il,ikx_C,iky_C,iz_C
-    INTEGER :: p_int,q_int,j_int,l_int, ierr
-    z:DO iz = izs,ize
-      x:DO ikx = ikxs,ikxe
-        y:DO iky = ikys,ikye
-          a:DO ia = ias,iae
-            j:DO ij = 1,Jmax+1
+    INTEGER :: iz,ikx,iky,ij,ip,ia,ikx_C,iky_C,iz_C
+    INTEGER :: p_int,j_int, ierr
+    INTEGER :: ipi, iji, izi
+    z:DO iz = 1,local_nz
+    izi = iz+ngz/2
+      x:DO ikx = 1,local_nkx
+        y:DO iky = 1,local_nky
+          a:DO ia = 1,local_na
+            j:DO ij = 1,total_nj
+            iji   = ij+ngj/2
+            j_int = jarray(iji)
               p:DO ip = 1,total_np
+              ipi   = ip + ngp/2
+              p_int = parray(ipi)
                 !! Take GK or DK limit
                 IF (GK_CO) THEN ! GK operator (k-dependant)
                   ikx_C = ikx; iky_C = iky; iz_C = iz;
@@ -41,29 +44,7 @@ CONTAINS
                   ikx_C = 1;   iky_C = 1; iz_C = 1;
                 ENDIF
                 !! Apply the cosolver collision matrix
-                Tmp_ = 0._dp ! Initialization
-                ! self interaction
-                Tmp_ = Tmp_ + nadiab_moments(ia,iq,il,iky,ikx,iz) &
-                       * nu_ab(ia,ia)*Caa(ia,bar(p_int,j_int), bar(q_int,l_int), iky_C, ikx_C, iz_C)
-                ! sum the contribution over the other species
-                IF (INTERSPECIES) THEN
-                  b:DO ib = 1,Na
-                    q:DO iq = ips,ipe
-                      q_int = parray(iq)
-                      l:DO il = ijs,ije
-                        l_int = jarray(il)
-                        IF((CLOS .NE. 1) .OR. (q_int+2*l_int .LE. dmax)) THEN
-                          ! Test contribution
-                          Tmp_ = Tmp_ + nadiab_moments(ia,iq,il,iky,ikx,iz) &
-                              * nu_ab(ia,ib) * Cab_T(ia,ib,bar(p_int,j_int), bar(q_int,l_int), iky_C, ikx_C, iz_C)
-                          ! Field contribution
-                          Tmp_ = Tmp_ + nadiab_moments(ib,iq,il,iky,ikx,iz) &
-                              * nu_ab(ia,ib) * Cab_F(ia,ib,bar(p_int,j_int), bar(q_int,l_int), iky_C, ikx_C, iz_C)
-                        ENDIF
-                      ENDDO l
-                    ENDDO q
-                  ENDDO b
-                ENDIF
+                CALL apply_cosolver_mat(ia,ip,ij,iky,ikx,iz,ikx_C,iky_C,iz_C,Tmp_)
                 local_sum(ip) = Tmp_
               ENDDO p
               IF (num_procs_p .GT. 1) THEN
@@ -78,7 +59,7 @@ CONTAINS
                 TColl_distr = local_sum
               ENDIF
               ! Write in output variable
-              DO ip = ips,ipe
+              DO ip = 1,local_np
                 Capj(ia,ip,ij,iky,ikx,iz) = TColl_distr(ip)
               ENDDO
             ENDDO j
@@ -88,17 +69,65 @@ CONTAINS
     ENDDO z
   END SUBROUTINE compute_cosolver_coll
 
+    !******************************************************************************!
+  !! compute the collision terms in a (Np x Nj x Nkx x Nky) matrix all at once
+  !******************************************************************************!
+  SUBROUTINE apply_cosolver_mat(ia,ip,ij,iky,ikx,iz,ikx_C,iky_C,iz_C,local_sum)
+    USE grid,        ONLY: &
+      local_na, &
+      local_np, parray, ngp,&
+      total_nj, jarray, dmax, ngj, bar, ngz
+    USE array,       ONLY: Caa, Cab_T, Cab_F, nadiab_moments
+    USE model,       ONLY: CLOS
+    USE species,     ONLY: nu_ab
+    IMPLICIT NONE
+    INTEGER, INTENT(IN) :: ia,ip,ij,iky,ikx,iz,ikx_C,iky_C,iz_C
+    COMPLEX(dp), INTENT(OUT) :: local_sum
+    INTEGER :: ib,iq,il
+    INTEGER :: p_int,q_int,j_int,l_int
+    INTEGER :: ipi, iji, izi, iqi, ili
+    izi = iz+ngz/2
+    iji   = ij+ngj/2
+    j_int = jarray(iji)
+    ipi   = ip + ngp/2
+    p_int = parray(ipi)
+    !! Apply the cosolver collision matrix
+    local_sum = 0._dp ! Initialization
+    q:DO iq = 1,local_np
+      iqi   = iq + ngp/2
+      q_int = parray(iqi)
+      l:DO il = 1,total_nj
+        ili   = il + ngj/2
+        l_int = jarray(ili)
+        ! self interaction
+        local_sum = local_sum + nadiab_moments(ia,iqi,ili,iky,ikx,izi) &
+              * nu_ab(ia,ia)*Caa(ia,bar(p_int,j_int), bar(q_int,l_int), iky_C, ikx_C, iz_C)
+        ! sum the contribution over the other species
+        b:DO ib = 1,local_na
+          IF((CLOS .NE. 1) .OR. (q_int+2*l_int .LE. dmax)) THEN
+            ! Test contribution
+            local_sum = local_sum + nadiab_moments(ia,iqi,ili,iky,ikx,izi) &
+                * nu_ab(ia,ib) * Cab_T(ia,ib,bar(p_int,j_int), bar(q_int,l_int), iky_C, ikx_C, iz_C)
+            ! Field contribution
+            local_sum = local_sum + nadiab_moments(ib,iqi,ili,iky,ikx,izi) &
+                * nu_ab(ia,ib) * Cab_F(ia,ib,bar(p_int,j_int), bar(q_int,l_int), iky_C, ikx_C, iz_C)
+          ENDIF
+        ENDDO b
+      ENDDO l
+    ENDDO q
+  END SUBROUTINE apply_cosolver_mat
+
     !******************************************************************************!
     !!!!!!! Load the collision matrix coefficient table from COSOlver results
     !******************************************************************************!
     SUBROUTINE load_COSOlver_mat(GK_CO,INTERSPECIES,matfile,collision_kcut) ! Load a sub matrix from iCa files (works for pmax,jmax<=P_full,J_full)
-      USE basic,       ONLY: allocate_array
+      USE basic,       ONLY: allocate_array, speak
       USE parallel,    ONLY: comm_p, my_id
       USE grid,        ONLY: &
-        pmax,jmax,&
-        ikxs,ikxe,&
-        ikys,ikye, kparray,&
-        izs,ize,bar
+        local_na,&
+        local_nkx,local_nky, kparray,&
+        local_nz, ngz, bar,&
+        pmax, jmax, ieven
       USE array,       ONLY: Caa, Cab_T, Cab_F
       USE MPI
       USE model,       ONLY: Na, LINEARITY
@@ -162,12 +191,12 @@ CONTAINS
         ELSE
           write(ikp_string,'(i5.5)') 0
         ENDIF
-        a:DO ia = 1,Na
+        a:DO ia = 1,local_na
           name_a = name(ia); letter_a = name_a(1:1)
           ! get the self colision matrix
           ! Naming of the array to load (kperp dependant)
           ! we look for the data stored at e.g. '00001/Caapj/Ceepj'
-          WRITE(var_name,'(a,a,a,a,a)') TRIM(ADJUSTL(ikp_string)),'/Caapj/',letter_a,letter_a,'pj'
+          WRITE(var_name,'(a,a,a,a,a)') TRIM(ADJUSTL(ikp_string)),'/Caapj/C',letter_a,letter_a,'pj'
           CALL getarr(fid, var_name, Caa_full) ! get array
           ! Fill sub array with the usefull polynmial degrees only
           DO ip = 0,pmax ! Loop over rows
@@ -183,15 +212,15 @@ CONTAINS
               ENDDO
             ENDDO
           ENDDO
-          b: DO ib = 1,Na
-            name_b = name(ib); letter_b = name_b(1:1)
-            IF(INTERSPECIES) THEN ! Pitch angle is only applied on like-species
+          b: DO ib = 1,local_na
+          name_b = name(ib); letter_b = name_b(1:1)
+          IF(INTERSPECIES .AND. (ib .NE. ia)) THEN ! Pitch angle is only applied on like-species
               !!!!!!!!!!!!!!! Test and field matrices !!!!!!!!!!!!!!
               ! we look for the data stored at e.g. '00001/Ceipj/CeipjT'
-              WRITE(var_name,'(a,a,a,a,a,a,a,a)') TRIM(ADJUSTL(ikp_string)),'/C',letter_a,letter_b,'pj/',letter_a,letter_b,'pjT'
+              WRITE(var_name,'(a,a,a,a,a,a,a,a)') TRIM(ADJUSTL(ikp_string)),'/C',letter_a,letter_b,'pj/C',letter_a,letter_b,'pjT'
               CALL getarr(fid, var_name, CabT_full)
               ! we look for the data stored at e.g. '00001/Ceipj/CeipjF'
-              WRITE(var_name,'(a,a,a,a,a,a,a,a)') TRIM(ADJUSTL(ikp_string)),'/C',letter_a,letter_b,'pj/',letter_a,letter_b,'pjF'
+              WRITE(var_name,'(a,a,a,a,a,a,a,a)') TRIM(ADJUSTL(ikp_string)),'/C',letter_a,letter_b,'pj/C',letter_a,letter_b,'pjF'
               CALL getarr(fid, var_name, CabF_full)
               ! Fill sub array with only usefull polynmials degree
               DO ip = 0,pmax ! Loop over rows
@@ -209,7 +238,7 @@ CONTAINS
               ENDDO
               ENDDO
             ELSE
-              CabT_kp = 0._dp; CabF_kp = 0._dp
+              CabT_kp(ia,ib,:,:,:) = 0._dp; CabF_kp(ia,ib,:,:,:) = 0._dp
             ENDIF
           ENDDO b
         ENDDO a
@@ -221,11 +250,11 @@ CONTAINS
 
       IF (GK_CO) THEN ! Interpolation of the kperp matrix values on kx ky grid
         IF (my_id .EQ. 0 ) WRITE(*,*) '...Interpolation from matrices kperp to simulation kx,ky...'
-        DO ikx = ikxs,ikxe
-          DO iky = ikys,ikye
-            DO iz = izs,ize
+        DO ikx = 1,local_nkx
+          DO iky = 1,local_nky
+            DO iz = 1,local_nz
               ! Check for nonlinear case if we are in the anti aliased domain or the filtered one
-              kperp_sim = MIN(kparray(iky,ikx,iz,0),collision_kcut) ! current simulation kperp
+              kperp_sim = MIN(kparray(iky,ikx,iz+ngz/2,ieven),collision_kcut) ! current simulation kperp
               ! Find the interval in kp grid mat where kperp_sim is contained
               ! Loop over the whole kp mat grid to find the smallest kperp that is
               ! larger than the current kperp_sim (brute force...)
@@ -265,12 +294,11 @@ CONTAINS
       DEALLOCATE (Caa__kp); DEALLOCATE (CabT_kp); DEALLOCATE (CabF_kp)
 
       IF( .NOT. INTERSPECIES ) THEN
-        IF(my_id.EQ.0) write(*,*) "--Like Species operator--"
+        CALL speak("--Like Species operator--")
         Cab_F = 0._dp;
         Cab_T = 0._dp;
       ENDIF
-
-      IF (my_id .EQ. 0) WRITE(*,*) '============DONE==========='
+      CALL speak('============DONE===========')
 
     END SUBROUTINE load_COSOlver_mat
     !******************************************************************************!
diff --git a/src/diagnose.F90 b/src/diagnose.F90
index 4722714359a2ba42583bdab69f689ce909a09cca..95dd1774baa4591f4b57874f4640dac1cc22995e 100644
--- a/src/diagnose.F90
+++ b/src/diagnose.F90
@@ -237,34 +237,30 @@ SUBROUTINE diagnose_full(kstep)
     !                       2.3   3d profiles
     IF (nsave_3d .GT. 0) THEN
       IF (MOD(cstep, nsave_3d) == 0) THEN
-          CALL diagnose_3d
-          ! Looks at the folder if the file check_phi exists and spits a snapshot
-          ! of the current electrostatic potential in a basic text file
-          CALL spit_snapshot_check
-        ENDIF
+        CALL diagnose_3d
+        ! Looks at the folder if the file check_phi exists and spits a snapshot
+        ! of the current electrostatic potential in a basic text file
+        CALL spit_snapshot_check
       ENDIF
-      !                       2.4   5d profiles
-      IF (nsave_5d .GT. 0) THEN
-        IF (MOD(cstep, nsave_5d) == 0) THEN
-          CALL diagnose_5d
-        END IF
+    ENDIF
+    !                       2.4   5d profiles
+    IF (nsave_5d .GT. 0) THEN
+      IF (MOD(cstep, nsave_5d) == 0) THEN
+        CALL diagnose_5d
       END IF
-      !_____________________________________________________________________________
-      !                   3.   Final diagnostics
-      ELSEIF (kstep .EQ. -1) THEN
-        CALL attach(fidres, "/data/input","cpu_time",finish-start)
-        
-        ! make a checkpoint at last timestep if not crashed
-        IF(.NOT. crashed) THEN
-       IF(my_id .EQ. 0) write(*,*) 'Saving last state'
-       IF (nsave_5d .GT. 0) &
-       CALL diagnose_5d
-     ENDIF
-
-     !   Close all diagnostic files
-     CALL mpi_barrier(MPI_COMM_WORLD, ierr)
-     CALL closef(fidres)
-
+    END IF
+  !_____________________________________________________________________________
+  !                   3.   Final diagnostics
+  ELSEIF (kstep .EQ. -1) THEN
+    CALL attach(fidres, "/data/input","cpu_time",finish-start)
+    ! make a checkpoint at last timestep if not crashed
+    IF(.NOT. crashed) THEN
+      IF(my_id .EQ. 0) write(*,*) 'Saving last state'
+      IF (nsave_5d .GT. 0) CALL diagnose_5d
+    ENDIF
+    !   Close all diagnostic files
+    CALL mpi_barrier(MPI_COMM_WORLD, ierr)
+    CALL closef(fidres)
   END IF
 END SUBROUTINE diagnose_full
 
@@ -332,6 +328,7 @@ SUBROUTINE diagnose_3d
   USE processing, ONLY: compute_fluid_moments, compute_Napjz_spectrum
   USE model,      ONLY: EM
   USE species,    ONLY: name
+  USE parallel,   ONLY: manual_3D_bcast
   IMPLICIT NONE
   CHARACTER :: letter_a
   INTEGER   :: ia
@@ -354,14 +351,15 @@ SUBROUTINE diagnose_3d
       IF (CONTAINSp0) THEN
         ! gyrocenter density
         Na00_    = moments(ia,ip0,ij0,:,:,(1+ngz/2):(local_nz+ngz/2),updatetlevel)
-        CALL write_field3d_kykxz(Na00_, 'N'//letter_a//'00')
+      ELSE
+        Na00_    = 0._dp
       ENDIF
-      ! <<Napj>x>y spectrum
+      CALL write_field3d_kykxz(Na00_, 'N'//letter_a//'00')
+      ! <<Napj>x>y spectrum
       Napjz_ = Napjz(ia,:,:,:)
       CALL write_field3d_pjz(Napjz_, 'N'//letter_a//'pjz')
     ENDDO
   ENDIF
-
   !! Fuid moments
   IF (write_dens .OR. write_fvel .OR. write_temp) &
   CALL compute_fluid_moments
@@ -391,34 +389,29 @@ SUBROUTINE diagnose_3d
   ENDDO
   CONTAINS
     SUBROUTINE write_field3d_kykxz(field, text)
-      USE basic,    ONLY : GATHERV_OUTPUT
       USE parallel, ONLY : gather_xyz, num_procs
       IMPLICIT NONE
-      COMPLEX(dp), DIMENSION(local_nky,local_nkx,local_nz), INTENT(IN) :: field
+      COMPLEX(dp), DIMENSION(local_nky,total_nkx,local_nz), INTENT(IN) :: field
       CHARACTER(*), INTENT(IN) :: text
       COMPLEX(dp), DIMENSION(total_nky,total_nkx,total_nz) :: field_full
-      CHARACTER(256) :: dset_name
+      CHARACTER(50) :: dset_name
       field_full = 0;
       WRITE(dset_name, "(A, '/', A, '/', i6.6)") "/data/var3d", TRIM(text), iframe3d
-
       IF (num_procs .EQ. 1) THEN ! no data distribution
         CALL putarr(fidres, dset_name, field, ionode=0)
-
-      ELSEIF(GATHERV_OUTPUT) THEN ! output using one node (gatherv)
+      ELSE
         CALL gather_xyz(field,field_full,local_nky,total_nky,total_nkx,local_nz,total_nz)
         CALL putarr(fidres, dset_name, field_full, ionode=0)
-      ELSE ! output using putarrnd (very slow on marconi)
-        CALL putarrnd(fidres, dset_name, field,  (/1, 1, 3/))
       ENDIF
-      CALL attach(fidres, dset_name, "time", time)
-    END SUBROUTINE write_field3d_kykxz
+      CALL attach(fidres, dset_name, "time", time) ! keep time metadata on 3D datasets (was commented out, likely debug leftover)
+    END SUBROUTINE write_field3d_kykxz
 
     SUBROUTINE write_field3d_pjz(field, text)
       USE parallel, ONLY : gather_pjz, num_procs
       IMPLICIT NONE
       COMPLEX(dp), DIMENSION(local_np,local_nj,local_nz), INTENT(IN) :: field
-      COMPLEX(dp), DIMENSION(total_np,total_nj,total_nz) :: field_full
       CHARACTER(*), INTENT(IN) :: text
+      COMPLEX(dp), DIMENSION(total_np,total_nj,total_nz) :: field_full
       CHARACTER(LEN=50) :: dset_name
       field_full = 0;
       WRITE(dset_name, "(A, '/', A, '/', i6.6)") "/data/var3d", TRIM(text), iframe3d
@@ -475,9 +468,9 @@ SUBROUTINE diagnose_5d
     WRITE(dset_name, "(A, '/', A, '/', i6.6)") "/data/var5d", TRIM(text), iframe5d
     IF (num_procs .EQ. 1) THEN
       CALL putarr(fidres, dset_name, field_sub, ionode=0)
-      ELSEIF(GATHERV_OUTPUT) THEN ! output using one node (gatherv)
-        CALL gather_pjxyz(field_sub,field_full,total_na,local_np,total_np,total_nj,local_nky,total_nky,total_nkx,local_nz,total_nz)
-        CALL putarr(fidres, dset_name, field_full, ionode=0)
+    ELSEIF(GATHERV_OUTPUT) THEN ! output using one node (gatherv)
+      CALL gather_pjxyz(field_sub,field_full,total_na,local_np,total_np,total_nj,local_nky,total_nky,total_nkx,local_nz,total_nz)
+      CALL putarr(fidres, dset_name, field_full, ionode=0)
     ELSE
       CALL putarrnd(fidres, dset_name, field_sub,  (/1,3,5/))
     ENDIF
diff --git a/src/ghosts_mod.F90 b/src/ghosts_mod.F90
index 5c1585e05c3275dc4d2f36997887d4cfa8b6c978..ab4c2515bbecad9048a352939fdebecd507ef7a3 100644
--- a/src/ghosts_mod.F90
+++ b/src/ghosts_mod.F90
@@ -1,6 +1,6 @@
 module ghosts
-USE mpi
-USE prec_const, ONLY: dp
+USE mpi
+USE prec_const, ONLY: dp
 IMPLICIT NONE
 
 INTEGER :: status(MPI_STATUS_SIZE), source, dest, count, ipg
@@ -59,26 +59,41 @@ END SUBROUTINE update_ghosts_EM
 SUBROUTINE update_ghosts_p_mom
   USE time_integration, ONLY: updatetlevel
   USE fields,   ONLY: moments
-  USE parallel, ONLY: nbr_R,nbr_L,comm0
+  USE parallel, ONLY: nbr_R,nbr_L,comm0, exchange_ghosts_1D
   USE grid,     ONLY: local_na,local_np,local_nj,local_nky,local_nkx,local_nz,&
                               ngp,ngj,ngz
   IMPLICIT NONE
-  INTEGER :: ierr, first, last, ig
-  first = 1 + ngp/2
-  last  = local_np + ngp/2
-  count = local_na*(local_nj+ngj)*local_nky*local_nkx*(local_nz+ngz) ! Number of elements to send
-  !!!!!!!!!!! Send ghost to right neighbour !!!!!!!!!!!!!!!!!!!!!!
-  DO ig = 1,ngp/2
-    CALL mpi_sendrecv(moments(:,last-(ig-1),:,:,:,:,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_R, 14+ig, &
-                      moments(:,first-ig   ,:,:,:,:,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_L, 14+ig, &
-                      comm0, status, ierr)
-  ENDDO
-  !!!!!!!!!!! Send ghost to left neighbour !!!!!!!!!!!!!!!!!!!!!!
-  DO ig = 1,ngp/2
-  CALL mpi_sendrecv(moments(:,first+(ig-1),:,:,:,:,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_L, 16+ig, &
-                    moments(:,last + ig   ,:,:,:,:,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_R, 16+ig, &
-                    comm0, status, ierr)
+  COMPLEX(dp), DIMENSION(local_np+ngp) :: slice_p
+  INTEGER :: ia,iz,ij,iy,ix
+  DO iz = 1,local_nz+ngz
+    DO ix = 1,local_nkx
+      DO iy = 1,local_nky
+        DO ij = 1,local_nj+ngj
+          DO ia = 1,local_na
+            slice_p = moments(ia,:,ij,iy,ix,iz,updatetlevel)
+            CALL exchange_ghosts_1D(slice_p,nbr_L,nbr_R,local_np,ngp)
+            moments(ia,:,ij,iy,ix,iz,updatetlevel) = slice_p
+          ENDDO
+        ENDDO
+      ENDDO
+    ENDDO
+  ENDDO
+  ! INTEGER :: ierr, first, last, ig, count
+  ! first = 1 + ngp/2
+  ! last  = local_np + ngp/2
+  ! count = local_na*(local_nj+ngj)*local_nky*local_nkx*(local_nz+ngz) ! Number of elements to send
+  ! !!!!!!!!!!! Send ghost to right neighbour !!!!!!!!!!!!!!!!!!!!!!
+  ! DO ig = 1,ngp/2
+  !   CALL mpi_sendrecv(moments(:,last-(ig-1),:,:,:,:,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_R, 14+ig, &
+  !                     moments(:,first-ig   ,:,:,:,:,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_L, 14+ig, &
+  !                     comm0, status, ierr)
+  ! ENDDO
+  ! !!!!!!!!!!! Send ghost to left neighbour !!!!!!!!!!!!!!!!!!!!!!
+  ! DO ig = 1,ngp/2
+  ! CALL mpi_sendrecv(moments(:,first+(ig-1),:,:,:,:,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_L, 16+ig, &
+  !                   moments(:,last + ig   ,:,:,:,:,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_R, 16+ig, &
+  !                   comm0, status, ierr)
+  ! ENDDO
 END SUBROUTINE update_ghosts_p_mom
 
 !Communicate z+1, z+2 moments to left neighboor and z-1, z-2 moments to right one
@@ -180,7 +195,7 @@ SUBROUTINE update_ghosts_z_3D(field)
                         comm0, status, ierr)
       ENDDO
       !!!!!!!!!!! Send ghost to down neighbour !!!!!!!!!!!!!!!!!!!!!!
-      DO ig = 1,ngz
+      DO ig = 1,ngz/2
       CALL mpi_sendrecv(     field(:,:,first+(ig-1)), count, MPI_DOUBLE_COMPLEX, nbr_D, 32+ig, & ! Send to Down the first
                        buff_xy_zBC(:,:,ig),           count, MPI_DOUBLE_COMPLEX, nbr_U, 32+ig, & ! Recieve from Up the last+1
                         comm0, status, ierr)
diff --git a/src/grid_mod.F90 b/src/grid_mod.F90
index 7cba361806bb808950bec83dbc6e465ecb848383..e46d4b039c4961675574f0ed50a07a03473c3bbd 100644
--- a/src/grid_mod.F90
+++ b/src/grid_mod.F90
@@ -38,7 +38,7 @@ MODULE grid
   INTEGER,  PUBLIC, PROTECTED ::  ikxs,ikxe ! Fourier x mode
   INTEGER,  PUBLIC, PROTECTED ::  izs ,ize  ! z-grid
   INTEGER,  PUBLIC, PROTECTED ::  ieven, iodd ! indices for the staggered grids
-  INTEGER,  PUBLIC, PROTECTED ::  ip0, ip1, ip2, ip3, ij0
+  INTEGER,  PUBLIC, PROTECTED ::  ip0, ip1, ip2, ip3, ij0, ij1, pp2
   INTEGER,  PUBLIC, PROTECTED ::  ikx0, iky0, ikx_max, iky_max ! Indices of k-grid origin and max
   ! Total numbers of points for Hermite and Laguerre
   INTEGER, PUBLIC, PROTECTED :: total_na
@@ -68,7 +68,7 @@ MODULE grid
   integer(C_INTPTR_T), PUBLIC,PROTECTED :: local_nkx_ptr, local_nky_ptr
   integer(C_INTPTR_T), PUBLIC,PROTECTED :: local_nkx_ptr_offset, local_nky_ptr_offset
   ! Grid spacing and limits
-  REAL(dp), PUBLIC, PROTECTED ::  deltap, pp2, deltaz, inv_deltaz
+  REAL(dp), PUBLIC, PROTECTED ::  deltap, deltaz, inv_deltaz
   REAL(dp), PUBLIC, PROTECTED ::  deltakx, deltaky, kx_max, ky_max, kx_min, ky_min!, kp_max
   INTEGER , PUBLIC, PROTECTED ::  local_pmin,  local_pmax
   INTEGER , PUBLIC, PROTECTED ::  local_jmin,  local_jmax
@@ -296,9 +296,10 @@ CONTAINS
     ! Precomputations
     jmax_dp      = real(jmax,dp)
     diff_j_coeff = jmax_dp*(1._dp/jmax_dp)**6
-    ! j=0 indices
-    DO ij = 1,local_nj
+    ! j=0 and j=1 indices
+    DO ij = 1,local_nj+ngj
       IF(jarray(ij) .EQ. 0) ij0 = ij
+      IF(jarray(ij) .EQ. 1) ij1 = ij
     END DO
   END SUBROUTINE set_jgrid
 
@@ -476,7 +477,7 @@ CONTAINS
     INTEGER :: istart, iend, in, Npol, iz, ig, eo, iglob
     total_nz = Nz
     ! Length of the flux tube (in ballooning angle)
-    Lz         = 2_dp*pi*Npol
+    Lz         = 2._dp*pi*REAL(Npol,dp)
     ! Z stepping (#interval = #points since periodic)
     deltaz        = Lz/REAL(Nz,dp)
     inv_deltaz    = 1._dp/deltaz
diff --git a/src/inital.F90 b/src/inital.F90
index 61d7e5f731009d28813ee240a4192922ed5ba536..04c4d5112dcf689b880bb441c034cd2f28fa8746 100644
--- a/src/inital.F90
+++ b/src/inital.F90
@@ -108,7 +108,7 @@ SUBROUTINE init_moments
 
   REAL(dp) :: noise
   INTEGER, DIMENSION(12) :: iseedarr
-  INTEGER  :: ia,ip,ij,ikx,iky,iz
+  INTEGER  :: ia,ip,ij,ikx,iky,iz, ipi,iji,izi
 
   ! Seed random number generator
   iseedarr(:)=iseed
@@ -116,19 +116,22 @@ SUBROUTINE init_moments
 
     !**** Broad noise initialization *******************************************
   DO ia=1,local_na
-    DO ip=1,local_np + ngp
-      DO ij=1,local_nj + ngj
+    DO ip=1,local_np
+      ipi = ip+ngp/2
+      DO ij=1,local_nj
+        iji = ij+ngj/2
         DO ikx=1,total_nkx
           DO iky=1,local_nky
-            DO iz=1,local_nz +ngz
+            DO iz=1,local_nz
+              izi = iz+ngz/2
               CALL RANDOM_NUMBER(noise)
-              moments(ia,ip,ij,iky,ikx,iz,:) = (init_background + init_noiselvl*(noise-0.5_dp))
+              moments(ia,ipi,iji,iky,ikx,izi,:) = (init_background + init_noiselvl*(noise-0.5_dp))
             END DO
           END DO
         END DO
         IF ( contains_ky0 ) THEN
           DO ikx=2,total_nkx/2 !symmetry at ky = 0 for all z
-            moments(ia,ip,ij,iky0,ikx,:,:) = moments(ia,ip,ij,iky0,total_nkx+2-ikx,:,:)
+            moments(ia,ipi,iji,iky0,ikx,:,:) = moments(ia,ipi,iji,iky0,total_nkx+2-ikx,:,:)
           END DO
         ENDIF
       END DO
@@ -138,9 +141,12 @@ SUBROUTINE init_moments
       DO ikx=1,total_nkx
         DO iky=1,local_nky
           DO iz=1,local_nz + ngz
+            izi = iz+ngz/2
             DO ip=1,local_np + ngp
+              ipi = ip+ngp/2
               DO ij=1,local_nj + ngj
-                moments(ia,ip,ij,iky,ikx,iz, :) = moments(ia, ip,ij,iky,ikx,iz, :)*AA_x(ikx)*AA_y(iky)
+                iji = ij+ngj/2
+                moments(ia,ipi,iji,iky,ikx,izi, :) = moments(ia, ipi,iji,iky,ikx,izi, :)*AA_x(ikx)*AA_y(iky)
               ENDDO
             ENDDO
           ENDDO
diff --git a/src/memory.F90 b/src/memory.F90
index 3ee7f91cf71f3311cdfd0ef0869630f3d4fcc93f..2b27684f939259f27f8610c56ab4388ab41193cf 100644
--- a/src/memory.F90
+++ b/src/memory.F90
@@ -4,52 +4,52 @@ SUBROUTINE memory
   USE array
   USE basic, ONLY: allocate_array
   USE fields
-  USE grid, ONLY: local_Na, local_Np,Ngp ,local_Nj,Ngj, local_nky, local_nkx,local_nz,Ngz, jmax, pmax
+  USE grid, ONLY: local_na, local_np,ngp ,local_nj,ngj, local_nky, local_nkx,local_nz,ngz, jmax, pmax, nzgrid
   USE collision
   USE time_integration, ONLY: ntimelevel
   USE prec_const
-  USE model, ONLY: Na
+  USE model, ONLY: na
   IMPLICIT NONE
 
   ! Electrostatic potential
-  CALL allocate_array(           phi, 1,local_nky, 1,local_nkx, 1,local_nz+Ngz)
-  CALL allocate_array(           psi, 1,local_nky, 1,local_nkx, 1,local_nz+Ngz)
+  CALL allocate_array(           phi, 1,local_nky, 1,local_nkx, 1,local_nz+ngz)
+  CALL allocate_array(           psi, 1,local_nky, 1,local_nkx, 1,local_nz+ngz)
   CALL allocate_array(inv_poisson_op, 1,local_nky, 1,local_nkx, 1,local_nz)
   CALL allocate_array( inv_ampere_op, 1,local_nky, 1,local_nkx, 1,local_nz)
   CALL allocate_array(   inv_pol_ion, 1,local_nky, 1,local_nkx, 1,local_nz)
   ! CALL allocate_array(HF_phi_correction_operator, 1,local_nky, 1,local_nkx, 1,local_nz)
 
   !Moments related arrays
-  CALL allocate_array(           dens, 1,local_Na, 1,local_nky, 1,local_nkx, 1,local_nz)
-  CALL allocate_array(           upar, 1,local_Na, 1,local_nky, 1,local_nkx, 1,local_nz)
-  CALL allocate_array(           uper, 1,local_Na, 1,local_nky, 1,local_nkx, 1,local_nz)
-  CALL allocate_array(           Tpar, 1,local_Na, 1,local_nky, 1,local_nkx, 1,local_nz)
-  CALL allocate_array(           Tper, 1,local_Na, 1,local_nky, 1,local_nkx, 1,local_nz)
-  CALL allocate_array(           temp, 1,local_Na, 1,local_nky, 1,local_nkx, 1,local_nz)
-  CALL allocate_array(         Kernel, 1,local_Na,                 1,local_Nj+Ngj, 1,local_nky, 1,local_nkx, 1,local_nz+Ngz, 1,2)
-  CALL allocate_array(        moments, 1,local_Na, 1,local_Np+Ngp, 1,local_Nj+Ngj, 1,local_nky, 1,local_nkx, 1,local_nz+Ngz, 1,ntimelevel )
-  CALL allocate_array(          Napjz, 1,local_Na, 1,local_Np,     1,local_Nj,                               1,local_nz)
-  CALL allocate_array(    moments_rhs, 1,local_Na, 1,local_Np,     1,local_Nj,     1,local_nky, 1,local_nkx, 1,local_nz,     1,ntimelevel )
-  CALL allocate_array( nadiab_moments, 1,local_Na, 1,local_Np+Ngp, 1,local_Nj+Ngj, 1,local_nky, 1,local_nkx, 1,local_nz+Ngz)
-  CALL allocate_array(       ddz_napj, 1,local_Na, 1,local_Np+Ngp, 1,local_Nj+Ngj, 1,local_nky, 1,local_nkx, 1,local_nz)
-  CALL allocate_array(     ddzND_Napj, 1,local_Na, 1,local_Np+Ngp, 1,local_Nj+Ngj, 1,local_nky, 1,local_nkx, 1,local_nz)
-  CALL allocate_array(    interp_napj, 1,local_Na, 1,local_Np+Ngp, 1,local_Nj+Ngj, 1,local_nky, 1,local_nkx, 1,local_nz)
-  CALL allocate_array(          Capj,  1,local_Na, 1,local_Np,     1,local_Nj,     1,local_nky, 1,local_nkx, 1,local_nz)
-  CALL allocate_array(          Sapj,  1,local_Na, 1,local_Np,     1,local_Nj,     1,local_nky, 1,local_nkx, 1,local_nz)
-  CALL allocate_array(     xnapj, 1,local_Na, 1,local_Np, 1,local_Nj)
-  CALL allocate_array(   xnapp2j, 1,local_Na, 1,local_Np)
-  CALL allocate_array(   xnapp1j, 1,local_Na, 1,local_Np)
-  CALL allocate_array(   xnapm1j, 1,local_Na, 1,local_Np)
-  CALL allocate_array(   xnapm2j, 1,local_Na, 1,local_Np)
-  CALL allocate_array(   xnapjp1, 1,local_Na, 1,local_Nj)
-  CALL allocate_array(   xnapjm1, 1,local_Na, 1,local_Nj)
-  CALL allocate_array(   ynapp1j, 1,local_Na, 1,local_Np, 1,local_Nj)
-  CALL allocate_array(   ynapm1j, 1,local_Na, 1,local_Np, 1,local_Nj)
-  CALL allocate_array( ynapp1jm1, 1,local_Na, 1,local_Np, 1,local_Nj)
-  CALL allocate_array( ynapm1jm1, 1,local_Na, 1,local_Np, 1,local_Nj)
-  CALL allocate_array(   zNapm1j, 1,local_Na, 1,local_Np, 1,local_Nj)
-  CALL allocate_array( zNapm1jp1, 1,local_Na, 1,local_Np, 1,local_Nj)
-  CALL allocate_array( zNapm1jm1, 1,local_Na, 1,local_Np, 1,local_Nj)
+  CALL allocate_array(           dens, 1,local_na, 1,local_nky, 1,local_nkx, 1,local_nz)
+  CALL allocate_array(           upar, 1,local_na, 1,local_nky, 1,local_nkx, 1,local_nz)
+  CALL allocate_array(           uper, 1,local_na, 1,local_nky, 1,local_nkx, 1,local_nz)
+  CALL allocate_array(           Tpar, 1,local_na, 1,local_nky, 1,local_nkx, 1,local_nz)
+  CALL allocate_array(           Tper, 1,local_na, 1,local_nky, 1,local_nkx, 1,local_nz)
+  CALL allocate_array(           temp, 1,local_na, 1,local_nky, 1,local_nkx, 1,local_nz)
+  CALL allocate_array(         Kernel, 1,local_na,                 1,local_nj+ngj, 1,local_nky, 1,local_nkx, 1,local_nz+ngz, 1,nzgrid)
+  CALL allocate_array(        moments, 1,local_na, 1,local_np+ngp, 1,local_nj+ngj, 1,local_nky, 1,local_nkx, 1,local_nz+ngz, 1,ntimelevel )
+  CALL allocate_array(          napjz, 1,local_na, 1,local_np,     1,local_nj,                               1,local_nz)
+  CALL allocate_array(    moments_rhs, 1,local_na, 1,local_np,     1,local_nj,     1,local_nky, 1,local_nkx, 1,local_nz,     1,ntimelevel )
+  CALL allocate_array( nadiab_moments, 1,local_na, 1,local_np+ngp, 1,local_nj+ngj, 1,local_nky, 1,local_nkx, 1,local_nz+ngz)
+  CALL allocate_array(       ddz_napj, 1,local_na, 1,local_np+ngp, 1,local_nj+ngj, 1,local_nky, 1,local_nkx, 1,local_nz)
+  CALL allocate_array(     ddzND_napj, 1,local_na, 1,local_np+ngp, 1,local_nj+ngj, 1,local_nky, 1,local_nkx, 1,local_nz)
+  CALL allocate_array(    interp_napj, 1,local_na, 1,local_np+ngp, 1,local_nj+ngj, 1,local_nky, 1,local_nkx, 1,local_nz)
+  CALL allocate_array(          Capj,  1,local_na, 1,local_np,     1,local_nj,     1,local_nky, 1,local_nkx, 1,local_nz)
+  CALL allocate_array(          Sapj,  1,local_na, 1,local_np,     1,local_nj,     1,local_nky, 1,local_nkx, 1,local_nz)
+  CALL allocate_array(     xnapj, 1,local_na, 1,local_np, 1,local_nj)
+  CALL allocate_array(   xnapp2j, 1,local_na, 1,local_np)
+  CALL allocate_array(   xnapp1j, 1,local_na, 1,local_np)
+  CALL allocate_array(   xnapm1j, 1,local_na, 1,local_np)
+  CALL allocate_array(   xnapm2j, 1,local_na, 1,local_np)
+  CALL allocate_array(   xnapjp1, 1,local_na, 1,local_nj)
+  CALL allocate_array(   xnapjm1, 1,local_na, 1,local_nj)
+  CALL allocate_array(   ynapp1j, 1,local_na, 1,local_np, 1,local_nj)
+  CALL allocate_array(   ynapm1j, 1,local_na, 1,local_np, 1,local_nj)
+  CALL allocate_array( ynapp1jm1, 1,local_na, 1,local_np, 1,local_nj)
+  CALL allocate_array( ynapm1jm1, 1,local_na, 1,local_np, 1,local_nj)
+  CALL allocate_array(   znapm1j, 1,local_na, 1,local_np, 1,local_nj)
+  CALL allocate_array( znapm1jp1, 1,local_na, 1,local_np, 1,local_nj)
+  CALL allocate_array( znapm1jm1, 1,local_na, 1,local_np, 1,local_nj)
 
   ! Non linear terms and dnjs table
   CALL allocate_array( dnjs, 1,jmax+1, 1,jmax+1, 1,jmax+1)
@@ -57,23 +57,22 @@ SUBROUTINE memory
   ! Hermite fourth derivative coeff table 4*sqrt(p!/(p-4)!)
   CALL allocate_array( dv4_Hp_coeff, -2, pmax)
 
-  CALL allocate_array( xphij,   1,local_Na, 1,local_Np, 1,local_Nj)
-  CALL allocate_array( xphijp1, 1,local_Na, 1,local_Np, 1,local_Nj)
-  CALL allocate_array( xphijm1, 1,local_Na, 1,local_Np, 1,local_Nj)
-  CALL allocate_array( xpsij,   1,local_Na, 1,local_Np, 1,local_Nj)
-  CALL allocate_array( xpsijp1, 1,local_Na, 1,local_Np, 1,local_Nj)
-  CALL allocate_array( xpsijm1, 1,local_Na, 1,local_Np, 1,local_Nj)
-
+  CALL allocate_array( xphij,   1,local_na, 1,local_np, 1,local_nj)
+  CALL allocate_array( xphijp1, 1,local_na, 1,local_np, 1,local_nj)
+  CALL allocate_array( xphijm1, 1,local_na, 1,local_np, 1,local_nj)
+  CALL allocate_array( xpsij,   1,local_na, 1,local_np, 1,local_nj)
+  CALL allocate_array( xpsijp1, 1,local_na, 1,local_np, 1,local_nj)
+  CALL allocate_array( xpsijm1, 1,local_na, 1,local_np, 1,local_nj)
+  
   !___________________ 2x5D ARRAYS __________________________
   !! Collision matrices
   IF (GK_CO) THEN !GK collision matrices (one for each kperp)
-    CALL allocate_array(  Cab_F, 1,Na, 1,Na, 1,(pmax+1)*(jmax+1), 1,(pmax+1)*(jmax+1), 1,local_nky, 1,local_nkx, 1,local_nz)
-    CALL allocate_array(  Cab_T, 1,Na, 1,Na, 1,(pmax+1)*(jmax+1), 1,(pmax+1)*(jmax+1), 1,local_nky, 1,local_nkx, 1,local_nz)
-    CALL allocate_array(  Caa,   1,Na,       1,(pmax+1)*(jmax+1), 1,(pmax+1)*(jmax+1), 1,local_nky, 1,local_nkx, 1,local_nz)
+    CALL allocate_array(  Cab_F, 1,na, 1,na, 1,(pmax+1)*(jmax+1), 1,(pmax+1)*(jmax+1), 1,local_nky, 1,local_nkx, 1,local_nz)
+    CALL allocate_array(  Cab_T, 1,na, 1,na, 1,(pmax+1)*(jmax+1), 1,(pmax+1)*(jmax+1), 1,local_nky, 1,local_nkx, 1,local_nz)
+    CALL allocate_array(  Caa,   1,na,       1,(pmax+1)*(jmax+1), 1,(pmax+1)*(jmax+1), 1,local_nky, 1,local_nkx, 1,local_nz)
   ELSE !DK collision matrix (same for every k)
-      CALL allocate_array(  Cab_F, 1,Na, 1,Na, 1,(pmax+1)*(jmax+1), 1,(pmax+1)*(jmax+1), 1,1, 1,1, 1,1)
-      CALL allocate_array(  Cab_T, 1,Na, 1,Na, 1,(pmax+1)*(jmax+1), 1,(pmax+1)*(jmax+1), 1,1, 1,1, 1,1)
-      CALL allocate_array(  Caa,    1,Na,      1,(pmax+1)*(jmax+1), 1,(pmax+1)*(jmax+1), 1,1, 1,1, 1,1)
+      CALL allocate_array(  Cab_F, 1,na, 1,na, 1,(pmax+1)*(jmax+1), 1,(pmax+1)*(jmax+1), 1,1, 1,1, 1,1)
+      CALL allocate_array(  Cab_T, 1,na, 1,na, 1,(pmax+1)*(jmax+1), 1,(pmax+1)*(jmax+1), 1,1, 1,1, 1,1)
+      CALL allocate_array(  Caa,    1,na,      1,(pmax+1)*(jmax+1), 1,(pmax+1)*(jmax+1), 1,1, 1,1, 1,1)
 ENDIF
-
-END SUBROUTINE memory
+END SUBROUTINE memory
\ No newline at end of file
diff --git a/src/moments_eq_rhs_mod.F90 b/src/moments_eq_rhs_mod.F90
index 8f2fce0187bbfcfb03a2e2740c71832bfed1319d..f24ed835477543ec4b48467f19d787a3bf4a46e9 100644
--- a/src/moments_eq_rhs_mod.F90
+++ b/src/moments_eq_rhs_mod.F90
@@ -17,7 +17,7 @@ SUBROUTINE compute_moments_eq_rhs
   USE time_integration
   USE geometry, ONLY: gradz_coeff, dlnBdz, Ckxky!, Gamma_phipar
   USE calculus, ONLY: interp_z, grad_z, grad_z2
-  USE species,  ONLY: dpdx
+  ! USE species,  ONLY: dpdx
   IMPLICIT NONE
   INTEGER     :: ia, iz, iky,  ikx, ip ,ij, eo ! counters
   INTEGER     :: izi,ipi,iji ! interior points counters
@@ -32,7 +32,6 @@ SUBROUTINE compute_moments_eq_rhs
   COMPLEX(dp) :: Napj, RHS
    ! Measuring execution time
   CALL cpu_time(t0_rhs)
-
   ! Spatial loops
   z:DO  iz = 1,local_nz
     izi = iz + ngz/2
@@ -68,7 +67,8 @@ SUBROUTINE compute_moments_eq_rhs
                 ! term propto n^{p,j-1}
                 Tnapjm1 = xnapjm1(ia,ij) * nadiab_moments(ia,ipi,    iji-1,iky,ikx,izi)
                 ! Perpendicular magnetic term (curvature and gradient drifts)
-                Mperp   = imagu*Ckxky(iky,ikx,iz,eo)*(Tnapj + Tnapp2j + Tnapm2j + Tnapjp1 + Tnapjm1)
+                Mperp   = imagu*Ckxky(iky,ikx,izi,eo)&
+                          *(Tnapj + Tnapp2j + Tnapm2j + Tnapjp1 + Tnapjm1)
                 ! Parallel dynamic
                 ! ddz derivative for Landau damping term
                 Ldamp     = xnapp1j(ia,ip) * ddz_napj(ia,ipi+1,iji,iky,ikx,iz) &
@@ -83,26 +83,18 @@ SUBROUTINE compute_moments_eq_rhs
                 Unapm1jp1 = znapm1jp1(ia,ip,ij) * interp_napj(ia,ipi-1,iji+1,iky,ikx,iz)
                 Unapm1jm1 = znapm1jm1(ia,ip,ij) * interp_napj(ia,ipi-1,iji-1,iky,ikx,iz)
                 ! sum the parallel forces
-                Fmir = dlnBdz(iz,eo)*(Tnapp1j + Tnapp1jm1 + Tnapm1j + Tnapm1jm1 +&
+                Fmir = dlnBdz(izi,eo)*(Tnapp1j + Tnapp1jm1 + Tnapm1j + Tnapm1jm1 +&
                                       Unapm1j + Unapm1jp1 + Unapm1jm1)
                 ! Parallel magnetic term (Landau damping and the mirror force)
-                Mpara = gradz_coeff(iz,eo)*(Ldamp + Fmir)
+                Mpara = gradz_coeff(izi,eo)*(Ldamp + Fmir)
                 !! Electrical potential term
-                IF ( p_int .LE. 2 ) THEN ! kronecker p0 p1 p2
-                  Dphi =i_ky*( xphij  (ia,ip,ij)*kernel(ia,iji  ,iky,ikx,izi,eo) &
-                              +xphijp1(ia,ip,ij)*kernel(ia,iji+1,iky,ikx,izi,eo) &
-                              +xphijm1(ia,ip,ij)*kernel(ia,iji-1,iky,ikx,izi,eo) )*phi(iky,ikx,izi)
-                ELSE
-                  Dphi = 0._dp
-                ENDIF
+                Dphi =i_ky*( xphij  (ia,ip,ij)*kernel(ia,iji  ,iky,ikx,izi,eo) &
+                            +xphijp1(ia,ip,ij)*kernel(ia,iji+1,iky,ikx,izi,eo) &
+                            +xphijm1(ia,ip,ij)*kernel(ia,iji-1,iky,ikx,izi,eo) )*phi(iky,ikx,izi)
                 !! Vector potential term
-                IF ( (p_int .LE. 3) .AND. (p_int .GE. 1) ) THEN ! Kronecker p1 or p3
-                  Dpsi =-i_ky*( xpsij  (ia,ip,ij)*kernel(ia,iji  ,iky,ikx,izi,eo) &
-                               +xpsijp1(ia,ip,ij)*kernel(ia,iji+1,iky,ikx,izi,eo) &
-                               +xpsijm1(ia,ip,ij)*kernel(ia,iji-1,iky,ikx,izi,eo))*psi(iky,ikx,izi)
-                ELSE
-                  Dpsi = 0._dp
-                ENDIF
+                Dpsi =-i_ky*( xpsij  (ia,ip,ij)*kernel(ia,iji  ,iky,ikx,izi,eo) &
+                              +xpsijp1(ia,ip,ij)*kernel(ia,iji+1,iky,ikx,izi,eo) &
+                              +xpsijm1(ia,ip,ij)*kernel(ia,iji-1,iky,ikx,izi,eo))*psi(iky,ikx,izi)
                 !! Sum of all RHS terms
                 RHS = &
                     ! Nonlinear term Sapj_ = {phi,f}
@@ -111,18 +103,18 @@ SUBROUTINE compute_moments_eq_rhs
                     - Mperp &
                     ! Parallel magnetic term
                     - Mpara &
-                    ! ! Drives (density + temperature gradients)
+                    ! Drives (density + temperature gradients)
                     - (Dphi + Dpsi) &
-                    ! ! Collision term
+                    ! Collision term
                     + Capj(ia,ip,ij,iky,ikx,iz) &
-                    ! ! Perpendicular pressure effects (electromagnetic term) (TO CHECK)
-                    - i_ky*beta*dpdx(ia) * (Tnapj + Tnapp2j + Tnapm2j + Tnapjp1 + Tnapjm1)&
-                    ! ! Parallel drive term (should be negligible, to test)
-                    !! -Gamma_phipar(iz,eo)*Tphi*ddz_phi(iky,ikx,iz) &
-                    ! ! Numerical perpendicular hyperdiffusion
+                    ! Perpendicular pressure effects (electromagnetic term) (TO CHECK)
+                    ! - i_ky*beta*dpdx(ia) * (Tnapj + Tnapp2j + Tnapm2j + Tnapjp1 + Tnapjm1)&
+                    ! Parallel drive term (should be negligible, to test)
+                    ! !! -Gamma_phipar(iz,eo)*Tphi*ddz_phi(iky,ikx,iz) &
+                    ! Numerical perpendicular hyperdiffusion
                     -mu_x*diff_kx_coeff*kx**N_HD*Napj &
                     -mu_y*diff_ky_coeff*ky**N_HD*Napj &
-                    ! ! Numerical parallel hyperdiffusion "mu_z*ddz**4"  see Pueschel 2010 (eq 25)
+                    ! ! ! Numerical parallel hyperdiffusion "mu_z*ddz**4"  see Pueschel 2010 (eq 25)
                     -mu_z*diff_dz_coeff*ddzND_napj(ia,ipi,iji,iky,ikx,iz)
                 !! Velocity space dissipation (should be implemented somewhere else)
                 SELECT CASE(HYP_V)
@@ -131,15 +123,15 @@ SUBROUTINE compute_moments_eq_rhs
                     RHS = RHS - mu_p*diff_p_coeff*p_int**6*Napj
                   IF (j_int .GT. 1)  &
                     RHS = RHS - mu_j*diff_j_coeff*j_int**6*Napj
-                CASE('dvpar4')
-                  ! fourth order numerical diffusion in vpar
-                  IF(p_int .GE. 4) &
-                  ! Numerical parallel velocity hyperdiffusion "+ dvpar4 g_a" see Pueschel 2010 (eq 33)
-                  ! (not used often so not parallelized)
-                  RHS = RHS + mu_p*dv4_Hp_coeff(p_int)*moments(ia,ipi-4,iji,iky,ikx,izi,updatetlevel)
-                  ! + dummy Laguerre diff
-                  IF (j_int .GT. 1)  &
-                    RHS = RHS - mu_j*diff_j_coeff*j_int**6*Napj
+                ! CASE('dvpar4')
+                !   ! fourth order numerical diffusion in vpar
+                !   IF(p_int .GE. 4) &
+                !   ! Numerical parallel velocity hyperdiffusion "+ dvpar4 g_a" see Pueschel 2010 (eq 33)
+                !   ! (not used often so not parallelized)
+                !   RHS = RHS + mu_p*dv4_Hp_coeff(p_int)*moments(ia,ipi-4,iji,iky,ikx,izi,updatetlevel)
+                !   ! + dummy Laguerre diff
+                !   IF (j_int .GE. 2)  &
+                !     RHS = RHS - mu_j*diff_j_coeff*j_int**6*Napj
                 CASE DEFAULT
                 END SELECT
               ELSE
@@ -156,12 +148,24 @@ SUBROUTINE compute_moments_eq_rhs
   ! Execution time end
   CALL cpu_time(t1_rhs)
   tc_rhs = tc_rhs + (t1_rhs-t0_rhs)
-  ! print*, moments(1,3,2,2,2,3,updatetlevel)
-  ! print*, moments_rhs(1,3,2,2,2,3,updatetlevel)
-  print*, SUM(REAL(moments_rhs(1,:,:,:,:,:,:)))
-  print*, SUM(REAL(moments_rhs(2,:,:,:,:,:,:)))
-  print*, SUM(REAL(phi))
-  ! stop
+
+      print*,'sumabs moments i', SUM(ABS(moments(1,:,:,:,:,:,updatetlevel)))
+      print*,'moment rhs i 221', moments_rhs(1,1,1,2,2,1,updatetlevel)
+      print*,'sum real nadiabe', SUM(REAL(nadiab_moments(2,:,:,:,:,:)))
+      print*,'sumreal momrhs i', SUM(REAL(moments_rhs(1,:,:,:,:,:,:)))
+      print*,'sumimag momrhs i', SUM(AIMAG(moments_rhs(1,:,:,:,:,:,:)))
+      print*,'sumreal phi     ', SUM(REAL(phi(:,:,(1+ngz/2):(local_nz+ngz/2))))
+      print*,'sumimag phi     ', SUM(AIMAG(phi(:,:,(1+ngz/2):(local_nz+ngz/2))))
+      print*,'phi(2,2,1)      ', phi(2,2,1+ngz/2)
+      print*,'sumreal ddznipj ', SUM(REAL(ddz_napj(1,:,:,:,:,:)))
+      print*,'sumimag ddznipj ', SUM(AIMAG(ddz_napj(1,:,:,:,:,:)))
+      print*,'        ddznipj  ',(ddz_napj(1,2+ngp/2,2+ngj/2,2,2,1))
+      print*,'sumreal Capj    ', SUM(REAL(Capj(1,:,:,:,:,:)))
+      print*,'---'
+      IF(updatetlevel .EQ. 4) stop
+  ! print*, xnapjp1(2,:)
+  ! print*, pp2
+  ! stop
 
 END SUBROUTINE compute_moments_eq_rhs
 
diff --git a/src/numerics_mod.F90 b/src/numerics_mod.F90
index 7c3c74765eae377863027a7e8cf2d5d3de3309d8..e8f3aac55d2e94731f4e5e5d10e83fdba5de7af1 100644
--- a/src/numerics_mod.F90
+++ b/src/numerics_mod.F90
@@ -139,6 +139,7 @@ SUBROUTINE evaluate_poisson_op
   IMPLICIT NONE
   REAL(dp)    :: pol_ion, pol_tot, operator, operator_ion     ! (Z^2/tau (1-sum_n kernel_na^2))
   INTEGER     :: in,ikx,iky,iz,ia
+  REAL(dp)    :: pol_i, pol_e, sumker, spol     ! (Z_a^2/tau_a (1-sum_n kernel_na^2))
 
   ! This term has no staggered grid dependence. It is evalued for the
   ! even z grid since poisson uses p=0 moments and phi only.
@@ -147,23 +148,24 @@ SUBROUTINE evaluate_poisson_op
   zloop:  DO iz  = 1,local_nz
   IF( (kxarray(ikx).EQ.0._dp) .AND. (kyarray(iky).EQ.0._dp) ) THEN
       inv_poisson_op(iky, ikx, iz) =  0._dp
-  ELSE
-    operator = 0._dp
-    DO ia = 1,local_na ! sum over species
-      ! loop over n only up to the max polynomial degree
-      pol_tot  = 0._dp  ! total polarisation term
-      pol_ion  = 0._dp  ! sum of ion polarisation term
+      inv_pol_ion   (iky, ikx, iz) =  0._dp
+ELSE
+    ! loop over n only up to the max polynomial degree
+    pol_tot = 0._dp  ! total polarisation term
+    a:DO ia = 1,local_na ! sum over species
+    ! ia = 1
+      sumker  = 0._dp  ! sum of q^2/tau-weighted squared-kernel terms for species ia
       DO in=1,local_nj
-        pol_tot = pol_tot + kernel(ia,in+ngj/2,iky,ikx,iz+ngz/2,ieven)**2 ! ... sum recursively ...
-        pol_ion = pol_ion + kernel(ia,in+ngj/2,iky,ikx,iz+ngz/2,ieven)**2 !
+        sumker = sumker + q2_tau(ia)*kernel(ia,in+ngj/2,iky,ikx,iz+ngz/2,ieven)**2 ! ... sum recursively ...
       END DO
-      operator = operator + q2_tau(ia)*(1._dp - pol_tot)
-    ENDDO
-    operator_ion = operator
-    IF(ADIAB_E) THEN ! Adiabatic model
+      pol_tot = pol_tot + q2_tau(ia) - sumker
+    ENDDO a
+    operator_ion = pol_tot
+    IF(ADIAB_E) THEN ! Adiabatic electron model
       pol_tot = pol_tot +  1._dp/tau_e - 1._dp
     ENDIF
-    inv_poisson_op(iky, ikx, iz) =  1._dp/operator
+    operator = pol_tot
+    inv_poisson_op(iky, ikx, iz) =  1._dp/pol_tot
     inv_pol_ion   (iky, ikx, iz) =  1._dp/operator_ion
   ENDIF
   END DO zloop
diff --git a/src/parallel_mod.F90 b/src/parallel_mod.F90
index 5fa74308349829cbd4e1e2318ce6a663bb0471d5..f437d332e11b0357ebc7dce5b4be35ee7e6fc0db 100644
--- a/src/parallel_mod.F90
+++ b/src/parallel_mod.F90
@@ -28,13 +28,15 @@ MODULE parallel
 
 
   ! recieve and displacement counts for gatherv
-  INTEGER, DIMENSION(:), ALLOCATABLE :: rcv_p, dsp_p, rcv_y, dsp_y, rcv_zy, dsp_zy
+  INTEGER, DIMENSION(:), ALLOCATABLE :: rcv_p, dsp_p
+  INTEGER, DIMENSION(:), ALLOCATABLE :: rcv_y, dsp_y
+  INTEGER, DIMENSION(:), ALLOCATABLE :: rcv_zy, dsp_zy
   INTEGER, DIMENSION(:), ALLOCATABLE :: rcv_zp,  dsp_zp
   INTEGER, DIMENSION(:), ALLOCATABLE :: rcv_yp,  dsp_yp
   INTEGER, DIMENSION(:), ALLOCATABLE :: rcv_zyp, dsp_zyp
 
   PUBLIC :: ppinit, manual_0D_bcast, manual_3D_bcast, init_parallel_var, &
-            gather_xyz, gather_pjz, gather_pjxyz
+            gather_xyz, gather_pjz, gather_pjxyz, exchange_ghosts_1D
 
 CONTAINS
 
@@ -45,6 +47,7 @@ CONTAINS
     INTEGER, PARAMETER :: ndims=3 ! p, kx and z
     INTEGER, DIMENSION(ndims) :: dims=0, coords=0
     LOGICAL :: periods(ndims) = .FALSE., reorder=.FALSE.
+    LOGICAL :: com_log(ndims) = .FALSE.
     CHARACTER(len=32) :: str
     INTEGER :: nargs, i, l
 
@@ -87,16 +90,21 @@ CONTAINS
     !
     !  Partitions 3-dim cartesian topology of comm0 into 1-dim cartesian subgrids
     !
-    CALL MPI_CART_SUB (comm0, (/.TRUE.,.FALSE.,.FALSE./),  comm_p, ierr)
-    CALL MPI_CART_SUB (comm0, (/.FALSE.,.TRUE.,.FALSE./), comm_ky, ierr)
-    CALL MPI_CART_SUB (comm0, (/.FALSE.,.FALSE.,.TRUE./),  comm_z, ierr)
+    com_log = (/.TRUE.,.FALSE.,.FALSE./)
+    CALL MPI_CART_SUB (comm0, com_log,  comm_p, ierr)
+    com_log = (/.FALSE.,.TRUE.,.FALSE./)
+    CALL MPI_CART_SUB (comm0, com_log, comm_ky, ierr)
+    com_log = (/.FALSE.,.FALSE.,.TRUE./)
+    CALL MPI_CART_SUB (comm0, com_log,  comm_z, ierr)
     ! Find id inside the 1d-sub communicators
     CALL MPI_COMM_RANK(comm_p,  rank_p,  ierr)
     CALL MPI_COMM_RANK(comm_ky, rank_ky, ierr)
     CALL MPI_COMM_RANK(comm_z,  rank_z,  ierr)
     ! 2D communicator
-    CALL MPI_CART_SUB (comm0, (/.TRUE.,.FALSE.,.TRUE./),  comm_pz,  ierr)
-    CALL MPI_CART_SUB (comm0, (/.FALSE.,.TRUE.,.TRUE./),  comm_kyz, ierr)
+    com_log = (/.TRUE.,.FALSE.,.TRUE./)
+    CALL MPI_CART_SUB (comm0, com_log,  comm_pz,  ierr)
+    com_log = (/.FALSE.,.TRUE.,.TRUE./)
+    CALL MPI_CART_SUB (comm0, com_log,  comm_kyz, ierr)
     ! Count the number of processes in 2D comms
     CALL MPI_COMM_SIZE(comm_pz, num_procs_pz, ierr)
     CALL MPI_COMM_SIZE(comm_kyz,num_procs_kyz,ierr)
@@ -113,73 +121,110 @@ CONTAINS
     IMPLICIT NONE
     INTEGER, INTENT(IN) :: np_loc,np_tot,nky_loc,nky_tot,nz_loc
     INTEGER :: i_
-    !! P reduction at constant x,y,z,j
+    !     !! P reduction at constant x,y,z,j
     ! ALLOCATE(rcv_p(0:num_procs_p-1),dsp_p(0:num_procs_p-1)) !Displacement sizes for balance diagnostic
+    ! ! all processes share their local number of points
+    ! CALL MPI_ALLGATHER(np_loc,1,MPI_INTEGER,rcv_p,1,MPI_INTEGER,comm_p,ierr)
+    ! ! the displacement array can be build from each np_loc as
+    ! dsp_p(0)=0
+    ! DO i_=1,num_procs_p-1
+    !    dsp_p(i_) =dsp_p(i_-1) + rcv_p(i_-1)
+    ! END DO
+    ! !!!!!! XYZ gather variables
+    ! !! Y reduction at constant x and z
+    ! ! number of points recieved and displacement for the y reduction
+    ! ALLOCATE(rcv_y(0:num_procs_ky-1),dsp_y(0:num_procs_ky-1)) !Displacement sizes for balance diagnostic
+    ! ! all processes share their local number of points
+    ! CALL MPI_ALLGATHER(nky_loc,1,MPI_INTEGER,rcv_y,1,MPI_INTEGER,comm_ky,ierr)
+    ! ! the displacement array can be build from each np_loc as
+    ! dsp_y(0)=0
+    ! DO i_=1,num_procs_ky-1
+    !    dsp_y(i_) =dsp_y(i_-1) + rcv_y(i_-1)
+    ! END DO
+    ! !! Z reduction for full slices of y data but constant x
+    ! ! number of points recieved and displacement for the z reduction
+    ! ALLOCATE(rcv_zy(0:num_procs_z-1),dsp_zy(0:num_procs_z-1)) !Displacement sizes for balance diagnostic
+    ! ! all processes share their local number of points
+    ! CALL MPI_ALLGATHER(nz_loc*nky_tot,1,MPI_INTEGER,rcv_zy,1,MPI_INTEGER,comm_z,ierr)
+    ! ! the displacement array can be build from each np_loc as
+    ! dsp_zy(0)=0
+    ! DO i_=1,num_procs_z-1
+    !    dsp_zy(i_) =dsp_zy(i_-1) + rcv_zy(i_-1)
+    ! END DO
+    ! !!!!! PJZ gather variables
+    ! !! P reduction at constant j and z is already done in module GRID
+    ! !! Z reduction for full slices of p data but constant j
+    ! ! number of points recieved and displacement for the z reduction
+    ! ALLOCATE(rcv_zp(0:num_procs_z-1),dsp_zp(0:num_procs_z-1)) !Displacement sizes for balance diagnostic
+    ! ! all processes share their local number of points
+    ! CALL MPI_ALLGATHER(nz_loc*np_tot,1,MPI_INTEGER,rcv_zp,1,MPI_INTEGER,comm_z,ierr)
+    ! ! the displacement array can be build from each np_loc as
+    ! dsp_zp(0)=0
+    ! DO i_=1,num_procs_z-1
+    !    dsp_zp(i_) =dsp_zp(i_-1) + rcv_zp(i_-1)
+    ! END DO
+    ! !!!!! PJXYZ gather variables
+    ! !! Y reduction for full slices of p data but constant j
+    ! ! number of points recieved and displacement for the y reduction
+    ! ALLOCATE(rcv_yp(0:num_procs_ky-1),dsp_yp(0:num_procs_ky-1)) !Displacement sizes for balance diagnostic
+    ! ! all processes share their local number of points
+    ! CALL MPI_ALLGATHER(nky_loc*np_tot,1,MPI_INTEGER,rcv_yp,1,MPI_INTEGER,comm_ky,ierr)
+    ! ! the displacement array can be build from each np_loc as
+    ! dsp_yp(0)=0
+    ! DO i_=1,num_procs_ky-1
+    !    dsp_yp(i_) =dsp_yp(i_-1) + rcv_yp(i_-1)
+    ! END DO
+    ! !! Z reduction for full slices of py data but constant j
+    ! ! number of points recieved and displacement for the z reduction
+    ! ALLOCATE(rcv_zyp(0:num_procs_z-1),dsp_zyp(0:num_procs_z-1)) !Displacement sizes for balance diagnostic
+    ! ! all processes share their local number of points
+    ! CALL MPI_ALLGATHER(nz_loc*np_tot*nky_tot,1,MPI_INTEGER,rcv_zyp,1,MPI_INTEGER,comm_z,ierr)
+    ! ! the displacement array can be build from each np_loc as
+    ! dsp_zyp(0)=0
+    ! DO i_=1,num_procs_z-1
+    !    dsp_zyp(i_) =dsp_zyp(i_-1) + rcv_zyp(i_-1)
+    ! END DO
+    ! P reduction at constant x,y,z,j
     ALLOCATE(rcv_p(num_procs_p),dsp_p(num_procs_p)) !Displacement sizes for balance diagnostic
-    ! all processes share their local number of points
     CALL MPI_ALLGATHER(np_loc,1,MPI_INTEGER,rcv_p,1,MPI_INTEGER,comm_p,ierr)
-    ! the displacement array can be build from each np_loc as
     dsp_p(1)=0
     DO i_=2,num_procs_p
        dsp_p(i_) =dsp_p(i_-1) + rcv_p(i_-1)
     END DO
     !!!!!! XYZ gather variables
-    !! Y reduction at constant x and z
-    ! number of points recieved and displacement for the y reduction
-    ! ALLOCATE(rcv_y(0:num_procs_ky-1),dsp_y(0:num_procs_ky-1)) !Displacement sizes for balance diagnostic
     ALLOCATE(rcv_y(num_procs_ky),dsp_y(num_procs_ky)) !Displacement sizes for balance diagnostic
-    ! all processes share their local number of points
     CALL MPI_ALLGATHER(nky_loc,1,MPI_INTEGER,rcv_y,1,MPI_INTEGER,comm_ky,ierr)
-    ! the displacement array can be build from each np_loc as
     dsp_y(1)=0
-    DO i_=2,num_procs_ky-1
+    DO i_=2,num_procs_ky
        dsp_y(i_) =dsp_y(i_-1) + rcv_y(i_-1)
     END DO
     !! Z reduction for full slices of y data but constant x
-    ! number of points recieved and displacement for the z reduction
-    ! ALLOCATE(rcv_zy(0:num_procs_z-1),dsp_zy(0:num_procs_z-1)) !Displacement sizes for balance diagnostic
     ALLOCATE(rcv_zy(num_procs_z),dsp_zy(num_procs_z)) !Displacement sizes for balance diagnostic
-    ! all processes share their local number of points
     CALL MPI_ALLGATHER(nz_loc*nky_tot,1,MPI_INTEGER,rcv_zy,1,MPI_INTEGER,comm_z,ierr)
-    ! the displacement array can be build from each np_loc as
     dsp_zy(1)=0
-    DO i_=2,num_procs_z-1
+    DO i_=2,num_procs_z
        dsp_zy(i_) =dsp_zy(i_-1) + rcv_zy(i_-1)
     END DO
     !!!!! PJZ gather variables
-    !! P reduction at constant j and z is already done in module GRID
-    !! Z reduction for full slices of p data but constant j
-    ! number of points recieved and displacement for the z reduction
-    ! ALLOCATE(rcv_zp(0:num_procs_z-1),dsp_zp(0:num_procs_z-1)) !Displacement sizes for balance diagnostic
     ALLOCATE(rcv_zp(num_procs_z),dsp_zp(num_procs_z)) !Displacement sizes for balance diagnostic
-    ! all processes share their local number of points
     CALL MPI_ALLGATHER(nz_loc*np_tot,1,MPI_INTEGER,rcv_zp,1,MPI_INTEGER,comm_z,ierr)
-    ! the displacement array can be build from each np_loc as
     dsp_zp(1)=0
-    DO i_=2,num_procs_z-1
+    DO i_=2,num_procs_z
        dsp_zp(i_) =dsp_zp(i_-1) + rcv_zp(i_-1)
     END DO
     !!!!! PJXYZ gather variables
     !! Y reduction for full slices of p data but constant j
-    ! number of points recieved and displacement for the y reduction
-    ! ALLOCATE(rcv_yp(0:num_procs_ky-1),dsp_yp(0:num_procs_ky-1)) !Displacement sizes for balance diagnostic
     ALLOCATE(rcv_yp(num_procs_ky),dsp_yp(num_procs_ky)) !Displacement sizes for balance diagnostic
-    ! all processes share their local number of points
     CALL MPI_ALLGATHER(nky_loc*np_tot,1,MPI_INTEGER,rcv_yp,1,MPI_INTEGER,comm_ky,ierr)
-    ! the displacement array can be build from each np_loc as
     dsp_yp(1)=0
-    DO i_=2,num_procs_ky-1
+    DO i_=2,num_procs_ky
        dsp_yp(i_) =dsp_yp(i_-1) + rcv_yp(i_-1)
     END DO
     !! Z reduction for full slices of py data but constant j
-    ! number of points recieved and displacement for the z reduction
-    ! ALLOCATE(rcv_zyp(0:num_procs_z-1),dsp_zyp(0:num_procs_z-1)) !Displacement sizes for balance diagnostic
     ALLOCATE(rcv_zyp(num_procs_z),dsp_zyp(num_procs_z)) !Displacement sizes for balance diagnostic
-    ! all processes share their local number of points
     CALL MPI_ALLGATHER(nz_loc*np_tot*nky_tot,1,MPI_INTEGER,rcv_zyp,1,MPI_INTEGER,comm_z,ierr)
-    ! the displacement array can be build from each np_loc as
     dsp_zyp(1)=0
-    DO i_=2,num_procs_z-1
+    DO i_=2,num_procs_z
        dsp_zyp(i_) =dsp_zyp(i_-1) + rcv_zyp(i_-1)
     END DO
   END SUBROUTINE init_parallel_var
@@ -200,7 +245,7 @@ CONTAINS
     CALL attach(fid, TRIM(str),        "Np_z", num_procs_z)
   END SUBROUTINE parallel_ouptutinputs
 
-  !!!!! Gather a field in spatial coordinates on rank 0 !!!!!
+  !!!! Gather a field in spatial coordinates on rank 0 !!!!!
   SUBROUTINE gather_xyz(field_loc,field_tot,nky_loc,nky_tot,nkx_tot,nz_loc,nz_tot)
     IMPLICIT NONE
     INTEGER, INTENT(IN) :: nky_loc,nky_tot,nkx_tot,nz_loc,nz_tot
@@ -208,13 +253,12 @@ CONTAINS
     COMPLEX(dp), DIMENSION(nky_tot,nkx_tot,nz_tot), INTENT(OUT) :: field_tot
     COMPLEX(dp), DIMENSION(nky_tot,nz_loc) :: buffer_yt_zl !full  y, local z
     COMPLEX(dp), DIMENSION(nky_tot,nz_tot) :: buffer_yt_zt !full  y, full  z
-    COMPLEX(dp), DIMENSION(nz_loc) :: buffer_yl_zc !local y, constant z
+    COMPLEX(dp), DIMENSION(nky_loc):: buffer_yl_zc !local y, constant z
     COMPLEX(dp), DIMENSION(nky_tot):: buffer_yt_zc !full  y, constant z
     INTEGER :: snd_y, snd_z, root_p, root_z, root_ky, ix, iz
 
     snd_y  = nky_loc    ! Number of points to send along y (per z)
     snd_z  = nky_tot*nz_loc ! Number of points to send along z (full y)
-
     root_p = 0; root_z = 0; root_ky = 0
     IF(rank_p .EQ. root_p) THEN
       DO ix = 1,nkx_tot
@@ -226,14 +270,13 @@ CONTAINS
                            root_ky, comm_ky, ierr)
           buffer_yt_zl(1:nky_tot,iz) = buffer_yt_zc(1:nky_tot)
         ENDDO
-
-        ! send the full line on y contained by root_kyas
-        IF(rank_ky .EQ. 0) THEN
+        ! send the full line on y contained by root_ky
+        IF(rank_ky .EQ. root_ky) THEN
           CALL MPI_GATHERV(buffer_yt_zl, snd_z,          MPI_DOUBLE_COMPLEX, &
                            buffer_yt_zt, rcv_zy, dsp_zy, MPI_DOUBLE_COMPLEX, &
                            root_z, comm_z, ierr)
         ENDIF
-        ! ID 0 (the one who ouptut) rebuild the whole array
+        ! ID 0 (the one who output) rebuild the whole array
         IF(my_id .EQ. 0) &
           field_tot(1:nky_tot,ix,1:nz_tot) = buffer_yt_zt(1:nky_tot,1:nz_tot)
       ENDDO
@@ -266,14 +309,13 @@ CONTAINS
                            root_p, comm_p, ierr)
           buffer_pt_zl(1:np_tot,iz) = buffer_pt_zc(1:np_tot)
         ENDDO
-
-        ! send the full line on y contained by root_kyas
-        IF(rank_p .EQ. 0) THEN
+        ! send the full line on y contained by root_p
+        IF(rank_p .EQ. root_p) THEN
           CALL MPI_GATHERV(buffer_pt_zl, snd_z,          MPI_DOUBLE_COMPLEX, &
                            buffer_pt_zt, rcv_zp, dsp_zp, MPI_DOUBLE_COMPLEX, &
                            root_z, comm_z, ierr)
         ENDIF
-        ! ID 0 (the one who ouptut) rebuild the whole array
+        ! ID 0 (the one who output) rebuild the whole array
         IF(my_id .EQ. 0) &
           field_tot(1:np_tot,ij,1:nz_tot) = buffer_pt_zt(1:np_tot,1:nz_tot)
       ENDDO
@@ -285,49 +327,51 @@ CONTAINS
   SUBROUTINE gather_pjxyz(field_loc,field_tot,na_tot,np_loc,np_tot,nj_tot,nky_loc,nky_tot,nkx_tot,nz_loc,nz_tot)
     IMPLICIT NONE
     INTEGER, INTENT(IN) :: na_tot,np_loc,np_tot,nj_tot,nky_loc,nky_tot,nkx_tot,nz_loc,nz_tot
-    COMPLEX(dp), DIMENSION(na_tot,np_loc,nj_tot,nky_loc,nkx_tot,nz_loc), INTENT(IN)  :: field_loc
+    COMPLEX(dp), DIMENSION(na_tot,np_loc,nj_tot,nky_loc,nkx_tot,nz_loc), INTENT(IN)  :: field_loc
     COMPLEX(dp), DIMENSION(na_tot,np_tot,nj_tot,nky_tot,nkx_tot,nz_tot), INTENT(OUT) :: field_tot
-    COMPLEX(dp), DIMENSION(na_tot,np_tot,nky_tot,nz_loc) :: buffer_pt_yt_zl ! full p,     full y, local    z
-    COMPLEX(dp), DIMENSION(na_tot,np_tot,nky_tot,nz_tot) :: buffer_pt_yt_zt ! full p,     full y, full     z
-    COMPLEX(dp), DIMENSION(na_tot,np_tot,nky_loc) :: buffer_pt_yl_zc     ! full p,    local y, constant z
-    COMPLEX(dp), DIMENSION(na_tot,np_tot,nky_tot) :: buffer_pt_yt_zc     ! full p,     full y, constant z
-    COMPLEX(dp), DIMENSION(na_tot,np_loc)       :: buffer_pl_cy_zc     !local p, constant y, constant z
-    COMPLEX(dp), DIMENSION(na_tot,np_tot)       :: buffer_pt_cy_zc     ! full p, constant y, constant z
-    INTEGER :: snd_p, snd_y, snd_z, root_p, root_z, root_ky, iy, ix, iz, ij
-    snd_p  = na_tot*np_loc                ! Number of points to send along p (per z,y)
-    snd_y  = na_tot*np_tot*nky_loc        ! Number of points to send along y (per z, full p)
-    snd_z  = na_tot*np_tot*nky_tot*nz_loc ! Number of points to send along z (full y, full p)
+    COMPLEX(dp), DIMENSION(np_tot,nky_tot,nz_loc) :: buffer_pt_yt_zl ! full p,     full y, local    z
+    COMPLEX(dp), DIMENSION(np_tot,nky_tot,nz_tot) :: buffer_pt_yt_zt ! full p,     full y, full     z
+    COMPLEX(dp), DIMENSION(np_tot,nky_loc) :: buffer_pt_yl_zc     ! full p,    local y, constant z
+    COMPLEX(dp), DIMENSION(np_tot,nky_tot) :: buffer_pt_yt_zc     ! full p,     full y, constant z
+    COMPLEX(dp), DIMENSION(np_loc)       :: buffer_pl_cy_zc     !local p, constant y, constant z
+    COMPLEX(dp), DIMENSION(np_tot)       :: buffer_pt_cy_zc     ! full p, constant y, constant z
+    INTEGER :: snd_p, snd_y, snd_z, root_p, root_z, root_ky, iy, ix, iz, ij, ia
+    snd_p  = np_loc                ! Number of points to send along p (per z,y)
+    snd_y  = np_tot*nky_loc        ! Number of points to send along y (per z, full p)
+    snd_z  = np_tot*nky_tot*nz_loc ! Number of points to send along z (full y, full p)
     root_p = 0; root_z = 0; root_ky = 0
-    j: DO ij = 1,nj_tot
-      x: DO ix = 1,nkx_tot
-        z: DO iz = 1,nz_loc
-          y: DO iy = 1,nky_loc
-            ! fill a buffer to contain a slice of p data at constant j, ky, kx and z
-            buffer_pl_cy_zc(1:na_tot,1:np_loc) = field_loc(1:na_tot,1:np_loc,ij,iy,ix,iz)
-            CALL MPI_GATHERV(buffer_pl_cy_zc, snd_p,        MPI_DOUBLE_COMPLEX, &
-                             buffer_pt_cy_zc, na_tot*rcv_p, na_tot*dsp_p, MPI_DOUBLE_COMPLEX, &
-                             root_p, comm_p, ierr)
-            buffer_pt_yl_zc(1:na_tot,1:np_tot,iy) = buffer_pt_cy_zc(1:na_tot,1:np_tot)
-          ENDDO y
-          ! send the full line on p contained by root_p
-          IF(rank_p .EQ. 0) THEN
-            CALL MPI_GATHERV(buffer_pt_yl_zc, snd_y,          MPI_DOUBLE_COMPLEX, &
-                             buffer_pt_yt_zc, na_tot*rcv_yp, na_tot*dsp_yp, MPI_DOUBLE_COMPLEX, &
-                             root_ky, comm_ky, ierr)
-            buffer_pt_yt_zl(1:na_tot,1:np_tot,1:nky_tot,iz) = buffer_pt_yt_zc(1:na_tot,1:np_tot,1:nky_tot)
+    a: DO ia= 1,na_tot
+      j: DO ij = 1,nj_tot
+        x: DO ix = 1,nkx_tot
+          z: DO iz = 1,nz_loc
+            y: DO iy = 1,nky_loc
+              ! fill a buffer to contain a slice of p data at constant j, ky, kx and z
+              buffer_pl_cy_zc(1:np_loc) = field_loc(ia,1:np_loc,ij,iy,ix,iz)
+              CALL MPI_GATHERV(buffer_pl_cy_zc, snd_p,        MPI_DOUBLE_COMPLEX, &
+                              buffer_pt_cy_zc, rcv_p, dsp_p, MPI_DOUBLE_COMPLEX, &
+                              root_p, comm_p, ierr)
+              buffer_pt_yl_zc(1:np_tot,iy) = buffer_pt_cy_zc(1:np_tot)
+            ENDDO y
+            ! send the full line on p contained by root_p
+            IF(rank_p .EQ. 0) THEN
+              CALL MPI_GATHERV(buffer_pt_yl_zc, snd_y,          MPI_DOUBLE_COMPLEX, &
+                              buffer_pt_yt_zc, rcv_yp, dsp_yp, MPI_DOUBLE_COMPLEX, &
+                              root_ky, comm_ky, ierr)
+              buffer_pt_yt_zl(1:np_tot,1:nky_tot,iz) = buffer_pt_yt_zc(1:np_tot,1:nky_tot)
+            ENDIF
+          ENDDO z
+          ! send the full line on y contained by root_ky
+          IF(rank_ky .EQ. 0) THEN
+            CALL MPI_GATHERV(buffer_pt_yt_zl, snd_z,            MPI_DOUBLE_COMPLEX, &
+                            buffer_pt_yt_zt, rcv_zyp, dsp_zyp, MPI_DOUBLE_COMPLEX, &
+                            root_z, comm_z, ierr)
           ENDIF
-        ENDDO z
-        ! send the full line on y contained by root_kyas
-        IF(rank_ky .EQ. 0) THEN
-          CALL MPI_GATHERV(buffer_pt_yt_zl, snd_z,            MPI_DOUBLE_COMPLEX, &
-                           buffer_pt_yt_zt, na_tot*rcv_zyp, na_tot*dsp_zyp, MPI_DOUBLE_COMPLEX, &
-                           root_z, comm_z, ierr)
-        ENDIF
-        ! ID 0 (the one who ouptut) rebuild the whole array
-        IF(my_id .EQ. 0) &
-          field_tot(1:na_tot,1:np_tot,ij,1:nky_tot,ix,1:nz_tot) = buffer_pt_yt_zt(1:na_tot,1:np_tot,1:nky_tot,1:nz_tot)
-      ENDDO x
-    ENDDO j
+          ! ID 0 (the one who output) rebuild the whole array
+          IF(my_id .EQ. 0) &
+            field_tot(ia,1:np_tot,ij,1:nky_tot,ix,1:nz_tot) = buffer_pt_yt_zt(1:np_tot,1:nky_tot,1:nz_tot)
+        ENDDO x
+      ENDDO j
+    ENDDO a
   END SUBROUTINE gather_pjxyz
 
   !!!!! This is a manual way to do MPI_BCAST !!!!!!!!!!!
@@ -403,5 +447,26 @@ CONTAINS
     ENDIF
   END SUBROUTINE manual_0D_bcast
 
+  ! Routine that exchange ghosts on one dimension
+  SUBROUTINE exchange_ghosts_1D(f,nbr_L,nbr_R,np,ng)
+    IMPLICIT NONE
+    INTEGER, INTENT(IN) :: np,ng, nbr_L, nbr_R
+    COMPLEX(dp), DIMENSION(np+ng), INTENT(INOUT) :: f
+    INTEGER :: ierr, first, last, ig
+    first = 1 + ng/2
+    last  = np + ng/2
+    !!!!!!!!!!! Send ghost to right neighbour !!!!!!!!!!!!!!!!!!!!!!
+    DO ig = 1,ng/2
+      CALL mpi_sendrecv(f(last-(ig-1)), 1, MPI_DOUBLE_COMPLEX, nbr_R, 14+ig, &
+                           f(first-ig), 1, MPI_DOUBLE_COMPLEX, nbr_L, 14+ig, &
+                        comm0, MPI_STATUS_IGNORE, ierr)
+    ENDDO
+    !!!!!!!!!!! Send ghost to left neighbour !!!!!!!!!!!!!!!!!!!!!!
+    DO ig = 1,ng/2
+    CALL mpi_sendrecv(f(first+(ig-1)), 1, MPI_DOUBLE_COMPLEX, nbr_L, 16+ig, &
+                           f(last+ig), 1, MPI_DOUBLE_COMPLEX, nbr_R, 16+ig, &
+                      comm0, MPI_STATUS_IGNORE, ierr)
+    ENDDO
+  END SUBROUTINE exchange_ghosts_1D
 
 END MODULE parallel
diff --git a/src/processing_mod.F90 b/src/processing_mod.F90
index 43fffe517b3f3e49dda2e6dbb2a9f0015a65423b..5cc934350aed6dbc2bee9adfb18b5c5e48e731e9 100644
--- a/src/processing_mod.F90
+++ b/src/processing_mod.F90
@@ -47,7 +47,7 @@ CONTAINS
       IMPLICIT NONE
       COMPLEX(dp) :: pflux_local, gflux_local, integral
       REAL(dp)    :: buffer(2)
-      INTEGER     :: i_, root, iky, ikx, ia, iz, in, izi
+      INTEGER     :: i_, root, iky, ikx, ia, iz, in, izi, ini
       COMPLEX(dp), DIMENSION(local_nz) :: integrant
       DO ia = 1,local_na
          pflux_local = 0._dp ! particle flux
@@ -61,8 +61,8 @@ CONTAINS
                DO ikx = 1,local_nkx
                   DO iky = 1,local_nky
                      integrant(iz) = integrant(iz) &
-                        +Jacobian(izi,ieven)*moments(ia,ip0,ij0,iky,ikx,izi,updatetlevel)&
-                        *imagu*kyarray(iky)*CONJG(phi(iky,ikx,izi))
+                     +Jacobian(izi,ieven)*moments(ia,ip0,ij0,iky,ikx,izi,updatetlevel)&
+                     *imagu*kyarray(iky)*CONJG(phi(iky,ikx,izi))
                   ENDDO
                ENDDO
             ENDDO
@@ -94,10 +94,11 @@ CONTAINS
               izi = iz + ngz/2 !interior index for ghosted arrays
                DO ikx = 1,local_nkx
                   DO iky = 1,local_nky
-                     DO in = 1+ngj/2, local_nj+ngj/2 ! only interior points
+                     DO in = 1, local_nj
+                        ini = in + ngj/2 !interior index for ghosted arrays
                         integrant(iz) = integrant(iz)+ &
-                           Jacobian(izi,ieven)*moments(ia,ip0,in,iky,ikx,izi,updatetlevel)&
-                           *imagu*kyarray(iky)*kernel(ia,in,iky,ikx,izi,ieven)*CONJG(phi(iky,ikx,izi))
+                           Jacobian(izi,ieven)*moments(ia,ip0,ini,iky,ikx,izi,updatetlevel)&
+                           *imagu*kyarray(iky)*kernel(ia,ini,iky,ikx,izi,ieven)*CONJG(phi(iky,ikx,izi))
                      ENDDO
                   ENDDO
                ENDDO
@@ -109,19 +110,24 @@ CONTAINS
               izi = iz + ngz/2 !interior index for ghosted arrays
               DO ikx = 1,local_nkx
                   DO iky = 1,local_nky
-                     DO in = 1+ngj/2, local_nj+ngj/2 ! only interior points
-                        integrant(iz) = integrant(iz) - &
-                           Jacobian(izi,iodd)*sqrt_tau_o_sigma(ia)*moments(ia,ip1,in,iky,ikx,izi,updatetlevel)&
-                           *imagu*kyarray(iky)*kernel(ia,in,iky,ikx,izi,iodd)*CONJG(psi(iky,ikx,izi))
+                     DO in = 1, local_nj
+                        ini = in + ngj/2 !interior index for ghosted arrays
+                           integrant(iz) = integrant(iz) - &
+                           Jacobian(izi,iodd)*sqrt_tau_o_sigma(ia)*moments(ia,ip1,ini,iky,ikx,izi,updatetlevel)&
+                           *imagu*kyarray(iky)*kernel(ia,ini,iky,ikx,izi,iodd)*CONJG(psi(iky,ikx,izi))
                      ENDDO
                   ENDDO
                ENDDO
             ENDDO
          ENDIF
          ! Integrate over z
+         ! print*, integrant
          call simpson_rule_z(local_nz,deltaz,integrant,integral)
+         ! print*, integral
+         ! stop
          ! Get process local particle flux with a factor two to account for the negative ky modes
          pflux_local = 2._dp*integral*iInt_Jacobian
+         ! print*,REAL(pflux_local,dp)
 
          !!!!---------- Sum over all processes ----------
          buffer(1) = REAL(gflux_local,dp)
@@ -148,7 +154,6 @@ CONTAINS
             pflux_x(ia) = pflux_local
          ENDIF
       ENDDO
-      ! if(my_id .eq. 0) write(*,*) 'pflux_ri = ',pflux_ri
    END SUBROUTINE compute_radial_transport
 
 ! 1D diagnostic to compute the average radial particle transport <T_i v_ExB_x>_xyz
@@ -156,7 +161,7 @@ CONTAINS
       IMPLICIT NONE
       COMPLEX(dp) :: hflux_local, integral
       REAL(dp)    :: buffer(2), n_dp
-      INTEGER     :: i_, root, in, ia, iky, ikx, iz, izi
+      INTEGER     :: i_, root, in, ia, iky, ikx, iz, izi, ini
       COMPLEX(dp), DIMENSION(local_nz) :: integrant        ! charge density q_a n_a
       DO ia = 1,local_na
          hflux_local = 0._dp ! heat flux
@@ -168,15 +173,16 @@ CONTAINS
               izi = iz + ngz/2 !interior index for ghosted arrays
               DO ikx = 1,local_nkx
                   DO iky = 1,local_nky
-                     DO in = 1+ngj/2, local_nj+ngj/2 ! only interior points
-                        n_dp = jarray(in)
+                     DO in = 1, local_nj
+                        ini  = in+ngj/2 !interior index for ghosted arrays
+                        n_dp = jarray(ini)
                         integrant(iz) = integrant(iz) &
-                           +Jacobian(izi,ieven)*tau(ia)*imagu*kyarray(iky)*CONJG(phi(iky,ikx,izi))&
-                           *kernel(ia,in,iky,ikx,izi,ieven)*(&
-                                     0.5_dp*SQRT2*moments(ia,ip2,in  ,iky,ikx,izi,updatetlevel)&
-                           +(2._dp*n_dp + 1.5_dp)*moments(ia,ip0,in  ,iky,ikx,izi,updatetlevel)&
-                                    -(n_dp+1._dp)*moments(ia,ip0,in+1,iky,ikx,izi,updatetlevel)&
-                                            -n_dp*moments(ia,ip0,in-1,iky,ikx,izi,updatetlevel))
+                           -Jacobian(izi,ieven)*tau(ia)*imagu*kyarray(iky)*phi(iky,ikx,izi)&
+                           *kernel(ia,ini,iky,ikx,izi,ieven)*CONJG(&
+                                     0.5_dp*SQRT2*moments(ia,ip2,ini  ,iky,ikx,izi,updatetlevel)&
+                           +(2._dp*n_dp + 1.5_dp)*moments(ia,ip0,ini  ,iky,ikx,izi,updatetlevel)&
+                                    -(n_dp+1._dp)*moments(ia,ip0,ini+1,iky,ikx,izi,updatetlevel)&
+                                            -n_dp*moments(ia,ip0,ini-1,iky,ikx,izi,updatetlevel))
                      ENDDO
                   ENDDO
                ENDDO
@@ -188,16 +194,17 @@ CONTAINS
               izi = iz + ngz/2 !interior index for ghosted arrays
                DO ikx = 1,local_nkx
                   DO iky = 1,local_nky
-                     DO in = 1+ngj/2, local_nj+ngj/2 ! only interior points
+                     DO in = 1, local_nj
+                        ini = in + ngj/2 !interior index for ghosted arrays
-                        n_dp = jarray(in)
+                        n_dp = jarray(ini)
                         integrant(iz) = integrant(iz) &
                            +Jacobian(izi,iodd)*tau(ia)*sqrt_tau_o_sigma(ia)*imagu*kyarray(iky)*CONJG(psi(iky,ikx,izi))&
-                           *kernel(ia,in,iky,ikx,izi,iodd)*(&
-                           0.5_dp*SQRT2*SQRT3*moments(ia,ip3,in  ,iky,ikx,izi,updatetlevel)&
-                                      +1.5_dp*moments(ia,ip1,in  ,iky,ikx,izi,updatetlevel)&
-                          +(2._dp*n_dp+1._dp)*moments(ia,ip1,in  ,iky,ikx,izi,updatetlevel)&
-                                -(n_dp+1._dp)*moments(ia,ip1,in+1,iky,ikx,izi,updatetlevel)&
-                                        -n_dp*moments(ia,ip1,in-1,iky,ikx,izi,updatetlevel))
+                           *kernel(ia,ini,iky,ikx,izi,iodd)*(&
+                           0.5_dp*SQRT2*SQRT3*moments(ia,ip3,ini  ,iky,ikx,izi,updatetlevel)&
+                                      +1.5_dp*moments(ia,ip1,ini  ,iky,ikx,izi,updatetlevel)&
+                          +(2._dp*n_dp+1._dp)*moments(ia,ip1,ini  ,iky,ikx,izi,updatetlevel)&
+                                -(n_dp+1._dp)*moments(ia,ip1,ini+1,iky,ikx,izi,updatetlevel)&
+                                        -n_dp*moments(ia,ip1,ini-1,iky,ikx,izi,updatetlevel))
                      ENDDO
                   ENDDO
                ENDDO
@@ -252,8 +259,8 @@ CONTAINS
                      DO ia = 1,local_na
                         IF(parray(ip) .EQ. 0) THEN
                            nadiab_moments(ia,ip,ij,iky,ikx,iz) = moments(ia,ip,ij,iky,ikx,iz,updatetlevel) &
-                              + q_tau(ia)*kernel(ia,ij,iky,ikx,iz,ieven)*phi(iky,ikx,iz)
-                        ELSEIF( (parray(ip) .EQ. 1) .AND. (beta .GT. 0) ) THEN
+                           + q_tau(ia)*kernel(ia,ij,iky,ikx,iz,ieven)*phi(iky,ikx,iz)
+                        ELSEIF( (parray(ip) .EQ. 1) .AND. (beta .GT. 0) ) THEN
                            nadiab_moments(ia,ip,ij,iky,ikx,iz) = moments(ia,ip,ij,iky,ikx,iz,updatetlevel) &
                               - q_o_sqrt_tau_sigma(ia)*kernel(ia,ij,iky,ikx,iz,ieven)*psi(iky,ikx,iz)
                         ELSE
@@ -265,7 +272,6 @@ CONTAINS
             ENDDO
          ENDDO
       ENDDO
-
       !! Ensure to kill the moments too high if closue option is set to 1
       IF(CLOS .EQ. 1) THEN
          DO ij=1,local_nj+ngj
@@ -279,7 +285,6 @@ CONTAINS
             ENDDO
          ENDDO
       ENDIF
-
       !------------- INTERP AND GRADIENTS ALONG Z ----------------------------------
       DO ikx = 1,local_nkx
          DO iky = 1,local_nky
diff --git a/src/solve_EM_fields.F90 b/src/solve_EM_fields.F90
index 01717c89314d6ab5d6bf267f227a88a4895ad93d..8f2120a3b0f2518c3c489230e6142e74c20e22d1 100644
--- a/src/solve_EM_fields.F90
+++ b/src/solve_EM_fields.F90
@@ -28,32 +28,35 @@ CONTAINS
     USE geometry,         ONLY: iInt_Jacobian, Jacobian
     IMPLICIT NONE
 
-    INTEGER     :: in, ia, ikx, iky, iz
-    COMPLEX(dp) :: fsa_phi, intf_   ! current flux averaged phi
+    INTEGER     :: in, ia, ikx, iky, iz, izi, ini
+    COMPLEX(dp) :: fsa_phi, intf_, rhtot   ! current flux averaged phi
     COMPLEX(dp), DIMENSION(local_nz) :: rho, integrant  ! charge density q_a n_a and aux var
-
     ! Execution time start
     CALL cpu_time(t0_poisson)
+    rhtot = 0
     !! Poisson can be solved only for process containng p=0
     IF ( SOLVE_POISSON ) THEN
         x:DO ikx = 1,local_nkx
           y:DO iky = 1,local_nky
             !!!!!!!!!!!!!!! Compute particle charge density q_a n_a for each evolved species
             DO iz = 1,local_nz
+              izi = iz+ngz/2
               rho(iz) = 0._dp
               DO in = 1,total_nj
+                ini = in+ngj/2
                 DO ia = 1,local_na
-                  rho(iz) = rho(iz) + q(ia)*kernel(ia,in+ngj/2,iky,ikx,iz+ngz/2,ieven)*moments(ia,ip0,in+ngj/2,iky,ikx,iz+ngz/2,updatetlevel)
+                  rho(iz) = rho(iz) + q(ia)*kernel(ia,ini,iky,ikx,izi,ieven)&
+                              *moments(ia,ip0,ini,iky,ikx,izi,updatetlevel)
                 END DO
               END DO
             END DO
             !!!!!!!!!!!!!!! adiabatic electron contribution if asked
-            IF (ADIAB_E) THEN
             ! Adiabatic charge density (linked to flux surface averaged phi)
             ! We compute the flux surface average solving a flux surface averaged
             ! Poisson equation, i.e.
             ! [qi^2(1-sum_j K_j^2)/tau_i] <phi>_psi = <q_i n_i >_psi
             !       inv_pol_ion^-1         fsa_phi  = simpson(Jacobian rho_i ) * iInt_Jacobian
+            IF (ADIAB_E) THEN
               fsa_phi = 0._dp
               IF(kyarray(iky).EQ.0._dp) THEN ! take ky=0 mode (y-average)
                 ! Prepare integrant for z-average
@@ -70,6 +73,7 @@ CONTAINS
             DO iz = 1,local_nz
               phi(iky,ikx,iz+ngz/2) = inv_poisson_op(iky,ikx,iz)*rho(iz)
             ENDDO
+            rhtot = rhtot + sum(real(rho))
           ENDDO y
         ENDDO x
       ! Cancel origin singularity
@@ -80,6 +84,15 @@ CONTAINS
     ! Execution time end
     CALL cpu_time(t1_poisson)
     tc_poisson = tc_poisson + (t1_poisson - t0_poisson)
+    ! print*, SUM(ABS(moments(1,:,:,:,:,:,updatetlevel)))
+    ! print*, SUM(REAL(moments(1,:,:,:,:,:,updatetlevel)))
+    ! print*, SUM(IMAG(moments(1,:,:,:,:,:,updatetlevel)))
+    ! print*, SUM(REAL(phi(:,:,(1+ngz/2):(local_nz+ngz/2))))
+    ! print*, SUM(REAL(phi(:,:,:)))
+    ! print*, SUM(IMAG(phi(:,:,(1+ngz/2):(local_nz+ngz/2))))
+    ! print*, SUM(inv_poisson_op(:,:,:))
+    ! print*, rhtot
+    ! stop
   END SUBROUTINE poisson
 
   SUBROUTINE ampere
diff --git a/src/stepon.F90 b/src/stepon.F90
index b193d207053493197bd2f58c443c655a8c9e7a7d..33f0c0daa9dc07b7a4b49193e189b252af42160f 100644
--- a/src/stepon.F90
+++ b/src/stepon.F90
@@ -41,6 +41,7 @@ SUBROUTINE stepon
 
       CALL MPI_BARRIER(MPI_COMM_WORLD,ierr)
    !----- AFTER: All fields are updated for step = n+1
+
    END DO
 
    CONTAINS
diff --git a/testcases/linear_entropy_mode/fort.90 b/testcases/linear_entropy_mode/fort.90
new file mode 100644
index 0000000000000000000000000000000000000000..173b1626d453d98a325c32a174280e79d1bbe16f
--- /dev/null
+++ b/testcases/linear_entropy_mode/fort.90
@@ -0,0 +1,100 @@
+&BASIC
+  nrun       = 1
+  dt         = 0.01
+  tmax       = 5
+  maxruntime = 356400
+  job2load   = -1
+/
+&GRID
+  pmax   = 4
+  jmax   = 2
+  Nx     = 2
+  Lx     = 200
+  Ny     = 2
+  Ly     = 60
+  Nz     = 1
+  SG     = .f.
+  Nexc   = 1
+/
+&GEOMETRY
+  geom   = 'zpinch'
+  q0     = 1.4
+  shear  = 0.0
+  eps    = 0.18
+  kappa  = 1.0
+  s_kappa= 0.0
+  delta  = 0.0
+  s_delta= 0.0
+  zeta   = 0.0
+  s_zeta = 0.0
+  parallel_bc = 'dirichlet'
+  shift_y= 0.0
+/
+&OUTPUT_PAR
+  dtsave_0d = 0.01
+  dtsave_1d = -1
+  dtsave_2d = -1
+  dtsave_3d = 0.01
+  dtsave_5d = 0.01
+  write_doubleprecision = .f.
+  write_gamma = .t.
+  write_hf    = .t.
+  write_phi   = .t.
+  write_Na00  = .t.
+  write_Napj  = .t.
+  write_dens  = .t.
+  write_fvel  = .t.
+  write_temp  = .t.
+/
+&MODEL_PAR
+  ! Collisionality
+  CLOS    = 0
+  NL_CLOS = -1
+  LINEARITY = 'linear'
+  Na      = 2 ! number of species
+  mu_x    = 0
+  mu_y    = 0
+  N_HD    = 4
+  mu_z    = 0
+  mu_p    = 0
+  mu_j    = 0
+  nu      = 0
+  beta    = 0
+  ADIAB_E = .f.
+  tau_e   = 1.0
+/
+&SPECIES
+ ! ions
+ name_ = 'ions'
+ tau_  = 1.0
+ sigma_= 1.0
+ q_    = 1.0
+ k_N_  = 1.0!2.22
+ k_T_  = 0.0!6.96
+/
+&SPECIES
+ ! electrons
+ name_ = 'electrons'
+ tau_  = 1.0
+ sigma_= 0.023338
+ q_    = -1.0
+ k_N_  = 0.0!2.22
+ k_T_  = 0.0!6.96
+/
+
+&COLLISION_PAR
+  collision_model = 'DG' !DG/SG/PA/LD (dougherty, sugama, pitch angle, landau)
+  GK_CO           = .f.
+  INTERSPECIES    = .true.
+  !mat_file       = 'gk_sugama_P_20_J_10_N_150_kpm_8.0.h5'
+/
+&INITIAL_CON
+  INIT_OPT         = 'mom00'
+  ACT_ON_MODES     = 'donothing'
+  init_background  = 1.0
+  init_noiselvl    = 0.0
+  iseed            = 42
+/
+&TIME_INTEGRATION_PAR
+  numerical_scheme = 'RK4'
+/
diff --git a/testcases/linear_entropy_mode/fort_00.90 b/testcases/linear_entropy_mode/fort_00.90
new file mode 100644
index 0000000000000000000000000000000000000000..158f88d3b91191a77bd08b55e0520aea84f2de62
--- /dev/null
+++ b/testcases/linear_entropy_mode/fort_00.90
@@ -0,0 +1,82 @@
+&BASIC
+  nrun   = 1
+  dt     = 0.01
+  tmax   = 5
+  maxruntime = 356400
+/
+&GRID
+  pmaxe  = 4
+  jmaxe  = 2
+  pmaxi  = 4
+  jmaxi  = 2
+  Nx     = 2
+  Lx     = 200
+  Ny     = 2
+  Ly     = 60
+  Nz     = 1
+  SG     = .f.
+/
+&GEOMETRY
+  geom   = 'zpinch'
+  q0     = 1.4
+  shear  = 0.0
+  eps    = 0.18
+  parallel_bc = 'dirichlet'
+/
+&OUTPUT_PAR
+  nsave_0d = 1
+  nsave_1d = -1
+  nsave_2d = -1
+  nsave_3d = 1
+  nsave_5d = 1
+  write_doubleprecision = .f.
+  write_gamma = .t.
+  write_hf    = .t.
+  write_phi   = .t.
+  write_Na00  = .t.
+  write_Napj  = .t.
+  write_dens  = .t.
+  write_temp  = .t.
+  job2load    = -1
+/
+&MODEL_PAR
+  ! Model parameters: closure, hyperdiffusion and collisionality
+  CLOS    = 0
+  NL_CLOS = -1
+  LINEARITY = 'linear'
+  KIN_E   = .t.
+  mu_x    = 0.0
+  mu_y    = 0.0
+  N_HD    = 4
+  mu_z    = 0!0.1
+  mu_p    = 0
+  mu_j    = 0
+  nu      = 0!1.0
+  tau_e   = 1
+  tau_i   = 1
+  sigma_e = 0.023338
+  sigma_i = 1
+  q_e     = -1
+  q_i     = 1
+  K_Ne    = 0!6.96
+  K_Te    = 0!2.22
+  K_Ni    = 1!6.96
+  K_Ti    = 0!2.22
+  beta    = 0
+/
+&COLLISION_PAR
+  collision_model = 'DG' !DG/SG/PA/LD (dougherty, sugama, pitch angle, landau)
+  gyrokin_CO      = .f.
+  interspecies    = .true.
+  !mat_file        = 'gk_sugama_P_20_J_10_N_150_kpm_8.0.h5'
+/
+&INITIAL_CON
+  INIT_OPT    = 'mom00'
+  ACT_ON_MODES       = 'donothing'
+  init_background  = 1.0
+  init_noiselvl = 0.0
+  iseed         = 42
+/
+&TIME_INTEGRATION_PAR
+  numerical_scheme = 'RK4'
+/
diff --git a/testcases/linear_entropy_mode/gyacomo_1_debug b/testcases/linear_entropy_mode/gyacomo_1_debug
new file mode 120000
index 0000000000000000000000000000000000000000..f0c8bbfd26fdc619d94a6e4d6f59dde7d8630636
--- /dev/null
+++ b/testcases/linear_entropy_mode/gyacomo_1_debug
@@ -0,0 +1 @@
+../../../gyacomo_1/bin/gyacomo_debug
\ No newline at end of file
diff --git a/testcases/linear_entropy_mode/gyacomo_debug b/testcases/linear_entropy_mode/gyacomo_debug
new file mode 120000
index 0000000000000000000000000000000000000000..363e139a389f2e5d2d2097c09bf5ab363772dbfc
--- /dev/null
+++ b/testcases/linear_entropy_mode/gyacomo_debug
@@ -0,0 +1 @@
+../../bin/gyacomo_debug
\ No newline at end of file
diff --git a/testcases/smallest_problem/fort.90 b/testcases/smallest_problem/fort.90
index c1c4f37e47e34d99ba5e677aac11db57fab46f50..9437edb8162bb303f50c00acf4eb2ef4f8459119 100644
--- a/testcases/smallest_problem/fort.90
+++ b/testcases/smallest_problem/fort.90
@@ -6,13 +6,13 @@
   job2load   = -1
 /
 &GRID
-  pmax   = 2
+  pmax   = 4
   jmax   = 1
-  Nx     = 2
+  Nx     = 4
   Lx     = 200
   Ny     = 2
   Ly     = 60
-  Nz     = 4
+  Nz     = 6
   SG     = .f.
   Nexc   = 1
 /
@@ -52,13 +52,14 @@
   NL_CLOS = -1
   LINEARITY = 'linear'
   Na      = 2 ! number of species
-  mu_x    = 0
-  mu_y    = 0
+  mu_x    = 0.2
+  mu_y    = 0.4
   N_HD    = 4
-  mu_z    = 0
-  mu_p    = 0
-  mu_j    = 0
-  nu      = 0
+  mu_z    = 0.6
+  HYP_V   = 'hypcoll'
+  mu_p    = 0.1
+  mu_j    = 0.
+  nu      = 1.0
   beta    = 0
   ADIAB_E = .f.
   tau_e   = 1.0
@@ -69,27 +70,27 @@
  tau_  = 1.0
  sigma_= 1.0
  q_    = 1.0
- k_N_  = 1.0!2.22
- k_T_  = 0.0!6.96
+ k_N_  = 3.0!2.22
+ k_T_  = 4.0!6.96
 /
 &SPECIES
  ! electrons
  name_ = 'electrons'
  tau_  = 1.0
- sigma_= 0.023338
- q_    = -1.0
- k_N_  = 0.0!2.22
- k_T_  = 0.0!6.96
+ sigma_= 1!0.023338
+ q_    = 1!-1.0
+ k_N_  = 1.0!2.22
+ k_T_  = 2.0!6.96
 /
 
 &COLLISION_PAR
-  collision_model = 'DG' !DG/SG/PA/LD (dougherty, sugama, pitch angle, landau)
-  GK_CO           = .f.
+  collision_model = 'SG' !DG/SG/PA/LD (dougherty, sugama, pitch angle, landau)
+  GK_CO           = .t.
   INTERSPECIES    = .true.
-  !mat_file       = 'gk_sugama_P_20_J_10_N_150_kpm_8.0.h5'
+  mat_file       = 'gk_sugama_P_20_J_10_N_150_kpm_8.0.h5'
 /
 &INITIAL_CON
-  INIT_OPT         = 'mom00'
+  INIT_OPT         = 'allmom'
   ACT_ON_MODES     = 'donothing'
   init_background  = 1.0
   init_noiselvl    = 0.0
diff --git a/testcases/smallest_problem/fort_00.90 b/testcases/smallest_problem/fort_00.90
index d9fedf898e6ec7cca5eeaec3e7d3ef9513902185..1b515d515953d085d3ba50570066b8d215dccd06 100644
--- a/testcases/smallest_problem/fort_00.90
+++ b/testcases/smallest_problem/fort_00.90
@@ -5,15 +5,15 @@
   maxruntime = 356400
 /
 &GRID
-  pmaxe  = 2
+  pmaxe  = 4
   jmaxe  = 1
-  pmaxi  = 2
+  pmaxi  = 4
   jmaxi  = 1
-  Nx     = 2
+  Nx     = 4
   Lx     = 200
   Ny     = 2
   Ly     = 60
-  Nz     = 4
+  Nz     = 6
   SG     = .f.
 /
 &GEOMETRY
@@ -45,33 +45,34 @@
   NL_CLOS = -1
   LINEARITY = 'linear'
   KIN_E   = .t.
-  mu_x    = 0.0
-  mu_y    = 0.0
+  mu_x    = 0.2
+  mu_y    = 0.4
   N_HD    = 4
-  mu_z    = 0!0.1
-  mu_p    = 0
-  mu_j    = 0
-  nu      = 0!1.0
+  mu_z    = 0.6
+  HYP_V   = 'hypcoll'
+  mu_p    = 0.1
+  mu_j    = 0.
+  nu      = 1.0
   tau_e   = 1
   tau_i   = 1
-  sigma_e = 0.023338
+  sigma_e = 1!0.023338
   sigma_i = 1
-  q_e     = -1
+  q_e     = 1!-1
   q_i     = 1
-  K_Ne    = 0!6.96
-  K_Te    = 0!2.22
-  K_Ni    = 1!6.96
-  K_Ti    = 0!2.22
+  K_Ne    = 1!6.96
+  K_Te    = 2!2.22
+  K_Ni    = 3!6.96
+  K_Ti    = 4!2.22
   beta    = 0
 /
 &COLLISION_PAR
-  collision_model = 'DG' !DG/SG/PA/LD (dougherty, sugama, pitch angle, landau)
-  gyrokin_CO      = .f.
+  collision_model = 'SG' !DG/SG/PA/LD (dougherty, sugama, pitch angle, landau)
+  gyrokin_CO      = .t.
   interspecies    = .true.
-  !mat_file        = 'gk_sugama_P_20_J_10_N_150_kpm_8.0.h5'
+  mat_file        = 'gk_sugama_P_20_J_10_N_150_kpm_8.0.h5'
 /
 &INITIAL_CON
-  INIT_OPT    = 'mom00'
+  INIT_OPT    = 'allmom'
   ACT_ON_MODES       = 'donothing'
   init_background  = 1.0
   init_noiselvl = 0.0
diff --git a/testcases/smallest_problem/gyacomo_1 b/testcases/smallest_problem/gyacomo_1
index d087f907f6e032370d37ee4a2d6c5c29a5350208..2fd67105ac22ff7e7d826fd5df5e4dc4ccc8d3ff 120000
--- a/testcases/smallest_problem/gyacomo_1
+++ b/testcases/smallest_problem/gyacomo_1
@@ -1 +1 @@
-../../bin/gyacomo_1
\ No newline at end of file
+../../../gyacomo_1/bin/gyacomo
\ No newline at end of file
diff --git a/wk/analysis_gyacomo.m b/wk/analysis_gyacomo.m
index 855f6ea9566e237b3e5f7fb2d050d3c6a91c9796..5ba7a9f112fc714e6cfe2f06cb72db3e9336ef06 100644
--- a/wk/analysis_gyacomo.m
+++ b/wk/analysis_gyacomo.m
@@ -79,7 +79,7 @@ options.NAME      = '\phi';
 % options.NAME      = 'Q_x';
 % options.NAME      = 'n_i';
 % options.NAME      = 'n_i-n_e';
-options.PLAN      = 'kxz';
+options.PLAN      = 'xz';
 % options.NAME      = 'f_i';
 % options.PLAN      = 'sx';
 options.COMP      = 'avg';
diff --git a/wk/header_3D_results.m b/wk/header_3D_results.m
index f0001360132f32b9cfb3c492df533902330e3209..0f8bc4ae7bd55db8bbc26f8a1e0a340f5b6fc920 100644
--- a/wk/header_3D_results.m
+++ b/wk/header_3D_results.m
@@ -50,6 +50,7 @@ PARTITION  = '/misc/gyacomo_outputs/';
 % resdir = 'paper_2_nonlinear/kT_5.3/5x3x128x64x24';
 % resdir = 'paper_2_nonlinear/kT_5.3/7x4x128x64x24_sp';
 % resdir = 'paper_2_nonlinear/kT_5.3/7x4x128x64x24_dp';
+% resdir = 'paper_2_nonlinear/kT_5.3/7x4x192x96x32_dp';
 % resdir = 'paper_2_nonlinear/kT_5.3/7x4x128x64x64';
 % resdir = 'paper_2_nonlinear/kT_5.3/7x4x128x64x24_MUxy_0';
 % resdir = 'paper_2_nonlinear/kT_5.3/7x4x128x64x24_NL_-1';
@@ -62,10 +63,11 @@ PARTITION  = '/misc/gyacomo_outputs/';
 % resdir = 'paper_2_nonlinear/kT_5.3/11x6x128x64x24';
 % resdir = 'paper_2_nonlinear/kT_5.3/11x6x128x64x24_dp';
 % resdir = 'paper_2_nonlinear/kT_5.3/11x6x128x64x64';
+resdir = 'paper_2_nonlinear/kT_5.3/21x11x128x64x24_dp';
 
-%% Old stuff
-% resdir = 'CBC/kT_4.5_128x64x16x13x7/';
-% resdir = 'CBC/kT_5.3_192x96x24x13x7/';
+%% nu scans kT=5.3
+% resdir = 'paper_2_nonlinear/nu_scan_kT_5.3/DGGK_7x4x128x64x24_dp';
+% resdir = 'paper_2_nonlinear/nu_scan_kT_5.3/SGGK_7x4x128x64x24_dp';
 
 %% Miller
 % resdir = 'paper_2_nonlinear/kT_4.15_miller/7x4x128x64x24';