From 88a2388be20ea249d7092fd58c783234c96e205c Mon Sep 17 00:00:00 2001
From: Antoine Hoffmann <antoine.hoffmann@epfl.ch>
Date: Mon, 13 Mar 2023 13:39:16 +0100
Subject: [PATCH] debug

---
 src/calculus_mod.F90                  | 64 +++++++++++++--------------
 src/diagnose.F90                      | 53 +++++++++++-----------
 src/grid_mod.F90                      |  7 +--
 src/moments_eq_rhs_mod.F90            | 22 ++++-----
 src/numerics_mod.F90                  |  6 +--
 src/parallel_mod.F90                  | 48 +++++++++++---------
 src/processing_mod.F90                |  2 -
 testcases/smallest_problem/fort.90    |  2 +-
 testcases/smallest_problem/fort_00.90 |  2 +-
 wk/header_3D_results.m                |  5 ++-
 10 files changed, 110 insertions(+), 101 deletions(-)

diff --git a/src/calculus_mod.F90 b/src/calculus_mod.F90
index 1ef419a8..ac649cf1 100644
--- a/src/calculus_mod.F90
+++ b/src/calculus_mod.F90
@@ -20,23 +20,23 @@ MODULE calculus
 
 CONTAINS
 
-SUBROUTINE grad_z(target,local_nz,Ngz,inv_deltaz,f,ddzf)
+SUBROUTINE grad_z(target,local_nz,ngz,inv_deltaz,f,ddzf)
   implicit none
   ! Compute the periodic boundary condition 4 points centered finite differences
   ! formula among staggered grid or not.
   ! not staggered : the derivative results must be on the same grid as the field
   !     staggered : the derivative is computed from a grid to the other
-  INTEGER,  INTENT(IN) :: target, local_nz, Ngz
+  INTEGER,  INTENT(IN) :: target, local_nz, ngz
   REAL(dp), INTENT(IN) :: inv_deltaz
-  COMPLEX(dp),dimension(local_nz+Ngz), INTENT(IN)  :: f
+  COMPLEX(dp),dimension(local_nz+ngz), INTENT(IN)  :: f
   COMPLEX(dp),dimension(local_nz),     INTENT(OUT) :: ddzf
   INTEGER :: iz
-  IF(Ngz .GT. 3) THEN ! Cannot apply four points stencil on less than four points grid
+  IF(ngz .GT. 3) THEN ! Cannot apply four points stencil on less than four points grid
     SELECT CASE(TARGET)
     CASE(1)
-      CALL grad_z_o2e(local_nz,Ngz,inv_deltaz,f,ddzf)
+      CALL grad_z_o2e(local_nz,ngz,inv_deltaz,f,ddzf)
     CASE(2)
-      CALL grad_z_e2o(local_nz,Ngz,inv_deltaz,f,ddzf)
+      CALL grad_z_e2o(local_nz,ngz,inv_deltaz,f,ddzf)
     CASE DEFAULT ! No staggered grid -> usual centered finite differences
       DO iz = 1,local_nz
        ddzf(iz) = dz_usu(-2)*f(iz  ) + dz_usu(-1)*f(iz+1) &
@@ -49,12 +49,12 @@ SUBROUTINE grad_z(target,local_nz,Ngz,inv_deltaz,f,ddzf)
     ddzf = 0._dp
   ENDIF
 CONTAINS
-  SUBROUTINE grad_z_o2e(local_nz,Ngz,inv_deltaz,fo,ddzfe) ! Paruta 2018 eq (27)
+  SUBROUTINE grad_z_o2e(local_nz,ngz,inv_deltaz,fo,ddzfe) ! Paruta 2018 eq (27)
     ! gives the gradient of a field from the odd grid to the even one
     implicit none
-    INTEGER,  INTENT(IN) :: local_nz, Ngz
+    INTEGER,  INTENT(IN) :: local_nz, ngz
     REAL(dp), INTENT(IN) :: inv_deltaz
-    COMPLEX(dp),dimension(local_nz+Ngz), INTENT(IN)  :: fo
+    COMPLEX(dp),dimension(local_nz+ngz), INTENT(IN)  :: fo
     COMPLEX(dp),dimension(local_nz),     INTENT(OUT) :: ddzfe !
     INTEGER :: iz
     DO iz = 1,local_nz
@@ -64,12 +64,12 @@ CONTAINS
     ddzfe(:) = ddzfe(:) * inv_deltaz
   END SUBROUTINE grad_z_o2e
 
-  SUBROUTINE grad_z_e2o(local_nz,Ngz,inv_deltaz,fe,ddzfo) ! n2v for Paruta 2018 eq (28)
+  SUBROUTINE grad_z_e2o(local_nz,ngz,inv_deltaz,fe,ddzfo) ! n2v for Paruta 2018 eq (28)
     ! gives the gradient of a field from the even grid to the odd one
     implicit none
-    INTEGER,  INTENT(IN) :: local_nz, Ngz
+    INTEGER,  INTENT(IN) :: local_nz, ngz
     REAL(dp), INTENT(IN) :: inv_deltaz
-    COMPLEX(dp),dimension(local_nz+Ngz), INTENT(IN)  :: fe
+    COMPLEX(dp),dimension(local_nz+ngz), INTENT(IN)  :: fe
     COMPLEX(dp),dimension(local_nz),     INTENT(OUT) :: ddzfo
     INTEGER :: iz
     DO iz = 1,local_nz
@@ -80,15 +80,15 @@ CONTAINS
   END SUBROUTINE grad_z_e2o
 END SUBROUTINE grad_z
 
-SUBROUTINE grad_z2(local_nz,Ngz,inv_deltaz,f,ddz2f)
+SUBROUTINE grad_z2(local_nz,ngz,inv_deltaz,f,ddz2f)
   ! Compute the second order fourth derivative for periodic boundary condition
   implicit none
-  INTEGER, INTENT(IN)  :: local_nz, Ngz
+  INTEGER, INTENT(IN)  :: local_nz, ngz
   REAL(dp), INTENT(IN) :: inv_deltaz
-  COMPLEX(dp),dimension(local_nz+Ngz), INTENT(IN)  :: f
+  COMPLEX(dp),dimension(local_nz+ngz), INTENT(IN)  :: f
   COMPLEX(dp),dimension(local_nz),     INTENT(OUT) :: ddz2f
   INTEGER :: iz
-  IF(Ngz .GT. 3) THEN ! Cannot apply four points stencil on less than four points grid
+  IF(ngz .GT. 3) THEN ! Cannot apply four points stencil on less than four points grid
       DO iz = 1,local_nz
        ddz2f(iz) = dz2_usu(-2)*f(iz  ) + dz2_usu(-1)*f(iz+1) &
                   +dz2_usu( 0)*f(iz+2)&
@@ -101,15 +101,15 @@ SUBROUTINE grad_z2(local_nz,Ngz,inv_deltaz,f,ddz2f)
 END SUBROUTINE grad_z2
 
 
-SUBROUTINE grad_z4(local_nz,Ngz,inv_deltaz,f,ddz4f)
+SUBROUTINE grad_z4(local_nz,ngz,inv_deltaz,f,ddz4f)
   ! Compute the second order fourth derivative for periodic boundary condition
   implicit none
-  INTEGER,  INTENT(IN) :: local_nz, Ngz
+  INTEGER,  INTENT(IN) :: local_nz, ngz
   REAL(dp), INTENT(IN) :: inv_deltaz
-  COMPLEX(dp),dimension(local_nz+Ngz), INTENT(IN)  :: f
+  COMPLEX(dp),dimension(local_nz+ngz), INTENT(IN)  :: f
   COMPLEX(dp),dimension(local_nz),     INTENT(OUT) :: ddz4f
   INTEGER :: iz
-  IF(Ngz .GT. 3) THEN ! Cannot apply four points stencil on less than four points grid
+  IF(ngz .GT. 3) THEN ! Cannot apply four points stencil on less than four points grid
       DO iz = 1,local_nz
        ddz4f(iz) = dz4_usu(-2)*f(iz  ) + dz4_usu(-1)*f(iz+1) &
                   +dz4_usu( 0)*f(iz+2)&
@@ -122,29 +122,29 @@ SUBROUTINE grad_z4(local_nz,Ngz,inv_deltaz,f,ddz4f)
 END SUBROUTINE grad_z4
 
 
-SUBROUTINE interp_z(target,local_nz,Ngz,f_in,f_out)
+SUBROUTINE interp_z(target,local_nz,ngz,f_in,f_out)
   ! Function meant to interpolate one field defined on a even/odd z into
   !  the other odd/even z grid.
   ! If Staggered Grid flag (SG) is false, returns identity
   implicit none
-  INTEGER, INTENT(IN) :: local_nz, Ngz
+  INTEGER, INTENT(IN) :: local_nz, ngz
   INTEGER, intent(in) :: target ! target grid : 0 for even grid, 1 for odd
-  COMPLEX(dp),dimension(local_nz+Ngz), INTENT(IN)  :: f_in
+  COMPLEX(dp),dimension(local_nz+ngz), INTENT(IN)  :: f_in
   COMPLEX(dp),dimension(local_nz),     INTENT(OUT) :: f_out
   SELECT CASE(TARGET)
   CASE(1) ! output on even grid
-    CALL interp_o2e_z(local_nz,Ngz,f_in,f_out)
+    CALL interp_o2e_z(local_nz,ngz,f_in,f_out)
   CASE(2) ! output on odd grid
-    CALL interp_e2o_z(local_nz,Ngz,f_in,f_out)
+    CALL interp_e2o_z(local_nz,ngz,f_in,f_out)
   CASE DEFAULT ! No staggered grid -> usual centered finite differences
-    f_out = f_in
+    f_out = f_in(1+ngz/2:local_nz+ngz/2)
   END SELECT
 CONTAINS
-  SUBROUTINE interp_o2e_z(local_nz, Ngz,fo,fe)
+  SUBROUTINE interp_o2e_z(local_nz, ngz,fo,fe)
    ! gives the value of a field from the odd grid to the even one
    implicit none
-   INTEGER, INTENT(IN) :: local_nz, Ngz
-   COMPLEX(dp),dimension(local_nz+Ngz), INTENT(IN)  :: fo
+   INTEGER, INTENT(IN) :: local_nz, ngz
+   COMPLEX(dp),dimension(local_nz+ngz), INTENT(IN)  :: fo
    COMPLEX(dp),dimension(local_nz),     INTENT(OUT) :: fe
    INTEGER :: iz
    ! 4th order interp
@@ -154,11 +154,11 @@ CONTAINS
    ENDDO
   END SUBROUTINE interp_o2e_z
 
-  SUBROUTINE interp_e2o_z(local_nz, Ngz,fe,fo)
+  SUBROUTINE interp_e2o_z(local_nz, ngz,fe,fo)
    ! gives the value of a field from the even grid to the odd one
    implicit none
-   INTEGER, INTENT(IN) :: local_nz, Ngz
-   COMPLEX(dp),dimension(local_nz+Ngz), INTENT(IN)  :: fe
+   INTEGER, INTENT(IN) :: local_nz, ngz
+   COMPLEX(dp),dimension(local_nz+ngz), INTENT(IN)  :: fe
    COMPLEX(dp),dimension(local_nz),     INTENT(OUT) :: fo
    INTEGER :: iz
    ! 4th order interp
diff --git a/src/diagnose.F90 b/src/diagnose.F90
index 00ec221c..47227143 100644
--- a/src/diagnose.F90
+++ b/src/diagnose.F90
@@ -114,7 +114,6 @@ SUBROUTINE diagnose_full(kstep)
   INTEGER :: dims(1) = (/0/)
   !____________________________________________________________________________
   !                   1.   Initial diagnostics
-
   IF ((kstep .EQ. 0)) THEN
     CALL init_outfile(comm0,   resfile0,resfile,fidres)
     ! Profiler time measurement
@@ -155,7 +154,7 @@ SUBROUTINE diagnose_full(kstep)
     CALL putarrnd(fidres, "/data/metric/Jacobian",    Jacobian((1+ngz/2):(local_nz+ngz/2),:), (/1, 1, 1/))
     CALL putarrnd(fidres, "/data/metric/gradz_coeff", gradz_coeff((1+ngz/2):(local_nz+ngz/2),:), (/1, 1, 1/))
     CALL putarrnd(fidres, "/data/metric/Ckxky",       Ckxky(1:local_nky,1:local_nkx,(1+ngz/2):(local_nz+ngz/2),:), (/1, 1, 3/))
-    CALL putarrnd(fidres, "/data/metric/kernel",    kernel(1,(1+ngj/2):(local_nj+ngj/2),1:local_nky,1:local_nkx,(1+ngz/2):(local_nz+ngz/2),1), (/1, 1, 2, 4/))
+    CALL putarrnd(fidres, "/data/metric/kernel",    kernel(1,(1+ngj/2):(local_nj+ngj/2),1:local_nky,1:local_nkx,(1+ngz/2):(local_nz+ngz/2),1), (/1, 2, 4/))
     !  var0d group (gyro transport)
     IF (nsave_0d .GT. 0) THEN
      CALL creatg(fidres, "/data/var0d", "0d profiles")
@@ -229,34 +228,34 @@ SUBROUTINE diagnose_full(kstep)
   !                   2.   Periodic diagnostics
   !
   IF (kstep .GE. 0) THEN
-     !                       2.1   0d history arrays
-     IF (nsave_0d .GT. 0) THEN
-        IF ( MOD(cstep, nsave_0d) == 0 ) THEN
-           CALL diagnose_0d
-        END IF
-     END IF
-     !                       2.3   3d profiles
-     IF (nsave_3d .GT. 0) THEN
-        IF (MOD(cstep, nsave_3d) == 0) THEN
+    !                       2.1   0d history arrays
+    IF (nsave_0d .GT. 0) THEN
+      IF ( MOD(cstep, nsave_0d) == 0 ) THEN
+        CALL diagnose_0d
+      END IF
+    END IF
+    !                       2.3   3d profiles
+    IF (nsave_3d .GT. 0) THEN
+      IF (MOD(cstep, nsave_3d) == 0) THEN
           CALL diagnose_3d
-        ! Looks at the folder if the file check_phi exists and spits a snapshot
+          ! Looks at the folder if the file check_phi exists and spits a snapshot
           ! of the current electrostatic potential in a basic text file
           CALL spit_snapshot_check
         ENDIF
-     ENDIF
-     !                       2.4   5d profiles
-     IF (nsave_5d .GT. 0) THEN
+      ENDIF
+      !                       2.4   5d profiles
+      IF (nsave_5d .GT. 0) THEN
         IF (MOD(cstep, nsave_5d) == 0) THEN
-           CALL diagnose_5d
+          CALL diagnose_5d
         END IF
-     END IF
-  !_____________________________________________________________________________
-  !                   3.   Final diagnostics
-  ELSEIF (kstep .EQ. -1) THEN
-     CALL attach(fidres, "/data/input","cpu_time",finish-start)
-
-     ! make a checkpoint at last timestep if not crashed
-     IF(.NOT. crashed) THEN
+      END IF
+      !_____________________________________________________________________________
+      !                   3.   Final diagnostics
+      ELSEIF (kstep .EQ. -1) THEN
+        CALL attach(fidres, "/data/input","cpu_time",finish-start)
+        
+        ! make a checkpoint at last timestep if not crashed
+        IF(.NOT. crashed) THEN
        IF(my_id .EQ. 0) write(*,*) 'Saving last state'
        IF (nsave_5d .GT. 0) &
        CALL diagnose_5d
@@ -476,9 +475,9 @@ SUBROUTINE diagnose_5d
     WRITE(dset_name, "(A, '/', A, '/', i6.6)") "/data/var5d", TRIM(text), iframe5d
     IF (num_procs .EQ. 1) THEN
       CALL putarr(fidres, dset_name, field_sub, ionode=0)
-    ELSEIF(GATHERV_OUTPUT) THEN ! output using one node (gatherv)
-      CALL gather_pjxyz(field_sub,field_full,total_na,local_np,total_np,total_nj,local_nky,total_nky,total_nkx,local_nz,total_nz)
-      CALL putarr(fidres, dset_name, field_full, ionode=0)
+      ELSEIF(GATHERV_OUTPUT) THEN ! output using one node (gatherv)
+        CALL gather_pjxyz(field_sub,field_full,total_na,local_np,total_np,total_nj,local_nky,total_nky,total_nkx,local_nz,total_nz)
+        CALL putarr(fidres, dset_name, field_full, ionode=0)
     ELSE
       CALL putarrnd(fidres, dset_name, field_sub,  (/1,3,5/))
     ENDIF
diff --git a/src/grid_mod.F90 b/src/grid_mod.F90
index 9003f696..7cba3618 100644
--- a/src/grid_mod.F90
+++ b/src/grid_mod.F90
@@ -308,7 +308,8 @@ CONTAINS
     CHARACTER(len=*), INTENT(IN) ::LINEARITY
     INTEGER, INTENT(IN) :: N_HD
     INTEGER :: iky
-    Nky = Ny/2+1 ! Defined only on positive kx since fields are real
+    Nky       = Ny/2+1 ! Defined only on positive kx since fields are real
+    total_nky = Nky
     ! Grid spacings
     IF (Ny .EQ. 1) THEN
       deltaky = 2._dp*PI/Ly
@@ -341,7 +342,7 @@ CONTAINS
         kyarray_full(iky) = deltaky
         SINGLE_KY         = .TRUE.
       ELSE
-        kyarray(iky) = kyarray_full(iky-local_nky_offset)
+        kyarray(iky) = kyarray_full(iky+local_nky_offset)
       ENDIF
       ! Finding kx=0
       IF (kyarray(iky) .EQ. 0) THEN
@@ -431,7 +432,7 @@ CONTAINS
         ! Set local grid (not parallelized so same as full one)
         local_kxmax = 0._dp
         DO ikx = 1,local_nkx
-          kxarray(ikx) = kxarray_full(ikx-local_nkx_offset)
+          kxarray(ikx) = kxarray_full(ikx+local_nkx_offset)
           ! Finding kx=0
           IF (kxarray(ikx) .EQ. 0) THEN
             ikx0 = ikx
diff --git a/src/moments_eq_rhs_mod.F90 b/src/moments_eq_rhs_mod.F90
index 7a127f53..8f2fce01 100644
--- a/src/moments_eq_rhs_mod.F90
+++ b/src/moments_eq_rhs_mod.F90
@@ -86,7 +86,7 @@ SUBROUTINE compute_moments_eq_rhs
                 Fmir = dlnBdz(iz,eo)*(Tnapp1j + Tnapp1jm1 + Tnapm1j + Tnapm1jm1 +&
                                       Unapm1j + Unapm1jp1 + Unapm1jm1)
                 ! Parallel magnetic term (Landau damping and the mirror force)
-                Mpara = gradz_coeff(iz,eo)*(Ldamp) !+ Fmir)
+                Mpara = gradz_coeff(iz,eo)*(Ldamp + Fmir)
                 !! Electrical potential term
                 IF ( p_int .LE. 2 ) THEN ! kronecker p0 p1 p2
                   Dphi =i_ky*( xphij  (ia,ip,ij)*kernel(ia,iji  ,iky,ikx,izi,eo) &
@@ -110,20 +110,20 @@ SUBROUTINE compute_moments_eq_rhs
                     ! Perpendicular magnetic term
                     - Mperp &
                     ! Parallel magnetic term
-                    - Mpara !&
+                    - Mpara &
                     ! ! Drives (density + temperature gradients)
-                    ! - (Dphi + Dpsi) &
+                    - (Dphi + Dpsi) &
                     ! ! Collision term
-                    ! + Capj(ia,ip,ij,iky,ikx,iz) &
+                    + Capj(ia,ip,ij,iky,ikx,iz) &
                     ! ! Perpendicular pressure effects (electromagnetic term) (TO CHECK)
-                    ! - i_ky*beta*dpdx(ia) * (Tnapj + Tnapp2j + Tnapm2j + Tnapjp1 + Tnapjm1)&
+                    - i_ky*beta*dpdx(ia) * (Tnapj + Tnapp2j + Tnapm2j + Tnapjp1 + Tnapjm1)&
                     ! ! Parallel drive term (should be negligible, to test)
-                    ! ! -Gamma_phipar(iz,eo)*Tphi*ddz_phi(iky,ikx,iz) &
+                    !! -Gamma_phipar(iz,eo)*Tphi*ddz_phi(iky,ikx,iz) &
                     ! ! Numerical perpendicular hyperdiffusion
-                    ! -mu_x*diff_kx_coeff*kx**N_HD*Napj &
-                    ! -mu_y*diff_ky_coeff*ky**N_HD*Napj &
+                    -mu_x*diff_kx_coeff*kx**N_HD*Napj &
+                    -mu_y*diff_ky_coeff*ky**N_HD*Napj &
                     ! ! Numerical parallel hyperdiffusion "mu_z*ddz**4"  see Pueschel 2010 (eq 25)
-                    ! -mu_z*diff_dz_coeff*ddzND_napj(ia,ipi,iji,iky,ikx,iz)
+                    -mu_z*diff_dz_coeff*ddzND_napj(ia,ipi,iji,iky,ikx,iz)
                 !! Velocity space dissipation (should be implemented somewhere else)
                 SELECT CASE(HYP_V)
                 CASE('hypcoll') ! GX like Hermite hypercollisions see Mandell et al. 2023 (eq 3.23), unadvised to use it
@@ -159,7 +159,9 @@ SUBROUTINE compute_moments_eq_rhs
   ! print*, moments(1,3,2,2,2,3,updatetlevel)
   ! print*, moments_rhs(1,3,2,2,2,3,updatetlevel)
   print*, SUM(REAL(moments_rhs(1,:,:,:,:,:,:)))
-  stop
+  print*, SUM(REAL(moments_rhs(2,:,:,:,:,:,:)))
+  print*, SUM(REAL(phi))
+  ! stop
 
 END SUBROUTINE compute_moments_eq_rhs
 
diff --git a/src/numerics_mod.F90 b/src/numerics_mod.F90
index 98e08e18..7c3c7476 100644
--- a/src/numerics_mod.F90
+++ b/src/numerics_mod.F90
@@ -178,8 +178,8 @@ END SUBROUTINE evaluate_poisson_op
 SUBROUTINE evaluate_ampere_op
   USE prec_const,   ONLY : dp
   USE array,    ONLY : kernel, inv_ampere_op
-  USE grid,     ONLY : local_na, local_nkx, local_nky, local_nz, &
-                       jmax, kparray, kxarray, kyarray, SOLVE_AMPERE
+  USE grid,     ONLY : local_na, local_nkx, local_nky, local_nz, ngz, &
+                       jmax, kparray, kxarray, kyarray, SOLVE_AMPERE, iodd
   USE model,    ONLY : beta
   USE species,  ONLY : q, sigma
   USE geometry, ONLY : hatB
@@ -204,7 +204,7 @@ SUBROUTINE evaluate_ampere_op
           pol_tot = pol_tot  + q(ia)**2/(sigma(ia)**2)*kernel(ia,in,iky,ikx,iz,1)**2 ! ... sum recursively ...
         END DO
       END DO
-      inv_ampere_op(iky, ikx, iz) =  1._dp/(2._dp*kperp2*hatB(iz,0)**2 + beta*pol_tot)
+      inv_ampere_op(iky, ikx, iz) =  1._dp/(2._dp*kperp2*hatB(iz+ngz/2,iodd)**2 + beta*pol_tot)
     ENDIF
     END DO
     END DO
diff --git a/src/parallel_mod.F90 b/src/parallel_mod.F90
index 2cf09efa..5fa74308 100644
--- a/src/parallel_mod.F90
+++ b/src/parallel_mod.F90
@@ -114,66 +114,72 @@ CONTAINS
     INTEGER, INTENT(IN) :: np_loc,np_tot,nky_loc,nky_tot,nz_loc
     INTEGER :: i_
     !! P reduction at constant x,y,z,j
-    ALLOCATE(rcv_p(0:num_procs_p-1),dsp_p(0:num_procs_p-1)) !Displacement sizes for balance diagnostic
+    ! ALLOCATE(rcv_p(0:num_procs_p-1),dsp_p(0:num_procs_p-1)) !Displacement sizes for balance diagnostic
+    ALLOCATE(rcv_p(num_procs_p),dsp_p(num_procs_p)) !Displacement sizes for balance diagnostic
     ! all processes share their local number of points
     CALL MPI_ALLGATHER(np_loc,1,MPI_INTEGER,rcv_p,1,MPI_INTEGER,comm_p,ierr)
     ! the displacement array can be build from each np_loc as
-    dsp_p(0)=0
-    DO i_=1,num_procs_p-1
+    dsp_p(1)=0
+    DO i_=2,num_procs_p
        dsp_p(i_) =dsp_p(i_-1) + rcv_p(i_-1)
     END DO
     !!!!!! XYZ gather variables
     !! Y reduction at constant x and z
     ! number of points recieved and displacement for the y reduction
-    ALLOCATE(rcv_y(0:num_procs_ky-1),dsp_y(0:num_procs_ky-1)) !Displacement sizes for balance diagnostic
+    ! ALLOCATE(rcv_y(0:num_procs_ky-1),dsp_y(0:num_procs_ky-1)) !Displacement sizes for balance diagnostic
+    ALLOCATE(rcv_y(num_procs_ky),dsp_y(num_procs_ky)) !Displacement sizes for balance diagnostic
     ! all processes share their local number of points
     CALL MPI_ALLGATHER(nky_loc,1,MPI_INTEGER,rcv_y,1,MPI_INTEGER,comm_ky,ierr)
     ! the displacement array can be build from each np_loc as
-    dsp_y(0)=0
-    DO i_=1,num_procs_ky-1
+    dsp_y(1)=0
+    DO i_=2,num_procs_ky-1
        dsp_y(i_) =dsp_y(i_-1) + rcv_y(i_-1)
     END DO
     !! Z reduction for full slices of y data but constant x
     ! number of points recieved and displacement for the z reduction
-    ALLOCATE(rcv_zy(0:num_procs_z-1),dsp_zy(0:num_procs_z-1)) !Displacement sizes for balance diagnostic
+    ! ALLOCATE(rcv_zy(0:num_procs_z-1),dsp_zy(0:num_procs_z-1)) !Displacement sizes for balance diagnostic
+    ALLOCATE(rcv_zy(num_procs_z),dsp_zy(num_procs_z)) !Displacement sizes for balance diagnostic
     ! all processes share their local number of points
     CALL MPI_ALLGATHER(nz_loc*nky_tot,1,MPI_INTEGER,rcv_zy,1,MPI_INTEGER,comm_z,ierr)
     ! the displacement array can be build from each np_loc as
-    dsp_zy(0)=0
-    DO i_=1,num_procs_z-1
+    dsp_zy(1)=0
+    DO i_=2,num_procs_z-1
        dsp_zy(i_) =dsp_zy(i_-1) + rcv_zy(i_-1)
     END DO
     !!!!! PJZ gather variables
     !! P reduction at constant j and z is already done in module GRID
     !! Z reduction for full slices of p data but constant j
     ! number of points recieved and displacement for the z reduction
-    ALLOCATE(rcv_zp(0:num_procs_z-1),dsp_zp(0:num_procs_z-1)) !Displacement sizes for balance diagnostic
+    ! ALLOCATE(rcv_zp(0:num_procs_z-1),dsp_zp(0:num_procs_z-1)) !Displacement sizes for balance diagnostic
+    ALLOCATE(rcv_zp(num_procs_z),dsp_zp(num_procs_z)) !Displacement sizes for balance diagnostic
     ! all processes share their local number of points
     CALL MPI_ALLGATHER(nz_loc*np_tot,1,MPI_INTEGER,rcv_zp,1,MPI_INTEGER,comm_z,ierr)
     ! the displacement array can be build from each np_loc as
-    dsp_zp(0)=0
-    DO i_=1,num_procs_z-1
+    dsp_zp(1)=0
+    DO i_=2,num_procs_z-1
        dsp_zp(i_) =dsp_zp(i_-1) + rcv_zp(i_-1)
     END DO
     !!!!! PJXYZ gather variables
     !! Y reduction for full slices of p data but constant j
     ! number of points recieved and displacement for the y reduction
-    ALLOCATE(rcv_yp(0:num_procs_ky-1),dsp_yp(0:num_procs_ky-1)) !Displacement sizes for balance diagnostic
+    ! ALLOCATE(rcv_yp(0:num_procs_ky-1),dsp_yp(0:num_procs_ky-1)) !Displacement sizes for balance diagnostic
+    ALLOCATE(rcv_yp(num_procs_ky),dsp_yp(num_procs_ky)) !Displacement sizes for balance diagnostic
     ! all processes share their local number of points
     CALL MPI_ALLGATHER(nky_loc*np_tot,1,MPI_INTEGER,rcv_yp,1,MPI_INTEGER,comm_ky,ierr)
     ! the displacement array can be build from each np_loc as
-    dsp_yp(0)=0
-    DO i_=1,num_procs_ky-1
+    dsp_yp(1)=0
+    DO i_=2,num_procs_ky-1
        dsp_yp(i_) =dsp_yp(i_-1) + rcv_yp(i_-1)
     END DO
     !! Z reduction for full slices of py data but constant j
     ! number of points recieved and displacement for the z reduction
-    ALLOCATE(rcv_zyp(0:num_procs_z-1),dsp_zyp(0:num_procs_z-1)) !Displacement sizes for balance diagnostic
+    ! ALLOCATE(rcv_zyp(0:num_procs_z-1),dsp_zyp(0:num_procs_z-1)) !Displacement sizes for balance diagnostic
+    ALLOCATE(rcv_zyp(num_procs_z),dsp_zyp(num_procs_z)) !Displacement sizes for balance diagnostic
     ! all processes share their local number of points
     CALL MPI_ALLGATHER(nz_loc*np_tot*nky_tot,1,MPI_INTEGER,rcv_zyp,1,MPI_INTEGER,comm_z,ierr)
     ! the displacement array can be build from each np_loc as
-    dsp_zyp(0)=0
-    DO i_=1,num_procs_z-1
+    dsp_zyp(1)=0
+    DO i_=2,num_procs_z-1
        dsp_zyp(i_) =dsp_zyp(i_-1) + rcv_zyp(i_-1)
     END DO
   END SUBROUTINE init_parallel_var
@@ -299,14 +305,14 @@ CONTAINS
             ! fill a buffer to contain a slice of p data at constant j, ky, kx and z
             buffer_pl_cy_zc(1:na_tot,1:np_loc) = field_loc(1:na_tot,1:np_loc,ij,iy,ix,iz)
             CALL MPI_GATHERV(buffer_pl_cy_zc, snd_p,        MPI_DOUBLE_COMPLEX, &
-                             buffer_pt_cy_zc, rcv_p, dsp_p, MPI_DOUBLE_COMPLEX, &
+                             buffer_pt_cy_zc, na_tot*rcv_p, na_tot*dsp_p, MPI_DOUBLE_COMPLEX, &
                              root_p, comm_p, ierr)
             buffer_pt_yl_zc(1:na_tot,1:np_tot,iy) = buffer_pt_cy_zc(1:na_tot,1:np_tot)
           ENDDO y
           ! send the full line on p contained by root_p
           IF(rank_p .EQ. 0) THEN
             CALL MPI_GATHERV(buffer_pt_yl_zc, snd_y,          MPI_DOUBLE_COMPLEX, &
-                             buffer_pt_yt_zc, rcv_yp, dsp_yp, MPI_DOUBLE_COMPLEX, &
+                             buffer_pt_yt_zc, na_tot*rcv_yp, na_tot*dsp_yp, MPI_DOUBLE_COMPLEX, &
                              root_ky, comm_ky, ierr)
             buffer_pt_yt_zl(1:na_tot,1:np_tot,1:nky_tot,iz) = buffer_pt_yt_zc(1:na_tot,1:np_tot,1:nky_tot)
           ENDIF
@@ -314,7 +320,7 @@ CONTAINS
         ! send the full line on y contained by root_kyas
         IF(rank_ky .EQ. 0) THEN
           CALL MPI_GATHERV(buffer_pt_yt_zl, snd_z,            MPI_DOUBLE_COMPLEX, &
-                           buffer_pt_yt_zt, rcv_zyp, dsp_zyp, MPI_DOUBLE_COMPLEX, &
+                           buffer_pt_yt_zt, na_tot*rcv_zyp, na_tot*dsp_zyp, MPI_DOUBLE_COMPLEX, &
                            root_z, comm_z, ierr)
         ENDIF
         ! ID 0 (the one who ouptut) rebuild the whole array
diff --git a/src/processing_mod.F90 b/src/processing_mod.F90
index 5872be31..43fffe51 100644
--- a/src/processing_mod.F90
+++ b/src/processing_mod.F90
@@ -321,8 +321,6 @@ CONTAINS
             ENDDO
          ENDDO
       ENDDO
-      print*, ddz_napj(1,3,2,:,:,:)
-      stop
       ! Phi parallel gradient (not implemented fully, should be negligible)
       ! DO ikx = 1,local_nkx
       !   DO iky = 1,local_nky
diff --git a/testcases/smallest_problem/fort.90 b/testcases/smallest_problem/fort.90
index 8fb8b7da..c1c4f37e 100644
--- a/testcases/smallest_problem/fort.90
+++ b/testcases/smallest_problem/fort.90
@@ -69,7 +69,7 @@
  tau_  = 1.0
  sigma_= 1.0
  q_    = 1.0
- k_N_  = 0.0!2.22
+ k_N_  = 1.0!2.22
  k_T_  = 0.0!6.96
 /
 &SPECIES
diff --git a/testcases/smallest_problem/fort_00.90 b/testcases/smallest_problem/fort_00.90
index f14e2591..d9fedf89 100644
--- a/testcases/smallest_problem/fort_00.90
+++ b/testcases/smallest_problem/fort_00.90
@@ -60,7 +60,7 @@
   q_i     = 1
   K_Ne    = 0!6.96
   K_Te    = 0!2.22
-  K_Ni    = 0!6.96
+  K_Ni    = 1!6.96
   K_Ti    = 0!2.22
   beta    = 0
 /
diff --git a/wk/header_3D_results.m b/wk/header_3D_results.m
index 70619eee..f0001360 100644
--- a/wk/header_3D_results.m
+++ b/wk/header_3D_results.m
@@ -49,15 +49,18 @@ PARTITION  = '/misc/gyacomo_outputs/';
 % resdir = 'paper_2_nonlinear/kT_5.3/5x3x128x64x64';
 % resdir = 'paper_2_nonlinear/kT_5.3/5x3x128x64x24';
 % resdir = 'paper_2_nonlinear/kT_5.3/7x4x128x64x24_sp';
-% resdir = 'paper_2_nonlinear/kT_5.3/7x4x128x64x64_dp';
+% resdir = 'paper_2_nonlinear/kT_5.3/7x4x128x64x24_dp';
+% resdir = 'paper_2_nonlinear/kT_5.3/7x4x128x64x64';
 % resdir = 'paper_2_nonlinear/kT_5.3/7x4x128x64x24_MUxy_0';
 % resdir = 'paper_2_nonlinear/kT_5.3/7x4x128x64x24_NL_-1';
 % resdir = 'paper_2_nonlinear/kT_5.3/7x4x128x64x24_nuDG_0.01';
 % resdir = 'paper_2_nonlinear/kT_5.3/7x4x128x64x64';
 % resdir = 'paper_2_nonlinear/kT_5.3/7x4x192x96x64';
 % resdir = 'paper_2_nonlinear/kT_5.3/9x5x128x64x24';
+% resdir = 'paper_2_nonlinear/kT_5.3/9x5x128x64x24_dp';
 % resdir = 'paper_2_nonlinear/kT_5.3/9x5x128x64x64';
 % resdir = 'paper_2_nonlinear/kT_5.3/11x6x128x64x24';
+% resdir = 'paper_2_nonlinear/kT_5.3/11x6x128x64x24_dp';
 % resdir = 'paper_2_nonlinear/kT_5.3/11x6x128x64x64';
 
 %% Old stuff
-- 
GitLab