diff --git a/src/auxval.F90 b/src/auxval.F90
index eec635940a18e18a89b106a238e82096dd840a7f..dd4f4a16d6b9cb27c3de193db4c965d50478fc59 100644
--- a/src/auxval.F90
+++ b/src/auxval.F90
@@ -19,6 +19,7 @@ subroutine auxval
   INTEGER :: i_, ierr
   CALL speak('=== Set auxiliary values ===')
   ! Init the grids
+  CALL init_grids_data(Na,EM,LINEARITY) 
   CALL set_grids(shear,Npol,LINEARITY,N_HD,EM,Na) 
   ! Allocate memory for global arrays
   CALL memory
@@ -40,13 +41,11 @@ subroutine auxval
   ! precompute the hermite fourth derivative table
   CALL build_dv4Hp_table 
   ! set the closure scheme in use
-  CALL set_closure_model 
-  
+  CALL set_closure_model   
 #ifdef TEST_SVD
   ! If we want to test SVD decomposition etc.
   CALL init_CLA(local_nky,local_np*local_nj)
 #endif
-
   !! Display parallel settings
   CALL mpi_barrier(MPI_COMM_WORLD, ierr)
   DO i_ = 0,num_procs-1
diff --git a/src/grid_mod.F90 b/src/grid_mod.F90
index 6aa9ad31d44793bff3df6d21586d3e181546db07..9891b2acd64bad32fa2465bfb4969647b9873829 100644
--- a/src/grid_mod.F90
+++ b/src/grid_mod.F90
@@ -108,8 +108,8 @@ MODULE grid
   REAL(xp), DIMENSION(:), ALLOCATABLE, PUBLIC,PROTECTED :: AA_y
 
   ! Public Functions
-  PUBLIC :: init_1Dgrid_distr
-  PUBLIC :: set_grids, set_kparray
+  PUBLIC :: init_1Dgrid_distr, init_grids_data
+  PUBLIC :: set_grids, set_kxgrid, set_kparray
   PUBLIC :: grid_readinputs, grid_outputinputs
   PUBLIC :: bar
 
@@ -118,7 +118,6 @@ MODULE grid
 
 CONTAINS
 
-
   SUBROUTINE grid_readinputs
     ! Read the input parameters
     USE prec_const
@@ -135,6 +134,136 @@ CONTAINS
     inv_Nx = 1._xp/REAL(Nx,xp)
     inv_Ny = 1._xp/REAL(Ny,xp)
   END SUBROUTINE grid_readinputs
+
+  !! Init the local and global number of points in all directions
+  SUBROUTINE init_grids_data(Na,EM,LINEARITY) 
+    USE fourier,  ONLY: init_grid_distr_and_plans
+    USE parallel, ONLY: num_procs_p, rank_p, num_procs_z, rank_z
+    IMPLICIT NONE
+    INTEGER, INTENT(IN) :: Na        ! number of species coming from the model module
+    LOGICAL, INTENT(IN) :: EM        ! electromagnetic effects (to skip odd Hermite or not)
+    CHARACTER(len=*), INTENT(IN) :: LINEARITY    ! Linear or nonlinear run
+    INTEGER :: in, istart, iend
+    !!----------------- SPECIES INDICES (not parallelized)
+    ias = 1
+    iae = Na
+    total_Na = Na
+    local_Na = Na
+    local_Na_offset = ias - 1
+    !!----------------- HERMITE INDICES (parallelized)
+    ! If no parallel dim (Nz=1) and no EM effects (beta=0), the moment hierarchy
+    !! is separable between odds and even P
+    IF((Nz .EQ. 1) .AND. .NOT. EM) THEN
+      deltap  = 2
+      Ngp     = 2  ! two ghosts cells for p+/-2 only
+      pp2     = 1  ! index p+2 is ip+1
+    ELSE
+      deltap  = 1
+      Ngp     = 4  ! four ghosts cells for p+/-1 and p+/-2 terms
+      pp2     = 2  ! index p+2 is ip+2
+    ENDIF
+    ! Total number of Hermite polynomials we will evolve
+    total_np = (Pmax/deltap) + 1
+    ! Local data distribution
+    CALL decomp1D(total_np, num_procs_p, rank_p, ips, ipe)
+    local_np        = ipe - ips + 1
+    local_np_offset = ips - 1
+    ! Allocate the grid arrays
+    ALLOCATE(parray_full(total_np))
+    ALLOCATE(parray(local_np+ngp))
+    !!----------------- LAGUERRE INDICES (not parallelized)
+    ! Total number of J degrees
+    total_nj   = jmax+1
+    local_jmax = jmax
+    Ngj= 2      ! 2-points ghosts for j+\-1 terms
+    ! Indices of local data
+    ijs = 1; ije = jmax + 1
+    ! Local number of J
+    local_nj        = ije - ijs + 1
+    local_nj_offset = ijs - 1
+    ! allocate global and local
+    ALLOCATE(jarray_full(total_nj))
+    ALLOCATE(jarray(local_nj+ngj))
+    ALLOCATE(kroneck_j0(local_nj+ngj));
+    ALLOCATE(kroneck_j1(local_nj+ngj));
+    !!----------------- SPATIAL GRIDS (parallelized)
+    !! Parallel distribution of kx ky grid
+    IF (LINEARITY .NE. 'linear') THEN ! we let FFTW distribute if we use it
+      IF (my_id .EQ. 0) write(*,*) 'FFTW3 y-grid distribution'
+      CALL init_grid_distr_and_plans(Nx,Ny,comm_ky,local_nkx_ptr,local_nkx_ptr_offset,local_nky_ptr,local_nky_ptr_offset)
+    ELSE ! otherwise we distribute equally
+      IF (my_id .EQ. 0) write(*,*) 'Manual y-grid distribution'
+      CALL init_1Dgrid_distr
+    ENDIF
+    !!----------------- BINORMAL KY INDICES (parallelized)
+    Nky       = Ny/2+1 ! Defined only on positive kx since fields are real
+    total_nky = Nky
+    Ngy = 0 ! no ghosts cells in ky
+    ikys = local_nky_ptr_offset + 1
+    ikye = ikys + local_nky_ptr - 1
+    local_nky = ikye - ikys + 1
+    local_nky_offset = local_nky_ptr_offset
+    ALLOCATE(kyarray_full(Nky))
+    ALLOCATE(kyarray(local_nky))
+    ALLOCATE(AA_y(local_nky))
+    !!---------------- RADIAL KX INDICES (not parallelized)
+    Nkx       = Nx
+    total_nkx = Nx
+    ikxs = 1
+    ikxe = total_nkx
+    local_nkx_ptr = ikxe - ikxs + 1
+    local_nkx     = ikxe - ikxs + 1
+    local_nkx_offset = ikxs - 1
+    ALLOCATE(kxarray(local_nkx))
+    ALLOCATE(kxarray_full(total_nkx))
+    ALLOCATE(AA_x(local_nkx))
+    !!---------------- PARALLEL Z GRID (parallelized)
+    total_nz = Nz
+    IF (SG) THEN
+      CALL speak('--2 staggered z grids--')
+      ! indices for even p and odd p grids (used in kernel, jacobian, gij etc.)
+      ieven  = 1
+      iodd   = 2
+      nzgrid = 2
+    ELSE
+      ieven  = 1
+      iodd   = 1
+      nzgrid = 1
+    ENDIF
+    !! Parallel data distribution
+    IF( (Nz .EQ. 1) .AND. (num_procs_z .GT. 1) ) &
+    ERROR STOP '>> ERROR << Cannot have multiple core in z-direction (Nz = 1)'
+    ! Local data distribution
+    CALL decomp1D(total_nz, num_procs_z, rank_z, izs, ize)
+    local_nz        = ize - izs + 1
+    local_nz_offset = izs - 1
+    ! Ghosts boundaries (depend on the order of z operators)
+    IF(Nz .EQ. 1) THEN
+      ngz = 0
+    ELSEIF(Nz .GE. 4) THEN
+      ngz =4
+      IF(mod(Nz,2) .NE. 0 ) THEN
+        ERROR STOP '>> ERROR << Nz must be an even number for Simpson integration rule !!!!'
+     ENDIF
+    ELSE
+      ERROR STOP '>> ERROR << Nz is not appropriate!!'
+    ENDIF
+    ALLOCATE(zarray(local_nz+ngz,nzgrid))
+    ALLOCATE(zarray_full(total_nz))
+    ALLOCATE(counts_nz (num_procs_z))
+    ALLOCATE(displs_nz (num_procs_z))
+    CALL allocate_array(local_zmax,1,nzgrid)
+    CALL allocate_array(local_zmin,1,nzgrid)
+    ALLOCATE(zweights_SR(local_nz))
+    ! List of shift and local numbers between the different processes (used in scatterv and gatherv)
+    DO in = 0,num_procs_z-1
+      CALL decomp1D(total_nz, num_procs_z, in, istart, iend)
+      counts_nz(in+1) = iend-istart+1
+      displs_nz(in+1) = istart-1
+    ENDDO
+    !!---------------- Kperp grid
+    CALL allocate_array( kparray, 1,local_nky, 1,local_nkx, 1,local_nz+ngz, 1,nzgrid)
+  END SUBROUTINE
   !!!! GRID REPRESENTATION
   ! We define the grids that contain ghosts (p,j,z) with indexing from 1 to Nlocal + nghost
   ! e.g. nghost = 4, nlocal = 4
@@ -145,7 +274,6 @@ CONTAINS
   ! |_|_|_|_|
   !  1 2 3 4
   SUBROUTINE set_grids(shear,Npol,LINEARITY,N_HD,EM,Na)
-    USE fourier, ONLY: init_grid_distr_and_plans
     REAL(xp), INTENT(IN) :: shear, Npol
     CHARACTER(len=*), INTENT(IN) :: LINEARITY
     INTEGER, INTENT(IN)  :: N_HD
@@ -154,16 +282,8 @@ CONTAINS
     CALL set_agrid(Na)
     CALL set_pgrid(EM)
     CALL set_jgrid
-    !! Parallel distribution of kx ky grid
-    IF (LINEARITY .NE. 'linear') THEN
-      IF (my_id .EQ. 0) write(*,*) 'FFTW3 y-grid distribution'
-      CALL init_grid_distr_and_plans(Nx,Ny,comm_ky,local_nkx_ptr,local_nkx_ptr_offset,local_nky_ptr,local_nky_ptr_offset)
-    ELSE
-      CALL init_1Dgrid_distr
-      IF (my_id .EQ. 0) write(*,*) 'Manual y-grid distribution'
-    ENDIF
     CALL set_kygrid(LINEARITY,N_HD)
-    CALL set_kxgrid(shear,Npol,LINEARITY,N_HD)
+    CALL set_kxgrid(shear,Npol,1._xp,LINEARITY,N_HD) ! this will be redone after geometry if Cyq0_x0 .NE. 1
     CALL set_zgrid (Npol)
   END SUBROUTINE set_grids
 
@@ -188,39 +308,14 @@ CONTAINS
 
   SUBROUTINE set_pgrid(EM)
     USE prec_const
-    USE parallel, ONLY: num_procs_p, rank_p
     IMPLICIT NONE
     LOGICAL, INTENT(IN) :: EM
     INTEGER :: ip
-    ! If no parallel dim (Nz=1) and no EM effects (beta=0), the moment hierarchy
-    !! is separable between odds and even P and since the energy is injected in
-    !! P=0 and P=2 for density/temperature gradients there is no need of
-    !! simulating the odd p which will only be damped.
-    !! We define in this case a grid Parray = 0,2,4,...,Pmax i.e. deltap = 2
-    !! instead of 1 to spare computation
-    IF((Nz .EQ. 1) .AND. .NOT. EM) THEN
-      deltap  = 2
-      Ngp     = 2  ! two ghosts cells for p+/-2 only
-      pp2     = 1  ! index p+2 is ip+1
-    ELSE
-      deltap  = 1
-      Ngp     = 4  ! four ghosts cells for p+/-1 and p+/-2 terms
-      pp2     = 2  ! index p+2 is ip+2
-    ENDIF
-    ! Total number of Hermite polynomials we will evolve
-    total_np = (Pmax/deltap) + 1
     ! Build the full grids on process 0 to diagnose it without comm
-    ALLOCATE(parray_full(total_np))
     ! P
     DO ip = 1,total_np; parray_full(ip) = (ip-1)*deltap; END DO
-    !! Parallel data distribution
-    ! Local data distribution
-    CALL decomp1D(total_np, num_procs_p, rank_p, ips, ipe)
-    local_np       = ipe - ips + 1
-    local_np_offset = ips - 1
     !! local grid computations
-    ! Allocate and fill pgrid array
-    ALLOCATE(parray(local_np+ngp))
+    ! Fill pgrid array
     DO ip = 1,local_np+ngp
       parray(ip) = (ip-1-ngp/2+local_np_offset)*deltap
     ENDDO
@@ -279,20 +374,11 @@ CONTAINS
     USE prec_const
     IMPLICIT NONE
     INTEGER :: ij
-    ! Total number of J degrees
-    total_nj   = jmax+1
-    local_jmax = jmax
-    Ngj= 2      ! 2-points ghosts for j+\-1 terms
     ! Build the full grids on process 0 to diagnose it without comm
-    ALLOCATE(jarray_full(total_nj))
     ! J
-    DO ij = 1,total_nj; jarray_full(ij) = (ij-1); END DO
-    ! Indices of local data
-    ijs = 1; ije = jmax + 1
-    ! Local number of J
-    local_nj        = ije - ijs + 1
-    local_nj_offset = ijs - 1
-    ALLOCATE(jarray(local_nj+ngj))
+    DO ij = 1,total_nj
+      jarray_full(ij) = (ij-1)
+    END DO
     DO ij = 1,local_nj+ngj
       jarray(ij) = ij-1-ngj/2+local_nj_offset
     END DO
@@ -307,8 +393,8 @@ CONTAINS
       IF(jarray(ij) .EQ. 1) ij1 = ij
     END DO
     ! Kronecker arrays for j
-    ALLOCATE(kroneck_j0(local_nj+ngj)); kroneck_j0 = 0._xp
-    ALLOCATE(kroneck_j1(local_nj+ngj)); kroneck_j1 = 0._xp
+    kroneck_j0 = 0._xp
+    kroneck_j1 = 0._xp
     DO ij = 1,local_nj+ngj
       SELECT CASE(jarray(ij))
       CASE(0)
@@ -325,29 +411,18 @@ CONTAINS
     CHARACTER(len=*), INTENT(IN) ::LINEARITY
     INTEGER, INTENT(IN) :: N_HD
     INTEGER :: iky
-    Nky       = Ny/2+1 ! Defined only on positive kx since fields are real
-    total_nky = Nky
     ! Grid spacings
     IF (Ny .EQ. 1) THEN
-      deltaky = 2._xp*PI/Ly
-      ky_max  = deltaky
-      ky_min  = deltaky
+      ERROR STOP "Gyacomo cannot run with only one ky"
     ELSE
       deltaky = 2._xp*PI/Ly
       ky_max  = (Nky-1)*deltaky
       ky_min  = deltaky
     ENDIF
-    Ngy = 0 ! no ghosts cells in ky
     ! Build the full grids on process 0 to diagnose it without comm
-    ALLOCATE(kyarray_full(Nky))
     DO iky = 1,Nky
      kyarray_full(iky) = REAL(iky-1,xp) * deltaky
     END DO
-    ikys = local_nky_ptr_offset + 1
-    ikye = ikys + local_nky_ptr - 1
-    local_nky = ikye - ikys + 1
-    local_nky_offset = local_nky_ptr_offset
-    ALLOCATE(kyarray(local_nky))
     local_kymax = 0._xp
     ! Creating a grid ordered as dk*(0 1 2 3)
     ! We loop over the natural iky numbers (|1 2 3||4 5 6||... Nky|)
@@ -378,7 +453,6 @@ CONTAINS
     END DO
     ! Orszag 2/3 filter
     two_third_kymax = 2._xp/3._xp*deltaky*(Nky-1)
-    ALLOCATE(AA_y(local_nky))
     DO iky = 1,local_nky
       IF ( (kyarray(iky) .LT. two_third_kymax) .OR. (LINEARITY .EQ. 'linear')) THEN
         AA_y(iky) = 1._xp;
@@ -394,10 +468,10 @@ CONTAINS
     ENDIF
   END SUBROUTINE set_kygrid
 
-  SUBROUTINE set_kxgrid(shear,Npol,LINEARITY,N_HD)
+  SUBROUTINE set_kxgrid(shear,Npol,Cyq0_x0,LINEARITY,N_HD)
     USE prec_const
     IMPLICIT NONE
-    REAL(xp), INTENT(IN) :: shear, Npol
+    REAL(xp), INTENT(IN) :: shear, Npol, Cyq0_x0
     CHARACTER(len=*), INTENT(IN) ::LINEARITY
     INTEGER, INTENT(IN)  :: N_HD
     INTEGER :: ikx
@@ -405,7 +479,7 @@ CONTAINS
     IF(shear .GT. 0) THEN
       IF(my_id.EQ.0) write(*,*) 'Magnetic shear detected: set up sheared kx grid..'
       ! mininal size of box in x to respect dkx = 2pi shear dky
-      Lx_adapted = Ly/(2._xp*pi*shear*Npol)
+      Lx_adapted = Ly/(2._xp*pi*shear*Npol*Cyq0_x0)
       ! Put Nexc to 0 so that it is computed from a target value Lx
       IF(Nexc .EQ. 0) THEN
         Nexc = CEILING(0.9 * Lx/Lx_adapted)
@@ -414,60 +488,36 @@ CONTAINS
       ! x length is adapted
       Lx = Lx_adapted*Nexc
     ENDIF
-    Nkx       = Nx
-    total_nkx = Nx
-    ! Local data
-    ! Start and END indices of grid
-    ikxs = 1
-    ikxe = total_nkx
-    local_nkx_ptr = ikxe - ikxs + 1
-    local_nkx     = ikxe - ikxs + 1
-    local_nkx_offset = ikxs - 1
-    ALLOCATE(kxarray(local_nkx))
-    ALLOCATE(kxarray_full(total_nkx))
-    IF (Nx .EQ. 1) THEN
-      deltakx         = 1._xp
-      kxarray(1)      = 0._xp
-      ikx0           = 1
-      contains_kx0    = .true.
-      kx_max          = 0._xp
-      ikx_max         = 1
-      kx_min          = 0._xp
-      kxarray_full(1) = 0._xp
-      local_kxmax     = 0._xp
-    ELSE ! Build apprpopriate grid
-      deltakx      = 2._xp*PI/Lx
-      IF(MODULO(total_nkx,2) .EQ. 0) THEN ! Even number of kx (-2 -1 0 1 2 3)
-        ! Creating a grid ordered as dk*(0 1 2 3 -2 -1)
-        DO ikx = 1,total_nkx
-          kxarray_full(ikx) = deltakx*REAL(MODULO(ikx-1,total_nkx/2)-(total_nkx/2)*FLOOR(2.*real(ikx-1)/real(total_nkx)),xp)
-          IF (ikx .EQ. total_nkx/2+1) kxarray_full(ikx) = -kxarray_full(ikx)
-        END DO
-        kx_max = MAXVAL(kxarray_full)!(total_nkx/2)*deltakx
-        kx_min = MINVAL(kxarray_full)!-kx_max+deltakx
-        ! Set local grid (not parallelized so same as full one)
-        local_kxmax = 0._xp
-        DO ikx = 1,local_nkx
-          kxarray(ikx) = kxarray_full(ikx+local_nkx_offset)
-          ! Finding kx=0
-          IF (kxarray(ikx) .EQ. 0) THEN
-            ikx0 = ikx
-            contains_kx0 = .true.
-          ENDIF
-          ! Finding local kxmax
-          IF (ABS(kxarray(ikx)) .GT. local_kxmax) THEN
-            local_kxmax = ABS(kxarray(ikx))
-            ikx_max = ikx
-          ENDIF
-        END DO
-      ELSE ! Odd number of kx (-2 -1 0 1 2)
-        ERROR STOP "Gyacomo is safer with a even Kx number"
-      ENDIF
+    deltakx      = 2._xp*PI/Lx
+    IF(MODULO(total_nkx,2) .EQ. 0) THEN ! Even number of kx (-2 -1 0 1 2 3)
+      ! Creating a grid ordered as dk*(0 1 2 3 -2 -1)
+      DO ikx = 1,total_nkx
+        kxarray_full(ikx) = deltakx*REAL(MODULO(ikx-1,total_nkx/2)-(total_nkx/2)*FLOOR(2.*real(ikx-1)/real(total_nkx)),xp)
+        IF (ikx .EQ. total_nkx/2+1) kxarray_full(ikx) = -kxarray_full(ikx)
+      END DO
+      kx_max = MAXVAL(kxarray_full)!(total_nkx/2)*deltakx
+      kx_min = MINVAL(kxarray_full)!-kx_max+deltakx
+      ! Set local grid (not parallelized so same as full one)
+      local_kxmax = 0._xp
+      DO ikx = 1,local_nkx
+        kxarray(ikx) = kxarray_full(ikx+local_nkx_offset)
+        ! Finding kx=0
+        IF (kxarray(ikx) .EQ. 0) THEN
+          ikx0 = ikx
+          contains_kx0 = .true.
+        ENDIF
+        ! Finding local kxmax
+        IF (ABS(kxarray(ikx)) .GT. local_kxmax) THEN
+          local_kxmax = ABS(kxarray(ikx))
+          ikx_max = ikx
+        ENDIF
+      END DO
+    ELSE ! Odd number of kx (-2 -1 0 1 2)
+      ERROR STOP "Gyacomo is safer with an even Kx number"
     ENDIF
     ! Orszag 2/3 filter
     two_third_kxmax = 2._xp/3._xp*kx_max;
     ! Antialiasing filter
-    ALLOCATE(AA_x(local_nkx))
     DO ikx = 1,local_nkx
       IF ( ((kxarray(ikx) .GT. -two_third_kxmax) .AND. &
            (kxarray(ikx) .LT. two_third_kxmax))   .OR. (LINEARITY .EQ. 'linear')) THEN
@@ -486,11 +536,9 @@ CONTAINS
 
   SUBROUTINE set_zgrid(Npol)
     USE prec_const
-    USE parallel, ONLY: num_procs_z, rank_z
     IMPLICIT NONE
     REAL(xp):: grid_shift, Lz, zmax, zmin, Npol
-    INTEGER :: istart, iend, in, iz, ig, eo, iglob
-    total_nz = Nz
+    INTEGER :: iz, ig, eo
     ! Length of the flux tube (in ballooning angle)
     Lz         = 2._xp*pi*Npol
     ! Z stepping (#interval = #points since periodic)
@@ -499,80 +547,48 @@ CONTAINS
     ! Parallel hyperdiffusion coefficient
     diff_dz_coeff = (deltaz/2._xp)**4 ! adaptive fourth derivative (~GENE)
     IF (SG) THEN
-      CALL speak('--2 staggered z grids--')
       grid_shift = deltaz/2._xp
-      ! indices for even p and odd p grids (used in kernel, jacobian, gij etc.)
-      ieven  = 1
-      iodd   = 2
-      nzgrid = 2
     ELSE
       grid_shift = 0._xp
-      ieven  = 1
-      iodd   = 1
-      nzgrid = 1
-    ENDIF
-    ! Build the full grids on process 0 to diagnose it without comm
-    ALLOCATE(zarray_full(total_nz))
-    IF (Nz .EQ. 1) Npol = 0._xp
-      zmax = 0; zmin = 0;
-      DO iz = 1,total_nz ! z in [-pi pi-dz] x Npol
-        zarray_full(iz) = REAL(iz-1,xp)*deltaz - Lz/2._xp
-        IF(zarray_full(iz) .GT. zmax) zmax = zarray_full(iz)
-        IF(zarray_full(iz) .LT. zmin) zmin = zarray_full(iz)
-      END DO
-      !! Parallel data distribution
-      IF( (Nz .EQ. 1) .AND. (num_procs_z .GT. 1) ) &
-      ERROR STOP '>> ERROR << Cannot have multiple core in z-direction (Nz = 1)'
-      ! Local data distribution
-      CALL decomp1D(total_nz, num_procs_z, rank_z, izs, ize)
-      local_nz        = ize - izs + 1
-      local_nz_offset = izs - 1
-      ! Ghosts boundaries (depend on the order of z operators)
-      IF(Nz .EQ. 1) THEN
-        ngz              = 0
-        zarray_full(izs) = 0
-    ELSEIF(Nz .GE. 4) THEN
-      ngz =4
-      IF(mod(Nz,2) .NE. 0 ) THEN
-        ERROR STOP '>> ERROR << Nz must be an even number for Simpson integration rule !!!!'
-     ENDIF
-    ELSE
-      ERROR STOP '>> ERROR << Nz is not appropriate!!'
     ENDIF
-    ! List of shift and local numbers between the different processes (used in scatterv and gatherv)
-    ALLOCATE(counts_nz (num_procs_z))
-    ALLOCATE(displs_nz (num_procs_z))
-    DO in = 0,num_procs_z-1
-      CALL decomp1D(total_nz, num_procs_z, in, istart, iend)
-      counts_nz(in+1) = iend-istart+1
-      displs_nz(in+1) = istart-1
-    ENDDO
+    ! Build the full grids on process 0 to diagnose it without comm   
+    IF (Nz .EQ. 1) &
+      Npol = 0._xp
+    zmax = 0; zmin = 0;
+    DO iz = 1,total_nz ! z in [-pi pi-dz] x Npol
+      zarray_full(iz) = REAL(iz-1,xp)*deltaz - Lz/2._xp
+      IF(zarray_full(iz) .GT. zmax) zmax = zarray_full(iz)
+      IF(zarray_full(iz) .LT. zmin) zmin = zarray_full(iz)
+    END DO
+    IF(Nz .EQ. 1) &
+      zarray_full(izs) = 0
     ! Local z array
-    ALLOCATE(zarray(local_nz+ngz,nzgrid))
-    !! interior point loop
     DO iz = 1,local_nz
       DO eo = 1,nzgrid
         zarray(iz+ngz/2,eo) = zarray_full(iz+local_nz_offset) + REAL(eo-1,xp)*grid_shift
       ENDDO
     ENDDO
-    CALL allocate_array(local_zmax,1,nzgrid)
-    CALL allocate_array(local_zmin,1,nzgrid)
     DO eo = 1,nzgrid
       ! Find local extrema
       local_zmax(eo) = zarray(local_nz+ngz/2,eo)
       local_zmin(eo) = zarray(1+ngz/2,eo)
       ! Periodic z \in (-pi pi-dz)
-      DO ig = 1,ngz/2 ! first ghost cells
-        iglob = ig+local_nz_offset-ngz/2
-        IF (iglob .LE. 0) &
-          iglob = iglob + total_nz
-        zarray(ig,eo) = zarray_full(iglob)
-      ENDDO
-      DO ig = local_nz+ngz/2,local_nz+ngz ! last ghost cells
-        iglob = ig+local_nz_offset-ngz/2
-        IF (iglob .GT. total_nz) &
-          iglob = iglob - total_nz
-        zarray(ig,eo) = zarray_full(iglob)
+      ! DO ig = 1,ngz/2 ! first ghost cells
+      !   iglob = ig+local_nz_offset-ngz/2
+      !   IF (iglob .LE. 0) &
+      !     iglob = iglob + total_nz
+      !   zarray(ig,eo) = zarray_full(iglob)
+      ! ENDDO
+      ! DO ig = local_nz+ngz/2,local_nz+ngz ! last ghost cells
+      !   iglob = ig+local_nz_offset-ngz/2
+      !   IF (iglob .GT. total_nz) &
+      !     iglob = iglob - total_nz
+      !   zarray(ig,eo) = zarray_full(iglob)
+      ! ENDDO
+      ! continue in z<pi and z>pi
+      DO ig = 1,ngz/2
+        zarray(local_nz+ngz/2+ig,eo) = zarray(local_nz+ngz/2,eo) + ig*deltaz
+        zarray(ig,eo) = zarray(1+ngz/2,eo) - (3-ig)*deltaz
       ENDDO
       ! Set up the flags to know if the process contains the tip and/or the tail
       ! of the z domain (important for z-boundary condition)
@@ -582,7 +598,6 @@ CONTAINS
         contains_zmax = .TRUE.
     ENDDO
     ! local weights for Simpson rule
-    ALLOCATE(zweights_SR(local_nz))
     IF(total_nz .EQ. 1) THEN
       zweights_SR = 1._xp
     ELSE
@@ -597,10 +612,10 @@ CONTAINS
   END SUBROUTINE set_zgrid
 
   SUBROUTINE set_kparray(gxx, gxy, gyy,hatB)
+    IMPLICIT NONE
     REAL(xp), DIMENSION(local_nz+ngz,nzgrid), INTENT(IN) :: gxx,gxy,gyy,hatB
     INTEGER     :: eo,iz,iky,ikx
     REAL(xp)    :: kx, ky
-    CALL allocate_array( kparray, 1,local_nky, 1,local_nkx, 1,local_nz+ngz, 1,nzgrid)
     DO eo = 1,nzgrid
       DO iz = 1,local_nz+ngz
         DO iky = 1,local_nky
diff --git a/src/miller_mod.F90 b/src/miller_mod.F90
index f6b9959fb8882dca4f07e209e9f8a7c9b63b65b5..ed65957ea3a812896b0f859a334924efe6bf9774 100644
--- a/src/miller_mod.F90
+++ b/src/miller_mod.F90
@@ -6,10 +6,9 @@ MODULE miller
   USE basic
   USE parallel, ONLY: my_id, num_procs_z, nbr_U, nbr_D, comm0
   ! use coordinates,only: gcoor, get_dzprimedz
-  USE grid, ONLY: local_Nky, local_Nkx, local_nz, ngz, nzgrid, kyarray, kxarray, zarray, total_nz, local_nz_offset
+  USE grid, ONLY: local_Nky, local_Nkx, local_nz, ngz, nzgrid, kyarray, kxarray, zarray, total_nz, local_nz_offset, iodd, ieven
   ! use discretization
   USE lagrange_interpolation
-  ! use par_in, only: beta, sign_Ip_CW, sign_Bt_CW, Npol
   USE model
 
   implicit none
@@ -29,25 +28,24 @@ CONTAINS
   !>Set defaults for miller parameters
   subroutine set_miller_parameters(kappa_,s_kappa_,delta_,s_delta_,zeta_,s_zeta_)
     real(xp), INTENT(IN) :: kappa_,s_kappa_,delta_,s_delta_,zeta_,s_zeta_
-    rho     = -1.0
+    rho     = -1._xp
     kappa   = kappa_
     s_kappa = s_kappa_
     delta   = delta_
     s_delta = s_delta_
     zeta    = zeta_
     s_zeta  = s_zeta_
-    drR     = 0.0
-    drZ     = 0.0
-
-    thetak = 0.0
-    thetad = 0.0
+    drR     = 0._xp
+    drZ     = 0._xp
+    thetak  = 0._xp
+    thetad = 0._xp
 
   end subroutine set_miller_parameters
 
   !>Get Miller metric, magnetic field, jacobian etc.
   subroutine get_miller(trpeps,major_R,major_Z,q0,shat,Npol,amhd,edge_opt,&
-       C_y,C_xy,xpdx_pm_geom,gxx_,gyy_,gzz_,gxy_,gxz_,gyz_,dBdx_,dBdy_,&
-       Bfield_,jacobian_,dBdz_,R_hat_,Z_hat_,dxdR_,dxdZ_,Ckxky_,gradz_coeff_)
+       C_y,C_xy,Cyq0_x0,xpdx_pm_geom,gxx_,gxy_,gxz_,gyy_,gyz_,gzz_,dBdx_,dBdy_,dBdz_,&
+       Bfield_,jacobian_,R_hat_,Z_hat_,dxdR_,dxdZ_)
     !!!!!!!!!!!!!!!! GYACOMO INTERFACE !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
     real(xp), INTENT(INOUT) :: trpeps          ! eps in gyacomo (inverse aspect ratio)
     real(xp), INTENT(INOUT) :: major_R         ! major radius
@@ -58,20 +56,16 @@ CONTAINS
     real(xp), INTENT(INOUT) :: amhd            ! alpha mhd
     real(xp), INTENT(INOUT) :: edge_opt        ! optimization of point placement
     real(xp), INTENT(INOUT) :: xpdx_pm_geom    ! amplitude mag. eq. pressure grad.
-    real(xp), INTENT(INOUT) :: C_y, C_xy
-    real(xp), dimension(local_nz+ngz,nzgrid), INTENT(INOUT) :: &
-                                              gxx_,gyy_,gzz_,gxy_,gxz_,gyz_,&
-                                              dBdx_,dBdy_,Bfield_,jacobian_,&
-                                              dBdz_,R_hat_,Z_hat_,dxdR_,dxdZ_, &
-                                              gradz_coeff_
-    real(xp), dimension(local_Nky,local_Nkx,local_nz+ngz,nzgrid), INTENT(INOUT) :: Ckxky_
-    INTEGER :: iz, ikx, iky, eo
+    real(xp), INTENT(INOUT) :: C_y, C_xy, Cyq0_x0
+    real(xp), dimension(local_nz+ngz,nzgrid), INTENT(OUT) :: &
+                                              gxx_,gxy_,gxz_,gyy_,gyz_,gzz_,&
+                                              dBdx_,dBdy_,dBdz_,Bfield_,jacobian_,&
+                                              R_hat_,Z_hat_,dxdR_,dxdZ_
+    INTEGER :: iz,eo
     ! No parameter in gyacomo yet
-    real(xp) :: sign_Ip_CW=1 ! current sign (only normal current)
-    real(xp) :: sign_Bt_CW=1 ! current sign (only normal current)
+    real(xp) :: sign_Ip_CW=1._xp ! current sign (only normal current)
+    real(xp) :: sign_Bt_CW=1._xp ! current sign (only normal current)
     !!!!!!!!!!!!!! END GYACOMO INTERFACE !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-    ! Auxiliary variables for curvature computation
-    real(xp) :: G1,G2,G3,Cx,Cy,ky,kx
 
     integer:: np, np_s, Npol_ext, Npol_s
 
@@ -96,9 +90,9 @@ CONTAINS
     real(xp), dimension(500*(Npol+1)):: gxx, gxy, gxz, gyy, gyz, gzz, dtheta_dchi_s, dBp_dchi_s, jacobian, dBdx, dBdz
     real(xp), dimension(500*(Npol+1)):: g_xx, g_xy, g_xz, g_yy, g_yz, g_zz, tmp_arr_s, dxdR_s, dxdZ_s, K_x, K_y !tmp_arr2
 
-    real(xp), dimension(1:total_nz):: gxx_out,gxy_out,gxz_out,gyy_out,gyz_out,gzz_out,Bfield_out,jacobian_out, dBdx_out, dBdz_out, chi_out
-    real(xp), dimension(1:total_nz):: R_out, Z_out, dxdR_out, dxdZ_out
-    real(xp):: d_inv, drPsi, dxPsi, dq_dx, dq_xpsi, R0, Z0, B0, F, D0_full, D1_full, D2_full, D3_full
+    real(xp), dimension(1:total_nz+ngz):: gxx_out,gxy_out,gxz_out,gyy_out,gyz_out,gzz_out,Bfield_out,jacobian_out, dBdx_out, dBdz_out, chi_out
+    real(xp), dimension(1:total_nz+ngz):: R_out, Z_out, dxdR_out, dxdZ_out
+    real(xp):: d_inv, drPsi, dxPsi, dq_dx, dq_dpsi, R0, Z0, B0, F, D0_full, D1_full, D2_full, D3_full
     !real(xp) :: Lnorm, Psi0 ! currently module-wide defined anyway
     real(xp):: pprime, ffprime, D0_mid, D1_mid, D2_mid, D3_mid, dx_drho, pi, mu_0, dzprimedz
     ! real(xp):: rho_a, psiN, drpsiN, CN2, CN3, Rcenter, Zcenter, drRcenter, drZcenter
@@ -111,139 +105,140 @@ CONTAINS
     np_s = 500*Npol_s
 
     rho = trpeps*major_R
-    if (rho.le.0.0) ERROR STOP '>> ERROR << flux surface radius not defined'
+    if (rho.le.0._xp) ERROR STOP '>> ERROR << flux surface radius not defined'
     trpeps = rho/major_R
 
     q0 = sign_Ip_CW * sign_Bt_CW * abs(q0)
 
     R0=major_R
-    B0=1.0*sign_Bt_CW
+    B0=1._xp*sign_Bt_CW
     F=R0*B0
     Z0=major_Z
-    pi = acos(-1.0)
-    mu_0=4.0*pi
+    pi = acos(-1._xp)
+    mu_0=4._xp*pi
 
     theta=linspace(-pi*Npol_ext,pi*Npol_ext-2._xp*pi*Npol_ext/np,np)
     d_inv=asin(delta)
 
-    thetaShift = 0.0
+    thetaShift = 0._xp
     iBmax = 1
+    bMaxShift = .true. ! limits the initialization routine to run no more than twice
+    do while (bMaxShift)
+      !flux surface parametrization
+      thAdj = theta + thetaShift
+      if (zeta/=0._xp .or. s_zeta/=0._xp) then
+        R = R0 + rho*Cos(thAdj + d_inv*Sin(thAdj))
+        Z = Z0 + kappa*rho*Sin(thAdj + zeta*Sin(2*thAdj))
+
+        R_rho = drR + Cos(thAdj + d_inv*Sin(thAdj)) - s_delta*Sin(thAdj)*Sin(thAdj + d_inv*Sin(thAdj))
+        Z_rho = drZ + kappa*s_zeta*Cos(thAdj + zeta*Sin(2*thAdj))*Sin(2*thAdj) &
+              + kappa*Sin(thAdj + zeta*Sin(2*thAdj)) + kappa*s_kappa*Sin(thAdj + zeta*Sin(2*thAdj))
+
+        R_theta = -(rho*(1 + d_inv*Cos(thAdj))*Sin(thAdj + d_inv*Sin(thAdj)))
+        Z_theta = kappa*rho*(1 + 2*zeta*Cos(2*thAdj))*Cos(thAdj + zeta*Sin(2*thAdj))
+
+        R_theta_theta = -(rho*(1 + d_inv*Cos(thAdj))**2*Cos(thAdj + d_inv*Sin(thAdj))) &
+              + d_inv*rho*Sin(thAdj)*Sin(thAdj + d_inv*Sin(thAdj))
+        Z_theta_theta = -4*kappa*rho*zeta*Cos(thAdj + zeta*Sin(2*thAdj))*Sin(2*thAdj) &
+              - kappa*rho*(1 + 2*zeta*Cos(2*thAdj))**2*Sin(thAdj + zeta*Sin(2*thAdj))
+      else
+        Rcirc = rho*Cos(thAdj - thetad + thetak)
+        Zcirc = rho*Sin(thAdj - thetad + thetak)
+        Relong = Rcirc
+        Zelong = Zcirc + (-1 + kappa)*rho*Sin(thAdj - thetad + thetak)
+        RelongTilt = Relong*Cos(thetad - thetak) - Zelong*Sin(thetad - thetak)
+        ZelongTilt = Zelong*Cos(thetad - thetak) + Relong*Sin(thetad - thetak)
+        Rtri = RelongTilt - rho*Cos(thAdj) + rho*Cos(thAdj + delta*Sin(thAdj))
+        Ztri = ZelongTilt
+        RtriTilt = Rtri*Cos(thetad) + Ztri*Sin(thetad)
+        ZtriTilt = Ztri*Cos(thetad) - Rtri*Sin(thetad)
+        R = R0 + RtriTilt
+        Z = Z0 + ZtriTilt
+
+        drRcirc = Cos(thAdj - thetad + thetak)
+        drZcirc = Sin(thAdj - thetad + thetak)
+        drRelong = drRcirc
+        drZelong = drZcirc - (1 - kappa - kappa*s_kappa)*Sin(thAdj - thetad + thetak)
+        drRelongTilt = drRelong*Cos(thetad - thetak) - drZelong*Sin(thetad - thetak)
+        drZelongTilt = drZelong*Cos(thetad - thetak) + drRelong*Sin(thetad - thetak)
+        drRtri = drRelongTilt - Cos(thAdj) + Cos(thAdj + delta*Sin(thAdj)) &
+              - s_delta*Sin(thAdj)*Sin(thAdj + delta*Sin(thAdj))
+        drZtri = drZelongTilt
+        drRtriTilt = drRtri*Cos(thetad) + drZtri*Sin(thetad)
+        drZtriTilt = drZtri*Cos(thetad) - drRtri*Sin(thetad)
+        R_rho = drR + drRtriTilt
+        Z_rho = drZ + drZtriTilt
+
+        dtRcirc = -(rho*Sin(thAdj - thetad + thetak))
+        dtZcirc = rho*Cos(thAdj - thetad + thetak)
+        dtRelong = dtRcirc
+        dtZelong = dtZcirc + (-1 + kappa)*rho*Cos(thAdj - thetad + thetak)
+        dtRelongTilt = dtRelong*Cos(thetad - thetak) - dtZelong*Sin(thetad - thetak)
+        dtZelongTilt = dtZelong*Cos(thetad - thetak) + dtRelong*Sin(thetad - thetak)
+        dtRtri = dtRelongTilt + rho*Sin(thAdj) - rho*(1 + delta*Cos(thAdj))*Sin(thAdj + delta*Sin(thAdj))
+        dtZtri = dtZelongTilt
+        dtRtriTilt = dtRtri*Cos(thetad) + dtZtri*Sin(thetad)
+        dtZtriTilt = dtZtri*Cos(thetad) - dtRtri*Sin(thetad)
+        R_theta = dtRtriTilt
+        Z_theta = dtZtriTilt
+
+        dtdtRcirc = -(rho*Cos(thAdj - thetad + thetak))
+        dtdtZcirc = -(rho*Sin(thAdj - thetad + thetak))
+        dtdtRelong = dtdtRcirc
+        dtdtZelong = dtdtZcirc - (-1 + kappa)*rho*Sin(thAdj - thetad + thetak)
+        dtdtRelongTilt = dtdtRelong*Cos(thetad - thetak) - dtdtZelong*Sin(thetad - thetak)
+        dtdtZelongTilt = dtdtZelong*Cos(thetad - thetak) + dtdtRelong*Sin(thetad - thetak)
+        dtdtRtri = dtdtRelongTilt + rho*Cos(thAdj) - rho*(1 + delta*Cos(thAdj))**2*Cos(thAdj + delta*Sin(thAdj)) &
+              + delta*rho*Sin(thAdj)*Sin(thAdj + delta*Sin(thAdj))
+        dtdtZtri = dtdtZelongTilt
+        dtdtRtriTilt = dtdtRtri*Cos(thetad) + dtdtZtri*Sin(thetad)
+        dtdtZtriTilt = dtdtZtri*Cos(thetad) - dtdtRtri*Sin(thetad)
+        R_theta_theta = dtdtRtriTilt
+        Z_theta_theta = dtdtZtriTilt
+      endif
 
-    !flux surface parametrization
-    thAdj = theta + thetaShift
-    if (zeta/=0.0 .or. s_zeta/=0.0) then
-       R = R0 + rho*Cos(thAdj + d_inv*Sin(thAdj))
-       Z = Z0 + kappa*rho*Sin(thAdj + zeta*Sin(2*thAdj))
-
-       R_rho = drR + Cos(thAdj + d_inv*Sin(thAdj)) - s_delta*Sin(thAdj)*Sin(thAdj + d_inv*Sin(thAdj))
-       Z_rho = drZ + kappa*s_zeta*Cos(thAdj + zeta*Sin(2*thAdj))*Sin(2*thAdj) &
-            + kappa*Sin(thAdj + zeta*Sin(2*thAdj)) + kappa*s_kappa*Sin(thAdj + zeta*Sin(2*thAdj))
-
-       R_theta = -(rho*(1 + d_inv*Cos(thAdj))*Sin(thAdj + d_inv*Sin(thAdj)))
-       Z_theta = kappa*rho*(1 + 2*zeta*Cos(2*thAdj))*Cos(thAdj + zeta*Sin(2*thAdj))
-
-       R_theta_theta = -(rho*(1 + d_inv*Cos(thAdj))**2*Cos(thAdj + d_inv*Sin(thAdj))) &
-            + d_inv*rho*Sin(thAdj)*Sin(thAdj + d_inv*Sin(thAdj))
-       Z_theta_theta = -4*kappa*rho*zeta*Cos(thAdj + zeta*Sin(2*thAdj))*Sin(2*thAdj) &
-            - kappa*rho*(1 + 2*zeta*Cos(2*thAdj))**2*Sin(thAdj + zeta*Sin(2*thAdj))
-    else
-       Rcirc = rho*Cos(thAdj - thetad + thetak)
-       Zcirc = rho*Sin(thAdj - thetad + thetak)
-       Relong = Rcirc
-       Zelong = Zcirc + (-1 + kappa)*rho*Sin(thAdj - thetad + thetak)
-       RelongTilt = Relong*Cos(thetad - thetak) - Zelong*Sin(thetad - thetak)
-       ZelongTilt = Zelong*Cos(thetad - thetak) + Relong*Sin(thetad - thetak)
-       Rtri = RelongTilt - rho*Cos(thAdj) + rho*Cos(thAdj + delta*Sin(thAdj))
-       Ztri = ZelongTilt
-       RtriTilt = Rtri*Cos(thetad) + Ztri*Sin(thetad)
-       ZtriTilt = Ztri*Cos(thetad) - Rtri*Sin(thetad)
-       R = R0 + RtriTilt
-       Z = Z0 + ZtriTilt
-
-       drRcirc = Cos(thAdj - thetad + thetak)
-       drZcirc = Sin(thAdj - thetad + thetak)
-       drRelong = drRcirc
-       drZelong = drZcirc - (1 - kappa - kappa*s_kappa)*Sin(thAdj - thetad + thetak)
-       drRelongTilt = drRelong*Cos(thetad - thetak) - drZelong*Sin(thetad - thetak)
-       drZelongTilt = drZelong*Cos(thetad - thetak) + drRelong*Sin(thetad - thetak)
-       drRtri = drRelongTilt - Cos(thAdj) + Cos(thAdj + delta*Sin(thAdj)) &
-            - s_delta*Sin(thAdj)*Sin(thAdj + delta*Sin(thAdj))
-       drZtri = drZelongTilt
-       drRtriTilt = drRtri*Cos(thetad) + drZtri*Sin(thetad)
-       drZtriTilt = drZtri*Cos(thetad) - drRtri*Sin(thetad)
-       R_rho = drR + drRtriTilt
-       Z_rho = drZ + drZtriTilt
-
-       dtRcirc = -(rho*Sin(thAdj - thetad + thetak))
-       dtZcirc = rho*Cos(thAdj - thetad + thetak)
-       dtRelong = dtRcirc
-       dtZelong = dtZcirc + (-1 + kappa)*rho*Cos(thAdj - thetad + thetak)
-       dtRelongTilt = dtRelong*Cos(thetad - thetak) - dtZelong*Sin(thetad - thetak)
-       dtZelongTilt = dtZelong*Cos(thetad - thetak) + dtRelong*Sin(thetad - thetak)
-       dtRtri = dtRelongTilt + rho*Sin(thAdj) - rho*(1 + delta*Cos(thAdj))*Sin(thAdj + delta*Sin(thAdj))
-       dtZtri = dtZelongTilt
-       dtRtriTilt = dtRtri*Cos(thetad) + dtZtri*Sin(thetad)
-       dtZtriTilt = dtZtri*Cos(thetad) - dtRtri*Sin(thetad)
-       R_theta = dtRtriTilt
-       Z_theta = dtZtriTilt
-
-       dtdtRcirc = -(rho*Cos(thAdj - thetad + thetak))
-       dtdtZcirc = -(rho*Sin(thAdj - thetad + thetak))
-       dtdtRelong = dtdtRcirc
-       dtdtZelong = dtdtZcirc - (-1 + kappa)*rho*Sin(thAdj - thetad + thetak)
-       dtdtRelongTilt = dtdtRelong*Cos(thetad - thetak) - dtdtZelong*Sin(thetad - thetak)
-       dtdtZelongTilt = dtdtZelong*Cos(thetad - thetak) + dtdtRelong*Sin(thetad - thetak)
-       dtdtRtri = dtdtRelongTilt + rho*Cos(thAdj) - rho*(1 + delta*Cos(thAdj))**2*Cos(thAdj + delta*Sin(thAdj)) &
-            + delta*rho*Sin(thAdj)*Sin(thAdj + delta*Sin(thAdj))
-       dtdtZtri = dtdtZelongTilt
-       dtdtRtriTilt = dtdtRtri*Cos(thetad) + dtdtZtri*Sin(thetad)
-       dtdtZtriTilt = dtdtZtri*Cos(thetad) - dtdtRtri*Sin(thetad)
-       R_theta_theta = dtdtRtriTilt
-       Z_theta_theta = dtdtZtriTilt
-    endif
-
-    !dl/dtheta
-    dlp=(R_theta**2+Z_theta**2)**0.5
-
-    !curvature radius
-    Rc=dlp**3*(R_theta*Z_theta_theta-Z_theta*R_theta_theta)**(-1)
-
-    ! some useful quantities (see papers for definition of u)
-    cosu=Z_theta/dlp
-    sinu=-R_theta/dlp
-
-    !Jacobian J_r = (dPsi/dr) J_psi = (dPsi/dr) / [(nabla fz x nabla psi)* nabla theta]
-    !             = R * (dR/drho dZ/dtheta - dR/dtheta dZ/drho) = R dlp / |nabla r|
-    J_r=R*(R_rho*Z_theta-R_theta*Z_rho)
-
-    !From definition of q = 1/(2 pi) int (B nabla fz) / (B nabla theta) dtheta:
-    !dPsi/dr = sign_Bt sign_Ip / (2 pi q) int F / R^2 J_r dtheta
-    !        = F / (2 pi |q|) int J_r/R^2 dtheta
-    tmp_arr=J_r/R**2
-    drPsi=sign_Ip_CW*F/(2.*pi*Npol_ext*q0)*sum(tmp_arr)*2*pi*Npol_ext/np !dlp_int(tmp_arr,1.0)
-
-    !Poloidal field (Bp = Bvec * nabla l)
-    Bp=sign_Ip_CW * drPsi / J_r * dlp
-
-    !toroidal field
-    Bphi=F/R
-
-    !total modulus of Bfield
-    B=sqrt(Bphi**2+Bp**2)
-
-    bMaxShift = .false.
-    ! if (thetaShift==0.0.and.trim(magn_geometry).ne.'miller_general') then
-    if (thetaShift==0.0) then
-      do i = 2,np-1
-         if (B(iBmax)<B(i)) then
-            iBmax = i
-         end if
-      enddo
-      if (iBmax/=1) then
-         bMaxShift = .true.
-         thetaShift = theta(iBmax)-theta(1)
+      !dl/dtheta
+      dlp=(R_theta**2+Z_theta**2)**0.5_xp
+
+      !curvature radius
+      Rc=dlp**3*(R_theta*Z_theta_theta-Z_theta*R_theta_theta)**(-1)
+
+      ! some useful quantities (see papers for definition of u)
+      cosu=Z_theta/dlp
+      sinu=-R_theta/dlp
+
+      !Jacobian J_r = (dPsi/dr) J_psi = (dPsi/dr) / [(nabla fz x nabla psi)* nabla theta]
+      !             = R * (dR/drho dZ/dtheta - dR/dtheta dZ/drho) = R dlp / |nabla r|
+      J_r=R*(R_rho*Z_theta-R_theta*Z_rho)
+
+      !From definition of q = 1/(2 pi) int (B nabla fz) / (B nabla theta) dtheta:
+      !dPsi/dr = sign_Bt sign_Ip / (2 pi q) int F / R^2 J_r dtheta
+      !        = F / (2 pi |q|) int J_r/R^2 dtheta
+      tmp_arr=J_r/R**2
+      drPsi=sign_Ip_CW*F/(2._xp*pi*Npol_ext*q0)*sum(tmp_arr)*2._xp*pi*Npol_ext/np !dlp_int(tmp_arr,1.0)
+
+      !Poloidal field (Bp = Bvec * nabla l)
+      Bp=sign_Ip_CW * drPsi / J_r * dlp
+
+      !toroidal field
+      Bphi=F/R
+
+      !total modulus of Bfield
+      B=sqrt(Bphi**2+Bp**2)
+
+      bMaxShift = .false.
+      if (thetaShift==0._xp) then
+        do i = 2,np-1
+          if (B(iBmax)<B(i)) then
+              iBmax = i
+          end if
+        enddo
+        if (iBmax/=1) then
+          bMaxShift = .true.
+          thetaShift = theta(iBmax)-theta(1)
+        end if
       end if
-    end if
+    enddo
 
     !definition of radial coordinate! dx_drho=1 --> x = r
     dx_drho=1. !drPsi/Psi0*Lnorm*q0
@@ -257,14 +252,15 @@ CONTAINS
             "Setting C_xy = ",C_xy,' C_y = ', C_y," C_x' = ", 1./dxPsi
        write(*,'(A,ES12.4)') "B_unit/Bref conversion factor = ", q0/rho*drPsi
        write(*,'(A,ES12.4)') "dPsi/dr = ", drPsi
-       if (thetaShift.ne.0.0) write(*,'(A,ES12.4)') "thetaShift = ", thetaShift
+       if (thetaShift.ne.0._xp) write(*,'(A,ES12.4)') "thetaShift = ", thetaShift
     endif
 
 
     !--------shear is expected to be defined as rho/q*dq/drho--------!
-    dq_dx=shat*q0/rho/dx_drho
-    dq_xpsi=dq_dx/dxPsi
-    pprime=-amhd/q0**2/R0/(2*mu_0)*B0**2/drPsi
+    dq_dx   = shat*q0/rho/dx_drho
+    Cyq0_x0 = C_y*q0/rho
+    dq_dpsi = dq_dx/dxPsi
+    pprime  = -amhd/q0**2/R0/(2*mu_0)*B0**2/drPsi
 
     !neg. xpdx normalized to magnetic pressure for pressure term
     xpdx_pm_geom=amhd/q0**2/R0/dx_drho
@@ -274,7 +270,7 @@ CONTAINS
 
     !integrals for ffprime evaluation
     do i=1,np
-       tmp_arr=(2./Rc-2.*cosu/R)/(R*psi1**2)
+       tmp_arr=(2._xp/Rc-2._xp*cosu/R)/(R*psi1**2)
        D0(i)=-F*dlp_int_ind(tmp_arr,dlp,i)
        tmp_arr=B**2*R/psi1**3
        D1(i)=-dlp_int_ind(tmp_arr,dlp,i)/F
@@ -283,7 +279,7 @@ CONTAINS
        tmp_arr=1./(R*psi1)
        D3(i)=-dlp_int_ind(tmp_arr,dlp,i)*F
     enddo
-    tmp_arr=(2./Rc-2.*cosu/R)/(R*psi1**2)
+    tmp_arr=(2._xp/Rc-2._xp*cosu/R)/(R*psi1**2)
     D0_full=-F*dlp_int(tmp_arr,dlp)
     tmp_arr=B**2*R/psi1**3
     D1_full=-dlp_int(tmp_arr,dlp)/F
@@ -296,7 +292,7 @@ CONTAINS
     D2_mid=D2(np/2+1)
     D3_mid=D3(np/2+1)
 
-    ffprime=-(sign_Ip_CW*dq_xpsi*2.*pi*Npol_ext+D0_full+D2_full*pprime)/D1_full
+    ffprime=-(sign_Ip_CW*dq_dpsi*2._xp*pi*Npol_ext+D0_full+D2_full*pprime)/D1_full
 
     if (my_id==0) then
        write(*,'(A,ES12.4)') "ffprime = ", ffprime
@@ -318,7 +314,7 @@ CONTAINS
     !new grid equidistant in straight field line angle
     chi_s = linspace(-pi*Npol_s,pi*Npol_s-2*pi*Npol_s/np_s,np_s)
 
-    if (sign_Ip_CW.lt.0.0) then !make chi increasing function to not confuse lag3interp
+    if (sign_Ip_CW.lt.0._xp) then !make chi increasing function to not confuse lag3interp
        tmp_reverse = chi(np:1:-1)
        theta_reverse = theta(np:1:-1)
        call lag3interp(theta_reverse,tmp_reverse,np,theta_s,chi_s,np_s)
@@ -332,7 +328,7 @@ CONTAINS
     !arrays equidistant in straight field line angle
     thAdj_s = theta_s + thetaShift
 
-    if (zeta/=0.0 .or. s_zeta/=0.0) then
+    if (zeta/=0._xp .or. s_zeta/=0._xp) then
       R_s = R0 + rho*Cos(thAdj_s + d_inv*Sin(thAdj_s))
       Z_s = Z0 + kappa*rho*Sin(thAdj_s + zeta*Sin(2*thAdj_s))
 
@@ -366,7 +362,7 @@ CONTAINS
       R_theta_s = dtheta_dchi_s*dtRtriTilt_s
       Z_theta_s = dtheta_dchi_s*dtZtriTilt_s
     endif
-    if (sign_Ip_CW.lt.0.0) then
+    if (sign_Ip_CW.lt.0._xp) then
        call lag3interp(nu1,theta,np,tmp_arr_s,theta_s_reverse,np_s)
        nu1_s = tmp_arr_s(np_s:1:-1)
        call lag3interp(Bp,theta,np,tmp_arr_s,theta_s_reverse,np_s)
@@ -402,13 +398,13 @@ CONTAINS
     dnu_dl_s=-F/(R_s*psi1_s)
     grad_nu_s=sqrt(dnu_drho_s**2+dnu_dl_s**2)
 
-    !contravariant metric coefficients (varrho,l,fz)->(x,y,z)
+    !contravariant metric coefficients (varrho,l,phi)->(x,y,z)
     gxx=(psi1_s/dxPsi)**2
     gxy=-psi1_s/dxPsi*C_y*sign_Ip_CW*nu1_s
-    gxz=-psi1_s/dxPsi*(nu1_s+psi1_s*dq_xpsi*chi_s)/q0
+    gxz=-psi1_s/dxPsi*(nu1_s+psi1_s*dq_dpsi*chi_s)/q0
     gyy=C_y**2*(grad_nu_s**2+1/R_s**2)
-    gyz=sign_Ip_CW*C_y/q0*(grad_nu_s**2+dq_xpsi*nu1_s*psi1_s*chi_s)
-    gzz=1./q0**2*(grad_nu_s**2+2.*dq_xpsi*nu1_s*psi1_s*chi_s+(dq_xpsi*psi1_s*chi_s)**2)
+    gyz=sign_Ip_CW*C_y/q0*(grad_nu_s**2+dq_dpsi*nu1_s*psi1_s*chi_s)
+    gzz=1./q0**2*(grad_nu_s**2+2.*dq_dpsi*nu1_s*psi1_s*chi_s+(dq_dpsi*psi1_s*chi_s)**2)
 
     jacobian=1./sqrt(gxx*gyy*gzz + 2.*gxy*gyz*gxz - gxz**2*gyy - gyz**2*gxx - gzz*gxy**2)
 
@@ -422,32 +418,35 @@ CONTAINS
 
     !Bfield derivatives
     !dBdx = e_x * nabla B = J (nabla y x nabla z) * nabla B
-    dBdx=jacobian*C_y/(q0*R_s)*(F/(R_s*psi1_s)*dB_drho_s+(nu1_s+dq_xpsi*chi_s*psi1_s)*dB_dl_s)
+    dBdx=jacobian*C_y/(q0*R_s)*(F/(R_s*psi1_s)*dB_drho_s+(nu1_s+dq_dpsi*chi_s*psi1_s)*dB_dl_s)
     dBdz=1./B_s*(Bp_s*dBp_dchi_s-F**2/R_s**3*R_theta_s)
 
-    !curvature terms (these are just local and will be recalculated in geometry.F90)
-    K_x = (0.-g_yz/g_zz*dBdz)
+    !curvature terms (these are just local and will be recalculated in geometry module)
+    K_x = (0._xp-g_yz/g_zz*dBdz)
     K_y = (dBdx-g_xz/g_zz*dBdz)
 
     !(R,Z) derivatives for visualization
     dxdR_s = dx_drho/drPsi*psi1_s*cosu_s
     dxdZ_s = dx_drho/drPsi*psi1_s*sinu_s
-
-    if (edge_opt==0.0) then
-       !gene z-grid
-       chi_out=linspace(-pi*Npol,pi*Npol-2*pi*Npol/total_nz,total_nz)
+    ! GHOSTS ADAPTED VERSION
+    if (edge_opt==0._xp) then
+      !gyacomo z-grid wo ghosts
+      ! chi_out=linspace(-pi*Npol,pi*Npol-2*pi*Npol/total_nz,total_nz)
+      !gyacomo z-grid with ghosts
+      chi_out=linspace(-pi*Npol-4._xp*pi*Npol/total_nz,pi*Npol+2._xp*pi*Npol/total_nz,total_nz+ngz)
     else
+      ERROR STOP '>> ERROR << ghosts not implemented for edge_opt yet'
        !new parallel coordinate chi_out==zprime
        !see also tracer_aux.F90
-       if (Npol>1) ERROR STOP '>> ERROR << Npol>1 has not been implemented for edge_opt=\=0.0'
+       if (Npol>1) ERROR STOP '>> ERROR << Npol>1 has not been implemented for edge_opt=\=0._xp'
        do k=1,total_nz
-          chi_out(k)=sinh((-pi+k*2.*pi/total_nz)*log(edge_opt*pi+sqrt(edge_opt**2*pi**2+1))/pi)/edge_opt
+          chi_out(k)=sinh((-pi+k*2._xp*pi/total_nz)*log(edge_opt*pi+sqrt(edge_opt**2*pi**2+1))/pi)/edge_opt
        enddo
        !transform metrics according to chain rule
        do k=1,np_s
          !>dz'/dz conversion for edge_opt as function of z
           if (edge_opt.gt.0) then
-             dzprimedz = edge_opt*pi/log(edge_opt*pi+sqrt((edge_opt*pi)**2+1))/&
+             dzprimedz = edge_opt*pi/log(edge_opt*pi+sqrt((edge_opt*pi)**2+1._xp))/&
                   sqrt((edge_opt*chi_s(k))**2+1)
           else
              dzprimedz = 1.0
@@ -459,123 +458,117 @@ CONTAINS
           dBdz(k)=dBdz(k)/dzprimedz
        enddo
     endif !edge_opt
-
-    !interpolate down to GENE z-grid
-    call lag3interp(gxx,chi_s,np_s,gxx_out,chi_out,total_nz)
-    call lag3interp(gxy,chi_s,np_s,gxy_out,chi_out,total_nz)
-    call lag3interp(gxz,chi_s,np_s,gxz_out,chi_out,total_nz)
-    call lag3interp(gyy,chi_s,np_s,gyy_out,chi_out,total_nz)
-    call lag3interp(gyz,chi_s,np_s,gyz_out,chi_out,total_nz)
-    call lag3interp(gzz,chi_s,np_s,gzz_out,chi_out,total_nz)
-    call lag3interp(B_s,chi_s,np_s,Bfield_out,chi_out,total_nz)
-    call lag3interp(jacobian,chi_s,np_s,jacobian_out,chi_out,total_nz)
-    call lag3interp(dBdx,chi_s,np_s,dBdx_out,chi_out,total_nz)
-    call lag3interp(dBdz,chi_s,np_s,dBdz_out,chi_out,total_nz)
-    call lag3interp(R_s,chi_s,np_s,R_out,chi_out,total_nz)
-    call lag3interp(Z_s,chi_s,np_s,Z_out,chi_out,total_nz)
-    call lag3interp(dxdR_s,chi_s,np_s,dxdR_out,chi_out,total_nz)
-    call lag3interp(dxdZ_s,chi_s,np_s,dxdZ_out,chi_out,total_nz)
-    ! Fill the interior of the geom arrays with the results
-    do eo=1,nzgrid
-      DO iz = 1,local_nz
-        gxx_(iz+ngz/2,eo)      = gxx_out(iz+local_nz_offset)
-        gyy_(iz+ngz/2,eo)      = gyy_out(iz+local_nz_offset)
-        gxz_(iz+ngz/2,eo)      = gxz_out(iz+local_nz_offset)
-        gyz_(iz+ngz/2,eo)      = gyz_out(iz+local_nz_offset)
-        dBdx_(iz+ngz/2,eo)     = dBdx_out(iz+local_nz_offset)
-        dBdy_(iz+ngz/2,eo)     = 0.
-        gxy_(iz+ngz/2,eo)      = gxy_out(iz+local_nz_offset)
-        gzz_(iz+ngz/2,eo)      = gzz_out(iz+local_nz_offset)
-        Bfield_(iz+ngz/2,eo)   = Bfield_out(iz+local_nz_offset)
-        jacobian_(iz+ngz/2,eo) = jacobian_out(iz+local_nz_offset)
-        dBdz_(iz+ngz/2,eo)     = dBdz_out(iz+local_nz_offset)
-        R_hat_(iz+ngz/2,eo)    = R_out(iz+local_nz_offset)
-        Z_hat_(iz+ngz/2,eo)    = Z_out(iz+local_nz_offset)
-        dxdR_(iz+ngz/2,eo)     = dxdR_out(iz+local_nz_offset)
-        dxdZ_(iz+ngz/2,eo)     = dxdZ_out(iz+local_nz_offset)
-      ENDDO
-      !! UPDATE GHOSTS VALUES (since the miller function in GENE does not)
-      CALL update_ghosts_z(gxx_(:,eo))
-      CALL update_ghosts_z(gyy_(:,eo))
-      CALL update_ghosts_z(gxz_(:,eo))
-      CALL update_ghosts_z(gxy_(:,eo))
-      CALL update_ghosts_z(gzz_(:,eo))
-      CALL update_ghosts_z(Bfield_(:,eo))
-      CALL update_ghosts_z(dBdx_(:,eo))
-      CALL update_ghosts_z(dBdy_(:,eo))
-      CALL update_ghosts_z(dBdz_(:,eo))
-      CALL update_ghosts_z(jacobian_(:,eo))
-      CALL update_ghosts_z(R_hat_(:,eo))
-      CALL update_ghosts_z(Z_hat_(:,eo))
-      CALL update_ghosts_z(dxdR_(:,eo))
-      CALL update_ghosts_z(dxdZ_(:,eo))
-
-      ! Curvature operator (Frei et al. 2022 eq 2.15)
-      DO iz = 1,local_nz+ngz
-        G1 = gxy_(iz,eo)*gxy_(iz,eo)-gxx_(iz,eo)*gyy_(iz,eo)
-        G2 = gxy_(iz,eo)*gxz_(iz,eo)-gxx_(iz,eo)*gyz_(iz,eo)
-        G3 = gyy_(iz,eo)*gxz_(iz,eo)-gxy_(iz,eo)*gyz_(iz,eo)
-        Cx = (G1*dBdy_(iz,eo) + G2*dBdz_(iz,eo))/Bfield_(iz,eo)
-        Cy = (G3*dBdz_(iz,eo) - G1*dBdx_(iz,eo))/Bfield_(iz,eo)
-
-        DO iky = 1,local_Nky
-          ky = kyarray(iky)
-          DO ikx= 1,local_Nkx
-            kx = kxarray(ikx)
-            Ckxky_(iky, ikx, iz,eo) = Cx*kx + Cy*ky
-          ENDDO
-        ENDDO
-        ! coefficient in the front of parallel derivative
-        gradz_coeff_(iz,eo) = 1._xp / jacobian_(iz,eo) / Bfield_(iz,eo)
+    ! interpolate with ghosts
+    call lag3interp(gxx,chi_s,np_s,gxx_out,chi_out,total_nz+ngz)
+    call lag3interp(gxy,chi_s,np_s,gxy_out,chi_out,total_nz+ngz)
+    call lag3interp(gxz,chi_s,np_s,gxz_out,chi_out,total_nz+ngz)
+    call lag3interp(gyy,chi_s,np_s,gyy_out,chi_out,total_nz+ngz)
+    call lag3interp(gyz,chi_s,np_s,gyz_out,chi_out,total_nz+ngz)
+    call lag3interp(gzz,chi_s,np_s,gzz_out,chi_out,total_nz+ngz)
+    call lag3interp(B_s,chi_s,np_s,Bfield_out,chi_out,total_nz+ngz)
+    call lag3interp(dBdx,chi_s,np_s,dBdx_out,chi_out,total_nz+ngz)
+    call lag3interp(dBdz,chi_s,np_s,dBdz_out,chi_out,total_nz+ngz)
+    call lag3interp(jacobian,chi_s,np_s,jacobian_out,chi_out,total_nz+ngz)
+    call lag3interp(R_s,chi_s,np_s,R_out,chi_out,total_nz+ngz)
+    call lag3interp(Z_s,chi_s,np_s,Z_out,chi_out,total_nz+ngz)
+    call lag3interp(dxdR_s,chi_s,np_s,dxdR_out,chi_out,total_nz+ngz)
+    call lag3interp(dxdZ_s,chi_s,np_s,dxdZ_out,chi_out,total_nz+ngz)
+    ! Fill the local geom arrays with the results
+    do eo=iodd,ieven
+      DO iz = 1,local_nz + ngz
+        gxx_(iz,eo)      = gxx_out(iz+local_nz_offset)
+        gxy_(iz,eo)      = gxy_out(iz+local_nz_offset)
+        gxz_(iz,eo)      = gxz_out(iz+local_nz_offset)
+        gyy_(iz,eo)      = gyy_out(iz+local_nz_offset)
+        gyz_(iz,eo)      = gyz_out(iz+local_nz_offset)
+        gzz_(iz,eo)      = gzz_out(iz+local_nz_offset)
+        Bfield_(iz,eo)   = Bfield_out(iz+local_nz_offset)
+        dBdx_(iz,eo)     = dBdx_out(iz+local_nz_offset)
+        dBdy_(iz,eo)     = 0._xp
+        dBdz_(iz,eo)     = dBdz_out(iz+local_nz_offset)
+        jacobian_(iz,eo) = jacobian_out(iz+local_nz_offset)
+        R_hat_(iz,eo)    = R_out(iz+local_nz_offset)
+        Z_hat_(iz,eo)    = Z_out(iz+local_nz_offset)
+        dxdR_(iz,eo)     = dxdR_out(iz+local_nz_offset)
+        dxdZ_(iz,eo)     = dxdZ_out(iz+local_nz_offset)
       ENDDO
-  ENDDO
-
+    ENDDO
   contains
 
-
-    SUBROUTINE update_ghosts_z(fz_)
+    ! Update (and communicate) ghosts of the metric arrays
+    SUBROUTINE update_ghosts_z(fz_,eo,periodic)
       IMPLICIT NONE
       ! INTEGER,  INTENT(IN) :: nztot_
-      REAL(xp), DIMENSION(1:local_nz+ngz), INTENT(INOUT) :: fz_
-      REAL(xp), DIMENSION(-2:2) :: buff
-      INTEGER :: status(MPI_STATUS_SIZE), count, last, first
-      last = local_nz+ngz/2
+      REAL(xp), DIMENSION(local_nz+ngz), INTENT(INOUT) :: fz_
+      LOGICAL,  INTENT(IN) :: periodic
+      INTEGER,  INTENT(IN) :: eo !even/odd z grid
+      REAL(xp), DIMENSION(-ngz/2:ngz/2) :: buff
+      INTEGER :: status(MPI_STATUS_SIZE), count, last, first, ig
+      REAL(xp):: dfdz, beta, z1, z2, f1, f2, z3, f3
       first= 1 + ngz/2
-      IF(total_nz .GT. 1) THEN
-        IF (num_procs_z .GT. 1) THEN
-          CALL MPI_BARRIER(MPI_COMM_WORLD,ierr)
-            count = 1 ! one point to exchange
-            !!!!!!!!!!! Send ghost to up neighbour !!!!!!!!!!!!!!!!!!!!!!
-            CALL mpi_sendrecv(fz_(last), count, MPI_DOUBLE, nbr_U, 0, & ! Send to Up the last
-                               buff(-1), count, MPI_DOUBLE, nbr_D, 0, & ! Receive from Down the first-1
-                              comm0, status, ierr)
-
-            CALL mpi_sendrecv(fz_(last-1), count, MPI_DOUBLE, nbr_U, 0, & ! Send to Up the last
-                                 buff(-2), count, MPI_DOUBLE, nbr_D, 0, & ! Receive from Down the first-2
-                              comm0, status, ierr)
-
-            !!!!!!!!!!! Send ghost to down neighbour !!!!!!!!!!!!!!!!!!!!!!
-            CALL mpi_sendrecv(fz_(first), count, MPI_DOUBLE, nbr_D, 0, & ! Send to Down the first
-                                buff(+1), count, MPI_DOUBLE, nbr_U, 0, & ! Recieve from Up the last+1
-                              comm0, status, ierr)
-
-            CALL mpi_sendrecv(fz_(first+1), count, MPI_DOUBLE, nbr_D, 0, & ! Send to Down the first
-                                  buff(+2), count, MPI_DOUBLE, nbr_U, 0, & ! Recieve from Up the last+2
-                              comm0, status, ierr)
-         ELSE
-           buff(-1) = fz_(last  )
-           buff(-2) = fz_(last-1)
-           buff(+1) = fz_(first  )
-           buff(+2) = fz_(first+1)
-         ENDIF
-         fz_(last +1) = buff(+1)
-         fz_(last +2) = buff(+2)
-         fz_(first-1) = buff(-1)
-         fz_(first-2) = buff(-2)
+      last = local_nz+ngz/2
+      count = 1 ! one point to exchange
+      IF (num_procs_z .GT. 1) THEN
+        CALL MPI_BARRIER(MPI_COMM_WORLD,ierr)
+        !!!!!!!!!!! Send ghost to up neighbour !!!!!!!!!!!!!!!!!!!!!!
+        DO ig = 1,ngz/2
+          CALL mpi_sendrecv(fz_(last-(ig-1)), count, mpi_xp_r, nbr_U, 1206+ig, & ! Send to Up the last
+          buff(-ig),        count, mpi_xp_r, nbr_D, 1206+ig, & ! Receive from Down the first-1
+          comm0, status, ierr)
+        ENDDO
+        !!!!!!!!!!! Send ghost to down neighbour !!!!!!!!!!!!!!!!!!!!!!\
+        DO ig = 1,ngz/2
+          CALL mpi_sendrecv(fz_(first+(ig-1)), count, mpi_xp_r, nbr_D, 1208+ig, & ! Send to Down the first
+          buff(ig),          count, mpi_xp_r, nbr_U, 1208+ig, & ! Recieve from Up the last+1
+          comm0, status, ierr)
+        ENDDO
+      ELSE
+        DO ig = 1,ngz/2
+          buff(-ig) = fz_(last-(ig-1))
+          buff( ig) = fz_(first+(ig-1))
+        ENDDO
+      ENDIF
+      ! if the metric is not periodic, we extrapolate it linearly
+      IF(.NOT. periodic) THEN
+        !!!! Right side
+        ! extrapolation from a linear fit
+        ! g(z) = dfdz*(z-z1) + f1
+        f1 = fz_(last-1)
+        f2 = fz_(last)
+        z1 = zarray(last-1,eo)
+        z2 = zarray(last,eo)
+        dfdz = (f2-f1)/(z2-z1) ! slope
+        beta  = f1              ! shift
+        ! right ghosts values
+        DO ig = 1,ngz/2
+          z3    = z2 + REAL(ig,xp)*(z2 - z1) ! z3 = z2 + ig * dz
+          f3    = dfdz*(z3 - z1) + beta
+          buff(ig) = f3
+        ENDDO       
+        !!!! Left side
+        ! extrapolation from a linear fit
+        ! g(z) = dfdz*(z-z1) + f1
+        f1 = fz_(first)
+        f2 = fz_(first+1)
+        z1 = zarray(first,eo)
+        z2 = zarray(first+1,eo)
+        dfdz = (f2-f1)/(z2-z1) ! slope
+        beta  = f1              ! shift
+        ! right ghosts values
+        DO ig = 1,ngz/2
+          z3    = z1 - REAL(ig,xp)*(z2 - z1) ! z3 = z1 - ig * dz
+          f3    = dfdz*(z3 -z1) + beta
+          buff(-ig) = f3
+        ENDDO       
       ENDIF
+      ! Updating ghosts 
+      DO ig = 1,ngz/2
+        fz_(last +ig) = buff(ig)
+        fz_(first-ig) = buff(-ig)
+      ENDDO
+      !  print*,fz_
     END SUBROUTINE update_ghosts_z
 
-
     !> Generate an equidistant array from min to max with n points
     function linspace(min,max,n) result(out)
       real(xp), INTENT(IN):: min, max
@@ -596,15 +589,14 @@ CONTAINS
     !> full theta integral with weight function dlp
     real(xp) function dlp_int(var,dlp)
       real(xp), dimension(np), INTENT(IN):: var, dlp
-      dlp_int=sum(var*dlp)*2*pi*Npol_ext/np
+      dlp_int=sum(var*dlp)*2._xp*pi*Npol_ext/np
     end function dlp_int
 
     !> theta integral with weight function dlp, up to index 'ind'
     real(xp) function dlp_int_ind(var,dlp,ind)
       real(xp), dimension(np), INTENT(IN):: var, dlp
       integer, INTENT(IN):: ind
-
-      dlp_int_ind=0.
+      dlp_int_ind=0._xp
       if (ind.gt.1) then
          dlp_int_ind=dlp_int_ind+var(1)*dlp(1)*pi*Npol_ext/np
          dlp_int_ind=dlp_int_ind+(sum(var(2:ind-1)*dlp(2:ind-1)))*2*pi*Npol_ext/np
@@ -617,22 +609,18 @@ CONTAINS
       integer,                INTENT(IN) :: n
       real(xp), dimension(n), INTENT(IN):: x,y
       real(xp), dimension(n) :: out,dx
-
       !call lag3deriv(y,x,n,out,x,n)
-
-      out=0.
+      out=0._xp
       do i=2,n-1
-         out(i)=out(i)-y(i-1)/2
-         out(i)=out(i)+y(i+1)/2
+         out(i)=out(i)-y(i-1)/2._xp
+         out(i)=out(i)+y(i+1)/2._xp
       enddo
       out(1)=y(2)-y(1)
       out(n)=y(n)-y(n-1)
       dx=x(2)-x(1)
       out=out/dx
-
     end function deriv_fd
 
-
   end subroutine get_miller