diff --git a/Makefile b/Makefile
index 8ef11ad6808c12febcf2ba69fc833dc4d94f0296..bd218c821bf12e13473d86197c58892b47f84ed1 100644
--- a/Makefile
+++ b/Makefile
@@ -4,35 +4,42 @@ include local/make.inc
 # Standard version with optimized compilation (ifort compiler)
 all: F90 = mpif90
 all: F90FLAGS = -O3 -xHOST
-all: EXEC = $(BINDIR)/gyacomo22
+all: EXEC = $(BINDIR)/gyacomo23_dp
 all: dirs src/srcinfo.h
 all: compile
 
+# Single precision version
+sp: F90 = mpif90
+sp: F90FLAGS = -DSINGLE_PRECISION -O3 -xHOST
+sp: EXEC = $(BINDIR)/gyacomo23_sp
+sp: dirs src/srcinfo.h
+sp: compile
+
 # Fast compilation
 fast: F90 = mpif90
 fast: F90FLAGS = -fast
-fast: EXEC = $(BINDIR)/gyacomo22_fast
+fast: EXEC = $(BINDIR)/gyacomo23_fast
 fast: dirs src/srcinfo.h
 fast: compile
 
 # Debug version with all flags
 debug: F90 = mpif90
-debug: F90FLAGS = -g -traceback -ftrapuv -warn all -debug all
-debug: EXEC = $(BINDIR)/gyacomo22_debug
+debug: F90FLAGS = -C -g -traceback -ftrapuv -warn all -debug all
+debug: EXEC = $(BINDIR)/gyacomo23_debug
 debug: dirs src/srcinfo.h
 debug: compile
 
 # For compiling on marconi
 marconi: F90 = mpiifort
 marconi: F90FLAGS = -O3 -xHOST
-marconi: EXEC = $(BINDIR)/gyacomo22
+marconi: EXEC = $(BINDIR)/gyacomo23_dp
 marconi: dirs src/srcinfo.h
 marconi: compile
 
 # For compiling on daint
 daint: F90 = ftn
 daint: F90FLAGS = -O3
-daint: EXEC = $(BINDIR)/gyacomo22
+daint: EXEC = $(BINDIR)/gyacomo23_dp
 daint: dirs src/srcinfo.h
 daint: compile
 
@@ -40,7 +47,7 @@ daint: compile
 gopt: F90 = mpif90
 gopt: F90FLAGS = -O3 -std=legacy -ffree-line-length-0
 gopt: EXTMOD   = -J $(MODDIR)
-gopt: EXEC = $(BINDIR)/gyacomo22
+gopt: EXEC = $(BINDIR)/gyacomo23_dp
 gopt: dirs src/srcinfo.h
 gopt: compile
 
@@ -48,7 +55,7 @@ gopt: compile
 gdebug: F90 = mpif90
 gdebug: F90FLAGS = -C -g -std=legacy -ffree-line-length-0
 gdebug: EXTMOD   = -J $(MODDIR)
-gdebug: EXEC = $(BINDIR)/gyacomo22_debug
+gdebug: EXEC = $(BINDIR)/gyacomo23_debug
 gdebug: dirs src/srcinfo.h
 gdebug: compile
 
@@ -83,9 +90,9 @@ $(OBJDIR)/ghosts_mod.o $(OBJDIR)/grid_mod.o $(OBJDIR)/inital.o \
 $(OBJDIR)/initial_par_mod.o $(OBJDIR)/lag_interp_mod.o $(OBJDIR)/main.o \
 $(OBJDIR)/memory.o $(OBJDIR)/miller_mod.o $(OBJDIR)/model_mod.o \
 $(OBJDIR)/moments_eq_rhs_mod.o $(OBJDIR)/numerics_mod.o $(OBJDIR)/parallel_mod.o \
-$(OBJDIR)/ppexit.o $(OBJDIR)/ppinit.o $(OBJDIR)/prec_const_mod.o \
+$(OBJDIR)/ppexit.o $(OBJDIR)/prec_const_mod.o \
 $(OBJDIR)/processing_mod.o $(OBJDIR)/readinputs.o $(OBJDIR)/restarts_mod.o \
-$(OBJDIR)/solve_EM_fields.o $(OBJDIR)/stepon.o $(OBJDIR)/tesend.o \
+$(OBJDIR)/solve_EM_fields.o $(OBJDIR)/species_mod.o $(OBJDIR)/stepon.o $(OBJDIR)/tesend.o \
 $(OBJDIR)/time_integration_mod.o $(OBJDIR)/utility_mod.o
 
 # To compile the executable
@@ -96,7 +103,7 @@ $(OBJDIR)/time_integration_mod.o $(OBJDIR)/utility_mod.o
  $(OBJDIR)/advance_field_mod.o : src/advance_field_mod.F90 \
    $(OBJDIR)/grid_mod.o $(OBJDIR)/array_mod.o $(OBJDIR)/initial_par_mod.o \
 	 $(OBJDIR)/prec_const_mod.o $(OBJDIR)/time_integration_mod.o $(OBJDIR)/basic_mod.o \
-	 $(OBJDIR)/fields_mod.o
+	 $(OBJDIR)/fields_mod.o $(OBJDIR)/model_mod.o $(OBJDIR)/closure_mod.o
 	$(F90) -c $(F90FLAGS) $(FPPFLAGS) $(EXTMOD) $(EXTINC) src/advance_field_mod.F90 -o $@
 
  $(OBJDIR)/array_mod.o : src/array_mod.F90 \
@@ -106,11 +113,11 @@ $(OBJDIR)/time_integration_mod.o $(OBJDIR)/utility_mod.o
  $(OBJDIR)/auxval.o : src/auxval.F90 \
  	 $(OBJDIR)/fourier_mod.o $(OBJDIR)/memory.o $(OBJDIR)/model_mod.o \
 	 $(OBJDIR)/geometry_mod.o  $(OBJDIR)/grid_mod.o $(OBJDIR)/numerics_mod.o \
-	 $(OBJDIR)/parallel_mod.o
+	 $(OBJDIR)/parallel_mod.o $(OBJDIR)/processing_mod.o
 	$(F90) -c $(F90FLAGS) $(FPPFLAGS) $(EXTMOD) $(EXTINC) src/auxval.F90 -o $@
 
  $(OBJDIR)/basic_mod.o : src/basic_mod.F90 \
- 	 $(OBJDIR)/prec_const_mod.o
+ 	 $(OBJDIR)/prec_const_mod.o $(OBJDIR)/parallel_mod.o
 	$(F90) -c $(F90FLAGS) $(FPPFLAGS) $(EXTMOD) $(EXTINC) src/basic_mod.F90 -o $@
 
  $(OBJDIR)/calculus_mod.o : src/calculus_mod.F90  \
@@ -129,17 +136,22 @@ $(OBJDIR)/time_integration_mod.o $(OBJDIR)/utility_mod.o
 	$(F90) -c $(F90FLAGS) $(FPPFLAGS) $(EXTMOD) $(EXTINC) src/closure_mod.F90 -o $@
 
  $(OBJDIR)/collision_mod.o : src/collision_mod.F90 \
- 	 $(OBJDIR)/array_mod.o $(OBJDIR)/basic_mod.o $(OBJDIR)/fields_mod.o \
-   $(OBJDIR)/grid_mod.o $(OBJDIR)/model_mod.o $(OBJDIR)/prec_const_mod.o \
-	 $(OBJDIR)/time_integration_mod.o $(OBJDIR)/utility_mod.o
+ 	 $(OBJDIR)/array_mod.o $(OBJDIR)/basic_mod.o $(OBJDIR)/cosolver_interface_mod.o\
+	 $(OBJDIR)/fields_mod.o $(OBJDIR)/grid_mod.o $(OBJDIR)/model_mod.o \
+	 $(OBJDIR)/prec_const_mod.o $(OBJDIR)/species_mod.o $(OBJDIR)/time_integration_mod.o \
+	 $(OBJDIR)/utility_mod.o
 	$(F90) -c $(F90FLAGS) $(FPPFLAGS) $(EXTMOD) $(EXTINC) src/collision_mod.F90 -o $@
 
  $(OBJDIR)/control.o : src/control.F90 \
 	 $(OBJDIR)/auxval.o $(OBJDIR)/geometry_mod.o $(OBJDIR)/prec_const_mod.o \
-   $(OBJDIR)/basic_mod.o $(OBJDIR)/ppexit.o $(OBJDIR)/ppinit.o \
-	 $(OBJDIR)/readinputs.o $(OBJDIR)/tesend.o
+   $(OBJDIR)/basic_mod.o $(OBJDIR)/ppexit.o $(OBJDIR)/readinputs.o $(OBJDIR)/tesend.o
 	$(F90) -c $(F90FLAGS) $(FPPFLAGS) $(EXTMOD) $(EXTINC) src/control.F90 -o $@
 
+ $(OBJDIR)/cosolver_interface_mod.o : src/cosolver_interface_mod.F90 \
+   $(OBJDIR)/grid_mod.o $(OBJDIR)/array_mod.o $(OBJDIR)/model_mod.o $(OBJDIR)/species_mod.o\
+	 $(OBJDIR)/prec_const_mod.o
+	$(F90) -c $(F90FLAGS) $(FPPFLAGS) $(EXTMOD) $(EXTINC) src/cosolver_interface_mod.F90 -o $@
+
  $(OBJDIR)/diagnose.o : src/diagnose.F90 \
  	 $(OBJDIR)/prec_const_mod.o $(OBJDIR)/processing_mod.o $(OBJDIR)/array_mod.o \
    $(OBJDIR)/basic_mod.o $(OBJDIR)/diagnostics_par_mod.o $(OBJDIR)/fields_mod.o \
@@ -161,7 +173,7 @@ $(OBJDIR)/time_integration_mod.o $(OBJDIR)/utility_mod.o
 	$(F90) -c $(F90FLAGS) $(FPPFLAGS) $(EXTMOD) $(EXTINC) src/fields_mod.F90 -o $@
 
  $(OBJDIR)/fourier_mod.o : src/fourier_mod.F90 \
- 	 $(OBJDIR)/basic_mod.o $(OBJDIR)/prec_const_mod.o $(OBJDIR)/grid_mod.o
+ 	 $(OBJDIR)/basic_mod.o $(OBJDIR)/prec_const_mod.o
 	$(F90) -c $(F90FLAGS) $(FPPFLAGS) $(EXTMOD) $(EXTINC) src/fourier_mod.F90 -o $@
 
  $(OBJDIR)/geometry_mod.o : src/geometry_mod.F90 \
@@ -172,11 +184,11 @@ $(OBJDIR)/time_integration_mod.o $(OBJDIR)/utility_mod.o
 
  $(OBJDIR)/ghosts_mod.o : src/ghosts_mod.F90 \
  	 $(OBJDIR)/basic_mod.o $(OBJDIR)/fields_mod.o $(OBJDIR)/grid_mod.o\
-   $(OBJDIR)/geometry_mod.o $(OBJDIR)/ppinit.o  $(OBJDIR)/time_integration_mod.o
+   $(OBJDIR)/geometry_mod.o $(OBJDIR)/prec_const_mod.o $(OBJDIR)/time_integration_mod.o
 	$(F90) -c $(F90FLAGS) $(FPPFLAGS) $(EXTMOD) $(EXTINC) src/ghosts_mod.F90 -o $@
 
  $(OBJDIR)/grid_mod.o : src/grid_mod.F90 \
- 	 $(OBJDIR)/basic_mod.o $(OBJDIR)/model_mod.o $(OBJDIR)/prec_const_mod.o
+ 	 $(OBJDIR)/basic_mod.o $(OBJDIR)/fourier_mod.o $(OBJDIR)/prec_const_mod.o
 	$(F90) -c $(F90FLAGS) $(FPPFLAGS) $(EXTMOD) $(EXTINC) src/grid_mod.F90 -o $@
 
  $(OBJDIR)/inital.o : src/inital.F90 \
@@ -210,7 +222,7 @@ $(OBJDIR)/time_integration_mod.o $(OBJDIR)/utility_mod.o
 	$(F90) -c $(F90FLAGS) $(FPPFLAGS) $(EXTMOD) $(EXTINC) src/miller_mod.F90 -o $@
 
  $(OBJDIR)/model_mod.o : src/model_mod.F90 \
- 	 $(OBJDIR)/prec_const_mod.o
+ 	 $(OBJDIR)/grid_mod.o $(OBJDIR)/prec_const_mod.o
 	$(F90) -c $(F90FLAGS) $(FPPFLAGS) $(EXTMOD) $(EXTINC) src/model_mod.F90 -o $@
 
  $(OBJDIR)/moments_eq_rhs_mod.o : src/moments_eq_rhs_mod.F90 \
@@ -231,19 +243,13 @@ $(OBJDIR)/time_integration_mod.o $(OBJDIR)/utility_mod.o
 	$(F90) -c $(F90FLAGS) $(FPPFLAGS) $(EXTMOD) $(EXTINC) src/numerics_mod.F90 -o $@
 
  $(OBJDIR)/parallel_mod.o : src/parallel_mod.F90 \
- 	 $(OBJDIR)/basic_mod.o $(OBJDIR)/prec_const_mod.o $(OBJDIR)/grid_mod.o
+ 	 $(OBJDIR)/prec_const_mod.o
 	$(F90) -c $(F90FLAGS) $(FPPFLAGS) $(EXTMOD) $(EXTINC) src/parallel_mod.F90 -o $@
 
  $(OBJDIR)/ppexit.o : src/ppexit.F90 \
  	 $(OBJDIR)/prec_const_mod.o $(OBJDIR)/basic_mod.o $(OBJDIR)/coeff_mod.o
 	$(F90) -c $(F90FLAGS) $(FPPFLAGS) $(EXTMOD) $(EXTINC) src/ppexit.F90 -o $@
 
- $(OBJDIR)/ppinit.o : src/ppinit.F90 \
- 	 $(OBJDIR)/array_mod.o $(OBJDIR)/prec_const_mod.o $(OBJDIR)/grid_mod.o\
-   $(OBJDIR)/fields_mod.o $(OBJDIR)/array_mod.o $(OBJDIR)/time_integration_mod.o \
-	 $(OBJDIR)/basic_mod.o
-	$(F90) -c $(F90FLAGS) $(FPPFLAGS) $(EXTMOD) $(EXTINC) src/ppinit.F90 -o $@
-
  $(OBJDIR)/prec_const_mod.o : src/prec_const_mod.F90
 	$(F90) -c $(F90FLAGS) $(FPPFLAGS) $(EXTMOD) $(EXTINC) src/prec_const_mod.F90 -o $@
 
@@ -262,11 +268,15 @@ $(OBJDIR)/time_integration_mod.o $(OBJDIR)/utility_mod.o
 	$(F90) -c $(F90FLAGS) $(FPPFLAGS) $(EXTMOD) $(EXTINC) src/restarts_mod.F90 -o $@
 
  $(OBJDIR)/solve_EM_fields.o : src/solve_EM_fields.F90 \
- 	 $(OBJDIR)/array_mod.o $(OBJDIR)/prec_const_mod.o $(OBJDIR)/grid_mod.o \
+   $(OBJDIR)/array_mod.o $(OBJDIR)/prec_const_mod.o $(OBJDIR)/grid_mod.o \
 	 $(OBJDIR)/ghosts_mod.o $(OBJDIR)/fields_mod.o $(OBJDIR)/array_mod.o \
 	 $(OBJDIR)/time_integration_mod.o $(OBJDIR)/basic_mod.o $(OBJDIR)/parallel_mod.o
 	$(F90) -c $(F90FLAGS) $(FPPFLAGS) $(EXTMOD) $(EXTINC) src/solve_EM_fields.F90 -o $@
 
+ $(OBJDIR)/species_mod.o : src/species_mod.F90 \
+   $(OBJDIR)/basic_mod.o $(OBJDIR)/model_mod.o $(OBJDIR)/prec_const_mod.o
+	$(F90) -c $(F90FLAGS) $(FPPFLAGS) $(EXTMOD) $(EXTINC) src/species_mod.F90 -o $@
+
  $(OBJDIR)/stepon.o : src/stepon.F90 \
  	 $(OBJDIR)/initial_par_mod.o $(OBJDIR)/prec_const_mod.o $(OBJDIR)/advance_field_mod.o \
    $(OBJDIR)/basic_mod.o $(OBJDIR)/nonlinear_mod.o $(OBJDIR)/grid_mod.o \
diff --git a/scripts/submit_00.cmd b/bash scripts/submit_00.cmd
similarity index 100%
rename from scripts/submit_00.cmd
rename to bash scripts/submit_00.cmd
diff --git a/fort_example.90 b/fort_example.90
deleted file mode 100644
index 7607e1e10c45a55d7e6b3c7cf6e8f96146101617..0000000000000000000000000000000000000000
--- a/fort_example.90
+++ /dev/null
@@ -1,91 +0,0 @@
-&BASIC
-  nrun   = 100000000              !number of maximal time steps
-  dt     = 0.01                   !time step
-  tmax   = 500                    !maximal physical time
-  maxruntime = 60 ! 1h 14400 !4h  !maximal wallclock run time (in seconds)
-/
-&GRID
-  pmaxe  = 6                      !maximal degree of Hermite polynomials for e
-  jmaxe  = 3                      !maximal degree of Laguerre polynomials for e
-  pmaxi  = 6                      !maximal degree of Hermite polynomials for i
-  jmaxi  = 3                      !maximal degree of Laguerre polynomials for i
-  Nx     = 200                    !resolution in x (=Nkx)
-  Lx     = 120                    !box size in x
-  Ny     = 64                     !resolution in y (=2(Nky-1))
-  Ly     = 160                    !box size in y
-  Nz     = 24                     !resolution in z
-  Npol   = 1                      !number of poloidal turns (Lz=2piNpol)
-  Nexc   = 1                      !factor to increase Lx in sheared geometry
-  SG     = .f.                    !staggered grid option (not recommended)
-/
-&GEOMETRY
-  geom   = 's-alpha'              !magnetic equilibrium geometry (Z-pinch,s-alpha,miller)
-  q0     = 1.4                    !safety factor (s-alpha,miller only)
-  shear  = 0.8                    !shear (s-alpha,miller only)
-  eps    = 0.18                   !inverse aspect ratio (s-alpha,miller only)
-  kappa  = 1                      !elongation (miller only)
-  delta  = 0                      !triangularity (miller only)
-  zeta   = 0                      !squareness (miller only)
-  parallel_bc = 'dirichlet'       !boundary condition for modes that does not connect due to shear (dirichlet,periodic)
-/
-&OUTPUT_PAR
-  nsave_0d = 50                   !period in number of step for time traces
-  nsave_1d = -1                   !unused
-  nsave_2d = -1                   !unused
-  nsave_3d = 100                  !period in number of step for 3D fields (phi,psi,...)
-  nsave_5d = 1000                 !period in number of step for 5D fields (moments)
-  write_doubleprecision = .t.     !for HDF5 output (double precision faster on marconi)
-  write_gamma = .t.               !to write particle flux
-  write_hf    = .t.               !to write heat flux
-  write_phi   = .t.               !to write ES and EM potentials
-  write_Na00  = .t.               !to write gyrocenter densities
-  write_Napj  = .t.               !to write moments
-  write_Sapj  = .f.               !to write nonlinear terms
-  write_dens  = .t.               !to write particle densities
-  write_temp  = .t.               !to write particle temperatures
-  job2load    = -1                !ID of the job to load in a restart (-1 means no restart)
-/
-&MODEL_PAR
-  ! Collisionality
-  CLOS    = 0                     !closure model (0: zero-truncation)
-  NL_CLOS = 0                     !NL closure model (-1 is full FLR sum until n=J-j, n>-1 is up to nth term)
-  LINEARITY = 'nonlinear'         !to activate nonlinear term (linear,nonlinear)
-  KIN_E   = .f.                   !to have a kinetic electron model (adiabatic otherwise)
-  mu_x    = 1.0                   !x numerical diffusion coefficient
-  mu_y    = 1.0                   !y numerical diffusion coefficient
-  N_HD    = 4                     !xy numerical diffusion order
-  mu_z    = 2.0                   !z numerical diffusion coefficient (order 4)
-  mu_p    = 0                     !p numerical diffusion coefficient (usually not used)
-  mu_j    = 0                     !j numerical diffusion coefficient (usually not used)
-  nu      = 0.05                  !collision frequency (=0.49*nu_GENE)
-  tau_e   = 1                     !electron temperature ratio
-  tau_i   = 1                     !ion temperature ratio
-  sigma_e = 0.023338              !electron mass ratio
-  sigma_i = 1                     !ion mass ratio
-  q_e     = -1                    !electron charge
-  q_i     = 1                     !ion charge
-  K_Ne    = 2.22                  !electron density gradient intensity
-  K_Te    = 6.96                  !electron temperature gradient intensity
-  K_Ni    = 2.22                  !ion density gradient intensity
-  K_Ti    = 6.96                  !ion density temperature intensity
-  k_gB     = 1                   !magnetic field gradient strength
-  k_cB     = 1                   !magnetic curvature strength
-  lambdaD = 0                     !Debye length (not tested when non zero)
-/
-&COLLISION_PAR
-  collision_model = 'DG'          !collision model (DG,SG,LD,LR), all need a matrix except DG
-  gyrokin_CO      = .f.           !activate gyrokinetic terms in the CO
-  interspecies    = .t.           !activate interspecies if CO has some
-  mat_file        = 'LDGK_P10_J5_dk_5e-2_km_5_NFLR_30.h5' !path to find the collision matrix
-  collision_kcut  = 1.8           !maximal wavelength of the CO matrix. For higher kperp, the matrix at collision_kcut will be applied.
-/
-&INITIAL_CON
-  INIT_OPT        = 'phi'         !initialization option (phi,mom00,allmom,ppj)
-  ACT_ON_MODES    = 'donothing'   !to perform numerical experiments (unused)
-  init_background = 0             !background value for a noise initialization
-  init_noiselvl   = 0.0001        !fluctuation value ''
-  iseed           = 42            !seed of the noise
-/
-&TIME_INTEGRATION_PAR
-  numerical_scheme = 'RK4'        !numerical scheme for time-stepping (RK2,RK3,RK4,DOPRI5)
-/
diff --git a/local/make.inc b/local/make.inc
index fa747c0395c4390b219ea80d626024de53936c60..8be74cc8fb29cf9f7dc7c317301caccf477f616c 100644
--- a/local/make.inc
+++ b/local/make.inc
@@ -71,6 +71,8 @@ LIBS += -lfm
 # Add FFTW3 local lib
 ifdef FFTW3DIR
       LIBS  += -lfftw3 -lfftw3_mpi
+      # single_precision fftw
+      LIBS  += -lfftw3f -lfftw3f_mpi
       LDIRS += -L$(FFTW3DIR)/lib
       IDIRS += -I$(FFTW3DIR)/include
 endif
diff --git a/matlab/compile_results.m b/matlab/compile_results.m
index de85096b0a624dae2a44a2dc8a3be3cac40ad7ac..27c14463e467bb13ca11f93c5d559a5eb2920fe3 100644
--- a/matlab/compile_results.m
+++ b/matlab/compile_results.m
@@ -1,355 +1,329 @@
 function [DATA] = compile_results(DIRECTORY,JOBNUMMIN,JOBNUMMAX)
-    DATA = {};
-    CONTINUE = 1;
-    JOBNUM   = JOBNUMMIN; JOBFOUND = 0;
-    DATA.TJOB_SE  = []; % Start and end times of jobs
-    DATA.NU_EVOL  = []; % evolution of parameter nu between jobs
-    DATA.CO_EVOL  = []; % evolution of CO
-    DATA.MUx_EVOL  = []; % evolution of parameter mu between jobs
-    DATA.MUy_EVOL  = []; % evolution of parameter mu between jobs
-    DATA.MUz_EVOL  = []; % evolution of parameter mu between jobs
-    DATA.K_N_EVOL = []; %
-    DATA.K_T_EVOL = []; %
-    DATA.L_EVOL   = []; % 
-    DATA.DT_EVOL  = []; %
-    % FIELDS
-    Nipj_    = []; Nepj_    = [];
-    Ni00_    = []; Ne00_    = [];
-    Nipjz_   = []; Nepjz_   = [];
-    HFLUXI_   = [];
-    HFLUXE_   = [];
-    GGAMMAI_ = [];
-    PGAMMAI_ = [];
-    GGAMMAE_ = [];
-    PGAMMAE_ = [];
-    PHI_     = [];
-    PSI_     = [];
-    DENS_E_  = [];
-    DENS_I_  = [];
-    UPAR_E_  = [];
-    UPAR_I_  = [];
-    UPER_E_  = [];
-    UPER_I_  = [];
-    TPAR_E_  = [];
-    TPAR_I_  = [];
-    TPER_E_  = [];
-    TPER_I_  = [];
-    TEMP_E_  = [];
-    TEMP_I_  = [];
-    TEMP_E_  = [];
-    TEMP_I_  = [];
-    Ts0D_    = [];
-    Ts3D_    = [];
-    Ts5D_    = [];
-    Sipj_    = []; Sepj_    = [];
-    Pe_old   = 1e9; Pi_old = Pe_old; Je_old = Pe_old; Ji_old = Pe_old;
-    Pi_max=0; Pe_max=0; Ji_max=0; Je_max=0;
-    DATA.outfilenames = {};
-    ii = 1;
-    while(CONTINUE)
-        filename = sprintf([DIRECTORY,'outputs_%.2d.h5'],JOBNUM);
-        % Check presence and jobnummax
-        if (exist(filename, 'file') == 2 && JOBNUM <= JOBNUMMAX)
-            DATA.outfilenames{ii} = filename;
-            %test if it is corrupted or currently running
-            try
-                openable = ~isempty(h5read(filename,'/data/var3d/time'));
-            catch
-                openable = 0;
-            end
-            if openable
-            %% load results %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-            disp(sprintf('Loading ID %.2d (%s)',JOBNUM,filename));
-            % Loading from output file
-            CPUTIME   = h5readatt(filename,'/data/input','cpu_time');
-            DT_SIM    = h5readatt(filename,'/data/input','dt');
-            [Pe, Je, Pi, Ji, kx, ky, z] = load_grid_data(filename);
-            W_GAMMA   = strcmp(h5readatt(filename,'/data/input','write_gamma'),'y');
-            W_HF      = strcmp(h5readatt(filename,'/data/input','write_hf'   ),'y');
-            W_PHI     = strcmp(h5readatt(filename,'/data/input','write_phi'  ),'y');
-            W_NA00    = strcmp(h5readatt(filename,'/data/input','write_Na00' ),'y');
-            W_NAPJ    = strcmp(h5readatt(filename,'/data/input','write_Napj' ),'y');
-            W_SAPJ    = strcmp(h5readatt(filename,'/data/input','write_Sapj' ),'y');
-            W_DENS    = strcmp(h5readatt(filename,'/data/input','write_dens' ),'y');
-            W_TEMP    = strcmp(h5readatt(filename,'/data/input','write_temp' ),'y');
-            KIN_E     = strcmp(h5readatt(filename,'/data/input',     'KIN_E' ),'y');
-            try
-                BETA      = h5readatt(filename,'/data/input','beta');
-            catch
-                BETA = 0;
-            end
-            % Check polynomials degrees
-            Pe_new= numel(Pe); Je_new= numel(Je);
-            Pi_new= numel(Pi); Ji_new= numel(Ji);
-            if(Pe_max < Pe_new); Pe_max = Pe_new; end;
-            if(Je_max < Je_new); Je_max = Je_new; end;
-            if(Pi_max < Pi_new); Pi_max = Pi_new; end;
-            if(Ji_max < Ji_new); Ji_max = Ji_new; end;
-            % If a degree is larger than previous job, put them in a larger array
-            if (sum([Pe_new, Je_new, Pi_new, Ji_new]>[Pe_old, Je_old, Pi_old, Ji_old]) >= 1)
-                if W_NAPJ
-                    tmp = Nipj_; sz = size(tmp);
-                    Nipj_ = zeros(cat(1,[Pi_new,Ji_new]',sz(3:end)')');
-                    Nipj_(1:Pi_old,1:Ji_old,:,:,:,:) = tmp;
-                    if(KIN_E) 
-                    tmp = Nepj_; sz = size(tmp);
-                    Nepj_ = zeros(cat(1,[Pe_new,Je_new]',sz(3:end)')');
-                    Nepj_(1:Pe_old,1:Je_old,:,:,:,:) = tmp;
-                    end
-                end
-                if W_SAPJ
-                    tmp = Sipj_; sz = size(tmp);
-                    Sipj_ = zeros(cat(1,[Pi_new,Ji_new]',sz(3:end)')');
-                    Sipj_(1:Pi_old,1:Ji_old,:,:,:,:) = tmp;
-                    tmp = Sepj_; sz = size(tmp);
-    %                 Sepj_ = zeros(cat(1,[Pe_new,Je_new]',sz(3:end)')');
-    %                 Sepj_(1:Pe_old,1:Je_old,:,:,:,:) = tmp;
-                end
-            % If a degree is smaller than previous job, put zero to add. deg.
-            elseif (sum([Pe_new, Je_new, Pi_new, Ji_new]<[Pe_old, Je_old, Pi_old, Ji_old]) >= 1 && Pe_old ~= 1e9)
-                if W_NAPJ
-                    tmp = Nipj; sz = size(tmp);
-                    Nipj = zeros(cat(1,[Pi_max,Ji_max]',sz(3:end)')');
-                    Nipj(1:Pi_new,1:Ji_new,:,:,:) = tmp;
-                    tmp = Nepj; sz = size(tmp);
-                    Nepj = zeros(cat(1,[Pe_max,Je_max]',sz(3:end)')');
-                    Nepj(1:Pe_new,1:Je_new,:,:,:) = tmp;
-                end
-                if W_SAPJ
-                    tmp = Sipj; sz = size(tmp);
-                    Sipj = zeros(cat(1,[Pi_max,Ji_max]',sz(3:end)')');
-                    Sipj(1:Pi_new,1:Ji_new,:,:,:) = tmp;
-                    tmp = Sepj; sz = size(tmp);
-                    Sepj = zeros(cat(1,[Pe_max,Je_max]',sz(3:end)')');
-                    Sepj(1:Pe_new,1:Je_new,:,:,:) = tmp;
-                end
-            end
-    
-    
-            if W_GAMMA
-                [ GGAMMA_RI, Ts0D, ~] = load_0D_data(filename, 'gflux_ri');
-                PGAMMA_RI            = load_0D_data(filename, 'pflux_ri');
-                GGAMMAI_ = cat(1,GGAMMAI_,GGAMMA_RI); clear GGAMMA_RI
-                PGAMMAI_ = cat(1,PGAMMAI_,PGAMMA_RI); clear PGAMMA_RI
-            end
-    
-            if W_HF
-                [ HFLUX_XI, Ts0D, ~] = load_0D_data(filename, 'hflux_xi');
-                HFLUXI_ = cat(1,HFLUXI_,HFLUX_XI); clear HFLUX_XI
-    %             if(KIN_E)
-    %             [ HFLUX_XE, Ts0D, ~] = load_0D_data(filename, 'hflux_xe');
-    %             HFLUXE_ = cat(1,HFLUXE_,HFLUX_XE); clear HFLUX_XE 
-    %             end
-            end
-    
-            if W_PHI
-                [ PHI, Ts3D, ~] = load_3D_data(filename, 'phi');
-                PHI_  = cat(4,PHI_,PHI); clear PHI
-                if BETA > 0
-                    [ PSI, Ts3D, ~] = load_3D_data(filename, 'psi');
-                    PSI_  = cat(4,PSI_,PSI); clear PSI
-                end
-            end
-            if W_NA00
-                if KIN_E
-                 Ne00  = load_3D_data(filename, 'Ne00');
-                 Ne00_ = cat(4,Ne00_,Ne00); clear Ne00
-                end
-                [Ni00, Ts3D, ~] = load_3D_data(filename, 'Ni00');
-                 Ni00_ = cat(4,Ni00_,Ni00); clear Ni00
-                if KIN_E
-                    try
-                 Nepjz  = load_3D_data(filename, 'Nepjz');
-                 Nepjz_ = cat(4,Nepjz_,Nepjz); clear Nepjz
-                    catch
-                     disp('Cannot load Nepjz');
-                   end
-                end
-                try
-                [Nipjz, Ts3D, ~] = load_3D_data(filename, 'Nipjz');
-                 Nipjz_ = cat(4,Nipjz_,Nipjz); clear Nipjz        
-                catch
-                    disp('Cannot load Nipjz');
-                end
-            end
-            if W_DENS
-                if KIN_E
-            [DENS_E, Ts3D, ~] = load_3D_data(filename, 'dens_e');
-                DENS_E_ = cat(4,DENS_E_,DENS_E); clear DENS_E
-                end
-            [DENS_I, Ts3D, ~] = load_3D_data(filename, 'dens_i');
-                DENS_I_ = cat(4,DENS_I_,DENS_I); clear DENS_I
-            end
-            if 0
-                if KIN_E
-            [UPAR_E, Ts3D, ~] = load_3D_data(filename, 'upar_e');
-                UPAR_E_ = cat(4,UPAR_E_,UPAR_E); clear UPAR_E
-            [UPER_E, Ts3D, ~] = load_3D_data(filename, 'uper_e');
-    %             UPER_E_ = cat(4,UPER_E_,UPER_E); clear UPER_E
+DATA = {};
+CONTINUE = 1;
+JOBNUM   = JOBNUMMIN; JOBFOUND = 0;
+DATA.TJOB_SE  = []; % Start and end times of jobs
+DATA.NU_EVOL  = []; % evolution of parameter nu between jobs
+DATA.CO_EVOL  = []; % evolution of CO
+DATA.MUx_EVOL  = []; % evolution of parameter mu between jobs
+DATA.MUy_EVOL  = []; % evolution of parameter mu between jobs
+DATA.MUz_EVOL  = []; % evolution of parameter mu between jobs
+DATA.K_N_EVOL = []; %
+DATA.L_EVOL   = []; % 
+DATA.DT_EVOL  = []; %
+% FIELDS
+Nipj_    = []; Nepj_    = [];
+Ni00_    = []; Ne00_    = [];
+Nipjz_   = []; Nepjz_   = [];
+HFLUXI_   = [];
+HFLUXE_   = [];
+GGAMMAI_ = [];
+PGAMMAI_ = [];
+GGAMMAE_ = [];
+PGAMMAE_ = [];
+PHI_     = [];
+PSI_     = [];
+DENS_E_  = [];
+DENS_I_  = [];
+UPAR_E_  = [];
+UPAR_I_  = [];
+UPER_E_  = [];
+UPER_I_  = [];
+TPAR_E_  = [];
+TPAR_I_  = [];
+TPER_E_  = [];
+TPER_I_  = [];
+TEMP_E_  = [];
+TEMP_I_  = [];
+TEMP_E_  = [];
+TEMP_I_  = [];
+Ts0D_    = [];
+Ts3D_    = [];
+Ts5D_    = [];
+Sipj_    = []; Sepj_    = [];
+Pe_old   = 1e9; Pi_old = Pe_old; Je_old = Pe_old; Ji_old = Pe_old;
+Pi_max=0; Pe_max=0; Ji_max=0; Je_max=0;
+DATA.outfilenames = {};
+ii = 1;
+while(CONTINUE)
+    filename = sprintf([DIRECTORY,'outputs_%.2d.h5'],JOBNUM);
+    % Check presence and jobnummax
+    if (exist(filename, 'file') == 2 && JOBNUM <= JOBNUMMAX)
+        DATA.outfilenames{ii} = filename;
+        %test if it is corrupted or currently running
+        try
+            openable = ~isempty(h5read(filename,'/data/var3d/time'));
+        catch
+            openable = 0;
+        end
+        if openable
+        %% load results %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+        disp(sprintf('Loading ID %.2d (%s)',JOBNUM,filename));
+        % Loading from output file
+        CPUTIME   = h5readatt(filename,'/data/input','cpu_time');
+        DT_SIM    = h5readatt(filename,'/data/input','dt');
+        [Pe, Je, Pi, Ji, kx, ky, z] = load_grid_data(filename);
+        W_GAMMA   = strcmp(h5readatt(filename,'/data/input','write_gamma'),'y');
+        W_HF      = strcmp(h5readatt(filename,'/data/input','write_hf'   ),'y');
+        W_PHI     = strcmp(h5readatt(filename,'/data/input','write_phi'  ),'y');
+        W_NA00    = strcmp(h5readatt(filename,'/data/input','write_Na00' ),'y');
+        W_NAPJ    = strcmp(h5readatt(filename,'/data/input','write_Napj' ),'y');
+        W_SAPJ    = strcmp(h5readatt(filename,'/data/input','write_Sapj' ),'y');
+        W_DENS    = strcmp(h5readatt(filename,'/data/input','write_dens' ),'y');
+        W_TEMP    = strcmp(h5readatt(filename,'/data/input','write_temp' ),'y');
+        KIN_E     = strcmp(h5readatt(filename,'/data/input',     'KIN_E' ),'y');
+        try
+            BETA      = h5readatt(filename,'/data/input','beta');
+        catch
+            BETA = 0;
+        end
+        % Check polynomials degrees
+        Pe_new= numel(Pe); Je_new= numel(Je);
+        Pi_new= numel(Pi); Ji_new= numel(Ji);
+        if(Pe_max < Pe_new); Pe_max = Pe_new; end;
+        if(Je_max < Je_new); Je_max = Je_new; end;
+        if(Pi_max < Pi_new); Pi_max = Pi_new; end;
+        if(Ji_max < Ji_new); Ji_max = Ji_new; end;
+        % If a degree is larger than previous job, put them in a larger array
+        if (sum([Pe_new, Je_new, Pi_new, Ji_new]>[Pe_old, Je_old, Pi_old, Ji_old]) >= 1)
+            if W_NAPJ
+                tmp = Nipj_; sz = size(tmp);
+                Nipj_ = zeros(cat(1,[Pi_new,Ji_new]',sz(3:end)')');
+                Nipj_(1:Pi_old,1:Ji_old,:,:,:,:) = tmp;
+                if(KIN_E) 
+                tmp = Nepj_; sz = size(tmp);
+                Nepj_ = zeros(cat(1,[Pe_new,Je_new]',sz(3:end)')');
+                Nepj_(1:Pe_old,1:Je_old,:,:,:,:) = tmp;
                 end
-            [UPAR_I, Ts3D, ~] = load_3D_data(filename, 'upar_i');
-                UPAR_I_ = cat(4,UPAR_I_,UPAR_I); clear UPAR_I
-            [UPER_I, Ts3D, ~] = load_3D_data(filename, 'uper_i');
-                UPER_I_ = cat(4,UPER_I_,UPER_I); clear UPER_I
             end
-            if W_TEMP
-                if KIN_E 
-    %         [TPAR_E, Ts3D, ~] = load_3D_data(filename, 'Tpar_e');
-    %             TPAR_E_ = cat(4,TPAR_E_,TPAR_E); clear TPAR_E
-    %         [TPER_E, Ts3D, ~] = load_3D_data(filename, 'Tper_e');
-    %             TPER_E_ = cat(4,TPER_E_,TPER_E); clear TPER_E
-            [TEMP_E, Ts3D, ~] = load_3D_data(filename, 'temp_e');
-                    TEMP_E_ = cat(4,TEMP_E_,TEMP_E); clear TEMP_E
-                end
-    %         [TPAR_I, Ts3D, ~] = load_3D_data(filename, 'Tpar_i');
-    %             TPAR_I_ = cat(4,TPAR_I_,TPAR_I); clear TPAR_I
-    %         [TPER_I, Ts3D, ~] = load_3D_data(filename, 'Tper_i');
-    %             TPER_I_ = cat(4,TPER_I_,TPER_I); clear TPER_I
-            [TEMP_I, Ts3D, ~] = load_3D_data(filename, 'temp_i');
-                TEMP_I_ = cat(4,TEMP_I_,TEMP_I); clear TEMP_I
+            if W_SAPJ
+                tmp = Sipj_; sz = size(tmp);
+                Sipj_ = zeros(cat(1,[Pi_new,Ji_new]',sz(3:end)')');
+                Sipj_(1:Pi_old,1:Ji_old,:,:,:,:) = tmp;
+                tmp = Sepj_; sz = size(tmp);
+%                 Sepj_ = zeros(cat(1,[Pe_new,Je_new]',sz(3:end)')');
+%                 Sepj_(1:Pe_old,1:Je_old,:,:,:,:) = tmp;
             end
-    
-            Ts5D = [];
+        % If a degree is smaller than previous job, put zero to add. deg.
+        elseif (sum([Pe_new, Je_new, Pi_new, Ji_new]<[Pe_old, Je_old, Pi_old, Ji_old]) >= 1 && Pe_old ~= 1e9)
             if W_NAPJ
-            [Nipj, Ts5D, ~] = load_5D_data(filename, 'moments_i');
-            tic
-                Nipj_ = cat(6,Nipj_,Nipj); clear Nipj
-            toc
-                if KIN_E
-                    Nepj  = load_5D_data(filename, 'moments_e');
-                    Nepj_ = cat(6,Nepj_,Nepj); clear Nepj
-                end
+                tmp = Nipj; sz = size(tmp);
+                Nipj = zeros(cat(1,[Pi_max,Ji_max]',sz(3:end)')');
+                Nipj(1:Pi_new,1:Ji_new,:,:,:) = tmp;
+                tmp = Nepj; sz = size(tmp);
+                Nepj = zeros(cat(1,[Pe_max,Je_max]',sz(3:end)')');
+                Nepj(1:Pe_new,1:Je_new,:,:,:) = tmp;
             end
             if W_SAPJ
-               Sipj_ = cat(6,Sipj_,Sipj);
-              if KIN_E
-               Sepj_ = cat(6,Sepj_,Sepj);
-              end
-            end
-            Ts0D_   = cat(1,Ts0D_,Ts0D);
-            Ts3D_   = cat(1,Ts3D_,Ts3D);
-            Ts5D_   = cat(1,Ts5D_,Ts5D);
-    
-            % Evolution of simulation parameters
-            load_params
-            DATA.TJOB_SE   = [DATA.TJOB_SE   Ts0D(1)     Ts0D(end)];
-            DATA.NU_EVOL   = [DATA.NU_EVOL   DATA.NU     DATA.NU];
-            DATA.CO_EVOL   = [DATA.CO_EVOL   DATA.CO     DATA.CO];
-            DATA.MUx_EVOL  = [DATA.MUx_EVOL  DATA.MUx    DATA.MUx];
-            DATA.MUy_EVOL  = [DATA.MUy_EVOL  DATA.MUy    DATA.MUy];
-            DATA.MUz_EVOL  = [DATA.MUz_EVOL  DATA.MUz    DATA.MUz];
-            DATA.K_N_EVOL  = [DATA.K_N_EVOL DATA.K_N   DATA.K_N];
-            DATA.K_T_EVOL  = [DATA.K_T_EVOL DATA.K_T   DATA.K_T];
-            DATA.L_EVOL    = [DATA.L_EVOL    DATA.L      DATA.L];
-            DATA.DT_EVOL   = [DATA.DT_EVOL   DATA.DT_SIM DATA.DT_SIM];
-            
-            ii = ii + 1;
-            JOBFOUND = JOBFOUND + 1;
-            LASTJOB  = JOBNUM;
-            Pe_old = Pe_new; Je_old = Je_new;
-            Pi_old = Pi_new; Ji_old = Ji_new;
+                tmp = Sipj; sz = size(tmp);
+                Sipj = zeros(cat(1,[Pi_max,Ji_max]',sz(3:end)')');
+                Sipj(1:Pi_new,1:Ji_new,:,:,:) = tmp;
+                tmp = Sepj; sz = size(tmp);
+                Sepj = zeros(cat(1,[Pe_max,Je_max]',sz(3:end)')');
+                Sepj(1:Pe_new,1:Je_new,:,:,:) = tmp;
             end
-        elseif (JOBNUM > JOBNUMMAX)
-            CONTINUE = 0;
-            disp(['found ',num2str(JOBFOUND),' results']);
         end
-        JOBNUM   = JOBNUM + 1;
-    end
-    
-    if(JOBFOUND == 0)
-        disp('no results found, please verify the paths');
-        return;
-    else
-        %% Build grids
-    
-        Nky = numel(ky); 
-        if Nky > 1
-            dky = ky(2); 
-            Ly = 2*pi/dky;   
-        else
-            dky = 0;
-            Ly  = 0;   
+
+
+        if W_GAMMA
+            [ GGAMMA_RI, Ts0D, ~] = load_0D_data(filename, 'gflux_ri');
+            PGAMMA_RI            = load_0D_data(filename, 'pflux_ri');
+            GGAMMAI_ = cat(1,GGAMMAI_,GGAMMA_RI); clear GGAMMA_RI
+            PGAMMAI_ = cat(1,PGAMMAI_,PGAMMA_RI); clear PGAMMA_RI
         end
-        [~,iky0] = min(abs(ky)); 
-        Ny = 2*Nky-1;  
-        y  = linspace(-Ly/2,Ly/2,Ny+1); y = y(1:end-1);
-    
-        Nkx = numel(kx);
-        if Nkx > 1
-            dkx = kx(2);
-            Lx = 2*pi/dkx;
-        else
-            dkx = 0;
-            Lx  = 0;
-        end    
-        [~,ikx0] = min(abs(kx));
-        Nx = Nkx;      
-        x  = linspace(-Lx/2,Lx/2,Nx+1); x = x(1:end-1);
-    
-        Nz = numel(z);
-    
-        [KX,KY] = meshgrid(kx,ky);
-        KPERP2 = KX.^2+KY.^2;
-        %% Add everything in output structure
-        % scaling
-        DATA.scale = 1;%(1/Nx/Ny)^2;
-        % Fields
-        DATA.GGAMMA_RI = GGAMMAI_; DATA.PGAMMA_RI = PGAMMAI_; DATA.HFLUX_X = HFLUXI_;
-        if W_NAPJ
-        DATA.Nipj   = zeros(Pi_new,Ji_new,Nky,Nkx,Nz,numel(Ts5D_)); DATA.Nipj(:,:,:,:,1:Nz,:) = Nipj_;
+
+        if W_HF
+            [ HFLUX_XI, Ts0D, ~] = load_0D_data(filename, 'hflux_xi');
+            HFLUXI_ = cat(1,HFLUXI_,HFLUX_XI); clear HFLUX_XI
+%             if(KIN_E)
+%             [ HFLUX_XE, Ts0D, ~] = load_0D_data(filename, 'hflux_xe');
+%             HFLUXE_ = cat(1,HFLUXE_,HFLUX_XE); clear HFLUX_XE 
+%             end
+        end
+
+        if W_PHI
+            [ PHI, Ts3D, ~] = load_3D_data(filename, 'phi');
+            PHI_  = cat(4,PHI_,PHI); clear PHI
+            if BETA > 0
+                [ PSI, Ts3D, ~] = load_3D_data(filename, 'psi');
+                PSI_  = cat(4,PSI_,PSI); clear PSI
+            end
         end
         if W_NA00
-        DATA.Ni00   = zeros(Nky,Nkx,Nz,numel(Ts3D_));   DATA.Ni00(:,:,1:Nz,:)   = Ni00_;
-        DATA.Nipjz  = zeros(Pi_new,Ji_new,Nz,numel(Ts3D_));  DATA.Nipjz(:,:,1:Nz,:)   = Nipjz_;
+            if KIN_E
+             Ne00  = load_3D_data(filename, 'Ne00');
+             Ne00_ = cat(4,Ne00_,Ne00); clear Ne00
+            end
+            [Ni00, Ts3D, ~] = load_3D_data(filename, 'Ni00');
+             Ni00_ = cat(4,Ni00_,Ni00); clear Ni00
+            if KIN_E
+                try
+             Nepjz  = load_3D_data(filename, 'Nepjz');
+             Nepjz_ = cat(4,Nepjz_,Nepjz); clear Nepjz
+                catch
+                 disp('Cannot load Nepjz');
+               end
+            end
+            try
+            [Nipjz, Ts3D, ~] = load_3D_data(filename, 'Nipjz');
+             Nipjz_ = cat(4,Nipjz_,Nipjz); clear Nipjz        
+            catch
+                disp('Cannot load Nipjz');
+            end
         end
         if W_DENS
-        DATA.DENS_I = zeros(Nky,Nkx,Nz,numel(Ts3D_)); DATA.DENS_I(:,:,1:Nz,:)   = DENS_I_;
+            if KIN_E
+        [DENS_E, Ts3D, ~] = load_3D_data(filename, 'dens_e');
+            DENS_E_ = cat(4,DENS_E_,DENS_E); clear DENS_E
+            end
+        [DENS_I, Ts3D, ~] = load_3D_data(filename, 'dens_i');
+            DENS_I_ = cat(4,DENS_I_,DENS_I); clear DENS_I
+        end
+        if 0
+            if KIN_E
+        [UPAR_E, Ts3D, ~] = load_3D_data(filename, 'upar_e');
+            UPAR_E_ = cat(4,UPAR_E_,UPAR_E); clear UPAR_E
+        [UPER_E, Ts3D, ~] = load_3D_data(filename, 'uper_e');
+%             UPER_E_ = cat(4,UPER_E_,UPER_E); clear UPER_E
+            end
+        [UPAR_I, Ts3D, ~] = load_3D_data(filename, 'upar_i');
+            UPAR_I_ = cat(4,UPAR_I_,UPAR_I); clear UPAR_I
+        [UPER_I, Ts3D, ~] = load_3D_data(filename, 'uper_i');
+            UPER_I_ = cat(4,UPER_I_,UPER_I); clear UPER_I
         end
         if W_TEMP
-        DATA.TEMP_I = zeros(Nky,Nkx,Nz,numel(Ts3D_)); DATA.TEMP_I(:,:,1:Nz,:)   = TEMP_I_;
+            if KIN_E 
+%         [TPAR_E, Ts3D, ~] = load_3D_data(filename, 'Tpar_e');
+%             TPAR_E_ = cat(4,TPAR_E_,TPAR_E); clear TPAR_E
+%         [TPER_E, Ts3D, ~] = load_3D_data(filename, 'Tper_e');
+%             TPER_E_ = cat(4,TPER_E_,TPER_E); clear TPER_E
+        [TEMP_E, Ts3D, ~] = load_3D_data(filename, 'temp_e');
+                TEMP_E_ = cat(4,TEMP_E_,TEMP_E); clear TEMP_E
+            end
+%         [TPAR_I, Ts3D, ~] = load_3D_data(filename, 'Tpar_i');
+%             TPAR_I_ = cat(4,TPAR_I_,TPAR_I); clear TPAR_I
+%         [TPER_I, Ts3D, ~] = load_3D_data(filename, 'Tper_i');
+%             TPER_I_ = cat(4,TPER_I_,TPER_I); clear TPER_I
+        [TEMP_I, Ts3D, ~] = load_3D_data(filename, 'temp_i');
+            TEMP_I_ = cat(4,TEMP_I_,TEMP_I); clear TEMP_I
         end
-        if(KIN_E)
+
+        Ts5D = [];
         if W_NAPJ
-        DATA.Nepj   = zeros(Pe_new,Je_new,Nky,Nkx,Nz,numel(Ts5D_)); DATA.Nepj(:,:,:,:,1:Nz,:) = Nepj_;
-        end
-        if W_NA00    
-        DATA.Ne00   = zeros(Nky,Nkx,Nz,numel(Ts3D_));   DATA.Ne00(:,:,1:Nz,:)   = Ne00_;
-        DATA.Nepjz  = zeros(Nky,Nkx,Nz,numel(Ts3D_));  DATA.Nepjz(:,:,1:Nz,:)   = Nepjz_;
-        end
-        if W_DENS    
-        DATA.DENS_E = zeros(Nky,Nkx,Nz,numel(Ts3D_)); DATA.DENS_E(:,:,1:Nz,:)   = DENS_E_;
-        end
-        if W_TEMP
-        DATA.TEMP_E = zeros(Nky,Nkx,Nz,numel(Ts3D_)); DATA.TEMP_E(:,:,1:Nz,:)   = TEMP_E_;
+        [Nipj, Ts5D, ~] = load_5D_data(filename, 'moments_i');
+        tic
+            Nipj_ = cat(6,Nipj_,Nipj); clear Nipj
+        toc
+            if KIN_E
+                Nepj  = load_5D_data(filename, 'moments_e');
+                Nepj_ = cat(6,Nepj_,Nepj); clear Nepj
+            end
         end
-        DATA.HFLUX_XE = HFLUXE_;
+        if W_SAPJ
+     	  Sipj_ = cat(6,Sipj_,Sipj);
+          if KIN_E
+           Sepj_ = cat(6,Sepj_,Sepj);
+          end
         end
-        DATA.Ts5D = Ts5D_; DATA.Ts3D = Ts3D_; DATA.Ts0D = Ts0D_;
-        DATA.PHI   = zeros(Nky,Nkx,Nz,numel(Ts3D_)); DATA.PHI(:,:,1:Nz,:)   = PHI_;
-        if BETA>0
-        DATA.PSI   = zeros(Nky,Nkx,Nz,numel(Ts3D_)); DATA.PSI(:,:,1:Nz,:)   = PSI_;
+        Ts0D_   = cat(1,Ts0D_,Ts0D);
+        Ts3D_   = cat(1,Ts3D_,Ts3D);
+        Ts5D_   = cat(1,Ts5D_,Ts5D);
+
+        % Evolution of simulation parameters
+        load_params
+        DATA.TJOB_SE   = [DATA.TJOB_SE   Ts0D(1)     Ts0D(end)];
+        DATA.NU_EVOL   = [DATA.NU_EVOL   DATA.NU     DATA.NU];
+        DATA.CO_EVOL   = [DATA.CO_EVOL   DATA.CO     DATA.CO];
+        DATA.MUx_EVOL  = [DATA.MUx_EVOL  DATA.MUx    DATA.MUx];
+        DATA.MUy_EVOL  = [DATA.MUy_EVOL  DATA.MUy    DATA.MUy];
+        DATA.MUz_EVOL  = [DATA.MUz_EVOL  DATA.MUz    DATA.MUz];
+        DATA.K_N_EVOL  = [DATA.K_N_EVOL DATA.K_N   DATA.K_N];
+        DATA.L_EVOL    = [DATA.L_EVOL    DATA.L      DATA.L];
+        DATA.DT_EVOL   = [DATA.DT_EVOL   DATA.DT_SIM DATA.DT_SIM];
+        
+        ii = ii + 1;
+        JOBFOUND = JOBFOUND + 1;
+        LASTJOB  = JOBNUM;
+        Pe_old = Pe_new; Je_old = Je_new;
+        Pi_old = Pi_new; Ji_old = Ji_new;
         end
-        DATA.KIN_E=KIN_E;
-        % grids
-        DATA.Pe = Pe; DATA.Pi = Pi; 
-        DATA.Je = Je; DATA.Ji = Ji; 
-        DATA.kx = kx; DATA.ky = ky; DATA.z = z; DATA.Npol = -z(1)/pi;
-        DATA.x  = x;  DATA.y  = y;
-        DATA.ikx0 = ikx0; DATA.iky0 = iky0;
-        DATA.Nx = Nx; DATA.Ny = Ny; DATA.Nz = Nz; DATA.Nkx = Nkx; DATA.Nky = Nky; 
-        DATA.Pmaxe = numel(Pe); DATA.Pmaxi = numel(Pi); DATA.Jmaxe = numel(Je); DATA.Jmaxi = numel(Ji);
-        DATA.dir      = DIRECTORY;
-        DATA.localdir = DIRECTORY;
-        DATA.param_title=['$\nu_{',DATA.CONAME,'}=$', num2str(DATA.NU), ...
-            ', $\kappa_{Ni}=$',num2str(DATA.K_N),', $\kappa_{Ti}=$',num2str(DATA.K_T),...
-            ', $L=',num2str(DATA.L),'$, $N=',...
-            num2str(DATA.Nx),'$, $(P,J)=(',num2str(DATA.PMAXI),',',...
-            num2str(DATA.JMAXI),')$,',' $\mu_{hd}=$(',num2str(DATA.MUx),...
-            ',',num2str(DATA.MUy),')'];
-        DATA.paramshort = [num2str(DATA.Pmaxi),'x',num2str(DATA.Jmaxi),'x',...
-            num2str(DATA.Nkx),'x',num2str(DATA.Nky),'x',num2str(DATA.Nz)];
-        JOBNUM = LASTJOB;
-    
-        filename = sprintf([DIRECTORY,'outputs_%.2d.h5'],JOBNUM);
+    elseif (JOBNUM > JOBNUMMAX)
+        CONTINUE = 0;
+        disp(['found ',num2str(JOBFOUND),' results']);
+    end
+    JOBNUM   = JOBNUM + 1;
+end
+
+if(JOBFOUND == 0)
+    disp('no results found, please verify the paths');
+    return;
+else
+    %% Build grids
+
+    Nky = numel(ky); 
+    if Nky > 1
+        dky = ky(2); 
+        Ly = 2*pi/dky;   
+    else
+        dky = 0;
+        Ly  = 0;   
+    end
+    [~,iky0] = min(abs(ky)); 
+    Ny = 2*Nky-1;  
+    y  = linspace(-Ly/2,Ly/2,Ny+1); y = y(1:end-1);
+
+    Nkx = numel(kx);
+    if Nkx > 1
+        dkx = kx(2);
+        Lx = 2*pi/dkx;
+    else
+        dkx = 0;
+        Lx  = 0;
+    end    
+    [~,ikx0] = min(abs(kx));
+    Nx = Nkx;      
+    x  = linspace(-Lx/2,Lx/2,Nx+1); x = x(1:end-1);
+
+    Nz = numel(z);
+
+    [KX,KY] = meshgrid(kx,ky);
+    KPERP2 = KX.^2+KY.^2;
+    %% Add everything in output structure
+    % scaling
+    DATA.scale = 1;%(1/Nx/Ny)^2;
+    % Fields
+    DATA.GGAMMA_RI = GGAMMAI_; DATA.PGAMMA_RI = PGAMMAI_; DATA.HFLUX_X = HFLUXI_;
+    DATA.Nipj = Nipj_; DATA.Ni00 = Ni00_; DATA.Nipjz = Nipjz_;
+    DATA.DENS_I = DENS_I_; DATA.TEMP_I = TEMP_I_;
+    if(KIN_E)
+    DATA.Nepj = Nepj_; DATA.Ne00 = Ne00_; DATA.Nepjz = Nepjz_;
+    DATA.DENS_E = DENS_E_; DATA.TEMP_E = TEMP_E_;
+    DATA.HFLUX_XE = HFLUXE_;
     end
-    end
\ No newline at end of file
+    DATA.Ts5D = Ts5D_; DATA.Ts3D = Ts3D_; DATA.Ts0D = Ts0D_;
+    DATA.PHI  = PHI_; 
+    DATA.PSI  = PSI_; 
+    DATA.KIN_E=KIN_E;
+    % grids
+    DATA.Pe = Pe; DATA.Pi = Pi; 
+    DATA.Je = Je; DATA.Ji = Ji; 
+    DATA.kx = kx; DATA.ky = ky; DATA.z = z; DATA.Npol = -z(1)/pi;
+    DATA.x  = x;  DATA.y  = y;
+    DATA.ikx0 = ikx0; DATA.iky0 = iky0;
+    DATA.Nx = Nx; DATA.Ny = Ny; DATA.Nz = Nz; DATA.Nkx = Nkx; DATA.Nky = Nky; 
+    DATA.Pmaxe = numel(Pe); DATA.Pmaxi = numel(Pi); DATA.Jmaxe = numel(Je); DATA.Jmaxi = numel(Ji);
+    DATA.dir      = DIRECTORY;
+    DATA.localdir = DIRECTORY;
+    DATA.param_title=['$\nu_{',DATA.CONAME,'}=$', num2str(DATA.NU), ...
+        ', $\kappa_{Ni}=$',num2str(DATA.K_N),', $\kappa_{Ti}=$',num2str(DATA.K_T),...
+        ', $L=',num2str(DATA.L),'$, $N=',...
+        num2str(DATA.Nx),'$, $(P,J)=(',num2str(DATA.PMAXI),',',...
+        num2str(DATA.JMAXI),')$,',' $\mu_{hd}=$(',num2str(DATA.MUx),...
+        ',',num2str(DATA.MUy),')'];
+    DATA.paramshort = [num2str(DATA.Pmaxi),'x',num2str(DATA.Jmaxi),'x',...
+        num2str(DATA.Nkx),'x',num2str(DATA.Nky),'x',num2str(DATA.Nz)];
+    JOBNUM = LASTJOB;
+
+    filename = sprintf([DIRECTORY,'outputs_%.2d.h5'],JOBNUM);
+end
+end
\ No newline at end of file
diff --git a/matlab/extract_fig_data.m b/matlab/extract_fig_data.m
index 8c8f0d92c40ffbbaccb3a1067262a3630300fc73..825aff0500dd933b4547b26e36718e2aa3a00689 100644
--- a/matlab/extract_fig_data.m
+++ b/matlab/extract_fig_data.m
@@ -4,7 +4,7 @@
 % tw = [3000 4000];
 % tw = [4000 4500];
 % tw = [4500 5000];
-tw = [100 180];
+tw = [00 1000];
 
 fig = gcf;
 axObjs = fig.Children;
diff --git a/matlab/load/compile_results_low_mem.m b/matlab/load/compile_results_low_mem.m
index c86518e824b4876a57e85f64e2968e91d41ac90d..bfe83682dd18aa9d6a3e01a4b9f58bf2569a64b3 100644
--- a/matlab/load/compile_results_low_mem.m
+++ b/matlab/load/compile_results_low_mem.m
@@ -40,6 +40,7 @@ while(CONTINUE)
             W_GAMMA   = strcmp(h5readatt(filename,'/data/input/diag_par','write_gamma'),'y');
             W_HF      = strcmp(h5readatt(filename,'/data/input/diag_par','write_hf'   ),'y');
             KIN_E     = strcmp(h5readatt(filename,'/data/input/model',     'ADIAB_E' ),'n');
+            BETA      = h5readatt(filename,'/data/input/model','beta');
 
             if W_GAMMA
                 [ GGAMMA_RI, Ts0D, ~] = load_0D_data(filename, 'gflux_xi');
diff --git a/matlab/load/load_0D_data.m b/matlab/load/load_0D_data.m
index 7c2182f1281be5310dd2245144305eb005bbb50b..ee55c6fa0f56ae6657c31f525b57b649ffa720c2 100644
--- a/matlab/load/load_0D_data.m
+++ b/matlab/load/load_0D_data.m
@@ -1,8 +1,8 @@
 function [ data, time, dt ] = load_0D_data( filename, variablename )
 %LOAD_0D_DATA load a 0D variable stored in a hdf5 result file from HeLaZ
     time     = h5read(filename,'/data/var0d/time');
-    dt    = h5readatt(filename,'/data/input','dt');
-    cstart= h5readatt(filename,'/data/input','start_iframe0d'); 
+    dt    = h5readatt(filename,'/data/input/basic','dt');
+    cstart= h5readatt(filename,'/data/input/basic','start_iframe0d'); 
     data     = h5read(filename,['/data/var0d/',variablename]);
 
 end
diff --git a/matlab/load/load_3D_data.m b/matlab/load/load_3D_data.m
index 451428f444e97e9a21f94fbe40ddca677fa6a080..47bc4bf072c22380f08784035a8fa75ac9e84d88 100644
--- a/matlab/load/load_3D_data.m
+++ b/matlab/load/load_3D_data.m
@@ -1,8 +1,8 @@
 function [ data, time, dt ] = load_3D_data( filename, variablename )
 %LOAD_3D_DATA load a 3D variable stored in a hdf5 result file from HeLaZ
     time     = h5read(filename,'/data/var3d/time');
-    dt    = h5readatt(filename,'/data/input','dt');
-    cstart= h5readatt(filename,'/data/input','start_iframe3d'); 
+    dt    = h5readatt(filename,'/data/input/basic','dt');
+    cstart= h5readatt(filename,'/data/input/basic','start_iframe3d'); 
     
     % Find array size by loading the first output
     tmp   = h5read(filename,['/data/var3d/',variablename,'/', num2str(cstart+1,'%06d')]);
diff --git a/matlab/load/load_5D_data.m b/matlab/load/load_5D_data.m
index bc96f113bba9decc602b02db6c46ed0cafbcac34..6642a3bebc689f4775aad61ec70fa7eaf9d190a8 100644
--- a/matlab/load/load_5D_data.m
+++ b/matlab/load/load_5D_data.m
@@ -1,24 +1,20 @@
 function [ data, time, dt ] = load_5D_data( filename, variablename )
 %LOAD_5D_DATA load a 5D variable stored in a hdf5 result file from HeLaZ
     time  = h5read(filename,'/data/var5d/time');
-    if strcmp(variablename,'moments_e') || strcmp(variablename,'Sepj')
-        p     = h5read(filename,'/data/grid/coordp_e');
-        j     = h5read(filename,'/data/grid/coordj_e');
-    else
-        p     = h5read(filename,'/data/grid/coordp_i');
-        j     = h5read(filename,'/data/grid/coordj_i');
-    end
+    na    = h5readatt(filename,'/data/input/model','Na');
+    p     = h5read(filename,'/data/grid/coordp');
+    j     = h5read(filename,'/data/grid/coordj');
     kx    = h5read(filename,'/data/grid/coordkx');
     ky    = h5read(filename,'/data/grid/coordky');
     z     = h5read(filename,'/data/grid/coordz');
 
-    dt    = h5readatt(filename,'/data/input','dt');
-    cstart= h5readatt(filename,'/data/input','start_iframe5d'); 
+    dt    = h5readatt(filename,'/data/input/basic','dt');
+    cstart= h5readatt(filename,'/data/input/basic','start_iframe5d'); 
     
-    data  = zeros(numel(p),numel(j),numel(ky),numel(kx),numel(z),numel(time));
+    data  = zeros(na,numel(p),numel(j),numel(ky),numel(kx),numel(z),numel(time));
     
     for it = 1:numel(time)
         tmp          = h5read(filename,['/data/var5d/', variablename,'/', num2str(cstart+it,'%06d')]);
-        data(:,:,:,:,:,it) = tmp.real + 1i * tmp.imaginary;
+        data(:,:,:,:,:,:,it) = tmp.real + 1i * tmp.imaginary;
     end
 end
\ No newline at end of file
diff --git a/matlab/load/load_grid_data.m b/matlab/load/load_grid_data.m
index 940381ec8f880067cb09d19edc1657405fa1a3a1..2a1dd8c9ae2e8c5699704f492792ae68cdbb8ec5 100644
--- a/matlab/load/load_grid_data.m
+++ b/matlab/load/load_grid_data.m
@@ -1,9 +1,9 @@
 function [ pe, je, pi, ji, kx, ky, z ] = load_grid_data( filename )
 %LOAD_GRID_DATA stored in a hdf5 result file from HeLaZ
-    pe    = h5read(filename,'/data/grid/coordp_e');
-    je    = h5read(filename,'/data/grid/coordj_e');
-    pi    = h5read(filename,'/data/grid/coordp_i');
-    ji    = h5read(filename,'/data/grid/coordj_i');
+    pe    = h5read(filename,'/data/grid/coordp');
+    je    = h5read(filename,'/data/grid/coordj');
+    pi    = h5read(filename,'/data/grid/coordp');
+    ji    = h5read(filename,'/data/grid/coordj');
     kx    = h5read(filename,'/data/grid/coordkx');
     ky    = h5read(filename,'/data/grid/coordky');
     z     = h5read(filename,'/data/grid/coordz');
diff --git a/matlab/load/load_params.m b/matlab/load/load_params.m
index 1f1b32a8e3444066b28b6ceda3f7cf4b3bfabbdc..56e128617a8bb1d7c986f62ad986b841297b4a8c 100644
--- a/matlab/load/load_params.m
+++ b/matlab/load/load_params.m
@@ -1,86 +1,81 @@
-DATA.CO      = h5readatt(filename,'/data/input','CO');
-try
-    DATA.ETA_N   = h5readatt(filename,'/data/input','ETA_N');
-    DATA.ETA_T   = h5readatt(filename,'/data/input','ETA_T');
-catch
-    DATA.ETA_N = 1.0;
-    DATA.ETA_T = 1.0;
-end
-try
-    DATA.K_N     = h5readatt(filename,'/data/input','K_n');
-catch
-    try
-        DATA.K_N     = h5readatt(filename,'/data/input','k_N');
-    catch
-        DATA.K_N     = h5readatt(filename,'/data/input','k_Ni');
-    end
-end
+DATA.CO      = h5readatt(filename,'/data/input/coll','CO');
+DATA.K_N     = h5readatt(filename,'/data/input/ions','k_N');
+DATA.K_T     = h5readatt(filename,'/data/input/ions','k_T');
+DATA.Q0      = h5readatt(filename,'/data/input/geometry','q0');
+DATA.EPS     = h5readatt(filename,'/data/input/geometry','eps');
+DATA.SHEAR   = h5readatt(filename,'/data/input/geometry','shear');
+DATA.GEOM    = h5readatt(filename,'/data/input/geometry','geometry');
+% DATA.KAPPA   = h5readatt(filename,'/data/input/geometry','kappa');
+% DATA.DELTA   = h5readatt(filename,'/data/input/geometry','delta');
+
+DATA.DT_SIM  = h5readatt(filename,'/data/input/basic','dt');
+DATA.PMAX    = h5readatt(filename,'/data/input/grid','pmax');
+DATA.JMAX    = h5readatt(filename,'/data/input/grid','jmax');
+DATA.Nx      = h5readatt(filename,'/data/input/grid','Nx');
+DATA.Ny      = h5readatt(filename,'/data/input/grid','Ny');
+DATA.L       = h5readatt(filename,'/data/input/grid','Lx');
 try
-    DATA.K_T     = h5readatt(filename,'/data/input','K_T');
+DATA.CLOS    = h5readatt(filename,'/data/input/model','CLOS');
+DATA.NL_CLOS = h5readatt(filename,'/data/input/model','NL_CLOS');
 catch
     try
-        DATA.K_T     = h5readatt(filename,'/data/input','k_T');
+        DATA.ha_cl   = h5readatt(filename,'/data/input/closure','hierarchy_closure');
+        DATA.CLOS    = h5readatt(filename,'/data/input/closure','dmax');   
+        DATA.nl_cl   = h5readatt(filename,'/data/input/closure','nonlinear_closure');   
+        DATA.NL_CLOS = h5readatt(filename,'/data/input/closure','nmax');   
     catch
-        DATA.K_T     = h5readatt(filename,'/data/input','k_Ti');
+        DATA.CLOS = 99;
+        DATA.NL_CLOS = 99;
     end
 end
-DATA.sigma_e = h5readatt(filename,'/data/input','sigma_e');
-DATA.sigma_i = h5readatt(filename,'/data/input','sigma_i');
-DATA.tau_e   = h5readatt(filename,'/data/input','tau_e');
-DATA.tau_i   = h5readatt(filename,'/data/input','tau_i');
-DATA.q_e     = h5readatt(filename,'/data/input','q_e');
-DATA.q_i     = h5readatt(filename,'/data/input','q_i');
-DATA.Q0      = h5readatt(filename,'/data/input','q0');
-DATA.SHEAR   = h5readatt(filename,'/data/input','shear');
-DATA.EPS     = h5readatt(filename,'/data/input','eps');
-DATA.PMAXI   = h5readatt(filename,'/data/input','pmaxi');
-DATA.JMAXI   = h5readatt(filename,'/data/input','jmaxi');
-DATA.PMAXE   = h5readatt(filename,'/data/input','pmaxe');
-DATA.JMAXE   = h5readatt(filename,'/data/input','jmaxe');
-% DATA.LINEARITY = h5readatt(filename,'/data/input','NON_LIN');
-% DATA.LINEARITY = h5readatt(filename,'/data/input','LINEARITY');
-DATA.NU      = h5readatt(filename,'/data/input','nu');
-DATA.Nx      = h5readatt(filename,'/data/input','Nx');
-DATA.Ny      = h5readatt(filename,'/data/input','Ny');
-DATA.L       = h5readatt(filename,'/data/input','Lx');
-DATA.CLOS    = h5readatt(filename,'/data/input','CLOS');
-DATA.DT_SIM  = h5readatt(filename,'/data/input','dt');
-DATA.MU      = h5readatt(filename,'/data/input','mu');
-DATA.MUx     = h5readatt(filename,'/data/input','mu_x');
-DATA.MUy     = h5readatt(filename,'/data/input','mu_y');
-DATA.MUz     = h5readatt(filename,'/data/input','mu_z');
-try
-    DATA.BETA    = h5readatt(filename,'/data/input','beta');
-catch
-    DATA.BETA  = 0;
-end
-DATA.W_GAMMA   = h5readatt(filename,'/data/input','write_gamma') == 'y';
-DATA.W_PHI     = h5readatt(filename,'/data/input','write_phi')   == 'y';
-DATA.W_NA00    = h5readatt(filename,'/data/input','write_Na00')  == 'y';
-DATA.W_NAPJ    = h5readatt(filename,'/data/input','write_Napj')  == 'y';
-DATA.W_SAPJ    = h5readatt(filename,'/data/input','write_Sapj')  == 'y';
+DATA.Na      = h5readatt(filename,'/data/input/model','Na');
+DATA.NU      = h5readatt(filename,'/data/input/model','nu');
+DATA.MUp     = h5readatt(filename,'/data/input/model','mu_p');
+DATA.MUj     = h5readatt(filename,'/data/input/model','mu_j');
+DATA.MUx     = h5readatt(filename,'/data/input/model','mu_x');
+DATA.MUy     = h5readatt(filename,'/data/input/model','mu_y');
+DATA.MUz     = h5readatt(filename,'/data/input/model','mu_z');
+DATA.LINEARITY = h5readatt(filename,'/data/input/model','LINEARITY');
+DATA.BETA    = h5readatt(filename,'/data/input/model','beta');
+DATA.TAU_E   = h5readatt(filename,'/data/input/model','tau_e');
+DATA.HYP_V   = h5readatt(filename,'/data/input/model','HYP_V');
+DATA.K_cB    = h5readatt(filename,'/data/input/model','k_cB');
+DATA.K_gB    = h5readatt(filename,'/data/input/model','k_gB');
 
-% if DATA.LINEARITY == 'y'
-%     DATA.LINEARITY = 1;
-% else
-%     DATA.LINEARITY = 0;
-% end
+DATA.W_GAMMA   = h5readatt(filename,'/data/input/diag_par','write_gamma') == 'y';
+DATA.W_PHI     = h5readatt(filename,'/data/input/diag_par','write_phi')   == 'y';
+DATA.W_NA00    = h5readatt(filename,'/data/input/diag_par','write_Na00')  == 'y';
+DATA.W_NAPJ    = h5readatt(filename,'/data/input/diag_par','write_Napj')  == 'y';
+DATA.W_SAPJ    = h5readatt(filename,'/data/input/diag_par','write_Sapj')  == 'y';
 
+% Species dependent parameters
+DATA.sigma = zeros(1,DATA.Na);
+DATA.tau   = zeros(1,DATA.Na);
+DATA.q     = zeros(1,DATA.Na);
+DATA.K_N   = zeros(1,DATA.Na);
+DATA.K_T   = zeros(1,DATA.Na);
+spnames = {'ions','electrons'};
+for ia=1:DATA.Na
+    spdata = ['/data/input/',spnames{ia}];
+    DATA.sigma(ia) = h5readatt(filename,spdata,'sigma');
+    DATA.tau(ia)   = h5readatt(filename,spdata,'tau');
+    DATA.q(ia)     = h5readatt(filename,spdata,'q');
+    DATA.K_N(ia)   = h5readatt(filename,spdata,'k_N');
+    DATA.K_T(ia)   = h5readatt(filename,spdata,'k_T');
+end
+DATA.spnames = spnames{1:DATA.Na};
 DATA.CONAME = DATA.CO;
 
 if    (DATA.CLOS == 0); DATA.CLOSNAME = 'Trunc.';
 elseif(DATA.CLOS == 1); DATA.CLOSNAME = 'Clos. 1';
 elseif(DATA.CLOS == 2); DATA.CLOSNAME = 'Clos. 2';
 end
-if (DATA.PMAXE == DATA.PMAXI) && (DATA.JMAXE == DATA.JMAXI)
-    degngrad   = ['P_',num2str(DATA.PMAXE),'_J_',num2str(DATA.JMAXE)];
-else
-    degngrad   = ['Pe_',num2str(DATA.PMAXE),'_Je_',num2str(DATA.JMAXE),...
-        '_Pi_',num2str(DATA.PMAXI),'_Ji_',num2str(DATA.JMAXI)];
-end
+
+degngrad   = ['P_',num2str(DATA.PMAX),'_J_',num2str(DATA.JMAX)];
+
 degngrad = [degngrad,'_Kni_%1.1f_nu_%0.0e_',...
         DATA.CONAME,'_CLOS_',num2str(DATA.CLOS),'_mu_%0.0e'];
-degngrad   = sprintf(degngrad,[DATA.K_N,DATA.NU,DATA.MU]);
+degngrad   = sprintf(degngrad,[DATA.K_N,DATA.NU,DATA.MUx]);
 % if ~DATA.LINEARITY; degngrad = ['lin_',degngrad]; end
 resolution = [num2str(DATA.Nx),'x',num2str(DATA.Ny),'_'];
 gridname   = ['L_',num2str(DATA.L),'_'];
diff --git a/matlab/plot/plot_radial_transport_and_spacetime.m b/matlab/plot/plot_radial_transport_and_spacetime.m
index 098eae036c1b7109e6177d3a20b3aa8637c134ef..3ee858a779b539957b602fd00cbb197fdb1a4f68 100644
--- a/matlab/plot/plot_radial_transport_and_spacetime.m
+++ b/matlab/plot/plot_radial_transport_and_spacetime.m
@@ -1,4 +1,4 @@
-function [FIGURE] = plot_radial_transport_and_spacetime(DATA, OPTIONS,CODE)
+function [FIGURE] = plot_radial_transport_and_spacetime(DATA, OPTIONS)
     %Compute steady radial transport
     tend = OPTIONS.TAVG_1; tstart = OPTIONS.TAVG_0;
     [~,its0D] = min(abs(DATA.Ts0D-tstart));
@@ -11,11 +11,11 @@ function [FIGURE] = plot_radial_transport_and_spacetime(DATA, OPTIONS,CODE)
     Qx_infty_avg = mean(DATA.HFLUX_X(its0D:ite0D))*SCALE;
     Qx_infty_std = std (DATA.HFLUX_X(its0D:ite0D))*SCALE;
 %     disp(['Q_x=',sprintf('%2.2e',Qx_infty_avg),'+-',sprintf('%2.2e',Qx_infty_std)]);
-    f_avg_z      = squeeze(mean(DATA.PHI(:,:,:,:),3));
-    [~,ikzf] = max(squeeze(mean(abs(f_avg_z(1,:,its3D:ite3D)),3)));
-    ikzf = min([ikzf,DATA.Nky]);
-    Ns3D = numel(DATA.Ts3D);
-    [KX, KY] = meshgrid(DATA.kx, DATA.ky);
+%     f_avg_z      = squeeze(mean(DATA.PHI(:,:,:,:),3));
+%     [~,ikzf] = max(squeeze(mean(abs(f_avg_z(1,:,its3D:ite3D)),3)));
+%     ikzf = min([ikzf,DATA.Nky]);
+%     Ns3D = numel(DATA.Ts3D);
+%     [KX, KY] = meshgrid(DATA.kx, DATA.ky);
     %% error estimation
     DT_       = (tend-tstart)/OPTIONS.NCUT;
     Qx_ee     = zeros(1,OPTIONS.NCUT);
@@ -27,42 +27,36 @@ function [FIGURE] = plot_radial_transport_and_spacetime(DATA, OPTIONS,CODE)
     Qx_avg    = mean(Qx_ee);
     Qx_err    =  std(Qx_ee);
     disp(['Q_avg=',sprintf('%2.2e',Qx_avg),'+-',sprintf('%2.2e',Qx_err)]);
-    %% computations
-
-    % Compute zonal and non zonal energies
-    E_Zmode_SK       = zeros(1,Ns3D);
-    E_NZmode_SK      = zeros(1,Ns3D);
-    for it = 1:numel(DATA.Ts3D)
-        E_Zmode_SK(it)   = squeeze(DATA.ky(ikzf).^2.*abs(squeeze(f_avg_z(ikzf,1,it))).^2);
-        E_NZmode_SK(it)  = squeeze(sum(sum(((1+KX.^2+KY.^2).*abs(squeeze(f_avg_z(:,:,it))).^2.*(KY~=0)))));
-    end
-    % Compute thermodynamic entropy Eq.(5) Navarro et al. 2012 PoP
-    % 1/2 sum_p sum_j Napj^2(k=0) (avg z)
-    switch CODE
-        case 'GYACOMO'
-        Nipjz = sum(sum(sum(sum(conj(DATA.Nipj).*DATA.Nipj))));
-        ff = trapz(DATA.z,Nipjz,5);
-        E_TE = 0.5*squeeze(ff);
-        % Compute electrostatic energy
-        E_ES = zeros(size(DATA.Ts5D));
-        bi = sqrt(KX.^2+KY.^2)*DATA.sigma_i*sqrt(2*DATA.tau_i); %argument of the kernel
-        for it5D = 1:numel(DATA.Ts5D)
-            [~,it3D] = min(abs(DATA.Ts3D-DATA.Ts5D(it5D)));
-            for in = 1:DATA.Jmaxi
-                Knphi = kernel(in-1,bi).*squeeze(trapz(DATA.z,DATA.PHI(:,:,:,it3D),3));
-                Ni0n_z= squeeze(trapz(DATA.z,DATA.Nipj(1,in,:,:,:,it5D),5));
-                E_ES(it5D) = 0.5*sum(sum(abs(conj(Knphi).*Ni0n_z)));
-            end
-        end
-        otherwise
-            E_TE = 0; E_ES =0; DATA.Ts5D =[0 1];
-    end
-
+%     %% computations
+% 
+%     % Compute zonal and non zonal energies
+%     E_Zmode_SK       = zeros(1,Ns3D);
+%     E_NZmode_SK      = zeros(1,Ns3D);
+%     for it = 1:numel(DATA.Ts3D)
+%         E_Zmode_SK(it)   = squeeze(DATA.ky(ikzf).^2.*abs(squeeze(f_avg_z(ikzf,1,it))).^2);
+%         E_NZmode_SK(it)  = squeeze(sum(sum(((1+KX.^2+KY.^2).*abs(squeeze(f_avg_z(:,:,it))).^2.*(KY~=0)))));
+%     end
+%     % Compute thermodynamic entropy Eq.(5) Navarro et al. 2012 PoP
+%     % 1/2 sum_p sum_j Napj^2(k=0) (avg z)
+%     Nipjz = sum(sum(sum(sum(conj(DATA.Nipj).*DATA.Nipj))));
+%     ff = trapz(DATA.z,Nipjz,5);
+%     E_TE = 0.5*squeeze(ff);
+%     % Compute electrostatic energy
+%     E_ES = zeros(size(DATA.Ts5D));
+%     bi = sqrt(KX.^2+KY.^2)*DATA.sigma(1)*sqrt(2*DATA.tau(1)); %argument of the kernel
+%     for it5D = 1:numel(DATA.Ts5D)
+%         [~,it3D] = min(abs(DATA.Ts3D-DATA.Ts5D(it5D)));
+%         for in = 1:DATA.Jmaxi
+%             Knphi = kernel(in-1,bi).*squeeze(trapz(DATA.z,DATA.PHI(:,:,:,it3D),3));
+%             Ni0n_z= squeeze(trapz(DATA.z,DATA.Nipj(1,in,:,:,:,it5D),5));
+%             E_ES(it5D) = 0.5*sum(sum(abs(conj(Knphi).*Ni0n_z)));
+%         end
+%     end
 %% Figure    
 clr_ = lines(20);
 mvm = @(x) movmean(x,OPTIONS.NMVA);
     FIGURE.fig = figure; FIGURE.FIGNAME = ['ZF_transport_drphi','_',DATA.PARAMS]; %set(gcf, 'Position',  [500, 1000, 1000, 600])
-    FIGURE.ax1 = subplot(3,1,1,'parent',FIGURE.fig);
+    FIGURE.ax1 = subplot(2,1,1,'parent',FIGURE.fig);
         plot(mvm(DATA.Ts0D),mvm(DATA.PGAMMA_RI*SCALE),'--',...
             'color',clr_((DATA.Pmaxi-1)/2,:),...
             'DisplayName',['$\Gamma_x$ ',DATA.paramshort]); hold on;
@@ -86,16 +80,16 @@ mvm = @(x) movmean(x,OPTIONS.NMVA);
     title({DATA.param_title,...
         ['$\Gamma^{\infty} = $',num2str(Gx_infty_avg),'$, Q^{\infty} = $',num2str(Qx_infty_avg)]});
     
- %% Free energy    
-    FIGURE.ax2 = subplot(3,1,2,'parent',FIGURE.fig);
-    yyaxis left
-        plot(DATA.Ts5D,E_TE,'DisplayName','$\epsilon_f$'); hold on;
-        ylabel('Entropy');%('$\epsilon_f$')
-    yyaxis right
-        plot(DATA.Ts5D,E_ES,'DisplayName','$\epsilon_\phi$');
-        ylabel('ES energy');%('$\epsilon_\phi$')
-        xlim([DATA.Ts5D(1), DATA.Ts5D(end)]);
-        xlabel('$t c_s/R$'); grid on; set(gca,'xticklabel',[]);% xlim([0 500]);
+%  %% Free energy    
+%     FIGURE.ax2 = subplot(3,1,2,'parent',FIGURE.fig);
+%     yyaxis left
+%         plot(DATA.Ts5D,E_TE,'DisplayName','$\epsilon_f$'); hold on;
+%         ylabel('Entropy');%('$\epsilon_f$')
+%     yyaxis right
+%         plot(DATA.Ts5D,E_ES,'DisplayName','$\epsilon_\phi$');
+%         ylabel('ES energy');%('$\epsilon_\phi$')
+%         xlim([DATA.Ts5D(1), DATA.Ts5D(end)]);
+%         xlabel('$t c_s/R$'); grid on; set(gca,'xticklabel',[]);% xlim([0 500]);
 %% radial shear radial profile
     % computation
     Ns3D = numel(DATA.Ts3D);
@@ -114,7 +108,7 @@ mvm = @(x) movmean(x,OPTIONS.NMVA);
     f2plot = toplot.FIELD;
     dframe = ite3D - its3D;
     clim = max(max(max(abs(plt(f2plot(:,:,:))))));
-    FIGURE.ax3 = subplot(3,1,3,'parent',FIGURE.fig);
+    FIGURE.ax3 = subplot(2,1,2,'parent',FIGURE.fig);
         [TY,TX] = meshgrid(DATA.x,DATA.Ts3D(toplot.FRAMES));
         pclr = pcolor(TX,TY,squeeze(plt(f2plot))'); 
         set(pclr, 'edgecolor','none'); 
diff --git a/matlab/profiler.m b/matlab/profiler.m
index db06c080792d27b42523e36bfe1173e761da0880..5056f61e4e709e2f65f094e9dd41484c57c8bfb0 100644
--- a/matlab/profiler.m
+++ b/matlab/profiler.m
@@ -1,48 +1,55 @@
+function [] = profiler(data)
 %% load profiling
 % filename = sprintf([BASIC.RESDIR,'outputs_%.2d.h5'],00);
 % outfilename = ['/misc/HeLaZ_outputs',filename(3:end)];
-outfilename = data.outfilenames{end};
-CPUTIME   = double(h5readatt(outfilename,'/data/input','cpu_time'));
-DT_SIM    = h5readatt(outfilename,'/data/input','dt');
+CPUTI=[]; DTSIM=[]; RHSTC=[]; POITC=[]; SAPTC=[]; COLTC=[];
+GRATC=[]; NADTC=[];  ADVTC=[]; GHOTC=[]; CLOTC=[]; CHKTC=[];
+DIATC=[]; STETC=[]; TS0TC=[];
+for i = 1:numel(data.outfilenames)
+    outfilename = data.outfilenames{i};
+    CPUTI = [ CPUTI; double(h5readatt(outfilename,'/data/input','cpu_time'))];
+    DTSIM = [ DTSIM; h5readatt(outfilename,'/data/input/basic','dt')];
 
+    RHSTC = [ RHSTC; h5read(outfilename,'/profiler/Tc_rhs')];
+    POITC = [ POITC; h5read(outfilename,'/profiler/Tc_poisson')];
+    SAPTC = [ SAPTC; h5read(outfilename,'/profiler/Tc_Sapj')];
+    COLTC = [ COLTC; h5read(outfilename,'/profiler/Tc_coll')];
+    GRATC = [ GRATC; h5read(outfilename,'/profiler/Tc_grad')];
+    NADTC = [ NADTC; h5read(outfilename,'/profiler/Tc_nadiab')];
+    ADVTC = [ ADVTC; h5read(outfilename,'/profiler/Tc_adv_field')];
+    GHOTC = [ GHOTC; h5read(outfilename,'/profiler/Tc_ghost')];
+    CLOTC = [ CLOTC; h5read(outfilename,'/profiler/Tc_clos')];
+    CHKTC = [ CHKTC; h5read(outfilename,'/profiler/Tc_checkfield')];
+    DIATC = [ DIATC; h5read(outfilename,'/profiler/Tc_diag')];
+    STETC = [ STETC; h5read(outfilename,'/profiler/Tc_step')];
+    TS0TC = [ TS0TC; h5read(outfilename,'/profiler/time')];
+end
 
-rhs_Tc       = h5read(outfilename,'/profiler/Tc_rhs');
-poisson_Tc   = h5read(outfilename,'/profiler/Tc_poisson');
-Sapj_Tc      = h5read(outfilename,'/profiler/Tc_Sapj');
-coll_Tc      = h5read(outfilename,'/profiler/Tc_coll');
-process_Tc   = h5read(outfilename,'/profiler/Tc_process');
-adv_field_Tc = h5read(outfilename,'/profiler/Tc_adv_field');
-ghost_Tc      = h5read(outfilename,'/profiler/Tc_ghost');
-clos_Tc      = h5read(outfilename,'/profiler/Tc_clos');
-checkfield_Tc= h5read(outfilename,'/profiler/Tc_checkfield');
-diag_Tc      = h5read(outfilename,'/profiler/Tc_diag');
-step_Tc      = h5read(outfilename,'/profiler/Tc_step');
-Ts0D         = h5read(outfilename,'/profiler/time');
-
-N_T          = 11;
+N_T          = 12;
 
-missing_Tc   = step_Tc - rhs_Tc - adv_field_Tc - ghost_Tc -clos_Tc ...
-              -coll_Tc -poisson_Tc -Sapj_Tc -checkfield_Tc -diag_Tc-process_Tc;
-total_Tc     = step_Tc;
+missing_Tc   = STETC - RHSTC - ADVTC - GHOTC - CLOTC ...
+              -COLTC - POITC - SAPTC - CHKTC - DIATC - GRATC - NADTC;
+total_Tc     = STETC;
 
-TIME_PER_FCT = [diff(rhs_Tc); diff(adv_field_Tc); diff(ghost_Tc);...
-    diff(clos_Tc); diff(coll_Tc); diff(poisson_Tc); diff(Sapj_Tc); ...
-    diff(checkfield_Tc); diff(diag_Tc); diff(process_Tc); diff(missing_Tc)];
+TIME_PER_FCT = [diff(RHSTC); diff(ADVTC); diff(GHOTC);...
+    diff(CLOTC); diff(COLTC); diff(POITC); diff(SAPTC); ...
+    diff(CHKTC); diff(DIATC); diff(GRATC); diff(NADTC); diff(missing_Tc)];
 TIME_PER_FCT = reshape(TIME_PER_FCT,[numel(TIME_PER_FCT)/N_T,N_T]);
 
 TIME_PER_STEP = sum(TIME_PER_FCT,2);
-TIME_PER_CPU  = trapz(Ts0D(2:end),TIME_PER_STEP);
+TIME_PER_CPU  = trapz(TS0TC(2:end),TIME_PER_STEP);
 
-rhs_Ta        = mean(diff(rhs_Tc));
-adv_field_Ta  = mean(diff(adv_field_Tc));
-ghost_Ta      = mean(diff(ghost_Tc));
-clos_Ta       = mean(diff(clos_Tc));
-coll_Ta       = mean(diff(coll_Tc));
-poisson_Ta    = mean(diff(poisson_Tc));
-Sapj_Ta       = mean(diff(Sapj_Tc));
-checkfield_Ta = mean(diff(checkfield_Tc));
-process_Ta    = mean(diff(process_Tc));
-diag_Ta       = mean(diff(diag_Tc));
+rhs_Ta        = mean(diff(RHSTC));
+adv_field_Ta  = mean(diff(ADVTC));
+ghost_Ta      = mean(diff(GHOTC));
+clos_Ta       = mean(diff(CLOTC));
+coll_Ta       = mean(diff(COLTC));
+poisson_Ta    = mean(diff(POITC));
+Sapj_Ta       = mean(diff(SAPTC));
+checkfield_Ta = mean(diff(CHKTC));
+grad_Ta       = mean(diff(GRATC));
+nadiab_Ta     = mean(diff(NADTC));
+diag_Ta       = mean(diff(DIATC));
 miss_Ta       = mean(diff(missing_Tc));
 total_Ta      = mean(diff(total_Tc));
 names = {...
@@ -50,17 +57,18 @@ names = {...
     'Advf';
     'Ghst';
     'Clos';
-    'Coll';
+    'Capj';
     'Pois';
     'Sapj';
     'Chck';
     'Diag';
-    'Proc';
+    'Grad';
+    'napj';
     'Miss';
 };
 Ts_A = [rhs_Ta adv_field_Ta ghost_Ta clos_Ta coll_Ta...
-    poisson_Ta Sapj_Ta checkfield_Ta diag_Ta process_Ta miss_Ta];
-NSTEP_PER_SAMP= mean(diff(Ts0D))/DT_SIM;
+    poisson_Ta Sapj_Ta checkfield_Ta diag_Ta grad_Ta  nadiab_Ta miss_Ta];
+NSTEP_PER_SAMP= mean(diff(TS0TC))/DTSIM;
 
 %% Plots
 if 1
@@ -69,7 +77,7 @@ fig = figure;
 % colors = rand(N_T,3);
 % colors = lines(N_T);
 colors = distinguishable_colors(N_T);
-x_ = Ts0D(2:end);
+x_ = TS0TC(2:end);
 y_ = TIME_PER_FCT;
 xx_= zeros(2*numel(x_),1);
 yy_= zeros(2*numel(x_),numel(names));
@@ -81,23 +89,23 @@ for i = 2:numel(x_)
     dx = x_(i) - x_(i-1);
     xx_(2*i-1) = x_(i)-dx/2;
     xx_(2*i  ) = x_(i)+dx/2;
-    yy_(2*i-1,:) = y_(i,:)/(dx/DT_SIM);
-    yy_(2*i  ,:) = y_(i,:)/(dx/DT_SIM);
+    yy_(2*i-1,:) = y_(i,:)/(dx/DTSIM);
+    yy_(2*i  ,:) = y_(i,:)/(dx/DTSIM);
 end
 p1 = area(xx_,yy_,'LineStyle','none');
 for i = 1:N_T; p1(i).FaceColor = colors(i,:);
 %     LEGEND{i} = sprintf('%s t=%1.1e[s] (%0.1f %s)',names{i},Ts_A(i),Ts_A(i)/total_Tc(end)*100,'\%');
     LEGEND{i} = [names{i},' $\hat t=$',sprintf('%1.1e[s] (%0.1f %s)',Ts_A(i)/NSTEP_PER_SAMP,Ts_A(i)/total_Ta*100,'\%')];
 end;
-legend(LEGEND);
+legend(LEGEND,'Location','bestoutside');
 % legend('Compute RHS','Adv. fields','ghosts comm', 'closure', 'collision','Poisson','Nonlin','Check+sym', 'Diagnos.', 'Process', 'Missing')
-xlabel('t. u.'); ylabel('Step Comp. Time [s]')
-xlim([Ts0D(2),Ts0D(end)]);
-ylim([0, 1.1*CPUTIME/(Ts0D(end)/DT_SIM)])
-h_ = floor(CPUTIME/3600);
-m_ = floor(floor(CPUTIME/60)-60*h_);
-s_ = CPUTIME - 3600*h_ - 60*m_;
-title(sprintf('Gyacomo 1 (%.0f [h] ~%.0f [min] ~%.0f [s])',...
+xlabel('Sim. Time [$\rho_s/c_s$]'); ylabel('Step Comp. Time [s]')
+xlim([TS0TC(2),TS0TC(end)]);
+ylim([0, 1.1*CPUTI/(TS0TC(end)/DTSIM)])
+h_ = floor(CPUTI/3600);
+m_ = floor(floor(CPUTI/60)-60*h_);
+s_ = CPUTI - 3600*h_ - 60*m_;
+title(sprintf('Gyacomo 2 (%.0f [h] ~%.0f [min] ~%.0f [s])',...
     h_,m_,s_))
 hold on
 FIGNAME = 'profiler';
@@ -147,4 +155,5 @@ xlabel('Step Comp. Time [s]'); ylabel('')
 set(gca,'Xscale','log')
 FIGNAME = 'profiler';
 % save_figure
+end
 end
\ No newline at end of file
diff --git a/matlab/setup.m b/matlab/setup.m
index 468f08c57ea4feab7a6d2d3ce9319d126347fe6d..aa1c410ad82fe19fbfbeee48503839deda3cb3dc 100644
--- a/matlab/setup.m
+++ b/matlab/setup.m
@@ -58,8 +58,8 @@ MODEL.k_cB   = k_cB;      % Magnetic curvature
 MODEL.lambdaD = LAMBDAD;
 % Collision parameters
 COLL.collision_model = ['''',CO,''''];
-if (GKCO); COLL.gyrokin_CO = '.true.'; else; COLL.gyrokin_CO = '.false.';end;
-if (ABCO); COLL.interspecies = '.true.'; else; COLL.interspecies = '.false.';end;
+if (GKCO); COLL.GK_CO = '.true.'; else; COLL.GK_CO = '.false.';end;
+if (ABCO); COLL.INTERSPECIES = '.true.'; else; COLL.INTERSPECIES = '.false.';end;
 COLL.mat_file   = '''null''';
 switch CO
     case 'SG'
diff --git a/matlab/write_fort90.m b/matlab/write_fort90.m
index 54e7f08b71adccef5c9afbd3bd596c64ed63fdb9..23bbd924d1d4ce8c384719e89753396c321791e0 100644
--- a/matlab/write_fort90.m
+++ b/matlab/write_fort90.m
@@ -86,8 +86,8 @@ fprintf(fid,'/\n');
 
 fprintf(fid,'&COLLISION_PAR\n');
 fprintf(fid,['  collision_model = ', COLL.collision_model,'\n']);
-fprintf(fid,['  gyrokin_CO      = ', COLL.gyrokin_CO,'\n']);
-fprintf(fid,['  interspecies    = ', COLL.interspecies,'\n']);
+fprintf(fid,['  GK_CO      = ', COLL.GK_CO,'\n']);
+fprintf(fid,['  INTERSPECIES    = ', COLL.INTERSPECIES,'\n']);
 fprintf(fid,['  mat_file        = ', COLL.mat_file,'\n']);
 fprintf(fid,['  collision_kcut  = ', num2str(COLL.coll_kcut),'\n']);
 fprintf(fid,'/\n');
diff --git a/scripts/job_pipeline.sh b/scripts/job_pipeline.sh
deleted file mode 100644
index 7cf4f3348f9b3447b512c73de67e0bd6d6be957a..0000000000000000000000000000000000000000
--- a/scripts/job_pipeline.sh
+++ /dev/null
@@ -1,44 +0,0 @@
-#! /bin/bash
-#This script automatizes the launch of multiple job with chain dependency. 
-nu_=0.01
-dnu=0.01
-
-Tm_=2000
-dTm=2000
-
-# First submit
-lastji=$(sbatch submit_00.cmd)
-lastjid=${lastji##* }
-echo $lastji
-#echo $lastjid
-
-for i in {1..1}; do
-    # Setup indices of job id (current and previous one)
-    im1=$(awk "BEGIN {print $i-1}")
-    idm1=$(printf '%02d' $im1)
-    id=$(printf '%02d' $i)
-    
-    # Create new submit file from older one
-    awk -v "ID=$id" '{ 
-            if (NR == 8) print "#SBATCH --error=err_"ID".txt";
-            else if (NR == 9) print "#SBATCH --output=out_"ID".txt";
-            else if (NR == 12) print "srun --cpu-bind=cores ./gyacomo 2 24 1 "ID;
-            else print $0}' submit_$idm1.cmd > submit_$id.cmd
- 
-    # Create new fort file from older one
-    awk -v "NU=$nu_" -v "TM=$Tm_" -v "J2L=$im1" '{ 
-            if (NR == 04) print "  tmax       = "TM;
-       else if (NR == 40) print "  job2load   = "J2L;
-       else if (NR == 54) print "  nu         = "NU;
-       else print $0}' fort_$idm1.90 > fort_$id.90
-    
-    # Retrieve last jobid and launch next job with dep
-    lastji=$(sbatch --dependency=afterok:$lastjid submit_0$i.cmd)
-    lastjid=${lastji##* }
-    echo $lastjid
-    
-    # Increment variables
-    nu_=$(awk "BEGIN {print $nu_+$dnu}")
-    Tm_=$(awk "BEGIN {print $Tm_+$dTm}")
-done
-
diff --git a/scripts/scan_kN_pipeline.sh b/scripts/scan_kN_pipeline.sh
deleted file mode 100644
index 8b9cef7e67996733bebf806abc7ad0e4f3987d8f..0000000000000000000000000000000000000000
--- a/scripts/scan_kN_pipeline.sh
+++ /dev/null
@@ -1,59 +0,0 @@
-#! /bin/bash
-# lastjid=$(sbatch submit_00.cmd)
-
-nu_=0.01
-dnu=0.0
-kn_=1.7
-dkn=0.2
-kt_=0.425
-dkt=0.05
-Lx_=150
-dLx=030
-
-Tm_=4000
-dTm=1000
-
-# First submit
-istart=0
-lastji=$(sbatch submit_0$istart.cmd)
-lastjid=${lastji##* }
-echo $lastji
-
-for i in {1..5}; do
-    # Setup indices of job id (current and previous one)
-    im1=$(awk "BEGIN {print $i-1}")
-    idm1=$(printf '%02d' $im1)
-    id=$(printf '%02d' $i)
-    
-    # Create new submit file from older one
-    awk -v "ID=$id" '{ 
-            if (NR == 8) print "#SBATCH --error=err_"ID".txt";
-            else if (NR == 9) print "#SBATCH --output=out_"ID".txt";
-            else if (NR == 12) print "srun --cpu-bind=cores ./gyacomo 2 24 1 "ID;
-            else print $0}' submit_$idm1.cmd > submit_$id.cmd
- 
-    # Create new fort file from older one
-    awk -v "NU=$nu_" -v "TM=$Tm_" -v "J2L=$im1" -v "KN=$kn_" -v "KT=$kt_" -v "LX=$Lx_" '{ 
-            if (NR == 04) print "  tmax       = "TM;
-       else if (NR == 40) print "  job2load   = "J2L;
-       else if (NR == 13) print "  Lx         = "LX;
-       else if (NR == 54) print "  nu         = "NU;
-       else if (NR == 61) print "  K_Ne       = "KN;
-       else if (NR == 62) print "  K_Ni       = "KN;
-       else if (NR == 63) print "  K_Te       = "KT;
-       else if (NR == 64) print "  K_Ti       = "KT;
-       else print $0}' fort_$idm1.90 > fort_$id.90
-    
-    # Retrieve last jobid and launch next job with dep
-    lastji=$(sbatch --dependency=afterok:$lastjid submit_$id.cmd)
-    lastjid=${lastji##* }
-    echo $lastjid
-    
-    # Increment variables
-    Lx_=$(awk "BEGIN {print $Lx_+$dLx}")
-    nu_=$(awk "BEGIN {print $nu_+$dnu}")
-    kn_=$(awk "BEGIN {print $kn_+$dkn}")    
-    kt_=$(awk "BEGIN {print $kt_+$dkt}")
-    Tm_=$(awk "BEGIN {print $Tm_+$dTm}")
-done
-
diff --git a/src/advance_field_mod.F90 b/src/advance_field_mod.F90
index 5fd45d9f0976ef9ded8a1ed3c14c1965feaab19c..9ee8c4be44c09daff72b78a13fff0815c6aaea09 100644
--- a/src/advance_field_mod.F90
+++ b/src/advance_field_mod.F90
@@ -6,77 +6,66 @@ implicit none
 CONTAINS
 
   SUBROUTINE advance_time_level
-
-    USE basic
-    USE time_integration
-    use prec_const
+    USE time_integration, ONLY :set_updatetlevel, updatetlevel, ntimelevel
     IMPLICIT NONE
     CALL set_updatetlevel(mod(updatetlevel,ntimelevel)+1)
   END SUBROUTINE advance_time_level
 
   SUBROUTINE advance_moments
 
-    USE basic
-    USE time_integration
-    USE grid
-    use prec_const
-    USE model,  ONLY: CLOS, KIN_E
-    use fields, ONLY: moments_e,     moments_i
-    use array,  ONLY: moments_rhs_e, moments_rhs_i
+    USE basic,  ONLY: dt
+    USE grid,   ONLY:local_na,local_np,local_nj,local_nky,local_nkx,local_nz,&
+                     ngp, ngj, ngz
+    USE closure,ONLY: evolve_mom
+    use fields, ONLY: moments
+    use array,  ONLY: moments_rhs
+    USE time_integration, ONLY: updatetlevel, A_E, b_E, ntimelevel
     IMPLICIT NONE
-    INTEGER :: p_int, j_int
-
-    CALL cpu_time(t0_adv_field)
-    IF(KIN_E) THEN
-      DO ip=ips_e,ipe_e
-        p_int = parray_e(ip)
-        DO ij=ijs_e,ije_e
-          j_int = jarray_e(ij)
-          IF((CLOS .NE. 1) .OR. (p_int+2*j_int .LE. dmaxe))&
-          CALL advance_field(moments_e(ip,ij,ikys:ikye,ikxs:ikxe,izs:ize,:), moments_rhs_e(ip,ij,ikys:ikye,ikxs:ikxe,izs:ize,:))
-        ENDDO
-      ENDDO
-    ENDIF
-    DO ip=ips_i,ipe_i
-      p_int = parray_i(ip)
-      DO ij=ijs_i,ije_i
-        j_int = jarray_i(ij)
-        IF((CLOS .NE. 1) .OR. (p_int+2*j_int .LE. dmaxi))&
-        CALL advance_field(moments_i(ip,ij,ikys:ikye,ikxs:ikxe,izs:ize,:), moments_rhs_i(ip,ij,ikys:ikye,ikxs:ikxe,izs:ize,:))
-      ENDDO
-    ENDDO
-    ! Execution time end
-    CALL cpu_time(t1_adv_field)
-    tc_adv_field = tc_adv_field + (t1_adv_field - t0_adv_field)
-  END SUBROUTINE advance_moments
-
-
-  SUBROUTINE advance_field( f, f_rhs )
-
-    USE basic
-    USE time_integration
-    USE array
-    USE grid
-    use prec_const
-    IMPLICIT NONE
-
-    COMPLEX(dp), DIMENSION ( ikys:ikye, ikxs:ikxe, izs:ize, ntimelevel ) :: f
-    COMPLEX(dp), DIMENSION ( ikys:ikye, ikxs:ikxe, izs:ize, ntimelevel ) :: f_rhs
-    INTEGER :: istage
-
+    INTEGER :: ia, ip, ij, ikx,iky,iz, istage, ipi, iji, izi
     SELECT CASE (updatetlevel)
     CASE(1)
-        DO istage=1,ntimelevel
-          f(ikys:ikye,ikxs:ikxe,izs:ize,1) = f(ikys:ikye,ikxs:ikxe,izs:ize,1) &
-                   + dt*b_E(istage)*f_rhs(ikys:ikye,ikxs:ikxe,izs:ize,istage)
-        END DO
+      DO istage=1,ntimelevel
+      DO iz    =1,local_nz
+        izi = iz+ngz/2
+      DO ikx   =1,local_nkx
+      DO iky   =1,local_nky
+      DO ij    =1,local_nj
+        iji = ij+ngj/2
+      DO ip    =1,local_np
+        ipi = ip+ngp/2
+      DO ia    =1,local_na
+        IF( evolve_mom(ipi,iji) )&
+        moments(ia,ipi,iji,iky,ikx,izi,1) = moments(ia,ipi,iji,iky,ikx,izi,1) &
+               + dt*b_E(istage)*moments_rhs(ia,ip,ij,iky,ikx,iz,istage)
+      END DO
+      END DO
+      END DO
+      END DO
+      END DO
+      END DO
+      END DO
     CASE DEFAULT
-        f(ikys:ikye,ikxs:ikxe,izs:ize,updatetlevel) = f(ikys:ikye,ikxs:ikxe,izs:ize,1);
-        DO istage=1,updatetlevel-1
-          f(ikys:ikye,ikxs:ikxe,izs:ize,updatetlevel) = f(ikys:ikye,ikxs:ikxe,izs:ize,updatetlevel) + &
-                            dt*A_E(updatetlevel,istage)*f_rhs(ikys:ikye,ikxs:ikxe,izs:ize,istage)
-        END DO
+      moments(:,:,:,:,:,:,updatetlevel) = moments(:,:,:,:,:,:,1);
+      DO istage=1,ntimelevel-1
+      DO iz    =1,local_nz
+        izi = iz+ngz/2
+      DO ikx   =1,local_nkx
+      DO iky   =1,local_nky
+      DO ij    =1,local_nj
+        iji = ij+ngj/2
+      DO ip    =1,local_np
+        ipi = ip+ngp/2
+      DO ia    =1,local_na
+        IF( evolve_mom(ipi,iji) )&
+        moments(ia,ipi,iji,iky,ikx,izi,updatetlevel) = moments(ia,ipi,iji,iky,ikx,izi,updatetlevel) + &
+                          dt*A_E(updatetlevel,istage)*moments_rhs(ia,ip,ij,iky,ikx,iz,istage)
+      END DO
+      END DO
+      END DO
+      END DO
+      END DO
+      END DO
+      END DO
     END SELECT
-  END SUBROUTINE advance_field
-
+  END SUBROUTINE advance_moments
 END MODULE advance_field_routine
diff --git a/src/array_mod.F90 b/src/array_mod.F90
index 213c108be9650f8109646c20165a7681ab7fdcf0..749167837c967c7475c6d9a92812057bfce50576 100644
--- a/src/array_mod.F90
+++ b/src/array_mod.F90
@@ -3,98 +3,66 @@ MODULE array
   use prec_const
   implicit none
 
-  ! Arrays to store the rhs, for time integration (ip,ij,iky,ikx,iz,updatetlevel)
-  COMPLEX(dp), DIMENSION(:,:,:,:,:,:), ALLOCATABLE :: moments_rhs_e
-  COMPLEX(dp), DIMENSION(:,:,:,:,:,:), ALLOCATABLE :: moments_rhs_i
+  ! Arrays to store the rhs, for time integration (ia,ip,ij,iky,ikx,iz)
+  COMPLEX(xp), DIMENSION(:,:,:,:,:,:,:), ALLOCATABLE :: moments_rhs
 
   ! Arrays of non-adiabatique moments
-  COMPLEX(dp), DIMENSION(:,:,:,:,:), ALLOCATABLE :: nadiab_moments_e
-  COMPLEX(dp), DIMENSION(:,:,:,:,:), ALLOCATABLE :: nadiab_moments_i
+  COMPLEX(xp), DIMENSION(:,:,:,:,:,:), ALLOCATABLE :: nadiab_moments
 
   ! Derivatives and interpolated moments
-  COMPLEX(dp), DIMENSION(:,:,:,:,:), ALLOCATABLE :: ddz_nepj
-  COMPLEX(dp), DIMENSION(:,:,:,:,:), ALLOCATABLE :: interp_nepj
-  COMPLEX(dp), DIMENSION(:,:,:,:,:), ALLOCATABLE :: ddzND_Nepj
-  COMPLEX(dp), DIMENSION(:,:,:,:,:), ALLOCATABLE :: ddz_nipj
-  COMPLEX(dp), DIMENSION(:,:,:,:,:), ALLOCATABLE :: interp_nipj
-  COMPLEX(dp), DIMENSION(:,:,:,:,:), ALLOCATABLE :: ddzND_Nipj
-
-  ! Arrays to store special initial modes (semi linear simulation)
-  ! Zonal ones (ky=0)
-  COMPLEX(dp), DIMENSION(:,:,:,:), ALLOCATABLE :: moments_e_ZF
-  COMPLEX(dp), DIMENSION(:,:,:,:), ALLOCATABLE :: moments_i_ZF
-  COMPLEX(dp), DIMENSION(:,:),     ALLOCATABLE :: phi_ZF
-  ! non-zonal modes (kx=0)
-  COMPLEX(dp), DIMENSION(:,:,:,:), ALLOCATABLE :: moments_e_NZ
-  COMPLEX(dp), DIMENSION(:,:,:,:), ALLOCATABLE :: moments_i_NZ
-  COMPLEX(dp), DIMENSION(:,:),     ALLOCATABLE :: phi_NZ
+  COMPLEX(xp), DIMENSION(:,:,:,:,:,:), ALLOCATABLE :: ddz_napj
+  COMPLEX(xp), DIMENSION(:,:,:,:,:,:), ALLOCATABLE :: interp_napj
+  COMPLEX(xp), DIMENSION(:,:,:,:,:,:), ALLOCATABLE :: ddzND_Napj
 
   ! Non linear term array (ip,ij,iky,ikx,iz)
-  COMPLEX(dp), DIMENSION(:,:,:,:,:), ALLOCATABLE :: Sepj ! electron
-  COMPLEX(dp), DIMENSION(:,:,:,:,:), ALLOCATABLE :: Sipj ! ion
+  COMPLEX(xp), DIMENSION(:,:,:,:,:,:), ALLOCATABLE :: Sapj ! electron
 
-  ! To load collision matrix (ip,ij,iky,ikx,iz)
-  REAL(dp), DIMENSION(:,:,:,:,:), ALLOCATABLE :: Ceepj, CeipjT
-  REAL(dp), DIMENSION(:,:,:,:,:), ALLOCATABLE :: CeipjF
-  REAL(dp), DIMENSION(:,:,:,:,:), ALLOCATABLE :: Ciipj, CiepjT
-  REAL(dp), DIMENSION(:,:,:,:,:), ALLOCATABLE :: CiepjF
+  ! a-a collision matrix (ia,ip,ij,iky,ikx,iz)
+  REAL(xp), DIMENSION(:,:,:,:,:,:), ALLOCATABLE :: Caa
+  ! Test and field collision matrices (ia,ib,ip,ij,iky,ikx,iz)
+  REAL(xp), DIMENSION(:,:,:,:,:,:,:), ALLOCATABLE :: Cab_F, Cab_T
+  ! nu x self collision matrix nuCself = nuaa*Caa + sum_b_neq_a nu_ab Cab_T
+  REAL(xp), DIMENSION(:,:,:,:,:,:), ALLOCATABLE :: nuCself
 
   ! Collision term (ip,ij,iky,ikx,iz)
-  COMPLEX(dp), DIMENSION(:,:,:,:,:), ALLOCATABLE :: TColl_e, TColl_i
-  COMPLEX(dp), DIMENSION(:,:,:,:,:), ALLOCATABLE :: TColl_e_local, TColl_i_local
+  COMPLEX(xp), DIMENSION(:,:,:,:,:,:), ALLOCATABLE :: Capj
+  COMPLEX(xp), DIMENSION(:,:,:,:,:), ALLOCATABLE   :: TColl_e_local, TColl_i_local
 
   ! dnjs coefficient storage (in, ij, is)
-  COMPLEX(dp), DIMENSION(:,:,:), ALLOCATABLE :: dnjs
+  COMPLEX(xp), DIMENSION(:,:,:), ALLOCATABLE :: dnjs
 
   ! Hermite fourth derivative coeff storage 4*sqrt(p!/(p-4)!)
-  COMPLEX(dp), DIMENSION(:), ALLOCATABLE :: dv4_Hp_coeff
+  COMPLEX(xp), DIMENSION(:), ALLOCATABLE :: dv4_Hp_coeff
 
   ! lin rhs p,j coefficient storage (ip,ij)
-  REAL(dp), DIMENSION(:,:), ALLOCATABLE :: xnepj,xnipj
-  REAL(dp), DIMENSION(:),   ALLOCATABLE :: xnepp1j, xnepm1j,   xnepp2j,   xnepm2j, xnepjp1, xnepjm1
-  REAL(dp), DIMENSION(:,:), ALLOCATABLE :: ynepp1j, ynepm1j,   ynepp1jm1, ynepm1jm1 ! mirror lin coeff for non adiab mom
-  REAL(dp), DIMENSION(:,:), ALLOCATABLE :: zNepm1j, zNepm1jp1, zNepm1jm1            ! mirror lin coeff for adiab mom
-  REAL(dp), DIMENSION(:),   ALLOCATABLE :: xnipp1j, xnipm1j, xnipp2j, xnipm2j, xnipjp1, xnipjm1
-  REAL(dp), DIMENSION(:,:), ALLOCATABLE :: ynipp1j, ynipm1j,   ynipp1jm1, ynipm1jm1 ! mirror lin coeff for non adiab mom
-  REAL(dp), DIMENSION(:,:), ALLOCATABLE :: zNipm1j, zNipm1jp1, zNipm1jm1            ! mirror lin coeff for adiab mom
-  REAL(dp), DIMENSION(:,:), ALLOCATABLE :: xphij_e, xphijp1_e, xphijm1_e
-  REAL(dp), DIMENSION(:,:), ALLOCATABLE :: xphij_i, xphijp1_i, xphijm1_i
-  REAL(dp), DIMENSION(:,:), ALLOCATABLE :: xpsij_e, xpsijp1_e, xpsijm1_e
-  REAL(dp), DIMENSION(:,:), ALLOCATABLE :: xpsij_i, xpsijp1_i, xpsijm1_i
-  ! Kernel function evaluation (ij,iky,ikx,iz,odd/even p)
-  REAL(dp), DIMENSION(:,:,:,:,:), ALLOCATABLE :: kernel_e
-  REAL(dp), DIMENSION(:,:,:,:,:), ALLOCATABLE :: kernel_i
+  REAL(xp), DIMENSION(:,:,:), ALLOCATABLE :: xnapj
+  REAL(xp), DIMENSION(:,:),   ALLOCATABLE :: xnapp1j, xnapp2j,   xnapm1j, xnapm2j, xnapjp1, xnapjm1
+  REAL(xp), DIMENSION(:,:,:), ALLOCATABLE :: ynapp1j, ynapm1j,   ynapp1jm1, ynapm1jm1 ! mirror lin coeff for non adiab mom
+  REAL(xp), DIMENSION(:,:,:), ALLOCATABLE :: zNapm1j, zNapm1jp1, zNapm1jm1            ! mirror lin coeff for adiab mom
+  REAL(xp), DIMENSION(:,:,:), ALLOCATABLE :: xphij, xphijp1, xphijm1
+  REAL(xp), DIMENSION(:,:,:), ALLOCATABLE :: xpsij, xpsijp1, xpsijm1
+  ! Kernel function evaluation (ia,ij,iky,ikx,iz,odd/even p)
+  REAL(xp), DIMENSION(:,:,:,:,:,:), ALLOCATABLE :: kernel
 
   ! Poisson operator (iky,ikx,iz)
-  REAL(dp), DIMENSION(:,:,:), ALLOCATABLE :: inv_poisson_op
-  REAL(dp), DIMENSION(:,:,:), ALLOCATABLE :: inv_ampere_op
-  REAL(dp), DIMENSION(:,:,:), ALLOCATABLE :: inv_pol_ion
-  REAL(dp), DIMENSION(:,:,:), ALLOCATABLE :: HF_phi_correction_operator
+  REAL(xp), DIMENSION(:,:,:), ALLOCATABLE :: inv_poisson_op
+  REAL(xp), DIMENSION(:,:,:), ALLOCATABLE :: inv_ampere_op
+  REAL(xp), DIMENSION(:,:,:), ALLOCATABLE :: inv_pol_ion
+  ! REAL(xp), DIMENSION(:,:,:), ALLOCATABLE :: HF_phi_correction_operator
 
-  ! Gyrocenter density for electron and ions (iky,ikx,iz)
-  COMPLEX(dp), DIMENSION(:,:,:), ALLOCATABLE :: Ne00
-  COMPLEX(dp), DIMENSION(:,:,:), ALLOCATABLE :: Ni00
-
-  ! Kinetic spectrum sum_kx,ky(|Napj(z)|^2), (ip,ij,iz) (should be real)
-  REAL(dp), DIMENSION(:,:,:), ALLOCATABLE :: Nepjz
-  REAL(dp), DIMENSION(:,:,:), ALLOCATABLE :: Nipjz
+  ! Kinetic spectrum sum_kx,ky(|Napj(z)|^2), (ia,ip,ij,iz) (should be real)
+  REAL(xp), DIMENSION(:,:,:,:), ALLOCATABLE :: Napjz
 
   ! particle density for electron and ions (iky,ikx,iz)
-  COMPLEX(dp), DIMENSION(:,:,:), ALLOCATABLE :: dens_e
-  COMPLEX(dp), DIMENSION(:,:,:), ALLOCATABLE :: dens_i
+  COMPLEX(xp), DIMENSION(:,:,:,:), ALLOCATABLE :: dens
 
   ! particle fluid velocity for electron and ions (iky,ikx,iz)
-  COMPLEX(dp), DIMENSION(:,:,:), ALLOCATABLE :: upar_e
-  COMPLEX(dp), DIMENSION(:,:,:), ALLOCATABLE :: upar_i
-  COMPLEX(dp), DIMENSION(:,:,:), ALLOCATABLE :: uper_e
-  COMPLEX(dp), DIMENSION(:,:,:), ALLOCATABLE :: uper_i
+  COMPLEX(xp), DIMENSION(:,:,:,:), ALLOCATABLE :: upar
+  COMPLEX(xp), DIMENSION(:,:,:,:), ALLOCATABLE :: uper
 
   ! particle temperature for electron and ions (iky,ikx,iz)
-  COMPLEX(dp), DIMENSION(:,:,:), ALLOCATABLE :: Tpar_e
-  COMPLEX(dp), DIMENSION(:,:,:), ALLOCATABLE :: Tpar_i
-  COMPLEX(dp), DIMENSION(:,:,:), ALLOCATABLE :: Tper_e
-  COMPLEX(dp), DIMENSION(:,:,:), ALLOCATABLE :: Tper_i
-  COMPLEX(dp), DIMENSION(:,:,:), ALLOCATABLE :: temp_e
-  COMPLEX(dp), DIMENSION(:,:,:), ALLOCATABLE :: temp_i
+  COMPLEX(xp), DIMENSION(:,:,:,:), ALLOCATABLE :: Tpar
+  COMPLEX(xp), DIMENSION(:,:,:,:), ALLOCATABLE :: Tper
+  COMPLEX(xp), DIMENSION(:,:,:,:), ALLOCATABLE :: temp
 
 END MODULE array
diff --git a/src/auxval.F90 b/src/auxval.F90
index b236c48ee65e6cdf6c91664f634f1aee6757d94c..fb8e13cae4b15f794ede5770f42dc2671cffbbdd 100644
--- a/src/auxval.F90
+++ b/src/auxval.F90
@@ -9,34 +9,22 @@ subroutine auxval
   use prec_const
   USE numerics
   USE geometry
-  USE parallel, ONLY: init_parallel_var
+  USE closure, ONLY: set_closure_model, hierarchy_closure
+  USE parallel, ONLY: init_parallel_var, my_id, num_procs, num_procs_p, num_procs_z, num_procs_ky, rank_p, rank_ky, rank_z
+  USE processing, ONLY: init_process
   IMPLICIT NONE
 
-  INTEGER :: i_
+  INTEGER :: i_, ierr
   IF (my_id .EQ. 0) WRITE(*,*) '=== Set auxiliary values ==='
 
-  IF (LINEARITY .NE. 'linear') THEN
-    IF (my_id .EQ. 0) write(*,*) 'FFTW3 y-grid distribution'
-    CALL init_grid_distr_and_plans(Nx,Ny)
-  ELSE
-    CALL init_1Dgrid_distr
-    IF (my_id .EQ. 0) write(*,*) 'Manual y-grid distribution'
-  ENDIF
   ! Init the grids
-  CALL set_pgrid ! parallel kin (MPI distributed)
-
-  CALL set_jgrid ! perp kin
-
-  CALL set_kxgrid(shear) ! radial modes (MPI distributed by FFTW)
-
-  CALL set_kygrid ! azymuthal modes
-
-  CALL set_zgrid  ! field aligned angle
-  IF ((my_id .EQ. 0) .AND. SG) WRITE(*,*) '--2 staggered z grids--'
+  CALL set_grids(shear,Npol,LINEARITY,N_HD,EM,Na) ! radial modes (MPI distributed by FFTW)
 
   CALL memory ! Allocate memory for global arrays
 
-  CALL init_parallel_var
+  CALL init_parallel_var(local_np,total_np,local_nky,total_nky,local_nz)
+
+  CALL init_process
 
   CALL eval_magnetic_geometry ! precompute coeff for lin equation
 
@@ -52,7 +40,10 @@ subroutine auxval
 
   CALL build_dv4Hp_table ! precompute the hermite fourth derivative table
 
+  CALL set_closure_model ! set the closure scheme in use
+
   !! Display parallel settings
+  CALL mpi_barrier(MPI_COMM_WORLD, ierr)
   DO i_ = 0,num_procs-1
     CALL mpi_barrier(MPI_COMM_WORLD, ierr)
     IF (my_id .EQ. i_) THEN
@@ -64,29 +55,26 @@ subroutine auxval
       WRITE(*,'(A9,I3,A10,I3,A10,I3,A9,I3)')&
        'my_id  = ', my_id, ', rank_p  = ', rank_p, ', rank_ky  = ', rank_ky,', rank_z  = ', rank_z
        WRITE(*,'(A22,I3,A11,I3)')&
-       '              ips_e = ', ips_e, ', ipe_e  = ', ipe_e
-       WRITE(*,'(A22,I3,A11,I3)')&
-       '              ijs_e = ', ijs_e, ', ije_e  = ', ije_e
+       '         local_np   = ', local_np  , ', offset = ', local_np_offset
        WRITE(*,'(A22,I3,A11,I3)')&
-       '              ips_i = ', ips_i, ', ipe_i  = ', ipe_i
+       '         local_nj   = ', local_nj  , ', offset = ', local_nj_offset
        WRITE(*,'(A22,I3,A11,I3)')&
-       '              ijs_i = ', ijs_i, ', ije_i  = ', ije_i
+       '         local_nkx  = ', local_nkx , ', offset = ', local_nkx_offset
        WRITE(*,'(A22,I3,A11,I3)')&
-       '              ikxs  = ', ikxs , ', ikxe   = ', ikxe
+       '         local_nky  = ', local_nky , ', offset = ', local_nky_offset
        WRITE(*,'(A22,I3,A11,I3)')&
-       '              ikys  = ', ikys , ', ikye   = ', ikye
-       WRITE(*,'(A22,I3,A11,I3)')&
-       '              izs   = ', izs  , ', ize    = ', ize
+       '         local_nz   = ', local_nz  , ', offset = ', local_nz_offset
       IF (my_id .NE. num_procs-1) WRITE (*,*) ''
       IF (my_id .EQ. num_procs-1) WRITE(*,*) '------------------------------------------'
     ENDIF
   ENDDO
   CALL mpi_barrier(MPI_COMM_WORLD, ierr)
 
-  IF((my_id.EQ.0) .AND. (CLOS .EQ. 1)) THEN
-  IF(KIN_E) &
-  write(*,*) 'Closure = 1 -> Maximal Nepj degree is min(Pmaxe,2*Jmaxe+1): De = ', dmaxi
-  write(*,*) 'Closure = 1 -> Maximal Nipj degree is min(Pmaxi,2*Jmaxi+1): Di = ', dmaxi
-  ENDIF
+  SELECT CASE(hierarchy_closure)
+  CASE('truncation')
+    CALL speak('Truncation closure')
+  CASE('max_degree')
+    CALL speak('Max degree closure -> Maximal Napj degree is D = '// str(dmax))
+  END SELECT
 
 END SUBROUTINE auxval
diff --git a/src/basic_mod.F90 b/src/basic_mod.F90
index 527c97121978d31219d156ab88b412ba70bec164..0784de192446f954d2d62854826dae745c35ecb6 100644
--- a/src/basic_mod.F90
+++ b/src/basic_mod.F90
@@ -1,76 +1,64 @@
 MODULE basic
   !   Basic module for time dependent problems
   use, intrinsic :: iso_c_binding
-  use prec_const
+  use prec_const, ONLY : xp
   IMPLICIT none
-
+  PRIVATE
   ! INCLUDE 'fftw3-mpi.f03'
-
-  INTEGER  :: nrun   = 1           ! Number of time steps to run
-  real(dp) :: tmax   = 100000.0    ! Maximum simulation time
-  real(dp) :: dt     = 1.0         ! Time step
-  real(dp) :: time   = 0           ! Current simulation time (Init from restart file)
-
-  INTEGER :: comm0                 ! Default communicator with a topology
-  INTEGER :: group0                ! Default group with a topology
-  INTEGER :: rank_0                ! Ranks in comm0
-  ! Communicators for 1-dim cartesian subgrids of comm0
-  INTEGER :: comm_p, comm_ky, comm_z
-  INTEGER :: rank_p, rank_ky, rank_z! Ranks
-  INTEGER :: comm_pz,  rank_pz      ! 2D comm for N_a(p,j,z) output (mspfile)
-  INTEGER :: comm_kyz, rank_kyz     ! 2D comm for N_a(p,j,z) output (mspfile)
-  INTEGER :: comm_ky0, rank_ky0     ! comm along ky with p=0
-  INTEGER :: comm_z0,  rank_z0      ! comm along z  with p=0
-
-  INTEGER :: group_ky0, group_z0
-
-  INTEGER :: jobnum  = 0           ! Job number
-  INTEGER :: step    = 0           ! Calculation step of this run
-  INTEGER :: cstep   = 0           ! Current step number (Init from restart file)
-  LOGICAL :: nlend   = .FALSE.     ! Signal end of run
-  LOGICAL :: crashed = .FALSE.     ! Signal end of crashed run
-
-  INTEGER :: ierr                  ! flag for MPI error
-  INTEGER :: my_id                 ! Rank in COMM_WORLD
-  INTEGER :: num_procs             ! number of MPI processes
-  INTEGER :: num_procs_p           ! Number of processes in p
-  INTEGER :: num_procs_ky          ! Number of processes in r
-  INTEGER :: num_procs_z           ! Number of processes in z
-  INTEGER :: num_procs_pz          ! Number of processes in pz comm
-  INTEGER :: num_procs_kyz         ! Number of processes in kyz comm
-  INTEGER :: nbr_L, nbr_R          ! Left and right neighbours (along p)
-  INTEGER :: nbr_T, nbr_B          ! Top and bottom neighbours (along kx)
-  INTEGER :: nbr_U, nbr_D          ! Upstream and downstream neighbours (along z)
-
-  INTEGER :: iframe0d              ! counting the number of times 0d datasets are outputed (for diagnose)
-  INTEGER :: iframe1d              ! counting the number of times 1d datasets are outputed (for diagnose)
-  INTEGER :: iframe2d              ! counting the number of times 2d datasets are outputed (for diagnose)
-  INTEGER :: iframe3d              ! counting the number of times 3d datasets are outputed (for diagnose)
-  INTEGER :: iframe5d              ! counting the number of times 5d datasets are outputed (for diagnose)
+  ! INPUT PARAMETERS
+  INTEGER,  PUBLIC, PROTECTED :: nrun       = 1        ! Number of time steps to run
+  real(xp), PUBLIC, PROTECTED :: tmax       = 100000.0 ! Maximum simulation time
+  real(xp), PUBLIC, PROTECTED :: dt         = 1.0      ! Time step
+  real(xp), PUBLIC, PROTECTED :: maxruntime = 1e9      ! Maximum simulation CPU time
+  INTEGER,  PUBLIC, PROTECTED :: job2load   = 99       ! jobnum of the checkpoint to load
+  ! Auxiliary variables
+  real(xp), PUBLIC, PROTECTED :: time   = 0            ! Current simulation time (Init from restart file)
+
+  INTEGER, PUBLIC, PROTECTED  :: jobnum  = 0           ! Job number
+  INTEGER, PUBLIC, PROTECTED  :: step    = 0           ! Calculation step of this run
+  INTEGER, PUBLIC, PROTECTED  :: cstep   = 0           ! Current step number (Init from restart file)
+  LOGICAL, PUBLIC             :: nlend   = .FALSE.     ! Signal end of run
+  LOGICAL, PUBLIC             :: crashed = .FALSE.     ! Signal end of crashed run
+
+  INTEGER, PUBLIC :: iframe0d ! counting the number of times 0d datasets are outputed (for diagnose)
+  INTEGER, PUBLIC :: iframe1d ! counting the number of times 1d datasets are outputed (for diagnose)
+  INTEGER, PUBLIC :: iframe2d ! counting the number of times 2d datasets are outputed (for diagnose)
+  INTEGER, PUBLIC :: iframe3d ! counting the number of times 3d datasets are outputed (for diagnose)
+  INTEGER, PUBLIC :: iframe5d ! counting the number of times 5d datasets are outputed (for diagnose)
 
   !  List of logical file units
-  INTEGER :: lu_in   = 90              ! File duplicated from STDIN
-  INTEGER :: lu_stop = 91              ! stop file, see subroutine TESEND
+  INTEGER, PUBLIC, PROTECTED  :: lu_in   = 90              ! File duplicated from STDIN
+  INTEGER, PUBLIC, PROTECTED  :: lu_stop = 91              ! stop file, see subroutine TESEND
 
   ! To measure computation time
-  real(dp) :: start, finish
-  real(dp) :: t0_rhs, t0_adv_field, t0_poisson, t0_Sapj, t0_diag, t0_checkfield,&
-              t0_step, t0_clos, t0_ghost, t0_coll, t0_process
-  real(dp) :: t1_rhs, t1_adv_field, t1_poisson, t1_Sapj, t1_diag, t1_checkfield,&
-              t1_step, t1_clos, t1_ghost, t1_coll, t1_process
-  real(dp) :: tc_rhs, tc_adv_field, tc_poisson, tc_Sapj, tc_diag, tc_checkfield,&
-              tc_step, tc_clos, tc_ghost, tc_coll, tc_process
-  real(dp) :: maxruntime = 1e9 ! Maximum simulation CPU time
+  type :: chrono
+    real(xp) :: tstart !start of the chrono
+    real(xp) :: tstop  !stop 
+    real(xp) :: ttot   !cumulative time
+  end type chrono
+
+  type(chrono), PUBLIC, PROTECTED :: chrono_runt, chrono_mrhs, chrono_advf, chrono_pois, chrono_sapj,&
+   chrono_diag, chrono_chck, chrono_step, chrono_clos, chrono_ghst, chrono_coll, chrono_napj, chrono_grad
 
-  LOGICAL :: GATHERV_OUTPUT = .true.
+  LOGICAL, PUBLIC, PROTECTED :: GATHERV_OUTPUT = .true.
+
+  PUBLIC :: allocate_array, basic_outputinputs,basic_data,&
+            speak, str, increase_step, increase_cstep, increase_time, display_h_min_s,&
+            set_basic_cp, daytim, start_chrono, stop_chrono
 
   INTERFACE allocate_array
-    MODULE PROCEDURE allocate_array_dp1,allocate_array_dp2,allocate_array_dp3,allocate_array_dp4, allocate_array_dp5, allocate_array_dp6
-    MODULE PROCEDURE allocate_array_dc1,allocate_array_dc2,allocate_array_dc3,allocate_array_dc4, allocate_array_dc5, allocate_array_dc6
+    MODULE PROCEDURE allocate_array_xp1,allocate_array_xp2,allocate_array_xp3, &
+                     allocate_array_xp4, allocate_array_xp5, allocate_array_xp6, allocate_array_xp7
+    MODULE PROCEDURE allocate_array_dc1,allocate_array_dc2,allocate_array_dc3, &
+                     allocate_array_dc4, allocate_array_dc5, allocate_array_dc6, allocate_array_dc7
     MODULE PROCEDURE allocate_array_i1,allocate_array_i2,allocate_array_i3,allocate_array_i4
     MODULE PROCEDURE allocate_array_l1,allocate_array_l2,allocate_array_l3,allocate_array_l4
   END INTERFACE allocate_array
 
+  INTERFACE str
+    MODULE PROCEDURE str_xp, str_int
+  END INTERFACE
+
 CONTAINS
   !================================================================================
   SUBROUTINE basic_data
@@ -79,36 +67,38 @@ CONTAINS
     use prec_const
     IMPLICIT NONE
 
-    NAMELIST /BASIC/  nrun, dt, tmax, maxruntime
+    NAMELIST /BASIC/  nrun, dt, tmax, maxruntime, job2load
 
     CALL find_input_file
 
     READ(lu_in,basic)
 
-    !Init cumulative timers
-    tc_rhs        = 0.
-    tc_poisson    = 0.
-    tc_Sapj       = 0.
-    tc_coll       = 0.
-    tc_process    = 0.
-    tc_adv_field  = 0.
-    tc_ghost      = 0.
-    tc_clos       = 0.
-    tc_checkfield = 0.
-    tc_diag       = 0.
-    tc_step       = 0.
+    !Init chronometers
+    chrono_mrhs%ttot = 0
+    chrono_pois%ttot = 0
+    chrono_sapj%ttot = 0
+    chrono_napj%ttot = 0
+    chrono_grad%ttot = 0
+    chrono_advf%ttot = 0
+    chrono_ghst%ttot = 0
+    chrono_clos%ttot = 0
+    chrono_chck%ttot = 0
+    chrono_diag%ttot = 0
+    chrono_step%ttot = 0
   END SUBROUTINE basic_data
 
 
-  SUBROUTINE basic_outputinputs(fid, str)
+  SUBROUTINE basic_outputinputs(fid)
     !
     !    Write the input parameters to the results_xx.h5 file
     !
     USE prec_const
-    USE futils, ONLY: attach
+    USE futils, ONLY: attach, creatd
     IMPLICIT NONE
     INTEGER, INTENT(in) :: fid
-    CHARACTER(len=256), INTENT(in) :: str
+    CHARACTER(len=256)  :: str
+    WRITE(str,'(a)') '/data/input/basic'
+    CALL creatd(fid, 0,(/0/),TRIM(str),'Basic Input')
     CALL attach(fid, TRIM(str), "start_iframe0d", iframe0d)
     CALL attach(fid, TRIM(str), "start_iframe2d", iframe2d)
     CALL attach(fid, TRIM(str), "start_iframe3d", iframe3d)
@@ -119,21 +109,59 @@ CONTAINS
     CALL attach(fid, TRIM(str),        "tmax",     tmax)
     CALL attach(fid, TRIM(str),        "nrun",     nrun)
     CALL attach(fid, TRIM(str),    "cpu_time",       -1)
-    CALL attach(fid, TRIM(str),       "Nproc",   num_procs)
-    CALL attach(fid, TRIM(str),       "Np_p" , num_procs_p)
-    CALL attach(fid, TRIM(str),       "Np_kx",num_procs_ky)
-    CALL attach(fid, TRIM(str),        "Np_z", num_procs_z)
   END SUBROUTINE basic_outputinputs
+  !! Increments private attributes
+  SUBROUTINE increase_step
+    IMPLICIT NONE
+    step  = step  + 1
+  END SUBROUTINE
+  SUBROUTINE increase_cstep
+    IMPLICIT NONE
+    cstep  = cstep  + 1
+  END SUBROUTINE
+  SUBROUTINE increase_time
+    IMPLICIT NONE
+    time  = time  + dt
+  END SUBROUTINE
+  SUBROUTINE set_basic_cp(cstep_cp,time_cp,jobnum_cp)
+    IMPLICIT NONE
+    REAL(xp), INTENT(IN) :: time_cp
+    INTEGER,  INTENT(IN) :: cstep_cp, jobnum_cp
+    cstep  = cstep_cp
+    time   = time_cp
+    jobnum = jobnum_cp+1
+  END SUBROUTINE
+  !! Chrono handling
+  SUBROUTINE start_chrono(timer)
+    IMPLICIT NONE
+    type(chrono) :: timer
+    CALL cpu_time(timer%tstart)
+  END SUBROUTINE
+  SUBROUTINE stop_chrono(timer)
+    IMPLICIT NONE
+    type(chrono) :: timer
+    CALL cpu_time(timer%tstop)
+    timer%ttot = timer%ttot + (timer%tstop-timer%tstart)
+  END SUBROUTINE
+  !================================================================================
+  ! routine to speak in the terminal
+  SUBROUTINE speak(message)
+    USE parallel, ONLY: my_id
+    IMPLICIT NONE
+    CHARACTER(len=*), INTENT(in) :: message
+    IF(my_id .EQ. 0) write(*,*) message
+  END SUBROUTINE
   !================================================================================
   SUBROUTINE find_input_file
+    USE parallel, ONLY: my_id
     IMPLICIT NONE
-    CHARACTER(len=32) :: str, input_file
-    INTEGER :: nargs, fileid, l
+    CHARACTER(len=32) :: str_, input_file
+    INTEGER :: nargs, fileid, l, ierr
     LOGICAL :: mlexist
     nargs = COMMAND_ARGUMENT_COUNT()
     IF((nargs .EQ. 1) .OR. (nargs .EQ. 4)) THEN
-      CALL GET_COMMAND_ARGUMENT(nargs, str, l, ierr)
-      READ(str(1:l),'(i3)')  fileid
+      CALL GET_COMMAND_ARGUMENT(nargs, str_, l, ierr)
+      READ(str_(1:l),'(i3)')  fileid
       WRITE(input_file,'(a,a1,i2.2,a3)') 'fort','_',fileid,'.90'
 
       INQUIRE(file=input_file, exist=mlexist)
@@ -149,7 +177,7 @@ CONTAINS
   !================================================================================
   SUBROUTINE daytim(str)
     !   Print date and time
-
+    USE parallel, ONLY: my_id
     use prec_const
     IMPLICIT NONE
 
@@ -160,13 +188,16 @@ CONTAINS
     CALL DATE_AND_TIME(d,t)
     dat=d(7:8) // '/' // d(5:6) // '/' // d(1:4)
     time=t(1:2) // ':' // t(3:4) // ':' // t(5:10)
-    WRITE(*,'(a,1x,a,1x,a)') str, dat(1:10), time(1:12)
+    IF (my_id .EQ. 0) &
+      WRITE(*,'(a,1x,a,1x,a)') str, dat(1:10), time(1:12)
     !
   END SUBROUTINE daytim
   !================================================================================
   SUBROUTINE display_h_min_s(time)
-    real(dp) :: time
-    integer :: days, hours, mins, secs
+    USE parallel, ONLY: my_id
+    IMPLICIT NONE
+    real(xp) :: time
+    integer  :: days, hours, mins, secs
     days = FLOOR(time/24./3600.);
     hours= FLOOR(time/3600.);
     mins = FLOOR(time/60.);
@@ -197,103 +228,135 @@ CONTAINS
   END SUBROUTINE display_h_min_s
 !================================================================================
 
+  function str_xp(k) result( str_ )
+  !   "Convert an integer to string."
+      REAL(xp), intent(in) :: k
+      character(len=10):: str_
+      write (str_, "(G10.2)") k
+      str_ = adjustl(str_)
+  end function str_xp
+
+  function str_int(k) result( str_ )
+  !   "Convert an integer to string."
+      integer, intent(in) :: k
+      character(len=10)   :: str_
+      write (str_, "(i2.2)") k
+      str_ = adjustl(str_)
+  end function str_int
+
 ! To allocate arrays of doubles, integers, etc. at run time
-  SUBROUTINE allocate_array_dp1(a,is1,ie1)
+  SUBROUTINE allocate_array_xp1(a,is1,ie1)
     IMPLICIT NONE
-    real(dp), DIMENSION(:), ALLOCATABLE, INTENT(INOUT) :: a
+    real(xp), DIMENSION(:), ALLOCATABLE, INTENT(INOUT) :: a
     INTEGER, INTENT(IN) :: is1,ie1
     ALLOCATE(a(is1:ie1))
-    a=0.0_dp
-  END SUBROUTINE allocate_array_dp1
+    a=0.0_xp
+  END SUBROUTINE allocate_array_xp1
 
-  SUBROUTINE allocate_array_dp2(a,is1,ie1,is2,ie2)
+  SUBROUTINE allocate_array_xp2(a,is1,ie1,is2,ie2)
     IMPLICIT NONE
-    real(dp), DIMENSION(:,:), ALLOCATABLE, INTENT(INOUT) :: a
+    real(xp), DIMENSION(:,:), ALLOCATABLE, INTENT(INOUT) :: a
     INTEGER, INTENT(IN) :: is1,ie1,is2,ie2
     ALLOCATE(a(is1:ie1,is2:ie2))
-    a=0.0_dp
-  END SUBROUTINE allocate_array_dp2
+    a=0.0_xp
+  END SUBROUTINE allocate_array_xp2
 
-  SUBROUTINE allocate_array_dp3(a,is1,ie1,is2,ie2,is3,ie3)
+  SUBROUTINE allocate_array_xp3(a,is1,ie1,is2,ie2,is3,ie3)
     IMPLICIT NONE
-    real(dp), DIMENSION(:,:,:), ALLOCATABLE, INTENT(INOUT) :: a
+    real(xp), DIMENSION(:,:,:), ALLOCATABLE, INTENT(INOUT) :: a
     INTEGER, INTENT(IN) :: is1,ie1,is2,ie2,is3,ie3
     ALLOCATE(a(is1:ie1,is2:ie2,is3:ie3))
-    a=0.0_dp
-  END SUBROUTINE allocate_array_dp3
+    a=0.0_xp
+  END SUBROUTINE allocate_array_xp3
 
-  SUBROUTINE allocate_array_dp4(a,is1,ie1,is2,ie2,is3,ie3,is4,ie4)
+  SUBROUTINE allocate_array_xp4(a,is1,ie1,is2,ie2,is3,ie3,is4,ie4)
     IMPLICIT NONE
-    real(dp), DIMENSION(:,:,:,:), ALLOCATABLE, INTENT(INOUT) :: a
+    real(xp), DIMENSION(:,:,:,:), ALLOCATABLE, INTENT(INOUT) :: a
     INTEGER, INTENT(IN) :: is1,ie1,is2,ie2,is3,ie3,is4,ie4
     ALLOCATE(a(is1:ie1,is2:ie2,is3:ie3,is4:ie4))
-    a=0.0_dp
-  END SUBROUTINE allocate_array_dp4
+    a=0.0_xp
+  END SUBROUTINE allocate_array_xp4
 
-  SUBROUTINE allocate_array_dp5(a,is1,ie1,is2,ie2,is3,ie3,is4,ie4,is5,ie5)
+  SUBROUTINE allocate_array_xp5(a,is1,ie1,is2,ie2,is3,ie3,is4,ie4,is5,ie5)
     IMPLICIT NONE
-    real(dp), DIMENSION(:,:,:,:,:), ALLOCATABLE, INTENT(INOUT) :: a
+    real(xp), DIMENSION(:,:,:,:,:), ALLOCATABLE, INTENT(INOUT) :: a
     INTEGER, INTENT(IN) :: is1,ie1,is2,ie2,is3,ie3,is4,ie4,is5,ie5
     ALLOCATE(a(is1:ie1,is2:ie2,is3:ie3,is4:ie4,is5:ie5))
-    a=0.0_dp
-  END SUBROUTINE allocate_array_dp5
+    a=0.0_xp
+  END SUBROUTINE allocate_array_xp5
 
-  SUBROUTINE allocate_array_dp6(a,is1,ie1,is2,ie2,is3,ie3,is4,ie4,is5,ie5,is6,ie6)
+  SUBROUTINE allocate_array_xp6(a,is1,ie1,is2,ie2,is3,ie3,is4,ie4,is5,ie5,is6,ie6)
     IMPLICIT NONE
-    real(dp), DIMENSION(:,:,:,:,:,:), ALLOCATABLE, INTENT(INOUT) :: a
+    real(xp), DIMENSION(:,:,:,:,:,:), ALLOCATABLE, INTENT(INOUT) :: a
     INTEGER, INTENT(IN) :: is1,ie1,is2,ie2,is3,ie3,is4,ie4,is5,ie5,is6,ie6
     ALLOCATE(a(is1:ie1,is2:ie2,is3:ie3,is4:ie4,is5:ie5,is6:ie6))
-    a=0.0_dp
-  END SUBROUTINE allocate_array_dp6
+    a=0.0_xp
+  END SUBROUTINE allocate_array_xp6
+
+  SUBROUTINE allocate_array_xp7(a,is1,ie1,is2,ie2,is3,ie3,is4,ie4,is5,ie5,is6,ie6,is7,ie7)
+    IMPLICIT NONE
+    REAL(xp), DIMENSION(:,:,:,:,:,:,:), ALLOCATABLE, INTENT(INOUT) :: a
+    INTEGER, INTENT(IN) :: is1,ie1,is2,ie2,is3,ie3,is4,ie4,is5,ie5,is6,ie6,is7,ie7
+    ALLOCATE(a(is1:ie1,is2:ie2,is3:ie3,is4:ie4,is5:ie5,is6:ie6,is7:ie7))
+    a=0.0_xp
+  END SUBROUTINE allocate_array_xp7
   !========================================
 
   SUBROUTINE allocate_array_dc1(a,is1,ie1)
     IMPLICIT NONE
-    DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE, INTENT(INOUT) :: a
+    COMPLEX(xp), DIMENSION(:), ALLOCATABLE, INTENT(INOUT) :: a
     INTEGER, INTENT(IN) :: is1,ie1
     ALLOCATE(a(is1:ie1))
-    a=CMPLX(0.0_dp,0.0_dp)
+    a=CMPLX(0.0_xp,0.0_xp)
   END SUBROUTINE allocate_array_dc1
 
   SUBROUTINE allocate_array_dc2(a,is1,ie1,is2,ie2)
     IMPLICIT NONE
-    DOUBLE COMPLEX, DIMENSION(:,:), ALLOCATABLE, INTENT(INOUT) :: a
+    COMPLEX(xp), DIMENSION(:,:), ALLOCATABLE, INTENT(INOUT) :: a
     INTEGER, INTENT(IN) :: is1,ie1,is2,ie2
     ALLOCATE(a(is1:ie1,is2:ie2))
-    a=CMPLX(0.0_dp,0.0_dp)
+    a=CMPLX(0.0_xp,0.0_xp)
   END SUBROUTINE allocate_array_dc2
 
   SUBROUTINE allocate_array_dc3(a,is1,ie1,is2,ie2,is3,ie3)
     IMPLICIT NONE
-    DOUBLE COMPLEX, DIMENSION(:,:,:), ALLOCATABLE, INTENT(INOUT) :: a
+    COMPLEX(xp), DIMENSION(:,:,:), ALLOCATABLE, INTENT(INOUT) :: a
     INTEGER, INTENT(IN) :: is1,ie1,is2,ie2,is3,ie3
     ALLOCATE(a(is1:ie1,is2:ie2,is3:ie3))
-    a=CMPLX(0.0_dp,0.0_dp)
+    a=CMPLX(0.0_xp,0.0_xp)
   END SUBROUTINE allocate_array_dc3
 
   SUBROUTINE allocate_array_dc4(a,is1,ie1,is2,ie2,is3,ie3,is4,ie4)
     IMPLICIT NONE
-    DOUBLE COMPLEX, DIMENSION(:,:,:,:), ALLOCATABLE, INTENT(INOUT) :: a
+    COMPLEX(xp), DIMENSION(:,:,:,:), ALLOCATABLE, INTENT(INOUT) :: a
     INTEGER, INTENT(IN) :: is1,ie1,is2,ie2,is3,ie3,is4,ie4
     ALLOCATE(a(is1:ie1,is2:ie2,is3:ie3,is4:ie4))
-    a=CMPLX(0.0_dp,0.0_dp)
+    a=CMPLX(0.0_xp,0.0_xp)
   END SUBROUTINE allocate_array_dc4
 
   SUBROUTINE allocate_array_dc5(a,is1,ie1,is2,ie2,is3,ie3,is4,ie4,is5,ie5)
     IMPLICIT NONE
-    DOUBLE COMPLEX, DIMENSION(:,:,:,:,:), ALLOCATABLE, INTENT(INOUT) :: a
+    COMPLEX(xp), DIMENSION(:,:,:,:,:), ALLOCATABLE, INTENT(INOUT) :: a
     INTEGER, INTENT(IN) :: is1,ie1,is2,ie2,is3,ie3,is4,ie4,is5,ie5
     ALLOCATE(a(is1:ie1,is2:ie2,is3:ie3,is4:ie4,is5:ie5))
-    a=CMPLX(0.0_dp,0.0_dp)
+    a=CMPLX(0.0_xp,0.0_xp)
   END SUBROUTINE allocate_array_dc5
 
   SUBROUTINE allocate_array_dc6(a,is1,ie1,is2,ie2,is3,ie3,is4,ie4,is5,ie5,is6,ie6)
     IMPLICIT NONE
-    DOUBLE COMPLEX, DIMENSION(:,:,:,:,:,:), ALLOCATABLE, INTENT(INOUT) :: a
+    COMPLEX(xp), DIMENSION(:,:,:,:,:,:), ALLOCATABLE, INTENT(INOUT) :: a
     INTEGER, INTENT(IN) :: is1,ie1,is2,ie2,is3,ie3,is4,ie4,is5,ie5,is6,ie6
     ALLOCATE(a(is1:ie1,is2:ie2,is3:ie3,is4:ie4,is5:ie5,is6:ie6))
-    a=CMPLX(0.0_dp,0.0_dp)
+    a=CMPLX(0.0_xp,0.0_xp)
   END SUBROUTINE allocate_array_dc6
+
+  SUBROUTINE allocate_array_dc7(a,is1,ie1,is2,ie2,is3,ie3,is4,ie4,is5,ie5,is6,ie6,is7,ie7)
+    IMPLICIT NONE
+    COMPLEX(xp), DIMENSION(:,:,:,:,:,:,:), ALLOCATABLE, INTENT(INOUT) :: a
+    INTEGER, INTENT(IN) :: is1,ie1,is2,ie2,is3,ie3,is4,ie4,is5,ie5,is6,ie6,is7,ie7
+    ALLOCATE(a(is1:ie1,is2:ie2,is3:ie3,is4:ie4,is5:ie5,is6:ie6,is7:ie7))
+    a=CMPLX(0.0_xp,0.0_xp)
+  END SUBROUTINE allocate_array_dc7
   !========================================
 
   SUBROUTINE allocate_array_i1(a,is1,ie1)
@@ -330,7 +393,7 @@ CONTAINS
 
   SUBROUTINE allocate_array_i5(a,is1,ie1,is2,ie2,is3,ie3,is4,ie4,is5,ie5)
     IMPLICIT NONE
-    real(dp), DIMENSION(:,:,:,:,:), ALLOCATABLE, INTENT(INOUT) :: a
+    INTEGER, DIMENSION(:,:,:,:,:), ALLOCATABLE, INTENT(INOUT) :: a
     INTEGER, INTENT(IN) :: is1,ie1,is2,ie2,is3,ie3,is4,ie4,is5,ie5
     ALLOCATE(a(is1:ie1,is2:ie2,is3:ie3,is4:ie4,is5:ie5))
     a=0
diff --git a/src/calculus_mod.F90 b/src/calculus_mod.F90
index 9c1f0060ab83aff223aae2935cee35b6bdd9f4ef..29c08f7a5f31647569c7824739e9bce09ba0ddf4 100644
--- a/src/calculus_mod.F90
+++ b/src/calculus_mod.F90
@@ -1,199 +1,256 @@
 MODULE calculus
   ! Routine to evaluate gradients, interpolation schemes and integrals
-  USE basic
-  USE prec_const
-  USE grid
-  USE parallel, ONLY: manual_0D_bcast
+  USE prec_const, ONLY: xp
   IMPLICIT NONE
-  REAL(dp), dimension(-2:2) :: dz_usu = &
-   (/  onetwelfth, -twothird, &
-                       0._dp, &
-         twothird, -onetwelfth /) ! fd4 centered stencil
-  REAL(dp), dimension(-2:1) :: dz_o2e = &
-   (/ onetwentyfourth,-nineeighths, nineeighths,-onetwentyfourth /) ! fd4 odd to even stencil
-  REAL(dp), dimension(-1:2) :: dz_e2o = &
-   (/ onetwentyfourth,-nineeighths, nineeighths,-onetwentyfourth /) ! fd4 odd to even stencil
-   REAL(dp), dimension(-2:2) :: dz2_usu = &
-   (/-1.0_dp/12.0_dp, 4.0_dp/3.0_dp, -5.0_dp/2.0_dp, 4.0_dp/3.0_dp, -1.0_dp/12.0_dp /)! 2th derivative, 4th order (for parallel hypdiff)
-   REAL(dp), dimension(-2:2) :: dz4_usu = &
-   (/  1._dp, -4._dp, 6._dp, -4._dp, 1._dp /) ! 4th derivative, 2nd order (for parallel hypdiff)
-  PUBLIC :: simpson_rule_z, interp_z, grad_z, grad_z4
+  REAL(xp), dimension(-2:2) :: dz_usu = &
+   (/  1._xp/12._xp, -2._xp/3._xp, 0._xp, 2._xp/3._xp, -1._xp/12._xp /) ! fd4 centered stencil
+  REAL(xp), dimension(-2:1) :: dz_o2e = &
+   (/ 1._xp/24._xp,-9._xp/8._xp, 9._xp/8._xp,-1._xp/24._xp /) ! fd4 odd to even stencil
+  REAL(xp), dimension(-1:2) :: dz_e2o = &
+   (/ 1._xp/24._xp,-9._xp/8._xp, 9._xp/8._xp,-1._xp/24._xp /) ! fd4 odd to even stencil
+   REAL(xp), dimension(-2:2) :: dz2_usu = &
+   (/-1._xp/12._xp, 4._xp/3._xp, -5._xp/2._xp, 4._xp/3._xp, -1._xp/12._xp /)! 2th derivative, 4th order (for parallel hypdiff)
+   REAL(xp), dimension(-2:2) :: dz4_usu = &
+   (/  1._xp, -4._xp, 6._xp, -4._xp, 1._xp /) ! 4th derivative, 2nd order (for parallel hypdiff)
+   REAL(xp), dimension(-2:1) :: iz_o2e = &
+   (/ -1._xp/16._xp, 9._xp/16._xp, 9._xp/16._xp, -1._xp/16._xp /) ! grid interpolation, 4th order, odd to even
+   REAL(xp), dimension(-1:2) :: iz_e2o = &
+   (/ -1._xp/16._xp, 9._xp/16._xp, 9._xp/16._xp, -1._xp/16._xp /) ! grid interpolation, 4th order, even to odd
+  PUBLIC :: simpson_rule_z, interp_z, grad_z, grad_z4, grad_z_5D, grad_z4_5D
 
 CONTAINS
 
-SUBROUTINE grad_z(target,f,ddzf)
+SUBROUTINE grad_z(target,local_nz,ngz,inv_deltaz,f,ddzf)
   implicit none
   ! Compute the periodic boundary condition 4 points centered finite differences
   ! formula among staggered grid or not.
   ! not staggered : the derivative results must be on the same grid as the field
   !     staggered : the derivative is computed from a grid to the other
-  INTEGER, INTENT(IN) :: target
-  COMPLEX(dp),dimension(izgs:izge), intent(in)  :: f
-  COMPLEX(dp),dimension( izs:ize ), intent(out) :: ddzf
-  IF(Nz .GT. 3) THEN ! Cannot apply four points stencil on less than four points grid
-    IF(SG) THEN
-      IF(TARGET .EQ. 0) THEN
-        CALL grad_z_o2e(f,ddzf)
-      ELSE
-        CALL grad_z_e2o(f,ddzf)
-      ENDIF
-    ELSE ! No staggered grid -> usual centered finite differences
-      DO iz = izs,ize
-       ddzf(iz) = dz_usu(-2)*f(iz-2) + dz_usu(-1)*f(iz-1) &
-                 +dz_usu( 1)*f(iz+1) + dz_usu( 2)*f(iz+2)
+  INTEGER,  INTENT(IN) :: target, local_nz, ngz
+  REAL(xp), INTENT(IN) :: inv_deltaz
+  COMPLEX(xp),dimension(local_nz+ngz), INTENT(IN)  :: f
+  COMPLEX(xp),dimension(local_nz),     INTENT(OUT) :: ddzf
+  INTEGER :: iz
+  IF(ngz .GT. 3) THEN ! Cannot apply four points stencil on less than four points grid
+    SELECT CASE(TARGET)
+    CASE(1)
+      CALL grad_z_o2e(local_nz,ngz,inv_deltaz,f,ddzf)
+    CASE(2)
+      CALL grad_z_e2o(local_nz,ngz,inv_deltaz,f,ddzf)
+    CASE DEFAULT ! No staggered grid -> usual centered finite differences
+      DO iz = 1,local_nz
+       ddzf(iz) = dz_usu(-2)*f(iz  ) + dz_usu(-1)*f(iz+1) &
+                 +dz_usu( 0)*f(iz+2) &
+                 +dz_usu( 1)*f(iz+3) + dz_usu( 2)*f(iz+4)
       ENDDO
       ddzf(:) = ddzf(:) * inv_deltaz
-    ENDIF
+    END SELECT
   ELSE
-    ddzf = 0._dp
+    ddzf = 0._xp
   ENDIF
 CONTAINS
-  SUBROUTINE grad_z_o2e(fo,ddzfe) ! Paruta 2018 eq (27)
+  SUBROUTINE grad_z_o2e(local_nz,ngz,inv_deltaz,fo,ddzfe) ! Paruta 2018 eq (27)
     ! gives the gradient of a field from the odd grid to the even one
     implicit none
-    COMPLEX(dp),dimension(izgs:izge), intent(in)  :: fo
-    COMPLEX(dp),dimension( izs:ize ), intent(out) :: ddzfe !
-    DO iz = izs,ize
-     ddzfe(iz) = dz_o2e(-2)*fo(iz-2) + dz_o2e(-1)*fo(iz-1) &
-                +dz_o2e( 0)*fo(iz  ) + dz_o2e( 1)*fo(iz+1)
+    INTEGER,  INTENT(IN) :: local_nz, ngz
+    REAL(xp), INTENT(IN) :: inv_deltaz
+    COMPLEX(xp),dimension(local_nz+ngz), INTENT(IN)  :: fo
+    COMPLEX(xp),dimension(local_nz),     INTENT(OUT) :: ddzfe !
+    INTEGER :: iz
+    DO iz = 1,local_nz
+     ddzfe(iz) = dz_o2e(-2)*fo(iz  ) + dz_o2e(-1)*fo(iz+1) &
+                +dz_o2e( 0)*fo(iz+2) + dz_o2e( 1)*fo(iz+3)
     ENDDO
     ddzfe(:) = ddzfe(:) * inv_deltaz
   END SUBROUTINE grad_z_o2e
 
-  SUBROUTINE grad_z_e2o(fe,ddzfo) ! n2v for Paruta 2018 eq (28)
+  SUBROUTINE grad_z_e2o(local_nz,ngz,inv_deltaz,fe,ddzfo) ! n2v for Paruta 2018 eq (28)
     ! gives the gradient of a field from the even grid to the odd one
     implicit none
-    COMPLEX(dp),dimension(izgs:izge), intent(in)  :: fe
-    COMPLEX(dp),dimension( izs:ize ), intent(out) :: ddzfo
-    DO iz = izs,ize
-     ddzfo(iz) = dz_e2o(-1)*fe(iz-1) + dz_e2o(0)*fe(iz  ) &
-                +dz_e2o( 1)*fe(iz+1) + dz_e2o(2)*fe(iz+2)
+    INTEGER,  INTENT(IN) :: local_nz, ngz
+    REAL(xp), INTENT(IN) :: inv_deltaz
+    COMPLEX(xp),dimension(local_nz+ngz), INTENT(IN)  :: fe
+    COMPLEX(xp),dimension(local_nz),     INTENT(OUT) :: ddzfo
+    INTEGER :: iz
+    DO iz = 1,local_nz
+     ddzfo(iz) = dz_e2o(-1)*fe(iz+1) + dz_e2o(0)*fe(iz+2) &
+                +dz_e2o( 1)*fe(iz+3) + dz_e2o(2)*fe(iz+4)
     ENDDO
     ddzfo(:) = ddzfo(:) * inv_deltaz
   END SUBROUTINE grad_z_e2o
 END SUBROUTINE grad_z
 
-SUBROUTINE grad_z2(f,ddz2f)
+SUBROUTINE grad_z_5D(local_nz,ngz,inv_deltaz,f,ddzf)
   implicit none
+  ! Compute the periodic boundary condition 4 points centered finite differences
+  ! formula among staggered grid or not.
+  ! not staggered : the derivative results must be on the same grid as the field
+  !     staggered : the derivative is computed from a grid to the other
+  INTEGER,  INTENT(IN) :: local_nz, ngz
+  REAL(xp), INTENT(IN) :: inv_deltaz
+  COMPLEX(xp),dimension(:,:,:,:,:,:), INTENT(IN)  :: f
+  COMPLEX(xp),dimension(:,:,:,:,:,:),     INTENT(OUT) :: ddzf
+  INTEGER :: iz
+  IF(ngz .GT. 3) THEN ! Cannot apply four points stencil on less than four points grid
+    DO iz = 1,local_nz
+      ddzf(:,:,:,:,:,iz) = inv_deltaz*&
+        (dz_usu(-2)*f(:,:,:,:,:,iz  ) + dz_usu(-1)*f(:,:,:,:,:,iz+1) &
+        +dz_usu( 0)*f(:,:,:,:,:,iz+2) &
+        +dz_usu( 1)*f(:,:,:,:,:,iz+3) + dz_usu( 2)*f(:,:,:,:,:,iz+4))
+    ENDDO
+  ELSE
+    ddzf = 0._xp
+  ENDIF
+END SUBROUTINE grad_z_5D
+
+
+SUBROUTINE grad_z2(local_nz,ngz,inv_deltaz,f,ddz2f)
   ! Compute the second order fourth derivative for periodic boundary condition
-  COMPLEX(dp),dimension(izgs:izge), intent(in)  :: f
-  COMPLEX(dp),dimension( izs:ize ), intent(out) :: ddz2f
-  IF(Nz .GT. 3) THEN ! Cannot apply four points stencil on less than four points grid
-      DO iz = izs,ize
-       ddz2f(iz) = dz2_usu(-2)*f(iz-2) + dz2_usu(-1)*f(iz-1) &
-                  +dz2_usu( 0)*f(iz  )&
-                  +dz2_usu( 1)*f(iz+1) + dz2_usu( 2)*f(iz+2)
+  implicit none
+  INTEGER, INTENT(IN)  :: local_nz, ngz
+  REAL(xp), INTENT(IN) :: inv_deltaz
+  COMPLEX(xp),dimension(local_nz+ngz), INTENT(IN)  :: f
+  COMPLEX(xp),dimension(local_nz),     INTENT(OUT) :: ddz2f
+  INTEGER :: iz
+  IF(ngz .GT. 3) THEN ! Cannot apply four points stencil on less than four points grid
+      DO iz = 1,local_nz
+       ddz2f(iz) = dz2_usu(-2)*f(iz  ) + dz2_usu(-1)*f(iz+1) &
+                  +dz2_usu( 0)*f(iz+2)&
+                  +dz2_usu( 1)*f(iz+3) + dz2_usu( 2)*f(iz+4)
       ENDDO
   ELSE
-    ddz2f = 0._dp
+    ddz2f = 0._xp
   ENDIF
   ddz2f = ddz2f * inv_deltaz**2
 END SUBROUTINE grad_z2
 
-
-SUBROUTINE grad_z4(f,ddz4f)
+SUBROUTINE grad_z4_5D(local_nz,ngz,inv_deltaz,f,ddz4f)
+  ! Compute the second order fourth derivative for periodic boundary condition
   implicit none
+  INTEGER,  INTENT(IN) :: local_nz, ngz
+  REAL(xp), INTENT(IN) :: inv_deltaz
+  COMPLEX(xp),dimension(:,:,:,:,:,:), INTENT(IN)  :: f
+  COMPLEX(xp),dimension(:,:,:,:,:,:),     INTENT(OUT) :: ddz4f
+  INTEGER :: iz
+  IF(ngz .GT. 3) THEN ! Cannot apply four points stencil on less than four points grid
+      DO iz = 1,local_nz
+       ddz4f(:,:,:,:,:,iz) = inv_deltaz**4*&
+          (dz4_usu(-2)*f(:,:,:,:,:,iz  ) + dz4_usu(-1)*f(:,:,:,:,:,iz+1) &
+          +dz4_usu( 0)*f(:,:,:,:,:,iz+2)&
+          +dz4_usu( 1)*f(:,:,:,:,:,iz+3) + dz4_usu( 2)*f(:,:,:,:,:,iz+4))
+      ENDDO
+  ELSE
+    ddz4f = 0._xp
+  ENDIF
+END SUBROUTINE grad_z4_5D
+
+SUBROUTINE grad_z4(local_nz,ngz,inv_deltaz,f,ddz4f)
   ! Compute the second order fourth derivative for periodic boundary condition
-  COMPLEX(dp),dimension(izgs:izge), intent(in)  :: f
-  COMPLEX(dp),dimension( izs:ize ), intent(out) :: ddz4f
-  IF(Nz .GT. 3) THEN ! Cannot apply four points stencil on less than four points grid
-      DO iz = izs,ize
-       ddz4f(iz) = dz4_usu(-2)*f(iz-2) + dz4_usu(-1)*f(iz-1) &
-                  +dz4_usu( 0)*f(iz)&
-                  +dz4_usu( 1)*f(iz+1) + dz4_usu( 2)*f(iz+2)
+  implicit none
+  INTEGER,  INTENT(IN) :: local_nz, ngz
+  REAL(xp), INTENT(IN) :: inv_deltaz
+  COMPLEX(xp),dimension(local_nz+ngz), INTENT(IN)  :: f
+  COMPLEX(xp),dimension(local_nz),     INTENT(OUT) :: ddz4f
+  INTEGER :: iz
+  IF(ngz .GT. 3) THEN ! Cannot apply four points stencil on less than four points grid
+      DO iz = 1,local_nz
+       ddz4f(iz) = dz4_usu(-2)*f(iz  ) + dz4_usu(-1)*f(iz+1) &
+                  +dz4_usu( 0)*f(iz+2)&
+                  +dz4_usu( 1)*f(iz+3) + dz4_usu( 2)*f(iz+4)
       ENDDO
   ELSE
-    ddz4f = 0._dp
+    ddz4f = 0._xp
   ENDIF
   ddz4f = ddz4f * inv_deltaz**4
 END SUBROUTINE grad_z4
 
 
-SUBROUTINE interp_z(target,f_in,f_out)
+SUBROUTINE interp_z(target,local_nz,ngz,f_in,f_out)
   ! Function meant to interpolate one field defined on a even/odd z into
   !  the other odd/even z grid.
   ! If Staggered Grid flag (SG) is false, returns identity
   implicit none
+  INTEGER, INTENT(IN) :: local_nz, ngz
   INTEGER, intent(in) :: target ! target grid : 0 for even grid, 1 for odd
-  COMPLEX(dp),dimension(izgs:izge), intent(in)  :: f_in
-  COMPLEX(dp),dimension( izs:ize ), intent(out) :: f_out !
-  IF(SG) THEN
-    IF(target .EQ. 0) THEN
-      CALL interp_o2e_z(f_in,f_out)
-    ELSE
-      CALL interp_e2o_z(f_in,f_out)
-    ENDIF
-  ELSE ! No staggered grid -> identity
-    f_out(izs:ize) = f_in(izs:ize)
-  ENDIF
+  COMPLEX(xp),dimension(local_nz+ngz), INTENT(IN)  :: f_in
+  COMPLEX(xp),dimension(local_nz),     INTENT(OUT) :: f_out
+  SELECT CASE(TARGET)
+  CASE(1) ! output on even grid
+    CALL interp_o2e_z(local_nz,ngz,f_in,f_out)
+  CASE(2) ! output on odd grid
+    CALL interp_e2o_z(local_nz,ngz,f_in,f_out)
+  CASE DEFAULT ! No staggered grid -> usual centered finite differences
+    f_out = f_in((1+ngz/2):(local_nz+ngz/2))
+  END SELECT
 CONTAINS
-  SUBROUTINE interp_o2e_z(fo,fe)
+  SUBROUTINE interp_o2e_z(local_nz, ngz,fo,fe)
    ! gives the value of a field from the odd grid to the even one
    implicit none
-   COMPLEX(dp),dimension(izgs:izge), intent(in)  :: fo
-   COMPLEX(dp),dimension( izs:ize ), intent(out) :: fe !
+   INTEGER, INTENT(IN) :: local_nz, ngz
+   COMPLEX(xp),dimension(local_nz+ngz), INTENT(IN)  :: fo
+   COMPLEX(xp),dimension(local_nz),     INTENT(OUT) :: fe
+   INTEGER :: iz
    ! 4th order interp
-   DO iz = izs,ize
-    fe(iz) = onesixteenth * (-fo(iz-2) + 9._dp*(fo(iz-1) + fo(iz)) - fo(iz+1))
+   DO iz = 1,local_nz
+     fe(iz) = iz_o2e(-2)*fo(iz )  + iz_o2e(-1)*fo(iz+1) &
+            + iz_o2e( 0)*fo(iz+2) + iz_o2e( 1)*fo(iz+3)
    ENDDO
   END SUBROUTINE interp_o2e_z
 
-  SUBROUTINE interp_e2o_z(fe,fo)
+  SUBROUTINE interp_e2o_z(local_nz, ngz,fe,fo)
    ! gives the value of a field from the even grid to the odd one
    implicit none
-   COMPLEX(dp),dimension(izgs:izge), intent(in)  :: fe
-   COMPLEX(dp),dimension( izs:ize ), intent(out) :: fo
+   INTEGER, INTENT(IN) :: local_nz, ngz
+   COMPLEX(xp),dimension(local_nz+ngz), INTENT(IN)  :: fe
+   COMPLEX(xp),dimension(local_nz),     INTENT(OUT) :: fo
+   INTEGER :: iz
    ! 4th order interp
-   DO iz = izs,ize
-    fo(iz) = onesixteenth * (-fe(iz-1) + 9._dp*(fe(iz) + fe(iz+1)) - fe(iz+2))
+   DO iz = 1,local_nz
+     fo(iz) = iz_e2o(-1)*fe(iz+1) + iz_e2o( 0)*fe(iz+2) &
+            + iz_e2o( 1)*fe(iz+3) + iz_e2o( 2)*fe(iz+4)
    ENDDO
   END SUBROUTINE interp_e2o_z
 
 END SUBROUTINE interp_z
 
-SUBROUTINE simpson_rule_z(f,intf)
- ! integrate f(z) over z using the simpon's rule. Assume periodic boundary conditions (f(ize+1) = f(izs))
- !from molix BJ Frei
- implicit none
- complex(dp),dimension(izs:ize), intent(in) :: f
- COMPLEX(dp), intent(out) :: intf
- COMPLEX(dp)              :: buffer, local_int
- INTEGER :: root, i_
-
- IF(Nz .EQ. 1) THEN !2D zpinch simulations
-   intf = f(izs)
-
- ELSE !3D fluxtube
-   IF(mod(Nz,2) .ne. 0 ) THEN
-      ERROR STOP '>> ERROR << Simpson rule: Nz must be an even number  !!!!'
-   ENDIF
-   ! Buil local sum using the weights of composite Simpson's rule
-   local_int = 0._dp
-   DO iz = izs,ize
+SUBROUTINE simpson_rule_z(local_nz,zweights_SR,f,intf)
+  ! integrate f(z) over z using the simpon's rule. Assume periodic boundary conditions (f(ize+1) = f(izs))
+  !from molix BJ Frei
+  USE prec_const, ONLY: xp, onethird
+  USE parallel,   ONLY: num_procs_z, rank_z, comm_z, manual_0D_bcast
+  USE mpi
+  implicit none
+  INTEGER, INTENT(IN) :: local_nz
+  REAL(xp),   dimension(local_nz), intent(in) :: zweights_SR
+  complex(xp),dimension(local_nz), intent(in) :: f
+  COMPLEX(xp), intent(out) :: intf
+  COMPLEX(xp)              :: buffer, local_int
+  INTEGER :: root, i_, iz, ierr
+  ! Buil local sum using the weights of composite Simpson's rule
+  local_int = 0._xp
+  DO iz = 1,local_nz
       local_int = local_int + zweights_SR(iz)*f(iz)
-   ENDDO
-   buffer = local_int
-   root = 0
-   !Gather manually among the rank_z=0 processes and perform the sum
-   intf = 0._dp
-   IF (num_procs_z .GT. 1) THEN
-       !! Everyone sends its local_sum to root = 0
-       IF (rank_z .NE. root) THEN
-           CALL MPI_SEND(buffer, 1 , MPI_DOUBLE_COMPLEX, root, 5678, comm_z, ierr)
-       ELSE
-           ! Recieve from all the other processes
-           DO i_ = 0,num_procs_z-1
-               IF (i_ .NE. rank_z) &
-                   CALL MPI_RECV(buffer, 1 , MPI_DOUBLE_COMPLEX, i_, 5678, comm_z, MPI_STATUS_IGNORE, ierr)
-                   intf = intf + buffer
-           ENDDO
-       ENDIF
-       CALL manual_0D_bcast(intf)
-   ELSE
-     intf = local_int
-   ENDIF
-   intf = onethird*deltaz*intf
-   ENDIF
+  ENDDO
+  buffer = local_int
+  root = 0
+  !Gather manually among the rank_z=0 processes and perform the sum
+  intf = 0._xp
+  IF (num_procs_z .GT. 1) THEN
+      !! Everyone sends its local_sum to root = 0
+      IF (rank_z .NE. root) THEN
+          CALL MPI_SEND(buffer, 1 , MPI_DOUBLE_COMPLEX, root, 5678, comm_z, ierr)
+      ELSE
+          ! Recieve from all the other processes
+          DO i_ = 0,num_procs_z-1
+              IF (i_ .NE. rank_z) &
+                  CALL MPI_RECV(buffer, 1 , MPI_DOUBLE_COMPLEX, i_, 5678, comm_z, MPI_STATUS_IGNORE, ierr)
+                  intf = intf + buffer
+          ENDDO
+      ENDIF
+      CALL manual_0D_bcast(intf)
+  ELSE
+    intf = local_int
+  ENDIF
 END SUBROUTINE simpson_rule_z
 
 END MODULE calculus
diff --git a/src/closure_mod.F90 b/src/closure_mod.F90
index c7d7bb20f89ef318c253dc7916d381665f300132..9e18d7040b3851afdf07e1b9c881a4cbb1697cb1 100644
--- a/src/closure_mod.F90
+++ b/src/closure_mod.F90
@@ -1,146 +1,192 @@
 module closure
 ! Contains the routines to define closures
-USE basic
-USE model,  ONLY: CLOS, tau_e, tau_i, q_e, q_i, nu, KIN_E
-USE grid
-USE array,  ONLY: kernel_e,  kernel_i
-USE fields, ONLY: moments_e, moments_i
-USE time_integration, ONLY: updatetlevel
 IMPLICIT NONE
-
-PUBLIC :: apply_closure_model
+! Input
+CHARACTER(len=32),  PUBLIC, PROTECTED :: hierarchy_closure = 'truncation'! closure for the moment hierarchy
+INTEGER,            PUBLIC, PROTECTED :: dmax              = -1          ! max evolved degree moment
+CHARACTER(len=32),  PUBLIC, PROTECTED :: nonlinear_closure = 'truncation'! nonlinear truncation method
+INTEGER,            PUBLIC, PROTECTED :: nmax              = 0           ! upperbound of the nonlinear sum over n
+!  Attributes
+LOGICAL,DIMENSION(:,:), ALLOCATABLE, PUBLIC, PROTECTED :: evolve_mom     ! array that sets if a moment has to be evolved or not
+INTEGER,DIMENSION(:),   ALLOCATABLE, PUBLIC, PROTECTED :: nmaxarray      ! upperbound of the nonlinear sum over n (depend on j)
+ 
+PUBLIC :: closure_readinputs, set_closure_model, apply_closure_model
 
 CONTAINS
 
-! Positive Oob indices are approximated with a model
-SUBROUTINE apply_closure_model
+SUBROUTINE closure_readinputs
+  USE basic, ONLY: lu_in
   IMPLICIT NONE
+  NAMELIST /CLOSURE_PAR/ hierarchy_closure, dmax, nonlinear_closure, nmax
+  READ(lu_in,closure_par)
+END SUBROUTINE
 
-  CALL cpu_time(t0_clos)
-  IF (CLOS .EQ. 0) THEN
-    ! zero truncation, An+1=0 for n+1>nmax only
-    CALL ghosts_upper_truncation
-
-
-  ELSEIF (CLOS .EQ. 1) THEN
-    ! zero truncation, An+1=0 for n+1>nmax only
-    CALL ghosts_upper_truncation
-    ! Additional truncation at highest fully represented kinetic moment
-    ! e.g. Dmax = 3 means
-    ! only Napj s.t. p+2j <= 3 are evolved
-    ! -> (p,j) allowed are (0,0),(1,0),(0,1),(2,0),(1,1),(3,0)
-    ! =>> Dmax = min(Pmax,2*Jmax+1)
-    IF(KIN_E) THEN
-    DO ip = ipgs_e,ipge_e
-      DO ij = ijgs_e,ijge_e
-        IF ( parray_e(ip)+2*jarray_e(ij) .GT. dmaxe) &
-        moments_e(ip,ij,ikys:ikye,ikxs:ikxe,izgs:izge,updatetlevel) = 0._dp
+SUBROUTINE set_closure_model
+  USE grid, ONLY: local_np, ngp, local_nj, ngj, parray, jarray,&
+                  pmax, jmax
+  IMPLICIT NONE
+  INTEGER :: ip,ij
+  ! adapt the dmax if it is set <0
+  IF(dmax .LT. 0) THEN
+    dmax = MIN(pmax,2*jmax+1)
+  ELSEIF(dmax .GT. (pmax+2*jmax)) THEN
+    ERROR STOP "dmax is higher than the maximal moments degree available"
+  ENDIF
+  ! set the evolve mom array
+  ALLOCATE(evolve_mom(local_np+ngp,local_nj+ngj))
+  SELECT CASE(hierarchy_closure)
+  CASE('truncation')
+    DO ip = 1,local_np+ngp
+      DO ij = 1, local_nj+ngj
+        evolve_mom(ip,ij) = (parray(ip).GE.0) .AND. (jarray(ij).GE.0) &
+                      .AND. (parray(ip).LE.pmax) .AND. (jarray(ij).LE.jmax)
       ENDDO
     ENDDO
-    ENDIF
-    DO ip = ipgs_i,ipge_i
-      DO ij = ijgs_i,ijge_i
-        IF ( parray_i(ip)+2*jarray_i(ij) .GT. dmaxi) &
-        moments_i(ip,ij,ikys:ikye,ikxs:ikxe,izgs:izge,updatetlevel) = 0._dp
+  CASE('max_degree')
+    DO ip = 1,local_np+ngp
+      DO ij = 1, local_nj+ngj
+          evolve_mom(ip,ij) = (parray(ip).GE.0) .AND. (jarray(ij.GE.0)) &
+                        .AND. (2*parray(ip)+jarray(ij) .GT. dmax)
       ENDDO
+    ENDDO  
+  CASE DEFAULT
+    ERROR STOP "closure scheme not recognized (avail: truncation,max_degree)"
+  END SELECT
+
+  ! Set the nonlinear closure scheme (truncation of sum over n in Sapj)
+  ALLOCATE(nmaxarray(local_nj))
+  SELECT CASE(nonlinear_closure)
+  CASE('truncation')
+    IF(nmax .LT. 0) THEN
+      ERROR STOP "cannot truncate the sum with a number smaller than 0"
+    ELSE
+      nmaxarray(:) = nmax
+    ENDIF
+  CASE('anti_laguerre_aliasing')
+    DO ij = 1,local_nj
+      nmaxarray(ij) = jmax - jarray(ij+ngj/2)
     ENDDO
-    ! + ghosts truncation
-    CALL ghosts_upper_truncation
-  ELSE
-    ERROR STOP '>> ERROR << Closure scheme not found '
-
-  ENDIF
-
-  CALL ghosts_lower_truncation
+  CASE('full_sum')
+    nmaxarray(:) = jmax
+  CASE DEFAULT
+    ERROR STOP "nonlinear closure scheme not recognized (avail: truncation,anti_laguerre_aliasing,full_sum)"
+  END SELECT
 
-  CALL cpu_time(t1_clos)
-  tc_clos = tc_clos + (t1_clos - t0_clos)
-END SUBROUTINE apply_closure_model
+END SUBROUTINE set_closure_model
 
 ! Positive Oob indices are approximated with a model
-SUBROUTINE ghosts_upper_truncation
+SUBROUTINE apply_closure_model
+  USE prec_const, ONLY: xp
+  USE grid,       ONLY: local_nj,ngj,local_np,ngp,local_na
+  USE fields,     ONLY: moments
+  USE time_integration, ONLY: updatetlevel
   IMPLICIT NONE
+  INTEGER ::ij,ip,ia
+  SELECT CASE (hierarchy_closure)
+    CASE('truncation','max_degree')
+      DO ij = 1, local_nj+ngj
+        DO ip = 1,local_np+ngp
+          DO ia = 1,local_na
+            IF(.NOT. evolve_mom(ip,ij))&
+              moments(ia,ip,ij,:,:,:,updatetlevel) = 0._xp
+          ENDDO
+        ENDDO
+      ENDDO  
+    CASE DEFAULT
+      ERROR STOP "closure scheme not recognized"
+  END SELECT
+END SUBROUTINE apply_closure_model
 
-! zero truncation, An+1=0 for n+1>nmax
-  ! Electrons
-  IF(KIN_E) THEN
-    ! applies only for the process that has largest j index
-    IF(ije_e .EQ. Jmaxe+1) THEN
-      DO ip = ipgs_e,ipge_e
-        moments_e(ip,ije_e+1,ikys:ikye,ikxs:ikxe,izgs:izge,updatetlevel) = 0._dp
-      ENDDO
-    ENDIF
-    ! applies only for the process that has largest p index
-    IF(ipe_e .EQ. Pmaxe+1) THEN
-      DO ij = ijgs_e,ijge_e
-        moments_e(ipe_e+1,ij,ikys:ikye,ikxs:ikxe,izgs:izge,updatetlevel) = 0._dp
-        IF(deltape .EQ. 1) THEN ! Must truncate the second stencil
-        moments_e(ipe_e+2,ij,ikys:ikye,ikxs:ikxe,izgs:izge,updatetlevel) = 0._dp
-        ENDIF
-      ENDDO
-    ENDIF
-  ENDIF
-
-  ! Ions
-  ! applies only for the process that has largest j index
-  IF(ije_i .EQ. Jmaxi+1) THEN
-    DO ip = ipgs_i,ipge_i
-      moments_i(ip,ije_i+1,ikys:ikye,ikxs:ikxe,izgs:izge,updatetlevel) = 0._dp
-    ENDDO
-  ENDIF
-  ! applies only for the process that has largest p index
-  IF(ipe_i .EQ. Pmaxi+1) THEN
-    DO ij = ijgs_i,ijge_i
-      moments_i(ipe_i+1,ij,ikys:ikye,ikxs:ikxe,izgs:izge,updatetlevel) = 0._dp
-      IF(deltape .EQ. 1) THEN ! Must truncate the second stencil
-      moments_i(ipe_i+2,ij,ikys:ikye,ikxs:ikxe,izgs:izge,updatetlevel) = 0._dp
-      ENDIF
-    ENDDO
-  ENDIF
-
-END SUBROUTINE ghosts_upper_truncation
-
-! Negative OoB indices are 0
-SUBROUTINE ghosts_lower_truncation
+! ! Positive Oob indices are approximated with a model
+! SUBROUTINE apply_closure_model
+!   USE prec_const, ONLY: xp
+!   USE grid,       ONLY: local_nj,ngj, jarray,&
+!                         local_np,ngp, parray
+!   USE fields,     ONLY: moments
+!   USE time_integration, ONLY: updatetlevel
+!   IMPLICIT NONE
+!   INTEGER ::ij,ip,ia
+!   SELECT CASE (hierarchy_closure)
+!     CASE('truncation')
+!     ! zero truncation, An+1=0 for n+1>nmax only
+!     CALL ghosts_upper_truncation
+!     CASE('max_degree')
+!     ! truncation at highest fully represented kinetic moment
+!     ! e.g. Dmax = 3 means
+!     ! only Napj s.t. p+2j <= 3 are evolved
+!     ! -> (p,j) allowed are (0,0),(1,0),(0,1),(2,0),(1,1),(3,0)
+!     ! =>> Dmax = min(Pmax,2*Jmax+1)
+!     j: DO ij = 1,local_nj+ngj
+!     p: DO ip = 1,local_np+ngp
+!       IF ( parray(ip)+2*jarray(ij) .GT. dmax) THEN
+!         moments(ia,ip,ij,:,:,:,updatetlevel) = 0._xp
+!       ENDIF
+!     ENDDO p
+!     ENDDO j
+!   END SELECT
+!   CALL ghosts_lower_truncation
+! END SUBROUTINE apply_closure_model
+
+! ! Positive Oob indices are approximated with a model
+! SUBROUTINE ghosts_upper_truncation
+!   USE prec_const, ONLY: xp
+!   USE grid,       ONLY: local_np,ngp,local_pmax, pmax,&
+!                         local_nj,ngj,local_jmax, jmax
+!   USE fields,           ONLY: moments
+!   USE time_integration, ONLY: updatetlevel
+!   IMPLICIT NONE
+!   INTEGER ::ig
+!   ! zero truncation, An+1=0 for n+1>nmax
+!     ! applies only for the processes that evolve the highest moment degree
+!     IF(local_jmax .GE. Jmax) THEN
+!       DO ig = 1,ngj/2
+!         moments(:,:,local_nj+ngj/2+ig,:,:,:,updatetlevel) = 0._xp
+!       ENDDO
+!     ENDIF
+!     ! applies only for the process that has largest p index
+!     IF(local_pmax .GE. Pmax) THEN
+!       DO ig = 1,ngp/2
+!         moments(:,local_np+ngp/2+ig,:,:,:,:,updatetlevel) = 0._xp
+!       ENDDO
+!     ENDIF
+! END SUBROUTINE ghosts_upper_truncation
+
+! ! Negative OoB indices are 0
+! SUBROUTINE ghosts_lower_truncation
+!   USE prec_const, ONLY: xp
+!   USE grid,       ONLY: ngp,ngj,local_pmin,local_jmin
+!   USE fields,           ONLY: moments
+!   USE time_integration, ONLY: updatetlevel
+!   IMPLICIT NONE
+!   INTEGER :: ig
+! ! zero truncation, An=0 for n<0
+!     IF(local_jmin .EQ. 0) THEN
+!       DO ig  = 1,ngj/2
+!         moments(:,:,ig,:,:,:,updatetlevel) = 0._xp
+!       ENDDO
+!     ENDIF
+!     ! applies only for the process that has lowest p index
+!     IF(local_pmin .EQ. 0) THEN
+!       DO ig  = 1,ngp/2
+!         moments(:,ig,:,:,:,:,updatetlevel) = 0._xp
+!       ENDDO
+!     ENDIF
+
+! END SUBROUTINE ghosts_lower_truncation
+
+
+SUBROUTINE closure_outputinputs(fid)
+  ! Write the input parameters to the results_xx.h5 file
+  USE futils, ONLY: attach, creatd
   IMPLICIT NONE
-
-! zero truncation, An=0 for n<0
-  ! Electrons
-  IF(KIN_E) THEN
-    ! applies only for the process that has lowest j index
-    IF(ijs_e .EQ. 1) THEN
-      DO ip = ipgs_e,ipge_e
-        moments_e(ip,ijs_e-1,ikys:ikye,ikxs:ikxe,izgs:izge,updatetlevel) = 0._dp
-      ENDDO
-    ENDIF
-    ! applies only for the process that has lowest p index
-    IF(ips_e .EQ. 1) THEN
-      DO ij = ijgs_e,ijge_e
-        moments_e(ips_e-1,ij,ikys:ikye,ikxs:ikxe,izgs:izge,updatetlevel) = 0._dp
-        IF(deltape .EQ. 1) THEN ! Must truncate the second stencil
-        moments_e(ips_e-2,ij,ikys:ikye,ikxs:ikxe,izgs:izge,updatetlevel) = 0._dp
-        ENDIF
-      ENDDO
-    ENDIF
-  ENDIF
-
-  ! Ions
-  IF(ijs_i .EQ. 1) THEN
-    ! applies only for the process that has lowest j index
-    DO ip = ipgs_i,ipge_i
-      moments_i(ip,ijs_i-1,ikys:ikye,ikxs:ikxe,izgs:izge,updatetlevel) = 0._dp
-    ENDDO
-  ENDIF
-  ! applies only for the process that has lowest p index
-  IF(ips_i .EQ. 1) THEN
-    DO ij = ijgs_i,ijge_i
-      moments_i(ips_i-1,ij,ikys:ikye,ikxs:ikxe,izgs:izge,updatetlevel) = 0._dp
-      IF(deltape .EQ. 1) THEN ! Must truncate the second stencil
-      moments_i(ips_i-2,ij,ikys:ikye,ikxs:ikxe,izgs:izge,updatetlevel) = 0._dp
-      ENDIF
-    ENDDO
-  ENDIF
-
-END SUBROUTINE ghosts_lower_truncation
+  INTEGER, INTENT(in) :: fid
+  CHARACTER(len=256)  :: str
+  WRITE(str,'(a)') '/data/input/closure'
+  CALL creatd(fid, 0,(/0/),TRIM(str),'Closure Input')
+  CALL attach(fid, TRIM(str),"hierarchy_closure",hierarchy_closure)
+  CALL attach(fid, TRIM(str),             "dmax",dmax)
+  CALL attach(fid, TRIM(str),"nonlinear_closure",nonlinear_closure)
+  CALL attach(fid, TRIM(str),             "nmax",nmax)
+END SUBROUTINE closure_outputinputs
 
 END module closure
diff --git a/src/coeff_mod.F90 b/src/coeff_mod.F90
index cfd322431a9f2d8b9a4a30dc9835ac9c880b2800..3c4c9125a19bfb7103e6050140f4850558bb715e 100644
--- a/src/coeff_mod.F90
+++ b/src/coeff_mod.F90
@@ -44,11 +44,11 @@ CONTAINS
       XOUT = TO_FM('0.0')
       !
       DO n1=0,n 
-         AUXN =Lpjl(REAL(m,dp)-0.5_dp,REAL(n,dp),REAL(n1,dp))
+         AUXN =Lpjl(REAL(m,xp)-0.5_xp,REAL(n,xp),REAL(n1,xp))
          DO k1=0,k
-            AUXK =Lpjl(-0.5_dp,REAL(k,dp),REAL(k1,dp))
+            AUXK =Lpjl(-0.5_xp,REAL(k,xp),REAL(k1,xp))
             DO s1=0,s
-               AUXS = Lpjl(-0.5_dp,REAL(s,dp),REAL(s1,dp))
+               AUXS = Lpjl(-0.5_xp,REAL(s,xp),REAL(s1,xp))
                XOUT = XOUT + FACTORIAL(TO_FM(n1 + k1 + s1 ))*AUXN*AUXK*AUXS 
             ENDDO
          ENDDO
@@ -75,7 +75,7 @@ CONTAINS
       IMPLICIT NONE
 
       !
-      REAL(dp), intent(in) :: p,j,l
+      REAL(xp), intent(in) :: p,j,l
       TYPE(FM) :: XOUT
       !
       CALL FM_ENTER_USER_FUNCTION(XOUT)
diff --git a/src/collision_mod.F90 b/src/collision_mod.F90
index 2209615acf896e4d903d8878f1742ffbd879c670..903e6ddbae0a9488e30bae562b6a6541f9389292 100644
--- a/src/collision_mod.F90
+++ b/src/collision_mod.F90
@@ -1,85 +1,92 @@
 module collision
-! contains the Hermite-Laguerre collision operators. Solved using COSOlver.
-USE array
-USE basic
-USE fields
-USE futils
-USE grid
-USE model
-USE prec_const
-USE time_integration
-USE utility
+! contains the Hermite-Laguerre collision operators without the use of COSOLVER
+USE prec_const, ONLY : xp
 IMPLICIT NONE
 PRIVATE
-! Set the collision model to use
-! (Lenard-Bernstein: 'LB', Dougherty: 'DG', Sugama: 'SG', Lorentz: 'LR', Landau: 'LD')
-CHARACTER(len=16),PUBLIC,PROTECTED :: collision_model
-LOGICAL, PUBLIC, PROTECTED :: gyrokin_CO   =.true. ! activates GK effects on CO
-LOGICAL, PUBLIC, PROTECTED :: interspecies =.true. ! activates interpecies collision
-CHARACTER(len=128), PUBLIC :: mat_file    ! COSOlver matrix file names
-REAL(dp), PUBLIC, PROTECTED :: collision_kcut = 100.0
-LOGICAL, PUBLIC, PROTECTED :: cosolver_coll ! check if cosolver matrices are used
-
+CHARACTER(len=32),    PUBLIC, PROTECTED :: collision_model      ! (Lenard-Bernstein: 'LB', Dougherty: 'DG', Sugama: 'SG', Lorentz: 'LR', Landau: 'LD')
+LOGICAL,              PUBLIC, PROTECTED :: GK_CO        =.true. ! activates GK effects on CO
+LOGICAL,              PUBLIC, PROTECTED :: INTERSPECIES =.true. ! activates interpecies collision
+CHARACTER(len=128),   PUBLIC, PROTECTED :: mat_file    ! COSOlver matrix file names
+REAL(xp),             PUBLIC, PROTECTED :: collision_kcut = 100.0
+LOGICAL,              PUBLIC, PROTECTED :: cosolver_coll ! check if cosolver matrices are used
+
+PUBLIC :: init_collision
 PUBLIC :: collision_readinputs, coll_outputinputs
-PUBLIC :: compute_TColl
+PUBLIC :: compute_Capj
 PUBLIC :: compute_lenard_bernstein, compute_dougherty
-PUBLIC :: load_COSOlver_mat, compute_cosolver_coll
-PUBLIC :: apply_COSOlver_mat_e, apply_COSOlver_mat_i
 
 CONTAINS
+  SUBROUTINE init_collision
+    USE cosolver_interface, ONLY: load_COSOlver_mat
+    IMPLICIT NONE
+    ! Load the COSOlver collision operator coefficients
+    IF(cosolver_coll) &
+    CALL load_COSOlver_mat(GK_CO,INTERSPECIES,mat_file,collision_kcut)
+  END SUBROUTINE init_collision
 
   SUBROUTINE collision_readinputs
     ! Read the input parameters
+    USE basic, ONLY: lu_in
     IMPLICIT NONE
-    NAMELIST /COLLISION_PAR/ collision_model, gyrokin_CO, interspecies, mat_file, collision_kcut
+    NAMELIST /COLLISION_PAR/ collision_model, GK_CO, INTERSPECIES, mat_file, collision_kcut
     READ(lu_in,collision_par)
     SELECT CASE(collision_model)
-      CASE ('LB') ! Lenhard bernstein
-        cosolver_coll = .false.
-        interspecies  = .false.
-      CASE ('DG') ! Dougherty
-        cosolver_coll = .false.
-        interspecies  = .false.
-      CASE ('SG') ! Sugama
-        cosolver_coll = .true.
-      CASE ('LR') ! Lorentz (Pitch angle)
-        cosolver_coll = .true.
-        interspecies  = .false.
-      CASE ('LD') ! Landau
-        cosolver_coll = .true.
+      ! Lenhard bernstein
+      CASE   ('LB','lenhard-bernstein','Lenhard-Bernstein')
+        collision_model = 'LB'
+        cosolver_coll   = .false.
+        INTERSPECIES    = .false.
+      ! Dougherty
+      CASE ('DG','dougherty','Dougherty')
+        collision_model = 'DG'
+        cosolver_coll   = .false.
+        INTERSPECIES    = .false.
+      ! Sugama
+      CASE ('SG','sugama','Sugama')
+        collision_model = 'SG'
+        cosolver_coll   = .true.
+      ! Lorentz (Pitch angle)
+      CASE ('LR','lorentz','Lorentz','PA','pitch-angle')
+        collision_model = 'LR'
+        cosolver_coll   = .true.
+        INTERSPECIES    = .false.
+      ! Landau named also Coulomb or Fokker-Planck
+      CASE ('LD','landau','Landau','CL','coulomb','Coulomb','FP','fokker-planck','Fokker-Planck')
+        collision_model = 'LD'
+        cosolver_coll   = .true.
       CASE ('none','hypcoll','dvpar4')
-        cosolver_coll = .false.
-        interspecies  = .false.
+        collision_model = 'NO'
+        cosolver_coll   = .false.
+        INTERSPECIES    = .false.
       CASE DEFAULT
         ERROR STOP '>> ERROR << collision model not recognized!!'
     END SELECT
 
   END SUBROUTINE collision_readinputs
 
-  SUBROUTINE coll_outputinputs(fidres, str)
+  SUBROUTINE coll_outputinputs(fidres)
     !    Write the input parameters to the results_xx.h5 file
+    USE futils, ONLY: attach, creatd
     IMPLICIT NONE
     INTEGER, INTENT(in) :: fidres
-    CHARACTER(len=256), INTENT(in) :: str
-    CHARACTER(len=2) :: gkco = 'dk'
-    CHARACTER(len=2) :: abco = 'aa'
-    CHARACTER(len=6) :: coname
-
-    IF (gyrokin_CO)   gkco = 'GK'
-    IF (interspecies) abco = 'ab'
+    CHARACTER(len=256) :: str
+    CHARACTER(len=2)   :: gkco = 'DK'
+    CHARACTER(len=2)   :: abco = 'aa'
+    CHARACTER(len=6)   :: coname
+    IF (GK_CO)   gkco = 'GK'
+    IF (INTERSPECIES) abco = 'ab'
     WRITE(coname,'(a2,a2,a2)') collision_model,gkco,abco
-
-    CALL attach(fidres, TRIM(str),   "CO", coname)
-    CALL attach(fidres, TRIM(str), "matfilename",    mat_file)
+    WRITE(str,'(a)') '/data/input/coll'
+    CALL creatd(fidres, 0,(/0/),TRIM(str),'Collision Input')
+    CALL attach(fidres, TRIM(str),         "CO", coname)
+    CALL attach(fidres, TRIM(str), "matfilename",mat_file)
   END SUBROUTINE coll_outputinputs
 
-  SUBROUTINE compute_TColl
-    USE basic
-    USE model, ONLY : nu
+  SUBROUTINE compute_Capj
+    USE array, ONLY: Capj
+    USE model, ONLY: nu
+    USE cosolver_interface, ONLY: compute_cosolver_coll
     IMPLICIT NONE
-    ! Execution time start
-    CALL cpu_time(t0_coll)
-
     IF (nu .NE. 0) THEN
       SELECT CASE(collision_model)
         CASE ('LB')
@@ -87,716 +94,229 @@ CONTAINS
         CASE ('DG')
           CALL compute_dougherty
         CASE ('SG','LR','LD')
-          CALL compute_cosolver_coll
+          CALL compute_cosolver_coll(GK_CO)
         CASE ('none','hypcoll','dvpar4')
-          IF(KIN_E) &
-          TColl_e = 0._dp
-          TColl_i = 0._dp
+          Capj = 0._xp
         CASE DEFAULT
           ERROR STOP '>> ERROR << collision operator not recognized!!'
       END SELECT
     ELSE
-      IF(KIN_E) &
-      TColl_e = 0._dp
-      TColl_i = 0._dp
+      Capj = 0._xp
     ENDIF
-
-    ! Execution time end
-    CALL cpu_time(t1_coll)
-    tc_coll = tc_coll + (t1_coll - t0_coll)
-  END SUBROUTINE compute_TColl
+  END SUBROUTINE compute_Capj
 
   !******************************************************************************!
   !! Lenard Bernstein collision operator
   !******************************************************************************!
   SUBROUTINE compute_lenard_bernstein
+    USE grid, ONLY: ias,iae, ips,ipe, ijs,ije, parray, jarray, &
+                    ikys,ikye, ikxs,ikxe, izs,ize, kparray
+    USE species,          ONLY: sigma2_tau_o2, nu_ab
+    USE time_integration, ONLY: updatetlevel
+    USE array,            ONLY: Capj
+    USE fields,           ONLY: moments
     IMPLICIT NONE
-    COMPLEX(dp) :: TColl_
-    IF (KIN_E) THEN
-      DO ip = ips_e,ipe_e;DO ij = ijs_e,ije_e
-      DO ikx = ikxs, ikxe;DO iky = ikys, ikye; DO iz = izs,ize
-                CALL LenardBernstein_e(ip,ij,iky,ikx,iz,TColl_)
-                TColl_e(ip,ij,iky,ikx,iz) = TColl_
-      ENDDO;ENDDO;ENDDO
-      ENDDO;ENDDO
-    ENDIF
-    DO ip = ips_i,ipe_i;DO ij = ijs_i,ije_i
-    DO ikx = ikxs, ikxe;DO iky = ikys, ikye; DO iz = izs,ize
-        CALL LenardBernstein_i(ip,ij,iky,ikx,iz,TColl_)
-        TColl_i(ip,ij,iky,ikx,iz) = TColl_
-    ENDDO;ENDDO;ENDDO
-    ENDDO;ENDDO
-  END SUBROUTINE compute_lenard_bernstein
-
-  !******************************************************************************!
-  !! for electrons
-  !******************************************************************************!
-  SUBROUTINE LenardBernstein_e(ip_,ij_,iky_,ikx_,iz_,TColl_)
-    IMPLICIT NONE
-    INTEGER,     INTENT(IN)    :: ip_,ij_,iky_,ikx_,iz_
-    COMPLEX(dp), INTENT(OUT)   :: TColl_
-
-    REAL(dp)    :: j_dp, p_dp, be_2, kp
-    INTEGER     :: eo_
-
-    !** Auxiliary variables **
-    eo_       = MODULO(parray_e(ip_),2)
-    p_dp      = REAL(parray_e(ip_),dp)
-    j_dp      = REAL(jarray_e(ij_),dp)
-    kp        = kparray(iky_,ikx_,iz_,eo_)
-    be_2      = kp**2 * sigmae2_taue_o2 ! this is (be/2)^2
-    eo_       = MODULO(parray_e(ip_),2)
-
-    !** Assembling collison operator **
-    ! -nuee (p + 2j) Nepj
-    TColl_ = -nu_ee * (p_dp + 2._dp*j_dp)*moments_e(ip_,ij_,iky_,ikx_,iz_,updatetlevel)
-    IF(gyrokin_CO) THEN
-      TColl_ = TColl_ - nu_ee *2._dp*be_2*moments_e(ip_,ij_,iky_,ikx_,iz_,updatetlevel)
-    ENDIF
-  END SUBROUTINE LenardBernstein_e
-
-    !******************************************************************************!
-    !! for ions
-    !******************************************************************************!
-    SUBROUTINE LenardBernstein_i(ip_,ij_,iky_,ikx_,iz_,TColl_)
-      USE fields, ONLY: moments_i
-      USE grid,   ONLY: parray_i, jarray_i
-      USE basic
-      USE model,  ONLY: sigmai2_taui_o2, nu_i
-      USE time_integration, ONLY : updatetlevel
-      IMPLICIT NONE
-      INTEGER,     INTENT(IN)    :: ip_,ij_,iky_,ikx_,iz_
-      COMPLEX(dp), INTENT(OUT)   :: TColl_
-
-      REAL(dp)    :: j_dp, p_dp, kp, bi_2
-      INTEGER     :: eo_
-
+    COMPLEX(xp) :: TColl_
+    REAL(xp)    :: j_xp, p_xp, ba_2, kp
+    INTEGER     :: iz,ikx,iky,ij,ip,ia,eo
+    DO iz = izs,ize
+    DO ikx = ikxs, ikxe;
+    DO iky = ikys, ikye;
+    DO ij = ijs,ije
+    DO ip = ips,ipe;
+    DO ia = ias, iae
       !** Auxiliary variables **
-      eo_       = MODULO(parray_i(ip_),2)
-      p_dp      = REAL(parray_i(ip_),dp)
-      j_dp      = REAL(jarray_i(ij_),dp)
-      kp        = kparray(iky_,ikx_,iz_,eo_)
-      bi_2      = kp**2 * sigmai2_taui_o2 ! this is (be/2)^2
+      eo   = MODULO(parray(ip),2)
+      p_xp = REAL(parray(ip),xp)
+      j_xp = REAL(jarray(ij),xp)
+      kp   = kparray(iky,ikx,iz,eo)
+      ba_2 = kp**2 * sigma2_tau_o2(ia) ! this is (ba/2)^2
 
       !** Assembling collison operator **
-      ! -nuii (p + 2j) Nipj
-      TColl_ = -nu_i * (p_dp + 2._dp*j_dp)*moments_i(ip_,ij_,iky_,ikx_,iz_,updatetlevel)
-      IF(gyrokin_CO) THEN
-        TColl_ = TColl_ - nu_i *2._dp*bi_2*moments_i(ip_,ij_,iky_,ikx_,iz_,updatetlevel)
+      ! -nuee (p + 2j) Nepj
+      TColl_ = -nu_ab(ia,ia) * (p_xp + 2._xp*j_xp)*moments(ia,ip,ij,iky,ikx,iz,updatetlevel)
+      IF(GK_CO) THEN
+        TColl_ = TColl_ - nu_ab(ia,ia) *2._xp*ba_2*moments(ia,ip,ij,iky,ikx,iz,updatetlevel)
       ENDIF
-    END SUBROUTINE LenardBernstein_i
+      Capj(ia,ip,ij,iky,ikx,iz) = TColl_
+    ENDDO
+    ENDDO
+    ENDDO
+    ENDDO
+    ENDDO
+    ENDDO
+    END SUBROUTINE compute_lenard_bernstein
 
   !******************************************************************************!
   !! Doughtery collision operator
   !******************************************************************************!
   SUBROUTINE compute_dougherty
     IMPLICIT NONE
-    IF(gyrokin_CO) THEN
-      if(KIN_E) &
-      CALL DoughertyGK_aa(ips_e,ipe_e,ijs_e,ije_e,ijgs_e,ijge_e,ip0_e,ip1_e,ip2_e,Jmaxe,&
-                parray_e(ips_e:ipe_e),jarray_e(ijs_e:ije_e),&
-                kernel_e(ijgs_e:ijge_e,ikys:ikye,ikxs:ikxe,izs:ize,0:1),&
-                nadiab_moments_e(ips_e:ipe_e,ijs_e:ije_e,ikys:ikye,ikxs:ikxe,izs:ize),&
-                nu_ee,sigmae2_taue_o2,sqrt_sigmae2_taue_o2,TColl_e)
-      CALL DoughertyGK_aa(ips_i,ipe_i,ijs_i,ije_i,ijgs_i,ijge_i,ip0_i,ip1_i,ip2_i,Jmaxi,&
-                parray_i(ips_i:ipe_i),jarray_i(ijs_i:ije_i),&
-                kernel_i(ijgs_i:ijge_i,ikys:ikye,ikxs:ikxe,izs:ize,0:1),&
-                nadiab_moments_i(ips_i:ipe_i,ijs_i:ije_i,ikys:ikye,ikxs:ikxe,izs:ize),&
-                nu_i,sigmai2_taui_o2,sqrt_sigmai2_taui_o2,TColl_i)
+    IF(GK_CO) THEN
+      CALL Dougherty_GK
     ELSE
-      if(KIN_E) &
-      CALL DoughertyDK_aa(ips_e,ipe_e,ijs_e,ije_e,ip0_e,ip1_e,ip2_e,&
-                parray_e(ips_e:ipe_e),jarray_e(ijs_e:ije_e),&
-                moments_e(ips_e:ipe_e,ijs_e:ije_e,ikys:ikye,ikxs:ikxe,izs:ize,updatetlevel),&
-                nu_ee,TColl_e)
-      CALL DoughertyDK_aa(ips_i,ipe_i,ijs_i,ije_i,ip0_i,ip1_i,ip2_i,&
-                parray_i(ips_i:ipe_i),jarray_i(ijs_i:ije_i),&
-                moments_i(ips_i:ipe_i,ijs_i:ije_i,ikys:ikye,ikxs:ikxe,izs:ize,updatetlevel),&
-                nu_i,TColl_i)
+      CALL Dougherty_DK
     ENDIF
   END SUBROUTINE compute_dougherty
+
   !******************************************************************************!
-  !! Doughtery driftkinetic collision operator (like-species)
+  !! Doughtery drift-kinetic collision operator (species like)
   !******************************************************************************!
-  SUBROUTINE DoughertyDK_aa(ips_,ipe_,ijs_,ije_,ip0_,ip1_,ip2_,&
-                            parray_,jarray_,moments_,nu_,Tcoll_)
-
+  SUBROUTINE Dougherty_DK
+    USE grid, ONLY: local_na, local_np, local_nj, parray, jarray, ngp, ngj, &
+                    ip0, ip1, ip2, ij0, ij1, &
+                    local_nky, local_nkx, local_nz, ngz
+    USE species,          ONLY: nu_ab
+    USE time_integration, ONLY: updatetlevel
+    USE array,            ONLY: Capj
+    USE fields,           ONLY: moments
+    USE prec_const,       ONLY: xp, SQRT2, twothird
     IMPLICIT NONE
-    !! INPUTS
-    INTEGER, INTENT(IN) :: ips_, ipe_, ijs_, ije_
-    INTEGER, INTENT(IN) :: ip0_, ip1_, ip2_
-    INTEGER,  DIMENSION(ips_:ipe_), INTENT(IN) :: parray_
-    INTEGER,  DIMENSION(ijs_:ije_), INTENT(IN) :: jarray_
-    COMPLEX(dp), DIMENSION(ips_:ipe_,ijs_:ije_,ikys:ikye,ikxs:ikxe,izs:ize),INTENT(IN) :: moments_
-    REAL(dp), INTENT(IN) :: nu_
-    !! OUTPUT
-    COMPLEX(dp), DIMENSION(ips_:ipe_, ijs_:ije_,ikys:ikye,ikxs:ikxe, izs:ize), INTENT(OUT) :: TColl_
-    !! Local variables
-    REAL(dp)    :: j_dp, p_dp
-    COMPLEX(dp) :: Tmp
-    DO iz = izs,ize
-      DO iky = ikys, ikye;
-        DO ikx = ikxs, ikxe;
-          DO ij = ijs_,ije_
-            DO ip = ips_,ipe_;
-              !** Auxiliary variables **
-              p_dp      = REAL(parray_(ip),dp)
-              j_dp      = REAL(jarray_(ij),dp)
-              !** Assembling collison operator **
-              Tmp = -(p_dp + 2._dp*j_dp)*moments_(ip,ij,iky,ikx,iz)
-              IF( (p_dp .EQ. 1._dp) .AND. (j_dp .EQ. 0._dp)) THEN !Ce10
-                Tmp = Tmp +  moments_(ip1_,1,iky,ikx,iz)
-              ELSEIF( (p_dp .EQ. 2._dp) .AND. (j_dp .EQ. 0._dp)) THEN ! Ce20
-                Tmp = Tmp +       twothird*moments_(ip2_,1,iky,ikx,iz) &
-                          - SQRT2*twothird*moments_(ip0_,2,iky,ikx,iz)
-              ELSEIF( (p_dp .EQ. 0._dp) .AND. (j_dp .EQ. 1._dp)) THEN ! Ce01
-                Tmp = Tmp +  2._dp*twothird*moments_(ip0_,2,iky,ikx,iz) &
-                           - SQRT2*twothird*moments_(ip2_,1,iky,ikx,iz)
-              ENDIF
-              TColl_(ip,ij,iky,ikx,iz) = nu_ * Tmp
-    ENDDO;ENDDO;ENDDO
-    ENDDO;ENDDO
-  END SUBROUTINE DoughertyDK_aa
-
+    COMPLEX(xp) :: Tmp
+    INTEGER     :: iz,ikx,iky,ij,ip,ia, ipi,iji,izi
+    REAL(xp)    :: j_xp, p_xp
+    DO iz = 1,local_nz
+      izi = iz + ngz/2
+      DO ikx = 1,local_nkx
+        DO iky = 1,local_nky 
+          DO ij = 1,local_nj
+            iji = ij + ngj/2 
+            DO ip = 1,local_np
+              ipi = ip + ngp/2
+              DO ia = 1,local_na
+                !** Auxiliary variables **
+                p_xp      = REAL(parray(ipi),xp)
+                j_xp      = REAL(jarray(iji),xp)
+                !** Assembling collison operator **
+                Tmp = -(p_xp + 2._xp*j_xp)*moments(ia,ipi,iji,iky,ikx,izi,updatetlevel)
+                IF( (p_xp .EQ. 1._xp) .AND. (j_xp .EQ. 0._xp)) THEN !Ce10
+                  Tmp = Tmp +  moments(ia,ip1,ij1,iky,ikx,iz,updatetlevel)
+                ELSEIF( (p_xp .EQ. 2._xp) .AND. (j_xp .EQ. 0._xp)) THEN ! Ce20
+                  Tmp = Tmp +       twothird*moments(ia,ip2,ij0,iky,ikx,izi,updatetlevel) &
+                            - SQRT2*twothird*moments(ia,ip0,ij1,iky,ikx,izi,updatetlevel)
+                ELSEIF( (p_xp .EQ. 0._xp) .AND. (j_xp .EQ. 1._xp)) THEN ! Ce01
+                  Tmp = Tmp +  2._xp*twothird*moments(ia,ip0,ij1,iky,ikx,izi,updatetlevel) &
+                              -SQRT2*twothird*moments(ia,ip2,ij0,iky,ikx,izi,updatetlevel)
+                ENDIF
+                Capj(ia,ip,ij,iky,ikx,iz) = nu_ab(ia,ia) * Tmp
+              ENDDO
+            ENDDO
+          ENDDO
+        ENDDO
+      ENDDO
+    ENDDO
+  END SUBROUTINE Dougherty_DK
   !******************************************************************************!
   !! Doughtery gyrokinetic collision operator (species like)
   !******************************************************************************!
-  SUBROUTINE DoughertyGK_aa(ips_,ipe_,ijs_,ije_,ijgs_,ijge_,ip0_,ip1_,ip2_,jmax_,&
-                            parray_,jarray_,kernel_,nadiab_moments_,&
-                            nu_,sigmaa2_taua_o2, sqrt_sigmaa2_taua_o2,Tcoll_)
+  SUBROUTINE Dougherty_GK
+    USE grid, ONLY: local_na, local_np, local_nj, parray, jarray, ngp, ngj, &
+                    ip0, ip1, ip2, &
+                    local_nky, local_nkx, local_nz, ngz, kparray, nzgrid
+    USE species,          ONLY: sigma2_tau_o2, sqrt_sigma2_tau_o2, nu_ab
+    USE array,            ONLY: Capj, nadiab_moments, kernel
+    USE prec_const,       ONLY: xp, SQRT2, twothird
     IMPLICIT NONE
-    !! INPUTS
-    INTEGER,     INTENT(IN) :: ips_, ipe_, ijs_, ije_, ijgs_, ijge_
-    INTEGER,     INTENT(IN) :: ip0_, ip1_, ip2_,jmax_
-    INTEGER,     DIMENSION(ips_:ipe_), INTENT(IN) :: parray_
-    INTEGER,     DIMENSION(ijs_:ije_), INTENT(IN) :: jarray_
-    REAL(dp),    DIMENSION(ijgs_:ijge_,ikys:ikye,ikxs:ikxe,izs:ize,0:1),  INTENT(IN) :: kernel_
-    COMPLEX(dp), DIMENSION(ips_:ipe_,ijs_:ije_,ikys:ikye,ikxs:ikxe,izs:ize),INTENT(IN) :: nadiab_moments_
-    REAL(dp),    INTENT(IN) :: nu_, sigmaa2_taua_o2, sqrt_sigmaa2_taua_o2
-    !! OUTPUT
-    COMPLEX(dp), DIMENSION(ips_:ipe_,ijs_:ije_,ikys:ikye,ikxs:ikxe,izs:ize), INTENT(OUT) :: TColl_
     !! Local variables
-    COMPLEX(dp) :: dens,upar,uperp,Tpar, Tperp, Temp
-    COMPLEX(dp) :: nadiab_moment_0j, Tmp
-    REAL(dp)    :: Knp0, Knp1, Knm1, kp
-    INTEGER     :: in, eo
-    REAL(dp)    :: n_dp, j_dp, p_dp, ba, ba_2
-    DO iz = izs,ize
-    DO iky = ikys, ikye;
-    DO ikx = ikxs, ikxe;
-    DO ij = ijs_,ije_
-    DO ip = ips_,ipe_;
+    COMPLEX(xp) :: dens_,upar_,uperp_,Tpar_,Tperp_,Temp_
+    COMPLEX(xp) :: nadiab_moment_0j, Tmp
+    REAL(xp)    :: Knp0, Knp1, Knm1, kp
+    REAL(xp)    :: n_xp, j_xp, p_xp, ba, ba_2
+    INTEGER     :: iz,ikx,iky,ij,ip,ia,eo,in, ipi,iji,izi,ini
+    DO iz = 1,local_nz
+      izi = iz + ngz/2
+      DO ikx = 1,local_nkx
+        DO iky = 1,local_nky 
+          DO ij = 1,local_nj
+            iji = ij + ngj/2 
+            DO ip = 1,local_np
+              ipi = ip + ngp/2
+              DO ia = 1,local_na
     !** Auxiliary variables **
-    p_dp      = REAL(parray_(ip),dp)
-    eo        = MODULO(parray_(ip),2)
-    j_dp      = REAL(jarray_(ij),dp)
-    kp        = kparray(iky,ikx,iz,eo)
-    ba_2      = kp**2 * sigmaa2_taua_o2 ! this is (l_a/2)^2
-    ba        = 2_dp*kp * sqrt_sigmaa2_taua_o2  ! this is l_a
-
+    p_xp      = REAL(parray(ipi),xp)
+    j_xp      = REAL(jarray(iji),xp)
+    eo        = MIN(nzgrid,MODULO(parray(ipi),2)+1)
+    kp        = kparray(iky,ikx,izi,eo)
+    ba_2      = kp**2 * sigma2_tau_o2(ia) ! this is (l_a/2)^2
+    ba        = 2_xp*kp * sqrt_sigma2_tau_o2(ia)  ! this is l_a
     !** Assembling collison operator **
     ! Velocity-space diffusion (similar to Lenard Bernstein)
     ! -nu (p + 2j + b^2/2) Napj
-    Tmp = -(p_dp + 2._dp*j_dp + 2._dp*ba_2)*nadiab_moments_(ip,ij,iky,ikx,iz)
-
+    Tmp = -(p_xp + 2._xp*j_xp + 2._xp*ba_2)*nadiab_moments(ia,ipi,iji,iky,ikx,izi)
       !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Non zero term for p = 0 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-    IF( p_dp .EQ. 0 ) THEN ! Kronecker p0
+    IF( p_xp .EQ. 0 ) THEN ! Kronecker p0
         !** build required fluid moments **
-        dens  = 0._dp
-        upar  = 0._dp; uperp = 0._dp
-        Tpar  = 0._dp; Tperp = 0._dp
-        DO in = 1,jmax_+1
-          n_dp = REAL(in-1,dp)
+        dens_  = 0._xp
+        upar_  = 0._xp; uperp_ = 0._xp
+        Tpar_  = 0._xp; Tperp_ = 0._xp
+        DO in = 1,local_nj
+          ini = in + ngj/2
+          n_xp = REAL(jarray(ini),xp)
           ! Store the kernels for sparing readings
-          Knp0 =  Kernel_(in  ,iky,ikx,iz,eo)
-          Knp1 =  Kernel_(in+1,iky,ikx,iz,eo)
-          Knm1 =  Kernel_(in-1,iky,ikx,iz,eo)
+          Knp0 =  kernel(ia,ini  ,iky,ikx,izi,eo)
+          Knp1 =  kernel(ia,ini+1,iky,ikx,izi,eo)
+          Knm1 =  kernel(ia,ini-1,iky,ikx,izi,eo)
           ! Nonadiabatic moments (only different from moments when p=0)
-          nadiab_moment_0j   = nadiab_moments_(ip0_,in,iky,ikx,iz)
+          nadiab_moment_0j   = nadiab_moments(ia,ip0,ini,iky,ikx,izi)
           ! Density
-          dens  = dens  + Knp0 * nadiab_moment_0j
+          dens_  = dens_  + Knp0 * nadiab_moment_0j
           ! Perpendicular velocity
-          uperp = uperp + ba*0.5_dp*(Knp0 - Knm1) * nadiab_moment_0j
+          uperp_ = uperp_ + ba*0.5_xp*(Knp0 - Knm1) * nadiab_moment_0j
           ! Parallel temperature
-          Tpar  = Tpar  + Knp0 * (SQRT2*nadiab_moments_(ip2_,in,iky,ikx,iz) + nadiab_moment_0j)
+          Tpar_  = Tpar_  + Knp0 * (SQRT2*nadiab_moments(ia,ip2,ini,iky,ikx,izi) + nadiab_moment_0j)
           ! Perpendicular temperature
-          Tperp = Tperp + ((2._dp*n_dp+1._dp)*Knp0 - (n_dp+1._dp)*Knp1 - n_dp*Knm1)*nadiab_moment_0j
+          Tperp_ = Tperp_ + ((2._xp*n_xp+1._xp)*Knp0 - (n_xp+1._xp)*Knp1 - n_xp*Knm1)*nadiab_moment_0j
         ENDDO
-      Temp  = (Tpar + 2._dp*Tperp)/3._dp - dens
+      Temp_  = (Tpar_ + 2._xp*Tperp_)/3._xp - dens_
       ! Add energy restoring term
-      Tmp = Tmp + Temp* 4._dp *  j_dp          * Kernel_(ij  ,iky,ikx,iz,eo)
-      Tmp = Tmp - Temp* 2._dp * (j_dp + 1._dp) * Kernel_(ij+1,iky,ikx,iz,eo)
-      Tmp = Tmp - Temp* 2._dp *  j_dp          * Kernel_(ij-1,iky,ikx,iz,eo)
-      Tmp = Tmp + uperp*ba* (Kernel_(ij,iky,ikx,iz,eo) - Kernel_(ij-1,iky,ikx,iz,eo))
+      Tmp = Tmp + Temp_* 4._xp *  j_xp          * kernel(ia,iji  ,iky,ikx,izi,eo)
+      Tmp = Tmp - Temp_* 2._xp * (j_xp + 1._xp) * kernel(ia,iji+1,iky,ikx,izi,eo)
+      Tmp = Tmp - Temp_* 2._xp *  j_xp          * kernel(ia,iji-1,iky,ikx,izi,eo)
+      Tmp = Tmp + uperp_*ba* (kernel(ia,iji,iky,ikx,izi,eo) - kernel(ia,iji-1,iky,ikx,izi,eo))
       !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
       !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Non zero term for p = 1 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-    ELSEIF( p_dp .eq. 1 ) THEN ! kronecker p1
+    ELSEIF( p_xp .eq. 1 ) THEN ! kronecker p1
       !** build required fluid moments **
-      upar  = 0._dp
-      DO in = 1,jmax_+1
+      upar_  = 0._xp
+      DO in = 1,local_nj
+        ini = in + ngj/2
+        n_xp = REAL(jarray(ini),xp)
         ! Parallel velocity
-        upar  = upar  + Kernel_(in,iky,ikx,iz,eo) * nadiab_moments_(ip1_,in,iky,ikx,iz)
+        upar_  = upar_  + kernel(ia,ini,iky,ikx,izi,eo) * nadiab_moments(ia,ip1,ini,iky,ikx,izi)
       ENDDO
-      Tmp = Tmp + upar*Kernel_(ij,iky,ikx,iz,eo)
+      Tmp = Tmp + upar_*kernel(ia,iji,iky,ikx,izi,eo)
       !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
       !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Non zero term for p = 2 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-    ELSEIF( p_dp .eq. 2 ) THEN ! kronecker p2
+    ELSEIF( p_xp .eq. 2 ) THEN ! kronecker p2
       !** build required fluid moments **
-      dens  = 0._dp
-      upar  = 0._dp; uperp = 0._dp
-      Tpar  = 0._dp; Tperp = 0._dp
-      DO in = 1,jmax_+1
-        n_dp = REAL(in-1,dp)
+      dens_  = 0._xp
+      upar_  = 0._xp; uperp_ = 0._xp
+      Tpar_  = 0._xp; Tperp_ = 0._xp
+      DO in = 1,local_nj
+        ini = in + ngj/2
+        n_xp = REAL(jarray(ini),xp)
         ! Store the kernels for sparing readings
-        Knp0 =  Kernel_(in  ,iky,ikx,iz,eo)
-        Knp1 =  Kernel_(in+1,iky,ikx,iz,eo)
-        Knm1 =  Kernel_(in-1,iky,ikx,iz,eo)
+        Knp0 =  kernel(ia,ini  ,iky,ikx,izi,eo)
+        Knp1 =  kernel(ia,ini+1,iky,ikx,izi,eo)
+        Knm1 =  kernel(ia,ini-1,iky,ikx,izi,eo)
         ! Nonadiabatic moments (only different from moments when p=0)
-        nadiab_moment_0j = nadiab_moments_(ip0_,in,iky,ikx,iz)
+        nadiab_moment_0j = nadiab_moments(ia,ip0,ini,iky,ikx,izi)
         ! Density
-        dens     = dens     + Knp0 * nadiab_moment_0j
+        dens_     = dens_     + Knp0 * nadiab_moment_0j
         ! Parallel temperature
-        Tpar  = Tpar  + Knp0 * (SQRT2*nadiab_moments_(ip2_,in,iky,ikx,iz) + nadiab_moment_0j)
+        Tpar_  = Tpar_  + Knp0 * (SQRT2*nadiab_moments(ia,ip2,ini,iky,ikx,izi) + nadiab_moment_0j)
         ! Perpendicular temperature
-        Tperp = Tperp + ((2._dp*n_dp+1._dp)*Knp0 - (n_dp+1._dp)*Knp1 - n_dp*Knm1)*nadiab_moment_0j
+        Tperp_ = Tperp_ + ((2._xp*n_xp+1._xp)*Knp0 - (n_xp+1._xp)*Knp1 - n_xp*Knm1)*nadiab_moment_0j
       ENDDO
-      Temp  = (Tpar + 2._dp*Tperp)/3._dp - dens
-      Tmp = Tmp + Temp*SQRT2*Kernel_(ij,iky,ikx,iz,eo)
+      Temp_  = (Tpar_ + 2._xp*Tperp_)/3._xp - dens_
+      Tmp = Tmp + Temp_*SQRT2*kernel(ia,iji,iky,ikx,izi,eo)
       !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
     ENDIF
     ! Multiply by collision parameter
-    TColl_(ip,ij,iky,ikx,iz) = nu_ * Tmp
+    Capj(ia,ip,ij,iky,ikx,iz) = nu_ab(ia,ia) * Tmp
     ENDDO;ENDDO;ENDDO
     ENDDO;ENDDO
-  END SUBROUTINE DoughertyGK_aa
-  !******************************************************************************!
-  !! compute the collision terms in a (Np x Nj x Nkx x Nky) matrix all at once
-  !******************************************************************************!
-  SUBROUTINE compute_cosolver_coll
-    IMPLICIT NONE
-    COMPLEX(dp), DIMENSION(1:total_np_e)   :: local_sum_e, buffer_e
-    COMPLEX(dp), DIMENSION(ips_e:ipe_e) :: TColl_distr_e
-    COMPLEX(dp), DIMENSION(1:total_np_i)   :: local_sum_i, buffer_i
-    COMPLEX(dp), DIMENSION(ips_i:ipe_i) :: TColl_distr_i
-    COMPLEX(dp) :: TColl
-    DO iz = izs,ize
-      DO ikx = ikxs,ikxe
-        DO iky = ikys,ikye
-          IF(KIN_E) THEN
-            DO ij = 1,Jmaxe+1
-              ! Electrons
-                ! Loop over all p to compute sub collision term
-                DO ip = 1,total_np_e
-                  CALL apply_COSOlver_mat_e(ip,ij,iky,ikx,iz,TColl)
-                  local_sum_e(ip) = TColl
-                ENDDO
-                IF (num_procs_p .GT. 1) THEN
-                  ! Sum up all the sub collision terms on root 0
-                  CALL MPI_REDUCE(local_sum_e, buffer_e, total_np_e, MPI_DOUBLE_COMPLEX, MPI_SUM, 0, comm_p, ierr)
-                  ! distribute the sum over the process among p
-                  CALL MPI_SCATTERV(buffer_e, rcv_p_e, dsp_p_e, MPI_DOUBLE_COMPLEX,&
-                                    TColl_distr_e, local_np_e, MPI_DOUBLE_COMPLEX,&
-                                    0, comm_p, ierr)
-                ELSE
-                  TColl_distr_e = local_sum_e
-                ENDIF
-                ! Write in output variable
-                DO ip = ips_e,ipe_e
-                  TColl_e(ip,ij,iky,ikx,iz) = TColl_distr_e(ip)
-                ENDDO
-            ENDDO
-          ENDIF
-          ! Ions
-          DO ij = 1,Jmaxi+1
-            DO ip = 1,total_np_i
-              CALL apply_COSOlver_mat_i(ip,ij,iky,ikx,iz,TColl)
-              local_sum_i(ip) = TColl
-            ENDDO
-            IF (num_procs_p .GT. 1) THEN
-              ! Reduce the local_sums to root = 0
-              CALL MPI_REDUCE(local_sum_i, buffer_i, total_np_i, MPI_DOUBLE_COMPLEX, MPI_SUM, 0, comm_p, ierr)
-              ! buffer contains the entire collision term along p, we scatter it between
-              ! the other processes (use of scatterv since Pmax/Np is not an integer)
-              CALL MPI_SCATTERV(buffer_i, rcv_p_i, dsp_p_i, MPI_DOUBLE_COMPLEX,&
-                                TColl_distr_i, local_np_i, MPI_DOUBLE_COMPLEX, &
-                                0, comm_p, ierr)
-            ELSE
-              TColl_distr_i = local_sum_i
-            ENDIF
-            ! Write in output variable
-            DO ip = ips_i,ipe_i
-              TColl_i(ip,ij,iky,ikx,iz) = TColl_distr_i(ip)
-            ENDDO
-          ENDDO
-        ENDDO
-      ENDDO
     ENDDO
-  END SUBROUTINE compute_cosolver_coll
-
-  !******************************************************************************!
-  !!!!!!! Compute electron collision term
-  !******************************************************************************!
-  SUBROUTINE apply_COSOlver_mat_e(ip_,ij_,iky_,ikx_,iz_,TColl_)
-    IMPLICIT NONE
-
-    INTEGER,     INTENT(IN)  :: ip_, ij_ ,ikx_, iky_, iz_
-    COMPLEX(dp), INTENT(OUT) :: TColl_
-
-    INTEGER     :: ip2,ij2, p_int,j_int, p2_int,j2_int, iky_C, ikx_C, iz_C
-    p_int = parray_e_full(ip_); j_int = jarray_e_full(ij_);
-
-    IF (gyrokin_CO) THEN ! GK operator (k-dependant)
-      ikx_C = ikx_; iky_C = iky_; iz_C = iz_
-    ELSE ! DK operator (only one mat for every k)
-      ikx_C = 1;   iky_C = 1; iz_C = 1;
-    ENDIF
-
-    TColl_ = 0._dp ! Initialization of the local sum
-
-    ! sum the electron-self and electron-ion test terms
-    ploopee: DO ip2 = ips_e,ipe_e
-      p2_int = parray_e(ip2)
-      jloopee: DO ij2 = ijs_e,ije_e
-        j2_int = jarray_e(ij2)
-        IF((CLOS .NE. 1) .OR. (p2_int+2*j2_int .LE. dmaxe))&
-        TColl_ = TColl_ + nadiab_moments_e(ip2,ij2,iky_,ikx_,iz_) &
-           *( nu_e  * CeipjT(bare(p_int,j_int), bare(p2_int,j2_int),iky_C, ikx_C, iz_C) &
-             +nu_ee * Ceepj (bare(p_int,j_int), bare(p2_int,j2_int),iky_C, ikx_C, iz_C))
-      ENDDO jloopee
-    ENDDO ploopee
-
-    ! sum the electron-ion field terms
-    ploopei: DO ip2 = ips_i,ipe_i
-      p2_int = parray_i(ip2)
-      jloopei: DO ij2 = ijs_i,ije_i
-        j2_int = jarray_i(ij2)
-        IF((CLOS .NE. 1) .OR. (p2_int+2*j2_int .LE. dmaxi))&
-        TColl_ = TColl_ + nadiab_moments_i(ip2,ij2,iky_,ikx_,iz_) &
-          *(nu_e * CeipjF(bare(p_int,j_int), bari(p2_int,j2_int),iky_C, ikx_C, iz_C))
-      END DO jloopei
-    ENDDO ploopei
-
-  END SUBROUTINE apply_COSOlver_mat_e
-
-  !******************************************************************************!
-  !!!!!!! Compute ion collision term
-  !******************************************************************************!
-  SUBROUTINE apply_COSOlver_mat_i(ip_,ij_,iky_,ikx_,iz_,TColl_)
-    IMPLICIT NONE
-    INTEGER,     INTENT(IN)    :: ip_, ij_ ,ikx_, iky_, iz_
-    COMPLEX(dp), INTENT(OUT)   :: TColl_
-
-    INTEGER     :: ip2,ij2, p_int,j_int, p2_int,j2_int, iky_C, ikx_C, iz_C
-    p_int = parray_i_full(ip_); j_int = jarray_i_full(ij_);
-
-    IF (gyrokin_CO) THEN ! GK operator (k-dependant)
-      ikx_C = ikx_; iky_C = iky_; iz_C = iz_;
-    ELSE ! DK operator (only one mat for every k)
-      ikx_C = 1;   iky_C = 1; iz_C = 1;
-    ENDIF
-
-    TColl_ = 0._dp ! Initialization
-    ! sum the ion-self and ion-electron test terms
-    ploopii: DO ip2 = ips_i,ipe_i
-      p2_int = parray_i(ip2)
-      jloopii: DO ij2 = ijs_i,ije_i
-        j2_int = jarray_i(ij2)
-        IF((CLOS .NE. 1) .OR. (p2_int+2*j2_int .LE. dmaxi))&
-        ! Ion-ion collision
-        TColl_ = TColl_ + nadiab_moments_i(ip2,ij2,iky_,ikx_,iz_) &
-            * nu_i  * Ciipj (bari(p_int,j_int), bari(p2_int,j2_int), iky_C, ikx_C, iz_C)
-        IF(KIN_E) & ! Ion-electron collision test
-        TColl_ = TColl_ + nadiab_moments_i(ip2,ij2,iky_,ikx_,iz_) &
-            * nu_ie * CiepjT(bari(p_int,j_int), bari(p2_int,j2_int), iky_C, ikx_C, iz_C)
-      ENDDO jloopii
-    ENDDO ploopii
-
-    IF(KIN_E) THEN ! Ion-electron collision field
-    ploopie: DO ip2 = ips_e,ipe_e ! sum the ion-electron field terms
-      p2_int = parray_e(ip2)
-      jloopie: DO ij2 = ijs_e,ije_e
-        j2_int = jarray_e(ij2)
-        IF((CLOS .NE. 1) .OR. (p2_int+2*j2_int .LE. dmaxe))&
-        TColl_ = TColl_ + nadiab_moments_e(ip2,ij2,iky_,ikx_,iz_) &
-          *(nu_ie * CiepjF(bari(p_int,j_int), bare(p2_int,j2_int), iky_C, ikx_C, iz_C))
-      ENDDO jloopie
-    ENDDO ploopie
-    ENDIF
-  END SUBROUTINE apply_COSOlver_mat_i
-
-    !******************************************************************************!
-    !!!!!!! Load the collision matrix coefficient table from COSOlver results
-    !******************************************************************************!
-    SUBROUTINE load_COSOlver_mat ! Load a sub matrix from iCa files (works for pmaxa,jmaxa<=P_full,J_full)
-      IMPLICIT NONE
-      ! Indices for row and columns of the COSOlver matrix (4D compressed 2D matrices)
-      INTEGER :: irow_sub, irow_full, icol_sub, icol_full
-      INTEGER :: fid ! file indexation
-
-      INTEGER :: ip_e, ij_e, il_e, ik_e, ikps_C, ikpe_C                  ! indices for electrons loops
-      REAL(dp), DIMENSION(2) :: dims_e
-      INTEGER :: pdime, jdime                                            ! dimensions of the COSOlver matrices
-      REAL(dp), DIMENSION(:,:),   ALLOCATABLE :: Ceepj_full, CeipjT_full ! To load the entire matrix
-      REAL(dp), DIMENSION(:,:),   ALLOCATABLE :: CeipjF_full             ! ''
-      REAL(dp), DIMENSION(:,:,:), ALLOCATABLE :: Ceepj__kp, CeipjT_kp    ! To store the coeff that will be used along kperp
-      REAL(dp), DIMENSION(:,:,:), ALLOCATABLE :: CeipjF_kp               ! ''
-      INTEGER :: ip_i, ij_i, il_i, ik_i                                  ! same for ions
-      INTEGER,  DIMENSION(2) :: dims_i
-      INTEGER :: pdimi, jdimi                                            ! dimensions of the COSOlver matrices
-      REAL(dp), DIMENSION(:,:),   ALLOCATABLE :: Ciipj_full, CiepjT_full ! .
-      REAL(dp), DIMENSION(:,:),   ALLOCATABLE :: CiepjF_full             ! .
-      REAL(dp), DIMENSION(:,:,:), ALLOCATABLE :: Ciipj__kp, CiepjT_kp    ! .
-      REAL(dp), DIMENSION(:,:,:), ALLOCATABLE :: CiepjF_kp               ! .
-
-      REAL(dp), DIMENSION(:),     ALLOCATABLE :: kp_grid_mat             ! kperp grid of the matrices
-      INTEGER  :: ikp_next, ikp_prev, nkp_mat, ikp_mat
-      REAL(dp) :: kp_max
-      REAL(dp) :: kperp_sim, kperp_mat, zerotoone
-
-      CHARACTER(len=128) :: var_name, ikp_string
-
-      !! Some terminal info
-      SELECT CASE (collision_model)
-      CASE ('SG')
-        IF (my_id .EQ. 0)  WRITE(*,*) '=== Load Sugama matrix ==='
-      CASE ('LR')
-        IF (my_id .EQ. 0) WRITE(*,*) '=== Load Lorentz matrix ==='
-      CASE ('LD')
-        IF (my_id .EQ. 0) WRITE(*,*) '=== Load Landau matrix ==='
-      END SELECT
-      SELECT CASE (gyrokin_CO)
-      CASE (.true.)
-        IF (my_id .EQ. 0) WRITE(*,*) '..gyrokinetic model..'
-      CASE (.false.)
-        IF (my_id .EQ. 0) WRITE(*,*) '..driftkinetic model..'
-      END SELECT
-
-      ! Opening the compiled cosolver matrices results
-      if(my_id.EQ.0)write(*,*) mat_file
-      CALL openf(mat_file,fid, 'r', 'D', mpicomm=comm_p);
-
-      ! Get matrices dimensions (polynomials degrees and kperp grid)
-      CALL getarr(fid, '/dims_e', dims_e) ! Get the electron polynomial degrees
-      pdime = dims_e(1); jdime = dims_e(2);
-      CALL getarr(fid, '/dims_i', dims_i) ! Get the ion      polynomial degrees
-      pdimi = dims_i(1); jdimi = dims_i(2);
-      IF ( ((pdime .LT. pmaxe) .OR. (jdime .LT. jmaxe)) .AND. (my_id .EQ. 0)) ERROR STOP '>> ERROR << Pe,Je Matrix too small'
-      IF ( ((pdimi .LT. pmaxi) .OR. (jdimi .LT. jmaxi)) .AND. (my_id .EQ. 0)) ERROR STOP '>> ERROR << Pi,Ji Matrix too small'
-
-      CALL getsize(fid, '/coordkperp', nkp_mat) ! Get the dimension kperp grid of the matrices
-      CALL allocate_array(kp_grid_mat, 1,nkp_mat)
-      CALL getarr(fid, '/coordkperp', kp_grid_mat)
-
-      kp_max = SQRT(kx_max**2+ky_max**2)
-      ! check that we have enough kperps mat
-      IF (LINEARITY .NE. 'linear') THEN
-        IF ( (kp_grid_mat(nkp_mat) .LT. 2./3.*kp_max) .AND. (my_id .EQ. 0)) WRITE(*,*) 'warning: Matrix kperp grid too small'
-      ELSE
-        IF ( (kp_grid_mat(nkp_mat) .LT. kp_max) .AND. (my_id .EQ. 0)) WRITE(*,*) 'warning: Matrix kperp grid too small !!'
-      ENDIF
-
-      IF (gyrokin_CO) THEN ! GK operator (k-dependant)
-        ikps_C = 1; ikpe_C = nkp_mat
-      ELSE ! DK operator (only one mat for all k)
-        ikps_C = 1; ikpe_C = 1
-      ENDIF
-
-      CALL allocate_array(  Ceepj__kp, 1,(pmaxe+1)*(jmaxe+1), 1,(pmaxe+1)*(jmaxe+1), ikps_C,ikpe_C)
-      CALL allocate_array(  CeipjT_kp, 1,(pmaxe+1)*(jmaxe+1), 1,(pmaxe+1)*(jmaxe+1), ikps_C,ikpe_C)
-      CALL allocate_array(  CeipjF_kp, 1,(pmaxe+1)*(jmaxe+1), 1,(pmaxe+1)*(jmaxe+1), ikps_C,ikpe_C)
-      CALL allocate_array(  Ciipj__kp, 1,(pmaxe+1)*(jmaxe+1), 1,(pmaxe+1)*(jmaxe+1), ikps_C,ikpe_C)
-      CALL allocate_array(  CiepjT_kp, 1,(pmaxe+1)*(jmaxe+1), 1,(pmaxe+1)*(jmaxe+1), ikps_C,ikpe_C)
-      CALL allocate_array(  CiepjF_kp, 1,(pmaxe+1)*(jmaxe+1), 1,(pmaxe+1)*(jmaxe+1), ikps_C,ikpe_C)
-
-      DO ikp = ikps_C,ikpe_C ! Loop over everz kperp values
-        ! Kperp value in string format to select in cosolver hdf5 file
-        IF (gyrokin_CO) THEN
-          write(ikp_string,'(i5.5)') ikp-1
-        ELSE
-          write(ikp_string,'(i5.5)') 0
-        ENDIF
-        !!!!!!!!!!!! E-E matrices !!!!!!!!!!!!
-        ! get the self electron colision matrix
-        ! Allocate space for storing full collision matrix
-        CALL allocate_array(  Ceepj_full, 1,(pdime+1)*(jdime+1), 1,(pdime+1)*(jdime+1))
-        ! Naming of the array to load (kperp dependant)
-        WRITE(var_name,'(a,a)') TRIM(ADJUSTL(ikp_string)),'/Caapj/Ceepj'
-        CALL getarr(fid, var_name, Ceepj_full) ! get array (moli format)
-        ! Fill sub array with the usefull polynmial degrees only
-        DO ip_e = 0,pmaxe ! Loop over rows
-        DO ij_e = 0,jmaxe
-              irow_sub  = (jmaxe +1)*ip_e + ij_e +1
-              irow_full = (jdime +1)*ip_e + ij_e +1
-              DO il_e = 0,pmaxe ! Loop over columns
-              DO ik_e = 0,jmaxe
-                    icol_sub  = (jmaxe +1)*il_e + ik_e +1
-                    icol_full = (jdime +1)*il_e + ik_e +1
-                    Ceepj__kp (irow_sub,icol_sub,ikp) = Ceepj_full (irow_full,icol_full)
-              ENDDO
-              ENDDO
-        ENDDO
-        ENDDO
-        DEALLOCATE(Ceepj_full)
-
-        !!!!!!!!!!!!!!! I-I matrices !!!!!!!!!!!!!!
-        ! get the self electron colision matrix
-        CALL allocate_array(  Ciipj_full, 1,(pdimi+1)*(jdimi+1), 1,(pdimi+1)*(jdimi+1))
-        WRITE(var_name,'(a,a,a)') TRIM(ADJUSTL(ikp_string)),'/Caapj/Ciipj'
-        CALL getarr(fid, var_name, Ciipj_full) ! get array (moli format)
-        ! Fill sub array with only usefull polynmials degree
-        DO ip_i = 0,Pmaxi ! Loop over rows
-        DO ij_i = 0,Jmaxi
-              irow_sub  = (Jmaxi +1)*ip_i + ij_i +1
-              irow_full = (jdimi +1)*ip_i + ij_i +1
-              DO il_i = 0,Pmaxi ! Loop over columns
-              DO ik_i = 0,Jmaxi
-                    icol_sub  = (Jmaxi +1)*il_i + ik_i +1
-                    icol_full = (jdimi +1)*il_i + ik_i +1
-                    Ciipj__kp (irow_sub,icol_sub,ikp) = Ciipj_full (irow_full,icol_full)
-              ENDDO
-              ENDDO
-        ENDDO
-        ENDDO
-        DEALLOCATE(Ciipj_full)
-
-        IF(interspecies) THEN ! Pitch angle is only applied on like-species
-          !!!!!!!!!!!!!!! E-I matrices !!!!!!!!!!!!!!
-          ! Get test and field e-i collision matrices
-          CALL allocate_array( CeipjT_full, 1,(pdime+1)*(jdime+1), 1,(pdime+1)*(jdime+1))
-          CALL allocate_array( CeipjF_full, 1,(pdime+1)*(jdime+1), 1,(pdimi+1)*(jdimi+1))
-          WRITE(var_name,'(a,a)') TRIM(ADJUSTL(ikp_string)),'/Ceipj/CeipjT'
-          CALL getarr(fid, var_name, CeipjT_full)
-          WRITE(var_name,'(a,a)') TRIM(ADJUSTL(ikp_string)),'/Ceipj/CeipjF'
-          CALL getarr(fid, var_name, CeipjF_full)
-          ! Fill sub array with only usefull polynmials degree
-          DO ip_e = 0,pmaxe ! Loop over rows
-          DO ij_e = 0,jmaxe
-                irow_sub  = (jmaxe +1)*ip_e + ij_e +1
-                irow_full = (jdime +1)*ip_e + ij_e +1
-                DO il_e = 0,pmaxe ! Loop over columns
-                DO ik_e = 0,jmaxe
-                      icol_sub  = (jmaxe +1)*il_e + ik_e +1
-                      icol_full = (jdime +1)*il_e + ik_e +1
-                      CeipjT_kp(irow_sub,icol_sub,ikp) = CeipjT_full(irow_full,icol_full)
-                ENDDO
-                ENDDO
-                DO il_i = 0,pmaxi ! Loop over columns
-                DO ik_i = 0,jmaxi
-                      icol_sub  = (Jmaxi +1)*il_i + ik_i +1
-                      icol_full = (jdimi +1)*il_i + ik_i +1
-                      CeipjF_kp(irow_sub,icol_sub,ikp) = CeipjF_full(irow_full,icol_full)
-                ENDDO
-                ENDDO
-          ENDDO
-          ENDDO
-          DEALLOCATE(CeipjF_full)
-          DEALLOCATE(CeipjT_full)
-
-          !!!!!!!!!!!!!!! I-E matrices !!!!!!!!!!!!!!
-          ! get the Test and Back field electron ion collision matrix
-          CALL allocate_array( CiepjT_full, 1,(pdimi+1)*(jdimi+1), 1,(pdimi+1)*(jdimi+1))
-          CALL allocate_array( CiepjF_full, 1,(pdimi+1)*(jdimi+1), 1,(pdime+1)*(jdime+1))
-          WRITE(var_name,'(a,a,a)') TRIM(ADJUSTL(ikp_string)),'/Ciepj/CiepjT'
-          CALL getarr(fid, var_name, CiepjT_full)
-          WRITE(var_name,'(a,a,a)') TRIM(ADJUSTL(ikp_string)),'/Ciepj/CiepjF'
-          CALL getarr(fid, var_name, CiepjF_full)
-          ! Fill sub array with only usefull polynmials degree
-          DO ip_i = 0,Pmaxi ! Loop over rows
-          DO ij_i = 0,Jmaxi
-                irow_sub  = (Jmaxi +1)*ip_i + ij_i +1
-                irow_full = (jdimi +1)*ip_i + ij_i +1
-                DO il_i = 0,Pmaxi ! Loop over columns
-                DO ik_i = 0,Jmaxi
-                      icol_sub  = (Jmaxi +1)*il_i + ik_i +1
-                      icol_full = (jdimi +1)*il_i + ik_i +1
-                      CiepjT_kp(irow_sub,icol_sub,ikp) = CiepjT_full(irow_full,icol_full)
-                ENDDO
-                ENDDO
-                DO il_e = 0,pmaxe ! Loop over columns
-                DO ik_e = 0,jmaxe
-                      icol_sub  = (jmaxe +1)*il_e + ik_e +1
-                      icol_full = (jdime +1)*il_e + ik_e +1
-                      CiepjF_kp(irow_sub,icol_sub,ikp) = CiepjF_full(irow_full,icol_full)
-                ENDDO
-                ENDDO
-          ENDDO
-          ENDDO
-          DEALLOCATE(CiepjF_full)
-          DEALLOCATE(CiepjT_full)
-        ELSE
-          CeipjT_kp = 0._dp; CeipjF_kp = 0._dp; CiepjT_kp = 0._dp; CiepjF_kp = 0._dp;
-        ENDIF
-      ENDDO
-      CALL closef(fid)
-
-      IF (gyrokin_CO) THEN ! Interpolation of the kperp matrix values on kx ky grid
-        IF (my_id .EQ. 0 ) WRITE(*,*) '...Interpolation from matrices kperp to simulation kx,ky...'
-        DO ikx = ikxs,ikxe
-          DO iky = ikys,ikye
-            DO iz = izs,ize
-              ! Check for nonlinear case if we are in the anti aliased domain or the filtered one
-              kperp_sim = MIN(kparray(iky,ikx,iz,0),collision_kcut) ! current simulation kperp
-              ! Find the interval in kp grid mat where kperp_sim is contained
-              ! Loop over the whole kp mat grid to find the smallest kperp that is
-              ! larger than the current kperp_sim (brute force...)
-              DO ikp=1,nkp_mat
-                ikp_mat   = ikp ! the first indice of the interval (k0)
-                kperp_mat = kp_grid_mat(ikp)
-                IF(kperp_mat .GT. kperp_sim) EXIT ! a matrix with kperp2 > current kx2 + ky2 has been found
-              ENDDO
-              ! Interpolation
-              ! interval boundaries
-              ikp_next  = ikp_mat     !index of k1 (larger than kperp_sim thanks to previous loop)
-              ikp_prev  = ikp_mat - 1 !index of k0 (smaller neighbour to interpolate inbetween)
-              if ( (kp_grid_mat(ikp_prev) .GT. kperp_sim) .OR. (kp_grid_mat(ikp_next) .LT. kperp_sim) ) THEN
-                ! write(*,*) 'Warning, linear interp of collision matrix failed!! '
-                ! write(*,*) kp_grid_mat(ikp_prev), '<', kperp_sim, '<', kp_grid_mat(ikp_next)
-              ENDIF
-              ! 0->1 variable for linear interp, i.e. zero2one = (k-k0)/(k1-k0)
-              zerotoone = MIN(1._dp,(kperp_sim - kp_grid_mat(ikp_prev))/(kp_grid_mat(ikp_next) - kp_grid_mat(ikp_prev)))
-              ! Linear interpolation between previous and next kperp matrix values
-              Ceepj (:,:,iky,ikx,iz) = (Ceepj__kp(:,:,ikp_next) - Ceepj__kp(:,:,ikp_prev))*zerotoone + Ceepj__kp(:,:,ikp_prev)
-              Ciipj (:,:,iky,ikx,iz) = (Ciipj__kp(:,:,ikp_next) - Ciipj__kp(:,:,ikp_prev))*zerotoone + Ciipj__kp(:,:,ikp_prev)
-              IF(interspecies) THEN
-                CeipjT(:,:,iky,ikx,iz) = (CeipjT_kp(:,:,ikp_next) - CeipjT_kp(:,:,ikp_prev))*zerotoone + CeipjT_kp(:,:,ikp_prev)
-                CeipjF(:,:,iky,ikx,iz) = (CeipjF_kp(:,:,ikp_next) - CeipjF_kp(:,:,ikp_prev))*zerotoone + CeipjF_kp(:,:,ikp_prev)
-                CiepjT(:,:,iky,ikx,iz) = (CiepjT_kp(:,:,ikp_next) - CiepjT_kp(:,:,ikp_prev))*zerotoone + CiepjT_kp(:,:,ikp_prev)
-                CiepjF(:,:,iky,ikx,iz) = (CiepjF_kp(:,:,ikp_next) - CiepjF_kp(:,:,ikp_prev))*zerotoone + CiepjF_kp(:,:,ikp_prev)
-              ELSE
-                CeipjT(:,:,iky,ikx,iz) = 0._dp
-                CeipjF(:,:,iky,ikx,iz) = 0._dp
-                CiepjT(:,:,iky,ikx,iz) = 0._dp
-                CiepjF(:,:,iky,ikx,iz) = 0._dp
-              ENDIF
-            ENDDO
-          ENDDO
-        ENDDO
-      ELSE ! DK -> No kperp dep, copy simply to final collision matrices
-        Ceepj (:,:,1,1,1) = Ceepj__kp(:,:,1)
-        CeipjT(:,:,1,1,1) = CeipjT_kp(:,:,1)
-        CeipjF(:,:,1,1,1) = CeipjF_kp(:,:,1)
-        Ciipj (:,:,1,1,1) = Ciipj__kp(:,:,1)
-        CiepjT(:,:,1,1,1) = CiepjT_kp(:,:,1)
-        CiepjF(:,:,1,1,1) = CiepjF_kp(:,:,1)
-      ENDIF
-      ! Deallocate auxiliary variables
-      DEALLOCATE (Ceepj__kp); DEALLOCATE (CeipjT_kp); DEALLOCATE (CeipjF_kp)
-      DEALLOCATE (Ciipj__kp); DEALLOCATE (CiepjT_kp); DEALLOCATE (CiepjF_kp)
-
-      IF( .NOT. interspecies ) THEN
-        IF(my_id.EQ.0) write(*,*) "--Like Species operator--"
-        CeipjF = 0._dp;
-        CeipjT = 0._dp;
-        CiepjF = 0._dp;
-        CiepjT = 0._dp;
-      ENDIF
-
-      IF (my_id .EQ. 0) WRITE(*,*) '============DONE==========='
-
-    END SUBROUTINE load_COSOlver_mat
-    !******************************************************************************!
+  END SUBROUTINE Dougherty_GK
 
 end module collision
diff --git a/src/compute_gradients.F90 b/src/compute_gradients.F90
new file mode 100644
index 0000000000000000000000000000000000000000..06c920d688846c9a0af11ece0e031cf05e4295a3
--- /dev/null
+++ b/src/compute_gradients.F90
@@ -0,0 +1,28 @@
+SUBROUTINE compute_gradients
+    !   This routine compute the gradients of the moments without copying or slices.
+    ! It should be faster than using a routine taking a slice as argument (see 
+    ! calculus_mod) since it avoid copying.
+    USE fields, ONLY: moments
+    USE grid,   ONLY: local_nz, ngz, inv_deltaz
+    USE prec_const, ONLY: dp
+    IMPLICIT NONE
+
+    REAL(dp), dimension(-2:2) :: dz_usu = &
+     (/  1._dp/12._dp, -2._dp/3._dp, 0._dp, 2._dp/3._dp, -1._dp/12._dp /) ! fd4 centered stencil
+    REAL(dp), dimension(-2:1) :: dz_o2e = &
+     (/ 1._dp/24._dp,-9._dp/8._dp, 9._dp/8._dp,-1._dp/24._dp /) ! fd4 odd to even stencil
+    REAL(dp), dimension(-1:2) :: dz_e2o = &
+     (/ 1._dp/24._dp,-9._dp/8._dp, 9._dp/8._dp,-1._dp/24._dp /) ! fd4 odd to even stencil
+     REAL(dp), dimension(-2:2) :: dz2_usu = &
+     (/-1._dp/12._dp, 4._dp/3._dp, -5._dp/2._dp, 4._dp/3._dp, -1._dp/12._dp /)! 2th derivative, 4th order (for parallel hypdiff)
+     REAL(dp), dimension(-2:2) :: dz4_usu = &
+     (/  1._dp, -4._dp, 6._dp, -4._dp, 1._dp /) ! 4th derivative, 2nd order (for parallel hypdiff)
+     REAL(dp), dimension(-2:1) :: iz_o2e = &
+     (/ -1._dp/16._dp, 9._dp/16._dp, 9._dp/16._dp, -1._dp/16._dp /) ! grid interpolation, 4th order, odd to even
+     REAL(dp), dimension(-1:2) :: iz_e2o = &
+     (/ -1._dp/16._dp, 9._dp/16._dp, 9._dp/16._dp, -1._dp/16._dp /) ! grid interpolation, 4th order, even to odd
+    PUBLIC :: simpson_rule_z, interp_z, grad_z, grad_z4
+  
+IMPLICIT NONE
+
+END SUBROUTINE compute_gradients
\ No newline at end of file
diff --git a/src/control.F90 b/src/control.F90
index d69d991c41befe949f95d2b4548cf2f4627c9b27..c8e307daa7d1c7f59c540fe8f54d14ff8381af49 100644
--- a/src/control.F90
+++ b/src/control.F90
@@ -1,91 +1,104 @@
 SUBROUTINE control
   !   Control the run
 
-  use basic
-  use prec_const
+  use basic,      ONLY: str,daytim,speak,basic_data,&
+                        nlend,step,increase_step,increase_time,increase_cstep,&
+                        chrono_runt,chrono_step, chrono_diag, start_chrono, stop_chrono
+  use prec_const, ONLY: xp, stdout
+  USE parallel,   ONLY: ppinit
+  USE mpi
   IMPLICIT NONE
-  REAL(dp) :: t_init_diag_0, t_init_diag_1
-
-  CALL cpu_time(start)
+  REAL(xp) :: t_init_diag_0, t_init_diag_1
+  INTEGER  :: ierr
+  ! start the chronometer for the total runtime
+  CALL start_chrono(chrono_runt)
   !________________________________________________________________________________
   !              1.   Prologue
   !                   1.1     Initialize the parallel environment
   CALL ppinit
-  IF (my_id .EQ. 0) WRITE(*,'(a/)') 'MPI initialized'
-
+  CALL speak('MPI initialized')
+  CALL mpi_barrier(MPI_COMM_WORLD, ierr)
 
   CALL daytim('Start at ')
-
+  
   !                   1.2     Define data specific to run
-  IF (my_id .EQ. 0) WRITE(*,*) 'Load basic data...'
+  CALL speak( 'Load basic data...')
   CALL basic_data
   ! CALL mpi_barrier(MPI_COMM_WORLD, ierr)
-  IF (my_id .EQ. 0) WRITE(*,'(a/)') '...basic data loaded.'
-
+  CALL speak('...basic data loaded.')
+  
   !                   1.3   Read input parameters from input file
-  IF (my_id .EQ. 0) WRITE(*,*) 'Read input parameters...'
+  CALL speak('Read input parameters...')
   CALL readinputs
   ! CALL mpi_barrier(MPI_COMM_WORLD, ierr)
-  IF (my_id .EQ. 0) WRITE(*,'(a/)') '...input parameters read'
+  CALL speak('...input parameters read')
 
   !                   1.4     Set auxiliary values (allocate arrays, set grid, ...)
-  IF (my_id .EQ. 0) WRITE(*,*) 'Calculate auxval...'
+  CALL speak('Calculate auxval...')
   CALL auxval
   ! CALL mpi_barrier(MPI_COMM_WORLD, ierr)
-  IF (my_id .EQ. 0) WRITE(*,'(a/)') '...auxval calculated'
-
+  CALL speak('...auxval calculated')
+  
   !                   1.5     Initial conditions
-  IF (my_id .EQ. 0) WRITE(*,*) 'Create initial state...'
+  CALL speak( 'Create initial state...')
   CALL inital
   ! CALL mpi_barrier(MPI_COMM_WORLD, ierr)
-  IF (my_id .EQ. 0) WRITE(*,'(a/)') '...initial state created'
-
+  CALL speak('...initial state created')
+  
   !                   1.6     Initial diagnostics
-  IF (my_id .EQ. 0) WRITE(*,*) 'Initial diagnostics...'
+  CALL speak( 'Initial diagnostics...')
   CALL cpu_time(t_init_diag_0) ! Measure the time of the init diag
   CALL diagnose(0)
   CALL cpu_time(t_init_diag_1)
   ! CALL mpi_barrier(MPI_COMM_WORLD, ierr)
-  IF (my_id .EQ. 0) THEN
-    WRITE(*,'(a)') '...initial diagnostics done'
-    WRITE(*,'(a,F6.3,a/)') '(',t_init_diag_1-t_init_diag_0,'[s])'
-  ENDIF
+  CALL speak('...initial diagnostics done')
+  CALL speak('('//str(t_init_diag_1-t_init_diag_0)//'[s])')
 
   CALL FLUSH(stdout)
   CALL mpi_barrier(MPI_COMM_WORLD, ierr)
   !________________________________________________________________________________
 
-  IF (my_id .EQ. 0) WRITE(*,*) 'Time integration loop..'
+  CALL speak( 'Time integration loop..')
   !________________________________________________________________________________
   !              2.   Main loop
   DO
-     CALL cpu_time(t0_step) ! Measuring time
+    CALL start_chrono(chrono_step) ! Measuring time per step
+    
+    ! Test if the stopping requirements are met (update nlend)
+    CALL tesend
+    IF( nlend ) EXIT ! exit do loop
 
-     step  = step  + 1
-     cstep = cstep + 1
-     CALL stepon
+    ! Increment steps and csteps (private in basic module)
+    CALL increase_step
+    CALL increase_cstep
 
-     time  = time  + dt
+    ! Do a full RK step (e.g. 4 substeps for ERK4)
+    CALL stepon
 
-     CALL tesend
-     IF( nlend ) EXIT ! exit do loop
+    ! Increment time (private in basic module)
+    CALL increase_time
 
-     CALL diagnose(step)
+    ! Periodic diagnostics
+    CALL start_chrono(chrono_diag)
+      CALL diagnose(step)
+    CALL stop_chrono(chrono_diag)
 
-    CALL cpu_time(t1_step);
-    tc_step = tc_step + (t1_step - t0_step)
+    CALL stop_chrono(chrono_step)
 
   END DO
 
-  IF (my_id .EQ. 0) WRITE(*,'(a/)') '...time integration done'
+  CALL speak('...time integration done')
   !________________________________________________________________________________
   !              9.   Epilogue
-
+  ! Stop total run chronometer (must be done before the last diagnostic)
+  CALL stop_chrono(chrono_runt)
+  ! last diagnostic
   CALL diagnose(-1)
+  ! end the run
   CALL endrun
-
-  IF (my_id .EQ. 0) CALL daytim('Done at ')
-
+  ! display final time
+  CALL daytim('Done at ')
+  ! close mpi environement
   CALL ppexit
 
 END SUBROUTINE control
diff --git a/src/cosolver_interface_mod.F90 b/src/cosolver_interface_mod.F90
new file mode 100644
index 0000000000000000000000000000000000000000..988c3b0100228343da1ebf6664452d2c982b4167
--- /dev/null
+++ b/src/cosolver_interface_mod.F90
@@ -0,0 +1,308 @@
+module cosolver_interface
+! contains the Hermite-Laguerre collision operators solved using COSOlver.
+USE prec_const, ONLY: xp
+IMPLICIT NONE
+PRIVATE
+PUBLIC :: load_COSOlver_mat, compute_cosolver_coll
+
+CONTAINS
+  !******************************************************************************!
+  !! compute the collision terms in a (Np x Nj x Nkx x Nky) matrix all at once
+  !******************************************************************************!
+  SUBROUTINE compute_cosolver_coll(GK_CO)
+    USE parallel,    ONLY: num_procs_p, comm_p,dsp_p,rcv_p
+    USE grid,        ONLY: &
+      local_na, &
+      local_np, ngp, total_np, total_nj, ngj,&
+      local_nkx, local_nky, local_nz, bar
+    USE array,       ONLY: Capj
+    USE MPI
+    USE closure,     ONLY: evolve_mom
+    IMPLICIT NONE
+    LOGICAL, INTENT(IN) :: GK_CO
+    COMPLEX(xp), DIMENSION(total_np)    :: local_coll, buffer
+    COMPLEX(xp), DIMENSION(local_np)    :: TColl_distr
+    COMPLEX(xp) :: Tmp_
+    INTEGER :: iz,ikx,iky,ij,ip,ia,ikx_C,iky_C,iz_C
+    INTEGER :: ierr
+    z:DO iz = 1,local_nz
+      x:DO ikx = 1,local_nkx
+        y:DO iky = 1,local_nky
+          a:DO ia = 1,local_na
+            j:DO ij = 1,total_nj
+              p:DO ip = 1,total_np
+              IF(evolve_mom(ip+ngp/2,ij+ngj/2)) THEN !compute for every moments except for closure 1
+                  !! Take GK or DK limit
+                  IF (GK_CO) THEN ! GK operator (k-dependant)
+                    ikx_C = ikx; iky_C = iky; iz_C = iz;
+                  ELSE ! DK operator (only one mat for every k)
+                    ikx_C = 1;   iky_C = 1; iz_C = 1;
+                  ENDIF
+                  !! Apply the cosolver collision matrix
+                  CALL apply_cosolver_mat(ia,ip,ij,iky,ikx,iz,ikx_C,iky_C,iz_C,Tmp_)
+                  local_coll(ip) = Tmp_
+                ELSE
+                  local_coll(ip) = 0._xp
+                ENDIF
+              ENDDO p
+              IF (num_procs_p .GT. 1) THEN
+                ! Reduce the local_sums to root = 0
+                CALL MPI_REDUCE(local_coll, buffer, total_np, MPI_DOUBLE_COMPLEX, MPI_SUM, 0, comm_p, ierr)
+                ! buffer contains the entire collision term along p, we scatter it between
+                ! the other processes (use of scatterv since Pmax/Np is not an integer)
+                CALL MPI_SCATTERV(buffer, rcv_p, dsp_p, MPI_DOUBLE_COMPLEX,&
+                                  TColl_distr, local_np, MPI_DOUBLE_COMPLEX, &
+                                  0, comm_p, ierr)
+              ELSE
+                TColl_distr = local_coll
+              ENDIF
+              ! Write in output variable
+              DO ip = 1,local_np
+                Capj(ia,ip,ij,iky,ikx,iz) = TColl_distr(ip)
+              ENDDO
+            ENDDO j
+          ENDDO a
+        ENDDO y
+      ENDDO x
+    ENDDO z
+  END SUBROUTINE compute_cosolver_coll
+
+    !******************************************************************************!
+  !! compute the collision terms in a (Np x Nj x Nkx x Nky) matrix all at once
+  !******************************************************************************!
+  SUBROUTINE apply_cosolver_mat(ia,ip,ij,iky,ikx,iz,ikx_C,iky_C,iz_C,local_coll)
+    USE grid,        ONLY: &
+      total_na, &
+      local_np, parray,parray_full, ngp,&
+      total_nj, jarray,jarray_full, ngj, bar, ngz
+    USE array,       ONLY: nuCself, Cab_F, nadiab_moments
+    USE species,     ONLY: nu_ab
+    IMPLICIT NONE
+    INTEGER, INTENT(IN) :: ia,ip,ij,iky,ikx,iz,ikx_C,iky_C,iz_C
+    COMPLEX(xp), INTENT(OUT) :: local_coll
+    INTEGER :: ib,iq,il
+    INTEGER :: p_int,q_int,j_int,l_int
+    INTEGER :: izi, iqi, ili
+    izi = iz+ngz/2
+    p_int = parray_full(ip)
+    j_int = jarray_full(ij)
+    !! Apply the cosolver collision matrix
+    local_coll = 0._xp ! Initialization
+    q:DO iq = 1,local_np
+      iqi   = iq + ngp/2
+      q_int = parray(iqi)
+      l:DO il = 1,total_nj
+        ili   = il + ngj/2
+        l_int = jarray(ili)
+        ! self interaction + test interaction
+        local_coll = local_coll + nadiab_moments(ia,iqi,ili,iky,ikx,izi) &
+              * nuCself(ia,bar(p_int,j_int), bar(q_int,l_int), iky_C, ikx_C, iz_C)
+        ! sum the contribution over the other species
+        b:DO ib = 1,total_na
+          IF(ib .NE. ia) THEN
+            ! Field contribution
+            local_coll = local_coll + nadiab_moments(ib,iqi,ili,iky,ikx,izi) &
+                * nu_ab(ia,ib) * Cab_F(ia,ib,bar(p_int,j_int), bar(q_int,l_int), iky_C, ikx_C, iz_C)
+          ENDIF
+        ENDDO b
+      ENDDO l
+    ENDDO q
+  END SUBROUTINE apply_cosolver_mat
+
+    !******************************************************************************!
+    !!!!!!! Load the collision matrix coefficient table from COSOlver results
+    !******************************************************************************!
+    SUBROUTINE load_COSOlver_mat(GK_CO,INTERSPECIES,matfile,collision_kcut) ! Load a sub matrix from iCa files (works for pmax,jmax<=P_full,J_full)
+      USE basic,       ONLY: allocate_array, speak
+      USE parallel,    ONLY: comm_p, my_id
+      USE grid,        ONLY: &
+        local_na, total_na, &
+        local_nkx,local_nky, kparray,&
+        local_nz, ngz, bar,&
+        pmax, jmax, ieven
+      USE array,       ONLY: Caa, Cab_T, Cab_F, nuCself
+      USE MPI
+      USE model,       ONLY: Na, LINEARITY
+      USE species,     ONLY: name, nu_ab
+      USE futils
+      IMPLICIT NONE
+      ! Input
+      LOGICAL,            INTENT(IN) :: GK_CO, INTERSPECIES
+      CHARACTER(len=128), INTENT(IN) :: matfile    ! COSOlver matrix file names
+      REAL(xp),           INTENT(IN) :: collision_kcut
+      ! Local variables
+      REAL(xp), DIMENSION(:,:),       ALLOCATABLE :: Caa_full,CabT_full, CabF_full  ! To load the self entire matrices
+      REAL(xp), DIMENSION(:,:,:,:),   ALLOCATABLE :: Caa__kp         ! To store the coeff that will be used along kperp
+      REAL(xp), DIMENSION(:,:,:,:,:), ALLOCATABLE :: CabF_kp,CabT_kp ! ''
+      REAL(xp), DIMENSION(:),         ALLOCATABLE :: kp_grid_mat     ! kperp grid of the matrices
+      REAL(xp), DIMENSION(2) :: dims
+      ! Indices for row and columns of the COSOlver matrix (4D compressed 2D matrices)
+      INTEGER  :: irow_sub, irow_full, icol_sub, icol_full
+      INTEGER  :: fid                                       ! file indexation
+      INTEGER  :: ip, ij, il, ikx, iky, iz, ik, ikp, ikps_C,ikpe_C,ia,ib  ! indices for loops
+      INTEGER  :: pdim, jdim                                ! dimensions of the COSOlver matrices
+      INTEGER  :: ikp_next, ikp_prev, nkp_mat, ikp_mat
+      REAL(xp) :: kp_max,kperp_sim, kperp_mat, zerotoone
+      CHARACTER(len=128) :: var_name, ikp_string, name_a, name_b
+      CHARACTER(len=1)   :: letter_a, letter_b
+      ! Opening the compiled cosolver matrices results
+      CALL openf(matfile,fid, 'r', 'D', mpicomm=comm_p);
+      ! Get matrices dimensions (polynomials degrees and kperp grid)
+      CALL getarr(fid, '/dims_i', dims) ! Get the ion polynomial degrees (consider same for electrons)
+      pdim = dims(1); jdim = dims(2);
+      !! Here we stop if the matrix is too small, we could put zero to these coefficients otherwise?
+      IF ( ((pdim .LT. pmax) .OR. (jdim .LT. jmax)) .AND. (my_id .EQ. 0)) ERROR STOP '>> ERROR << P,J Matrix too small'
+      ! Get the dimension kperp grid of the matrices for GK operator
+      CALL getsize(fid, '/coordkperp', nkp_mat)
+      CALL allocate_array(kp_grid_mat, 1,nkp_mat)
+      CALL getarr(fid, '/coordkperp', kp_grid_mat)
+      kp_max = MAXVAL(kparray)
+      ! check that we have enough kperps mat, if not we apply the kpmax matrix to all k>kpmax
+      IF (LINEARITY .NE. 'linear') THEN
+        IF ( (kp_grid_mat(nkp_mat) .LT. 2./3.*kp_max) .AND. (my_id .EQ. 0)) WRITE(*,*) 'warning: Matrix kperp grid too small'
+      ELSE
+        IF ( (kp_grid_mat(nkp_mat) .LT. kp_max) .AND. (my_id .EQ. 0)) WRITE(*,*) 'warning: Matrix kperp grid too small !!'
+      ENDIF
+      IF (GK_CO) THEN ! GK operator (k-dependant)
+        ikps_C = 1; ikpe_C = nkp_mat
+      ELSE ! DK operator, only the k=0 mat applied to all k
+        ikps_C = 1; ikpe_C = 1
+      ENDIF
+      ! allocate the temporary matrices
+      CALL allocate_array(  Caa__kp, 1,Na, 1,(pmax+1)*(jmax+1), 1,(pmax+1)*(jmax+1), ikps_C,ikpe_C)
+      CALL allocate_array(  CabF_kp, 1,Na, 1,Na, 1,(pmax+1)*(jmax+1), 1,(pmax+1)*(jmax+1), ikps_C,ikpe_C)
+      CALL allocate_array(  CabT_kp, 1,Na, 1,Na, 1,(pmax+1)*(jmax+1), 1,(pmax+1)*(jmax+1), ikps_C,ikpe_C)
+      CALL allocate_array(  Caa_full,1,(pdim+1)*(jdim+1), 1,(pdim+1)*(jdim+1))
+      CALL allocate_array( CabT_full,1,(pdim+1)*(jdim+1), 1,(pdim+1)*(jdim+1))
+      CALL allocate_array( CabF_full,1,(pdim+1)*(jdim+1), 1,(pdim+1)*(jdim+1))
+      ! Loop over every kperp values that will get the collision operator
+      kp:DO ikp = ikps_C,ikpe_C
+        ! Kperp value in string format to select in cosolver hdf5 file
+        IF (GK_CO) THEN
+          write(ikp_string,'(i5.5)') ikp-1
+        ELSE
+          write(ikp_string,'(i5.5)') 0
+        ENDIF
+        a:DO ia = 1,local_na
+          name_a = name(ia); letter_a = name_a(1:1)
+          ! get the self colision matrix
+          ! Naming of the array to load (kperp dependant)
+          ! we look for the data stored at e.g. '00001/Caapj/Ceepj'
+          WRITE(var_name,'(a,a,a,a,a)') TRIM(ADJUSTL(ikp_string)),'/Caapj/C',letter_a,letter_a,'pj'
+          CALL getarr(fid, var_name, Caa_full) ! get array
+          ! Fill sub array with the usefull polynmial degrees only
+          DO ip = 0,pmax ! Loop over rows
+            DO ij = 0,jmax
+              irow_sub  = (jmax +1)*ip + ij +1
+              irow_full = (jdim +1)*ip + ij +1
+              DO il = 0,pmax ! Loop over columns
+                DO ik = 0,jmax
+                  icol_sub  = (jmax +1)*il + ik +1
+                  icol_full = (jdim +1)*il + ik +1
+                  Caa__kp (ia,irow_sub,icol_sub,ikp) = Caa_full(irow_full,icol_full)
+                ENDDO
+              ENDDO
+            ENDDO
+          ENDDO
+          b: DO ib = 1,local_na
+          name_b = name(ib); letter_b = name_b(1:1)
+          IF(INTERSPECIES .AND. (ib .NE. ia)) THEN ! Pitch angle is only applied on like-species
+              !!!!!!!!!!!!!!! Test and field matrices !!!!!!!!!!!!!!
+              ! we look for the data stored at e.g. '00001/Ceipj/CeipjT'
+              WRITE(var_name,'(a,a,a,a,a,a,a,a)') TRIM(ADJUSTL(ikp_string)),'/C',letter_a,letter_b,'pj/C',letter_a,letter_b,'pjT'
+              CALL getarr(fid, var_name, CabT_full)
+              ! we look for the data stored at e.g. '00001/Ceipj/CeipjF'
+              WRITE(var_name,'(a,a,a,a,a,a,a,a)') TRIM(ADJUSTL(ikp_string)),'/C',letter_a,letter_b,'pj/C',letter_a,letter_b,'pjF'
+              CALL getarr(fid, var_name, CabF_full)
+              ! Fill sub array with only usefull polynmials degree
+              DO ip = 0,pmax ! Loop over rows
+              DO ij = 0,jmax
+                    irow_sub  = (jmax +1)*ip + ij +1
+                    irow_full = (jdim +1)*ip + ij +1
+                    DO il = 0,pmax ! Loop over columns
+                    DO ik = 0,jmax
+                          icol_sub  = (jmax +1)*il + ik +1
+                          icol_full = (jdim +1)*il + ik +1
+                          CabT_kp(ia,ib,irow_sub,icol_sub,ikp) = CabT_full(irow_full,icol_full)
+                          CabF_kp(ia,ib,irow_sub,icol_sub,ikp) = CabF_full(irow_full,icol_full)
+                    ENDDO
+                    ENDDO
+              ENDDO
+              ENDDO
+            ELSE
+              CabT_kp(ia,ib,:,:,:) = 0._xp; CabF_kp(ia,ib,:,:,:) = 0._xp
+            ENDIF
+          ENDDO b
+        ENDDO a
+      ENDDO kp
+      DEALLOCATE(Caa_full)
+      DEALLOCATE(CabT_full)
+      DEALLOCATE(CabF_full)
+      CALL closef(fid)
+
+      IF (GK_CO) THEN ! Interpolation of the kperp matrix values on kx ky grid
+        IF (my_id .EQ. 0 ) WRITE(*,*) '...Interpolation from matrices kperp to simulation kx,ky...'
+        DO ikx = 1,local_nkx
+          DO iky = 1,local_nky
+            DO iz = 1,local_nz
+              ! Check for nonlinear case if we are in the anti aliased domain or the filtered one
+              kperp_sim = MIN(kparray(iky,ikx,iz+ngz/2,ieven),collision_kcut) ! current simulation kperp
+              ! Find the interval in kp grid mat where kperp_sim is contained
+              ! Loop over the whole kp mat grid to find the smallest kperp that is
+              ! larger than the current kperp_sim (brute force...)
+              DO ikp=1,nkp_mat
+                ikp_mat   = ikp ! the first indice of the interval (k0)
+                kperp_mat = kp_grid_mat(ikp)
+                IF(kperp_mat .GT. kperp_sim) EXIT ! a matrix with kperq > current kx2 + ky2 has been found
+              ENDDO
+              ! Interpolation
+              ! interval boundaries
+              ikp_next  = ikp_mat     !index of k1 (larger than kperp_sim thanks to previous loop)
+              ikp_prev  = ikp_mat - 1 !index of k0 (smaller neighbour to interpolate inbetween)
+              if ( (kp_grid_mat(ikp_prev) .GT. kperp_sim) .OR. (kp_grid_mat(ikp_next) .LT. kperp_sim) ) THEN
+                ! write(*,*) 'Warning, linear interp of collision matrix failed!! '
+                ! write(*,*) kp_grid_mat(ikp_prev), '<', kperp_sim, '<', kp_grid_mat(ikp_next)
+              ENDIF
+              ! 0->1 variable for linear interp, i.e. zero2one = (k-k0)/(k1-k0)
+              zerotoone = MIN(1._xp,(kperp_sim - kp_grid_mat(ikp_prev))/(kp_grid_mat(ikp_next) - kp_grid_mat(ikp_prev)))
+              ! Linear interpolation between previous and next kperp matrix values
+              Caa (:,:,:,iky,ikx,iz) = (Caa__kp(:,:,:,ikp_next) - Caa__kp(:,:,:,ikp_prev))*zerotoone + Caa__kp(:,:,:,ikp_prev)
+              IF(INTERSPECIES) THEN
+                Cab_T(:,:,:,:,iky,ikx,iz) = (CabT_kp(:,:,:,:,ikp_next) - CabT_kp(:,:,:,:,ikp_prev))*zerotoone + CabT_kp(:,:,:,:,ikp_prev)
+                Cab_F(:,:,:,:,iky,ikx,iz) = (CabF_kp(:,:,:,:,ikp_next) - CabF_kp(:,:,:,:,ikp_prev))*zerotoone + CabF_kp(:,:,:,:,ikp_prev)
+              ELSE
+                Cab_T(:,:,:,:,iky,ikx,iz) = 0._xp
+                Cab_F(:,:,:,:,iky,ikx,iz) = 0._xp
+              ENDIF
+            ENDDO
+          ENDDO
+        ENDDO
+      ELSE ! DK -> No kperp dep, copy simply to final collision matrices
+        Caa (:,:,:,1,1,1)    = Caa__kp(:,:,:,1)
+        Cab_T(:,:,:,:,1,1,1) = CabT_kp(:,:,:,:,1)
+        Cab_F(:,:,:,:,1,1,1) = CabF_kp(:,:,:,:,1)
+      ENDIF
+      ! Deallocate auxiliary variables
+      DEALLOCATE (Caa__kp); DEALLOCATE (CabT_kp); DEALLOCATE (CabF_kp)
+
+      IF( .NOT. INTERSPECIES ) THEN
+        CALL speak("--Like Species operator--")
+        Cab_F = 0._xp;
+        Cab_T = 0._xp;
+      ENDIF
+      ! Build the self matrix   
+      ! nuCself = nuaa*Caa + sum_b_neq_a nu_ab Cab_T
+      DO ia = 1,total_na
+        nuCself(ia,:,:,:,:,:) = nu_ab(ia,ia)*Caa(ia,:,:,:,:,:)
+        DO ib = 1,total_na
+          IF(ib .NE. ia) THEN
+            nuCself(ia,:,:,:,:,:) = nuCself(ia,:,:,:,:,:)&
+                                    +nu_ab(ia,ib)*Cab_T(ia,ib,:,:,:,:,:)
+          ENDIF
+        ENDDO
+      ENDDO
+      CALL speak('============DONE===========')
+
+    END SUBROUTINE load_COSOlver_mat
+    !******************************************************************************!
+END MODULE cosolver_interface
diff --git a/src/diagnose.F90 b/src/diagnose.F90
index 77dcdd1dcf23187d88e056c62a87404ca2179516..9ca937dc642f068b7231a9b2f1407521b0747343 100644
--- a/src/diagnose.F90
+++ b/src/diagnose.F90
@@ -1,13 +1,11 @@
 SUBROUTINE diagnose(kstep)
   !   Diagnostics, writing simulation state to disk
-  USE basic
-  USE diagnostics_par
-  USE processing, ONLY: gflux_ri, hflux_xi
+  USE basic,           ONLY: lu_in, chrono_runt, cstep, dt, time, tmax, display_h_min_s
+  USE diagnostics_par, ONLY: input_fname
+  USE processing,      ONLY: pflux_x, hflux_x
+  USE parallel,        ONLY: my_id
   IMPLICIT NONE
-
   INTEGER, INTENT(in) :: kstep
-  CALL cpu_time(t0_diag) ! Measuring time
-
   !! Basic diagnose loop for reading input file, displaying advancement and ending
   IF ((kstep .EQ. 0)) THEN
     INQUIRE(unit=lu_in, name=input_fname)
@@ -15,32 +13,32 @@ SUBROUTINE diagnose(kstep)
   ENDIF
   !! End diag
   IF (kstep .EQ. -1) THEN
-    CALL cpu_time(finish)
-     ! Display computational time cost
-     IF (my_id .EQ. 0) CALL display_h_min_s(finish-start)
+     ! Display total run time
+     CALL display_h_min_s(chrono_runt%ttot)
      ! Show last state transport values
      IF (my_id .EQ. 0) &
-      WRITE(*,"(A,G10.2,A8,G10.2,A)") 'Final transport values : | Gxi = ',gflux_ri,'| Qxi = ',hflux_xi,'|'
+      WRITE(*,"(A,G10.2,A8,G10.2,A)") 'Final transport values : | Gxi = ',pflux_x(1),'| Qxi = ',hflux_x(1),'|'
   END IF
   !! Specific diagnostic calls
   CALL diagnose_full(kstep)
   ! Terminal info
-  IF ((kstep .GT. 0) .AND. (MOD(cstep, INT(1.0/dt)) == 0) .AND. (my_id .EQ. 0)) THEN
-    WRITE(*,"(A,F6.0,A1,F6.0,A8,G10.2,A8,G10.2,A)")'|t/tmax = ', time,"/",tmax,'| Gxi = ',gflux_ri,'| Qxi = ',hflux_xi,'|'
+  IF ((kstep .GE. 0) .AND. (MOD(cstep, INT(1.0/dt)) == 0) .AND. (my_id .EQ. 0)) THEN
+    WRITE(*,"(A,F6.0,A1,F6.0,A8,G10.2,A8,G10.2,A)")'|t/tmax = ', time,"/",tmax,'| Gxi = ',pflux_x(1),'| Qxi = ',hflux_x(1),'|'
   ENDIF
-  CALL cpu_time(t1_diag); tc_diag = tc_diag + (t1_diag - t0_diag)
 END SUBROUTINE diagnose
 
 SUBROUTINE init_outfile(comm,file0,file,fid)
-  USE diagnostics_par, ONLY : write_doubleprecision, diag_par_outputinputs, input_fname
-  USE basic,           ONLY : my_id, jobnum, basic_outputinputs
-  USE grid,            ONLY : grid_outputinputs
-  USE geometry,        ONLY : geometry_outputinputs
-  USE model,           ONLY : model_outputinputs
-  USE collision,       ONLY : coll_outputinputs
-  USE initial_par,     ONLY : initial_outputinputs
-  USE time_integration,ONLY : time_integration_outputinputs
-  USE futils,          ONLY : creatf, creatg, creatd, attach, putfile
+  USE diagnostics_par, ONLY: write_doubleprecision, diag_par_outputinputs, input_fname
+  USE basic,           ONLY: speak, jobnum, basic_outputinputs
+  USE grid,            ONLY: grid_outputinputs
+  USE geometry,        ONLY: geometry_outputinputs
+  USE model,           ONLY: model_outputinputs
+  USE closure,         ONLY: closure_outputinputs
+  USE species,         ONLY: species_outputinputs
+  USE collision,       ONLY: coll_outputinputs
+  USE initial_par,     ONLY: initial_outputinputs
+  USE time_integration,ONLY: time_integration_outputinputs
+  USE futils,          ONLY: creatf, creatg, creatd, attach, putfile
   IMPLICIT NONE
   !input
   INTEGER,            INTENT(IN)    :: comm
@@ -49,7 +47,6 @@ SUBROUTINE init_outfile(comm,file0,file,fid)
   INTEGER,            INTENT(OUT)   :: fid
   CHARACTER(len=256)                :: str
   INCLUDE 'srcinfo.h'
-
   ! Writing output filename
   WRITE(file,'(a,a1,i2.2,a3)') TRIM(file0)   ,'_',jobnum,'.h5'
   !                      1.1   Initial run
@@ -59,31 +56,30 @@ SUBROUTINE init_outfile(comm,file0,file,fid)
   ELSE
     CALL creatf(file, fid, mpicomm=comm)
   END IF
-  IF (my_id .EQ. 0) WRITE(*,'(3x,a,a)')  TRIM(file), ' created'
+  CALL speak(TRIM(file)//' created')
   !  basic data group
   CALL creatg(fid, "/data", "data")
   !  File group
   CALL creatg(fid, "/files", "files")
   CALL attach(fid, "/files",  "jobnum", jobnum)
-
   ! Add the code info and parameters to the file
-  WRITE(str,'(a,i2.2)') "/data/input"
-  CALL creatd(fid, 0,(/0/),TRIM(str),'Input parameters')
-  CALL attach(fid, TRIM(str),     "version",  VERSION) !defined in srcinfo.h
-  CALL attach(fid, TRIM(str),      "branch",   BRANCH) !defined in srcinfo.h
-  CALL attach(fid, TRIM(str),      "author",   AUTHOR) !defined in srcinfo.h
-  CALL attach(fid, TRIM(str),    "execdate", EXECDATE) !defined in srcinfo.h
-  CALL attach(fid, TRIM(str),        "host",     HOST) !defined in srcinfo.h
-
-  CALL basic_outputinputs(fid,str)
-  CALL grid_outputinputs(fid, str)
-  CALL geometry_outputinputs(fid, str)
-  CALL diag_par_outputinputs(fid, str)
-  CALL model_outputinputs(fid, str)
-  CALL coll_outputinputs(fid, str)
-  CALL initial_outputinputs(fid, str)
-  CALL time_integration_outputinputs(fid, str)
-
+  CALL creatg(fid, "/data/input", "input")
+  CALL creatd(fid, 0,(/0/),"/data/input/codeinfo",'Code Information')
+  CALL attach(fid, "/data/input/codeinfo",  "version",  VERSION) !defined in srcinfo.h
+  CALL attach(fid, "/data/input/codeinfo",   "branch",   BRANCH) !defined in srcinfo.h
+  CALL attach(fid, "/data/input/codeinfo",   "author",   AUTHOR) !defined in srcinfo.h
+  CALL attach(fid, "/data/input/codeinfo", "execdate", EXECDATE) !defined in srcinfo.h
+  CALL attach(fid, "/data/input/codeinfo",     "host",     HOST) !defined in srcinfo.h
+  CALL            basic_outputinputs(fid)
+  CALL             grid_outputinputs(fid)
+  CALL         geometry_outputinputs(fid)
+  CALL         diag_par_outputinputs(fid)
+  CALL            model_outputinputs(fid)
+  CALL          closure_outputinputs(fid)
+  CALL          species_outputinputs(fid)
+  CALL             coll_outputinputs(fid)
+  CALL          initial_outputinputs(fid)
+  CALL time_integration_outputinputs(fid)
   !  Save STDIN (input file) of this run
   IF(jobnum .LE. 99) THEN
      WRITE(str,'(a,i2.2)') "/files/STDIN.",jobnum
@@ -94,38 +90,38 @@ SUBROUTINE init_outfile(comm,file0,file,fid)
 END SUBROUTINE init_outfile
 
 SUBROUTINE diagnose_full(kstep)
-  USE basic
-  USE grid
+  USE basic,           ONLY: speak,chrono_runt,&
+                             cstep,iframe0d,iframe3d,iframe5d,crashed
+  USE grid,            ONLY: &
+    local_nj,local_nky,local_nkx,local_nz,ngj,ngz,&
+    parray_full,pmax,jarray_full,jmax,&
+    kyarray_full,kxarray_full,zarray_full, total_na
   USE diagnostics_par
-  USE futils, ONLY: creatf, creatg, creatd, closef, putarr, putfile, attach, openf, putarrnd
+  USE futils,          ONLY: creatf, creatg, creatd, closef, putarr, putfile, attach, openf, putarrnd
+  USE species,         ONLY: name
   USE array
-  USE model
-  USE initial_par
-  USE fields
-  USE time_integration
-  USE parallel
-  USE prec_const
-  USE collision, ONLY: coll_outputinputs
-  USE geometry
+  USE model,           ONLY: EM
+  USE parallel,        ONLY: my_id, comm0
+  USE collision,       ONLY: coll_outputinputs
+  USE geometry,        ONLY: gxx,gxy,gyy,gxz,gyz,gzz,hatR,hatZ,hatB,dBdx,dBdy,dBdz,Jacobian,gradz_coeff,Ckxky
   IMPLICIT NONE
-
+  CHARACTER           :: letter_a
   INTEGER, INTENT(in) :: kstep
   INTEGER, parameter  :: BUFSIZE = 2
-  INTEGER :: rank = 0
+  INTEGER :: rank = 0, ierr, ia
   INTEGER :: dims(1) = (/0/)
   !____________________________________________________________________________
   !                   1.   Initial diagnostics
-
   IF ((kstep .EQ. 0)) THEN
     CALL init_outfile(comm0,   resfile0,resfile,fidres)
-
     ! Profiler time measurement
     CALL creatg(fidres, "/profiler", "performance analysis")
     CALL creatd(fidres, 0, dims, "/profiler/Tc_rhs",        "cumulative rhs computation time")
     CALL creatd(fidres, 0, dims, "/profiler/Tc_poisson",    "cumulative poisson computation time")
     CALL creatd(fidres, 0, dims, "/profiler/Tc_Sapj",       "cumulative Sapj computation time")
     CALL creatd(fidres, 0, dims, "/profiler/Tc_coll",       "cumulative collision computation time")
-    CALL creatd(fidres, 0, dims, "/profiler/Tc_process",    "cumulative process computation time")
+    CALL creatd(fidres, 0, dims, "/profiler/Tc_grad",       "cumulative grad computation time")
+    CALL creatd(fidres, 0, dims, "/profiler/Tc_nadiab",     "cumulative nadiab moments computation time")
     CALL creatd(fidres, 0, dims, "/profiler/Tc_adv_field",  "cumulative adv. fields computation time")
     CALL creatd(fidres, 0, dims, "/profiler/Tc_ghost",       "cumulative communication time")
     CALL creatd(fidres, 0, dims, "/profiler/Tc_clos",       "cumulative closure computation time")
@@ -133,505 +129,382 @@ SUBROUTINE diagnose_full(kstep)
     CALL creatd(fidres, 0, dims, "/profiler/Tc_diag",       "cumulative sym computation time")
     CALL creatd(fidres, 0, dims, "/profiler/Tc_step",       "cumulative total step computation time")
     CALL creatd(fidres, 0, dims, "/profiler/time",          "current simulation time")
-
     ! Grid info
     CALL creatg(fidres, "/data/grid", "Grid data")
     CALL putarr(fidres, "/data/grid/coordkx",   kxarray_full,  "kx*rho_s0", ionode=0)
     CALL putarr(fidres, "/data/grid/coordky",   kyarray_full,  "ky*rho_s0", ionode=0)
     CALL putarr(fidres, "/data/grid/coordz",    zarray_full,   "z/R", ionode=0)
-    CALL putarr(fidres, "/data/grid/coordp_e" , parray_e_full, "p_e", ionode=0)
-    CALL putarr(fidres, "/data/grid/coordj_e" , jarray_e_full, "j_e", ionode=0)
-    CALL putarr(fidres, "/data/grid/coordp_i" , parray_i_full, "p_i", ionode=0)
-    CALL putarr(fidres, "/data/grid/coordj_i" , jarray_i_full, "j_i", ionode=0)
-
+    CALL putarr(fidres, "/data/grid/coordp" ,   parray_full,   "p", ionode=0)
+    CALL putarr(fidres, "/data/grid/coordj" ,   jarray_full,   "j", ionode=0)
     ! Metric info
     CALL   creatg(fidres, "/data/metric", "Metric data")
-    CALL putarrnd(fidres, "/data/metric/gxx",            gxx(izs:ize,0:1), (/1, 1, 1/))
-    CALL putarrnd(fidres, "/data/metric/gxy",            gxy(izs:ize,0:1), (/1, 1, 1/))
-    CALL putarrnd(fidres, "/data/metric/gxz",            gxz(izs:ize,0:1), (/1, 1, 1/))
-    CALL putarrnd(fidres, "/data/metric/gyy",            gyy(izs:ize,0:1), (/1, 1, 1/))
-    CALL putarrnd(fidres, "/data/metric/gyz",            gyz(izs:ize,0:1), (/1, 1, 1/))
-    CALL putarrnd(fidres, "/data/metric/gzz",            gzz(izs:ize,0:1), (/1, 1, 1/))
-    CALL putarrnd(fidres, "/data/metric/hatR",          hatR(izs:ize,0:1), (/1, 1, 1/))
-    CALL putarrnd(fidres, "/data/metric/hatZ",          hatZ(izs:ize,0:1), (/1, 1, 1/))
-    CALL putarrnd(fidres, "/data/metric/hatB",          hatB(izs:ize,0:1), (/1, 1, 1/))
-    CALL putarrnd(fidres, "/data/metric/dBdx",      dBdx(izs:ize,0:1), (/1, 1, 1/))
-    CALL putarrnd(fidres, "/data/metric/dBdy",      dBdy(izs:ize,0:1), (/1, 1, 1/))
-    CALL putarrnd(fidres, "/data/metric/dBdz",      dBdz(izs:ize,0:1), (/1, 1, 1/))
-    CALL putarrnd(fidres, "/data/metric/Jacobian",    Jacobian(izs:ize,0:1), (/1, 1, 1/))
-    CALL putarrnd(fidres, "/data/metric/gradz_coeff", gradz_coeff(izs:ize,0:1), (/1, 1, 1/))
-    CALL putarrnd(fidres, "/data/metric/Ckxky",       Ckxky(ikys:ikye,ikxs:ikxe,izs:ize,0:1), (/1, 1, 3/))
-    CALL putarrnd(fidres, "/data/metric/kernel_i",    kernel_i(ijs_i:ije_i,ikys:ikye,ikxs:ikxe,izs:ize,0:1), (/ 1, 2, 4/))
-
+    CALL putarrnd(fidres, "/data/metric/gxx",            gxx((1+ngz/2):(local_nz+ngz/2),:), (/1, 1, 1/))
+    CALL putarrnd(fidres, "/data/metric/gxy",            gxy((1+ngz/2):(local_nz+ngz/2),:), (/1, 1, 1/))
+    CALL putarrnd(fidres, "/data/metric/gxz",            gxz((1+ngz/2):(local_nz+ngz/2),:), (/1, 1, 1/))
+    CALL putarrnd(fidres, "/data/metric/gyy",            gyy((1+ngz/2):(local_nz+ngz/2),:), (/1, 1, 1/))
+    CALL putarrnd(fidres, "/data/metric/gyz",            gyz((1+ngz/2):(local_nz+ngz/2),:), (/1, 1, 1/))
+    CALL putarrnd(fidres, "/data/metric/gzz",            gzz((1+ngz/2):(local_nz+ngz/2),:), (/1, 1, 1/))
+    CALL putarrnd(fidres, "/data/metric/hatR",          hatR((1+ngz/2):(local_nz+ngz/2),:), (/1, 1, 1/))
+    CALL putarrnd(fidres, "/data/metric/hatZ",          hatZ((1+ngz/2):(local_nz+ngz/2),:), (/1, 1, 1/))
+    CALL putarrnd(fidres, "/data/metric/hatB",          hatB((1+ngz/2):(local_nz+ngz/2),:), (/1, 1, 1/))
+    CALL putarrnd(fidres, "/data/metric/dBdx",      dBdx((1+ngz/2):(local_nz+ngz/2),:), (/1, 1, 1/))
+    CALL putarrnd(fidres, "/data/metric/dBdy",      dBdy((1+ngz/2):(local_nz+ngz/2),:), (/1, 1, 1/))
+    CALL putarrnd(fidres, "/data/metric/dBdz",      dBdz((1+ngz/2):(local_nz+ngz/2),:), (/1, 1, 1/))
+    CALL putarrnd(fidres, "/data/metric/Jacobian",    Jacobian((1+ngz/2):(local_nz+ngz/2),:), (/1, 1, 1/))
+    CALL putarrnd(fidres, "/data/metric/gradz_coeff", gradz_coeff((1+ngz/2):(local_nz+ngz/2),:), (/1, 1, 1/))
+    CALL putarrnd(fidres, "/data/metric/Ckxky",       Ckxky(1:local_nky,1:local_nkx,(1+ngz/2):(local_nz+ngz/2),:), (/1, 1, 3/))
+    CALL putarrnd(fidres, "/data/metric/kernel",    kernel(1,(1+ngj/2):(local_nj+ngj/2),1:local_nky,1:local_nkx,(1+ngz/2):(local_nz+ngz/2),1), (/1, 2, 4/))
     !  var0d group (gyro transport)
     IF (nsave_0d .GT. 0) THEN
      CALL creatg(fidres, "/data/var0d", "0d profiles")
      CALL creatd(fidres, rank, dims,  "/data/var0d/time",     "Time t*c_s/R")
      CALL creatd(fidres, rank, dims, "/data/var0d/cstep", "iteration number")
-
      IF (write_gamma) THEN
-       CALL creatd(fidres, rank, dims, "/data/var0d/gflux_ri", "Radial gyro ion transport")
-       CALL creatd(fidres, rank, dims, "/data/var0d/pflux_ri", "Radial part ion transport")
-       IF(KIN_E) THEN
-       CALL creatd(fidres, rank, dims, "/data/var0d/gflux_re", "Radial gyro electron transport")
-       CALL creatd(fidres, rank, dims, "/data/var0d/pflux_re", "Radial part electron transport")
-       ENDIF
+       DO ia=1,total_na
+         letter_a = name(ia)(1:1)
+         CALL creatd(fidres, rank, dims, "/data/var0d/gflux_x"//letter_a, "Radial gyro transport")
+         CALL creatd(fidres, rank, dims, "/data/var0d/pflux_x"//letter_a, "Radial part transport")
+      ENDDO
      ENDIF
      IF (write_hf) THEN
-       CALL creatd(fidres, rank, dims, "/data/var0d/hflux_xi", "Radial part ion heat flux")
-       IF(KIN_E) THEN
-       CALL creatd(fidres, rank, dims, "/data/var0d/hflux_xe", "Radial part electron heat flux")
-       ENDIF
+       DO ia=1,total_na
+         letter_a = name(ia)(1:1)
+         CALL creatd(fidres, rank, dims, "/data/var0d/hflux_x"//letter_a, "Radial part heat flux")
+      ENDDO
      ENDIF
      IF (cstep==0) THEN
        iframe0d=0
      ENDIF
      CALL attach(fidres,"/data/var0d/" , "frames", iframe0d)
     END IF
-
-
-    !  var2d group (??)
-    IF (nsave_2d .GT. 0) THEN
-     CALL creatg(fidres, "/data/var2d", "2d profiles")
-     CALL creatd(fidres, rank, dims,  "/data/var2d/time",     "Time t*c_s/R")
-     CALL creatd(fidres, rank, dims, "/data/var2d/cstep", "iteration number")
-     IF (cstep==0) THEN
-       iframe2d=0
-     ENDIF
-     CALL attach(fidres,"/data/var2d/" , "frames", iframe2d)
-    END IF
-
-    !  var3d group (electro. pot., Ni00 moment)
+    !  var3d group (phi,psi, fluid moments, Ni00, Napjz)
     IF (nsave_3d .GT. 0) THEN
      CALL creatg(fidres, "/data/var3d", "3d profiles")
      CALL creatd(fidres, rank, dims,  "/data/var3d/time",     "Time t*c_s/R")
      CALL creatd(fidres, rank, dims, "/data/var3d/cstep", "iteration number")
-
      IF (write_phi) CALL creatg(fidres, "/data/var3d/phi", "phi")
-     IF (write_phi) CALL creatg(fidres, "/data/var3d/psi", "psi")
-
-     IF (write_Na00) THEN
-      IF(KIN_E)&
-      CALL creatg(fidres, "/data/var3d/Ne00", "Ne00")
-      CALL creatg(fidres, "/data/var3d/Ni00", "Ni00")
-      IF(KIN_E)&
-      CALL creatg(fidres, "/data/var3d/Nepjz", "Nepjz")
-      CALL creatg(fidres, "/data/var3d/Nipjz", "Nipjz")
-     ENDIF
-
-     IF (write_dens) THEN
-       IF(KIN_E)&
-      CALL creatg(fidres, "/data/var3d/dens_e", "dens_e")
-      CALL creatg(fidres, "/data/var3d/dens_i", "dens_i")
-     ENDIF
-
-     IF (write_fvel) THEN
-       IF(KIN_E) THEN
-       CALL creatg(fidres, "/data/var3d/upar_e", "upar_e")
-       CALL creatg(fidres, "/data/var3d/uper_e", "uper_e")
+     IF (write_phi.AND.EM) CALL creatg(fidres, "/data/var3d/psi", "psi")
+     ! Loop to create species related data
+     DO ia=1,total_na
+       letter_a = name(ia)(1:1)
+       IF (write_Na00) THEN
+        CALL creatg(fidres, "/data/var3d/N"//letter_a//"00", "gyroceneter density "//letter_a)
+        CALL creatg(fidres, "/data/var3d/N"//letter_a//"pjz", "pj(z) moment spectrum "//letter_a)
        ENDIF
-       CALL creatg(fidres, "/data/var3d/upar_i", "upar_i")
-       CALL creatg(fidres, "/data/var3d/uper_i", "uper_i")
-     ENDIF
-
-     IF (write_temp) THEN
-       IF(KIN_E) THEN
-       CALL creatg(fidres, "/data/var3d/Tper_e", "Tper_e")
-       CALL creatg(fidres, "/data/var3d/Tpar_e", "Tpar_e")
-       CALL creatg(fidres, "/data/var3d/temp_e", "temp_e")
+       IF (write_dens) THEN
+        CALL creatg(fidres, "/data/var3d/dens_"//letter_a, "density "//letter_a)
        ENDIF
-       CALL creatg(fidres, "/data/var3d/Tper_i", "Tper_i")
-       CALL creatg(fidres, "/data/var3d/Tpar_i", "Tpar_i")
-       CALL creatg(fidres, "/data/var3d/temp_i", "temp_i")
-     ENDIF
-
+       IF (write_fvel) THEN
+         CALL creatg(fidres, "/data/var3d/upar_"//letter_a, "parallel fluid velocity "//letter_a)
+         CALL creatg(fidres, "/data/var3d/uper_"//letter_a, "perpendicular fluid velocity "//letter_a)
+       ENDIF
+       IF (write_temp) THEN
+         CALL creatg(fidres, "/data/var3d/Tper_"//letter_a, "perpendicular temperature "//letter_a)
+         CALL creatg(fidres, "/data/var3d/Tpar_"//letter_a, "parallel temperature "//letter_a)
+         CALL creatg(fidres, "/data/var3d/temp_"//letter_a, "tiotal temperature "//letter_a)
+       ENDIF
+    ENDDO
      IF (cstep==0) THEN
        iframe3d=0
      ENDIF
      CALL attach(fidres,"/data/var3d/" , "frames", iframe3d)
     END IF
-
     !  var5d group (moments)
     IF (nsave_5d .GT. 0) THEN
       CALL creatg(fidres, "/data/var5d", "5d profiles")
       CALL creatd(fidres, rank, dims,  "/data/var5d/time",     "Time t*c_s/R")
       CALL creatd(fidres, rank, dims, "/data/var5d/cstep", "iteration number")
-
       IF (write_Napj) THEN
-        IF(KIN_E)&
-       CALL creatg(fidres, "/data/var5d/moments_e", "moments_e")
-       CALL creatg(fidres, "/data/var5d/moments_i", "moments_i")
-      ENDIF
-
-      IF (write_Sapj) THEN
-        IF(KIN_E)&
-        CALL creatg(fidres, "/data/var5d/Sepj", "Sepj")
-        CALL creatg(fidres, "/data/var5d/Sipj", "Sipj")
+       CALL creatg(fidres, "/data/var5d/moments", "full moments array")
       ENDIF
-
       IF (cstep==0) THEN
        iframe5d=0
       END IF
       CALL attach(fidres,"/data/var5d/" , "frames", iframe5d)
     END IF
   ENDIF
-
   !_____________________________________________________________________________
   !                   2.   Periodic diagnostics
   !
   IF (kstep .GE. 0) THEN
-
-     !                       2.1   0d history arrays
-     IF (nsave_0d .GT. 0) THEN
-        IF ( MOD(cstep, nsave_0d) == 0 ) THEN
-           CALL diagnose_0d
-        END IF
-     END IF
-
-     !                       2.2   1d profiles
-     ! empty in our case
-
-     !                       2.3   2d profiles
-     ! empty in our case
-
-
-     !                       2.3   3d profiles
-     IF (nsave_3d .GT. 0) THEN
-        IF (MOD(cstep, nsave_3d) == 0) THEN
-          CALL diagnose_3d
-          ! Looks at the folder if the file check_phi exists and spits a snapshot
-          ! of the current electrostatic potential in a basic text file
-          CALL spit_snapshot_check
-        ENDIF
-     ENDIF
-
-     !                       2.4   5d profiles
-     IF (nsave_5d .GT. 0 .AND. cstep .GT. 0) THEN
-        IF (MOD(cstep, nsave_5d) == 0) THEN
-           CALL diagnose_5d
-        END IF
-     END IF
-
+    !                       2.1   0d history arrays
+    IF (nsave_0d .GT. 0) THEN
+      IF ( MOD(cstep, nsave_0d) == 0 ) THEN
+        CALL diagnose_0d
+      END IF
+    END IF
+    !                       2.3   3d profiles
+    IF (nsave_3d .GT. 0) THEN
+      IF (MOD(cstep, nsave_3d) == 0) THEN
+        CALL diagnose_3d
+        ! Looks at the folder if the file check_phi exists and spits a snapshot
+        ! of the current electrostatic potential in a basic text file
+        CALL spit_snapshot_check
+      ENDIF
+    ENDIF
+    !                       2.4   5d profiles
+    IF (nsave_5d .GT. 0) THEN
+      IF (MOD(cstep, nsave_5d) == 0) THEN
+        CALL diagnose_5d
+      END IF
+    END IF
   !_____________________________________________________________________________
   !                   3.   Final diagnostics
-
   ELSEIF (kstep .EQ. -1) THEN
-     CALL attach(fidres, "/data/input","cpu_time",finish-start)
-
-     ! make a checkpoint at last timestep if not crashed
-     IF(.NOT. crashed) THEN
-       IF(my_id .EQ. 0) write(*,*) 'Saving last state'
-       IF (nsave_5d .GT. 0) &
-       CALL diagnose_5d
-     ENDIF
-
-     !   Close all diagnostic files
-     CALL mpi_barrier(MPI_COMM_WORLD, ierr)
-     CALL closef(fidres)
-
+    CALL attach(fidres, "/data/input","cpu_time",chrono_runt%ttot)
+    ! make a checkpoint at last timestep if not crashed
+    IF(.NOT. crashed) THEN
+      IF(my_id .EQ. 0) write(*,*) 'Saving last state'
+      IF (nsave_5d .GT. 0) CALL diagnose_5d
+    ENDIF
+    !   Close all diagnostic files
+    CALL mpi_barrier(MPI_COMM_WORLD, ierr)
+    CALL closef(fidres)
   END IF
 END SUBROUTINE diagnose_full
 
 !!-------------- Auxiliary routines -----------------!!
 SUBROUTINE diagnose_0d
-
   USE basic
   USE futils, ONLY: append, attach, getatt
   USE diagnostics_par
   USE prec_const
   USE processing
-  USE model, ONLY: KIN_E
-
+  USE model,   ONLY: Na
+  USE species, ONLY: name
   IMPLICIT NONE
+  CHARACTER :: letter_a
+  INTEGER   :: ia
   ! Time measurement data
-  CALL append(fidres, "/profiler/Tc_rhs",              tc_rhs,ionode=0)
-  CALL append(fidres, "/profiler/Tc_adv_field",  tc_adv_field,ionode=0)
-  CALL append(fidres, "/profiler/Tc_clos",            tc_clos,ionode=0)
-  CALL append(fidres, "/profiler/Tc_ghost",          tc_ghost,ionode=0)
-  CALL append(fidres, "/profiler/Tc_coll",            tc_coll,ionode=0)
-  CALL append(fidres, "/profiler/Tc_poisson",      tc_poisson,ionode=0)
-  CALL append(fidres, "/profiler/Tc_Sapj",            tc_Sapj,ionode=0)
-  CALL append(fidres, "/profiler/Tc_checkfield",tc_checkfield,ionode=0)
-  CALL append(fidres, "/profiler/Tc_diag",            tc_diag,ionode=0)
-  CALL append(fidres, "/profiler/Tc_process",      tc_process,ionode=0)
-  CALL append(fidres, "/profiler/Tc_step",            tc_step,ionode=0)
-  CALL append(fidres, "/profiler/time",                  time,ionode=0)
+  CALL append(fidres, "/profiler/Tc_rhs",       REAL(chrono_mrhs%ttot,dp),ionode=0)
+  CALL append(fidres, "/profiler/Tc_adv_field", REAL(chrono_advf%ttot,dp),ionode=0)
+  CALL append(fidres, "/profiler/Tc_clos",      REAL(chrono_clos%ttot,dp),ionode=0)
+  CALL append(fidres, "/profiler/Tc_ghost",     REAL(chrono_ghst%ttot,dp),ionode=0)
+  CALL append(fidres, "/profiler/Tc_coll",      REAL(chrono_coll%ttot,dp),ionode=0)
+  CALL append(fidres, "/profiler/Tc_poisson",   REAL(chrono_pois%ttot,dp),ionode=0)
+  CALL append(fidres, "/profiler/Tc_Sapj",      REAL(chrono_sapj%ttot,dp),ionode=0)
+  CALL append(fidres, "/profiler/Tc_checkfield",REAL(chrono_chck%ttot,dp),ionode=0)
+  CALL append(fidres, "/profiler/Tc_diag",      REAL(chrono_diag%ttot,dp),ionode=0)
+  CALL append(fidres, "/profiler/Tc_grad",      REAL(chrono_grad%ttot,dp),ionode=0)
+  CALL append(fidres, "/profiler/Tc_nadiab",    REAL(chrono_napj%ttot,dp),ionode=0)
+  CALL append(fidres, "/profiler/Tc_step",      REAL(chrono_step%ttot,dp),ionode=0)
+  CALL append(fidres, "/profiler/time",                REAL(time,dp),ionode=0)
   ! Processing data
-  CALL append(fidres,  "/data/var0d/time",           time,ionode=0)
+  CALL append(fidres,  "/data/var0d/time",      REAL(time,dp),ionode=0)
   CALL append(fidres, "/data/var0d/cstep", real(cstep,dp),ionode=0)
-  CALL getatt(fidres,      "/data/var0d/",       "frames",iframe2d)
+  CALL getatt(fidres,      "/data/var0d/",       "frames",iframe0d)
   iframe0d=iframe0d+1
   CALL attach(fidres,"/data/var0d/" , "frames", iframe0d)
   ! Ion transport data
   IF (write_gamma) THEN
-    CALL compute_radial_ion_transport
-    CALL append(fidres, "/data/var0d/gflux_ri",gflux_ri,ionode=0)
-    CALL append(fidres, "/data/var0d/pflux_ri",pflux_ri,ionode=0)
-    IF(KIN_E) THEN
-    CALL compute_radial_electron_transport
-    CALL append(fidres, "/data/var0d/gflux_re",gflux_re,ionode=0)
-    CALL append(fidres, "/data/var0d/pflux_re",pflux_re,ionode=0)
-    ENDIF
+    CALL compute_radial_transport
+    DO ia=1,Na
+      letter_a = name(ia)(1:1)
+      CALL append(fidres, "/data/var0d/gflux_x"//letter_a,REAL(gflux_x(ia),dp),ionode=0)
+      CALL append(fidres, "/data/var0d/pflux_x"//letter_a,REAL(pflux_x(ia),dp),ionode=0)
+    ENDDO
   ENDIF
   IF (write_hf) THEN
-    CALL compute_radial_ion_heatflux
-    CALL append(fidres, "/data/var0d/hflux_xi",hflux_xi,ionode=0)
-    IF(KIN_E) THEN
-    CALL compute_radial_electron_heatflux
-    CALL append(fidres, "/data/var0d/hflux_xe",hflux_xe,ionode=0)
-    ENDIF
+    CALL compute_radial_heatflux
+    DO ia=1,Na
+      letter_a = name(ia)(1:1)
+      CALL append(fidres, "/data/var0d/hflux_x"//letter_a,REAL(hflux_x(ia),dp),ionode=0)
+    ENDDO
   ENDIF
 END SUBROUTINE diagnose_0d
 
 SUBROUTINE diagnose_3d
   USE basic
   USE futils, ONLY: append, getatt, attach, putarrnd, putarr
-  USE fields
-  USE array
-  USE grid, ONLY: ikxs,ikxe, ikys,ikye, Nkx, Nky, local_nkx, ikx, iky, ips_e, ips_i
-  USE time_integration
+  USE fields, ONLY: phi, psi, moments
+  USE array,  ONLY: Napjz,dens,upar,uper,Tpar,Tper,temp
+  USE grid, ONLY: CONTAINSp0, ip0,ij0, local_na,&
+                  total_np, total_nj, total_nky, total_nkx, total_nz, &
+                  local_np, local_nj, local_nky, local_nkx, local_nz, &
+                  ngz
+  USE time_integration, ONLY: updatetlevel
   USE diagnostics_par
   USE prec_const
-  USE processing
-  USE model, ONLY: KIN_E
+  USE processing, ONLY: compute_fluid_moments, compute_Napjz_spectrum
+  USE model,      ONLY: EM
+  USE species,    ONLY: name
+  USE parallel,   ONLY: manual_3D_bcast
   IMPLICIT NONE
-
-  CALL append(fidres,  "/data/var3d/time",           time,ionode=0)
+  CHARACTER :: letter_a
+  INTEGER   :: ia
+  COMPLEX(xp), DIMENSION(local_nky,local_nkx,local_nz) :: Na00_
+  COMPLEX(xp), DIMENSION(local_nky,local_nkx,local_nz) :: fmom
+  COMPLEX(xp), DIMENSION(local_np, local_nj, local_nz) :: Napjz_
+  ! add current time, cstep and frame
+  CALL append(fidres,  "/data/var3d/time",           REAL(time,dp),ionode=0)
   CALL append(fidres, "/data/var3d/cstep", real(cstep,dp),ionode=0)
   CALL getatt(fidres,      "/data/var3d/",       "frames",iframe3d)
   iframe3d=iframe3d+1
   CALL attach(fidres,"/data/var3d/" , "frames", iframe3d)
-
-  IF (write_phi) CALL write_field3d_kykxz(phi (ikys:ikye,ikxs:ikxe,izs:ize), 'phi')
-  IF (write_phi) CALL write_field3d_kykxz(psi (ikys:ikye,ikxs:ikxe,izs:ize), 'psi')
-
+  ! Write current EM fields
+  IF (write_phi)        CALL write_field3d_kykxz(phi (:,:,(1+ngz/2):(local_nz+ngz/2)), 'phi')
+  IF (write_phi.AND.EM) CALL write_field3d_kykxz(psi (:,:,(1+ngz/2):(local_nz+ngz/2)), 'psi')
   IF (write_Na00) THEN
-    IF(KIN_E)THEN
-    IF (CONTAINS_ip0_e) &
-      Ne00(ikys:ikye,ikxs:ikxe,izs:ize) = moments_e(ip0_e,ij0_e,ikys:ikye,ikxs:ikxe,izs:ize,updatetlevel)
-    CALL write_field3d_kykxz(Ne00(ikys:ikye,ikxs:ikxe,izs:ize), 'Ne00')
-    ENDIF
-    IF (CONTAINS_ip0_i) &
-      Ni00(ikys:ikye,ikxs:ikxe,izs:ize) = moments_i(ip0_i,ij0_i,ikys:ikye,ikxs:ikxe,izs:ize,updatetlevel)
-    CALL write_field3d_kykxz(Ni00(ikys:ikye,ikxs:ikxe,izs:ize), 'Ni00')
-
     CALL compute_Napjz_spectrum
-    IF(KIN_E) &
-    CALL write_field3d_pjz_e(Nepjz(ips_e:ipe_e,ijs_e:ije_e,izs:ize), 'Nepjz')
-    CALL write_field3d_pjz_i(Nipjz(ips_i:ipe_i,ijs_i:ije_i,izs:ize), 'Nipjz')
+    DO ia=1,local_na
+      letter_a = name(ia)(1:1)
+      IF (CONTAINSp0) THEN
+        ! gyrocenter density
+        Na00_    = moments(ia,ip0,ij0,:,:,(1+ngz/2):(local_nz+ngz/2),updatetlevel)
+      ELSE
+        Na00_    = 0._xp
+      ENDIF
+      CALL write_field3d_kykxz(Na00_, 'N'//letter_a//'00')
+     ! <<Napj>x>y spectrum
+      Napjz_ = Napjz(ia,:,:,:)
+      CALL write_field3d_pjz(Napjz_, 'N'//letter_a//'pjz')
+    ENDDO
   ENDIF
-
   !! Fuid moments
   IF (write_dens .OR. write_fvel .OR. write_temp) &
   CALL compute_fluid_moments
 
-  IF (write_dens) THEN
-    IF(KIN_E)&
-    CALL write_field3d_kykxz(dens_e(ikys:ikye,ikxs:ikxe,izs:ize), 'dens_e')
-    CALL write_field3d_kykxz(dens_i(ikys:ikye,ikxs:ikxe,izs:ize), 'dens_i')
-  ENDIF
-
-  IF (write_fvel) THEN
-    IF(KIN_E)&
-    CALL write_field3d_kykxz(upar_e(ikys:ikye,ikxs:ikxe,izs:ize), 'upar_e')
-    CALL write_field3d_kykxz(upar_i(ikys:ikye,ikxs:ikxe,izs:ize), 'upar_i')
-    IF(KIN_E)&
-    CALL write_field3d_kykxz(uper_e(ikys:ikye,ikxs:ikxe,izs:ize), 'uper_e')
-    CALL write_field3d_kykxz(uper_i(ikys:ikye,ikxs:ikxe,izs:ize), 'uper_i')
-  ENDIF
-
-  IF (write_temp) THEN
-    IF(KIN_E)&
-    CALL write_field3d_kykxz(Tpar_e(ikys:ikye,ikxs:ikxe,izs:ize), 'Tpar_e')
-    CALL write_field3d_kykxz(Tpar_i(ikys:ikye,ikxs:ikxe,izs:ize), 'Tpar_i')
-    IF(KIN_E)&
-    CALL write_field3d_kykxz(Tper_e(ikys:ikye,ikxs:ikxe,izs:ize), 'Tper_e')
-    CALL write_field3d_kykxz(Tper_i(ikys:ikye,ikxs:ikxe,izs:ize), 'Tper_i')
-    IF(KIN_E)&
-    CALL write_field3d_kykxz(temp_e(ikys:ikye,ikxs:ikxe,izs:ize), 'temp_e')
-    CALL write_field3d_kykxz(temp_i(ikys:ikye,ikxs:ikxe,izs:ize), 'temp_i')
-  ENDIF
-
-  CONTAINS
-
-  SUBROUTINE write_field3d_kykxz(field, text)
-    USE parallel, ONLY : gather_xyz
-    IMPLICIT NONE
-    COMPLEX(dp), DIMENSION(ikys:ikye,ikxs:ikxe, izs:ize), INTENT(IN) :: field
-    CHARACTER(*), INTENT(IN) :: text
-    COMPLEX(dp), DIMENSION(1:Nky,1:Nkx,1:Nz) :: field_full
-    CHARACTER(256) :: dset_name
-    field_full = 0;
-    WRITE(dset_name, "(A, '/', A, '/', i6.6)") "/data/var3d", TRIM(text), iframe3d
-
-    IF (num_procs .EQ. 1) THEN ! no data distribution
-      CALL putarr(fidres, dset_name, field(ikys:ikye,ikxs:ikxe, izs:ize), ionode=0)
-
-    ELSEIF(GATHERV_OUTPUT) THEN ! output using one node (gatherv)
-      CALL gather_xyz(field(ikys:ikye,1:Nkx,izs:ize),field_full(1:Nky,1:Nkx,1:Nz))
-      CALL putarr(fidres, dset_name, field_full(1:Nky,1:Nkx,1:Nz), ionode=0)
-    ELSE ! output using putarrnd (very slow on marconi)
-      CALL putarrnd(fidres, dset_name, field(ikys:ikye,ikxs:ikxe, izs:ize),  (/1, 1, 3/))
+  DO ia=1,local_na
+    letter_a = name(ia)(1:1)
+    IF (write_dens) THEN
+      fmom = dens(ia,:,:,:)
+      CALL write_field3d_kykxz(fmom, 'dens_'//letter_a)
     ENDIF
-    CALL attach(fidres, dset_name, "time", time)
-  END SUBROUTINE write_field3d_kykxz
 
-  SUBROUTINE write_field3d_pjz_i(field, text)
-    USE parallel, ONLY : gather_pjz_i
-    IMPLICIT NONE
-    REAL(dp), DIMENSION(ips_i:ipe_i,ijs_i:ije_i,izs:ize), INTENT(IN) :: field
-    REAL(dp), DIMENSION(1:Np_i,1:Nj_i,1:Nz) :: field_full
-    CHARACTER(*), INTENT(IN) :: text
-    CHARACTER(LEN=50) :: dset_name
-    field_full = 0;
-    WRITE(dset_name, "(A, '/', A, '/', i6.6)") "/data/var3d", TRIM(text), iframe3d
-    IF (num_procs .EQ. 1) THEN ! no data distribution
-      CALL putarr(fidres, dset_name, field(ips_i:ipe_i,ijs_i:ije_i,izs:ize), ionode=0)
-    ELSE
-      CALL gather_pjz_i(field(ips_i:ipe_i,ijs_i:ije_i,izs:ize),field_full(1:Np_i,1:Nj_i,1:Nz))
-      CALL putarr(fidres, dset_name, field(1:Np_i,1:Nj_i,1:Nz), ionode=0)
+    IF (write_fvel) THEN
+      fmom = upar(ia,:,:,:)
+      CALL write_field3d_kykxz(fmom, 'upar_'//letter_a)
+      fmom = uper(ia,:,:,:)
+      CALL write_field3d_kykxz(fmom, 'uper_'//letter_a)
     ENDIF
-    CALL attach(fidres, dset_name, "time", time)
-  END SUBROUTINE write_field3d_pjz_i
 
-  SUBROUTINE write_field3d_pjz_e(field, text)
-    USE parallel, ONLY : gather_pjz_e
-    IMPLICIT NONE
-    REAL(dp), DIMENSION(ips_e:ipe_e,ijs_e:ije_e,izs:ize), INTENT(IN) :: field
-    REAL(dp), DIMENSION(1:pmaxe+1,1:jmaxe+1,1:Nz) :: field_full
-    CHARACTER(*), INTENT(IN) :: text
-    CHARACTER(LEN=50) :: dset_name
-    field_full = 0;
-    WRITE(dset_name, "(A, '/', A, '/', i6.6)") "/data/var3d", TRIM(text), iframe3d
-    IF (num_procs .EQ. 1) THEN ! no data distribution
-      CALL putarr(fidres, dset_name, field(ips_e:ipe_e,ijs_e:ije_e,izs:ize), ionode=0)
-    ELSE
-      CALL gather_pjz_e(field(ips_e:ipe_e,ijs_e:ije_e,izs:ize),field_full(1:pmaxe+1,1:jmaxe+1,1:Nz))
-      CALL putarr(fidres, dset_name, field(1:Np_i,1:Nj_i,1:Nz), ionode=0)
+    IF (write_temp) THEN
+      fmom = Tpar(ia,:,:,:)
+      CALL write_field3d_kykxz(fmom, 'Tpar_'//letter_a)
+      fmom = Tper(ia,:,:,:)
+      CALL write_field3d_kykxz(fmom, 'Tper_'//letter_a)
+      fmom = temp(ia,:,:,:)
+      CALL write_field3d_kykxz(fmom, 'temp_'//letter_a)
     ENDIF
-    CALL attach(fidres, dset_name, "time", time)
-  END SUBROUTINE write_field3d_pjz_e
+  ENDDO
+  CONTAINS
+    SUBROUTINE write_field3d_kykxz(field, text)
+      USE parallel, ONLY : gather_xyz, num_procs
+      IMPLICIT NONE
+      COMPLEX(xp), DIMENSION(local_nky,total_nkx,local_nz), INTENT(IN) :: field
+      CHARACTER(*), INTENT(IN) :: text
+      COMPLEX(xp), DIMENSION(total_nky,total_nkx,total_nz) :: field_full
+      CHARACTER(50) :: dset_name
+      field_full = 0;
+      WRITE(dset_name, "(A, '/', A, '/', i6.6)") "/data/var3d", TRIM(text), iframe3d
+      IF (num_procs .EQ. 1) THEN ! no data distribution
+        CALL putarr(fidres, dset_name, field, ionode=0)
+      ELSE
+        CALL gather_xyz(field,field_full,local_nky,total_nky,total_nkx,local_nz,total_nz)
+        CALL putarr(fidres, dset_name, field_full, ionode=0)
+      ENDIF
+        ! CALL attach(fidres, dset_name, "time", time)
+      END SUBROUTINE write_field3d_kykxz
+
+    SUBROUTINE write_field3d_pjz(field, text)
+      USE parallel, ONLY : gather_pjz, num_procs
+      IMPLICIT NONE
+      COMPLEX(xp), DIMENSION(local_np,local_nj,local_nz), INTENT(IN) :: field
+      CHARACTER(*), INTENT(IN) :: text
+      COMPLEX(xp), DIMENSION(total_np,total_nj,total_nz) :: field_full
+      CHARACTER(LEN=50) :: dset_name
+      field_full = 0;
+      WRITE(dset_name, "(A, '/', A, '/', i6.6)") "/data/var3d", TRIM(text), iframe3d
+      IF (num_procs .EQ. 1) THEN ! no data distribution
+        CALL putarr(fidres, dset_name, field, ionode=0)
+      ELSE
+        CALL gather_pjz(field,field_full,local_np,total_np,total_nj,local_nz,total_nz)
+        CALL putarr(fidres, dset_name, field_full, ionode=0)
+      ENDIF
+      CALL attach(fidres, dset_name, "time", time)
+    END SUBROUTINE write_field3d_pjz
 
 END SUBROUTINE diagnose_3d
 
 SUBROUTINE diagnose_5d
 
-  USE basic
+  USE basic,  ONLY: time, iframe5d,cstep
   USE futils, ONLY: append, getatt, attach, putarrnd, putarr
-  USE fields
-  USE array!, ONLY: Sepj, Sipj
-  USE grid, ONLY: ips_e,ipe_e, ips_i, ipe_i, &
-                 ijs_e,ije_e, ijs_i, ije_i, &
-                 Np_i, Nj_i, Np_e, Nj_e, Nky, Nkx, Nz, &
-                 ikxs,ikxe,ikys,ikye,izs,ize
-  USE time_integration
+  USE fields, ONLY: moments
+  USE grid,   ONLY:total_np, total_nj, total_nky, total_nkx, total_nz, &
+                   local_np, local_nj, local_nky, local_nkx, local_nz, &
+                   ngp, ngj, ngz, total_na
+  USE time_integration, ONLY: updatetlevel, ntimelevel
   USE diagnostics_par
-  USE prec_const
-  USE model, ONLY: KIN_E
+  USE prec_const, ONLY: xp,dp
   IMPLICIT NONE
 
-  CALL append(fidres,  "/data/var5d/time",           time,ionode=0)
-  CALL append(fidres, "/data/var5d/cstep", real(cstep,dp),ionode=0)
+  CALL append(fidres,  "/data/var5d/time",  REAL(time,dp),ionode=0)
+  CALL append(fidres, "/data/var5d/cstep", REAL(cstep,dp),ionode=0)
   CALL getatt(fidres,      "/data/var5d/",       "frames",iframe5d)
   iframe5d=iframe5d+1
   CALL attach(fidres,"/data/var5d/" , "frames", iframe5d)
 
   IF (write_Napj) THEN
-   IF(KIN_E)&
-  CALL write_field5d_e(moments_e(ips_e:ipe_e,ijs_e:ije_e,ikys:ikye,ikxs:ikxe,izs:ize,updatetlevel), 'moments_e')
-  CALL write_field5d_i(moments_i(ips_i:ipe_i,ijs_i:ije_i,ikys:ikye,ikxs:ikxe,izs:ize,updatetlevel), 'moments_i')
-  ENDIF
-
-  IF (write_Sapj) THEN
-   IF(KIN_E)&
-   CALL write_field5d_e(Sepj(ips_e:ipe_e,ijs_e:ije_e,ikys:ikye,ikxs:ikxe,izs:ize), 'Sepj')
-   CALL write_field5d_i(Sipj(ips_i:ipe_i,ijs_i:ije_i,ikys:ikye,ikxs:ikxe,izs:ize), 'Sipj')
+  CALL write_field5d(moments, 'moments')
   ENDIF
 
   CONTAINS
 
-  SUBROUTINE write_field5d_e(field, text)
-    USE futils, ONLY: attach, putarr, putarrnd
-    USE parallel, ONLY: gather_pjxyz_e
-    USE grid,   ONLY: ips_e,ipe_e, ijs_e,ije_e, ikxs,ikxe, ikys,ikye, izs,ize
-    USE prec_const
+  SUBROUTINE write_field5d(field, text)
+    USE basic,    ONLY: GATHERV_OUTPUT, jobnum, dt
+    USE futils,   ONLY: attach, putarr, putarrnd
+    USE parallel, ONLY: gather_pjxyz, num_procs
+    USE prec_const, ONLY: xp
     IMPLICIT NONE
-    COMPLEX(dp), DIMENSION(ips_e:ipe_e,ijs_e:ije_e,ikys:ikye,ikxs:ikxe,izs:ize), INTENT(IN) :: field
+    COMPLEX(xp), DIMENSION(total_na,local_np+ngp,local_nj+ngj,local_nky,local_nkx,local_nz+ngz,ntimelevel), INTENT(IN) :: field
     CHARACTER(*), INTENT(IN) :: text
-    COMPLEX(dp), DIMENSION(1:Np_e,1:Nj_e,1:Nky,1:Nkx,1:Nz) :: field_full
+    COMPLEX(xp), DIMENSION(total_na,local_np,local_nj,local_nky,local_nkx,local_nz) :: field_sub
+    COMPLEX(xp), DIMENSION(total_na,total_np,total_nj,total_nky,total_nkx,total_nz) :: field_full
     CHARACTER(LEN=50) :: dset_name
+    field_sub  = field(1:total_na,(1+ngp/2):(local_np+ngp/2),((1+ngj/2)):((local_nj+ngj/2)),&
+                          1:local_nky,1:local_nkx,  ((1+ngz/2)):((local_nz+ngz/2)),updatetlevel)
     field_full = 0;
     WRITE(dset_name, "(A, '/', A, '/', i6.6)") "/data/var5d", TRIM(text), iframe5d
     IF (num_procs .EQ. 1) THEN
-     CALL putarr(fidres, dset_name, field(ips_e:ipe_e,ijs_e:ije_e,ikys:ikye,ikxs:ikxe,izs:ize), ionode=0)
-   ELSEIF(GATHERV_OUTPUT) THEN ! output using one node (gatherv)
-     CALL gather_pjxyz_e(field(ips_e:ipe_e,ijs_e:ije_e,ikys:ikye,ikxs:ikxe,izs:ize),&
-                         field_full(1:Np_e,1:Nj_e,1:Nky,1:Nkx,1:Nz))
-     CALL putarr(fidres, dset_name, field_full(1:Np_i,1:Nj_i,1:Nky,1:Nkx,1:Nz), ionode=0)
-   ELSE
-     CALL putarrnd(fidres, dset_name, field(ips_e:ipe_e,ijs_e:ije_e,ikys:ikye,ikxs:ikxe,izs:ize),  (/1,3,5/))
-    ENDIF
-    CALL attach(fidres, dset_name, 'cstep', cstep)
-    CALL attach(fidres, dset_name, 'time', time)
-    CALL attach(fidres, dset_name, 'jobnum', jobnum)
-    CALL attach(fidres, dset_name, 'dt', dt)
-    CALL attach(fidres, dset_name, 'iframe2d', iframe2d)
-    CALL attach(fidres, dset_name, 'iframe5d', iframe5d)
-
-  END SUBROUTINE write_field5d_e
-
-  SUBROUTINE write_field5d_i(field, text)
-    USE futils, ONLY: attach, putarr, putarrnd
-    USE parallel, ONLY: gather_pjxyz_i
-    USE grid, ONLY: ips_i,ipe_i, ijs_i,ije_i, ikxs,ikxe, ikys,ikye, izs,ize
-    USE prec_const
-    IMPLICIT NONE
-    COMPLEX(dp), DIMENSION(ips_i:ipe_i,ijs_i:ije_i,ikys:ikye,ikxs:ikxe,izs:ize), INTENT(IN) :: field
-    CHARACTER(*), INTENT(IN) :: text
-    COMPLEX(dp), DIMENSION(1:Np_i,1:Nj_i,1:Nky,1:Nkx,1:Nz) :: field_full
-    CHARACTER(LEN=50) :: dset_name
-    field_full = 0;
-    WRITE(dset_name, "(A, '/', A, '/', i6.6)") "/data/var5d", TRIM(text), iframe5d
-    IF (num_procs .EQ. 1) THEN
-      CALL putarr(fidres, dset_name, field(ips_i:ipe_i,ijs_i:ije_i,ikys:ikye,ikxs:ikxe,izs:ize), ionode=0)
+      CALL putarr(fidres, dset_name, field_sub, ionode=0)
     ELSEIF(GATHERV_OUTPUT) THEN ! output using one node (gatherv)
-      CALL gather_pjxyz_i(field(ips_i:ipe_i,ijs_i:ije_i,ikys:ikye,ikxs:ikxe,izs:ize),&
-                        field_full(1:Np_i,1:Nj_i,1:Nky,1:Nkx,1:Nz))
-      CALL putarr(fidres, dset_name, field_full(1:Np_i,1:Nj_i,1:Nky,1:Nkx,1:Nz), ionode=0)
+      CALL gather_pjxyz(field_sub,field_full,total_na,local_np,total_np,total_nj,local_nky,total_nky,total_nkx,local_nz,total_nz)
+      CALL putarr(fidres, dset_name, field_full, ionode=0)
     ELSE
-      CALL putarrnd(fidres, dset_name, field(ips_i:ipe_i,ijs_i:ije_i,ikys:ikye,ikxs:ikxe,izs:ize),  (/1,3,5/))
+      CALL putarrnd(fidres, dset_name, field_sub,  (/1,3,5/))
     ENDIF
     CALL attach(fidres, dset_name, 'cstep', cstep)
     CALL attach(fidres, dset_name, 'time', time)
     CALL attach(fidres, dset_name, 'jobnum', jobnum)
     CALL attach(fidres, dset_name, 'dt', dt)
-    CALL attach(fidres, dset_name, 'iframe2d', iframe2d)
     CALL attach(fidres, dset_name, 'iframe5d', iframe5d)
-
-  END SUBROUTINE write_field5d_i
+  END SUBROUTINE write_field5d
 END SUBROUTINE diagnose_5d
 
 SUBROUTINE spit_snapshot_check
   USE fields, ONLY: phi
-  USE grid, ONLY: ikxs,ikxe,Nkx,ikys,ikye,Nky,izs,ize,Nz
-  USE parallel, ONLY: gather_xyz
+  USE grid, ONLY: total_nkx,total_nky,total_nz,&
+                  local_nky,local_nz, ngz
+  USE parallel, ONLY: gather_xyz, my_id
   USE basic
+  USE prec_const, ONLY: xp
   IMPLICIT NONE
   LOGICAL :: file_exist
   INTEGER :: fid_check, ikx, iky, iz
   CHARACTER(256) :: check_filename
-  COMPLEX(dp), DIMENSION(1:Nky,1:Nkx,1:Nz) :: field_to_check
+  COMPLEX(xp), DIMENSION(total_nky,total_nkx,total_nz) :: field_to_check
   !! Spit a snapshot of PHI if requested (triggered by creating a file named "check_phi")
   INQUIRE(file='check_phi', exist=file_exist)
   IF( file_exist ) THEN
      IF(my_id.EQ. 0) WRITE(*,*) 'Check file found -> gather phi..'
-     CALL gather_xyz(phi(ikys:ikye,ikxs:ikxe,izs:ize), field_to_check)
+     CALL gather_xyz(phi(:,:,(1+Ngz/2):(local_nz+Ngz/2)), field_to_check,local_nky,total_nky,total_nkx,local_nz,total_nz)
      IF(my_id.EQ. 0) THEN
        WRITE(check_filename,'(a16)') 'check_phi.out'
        OPEN(fid_check, file=check_filename, form='formatted')
        WRITE(*,*) 'Check file found -> output phi ..'
-       WRITE(fid_check,*) Nky, Nkx, Nz
-       DO iky = 1,Nky; DO ikx = 1, Nkx; DO iz = 1,Nz
+       WRITE(fid_check,*) total_nky, total_nkx, total_nz
+       DO iky = 1,total_nky; DO ikx = 1, total_nkx; DO iz = 1,total_nz
          WRITE(fid_check,*) real(field_to_check(iky,ikx,iz)), ',' , imag(field_to_check(iky,ikx,iz))
        ENDDO; ENDDO; ENDDO
        CLOSE(fid_check)
diff --git a/src/diagnostics_par_mod.F90 b/src/diagnostics_par_mod.F90
index 9cbe257dad638a9a99602f22d309e9491a1b2146..0bb60ee0a09138e065aade86e4c3921273a2c691 100644
--- a/src/diagnostics_par_mod.F90
+++ b/src/diagnostics_par_mod.F90
@@ -11,7 +11,8 @@ MODULE diagnostics_par
   LOGICAL, PUBLIC, PROTECTED :: write_Napj, write_Sapj
   LOGICAL, PUBLIC, PROTECTED :: write_dens, write_fvel, write_temp
 
-  INTEGER, PUBLIC, PROTECTED :: nsave_0d, nsave_1d, nsave_2d, nsave_3d, nsave_5d
+  INTEGER, PUBLIC, PROTECTED :: nsave_0d, nsave_1d, nsave_2d, nsave_3d, nsave_5d ! save data every n step
+  REAL,    PUBLIC, PROTECTED :: dtsave_0d, dtsave_1d, dtsave_2d, dtsave_3d, dtsave_5d ! save data every dt time unit
 
   !  HDF5 file
   CHARACTER(len=256), PUBLIC :: resfile,resfile0 = "outputs"            ! Head of main result file name
@@ -23,7 +24,6 @@ MODULE diagnostics_par
   CHARACTER(len=256), PUBLIC :: prffile,prffile0 = "profiler"        ! Head of time traces (gamma_x,Q_x)
   CHARACTER(len=256), PUBLIC :: input_fname
   CHARACTER(len=256), PUBLIC :: rstfile                     ! restart result file
-  INTEGER, PUBLIC            :: job2load                    ! jobnum of the checkpoint to load
   INTEGER, PUBLIC            :: fidres,fidmsp,fidfld,fidttr ! FID for output
   INTEGER, PUBLIC            :: fidmom,fidggm, fidprf
   INTEGER, PUBLIC            :: fidrst                      ! FID for restart file
@@ -36,31 +36,38 @@ CONTAINS
   SUBROUTINE diag_par_readinputs
     !    Read the input parameters
 
-    USE basic, ONLY : lu_in
+    USE basic, ONLY : lu_in, dt
     USE prec_const
     IMPLICIT NONE
 
-    NAMELIST /OUTPUT_PAR/ nsave_0d, nsave_1d, nsave_2d, nsave_3d, nsave_5d
+    NAMELIST /OUTPUT_PAR/ dtsave_0d, dtsave_1d, dtsave_2d, dtsave_3d, dtsave_5d
     NAMELIST /OUTPUT_PAR/ write_doubleprecision, write_gamma, write_hf, write_phi
     NAMELIST /OUTPUT_PAR/ write_Na00, write_Napj, write_Sapj
     NAMELIST /OUTPUT_PAR/ write_dens, write_fvel, write_temp
-    NAMELIST /OUTPUT_PAR/ job2load
 
     READ(lu_in,output_par)
 
+    ! set nsave variables from dtsave ones (time unit to steps)
+    nsave_0d = CEILING(dtsave_0d/dt)
+    nsave_1d = CEILING(dtsave_1d/dt)
+    nsave_2d = CEILING(dtsave_2d/dt)
+    nsave_3d = CEILING(dtsave_3d/dt)
+    nsave_5d = CEILING(dtsave_5d/dt)
+
   END SUBROUTINE diag_par_readinputs
 
 
-  SUBROUTINE diag_par_outputinputs(fid, str)
+  SUBROUTINE diag_par_outputinputs(fid)
     !
     !    Write the input parameters to the results_xx.h5 file
     !
     USE prec_const
-    USE futils, ONLY: attach
+    USE futils, ONLY: attach, creatd
     IMPLICIT NONE
     INTEGER, INTENT(in) :: fid
-    CHARACTER(len=256), INTENT(in) :: str
-
+    CHARACTER(len=256)  :: str
+    WRITE(str,'(a)') '/data/input/diag_par'
+    CALL creatd(fidres, 0,(/0/),TRIM(str),'Diagnostics Parameters Input')
     CALL attach(fid, TRIM(str), "write_doubleprecision", write_doubleprecision)
     CALL attach(fid, TRIM(str), "nsave_0d", nsave_0d)
     CALL attach(fid, TRIM(str), "nsave_1d", nsave_1d)
diff --git a/src/endrun.F90 b/src/endrun.F90
index 4667c04e4c505b11200839e7300cf55567c221af..80413fe64732e7554f1bd9e2e0453733972fd203 100644
--- a/src/endrun.F90
+++ b/src/endrun.F90
@@ -9,7 +9,7 @@ SUBROUTINE endrun
   IF( nlend ) THEN
      !----------------------------------------------------------------------
      !              1.   Normal end of run
-     IF(my_id .EQ. 0) WRITE(*,'(/a)') '   Normal exit'
+     CALL speak('   Normal exit')
 
      !----------------------------------------------------------------------
      !              2.   Abnormal exit
diff --git a/src/fields_mod.F90 b/src/fields_mod.F90
index 728b9fdbf47df65964cb6b509c63300689f125c6..f689f8f51c454227d1bd5f995ed80c2098753f82 100644
--- a/src/fields_mod.F90
+++ b/src/fields_mod.F90
@@ -3,16 +3,14 @@ MODULE fields
   use prec_const
   implicit none
 !------------------MOMENTS Napj------------------
-  ! Hermite-Moments: N_a^pj ! dimensions correspond to: p, j, kx, ky, z, updatetlevel.
-  COMPLEX(dp), DIMENSION(:,:,:,:,:,:), ALLOCATABLE :: moments_e
-  COMPLEX(dp), DIMENSION(:,:,:,:,:,:), ALLOCATABLE :: moments_i
-
+  ! Hermite-Moments: N_a^pj ! dimensions correspond to: species (a), p, j, kx, ky, z, updatetlevel.
+  COMPLEX(xp), DIMENSION(:,:,:,:,:,:,:), ALLOCATABLE :: moments
 !------------------ELECTROSTATIC POTENTIAL------------------
 
   ! Normalized electric potential: \hat{\phi} ! (kx,ky,z)
-  COMPLEX(dp), DIMENSION(:,:,:), ALLOCATABLE :: phi
+  COMPLEX(xp), DIMENSION(:,:,:), ALLOCATABLE :: phi
 
 !------------------Vector field part
-  COMPLEX(dp), DIMENSION(:,:,:), ALLOCATABLE :: psi
+  COMPLEX(xp), DIMENSION(:,:,:), ALLOCATABLE :: psi
 
 END MODULE fields
diff --git a/src/fourier_mod.F90 b/src/fourier_mod.F90
index 3c0484d7031013bf0c510ba1ed7f9b04634f62c8..594a33c1d105688de72950776fd3177775d01aab 100644
--- a/src/fourier_mod.F90
+++ b/src/fourier_mod.F90
@@ -1,8 +1,6 @@
 MODULE fourier
-
   USE prec_const
-  USE grid
-  USE basic
+  USE parallel
   use, intrinsic :: iso_c_binding
   implicit none
 
@@ -11,61 +9,77 @@ MODULE fourier
 
   PRIVATE
 
-  PUBLIC :: init_grid_distr_and_plans, poisson_bracket_and_sum, convolve_and_add, finalize_plans
-
-  real(C_DOUBLE), pointer, PUBLIC            :: real_data_f(:,:), real_data_g(:,:), bracket_sum_r(:,:)
-  complex(C_DOUBLE_complex), pointer, PUBLIC :: cmpx_data_f(:,:), cmpx_data_g(:,:), bracket_sum_c(:,:)
-  type(C_PTR)                            :: cdatar_f, cdatar_g, cdatar_c
-  type(C_PTR)                            :: cdatac_f, cdatac_g, cdatac_c
-  type(C_PTR) , PUBLIC                   :: planf, planb
-  integer(C_INTPTR_T)                    :: i, ix, iy
-  integer(C_INTPTR_T), PUBLIC            :: alloc_local_1, alloc_local_2
-  integer(C_INTPTR_T)                    :: NX_, NY_, NY_halved
-  integer                                :: communicator
+  PUBLIC :: init_grid_distr_and_plans, poisson_bracket_and_sum, finalize_plans
+  real   (c_xp_r), pointer, PUBLIC :: real_data_f(:,:), real_data_g(:,:), bracket_sum_r(:,:)
+  complex(c_xp_c), pointer, PUBLIC :: cmpx_data_f(:,:), cmpx_data_g(:,:), bracket_sum_c(:,:)
+  type(C_PTR)                 :: cdatar_f, cdatar_g, cdatar_c
+  type(C_PTR)                 :: cdatac_f, cdatac_g, cdatac_c
+  type(C_PTR) ,        PUBLIC :: planf, planb
+  integer(C_INTPTR_T)         :: i, ix, iy
+  integer(C_INTPTR_T), PUBLIC :: alloc_local_1, alloc_local_2
+  integer(C_INTPTR_T)         :: NX_, NY_, NY_halved
   ! many plan data variables
-  integer(C_INTPTR_T) :: howmany=9 ! numer of eleemnt of the tensor
+  integer(C_INTPTR_T) :: howmany=9 ! numer of element of the tensor
   integer :: rank=3                ! rank of the transform
   integer(C_INTPTR_T), dimension(2) :: fft_dims ! array containing data extent
 
   CONTAINS
 
-  SUBROUTINE init_grid_distr_and_plans(Nx,Ny)
+  SUBROUTINE init_grid_distr_and_plans(Nx,Ny,communicator,local_nkx_ptr,local_nkx_ptr_offset,local_nky_ptr,local_nky_ptr_offset)
     IMPLICIT NONE
-
-    INTEGER, INTENT(IN) :: Nx,Ny
+    INTEGER, INTENT(IN)  :: Nx,Ny, communicator
+    INTEGER(C_INTPTR_T), INTENT(OUT) :: local_nkx_ptr,local_nkx_ptr_offset,local_nky_ptr,local_nky_ptr_offset
     NX_ = Nx; NY_ = Ny
     NY_halved = NY_/2 + 1
-
-    ! communicator = MPI_COMM_WORLD
-    communicator = comm_ky
-
     !! Complex arrays F, G
     ! Compute the room to allocate
-    alloc_local_1 = fftw_mpi_local_size_2d(NY_halved, NX_, communicator, local_nky, local_nky_offset)
-    ! Initalize pointers to this room
+#ifdef SINGLE_PRECISION
+    alloc_local_1 = fftwf_mpi_local_size_2d(NY_halved, NX_, communicator, local_nky_ptr, local_nky_ptr_offset)
+#else
+    alloc_local_1 = fftw_mpi_local_size_2d(NY_halved, NX_, communicator, local_nky_ptr, local_nky_ptr_offset)
+#endif
+  ! Initalize pointers to this room
+#ifdef SINGLE_PRECISION
+    cdatac_f = fftwf_alloc_complex(alloc_local_1)
+    cdatac_g = fftwf_alloc_complex(alloc_local_1)
+    cdatac_c = fftwf_alloc_complex(alloc_local_1)
+#else
     cdatac_f = fftw_alloc_complex(alloc_local_1)
     cdatac_g = fftw_alloc_complex(alloc_local_1)
     cdatac_c = fftw_alloc_complex(alloc_local_1)
-    ! Initalize the arrays with the rooms pointed
-    call c_f_pointer(cdatac_f,   cmpx_data_f, [NX_ ,local_nky])
-    call c_f_pointer(cdatac_g,   cmpx_data_g, [NX_ ,local_nky])
-    call c_f_pointer(cdatac_c, bracket_sum_c, [NX_ ,local_nky])
+#endif
+      ! Initalize the arrays with the rooms pointed
+    call c_f_pointer(cdatac_f,   cmpx_data_f, [NX_ ,local_nky_ptr])
+    call c_f_pointer(cdatac_g,   cmpx_data_g, [NX_ ,local_nky_ptr])
+    call c_f_pointer(cdatac_c, bracket_sum_c, [NX_ ,local_nky_ptr])
 
     !! Real arrays iFFT(F), iFFT(G)
     ! Compute the room to allocate
-    alloc_local_2 = fftw_mpi_local_size_2d(NX_, NY_halved, communicator, local_nkx, local_nkx_offset)
+    alloc_local_2 = fftw_mpi_local_size_2d(NX_, NY_halved, communicator, local_nkx_ptr, local_nkx_ptr_offset)
     ! Initalize pointers to this room
+#ifdef SINGLE_PRECISION
+    cdatar_f = fftwf_alloc_real(2*alloc_local_2)
+    cdatar_g = fftwf_alloc_real(2*alloc_local_2)
+    cdatar_c = fftwf_alloc_real(2*alloc_local_2)
+#else
     cdatar_f = fftw_alloc_real(2*alloc_local_2)
     cdatar_g = fftw_alloc_real(2*alloc_local_2)
     cdatar_c = fftw_alloc_real(2*alloc_local_2)
+#endif
+
     ! Initalize the arrays with the rooms pointed
-    call c_f_pointer(cdatar_f,   real_data_f, [2*(NY_/2  + 1),local_nkx])
-    call c_f_pointer(cdatar_g,   real_data_g, [2*(NY_/2  + 1),local_nkx])
-    call c_f_pointer(cdatar_c, bracket_sum_r, [2*(NY_/2  + 1),local_nkx])
+    call c_f_pointer(cdatar_f,   real_data_f, [2*(NY_/2 + 1),local_nkx_ptr])
+    call c_f_pointer(cdatar_g,   real_data_g, [2*(NY_/2 + 1),local_nkx_ptr])
+    call c_f_pointer(cdatar_c, bracket_sum_r, [2*(NY_/2 + 1),local_nkx_ptr])
 
     ! Plan Creation (out-of-place forward and backward FFT)
+#ifdef SINGLE_PRECISION
+    planf = fftwf_mpi_plan_dft_r2c_2D(NX_, NY_, real_data_f, cmpx_data_f, communicator,  ior(FFTW_MEASURE, FFTW_MPI_TRANSPOSED_OUT))
+    planb = fftwf_mpi_plan_dft_c2r_2D(NX_, NY_, cmpx_data_f, real_data_f, communicator,  ior(FFTW_MEASURE, FFTW_MPI_TRANSPOSED_IN))
+#else
     planf = fftw_mpi_plan_dft_r2c_2D(NX_, NY_, real_data_f, cmpx_data_f, communicator,  ior(FFTW_MEASURE, FFTW_MPI_TRANSPOSED_OUT))
     planb = fftw_mpi_plan_dft_c2r_2D(NX_, NY_, cmpx_data_f, real_data_f, communicator,  ior(FFTW_MEASURE, FFTW_MPI_TRANSPOSED_IN))
+#endif
 
    if ((.not. c_associated(planf)) .OR. (.not. c_associated(planb))) then
       ERROR STOP '>> ERROR << plan creation error!!'
@@ -73,62 +87,57 @@ MODULE fourier
 
   END SUBROUTINE init_grid_distr_and_plans
 
-
   !!! Compute the poisson bracket of [F,G] to real space
   !   - Compute the convolution using the convolution theorem
-  SUBROUTINE poisson_bracket_and_sum( F_, G_, sum_real_)
+  SUBROUTINE poisson_bracket_and_sum(ky_, kx_, inv_Ny, inv_Nx, AA_y, AA_x,&
+                                     local_nky_ptr, local_nkx_ptr, F_, G_, sum_real_)
     IMPLICIT NONE
-    COMPLEX(C_DOUBLE_COMPLEX), DIMENSION(ikys:ikye,ikxs:ikxe),&
-                             INTENT(IN)    :: F_, G_ ! input fields
-    real(C_DOUBLE), pointer, INTENT(INOUT) :: sum_real_(:,:)
+    INTEGER(C_INTPTR_T),                  INTENT(IN) :: local_nkx_ptr,local_nky_ptr
+    REAL(xp),                             INTENT(IN) :: inv_Nx, inv_Ny
+    REAL(xp), DIMENSION(local_nky_ptr),   INTENT(IN) :: ky_, AA_y
+    REAL(xp), DIMENSION(local_nkx_ptr),   INTENT(IN) :: kx_, AA_x
+    COMPLEX(c_xp_c), DIMENSION(local_nky_ptr,local_nkx_ptr), &
+                                          INTENT(IN) :: F_(:,:), G_(:,:)
+    real(c_xp_r), pointer,             INTENT(INOUT) :: sum_real_(:,:)
+    INTEGER :: ikx,iky
     ! First term df/dx x dg/dy
-    DO ikx = ikxs, ikxe
-      DO iky = ikys, ikye
-        cmpx_data_f(ikx,iky-local_nky_offset) = &
-              imagu*kxarray(ikx)*F_(iky,ikx)*AA_x(ikx)*AA_y(iky) !Anti aliasing filter
-        cmpx_data_g(ikx,iky-local_nky_offset) = &
-              imagu*kyarray(iky)*G_(iky,ikx)*AA_x(ikx)*AA_y(iky) !Anti aliasing filter
+    DO ikx = 1,local_nkx_ptr
+      DO iky = 1,local_nky_ptr
+        cmpx_data_f(ikx,iky) = imagu*kx_(ikx)*F_(iky,ikx)*AA_x(ikx)*AA_y(iky) 
+        cmpx_data_g(ikx,iky) = imagu*ky_(iky)*G_(iky,ikx)*AA_x(ikx)*AA_y(iky)
       ENDDO
     ENDDO
+
+#ifdef SINGLE_PRECISION
+    call fftwf_mpi_execute_dft_c2r(planb, cmpx_data_f, real_data_f)
+    call fftwf_mpi_execute_dft_c2r(planb, cmpx_data_g, real_data_g)
+#else
     call fftw_mpi_execute_dft_c2r(planb, cmpx_data_f, real_data_f)
     call fftw_mpi_execute_dft_c2r(planb, cmpx_data_g, real_data_g)
-    sum_real_ = sum_real_ + real_data_f  * real_data_g*inv_Ny*inv_Nx
+#endif
+    sum_real_ = sum_real_ + real_data_f*real_data_g*inv_Ny*inv_Nx
     ! Second term -df/dy x dg/dx
-    DO ikx = ikxs, ikxe
-      DO iky = ikys, ikye
-        cmpx_data_f(ikx,iky-local_nky_offset) = &
-              imagu*kyarray(iky)*F_(iky,ikx)*AA_x(ikx)*AA_y(iky) !Anti aliasing filter
-        cmpx_data_g(ikx,iky-local_nky_offset) = &
-              imagu*kxarray(ikx)*G_(iky,ikx)*AA_x(ikx)*AA_y(iky) !Anti aliasing filter
+    DO ikx = 1,local_nkx_ptr
+      DO iky = 1,local_nky_ptr
+        cmpx_data_f(ikx,iky) = &
+              imagu*ky_(iky)*F_(iky,ikx)*AA_x(ikx)*AA_y(iky)
+        cmpx_data_g(ikx,iky) = &
+              imagu*kx_(ikx)*G_(iky,ikx)*AA_x(ikx)*AA_y(iky)
       ENDDO
     ENDDO
+#ifdef SINGLE_PRECISION
+    call fftwf_mpi_execute_dft_c2r(planb, cmpx_data_f, real_data_f)
+    call fftwf_mpi_execute_dft_c2r(planb, cmpx_data_g, real_data_g)
+#else
     call fftw_mpi_execute_dft_c2r(planb, cmpx_data_f, real_data_f)
     call fftw_mpi_execute_dft_c2r(planb, cmpx_data_g, real_data_g)
-    sum_real_ = sum_real_ - real_data_f  * real_data_g*inv_Ny*inv_Nx
+#endif
+    sum_real_ = sum_real_ - real_data_f*real_data_g*inv_Ny*inv_Nx
 END SUBROUTINE poisson_bracket_and_sum
 
-!!! Compute the poisson bracket of [F,G] to real space
-!   - Compute the convolution using the convolution theorem
-SUBROUTINE convolve_and_add( F_, G_)
-  IMPLICIT NONE
-  COMPLEX(C_DOUBLE_COMPLEX), DIMENSION(ikys:ikye,ikxs:ikxe),&
-                  INTENT(IN)  :: F_, G_ ! input fields
-  ! First term df/dx x dg/dy
-  DO ikx = ikxs, ikxe
-    DO iky = ikys, ikye
-      cmpx_data_f(ikx,iky-local_nky_offset) = F_(iky,ikx)*AA_x(ikx)*AA_y(iky) !Anti aliasing filter
-      cmpx_data_g(ikx,iky-local_nky_offset) = G_(iky,ikx)*AA_x(ikx)*AA_y(iky) !Anti aliasing filter
-    ENDDO
-  ENDDO
-  call fftw_mpi_execute_dft_c2r(planb, cmpx_data_f, real_data_f)
-  call fftw_mpi_execute_dft_c2r(planb, cmpx_data_g, real_data_g)
-  bracket_sum_r = bracket_sum_r + real_data_f  * real_data_g*inv_Ny*inv_Nx
-END SUBROUTINE convolve_and_add
-
 
 SUBROUTINE finalize_plans
   IMPLICIT NONE
-
   IF (my_id .EQ. 0) write(*,*) '..plan Destruction.'
   call fftw_destroy_plan(planb)
   call fftw_destroy_plan(planf)
diff --git a/src/geometry_mod.F90 b/src/geometry_mod.F90
index 1160c567169f23462a72d3a2a63f12598d5b05db..7d6dd5d6cd18779e127938436361972d9c416848 100644
--- a/src/geometry_mod.F90
+++ b/src/geometry_mod.F90
@@ -1,33 +1,26 @@
 module geometry
 ! computes geometrical quantities
 ! Adapted from B.J.Frei MOLIX code (2021)
-
-  use prec_const
-  use model
-  use grid
-  use array
-  use fields
-  use basic
-  use calculus, ONLY: simpson_rule_z
-  use miller, ONLY: set_miller_parameters, get_miller
+  use prec_const, ONLY: xp
 implicit none
   PRIVATE
   ! Geometry input parameters
   CHARACTER(len=16), &
                PUBLIC, PROTECTED :: geom      = 's-alpha'
-  REAL(dp),    PUBLIC, PROTECTED :: q0        = 1.4_dp  ! safety factor
-  REAL(dp),    PUBLIC, PROTECTED :: shear     = 0._dp   ! magnetic field shear
-  REAL(dp),    PUBLIC, PROTECTED :: eps       = 0.18_dp ! inverse aspect ratio
-  REAL(dp),    PUBLIC, PROTECTED :: alpha_MHD = 0 ! shafranov shift effect alpha = -q2 R dbeta/dr
+  REAL(xp),    PUBLIC, PROTECTED :: q0        = 1.4_xp  ! safety factor
+  REAL(xp),    PUBLIC, PROTECTED :: shear     = 0._xp   ! magnetic field shear
+  REAL(xp),    PUBLIC, PROTECTED :: eps       = 0.18_xp ! inverse aspect ratio
+  REAL(xp),    PUBLIC, PROTECTED :: alpha_MHD = 0 ! shafranov shift effect alpha = -q2 R dbeta/dr
   ! parameters for Miller geometry
-  REAL(dp),    PUBLIC, PROTECTED :: kappa     = 1._dp ! elongation (1 for circular)
-  REAL(dp),    PUBLIC, PROTECTED :: s_kappa   = 0._dp ! r normalized derivative skappa = r/kappa dkappa/dr
-  REAL(dp),    PUBLIC, PROTECTED :: delta     = 0._dp ! triangularity
-  REAL(dp),    PUBLIC, PROTECTED :: s_delta   = 0._dp ! '' sdelta = r/sqrt(1-delta2) ddelta/dr
-  REAL(dp),    PUBLIC, PROTECTED :: zeta      = 0._dp ! squareness
-  REAL(dp),    PUBLIC, PROTECTED :: s_zeta    = 0._dp ! '' szeta = r dzeta/dr
+  REAL(xp),    PUBLIC, PROTECTED :: kappa     = 1._xp ! elongation (1 for circular)
+  REAL(xp),    PUBLIC, PROTECTED :: s_kappa   = 0._xp ! r normalized derivative skappa = r/kappa dkappa/dr
+  REAL(xp),    PUBLIC, PROTECTED :: delta     = 0._xp ! triangularity
+  REAL(xp),    PUBLIC, PROTECTED :: s_delta   = 0._xp ! '' sdelta = r/sqrt(1-delta2) ddelta/dr
+  REAL(xp),    PUBLIC, PROTECTED :: zeta      = 0._xp ! squareness
+  REAL(xp),    PUBLIC, PROTECTED :: s_zeta    = 0._xp ! '' szeta = r dzeta/dr
   ! to apply shift in the parallel z-BC if shearless
-  REAL(dp),    PUBLIC, PROTECTED :: shift_y   = 0._dp ! for Arno
+  REAL(xp),    PUBLIC, PROTECTED :: shift_y   = 0._xp ! for Arno <3
+  INTEGER,     PUBLIC, PROTECTED :: Npol      = 1         ! number of poloidal turns
   ! Chooses the type of parallel BC we use for the unconnected kx modes (active for non-zero shear only)
   !  'periodic'     : Connect a disconnected kx to a mode on the other cadran
   !  'dirichlet'    : Connect a disconnected kx to 0
@@ -37,39 +30,39 @@ implicit none
                PUBLIC, PROTECTED :: parallel_bc
 
   ! GENE unused additional parameters for miller_mod
-  REAL(dp), PUBLIC, PROTECTED :: edge_opt      = 0._dp ! meant to redistribute the points in z
-  REAL(dp), PUBLIC, PROTECTED :: major_R       = 1._dp ! major radius
-  REAL(dp), PUBLIC, PROTECTED :: major_Z       = 0._dp ! vertical elevation
-  REAL(dp), PUBLIC, PROTECTED :: dpdx_pm_geom  = 0._dp ! amplitude mag. eq. pressure grad.
-  REAL(dp), PUBLIC, PROTECTED ::          C_y  = 0._dp ! defines y coordinate : Cy (q theta - phi)
-  REAL(dp), PUBLIC, PROTECTED ::         C_xy  = 1._dp ! defines x coordinate : B = Cxy Vx x Vy
+  REAL(xp), PUBLIC, PROTECTED :: edge_opt      = 0._xp ! meant to redistribute the points in z
+  REAL(xp), PUBLIC, PROTECTED :: major_R       = 1._xp ! major radius
+  REAL(xp), PUBLIC, PROTECTED :: major_Z       = 0._xp ! vertical elevation
+  REAL(xp), PUBLIC, PROTECTED :: xpdx_pm_geom  = 0._xp ! amplitude mag. eq. pressure grad.
+  REAL(xp), PUBLIC, PROTECTED ::          C_y  = 0._xp ! defines y coordinate : Cy (q theta - phi)
+  REAL(xp), PUBLIC, PROTECTED ::         C_xy  = 1._xp ! defines x coordinate : B = Cxy Vx x Vy
 
   ! Geometrical auxiliary variables
   LOGICAL,     PUBLIC, PROTECTED :: SHEARED  = .false. ! flag for shear magn. geom or not
   ! Curvature
-  REAL(dp),    PUBLIC, DIMENSION(:,:,:,:), ALLOCATABLE :: Ckxky  ! dimensions: kx, ky, z, odd/even p
+  REAL(xp),    PUBLIC, DIMENSION(:,:,:,:), ALLOCATABLE :: Ckxky  ! dimensions: kx, ky, z, odd/even p
   ! Jacobian
-  REAL(dp),    PUBLIC, DIMENSION(:,:), ALLOCATABLE :: Jacobian ! dimensions: z, odd/even p
-  COMPLEX(dp), PUBLIC, PROTECTED        :: iInt_Jacobian ! Inverse integrated Jacobian
+  REAL(xp),    PUBLIC, DIMENSION(:,:), ALLOCATABLE :: Jacobian ! dimensions: z, odd/even p
+  COMPLEX(xp), PUBLIC, PROTECTED        :: iInt_Jacobian ! Inverse integrated Jacobian
   ! Metric
-  REAL(dp),    PUBLIC, DIMENSION(:,:), ALLOCATABLE :: gxx, gxy, gxz, gyy, gyz, gzz ! dimensions: z, odd/even p
-  REAL(dp),    PUBLIC, DIMENSION(:,:), ALLOCATABLE :: dxdr, dxdZ, Rc, phic, Zc
+  REAL(xp),    PUBLIC, DIMENSION(:,:), ALLOCATABLE :: gxx, gxy, gxz, gyy, gyz, gzz ! dimensions: z, odd/even p
+  REAL(xp),    PUBLIC, DIMENSION(:,:), ALLOCATABLE :: dxdr, dxdZ, Rc, phic, Zc
   ! derivatives of magnetic field strength
-  REAL(dp),    PUBLIC, DIMENSION(:,:), ALLOCATABLE :: dBdx, dBdy, dBdz, dlnBdz
+  REAL(xp),    PUBLIC, DIMENSION(:,:), ALLOCATABLE :: dBdx, dBdy, dBdz, dlnBdz
   ! Relative magnetic field strength
-  REAL(dp),    PUBLIC, DIMENSION(:,:), ALLOCATABLE :: hatB
+  REAL(xp),    PUBLIC, DIMENSION(:,:), ALLOCATABLE :: hatB
   ! Relative strength of major radius
-  REAL(dp),    PUBLIC, DIMENSION(:,:), ALLOCATABLE :: hatR, hatZ
+  REAL(xp),    PUBLIC, DIMENSION(:,:), ALLOCATABLE :: hatR, hatZ
   ! Some geometrical coefficients
-  REAL(dp),    PUBLIC, DIMENSION(:,:) , ALLOCATABLE :: gradz_coeff  ! 1 / [ J_{xyz} \hat{B} ]
+  REAL(xp),    PUBLIC, DIMENSION(:,:) , ALLOCATABLE :: gradz_coeff  ! 1 / [ J_{xyz} \hat{B} ]
   ! Array to map the index of mode (kx,ky,-pi) to (kx+2pi*s*ky,ky,pi) for sheared periodic boundary condition
   INTEGER,     PUBLIC, DIMENSION(:,:), ALLOCATABLE :: ikx_zBC_L, ikx_zBC_R
   ! Geometric factor in front of the parallel phi derivative (not implemented)
-  ! REAL(dp),    PUBLIC, DIMENSION(:,:), ALLOCATABLE :: Gamma_phipar
+  ! REAL(xp),    PUBLIC, DIMENSION(:,:), ALLOCATABLE :: Gamma_phipar
   ! pb_phase, for parallel boundary phase, contains the factor that occurs when taking into account
   !   that q0 is defined in the middle of the fluxtube whereas the radial position spans in [0,Lx)
   !   This shift introduces a (-1)^(Nexc*iky) phase change that is included in GENE
-  COMPLEX(dp), PUBLIC, DIMENSION(:),   ALLOCATABLE :: pb_phase_L, pb_phase_R
+  COMPLEX(xp), PUBLIC, DIMENSION(:),   ALLOCATABLE :: pb_phase_L, pb_phase_R
 
   ! Functions
   PUBLIC :: geometry_readinputs, geometry_outputinputs,&
@@ -78,13 +71,14 @@ CONTAINS
 
 
   SUBROUTINE geometry_readinputs
+    USE basic, ONLY: lu_in, speak
     ! Read the input parameters
     IMPLICIT NONE
     NAMELIST /GEOMETRY/ geom, q0, shear, eps,&
       kappa, s_kappa,delta, s_delta, zeta, s_zeta,& ! For miller
-      parallel_bc, shift_y
+      parallel_bc, shift_y, Npol
     READ(lu_in,geometry)
-    IF(shear .NE. 0._dp) SHEARED = .true.
+    IF(shear .NE. 0._xp) SHEARED = .true.
     SELECT CASE(parallel_bc)
       CASE ('dirichlet')
       CASE ('periodic')
@@ -94,40 +88,49 @@ CONTAINS
       CASE DEFAULT
         ERROR STOP '>> ERROR << Parallel BC not recognized'
     END SELECT
-    IF(my_id .EQ. 0) print*, 'Parallel BC : ', parallel_bc
+    CALL speak('Parallel BC : '//parallel_bc)
 
   END SUBROUTINE geometry_readinputs
 
   subroutine eval_magnetic_geometry
+    USE grid,     ONLY: total_nky, total_nz, local_nkx, local_nky, local_nz, Ngz,&
+                        kxarray, kyarray, set_kparray, nzgrid, zweights_SR, ieven
+    USE basic,    ONLY: speak
+    USE miller,   ONLY: set_miller_parameters, get_miller
+    USE calculus, ONLY: simpson_rule_z
     ! evalute metrix, elementwo_third_kpmaxts, jacobian and gradient
     implicit none
-    REAL(dp) :: kx,ky
-    COMPLEX(dp), DIMENSION(izs:ize) :: integrant
-    real(dp) :: G1,G2,G3,Cx,Cy
+    REAL(xp) :: kx,ky
+    COMPLEX(xp), DIMENSION(local_nz) :: integrant
+    real(xp) :: G1,G2,G3,Cx,Cy
+    INTEGER  :: eo,iz,iky,ikx
 
     ! Allocate arrays
-    CALL geometry_allocate_mem
+    CALL geometry_allocate_mem(local_nky,local_nkx,local_nz,Ngz,nzgrid)
     !
-    IF( (Ny .EQ. 1) .AND. (Nz .EQ. 1)) THEN !1D perp linear run
-      IF( my_id .eq. 0 ) WRITE(*,*) '1D perpendicular geometry'
+    IF( (total_nky .EQ. 1) .AND. (total_nz .EQ. 1)) THEN !1D perp linear run
+      CALL speak('1D perpendicular geometry')
       call eval_1D_geometry
     ELSE
       SELECT CASE(geom)
         CASE('s-alpha')
-          IF( my_id .eq. 0 ) WRITE(*,*) 's-alpha geometry'
+          CALL speak('s-alpha geometry')
           call eval_salpha_geometry
         CASE('Z-pinch','z-pinch','Zpinch','zpinch')
-          IF( my_id .eq. 0 ) WRITE(*,*) 'Z-pinch geometry'
+          CALL speak('Z-pinch geometry')
           call eval_zpinch_geometry
           SHEARED = .FALSE.
-          shear   = 0._dp
+          shear   = 0._xp
+          q0      = 0._xp
+          eps     = 0._xp
+          kappa   = 1._xp
         CASE('miller')
-          IF( my_id .eq. 0 ) WRITE(*,*) 'Miller geometry'
+          CALL speak('Miller geometry')
           call set_miller_parameters(kappa,s_kappa,delta,s_delta,zeta,s_zeta)
-          call get_miller(eps,major_R,major_Z,q0,shear,alpha_MHD,edge_opt,&
-               C_y,C_xy,dpdx_pm_geom,gxx,gyy,gzz,gxy,gxz,gyz,&
-               dBdx,dBdy,hatB,jacobian,dBdz,hatR,hatZ,dxdR,dxdZ,&
-               Ckxky,gradz_coeff)
+          call get_miller(eps,major_R,major_Z,q0,shear,Npol,alpha_MHD,edge_opt,&
+                          C_y,C_xy,xpdx_pm_geom,gxx,gyy,gzz,gxy,gxz,gyz,&
+                          dBdx,dBdy,hatB,jacobian,dBdz,hatR,hatZ,dxdR,dxdZ,&
+                          Ckxky,gradz_coeff)
         CASE DEFAULT
           ERROR STOP '>> ERROR << geometry not recognized!!'
         END SELECT
@@ -136,82 +139,69 @@ CONTAINS
     ! Evaluate perpendicular wavenumber
     !  k_\perp^2 = g^{xx} k_x^2 + 2 g^{xy}k_x k_y + k_y^2 g^{yy}
     !  normalized to rhos_
-    DO eo = 0,1
-      DO iz = izgs,izge
-        DO iky = ikys, ikye
-          ky = kyarray(iky)
-          DO ikx = ikxs, ikxe
-            kx = kxarray(ikx)
-            ! there is a factor 1/B from the normalization; important to match GENE
-            ! this factor comes from $b_a$ argument in the Bessel. Kperp is not used otherwise.
-            kparray(iky, ikx, iz, eo) = &
-            SQRT( gxx(iz,eo)*kx**2 + 2._dp*gxy(iz,eo)*kx*ky + gyy(iz,eo)*ky**2)/ hatB(iz,eo)
-          ENDDO
-        ENDDO
-      ENDDO
+    CALL set_kparray(gxx,gxy,gyy,hatB)
+    DO eo = 1,nzgrid
       ! Curvature operator (Frei et al. 2022 eq 2.15)
-      DO iz = izgs,izge
+      DO iz = 1,local_nz+Ngz
         G1 = gxx(iz,eo)*gyy(iz,eo)-gxy(iz,eo)*gxy(iz,eo)
         G2 = gxx(iz,eo)*gyz(iz,eo)-gxy(iz,eo)*gxz(iz,eo)
         G3 = gxy(iz,eo)*gyz(iz,eo)-gyy(iz,eo)*gxz(iz,eo)
         ! Here we divide by hatB because our equation is formulated with grad(lnB) terms (not gradB like in GENE)
         Cx =-(dBdy(iz,eo) + G2/G1*dBdz(iz,eo))/hatB(iz,eo)
         Cy = (dBdx(iz,eo) - G3/G1*dBdz(iz,eo))/hatB(iz,eo)
-
-        DO iky = ikys, ikye
+        DO iky = 1,local_nky
           ky = kyarray(iky)
-           DO ikx= ikxs, ikxe
+           DO ikx= 1,local_nkx
              kx = kxarray(ikx)
              Ckxky(iky, ikx, iz,eo) = (Cx*kx + Cy*ky)/C_xy
            ENDDO
         ENDDO
         ! coefficient in the front of parallel derivative
-        gradz_coeff(iz,eo) = 1._dp /(jacobian(iz,eo)*hatB(iz,eo))
+        gradz_coeff(iz,eo) = 1._xp /(jacobian(iz,eo)*hatB(iz,eo))
         ! d/dz(ln B) to correspond to the formulation in paper 2023
         dlnBdz(iz,eo)      = dBdz(iz,eo)/hatB(iz,eo)
         ! Geometric factor in front to the maxwellian dzphi term (not implemented)
         ! Gamma_phipar(iz,eo) = G2/G1
       ENDDO
     ENDDO
-
-
+    !
     ! set the mapping for parallel boundary conditions
     CALL set_ikx_zBC_map
-
-    two_third_kpmax = 2._dp/3._dp * MAXVAL(kparray)
     !
     ! Compute the inverse z integrated Jacobian (useful for flux averaging)
-    integrant = Jacobian(izs:ize,0) ! Convert into complex array
-    CALL simpson_rule_z(integrant,iInt_Jacobian)
-    iInt_Jacobian = 1._dp/iInt_Jacobian ! reverse it
+    integrant = Jacobian((1+ngz/2):(local_nz+ngz/2),ieven) ! Convert into complex array
+    CALL simpson_rule_z(local_nz,zweights_SR,integrant,iInt_Jacobian)
+    iInt_Jacobian = 1._xp/iInt_Jacobian ! reverse it
   END SUBROUTINE eval_magnetic_geometry
   !
   !--------------------------------------------------------------------------------
   !
 
   SUBROUTINE eval_salpha_geometry
+    USE grid, ONLY : local_nz,Ngz,zarray,nzgrid
   ! evaluate s-alpha geometry model
   implicit none
-  REAL(dp) :: z
-  alpha_MHD = 0._dp
+  REAL(xp) :: z
+  INTEGER  :: iz, eo
+  alpha_MHD = 0._xp
 
-  parity: DO eo = 0,1
-  zloop: DO iz = izgs,izge
+  DO eo = 1,nzgrid
+   DO iz = 1,local_nz+Ngz
     z = zarray(iz,eo)
 
     ! metric
-      gxx(iz,eo) = 1._dp
+      gxx(iz,eo) = 1._xp
       gxy(iz,eo) = shear*z - alpha_MHD*SIN(z)
-      gxz(iz,eo) = 0._dp
-      gyy(iz,eo) = 1._dp + (shear*z - alpha_MHD*SIN(z))**2
-      gyz(iz,eo) = 1._dp/eps
-      gzz(iz,eo) = 1._dp/eps**2
+      gxz(iz,eo) = 0._xp
+      gyy(iz,eo) = 1._xp + (shear*z - alpha_MHD*SIN(z))**2
+      gyz(iz,eo) = 1._xp/eps
+      gzz(iz,eo) = 1._xp/eps**2
       dxdR(iz,eo)= COS(z)
       dxdZ(iz,eo)= SIN(z)
 
     ! Poloidal plane coordinates
-      hatR(iz,eo) = 1._dp + eps*COS(z)
-      hatZ(iz,eo) = 1._dp + eps*SIN(z)
+      hatR(iz,eo) = 1._xp + eps*COS(z)
+      hatZ(iz,eo) = 1._xp + eps*SIN(z)
 
     ! toroidal coordinates
       Rc  (iz,eo) = hatR(iz,eo)
@@ -219,49 +209,50 @@ CONTAINS
       Zc  (iz,eo) = hatZ(iz,eo)
 
     ! Relative strengh of modulus of B
-      hatB(iz,eo) = 1._dp/(1._dp + eps*COS(z))
+      hatB(iz,eo) = 1._xp/(1._xp + eps*COS(z))
 
     ! Jacobian
       Jacobian(iz,eo) = q0/hatB(iz,eo)
 
     ! Derivative of the magnetic field strenght
       dBdx(iz,eo) = -COS(z)*hatB(iz,eo)**2 ! LB = 1
-      dBdy(iz,eo) =  0._dp
+      dBdy(iz,eo) =  0._xp
       dBdz(iz,eo) =  eps*SIN(z)*hatB(iz,eo)**2
 
     ! Curvature factor
-      C_xy = 1._dp
-
-  ENDDO zloop
-  ENDDO parity
+      C_xy = 1._xp
 
+   ENDDO
+  ENDDO
   END SUBROUTINE eval_salpha_geometry
   !
   !--------------------------------------------------------------------------------
   !
 
   SUBROUTINE eval_zpinch_geometry
+  USE grid, ONLY : local_nz,Ngz,zarray,nzgrid
   implicit none
-  REAL(dp) :: z
-  alpha_MHD = 0._dp
+  REAL(xp) :: z
+  INTEGER  :: iz, eo
+  alpha_MHD = 0._xp
 
-  parity: DO eo = 0,1
-  zloop: DO iz = izgs,izge
+  DO eo = 1,nzgrid
+   DO iz = 1,local_nz+Ngz
     z = zarray(iz,eo)
 
     ! metric
-      gxx(iz,eo) = 1._dp
-      gxy(iz,eo) = 0._dp
-      gxz(iz,eo) = 0._dp
-      gyy(iz,eo) = 1._dp ! 1/R but R is the normalization length
-      gyz(iz,eo) = 0._dp
-      gzz(iz,eo) = 1._dp
+      gxx(iz,eo) = 1._xp
+      gxy(iz,eo) = 0._xp
+      gxz(iz,eo) = 0._xp
+      gyy(iz,eo) = 1._xp ! 1/R but R is the normalization length
+      gyz(iz,eo) = 0._xp
+      gzz(iz,eo) = 1._xp
       dxdR(iz,eo)= COS(z)
       dxdZ(iz,eo)= SIN(z)
 
     ! Relative strengh of radius
-      hatR(iz,eo) = 1._dp ! R but R is the normalization length
-      hatZ(iz,eo) = 1._dp
+      hatR(iz,eo) = 1._xp ! R but R is the normalization length
+      hatZ(iz,eo) = 1._xp
 
     ! toroidal coordinates
       Rc  (iz,eo) = hatR(iz,eo)
@@ -269,61 +260,51 @@ CONTAINS
       Zc  (iz,eo) = hatZ(iz,eo)
 
     ! Jacobian
-      Jacobian(iz,eo) = 1._dp ! R but R is the normalization length
+      Jacobian(iz,eo) = 1._xp ! R but R is the normalization length
 
     ! Relative strengh of modulus of B
-      hatB   (iz,eo) = 1._dp
+      hatB   (iz,eo) = 1._xp
 
     ! Derivative of the magnetic field strenght
       dBdx(iz,eo) = -hatB(iz,eo) ! LB = 1
-      dBdy(iz,eo) = 0._dp
-      dBdz(iz,eo) = 0._dp ! Gene put a factor hatB or 1/hatR in this
+      dBdy(iz,eo) = 0._xp
+      dBdz(iz,eo) = 0._xp ! Gene put a factor hatB or 1/hatR in this
 
-  ENDDO zloop
-  ENDDO parity
+  ENDDO
+  ENDDO
 
   ! Curvature factor
-    C_xy = 1._dp
+    C_xy = 1._xp
   END SUBROUTINE eval_zpinch_geometry
     !
     !--------------------------------------------------------------------------------
-    !
+    ! NOT TESTED
   subroutine eval_1D_geometry
+    USE grid, ONLY : local_nz,Ngz,zarray, nzgrid
     ! evaluate 1D perp geometry model
     implicit none
-    REAL(dp) :: z, kx, ky
+    REAL(xp) :: z
+    INTEGER  :: iz, eo
+    DO eo = 1,nzgrid
+      DO iz = 1,local_nz+Ngz
+      z = zarray(iz,eo)
 
-    parity: DO eo = 0,1
-    zloop: DO iz = izs,ize
-     z = zarray(iz,eo)
+      ! metric
+      gxx(iz,eo) = 1._xp
+      gxy(iz,eo) = 0._xp
+      gyy(iz,eo) = 1._xp
 
-    ! metric
-       gxx(iz,eo) = 1._dp
-       gxy(iz,eo) = 0._dp
-       gyy(iz,eo) = 1._dp
-
-    ! Relative strengh of radius
-       hatR(iz,eo) = 1._dp
+      ! Relative strengh of radius
+      hatR(iz,eo) = 1._xp
 
-    ! Jacobian
-       Jacobian(iz,eo) = 1._dp
+      ! Jacobian
+      Jacobian(iz,eo) = 1._xp
 
-    ! Relative strengh of modulus of B
-       hatB(iz,eo) = 1._dp
-
-    ! Curvature operator
-       DO iky = ikys, ikye
-         ky = kyarray(iky)
-          DO ikx= ikxs, ikxe
-            kx = kxarray(ikx)
-            Ckxky(ikx, iky, iz,eo) = -kx ! .. multiply by hatB to cancel the 1/ hatB factor in moments_eqs_rhs.f90 routine
-          ENDDO
-       ENDDO
-    ! coefficient in the front of parallel derivative
-       gradz_coeff(iz,eo) = 1._dp
-  ENDDO zloop
-  ENDDO parity
+      ! Relative strengh of modulus of B
+      hatB(iz,eo) = 1._xp
 
+      ENDDO
+    ENDDO
    END SUBROUTINE eval_1D_geometry
 
    !
@@ -331,186 +312,190 @@ CONTAINS
    !
 
  SUBROUTINE set_ikx_zBC_map
- IMPLICIT NONE
- REAL(dp) :: shift
-
- ALLOCATE(ikx_zBC_L(ikys:ikye,ikxs:ikxe))
- ALLOCATE(ikx_zBC_R(ikys:ikye,ikxs:ikxe))
- ALLOCATE(pb_phase_L(ikys:ikye))
- ALLOCATE(pb_phase_R(ikys:ikye))
- !! No shear case (simple id mapping) or not at the end of the z domain
- !3            | 1    2    3    4    5    6 |  ky = 3 dky
- !2   ky       | 1    2    3    4    5    6 |  ky = 2 dky
- !1   A        | 1    2    3    4    5    6 |  ky = 1 dky
- !0   | -> kx  | 1____2____3____4____5____6 |  ky = 0 dky
- !(e.g.) kx =    0   0.1  0.2  0.3 -0.2 -0.1  (dkx=free)
- DO iky = ikys,ikye
-   DO ikx = ikxs,ikxe
-     ikx_zBC_L(iky,ikx) = ikx ! connect to itself per default
-     ikx_zBC_R(iky,ikx) = ikx
-   ENDDO
-   pb_phase_L(iky) = 1._dp ! no phase change per default
-   pb_phase_R(iky) = 1._dp
- ENDDO
- ! Parallel boundary are not trivial for sheared case and if
- !  the user does not ask explicitly for shearless bc
- IF(SHEARED .AND. (parallel_bc .NE. 'shearless')) THEN
-   !!!!!!!!!! LEFT PARALLEL BOUNDARY
-   ! Modify connection map only at border of z (matters for MPI z-parallelization)
-   IF(contains_zmin) THEN ! Check if the process is at the start of the fluxtube
-     DO iky = ikys,ikye
-       ! Formula for the shift due to shear after Npol turns
-       shift = 2._dp*PI*shear*kyarray(iky)*Npol
-         DO ikx = ikxs,ikxe
-           ! Usual formula for shifting indices using that dkx = 2pi*shear*dky/Nexc
-           ikx_zBC_L(iky,ikx) = ikx-(iky-1)*Nexc
-           ! Check if it points out of the kx domain
-           ! IF( (kxarray(ikx) - shift) .LT. kx_min ) THEN
-           IF( (ikx-(iky-1)*Nexc) .LT. 1 ) THEN ! outside of the frequ domain
-             SELECT CASE(parallel_bc)
-               CASE ('dirichlet')! connected to 0
-                 ikx_zBC_L(iky,ikx) = -99
-               CASE ('periodic')
-                 ikx_zBC_L(iky,ikx) = ikx
-               CASE ('cyclic')! reroute it by cycling through modes
-                 ikx_zBC_L(iky,ikx) = MODULO(ikx_zBC_L(iky,ikx)-1,Nkx)+1
-             END SELECT
-           ENDIF
-         ENDDO
-         ! phase present in GENE from a shift of the x origin by Lx/2 (useless?)
-         ! We also put the user defined shift in the y direction (see Volcokas et al. 2022)
-         pb_phase_L(iky) = (-1._dp)**(Nexc*(iky-1))*EXP(imagu*REAL(iky-1,dp)*2._dp*pi*shift_y)
-     ENDDO
-   ENDIF
-   ! Option for disconnecting every modes, viz. connecting all boundary to 0
-   IF(parallel_bc .EQ. 'disconnected') ikx_zBC_L = -99
-   !!!!!!!!!! RIGHT PARALLEL BOUNDARY
-   IF(contains_zmax) THEN ! Check if the process is at the end of the flux-tube
-     DO iky = ikys,ikye
-       ! Formula for the shift due to shear after Npol
-       shift = 2._dp*PI*shear*kyarray(iky)*Npol
-         DO ikx = ikxs,ikxe
-           ! Usual formula for shifting indices
-           ikx_zBC_R(iky,ikx) = ikx+(iky-1)*Nexc
-           ! Check if it points out of the kx domain
-           ! IF( (kxarray(ikx) + shift) .GT. kx_max ) THEN ! outside of the frequ domain
-           IF( (ikx+(iky-1)*Nexc) .GT. Nkx ) THEN ! outside of the frequ domain
-             SELECT CASE(parallel_bc)
-               CASE ('dirichlet') ! connected to 0
-                 ikx_zBC_R(iky,ikx) = -99
-               CASE ('periodic') ! connected to itself as for shearless
-                 ikx_zBC_R(iky,ikx) = ikx
-               CASE ('cyclic')
-                 ! write(*,*) 'check',ikx,iky, kxarray(ikx) + shift, '>', kx_max
-                 ikx_zBC_R(iky,ikx) = MODULO(ikx_zBC_R(iky,ikx)-1,Nkx)+1
-             END SELECT
-           ENDIF
-         ENDDO
-         ! phase present in GENE from a shift ofthe x origin by Lx/2 (useless?)
-         ! We also put the user defined shift in the y direction (see Volcokas et al. 2022)
-         pb_phase_R(iky) = (-1._dp)**(Nexc*(iky-1))*EXP(-imagu*REAL(iky-1,dp)*2._dp*pi*shift_y)
+   USE grid,       ONLY: local_nky,total_nkx,contains_zmin,contains_zmax, Nexc,&
+                         local_nky_offset
+   USE prec_const, ONLY: imagu, pi
+   IMPLICIT NONE
+   ! REAL(xp) :: shift
+   INTEGER :: ikx,iky, mn_y
+   ALLOCATE(ikx_zBC_L(local_nky,total_nkx))
+   ALLOCATE(ikx_zBC_R(local_nky,total_nkx))
+   ALLOCATE(pb_phase_L(local_nky))
+   ALLOCATE(pb_phase_R(local_nky))
+   !! No shear case (simple id mapping) or not at the end of the z domain
+   !3            | 1    2    3    4    5    6 |  ky = 3 dky
+   !2   ky       | 1    2    3    4    5    6 |  ky = 2 dky
+   !1   A        | 1    2    3    4    5    6 |  ky = 1 dky
+   !0   | -> kx  | 1____2____3____4____5____6 |  ky = 0 dky
+   !(e.g.) kx =    0   0.1  0.2  0.3 -0.2 -0.1  (dkx=free)
+   DO iky = 1,local_nky
+     DO ikx = 1,total_nkx
+       ikx_zBC_L(iky,ikx) = ikx ! connect to itself per default
+       ikx_zBC_R(iky,ikx) = ikx
      ENDDO
-   ENDIF
-   ! Option for disconnecting every modes, viz. connecting all boundary to 0
-   IF(parallel_bc .EQ. 'disconnected') ikx_zBC_R = -99
-  ENDIF
-  ! write(*,*) kxarray
-  ! write(*,*) kyarray
-  ! write(*,*) 'ikx_zBC_L :-----------'
-  ! DO iky = ikys,ikye
-  !   print*, ikx_zBC_L(iky,:)
-  ! enddo
-  ! print*, pb_phase_L
-  ! write(*,*) 'ikx_zBC_R :-----------'
-  ! DO iky = ikys,ikye
-  !   print*, ikx_zBC_R(iky,:)
-  ! enddo
-  ! print*, pb_phase_R
-  ! print*, shift_y
-  ! stop
-  !!!!!!! Example of maps ('x' means connected to 0 value, in the table it is -99)
-  ! dirichlet connection map BC of the RIGHT boundary (z=pi*Npol-dz)
-  !3            | 4    x    x    x    2    3 |  ky = 3 dky
-  !2   ky       | 3    4    x    x    1    2 |  ky = 2 dky
-  !1   A        | 2    3    4    x    6    1 |  ky = 1 dky
-  !0   | -> kx  | 1____2____3____4____5____6 |  ky = 0 dky
-  !kx =           0   0.1  0.2  0.3 -0.2 -0.1  (dkx=2pi*shear*npol*dky)
-
-  ! periodic connection map BC of the RIGHT boundary (z=pi*Npol-dz)
-  !3            | 4    2    3    4    2    3 |  ky = 3 dky
-  !2   ky       | 3    4    3    4    1    2 |  ky = 2 dky
-  !1   A        | 2    3    4    4    6    1 |  ky = 1 dky
-  !0   | -> kx  | 1____2____3____4____5____6 |  ky = 0 dky
-  !kx =           0   0.1  0.2  0.3 -0.2 -0.1  (dkx=2pi*shear*npol*dky)
-
-  ! cyclic connection map BC of the LEFT boundary (z=-pi*Npol)
-  !3            | 4    5    6    1    2    3 |  ky = 3 dky
-  !2   ky       | 5    6    1    2    3    4 |  ky = 2 dky
-  !1   A        | 6    1    2    3    4    5 |  ky = 1 dky
-  !0   | -> kx  | 1____2____3____4____5____6 |  ky = 0 dky
-  !(e.g.) kx =    0   0.1  0.2  0.3 -0.2 -0.1  (dkx=2pi*shear*npol*dky)
-
-  ! shearless connection map BC of the LEFT/RIGHT boundary (z=+/-pi*Npol)
-  !3            | 1    2    3    4    5    6 |  ky = 3 dky
-  !2   ky       | 1    2    3    4    5    6 |  ky = 2 dky
-  !1   A        | 1    2    3    4    5    6 |  ky = 1 dky
-  !0   | -> kx  | 1____2____3____4____5____6 |  ky = 0 dky
-  !(e.g.) kx =    0   0.1  0.2  0.3 -0.2 -0.1  (dkx=2pi*shear*npol*dky)
-
-  ! disconnected connection map BC of the LEFT/RIGHT boundary (z=+/-pi*Npol)
-  !3            | x    x    x    x    x    x |  ky = 3 dky
-  !2   ky       | x    x    x    x    x    x |  ky = 2 dky
-  !1   A        | x    x    x    x    x    x |  ky = 1 dky
-  !0   | -> kx  | x____x____x____x____x____x |  ky = 0 dky
-  !(e.g.) kx =    0   0.1  0.2  0.3 -0.2 -0.1  (dkx=2pi*shear*npol*dky)
+     pb_phase_L(iky) = 1._xp ! no phase change per default
+     pb_phase_R(iky) = 1._xp
+   ENDDO
+   ! Parallel boundary are not trivial for sheared case and if
+   !  the user does not ask explicitly for shearless bc
+   IF(SHEARED .AND. (parallel_bc .NE. 'shearless')) THEN
+     !!!!!!!!!! LEFT PARALLEL BOUNDARY
+     ! Modify connection map only at border of z (matters for MPI z-parallelization)
+     IF(contains_zmin) THEN ! Check if the process is at the start of the fluxtube
+       DO iky = 1,local_nky
+        ! get the real mode number (iky starts at 1 and is shifted from paral)
+        mn_y = iky-1+local_nky_offset
+        ! Formula for the shift due to shear after Npol turns
+        ! shift = 2._xp*PI*shear*kyarray(iky)*Npol
+          DO ikx = 1,total_nkx
+            ! Usual formula for shifting indices using that dkx = 2pi*shear*dky/Nexc
+            ikx_zBC_L(iky,ikx) = ikx-mn_y*Nexc
+            ! Check if it points out of the kx domain
+            ! IF( (kxarray(ikx) - shift) .LT. kx_min ) THEN
+            IF( (ikx-mn_y*Nexc) .LT. 1 ) THEN ! outside of the frequ domain
+              SELECT CASE(parallel_bc)
+                CASE ('dirichlet')! connected to 0
+                  ikx_zBC_L(iky,ikx) = -99
+                CASE ('periodic')
+                  ikx_zBC_L(iky,ikx) = ikx
+                CASE ('cyclic')! reroute it by cycling through modes
+                  ikx_zBC_L(iky,ikx) = MODULO(ikx_zBC_L(iky,ikx)-1,total_nkx)+1
+              END SELECT
+            ENDIF
+          ENDDO
+          ! phase present in GENE from a shift of the x origin by Lx/2 (useless?)
+          ! We also put the user defined shift in the y direction (see Volcokas et al. 2022)
+          pb_phase_L(iky) = (-1._xp)**(Nexc*mn_y)*EXP(imagu*REAL(mn_y,xp)*2._xp*pi*shift_y)
+       ENDDO
+     ENDIF
+     ! Option for disconnecting every modes, viz. connecting all boundary to 0
+     IF(parallel_bc .EQ. 'disconnected') ikx_zBC_L = -99
+     !!!!!!!!!! RIGHT PARALLEL BOUNDARY
+     IF(contains_zmax) THEN ! Check if the process is at the end of the flux-tube
+       DO iky = 1,local_nky
+        ! get the real mode number (iky starts at 1 and is shifted from paral)
+        mn_y = iky-1+local_nky_offset
+        ! Formula for the shift due to shear after Npol
+        ! shift = 2._xp*PI*shear*kyarray(iky)*Npol
+          DO ikx = 1,total_nkx
+            ! Usual formula for shifting indices
+            ikx_zBC_R(iky,ikx) = ikx+mn_y*Nexc
+            ! Check if it points out of the kx domain
+            ! IF( (kxarray(ikx) + shift) .GT. kx_max ) THEN ! outside of the frequ domain
+            IF( (ikx+mn_y*Nexc) .GT. total_nkx ) THEN ! outside of the frequ domain
+              SELECT CASE(parallel_bc)
+                CASE ('dirichlet') ! connected to 0
+                  ikx_zBC_R(iky,ikx) = -99
+                CASE ('periodic') ! connected to itself as for shearless
+                  ikx_zBC_R(iky,ikx) = ikx
+                CASE ('cyclic')
+                  ! write(*,*) 'check',ikx,iky, kxarray(ikx) + shift, '>', kx_max
+                  ikx_zBC_R(iky,ikx) = MODULO(ikx_zBC_R(iky,ikx)-1,total_nkx)+1
+              END SELECT
+            ENDIF
+          ENDDO
+          ! phase present in GENE from a shift ofthe x origin by Lx/2 (useless?)
+          ! We also put the user defined shift in the y direction (see Volcokas et al. 2022)
+          pb_phase_R(iky) = (-1._xp)**(Nexc*mn_y)*EXP(-imagu*REAL(mn_y,xp)*2._xp*pi*shift_y)
+       ENDDO
+     ENDIF
+     ! Option for disconnecting every modes, viz. connecting all boundary to 0
+     IF(parallel_bc .EQ. 'disconnected') ikx_zBC_R = -99
+    ENDIF
+    ! write(*,*) kxarray
+    ! write(*,*) kyarray
+    ! write(*,*) 'ikx_zBC_L :-----------'
+    ! DO iky = ikys,ikye
+    !   print*, ikx_zBC_L(iky,:)
+    ! enddo
+    ! print*, pb_phase_L
+    ! write(*,*) 'ikx_zBC_R :-----------'
+    ! DO iky = ikys,ikye
+    !   print*, ikx_zBC_R(iky,:)
+    ! enddo
+    ! print*, pb_phase_R
+    ! print*, shift_y
+    ! stop
+    !!!!!!! Example of maps ('x' means connected to 0 value, in the table it is -99)
+    ! dirichlet connection map BC of the RIGHT boundary (z=pi*Npol-dz)
+    !3            | 4    x    x    x    2    3 |  ky = 3 dky
+    !2   ky       | 3    4    x    x    1    2 |  ky = 2 dky
+    !1   A        | 2    3    4    x    6    1 |  ky = 1 dky
+    !0   | -> kx  | 1____2____3____4____5____6 |  ky = 0 dky
+    !kx =           0   0.1  0.2  0.3 -0.2 -0.1  (dkx=2pi*shear*npol*dky)
+
+    ! periodic connection map BC of the RIGHT boundary (z=pi*Npol-dz)
+    !3            | 4    2    3    4    2    3 |  ky = 3 dky
+    !2   ky       | 3    4    3    4    1    2 |  ky = 2 dky
+    !1   A        | 2    3    4    4    6    1 |  ky = 1 dky
+    !0   | -> kx  | 1____2____3____4____5____6 |  ky = 0 dky
+    !kx =           0   0.1  0.2  0.3 -0.2 -0.1  (dkx=2pi*shear*npol*dky)
+
+    ! cyclic connection map BC of the LEFT boundary (z=-pi*Npol)
+    !3            | 4    5    6    1    2    3 |  ky = 3 dky
+    !2   ky       | 5    6    1    2    3    4 |  ky = 2 dky
+    !1   A        | 6    1    2    3    4    5 |  ky = 1 dky
+    !0   | -> kx  | 1____2____3____4____5____6 |  ky = 0 dky
+    !(e.g.) kx =    0   0.1  0.2  0.3 -0.2 -0.1  (dkx=2pi*shear*npol*dky)
+
+    ! shearless connection map BC of the LEFT/RIGHT boundary (z=+/-pi*Npol)
+    !3            | 1    2    3    4    5    6 |  ky = 3 dky
+    !2   ky       | 1    2    3    4    5    6 |  ky = 2 dky
+    !1   A        | 1    2    3    4    5    6 |  ky = 1 dky
+    !0   | -> kx  | 1____2____3____4____5____6 |  ky = 0 dky
+    !(e.g.) kx =    0   0.1  0.2  0.3 -0.2 -0.1  (dkx=2pi*shear*npol*dky)
+
+    ! disconnected connection map BC of the LEFT/RIGHT boundary (z=+/-pi*Npol)
+    !3            | x    x    x    x    x    x |  ky = 3 dky
+    !2   ky       | x    x    x    x    x    x |  ky = 2 dky
+    !1   A        | x    x    x    x    x    x |  ky = 1 dky
+    !0   | -> kx  | x____x____x____x____x____x |  ky = 0 dky
+    !(e.g.) kx =    0   0.1  0.2  0.3 -0.2 -0.1  (dkx=2pi*shear*npol*dky)
 END SUBROUTINE set_ikx_zBC_map
 
 !
 !--------------------------------------------------------------------------------
 !
 
-   SUBROUTINE geometry_allocate_mem
-
+   SUBROUTINE geometry_allocate_mem(local_nky,local_nkx,local_nz,Ngz,nzgrid)
+     INTEGER, INTENT(IN) :: local_nky,local_nkx,local_nz,Ngz,nzgrid
        ! Curvature and geometry
-       CALL allocate_array( Ckxky,   ikys,ikye, ikxs,ikxe,izgs,izge,0,1)
-       CALL allocate_array(   Jacobian,izgs,izge, 0,1)
-       CALL allocate_array(        gxx,izgs,izge, 0,1)
-       CALL allocate_array(        gxy,izgs,izge, 0,1)
-       CALL allocate_array(        gxz,izgs,izge, 0,1)
-       CALL allocate_array(        gyy,izgs,izge, 0,1)
-       CALL allocate_array(        gyz,izgs,izge, 0,1)
-       CALL allocate_array(        gzz,izgs,izge, 0,1)
-       CALL allocate_array(       dBdx,izgs,izge, 0,1)
-       CALL allocate_array(       dBdy,izgs,izge, 0,1)
-       CALL allocate_array(       dBdz,izgs,izge, 0,1)
-       CALL allocate_array(     dlnBdz,izgs,izge, 0,1)
-       CALL allocate_array(       hatB,izgs,izge, 0,1)
-       ! CALL allocate_array(Gamma_phipar,izgs,izge, 0,1) (not implemented)
-       CALL allocate_array(       hatR,izgs,izge, 0,1)
-       CALL allocate_array(       hatZ,izgs,izge, 0,1)
-       CALL allocate_array(         Rc,izgs,izge, 0,1)
-       CALL allocate_array(       phic,izgs,izge, 0,1)
-       CALL allocate_array(         Zc,izgs,izge, 0,1)
-       CALL allocate_array(       dxdR,izgs,izge, 0,1)
-       CALL allocate_array(       dxdZ,izgs,izge, 0,1)
-       call allocate_array(gradz_coeff,izgs,izge, 0,1)
-       CALL allocate_array( kparray, ikys,ikye, ikxs,ikxe,izgs,izge,0,1)
+       ALLOCATE( Ckxky(local_nky,local_nkx,local_nz+Ngz,nzgrid))
+       ALLOCATE(   Jacobian(local_nz+Ngz,nzgrid))
+       ALLOCATE(        gxx(local_nz+Ngz,nzgrid))
+       ALLOCATE(        gxy(local_nz+Ngz,nzgrid))
+       ALLOCATE(        gxz(local_nz+Ngz,nzgrid))
+       ALLOCATE(        gyy(local_nz+Ngz,nzgrid))
+       ALLOCATE(        gyz(local_nz+Ngz,nzgrid))
+       ALLOCATE(        gzz(local_nz+Ngz,nzgrid))
+       ALLOCATE(       dBdx(local_nz+Ngz,nzgrid))
+       ALLOCATE(       dBdy(local_nz+Ngz,nzgrid))
+       ALLOCATE(       dBdz(local_nz+Ngz,nzgrid))
+       ALLOCATE(     dlnBdz(local_nz+Ngz,nzgrid))
+       ALLOCATE(       hatB(local_nz+Ngz,nzgrid))
+       ! ALLOCATE(Gamma_phipar,(local_nz+Ngz,nzgrid)) (not implemented)
+       ALLOCATE(       hatR(local_nz+Ngz,nzgrid))
+       ALLOCATE(       hatZ(local_nz+Ngz,nzgrid))
+       ALLOCATE(         Rc(local_nz+Ngz,nzgrid))
+       ALLOCATE(       phic(local_nz+Ngz,nzgrid))
+       ALLOCATE(         Zc(local_nz+Ngz,nzgrid))
+       ALLOCATE(       dxdR(local_nz+Ngz,nzgrid))
+       ALLOCATE(       dxdZ(local_nz+Ngz,nzgrid))
+       ALLOCATE(gradz_coeff(local_nz+Ngz,nzgrid))
 
    END SUBROUTINE geometry_allocate_mem
 
-   SUBROUTINE geometry_outputinputs(fidres, str)
+   SUBROUTINE geometry_outputinputs(fid)
      ! Write the input parameters to the results_xx.h5 file
-
-     USE futils, ONLY: attach
-
-     USE prec_const
+     USE futils, ONLY: attach, creatd
      IMPLICIT NONE
-
-     INTEGER, INTENT(in) :: fidres
-     CHARACTER(len=256), INTENT(in) :: str
-     CALL attach(fidres, TRIM(str),"geometry",  geom)
-     CALL attach(fidres, TRIM(str),      "q0",    q0)
-     CALL attach(fidres, TRIM(str),   "shear", shear)
-     CALL attach(fidres, TRIM(str),     "eps",   eps)
+     INTEGER, INTENT(in) :: fid
+     CHARACTER(len=256)  :: str
+     WRITE(str,'(a)') '/data/input/geometry'
+     CALL creatd(fid, 0,(/0/),TRIM(str),'Geometry Input')
+     CALL attach(fid, TRIM(str),"geometry",  geom)
+     CALL attach(fid, TRIM(str),      "q0",    q0)
+     CALL attach(fid, TRIM(str),   "shear", shear)
+     CALL attach(fid, TRIM(str),     "eps",   eps)
    END SUBROUTINE geometry_outputinputs
 end module geometry
diff --git a/src/ghosts_mod.F90 b/src/ghosts_mod.F90
index bd23ad9217f795eefb7489e3b37387627c4fc616..217b19140e11e77d1896df9c6835910ed693813e 100644
--- a/src/ghosts_mod.F90
+++ b/src/ghosts_mod.F90
@@ -1,9 +1,6 @@
 module ghosts
-USE basic
-USE grid
-USE time_integration
-USE model, ONLY : KIN_E, beta
-USE geometry, ONLY : SHEARED, ikx_zBC_L, ikx_zBC_R, pb_phase_L, pb_phase_R
+USE mpi
+USE prec_const, ONLY: xp, mpi_xp_c
 IMPLICIT NONE
 
 INTEGER :: status(MPI_STATUS_SIZE), source, dest, count, ipg
@@ -13,35 +10,27 @@ PUBLIC :: update_ghosts_moments, update_ghosts_EM
 CONTAINS
 
 SUBROUTINE update_ghosts_moments
-  CALL cpu_time(t0_ghost)
-
+  USE grid,     ONLY: total_nz
+  USE parallel, ONLY: num_procs_p
+  IMPLICIT NONE
   IF (num_procs_p .GT. 1) THEN ! Do it only if we share the p
-      IF(KIN_E)&
-      CALL update_ghosts_p_e
-      CALL update_ghosts_p_i
+    CALL update_ghosts_p_mom
   ENDIF
-
-  IF(Nz .GT. 1) THEN
-    IF(KIN_E) &
-    CALL update_ghosts_z_e
-    CALL update_ghosts_z_i
+  IF(total_nz .GT. 1) THEN
+    CALL update_ghosts_z_mom
   ENDIF
-
-  CALL cpu_time(t1_ghost)
-  tc_ghost = tc_ghost + (t1_ghost - t0_ghost)
 END SUBROUTINE update_ghosts_moments
 
 SUBROUTINE update_ghosts_EM
-  CALL cpu_time(t0_ghost)
-
-  IF(Nz .GT. 1) THEN
-    CALL update_ghosts_z_phi
-    IF(beta .GT. 0._dp) &
-      CALL update_ghosts_z_psi
+  USE model,  ONLY :  beta
+  USE grid,   ONLY: total_nz
+  USE fields, ONLY: phi, psi
+  IMPLICIT NONE
+  IF(total_nz .GT. 1) THEN
+    CALL update_ghosts_z_3D(phi)
+    IF(beta .GT. 0._xp) &
+      CALL update_ghosts_z_3D(psi)
   ENDIF
-
-  CALL cpu_time(t1_ghost)
-  tc_ghost = tc_ghost + (t1_ghost - t0_ghost)
 END SUBROUTINE update_ghosts_EM
 
 
@@ -57,59 +46,39 @@ END SUBROUTINE update_ghosts_EM
 !proc 3:                        [11 12|13 14 15 16|17 18]
 !                                                   ^  ^
 !Closure by zero truncation :                       0  0
-SUBROUTINE update_ghosts_p_e
-    USE fields, ONLY : moments_e
-    IMPLICIT NONE
-
-    count = (ijge_e-ijgs_e+1)*(ikye-ikys+1)*(ikxe-ikxs+1)*(izge-izgs+1)
-
-    !!!!!!!!!!! Send ghost to right neighbour !!!!!!!!!!!!!!!!!!!!!!
-    ! Send the last local moment to fill the -1 neighbour ghost
-    CALL mpi_sendrecv(moments_e(ipe_e  ,:,:,:,:,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_R, 10, & ! Send to right
-                      moments_e(ips_e-1,:,:,:,:,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_L, 10, & ! Recieve from left
-                      comm0, status, ierr)
-    IF (deltape .EQ. 1) & ! If we have odd Hermite degrees we need a 2nd order stencil
-    CALL mpi_sendrecv(moments_e(ipe_e-1,:,:,:,:,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_R, 11, & ! Send to right
-                      moments_e(ips_e-2,:,:,:,:,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_L, 11, & ! Recieve from left
-                      comm0, status, ierr)
-
-    !!!!!!!!!!! Send ghost to left neighbour !!!!!!!!!!!!!!!!!!!!!!
-    CALL mpi_sendrecv(moments_e(ips_e  ,:,:,:,:,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_L, 12, & ! Send to left
-                      moments_e(ipe_e+1,:,:,:,:,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_R, 12, & ! Recieve from right
-                      comm0, status, ierr)
-    IF (deltape .EQ. 1) & ! If we have odd Hermite degrees we need a 2nd order stencil
-    CALL mpi_sendrecv(moments_e(ips_e+1,:,:,:,:,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_L, 13, & ! Send to left
-                      moments_e(ipe_e+2,:,:,:,:,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_R, 13, & ! Recieve from right
-                      comm0, status, ierr)
-
-END SUBROUTINE update_ghosts_p_e
 
 !Communicate p+1, p+2 moments to left neighboor and p-1, p-2 moments to right one
-SUBROUTINE update_ghosts_p_i
-  USE fields, ONLY : moments_i
-    IMPLICIT NONE
-
-    count = (ijge_i-ijgs_i+1)*(ikye-ikys+1)*(ikxe-ikxs+1)*(izge-izgs+1) ! Number of elements sent
-
-    !!!!!!!!!!! Send ghost to right neighbour !!!!!!!!!!!!!!!!!!!!!!
-    CALL mpi_sendrecv(moments_i(ipe_i  ,:,:,:,:,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_R, 14, &
-                      moments_i(ips_i-1,:,:,:,:,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_L, 14, &
-                      comm0, status, ierr)
-    IF (deltapi .EQ. 1) & ! If we have odd Hermite degrees we need a 2nd order stencil
-    CALL mpi_sendrecv(moments_i(ipe_i-1,:,:,:,:,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_R, 15, &
-                      moments_i(ips_i-2,:,:,:,:,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_L, 15, &
-                      comm0, status, ierr)
-
-    !!!!!!!!!!! Send ghost to left neighbour !!!!!!!!!!!!!!!!!!!!!!
-    CALL mpi_sendrecv(moments_i(ips_i  ,:,:,:,:,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_L, 16, &
-                      moments_i(ipe_i+1,:,:,:,:,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_R, 16, &
-                      comm0, status, ierr)
-    IF (deltapi .EQ. 1) & ! If we have odd Hermite degrees we need a 2nd order stencil
-    CALL mpi_sendrecv(moments_i(ips_i+1,:,:,:,:,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_L, 17, &
-                      moments_i(ipe_i+2,:,:,:,:,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_R, 17, &
-                      comm0, status, ierr)
-
-END SUBROUTINE update_ghosts_p_i
+SUBROUTINE update_ghosts_p_mom
+  USE time_integration, ONLY: updatetlevel
+  USE fields,   ONLY: moments
+  USE parallel, ONLY: nbr_R,nbr_L,comm0, exchange_ghosts_1D
+  USE grid,     ONLY: local_na,local_np,local_nj,local_nky,local_nkx,local_nz,&
+                              ngp,ngj,ngz
+  IMPLICIT NONE
+  INTEGER :: ierr, first, last, count
+  first = 1 + ngp/2
+  last  = local_np + ngp/2
+  ! count = local_na*(local_nj+ngj)*local_nky*local_nkx*(local_nz+ngz) ! Number of elements to send
+  !!!!!!!!!!! Send ghost to right neighbour !!!!!!!!!!!!!!!!!!!!!!
+  ! DO ig = 1,ngp/2
+  !   CALL mpi_sendrecv(moments(:,last-(ig-1),:,:,:,:,updatetlevel), count, mpi_xp_c, nbr_R, 14+ig, &
+  !                     moments(:,first-ig   ,:,:,:,:,updatetlevel), count, mpi_xp_c, nbr_L, 14+ig, &
+  !                     comm0, status, ierr)
+  ! ENDDO
+  !!!!!!!!!!! Send ghost to left neighbour !!!!!!!!!!!!!!!!!!!!!!
+  ! DO ig = 1,ngp/2
+  ! CALL mpi_sendrecv(moments(:,first+(ig-1),:,:,:,:,updatetlevel), count, mpi_xp_c, nbr_L, 16+ig, &
+  !                   moments(:,last + ig   ,:,:,:,:,updatetlevel), count, mpi_xp_c, nbr_R, 16+ig, &
+  !                   comm0, status, ierr)
+  ! ENDDO
+  count = (ngp/2)*local_na*(local_nj+ngj)*local_nky*local_nkx*(local_nz+ngz) ! Number of elements to send
+  CALL mpi_sendrecv(moments(:,(last-(ngp/2-1)):(last),:,:,:,:,updatetlevel), count, mpi_xp_c, nbr_R, 14, &
+                    moments(:,(first-ngp/2):(first-1),:,:,:,:,updatetlevel), count, mpi_xp_c, nbr_L, 14, &
+                    comm0, status, ierr)
+  CALL mpi_sendrecv(moments(:,(first):(first+(ngp/2-1)),:,:,:,:,updatetlevel), count, mpi_xp_c, nbr_L, 16, &
+                    moments(:,(last+1):(last+ngp/2)    ,:,:,:,:,updatetlevel), count, mpi_xp_c, nbr_R, 16, &
+                    comm0, status, ierr)
+END SUBROUTINE update_ghosts_p_mom
 
 !Communicate z+1, z+2 moments to left neighboor and z-1, z-2 moments to right one
 ! [a b|C D|e f] : proc n has moments a to f where a,b,e,f are ghosts
@@ -123,245 +92,130 @@ END SUBROUTINE update_ghosts_p_i
 !proc 3:                        [11 12|13 14 15 16|17 18]
 !                                                   ^  ^
 !Periodic:                                          0  1
-SUBROUTINE update_ghosts_z_e
-  USE parallel, ONLY : buff_pjxy_zBC_e
-  USE fields, ONLY : moments_e
-  IMPLICIT NONE
-  INTEGER :: ikxBC_L, ikxBC_R
-  IF(Nz .GT. 1) THEN
-    IF (num_procs_z .GT. 1) THEN
-      CALL MPI_BARRIER(MPI_COMM_WORLD,ierr)
-      count = (ipge_e-ipgs_e+1)*(ijge_e-ijgs_e+1)*(ikye-ikys+1)*(ikxe-ikxs+1)
 
-      !!!!!!!!!!! Send ghost to up neighbour !!!!!!!!!!!!!!!!!!!!!!
-      ! Send the last local moment to fill the -1 neighbour ghost
-      CALL mpi_sendrecv(moments_e(:,:,:,:,ize  ,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_U, 20, & ! Send to Up the last
-                                  buff_pjxy_zBC_e(:,:,:,:,-1), count, MPI_DOUBLE_COMPLEX, nbr_D, 20, & ! Recieve from Down the first-1
-                        comm0, status, ierr)
-      CALL mpi_sendrecv(moments_e(:,:,:,:,ize-1,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_U, 21, & ! Send to Up the last
-                                  buff_pjxy_zBC_e(:,:,:,:,-2), count, MPI_DOUBLE_COMPLEX, nbr_D, 21, & ! Recieve from Down the first-1
-                        comm0, status, ierr)
-      !!!!!!!!!!! Send ghost to down neighbour !!!!!!!!!!!!!!!!!!!!!!
-      CALL mpi_sendrecv(moments_e(:,:,:,:,izs  ,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_D, 22, & ! Send to Up the last
-                                  buff_pjxy_zBC_e(:,:,:,:,+1), count, MPI_DOUBLE_COMPLEX, nbr_U, 22, & ! Recieve from Down the first-1
+SUBROUTINE update_ghosts_z_mom
+  USE geometry,         ONLY: ikx_zBC_L, ikx_zBC_R, pb_phase_L, pb_phase_R
+  USE time_integration, ONLY: updatetlevel
+  USE parallel, ONLY: comm0,nbr_U,nbr_D,num_procs_z
+  USE fields,   ONLY: moments
+  USE grid,     ONLY: local_na,local_np,local_nj,local_nky,local_nkx,local_nz,&
+                      ngp,ngj,ngz
+  IMPLICIT NONE
+  !! buffer for data exchanges
+  COMPLEX(xp),DIMENSION(local_na,local_np+ngp,local_nj+ngj,local_nky,local_nkx,-Ngz/2:Ngz/2) :: buff_pjxy_zBC
+  INTEGER :: ikxBC_L, ikxBC_R, ikx, iky, first, last, ig, ierr
+  first = 1 + ngz/2
+  last  = local_nz + ngz/2
+  count = local_na*(local_np+ngp)*(local_nj+ngj)*local_nky*local_nkx
+  IF (num_procs_z .GT. 1) THEN
+    CALL MPI_BARRIER(MPI_COMM_WORLD,ierr)
+    !!!!!!!!!!! Send ghost to up neighbour !!!!!!!!!!!!!!!!!!!!!!
+    ! Send the last local moment to fill the -1 neighbour ghost
+    DO ig=1,ngz/2
+      CALL mpi_sendrecv(moments(:,:,:,:,:,last-(ig-1),updatetlevel),count,mpi_xp_c,nbr_U,24+ig, & ! Send to Up the last
+                                       buff_pjxy_zBC(:,:,:,:,:,-ig),count,mpi_xp_c,nbr_D,24+ig, & ! Recieve from Down the first-1
                         comm0, status, ierr)
-      CALL mpi_sendrecv(moments_e(:,:,:,:,izs+1,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_D, 23, & ! Send to Up the last
-                                  buff_pjxy_zBC_e(:,:,:,:,+2), count, MPI_DOUBLE_COMPLEX, nbr_U, 23, & ! Recieve from Down the first-1
+    ENDDO
+    !!!!!!!!!!! Send ghost to down neighbour !!!!!!!!!!!!!!!!!!!!!!
+    DO ig=1,ngz/2
+      CALL mpi_sendrecv(moments(:,:,:,:,:,first+(ig-1),updatetlevel),count,mpi_xp_c,nbr_D,26+ig, & ! Send to Up the last
+                                         buff_pjxy_zBC(:,:,:,:,:,ig),count,mpi_xp_c,nbr_U,26+ig, & ! Recieve from Down the first-1
                         comm0, status, ierr)
-    ELSE !No parallel (copy)
-      buff_pjxy_zBC_e(:,:,:,:,-1) = moments_e(:,:,:,:,ize  ,updatetlevel)
-      buff_pjxy_zBC_e(:,:,:,:,-2) = moments_e(:,:,:,:,ize-1,updatetlevel)
-      buff_pjxy_zBC_e(:,:,:,:,+1) = moments_e(:,:,:,:,izs  ,updatetlevel)
-      buff_pjxy_zBC_e(:,:,:,:,+2) = moments_e(:,:,:,:,izs+1,updatetlevel)
-    ENDIF
-    DO iky = ikys,ikye
-      DO ikx = ikxs,ikxe
-        ikxBC_L = ikx_zBC_L(iky,ikx);
-        IF (ikxBC_L .NE. -99) THEN ! Exchanging the modes that have a periodic pair (a)
-          ! first-1 gets last
-          moments_e(:,:,iky,ikx,izs-1,updatetlevel) = pb_phase_L(iky)*buff_pjxy_zBC_e(:,:,iky,ikxBC_L,-1)
-          ! first-2 gets last-1
-          moments_e(:,:,iky,ikx,izs-2,updatetlevel) = pb_phase_L(iky)*buff_pjxy_zBC_e(:,:,iky,ikxBC_L,-2)
-        ELSE
-          moments_e(:,:,iky,ikx,izs-1,updatetlevel) = 0._dp
-          moments_e(:,:,iky,ikx,izs-2,updatetlevel) = 0._dp
-        ENDIF
-        ikxBC_R = ikx_zBC_R(iky,ikx);
-        IF (ikxBC_R .NE. -99) THEN ! Exchanging the modes that have a periodic pair (a)
-          ! last+1 gets first
-          moments_e(:,:,iky,ikx,ize+1,updatetlevel) = pb_phase_R(iky)*buff_pjxy_zBC_e(:,:,iky,ikxBC_R,+1)
-          ! last+2 gets first+1
-          moments_e(:,:,iky,ikx,ize+2,updatetlevel) = pb_phase_R(iky)*buff_pjxy_zBC_e(:,:,iky,ikxBC_R,+2)
-        ELSE
-          moments_e(:,:,iky,ikx,ize+1,updatetlevel) = 0._dp
-          moments_e(:,:,iky,ikx,ize+2,updatetlevel) = 0._dp
-        ENDIF
-      ENDDO
+    ENDDO
+  ELSE !No parallel (just copy)
+    DO ig=1,ngz/2
+      buff_pjxy_zBC(:,:,:,:,:,-ig) = moments(:,:,:,:,:, last-(ig-1),updatetlevel)
+      buff_pjxy_zBC(:,:,:,:,:, ig) = moments(:,:,:,:,:,first+(ig-1),updatetlevel)
     ENDDO
   ENDIF
-END SUBROUTINE update_ghosts_z_e
+  DO iky = 1,local_nky
+    DO ikx = 1,local_nkx
+      ikxBC_L = ikx_zBC_L(iky,ikx);
+      ! Exchanging the modes that have a periodic pair (from sheared BC)
+      IF (ikxBC_L .NE. -99) THEN
+        ! Fill the lower ghosts cells with the buffer value (upper cells from LEFT process)
+        DO ig=1,ngz/2
+          moments(:,:,:,iky,ikx,first-ig,updatetlevel) = pb_phase_L(iky)*buff_pjxy_zBC(:,:,:,iky,ikxBC_L,-ig)
+        ENDDO
+      ELSE
+        DO ig=1,ngz/2
+          moments(:,:,:,iky,ikx,first-ig,updatetlevel) = 0._xp
+        ENDDO
+      ENDIF
+      ikxBC_R = ikx_zBC_R(iky,ikx);
+      ! Exchanging the modes that have a periodic pair (from sheared BC)
+      IF (ikxBC_R .NE. -99) THEN
+        ! Fill the upper ghosts cells with the buffer value (lower cells from RIGHT process)
+        DO ig=1,ngz/2
+          moments(:,:,:,iky,ikx,last+ig,updatetlevel) = pb_phase_R(iky)*buff_pjxy_zBC(:,:,:,iky,ikxBC_R,ig)
+        ENDDO
+      ELSE
+        DO ig=1,ngz/2
+          moments(:,:,:,iky,ikx,last+ig,updatetlevel) = 0._xp
+        ENDDO
+      ENDIF
+    ENDDO
+  ENDDO
+END SUBROUTINE update_ghosts_z_mom
 
-SUBROUTINE update_ghosts_z_i
-  USE parallel, ONLY : buff_pjxy_zBC_i
-  USE fields, ONLY : moments_i
+SUBROUTINE update_ghosts_z_3D(field)
+  USE geometry, ONLY: ikx_zBC_L, ikx_zBC_R, pb_phase_L, pb_phase_R
+  USE parallel, ONLY: nbr_U,nbr_D,comm0,num_procs_z
+  USE grid,     ONLY: local_nky,local_nkx,local_nz,ngz
   IMPLICIT NONE
-  INTEGER :: ikxBC_L, ikxBC_R
-  IF(Nz .GT. 1) THEN
-    IF (num_procs_z .GT. 1) THEN
-      CALL MPI_BARRIER(MPI_COMM_WORLD,ierr)
-      count = (ipge_i-ipgs_i+1)*(ijge_i-ijgs_i+1)*(ikye-ikys+1)*(ikxe-ikxs+1)
-
+  !! buffer for data exchanges
+  COMPLEX(xp),DIMENSION(local_nky,local_nkx,-ngz/2:ngz/2) :: buff_xy_zBC
+  COMPLEX(xp), INTENT(INOUT) :: field(local_nky,local_nkx,local_nz+ngz)
+  INTEGER :: ikxBC_L, ikxBC_R, ikx, iky, first, last, ig, ierr
+  first = 1 + ngz/2
+  last  = local_nz + ngz/2
+  count = local_nky * local_nkx
+  IF (num_procs_z .GT. 1) THEN
+    CALL MPI_BARRIER(MPI_COMM_WORLD,ierr)
       !!!!!!!!!!! Send ghost to up neighbour !!!!!!!!!!!!!!!!!!!!!!
-      ! Send the last local moment to fill the -1 neighbour ghost
-      CALL mpi_sendrecv(moments_i(:,:,:,:,ize  ,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_U, 24, & ! Send to Up the last
-                                  buff_pjxy_zBC_i(:,:,:,:,-1), count, MPI_DOUBLE_COMPLEX, nbr_D, 24, & ! Recieve from Down the first-1
-                        comm0, status, ierr)
-      CALL mpi_sendrecv(moments_i(:,:,:,:,ize-1,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_U, 25, & ! Send to Up the last
-                                  buff_pjxy_zBC_i(:,:,:,:,-2), count, MPI_DOUBLE_COMPLEX, nbr_D, 25, & ! Recieve from Down the first-1
+      DO ig = 1,ngz/2
+      CALL mpi_sendrecv(     field(:,:,last-(ig-1)), count, mpi_xp_c, nbr_U, 30+ig, & ! Send to Up the last
+                       buff_xy_zBC(:,:,-ig),         count, mpi_xp_c, nbr_D, 30+ig, & ! Receive from Down the first-1
                         comm0, status, ierr)
+      ENDDO
       !!!!!!!!!!! Send ghost to down neighbour !!!!!!!!!!!!!!!!!!!!!!
-      CALL mpi_sendrecv(moments_i(:,:,:,:,izs  ,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_D, 26, & ! Send to Up the last
-                                  buff_pjxy_zBC_i(:,:,:,:,+1), count, MPI_DOUBLE_COMPLEX, nbr_U, 26, & ! Recieve from Down the first-1
-                        comm0, status, ierr)
-      CALL mpi_sendrecv(moments_i(:,:,:,:,izs+1,updatetlevel), count, MPI_DOUBLE_COMPLEX, nbr_D, 27, & ! Send to Up the last
-                                  buff_pjxy_zBC_i(:,:,:,:,+2), count, MPI_DOUBLE_COMPLEX, nbr_U, 27, & ! Recieve from Down the first-1
+      DO ig = 1,ngz/2
+      CALL mpi_sendrecv(     field(:,:,first+(ig-1)), count, mpi_xp_c, nbr_D, 32+ig, & ! Send to Down the first
+                       buff_xy_zBC(:,:,ig),           count, mpi_xp_c, nbr_U, 32+ig, & ! Recieve from Up the last+1
                         comm0, status, ierr)
-    ELSE !No parallel (copy)
-      buff_pjxy_zBC_i(:,:,:,:,-1) = moments_i(:,:,:,:,ize  ,updatetlevel)
-      buff_pjxy_zBC_i(:,:,:,:,-2) = moments_i(:,:,:,:,ize-1,updatetlevel)
-      buff_pjxy_zBC_i(:,:,:,:,+1) = moments_i(:,:,:,:,izs  ,updatetlevel)
-      buff_pjxy_zBC_i(:,:,:,:,+2) = moments_i(:,:,:,:,izs+1,updatetlevel)
-    ENDIF
-    DO iky = ikys,ikye
-      DO ikx = ikxs,ikxe
-        ikxBC_L = ikx_zBC_L(iky,ikx);
-        IF (ikxBC_L .NE. -99) THEN ! Exchanging the modes that have a periodic pair (a)
-          ! first-1 gets last
-          moments_i(:,:,iky,ikx,izs-1,updatetlevel) = pb_phase_L(iky)*buff_pjxy_zBC_i(:,:,iky,ikxBC_L,-1)
-          ! first-2 gets last-1
-          moments_i(:,:,iky,ikx,izs-2,updatetlevel) = pb_phase_L(iky)*buff_pjxy_zBC_i(:,:,iky,ikxBC_L,-2)
-        ELSE
-          moments_i(:,:,iky,ikx,izs-1,updatetlevel) = 0._dp
-          moments_i(:,:,iky,ikx,izs-2,updatetlevel) = 0._dp
-        ENDIF
-        ikxBC_R = ikx_zBC_R(iky,ikx);
-        IF (ikxBC_R .NE. -99) THEN ! Exchanging the modes that have a periodic pair (a)
-          ! last+1 gets first
-          moments_i(:,:,iky,ikx,ize+1,updatetlevel) = pb_phase_R(iky)*buff_pjxy_zBC_i(:,:,iky,ikxBC_R,+1)
-          ! last+2 gets first+1
-          moments_i(:,:,iky,ikx,ize+2,updatetlevel) = pb_phase_R(iky)*buff_pjxy_zBC_i(:,:,iky,ikxBC_R,+2)
-        ELSE
-          moments_i(:,:,iky,ikx,ize+1,updatetlevel) = 0._dp
-          moments_i(:,:,iky,ikx,ize+2,updatetlevel) = 0._dp
-        ENDIF
       ENDDO
+   ELSE
+     ! no parallelization so just copy last cell into first ghosts and vice versa
+     DO ig = 1,ngz/2
+       buff_xy_zBC(:,:,-ig) = field(:,:,last-(ig-1))
+       buff_xy_zBC(:,:, ig) = field(:,:,first+(ig-1))
+     ENDDO
+   ENDIF
+  DO iky = 1,local_nky
+    DO ikx = 1,local_nkx
+      ikxBC_L = ikx_zBC_L(iky,ikx);
+      IF (ikxBC_L .NE. -99) THEN ! Exchanging the modes that have a periodic pair (a)
+        DO ig = 1,ngz/2
+          field(iky,ikx,first-ig) = pb_phase_L(iky)*buff_xy_zBC(iky,ikxBC_L,-ig)
+        ENDDO
+      ELSE
+        DO ig = 1,ngz/2
+          field(iky,ikx,first-ig) = 0._xp
+        ENDDO
+      ENDIF
+      ikxBC_R = ikx_zBC_R(iky,ikx);
+      IF (ikxBC_R .NE. -99) THEN ! Exchanging the modes that have a periodic pair (a)
+        ! last+1 gets first
+        DO ig = 1,ngz/2
+          field(iky,ikx,last+ig) = pb_phase_R(iky)*buff_xy_zBC(iky,ikxBC_R,ig)
+        ENDDO
+      ELSE
+        DO ig = 1,ngz/2
+          field(iky,ikx,last+ig) = 0._xp
+        ENDDO
+      ENDIF
     ENDDO
-  ENDIF
-END SUBROUTINE update_ghosts_z_i
-
-SUBROUTINE update_ghosts_z_phi
-  USE parallel, ONLY : buff_xy_zBC
-  USE fields,   ONLY : phi
-  IMPLICIT NONE
-  INTEGER :: ikxBC_L, ikxBC_R
-  CALL cpu_time(t1_ghost)
-  IF(Nz .GT. 1) THEN
-    IF (num_procs_z .GT. 1) THEN
-      CALL MPI_BARRIER(MPI_COMM_WORLD,ierr)
-        count = (ikye-ikys+1) * (ikxe-ikxs+1)
-        !!!!!!!!!!! Send ghost to up neighbour !!!!!!!!!!!!!!!!!!!!!!
-        CALL mpi_sendrecv(     phi(:,:,ize  ), count, MPI_DOUBLE_COMPLEX, nbr_U, 30, & ! Send to Up the last
-                          buff_xy_zBC(:,:,-1), count, MPI_DOUBLE_COMPLEX, nbr_D, 30, & ! Receive from Down the first-1
-                          comm0, status, ierr)
-
-        CALL mpi_sendrecv(     phi(:,:,ize-1), count, MPI_DOUBLE_COMPLEX, nbr_U, 31, & ! Send to Up the last
-                          buff_xy_zBC(:,:,-2), count, MPI_DOUBLE_COMPLEX, nbr_D, 31, & ! Receive from Down the first-2
-                          comm0, status, ierr)
-
-        !!!!!!!!!!! Send ghost to down neighbour !!!!!!!!!!!!!!!!!!!!!!
-        CALL mpi_sendrecv(     phi(:,:,izs  ), count, MPI_DOUBLE_COMPLEX, nbr_D, 32, & ! Send to Down the first
-                          buff_xy_zBC(:,:,+1), count, MPI_DOUBLE_COMPLEX, nbr_U, 32, & ! Recieve from Up the last+1
-                          comm0, status, ierr)
-
-        CALL mpi_sendrecv(     phi(:,:,izs+1), count, MPI_DOUBLE_COMPLEX, nbr_D, 33, & ! Send to Down the first
-                          buff_xy_zBC(:,:,+2), count, MPI_DOUBLE_COMPLEX, nbr_U, 33, & ! Recieve from Up the last+2
-                          comm0, status, ierr)
-     ELSE
-       buff_xy_zBC(:,:,-1) = phi(:,:,ize  )
-       buff_xy_zBC(:,:,-2) = phi(:,:,ize-1)
-       buff_xy_zBC(:,:,+1) = phi(:,:,izs  )
-       buff_xy_zBC(:,:,+2) = phi(:,:,izs+1)
-     ENDIF
-    DO iky = ikys,ikye
-      DO ikx = ikxs,ikxe
-        ikxBC_L = ikx_zBC_L(iky,ikx);
-        IF (ikxBC_L .NE. -99) THEN ! Exchanging the modes that have a periodic pair (a)
-          ! first-1 gets last
-          phi(iky,ikx,izs-1) = pb_phase_L(iky)*buff_xy_zBC(iky,ikxBC_L,-1)
-          ! first-2 gets last-1
-          phi(iky,ikx,izs-2) = pb_phase_L(iky)*buff_xy_zBC(iky,ikxBC_L,-2)
-        ELSE
-          phi(iky,ikx,izs-1) = 0._dp
-          phi(iky,ikx,izs-2) = 0._dp
-        ENDIF
-        ikxBC_R = ikx_zBC_R(iky,ikx);
-        IF (ikxBC_R .NE. -99) THEN ! Exchanging the modes that have a periodic pair (a)
-          ! last+1 gets first
-          phi(iky,ikx,ize+1) = pb_phase_R(iky)*buff_xy_zBC(iky,ikxBC_R,+1)
-          ! last+2 gets first+1
-          phi(iky,ikx,ize+2) = pb_phase_R(iky)*buff_xy_zBC(iky,ikxBC_R,+2)
-        ELSE
-          phi(iky,ikx,ize+1) = 0._dp
-          phi(iky,ikx,ize+2) = 0._dp
-        ENDIF
-      ENDDO
-    ENDDO
-  ENDIF
-  CALL cpu_time(t1_ghost)
-  tc_ghost = tc_ghost + (t1_ghost - t0_ghost)
-END SUBROUTINE update_ghosts_z_phi
-
-SUBROUTINE update_ghosts_z_psi
-  USE parallel, ONLY : buff_xy_zBC
-  USE fields, ONLY : psi
-  IMPLICIT NONE
-  INTEGER :: ikxBC_L, ikxBC_R
-  CALL cpu_time(t1_ghost)
-  IF(Nz .GT. 1) THEN
-    IF (num_procs_z .GT. 1) THEN
-      CALL MPI_BARRIER(MPI_COMM_WORLD,ierr)
-        count = (ikye-ikys+1) * (ikxe-ikxs+1)
-        !!!!!!!!!!! Send ghost to up neighbour !!!!!!!!!!!!!!!!!!!!!!
-        CALL mpi_sendrecv(     psi(:,:,ize  ), count, MPI_DOUBLE_COMPLEX, nbr_U, 40, & ! Send to Up the last
-                          buff_xy_zBC(:,:,-1), count, MPI_DOUBLE_COMPLEX, nbr_D, 40, & ! Receive from Down the first-1
-                          comm0, status, ierr)
-
-        CALL mpi_sendrecv(     psi(:,:,ize-1), count, MPI_DOUBLE_COMPLEX, nbr_U, 41, & ! Send to Up the last
-                          buff_xy_zBC(:,:,-2), count, MPI_DOUBLE_COMPLEX, nbr_D, 41, & ! Receive from Down the first-2
-                          comm0, status, ierr)
-
-        !!!!!!!!!!! Send ghost to down neighbour !!!!!!!!!!!!!!!!!!!!!!
-        CALL mpi_sendrecv(     psi(:,:,izs  ), count, MPI_DOUBLE_COMPLEX, nbr_D, 42, & ! Send to Down the first
-                          buff_xy_zBC(:,:,+1), count, MPI_DOUBLE_COMPLEX, nbr_U, 42, & ! Recieve from Up the last+1
-                          comm0, status, ierr)
-
-        CALL mpi_sendrecv(     psi(:,:,izs+1), count, MPI_DOUBLE_COMPLEX, nbr_D, 43, & ! Send to Down the first
-                          buff_xy_zBC(:,:,+2), count, MPI_DOUBLE_COMPLEX, nbr_U, 43, & ! Recieve from Up the last+2
-                          comm0, status, ierr)
-     ELSE
-       buff_xy_zBC(:,:,-1) = psi(:,:,ize  )
-       buff_xy_zBC(:,:,-2) = psi(:,:,ize-1)
-       buff_xy_zBC(:,:,+1) = psi(:,:,izs  )
-       buff_xy_zBC(:,:,+2) = psi(:,:,izs+1)
-     ENDIF
-    DO iky = ikys,ikye
-      DO ikx = ikxs,ikxe
-        ikxBC_L = ikx_zBC_L(iky,ikx);
-        IF (ikxBC_L .NE. -99) THEN ! Exchanging the modes that have a periodic pair (a)
-          ! first-1 gets last
-          psi(iky,ikx,izs-1) = pb_phase_L(iky)*buff_xy_zBC(iky,ikxBC_L,-1)
-          ! first-2 gets last-1
-          psi(iky,ikx,izs-2) = pb_phase_L(iky)*buff_xy_zBC(iky,ikxBC_L,-2)
-        ELSE
-          psi(iky,ikx,izs-1) = 0._dp
-          psi(iky,ikx,izs-2) = 0._dp
-        ENDIF
-        ikxBC_R = ikx_zBC_R(iky,ikx);
-        IF (ikxBC_R .NE. -99) THEN ! Exchanging the modes that have a periodic pair (a)
-          ! last+1 gets first
-          psi(iky,ikx,ize+1) = pb_phase_R(iky)*buff_xy_zBC(iky,ikxBC_R,+1)
-          ! last+2 gets first+1
-          psi(iky,ikx,ize+2) = pb_phase_R(iky)*buff_xy_zBC(iky,ikxBC_R,+2)
-        ELSE
-          psi(iky,ikx,ize+1) = 0._dp
-          psi(iky,ikx,ize+2) = 0._dp
-        ENDIF
-      ENDDO
-    ENDDO
-  ENDIF
-  CALL cpu_time(t1_ghost)
-  tc_ghost = tc_ghost + (t1_ghost - t0_ghost)
-END SUBROUTINE update_ghosts_z_psi
-
+  ENDDO
+END SUBROUTINE update_ghosts_z_3D
 
 END MODULE ghosts
diff --git a/src/grid_mod.F90 b/src/grid_mod.F90
index f2feb2b01d5b828924fb9bcce7a9b559a7f803ed..0c7e65b705748d8912c69178346857fc80ad54f2 100644
--- a/src/grid_mod.F90
+++ b/src/grid_mod.F90
@@ -2,84 +2,89 @@ MODULE grid
   ! Grid module for spatial discretization
   USE prec_const
   USE basic
-
+  USE parallel, ONLY: my_id, comm_ky
+  USE iso_c_binding
   IMPLICIT NONE
   PRIVATE
 
-  !   GRID Namelist
-  INTEGER,  PUBLIC, PROTECTED :: pmaxe = 1      ! The maximal electron Hermite-moment computed
-  INTEGER,  PUBLIC, PROTECTED :: jmaxe = 1      ! The maximal electron Laguerre-moment computed
-  INTEGER,  PUBLIC, PROTECTED :: pmaxi = 1      ! The maximal ion Hermite-moment computed
-  INTEGER,  PUBLIC, PROTECTED :: jmaxi = 1      ! The maximal ion Laguerre-moment computed
-  INTEGER,  PUBLIC, PROTECTED :: maxj  = 1      ! The maximal Laguerre-moment
-  INTEGER,  PUBLIC, PROTECTED :: dmaxe = 1      ! The maximal full GF set of e-moments v^dmax
-  INTEGER,  PUBLIC, PROTECTED :: dmaxi = 1      ! The maximal full GF set of i-moments v^dmax
+  !   GRID Input
+  INTEGER,  PUBLIC, PROTECTED :: pmax  = 1      ! The maximal Hermite-moment computed
+  INTEGER,  PUBLIC, PROTECTED :: jmax  = 1      ! The maximal Laguerre-moment computed
+  INTEGER,  PUBLIC, PROTECTED :: maxj  = 1     ! The maximal Laguerre-moment
+  INTEGER,  PUBLIC, PROTECTED :: dmax  = 1      ! The maximal full GF set of i-moments v^dmax
   INTEGER,  PUBLIC, PROTECTED :: Nx    = 4      ! Number of total internal grid points in x
-  REAL(dp), PUBLIC, PROTECTED :: Lx    = 120_dp ! horizontal length of the spatial box
+  REAL(xp), PUBLIC, PROTECTED :: Lx    = 120_xp ! horizontal length of the spatial box
   INTEGER,  PUBLIC, PROTECTED :: Nexc  = 1      ! factor to increase Lx when shear>0 (Lx = Nexc/kymin/shear)
   INTEGER,  PUBLIC, PROTECTED :: Ny    = 4      ! Number of total internal grid points in y
-  REAL(dp), PUBLIC, PROTECTED :: Ly    = 120_dp ! vertical length of the spatial box
+  REAL(xp), PUBLIC, PROTECTED :: Ly    = 120_xp ! vertical length of the spatial box
   INTEGER,  PUBLIC, PROTECTED :: Nz    = 4      ! Number of total perpendicular planes
-  INTEGER,  PUBLIC, PROTECTED :: Npol  = 1      ! number of poloidal turns
   INTEGER,  PUBLIC, PROTECTED :: Odz   = 4      ! order of z interp and derivative schemes
-  INTEGER,  PUBLIC, PROTECTED :: Nkx   = 4      ! Number of total internal grid points in kx
-  INTEGER,  PUBLIC, PROTECTED :: Nky   = 4      ! Number of total internal grid points in ky
-  REAL(dp), PUBLIC, PROTECTED :: kpar  = 0_dp   ! parallel wave vector component
-  ! For Orszag filter
-  REAL(dp), PUBLIC, PROTECTED :: two_third_kxmax
-  REAL(dp), PUBLIC, PROTECTED :: two_third_kymax
-  REAL(dp), PUBLIC :: two_third_kpmax
-
-  ! 1D Antialiasing arrays (2/3 rule)
-  REAL(dp), DIMENSION(:), ALLOCATABLE, PUBLIC :: AA_x
-  REAL(dp), DIMENSION(:), ALLOCATABLE, PUBLIC :: AA_y
-
-  ! Grids containing position in physical space
-  REAL(dp), DIMENSION(:),   ALLOCATABLE, PUBLIC :: xarray
-  REAL(dp), DIMENSION(:),   ALLOCATABLE, PUBLIC :: yarray
-  ! Local and global z grids, 2D since it has to store odd and even grids
-  REAL(dp), DIMENSION(:,:), ALLOCATABLE, PUBLIC :: zarray
-  REAL(dp), DIMENSION(:),   ALLOCATABLE, PUBLIC :: zarray_full
+  INTEGER,  PUBLIC, PROTECTED :: Nkx            ! Number of total internal grid points in kx
+  INTEGER,  PUBLIC, PROTECTED :: Nky            ! Number of total internal grid points in ky
+  REAL(xp), PUBLIC, PROTECTED :: kpar  = 0_xp   ! parallel wave vector component
+  ! Grid arrays
+  INTEGER,  DIMENSION(:),   ALLOCATABLE, PUBLIC,PROTECTED :: parray,  parray_full
+  INTEGER,  DIMENSION(:),   ALLOCATABLE, PUBLIC,PROTECTED :: jarray,  jarray_full
+  REAL(xp), DIMENSION(:),   ALLOCATABLE, PUBLIC,PROTECTED :: kxarray, kxarray_full
+  REAL(xp), DIMENSION(:),   ALLOCATABLE, PUBLIC,PROTECTED :: kyarray, kyarray_full
+  REAL(xp), DIMENSION(:,:), ALLOCATABLE, PUBLIC,PROTECTED :: zarray
+  REAL(xp), DIMENSION(:),   ALLOCATABLE, PUBLIC,PROTECTED :: zarray_full
+  REAL(xp), DIMENSION(:,:,:,:), ALLOCATABLE, PUBLIC,PROTECTED :: kparray !kperp
+  ! Indexation variables
+  INTEGER,  PUBLIC, PROTECTED ::  ias ,iae  ! species index
+  INTEGER,  PUBLIC, PROTECTED ::  ips ,ipe  ! Hermite
+  INTEGER,  PUBLIC, PROTECTED ::  ijs ,ije  ! Laguerre
+  INTEGER,  PUBLIC, PROTECTED ::  ikys,ikye ! Fourier y mode
+  INTEGER,  PUBLIC, PROTECTED ::  ikxs,ikxe ! Fourier x mode
+  INTEGER,  PUBLIC, PROTECTED ::  izs ,ize  ! z-grid
+  INTEGER,  PUBLIC, PROTECTED ::  ieven, iodd ! indices for the staggered grids
+  INTEGER,  PUBLIC, PROTECTED ::  ip0, ip1, ip2, ip3, ij0, ij1, pp2
+  INTEGER,  PUBLIC, PROTECTED ::  ikx0, iky0, ikx_max, iky_max ! Indices of k-grid origin and max
+  ! Total numbers of points for Hermite and Laguerre
+  INTEGER, PUBLIC, PROTECTED :: total_na
+  INTEGER, PUBLIC, PROTECTED :: total_np
+  INTEGER, PUBLIC, PROTECTED :: total_nj
+  INTEGER, PUBLIC, PROTECTED :: total_nky
+  INTEGER, PUBLIC, PROTECTED :: total_nkx
+  INTEGER, PUBLIC, PROTECTED :: total_nz
+  ! Local numbers of points (without ghosts)
+  INTEGER, PUBLIC, PROTECTED :: local_na
+  INTEGER, PUBLIC, PROTECTED :: local_np
+  INTEGER, PUBLIC, PROTECTED :: local_nj
+  INTEGER, PUBLIC, PROTECTED :: local_nky
+  INTEGER, PUBLIC, PROTECTED :: local_nkx
+  INTEGER, PUBLIC, PROTECTED :: local_nz
+  INTEGER, PUBLIC, PROTECTED :: local_nkp
+  INTEGER, PUBLIC, PROTECTED :: ngp, ngj, ngx, ngy, ngz ! number of ghosts points
+  INTEGER, PUBLIC, PROTECTED :: nzgrid  ! one or two depending on the staggered grid option
+  ! Local offsets
+  INTEGER, PUBLIC, PROTECTED :: local_na_offset
+  INTEGER, PUBLIC, PROTECTED :: local_np_offset
+  INTEGER, PUBLIC, PROTECTED :: local_nj_offset
+  INTEGER, PUBLIC, PROTECTED :: local_nky_offset
+  INTEGER, PUBLIC, PROTECTED :: local_nkx_offset
+  INTEGER, PUBLIC, PROTECTED :: local_nz_offset
+  ! C-pointer type for FFTW3
+  integer(C_INTPTR_T), PUBLIC,PROTECTED :: local_nkx_ptr, local_nky_ptr
+  integer(C_INTPTR_T), PUBLIC,PROTECTED :: local_nkx_ptr_offset, local_nky_ptr_offset
+  ! Grid spacing and limits
+  REAL(xp), PUBLIC, PROTECTED ::  deltap, deltaz, inv_deltaz
+  REAL(xp), PUBLIC, PROTECTED ::  deltakx, deltaky, kx_max, ky_max, kx_min, ky_min!, kp_max
+  INTEGER , PUBLIC, PROTECTED ::  local_pmin,  local_pmax
+  INTEGER , PUBLIC, PROTECTED ::  local_jmin,  local_jmax
+  REAL(xp), PUBLIC, PROTECTED ::  local_kymin, local_kymax
+  REAL(xp), PUBLIC, PROTECTED ::  local_kxmin, local_kxmax
+  REAL(xp), DIMENSION(:), ALLOCATABLE, PUBLIC, PROTECTED ::  local_zmin,  local_zmax
   ! local z weights for computing simpson rule
-  REAL(dp),  DIMENSION(:),   ALLOCATABLE, PUBLIC :: zweights_SR
-  REAL(dp), PUBLIC, PROTECTED  ::  deltax,  deltay, deltaz, inv_deltaz
-  REAL(dp), PUBLIC, PROTECTED  ::  diff_pe_coeff, diff_je_coeff
-  REAL(dp), PUBLIC, PROTECTED  ::  diff_pi_coeff, diff_ji_coeff
-  REAL(dp), PUBLIC, PROTECTED  ::  diff_kx_coeff, diff_ky_coeff, diff_dz_coeff
-  INTEGER,  PUBLIC, PROTECTED  ::  ixs,  ixe,  iys,  iye,  izs,  ize
-  INTEGER,  PUBLIC, PROTECTED  ::  izgs, izge ! ghosts
+  REAL(xp),  DIMENSION(:),   ALLOCATABLE, PUBLIC,PROTECTED :: zweights_SR
+  ! Numerical diffusion scaling
+  REAL(xp), PUBLIC, PROTECTED  ::  diff_p_coeff, diff_j_coeff
+  REAL(xp), PUBLIC, PROTECTED  ::  diff_kx_coeff, diff_ky_coeff, diff_dz_coeff
   LOGICAL,  PUBLIC, PROTECTED  ::  SG = .true.! shifted grid flag
-  INTEGER,  PUBLIC :: ir,iz ! counters
-  ! Data about parallel distribution for ky.kx
-  integer(C_INTPTR_T), PUBLIC :: local_nkx, local_nky
-  integer(C_INTPTR_T), PUBLIC :: local_nkx_offset, local_nky_offset
-  INTEGER,             PUBLIC :: local_nkp
-  INTEGER, DIMENSION(:), ALLOCATABLE, PUBLIC :: counts_nkx, counts_nky
-  INTEGER, DIMENSION(:), ALLOCATABLE, PUBLIC :: displs_nkx, displs_nky
-  ! "" for p
-  INTEGER,             PUBLIC :: local_np_e, local_np_i
-  INTEGER,             PUBLIC :: total_np_e, total_np_i, Np_e, Np_i
-  integer(C_INTPTR_T), PUBLIC :: local_np_e_offset, local_np_i_offset
-  INTEGER, DIMENSION(:), ALLOCATABLE, PUBLIC :: rcv_p_e, rcv_p_i
-  INTEGER, DIMENSION(:), ALLOCATABLE, PUBLIC :: dsp_p_e, dsp_p_i
-  ! "" for z
-  INTEGER,             PUBLIC :: local_nz
-  INTEGER,             PUBLIC :: total_nz
-  integer(C_INTPTR_T), PUBLIC :: local_nz_offset
-  INTEGER, DIMENSION(:), ALLOCATABLE, PUBLIC :: counts_nz
-  INTEGER, DIMENSION(:), ALLOCATABLE, PUBLIC :: displs_nz
-  ! "" for j (not parallelized)
-  INTEGER,             PUBLIC :: local_nj_e, local_nj_i, Nj_e, Nj_i
-  ! Grids containing position in fourier space
-  REAL(dp), DIMENSION(:),     ALLOCATABLE, PUBLIC :: kxarray, kxarray_full
-  REAL(dp), DIMENSION(:),     ALLOCATABLE, PUBLIC :: kyarray, kyarray_full
+  ! Array to know the distribution of data among all processes (for MPI comm)
+  INTEGER, DIMENSION(:), ALLOCATABLE, PUBLIC,PROTECTED :: counts_total_nkx, counts_nky, counts_nz
+  INTEGER, DIMENSION(:), ALLOCATABLE, PUBLIC,PROTECTED :: displs_total_nkx, displs_nky, displs_nz
   ! Kperp array depends on kx, ky, z (geometry), eo (even or odd zgrid)
-  REAL(dp), DIMENSION(:,:,:,:), ALLOCATABLE, PUBLIC :: kparray
-  REAL(dp), PUBLIC, PROTECTED ::  deltakx, deltaky, kx_max, ky_max, kx_min, ky_min!, kp_max
-  REAL(dp), PUBLIC, PROTECTED ::  local_kxmax, local_kymax
-  INTEGER,  PUBLIC, PROTECTED ::  ikxs, ikxe, ikys, ikye!, ikps, ikpe
-  INTEGER,  PUBLIC, PROTECTED ::  ikx_0, iky_0, ikx_max, iky_max ! Indices of k-grid origin and max
-  INTEGER,  PUBLIC            ::  ikx, iky, ip, ij, ikp, pp2, eo ! counters
   LOGICAL,  PUBLIC, PROTECTED ::  contains_kx0   = .false. ! flag if the proc contains kx=0 index
   LOGICAL,  PUBLIC, PROTECTED ::  contains_ky0   = .false. ! flag if the proc contains ky=0 index
   LOGICAL,  PUBLIC, PROTECTED ::  contains_kymax = .false. ! flag if the proc contains kx=kxmax index
@@ -87,34 +92,26 @@ MODULE grid
   LOGICAL,  PUBLIC, PROTECTED ::  contains_zmin  = .false. ! flag if the proc contains z=-pi index
   LOGICAL,  PUBLIC, PROTECTED ::       SINGLE_KY = .false. ! to check if it is a single non 0 ky simulation
   ! Grid containing the polynomials degrees
-  INTEGER,  DIMENSION(:), ALLOCATABLE, PUBLIC :: parray_e, parray_e_full
-  INTEGER,  DIMENSION(:), ALLOCATABLE, PUBLIC :: parray_i, parray_i_full
-  INTEGER,  DIMENSION(:), ALLOCATABLE, PUBLIC :: jarray_e, jarray_e_full
-  INTEGER,  DIMENSION(:), ALLOCATABLE, PUBLIC :: jarray_i, jarray_i_full
-  INTEGER,  PUBLIC, PROTECTED ::  ips_e,ipe_e, ijs_e,ije_e ! Start and end indices for pol. deg.
-  INTEGER,  PUBLIC, PROTECTED ::  ips_i,ipe_i, ijs_i,ije_i
-  INTEGER,  PUBLIC, PROTECTED ::  ipgs_e,ipge_e, ijgs_e,ijge_e ! Ghosts start and end indices
-  INTEGER,  PUBLIC, PROTECTED ::  ipgs_i,ipge_i, ijgs_i,ijge_i
-  INTEGER,  PUBLIC, PROTECTED ::  deltape, ip0_e, ip1_e, ip2_e, ip3_e ! Pgrid spacing and moment 0,1,2 index
-  INTEGER,  PUBLIC, PROTECTED ::  deltapi, ip0_i, ip1_i, ip2_i, ip3_i
-  LOGICAL,  PUBLIC, PROTECTED ::  CONTAINS_ip0_e, CONTAINS_ip0_i
-  LOGICAL,  PUBLIC, PROTECTED ::  CONTAINS_ip1_e, CONTAINS_ip1_i
-  LOGICAL,  PUBLIC, PROTECTED ::  CONTAINS_ip2_e, CONTAINS_ip2_i
-  LOGICAL,  PUBLIC, PROTECTED ::  CONTAINS_ip3_e, CONTAINS_ip3_i
+  LOGICAL,  PUBLIC, PROTECTED ::  CONTAINSp0, CONTAINSp1, CONTAINSp2, CONTAINSp3
   LOGICAL,  PUBLIC, PROTECTED ::  SOLVE_POISSON, SOLVE_AMPERE
-  INTEGER,  PUBLIC, PROTECTED ::  ij0_i, ij0_e
-! Usefull inverse numbers
-  REAL(dp), PUBLIC, PROTECTED :: inv_Nx, inv_Ny, inv_Nz
+  ! Usefull inverse numbers
+  REAL(xp), PUBLIC, PROTECTED :: inv_Nx, inv_Ny, inv_Nz
+  ! For Orszag filter
+  REAL(xp), PUBLIC, PROTECTED :: two_third_kxmax
+  REAL(xp), PUBLIC, PROTECTED :: two_third_kymax
+  REAL(xp), PUBLIC, PROTECTED :: two_third_kpmax
+  ! 1D Antialiasing arrays (2/3 rule)
+  REAL(xp), DIMENSION(:), ALLOCATABLE, PUBLIC,PROTECTED :: AA_x
+  REAL(xp), DIMENSION(:), ALLOCATABLE, PUBLIC,PROTECTED :: AA_y
 
   ! Public Functions
   PUBLIC :: init_1Dgrid_distr
-  PUBLIC :: set_pgrid, set_jgrid
-  PUBLIC :: set_kxgrid, set_kygrid, set_zgrid
+  PUBLIC :: set_grids, set_kparray
   PUBLIC :: grid_readinputs, grid_outputinputs
-  PUBLIC :: bare, bari
+  PUBLIC :: bar
 
   ! Precomputations
-  real(dp), PUBLIC, PROTECTED    :: pmaxe_dp, pmaxi_dp, jmaxe_dp,jmaxi_dp
+  real(xp), PUBLIC, PROTECTED    :: pmax_xp, jmax_xp
 
 CONTAINS
 
@@ -123,131 +120,141 @@ CONTAINS
     ! Read the input parameters
     USE prec_const
     IMPLICIT NONE
-    INTEGER :: lu_in   = 90              ! File duplicated from STDIN
-
-    NAMELIST /GRID/ pmaxe, jmaxe, pmaxi, jmaxi, &
-                    Nx, Lx, Ny, Ly, Nz, Npol, SG, Nexc
-    READ(lu_in,grid)
-
+    INTEGER :: lun   = 90              ! File duplicated from STDIN
+    NAMELIST /GRID/ pmax, jmax, Nx, Lx, Ny, Ly, Nz, SG, Nexc
+    READ(lun,grid)
     IF(Nz .EQ. 1) & ! overwrite SG option if Nz = 1 for safety of use
       SG      = .FALSE.
-
     !! Compute the maximal degree of full GF moments set
     !   i.e. : all moments N_a^pj s.t. p+2j<=d are simulated (see GF closure)
-    dmaxe = min(pmaxe,2*jmaxe+1)
-    dmaxi = min(pmaxi,2*jmaxi+1)
-
+    dmax = min(pmax,2*jmax+1)
     ! Usefull precomputations
-    inv_Nx = 1._dp/REAL(Nx,dp)
-    inv_Ny = 1._dp/REAL(Ny,dp)
-
+    inv_Nx = 1._xp/REAL(Nx,xp)
+    inv_Ny = 1._xp/REAL(Ny,xp)
   END SUBROUTINE grid_readinputs
+  !!!! GRID REPRESENTATION
+  ! We define the grids that contain ghosts (p,j,z) with indexing from 1 to Nlocal + nghost
+  ! e.g. nghost = 4, nlocal = 4
+  ! |x|x|_|_|_|_|x|x|
+  !  1 2 3 4 5 6 7 8    (x are ghosts)
+  ! the interior points are contained between 1+Ng/2 and Nlocal+Ng/2
+  ! The other grids are simply
+  ! |_|_|_|_|
+  !  1 2 3 4
+  SUBROUTINE set_grids(shear,Npol,LINEARITY,N_HD,EM,Na)
+    USE fourier, ONLY: init_grid_distr_and_plans
+    REAL(xp), INTENT(IN) :: shear
+    INTEGER,  INTENT(IN) :: Npol
+    CHARACTER(len=*), INTENT(IN) :: LINEARITY
+    INTEGER, INTENT(IN)  :: N_HD
+    LOGICAL, INTENT(IN)  :: EM
+    INTEGER, INTENT(IN)  :: Na
+    CALL set_agrid(Na)
+    CALL set_pgrid(EM)
+    CALL set_jgrid
+    !! Parallel distribution of kx ky grid
+    IF (LINEARITY .NE. 'linear') THEN
+      IF (my_id .EQ. 0) write(*,*) 'FFTW3 y-grid distribution'
+      CALL init_grid_distr_and_plans(Nx,Ny,comm_ky,local_nkx_ptr,local_nkx_ptr_offset,local_nky_ptr,local_nky_ptr_offset)
+    ELSE
+      CALL init_1Dgrid_distr
+      IF (my_id .EQ. 0) write(*,*) 'Manual y-grid distribution'
+    ENDIF
+    CALL set_kygrid(LINEARITY,N_HD)
+    CALL set_kxgrid(shear,Npol,LINEARITY,N_HD)
+    CALL set_zgrid (Npol)
+    ! print*, 'p:',parray
+    ! print*, 'j:',jarray
+    ! print*, 'ky:',kyarray
+    ! print*, 'kx:',kxarray
+    ! print*, 'z:',zarray
+    ! print*, parray(ip0)
+    ! print*, jarray(ij0)
+    ! print*, kyarray(iky0)
+    ! print*, kxarray(ikx0)
+  END SUBROUTINE set_grids
 
   SUBROUTINE init_1Dgrid_distr
+    USE parallel, ONLY: num_procs_ky, rank_ky
     ! write(*,*) Nx
-    local_nky        = (Ny/2+1)/num_procs_ky
-    ! write(*,*) local_nkx
-    local_nky_offset = rank_ky*local_nky
-    if (rank_ky .EQ. num_procs_ky-1) local_nky = (Ny/2+1)-local_nky_offset
+    local_nky_ptr        = (Ny/2+1)/num_procs_ky
+    ! write(*,*) local_nkx_ptr
+    local_nky_ptr_offset = rank_ky*local_nky_ptr
+    if (rank_ky .EQ. num_procs_ky-1) local_nky_ptr = (Ny/2+1)-local_nky_ptr_offset
   END SUBROUTINE init_1Dgrid_distr
 
-  SUBROUTINE set_pgrid
+  SUBROUTINE set_agrid(Na) ! you're a sorcerer harry
+    IMPLICIT NONE
+    INTEGER, INTENT(IN) :: Na
+    ias = 1
+    iae = Na
+    total_Na = Na
+    local_Na = Na
+    local_Na_offset = ias - 1
+  END SUBROUTINE
+
+  SUBROUTINE set_pgrid(EM)
     USE prec_const
-    USE model, ONLY: beta ! To know if we solve ampere or not and put odd  p moments
+    USE parallel, ONLY: num_procs_p, rank_p
     IMPLICIT NONE
-    INTEGER :: ip, istart, iend, in
-
+    LOGICAL, INTENT(IN) :: EM
+    INTEGER :: ip
     ! If no parallel dim (Nz=1) and no EM effects (beta=0), the moment hierarchy
     !! is separable between odds and even P and since the energy is injected in
     !! P=0 and P=2 for density/temperature gradients there is no need of
     !! simulating the odd p which will only be damped.
     !! We define in this case a grid Parray = 0,2,4,...,Pmax i.e. deltap = 2
     !! instead of 1 to spare computation
-    IF((Nz .EQ. 1) .AND. (beta .EQ. 0._dp)) THEN
-      deltape = 2; deltapi = 2;
-      pp2     = 1; ! index p+2 is ip+1
+    IF((Nz .EQ. 1) .AND. .NOT. EM) THEN
+      deltap  = 2
+      Ngp     = 2  ! two ghosts cells for p+/-2 only
+      pp2     = 1  ! index p+2 is ip+1
     ELSE
-      deltape = 1; deltapi = 1;
-      pp2     = 2; ! index p+2 is ip+2
+      deltap  = 1
+      Ngp     = 4  ! four ghosts cells for p+/-1 and p+/-2 terms
+      pp2     = 2  ! index p+2 is ip+2
     ENDIF
-
     ! Total number of Hermite polynomials we will evolve
-    total_np_e = (Pmaxe/deltape) + 1
-    total_np_i = (Pmaxi/deltapi) + 1
-    Np_e = total_np_e ! Reduced names (redundant)
-    Np_i = total_np_i
+    total_np = (Pmax/deltap) + 1
     ! Build the full grids on process 0 to diagnose it without comm
-    ALLOCATE(parray_e_full(1:total_np_e))
-    ALLOCATE(parray_i_full(1:total_np_i))
+    ALLOCATE(parray_full(total_np))
     ! P
-    DO ip = 1,total_np_e; parray_e_full(ip) = (ip-1)*deltape; END DO
-    DO ip = 1,total_np_i; parray_i_full(ip) = (ip-1)*deltapi; END DO
+    DO ip = 1,total_np; parray_full(ip) = (ip-1)*deltap; END DO
     !! Parallel data distribution
     ! Local data distribution
-    CALL decomp1D(total_np_e, num_procs_p, rank_p, ips_e, ipe_e)
-    CALL decomp1D(total_np_i, num_procs_p, rank_p, ips_i, ipe_i)
-    local_np_e = ipe_e - ips_e + 1
-    local_np_i = ipe_i - ips_i + 1
-    ! Ghosts boundaries
-    ipgs_e = ips_e - 2/deltape; ipge_e = ipe_e + 2/deltape;
-    ipgs_i = ips_i - 2/deltapi; ipge_i = ipe_i + 2/deltapi;
-    ! List of shift and local numbers between the different processes (used in scatterv and gatherv)
-    ALLOCATE(rcv_p_e (1:num_procs_p))
-    ALLOCATE(rcv_p_i (1:num_procs_p))
-    ALLOCATE(dsp_p_e (1:num_procs_p))
-    ALLOCATE(dsp_p_i (1:num_procs_p))
-    DO in = 0,num_procs_p-1
-      CALL decomp1D(total_np_e, num_procs_p, in, istart, iend)
-      rcv_p_e(in+1) = iend-istart+1
-      dsp_p_e(in+1) = istart-1
-      CALL decomp1D(total_np_i, num_procs_p, in, istart, iend)
-      rcv_p_i(in+1) = iend-istart+1
-      dsp_p_i(in+1) = istart-1
-    ENDDO
-
+    CALL decomp1D(total_np, num_procs_p, rank_p, ips, ipe)
+    local_np       = ipe - ips + 1
+    local_np_offset = ips - 1
     !! local grid computations
     ! Flag to avoid unnecessary logical operations
-    CONTAINS_ip0_e = .FALSE.; CONTAINS_ip1_e = .FALSE.
-    CONTAINS_ip2_e = .FALSE.; CONTAINS_ip3_e = .FALSE.
-    CONTAINS_ip0_i = .FALSE.; CONTAINS_ip1_i = .FALSE.
-    CONTAINS_ip2_i = .FALSE.; CONTAINS_ip3_i = .FALSE.
+    CONTAINSp0 = .FALSE.; CONTAINSp1 = .FALSE.
+    CONTAINSp2 = .FALSE.; CONTAINSp3 = .FALSE.
     SOLVE_POISSON  = .FALSE.; SOLVE_AMPERE   = .FALSE.
-    ALLOCATE(parray_e(ipgs_e:ipge_e))
-    ALLOCATE(parray_i(ipgs_i:ipge_i))
-    DO ip = ipgs_e,ipge_e
-      parray_e(ip) = (ip-1)*deltape
+    ALLOCATE(parray(local_np+ngp))
+    ! Fill the interior (no ghosts)
+    DO ip = 1,local_np+ngp
+      parray(ip) = (ip-1-ngp/2+local_np_offset)*deltap
       ! Storing indices of particular degrees for fluid moments computations
-      SELECT CASE (parray_e(ip))
-      CASE(0); ip0_e = ip; CONTAINS_ip0_e = .TRUE.
-      CASE(1); ip1_e = ip; CONTAINS_ip1_e = .TRUE.
-      CASE(2); ip2_e = ip; CONTAINS_ip2_e = .TRUE.
-      CASE(3); ip3_e = ip; CONTAINS_ip3_e = .TRUE.
+      SELECT CASE (parray(ip))
+      CASE(0); ip0 = ip; CONTAINSp0 = .TRUE.
+      CASE(1); ip1 = ip; CONTAINSp1 = .TRUE.
+      CASE(2); ip2 = ip; CONTAINSp2 = .TRUE.
+      CASE(3); ip3 = ip; CONTAINSp3 = .TRUE.
       END SELECT
     END DO
-    DO ip = ipgs_i,ipge_i
-      parray_i(ip) = (ip-1)*deltapi
-      ! Storing indices of particular degrees for fluid moments computations
-      SELECT CASE (parray_i(ip))
-      CASE(0); ip0_i = ip; CONTAINS_ip0_i = .TRUE.
-      CASE(1); ip1_i = ip; CONTAINS_ip1_i = .TRUE.
-      CASE(2); ip2_i = ip; CONTAINS_ip2_i = .TRUE.
-      CASE(3); ip3_i = ip; CONTAINS_ip3_i = .TRUE.
-      END SELECT
-    END DO
-    IF(CONTAINS_ip0_e .AND. CONTAINS_ip0_i) SOLVE_POISSON = .TRUE.
-    IF(CONTAINS_ip1_e .AND. CONTAINS_ip1_i) SOLVE_AMPERE  = .TRUE.
+    local_pmax = parray(local_np+ngp/2)
+    local_pmin = parray(1+ngp/2)
+    IF(CONTAINSp0) SOLVE_POISSON = .TRUE.
+    IF(CONTAINSp1) SOLVE_AMPERE  = .TRUE.
     !DGGK operator uses moments at index p=2 (ip=3) for the p=0 term so the
     ! process that contains ip=1 MUST contain ip=3 as well for both e and i.
-    IF(((ips_e .EQ. ip0_e) .OR. (ips_i .EQ. ip0_e)) .AND. ((ipe_e .LT. ip2_e) .OR. (ipe_i .LT. ip2_i)))&
+    IF(CONTAINSp0 .AND. .NOT. (CONTAINSp2))&
      WRITE(*,*) "Warning : distribution along p may not work with DGGK"
     ! Precomputations
-    pmaxe_dp      = real(pmaxe,dp)
-    pmaxi_dp      = real(pmaxi,dp)
-    diff_pe_coeff = pmaxe_dp*(1._dp/pmaxe_dp)**6
-    diff_pi_coeff = pmaxi_dp*(1._dp/pmaxi_dp)**6
-
+    pmax_xp       = real(pmax,xp)
+    diff_p_coeff  = pmax_xp*(1._xp/pmax_xp)**6
     ! Overwrite SOLVE_AMPERE flag if beta is zero
-    IF(beta .EQ. 0._dp) THEN
+    IF(.NOT. EM) THEN
       SOLVE_AMPERE = .FALSE.
     ENDIF
   END SUBROUTINE set_pgrid
@@ -257,86 +264,79 @@ CONTAINS
     IMPLICIT NONE
     INTEGER :: ij
     ! Total number of J degrees
-    Nj_e = jmaxe+1
-    Nj_i = jmaxi+1
+    total_nj   = jmax+1
+    local_jmax = jmax
+    Ngj= 2      ! 2-points ghosts for j+\-1 terms
     ! Build the full grids on process 0 to diagnose it without comm
-    ALLOCATE(jarray_e_full(1:jmaxe+1))
-    ALLOCATE(jarray_i_full(1:jmaxi+1))
+    ALLOCATE(jarray_full(total_nj))
     ! J
-    DO ij = 1,jmaxe+1; jarray_e_full(ij) = (ij-1); END DO
-    DO ij = 1,jmaxi+1; jarray_i_full(ij) = (ij-1); END DO
-    ! Local data
-    ijs_e = 1; ije_e = jmaxe + 1
-    ijs_i = 1; ije_i = jmaxi + 1
-    ! Ghosts boundaries
-    ijgs_e = ijs_e - 1; ijge_e = ije_e + 1;
-    ijgs_i = ijs_i - 1; ijge_i = ije_i + 1;
+    DO ij = 1,total_nj; jarray_full(ij) = (ij-1); END DO
+    ! Indices of local data
+    ijs = 1; ije = jmax + 1
     ! Local number of J
-    local_nj_e = ijge_e - ijgs_e + 1
-    local_nj_i = ijge_i - ijgs_i + 1
-    ALLOCATE(jarray_e(ijgs_e:ijge_e))
-    ALLOCATE(jarray_i(ijgs_i:ijge_i))
-    DO ij = ijgs_e,ijge_e; jarray_e(ij) = ij-1; END DO
-    DO ij = ijgs_i,ijge_i; jarray_i(ij) = ij-1; END DO
+    local_nj        = ije - ijs + 1
+    local_nj_offset = ijs - 1
+    ALLOCATE(jarray(local_nj+ngj))
+    DO ij = 1,local_nj+ngj
+      jarray(ij) = ij-1-ngj/2+local_nj_offset
+    END DO
+    local_jmax = jarray(local_nj+ngj/2)
+    local_jmin = jarray(1+ngj/2)
     ! Precomputations
-    maxj       = MAX(jmaxi, jmaxe)
-    jmaxe_dp   = real(jmaxe,dp)
-    jmaxi_dp   = real(jmaxi,dp)
-    diff_je_coeff = jmaxe_dp*(1._dp/jmaxe_dp)**6
-    diff_ji_coeff = jmaxi_dp*(1._dp/jmaxi_dp)**6
-
-    ! j=0 indices
-    DO ij = ijs_e,ije_e; IF(jarray_e(ij) .EQ. 0) ij0_e = ij; END DO
-    DO ij = ijs_i,ije_i; IF(jarray_i(ij) .EQ. 0) ij0_i = ij; END DO
+    jmax_xp      = real(jmax,xp)
+    diff_j_coeff = jmax_xp*(1._xp/jmax_xp)**6
+    ! j=0 and j=1 indices
+    DO ij = 1,local_nj+ngj
+      IF(jarray(ij) .EQ. 0) ij0 = ij
+      IF(jarray(ij) .EQ. 1) ij1 = ij
+    END DO
   END SUBROUTINE set_jgrid
 
-
-  SUBROUTINE set_kygrid
+  SUBROUTINE set_kygrid(LINEARITY,N_HD)
     USE prec_const
-    USE model, ONLY: LINEARITY, N_HD
     IMPLICIT NONE
-    INTEGER :: in, istart, iend
-    Nky = Ny/2+1 ! Defined only on positive kx since fields are real
+    CHARACTER(len=*), INTENT(IN) ::LINEARITY
+    INTEGER, INTENT(IN) :: N_HD
+    INTEGER :: iky
+    Nky       = Ny/2+1 ! Defined only on positive kx since fields are real
+    total_nky = Nky
     ! Grid spacings
     IF (Ny .EQ. 1) THEN
-      deltaky = 2._dp*PI/Ly
+      deltaky = 2._xp*PI/Ly
       ky_max  = deltaky
       ky_min  = deltaky
     ELSE
-      deltaky = 2._dp*PI/Ly
+      deltaky = 2._xp*PI/Ly
       ky_max  = (Nky-1)*deltaky
       ky_min  = deltaky
     ENDIF
+    Ngy = 0 ! no ghosts cells in ky
     ! Build the full grids on process 0 to diagnose it without comm
-    ALLOCATE(kyarray_full(1:Nky))
+    ALLOCATE(kyarray_full(Nky))
     DO iky = 1,Nky
-     kyarray_full(iky) = REAL(iky-1,dp) * deltaky
+     kyarray_full(iky) = REAL(iky-1,xp) * deltaky
     END DO
-    !! Parallel distribution
-    ikys = local_nky_offset + 1
-    ikye = ikys + local_nky - 1
-    ALLOCATE(kyarray(ikys:ikye))
-    local_kymax = 0._dp
-    ! List of shift and local numbers between the different processes (used in scatterv and gatherv)
-    ALLOCATE(counts_nky (1:num_procs_ky))
-    ALLOCATE(displs_nky (1:num_procs_ky))
-    DO in = 0,num_procs_ky-1
-      CALL decomp1D(Nky, num_procs_ky, in, istart, iend)
-      counts_nky(in+1) = iend-istart+1
-      displs_nky(in+1) = istart-1
-    ENDDO
+    ikys = local_nky_ptr_offset + 1
+    ikye = ikys + local_nky_ptr - 1
+    local_nky = ikye - ikys + 1
+    local_nky_offset = local_nky_ptr_offset
+    ALLOCATE(kyarray(local_nky))
+    local_kymax = 0._xp
     ! Creating a grid ordered as dk*(0 1 2 3)
-    DO iky = ikys,ikye
+    ! We loop over the natural iky numbers (|1 2 3||4 5 6||... Nky|)
+    DO iky = 1,local_nky
+      ! We shift the natural iky index by the offset to obtain the mpi dependent
+      ! indexation (|1 2 3||1 2 3|... local_nky|)
       IF(Ny .EQ. 1) THEN
         kyarray(iky)      = deltaky
         kyarray_full(iky) = deltaky
         SINGLE_KY         = .TRUE.
       ELSE
-        kyarray(iky) = REAL(iky-1,dp) * deltaky
+        kyarray(iky) = kyarray_full(iky+local_nky_offset)
       ENDIF
       ! Finding kx=0
       IF (kyarray(iky) .EQ. 0) THEN
-        iky_0 = iky
+        iky0 = iky
         contains_ky0 = .true.
       ENDIF
       ! Finding local kxmax value
@@ -350,34 +350,36 @@ CONTAINS
       ENDIF
     END DO
     ! Orszag 2/3 filter
-    two_third_kymax = 2._dp/3._dp*deltaky*(Nky-1)
-    ! For hyperdiffusion
-    IF(LINEARITY.EQ.'linear') THEN
-      diff_ky_coeff= (1._dp/ky_max)**N_HD
-    ELSE
-      diff_ky_coeff= (1._dp/two_third_kymax)**N_HD
-    ENDIF
-
-    ALLOCATE(AA_y(ikys:ikye))
-    DO iky = ikys,ikye
+    two_third_kymax = 2._xp/3._xp*deltaky*(Nky-1)
+    ALLOCATE(AA_y(local_nky))
+    DO iky = 1,local_nky
       IF ( (kyarray(iky) .LT. two_third_kymax) .OR. (LINEARITY .EQ. 'linear')) THEN
-        AA_y(iky) = 1._dp;
+        AA_y(iky) = 1._xp;
       ELSE
-        AA_y(iky) = 0._dp;
+        AA_y(iky) = 0._xp;
       ENDIF
     END DO
+    ! For hyperdiffusion
+    IF(LINEARITY.EQ.'linear') THEN
+      diff_ky_coeff= (1._xp/ky_max)**N_HD
+    ELSE
+      diff_ky_coeff= (1._xp/two_third_kymax)**N_HD
+    ENDIF
   END SUBROUTINE set_kygrid
 
-  SUBROUTINE set_kxgrid(shear)
+  SUBROUTINE set_kxgrid(shear,Npol,LINEARITY,N_HD)
     USE prec_const
-    USE model, ONLY: LINEARITY, N_HD
     IMPLICIT NONE
-    REAL(dp), INTENT(IN) :: shear
-    REAL(dp):: Lx_adapted
+    REAL(xp), INTENT(IN) :: shear
+    INTEGER,  INTENT(IN) :: Npol
+    CHARACTER(len=*), INTENT(IN) ::LINEARITY
+    INTEGER, INTENT(IN)  :: N_HD
+    INTEGER :: ikx
+    REAL(xp):: Lx_adapted
     IF(shear .GT. 0) THEN
       IF(my_id.EQ.0) write(*,*) 'Magnetic shear detected: set up sheared kx grid..'
       ! mininal size of box in x to respect dkx = 2pi shear dky
-      Lx_adapted = Ly/(2._dp*pi*shear*Npol)
+      Lx_adapted = Ly/(2._xp*pi*shear*Npol)
       ! Put Nexc to 0 so that it is computed from a target value Lx
       IF(Nexc .EQ. 0) THEN
         Nexc = CEILING(0.9 * Lx/Lx_adapted)
@@ -386,138 +388,109 @@ CONTAINS
       ! x length is adapted
       Lx = Lx_adapted*Nexc
     ENDIF
-    Nkx = Nx;
+    Nkx       = Nx
+    total_nkx = Nx
     ! Local data
     ! Start and END indices of grid
     ikxs = 1
-    ikxe = Nkx
-    local_nkx = ikxe - ikxs + 1
-    ALLOCATE(kxarray(ikxs:ikxe))
-    ALLOCATE(kxarray_full(1:Nkx))
+    ikxe = total_nkx
+    local_nkx_ptr = ikxe - ikxs + 1
+    local_nkx     = ikxe - ikxs + 1
+    local_nkx_offset = ikxs - 1
+    ALLOCATE(kxarray(local_nkx))
+    ALLOCATE(kxarray_full(total_nkx))
     IF (Nx .EQ. 1) THEN
-      deltakx         = 1._dp
-      kxarray(1)      = 0._dp
-      ikx_0           = 1
+      deltakx         = 1._xp
+      kxarray(1)      = 0._xp
+      ikx0           = 1
       contains_kx0    = .true.
-      kx_max          = 0._dp
+      kx_max          = 0._xp
       ikx_max         = 1
-      kx_min          = 0._dp
-      kxarray_full(1) = 0._dp
-      local_kxmax     = 0._dp
+      kx_min          = 0._xp
+      kxarray_full(1) = 0._xp
+      local_kxmax     = 0._xp
     ELSE ! Build apprpopriate grid
-      deltakx      = 2._dp*PI/Lx
-      IF(MODULO(Nkx,2) .EQ. 0) THEN ! Even number of Nkx (-2 -1 0 1 2 3)
-        kx_max = (Nkx/2)*deltakx
+      deltakx      = 2._xp*PI/Lx
+      IF(MODULO(total_nkx,2) .EQ. 0) THEN ! Even number of kx (-2 -1 0 1 2 3)
+        kx_max = (total_nkx/2)*deltakx
         kx_min = -kx_max+deltakx
         ! Creating a grid ordered as dk*(0 1 2 3 -2 -1)
-        local_kxmax = 0._dp
-        DO ikx = ikxs,ikxe
-          kxarray(ikx) = deltakx*(MODULO(ikx-1,Nkx/2)-Nkx/2*FLOOR(2.*REAL(ikx-1,dp)/REAL(Nkx,dp)))
-          if (ikx .EQ. Nx/2+1)     kxarray(ikx) = -kxarray(ikx)
-          ! Finding kx=0
-          IF (kxarray(ikx) .EQ. 0) THEN
-            ikx_0 = ikx
-            contains_kx0 = .true.
-          ENDIF
-          ! Finding local kxmax
-          IF (ABS(kxarray(ikx)) .GT. local_kxmax) THEN
-            local_kxmax = ABS(kxarray(ikx))
-          ENDIF
-          ! Finding kxmax
-          IF (kxarray(ikx) .EQ. kx_max) ikx_max = ikx
-        END DO
-        ! Build the full grids on process 0 to diagnose it without comm
-        ! kx
-        DO ikx = 1,Nkx
-            kxarray_full(ikx) = deltakx*(MODULO(ikx-1,Nkx/2)-Nkx/2*FLOOR(2.*REAL(ikx-1,dp)/REAL(Nkx,dp)))
-            IF (ikx .EQ. Nx/2+1) kxarray_full(ikx) = -kxarray_full(ikx)
+        DO ikx = 1,total_nkx
+          kxarray_full(ikx) = deltakx*REAL(MODULO(ikx-1,total_nkx/2)-(total_nkx/2)*FLOOR(2.*real(ikx-1)/real(total_nkx)),xp)
+          IF (ikx .EQ. total_nkx/2+1) kxarray_full(ikx) = -kxarray_full(ikx)
         END DO
-      ELSE ! Odd number of kx (-2 -1 0 1 2)
-        kx_max = (Nkx-1)/2*deltakx
-        kx_min = -kx_max
-        ! Creating a grid ordered as dk*(0 1 2 -2 -1)
-        local_kxmax = 0._dp
-        DO ikx = ikxs,ikxe
-          IF(ikx .LE. (Nkx-1)/2+1) THEN
-            kxarray(ikx) = deltakx*(ikx-1)
-          ELSE
-            kxarray(ikx) = deltakx*(ikx-Nkx-1)
-          ENDIF
+        ! Set local grid (not parallelized so same as full one)
+        local_kxmax = 0._xp
+        DO ikx = 1,local_nkx
+          kxarray(ikx) = kxarray_full(ikx+local_nkx_offset)
           ! Finding kx=0
           IF (kxarray(ikx) .EQ. 0) THEN
-            ikx_0 = ikx
+            ikx0 = ikx
             contains_kx0 = .true.
           ENDIF
           ! Finding local kxmax
           IF (ABS(kxarray(ikx)) .GT. local_kxmax) THEN
             local_kxmax = ABS(kxarray(ikx))
-          ENDIF
-          ! Finding kxmax
-          IF (kxarray(ikx) .EQ. kx_max) ikx_max = ikx
-        END DO
-        ! Build the full grids on process 0 to diagnose it without comm
-        ! kx
-        DO ikx = 1,Nkx
-          IF(ikx .LE. (Nkx-1)/2+1) THEN
-            kxarray_full(ikx) = deltakx*(ikx-1)
-          ELSE
-            kxarray_full(ikx) = deltakx*(ikx-Nkx-1)
+            ikx_max = ikx
           ENDIF
         END DO
+      ELSE ! Odd number of kx (-2 -1 0 1 2)
+        kx_max = (total_nkx-1)/2*deltakx
       ENDIF
     ENDIF
     ! Orszag 2/3 filter
-    two_third_kxmax = 2._dp/3._dp*kx_max;
-
-    ! For hyperdiffusion
-    IF(LINEARITY.EQ.'linear') THEN
-      diff_kx_coeff= (1._dp/kx_max)**N_HD
-    ELSE
-      diff_kx_coeff= (1._dp/two_third_kxmax)**N_HD
-    ENDIF
-
+    two_third_kxmax = 2._xp/3._xp*kx_max;
     ! Antialiasing filter
-    ALLOCATE(AA_x(ikxs:ikxe))
-    DO ikx = ikxs,ikxe
+    ALLOCATE(AA_x(local_nkx))
+    DO ikx = 1,local_nkx
       IF ( ((kxarray(ikx) .GT. -two_third_kxmax) .AND. &
            (kxarray(ikx) .LT. two_third_kxmax))   .OR. (LINEARITY .EQ. 'linear')) THEN
-        AA_x(ikx) = 1._dp;
+        AA_x(ikx) = 1._xp;
       ELSE
-        AA_x(ikx) = 0._dp;
+        AA_x(ikx) = 0._xp;
       ENDIF
     END DO
+    ! For hyperdiffusion
+    IF(LINEARITY.EQ.'linear') THEN
+      diff_kx_coeff= (1._xp/kx_max)**N_HD
+    ELSE
+      diff_kx_coeff= (1._xp/two_third_kxmax)**N_HD
+    ENDIF
   END SUBROUTINE set_kxgrid
 
-
-  SUBROUTINE set_zgrid
+  SUBROUTINE set_zgrid(Npol)
     USE prec_const
-    USE model, ONLY: mu_z
+    USE parallel, ONLY: num_procs_z, rank_z
     IMPLICIT NONE
-    REAL(dp) :: grid_shift, Lz, zmax, zmin
-    INTEGER  :: istart, iend, in
+    REAL(xp):: grid_shift, Lz, zmax, zmin
+    INTEGER :: istart, iend, in, Npol, iz, ig, eo, iglob
     total_nz = Nz
     ! Length of the flux tube (in ballooning angle)
-    Lz         = 2_dp*pi*REAL(Npol,dp)
+    Lz         = 2._xp*pi*REAL(Npol,xp)
     ! Z stepping (#interval = #points since periodic)
-    deltaz        = Lz/REAL(Nz,dp)
-    inv_deltaz    = 1._dp/deltaz
+    deltaz        = Lz/REAL(Nz,xp)
+    inv_deltaz    = 1._xp/deltaz
     ! Parallel hyperdiffusion coefficient
-    IF(mu_z .GT. 0) THEN
-      diff_dz_coeff = (deltaz/2._dp)**4 ! adaptive fourth derivative (~GENE)
-    ELSE
-      diff_dz_coeff = -1._dp    ! non adaptive (negative sign to compensate mu_z neg)
-    ENDIF
+    diff_dz_coeff = (deltaz/2._xp)**4 ! adaptive fourth derivative (~GENE)
     IF (SG) THEN
-      grid_shift = deltaz/2._dp
+      CALL speak('--2 staggered z grids--')
+      grid_shift = deltaz/2._xp
+      ! indices for even p and odd p grids (used in kernel, jacobian, gij etc.)
+      ieven  = 1
+      iodd   = 2
+      nzgrid = 2
     ELSE
-      grid_shift = 0._dp
+      grid_shift = 0._xp
+      ieven  = 1
+      iodd   = 1
+      nzgrid = 1
     ENDIF
     ! Build the full grids on process 0 to diagnose it without comm
-    ALLOCATE(zarray_full(1:Nz))
+    ALLOCATE(zarray_full(total_nz))
     IF (Nz .EQ. 1) Npol = 0
     zmax = 0; zmin = 0;
     DO iz = 1,total_nz ! z in [-pi pi-dz] x Npol
-      zarray_full(iz) = REAL(iz-1,dp)*deltaz - Lz/2._dp
+      zarray_full(iz) = REAL(iz-1,xp)*deltaz - Lz/2._xp
       IF(zarray_full(iz) .GT. zmax) zmax = zarray_full(iz)
       IF(zarray_full(iz) .LT. zmin) zmin = zarray_full(iz)
     END DO
@@ -526,112 +499,147 @@ CONTAINS
     ERROR STOP '>> ERROR << Cannot have multiple core in z-direction (Nz = 1)'
     ! Local data distribution
     CALL decomp1D(total_nz, num_procs_z, rank_z, izs, ize)
-    local_nz = ize - izs + 1
+    local_nz        = ize - izs + 1
+    local_nz_offset = izs - 1
     ! Ghosts boundaries (depend on the order of z operators)
     IF(Nz .EQ. 1) THEN
-      izgs = izs;     izge = ize;
-      zarray_full(izs) = 0;
+      ngz              = 0
+      zarray_full(izs) = 0
     ELSEIF(Nz .GE. 4) THEN
-      izgs = izs - 2; izge = ize + 2;
+      ngz =4
+      IF(mod(Nz,2) .NE. 0 ) THEN
+        ERROR STOP '>> ERROR << Nz must be an even number for Simpson integration rule !!!!'
+     ENDIF
     ELSE
       ERROR STOP '>> ERROR << Nz is not appropriate!!'
     ENDIF
     ! List of shift and local numbers between the different processes (used in scatterv and gatherv)
-    ALLOCATE(counts_nz (1:num_procs_z))
-    ALLOCATE(displs_nz (1:num_procs_z))
+    ALLOCATE(counts_nz (num_procs_z))
+    ALLOCATE(displs_nz (num_procs_z))
     DO in = 0,num_procs_z-1
       CALL decomp1D(total_nz, num_procs_z, in, istart, iend)
       counts_nz(in+1) = iend-istart+1
       displs_nz(in+1) = istart-1
     ENDDO
     ! Local z array
-    ALLOCATE(zarray(izgs:izge,0:1))
-    DO iz = izgs,izge
-      IF(iz .EQ. 0) THEN
-        zarray(iz,0) = zarray_full(total_nz)
-        zarray(iz,1) = zarray_full(total_nz) + grid_shift
-      ELSEIF(iz .EQ. -1) THEN
-        zarray(iz,0) = zarray_full(total_nz-1)
-        zarray(iz,1) = zarray_full(total_nz-1) + grid_shift
-      ELSEIF(iz .EQ. total_nz + 1) THEN
-        zarray(iz,0) = zarray_full(1)
-        zarray(iz,1) = zarray_full(1) + grid_shift
-      ELSEIF(iz .EQ. total_nz + 2) THEN
-        zarray(iz,0) = zarray_full(2)
-        zarray(iz,1) = zarray_full(2) + grid_shift
-      ELSE
-        zarray(iz,0) = zarray_full(iz)
-        zarray(iz,1) = zarray_full(iz) + grid_shift
-      ENDIF
+    ALLOCATE(zarray(local_nz+ngz,nzgrid))
+    !! interior point loop
+    DO iz = 1,local_nz
+      DO eo = 1,nzgrid
+        zarray(iz+ngz/2,eo) = zarray_full(iz+local_nz_offset) + REAL(eo-1,xp)*grid_shift
+      ENDDO
     ENDDO
-    IF(abs(zarray(izs,0) - zmin) .LT. EPSILON(zmin)) &
-      contains_zmin = .TRUE.
-    IF(abs(zarray(ize,0) - zmax) .LT. EPSILON(zmax)) &
-      contains_zmax = .TRUE.
-    ! Weights for Simpson rule
-    ALLOCATE(zweights_SR(izs:ize))
-    DO iz = izs,ize
-      IF(MODULO(iz,2) .EQ. 1) THEN ! odd iz
-        zweights_SR(iz) = 4._dp
-      ELSE ! even iz
-        zweights_SR(iz) = 2._dp
-      ENDIF
+    CALL allocate_array(local_zmax,1,nzgrid)
+    CALL allocate_array(local_zmin,1,nzgrid)
+    DO eo = 1,nzgrid
+      ! Find local extrema
+      local_zmax(eo) = zarray(local_nz+ngz/2,eo)
+      local_zmin(eo) = zarray(1+ngz/2,eo)
+      ! Fill the ghosts
+      ! Continue angles
+      ! DO ig = 1,ngz/2
+      !   zarray(ig,eo)          = local_zmin(eo)-REAL(ngz/2-(ig-1),xp)*deltaz
+      !   zarray(local_nz+ngz/2+ig,eo) = local_zmax(eo)+REAL(ig,xp)*deltaz
+      ! ENDDO
+      ! Periodic z \in (-pi pi-dz)
+      DO ig = 1,ngz/2 ! first ghost cells
+        iglob = ig+local_nz_offset-ngz/2
+        IF (iglob .LE. 0) &
+          iglob = iglob + total_nz
+        zarray(ig,eo) = zarray_full(iglob)
+      ENDDO
+      DO ig = local_nz+ngz/2,local_nz+ngz ! last ghost cells
+        iglob = ig+local_nz_offset-ngz/2
+        IF (iglob .GT. total_nz) &
+          iglob = iglob - total_nz
+        zarray(ig,eo) = zarray_full(iglob)
+      ENDDO
+      ! Set up the flags to know if the process contains the tip and/or the tail
+      ! of the z domain (important for z-boundary condition)
+      IF(abs(local_zmin(eo) - (zmin+REAL(eo-1,xp)*grid_shift)) .LT. EPSILON(zmin)) &
+        contains_zmin = .TRUE.
+      IF(abs(local_zmax(eo) - (zmax+REAL(eo-1,xp)*grid_shift)) .LT. EPSILON(zmax)) &
+        contains_zmax = .TRUE.
     ENDDO
-    ! IF (contains_zmin) &
-    !   zweights_SR(izs) = 1._dp
-    ! IF (contains_zmax) &
-    !   zweights_SR(ize) = 1._dp
+    ! local weights for Simpson rule
+    ALLOCATE(zweights_SR(local_nz))
+    IF(total_nz .EQ. 1) THEN
+      zweights_SR = 1._xp
+    ELSE
+      DO iz = 1,local_nz
+        IF(MODULO(iz+local_nz_offset,2) .EQ. 1) THEN ! odd iz
+          zweights_SR(iz) = onethird*deltaz*2._xp
+        ELSE ! even iz
+          zweights_SR(iz) = onethird*deltaz*4._xp
+        ENDIF
+      ENDDO
+    ENDIF
   END SUBROUTINE set_zgrid
 
-  SUBROUTINE grid_outputinputs(fidres, str)
-    ! Write the input parameters to the results_xx.h5 file
-
-    USE futils, ONLY: attach
+  SUBROUTINE set_kparray(gxx, gxy, gyy,hatB)
+    REAL(xp), DIMENSION(local_nz+ngz,nzgrid), INTENT(IN) :: gxx,gxy,gyy,hatB
+    INTEGER     :: eo,iz,iky,ikx
+    REAL(xp)    :: kx, ky
+    CALL allocate_array( kparray, 1,local_nky, 1,local_nkx, 1,local_nz+ngz, 1,nzgrid)
+    DO eo = 1,nzgrid
+      DO iz = 1,local_nz+ngz
+        DO iky = 1,local_nky
+          ky = kyarray(iky)
+          DO ikx = 1,local_nkx
+            kx = kxarray(ikx)
+            ! there is a factor 1/B from the normalization; important to match GENE
+            ! this factor comes from $b_a$ argument in the Bessel. Kperp is not used otherwise.
+            kparray(iky, ikx, iz, eo) = &
+            SQRT( gxx(iz,eo)*kx**2 + 2._xp*gxy(iz,eo)*kx*ky + gyy(iz,eo)*ky**2)/ hatB(iz,eo)
+          ENDDO
+        ENDDO
+      ENDDO
+    ENDDO
+    two_third_kpmax = 2._xp/3._xp * MAXVAL(kparray)
+  END SUBROUTINE
 
-    USE prec_const
+  SUBROUTINE grid_outputinputs(fid)
+    ! Write the input parameters to the results_xx.h5 file
+    USE futils, ONLY: attach, creatd
     IMPLICIT NONE
-
-    INTEGER, INTENT(in) :: fidres
-    CHARACTER(len=256), INTENT(in) :: str
-    CALL attach(fidres, TRIM(str), "pmaxe", pmaxe)
-    CALL attach(fidres, TRIM(str), "jmaxe", jmaxe)
-    CALL attach(fidres, TRIM(str), "pmaxi", pmaxi)
-    CALL attach(fidres, TRIM(str), "jmaxi", jmaxi)
-    CALL attach(fidres, TRIM(str),   "Nx",   Nx)
-    CALL attach(fidres, TRIM(str),   "Lx",   Lx)
-    CALL attach(fidres, TRIM(str), "Nexc", Nexc)
-    CALL attach(fidres, TRIM(str),   "Ny",   Ny)
-    CALL attach(fidres, TRIM(str),   "Ly",   Ly)
-    CALL attach(fidres, TRIM(str),   "Nz",   Nz)
-    CALL attach(fidres, TRIM(str),  "Nkx",  Nkx)
-    CALL attach(fidres, TRIM(str),  "Nky",  Nky)
-    CALL attach(fidres, TRIM(str),   "SG",   SG)
+    INTEGER, INTENT(in) :: fid
+    CHARACTER(len=256)  :: str
+    WRITE(str,'(a)') '/data/input/grid'
+    CALL creatd(fid, 0,(/0/),TRIM(str),'Grid Input')
+    CALL attach(fid, TRIM(str),  "pmax", pmax)
+    CALL attach(fid, TRIM(str),"deltap", deltap)
+    CALL attach(fid, TRIM(str),  "jmax", jmax)
+    CALL attach(fid, TRIM(str),   "Nkx",  Nkx)
+    CALL attach(fid, TRIM(str),    "Nx",   Nx)
+    CALL attach(fid, TRIM(str),    "Lx",   Lx)
+    CALL attach(fid, TRIM(str),  "Nexc", Nexc)
+    CALL attach(fid, TRIM(str),    "Ny",   Ny)
+    CALL attach(fid, TRIM(str),   "Nky",  Nky)
+    CALL attach(fid, TRIM(str),    "Ly",   Ly)
+    CALL attach(fid, TRIM(str),    "Nz",   Nz)
+    CALL attach(fid, TRIM(str),   "total_nkx",  total_nkx)
+    CALL attach(fid, TRIM(str),   "Nky",  Nky)
+    CALL attach(fid, TRIM(str),    "SG",   SG)
   END SUBROUTINE grid_outputinputs
 
-  FUNCTION bare(p_,j_)
-    IMPLICIT NONE
-    INTEGER :: bare, p_, j_
-    bare = (jmaxe+1)*p_ + j_ + 1
-  END FUNCTION
-
-  FUNCTION bari(p_,j_)
+  FUNCTION bar(p_,j_)
     IMPLICIT NONE
-    INTEGER :: bari, p_, j_
-    bari = (jmaxi+1)*p_ + j_ + 1
+    INTEGER :: bar, p_, j_
+    bar = (jmax+1)*p_ + j_ + 1
   END FUNCTION
 
-  SUBROUTINE decomp1D( n, numprocs, myid, s, e )
-      INTEGER :: n, numprocs, myid, s, e
+  SUBROUTINE decomp1D( n, numprocs, myid, is, ie )
+      INTEGER :: n, numprocs, myid, is, ie
       INTEGER :: nlocal
       INTEGER :: deficit
 
       nlocal   = n / numprocs
-      s        = myid * nlocal + 1
+      is        = myid * nlocal + 1
       deficit  = MOD(n,numprocs)
-      s        = s + MIN(myid,deficit)
+      is        = is + MIN(myid,deficit)
       IF (myid .LT. deficit) nlocal = nlocal + 1
-      e = s + nlocal - 1
-      IF (e .GT. n .OR. myid .EQ. numprocs-1) e = n
+      ie = is + nlocal - 1
+      IF ((ie .GT. n) .OR. (myid .EQ. numprocs-1)) ie = n
   END SUBROUTINE decomp1D
 
 END MODULE grid
diff --git a/src/inital.F90 b/src/inital.F90
index 2f61ff9c47e66244605cdff573174a5a8721ad9d..cfc88cb4fee3ba51069b797b11636534b9015832 100644
--- a/src/inital.F90
+++ b/src/inital.F90
@@ -3,15 +3,15 @@
 !******************************************************************************!
 SUBROUTINE inital
 
-  USE basic,            ONLY: my_id
+  USE basic,            ONLY: speak
   USE initial_par,      ONLY: INIT_OPT
   USE time_integration, ONLY: set_updatetlevel
-  USE collision,        ONLY: load_COSOlver_mat, cosolver_coll
+  USE collision,        ONLY: init_collision
   USE closure,          ONLY: apply_closure_model
   USE ghosts,           ONLY: update_ghosts_moments, update_ghosts_EM
   USE restarts,         ONLY: load_moments, job2load
   USE processing,       ONLY: compute_fluid_moments
-  USE model,            ONLY: KIN_E, LINEARITY
+  USE model,            ONLY: LINEARITY
   USE nonlinear,        ONLY: compute_Sapj, nonlinear_init
   IMPLICIT NONE
 
@@ -20,7 +20,7 @@ SUBROUTINE inital
   !!!!!! Set the moments arrays Nepj, Nipj and phi!!!!!!
   ! through loading a previous state
   IF ( job2load .GE. 0 ) THEN
-    IF (my_id .EQ. 0) WRITE(*,*) 'Load moments'
+   CALL speak('Load moments')
     CALL load_moments ! get N_0
     CALL update_ghosts_moments
     CALL solve_EM_fields ! compute phi_0=phi(N_0)
@@ -30,37 +30,37 @@ SUBROUTINE inital
     SELECT CASE (INIT_OPT)
     ! set phi with noise and set moments to 0
     CASE ('phi')
-      IF (my_id .EQ. 0) WRITE(*,*) 'Init noisy phi'
+      CALL speak('Init noisy phi')
       CALL init_phi
       CALL update_ghosts_EM
     CASE ('phi_ppj')
-      IF (my_id .EQ. 0) WRITE(*,*) 'Init noisy phi'
+      CALL speak('Init noisy phi')
       CALL init_phi_ppj
       CALL update_ghosts_EM
     ! set moments_00 (GC density) with noise and compute phi afterwards
     CASE('mom00')
-      IF (my_id .EQ. 0) WRITE(*,*) 'Init noisy gyrocenter density'
+      CALL speak('Init noisy gyrocenter density')
       CALL init_gyrodens ! init only gyrocenter density
       CALL update_ghosts_moments
       CALL solve_EM_fields
       CALL update_ghosts_EM
     ! init all moments randomly (unadvised)
     CASE('allmom')
-      IF (my_id .EQ. 0) WRITE(*,*) 'Init noisy moments'
+      CALL speak('Init noisy moments')
       CALL init_moments ! init all moments
       CALL update_ghosts_moments
       CALL solve_EM_fields
       CALL update_ghosts_EM
     ! init a gaussian blob in gyrodens
     CASE('blob')
-      IF (my_id .EQ. 0) WRITE(*,*) '--init a blob'
+      CALL speak('--init a blob')
       CALL initialize_blob
       CALL update_ghosts_moments
       CALL solve_EM_fields
       CALL update_ghosts_EM
     ! init moments 00 with a power law similarly to GENE
     CASE('ppj')
-      IF (my_id .EQ. 0) WRITE(*,*) 'ppj init ~ GENE'
+      CALL speak('ppj init ~ GENE')
       call init_ppj
       CALL update_ghosts_moments
       CALL solve_EM_fields
@@ -68,21 +68,20 @@ SUBROUTINE inital
     END SELECT
   ENDIF
   ! closure of j>J, p>P and j<0, p<0 moments
-  IF (my_id .EQ. 0) WRITE(*,*) 'Apply closure'
+  CALL speak('Apply closure')
   CALL apply_closure_model
   ! ghosts for p parallelization
-  IF (my_id .EQ. 0) WRITE(*,*) 'Ghosts communication'
+  CALL speak('Ghosts communication')
   CALL update_ghosts_moments
   CALL update_ghosts_EM
   !! End of phi and moments initialization
 
-  ! Load the COSOlver collision operator coefficients
-  IF(cosolver_coll) &
-  CALL load_COSOlver_mat
+  ! Init collision operator
+  CALL init_collision
 
   !! Preparing auxiliary arrays at initial state
   ! particle density, fluid velocity and temperature (used in diagnose)
-  IF (my_id .EQ. 0) WRITE(*,*) 'Computing fluid moments'
+  CALL speak('Computing fluid moments')
   CALL compute_fluid_moments
 
   ! init auxval for nonlinear
@@ -97,86 +96,63 @@ END SUBROUTINE inital
 !!!!!!! Initialize all the moments randomly
 !******************************************************************************!
 SUBROUTINE init_moments
-  USE basic
-  USE grid
+  USE grid,       ONLY: local_na, local_np, local_nj, total_nkx, local_nky, local_nz,&
+                        ngp, ngj, ngz, iky0, contains_ky0, AA_x, AA_y
   USE initial_par,ONLY: iseed, init_noiselvl, init_background
-  USE fields,     ONLY: moments_e, moments_i
-  USE prec_const, ONLY: dp
-  USE utility,    ONLY: checkfield
-  USE model,      ONLY : LINEARITY, KIN_E
+  USE fields,     ONLY: moments
+  USE prec_const, ONLY: xp
+  USE model,      ONLY: LINEARITY
+  USE parallel,   ONLY: my_id
   IMPLICIT NONE
 
-  REAL(dp) :: noise
+  REAL(xp) :: noise
   INTEGER, DIMENSION(12) :: iseedarr
+  INTEGER  :: ia,ip,ij,ikx,iky,iz, ipi,iji,izi
 
   ! Seed random number generator
   iseedarr(:)=iseed
   CALL RANDOM_SEED(PUT=iseedarr+my_id)
 
     !**** Broad noise initialization *******************************************
-    ! Electron init
-    IF(KIN_E) THEN
-    DO ip=ips_e,ipe_e
-      DO ij=ijs_e,ije_e
-        DO ikx=ikxs,ikxe
-          DO iky=ikys,ikye
-            DO iz=izs,ize
+  DO ia=1,local_na
+    DO ip=1,local_np
+      ipi = ip+ngp/2
+      DO ij=1,local_nj
+        iji = ij+ngj/2
+        DO ikx=1,total_nkx
+          DO iky=1,local_nky
+            DO iz=1,local_nz
+              izi = iz+ngz/2
               CALL RANDOM_NUMBER(noise)
-              moments_e(ip,ij,iky,ikx,iz,:) = (init_background + init_noiselvl*(noise-0.5_dp))
+              moments(ia,ipi,iji,iky,ikx,izi,:) = (init_background + init_noiselvl*(noise-0.5_xp))
             END DO
           END DO
         END DO
         IF ( contains_ky0 ) THEN
-          DO iky=2,Nky/2 !symmetry at ky = 0 for all z
-            moments_e(ip,ij,iky_0,ikx,:,:) = moments_e( ip,ij,iky_0,Nkx+2-ikx,:, :)
+          DO ikx=2,total_nkx/2 !symmetry at ky = 0 for all z
+            moments(ia,ipi,iji,iky0,ikx,:,:) = moments(ia,ipi,iji,iky0,total_nkx+2-ikx,:,:)
           END DO
         ENDIF
       END DO
     END DO
-    ENDIF
-    ! Ion init
-    DO ip=ips_i,ipe_i
-      DO ij=ijs_i,ije_i
-
-        DO ikx=ikxs,ikxe
-          DO iky=ikys,ikye
-            DO iz=izs,ize
-              CALL RANDOM_NUMBER(noise)
-              moments_i(ip,ij,iky,ikx,iz,:) = (init_background + init_noiselvl*(noise-0.5_dp))
-            END DO
-          END DO
-        END DO
-
-        IF ( contains_ky0 ) THEN
-          DO ikx=2,Nkx/2 !symmetry at ky = 0 for all z
-            moments_i( ip,ij,iky_0,ikx,:,:) = moments_i( ip,ij,iky_0,Nkx+2-ikx,:,:)
-          END DO
-        ENDIF
-
-      END DO
-    END DO
-
     ! Putting to zero modes that are not in the 2/3 Orszag rule
     IF (LINEARITY .NE. 'linear') THEN
-      DO ikx=ikxs,ikxe
-      DO iky=ikys,ikye
-      DO iz=izs,ize
-        IF(KIN_E) THEN
-        DO ip=ips_e,ipe_e
-        DO ij=ijs_e,ije_e
-          moments_e( ip,ij,iky,ikx,iz, :) = moments_e( ip,ij,iky,ikx,iz, :)*AA_x(ikx)*AA_y(iky)
-        ENDDO
+      DO ikx=1,total_nkx
+        DO iky=1,local_nky
+          DO iz=1,local_nz
+            izi = iz+ngz/2
+            DO ip=1,local_np
+              ipi = ip+ngp/2
+              DO ij=1,local_nj
+                iji = ij+ngj/2
+                moments(ia,ipi,iji,iky,ikx,izi, :) = moments(ia, ipi,iji,iky,ikx,izi, :)*AA_x(ikx)*AA_y(iky)
+              ENDDO
+            ENDDO
+          ENDDO
         ENDDO
-        ENDIF
-        DO ip=ips_i,ipe_i
-        DO ij=ijs_i,ije_i
-          moments_i( ip,ij,iky,ikx,iz, :) = moments_i( ip,ij,iky,ikx,iz, :)*AA_x(ikx)*AA_y(iky)
-        ENDDO
-        ENDDO
-      ENDDO
-      ENDDO
       ENDDO
     ENDIF
+  ENDDO
 END SUBROUTINE init_moments
 !******************************************************************************!
 
@@ -184,90 +160,61 @@ END SUBROUTINE init_moments
 !!!!!!! Initialize the gyrocenter density randomly
 !******************************************************************************!
 SUBROUTINE init_gyrodens
-  USE basic
-  USE grid
-  USE fields
-  USE prec_const
-  USE utility, ONLY: checkfield
-  USE initial_par
-  USE model, ONLY: KIN_E, LINEARITY
+  USE grid,       ONLY: local_na, local_np, local_nj, total_nkx, local_nky, local_nz,&
+                        ngp, ngj, ngz, iky0, parray, jarray, contains_ky0, AA_x, AA_y
+  USE fields,     ONLY: moments
+  USE prec_const, ONLY: xp
+  USE initial_par,ONLY: iseed, init_noiselvl, init_background
+  USE model,      ONLY: LINEARITY
+  USE parallel,   ONLY: my_id
   IMPLICIT NONE
 
-  REAL(dp) :: noise
+  REAL(xp) :: noise
+  INTEGER  :: ia,ip,ij,ikx,iky,iz
   INTEGER, DIMENSION(12) :: iseedarr
 
   ! Seed random number generator
   iseedarr(:)=iseed
   CALL RANDOM_SEED(PUT=iseedarr+my_id)
-
+  moments = 0._xp
     !**** Broad noise initialization *******************************************
-    IF(KIN_E) THEN
-    DO ip=ips_e,ipe_e
-      DO ij=ijs_e,ije_e
-        DO ikx=ikxs,ikxe
-          DO iky=ikys,ikye
-            DO iz=izs,ize
-              CALL RANDOM_NUMBER(noise)
-              IF ( (ip .EQ. 1) .AND. (ij .EQ. 1) ) THEN
-                moments_e(ip,ij,iky,ikx,iz,:) = (init_background + init_noiselvl*(noise-0.5_dp))
-              ELSE
-                moments_e(ip,ij,iky,ikx,iz,:) = 0._dp
-              ENDIF
-            END DO
-          END DO
-        END DO
-        IF ( contains_ky0 ) THEN
-          DO ikx=2,Nkx/2 !symmetry at ky = 0 for all z
-            moments_e(ip,ij,iky_0,ikx,:,:) = moments_e(ip,ij,iky_0,Nkx+2-ikx,:,:)
-          END DO
-        ENDIF
-      END DO
-    END DO
-    ENDIF
-
-    DO ip=ips_i,ipe_i
-      DO ij=ijs_i,ije_i
-        DO ikx=ikxs,ikxe
-          DO iky=ikys,ikye
-            DO iz=izs,ize
+  DO ia=1,local_na
+    DO ip=1+ngp/2,local_np+ngp/2
+      DO ij=1+ngj/2,local_nj+ngj/2
+        DO ikx=1,total_nkx
+          DO iky=1,local_nky
+            DO iz=1+ngz/2,local_nz+ngz/2
               CALL RANDOM_NUMBER(noise)
-              IF ( (ip .EQ. 1) .AND. (ij .EQ. 1) ) THEN
-                moments_i(ip,ij,iky,ikx,iz,:) = (init_background + init_noiselvl*(noise-0.5_dp))
+              IF ( (parray(ip) .EQ. 0) .AND. (jarray(ij) .EQ. 0) ) THEN
+                moments(ia,ip,ij,iky,ikx,iz,:) = (init_background + init_noiselvl*(noise-0.5_xp))
               ELSE
-                moments_i(ip,ij,iky,ikx,iz,:) = 0._dp
+                moments(ia,ip,ij,iky,ikx,iz,:) = 0._xp
               ENDIF
             END DO
           END DO
         END DO
         IF ( contains_ky0 ) THEN
-          DO ikx=2,Nkx/2 !symmetry at ky = 0 for all z
-            moments_i( ip,ij,iky_0,ikx,:,:) = moments_i( ip,ij,iky_0,Nkx+2-ikx,:,:)
+          DO ikx=2,total_nkx/2 !symmetry at ky = 0 for all z
+            moments(ia, ip,ij,iky0,ikx,:,:) = moments(ia, ip,ij,iky0,total_nkx+2-ikx,:,:)
           END DO
         ENDIF
       END DO
     END DO
-
     ! Putting to zero modes that are not in the 2/3 Orszag rule
     IF (LINEARITY .NE. 'linear') THEN
-      DO ikx=ikxs,ikxe
-      DO iky=ikys,ikye
-      DO iz=izs,ize
-        IF(KIN_E) THEN
-        DO ip=ips_e,ipe_e
-        DO ij=ijs_e,ije_e
-          moments_e( ip,ij,iky,ikx,iz, :) = moments_e( ip,ij,iky,ikx,iz, :)*AA_x(ikx)*AA_y(iky)
-        ENDDO
-        ENDDO
-        ENDIF
-        DO ip=ips_i,ipe_i
-        DO ij=ijs_i,ije_i
-          moments_i( ip,ij,iky,ikx,iz, :) = moments_i( ip,ij,iky,ikx,iz, :)*AA_x(ikx)*AA_y(iky)
-        ENDDO
+      DO ikx=1,total_nkx
+        DO iky=1,local_nky
+          DO iz=1,local_nz+ngz
+            DO ip=1,local_np+ngp
+              DO ij=1,local_nj+ngj
+                moments(ia, ip,ij,iky,ikx,iz, :) = moments(ia, ip,ij,iky,ikx,iz, :)*AA_x(ikx)*AA_y(iky)
+              ENDDO
+            ENDDO
+          ENDDO
         ENDDO
       ENDDO
-      ENDDO
-      ENDDO
     ENDIF
+  ENDDO
 END SUBROUTINE init_gyrodens
 !******************************************************************************!
 
@@ -275,57 +222,53 @@ END SUBROUTINE init_gyrodens
 !!!!!!! Initialize a noisy ES potential and cancel the moments
 !******************************************************************************!
 SUBROUTINE init_phi
-  USE basic
-  USE grid
-  USE fields
-  USE prec_const
-  USE initial_par
-  USE model, ONLY: KIN_E, LINEARITY
+  USE grid,       ONLY: total_nkx, local_nky, local_nz,&
+                        ngz, iky0, ikx0, contains_ky0
+  USE fields,     ONLY: phi, moments
+  USE prec_const, ONLY: xp
+  USE initial_par,ONLY: iseed, init_noiselvl, init_background
+  USE model,      ONLY: LINEARITY
+  USE parallel,   ONLY: my_id
   IMPLICIT NONE
-
-  REAL(dp) :: noise
+  REAL(xp) :: noise
   INTEGER, DIMENSION(12) :: iseedarr
-
+  INTEGER :: iky,ikx,iz
   ! Seed random number generator
   iseedarr(:)=iseed
   CALL RANDOM_SEED(PUT=iseedarr+my_id)
-
-    !**** noise initialization *******************************************
-
-    DO ikx=ikxs,ikxe
-      DO iky=ikys,ikye
+  !**** noise initialization *******************************************
+  DO ikx=1,total_nkx
+    DO iky=1,local_nky
+      DO iz=1,local_nz+ngz
         CALL RANDOM_NUMBER(noise)
-        DO iz=izs,ize
-          phi(iky,ikx,iz) = (init_background + init_noiselvl*(noise-0.5_dp))!*AA_x(ikx)*AA_y(iky)
-        ENDDO
-      END DO
+        phi(iky,ikx,iz) = (init_background + init_noiselvl*(noise-0.5_xp))!*AA_x(ikx)*AA_y(iky)
+      ENDDO
     END DO
-
-    !symmetry at ky = 0 to keep real inverse transform
-    IF ( contains_ky0 ) THEN
-      DO ikx=2,Nkx/2
-        phi(iky_0,ikx,izs:ize) = phi(iky_0,Nkx+2-ikx,izs:ize)
-      END DO
-      phi(iky_0,ikx_0,izs:ize) = REAL(phi(iky_0,ikx_0,izs:ize),dp) !origin must be real
-    ENDIF
-
-    !**** ensure no previous moments initialization
-    IF(KIN_E) moments_e = 0._dp
-    moments_i = 0._dp
-
-    !**** Zonal Flow initialization *******************************************
-    ! put a mode at ikx = mode number + 1, symmetry is already included since kx>=0
-    IF(INIT_ZF .GT. 0) THEN
-      IF (my_id .EQ. 0) WRITE(*,*) 'Init ZF phi'
-      IF( (INIT_ZF+1 .GT. ikxs) .AND. (INIT_ZF+1 .LT. ikxe) ) THEN
-        DO iz = izs,ize
-          phi(iky_0,INIT_ZF+1,iz) = ZF_AMP*(2._dp*PI)**2/deltakx/deltaky/2._dp * COS((iz-1)/Nz*2._dp*PI)
-          moments_i(1,1,iky_0,INIT_ZF+1,iz,:) = kxarray(INIT_ZF+1)**2*phi(iky_0,INIT_ZF+1,iz)* COS((iz-1)/Nz*2._dp*PI)
-          IF(KIN_E) moments_e(1,1,iky_0,INIT_ZF+1,iz,:) = 0._dp
-        ENDDO
-      ENDIF
-    ENDIF
-
+  END DO
+  !symmetry at ky = 0 to keep real inverse transform
+  IF ( contains_ky0 ) THEN
+    DO iz=1,local_nz+ngz
+      DO ikx=2,total_nkx/2
+        phi(iky0,ikx,iz) = phi(iky0,total_nkx+2-ikx,iz)
+      ENDDO
+    phi(iky0,ikx0,iz) = REAL(phi(iky0,ikx0,iz),xp) !origin must be real
+  END DO
+  ENDIF
+  !**** ensure no previous moments initialization
+  moments = 0._xp
+  !**** Zonal Flow initialization *******************************************
+  ! put a mode at ikx = mode number + 1, symmetry is already included since kx>=0
+  ! IF(INIT_ZF .GT. 0) THEN
+  !   IF (my_id .EQ. 0) WRITE(*,*) 'Init ZF phi'
+  !   IF( (INIT_ZF+1 .GT. ikxs) .AND. (INIT_ZF+1 .LT. ikxe) ) THEN
+  !     DO ia=1,local_na
+  !       DO iz = 1,local_nz+ngz
+  !         phi(iky0,INIT_ZF+1,iz) = ZF_AMP*(2._xp*PI)**2/deltakx/deltaky/2._xp * COS((iz-1)/Nz*2._xp*PI)
+  !         moments(ia,ip0,ij0,iky0,INIT_ZF+1,iz,:) = kxarray(INIT_ZF+1)**2*phi(iky0,INIT_ZF+1,iz)* COS((iz-1)/Nz*2._xp*PI)
+  !       ENDDO
+  !     ENDDO
+  !   ENDIF
+  ! ENDIF
 END SUBROUTINE init_phi
 !******************************************************************************!
 
@@ -333,47 +276,46 @@ END SUBROUTINE init_phi
 !!!!!!! Initialize a ppj ES potential and cancel the moments
 !******************************************************************************!
 SUBROUTINE init_phi_ppj
-  USE basic
-  USE grid
-  USE fields
-  USE prec_const
-  USE initial_par
-  USE model, ONLY: KIN_E, LINEARITY
-  USE geometry, ONLY: Jacobian, iInt_Jacobian
+  USE grid,       ONLY: total_nkx, local_nky, local_nz,&
+                        ngz, iky0, ikx0, contains_ky0, ieven, kxarray, kyarray, zarray, deltakx
+  USE fields,     ONLY: phi, moments
+  USE prec_const, ONLY: xp
+  USE initial_par,ONLY: iseed, init_noiselvl, init_background
+  USE model,      ONLY: LINEARITY
+  USE geometry,   ONLY: Jacobian, iInt_Jacobian
   IMPLICIT NONE
-  REAL(dp) :: kx, ky, z, amp
-  amp = 1.0_dp
+  REAL(xp) :: kx, ky, z, amp
+  INTEGER  :: ikx, iky, iz
+  amp = 1.0_xp
     !**** ppj initialization *******************************************
-      DO ikx=ikxs,ikxe
+      DO ikx=1,total_nkx
         kx = kxarray(ikx)
-        DO iky=ikys,ikye
+        DO iky=1,local_nky
           ky = kyarray(iky)
-          DO iz=izs,ize
-            z = zarray(iz,0)
+          DO iz=1,local_nz+ngz
+            z = zarray(iz,ieven)
             IF (ky .NE. 0) THEN
-              phi(iky,ikx,iz) = 0._dp
+              phi(iky,ikx,iz) = 0._xp
             ELSE
-              phi(iky,ikx,iz) = 0.5_dp*amp*(deltakx/(ABS(kx)+deltakx))
+              phi(iky,ikx,iz) = 0.5_xp*amp*(deltakx/(ABS(kx)+deltakx))
             ENDIF
             ! z-dep and noise
             phi(iky,ikx,iz) = phi(iky,ikx,iz) * &
-            (Jacobian(iz,0)*iInt_Jacobian)**2
+            (Jacobian(iz,ieven)*iInt_Jacobian)**2
           END DO
         END DO
       END DO
-
     !symmetry at ky = 0 to keep real inverse transform
     IF ( contains_ky0 ) THEN
-      DO ikx=2,Nkx/2
-        phi(iky_0,ikx,izs:ize) = phi(iky_0,Nkx+2-ikx,izs:ize)
+      DO iz=1,local_nz+ngz
+        DO ikx=2,total_nkx/2
+          phi(iky0,ikx,iz) = phi(iky0,total_nkx+2-ikx,iz)
+        ENDDO
+        phi(iky0,ikx0,iz) = REAL(phi(iky0,ikx0,iz),xp) !origin must be real
       END DO
-      phi(iky_0,ikx_0,izs:ize) = REAL(phi(iky_0,ikx_0,izs:ize),dp) !origin must be real
     ENDIF
-
     !**** ensure no previous moments initialization
-    IF(KIN_E) moments_e = 0._dp
-    moments_i = 0._dp
-
+    moments = 0._xp
 END SUBROUTINE init_phi_ppj
 !******************************************************************************!
 
@@ -382,184 +324,119 @@ END SUBROUTINE init_phi_ppj
 !!!!!!! Initialize an ionic Gaussian blob on top of the preexisting modes
 !******************************************************************************!
 SUBROUTINE initialize_blob
-  USE fields
-  USE grid
-  USE geometry, ONLY: shear, Jacobian, iInt_Jacobian
-  USE model, ONLY: KIN_E, LINEARITY
+  USE grid,       ONLY: local_na, local_np, local_nj, total_nkx, local_nky, local_nz,&
+                        AA_x, AA_y, parray, jarray,&
+                        ngp,ngj,ngz, iky0, ieven, kxarray, kyarray, zarray
+  USE fields,     ONLY: moments
+  USE prec_const, ONLY: xp
+  USE initial_par,ONLY: iseed, init_noiselvl, init_background
+  USE model,      ONLY: LINEARITY
+  USE geometry,   ONLY: Jacobian, iInt_Jacobian, shear
   IMPLICIT NONE
-  REAL(dp) ::kx, ky, z, sigma_x, sigma_y, gain
+  REAL(xp) ::kx, ky, z, sigma_x, sigma_y, gain
+  INTEGER :: ia,iky,ikx,iz,ip,ij, p, j
   sigma_y = 1.0
   sigma_x = sigma_y
   gain  = 10.0
-
-
-  DO iky=ikys,ikye
-    ky = kyarray(iky)
-  DO iz=izs,ize
-    z  = zarray(iz,0)
-  DO ikx=ikxs,ikxe
-    kx = kxarray(ikx) + z*shear*ky
-    DO ip=ips_i,ipe_i
-    DO ij=ijs_i,ije_i
-      IF( (iky .NE. iky_0) .AND. (ip .EQ. 1) .AND. (ij .EQ. 1)) THEN
-        moments_i( ip,ij,iky,ikx,iz, :) = moments_i( ip,ij,iky,ikx,iz, :) &
-        + gain * exp(-((kx/sigma_x)**2+(ky/sigma_y)**2)) &
-          * AA_x(ikx)*AA_y(iky)* &
-          (Jacobian(iz,0)*iInt_Jacobian)**2!&
-          ! * exp(sigmai2_taui_o2*(kx**2+ky**2))
-      ENDIF
-    ENDDO
-    ENDDO
-    IF(KIN_E) THEN
-    DO ip=ips_e,ipe_e
-    DO ij=ijs_e,ije_e
-      IF( (iky .NE. iky_0) .AND. (ip .EQ. 1) .AND. (ij .EQ. 1)) THEN
-        moments_e( ip,ij,iky,ikx,iz, :) = moments_e( ip,ij,iky,ikx,iz, :) &
-        + gain * exp(-((kx/sigma_x)**2+(ky/sigma_y)**2)) &
-          * AA_x(ikx)*AA_y(iky)* &
-          (Jacobian(iz,0)*iInt_Jacobian)**2!&
-          ! * exp(sigmai2_taui_o2*(kx**2+ky**2))
-      ENDIF
-    ENDDO
+  DO ia=1,local_na
+    DO iky=1,local_nky
+      ky = kyarray(iky)
+      DO iz=1+ngz/2,local_nz+ngz/2
+        z  = zarray(iz,ieven)
+        DO ikx=1,total_nkx
+          kx = kxarray(ikx) + z*shear*ky
+          DO ip=1+ngp/2,local_np+ngp/2
+            p = parray(ip)
+            DO ij=1+ngj/2,local_nj+ngj/2
+              j = jarray(ij)
+              IF( (iky .NE. iky0) .AND. (p .EQ. 0) .AND. (j .EQ. 0)) THEN
+                moments(ia,ip,ij,iky,ikx,iz, :) = moments(ia,ip,ij,iky,ikx,iz, :) &
+                + gain * exp(-((kx/sigma_x)**2+(ky/sigma_y)**2)) &
+                  * AA_x(ikx)*AA_y(iky)* &
+                  (Jacobian(iz,ieven)*iInt_Jacobian)**2!&
+                  ! * exp(sigmai2_taui_o2*(kx**2+ky**2))
+              ENDIF
+            ENDDO
+          ENDDO
+        ENDDO
+      ENDDO
     ENDDO
-    ENDIF
-  ENDDO
-  ENDDO
   ENDDO
 END SUBROUTINE initialize_blob
 !******************************************************************************!
-
-
 !******************************************************************************!
 !!!!!!! Initialize the gyrocenter in a ppj gene manner (power law)
 !******************************************************************************!
 SUBROUTINE init_ppj
-  USE basic
-  USE grid
-  USE fields, ONLY: moments_e, moments_i
-  USE array,  ONLY: kernel_e, kernel_i
-  USE prec_const
-  USE utility, ONLY: checkfield
-  USE initial_par
-  USE model, ONLY: KIN_E, LINEARITY
-  USE geometry, ONLY: Jacobian, iInt_Jacobian
-
+  USE grid,       ONLY: local_na, local_np, local_nj, total_nkx, local_nky, local_nz,&
+                        AA_x, AA_y, deltakx, deltaky,contains_ky0,&
+                        ngp,ngj,ngz, iky0, ieven, kxarray, kyarray, zarray
+  USE fields,     ONLY: moments
+  USE prec_const, ONLY: xp, pi
+  USE initial_par,ONLY: iseed, init_noiselvl, init_background
+  USE model,      ONLY: LINEARITY
+  USE geometry,   ONLY: Jacobian, iInt_Jacobian, shear
   IMPLICIT NONE
-
-  REAL(dp) :: kx, ky, sigma_z, amp, z
-
-  sigma_z = pi/4._dp
-  amp = 1.0_dp
-
+  REAL(xp) :: kx, ky, sigma_z, amp, z
+  INTEGER :: ia,iky,ikx,iz,ip,ij
+  sigma_z = pi/4._xp
+  amp = 1.0_xp
     !**** Broad noise initialization *******************************************
-    ! Electrons
-    IF (KIN_E) THEN
-    DO ip=ips_e,ipe_e
-      DO ij=ijs_e,ije_e
-        IF ( (ip .EQ. 1) .AND. (ij .EQ. 1) ) THEN
-          DO ikx=ikxs,ikxe
-            kx = kxarray(ikx)
-            DO iky=ikys,ikye
-              ky = kyarray(iky)
-              DO iz=izs,ize
-                z = zarray(iz,0)
-                IF (kx .EQ. 0) THEN
-                  IF(ky .EQ. 0) THEN
-                    moments_e(ip,ij,iky,ikx,iz,:) = 0._dp
-                  ELSE
-                    moments_e(ip,ij,iky,ikx,iz,:) = 0.5_dp * ky_min/(ABS(ky)+ky_min)
-                  ENDIF
-                ELSE
-                  IF(ky .GT. 0) THEN
-                    moments_e(ip,ij,iky,ikx,iz,:) = (deltakx/(ABS(kx)+deltakx))*(ky_min/(ABS(ky)+ky_min))
+    DO ia=1,local_na
+      DO ip=1,local_np+ngp
+        DO ij=1,local_nj+ngj
+          IF ( (ip .EQ. 1) .AND. (ij .EQ. 1) ) THEN
+            DO ikx=1,total_nkx
+              kx = kxarray(ikx)
+              DO iky=1,local_nky
+                ky = kyarray(iky)
+                DO iz=1,local_nz+ngz
+                  z = zarray(iz,ieven)
+                  IF (kx .EQ. 0) THEN
+                    IF(ky .EQ. 0) THEN
+                      moments(ia,ip,ij,iky,ikx,iz,:) = 0._xp
+                    ELSE
+                      moments(ia,ip,ij,iky,ikx,iz,:) = 0.5_xp * deltaky/(ABS(ky)+deltaky)
+                    ENDIF
                   ELSE
-                    moments_e(ip,ij,iky,ikx,iz,:) = 0.5_dp*amp*(deltakx/(ABS(kx)+deltakx))
+                    IF(ky .GT. 0) THEN
+                      moments(ia,ip,ij,iky,ikx,iz,:) = (deltakx/(ABS(kx)+deltakx))*(deltaky/(ABS(ky)+deltaky))
+                    ELSE
+                      moments(ia,ip,ij,iky,ikx,iz,:) = 0.5_xp*amp*(deltakx/(ABS(kx)+deltakx))
+                    ENDIF
                   ENDIF
-                ENDIF
-                ! z-dep and noise
-                moments_e(ip,ij,iky,ikx,iz,:) = moments_e(ip,ij,iky,ikx,iz,:) * &
-                (Jacobian(iz,0)*iInt_Jacobian)**2
-
-                ! divide by kernel_0 to adjust to particle density (n = kernel_0 N00)
-                ! moments_e(ip,ij,iky,ikx,iz,:) = moments_e(ip,ij,iky,ikx,iz,:)/kernel_e(ij,iky,ikx,iz,0)
+                  ! z-dep and noise
+                  moments(ia,ip,ij,iky,ikx,iz,:) = moments(ia,ip,ij,iky,ikx,iz,:) * &
+                  (Jacobian(iz,ieven)*iInt_Jacobian)**2
+                  ! divide by kernel_0 to adjust to particle density (n = kernel_0 N00)
+                  ! moments(ia,ip,ij,iky,ikx,iz,:) = moments(ia,ip,ij,iky,ikx,iz,:)/kernel(ia,ij,iky,ikx,iz,0)
+                END DO
               END DO
             END DO
-          END DO
-
-          IF ( contains_ky0 ) THEN
-            DO ikx=2,Nkx/2 !symmetry at kx = 0 for all z
-              moments_e(ip,ij,iky_0,ikx,:,:) = moments_e( ip,ij,iky_0,Nkx+2-ikx,:, :)
-            END DO
-          ENDIF
-        ELSE
-          moments_e(ip,ij,:,:,:,:) = 0._dp
-        ENDIF
-      END DO
-    END DO
-    ENDIF
-
-    ! Ions
-    DO ip=ips_i,ipe_i
-      DO ij=ijs_i,ije_i
-        IF ( (ip .EQ. 1) .AND. (ij .EQ. 1) ) THEN
-          DO ikx=ikxs,ikxe
-            kx = kxarray(ikx)
-            DO iky=ikys,ikye
-              ky = kyarray(iky)
-              DO iz=izs,ize
-                z = zarray(iz,0)
-                IF (kx .EQ. 0) THEN
-                  IF(ky .EQ. 0) THEN
-                    moments_i(ip,ij,iky,ikx,iz,:) = 0._dp
-                  ELSE
-                    moments_i(ip,ij,iky,ikx,iz,:) = 0.5_dp * ky_min/(ABS(ky)+ky_min)
-                  ENDIF
-                ELSE
-                  IF(ky .GT. 0) THEN
-                    moments_i(ip,ij,iky,ikx,iz,:) = (deltakx/(ABS(kx)+deltakx))*(ky_min/(ABS(ky)+ky_min))
-                  ELSE
-                    moments_i(ip,ij,iky,ikx,iz,:) = 0.5_dp*amp*(deltakx/(ABS(kx)+deltakx))
-                  ENDIF
-                ENDIF
-                ! z-dep and noise
-                moments_i(ip,ij,iky,ikx,iz,:) = moments_i(ip,ij,iky,ikx,iz,:) * &
-                (Jacobian(iz,0)*iInt_Jacobian)**2
-                ! divide by kernel_0 to adjust to particle density (n = kernel_0 N00)
-                ! moments_i(ip,ij,iky,ikx,iz,:) = moments_i(ip,ij,iky,ikx,iz,:)/kernel_i(ij,iky,ikx,iz,0)
+            IF ( contains_ky0 ) THEN
+              DO ikx=2,total_nkx/2 !symmetry at kx = 0 for all z
+                moments(ia,ip,ij,iky0,ikx,:,:) = moments(ia, ip,ij,iky0,total_nkx+2-ikx,:, :)
               END DO
-            END DO
-          END DO
-
-          IF ( contains_ky0 ) THEN
-            DO ikx=2,Nkx/2 !symmetry at kx = 0 for all z
-              moments_i(ip,ij,iky_0,ikx,:,:) = moments_i( ip,ij,iky_0,Nkx+2-ikx,:, :)
-            END DO
-          ENDIF
+            ENDIF
         ELSE
-          moments_i(ip,ij,:,:,:,:) = 0._dp
+          moments(ia,ip,ij,:,:,:,:) = 0._xp
         ENDIF
       END DO
     END DO
-
     ! Putting to zero modes that are not in the 2/3 Orszag rule
     IF (LINEARITY .NE. 'linear') THEN
-      DO ikx=ikxs,ikxe
-      DO iky=ikys,ikye
-      DO iz=izs,ize
-        DO ip=ips_e,ipe_e
-        DO ij=ijs_e,ije_e
-          moments_e( ip,ij,iky,ikx,iz, :) = moments_e( ip,ij,iky,ikx,iz, :)*AA_x(ikx)*AA_y(iky)
-        ENDDO
-        ENDDO
-        DO ip=ips_i,ipe_i
-        DO ij=ijs_i,ije_i
-          moments_i( ip,ij,iky,ikx,iz, :) = moments_i( ip,ij,iky,ikx,iz, :)*AA_x(ikx)*AA_y(iky)
+      DO ikx=1,total_nkx
+      DO iky=1,local_nky
+      DO iz=1,local_nz+ngz
+        DO ip=1,local_np+ngp
+        DO ij=1,local_nj+ngj
+          moments(ia, ip,ij,iky,ikx,iz, :) = moments(ia, ip,ij,iky,ikx,iz, :)*AA_x(ikx)*AA_y(iky)
         ENDDO
         ENDDO
       ENDDO
       ENDDO
       ENDDO
     ENDIF
-
+  ENDDO
 END SUBROUTINE init_ppj
 !******************************************************************************!
diff --git a/src/initial_par_mod.F90 b/src/initial_par_mod.F90
index a9d9714f62728f5902bfc7f88c3e4c2f0c3d6385..4fd93b0b843875259ca3eb1c32aabae26d2cb699 100644
--- a/src/initial_par_mod.F90
+++ b/src/initial_par_mod.F90
@@ -9,13 +9,13 @@ MODULE initial_par
   CHARACTER(len=32), PUBLIC, PROTECTED :: INIT_OPT = 'phi'
   ! Initialization through a zonal flow phi
   INTEGER,  PUBLIC, PROTECTED :: INIT_ZF    = 0
-  REAL(DP), PUBLIC, PROTECTED :: ZF_AMP     = 1E+3_dp
+  REAL(xp), PUBLIC, PROTECTED :: ZF_AMP     = 1E+3_xp
   ! Act on modes artificially (keep/wipe, zonal, non zonal, entropy mode etc.)
   CHARACTER(len=32),  PUBLIC, PROTECTED :: ACT_ON_MODES = 'nothing'
   ! Initial background level
-  REAL(dp), PUBLIC, PROTECTED :: init_background=0._dp
+  REAL(xp), PUBLIC, PROTECTED :: init_background=0._xp
   ! Initial noise amplitude
-  REAL(dp), PUBLIC, PROTECTED :: init_noiselvl=1E-6_dp
+  REAL(xp), PUBLIC, PROTECTED :: init_noiselvl=1E-6_xp
   ! Initialization for random number generator
   INTEGER,  PUBLIC, PROTECTED :: iseed=42
 
@@ -43,23 +43,18 @@ CONTAINS
   END SUBROUTINE initial_readinputs
 
 
-  SUBROUTINE initial_outputinputs(fidres, str)
+  SUBROUTINE initial_outputinputs(fid)
     ! Write the input parameters to the results_xx.h5 file
-
-    USE futils, ONLY: attach
-    USE prec_const
+    USE futils, ONLY: attach, creatd
     IMPLICIT NONE
-    INTEGER, INTENT(in) :: fidres
-    CHARACTER(len=256), INTENT(in) :: str
-
-    CALL attach(fidres, TRIM(str), "INIT_OPT", INIT_OPT)
-
-    CALL attach(fidres, TRIM(str), "init_background", init_background)
-
-    CALL attach(fidres, TRIM(str), "init_noiselvl", init_noiselvl)
-
-    CALL attach(fidres, TRIM(str), "iseed", iseed)
-
+    INTEGER, INTENT(in) :: fid
+    CHARACTER(len=256)  :: str
+    WRITE(str,'(a)') '/data/input/intial'
+    CALL creatd(fid, 0,(/0/),TRIM(str),'Initial Input')
+    CALL attach(fid, TRIM(str), "INIT_OPT", INIT_OPT)
+    CALL attach(fid, TRIM(str), "init_background", init_background)
+    CALL attach(fid, TRIM(str), "init_noiselvl", init_noiselvl)
+    CALL attach(fid, TRIM(str), "iseed", iseed)
   END SUBROUTINE initial_outputinputs
 
 END MODULE initial_par
diff --git a/src/lag_interp_mod.F90 b/src/lag_interp_mod.F90
index 8415df5d63783ba680b21431aec8dbbeb3fe55e5..39cea31f42bf310914a047883608c5e5679f9ca9 100644
--- a/src/lag_interp_mod.F90
+++ b/src/lag_interp_mod.F90
@@ -21,11 +21,11 @@ CONTAINS
   !> Third order lagrange interpolation
   SUBROUTINE lag3interp_scalar(y_in,x_in,n_in,y_out,x_out)
     INTEGER, INTENT(IN) :: n_in
-    REAL(dp), DIMENSION(n_in), INTENT(IN) :: y_in,x_in
-    REAL(dp), INTENT(IN) :: x_out
-    REAL(dp), INTENT(OUT) :: y_out
+    REAL(xp), DIMENSION(n_in), INTENT(IN) :: y_in,x_in
+    REAL(xp), INTENT(IN) :: x_out
+    REAL(xp), INTENT(OUT) :: y_out
 
-    REAL(dp), DIMENSION(1) :: xout_wrap, yout_wrap
+    REAL(xp), DIMENSION(1) :: xout_wrap, yout_wrap
 
     xout_wrap = x_out
     call lag3interp_array(y_in,x_in,n_in,yout_wrap,xout_wrap,1)
@@ -36,11 +36,11 @@ CONTAINS
   !> Third order lagrange interpolation
   subroutine lag3interp_array(y_in,x_in,n_in,y_out,x_out,n_out)
     INTEGER, INTENT(IN) :: n_in,n_out
-    REAL(dp), DIMENSION(n_in), INTENT(IN) :: y_in,x_in
-    REAL(dp), DIMENSION(n_out), INTENT(IN) :: x_out
-    REAL(dp), DIMENSION(n_out), INTENT(OUT) :: y_out
+    REAL(xp), DIMENSION(n_in), INTENT(IN) :: y_in,x_in
+    REAL(xp), DIMENSION(n_out), INTENT(IN) :: x_out
+    REAL(xp), DIMENSION(n_out), INTENT(OUT) :: y_out
 
-    REAL(dp) :: x,aintm,aint0,aint1,aint2,xm,x0,x1,x2
+    REAL(xp) :: x,aintm,aint0,aint1,aint2,xm,x0,x1,x2
     INTEGER :: j,jm,j0,j1,j2
     INTEGER :: jstart,jfirst,jlast,jstep
 
@@ -91,11 +91,11 @@ CONTAINS
   SUBROUTINE lag3interp_complex(y_in,x_in,n_in,y_out,x_out,n_out)
     INTEGER, INTENT(IN) :: n_in,n_out
     COMPLEX, DIMENSION(n_in), INTENT(IN) :: y_in
-    REAL(dp), DIMENSION(n_in), INTENT(IN) :: x_in
+    REAL(xp), DIMENSION(n_in), INTENT(IN) :: x_in
     COMPLEX, DIMENSION(n_out), INTENT(OUT) :: y_out
-    REAL(dp), DIMENSION(n_out), INTENT(IN) :: x_out
+    REAL(xp), DIMENSION(n_out), INTENT(IN) :: x_out
 
-    REAL(dp) :: x,aintm,aint0,aint1,aint2,xm,x0,x1,x2
+    REAL(xp) :: x,aintm,aint0,aint1,aint2,xm,x0,x1,x2
     INTEGER :: j,jm,j0,j1,j2
     INTEGER :: jstart,jfirst,jlast,jstep
 
@@ -143,22 +143,22 @@ CONTAINS
 
 
   !>2D interpolation
-  !\TODO check whether a "REAL(dp)" 2D interpolation would
+  !\TODO check whether a "REAL(xp)" 2D interpolation would
   !! be more appropriate
   SUBROUTINE lag3interp_2d(y_in,x1_in,n1_in,x2_in,n2_in,&
        &y_out,x1_out,n1_out,x2_out,n2_out)
     INTEGER, INTENT(IN) :: n1_in,n2_in,n1_out,n2_out
-    REAL(dp), DIMENSION(n1_in,n2_in), INTENT(IN) :: y_in
-    REAL(dp), DIMENSION(n1_in) :: x1_in
-    REAL(dp), DIMENSION(n2_in) :: x2_in
-    REAL(dp), DIMENSION(n1_out), INTENT(IN) :: x1_out
-    REAL(dp), DIMENSION(n2_out), INTENT(IN) :: x2_out
-    REAL(dp), DIMENSION(n1_out,n2_out), INTENT(OUT) :: y_out
+    REAL(xp), DIMENSION(n1_in,n2_in), INTENT(IN) :: y_in
+    REAL(xp), DIMENSION(n1_in) :: x1_in
+    REAL(xp), DIMENSION(n2_in) :: x2_in
+    REAL(xp), DIMENSION(n1_out), INTENT(IN) :: x1_out
+    REAL(xp), DIMENSION(n2_out), INTENT(IN) :: x2_out
+    REAL(xp), DIMENSION(n1_out,n2_out), INTENT(OUT) :: y_out
 
     !local variables
-    REAL(dp), DIMENSION(n2_in) :: y2_in_tmp
-    REAL(dp), DIMENSION(n2_out) :: y2_out_tmp
-    REAL(dp), DIMENSION(n1_in,n2_out) :: y_tmp
+    REAL(xp), DIMENSION(n2_in) :: y2_in_tmp
+    REAL(xp), DIMENSION(n2_out) :: y2_out_tmp
+    REAL(xp), DIMENSION(n1_in,n2_out) :: y_tmp
     INTEGER :: i
 
     DO i=1,n1_in
@@ -181,11 +181,11 @@ CONTAINS
     IMPLICIT NONE
 
     INTEGER, INTENT(IN) :: n_in
-    REAL(dp), DIMENSION(n_in), INTENT(IN) :: y_in,x_in
-    REAL(dp), INTENT(IN) :: x_out
-    REAL(dp), INTENT(OUT) :: dydx_out
+    REAL(xp), DIMENSION(n_in), INTENT(IN) :: y_in,x_in
+    REAL(xp), INTENT(IN) :: x_out
+    REAL(xp), INTENT(OUT) :: dydx_out
 
-    REAL(dp), DIMENSION(1) :: xout_wrap, dydxout_wrap
+    REAL(xp), DIMENSION(1) :: xout_wrap, dydxout_wrap
 
     xout_wrap = x_out
     call lag3deriv_array(y_in,x_in,n_in,dydxout_wrap,xout_wrap,1)
@@ -198,11 +198,11 @@ CONTAINS
 !>Returns Derivative based on a 3rd order lagrange interpolation
   subroutine lag3deriv_array(y_in,x_in,n_in,dydx_out,x_out,n_out)
     INTEGER :: n_in,n_out
-    REAL(dp), DIMENSION(n_in), INTENT(IN) :: y_in,x_in
-    REAL(dp), DIMENSION(n_out), INTENT(IN) :: x_out
-    REAL(dp), DIMENSION(n_out), INTENT(OUT) :: dydx_out
+    REAL(xp), DIMENSION(n_in), INTENT(IN) :: y_in,x_in
+    REAL(xp), DIMENSION(n_out), INTENT(IN) :: x_out
+    REAL(xp), DIMENSION(n_out), INTENT(OUT) :: dydx_out
 
-    REAL(dp) :: x,aintm,aint0,aint1,aint2,xm,x0,x1,x2
+    REAL(xp) :: x,aintm,aint0,aint1,aint2,xm,x0,x1,x2
     INTEGER :: j,jm,j0,j1,j2
     INTEGER :: jstart,jfirst,jlast,jstep
 
diff --git a/src/memory.F90 b/src/memory.F90
index 4e737e77f8df93d1599f14d81e89079eac5da94d..0fccaf0f7e231eb748e69edc327e221f82682731 100644
--- a/src/memory.F90
+++ b/src/memory.F90
@@ -4,138 +4,77 @@ SUBROUTINE memory
   USE array
   USE basic, ONLY: allocate_array
   USE fields
-  USE grid
-  USE time_integration
-  USE model, ONLY: LINEARITY, KIN_E
+  USE grid, ONLY: local_na, local_np,ngp ,local_nj,ngj, local_nky, local_nkx,local_nz,ngz, jmax, pmax, nzgrid
   USE collision
-
+  USE time_integration, ONLY: ntimelevel
   USE prec_const
+  USE model, ONLY: na
   IMPLICIT NONE
 
   ! Electrostatic potential
-  CALL allocate_array(           phi, ikys,ikye, ikxs,ikxe, izgs,izge)
-  CALL allocate_array(           psi, ikys,ikye, ikxs,ikxe, izgs,izge)
-  CALL allocate_array(        phi_ZF, ikxs,ikxe, izs,ize)
-  CALL allocate_array(        phi_NZ, ikys,ikye, izs,ize)
-  CALL allocate_array(inv_poisson_op, ikys,ikye, ikxs,ikxe, izs,ize)
-  CALL allocate_array( inv_ampere_op, ikys,ikye, ikxs,ikxe, izs,ize)
-  CALL allocate_array(   inv_pol_ion, ikys,ikye, ikxs,ikxe, izs,ize)
-  CALL allocate_array(HF_phi_correction_operator, ikys,ikye, ikxs,ikxe, izs,ize)
-
-  !Electrons arrays
-  IF(KIN_E) THEN
-  CALL allocate_array(             Ne00, ikys,ikye, ikxs,ikxe, izs,ize)
-  CALL allocate_array(           dens_e, ikys,ikye, ikxs,ikxe, izs,ize)
-  CALL allocate_array(           upar_e, ikys,ikye, ikxs,ikxe, izs,ize)
-  CALL allocate_array(           uper_e, ikys,ikye, ikxs,ikxe, izs,ize)
-  CALL allocate_array(           Tpar_e, ikys,ikye, ikxs,ikxe, izs,ize)
-  CALL allocate_array(           Tper_e, ikys,ikye, ikxs,ikxe, izs,ize)
-  CALL allocate_array(           temp_e, ikys,ikye, ikxs,ikxe, izs,ize)
-  CALL allocate_array(         Kernel_e,                ijgs_e,ijge_e, ikys,ikye, ikxs,ikxe, izgs,izge,  0,1)
-  CALL allocate_array(        moments_e, ipgs_e,ipge_e, ijgs_e,ijge_e, ikys,ikye, ikxs,ikxe, izgs,izge, 1,ntimelevel )
-  CALL allocate_array(            Nepjz,  ips_e,ipe_e,   ijs_e,ije_e,                         izs,ize)
-  CALL allocate_array(    moments_rhs_e,  ips_e,ipe_e,   ijs_e,ije_e,  ikys,ikye, ikxs,ikxe,  izs,ize,  1,ntimelevel )
-  CALL allocate_array( nadiab_moments_e, ipgs_e,ipge_e, ijgs_e,ijge_e, ikys,ikye, ikxs,ikxe, izgs,izge)
-  CALL allocate_array(         ddz_nepj, ipgs_e,ipge_e, ijgs_e,ijge_e, ikys,ikye, ikxs,ikxe, izgs,izge)
-  CALL allocate_array(        ddzND_Nepj, ipgs_e,ipge_e, ijgs_e,ijge_e, ikys,ikye, ikxs,ikxe, izgs,izge)
-  CALL allocate_array(      interp_nepj, ipgs_e,ipge_e, ijgs_e,ijge_e, ikys,ikye, ikxs,ikxe, izgs,izge)
-  CALL allocate_array(     moments_e_ZF, ipgs_e,ipge_e, ijgs_e,ijge_e, ikxs,ikxe, izs,ize)
-  CALL allocate_array(     moments_e_NZ, ipgs_e,ipge_e, ijgs_e,ijge_e, ikys,ikye, izs,ize)
-  CALL allocate_array(          TColl_e,  ips_e,ipe_e,   ijs_e,ije_e , ikys,ikye, ikxs,ikxe, izs,ize)
-  CALL allocate_array(             Sepj,  ips_e,ipe_e,   ijs_e,ije_e,  ikys,ikye, ikxs,ikxe, izs,ize)
-  CALL allocate_array(           xnepj,   ips_e,ipe_e,   ijs_e,ije_e)
-  CALL allocate_array(           xnepp2j, ips_e,ipe_e)
-  CALL allocate_array(           xnepp1j, ips_e,ipe_e)
-  CALL allocate_array(           xnepm1j, ips_e,ipe_e)
-  CALL allocate_array(           xnepm2j, ips_e,ipe_e)
-  CALL allocate_array(           xnepjp1,                ijs_e,ije_e)
-  CALL allocate_array(           xnepjm1,                ijs_e,ije_e)
-  CALL allocate_array(           ynepp1j, ips_e,ipe_e,   ijs_e,ije_e)
-  CALL allocate_array(           ynepm1j, ips_e,ipe_e,   ijs_e,ije_e)
-  CALL allocate_array(         ynepp1jm1, ips_e,ipe_e,   ijs_e,ije_e)
-  CALL allocate_array(         ynepm1jm1, ips_e,ipe_e,   ijs_e,ije_e)
-  CALL allocate_array(           zNepm1j, ips_e,ipe_e,   ijs_e,ije_e)
-  CALL allocate_array(         zNepm1jp1, ips_e,ipe_e,   ijs_e,ije_e)
-  CALL allocate_array(         zNepm1jm1, ips_e,ipe_e,   ijs_e,ije_e)
-  ENDIF
+  CALL allocate_array(           phi, 1,local_nky, 1,local_nkx, 1,local_nz+ngz)
+  CALL allocate_array(           psi, 1,local_nky, 1,local_nkx, 1,local_nz+ngz)
+  CALL allocate_array(inv_poisson_op, 1,local_nky, 1,local_nkx, 1,local_nz)
+  CALL allocate_array( inv_ampere_op, 1,local_nky, 1,local_nkx, 1,local_nz)
+  CALL allocate_array(   inv_pol_ion, 1,local_nky, 1,local_nkx, 1,local_nz)
+  ! CALL allocate_array(HF_phi_correction_operator, 1,local_nky, 1,local_nkx, 1,local_nz)
 
-  !Ions arrays
-  CALL allocate_array(             Ni00, ikys,ikye, ikxs,ikxe, izs,ize)
-  CALL allocate_array(           dens_i, ikys,ikye, ikxs,ikxe, izs,ize)
-  CALL allocate_array(           upar_i, ikys,ikye, ikxs,ikxe, izs,ize)
-  CALL allocate_array(           uper_i, ikys,ikye, ikxs,ikxe, izs,ize)
-  CALL allocate_array(           Tpar_i, ikys,ikye, ikxs,ikxe, izs,ize)
-  CALL allocate_array(           Tper_i, ikys,ikye, ikxs,ikxe, izs,ize)
-  CALL allocate_array(           temp_i, ikys,ikye, ikxs,ikxe, izs,ize)
-  CALL allocate_array(         Kernel_i,                ijgs_i,ijge_i, ikys,ikye, ikxs,ikxe, izgs,izge,  0,1)
-  CALL allocate_array(        moments_i, ipgs_i,ipge_i, ijgs_i,ijge_i, ikys,ikye, ikxs,ikxe, izgs,izge, 1,ntimelevel )
-  CALL allocate_array(            Nipjz,  ips_i,ipe_i,   ijs_i,ije_i,                         izs,ize)
-  CALL allocate_array(    moments_rhs_i,  ips_i,ipe_i,   ijs_i,ije_i,  ikys,ikye, ikxs,ikxe,  izs,ize,  1,ntimelevel )
-  CALL allocate_array( nadiab_moments_i, ipgs_i,ipge_i, ijgs_i,ijge_i, ikys,ikye, ikxs,ikxe, izgs,izge)
-  CALL allocate_array(         ddz_nipj, ipgs_i,ipge_i, ijgs_i,ijge_i, ikys,ikye, ikxs,ikxe, izgs,izge)
-  CALL allocate_array(        ddzND_Nipj, ipgs_i,ipge_i, ijgs_i,ijge_i, ikys,ikye, ikxs,ikxe, izgs,izge)
-  CALL allocate_array(      interp_nipj, ipgs_i,ipge_i, ijgs_i,ijge_i, ikys,ikye, ikxs,ikxe, izgs,izge)
-  CALL allocate_array(     moments_i_ZF, ipgs_i,ipge_i, ijgs_i,ijge_i, ikxs,ikxe, izs,ize)
-  CALL allocate_array(     moments_i_NZ, ipgs_i,ipge_i, ijgs_i,ijge_i, ikys,ikye, izs,ize)
-  CALL allocate_array(          TColl_i,  ips_i,ipe_i,   ijs_i,ije_i,  ikys,ikye, ikxs,ikxe, izs,ize)
-  CALL allocate_array(             Sipj,  ips_i,ipe_i,   ijs_i,ije_i,  ikys,ikye, ikxs,ikxe, izs,ize)
-  CALL allocate_array( xnipj,   ips_i,ipe_i, ijs_i,ije_i)
-  CALL allocate_array( xnipp2j, ips_i,ipe_i)
-  CALL allocate_array( xnipp1j, ips_i,ipe_i)
-  CALL allocate_array( xnipm1j, ips_i,ipe_i)
-  CALL allocate_array( xnipm2j, ips_i,ipe_i)
-  CALL allocate_array( xnipjp1, ijs_i,ije_i)
-  CALL allocate_array( xnipjm1, ijs_i,ije_i)
-  CALL allocate_array(   ynipp1j, ips_i,ipe_i, ijs_i,ije_i)
-  CALL allocate_array(   ynipm1j, ips_i,ipe_i, ijs_i,ije_i)
-  CALL allocate_array( ynipp1jm1, ips_i,ipe_i, ijs_i,ije_i)
-  CALL allocate_array( ynipm1jm1, ips_i,ipe_i, ijs_i,ije_i)
-  CALL allocate_array(   zNipm1j, ips_i,ipe_i, ijs_i,ije_i)
-  CALL allocate_array( zNipm1jp1, ips_i,ipe_i, ijs_i,ije_i)
-  CALL allocate_array( zNipm1jm1, ips_i,ipe_i, ijs_i,ije_i)
+  !Moments related arrays
+  CALL allocate_array(           dens, 1,local_na, 1,local_nky, 1,local_nkx, 1,local_nz)
+  CALL allocate_array(           upar, 1,local_na, 1,local_nky, 1,local_nkx, 1,local_nz)
+  CALL allocate_array(           uper, 1,local_na, 1,local_nky, 1,local_nkx, 1,local_nz)
+  CALL allocate_array(           Tpar, 1,local_na, 1,local_nky, 1,local_nkx, 1,local_nz)
+  CALL allocate_array(           Tper, 1,local_na, 1,local_nky, 1,local_nkx, 1,local_nz)
+  CALL allocate_array(           temp, 1,local_na, 1,local_nky, 1,local_nkx, 1,local_nz)
+  CALL allocate_array(         Kernel, 1,local_na,                 1,local_nj+ngj, 1,local_nky, 1,local_nkx, 1,local_nz+ngz, 1,nzgrid)
+  CALL allocate_array(        moments, 1,local_na, 1,local_np+ngp, 1,local_nj+ngj, 1,local_nky, 1,local_nkx, 1,local_nz+ngz, 1,ntimelevel )
+  CALL allocate_array(          napjz, 1,local_na, 1,local_np,     1,local_nj,                               1,local_nz)
+  CALL allocate_array(    moments_rhs, 1,local_na, 1,local_np,     1,local_nj,     1,local_nky, 1,local_nkx, 1,local_nz,     1,ntimelevel )
+  CALL allocate_array( nadiab_moments, 1,local_na, 1,local_np+ngp, 1,local_nj+ngj, 1,local_nky, 1,local_nkx, 1,local_nz+ngz)
+  CALL allocate_array(       ddz_napj, 1,local_na, 1,local_np+ngp, 1,local_nj+ngj, 1,local_nky, 1,local_nkx, 1,local_nz)
+  CALL allocate_array(     ddzND_napj, 1,local_na, 1,local_np+ngp, 1,local_nj+ngj, 1,local_nky, 1,local_nkx, 1,local_nz)
+  CALL allocate_array(    interp_napj, 1,local_na, 1,local_np+ngp, 1,local_nj+ngj, 1,local_nky, 1,local_nkx, 1,local_nz)
+  CALL allocate_array(          Capj,  1,local_na, 1,local_np,     1,local_nj,     1,local_nky, 1,local_nkx, 1,local_nz)
+  CALL allocate_array(          Sapj,  1,local_na, 1,local_np,     1,local_nj,     1,local_nky, 1,local_nkx, 1,local_nz)
+  CALL allocate_array(     xnapj, 1,local_na, 1,local_np, 1,local_nj)
+  CALL allocate_array(   xnapp2j, 1,local_na, 1,local_np)
+  CALL allocate_array(   xnapp1j, 1,local_na, 1,local_np)
+  CALL allocate_array(   xnapm1j, 1,local_na, 1,local_np)
+  CALL allocate_array(   xnapm2j, 1,local_na, 1,local_np)
+  CALL allocate_array(   xnapjp1, 1,local_na, 1,local_nj)
+  CALL allocate_array(   xnapjm1, 1,local_na, 1,local_nj)
+  CALL allocate_array(   ynapp1j, 1,local_na, 1,local_np, 1,local_nj)
+  CALL allocate_array(   ynapm1j, 1,local_na, 1,local_np, 1,local_nj)
+  CALL allocate_array( ynapp1jm1, 1,local_na, 1,local_np, 1,local_nj)
+  CALL allocate_array( ynapm1jm1, 1,local_na, 1,local_np, 1,local_nj)
+  CALL allocate_array(   znapm1j, 1,local_na, 1,local_np, 1,local_nj)
+  CALL allocate_array( znapm1jp1, 1,local_na, 1,local_np, 1,local_nj)
+  CALL allocate_array( znapm1jm1, 1,local_na, 1,local_np, 1,local_nj)
 
   ! Non linear terms and dnjs table
-  CALL allocate_array( dnjs, 1,maxj+1, 1,maxj+1, 1,maxj+1)
+  CALL allocate_array( dnjs, 1,jmax+1, 1,jmax+1, 1,jmax+1)
 
   ! Hermite fourth derivative coeff table 4*sqrt(p!/(p-4)!)
-  CALL allocate_array( dv4_Hp_coeff, -2, MAX(pmaxe,pmaxi))
-
-  ! elect. pot. linear terms
-  IF (KIN_E) THEN
-    CALL allocate_array( xphij_e,   ips_e,ipe_e, ijs_e,ije_e)
-    CALL allocate_array( xphijp1_e, ips_e,ipe_e, ijs_e,ije_e)
-    CALL allocate_array( xphijm1_e, ips_e,ipe_e, ijs_e,ije_e)
-    CALL allocate_array( xpsij_e,   ips_e,ipe_e, ijs_e,ije_e)
-    CALL allocate_array( xpsijp1_e, ips_e,ipe_e, ijs_e,ije_e)
-    CALL allocate_array( xpsijm1_e, ips_e,ipe_e, ijs_e,ije_e)
-  ENDIF
-  CALL allocate_array( xphij_i,   ips_i,ipe_i, ijs_i,ije_i)
-  CALL allocate_array( xphijp1_i, ips_i,ipe_i, ijs_i,ije_i)
-  CALL allocate_array( xphijm1_i, ips_i,ipe_i, ijs_i,ije_i)
-  CALL allocate_array( xpsij_i,   ips_i,ipe_i, ijs_i,ije_i)
-  CALL allocate_array( xpsijp1_i, ips_i,ipe_i, ijs_i,ije_i)
-  CALL allocate_array( xpsijm1_i, ips_i,ipe_i, ijs_i,ije_i)
+  CALL allocate_array( dv4_Hp_coeff, -2, pmax)
 
+  CALL allocate_array( xphij,   1,local_na, 1,local_np, 1,local_nj)
+  CALL allocate_array( xphijp1, 1,local_na, 1,local_np, 1,local_nj)
+  CALL allocate_array( xphijm1, 1,local_na, 1,local_np, 1,local_nj)
+  CALL allocate_array( xpsij,   1,local_na, 1,local_np, 1,local_nj)
+  CALL allocate_array( xpsijp1, 1,local_na, 1,local_np, 1,local_nj)
+  CALL allocate_array( xpsijm1, 1,local_na, 1,local_np, 1,local_nj)
+  
   !___________________ 2x5D ARRAYS __________________________
   !! Collision matrices
-  IF (gyrokin_CO) THEN !GK collision matrices (one for each kperp)
-    IF (KIN_E) THEN
-    CALL allocate_array(  Ceepj, 1,(pmaxe+1)*(jmaxe+1), 1,(pmaxe+1)*(jmaxe+1), ikys,ikye, ikxs,ikxe, izs,ize)
-    CALL allocate_array( CeipjT, 1,(pmaxe+1)*(jmaxe+1), 1,(pmaxe+1)*(jmaxe+1), ikys,ikye, ikxs,ikxe, izs,ize)
-    CALL allocate_array( CeipjF, 1,(pmaxe+1)*(jmaxe+1), 1,(pmaxi+1)*(jmaxi+1), ikys,ikye, ikxs,ikxe, izs,ize)
-    CALL allocate_array( CiepjT, 1,(pmaxi+1)*(jmaxi+1), 1,(pmaxi+1)*(jmaxi+1), ikys,ikye, ikxs,ikxe, izs,ize)
-    CALL allocate_array( CiepjF, 1,(pmaxi+1)*(jmaxi+1), 1,(pmaxe+1)*(jmaxe+1), ikys,ikye, ikxs,ikxe, izs,ize)
-    ENDIF
-    CALL allocate_array(  Ciipj, 1,(pmaxi+1)*(jmaxi+1), 1,(pmaxi+1)*(jmaxi+1), ikys,ikye, ikxs,ikxe, izs,ize)
+  IF (GK_CO) THEN !GK collision matrices (one for each kperp)
+    CALL allocate_array(  Cab_F, 1,na, 1,na, 1,(pmax+1)*(jmax+1), 1,(pmax+1)*(jmax+1), 1,local_nky, 1,local_nkx, 1,local_nz)
+    CALL allocate_array(  Cab_T, 1,na, 1,na, 1,(pmax+1)*(jmax+1), 1,(pmax+1)*(jmax+1), 1,local_nky, 1,local_nkx, 1,local_nz)
+    CALL allocate_array(  Caa,   1,na,       1,(pmax+1)*(jmax+1), 1,(pmax+1)*(jmax+1), 1,local_nky, 1,local_nkx, 1,local_nz)
+    CALL allocate_array(nuCself, 1,na,       1,(pmax+1)*(jmax+1), 1,(pmax+1)*(jmax+1), 1,local_nky, 1,local_nkx, 1,local_nz)
   ELSE !DK collision matrix (same for every k)
-      IF (KIN_E) THEN
-      CALL allocate_array(  Ceepj, 1,(pmaxe+1)*(jmaxe+1), 1,(pmaxe+1)*(jmaxe+1), 1,1, 1,1, 1,1)
-      CALL allocate_array( CeipjT, 1,(pmaxe+1)*(jmaxe+1), 1,(pmaxe+1)*(jmaxe+1), 1,1, 1,1, 1,1)
-      CALL allocate_array( CeipjF, 1,(pmaxe+1)*(jmaxe+1), 1,(pmaxi+1)*(jmaxi+1), 1,1, 1,1, 1,1)
-      CALL allocate_array( CiepjT, 1,(pmaxi+1)*(jmaxi+1), 1,(pmaxi+1)*(jmaxi+1), 1,1, 1,1, 1,1)
-      CALL allocate_array( CiepjF, 1,(pmaxi+1)*(jmaxi+1), 1,(pmaxe+1)*(jmaxe+1), 1,1, 1,1, 1,1)
-      ENDIF
-      CALL allocate_array(  Ciipj, 1,(pmaxi+1)*(jmaxi+1), 1,(pmaxi+1)*(jmaxi+1), 1,1, 1,1, 1,1)
- ENDIF
-
-END SUBROUTINE memory
+      CALL allocate_array(  Cab_F, 1,na, 1,na, 1,(pmax+1)*(jmax+1), 1,(pmax+1)*(jmax+1), 1,1, 1,1, 1,1)
+      CALL allocate_array(  Cab_T, 1,na, 1,na, 1,(pmax+1)*(jmax+1), 1,(pmax+1)*(jmax+1), 1,1, 1,1, 1,1)
+      CALL allocate_array(  Caa,    1,na,      1,(pmax+1)*(jmax+1), 1,(pmax+1)*(jmax+1), 1,1, 1,1, 1,1)
+      CALL allocate_array(nuCself,  1,na,      1,(pmax+1)*(jmax+1), 1,(pmax+1)*(jmax+1), 1,1, 1,1, 1,1)
+ENDIF
+END SUBROUTINE memory
\ No newline at end of file
diff --git a/src/miller_mod.F90 b/src/miller_mod.F90
index 34a175cd027e61e8eb8b24265befabbb66ba892b..495347ebbe17628c42937c6bca34959cc64b1395 100644
--- a/src/miller_mod.F90
+++ b/src/miller_mod.F90
@@ -4,8 +4,9 @@
 MODULE miller
   USE prec_const
   USE basic
+  USE parallel, ONLY: my_id, num_procs_z, nbr_U, nbr_D, comm0
   ! use coordinates,only: gcoor, get_dzprimedz
-  USE grid
+  USE grid, ONLY: local_Nky, local_Nkx, local_nz, Ngz, kyarray, kxarray, zarray, Nz, local_nz_offset
   ! use discretization
   USE lagrange_interpolation
   ! use par_in, only: beta, sign_Ip_CW, sign_Bt_CW, Npol
@@ -19,15 +20,15 @@ MODULE miller
 
   private
 
-  real(dp) :: rho, kappa, delta, s_kappa, s_delta, drR, drZ, zeta, s_zeta
-  real(dp) :: thetaShift
-  real(dp) :: thetak, thetad
-
+  real(xp) :: rho, kappa, delta, s_kappa, s_delta, drR, drZ, zeta, s_zeta
+  real(xp) :: thetaShift
+  real(xp) :: thetak, thetad
+  INTEGER  :: ierr
 CONTAINS
 
   !>Set defaults for miller parameters
   subroutine set_miller_parameters(kappa_,s_kappa_,delta_,s_delta_,zeta_,s_zeta_)
-    real(dp), INTENT(IN) :: kappa_,s_kappa_,delta_,s_delta_,zeta_,s_zeta_
+    real(xp), INTENT(IN) :: kappa_,s_kappa_,delta_,s_delta_,zeta_,s_zeta_
     rho     = -1.0
     kappa   = kappa_
     s_kappa = s_kappa_
@@ -44,62 +45,63 @@ CONTAINS
   end subroutine set_miller_parameters
 
   !>Get Miller metric, magnetic field, jacobian etc.
-  subroutine get_miller(trpeps,major_R,major_Z,q0,shat,amhd,edge_opt,&
-       C_y,C_xy,dpdx_pm_geom,gxx_,gyy_,gzz_,gxy_,gxz_,gyz_,dBdx_,dBdy_,&
+  subroutine get_miller(trpeps,major_R,major_Z,q0,shat,Npol,amhd,edge_opt,&
+       C_y,C_xy,xpdx_pm_geom,gxx_,gyy_,gzz_,gxy_,gxz_,gyz_,dBdx_,dBdy_,&
        Bfield_,jacobian_,dBdz_,R_hat_,Z_hat_,dxdR_,dxdZ_,Ckxky_,gradz_coeff_)
     !!!!!!!!!!!!!!!! GYACOMO INTERFACE !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-    real(dp), INTENT(INOUT) :: trpeps          ! eps in gyacomo (inverse aspect ratio)
-    real(dp), INTENT(INOUT) :: major_R         ! major radius
-    real(dp), INTENT(INOUT) :: major_Z         ! major Z
-    real(dp), INTENT(INOUT) :: q0              ! safetyfactor
-    real(dp), INTENT(INOUT) :: shat            ! safetyfactor
-    real(dp), INTENT(INOUT) :: amhd            ! alpha mhd
-    real(dp), INTENT(INOUT) :: edge_opt        ! alpha mhd
-    real(dp), INTENT(INOUT) :: dpdx_pm_geom    ! amplitude mag. eq. pressure grad.
-    real(dp), INTENT(INOUT) :: C_y, C_xy
-
-    real(dp), dimension(izgs:izge,0:1), INTENT(INOUT) :: &
+    real(xp), INTENT(INOUT) :: trpeps          ! eps in gyacomo (inverse aspect ratio)
+    real(xp), INTENT(INOUT) :: major_R         ! major radius
+    real(xp), INTENT(INOUT) :: major_Z         ! major Z
+    real(xp), INTENT(INOUT) :: q0              ! safetyfactor
+    real(xp), INTENT(INOUT) :: shat            ! safetyfactor
+    INTEGER,  INTENT(IN)    :: Npol            ! number of poloidal turns
+    real(xp), INTENT(INOUT) :: amhd            ! alpha mhd
+    real(xp), INTENT(INOUT) :: edge_opt        ! alpha mhd
+    real(xp), INTENT(INOUT) :: xpdx_pm_geom    ! amplitude mag. eq. pressure grad.
+    real(xp), INTENT(INOUT) :: C_y, C_xy
+    real(xp), dimension(1:local_nz+Ngz,1:2), INTENT(INOUT) :: &
                                               gxx_,gyy_,gzz_,gxy_,gxz_,gyz_,&
                                               dBdx_,dBdy_,Bfield_,jacobian_,&
                                               dBdz_,R_hat_,Z_hat_,dxdR_,dxdZ_, &
                                               gradz_coeff_
-    real(dp), dimension(ikys:ikye,ikxs:ikxe,izgs:izge,0:1), INTENT(INOUT) :: Ckxky_
+    real(xp), dimension(1:local_Nky,1:local_Nkx,1:local_nz+Ngz,1:2), INTENT(INOUT) :: Ckxky_
+    INTEGER :: iz, ikx, iky, eo
     ! No parameter in gyacomo yet
-    real(dp) :: sign_Ip_CW=1 ! current sign (only normal current)
-    real(dp) :: sign_Bt_CW=1 ! current sign (only normal current)
+    real(xp) :: sign_Ip_CW=1 ! current sign (only normal current)
+    real(xp) :: sign_Bt_CW=1 ! current sign (only normal current)
     !!!!!!!!!!!!!! END GYACOMO INTERFACE !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
     ! Auxiliary variables for curvature computation
-    real(dp) :: G1,G2,G3,Cx,Cy,ky,kx
+    real(xp) :: G1,G2,G3,Cx,Cy,ky,kx
 
     integer:: np, np_s, Npol_ext, Npol_s
 
-    real(dp), dimension(500*(Npol+2)):: R,Z,R_rho,Z_rho,R_theta,Z_theta,R_theta_theta,Z_theta_theta,dlp,Rc,cosu,sinu,Bphi
-    real(dp), dimension(500*(Npol+2)):: drRcirc, drRelong, drRelongTilt, drRtri, drRtriTilt, drZcirc, drZelong, drZelongTilt
-    real(dp), dimension(500*(Npol+2)):: drZtri, drZtriTilt, dtdtRcirc, dtdtRelong, dtdtRelongTilt, dtdtRtri, dtdtRtriTilt
-    real(dp), dimension(500*(Npol+2)):: dtdtZcirc, dtdtZelong, dtdtZelongTilt, dtdtZtri, dtdtZtriTilt, dtRcirc, dtRelong
-    real(dp), dimension(500*(Npol+2)):: dtRelongTilt, dtRtri, dtRtriTilt, dtZcirc, dtZelong, dtZelongTilt, dtZtri, dtZtriTilt
-    real(dp), dimension(500*(Npol+2)):: Rcirc, Relong, RelongTilt, Rtri, RtriTilt, Zcirc, Zelong, ZelongTilt, Ztri, ZtriTilt
-    ! real(dp), dimension(500*(Npol+2)):: drrShape, drrAng, drxAng, dryAng, dtdtrShape, dtdtrAng, dtdtxAng
-    ! real(dp), dimension(500*(Npol+2)):: dtdtyAng, dtrShape, dtrAng, dtxAng, dtyAng, rShape, rAng, xAng, yAng
-    real(dp), dimension(500*(Npol+2)):: theta, thAdj, J_r, B, Bp, D0, D1, D2, D3, nu, chi, psi1, nu1
-    real(dp), dimension(500*(Npol+2)):: tmp_reverse, theta_reverse, tmp_arr
-
-    real(dp), dimension(500*(Npol+1)):: theta_s, thAdj_s, chi_s, theta_s_reverse
-    real(dp), dimension(500*(Npol+1)):: R_s, Z_s, R_theta_s, Z_theta_s, Rc_s, cosu_s, sinu_s, Bphi_s, B_s, Bp_s, dlp_s
-    real(dp), dimension(500*(Npol+1)):: dtRcirc_s, dtRelong_s, dtRelongTilt_s, dtRtri_s, dtRtriTilt_s, dtZcirc_s
-    real(dp), dimension(500*(Npol+1)):: dtZelong_s, dtZelongTilt_s, dtZtri_s, dtZtriTilt_s, Rcirc_s, Relong_s, RelongTilt_s
-    real(dp), dimension(500*(Npol+1)):: Rtri_s, RtriTilt_s, Zcirc_s, Zelong_s, ZelongTilt_s, Ztri_s, ZtriTilt_s!, dtrShape_s
-    ! real(dp), dimension(500*(Npol+1)):: dtrAng_s, dtxAng_s, dtyAng_s, rShape_s, rAng_s, xAng_s, yAng_s
-    real(dp), dimension(500*(Npol+1)):: psi1_s, nu1_s, dchidx_s, dB_drho_s, dB_dl_s, dnu_drho_s, dnu_dl_s, grad_nu_s
-    real(dp), dimension(500*(Npol+1)):: gxx, gxy, gxz, gyy, gyz, gzz, dtheta_dchi_s, dBp_dchi_s, jacobian, dBdx, dBdz
-    real(dp), dimension(500*(Npol+1)):: g_xx, g_xy, g_xz, g_yy, g_yz, g_zz, tmp_arr_s, dxdR_s, dxdZ_s, K_x, K_y !tmp_arr2
-
-    real(dp), dimension(1:Nz):: gxx_out,gxy_out,gxz_out,gyy_out,gyz_out,gzz_out,Bfield_out,jacobian_out, dBdx_out, dBdz_out, chi_out
-    real(dp), dimension(1:Nz):: R_out, Z_out, dxdR_out, dxdZ_out
-    real(dp):: d_inv, drPsi, dxPsi, dq_dx, dq_dpsi, R0, Z0, B0, F, D0_full, D1_full, D2_full, D3_full
-    !real(dp) :: Lnorm, Psi0 ! currently module-wide defined anyway
-    real(dp):: pprime, ffprime, D0_mid, D1_mid, D2_mid, D3_mid, dx_drho, pi, mu_0, dzprimedz
-    ! real(dp):: rho_a, psiN, drpsiN, CN2, CN3, Rcenter, Zcenter, drRcenter, drZcenter
+    real(xp), dimension(500*(Npol+2)):: R,Z,R_rho,Z_rho,R_theta,Z_theta,R_theta_theta,Z_theta_theta,dlp,Rc,cosu,sinu,Bphi
+    real(xp), dimension(500*(Npol+2)):: drRcirc, drRelong, drRelongTilt, drRtri, drRtriTilt, drZcirc, drZelong, drZelongTilt
+    real(xp), dimension(500*(Npol+2)):: drZtri, drZtriTilt, dtdtRcirc, dtdtRelong, dtdtRelongTilt, dtdtRtri, dtdtRtriTilt
+    real(xp), dimension(500*(Npol+2)):: dtdtZcirc, dtdtZelong, dtdtZelongTilt, dtdtZtri, dtdtZtriTilt, dtRcirc, dtRelong
+    real(xp), dimension(500*(Npol+2)):: dtRelongTilt, dtRtri, dtRtriTilt, dtZcirc, dtZelong, dtZelongTilt, dtZtri, dtZtriTilt
+    real(xp), dimension(500*(Npol+2)):: Rcirc, Relong, RelongTilt, Rtri, RtriTilt, Zcirc, Zelong, ZelongTilt, Ztri, ZtriTilt
+    ! real(xp), dimension(500*(Npol+2)):: drrShape, drrAng, drxAng, dryAng, dtdtrShape, dtdtrAng, dtdtxAng
+    ! real(xp), dimension(500*(Npol+2)):: dtdtyAng, dtrShape, dtrAng, dtxAng, dtyAng, rShape, rAng, xAng, yAng
+    real(xp), dimension(500*(Npol+2)):: theta, thAdj, J_r, B, Bp, D0, D1, D2, D3, nu, chi, psi1, nu1
+    real(xp), dimension(500*(Npol+2)):: tmp_reverse, theta_reverse, tmp_arr
+
+    real(xp), dimension(500*(Npol+1)):: theta_s, thAdj_s, chi_s, theta_s_reverse
+    real(xp), dimension(500*(Npol+1)):: R_s, Z_s, R_theta_s, Z_theta_s, Rc_s, cosu_s, sinu_s, Bphi_s, B_s, Bp_s, dlp_s
+    real(xp), dimension(500*(Npol+1)):: dtRcirc_s, dtRelong_s, dtRelongTilt_s, dtRtri_s, dtRtriTilt_s, dtZcirc_s
+    real(xp), dimension(500*(Npol+1)):: dtZelong_s, dtZelongTilt_s, dtZtri_s, dtZtriTilt_s, Rcirc_s, Relong_s, RelongTilt_s
+    real(xp), dimension(500*(Npol+1)):: Rtri_s, RtriTilt_s, Zcirc_s, Zelong_s, ZelongTilt_s, Ztri_s, ZtriTilt_s!, dtrShape_s
+    ! real(xp), dimension(500*(Npol+1)):: dtrAng_s, dtxAng_s, dtyAng_s, rShape_s, rAng_s, xAng_s, yAng_s
+    real(xp), dimension(500*(Npol+1)):: psi1_s, nu1_s, dchidx_s, dB_drho_s, dB_dl_s, dnu_drho_s, dnu_dl_s, grad_nu_s
+    real(xp), dimension(500*(Npol+1)):: gxx, gxy, gxz, gyy, gyz, gzz, dtheta_dchi_s, dBp_dchi_s, jacobian, dBdx, dBdz
+    real(xp), dimension(500*(Npol+1)):: g_xx, g_xy, g_xz, g_yy, g_yz, g_zz, tmp_arr_s, dxdR_s, dxdZ_s, K_x, K_y !tmp_arr2
+
+    real(xp), dimension(1:Nz):: gxx_out,gxy_out,gxz_out,gyy_out,gyz_out,gzz_out,Bfield_out,jacobian_out, dBdx_out, dBdz_out, chi_out
+    real(xp), dimension(1:Nz):: R_out, Z_out, dxdR_out, dxdZ_out
+    real(xp):: d_inv, drPsi, dxPsi, dq_dx, dq_xpsi, R0, Z0, B0, F, D0_full, D1_full, D2_full, D3_full
+    !real(xp) :: Lnorm, Psi0 ! currently module-wide defined anyway
+    real(xp):: pprime, ffprime, D0_mid, D1_mid, D2_mid, D3_mid, dx_drho, pi, mu_0, dzprimedz
+    ! real(xp):: rho_a, psiN, drpsiN, CN2, CN3, Rcenter, Zcenter, drRcenter, drZcenter
     logical:: bMaxShift
     integer:: i, k, iBmax
 
@@ -121,7 +123,7 @@ CONTAINS
     pi = acos(-1.0)
     mu_0=4.0*pi
 
-    theta=linspace(-pi*Npol_ext,pi*Npol_ext-2._dp*pi*Npol_ext/np,np)
+    theta=linspace(-pi*Npol_ext,pi*Npol_ext-2._xp*pi*Npol_ext/np,np)
     d_inv=asin(delta)
 
     thetaShift = 0.0
@@ -261,11 +263,11 @@ CONTAINS
 
     !--------shear is expected to be defined as rho/q*dq/drho--------!
     dq_dx=shat*q0/rho/dx_drho
-    dq_dpsi=dq_dx/dxPsi
+    dq_xpsi=dq_dx/dxPsi
     pprime=-amhd/q0**2/R0/(2*mu_0)*B0**2/drPsi
 
-    !neg. dpdx normalized to magnetic pressure for pressure term
-    dpdx_pm_geom=amhd/q0**2/R0/dx_drho
+    !neg. xpdx normalized to magnetic pressure for pressure term
+    xpdx_pm_geom=amhd/q0**2/R0/dx_drho
 
     !first coefficient of psi in varrho expansion
     psi1 = R*Bp*sign_Ip_CW
@@ -294,7 +296,7 @@ CONTAINS
     D2_mid=D2(np/2+1)
     D3_mid=D3(np/2+1)
 
-    ffprime=-(sign_Ip_CW*dq_dpsi*2.*pi*Npol_ext+D0_full+D2_full*pprime)/D1_full
+    ffprime=-(sign_Ip_CW*dq_xpsi*2.*pi*Npol_ext+D0_full+D2_full*pprime)/D1_full
 
     if (my_id==0) then
        write(*,'(A,ES12.4)') "ffprime = ", ffprime
@@ -403,10 +405,10 @@ CONTAINS
     !contravariant metric coefficients (varrho,l,fz)->(x,y,z)
     gxx=(psi1_s/dxPsi)**2
     gxy=-psi1_s/dxPsi*C_y*sign_Ip_CW*nu1_s
-    gxz=-psi1_s/dxPsi*(nu1_s+psi1_s*dq_dpsi*chi_s)/q0
+    gxz=-psi1_s/dxPsi*(nu1_s+psi1_s*dq_xpsi*chi_s)/q0
     gyy=C_y**2*(grad_nu_s**2+1/R_s**2)
-    gyz=sign_Ip_CW*C_y/q0*(grad_nu_s**2+dq_dpsi*nu1_s*psi1_s*chi_s)
-    gzz=1./q0**2*(grad_nu_s**2+2.*dq_dpsi*nu1_s*psi1_s*chi_s+(dq_dpsi*psi1_s*chi_s)**2)
+    gyz=sign_Ip_CW*C_y/q0*(grad_nu_s**2+dq_xpsi*nu1_s*psi1_s*chi_s)
+    gzz=1./q0**2*(grad_nu_s**2+2.*dq_xpsi*nu1_s*psi1_s*chi_s+(dq_xpsi*psi1_s*chi_s)**2)
 
     jacobian=1./sqrt(gxx*gyy*gzz + 2.*gxy*gyz*gxz - gxz**2*gyy - gyz**2*gxx - gzz*gxy**2)
 
@@ -420,7 +422,7 @@ CONTAINS
 
     !Bfield derivatives
     !dBdx = e_x * nabla B = J (nabla y x nabla z) * nabla B
-    dBdx=jacobian*C_y/(q0*R_s)*(F/(R_s*psi1_s)*dB_drho_s+(nu1_s+dq_dpsi*chi_s*psi1_s)*dB_dl_s)
+    dBdx=jacobian*C_y/(q0*R_s)*(F/(R_s*psi1_s)*dB_drho_s+(nu1_s+dq_xpsi*chi_s*psi1_s)*dB_dl_s)
     dBdz=1./B_s*(Bp_s*dBp_dchi_s-F**2/R_s**3*R_theta_s)
 
     !curvature terms (these are just local and will be recalculated in geometry.F90)
@@ -438,7 +440,7 @@ CONTAINS
        !new parallel coordinate chi_out==zprime
        !see also tracer_aux.F90
        if (Npol>1) ERROR STOP '>> ERROR << Npol>1 has not been implemented for edge_opt=\=0.0'
-       do k=izs,ize
+       do k=1,Nz
           chi_out(k)=sinh((-pi+k*2.*pi/Nz)*log(edge_opt*pi+sqrt(edge_opt**2*pi**2+1))/pi)/edge_opt
        enddo
        !transform metrics according to chain rule
@@ -473,24 +475,25 @@ CONTAINS
     call lag3interp(Z_s,chi_s,np_s,Z_out,chi_out,Nz)
     call lag3interp(dxdR_s,chi_s,np_s,dxdR_out,chi_out,Nz)
     call lag3interp(dxdZ_s,chi_s,np_s,dxdZ_out,chi_out,Nz)
-    ! Fill the geom arrays with the results
-    do eo=0,1
-    gxx_(izs:ize,eo)      =gxx_out(izs:ize)
-    gyy_(izs:ize,eo)      =gyy_out(izs:ize)
-    gxz_(izs:ize,eo)      =gxz_out(izs:ize)
-    gyz_(izs:ize,eo)      =gyz_out(izs:ize)
-    dBdx_(izs:ize,eo)     =dBdx_out(izs:ize)
-    dBdy_(izs:ize,eo)     =0.
-    gxy_(izs:ize,eo)      =gxy_out(izs:ize)
-    gzz_(izs:ize,eo)      =gzz_out(izs:ize)
-    Bfield_(izs:ize,eo)   =Bfield_out(izs:ize)
-    jacobian_(izs:ize,eo) =jacobian_out(izs:ize)
-    dBdz_(izs:ize,eo)     =dBdz_out(izs:ize)
-    R_hat_(izs:ize,eo)    =R_out(izs:ize)
-    Z_hat_(izs:ize,eo)    =Z_out(izs:ize)
-    dxdR_(izs:ize,eo)     = dxdR_out(izs:ize)
-    dxdZ_(izs:ize,eo)     = dxdZ_out(izs:ize)
-
+    ! Fill the interior of the geom arrays with the results
+    do eo=1,2
+      DO iz = 1,local_nz
+        gxx_(iz+Ngz/2,eo)      = gxx_out(iz-local_nz_offset)
+        gyy_(iz+Ngz/2,eo)      = gyy_out(iz-local_nz_offset)
+        gxz_(iz+Ngz/2,eo)      = gxz_out(iz-local_nz_offset)
+        gyz_(iz+Ngz/2,eo)      = gyz_out(iz-local_nz_offset)
+        dBdx_(iz+Ngz/2,eo)     = dBdx_out(iz-local_nz_offset)
+        dBdy_(iz+Ngz/2,eo)     = 0.
+        gxy_(iz+Ngz/2,eo)      = gxy_out(iz-local_nz_offset)
+        gzz_(iz+Ngz/2,eo)      = gzz_out(iz-local_nz_offset)
+        Bfield_(iz+Ngz/2,eo)   = Bfield_out(iz-local_nz_offset)
+        jacobian_(iz+Ngz/2,eo) = jacobian_out(iz-local_nz_offset)
+        dBdz_(iz+Ngz/2,eo)     = dBdz_out(iz-local_nz_offset)
+        R_hat_(iz+Ngz/2,eo)    = R_out(iz-local_nz_offset)
+        Z_hat_(iz+Ngz/2,eo)    = Z_out(iz-local_nz_offset)
+        dxdR_(iz+Ngz/2,eo)     = dxdR_out(iz-local_nz_offset)
+        dxdZ_(iz+Ngz/2,eo)     = dxdZ_out(iz-local_nz_offset)
+      ENDDO
     !! UPDATE GHOSTS VALUES (since the miller function in GENE does not)
     CALL update_ghosts_z(gxx_(:,eo))
     CALL update_ghosts_z(gyy_(:,eo))
@@ -508,22 +511,22 @@ CONTAINS
     CALL update_ghosts_z(dxdZ_(:,eo))
 
     ! Curvature operator (Frei et al. 2022 eq 2.15)
-    DO iz = izgs,izge
+    DO iz = 1,local_nz+Ngz
       G1 = gxy_(iz,eo)*gxy_(iz,eo)-gxx_(iz,eo)*gyy_(iz,eo)
       G2 = gxy_(iz,eo)*gxz_(iz,eo)-gxx_(iz,eo)*gyz_(iz,eo)
       G3 = gyy_(iz,eo)*gxz_(iz,eo)-gxy_(iz,eo)*gyz_(iz,eo)
       Cx = (G1*dBdy_(iz,eo) + G2*dBdz_(iz,eo))/Bfield_(iz,eo)
       Cy = (G3*dBdz_(iz,eo) - G1*dBdx_(iz,eo))/Bfield_(iz,eo)
 
-      DO iky = ikys, ikye
+      DO iky = 1,local_Nky
         ky = kyarray(iky)
-         DO ikx= ikxs, ikxe
+         DO ikx= 1,local_Nkx
            kx = kxarray(ikx)
            Ckxky_(iky, ikx, iz,eo) = (Cx*kx + Cy*ky)
          ENDDO
       ENDDO
       ! coefficient in the front of parallel derivative
-      gradz_coeff_(iz,eo) = 1._dp / jacobian_(iz,eo) / Bfield_(iz,eo)
+      gradz_coeff_(iz,eo) = 1._xp / jacobian_(iz,eo) / Bfield_(iz,eo)
     ENDDO
   ENDDO
 
@@ -533,50 +536,51 @@ CONTAINS
     SUBROUTINE update_ghosts_z(fz_)
       IMPLICIT NONE
       ! INTEGER,  INTENT(IN) :: nztot_
-      REAL(dp), DIMENSION(izgs:izge), INTENT(INOUT) :: fz_
-      REAL(dp), DIMENSION(-2:2) :: buff
-      INTEGER :: status(MPI_STATUS_SIZE), count
-
+      REAL(xp), DIMENSION(1:local_nz+Ngz), INTENT(INOUT) :: fz_
+      REAL(xp), DIMENSION(-2:2) :: buff
+      INTEGER :: status(MPI_STATUS_SIZE), count, last, first
+      last = local_nz+Ngz/2
+      first= 1 + Ngz/2
       IF(Nz .GT. 1) THEN
         IF (num_procs_z .GT. 1) THEN
           CALL MPI_BARRIER(MPI_COMM_WORLD,ierr)
             count = 1 ! one point to exchange
             !!!!!!!!!!! Send ghost to up neighbour !!!!!!!!!!!!!!!!!!!!!!
-            CALL mpi_sendrecv(fz_(ize), count, MPI_DOUBLE, nbr_U, 0, & ! Send to Up the last
-                              buff(-1), count, MPI_DOUBLE, nbr_D, 0, & ! Receive from Down the first-1
+            CALL mpi_sendrecv(fz_(last), count, MPI_DOUBLE, nbr_U, 0, & ! Send to Up the last
+                               buff(-1), count, MPI_DOUBLE, nbr_D, 0, & ! Receive from Down the first-1
                               comm0, status, ierr)
 
-            CALL mpi_sendrecv(fz_(ize-1), count, MPI_DOUBLE, nbr_U, 0, & ! Send to Up the last
-                                buff(-2), count, MPI_DOUBLE, nbr_D, 0, & ! Receive from Down the first-2
+            CALL mpi_sendrecv(fz_(last-1), count, MPI_DOUBLE, nbr_U, 0, & ! Send to Up the last
+                                 buff(-2), count, MPI_DOUBLE, nbr_D, 0, & ! Receive from Down the first-2
                               comm0, status, ierr)
 
             !!!!!!!!!!! Send ghost to down neighbour !!!!!!!!!!!!!!!!!!!!!!
-            CALL mpi_sendrecv(fz_(izs), count, MPI_DOUBLE, nbr_D, 0, & ! Send to Down the first
-                              buff(+1), count, MPI_DOUBLE, nbr_U, 0, & ! Recieve from Up the last+1
+            CALL mpi_sendrecv(fz_(first), count, MPI_DOUBLE, nbr_D, 0, & ! Send to Down the first
+                                buff(+1), count, MPI_DOUBLE, nbr_U, 0, & ! Recieve from Up the last+1
                               comm0, status, ierr)
 
-            CALL mpi_sendrecv(fz_(izs+1), count, MPI_DOUBLE, nbr_D, 0, & ! Send to Down the first
-                                buff(+2), count, MPI_DOUBLE, nbr_U, 0, & ! Recieve from Up the last+2
+            CALL mpi_sendrecv(fz_(first+1), count, MPI_DOUBLE, nbr_D, 0, & ! Send to Down the first
+                                  buff(+2), count, MPI_DOUBLE, nbr_U, 0, & ! Recieve from Up the last+2
                               comm0, status, ierr)
          ELSE
-           buff(-1) = fz_(ize  )
-           buff(-2) = fz_(ize-1)
-           buff(+1) = fz_(izs  )
-           buff(+2) = fz_(izs+1)
+           buff(-1) = fz_(last  )
+           buff(-2) = fz_(last-1)
+           buff(+1) = fz_(first  )
+           buff(+2) = fz_(first+1)
          ENDIF
-         fz_(ize+1) = buff(+1)
-         fz_(ize+2) = buff(+2)
-         fz_(izs-1) = buff(-1)
-         fz_(izs-2) = buff(-2)
+         fz_(last +1) = buff(+1)
+         fz_(last +2) = buff(+2)
+         fz_(first-1) = buff(-1)
+         fz_(first-2) = buff(-2)
       ENDIF
     END SUBROUTINE update_ghosts_z
 
 
     !> Generate an equidistant array from min to max with n points
     function linspace(min,max,n) result(out)
-      real(dp), INTENT(IN):: min, max
+      real(xp), INTENT(IN):: min, max
       integer,  INTENT(IN):: n
-      real(dp), dimension(n):: out
+      real(xp), dimension(n):: out
 
       do i=1,n
          out(i)=min+(i-1)*(max-min)/(n-1)
@@ -584,20 +588,20 @@ CONTAINS
     end function linspace
 
     !> Weighted average
-    real(dp) function average(var,weight)
-      real(dp), dimension(np), INTENT(IN):: var, weight
+    real(xp) function average(var,weight)
+      real(xp), dimension(np), INTENT(IN):: var, weight
       average=sum(var*weight)/sum(weight)
     end function average
 
     !> full theta integral with weight function dlp
-    real(dp) function dlp_int(var,dlp)
-      real(dp), dimension(np), INTENT(IN):: var, dlp
+    real(xp) function dlp_int(var,dlp)
+      real(xp), dimension(np), INTENT(IN):: var, dlp
       dlp_int=sum(var*dlp)*2*pi*Npol_ext/np
     end function dlp_int
 
     !> theta integral with weight function dlp, up to index 'ind'
-    real(dp) function dlp_int_ind(var,dlp,ind)
-      real(dp), dimension(np), INTENT(IN):: var, dlp
+    real(xp) function dlp_int_ind(var,dlp,ind)
+      real(xp), dimension(np), INTENT(IN):: var, dlp
       integer, INTENT(IN):: ind
 
       dlp_int_ind=0.
@@ -611,8 +615,8 @@ CONTAINS
     !> 1st derivative with 2nd order finite differences
     function deriv_fd(y,x,n) result(out)
       integer,                INTENT(IN) :: n
-      real(dp), dimension(n), INTENT(IN):: x,y
-      real(dp), dimension(n) :: out,dx
+      real(xp), dimension(n), INTENT(IN):: x,y
+      real(xp), dimension(n) :: out,dx
 
       !call lag3deriv(y,x,n,out,x,n)
 
diff --git a/src/model_mod.F90 b/src/model_mod.F90
index 400ec152a3dd53d1f8c49f158b3cb432712de7b7..7be127dd9e7a782165b4bf824f08bc04465ecaf9 100644
--- a/src/model_mod.F90
+++ b/src/model_mod.F90
@@ -3,72 +3,43 @@ MODULE model
   USE prec_const
   IMPLICIT NONE
   PRIVATE
-
-  INTEGER,  PUBLIC, PROTECTED ::    CLOS =  0         ! linear truncation method
-  INTEGER,  PUBLIC, PROTECTED :: NL_CLOS =  0         ! nonlinear truncation method
+  ! INPUTS
   INTEGER,  PUBLIC, PROTECTED ::    KERN =  0         ! Kernel model
   CHARACTER(len=16), &
             PUBLIC, PROTECTED ::LINEARITY= 'linear'   ! To turn on non linear bracket term
-  LOGICAL,  PUBLIC, PROTECTED ::   KIN_E =  .true.    ! Simulate kinetic electron (adiabatic otherwise)
+  REAL(xp), PUBLIC, PROTECTED ::    mu_x =  0._xp     ! spatial    x-Hyperdiffusivity coefficient (for num. stability)
+  REAL(xp), PUBLIC, PROTECTED ::    mu_y =  0._xp     ! spatial    y-Hyperdiffusivity coefficient (for num. stability)
   INTEGER,  PUBLIC, PROTECTED ::    N_HD =  4         ! order of numerical spatial diffusion
-  REAL(dp), PUBLIC, PROTECTED ::    mu_x =  0._dp     ! spatial    x-Hyperdiffusivity coefficient (for num. stability)
-  REAL(dp), PUBLIC, PROTECTED ::    mu_y =  0._dp     ! spatial    y-Hyperdiffusivity coefficient (for num. stability)
   LOGICAL,  PUBLIC, PROTECTED ::   HDz_h =  .false.    ! to apply z-hyperdiffusion on non adiab part
-  REAL(dp), PUBLIC, PROTECTED ::    mu_z =  0._dp     ! spatial    z-Hyperdiffusivity coefficient (for num. stability)
+  REAL(xp), PUBLIC, PROTECTED ::    mu_z =  0._xp     ! spatial    z-Hyperdiffusivity coefficient (for num. stability)
+  REAL(xp), PUBLIC, PROTECTED ::    mu_p =  0._xp     ! kinetic para hyperdiffusivity coefficient (for num. stability)
+  REAL(xp), PUBLIC, PROTECTED ::    mu_j =  0._xp     ! kinetic perp hyperdiffusivity coefficient (for num. stability)
   CHARACTER(len=16), &
-            PUBLIC, PROTECTED ::   HYP_V = 'hypcoll'  ! hyperdiffusion model for velocity space ('none','hypcoll','dvpar4')
-  REAL(dp), PUBLIC, PROTECTED ::    mu_p =  0._dp     ! kinetic para hyperdiffusivity coefficient (for num. stability)
-  REAL(dp), PUBLIC, PROTECTED ::    mu_j =  0._dp     ! kinetic perp hyperdiffusivity coefficient (for num. stability)
-  REAL(dp), PUBLIC, PROTECTED ::      nu =  0._dp     ! Collision frequency
-  REAL(dp), PUBLIC, PROTECTED ::   tau_e =  1._dp     ! Temperature
-  REAL(dp), PUBLIC, PROTECTED ::   tau_i =  1._dp     !
-  REAL(dp), PUBLIC, PROTECTED :: sigma_e =  0.023338_dp! sqrt of electron ion mass ratio
-  REAL(dp), PUBLIC, PROTECTED :: sigma_i =  1._dp     !
-  REAL(dp), PUBLIC, PROTECTED ::     q_e = -1._dp     ! Charge
-  REAL(dp), PUBLIC, PROTECTED ::     q_i =  1._dp     !
-  REAL(dp), PUBLIC, PROTECTED ::    k_Ni =  0._dp     ! Ion density drive (L_ref/L_Ni)
-  REAL(dp), PUBLIC, PROTECTED ::    k_Ne =  0._dp     ! Ele ''
-  REAL(dp), PUBLIC, PROTECTED ::    k_Ti =  0._dp     ! Ion temperature drive (L_ref/L_Ti)
-  REAL(dp), PUBLIC, PROTECTED ::    k_Te =  0._dp     ! Ele ''
-  REAL(dp), PUBLIC, PROTECTED ::    K_E  =  0._dp     ! Backg. electric field drive (L_ref/L_E)
-  REAL(dp), PUBLIC, PROTECTED ::    k_gB =  1._dp     ! Magnetic gradient drive (L_ref/L_gB)
-  REAL(dp), PUBLIC, PROTECTED ::    k_cB =  1._dp     ! Magnetic curvature drive (L_ref/L_cB)
-  REAL(dp), PUBLIC, PROTECTED :: lambdaD =  0._dp     ! Debye length
-  REAL(dp), PUBLIC, PROTECTED ::    beta =  0._dp     ! electron plasma Beta (8piNT_e/B0^2)
-
+  PUBLIC, PROTECTED ::   HYP_V = 'hypcoll'  ! hyperdiffusion model for velocity space ('none','hypcoll','dvpar4')
+  INTEGER,  PUBLIC, PROTECTED ::      Na =  1         ! number of evolved species
+  REAL(xp), PUBLIC, PROTECTED ::      nu =  0._xp     ! collision frequency parameter
+  REAL(xp), PUBLIC, PROTECTED ::    k_gB =  1._xp     ! Magnetic gradient strength (L_ref/L_gB)
+  REAL(xp), PUBLIC, PROTECTED ::    k_cB =  1._xp     ! Magnetic curvature strength (L_ref/L_cB)
+  REAL(xp), PUBLIC, PROTECTED :: lambdaD =  0._xp     ! Debye length
+  REAL(xp), PUBLIC, PROTECTED ::    beta =  0._xp     ! electron plasma Beta (8piNT_e/B0^2)
+  LOGICAL,  PUBLIC            :: ADIAB_E =  .false.   ! adiabatic electron model
+  REAL(xp), PUBLIC, PROTECTED ::   tau_e = 1.0        ! electron temperature ratio for adiabatic electrons
+  ! Auxiliary variable
   LOGICAL,  PUBLIC, PROTECTED ::      EM =  .false.   ! Electromagnetic effects flag
-  REAL(dp), PUBLIC, PROTECTED :: taue_qe         ! factor of the magnetic moment coupling
-  REAL(dp), PUBLIC, PROTECTED :: taui_qi         !
-  REAL(dp), PUBLIC, PROTECTED :: qi_taui         !
-  REAL(dp), PUBLIC, PROTECTED :: qe_taue         !
-  REAL(dp), PUBLIC, PROTECTED :: sqrtTaue_qe          ! factor of parallel moment term
-  REAL(dp), PUBLIC, PROTECTED :: sqrtTaui_qi          !
-  REAL(dp), PUBLIC, PROTECTED :: qe_sigmae_sqrtTaue   ! factor of parallel phi term
-  REAL(dp), PUBLIC, PROTECTED :: qi_sigmai_sqrtTaui   !
-  REAL(dp), PUBLIC, PROTECTED :: sigmae2_taue_o2      ! factor of the Kernel argument
-  REAL(dp), PUBLIC, PROTECTED :: sigmai2_taui_o2      !
-  REAL(dp), PUBLIC, PROTECTED :: sqrt_sigmae2_taue_o2      ! factor of the Kernel argument
-  REAL(dp), PUBLIC, PROTECTED :: sqrt_sigmai2_taui_o2
-  REAL(dp), PUBLIC, PROTECTED :: nu_e,  nu_i          ! electron-ion, ion-ion collision frequency
-  REAL(dp), PUBLIC, PROTECTED :: nu_ee, nu_ie         ! e-e, i-e coll. frequ.
-  REAL(dp), PUBLIC, PROTECTED :: qe2_taue, qi2_taui   ! factor of the gammaD sum
-  REAL(dp), PUBLIC, PROTECTED :: q_o_sqrt_tau_sigma_e, q_o_sqrt_tau_sigma_i
-  REAL(dp), PUBLIC, PROTECTED :: sqrt_tau_o_sigma_e, sqrt_tau_o_sigma_i
-  REAL(dp), PUBLIC, PROTECTED :: dpdx = 0             ! radial pressure gradient
   PUBLIC :: model_readinputs, model_outputinputs
 
 CONTAINS
 
   SUBROUTINE model_readinputs
     !    Read the input parameters
-    USE basic, ONLY : lu_in, my_id, num_procs_p
+    USE basic,    ONLY: lu_in
+    USE parallel, ONLY: my_id,num_procs_p
     USE prec_const
     IMPLICIT NONE
 
-    NAMELIST /MODEL_PAR/ CLOS, NL_CLOS, KERN, LINEARITY, KIN_E, &
-                         mu_x, mu_y, N_HD, HDz_h, mu_z, mu_p, mu_j, HYP_V, nu,&
-                         tau_e, tau_i, sigma_e, sigma_i, q_e, q_i,&
-                         k_Ne, k_Ni, k_Te, k_Ti, k_gB, k_cB, lambdaD, beta
+    NAMELIST /MODEL_PAR/ KERN, LINEARITY, &
+                         mu_x, mu_y, N_HD, HDz_h, mu_z, mu_p, mu_j, HYP_V, Na,&
+                         nu, k_gB, k_cB, lambdaD, beta, ADIAB_E, tau_e
 
     READ(lu_in,model_par)
 
@@ -76,85 +47,45 @@ CONTAINS
       ERROR STOP '>> ERROR << dvpar4 velocity dissipation is not compatible with current p parallelization'
     ENDIF
 
-    IF(.NOT. KIN_E) THEN
+    IF(Na .EQ. 1) THEN
       IF(my_id.EQ.0) print*, 'Adiabatic electron model -> beta = 0'
-      beta = 0._dp
+      beta = 0._xp
     ENDIF
 
     IF(beta .GT. 0) THEN
       IF(my_id.EQ.0) print*, 'Electromagnetic effects are included'
       EM   = .TRUE.
-      dpdx = -(tau_i*(k_Ni + k_Ti) + tau_e*(k_Ne + k_Te))
     ENDIF
 
-    taue_qe    = tau_e/q_e ! factor of the magnetic moment coupling
-    taui_qi    = tau_i/q_i ! factor of the magnetic moment coupling
-    qe_taue    = q_e/tau_e
-    qi_taui    = q_i/tau_i
-    sqrtTaue_qe     = sqrt(tau_e)/q_e   ! factor of parallel moment term
-    sqrtTaui_qi     = sqrt(tau_i)/q_i   ! factor of parallel moment term
-    qe_sigmae_sqrtTaue = q_e/sigma_e/SQRT(tau_e) ! factor of parallel phi term
-    qi_sigmai_sqrtTaui = q_i/sigma_i/SQRT(tau_i)
-    qe2_taue        = (q_e**2)/tau_e ! factor of the gammaD sum
-    qi2_taui        = (q_i**2)/tau_i
-    sigmae2_taue_o2 = sigma_e**2 * tau_e/2._dp ! factor of the Kernel argument
-    sigmai2_taui_o2 = sigma_i**2 * tau_i/2._dp
-    sqrt_sigmae2_taue_o2 = SQRT(sigma_e**2 * tau_e/2._dp) ! to avoid multiple SQRT eval
-    sqrt_sigmai2_taui_o2 = SQRT(sigma_i**2 * tau_i/2._dp)
-    q_o_sqrt_tau_sigma_e = q_e/SQRT(tau_e)/sigma_e ! For psi field terms
-    q_o_sqrt_tau_sigma_i = q_i/SQRT(tau_i)/sigma_i ! For psi field terms
-    sqrt_tau_o_sigma_e   = SQRT(tau_e)/sigma_e     ! For Ampere eq
-    sqrt_tau_o_sigma_i   = SQRT(tau_i)/sigma_i
-    !! We use the ion-ion collision as normalization with definition
-    !   nu_ii = 4 sqrt(pi)/3 T_i^(-3/2) m_i^(-1/2) q^4 n_i0 ln(Lambda)
-    !
-    nu_e            = nu/sigma_e * (tau_e)**(3._dp/2._dp)  ! electron-ion collision frequency (where already multiplied by 0.532)
-    nu_i            = nu ! ion-ion collision frequ.
-    nu_ee           = nu_e ! e-e coll. frequ.
-    nu_ie           = nu_i ! i-e coll. frequ.
-
-    ! Old normalization (MOLI Jorge/Frei)
-    ! nu_e            = 0.532_dp*nu ! electron-ion collision frequency (where already multiplied by 0.532)
-    ! nu_i            = 0.532_dp*nu*sigma_e*tau_e**(-3._dp/2._dp)/SQRT2 ! ion-ion collision frequ.
-    ! nu_ee           = nu_e/SQRT2 ! e-e coll. frequ.
-    ! nu_ie           = 0.532_dp*nu*sigma_e**2 ! i-e coll. frequ.
-
   END SUBROUTINE model_readinputs
 
 
-  SUBROUTINE model_outputinputs(fidres, str)
-    !    Write the input parameters to the results_xx.h5 file
-
-    USE futils, ONLY: attach
-    USE prec_const
+  SUBROUTINE model_outputinputs(fid)
+    ! Write the input parameters to the results_xx.h5 file
+    USE futils, ONLY: attach, creatd
     IMPLICIT NONE
-
-    INTEGER, INTENT(in) :: fidres
-    CHARACTER(len=256), INTENT(in) :: str
-    CALL attach(fidres, TRIM(str),      "CLOS",    CLOS)
-    CALL attach(fidres, TRIM(str),      "KERN",    KERN)
-    CALL attach(fidres, TRIM(str), "LINEARITY", LINEARITY)
-    CALL attach(fidres, TRIM(str),     "KIN_E",   KIN_E)
-    CALL attach(fidres, TRIM(str),        "nu",      nu)
-    CALL attach(fidres, TRIM(str),        "mu",       0)
-    CALL attach(fidres, TRIM(str),      "mu_x",    mu_x)
-    CALL attach(fidres, TRIM(str),      "mu_y",    mu_y)
-    CALL attach(fidres, TRIM(str),      "mu_z",    mu_z)
-    CALL attach(fidres, TRIM(str),      "mu_p",    mu_p)
-    CALL attach(fidres, TRIM(str),      "mu_j",    mu_j)
-    CALL attach(fidres, TRIM(str),     "tau_e",   tau_e)
-    CALL attach(fidres, TRIM(str),     "tau_i",   tau_i)
-    CALL attach(fidres, TRIM(str),   "sigma_e", sigma_e)
-    CALL attach(fidres, TRIM(str),   "sigma_i", sigma_i)
-    CALL attach(fidres, TRIM(str),       "q_e",     q_e)
-    CALL attach(fidres, TRIM(str),       "q_i",     q_i)
-    CALL attach(fidres, TRIM(str),      "k_Ne",    k_Ne)
-    CALL attach(fidres, TRIM(str),      "k_Ni",    k_Ni)
-    CALL attach(fidres, TRIM(str),      "k_Te",    k_Te)
-    CALL attach(fidres, TRIM(str),      "k_Ti",    k_Ti)
-    CALL attach(fidres, TRIM(str),       "K_E",     K_E)
-    CALL attach(fidres, TRIM(str),   "lambdaD", lambdaD)
-    CALL attach(fidres, TRIM(str),      "beta",    beta)
+    INTEGER, INTENT(in) :: fid
+    CHARACTER(len=256)  :: str
+    WRITE(str,'(a)') '/data/input/model'
+    CALL creatd(fid, 0,(/0/),TRIM(str),'Model Input')
+    CALL attach(fid, TRIM(str),      "KERN",    KERN)
+    CALL attach(fid, TRIM(str), "LINEARITY", LINEARITY)
+    CALL attach(fid, TRIM(str),      "mu_x",    mu_x)
+    CALL attach(fid, TRIM(str),      "mu_y",    mu_y)
+    CALL attach(fid, TRIM(str),      "N_HD",    N_HD)
+    CALL attach(fid, TRIM(str),      "mu_z",    mu_z)
+    CALL attach(fid, TRIM(str),     "HDz_h",   HDz_h)
+    CALL attach(fid, TRIM(str),      "mu_p",    mu_p)
+    CALL attach(fid, TRIM(str),      "mu_j",    mu_j)
+    CALL attach(fid, TRIM(str),     "HYP_V",   HYP_V)
+    CALL attach(fid, TRIM(str),        "Na",      Na)
+    CALL attach(fid, TRIM(str),        "nu",      nu)
+    CALL attach(fid, TRIM(str),      "k_gB",    k_gB)
+    CALL attach(fid, TRIM(str),      "k_cB",    k_cB)
+    CALL attach(fid, TRIM(str),   "lambdaD", lambdaD)
+    CALL attach(fid, TRIM(str),      "beta",    beta)
+    CALL attach(fid, TRIM(str),   "ADIAB_E", ADIAB_E)
+    CALL attach(fid, TRIM(str),     "tau_e",   tau_e)
   END SUBROUTINE model_outputinputs
 
 END MODULE model
diff --git a/src/moments_eq_rhs_mod.F90 b/src/moments_eq_rhs_mod.F90
index f30e107c0049c95c678dcd896ecedf3f474ed26b..e2a25dd4696741cc717436f2a87bcada6f186a9c 100644
--- a/src/moments_eq_rhs_mod.F90
+++ b/src/moments_eq_rhs_mod.F90
@@ -7,326 +7,231 @@ SUBROUTINE compute_moments_eq_rhs
   USE model
   USE array
   USE fields
-  USE grid
+  USE grid,       ONLY: local_na, local_np, local_nj, local_nkx, local_nky, local_nz,&
+                        nzgrid,pp2,ngp,ngj,ngz,&
+                        diff_dz_coeff,diff_kx_coeff,diff_ky_coeff,diff_p_coeff,diff_j_coeff,&
+                        parray,jarray,kxarray, kyarray, kparray
   USE basic
+  USE closure,    ONLY: evolve_mom
   USE prec_const
   USE collision
   USE time_integration
+  ! USE species, ONLY: xpdx
   USE geometry, ONLY: gradz_coeff, dlnBdz, Ckxky!, Gamma_phipar
-  USE calculus, ONLY : interp_z, grad_z, grad_z2
+  USE calculus, ONLY: interp_z, grad_z, grad_z2
+  ! USE species,  ONLY: xpdx
   IMPLICIT NONE
-
-    !compute ion moments_eq_rhs
-    CALL moments_eq_rhs(ips_i,ipe_i,ipgs_i,ipge_i,ijs_i,ije_i,ijgs_i,ijge_i,jarray_i,parray_i,&
-                     xnipj, xnipp2j, xnipm2j, xnipjp1, xnipjm1, xnipp1j, xnipm1j,&
-                     ynipp1j, ynipp1jm1, ynipm1j, ynipm1jm1, &
-                     znipm1j, znipm1jp1, znipm1jm1, &
-                     xphij_i, xphijp1_i, xphijm1_i, xpsij_i, xpsijp1_i, xpsijm1_i,&
-                     kernel_i, nadiab_moments_i, ddz_nipj, interp_nipj, Sipj,&
-                     moments_i(ipgs_i:ipge_i,ijgs_i:ijge_i,ikys:ikye,ikxs:ikxe,izgs:izge,updatetlevel),&
-                     TColl_i, ddzND_Nipj, diff_pi_coeff, diff_ji_coeff,&
-                     moments_rhs_i(ips_i:ipe_i,ijs_i:ije_i,ikys:ikye,ikxs:ikxe,izs:ize,updatetlevel))
-
-    !compute ion moments_eq_rhs
-    IF(KIN_E) &
-    CALL moments_eq_rhs(ips_e,ipe_e,ipgs_e,ipge_e,ijs_e,ije_e,ijgs_e,ijge_e,jarray_e,parray_e,&
-                     xnepj, xnepp2j, xnepm2j, xnepjp1, xnepjm1, xnepp1j, xnepm1j,&
-                     ynepp1j, ynepp1jm1, ynepm1j, ynepm1jm1, &
-                     znepm1j, znepm1jp1, znepm1jm1, &
-                     xphij_e, xphijp1_e, xphijm1_e, xpsij_e, xpsijp1_e, xpsijm1_e,&
-                     kernel_e, nadiab_moments_e, ddz_nepj, interp_nepj, Sepj,&
-                     moments_e(ipgs_e:ipge_e,ijgs_e:ijge_e,ikys:ikye,ikxs:ikxe,izgs:izge,updatetlevel),&
-                     TColl_e, ddzND_Nepj, diff_pe_coeff, diff_je_coeff,&
-                     moments_rhs_e(ips_e:ipe_e,ijs_e:ije_e,ikys:ikye,ikxs:ikxe,izs:ize,updatetlevel))
-
-  CONTAINS
-  !_____________________________________________________________________________!
-  !_____________________________________________________________________________!
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-  !!! moments_ RHS computation !!!!!!!
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-  ! This routine assemble the RHS of the moment hierarchy equations. It uses
-  ! linear coefficients that are stored in arrays (xn*, yn* and zn*) computed in
-  ! numerics_mod.F90. Otherwise it simply adds the collision term TColl_ that is
-  ! computed in collision_mod.F90 and the nonlinear term Sapj_ computed in
-  ! nonlinear_mod.F90.
-  ! All arguments of the subroutines are inputs only except the last one,
-  ! moments_rhs_ that will contain the sum of every terms in the RHS.
-  !_____________________________________________________________________________!
-  SUBROUTINE moments_eq_rhs(ips_,ipe_,ipgs_,ipge_,ijs_,ije_,ijgs_,ijge_,jarray_,parray_,&
-                   xnapj_, xnapp2j_, xnapm2j_, xnapjp1_, xnapjm1_, xnapp1j_, xnapm1j_,&
-                   ynapp1j_, ynapp1jm1_, ynapm1j_, ynapm1jm1_, &
-                   znapm1j_, znapm1jp1_, znapm1jm1_, &
-                   xphij_, xphijp1_, xphijm1_, xpsij_, xpsijp1_, xpsijm1_,&
-                   kernel_, nadiab_moments_, ddz_napj_, interp_napj_, Sapj_,&
-                   moments_, TColl_, ddzND_napj_, diff_p_coeff_, diff_j_coeff_, moments_rhs_)
-
-    IMPLICIT NONE
-    !! INPUTS
-    INTEGER, INTENT(IN) :: ips_, ipe_, ipgs_, ipge_, ijs_, ije_, ijgs_, ijge_
-    INTEGER,  DIMENSION(ipgs_:ipge_), INTENT(IN) :: parray_
-    INTEGER,  DIMENSION(ijgs_:ijge_), INTENT(IN) :: jarray_
-    REAL(dp), DIMENSION(ips_:ipe_,ijs_:ije_), INTENT(IN) :: xnapj_
-    REAL(dp), DIMENSION(ips_:ipe_),           INTENT(IN) :: xnapp2j_, xnapm2j_
-    REAL(dp), DIMENSION(ijs_:ije_),           INTENT(IN) :: xnapjp1_, xnapjm1_
-    REAL(dp), DIMENSION(ips_:ipe_),           INTENT(IN) :: xnapp1j_, xnapm1j_
-    REAL(dp), DIMENSION(ips_:ipe_,ijs_:ije_), INTENT(IN) :: ynapp1j_, ynapp1jm1_, ynapm1j_, ynapm1jm1_
-    REAL(dp), DIMENSION(ips_:ipe_,ijs_:ije_), INTENT(IN) :: znapm1j_, znapm1jp1_, znapm1jm1_
-    REAL(dp), DIMENSION(ips_:ipe_,ijs_:ije_), INTENT(IN) :: xphij_, xphijp1_, xphijm1_
-    REAL(dp), DIMENSION(ips_:ipe_,ijs_:ije_), INTENT(IN) :: xpsij_, xpsijp1_, xpsijm1_
-
-    REAL(dp), DIMENSION(ijgs_:ijge_,ikys:ikye,ikxs:ikxe,izgs:izge,0:1),INTENT(IN) :: kernel_
-
-    COMPLEX(dp), DIMENSION(ipgs_:ipge_,ijgs_:ijge_,ikys:ikye,ikxs:ikxe,izgs:izge),INTENT(IN) :: nadiab_moments_
-    COMPLEX(dp), DIMENSION(ipgs_:ipge_,ijgs_:ijge_,ikys:ikye,ikxs:ikxe,izgs:izge),INTENT(IN) :: ddz_napj_
-    COMPLEX(dp), DIMENSION(ipgs_:ipge_,ijgs_:ijge_,ikys:ikye,ikxs:ikxe,izgs:izge),INTENT(IN) :: interp_napj_
-    COMPLEX(dp), DIMENSION( ips_:ipe_,  ijs_:ije_, ikys:ikye,ikxs:ikxe, izs:ize), INTENT(IN) :: Sapj_
-    COMPLEX(dp), DIMENSION(ipgs_:ipge_,ijgs_:ijge_,ikys:ikye,ikxs:ikxe,izgs:izge),INTENT(IN) :: moments_
-    COMPLEX(dp), DIMENSION( ips_:ipe_,  ijs_:ije_, ikys:ikye,ikxs:ikxe, izs:ize), INTENT(IN) :: TColl_
-    COMPLEX(dp), DIMENSION(ipgs_:ipge_,ijgs_:ijge_,ikys:ikye,ikxs:ikxe,izgs:izge),INTENT(IN) :: ddzND_napj_
-    REAL(dp),    INTENT(IN) :: diff_p_coeff_, diff_j_coeff_
-    !! OUTPUT
-    COMPLEX(dp), DIMENSION( ips_:ipe_,  ijs_:ije_, ikys:ikye,ikxs:ikxe, izs:ize),INTENT(OUT) :: moments_rhs_
-
-    INTEGER     :: p_int, j_int ! loops indices and polynom. degrees
-    REAL(dp)    :: kx, ky, kperp2
-    COMPLEX(dp) :: Tnapj, Tnapp2j, Tnapm2j, Tnapjp1, Tnapjm1 ! Terms from b x gradB and drives
-    COMPLEX(dp) :: Tnapp1j, Tnapm1j, Tnapp1jm1, Tnapm1jm1 ! Terms from mirror force with non adiab moments_
-    COMPLEX(dp) :: Tpar, Tmir
-    COMPLEX(dp) :: Mperp, Mpara, Dphi, Dpsi
-    COMPLEX(dp) :: Unapm1j, Unapm1jp1, Unapm1jm1 ! Terms from mirror force with adiab moments_
-    COMPLEX(dp) :: i_kx,i_ky,phikykxz, psikykxz
-
-     ! Measuring execution time
-    CALL cpu_time(t0_rhs)
-
-    ! Spatial loops
-    zloop : DO  iz = izs,ize
-      kxloop : DO ikx = ikxs,ikxe
-        kx       = kxarray(ikx)   ! radial wavevector
-        i_kx     = imagu * kx     ! radial derivative
-
-        kyloop : DO iky = ikys,ikye
-          ky     = kyarray(iky)   ! binormal wavevector
-          i_ky   = imagu * ky     ! binormal derivative
-          psikykxz = psi(iky,ikx,iz)! tmp psi value
-
-          ! Kinetic loops
-          jloop : DO ij = ijs_, ije_  ! This loop is from 1 to jmaxi+1
-            j_int = jarray_(ij)
-
-            ploop : DO ip = ips_, ipe_  ! Hermite loop
-              p_int = parray_(ip)      ! Hermite degree
-              eo    = MODULO(p_int,2) ! Indicates if we are on odd or even z grid
-              kperp2= kparray(iky,ikx,iz,eo)**2
-
-            IF((CLOS .NE. 1) .OR. (p_int+2*j_int .LE. dmaxe)) THEN ! for the closure scheme
-              !! Compute moments_ mixing terms
-              ! Perpendicular dynamic
-              ! term propto n^{p,j}
-              Tnapj   = xnapj_(ip,ij)* nadiab_moments_(ip,ij,iky,ikx,iz)
-              ! term propto n^{p+2,j}
-              Tnapp2j = xnapp2j_(ip) * nadiab_moments_(ip+pp2,ij,iky,ikx,iz)
-              ! term propto n^{p-2,j}
-              Tnapm2j = xnapm2j_(ip) * nadiab_moments_(ip-pp2,ij,iky,ikx,iz)
-              ! term propto n^{p,j+1}
-              Tnapjp1 = xnapjp1_(ij) * nadiab_moments_(ip,ij+1,iky,ikx,iz)
-              ! term propto n^{p,j-1}
-              Tnapjm1 = xnapjm1_(ij) * nadiab_moments_(ip,ij-1,iky,ikx,iz)
-              ! Perpendicular magnetic term (curvature and gradient drifts)
-              Mperp   = imagu*Ckxky(iky,ikx,iz,eo)*(Tnapj + Tnapp2j + Tnapm2j + Tnapjp1 + Tnapjm1)
-
-              ! Parallel dynamic
-              ! ddz derivative for Landau damping term
-              Tpar      = xnapp1j_(ip) * ddz_napj_(ip+1,ij,iky,ikx,iz) &
-                        + xnapm1j_(ip) * ddz_napj_(ip-1,ij,iky,ikx,iz)
-              ! Mirror terms
-              Tnapp1j   = ynapp1j_  (ip,ij) * interp_napj_(ip+1,ij  ,iky,ikx,iz)
-              Tnapp1jm1 = ynapp1jm1_(ip,ij) * interp_napj_(ip+1,ij-1,iky,ikx,iz)
-              Tnapm1j   = ynapm1j_  (ip,ij) * interp_napj_(ip-1,ij  ,iky,ikx,iz)
-              Tnapm1jm1 = ynapm1jm1_(ip,ij) * interp_napj_(ip-1,ij-1,iky,ikx,iz)
-              ! Trapping terms
-              Unapm1j   = znapm1j_  (ip,ij) * interp_napj_(ip-1,ij  ,iky,ikx,iz)
-              Unapm1jp1 = znapm1jp1_(ip,ij) * interp_napj_(ip-1,ij+1,iky,ikx,iz)
-              Unapm1jm1 = znapm1jm1_(ip,ij) * interp_napj_(ip-1,ij-1,iky,ikx,iz)
-
-              Tmir = dlnBdz(iz,eo)*(Tnapp1j + Tnapp1jm1 + Tnapm1j + Tnapm1jm1 +&
-                                    Unapm1j + Unapm1jp1 + Unapm1jm1)
-              ! Parallel magnetic term (Landau damping and the mirror force)
-              Mpara = gradz_coeff(iz,eo)*(Tpar + Tmir)
-              !! Electrical potential term
-              IF ( p_int .LE. 2 ) THEN ! kronecker p0 p1 p2
-                Dphi =i_ky*( xphij_  (ip,ij)*kernel_(ij  ,iky,ikx,iz,eo) &
-                            +xphijp1_(ip,ij)*kernel_(ij+1,iky,ikx,iz,eo) &
-                            +xphijm1_(ip,ij)*kernel_(ij-1,iky,ikx,iz,eo) )*phi(iky,ikx,iz)
-              ELSE
-                Dphi = 0._dp
-              ENDIF
-
-              !! Vector potential term
-              IF ( (p_int .LE. 3) .AND. (p_int .GE. 1) ) THEN ! Kronecker p1 or p3
-                Dpsi =-i_ky*( xpsij_  (ip,ij)*kernel_(ij  ,iky,ikx,iz,eo) &
-                             +xpsijp1_(ip,ij)*kernel_(ij+1,iky,ikx,iz,eo) &
-                             +xpsijm1_(ip,ij)*kernel_(ij-1,iky,ikx,iz,eo))*psi(iky,ikx,iz)
+  INTEGER     :: ia, iz, iky,  ikx, ip ,ij, eo ! counters
+  INTEGER     :: izi,ipi,iji ! interior points counters
+  INTEGER     :: p_int, j_int ! loops indices and polynom. degrees
+  REAL(xp)    :: kx, ky, kperp2
+  COMPLEX(xp) :: Tnapj, Tnapp2j, Tnapm2j, Tnapjp1, Tnapjm1 ! Terms from b x gradB and drives
+  COMPLEX(xp) :: Tnapp1j, Tnapm1j, Tnapp1jm1, Tnapm1jm1 ! Terms from mirror force with non adiab moments_
+  COMPLEX(xp) :: Ldamp, Fmir
+  COMPLEX(xp) :: Mperp, Mpara, Dphi, Dpsi
+  COMPLEX(xp) :: Unapm1j, Unapm1jp1, Unapm1jm1 ! Terms from mirror force with adiab moments_
+  COMPLEX(xp) :: i_kx,i_ky
+  COMPLEX(xp) :: Napj, RHS, phikykxz, psikykxz
+  ! Spatial loops
+  z:DO  iz = 1,local_nz
+    izi = iz + ngz/2
+    x:DO ikx = 1,local_nkx
+      kx       = kxarray(ikx)                     ! radial wavevector
+      i_kx     = imagu * kx                       ! radial derivative
+      y:DO iky = 1,local_nky
+        ky       = kyarray(iky)                     ! binormal wavevector
+        i_ky     = imagu * ky                       ! binormal derivative
+        phikykxz = phi(iky,ikx,izi)
+        psikykxz = psi(iky,ikx,izi)
+        ! Kinetic loops
+        j:DO ij = 1,local_nj               ! This loop is from 1 to jmaxi+1
+          iji   = ij+ngj/2
+          j_int = jarray(iji)
+          p:DO ip = 1,local_np             ! Hermite loop
+            ipi   = ip+ngp/2
+            p_int = parray(ipi)                   ! Hermite degree
+            eo    = min(nzgrid,MODULO(p_int,2)+1) ! Indicates if we are on odd or even z grid
+            kperp2= kparray(iky,ikx,izi,eo)**2
+            ! Species loop
+            a:DO ia = 1,local_na
+              Napj = moments(ia,ipi,iji,iky,ikx,izi,updatetlevel)
+              RHS = 0._xp
+              IF(evolve_mom(ipi,iji)) THEN ! for the closure scheme
+                !! Compute moments_ mixing terms
+                ! Perpendicular dynamic
+                ! term propto n^{p,j}
+                Tnapj   = xnapj(ia,ip,ij)* nadiab_moments(ia,ipi,    iji,  iky,ikx,izi)
+                ! term propto n^{p+2,j}
+                Tnapp2j = xnapp2j(ia,ip) * nadiab_moments(ia,ipi+pp2,iji,  iky,ikx,izi)
+                ! term propto n^{p-2,j}
+                Tnapm2j = xnapm2j(ia,ip) * nadiab_moments(ia,ipi-pp2,iji,  iky,ikx,izi)
+                ! term propto n^{p,j+1}
+                Tnapjp1 = xnapjp1(ia,ij) * nadiab_moments(ia,ipi,    iji+1,iky,ikx,izi)
+                ! term propto n^{p,j-1}
+                Tnapjm1 = xnapjm1(ia,ij) * nadiab_moments(ia,ipi,    iji-1,iky,ikx,izi)
+                ! Perpendicular magnetic term (curvature and gradient drifts)
+                Mperp   = imagu*Ckxky(iky,ikx,izi,eo)&
+                          *(Tnapj + Tnapp2j + Tnapm2j + Tnapjp1 + Tnapjm1)
+                ! Parallel dynamic
+                ! ddz derivative for Landau damping term
+                Ldamp     = xnapp1j(ia,ip) * ddz_napj(ia,ipi+1,iji,iky,ikx,iz) &
+                          + xnapm1j(ia,ip) * ddz_napj(ia,ipi-1,iji,iky,ikx,iz)
+                ! Mirror terms
+                Tnapp1j   = ynapp1j  (ia,ip,ij) * interp_napj(ia,ipi+1,iji  ,iky,ikx,iz)
+                Tnapp1jm1 = ynapp1jm1(ia,ip,ij) * interp_napj(ia,ipi+1,iji-1,iky,ikx,iz)
+                Tnapm1j   = ynapm1j  (ia,ip,ij) * interp_napj(ia,ipi-1,iji  ,iky,ikx,iz)
+                Tnapm1jm1 = ynapm1jm1(ia,ip,ij) * interp_napj(ia,ipi-1,iji-1,iky,ikx,iz)
+                ! Trapping terms
+                Unapm1j   = znapm1j  (ia,ip,ij) * interp_napj(ia,ipi-1,iji  ,iky,ikx,iz)
+                Unapm1jp1 = znapm1jp1(ia,ip,ij) * interp_napj(ia,ipi-1,iji+1,iky,ikx,iz)
+                Unapm1jm1 = znapm1jm1(ia,ip,ij) * interp_napj(ia,ipi-1,iji-1,iky,ikx,iz)
+                ! sum the parallel forces
+                Fmir = dlnBdz(izi,eo)*(Tnapp1j + Tnapp1jm1 + Tnapm1j + Tnapm1jm1 +&
+                                       Unapm1j + Unapm1jp1 + Unapm1jm1)
+                ! Parallel magnetic term (Landau damping and the mirror force)
+                Mpara = gradz_coeff(izi,eo)*(Ldamp + Fmir)
+                !! Electrical potential term
+                IF ( p_int  .LE. 2 ) THEN ! kronecker p0 p1 p2
+                  Dphi =i_ky*( xphij(ia,ip,ij)*kernel(ia,iji  ,iky,ikx,izi,eo) &
+                            +xphijp1(ia,ip,ij)*kernel(ia,iji+1,iky,ikx,izi,eo) &
+                            +xphijm1(ia,ip,ij)*kernel(ia,iji-1,iky,ikx,izi,eo) )*phikykxz
+                ELSE
+                  Dphi = 0._xp
+                ENDIF
+                !! Vector potential term
+                IF ( (p_int  .LE. 3) .AND. (p_int  .GE. 1) ) THEN ! Kronecker p1 or p3
+                  Dpsi =-i_ky*( xpsij  (ia,ip,ij)*kernel(ia,iji  ,iky,ikx,izi,eo) &
+                              +xpsijp1(ia,ip,ij)*kernel(ia,iji+1,iky,ikx,izi,eo) &
+                              +xpsijm1(ia,ip,ij)*kernel(ia,iji-1,iky,ikx,izi,eo))*psikykxz
+                ELSE
+                  Dpsi = 0._xp
+                ENDIF
+                !! Sum of all RHS terms
+                RHS = &
+                    ! Nonlinear term Sapj_ = {phi,f}
+                    - Sapj(ia,ip,ij,iky,ikx,iz) &
+                    ! Perpendicular magnetic term
+                    - Mperp &
+                    ! Parallel magnetic term
+                    - Mpara &
+                    ! Drives (density + temperature gradients)
+                    - (Dphi + Dpsi) &
+                    ! Collision term
+                    + Capj(ia,ip,ij,iky,ikx,iz) &
+                    ! Perpendicular pressure effects (electromagnetic term) (TO CHECK)
+                    ! - i_ky*beta*xpdx(ia) * (Tnapj + Tnapp2j + Tnapm2j + Tnapjp1 + Tnapjm1)&
+                    ! Parallel drive term (should be negligible, to test)
+                    ! !! -Gamma_phipar(iz,eo)*Tphi*ddz_phi(iky,ikx,iz) &
+                    ! Numerical perpendicular hyperdiffusion
+                    -mu_x*diff_kx_coeff*kx**N_HD*Napj &
+                    -mu_y*diff_ky_coeff*ky**N_HD*Napj &
+                    ! ! ! Numerical parallel hyperdiffusion "mu_z*ddz**4"  see Pueschel 2010 (eq 25)
+                    -mu_z*diff_dz_coeff*ddzND_napj(ia,ipi,iji,iky,ikx,iz)
+                !! Velocity space dissipation (should be implemented somewhere else)
+                SELECT CASE(HYP_V)
+                CASE('hypcoll') ! GX like Hermite hypercollisions see Mandell et al. 2023 (eq 3.23), unadvised to use it
+                  IF (p_int  .GT. 2)  &
+                    RHS = RHS - mu_p*diff_p_coeff*p_int**6*Napj
+                  IF (j_int .GT. 1)  &
+                    RHS = RHS - mu_j*diff_j_coeff*j_int**6*Napj
+                CASE('dvpar4')
+                  ! fourth order numerical diffusion in vpar
+                  IF(p_int  .GE. 4) &
+                  ! Numerical parallel velocity hyperdiffusion "+ dvpar4 g_a" see Pueschel 2010 (eq 33)
+                  ! (not used often so not parallelized)
+                  RHS = RHS + mu_p*dv4_Hp_coeff(p_int)*moments(ia,ipi-4,iji,iky,ikx,izi,updatetlevel)
+                  ! + dummy Laguerre diff
+                  IF (j_int .GE. 2)  &
+                    RHS = RHS - mu_j*diff_j_coeff*j_int**6*Napj
+                CASE DEFAULT
+                END SELECT
               ELSE
-                Dpsi = 0._dp
+                RHS = 0._xp
               ENDIF
-
-              !! Sum of all RHS terms
-              moments_rhs_(ip,ij,iky,ikx,iz) = &
-                  ! Nonlinear term Sapj_ = {phi,f}
-                  - Sapj_(ip,ij,iky,ikx,iz) &
-                  ! Perpendicular magnetic term
-                  - Mperp &
-                  ! Parallel magnetic term
-                  - Mpara &
-                  ! Drives (density + temperature gradients)
-                  - (Dphi + Dpsi) &
-                  ! Collision term
-                  + TColl_(ip,ij,iky,ikx,iz) &
-                  ! Perpendicular pressure effects (electromagnetic term) (TO CHECK)
-                  - i_ky*beta*dpdx * (Tnapj + Tnapp2j + Tnapm2j + Tnapjp1 + Tnapjm1)&
-                  ! Parallel drive term (should be negligible, to test)
-                  ! -Gamma_phipar(iz,eo)*Tphi*ddz_phi(iky,ikx,iz) &
-                  ! Numerical perpendicular hyperdiffusion
-                  -mu_x*diff_kx_coeff*kx**N_HD*moments_(ip,ij,iky,ikx,iz) &
-                  -mu_y*diff_ky_coeff*ky**N_HD*moments_(ip,ij,iky,ikx,iz) &
-                  ! Numerical parallel hyperdiffusion "mu_z*ddz**4"  see Pueschel 2010 (eq 25)
-                  -mu_z*diff_dz_coeff*ddzND_napj_(ip,ij,iky,ikx,iz)
-
-                  !! Velocity space dissipation (should be implemented somewhere else)
-                  SELECT CASE(HYP_V)
-                  CASE('hypcoll') ! GX like Hermite hypercollisions see Mandell et al. 2023 (eq 3.23), unadvised to use it
-                    IF (p_int .GT. 2)  &
-                      moments_rhs_(ip,ij,iky,ikx,iz) = &
-                        moments_rhs_(ip,ij,iky,ikx,iz) - mu_p*diff_pe_coeff*p_int**6*moments_(ip,ij,iky,ikx,iz)
-                    IF (j_int .GT. 1)  &
-                      moments_rhs_(ip,ij,iky,ikx,iz) = &
-                        moments_rhs_(ip,ij,iky,ikx,iz) - mu_j*diff_je_coeff*j_int**6*moments_(ip,ij,iky,ikx,iz)
-                  CASE('dvpar4')
-                    ! fourth order numerical diffusion in vpar
-                    IF(ip-4 .GT. 0) &
-                    ! Numerical parallel velocity hyperdiffusion "+ dvpar4 g_a" see Pueschel 2010 (eq 33)
-                    ! (not used often so not parallelized)
-                    moments_rhs_(ip,ij,iky,ikx,iz) = &
-                      moments_rhs_(ip,ij,iky,ikx,iz) &
-                        + mu_p*dv4_Hp_coeff(p_int)*moments_(ip-4,ij,iky,ikx,iz)
-                    ! + dummy Laguerre diff
-                    IF (j_int .GT. 1)  &
-                      moments_rhs_(ip,ij,iky,ikx,iz) = &
-                        moments_rhs_(ip,ij,iky,ikx,iz) - mu_j*diff_je_coeff*j_int**6*moments_(ip,ij,iky,ikx,iz)
-                  CASE DEFAULT
-                  END SELECT
-            ELSE
-              moments_rhs_(ip,ij,iky,ikx,iz) = 0._dp
-            ENDIF
-            END DO ploop
-          END DO jloop
-        END DO kyloop
-      END DO kxloop
-    END DO zloop
-    ! Execution time end
-    CALL cpu_time(t1_rhs)
-    tc_rhs = tc_rhs + (t1_rhs-t0_rhs)
-
-  END SUBROUTINE moments_eq_rhs
-  !_____________________________________________________________________________!
-  !_____________________________________________________________________________!
+              !! Put RHS in the array
+              moments_rhs(ia,ip,ij,iky,ikx,iz,updatetlevel) = RHS
+            END DO a
+          END DO p
+        END DO j
+      END DO y
+    END DO x
+  END DO z
+
+  !     print*,'sumabs moments i', SUM(ABS(moments(1,:,:,:,:,:,updatetlevel)))
+  !     print*,'moment rhs i 221', moments_rhs(1,1,1,2,2,1,updatetlevel)
+  !     ! print*,'sum real nadiabe', SUM(REAL(nadiab_moments(2,:,:,:,:,:)))
+  !     print*,'sumreal momrhs i', SUM(REAL(moments_rhs(1,:,:,:,:,:,:)))
+  !     print*,'sumimag momrhs i', SUM(IMAG(moments_rhs(1,:,:,:,:,:,:)))
+  !     print*,'sumreal phi     ', SUM(REAL(phi(:,:,(1+ngz/2):(local_nz+ngz/2))))
+  !     print*,'sumimag phi     ', SUM(IMAG(phi(:,:,(1+ngz/2):(local_nz+ngz/2))))
+  !     print*,'phi(2,2,1)      ', phi(2,2,1+ngz/2)
+  !     print*,'sumreal ddznipj ', SUM(REAL(ddz_napj(1,:,:,:,:,:)))
+  !     print*,'sumimag ddznipj ', SUM(IMAG(ddz_napj(1,:,:,:,:,:)))
+  !     print*,'        ddznipj  ',(ddz_napj(1,2+ngp/2,2+ngj/2,2,2,1))
+  !     print*,'      ddzNDnipj  ',SUM(REAL(ddzND_Napj(1,:,:,:,:,:)))
+  !     print*,'sumreal Capj    ', SUM(REAL(Capj(1,:,:,:,:,:)))
+  !     print*,'sum phi coeff', SUM(xphij(1,:,:)) + SUM(xphijp1(1,:,:)) + SUM(xphijm1(1,:,:))
+  !     print*,'---'
+  !     IF(updatetlevel .EQ. 4) stop
+  ! stop
 
 END SUBROUTINE compute_moments_eq_rhs
 
-SUBROUTINE add_Maxwellian_background_terms
-  ! This routine is meant to add the terms rising from the magnetic operator,
-  ! i.e. (B x k_gB) Grad, applied on the background Maxwellian distribution
-  ! (x_a + spar^2)(b x k_gB) GradFaM
-  ! It gives birth to kx=ky=0 sources terms (averages) that hit moments_ 00, 20,
-  ! 40, 01,02, 21 with background gradient dependences.
-  USE prec_const
-  USE time_integration, ONLY : updatetlevel
-  USE model,      ONLY: taue_qe, taui_qi, k_Ni, k_Ne, k_Ti, k_Te, KIN_E
-  USE array,      ONLY: moments_rhs_e, moments_rhs_i
-  USE grid,       ONLY: contains_kx0, contains_ky0, ikx_0, iky_0,&
-                        ips_e,ipe_e,ijs_e,ije_e,ips_i,ipe_i,ijs_i,ije_i,&
-                        zarray, izs,ize,&
-                        ip,ij
-  IMPLICIT NONE
-  real(dp), DIMENSION(izs:ize) :: sinz
-
-  sinz(izs:ize) = SIN(zarray(izs:ize,0))
-
-  IF(contains_kx0 .AND. contains_ky0) THEN
-    IF(KIN_E) THEN
-      DO ip = ips_e,ipe_e
-        DO ij = ijs_e,ije_e
-          SELECT CASE(ij-1)
-          CASE(0) ! j = 0
-            SELECT CASE (ip-1)
-            CASE(0) ! Na00 term
-                moments_rhs_e(ip,ij,iky_0,ikx_0,izs:ize,updatetlevel) = moments_rhs_e(ip,ij,iky_0,ikx_0,izs:ize,updatetlevel)&
-                  +taue_qe * sinz(izs:ize) * (1.5_dp*k_Ne - 1.125_dp*k_Te)
-            CASE(2) ! Na20 term
-                moments_rhs_e(ip,ij,iky_0,ikx_0,izs:ize,updatetlevel) = moments_rhs_e(ip,ij,iky_0,ikx_0,izs:ize,updatetlevel)&
-                  +taue_qe * sinz(izs:ize) * (SQRT2*0.5_dp*k_Ne - 2.75_dp*k_Te)
-            CASE(4) ! Na40 term
-                moments_rhs_e(ip,ij,iky_0,ikx_0,izs:ize,updatetlevel) = moments_rhs_e(ip,ij,iky_0,ikx_0,izs:ize,updatetlevel)&
-                  +taue_qe * sinz(izs:ize) * SQRT6*0.75_dp*k_Te
-            END SELECT
-          CASE(1) ! j = 1
-            SELECT CASE (ip-1)
-            CASE(0) ! Na01 term
-                moments_rhs_e(ip,ij,iky_0,ikx_0,izs:ize,updatetlevel) = moments_rhs_e(ip,ij,iky_0,ikx_0,izs:ize,updatetlevel)&
-                  -taue_qe * sinz(izs:ize) * (k_Ne + 3.5_dp*k_Te)
-            CASE(2) ! Na21 term
-                moments_rhs_e(ip,ij,iky_0,ikx_0,izs:ize,updatetlevel) = moments_rhs_e(ip,ij,iky_0,ikx_0,izs:ize,updatetlevel)&
-                  -taue_qe * sinz(izs:ize) * SQRT2*k_Te
-            END SELECT
-          CASE(2) ! j = 2
-            SELECT CASE (ip-1)
-            CASE(0) ! Na02 term
-                moments_rhs_e(ip,ij,iky_0,ikx_0,izs:ize,updatetlevel) = moments_rhs_e(ip,ij,iky_0,ikx_0,izs:ize,updatetlevel)&
-                  +taue_qe * sinz(izs:ize) * 2._dp*k_Te
-            END SELECT
-          END SELECT
-        ENDDO
-      ENDDO
-    ENDIF
-
-    DO ip = ips_i,ipe_i
-      DO ij = ijs_i,ije_i
-        SELECT CASE(ij-1)
-        CASE(0) ! j = 0
-          SELECT CASE (ip-1)
-          CASE(0) ! Na00 term
-              moments_rhs_i(ip,ij,iky_0,ikx_0,izs:ize,updatetlevel) = moments_rhs_i(ip,ij,iky_0,ikx_0,izs:ize,updatetlevel)&
-                +taui_qi * sinz(izs:ize) * (1.5_dp*k_Ni - 1.125_dp*k_Ti)
-          CASE(2) ! Na20 term
-              moments_rhs_i(ip,ij,iky_0,ikx_0,izs:ize,updatetlevel) = moments_rhs_i(ip,ij,iky_0,ikx_0,izs:ize,updatetlevel)&
-                +taui_qi * sinz(izs:ize) * (SQRT2*0.5_dp*k_Ni - 2.75_dp*k_Ti)
-          CASE(4) ! Na40 term
-              moments_rhs_i(ip,ij,iky_0,ikx_0,izs:ize,updatetlevel) = moments_rhs_i(ip,ij,iky_0,ikx_0,izs:ize,updatetlevel)&
-                +taui_qi * sinz(izs:ize) * SQRT6*0.75_dp*k_Ti
-          END SELECT
-        CASE(1) ! j = 1
-          SELECT CASE (ip-1)
-          CASE(0) ! Na01 term
-              moments_rhs_i(ip,ij,iky_0,ikx_0,izs:ize,updatetlevel) = moments_rhs_i(ip,ij,iky_0,ikx_0,izs:ize,updatetlevel)&
-                -taui_qi * sinz(izs:ize) * (k_Ni + 3.5_dp*k_Ti)
-          CASE(2) ! Na21 term
-              moments_rhs_i(ip,ij,iky_0,ikx_0,izs:ize,updatetlevel) = moments_rhs_i(ip,ij,iky_0,ikx_0,izs:ize,updatetlevel)&
-                -taui_qi * sinz(izs:ize) * SQRT2*k_Ti
-          END SELECT
-        CASE(2) ! j = 2
-          SELECT CASE (ip-1)
-          CASE(0) ! Na02 term
-              moments_rhs_i(ip,ij,iky_0,ikx_0,izs:ize,updatetlevel) = moments_rhs_i(ip,ij,iky_0,ikx_0,izs:ize,updatetlevel)&
-                +taui_qi * sinz(izs:ize) * 2._dp*k_Ti
-          END SELECT
-        END SELECT
-      ENDDO
-    ENDDO
-
-  ENDIF
-
-END SUBROUTINE
+! SUBROUTINE add_Maxwellian_background_terms
+!   ! This routine is meant to add the terms rising from the magnetic operator,
+!   ! i.e. (B x k_gB) Grad, applied on the background Maxwellian distribution
+!   ! (x_a + spar^2)(b x k_gB) GradFaM
+!   ! It gives birth to kx=ky=0 sources terms (averages) that hit moments_ 00, 20,
+!   ! 40, 01,02, 21 with background gradient dependences.
+!   USE prec_const
+!   USE time_integration, ONLY : updatetlevel
+!   USE species,          ONLY: tau_q, k_N, k_T
+!   USE array,            ONLY: moments_rhs
+!   USE grid,             ONLY: contains_kx0, contains_ky0, ikx0, iky0,&
+!                               ia,ias,iae,ip,ips,ipe, ij,ijs,ije, zarray,izs,ize
+!   IMPLICIT NONE
+!   real(xp), DIMENSION(izs:ize) :: sinz
+!
+!   sinz(izs:ize) = SIN(zarray(izs:ize,0))
+!
+!   IF(contains_kx0 .AND. contains_ky0) THEN
+!     DO ia = ias, iae
+!       DO ip = ips,ipe
+!         DO ij = ijs,ije
+!           SELECT CASE(ij-1)
+!           CASE(0) ! j = 0
+!             SELECT CASE (ip-1)
+!             CASE(0) ! Na00 term
+!                 moments_rhs(ia,ip,ij,iky0,ikx0,izs:ize,updatetlevel) = moments_rhs(ia,ip,ij,iky0,ikx0,izs:ize,updatetlevel)&
+!                   +tau_q(ia) * sinz(izs:ize) * (1.5_xp*k_N(ia) - 1.125_xp*k_T(ia))
+!             CASE(2) ! Na20 term
+!                 moments_rhs(ia,ip,ij,iky0,ikx0,izs:ize,updatetlevel) = moments_rhs(ia,ip,ij,iky0,ikx0,izs:ize,updatetlevel)&
+!                   +tau_q(ia) * sinz(izs:ize) * (SQRT2*0.5_xp*k_N(ia) - 2.75_xp*k_T(ia))
+!             CASE(4) ! Na40 term
+!                 moments_rhs(ia,ip,ij,iky0,ikx0,izs:ize,updatetlevel) = moments_rhs(ia,ip,ij,iky0,ikx0,izs:ize,updatetlevel)&
+!                   +tau_q(ia) * sinz(izs:ize) * SQRT6*0.75_xp*k_T(ia)
+!             END SELECT
+!           CASE(1) ! j = 1
+!             SELECT CASE (ip-1)
+!             CASE(0) ! Na01 term
+!                 moments_rhs(ia,ip,ij,iky0,ikx0,izs:ize,updatetlevel) = moments_rhs(ia,ip,ij,iky0,ikx0,izs:ize,updatetlevel)&
+!                   -tau_q(ia) * sinz(izs:ize) * (k_N(ia) + 3.5_xp*k_T(ia))
+!             CASE(2) ! Na21 term
+!                 moments_rhs(ia,ip,ij,iky0,ikx0,izs:ize,updatetlevel) = moments_rhs(ia,ip,ij,iky0,ikx0,izs:ize,updatetlevel)&
+!                   -tau_q(ia) * sinz(izs:ize) * SQRT2*k_T(ia)
+!             END SELECT
+!           CASE(2) ! j = 2
+!             SELECT CASE (ip-1)
+!             CASE(0) ! Na02 term
+!                 moments_rhs(ia,ip,ij,iky0,ikx0,izs:ize,updatetlevel) = moments_rhs(ia,ip,ij,iky0,ikx0,izs:ize,updatetlevel)&
+!                   +tau_q(ia) * sinz(izs:ize) * 2._xp*k_T(ia)
+!             END SELECT
+!           END SELECT
+!         ENDDO
+!       ENDDO
+!     ENDDO
+!   ENDIF
+!
+! END SUBROUTINE
 
 END MODULE moments_eq_rhs
diff --git a/src/nonlinear_mod.F90 b/src/nonlinear_mod.F90
index 1e0a8bca607c50440461a4ec303d66d1494703b9..2e2791e45c3b02ef3acdd11d7f0b88da87c27d2a 100644
--- a/src/nonlinear_mod.F90
+++ b/src/nonlinear_mod.F90
@@ -1,462 +1,136 @@
 MODULE nonlinear
-  USE array, ONLY : dnjs, Sepj, Sipj, kernel_i, kernel_e,&
-                    moments_e_ZF, moments_i_ZF, phi_ZF
+  USE array,       ONLY : dnjs, Sapj, kernel
   USE initial_par, ONLY : ACT_ON_MODES
-  USE basic
-  USE fourier
-  USE fields, ONLY : phi, psi, moments_e, moments_i
-  USE grid
-  USE model
-  USE prec_const
+  USE fourier,     ONLY : bracket_sum_r, bracket_sum_c, planf, planb, poisson_bracket_and_sum
+  USE fields,      ONLY : phi, psi, moments
+  USE grid,        ONLY: local_na, &
+                         local_np,ngp,parray,pmax,&
+                         local_nj,ngj,jarray,jmax, local_nj_offset, dmax,&
+                         kyarray, AA_y, local_nky_ptr, local_nky_ptr_offset,inv_Ny,&
+                         local_nkx_ptr,kxarray, AA_x, inv_Nx,&
+                         local_nz,ngz,zarray,nzgrid
+  USE model,       ONLY : LINEARITY, EM
+  USE closure,     ONLY : evolve_mom, nmaxarray
+  USE prec_const,  ONLY : xp
+  USE species,     ONLY : sqrt_tau_o_sigma
   USE time_integration, ONLY : updatetlevel
+  use, intrinsic :: iso_c_binding
+
   IMPLICIT NONE
-  INCLUDE 'fftw3-mpi.f03'
 
-  COMPLEX(dp), DIMENSION(:,:), ALLOCATABLE :: F_cmpx, G_cmpx
-  COMPLEX(dp), DIMENSION(:,:), ALLOCATABLE :: Fx_cmpx, Gy_cmpx
-  COMPLEX(dp), DIMENSION(:,:), ALLOCATABLE :: Fy_cmpx, Gx_cmpx, F_conv_G
+  INCLUDE 'fftw3-mpi.f03'
 
-  INTEGER :: in, is, p_int, j_int
-  INTEGER :: nmax, smax ! Upper bound of the sums
-  REAL(dp):: kx, ky, kerneln, sqrt_p, sqrt_pp1
-  PUBLIC :: compute_Sapj, nonlinear_init
+  COMPLEX(xp), DIMENSION(:,:), ALLOCATABLE :: F_cmpx, G_cmpx
+  COMPLEX(xp), DIMENSION(:,:), ALLOCATABLE :: Fx_cmpx, Gy_cmpx
+  COMPLEX(xp), DIMENSION(:,:), ALLOCATABLE :: Fy_cmpx, Gx_cmpx, F_conv_G
+  INTEGER :: in, is, p_int, j_int, n_int
+  INTEGER :: smax
+  REAL(xp):: sqrt_p, sqrt_pp1
+  PUBLIC  :: compute_Sapj, nonlinear_init
 
 CONTAINS
 
 SUBROUTINE nonlinear_init
-  ALLOCATE( F_cmpx(ikys:ikye,ikxs:ikxe))
-  ALLOCATE( G_cmpx(ikys:ikye,ikxs:ikxe))
+  IMPLICIT NONE
+  ALLOCATE( F_cmpx(local_nky_ptr,local_nkx_ptr))
+  ALLOCATE( G_cmpx(local_nky_ptr,local_nkx_ptr))
 
-  ALLOCATE(Fx_cmpx(ikys:ikye,ikxs:ikxe))
-  ALLOCATE(Gy_cmpx(ikys:ikye,ikxs:ikxe))
-  ALLOCATE(Fy_cmpx(ikys:ikye,ikxs:ikxe))
-  ALLOCATE(Gx_cmpx(ikys:ikye,ikxs:ikxe))
+  ALLOCATE(Fx_cmpx(local_nky_ptr,local_nkx_ptr))
+  ALLOCATE(Gy_cmpx(local_nky_ptr,local_nkx_ptr))
+  ALLOCATE(Fy_cmpx(local_nky_ptr,local_nkx_ptr))
+  ALLOCATE(Gx_cmpx(local_nky_ptr,local_nkx_ptr))
 
-  ALLOCATE(F_conv_G(ikys:ikye,ikxs:ikxe))
+  ALLOCATE(F_conv_G(local_nky_ptr,local_nkx_ptr))
 END SUBROUTINE nonlinear_init
 
 SUBROUTINE compute_Sapj
+  IMPLICIT NONE
   ! This routine is meant to compute the non linear term for each specie and degree
   !! In real space Sapj ~ b*(grad(phi) x grad(g)) which in moments in fourier becomes
   !! Sapj = Sum_n (ikx Kn phi)#(iky Sum_s d_njs Naps) - (iky Kn phi)#(ikx Sum_s d_njs Naps)
   !! where # denotes the convolution.
-
-  ! Execution time start
-  CALL cpu_time(t0_Sapj)
-
   SELECT CASE(LINEARITY)
     CASE ('nonlinear')
       CALL compute_nonlinear
-    CASE ('ZF_semilin')
-      CALL compute_semi_linear_ZF
-    CASE ('NZ_semilin')
-      CALL compute_semi_linear_NZ
     CASE ('linear')
-      Sepj = 0._dp; Sipj = 0._dp
+      Sapj = 0._xp
     CASE DEFAULT
       ERROR STOP '>> ERROR << Linearity not recognized '
   END SELECT
-
-  ! Execution time END
-  CALL cpu_time(t1_Sapj)
-  tc_Sapj = tc_Sapj + (t1_Sapj - t0_Sapj)
-
 END SUBROUTINE compute_Sapj
 
+! Compute the poisson bracket {F,G}
 SUBROUTINE compute_nonlinear
   IMPLICIT NONE
-  !!!!!!!!!!!!!!!!!!!! ELECTRON non linear term computation (Sepj)!!!!!!!!!!
-  IF(KIN_E) THEN
-  zloope: DO iz = izs,ize
-
-    ploope: DO ip = ips_e,ipe_e ! Loop over Hermite moments
-      eo     = MODULO(parray_e(ip),2)
-      p_int  = parray_e(ip)
-      sqrt_p = SQRT(REAL(p_int,dp)); sqrt_pp1 = SQRT(REAL(p_int,dp)+1._dp);
-
-      jloope: DO ij = ijs_e, ije_e ! Loop over Laguerre moments
-        j_int=jarray_e(ij)
-        IF((CLOS .NE. 1) .OR. (p_int+2*j_int .LE. dmaxe)) THEN !compute
-          ! Set non linear sum truncation
-          IF (NL_CLOS .EQ. -2) THEN
-            nmax = Jmaxe
-          ELSEIF (NL_CLOS .EQ. -1) THEN
-            nmax = Jmaxe-j_int
-          ELSE
-            nmax = min(NL_CLOS,Jmaxe-j_int)
-          ENDIF
-          bracket_sum_r = 0._dp ! initialize sum over real nonlinear term
-
-          nloope: DO in = 1,nmax+1 ! Loop over laguerre for the sum
-!-----------!! ELECTROSTATIC CONTRIBUTION {Kernel phi, Sum_s dnjs Neps}
-            ! First convolution terms
-            F_cmpx(ikys:ikye,ikxs:ikxe) = phi(ikys:ikye,ikxs:ikxe,iz) * kernel_e(in, ikys:ikye,ikxs:ikxe, iz, eo)
-            ! Second convolution terms
-            G_cmpx(ikys:ikye,ikxs:ikxe) = 0._dp ! initialization of the sum
-            smax = MIN( (in-1)+(ij-1), Jmaxe );
-            DO is = 1, smax+1 ! sum truncation on number of moments
-              G_cmpx(ikys:ikye,ikxs:ikxe)  = G_cmpx(ikys:ikye,ikxs:ikxe) + &
-                dnjs(in,ij,is) * moments_e(ip,is,ikys:ikye,ikxs:ikxe,iz,updatetlevel)
-            ENDDO
-            ! this function add its result to bracket_sum_r
-            CALL poisson_bracket_and_sum(F_cmpx,G_cmpx,bracket_sum_r)
-
-!-----------!! ELECTROMAGNETIC CONTRIBUTION -sqrt(tau)/sigma*{Sum_s dnjs [sqrt(p+1)Nap+1s + sqrt(p)Nap-1s], Kernel psi}
-            IF(EM) THEN
-            ! First convolution terms
-            F_cmpx(ikys:ikye,ikxs:ikxe) = -sqrt_tau_o_sigma_e * psi(ikys:ikye,ikxs:ikxe,iz) * kernel_e(in, ikys:ikye,ikxs:ikxe, iz, eo)
-            ! Second convolution terms
-            G_cmpx(ikys:ikye,ikxs:ikxe) = 0._dp ! initialization of the sum
-            smax = MIN( (in-1)+(ij-1), Jmaxe );
-            DO is = 1, smax+1 ! sum truncation on number of moments
-              G_cmpx(ikys:ikye,ikxs:ikxe)  = G_cmpx(ikys:ikye,ikxs:ikxe) + &
-                dnjs(in,ij,is) * (sqrt_pp1*moments_e(ip+1,is,ikys:ikye,ikxs:ikxe,iz,updatetlevel)&
-                                 +sqrt_p  *moments_e(ip-1,is,ikys:ikye,ikxs:ikxe,iz,updatetlevel))
-            ENDDO
-            ! this function add its result to bracket_sum_r
-            CALL poisson_bracket_and_sum(F_cmpx,G_cmpx,bracket_sum_r)
-            ENDIF
-          ENDDO nloope
-
-!---------! Put back the real nonlinear product into k-space
-          call fftw_mpi_execute_dft_r2c(planf, bracket_sum_r, bracket_sum_c)
-          ! Retrieve convolution in input format
-          DO iky = ikys, ikye
-            Sepj(ip,ij,iky,ikxs:ikxe,iz) = bracket_sum_c(ikxs:ikxe,iky-local_nky_offset)*AA_x(ikxs:ikxe)*AA_y(iky) !Anti aliasing filter
-          ENDDO
-        ELSE
-          Sepj(ip,ij,:,:,iz) = 0._dp
-        ENDIF
-      ENDDO jloope
-    ENDDO ploope
-  ENDDO zloope
-ENDIF
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-  !!!!!!!!!!!!!!!!!!!! ION non linear term computation (Sipj)!!!!!!!!!!
-  zloopi: DO iz = izs,ize
-    ploopi: DO ip = ips_i,ipe_i ! Loop over Hermite moments
-      p_int = parray_i(ip)
-      eo = MODULO(parray_i(ip),2)
-      jloopi: DO ij = ijs_i, ije_i ! Loop over Laguerre moments
-        j_int=jarray_i(ij)
-        IF((CLOS .NE. 1) .OR. (p_int+2*j_int .LE. dmaxi)) THEN !compute for every moments except for closure 1
-          ! Set non linear sum truncation
-          IF (NL_CLOS .EQ. -2) THEN
-            nmax = Jmaxi
-          ELSEIF (NL_CLOS .EQ. -1) THEN
-            nmax = Jmaxi-j_int
-          ELSE
-            nmax = min(NL_CLOS,Jmaxi-j_int)
-          ENDIF
-          bracket_sum_r = 0._dp ! initialize sum over real nonlinear term
-          nloopi: DO in = 1,nmax+1 ! Loop over laguerre for the sum
-!-----------!! ELECTROSTATIC CONTRIBUTION
-            ! First convolution terms
-            F_cmpx(ikys:ikye,ikxs:ikxe) = phi(ikys:ikye,ikxs:ikxe,iz) * kernel_i(in, ikys:ikye,ikxs:ikxe, iz, eo)
-            ! Second convolution terms
-            G_cmpx(ikys:ikye,ikxs:ikxe) = 0._dp ! initialization of the sum
-            smax = MIN( (in-1)+(ij-1), jmaxi );
-            DO is = 1, smax+1 ! sum truncation on number of moments
-              G_cmpx(ikys:ikye,ikxs:ikxe) = G_cmpx(ikys:ikye,ikxs:ikxe) + &
-                dnjs(in,ij,is) * moments_i(ip,is,ikys:ikye,ikxs:ikxe,iz,updatetlevel)
-            ENDDO
-            ! this function add its result to bracket_sum_r
-            CALL poisson_bracket_and_sum(F_cmpx,G_cmpx,bracket_sum_r)
-!-----------!! ELECTROMAGNETIC CONTRIBUTION -sqrt(tau)/sigma*{Sum_s dnjs [sqrt(p+1)Nap+1s + sqrt(p)Nap-1s], Kernel psi}
-            IF(EM) THEN
-            ! First convolution terms
-            F_cmpx(ikys:ikye,ikxs:ikxe) = -sqrt_tau_o_sigma_i * psi(ikys:ikye,ikxs:ikxe,iz) * kernel_i(in, ikys:ikye,ikxs:ikxe, iz, eo)
-            ! Second convolution terms
-            G_cmpx(ikys:ikye,ikxs:ikxe) = 0._dp ! initialization of the sum
-            smax = MIN( (in-1)+(ij-1), Jmaxi );
-            DO is = 1, smax+1 ! sum truncation on number of moments
-              G_cmpx(ikys:ikye,ikxs:ikxe)  = G_cmpx(ikys:ikye,ikxs:ikxe) + &
-                dnjs(in,ij,is) * (sqrt_pp1*moments_i(ip+1,is,ikys:ikye,ikxs:ikxe,iz,updatetlevel)&
-                                 +sqrt_p  *moments_i(ip-1,is,ikys:ikye,ikxs:ikxe,iz,updatetlevel))
-            ENDDO
-            ! this function add its result to bracket_sum_r
-            CALL poisson_bracket_and_sum(F_cmpx,G_cmpx,bracket_sum_r)
-            ENDIF
-          ENDDO nloopi
-          ! Put the real nonlinear product into k-space
-          call fftw_mpi_execute_dft_r2c(planf, bracket_sum_r, bracket_sum_c)
-          ! Retrieve convolution in input format
-          DO iky = ikys, ikye
-            Sipj(ip,ij,iky,ikxs:ikxe,iz) = bracket_sum_c(ikxs:ikxe,iky-local_nky_offset)*AA_x(ikxs:ikxe)*AA_y(iky)
-          ENDDO
-        ELSE
-          Sipj(ip,ij,:,:,iz) = 0._dp
-        ENDIF
-      ENDDO jloopi
-    ENDDO ploopi
-  ENDDO zloopi
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-END SUBROUTINE compute_nonlinear
-!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-! Semi linear computation : Only NZ-ZF convolutions are kept
-SUBROUTINE compute_semi_linear_ZF
-  IMPLICIT NONE
-  !!!!!!!!!!!!!!!!!!!! ELECTRON semi linear term computation (Sepj)!!!!!!!!!!
-  IF(KIN_E) THEN
-  zloope: DO iz = izs,ize
-  ploope: DO ip = ips_e,ipe_e ! Loop over Hermite moments
-    eo = MODULO(parray_e(ip),2)
-    jloope: DO ij = ijs_e, ije_e ! Loop over Laguerre moments
-      j_int=jarray_e(ij)
-      ! Set non linear sum truncation
-      IF (NL_CLOS .EQ. -2) THEN
-        nmax = Jmaxe
-      ELSEIF (NL_CLOS .EQ. -1) THEN
-        nmax = Jmaxe-(ij-1)
-      ELSE
-        nmax = NL_CLOS
-      ENDIF
-      bracket_sum_r = 0._dp ! initialize sum over real nonlinear term
-      nloope: DO in = 1,nmax+1 ! Loop over laguerre for the sum
-        ! Build the terms to convolve
-        kxloope: DO ikx = ikxs,ikxe ! Loop over kx
-          kyloope: DO iky = ikys,ikye ! Loop over ky
-            kx      = kxarray(ikx)
-            ky      = kyarray(iky)
-            kerneln = kernel_e(in, ikx, iky, iz, eo)
-            ! Zonal terms (=0 for all ky not 0)
-            Fx_cmpx(iky,ikx) = 0._dp
-            Gx_cmpx(iky,ikx) = 0._dp
-            IF(iky .EQ. iky_0) THEN
-              Fx_cmpx(iky,ikx) = imagu*kx* phi(iky,ikx,iz) * kerneln
-              smax = MIN( (in-1)+(ij-1), jmaxe );
+  INTEGER :: iz,ij,ip,eo,ia,ikx,iky,izi,ipi,iji,ini,isi
+  DO iz = 1,local_nz
+    izi = iz + ngz/2
+    DO ij = 1,local_nj ! Loop over Laguerre moments
+      iji = ij + ngj/2
+      j_int=jarray(iji)
+      DO ip = 1,local_np ! Loop over Hermite moments
+        ipi = ip + ngp/2
+        IF(evolve_mom(ipi,iji)) THEN !compute for every moments except for closure 1
+        p_int    = parray(ipi)
+        sqrt_p   = SQRT(REAL(p_int,xp))
+        sqrt_pp1 = SQRT(REAL(p_int,xp) + 1._xp)
+        eo       = min(nzgrid,MODULO(parray(ip),2)+1)
+        DO ia = 1,local_na
+            ! Set non linear sum truncation
+            bracket_sum_r = 0._xp ! initialize sum over real nonlinear term
+            DO in = 1,nmaxarray(ij)+1 ! Loop over laguerre for the sum
+              ini = in+ngj/2
+  !-----------!! ELECTROSTATIC CONTRIBUTION
+              ! First convolution terms
+              F_cmpx(:,:) = phi(:,:,izi) * kernel(ia,ini,:,:,izi,eo)
+              ! Second convolution terms
+              G_cmpx = 0._xp ! initialization of the sum
+              smax   = MIN( jarray(ini)+jarray(iji), jmax );
               DO is = 1, smax+1 ! sum truncation on number of moments
-                Gx_cmpx(iky,ikx) = Gx_cmpx(iky,ikx) + &
-                  dnjs(in,ij,is) * moments_e(ip,is,iky,ikx,iz,updatetlevel)
+                isi = is + ngj/2
+                G_cmpx(:,:) = G_cmpx(:,:) + &
+                  dnjs(in,ij,is) * moments(ia,ipi,isi,:,:,izi,updatetlevel)
               ENDDO
-              Gx_cmpx(iky,ikx) = imagu*kx*Gx_cmpx(iky,ikx)
-            ENDIF
-            ! NZ terms
-            Fy_cmpx(iky,ikx) = imagu*ky* phi(iky,ikx,iz) * kerneln
-            Gy_cmpx(iky,ikx) = 0._dp ! initialization of the sum
-            smax = MIN( (in-1)+(ij-1), jmaxe );
-            DO is = 1, smax+1 ! sum truncation on number of moments
-              Gy_cmpx(iky,ikx) = Gy_cmpx(iky,ikx) + &
-                dnjs(in,ij,is) * moments_e(ip,is,iky,ikx,iz,updatetlevel)
+              ! this function add its result to bracket_sum_r
+              CALL poisson_bracket_and_sum(kyarray,kxarray,inv_Ny,inv_Nx,AA_y,AA_x,local_nky_ptr,local_nkx_ptr,F_cmpx,G_cmpx,bracket_sum_r)
+  !-----------!! ELECTROMAGNETIC CONTRIBUTION -sqrt(tau)/sigma*{Sum_s dnjs [sqrt(p+1)Nap+1s + sqrt(p)Nap-1s], Kernel psi}
+              IF(EM) THEN
+                ! First convolution terms
+                F_cmpx(:,:) = -sqrt_tau_o_sigma(ia) * psi(:,:,izi) * kernel(ia,ini,:,:,izi,eo)
+                ! Second convolution terms
+                G_cmpx = 0._xp ! initialization of the sum
+                DO is = 1, smax+1 ! sum truncation on number of moments
+                  isi = is + ngj/2
+                  G_cmpx(:,:)  = G_cmpx(:,:) + &
+                    dnjs(in,ij,is) * (sqrt_pp1*moments(ia,ipi+1,isi,:,:,izi,updatetlevel)&
+                                    +sqrt_p  *moments(ia,ipi-1,isi,:,:,izi,updatetlevel))
+                ENDDO
+                ! this function add its result to bracket_sum_r
+                CALL poisson_bracket_and_sum(kyarray,kxarray,inv_Ny,inv_Nx,AA_y,AA_x,local_nky_ptr,local_nkx_ptr,F_cmpx,G_cmpx,bracket_sum_r)
+              ENDIF
             ENDDO
-            Gy_cmpx(iky,ikx) = imagu*ky*Gy_cmpx(iky,ikx)
-          ENDDO kyloope
-        ENDDO kxloope
-        ! First term df/dx x dg/dy
-        CALL convolve_and_add(Fx_cmpx,Gy_cmpx)
-        ! Second term -df/dy x dg/dx
-        CALL convolve_and_add(-Fy_cmpx,Gx_cmpx)
-      ENDDO nloope
-      ! Put the real nonlinear product into k-space
-      call fftw_mpi_execute_dft_r2c(planf, bracket_sum_r, bracket_sum_c)
-      ! Retrieve convolution in input format
-      DO ikx = ikxs, ikxe
-        DO iky = ikys, ikye
-          Sepj(ip,ij,iky,ikx,iz) = bracket_sum_c(ikx,iky-local_nky_offset)*AA_x(ikx)*AA_y(iky) !Anti aliasing filter
-        ENDDO
-      ENDDO
-    ENDDO jloope
-  ENDDO ploope
-ENDDO zloope
-ENDIF
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-  !!!!!!!!!!!!!!!!!!!! ION non linear term computation (Sipj)!!!!!!!!!!
-zloopi: DO iz = izs,ize
-  ploopi: DO ip = ips_i,ipe_i ! Loop over Hermite moments
-    eo = MODULO(parray_i(ip),2)
-    jloopi: DO ij = ijs_i, ije_i ! Loop over Laguerre moments
-      j_int=jarray_i(ij)
-      ! Set non linear sum truncation
-      IF (NL_CLOS .EQ. -2) THEN
-        nmax = Jmaxi
-      ELSEIF (NL_CLOS .EQ. -1) THEN
-        nmax = Jmaxi-(ij-1)
-      ELSE
-        nmax = NL_CLOS
-      ENDIF
-      bracket_sum_r = 0._dp ! initialize sum over real nonlinear term
-      nloopi: DO in = 1,nmax+1 ! Loop over laguerre for the sum
-        kxloopi: DO ikx = ikxs,ikxe ! Loop over kx
-          kyloopi: DO iky = ikys,ikye ! Loop over ky
-            ! Zonal terms (=0 for all ky not 0)
-            Fx_cmpx(iky,ikx) = 0._dp
-            Gx_cmpx(iky,ikx) = 0._dp
-            IF(iky .EQ. iky_0) THEN
-              Fx_cmpx(iky,ikx) = imagu*kx* phi(iky,ikx,iz) * kerneln
-              smax = MIN( (in-1)+(ij-1), jmaxi );
-              DO is = 1, smax+1 ! sum truncation on number of moments
-                Gx_cmpx(iky,ikx) = Gx_cmpx(iky,ikx) + &
-                  dnjs(in,ij,is) * moments_i(ip,is,iky,ikx,iz,updatetlevel)
+            ! Put the real nonlinear product back into k-space
+#ifdef SINGLE_PRECISION
+            call fftwf_mpi_execute_dft_r2c(planf, bracket_sum_r, bracket_sum_c)
+#else
+            call fftw_mpi_execute_dft_r2c(planf, bracket_sum_r, bracket_sum_c)
+#endif
+            ! Retrieve convolution in input format and apply anti aliasing
+            DO ikx = 1,local_nkx_ptr
+              DO iky = 1,local_nky_ptr
+                Sapj(ia,ip,ij,iky,ikx,iz) = bracket_sum_c(ikx,iky)*AA_x(ikx)*AA_y(iky)
               ENDDO
-              Gx_cmpx(iky,ikx) = imagu*kx*Gx_cmpx(iky,ikx)
-            ENDIF
-
-            ! NZ terms
-            Fy_cmpx(iky,ikx) = imagu*ky* phi(iky,ikx,iz) * kerneln
-            Gy_cmpx(iky,ikx) = 0._dp ! initialization of the sum
-            smax = MIN( (in-1)+(ij-1), jmaxi );
-            DO is = 1, smax+1 ! sum truncation on number of moments
-              Gy_cmpx(iky,ikx) = Gy_cmpx(iky,ikx) + &
-                dnjs(in,ij,is) * moments_i(ip,is,iky,ikx,iz,updatetlevel)
             ENDDO
-            Gy_cmpx(iky,ikx) = imagu*ky*Gy_cmpx(iky,ikx)
-          ENDDO kyloopi
-        ENDDO kxloopi
-        ! First term drphi x dzf
-        CALL convolve_and_add(Fy_cmpx,Gx_cmpx)
-        ! Second term -dzphi x drf
-        CALL convolve_and_add(Fy_cmpx,Gx_cmpx)
-      ENDDO nloopi
-      ! Put the real nonlinear product into k-space
-      call fftw_mpi_execute_dft_r2c(planf, bracket_sum_r, bracket_sum_c)
-      ! Retrieve convolution in input format
-      DO ikx = ikxs, ikxe
-        DO iky = ikys, ikye
-          Sipj(ip,ij,iky,ikx,iz) = bracket_sum_c(ikx,iky-local_nky_offset)*AA_x(ikx)*AA_y(iky)
-        ENDDO
-      ENDDO
-    ENDDO jloopi
-  ENDDO ploopi
-ENDDO zloopi
-END SUBROUTINE compute_semi_linear_ZF
-
-!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-! Semi linear computation : Only kx=0*all convolutions are kept
-SUBROUTINE compute_semi_linear_NZ
-  IMPLICIT NONE
-  !!!!!!!!!!!!!!!!!!!! ELECTRON semi linear term computation (Sepj)!!!!!!!!!!
-  IF(KIN_E) THEN
-  zloope: DO iz = izs,ize
-  ploope: DO ip = ips_e,ipe_e ! Loop over Hermite moments
-    eo = MODULO(parray_e(ip),2)
-    jloope: DO ij = ijs_e, ije_e ! Loop over Laguerre moments
-      j_int=jarray_e(ij)
-      ! Set non linear sum truncation
-      IF (NL_CLOS .EQ. -2) THEN
-        nmax = Jmaxe
-      ELSEIF (NL_CLOS .EQ. -1) THEN
-        nmax = Jmaxe-(ij-1)
-      ELSE
-        nmax = NL_CLOS
-      ENDIF
-      bracket_sum_r = 0._dp ! initialize sum over real nonlinear term
-      nloope: DO in = 1,nmax+1 ! Loop over laguerre for the sum
-        ! Build the terms to convolve
-        kxloope: DO ikx = ikxs,ikxe ! Loop over kx
-          kyloope: DO iky = ikys,ikye ! Loop over ky
-            kx      = kxarray(ikx)
-            ky      = kyarray(iky)
-            kerneln = kernel_e(in, ikx, iky, iz, eo)
-            ! All terms
-            Fx_cmpx(iky,ikx) = imagu*kx* phi(iky,ikx,iz) * kerneln
-            smax = MIN( (in-1)+(ij-1), jmaxe );
-            DO is = 1, smax+1 ! sum truncation on number of moments
-              Gx_cmpx(iky,ikx) = Gx_cmpx(iky,ikx) + &
-                dnjs(in,ij,is) * moments_e(ip,is,iky,ikx,iz,updatetlevel)
             ENDDO
-            Gx_cmpx(iky,ikx) = imagu*kx*Gx_cmpx(iky,ikx)
-            ! Kx = 0 terms
-            Fy_cmpx(iky,ikx) = 0._dp
-            Gy_cmpx(iky,ikx) = 0._dp
-            IF (ikx .EQ. ikx_0) THEN
-              Fy_cmpx(iky,ikx) = imagu*ky* phi(iky,ikx,iz) * kerneln
-              Gy_cmpx(iky,ikx) = 0._dp ! initialization of the sum
-              smax = MIN( (in-1)+(ij-1), jmaxe );
-              DO is = 1, smax+1 ! sum truncation on number of moments
-                Gy_cmpx(iky,ikx) = Gy_cmpx(iky,ikx) + &
-                  dnjs(in,ij,is) * moments_e(ip,is,iky,ikx,iz,updatetlevel)
-              ENDDO
-              Gy_cmpx(iky,ikx) = imagu*ky*Gy_cmpx(iky,ikx)
-            ENDIF
-          ENDDO kyloope
-        ENDDO kxloope
-        ! First term df/dx x dg/dy
-        CALL convolve_and_add(Fx_cmpx,Gy_cmpx)
-        ! Second term -df/dy x dg/dx
-        CALL convolve_and_add(-Fy_cmpx,Gx_cmpx)
-      ENDDO nloope
-      ! Put the real nonlinear product into k-space
-      call fftw_mpi_execute_dft_r2c(planf, bracket_sum_r, bracket_sum_c)
-      ! Retrieve convolution in input format
-      DO ikx = ikxs, ikxe
-        DO iky = ikys, ikye
-          Sepj(ip,ij,iky,ikx,iz) = bracket_sum_c(ikx,iky-local_nky_offset)*AA_x(ikx)*AA_y(iky) !Anti aliasing filter
-        ENDDO
+          ELSE
+            Sapj(:,ip,ij,:,:,iz) = 0._xp
+          ENDIF
       ENDDO
-    ENDDO jloope
-  ENDDO ploope
-ENDDO zloope
-ENDIF
+    ENDDO
+  ENDDO
   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-  !!!!!!!!!!!!!!!!!!!! ION non linear term computation (Sipj)!!!!!!!!!!
-zloopi: DO iz = izs,ize
-  ploopi: DO ip = ips_i,ipe_i ! Loop over Hermite moments
-    eo = MODULO(parray_i(ip),2)
-    jloopi: DO ij = ijs_i, ije_i ! Loop over Laguerre moments
-      j_int=jarray_i(ij)
-      ! Set non linear sum truncation
-      IF (NL_CLOS .EQ. -2) THEN
-        nmax = Jmaxi
-      ELSEIF (NL_CLOS .EQ. -1) THEN
-        nmax = Jmaxi-(ij-1)
-      ELSE
-        nmax = NL_CLOS
-      ENDIF
-      bracket_sum_r = 0._dp ! initialize sum over real nonlinear term
-      nloopi: DO in = 1,nmax+1 ! Loop over laguerre for the sum
-        kxloopi: DO ikx = ikxs,ikxe ! Loop over kx
-          kyloopi: DO iky = ikys,ikye ! Loop over ky
-            ! Zonal terms (=0 for all ky not 0)
-            Fx_cmpx(iky,ikx) = imagu*kx* phi(iky,ikx,iz) * kerneln
-            smax = MIN( (in-1)+(ij-1), jmaxi );
-            DO is = 1, smax+1 ! sum truncation on number of moments
-              Gx_cmpx(iky,ikx) = Gx_cmpx(iky,ikx) + &
-                dnjs(in,ij,is) * moments_i(ip,is,iky,ikx,iz,updatetlevel)
-            ENDDO
-            Gx_cmpx(iky,ikx) = imagu*kx*Gx_cmpx(iky,ikx)
-
-            ! Kx = 0 terms
-            Fy_cmpx(iky,ikx) = 0._dp
-            Gy_cmpx(iky,ikx) = 0._dp
-            IF (ikx .EQ. ikx_0) THEN
-              Fy_cmpx(iky,ikx) = imagu*ky* phi(iky,ikx,iz) * kerneln
-              Gy_cmpx(iky,ikx) = 0._dp ! initialization of the sum
-              smax = MIN( (in-1)+(ij-1), jmaxi );
-              DO is = 1, smax+1 ! sum truncation on number of moments
-                Gy_cmpx(iky,ikx) = Gy_cmpx(iky,ikx) + &
-                  dnjs(in,ij,is) * moments_i(ip,is,iky,ikx,iz,updatetlevel)
-              ENDDO
-              Gy_cmpx(iky,ikx) = imagu*ky*Gy_cmpx(iky,ikx)
-            ENDIF
-          ENDDO kyloopi
-        ENDDO kxloopi
-        ! First term drphi x dzf
-        CALL convolve_and_add(Fy_cmpx,Gx_cmpx)
-        ! Second term -dzphi x drf
-        CALL convolve_and_add(Fy_cmpx,Gx_cmpx)
-      ENDDO nloopi
-      ! Put the real nonlinear product into k-space
-      call fftw_mpi_execute_dft_r2c(planf, bracket_sum_r, bracket_sum_c)
-      ! Retrieve convolution in input format
-      DO ikx = ikxs, ikxe
-        DO iky = ikys, ikye
-          Sipj(ip,ij,iky,ikx,iz) = bracket_sum_c(ikx,iky-local_nky_offset)*AA_x(ikx)*AA_y(iky)
-        ENDDO
-      ENDDO
-    ENDDO jloopi
-  ENDDO ploopi
-ENDDO zloopi
-END SUBROUTINE compute_semi_linear_NZ
+END SUBROUTINE compute_nonlinear
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 
 END MODULE nonlinear
diff --git a/src/numerical_experiments_mod.F90 b/src/numerical_experiments_mod.F90
deleted file mode 100644
index c316a123293ff0215f067bc2becc3549f6a1bd5f..0000000000000000000000000000000000000000
--- a/src/numerical_experiments_mod.F90
+++ /dev/null
@@ -1,113 +0,0 @@
-!!  The numerical_experiments module contains routines to "play" with the fourier
-! modes in order to understand mechanisms. These routines are not integrated in
-! the main code anymore because they are not used. This file serves as an archive.
-MODULE numerical_experiments
-USE basic
-USE prec_const
-USE grid
-USE utility
-
-implicit none
-
-PUBLIC :: play_with_modes, save_EM_ZF_modes
-
-CONTAINS
-!******************************************************************************!
-!!!!!!! Routine that can artificially increase or wipe modes
-!******************************************************************************!
-SUBROUTINE save_EM_ZF_modes
-  USE fields
-  USE array, ONLY : moments_e_ZF, moments_i_ZF, phi_ZF, moments_e_NZ,moments_i_NZ,phi_NZ
-  USE grid
-  USE time_integration, ONLY: updatetlevel
-  USE model, ONLY: KIN_E
-  IMPLICIT NONE
-  ! Store Zonal and entropy modes
-  IF(contains_ky0) THEN
-  IF(KIN_E) &
-    moments_e_ZF(ips_e:ipe_e,ijs_e:ije_e,ikxs:ikxe,izs:ize) = moments_e(ips_e:ipe_e,ijs_e:ije_e,iky_0,ikxs:ikxe,izs:ize,updatetlevel)
-    moments_i_ZF(ips_i:ipe_i,ijs_i:ije_i,ikxs:ikxe,izs:ize) = moments_i(ips_i:ipe_i,ijs_i:ije_i,iky_0,ikxs:ikxe,izs:ize,updatetlevel)
-    phi_ZF(ikxs:ikxe,izs:ize) = phi(iky_0,ikxs:ikxe,izs:ize)
-  ELSE
-    IF(KIN_E) &
-    moments_e_ZF(ips_e:ipe_e,ijs_e:ije_e,ikxs:ikxe,izs:ize) = 0._dp
-    moments_i_ZF(ips_i:ipe_i,ijs_i:ije_i,ikxs:ikxe,izs:ize) = 0._dp
-    phi_ZF(ikxs:ikxe,izs:ize) = 0._dp
-  ENDIF
-  IF(contains_kx0) THEN
-    IF(KIN_E) &
-    moments_e_NZ(ips_e:ipe_e,ijs_e:ije_e,ikys:ikye,izs:ize) = moments_e(ips_e:ipe_e,ijs_e:ije_e,ikys:ikye,ikx_0,izs:ize,updatetlevel)
-    moments_i_NZ(ips_i:ipe_i,ijs_i:ije_i,ikys:ikye,izs:ize) = moments_i(ips_i:ipe_i,ijs_i:ije_i,ikys:ikye,ikx_0,izs:ize,updatetlevel)
-    phi_NZ(ikys:ikye,izs:ize) = phi(ikys:ikye,ikx_0,izs:ize)
-  ELSE
-    IF(KIN_E) &
-    moments_e_NZ(ips_e:ipe_e,ijs_e:ije_e,ikys:ikye,izs:ize) = 0._dp
-    moments_i_NZ(ips_e:ipe_e,ijs_i:ije_i,ikys:ikye,izs:ize) = 0._dp
-    phi_NZ(ikys:ikye,izs:ize) = 0._dp
-  ENDIF
-END SUBROUTINE
-
-SUBROUTINE play_with_modes
-  USE fields
-  USE array, ONLY : moments_e_ZF, moments_i_ZF, phi_ZF, moments_e_NZ,moments_i_NZ,phi_NZ
-  USE grid
-  USE time_integration, ONLY: updatetlevel
-  USE initial_par, ONLY: ACT_ON_MODES
-  USE model, ONLY: KIN_E
-  IMPLICIT NONE
-  REAL(dp) :: AMP = 1.5_dp
-
-  SELECT CASE(ACT_ON_MODES)
-  CASE('wipe_zonal') ! Errase the zonal flow
-    IF(KIN_E) &
-    moments_e(ips_e:ipe_e,ijs_e:ije_e,iky_0,ikxs:ikxe,izs:ize,updatetlevel) = 0._dp
-    moments_i(ips_i:ipe_i,ijs_i:ije_i,iky_0,ikxs:ikxe,izs:ize,updatetlevel) = 0._dp
-    phi(iky_0,ikxs:ikxe,izs:ize) = 0._dp
-  CASE('wipe_entropymode')
-    IF(KIN_E) &
-    moments_e(ips_e:ipe_e,ijs_e:ije_e,ikys:ikye,ikx_0,izs:ize,updatetlevel) = 0._dp
-    moments_i(ips_i:ipe_i,ijs_i:ije_i,ikys:ikye,ikx_0,izs:ize,updatetlevel) = 0._dp
-    phi(ikys:ikye,ikx_0,izs:ize) = 0._dp
-  CASE('wipe_turbulence')
-    DO ikx = ikxs,ikxe
-      DO iky = ikys, ikye
-        IF ( (ikx .NE. ikx_0) .AND. (iky .NE. iky_0) ) THEN
-          IF(KIN_E) &
-          moments_e(ips_e:ipe_e,ijs_e:ije_e,iky,ikx,izs:ize,updatetlevel) = 0._dp
-          moments_i(ips_i:ipe_i,ijs_i:ije_i,iky,ikx,izs:ize,updatetlevel) = 0._dp
-          phi(iky,ikx,izs:ize) = 0._dp
-        ENDIF
-      ENDDO
-    ENDDO
-  CASE('wipe_nonzonal')
-    DO ikx = ikxs,ikxe
-      DO iky = ikys, ikye
-        IF ( (ikx .NE. ikx_0) ) THEN
-          IF(KIN_E) &
-          moments_e(ips_e:ipe_e,ijs_e:ije_e,iky,ikx,izs:ize,updatetlevel) = 0._dp
-          moments_i(ips_i:ipe_i,ijs_i:ije_i,iky,ikx,izs:ize,updatetlevel) = 0._dp
-          phi(iky,ikx,izs:ize) = 0._dp
-        ENDIF
-      ENDDO
-    ENDDO
-  CASE('freeze_zonal')
-    IF(KIN_E) &
-    moments_e(ips_e:ipe_e,ijs_e:ije_e,iky_0,ikxs:ikxe,izs:ize,updatetlevel) = moments_e_ZF(ips_e:ipe_e,ijs_e:ije_e,ikxs:ikxe,izs:ize)
-    moments_i(ips_i:ipe_i,ijs_i:ije_i,iky_0,ikxs:ikxe,izs:ize,updatetlevel) = moments_i_ZF(ips_i:ipe_i,ijs_i:ije_i,ikxs:ikxe,izs:ize)
-    phi(iky_0,ikxs:ikxe,izs:ize) = phi_ZF(ikxs:ikxe,izs:ize)
-  CASE('freeze_entropymode')
-    IF(contains_kx0) THEN
-      IF(KIN_E) &
-      moments_e(ips_e:ipe_e,ijs_e:ije_e,ikys:ikye,ikx_0,izs:ize,updatetlevel) = moments_e_NZ(ips_e:ipe_e,ijs_e:ije_e,ikys:ikye,izs:ize)
-      moments_i(ips_i:ipe_i,ijs_i:ije_i,ikys:ikye,ikx_0,izs:ize,updatetlevel) = moments_i_NZ(ips_i:ipe_i,ijs_i:ije_i,ikys:ikye,izs:ize)
-      phi(ikys:ikye,ikx_0,izs:ize) = phi_NZ(ikys:ikye,izs:ize)
-    ENDIF
-  CASE('amplify_zonal')
-    IF(KIN_E) &
-    moments_e(ips_e:ipe_e,ijs_e:ije_e,iky_0,ikxs:ikxe,izs:ize,updatetlevel) = AMP*moments_e_ZF(ips_e:ipe_e,ijs_e:ije_e,ikxs:ikxe,izs:ize)
-    moments_i(ips_i:ipe_i,ijs_i:ije_i,iky_0,ikxs:ikxe,izs:ize,updatetlevel) = AMP*moments_i_ZF(ips_i:ipe_i,ijs_i:ije_i,ikxs:ikxe,izs:ize)
-    phi(iky_0,ikxs:ikxe,izs:ize) = AMP*phi_ZF(ikxs:ikxe,izs:ize)
-  END SELECT
-END SUBROUTINE
-
-END MODULE numerical experiments
diff --git a/src/numerics_mod.F90 b/src/numerics_mod.F90
index bacd92dd7417cd83282b85713406d33947c325ad..9111ea3f5f6fe06080805b4dbd6b520e1e6d1fc4 100644
--- a/src/numerics_mod.F90
+++ b/src/numerics_mod.F90
@@ -1,12 +1,7 @@
 !! MODULE NUMERICS
 !   The module numerics contains a set of routines that are called only once at
-! the begining of a run. These routines do not need to be optimzed
+! the beginng of a run. These routines do not need to be optimzed
 MODULE numerics
-    USE basic
-    USE prec_const
-    USE grid
-    USE utility
-
     implicit none
 
     PUBLIC :: build_dnjs_table, evaluate_kernels, evaluate_EM_op
@@ -21,14 +16,15 @@ SUBROUTINE build_dnjs_table
   USE array, ONLY : dnjs
   USE FMZM,  ONLY : TO_DP
   USE coeff, ONLY : ALL2L
+  USE grid,  ONLY : jmax
   IMPLICIT NONE
 
   INTEGER :: in, ij, is, J
   INTEGER :: n_, j_, s_
 
-  J = max(jmaxe,jmaxi)
+  J = jmax
 
-  DO in = 1,J+1 ! Nested dependent loops to make benefit from dnjs symmetry
+  DO in = 1,J+1 ! Nested dependent loops to make benefit from dnjs symmetrys
     n_ = in - 1
     DO ij = in,J+1
       j_ = ij - 1
@@ -51,22 +47,22 @@ END SUBROUTINE build_dnjs_table
 !!!!!!! Build the fourth derivative Hermite coefficient table
 !******************************************************************************!
 SUBROUTINE build_dv4Hp_table
-  USE array, ONLY: dv4_Hp_coeff
-  USE grid, ONLY: pmaxi, pmaxe
+  USE array,      ONLY: dv4_Hp_coeff
+  USE grid,       ONLY: pmax
+  USE prec_const, ONLY: xp, PI
   IMPLICIT NONE
-  INTEGER :: p_, pmax_
-  pmax_ = MAX(pmaxi,pmaxe)
-  DO p_ = -2,pmax_
+  INTEGER :: p_
+  DO p_ = -2,pmax
     if (p_ < 4) THEN
-      dv4_Hp_coeff(p_) = 0._dp
+      dv4_Hp_coeff(p_) = 0._xp
     ELSE
-      dv4_Hp_coeff(p_) = 4_dp*SQRT(REAL((p_-3)*(p_-2)*(p_-1)*p_,dp))
+      dv4_Hp_coeff(p_) = 4_xp*SQRT(REAL((p_-3)*(p_-2)*(p_-1)*p_,xp))
     ENDIF
   ENDDO
    !we scale it w.r.t. to the max degree since
    !D_4^{v}\sim (\Delta v/2)^4 and \Delta v \sim 2pi/kvpar = pi/\sqrt{2P}
-   ! dv4_Hp_coeff = dv4_Hp_coeff*(1._dp/2._dp/SQRT(REAL(pmax_,dp)))**4
-   dv4_Hp_coeff = dv4_Hp_coeff*(PI/2._dp/SQRT(2._dp*REAL(pmax_,dp)))**4
+   ! dv4_Hp_coeff = dv4_Hp_coeff*(1._xp/2._xp/SQRT(REAL(pmax,xp)))**4
+   dv4_Hp_coeff = dv4_Hp_coeff*(PI/2._xp/SQRT(2._xp*REAL(pmax,xp)))**4
 END SUBROUTINE build_dv4Hp_table
 !******************************************************************************!
 
@@ -75,66 +71,50 @@ END SUBROUTINE build_dv4Hp_table
 !******************************************************************************!
 SUBROUTINE evaluate_kernels
   USE basic
-  USE array, Only : kernel_e, kernel_i, HF_phi_correction_operator
-  USE grid
-  USE model, ONLY : sigmae2_taue_o2, sigmai2_taui_o2, KIN_E
+  USE array,   ONLY : kernel!, HF_phi_correction_operator
+  USE grid,    ONLY : local_na, local_nj,ngj, local_nkx, local_nky, local_nz, ngz, jarray, kparray,&
+                      nzgrid
+  USE species, ONLY : sigma2_tau_o2
+  USE prec_const, ONLY: xp
   IMPLICIT NONE
-  INTEGER    :: j_int
-  REAL(dp)   :: j_dp, y_, factj
+  INTEGER    :: j_int, ia, eo, ikx, iky, iz, ij
+  REAL(xp)   :: j_xp, y_, factj
 
-DO eo  = 0,1
-DO ikx = ikxs,ikxe
-DO iky = ikys,ikye
-DO iz  = izgs,izge
-  !!!!! Electron kernels !!!!!
-  IF(KIN_E) THEN
-  DO ij = ijgs_e, ijge_e
-    j_int = jarray_e(ij)
-    j_dp  = REAL(j_int,dp)
-    y_    =  sigmae2_taue_o2 * kparray(iky,ikx,iz,eo)**2
-    IF(j_int .LT. 0) THEN
-      kernel_e(ij,iky,ikx,iz,eo) = 0._dp
-    ELSE
-      factj = GAMMA(j_dp+1._dp)
-      kernel_e(ij,iky,ikx,iz,eo) = y_**j_int*EXP(-y_)/factj
-    ENDIF
-  ENDDO
-  IF (ijs_e .EQ. 1) &
-  kernel_e(ijgs_e,iky,ikx,iz,eo) = 0._dp
-  ENDIF
-  !!!!! Ion kernels !!!!!
-  DO ij = ijgs_i, ijge_i
-    j_int = jarray_i(ij)
-    j_dp  = REAL(j_int,dp)
-    y_    =  sigmai2_taui_o2 * kparray(iky,ikx,iz,eo)**2
-    IF(j_int .LT. 0) THEN
-      kernel_i(ij,iky,ikx,iz,eo) = 0._dp
-    ELSE
-      factj = GAMMA(j_dp+1._dp)
-      kernel_i(ij,iky,ikx,iz,eo) = y_**j_int*EXP(-y_)/factj
-    ENDIF
+DO ia  = 1,local_na
+  DO eo  = 1,nzgrid
+    DO ikx = 1,local_nkx
+      DO iky = 1,local_nky
+        DO iz  = 1,local_nz + ngz
+          DO ij = 1,local_nj + ngj
+            j_int = jarray(ij)
+            j_xp  = REAL(j_int,xp)
+            y_    =  sigma2_tau_o2(ia) * kparray(iky,ikx,iz,eo)**2
+            IF(j_int .LT. 0) THEN !ghosts values
+              kernel(ia,ij,iky,ikx,iz,eo) = 0._xp
+            ELSE
+              factj = REAL(GAMMA(j_xp+1._xp),xp)
+              kernel(ia,ij,iky,ikx,iz,eo) = y_**j_int*EXP(-y_)/factj
+            ENDIF
+          ENDDO
+        ENDDO
+      ENDDO
+    ENDDO
   ENDDO
-  IF (ijs_i .EQ. 1) &
-  kernel_i(ijgs_i,iky,ikx,iz,eo) = 0._dp
+  ! !! Correction term for the evaluation of the heat flux
+  ! HF_phi_correction_operator(:,:,:) = &
+  !        2._xp * Kernel(ia,1,:,:,:,1) &
+  !       -1._xp * Kernel(ia,2,:,:,:,1)
+  !
+  ! DO ij = 1,local_Nj
+  !   j_int = jarray(ij)
+  !   j_xp  = REAL(j_int,xp)
+  !   HF_phi_correction_operator(:,:,:) = HF_phi_correction_operator(:,:,:) &
+  !   - Kernel(ia,ij,:,:,:,1) * (&
+  !       2._xp*(j_xp+1.5_xp) * Kernel(ia,ij  ,:,:,:,1) &
+  !       -     (j_xp+1.0_xp) * Kernel(ia,ij+1,:,:,:,1) &
+  !       -              j_xp * Kernel(ia,ij-1,:,:,:,1))
+  ! ENDDO
 ENDDO
-ENDDO
-ENDDO
-ENDDO
-!! Correction term for the evaluation of the heat flux
-HF_phi_correction_operator(ikys:ikye,ikxs:ikxe,izs:ize) = &
-       2._dp * Kernel_i(1,ikys:ikye,ikxs:ikxe,izs:ize,0) &
-      -1._dp * Kernel_i(2,ikys:ikye,ikxs:ikxe,izs:ize,0)
-
-DO ij = ijs_i, ije_i
-  j_int = jarray_i(ij)
-  j_dp  = REAL(j_int,dp)
-  HF_phi_correction_operator(ikys:ikye,ikxs:ikxe,izs:ize) = HF_phi_correction_operator(ikys:ikye,ikxs:ikxe,izs:ize) &
-  - Kernel_i(ij,ikys:ikye,ikxs:ikxe,izs:ize,0) * (&
-      2._dp*(j_dp+1.5_dp) * Kernel_i(ij  ,ikys:ikye,ikxs:ikxe,izs:ize,0) &
-      -     (j_dp+1.0_dp) * Kernel_i(ij+1,ikys:ikye,ikxs:ikxe,izs:ize,0) &
-      -              j_dp * Kernel_i(ij-1,ikys:ikye,ikxs:ikxe,izs:ize,0))
-ENDDO
-
 END SUBROUTINE evaluate_kernels
 !******************************************************************************!
 
@@ -150,44 +130,47 @@ END SUBROUTINE evaluate_EM_op
 !******************************************************************************!
 SUBROUTINE evaluate_poisson_op
   USE basic
-  USE array, Only : kernel_e, kernel_i, inv_poisson_op, inv_pol_ion
-  USE grid
-  USE model, ONLY : qe2_taue, qi2_taui, KIN_E
+  USE array,   ONLY : kernel, inv_poisson_op, inv_pol_ion
+  USE grid,    ONLY : local_na, local_nkx, local_nky, local_nz,&
+                      kxarray, kyarray, local_nj, ngj, ngz, ieven
+  USE species, ONLY : q2_tau
+  USE model,   ONLY : ADIAB_E
+  USE prec_const, ONLY: xp
   IMPLICIT NONE
-  REAL(dp)    :: pol_i, pol_e     ! (Z_a^2/tau_a (1-sum_n kernel_na^2))
-  INTEGER     :: ini,ine
+  REAL(xp)    :: pol_tot, operator, operator_ion     ! (Z^2/tau (1-sum_n kernel_na^2))
+  INTEGER     :: in,ikx,iky,iz,ia
+  REAL(xp)    :: sumker     ! (Z_a^2/tau_a (1-sum_n kernel_na^2))
 
   ! This term has no staggered grid dependence. It is evalued for the
   ! even z grid since poisson uses p=0 moments and phi only.
-  kxloop: DO ikx = ikxs,ikxe
-  kyloop: DO iky = ikys,ikye
-  zloop:  DO iz  = izs,ize
-  IF( (kxarray(ikx).EQ.0._dp) .AND. (kyarray(iky).EQ.0._dp) ) THEN
-      inv_poisson_op(iky, ikx, iz) =  0._dp
-  ELSE
-    !!!!!!!!!!!!!!!!! Ion contribution
-    ! loop over n only if the max polynomial degree
-    pol_i = 0._dp
-    DO ini=1,jmaxi+1
-      pol_i = pol_i  + qi2_taui*kernel_i(ini,iky,ikx,iz,0)**2 ! ... sum recursively ...
-    END DO
-    !!!!!!!!!!!!! Electron contribution
-    pol_e = 0._dp
-    IF (KIN_E) THEN ! Kinetic model
-    ! loop over n only if the max polynomial degree
-    DO ine=1,jmaxe+1 ! ine = n+1
-      pol_e = pol_e  + qe2_taue*kernel_e(ine,iky,ikx,iz,0)**2 ! ... sum recursively ...
-    END DO
-    ELSE ! Adiabatic model
-      pol_e = qe2_taue - 1._dp
+  kxloop: DO ikx = 1,local_nkx
+  kyloop: DO iky = 1,local_nky
+  zloop:  DO iz  = 1,local_nz
+  IF( (kxarray(ikx).EQ.0._xp) .AND. (kyarray(iky).EQ.0._xp) ) THEN
+      inv_poisson_op(iky, ikx, iz) =  0._xp
+      inv_pol_ion   (iky, ikx, iz) =  0._xp
+ELSE
+    ! loop over n only up to the max polynomial degree
+    pol_tot = 0._xp  ! total polarisation term
+    a:DO ia = 1,local_na ! sum over species
+    ! ia = 1
+      sumker  = 0._xp  ! sum of ion polarisation term
+      DO in=1,local_nj
+        sumker = sumker + q2_tau(ia)*kernel(ia,in+ngj/2,iky,ikx,iz+ngz/2,ieven)**2 ! ... sum recursively ...
+      END DO
+      pol_tot = pol_tot + q2_tau(ia) - sumker
+    ENDDO a
+    operator_ion = pol_tot
+    IF(ADIAB_E) THEN ! Adiabatic electron model
+      pol_tot = pol_tot + 1._xp
     ENDIF
-    inv_poisson_op(iky, ikx, iz) =  1._dp/(qi2_taui - pol_i + qe2_taue - pol_e)
-    inv_pol_ion   (iky, ikx, iz) =  1._dp/(qi2_taui - pol_i)
+    operator = pol_tot
+    inv_poisson_op(iky, ikx, iz) =  1._xp/pol_tot
+    inv_pol_ion   (iky, ikx, iz) =  1._xp/operator_ion
   ENDIF
   END DO zloop
   END DO kyloop
   END DO kxloop
-
 END SUBROUTINE evaluate_poisson_op
 !******************************************************************************!
 
@@ -195,188 +178,140 @@ END SUBROUTINE evaluate_poisson_op
 !!!!!!! Evaluate inverse polarisation operator for Poisson equation
 !******************************************************************************!
 SUBROUTINE evaluate_ampere_op
-  USE basic
-  USE array, Only : kernel_e, kernel_i, inv_ampere_op
-  USE grid
-  USE model, ONLY : q_e, q_i, beta, sigma_e, sigma_i
+  USE prec_const,   ONLY : xp
+  USE array,    ONLY : kernel, inv_ampere_op
+  USE grid,     ONLY : local_na, local_nkx, local_nky, local_nz, ngz, total_nj, ngj,&
+                       kparray, kxarray, kyarray, SOLVE_AMPERE, iodd
+  USE model,    ONLY : beta
+  USE species,  ONLY : q, sigma
   USE geometry, ONLY : hatB
+  USE prec_const, ONLY: xp
   IMPLICIT NONE
-  REAL(dp)    :: pol_i, pol_e, kperp2     ! (Z_a^2/tau_a (1-sum_n kernel_na^2))
-  INTEGER     :: ini,ine
-
+  REAL(xp)    :: sum_jpol, kperp2, operator     ! (Z^2/tau (1-sum_n kernel_na^2))
+  INTEGER     :: in,ikx,iky,iz,ia
   ! We do not solve Ampere if beta = 0 to spare waste of ressources
   IF(SOLVE_AMPERE) THEN
-    ! This term has no staggered grid dependence. It is evalued for the
-    ! even z grid since poisson uses p=0 moments and phi only.
-    kxloop: DO ikx = ikxs,ikxe
-    kyloop: DO iky = ikys,ikye
-    zloop:  DO iz  = izs,ize
-    kperp2 = kparray(iky,ikx,iz,0)**2
-    IF( (kxarray(ikx).EQ.0._dp) .AND. (kyarray(iky).EQ.0._dp) ) THEN
-        inv_ampere_op(iky, ikx, iz) =  0._dp
+    x:DO ikx = 1,local_nkx
+    y:DO iky = 1,local_nky
+    z:DO iz  = 1,local_nz
+    kperp2 = kparray(iky,ikx,iz+ngz/2,iodd)**2
+    IF( (kxarray(ikx).EQ.0._xp) .AND. (kyarray(iky).EQ.0._xp) ) THEN
+        inv_ampere_op(iky, ikx, iz) =  0._xp
     ELSE
-      !!!!!!!!!!!!!!!!! Ion contribution
-      pol_i = 0._dp
-      ! loop over n only up to the max polynomial degree
-      DO ini=1,jmaxi+1
-        pol_i = pol_i  + kernel_i(ini,iky,ikx,iz,0)**2 ! ... sum recursively ...
-      END DO
-      pol_i = q_i**2/(sigma_i**2) * pol_i
-      !!!!!!!!!!!!! Electron contribution
-      pol_e = 0._dp
-      ! loop over n only up to the max polynomial degree
-      DO ine=1,jmaxe+1 ! ine = n+1
-        pol_e = pol_e  + kernel_e(ine,iky,ikx,iz,0)**2 ! ... sum recursively ...
-      END DO
-      pol_e = q_e**2/(sigma_e**2) * pol_e
-      inv_ampere_op(iky, ikx, iz) =  1._dp/(2._dp*kperp2*hatB(iz,0)**2 + beta*(pol_i + pol_e))
+      sum_jpol = 0._xp
+      a:DO ia  = 1,local_na
+        ! loop over n only up to the max polynomial degree
+        j:DO in=1,total_nj
+          sum_jpol = sum_jpol  + q(ia)**2/(sigma(ia)**2)*kernel(ia,in+ngj/2,iky,ikx,iz+ngz/2,iodd)**2 ! ... sum recursively ...
+        END DO j
+      END DO a
+      operator = 2._xp*kperp2*hatB(iz+ngz/2,iodd)**2 + beta*sum_jpol
+      inv_ampere_op(iky, ikx, iz) =  1._xp/operator
     ENDIF
-    END DO zloop
-    END DO kyloop
-    END DO kxloop
+    END DO z
+    END DO y
+    END DO x
   ENDIF
-
 END SUBROUTINE evaluate_ampere_op
 !******************************************************************************!
 
 SUBROUTINE compute_lin_coeff
 
-  USE array, ONLY:  xnepj, &
-                    ynepp1j, ynepm1j, ynepp1jm1, ynepm1jm1,&
-                    zNepm1j, zNepm1jp1, zNepm1jm1,&
-                    xnepp1j, xnepm1j, xnepp2j, xnepm2j,&
-                    xnepjp1, xnepjm1,&
-                    xphij_e, xphijp1_e, xphijm1_e,&
-                    xpsij_e, xpsijp1_e, xpsijm1_e,&
-                    xnipj, &
-                    ynipp1j, ynipm1j, ynipp1jm1, ynipm1jm1,&
-                    zNipm1j, zNipm1jp1, zNipm1jm1,&
-                    xnipp1j, xnipm1j, xnipp2j, xnipm2j,&
-                    xnipjp1, xnipjm1,&
-                    xphij_i, xphijp1_i, xphijm1_i,&
-                    xpsij_i, xpsijp1_i, xpsijm1_i
-  USE model, ONLY: k_Te, k_Ti, k_Ne, k_Ni, k_cB, k_gB, KIN_E,&
-                   tau_e, tau_i, sigma_e, sigma_i, q_e, q_i
-  USE prec_const
-  USE grid,  ONLY: parray_e, parray_i, jarray_e, jarray_i, &
-                   ip,ij, ips_e,ipe_e, ips_i,ipe_i, ijs_e,ije_e, ijs_i,ije_i
+  USE array, ONLY:  xnapj, &
+                    ynapp1j, ynapm1j, ynapp1jm1, ynapm1jm1,&
+                    zNapm1j, zNapm1jp1, zNapm1jm1,&
+                    xnapj, xnapjp1, xnapjm1,&
+                    xnapp1j, xnapm1j, xnapp2j, xnapm2j,&
+                    xphij, xphijp1, xphijm1,&
+                    xpsij, xpsijp1, xpsijm1
+  USE species, ONLY: k_T, k_N, tau, q, sqrt_tau_o_sigma
+  USE model,   ONLY: k_cB, k_gB
+  USE prec_const, ONLY: xp, SQRT2, SQRT3
+  USE grid,  ONLY: parray, jarray, local_na, local_np, local_nj, ngj, ngp
+  INTEGER     :: ia,ip,ij,p_int, j_int ! polynom. dagrees
+  REAL(xp)    :: p_xp, j_xp
 
-  IF(KIN_E) THEN
-  CALL lin_coeff(k_Te,k_Ne,k_cB,k_gB,tau_e,q_e,sigma_e,&
-                 parray_e(ips_e:ipe_e),jarray_e(ijs_e:ije_e),ips_e,ipe_e,ijs_e,ije_e,&
-                 xnepj,xnepp1j,xnepm1j,xnepp2j,xnepm2j,xnepjp1,xnepjm1,&
-                 ynepp1j,ynepm1j,ynepp1jm1,ynepm1jm1,zNepm1j,zNepm1jp1,zNepm1jm1,&
-                 xphij_e,xphijp1_e,xphijm1_e,xpsij_e,xpsijp1_e,xpsijm1_e)
-  ENDIF
-
-  CALL lin_coeff(k_Ti,k_Ni,k_cB,k_gB,tau_i,q_i,sigma_i,&
-             parray_i(ips_i:ipe_i),jarray_i(ijs_i:ije_i),ips_i,ipe_i,ijs_i,ije_i,&
-             xnipj,xnipp1j,xnipm1j,xnipp2j,xnipm2j,xnipjp1,xnipjm1,&
-             ynipp1j,ynipm1j,ynipp1jm1,ynipm1jm1,zNipm1j,zNipm1jp1,zNipm1jm1,&
-             xphij_i,xphijp1_i,xphijm1_i,xpsij_i,xpsijp1_i,xpsijm1_i)
-
-  CONTAINS
-    SUBROUTINE lin_coeff(k_Ta,k_Na,k_cB,k_gB,tau_a,q_a,sigma_a,&
-                         parray_a,jarray_a,ips_a,ipe_a,ijs_a,ije_a,&
-                         xnapj,xnapp1j,xnapm1j,xnapp2j,xnapm2j,xnapjp1,xnapjm1,&
-                         ynapp1j,ynapm1j,ynapp1jm1,ynapm1jm1,zNapm1j,zNapm1jp1,zNapm1jm1,&
-                         xphij_a,xphijp1_a,xphijm1_a,xpsij_a,xpsijp1_a,xpsijm1_a)
-      IMPLICIT NONE
-      ! INPUTS
-      REAL(dp),                         INTENT(IN) :: k_Ta,k_Na,k_cB,k_gB,tau_a,q_a,sigma_a
-      INTEGER,  DIMENSION(ips_a:ipe_a), INTENT(IN) :: parray_a
-      INTEGER,  DIMENSION(ijs_a:ije_a), INTENT(IN) :: jarray_a
-      INTEGER,                          INTENT(IN) :: ips_a,ipe_a,ijs_a,ije_a
-      ! OUTPUTS (linear coefficients used in moment_eq_rhs_mod.F90)
-      REAL(dp), DIMENSION(ips_a:ipe_a,ijs_a:ije_a), INTENT(OUT) :: xnapj
-      REAL(dp), DIMENSION(ips_a:ipe_a),             INTENT(OUT) :: xnapp1j, xnapm1j,   xnapp2j,   xnapm2j
-      REAL(dp), DIMENSION(ijs_a:ije_a),             INTENT(OUT) :: xnapjp1, xnapjm1
-      REAL(dp), DIMENSION(ips_a:ipe_a,ijs_a:ije_a), INTENT(OUT) :: ynapp1j, ynapm1j,   ynapp1jm1, ynapm1jm1
-      REAL(dp), DIMENSION(ips_a:ipe_a,ijs_a:ije_a), INTENT(OUT) :: zNapm1j, zNapm1jp1, zNapm1jm1
-      REAL(dp), DIMENSION(ips_a:ipe_a,ijs_a:ije_a), INTENT(OUT) :: xphij_a, xphijp1_a, xphijm1_a
-      REAL(dp), DIMENSION(ips_a:ipe_a,ijs_a:ije_a), INTENT(OUT) :: xpsij_a, xpsijp1_a, xpsijm1_a
-      INTEGER     :: p_int, j_int ! polynom. dagrees
-      REAL(dp)    :: p_dp, j_dp
-      !! linear coefficients for moment RHS !!!!!!!!!!
-      DO ip = ips_a, ipe_a
-        p_int= parray_a(ip)   ! Hermite degree
-        p_dp = REAL(p_int,dp) ! REAL of Hermite degree
-        DO ij = ijs_a, ije_a
-          j_int= jarray_a(ij)   ! Laguerre degree
-          j_dp = REAL(j_int,dp) ! REAL of Laguerre degree
-          ! All Napj terms
-          xnapj(ip,ij) = tau_a/q_a*(k_cB*(2._dp*p_dp + 1._dp) &
-                                 +k_gB*(2._dp*j_dp + 1._dp))
-          ! Mirror force terms
-          ynapp1j  (ip,ij) = -SQRT(tau_a)/sigma_a *      (j_dp+1._dp)*SQRT(p_dp+1._dp)
-          ynapm1j  (ip,ij) = -SQRT(tau_a)/sigma_a *      (j_dp+1._dp)*SQRT(p_dp)
-          ynapp1jm1(ip,ij) = +SQRT(tau_a)/sigma_a *              j_dp*SQRT(p_dp+1._dp)
-          ynapm1jm1(ip,ij) = +SQRT(tau_a)/sigma_a *              j_dp*SQRT(p_dp)
-          ! Trapping terms
-          zNapm1j  (ip,ij) = +SQRT(tau_a)/sigma_a *(2._dp*j_dp+1._dp)*SQRT(p_dp)
-          zNapm1jp1(ip,ij) = -SQRT(tau_a)/sigma_a *      (j_dp+1._dp)*SQRT(p_dp)
-          zNapm1jm1(ip,ij) = -SQRT(tau_a)/sigma_a *              j_dp*SQRT(p_dp)
-        ENDDO
-      ENDDO
-      DO ip = ips_a, ipe_a
-        p_int= parray_a(ip)   ! Hermite degree
-        p_dp = REAL(p_int,dp) ! REAL of Hermite degree
-        ! Landau damping coefficients (ddz napj term)
-        xnapp1j(ip) = SQRT(tau_a)/sigma_a * SQRT(p_dp+1._dp)
-        xnapm1j(ip) = SQRT(tau_a)/sigma_a * SQRT(p_dp)
-        ! Magnetic curvature term
-        xnapp2j(ip) = tau_a/q_a * k_cB * SQRT((p_dp+1._dp)*(p_dp + 2._dp))
-        xnapm2j(ip) = tau_a/q_a * k_cB * SQRT( p_dp       *(p_dp - 1._dp))
+  !! linear coefficients for moment RHS !!!!!!!!!!
+  DO ia = 1,local_na
+    DO ip = 1,local_np
+      p_int= parray(ip+ngp/2)   ! Hermite degree
+      p_xp = REAL(p_int,xp) ! REAL of Hermite degree
+      DO ij = 1,local_nj
+        j_int= jarray(ij+ngj/2)   ! Laguerre degree
+        j_xp = REAL(j_int,xp) ! REAL of Laguerre degree
+        ! All Napj terms
+        xnapj(ia,ip,ij) = tau(ia)/q(ia)*(k_cB*(2._xp*p_xp + 1._xp) &
+                                        +k_gB*(2._xp*j_xp + 1._xp))
+        ! Mirror force terms
+        ynapp1j  (ia,ip,ij) = -sqrt_tau_o_sigma(ia) *      (j_xp+1._xp)*SQRT(p_xp+1._xp)
+        ynapm1j  (ia,ip,ij) = -sqrt_tau_o_sigma(ia) *      (j_xp+1._xp)*SQRT(p_xp)
+        ynapp1jm1(ia,ip,ij) = +sqrt_tau_o_sigma(ia) *              j_xp*SQRT(p_xp+1._xp)
+        ynapm1jm1(ia,ip,ij) = +sqrt_tau_o_sigma(ia) *              j_xp*SQRT(p_xp)
+        ! Trapping terms
+        zNapm1j  (ia,ip,ij) = +sqrt_tau_o_sigma(ia) *(2._xp*j_xp+1._xp)*SQRT(p_xp)
+        zNapm1jp1(ia,ip,ij) = -sqrt_tau_o_sigma(ia) *      (j_xp+1._xp)*SQRT(p_xp)
+        zNapm1jm1(ia,ip,ij) = -sqrt_tau_o_sigma(ia) *              j_xp*SQRT(p_xp)
       ENDDO
-      DO ij = ijs_a, ije_a
-        j_int= jarray_a(ij)   ! Laguerre degree
-        j_dp = REAL(j_int,dp) ! REAL of Laguerre degree
-        ! Magnetic gradient term
-        xnapjp1(ij) = -tau_a/q_a * k_gB * (j_dp + 1._dp)
-        xnapjm1(ij) = -tau_a/q_a * k_gB *  j_dp
-      ENDDO
-      !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-      !! ES linear coefficients for moment RHS !!!!!!!!!!
-      DO ip = ips_a, ipe_a
-        p_int= parray_a(ip)   ! Hermite degree
-        DO ij = ijs_a, ije_a
-          j_int= jarray_a(ij)   ! REALof Laguerre degree
-          j_dp = REAL(j_int,dp) ! REALof Laguerre degree
-          !! Electrostatic potential pj terms
-          IF (p_int .EQ. 0) THEN ! kronecker p0
-            xphij_a(ip,ij)    = +k_Na + 2._dp*j_dp*k_Ta
-            xphijp1_a(ip,ij)  = -k_Ta*(j_dp+1._dp)
-            xphijm1_a(ip,ij)  = -k_Ta* j_dp
-          ELSE IF (p_int .EQ. 2) THEN ! kronecker p2
-            xphij_a(ip,ij)    = +k_Ta/SQRT2
-            xphijp1_a(ip,ij)  = 0._dp; xphijm1_a(ip,ij)  = 0._dp;
-          ELSE
-            xphij_a(ip,ij)    = 0._dp; xphijp1_a(ip,ij)  = 0._dp
-            xphijm1_a(ip,ij)  = 0._dp;
-          ENDIF
-        ENDDO
+    ENDDO
+    DO ip = 1,local_np
+      p_int= parray(ip+ngp/2)   ! Hermite degree
+      p_xp = REAL(p_int,xp) ! REAL of Hermite degree
+      ! Landau damping coefficients (ddz napj term)
+      xnapp1j(ia,ip) = sqrt_tau_o_sigma(ia) * SQRT(p_xp+1._xp)
+      xnapm1j(ia,ip) = sqrt_tau_o_sigma(ia) * SQRT(p_xp)
+      ! Magnetic curvature term
+      xnapp2j(ia,ip) = tau(ia)/q(ia) * k_cB * SQRT((p_xp+1._xp)*(p_xp + 2._xp))
+      xnapm2j(ia,ip) = tau(ia)/q(ia) * k_cB * SQRT( p_xp       *(p_xp - 1._xp))
+    ENDDO
+    DO ij = 1,local_nj
+      j_int= jarray(ij+ngj/2)   ! Laguerre degree
+      j_xp = REAL(j_int,xp) ! REAL of Laguerre degree
+      ! Magnetic gradient term
+      xnapjp1(ia,ij) = -tau(ia)/q(ia) * k_gB * (j_xp + 1._xp)
+      xnapjm1(ia,ij) = -tau(ia)/q(ia) * k_gB *  j_xp
+    ENDDO
+    !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+    !! ES linear coefficients for moment RHS !!!!!!!!!!
+    DO ip = 1,local_np
+      p_int= parray(ip+ngp/2)   ! Hermite degree
+      DO ij = 1,local_nj
+        j_int= jarray(ij+ngj/2)   ! REALof Laguerre degree
+        j_xp = REAL(j_int,xp) ! REALof Laguerre degree
+        !! Electrostatic potential pj terms
+        IF (p_int .EQ. 0) THEN ! kronecker p0
+          xphij  (ia,ip,ij)  = +k_N(ia) + 2._xp*j_xp*k_T(ia)
+          xphijp1(ia,ip,ij)  = -k_T(ia)*(j_xp+1._xp)
+          xphijm1(ia,ip,ij)  = -k_T(ia)* j_xp
+        ELSE IF (p_int .EQ. 2) THEN ! kronecker p2
+          xphij(ia,ip,ij)    = +k_T(ia)/SQRT2
+          xphijp1(ia,ip,ij)  = 0._xp; xphijm1(ia,ip,ij)  = 0._xp;
+        ELSE
+          xphij  (ia,ip,ij)  = 0._xp; xphijp1(ia,ip,ij)  = 0._xp
+          xphijm1(ia,ip,ij)  = 0._xp;
+        ENDIF
       ENDDO
-      !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-      !! Electromagnatic linear coefficients for moment RHS !!!!!!!!!!
-      DO ip = ips_a, ipe_a
-        p_int= parray_a(ip)   ! Hermite degree
-        DO ij = ijs_a, ije_a
-          j_int= jarray_a(ij)   ! REALof Laguerre degree
-          j_dp = REAL(j_int,dp) ! REALof Laguerre degree
-          IF (p_int .EQ. 1) THEN ! kronecker p1
-            xpsij_a  (ip,ij)  = +(k_Na + (2._dp*j_dp+1._dp)*k_Ta)* SQRT(tau_a)/sigma_a
-            xpsijp1_a(ip,ij)  = - k_Ta*(j_dp+1._dp)              * SQRT(tau_a)/sigma_a
-            xpsijm1_a(ip,ij)  = - k_Ta* j_dp                     * SQRT(tau_a)/sigma_a
-          ELSE IF (p_int .EQ. 3) THEN ! kronecker p3
-            xpsij_a  (ip,ij)  = + k_Ta*SQRT3/SQRT2               * SQRT(tau_a)/sigma_a
-            xpsijp1_a(ip,ij)  = 0._dp; xpsijm1_a(ip,ij)  = 0._dp;
-          ELSE
-            xpsij_a  (ip,ij)  = 0._dp; xpsijp1_a(ip,ij)  = 0._dp
-            xpsijm1_a(ip,ij)  = 0._dp;
-          ENDIF
-        ENDDO
+    ENDDO
+    !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+    !! Electromagnatic linear coefficients for moment RHS !!!!!!!!!!
+    DO ip = 1,local_np
+      p_int= parray(ip+ngp/2)   ! Hermite degree
+      DO ij = 1,local_nj
+        j_int= jarray(ij+ngj/2)   ! REALof Laguerre degree
+        j_xp = REAL(j_int,xp) ! REALof Laguerre degree
+        IF (p_int .EQ. 1) THEN ! kronecker p1
+          xpsij  (ia,ip,ij)  = +(k_N(ia) + (2._xp*j_xp+1._xp)*k_T(ia))* sqrt_tau_o_sigma(ia)
+          xpsijp1(ia,ip,ij)  = - k_T(ia)*(j_xp+1._xp)                 * sqrt_tau_o_sigma(ia)
+          xpsijm1(ia,ip,ij)  = - k_T(ia)* j_xp                        * sqrt_tau_o_sigma(ia)
+        ELSE IF (p_int .EQ. 3) THEN ! kronecker p3
+          xpsij  (ia,ip,ij)  = + k_T(ia)*SQRT3/SQRT2                  * sqrt_tau_o_sigma(ia)
+          xpsijp1(ia,ip,ij)  = 0._xp; xpsijm1(ia,ip,ij)  = 0._xp;
+        ELSE
+          xpsij  (ia,ip,ij)  = 0._xp; xpsijp1(ia,ip,ij)  = 0._xp
+          xpsijm1(ia,ip,ij)  = 0._xp;
+        ENDIF
       ENDDO
-    END SUBROUTINE lin_coeff
+    ENDDO
+  ENDDO
 END SUBROUTINE compute_lin_coeff
 
 END MODULE numerics
diff --git a/src/parallel_mod.F90 b/src/parallel_mod.F90
index 3a048feda5ddf34db5380a7c967920cd3f6a0a2e..e132fd04c4787c82b3fa9b6c64c88ebf10f0a08a 100644
--- a/src/parallel_mod.F90
+++ b/src/parallel_mod.F90
@@ -1,353 +1,378 @@
 MODULE parallel
-  USE basic
-  USE grid
-  use prec_const
-  USE model, ONLY: KIN_E
+  use prec_const, ONLY : xp, mpi_xp_c
+  USE mpi
   IMPLICIT NONE
-  ! recieve and displacement counts for gatherv
-  INTEGER, DIMENSION(:), ALLOCATABLE :: rcv_y, dsp_y, rcv_zy, dsp_zy
-  INTEGER, DIMENSION(:), ALLOCATABLE :: rcv_zp_e,  dsp_zp_e
-  INTEGER, DIMENSION(:), ALLOCATABLE :: rcv_yp_e,  dsp_yp_e
-  INTEGER, DIMENSION(:), ALLOCATABLE :: rcv_zyp_e, dsp_zyp_e
-  INTEGER, DIMENSION(:), ALLOCATABLE :: rcv_zp_i,  dsp_zp_i
-  INTEGER, DIMENSION(:), ALLOCATABLE :: rcv_yp_i,  dsp_yp_i
-  INTEGER, DIMENSION(:), ALLOCATABLE :: rcv_zyp_i, dsp_zyp_i
+  ! Auxiliary variables
+  INTEGER, PUBLIC, PROTECTED  :: comm0                 ! Default communicator with a topology
+  INTEGER, PUBLIC, PROTECTED  :: group0                ! Default group with a topology
+  INTEGER, PUBLIC, PROTECTED  :: rank_0                ! Ranks in comm0
+  ! Communicators for 1-dim cartesian subgrids of comm0
+  INTEGER, PUBLIC, PROTECTED  :: comm_p, comm_ky, comm_z
+  INTEGER, PUBLIC, PROTECTED  :: rank_p, rank_ky, rank_z! Ranks
+  INTEGER, PUBLIC, PROTECTED  :: comm_pz,  rank_pz      ! 2D comm for N_a(p,j,z) ouptut (mspfile)
+  INTEGER, PUBLIC, PROTECTED  :: comm_kyz, rank_kyz     ! 2D comm for N_a(p,j,z) ouptut (mspfile)
+  INTEGER, PUBLIC, PROTECTED  :: comm_ky0, rank_ky0     ! comm along ky with p=0
+  INTEGER, PUBLIC, PROTECTED  :: comm_z0,  rank_z0      ! comm along z  with p=0
+  INTEGER, PUBLIC, PROTECTED  :: group_ky0, group_z0
+  INTEGER, PUBLIC, PROTECTED  :: ierr                  ! flag for MPI error
+  INTEGER, PUBLIC, PROTECTED  :: my_id                 ! Rank in COMM_WORLD
+  INTEGER, PUBLIC, PROTECTED  :: num_procs             ! number of MPI processes
+  INTEGER, PUBLIC, PROTECTED  :: num_procs_p           ! Number of processes in p
+  INTEGER, PUBLIC, PROTECTED  :: num_procs_ky          ! Number of processes in r
+  INTEGER, PUBLIC, PROTECTED  :: num_procs_z           ! Number of processes in z
+  INTEGER, PUBLIC, PROTECTED  :: num_procs_pz          ! Number of processes in pz comm
+  INTEGER, PUBLIC, PROTECTED  :: num_procs_kyz         ! Number of processes in kyz comm
+  INTEGER, PUBLIC, PROTECTED  :: nbr_L, nbr_R          ! Left and right neighbours (along p)
+  INTEGER, PUBLIC, PROTECTED  :: nbr_T, nbr_B          ! Top and bottom neighbours (along kx)
+  INTEGER, PUBLIC, PROTECTED  :: nbr_U, nbr_D          ! Upstream and downstream neighbours (along z)
+
 
-  ! Various buffers used
-  COMPLEX(dp), ALLOCATABLE, DIMENSION(:,:,:)     :: buff_xy_zBC
-  COMPLEX(dp), ALLOCATABLE, DIMENSION(:,:,:,:,:) :: buff_pjxy_zBC_e
-  COMPLEX(dp), ALLOCATABLE, DIMENSION(:,:,:,:,:) :: buff_pjxy_zBC_i
+  ! recieve and displacement counts for gatherv
+  INTEGER, DIMENSION(:), ALLOCATABLE :: rcv_p, dsp_p
+  INTEGER, DIMENSION(:), ALLOCATABLE :: rcv_y, dsp_y
+  INTEGER, DIMENSION(:), ALLOCATABLE :: rcv_zy, dsp_zy
+  INTEGER, DIMENSION(:), ALLOCATABLE :: rcv_zp,  dsp_zp
+  INTEGER, DIMENSION(:), ALLOCATABLE :: rcv_yp,  dsp_yp
+  INTEGER, DIMENSION(:), ALLOCATABLE :: rcv_zyp, dsp_zyp
 
-  PUBLIC :: manual_0D_bcast, manual_3D_bcast, init_parallel_var, &
-            gather_xyz, gather_pjz_i, gather_pjxyz_e, gather_pjxyz_i
+  PUBLIC :: ppinit, manual_0D_bcast, manual_3D_bcast, init_parallel_var, &
+            gather_xyz, gather_pjz, gather_pjxyz, exchange_ghosts_1D
 
 CONTAINS
 
-  SUBROUTINE init_parallel_var
+  SUBROUTINE ppinit
+    ! Init the parallel environment
+    IMPLICIT NONE
+    ! Variables for cartesian domain decomposition
+    INTEGER, PARAMETER :: ndims=3 ! p, kx and z
+    INTEGER, DIMENSION(ndims) :: dims=0, coords=0
+    LOGICAL :: periods(ndims) = .FALSE., reorder=.FALSE.
+    CHARACTER(len=32) :: str
+    INTEGER :: nargs, i, l
+    CALL MPI_INIT(ierr)
+    CALL MPI_COMM_RANK (MPI_COMM_WORLD,     my_id, ierr)
+    CALL MPI_COMM_SIZE (MPI_COMM_WORLD, num_procs, ierr)
+    nargs = COMMAND_ARGUMENT_COUNT()
+    !
+    IF( nargs .GT. 1 ) THEN
+       DO i=1,ndims
+          CALL GET_COMMAND_ARGUMENT(i, str, l, ierr)
+          READ(str(1:l),'(i3)')  dims(i)
+       END DO
+       IF( PRODUCT(dims) .NE. num_procs ) THEN
+        IF(my_id .EQ. 0) WRITE(*, '(a,i4,a,i4)') 'Product of dims: ', PRODUCT(dims), " is not consistent WITH NPROCS=",num_procs
+          CALL MPI_ABORT(MPI_COMM_WORLD, -2, ierr)
+       END IF
+    ELSE
+      dims(1) = 1
+      dims(2) = num_procs
+      dims(3) = 1
+    END IF
+    num_procs_p  = dims(1) ! Number of processes along p
+    num_procs_ky = dims(2) ! Number of processes along kx
+    num_procs_z  = dims(3) ! Number of processes along z
+    !
+    !periodiciyt in p
+    periods(1)=.FALSE.
+    !periodiciyt in ky
+    periods(2)=.FALSE.
+    !periodiciyt in z
+    periods(3)=.TRUE.
+    CALL MPI_CART_CREATE(MPI_COMM_WORLD, ndims, dims, periods, reorder, comm0, ierr)
+    CALL MPI_COMM_GROUP(comm0,group0, ierr)
+    CALL MPI_COMM_RANK(comm0, rank_0,  ierr)
+    CALL MPI_CART_COORDS(comm0,rank_0,ndims,coords,ierr)
+    !
+    !  Partitions 3-dim cartesian topology of comm0 into 1-dim cartesian subgrids
+    !
+    CALL MPI_CART_SUB (comm0, (/.TRUE.,.FALSE.,.FALSE./),  comm_p, ierr)
+    CALL MPI_CART_SUB (comm0, (/.FALSE.,.TRUE.,.FALSE./), comm_ky, ierr)
+    CALL MPI_CART_SUB (comm0, (/.FALSE.,.FALSE.,.TRUE./),  comm_z, ierr)
+    ! Find id inside the 1d-sub communicators
+    CALL MPI_COMM_RANK(comm_p,  rank_p,  ierr)
+    CALL MPI_COMM_RANK(comm_ky, rank_ky, ierr)
+    CALL MPI_COMM_RANK(comm_z,  rank_z,  ierr)
+    ! 2D communicator
+    CALL MPI_CART_SUB (comm0, (/.TRUE.,.FALSE.,.TRUE./),  comm_pz,  ierr)
+    CALL MPI_CART_SUB (comm0, (/.FALSE.,.TRUE.,.TRUE./),  comm_kyz, ierr)
+    ! Count the number of processes in 2D comms
+    CALL MPI_COMM_SIZE(comm_pz, num_procs_pz, ierr)
+    CALL MPI_COMM_SIZE(comm_kyz,num_procs_kyz,ierr)
+    ! Find id inside the 1d-sub communicators
+    CALL MPI_COMM_RANK(comm_pz,  rank_pz,   ierr)
+    CALL MPI_COMM_RANK(comm_kyz, rank_kyz,  ierr)
+    ! Find neighbours
+    CALL MPI_CART_SHIFT(comm0, 0, 1, nbr_L, nbr_R, ierr) !left   right neighbours
+    CALL MPI_CART_SHIFT(comm0, 1, 1, nbr_B, nbr_T, ierr) !bottom top   neighbours
+    CALL MPI_CART_SHIFT(comm0, 2, 1, nbr_D, nbr_U, ierr) !down   up    neighbours
+  END SUBROUTINE ppinit
+
+  SUBROUTINE init_parallel_var(np_loc,np_tot,nky_loc,nky_tot,nz_loc)
+    IMPLICIT NONE
+    INTEGER, INTENT(IN) :: np_loc,np_tot,nky_loc,nky_tot,nz_loc
     INTEGER :: i_
+    !     !! P reduction at constant x,y,z,j
+    ! ALLOCATE(rcv_p(0:num_procs_p-1),dsp_p(0:num_procs_p-1)) !Displacement sizes for balance diagnostic
+    ! ! all processes share their local number of points
+    ! CALL MPI_ALLGATHER(np_loc,1,MPI_INTEGER,rcv_p,1,MPI_INTEGER,comm_p,ierr)
+    ! ! the displacement array can be build from each np_loc as
+    ! dsp_p(0)=0
+    ! DO i_=1,num_procs_p-1
+    !    dsp_p(i_) =dsp_p(i_-1) + rcv_p(i_-1)
+    ! END DO
+    ! !!!!!! XYZ gather variables
+    ! !! Y reduction at constant x and z
+    ! ! number of points recieved and displacement for the y reduction
+    ! ALLOCATE(rcv_y(0:num_procs_ky-1),dsp_y(0:num_procs_ky-1)) !Displacement sizes for balance diagnostic
+    ! ! all processes share their local number of points
+    ! CALL MPI_ALLGATHER(nky_loc,1,MPI_INTEGER,rcv_y,1,MPI_INTEGER,comm_ky,ierr)
+    ! ! the displacement array can be build from each np_loc as
+    ! dsp_y(0)=0
+    ! DO i_=1,num_procs_ky-1
+    !    dsp_y(i_) =dsp_y(i_-1) + rcv_y(i_-1)
+    ! END DO
+    ! !! Z reduction for full slices of y data but constant x
+    ! ! number of points recieved and displacement for the z reduction
+    ! ALLOCATE(rcv_zy(0:num_procs_z-1),dsp_zy(0:num_procs_z-1)) !Displacement sizes for balance diagnostic
+    ! ! all processes share their local number of points
+    ! CALL MPI_ALLGATHER(nz_loc*nky_tot,1,MPI_INTEGER,rcv_zy,1,MPI_INTEGER,comm_z,ierr)
+    ! ! the displacement array can be build from each np_loc as
+    ! dsp_zy(0)=0
+    ! DO i_=1,num_procs_z-1
+    !    dsp_zy(i_) =dsp_zy(i_-1) + rcv_zy(i_-1)
+    ! END DO
+    ! !!!!! PJZ gather variables
+    ! !! P reduction at constant j and z is already done in module GRID
+    ! !! Z reduction for full slices of p data but constant j
+    ! ! number of points recieved and displacement for the z reduction
+    ! ALLOCATE(rcv_zp(0:num_procs_z-1),dsp_zp(0:num_procs_z-1)) !Displacement sizes for balance diagnostic
+    ! ! all processes share their local number of points
+    ! CALL MPI_ALLGATHER(nz_loc*np_tot,1,MPI_INTEGER,rcv_zp,1,MPI_INTEGER,comm_z,ierr)
+    ! ! the displacement array can be build from each np_loc as
+    ! dsp_zp(0)=0
+    ! DO i_=1,num_procs_z-1
+    !    dsp_zp(i_) =dsp_zp(i_-1) + rcv_zp(i_-1)
+    ! END DO
+    ! !!!!! PJXYZ gather variables
+    ! !! Y reduction for full slices of p data but constant j
+    ! ! number of points recieved and displacement for the y reduction
+    ! ALLOCATE(rcv_yp(0:num_procs_ky-1),dsp_yp(0:num_procs_ky-1)) !Displacement sizes for balance diagnostic
+    ! ! all processes share their local number of points
+    ! CALL MPI_ALLGATHER(nky_loc*np_tot,1,MPI_INTEGER,rcv_yp,1,MPI_INTEGER,comm_ky,ierr)
+    ! ! the displacement array can be build from each np_loc as
+    ! dsp_yp(0)=0
+    ! DO i_=1,num_procs_ky-1
+    !    dsp_yp(i_) =dsp_yp(i_-1) + rcv_yp(i_-1)
+    ! END DO
+    ! !! Z reduction for full slices of py data but constant j
+    ! ! number of points recieved and displacement for the z reduction
+    ! ALLOCATE(rcv_zyp(0:num_procs_z-1),dsp_zyp(0:num_procs_z-1)) !Displacement sizes for balance diagnostic
+    ! ! all processes share their local number of points
+    ! CALL MPI_ALLGATHER(nz_loc*np_tot*nky_tot,1,MPI_INTEGER,rcv_zyp,1,MPI_INTEGER,comm_z,ierr)
+    ! ! the displacement array can be build from each np_loc as
+    ! dsp_zyp(0)=0
+    ! DO i_=1,num_procs_z-1
+    !    dsp_zyp(i_) =dsp_zyp(i_-1) + rcv_zyp(i_-1)
+    ! END DO
+    ! P reduction at constant x,y,z,j
+    ALLOCATE(rcv_p(num_procs_p),dsp_p(num_procs_p)) !Displacement sizes for balance diagnostic
+    CALL MPI_ALLGATHER(np_loc,1,MPI_INTEGER,rcv_p,1,MPI_INTEGER,comm_p,ierr)
+    dsp_p(1)=0
+    DO i_=2,num_procs_p
+       dsp_p(i_) =dsp_p(i_-1) + rcv_p(i_-1)
+    END DO
     !!!!!! XYZ gather variables
-    !! Y reduction at constant x and z
-    ! number of points recieved and displacement for the y reduction
-    ALLOCATE(rcv_y(0:num_procs_ky-1),dsp_y(0:num_procs_ky-1)) !Displacement sizes for balance diagnostic
-    ! all processes share their local number of points
-    CALL MPI_ALLGATHER(local_nky,1,MPI_INTEGER,rcv_y,1,MPI_INTEGER,comm_ky,ierr)
-    ! the displacement array can be build from each local_np as
-    dsp_y(0)=0
-    DO i_=1,num_procs_ky-1
+    ALLOCATE(rcv_y(num_procs_ky),dsp_y(num_procs_ky)) !Displacement sizes for balance diagnostic
+    CALL MPI_ALLGATHER(nky_loc,1,MPI_INTEGER,rcv_y,1,MPI_INTEGER,comm_ky,ierr)
+    dsp_y(1)=0
+    DO i_=2,num_procs_ky
        dsp_y(i_) =dsp_y(i_-1) + rcv_y(i_-1)
     END DO
     !! Z reduction for full slices of y data but constant x
-    ! number of points recieved and displacement for the z reduction
-    ALLOCATE(rcv_zy(0:num_procs_z-1),dsp_zy(0:num_procs_z-1)) !Displacement sizes for balance diagnostic
-    ! all processes share their local number of points
-    CALL MPI_ALLGATHER(local_nz*Nky,1,MPI_INTEGER,rcv_zy,1,MPI_INTEGER,comm_z,ierr)
-    ! the displacement array can be build from each local_np as
-    dsp_zy(0)=0
-    DO i_=1,num_procs_z-1
+    ALLOCATE(rcv_zy(num_procs_z),dsp_zy(num_procs_z)) !Displacement sizes for balance diagnostic
+    CALL MPI_ALLGATHER(nz_loc*nky_tot,1,MPI_INTEGER,rcv_zy,1,MPI_INTEGER,comm_z,ierr)
+    dsp_zy(1)=0
+    DO i_=2,num_procs_z
        dsp_zy(i_) =dsp_zy(i_-1) + rcv_zy(i_-1)
     END DO
-
     !!!!! PJZ gather variables
-    ! IONS
-    !! P reduction at constant j and z is already done in module GRID
-    !! Z reduction for full slices of p data but constant j
-    ! number of points recieved and displacement for the z reduction
-    ALLOCATE(rcv_zp_i(0:num_procs_z-1),dsp_zp_i(0:num_procs_z-1)) !Displacement sizes for balance diagnostic
-    ! all processes share their local number of points
-    CALL MPI_ALLGATHER(local_nz*Np_i,1,MPI_INTEGER,rcv_zp_i,1,MPI_INTEGER,comm_z,ierr)
-    ! the displacement array can be build from each local_np as
-    dsp_zp_i(0)=0
-    DO i_=1,num_procs_z-1
-       dsp_zp_i(i_) =dsp_zp_i(i_-1) + rcv_zp_i(i_-1)
-    END DO
-
-    !!!!! PJXYZ gather variables
-    !! Y reduction for full slices of p data but constant j
-    ! number of points recieved and displacement for the y reduction
-    ALLOCATE(rcv_yp_i(0:num_procs_ky-1),dsp_yp_i(0:num_procs_ky-1)) !Displacement sizes for balance diagnostic
-    ! all processes share their local number of points
-    CALL MPI_ALLGATHER(local_nky*Np_i,1,MPI_INTEGER,rcv_yp_i,1,MPI_INTEGER,comm_ky,ierr)
-    ! the displacement array can be build from each local_np as
-    dsp_yp_i(0)=0
-    DO i_=1,num_procs_ky-1
-       dsp_yp_i(i_) =dsp_yp_i(i_-1) + rcv_yp_i(i_-1)
-    END DO
-    !! Z reduction for full slices of py data but constant j
-    ! number of points recieved and displacement for the z reduction
-    ALLOCATE(rcv_zyp_i(0:num_procs_z-1),dsp_zyp_i(0:num_procs_z-1)) !Displacement sizes for balance diagnostic
-    ! all processes share their local number of points
-    CALL MPI_ALLGATHER(local_nz*Np_i*Nky,1,MPI_INTEGER,rcv_zyp_i,1,MPI_INTEGER,comm_z,ierr)
-    ! the displacement array can be build from each local_np as
-    dsp_zyp_i(0)=0
-    DO i_=1,num_procs_z-1
-       dsp_zyp_i(i_) =dsp_zyp_i(i_-1) + rcv_zyp_i(i_-1)
-    END DO
-
-    ! ELECTONS
-    !! Z reduction for full slices of p data but constant j
-    ! number of points recieved and displacement for the z reduction
-    ALLOCATE(rcv_zp_e(0:num_procs_z-1),dsp_zp_e(0:num_procs_z-1)) !Displacement sizes for balance diagnostic
-    ! all processes share their local number of points
-    CALL MPI_ALLGATHER(local_nz*Np_e,1,MPI_INTEGER,rcv_zp_e,1,MPI_INTEGER,comm_z,ierr)
-    ! the displacement array can be build from each local_np as
-    dsp_zp_e(0)=0
-    DO i_=1,num_procs_z-1
-       dsp_zp_e(i_) =dsp_zp_e(i_-1) + rcv_zp_e(i_-1)
+    ALLOCATE(rcv_zp(num_procs_z),dsp_zp(num_procs_z)) !Displacement sizes for balance diagnostic
+    CALL MPI_ALLGATHER(nz_loc*np_tot,1,MPI_INTEGER,rcv_zp,1,MPI_INTEGER,comm_z,ierr)
+    dsp_zp(1)=0
+    DO i_=2,num_procs_z
+       dsp_zp(i_) =dsp_zp(i_-1) + rcv_zp(i_-1)
     END DO
-
     !!!!! PJXYZ gather variables
     !! Y reduction for full slices of p data but constant j
-    ! number of points recieved and displacement for the y reduction
-    ALLOCATE(rcv_yp_e(0:num_procs_ky-1),dsp_yp_e(0:num_procs_ky-1)) !Displacement sizes for balance diagnostic
-    ! all processes share their local number of points
-    CALL MPI_ALLGATHER(local_nky*Np_e,1,MPI_INTEGER,rcv_yp_e,1,MPI_INTEGER,comm_ky,ierr)
-    ! the displacement array can be build from each local_np as
-    dsp_yp_e(0)=0
-    DO i_=1,num_procs_ky-1
-       dsp_yp_e(i_) =dsp_yp_e(i_-1) + rcv_yp_e(i_-1)
+    ALLOCATE(rcv_yp(num_procs_ky),dsp_yp(num_procs_ky)) !Displacement sizes for balance diagnostic
+    CALL MPI_ALLGATHER(nky_loc*np_tot,1,MPI_INTEGER,rcv_yp,1,MPI_INTEGER,comm_ky,ierr)
+    dsp_yp(1)=0
+    DO i_=2,num_procs_ky
+       dsp_yp(i_) =dsp_yp(i_-1) + rcv_yp(i_-1)
     END DO
     !! Z reduction for full slices of py data but constant j
-    ! number of points recieved and displacement for the z reduction
-    ALLOCATE(rcv_zyp_e(0:num_procs_z-1),dsp_zyp_e(0:num_procs_z-1)) !Displacement sizes for balance diagnostic
-    ! all processes share their local number of points
-    CALL MPI_ALLGATHER(local_nz*Np_e*Nky,1,MPI_INTEGER,rcv_zyp_e,1,MPI_INTEGER,comm_z,ierr)
-    ! the displacement array can be build from each local_np as
-    dsp_zyp_e(0)=0
-    DO i_=1,num_procs_z-1
-       dsp_zyp_e(i_) =dsp_zyp_e(i_-1) + rcv_zyp_e(i_-1)
+    ALLOCATE(rcv_zyp(num_procs_z),dsp_zyp(num_procs_z)) !Displacement sizes for balance diagnostic
+    CALL MPI_ALLGATHER(nz_loc*np_tot*nky_tot,1,MPI_INTEGER,rcv_zyp,1,MPI_INTEGER,comm_z,ierr)
+    dsp_zyp(1)=0
+    DO i_=2,num_procs_z
+       dsp_zyp(i_) =dsp_zyp(i_-1) + rcv_zyp(i_-1)
     END DO
-
-    !! Allocate some buffers
-    ALLOCATE(buff_xy_zBC(ikys:ikye,ikxs:ikxe,-2:2))
-    IF(KIN_E) &
-    ALLOCATE(buff_pjxy_zBC_e(ipgs_e:ipge_e,ijgs_e:ijge_e,ikys:ikye,ikxs:ikxe,-2:2))
-    ALLOCATE(buff_pjxy_zBC_i(ipgs_i:ipge_i,ijgs_i:ijge_i,ikys:ikye,ikxs:ikxe,-2:2))
   END SUBROUTINE init_parallel_var
 
-  !!!!! Gather a field in spatial coordinates on rank 0 !!!!!
-  SUBROUTINE gather_xyz(field_sub,field_full)
-    COMPLEX(dp), DIMENSION(ikys:ikye, 1:Nkx, izs:ize), INTENT(IN)  :: field_sub
-    COMPLEX(dp), DIMENSION(   1:Nky,  1:Nkx,   1:Nz),  INTENT(OUT) :: field_full
-    COMPLEX(dp), DIMENSION(ikys:ikye)           :: buffer_ly_cz !local y, constant z
-    COMPLEX(dp), DIMENSION(   1:Nky )           :: buffer_fy_cz !full  y, constant z
-    COMPLEX(dp), DIMENSION(   1:Nky,  izs:ize ) :: buffer_fy_lz !full  y, local z
-    COMPLEX(dp), DIMENSION(   1:Nky,    1:Nz  ) :: buffer_fy_fz !full  y, full  z
+  SUBROUTINE parallel_ouptutinputs(fid)
+    !
+    !    Write the input parameters to the results_xx.h5 file
+    !
+    USE futils, ONLY: attach, creatd
+    IMPLICIT NONE
+    INTEGER, INTENT(in) :: fid
+    CHARACTER(len=256)  :: str
+    WRITE(str,'(a)') '/data/input/parallel'
+    CALL creatd(fid, 0,(/0/),TRIM(str),'Parallel Input')
+    CALL attach(fid, TRIM(str),       "Nproc",   num_procs)
+    CALL attach(fid, TRIM(str),       "Np_p" , num_procs_p)
+    CALL attach(fid, TRIM(str),       "Np_kx",num_procs_ky)
+    CALL attach(fid, TRIM(str),        "Np_z", num_procs_z)
+  END SUBROUTINE parallel_ouptutinputs
+
+  !!!! Gather a field in spatial coordinates on rank 0 !!!!!
+  SUBROUTINE gather_xyz(field_loc,field_tot,nky_loc,nky_tot,nkx_tot,nz_loc,nz_tot)
+    IMPLICIT NONE
+    INTEGER, INTENT(IN) :: nky_loc,nky_tot,nkx_tot,nz_loc,nz_tot
+    COMPLEX(xp), DIMENSION(nky_loc,nkx_tot,nz_loc), INTENT(IN)  :: field_loc
+    COMPLEX(xp), DIMENSION(nky_tot,nkx_tot,nz_tot), INTENT(OUT) :: field_tot
+    COMPLEX(xp), DIMENSION(nky_tot,nz_loc) :: buffer_yt_zl !full  y, local z
+    COMPLEX(xp), DIMENSION(nky_tot,nz_tot) :: buffer_yt_zt !full  y, full  z
+    COMPLEX(xp), DIMENSION(nky_loc):: buffer_yl_zc !local y, constant z
+    COMPLEX(xp), DIMENSION(nky_tot):: buffer_yt_zc !full  y, constant z
     INTEGER :: snd_y, snd_z, root_p, root_z, root_ky, ix, iz
 
-    snd_y  = local_nky    ! Number of points to send along y (per z)
-    snd_z  = Nky*local_nz ! Number of points to send along z (full y)
-
+    snd_y  = nky_loc    ! Number of points to send along y (per z)
+    snd_z  = nky_tot*nz_loc ! Number of points to send along z (full y)
     root_p = 0; root_z = 0; root_ky = 0
     IF(rank_p .EQ. root_p) THEN
-      DO ix = 1,Nkx
-        DO iz = izs,ize
+      DO ix = 1,nkx_tot
+        DO iz = 1,nz_loc
           ! fill a buffer to contain a slice of data at constant kx and z
-          buffer_ly_cz(ikys:ikye) = field_sub(ikys:ikye,ix,iz)
-          CALL MPI_GATHERV(buffer_ly_cz, snd_y,        MPI_DOUBLE_COMPLEX, &
-                           buffer_fy_cz, rcv_y, dsp_y, MPI_DOUBLE_COMPLEX, &
+          buffer_yl_zc(1:nky_loc) = field_loc(1:nky_loc,ix,iz)
+          CALL MPI_GATHERV(buffer_yl_zc, snd_y,        mpi_xp_c, &
+                           buffer_yt_zc, rcv_y, dsp_y, mpi_xp_c, &
                            root_ky, comm_ky, ierr)
-          buffer_fy_lz(1:Nky,iz) = buffer_fy_cz(1:Nky)
+          buffer_yt_zl(1:nky_tot,iz) = buffer_yt_zc(1:nky_tot)
         ENDDO
-
-        ! send the full line on y contained by root_kyas
-        IF(rank_ky .EQ. 0) THEN
-          CALL MPI_GATHERV(buffer_fy_lz, snd_z,          MPI_DOUBLE_COMPLEX, &
-                           buffer_fy_fz, rcv_zy, dsp_zy, MPI_DOUBLE_COMPLEX, &
+        ! send the full line on y contained by root_ky
+        IF(rank_ky .EQ. root_ky) THEN
+          CALL MPI_GATHERV(buffer_yt_zl, snd_z,          mpi_xp_c, &
+                           buffer_yt_zt, rcv_zy, dsp_zy, mpi_xp_c, &
                            root_z, comm_z, ierr)
         ENDIF
         ! ID 0 (the one who output) rebuild the whole array
         IF(my_id .EQ. 0) &
-          field_full(1:Nky,ix,1:Nz) = buffer_fy_fz(1:Nky,1:Nz)
+          field_tot(1:nky_tot,ix,1:nz_tot) = buffer_yt_zt(1:nky_tot,1:nz_tot)
       ENDDO
     ENDIF
   END SUBROUTINE gather_xyz
 
   !!!!! Gather a field in kinetic + z coordinates on rank 0 !!!!!
-  SUBROUTINE gather_pjz_i(field_sub,field_full)
-    REAL(dp), DIMENSION(ips_i:ipe_i, 1:Nj_i, izs:ize), INTENT(IN)  :: field_sub
-    REAL(dp), DIMENSION(   1:Np_i,   1:Nj_i,   1:Nz),  INTENT(OUT) :: field_full
-    REAL(dp), DIMENSION(ips_i:ipe_i)          :: buffer_lp_cz !local p, constant z
-    REAL(dp), DIMENSION(   1:Np_i )           :: buffer_fp_cz !full  p, constant z
-    REAL(dp), DIMENSION(   1:Np_i,  izs:ize ) :: buffer_fp_lz !full  p, local z
-    REAL(dp), DIMENSION(   1:Np_i,    1:Nz  ) :: buffer_fp_fz !full  p, full  z
-    INTEGER :: snd_p, snd_z, root_p, root_z, root_ky, ij, iz
-
-    snd_p  = local_np_i    ! Number of points to send along p (per z)
-    snd_z  = Np_i*local_nz ! Number of points to send along z (full p)
-
-    root_p = 0; root_z = 0; root_ky = 0
-    IF(rank_ky .EQ. root_ky) THEN
-      DO ij = 1,Nj_i
-        DO iz = izs,ize
-          ! fill a buffer to contain a slice of data at constant j and z
-          buffer_lp_cz(ips_i:ipe_i) = field_sub(ips_i:ipe_i,ij,iz)
-          CALL MPI_GATHERV(buffer_lp_cz, snd_p,            MPI_DOUBLE_PRECISION, &
-                           buffer_fp_cz, rcv_p_i, dsp_p_i, MPI_DOUBLE_PRECISION, &
-                           root_p, comm_p, ierr)
-          buffer_fp_lz(1:Np_i,iz) = buffer_fp_cz(1:Np_i)
-        ENDDO
-
-        ! send the full line on y contained by root_kyas
-        IF(rank_p .EQ. 0) THEN
-          CALL MPI_GATHERV(buffer_fp_lz, snd_z,              MPI_DOUBLE_PRECISION, &
-                           buffer_fp_fz, rcv_zp_i, dsp_zp_i, MPI_DOUBLE_PRECISION, &
-                           root_z, comm_z, ierr)
-        ENDIF
-        ! ID 0 (the one who output) rebuild the whole array
-        IF(my_id .EQ. 0) &
-          field_full(1:Np_i,ij,1:Nz) = buffer_fp_fz(1:Np_i,1:Nz)
-      ENDDO
-    ENDIF
-  END SUBROUTINE gather_pjz_i
-
-  SUBROUTINE gather_pjz_e(field_sub,field_full)
-    REAL(dp), DIMENSION(ips_e:ipe_e,   1:jmaxe+1, izs:ize), INTENT(IN)    :: field_sub
-    REAL(dp), DIMENSION(   1:pmaxe+1,  1:jmaxe+1,   1:Nz),  INTENT(OUT) :: field_full
-    REAL(dp), DIMENSION(ips_e:ipe_e)         :: buffer_lp_cz !local p, constant z
-    REAL(dp), DIMENSION(   1:pmaxe+1 )       :: buffer_fp_cz !full  p, constant z
-    REAL(dp), DIMENSION(   1:pmaxe+1,  izs:ize ) :: buffer_fp_lz !full  p, local z
-    REAL(dp), DIMENSION(   1:pmaxe+1,    1:Nz  ) :: buffer_fp_fz !full  p, full  z
+  SUBROUTINE gather_pjz(field_loc,field_tot,np_loc,np_tot,nj_tot,nz_loc,nz_tot)
+    IMPLICIT NONE
+    INTEGER, INTENT(IN) :: np_loc,np_tot, nj_tot, nz_loc,nz_tot
+    COMPLEX(xp), DIMENSION(np_loc,nj_tot,nz_loc), INTENT(IN)  :: field_loc
+    COMPLEX(xp), DIMENSION(np_tot,nj_tot,nz_tot), INTENT(OUT) :: field_tot
+    COMPLEX(xp), DIMENSION(np_tot,nz_loc) :: buffer_pt_zl !full  p, local z
+    COMPLEX(xp), DIMENSION(np_tot,nz_tot) :: buffer_pt_zt !full  p, full  z
+    COMPLEX(xp), DIMENSION(np_loc) :: buffer_pl_zc !local p, constant z
+    COMPLEX(xp), DIMENSION(np_tot) :: buffer_pt_zc !full  p, constant z
     INTEGER :: snd_p, snd_z, root_p, root_z, root_ky, ij, iz
 
-    snd_p  = local_np_e    ! Number of points to send along p (per z)
-    snd_z  = Np_e*local_nz ! Number of points to send along z (full p)
+    snd_p  = np_loc    ! Number of points to send along p (per z)
+    snd_z  = np_tot*nz_loc ! Number of points to send along z (full p)
 
     root_p = 0; root_z = 0; root_ky = 0
     IF(rank_ky .EQ. root_ky) THEN
-      DO ij = 1,Nj_i
-        DO iz = izs,ize
+      DO ij = 1,nj_tot
+        DO iz = 1,nz_loc
           ! fill a buffer to contain a slice of data at constant j and z
-          buffer_lp_cz(ips_e:ipe_e) = field_sub(ips_e:ipe_e,ij,iz)
-          CALL MPI_GATHERV(buffer_lp_cz, snd_p,            MPI_DOUBLE_PRECISION, &
-                           buffer_fp_cz, rcv_p_e, dsp_p_e, MPI_DOUBLE_PRECISION, &
+          buffer_pl_zc(1:np_loc) = field_loc(1:np_loc,ij,iz)
+          CALL MPI_GATHERV(buffer_pl_zc, snd_p,        mpi_xp_c, &
+                           buffer_pt_zc, rcv_p, dsp_p, mpi_xp_c, &
                            root_p, comm_p, ierr)
-          buffer_fp_lz(1:Np_e,iz) = buffer_fp_cz(1:Np_e)
+          buffer_pt_zl(1:np_tot,iz) = buffer_pt_zc(1:np_tot)
         ENDDO
-
-        ! send the full line on y contained by root_kyas
-        IF(rank_p .EQ. 0) THEN
-          CALL MPI_GATHERV(buffer_fp_lz, snd_z,              MPI_DOUBLE_PRECISION, &
-                           buffer_fp_fz, rcv_zp_e, dsp_zp_e, MPI_DOUBLE_PRECISION, &
+        ! send the full line on y contained by root_p
+        IF(rank_p .EQ. root_p) THEN
+          CALL MPI_GATHERV(buffer_pt_zl, snd_z,          mpi_xp_c, &
+                           buffer_pt_zt, rcv_zp, dsp_zp, mpi_xp_c, &
                            root_z, comm_z, ierr)
         ENDIF
         ! ID 0 (the one who output) rebuild the whole array
         IF(my_id .EQ. 0) &
-          field_full(1:Np_e,ij,1:Nz) = buffer_fp_fz(1:Np_e,1:Nz)
+          field_tot(1:np_tot,ij,1:nz_tot) = buffer_pt_zt(1:np_tot,1:nz_tot)
       ENDDO
     ENDIF
-  END SUBROUTINE gather_pjz_e
+  END SUBROUTINE gather_pjz
 
   !!!!! Gather a field in kinetic + spatial coordinates on rank 0 !!!!!
   !!!!! Gather a field in spatial coordinates on rank 0 !!!!!
-  SUBROUTINE gather_pjxyz_i(field_sub,field_full)
-    COMPLEX(dp), DIMENSION( ips_i:ipe_i, 1:Nj_i, ikys:ikye, 1:Nkx, izs:ize), INTENT(IN)  :: field_sub
-    COMPLEX(dp), DIMENSION(     1:Np_i,  1:Nj_i,    1:Nky,  1:Nkx,   1:Nz),  INTENT(OUT) :: field_full
-    COMPLEX(dp), DIMENSION(ips_i:ipe_i)       :: buffer_lp_cy_cz     !local p, constant y, constant z
-    COMPLEX(dp), DIMENSION(1:Np_i)            :: buffer_fp_cy_cz     ! full p, constant y, constant z
-    COMPLEX(dp), DIMENSION(1:Np_i, ikys:ikye)    :: buffer_fp_ly_cz     ! full p,    local y, constant z
-    COMPLEX(dp), DIMENSION(1:Np_i,    1:Nky )    :: buffer_fp_fy_cz     ! full p,     full y, constant z
-    COMPLEX(dp), DIMENSION(1:Np_i,    1:Nky, izs:ize ) :: buffer_fp_fy_lz ! full p,     full y, local    z
-    COMPLEX(dp), DIMENSION(1:Np_i,    1:Nky,   1:Nz  ) :: buffer_fp_fy_fz ! full p,     full y, full     z
-    INTEGER :: snd_p, snd_y, snd_z, root_p, root_z, root_ky, iy, ix, iz, ij
-
-    snd_p  =     local_np_i    ! Number of points to send along p (per z,y)
-    snd_y  = Np_i*local_nky    ! Number of points to send along y (per z, full p)
-    snd_z  = Np_i*Nky*local_nz ! Number of points to send along z (full y, full p)
-
-    root_p = 0; root_z = 0; root_ky = 0
-
-    j: DO ij = 1,Nj_i
-      x: DO ix = 1,Nkx
-        z: DO iz = izs,ize
-          y: DO iy = ikys,ikye
-            ! fill a buffer to contain a slice of p data at constant j, ky, kx and z
-            buffer_lp_cy_cz(ips_i:ipe_i) = field_sub(ips_i:ipe_i,ij,iy,ix,iz)
-            CALL MPI_GATHERV(buffer_lp_cy_cz, snd_p,            MPI_DOUBLE_COMPLEX, &
-                             buffer_fp_cy_cz, rcv_p_i, dsp_p_i, MPI_DOUBLE_COMPLEX, &
-                             root_p, comm_p, ierr)
-            buffer_fp_ly_cz(1:Np_i,iy) = buffer_fp_cy_cz(1:Np_i)
-          ENDDO y
-          ! send the full line on p contained by root_p
-          IF(rank_p .EQ. 0) THEN
-            CALL MPI_GATHERV(buffer_fp_ly_cz, snd_y,        MPI_DOUBLE_COMPLEX, &
-                             buffer_fp_fy_cz, rcv_yp_i, dsp_yp_i, MPI_DOUBLE_COMPLEX, &
-                             root_ky, comm_ky, ierr)
-            buffer_fp_fy_lz(1:Np_i,1:Nky,iz) = buffer_fp_fy_cz(1:Np_i,1:Nky)
-          ENDIF
-        ENDDO z
-        ! send the full line on y contained by root_kyas
-        IF(rank_ky .EQ. 0) THEN
-          CALL MPI_GATHERV(buffer_fp_fy_lz, snd_z,                MPI_DOUBLE_COMPLEX, &
-                           buffer_fp_fy_fz, rcv_zyp_i, dsp_zyp_i, MPI_DOUBLE_COMPLEX, &
-                           root_z, comm_z, ierr)
-        ENDIF
-        ! ID 0 (the one who output) rebuild the whole array
-        IF(my_id .EQ. 0) &
-          field_full(1:Np_i,ij,1:Nky,ix,1:Nz) = buffer_fp_fy_fz(1:Np_i,1:Nky,1:Nz)
-      ENDDO x
-    ENDDO j
-
-  END SUBROUTINE gather_pjxyz_i
-
-  SUBROUTINE gather_pjxyz_e(field_sub,field_full)
-    COMPLEX(dp), DIMENSION( ips_e:ipe_e, 1:Nj_e, ikys:ikye, 1:Nkx, izs:ize), INTENT(IN)    :: field_sub
-    COMPLEX(dp), DIMENSION(     1:Np_e,  1:Nj_e,    1:Nky,  1:Nkx,   1:Nz),  INTENT(OUT) :: field_full
-    COMPLEX(dp), DIMENSION(ips_e:ipe_e)       :: buffer_lp_cy_cz     !local p, constant y, constant z
-    COMPLEX(dp), DIMENSION(1:Np_e)            :: buffer_fp_cy_cz     ! full p, constant y, constant z
-    COMPLEX(dp), DIMENSION(1:Np_e, ikys:ikye)    :: buffer_fp_ly_cz     ! full p,    local y, constant z
-    COMPLEX(dp), DIMENSION(1:Np_e,    1:Nky )    :: buffer_fp_fy_cz     ! full p,     full y, constant z
-    COMPLEX(dp), DIMENSION(1:Np_e,    1:Nky, izs:ize ) :: buffer_fp_fy_lz ! full p,     full y, local    z
-    COMPLEX(dp), DIMENSION(1:Np_e,    1:Nky,   1:Nz  ) :: buffer_fp_fy_fz ! full p,     full y, full     z
-    INTEGER :: snd_p, snd_y, snd_z, root_p, root_z, root_ky, iy, ix, iz, ij
-
-    snd_p  =     local_np_e    ! Number of points to send along p (per z,y)
-    snd_y  = Np_e*local_nky    ! Number of points to send along y (per z, full p)
-    snd_z  = Np_e*Nky*local_nz ! Number of points to send along z (full y, full p)
-
+  SUBROUTINE gather_pjxyz(field_loc,field_tot,na_tot,np_loc,np_tot,nj_tot,nky_loc,nky_tot,nkx_tot,nz_loc,nz_tot)
+    IMPLICIT NONE
+    INTEGER, INTENT(IN) :: na_tot,np_loc,np_tot,nj_tot,nky_loc,nky_tot,nkx_tot,nz_loc,nz_tot
+    COMPLEX(xp), DIMENSION(np_loc,nj_tot,nky_loc,nkx_tot,nz_loc), INTENT(IN)  :: field_loc
+    COMPLEX(xp), DIMENSION(na_tot,np_tot,nj_tot,nky_tot,nkx_tot,nz_tot), INTENT(OUT) :: field_tot
+    COMPLEX(xp), DIMENSION(np_tot,nky_tot,nz_loc) :: buffer_pt_yt_zl ! full p,     full y, local    z
+    COMPLEX(xp), DIMENSION(np_tot,nky_tot,nz_tot) :: buffer_pt_yt_zt ! full p,     full y, full     z
+    COMPLEX(xp), DIMENSION(np_tot,nky_loc) :: buffer_pt_yl_zc     ! full p,    local y, constant z
+    COMPLEX(xp), DIMENSION(np_tot,nky_tot) :: buffer_pt_yt_zc     ! full p,     full y, constant z
+    COMPLEX(xp), DIMENSION(np_loc)       :: buffer_pl_cy_zc     !local p, constant y, constant z
+    COMPLEX(xp), DIMENSION(np_tot)       :: buffer_pt_cy_zc     ! full p, constant y, constant z
+    INTEGER :: snd_p, snd_y, snd_z, root_p, root_z, root_ky, iy, ix, iz, ij, ia
+    snd_p  = np_loc                ! Number of points to send along p (per z,y)
+    snd_y  = np_tot*nky_loc        ! Number of points to send along y (per z, full p)
+    snd_z  = np_tot*nky_tot*nz_loc ! Number of points to send along z (full y, full p)
     root_p = 0; root_z = 0; root_ky = 0
-
-    j: DO ij = 1,Nj_e
-      x: DO ix = 1,Nkx
-        z: DO iz = izs,ize
-          y: DO iy = ikys,ikye
-            ! fill a buffer to contain a slice of p data at constant j, ky, kx and z
-            buffer_lp_cy_cz(ips_e:ipe_e) = field_sub(ips_e:ipe_e,ij,iy,ix,iz)
-            CALL MPI_GATHERV(buffer_lp_cy_cz, snd_p,            MPI_DOUBLE_COMPLEX, &
-                             buffer_fp_cy_cz, rcv_p_e, dsp_p_e, MPI_DOUBLE_COMPLEX, &
-                             root_p, comm_p, ierr)
-            buffer_fp_ly_cz(1:Np_e,iy) = buffer_fp_cy_cz(1:Np_e)
-          ENDDO y
-          ! send the full line on p contained by root_p
-          IF(rank_p .EQ. 0) THEN
-            CALL MPI_GATHERV(buffer_fp_ly_cz, snd_y,        MPI_DOUBLE_COMPLEX, &
-                             buffer_fp_fy_cz, rcv_yp_e, dsp_yp_e, MPI_DOUBLE_COMPLEX, &
-                             root_ky, comm_ky, ierr)
-            buffer_fp_fy_lz(1:Np_e,1:Nky,iz) = buffer_fp_fy_cz(1:Np_e,1:Nky)
+    a: DO ia= 1,na_tot
+      j: DO ij = 1,nj_tot
+        x: DO ix = 1,nkx_tot
+          z: DO iz = 1,nz_loc
+            y: DO iy = 1,nky_loc
+              ! fill a buffer to contain a slice of p data at constant j, ky, kx and z
+              buffer_pl_cy_zc(1:np_loc) = field_loc(1:np_loc,ij,iy,ix,iz)
+              CALL MPI_GATHERV(buffer_pl_cy_zc, snd_p,       mpi_xp_c, &
+                              buffer_pt_cy_zc, rcv_p, dsp_p, mpi_xp_c, &
+                              root_p, comm_p, ierr)
+              buffer_pt_yl_zc(1:np_tot,iy) = buffer_pt_cy_zc(1:np_tot)
+            ENDDO y
+            ! send the full line on p contained by root_p
+            IF(rank_p .EQ. 0) THEN
+              CALL MPI_GATHERV(buffer_pt_yl_zc, snd_y,         mpi_xp_c, &
+                              buffer_pt_yt_zc, rcv_yp, dsp_yp, mpi_xp_c, &
+                              root_ky, comm_ky, ierr)
+              buffer_pt_yt_zl(1:np_tot,1:nky_tot,iz) = buffer_pt_yt_zc(1:np_tot,1:nky_tot)
+            ENDIF
+          ENDDO z
+          ! send the full line on y contained by root_kyas
+          IF(rank_ky .EQ. 0) THEN
+            CALL MPI_GATHERV(buffer_pt_yt_zl, snd_z,           mpi_xp_c, &
+                            buffer_pt_yt_zt, rcv_zyp, dsp_zyp, mpi_xp_c, &
+                            root_z, comm_z, ierr)
           ENDIF
-        ENDDO z
-        ! send the full line on y contained by root_kyas
-        IF(rank_ky .EQ. 0) THEN
-          CALL MPI_GATHERV(buffer_fp_fy_lz, snd_z,                MPI_DOUBLE_COMPLEX, &
-                           buffer_fp_fy_fz, rcv_zyp_e, dsp_zyp_e, MPI_DOUBLE_COMPLEX, &
-                           root_z, comm_z, ierr)
-        ENDIF
-        ! ID 0 (the one who output) rebuild the whole array
-        IF(my_id .EQ. 0) &
-          field_full(1:Np_e,ij,1:Nky,ix,1:Nz) = buffer_fp_fy_fz(1:Np_e,1:Nky,1:Nz)
-      ENDDO x
-    ENDDO j
-
-  END SUBROUTINE gather_pjxyz_e
+          ! ID 0 (the one who ouptut) rebuild the whole array
+          IF(my_id .EQ. 0) &
+            field_tot(ia,1:np_tot,ij,1:nky_tot,ix,1:nz_tot) = buffer_pt_yt_zt(1:np_tot,1:nky_tot,1:nz_tot)
+        ENDDO x
+      ENDDO j
+    ENDDO a
+  END SUBROUTINE gather_pjxyz
 
   !!!!! This is a manual way to do MPI_BCAST !!!!!!!!!!!
-  SUBROUTINE manual_3D_bcast(field_)
-    USE grid
+  SUBROUTINE manual_3D_bcast(field_,n1,n2,n3)
     IMPLICIT NONE
-    COMPLEX(dp), INTENT(INOUT) :: field_(ikys:ikye,ikxs:ikxe,izs:ize)
-    COMPLEX(dp) :: buffer(ikys:ikye,ikxs:ikxe,izs:ize)
-    INTEGER     :: i_, root, world_rank, world_size, count
+    INTEGER, INTENT(IN) :: n1,n2,n3
+    COMPLEX(xp), DIMENSION(n1,n2,n3), INTENT(INOUT) :: field_
+    COMPLEX(xp) :: buffer(n1,n2,n3)
+    INTEGER     :: i_, root, world_rank, world_size, count, i1,i2,i3
     root = 0;
-    count = (ikye-ikys+1) * (ikxe-ikxs+1) * (ize-izs+1);
+    count = n1*n2*n3;
 
     CALL MPI_COMM_RANK(comm_p,world_rank,ierr)
     CALL MPI_COMM_SIZE(comm_p,world_size,ierr)
@@ -355,26 +380,26 @@ CONTAINS
       !! Broadcast phi to the other processes on the same k range (communicator along p)
       IF (world_rank .EQ. root) THEN
         ! Fill the buffer
-        DO iz = izs,ize
-          DO ikx = ikxs,ikxe
-            DO iky = ikys,ikye
-                buffer(iky,ikx,iz) = field_(iky,ikx,iz)
+        DO i3 = 1,n3
+          DO i2 = 1,n2
+            DO i1 = 1,n1
+                buffer(i1,i2,i3) = field_(i1,i2,i3)
               ENDDO
           ENDDO
         ENDDO
         ! Send it to all the other processes
         DO i_ = 0,num_procs_p-1
           IF (i_ .NE. world_rank) &
-          CALL MPI_SEND(buffer, count, MPI_DOUBLE_COMPLEX, i_, 0, comm_p, ierr)
+          CALL MPI_SEND(buffer, count, mpi_xp_c, i_, 0, comm_p, ierr)
         ENDDO
       ELSE
         ! Recieve buffer from root
-        CALL MPI_RECV(buffer, count, MPI_DOUBLE_COMPLEX, root, 0, comm_p, MPI_STATUS_IGNORE, ierr)
+        CALL MPI_RECV(buffer, count, mpi_xp_c, root, 0, comm_p, MPI_STATUS_IGNORE, ierr)
         ! Write it in phi
-        DO iz = izs,ize
-          DO ikx = ikxs,ikxe
-            DO iky = ikys,ikye
-              field_(iky,ikx,iz) = buffer(iky,ikx,iz)
+        DO i3 = 1,n3
+          DO i2 = 1,n2
+            DO i1 = 1,n1
+              field_(i1,i2,i3) = buffer(i1,i2,i3)
             ENDDO
           ENDDO
         ENDDO
@@ -384,10 +409,9 @@ CONTAINS
 
   !!!!! This is a manual way to do MPI_BCAST !!!!!!!!!!!
   SUBROUTINE manual_0D_bcast(v)
-    USE grid
     IMPLICIT NONE
-    COMPLEX(dp), INTENT(INOUT) :: v
-    COMPLEX(dp) :: buffer
+    COMPLEX(xp), INTENT(INOUT) :: v
+    COMPLEX(xp) :: buffer
     INTEGER     :: i_, root, world_rank, world_size, count
     root = 0;
     count = 1;
@@ -402,16 +426,37 @@ CONTAINS
         ! Send it to all the other processes
         DO i_ = 0,num_procs_z-1
           IF (i_ .NE. world_rank) &
-          CALL MPI_SEND(buffer, count, MPI_DOUBLE_COMPLEX, i_, 0, comm_z, ierr)
+          CALL MPI_SEND(buffer, count, mpi_xp_c, i_, 0, comm_z, ierr)
         ENDDO
       ELSE
         ! Recieve buffer from root
-        CALL MPI_RECV(buffer, count, MPI_DOUBLE_COMPLEX, root, 0, comm_z, MPI_STATUS_IGNORE, ierr)
+        CALL MPI_RECV(buffer, count, mpi_xp_c, root, 0, comm_z, MPI_STATUS_IGNORE, ierr)
         ! Write it in phi
         v = buffer
       ENDIF
     ENDIF
   END SUBROUTINE manual_0D_bcast
 
+  ! Routine that exchange ghosts on one dimension
+  SUBROUTINE exchange_ghosts_1D(f,nbr_L,nbr_R,np,ng)
+    IMPLICIT NONE
+    INTEGER, INTENT(IN) :: np,ng, nbr_L, nbr_R
+    COMPLEX(xp), DIMENSION(np+ng), INTENT(INOUT) :: f
+    INTEGER :: ierr, first, last, ig
+    first = 1 + ng/2
+    last  = np + ng/2
+    !!!!!!!!!!! Send ghost to right neighbour !!!!!!!!!!!!!!!!!!!!!!
+    DO ig = 1,ng/2
+      CALL mpi_sendrecv(f(last-(ig-1)), 1, mpi_xp_c, nbr_R, 14+ig, &
+                           f(first-ig), 1, mpi_xp_c, nbr_L, 14+ig, &
+                        comm0, MPI_STATUS_IGNORE, ierr)
+    ENDDO
+    !!!!!!!!!!! Send ghost to left neighbour !!!!!!!!!!!!!!!!!!!!!!
+    DO ig = 1,ng/2
+    CALL mpi_sendrecv(f(first+(ig-1)), 1, mpi_xp_c, nbr_L, 16+ig, &
+                           f(last+ig), 1, mpi_xp_c, nbr_R, 16+ig, &
+                      comm0, MPI_STATUS_IGNORE, ierr)
+    ENDDO
+  END SUBROUTINE exchange_ghosts_1D
 
 END MODULE parallel
diff --git a/src/ppexit.F90 b/src/ppexit.F90
index 0e5570852cba26d87e682ba1ac8a1e7cbec78363..dc68c0b7bfade20c1005f808302431b13bea3904 100644
--- a/src/ppexit.F90
+++ b/src/ppexit.F90
@@ -6,7 +6,7 @@ SUBROUTINE ppexit
 
   use prec_const
   IMPLICIT NONE
-
+  INTEGER :: ierr
 
   CALL finalize_plans
   CALL MPI_BARRIER(MPI_COMM_WORLD, ierr)
diff --git a/src/ppinit.F90 b/src/ppinit.F90
deleted file mode 100644
index 3eb59a8f4f44eb45ed0ae9a5d6e4127f5e773aae..0000000000000000000000000000000000000000
--- a/src/ppinit.F90
+++ /dev/null
@@ -1,90 +0,0 @@
-SUBROUTINE ppinit
-  !   Parallel environment
-
-  USE basic
-  use prec_const
-  IMPLICIT NONE
-
-  ! Variables for cartesian domain decomposition
-  INTEGER, PARAMETER :: ndims=3 ! p, kx and z
-  INTEGER, DIMENSION(ndims) :: dims=0, coords=0
-  LOGICAL :: periods(ndims) = .FALSE., reorder=.FALSE.
-  CHARACTER(len=32) :: str
-  INTEGER :: nargs, i, l
-
-  CALL MPI_INIT(ierr)
-
-  CALL MPI_COMM_RANK (MPI_COMM_WORLD,     my_id, ierr)
-  CALL MPI_COMM_SIZE (MPI_COMM_WORLD, num_procs, ierr)
-
-  nargs = COMMAND_ARGUMENT_COUNT()
-  !
-  IF( nargs .GT. 1 ) THEN
-     DO i=1,ndims
-        CALL GET_COMMAND_ARGUMENT(i, str, l, ierr)
-        READ(str(1:l),'(i3)')  dims(i)
-     END DO
-     IF( PRODUCT(dims) .NE. num_procs ) THEN
-      IF(my_id .EQ. 0) WRITE(*, '(a,i4,a,i4)') 'Product of dims: ', PRODUCT(dims), " is not consistent WITH NPROCS=",num_procs
-        CALL MPI_ABORT(MPI_COMM_WORLD, -2, ierr)
-     END IF
-  ELSE
-    !  CALL MPI_DIMS_CREATE(num_procs, ndims, dims, ierr)
-    dims(1) = 1
-    dims(2) = num_procs
-    dims(3) = 1
-  END IF
-
-  num_procs_p  = dims(1) ! Number of processes along p
-  num_procs_ky = dims(2) ! Number of processes along kx
-  num_procs_z  = dims(3) ! Number of processes along z
-
-  !
-  !periodicity in p
-  periods(1)=.FALSE.
-  !periodicity in ky
-  periods(2)=.FALSE.
-  !periodicity in z
-  periods(3)=.TRUE.
-
-  CALL MPI_CART_CREATE(MPI_COMM_WORLD, ndims, dims, periods, reorder, comm0, ierr)
-  CALL MPI_COMM_GROUP(comm0,group0, ierr)
-  CALL MPI_COMM_RANK(comm0, rank_0,  ierr)
-  CALL MPI_CART_COORDS(comm0,rank_0,ndims,coords,ierr)
-
-  !
-  !  Partitions 3-dim cartesian topology of comm0 into 1-dim cartesian subgrids
-  !
-  CALL MPI_CART_SUB (comm0, (/.TRUE.,.FALSE.,.FALSE./),  comm_p, ierr)
-  CALL MPI_CART_SUB (comm0, (/.FALSE.,.TRUE.,.FALSE./), comm_ky, ierr)
-  CALL MPI_CART_SUB (comm0, (/.FALSE.,.FALSE.,.TRUE./),  comm_z, ierr)
-  ! Find id inside the 1d-sub communicators
-  CALL MPI_COMM_RANK(comm_p,  rank_p,  ierr)
-  CALL MPI_COMM_RANK(comm_ky, rank_ky, ierr)
-  CALL MPI_COMM_RANK(comm_z,  rank_z,  ierr)
-
-  ! 2D communicator
-  CALL MPI_CART_SUB (comm0, (/.TRUE.,.FALSE.,.TRUE./),  comm_pz,  ierr)
-  CALL MPI_CART_SUB (comm0, (/.FALSE.,.TRUE.,.TRUE./),  comm_kyz, ierr)
-  ! Count the number of processes in 2D comms
-  CALL MPI_COMM_SIZE(comm_pz, num_procs_pz, ierr)
-  CALL MPI_COMM_SIZE(comm_kyz,num_procs_kyz,ierr)
-  ! Find id inside the 1d-sub communicators
-  CALL MPI_COMM_RANK(comm_pz,  rank_pz,   ierr)
-  CALL MPI_COMM_RANK(comm_kyz, rank_kyz,  ierr)
-
-  ! Find neighbours
-  CALL MPI_CART_SHIFT(comm0, 0, 1, nbr_L, nbr_R, ierr) !left   right neighbours
-  CALL MPI_CART_SHIFT(comm0, 1, 1, nbr_B, nbr_T, ierr) !bottom top   neighbours
-  CALL MPI_CART_SHIFT(comm0, 2, 1, nbr_D, nbr_U, ierr) !down   up    neighbours
-
-  ! Create the communicator for groups used in gatherv
-  ! CALL MPI_COMM_GROUP(comm0,group_ky0)
-  ! ALLOCATE(rank2include(0:num_procs_ky))
-  ! DO r_ = 0,rank_0
-  !   IF(rank_y .EQ. 0) &
-  !     rank2exclude
-  ! ENDDO
-  ! CALL MPI_COMM_CREATE_GROUPE(comm0, group_p0, comm_ky0)
-
-END SUBROUTINE ppinit
diff --git a/src/prec_const_mod.F90 b/src/prec_const_mod.F90
index a9ecaff66d6c7e73b70f3b63d3ea041401b7796e..ae3bfdf31103400741e3fbf4e7730562b6e0c384 100644
--- a/src/prec_const_mod.F90
+++ b/src/prec_const_mod.F90
@@ -6,44 +6,62 @@ MODULE prec_const
                                            stdin=>input_unit, &
                                            stdout=>output_unit, &
                                            stderr=>error_unit
+  use, intrinsic :: iso_c_binding
 
-  ! Precision for real and complex
-  INTEGER, PARAMETER :: sp = REAL32 !Single precision, should not be used
-  INTEGER, PARAMETER :: dp = REAL64 !real(dp), enforced through the code
-
+  ! Define single and double precision
+  INTEGER, PARAMETER :: sp = REAL32 !Single precision
+  INTEGER, PARAMETER :: dp = REAL64 !Double precision  
   INTEGER, private :: dp_r, dp_p !Range and Aprecision of doubles
   INTEGER, private :: sp_r, sp_p !Range and precision of singles
-
-
   INTEGER, private :: MPI_SP !Single precision for MPI
   INTEGER, private :: MPI_DP !Double precision in MPI
   INTEGER, private :: MPI_SUM_DP !Sum reduction operation for DP datatype
   INTEGER, private :: MPI_MAX_DP !Max reduction operation for DP datatype
   INTEGER, private :: MPI_MIN_DP !Min reduction operation for DP datatype
 
+  ! Define a generic precision parameter for the entire program
+#ifdef SINGLE_PRECISION
+    INTEGER, PARAMETER :: xp       = REAL32
+    INTEGER, PARAMETER :: c_xp_c   = C_FLOAT_COMPLEX
+    INTEGER, PARAMETER :: c_xp_r   = C_FLOAT
+    INTEGER, PARAMETER :: mpi_xp_c = MPI_COMPLEX
+#else
+    INTEGER, PARAMETER :: xp       = REAL64
+    INTEGER, PARAMETER :: c_xp_c   = C_DOUBLE_COMPLEX
+    INTEGER, PARAMETER :: c_xp_r   = C_DOUBLE
+    INTEGER, PARAMETER :: mpi_xp_c = MPI_DOUBLE_COMPLEX
+
+#endif
+  ! Auxiliary variables (unused)
+  INTEGER, private   :: xp_r, xp_p !Range and precision of single
+  INTEGER, private   :: MPI_XP     !Double precision in MPI
+  INTEGER, private   :: MPI_SUM_XP !Sum reduction operation for xp datatype
+  INTEGER, private   :: MPI_MAX_XP !Max reduction operation for xp datatype
+  INTEGER, private   :: MPI_MIN_XP !Min reduction operation for xp datatype
+
 
   ! Some useful constants, to avoid recomputing them too often
-  REAL(dp),    PARAMETER :: PI=3.141592653589793238462643383279502884197_dp
-  REAL(dp),    PARAMETER :: PIO2=1.57079632679489661923132169163975144209858_dp
-  REAL(dp),    PARAMETER :: TWOPI=6.283185307179586476925286766559005768394_dp
-  REAL(dp),    PARAMETER :: ONEOPI=0.3183098861837906912164442019275156781077_dp
-  REAL(dp),    PARAMETER :: SQRT2=1.41421356237309504880168872420969807856967_dp
-  REAL(dp),    PARAMETER :: SQRT6=SQRT(6._dp)
-  REAL(dp),    PARAMETER :: INVSQRT2=0.7071067811865475244008443621048490392848359377_dp
-  REAL(dp),    PARAMETER :: SQRT3=1.73205080756887729352744634150587236694281_dp
-  REAL(dp),    PARAMETER :: onetwelfth=0.08333333333333333333333333333333333333333333333_dp
-  REAL(dp),    PARAMETER :: onetwentyfourth=0.04166666666666666666666666666666666666666666667_dp
-  REAL(dp),    PARAMETER :: onethird=0.33333333333333333333333333333333333333333333333_dp
-  REAL(dp),    PARAMETER :: twothird=0.66666666666666666666666666666666666666666666666_dp
-  REAL(dp),    PARAMETER :: onesixth=0.1666666666666666666666666666666666666666666667_dp
-  REAL(dp),    PARAMETER :: fivesixths=0.8333333333333333333333333333333333333333333333_dp
-  REAL(dp),    PARAMETER :: sevensixths=1.1666666666666666666666666666666666666666666667_dp
-  REAL(dp),    PARAMETER :: elevensixths=1.833333333333333333333333333333333333333333333_dp
-  REAL(dp),    PARAMETER :: nineeighths=1.125_dp
-  REAL(dp),    PARAMETER :: onesixteenth=0.0625_dp
-  REAL(dp),    PARAMETER :: ninesixteenths=0.5625_dp
-  REAL(dp),    PARAMETER :: thirteentwelfths = 1.083333333333333333333333333333333333333333333_dp
-  COMPLEX(dp), PARAMETER :: imagu = (0._dp,1._dp)
+  REAL(xp),    PARAMETER :: PI=3.141592653589793238462643383279502884197_xp
+  REAL(xp),    PARAMETER :: PIO2=1.57079632679489661923132169163975144209858_xp
+  REAL(xp),    PARAMETER :: TWOPI=6.283185307179586476925286766559005768394_xp
+  REAL(xp),    PARAMETER :: ONEOPI=0.3183098861837906912164442019275156781077_xp
+  REAL(xp),    PARAMETER :: SQRT2=1.41421356237309504880168872420969807856967_xp
+  REAL(xp),    PARAMETER :: SQRT6=SQRT(6._xp)
+  REAL(xp),    PARAMETER :: INVSQRT2=0.7071067811865475244008443621048490392848359377_xp
+  REAL(xp),    PARAMETER :: SQRT3=1.73205080756887729352744634150587236694281_xp
+  REAL(xp),    PARAMETER :: onetwelfth=0.08333333333333333333333333333333333333333333333_xp
+  REAL(xp),    PARAMETER :: onetwentyfourth=0.04166666666666666666666666666666666666666666667_xp
+  REAL(xp),    PARAMETER :: onethird=0.33333333333333333333333333333333333333333333333_xp
+  REAL(xp),    PARAMETER :: twothird=0.66666666666666666666666666666666666666666666666_xp
+  REAL(xp),    PARAMETER :: onesixth=0.1666666666666666666666666666666666666666666667_xp
+  REAL(xp),    PARAMETER :: fivesixths=0.8333333333333333333333333333333333333333333333_xp
+  REAL(xp),    PARAMETER :: sevensixths=1.1666666666666666666666666666666666666666666667_xp
+  REAL(xp),    PARAMETER :: elevensixths=1.833333333333333333333333333333333333333333333_xp
+  REAL(xp),    PARAMETER :: nineeighths=1.125_xp
+  REAL(xp),    PARAMETER :: onesixteenth=0.0625_xp
+  REAL(xp),    PARAMETER :: ninesixteenths=0.5625_xp
+  REAL(xp),    PARAMETER :: thirteentwelfths = 1.083333333333333333333333333333333333333333333_xp
+  COMPLEX(xp), PARAMETER :: imagu = (0._xp,1._xp)
   CONTAINS
 
     SUBROUTINE INIT_PREC_CONST
@@ -51,21 +69,24 @@ MODULE prec_const
       IMPLICIT NONE
       integer :: ierr,me
 
-      REAL(sp) :: a = 1_sp
-      REAL(dp) :: b = 1_dp
-
+      ! REAL(sp) :: a = 1_sp
+      ! REAL(dp) :: b = 1_dp
       !Get range and precision of ISO FORTRAN sizes
-      sp_r = range(a)
-      sp_p = precision(a)
-
-      dp_r = range(b)
-      dp_p = precision(b)
+      ! sp_r = range(a)
+      ! sp_p = precision(a)
+      ! dp_r = range(b)
+      ! dp_p = precision(b)
+      
+      REAL(xp) :: c = 1_xp
+      xp_r = range(c)
+      xp_p = precision(c)
 
       CALL mpi_comm_rank(MPI_COMM_WORLD,me,ierr)
 
       !Create MPI datatypes that support the specific size
-      CALL MPI_Type_create_f90_real(sp_p,sp_r,MPI_sp,ierr)
-      CALL MPI_Type_create_f90_real(dp_p,dp_r,MPI_dp,ierr)
+      ! CALL MPI_Type_create_f90_real(sp_p,sp_r,MPI_sp,ierr)
+      ! CALL MPI_Type_create_f90_real(dp_p,dp_r,MPI_xp,ierr)
+      CALL MPI_Type_create_f90_real(xp_p,xp_r,MPI_xp,ierr)
 
     END SUBROUTINE INIT_PREC_CONST
 
diff --git a/src/processing_mod.F90 b/src/processing_mod.F90
index 1577957fac953336a3cf6b0819d35a67f92373de..be8ed50bae8cf475525279564d45318549df754f 100644
--- a/src/processing_mod.F90
+++ b/src/processing_mod.F90
@@ -1,824 +1,563 @@
 MODULE processing
-    USE basic
-    USE prec_const
-    USE grid
-    implicit none
-
-    REAL(dp), PUBLIC, PROTECTED :: pflux_ri, gflux_ri, pflux_re, gflux_re
-    REAL(dp), PUBLIC, PROTECTED :: hflux_xi, hflux_xe
-
-    PUBLIC :: compute_nadiab_moments_z_gradients_and_interp
-    PUBLIC :: compute_density, compute_upar, compute_uperp
-    PUBLIC :: compute_Tpar, compute_Tperp, compute_fluid_moments
-    PUBLIC :: compute_radial_ion_transport, compute_radial_electron_transport
-    PUBLIC :: compute_radial_ion_heatflux, compute_radial_electron_heatflux
-    PUBLIC :: compute_Napjz_spectrum
+   USE prec_const,  ONLY: xp, imagu, SQRT2, SQRT3, onetwelfth, twothird
+   USE grid,        ONLY: &
+      local_na, local_np, local_nj, local_nky, local_nkx, local_nz, Ngz,Ngj,Ngp,nzgrid, &
+      parray,pmax,ip0, iodd, ieven,&
+      CONTAINSp0,ip1,CONTAINSp1,ip2,CONTAINSp2,ip3,CONTAINSp3,&
+      jarray,jmax,ij0, dmax,&
+      kyarray, AA_y,&
+      kxarray, AA_x,&
+      zarray, zweights_SR, ieven, iodd, inv_deltaz
+   USE fields,           ONLY: moments, phi, psi
+   USE array,            ONLY : kernel, nadiab_moments, &
+      ddz_napj, ddzND_Napj, interp_napj,&
+      dens, upar, uper, Tpar, Tper, temp
+   USE geometry,         ONLY: Jacobian, iInt_Jacobian
+   USE time_integration, ONLY: updatetlevel
+   USE calculus,         ONLY: simpson_rule_z, grad_z, grad_z_5D, grad_z2, grad_z4, grad_z4_5D, interp_z
+   USE model,            ONLY: EM, beta, HDz_h
+   USE species,          ONLY: tau,q_tau,q_o_sqrt_tau_sigma,sqrt_tau_o_sigma
+   USE parallel,         ONLY: num_procs_ky, rank_ky, comm_ky
+   USE mpi
+   implicit none
+
+   REAL(xp), PUBLIC, ALLOCATABLE, DIMENSION(:), PROTECTED :: pflux_x, gflux_x
+   REAL(xp), PUBLIC, ALLOCATABLE, DIMENSION(:), PROTECTED :: hflux_x
+   INTEGER :: ierr
+   PUBLIC :: init_process
+   PUBLIC :: compute_nadiab_moments, compute_gradients_z, compute_interp_z
+   PUBLIC :: compute_density, compute_upar, compute_uperp
+   PUBLIC :: compute_Tpar, compute_Tperp, compute_fluid_moments
+   PUBLIC :: compute_radial_transport
+   PUBLIC :: compute_radial_heatflux
+   PUBLIC :: compute_Napjz_spectrum
 CONTAINS
 
-! 1D diagnostic to compute the average radial particle transport <n_i v_ExB_x>_xyz
-SUBROUTINE compute_radial_ion_transport
-  USE fields,           ONLY : moments_i, phi, psi
-  USE array,            ONLY : kernel_i
-  USE geometry,         ONLY : Jacobian, iInt_Jacobian
-  USE time_integration, ONLY : updatetlevel
-  USE calculus,         ONLY : simpson_rule_z
-  USE model,            ONLY : sqrt_tau_o_sigma_i, EM
-  IMPLICIT NONE
-  COMPLEX(dp) :: pflux_local, gflux_local, integral
-  REAL(dp)    :: ky_, buffer(1:2)
-  INTEGER     :: i_, root
-  COMPLEX(dp), DIMENSION(izgs:izge) :: integrant
-
-  pflux_local = 0._dp ! particle flux
-  gflux_local = 0._dp ! gyrocenter flux
-  integrant   = 0._dp ! auxiliary variable for z integration
-  !!---------- Gyro center flux (drift kinetic) ------------
-  ! Electrostatic part
-  IF(CONTAINS_ip0_i) THEN
-    DO iky = ikys,ikye
-        ky_ = kyarray(iky)
-        DO ikx = ikxs,ikxe
-          integrant(izgs:izge) = integrant(izgs:izge) &
-           +moments_i(ip0_i,ij0_i,iky,ikx,izgs:izge,updatetlevel)&
-            *imagu*ky_*CONJG(phi(iky,ikx,izgs:izge))
-      ENDDO
-    ENDDO
-  ENDIF
-  ! Electromagnetic part
-  IF( EM .AND. CONTAINS_ip1_i ) THEN
-    DO iky = ikys,ikye
-        ky_ = kyarray(iky)
-        DO ikx = ikxs,ikxe
-          integrant(izgs:izge) = integrant(izgs:izge)&
-            -sqrt_tau_o_sigma_i*moments_i(ip1_i,ij0_i,iky,ikx,izgs:izge,updatetlevel)&
-             *imagu*ky_*CONJG(psi(iky,ikx,izgs:izge))
-      ENDDO
-    ENDDO
-  ENDIF
-  ! Integrate over z
-  integrant(izgs:izge) = Jacobian(izgs:izge,0)*integrant(izgs:izge)
-  call simpson_rule_z(integrant,integral)
-  ! Get process local gyrocenter flux
-  gflux_local = integral*iInt_Jacobian
-
-  !
-  integrant   = 0._dp ! reset auxiliary variable
-  !!---------- Particle flux (gyrokinetic) ------------
-  ! Electrostatic part
-  IF(CONTAINS_ip0_i) THEN
-    DO iky = ikys,ikye
-        ky_ = kyarray(iky)
-        DO ikx = ikxs,ikxe
-          DO ij = ijs_i, ije_i
-            integrant(izgs:izge) = integrant(izgs:izge)&
-              +moments_i(ip0_i,ij,iky,ikx,izgs:izge,updatetlevel)&
-              *imagu*ky_*kernel_i(ij,iky,ikx,izgs:izge,0)*CONJG(phi(iky,ikx,izgs:izge))
-          ENDDO
-      ENDDO
-    ENDDO
-  ENDIF
-  ! Electromagnetic part
-  IF( EM .AND. CONTAINS_ip1_i ) THEN
-    DO iky = ikys,ikye
-      ky_ = kyarray(iky)
-      DO ikx = ikxs,ikxe
-        integrant   = 0._dp ! auxiliary variable for z integration
-        DO ij = ijs_i, ije_i
-          integrant(izgs:izge) = integrant(izgs:izge)&
-            -sqrt_tau_o_sigma_i*moments_i(ip1_i,ij,iky,ikx,izgs:izge,updatetlevel)&
-            *imagu*ky_*kernel_i(ij,iky,ikx,izgs:izge,0)*CONJG(psi(iky,ikx,izgs:izge))
-        ENDDO
-      ENDDO
-    ENDDO
-  ENDIF
-  ! Integrate over z
-  integrant(izgs:izge) = Jacobian(izgs:izge,0)*integrant(izgs:izge)
-  call simpson_rule_z(integrant,integral)
-  ! Get process local particle flux
-  pflux_local = integral*iInt_Jacobian
-
-  !!!!---------- Sum over all processes ----------
-  buffer(1) = 2._dp*REAL(gflux_local,dp)
-  buffer(2) = 2._dp*REAL(pflux_local,dp)
-  root = 0
-  !Gather manually among the rank_p=0 processes and perform the sum
-  gflux_ri = 0
-  pflux_ri = 0
-  IF (num_procs_ky .GT. 1) THEN
-      !! Everyone sends its local_sum to root = 0
-      IF (rank_ky .NE. root) THEN
-          CALL MPI_SEND(buffer, 2 , MPI_DOUBLE_PRECISION, root, 1234, comm_ky, ierr)
-      ELSE
-          ! Recieve from all the other processes
-          DO i_ = 0,num_procs_ky-1
-              IF (i_ .NE. rank_ky) &
-                  CALL MPI_RECV(buffer, 2 , MPI_DOUBLE_PRECISION, i_, 1234, comm_ky, MPI_STATUS_IGNORE, ierr)
-                  gflux_ri = gflux_ri + buffer(1)
-                  pflux_ri = pflux_ri + buffer(2)
-          ENDDO
-      ENDIF
-  ELSE
-    gflux_ri = gflux_local
-    pflux_ri = pflux_local
-  ENDIF
-  ! if(my_id .eq. 0) write(*,*) 'pflux_ri = ',pflux_ri
-END SUBROUTINE compute_radial_ion_transport
-
-! 1D diagnostic to compute the average radial particle transport <n_e v_ExB_x>_xyz
-SUBROUTINE compute_radial_electron_transport
-  USE fields,           ONLY : moments_e, phi, psi
-  USE array,            ONLY : kernel_e
-  USE geometry,         ONLY : Jacobian, iInt_Jacobian
-  USE time_integration, ONLY : updatetlevel
-  USE calculus,         ONLY : simpson_rule_z
-  USE model,            ONLY : sqrt_tau_o_sigma_e, EM
-  IMPLICIT NONE
-  COMPLEX(dp) :: pflux_local, gflux_local, integral
-  REAL(dp)    :: ky_, buffer(1:2)
-  INTEGER     :: i_, root
-  COMPLEX(dp), DIMENSION(izgs:izge) :: integrant
-
-  pflux_local = 0._dp ! particle flux
-  gflux_local = 0._dp ! gyrocenter flux
-  integrant   = 0._dp ! auxiliary variable for z integration
-  !!---------- Gyro center flux (drift kinetic) ------------
-  ! Electrostatic part
-  IF(CONTAINS_ip0_e) THEN
-    DO iky = ikys,ikye
-        ky_ = kyarray(iky)
-        DO ikx = ikxs,ikxe
-          integrant(izgs:izge) = integrant(izgs:izge) &
-           +moments_e(ip0_e,ij0_e,iky,ikx,izgs:izge,updatetlevel)&
-            *imagu*ky_*CONJG(phi(iky,ikx,izgs:izge))
-      ENDDO
-    ENDDO
-  ENDIF
-  ! Electromagnetic part
-  IF( EM .AND. CONTAINS_ip1_e ) THEN
-    DO iky = ikys,ikye
-        ky_ = kyarray(iky)
-        DO ikx = ikxs,ikxe
-          integrant(izgs:izge) = integrant(izgs:izge)&
-            -sqrt_tau_o_sigma_e*moments_e(ip1_e,ij0_e,iky,ikx,izgs:izge,updatetlevel)&
-             *imagu*ky_*CONJG(psi(iky,ikx,izgs:izge))
-      ENDDO
-    ENDDO
-  ENDIF
-  ! Integrate over z
-  integrant(izgs:izge) = Jacobian(izgs:izge,0)*integrant(izgs:izge)
-  call simpson_rule_z(integrant,integral)
-  ! Get process local gyrocenter flux
-  gflux_local = integral*iInt_Jacobian
-  !
-  integrant   = 0._dp ! reset auxiliary variable
-  !!---------- Particle flux (gyrokinetic) ------------
-  ! Electrostatic part
-  IF(CONTAINS_ip0_e) THEN
-    DO iky = ikys,ikye
-        ky_ = kyarray(iky)
-        DO ikx = ikxs,ikxe
-          DO ij = ijs_e, ije_e
-            integrant(izgs:izge) = integrant(izgs:izge)&
-              +moments_e(ip0_e,ij,iky,ikx,izgs:izge,updatetlevel)&
-              *imagu*ky_*kernel_e(ij,iky,ikx,izgs:izge,0)*CONJG(phi(iky,ikx,izgs:izge))
-          ENDDO
-      ENDDO
-    ENDDO
-  ENDIF
-  ! Electromagnetic part
-  IF( EM .AND. CONTAINS_ip1_e ) THEN
-    DO iky = ikys,ikye
-      ky_ = kyarray(iky)
-      DO ikx = ikxs,ikxe
-        integrant   = 0._dp ! auxiliary variable for z integration
-        DO ij = ijs_e, ije_e
-          integrant(izgs:izge) = integrant(izgs:izge)&
-            -sqrt_tau_o_sigma_e*moments_e(ip1_e,ij,iky,ikx,izgs:izge,updatetlevel)&
-            *imagu*ky_*kernel_e(ij,iky,ikx,izgs:izge,0)*CONJG(psi(iky,ikx,izgs:izge))
-        ENDDO
-      ENDDO
-    ENDDO
-  ENDIF
-  ! Integrate over z
-  integrant(izgs:izge) = Jacobian(izgs:izge,0)*integrant(izgs:izge)
-  call simpson_rule_z(integrant,integral)
-  ! Get process local particle flux
-  pflux_local = integral*iInt_Jacobian
-
-  !!!!---------- Sum over all processes ----------
-  buffer(1) = 2._dp*REAL(gflux_local,dp)
-  buffer(2) = 2._dp*REAL(pflux_local,dp)
-  root = 0
-  !Gather manually among the rank_p=0 processes and perform the sum
-  gflux_re = 0
-  pflux_re = 0
-  IF (num_procs_ky .GT. 1) THEN
-      !! Everyone sends its local_sum to root = 0
-      IF (rank_ky .NE. root) THEN
-          CALL MPI_SEND(buffer, 2 , MPI_DOUBLE_PRECISION, root, 1234, comm_ky, ierr)
-      ELSE
-          ! Recieve from all the other processes
-          DO i_ = 0,num_procs_ky-1
-              IF (i_ .NE. rank_ky) &
-                  CALL MPI_RECV(buffer, 2 , MPI_DOUBLE_PRECISION, i_, 1234, comm_ky, MPI_STATUS_IGNORE, ierr)
-                  gflux_re = gflux_re + buffer(1)
-                  pflux_re = pflux_re + buffer(2)
-          ENDDO
-      ENDIF
-  ELSE
-    gflux_re = gflux_local
-    pflux_re = pflux_local
-  ENDIF
-END SUBROUTINE compute_radial_electron_transport
-
-! 1D diagnostic to compute the average radial particle transport <T_i v_ExB_x>_xyz
-SUBROUTINE compute_radial_ion_heatflux
-  USE fields,           ONLY : moments_i, phi, psi
-  USE array,            ONLY : kernel_i!, HF_phi_correction_operator
-  USE geometry,         ONLY : Jacobian, iInt_Jacobian
-  USE time_integration, ONLY : updatetlevel
-  USE calculus,         ONLY : simpson_rule_z
-  USE model,            ONLY : tau_i, sqrt_tau_o_sigma_i, EM
-  IMPLICIT NONE
-  COMPLEX(dp) :: hflux_local, integral
-  REAL(dp)    :: ky_, buffer(1:2), n_dp
-  INTEGER     :: i_, root, in
-  COMPLEX(dp), DIMENSION(izgs:izge) :: integrant        ! charge density q_a n_a
-
-  hflux_local = 0._dp ! heat flux
-  integrant   = 0._dp ! z integration auxiliary variable
-  !!----------------ELECTROSTATIC CONTRIBUTION---------------------------
-  IF(CONTAINS_ip0_i .AND. CONTAINS_ip2_i) THEN
-    ! Loop to compute gamma_kx = sum_ky sum_j -i k_z Kernel_j Ni00 * phi
-    DO iky = ikys,ikye
-    ky_ = kyarray(iky)
-    DO ikx = ikxs,ikxe
-      DO in = ijs_i, ije_i
-        n_dp = jarray_i(in)
-        integrant(izgs:izge) = integrant(izgs:izge) + tau_i*imagu*ky_*CONJG(phi(iky,ikx,izgs:izge))&
-         *kernel_i(in,iky,ikx,izgs:izge,0)*(&
-                       0.5_dp*SQRT2*moments_i(ip2_i,in  ,iky,ikx,izgs:izge,updatetlevel)&
-             +(2._dp*n_dp + 1.5_dp)*moments_i(ip0_i,in  ,iky,ikx,izgs:izge,updatetlevel)&
-                      -(n_dp+1._dp)*moments_i(ip0_i,in+1,iky,ikx,izgs:izge,updatetlevel)&
-                              -n_dp*moments_i(ip0_i,in-1,iky,ikx,izgs:izge,updatetlevel))
-      ENDDO
-    ENDDO
-    ENDDO
-  ENDIF
-  IF(EM .AND. CONTAINS_ip1_i .AND. CONTAINS_ip3_i) THEN
-    !!----------------ELECTROMAGNETIC CONTRIBUTION--------------------
-    DO iky = ikys,ikye
-    ky_ = kyarray(iky)
-    DO ikx = ikxs,ikxe
-      DO in = ijs_i, ije_i
-        n_dp = jarray_i(in)
-        integrant(izgs:izge) = integrant(izgs:izge) &
-         +tau_i*sqrt_tau_o_sigma_i*imagu*ky_*CONJG(psi(iky,ikx,izgs:izge))&
-           *kernel_i(in,iky,ikx,izgs:izge,0)*(&
-                   0.5_dp*SQRT2*SQRT3*moments_i(ip3_i,in  ,iky,ikx,izgs:izge,updatetlevel)&
-                              +1.5_dp*moments_i(ip1_i,in  ,iky,ikx,izgs:izge,updatetlevel)&
-                  +(2._dp*n_dp+1._dp)*moments_i(ip1_i,in  ,iky,ikx,izgs:izge,updatetlevel)&
-                        -(n_dp+1._dp)*moments_i(ip1_i,in+1,iky,ikx,izgs:izge,updatetlevel)&
-                                -n_dp*moments_i(ip1_i,in-1,iky,ikx,izgs:izge,updatetlevel))
-      ENDDO
-    ENDDO
-    ENDDO
-  ENDIF
-  ! Add polarisation contribution
-  ! integrant(izgs:izge) = integrant(izgs:izge) + tau_i*imagu*ky_&
-  ! *CONJG(phi(iky,ikx,izgs:izge))*phi(iky,ikx,izgs:izge) * HF_phi_correction_operator(iky,ikx,izgs:izge)
-  ! Integrate over z
-  integrant(izgs:izge) = Jacobian(izgs:izge,0)*integrant(izgs:izge)
-  call simpson_rule_z(integrant,integral)
-  hflux_local = hflux_local + integral*iInt_Jacobian
-  ! Double it for taking into account the other half plane
-  buffer(2) = 2._dp*REAL(hflux_local,dp)
-  root = 0
-  !Gather manually among the rank_p=0 processes and perform the sum
-  hflux_xi = 0
-  IF (num_procs_ky .GT. 1) THEN
-      !! Everyone sends its local_sum to root = 0
-      IF (rank_ky .NE. root) THEN
-          CALL MPI_SEND(buffer, 2 , MPI_DOUBLE_PRECISION, root, 1234, comm_ky, ierr)
-      ELSE
-          ! Recieve from all the other processes
-          DO i_ = 0,num_procs_ky-1
-              IF (i_ .NE. rank_ky) &
-                  CALL MPI_RECV(buffer, 2 , MPI_DOUBLE_PRECISION, i_, 1234, comm_ky, MPI_STATUS_IGNORE, ierr)
-                  hflux_xi = hflux_xi + buffer(2)
-          ENDDO
-      ENDIF
-  ELSE
-    hflux_xi = hflux_local
-  ENDIF
-END SUBROUTINE compute_radial_ion_heatflux
-
-
-! 1D diagnostic to compute the average radial particle transport <T_e v_ExB_x>_xyz
-SUBROUTINE compute_radial_electron_heatflux
-  USE fields,           ONLY : moments_e, phi, psi
-  USE array,            ONLY : kernel_e!, HF_phi_correction_operator
-  USE geometry,         ONLY : Jacobian, iInt_Jacobian
-  USE time_integration, ONLY : updatetlevel
-  USE calculus,         ONLY : simpson_rule_z
-  USE model,            ONLY : tau_e, sqrt_tau_o_sigma_e, EM
-  IMPLICIT NONE
-  COMPLEX(dp) :: hflux_local, integral
-  REAL(dp)    :: ky_, buffer(1:2), n_dp
-  INTEGER     :: i_, root, in
-  COMPLEX(dp), DIMENSION(izgs:izge) :: integrant        ! charge density q_a n_a
-
-  hflux_local = 0._dp ! heat flux
-  integrant   = 0._dp ! z integration auxiliary variable
-  !!----------------ELECTROSTATIC CONTRIBUTION---------------------------
-  IF(CONTAINS_ip0_e .AND. CONTAINS_ip2_e) THEN
-    ! Loop to compute gamma_kx = sum_ky sum_j -i k_z Kernel_j Ni00 * phi
-    DO iky = ikys,ikye
-    ky_ = kyarray(iky)
-    DO ikx = ikxs,ikxe
-      DO in = ijs_e, ije_e
-        n_dp = jarray_e(in)
-        integrant(izgs:izge) = integrant(izgs:izge) + tau_e*imagu*ky_*CONJG(phi(iky,ikx,izgs:izge))&
-         *kernel_e(in,iky,ikx,izgs:izge,0)*(&
-                       0.5_dp*SQRT2*moments_e(ip2_e,in  ,iky,ikx,izgs:izge,updatetlevel)&
-             +(2._dp*n_dp + 1.5_dp)*moments_e(ip0_e,in  ,iky,ikx,izgs:izge,updatetlevel)&
-                      -(n_dp+1._dp)*moments_e(ip0_e,in+1,iky,ikx,izgs:izge,updatetlevel)&
-                              -n_dp*moments_e(ip0_e,in-1,iky,ikx,izgs:izge,updatetlevel))
-      ENDDO
-    ENDDO
-    ENDDO
-  ENDIF
-  IF(EM .AND. CONTAINS_ip1_e .AND. CONTAINS_ip3_e) THEN
-    !!----------------ELECTROMAGNETIC CONTRIBUTION--------------------
-    DO iky = ikys,ikye
-    ky_ = kyarray(iky)
-    DO ikx = ikxs,ikxe
-      DO in = ijs_e, ije_e
-        n_dp = jarray_e(in)
-        integrant(izgs:izge) = integrant(izgs:izge) &
-         +tau_e*sqrt_tau_o_sigma_e*imagu*ky_*CONJG(psi(iky,ikx,izgs:izge))&
-           *kernel_e(in,iky,ikx,izgs:izge,0)*(&
-                   0.5_dp*SQRT2*SQRT3*moments_e(ip3_e,in  ,iky,ikx,izgs:izge,updatetlevel)&
-                        +1.5_dp*CONJG(moments_e(ip1_e,in  ,iky,ikx,izgs:izge,updatetlevel))& !?????
-                  +(2._dp*n_dp+1._dp)*moments_e(ip1_e,in  ,iky,ikx,izgs:izge,updatetlevel)&
-                        -(n_dp+1._dp)*moments_e(ip1_e,in+1,iky,ikx,izgs:izge,updatetlevel)&
-                                -n_dp*moments_e(ip1_e,in-1,iky,ikx,izgs:izge,updatetlevel))
-      ENDDO
-    ENDDO
-    ENDDO
-  ENDIF
-  ! Add polarisation contribution
-  ! integrant(izs:ize) = integrant(izs:ize) + tau_e*imagu*ky_&
-  ! *CONJG(phi(iky,ikx,izs:ize))*phi(iky,ikx,izs:ize) * HF_phi_correction_operator(iky,ikx,izs:ize)
-  ! Integrate over z
-  integrant(izgs:izge) = Jacobian(izgs:izge,0)*integrant(izgs:izge)
-  call simpson_rule_z(integrant,integral)
-  hflux_local = hflux_local + integral*iInt_Jacobian
-  ! Double it for taking into account the other half plane
-  buffer(2) = 2._dp*REAL(hflux_local,dp)
-  root = 0
-  !Gather manually among the rank_p=0 processes and perform the sum
-  hflux_xe = 0
-  IF (num_procs_ky .GT. 1) THEN
-      !! Everyone sends its local_sum to root = 0
-      IF (rank_ky .NE. root) THEN
-          CALL MPI_SEND(buffer, 2 , MPI_DOUBLE_PRECISION, root, 1234, comm_ky, ierr)
-      ELSE
-          ! Recieve from all the other processes
-          DO i_ = 0,num_procs_ky-1
-              IF (i_ .NE. rank_ky) &
-                  CALL MPI_RECV(buffer, 2 , MPI_DOUBLE_PRECISION, i_, 1234, comm_ky, MPI_STATUS_IGNORE, ierr)
-                  hflux_xe = hflux_xe + buffer(2)
-          ENDDO
-      ENDIF
-  ELSE
-    hflux_xe = hflux_local
-  ENDIF
-END SUBROUTINE compute_radial_electron_heatflux
-
-
-
-SUBROUTINE compute_nadiab_moments_z_gradients_and_interp
-  ! evaluate the non-adiabatique ion moments
-  !
-  ! n_{pi} = N^{pj} + kernel(j) /tau_i phi delta_p0
-  !
-  USE fields,           ONLY : moments_i, moments_e, phi, psi
-  USE array,            ONLY : kernel_e, kernel_i, nadiab_moments_e, nadiab_moments_i, &
-                               ddz_nepj, ddzND_Nepj, interp_nepj,&
-                               ddz_nipj, ddzND_Nipj, interp_nipj!, ddz_phi
-  USE time_integration, ONLY : updatetlevel
-  USE model,            ONLY : qe_taue, qi_taui,q_o_sqrt_tau_sigma_e, q_o_sqrt_tau_sigma_i, &
-                               KIN_E, CLOS, beta, HDz_h
-  USE calculus,         ONLY : grad_z, grad_z2, grad_z4, interp_z
-  IMPLICIT NONE
-  INTEGER :: eo, p_int, j_int
-  CALL cpu_time(t0_process)
-
-  ! Electron non adiab moments
-
-    IF(KIN_E) THEN
-      DO ip=ipgs_e,ipge_e
-        IF(parray_e(ip) .EQ. 0) THEN
-          DO ij=ijgs_e,ijge_e
-            nadiab_moments_e(ip,ij,ikys:ikye,ikxs:ikxe,izgs:izge) = moments_e(ip,ij,ikys:ikye,ikxs:ikxe,izgs:izge,updatetlevel) &
-                                      + qe_taue*kernel_e(ij,ikys:ikye,ikxs:ikxe,izgs:izge,0)*phi(ikys:ikye,ikxs:ikxe,izgs:izge)
-          ENDDO
-        ELSEIF( (parray_e(ip) .EQ. 1) .AND. (beta .GT. 0) ) THEN
-          DO ij=ijgs_e,ijge_e
-            nadiab_moments_e(ip,ij,ikys:ikye,ikxs:ikxe,izgs:izge) = moments_e(ip,ij,ikys:ikye,ikxs:ikxe,izgs:izge,updatetlevel) &
-                                      - q_o_sqrt_tau_sigma_e*kernel_e(ij,ikys:ikye,ikxs:ikxe,izgs:izge,0)*psi(ikys:ikye,ikxs:ikxe,izgs:izge)
-          ENDDO
-        ELSE
-          DO ij=ijgs_e,ijge_e
-            nadiab_moments_e(ip,ij,ikys:ikye,ikxs:ikxe,izgs:izge) = moments_e(ip,ij,ikys:ikye,ikxs:ikxe,izgs:izge,updatetlevel)
-          ENDDO
-        ENDIF
-      ENDDO
-    ENDIF
-    ! Ions non adiab moments
-    DO ip=ipgs_i,ipge_i
-      IF(parray_i(ip) .EQ. 0) THEN
-        DO ij=ijgs_i,ijge_i
-          nadiab_moments_i(ip,ij,ikys:ikye,ikxs:ikxe,izgs:izge) = moments_i(ip,ij,ikys:ikye,ikxs:ikxe,izgs:izge,updatetlevel) &
-                                    + qi_taui*kernel_i(ij,ikys:ikye,ikxs:ikxe,izgs:izge,0)*phi(ikys:ikye,ikxs:ikxe,izgs:izge)
-        ENDDO
-      ELSEIF( (parray_i(ip) .EQ. 1) .AND. (beta .GT. 0) ) THEN
-        DO ij=ijgs_i,ijge_i
-          nadiab_moments_i(ip,ij,ikys:ikye,ikxs:ikxe,izgs:izge) = moments_i(ip,ij,ikys:ikye,ikxs:ikxe,izgs:izge,updatetlevel) &
-                                    - q_o_sqrt_tau_sigma_i*kernel_i(ij,ikys:ikye,ikxs:ikxe,izgs:izge,0)*psi(ikys:ikye,ikxs:ikxe,izgs:izge)
-        ENDDO
+   SUBROUTINE init_process
+      USE grid,       ONLY: local_na
+      IMPLICIT NONE
+      ALLOCATE( pflux_x(local_na))
+      ALLOCATE( gflux_x(local_na))
+      ALLOCATE( hflux_x(local_na))
+   END SUBROUTINE init_process
+!------------------------------ HIGH FREQUENCY ROUTINES -------------
+! The following routines (nadiab computing, interp and grads) must be
+! as fast as possible since they are called every RK substep.
+   ! evaluate the non-adiabatique ion moments
+   !
+   ! n_{pi} = N^{pj} + kernel(j) /tau_i phi delta_p0
+   !
+   SUBROUTINE compute_nadiab_moments
+      IMPLICIT NONE
+      INTEGER :: ia,ip,ij,iky,ikx,iz
+      !non adiab moments
+      DO iz=1,local_nz+ngz
+      DO ikx=1,local_nkx
+      DO iky=1,local_nky
+      DO ij=1,local_nj+ngj
+      DO ip=1,local_np+ngp
+      DO ia = 1,local_na
+         IF(parray(ip) .EQ. 0) THEN
+            nadiab_moments(ia,ip,ij,iky,ikx,iz) = moments(ia,ip,ij,iky,ikx,iz,updatetlevel) &
+            + q_tau(ia)*kernel(ia,ij,iky,ikx,iz,ieven)*phi(iky,ikx,iz)
+            ELSEIF( (parray(ip) .EQ. 1) .AND. (beta .GT. 0) ) THEN
+            nadiab_moments(ia,ip,ij,iky,ikx,iz) = moments(ia,ip,ij,iky,ikx,iz,updatetlevel) &
+               - q_o_sqrt_tau_sigma(ia)*kernel(ia,ij,iky,ikx,iz,ieven)*psi(iky,ikx,iz)
+         ELSE
+            nadiab_moments(ia,ip,ij,iky,ikx,iz) = moments(ia,ip,ij,iky,ikx,iz,updatetlevel)
+         ENDIF
+      ENDDO
+      ENDDO
+      ENDDO
+      ENDDO
+      ENDDO
+      ENDDO
+   END SUBROUTINE compute_nadiab_moments
+
+   ! z grid gradients
+   ! SUBROUTINE compute_gradients_z
+   !    IMPLICIT NONE
+   !    INTEGER :: eo, p_int, j_int, ia,ip,ij,iky,ikx,iz,izi
+   !    COMPLEX(xp), DIMENSION(local_nz+ngz) :: f_in
+   !    COMPLEX(xp), DIMENSION(local_nz)     :: f_out
+   !       ! Compute z first derivative
+   !       DO iz=1,local_nz+ngz
+   !          izi = iz+ngz/2
+   !       DO ikx=1,local_nkx
+   !       DO iky=1,local_nky
+   !       DO ij=1,local_nj+ngj
+   !       DO ip=1,local_np+ngp
+   !       DO ia = 1,local_na
+   !          ddz_napj(ia,ip,ij,iky,ikx,iz) = inv_deltaz *(&
+   !             +onetwelfth*nadiab_moments(ia,ip,ij,iky,ikx,izi-2)&
+   !               -twothird*nadiab_moments(ia,ip,ij,iky,ikx,izi-1)&
+   !               +twothird*nadiab_moments(ia,ip,ij,iky,ikx,izi+1)&
+   !             -onetwelfth*nadiab_moments(ia,ip,ij,iky,ikx,izi-2)&
+   !             )
+   !          ddzND_Napj(ia,ip,ij,iky,ikx,iz) = inv_deltaz**4 *(&
+   !             +1._xp*moments(ia,ip,ij,iky,ikx,izi-2,updatetlevel)&
+   !             -4._xp*moments(ia,ip,ij,iky,ikx,izi-1,updatetlevel)&
+   !             +6._xp*moments(ia,ip,ij,iky,ikx,izi  ,updatetlevel)&
+   !             -4._xp*moments(ia,ip,ij,iky,ikx,izi+1,updatetlevel)&
+   !             +1._xp*moments(ia,ip,ij,iky,ikx,izi-2,updatetlevel)&
+   !          )
+   !       ENDDO
+   !       ENDDO
+   !       ENDDO
+   !       ENDDO
+   !       ENDDO
+   !       ENDDO
+   ! END SUBROUTINE compute_gradients_z
+
+   ! ! z grid gradients
+   SUBROUTINE compute_gradients_z
+      IMPLICIT NONE
+      INTEGER :: eo, p_int, ia,ip,ij,iky,ikx,iz
+      COMPLEX(xp), DIMENSION(local_nz+ngz) :: f_in
+      COMPLEX(xp), DIMENSION(local_nz)     :: f_out
+      DO ikx = 1,local_nkx
+      DO iky = 1,local_nky
+      DO ij = 1,local_nj+ngj
+      DO ip = 1,local_np+ngp
+      DO ia = 1,local_na
+         IF(nzgrid .GT. 1) THEN
+            p_int = parray(ip+ngp/2)
+            eo    = MODULO(p_int,2)+1 ! Indicates if we are on even or odd z grid
+         ELSE
+            eo = 0
+         ENDIF
+         ! Compute z first derivative
+         f_in = nadiab_moments(ia,ip,ij,iky,ikx,:)
+         CALL   grad_z(eo,local_nz,ngz,inv_deltaz,f_in,f_out)
+         ddz_napj(ia,ip,ij,iky,ikx,:) = f_out(:)
+         ! Parallel numerical diffusion
+         IF (HDz_h) THEN
+            f_in = nadiab_moments(ia,ip,ij,iky,ikx,:)
+         ELSE
+            f_in = moments(ia,ip,ij,iky,ikx,:,updatetlevel)
+         ENDIF
+         CALL  grad_z4(local_nz,ngz,inv_deltaz,f_in,f_out)
+         ! get output
+         DO iz = 1,local_nz
+            ddzND_Napj(ia,ip,ij,iky,ikx,iz) = f_out(iz)
+         ENDDO
+      ENDDO
+      ENDDO
+      ENDDO
+      ENDDO
+      ENDDO
+   END SUBROUTINE compute_gradients_z
+
+      ! z grid interpolation
+   SUBROUTINE compute_interp_z
+      IMPLICIT NONE
+      INTEGER :: eo, ia,ip,ij,iky,ikx,iz
+      COMPLEX(xp), DIMENSION(local_nz+ngz) :: f_in
+      COMPLEX(xp), DIMENSION(local_nz)     :: f_out
+      IF(nzgrid .GT. 1) THEN
+         DO ikx = 1,local_nkx
+         DO iky = 1,local_nky
+         DO ij = 1,local_nj+ngj
+         DO ip = 1,local_np+ngp
+         DO ia = 1,local_na
+            ! Compute even odd grids interpolation
+            f_in = nadiab_moments(ia,ip,ij,iky,ikx,:)
+            CALL interp_z(eo,local_nz,ngz,f_in,f_out)
+            DO iz = 1,local_nz
+               interp_napj(ia,ip,ij,iky,ikx,iz) = f_out(iz)
+            ENDDO
+         ENDDO
+         ENDDO
+         ENDDO
+         ENDDO
+         ENDDO
       ELSE
-        DO ij=ijgs_i,ijge_i
-          nadiab_moments_i(ip,ij,ikys:ikye,ikxs:ikxe,izgs:izge) = moments_i(ip,ij,ikys:ikye,ikxs:ikxe,izgs:izge,updatetlevel)
-        ENDDO
+         interp_napj(:,:,:,:,:,1:local_nz) = nadiab_moments(:,:,:,:,:,(1+ngz/2):(local_nz+ngz/2))
       ENDIF
-    ENDDO
-
-    !! Ensure to kill the moments too high if closue option is set to 1
-    IF(CLOS .EQ. 1) THEN
-      IF(KIN_E) THEN
-        DO ip=ipgs_e,ipge_e
-          p_int = parray_e(ip)
-            DO ij=ijgs_e,ijge_e
-              j_int = jarray_e(ij)
-              IF(p_int+2*j_int .GT. dmaxe) &
-              nadiab_moments_e(ip,ij,:,:,:) = 0._dp
+   END SUBROUTINE compute_interp_z
+
+   !--------------------- LOW FREQUENCY PROCESSING ROUTINES -------------------!
+   ! The following routines are called by the diagnose routine every nsave3D steps 
+   ! (does not need to be optimized)
+   ! 1D diagnostic to compute the average radial particle transport <n_a v_ExB_x>_xyz
+   SUBROUTINE compute_radial_transport
+      IMPLICIT NONE
+      COMPLEX(xp) :: pflux_local, gflux_local, integral
+      REAL(xp)    :: buffer(2)
+      INTEGER     :: i_, root, iky, ikx, ia, iz, in, izi, ini
+      COMPLEX(xp), DIMENSION(local_nz) :: integrant
+      DO ia = 1,local_na
+         pflux_local = 0._xp ! particle flux
+         gflux_local = 0._xp ! gyrocenter flux
+         integrant   = 0._xp ! auxiliary variable for z integration
+         !!---------- Gyro center flux (drift kinetic) ------------
+         ! Electrostatic part
+         IF(CONTAINSp0) THEN
+            DO iz = 1,local_nz
+            izi = iz + ngz/2 !interior index for ghosted arrays
+            DO ikx = 1,local_nkx
+            DO iky = 1,local_nky
+               integrant(iz) = integrant(iz) &
+               +Jacobian(izi,ieven)*moments(ia,ip0,ij0,iky,ikx,izi,updatetlevel)&
+               *imagu*kyarray(iky)*CONJG(phi(iky,ikx,izi))
             ENDDO
-        ENDDO
-      ENDIF
-      DO ip=ipgs_i,ipge_i
-        p_int = parray_i(ip)
-          DO ij=ijgs_i,ijge_i
-            j_int = jarray_i(ij)
-            IF(p_int+2*j_int .GT. dmaxi) &
-            nadiab_moments_i(ip,ij,:,:,:) = 0._dp
-          ENDDO
-      ENDDO
-    ENDIF
-
- !------------- INTERP AND GRADIENTS ALONG Z ----------------------------------
-
-  IF (KIN_E) THEN
-  DO ikx = ikxs,ikxe
-    DO iky = ikys,ikye
-      DO ij = ijgs_e,ijge_e
-        DO ip = ipgs_e,ipge_e
-          p_int = parray_e(ip)
-          eo    = MODULO(p_int,2) ! Indicates if we are on even or odd z grid
-          ! Compute z derivatives
-          CALL   grad_z(eo,nadiab_moments_e(ip,ij,iky,ikx,izgs:izge),    ddz_nepj(ip,ij,iky,ikx,izs:ize))
-          ! Parallel hyperdiffusion
-          IF (HDz_h) THEN
-            CALL  grad_z4(nadiab_moments_e(ip,ij,iky,ikx,izgs:izge),ddzND_Nepj(ip,ij,iky,ikx,izs:ize))
-          ELSE
-            CALL  grad_z4(moments_e(ip,ij,iky,ikx,izgs:izge,updatetlevel),ddzND_Nepj(ip,ij,iky,ikx,izs:ize))
-          ENDIF
-        ! Compute even odd grids interpolation
-          CALL interp_z(eo,nadiab_moments_e(ip,ij,iky,ikx,izgs:izge), interp_nepj(ip,ij,iky,ikx,izs:ize))
-        ENDDO
-      ENDDO
-    ENDDO
-  ENDDO
-  ENDIF
-
-  DO ikx = ikxs,ikxe
-    DO iky = ikys,ikye
-      DO ij = ijgs_i,ijge_i
-        DO ip = ipgs_i,ipge_i
-          p_int = parray_i(ip)
-          eo    = MODULO(p_int,2) ! Indicates if we are on even or odd z grid
-          ! Compute z first derivative
-          CALL   grad_z(eo,nadiab_moments_i(ip,ij,iky,ikx,izgs:izge),    ddz_nipj(ip,ij,iky,ikx,izs:ize))
-          ! Parallel numerical diffusion
-          IF (HDz_h) THEN
-            CALL  grad_z4(nadiab_moments_i(ip,ij,iky,ikx,izgs:izge),ddzND_Nipj(ip,ij,iky,ikx,izs:ize))
-          ELSE
-            CALL  grad_z4(moments_i(ip,ij,iky,ikx,izgs:izge,updatetlevel),ddzND_Nipj(ip,ij,iky,ikx,izs:ize))
-          ENDIF
-          ! Compute even odd grids interpolation
-          CALL interp_z(eo,nadiab_moments_i(ip,ij,iky,ikx,izgs:izge), interp_nipj(ip,ij,iky,ikx,izs:ize))
-        ENDDO
-      ENDDO
-    ENDDO
-  ENDDO
-
-  ! Phi parallel gradient (not implemented fully, should be negligible)
-  ! DO ikx = ikxs,ikxe
-  !   DO iky = ikys,ikye
-  !     CALL grad_z(0,phi(iky,ikx,izgs:izge), ddz_phi(iky,ikx,izs:ize))
-  !   ENDDO
-  ! ENDDO
-
-  ! Execution time end
-  CALL cpu_time(t1_process)
-  tc_process = tc_process + (t1_process - t0_process)
-END SUBROUTINE compute_nadiab_moments_z_gradients_and_interp
-
-SUBROUTINE compute_Napjz_spectrum
-  USE fields, ONLY : moments_e, moments_i
-  USE model,  ONLY : KIN_E
-  USE array,  ONLY : Nipjz, Nepjz
-  USE time_integration, ONLY : updatetlevel
-  IMPLICIT NONE
-  REAL(dp), DIMENSION(ips_e:ipe_e,ijs_e:ije_e,izs:ize) :: local_sum_e,global_sum_e, buffer_e
-  REAL(dp), DIMENSION(ips_i:ipe_i,ijs_i:ije_i,izs:ize) :: local_sum_i,global_sum_i, buffer_i
-  INTEGER  :: i_, root, count
-  root = 0
-  ! Electron moments spectrum
-  IF (KIN_E) THEN
-    ! build local sum
-    local_sum_e = 0._dp
-    DO ikx = ikxs,ikxe
-      DO iky = ikys,ikye
-        local_sum_e(ips_e:ipe_e,ijs_e:ije_e,izs:ize)  = local_sum_e(ips_e:ipe_e,ijs_e:ije_e,izs:ize)  + &
-        REAL(moments_e(ips_e:ipe_e,ijs_e:ije_e,iky,ikx,izs:ize,updatetlevel)&
-         * CONJG(moments_e(ips_e:ipe_e,ijs_e:ije_e,iky,ikx,izs:ize,updatetlevel)),dp)
-      ENDDO
-    ENDDO
-    ! sum reduction
-    buffer_e     = local_sum_e
-    global_sum_e = 0._dp
-    count = (ipe_e-ips_e+1)*(ije_e-ijs_e+1)*(ize-izs+1)
-    IF (num_procs_ky .GT. 1) THEN
-        !! Everyone sends its local_sum to root = 0
-        IF (rank_ky .NE. root) THEN
-            CALL MPI_SEND(buffer_e, count , MPI_DOUBLE_COMPLEX, root, 1234, comm_ky, ierr)
-        ELSE
-            ! Recieve from all the other processes
-            DO i_ = 0,num_procs_ky-1
-                IF (i_ .NE. rank_ky) &
-                    CALL MPI_RECV(buffer_e, count , MPI_DOUBLE_COMPLEX, i_, 1234, comm_ky, MPI_STATUS_IGNORE, ierr)
-                    global_sum_e = global_sum_e + buffer_e
             ENDDO
-        ENDIF
-    ELSE
-      global_sum_e = local_sum_e
-    ENDIF
-    Nepjz = global_sum_e
-  ENDIF
-  ! Ion moment spectrum
-  ! build local sum
-  local_sum_i = 0._dp
-  DO ikx = ikxs,ikxe
-    DO iky = ikys,ikye
-      local_sum_i(ips_i:ipe_i,ijs_i:ije_i,izs:ize)  = local_sum_i(ips_i:ipe_i,ijs_i:ije_i,izs:ize)  + &
-      (moments_i(ips_i:ipe_i,ijs_i:ije_i,iky,ikx,izs:ize,updatetlevel) &
-      * CONJG(moments_i(ips_i:ipe_i,ijs_i:ije_i,iky,ikx,izs:ize,updatetlevel)))
-    ENDDO
-  ENDDO
-  ! sum reduction
-  buffer_i     = local_sum_i
-  global_sum_i = 0._dp
-  count = (ipe_i-ips_i+1)*(ije_i-ijs_i+1)*(ize-izs+1)
-  IF (num_procs_ky .GT. 1) THEN
-      !! Everyone sends its local_sum to root = 0
-      IF (rank_ky .NE. root) THEN
-          CALL MPI_SEND(buffer_i, count , MPI_DOUBLE_PRECISION, root, 5678, comm_ky, ierr)
-      ELSE
-          ! Recieve from all the other processes
-          DO i_ = 0,num_procs_ky-1
-              IF (i_ .NE. rank_ky) &
-                  CALL MPI_RECV(buffer_i, count , MPI_DOUBLE_PRECISION, i_, 5678, comm_ky, MPI_STATUS_IGNORE, ierr)
-                  global_sum_i = global_sum_i + buffer_i
-          ENDDO
-      ENDIF
-  ELSE
-    global_sum_i = local_sum_i
-  ENDIF
-  Nipjz = global_sum_i
-END SUBROUTINE compute_Napjz_spectrum
+            ENDDO
+         ENDIF
+         ! Electromagnetic part
+         IF( EM .AND. CONTAINSp1 ) THEN
+            DO iz = 1,local_nz ! we take interior points only
+            izi = iz + ngz/2 !interior index for ghosted arrays
+            DO ikx = 1,local_nkx
+            DO iky = 1,local_nky
+               integrant(iz) = integrant(iz)&
+                  -Jacobian(izi,iodd)*sqrt_tau_o_sigma(ia)*moments(ia,ip1,ij0,iky,ikx,izi,updatetlevel)&
+                  *imagu*kyarray(iky)*CONJG(psi(iky,ikx,izi))
+            ENDDO
+            ENDDO
+            ENDDO
+         ENDIF
+         ! Integrate over z
+         call simpson_rule_z(local_nz,zweights_SR,integrant,integral)
+         ! Get process local gyrocenter flux with a factor two to account for the negative ky modes
+         gflux_local = 2._xp*integral*iInt_Jacobian
+         !
+         !!---------- Particle flux (gyrokinetic) ------------
+         integrant   = 0._xp ! reset auxiliary variable
+         ! Electrostatic part
+         IF(CONTAINSp0) THEN
+            DO iz = 1,local_nz ! we take interior points only
+            izi = iz + ngz/2 !interior index for ghosted arrays
+            DO ikx = 1,local_nkx
+            DO iky = 1,local_nky
+            DO in = 1, local_nj
+            ini = in + ngj/2 !interior index for ghosted arrays
+               integrant(iz) = integrant(iz)+ &
+                  Jacobian(izi,ieven)*moments(ia,ip0,ini,iky,ikx,izi,updatetlevel)&
+                  *imagu*kyarray(iky)*kernel(ia,ini,iky,ikx,izi,ieven)*CONJG(phi(iky,ikx,izi))
+            ENDDO
+            ENDDO
+            ENDDO
+            ENDDO
+         ENDIF
+         ! Electromagnetic part
+         IF( EM .AND. CONTAINSp1 ) THEN
+            DO iz = 1,local_nz ! we take interior points only
+            izi = iz + ngz/2 !interior index for ghosted arrays
+            DO ikx = 1,local_nkx
+            DO iky = 1,local_nky
+            DO in = 1, local_nj
+            ini = in + ngj/2 !interior index for ghosted arrays
+                  integrant(iz) = integrant(iz) - &
+                  Jacobian(izi,iodd)*sqrt_tau_o_sigma(ia)*moments(ia,ip1,ini,iky,ikx,izi,updatetlevel)&
+                  *imagu*kyarray(iky)*kernel(ia,ini,iky,ikx,izi,iodd)*CONJG(psi(iky,ikx,izi))
+            ENDDO
+            ENDDO
+            ENDDO
+            ENDDO
+         ENDIF
+         ! Integrate over z
+         call simpson_rule_z(local_nz,zweights_SR,integrant,integral)
+         ! Get process local particle flux with a factor two to account for the negative ky modes
+         pflux_local = 2._xp*integral*iInt_Jacobian
+         !!!!---------- Sum over all processes ----------
+         buffer(1) = REAL(gflux_local,xp)
+         buffer(2) = REAL(pflux_local,xp)
+         root = 0
+         !Gather manually among the rank_p=0 processes and perform the sum
+         gflux_x(ia) = 0
+         pflux_x(ia) = 0
+         IF (num_procs_ky .GT. 1) THEN
+            !! Everyone sends its local_sum to root = 0
+            IF (rank_ky .NE. root) THEN
+               CALL MPI_SEND(buffer, 2 , MPI_DOUBLE_PRECISION, root, 1234, comm_ky, ierr)
+            ELSE
+               ! Recieve from all the other processes
+               DO i_ = 0,num_procs_ky-1
+                  IF (i_ .NE. rank_ky) &
+                     CALL MPI_RECV(buffer, 2 , MPI_DOUBLE_PRECISION, i_, 1234, comm_ky, MPI_STATUS_IGNORE, ierr)
+                  gflux_x(ia) = gflux_x(ia) + buffer(1)
+                  pflux_x(ia) = pflux_x(ia) + buffer(2)
+               ENDDO
+            ENDIF
+         ELSE
+            gflux_x(ia) = gflux_local
+            pflux_x(ia) = pflux_local
+         ENDIF
+      ENDDO
+   END SUBROUTINE compute_radial_transport
+
+! 1D diagnostic to compute the average radial particle heatflux <T_i v_ExB_x>_xyz
+   SUBROUTINE compute_radial_heatflux
+      IMPLICIT NONE
+      COMPLEX(xp) :: hflux_local, integral
+      REAL(xp)    :: buffer(2), n_xp
+      INTEGER     :: i_, root, in, ia, iky, ikx, iz, izi, ini
+      COMPLEX(xp), DIMENSION(local_nz) :: integrant        ! charge density q_a n_a
+      DO ia = 1,local_na
+         hflux_local = 0._xp ! heat flux
+         integrant   = 0._xp ! z integration auxiliary variable
+         !!----------------ELECTROSTATIC CONTRIBUTION---------------------------
+         IF(CONTAINSp0 .AND. CONTAINSp2) THEN
+            ! Loop to compute gamma_kx = sum_ky sum_j -i k_z Kernel_j Na00 * phi
+            DO iz = 1,local_nz ! we take interior points only
+            izi = iz + ngz/2 !interior index for ghosted arrays
+            DO ikx = 1,local_nkx
+            DO iky = 1,local_nky
+            DO in = 1, local_nj
+            ini  = in+ngj/2 !interior index for ghosted arrays
+            n_xp = jarray(ini)
+               integrant(iz) = integrant(iz) &
+                  -Jacobian(izi,ieven)*tau(ia)*imagu*kyarray(iky)*phi(iky,ikx,izi)&
+                  *kernel(ia,ini,iky,ikx,izi,ieven)*CONJG(&
+                              0.5_xp*SQRT2*moments(ia,ip2,ini  ,iky,ikx,izi,updatetlevel)&
+                  +(2._xp*n_xp + 1.5_xp)*moments(ia,ip0,ini  ,iky,ikx,izi,updatetlevel)&
+                           -(n_xp+1._xp)*moments(ia,ip0,ini+1,iky,ikx,izi,updatetlevel)&
+                                    -n_xp*moments(ia,ip0,ini-1,iky,ikx,izi,updatetlevel))
+            ENDDO
+            ENDDO
+            ENDDO
+            ENDDO
+         ELSEIF(CONTAINSp0) THEN
+            ERROR STOP "Degrees p=0 and p=2 should be owned by the same process"
+         ENDIF
+         IF(EM .AND. CONTAINSp1 .AND. CONTAINSp3) THEN
+            !!----------------ELECTROMAGNETIC CONTRIBUTION--------------------
+            DO iz  = 1,local_nz
+            izi = iz + ngz/2 !interior index for ghosted arrays
+            DO ikx = 1,local_nkx
+            DO iky = 1,local_nky
+            DO in = 1, local_nj
+            ini = in + ngj/2 !interior index for ghosted arrays
+            n_xp = jarray(ini)
+               integrant(iz) = integrant(iz) &
+                     +Jacobian(izi,iodd)*tau(ia)*sqrt_tau_o_sigma(ia)*imagu*kyarray(iky)*CONJG(psi(iky,ikx,izi))&
+                  *kernel(ia,ini,iky,ikx,izi,iodd)*(&
+                  0.5_xp*SQRT2*SQRT3*moments(ia,ip3,ini  ,iky,ikx,izi,updatetlevel)&
+                              +1.5_xp*moments(ia,ip1,ini  ,iky,ikx,izi,updatetlevel)&
+                  +(2._xp*n_xp+1._xp)*moments(ia,ip1,ini  ,iky,ikx,izi,updatetlevel)&
+                        -(n_xp+1._xp)*moments(ia,ip1,ini+1,iky,ikx,izi,updatetlevel)&
+                                 -n_xp*moments(ia,ip1,ini-1,iky,ikx,izi,updatetlevel))
+            ENDDO
+            ENDDO
+            ENDDO
+            ENDDO
+         ENDIF
+         ! Add polarisation contribution
+         ! integrant(iz) = integrant(iz) + tau_i*imagu*ky_&
+         ! *CONJG(phi(iky,ikx,iz))*phi(iky,ikx,iz) * HF_phi_correction_operator(iky,ikx,iz)
+         ! Integrate over z
+         call simpson_rule_z(local_nz,zweights_SR,integrant,integral)
+         ! Double it for taking into account the other half plane
+         hflux_local = 2._xp*integral*iInt_Jacobian
+         buffer(2)   = REAL(hflux_local,xp)
+         root = 0
+         !Gather manually among the rank_p=0 processes and perform the sum
+         hflux_x(ia) = 0
+         IF (num_procs_ky .GT. 1) THEN
+            !! Everyone sends its local_sum to root = 0
+            IF (rank_ky .NE. root) THEN
+               CALL MPI_SEND(buffer, 2 , MPI_DOUBLE_PRECISION, root, 1234, comm_ky, ierr)
+            ELSE
+               ! Recieve from all the other processes
+               DO i_ = 0,num_procs_ky-1
+                  IF (i_ .NE. rank_ky) &
+                     CALL MPI_RECV(buffer, 2 , MPI_DOUBLE_PRECISION, i_, 1234, comm_ky, MPI_STATUS_IGNORE, ierr)
+                  hflux_x(ia) = hflux_x(ia) + buffer(2)
+               ENDDO
+            ENDIF
+         ELSE
+            hflux_x(ia) = hflux_local
+         ENDIF
+      ENDDO
+   END SUBROUTINE compute_radial_heatflux
+
+   SUBROUTINE compute_Napjz_spectrum
+      USE fields, ONLY : moments
+      USE array,  ONLY : Napjz
+      USE time_integration, ONLY : updatetlevel
+      IMPLICIT NONE
+      REAL(xp), DIMENSION(local_np,local_nj,local_nz) :: local_sum,global_sum, buffer
+      INTEGER  :: i_, root, count, ia, ip, ij, iky, ikx, iz
+      root = 0
+      DO ia=1,local_na
+         ! z-moment spectrum
+         ! build local sum
+         local_sum = 0._xp
+         DO iz = 1,local_nz
+         DO ikx = 1,local_nkx
+         DO iky = 1,local_nky
+         DO ij = 1,local_nj
+         DO ip = 1,local_np
+            local_sum(ip,ij,iz)  = local_sum(ip,ij,iz)  + &
+               (moments(ia,ip+Ngp/2,ij+Ngj/2,iky,ikx,iz+Ngz/2,updatetlevel) &
+               * CONJG(moments(ia,ip+Ngp/2,ij+Ngj/2,iky,ikx,iz+Ngz/2,updatetlevel)))
+         ENDDO
+         ENDDO
+         ENDDO
+         ENDDO
+         ENDDO
+         ! sum reduction
+         buffer     = local_sum
+         global_sum = 0._xp
+         count = local_np*local_nj*local_nz
+         IF (num_procs_ky .GT. 1) THEN
+            !! Everyone sends its local_sum to root = 0
+            IF (rank_ky .NE. root) THEN
+               CALL MPI_SEND(buffer, count , MPI_DOUBLE_PRECISION, root, 5678, comm_ky, ierr)
+            ELSE
+               ! Recieve from all the other processes
+               DO i_ = 0,num_procs_ky-1
+                  IF (i_ .NE. rank_ky) &
+                     CALL MPI_RECV(buffer, count , MPI_DOUBLE_PRECISION, i_, 5678, comm_ky, MPI_STATUS_IGNORE, ierr)
+                  global_sum = global_sum + buffer
+               ENDDO
+            ENDIF
+         ELSE
+            global_sum = local_sum
+         ENDIF
+         Napjz(ia,:,:,:) = global_sum
+      ENDDO
+   END SUBROUTINE compute_Napjz_spectrum
 
 !_____________________________________________________________________________!
 !!!!! FLUID MOMENTS COMPUTATIONS !!!!!
 ! Compute the 2D particle density for electron and ions (sum over Laguerre)
-SUBROUTINE compute_density
-  USE array,  ONLY : dens_e, dens_i, kernel_e, kernel_i
-  USE model,  ONLY : KIN_E
-  USE fields, ONLY : moments_e, moments_i
-  USE time_integration, ONLY : updatetlevel
-  IMPLICIT NONE
-  COMPLEX(dp) :: dens
-
-  IF ( CONTAINS_ip0_e .AND. CONTAINS_ip0_i ) THEN
-      ! Loop to compute dens_i = sum_j kernel_j Ni0j at each k
-      DO iz = izs,ize
-        DO iky = ikys,ikye
-          DO ikx = ikxs,ikxe
-
-            IF(KIN_E) THEN
-            ! electron density
-            dens = 0._dp
-            DO ij = ijs_e, ije_e
-                dens = dens + kernel_e(ij,iky,ikx,iz,0) * moments_e(ip0_e,ij,iky,ikx,iz,updatetlevel)
-            ENDDO
-            dens_e(iky,ikx,iz) = dens
-            ENDIF
-            ! ion density
-            dens = 0._dp
-            DO ij = ijs_i, ije_i
-                dens = dens + kernel_i(ij,iky,ikx,iz,0) * moments_i(ip0_e,ij,iky,ikx,iz,updatetlevel)
-            ENDDO
-            dens_i(iky,ikx,iz) = dens
-          ENDDO
-        ENDDO
+   SUBROUTINE compute_density
+   IMPLICIT NONE
+   COMPLEX(xp) :: dens_
+   INTEGER :: ia, iz, iky, ikx, ij
+   DO ia=1,local_na
+   IF ( CONTAINSp0 ) THEN
+   ! Loop to compute dens_i = sum_j kernel_j Ni0j at each k
+   DO iz = 1,local_nz
+   DO iky = 1,local_nky
+   DO ikx = 1,local_nkx
+      dens_ = 0._xp
+      DO ij = 1, local_nj
+         dens_ = dens_ + kernel(ia,ij+ngj/2,iky,ikx,iz+ngz/2,ieven) * moments(ia,ip0,ij+ngj/2,iky,ikx,iz+ngz/2,updatetlevel)
       ENDDO
-  ENDIF
-END SUBROUTINE compute_density
+      dens(ia,iky,ikx,iz) = dens_
+   ENDDO
+   ENDDO
+   ENDDO
+   ENDIF
+   ENDDO
+   END SUBROUTINE compute_density
 
 ! Compute the 2D particle fluid perp velocity for electron and ions (sum over Laguerre)
-SUBROUTINE compute_uperp
-  USE array,  ONLY : uper_e, uper_i, kernel_e, kernel_i
-  USE model,  ONLY : KIN_E
-  USE fields, ONLY : moments_e, moments_i
-  USE time_integration, ONLY : updatetlevel
-  IMPLICIT NONE
-  COMPLEX(dp) :: uperp
-
-  IF ( CONTAINS_ip0_e .AND. CONTAINS_ip0_i ) THEN
-      DO iz = izs,ize
-        DO iky = ikys,ikye
-          DO ikx = ikxs,ikxe
-
-            IF(KIN_E) THEN
-            ! electron
-            uperp = 0._dp
-            DO ij = ijs_e, ije_e
-                uperp = uperp + kernel_e(ij,iky,ikx,iz,0) *&
-                 0.5_dp*(moments_e(ip0_e,ij,iky,ikx,iz,updatetlevel) - moments_e(ip0_e,ij-1,iky,ikx,iz,updatetlevel))
-            ENDDO
-            uper_e(iky,ikx,iz) = uperp
-            ENDIF
-            ! ion
-            uperp = 0._dp
-            DO ij = ijs_i, ije_i
-              uperp = uperp + kernel_i(ij,iky,ikx,iz,0) *&
-               0.5_dp*(moments_i(ip0_i,ij,iky,ikx,iz,updatetlevel) - moments_i(ip0_i,ij-1,iky,ikx,iz,updatetlevel))
-             ENDDO
-            uper_i(iky,ikx,iz) = uperp
-          ENDDO
-        ENDDO
-      ENDDO
-  ENDIF
-END SUBROUTINE compute_uperp
+   SUBROUTINE compute_uperp
+      IMPLICIT NONE
+      COMPLEX(xp) :: uperp_
+      INTEGER :: ia, iz, iky, ikx, ij
+      DO ia=1,local_na
+      IF ( CONTAINSp0 ) THEN
+      DO iz = 1,local_nz
+      DO iky = 1,local_nky
+      DO ikx = 1,local_nkx
+      uperp_ = 0._xp
+      DO ij = 1, local_nj
+         uperp_ = uperp_ + kernel(ia,ij+ngj/2,iky,ikx,iz+ngz/2,ieven) *&
+            0.5_xp*(moments(ia,ip0,ij+ngj/2,iky,ikx,iz+ngz/2,updatetlevel)&
+                     -moments(ia,ip0,ij-1+ngj/2,iky,ikx,iz+ngz/2,updatetlevel))
+      ENDDO
+      uper(ia,iky,ikx,iz) = uperp_
+      ENDDO
+      ENDDO
+      ENDDO
+      ENDIF
+      ENDDO
+   END SUBROUTINE compute_uperp
 
 ! Compute the 2D particle fluid par velocity for electron and ions (sum over Laguerre)
-SUBROUTINE compute_upar
-  USE array,  ONLY : upar_e, upar_i, kernel_e, kernel_i
-  USE model,  ONLY : KIN_E
-  USE fields, ONLY : moments_e, moments_i
-  USE time_integration, ONLY : updatetlevel
-  IMPLICIT NONE
-  COMPLEX(dp) :: upar
-
-  IF ( CONTAINS_ip1_e .AND. CONTAINS_ip1_i ) THEN
-    DO iz = izs,ize
-      DO iky = ikys,ikye
-        DO ikx = ikxs,ikxe
-          IF(KIN_E) THEN
-          ! electron
-          upar = 0._dp
-          DO ij = ijs_e, ije_e
-            upar = upar + kernel_e(ij,iky,ikx,iz,1)*moments_e(ip1_e,ij,iky,ikx,iz,updatetlevel)
-          ENDDO
-          upar_e(iky,ikx,iz) = upar
-          ENDIF
-          ! ion
-          upar = 0._dp
-          DO ij = ijs_i, ije_i
-            upar = upar + kernel_i(ij,iky,ikx,iz,1)*moments_i(ip1_i,ij,iky,ikx,iz,updatetlevel)
-           ENDDO
-          upar_i(iky,ikx,iz) = upar
-        ENDDO
-      ENDDO
-    ENDDO
-  ELSE
-    IF(KIN_E)&
-    upar_e = 0
-    upar_i = 0
-  ENDIF
-END SUBROUTINE compute_upar
+   SUBROUTINE compute_upar
+      IMPLICIT NONE
+      INTEGER :: ia, iz, iky, ikx, ij
+      COMPLEX(xp) :: upar_
+      DO ia=1,local_na
+      IF ( CONTAINSp1 ) THEN
+      DO iz = 1,local_nz
+      DO iky = 1,local_nky
+      DO ikx = 1,local_nkx
+         upar_ = 0._xp
+         DO ij = 1, local_nj
+            upar_ = upar_ + kernel(ia,ij+ngj/2,iky,ikx,iz+ngz/2,iodd)*moments(ia,ip1,ij+ngj/2,iky,ikx,iz+ngz/2,updatetlevel)
+         ENDDO
+         upar(ia,iky,ikx,iz) = upar_
+      ENDDO
+      ENDDO
+      ENDDO
+      ENDIF
+      ENDDO
+   END SUBROUTINE compute_upar
 
 ! Compute the 2D particle temperature for electron and ions (sum over Laguerre)
-SUBROUTINE compute_tperp
-  USE array,  ONLY : Tper_e, Tper_i, kernel_e, kernel_i
-  USE model,  ONLY : KIN_E
-  USE fields, ONLY : moments_e, moments_i
-  USE time_integration, ONLY : updatetlevel
-  IMPLICIT NONE
-  REAL(dp)    :: j_dp
-  COMPLEX(dp) :: Tperp
-
-  IF ( CONTAINS_ip0_e .AND. CONTAINS_ip0_i .AND. &
-       CONTAINS_ip2_e .AND. CONTAINS_ip2_i ) THEN
+   SUBROUTINE compute_tperp
+      USE time_integration, ONLY : updatetlevel
+      IMPLICIT NONE
+      REAL(xp)    :: j_xp
+      COMPLEX(xp) :: Tperp_
+      INTEGER     :: ia, iz, iky, ikx, ij
+      DO ia=1,local_na
+      IF ( CONTAINSp0 .AND. CONTAINSp2 ) THEN
       ! Loop to compute T = 1/3*(Tpar + 2Tperp)
-      DO iz = izs,ize
-        DO iky = ikys,ikye
-          DO ikx = ikxs,ikxe
-            ! electron temperature
-            IF(KIN_E) THEN
-            Tperp  = 0._dp
-            DO ij = ijs_e, ije_e
-              j_dp = REAL(ij-1,dp)
-              Tperp = Tperp + kernel_e(ij,iky,ikx,iz,0)*&
-                  ((2_dp*j_dp+1)*moments_e(ip0_e,ij  ,iky,ikx,iz,updatetlevel)&
-                  -j_dp         *moments_e(ip0_e,ij-1,iky,ikx,iz,updatetlevel)&
-                  -j_dp+1       *moments_e(ip0_e,ij+1,iky,ikx,iz,updatetlevel))
-            ENDDO
-            Tper_e(iky,ikx,iz) = Tperp
-            ENDIF
-            ! ion temperature
-            Tperp = 0._dp
-            DO ij = ijs_i, ije_i
-              j_dp = REAL(ij-1,dp)
-              Tperp = Tperp + kernel_i(ij,iky,ikx,iz,0)*&
-                  ((2_dp*j_dp+1)*moments_i(ip0_i,ij  ,iky,ikx,iz,updatetlevel)&
-                  -j_dp         *moments_i(ip0_i,ij-1,iky,ikx,iz,updatetlevel)&
-                  -j_dp+1       *moments_i(ip0_i,ij+1,iky,ikx,iz,updatetlevel))
-            ENDDO
-            Tper_i(iky,ikx,iz) = Tperp
-          ENDDO
-        ENDDO
+      DO iz = 1,local_nz
+      DO iky = 1,local_nky
+      DO ikx = 1,local_nkx
+      Tperp_ = 0._xp
+      DO ij = 1, local_nj
+         j_xp = jarray(ij+ngj/2)
+         Tperp_ = Tperp_ + kernel(ia,ij+ngj/2,iky,ikx,iz+ngz/2,ieven)*&
+            ((2_xp*j_xp+1)*moments(ia,ip0,ij  +ngj/2,iky,ikx,iz+ngz/2,updatetlevel)&
+                     -j_xp*moments(ia,ip0,ij-1+ngj/2,iky,ikx,iz+ngz/2,updatetlevel)&
+                  -(j_xp+1)*moments(ia,ip0,ij+1+ngj/2,iky,ikx,iz+ngz/2,updatetlevel))
       ENDDO
-  ENDIF
-END SUBROUTINE compute_Tperp
+      Tper(ia,iky,ikx,iz) = Tperp_
+      ENDDO
+      ENDDO
+      ENDDO
+      ENDIF
+      ENDDO
+   END SUBROUTINE compute_Tperp
 
 ! Compute the 2D particle temperature for electron and ions (sum over Laguerre)
-SUBROUTINE compute_Tpar
-  USE array,  ONLY : Tpar_e, Tpar_i, kernel_e, kernel_i
-  USE model,  ONLY : KIN_E
-  USE fields, ONLY : moments_e, moments_i
-  USE time_integration, ONLY : updatetlevel
-  IMPLICIT NONE
-  REAL(dp)    :: j_dp
-  COMPLEX(dp) :: tpar
-
-  IF ( CONTAINS_ip0_e .AND. CONTAINS_ip0_i .AND. &
-       CONTAINS_ip2_e .AND. CONTAINS_ip2_i ) THEN
+   SUBROUTINE compute_Tpar
+      USE time_integration, ONLY : updatetlevel
+      IMPLICIT NONE
+      REAL(xp)    :: j_xp
+      COMPLEX(xp) :: Tpar_
+      INTEGER     :: ia, iz, iky, ikx, ij
+      DO ia=1,local_na
+      IF ( CONTAINSp0 .AND. CONTAINSp0 ) THEN
       ! Loop to compute T = 1/3*(Tpar + 2Tperp)
-      DO iz = izs,ize
-        DO iky = ikys,ikye
-          DO ikx = ikxs,ikxe
-            ! electron temperature
-            IF(KIN_E) THEN
-            Tpar  = 0._dp
-            DO ij = ijs_e, ije_e
-              j_dp = REAL(ij-1,dp)
-              Tpar  = Tpar + kernel_e(ij,iky,ikx,iz,0)*&
-               (SQRT2 * moments_e(ip2_e,ij,iky,ikx,iz,updatetlevel) &
-                      + moments_e(ip0_e,ij,iky,ikx,iz,updatetlevel))
-            ENDDO
-            Tpar_e(iky,ikx,iz) = Tpar
-            ENDIF
-            ! ion temperature
-            Tpar  = 0._dp
-            DO ij = ijs_i, ije_i
-              j_dp = REAL(ij-1,dp)
-              Tpar  = Tpar + kernel_i(ij,iky,ikx,iz,0)*&
-               (SQRT2 * moments_i(ip2_i,ij,iky,ikx,iz,updatetlevel) &
-                      + moments_i(ip0_i,ij,iky,ikx,iz,updatetlevel))
-            ENDDO
-            Tpar_i(iky,ikx,iz) = Tpar
-          ENDDO
-        ENDDO
+      DO iz = 1,local_nz
+      DO iky = 1,local_nky
+      DO ikx = 1,local_nkx
+      Tpar_ = 0._xp
+      DO ij = 1, local_nj
+         j_xp = REAL(ij-1,xp)
+         Tpar_  = Tpar_ + kernel(ia,ij+ngj/2,iky,ikx,iz+ngz/2,ieven)*&
+            (SQRT2 * moments(ia,ip2,ij+ngj/2,iky,ikx,iz+ngz/2,updatetlevel) &
+                     + moments(ia,ip0,ij+ngj/2,iky,ikx,iz+ngz/2,updatetlevel))
+      ENDDO
+      Tpar(ia,iky,ikx,iz) = Tpar_
+      ENDDO
+      ENDDO
+      ENDDO
+      ENDIF
       ENDDO
-  ENDIF
-END SUBROUTINE compute_Tpar
+   END SUBROUTINE compute_Tpar
 
 ! Compute the 2D particle fluid moments for electron and ions (sum over Laguerre)
-SUBROUTINE compute_fluid_moments
-  USE array, ONLY : dens_e, Tpar_e, Tper_e, dens_i, Tpar_i, Tper_i, temp_e, temp_i
-  USE model, ONLY : KIN_E
-  IMPLICIT NONE
-  CALL compute_density
-  CALL compute_upar
-  CALL compute_uperp
-  CALL compute_Tpar
-  CALL compute_Tperp
-  ! Temperature
-  IF(KIN_E)&
-  temp_e = (Tpar_e - 2._dp * Tper_e)/3._dp - dens_e
-  temp_i = (Tpar_i - 2._dp * Tper_i)/3._dp - dens_i
-END SUBROUTINE compute_fluid_moments
+   SUBROUTINE compute_fluid_moments
+      IMPLICIT NONE
+      CALL compute_density
+      CALL compute_upar
+      CALL compute_uperp
+      CALL compute_Tpar
+      CALL compute_Tperp
+      ! Temperature
+      temp = (Tpar - 2._xp * Tper)/3._xp - dens
+   END SUBROUTINE compute_fluid_moments
 
 END MODULE processing
diff --git a/src/readinputs.F90 b/src/readinputs.F90
index 56cdca06944b63aa79f311a4d63f2a973b2d13fe..02f8b5d58cf7dadc55156fd2af4f6cbdd88f0885 100644
--- a/src/readinputs.F90
+++ b/src/readinputs.F90
@@ -5,9 +5,11 @@ SUBROUTINE readinputs
   USE diagnostics_par,  ONLY: diag_par_readinputs
   USE collision,        ONLY: collision_readinputs
   USE model,            ONLY: model_readinputs
+  USE species,          ONLY: species_readinputs
   USE initial_par,      ONLY: initial_readinputs
   USE time_integration, ONLY: time_integration_readinputs
   USE geometry,         ONLY: geometry_readinputs
+  USE closure,          ONLY: closure_readinputs
 
   USE prec_const
   IMPLICIT NONE
@@ -28,6 +30,12 @@ SUBROUTINE readinputs
   ! Load model parameters from input file
   CALL model_readinputs
 
+  ! Load parameters for moment closure scheme
+  CALL closure_readinputs
+  
+  ! Load model parameters from input file
+  CALL species_readinputs
+
   ! Load collision parameters from input file
   CALL collision_readinputs
 
diff --git a/src/restarts_mod.F90 b/src/restarts_mod.F90
index 9628c2efcabf7b00349f91c0df86d2c6c8ff4bac..142231aaf7ac65a967b6a31d0cbc03147c7d60d9 100644
--- a/src/restarts_mod.F90
+++ b/src/restarts_mod.F90
@@ -6,249 +6,106 @@ USE grid
 USE fields
 USE diagnostics_par
 USE time_integration
-USE model, ONLY: KIN_E
 IMPLICIT NONE
 
-INTEGER :: rank, sz_, n_
-INTEGER :: dims(1) = (/0/)
-CHARACTER(LEN=50) :: dset_name
-INTEGER :: pmaxe_cp, jmaxe_cp, pmaxi_cp, jmaxi_cp, n0, Nkx_cp, Nky_cp, Nz_cp
-COMPLEX(dp), DIMENSION(:,:,:,:,:), ALLOCATABLE :: moments_e_cp
-COMPLEX(dp), DIMENSION(:,:,:,:,:), ALLOCATABLE :: moments_i_cp
-
 PUBLIC :: load_moments!, write_restart
 
 CONTAINS
 
-    !******************************************************************************!
-    !!!!!!! Load moments from a previous output file
-    !******************************************************************************!
-    SUBROUTINE load_moments
-        IMPLICIT NONE
-        REAL    :: timer_tot_1, timer_find_CP_1, timer_load_mom_1
-        REAL    :: timer_tot_2, timer_find_CP_2, timer_load_mom_2
-        INTEGER :: deltap = 1
-        CALL cpu_time(timer_tot_1)
-
-        ! Checkpoint filename
-        WRITE(rstfile,'(a,a1,i2.2,a3)') TRIM(resfile0),'_',job2load,'.h5'
-
-        IF (my_id .EQ. 0) WRITE(*,'(3x,a)') "Resume from ", rstfile
-        ! Open file
-        CALL openf(rstfile, fidrst,mpicomm=comm0)
-        ! Get the checkpoint moments degrees to allocate memory
-        CALL getatt(fidrst,"/data/input/" ,   "Nkx",   Nkx_cp)
-        CALL getatt(fidrst,"/data/input/" ,   "Nky",   Nky_cp)
-        CALL getatt(fidrst,"/data/input/" ,    "Nz",    Nz_cp)
-        IF(Nz .EQ. 1) deltap = 2
-        IF (KIN_E) THEN
-        CALL getatt(fidrst,"/data/input/" , "pmaxe", pmaxe_cp)
-        CALL getatt(fidrst,"/data/input/" , "jmaxe", jmaxe_cp)
-        ENDIF
-        CALL getatt(fidrst,"/data/input/" , "pmaxi", pmaxi_cp)
-        CALL getatt(fidrst,"/data/input/" , "jmaxi", jmaxi_cp)
-        CALL getatt(fidrst,"/data/input/" , "start_iframe5d", n0)
-
-        IF ((KIN_E .AND. ((pmaxe_cp .NE. pmaxe) .OR. (jmaxe_cp .NE. jmaxe))) .OR.&
-         (pmaxi_cp .NE. pmaxi) .OR. (jmaxi_cp .NE. jmaxi)) THEN
-         IF(my_id.EQ.0) WRITE(*,*) '! Extending the polynomials basis !'
-         CALL load_output_adapt_pj
-        ELSE
-
-          CALL cpu_time(timer_find_CP_1)
-
-          ! Find the last results of the checkpoint file by iteration
-          n_ = n0+1
-          WRITE(dset_name, "(A, '/', i6.6)") "/data/var5d/moments_i", n_ ! start with moments_e/000001
-          DO WHILE (isdataset(fidrst, dset_name)) ! If n_ is not a file we stop the loop
-          n_ = n_ + 1
-          WRITE(dset_name, "(A, '/', i6.6)") "/data/var5d/moments_i", n_ ! updtate file number
-          ENDDO
-          n_ = n_ - 1 ! n_ is not a file so take the previous one n_-1
-
-          ! Read time dependent attributes to continue simulation
-          WRITE(dset_name, "(A, '/', i6.6)") "/data/var5d/moments_i", n_
-          CALL getatt(fidrst, dset_name, 'cstep', cstep)
-          CALL getatt(fidrst, dset_name, 'time', time)
-          CALL getatt(fidrst, dset_name, 'jobnum', jobnum)
-          jobnum = jobnum+1
-          CALL getatt(fidrst, dset_name, 'iframe2d',iframe2d)
-          CALL getatt(fidrst, dset_name, 'iframe5d',iframe5d)
-          iframe2d = iframe2d-1; iframe5d = iframe5d-1
-          IF(my_id.EQ.0) WRITE(*,*) '.. restart from t = ', time
-
-          CALL cpu_time(timer_find_CP_2)
-          IF(my_id.EQ.0) WRITE(*,*) '** Time find CP : ', timer_find_CP_2 - timer_find_CP_1, ' **'
-
-          CALL cpu_time(timer_load_mom_1)
-          ! Read state of system from checkpoint file
-
-          ! Super slow futils routine in Marconi.... but spare RAM
-          ! IF (KIN_E) THEN
-          ! WRITE(dset_name, "(A, '/', i6.6)") "/data/var5d/moments_e", n_
-          ! CALL getarrnd(fidrst, dset_name, moments_e(ips_e:ipe_e, ijs_e:ije_e, ikys:ikye, ikxs:ikxe, izs:ize, 1),(/1,3,5/))
-          ! ENDIF
-          ! WRITE(dset_name, "(A, '/', i6.6)") "/data/var5d/moments_i", n_
-          ! CALL getarrnd(fidrst, dset_name, moments_i(ips_i:ipe_i, ijs_i:ije_i, ikys:ikye, ikxs:ikxe, izs:ize, 1),(/1,3,5/))
-
-          ! Brute force loading: load the full moments and take what is needed (RAM dangerous...)
-          IF (KIN_E) THEN
-            CALL allocate_array(moments_e_cp, 1,pmaxe_cp/deltap+1, 1,jmaxe_cp+1, 1,Nky_cp, 1,Nkx_cp, 1,Nz_cp)
-            WRITE(dset_name, "(A, '/', i6.6)") "/data/var5d/moments_e", n_
-            CALL getarr(fidrst, dset_name, moments_e_cp(:,:,:,:,:))
-            moments_e(ips_e:ipe_e, ijs_e:ije_e, ikys:ikye, ikxs:ikxe, izs:ize, 1) &
-            = moments_e_cp(ips_e:ipe_e, ijs_e:ije_e, ikys:ikye, ikxs:ikxe, izs:ize)
-            DEALLOCATE(moments_e_cp)
-          ENDIF
-          CALL allocate_array(moments_i_cp, 1,pmaxi_cp/deltap+1, 1,jmaxi_cp+1, 1,Nky_cp, 1,Nkx_cp, 1,Nz_cp)
-          WRITE(dset_name, "(A, '/', i6.6)") "/data/var5d/moments_i", n_
-          CALL getarr(fidrst, dset_name, moments_i_cp(:,:,:,:,:))
-          moments_i(ips_i:ipe_i, ijs_i:ije_i, ikys:ikye, ikxs:ikxe, izs:ize, 1) &
-          = moments_i_cp(ips_i:ipe_i, ijs_i:ije_i, ikys:ikye, ikxs:ikxe, izs:ize)
-          DEALLOCATE(moments_i_cp)
-
-          CALL closef(fidrst)
-
-          IF (my_id .EQ. 0) WRITE(*,'(3x,a)') "Reading from restart file "//TRIM(rstfile)//" completed!"
-        ENDIF
-        CALL cpu_time(timer_load_mom_2)
-        CALL cpu_time(timer_tot_2)
-
-        CALL mpi_barrier(MPI_COMM_WORLD, ierr)
-
-        IF(my_id.EQ.0) WRITE(*,*) '** Time load mom : ', timer_load_mom_2 - timer_load_mom_1, ' **'
-        IF(my_id.EQ.0) WRITE(*,*) '** Total load time : ', timer_tot_2 - timer_tot_1, ' **'
-
-    END SUBROUTINE load_moments
-    !******************************************************************************!
-
-
-    !******************************************************************************!
-    !!!!!!! Load moments from a previous output file with possible different PJ
-    !******************************************************************************!
-    SUBROUTINE load_output_adapt_pj
-        IMPLICIT NONE
-        INTEGER :: pmaxloop_e, pmaxloop_i, jmaxloop_e, jmaxloop_i, Nkx_cp, Nky_cp, Nz_cp
-        INTEGER :: deltap = 1
-
-        ! Checkpoint filename
-        WRITE(rstfile,'(a,a1,i2.2,a3)') TRIM(resfile0),'_',job2load,'.h5'
-
-        IF (my_id .EQ. 0) WRITE(*,'(3x,a)') "Resume from ", rstfile
-        ! Open file
-        CALL openf(rstfile, fidrst,mpicomm=comm0)
-        ! Get grid info
-        CALL getatt(fidrst,"/data/input/" , "Nkx", Nkx_cp)
-        CALL getatt(fidrst,"/data/input/" , "Nky", Nky_cp)
-        CALL getatt(fidrst,"/data/input/" ,  "Nz",  Nz_cp)
-        IF(Nz .EQ. 1) deltap = 2
-        !!!!!!!!! Load electron moments
-        IF (KIN_E) THEN
-        ! Get the checkpoint moments degrees to allocate memory
-        CALL getatt(fidrst,"/data/input/" , "pmaxe", pmaxe_cp)
-        CALL getatt(fidrst,"/data/input/" , "jmaxe", jmaxe_cp)
-        IF (my_id .EQ. 0) WRITE(*,*) "Pe_cp = ", pmaxe_cp
-        IF (my_id .EQ. 0) WRITE(*,*) "Je_cp = ", jmaxe_cp
-        CALL getatt(fidrst,"/data/input/" , "start_iframe5d", n0)
-        ! Allocate the required size to load checkpoints moments
-        ! CALL allocate_array(moments_e_cp, 1,pmaxe_cp+1, 1,jmaxe_cp+1, ikxs,ikxe, ikys,ikye, izs,ize)
-        CALL allocate_array(moments_e_cp, 1,pmaxe_cp/deltap+1, 1,jmaxe_cp+1, 1,Nky_cp, 1,Nkx_cp, 1,Nz_cp)
-        ! Find the last results of the checkpoint file by iteration
-        n_ = n0+1
-        WRITE(dset_name, "(A, '/', i6.6)") "/data/var5d/moments_e", n_ ! start with moments_e/000001
-        DO WHILE (isdataset(fidrst, dset_name)) ! If n_ is not a file we stop the loop
-        n_ = n_ + 1
-        WRITE(dset_name, "(A, '/', i6.6)") "/data/var5d/moments_e", n_ ! updtate file number
-        ENDDO
-        n_ = n_ - 1 ! n_ is not a file so take the previous one n_-1
-        ! Read state of system from checkpoint file and load every moment to change the distribution
-        WRITE(dset_name, "(A, '/', i6.6)") "/data/var5d/moments_e", n_
-        ! CALL getarrnd(fidrst, dset_name, moments_e_cp(1:pmaxe_cp+1, 1:jmaxe_cp+1, ikys:ikye, ikxs:ikxe, izs:ize),(/1,3/))
-        CALL getarr(fidrst, dset_name, moments_e_cp(1:pmaxe_cp/deltap+1, 1:jmaxe_cp+1, 1:Nky_cp, 1:Nkx_cp, 1:Nz_cp))
-        ! Initialize simulation moments array with checkpoints ones
-        ! (they may have a larger number of polynomials, set to 0 at the begining)
-        moments_e = 0._dp;
-        pmaxloop_e = min(ipe_e,pmaxe_cp/deltap+1)
-        jmaxloop_e = min(ije_e,jmaxe_cp+1)
-        IF (ips_e .LE. pmaxe_cp/deltap+1) THEN
-          DO ip=ips_e,pmaxloop_e
-          IF (ijs_e .LE. jmaxe_cp+1) THEN
-            DO ij=ijs_e,jmaxloop_e
-                DO ikx=ikxs,ikxe
-                DO iky=ikys,ikye
-                DO iz = izs,ize
-                    moments_e(ip,ij,iky,ikx,iz,:) = moments_e_cp(ip,ij,iky,ikx,iz)
-                ENDDO
-                ENDDO
-                ENDDO
-            ENDDO
-          ENDIF
-          ENDDO
-        ENDIF
-        ! Deallocate checkpoint arrays
-        DEALLOCATE(moments_e_cp)
-        ENDIF
-        !!!!!!! Load ion moments
-        ! Get the checkpoint moments degrees to allocate memory
-        CALL getatt(fidrst,"/data/input/" , "pmaxi", pmaxi_cp)
-        CALL getatt(fidrst,"/data/input/" , "jmaxi", jmaxi_cp)
-        IF (my_id .EQ. 0) WRITE(*,*) "Pi_cp = ", pmaxi_cp
-        IF (my_id .EQ. 0) WRITE(*,*) "Ji_cp = ", jmaxi_cp
-        CALL getatt(fidrst,"/data/input/" , "start_iframe5d", n0)
-        ! Allocate the required size to load checkpoints moments
-        ! CALL allocate_array(moments_i_cp, 1,pmaxi_cp+1, 1,jmaxi_cp+1, ikxs,ikxe, ikys,ikye, izs,ize)
-        CALL allocate_array(moments_i_cp, 1,pmaxi_cp/deltap+1, 1,jmaxi_cp+1, 1,Nky_cp, 1,Nkx_cp, 1,Nz_cp)
-        ! Find the last results of the checkpoint file by iteration
-        n_ = n0+1
-        WRITE(dset_name, "(A, '/', i6.6)") "/data/var5d/moments_i", n_ ! start with moments_e/000001
-        DO WHILE (isdataset(fidrst, dset_name)) ! If n_ is not a file we stop the loop
-        n_ = n_ + 1
-        WRITE(dset_name, "(A, '/', i6.6)") "/data/var5d/moments_i", n_ ! updtate file number
-        ENDDO
-        n_ = n_ - 1 ! n_ is not a file so take the previous one n_-1
-
-        ! Read state of system from checkpoint file and load every moment to change the distribution
-        WRITE(dset_name, "(A, '/', i6.6)") "/data/var5d/moments_i", n_
-        ! CALL getarrnd(fidrst, dset_name, moments_i_cp(1:pmaxi_cp+1, 1:jmaxi_cp+1, ikys:ikye, ikxs:ikxe, izs:ize),(/1,3/))
-        CALL getarr(fidrst, dset_name, moments_i_cp(1:pmaxi_cp/deltap+1, 1:jmaxi_cp+1, 1:Nky_cp, 1:Nkx_cp, 1:Nz_cp))
-
-        ! Initialize simulation moments array with checkpoints ones
-        ! (they may have a larger number of polynomials, set to 0 at the begining)
-        moments_i = 0._dp;
-        pmaxloop_i = min(ipe_i,pmaxi_cp/deltap+1)
-        jmaxloop_i = min(ije_i,jmaxi_cp+1)
-        IF (ips_i .LE. pmaxi_cp/deltap+1) THEN
-          DO ip=ips_i,pmaxloop_i
-          IF (ijs_i .LE. jmaxi_cp+1) THEN
-            DO ij=ijs_i,jmaxloop_i
-                DO ikx=ikxs,ikxe
-                DO iky=ikys,ikye
-                DO iz = izs,ize
-                    moments_i(ip,ij,iky,ikx,iz,:) = moments_i_cp(ip,ij,iky,ikx,iz)
-                ENDDO
-                ENDDO
-                ENDDO
-            ENDDO
-          ENDIF
-          ENDDO
-        ENDIF
-        ! Deallocate checkpoint arrays
-        DEALLOCATE(moments_i_cp)
-
-        ! Read time dependent attributes to continue simulation
-        CALL getatt(fidrst, dset_name, 'cstep', cstep)
-        CALL getatt(fidrst, dset_name, 'time', time)
-        CALL getatt(fidrst, dset_name, 'jobnum', jobnum)
-        jobnum = jobnum+1
-        CALL getatt(fidrst, dset_name, 'iframe2d',iframe2d)
-        CALL getatt(fidrst, dset_name, 'iframe5d',iframe5d)
-        iframe2d = iframe2d-1; iframe5d = iframe5d-1
-
-        CALL closef(fidrst)
-
-        IF (my_id .EQ. 0) WRITE(*,'(3x,a)') "Reading from restart file "//TRIM(rstfile)//" completed!"
-
-    END SUBROUTINE load_output_adapt_pj
-    !******************************************************************************!
+  !******************************************************************************!
+  !!!!!!! Fill initial moments value using the final state of a previous simulation
+  !******************************************************************************!
+  SUBROUTINE load_moments
+    USE parallel, ONLY: comm0
+    IMPLICIT NONE
+    CHARACTER(LEN=50) :: dset_name
+    REAL(xp):: time_cp
+    INTEGER :: cstep_cp, jobnum_cp
+    INTEGER :: n_
+    INTEGER :: deltap_cp
+    INTEGER :: pmax_cp, jmax_cp, n0, Nkx_cp, Nky_cp, Nz_cp, Na_cp, Np_cp, Nj_cp
+    INTEGER :: ia,ip,ij,iky,ikx,iz, iacp,ipcp,ijcp,iycp,ixcp,izcp, ierr
+    INTEGER :: ipi,iji,izi
+    REAL(xp):: timer_tot_1,timer_tot_2
+    COMPLEX(xp), DIMENSION(:,:,:,:,:,:), ALLOCATABLE :: moments_cp
+    CALL cpu_time(timer_tot_1)
+    ! Checkpoint filename
+    WRITE(rstfile,'(a,a1,i2.2,a3)') TRIM(resfile0),'_',job2load,'.h5'
+    CALL speak("Resume from "//rstfile)
+    ! Open file
+    CALL openf(rstfile, fidrst,mpicomm=comm0)
+    ! Get the checkpoint moments degrees to allocate memory
+    CALL getatt(fidrst,"/data/input/grid" ,   "Nkx",   Nkx_cp)
+    CALL getatt(fidrst,"/data/input/grid" ,   "Nky",   Nky_cp)
+    CALL getatt(fidrst,"/data/input/grid" ,    "Nz",    Nz_cp)
+    IF(Nz_cp .NE. Nz) &
+      ERROR STOP "!! cannot change Nz in a restart, interp or reduction not implemented !!"
+    CALL getatt(fidrst,"/data/input/grid" ,"deltap",deltap_cp)
+    IF(deltap_cp .NE. deltap) &
+      ERROR STOP "!! cannot change deltap in a restart, not implemented !!"
+    CALL getatt(fidrst,"/data/input/grid" , "pmax", pmax_cp)
+    Np_cp = pmax_cp/deltap_cp+1
+    CALL getatt(fidrst,"/data/input/grid" , "jmax", jmax_cp)
+    Nj_cp = jmax_cp+1
+    CALL getatt(fidrst,"/data/input/model",   "Na",   Na_cp)
+    CALL getatt(fidrst,"/data/input/basic" , "start_iframe5d", n0)
+    ! Find the last results of the checkpoint file by iteration
+    n_ = n0+1
+    WRITE(dset_name, "(A, '/', i6.6)") "/data/var5d/moments", n_ ! start with moments/000001
+    DO WHILE (isdataset(fidrst, dset_name)) ! If n_ is not a file we stop the loop
+      n_ = n_ + 1
+      WRITE(dset_name, "(A, '/', i6.6)") "/data/var5d/moments", n_ ! updtate file number
+    ENDDO
+    n_ = n_ - 1 ! n_ is not a file so take the previous one n_-1
+    WRITE(dset_name, "(A, '/', i6.6)") "/data/var5d/moments", n_
+    ! Read time dependent attributes to continue simulation
+    CALL getatt(fidrst, dset_name,  'cstep', cstep_cp)
+    CALL getatt(fidrst, dset_name,   'time', time_cp)
+    CALL getatt(fidrst, dset_name, 'jobnum', jobnum_cp)
+    ! Set the cstep, step, time and iframnd variables in basic from the checkpoint
+    CALL set_basic_cp(cstep_cp,time_cp,jobnum_cp)
+    CALL speak('.. restart from t = '//str(time))
+    ! Read state of system from checkpoint file
+    ! Brute force loading: load the full moments and take what is needed (RAM dangerous...)
+    ! other possibility is to loop over slices
+    CALL allocate_array(moments_cp, 1,Na_cp, 1,Np_cp, 1,Nj_cp, 1,Nky_cp, 1,Nkx_cp, 1,Nz_cp)
+    WRITE(dset_name, "(A, '/', i6.6)") "/data/var5d/moments", n_
+    CALL getarr(fidrst, dset_name, moments_cp(:,:,:,:,:,:))
+
+    moments     = 0._xp;
+    z: DO iz = 1,local_nz
+      izcp = iz + local_nz_offset
+      izi  = iz + ngz/2
+      x: DO ikx=1,local_nkx
+        ixcp = ikx+local_nkx_offset
+        y: DO iky=1,local_nky
+          iycp = iky + local_nky_offset
+          j: DO ij=1,local_nj
+            ijcp = ij + local_nj_offset
+            iji  = ij + ngj/2
+            p: DO ip=1,local_np
+              ipcp = ip + local_np_offset
+              ipi  = ip + ngp/2
+              a: DO ia=1,Na_cp
+                iacp = ia + local_na_offset
+                ! IF((iacp.LE.Na_cp).AND.(ipcp.LE.Np_cp).AND.(ijcp.LE.Nj_cp).AND.(iycp.LE.Nky_cp).AND.(ixcp.LE.Nkx_cp).AND.(izcp.LE.Nz_cp)) &
+                  moments(ia,ipi,iji,iky,ikx,izi,1) = moments_cp(iacp,ipcp,ijcp,iycp,ixcp,izcp)
+              ENDDO a
+            ENDDO p
+          ENDDO j
+        ENDDO y
+      ENDDO x
+    ENDDO z
+    !! deallocate the full moment variable
+    DEALLOCATE(moments_cp)
+    CALL closef(fidrst)
+    CALL speak("Reading from restart file "//TRIM(rstfile)//" completed!")
+    CALL mpi_barrier(MPI_COMM_WORLD, ierr)
+    ! stop time measurement
+    CALL cpu_time(timer_tot_2)
+    CALL speak('** Total load time : '// str(timer_tot_2 - timer_tot_1)//' **')
+
+  END SUBROUTINE load_moments
+  !******************************************************************************!
 
 END MODULE restarts
diff --git a/src/solve_EM_fields.F90 b/src/solve_EM_fields.F90
index 3f355e5996f5abaabbb118c2840ab6b93634abd7..01fc26ff1ba95ab7205e521fad39eeda595f252b 100644
--- a/src/solve_EM_fields.F90
+++ b/src/solve_EM_fields.F90
@@ -7,7 +7,7 @@ SUBROUTINE solve_EM_fields
 
   CALL poisson
 
-  IF(beta .GT. 0._dp) &
+  IF(beta .GT. 0._xp) &
   CALL ampere
 
 CONTAINS
@@ -15,111 +15,123 @@ CONTAINS
   SUBROUTINE poisson
     ! Solve poisson equation to get phi
     USE time_integration, ONLY: updatetlevel
-    USE array,            ONLY: kernel_e, kernel_i, inv_poisson_op, inv_pol_ion
-    USE fields,           ONLY: phi, moments_e, moments_i
-    USE grid
-    USE calculus,         ONLY : simpson_rule_z
-    USE parallel,         ONLY : manual_3D_bcast
-    use model,            ONLY : q_e, q_i, lambdaD, KIN_E, sigma_e, sigma_i
-    USE processing,       ONLY : compute_density
-    USE geometry,         ONLY : iInt_Jacobian, Jacobian
+    USE array,            ONLY: kernel, inv_poisson_op, inv_pol_ion
+    USE fields,           ONLY: phi, moments
+    USE grid,             ONLY: local_na, local_nky, local_nkx, local_nz,ngz, SOLVE_POISSON,&
+                                kyarray, contains_kx0, contains_ky0,ikx0,iky0, zweights_SR, ieven,&
+                                ip0, total_nj, ngj
+    USE calculus,         ONLY: simpson_rule_z
+    USE parallel,         ONLY: manual_3D_bcast
+    USE model,            ONLY: lambdaD, ADIAB_E
+    use species,          ONLY: q
+    USE processing,       ONLY: compute_density
+    USE geometry,         ONLY: iInt_Jacobian, Jacobian
     IMPLICIT NONE
 
-    INTEGER     :: ini,ine
-    COMPLEX(dp) :: fsa_phi, intf_   ! current flux averaged phi
-    COMPLEX(dp), DIMENSION(izs:ize) :: rho_i, rho_e, integrant  ! charge density q_a n_a and aux var
-
-    ! Execution time start
-    CALL cpu_time(t0_poisson)
-    !! Poisson can be solved only for process containing p=0
+    INTEGER     :: in, ia, ikx, iky, iz, izi, ini
+    COMPLEX(xp) :: fsa_phi, intf_, rhtot   ! current flux averaged phi
+    COMPLEX(xp), DIMENSION(local_nz) :: rho, integrant  ! charge density q_a n_a and aux var
+    rhtot = 0
+    !! Poisson can be solved only for process containng p=0
     IF ( SOLVE_POISSON ) THEN
-      kxloop: DO ikx = ikxs,ikxe
-        kyloop: DO iky = ikys,ikye
-          phi(iky,ikx,izs:ize)  = 0._dp
-          !!!! Compute ion particle charge density q_i n_i
-          rho_i = 0._dp
-          DO ini=1,jmaxi+1
-            rho_i(izs:ize) = rho_i(izs:ize) &
-             +q_i*kernel_i(ini,iky,ikx,izs:ize,0)*moments_i(ip0_i,ini,iky,ikx,izs:ize,updatetlevel)
-          END DO
-          !!!! Compute electron particle charge density q_e n_e
-          rho_e = 0._dp
-          IF (KIN_E) THEN ! Kinetic electrons
-          DO ine=1,jmaxe+1
-            rho_e(izs:ize) = rho_e(izs:ize) &
-             +q_e*kernel_e(ine,iky,ikx,izs:ize,0)*moments_e(ip0_e,ine,iky,ikx,izs:ize,updatetlevel)
-          END DO
-          ELSE  ! Adiabatic electrons
+        x:DO ikx = 1,local_nkx
+          y:DO iky = 1,local_nky
+            !!!!!!!!!!!!!!! Compute particle charge density q_a n_a for each evolved species
+            DO iz = 1,local_nz
+              izi = iz+ngz/2
+              rho(iz) = 0._xp
+              DO in = 1,total_nj
+                ini = in+ngj/2
+                DO ia = 1,local_na
+                  rho(iz) = rho(iz) + q(ia)*kernel(ia,ini,iky,ikx,izi,ieven)&
+                              *moments(ia,ip0,ini,iky,ikx,izi,updatetlevel)
+                END DO
+              END DO
+            END DO
+            !!!!!!!!!!!!!!! adiabatic electron contribution if asked
             ! Adiabatic charge density (linked to flux surface averaged phi)
             ! We compute the flux surface average solving a flux surface averaged
             ! Poisson equation, i.e.
             ! [qi^2(1-sum_j K_j^2)/tau_i] <phi>_psi = <q_i n_i >_psi
             !       inv_pol_ion^-1         fsa_phi  = simpson(Jacobian rho_i ) * iInt_Jacobian
-            fsa_phi = 0._dp
-            IF(kyarray(iky).EQ.0._dp) THEN ! take ky=0 mode (y-average)
-              ! Prepare integrant for z-average
-              integrant(izs:ize) = Jacobian(izs:ize,0)*rho_i(izs:ize)*inv_pol_ion(iky,ikx,izs:ize)
-              call simpson_rule_z(integrant(izs:ize),intf_) ! get the flux averaged phi
-              fsa_phi = intf_ * iInt_Jacobian !Normalize by 1/int(Jxyz)dz
+            IF (ADIAB_E) THEN
+              fsa_phi = 0._xp
+              IF(kyarray(iky).EQ.0._xp) THEN ! take ky=0 mode (y-average)
+                ! Prepare integrant for z-average
+                DO iz = 1,local_nz
+                  izi = iz+ngz/2
+                  integrant(iz) = Jacobian(izi,ieven)*rho(iz)*inv_pol_ion(iky,ikx,iz)
+                ENDDO
+                call simpson_rule_z(local_nz,zweights_SR,integrant,intf_) ! get the flux averaged phi
+                fsa_phi = intf_ * iInt_Jacobian !Normalize by 1/int(Jxyz)dz
+              ENDIF
+              rho = rho + fsa_phi
             ENDIF
-            rho_e(izs:ize) = fsa_phi
-          ENDIF
-          !!!!!!!!!!!!!!! Inverting the poisson equation !!!!!!!!!!!!!!!!!!!!!!!!!!
-          phi(iky,ikx,izs:ize) = (rho_e(izs:ize) + rho_i(izs:ize))*inv_poisson_op(iky,ikx,izs:ize)
-        END DO kyloop
-      END DO kxloop
+            !!!!!!!!!!!!!!! adiabatic ions ?
+            ! IF (ADIAB_I) THEN
+            ! ENDIF
+            !!!!!!!!!!!!!!! Inverting the poisson equation
+            DO iz = 1,local_nz
+              phi(iky,ikx,iz+ngz/2) = inv_poisson_op(iky,ikx,iz)*rho(iz)
+            ENDDO
+            rhtot = rhtot + sum(real(rho))
+          ENDDO y
+        ENDDO x
       ! Cancel origin singularity
-      IF (contains_kx0 .AND. contains_ky0) phi(iky_0,ikx_0,:) = 0._dp
+      IF (contains_kx0 .AND. contains_ky0) phi(iky0,ikx0,:) = 0._xp
     ENDIF
-
     ! Transfer phi to all the others process along p
-    CALL manual_3D_bcast(phi(ikys:ikye,ikxs:ikxe,izs:ize))
-
-    ! Execution time end
-    CALL cpu_time(t1_poisson)
-    tc_poisson = tc_poisson + (t1_poisson - t0_poisson)
+    CALL manual_3D_bcast(phi,local_nky,local_nkx,local_nz+ngz)
+    ! print*, SUM(ABS(moments(1,:,:,:,:,:,updatetlevel)))
+    ! print*, SUM(REAL(moments(1,:,:,:,:,:,updatetlevel)))
+    ! print*, SUM(IMAG(moments(1,:,:,:,:,:,updatetlevel)))
+    ! print*, SUM(REAL(phi(:,:,(1+ngz/2):(local_nz+ngz/2))))
+    ! print*, SUM(REAL(phi(:,:,:)))
+    ! print*, SUM(IMAG(phi(:,:,(1+ngz/2):(local_nz+ngz/2))))
+    ! print*, SUM(inv_poisson_op(:,:,:))
+    ! print*, rhtot
+    ! stop
   END SUBROUTINE poisson
 
   SUBROUTINE ampere
     ! Solve ampere equation to get psi
     USE time_integration, ONLY: updatetlevel
-    USE array,            ONLY: kernel_e, kernel_i, inv_ampere_op
-    USE fields,           ONLY: moments_i, moments_e, psi
-    USE grid
-    USE parallel,         ONLY : manual_3D_bcast
-    use model,            ONLY : sqrt_tau_o_sigma_e, sqrt_tau_o_sigma_i, q_e, q_i, beta, KIN_E
+    USE array,            ONLY: kernel, inv_ampere_op
+    USE fields,           ONLY: moments, psi
+    USE grid,             ONLY: local_na, local_nky, local_nkx, local_nz,ngz, SOLVE_AMPERE,&
+                                contains_kx0, contains_ky0,ikx0,iky0, iodd,&
+                                ip1, total_nj, ngj
+    USE parallel,         ONLY: manual_3D_bcast
+    use species,          ONLY: sqrt_tau_o_sigma, q
+    use model,            ONLY: beta
     IMPLICIT NONE
-
-    INTEGER     :: ini,ine
-
-    ! Execution time start
-    CALL cpu_time(t0_poisson)
-    !! Ampere can be solved only with beta > 0 and for process containing p=1 moments
+    COMPLEX(xp) :: j_a ! current density
+    INTEGER     :: in, ia, ikx, iky, iz, ini, izi
+    !! Ampere can be solved only with beta > 0 and for process containng p=1 moments
     IF ( SOLVE_AMPERE ) THEN
-      psi(ikys:ikye,ikxs:ikxe,izs:ize)  = 0._dp
-      !!!! ion particle current density contribution "q_i u_i"
-      DO ini=1,jmaxi+1
-        psi(ikys:ikye,ikxs:ikxe,izs:ize) = psi(ikys:ikye,ikxs:ikxe,izs:ize) &
-         +q_i*sqrt_tau_o_sigma_i*kernel_i(ini,ikys:ikye,ikxs:ikxe,izs:ize,0)*moments_i(ip1_i,ini,ikys:ikye,ikxs:ikxe,izs:ize,updatetlevel)
-      END DO
-      !!!! electron particle current density contribution "q_e u_e"
-      DO ine=1,jmaxe+1
-        psi(ikys:ikye,ikxs:ikxe,izs:ize) = psi(ikys:ikye,ikxs:ikxe,izs:ize) &
-          +q_e*sqrt_tau_o_sigma_e*kernel_e(ine,ikys:ikye,ikxs:ikxe,izs:ize,0)*moments_e(ip1_e,ine,ikys:ikye,ikxs:ikxe,izs:ize,updatetlevel)
-      END DO
-      !!!!!!!!!!!!!!! Inverting the poisson equation !!!!!!!!!!!!!!!!!!!!!!!!!!
-      psi(ikys:ikye,ikxs:ikxe,izs:ize) = beta*psi(ikys:ikye,ikxs:ikxe,izs:ize)*inv_ampere_op(ikys:ikye,ikxs:ikxe,izs:ize)
-
-      ! Cancel origin singularity
-      IF (contains_kx0 .AND. contains_ky0) psi(iky_0,ikx_0,:) = 0._dp
+      z:DO iz = 1,local_nz
+      izi = iz+ngz/2
+        x:DO ikx = 1,local_nkx
+          y:DO iky = 1,local_nky
+          !!!!!!!!!!!!!!! compute current density contribution "j_a = q_a u_a" for each species
+          j_a = 0._xp
+          n:DO in=1,total_nj
+          ini = in+ngj/2
+            a:DO ia = 1,local_na
+            j_a = j_a &
+              +q(ia)*sqrt_tau_o_sigma(ia)*kernel(ia,ini,iky,ikx,izi,iodd)*moments(ia,ip1,ini,iky,ikx,izi,updatetlevel)
+            ENDDO a
+          ENDDO n
+          !!!!!!!!!!!!!!! Inverting the Ampere equation
+          psi(iky,ikx,iz+ngz/2) = beta*inv_ampere_op(iky,ikx,iz)*j_a
+          ENDDO y
+        ENDDO x
+      ENDDO z
     ENDIF
-
+    ! Cancel origin singularity
+    IF (contains_kx0 .AND. contains_ky0) psi(iky0,ikx0,:) = 0._xp
     ! Transfer phi to all the others process along p
-    CALL manual_3D_bcast(psi(ikys:ikye,ikxs:ikxe,izs:ize))
-
-    ! Execution time end
-    CALL cpu_time(t1_poisson)
-    tc_poisson = tc_poisson + (t1_poisson - t0_poisson)
+    CALL manual_3D_bcast(psi,local_nky,local_nkx,local_nz+ngz)
   END SUBROUTINE ampere
 
 END SUBROUTINE solve_EM_fields
diff --git a/src/species_mod.F90 b/src/species_mod.F90
index 426ae36e3ec9ae7633fb6c2dbe47103711a6a89d..d00db354ebe70902c517397e1b55cf6ba6da745d 100644
--- a/src/species_mod.F90
+++ b/src/species_mod.F90
@@ -1,56 +1,156 @@
-MODULE species_mod
-  USE :: basic
+MODULE species
+  ! Module for diagnostic parameters
+  USE prec_const
   IMPLICIT NONE
   PRIVATE
+  !! Input parameters
+  CHARACTER(len=32) :: name_               ! name of the species
+  REAL(xp)          :: tau_                ! Temperature
+  REAL(xp)          :: sigma_              ! sqrt mass ratio
+  REAL(xp)          :: q_                  ! Charge
+  REAL(xp)          :: k_N_                ! density drive (L_ref/L_Ni)
+  REAL(xp)          :: k_T_                ! temperature drive (L_ref/L_Ti)
+  !! Arrays to store all species features
+  CHARACTER(len=32),&
+            ALLOCATABLE, DIMENSION(:),  PUBLIC, PROTECTED :: name               ! name of the species
+  REAL(xp), ALLOCATABLE, DIMENSION(:),  PUBLIC, PROTECTED :: tau                ! Temperature
+  REAL(xp), ALLOCATABLE, DIMENSION(:),  PUBLIC, PROTECTED :: sigma              ! sqrt mass ratio
+  REAL(xp), ALLOCATABLE, DIMENSION(:),  PUBLIC, PROTECTED :: q                  ! Charge
+  REAL(xp), ALLOCATABLE, DIMENSION(:),  PUBLIC, PROTECTED :: k_N                ! density drive (L_ref/L_Ni)
+  REAL(xp), ALLOCATABLE, DIMENSION(:),  PUBLIC, PROTECTED :: k_T                ! temperature drive (L_ref/L_Ti)
+  REAL(xp), ALLOCATABLE, DIMENSION(:,:),PUBLIC, PROTECTED :: nu_ab              ! Collision frequency tensor
+  !! Auxiliary variables to store precomputation
+  REAL(xp), ALLOCATABLE, DIMENSION(:),PUBLIC, PROTECTED :: tau_q              ! factor of the magnetic moment coupling
+  REAL(xp), ALLOCATABLE, DIMENSION(:),PUBLIC, PROTECTED :: q_tau              !
+  REAL(xp), ALLOCATABLE, DIMENSION(:),PUBLIC, PROTECTED :: sqrtTau_q          ! factor of parallel moment term
+  REAL(xp), ALLOCATABLE, DIMENSION(:),PUBLIC, PROTECTED :: q_sigma_sqrtTau    ! factor of parallel phi term
+  REAL(xp), ALLOCATABLE, DIMENSION(:),PUBLIC, PROTECTED :: sigma2_tau_o2      ! factor of the Kernel argument
+  REAL(xp), ALLOCATABLE, DIMENSION(:),PUBLIC, PROTECTED :: sqrt_sigma2_tau_o2 ! to avoid multiple SQRT eval
+  REAL(xp), ALLOCATABLE, DIMENSION(:),PUBLIC, PROTECTED :: q2_tau             ! factor of the gammaD sum
+  REAL(xp), ALLOCATABLE, DIMENSION(:),PUBLIC, PROTECTED :: q_o_sqrt_tau_sigma ! For psi field terms
+  REAL(xp), ALLOCATABLE, DIMENSION(:),PUBLIC, PROTECTED :: sqrt_tau_o_sigma   ! For Ampere eq
+  REAL(xp), ALLOCATABLE, DIMENSION(:),PUBLIC, PROTECTED :: xpdx               ! radial pressure gradient
+  !! Accessible routines
+  PUBLIC :: species_readinputs, species_outputinputs
+CONTAINS
 
-  ! Classe that encapsulate all atributes and methods for one arbitrary species
-  TYPE, PUBLIC :: species_class
-      REAL(dp), PUBLIC :: q     !charge
-      REAL(dp), PUBLIC :: sigma !sqrt masse ratio w.r.t. ion mass
-      REAL(dp), PUBLIC :: tau   !temperatrue ratio w.r.t. electron temp.
-      INTEGER, DIMENSION(:), ALLOCATABLE, PUBLIC :: parray ! Hermite degrees
-      INTEGER, DIMENSION(:), ALLOCATABLE, PUBLIC :: jarray ! Laguerre degrees
-      ! Hermite-Moments: N_a^pj ! DIMENSIONs correspond to: p, j, kx, ky, z, updatetlevel.
-      COMPLEX(dp), DIMENSION(:,:,:,:,:,:), ALLOCATABLE :: moments
-      ! Arrays to store the rhs, for time integration (ip,ij,ikx,iky,iz,updatetlevel)
-      COMPLEX(dp), DIMENSION(:,:,:,:,:,:), ALLOCATABLE :: moments_rhs
-      ! Non linear term array (ip,ij,ikx,iky,iz)
-      COMPLEX(dp), DIMENSION(:,:,:,:,:),   ALLOCATABLE :: Sapj
-      ! lin rhs p,j coefficient storage (ip,ij)
-      REAL(dp), DIMENSION(:,:), ALLOCATABLE :: xnapj
-      REAL(dp), DIMENSION(:),   ALLOCATABLE :: xnapp1j, xnapm1j,   xnapp2j,   xnapm2j, xnapjp1, xnapjm1
-      REAL(dp), DIMENSION(:,:), ALLOCATABLE :: ynapp1j, ynapm1j,   ynapp1jm1, ynapm1jm1 ! mirror lin coeff for non adiab mom
-      REAL(dp), DIMENSION(:,:), ALLOCATABLE :: zNapm1j, zNapm1jp1, zNapm1jm1            ! mirror lin coeff for adiab mom
-      ! Kernel function evaluation (ij,ikx,iky,iz)
-      REAL(dp), DIMENSION(:,:,:,:), ALLOCATABLE :: kernel
-      !! Diagnostics
-      ! Gyrocenter density (ikx,iky,iz)
-      COMPLEX(dp), DIMENSION(:,:,:), ALLOCATABLE :: Na00
+  SUBROUTINE species_readinputs
+    !    Read the input parameters
+    USE basic, ONLY : lu_in
+    USE model, ONLY : Na, nu, ADIAB_E
+    USE prec_const
+    IMPLICIT NONE
+    INTEGER :: ia,ib
+    ! expected namelist in the input file
+    NAMELIST /SPECIES/ &
+      name_, tau_, sigma_, q_, k_N_, k_T_
+    ! allocate the arrays of species parameters
+    CALL species_allocate
+    ! loop over the species namelists in the input file
+    DO ia = 1,Na
+      ! default parameters
+      name_  = 'ions'
+      tau_   = 1._xp
+      sigma_ = 1._xp
+      q_     = 1._xp
+      k_N_   = 2.22_xp
+      k_T_   = 6.96_xp
+      ! read input
+      READ(lu_in,species)
+      ! place values found in the arrays
+      name(ia)               = name_
+      tau(ia)                = tau_
+      sigma(ia)              = sigma_
+      q(ia)                  = q_
+      k_N(ia)                = k_N_
+      k_T(ia)                = k_T_
+      tau_q(ia)              = tau_/q_
+      ! precompute factors
+      q_tau(ia)              = q_/tau_
+      sqrtTau_q(ia)          = sqrt(tau_)/q_
+      q_sigma_sqrtTau(ia)    = q_/sigma_/SQRT(tau_)
+      sigma2_tau_o2(ia)      = sigma_**2 * tau_/2._xp
+      sqrt_sigma2_tau_o2(ia) = SQRT(sigma_**2 * tau_/2._xp)
+      q2_tau(ia)             = (q_**2)/tau_
+      q_o_sqrt_tau_sigma(ia) = q_/SQRT(tau_)/sigma_
+      sqrt_tau_o_sigma(ia)   = SQRT(tau_)/sigma_
+      xpdx(ia)               = 0._xp !not implemented yet
+      ! We remove the adiabatic electron flag if electrons are included
+      SELECT CASE (name_)
+      CASE ('electrons','e','electron')
+        ADIAB_E = .FALSE.
+      END SELECT
+    ENDDO
+    !! Set collision frequency tensor
+    IF (nu .EQ. 0) THEN
+      nu_ab = 0
+    ELSE
+      DO ia = 1,Na
+        DO ib = 1,Na
+          !! We use the ion-ion collision as normalization with definition
+          !   nu_ii = 4 sqrt(pi)/3 T_i^(-3/2) m_i^(-1/2) q^4 n_i0 ln(Lambda)
+          SELECT CASE (name(ia))
+          CASE ('electrons','e','electron') ! e-e and e-i collision
+            nu_ab(ia,ib) = nu/sigma(ia) * (tau(ia))**(3._xp/2._xp)  ! (where already multiplied by 0.532)
+          CASE ('ions','ion','i') ! i-e and i-i collision
+            nu_ab(ia,ib) = nu
+          CASE DEFAULT
+            ERROR STOP "!! No collision model for these species interactions"
+          END SELECT
+          ! I think we can just write
+          ! nu_ab(ia,ib) = nu/sigma(ia) * (tau(ia))**(3._xp/2._xp)
+        ENDDO
+      ENDDO
+    ENDIF
+    ! nu_e            = nu/sigma_e * (tau_e)**(3._xp/2._xp)  ! electron-ion collision frequency (where already multiplied by 0.532)
+    ! nu_i            = nu ! ion-ion collision frequ.
+    ! nu_ee           = nu_e ! e-e coll. frequ.
+    ! nu_ie           = nu_i ! i-e coll. frequ.
 
-      ! particle density (ikx,iky,iz)
-      COMPLEX(dp), DIMENSION(:,:,:), ALLOCATABLE :: density
+  END SUBROUTINE species_readinputs
 
-      ! particle temperature for electron and ions (ikx,iky,iz)
-      COMPLEX(dp), DIMENSION(:,:,:), ALLOCATABLE :: temperature
 
-    CONTAINS
-      ! Initialization procedures
-      PROCEDURE, PUBLIC :: init                => species_init
-      PROCEDURE, PUBLIC :: setup_arrays        => species_setup_arrays
-      PROCEDURE, PUBLIC :: evaluate_kernels    => species_evaluate_kernels
-      ! Diagnostics
-      PROCEDURE, PUBLIC :: compute_density     => species_compute_density
-      PROCEDURE, PUBLIC :: compute_temperature => species_compute_temperature
-    END TYPE species_class
+  SUBROUTINE species_outputinputs(fid)
+    !    Write the input parameters to the results_xx.h5 file
+    USE futils, ONLY: attach, creatd
+    USE model,  ONLY: Na
+    IMPLICIT NONE
+    INTEGER, INTENT(in) :: fid
+    INTEGER :: ia
+    CHARACTER(len=256)  :: str
+    DO ia = 1,Na
+      WRITE(str,'(a,a)') '/data/input/', name(ia)
+      CALL creatd(fid, 0,(/0/),TRIM(str),'Species Input')
+      CALL attach(fid, TRIM(str),    "name",  name(ia))
+      CALL attach(fid, TRIM(str),     "tau",   tau(ia))
+      CALL attach(fid, TRIM(str),   "sigma", sigma(ia))
+      CALL attach(fid, TRIM(str),       "q",     q(ia))
+      CALL attach(fid, TRIM(str),      "k_N",  k_N(ia))
+      CALL attach(fid, TRIM(str),      "k_T",  k_T(ia))
+    ENDDO
+  END SUBROUTINE species_outputinputs
 
-  ! Routines that every species may use
-  CONTAINS
-    SUBROUTINE species_setup_arrays(this)
+  SUBROUTINE species_allocate
+    USE model, ONLY : Na
+    IMPLICIT NONE
+    !! Allocate the arrays
+      ALLOCATE(              name(1:Na))
+      ALLOCATE(        nu_ab(1:Na,1:Na))
+      ALLOCATE(               tau(1:Na))
+      ALLOCATE(             sigma(1:Na))
+      ALLOCATE(                 q(1:Na))
+      ALLOCATE(               k_N(1:Na))
+      ALLOCATE(               k_T(1:Na))
+      ALLOCATE(             tau_q(1:Na))
+      ALLOCATE(             q_tau(1:Na))
+      ALLOCATE(         sqrtTau_q(1:Na))
+      ALLOCATE(   q_sigma_sqrtTau(1:Na))
+      ALLOCATE(     sigma2_tau_o2(1:Na))
+      ALLOCATE(sqrt_sigma2_tau_o2(1:Na))
+      ALLOCATE(            q2_tau(1:Na))
+      ALLOCATE(q_o_sqrt_tau_sigma(1:Na))
+      ALLOCATE(  sqrt_tau_o_sigma(1:Na))
+      ALLOCATE(              xpdx(1:Na))
+  END SUBROUTINE species_allocate
 
-    END SUBROUTINE
-
-    SUBROUTINE species_compute_density(this)
-
-    END SUBROUTINE
-
-END MODULE
+END MODULE species
diff --git a/src/stepon.F90 b/src/stepon.F90
index d85f3fd134152d97dacb1a0438df5313833198e3..f388a6499927adfaf856057db33a30d07ca7ef53 100644
--- a/src/stepon.F90
+++ b/src/stepon.F90
@@ -1,19 +1,20 @@
 SUBROUTINE stepon
-  !   Advance one time step, (num_step=4 for Runge Kutta 4 scheme)
-  USE advance_field_routine, ONLY: advance_time_level, advance_moments
-  USE basic,                 ONLY: nlend, ierr
-  USE closure,               ONLY: apply_closure_model
-  USE ghosts,                ONLY: update_ghosts_moments, update_ghosts_EM
-  use mpi,                   ONLY: MPI_COMM_WORLD
-  USE time_integration,      ONLY: ntimelevel
-  USE prec_const,            ONLY: dp
-  IMPLICIT NONE
-
-  INTEGER :: num_step
-  LOGICAL :: mlend
+   !   Advance one time step, (num_step=4 for Runge Kutta 4 scheme)
+   USE advance_field_routine, ONLY: advance_time_level, advance_moments
+   USE basic,                 ONLY: nlend, chrono_advf, chrono_pois,&
+      chrono_chck, chrono_clos, chrono_ghst, start_chrono, stop_chrono
+   USE closure,               ONLY: apply_closure_model
+   USE ghosts,                ONLY: update_ghosts_moments, update_ghosts_EM
+   use mpi,                   ONLY: MPI_COMM_WORLD
+   USE time_integration,      ONLY: ntimelevel
+   USE prec_const,            ONLY: xp
+   IMPLICIT NONE
+
+   INTEGER :: num_step, ierr
+   LOGICAL :: mlend
 
    DO num_step=1,ntimelevel ! eg RK4 compute successively k1, k2, k3, k4
-   !----- BEFORE: All fields+ghosts are updated for step = n
+      !----- BEFORE: All fields+ghosts are updated for step = n
       ! Compute right hand side from current fields
       ! N_rhs(N_n, nadia_n, phi_n, S_n, Tcoll_n)
       CALL assemble_RHS
@@ -25,165 +26,156 @@ SUBROUTINE stepon
 
       ! Update moments with the hierarchy RHS (step by step)
       ! N_n+1 = N_n + N_rhs(n)
-      CALL advance_moments
+      CALL start_chrono(chrono_advf)
+        CALL advance_moments
+      CALL stop_chrono(chrono_advf)
+
       ! Closure enforcement of moments
-      CALL apply_closure_model
+      CALL start_chrono(chrono_clos)
+        CALL apply_closure_model
+      CALL stop_chrono(chrono_clos)
+
       ! Exchanges the ghosts values of N_n+1
-      CALL update_ghosts_moments
+      CALL start_chrono(chrono_ghst)
+        CALL update_ghosts_moments
+      CALL stop_chrono(chrono_ghst)
 
       ! Update electrostatic potential phi_n = phi(N_n+1) and potential vect psi
-      CALL solve_EM_fields
-      CALL update_ghosts_EM
+      CALL start_chrono(chrono_pois)
+        CALL solve_EM_fields
+      CALL stop_chrono(chrono_pois)
+      ! Update EM ghosts
+      CALL start_chrono(chrono_ghst)
+        CALL update_ghosts_EM
+      CALL stop_chrono(chrono_ghst)
 
       !-  Check before next step
-      CALL checkfield_all()
+      CALL start_chrono(chrono_chck)
+        CALL checkfield_all()
+      CALL stop_chrono(chrono_chck)
+
       IF( nlend ) EXIT ! exit do loop
 
       CALL MPI_BARRIER(MPI_COMM_WORLD,ierr)
-   !----- AFTER: All fields are updated for step = n+1
+      !----- AFTER: All fields are updated for step = n+1
+
    END DO
 
-   CONTAINS
-     !!!! Basic structure to simplify stepon
-     SUBROUTINE assemble_RHS
-       USE moments_eq_rhs, ONLY: compute_moments_eq_rhs
-       USE collision,      ONLY: compute_TColl
-       USE nonlinear,      ONLY: compute_Sapj
-       USE processing,     ONLY: compute_nadiab_moments_z_gradients_and_interp
-       IMPLICIT NONE
-         ! compute auxiliary non adiabatic moments array, gradients and interp
-         CALL compute_nadiab_moments_z_gradients_and_interp
-         ! compute nonlinear term ("if linear" is included inside)
-         CALL compute_Sapj
-         ! compute collision term ("if coll, if nu >0" is included inside)
-         CALL compute_TColl
-         ! compute the moments equation rhs
-         CALL compute_moments_eq_rhs
-     END SUBROUTINE assemble_RHS
-
-      SUBROUTINE checkfield_all ! Check all the fields for inf or nan
-        USE utility,ONLY: checkfield
-        USE basic,  ONLY: t0_checkfield, t1_checkfield, tc_checkfield
-        USE fields, ONLY: phi, moments_e, moments_i
-        USE grid,   ONLY: ipgs_e,ipge_e,ijgs_e,ijge_e,ipgs_i,ipge_i,ijgs_i,ijge_i,&
-                          ikys,ikye,ikxs,ikxe,izgs,izge,ij,ip
-        USE MPI
-        USE time_integration, ONLY: updatetlevel
-        USE model,            ONLY: LINEARITY, KIN_E
-        IMPLICIT NONE
-        LOGICAL :: checkf_
-        ! Execution time start
-        CALL cpu_time(t0_checkfield)
-
-        IF(LINEARITY .NE. 'linear') CALL anti_aliasing   ! ensure 0 mode for 2/3 rule
-        IF(LINEARITY .NE. 'linear') CALL enforce_symmetry ! Enforcing symmetry on kx = 0
-
-        mlend=.FALSE.
-        IF(.NOT.nlend) THEN
-           checkf_ = checkfield(phi,' phi')
-           mlend= (mlend .or. checkf_)
-           IF(KIN_E) THEN
-           DO ij=ijgs_e,ijge_e
-             DO ip=ipgs_e,ipge_e
-               checkf_ = checkfield(moments_e(ip,ij,ikys:ikye,ikxs:ikxe,izgs:izge,updatetlevel),' moments_e')
-               mlend   = (mlend .or. checkf_)
-             ENDDO
-           ENDDO
-           ENDIF
-           DO ij=ijgs_i,ijge_i
-             DO ip=ipgs_i,ipge_i
-               checkf_ = checkfield(moments_i(ip,ij,ikys:ikye,ikxs:ikxe,izgs:izge,updatetlevel),' moments_i')
-               mlend   = (mlend .or. checkf_)
-               ! print*, 'should be an error'
-               ! stop
-             ENDDO
-           ENDDO
-           CALL MPI_ALLREDUCE(mlend, nlend, 1, MPI_LOGICAL, MPI_LOR, MPI_COMM_WORLD, ierr)
-        ENDIF
-
-        ! Execution time end
-        CALL cpu_time(t1_checkfield)
-        tc_checkfield = tc_checkfield + (t1_checkfield - t0_checkfield)
-      END SUBROUTINE checkfield_all
-
-      SUBROUTINE anti_aliasing
-        USE fields, ONLY: moments_e, moments_i
-        USE grid,   ONLY: ipgs_i,ipge_i,ijgs_i,ijge_i,ipgs_e,ipge_e,ijgs_e,ijge_e,&
-                          ikys,ikye,ikxs,ikxe,izgs,izge,AA_x,AA_y,iz,ikx,iky,ij,ip
-        USE model,  ONLY: KIN_E
-        IMPLICIT NONE
-        IF(KIN_E)THEN
-        DO iz=izgs,izge
-          DO ikx=ikxs,ikxe
-            DO iky=ikys,ikye
-              DO ij=ijgs_e,ijge_e
-                DO ip=ipgs_e,ipge_e
-                  moments_e( ip,ij,iky,ikx,iz,:) = AA_x(ikx)* AA_y(iky) * moments_e( ip,ij,iky,ikx,iz,:)
-                END DO
-              END DO
-            END DO
-          END DO
-        END DO
-        ENDIF
-        DO iz=izgs,izge
-          DO ikx=ikxs,ikxe
-            DO iky=ikys,ikye
-              DO ij=ijgs_i,ijge_i
-                DO ip=ipgs_i,ipge_i
-                  moments_i( ip,ij,iky,ikx,iz,:) = AA_x(ikx)* AA_y(iky) * moments_i( ip,ij,iky,ikx,iz,:)
-                END DO
-              END DO
-            END DO
-          END DO
-        END DO
-      END SUBROUTINE anti_aliasing
-
-      SUBROUTINE enforce_symmetry ! Force X(k) = X(N-k)* complex conjugate symmetry
-        USE fields, ONLY: phi, psi, moments_e, moments_i
-        USE grid,   ONLY: ipgs_i,ipge_i,ijgs_i,ijge_i,ipgs_e,ipge_e,ijgs_e,ijge_e,&
-                          izgs,izge,iz,ikx,ij,ip,Nkx, iky_0, ikx_0, contains_ky0
-        USE model,  ONLY: KIN_E
-        IMPLICIT NONE
-        IF ( contains_ky0 ) THEN
-          ! Electron moments
-          IF(KIN_E) THEN
-            DO iz=izgs,izge
-              DO ij=ijgs_e,ijge_e
-                DO ip=ipgs_e,ipge_e
-                  DO ikx=2,Nkx/2 !symmetry at ky = 0
-                    moments_e( ip,ij,iky_0,ikx,iz, :) = CONJG(moments_e( ip,ij,iky_0,Nkx+2-ikx,iz, :))
-                  END DO
-                ! must be real at origin
-                moments_e(ip,ij, iky_0,ikx_0,iz, :) = REAL(moments_e(ip,ij, iky_0,ikx_0,iz, :),dp)
-              END DO
-            END DO
-          END DO
-          ENDIF
-          ! Ion moments
-          DO iz=izgs,izge
-            DO ij=ijgs_i,ijge_i
-              DO ip=ipgs_i,ipge_i
-                DO ikx=2,Nkx/2 !symmetry at ky = 0
-                  moments_i( ip,ij,iky_0,ikx,iz, :) = CONJG(moments_i( ip,ij,iky_0,Nkx+2-ikx,iz, :))
-                END DO
-                ! must be real at origin and top right
-                moments_i(ip,ij, iky_0,ikx_0,iz, :) = REAL(moments_i(ip,ij, iky_0,ikx_0,iz, :),dp)
-              END DO
-            END DO
-          END DO
-          ! Phi
-          DO ikx=2,Nkx/2 !symmetry at ky = 0
-            phi(iky_0,ikx,izgs:izge) = phi(iky_0,Nkx+2-ikx,izgs:izge)
-          END DO
-          ! must be real at origin
-          phi(iky_0,ikx_0,izgs:izge) = REAL(phi(iky_0,ikx_0,izgs:izge),dp)
-          ! Psi
-          DO ikx=2,Nkx/2 !symmetry at ky = 0
-            psi(iky_0,ikx,izgs:izge) = psi(iky_0,Nkx+2-ikx,izgs:izge)
-          END DO
-          ! must be real at origin
-          psi(iky_0,ikx_0,izgs:izge) = REAL(psi(iky_0,ikx_0,izgs:izge),dp)
-        ENDIF
-      END SUBROUTINE enforce_symmetry
+CONTAINS
+!!!! Basic structure to simplify stepon
+   SUBROUTINE assemble_RHS
+      USE basic,          ONLY: chrono_mrhs, chrono_sapj, chrono_coll, chrono_grad, chrono_napj, start_chrono, stop_chrono
+      USE moments_eq_rhs, ONLY: compute_moments_eq_rhs
+      USE collision,      ONLY: compute_Capj
+      USE nonlinear,      ONLY: compute_Sapj
+      USE processing,     ONLY: compute_nadiab_moments, compute_interp_z, compute_gradients_z
+      IMPLICIT NONE
+      ! compute auxiliary non adiabatic moments array
+      CALL start_chrono(chrono_napj)
+        CALL compute_nadiab_moments
+      CALL stop_chrono(chrono_napj)
+
+      ! compute gradients and interp
+      CALL start_chrono(chrono_grad)
+        CALL compute_gradients_z
+        CALL compute_interp_z
+      CALL stop_chrono(chrono_grad)
+
+      ! compute nonlinear term ("if linear" is included inside)
+      CALL start_chrono(chrono_sapj)
+        CALL compute_Sapj
+      CALL stop_chrono(chrono_sapj)
+
+      ! compute collision term ("if coll, if nu >0" is included inside)
+      CALL start_chrono(chrono_coll)
+        CALL compute_Capj
+      CALL stop_chrono(chrono_coll)
+
+      ! compute the moments equation rhs
+      CALL start_chrono(chrono_mrhs)
+        CALL compute_moments_eq_rhs
+      CALL stop_chrono(chrono_mrhs)
+   END SUBROUTINE assemble_RHS
+
+   SUBROUTINE checkfield_all ! Check all the fields for inf or nan
+      USE utility,ONLY: is_nan, is_inf
+      USE fields, ONLY: phi
+      USE MPI
+      USE model,            ONLY: LINEARITY
+      IMPLICIT NONE
+      LOGICAL :: checkf_
+      REAL    :: sum_
+      IF(LINEARITY .NE. 'linear') CALL anti_aliasing   ! ensure 0 mode for 2/3 rule
+      IF(LINEARITY .NE. 'linear') CALL enforce_symmetry ! Enforcing symmetry on kx = 0
+
+      mlend=.FALSE.
+      IF(.NOT.nlend) THEN
+         sum_    = SUM(ABS(phi))
+         checkf_ = (is_nan(sum_,'phi') .OR. is_inf(sum_,'phi'))
+         mlend   = (mlend .or. checkf_)
+         CALL MPI_ALLREDUCE(mlend, nlend, 1, MPI_LOGICAL, MPI_LOR, MPI_COMM_WORLD, ierr)
+      ENDIF
+   END SUBROUTINE checkfield_all
+
+   SUBROUTINE anti_aliasing
+      USE fields, ONLY: moments
+      USE time_integration, ONLY: updatetlevel
+      USE grid,   ONLY: local_na,local_np,local_nj,local_nky,local_nkx,local_nz,&
+         ngp,ngj,ngz, AA_x, AA_y
+      IMPLICIT NONE
+      INTEGER :: ia, ip, ij, iky, ikx, iz
+      z: DO iz = 1,local_nz+ngz
+         kx:DO ikx= 1,local_nkx
+            ky:DO iky=1,local_nky
+               j: DO ij=1,local_nj+ngj
+                  p: DO ip=1,local_np+ngp
+                     a: DO ia=1,local_na
+                        moments(ia,ip,ij,iky,ikx,iz,updatetlevel) =&
+                           AA_x(ikx)* AA_y(iky) * moments(ia,ip,ij,iky,ikx,iz,updatetlevel)
+                     ENDDO a
+                  ENDDO p
+               ENDDO j
+            ENDDO ky
+         ENDDO kx
+      ENDDO z
+   END SUBROUTINE anti_aliasing
+
+   SUBROUTINE enforce_symmetry ! Force X(k) = X(N-k)* complex conjugate symmetry
+      USE fields, ONLY: phi, psi, moments
+      USE time_integration, ONLY: updatetlevel
+      USE grid,   ONLY: total_nkx, ikx0,iky0, contains_ky0
+      IMPLICIT NONE
+      INTEGER :: ikx
+      IF ( contains_ky0 ) THEN
+         ! moments
+         ! z: DO iz = 1,local_nz+ngz
+         ! j: DO ij=1,local_nj+ngj
+         ! p: DO ip=1,local_np+ngp
+         ! a: DO ia=1,local_na
+         DO ikx=2,total_nkx/2 !symmetry at ky = 0
+            moments(:,:,:,iky0,ikx,:,updatetlevel) = &
+               CONJG(moments(:,:,:,iky0,total_nkx+2-ikx,:,updatetlevel))
+         END DO
+         ! must be real at origin and top right
+         moments(:,:,:, iky0,ikx0,:,updatetlevel) = &
+            REAL(moments(:,:,:, iky0,ikx0,:,updatetlevel),xp)
+         ! ENDDO a
+         ! ENDDO p
+         ! ENDDO j
+         ! ENDDO z
+         ! Phi
+         DO ikx=2,total_nkx/2 !symmetry at ky = 0
+            phi(iky0,ikx,:) = phi(iky0,total_nkx+2-ikx,:)
+         END DO
+         ! must be real at origin
+         phi(iky0,ikx0,:) = REAL(phi(iky0,ikx0,:),xp)
+         ! Psi
+         DO ikx=2,total_nkx/2 !symmetry at ky = 0
+            psi(iky0,ikx,:) = psi(iky0,total_nkx+2-ikx,:)
+         END DO
+         ! must be real at origin
+         psi(iky0,ikx0,:) = REAL(psi(iky0,ikx0,:),xp)
+      ENDIF
+   END SUBROUTINE enforce_symmetry
 
 END SUBROUTINE stepon
diff --git a/src/tesend.F90 b/src/tesend.F90
index 9a77ea1ceffe90e24445a1acf0298f4f622571c1..5c04ab7cd8fa9e24da6f2001572ed265135e90bc 100644
--- a/src/tesend.F90
+++ b/src/tesend.F90
@@ -2,12 +2,13 @@ SUBROUTINE tesend
   !   Test for run completion
 
   USE basic
-
+  USE mpi
   use prec_const
+  USE parallel, ONLY: my_id
   IMPLICIT NONE
   LOGICAL :: mlend, mlexist
-  REAL(dp):: tnow
-  INTEGER :: ncheck_stop = 100
+  REAL    :: tnow
+  INTEGER :: ncheck_stop = 100, ierr
   CHARACTER(len=*), PARAMETER :: stop_file = 'mystop'
 
   !________________________________________________________________________________
@@ -16,25 +17,25 @@ SUBROUTINE tesend
   IF( mlend ) THEN
     nlend   = .TRUE.
     crashed = .TRUE.
-    IF (my_id .EQ. 0) WRITE(*,'(/a)') 'rhs are NaN/Inf'
-    IF (my_id .EQ. 0) WRITE(*,*) 'Run terminated at cstep=',cstep
+    CALL speak('rhs are NaN/Inf')
+    CALL speak('Run terminated at cstep='//str(cstep))
     RETURN
   END IF
 
   !________________________________________________________________________________
   !                   2.  Test on NRUN
-  nlend = step .GT. nrun
+  nlend = step .GE. nrun
   IF ( nlend ) THEN
-     WRITE(*,'(/a)') 'NRUN steps done'
+     CALL speak('NRUN steps done')
      RETURN
   END IF
 
 
   !________________________________________________________________________________
   !                   3.  Test on TMAX
-  nlend = time .GT. tmax
+  nlend = time .GE. tmax
   IF ( nlend ) THEN
-     IF (my_id .EQ. 0) WRITE(*,'(/a)') 'TMAX reached'
+     CALL speak('TMAX reached')
      RETURN
   END IF
   !
@@ -43,12 +44,12 @@ SUBROUTINE tesend
   !________________________________________________________________________________
   !                   4.  Test on run time
   CALL cpu_time(tnow)
-  mlend = (1.1*(tnow-start)) .GT. maxruntime
+  mlend = (1.1*(tnow-chrono_runt%tstart)) .GT. maxruntime
 
 
   CALL mpi_allreduce(mlend, nlend, 1, MPI_LOGICAL, MPI_LOR, MPI_COMM_WORLD, ierr)
   IF ( nlend ) THEN
-     IF(my_id.EQ.0) WRITE(*,'(/a)') 'Max run time reached'
+     CALL speak('Max run time reached')
      RETURN
   END IF
   !________________________________________________________________________________
@@ -59,7 +60,7 @@ SUBROUTINE tesend
      IF( mlexist ) THEN
         OPEN(lu_stop, file=stop_file)
         mlend = mlexist ! Send stop status asa the file exists
-        WRITE(*,'(/a,i4,a)') 'Stop file found -> finishing..'
+        CALL speak('Stop file found -> finishing..')
         CLOSE(lu_stop, status='delete')
      END IF
   END IF
diff --git a/src/time_integration_mod.F90 b/src/time_integration_mod.F90
index 02ba5bf98e36cb10854da9f64256cd62f17cd685..e1a2aa5178ea3589b980d91cde10adb73a257c1e 100644
--- a/src/time_integration_mod.F90
+++ b/src/time_integration_mod.F90
@@ -7,9 +7,9 @@ MODULE time_integration
   INTEGER, PUBLIC, PROTECTED :: ntimelevel=4 ! Total number of time levels required by the numerical scheme
   INTEGER, PUBLIC, PROTECTED :: updatetlevel ! Current time level to be updated
 
-  real(dp),PUBLIC,PROTECTED,DIMENSION(:,:),ALLOCATABLE :: A_E,A_I
-  real(dp),PUBLIC,PROTECTED,DIMENSION(:),ALLOCATABLE :: b_E,b_Es,b_I
-  real(dp),PUBLIC,PROTECTED,DIMENSION(:),ALLOCATABLE :: c_E,c_I !Coeff for Expl/Implic time integration in case of time dependent RHS (i.e. dy/dt = f(y,t)) see Baptiste Frei CSE Rapport 06/17
+  real(xp),PUBLIC,PROTECTED,DIMENSION(:,:),ALLOCATABLE :: A_E,A_I
+  real(xp),PUBLIC,PROTECTED,DIMENSION(:),ALLOCATABLE :: b_E,b_Es,b_I
+  real(xp),PUBLIC,PROTECTED,DIMENSION(:),ALLOCATABLE :: c_E,c_I !Coeff for Expl/Implic time integration in case of time dependent RHS (i.e. dy/dt = f(y,t)) see Baptiste Frei CSE Rapport 06/17
 
   character(len=10),PUBLIC,PROTECTED :: numerical_scheme='RK4'
 
@@ -24,41 +24,30 @@ CONTAINS
 
   SUBROUTINE time_integration_readinputs
     ! Read the input parameters
-
     USE prec_const
     USE basic, ONLY : lu_in
     IMPLICIT NONE
-
     NAMELIST /TIME_INTEGRATION_PAR/ numerical_scheme
-
     READ(lu_in,time_integration_par)
-
     CALL set_numerical_scheme
-
   END SUBROUTINE time_integration_readinputs
 
 
-  SUBROUTINE time_integration_outputinputs(fidres, str)
+  SUBROUTINE time_integration_outputinputs(fid)
     ! Write the input parameters to the results_xx.h5 file
-
-    USE prec_const
-    USE futils, ONLY: attach
+    USE futils, ONLY: attach, creatd
     IMPLICIT NONE
-    INTEGER, INTENT(in) :: fidres
-    CHARACTER(len=256), INTENT(in) :: str
-
-    CALL attach(fidres, TRIM(str), "numerical_scheme", numerical_scheme)
-
+    INTEGER, INTENT(in) :: fid
+    CHARACTER(len=256)  :: str
+    WRITE(str,'(a)') '/data/input/time_integration'
+    CALL creatd(fid, 0,(/0/),TRIM(str),'Time Integration Input')
+    CALL attach(fid, TRIM(str), "numerical_scheme", numerical_scheme)
   END SUBROUTINE time_integration_outputinputs
 
-
-
   SUBROUTINE set_numerical_scheme
-    ! Initialize Butcher coefficient of numerical_scheme
-
-    use basic
+    ! Initialize Butcher coefficient of set_numerical_scheme
+    use parallel, ONLY: my_id
     IMPLICIT NONE
-
     SELECT CASE (numerical_scheme)
     ! Order 2 methods
     CASE ('RK2')
@@ -86,7 +75,6 @@ CONTAINS
        IF (my_id .EQ. 0) WRITE(*,*) 'Cannot initialize time integration scheme. Name invalid.'
     END SELECT
     IF (my_id .EQ. 0) WRITE(*,*) " Time integration with ", numerical_scheme
-
   END SUBROUTINE set_numerical_scheme
 
   !!! second order time schemes
@@ -96,15 +84,15 @@ CONTAINS
     USE prec_const
     IMPLICIT NONE
     INTEGER,PARAMETER :: nbstep = 2
-    CALL allocate_array_dp1(c_E,1,nbstep)
-    CALL allocate_array_dp1(b_E,1,nbstep)
-    CALL allocate_array_dp2(A_E,1,nbstep,1,nbstep)
+    CALL allocate_array(c_E,1,nbstep)
+    CALL allocate_array(b_E,1,nbstep)
+    CALL allocate_array(A_E,1,nbstep,1,nbstep)
     ntimelevel = 2
-    c_E(1) = 0.0_dp
-    c_E(2) = 1.0_dp
-    b_E(1) = 1._dp/2._dp
-    b_E(2) = 1._dp/2._dp
-    A_E(2,1) = 1._dp
+    c_E(1) = 0.0_xp
+    c_E(2) = 1.0_xp
+    b_E(1) = 1._xp/2._xp
+    b_E(2) = 1._xp/2._xp
+    A_E(2,1) = 1._xp
   END SUBROUTINE RK2
 
   SUBROUTINE SSPx_RK2
@@ -115,21 +103,21 @@ CONTAINS
     USE prec_const
     IMPLICIT NONE
     INTEGER,PARAMETER :: nbstep = 2
-    REAL(dp) :: alpha, beta
-    alpha = 1._dp/SQRT(2._dp)
-    beta  = SQRT(2._dp) - 1._dp
-    CALL allocate_array_dp1(c_E,1,nbstep)
-    CALL allocate_array_dp1(b_E,1,nbstep)
-    CALL allocate_array_dp2(A_E,1,nbstep,1,nbstep)
+    REAL(xp) :: alpha, beta
+    alpha = 1._xp/SQRT(2._xp)
+    beta  = SQRT(2._xp) - 1._xp
+    CALL allocate_array(c_E,1,nbstep)
+    CALL allocate_array(b_E,1,nbstep)
+    CALL allocate_array(A_E,1,nbstep,1,nbstep)
     ntimelevel = 2
-    c_E(1)   = 0.0_dp
-    c_E(2)   = 1.0_dp/2.0_dp
-    b_E(1)   = alpha*beta/2._dp
-    b_E(2)   = alpha/2._dp
+    c_E(1)   = 0.0_xp
+    c_E(2)   = 1.0_xp/2.0_xp
+    b_E(1)   = alpha*beta/2._xp
+    b_E(2)   = alpha/2._xp
     A_E(2,1) = alpha
-    ! b_E(1) = 1._dp
-    ! b_E(2) = 1._dp/SQRT(2._dp)
-    ! A_E(2,1) = 1._dp/SQRT(2._dp)
+    ! b_E(1) = 1._xp
+    ! b_E(2) = 1._xp/SQRT(2._xp)
+    ! A_E(2,1) = 1._xp/SQRT(2._xp)
   END SUBROUTINE SSPx_RK2
 
   !!! third order time schemes
@@ -139,19 +127,19 @@ CONTAINS
     USE prec_const
     IMPLICIT NONE
     INTEGER,PARAMETER :: nbstep = 3
-    CALL allocate_array_dp1(c_E,1,nbstep)
-    CALL allocate_array_dp1(b_E,1,nbstep)
-    CALL allocate_array_dp2(A_E,1,nbstep,1,nbstep)
+    CALL allocate_array(c_E,1,nbstep)
+    CALL allocate_array(b_E,1,nbstep)
+    CALL allocate_array(A_E,1,nbstep,1,nbstep)
     ntimelevel = 3
-    c_E(1)   = 0.0_dp
-    c_E(2)   = 1.0_dp/2.0_dp
-    c_E(3)   = 1.0_dp
-    b_E(1)   = 1._dp/6._dp
-    b_E(2)   = 2._dp/3._dp
-    b_E(3)   = 1._dp/6._dp
-    A_E(2,1) = 1.0_dp/2.0_dp
-    A_E(3,1) = -1._dp
-    A_E(3,2) = 2._dp
+    c_E(1)   = 0.0_xp
+    c_E(2)   = 1.0_xp/2.0_xp
+    c_E(3)   = 1.0_xp
+    b_E(1)   = 1._xp/6._xp
+    b_E(2)   = 2._xp/3._xp
+    b_E(3)   = 1._xp/6._xp
+    A_E(2,1) = 1.0_xp/2.0_xp
+    A_E(3,1) = -1._xp
+    A_E(3,2) = 2._xp
   END SUBROUTINE RK3
 
   SUBROUTINE SSPx_RK3
@@ -162,24 +150,24 @@ CONTAINS
     USE prec_const
     IMPLICIT NONE
     INTEGER,PARAMETER :: nbstep = 3
-    REAL(dp) :: a1, a2, a3, w1, w2, w3
-    a1 = (1._dp/6._dp)**(1._dp/3._dp)! (1/6)^(1/3)
-    ! a1 = 0.5503212081491044571635029569733887910843_dp ! (1/6)^(1/3)
+    REAL(xp) :: a1, a2, a3, w1, w2, w3
+    a1 = (1._xp/6._xp)**(1._xp/3._xp)! (1/6)^(1/3)
+    ! a1 = 0.5503212081491044571635029569733887910843_xp ! (1/6)^(1/3)
     a2 = a1
     a3 = a1
-    w1 = 0.5_dp*(-1._dp + SQRT( 9._dp - 2._dp * 6._dp**(2._dp/3._dp))) ! (-1 + sqrt(9-2*6^(2/3)))/2
-    ! w1 = 0.2739744023885328783052273138309828937054_dp ! (sqrt(9-2*6^(2/3))-1)/2
-    w2 = 0.5_dp*(-1._dp + 6._dp**(2._dp/3._dp) - SQRT(9._dp - 2._dp * 6._dp**(2._dp/3._dp))) ! (6^(2/3)-1-sqrt(9-2*6^(2/3)))/2
-    ! w2 = 0.3769892220587804931852815570891834795475_dp ! (6^(2/3)-1-sqrt(9-2*6^(2/3)))/2
-    w3 = 1._dp/a1 - w2 * (1._dp + w1)
-    ! w3 = 1.3368459739528868457369981115334667265415_dp
-    CALL allocate_array_dp1(c_E,1,nbstep)
-    CALL allocate_array_dp1(b_E,1,nbstep)
-    CALL allocate_array_dp2(A_E,1,nbstep,1,nbstep)
+    w1 = 0.5_xp*(-1._xp + SQRT( 9._xp - 2._xp * 6._xp**(2._xp/3._xp))) ! (-1 + sqrt(9-2*6^(2/3)))/2
+    ! w1 = 0.2739744023885328783052273138309828937054_xp ! (sqrt(9-2*6^(2/3))-1)/2
+    w2 = 0.5_xp*(-1._xp + 6._xp**(2._xp/3._xp) - SQRT(9._xp - 2._xp * 6._xp**(2._xp/3._xp))) ! (6^(2/3)-1-sqrt(9-2*6^(2/3)))/2
+    ! w2 = 0.3769892220587804931852815570891834795475_xp ! (6^(2/3)-1-sqrt(9-2*6^(2/3)))/2
+    w3 = 1._xp/a1 - w2 * (1._xp + w1)
+    ! w3 = 1.3368459739528868457369981115334667265415_xp
+    CALL allocate_array(c_E,1,nbstep)
+    CALL allocate_array(b_E,1,nbstep)
+    CALL allocate_array(A_E,1,nbstep,1,nbstep)
     ntimelevel = 3
-    c_E(1) = 0.0_dp
-    c_E(2) = 1.0_dp/2.0_dp
-    c_E(3) = 1.0_dp/2.0_dp
+    c_E(1) = 0.0_xp
+    c_E(2) = 1.0_xp/2.0_xp
+    c_E(3) = 1.0_xp/2.0_xp
     b_E(1) = a1 * (w1*w2 + w3)
     b_E(2) = a2 * w2
     b_E(3) = a3
@@ -195,19 +183,19 @@ CONTAINS
     USE prec_const
     IMPLICIT NONE
     INTEGER,PARAMETER :: nbstep = 3
-    CALL allocate_array_dp1(c_E,1,nbstep)
-    CALL allocate_array_dp1(b_E,1,nbstep)
-    CALL allocate_array_dp2(A_E,1,nbstep,1,nbstep)
+    CALL allocate_array(c_E,1,nbstep)
+    CALL allocate_array(b_E,1,nbstep)
+    CALL allocate_array(A_E,1,nbstep,1,nbstep)
     ntimelevel = 3
-    c_E(1)   = 0._dp
-    c_E(2)   = 0.711664700366941_dp
-    c_E(3)   = 0.994611536833690_dp
-    b_E(1)   = 0.398930808264688_dp
-    b_E(2)   = 0.345755244189623_dp
-    b_E(3)   = 0.255313947545689_dp
-    A_E(2,1) = 0.711664700366941_dp
-    A_E(3,1) = 0.077338168947683_dp
-    A_E(3,2) = 0.917273367886007_dp
+    c_E(1)   = 0._xp
+    c_E(2)   = 0.711664700366941_xp
+    c_E(3)   = 0.994611536833690_xp
+    b_E(1)   = 0.398930808264688_xp
+    b_E(2)   = 0.345755244189623_xp
+    b_E(3)   = 0.255313947545689_xp
+    A_E(2,1) = 0.711664700366941_xp
+    A_E(3,1) = 0.077338168947683_xp
+    A_E(3,2) = 0.917273367886007_xp
   END SUBROUTINE IMEX_SSP2
 
   SUBROUTINE ARK2
@@ -217,19 +205,19 @@ CONTAINS
     USE prec_const
     IMPLICIT NONE
     INTEGER,PARAMETER :: nbstep = 3
-    CALL allocate_array_dp1(c_E,1,nbstep)
-    CALL allocate_array_dp1(b_E,1,nbstep)
-    CALL allocate_array_dp2(A_E,1,nbstep,1,nbstep)
+    CALL allocate_array(c_E,1,nbstep)
+    CALL allocate_array(b_E,1,nbstep)
+    CALL allocate_array(A_E,1,nbstep,1,nbstep)
     ntimelevel = 3
-    c_E(1)   = 0._dp
-    c_E(2)   = 2._dp*(1._dp - 1._dp/SQRT2)
-    c_E(3)   = 1._dp
-    b_E(1)   = 1._dp/(2._dp*SQRT2)
-    b_E(2)   = 1._dp/(2._dp*SQRT2)
-    b_E(3)   = 1._dp - 1._dp/SQRT2
-    A_E(2,1) = 2._dp*(1._dp - 1._dp/SQRT2)
-    A_E(3,1) = 1._dp - (3._dp + 2._dp*SQRT2)/6._dp
-    A_E(3,2) = (3._dp + 2._dp*SQRT2)/6._dp
+    c_E(1)   = 0._xp
+    c_E(2)   = 2._xp*(1._xp - 1._xp/SQRT2)
+    c_E(3)   = 1._xp
+    b_E(1)   = 1._xp/(2._xp*SQRT2)
+    b_E(2)   = 1._xp/(2._xp*SQRT2)
+    b_E(3)   = 1._xp - 1._xp/SQRT2
+    A_E(2,1) = 2._xp*(1._xp - 1._xp/SQRT2)
+    A_E(3,1) = 1._xp - (3._xp + 2._xp*SQRT2)/6._xp
+    A_E(3,2) = (3._xp + 2._xp*SQRT2)/6._xp
   END SUBROUTINE ARK2
 
   SUBROUTINE SSP_RK3
@@ -238,19 +226,19 @@ CONTAINS
     USE prec_const
     IMPLICIT NONE
     INTEGER,PARAMETER :: nbstep = 3
-    CALL allocate_array_dp1(c_E,1,nbstep)
-    CALL allocate_array_dp1(b_E,1,nbstep)
-    CALL allocate_array_dp2(A_E,1,nbstep,1,nbstep)
+    CALL allocate_array(c_E,1,nbstep)
+    CALL allocate_array(b_E,1,nbstep)
+    CALL allocate_array(A_E,1,nbstep,1,nbstep)
     ntimelevel = 3
-    c_E(1)   = 0.0_dp
-    c_E(2)   = 1.0_dp
-    c_E(3)   = 1.0_dp/2.0_dp
-    b_E(1)   = 1._dp/6._dp
-    b_E(2)   = 1._dp/6._dp
-    b_E(3)   = 2._dp/3._dp
-    A_E(2,1) = 1._dp
-    A_E(3,1) = 1._dp/4._dp
-    A_E(3,2) = 1._dp/4._dp
+    c_E(1)   = 0.0_xp
+    c_E(2)   = 1.0_xp
+    c_E(3)   = 1.0_xp/2.0_xp
+    b_E(1)   = 1._xp/6._xp
+    b_E(2)   = 1._xp/6._xp
+    b_E(3)   = 2._xp/3._xp
+    A_E(2,1) = 1._xp
+    A_E(3,1) = 1._xp/4._xp
+    A_E(3,2) = 1._xp/4._xp
   END SUBROUTINE SSP_RK3
 
   !!! fourth order time schemes
@@ -260,21 +248,21 @@ CONTAINS
     USE prec_const
     IMPLICIT NONE
     INTEGER,PARAMETER :: nbstep = 4
-    CALL allocate_array_dp1(c_E,1,nbstep)
-    CALL allocate_array_dp1(b_E,1,nbstep)
-    CALL allocate_array_dp2(A_E,1,nbstep,1,nbstep)
+    CALL allocate_array(c_E,1,nbstep)
+    CALL allocate_array(b_E,1,nbstep)
+    CALL allocate_array(A_E,1,nbstep,1,nbstep)
     ntimelevel = 4
-    c_E(1)   = 0.0_dp
-    c_E(2)   = 1.0_dp/2.0_dp
-    c_E(3)   = 1.0_dp/2.0_dp
-    c_E(4)   = 1.0_dp
-    b_E(1)   = 1.0_dp/6.0_dp
-    b_E(2)   = 1.0_dp/3.0_dp
-    b_E(3)   = 1.0_dp/3.0_dp
-    b_E(4)   = 1.0_dp/6.0_dp
-    A_E(2,1) = 1.0_dp/2.0_dp
-    A_E(3,2) = 1.0_dp/2.0_dp
-    A_E(4,3) = 1.0_dp
+    c_E(1)   = 0.0_xp
+    c_E(2)   = 1.0_xp/2.0_xp
+    c_E(3)   = 1.0_xp/2.0_xp
+    c_E(4)   = 1.0_xp
+    b_E(1)   = 1.0_xp/6.0_xp
+    b_E(2)   = 1.0_xp/3.0_xp
+    b_E(3)   = 1.0_xp/3.0_xp
+    b_E(4)   = 1.0_xp/6.0_xp
+    A_E(2,1) = 1.0_xp/2.0_xp
+    A_E(3,2) = 1.0_xp/2.0_xp
+    A_E(4,3) = 1.0_xp
   END SUBROUTINE RK4
 
   !!! fifth order time schemes
@@ -285,44 +273,44 @@ CONTAINS
     USE basic
     IMPLICIT NONE
     INTEGER,PARAMETER :: nbstep =7
-    CALL allocate_array_dp1(c_E,1,nbstep)
-    CALL allocate_array_dp1(b_E,1,nbstep)
-    CALL allocate_array_dp2(A_E,1,nbstep,1,nbstep)
+    CALL allocate_array(c_E,1,nbstep)
+    CALL allocate_array(b_E,1,nbstep)
+    CALL allocate_array(A_E,1,nbstep,1,nbstep)
     ntimelevel = 7
-    c_E(1) = 0._dp
-    c_E(2) = 1.0_dp/5.0_dp
-    c_E(3) = 3.0_dp /10.0_dp
-    c_E(4) = 4.0_dp/5.0_dp
-    c_E(5) = 8.0_dp/9.0_dp
-    c_E(6) = 1.0_dp
-    c_E(7) = 1.0_dp
-    A_E(2,1) = 1.0_dp/5.0_dp
-    A_E(3,1) = 3.0_dp/40.0_dp
-    A_E(3,2) = 9.0_dp/40.0_dp
-    A_E(4,1) = 	44.0_dp/45.0_dp
-    A_E(4,2) = -56.0_dp/15.0_dp
-    A_E(4,3) = 32.0_dp/9.0_dp
-    A_E(5,1 ) = 19372.0_dp/6561.0_dp
-    A_E(5,2) = -25360.0_dp/2187.0_dp
-    A_E(5,3) = 64448.0_dp/6561.0_dp
-    A_E(5,4) = -212.0_dp/729.0_dp
-    A_E(6,1) = 9017.0_dp/3168.0_dp
-    A_E(6,2)= -355.0_dp/33.0_dp
-    A_E(6,3) = 46732.0_dp/5247.0_dp
-    A_E(6,4) = 49.0_dp/176.0_dp
-    A_E(6,5) = -5103.0_dp/18656.0_dp
-    A_E(7,1) = 35.0_dp/384.0_dp
-    A_E(7,3) = 500.0_dp/1113.0_dp
-    A_E(7,4) = 125.0_dp/192.0_dp
-    A_E(7,5) = -2187.0_dp/6784.0_dp
-    A_E(7,6) = 11.0_dp/84.0_dp
-    b_E(1) = 35.0_dp/384.0_dp
-    b_E(2) = 0._dp
-    b_E(3) = 500.0_dp/1113.0_dp
-    b_E(4) = 125.0_dp/192.0_dp
-    b_E(5) = -2187.0_dp/6784.0_dp
-    b_E(6) = 11.0_dp/84.0_dp
-    b_E(7) = 0._dp
+    c_E(1) = 0._xp
+    c_E(2) = 1.0_xp/5.0_xp
+    c_E(3) = 3.0_xp /10.0_xp
+    c_E(4) = 4.0_xp/5.0_xp
+    c_E(5) = 8.0_xp/9.0_xp
+    c_E(6) = 1.0_xp
+    c_E(7) = 1.0_xp
+    A_E(2,1) = 1.0_xp/5.0_xp
+    A_E(3,1) = 3.0_xp/40.0_xp
+    A_E(3,2) = 9.0_xp/40.0_xp
+    A_E(4,1) = 	44.0_xp/45.0_xp
+    A_E(4,2) = -56.0_xp/15.0_xp
+    A_E(4,3) = 32.0_xp/9.0_xp
+    A_E(5,1 ) = 19372.0_xp/6561.0_xp
+    A_E(5,2) = -25360.0_xp/2187.0_xp
+    A_E(5,3) = 64448.0_xp/6561.0_xp
+    A_E(5,4) = -212.0_xp/729.0_xp
+    A_E(6,1) = 9017.0_xp/3168.0_xp
+    A_E(6,2)= -355.0_xp/33.0_xp
+    A_E(6,3) = 46732.0_xp/5247.0_xp
+    A_E(6,4) = 49.0_xp/176.0_xp
+    A_E(6,5) = -5103.0_xp/18656.0_xp
+    A_E(7,1) = 35.0_xp/384.0_xp
+    A_E(7,3) = 500.0_xp/1113.0_xp
+    A_E(7,4) = 125.0_xp/192.0_xp
+    A_E(7,5) = -2187.0_xp/6784.0_xp
+    A_E(7,6) = 11.0_xp/84.0_xp
+    b_E(1) = 35.0_xp/384.0_xp
+    b_E(2) = 0._xp
+    b_E(3) = 500.0_xp/1113.0_xp
+    b_E(4) = 125.0_xp/192.0_xp
+    b_E(5) = -2187.0_xp/6784.0_xp
+    b_E(6) = 11.0_xp/84.0_xp
+    b_E(7) = 0._xp
   END SUBROUTINE DOPRI5
 
 END MODULE time_integration
diff --git a/src/utility_mod.F90 b/src/utility_mod.F90
index 028d514e804c8cebff5f7b00902465bce5bc9e45..b854484898e0decd434e31984c00e520aa6ccd67 100644
--- a/src/utility_mod.F90
+++ b/src/utility_mod.F90
@@ -1,15 +1,15 @@
 MODULE utility
   IMPLICIT NONE
-  PUBLIC :: checkfield, checkelem
+  PUBLIC :: is_nan, is_inf!. checkfield, checkelem
 CONTAINS
 
   FUNCTION is_nan(x,str) RESULT(isn)
-    USE basic,            ONLY: cstep, stdout
+    USE basic,            ONLY: cstep
     USE time_integration, ONLY: updatetlevel
-    USE prec_const,       ONLY: dp
+    USE prec_const,       ONLY: xp, stdout
     IMPLICIT NONE
 
-    real(dp), INTENT(IN) :: x
+    real, INTENT(IN) :: x
     CHARACTER(LEN=*), INTENT(IN) :: str
     LOGICAL :: isn
 
@@ -25,11 +25,10 @@ CONTAINS
 
 
   FUNCTION is_inf(x,str) RESULT(isi)
-    USE basic,            ONLY: stdout
-    USE prec_const,       ONLY: dp
+    USE prec_const,       ONLY: xp, stdout
     IMPLICIT NONE
 
-    real(dp), INTENT(IN) :: x
+    real, INTENT(IN) :: x
     CHARACTER(LEN=*), INTENT(IN) :: str
     LOGICAL :: isi
 
@@ -44,33 +43,33 @@ CONTAINS
 
   END FUNCTION is_inf
 
-  FUNCTION checkfield(field,str) RESULT(mlend)
-    USE grid,       ONLY: ikys,ikye,ikxs,ikxe,izgs,izge
-    use prec_const, ONLY: dp
-    IMPLICIT NONE
-    !! BUG found (or feature?)
-    ! if one put the commented first line (commented) instead of the second one,
-    ! no error will be risen by the compiler even if the rank of the array is not matching (should be 3D!)
-    ! COMPLEX(dp), DIMENSION(ikys:ikye,ikxs:ikxe), INTENT(IN) :: field
-    COMPLEX(dp), DIMENSION(ikys:ikye,ikxs:ikxe,izgs:izge), INTENT(IN) :: field
-    CHARACTER(LEN=*), INTENT(IN) :: str
-    LOGICAL :: mlend
-    COMPLEX(dp) :: sumfield
+  ! FUNCTION checkfield(n1,n2,n3,field,str) RESULT(mlend)
+  !   use prec_const, ONLY: xp
+  !   IMPLICIT NONE
+  !   !! BUG found (or feature?)
+  !   ! if one put the commented first line (commented) instead of the second one,
+  !   ! no error will be risen by the compiler even if the rank of the array is not matching (should be 3D!)
+  !   ! COMPLEX(xp), DIMENSION(ikys:ikye,ikxs:ikxe), INTENT(IN) :: field
+  !   INTEGER, INTENT(in) :: n1,n2,n3
+  !   COMPLEX(xp), DIMENSION(n1,n2,n3), INTENT(IN) :: field
+  !   CHARACTER(LEN=*), INTENT(IN) :: str
+  !   LOGICAL :: mlend
+  !   COMPLEX(xp) :: sumfield
 
-    sumfield=SUM(field)
+  !   sumfield=SUM(field)
 
-    mlend= is_nan( REAL(sumfield),str).OR.is_inf( REAL(sumfield),str) &
-      .OR. is_nan(AIMAG(sumfield),str).OR.is_inf(AIMAG(sumfield),str)
-  END FUNCTION checkfield
+  !   mlend= is_nan( REAL(sumfield),str).OR.is_inf( REAL(sumfield),str) &
+  !     .OR. is_nan(AIMAG(sumfield),str).OR.is_inf(AIMAG(sumfield),str)
+  ! END FUNCTION checkfield
 
-  FUNCTION checkelem(elem,str) RESULT(mlend)
-    use prec_const, ONLY: dp
-    IMPLICIT NONE
-    COMPLEX(dp), INTENT(IN) :: elem
-    CHARACTER(LEN=*), INTENT(IN) :: str
-    LOGICAL :: mlend
+  ! FUNCTION checkelem(elem,str) RESULT(mlend)
+  !   use prec_const, ONLY: xp
+  !   IMPLICIT NONE
+  !   COMPLEX(xp), INTENT(IN) :: elem
+  !   CHARACTER(LEN=*), INTENT(IN) :: str
+  !   LOGICAL :: mlend
 
-    mlend= is_nan( REAL(elem),str).OR.is_inf( REAL(elem),str) &
-      .OR. is_nan(AIMAG(elem),str).OR.is_inf(AIMAG(elem),str)
-  END FUNCTION checkelem
+  !   mlend= is_nan( REAL(elem),str).OR.is_inf( REAL(elem),str) &
+  !     .OR. is_nan(AIMAG(elem),str).OR.is_inf(AIMAG(elem),str)
+  ! END FUNCTION checkelem
 END MODULE utility
diff --git a/testcases/cyclone_example/cyclone_example.txt b/testcases/cyclone_example/cyclone_example.txt
deleted file mode 100644
index b38bfef642ef3e498137ca790fa392b2d085fc33..0000000000000000000000000000000000000000
--- a/testcases/cyclone_example/cyclone_example.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-This is a testcase reproducing the cyclone base case of Dimits 2000
-Adiabatic electrons, s-alpha geometry, gradlnN = 2.22, gradlnT = 6.96
-With a small P,J=4,2 polynomial basis, one should observe the secondary instability (KHI) at t~50R/cs.
-The saturated heat flux should be located around Qx ~ 30, i.e. Qx/QGB ~ 2 which is close to Dimits results.
diff --git a/testcases/cyclone_example/fort.90 b/testcases/cyclone_example/fort.90
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/testcases/cyclone_example/fort_00.90 b/testcases/cyclone_example/fort_00.90
index fd926cba93a53a4bd2ee432dc0fa030c9dfc9c6c..c4d16a14a0cfdc5792bcfcc98ef8302a7e6c53f7 100644
--- a/testcases/cyclone_example/fort_00.90
+++ b/testcases/cyclone_example/fort_00.90
@@ -1,80 +1,94 @@
 &BASIC
-  nrun   = 100000000
-  dt     = 0.01
-  tmax   = 50
-  maxruntime = 356400
+  nrun       = 99999999
+  dt         = 0.01
+  tmax       = 500
+  maxruntime = 72000
+  job2load   = -1
 /
 &GRID
-  pmaxe  = 4
-  jmaxe  = 2
-  pmaxi  = 4
-  jmaxi  = 2
+  pmax   = 4
+  jmax   = 2
   Nx     = 128
   Lx     = 120
   Ny     = 64
-  Ly     = 160
-  Nz     = 16
+  Ly     = 120
+  Nz     = 24
+  SG     = .f.
   Nexc   = 0
-  SG     = .true.
 /
 &GEOMETRY
   geom   = 's-alpha'
   q0     = 1.4
   shear  = 0.8
   eps    = 0.18
-  parallel_BC = 'dirichlet'
+  kappa  = 1.0
+  s_kappa= 0.0
+  delta  = 0.0
+  s_delta= 0.0
+  zeta   = 0.0
+  s_zeta = 0.0
+  parallel_bc = 'dirichlet'
+  shift_y= 0.0
 /
 &OUTPUT_PAR
-  nsave_0d = 50
-  nsave_1d = -1
-  nsave_2d = -1
-  nsave_3d = 100
-  nsave_5d = 500
-  write_doubleprecision = .false.
-  write_gamma = .true.
-  write_hf    = .true.
-  write_phi   = .true.
-  write_Na00  = .false.
-  write_Napj  = .true.
-  write_Sapj  = .false.
-  write_dens  = .true.
-  write_temp  = .true.
-  job2load    = -1
+  dtsave_0d = 1
+  dtsave_1d = -1
+  dtsave_2d = -1
+  dtsave_3d = 5
+  dtsave_5d = 20
+  write_doubleprecision = .f.
+  write_gamma = .t.
+  write_hf    = .t.
+  write_phi   = .t.
+  write_Na00  = .t.
+  write_Napj  = .t.
+  write_dens  = .t.
+  write_fvel  = .t.
+  write_temp  = .t.
 /
 &MODEL_PAR
-  ! Collisionality
-  CLOS    = 0
-  NL_CLOS = 0
   LINEARITY = 'nonlinear'
-  KIN_E   = .false.
-  mu_x    = 0.1
-  mu_y    = 0.1
-  N_HD    = 2
+  Na      = 1 ! number of species
+  mu_x    = 1.0
+  mu_y    = 1.0
+  N_HD    = 4
   mu_z    = 2.0
-  mu_p    = 0
-  mu_j    = 0
-  nu      = 0.05
-  tau_e   = 1
-  tau_i   = 1
-  sigma_e = 0.023338
-  sigma_i = 1
-  q_e     = -1
-  q_i     = 1
-  K_Ni    = 2.22
-  K_Ti    = 6.92
+  HYP_V   = 'hypcoll'
+  mu_p    = 0.0
+  mu_j    = 0.0
+  nu      = 0.001
+  beta    = 0.0
+  ADIAB_E = .t.
+  tau_e   = 1.0
 /
+&CLOSURE_PAR
+  hierarchy_closure='truncation'
+  dmax = -1
+  nonlinear_closure='truncation'
+  nmax = 0
+/
+&SPECIES
+ ! ions
+ name_ = 'ions'
+ tau_  = 1.0
+ sigma_= 1.0
+ q_    = 1.0
+ k_N_  = 2.22
+ k_T_  = 6.96
+/
+
 &COLLISION_PAR
-  collision_model = 'DG'
-  gyrokin_CO      = .false.
-  interspecies    = .true.
-  mat_file        = 'null'
+  collision_model = 'DG' !DG/SG/PA/LD (dougherty, sugama, pitch angle, landau)
+  GK_CO           = .f.
+  INTERSPECIES    = .true.
+  mat_file       = 'gk_sugama_P_20_J_10_N_150_kpm_8.0.h5'
 /
 &INITIAL_CON
-  INIT_OPT      = 'blob'
-  ACT_ON_MODES  = 'donothing'
-  init_background  = 0
-  init_noiselvl = 1e-3
-  iseed         = 42
+  INIT_OPT         = 'blob'
+  ACT_ON_MODES     = 'donothing'
+  init_background  = 0.0
+  init_noiselvl    = 0.005
+  iseed            = 42
 /
 &TIME_INTEGRATION_PAR
   numerical_scheme = 'RK4'
diff --git a/testcases/cyclone_example/gyacomo23_debug b/testcases/cyclone_example/gyacomo23_debug
new file mode 120000
index 0000000000000000000000000000000000000000..dcb6f58d8624fe5e71697ab17d5f0bb5f64c1c4c
--- /dev/null
+++ b/testcases/cyclone_example/gyacomo23_debug
@@ -0,0 +1 @@
+../../bin/gyacomo23_debug
\ No newline at end of file
diff --git a/testcases/cyclone_example/gyacomo23_dp b/testcases/cyclone_example/gyacomo23_dp
new file mode 120000
index 0000000000000000000000000000000000000000..f11ab935431cb2cbfeac04a93dd0231984d8014c
--- /dev/null
+++ b/testcases/cyclone_example/gyacomo23_dp
@@ -0,0 +1 @@
+../../bin/gyacomo23_dp
\ No newline at end of file
diff --git a/testcases/cyclone_example/gyacomo23_sp b/testcases/cyclone_example/gyacomo23_sp
new file mode 120000
index 0000000000000000000000000000000000000000..d3982f650fe02a339058b25297c18d43a3f4c36d
--- /dev/null
+++ b/testcases/cyclone_example/gyacomo23_sp
@@ -0,0 +1 @@
+../../bin/gyacomo23_sp
\ No newline at end of file
diff --git a/testcases/matlab_testscripts/Hallenbert.m b/testcases/matlab_testscripts/Hallenbert.m
index 5ded4b11104b0792dd6a38a2c2e6fba1b1bd27d2..38fa23d7a30d5fa0a707678ccad39f8c5e28021b 100644
--- a/testcases/matlab_testscripts/Hallenbert.m
+++ b/testcases/matlab_testscripts/Hallenbert.m
@@ -41,7 +41,7 @@ JOB2LOAD= -1;
 % (LB:L.Bernstein, DG:Dougherty, SG:Sugama, LR: Lorentz, LD: Landau)
 CO      = 'LD';
 GKCO    = 1; % gyrokinetic operator
-ABCO    = 1; % interspecies collisions
+ABCO    = 1; % INTERSPECIES collisions
 NL_CLOS = -1;   % nonlinear closure model (-2: nmax = jmax, -1: nmax = jmax-j, >=0 : nmax = NL_CLOS)
 SIMID   = 'Hallenbert_nu_1e-01';  % Name of the simulation
 % SIMID   = 'debug';  % Name of the simulation
diff --git a/testcases/matlab_testscripts/linear_1D_entropy_mode.m b/testcases/matlab_testscripts/linear_1D_entropy_mode.m
index 02de8d1a02673c9f2c8fef223d8d55ef10482aaa..77ef9c1d2f9e97ab99da70201aec2f465be4357c 100644
--- a/testcases/matlab_testscripts/linear_1D_entropy_mode.m
+++ b/testcases/matlab_testscripts/linear_1D_entropy_mode.m
@@ -47,7 +47,7 @@ KIN_E   = 1;
 % (LB:L.Bernstein, DG:Dougherty, SG:Sugama, LR: Lorentz, LD: Landau)
 CO      = 'LR';
 GKCO    = 1; % gyrokinetic operator
-ABCO    = 1; % interspecies collisions
+ABCO    = 1; % INTERSPECIES collisions
 INIT_ZF = 0; ZF_AMP = 0.0;
 CLOS    = 0;   % Closure model (0: =0 truncation, 1: gyrofluid closure (p+2j<=Pmax))s
 NL_CLOS = 0;   % nonlinear closure model (-2:nmax=jmax; -1:nmax=jmax-j; >=0:nmax=NL_CLOS)
diff --git a/testcases/matlab_testscripts/linear_damping.m b/testcases/matlab_testscripts/linear_damping.m
index 46b6c4e888fefa5d8f92e51793b1c6f2afcffb2a..494470f7c126840b1a57adeac222ceb3529506a8 100644
--- a/testcases/matlab_testscripts/linear_damping.m
+++ b/testcases/matlab_testscripts/linear_damping.m
@@ -39,7 +39,7 @@ KIN_E   = 1;
 % (LB:L.Bernstein, DG:Dougherty, SG:Sugama, LR: Lorentz, LD: Landau)
 CO      = 'DG';
 GKCO    = 1; % gyrokinetic operator
-ABCO    = 1; % interspecies collisions
+ABCO    = 1; % INTERSPECIES collisions
 INIT_ZF = 0; ZF_AMP = 0.0;
 CLOS    = 0;   % Closure model (0: =0 truncation, 1: gyrofluid closure (p+2j<=Pmax))
 NL_CLOS = 0;   % nonlinear closure model (-2:nmax=jmax; -1:nmax=jmax-j; >=0:nmax=NL_CLOS)
diff --git a/testcases/smallest_problem/fort.90 b/testcases/smallest_problem/fort.90
new file mode 100644
index 0000000000000000000000000000000000000000..bc7dce3fbb38d7480430af97ea3d8ecabed51335
--- /dev/null
+++ b/testcases/smallest_problem/fort.90
@@ -0,0 +1,101 @@
+&BASIC
+  nrun       = 99999999
+  dt         = 0.01
+  tmax       = 5
+  maxruntime = 356400
+  job2load   = -1
+/
+&GRID
+  pmax   = 4
+  jmax   = 1
+  Nx     = 16
+  Lx     = 200
+  Ny     = 12
+  Ly     = 60
+  Nz     = 6
+  SG     = .f.
+  Nexc   = 1
+/
+&GEOMETRY
+  geom   = 's-alpha'
+  q0     = 1.4
+  shear  = 0.0
+  eps    = 0.18
+  kappa  = 1.0
+  s_kappa= 0.0
+  delta  = 0.0
+  s_delta= 0.0
+  zeta   = 0.0
+  s_zeta = 0.0
+  parallel_bc = 'dirichlet'
+  shift_y= 0.0
+/
+&OUTPUT_PAR
+  dtsave_0d = 0.5
+  dtsave_1d = -1
+  dtsave_2d = -1
+  dtsave_3d = 1
+  dtsave_5d = 5
+  write_doubleprecision = .f.
+  write_gamma = .t.
+  write_hf    = .t.
+  write_phi   = .t.
+  write_Na00  = .t.
+  write_Napj  = .t.
+  write_dens  = .t.
+  write_fvel  = .t.
+  write_temp  = .t.
+/
+&MODEL_PAR
+  ! Collisionality
+  CLOS    = 0
+  NL_CLOS = -1
+  LINEARITY = 'nonlinear'
+  Na      = 2 ! number of species
+  mu_x    = 0.2
+  mu_y    = 0.4
+  N_HD    = 4
+  mu_z    = 0.6
+  HYP_V   = 'hypcoll'
+  mu_p    = 0.1
+  mu_j    = 0.5
+  nu      = 1.0
+  beta    = 0.1
+  ADIAB_E = .f.
+  tau_e   = 1.0
+/
+&SPECIES
+ ! ions
+ name_ = 'ions'
+ tau_  = 1.0
+ sigma_= 1.0
+ q_    = 1.0
+ k_N_  = 3.0!2.22
+ k_T_  = 4.0!6.96
+/
+&SPECIES
+ ! electrons
+ name_ = 'electrons'
+ tau_  = 1.0
+ sigma_= 0.023338
+ q_    = -1.0
+ k_N_  = 1.0!2.22
+ k_T_  = 2.0!6.96
+/
+
+&COLLISION_PAR
+  collision_model = 'DG' !DG/SG/PA/LD (dougherty, sugama, pitch angle, landau)
+  GK_CO           = .t.
+  INTERSPECIES    = .true.
+  mat_file       = 'gk_sugama_P_20_J_10_N_150_kpm_8.0.h5'
+/
+&INITIAL_CON
+  INIT_OPT         = 'blob'
+  ACT_ON_MODES     = 'donothing'
+  init_background  = 1.0
+  init_noiselvl    = 0.0
+  iseed            = 42
+/
+&TIME_INTEGRATION_PAR
+  numerical_scheme = 'RK4'
+/
diff --git a/testcases/smallest_problem/fort_00.90 b/testcases/smallest_problem/fort_00.90
index 946f5db3e2f2033b284f3549a92d30ec445a70e3..d330d8e0d3cf16dcf07cfc2e8cd0465b1c41d052 100644
--- a/testcases/smallest_problem/fort_00.90
+++ b/testcases/smallest_problem/fort_00.90
@@ -1,41 +1,40 @@
 &BASIC
-  nrun   = 1
+  nrun   = 99999999
   dt     = 0.01
   tmax   = 5
   maxruntime = 356400
 /
 &GRID
-  pmaxe  = 2
+  pmaxe  = 4
   jmaxe  = 1
-  pmaxi  = 2
+  pmaxi  = 4
   jmaxi  = 1
-  Nx     = 2
+  Nx     = 16
   Lx     = 200
-  Ny     = 2
+  Ny     = 12
   Ly     = 60
-  Nz     = 64
+  Nz     = 6
   SG     = .f.
 /
 &GEOMETRY
   geom   = 's-alpha'
   q0     = 1.4
-  shear  = 0.8
+  shear  = 0.0
   eps    = 0.18
   parallel_bc = 'dirichlet'
 /
 &OUTPUT_PAR
-  nsave_0d = 1
+  nsave_0d = 50
   nsave_1d = -1
   nsave_2d = -1
-  nsave_3d = 1
-  nsave_5d = 1
+  nsave_3d = 100
+  nsave_5d = 500
   write_doubleprecision = .f.
   write_gamma = .t.
   write_hf    = .t.
   write_phi   = .t.
-  write_Na00  = .f.
+  write_Na00  = .t.
   write_Napj  = .t.
-  write_Sapj  = .f.
   write_dens  = .t.
   write_temp  = .t.
   job2load    = -1
@@ -44,15 +43,16 @@
   ! Collisionality
   CLOS    = 0
   NL_CLOS = -1
-  LINEARITY = 'linear'
-  KIN_E   = .f.
-  mu_x    = 0.0
-  mu_y    = 0.0
+  LINEARITY = 'nonlinear'
+  KIN_E   = .t.
+  mu_x    = 0.2
+  mu_y    = 0.4
   N_HD    = 4
-  mu_z    = 0.1
-  mu_p    = 0
-  mu_j    = 0
-  nu      = 1
+  mu_z    = 0.6
+  HYP_V   = 'hypcoll'
+  mu_p    = 0.1
+  mu_j    = 0.5
+  nu      = 1.0
   tau_e   = 1
   tau_i   = 1
   sigma_e = 0.023338
@@ -60,19 +60,19 @@
   q_e     = -1
   q_i     = 1
   K_Ne    = 1!6.96
-  K_Te    = 1!2.22
-  K_Ni    = 1!6.96
-  K_Ti    = 1!2.22
-  beta    = 0
+  K_Te    = 2!2.22
+  K_Ni    = 3!6.96
+  K_Ti    = 4!2.22
+  beta    = 0.1
 /
 &COLLISION_PAR
   collision_model = 'DG' !DG/SG/PA/LD (dougherty, sugama, pitch angle, landau)
-  gyrokin_CO      = .f.
+  gyrokin_CO      = .t.
   interspecies    = .true.
-  !mat_file        = 'gk_sugama_P_20_J_10_N_150_kpm_8.0.h5'
+  mat_file        = 'gk_sugama_P_20_J_10_N_150_kpm_8.0.h5'
 /
 &INITIAL_CON
-  INIT_OPT    = 'phi'
+  INIT_OPT    = 'blob'
   ACT_ON_MODES       = 'donothing'
   init_background  = 1.0
   init_noiselvl = 0.0
diff --git a/testcases/smallest_problem/fort_01.90 b/testcases/smallest_problem/fort_01.90
index f5a7d5600e8bf7901303338f0b873d4f02a469f0..ecaa87118a163ac916136103ee56366553e738ad 100644
--- a/testcases/smallest_problem/fort_01.90
+++ b/testcases/smallest_problem/fort_01.90
@@ -67,8 +67,8 @@
 /
 &COLLISION_PAR
   collision_model = 'DG' !DG/SG/PA/LD (dougherty, sugama, pitch angle, landau)
-  gyrokin_CO      = .t.
-  interspecies    = .true.
+  GK_CO      = .t.
+  INTERSPECIES    = .true.
   !mat_file        = 'gk_sugama_P_20_J_10_N_150_kpm_8.0.h5'
 /
 &INITIAL_CON
diff --git a/testcases/smallest_problem/fort_11.90 b/testcases/smallest_problem/fort_11.90
new file mode 100644
index 0000000000000000000000000000000000000000..f6a8807c823adec78718240e0ca647988b504acd
--- /dev/null
+++ b/testcases/smallest_problem/fort_11.90
@@ -0,0 +1,101 @@
+&BASIC
+  nrun       = 99999999
+  dt         = 0.01
+  tmax       = 5
+  maxruntime = 356400
+  job2load   = -1
+/
+&GRID
+  pmax   = 4
+  jmax   = 1
+  Nx     = 2
+  Lx     = 200
+  Ny     = 4
+  Ly     = 60
+  Nz     = 6
+  SG     = .f.
+  Nexc   = 1
+/
+&GEOMETRY
+  geom   = 's-alpha'
+  q0     = 1.4
+  shear  = 0.0
+  eps    = 0.18
+  kappa  = 1.0
+  s_kappa= 0.0
+  delta  = 0.0
+  s_delta= 0.0
+  zeta   = 0.0
+  s_zeta = 0.0
+  parallel_bc = 'dirichlet'
+  shift_y= 0.0
+/
+&OUTPUT_PAR
+  dtsave_0d = 0.5
+  dtsave_1d = -1
+  dtsave_2d = -1
+  dtsave_3d = 1
+  dtsave_5d = 5
+  write_doubleprecision = .f.
+  write_gamma = .t.
+  write_hf    = .t.
+  write_phi   = .t.
+  write_Na00  = .t.
+  write_Napj  = .t.
+  write_dens  = .t.
+  write_fvel  = .t.
+  write_temp  = .t.
+/
+&MODEL_PAR
+  ! Collisionality
+  CLOS    = 0
+  NL_CLOS = -1
+  LINEARITY = 'linear'
+  Na      = 2 ! number of species
+  mu_x    = 0.2
+  mu_y    = 0.4
+  N_HD    = 4
+  mu_z    = 0.6
+  HYP_V   = 'hypcoll'
+  mu_p    = 0.1
+  mu_j    = 0.5
+  nu      = 1.0
+  beta    = 0.1
+  ADIAB_E = .f.
+  tau_e   = 1.0
+/
+&SPECIES
+ ! ions
+ name_ = 'ions'
+ tau_  = 1.0
+ sigma_= 1.0
+ q_    = 1.0
+ k_N_  = 3.0!2.22
+ k_T_  = 4.0!6.96
+/
+&SPECIES
+ ! electrons
+ name_ = 'electrons'
+ tau_  = 1.0
+ sigma_= 0.023338
+ q_    = -1.0
+ k_N_  = 1.0!2.22
+ k_T_  = 2.0!6.96
+/
+
+&COLLISION_PAR
+  collision_model = 'DG' !DG/SG/PA/LD (dougherty, sugama, pitch angle, landau)
+  GK_CO           = .t.
+  INTERSPECIES    = .true.
+  mat_file       = 'gk_sugama_P_20_J_10_N_150_kpm_8.0.h5'
+/
+&INITIAL_CON
+  INIT_OPT         = 'allmom'
+  ACT_ON_MODES     = 'donothing'
+  init_background  = 1.0
+  init_noiselvl    = 0.0
+  iseed            = 42
+/
+&TIME_INTEGRATION_PAR
+  numerical_scheme = 'RK4'
+/
diff --git a/testcases/smallest_problem/gyacomo_1 b/testcases/smallest_problem/gyacomo_1
deleted file mode 120000
index d087f907f6e032370d37ee4a2d6c5c29a5350208..0000000000000000000000000000000000000000
--- a/testcases/smallest_problem/gyacomo_1
+++ /dev/null
@@ -1 +0,0 @@
-../../bin/gyacomo_1
\ No newline at end of file
diff --git a/testcases/smallest_problem/gyacomo_1 b/testcases/smallest_problem/gyacomo_1
new file mode 100644
index 0000000000000000000000000000000000000000..2fd67105ac22ff7e7d826fd5df5e4dc4ccc8d3ff
--- /dev/null
+++ b/testcases/smallest_problem/gyacomo_1
@@ -0,0 +1 @@
+../../../gyacomo_1/bin/gyacomo
\ No newline at end of file
diff --git a/testcases/smallest_problem/gyacomo_1_debug b/testcases/smallest_problem/gyacomo_1_debug
deleted file mode 120000
index f5e7015fb1d6aa0dd07f5ed6008d39a8036fbba9..0000000000000000000000000000000000000000
--- a/testcases/smallest_problem/gyacomo_1_debug
+++ /dev/null
@@ -1 +0,0 @@
-../../bin/gyacomo_1_debug
\ No newline at end of file
diff --git a/testcases/smallest_problem/gyacomo_1_debug b/testcases/smallest_problem/gyacomo_1_debug
new file mode 100644
index 0000000000000000000000000000000000000000..f0c8bbfd26fdc619d94a6e4d6f59dde7d8630636
--- /dev/null
+++ b/testcases/smallest_problem/gyacomo_1_debug
@@ -0,0 +1 @@
+../../../gyacomo_1/bin/gyacomo_debug
\ No newline at end of file
diff --git a/testcases/zpinch_example/fort_00.90 b/testcases/zpinch_example/fort_00.90
index 67312e5472dc8d8c4ae88ed959ac2e9744dbd936..4c2f8a920232e171d6e784da7a2a590722c58b98 100644
--- a/testcases/zpinch_example/fort_00.90
+++ b/testcases/zpinch_example/fort_00.90
@@ -1,82 +1,102 @@
 &BASIC
-  nrun   = 100000000
-  dt     = 0.01
-  tmax   = 50
-  maxruntime = 356400
+  nrun       = 99999999
+  dt         = 0.01
+  tmax       = 50
+  maxruntime = 72000
+  job2load   = -1
 /
 &GRID
-  pmaxe  = 4
-  jmaxe  = 2
-  pmaxi  = 4
-  jmaxi  = 2
+  pmax   = 4
+  jmax   = 2
   Nx     = 128
   Lx     = 200
   Ny     = 48
   Ly     = 60
   Nz     = 1
   SG     = .f.
+  Nexc   = 1
 /
 &GEOMETRY
   geom   = 'Z-pinch'
-  q0     = 0
-  shear  = 0
-  eps    = 0
+  q0     = 0.0
+  shear  = 0.0 
+  eps    = 0.0
+  kappa  = 1.0
+  s_kappa= 0.0
+  delta  = 0.0
+  s_delta= 0.0
+  zeta   = 0.0
+  s_zeta = 0.0
   parallel_bc = 'shearless'
+  shift_y= 0.0
 /
 &OUTPUT_PAR
-  nsave_0d = 10
-  nsave_1d = -1
-  nsave_2d = -1
-  nsave_3d = 100
-  nsave_5d = 1000
+  dtsave_0d = 1
+  dtsave_1d = -1
+  dtsave_2d = -1
+  dtsave_3d = 5
+  dtsave_5d = 20
   write_doubleprecision = .f.
   write_gamma = .t.
   write_hf    = .t.
   write_phi   = .t.
-  write_Na00  = .f.
+  write_Na00  = .t.
   write_Napj  = .t.
-  write_Sapj  = .f.
   write_dens  = .t.
+  write_fvel  = .t.
   write_temp  = .t.
-  job2load    = -1
 /
 &MODEL_PAR
-  ! Collisionality
-  CLOS    = 0
-  NL_CLOS = -1
   LINEARITY = 'nonlinear'
-  KIN_E   = .t.
+  Na      = 2 ! number of species
   mu_x    = 1.0
   mu_y    = 1.0
   N_HD    = 4
   mu_z    = 0.0
-  mu_p    = 0
-  mu_j    = 0
+  HYP_V   = 'hypcoll'
+  mu_p    = 0.0
+  mu_j    = 0.0
   nu      = 0.1
-  tau_e   = 1
-  tau_i   = 1
-  sigma_e = 0.023338
-  sigma_i = 1
-  q_e     = -1
-  q_i     = 1
-  K_Ne    = 2.0
-  K_Te    = 0.4
-  K_Ni    = 2.0
-  K_Ti    = 0.4
-  beta    = 0
+  beta    = 0.0
+  ADIAB_E = .f.
+  tau_e   = 1.0
+/
+&CLOSURE_PAR
+  hierarchy_closure='truncation'
+  dmax = -1
+  nonlinear_closure='anti_laguerre_aliasing' !(truncation,full_sum,anti_laguerre_aliasing)
+  nmax = 0
+/
+&SPECIES
+ ! ions
+ name_ = 'ions'
+ tau_  = 1.0
+ sigma_= 1.0
+ q_    = 1.0
+ k_N_  = 2.0
+ k_T_  = 0.4
+/
+&SPECIES
+ ! electrons
+ name_ = 'electrons'
+ tau_  = 1.0
+ sigma_= 0.023338
+ q_    =-1.0
+ k_N_  = 2.0
+ k_T_  = 0.4
 /
 &COLLISION_PAR
   collision_model = 'DG' !DG/SG/PA/LD (dougherty, sugama, pitch angle, landau)
-  gyrokin_CO      = .true.
-  interspecies    = .true.
-  !mat_file        = 'gk_sugama_P_20_J_10_N_150_kpm_8.0.h5'
+  GK_CO           = .t.
+  INTERSPECIES    = .true.
+  mat_file       = 'gk_sugama_P_20_J_10_N_150_kpm_8.0.h5'
 /
 &INITIAL_CON
-  INIT_OPT    = 'phi'
-  ACT_ON_MODES       = 'donothing'
-  init_background  = 0
-  init_noiselvl = 0.005
-  iseed         = 42
+  INIT_OPT         = 'phi' !(phi,blob)
+  ACT_ON_MODES     = 'donothing'
+  init_background  = 0.0
+  init_noiselvl    = 0.005
+  iseed            = 42
 /
 &TIME_INTEGRATION_PAR
   numerical_scheme = 'RK4'
diff --git a/testcases/zpinch_example/gyacomo23_debug b/testcases/zpinch_example/gyacomo23_debug
new file mode 120000
index 0000000000000000000000000000000000000000..dcb6f58d8624fe5e71697ab17d5f0bb5f64c1c4c
--- /dev/null
+++ b/testcases/zpinch_example/gyacomo23_debug
@@ -0,0 +1 @@
+../../bin/gyacomo23_debug
\ No newline at end of file
diff --git a/testcases/zpinch_example/gyacomo23_dp b/testcases/zpinch_example/gyacomo23_dp
new file mode 120000
index 0000000000000000000000000000000000000000..f11ab935431cb2cbfeac04a93dd0231984d8014c
--- /dev/null
+++ b/testcases/zpinch_example/gyacomo23_dp
@@ -0,0 +1 @@
+../../bin/gyacomo23_dp
\ No newline at end of file
diff --git a/testcases/zpinch_example/gyacomo23_sp b/testcases/zpinch_example/gyacomo23_sp
new file mode 120000
index 0000000000000000000000000000000000000000..d3982f650fe02a339058b25297c18d43a3f4c36d
--- /dev/null
+++ b/testcases/zpinch_example/gyacomo23_sp
@@ -0,0 +1 @@
+../../bin/gyacomo23_sp
\ No newline at end of file
diff --git a/testcases/zpinch_example/gyacomo_debug b/testcases/zpinch_example/gyacomo_debug
deleted file mode 120000
index 363e139a389f2e5d2d2097c09bf5ab363772dbfc..0000000000000000000000000000000000000000
--- a/testcases/zpinch_example/gyacomo_debug
+++ /dev/null
@@ -1 +0,0 @@
-../../bin/gyacomo_debug
\ No newline at end of file
diff --git a/wk/analysis_gene.m b/wk/analysis_gene.m
index 9ab388eac102be913b941cd9fa9b9594a3c8135c..b3aa355a68f1fb929cdf35527a539bf252465c4b 100644
--- a/wk/analysis_gene.m
+++ b/wk/analysis_gene.m
@@ -47,14 +47,14 @@ addpath(genpath([gyacomodir,'matlab/load'])) % ... add
 %Paper 2
 % folder = '/misc/gene_results/CBC/KT_6.96_64x32x32x24x12_Nexc_5/';
 % folder = '/misc/gene_results/CBC/KT_6.96_128x64x24x8x4_Nexc_5_00/';
-folder = '/misc/gene_results/CBC/KT_6.96_128x64x24x16x8_Nexc_5_00/';
+% folder = '/misc/gene_results/CBC/KT_6.96_128x64x24x16x8_Nexc_5_00/';
 % folder = '/misc/gene_results/CBC/KT_6.96_128x64x24x32x16_Nexc_5_00/';
 % folder = '/misc/gene_results/CBC/KT_6.96_128x64x24x32x16_Nexc_5_01/';
 
 % folder = '/misc/gene_results/CBC/KT_5.3_128x64x24x32x16_Nexc_5_00/';
 % folder = '/misc/gene_results/CBC/KT_5.3_128x64x24x32x16_Nexc_5_01/';
 % folder = '/misc/gene_results/CBC/new_sim/KT_5.3_128x64x24x16x8_Nexc_5/';
-% folder = '/misc/gene_results/CBC/new_sim/KT_5.3_128x64x24x8x4_Nexc_5/';
+folder = '/misc/gene_results/CBC/new_sim/KT_5.3_128x64x24x8x4_Nexc_5/';
 % folder = '/misc/gene_results/CBC/new_sim/KT_6.96_128x64x24x8x4_Nexc_5_smallvbox/';
 % folder = '/misc/gene_results/CBC/new_sim/KT_6.96_128x64x24x16x8_Nexc_5_largexbox/';
 % folder = '/misc/gene_results/CBC/KT_5.3_128x64x24x16x8_Muz_0.02/';
diff --git a/wk/analysis_gyacomo.m b/wk/analysis_gyacomo.m
index 449ff6d44e2038ac70d44d66c069d52f3333e54e..97ae221785047883d5dc3beeaedcbc7ec634cdf8 100644
--- a/wk/analysis_gyacomo.m
+++ b/wk/analysis_gyacomo.m
@@ -1,8 +1,8 @@
 %% UNCOMMENT FOR TUTORIAL
 gyacomodir = pwd; gyacomodir = gyacomodir(1:end-2); % get code directory
-% resdir = '../testcases/zpinch_example'; %Name of the directory where the results are located
-% PARTITION = '';
-% JOBNUMMIN = 00; JOBNUMMAX = 10;
+% resdir = '../testcases/cyclone_example/'; %Name of the directory where the results are located
+% PARTITION ='';
+% JOBNUMMIN = 03; JOBNUMMAX = 03;
 %%
 addpath(genpath([gyacomodir,'matlab'])) % ... add
 addpath(genpath([gyacomodir,'matlab/plot'])) % ... add
@@ -85,7 +85,7 @@ options.NAME      = '\phi';
 % options.NAME      = 'Q_x';
 % options.NAME      = 'n_i';
 % options.NAME      = 'n_i-n_e';
-options.PLAN      = 'xy';
+options.PLAN      = 'kxky';
 % options.NAME      = 'f_i';
 % options.PLAN      = 'sx';
 options.COMP      = 'avg';
@@ -116,9 +116,9 @@ options.NAME      = '\phi';
 % options.NAME      = 's_{Ex}';
 % options.NAME      = 'Q_x';
 % options.NAME      = 'k^2n_e';
-options.PLAN      = 'xy';
+options.PLAN      = 'kxz';
 options.COMP      = 'avg';
-options.TIME      = [20 35 50];
+options.TIME      = [20 30 60];
 options.RESOLUTION = 256;
 
 data.a = data.EPS * 2e3;
diff --git a/wk/benchmark scripts/Ajay_scan_CH4_lin_ITG.m b/wk/benchmark and scan scripts/Ajay_scan_CH4_lin_ITG.m
similarity index 99%
rename from wk/benchmark scripts/Ajay_scan_CH4_lin_ITG.m
rename to wk/benchmark and scan scripts/Ajay_scan_CH4_lin_ITG.m
index 5836570d22e6e0abe41b58e4fa07a047308059e5..db2e4d8ebb6bb86b8536d883a0ca844dd58829a1 100644
--- a/wk/benchmark scripts/Ajay_scan_CH4_lin_ITG.m	
+++ b/wk/benchmark and scan scripts/Ajay_scan_CH4_lin_ITG.m	
@@ -87,7 +87,7 @@ for NU = NU_a
     %% OPTIONS
     LINEARITY = 'linear';   % activate non-linearity (is cancelled if KXEQ0 = 1)
     % Collision operator
-    ABCO    = 1; % interspecies collisions
+    ABCO    = 1; % INTERSPECIES collisions
     INIT_ZF = 0; ZF_AMP = 0.0;
     CLOS    = 0;   % Closure model (0: =0 truncation, 1: v^Nmax closure (p+2j<=Pmax))s
     NL_CLOS = 0;   % nonlinear closure model (-2:nmax=jmax; -1:nmax=jmax-j; >=0:nmax=NL_CLOS)
diff --git a/wk/CBC_P_J_scan.m b/wk/benchmark and scan scripts/CBC_P_J_scan.m
similarity index 99%
rename from wk/CBC_P_J_scan.m
rename to wk/benchmark and scan scripts/CBC_P_J_scan.m
index 1b2a32eadf70e111f945ddb8ac49e1d852b43247..a4dc82614e35200906a09fa9f60abfa0fce78a10 100644
--- a/wk/CBC_P_J_scan.m
+++ b/wk/benchmark and scan scripts/CBC_P_J_scan.m	
@@ -91,7 +91,7 @@ for J = J_a
     %% OPTIONS
     LINEARITY = 'linear';   % activate non-linearity (is cancelled if KXEQ0 = 1)
     % Collision operator
-    ABCO    = 1; % interspecies collisions
+    ABCO    = 1; % INTERSPECIES collisions
     INIT_ZF = 0; ZF_AMP = 0.0;
     CLOS    = 0;   % Closure model (0: =0 truncation, 1: v^Nmax closure (p+2j<=Pmax))s
     NL_CLOS = 0;   % nonlinear closure model (-2:nmax=jmax; -1:nmax=jmax-j; >=0:nmax=NL_CLOS)
diff --git a/wk/CBC_hypcoll_PJ_scan.m b/wk/benchmark and scan scripts/CBC_hypcoll_PJ_scan.m
similarity index 99%
rename from wk/CBC_hypcoll_PJ_scan.m
rename to wk/benchmark and scan scripts/CBC_hypcoll_PJ_scan.m
index 1375c493d07f7b3a85d23e91cf269b5030f95e3a..1392bc32eefcca41b87dfbecfccc85ed1ed8df95 100644
--- a/wk/CBC_hypcoll_PJ_scan.m
+++ b/wk/benchmark and scan scripts/CBC_hypcoll_PJ_scan.m	
@@ -92,7 +92,7 @@ for NU_ = NU_a
     %% OPTIONS
     LINEARITY = 'linear';   % activate non-linearity (is cancelled if KXEQ0 = 1)
     % Collision operator
-    ABCO    = 1; % interspecies collisions
+    ABCO    = 1; % INTERSPECIES collisions
     INIT_ZF = 0; ZF_AMP = 0.0;
     CLOS    = 0;   % Closure model (0: =0 truncation, 1: v^Nmax closure (p+2j<=Pmax))s
     NL_CLOS = 0;   % nonlinear closure model (-2:nmax=jmax; -1:nmax=jmax-j; >=0:nmax=NL_CLOS)
diff --git a/wk/CBC_kT_PJ_scan.m b/wk/benchmark and scan scripts/CBC_kT_PJ_scan.m
similarity index 99%
rename from wk/CBC_kT_PJ_scan.m
rename to wk/benchmark and scan scripts/CBC_kT_PJ_scan.m
index 66912c279207326ed11e4447a6aefedb726a79d0..4fe617ecbebf1b21c6d4bdd270bae0adf05b7e63 100644
--- a/wk/CBC_kT_PJ_scan.m
+++ b/wk/benchmark and scan scripts/CBC_kT_PJ_scan.m	
@@ -94,7 +94,7 @@ for KT = KT_a
     %% OPTIONS
     LINEARITY = 'linear';   % activate non-linearity (is cancelled if KXEQ0 = 1)
     % Collision operator
-    ABCO    = 1; % interspecies collisions
+    ABCO    = 1; % INTERSPECIES collisions
     INIT_ZF = 0; ZF_AMP = 0.0;
     CLOS    = 0;   % Closure model (0: =0 truncation, 1: v^Nmax closure (p+2j<=Pmax))s
     NL_CLOS = 0;   % nonlinear closure model (-2:nmax=jmax; -1:nmax=jmax-j; >=0:nmax=NL_CLOS)
diff --git a/wk/CBC_kT_nu_scan.m b/wk/benchmark and scan scripts/CBC_kT_nu_scan.m
similarity index 99%
rename from wk/CBC_kT_nu_scan.m
rename to wk/benchmark and scan scripts/CBC_kT_nu_scan.m
index 5bd21bfdbc4d2549113d5f148595982f848dc74c..fca6c55451dab883d336861b0acb1d7ab1e78477 100644
--- a/wk/CBC_kT_nu_scan.m
+++ b/wk/benchmark and scan scripts/CBC_kT_nu_scan.m	
@@ -90,7 +90,7 @@ for KT = KT_a
     %% OPTIONS
     LINEARITY = 'linear';   % activate non-linearity (is cancelled if KXEQ0 = 1)
     % Collision operator
-    ABCO    = 1; % interspecies collisions
+    ABCO    = 1; % INTERSPECIES collisions
     INIT_ZF = 0; ZF_AMP = 0.0;
     CLOS    = 0;   % Closure model (0: =0 truncation, 1: v^Nmax closure (p+2j<=Pmax))s
     NL_CLOS = 0;   % nonlinear closure model (-2:nmax=jmax; -1:nmax=jmax-j; >=0:nmax=NL_CLOS)
diff --git a/wk/CBC_nu_PJ_scan.m b/wk/benchmark and scan scripts/CBC_nu_PJ_scan.m
similarity index 99%
rename from wk/CBC_nu_PJ_scan.m
rename to wk/benchmark and scan scripts/CBC_nu_PJ_scan.m
index 8a28aca6ee1d1a177daf39a1a03329eddc6e1a4c..f20276b1f7846b68496e139bff0983fc91916d95 100644
--- a/wk/CBC_nu_PJ_scan.m
+++ b/wk/benchmark and scan scripts/CBC_nu_PJ_scan.m	
@@ -89,7 +89,7 @@ for NU = NU_a
     %% OPTIONS
     LINEARITY = 'linear';   % activate non-linearity (is cancelled if KXEQ0 = 1)
     % Collision operator
-    ABCO    = 1; % interspecies collisions
+    ABCO    = 1; % INTERSPECIES collisions
     INIT_ZF = 0; ZF_AMP = 0.0;
     CLOS    = 0;   % Closure model (0: =0 truncation, 1: v^Nmax closure (p+2j<=Pmax))s
     NL_CLOS = 0;   % nonlinear closure model (-2:nmax=jmax; -1:nmax=jmax-j; >=0:nmax=NL_CLOS)
diff --git a/wk/lin_3D_Zpinch.m b/wk/benchmark and scan scripts/lin_3D_Zpinch.m
similarity index 99%
rename from wk/lin_3D_Zpinch.m
rename to wk/benchmark and scan scripts/lin_3D_Zpinch.m
index e6ca8d59b90dca2d3c376562568d0485f8fdbfb1..fa650e0a2dbaf9e7be859cdf45ebde887d3ec4c6 100644
--- a/wk/lin_3D_Zpinch.m
+++ b/wk/benchmark and scan scripts/lin_3D_Zpinch.m	
@@ -68,7 +68,7 @@ LINEARITY = 'linear';   % activate non-linearity (is cancelled if KXEQ0 = 1)
 % (LB:L.Bernstein, DG:Dougherty, SG:Sugama, LR: Lorentz, LD: Landau)
 CO      = 'DG';
 GKCO    = 1; % gyrokinetic operator
-ABCO    = 1; % interspecies collisions
+ABCO    = 1; % INTERSPECIES collisions
 INIT_ZF = 0; ZF_AMP = 0.0;
 CLOS    = 0;   % Closure model (0: =0 truncation, 1: v^Nmax closure (p+2j<=Pmax))s
 NL_CLOS = 0;   % nonlinear closure model (-2:nmax=jmax; -1:nmax=jmax-j; >=0:nmax=NL_CLOS)
diff --git a/wk/lin_ETPY.m b/wk/benchmark and scan scripts/lin_ETPY.m
similarity index 99%
rename from wk/lin_ETPY.m
rename to wk/benchmark and scan scripts/lin_ETPY.m
index ed2d0e85e461944a284b79d2664b824ae187b232..5c5f47256eb894c27a50dff8113a1d2176a5e8cb 100644
--- a/wk/lin_ETPY.m
+++ b/wk/benchmark and scan scripts/lin_ETPY.m	
@@ -72,7 +72,7 @@ LINEARITY = 'linear';   % activate non-linearity (is cancelled if KXEQ0 = 1)
 % (LB:L.Bernstein, DG:Dougherty, SG:Sugama, LR: Lorentz, LD: Landau)
 CO      = 'DG';
 GKCO    = 1; % gyrokinetic operator
-ABCO    = 1; % interspecies collisions
+ABCO    = 1; % INTERSPECIES collisions
 INIT_ZF = 0; ZF_AMP = 0.0;
 CLOS    = 0;   % Closure model (0: =0 truncation, 1: v^Nmax closure (p+2j<=Pmax))s
 NL_CLOS = 0;   % nonlinear closure model (-2:nmax=jmax; -1:nmax=jmax-j; >=0:nmax=NL_CLOS)
diff --git a/wk/lin_ITG.m b/wk/benchmark and scan scripts/lin_ITG.m
similarity index 99%
rename from wk/lin_ITG.m
rename to wk/benchmark and scan scripts/lin_ITG.m
index 64bfc8880e7b1df9765dabcf53f1ee30eb8747fe..1750f20e3454160d758f7d3128941a4786ed9c56 100644
--- a/wk/lin_ITG.m
+++ b/wk/benchmark and scan scripts/lin_ITG.m	
@@ -75,7 +75,7 @@ LINEARITY = 'linear';   % activate non-linearity (is cancelled if KXEQ0 = 1)
 % (LB:L.Bernstein, DG:Dougherty, SG:Sugama, LR: Lorentz, LD: Landau)
 CO      = 'DG';
 GKCO    = 0; % gyrokinetic operator
-ABCO    = 1; % interspecies collisions
+ABCO    = 1; % INTERSPECIES collisions
 INIT_ZF = 0; ZF_AMP = 0.0;
 CLOS    = 0;   % Closure model (0: =0 truncation, 1: v^Nmax closure (p+2j<=Pmax))s
 NL_CLOS = 0;   % nonlinear closure model (-2:nmax=jmax; -1:nmax=jmax-j; >=0:nmax=NL_CLOS)
diff --git a/wk/lin_KBM.m b/wk/benchmark and scan scripts/lin_KBM.m
similarity index 99%
rename from wk/lin_KBM.m
rename to wk/benchmark and scan scripts/lin_KBM.m
index f4f03f6277c7d511ee984c333e481e8633aac38a..7a8c6b786c23f3f991675d41e4eee72bb20cec72 100644
--- a/wk/lin_KBM.m
+++ b/wk/benchmark and scan scripts/lin_KBM.m	
@@ -64,7 +64,7 @@ LINEARITY = 'linear';   % activate non-linearity (is cancelled if KXEQ0 = 1)
 % (LB:L.Bernstein, DG:Dougherty, SG:Sugama, LR: Lorentz, LD: Landau)
 CO      = 'DG';
 GKCO    = 0; % gyrokinetic operator
-ABCO    = 1; % interspecies collisions
+ABCO    = 1; % INTERSPECIES collisions
 INIT_ZF = 0; ZF_AMP = 0.0;
 CLOS    = 0;   % Closure model (0: =0 truncation, 1: v^Nmax closure (p+2j<=Pmax))s
 NL_CLOS = 0;   % nonlinear closure model (-2:nmax=jmax; -1:nmax=jmax-j; >=0:nmax=NL_CLOS)
diff --git a/wk/lin_MTM.m b/wk/benchmark and scan scripts/lin_MTM.m
similarity index 99%
rename from wk/lin_MTM.m
rename to wk/benchmark and scan scripts/lin_MTM.m
index 3330f2658226944f29c605901baee82e3956bd2d..b7f2885b3eacad1e9a4ea3a43ce77c1c0231af4d 100644
--- a/wk/lin_MTM.m
+++ b/wk/benchmark and scan scripts/lin_MTM.m	
@@ -64,7 +64,7 @@ LINEARITY = 'linear';   % activate non-linearity (is cancelled if KXEQ0 = 1)
 % (LB:L.Bernstein, DG:Dougherty, SG:Sugama, LR: Lorentz, LD: Landau)
 CO      = 'DG';
 GKCO    = 0; % gyrokinetic operator
-ABCO    = 1; % interspecies collisions
+ABCO    = 1; % INTERSPECIES collisions
 INIT_ZF = 0; ZF_AMP = 0.0;
 CLOS    = 0;   % Closure model (0: =0 truncation, 1: v^Nmax closure (p+2j<=Pmax))s
 NL_CLOS = 0;   % nonlinear closure model (-2:nmax=jmax; -1:nmax=jmax-j; >=0:nmax=NL_CLOS)
diff --git a/wk/lin_RHT.m b/wk/benchmark and scan scripts/lin_RHT.m
similarity index 99%
rename from wk/lin_RHT.m
rename to wk/benchmark and scan scripts/lin_RHT.m
index 9fe1bcce57f6c9ebc80409997ecb5b3b35590902..61a2c1018a95dd0ae552996e3180c3f00c97afc5 100644
--- a/wk/lin_RHT.m
+++ b/wk/benchmark and scan scripts/lin_RHT.m	
@@ -66,7 +66,7 @@ LINEARITY = 'linear';   % activate non-linearity (is cancelled if KXEQ0 = 1)
 % (LB:L.Bernstein, DG:Dougherty, SG:Sugama, LR: Lorentz, LD: Landau)
 CO      = 'DG';
 GKCO    = 0; % gyrokinetic operator
-ABCO    = 1; % interspecies collisions
+ABCO    = 1; % INTERSPECIES collisions
 INIT_ZF = 0; ZF_AMP = 0.0;
 CLOS    = 0;   % Closure model (0: =0 truncation, 1: v^Nmax closure (p+2j<=Pmax))s
 NL_CLOS = 0;   % nonlinear closure model (-2:nmax=jmax; -1:nmax=jmax-j; >=0:nmax=NL_CLOS)
diff --git a/wk/lin_TEM.m b/wk/benchmark and scan scripts/lin_TEM.m
similarity index 99%
rename from wk/lin_TEM.m
rename to wk/benchmark and scan scripts/lin_TEM.m
index 70e5f26a279ef233186c8d9a36ffecfc268e6ff0..563d90f0529acd035330d9022a7495691db61139 100644
--- a/wk/lin_TEM.m
+++ b/wk/benchmark and scan scripts/lin_TEM.m	
@@ -64,7 +64,7 @@ LINEARITY = 'linear';   % activate non-linearity (is cancelled if KXEQ0 = 1)
 % (LB:L.Bernstein, DG:Dougherty, SG:Sugama, LR: Lorentz, LD: Landau)
 CO      = 'DG';
 GKCO    = 0; % gyrokinetic operator
-ABCO    = 1; % interspecies collisions
+ABCO    = 1; % INTERSPECIES collisions
 INIT_ZF = 0; ZF_AMP = 0.0;
 CLOS    = 0;   % Closure model (0: =0 truncation, 1: v^Nmax closure (p+2j<=Pmax))s
 NL_CLOS = 0;   % nonlinear closure model (-2:nmax=jmax; -1:nmax=jmax-j; >=0:nmax=NL_CLOS)
diff --git a/wk/fast_analysis.m b/wk/fast_analysis.m
index 377c1c5f64e617dd979e1d5cdea7de664f4c130f..a4d580dd50ccf7392f8153c9f3a002ec89f1d5b9 100644
--- a/wk/fast_analysis.m
+++ b/wk/fast_analysis.m
@@ -1,59 +1,39 @@
 % Directory of the code "mypathtogyacomo/gyacomo/"
 % Partition of the computer where the data have to be searched
-PARTITION  = '/misc/gyacomo_outputs/';
+% PARTITION  = '/misc/gyacomo23_outputs/';
+% PARTITION  = gyacomodir;
+PARTITION = '/home/ahoffman/gyacomo/';
+%% CBC 
+% resdir = 'paper_2_GYAC23/CBC/7x4x192x96x32_nu_0.05_muxy_0.5_muz_0.2';
+% resdir = 'paper_2_GYAC23/CBC/7x4x192x96x32_nu_0.05_muxy_1.0_muz_1.0';
+% resdir = 'paper_2_GYAC23/CBC/7x4x192x96x32_nu_0.05_muxy_1.0_muz_2.0';
+% resdir = 'paper_2_GYAC23/CBC/Full_NL_7x4x192x96x32_nu_0.05_muxy_1.0_muz_2.0';
 
-%% Scan kT
-resdirs = {...
-'paper_2_nonlinear/kT_scan_nu_1e-3/5x3x128x64x24_dp', ...
-'paper_2_nonlinear/kT_scan_nu_1e-3/5x3x192x96x32_dp', ...
-'paper_2_nonlinear/kT_scan_nu_1e-3/7x4x128x64x24_dp', ...
-'paper_2_nonlinear/kT_scan_nu_1e-3/7x4x192x96x32_dp', ...
-};
+%% tests single vs double precision
+% resdir = 'paper_2_GYAC23/precision_study/5x3x128x64x24';
+% resdir = 'paper_2_GYAC23/precision_study/5x3x128x64x24_dp';
+% resdir = 'paper_2_GYAC23/precision_study/5x3x128x64x24_sp';
+% resdir = 'paper_2_GYAC23/precision_study/5x3x128x64x24_sp_clos_1';
+% resdir = 'paper_2_GYAC23/precision_study/3x2x128x64x24_sp_muz_2.0';
+% resdir = 'paper_2_GYAC23/precision_study/test_3x2x128x64x24_sp_muz_2.0';
+% resdir = 'paper_2_GYAC23/precision_study/3x2x128x64x24_sp_clos_1';
 
-%% Scan nu, kT = 5.3
-% resdirs = {...
-% 'paper_2_nonlinear/nu_scan_kT_5.3/FCGK_5x3x128x64x24_dp', ...
-% 'paper_2_nonlinear/nu_scan_kT_5.3/DGGK_7x4x128x64x24_dp', ...
-% 'paper_2_nonlinear/nu_scan_kT_5.3/SGGK_7x4x128x64x24_dp', ...
-% };
+%% 
+% resdir = 'paper_2_GYAC23/collisionless/kT_5.3/5x3x128x64x24_dp_muz_2.0';
+% resdir = 'paper_2_GYAC23/collisionless/kT_5.3/5x3x128x64x24_dp_muz_2.0_full_NL';
+% resdir = 'paper_2_GYAC23/collisionless/kT_5.3/5x3x128x64x24_dp_muz_2.0_muxy_0';
 
+resdir = 'testcases/zpinch_example';
 
-%%
-figure
-hold on
-for i = 1:numel(resdirs)
-    J0 = 00; J1 = 10;
+ %%
+J0 = 00; J1 = 10;
 
-    % Load basic info (grids and time traces)
-    DATADIR = [PARTITION,resdirs{i},'/'];
-    data    = {};
-    data    = compile_results_low_mem(data,DATADIR,J0,J1);
-    
-    % plot heat flux
-    subplot(1,2,1)
-    hold on
-    plot(data.Ts0D,data.HFLUX_X,'DisplayName',data.paramshort);
-    
-    % statistical transport averaging
-    Gavg =[]; Gstd = [];
-    Qavg =[]; Qstd = [];
-    for i_ = 1:2:numel(data.TJOB_SE) 
-    disp([num2str(data.TJOB_SE(i_)),' ',num2str(data.TJOB_SE(i_+1))])
-    disp([num2str(data.NU_EVOL(i_)),' ',num2str(data.NU_EVOL(i_+1))])
-    options.T = [data.TJOB_SE(i_)*1.2 data.TJOB_SE(i_+1)];
-    options.NPLOTS = 0;
-    [fig, res] = statistical_transport_averaging(data,options);
-    Gavg = [Gavg res.Gx_avg]; Gstd = [Gstd res.Gx_std];
-    Qavg = [Qavg res.Qx_avg]; Qstd = [Qstd res.Qx_std];
-    end
-    subplot(1,2,2)
-    hold on
-    errorbar(data.K_T_EVOL(2:2:end),Qavg,Qstd,'--s','DisplayName',data.paramshort);xlabel('$\kappa_T$'); 
-%     errorbar(data.NU_EVOL(2:2:end),Qavg,Qstd,'--s','DisplayName',data.paramshort);xlabel('$\nu$'); 
-    ylabel('$Q_x^\infty$');
-end
+% Load basic info (grids and time traces)
+DATADIR = [PARTITION,resdir,'/'];
+data    = {};
+data    = compile_results_low_mem(data,DATADIR,J0,J1);
 
-if 0
+if 1
 %% Plot transport and phi radial profile
 [data.PHI, data.Ts3D] = compile_results_3D(DATADIR,J0,J1,'phi');
 
@@ -91,4 +71,40 @@ data.EPS          = 0.1;
 data.a = data.EPS * 2000;
 options.RESOLUTION = 256;
 create_film(data,options,'.gif')
+end
+
+if 0
+%% Performance profiler
+profiler(data)
+end
+
+if 0
+%% Hermite-Laguerre spectrum
+[data.Nipjz, data.Ts3D] = compile_results_3D(DATADIR,J0,J1,'Nipjz');
+data.Nipjz = log(data.Nipjz);
+% options.TIME = 'avg';
+options.P2J        = 0;
+options.ST         = 1;
+options.NORMALIZED = 0;
+options.TIME       = [500:800];
+fig = show_moments_spectrum(data,options);
+% fig = show_napjz(data,options);
+% save_figure(data,fig,'.png');
+end
+
+if 0
+%% Mode evolution
+[data.PHI, data.Ts3D] = compile_results_3D(DATADIR,J0,J1,'phi');
+
+options.NORMALIZED = 0;
+options.TIME   = [000:9000];
+options.KX_TW  = [1 20]; %kx Growth rate time window
+options.KY_TW  = [0 20];  %ky Growth rate time window
+options.NMA    = 1;
+options.NMODES = 800;
+options.iz     = 'avg'; % avg or index
+options.ik     = 1; % sum, max or index
+options.fftz.flag = 0;
+fig = mode_growth_meter(data,options);
+% save_figure(data,fig,'.png')
 end
\ No newline at end of file
diff --git a/wk/header_2DZP_results.m b/wk/header_2DZP_results.m
deleted file mode 100644
index 24b1929c77514f5835c0a7d0c6dd9b4260c910af..0000000000000000000000000000000000000000
--- a/wk/header_2DZP_results.m
+++ /dev/null
@@ -1,245 +0,0 @@
-%% Directory of the simulation
-gyacomodir = pwd; gyacomodir = gyacomodir(1:end-2); % get code directory
-% if 1% Local results
-resdir ='';
-resdir ='';
-resdir ='';
-resdir ='';
-% resdir ='debug/ppj_init';
-%% nu = 5e-1
-% Sugama
-% resdir ='Hallenbert_nu_5e-01/200x32_5x3_L_120_kN_1.5_kT_0.375_nu_5e-01_SGGK';% also in 7x4
-% resdir ='Hallenbert_nu_5e-01/200x32_5x3_L_120_kN_1.6_kT_0.4_nu_5e-01_SGGK';
-% resdir ='Hallenbert_nu_5e-01/200x32_7x4_L_120_kN_1.7_kT_0.425_nu_5e-01_SGGK';% also in 7x4
-% resdir ='Hallenbert_nu_5e-01/200x32_5x3_L_120_kN_1.8_kT_0.45_nu_5e-01_SGGK';
-% resdir ='Hallenbert_nu_5e-01/200x32_5x3_L_120_kN_1.9_kT_0.475_nu_5e-01_SGGK';%also in 7x4
-
-%% nu = 1e-1
-% Landau
-% resdir ='Hallenbert_nu_1e-01/200x32_5x3_L_120_kN_1.5_kT_0.375_nu_1e-01_LDGK';
-% resdir ='Hallenbert_nu_1e-01/150x50_5x3_L_120_kN_1.6_kT_0.4_nu_1e-01_LDGK';
-% resdir ='Hallenbert_nu_1e-01/200x32_5x3_L_120_kN_1.6_kT_0.4_nu_1e-01_LDGK';
-% resdir ='Hallenbert_nu_1e-01/200x32_5x3_L_120_kN_1.7_kT_0.425_nu_1e-01_LDGK';
-% resdir ='Hallenbert_nu_1e-01/200x32_5x3_L_120_kN_1.8_kT_0.45_nu_1e-01_LDGK';
-% resdir ='Hallenbert_nu_1e-01/150x50_5x3_L_120_kN_1.8_kT_0.45_nu_1e-01_LDGK';
-% resdir ='Hallenbert_nu_1e-01/200x32_5x3_L_120_kN_1.9_kT_0.475_nu_1e-01_LDGK';
-
-% Sugama
-% resdir ='Hallenbert_nu_1e-01/200x32_5x3_L_120_kN_1.5_kT_0.375_nu_1e-01_SGGK';
-% resdir ='Hallenbert_nu_1e-01/200x32_5x3_L_120_kN_1.7_kT_0.425_nu_1e-01_SGGK';
-% resdir ='Hallenbert_nu_1e-01/200x32_5x3_L_120_kN_1.8_kT_0.45_nu_1e-01_SGGK';
-% resdir ='Hallenbert_nu_1e-01/200x32_5x3_L_120_kN_1.9_kT_0.475_nu_1e-01_SGGK';
-
-% Dougherty
-% resdir ='Hallenbert_nu_1e-01/200x32_5x3_L_120_kN_1.5_kT_0.375_nu_1e-01_DGGK';
-% resdir ='Hallenbert_nu_1e-01/200x32_5x3_L_120_kN_1.6_kT_0.4_nu_1e-01_DGGK';
-% resdir ='Hallenbert_nu_1e-01/200x32_5x3_L_120_kN_1.7_kT_0.425_nu_1e-01_DGGK';
-% resdir ='Hallenbert_nu_1e-01/200x32_5x3_L_120_kN_1.8_kT_0.45_nu_1e-01_DGGK';
-
-%% nu = 5e-2
-% resdir ='Hallenbert_nu_5e-02/200x32_11x6_L_120_kN_1.8_kT_0.45_nu_5e-02_SGGK';%For GENE benchmark % to analyse (added HD)
-% resdir ='Hallenbert_nu_5e-02/200x32_11x6_Lx_120_Ly_60_kN_1.8_kT_0.45_nu_5e-02_SGGK';
-
-% testing various NL closures
-% resdir ='Hallenbert_nu_5e-02/200x32_7x4_Lx_120_Ly_60_kN_1.8_kT_0.45_nu_5e-02_SGGK';
-
-% resdir ='Hallenbert_nu_5e-02/200x32_5x3_Lx_120_Ly_60_kN_1.8_kT_0.45_nu_5e-02_SGDK';
-% resdir ='Hallenbert_nu_5e-02/200x32_11x6_Lx_120_Ly_60_kN_1.8_kT_0.45_nu_5e-02_SGDK';
-
-% resdir ='Hallenbert_nu_5e-02/256x64_5x3_Lx_120_Ly_60_kN_1.8_kT_0.45_nu_5e-02_SGDK';
-% resdir ='Hallenbert_nu_5e-02/256x64_11x6_Lx_120_Ly_60_kN_1.8_kT_0.45_nu_5e-02_SGDK';
-% resdir ='Hallenbert_nu_5e-02/200x32_21x3_Lx_120_Ly_60_kN_1.8_kT_0.45_nu_5e-02_SGDK';
-% resdir ='Hallenbert_nu_5e-02/200x32_17x9_L_120_kN_1.8_kT_0.45_nu_5e-02_SGDK';
-
-% resdir ='Hallenbert_nu_5e-02/200x32_5x3_Lx_120_Ly_60_kN_1.8_kT_0.45_nu_5e-02_DGGK';
-% resdir ='Hallenbert_nu_5e-02/200x32_11x6_Lx_120_Ly_60_kN_1.8_kT_0.45_nu_5e-02_DGGK';
-% resdir ='Hallenbert_nu_5e-02/128x32_5x3_Lx_120_Ly_60_kN_1.8_kT_0.45_nu_5e-02_FCGK';
-
-%% nu = 1e-2
-% Landau
-% resdir ='Hallenbert_nu_1e-02/200x32_5x3_L_120_kN_1.5_kT_0.375_nu_1e-02_LDGK';
-% resdir ='Hallenbert_nu_1e-02/200x32_5x3_L_120_kN_1.6_kT_0.4_nu_1e-02_LDGK';
-% resdir ='Hallenbert_nu_1e-02/200x32_5x3_L_120_kN_1.7_kT_0.425_nu_1e-02_LDGK';
-% resdir ='Hallenbert_nu_1e-02/200x32_5x3_L_120_kN_1.8_kT_0.45_nu_1e-02_LDGK';
-% resdir ='Hallenbert_nu_1e-02/200x32_5x3_L_120_kN_1.9_kT_0.475_nu_1e-02_LDGK';
-
-% Sugama
-% resdir ='kobayashi_2015_fig1/150x150_5x3_L_100_kN_1.4_nu_5e-03_SGGK';
-% resdir ='Hallenbert_nu_1e-02/200x32_7x4_L_120_kN_1.5_kT_0.375_nu_1e-02_SGGK';
-% resdir ='Hallenbert_nu_1e-02/200x32_5x3_L_120_kN_1.5_kT_0.375_nu_1e-02_SGGK';
-% resdir ='Hallenbert_nu_1e-02/200x32_5x3_L_120_kN_1.6_kT_0.4_nu_1e-02_SGGK';
-% resdir ='Hallenbert_nu_1e-02/200x32_7x4_L_120_kN_1.6_kT_0.4_nu_1e-02_SGGK';
-% resdir ='Hallenbert_nu_1e-02/200x32_5x3_L_120_kN_1.7_kT_0.425_nu_1e-02_SGGK';
-% resdir ='Hallenbert_nu_1e-02/300x64_5x3_L_120_kN_1.7_kT_0.425_nu_1e-02_SGGK';
-% resdir ='Hallenbert_nu_1e-02/200x32_11x6_L_120_kN_1.7_kT_0.425_nu_1e-02_SGGK';
-% resdir ='Hallenbert_nu_1e-02/200x32_5x3_L_120_kN_1.8_kT_0.45_nu_1e-02_SGGK'; % To analyse (added HD)
-% resdir ='Hallenbert_nu_1e-02/200x32_5x3_L_120_kN_1.9_kT_0.475_nu_1e-02_SGGK';
-% resdir ='Hallenbert_nu_1e-02/200x32_11x6_L_120_kN_1.9_kT_0.475_nu_1e-02_SGGK';
-% Dougherty
-% resdir ='Hallenbert_nu_1e-02/200x32_7x4_L_120_kN_1.5_kT_0.375_nu_1e-02_DGGK';
-% resdir ='Hallenbert_nu_1e-02/200x32_5x3_L_120_kN_1.6_kT_0.4_nu_1e-02_DGGK';
-% resdir ='Hallenbert_nu_1e-02/200x32_8x5_L_120_kN_1.6_kT_0.4_nu_0e+00_DGGK';
-% resdir ='Hallenbert_nu_1e-02/200x32_5x3_L_120_kN_1.8_kT_0.45_nu_1e-02_DGGK';
-
-%% nu = 5e-3
-% resdir ='Hallenbert_nu_5e-03/200x32_5x3_Lx_120_Ly_60_kN_1.8_eta_0.25_nuSG_5e-03_muxy_5e-2';
-% resdir ='Hallenbert_nu_5e-03/200x32_5x3_Lx_120_Ly_60_kN_1.8_eta_0.25_nuSG_5e-03_mux_5e-2_muy_6e-1';
-% resdir ='Hallenbert_nu_5e-03/200x32_11x6_Lx_120_Ly_60_kN_1.8_eta_0.25_nuSG_5e-03_mux_5e-2_muy_6e-1';
-% resdir ='Hallenbert_nu_5e-03/200x32_11x6_Lx_120_Ly_60_kN_1.8_eta_0.25_nuSG_5e-03_muxy_5e-2';
-
-%% nu = 0
-
-% resdir ='Hallenbert_fig2a/200x32_21x11_Lx_120_Ly_60_kN_1.6_eta_0.4_nu_0_muxy_1e-2';
-% resdir ='Hallenbert_fig2a/200x32_11x6_Lx_120_Ly_60_kN_1.6_eta_0.4_nu_0_muxy_1e-2';
-% resdir ='Hallenbert_fig2a/200x32_5x3_Lx_120_Ly_60_kN_1.6_eta_0.4_nu_0_muxy_1e-2';
-% resdir ='Hallenbert_fig2a/200x32_5x3_Lx_120_Ly_60_kN_1.6_eta_0.4_nuDGGK_0.1_muxy_1e-2';
-% resdir ='Hallenbert_fig2a/200x32_11x6_Lx_120_Ly_60_kN_1.6_eta_0.4_nuDGGK_0.1_muxy_1e-2';
-% resdir ='Hallenbert_fig2a/200x32_5x3_Lx_120_Ly_60_kN_1.6_eta_0.4_nuSGGK_0.1_muxy_1e-2';
-% resdir ='Hallenbert_fig2a/200x32_11x6_Lx_120_Ly_60_kN_1.6_eta_0.4_nuSGGK_0.1_muxy_1e-2';
-% resdir ='Hallenbert_fig2a/200x32_5x3_Lx_120_Ly_60_kN_1.6_eta_0.4_nuLDGK_0.1_muxy_1e-2';
-% resdir ='Hallenbert_fig2a/200x32_5x3_Lx_120_Ly_60_kN_1.6_eta_0.4_nuLRGK_0.1_muxy_1e-2';
-
-
-% resdir ='Hallenbert_fig2b/200x32_11x6_Lx_240_Ly_120_kN_2.5_eta_0.25_nu_0_muxy_1e-1';
-% resdir ='Hallenbert_fig2b/200x32_5x3_Lx_240_Ly_120_kN_2.5_eta_0.25_nu_0_muxy_1e-1';
-% resdir ='Hallenbert_fig2b/200x32_5x3_Lx_240_Ly_120_kN_2.5_eta_0.25_nuSGGK_0.1_muxy_1e-1';
-% resdir ='Hallenbert_fig2b/200x32_5x3_Lx_240_Ly_120_kN_2.5_eta_0.25_nuDGGK_0.1_muxy_1e-1';
-% resdir ='Hallenbert_fig2b/200x32_5x3_Lx_240_Ly_120_kN_2.5_eta_0.25_nuLDGK_0.1_muxy_1e-1';
-% resdir ='Hallenbert_fig2b/200x32_5x3_Lx_240_Ly_120_kN_2.5_eta_0.25_nuLRGK_0.1_muxy_1e-1';
-
-% resdir ='Hallenbert_fig2c/200x32_11x6_Lx_120_Ly_60_kN_2.0_eta_0.25_nu_0_muxy_5e-2';
-% resdir ='Hallenbert_fig2c/200x32_5x3_Lx_120_Ly_60_kN_2.0_eta_0.25_nu_0_muxy_5e-2';
-% resdir ='Hallenbert_fig2c/200x32_5x3_Lx_120_Ly_60_kN_2.0_eta_0.25_nuSGGK_0.1_muxy_5e-2';
-% resdir ='Hallenbert_fig2c/200x32_11x6_Lx_120_Ly_60_kN_2.0_eta_0.25_nuSGGK_0.1_muxy_5e-2';
-% resdir ='Hallenbert_fig2c/200x32_5x3_Lx_120_Ly_60_kN_2.0_eta_0.25_nuDGGK_0.1_muxy_5e-2';
-% resdir ='Hallenbert_fig2c/200x32_11x6_Lx_120_Ly_60_kN_2.0_eta_0.25_nuDGGK_0.1_muxy_5e-2';
-% resdir ='Hallenbert_fig2c/200x32_5x3_Lx_120_Ly_60_kN_2.0_eta_0.25_nuLDGK_0.1_muxy_5e-2';
-% resdir ='Hallenbert_fig2c/200x32_5x3_Lx_120_Ly_60_kN_2.0_eta_0.25_nuLRGK_0.1_muxy_5e-2';
-% resdir ='Hallenbert_fig2c/200x32_9x5_Lx_120_Ly_60_kN_2.0_eta_0.25_nuLRGK_0.1_muxy_5e-2';
-
-%% Transport scan
-% resdir = 'nu_0.1_transport_scan/colless_kn_1.7_to_2.0';
-% resdir = 'nu_0.1_transport_scan/colless_kn_2.1_to_2.5';
-
-% resdir = 'nu_0.1_transport_scan/LB_kn_2.0';
-
-% resdir = 'nu_0.1_transport_scan/DG_kn_1.8_to_2.1';
-% resdir = 'nu_0.1_transport_scan/DG_kn_2.2_to_2.5';
-% resdir = 'nu_0.1_transport_scan/DG_conv_kN_1.9';
-
-% resdir = 'nu_0.1_transport_scan/SG_kn_1.7_to_2.0';
-% resdir = 'nu_0.1_transport_scan/SG_10x5_conv_test';
-% resdir = 'nu_0.1_transport_scan/SG_kn_2.2_to_2.5';
-
-% resdir = 'nu_0.1_transport_scan/LD_kn_2.0_to_2.5';
-% resdir = 'nu_0.1_transport_scan/LD_kn_1.7_to_2.5';
-
-% resdir = 'nu_0.1_transport_scan/LR_kn_1.7_to_2.0';
-% resdir = 'nu_0.1_transport_scan/LR_kn_2.1_to_2.5';
-
-% resdir = 'nu_0.1_transport_scan/colless_kn_2.2_Lx1.5';
-% resdir = 'nu_0.1_transport_scan/colless_kn_2.2_HD';
-
-% resdir = 'nu_0.1_transport_scan/colless_kn_1.6_HD';
-
-% resdir = 'nu_0.1_transport_scan/large_box_kN_2.1_nu_0.1';
-% resdir = 'nu_0.1_transport_scan/large_box_kN_2.0_nu_0.1';
-
-% resdir = 'predator_prey_nu_scan/DG_Kn_1.7_nu_0.01';
-
-% resdir = 'ZF_damping_linear_nu_0_20x10_kn_1.6_GK/LR_4x2_nu_0.1';
-% resdir = 'ZF_damping_nu_0_20x10_kn_1.6_GK/HSG_4x2_nu_0.1';
-% resdir = 'ZF_damping_nu_0_5x3_kn_2.5_GK/LR_4x2_nu_0.1';
-% resdir = 'hacked_sugama/hacked_B_kn_1.6_200x32_L_120x60_nu_0.1';
-
-% resdir = 'shearless_cyclone/200x32x24_5x4_Lx_120_Ly_60_q0_1.4_e_0.18_kN_2.22_kT_6.9_nuLR_0.01_adiab_e';
-% resdir = 'shearless_cyclone/no_sg_128x32x36_6x3_Lx_120_Ly_60_q0_1.4_e_0.18_kN_2.22_kT_6.9_adiab_e';
-% resdir = 'shearless_cyclone/sgrid_128x64x32_4x2_Lx_100_Ly_120_q0_1.4_e_0.18_kN_2.22_kT_6.96_adiab_e';
-% resdir = 'shearless_cyclone/sgrid_128x64x32_4x2_Lx_100_Ly_120_q0_1.4_e_0.18_kN_1.78_kT_5.52_adiab_e';
-% resdir = 'linear_shearless_cyclone/4_2_cyclone_1.0';
-% resdir = 'linear_shearless_cyclone/test_fmom';
-% else% Marconi results
-% resdir ='';
-% resdir ='';
-% resdir ='';fd
-% resdir ='';
-% resdir ='/marconi_scratch/userexternal/ahoffman/HeLaZ/results/simulation_A_new/300x300_5x3_L_120_kN_1.6667_nu_1e-01_SGGK/out.txt';
-% % resdir ='/marconi_scratch/userexternal/ahoffman/HeLaZ/results/simulation_A/300x150_L_120_P_8_J_4_eta_0.6_nu_1e-01_SGGK_mu_0e+00/out.txt';
-% % BASIC.RESDIR      = ['../',resdir(46:end-8),'/'];
-% MISCDIR = ['/misc/HeLaZ_outputs/',resdir(46:end-8),'/'];
-% end
-
-%% ZPINCH rerun
-% resdir ='Zpinch_rerun/Kn_2.5_200x48x5x3';
-% resdir ='Zpinch_rerun/Kn_2.5_256x128x5x3';
-% resdir ='Zpinch_rerun/Kn_2.5_312x196x5x3_Lx_400_Ly_200';
-% resdir ='Zpinch_rerun/Kn_2.5_256x64x5x3';
-% resdir ='Zpinch_rerun/Kn_2.0_200x48x9x5_large_box';
-% resdir ='Zpinch_rerun/Kn_2.0_256x64x9x5_Lx_240_Ly_120';
-% resdir ='Zpinch_rerun/Kn_1.6_256x128x7x4';
-% resdir ='Zpinch_rerun/Kn_1.6_200x64x11x6';
-% resdir ='Zpinch_rerun/Kn_1.6_200x64x11x6_conv';
-% resdir ='Zpinch_rerun/Kn_1.6_200x64x11x6_mu_0.5';
-resdir ='Zpinch_rerun/Kn_1.6_256x128x21x11';
-
-%% nu scan
-% resdir = 'Zpinch_rerun/kN_2.2_coll_scan_128x48x5x3';
-% resdir = 'Zpinch_rerun/Ultra_HD_312x196x5x3';
-% resdir = 'Zpinch_rerun/UHD_nu_001_LDGK';
-% resdir = 'Zpinch_rerun/UHD_nu_01_LDGK';
-% resdir = 'Zpinch_rerun/UHD_nu_1_LDGK';
-% resdir ='Zpinch_rerun/kN_1.7_SGGK_conv_200x32x7x3_nu_0.01';
-% resdir ='Zpinch_rerun/kN_1.7_LDGKii_200x32x7x3_nu_scan';
-% resdir ='Zpinch_rerun/nu_0.1_LDGKii_200x48x7x4_kN_scan';
-% resdir ='Zpinch_rerun/nu_0.1_FCGK_200x48x5x3_kN_scan';
-% resdir ='Zpinch_rerun/nu_0.1_SGGK_200x48x5x3_kN_scan';
-% resdir = 'Zpinch_rerun/kN_1.7_FCGK_200x32x5x3_nu_scan';
-% resdir = 'Zpinch_rerun/kN_1.7_SGGK_200x32x7x4_nu_scan';
-% resdir = 'Zpinch_rerun/kN_2.2_SGGK_200x32x5x3_nu_scan';
-% resdir = 'Zpinch_rerun/kN_1.7_SGGK_256x64x5x3_nu_scan';
-
-%% Convergence cases kN = (1.6 2.2) nu = (0.01 1.0), SGGK
-% resdir = 'Zpinch_rerun/convcoll_Kn_1.6_200x32x3x2_mu_0.01';
-% resdir = 'Zpinch_rerun/convcoll_Kn_1.6_200x32x5x3_mu_0.01';
-% resdir = 'Zpinch_rerun/convcoll_Kn_1.6_200x32x7x4_mu_0.01';
-% resdir = 'Zpinch_rerun/convcoll_Kn_1.6_200x32x9x5_mu_0.01';
-% resdir = 'Zpinch_rerun/convcoll_Kn_1.6_200x32x11x6_nu_0.01';
-
-% resdir = 'Zpinch_rerun/convcoll_Kn_2.2_200x32x3x2_mu_0.01';
-% resdir = 'Zpinch_rerun/convcoll_Kn_2.2_200x32x5x3_mu_0.01';
-% resdir = 'Zpinch_rerun/convcoll_Kn_2.2_200x32x7x4_mu_0.01';
-% resdir = 'Zpinch_rerun/convcoll_Kn_2.2_200x32x9x5_mu_0.01';
-
-% resdir = 'Zpinch_rerun/convcoll_Kn_1.6_200x32x3x2_nu_0.1';
-% resdir = 'Zpinch_rerun/convcoll_Kn_1.6_200x32x5x3_nu_0.1';
-% resdir = 'Zpinch_rerun/convcoll_Kn_1.6_200x32x7x4_nu_0.1';
-% resdir = 'Zpinch_rerun/convcoll_Kn_1.6_200x32x9x5_nu_0.1';
-
-% resdir = 'Zpinch_rerun/convcoll_Kn_2.2_200x32x3x2_nu_0.1';
-% resdir = 'Zpinch_rerun/convcoll_Kn_2.2_200x32x5x3_nu_0.1';
-% resdir = 'Zpinch_rerun/convcoll_Kn_2.2_200x32x7x4_nu_0.1';
-% resdir = 'Zpinch_rerun/convcoll_Kn_2.2_200x32x9x5_nu_0.1';
-
-%% kN scans with nu=0.1 and nu=0.01
-% resdir = 'Zpinch_rerun/SGGK_kN_scan_200x64x5x3_nu_0.1';
-% resdir = 'Zpinch_rerun/DGGK_kN_scan_200x64x5x3_nu_0.1';
-% resdir = 'Zpinch_rerun/LRGK_kN_scan_200x64x5x3_nu_0.1';
-% resdir = 'Zpinch_rerun/LDGK_kN_scan_200x64x5x3_nu_0.1';
-% 
-% resdir = 'Zpinch_rerun/SGGK_kN_scan_200x64x5x3_nu_0.01';
-% resdir = 'Zpinch_rerun/DGGK_kN_scan_200x64x5x3_nu_0.01';
-% resdir = 'Zpinch_rerun/even_DGGK_kN_scan_200x64x5x3_nu_0.01';
-% resdir = 'Zpinch_rerun/LRGK_kN_scan_200x64x5x3_nu_0.01';
-% resdir = 'Zpinch_rerun/LDGK_kN_scan_202x64x5x3_nu_0.01';
-
-% resdir = 'Zpinch_rerun/DGGK_kN_1.6_200x64x5x3_nu_0.01';
-% resdir = 'Zpinch_rerun/DGGK_kN_1.7_200x64x5x3_nu_0.01';
-% resdir = 'Zpinch_rerun/DGGK_kN_1.7_200x64x9x5_nu_0.01';
-% resdir = 'Zpinch_rerun/LRGK_kN_2.4_300x98x5x3_nu_0.01';
-% resdir = 'Zpinch_rerun/LDGK_kN_1.9_200x64x5x3_nu_0.01';
-% resdir = 'Zpinch_rerun/COLLESS_kN_1.7_200x64x5x3';
-% resdir = 'Zpinch_rerun/COLLESS_kN_1.7_200x64x9x5';
-
-JOBNUMMIN = 01; JOBNUMMAX = 03;
-resdir = ['results/',resdir];
-run analysis_gyacomo
\ No newline at end of file
diff --git a/wk/Zpinch_coll_scan_kN_1.7.m b/wk/old scripts/Zpinch_coll_scan_kN_1.7.m
similarity index 100%
rename from wk/Zpinch_coll_scan_kN_1.7.m
rename to wk/old scripts/Zpinch_coll_scan_kN_1.7.m
diff --git a/wk/continue_multiple_runs_marconi.m b/wk/old scripts/continue_multiple_runs_marconi.m
similarity index 100%
rename from wk/continue_multiple_runs_marconi.m
rename to wk/old scripts/continue_multiple_runs_marconi.m
diff --git a/wk/debug_script.m b/wk/old scripts/debug_script.m
similarity index 100%
rename from wk/debug_script.m
rename to wk/old scripts/debug_script.m
diff --git a/wk/local_run.m b/wk/old scripts/local_run.m
similarity index 98%
rename from wk/local_run.m
rename to wk/old scripts/local_run.m
index 7031cacb6158bbfc7f8042498e954ba1b5c7b0b2..21aca7320d10d1013afc7ef8aa1d1c802e241bbd 100644
--- a/wk/local_run.m
+++ b/wk/old scripts/local_run.m	
@@ -40,7 +40,7 @@ JOB2LOAD= -1;
 % (LB:L.Bernstein, DG:Dougherty, SG:Sugama, LR: Lorentz, LD: Landau)
 CO      = 'SG';
 GKCO    = 1; % gyrokinetic operator
-ABCO    = 1; % interspecies collisions
+ABCO    = 1; % INTERSPECIES collisions
 NL_CLOS = 0;   % nonlinear closure model (-2: nmax = jmax, -1: nmax = jmax-j, >=0 : nmax = NL_CLOS)
 SIMID   = 'kobayashi_2015_fig1';  % Name of the simulation
 % SIMID   = 'debug';  % Name of the simulation
diff --git a/wk/marconi_run.m b/wk/old scripts/marconi_run.m
similarity index 96%
rename from wk/marconi_run.m
rename to wk/old scripts/marconi_run.m
index 4f78af0aa913edc0dbaf5990724a84945124b68e..00ee0405a43d8e3071ebba0f99ff1fc991f7b954 100644
--- a/wk/marconi_run.m
+++ b/wk/old scripts/marconi_run.m	
@@ -48,7 +48,7 @@ JOB2LOAD= -1; % start from t=0 if <0, else restart from outputs_$job2load
 % (LB:L.Bernstein, DG:Dougherty, SG:Sugama, LR: Lorentz, LD: Landau)
 CO      = 'DG';
 GKCO    = 1; % gyrokinetic operator
-ABCO    = 1; % interspecies collisions
+ABCO    = 1; % INTERSPECIES collisions
 CLOS    = 0;   % Closure model (0: =0 truncation)
 NL_CLOS = -1;   % nonlinear closure model (-2: nmax = jmax, -1: nmax = jmax-j, >=0 : nmax = NL_CLOS)
 LINEARITY = 'nonlinear';   % activate non-linearity (is cancelled if KXEQ0 = 1)
diff --git a/wk/quick_run.m b/wk/old scripts/quick_run.m
similarity index 99%
rename from wk/quick_run.m
rename to wk/old scripts/quick_run.m
index dffb3b2a9fea9d0e30c0c781871a9d8eda0ef10a..9eb6073b5dd112fa671c57d2ec4870fa1cbcb37d 100644
--- a/wk/quick_run.m
+++ b/wk/old scripts/quick_run.m	
@@ -64,7 +64,7 @@ LINEARITY = 'linear';   % activate non-linearity (is cancelled if KXEQ0 = 1)
 % (LB:L.Bernstein, DG:Dougherty, SG:Sugama, LR: Lorentz, LD: Landau)
 CO      = 'DG';
 GKCO    = 0; % gyrokinetic operator
-ABCO    = 1; % interspecies collisions
+ABCO    = 1; % INTERSPECIES collisions
 INIT_ZF = 0; ZF_AMP = 0.0;
 CLOS    = 0;   % Closure model (0: =0 truncation, 1: v^Nmax closure (p+2j<=Pmax))s
 NL_CLOS = 0;   % nonlinear closure model (-2:nmax=jmax; -1:nmax=jmax-j; >=0:nmax=NL_CLOS)
diff --git a/wk/save_iFFT.m b/wk/old scripts/save_iFFT.m
similarity index 100%
rename from wk/save_iFFT.m
rename to wk/old scripts/save_iFFT.m