diff --git a/apps/atoms/atom.cpp b/apps/atoms/atom.cpp
index 3fb30ffff1..04a66f252f 100644
--- a/apps/atoms/atom.cpp
+++ b/apps/atoms/atom.cpp
@@ -100,7 +100,6 @@ class Free_atom : public sirius::Atom_type
         );
 
         auto mixer_function_prop = mixer::FunctionProperties<std::vector<double>>(
-                [](const std::vector<double>& x) -> std::size_t { return x.size(); },
                 /* use simple inner product for mixing */
                 [](const std::vector<double>& x, const std::vector<double>& y) -> double {
                     double result = 0.0;
diff --git a/apps/mini_app/sirius.scf.cpp b/apps/mini_app/sirius.scf.cpp
index 0ca3603403..56d90feb7a 100644
--- a/apps/mini_app/sirius.scf.cpp
+++ b/apps/mini_app/sirius.scf.cpp
@@ -323,8 +323,6 @@ ground_state(Simulation_context& ctx, int task_id, cmd_args const& args, int wri
         }
     }
 
-    // dft.print_magnetic_moment();
-
     if (ref_file.size() != 0) {
         json dict_ref;
         std::ifstream(ref_file) >> dict_ref;
@@ -354,7 +352,7 @@ ground_state(Simulation_context& ctx, int task_id, cmd_args const& args, int wri
                     max_diff = std::max(max_diff, std::abs(v1[i][x] - v2[i][x]));
                 }
             }
-            if (max_diff > 1e-5) {
+            if (max_diff > 1e-4) {
                 std::cout << "magnetisations is different!" << std::endl;
                 ctx.comm().abort(5);
             }
diff --git a/apps/tests/test_mixer.cpp b/apps/tests/test_mixer.cpp
index 26c8e0fa4e..c97eedb1e9 100644
--- a/apps/tests/test_mixer.cpp
+++ b/apps/tests/test_mixer.cpp
@@ -85,7 +85,6 @@ test_mixer(cmd_args const& args)
         b[i] -= 1;
 
     auto mixer_function_prop = mixer::FunctionProperties<std::vector<double>>(
-            [](const std::vector<double>& x) -> std::size_t { return 1; },
             [](const std::vector<double>& x, const std::vector<double>& y) -> double {
                 double result = 0.0;
                 for (std::size_t i = 0; i < x.size(); ++i)
diff --git a/examples/fp-lapw/Eu6C60/sirius.json b/examples/fp-lapw/Eu6C60/sirius.json
index 85c5d0314d..4d57bf56e0 100644
--- a/examples/fp-lapw/Eu6C60/sirius.json
+++ b/examples/fp-lapw/Eu6C60/sirius.json
@@ -1,29 +1,27 @@
 {
     "control" : {
-        "cyclic_block_size" : 16,
-        "processing_unit" : "cpu",
-        "std_evp_solver_name" : "lapack",
-        "gen_evp_solver_name" : "lapack"
+	"verbosity" : 1
     },
 
     "parameters" : {
         "electronic_structure_method" : "full_potential_lapwlo",
         "xc_functionals" : ["XC_LDA_X", "XC_LDA_C_PZ"],
         "smearing_width" : 0.05,
-        "valence_relativity" : "none",
-        "core_relativity" : "none",
-        "aw_cutoff" : 7.0,
+        "valence_relativity" : "iora",
+        "core_relativity" : "dirac",
+        "aw_cutoff" : 6.0,
         "pw_cutoff" : 20.00,
         "auto_rmt" : 1,
+        "num_mag_dims" : 1,
         "use_symmetry": true,
         "ngridk" : [1, 1, 1],
         "density_tol" : 1e-5,
         "energy_tol" : 1e-8,
-        "num_dft_iter" : 100
+        "num_dft_iter" : 1
     },
 
     "mixer" : {
-        "beta" : 0.95,
+        "beta" : 0.5,
         "type" : "anderson",
         "max_history" : 8
     },
diff --git a/src/api/sirius.f90 b/src/api/sirius.f90
index e5802a3509..b249ece245 100644
--- a/src/api/sirius.f90
+++ b/src/api/sirius.f90
@@ -3006,16 +3006,18 @@ end subroutine sirius_generate_effective_potential
 !> @param [in] gs_handler Ground state handler.
 !> @param [in] add_core Add core charge density in the muffin-tins.
 !> @param [in] transform_to_rg If true, density and magnetization are transformed to real-space grid.
-!> @param [in] paw_only it true, only local PAW density is generated
+!> @param [in] paw_only if true, only local PAW density is generated
+!> @param [in] efermi if true, Fermi energy level is also computed
 !> @param [out] error_code Error code.
 subroutine sirius_generate_density(gs_handler,add_core,transform_to_rg,paw_only,&
-&error_code)
+&efermi,error_code)
 implicit none
 !
 type(sirius_ground_state_handler), target, intent(in) :: gs_handler
 logical, optional, target, intent(in) :: add_core
 logical, optional, target, intent(in) :: transform_to_rg
 logical, optional, target, intent(in) :: paw_only
+logical, optional, target, intent(in) :: efermi
 integer, optional, target, intent(out) :: error_code
 !
 type(C_PTR) :: gs_handler_ptr
@@ -3025,17 +3027,20 @@ subroutine sirius_generate_density(gs_handler,add_core,transform_to_rg,paw_only,
 logical(C_BOOL), target :: transform_to_rg_c_type
 type(C_PTR) :: paw_only_ptr
 logical(C_BOOL), target :: paw_only_c_type
+type(C_PTR) :: efermi_ptr
+logical(C_BOOL), target :: efermi_c_type
 type(C_PTR) :: error_code_ptr
 !
 interface
 subroutine sirius_generate_density_aux(gs_handler,add_core,transform_to_rg,paw_only,&
-&error_code)&
+&efermi,error_code)&
 &bind(C, name="sirius_generate_density")
 use, intrinsic :: ISO_C_BINDING
 type(C_PTR), value :: gs_handler
 type(C_PTR), value :: add_core
 type(C_PTR), value :: transform_to_rg
 type(C_PTR), value :: paw_only
+type(C_PTR), value :: efermi
 type(C_PTR), value :: error_code
 end subroutine
 end interface
@@ -3057,18 +3062,17 @@ subroutine sirius_generate_density_aux(gs_handler,add_core,transform_to_rg,paw_o
 paw_only_c_type = paw_only
 paw_only_ptr = C_LOC(paw_only_c_type)
 endif
+efermi_ptr = C_NULL_PTR
+if (present(efermi)) then
+efermi_c_type = efermi
+efermi_ptr = C_LOC(efermi_c_type)
+endif
 error_code_ptr = C_NULL_PTR
 if (present(error_code)) then
 error_code_ptr = C_LOC(error_code)
 endif
 call sirius_generate_density_aux(gs_handler_ptr,add_core_ptr,transform_to_rg_ptr,&
-&paw_only_ptr,error_code_ptr)
-if (present(add_core)) then
-endif
-if (present(transform_to_rg)) then
-endif
-if (present(paw_only)) then
-endif
+&paw_only_ptr,efermi_ptr,error_code_ptr)
 end subroutine sirius_generate_density
 
 !
@@ -7262,7 +7266,7 @@ end subroutine sirius_set_atom_vector_field
 !> @brief Set the parameters controlling the dftd3 correction.
 !> @param [in] handler Simulation context handler.
 !> @param [in] method family of predefined parameters. Linked to the functional
-!> @param [in] damping__ damping correction, auto, manual.
+!> @param [in] damping damping correction, auto, manual.
 !> @param [in] atm Include the three body correction
 !> @param [in] damping_term type of damping correction, rational, mrational, zero, mzero, ...
 !> @param [in] s6 s6 parameter for dftd3 model.
@@ -7272,13 +7276,13 @@ end subroutine sirius_set_atom_vector_field
 !> @param [in] alp alp parameter for dftd3 model.
 !> @param [in] beta beta parameter for dftd3 model.
 !> @param [out] error_code Error code.
-subroutine sirius_set_dftd3_correction(handler,method,damping__,atm,damping_term,&
-&s6,s8,s9,rs8,alp,beta,error_code)
+subroutine sirius_set_dftd3_correction(handler,method,damping,atm,damping_term,s6,&
+&s8,s9,rs8,alp,beta,error_code)
 implicit none
 !
 type(sirius_context_handler), target, intent(in) :: handler
 character(*), target, intent(in) :: method
-character(*), optional, target, intent(in) :: damping__
+character(*), optional, target, intent(in) :: damping
 logical, optional, target, intent(in) :: atm
 character(*), optional, target, intent(in) :: damping_term
 real(8), optional, target, intent(in) :: s6
@@ -7292,8 +7296,8 @@ subroutine sirius_set_dftd3_correction(handler,method,damping__,atm,damping_term
 type(C_PTR) :: handler_ptr
 type(C_PTR) :: method_ptr
 character(C_CHAR), target, allocatable :: method_c_type(:)
-type(C_PTR) :: damping___ptr
-character(C_CHAR), target, allocatable :: damping___c_type(:)
+type(C_PTR) :: damping_ptr
+character(C_CHAR), target, allocatable :: damping_c_type(:)
 type(C_PTR) :: atm_ptr
 logical(C_BOOL), target :: atm_c_type
 type(C_PTR) :: damping_term_ptr
@@ -7307,13 +7311,13 @@ subroutine sirius_set_dftd3_correction(handler,method,damping__,atm,damping_term
 type(C_PTR) :: error_code_ptr
 !
 interface
-subroutine sirius_set_dftd3_correction_aux(handler,method,damping__,atm,damping_term,&
+subroutine sirius_set_dftd3_correction_aux(handler,method,damping,atm,damping_term,&
 &s6,s8,s9,rs8,alp,beta,error_code)&
 &bind(C, name="sirius_set_dftd3_correction")
 use, intrinsic :: ISO_C_BINDING
 type(C_PTR), value :: handler
 type(C_PTR), value :: method
-type(C_PTR), value :: damping__
+type(C_PTR), value :: damping
 type(C_PTR), value :: atm
 type(C_PTR), value :: damping_term
 type(C_PTR), value :: s6
@@ -7332,11 +7336,11 @@ subroutine sirius_set_dftd3_correction_aux(handler,method,damping__,atm,damping_
 allocate(method_c_type(len(method)+1))
 method_c_type = string_f2c(method)
 method_ptr = C_LOC(method_c_type)
-damping___ptr = C_NULL_PTR
-if (present(damping__)) then
-allocate(damping___c_type(len(damping__)+1))
-damping___c_type = string_f2c(damping__)
-damping___ptr = C_LOC(damping___c_type)
+damping_ptr = C_NULL_PTR
+if (present(damping)) then
+allocate(damping_c_type(len(damping)+1))
+damping_c_type = string_f2c(damping)
+damping_ptr = C_LOC(damping_c_type)
 endif
 atm_ptr = C_NULL_PTR
 if (present(atm)) then
@@ -7377,11 +7381,11 @@ subroutine sirius_set_dftd3_correction_aux(handler,method,damping__,atm,damping_
 if (present(error_code)) then
 error_code_ptr = C_LOC(error_code)
 endif
-call sirius_set_dftd3_correction_aux(handler_ptr,method_ptr,damping___ptr,atm_ptr,&
+call sirius_set_dftd3_correction_aux(handler_ptr,method_ptr,damping_ptr,atm_ptr,&
 &damping_term_ptr,s6_ptr,s8_ptr,s9_ptr,rs8_ptr,alp_ptr,beta_ptr,error_code_ptr)
 deallocate(method_c_type)
-if (present(damping__)) then
-deallocate(damping___c_type)
+if (present(damping)) then
+deallocate(damping_c_type)
 endif
 if (present(atm)) then
 endif
@@ -7390,6 +7394,138 @@ subroutine sirius_set_dftd3_correction_aux(handler,method,damping__,atm,damping_
 endif
 end subroutine sirius_set_dftd3_correction
 
+!
+!> @brief Set the parameters controlling the dftd4 correction.
+!> @param [in] handler Simulation context handler.
+!> @param [in] method family of predefined parameters. Linked to the functional
+!> @param [in] damping damping correction, auto, manual.
+!> @param [in] atm Include the three body correction
+!> @param [in] damping_term type of damping correction, rational, mrational
+!> @param [in] s6 s6 parameter for dftd4 model.
+!> @param [in] s8 s8 parameter for dftd4 model.
+!> @param [in] s9 s9 parameter for dftd4 model.
+!> @param [in] a1 a1 parameter for dftd4 model.
+!> @param [in] a2 a2 parameter for dftd4 model.
+!> @param [in] alp alp parameter for dftd4 model.
+!> @param [out] error_code Error code.
+subroutine sirius_set_dftd4_correction(handler,method,damping,atm,damping_term,s6,&
+&s8,s9,a1,a2,alp,error_code)
+implicit none
+!
+type(sirius_context_handler), target, intent(in) :: handler
+character(*), target, intent(in) :: method
+character(*), optional, target, intent(in) :: damping
+logical, optional, target, intent(in) :: atm
+character(*), optional, target, intent(in) :: damping_term
+real(8), optional, target, intent(in) :: s6
+real(8), optional, target, intent(in) :: s8
+real(8), optional, target, intent(in) :: s9
+real(8), optional, target, intent(in) :: a1
+real(8), optional, target, intent(in) :: a2
+real(8), optional, target, intent(in) :: alp
+integer, optional, target, intent(out) :: error_code
+!
+type(C_PTR) :: handler_ptr
+type(C_PTR) :: method_ptr
+character(C_CHAR), target, allocatable :: method_c_type(:)
+type(C_PTR) :: damping_ptr
+character(C_CHAR), target, allocatable :: damping_c_type(:)
+type(C_PTR) :: atm_ptr
+logical(C_BOOL), target :: atm_c_type
+type(C_PTR) :: damping_term_ptr
+character(C_CHAR), target, allocatable :: damping_term_c_type(:)
+type(C_PTR) :: s6_ptr
+type(C_PTR) :: s8_ptr
+type(C_PTR) :: s9_ptr
+type(C_PTR) :: a1_ptr
+type(C_PTR) :: a2_ptr
+type(C_PTR) :: alp_ptr
+type(C_PTR) :: error_code_ptr
+!
+interface
+subroutine sirius_set_dftd4_correction_aux(handler,method,damping,atm,damping_term,&
+&s6,s8,s9,a1,a2,alp,error_code)&
+&bind(C, name="sirius_set_dftd4_correction")
+use, intrinsic :: ISO_C_BINDING
+type(C_PTR), value :: handler
+type(C_PTR), value :: method
+type(C_PTR), value :: damping
+type(C_PTR), value :: atm
+type(C_PTR), value :: damping_term
+type(C_PTR), value :: s6
+type(C_PTR), value :: s8
+type(C_PTR), value :: s9
+type(C_PTR), value :: a1
+type(C_PTR), value :: a2
+type(C_PTR), value :: alp
+type(C_PTR), value :: error_code
+end subroutine
+end interface
+!
+handler_ptr = C_NULL_PTR
+handler_ptr = C_LOC(handler%handler_ptr_)
+method_ptr = C_NULL_PTR
+allocate(method_c_type(len(method)+1))
+method_c_type = string_f2c(method)
+method_ptr = C_LOC(method_c_type)
+damping_ptr = C_NULL_PTR
+if (present(damping)) then
+allocate(damping_c_type(len(damping)+1))
+damping_c_type = string_f2c(damping)
+damping_ptr = C_LOC(damping_c_type)
+endif
+atm_ptr = C_NULL_PTR
+if (present(atm)) then
+atm_c_type = atm
+atm_ptr = C_LOC(atm_c_type)
+endif
+damping_term_ptr = C_NULL_PTR
+if (present(damping_term)) then
+allocate(damping_term_c_type(len(damping_term)+1))
+damping_term_c_type = string_f2c(damping_term)
+damping_term_ptr = C_LOC(damping_term_c_type)
+endif
+s6_ptr = C_NULL_PTR
+if (present(s6)) then
+s6_ptr = C_LOC(s6)
+endif
+s8_ptr = C_NULL_PTR
+if (present(s8)) then
+s8_ptr = C_LOC(s8)
+endif
+s9_ptr = C_NULL_PTR
+if (present(s9)) then
+s9_ptr = C_LOC(s9)
+endif
+a1_ptr = C_NULL_PTR
+if (present(a1)) then
+a1_ptr = C_LOC(a1)
+endif
+a2_ptr = C_NULL_PTR
+if (present(a2)) then
+a2_ptr = C_LOC(a2)
+endif
+alp_ptr = C_NULL_PTR
+if (present(alp)) then
+alp_ptr = C_LOC(alp)
+endif
+error_code_ptr = C_NULL_PTR
+if (present(error_code)) then
+error_code_ptr = C_LOC(error_code)
+endif
+call sirius_set_dftd4_correction_aux(handler_ptr,method_ptr,damping_ptr,atm_ptr,&
+&damping_term_ptr,s6_ptr,s8_ptr,s9_ptr,a1_ptr,a2_ptr,alp_ptr,error_code_ptr)
+deallocate(method_c_type)
+if (present(damping)) then
+deallocate(damping_c_type)
+endif
+if (present(atm)) then
+endif
+if (present(damping_term)) then
+deallocate(damping_term_c_type)
+endif
+end subroutine sirius_set_dftd4_correction
+
 
 subroutine sirius_free_handler_ctx(handler, error_code)
     implicit none
diff --git a/src/api/sirius_api.cpp b/src/api/sirius_api.cpp
index 676b77a827..a651815e4d 100644
--- a/src/api/sirius_api.cpp
+++ b/src/api/sirius_api.cpp
@@ -2853,7 +2853,11 @@ sirius_generate_effective_potential(void* const* gs_handler__, int* error_code__
     paw_only:
       type: bool
       attr: in, optional
-      doc: it true, only local PAW density is generated
+      doc: if true, only local PAW density is generated
+    efermi:
+      type: bool
+      attr: in, optional
+      doc: if true, Fermi energy level is also computed
     error_code:
       type: int
       attr: out, optional
@@ -2862,7 +2866,7 @@ sirius_generate_effective_potential(void* const* gs_handler__, int* error_code__
 */
 void
 sirius_generate_density(void* const* gs_handler__, bool const* add_core__, bool const* transform_to_rg__,
-                        bool const* paw_only__, int* error_code__)
+                        bool const* paw_only__, bool const* efermi__, int* error_code__)
 {
     call_sirius(
             [&]() {
@@ -2870,6 +2874,11 @@ sirius_generate_density(void* const* gs_handler__, bool const* add_core__, bool
                 auto add_core        = get_value<bool>(add_core__, false);
                 auto transform_to_rg = get_value<bool>(transform_to_rg__, false);
                 auto paw_only        = get_value<bool>(paw_only__, false);
+                auto efermi          = get_value<bool>(efermi__, false);
+
+                if (efermi) {
+                    gs.k_point_set().find_band_occupancies<double>();
+                }
 
                 if (paw_only) {
                     gs.density().generate_paw_density();
diff --git a/src/density/density.cpp b/src/density/density.cpp
index c51b5631b1..d1345c7e63 100644
--- a/src/density/density.cpp
+++ b/src/density/density.cpp
@@ -1930,7 +1930,8 @@ void
 Density::mixer_init(config_t::mixer_t const& mixer_cfg__)
 {
     auto func_prop    = mixer::periodic_function_property();
-    auto func_prop1   = mixer::periodic_function_property_modified(true);
+    auto func_prop1   = mixer::periodic_function_property_rho_pw(true);
+    auto func_prop2   = mixer::periodic_function_property_mag_pw(true);
     auto density_prop = mixer::density_function_property();
     auto paw_prop     = mixer::paw_density_function_property();
     auto hubbard_prop = mixer::hubbard_matrix_function_property();
@@ -1962,11 +1963,11 @@ Density::mixer_init(config_t::mixer_t const& mixer_cfg__)
             this->mixer_->initialize_function<0>(func_prop, component(0), ctx_);
         }
         if (ctx_.num_mag_dims() > 0) {
-            this->mixer_->initialize_function<1>(func_prop, component(1), ctx_);
+            this->mixer_->initialize_function<1>(func_prop2, component(1), ctx_);
         }
         if (ctx_.num_mag_dims() > 1) {
-            this->mixer_->initialize_function<2>(func_prop, component(2), ctx_);
-            this->mixer_->initialize_function<3>(func_prop, component(3), ctx_);
+            this->mixer_->initialize_function<2>(func_prop2, component(2), ctx_);
+            this->mixer_->initialize_function<3>(func_prop2, component(3), ctx_);
         }
     }
 
diff --git a/src/dft/dft_ground_state.cpp b/src/dft/dft_ground_state.cpp
index f2886825f5..c98fcf9d4c 100644
--- a/src/dft/dft_ground_state.cpp
+++ b/src/dft/dft_ground_state.cpp
@@ -27,7 +27,6 @@ DFT_ground_state::initial_state()
     PROFILE("sirius::DFT_ground_state::initial_state");
 
     density_.initial_density();
-    density_.print_info(ctx_.out(1));
     potential_.generate(density_, ctx_.use_symmetry(), true);
     if (!ctx_.full_potential()) {
         if (ctx_.cfg().parameters().precision_wf() == "fp32") {
@@ -37,7 +36,6 @@ DFT_ground_state::initial_state()
 #else
             RTE_THROW("not compiled with FP32 support");
 #endif
-
         } else {
             Hamiltonian0<double> H0(potential_, true);
             initialize_subspace(kset_, H0);
@@ -197,13 +195,15 @@ DFT_ground_state::find(double density_tol__, double energy_tol__, double iter_so
     Density rho1(ctx_);
 
     std::stringstream s;
-    s << "density_tol               : " << density_tol__ << std::endl
-      << "energy_tol                : " << energy_tol__ << std::endl
+    s << "density tolerance         : " << density_tol__ << std::endl
+      << "total energy tolerance    : " << energy_tol__ << std::endl
       << "iter_solver_tol (initial) : " << iter_solver_tol__ << std::endl
       << "iter_solver_tol (target)  : " << ctx_.cfg().iterative_solver().min_tolerance() << std::endl
-      << "num_dft_iter              : " << num_dft_iter__;
+      << "num_dft_iter              : " << num_dft_iter__ << std::endl;
     RTE_OUT(ctx_.out(1)) << s.str();
 
+    density_.print_info(ctx_.out(1));
+
     for (int iter = 0; iter < num_dft_iter__; iter++) {
         PROFILE("sirius::DFT_ground_state::scf_loop|iteration");
         std::stringstream s;
@@ -216,7 +216,7 @@ DFT_ground_state::find(double density_tol__, double energy_tol__, double iter_so
 
         diagonalize_result_t result;
 
-        double ne_diff = 0;
+        double ne_diff{0};
         if (ctx_.cfg().parameters().precision_wf() == "fp32") {
 #if defined(SIRIUS_USE_FP32)
             Hamiltonian0<float> H0(potential_, true);
@@ -255,14 +255,25 @@ DFT_ground_state::find(double density_tol__, double energy_tol__, double iter_so
         /* mix density */
         rms = density_.mix();
 
-        double eha_res = density_residual_hartree_energy(density_, rho1);
-
-        /* estimate new tolerance of the iterative solver */
-        double tol = rms;
-        if (ctx_.cfg().mixer().use_hartree()) {
-            // tol = rms * rms / std::max(1.0, unit_cell_.num_electrons());
-            tol = eha_res / std::max(1.0, unit_cell_.num_electrons());
+        /* we need to estimate new tolerance of the iterative solver;
+         * several cases need to be handled:
+         *  - full-potential
+         *  - pseudo-potential
+         *    - use Hartree energy of residuals
+         *    - use inner product of residuals */
+        double tol{0};
+        if (ctx_.full_potential()) {
+            /* this will be Sqrt( <res | res> ) i.e. root mean square error */
+            tol = rms;
+        } else {
+            if (ctx_.cfg().mixer().use_hartree()) {
+                tol = rms * rms / std::max(1.0, unit_cell_.num_electrons());
+            } else {
+                /* same as full-potential case */
+                tol = rms;
+            }
         }
+
         tol = std::min(ctx_.cfg().iterative_solver().tolerance_scale()[0] * tol,
                        ctx_.cfg().iterative_solver().tolerance_scale()[1] * iter_solver_tol__);
         /* tolerance can't be too small */
@@ -344,8 +355,8 @@ DFT_ground_state::find(double density_tol__, double energy_tol__, double iter_so
         out << "iteration : " << iter << ", RMS : " << std::setprecision(12) << std::scientific << rms
             << ", energy difference : " << std::setprecision(12) << std::scientific << etot - eold << std::endl;
         if (!ctx_.full_potential()) {
-            out << "Hartree energy of density residual : " << eha_res << std::endl
-                << "bands are converged : " << boolstr(result.converged) << std::endl;
+            out //<< "Hartree energy of density residual : " << eha_res << std::endl
+                    << "bands are converged : " << boolstr(result.converged) << std::endl;
         }
         if (ctx_.cfg().iterative_solver().type() != "exact") {
             out << std::endl << "iterative solver converged : " << boolstr(iter_solver_converged) << std::endl;
@@ -353,14 +364,8 @@ DFT_ground_state::find(double density_tol__, double energy_tol__, double iter_so
 
         RTE_OUT(ctx_.out(1)) << out.str();
         /* check if the calculation has converged */
-        bool converged{true};
-        // converged = (std::abs(eold - etot) < energy_tol__) && result.converged && iter_solver_converged;
-        converged = (std::abs(eold - etot) < energy_tol__) && iter_solver_converged;
-        if (ctx_.cfg().mixer().use_hartree()) {
-            converged = converged && (eha_res < density_tol__);
-        } else {
-            converged = converged && (rms < density_tol__);
-        }
+        bool converged = (std::abs(eold - etot) < energy_tol__) && (rms < density_tol__) && iter_solver_converged;
+
         if (converged) {
             if (std::abs(ne_diff) > 1e-10) {
                 std::stringstream ss;
diff --git a/src/function3d/periodic_function.hpp b/src/function3d/periodic_function.hpp
index f6e5c39e75..ddadcaa33c 100644
--- a/src/function3d/periodic_function.hpp
+++ b/src/function3d/periodic_function.hpp
@@ -328,6 +328,56 @@ copy(periodic_function_ptr_t<T> const src__, Periodic_function<T>& dest__)
     }
 }
 
+template <typename T>
+inline void
+copy(Periodic_function<T> const& src__, Periodic_function<T>& dest__) /* copy src__ into dest__, part by part */
+{
+    copy(src__.rg(), dest__.rg()); /* the rg() part is always copied */
+    if (src__.ctx().full_potential()) { /* the mt() part is copied only for a full-potential context */
+        copy(src__.mt(), dest__.mt());
+    }
+}
+
+template <typename T>
+inline void
+axpy(T alpha__, Periodic_function<T> const& x__, Periodic_function<T>& y__) /* forwards to axpy() of each part */
+{
+    axpy(alpha__, x__.rg(), y__.rg()); /* presumably y = alpha * x + y on the rg() part -- TODO confirm */
+    if (x__.ctx().full_potential()) { /* the mt() part is updated only for a full-potential context */
+        axpy(alpha__, x__.mt(), y__.mt());
+    }
+}
+
+template <typename T>
+inline void
+rotate(T c__, T s__, Periodic_function<T>& x__, Periodic_function<T>& y__) /* x <- c*x + s*y, y <- c*y - s*x */
+{
+    #pragma omp parallel
+    {
+        #pragma omp for schedule(static) nowait
+        for (std::size_t i = 0; i < x__.rg().values().size(); ++i) { /* element-wise over the rg() values */
+            auto xi           = x__.rg().value(i); /* read both old values first: each update needs both */
+            auto yi           = y__.rg().value(i);
+            x__.rg().value(i) = xi * c__ + yi * s__;
+            y__.rg().value(i) = xi * (-s__) + yi * c__;
+        }
+        if (x__.ctx().full_potential()) { /* the mt() part is rotated only for a full-potential context */
+            for (auto it : x__.ctx().unit_cell().spl_num_atoms()) { /* run by every thread of the team */
+                int ia       = it.i; /* NOTE(review): presumably the global atom index -- confirm */
+                auto& x_f_mt = x__.mt()[ia];
+                auto& y_f_mt = y__.mt()[ia];
+                #pragma omp for schedule(static) nowait
+                for (int i = 0; i < static_cast<int>(x__.mt()[ia].size()); i++) { /* same rotation as above */
+                    auto xi   = x_f_mt[i];
+                    auto yi   = y_f_mt[i];
+                    x_f_mt[i] = xi * c__ + yi * s__;
+                    y_f_mt[i] = xi * (-s__) + yi * c__;
+                }
+            }
+        }
+    }
+}
+
 } // namespace sirius
 
 #endif // __PERIODIC_FUNCTION_HPP__
diff --git a/src/mixer/anderson_mixer.hpp b/src/mixer/anderson_mixer.hpp
index 882bbc6344..198ee41099 100644
--- a/src/mixer/anderson_mixer.hpp
+++ b/src/mixer/anderson_mixer.hpp
@@ -82,8 +82,6 @@ class Anderson : public Mixer<FUNCS...>
 
         const auto history_size = static_cast<int>(this->history_size_);
 
-        const bool normalize = false;
-
         // beta scaling
         if (this->step_ > this->max_history_) {
             const double rmse_avg = std::accumulate(this->rmse_history_.begin(), this->rmse_history_.end(), 0.0) /
@@ -116,20 +114,21 @@ class Anderson : public Mixer<FUNCS...>
             for (int i = 0; i <= history_size - 1; ++i) {
                 auto j                                           = this->idx_hist(this->step_ - i - 1);
                 this->S_(history_size - 1, history_size - i - 1) = this->S_(history_size - i - 1, history_size - 1) =
-                        this->template inner_product<normalize>(this->residual_history_[j],
-                                                                this->residual_history_[idx_prev_step]);
+                        this->inner_product(this->residual_history_[j], this->residual_history_[idx_prev_step]);
             }
 
             // Make a copy because factorizing destroys the matrix.
-            for (int i = 0; i < history_size; ++i)
-                for (int j = 0; j < history_size; ++j)
+            for (int i = 0; i < history_size; ++i) {
+                for (int j = 0; j < history_size; ++j) {
                     this->S_factorized_(j, i) = this->S_(j, i);
+                }
+            }
 
             mdarray<double, 1> h({history_size});
             for (int i = 1; i <= history_size; ++i) {
-                auto j              = this->idx_hist(this->step_ - i);
-                h(history_size - i) = this->template inner_product<normalize>(this->residual_history_[j],
-                                                                              this->residual_history_[idx_step]);
+                auto j = this->idx_hist(this->step_ - i);
+                h(history_size - i) =
+                        this->inner_product(this->residual_history_[j], this->residual_history_[idx_step]);
             }
 
             bool invertible = la::wrap(la::lib_t::lapack).sysolve(history_size, this->S_factorized_, h);
diff --git a/src/mixer/anderson_stable_mixer.hpp b/src/mixer/anderson_stable_mixer.hpp
index 9959ee8022..3139882370 100644
--- a/src/mixer/anderson_stable_mixer.hpp
+++ b/src/mixer/anderson_stable_mixer.hpp
@@ -74,8 +74,6 @@ class Anderson_stable : public Mixer<FUNCS...>
         const auto idx_next_step = this->idx_hist(this->step_ + 1);
         const auto idx_step_prev = this->idx_hist(this->step_ - 1);
 
-        const bool normalize = false;
-
         const auto history_size = static_cast<int>(this->history_size_);
 
         // TODO: beta scaling?
@@ -102,8 +100,7 @@ class Anderson_stable : public Mixer<FUNCS...>
             // orthogonalize residual_history_[step-1] w.r.t. residual_history_[1:step-2] using modified Gram-Schmidt.
             for (int i = 1; i <= history_size - 1; ++i) {
                 auto j  = this->idx_hist(this->step_ - i - 1);
-                auto sz = this->template inner_product<normalize>(this->residual_history_[j],
-                                                                  this->residual_history_[idx_step_prev]);
+                auto sz = this->inner_product(this->residual_history_[j], this->residual_history_[idx_step_prev]);
                 this->R_(history_size - 1 - i, history_size - 1) = sz;
                 this->axpy(-sz, this->residual_history_[j], this->residual_history_[idx_step_prev]);
             }
@@ -111,15 +108,14 @@ class Anderson_stable : public Mixer<FUNCS...>
             // repeat orthogonalization.. seems really necessary.
             for (int i = 1; i <= history_size - 1; ++i) {
                 auto j  = this->idx_hist(this->step_ - i - 1);
-                auto sz = this->template inner_product<normalize>(this->residual_history_[j],
-                                                                  this->residual_history_[idx_step_prev]);
+                auto sz = this->inner_product(this->residual_history_[j], this->residual_history_[idx_step_prev]);
                 this->R_(history_size - 1 - i, history_size - 1) += sz;
                 this->axpy(-sz, this->residual_history_[j], this->residual_history_[idx_step_prev]);
             }
 
             // normalize the new residual difference vec itself
-            auto nrm2 = this->template inner_product<normalize>(this->residual_history_[idx_step_prev],
-                                                                this->residual_history_[idx_step_prev]);
+            auto nrm2 =
+                    this->inner_product(this->residual_history_[idx_step_prev], this->residual_history_[idx_step_prev]);
 
             if (nrm2 > 0) {
                 auto sz                                      = std::sqrt(nrm2);
@@ -131,9 +127,9 @@ class Anderson_stable : public Mixer<FUNCS...>
                 // Compute h = Q' * f_n
                 mdarray<double, 1> h({history_size});
                 for (int i = 1; i <= history_size; ++i) {
-                    auto j              = this->idx_hist(this->step_ - i);
-                    h(history_size - i) = this->template inner_product<normalize>(this->residual_history_[j],
-                                                                                  this->residual_history_[idx_step]);
+                    auto j = this->idx_hist(this->step_ - i);
+                    h(history_size - i) =
+                            this->inner_product(this->residual_history_[j], this->residual_history_[idx_step]);
                 }
 
                 // next compute k = R⁻¹ * h... just do that by hand for now, can dispatch to blas later.
diff --git a/src/mixer/broyden2_mixer.hpp b/src/mixer/broyden2_mixer.hpp
index 39bbbe4d18..829c99498e 100644
--- a/src/mixer/broyden2_mixer.hpp
+++ b/src/mixer/broyden2_mixer.hpp
@@ -109,12 +109,10 @@ class Broyden2 : public Mixer<FUNCS...>
 
         const auto n = static_cast<int>(std::min(this->step_, this->max_history_ - 1));
 
-        const bool normalize = false;
-
         for (int i = 0; i <= n; ++i) {
             int j              = this->idx_hist(this->step_ - i);
-            this->S_(n - i, n) = this->S_(n, n - i) = this->template inner_product<normalize>(
-                    this->residual_history_[j], this->residual_history_[idx_step]);
+            this->S_(n - i, n) = this->S_(n, n - i) =
+                    this->inner_product(this->residual_history_[j], this->residual_history_[idx_step]);
         }
 
         // Expand (I - Δf₁Δf₁ᵀ/Δf₁ᵀΔf₁)...(I - Δfₙ₋₁Δfₙ₋₁ᵀ/Δfₙ₋₁ᵀΔfₙ₋₁)fₙ
diff --git a/src/mixer/mixer.hpp b/src/mixer/mixer.hpp
index ef740277d7..a4105f6806 100644
--- a/src/mixer/mixer.hpp
+++ b/src/mixer/mixer.hpp
@@ -31,8 +31,8 @@ namespace mixer {
 
 /// Describes operations on a function type used for mixing.
 /** The properties contain functions, which determine the behaviour of a given type during mixing. The inner product
- * function result is used for calculating mixing parameters. If a function should not contribute to generation of
- * mixing parameters, the inner product function should always return 0.
+ *  function result is used for calculating mixing parameters. If a function should not contribute to generation of
+ *  mixing parameters, the inner product function should always return 0.
  */
 template <typename FUNC>
 struct FunctionProperties
@@ -41,19 +41,18 @@ struct FunctionProperties
 
     ///
     /**
-     *  \param [in]  size_         Function, which returns a measure of size of the (global) function.
-     *  \param [in]  inner_        Function, which computes the (global) inner product. This determines the contribution
-     * to mixing parameters rmse.
-     *  \param [in]  scal_         Function, which scales the input (x = alpha * x).
-     *  \param [in]  copy_         Function, which copies from one object to the other (y = x).
-     *  \param [in]  axpy_         Function, which scales and adds one object to the other (y = alpha * x + y).
+     *  \param [in]  inner_  Function, which computes the (global) inner product. This determines the
+     *                       contribution to mixing parameters rmse.
+     *  \param [in]  scal_   Function, which scales the input (x = alpha * x).
+     *  \param [in]  copy_   Function, which copies from one object to the other (y = x).
+     *  \param [in]  axpy_   Function, which scales and adds one object to the other (y = alpha * x + y).
+     *  \param [in]  rotate_ Function that computes two new linear combinations out of x,y
      */
-    FunctionProperties(std::function<double(const FUNC&)> size_, std::function<double(const FUNC&, const FUNC&)> inner_,
-                       std::function<void(double, FUNC&)> scal_, std::function<void(const FUNC&, FUNC&)> copy_,
+    FunctionProperties(std::function<double(const FUNC&, const FUNC&)> inner_, std::function<void(double, FUNC&)> scal_,
+                       std::function<void(const FUNC&, FUNC&)> copy_,
                        std::function<void(double, const FUNC&, FUNC&)> axpy_,
                        std::function<void(double, double, FUNC&, FUNC&)> rotate_)
-        : size(size_)
-        , inner(inner_)
+        : inner(inner_)
         , scal(scal_)
         , copy(copy_)
         , axpy(axpy_)
@@ -62,8 +61,7 @@ struct FunctionProperties
     }
 
     FunctionProperties()
-        : size([](const FUNC&) -> double { return 0; })
-        , inner([](const FUNC&, const FUNC&) -> double { return 0.0; })
+        : inner([](const FUNC&, const FUNC&) -> double { return 0.0; })
         , scal([](double, FUNC&) -> void {})
         , copy([](const FUNC&, FUNC&) -> void {})
         , axpy([](double, const FUNC&, FUNC&) -> void {})
@@ -71,9 +69,6 @@ struct FunctionProperties
     {
     }
 
-    // Size proportional to the local contribution of the inner product.
-    std::function<double(const FUNC&)> size; // TODO: this sounds more like a normalization factor.
-
     // Inner product function. Determines contribution to mixing.
     std::function<double(const FUNC&, const FUNC&)> inner;
 
@@ -95,7 +90,7 @@ namespace mixer_impl {
 
 /// Compute inner product <x|y> between pairs of functions in tuples and accumulate in the result.
 /** This function is used in Broyden mixers to compute inner products of residuals. */
-template <std::size_t FUNC_REVERSE_INDEX, bool normalize, typename... FUNCS>
+template <std::size_t FUNC_REVERSE_INDEX, typename... FUNCS>
 struct InnerProduct
 {
     static double
@@ -107,28 +102,14 @@ struct InnerProduct
             /* compute inner product */
             auto v = std::get<FUNC_REVERSE_INDEX>(function_prop)
                              .inner(*std::get<FUNC_REVERSE_INDEX>(x), *std::get<FUNC_REVERSE_INDEX>(y));
-            /* normalize if necessary */
-            if (normalize) {
-                auto sx = std::get<FUNC_REVERSE_INDEX>(function_prop).size(*std::get<FUNC_REVERSE_INDEX>(x));
-                auto sy = std::get<FUNC_REVERSE_INDEX>(function_prop).size(*std::get<FUNC_REVERSE_INDEX>(y));
-                if (sx != sy) {
-                    throw std::runtime_error("[sirius::mixer::InnerProduct] sizes of two functions don't match");
-                }
-                if (sx) {
-                    v /= sx;
-                } else {
-                    v = 0;
-                }
-            }
-
             result += v;
         }
-        return result + InnerProduct<FUNC_REVERSE_INDEX - 1, normalize, FUNCS...>::apply(function_prop, x, y);
+        return result + InnerProduct<FUNC_REVERSE_INDEX - 1, FUNCS...>::apply(function_prop, x, y);
     }
 };
 
-template <bool normalize, typename... FUNCS>
-struct InnerProduct<0, normalize, FUNCS...>
+template <typename... FUNCS>
+struct InnerProduct<0, FUNCS...>
 {
     static double
     apply(const std::tuple<FunctionProperties<FUNCS>...>& function_prop, const std::tuple<std::unique_ptr<FUNCS>...>& x,
@@ -136,18 +117,6 @@ struct InnerProduct<0, normalize, FUNCS...>
     {
         if (std::get<0>(x) && std::get<0>(y)) {
             auto v = std::get<0>(function_prop).inner(*std::get<0>(x), *std::get<0>(y));
-            if (normalize) {
-                auto sx = std::get<0>(function_prop).size(*std::get<0>(x));
-                auto sy = std::get<0>(function_prop).size(*std::get<0>(y));
-                if (sx != sy) {
-                    throw std::runtime_error("[sirius::mixer::InnerProduct] sizes of two functions don't match");
-                }
-                if (sx) {
-                    v /= sx;
-                } else {
-                    v = 0;
-                }
-            }
             return v;
         } else {
             return 0;
@@ -361,8 +330,9 @@ class Mixer
         std::get<FUNC_INDEX>(functions_).copy(*std::get<FUNC_INDEX>(output_history_[idx]), output);
     }
 
-    /// Mix input and stored history. Returns the root mean square error computed by inner products of residuals.
-    /** \param [in]  rms_min  Minimum root mean square error. Mixing is only performed, if current RMS is above this
+    /// Mix input and stored history.
+    /** Returns the root mean square error computed by inner products of residuals.
+     *  \param [in]  rms_min  Minimum root mean square error. Mixing is only performed, if current RMS is above this
      *                        threshold.
      */
     double
@@ -371,14 +341,12 @@ class Mixer
         this->update_residual();
         this->update_rms();
         double rmse = rmse_history_[idx_hist(step_)];
-        if (rmse < rms_min__) {
-            return rmse;
+        if (rmse > rms_min__) {
+            /* call mixing implementation */
+            this->mix_impl();
+            step_++;
         }
 
-        /* call mixing implementation */
-        this->mix_impl();
-
-        ++step_;
         return rmse;
     }
 
@@ -402,7 +370,7 @@ class Mixer
         const auto idx = idx_hist(step_);
 
         /* compute sum of inner products; each inner product is normalized */
-        double rmse = inner_product<true>(residual_history_[idx], residual_history_[idx]);
+        double rmse = inner_product(residual_history_[idx], residual_history_[idx]);
         /* for very close vectors inner product of residuals can become negative due to the
            lapw step function in the interstitial (it has some small negative values sometimes) */
         rmse = std::max(0.0, rmse);
@@ -417,11 +385,10 @@ class Mixer
         return step % max_history_;
     }
 
-    template <bool normalize>
     double
     inner_product(const std::tuple<std::unique_ptr<FUNCS>...>& x, const std::tuple<std::unique_ptr<FUNCS>...>& y)
     {
-        return mixer_impl::InnerProduct<sizeof...(FUNCS) - 1, normalize, FUNCS...>::apply(functions_, x, y);
+        return mixer_impl::InnerProduct<sizeof...(FUNCS) - 1, FUNCS...>::apply(functions_, x, y);
     }
 
     void
diff --git a/src/mixer/mixer_functions.cpp b/src/mixer/mixer_functions.cpp
index f65292481b..bc207b3bdd 100644
--- a/src/mixer/mixer_functions.cpp
+++ b/src/mixer/mixer_functions.cpp
@@ -22,76 +22,35 @@ namespace mixer {
 FunctionProperties<Periodic_function<double>>
 periodic_function_property()
 {
-    auto global_size_func = [](const Periodic_function<double>& x) -> double { return x.ctx().unit_cell().omega(); };
-
     auto inner_prod_func = [](const Periodic_function<double>& x, const Periodic_function<double>& y) -> double {
-        return sirius::inner(x, y);
+        return inner(x, y);
     };
 
-    auto scal_function = [](double alpha, Periodic_function<double>& x) -> void {
-        scale(alpha, x.rg());
-        if (x.ctx().full_potential()) {
-            scale(alpha, x.mt());
-        }
-    };
+    auto scal_function = [](double alpha, Periodic_function<double>& x) -> void { x *= alpha; };
 
-    auto copy_function = [](const Periodic_function<double>& x, Periodic_function<double>& y) -> void {
-        copy(x.rg(), y.rg());
-        if (x.ctx().full_potential()) {
-            copy(x.mt(), y.mt());
-        }
-    };
+    auto copy_function = [](Periodic_function<double> const& x, Periodic_function<double>& y) -> void { copy(x, y); };
 
     auto axpy_function = [](double alpha, const Periodic_function<double>& x, Periodic_function<double>& y) -> void {
-        axpy(alpha, x.rg(), y.rg());
-        if (x.ctx().full_potential()) {
-            axpy(alpha, x.mt(), y.mt());
-        }
+        axpy(alpha, x, y);
     };
 
     auto rotate_function = [](double c, double s, Periodic_function<double>& x, Periodic_function<double>& y) -> void {
-        #pragma omp parallel
-        {
-            #pragma omp for schedule(static) nowait
-            for (std::size_t i = 0; i < x.rg().values().size(); ++i) {
-                auto xi         = x.rg().value(i);
-                auto yi         = y.rg().value(i);
-                x.rg().value(i) = xi * c + yi * s;
-                y.rg().value(i) = xi * -s + yi * c;
-            }
-            if (x.ctx().full_potential()) {
-                for (auto it : x.ctx().unit_cell().spl_num_atoms()) {
-                    int ia       = it.i;
-                    auto& x_f_mt = x.mt()[ia];
-                    auto& y_f_mt = y.mt()[ia];
-                    #pragma omp for schedule(static) nowait
-                    for (int i = 0; i < static_cast<int>(x.mt()[ia].size()); i++) {
-                        auto xi   = x_f_mt[i];
-                        auto yi   = y_f_mt[i];
-                        x_f_mt[i] = xi * c + yi * s;
-                        y_f_mt[i] = xi * -s + yi * c;
-                    }
-                }
-            }
-        }
+        rotate(c, s, x, y);
     };
 
-    return FunctionProperties<Periodic_function<double>>(global_size_func, inner_prod_func, scal_function,
-                                                         copy_function, axpy_function, rotate_function);
+    return FunctionProperties<Periodic_function<double>>(inner_prod_func, scal_function, copy_function, axpy_function,
+                                                         rotate_function);
 }
 
 /// Only for the PP-PW case.
 FunctionProperties<Periodic_function<double>>
-periodic_function_property_modified(bool use_coarse_gvec__)
+periodic_function_property_rho_pw(bool use_coarse_gvec__)
 {
-    auto global_size_func = [](Periodic_function<double> const& x) -> double {
-        return 1.0 / x.ctx().unit_cell().omega();
-    };
-
     auto inner_prod_func = [use_coarse_gvec__](Periodic_function<double> const& x,
                                                Periodic_function<double> const& y) -> double {
         double result{0};
         if (use_coarse_gvec__) {
+            #pragma omp parallel for reduction(+:result)
             for (int igloc = x.ctx().gvec_coarse().skip_g0(); igloc < x.ctx().gvec_coarse().count(); igloc++) {
                 /* local index in fine G-vector list */
                 int ig1 = x.ctx().gvec().gvec_base_mapping(igloc);
@@ -100,6 +59,7 @@ periodic_function_property_modified(bool use_coarse_gvec__)
                           std::pow(x.ctx().gvec().gvec_len(gvec_index_t::local(ig1)), 2);
             }
         } else {
+            #pragma omp parallel for reduction(+:result)
             for (int igloc = x.ctx().gvec().skip_g0(); igloc < x.ctx().gvec().count(); igloc++) {
                 result += std::real(std::conj(x.rg().f_pw_local(igloc)) * y.rg().f_pw_local(igloc)) /
                           std::pow(x.ctx().gvec().gvec_len(gvec_index_t::local(igloc)), 2);
@@ -108,46 +68,75 @@ periodic_function_property_modified(bool use_coarse_gvec__)
         if (x.ctx().gvec().reduced()) {
             result *= 2;
         }
-        result *= fourpi;
+        result *= (twopi * x.ctx().unit_cell().omega());
         x.ctx().comm().allreduce(&result, 1);
         return result;
     };
 
-    auto scal_function = [](double alpha, Periodic_function<double>& x) -> void { scale(alpha, x.rg()); };
+    auto scal_function = [](double alpha, Periodic_function<double>& x) -> void { x *= alpha; };
 
-    auto copy_function = [](Periodic_function<double> const& x, Periodic_function<double>& y) -> void {
-        copy(x.rg(), y.rg());
-    };
+    auto copy_function = [](Periodic_function<double> const& x, Periodic_function<double>& y) -> void { copy(x, y); };
 
     auto axpy_function = [](double alpha, const Periodic_function<double>& x, Periodic_function<double>& y) -> void {
-        axpy(alpha, x.rg(), y.rg());
+        axpy(alpha, x, y);
     };
 
     auto rotate_function = [](double c, double s, Periodic_function<double>& x, Periodic_function<double>& y) -> void {
-        #pragma omp parallel for schedule(static)
-        for (std::size_t i = 0; i < x.rg().values().size(); ++i) {
-            auto xi         = x.rg().value(i);
-            auto yi         = y.rg().value(i);
-            x.rg().value(i) = xi * c + yi * s;
-            y.rg().value(i) = xi * -s + yi * c;
-        }
+        rotate(c, s, x, y);
     };
 
-    return FunctionProperties<Periodic_function<double>>(global_size_func, inner_prod_func, scal_function,
-                                                         copy_function, axpy_function, rotate_function);
+    return FunctionProperties<Periodic_function<double>>(inner_prod_func, scal_function, copy_function, axpy_function,
+                                                         rotate_function);
 }
 
-FunctionProperties<density_matrix_t>
-density_function_property()
+/// Only for the PP-PW case.
+FunctionProperties<Periodic_function<double>>
+periodic_function_property_mag_pw(bool use_coarse_gvec__)
 {
-    auto global_size_func = [](density_matrix_t const& x) -> double {
-        size_t result{0};
-        for (auto& e : x) {
-            result += e.size();
+    auto inner_prod_func = [use_coarse_gvec__](Periodic_function<double> const& x,
+                                               Periodic_function<double> const& y) -> double {
+        double result{0};
+        if (use_coarse_gvec__) {
+            #pragma omp parallel for reduction(+:result)
+            for (int igloc = x.ctx().gvec_coarse().skip_g0(); igloc < x.ctx().gvec_coarse().count(); igloc++) {
+                /* local index in fine G-vector list */
+                int ig1 = x.ctx().gvec().gvec_base_mapping(igloc);
+
+                result += std::real(std::conj(x.rg().f_pw_local(ig1)) * y.rg().f_pw_local(ig1));
+            }
+        } else {
+            #pragma omp parallel for reduction(+:result)
+            for (int igloc = x.ctx().gvec().skip_g0(); igloc < x.ctx().gvec().count(); igloc++) {
+                result += std::real(std::conj(x.rg().f_pw_local(igloc)) * y.rg().f_pw_local(igloc));
+            }
         }
+        if (x.ctx().gvec().reduced()) {
+            result *= 2;
+        }
+        result *= (0.5 * x.ctx().unit_cell().omega() / pi);
+        x.ctx().comm().allreduce(&result, 1);
         return result;
     };
 
+    auto scal_function = [](double alpha, Periodic_function<double>& x) -> void { x *= alpha; };
+
+    auto copy_function = [](Periodic_function<double> const& x, Periodic_function<double>& y) -> void { copy(x, y); };
+
+    auto axpy_function = [](double alpha, const Periodic_function<double>& x, Periodic_function<double>& y) -> void {
+        axpy(alpha, x, y);
+    };
+
+    auto rotate_function = [](double c, double s, Periodic_function<double>& x, Periodic_function<double>& y) -> void {
+        rotate(c, s, x, y);
+    };
+
+    return FunctionProperties<Periodic_function<double>>(inner_prod_func, scal_function, copy_function, axpy_function,
+                                                         rotate_function);
+}
+
+FunctionProperties<density_matrix_t>
+density_function_property()
+{
     auto inner_prod_func = [](density_matrix_t const& x, density_matrix_t const& y) -> double {
         // do not contribute to mixing
         return 0.0;
@@ -189,15 +178,13 @@ density_function_property()
         }
     };
 
-    return FunctionProperties<density_matrix_t>(global_size_func, inner_prod_func, scal_function, copy_function,
-                                                axpy_function, rotate_function);
+    return FunctionProperties<density_matrix_t>(inner_prod_func, scal_function, copy_function, axpy_function,
+                                                rotate_function);
 }
 
 FunctionProperties<PAW_density<double>>
 paw_density_function_property()
 {
-    auto global_size_func = [](PAW_density<double> const& x) -> double { return x.unit_cell().num_paw_atoms(); };
-
     auto inner_prod_func = [](PAW_density<double> const& x, PAW_density<double> const& y) -> double {
         return inner(x, y);
     };
@@ -245,15 +232,13 @@ paw_density_function_property()
         }
     };
 
-    return FunctionProperties<PAW_density<double>>(global_size_func, inner_prod_func, scale_func, copy_function,
-                                                   axpy_function, rotate_function);
+    return FunctionProperties<PAW_density<double>>(inner_prod_func, scale_func, copy_function, axpy_function,
+                                                   rotate_function);
 }
 
 FunctionProperties<Hubbard_matrix>
 hubbard_matrix_function_property()
 {
-    auto global_size_func = [](Hubbard_matrix const& x) -> double { return 1.0; };
-
     auto inner_prod_func = [](Hubbard_matrix const& x, Hubbard_matrix const& y) -> double {
         /* do not contribute to mixing */
         return 0;
@@ -281,6 +266,7 @@ hubbard_matrix_function_property()
         }
     };
 
+    // TODO: check with Mathieu which copy function is the one; replace
     auto copy_func = [](Hubbard_matrix const& x, Hubbard_matrix& y) -> void {
         for (size_t at_lvl = 0; at_lvl < x.local().size(); at_lvl++) {
             copy(x.local(at_lvl), y.local(at_lvl));
@@ -350,8 +336,7 @@ hubbard_matrix_function_property()
         }
     };
 
-    return FunctionProperties<Hubbard_matrix>(global_size_func, inner_prod_func, scale_func, copy_func, axpy_func,
-                                              rotate_func);
+    return FunctionProperties<Hubbard_matrix>(inner_prod_func, scale_func, copy_func, axpy_func, rotate_func);
 }
 } // namespace mixer
 
diff --git a/src/mixer/mixer_functions.hpp b/src/mixer/mixer_functions.hpp
index 03acb7d636..5ed5b3eee4 100644
--- a/src/mixer/mixer_functions.hpp
+++ b/src/mixer/mixer_functions.hpp
@@ -29,7 +29,10 @@ FunctionProperties<Periodic_function<double>>
 periodic_function_property();
 
 FunctionProperties<Periodic_function<double>>
-periodic_function_property_modified(bool use_coarse_gvec__);
+periodic_function_property_rho_pw(bool use_coarse_gvec__);
+
+FunctionProperties<Periodic_function<double>>
+periodic_function_property_mag_pw(bool use_coarse_gvec__);
 
 FunctionProperties<density_matrix_t>
 density_function_property();
diff --git a/src/potential/poisson.cpp b/src/potential/poisson.cpp
index a0b10cb535..8d080424cb 100644
--- a/src/potential/poisson.cpp
+++ b/src/potential/poisson.cpp
@@ -17,25 +17,6 @@
 
 namespace sirius {
 
-double
-density_residual_hartree_energy(Density const& rho1__, Density const& rho2__)
-{
-    double eh{0};
-    auto const& gv = rho1__.ctx().gvec();
-    #pragma omp parallel for reduction(+:eh)
-    for (int igloc = gv.skip_g0(); igloc < gv.count(); igloc++) {
-        auto z   = rho1__.component(0).rg().f_pw_local(igloc) - rho2__.component(0).rg().f_pw_local(igloc);
-        double g = gv.gvec_len(gvec_index_t::local(igloc));
-        eh += (std::pow(z.real(), 2) + std::pow(z.imag(), 2)) / std::pow(g, 2);
-    }
-    gv.comm().allreduce(&eh, 1);
-    eh *= twopi * rho1__.ctx().unit_cell().omega();
-    if (gv.reduced()) {
-        eh *= 2;
-    }
-    return eh;
-}
-
 void
 Potential::poisson_add_pseudo_pw(mdarray<std::complex<double>, 2>& qmt__, mdarray<std::complex<double>, 2>& qit__,
                                  std::complex<double>* rho_pw__)
diff --git a/src/potential/potential.hpp b/src/potential/potential.hpp
index ffa8f5973f..e25617bed1 100644
--- a/src/potential/potential.hpp
+++ b/src/potential/potential.hpp
@@ -31,9 +31,6 @@ double
 xc_mt(Radial_grid<double> const& rgrid__, SHT const& sht__, std::vector<XC_functional> const& xc_func__,
       int num_mag_dims__, std::vector<Flm const*> rho__, std::vector<Flm*> vxc__, Flm* exc__, bool use_lalp__);
 
-double
-density_residual_hartree_energy(Density const& rho1__, Density const& rho2__);
-
 /// Generate effective potential from charge density and magnetization.
 /** \note At some point we need to update the atomic potential with the new MT potential. This is simple if the
           effective potential is a global function. Otherwise we need to pass the effective potential between MPI ranks.
diff --git a/src/sirius.hpp b/src/sirius.hpp
index 0e2597b5a2..46b8bee621 100644
--- a/src/sirius.hpp
+++ b/src/sirius.hpp
@@ -70,6 +70,15 @@ energy_acc()
 }
 #endif
 
+struct null_buffer : std::streambuf
+{
+    int
+    overflow(int c) override
+    {
+        return c;
+    }
+};
+
 /// Initialize the library.
 inline void
 initialize(bool call_mpi_init__ = true)
@@ -84,6 +93,7 @@ initialize(bool call_mpi_init__ = true)
     energy_acc() = -power::device_energy();
 #endif
     if (call_mpi_init__) {
+        PROFILE("sirius::initialize::mpi");
         mpi::Communicator::initialize(MPI_THREAD_MULTIPLE);
     }
 #if defined(__APEX)
@@ -97,9 +107,17 @@ initialize(bool call_mpi_init__ = true)
         std::printf("# Warning! Compiled in 'debug' mode with assert statements enabled!\n");
 #endif
     }
+
+    // uncomment this if you want to suppress std::cout from all MPI ranks except from rank=0
+    //static null_buffer null;
+    //if (mpi::Communicator::world().rank() != 0) {
+    //    std::cout.rdbuf(&null);
+    //}
+
     /* get number of ranks per node during the global call to sirius::initialize() */
     mpi::num_ranks_per_node();
     if (acc::num_devices() > 0) {
+        PROFILE("sirius::initialize::acc");
         int devid = mpi::get_device_id(acc::num_devices());
         acc::set_device_id(devid);
         /* create extensive amount of streams */
diff --git a/verification/test06/sirius.json b/verification/test06/sirius.json
index 8e3643b3f5..63955b62b6 100644
--- a/verification/test06/sirius.json
+++ b/verification/test06/sirius.json
@@ -61,7 +61,7 @@
 
     "mixer" : {
         "beta" : 0.5,
-        "type" : "broyden2",
+        "!type" : "broyden2",
         "max_history" : 6,
         "use_hartree" : true
     }
diff --git a/verification/test07/sirius.json b/verification/test07/sirius.json
index df2c48fa1a..79ec08cb5f 100644
--- a/verification/test07/sirius.json
+++ b/verification/test07/sirius.json
@@ -48,7 +48,7 @@
         "atom_types": ["Ni"],
         "atoms": {
             "Ni": [
-                [0.0,0.0,0.0,  0,0,1]
+                [0.0,0.0,0.0,  0,0,2]
             ]
         },
         "lattice_vectors": [
diff --git a/verification/test09/output_ref.json b/verification/test09/output_ref.json
index 69bfe4ebdc..e1a7ac7728 100644
--- a/verification/test09/output_ref.json
+++ b/verification/test09/output_ref.json
@@ -27,6 +27,41 @@
                 "verbosity": 2,
                 "verification": 0
             },
+            "dftd3": {
+                "damping": "rational",
+                "damping_values": "auto",
+                "method": "none",
+                "parameters": {
+                    "a1": 0.0,
+                    "a2": 0.0,
+                    "alp": 0.0,
+                    "beta": 0.0,
+                    "rs6": 0.0,
+                    "rs8": 0.0,
+                    "s6": 0.0,
+                    "s8": 0.0,
+                    "s9": 0.0
+                },
+                "three_body": true
+            },
+            "dftd4": {
+                "damping": "rational",
+                "damping_values": "auto",
+                "method": "none",
+                "parameters": {
+                    "a1": 0.0,
+                    "a2": 0.0,
+                    "alp": 0.0,
+                    "bet": 0.0,
+                    "enable": false,
+                    "rs6": 0.0,
+                    "rs8": 0.0,
+                    "s6": 0.0,
+                    "s8": 0.0,
+                    "s9": 0.0
+                },
+                "three_body": true
+            },
             "hubbard": {
                 "constrained_calculation": false,
                 "constraint_beta_mixing": 0.4,
@@ -63,12 +98,12 @@
             },
             "locked": true,
             "mixer": {
-                "beta": 0.15,
+                "beta": 0.5,
                 "beta0": 0.15,
                 "beta_scaling_factor": 1.0,
                 "max_history": 8,
                 "rms_min": 1e-16,
-                "type": "broyden2",
+                "type": "anderson",
                 "use_hartree": true
             },
             "nlcg": {
@@ -84,10 +119,13 @@
                 "auto_rmt": 1,
                 "aw_cutoff": 0.0,
                 "core_relativity": "dirac",
-                "density_tol": 1e-06,
+                "density_tol": 1e-08,
+                "dftd3_correction": false,
+                "dftd4_correction": false,
                 "electronic_structure_method": "pseudopotential",
                 "energy_tol": 1e-08,
                 "extra_charge": 0,
+                "fixed_mag": 0,
                 "gamma_point": false,
                 "gk_cutoff": 7.0,
                 "hubbard_correction": false,
@@ -98,7 +136,7 @@
                 "ngridk": [4,4,4],
                 "nn_radius": -1,
                 "num_bands": 38,
-                "num_dft_iter": 100,
+                "num_dft_iter": 200,
                 "num_fv_states": -1,
                 "num_mag_dims": 3,
                 "precision_gs": "auto",
@@ -129,9 +167,12 @@
                 "nprii_vloc": 200,
                 "pseudo_grid_cutoff": 10.0,
                 "radial_grid": "exponential, 1.0",
+                "real_occupation_matrix": false,
                 "sht_coverage": 0,
+                "sht_lmax": -1,
                 "simple_lapw_ri": false,
                 "smooth_initial_mag": false,
+                "tol_ne": 0.01,
                 "use_coarse_fft_grid": true,
                 "xc_use_lapl": false
             },
@@ -143,7 +184,7 @@
                 "atom_types": ["Ni"],
                 "atoms": {
                     "Ni": [
-                        [0.0,0.0,0.0]
+                        [0.0,0.0,0.0,0.0,0.0,1.0]
                     ]
                 },
                 "lattice_vectors": [
@@ -170,49 +211,51 @@
         "omega": 73.39284359469754
     },
     "counters": {
-        "band_evp_work_count": 2403.7795597025856,
-        "local_operator_num_applied": 18142
+        "band_evp_work_count": 2227.5290311998892,
+        "local_operator_num_applied": 17673
     },
-    "git_hash": "0c6b4637d99ced7eea1dc26901a0221db55b23ce",
+    "git_hash": "463749bce37a1287336017dc11cdcddda6cc7cef",
     "ground_state": {
         "band_gap": 0.0,
         "converged": true,
-        "efermi": 0.6557800664364786,
+        "efermi": 0.6557802366927981,
         "energy": {
-            "bxc": -0.010470745238370311,
-            "entropy_sum": -0.001494326745922828,
-            "eval_sum": -10.617886973338702,
-            "ewald": -111.75579558183638,
-            "exc": -17.820768180240353,
-            "free": -171.85432926525468,
-            "kin": 48.72292110523524,
-            "scf_correction": -2.909933141381771e-05,
-            "total": -171.85283493850875,
-            "veff": -59.330337333335564,
-            "vha": 107.96333691047042,
-            "vloc": -144.9808316375798,
-            "vxc": -22.312842606234938
+            "bxc": -0.010472257736889786,
+            "dftd3": 0.0,
+            "dftd4": 0.0,
+            "entropy_sum": -0.0014943707877355806,
+            "eval_sum": -10.617894568599278,
+            "ewald": -111.75579557089988,
+            "exc": -17.820770016735647,
+            "free": -171.8543292484083,
+            "kin": 48.72289758879303,
+            "scf_correction": 5.010769612567856e-07,
+            "total": -171.85283487762058,
+            "veff": -59.330319899655414,
+            "vha": 107.96338385189264,
+            "vloc": -144.98085930580308,
+            "vxc": -22.3128444457467
         },
-        "etot_history": [-171.79325126286835,-171.43985660868356,-171.85716446407747,-171.84983295324238,-171.85359306173083,-171.853306751309,-171.85262600113094,-171.8528662490883,-171.85285290035625,-171.852841460192,-171.85283661663274,-171.8528327433734,-171.85283327208722,-171.85283398156966,-171.85283405031998,-171.85283459721808,-171.8528348035663,-171.85283494122925,-171.85283494798668,-171.8528349374169,-171.85283493850875],
+        "etot_history": [-165.20277332050176,-170.83658516476748,-171.8224473280885,-171.85001663166307,-171.84619892519495,-171.86466301547975,-171.84558720562066,-171.85296829907824,-171.85289979932065,-171.85289853706547,-171.85287553177082,-171.85284000799726,-171.85283173165755,-171.85283256036288,-171.8528351504585,-171.85283496553507,-171.85283502211846,-171.85283485626272,-171.8528348669397,-171.8528348796197,-171.85283487762058],
         "forces": [
             [0.0,0.0,0.0]
         ],
         "magnetisation": {
             "atoms": [
-                [5.074929065753288e-14,-9.79841625182196e-15,0.6234475185298044]
+                [-5.362074180946243e-07,-1.8314458428144874e-06,0.6234803505759127]
             ],
-            "total": [4.791847966935517e-14,-9.020174423279415e-15,0.5870547396141836]
+            "total": [-1.3202440873683128e-06,-4.509346828428379e-06,0.5870266623689381]
         },
         "num_scf_iterations": 20,
-        "rho_min": 0.026503709235553462,
-        "rms_history": [5.5908151129756325,2.549149992947269,0.22818171188469558,0.2179292685482527,0.05645964319833182,0.06901006251982635,0.052879992473048,0.010634752636826776,0.009847401569749798,0.0076247346960664816,0.003005067003441252,0.000748979298254481,0.0010404354968139898,0.0005021327826783438,0.00042632751733515366,0.00034731835443184064,0.00015238198955741717,1.4777200578237409e-05,2.600429680867714e-05,1.937406820925168e-05,1.2057628361449534e-05],
-        "scf_time": 29.360402692,
+        "rho_min": 0.02650355735413884,
+        "rms_history": [3.9535032076700025,3.1267938172850664,0.4792130183683345,0.062202091896587076,0.08998363748581666,0.2127421065566379,0.26003995503031213,0.040928312431893925,0.030189843932950832,0.020049690641022865,0.01330784684576,0.0102757796350612,0.00039607591864286427,0.00029122906709065855,3.829700919976523e-05,1.777247914952794e-05,2.144960574241469e-05,1.845129106509047e-05,1.2240840608745643e-05,8.19901012290962e-06,6.723629067819086e-06],
+        "scf_time": 10.401314524,
         "stress": [
-            [5.589717332338956e-05,3.5806012813214917e-19,-5.4210108489640164e-20],
-            [3.5806012813214917e-19,5.58971733233965e-05,-3.9871771000430245e-19],
-            [-5.4210108489640164e-20,-3.9871771000430245e-19,5.5897173323479765e-05]
+            [6.850334737883157e-05,3.590230782880158e-19,-6.324512672831888e-20],
+            [3.590230782880158e-19,6.850334737877606e-05,-4.132331869163532e-19],
+            [-6.324512672831888e-20,-4.132331869163532e-19,6.850334737946995e-05]
         ]
     },
     "task": 0,
-    "threads_per_rank": 1
+    "threads_per_rank": 4
 }
\ No newline at end of file
diff --git a/verification/test09/sirius.json b/verification/test09/sirius.json
index 6d66880bcd..4eb42c93f9 100644
--- a/verification/test09/sirius.json
+++ b/verification/test09/sirius.json
@@ -25,9 +25,7 @@
     "pw_cutoff" : 20.00,
 
     "energy_tol" : 1e-8,
-    "density_tol" : 1e-6,
-
-    "num_dft_iter" : 100,
+    "density_tol" : 1e-5,
 
     "ngridk" : [4,4,4]
   },
@@ -56,8 +54,7 @@
         ]
     },
     "mixer" : {
-        "beta" : 0.15,
-        "use_hartree" : true,
-        "type" : "broyden2"
+        "beta" : 0.5,
+        "use_hartree" : true
     }
 }
diff --git a/verification/test25/output_ref.json b/verification/test25/output_ref.json
index 74302a2aa7..a5bf6715dc 100644
--- a/verification/test25/output_ref.json
+++ b/verification/test25/output_ref.json
@@ -27,6 +27,41 @@
                 "verbosity": 2,
                 "verification": 0
             },
+            "dftd3": {
+                "damping": "rational",
+                "damping_values": "auto",
+                "method": "none",
+                "parameters": {
+                    "a1": 0.0,
+                    "a2": 0.0,
+                    "alp": 0.0,
+                    "beta": 0.0,
+                    "rs6": 0.0,
+                    "rs8": 0.0,
+                    "s6": 0.0,
+                    "s8": 0.0,
+                    "s9": 0.0
+                },
+                "three_body": true
+            },
+            "dftd4": {
+                "damping": "rational",
+                "damping_values": "auto",
+                "method": "none",
+                "parameters": {
+                    "a1": 0.0,
+                    "a2": 0.0,
+                    "alp": 0.0,
+                    "bet": 0.0,
+                    "enable": false,
+                    "rs6": 0.0,
+                    "rs8": 0.0,
+                    "s6": 0.0,
+                    "s8": 0.0,
+                    "s9": 0.0
+                },
+                "three_body": true
+            },
             "hubbard": {
                 "constrained_calculation": false,
                 "constraint_beta_mixing": 0.4,
@@ -39,7 +74,7 @@
                     {
                         "J": 0.0,
                         "J0": 0.0,
-                        "U": 8.0,
+                        "U": 0.293995,
                         "alpha": 0.0,
                         "atom_type": "Ni",
                         "beta": 0.0,
@@ -62,11 +97,15 @@
                 "init_subspace": "lcao",
                 "locking": true,
                 "min_num_res": 0,
+                "min_occupancy": 1e-14,
+                "min_tolerance": 1e-13,
                 "num_singular": -1,
                 "num_steps": 20,
                 "relative_tolerance": 0,
                 "residual_tolerance": 1e-06,
                 "subspace_size": 2,
+                "tolerance_ratio": 0,
+                "tolerance_scale": [0.1,0.5],
                 "type": "davidson"
             },
             "locked": true,
@@ -74,10 +113,10 @@
                 "beta": 0.5,
                 "beta0": 0.15,
                 "beta_scaling_factor": 1.0,
-                "linear_mix_rms_tol": 1000000.0,
                 "max_history": 8,
+                "rms_min": 1e-16,
                 "type": "anderson",
-                "use_hartree": false
+                "use_hartree": true
             },
             "nlcg": {
                 "T": 300.0,
@@ -93,9 +132,12 @@
                 "aw_cutoff": 0.0,
                 "core_relativity": "dirac",
                 "density_tol": 1e-06,
+                "dftd3_correction": false,
+                "dftd4_correction": false,
                 "electronic_structure_method": "pseudopotential",
                 "energy_tol": 1e-06,
                 "extra_charge": 0,
+                "fixed_mag": 0,
                 "gamma_point": false,
                 "gk_cutoff": 8.0,
                 "hubbard_correction": true,
@@ -128,21 +170,22 @@
                 "xc_functionals": ["XC_GGA_X_PBE","XC_GGA_C_PBE"]
             },
             "settings": {
-                "always_update_wf": true,
                 "auto_enu_tol": 0,
                 "fft_grid_size": [80,80,80],
                 "fp32_to_fp64_rms": 0,
-                "itsol_tol_min": 1e-13,
-                "itsol_tol_ratio": 0,
-                "itsol_tol_scale": [0.1,0.5],
-                "min_occupancy": 1e-14,
-                "mixer_rms_min": 1e-16,
                 "nprii_aug": 20,
                 "nprii_beta": 20,
                 "nprii_rho_core": 20,
                 "nprii_vloc": 200,
+                "pseudo_grid_cutoff": 10.0,
                 "radial_grid": "exponential, 1.0",
+                "real_occupation_matrix": false,
                 "sht_coverage": 0,
+                "sht_lmax": -1,
+                "simple_lapw_ri": false,
+                "smooth_initial_mag": false,
+                "tol_ne": 0.01,
+                "use_coarse_fft_grid": true,
                 "xc_use_lapl": false
             },
             "unit_cell": {
@@ -154,12 +197,12 @@
                 "atom_types": ["Ni","O"],
                 "atoms": {
                     "Ni": [
-                        [0.0,0.0,0.0],
-                        [0.5,0.5,-0.5]
+                        [0.0,0.0,0.0,0.0,0.0,2.0],
+                        [0.5,0.5,-0.5,0.0,0.0,-2.0]
                     ],
                     "O": [
-                        [0.751,0.751,0.751],
-                        [0.249,0.249,0.249]
+                        [0.751,0.751,0.751,0.0,0.0,0.0],
+                        [0.249,0.249,0.249,0.0,0.0,0.0]
                     ]
                 },
                 "lattice_vectors": [
@@ -186,50 +229,52 @@
         "omega": 244.65193599999998
     },
     "counters": {
-        "band_evp_work_count": 6769.204355790763,
-        "local_operator_num_applied": 58771
+        "band_evp_work_count": 5749.234098310588,
+        "local_operator_num_applied": 41278
     },
-    "git_hash": "a0d09bff305c7d162e6e43a792018b99bd33b276",
+    "git_hash": "463749bce37a1287336017dc11cdcddda6cc7cef",
     "ground_state": {
-        "band_gap": 0.11802688817837326,
+        "band_gap": 0.1180270031668248,
         "converged": true,
-        "efermi": 0.5433353639715012,
+        "efermi": 0.5433376914749234,
         "energy": {
-            "bxc": -0.21859449955146065,
-            "entropy_sum": -2.4255684305789004e-16,
-            "eval_sum": -26.803872906548193,
-            "ewald": -239.32629517316133,
-            "exc": -42.97446413308467,
-            "free": -375.4035050739258,
-            "kin": 115.09052586559307,
-            "scf_correction": 9.118537036556518e-09,
-            "total": -375.4035050739258,
-            "veff": -141.6758042725898,
-            "vha": 245.08050938458632,
-            "vloc": -333.063709062216,
-            "vxc": -53.692604594986385
+            "bxc": -0.21859453099694978,
+            "dftd3": 0.0,
+            "dftd4": 0.0,
+            "entropy_sum": -2.431405094633902e-16,
+            "eval_sum": -26.803879480580203,
+            "ewald": -239.32629516891166,
+            "exc": -42.97446376920979,
+            "free": -375.40350472129916,
+            "kin": 115.090520477905,
+            "scf_correction": 8.823249686429335e-07,
+            "total": -375.40350472129916,
+            "veff": -141.67580542748826,
+            "vha": 245.08050350114928,
+            "vloc": -333.0637048237149,
+            "vxc": -53.69260410493752
         },
-        "etot_history": [-363.01882295940095,-372.6209083877894,-374.1974706358724,-375.21670416636,-375.22582404563684,-374.81068657545876,-375.3582464265443,-375.4112652109265,-375.415163391737,-375.40470669561245,-375.40217333107836,-375.4054673696777,-375.4036120700799,-375.4035418057176,-375.4035491118826,-375.40357354524747,-375.4034761581902,-375.40345661730737,-375.4034930484215,-375.40350645913406,-375.40350944571964,-375.40350524502264,-375.40350490168555,-375.40350508450126,-375.4035051163307,-375.40350508441566,-375.40350505428034,-375.4035050072223,-375.4035050216652,-375.40350511462066,-375.4035050875652,-375.403505078375,-375.4035050765659,-375.40350507567865,-375.4035050709074,-375.4035050737591,-375.4035050739258],
+        "etot_history": [-363.01540688727823,-372.3296125505617,-374.43690487677566,-375.1977781956407,-375.26689436890024,-374.7228352759363,-375.4557820825973,-375.40339120708927,-375.39866665166085,-375.4042784049744,-375.40367204105485,-375.40378287259864,-375.40349255195747,-375.4035069395142,-375.40348950139,-375.40350101628485,-375.4035143821575,-375.40350954721794,-375.40350473893943,-375.40350472129916],
         "forces": [
             [0.0,0.0,0.0],
             [0.0,0.0,0.0],
-            [-0.0031472582173004897,-0.0031472582173004897,-0.003147258217300462],
-            [0.0031472582173004897,0.0031472582173005035,0.0031472582173004897]
+            [-0.003146859037271426,-0.0031468590372713983,-0.0031468590372713983],
+            [0.0031468590372713983,0.0031468590372713706,0.0031468590372713983]
         ],
         "magnetisation": {
             "atoms": [
-                [0.0,0.0,1.7398688034258583],
-                [0.0,0.0,-1.7389451666005602],
-                [0.0,0.0,-0.00048123316531149205],
-                [0.0,0.0,0.0004301298158802179]
+                [0.0,0.0,1.739869127453484],
+                [0.0,0.0,-1.7389453650260294],
+                [0.0,0.0,-0.00048182087177273244],
+                [0.0,0.0,0.00042954309197469663]
             ],
-            "total": [0.0,0.0,-5.596852471215777e-10]
+            "total": [0.0,0.0,9.595913934979144e-10]
         },
-        "num_scf_iterations": 36,
-        "rho_min": 0.01147548166842359,
-        "rms_history": [0.2608354247279235,0.295798049249155,0.08752557935096265,0.04860013029518923,0.0440468309977525,0.04217937173992232,0.01361634157104944,0.0049305478463928555,0.004068627173186378,0.0016711848358534152,0.0007486507956958945,0.00015731801929434578,5.7420419825204416e-05,2.9045469334817406e-05,1.9950335758343885e-05,1.3346691843031192e-05,5.905733263317464e-06,4.11159628058218e-06,1.0037347484528199e-06,5.579930436682524e-07,2.472257954889928e-07,8.509055565974707e-08,5.089124250374881e-08,3.515915050900785e-08,4.4925038286985674e-08,3.006162344315359e-08,3.203266782291662e-08,1.8071027542594174e-08,1.3006767057131359e-08,8.174921308712124e-09,5.847791142468431e-09,4.827483502782414e-09,4.305476349055417e-09,3.2184651156804377e-09,2.2807985504373038e-08,3.422207006171203e-09,3.810251678328596e-09],
-        "scf_time": 99.879586612
+        "num_scf_iterations": 19,
+        "rho_min": 0.01147548110904405,
+        "rms_history": [5.033913018124934,5.416953171192131,1.4014083575162628,0.4519610777908385,0.319815887170792,0.3479823809259652,0.1773202879609523,0.07568632353624972,0.02673239603874862,0.00475155580158716,0.0023355406231775266,0.0003159510677868313,7.905651601059274e-05,4.273158576179883e-05,2.7188053367760753e-05,1.7708606229845675e-05,7.453298930344168e-06,1.907404565278266e-06,6.86693198728036e-07,2.825824745564471e-07],
+        "scf_time": 64.23953482
     },
     "task": 0,
-    "threads_per_rank": 16
+    "threads_per_rank": 4
 }
\ No newline at end of file
diff --git a/verification/test25/sirius.json b/verification/test25/sirius.json
index 97d1499230..8bd90e6313 100644
--- a/verification/test25/sirius.json
+++ b/verification/test25/sirius.json
@@ -41,7 +41,7 @@
         "beta_scaling_factor": 1.0,
         "max_history": 8,
         "type": "anderson",
-        "use_hartree": false
+        "use_hartree": true
     },
     "parameters": {
         "density_tol": 1e-06,
diff --git a/verification/test26/output_ref.json b/verification/test26/output_ref.json
index 7ad0540dc7..b1f4fc2c70 100644
--- a/verification/test26/output_ref.json
+++ b/verification/test26/output_ref.json
@@ -8,7 +8,7 @@
                 "beta_on_device": false,
                 "cyclic_block_size": 32,
                 "fft_mode": "parallel",
-                "gen_evp_solver_name": "lapack",
+                "gen_evp_solver_name": "cusolver",
                 "gvec_chunk_size": 500000,
                 "mpi_grid_dims": [1,1],
                 "num_bands_to_print": 10,
@@ -17,16 +17,51 @@
                 "print_forces": true,
                 "print_neighbors": false,
                 "print_stress": true,
-                "processing_unit": "cpu",
+                "processing_unit": "gpu",
                 "reduce_gvec": true,
                 "rmt_max": 2.2,
                 "save_rf": false,
                 "spglib_tolerance": 1e-06,
-                "std_evp_solver_name": "lapack",
+                "std_evp_solver_name": "cusolver",
                 "use_second_variation": true,
-                "verbosity": 2,
+                "verbosity": 3,
                 "verification": 0
             },
+            "dftd3": {
+                "damping": "rational",
+                "damping_values": "auto",
+                "method": "none",
+                "parameters": {
+                    "a1": 0.0,
+                    "a2": 0.0,
+                    "alp": 0.0,
+                    "beta": 0.0,
+                    "rs6": 0.0,
+                    "rs8": 0.0,
+                    "s6": 0.0,
+                    "s8": 0.0,
+                    "s9": 0.0
+                },
+                "three_body": true
+            },
+            "dftd4": {
+                "damping": "rational",
+                "damping_values": "auto",
+                "method": "none",
+                "parameters": {
+                    "a1": 0.0,
+                    "a2": 0.0,
+                    "alp": 0.0,
+                    "bet": 0.0,
+                    "enable": false,
+                    "rs6": 0.0,
+                    "rs8": 0.0,
+                    "s6": 0.0,
+                    "s8": 0.0,
+                    "s9": 0.0
+                },
+                "three_body": true
+            },
             "hubbard": {
                 "constrained_calculation": false,
                 "constraint_beta_mixing": 0.4,
@@ -38,7 +73,7 @@
                 "local": [
                     {
                         "J": 0.0,
-                        "U": 8.0,
+                        "U": 0.293995,
                         "atom_type": "Ni",
                         "hubbard_orbital": "3d",
                         "l": 2,
@@ -60,11 +95,15 @@
                 "init_subspace": "lcao",
                 "locking": true,
                 "min_num_res": 0,
+                "min_occupancy": 1e-14,
+                "min_tolerance": 1e-13,
                 "num_singular": -1,
                 "num_steps": 20,
                 "relative_tolerance": 0,
                 "residual_tolerance": 1e-06,
                 "subspace_size": 2,
+                "tolerance_ratio": 0,
+                "tolerance_scale": [0.1,0.5],
                 "type": "davidson"
             },
             "locked": true,
@@ -72,8 +111,8 @@
                 "beta": 0.75,
                 "beta0": 0.15,
                 "beta_scaling_factor": 1.0,
-                "linear_mix_rms_tol": 1000000.0,
                 "max_history": 8,
+                "rms_min": 1e-16,
                 "type": "anderson",
                 "use_hartree": false
             },
@@ -91,9 +130,12 @@
                 "aw_cutoff": 0.0,
                 "core_relativity": "dirac",
                 "density_tol": 1e-05,
+                "dftd3_correction": false,
+                "dftd4_correction": false,
                 "electronic_structure_method": "pseudopotential",
                 "energy_tol": 1e-08,
                 "extra_charge": 0,
+                "fixed_mag": 0,
                 "gamma_point": false,
                 "gk_cutoff": 6.325,
                 "hubbard_correction": true,
@@ -126,21 +168,22 @@
                 "xc_functionals": ["XC_GGA_X_PBE","XC_GGA_C_PBE"]
             },
             "settings": {
-                "always_update_wf": true,
                 "auto_enu_tol": 0,
                 "fft_grid_size": [80,80,80],
                 "fp32_to_fp64_rms": 0,
-                "itsol_tol_min": 1e-13,
-                "itsol_tol_ratio": 0,
-                "itsol_tol_scale": [0.1,0.5],
-                "min_occupancy": 1e-14,
-                "mixer_rms_min": 1e-16,
                 "nprii_aug": 20,
                 "nprii_beta": 20,
                 "nprii_rho_core": 20,
                 "nprii_vloc": 200,
+                "pseudo_grid_cutoff": 10.0,
                 "radial_grid": "exponential, 1.0",
+                "real_occupation_matrix": false,
                 "sht_coverage": 0,
+                "sht_lmax": -1,
+                "simple_lapw_ri": false,
+                "smooth_initial_mag": false,
+                "tol_ne": 0.01,
+                "use_coarse_fft_grid": true,
                 "xc_use_lapl": false
             },
             "unit_cell": {
@@ -152,12 +195,12 @@
                 "atom_types": ["Ni","O"],
                 "atoms": {
                     "Ni": [
-                        [0.0,0.0,0.0],
-                        [0.5,0.5,0.5]
+                        [0.0,0.0,0.0,0.0,0.0,2.0],
+                        [0.5,0.5,0.5,0.0,0.0,-2.0]
                     ],
                     "O": [
-                        [0.251,0.251,0.251],
-                        [0.749,0.749,0.749]
+                        [0.251,0.251,0.251,0.0,0.0,0.0],
+                        [0.749,0.749,0.749,0.0,0.0,0.0]
                     ]
                 },
                 "lattice_vectors": [
@@ -184,55 +227,57 @@
         "omega": 249.33862849999997
     },
     "counters": {
-        "band_evp_work_count": 6688.102076124586,
-        "local_operator_num_applied": 58607
+        "band_evp_work_count": 6978.806991654811,
+        "local_operator_num_applied": 60318
     },
-    "git_hash": "a0d09bff305c7d162e6e43a792018b99bd33b276",
+    "git_hash": "463749bce37a1287336017dc11cdcddda6cc7cef",
     "ground_state": {
-        "band_gap": 0.11290281924383772,
+        "band_gap": 0.11290280950535486,
         "converged": true,
-        "efermi": 0.5246031969630998,
+        "efermi": 0.5246031810379566,
         "energy": {
-            "bxc": -0.21860314224858804,
-            "entropy_sum": -2.0153200784608935e-16,
-            "eval_sum": -27.43459641965035,
-            "ewald": -237.81730213844847,
-            "exc": -42.94622990705071,
-            "free": -375.40235132916143,
-            "kin": 115.00797407028188,
-            "scf_correction": 3.446132268436486e-10,
-            "total": -375.40235132916143,
-            "veff": -142.22396734768364,
-            "vha": 246.81645700234355,
-            "vloc": -335.38603444869347,
-            "vxc": -53.65438990132184
+            "bxc": -0.21860320810266617,
+            "dftd3": 0.0,
+            "dftd4": 0.0,
+            "entropy_sum": -2.0152556498690033e-16,
+            "eval_sum": -27.43459722643568,
+            "ewald": -237.81730213506043,
+            "exc": -42.94623036082675,
+            "free": -375.40235122162983,
+            "kin": 115.0079726318267,
+            "scf_correction": 3.356319666636409e-10,
+            "total": -375.40235122162983,
+            "veff": -142.22396665015972,
+            "vha": 246.81646192812283,
+            "vloc": -335.3860381677411,
+            "vxc": -53.65439041053203
         },
-        "etot_history": [-358.58999689250766,-370.7939397341292,-373.7906019813497,-375.1115769850038,-375.20397095195676,-374.8859215174739,-375.33087086124556,-375.31576423623505,-375.4073318322314,-375.403211974836,-375.40292296572727,-375.4012254351925,-375.4029521933096,-375.4026844414773,-375.40243545257067,-375.4023511169877,-375.4023609322113,-375.4023487100028,-375.40235196497247,-375.4023504866274,-375.402351322365,-375.40235131711506,-375.4023512949838,-375.4023513115126,-375.40235132254816,-375.4023513283945,-375.40235132976153,-375.402351330946,-375.4023513290283,-375.40235132918485,-375.4023513291497,-375.402351329177,-375.40235132915495,-375.4023513291886,-375.40235132913824,-375.4023513291479,-375.40235132916143],
+        "etot_history": [-358.5864119211701,-370.85888112748336,-373.56719572729037,-375.1555581977194,-375.20122176852476,-375.10041990732316,-375.04692341137036,-375.2343820914492,-375.3765086572063,-375.42433935994035,-375.40648492095585,-375.4052733839037,-375.4006015476415,-375.4015741578613,-375.4023725309721,-375.40232439642256,-375.40236340197174,-375.4023329610692,-375.40236166670445,-375.40235271316226,-375.40235171098044,-375.40235048277077,-375.40235103276746,-375.40235122796827,-375.4023512136913,-375.40235122003276,-375.4023512191543,-375.40235122368574,-375.4023512210638,-375.40235122214096,-375.4023512216577,-375.40235122159834,-375.40235122163824,-375.40235122161766,-375.4023512216177,-375.4023512216305,-375.40235122162983],
         "forces": [
             [0.0,0.0,0.0],
             [0.0,0.0,0.0],
-            [-0.0030171290616563436,-0.0030171290616563574,-0.0030171290616563436],
-            [0.0030171290616563574,0.0030171290616563436,0.0030171290616563436]
+            [-0.0030165752815831805,-0.0030165752815831805,-0.0030165752815831666],
+            [0.0030165752815831523,0.0030165752815831527,0.003016575281583194]
         ],
         "magnetisation": {
             "atoms": [
-                [0.0,0.0,1.744455442288323],
-                [0.0,0.0,-1.7453510364634455],
-                [0.0,0.0,0.00045094306790156646],
-                [0.0,0.0,-0.0004042693409151514]
+                [0.0,0.0,1.744456124509312],
+                [0.0,0.0,-1.7453511104450692],
+                [0.0,0.0,0.00045122806811319143],
+                [0.0,0.0,-0.0004039865795744099]
             ],
-            "total": [0.0,0.0,8.042931013466695e-12]
+            "total": [0.0,0.0,-2.06291399551573e-12]
         },
         "num_scf_iterations": 36,
-        "rho_min": 0.010971688340903465,
-        "rms_history": [0.2596322879810102,0.4285994176301848,0.1528012053166588,0.03812836362286105,0.034209967056783155,0.03458204005797634,0.015425330938034193,0.010611246366198786,0.0044446536983854335,0.0028707636219590126,0.0015073453683350818,0.0007035969131992771,0.00021271573943893382,9.953328765793663e-05,1.5157509714032823e-05,1.2613273366679112e-05,6.025079403762893e-06,4.1968887269848075e-06,2.5583851083447167e-06,1.0724038074593278e-06,4.269441374522495e-07,8.002399797602397e-08,3.283893575249995e-08,1.1904845490585115e-08,7.699665766845564e-09,5.7943074386557846e-09,2.9605930976755445e-09,1.2911371262557649e-09,2.0662193073079145e-10,1.0034087024030482e-10,7.752644023807522e-11,8.071376587150208e-11,6.68306232587366e-11,5.3416354401368483e-11,3.840662636864619e-11,1.931497928961017e-11,1.1920226613004456e-11],
-        "scf_time": 83.167749842,
+        "rho_min": 0.010971694996820939,
+        "rms_history": [0.38054009003505357,0.5557735273929559,0.3079465936489544,0.18904766546481008,0.17792211608264197,0.17800897148332898,0.159432021396124,0.06747670365879009,0.03873899224461493,0.02201171220335613,0.011802170449997372,0.008781577514903221,0.0027181871219661927,0.0009352387113104254,0.0002850899485289659,0.00014020677137127833,7.729832978874548e-05,5.030888950285035e-05,3.111321302751715e-05,1.5826226626986854e-05,6.433234597144335e-06,1.5884775055362247e-06,8.411691648688589e-07,8.689044588801541e-08,7.627279464304499e-08,3.935599087075288e-08,2.773743974109966e-08,2.1088354093288338e-08,7.034322324864935e-09,2.1032404343085687e-09,1.3617543028590982e-09,4.578976645990177e-10,2.582698736840132e-10,8.901301566083312e-11,8.165865274854889e-11,1.0774225754427981e-10,1.033922363492105e-10],
+        "scf_time": 21.713756715,
         "stress": [
-            [-0.00026776187050852295,6.609976010494559e-06,6.609976010494572e-06],
-            [6.609976010494559e-06,-0.00026776187050850907,6.609976010494573e-06],
-            [6.609976010494572e-06,6.609976010494573e-06,-0.0002677618705085212]
+            [-0.00026777011673945195,6.618307063803781e-06,6.618307063803789e-06],
+            [6.618307063803781e-06,-0.00026777011673952307,6.618307063803795e-06],
+            [6.618307063803789e-06,6.618307063803795e-06,-0.00026777011673956297]
         ]
     },
     "task": 0,
-    "threads_per_rank": 16
+    "threads_per_rank": 4
 }
\ No newline at end of file