diff --git a/apps/atoms/atom.cpp b/apps/atoms/atom.cpp index 3fb30ffff1..04a66f252f 100644 --- a/apps/atoms/atom.cpp +++ b/apps/atoms/atom.cpp @@ -100,7 +100,6 @@ class Free_atom : public sirius::Atom_type ); auto mixer_function_prop = mixer::FunctionProperties>( - [](const std::vector& x) -> std::size_t { return x.size(); }, /* use simple inner product for mixing */ [](const std::vector& x, const std::vector& y) -> double { double result = 0.0; diff --git a/apps/mini_app/sirius.scf.cpp b/apps/mini_app/sirius.scf.cpp index 0ca3603403..56d90feb7a 100644 --- a/apps/mini_app/sirius.scf.cpp +++ b/apps/mini_app/sirius.scf.cpp @@ -323,8 +323,6 @@ ground_state(Simulation_context& ctx, int task_id, cmd_args const& args, int wri } } - // dft.print_magnetic_moment(); - if (ref_file.size() != 0) { json dict_ref; std::ifstream(ref_file) >> dict_ref; @@ -354,7 +352,7 @@ ground_state(Simulation_context& ctx, int task_id, cmd_args const& args, int wri max_diff = std::max(max_diff, std::abs(v1[i][x] - v2[i][x])); } } - if (max_diff > 1e-5) { + if (max_diff > 1e-4) { std::cout << "magnetisations is different!" 
<< std::endl; ctx.comm().abort(5); } diff --git a/apps/tests/test_mixer.cpp b/apps/tests/test_mixer.cpp index 26c8e0fa4e..c97eedb1e9 100644 --- a/apps/tests/test_mixer.cpp +++ b/apps/tests/test_mixer.cpp @@ -85,7 +85,6 @@ test_mixer(cmd_args const& args) b[i] -= 1; auto mixer_function_prop = mixer::FunctionProperties>( - [](const std::vector& x) -> std::size_t { return 1; }, [](const std::vector& x, const std::vector& y) -> double { double result = 0.0; for (std::size_t i = 0; i < x.size(); ++i) diff --git a/examples/fp-lapw/Eu6C60/sirius.json b/examples/fp-lapw/Eu6C60/sirius.json index 85c5d0314d..4d57bf56e0 100644 --- a/examples/fp-lapw/Eu6C60/sirius.json +++ b/examples/fp-lapw/Eu6C60/sirius.json @@ -1,29 +1,27 @@ { "control" : { - "cyclic_block_size" : 16, - "processing_unit" : "cpu", - "std_evp_solver_name" : "lapack", - "gen_evp_solver_name" : "lapack" + "verbosity" : 1 }, "parameters" : { "electronic_structure_method" : "full_potential_lapwlo", "xc_functionals" : ["XC_LDA_X", "XC_LDA_C_PZ"], "smearing_width" : 0.05, - "valence_relativity" : "none", - "core_relativity" : "none", - "aw_cutoff" : 7.0, + "valence_relativity" : "iora", + "core_relativity" : "dirac", + "aw_cutoff" : 6.0, "pw_cutoff" : 20.00, "auto_rmt" : 1, + "num_mag_dims" : 1, "use_symmetry": true, "ngridk" : [1, 1, 1], "density_tol" : 1e-5, "energy_tol" : 1e-8, - "num_dft_iter" : 100 + "num_dft_iter" : 1 }, "mixer" : { - "beta" : 0.95, + "beta" : 0.5, "type" : "anderson", "max_history" : 8 }, diff --git a/src/api/sirius.f90 b/src/api/sirius.f90 index e5802a3509..b249ece245 100644 --- a/src/api/sirius.f90 +++ b/src/api/sirius.f90 @@ -3006,16 +3006,18 @@ end subroutine sirius_generate_effective_potential !> @param [in] gs_handler Ground state handler. !> @param [in] add_core Add core charge density in the muffin-tins. !> @param [in] transform_to_rg If true, density and magnetization are transformed to real-space grid. 
-!> @param [in] paw_only it true, only local PAW density is generated +!> @param [in] paw_only if true, only local PAW density is generated +!> @param [in] efermi if true, Fermi energy level is also computed !> @param [out] error_code Error code. subroutine sirius_generate_density(gs_handler,add_core,transform_to_rg,paw_only,& -&error_code) +&efermi,error_code) implicit none ! type(sirius_ground_state_handler), target, intent(in) :: gs_handler logical, optional, target, intent(in) :: add_core logical, optional, target, intent(in) :: transform_to_rg logical, optional, target, intent(in) :: paw_only +logical, optional, target, intent(in) :: efermi integer, optional, target, intent(out) :: error_code ! type(C_PTR) :: gs_handler_ptr @@ -3025,17 +3027,20 @@ subroutine sirius_generate_density(gs_handler,add_core,transform_to_rg,paw_only, logical(C_BOOL), target :: transform_to_rg_c_type type(C_PTR) :: paw_only_ptr logical(C_BOOL), target :: paw_only_c_type +type(C_PTR) :: efermi_ptr +logical(C_BOOL), target :: efermi_c_type type(C_PTR) :: error_code_ptr ! 
interface subroutine sirius_generate_density_aux(gs_handler,add_core,transform_to_rg,paw_only,& -&error_code)& +&efermi,error_code)& &bind(C, name="sirius_generate_density") use, intrinsic :: ISO_C_BINDING type(C_PTR), value :: gs_handler type(C_PTR), value :: add_core type(C_PTR), value :: transform_to_rg type(C_PTR), value :: paw_only +type(C_PTR), value :: efermi type(C_PTR), value :: error_code end subroutine end interface @@ -3057,18 +3062,17 @@ subroutine sirius_generate_density_aux(gs_handler,add_core,transform_to_rg,paw_o paw_only_c_type = paw_only paw_only_ptr = C_LOC(paw_only_c_type) endif +efermi_ptr = C_NULL_PTR +if (present(efermi)) then +efermi_c_type = efermi +efermi_ptr = C_LOC(efermi_c_type) +endif error_code_ptr = C_NULL_PTR if (present(error_code)) then error_code_ptr = C_LOC(error_code) endif call sirius_generate_density_aux(gs_handler_ptr,add_core_ptr,transform_to_rg_ptr,& -&paw_only_ptr,error_code_ptr) -if (present(add_core)) then -endif -if (present(transform_to_rg)) then -endif -if (present(paw_only)) then -endif +&paw_only_ptr,efermi_ptr,error_code_ptr) end subroutine sirius_generate_density ! @@ -7262,7 +7266,7 @@ end subroutine sirius_set_atom_vector_field !> @brief Set the parameters controlling the dftd3 correction. !> @param [in] handler Simulation context handler. !> @param [in] method family of predefined parameters. Linked to the functional -!> @param [in] damping__ damping correction, auto, manual. +!> @param [in] damping damping correction, auto, manual. !> @param [in] atm Include the three body correction !> @param [in] damping_term type of damping correction, rational, mrational, zero, mzero, ... !> @param [in] s6 s6 parameter for dftd3 model. @@ -7272,13 +7276,13 @@ end subroutine sirius_set_atom_vector_field !> @param [in] alp alp parameter for dftd3 model. !> @param [in] beta beta parameter for dftd3 model. !> @param [out] error_code Error code. 
-subroutine sirius_set_dftd3_correction(handler,method,damping__,atm,damping_term,& -&s6,s8,s9,rs8,alp,beta,error_code) +subroutine sirius_set_dftd3_correction(handler,method,damping,atm,damping_term,s6,& +&s8,s9,rs8,alp,beta,error_code) implicit none ! type(sirius_context_handler), target, intent(in) :: handler character(*), target, intent(in) :: method -character(*), optional, target, intent(in) :: damping__ +character(*), optional, target, intent(in) :: damping logical, optional, target, intent(in) :: atm character(*), optional, target, intent(in) :: damping_term real(8), optional, target, intent(in) :: s6 @@ -7292,8 +7296,8 @@ subroutine sirius_set_dftd3_correction(handler,method,damping__,atm,damping_term type(C_PTR) :: handler_ptr type(C_PTR) :: method_ptr character(C_CHAR), target, allocatable :: method_c_type(:) -type(C_PTR) :: damping___ptr -character(C_CHAR), target, allocatable :: damping___c_type(:) +type(C_PTR) :: damping_ptr +character(C_CHAR), target, allocatable :: damping_c_type(:) type(C_PTR) :: atm_ptr logical(C_BOOL), target :: atm_c_type type(C_PTR) :: damping_term_ptr @@ -7307,13 +7311,13 @@ subroutine sirius_set_dftd3_correction(handler,method,damping__,atm,damping_term type(C_PTR) :: error_code_ptr ! 
interface -subroutine sirius_set_dftd3_correction_aux(handler,method,damping__,atm,damping_term,& +subroutine sirius_set_dftd3_correction_aux(handler,method,damping,atm,damping_term,& &s6,s8,s9,rs8,alp,beta,error_code)& &bind(C, name="sirius_set_dftd3_correction") use, intrinsic :: ISO_C_BINDING type(C_PTR), value :: handler type(C_PTR), value :: method -type(C_PTR), value :: damping__ +type(C_PTR), value :: damping type(C_PTR), value :: atm type(C_PTR), value :: damping_term type(C_PTR), value :: s6 @@ -7332,11 +7336,11 @@ subroutine sirius_set_dftd3_correction_aux(handler,method,damping__,atm,damping_ allocate(method_c_type(len(method)+1)) method_c_type = string_f2c(method) method_ptr = C_LOC(method_c_type) -damping___ptr = C_NULL_PTR -if (present(damping__)) then -allocate(damping___c_type(len(damping__)+1)) -damping___c_type = string_f2c(damping__) -damping___ptr = C_LOC(damping___c_type) +damping_ptr = C_NULL_PTR +if (present(damping)) then +allocate(damping_c_type(len(damping)+1)) +damping_c_type = string_f2c(damping) +damping_ptr = C_LOC(damping_c_type) endif atm_ptr = C_NULL_PTR if (present(atm)) then @@ -7377,11 +7381,11 @@ subroutine sirius_set_dftd3_correction_aux(handler,method,damping__,atm,damping_ if (present(error_code)) then error_code_ptr = C_LOC(error_code) endif -call sirius_set_dftd3_correction_aux(handler_ptr,method_ptr,damping___ptr,atm_ptr,& +call sirius_set_dftd3_correction_aux(handler_ptr,method_ptr,damping_ptr,atm_ptr,& &damping_term_ptr,s6_ptr,s8_ptr,s9_ptr,rs8_ptr,alp_ptr,beta_ptr,error_code_ptr) deallocate(method_c_type) -if (present(damping__)) then -deallocate(damping___c_type) +if (present(damping)) then +deallocate(damping_c_type) endif if (present(atm)) then endif @@ -7390,6 +7394,138 @@ subroutine sirius_set_dftd3_correction_aux(handler,method,damping__,atm,damping_ endif end subroutine sirius_set_dftd3_correction +! +!> @brief Set the parameters controlling the dftd4 correction. 
+!> @param [in] handler Simulation context handler. +!> @param [in] method family of predefined parameters. Linked to the functional +!> @param [in] damping damping correction, auto, manual. +!> @param [in] atm Include the three body correction +!> @param [in] damping_term type of damping correction, rational, mrational +!> @param [in] s6 s6 parameter for dftd4 model. +!> @param [in] s8 s8 parameter for dftd4 model. +!> @param [in] s9 s9 parameter for dftd4 model. +!> @param [in] a1 a1 parameter for dftd4 model. +!> @param [in] a2 a2 parameter for dftd4 model. +!> @param [in] alp alp parameter for dftd4 model. +!> @param [out] error_code Error code. +subroutine sirius_set_dftd4_correction(handler,method,damping,atm,damping_term,s6,& +&s8,s9,a1,a2,alp,error_code) +implicit none +! +type(sirius_context_handler), target, intent(in) :: handler +character(*), target, intent(in) :: method +character(*), optional, target, intent(in) :: damping +logical, optional, target, intent(in) :: atm +character(*), optional, target, intent(in) :: damping_term +real(8), optional, target, intent(in) :: s6 +real(8), optional, target, intent(in) :: s8 +real(8), optional, target, intent(in) :: s9 +real(8), optional, target, intent(in) :: a1 +real(8), optional, target, intent(in) :: a2 +real(8), optional, target, intent(in) :: alp +integer, optional, target, intent(out) :: error_code +! +type(C_PTR) :: handler_ptr +type(C_PTR) :: method_ptr +character(C_CHAR), target, allocatable :: method_c_type(:) +type(C_PTR) :: damping_ptr +character(C_CHAR), target, allocatable :: damping_c_type(:) +type(C_PTR) :: atm_ptr +logical(C_BOOL), target :: atm_c_type +type(C_PTR) :: damping_term_ptr +character(C_CHAR), target, allocatable :: damping_term_c_type(:) +type(C_PTR) :: s6_ptr +type(C_PTR) :: s8_ptr +type(C_PTR) :: s9_ptr +type(C_PTR) :: a1_ptr +type(C_PTR) :: a2_ptr +type(C_PTR) :: alp_ptr +type(C_PTR) :: error_code_ptr +! 
+interface +subroutine sirius_set_dftd4_correction_aux(handler,method,damping,atm,damping_term,& +&s6,s8,s9,a1,a2,alp,error_code)& +&bind(C, name="sirius_set_dftd4_correction") +use, intrinsic :: ISO_C_BINDING +type(C_PTR), value :: handler +type(C_PTR), value :: method +type(C_PTR), value :: damping +type(C_PTR), value :: atm +type(C_PTR), value :: damping_term +type(C_PTR), value :: s6 +type(C_PTR), value :: s8 +type(C_PTR), value :: s9 +type(C_PTR), value :: a1 +type(C_PTR), value :: a2 +type(C_PTR), value :: alp +type(C_PTR), value :: error_code +end subroutine +end interface +! +handler_ptr = C_NULL_PTR +handler_ptr = C_LOC(handler%handler_ptr_) +method_ptr = C_NULL_PTR +allocate(method_c_type(len(method)+1)) +method_c_type = string_f2c(method) +method_ptr = C_LOC(method_c_type) +damping_ptr = C_NULL_PTR +if (present(damping)) then +allocate(damping_c_type(len(damping)+1)) +damping_c_type = string_f2c(damping) +damping_ptr = C_LOC(damping_c_type) +endif +atm_ptr = C_NULL_PTR +if (present(atm)) then +atm_c_type = atm +atm_ptr = C_LOC(atm_c_type) +endif +damping_term_ptr = C_NULL_PTR +if (present(damping_term)) then +allocate(damping_term_c_type(len(damping_term)+1)) +damping_term_c_type = string_f2c(damping_term) +damping_term_ptr = C_LOC(damping_term_c_type) +endif +s6_ptr = C_NULL_PTR +if (present(s6)) then +s6_ptr = C_LOC(s6) +endif +s8_ptr = C_NULL_PTR +if (present(s8)) then +s8_ptr = C_LOC(s8) +endif +s9_ptr = C_NULL_PTR +if (present(s9)) then +s9_ptr = C_LOC(s9) +endif +a1_ptr = C_NULL_PTR +if (present(a1)) then +a1_ptr = C_LOC(a1) +endif +a2_ptr = C_NULL_PTR +if (present(a2)) then +a2_ptr = C_LOC(a2) +endif +alp_ptr = C_NULL_PTR +if (present(alp)) then +alp_ptr = C_LOC(alp) +endif +error_code_ptr = C_NULL_PTR +if (present(error_code)) then +error_code_ptr = C_LOC(error_code) +endif +call sirius_set_dftd4_correction_aux(handler_ptr,method_ptr,damping_ptr,atm_ptr,& +&damping_term_ptr,s6_ptr,s8_ptr,s9_ptr,a1_ptr,a2_ptr,alp_ptr,error_code_ptr) 
+deallocate(method_c_type) +if (present(damping)) then +deallocate(damping_c_type) +endif +if (present(atm)) then +endif +if (present(damping_term)) then +deallocate(damping_term_c_type) +endif +end subroutine sirius_set_dftd4_correction + subroutine sirius_free_handler_ctx(handler, error_code) implicit none diff --git a/src/api/sirius_api.cpp b/src/api/sirius_api.cpp index 676b77a827..a651815e4d 100644 --- a/src/api/sirius_api.cpp +++ b/src/api/sirius_api.cpp @@ -2853,7 +2853,11 @@ sirius_generate_effective_potential(void* const* gs_handler__, int* error_code__ paw_only: type: bool attr: in, optional - doc: it true, only local PAW density is generated + doc: if true, only local PAW density is generated + efermi: + type: bool + attr: in, optional + doc: if true, Fermi energy level is also computed error_code: type: int attr: out, optional @@ -2862,7 +2866,7 @@ sirius_generate_effective_potential(void* const* gs_handler__, int* error_code__ */ void sirius_generate_density(void* const* gs_handler__, bool const* add_core__, bool const* transform_to_rg__, - bool const* paw_only__, int* error_code__) + bool const* paw_only__, bool const* efermi__, int* error_code__) { call_sirius( [&]() { @@ -2870,6 +2874,11 @@ sirius_generate_density(void* const* gs_handler__, bool const* add_core__, bool auto add_core = get_value(add_core__, false); auto transform_to_rg = get_value(transform_to_rg__, false); auto paw_only = get_value(paw_only__, false); + auto efermi = get_value(efermi__, false); + + if (efermi) { + gs.k_point_set().find_band_occupancies(); + } if (paw_only) { gs.density().generate_paw_density(); diff --git a/src/density/density.cpp b/src/density/density.cpp index c51b5631b1..d1345c7e63 100644 --- a/src/density/density.cpp +++ b/src/density/density.cpp @@ -1930,7 +1930,8 @@ void Density::mixer_init(config_t::mixer_t const& mixer_cfg__) { auto func_prop = mixer::periodic_function_property(); - auto func_prop1 = mixer::periodic_function_property_modified(true); + auto 
func_prop1 = mixer::periodic_function_property_rho_pw(true); + auto func_prop2 = mixer::periodic_function_property_mag_pw(true); auto density_prop = mixer::density_function_property(); auto paw_prop = mixer::paw_density_function_property(); auto hubbard_prop = mixer::hubbard_matrix_function_property(); @@ -1962,11 +1963,11 @@ Density::mixer_init(config_t::mixer_t const& mixer_cfg__) this->mixer_->initialize_function<0>(func_prop, component(0), ctx_); } if (ctx_.num_mag_dims() > 0) { - this->mixer_->initialize_function<1>(func_prop, component(1), ctx_); + this->mixer_->initialize_function<1>(func_prop2, component(1), ctx_); } if (ctx_.num_mag_dims() > 1) { - this->mixer_->initialize_function<2>(func_prop, component(2), ctx_); - this->mixer_->initialize_function<3>(func_prop, component(3), ctx_); + this->mixer_->initialize_function<2>(func_prop2, component(2), ctx_); + this->mixer_->initialize_function<3>(func_prop2, component(3), ctx_); } } diff --git a/src/dft/dft_ground_state.cpp b/src/dft/dft_ground_state.cpp index f2886825f5..c98fcf9d4c 100644 --- a/src/dft/dft_ground_state.cpp +++ b/src/dft/dft_ground_state.cpp @@ -27,7 +27,6 @@ DFT_ground_state::initial_state() PROFILE("sirius::DFT_ground_state::initial_state"); density_.initial_density(); - density_.print_info(ctx_.out(1)); potential_.generate(density_, ctx_.use_symmetry(), true); if (!ctx_.full_potential()) { if (ctx_.cfg().parameters().precision_wf() == "fp32") { @@ -37,7 +36,6 @@ DFT_ground_state::initial_state() #else RTE_THROW("not compiled with FP32 support"); #endif - } else { Hamiltonian0 H0(potential_, true); initialize_subspace(kset_, H0); @@ -197,13 +195,15 @@ DFT_ground_state::find(double density_tol__, double energy_tol__, double iter_so Density rho1(ctx_); std::stringstream s; - s << "density_tol : " << density_tol__ << std::endl - << "energy_tol : " << energy_tol__ << std::endl + s << "density tolerance : " << density_tol__ << std::endl + << "total energy tolerance : " << energy_tol__ << 
std::endl << "iter_solver_tol (initial) : " << iter_solver_tol__ << std::endl << "iter_solver_tol (target) : " << ctx_.cfg().iterative_solver().min_tolerance() << std::endl - << "num_dft_iter : " << num_dft_iter__; + << "num_dft_iter : " << num_dft_iter__ << std::endl; RTE_OUT(ctx_.out(1)) << s.str(); + density_.print_info(ctx_.out(1)); + for (int iter = 0; iter < num_dft_iter__; iter++) { PROFILE("sirius::DFT_ground_state::scf_loop|iteration"); std::stringstream s; @@ -216,7 +216,7 @@ DFT_ground_state::find(double density_tol__, double energy_tol__, double iter_so diagonalize_result_t result; - double ne_diff = 0; + double ne_diff{0}; if (ctx_.cfg().parameters().precision_wf() == "fp32") { #if defined(SIRIUS_USE_FP32) Hamiltonian0 H0(potential_, true); @@ -255,14 +255,25 @@ DFT_ground_state::find(double density_tol__, double energy_tol__, double iter_so /* mix density */ rms = density_.mix(); - double eha_res = density_residual_hartree_energy(density_, rho1); - - /* estimate new tolerance of the iterative solver */ - double tol = rms; - if (ctx_.cfg().mixer().use_hartree()) { - // tol = rms * rms / std::max(1.0, unit_cell_.num_electrons()); - tol = eha_res / std::max(1.0, unit_cell_.num_electrons()); + /* we need to estimate new tolerance of the iterative solver; + * several cases need to be handled: + * - full-potential + * - pseudo-potential + * - use Hartree energy of residuals + * - use inner product of residuals */ + double tol{0}; + if (ctx_.full_potential()) { + /* this will be Sqrt( ) i.e. 
root mean square error */ + tol = rms; + } else { + if (ctx_.cfg().mixer().use_hartree()) { + tol = rms * rms / std::max(1.0, unit_cell_.num_electrons()); + } else { + /* same as full-potential case */ + tol = rms; + } } + tol = std::min(ctx_.cfg().iterative_solver().tolerance_scale()[0] * tol, ctx_.cfg().iterative_solver().tolerance_scale()[1] * iter_solver_tol__); /* tolerance can't be too small */ @@ -344,8 +355,8 @@ DFT_ground_state::find(double density_tol__, double energy_tol__, double iter_so out << "iteration : " << iter << ", RMS : " << std::setprecision(12) << std::scientific << rms << ", energy difference : " << std::setprecision(12) << std::scientific << etot - eold << std::endl; if (!ctx_.full_potential()) { - out << "Hartree energy of density residual : " << eha_res << std::endl - << "bands are converged : " << boolstr(result.converged) << std::endl; + out //<< "Hartree energy of density residual : " << eha_res << std::endl + << "bands are converged : " << boolstr(result.converged) << std::endl; } if (ctx_.cfg().iterative_solver().type() != "exact") { out << std::endl << "iterative solver converged : " << boolstr(iter_solver_converged) << std::endl; @@ -353,14 +364,8 @@ DFT_ground_state::find(double density_tol__, double energy_tol__, double iter_so RTE_OUT(ctx_.out(1)) << out.str(); /* check if the calculation has converged */ - bool converged{true}; - // converged = (std::abs(eold - etot) < energy_tol__) && result.converged && iter_solver_converged; - converged = (std::abs(eold - etot) < energy_tol__) && iter_solver_converged; - if (ctx_.cfg().mixer().use_hartree()) { - converged = converged && (eha_res < density_tol__); - } else { - converged = converged && (rms < density_tol__); - } + bool converged = (std::abs(eold - etot) < energy_tol__) && (rms < density_tol__) && iter_solver_converged; + if (converged) { if (std::abs(ne_diff) > 1e-10) { std::stringstream ss; diff --git a/src/function3d/periodic_function.hpp 
b/src/function3d/periodic_function.hpp index f6e5c39e75..ddadcaa33c 100644 --- a/src/function3d/periodic_function.hpp +++ b/src/function3d/periodic_function.hpp @@ -328,6 +328,56 @@ copy(periodic_function_ptr_t const src__, Periodic_function& dest__) } } +template +inline void +copy(Periodic_function const& src__, Periodic_function& dest__) +{ + copy(src__.rg(), dest__.rg()); + if (src__.ctx().full_potential()) { + copy(src__.mt(), dest__.mt()); + } +} + +template +inline void +axpy(T alpha__, Periodic_function const& x__, Periodic_function& y__) +{ + axpy(alpha__, x__.rg(), y__.rg()); + if (x__.ctx().full_potential()) { + axpy(alpha__, x__.mt(), y__.mt()); + } +} + +template +inline void +rotate(T c__, T s__, Periodic_function& x__, Periodic_function& y__) +{ + #pragma omp parallel + { + #pragma omp for schedule(static) nowait + for (std::size_t i = 0; i < x__.rg().values().size(); ++i) { + auto xi = x__.rg().value(i); + auto yi = y__.rg().value(i); + x__.rg().value(i) = xi * c__ + yi * s__; + y__.rg().value(i) = xi * (-s__) + yi * c__; + } + if (x__.ctx().full_potential()) { + for (auto it : x__.ctx().unit_cell().spl_num_atoms()) { + int ia = it.i; + auto& x_f_mt = x__.mt()[ia]; + auto& y_f_mt = y__.mt()[ia]; + #pragma omp for schedule(static) nowait + for (int i = 0; i < static_cast(x__.mt()[ia].size()); i++) { + auto xi = x_f_mt[i]; + auto yi = y_f_mt[i]; + x_f_mt[i] = xi * c__ + yi * s__; + y_f_mt[i] = xi * (-s__) + yi * c__; + } + } + } + } +} + } // namespace sirius #endif // __PERIODIC_FUNCTION_HPP__ diff --git a/src/mixer/anderson_mixer.hpp b/src/mixer/anderson_mixer.hpp index 882bbc6344..198ee41099 100644 --- a/src/mixer/anderson_mixer.hpp +++ b/src/mixer/anderson_mixer.hpp @@ -82,8 +82,6 @@ class Anderson : public Mixer const auto history_size = static_cast(this->history_size_); - const bool normalize = false; - // beta scaling if (this->step_ > this->max_history_) { const double rmse_avg = std::accumulate(this->rmse_history_.begin(), 
this->rmse_history_.end(), 0.0) / @@ -116,20 +114,21 @@ class Anderson : public Mixer for (int i = 0; i <= history_size - 1; ++i) { auto j = this->idx_hist(this->step_ - i - 1); this->S_(history_size - 1, history_size - i - 1) = this->S_(history_size - i - 1, history_size - 1) = - this->template inner_product(this->residual_history_[j], - this->residual_history_[idx_prev_step]); + this->inner_product(this->residual_history_[j], this->residual_history_[idx_prev_step]); } // Make a copy because factorizing destroys the matrix. - for (int i = 0; i < history_size; ++i) - for (int j = 0; j < history_size; ++j) + for (int i = 0; i < history_size; ++i) { + for (int j = 0; j < history_size; ++j) { this->S_factorized_(j, i) = this->S_(j, i); + } + } mdarray h({history_size}); for (int i = 1; i <= history_size; ++i) { - auto j = this->idx_hist(this->step_ - i); - h(history_size - i) = this->template inner_product(this->residual_history_[j], - this->residual_history_[idx_step]); + auto j = this->idx_hist(this->step_ - i); + h(history_size - i) = + this->inner_product(this->residual_history_[j], this->residual_history_[idx_step]); } bool invertible = la::wrap(la::lib_t::lapack).sysolve(history_size, this->S_factorized_, h); diff --git a/src/mixer/anderson_stable_mixer.hpp b/src/mixer/anderson_stable_mixer.hpp index 9959ee8022..3139882370 100644 --- a/src/mixer/anderson_stable_mixer.hpp +++ b/src/mixer/anderson_stable_mixer.hpp @@ -74,8 +74,6 @@ class Anderson_stable : public Mixer const auto idx_next_step = this->idx_hist(this->step_ + 1); const auto idx_step_prev = this->idx_hist(this->step_ - 1); - const bool normalize = false; - const auto history_size = static_cast(this->history_size_); // TODO: beta scaling? @@ -102,8 +100,7 @@ class Anderson_stable : public Mixer // orthogonalize residual_history_[step-1] w.r.t. residual_history_[1:step-2] using modified Gram-Schmidt. 
for (int i = 1; i <= history_size - 1; ++i) { auto j = this->idx_hist(this->step_ - i - 1); - auto sz = this->template inner_product(this->residual_history_[j], - this->residual_history_[idx_step_prev]); + auto sz = this->inner_product(this->residual_history_[j], this->residual_history_[idx_step_prev]); this->R_(history_size - 1 - i, history_size - 1) = sz; this->axpy(-sz, this->residual_history_[j], this->residual_history_[idx_step_prev]); } @@ -111,15 +108,14 @@ class Anderson_stable : public Mixer // repeat orthogonalization.. seems really necessary. for (int i = 1; i <= history_size - 1; ++i) { auto j = this->idx_hist(this->step_ - i - 1); - auto sz = this->template inner_product(this->residual_history_[j], - this->residual_history_[idx_step_prev]); + auto sz = this->inner_product(this->residual_history_[j], this->residual_history_[idx_step_prev]); this->R_(history_size - 1 - i, history_size - 1) += sz; this->axpy(-sz, this->residual_history_[j], this->residual_history_[idx_step_prev]); } // normalize the new residual difference vec itself - auto nrm2 = this->template inner_product(this->residual_history_[idx_step_prev], - this->residual_history_[idx_step_prev]); + auto nrm2 = + this->inner_product(this->residual_history_[idx_step_prev], this->residual_history_[idx_step_prev]); if (nrm2 > 0) { auto sz = std::sqrt(nrm2); @@ -131,9 +127,9 @@ class Anderson_stable : public Mixer // Compute h = Q' * f_n mdarray h({history_size}); for (int i = 1; i <= history_size; ++i) { - auto j = this->idx_hist(this->step_ - i); - h(history_size - i) = this->template inner_product(this->residual_history_[j], - this->residual_history_[idx_step]); + auto j = this->idx_hist(this->step_ - i); + h(history_size - i) = + this->inner_product(this->residual_history_[j], this->residual_history_[idx_step]); } // next compute k = R⁻¹ * h... just do that by hand for now, can dispatch to blas later. 
diff --git a/src/mixer/broyden2_mixer.hpp b/src/mixer/broyden2_mixer.hpp index 39bbbe4d18..829c99498e 100644 --- a/src/mixer/broyden2_mixer.hpp +++ b/src/mixer/broyden2_mixer.hpp @@ -109,12 +109,10 @@ class Broyden2 : public Mixer const auto n = static_cast(std::min(this->step_, this->max_history_ - 1)); - const bool normalize = false; - for (int i = 0; i <= n; ++i) { int j = this->idx_hist(this->step_ - i); - this->S_(n - i, n) = this->S_(n, n - i) = this->template inner_product( - this->residual_history_[j], this->residual_history_[idx_step]); + this->S_(n - i, n) = this->S_(n, n - i) = + this->inner_product(this->residual_history_[j], this->residual_history_[idx_step]); } // Expand (I - Δf₁Δf₁ᵀ/Δf₁ᵀΔf₁)...(I - Δfₙ₋₁Δfₙ₋₁ᵀ/Δfₙ₋₁ᵀΔfₙ₋₁)fₙ diff --git a/src/mixer/mixer.hpp b/src/mixer/mixer.hpp index ef740277d7..a4105f6806 100644 --- a/src/mixer/mixer.hpp +++ b/src/mixer/mixer.hpp @@ -31,8 +31,8 @@ namespace mixer { /// Describes operations on a function type used for mixing. /** The properties contain functions, which determine the behaviour of a given type during mixing. The inner product - * function result is used for calculating mixing parameters. If a function should not contribute to generation of - * mixing parameters, the inner product function should always return 0. + * function result is used for calculating mixing parameters. If a function should not contribute to generation of + * mixing parameters, the inner product function should always return 0. */ template struct FunctionProperties @@ -41,19 +41,18 @@ struct FunctionProperties /// /** - * \param [in] size_ Function, which returns a measure of size of the (global) function. - * \param [in] inner_ Function, which computes the (global) inner product. This determines the contribution - * to mixing parameters rmse. - * \param [in] scal_ Function, which scales the input (x = alpha * x). - * \param [in] copy_ Function, which copies from one object to the other (y = x). 
- * \param [in] axpy_ Function, which scales and adds one object to the other (y = alpha * x + y). + * \param [in] inner_ Function, which computes the (global) inner product. This determines the + * contribution to mixing parameters rmse. + * \param [in] scal_ Function, which scales the input (x = alpha * x). + * \param [in] copy_ Function, which copies from one object to the other (y = x). + * \param [in] axpy_ Function, which scales and adds one object to the other (y = alpha * x + y). + * \param [in] rotate_ Function that computes two new linear combinations out of x,y */ - FunctionProperties(std::function size_, std::function inner_, - std::function scal_, std::function copy_, + FunctionProperties(std::function inner_, std::function scal_, + std::function copy_, std::function axpy_, std::function rotate_) - : size(size_) - , inner(inner_) + : inner(inner_) , scal(scal_) , copy(copy_) , axpy(axpy_) @@ -62,8 +61,7 @@ struct FunctionProperties } FunctionProperties() - : size([](const FUNC&) -> double { return 0; }) - , inner([](const FUNC&, const FUNC&) -> double { return 0.0; }) + : inner([](const FUNC&, const FUNC&) -> double { return 0.0; }) , scal([](double, FUNC&) -> void {}) , copy([](const FUNC&, FUNC&) -> void {}) , axpy([](double, const FUNC&, FUNC&) -> void {}) @@ -71,9 +69,6 @@ struct FunctionProperties { } - // Size proportional to the local contribution of the inner product. - std::function size; // TODO: this sounds more like a normalization factor. - // Inner product function. Determines contribution to mixing. std::function inner; @@ -95,7 +90,7 @@ namespace mixer_impl { /// Compute inner product between pairs of functions in tuples and accumulate in the result. /** This function is used in Broyden mixers to compute inner products of residuals. 
*/ -template +template struct InnerProduct { static double @@ -107,28 +102,14 @@ struct InnerProduct /* compute inner product */ auto v = std::get(function_prop) .inner(*std::get(x), *std::get(y)); - /* normalize if necessary */ - if (normalize) { - auto sx = std::get(function_prop).size(*std::get(x)); - auto sy = std::get(function_prop).size(*std::get(y)); - if (sx != sy) { - throw std::runtime_error("[sirius::mixer::InnerProduct] sizes of two functions don't match"); - } - if (sx) { - v /= sx; - } else { - v = 0; - } - } - result += v; } - return result + InnerProduct::apply(function_prop, x, y); + return result + InnerProduct::apply(function_prop, x, y); } }; -template -struct InnerProduct<0, normalize, FUNCS...> +template +struct InnerProduct<0, FUNCS...> { static double apply(const std::tuple...>& function_prop, const std::tuple...>& x, @@ -136,18 +117,6 @@ struct InnerProduct<0, normalize, FUNCS...> { if (std::get<0>(x) && std::get<0>(y)) { auto v = std::get<0>(function_prop).inner(*std::get<0>(x), *std::get<0>(y)); - if (normalize) { - auto sx = std::get<0>(function_prop).size(*std::get<0>(x)); - auto sy = std::get<0>(function_prop).size(*std::get<0>(y)); - if (sx != sy) { - throw std::runtime_error("[sirius::mixer::InnerProduct] sizes of two functions don't match"); - } - if (sx) { - v /= sx; - } else { - v = 0; - } - } return v; } else { return 0; @@ -361,8 +330,9 @@ class Mixer std::get(functions_).copy(*std::get(output_history_[idx]), output); } - /// Mix input and stored history. Returns the root mean square error computed by inner products of residuals. - /** \param [in] rms_min Minimum root mean square error. Mixing is only performed, if current RMS is above this + /// Mix input and stored history. + /** Returns the root mean square error computed by inner products of residuals. + * \param [in] rms_min Minimum root mean square error. Mixing is only performed, if current RMS is above this * threshold. 
*/ double @@ -371,14 +341,12 @@ class Mixer this->update_residual(); this->update_rms(); double rmse = rmse_history_[idx_hist(step_)]; - if (rmse < rms_min__) { - return rmse; + if (rmse > rms_min__) { + /* call mixing implementation */ + this->mix_impl(); + step_++; } - /* call mixing implementation */ - this->mix_impl(); - - ++step_; return rmse; } @@ -402,7 +370,7 @@ class Mixer const auto idx = idx_hist(step_); /* compute sum of inner products; each inner product is normalized */ - double rmse = inner_product(residual_history_[idx], residual_history_[idx]); + double rmse = inner_product(residual_history_[idx], residual_history_[idx]); /* for very close vectors inner product of residuals can become negative due to the lapw step function in the interstitial (it has some small negative values sometimes) */ rmse = std::max(0.0, rmse); @@ -417,11 +385,10 @@ class Mixer return step % max_history_; } - template double inner_product(const std::tuple...>& x, const std::tuple...>& y) { - return mixer_impl::InnerProduct::apply(functions_, x, y); + return mixer_impl::InnerProduct::apply(functions_, x, y); } void diff --git a/src/mixer/mixer_functions.cpp b/src/mixer/mixer_functions.cpp index f65292481b..bc207b3bdd 100644 --- a/src/mixer/mixer_functions.cpp +++ b/src/mixer/mixer_functions.cpp @@ -22,76 +22,35 @@ namespace mixer { FunctionProperties> periodic_function_property() { - auto global_size_func = [](const Periodic_function& x) -> double { return x.ctx().unit_cell().omega(); }; - auto inner_prod_func = [](const Periodic_function& x, const Periodic_function& y) -> double { - return sirius::inner(x, y); + return inner(x, y); }; - auto scal_function = [](double alpha, Periodic_function& x) -> void { - scale(alpha, x.rg()); - if (x.ctx().full_potential()) { - scale(alpha, x.mt()); - } - }; + auto scal_function = [](double alpha, Periodic_function& x) -> void { x *= alpha; }; - auto copy_function = [](const Periodic_function& x, Periodic_function& y) -> void { - 
copy(x.rg(), y.rg()); - if (x.ctx().full_potential()) { - copy(x.mt(), y.mt()); - } - }; + auto copy_function = [](Periodic_function const& x, Periodic_function& y) -> void { copy(x, y); }; auto axpy_function = [](double alpha, const Periodic_function& x, Periodic_function& y) -> void { - axpy(alpha, x.rg(), y.rg()); - if (x.ctx().full_potential()) { - axpy(alpha, x.mt(), y.mt()); - } + axpy(alpha, x, y); }; auto rotate_function = [](double c, double s, Periodic_function& x, Periodic_function& y) -> void { - #pragma omp parallel - { - #pragma omp for schedule(static) nowait - for (std::size_t i = 0; i < x.rg().values().size(); ++i) { - auto xi = x.rg().value(i); - auto yi = y.rg().value(i); - x.rg().value(i) = xi * c + yi * s; - y.rg().value(i) = xi * -s + yi * c; - } - if (x.ctx().full_potential()) { - for (auto it : x.ctx().unit_cell().spl_num_atoms()) { - int ia = it.i; - auto& x_f_mt = x.mt()[ia]; - auto& y_f_mt = y.mt()[ia]; - #pragma omp for schedule(static) nowait - for (int i = 0; i < static_cast(x.mt()[ia].size()); i++) { - auto xi = x_f_mt[i]; - auto yi = y_f_mt[i]; - x_f_mt[i] = xi * c + yi * s; - y_f_mt[i] = xi * -s + yi * c; - } - } - } - } + rotate(c, s, x, y); }; - return FunctionProperties>(global_size_func, inner_prod_func, scal_function, - copy_function, axpy_function, rotate_function); + return FunctionProperties>(inner_prod_func, scal_function, copy_function, axpy_function, + rotate_function); } /// Only for the PP-PW case. 
FunctionProperties> -periodic_function_property_modified(bool use_coarse_gvec__) +periodic_function_property_rho_pw(bool use_coarse_gvec__) { - auto global_size_func = [](Periodic_function const& x) -> double { - return 1.0 / x.ctx().unit_cell().omega(); - }; - auto inner_prod_func = [use_coarse_gvec__](Periodic_function const& x, Periodic_function const& y) -> double { double result{0}; if (use_coarse_gvec__) { + #pragma omp parallel for reduction(+:result) for (int igloc = x.ctx().gvec_coarse().skip_g0(); igloc < x.ctx().gvec_coarse().count(); igloc++) { /* local index in fine G-vector list */ int ig1 = x.ctx().gvec().gvec_base_mapping(igloc); @@ -100,6 +59,7 @@ periodic_function_property_modified(bool use_coarse_gvec__) std::pow(x.ctx().gvec().gvec_len(gvec_index_t::local(ig1)), 2); } } else { + #pragma omp parallel for reduction(+:result) for (int igloc = x.ctx().gvec().skip_g0(); igloc < x.ctx().gvec().count(); igloc++) { result += std::real(std::conj(x.rg().f_pw_local(igloc)) * y.rg().f_pw_local(igloc)) / std::pow(x.ctx().gvec().gvec_len(gvec_index_t::local(igloc)), 2); @@ -108,46 +68,75 @@ periodic_function_property_modified(bool use_coarse_gvec__) if (x.ctx().gvec().reduced()) { result *= 2; } - result *= fourpi; + result *= (twopi * x.ctx().unit_cell().omega()); x.ctx().comm().allreduce(&result, 1); return result; }; - auto scal_function = [](double alpha, Periodic_function& x) -> void { scale(alpha, x.rg()); }; + auto scal_function = [](double alpha, Periodic_function& x) -> void { x *= alpha; }; - auto copy_function = [](Periodic_function const& x, Periodic_function& y) -> void { - copy(x.rg(), y.rg()); - }; + auto copy_function = [](Periodic_function const& x, Periodic_function& y) -> void { copy(x, y); }; auto axpy_function = [](double alpha, const Periodic_function& x, Periodic_function& y) -> void { - axpy(alpha, x.rg(), y.rg()); + axpy(alpha, x, y); }; auto rotate_function = [](double c, double s, Periodic_function& x, Periodic_function& y) -> void 
{ - #pragma omp parallel for schedule(static) - for (std::size_t i = 0; i < x.rg().values().size(); ++i) { - auto xi = x.rg().value(i); - auto yi = y.rg().value(i); - x.rg().value(i) = xi * c + yi * s; - y.rg().value(i) = xi * -s + yi * c; - } + rotate(c, s, x, y); }; - return FunctionProperties>(global_size_func, inner_prod_func, scal_function, - copy_function, axpy_function, rotate_function); + return FunctionProperties>(inner_prod_func, scal_function, copy_function, axpy_function, + rotate_function); } -FunctionProperties -density_function_property() +/// Only for the PP-PW case. +FunctionProperties> +periodic_function_property_mag_pw(bool use_coarse_gvec__) { - auto global_size_func = [](density_matrix_t const& x) -> double { - size_t result{0}; - for (auto& e : x) { - result += e.size(); + auto inner_prod_func = [use_coarse_gvec__](Periodic_function const& x, + Periodic_function const& y) -> double { + double result{0}; + if (use_coarse_gvec__) { + #pragma omp parallel for reduction(+:result) + for (int igloc = x.ctx().gvec_coarse().skip_g0(); igloc < x.ctx().gvec_coarse().count(); igloc++) { + /* local index in fine G-vector list */ + int ig1 = x.ctx().gvec().gvec_base_mapping(igloc); + + result += std::real(std::conj(x.rg().f_pw_local(ig1)) * y.rg().f_pw_local(ig1)); + } + } else { + #pragma omp parallel for reduction(+:result) + for (int igloc = x.ctx().gvec().skip_g0(); igloc < x.ctx().gvec().count(); igloc++) { + result += std::real(std::conj(x.rg().f_pw_local(igloc)) * y.rg().f_pw_local(igloc)); + } } + if (x.ctx().gvec().reduced()) { + result *= 2; + } + result *= (0.5 * x.ctx().unit_cell().omega() / pi); + x.ctx().comm().allreduce(&result, 1); return result; }; + auto scal_function = [](double alpha, Periodic_function& x) -> void { x *= alpha; }; + + auto copy_function = [](Periodic_function const& x, Periodic_function& y) -> void { copy(x, y); }; + + auto axpy_function = [](double alpha, const Periodic_function& x, Periodic_function& y) -> void { + 
axpy(alpha, x, y); + }; + + auto rotate_function = [](double c, double s, Periodic_function& x, Periodic_function& y) -> void { + rotate(c, s, x, y); + }; + + return FunctionProperties>(inner_prod_func, scal_function, copy_function, axpy_function, + rotate_function); +} + +FunctionProperties +density_function_property() +{ auto inner_prod_func = [](density_matrix_t const& x, density_matrix_t const& y) -> double { // do not contribute to mixing return 0.0; @@ -189,15 +178,13 @@ density_function_property() } }; - return FunctionProperties(global_size_func, inner_prod_func, scal_function, copy_function, - axpy_function, rotate_function); + return FunctionProperties(inner_prod_func, scal_function, copy_function, axpy_function, + rotate_function); } FunctionProperties> paw_density_function_property() { - auto global_size_func = [](PAW_density const& x) -> double { return x.unit_cell().num_paw_atoms(); }; - auto inner_prod_func = [](PAW_density const& x, PAW_density const& y) -> double { return inner(x, y); }; @@ -245,15 +232,13 @@ paw_density_function_property() } }; - return FunctionProperties>(global_size_func, inner_prod_func, scale_func, copy_function, - axpy_function, rotate_function); + return FunctionProperties>(inner_prod_func, scale_func, copy_function, axpy_function, + rotate_function); } FunctionProperties hubbard_matrix_function_property() { - auto global_size_func = [](Hubbard_matrix const& x) -> double { return 1.0; }; - auto inner_prod_func = [](Hubbard_matrix const& x, Hubbard_matrix const& y) -> double { /* do not contribute to mixing */ return 0; @@ -281,6 +266,7 @@ hubbard_matrix_function_property() } }; + // TODO: check with Mathieu which copy function is the one; replace auto copy_func = [](Hubbard_matrix const& x, Hubbard_matrix& y) -> void { for (size_t at_lvl = 0; at_lvl < x.local().size(); at_lvl++) { copy(x.local(at_lvl), y.local(at_lvl)); @@ -350,8 +336,7 @@ hubbard_matrix_function_property() } }; - return FunctionProperties(global_size_func, 
inner_prod_func, scale_func, copy_func, axpy_func, - rotate_func); + return FunctionProperties(inner_prod_func, scale_func, copy_func, axpy_func, rotate_func); } } // namespace mixer diff --git a/src/mixer/mixer_functions.hpp b/src/mixer/mixer_functions.hpp index 03acb7d636..5ed5b3eee4 100644 --- a/src/mixer/mixer_functions.hpp +++ b/src/mixer/mixer_functions.hpp @@ -29,7 +29,10 @@ FunctionProperties> periodic_function_property(); FunctionProperties> -periodic_function_property_modified(bool use_coarse_gvec__); +periodic_function_property_rho_pw(bool use_coarse_gvec__); + +FunctionProperties> +periodic_function_property_mag_pw(bool use_coarse_gvec__); FunctionProperties density_function_property(); diff --git a/src/potential/poisson.cpp b/src/potential/poisson.cpp index a0b10cb535..8d080424cb 100644 --- a/src/potential/poisson.cpp +++ b/src/potential/poisson.cpp @@ -17,25 +17,6 @@ namespace sirius { -double -density_residual_hartree_energy(Density const& rho1__, Density const& rho2__) -{ - double eh{0}; - auto const& gv = rho1__.ctx().gvec(); - #pragma omp parallel for reduction(+:eh) - for (int igloc = gv.skip_g0(); igloc < gv.count(); igloc++) { - auto z = rho1__.component(0).rg().f_pw_local(igloc) - rho2__.component(0).rg().f_pw_local(igloc); - double g = gv.gvec_len(gvec_index_t::local(igloc)); - eh += (std::pow(z.real(), 2) + std::pow(z.imag(), 2)) / std::pow(g, 2); - } - gv.comm().allreduce(&eh, 1); - eh *= twopi * rho1__.ctx().unit_cell().omega(); - if (gv.reduced()) { - eh *= 2; - } - return eh; -} - void Potential::poisson_add_pseudo_pw(mdarray, 2>& qmt__, mdarray, 2>& qit__, std::complex* rho_pw__) diff --git a/src/potential/potential.hpp b/src/potential/potential.hpp index ffa8f5973f..e25617bed1 100644 --- a/src/potential/potential.hpp +++ b/src/potential/potential.hpp @@ -31,9 +31,6 @@ double xc_mt(Radial_grid const& rgrid__, SHT const& sht__, std::vector const& xc_func__, int num_mag_dims__, std::vector rho__, std::vector vxc__, Flm* exc__, bool 
use_lalp__); -double -density_residual_hartree_energy(Density const& rho1__, Density const& rho2__); - /// Generate effective potential from charge density and magnetization. /** \note At some point we need to update the atomic potential with the new MT potential. This is simple if the effective potential is a global function. Otherwise we need to pass the effective potential between MPI ranks. diff --git a/src/sirius.hpp b/src/sirius.hpp index 0e2597b5a2..46b8bee621 100644 --- a/src/sirius.hpp +++ b/src/sirius.hpp @@ -70,6 +70,15 @@ energy_acc() } #endif +struct null_buffer : std::streambuf +{ + int + overflow(int c) override + { + return c; + } +}; + /// Initialize the library. inline void initialize(bool call_mpi_init__ = true) @@ -84,6 +93,7 @@ initialize(bool call_mpi_init__ = true) energy_acc() = -power::device_energy(); #endif if (call_mpi_init__) { + PROFILE("sirius::initialize::mpi"); mpi::Communicator::initialize(MPI_THREAD_MULTIPLE); } #if defined(__APEX) @@ -97,9 +107,17 @@ initialize(bool call_mpi_init__ = true) std::printf("# Warning! 
Compiled in 'debug' mode with assert statements enabled!\n"); #endif } + + // uncomment this if you want to suppress std::cout from all MPI ranks except from rank=0 + //static null_buffer null; + //if (mpi::Communicator::world().rank() != 0) { + // std::cout.rdbuf(&null); + //} + /* get number of ranks per node during the global call to sirius::initialize() */ mpi::num_ranks_per_node(); if (acc::num_devices() > 0) { + PROFILE("sirius::initialize::acc"); int devid = mpi::get_device_id(acc::num_devices()); acc::set_device_id(devid); /* create extensive amount of streams */ diff --git a/verification/test06/sirius.json b/verification/test06/sirius.json index 8e3643b3f5..63955b62b6 100644 --- a/verification/test06/sirius.json +++ b/verification/test06/sirius.json @@ -61,7 +61,7 @@ "mixer" : { "beta" : 0.5, - "type" : "broyden2", + "!type" : "broyden2", "max_history" : 6, "use_hartree" : true } diff --git a/verification/test07/sirius.json b/verification/test07/sirius.json index df2c48fa1a..79ec08cb5f 100644 --- a/verification/test07/sirius.json +++ b/verification/test07/sirius.json @@ -48,7 +48,7 @@ "atom_types": ["Ni"], "atoms": { "Ni": [ - [0.0,0.0,0.0, 0,0,1] + [0.0,0.0,0.0, 0,0,2] ] }, "lattice_vectors": [ diff --git a/verification/test09/output_ref.json b/verification/test09/output_ref.json index 69bfe4ebdc..e1a7ac7728 100644 --- a/verification/test09/output_ref.json +++ b/verification/test09/output_ref.json @@ -27,6 +27,41 @@ "verbosity": 2, "verification": 0 }, + "dftd3": { + "damping": "rational", + "damping_values": "auto", + "method": "none", + "parameters": { + "a1": 0.0, + "a2": 0.0, + "alp": 0.0, + "beta": 0.0, + "rs6": 0.0, + "rs8": 0.0, + "s6": 0.0, + "s8": 0.0, + "s9": 0.0 + }, + "three_body": true + }, + "dftd4": { + "damping": "rational", + "damping_values": "auto", + "method": "none", + "parameters": { + "a1": 0.0, + "a2": 0.0, + "alp": 0.0, + "bet": 0.0, + "enable": false, + "rs6": 0.0, + "rs8": 0.0, + "s6": 0.0, + "s8": 0.0, + "s9": 0.0 + }, + 
"three_body": true + }, "hubbard": { "constrained_calculation": false, "constraint_beta_mixing": 0.4, @@ -63,12 +98,12 @@ }, "locked": true, "mixer": { - "beta": 0.15, + "beta": 0.5, "beta0": 0.15, "beta_scaling_factor": 1.0, "max_history": 8, "rms_min": 1e-16, - "type": "broyden2", + "type": "anderson", "use_hartree": true }, "nlcg": { @@ -84,10 +119,13 @@ "auto_rmt": 1, "aw_cutoff": 0.0, "core_relativity": "dirac", - "density_tol": 1e-06, + "density_tol": 1e-08, + "dftd3_correction": false, + "dftd4_correction": false, "electronic_structure_method": "pseudopotential", "energy_tol": 1e-08, "extra_charge": 0, + "fixed_mag": 0, "gamma_point": false, "gk_cutoff": 7.0, "hubbard_correction": false, @@ -98,7 +136,7 @@ "ngridk": [4,4,4], "nn_radius": -1, "num_bands": 38, - "num_dft_iter": 100, + "num_dft_iter": 200, "num_fv_states": -1, "num_mag_dims": 3, "precision_gs": "auto", @@ -129,9 +167,12 @@ "nprii_vloc": 200, "pseudo_grid_cutoff": 10.0, "radial_grid": "exponential, 1.0", + "real_occupation_matrix": false, "sht_coverage": 0, + "sht_lmax": -1, "simple_lapw_ri": false, "smooth_initial_mag": false, + "tol_ne": 0.01, "use_coarse_fft_grid": true, "xc_use_lapl": false }, @@ -143,7 +184,7 @@ "atom_types": ["Ni"], "atoms": { "Ni": [ - [0.0,0.0,0.0] + [0.0,0.0,0.0,0.0,0.0,1.0] ] }, "lattice_vectors": [ @@ -170,49 +211,51 @@ "omega": 73.39284359469754 }, "counters": { - "band_evp_work_count": 2403.7795597025856, - "local_operator_num_applied": 18142 + "band_evp_work_count": 2227.5290311998892, + "local_operator_num_applied": 17673 }, - "git_hash": "0c6b4637d99ced7eea1dc26901a0221db55b23ce", + "git_hash": "463749bce37a1287336017dc11cdcddda6cc7cef", "ground_state": { "band_gap": 0.0, "converged": true, - "efermi": 0.6557800664364786, + "efermi": 0.6557802366927981, "energy": { - "bxc": -0.010470745238370311, - "entropy_sum": -0.001494326745922828, - "eval_sum": -10.617886973338702, - "ewald": -111.75579558183638, - "exc": -17.820768180240353, - "free": -171.85432926525468, - 
"kin": 48.72292110523524, - "scf_correction": -2.909933141381771e-05, - "total": -171.85283493850875, - "veff": -59.330337333335564, - "vha": 107.96333691047042, - "vloc": -144.9808316375798, - "vxc": -22.312842606234938 + "bxc": -0.010472257736889786, + "dftd3": 0.0, + "dftd4": 0.0, + "entropy_sum": -0.0014943707877355806, + "eval_sum": -10.617894568599278, + "ewald": -111.75579557089988, + "exc": -17.820770016735647, + "free": -171.8543292484083, + "kin": 48.72289758879303, + "scf_correction": 5.010769612567856e-07, + "total": -171.85283487762058, + "veff": -59.330319899655414, + "vha": 107.96338385189264, + "vloc": -144.98085930580308, + "vxc": -22.3128444457467 }, - "etot_history": [-171.79325126286835,-171.43985660868356,-171.85716446407747,-171.84983295324238,-171.85359306173083,-171.853306751309,-171.85262600113094,-171.8528662490883,-171.85285290035625,-171.852841460192,-171.85283661663274,-171.8528327433734,-171.85283327208722,-171.85283398156966,-171.85283405031998,-171.85283459721808,-171.8528348035663,-171.85283494122925,-171.85283494798668,-171.8528349374169,-171.85283493850875], + "etot_history": [-165.20277332050176,-170.83658516476748,-171.8224473280885,-171.85001663166307,-171.84619892519495,-171.86466301547975,-171.84558720562066,-171.85296829907824,-171.85289979932065,-171.85289853706547,-171.85287553177082,-171.85284000799726,-171.85283173165755,-171.85283256036288,-171.8528351504585,-171.85283496553507,-171.85283502211846,-171.85283485626272,-171.8528348669397,-171.8528348796197,-171.85283487762058], "forces": [ [0.0,0.0,0.0] ], "magnetisation": { "atoms": [ - [5.074929065753288e-14,-9.79841625182196e-15,0.6234475185298044] + [-5.362074180946243e-07,-1.8314458428144874e-06,0.6234803505759127] ], - "total": [4.791847966935517e-14,-9.020174423279415e-15,0.5870547396141836] + "total": [-1.3202440873683128e-06,-4.509346828428379e-06,0.5870266623689381] }, "num_scf_iterations": 20, - "rho_min": 0.026503709235553462, - "rms_history": 
[5.5908151129756325,2.549149992947269,0.22818171188469558,0.2179292685482527,0.05645964319833182,0.06901006251982635,0.052879992473048,0.010634752636826776,0.009847401569749798,0.0076247346960664816,0.003005067003441252,0.000748979298254481,0.0010404354968139898,0.0005021327826783438,0.00042632751733515366,0.00034731835443184064,0.00015238198955741717,1.4777200578237409e-05,2.600429680867714e-05,1.937406820925168e-05,1.2057628361449534e-05], - "scf_time": 29.360402692, + "rho_min": 0.02650355735413884, + "rms_history": [3.9535032076700025,3.1267938172850664,0.4792130183683345,0.062202091896587076,0.08998363748581666,0.2127421065566379,0.26003995503031213,0.040928312431893925,0.030189843932950832,0.020049690641022865,0.01330784684576,0.0102757796350612,0.00039607591864286427,0.00029122906709065855,3.829700919976523e-05,1.777247914952794e-05,2.144960574241469e-05,1.845129106509047e-05,1.2240840608745643e-05,8.19901012290962e-06,6.723629067819086e-06], + "scf_time": 10.401314524, "stress": [ - [5.589717332338956e-05,3.5806012813214917e-19,-5.4210108489640164e-20], - [3.5806012813214917e-19,5.58971733233965e-05,-3.9871771000430245e-19], - [-5.4210108489640164e-20,-3.9871771000430245e-19,5.5897173323479765e-05] + [6.850334737883157e-05,3.590230782880158e-19,-6.324512672831888e-20], + [3.590230782880158e-19,6.850334737877606e-05,-4.132331869163532e-19], + [-6.324512672831888e-20,-4.132331869163532e-19,6.850334737946995e-05] ] }, "task": 0, - "threads_per_rank": 1 + "threads_per_rank": 4 } \ No newline at end of file diff --git a/verification/test09/sirius.json b/verification/test09/sirius.json index 6d66880bcd..4eb42c93f9 100644 --- a/verification/test09/sirius.json +++ b/verification/test09/sirius.json @@ -25,9 +25,7 @@ "pw_cutoff" : 20.00, "energy_tol" : 1e-8, - "density_tol" : 1e-6, - - "num_dft_iter" : 100, + "density_tol" : 1e-5, "ngridk" : [4,4,4] }, @@ -56,8 +54,7 @@ ] }, "mixer" : { - "beta" : 0.15, - "use_hartree" : true, - "type" : "broyden2" + "beta" : 0.5, + 
"use_hartree" : true } } diff --git a/verification/test25/output_ref.json b/verification/test25/output_ref.json index 74302a2aa7..a5bf6715dc 100644 --- a/verification/test25/output_ref.json +++ b/verification/test25/output_ref.json @@ -27,6 +27,41 @@ "verbosity": 2, "verification": 0 }, + "dftd3": { + "damping": "rational", + "damping_values": "auto", + "method": "none", + "parameters": { + "a1": 0.0, + "a2": 0.0, + "alp": 0.0, + "beta": 0.0, + "rs6": 0.0, + "rs8": 0.0, + "s6": 0.0, + "s8": 0.0, + "s9": 0.0 + }, + "three_body": true + }, + "dftd4": { + "damping": "rational", + "damping_values": "auto", + "method": "none", + "parameters": { + "a1": 0.0, + "a2": 0.0, + "alp": 0.0, + "bet": 0.0, + "enable": false, + "rs6": 0.0, + "rs8": 0.0, + "s6": 0.0, + "s8": 0.0, + "s9": 0.0 + }, + "three_body": true + }, "hubbard": { "constrained_calculation": false, "constraint_beta_mixing": 0.4, @@ -39,7 +74,7 @@ { "J": 0.0, "J0": 0.0, - "U": 8.0, + "U": 0.293995, "alpha": 0.0, "atom_type": "Ni", "beta": 0.0, @@ -62,11 +97,15 @@ "init_subspace": "lcao", "locking": true, "min_num_res": 0, + "min_occupancy": 1e-14, + "min_tolerance": 1e-13, "num_singular": -1, "num_steps": 20, "relative_tolerance": 0, "residual_tolerance": 1e-06, "subspace_size": 2, + "tolerance_ratio": 0, + "tolerance_scale": [0.1,0.5], "type": "davidson" }, "locked": true, @@ -74,10 +113,10 @@ "beta": 0.5, "beta0": 0.15, "beta_scaling_factor": 1.0, - "linear_mix_rms_tol": 1000000.0, "max_history": 8, + "rms_min": 1e-16, "type": "anderson", - "use_hartree": false + "use_hartree": true }, "nlcg": { "T": 300.0, @@ -93,9 +132,12 @@ "aw_cutoff": 0.0, "core_relativity": "dirac", "density_tol": 1e-06, + "dftd3_correction": false, + "dftd4_correction": false, "electronic_structure_method": "pseudopotential", "energy_tol": 1e-06, "extra_charge": 0, + "fixed_mag": 0, "gamma_point": false, "gk_cutoff": 8.0, "hubbard_correction": true, @@ -128,21 +170,22 @@ "xc_functionals": ["XC_GGA_X_PBE","XC_GGA_C_PBE"] }, "settings": { 
- "always_update_wf": true, "auto_enu_tol": 0, "fft_grid_size": [80,80,80], "fp32_to_fp64_rms": 0, - "itsol_tol_min": 1e-13, - "itsol_tol_ratio": 0, - "itsol_tol_scale": [0.1,0.5], - "min_occupancy": 1e-14, - "mixer_rms_min": 1e-16, "nprii_aug": 20, "nprii_beta": 20, "nprii_rho_core": 20, "nprii_vloc": 200, + "pseudo_grid_cutoff": 10.0, "radial_grid": "exponential, 1.0", + "real_occupation_matrix": false, "sht_coverage": 0, + "sht_lmax": -1, + "simple_lapw_ri": false, + "smooth_initial_mag": false, + "tol_ne": 0.01, + "use_coarse_fft_grid": true, "xc_use_lapl": false }, "unit_cell": { @@ -154,12 +197,12 @@ "atom_types": ["Ni","O"], "atoms": { "Ni": [ - [0.0,0.0,0.0], - [0.5,0.5,-0.5] + [0.0,0.0,0.0,0.0,0.0,2.0], + [0.5,0.5,-0.5,0.0,0.0,-2.0] ], "O": [ - [0.751,0.751,0.751], - [0.249,0.249,0.249] + [0.751,0.751,0.751,0.0,0.0,0.0], + [0.249,0.249,0.249,0.0,0.0,0.0] ] }, "lattice_vectors": [ @@ -186,50 +229,52 @@ "omega": 244.65193599999998 }, "counters": { - "band_evp_work_count": 6769.204355790763, - "local_operator_num_applied": 58771 + "band_evp_work_count": 5749.234098310588, + "local_operator_num_applied": 41278 }, - "git_hash": "a0d09bff305c7d162e6e43a792018b99bd33b276", + "git_hash": "463749bce37a1287336017dc11cdcddda6cc7cef", "ground_state": { - "band_gap": 0.11802688817837326, + "band_gap": 0.1180270031668248, "converged": true, - "efermi": 0.5433353639715012, + "efermi": 0.5433376914749234, "energy": { - "bxc": -0.21859449955146065, - "entropy_sum": -2.4255684305789004e-16, - "eval_sum": -26.803872906548193, - "ewald": -239.32629517316133, - "exc": -42.97446413308467, - "free": -375.4035050739258, - "kin": 115.09052586559307, - "scf_correction": 9.118537036556518e-09, - "total": -375.4035050739258, - "veff": -141.6758042725898, - "vha": 245.08050938458632, - "vloc": -333.063709062216, - "vxc": -53.692604594986385 + "bxc": -0.21859453099694978, + "dftd3": 0.0, + "dftd4": 0.0, + "entropy_sum": -2.431405094633902e-16, + "eval_sum": -26.803879480580203, + 
"ewald": -239.32629516891166, + "exc": -42.97446376920979, + "free": -375.40350472129916, + "kin": 115.090520477905, + "scf_correction": 8.823249686429335e-07, + "total": -375.40350472129916, + "veff": -141.67580542748826, + "vha": 245.08050350114928, + "vloc": -333.0637048237149, + "vxc": -53.69260410493752 }, - "etot_history": [-363.01882295940095,-372.6209083877894,-374.1974706358724,-375.21670416636,-375.22582404563684,-374.81068657545876,-375.3582464265443,-375.4112652109265,-375.415163391737,-375.40470669561245,-375.40217333107836,-375.4054673696777,-375.4036120700799,-375.4035418057176,-375.4035491118826,-375.40357354524747,-375.4034761581902,-375.40345661730737,-375.4034930484215,-375.40350645913406,-375.40350944571964,-375.40350524502264,-375.40350490168555,-375.40350508450126,-375.4035051163307,-375.40350508441566,-375.40350505428034,-375.4035050072223,-375.4035050216652,-375.40350511462066,-375.4035050875652,-375.403505078375,-375.4035050765659,-375.40350507567865,-375.4035050709074,-375.4035050737591,-375.4035050739258], + "etot_history": [-363.01540688727823,-372.3296125505617,-374.43690487677566,-375.1977781956407,-375.26689436890024,-374.7228352759363,-375.4557820825973,-375.40339120708927,-375.39866665166085,-375.4042784049744,-375.40367204105485,-375.40378287259864,-375.40349255195747,-375.4035069395142,-375.40348950139,-375.40350101628485,-375.4035143821575,-375.40350954721794,-375.40350473893943,-375.40350472129916], "forces": [ [0.0,0.0,0.0], [0.0,0.0,0.0], - [-0.0031472582173004897,-0.0031472582173004897,-0.003147258217300462], - [0.0031472582173004897,0.0031472582173005035,0.0031472582173004897] + [-0.003146859037271426,-0.0031468590372713983,-0.0031468590372713983], + [0.0031468590372713983,0.0031468590372713706,0.0031468590372713983] ], "magnetisation": { "atoms": [ - [0.0,0.0,1.7398688034258583], - [0.0,0.0,-1.7389451666005602], - [0.0,0.0,-0.00048123316531149205], - [0.0,0.0,0.0004301298158802179] + [0.0,0.0,1.739869127453484], + 
[0.0,0.0,-1.7389453650260294], + [0.0,0.0,-0.00048182087177273244], + [0.0,0.0,0.00042954309197469663] ], - "total": [0.0,0.0,-5.596852471215777e-10] + "total": [0.0,0.0,9.595913934979144e-10] }, - "num_scf_iterations": 36, - "rho_min": 0.01147548166842359, - "rms_history": [0.2608354247279235,0.295798049249155,0.08752557935096265,0.04860013029518923,0.0440468309977525,0.04217937173992232,0.01361634157104944,0.0049305478463928555,0.004068627173186378,0.0016711848358534152,0.0007486507956958945,0.00015731801929434578,5.7420419825204416e-05,2.9045469334817406e-05,1.9950335758343885e-05,1.3346691843031192e-05,5.905733263317464e-06,4.11159628058218e-06,1.0037347484528199e-06,5.579930436682524e-07,2.472257954889928e-07,8.509055565974707e-08,5.089124250374881e-08,3.515915050900785e-08,4.4925038286985674e-08,3.006162344315359e-08,3.203266782291662e-08,1.8071027542594174e-08,1.3006767057131359e-08,8.174921308712124e-09,5.847791142468431e-09,4.827483502782414e-09,4.305476349055417e-09,3.2184651156804377e-09,2.2807985504373038e-08,3.422207006171203e-09,3.810251678328596e-09], - "scf_time": 99.879586612 + "num_scf_iterations": 19, + "rho_min": 0.01147548110904405, + "rms_history": [5.033913018124934,5.416953171192131,1.4014083575162628,0.4519610777908385,0.319815887170792,0.3479823809259652,0.1773202879609523,0.07568632353624972,0.02673239603874862,0.00475155580158716,0.0023355406231775266,0.0003159510677868313,7.905651601059274e-05,4.273158576179883e-05,2.7188053367760753e-05,1.7708606229845675e-05,7.453298930344168e-06,1.907404565278266e-06,6.86693198728036e-07,2.825824745564471e-07], + "scf_time": 64.23953482 }, "task": 0, - "threads_per_rank": 16 + "threads_per_rank": 4 } \ No newline at end of file diff --git a/verification/test25/sirius.json b/verification/test25/sirius.json index 97d1499230..8bd90e6313 100644 --- a/verification/test25/sirius.json +++ b/verification/test25/sirius.json @@ -41,7 +41,7 @@ "beta_scaling_factor": 1.0, "max_history": 8, "type": "anderson", - 
"use_hartree": false + "use_hartree": true }, "parameters": { "density_tol": 1e-06, diff --git a/verification/test26/output_ref.json b/verification/test26/output_ref.json index 7ad0540dc7..b1f4fc2c70 100644 --- a/verification/test26/output_ref.json +++ b/verification/test26/output_ref.json @@ -8,7 +8,7 @@ "beta_on_device": false, "cyclic_block_size": 32, "fft_mode": "parallel", - "gen_evp_solver_name": "lapack", + "gen_evp_solver_name": "cusolver", "gvec_chunk_size": 500000, "mpi_grid_dims": [1,1], "num_bands_to_print": 10, @@ -17,16 +17,51 @@ "print_forces": true, "print_neighbors": false, "print_stress": true, - "processing_unit": "cpu", + "processing_unit": "gpu", "reduce_gvec": true, "rmt_max": 2.2, "save_rf": false, "spglib_tolerance": 1e-06, - "std_evp_solver_name": "lapack", + "std_evp_solver_name": "cusolver", "use_second_variation": true, - "verbosity": 2, + "verbosity": 3, "verification": 0 }, + "dftd3": { + "damping": "rational", + "damping_values": "auto", + "method": "none", + "parameters": { + "a1": 0.0, + "a2": 0.0, + "alp": 0.0, + "beta": 0.0, + "rs6": 0.0, + "rs8": 0.0, + "s6": 0.0, + "s8": 0.0, + "s9": 0.0 + }, + "three_body": true + }, + "dftd4": { + "damping": "rational", + "damping_values": "auto", + "method": "none", + "parameters": { + "a1": 0.0, + "a2": 0.0, + "alp": 0.0, + "bet": 0.0, + "enable": false, + "rs6": 0.0, + "rs8": 0.0, + "s6": 0.0, + "s8": 0.0, + "s9": 0.0 + }, + "three_body": true + }, "hubbard": { "constrained_calculation": false, "constraint_beta_mixing": 0.4, @@ -38,7 +73,7 @@ "local": [ { "J": 0.0, - "U": 8.0, + "U": 0.293995, "atom_type": "Ni", "hubbard_orbital": "3d", "l": 2, @@ -60,11 +95,15 @@ "init_subspace": "lcao", "locking": true, "min_num_res": 0, + "min_occupancy": 1e-14, + "min_tolerance": 1e-13, "num_singular": -1, "num_steps": 20, "relative_tolerance": 0, "residual_tolerance": 1e-06, "subspace_size": 2, + "tolerance_ratio": 0, + "tolerance_scale": [0.1,0.5], "type": "davidson" }, "locked": true, @@ -72,8 +111,8 
@@ "beta": 0.75, "beta0": 0.15, "beta_scaling_factor": 1.0, - "linear_mix_rms_tol": 1000000.0, "max_history": 8, + "rms_min": 1e-16, "type": "anderson", "use_hartree": false }, @@ -91,9 +130,12 @@ "aw_cutoff": 0.0, "core_relativity": "dirac", "density_tol": 1e-05, + "dftd3_correction": false, + "dftd4_correction": false, "electronic_structure_method": "pseudopotential", "energy_tol": 1e-08, "extra_charge": 0, + "fixed_mag": 0, "gamma_point": false, "gk_cutoff": 6.325, "hubbard_correction": true, @@ -126,21 +168,22 @@ "xc_functionals": ["XC_GGA_X_PBE","XC_GGA_C_PBE"] }, "settings": { - "always_update_wf": true, "auto_enu_tol": 0, "fft_grid_size": [80,80,80], "fp32_to_fp64_rms": 0, - "itsol_tol_min": 1e-13, - "itsol_tol_ratio": 0, - "itsol_tol_scale": [0.1,0.5], - "min_occupancy": 1e-14, - "mixer_rms_min": 1e-16, "nprii_aug": 20, "nprii_beta": 20, "nprii_rho_core": 20, "nprii_vloc": 200, + "pseudo_grid_cutoff": 10.0, "radial_grid": "exponential, 1.0", + "real_occupation_matrix": false, "sht_coverage": 0, + "sht_lmax": -1, + "simple_lapw_ri": false, + "smooth_initial_mag": false, + "tol_ne": 0.01, + "use_coarse_fft_grid": true, "xc_use_lapl": false }, "unit_cell": { @@ -152,12 +195,12 @@ "atom_types": ["Ni","O"], "atoms": { "Ni": [ - [0.0,0.0,0.0], - [0.5,0.5,0.5] + [0.0,0.0,0.0,0.0,0.0,2.0], + [0.5,0.5,0.5,0.0,0.0,-2.0] ], "O": [ - [0.251,0.251,0.251], - [0.749,0.749,0.749] + [0.251,0.251,0.251,0.0,0.0,0.0], + [0.749,0.749,0.749,0.0,0.0,0.0] ] }, "lattice_vectors": [ @@ -184,55 +227,57 @@ "omega": 249.33862849999997 }, "counters": { - "band_evp_work_count": 6688.102076124586, - "local_operator_num_applied": 58607 + "band_evp_work_count": 6978.806991654811, + "local_operator_num_applied": 60318 }, - "git_hash": "a0d09bff305c7d162e6e43a792018b99bd33b276", + "git_hash": "463749bce37a1287336017dc11cdcddda6cc7cef", "ground_state": { - "band_gap": 0.11290281924383772, + "band_gap": 0.11290280950535486, "converged": true, - "efermi": 0.5246031969630998, + "efermi": 
0.5246031810379566, "energy": { - "bxc": -0.21860314224858804, - "entropy_sum": -2.0153200784608935e-16, - "eval_sum": -27.43459641965035, - "ewald": -237.81730213844847, - "exc": -42.94622990705071, - "free": -375.40235132916143, - "kin": 115.00797407028188, - "scf_correction": 3.446132268436486e-10, - "total": -375.40235132916143, - "veff": -142.22396734768364, - "vha": 246.81645700234355, - "vloc": -335.38603444869347, - "vxc": -53.65438990132184 + "bxc": -0.21860320810266617, + "dftd3": 0.0, + "dftd4": 0.0, + "entropy_sum": -2.0152556498690033e-16, + "eval_sum": -27.43459722643568, + "ewald": -237.81730213506043, + "exc": -42.94623036082675, + "free": -375.40235122162983, + "kin": 115.0079726318267, + "scf_correction": 3.356319666636409e-10, + "total": -375.40235122162983, + "veff": -142.22396665015972, + "vha": 246.81646192812283, + "vloc": -335.3860381677411, + "vxc": -53.65439041053203 }, - "etot_history": [-358.58999689250766,-370.7939397341292,-373.7906019813497,-375.1115769850038,-375.20397095195676,-374.8859215174739,-375.33087086124556,-375.31576423623505,-375.4073318322314,-375.403211974836,-375.40292296572727,-375.4012254351925,-375.4029521933096,-375.4026844414773,-375.40243545257067,-375.4023511169877,-375.4023609322113,-375.4023487100028,-375.40235196497247,-375.4023504866274,-375.402351322365,-375.40235131711506,-375.4023512949838,-375.4023513115126,-375.40235132254816,-375.4023513283945,-375.40235132976153,-375.402351330946,-375.4023513290283,-375.40235132918485,-375.4023513291497,-375.402351329177,-375.40235132915495,-375.4023513291886,-375.40235132913824,-375.4023513291479,-375.40235132916143], + "etot_history": 
[-358.5864119211701,-370.85888112748336,-373.56719572729037,-375.1555581977194,-375.20122176852476,-375.10041990732316,-375.04692341137036,-375.2343820914492,-375.3765086572063,-375.42433935994035,-375.40648492095585,-375.4052733839037,-375.4006015476415,-375.4015741578613,-375.4023725309721,-375.40232439642256,-375.40236340197174,-375.4023329610692,-375.40236166670445,-375.40235271316226,-375.40235171098044,-375.40235048277077,-375.40235103276746,-375.40235122796827,-375.4023512136913,-375.40235122003276,-375.4023512191543,-375.40235122368574,-375.4023512210638,-375.40235122214096,-375.4023512216577,-375.40235122159834,-375.40235122163824,-375.40235122161766,-375.4023512216177,-375.4023512216305,-375.40235122162983], "forces": [ [0.0,0.0,0.0], [0.0,0.0,0.0], - [-0.0030171290616563436,-0.0030171290616563574,-0.0030171290616563436], - [0.0030171290616563574,0.0030171290616563436,0.0030171290616563436] + [-0.0030165752815831805,-0.0030165752815831805,-0.0030165752815831666], + [0.0030165752815831523,0.0030165752815831527,0.003016575281583194] ], "magnetisation": { "atoms": [ - [0.0,0.0,1.744455442288323], - [0.0,0.0,-1.7453510364634455], - [0.0,0.0,0.00045094306790156646], - [0.0,0.0,-0.0004042693409151514] + [0.0,0.0,1.744456124509312], + [0.0,0.0,-1.7453511104450692], + [0.0,0.0,0.00045122806811319143], + [0.0,0.0,-0.0004039865795744099] ], - "total": [0.0,0.0,8.042931013466695e-12] + "total": [0.0,0.0,-2.06291399551573e-12] }, "num_scf_iterations": 36, - "rho_min": 0.010971688340903465, - "rms_history": 
[0.2596322879810102,0.4285994176301848,0.1528012053166588,0.03812836362286105,0.034209967056783155,0.03458204005797634,0.015425330938034193,0.010611246366198786,0.0044446536983854335,0.0028707636219590126,0.0015073453683350818,0.0007035969131992771,0.00021271573943893382,9.953328765793663e-05,1.5157509714032823e-05,1.2613273366679112e-05,6.025079403762893e-06,4.1968887269848075e-06,2.5583851083447167e-06,1.0724038074593278e-06,4.269441374522495e-07,8.002399797602397e-08,3.283893575249995e-08,1.1904845490585115e-08,7.699665766845564e-09,5.7943074386557846e-09,2.9605930976755445e-09,1.2911371262557649e-09,2.0662193073079145e-10,1.0034087024030482e-10,7.752644023807522e-11,8.071376587150208e-11,6.68306232587366e-11,5.3416354401368483e-11,3.840662636864619e-11,1.931497928961017e-11,1.1920226613004456e-11], - "scf_time": 83.167749842, + "rho_min": 0.010971694996820939, + "rms_history": [0.38054009003505357,0.5557735273929559,0.3079465936489544,0.18904766546481008,0.17792211608264197,0.17800897148332898,0.159432021396124,0.06747670365879009,0.03873899224461493,0.02201171220335613,0.011802170449997372,0.008781577514903221,0.0027181871219661927,0.0009352387113104254,0.0002850899485289659,0.00014020677137127833,7.729832978874548e-05,5.030888950285035e-05,3.111321302751715e-05,1.5826226626986854e-05,6.433234597144335e-06,1.5884775055362247e-06,8.411691648688589e-07,8.689044588801541e-08,7.627279464304499e-08,3.935599087075288e-08,2.773743974109966e-08,2.1088354093288338e-08,7.034322324864935e-09,2.1032404343085687e-09,1.3617543028590982e-09,4.578976645990177e-10,2.582698736840132e-10,8.901301566083312e-11,8.165865274854889e-11,1.0774225754427981e-10,1.033922363492105e-10], + "scf_time": 21.713756715, "stress": [ - [-0.00026776187050852295,6.609976010494559e-06,6.609976010494572e-06], - [6.609976010494559e-06,-0.00026776187050850907,6.609976010494573e-06], - [6.609976010494572e-06,6.609976010494573e-06,-0.0002677618705085212] + 
[-0.00026777011673945195,6.618307063803781e-06,6.618307063803789e-06], + [6.618307063803781e-06,-0.00026777011673952307,6.618307063803795e-06], + [6.618307063803789e-06,6.618307063803795e-06,-0.00026777011673956297] ] }, "task": 0, - "threads_per_rank": 16 + "threads_per_rank": 4 } \ No newline at end of file