diff --git a/CMakeLists.txt b/CMakeLists.txt index fb9f80fa..23d058f0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -151,7 +151,7 @@ if(USE_LIBROM) find_package(libROM REQUIRED) if(libROM_FOUND) - set(MGMOL_HAS_LIBROM 1) + add_definitions(-DMGMOL_HAS_LIBROM) endif(libROM_FOUND) endif(USE_LIBROM) @@ -233,6 +233,7 @@ FortranCInterface_HEADER( DGEMM SGEMM DSYMM DSYRK SSYRK DTRMM DTRSM STRSM #lapack DSYEV DPOTRF DPOTRS DPOTRI DSYGST DTRTRS DPOCON DSYGV DLANGE + DGETRF DGETRS DLACPY ) FortranCInterface_HEADER( diff --git a/drivers/check_input.cc b/drivers/check_input.cc index 14dbaa09..56a9d5d8 100644 --- a/drivers/check_input.cc +++ b/drivers/check_input.cc @@ -70,11 +70,13 @@ int main(int argc, char** argv) { MGmolInterface* mgmol; if (ct.isLocMode()) - mgmol = new MGmol(global_comm, *MPIdata::sout, - input_filename, lrs_filename, constraints_filename); + mgmol = new MGmol>(global_comm, + *MPIdata::sout, input_filename, lrs_filename, + constraints_filename); else - mgmol = new MGmol(global_comm, *MPIdata::sout, - input_filename, lrs_filename, constraints_filename); + mgmol = new MGmol>(global_comm, + *MPIdata::sout, input_filename, lrs_filename, + constraints_filename); *MPIdata::sout << " Input parameters OK\n"; diff --git a/drivers/example1.cc b/drivers/example1.cc index 561f5e08..60acb123 100644 --- a/drivers/example1.cc +++ b/drivers/example1.cc @@ -87,11 +87,13 @@ int main(int argc, char** argv) MGmolInterface* mgmol; if (ct.isLocMode()) - mgmol = new MGmol(global_comm, *MPIdata::sout, - input_filename, lrs_filename, constraints_filename); + mgmol = new MGmol>(global_comm, + *MPIdata::sout, input_filename, lrs_filename, + constraints_filename); else - mgmol = new MGmol(global_comm, *MPIdata::sout, - input_filename, lrs_filename, constraints_filename); + mgmol = new MGmol>(global_comm, + *MPIdata::sout, input_filename, lrs_filename, + constraints_filename); if (MPIdata::onpe0) { diff --git a/examples/AOMM/Si216.cfg b/examples/AOMM/Si216.cfg index 
a8c8e2c6..9756a4da 100644 --- a/examples/AOMM/Si216.cfg +++ b/examples/AOMM/Si216.cfg @@ -15,7 +15,7 @@ lz=30.78 [Potentials] pseudopotential=pseudo.Si [Poisson] -solver=CG +solver=PCG [Run] type=QUENCH [Quench] diff --git a/examples/C200H272_1.05gcc/mgmol_md.cfg b/examples/C200H272_1.05gcc/mgmol_md.cfg index 393f83ca..b11e9109 100644 --- a/examples/C200H272_1.05gcc/mgmol_md.cfg +++ b/examples/C200H272_1.05gcc/mgmol_md.cfg @@ -16,7 +16,7 @@ lz=30.568 pseudopotential=pseudo.C_pbe pseudopotential=pseudo.H_pbe [Poisson] -solver=CG +solver=PCG [Run] type=MD [MD] diff --git a/examples/C200H272_1.05gcc/mgmol_opt.cfg b/examples/C200H272_1.05gcc/mgmol_opt.cfg index a1fe927f..96420be8 100644 --- a/examples/C200H272_1.05gcc/mgmol_opt.cfg +++ b/examples/C200H272_1.05gcc/mgmol_opt.cfg @@ -16,7 +16,7 @@ lz=30.568 pseudopotential=pseudo.C_pbe pseudopotential=pseudo.H_pbe [Poisson] -solver=CG +solver=PCG [Run] type=MD [MD] diff --git a/examples/C200H272_1.05gcc/mgmol_quench.cfg b/examples/C200H272_1.05gcc/mgmol_quench.cfg index ef3781ff..38d38907 100644 --- a/examples/C200H272_1.05gcc/mgmol_quench.cfg +++ b/examples/C200H272_1.05gcc/mgmol_quench.cfg @@ -16,7 +16,7 @@ lz=30.568 pseudopotential=pseudo.C_pbe pseudopotential=pseudo.H_pbe [Poisson] -solver=CG +solver=PCG [Run] type=QUENCH [Quench] diff --git a/examples/Cu32/mvp.cfg b/examples/Cu32/mvp.cfg index 69085aef..c4c29d61 100644 --- a/examples/Cu32/mvp.cfg +++ b/examples/Cu32/mvp.cfg @@ -15,7 +15,7 @@ lz=13.6 [Potentials] pseudopotential=pseudo.Cu_ONCVPSP_LDA [Poisson] -solver=CG +solver=PCG [Run] type=QUENCH [Quench] diff --git a/examples/D144localPotentials/mgmol_md.cfg b/examples/D144localPotentials/mgmol_md.cfg index c9a509a8..6e4e9359 100644 --- a/examples/D144localPotentials/mgmol_md.cfg +++ b/examples/D144localPotentials/mgmol_md.cfg @@ -15,7 +15,7 @@ lz=21.2406 [Potentials] pseudopotential=pseudo.H [Poisson] -solver=CG +solver=PCG [Run] type=MD [MD] diff --git a/examples/D144localPotentials/mgmol_quench.cfg 
b/examples/D144localPotentials/mgmol_quench.cfg index 896acfa1..a090ba64 100644 --- a/examples/D144localPotentials/mgmol_quench.cfg +++ b/examples/D144localPotentials/mgmol_quench.cfg @@ -15,7 +15,7 @@ lz=21.2406 [Potentials] pseudopotential=pseudo.H [Poisson] -solver=CG +solver=PCG [Run] type=QUENCH [Quench] diff --git a/examples/Li128/mgmol_quench_hmvp.cfg b/examples/Li128/mgmol_quench_hmvp.cfg index 9ce53512..7a1198e8 100644 --- a/examples/Li128/mgmol_quench_hmvp.cfg +++ b/examples/Li128/mgmol_quench_hmvp.cfg @@ -15,7 +15,7 @@ lz= 26.52 [Potentials] pseudopotential=pseudo.Li_ONCVPSP_LDA [Poisson] -solver=CG +solver=PCG [Run] type=QUENCH [Quench] diff --git a/examples/Li128/mgmol_quench_mvp.cfg b/examples/Li128/mgmol_quench_mvp.cfg index d2d3c1dd..41ed3731 100644 --- a/examples/Li128/mgmol_quench_mvp.cfg +++ b/examples/Li128/mgmol_quench_mvp.cfg @@ -15,7 +15,7 @@ lz= 26.52 [Potentials] pseudopotential=pseudo.Li_ONCVPSP_LDA [Poisson] -solver=CG +solver=PCG [Run] type=QUENCH [Quench] diff --git a/examples/Li16/mvp.cfg b/examples/Li16/mvp.cfg index a2fe2f97..efea30df 100644 --- a/examples/Li16/mvp.cfg +++ b/examples/Li16/mvp.cfg @@ -15,7 +15,7 @@ lz=13.26 [Potentials] pseudopotential=pseudo.Li_ONCVPSP_LDA [Poisson] -solver=CG +solver=PCG [Run] type=QUENCH [Quench] diff --git a/examples/Li2GTH/davidson.cfg b/examples/Li2GTH/davidson.cfg index d211b812..929ef604 100644 --- a/examples/Li2GTH/davidson.cfg +++ b/examples/Li2GTH/davidson.cfg @@ -15,7 +15,7 @@ lz=18. 
[Potentials] pseudopotential=pseudo.Li_GTH_PBE [Poisson] -solver=CG +solver=PCG bcx=0 bcy=0 bcz=0 diff --git a/examples/PinnedH2O/job.ref b/examples/PinnedH2O/job.ref index 32024e6f..f9826e8e 100644 --- a/examples/PinnedH2O/job.ref +++ b/examples/PinnedH2O/job.ref @@ -8,7 +8,7 @@ date setenv OMP_NUM_THREADS 1 #setenv KMP_DETERMINISTIC_REDUCTION 1 -set ncpus = 8 +set ncpus = 1 set case = 2 set maindir = /p/lustre2/cheung26/mgmol diff --git a/examples/PinnedH2O/job.rom_3DOF b/examples/PinnedH2O/job.rom_3DOF index 0e38688b..b7dc5d6c 100644 --- a/examples/PinnedH2O/job.rom_3DOF +++ b/examples/PinnedH2O/job.rom_3DOF @@ -8,7 +8,7 @@ date setenv OMP_NUM_THREADS 1 #setenv KMP_DETERMINISTIC_REDUCTION 1 -set ncpus = 8 +set ncpus = 1 set case = 2 set maindir = /p/lustre2/cheung26/mgmol diff --git a/examples/PinnedH2O/mgmol_rom_3DOF_test2.cfg b/examples/PinnedH2O/mgmol_rom_3DOF_test2.cfg index 24273c40..bb2912d9 100644 --- a/examples/PinnedH2O/mgmol_rom_3DOF_test2.cfg +++ b/examples/PinnedH2O/mgmol_rom_3DOF_test2.cfg @@ -38,7 +38,7 @@ output_level=4 [ROM] stage=online_pinned_H2O_3dof [ROM.offline] -basis_file=/usr/workspace/nlrom/MGmol/PinnedH2O_3DOF/data_8/PinnedH2O_3DOF_orbitals_basis_2_2 +basis_file=/usr/workspace/nlrom/MGmol/PinnedH2O_3DOF/data_1/PinnedH2O_3DOF_orbitals_basis_2_2 [ROM.basis] compare_md=false number_of_orbital_basis=34 diff --git a/examples/ShortSighted/mgmol_md.cfg b/examples/ShortSighted/mgmol_md.cfg index 18bc419b..7c485c80 100644 --- a/examples/ShortSighted/mgmol_md.cfg +++ b/examples/ShortSighted/mgmol_md.cfg @@ -15,7 +15,7 @@ lz=21.2406 [Potentials] pseudopotential=pseudo.D_tm_pbe [Poisson] -solver=CG +solver=PCG max_steps_initial=20 max_steps=20 bcx=periodic diff --git a/examples/ShortSighted/mgmol_quench.cfg b/examples/ShortSighted/mgmol_quench.cfg index 17499e60..a89e6b47 100644 --- a/examples/ShortSighted/mgmol_quench.cfg +++ b/examples/ShortSighted/mgmol_quench.cfg @@ -15,7 +15,7 @@ lz=21.2406 [Potentials] pseudopotential=pseudo.D_tm_pbe 
[Poisson] -solver=CG +solver=PCG max_steps_initial=20 max_steps=20 bcx=periodic diff --git a/examples/Water4x4y4z/md.cfg b/examples/Water4x4y4z/md.cfg index bdf99eac..749da19f 100644 --- a/examples/Water4x4y4z/md.cfg +++ b/examples/Water4x4y4z/md.cfg @@ -16,7 +16,7 @@ lz= 93.84 pseudopotential=pseudo.O_ONCV_PBE_SG15 pseudopotential=pseudo.D_ONCV_PBE_SG15 [Poisson] -solver=CG +solver=PCG [Run] type=MD [MD] diff --git a/examples/Water4x4y4z/quench.cfg b/examples/Water4x4y4z/quench.cfg index 7f58a871..d50f860c 100644 --- a/examples/Water4x4y4z/quench.cfg +++ b/examples/Water4x4y4z/quench.cfg @@ -16,7 +16,7 @@ lz= 93.84 pseudopotential=pseudo.O_ONCV_PBE_SG15 pseudopotential=pseudo.D_ONCV_PBE_SG15 [Poisson] -solver=CG +solver=PCG [Run] type=QUENCH [Quench] diff --git a/potentials/pseudo.Li_ONCV_PBE_SG15 b/potentials/pseudo.Li_ONCV_PBE_SG15 new file mode 100644 index 00000000..4713d8a0 --- /dev/null +++ b/potentials/pseudo.Li_ONCV_PBE_SG15 @@ -0,0 +1,1915 @@ +# This pseudopotential file has been produced using the code +# ONCVPSP (Optimized Norm-Conserving Vanderbilt PSeudopotential) +# scalar-relativistic version 2.1.1, 03/26/2014 by D. R. Hamann +# The code is available through a link at URL www.mat-simresearch.com. +# Documentation with the package provides a full description of the +# input data below. +# +# While it is not required under the terms of the GNU GPL, it is +# suggested that you cite D. R. Hamann, Phys. Rev. B 88, 085117 (2013) +# in any publication using these pseudopotentials. +# +# Copyright 2015 The Regents of the University of California +# +# This work is licensed under the Creative Commons Attribution-ShareAlike +# 4.0 International License. To view a copy of this license, visit +# http://creativecommons.org/licenses/by-sa/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. +# +# This pseudopotential is part of the Schlipf-Gygi norm-conserving +# pseudopotential library. 
Its construction parameters were tuned to +# reproduce materials of a training set with very high accuracy and +# should be suitable as a general purpose pseudopotential to treat a +# variety of different compounds. For details of the construction and +# testing of the pseudopotential please refer to: +# +# M. Schlipf, F. Gygi, Comp. Phys. Comm. 196, 36 (2015) +# http://dx.doi.org/10.1016/j.cpc.2015.05.011 +# +# We kindly ask that you include this reference in all publications +# associated to this pseudopotential. +# +# +# Input file for PP generation: +# +# # ATOM AND REFERENCE CONFIGURATION +# # atsym z nc nv iexc psfile +# Li 3.00 0 2 4 fpmd +# # +# # n l f energy (Ha) +# 1 0 2.00 +# 2 0 1.00 +# # +# # PSEUDOPOTENTIAL AND OPTIMIZATION +# # lmax +# 1 +# # +# # l, rc, ep, ncon, nbas, qcut +# 0 1.74553 -1.90562 5 8 7.75888 +# 1 1.40824 0.11292 5 8 11.11410 +# # +# # LOCAL POTENTIAL +# # lloc, lpopt, rc(5), dvloc0 +# 4 5 0.89499 0.00000 +# # +# # VANDERBILT-KLEINMAN-BYLANDER PROJECTORs +# # l, nproj, debl +# 0 2 1.80000 +# 1 2 1.03347 +# # +# # MODEL CORE CHARGE +# # icmod, fcfact +# 0 0.00000 +# # +# # LOG DERIVATIVE ANALYSIS +# # epsh1, epsh2, depsh +# -5.00 3.00 0.02 +# # +# # OUTPUT GRID +# # rlmax, drl +# 6.00 0.01 +# # +# # TEST CONFIGURATIONS +# # ncnf +# 0 +# # nvcnf +# # n l f +# +Li_ONCV_PBE-1 +# +Silver +#radii of balls and covalent bonds +1.0 2.7 +# Nlcc flag +0 +# Atomic number +3 +# Atomic mass +6.94000006 +# Number of valence electrons +3 +#Gaussian core charge parameter rc +1.0 +# Number of potentials +3 +# l-value for state which is local, then type of potential format +2 3 +# Local potential radius +3.2 +# Non-local potential radius +3.2 +# number of points in radial grid +602 +# VANDERBILT-KLEINMAN-BYLANDER PROJECTORs +# l, nproj +0 2 -0.4707813485E+01 -0.2191592421E+01 +1 2 -0.1677821370E+01 -0.1382610420E+00 +# l= 0 +0.0 -7.7593005895 -1.0192109936 +0.01 -7.7568112882 -1.0194882249 +0.02 -7.7493621217 -1.0202971423 +0.03 -7.7369338327 
-1.0216609501 +0.04 -7.7195071639 -1.0236028526 +0.05 -7.6970556726 -1.0261549101 +0.06 -7.6695462423 -1.0293576177 +0.07 -7.6369397322 -1.0332593707 +0.08 -7.5991917591 -1.037915821 +0.09 -7.5562536037 -1.0433891309 +0.1 -7.508073233 -1.0497471309 +0.11 -7.4545964294 -1.0570623897 +0.12 -7.3957680159 -1.0654112041 +0.13 -7.3315331656 -1.0748725194 +0.14 -7.2618387832 -1.0855267901 +0.15 -7.186634945 -1.0974547908 +0.16 -7.1058763841 -1.1107363896 +0.17 -7.0195240059 -1.1254492955 +0.18 -6.9275464193 -1.141667791 +0.19 -6.8299214694 -1.1594614629 +0.2 -6.7266377513 -1.1788939458 +0.21 -6.6176961029 -1.200021683 +0.22 -6.5031110416 -1.2228927294 +0.23 -6.3829121491 -1.2475455963 +0.24 -6.2571453715 -1.2740081622 +0.25 -6.1258742523 -1.3022966399 +0.26 -5.9891810223 -1.332414651 +0.27 -5.8471676284 -1.3643523576 +0.28 -5.6999565818 -1.3980857296 +0.29 -5.5476917005 -1.4335759024 +0.3 -5.3905386942 -1.4707686566 +0.31 -5.2286855923 -1.5095940253 +0.32 -5.0623429859 -1.5499660461 +0.33 -4.8917441534 -1.5917826115 +0.34 -4.7171449574 -1.6349254961 +0.35 -4.5388235441 -1.6792605403 +0.36 -4.3570799614 -1.7246379061 +0.37 -4.172235486 -1.7708925549 +0.38 -3.9846317723 -1.8178448665 +0.39 -3.7946299563 -1.8653013044 +0.4 -3.6026094194 -1.9130553363 +0.41 -3.4089664609 -1.9608884314 +0.42 -3.214112807 -2.0085711862 +0.43 -3.0184739587 -2.055864575 +0.44 -2.8224873949 -2.1025213123 +0.45 -2.626600643 -2.1482873175 +0.46 -2.4312692288 -2.1929032714 +0.47 -2.2369545193 -2.2361062538 +0.48 -2.0441214814 -2.2776314393 +0.49 -1.8532363685 -2.3172138591 +0.5 -1.6647643041 -2.3545902195 +0.51 -1.4791669005 -2.3895006957 +0.52 -1.2968998099 -2.4216907774 +0.53 -1.118410149 -2.450913204 +0.54 -0.94413434687 -2.4769295482 +0.55 -0.77449530271 -2.4995123575 +0.56 -0.60990047503 -2.5184465315 +0.57 -0.45073918141 -2.5335313282 +0.58 -0.29738077311 -2.5445816652 +0.59 -0.15017199575 -2.5514300525 +0.6 -0.0094358826149 -2.5539273235 +0.61 0.12453146732 -2.5519449868 +0.62 0.25146099181 
-2.5453751729 +0.63 0.37111352658 -2.5341326766 +0.64 0.48328079855 -2.5181555598 +0.65 0.58778581947 -2.4974053018 +0.66 0.68448513053 -2.4718683148 +0.67 0.77326913801 -2.4415560143 +0.68 0.85406228696 -2.4065047639 +0.69 0.9268231688 -2.3667757677 +0.7 0.99154622952 -2.3224560865 +0.71 1.0482603331 -2.2736573657 +0.72 1.0970287085 -2.2205155535 +0.73 1.1379485353 -2.1631903396 +0.74 1.1711500525 -2.1018642456 +0.75 1.1967959926 -2.036741917 +0.76 1.2150805992 -1.9680490979 +0.77 1.2262277331 -1.8960309893 +0.78 1.2304896364 -1.820951035 +0.79 1.228145274 -1.7430894102 +0.8 1.2194985196 -1.6627413965 +0.81 1.2048762016 -1.5802156579 +0.82 1.184626025 -1.4958324308 +0.83 1.1591143867 -1.4099216449 +0.84 1.1287241012 -1.3228209881 +0.85 1.0938520569 -1.2348739334 +0.86 1.05490682 -1.1464277436 +0.87 1.0123062069 -1.0578314692 +0.88 0.96647484347 -0.96943395791 +0.89 0.91784174161 -0.88158190682 +0.9 0.86683791898 -0.79461798042 +0.91 0.81389319634 -0.70887759925 +0.92 0.7594323121 -0.62468477328 +0.93 0.70387167633 -0.54234914361 +0.94 0.64761702996 -0.4621644887 +0.95 0.59106122966 -0.38440732114 +0.96 0.53458281949 -0.30933630656 +0.97 0.4785423228 -0.23718904247 +0.98 0.42328195918 -0.16818279962 +0.99 0.36912362332 -0.10251308529 +1.0 0.31636746756 -0.040352882291 +1.01 0.26529068328 0.018147948585 +1.02 0.21614684997 0.072862757827 +1.03 0.16916539076 0.12368816752 +1.04 0.12454771649 0.17054715475 +1.05 0.082470373831 0.2133856147 +1.06 0.043083689239 0.25217324203 +1.07 0.0065120756055 0.28690287693 +1.08 -0.02714463863 0.31758903479 +1.09 -0.057816978622 0.34427244982 +1.1 -0.085458809175 0.36701361535 +1.11 -0.11004858788 0.3858935715 +1.12 -0.13158628508 0.40101111338 +1.13 -0.1500994279 0.41248727342 +1.14 -0.16563634513 0.42045937767 +1.15 -0.178265114 0.4250799321 +1.16 -0.18807046663 0.42651410161 +1.17 -0.19516078332 0.42494426276 +1.18 -0.19965807561 0.42056210722 +1.19 -0.20169758712 0.41356821114 +1.2 -0.20142872217 0.40417225404 +1.21 
-0.19901390278 0.39259116996 +1.22 -0.19462294096 0.37904535273 +1.23 -0.18843296654 0.36375871026 +1.24 -0.18062956949 0.3469570493 +1.25 -0.17140042562 0.32886496448 +1.26 -0.16093611844 0.30970691787 +1.27 -0.14942835147 0.28970204679 +1.28 -0.13706676851 0.2690647395 +1.29 -0.12404067387 0.24800651356 +1.3 -0.11053235807 0.22672597944 +1.31 -0.096719595481 0.20541541404 +1.32 -0.082775967686 0.18426029179 +1.33 -0.068861416613 0.16342787686 +1.34 -0.0551310275 0.14307944398 +1.35 -0.04172908387 0.12336253621 +1.36 -0.028783936218 0.10440594115 +1.37 -0.016420371472 0.086334177332 +1.38 -0.0047416778062 0.069247108056 +1.39 0.0061586502908 0.053233909192 +1.4 0.016196242387 0.038373444503 +1.41 0.025311653016 0.024717655508 +1.42 0.033446675253 0.012317164084 +1.43 0.040568099018 0.0011960148746 +1.44 0.046652083258 -0.0086313620653 +1.45 0.051685603931 -0.017161923985 +1.46 0.055679271064 -0.024416714472 +1.47 0.058641496585 -0.030414623402 +1.48 0.060610312422 -0.035204332823 +1.49 0.061621969933 -0.038832637114 +1.5 0.061731691095 -0.04136515654 +1.51 0.06100215529 -0.042874613882 +1.52 0.059503512477 -0.043441079158 +1.53 0.057318988046 -0.043156859831 +1.54 0.054531363789 -0.042113991585 +1.55 0.051236871402 -0.040416609638 +1.56 0.047528779041 -0.038166294726 +1.57 0.043507889336 -0.035470691094 +1.58 0.039273079584 -0.032435823961 +1.59 0.034923278225 -0.029166627307 +1.6 0.030555075345 -0.025765776124 +1.61 0.026260024876 -0.022330178375 +1.62 0.022123590707 -0.018950671038 +1.63 0.018223882204 -0.015710358314 +1.64 0.014626616423 -0.012680135019 +1.65 0.011389900708 -0.0099231517255 +1.66 0.0085521986237 -0.0074834592058 +1.67 0.0061449383254 -0.0053980868807 +1.68 0.0041740658843 -0.0036795505535 +1.69 0.0026383564748 -0.0023333398664 +1.7 0.0015094773016 -0.0013390589523 +1.71 0.00074981081503 -0.00066738518164 +1.72 0.00029951000811 -0.00026781274301 +1.73 7.9879263209e-05 -7.2256702049e-05 +1.74 1.5661696141e-05 -1.487504535e-05 +1.75 
6.2099461977e-06 -6.0427619678e-06 +1.76 -2.071124021e-06 1.6637105816e-06 +1.77 -7.5368369057e-07 6.9971701676e-07 +1.78 -9.3400344889e-07 8.671251814e-07 +1.79 0.0 0.0 +1.8 0.0 0.0 +1.81 0.0 0.0 +1.82 0.0 0.0 +1.83 0.0 0.0 +1.84 0.0 0.0 +1.85 0.0 0.0 +1.86 0.0 0.0 +1.87 0.0 0.0 +1.88 0.0 0.0 +1.89 0.0 0.0 +1.9 0.0 0.0 +1.91 0.0 0.0 +1.92 0.0 0.0 +1.93 0.0 0.0 +1.94 0.0 0.0 +1.95 0.0 0.0 +1.96 0.0 0.0 +1.97 0.0 0.0 +1.98 0.0 0.0 +1.99 0.0 0.0 +2.0 0.0 0.0 +2.01 0.0 0.0 +2.02 0.0 0.0 +2.03 0.0 0.0 +2.04 0.0 0.0 +2.05 0.0 0.0 +2.06 0.0 0.0 +2.07 0.0 0.0 +2.08 0.0 0.0 +2.09 0.0 0.0 +2.1 0.0 0.0 +2.11 0.0 0.0 +2.12 0.0 0.0 +2.13 0.0 0.0 +2.14 0.0 0.0 +2.15 0.0 0.0 +2.16 0.0 0.0 +2.17 0.0 0.0 +2.18 0.0 0.0 +2.19 0.0 0.0 +2.2 0.0 0.0 +2.21 0.0 0.0 +2.22 0.0 0.0 +2.23 0.0 0.0 +2.24 0.0 0.0 +2.25 0.0 0.0 +2.26 0.0 0.0 +2.27 0.0 0.0 +2.28 0.0 0.0 +2.29 0.0 0.0 +2.3 0.0 0.0 +2.31 0.0 0.0 +2.32 0.0 0.0 +2.33 0.0 0.0 +2.34 0.0 0.0 +2.35 0.0 0.0 +2.36 0.0 0.0 +2.37 0.0 0.0 +2.38 0.0 0.0 +2.39 0.0 0.0 +2.4 0.0 0.0 +2.41 0.0 0.0 +2.42 0.0 0.0 +2.43 0.0 0.0 +2.44 0.0 0.0 +2.45 0.0 0.0 +2.46 0.0 0.0 +2.47 0.0 0.0 +2.48 0.0 0.0 +2.49 0.0 0.0 +2.5 0.0 0.0 +2.51 0.0 0.0 +2.52 0.0 0.0 +2.53 0.0 0.0 +2.54 0.0 0.0 +2.55 0.0 0.0 +2.56 0.0 0.0 +2.57 0.0 0.0 +2.58 0.0 0.0 +2.59 0.0 0.0 +2.6 0.0 0.0 +2.61 0.0 0.0 +2.62 0.0 0.0 +2.63 0.0 0.0 +2.64 0.0 0.0 +2.65 0.0 0.0 +2.66 0.0 0.0 +2.67 0.0 0.0 +2.68 0.0 0.0 +2.69 0.0 0.0 +2.7 0.0 0.0 +2.71 0.0 0.0 +2.72 0.0 0.0 +2.73 0.0 0.0 +2.74 0.0 0.0 +2.75 0.0 0.0 +2.76 0.0 0.0 +2.77 0.0 0.0 +2.78 0.0 0.0 +2.79 0.0 0.0 +2.8 0.0 0.0 +2.81 0.0 0.0 +2.82 0.0 0.0 +2.83 0.0 0.0 +2.84 0.0 0.0 +2.85 0.0 0.0 +2.86 0.0 0.0 +2.87 0.0 0.0 +2.88 0.0 0.0 +2.89 0.0 0.0 +2.9 0.0 0.0 +2.91 0.0 0.0 +2.92 0.0 0.0 +2.93 0.0 0.0 +2.94 0.0 0.0 +2.95 0.0 0.0 +2.96 0.0 0.0 +2.97 0.0 0.0 +2.98 0.0 0.0 +2.99 0.0 0.0 +3.0 0.0 0.0 +3.01 0.0 0.0 +3.02 0.0 0.0 +3.03 0.0 0.0 +3.04 0.0 0.0 +3.05 0.0 0.0 +3.06 0.0 0.0 +3.07 0.0 0.0 +3.08 0.0 0.0 +3.09 0.0 0.0 +3.1 0.0 0.0 +3.11 0.0 
0.0 +3.12 0.0 0.0 +3.13 0.0 0.0 +3.14 0.0 0.0 +3.15 0.0 0.0 +3.16 0.0 0.0 +3.17 0.0 0.0 +3.18 0.0 0.0 +3.19 0.0 0.0 +3.2 0.0 0.0 +3.21 0.0 0.0 +3.22 0.0 0.0 +3.23 0.0 0.0 +3.24 0.0 0.0 +3.25 0.0 0.0 +3.26 0.0 0.0 +3.27 0.0 0.0 +3.28 0.0 0.0 +3.29 0.0 0.0 +3.3 0.0 0.0 +3.31 0.0 0.0 +3.32 0.0 0.0 +3.33 0.0 0.0 +3.34 0.0 0.0 +3.35 0.0 0.0 +3.36 0.0 0.0 +3.37 0.0 0.0 +3.38 0.0 0.0 +3.39 0.0 0.0 +3.4 0.0 0.0 +3.41 0.0 0.0 +3.42 0.0 0.0 +3.43 0.0 0.0 +3.44 0.0 0.0 +3.45 0.0 0.0 +3.46 0.0 0.0 +3.47 0.0 0.0 +3.48 0.0 0.0 +3.49 0.0 0.0 +3.5 0.0 0.0 +3.51 0.0 0.0 +3.52 0.0 0.0 +3.53 0.0 0.0 +3.54 0.0 0.0 +3.55 0.0 0.0 +3.56 0.0 0.0 +3.57 0.0 0.0 +3.58 0.0 0.0 +3.59 0.0 0.0 +3.6 0.0 0.0 +3.61 0.0 0.0 +3.62 0.0 0.0 +3.63 0.0 0.0 +3.64 0.0 0.0 +3.65 0.0 0.0 +3.66 0.0 0.0 +3.67 0.0 0.0 +3.68 0.0 0.0 +3.69 0.0 0.0 +3.7 0.0 0.0 +3.71 0.0 0.0 +3.72 0.0 0.0 +3.73 0.0 0.0 +3.74 0.0 0.0 +3.75 0.0 0.0 +3.76 0.0 0.0 +3.77 0.0 0.0 +3.78 0.0 0.0 +3.79 0.0 0.0 +3.8 0.0 0.0 +3.81 0.0 0.0 +3.82 0.0 0.0 +3.83 0.0 0.0 +3.84 0.0 0.0 +3.85 0.0 0.0 +3.86 0.0 0.0 +3.87 0.0 0.0 +3.88 0.0 0.0 +3.89 0.0 0.0 +3.9 0.0 0.0 +3.91 0.0 0.0 +3.92 0.0 0.0 +3.93 0.0 0.0 +3.94 0.0 0.0 +3.95 0.0 0.0 +3.96 0.0 0.0 +3.97 0.0 0.0 +3.98 0.0 0.0 +3.99 0.0 0.0 +4.0 0.0 0.0 +4.01 0.0 0.0 +4.02 0.0 0.0 +4.03 0.0 0.0 +4.04 0.0 0.0 +4.05 0.0 0.0 +4.06 0.0 0.0 +4.07 0.0 0.0 +4.08 0.0 0.0 +4.09 0.0 0.0 +4.1 0.0 0.0 +4.11 0.0 0.0 +4.12 0.0 0.0 +4.13 0.0 0.0 +4.14 0.0 0.0 +4.15 0.0 0.0 +4.16 0.0 0.0 +4.17 0.0 0.0 +4.18 0.0 0.0 +4.19 0.0 0.0 +4.2 0.0 0.0 +4.21 0.0 0.0 +4.22 0.0 0.0 +4.23 0.0 0.0 +4.24 0.0 0.0 +4.25 0.0 0.0 +4.26 0.0 0.0 +4.27 0.0 0.0 +4.28 0.0 0.0 +4.29 0.0 0.0 +4.3 0.0 0.0 +4.31 0.0 0.0 +4.32 0.0 0.0 +4.33 0.0 0.0 +4.34 0.0 0.0 +4.35 0.0 0.0 +4.36 0.0 0.0 +4.37 0.0 0.0 +4.38 0.0 0.0 +4.39 0.0 0.0 +4.4 0.0 0.0 +4.41 0.0 0.0 +4.42 0.0 0.0 +4.43 0.0 0.0 +4.44 0.0 0.0 +4.45 0.0 0.0 +4.46 0.0 0.0 +4.47 0.0 0.0 +4.48 0.0 0.0 +4.49 0.0 0.0 +4.5 0.0 0.0 +4.51 0.0 0.0 +4.52 0.0 0.0 +4.53 0.0 0.0 +4.54 0.0 0.0 +4.55 
0.0 0.0 +4.56 0.0 0.0 +4.57 0.0 0.0 +4.58 0.0 0.0 +4.59 0.0 0.0 +4.6 0.0 0.0 +4.61 0.0 0.0 +4.62 0.0 0.0 +4.63 0.0 0.0 +4.64 0.0 0.0 +4.65 0.0 0.0 +4.66 0.0 0.0 +4.67 0.0 0.0 +4.68 0.0 0.0 +4.69 0.0 0.0 +4.7 0.0 0.0 +4.71 0.0 0.0 +4.72 0.0 0.0 +4.73 0.0 0.0 +4.74 0.0 0.0 +4.75 0.0 0.0 +4.76 0.0 0.0 +4.77 0.0 0.0 +4.78 0.0 0.0 +4.79 0.0 0.0 +4.8 0.0 0.0 +4.81 0.0 0.0 +4.82 0.0 0.0 +4.83 0.0 0.0 +4.84 0.0 0.0 +4.85 0.0 0.0 +4.86 0.0 0.0 +4.87 0.0 0.0 +4.88 0.0 0.0 +4.89 0.0 0.0 +4.9 0.0 0.0 +4.91 0.0 0.0 +4.92 0.0 0.0 +4.93 0.0 0.0 +4.94 0.0 0.0 +4.95 0.0 0.0 +4.96 0.0 0.0 +4.97 0.0 0.0 +4.98 0.0 0.0 +4.99 0.0 0.0 +5.0 0.0 0.0 +5.01 0.0 0.0 +5.02 0.0 0.0 +5.03 0.0 0.0 +5.04 0.0 0.0 +5.05 0.0 0.0 +5.06 0.0 0.0 +5.07 0.0 0.0 +5.08 0.0 0.0 +5.09 0.0 0.0 +5.1 0.0 0.0 +5.11 0.0 0.0 +5.12 0.0 0.0 +5.13 0.0 0.0 +5.14 0.0 0.0 +5.15 0.0 0.0 +5.16 0.0 0.0 +5.17 0.0 0.0 +5.18 0.0 0.0 +5.19 0.0 0.0 +5.2 0.0 0.0 +5.21 0.0 0.0 +5.22 0.0 0.0 +5.23 0.0 0.0 +5.24 0.0 0.0 +5.25 0.0 0.0 +5.26 0.0 0.0 +5.27 0.0 0.0 +5.28 0.0 0.0 +5.29 0.0 0.0 +5.3 0.0 0.0 +5.31 0.0 0.0 +5.32 0.0 0.0 +5.33 0.0 0.0 +5.34 0.0 0.0 +5.35 0.0 0.0 +5.36 0.0 0.0 +5.37 0.0 0.0 +5.38 0.0 0.0 +5.39 0.0 0.0 +5.4 0.0 0.0 +5.41 0.0 0.0 +5.42 0.0 0.0 +5.43 0.0 0.0 +5.44 0.0 0.0 +5.45 0.0 0.0 +5.46 0.0 0.0 +5.47 0.0 0.0 +5.48 0.0 0.0 +5.49 0.0 0.0 +5.5 0.0 0.0 +5.51 0.0 0.0 +5.52 0.0 0.0 +5.53 0.0 0.0 +5.54 0.0 0.0 +5.55 0.0 0.0 +5.56 0.0 0.0 +5.57 0.0 0.0 +5.58 0.0 0.0 +5.59 0.0 0.0 +5.6 0.0 0.0 +5.61 0.0 0.0 +5.62 0.0 0.0 +5.63 0.0 0.0 +5.64 0.0 0.0 +5.65 0.0 0.0 +5.66 0.0 0.0 +5.67 0.0 0.0 +5.68 0.0 0.0 +5.69 0.0 0.0 +5.7 0.0 0.0 +5.71 0.0 0.0 +5.72 0.0 0.0 +5.73 0.0 0.0 +5.74 0.0 0.0 +5.75 0.0 0.0 +5.76 0.0 0.0 +5.77 0.0 0.0 +5.78 0.0 0.0 +5.79 0.0 0.0 +5.8 0.0 0.0 +5.81 0.0 0.0 +5.82 0.0 0.0 +5.83 0.0 0.0 +5.84 0.0 0.0 +5.85 0.0 0.0 +5.86 0.0 0.0 +5.87 0.0 0.0 +5.88 0.0 0.0 +5.89 0.0 0.0 +5.9 0.0 0.0 +5.91 0.0 0.0 +5.92 0.0 0.0 +5.93 0.0 0.0 +5.94 0.0 0.0 +5.95 0.0 0.0 +5.96 0.0 0.0 +5.97 0.0 0.0 +5.98 0.0 0.0 
+5.99 0.0 0.0 +6.0 0.0 0.0 +6.01 0.0 0.0 +# l= 1 +0.0 0.0 0.0 +0.01 0.29066480787 -0.12766802783 +0.02 0.58059469457 -0.25370368503 +0.03 0.86905459105 -0.37649810362 +0.04 1.1553091839 -0.49448908497 +0.05 1.4386229206 -0.60618359966 +0.06 1.7182601654 -0.71017929906 +0.07 1.9934855498 -0.80518473143 +0.08 2.2635645602 -0.89003797427 +0.09 2.5277643986 -0.96372341716 +0.1 2.7853551471 -1.0253864565 +0.11 3.0356112595 -1.0743458929 +0.12 3.2778133999 -1.1101038562 +0.13 3.5112506355 -1.132353118 +0.14 3.7352229868 -1.1409816885 +0.15 3.9490443308 -1.1360746358 +0.16 4.1520456404 -1.1179131031 +0.17 4.3435785405 -1.0869705414 +0.18 4.5230191502 -1.043906215 +0.19 4.6897721746 -0.98955607228 +0.2 4.8432751971 -0.92492112794 +0.21 4.9830031356 -0.85115349716 +0.22 5.1084727845 -0.76954032576 +0.23 5.219247403 -0.68148580784 +0.24 5.3149412635 -0.58849160184 +0.25 5.3952241285 -0.49213581023 +0.26 5.4598255167 -0.39405106569 +0.27 5.5085387903 -0.29590166122 +0.28 5.541224879 -0.19936048846 +0.29 5.5578156616 -0.10608575913 +0.3 5.5583168965 -0.017698005217 +0.31 5.5428106467 0.06424234575 +0.32 5.5114571261 0.1382563903 +0.33 5.4644959792 0.20296728607 +0.34 5.4022468522 0.25711902545 +0.35 5.325109272 0.29959336152 +0.36 5.2335618541 0.32942535652 +0.37 5.1281607036 0.34581612529 +0.38 5.0095370906 0.34814322996 +0.39 4.8783943935 0.33596941846 +0.4 4.7355042613 0.30904776792 +0.41 4.5817020837 0.26732465609 +0.42 4.4178817653 0.21094005537 +0.43 4.2449898494 0.14022512555 +0.44 4.0640190434 0.055697201533 +0.45 3.8760012064 -0.041947731618 +0.46 3.6819998663 -0.15184494879 +0.47 3.4831023478 -0.27297311531 +0.48 3.2804117938 -0.40416833694 +0.49 3.0750382363 -0.54414020192 +0.5 2.8680904422 -0.69148938787 +0.51 2.6606674962 -0.84472613781 +0.52 2.4538500308 -1.0022901441 +0.53 2.2486913509 -1.1625719248 +0.54 2.0462109192 -1.323931525 +0.55 1.8473843041 -1.4847227682 +0.56 1.653138125 -1.6433107038 +0.57 1.4643414037 -1.7980948263 +0.58 1.281800928 -1.947525902 
+0.59 1.1062538905 -2.0901278939 +0.6 0.93836579773 -2.2245096048 +0.61 0.77872264836 -2.3493884415 +0.62 0.62783196803 -2.4635949995 +0.63 0.48611725026 -2.5660924179 +0.64 0.35391756909 -2.6559840633 +0.65 0.23148844511 -2.7325176066 +0.66 0.11899991646 -2.7950966015 +0.67 0.016538750486 -2.8432814671 +0.68 -0.075888903222 -2.8767889702 +0.69 -0.15835461263 -2.8954907864 +0.7 -0.23100553111 -2.8994156318 +0.71 -0.29405849442 -2.8887392669 +0.72 -0.34779536494 -2.8637786145 +0.73 -0.39255757684 -2.8249835899 +0.74 -0.42874014339 -2.7729270037 +0.75 -0.45678532652 -2.7082941525 +0.76 -0.47717572969 -2.6318706129 +0.77 -0.49042753238 -2.5445289574 +0.78 -0.4970835879 -2.447216249 +0.79 -0.49770653893 -2.3409410988 +0.8 -0.49287207593 -2.2267610065 +0.81 -0.48316244483 -2.1057702977 +0.82 -0.46916030733 -1.9790889658 +0.83 -0.45144305243 -1.8478527201 +0.84 -0.43057765246 -1.7132045264 +0.85 -0.40711614974 -1.57628791 +0.86 -0.38159185278 -1.4382422733 +0.87 -0.35451631218 -1.3002004539 +0.88 -0.32637713717 -1.1632887249 +0.89 -0.29763676098 -1.0286299149 +0.9 -0.26873225925 -0.89735010789 +0.91 -0.24007124473 -0.77054582678 +0.92 -0.21202347663 -0.64921538628 +0.93 -0.18491499163 -0.53421702583 +0.94 -0.15902690746 -0.42626728118 +0.95 -0.13459495726 -0.32594113668 +0.96 -0.11181120497 -0.23367796686 +0.97 -0.090820056384 -0.14977127266 +0.98 -0.071724461213 -0.074387465455 +0.99 -0.05458647007 -0.0075684657988 +1.0 -0.039429767264 0.050760037837 +1.01 -0.026242635758 0.10077910624 +1.02 -0.014981534149 0.1427654233 +1.03 -0.0055743761777 0.17708106785 +1.04 0.0020772571196 0.20416531617 +1.05 0.0080912125581 0.2245148205 +1.06 0.012601877251 0.23867416975 +1.07 0.015756248457 0.24722388705 +1.08 0.017710687794 0.25077081678 +1.09 0.01862572475 0.24993011099 +1.1 0.018662681782 0.24531711912 +1.11 0.017980644302 0.23753738864 +1.12 0.016735251642 0.22718372487 +1.13 0.015071425965 0.21481153372 +1.14 0.013123583416 0.20094132022 +1.15 0.011014308211 0.18605454018 
+1.16 0.0088554209722 0.17059765277 +1.17 0.0067375505894 0.15494682671 +1.18 0.0047378191094 0.13943452685 +1.19 0.0029193524458 0.1243477471 +1.2 0.001328262614 0.10991769715 +1.21 -7.6992942738e-06 0.096314572727 +1.22 -0.0010722476971 0.083670780504 +1.23 -0.0018609144401 0.07207986554 +1.24 -0.0023860261071 0.06157688782 +1.25 -0.0026664363232 0.052173793152 +1.26 -0.0027284955274 0.043856886321 +1.27 -0.0026075696808 0.036575485324 +1.28 -0.0023404917442 0.030270092219 +1.29 -0.0019675321965 0.024867237243 +1.3 -0.0015277073539 0.020284143588 +1.31 -0.0010579085108 0.016439036083 +1.32 -0.00059366211672 0.013247738376 +1.33 -0.00016101283768 0.010637243204 +1.34 0.00021686756196 0.0085367398153 +1.35 0.00052471216314 0.0068851056495 +1.36 0.00075756576343 0.0056302865355 +1.37 0.00091293900599 0.0047214403435 +1.38 0.0010007352126 0.0041128940905 +1.39 0.0010335306184 0.0037478485963 +1.4 0.0010886959567 0.0037639012113 +1.41 0.00091991355586 0.0031664945096 +1.42 0.00029615236778 0.0010139679602 +1.43 -6.1278603913e-05 -0.00021121956431 +1.44 -2.2865795105e-05 -7.88154914e-05 +1.45 0.0 0.0 +1.46 0.0 0.0 +1.47 0.0 0.0 +1.48 0.0 0.0 +1.49 0.0 0.0 +1.5 0.0 0.0 +1.51 0.0 0.0 +1.52 0.0 0.0 +1.53 0.0 0.0 +1.54 0.0 0.0 +1.55 0.0 0.0 +1.56 0.0 0.0 +1.57 0.0 0.0 +1.58 0.0 0.0 +1.59 0.0 0.0 +1.6 0.0 0.0 +1.61 0.0 0.0 +1.62 0.0 0.0 +1.63 0.0 0.0 +1.64 0.0 0.0 +1.65 0.0 0.0 +1.66 0.0 0.0 +1.67 0.0 0.0 +1.68 0.0 0.0 +1.69 0.0 0.0 +1.7 0.0 0.0 +1.71 0.0 0.0 +1.72 0.0 0.0 +1.73 0.0 0.0 +1.74 0.0 0.0 +1.75 0.0 0.0 +1.76 0.0 0.0 +1.77 0.0 0.0 +1.78 0.0 0.0 +1.79 0.0 0.0 +1.8 0.0 0.0 +1.81 0.0 0.0 +1.82 0.0 0.0 +1.83 0.0 0.0 +1.84 0.0 0.0 +1.85 0.0 0.0 +1.86 0.0 0.0 +1.87 0.0 0.0 +1.88 0.0 0.0 +1.89 0.0 0.0 +1.9 0.0 0.0 +1.91 0.0 0.0 +1.92 0.0 0.0 +1.93 0.0 0.0 +1.94 0.0 0.0 +1.95 0.0 0.0 +1.96 0.0 0.0 +1.97 0.0 0.0 +1.98 0.0 0.0 +1.99 0.0 0.0 +2.0 0.0 0.0 +2.01 0.0 0.0 +2.02 0.0 0.0 +2.03 0.0 0.0 +2.04 0.0 0.0 +2.05 0.0 0.0 +2.06 0.0 0.0 +2.07 0.0 0.0 +2.08 0.0 0.0 +2.09 0.0 
0.0 +2.1 0.0 0.0 +2.11 0.0 0.0 +2.12 0.0 0.0 +2.13 0.0 0.0 +2.14 0.0 0.0 +2.15 0.0 0.0 +2.16 0.0 0.0 +2.17 0.0 0.0 +2.18 0.0 0.0 +2.19 0.0 0.0 +2.2 0.0 0.0 +2.21 0.0 0.0 +2.22 0.0 0.0 +2.23 0.0 0.0 +2.24 0.0 0.0 +2.25 0.0 0.0 +2.26 0.0 0.0 +2.27 0.0 0.0 +2.28 0.0 0.0 +2.29 0.0 0.0 +2.3 0.0 0.0 +2.31 0.0 0.0 +2.32 0.0 0.0 +2.33 0.0 0.0 +2.34 0.0 0.0 +2.35 0.0 0.0 +2.36 0.0 0.0 +2.37 0.0 0.0 +2.38 0.0 0.0 +2.39 0.0 0.0 +2.4 0.0 0.0 +2.41 0.0 0.0 +2.42 0.0 0.0 +2.43 0.0 0.0 +2.44 0.0 0.0 +2.45 0.0 0.0 +2.46 0.0 0.0 +2.47 0.0 0.0 +2.48 0.0 0.0 +2.49 0.0 0.0 +2.5 0.0 0.0 +2.51 0.0 0.0 +2.52 0.0 0.0 +2.53 0.0 0.0 +2.54 0.0 0.0 +2.55 0.0 0.0 +2.56 0.0 0.0 +2.57 0.0 0.0 +2.58 0.0 0.0 +2.59 0.0 0.0 +2.6 0.0 0.0 +2.61 0.0 0.0 +2.62 0.0 0.0 +2.63 0.0 0.0 +2.64 0.0 0.0 +2.65 0.0 0.0 +2.66 0.0 0.0 +2.67 0.0 0.0 +2.68 0.0 0.0 +2.69 0.0 0.0 +2.7 0.0 0.0 +2.71 0.0 0.0 +2.72 0.0 0.0 +2.73 0.0 0.0 +2.74 0.0 0.0 +2.75 0.0 0.0 +2.76 0.0 0.0 +2.77 0.0 0.0 +2.78 0.0 0.0 +2.79 0.0 0.0 +2.8 0.0 0.0 +2.81 0.0 0.0 +2.82 0.0 0.0 +2.83 0.0 0.0 +2.84 0.0 0.0 +2.85 0.0 0.0 +2.86 0.0 0.0 +2.87 0.0 0.0 +2.88 0.0 0.0 +2.89 0.0 0.0 +2.9 0.0 0.0 +2.91 0.0 0.0 +2.92 0.0 0.0 +2.93 0.0 0.0 +2.94 0.0 0.0 +2.95 0.0 0.0 +2.96 0.0 0.0 +2.97 0.0 0.0 +2.98 0.0 0.0 +2.99 0.0 0.0 +3.0 0.0 0.0 +3.01 0.0 0.0 +3.02 0.0 0.0 +3.03 0.0 0.0 +3.04 0.0 0.0 +3.05 0.0 0.0 +3.06 0.0 0.0 +3.07 0.0 0.0 +3.08 0.0 0.0 +3.09 0.0 0.0 +3.1 0.0 0.0 +3.11 0.0 0.0 +3.12 0.0 0.0 +3.13 0.0 0.0 +3.14 0.0 0.0 +3.15 0.0 0.0 +3.16 0.0 0.0 +3.17 0.0 0.0 +3.18 0.0 0.0 +3.19 0.0 0.0 +3.2 0.0 0.0 +3.21 0.0 0.0 +3.22 0.0 0.0 +3.23 0.0 0.0 +3.24 0.0 0.0 +3.25 0.0 0.0 +3.26 0.0 0.0 +3.27 0.0 0.0 +3.28 0.0 0.0 +3.29 0.0 0.0 +3.3 0.0 0.0 +3.31 0.0 0.0 +3.32 0.0 0.0 +3.33 0.0 0.0 +3.34 0.0 0.0 +3.35 0.0 0.0 +3.36 0.0 0.0 +3.37 0.0 0.0 +3.38 0.0 0.0 +3.39 0.0 0.0 +3.4 0.0 0.0 +3.41 0.0 0.0 +3.42 0.0 0.0 +3.43 0.0 0.0 +3.44 0.0 0.0 +3.45 0.0 0.0 +3.46 0.0 0.0 +3.47 0.0 0.0 +3.48 0.0 0.0 +3.49 0.0 0.0 +3.5 0.0 0.0 +3.51 0.0 0.0 +3.52 0.0 0.0 +3.53 
0.0 0.0 +3.54 0.0 0.0 +3.55 0.0 0.0 +3.56 0.0 0.0 +3.57 0.0 0.0 +3.58 0.0 0.0 +3.59 0.0 0.0 +3.6 0.0 0.0 +3.61 0.0 0.0 +3.62 0.0 0.0 +3.63 0.0 0.0 +3.64 0.0 0.0 +3.65 0.0 0.0 +3.66 0.0 0.0 +3.67 0.0 0.0 +3.68 0.0 0.0 +3.69 0.0 0.0 +3.7 0.0 0.0 +3.71 0.0 0.0 +3.72 0.0 0.0 +3.73 0.0 0.0 +3.74 0.0 0.0 +3.75 0.0 0.0 +3.76 0.0 0.0 +3.77 0.0 0.0 +3.78 0.0 0.0 +3.79 0.0 0.0 +3.8 0.0 0.0 +3.81 0.0 0.0 +3.82 0.0 0.0 +3.83 0.0 0.0 +3.84 0.0 0.0 +3.85 0.0 0.0 +3.86 0.0 0.0 +3.87 0.0 0.0 +3.88 0.0 0.0 +3.89 0.0 0.0 +3.9 0.0 0.0 +3.91 0.0 0.0 +3.92 0.0 0.0 +3.93 0.0 0.0 +3.94 0.0 0.0 +3.95 0.0 0.0 +3.96 0.0 0.0 +3.97 0.0 0.0 +3.98 0.0 0.0 +3.99 0.0 0.0 +4.0 0.0 0.0 +4.01 0.0 0.0 +4.02 0.0 0.0 +4.03 0.0 0.0 +4.04 0.0 0.0 +4.05 0.0 0.0 +4.06 0.0 0.0 +4.07 0.0 0.0 +4.08 0.0 0.0 +4.09 0.0 0.0 +4.1 0.0 0.0 +4.11 0.0 0.0 +4.12 0.0 0.0 +4.13 0.0 0.0 +4.14 0.0 0.0 +4.15 0.0 0.0 +4.16 0.0 0.0 +4.17 0.0 0.0 +4.18 0.0 0.0 +4.19 0.0 0.0 +4.2 0.0 0.0 +4.21 0.0 0.0 +4.22 0.0 0.0 +4.23 0.0 0.0 +4.24 0.0 0.0 +4.25 0.0 0.0 +4.26 0.0 0.0 +4.27 0.0 0.0 +4.28 0.0 0.0 +4.29 0.0 0.0 +4.3 0.0 0.0 +4.31 0.0 0.0 +4.32 0.0 0.0 +4.33 0.0 0.0 +4.34 0.0 0.0 +4.35 0.0 0.0 +4.36 0.0 0.0 +4.37 0.0 0.0 +4.38 0.0 0.0 +4.39 0.0 0.0 +4.4 0.0 0.0 +4.41 0.0 0.0 +4.42 0.0 0.0 +4.43 0.0 0.0 +4.44 0.0 0.0 +4.45 0.0 0.0 +4.46 0.0 0.0 +4.47 0.0 0.0 +4.48 0.0 0.0 +4.49 0.0 0.0 +4.5 0.0 0.0 +4.51 0.0 0.0 +4.52 0.0 0.0 +4.53 0.0 0.0 +4.54 0.0 0.0 +4.55 0.0 0.0 +4.56 0.0 0.0 +4.57 0.0 0.0 +4.58 0.0 0.0 +4.59 0.0 0.0 +4.6 0.0 0.0 +4.61 0.0 0.0 +4.62 0.0 0.0 +4.63 0.0 0.0 +4.64 0.0 0.0 +4.65 0.0 0.0 +4.66 0.0 0.0 +4.67 0.0 0.0 +4.68 0.0 0.0 +4.69 0.0 0.0 +4.7 0.0 0.0 +4.71 0.0 0.0 +4.72 0.0 0.0 +4.73 0.0 0.0 +4.74 0.0 0.0 +4.75 0.0 0.0 +4.76 0.0 0.0 +4.77 0.0 0.0 +4.78 0.0 0.0 +4.79 0.0 0.0 +4.8 0.0 0.0 +4.81 0.0 0.0 +4.82 0.0 0.0 +4.83 0.0 0.0 +4.84 0.0 0.0 +4.85 0.0 0.0 +4.86 0.0 0.0 +4.87 0.0 0.0 +4.88 0.0 0.0 +4.89 0.0 0.0 +4.9 0.0 0.0 +4.91 0.0 0.0 +4.92 0.0 0.0 +4.93 0.0 0.0 +4.94 0.0 0.0 +4.95 0.0 0.0 +4.96 0.0 0.0 
+4.97 0.0 0.0 +4.98 0.0 0.0 +4.99 0.0 0.0 +5.0 0.0 0.0 +5.01 0.0 0.0 +5.02 0.0 0.0 +5.03 0.0 0.0 +5.04 0.0 0.0 +5.05 0.0 0.0 +5.06 0.0 0.0 +5.07 0.0 0.0 +5.08 0.0 0.0 +5.09 0.0 0.0 +5.1 0.0 0.0 +5.11 0.0 0.0 +5.12 0.0 0.0 +5.13 0.0 0.0 +5.14 0.0 0.0 +5.15 0.0 0.0 +5.16 0.0 0.0 +5.17 0.0 0.0 +5.18 0.0 0.0 +5.19 0.0 0.0 +5.2 0.0 0.0 +5.21 0.0 0.0 +5.22 0.0 0.0 +5.23 0.0 0.0 +5.24 0.0 0.0 +5.25 0.0 0.0 +5.26 0.0 0.0 +5.27 0.0 0.0 +5.28 0.0 0.0 +5.29 0.0 0.0 +5.3 0.0 0.0 +5.31 0.0 0.0 +5.32 0.0 0.0 +5.33 0.0 0.0 +5.34 0.0 0.0 +5.35 0.0 0.0 +5.36 0.0 0.0 +5.37 0.0 0.0 +5.38 0.0 0.0 +5.39 0.0 0.0 +5.4 0.0 0.0 +5.41 0.0 0.0 +5.42 0.0 0.0 +5.43 0.0 0.0 +5.44 0.0 0.0 +5.45 0.0 0.0 +5.46 0.0 0.0 +5.47 0.0 0.0 +5.48 0.0 0.0 +5.49 0.0 0.0 +5.5 0.0 0.0 +5.51 0.0 0.0 +5.52 0.0 0.0 +5.53 0.0 0.0 +5.54 0.0 0.0 +5.55 0.0 0.0 +5.56 0.0 0.0 +5.57 0.0 0.0 +5.58 0.0 0.0 +5.59 0.0 0.0 +5.6 0.0 0.0 +5.61 0.0 0.0 +5.62 0.0 0.0 +5.63 0.0 0.0 +5.64 0.0 0.0 +5.65 0.0 0.0 +5.66 0.0 0.0 +5.67 0.0 0.0 +5.68 0.0 0.0 +5.69 0.0 0.0 +5.7 0.0 0.0 +5.71 0.0 0.0 +5.72 0.0 0.0 +5.73 0.0 0.0 +5.74 0.0 0.0 +5.75 0.0 0.0 +5.76 0.0 0.0 +5.77 0.0 0.0 +5.78 0.0 0.0 +5.79 0.0 0.0 +5.8 0.0 0.0 +5.81 0.0 0.0 +5.82 0.0 0.0 +5.83 0.0 0.0 +5.84 0.0 0.0 +5.85 0.0 0.0 +5.86 0.0 0.0 +5.87 0.0 0.0 +5.88 0.0 0.0 +5.89 0.0 0.0 +5.9 0.0 0.0 +5.91 0.0 0.0 +5.92 0.0 0.0 +5.93 0.0 0.0 +5.94 0.0 0.0 +5.95 0.0 0.0 +5.96 0.0 0.0 +5.97 0.0 0.0 +5.98 0.0 0.0 +5.99 0.0 0.0 +6.0 0.0 0.0 +6.01 0.0 0.0 +# local +0.0 -7.3824473704E+00 +0.01 -7.3812697659E+00 +0.02 -7.3777245837E+00 +0.03 -7.3718243749E+00 +0.04 -7.3635816905E+00 +0.05 -7.3530139295E+00 +0.06 -7.3401431711E+00 +0.07 -7.3249959683E+00 +0.08 -7.3076031080E+00 +0.09 -7.2879993436E+00 +0.1 -7.2662231076E+00 +0.11 -7.2423162111E+00 +0.12 -7.2163235363E+00 +0.13 -7.1882927290E+00 +0.14 -7.1582738976E+00 +0.15 -7.1263193213E+00 +0.16 -7.0924831749E+00 +0.17 -7.0568212706E+00 +0.18 -7.0193908209E+00 +0.19 -6.9802502231E+00 +0.2 -6.9394588668E+00 +0.21 -6.8970769636E+00 +0.22 
-6.8531653983E+00 +0.23 -6.8077856012E+00 +0.24 -6.7609994384E+00 +0.25 -6.7128691203E+00 +0.26 -6.6634571231E+00 +0.27 -6.6128261248E+00 +0.28 -6.5610389507E+00 +0.29 -6.5081585274E+00 +0.3 -6.4542478436E+00 +0.31 -6.3993699152E+00 +0.32 -6.3435877532E+00 +0.33 -6.2869643332E+00 +0.34 -6.2295625648E+00 +0.35 -6.1714452603E+00 +0.36 -6.1126751004E+00 +0.37 -6.0533145978E+00 +0.38 -5.9934260574E+00 +0.39 -5.9330715308E+00 +0.4 -5.8723127672E+00 +0.41 -5.8112111589E+00 +0.42 -5.7498276813E+00 +0.43 -5.6882228258E+00 +0.44 -5.6264565276E+00 +0.45 -5.5645880861E+00 +0.46 -5.5026760781E+00 +0.47 -5.4407782641E+00 +0.48 -5.3789514911E+00 +0.49 -5.3172515715E+00 +0.5 -5.2557331733E+00 +0.51 -5.1944496983E+00 +0.52 -5.1334531413E+00 +0.53 -5.0727939327E+00 +0.54 -5.0125208304E+00 +0.55 -4.9526806984E+00 +0.56 -4.8933184053E+00 +0.57 -4.8344766015E+00 +0.58 -4.7761955947E+00 +0.59 -4.7185131158E+00 +0.6 -4.6614642490E+00 +0.61 -4.6050811188E+00 +0.62 -4.5493929153E+00 +0.63 -4.4944256168E+00 +0.64 -4.4402019202E+00 +0.65 -4.3867412382E+00 +0.66 -4.3340595177E+00 +0.67 -4.2821692757E+00 +0.68 -4.2310796697E+00 +0.69 -4.1807965917E+00 +0.7 -4.1313226501E+00 +0.71 -4.0826574704E+00 +0.72 -4.0347978888E+00 +0.73 -3.9877382038E+00 +0.74 -3.9414704800E+00 +0.75 -3.8959848485E+00 +0.76 -3.8512698499E+00 +0.77 -3.8073128101E+00 +0.78 -3.7641001660E+00 +0.79 -3.7216177873E+00 +0.8 -3.6798512666E+00 +0.81 -3.6387861674E+00 +0.82 -3.5984082176E+00 +0.83 -3.5587034427E+00 +0.84 -3.5196582305E+00 +0.85 -3.4812593249E+00 +0.86 -3.4434937476E+00 +0.87 -3.4063486484E+00 +0.88 -3.3698110888E+00 +0.89 -3.3338677493E+00 +0.9 -3.2985045609E+00 +0.91 -3.2637075627E+00 +0.92 -3.2294647053E+00 +0.93 -3.1957666631E+00 +0.94 -3.1626064086E+00 +0.95 -3.1299788012E+00 +0.96 -3.0978801763E+00 +0.97 -3.0663080784E+00 +0.98 -3.0352607939E+00 +0.99 -3.0047370809E+00 +1.0 -2.9747358888E+00 +1.01 -2.9452561151E+00 +1.02 -2.9162964073E+00 +1.03 -2.8878550086E+00 +1.04 -2.8599295343E+00 +1.05 
-2.8325169529E+00 +1.06 -2.8056135042E+00 +1.07 -2.7792146759E+00 +1.08 -2.7533152247E+00 +1.09 -2.7279090107E+00 +1.1 -2.7029892279E+00 +1.11 -2.6785483969E+00 +1.12 -2.6545784706E+00 +1.13 -2.6310707370E+00 +1.14 -2.6080160518E+00 +1.15 -2.5854049000E+00 +1.16 -2.5632274752E+00 +1.17 -2.5414736594E+00 +1.18 -2.5201332293E+00 +1.19 -2.4991958733E+00 +1.2 -2.4786512316E+00 +1.21 -2.4584890008E+00 +1.22 -2.4386989558E+00 +1.23 -2.4192709473E+00 +1.24 -2.4001950501E+00 +1.25 -2.3814614823E+00 +1.26 -2.3630605841E+00 +1.27 -2.3449830012E+00 +1.28 -2.3272195138E+00 +1.29 -2.3097610277E+00 +1.3 -2.2925987334E+00 +1.31 -2.2757238961E+00 +1.32 -2.2591279027E+00 +1.33 -2.2428023049E+00 +1.34 -2.2267386815E+00 +1.35 -2.2109286973E+00 +1.36 -2.1953640511E+00 +1.37 -2.1800364538E+00 +1.38 -2.1649376273E+00 +1.39 -2.1500592840E+00 +1.4 -2.1353931293E+00 +1.41 -2.1209308669E+00 +1.42 -2.1066642024E+00 +1.43 -2.0925848762E+00 +1.44 -2.0786847192E+00 +1.45 -2.0649555800E+00 +1.46 -2.0513896667E+00 +1.47 -2.0379790929E+00 +1.48 -2.0247167043E+00 +1.49 -2.0115954015E+00 +1.5 -1.9986087910E+00 +1.51 -1.9857511909E+00 +1.52 -1.9730172462E+00 +1.53 -1.9604033418E+00 +1.54 -1.9479054899E+00 +1.55 -1.9355225868E+00 +1.56 -1.9232526393E+00 +1.57 -1.9110969000E+00 +1.58 -1.8990559460E+00 +1.59 -1.8871329022E+00 +1.6 -1.8753306466E+00 +1.61 -1.8636535889E+00 +1.62 -1.8521057833E+00 +1.63 -1.8406918545E+00 +1.64 -1.8294152997E+00 +1.65 -1.8182794584E+00 +1.66 -1.8072855050E+00 +1.67 -1.7964339215E+00 +1.68 -1.7857226516E+00 +1.69 -1.7751484466E+00 +1.7 -1.7647067259E+00 +1.71 -1.7543913992E+00 +1.72 -1.7441975973E+00 +1.73 -1.7341198736E+00 +1.74 -1.7241551769E+00 +1.75 -1.7143025836E+00 +1.76 -1.7045606872E+00 +1.77 -1.6949290865E+00 +1.78 -1.6854065806E+00 +1.79 -1.6759901769E+00 +1.8 -1.6666775032E+00 +1.81 -1.6574675168E+00 +1.82 -1.6483588188E+00 +1.83 -1.6393496966E+00 +1.84 -1.6304384659E+00 +1.85 -1.6216236523E+00 +1.86 -1.6129037164E+00 +1.87 -1.6042771878E+00 +1.88 
-1.5957425861E+00 +1.89 -1.5872984657E+00 +1.9 -1.5789434174E+00 +1.91 -1.5706760161E+00 +1.92 -1.5624949244E+00 +1.93 -1.5543987361E+00 +1.94 -1.5463861762E+00 +1.95 -1.5384559029E+00 +1.96 -1.5306066597E+00 +1.97 -1.5228371950E+00 +1.98 -1.5151462616E+00 +1.99 -1.5075327030E+00 +2.0 -1.4999952835E+00 +2.01 -1.4925329139E+00 +2.02 -1.4851444346E+00 +2.03 -1.4778287552E+00 +2.04 -1.4705848143E+00 +2.05 -1.4634115164E+00 +2.06 -1.4563078779E+00 +2.07 -1.4492728445E+00 +2.08 -1.4423054491E+00 +2.09 -1.4354047221E+00 +2.1 -1.4285696813E+00 +2.11 -1.4217994342E+00 +2.12 -1.4150930238E+00 +2.13 -1.4084495764E+00 +2.14 -1.4018682130E+00 +2.15 -1.3953480370E+00 +2.16 -1.3888882403E+00 +2.17 -1.3824879562E+00 +2.18 -1.3761463800E+00 +2.19 -1.3698627240E+00 +2.2 -1.3636361553E+00 +2.21 -1.3574659450E+00 +2.22 -1.3513513123E+00 +2.23 -1.3452915018E+00 +2.24 -1.3392858081E+00 +2.25 -1.3333334744E+00 +2.26 -1.3274338163E+00 +2.27 -1.3215861463E+00 +2.28 -1.3157897421E+00 +2.29 -1.3100439737E+00 +2.3 -1.3043481656E+00 +2.31 -1.2987016554E+00 +2.32 -1.2931038351E+00 +2.33 -1.2875540522E+00 +2.34 -1.2820516981E+00 +2.35 -1.2765961859E+00 +2.36 -1.2711868863E+00 +2.37 -1.2658232390E+00 +2.38 -1.2605046772E+00 +2.39 -1.2552305958E+00 +2.4 -1.2500004767E+00 +2.41 -1.2448137690E+00 +2.42 -1.2396699045E+00 +2.43 -1.2345683891E+00 +2.44 -1.2295086930E+00 +2.45 -1.2244902830E+00 +2.46 -1.2195126858E+00 +2.47 -1.2145753938E+00 +2.48 -1.2096779044E+00 +2.49 -1.2048197648E+00 +2.5 -1.2000004895E+00 +2.51 -1.1952196023E+00 +2.52 -1.1904766709E+00 +2.53 -1.1857712322E+00 +2.54 -1.1811028319E+00 +2.55 -1.1764710586E+00 +2.56 -1.1718754712E+00 +2.57 -1.1673156341E+00 +2.58 -1.1627911562E+00 +2.59 -1.1583016187E+00 +2.6 -1.1538466012E+00 +2.61 -1.1494257329E+00 +2.62 -1.1450386175E+00 +2.63 -1.1406848461E+00 +2.64 -1.1363640685E+00 +2.65 -1.1320759107E+00 +2.66 -1.1278199728E+00 +2.67 -1.1235959244E+00 +2.68 -1.1194034091E+00 +2.69 -1.1152420487E+00 +2.7 -1.1111115170E+00 +2.71 
-1.1070114792E+00 +2.72 -1.1029415795E+00 +2.73 -1.0989014927E+00 +2.74 -1.0948909057E+00 +2.75 -1.0909094852E+00 +2.76 -1.0869569047E+00 +2.77 -1.0830328722E+00 +2.78 -1.0791370775E+00 +2.79 -1.0752691907E+00 +2.8 -1.0714289403E+00 +2.81 -1.0676160318E+00 +2.82 -1.0638301530E+00 +2.83 -1.0600710295E+00 +2.84 -1.0563383877E+00 +2.85 -1.0526319386E+00 +2.86 -1.0489513980E+00 +2.87 -1.0452965144E+00 +2.88 -1.0416670199E+00 +2.89 -1.0380626259E+00 +2.9 -1.0344830951E+00 +2.91 -1.0309281744E+00 +2.92 -1.0273975975E+00 +2.93 -1.0238911131E+00 +2.94 -1.0204084904E+00 +2.95 -1.0169494860E+00 +2.96 -1.0135138357E+00 +2.97 -1.0101013272E+00 +2.98 -1.0067117293E+00 +2.99 -1.0033448003E+00 +3.0 -1.0000003097E+00 +3.01 -9.9667804911E-01 +3.02 -9.9337779726E-01 +3.03 -9.9009931515E-01 +3.04 -9.8684240459E-01 +3.05 -9.8360685818E-01 +3.06 -9.8039246029E-01 +3.07 -9.7719899065E-01 +3.08 -9.7402626440E-01 +3.09 -9.7087408053E-01 +3.1 -9.6774222652E-01 +3.11 -9.6463050840E-01 +3.12 -9.6153874392E-01 +3.13 -9.5846674140E-01 +3.14 -9.5541429264E-01 +3.15 -9.5238122645E-01 +3.16 -9.4936736350E-01 +3.17 -9.4637251945E-01 +3.18 -9.4339649617E-01 +3.19 -9.4043913696E-01 +3.2 -9.3750026760E-01 +3.21 -9.3457970844E-01 +3.22 -9.3167727899E-01 +3.23 -9.2879282694E-01 +3.24 -9.2592618620E-01 +3.25 -9.2307718251E-01 +3.26 -9.2024565026E-01 +3.27 -9.1743144193E-01 +3.28 -9.1463439914E-01 +3.29 -9.1185435366E-01 +3.3 -9.0909115210E-01 +3.31 -9.0634465218E-01 +3.32 -9.0361470291E-01 +3.33 -9.0090114265E-01 +3.34 -8.9820382763E-01 +3.35 -8.9552262138E-01 +3.36 -8.9285737995E-01 +3.37 -8.9020794879E-01 +3.38 -8.8757419136E-01 +3.39 -8.8495597752E-01 +3.4 -8.8235317003E-01 +3.41 -8.7976562191E-01 +3.42 -8.7719320159E-01 +3.43 -8.7463578579E-01 +3.44 -8.7209324365E-01 +3.45 -8.6956543622E-01 +3.46 -8.6705223479E-01 +3.47 -8.6455352341E-01 +3.48 -8.6206917731E-01 +3.49 -8.5959906600E-01 +3.5 -8.5714306169E-01 +3.51 -8.5470105614E-01 +3.52 -8.5227293040E-01 +3.53 -8.4985856290E-01 +3.54 
-8.4745782498E-01 +3.55 -8.4507061639E-01 +3.56 -8.4269682374E-01 +3.57 -8.4033633363E-01 +3.58 -8.3798901852E-01 +3.59 -8.3565478279E-01 +3.6 -8.3333351949E-01 +3.61 -8.3102512052E-01 +3.62 -8.2872946795E-01 +3.63 -8.2644646108E-01 +3.64 -8.2417600238E-01 +3.65 -8.2191798877E-01 +3.66 -8.1967231254E-01 +3.67 -8.1743886634E-01 +3.68 -8.1521756218E-01 +3.69 -8.1300830181E-01 +3.7 -8.1081098693E-01 +3.71 -8.0862550619E-01 +3.72 -8.0645177730E-01 +3.73 -8.0428970779E-01 +3.74 -8.0213920396E-01 +3.75 -8.0000016527E-01 +3.76 -7.9787249939E-01 +3.77 -7.9575612448E-01 +3.78 -7.9365095120E-01 +3.79 -7.9155689023E-01 +3.8 -7.8947383894E-01 +3.81 -7.8740172506E-01 +3.82 -7.8534046368E-01 +3.83 -7.8328996962E-01 +3.84 -7.8125015177E-01 +3.85 -7.7922092485E-01 +3.86 -7.7720221538E-01 +3.87 -7.7519394216E-01 +3.88 -7.7319602398E-01 +3.89 -7.7120836876E-01 +3.9 -7.6923090699E-01 +3.91 -7.6726356352E-01 +3.92 -7.6530626095E-01 +3.93 -7.6335891957E-01 +3.94 -7.6142145427E-01 +3.95 -7.5949380177E-01 +3.96 -7.5757588828E-01 +3.97 -7.5566763999E-01 +3.98 -7.5376897721E-01 +3.99 -7.5187982665E-01 +4.0 -7.5000012482E-01 +4.01 -7.4812980134E-01 +4.02 -7.4626878584E-01 +4.03 -7.4441699946E-01 +4.04 -7.4257437862E-01 +4.05 -7.4074086012E-01 +4.06 -7.3891637689E-01 +4.07 -7.3710086183E-01 +4.08 -7.3529423757E-01 +4.09 -7.3349644823E-01 +4.1 -7.3170743153E-01 +4.11 -7.2992712353E-01 +4.12 -7.2815546025E-01 +4.13 -7.2639236646E-01 +4.14 -7.2463779202E-01 +4.15 -7.2289167623E-01 +4.16 -7.2115395812E-01 +4.17 -7.1942457633E-01 +4.18 -7.1770345938E-01 +4.19 -7.1599056003E-01 +4.2 -7.1428582013E-01 +4.21 -7.1258918153E-01 +4.22 -7.1090058579E-01 +4.23 -7.0921996440E-01 +4.24 -7.0754727262E-01 +4.25 -7.0588245501E-01 +4.26 -7.0422545614E-01 +4.27 -7.0257622058E-01 +4.28 -7.0093468249E-01 +4.29 -6.9930079886E-01 +4.3 -6.9767451726E-01 +4.31 -6.9605578483E-01 +4.32 -6.9444454872E-01 +4.33 -6.9284074715E-01 +4.34 -6.9124433646E-01 +4.35 -6.8965526802E-01 +4.36 -6.8807349144E-01 +4.37 
-6.8649895633E-01 +4.38 -6.8493160544E-01 +4.39 -6.8337139305E-01 +4.4 -6.8181827480E-01 +4.41 -6.8027220264E-01 +4.42 -6.7873312854E-01 +4.43 -6.7720100026E-01 +4.44 -6.7567576872E-01 +4.45 -6.7415739420E-01 +4.46 -6.7264583091E-01 +4.47 -6.7114103304E-01 +4.48 -6.6964295385E-01 +4.49 -6.6815153971E-01 +4.5 -6.6666675588E-01 +4.51 -6.6518855869E-01 +4.52 -6.6371690447E-01 +4.53 -6.6225174956E-01 +4.54 -6.6079304326E-01 +4.55 -6.5934074757E-01 +4.56 -6.5789482363E-01 +4.57 -6.5645522981E-01 +4.58 -6.5502192447E-01 +4.59 -6.5359486328E-01 +4.6 -6.5217400096E-01 +4.61 -6.5075930467E-01 +4.62 -6.4935073471E-01 +4.63 -6.4794825138E-01 +4.64 -6.4655181499E-01 +4.65 -6.4516137866E-01 +4.66 -6.4377690915E-01 +4.67 -6.4239837075E-01 +4.68 -6.4102572560E-01 +4.69 -6.3965893587E-01 +4.7 -6.3829796182E-01 +4.71 -6.3694276063E-01 +4.72 -6.3559330341E-01 +4.73 -6.3424955407E-01 +4.74 -6.3291147654E-01 +4.75 -6.3157903473E-01 +4.76 -6.3025218749E-01 +4.77 -6.2893090085E-01 +4.78 -6.2761514429E-01 +4.79 -6.2630488341E-01 +4.8 -6.2500008380E-01 +4.81 -6.2370071106E-01 +4.82 -6.2240672338E-01 +4.83 -6.2111809390E-01 +4.84 -6.1983479105E-01 +4.85 -6.1855678202E-01 +4.86 -6.1728403402E-01 +4.87 -6.1601651350E-01 +4.88 -6.1475418088E-01 +4.89 -6.1349701253E-01 +4.9 -6.1224497718E-01 +4.91 -6.1099804356E-01 +4.92 -6.0975618039E-01 +4.93 -6.0851935437E-01 +4.94 -6.0728753037E-01 +4.95 -6.0606068480E-01 +4.96 -6.0483878785E-01 +4.97 -6.0362180971E-01 +4.98 -6.0240972057E-01 +4.99 -6.0120248790E-01 +5.0 -6.0000007964E-01 +5.01 -5.9880247275E-01 +5.02 -5.9760963882E-01 +5.03 -5.9642154943E-01 +5.04 -5.9523817614E-01 +5.05 -5.9405948777E-01 +5.06 -5.9288545395E-01 +5.07 -5.9171605272E-01 +5.08 -5.9055125698E-01 +5.09 -5.8939103965E-01 +5.1 -5.8823537362E-01 +5.11 -5.8708422951E-01 +5.12 -5.8593757739E-01 +5.13 -5.8479539688E-01 +5.14 -5.8365766214E-01 +5.15 -5.8252434734E-01 +5.16 -5.8139542663E-01 +5.17 -5.8027087294E-01 +5.18 -5.7915065560E-01 +5.19 -5.7803475621E-01 +5.2 
-5.7692315016E-01 +5.21 -5.7581581280E-01 +5.22 -5.7471271951E-01 +5.23 -5.7361384566E-01 +5.24 -5.7251915960E-01 +5.25 -5.7142864450E-01 +5.26 -5.7034227717E-01 +5.27 -5.6926003411E-01 +5.28 -5.6818189184E-01 +5.29 -5.6710782688E-01 +5.3 -5.6603781082E-01 +5.31 -5.6497182354E-01 +5.32 -5.6390984492E-01 +5.33 -5.6285185259E-01 +5.34 -5.6179782416E-01 +5.35 -5.6074773722E-01 +5.36 -5.5970156703E-01 +5.37 -5.5865928922E-01 +5.38 -5.5762088710E-01 +5.39 -5.5658633932E-01 +5.4 -5.5555562455E-01 +5.41 -5.5452872144E-01 +5.42 -5.5350560863E-01 +5.43 -5.5248625863E-01 +5.44 -5.5147065646E-01 +5.45 -5.5045878243E-01 +5.46 -5.4945061618E-01 +5.47 -5.4844613737E-01 +5.48 -5.4744532564E-01 +5.49 -5.4644815794E-01 +5.5 -5.4545461295E-01 +5.51 -5.4446467529E-01 +5.52 -5.4347832555E-01 +5.53 -5.4249554433E-01 +5.54 -5.4151631224E-01 +5.55 -5.4054060985E-01 +5.56 -5.3956841245E-01 +5.57 -5.3859970546E-01 +5.58 -5.3763447159E-01 +5.59 -5.3667269235E-01 +5.6 -5.3571434922E-01 +5.61 -5.3475942372E-01 +5.62 -5.3380789633E-01 +5.63 -5.3285974433E-01 +5.64 -5.3191495541E-01 +5.65 -5.3097351196E-01 +5.66 -5.3003539632E-01 +5.67 -5.2910059086E-01 +5.68 -5.2816907795E-01 +5.69 -5.2724083735E-01 +5.7 -5.2631585067E-01 +5.71 -5.2539410476E-01 +5.72 -5.2447558280E-01 +5.73 -5.2356026797E-01 +5.74 -5.2264814347E-01 +5.75 -5.2173919248E-01 +5.76 -5.2083339456E-01 +5.77 -5.1993073445E-01 +5.78 -5.1903119862E-01 +5.79 -5.1813477104E-01 +5.8 -5.1724143568E-01 +5.81 -5.1635117650E-01 +5.82 -5.1546397748E-01 +5.83 -5.1457981849E-01 +5.84 -5.1369868619E-01 +5.85 -5.1282056719E-01 +5.86 -5.1194544619E-01 +5.87 -5.1107330792E-01 +5.88 -5.1020413708E-01 +5.89 -5.0933791839E-01 +5.9 -5.0847463252E-01 +5.91 -5.0761426693E-01 +5.92 -5.0675680882E-01 +5.93 -5.0590224363E-01 +5.94 -5.0505055676E-01 +5.95 -5.0420173367E-01 +5.96 -5.0335575976E-01 +5.97 -5.0251261692E-01 +5.98 -5.0167229238E-01 +5.99 -5.0083477440E-01 +6.0 -5.0000004908E-01 +6.01 -4.9916810253E-01 diff --git a/potentials/pseudo.Li_pbe 
b/potentials/pseudo.Li_pbe deleted file mode 100644 index 0f6b1473..00000000 --- a/potentials/pseudo.Li_pbe +++ /dev/null @@ -1,1248 +0,0 @@ -# Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at -# the Lawrence Livermore National Laboratory. -# LLNL-CODE-743438 -# All rights reserved. -# This file is part of MGmol. For details, see https://github.com/llnl/mgmol. -# Please also read this link https://github.com/llnl/mgmol/LICENSE -# -# obtained from Tadashi, 6/26/03 -# 3.00 1 2 6 1.80 : z nc nv iexc rnlc -# 1 0 2.00 : n l f -# 2 0 1.00 -# 2 1 0.00 -#2 t : lmax s_pp_def -#Generated on 20020124, at 165109.318, time zone -0800 -# Short description of the species type. One line only! -Li PBE (TM) -#color -Yellow -#radii of balls and covalent bonds -1.0 2.7 -# Nlcc flag -0 -# Atomic number. -3 -# Atomic mass. -6.941 -# Number of valence electrons. -1 -# Gaussian core charge parameter rc. -1. -# Number of potentials -3 -# l-value for state which is local -2 0 -# Local potential radius -3.2 -# Non-local potential radius -3.2 -# number of points in radial grid -401 -# log mesh parameter -0. 
-# radial grid, potential, and reference state for l=0 -0.00 0.565792564331E+00 0.135186864319E+00 -0.01 0.565791454518E+00 0.135189278893E+00 -0.02 0.565791845509E+00 0.135197551572E+00 -0.03 0.565791763062E+00 0.135211380681E+00 -0.04 0.565790306539E+00 0.135230751862E+00 -0.05 0.565786677278E+00 0.135255663484E+00 -0.06 0.565780127388E+00 0.135286116357E+00 -0.07 0.565769959504E+00 0.135322112269E+00 -0.08 0.565755529963E+00 0.135363653613E+00 -0.09 0.565736250559E+00 0.135410743262E+00 -0.10 0.565711588291E+00 0.135463384514E+00 -0.11 0.565681063234E+00 0.135521581061E+00 -0.12 0.565644244804E+00 0.135585336970E+00 -0.13 0.565600746712E+00 0.135654656666E+00 -0.14 0.565550220900E+00 0.135729544918E+00 -0.15 0.565492350674E+00 0.135810006826E+00 -0.16 0.565426843285E+00 0.135896047804E+00 -0.17 0.565353422123E+00 0.135987673570E+00 -0.18 0.565271818696E+00 0.136084890124E+00 -0.19 0.565181764520E+00 0.136187703737E+00 -0.20 0.565082983059E+00 0.136296120931E+00 -0.21 0.564975181781E+00 0.136410148461E+00 -0.22 0.564858044434E+00 0.136529793298E+00 -0.23 0.564731223610E+00 0.136655062607E+00 -0.24 0.564594333622E+00 0.136785963726E+00 -0.25 0.564446943740E+00 0.136922504144E+00 -0.26 0.564288571936E+00 0.137064691479E+00 -0.27 0.564118678928E+00 0.137212533455E+00 -0.28 0.563936662662E+00 0.137366037873E+00 -0.29 0.563741853541E+00 0.137525212589E+00 -0.30 0.563533509606E+00 0.137690065485E+00 -0.31 0.563310812655E+00 0.137860604445E+00 -0.32 0.563072864807E+00 0.138036837317E+00 -0.33 0.562818684862E+00 0.138218771894E+00 -0.34 0.562547206034E+00 0.138406415876E+00 -0.35 0.562257273690E+00 0.138599776840E+00 -0.36 0.561947643382E+00 0.138798862207E+00 -0.37 0.561616979300E+00 0.139003679209E+00 -0.38 0.561263853377E+00 0.139214234852E+00 -0.39 0.560886744621E+00 0.139430535882E+00 -0.40 0.560484038770E+00 0.139652588747E+00 -0.41 0.560054028277E+00 0.139880399559E+00 -0.42 0.559594912621E+00 0.140113974057E+00 -0.43 0.559104798899E+00 0.140353317563E+00 -0.44 
0.558581702687E+00 0.140598434946E+00 -0.45 0.558023549151E+00 0.140849330574E+00 -0.46 0.557428174385E+00 0.141106008280E+00 -0.47 0.556793326974E+00 0.141368471307E+00 -0.48 0.556116669765E+00 0.141636722275E+00 -0.49 0.555395781616E+00 0.141910763126E+00 -0.50 0.554628159196E+00 0.142190595081E+00 -0.51 0.553811219296E+00 0.142476218590E+00 -0.52 0.552942301215E+00 0.142767633284E+00 -0.53 0.552018669332E+00 0.143064837933E+00 -0.54 0.551037515446E+00 0.143367830394E+00 -0.55 0.549995959902E+00 0.143676607543E+00 -0.56 0.548891055043E+00 0.143991165230E+00 -0.57 0.547719788628E+00 0.144311498246E+00 -0.58 0.546479086011E+00 0.144637600267E+00 -0.59 0.545165810161E+00 0.144969463751E+00 -0.60 0.543776766801E+00 0.145307079924E+00 -0.61 0.542308708013E+00 0.145650438766E+00 -0.62 0.540758330765E+00 0.145999528858E+00 -0.63 0.539122281362E+00 0.146354337355E+00 -0.64 0.537397160708E+00 0.146714850032E+00 -0.65 0.535579521189E+00 0.147081051057E+00 -0.66 0.533665872432E+00 0.147452923007E+00 -0.67 0.531652685710E+00 0.147830446936E+00 -0.68 0.529536389730E+00 0.148213602015E+00 -0.69 0.527313380424E+00 0.148602365785E+00 -0.70 0.524980019018E+00 0.148996713944E+00 -0.71 0.522532634142E+00 0.149396620169E+00 -0.72 0.519967529235E+00 0.149802056452E+00 -0.73 0.517280977558E+00 0.150212992433E+00 -0.74 0.514469233415E+00 0.150629396008E+00 -0.75 0.511528527844E+00 0.151051232676E+00 -0.76 0.508455077338E+00 0.151478465879E+00 -0.77 0.505245083810E+00 0.151911056790E+00 -0.78 0.501894740707E+00 0.152348964123E+00 -0.79 0.498400235520E+00 0.152792144466E+00 -0.80 0.494757756833E+00 0.153240551576E+00 -0.81 0.490963495756E+00 0.153694137184E+00 -0.82 0.487013658197E+00 0.154152849868E+00 -0.83 0.482904461604E+00 0.154616636225E+00 -0.84 0.478632156517E+00 0.155085439443E+00 -0.85 0.474193015478E+00 0.155559200707E+00 -0.86 0.469583367112E+00 0.156037857623E+00 -0.87 0.464799575834E+00 0.156521345662E+00 -0.88 0.459838088204E+00 0.157009596699E+00 -0.89 0.454695408356E+00 
0.157502540157E+00 -0.90 0.449368146101E+00 0.158000102088E+00 -0.91 0.443853007998E+00 0.158502205487E+00 -0.92 0.438146816772E+00 0.159008770539E+00 -0.93 0.432246560465E+00 0.159519713461E+00 -0.94 0.426149332525E+00 0.160034948440E+00 -0.95 0.419852488361E+00 0.160554384787E+00 -0.96 0.413353478708E+00 0.161077930227E+00 -0.97 0.406650093670E+00 0.161605487462E+00 -0.98 0.399740278545E+00 0.162136956937E+00 -0.99 0.392622296566E+00 0.162672235269E+00 -1.00 0.385294753890E+00 0.163211214957E+00 -1.01 0.377756446910E+00 0.163753786565E+00 -1.02 0.370006748510E+00 0.164299834901E+00 -1.03 0.362045188428E+00 0.164849243263E+00 -1.04 0.353871866418E+00 0.165401890109E+00 -1.05 0.345487329269E+00 0.165957650099E+00 -1.06 0.336892389378E+00 0.166516395913E+00 -1.07 0.328088698274E+00 0.167077994011E+00 -1.08 0.319078106137E+00 0.167642309399E+00 -1.09 0.309863131855E+00 0.168209202623E+00 -1.10 0.300447014480E+00 0.168778529429E+00 -1.11 0.290833172522E+00 0.169350144431E+00 -1.12 0.281026035523E+00 0.169923895960E+00 -1.13 0.271030466904E+00 0.170499629654E+00 -1.14 0.260851691962E+00 0.171077188959E+00 -1.15 0.250496025034E+00 0.171656410739E+00 -1.16 0.239969993102E+00 0.172237130569E+00 -1.17 0.229280689189E+00 0.172819180503E+00 -1.18 0.218436152166E+00 0.173402386705E+00 -1.19 0.207444532175E+00 0.173986574636E+00 -1.20 0.196314615787E+00 0.174571565342E+00 -1.21 0.185055919212E+00 0.175157174817E+00 -1.22 0.173678024343E+00 0.175743218552E+00 -1.23 0.162191051234E+00 0.176329507323E+00 -1.24 0.150605631539E+00 0.176915847422E+00 -1.25 0.138932431680E+00 0.177502044754E+00 -1.26 0.127182423849E+00 0.178087900753E+00 -1.27 0.115366871347E+00 0.178673212598E+00 -1.28 0.103497049682E+00 0.179257777174E+00 -1.29 0.915842538990E-01 0.179841387652E+00 -1.30 0.796398016438E-01 0.180423832900E+00 -1.31 0.676750093549E-01 0.181004901516E+00 -1.32 0.557009920576E-01 0.181584379560E+00 -1.33 0.437285173400E-01 0.182162048711E+00 -1.34 0.317683183110E-01 0.182737690216E+00 
-1.35 0.198308852390E-01 0.183311084098E+00 -1.36 0.792590861738E-02 0.183882006356E+00 -1.37 -0.393708546836E-02 0.184450232159E+00 -1.38 -0.157486995633E-01 0.185015536359E+00 -1.39 -0.275006304569E-01 0.185577691084E+00 -1.40 -0.391851278030E-01 0.186136467090E+00 -1.41 -0.507944647328E-01 0.186691635103E+00 -1.42 -0.623218306353E-01 0.187242964906E+00 -1.43 -0.737617135724E-01 0.187790224949E+00 -1.44 -0.851087002317E-01 0.188333183579E+00 -1.45 -0.963577282132E-01 0.188871609148E+00 -1.46 -0.107505672167E+00 0.189405270031E+00 -1.47 -0.118550071471E+00 0.189933934611E+00 -1.48 -0.129488490830E+00 0.190457371286E+00 -1.49 -0.140319972566E+00 0.190975349528E+00 -1.50 -0.151045845842E+00 0.191487640461E+00 -1.51 -0.161667636136E+00 0.191994015352E+00 -1.52 -0.172187159032E+00 0.192494245800E+00 -1.53 -0.182609370427E+00 0.192988106996E+00 -1.54 -0.192941147155E+00 0.193475376329E+00 -1.55 -0.203189393657E+00 0.193955831219E+00 -1.56 -0.213362239971E+00 0.194429250486E+00 -1.57 -0.223473528480E+00 0.194895419469E+00 -1.58 -0.233538764526E+00 0.195354125410E+00 -1.59 -0.243573455806E+00 0.195805155550E+00 -1.60 -0.253594887442E+00 0.196248300351E+00 -1.61 -0.263625336286E+00 0.196683359312E+00 -1.62 -0.273687948990E+00 0.197110133506E+00 -1.63 -0.283805890193E+00 0.197528424027E+00 -1.64 -0.294006393132E+00 0.197938037034E+00 -1.65 -0.304325896966E+00 0.198338790136E+00 -1.66 -0.314802106618E+00 0.198730502522E+00 -1.67 -0.325472685410E+00 0.199112993395E+00 -1.68 -0.336362632216E+00 0.199486087955E+00 -1.69 -0.347466516274E+00 0.199849625803E+00 -1.70 -0.358774430088E+00 0.200203448658E+00 -1.71 -0.370276466935E+00 0.200547398238E+00 -1.72 -0.381968774596E+00 0.200881321501E+00 -1.73 -0.393868162029E+00 0.201205083278E+00 -1.74 -0.405995841654E+00 0.201518552209E+00 -1.75 -0.418373025891E+00 0.201821596933E+00 -1.76 -0.430971618014E+00 0.202114089057E+00 -1.77 -0.443433002131E+00 0.202395920073E+00 -1.78 -0.455262722590E+00 0.202666989649E+00 -1.79 
-0.465965906466E+00 0.202927197475E+00 -1.80 -0.475063781985E+00 0.203176443982E+00 -1.81 -0.482440981673E+00 0.203414646287E+00 -1.82 -0.488341737783E+00 0.203641738020E+00 -1.83 -0.493025622782E+00 0.203857653514E+00 -1.84 -0.496752196125E+00 0.204062327106E+00 -1.85 -0.499735487640E+00 0.204255700760E+00 -1.86 -0.502041964605E+00 0.204437741166E+00 -1.87 -0.503708252819E+00 0.204608420015E+00 -1.88 -0.504770978080E+00 0.204767708995E+00 -1.89 -0.505268020163E+00 0.204915580746E+00 -1.90 -0.505262699874E+00 0.205052027155E+00 -1.91 -0.504842022132E+00 0.205177058030E+00 -1.92 -0.504093893884E+00 0.205290683859E+00 -1.93 -0.503106222076E+00 0.205392915133E+00 -1.94 -0.501959562238E+00 0.205483766602E+00 -1.95 -0.500691161675E+00 0.205563278114E+00 -1.96 -0.499322462220E+00 0.205631498679E+00 -1.97 -0.497874882550E+00 0.205688477319E+00 -1.98 -0.496369837873E+00 0.205734263063E+00 -1.99 -0.494824369849E+00 0.205768912960E+00 -2.00 -0.493242596565E+00 0.205792507752E+00 -2.01 -0.491626260514E+00 0.205805132539E+00 -2.02 -0.489977104191E+00 0.205806872422E+00 -2.03 -0.488296862439E+00 0.205797812550E+00 -2.04 -0.486585900698E+00 0.205778047088E+00 -2.05 -0.484841644787E+00 0.205747689552E+00 -2.06 -0.483061140110E+00 0.205706855964E+00 -2.07 -0.481241432069E+00 0.205655662344E+00 -2.08 -0.479379573812E+00 0.205594224744E+00 -2.09 -0.477474160448E+00 0.205522665662E+00 -2.10 -0.475527179642E+00 0.205441121769E+00 -2.11 -0.473541070780E+00 0.205349731625E+00 -2.12 -0.471518273251E+00 0.205248633790E+00 -2.13 -0.469461226893E+00 0.205137966826E+00 -2.14 -0.467373835494E+00 0.205017871416E+00 -2.15 -0.465264719927E+00 0.204888495095E+00 -2.16 -0.463143449330E+00 0.204749986773E+00 -2.17 -0.461019592836E+00 0.204602495359E+00 -2.18 -0.458902719582E+00 0.204446169763E+00 -2.19 -0.456801544181E+00 0.204281158571E+00 -2.20 -0.454718725723E+00 0.204107608072E+00 -2.21 -0.452654307658E+00 0.203925663565E+00 -2.22 -0.450608323068E+00 0.203735470343E+00 -2.23 -0.448580805037E+00 
0.203537173700E+00 -2.24 -0.446571782181E+00 0.203330918796E+00 -2.25 -0.444581131791E+00 0.203116846265E+00 -2.26 -0.442608546555E+00 0.202895091216E+00 -2.27 -0.440653707955E+00 0.202665788424E+00 -2.28 -0.438716297476E+00 0.202429072664E+00 -2.29 -0.436795996602E+00 0.202185078711E+00 -2.30 -0.434892501747E+00 0.201933940230E+00 -2.31 -0.433005589696E+00 0.201675784912E+00 -2.32 -0.431135064151E+00 0.201410738448E+00 -2.33 -0.429280728838E+00 0.201138926526E+00 -2.34 -0.427442387480E+00 0.200860474835E+00 -2.35 -0.425619843791E+00 0.200575509039E+00 -2.36 -0.423812900243E+00 0.200284151693E+00 -2.37 -0.422021356849E+00 0.199986519187E+00 -2.38 -0.420245013334E+00 0.199682727186E+00 -2.39 -0.418483669421E+00 0.199372891356E+00 -2.40 -0.416737124834E+00 0.199057127363E+00 -2.41 -0.415005179673E+00 0.198735550730E+00 -2.42 -0.413287645439E+00 0.198408272685E+00 -2.43 -0.411584346817E+00 0.198075399487E+00 -2.44 -0.409895109227E+00 0.197737037116E+00 -2.45 -0.408219758089E+00 0.197393291555E+00 -2.46 -0.406558118823E+00 0.197044268786E+00 -2.47 -0.404910017175E+00 0.196690074596E+00 -2.48 -0.403275286321E+00 0.196330810374E+00 -2.49 -0.401653766815E+00 0.195966573139E+00 -2.50 -0.400045299528E+00 0.195597459721E+00 -2.51 -0.398449725331E+00 0.195223566953E+00 -2.52 -0.396866885095E+00 0.194844991664E+00 -2.53 -0.395296619878E+00 0.194461830587E+00 -2.54 -0.393738777444E+00 0.194074176827E+00 -2.55 -0.392193213951E+00 0.193682118959E+00 -2.56 -0.390659786087E+00 0.193285745271E+00 -2.57 -0.389138350540E+00 0.192885144050E+00 -2.58 -0.387628763998E+00 0.192480403585E+00 -2.59 -0.386130883161E+00 0.192071612156E+00 -2.60 -0.384644568387E+00 0.191658855933E+00 -2.61 -0.383169688783E+00 0.191242216039E+00 -2.62 -0.381706114736E+00 0.190821772859E+00 -2.63 -0.380253716635E+00 0.190397606776E+00 -2.64 -0.378812364866E+00 0.189969798175E+00 -2.65 -0.377381929818E+00 0.189538427440E+00 -2.66 -0.375962282872E+00 0.189103574373E+00 -2.67 -0.374553303182E+00 0.188665314231E+00 
-2.68 -0.373154873564E+00 0.188223720127E+00 -2.69 -0.371766876854E+00 0.187778865164E+00 -2.70 -0.370389195889E+00 0.187330822443E+00 -2.71 -0.369021713505E+00 0.186879665066E+00 -2.72 -0.367664312563E+00 0.186425466121E+00 -2.73 -0.366316879306E+00 0.185968296666E+00 -2.74 -0.364979306898E+00 0.185508223615E+00 -2.75 -0.363651489347E+00 0.185045313373E+00 -2.76 -0.362333320662E+00 0.184579632343E+00 -2.77 -0.361024694850E+00 0.184111246933E+00 -2.78 -0.359725505921E+00 0.183640223546E+00 -2.79 -0.358435648116E+00 0.183166628446E+00 -2.80 -0.357155020623E+00 0.182690524873E+00 -2.81 -0.355883527338E+00 0.182211973191E+00 -2.82 -0.354621072343E+00 0.181731033651E+00 -2.83 -0.353367559722E+00 0.181247766503E+00 -2.84 -0.352122893557E+00 0.180762231996E+00 -2.85 -0.350886977931E+00 0.180274490380E+00 -2.86 -0.349659717323E+00 0.179784601659E+00 -2.87 -0.348441021306E+00 0.179292622661E+00 -2.88 -0.347230802975E+00 0.178798608020E+00 -2.89 -0.346028975496E+00 0.178302612328E+00 -2.90 -0.344835452034E+00 0.177804690173E+00 -2.91 -0.343650145755E+00 0.177304896145E+00 -2.92 -0.342472969825E+00 0.176803284836E+00 -2.93 -0.341303837694E+00 0.176299910654E+00 -2.94 -0.340142667189E+00 0.175794825228E+00 -2.95 -0.338989379581E+00 0.175288078000E+00 -2.96 -0.337843896235E+00 0.174779718349E+00 -2.97 -0.336706138515E+00 0.174269795659E+00 -2.98 -0.335576027785E+00 0.173758359311E+00 -2.99 -0.334453485410E+00 0.173245458687E+00 -3.00 -0.333338432816E+00 0.172731143128E+00 -3.01 -0.332230794350E+00 0.172215460083E+00 -3.02 -0.331130498471E+00 0.171698454339E+00 -3.03 -0.330037473950E+00 0.171180170481E+00 -3.04 -0.328951649553E+00 0.170660653095E+00 -3.05 -0.327872954052E+00 0.170139946765E+00 -3.06 -0.326801316215E+00 0.169618096076E+00 -3.07 -0.325736664810E+00 0.169095145616E+00 -3.08 -0.324678929621E+00 0.168571139299E+00 -3.09 -0.323628044884E+00 0.168046118099E+00 -3.10 -0.322583946062E+00 0.167520122183E+00 -3.11 -0.321546568616E+00 0.166993191715E+00 -3.12 
-0.320515848007E+00 0.166465366863E+00 -3.13 -0.319491719697E+00 0.165936687792E+00 -3.14 -0.318474119147E+00 0.165407194667E+00 -3.15 -0.317462981877E+00 0.164876927617E+00 -3.16 -0.316458245854E+00 0.164345925118E+00 -3.17 -0.315459852357E+00 0.163814223423E+00 -3.18 -0.314467742901E+00 0.163281858622E+00 -3.19 -0.313481858997E+00 0.162748866810E+00 -3.20 -0.312502142161E+00 0.162215284076E+00 -3.21 -0.311528533905E+00 0.161681146515E+00 -3.22 -0.310560975743E+00 0.161146490217E+00 -3.23 -0.309599409400E+00 0.160611351132E+00 -3.24 -0.308643779592E+00 0.160075763154E+00 -3.25 -0.307694033256E+00 0.159539758657E+00 -3.26 -0.306750117382E+00 0.159003369978E+00 -3.27 -0.305811978958E+00 0.158466629455E+00 -3.28 -0.304879564973E+00 0.157929569425E+00 -3.29 -0.303952822418E+00 0.157392222226E+00 -3.30 -0.303031698282E+00 0.156854620193E+00 -3.31 -0.302116139781E+00 0.156316795507E+00 -3.32 -0.301206096938E+00 0.155778778385E+00 -3.33 -0.300301521664E+00 0.155240597728E+00 -3.34 -0.299402365905E+00 0.154702282413E+00 -3.35 -0.298508581607E+00 0.154163861316E+00 -3.36 -0.297620120714E+00 0.153625363312E+00 -3.37 -0.296736935173E+00 0.153086817278E+00 -3.38 -0.295858976930E+00 0.152548252090E+00 -3.39 -0.294986198018E+00 0.152009696560E+00 -3.40 -0.294118552598E+00 0.151471177992E+00 -3.41 -0.293255997006E+00 0.150932722140E+00 -3.42 -0.292398487674E+00 0.150394354689E+00 -3.43 -0.291545981037E+00 0.149856101326E+00 -3.44 -0.290698433528E+00 0.149317987736E+00 -3.45 -0.289855801582E+00 0.148780039604E+00 -3.46 -0.289018041630E+00 0.148242282617E+00 -3.47 -0.288185110110E+00 0.147704742457E+00 -3.48 -0.287356964427E+00 0.147167444108E+00 -3.49 -0.286533564634E+00 0.146630410633E+00 -3.50 -0.285714871231E+00 0.146093664778E+00 -3.51 -0.284900844716E+00 0.145557229285E+00 -3.52 -0.284091445587E+00 0.145021126897E+00 -3.53 -0.283286634344E+00 0.144485380359E+00 -3.54 -0.282486371485E+00 0.143950012413E+00 -3.55 -0.281690617509E+00 0.143415045802E+00 -3.56 -0.280899333023E+00 
0.142880503190E+00 -3.57 -0.280112480541E+00 0.142346405835E+00 -3.58 -0.279330024193E+00 0.141812773802E+00 -3.59 -0.278551928160E+00 0.141279627120E+00 -3.60 -0.277778156624E+00 0.140746985815E+00 -3.61 -0.277008673767E+00 0.140214869917E+00 -3.62 -0.276243443770E+00 0.139683299452E+00 -3.63 -0.275482430816E+00 0.139152294447E+00 -3.64 -0.274725599086E+00 0.138621874932E+00 -3.65 -0.273972913029E+00 0.138092060732E+00 -3.66 -0.273224339180E+00 0.137562870106E+00 -3.67 -0.272479845051E+00 0.137034320576E+00 -3.68 -0.271739398163E+00 0.136506429658E+00 -3.69 -0.271002966034E+00 0.135979214870E+00 -3.70 -0.270270516185E+00 0.135452693728E+00 -3.71 -0.269542016133E+00 0.134926883751E+00 -3.72 -0.268817433400E+00 0.134401802456E+00 -3.73 -0.268096735504E+00 0.133877467359E+00 -3.74 -0.267379890210E+00 0.133353895789E+00 -3.75 -0.266666867178E+00 0.132831103617E+00 -3.76 -0.265957636947E+00 0.132309106040E+00 -3.77 -0.265252170062E+00 0.131787918247E+00 -3.78 -0.264550437068E+00 0.131267555430E+00 -3.79 -0.263852408508E+00 0.130748032780E+00 -3.80 -0.263158054928E+00 0.130229365489E+00 -3.81 -0.262467346872E+00 0.129711568747E+00 -3.82 -0.261780254885E+00 0.129194657745E+00 -3.83 -0.261096749594E+00 0.128678647609E+00 -3.84 -0.260416803058E+00 0.128163552338E+00 -3.85 -0.259740388525E+00 0.127649384992E+00 -3.86 -0.259067479284E+00 0.127136158600E+00 -3.87 -0.258398048620E+00 0.126623886192E+00 -3.88 -0.257732069821E+00 0.126112580800E+00 -3.89 -0.257069516173E+00 0.125602255452E+00 -3.90 -0.256410360964E+00 0.125092923179E+00 -3.91 -0.255754577480E+00 0.124584597011E+00 -3.92 -0.255102139010E+00 0.124077289978E+00 -3.93 -0.254453019423E+00 0.123571014637E+00 -3.94 -0.253807194213E+00 0.123065782233E+00 -3.95 -0.253164639154E+00 0.122561603780E+00 -3.96 -0.252525330019E+00 0.122058490298E+00 -3.97 -0.251889242580E+00 0.121556452803E+00 -3.98 -0.251256352611E+00 0.121055502313E+00 -3.99 -0.250626635885E+00 0.120555649843E+00 -4.00 -0.250000068176E+00 0.120056906411E+00 
-# l=1 -0.00 -0.199396910523E+01 0.161556966777E-02 -0.01 -0.199397020146E+01 0.775445182415E-02 -0.02 -0.199396960647E+01 0.155070355277E-01 -0.03 -0.199396880528E+01 0.232558780395E-01 -0.04 -0.199396788390E+01 0.309991092173E-01 -0.05 -0.199396650225E+01 0.387348606998E-01 -0.06 -0.199396394707E+01 0.464612663670E-01 -0.07 -0.199395913436E+01 0.541764628573E-01 -0.08 -0.199395060889E+01 0.618785901110E-01 -0.09 -0.199393654565E+01 0.695657919318E-01 -0.10 -0.199391475371E+01 0.772362165607E-01 -0.11 -0.199388268249E+01 0.848880172637E-01 -0.12 -0.199383743011E+01 0.925193529328E-01 -0.13 -0.199377575352E+01 0.100128388700E+00 -0.14 -0.199369408000E+01 0.107713296571E+00 -0.15 -0.199358852012E+01 0.115272256065E+00 -0.16 -0.199345488154E+01 0.122803454885E+00 -0.17 -0.199328868373E+01 0.130305089590E+00 -0.18 -0.199308517320E+01 0.137775366304E+00 -0.19 -0.199283933949E+01 0.145212501424E+00 -0.20 -0.199254593134E+01 0.152614722362E+00 -0.21 -0.199219947317E+01 0.159980268301E+00 -0.22 -0.199179428182E+01 0.167307390972E+00 -0.23 -0.199132448336E+01 0.174594355449E+00 -0.24 -0.199078402990E+01 0.181839440973E+00 -0.25 -0.199016671646E+01 0.189040941790E+00 -0.26 -0.198946619790E+01 0.196197167994E+00 -0.27 -0.198867600548E+01 0.203306446418E+00 -0.28 -0.198778956345E+01 0.210367121553E+00 -0.29 -0.198680020574E+01 0.217377556401E+00 -0.30 -0.198570119197E+01 0.224336133490E+00 -0.31 -0.198448572374E+01 0.231241255780E+00 -0.32 -0.198314696062E+01 0.238091347585E+00 -0.33 -0.198167803570E+01 0.244884855696E+00 -0.34 -0.198007207131E+01 0.251620250260E+00 -0.35 -0.197832219425E+01 0.258296025794E+00 -0.36 -0.197642155090E+01 0.264910702238E+00 -0.37 -0.197436332220E+01 0.271462826027E+00 -0.38 -0.197214073826E+01 0.277950971084E+00 -0.39 -0.196974709290E+01 0.284373739869E+00 -0.40 -0.196717575780E+01 0.290729764430E+00 -0.41 -0.196442019662E+01 0.297017707467E+00 -0.42 -0.196147397879E+01 0.303236263381E+00 -0.43 -0.195833079314E+01 0.309384159336E+00 -0.44 
-0.195498446129E+01 0.315460156303E+00 -0.45 -0.195142895085E+01 0.321463050107E+00 -0.46 -0.194765838835E+01 0.327391672453E+00 -0.47 -0.194366707193E+01 0.333244891941E+00 -0.48 -0.193944948369E+01 0.339021615038E+00 -0.49 -0.193500030219E+01 0.344720787118E+00 -0.50 -0.193031441521E+01 0.350341393559E+00 -0.51 -0.192538693137E+01 0.355882460626E+00 -0.52 -0.192021319136E+01 0.361343056332E+00 -0.53 -0.191478877826E+01 0.366722291166E+00 -0.54 -0.190910952830E+01 0.372019318922E+00 -0.55 -0.190317154700E+01 0.377233338412E+00 -0.56 -0.189697121724E+01 0.382363593773E+00 -0.57 -0.189050520500E+01 0.387409374539E+00 -0.58 -0.188377046931E+01 0.392370016395E+00 -0.59 -0.187676428580E+01 0.397244903961E+00 -0.60 -0.186948424447E+01 0.402033469561E+00 -0.61 -0.186192824914E+01 0.406735192459E+00 -0.62 -0.185409455027E+01 0.411349602971E+00 -0.63 -0.184598174535E+01 0.415876281622E+00 -0.64 -0.183758876224E+01 0.420314856160E+00 -0.65 -0.182891490833E+01 0.424665007830E+00 -0.66 -0.181995985754E+01 0.428926468681E+00 -0.67 -0.181072363066E+01 0.433099018381E+00 -0.68 -0.180120666873E+01 0.437182493449E+00 -0.69 -0.179140976784E+01 0.441176777749E+00 -0.70 -0.178133412087E+01 0.445081807632E+00 -0.71 -0.177098134335E+01 0.448897574436E+00 -0.72 -0.176035339895E+01 0.452624114457E+00 -0.73 -0.174945272362E+01 0.456261524190E+00 -0.74 -0.173828209531E+01 0.459809943221E+00 -0.75 -0.172684475409E+01 0.463269568952E+00 -0.76 -0.171514432153E+01 0.466640645863E+00 -0.77 -0.170318483710E+01 0.469923469825E+00 -0.78 -0.169097077176E+01 0.473118389105E+00 -0.79 -0.167850696631E+01 0.476225796780E+00 -0.80 -0.166579873072E+01 0.479246142230E+00 -0.81 -0.165285170541E+01 0.482179914441E+00 -0.82 -0.163967201851E+01 0.485027660551E+00 -0.83 -0.162626609932E+01 0.487789963618E+00 -0.84 -0.161264086369E+01 0.490467464757E+00 -0.85 -0.159880350863E+01 0.493060838713E+00 -0.86 -0.158476169516E+01 0.495570816242E+00 -0.87 -0.157052335695E+01 0.497998161131E+00 -0.88 -0.155609684261E+01 
0.500343688639E+00 -0.89 -0.154149078077E+01 0.502608248877E+00 -0.90 -0.152671413504E+01 0.504792735689E+00 -0.91 -0.151177617157E+01 0.506898082503E+00 -0.92 -0.149668638735E+01 0.508925255651E+00 -0.93 -0.148145460197E+01 0.510875267637E+00 -0.94 -0.146609076644E+01 0.512749152416E+00 -0.95 -0.145060513209E+01 0.514547994413E+00 -0.96 -0.143500803218E+01 0.516272893547E+00 -0.97 -0.141931000002E+01 0.517924995303E+00 -0.98 -0.140352165567E+01 0.519505465520E+00 -0.99 -0.138765368171E+01 0.521015499950E+00 -1.00 -0.137171682784E+01 0.522456326589E+00 -1.01 -0.135572183211E+01 0.523829186432E+00 -1.02 -0.133967938259E+01 0.525135358413E+00 -1.03 -0.132360014615E+01 0.526376131045E+00 -1.04 -0.130749462328E+01 0.527552818877E+00 -1.05 -0.129137318396E+01 0.528666756881E+00 -1.06 -0.127524608249E+01 0.529719288442E+00 -1.07 -0.125912320203E+01 0.530711782976E+00 -1.08 -0.124301433504E+01 0.531645615780E+00 -1.09 -0.122692890393E+01 0.532522175531E+00 -1.10 -0.121087593259E+01 0.533342865316E+00 -1.11 -0.119486433371E+01 0.534109090559E+00 -1.12 -0.117890237696E+01 0.534822268114E+00 -1.13 -0.116299805607E+01 0.535483819740E+00 -1.14 -0.114715911229E+01 0.536095168582E+00 -1.15 -0.113139252677E+01 0.536657741919E+00 -1.16 -0.111570513188E+01 0.537172967781E+00 -1.17 -0.110010333816E+01 0.537642272106E+00 -1.18 -0.108459285123E+01 0.538067077333E+00 -1.19 -0.106917929299E+01 0.538448805292E+00 -1.20 -0.105386778682E+01 0.538788869531E+00 -1.21 -0.103866288459E+01 0.539088674102E+00 -1.22 -0.102356908576E+01 0.539349621806E+00 -1.23 -0.100859044861E+01 0.539573100004E+00 -1.24 -0.993730611991E+00 0.539760481388E+00 -1.25 -0.978993185348E+00 0.539913137038E+00 -1.26 -0.964381508513E+00 0.540032416287E+00 -1.27 -0.949898664625E+00 0.540119647767E+00 -1.28 -0.935547725070E+00 0.540176158455E+00 -1.29 -0.921331713931E+00 0.540203250042E+00 -1.30 -0.907253600564E+00 0.540202194960E+00 -1.31 -0.893316352845E+00 0.540174263722E+00 -1.32 -0.879523067054E+00 0.540120703001E+00 
-1.33 -0.865877060969E+00 0.540042718355E+00 -1.34 -0.852381674353E+00 0.539941511267E+00 -1.35 -0.839040414012E+00 0.539818266920E+00 -1.36 -0.825857336897E+00 0.539674116833E+00 -1.37 -0.812836613054E+00 0.539510181490E+00 -1.38 -0.799982500876E+00 0.539327574741E+00 -1.39 -0.787300010343E+00 0.539127353999E+00 -1.40 -0.774794531171E+00 0.538910548161E+00 -1.41 -0.762471468991E+00 0.538678185071E+00 -1.42 -0.750336848795E+00 0.538431252708E+00 -1.43 -0.738397573437E+00 0.538170682558E+00 -1.44 -0.726660612437E+00 0.537897401812E+00 -1.45 -0.715133170526E+00 0.537612324623E+00 -1.46 -0.703823750797E+00 0.537316293152E+00 -1.47 -0.692741301895E+00 0.537010124859E+00 -1.48 -0.681894790076E+00 0.536694636456E+00 -1.49 -0.671294209853E+00 0.536370601513E+00 -1.50 -0.660951147129E+00 0.536038726824E+00 -1.51 -0.650877324419E+00 0.535699713452E+00 -1.52 -0.641084675033E+00 0.535354255938E+00 -1.53 -0.631587414063E+00 0.535002978224E+00 -1.54 -0.622401152369E+00 0.534646461066E+00 -1.55 -0.613541520876E+00 0.534285284598E+00 -1.56 -0.605025127151E+00 0.533920011360E+00 -1.57 -0.596873130790E+00 0.533551121884E+00 -1.58 -0.589108016559E+00 0.533179072829E+00 -1.59 -0.581752270965E+00 0.532804320813E+00 -1.60 -0.574829684327E+00 0.532427294001E+00 -1.61 -0.568367708612E+00 0.532048340644E+00 -1.62 -0.562394433817E+00 0.531667795063E+00 -1.63 -0.556937965053E+00 0.531285991434E+00 -1.64 -0.552029825754E+00 0.530903229834E+00 -1.65 -0.547709273908E+00 0.530519733195E+00 -1.66 -0.544016630963E+00 0.530135713842E+00 -1.67 -0.540992174493E+00 0.529751384009E+00 -1.68 -0.538662825517E+00 0.529366928122E+00 -1.69 -0.537023412450E+00 0.528982463790E+00 -1.70 -0.536064042318E+00 0.528598098794E+00 -1.71 -0.535774822856E+00 0.528213940913E+00 -1.72 -0.536151364706E+00 0.527830072080E+00 -1.73 -0.537208057373E+00 0.527446486041E+00 -1.74 -0.538963292648E+00 0.527063157749E+00 -1.75 -0.541435462322E+00 0.526680062154E+00 -1.76 -0.544593363494E+00 0.526297176205E+00 -1.77 
-0.548073360083E+00 0.525914490233E+00 -1.78 -0.551375189607E+00 0.525532000072E+00 -1.79 -0.553998169894E+00 0.525149701572E+00 -1.80 -0.555457655643E+00 0.524767580730E+00 -1.81 -0.555630955028E+00 0.524385401135E+00 -1.82 -0.554753540315E+00 0.524002706297E+00 -1.83 -0.553076162742E+00 0.523619030338E+00 -1.84 -0.550849560367E+00 0.523233907458E+00 -1.85 -0.548278361802E+00 0.522847150465E+00 -1.86 -0.545417753899E+00 0.522459475125E+00 -1.87 -0.542292702000E+00 0.522071779811E+00 -1.88 -0.538928171446E+00 0.521684962897E+00 -1.89 -0.535350323025E+00 0.521299798066E+00 -1.90 -0.531609571092E+00 0.520914529285E+00 -1.91 -0.527778908656E+00 0.520525045499E+00 -1.92 -0.523932188652E+00 0.520127145960E+00 -1.93 -0.520143264012E+00 0.519716629920E+00 -1.94 -0.516478462844E+00 0.519291317612E+00 -1.95 -0.512959783423E+00 0.518860935145E+00 -1.96 -0.509593045721E+00 0.518439553716E+00 -1.97 -0.506384046004E+00 0.518041250886E+00 -1.98 -0.503338577018E+00 0.517680092631E+00 -1.99 -0.500457991743E+00 0.517355538615E+00 -2.00 -0.497730523907E+00 0.517023887711E+00 -2.01 -0.495141995677E+00 0.516633505042E+00 -2.02 -0.492678229224E+00 0.516132755734E+00 -2.03 -0.490325041821E+00 0.515470390391E+00 -2.04 -0.488067375131E+00 0.514664146180E+00 -2.05 -0.485888291177E+00 0.513879849890E+00 -2.06 -0.483770608742E+00 0.513302492540E+00 -2.07 -0.481697146606E+00 0.513117065151E+00 -2.08 -0.479650737031E+00 0.513507336726E+00 -2.09 -0.477616895724E+00 0.514413755885E+00 -2.10 -0.475587042409E+00 0.515241428106E+00 -2.11 -0.473553382938E+00 0.515324177280E+00 -2.12 -0.471508123160E+00 0.513995827296E+00 -2.13 -0.469443469933E+00 0.510590396231E+00 -2.14 -0.467354891330E+00 0.505070826417E+00 -2.15 -0.465248363637E+00 0.499426564744E+00 -2.16 -0.463131975565E+00 0.496054438499E+00 -2.17 -0.461013815826E+00 0.497351274970E+00 -2.18 -0.458901973131E+00 0.505713901448E+00 -2.19 -0.456803865289E+00 0.522551545249E+00 -2.20 -0.454722155776E+00 0.542274836853E+00 -2.21 -0.452657454474E+00 
0.556271417976E+00 -2.22 -0.450610363122E+00 0.555916946725E+00 -2.23 -0.448581483460E+00 0.532587081205E+00 -2.24 -0.446571403680E+00 0.477933839130E+00 -2.25 -0.444580252676E+00 0.392976479119E+00 -2.26 -0.442607599052E+00 0.290161422435E+00 -2.27 -0.440652977408E+00 0.182628543819E+00 -2.28 -0.438715922345E+00 0.835177180131E-01 -2.29 -0.436795968465E+00 0.596881975665E-02 -2.30 -0.434892687876E+00 -0.394659416875E-01 -2.31 -0.433005854567E+00 -0.561622612324E-01 -2.32 -0.431135310154E+00 -0.521612906457E-01 -2.33 -0.429280896299E+00 -0.355075455275E-01 -2.34 -0.427442454665E+00 -0.142455414782E-01 -2.35 -0.425619826792E+00 0.360437483446E-02 -2.36 -0.423812838158E+00 0.131641726354E-01 -2.37 -0.422021282430E+00 0.157795085712E-01 -2.38 -0.420244949532E+00 0.135282600823E-01 -2.39 -0.418483629388E+00 0.848830460931E-02 -2.40 -0.416737111923E+00 0.273751959263E-02 -2.41 -0.415005187617E+00 -0.168445464066E-02 -2.42 -0.413287663823E+00 -0.389809672637E-02 -2.43 -0.411584367405E+00 -0.436536722352E-02 -2.44 -0.409895126314E+00 -0.362297646647E-02 -2.45 -0.408219768501E+00 -0.220763478957E-02 -2.46 -0.406558121917E+00 -0.656052527200E-03 -2.47 -0.404910014772E+00 0.509354111090E-03 -2.48 -0.403275281176E+00 0.109088538927E-02 -2.49 -0.401653761093E+00 0.121332473377E-02 -2.50 -0.400045294740E+00 0.101530302696E-02 -2.51 -0.398449722334E+00 0.635451151194E-03 -2.52 -0.396866884094E+00 0.212399988844E-03 -2.53 -0.395296620431E+00 -0.117284954818E-03 -2.54 -0.393738778816E+00 -0.290998757808E-03 -2.55 -0.392193215548E+00 -0.338703843382E-03 -2.56 -0.390659787484E+00 -0.296208207612E-03 -2.57 -0.389138351480E+00 -0.199319846570E-03 -2.58 -0.387628764393E+00 -0.838467563277E-04 -2.59 -0.386130883093E+00 0.144435142549E-04 -2.60 -0.384644568048E+00 0.718128693218E-04 -2.61 -0.383169688343E+00 0.932981649777E-04 -2.62 -0.381706114320E+00 0.881485134032E-04 -2.63 -0.380253716325E+00 0.656130267787E-04 -2.64 -0.378812364701E+00 0.349408172851E-04 -2.65 -0.377381929793E+00 
0.538099710268E-05 -2.66 -0.375962282942E+00 -0.147460441603E-04 -2.67 -0.374553303297E+00 -0.243872279759E-04 -2.68 -0.373154873686E+00 -0.259115687832E-04 -2.69 -0.371766876956E+00 -0.217080881840E-04 -2.70 -0.370389195955E+00 -0.141658077800E-04 -2.71 -0.369021713532E+00 -0.567374917294E-05 -2.72 -0.367664312556E+00 0.138527741538E-05 -2.73 -0.366316879280E+00 0.553922824606E-05 -2.74 -0.364979306864E+00 0.717773489865E-05 -2.75 -0.363651489315E+00 0.691787543448E-05 -2.76 -0.362333320637E+00 0.537672791490E-05 -2.77 -0.361024694835E+00 0.317137040124E-05 -2.78 -0.359725505916E+00 0.918880954831E-06 -2.79 -0.358435648119E+00 -0.781728797193E-06 -2.80 -0.357155020631E+00 -0.171253804042E-05 -2.81 -0.355883527348E+00 -0.201851340550E-05 -2.82 -0.354621072352E+00 -0.185904454950E-05 -2.83 -0.353367559729E+00 -0.139352112946E-05 -2.84 -0.352122893561E+00 -0.781332802427E-06 -2.85 -0.350886977932E+00 -0.181869225452E-06 -2.86 -0.349659717322E+00 0.254253774246E-06 -2.87 -0.348441021304E+00 0.488841741047E-06 -2.88 -0.347230802973E+00 0.561508611704E-06 -2.89 -0.346028975493E+00 0.513424319757E-06 -2.90 -0.344835452032E+00 0.385758798747E-06 -2.91 -0.343650145754E+00 0.219681982213E-06 -2.92 -0.342472969825E+00 0.563638036956E-07 -2.93 -0.341303837694E+00 -0.648161282394E-07 -2.94 -0.340142667189E+00 -0.132146637015E-06 -2.95 -0.338989379581E+00 -0.155674930014E-06 -2.96 -0.337843896235E+00 -0.146035093908E-06 -2.97 -0.336706138515E+00 -0.113861215369E-06 -2.98 -0.335576027785E+00 -0.697873810700E-07 -2.99 -0.334453485410E+00 -0.244476776826E-07 -3.00 -0.333338432816E+00 0.116362301878E-07 -3.01 -0.332230794350E+00 0.332200392413E-07 -3.02 -0.331130498471E+00 0.424912932553E-07 -3.03 -0.330037473949E+00 0.421967491380E-07 -3.04 -0.328951649553E+00 0.350831637971E-07 -3.05 -0.327872954052E+00 0.238972941407E-07 -3.06 -0.326801316215E+00 0.113858970767E-07 -3.07 -0.325736664810E+00 0.295729512940E-09 -3.08 -0.324678929621E+00 -0.714980482964E-08 -3.09 
-0.323628044884E+00 -0.110280917447E-07 -3.10 -0.322583946062E+00 -0.120485934431E-07 -3.11 -0.321546568616E+00 -0.109207900847E-07 -3.12 -0.320515848007E+00 -0.835416182914E-08 -3.13 -0.319491719697E+00 -0.505818883629E-08 -3.14 -0.318474119147E+00 -0.174235126590E-08 -3.15 -0.317462981877E+00 0.892352064117E-09 -3.16 -0.316458245854E+00 0.250616832160E-08 -3.17 -0.315459852357E+00 0.324772025731E-08 -3.18 -0.314467742901E+00 0.330026474982E-08 -3.19 -0.313481858997E+00 0.284705867768E-08 -3.20 -0.312502142161E+00 0.207135891946E-08 -3.21 -0.311528533905E+00 0.115642235373E-08 -3.22 -0.310560975743E+00 0.285505859048E-09 -3.23 -0.309599409400E+00 -0.367023708206E-09 -3.24 -0.308643779592E+00 -0.752783412892E-09 -3.25 -0.307694033256E+00 -0.916895328784E-09 -3.26 -0.306750117382E+00 -0.906694230615E-09 -3.27 -0.305811978958E+00 -0.769514893121E-09 -3.28 -0.304879564973E+00 -0.552692091033E-09 -3.29 -0.303952822418E+00 -0.303560599085E-09 -3.30 -0.303031698282E+00 -0.694551920121E-10 -3.31 -0.302116139781E+00 0.105027159242E-09 -3.32 -0.301206096938E+00 0.209005933782E-09 -3.33 -0.300301521664E+00 0.254285326489E-09 -3.34 -0.299402365905E+00 0.253091786466E-09 -3.35 -0.298508581607E+00 0.217651762822E-09 -3.36 -0.297620120714E+00 0.160191704660E-09 -3.37 -0.296736935173E+00 0.929380610875E-10 -3.38 -0.295858976930E+00 0.281172812103E-10 -3.39 -0.294986198018E+00 -0.223482577758E-10 -3.40 -0.294118552598E+00 -0.538239964999E-10 -3.41 -0.293255997006E+00 -0.691304635957E-10 -3.42 -0.292398487674E+00 -0.714257190431E-10 -3.43 -0.291545981037E+00 -0.638678228218E-10 -3.44 -0.290698433528E+00 -0.496148349116E-10 -3.45 -0.289855801582E+00 -0.318248152925E-10 -3.46 -0.289018041630E+00 -0.136558239441E-10 -3.47 -0.288185110110E+00 0.173553880751E-11 -3.48 -0.287356964427E+00 0.121428059404E-10 -3.49 -0.286533564634E+00 0.179457771920E-10 -3.50 -0.285714871231E+00 0.199601712347E-10 -3.51 -0.284900844716E+00 0.190017067414E-10 -3.52 -0.284091445587E+00 0.158861023847E-10 
-3.53 -0.283286634344E+00 0.114290768372E-10 -3.54 -0.282486371485E+00 0.644634877170E-11 -3.55 -0.281690617509E+00 0.175363686074E-11 -3.56 -0.280899333023E+00 -0.186377893362E-11 -3.57 -0.280112480541E+00 -0.415138041519E-11 -3.58 -0.279330024193E+00 -0.530553537258E-11 -3.59 -0.278551928160E+00 -0.553694181571E-11 -3.60 -0.277778156624E+00 -0.505629775449E-11 -3.61 -0.277008673767E+00 -0.407430119883E-11 -3.62 -0.276243443770E+00 -0.280165015865E-11 -3.63 -0.275482430816E+00 -0.144904264387E-11 -3.64 -0.274725599086E+00 -0.227176664388E-12 -3.65 -0.273972913029E+00 0.674473239547E-12 -3.66 -0.273224339180E+00 0.123206378389E-11 -3.67 -0.272479845051E+00 0.149956709632E-11 -3.68 -0.271739398163E+00 0.153140592438E-11 -3.69 -0.271002966034E+00 0.138200301561E-11 -3.70 -0.270270516185E+00 0.110578111753E-11 -3.71 -0.269542016133E+00 0.757162977679E-12 -3.72 -0.268817433400E+00 0.390571343597E-12 -3.73 -0.268096735504E+00 0.604289628162E-13 -3.74 -0.267379890210E+00 -0.184404496849E-12 -3.75 -0.266666867178E+00 -0.337946397110E-12 -3.76 -0.265957636947E+00 -0.414143678035E-12 -3.77 -0.265252170062E+00 -0.427053593655E-12 -3.78 -0.264550437068E+00 -0.390733398003E-12 -3.79 -0.263852408508E+00 -0.319240345108E-12 -3.80 -0.263158054928E+00 -0.226631689003E-12 -3.81 -0.262467346872E+00 -0.126964683718E-12 -3.82 -0.261780254885E+00 -0.342965832851E-13 -3.83 -0.261096749594E+00 0.378573739506E-13 -3.84 -0.260416803058E+00 0.851925298170E-13 -3.85 -0.259740388525E+00 0.111103192424E-12 -3.86 -0.259067479284E+00 0.119220313898E-12 -3.87 -0.258398048620E+00 0.113174846367E-12 -3.88 -0.257732069821E+00 0.965977419553E-13 -3.89 -0.257069516173E+00 0.731199527911E-13 -3.90 -0.256410360964E+00 0.463724310006E-13 -3.91 -0.255754577480E+00 0.199861287103E-13 -3.92 -0.255102139010E+00 -0.240940416128E-14 -3.93 -0.254453019423E+00 -0.182562022275E-13 -3.94 -0.253807194213E+00 -0.279777451605E-13 -3.95 -0.253164639154E+00 -0.325118984437E-13 -3.96 -0.252525330019E+00 
-0.327965275603E-13 -3.97 -0.251889242580E+00 -0.297694979937E-13 -3.98 -0.251256352611E+00 -0.243686752270E-13 -3.99 -0.250626635885E+00 -0.175319247436E-13 -4.00 -0.25 -0.101971120268E-13 -# l=2 -0.00 -0.118599184851E+01 0.774278678775E-06 -0.01 -0.118599295299E+01 0.178391526709E-04 -0.02 -0.118599248226E+01 0.713530367340E-04 -0.03 -0.118599221954E+01 0.160530674096E-03 -0.04 -0.118599274729E+01 0.285353702617E-03 -0.05 -0.118599441913E+01 0.445796398141E-03 -0.06 -0.118599741149E+01 0.641825684216E-03 -0.07 -0.118600172437E+01 0.873401140746E-03 -0.08 -0.118600717886E+01 0.114047501335E-02 -0.09 -0.118601341613E+01 0.144299222372E-02 -0.10 -0.118601989861E+01 0.178089038166E-02 -0.11 -0.118602591312E+01 0.215409979855E-02 -0.12 -0.118603057573E+01 0.256254350225E-02 -0.13 -0.118603283812E+01 0.300613725468E-02 -0.14 -0.118603149491E+01 0.348478956938E-02 -0.15 -0.118602519211E+01 0.399840173309E-02 -0.16 -0.118601243609E+01 0.454686782801E-02 -0.17 -0.118599160311E+01 0.513007475739E-02 -0.18 -0.118596094915E+01 0.574790226940E-02 -0.19 -0.118591861998E+01 0.640022298997E-02 -0.20 -0.118586266129E+01 0.708690245283E-02 -0.21 -0.118579102873E+01 0.780779913197E-02 -0.22 -0.118570159794E+01 0.856276447957E-02 -0.23 -0.118559217439E+01 0.935164296550E-02 -0.24 -0.118546050291E+01 0.101742721171E-01 -0.25 -0.118530427703E+01 0.110304825623E-01 -0.26 -0.118512114806E+01 0.119200980854E-01 -0.27 -0.118490873370E+01 0.128429356703E-01 -0.28 -0.118466462639E+01 0.137988055489E-01 -0.29 -0.118438640131E+01 0.147875112788E-01 -0.30 -0.118407162396E+01 0.158088497765E-01 -0.31 -0.118371785741E+01 0.168626113979E-01 -0.32 -0.118332266912E+01 0.179485800175E-01 -0.33 -0.118288363751E+01 0.190665330607E-01 -0.34 -0.118239835803E+01 0.202162416123E-01 -0.35 -0.118186444894E+01 0.213974704936E-01 -0.36 -0.118127955678E+01 0.226099783354E-01 -0.37 -0.118064136152E+01 0.238535176583E-01 -0.38 -0.117994758136E+01 0.251278349733E-01 -0.39 -0.117919597724E+01 0.264326708779E-01 
-0.40 -0.117838435709E+01 0.277677601537E-01 -0.41 -0.117751057983E+01 0.291328318698E-01 -0.42 -0.117657255911E+01 0.305276094907E-01 -0.43 -0.117556826687E+01 0.319518109889E-01 -0.44 -0.117449573665E+01 0.334051489623E-01 -0.45 -0.117335306676E+01 0.348873307561E-01 -0.46 -0.117213842326E+01 0.363980585899E-01 -0.47 -0.117085004272E+01 0.379370296909E-01 -0.48 -0.116948623495E+01 0.395039364323E-01 -0.49 -0.116804538562E+01 0.410984664675E-01 -0.50 -0.116652595902E+01 0.427203028635E-01 -0.51 -0.116492650039E+01 0.443691242607E-01 -0.52 -0.116324563814E+01 0.460446050359E-01 -0.53 -0.116148208578E+01 0.477464154752E-01 -0.54 -0.115963464417E+01 0.494742219334E-01 -0.55 -0.115770220551E+01 0.512276869327E-01 -0.56 -0.115568375463E+01 0.530064693895E-01 -0.57 -0.115357836979E+01 0.548102248388E-01 -0.58 -0.115138522496E+01 0.566386055940E-01 -0.59 -0.114910359649E+01 0.584912608079E-01 -0.60 -0.114673286157E+01 0.603678368013E-01 -0.61 -0.114427249751E+01 0.622679773005E-01 -0.62 -0.114172209175E+01 0.641913234457E-01 -0.63 -0.113908134152E+01 0.661375141023E-01 -0.64 -0.113635004853E+01 0.681061861730E-01 -0.65 -0.113352813436E+01 0.700969745702E-01 -0.66 -0.113061563654E+01 0.721095125922E-01 -0.67 -0.112761270245E+01 0.741434321723E-01 -0.68 -0.112451961301E+01 0.761983638946E-01 -0.69 -0.112133676246E+01 0.782739374573E-01 -0.70 -0.111806467206E+01 0.803697817228E-01 -0.71 -0.111470399934E+01 0.824855250157E-01 -0.72 -0.111125551451E+01 0.846207953489E-01 -0.73 -0.110772014168E+01 0.867752206302E-01 -0.74 -0.110409891719E+01 0.889484289087E-01 -0.75 -0.110039302964E+01 0.911400486017E-01 -0.76 -0.109660379445E+01 0.933497087518E-01 -0.77 -0.109273266620E+01 0.955770391764E-01 -0.78 -0.108878124434E+01 0.978216708938E-01 -0.79 -0.108475125258E+01 0.100083236023E+00 -0.80 -0.108064457300E+01 0.102361368527E+00 -0.81 -0.107646319922E+01 0.104655703748E+00 -0.82 -0.107220928930E+01 0.106965879546E+00 -0.83 -0.106788510260E+01 0.109291535421E+00 -0.84 
-0.106349306065E+01 0.111632314057E+00 -0.85 -0.105903567814E+01 0.113987860060E+00 -0.86 -0.105451561995E+01 0.116357821825E+00 -0.87 -0.104993563846E+01 0.118741850042E+00 -0.88 -0.104529861245E+01 0.121139599630E+00 -0.89 -0.104060750540E+01 0.123550728404E+00 -0.90 -0.103586537117E+01 0.125974898498E+00 -0.91 -0.103107534491E+01 0.128411775973E+00 -0.92 -0.102624060978E+01 0.130861030766E+00 -0.93 -0.102136441501E+01 0.133322338292E+00 -0.94 -0.101645002333E+01 0.135795376934E+00 -0.95 -0.101150072073E+01 0.138279832265E+00 -0.96 -0.100651979077E+01 0.140775392257E+00 -0.97 -0.100151047176E+01 0.143281752716E+00 -0.98 -0.996475979421E+00 0.145798612987E+00 -0.99 -0.991419435787E+00 0.148325678685E+00 -1.00 -0.986343861752E+00 0.150862662186E+00 -1.01 -0.981252201559E+00 0.153409279164E+00 -1.02 -0.976147161015E+00 0.155965255168E+00 -1.03 -0.971031377836E+00 0.158530318359E+00 -1.04 -0.965907204398E+00 0.161104205544E+00 -1.05 -0.960776769296E+00 0.163686660312E+00 -1.06 -0.955642052293E+00 0.166277429919E+00 -1.07 -0.950504560562E+00 0.168876273159E+00 -1.08 -0.945365688317E+00 0.171482951534E+00 -1.09 -0.940226424541E+00 0.174097235116E+00 -1.10 -0.935087321811E+00 0.176718903205E+00 -1.11 -0.929948820315E+00 0.179347737340E+00 -1.12 -0.924810721985E+00 0.181983531558E+00 -1.13 -0.919672554850E+00 0.184626085265E+00 -1.14 -0.914533615729E+00 0.187275202376E+00 -1.15 -0.909392505591E+00 0.189930700384E+00 -1.16 -0.904247689640E+00 0.192592399440E+00 -1.17 -0.899097279267E+00 0.195260127233E+00 -1.18 -0.893938794645E+00 0.197933724053E+00 -1.19 -0.888769687417E+00 0.200613031716E+00 -1.20 -0.883587041851E+00 0.203297901874E+00 -1.21 -0.878387521033E+00 0.205988197461E+00 -1.22 -0.873167753247E+00 0.208683782551E+00 -1.23 -0.867924121485E+00 0.211384532258E+00 -1.24 -0.862652775564E+00 0.214090332188E+00 -1.25 -0.857349855067E+00 0.216801068889E+00 -1.26 -0.852011478658E+00 0.219516640013E+00 -1.27 -0.846633745083E+00 0.222236953779E+00 -1.28 -0.841212759747E+00 
0.224961919159E+00 -1.29 -0.835744854312E+00 0.227691455089E+00 -1.30 -0.830226622222E+00 0.230425492041E+00 -1.31 -0.824654676165E+00 0.233163961223E+00 -1.32 -0.819026007648E+00 0.235906801405E+00 -1.33 -0.813338261648E+00 0.238653964406E+00 -1.34 -0.807589147957E+00 0.241405403337E+00 -1.35 -0.801776712525E+00 0.244161075563E+00 -1.36 -0.795900108388E+00 0.246920952465E+00 -1.37 -0.789958716164E+00 0.249685008302E+00 -1.38 -0.783952073702E+00 0.252453218773E+00 -1.39 -0.777881056375E+00 0.255225571812E+00 -1.40 -0.771747215345E+00 0.258002061534E+00 -1.41 -0.765552128743E+00 0.260782682254E+00 -1.42 -0.759298412548E+00 0.263567435444E+00 -1.43 -0.752990153738E+00 0.266356332725E+00 -1.44 -0.746631551008E+00 0.269149386489E+00 -1.45 -0.740227178084E+00 0.271946611041E+00 -1.46 -0.733783678902E+00 0.274748031246E+00 -1.47 -0.727308407815E+00 0.277553675597E+00 -1.48 -0.720808745004E+00 0.280363572671E+00 -1.49 -0.714293575678E+00 0.283177756057E+00 -1.50 -0.707774114302E+00 0.285996267098E+00 -1.51 -0.701261775297E+00 0.288819147803E+00 -1.52 -0.694768258888E+00 0.291646440747E+00 -1.53 -0.688308359037E+00 0.294478194613E+00 -1.54 -0.681898762150E+00 0.297314461822E+00 -1.55 -0.675556181832E+00 0.300155294851E+00 -1.56 -0.669298520249E+00 0.303000747167E+00 -1.57 -0.663149219302E+00 0.305850876870E+00 -1.58 -0.657133333599E+00 0.308705743408E+00 -1.59 -0.651275919965E+00 0.311565406231E+00 -1.60 -0.645603703842E+00 0.314429925500E+00 -1.61 -0.640148096818E+00 0.317299363386E+00 -1.62 -0.634941327033E+00 0.320173782409E+00 -1.63 -0.630015639745E+00 0.323053245087E+00 -1.64 -0.625407152057E+00 0.325937813610E+00 -1.65 -0.621160741805E+00 0.328827549434E+00 -1.66 -0.617322491376E+00 0.331722513911E+00 -1.67 -0.613938440710E+00 0.334622768388E+00 -1.68 -0.611041707738E+00 0.337528372705E+00 -1.69 -0.608634361883E+00 0.340439383077E+00 -1.70 -0.606713904788E+00 0.343355855186E+00 -1.71 -0.605277838843E+00 0.346277844714E+00 -1.72 -0.604329475981E+00 0.349205405355E+00 
-1.73 -0.603891953410E+00 0.352138584023E+00 -1.74 -0.603992633641E+00 0.355077426184E+00 -1.75 -0.604658879184E+00 0.358021977306E+00 -1.76 -0.605868593113E+00 0.360972281430E+00 -1.77 -0.607268151920E+00 0.363928373028E+00 -1.78 -0.608367678298E+00 0.366890282644E+00 -1.79 -0.608676876397E+00 0.369858040806E+00 -1.80 -0.607721510863E+00 0.372831677651E+00 -1.81 -0.605389833040E+00 0.375811214428E+00 -1.82 -0.601928786013E+00 0.378796663588E+00 -1.83 -0.597600614342E+00 0.381788037212E+00 -1.84 -0.592667549446E+00 0.384785347376E+00 -1.85 -0.587345834007E+00 0.387788601131E+00 -1.86 -0.581702660181E+00 0.390797789237E+00 -1.87 -0.575775077731E+00 0.393812899158E+00 -1.88 -0.569600136421E+00 0.396833918362E+00 -1.89 -0.563216076623E+00 0.399860833869E+00 -1.90 -0.556685294198E+00 0.402893623702E+00 -1.91 -0.550092672352E+00 0.405932257504E+00 -1.92 -0.543523950736E+00 0.408976704600E+00 -1.93 -0.537064869001E+00 0.412026934315E+00 -1.94 -0.530793491582E+00 0.415082910183E+00 -1.95 -0.524742667102E+00 0.418144561635E+00 -1.96 -0.518928742540E+00 0.421211805653E+00 -1.97 -0.513368040699E+00 0.424284559202E+00 -1.98 -0.508076880344E+00 0.427362739258E+00 -1.99 -0.503066489896E+00 0.430446277573E+00 -2.00 -0.498333056113E+00 0.433535149559E+00 -2.01 -0.493870000820E+00 0.436629338653E+00 -2.02 -0.489670745841E+00 0.439728828291E+00 -2.03 -0.485728699578E+00 0.442833601330E+00 -2.04 -0.482034868339E+00 0.445943536599E+00 -2.05 -0.478575101993E+00 0.449058289616E+00 -2.06 -0.475334583118E+00 0.452177487002E+00 -2.07 -0.472298494288E+00 0.455300755377E+00 -2.08 -0.469452020677E+00 0.458427723058E+00 -2.09 -0.466780864566E+00 0.461558355965E+00 -2.10 -0.464271865955E+00 0.464693362803E+00 -2.11 -0.461912016329E+00 0.467833551179E+00 -2.12 -0.459688307179E+00 0.470979728700E+00 -2.13 -0.457587730167E+00 0.474132702697E+00 -2.14 -0.455597849837E+00 0.477292392253E+00 -2.15 -0.453708076644E+00 0.480455854340E+00 -2.16 -0.451908192121E+00 0.483619570573E+00 -2.17 
-0.450187977798E+00 0.486780022569E+00 -2.18 -0.448537215209E+00 0.489933691943E+00 -2.19 -0.446945950304E+00 0.493078451733E+00 -2.20 -0.445406102831E+00 0.496222035250E+00 -2.21 -0.443910401906E+00 0.499376434876E+00 -2.22 -0.442451579858E+00 0.502553659872E+00 -2.23 -0.441022369013E+00 0.505765719502E+00 -2.24 -0.439615529106E+00 0.509023913694E+00 -2.25 -0.438224748900E+00 0.512315499422E+00 -2.26 -0.436844850485E+00 0.515598403486E+00 -2.27 -0.435470724726E+00 0.518828772795E+00 -2.28 -0.434097262488E+00 0.521962754260E+00 -2.29 -0.432719354637E+00 0.524956494790E+00 -2.30 -0.431332196959E+00 0.527790701720E+00 -2.31 -0.429932626448E+00 0.530578276920E+00 -2.32 -0.428518029861E+00 0.533476403724E+00 -2.33 -0.427085794348E+00 0.536642297387E+00 -2.34 -0.425633307061E+00 0.540233173170E+00 -2.35 -0.424157962667E+00 0.544404519130E+00 -2.36 -0.422658132843E+00 0.549087262024E+00 -2.37 -0.421134124346E+00 0.553767561315E+00 -2.38 -0.419586471595E+00 0.557879249305E+00 -2.39 -0.418015709009E+00 0.560856158301E+00 -2.40 -0.416422371007E+00 0.562132120607E+00 -2.41 -0.414807067238E+00 0.561179036658E+00 -2.42 -0.413172689876E+00 0.558623798888E+00 -2.43 -0.411524770445E+00 0.556428853186E+00 -2.44 -0.409868987540E+00 0.556631064871E+00 -2.45 -0.408211019755E+00 0.561267299260E+00 -2.46 -0.406556545684E+00 0.572374421674E+00 -2.47 -0.404911126755E+00 0.591791053897E+00 -2.48 -0.403277662713E+00 0.616852363173E+00 -2.49 -0.401656409930E+00 0.640421035737E+00 -2.50 -0.400047511271E+00 0.655167709086E+00 -2.51 -0.398451109603E+00 0.653763020717E+00 -2.52 -0.396867347789E+00 0.628877608129E+00 -2.53 -0.395296364384E+00 0.573399635392E+00 -2.54 -0.393738143530E+00 0.488006833101E+00 -2.55 -0.392192476116E+00 0.383126172082E+00 -2.56 -0.390659140825E+00 0.269800281882E+00 -2.57 -0.389137916340E+00 0.159071792047E+00 -2.58 -0.387628581345E+00 0.619833321270E-01 -2.59 -0.386130914625E+00 -0.104383816387E-01 -2.60 -0.384644724824E+00 -0.518994285825E-01 -2.61 
-0.383169892024E+00 -0.674269319116E-01 -2.62 -0.381706306759E+00 -0.637052595060E-01 -2.63 -0.380253859567E+00 -0.474187792459E-01 -2.64 -0.378812440982E+00 -0.252518590111E-01 -2.65 -0.377381941540E+00 -0.388886668184E-02 -2.66 -0.375962250749E+00 0.106570211301E-01 -2.67 -0.374553250057E+00 0.176247406435E-01 -2.68 -0.373154817118E+00 0.187263874320E-01 -2.69 -0.371766829565E+00 0.156885163204E-01 -2.70 -0.370389165030E+00 0.102376821332E-01 -2.71 -0.369021701145E+00 0.410043969523E-02 -2.72 -0.367664315580E+00 -0.100114515637E-02 -2.73 -0.366316891373E+00 -0.400322091950E-02 -2.74 -0.364979322534E+00 -0.518737579036E-02 -2.75 -0.363651504417E+00 -0.499957438611E-02 -2.76 -0.362333332375E+00 -0.388578132391E-02 -2.77 -0.361024701759E+00 -0.229196122091E-02 -2.78 -0.359725507922E+00 -0.664078694271E-03 -2.79 -0.358435646413E+00 0.564958318250E-03 -2.80 -0.357155016892E+00 0.123765763105E-02 -2.81 -0.355883522941E+00 0.145878716894E-02 -2.82 -0.354621068294E+00 0.134353843176E-02 -2.83 -0.353367556687E+00 0.100710291929E-02 -2.84 -0.352122891855E+00 0.564672131355E-03 -2.85 -0.350886977534E+00 0.131437567762E-03 -2.86 -0.349659717877E+00 -0.183750151232E-03 -2.87 -0.348441022371E+00 -0.353287750054E-03 -2.88 -0.347230804198E+00 -0.405804368587E-03 -2.89 -0.346028976614E+00 -0.371053671402E-03 -2.90 -0.344835452874E+00 -0.278789323065E-03 -2.91 -0.343650146234E+00 -0.158764988147E-03 -2.92 -0.342472969948E+00 -0.407343312161E-04 -2.93 -0.341303837553E+00 0.468428576984E-04 -2.94 -0.340142666901E+00 0.955028675911E-04 -2.95 -0.338989379242E+00 0.112506852722E-03 -2.96 -0.337843895916E+00 0.105540107203E-03 -2.97 -0.336706138267E+00 0.822879251472E-04 -2.98 -0.335576027633E+00 0.504356006660E-04 -2.99 -0.334453485357E+00 0.176684278720E-04 -3.00 -0.333338432841E+00 -0.840954696988E-05 -3.01 -0.332230794422E+00 -0.240082462989E-04 -3.02 -0.331130498564E+00 -0.307086161646E-04 -3.03 -0.330037474041E+00 -0.304957480321E-04 -3.04 -0.328951649630E+00 -0.253547333665E-04 
-3.05 -0.327872954104E+00 -0.172706636329E-04 -3.06 -0.326801316239E+00 -0.822863029647E-05 -3.07 -0.325736664811E+00 -0.213724822327E-06 -3.08 -0.324678929605E+00 0.516719062528E-05 -3.09 -0.323628044860E+00 0.797004304815E-05 -3.10 -0.322583946036E+00 0.870756343295E-05 -3.11 -0.321546568592E+00 0.789249573812E-05 -3.12 -0.320515847989E+00 0.603758392211E-05 -3.13 -0.319491719686E+00 0.365557194337E-05 -3.14 -0.318474119143E+00 0.125920376033E-05 -3.15 -0.317462981879E+00 -0.644906165976E-06 -3.16 -0.316458245860E+00 -0.181121719618E-05 -3.17 -0.315459852365E+00 -0.234713954674E-05 -3.18 -0.314467742908E+00 -0.238511364751E-05 -3.19 -0.313481859003E+00 -0.205757992833E-05 -3.20 -0.312502142165E+00 -0.149697881904E-05 -3.21 -0.311528533907E+00 -0.835750749488E-06 -3.22 -0.310560975743E+00 -0.206336149515E-06 -3.23 -0.309599409399E+00 0.265249403234E-06 -3.24 -0.308643779590E+00 0.544039381025E-06 -3.25 -0.307694033254E+00 0.662643674918E-06 -3.26 -0.306750117380E+00 0.655271303213E-06 -3.27 -0.305811978956E+00 0.556131284209E-06 -3.28 -0.304879564972E+00 0.399432636205E-06 -3.29 -0.303952822418E+00 0.219384377500E-06 -3.30 -0.303031698281E+00 0.501955263946E-07 -3.31 -0.302116139781E+00 -0.759035198255E-07 -3.32 -0.301206096939E+00 -0.151049368116E-06 -3.33 -0.300301521665E+00 -0.183772954156E-06 -3.34 -0.299402365906E+00 -0.182910378329E-06 -3.35 -0.298508581607E+00 -0.157297741019E-06 -3.36 -0.297620120714E+00 -0.115771142610E-06 -3.37 -0.296736935173E+00 -0.671666834866E-07 -3.38 -0.295858976930E+00 -0.203204640323E-07 -3.39 -0.294986198018E+00 0.161511692727E-07 -3.40 -0.294118552598E+00 0.388988030802E-07 -3.41 -0.293255997005E+00 0.499608439566E-07 -3.42 -0.292398487674E+00 0.516196336316E-07 -3.43 -0.291545981037E+00 0.461575138351E-07 -3.44 -0.290698433528E+00 0.358568262965E-07 -3.45 -0.289855801582E+00 0.229999127457E-07 -3.46 -0.289018041630E+00 0.986911491232E-08 -3.47 -0.288185110110E+00 -0.125428037123E-08 -3.48 -0.287356964427E+00 
-0.877565115624E-08 -3.49 -0.286533564634E+00 -0.129694801298E-07 -3.50 -0.285714871231E+00 -0.144252902199E-07 -3.51 -0.284900844716E+00 -0.137326043547E-07 -3.52 -0.284091445587E+00 -0.114809454622E-07 -3.53 -0.283286634344E+00 -0.825983647053E-08 -3.54 -0.282486371485E+00 -0.465880030772E-08 -3.55 -0.281690617509E+00 -0.126735990183E-08 -3.56 -0.280899333023E+00 0.134695998882E-08 -3.57 -0.280112480541E+00 0.300021811427E-08 -3.58 -0.279330024193E+00 0.383433020796E-08 -3.59 -0.278551928160E+00 0.400156850776E-08 -3.60 -0.277778156624E+00 0.365420525150E-08 -3.61 -0.277008673767E+00 0.294451267703E-08 -3.62 -0.276243443770E+00 0.202476302222E-08 -3.63 -0.275482430816E+00 0.104722852490E-08 -3.64 -0.274725599086E+00 0.164181422917E-09 -3.65 -0.273972913029E+00 -0.487444326584E-09 -3.66 -0.273224339180E+00 -0.890417093271E-09 -3.67 -0.272479845051E+00 -0.108374273518E-08 -3.68 -0.271739398163E+00 -0.110675277501E-08 -3.69 -0.271002966034E+00 -0.998778735436E-09 -3.70 -0.270270516185E+00 -0.799152139149E-09 -3.71 -0.269542016133E+00 -0.547204508836E-09 -3.72 -0.268817433400E+00 -0.282267367184E-09 -3.73 -0.268096735504E+00 -0.436722368792E-10 -3.74 -0.267379890210E+00 0.133269817860E-09 -3.75 -0.266666867178E+00 0.244235122021E-09 -3.76 -0.265957636947E+00 0.299303181227E-09 -3.77 -0.265252170062E+00 0.308633225411E-09 -3.78 -0.264550437068E+00 0.282384484507E-09 -3.79 -0.263852408508E+00 0.230716188450E-09 -3.80 -0.263158054928E+00 0.163787567173E-09 -3.81 -0.262467346872E+00 0.917578506105E-10 -3.82 -0.261780254885E+00 0.247862686958E-10 -3.83 -0.261096749594E+00 -0.273596653946E-10 -3.84 -0.260416803058E+00 -0.615689591399E-10 -3.85 -0.259740388525E+00 -0.802946916751E-10 -3.86 -0.259067479284E+00 -0.861609656484E-10 -3.87 -0.258398048620E+00 -0.817918837084E-10 -3.88 -0.257732069821E+00 -0.698115485036E-10 -3.89 -0.257069516173E+00 -0.528440626823E-10 -3.90 -0.256410360964E+00 -0.335135288931E-10 -3.91 -0.255754577480E+00 -0.144440497843E-10 -3.92 
-0.255102139010E+00 0.174128537650E-11 -3.93 -0.254453019423E+00 0.131938254611E-10 -3.94 -0.253807194213E+00 0.202196208085E-10 -3.95 -0.253164639154E+00 0.234964703026E-10 -3.96 -0.252525330019E+00 0.237021728270E-10 -3.97 -0.251889242580E+00 0.215145272658E-10 -3.98 -0.251256352611E+00 0.176113325026E-10 -3.99 -0.250626635885E+00 0.126703874213E-10 -4.00 -0.25 0.736949090574E-11 diff --git a/scripts/build_pel.sh b/scripts/build_pel.sh index 0264cafb..bb7d72dd 100755 --- a/scripts/build_pel.sh +++ b/scripts/build_pel.sh @@ -1,31 +1,31 @@ ##! /bin/csh -f ## An example script to build on LLNL Peloton systems. ## For now, this script assumes intel/ mkl libraries are being used. - + # load some modules source scripts/modules.pel - + # set some environment variables. Set them explicitly or use loaded module path (preferred) # Here we use an explicit path for scalapack to be consistent with the path for the blas libraries and avoid # benign cmake warnings -setenv SCALAPACK_ROOT /usr/tce/packages/mkl/mkl-2019.0/ +setenv SCALAPACK_ROOT ${MKLROOT} setenv HDF5_ROOT ${HDF5} - + # We need to define the cmake blas vendor option here to find the right one. set BLAS_VENDOR = Intel10_64lp - + # manually set the location of BLACS libraries for scalapack -set BLACS_LIB = ${SCALAPACK_ROOT}/lib - +set BLACS_LIB = ${SCALAPACK_ROOT}/lib/intel64 + set MGMOL_ROOT = `pwd` - + set INSTALL_DIR = ${MGMOL_ROOT}/mgmol_install mkdir -p ${INSTALL_DIR} - + set BUILD_DIR = ${MGMOL_ROOT}/mgmol_build mkdir -p ${BUILD_DIR} cd ${BUILD_DIR} - + # call cmake cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR} \ -DCMAKE_CXX_COMPILER=mpic++ \ @@ -33,8 +33,12 @@ cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR} \ -DMPIEXEC_NUMPROC_FLAG="-n" \ -DBLA_VENDOR=${BLAS_VENDOR} \ -DSCALAPACK_BLACS_LIBRARY=${BLACS_LIB}/libmkl_blacs_intelmpi_lp64.so \ + -DCMAKE_BUILD_TYPE=Release \ + -DMPIEXEC_EXECUTABLE=/usr/bin/srun \ .. 
- + # call make install make -j make install + + diff --git a/scripts/modules.pel b/scripts/modules.pel index 5e4efa85..bdad8018 100644 --- a/scripts/modules.pel +++ b/scripts/modules.pel @@ -1,7 +1,9 @@ -module load intel/19.0.4 -module load hdf5-parallel/1.10.2 -module load boost/1.69.0 -module load mkl/2019.0 +##TOSS4 options +module load intel/2022.1.0 +module load hdf5-parallel/1.14.0 +module load mkl module load cmake/3.14.5 -module load python/3.7.2 - +module load python +module load boost +## manually add boost path +setenv LD_LIBRARY_PATH ${BOOST_ROOT}/lib:$LD_LIBRARY_PATH diff --git a/src/ABPG.cc b/src/ABPG.cc index 107edc63..a8a7708e 100644 --- a/src/ABPG.cc +++ b/src/ABPG.cc @@ -127,7 +127,7 @@ void ABPG::update_states(T& orbitals, T& res, T& work_orbitals, else { // Preconditioned Power Method - orbitals.axpy(alpha, res); + orbitals.axpy((ORBDTYPE)alpha, res); if (ct.getOrthoType() == OrthoType::Orthonormal) orbitals.orthonormalizeLoewdin(false); @@ -148,5 +148,5 @@ void ABPG::printTimers(std::ostream& os) update_states_tm_.print(os); } -template class ABPG; -template class ABPG; +template class ABPG>; +template class ABPG>; diff --git a/src/AOMMprojector.cc b/src/AOMMprojector.cc index 3e670e45..a35acf13 100644 --- a/src/AOMMprojector.cc +++ b/src/AOMMprojector.cc @@ -14,8 +14,8 @@ #include "ProjectedMatricesSparse.h" #include "SubspaceProjector.h" -AOMMprojector::AOMMprojector( - LocGridOrbitals& phi, const std::shared_ptr& lrs) +AOMMprojector::AOMMprojector(LocGridOrbitals& phi, + const std::shared_ptr& lrs) { Control& ct = *(Control::instance()); Mesh* mymesh = Mesh::instance(); @@ -48,7 +48,7 @@ AOMMprojector::AOMMprojector( ct.numst, with_spin, ct.occ_width); // kernel functions use their own projected matrices and masks - kernel_phi_ = new LocGridOrbitals( + kernel_phi_ = new LocGridOrbitals( "AOMM", phi, kernel_proj_matrices_, kernelMasks_, nullptr); kernel_phi_->initGauss(0.5 * radius, lrs); @@ -59,7 +59,8 @@ AOMMprojector::AOMMprojector( 
kernel_phi_->computeGramAndInvS(ct.verbose); - kernelprojector_ = new SubspaceProjector(*kernel_phi_); + kernelprojector_ + = new SubspaceProjector>(*kernel_phi_); matrix_mask_ = new SquareLocalMatrices( subdivx, kernel_phi_->chromatic_number()); @@ -77,7 +78,7 @@ AOMMprojector::AOMMprojector( // matrix_mask_->setMaskThreshold(threshold, 10000.); } -void AOMMprojector::resetProjectors(LocGridOrbitals& phi) +void AOMMprojector::resetProjectors(LocGridOrbitals& phi) { if (onpe0) std::cout << "AOMM: reset projectors..." << std::endl; @@ -89,10 +90,11 @@ void AOMMprojector::resetProjectors(LocGridOrbitals& phi) kernel_phi_->computeGramAndInvS(0); delete kernelprojector_; - kernelprojector_ = new SubspaceProjector(*kernel_phi_); + kernelprojector_ + = new SubspaceProjector>(*kernel_phi_); } -void AOMMprojector::projectOut(LocGridOrbitals& phi) +void AOMMprojector::projectOut(LocGridOrbitals& phi) { assert(kernelprojector_ != nullptr); assert(matrix_mask_ != nullptr); diff --git a/src/AOMMprojector.h b/src/AOMMprojector.h index 54888cec..c9089d13 100644 --- a/src/AOMMprojector.h +++ b/src/AOMMprojector.h @@ -22,9 +22,9 @@ class MasksSet; class AOMMprojector { private: - LocGridOrbitals* kernel_phi_; + LocGridOrbitals* kernel_phi_; - SubspaceProjector* kernelprojector_; + SubspaceProjector>* kernelprojector_; MasksSet* kernelMasks_; @@ -35,13 +35,13 @@ class AOMMprojector short counter_; public: - AOMMprojector( - LocGridOrbitals& phi, const std::shared_ptr& lrs); + AOMMprojector(LocGridOrbitals& phi, + const std::shared_ptr& lrs); ~AOMMprojector(); - void projectOut(LocGridOrbitals& phi); + void projectOut(LocGridOrbitals& phi); - void resetProjectors(LocGridOrbitals& phi); + void resetProjectors(LocGridOrbitals& phi); }; #endif diff --git a/src/AndersonMix.cc b/src/AndersonMix.cc index 8311c69b..2892c9f9 100644 --- a/src/AndersonMix.cc +++ b/src/AndersonMix.cc @@ -261,7 +261,7 @@ void AndersonMix::update(T& f, T& work, ostream& os, const bool verbose) for (int j = 0; j 
< mm_; j++) { - x_.axpy(theta_[j], *xi_[j]); + x_.axpy((ORBDTYPE)theta_[j], *xi_[j]); } // update xi_ for next step // restart @@ -288,7 +288,7 @@ void AndersonMix::update(T& f, T& work, ostream& os, const bool verbose) for (int j = 0; j < mm_; j++) { - f.axpy(theta_[j], *fi_[j]); + f.axpy((ORBDTYPE)theta_[j], *fi_[j]); } // update fi_ for next step @@ -309,9 +309,9 @@ void AndersonMix::update(T& f, T& work, ostream& os, const bool verbose) #endif // update x_ if (mm_ > 0) - x_.axpy(beta_, f); + x_.axpy((ORBDTYPE)beta_, f); else - x_.axpy(1., f); + x_.axpy((ORBDTYPE)1., f); postprocessUpdate(); @@ -321,6 +321,6 @@ void AndersonMix::update(T& f, T& work, ostream& os, const bool verbose) #ifdef TESTING template class AndersonMix; #else -template class AndersonMix; -template class AndersonMix; +template class AndersonMix>; +template class AndersonMix>; #endif diff --git a/src/BlockVector.cc b/src/BlockVector.cc index 671ba90d..a1923581 100644 --- a/src/BlockVector.cc +++ b/src/BlockVector.cc @@ -262,9 +262,7 @@ BlockVector::BlockVector( setup(bv); - if (copy_data) - MemorySpace::Memory::copy( - bv.storage_, size_storage_, storage_); + if (copy_data) copyDataFrom(bv); } template @@ -278,8 +276,7 @@ BlockVector::operator=( setup(bv); - MemorySpace::Memory::copy( - bv.storage_, size_storage_, storage_); + copyDataFrom(bv); return *this; } @@ -289,12 +286,17 @@ BlockVector& BlockVector::operator-=( const BlockVector& src) { + opminus_tm_.start(); + for (unsigned int i = 0; i < vect_.size(); i++) { ScalarType* vi = vect_[i]; ScalarType const* const si = src.vect_[i]; BV::subtract(numel_, si, vi); } + + opminus_tm_.stop(); + return *this; } @@ -303,11 +305,15 @@ template void BlockVector::assign( const pb::GridFuncVector& src) { + assign_tm_.start(); + for (unsigned int i = 0; i < vect_.size(); i++) { ScalarType* dest = vect_[i]; src.template getValues(i, dest); } + + assign_tm_.stop(); } template @@ -396,7 +402,11 @@ void BlockVector::scal(const double alpha) { 
assert(storage_ != nullptr); + scal_tm_.start(); + LinearAlgebraUtils::MPscal(size_storage_, alpha, storage_); + + scal_tm_.stop(); } template @@ -453,6 +463,7 @@ void BlockVector::axpy( LinearAlgebraUtils::MPaxpy( locnumel_, alpha, vect_[ix] + shift, vect_[iy] + shift); } + template void BlockVector::axpy(const double alpha, BlockVector& bv, const int ix, const int iy, @@ -467,6 +478,27 @@ void BlockVector::axpy(const double alpha, LinearAlgebraUtils::MPaxpy( locnumel_, alpha, bv.vect_[ix] + shift, vect_[iy] + shift); } + +template +void BlockVector::applyDiagonalOp( + const std::vector& diag, + BlockVector& dst) const +{ + diagop_tm_.start(); + + const double* const dd = diag.data(); + + for (unsigned int j = 0; j < vect_.size(); j++) + { + const ScalarType* __restrict__ srcj = vect_[j]; + ScalarType* __restrict__ dstj = dst.vect_[j]; + for (int i = 0; i < numel_; i++) + dstj[i] = (ScalarType)(dd[i] * (double)srcj[i]); + } + + diagop_tm_.stop(); +} + template void BlockVector::hasnan(const int j) const { @@ -493,8 +525,6 @@ void BlockVector::setDataWithGhosts( set_data_tm_.start(); - data_wghosts->resetData(); - data_wghosts->set_updated_boundaries(false); // get number of mesh points @@ -521,6 +551,11 @@ void BlockVector::printTimers(std::ostream& os) { set_data_tm_.print(os); trade_data_tm_.print(os); + assign_tm_.print(os); + scal_tm_.print(os); + opminus_tm_.print(os); + copy_tm_.print(os); + diagop_tm_.print(os); } template diff --git a/src/BlockVector.h b/src/BlockVector.h index c9627466..33016d62 100644 --- a/src/BlockVector.h +++ b/src/BlockVector.h @@ -30,6 +30,11 @@ class BlockVector { static Timer set_data_tm_; static Timer trade_data_tm_; + static Timer assign_tm_; + static Timer scal_tm_; + static Timer opminus_tm_; + static Timer copy_tm_; + static Timer diagop_tm_; static short n_instances_; static short subdivx_; @@ -109,7 +114,8 @@ class BlockVector deallocate_storage(); } - void axpy(const double alpha, const BlockVector& bv) + template + void 
axpy(const ScalarType2 alpha, const BlockVector& bv) { assert(storage_ != nullptr); assert(bv.storage_ != nullptr); @@ -132,6 +138,9 @@ class BlockVector return vect_[i]; } + void applyDiagonalOp(const std::vector& diag, + BlockVector& dst) const; + ScalarType maxAbsValue() const; template @@ -177,13 +186,19 @@ class BlockVector } void setToDataWithGhosts() { assign(*data_wghosts_); } + void copyDataFrom(const BlockVector& src) { + copy_tm_.start(); + assert(src.size_storage_ == size_storage_); assert(storage_ != nullptr); assert(src.storage_ != nullptr); + MemorySpace::Memory::copy( src.storage_, size_storage_, storage_); + + copy_tm_.stop(); } pb::GridFunc& getVectorWithGhosts(const int i) @@ -306,4 +321,22 @@ Timer BlockVector::set_data_tm_( template Timer BlockVector::trade_data_tm_( "BlockVector::trade_data"); + +template +Timer BlockVector::assign_tm_( + "BlockVector::assign"); + +template +Timer BlockVector::scal_tm_("BlockVector::scal"); + +template +Timer BlockVector::opminus_tm_( + "BlockVector::opminus"); + +template +Timer BlockVector::copy_tm_("BlockVector::copy"); + +template +Timer BlockVector::diagop_tm_( + "BlockVector::diagop"); #endif diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index e602101f..8a2a2021 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -26,6 +26,12 @@ set(SOURCES mgmol_run.cc read_config.cc jade.cc + DotProductSimple.cc + DotProductDiagonal.cc + DotProductWithInvS.cc + DotProductWithDM.cc + LocalMatrices2ReplicatedMatrix.cc + ReplicatedMatrix2SquareLocalMatrices.cc DielectricControl.cc ReplicatedMatrix.cc ReplicatedVector.cc @@ -46,7 +52,7 @@ set(SOURCES HamiltonianMVP_DMStrategy.cc MVPSolver.cc HamiltonianMVPSolver.cc - OrbitalsPreconditioning.cc + MGOrbitalsPreconditioning.cc DFTsolver.cc NonOrthoDMStrategy.cc FullyOccupiedNonOrthoDMStrategy.cc diff --git a/src/ChebyshevApproximation.cc b/src/ChebyshevApproximation.cc index 093f1266..9ef65322 100644 --- a/src/ChebyshevApproximation.cc +++ 
b/src/ChebyshevApproximation.cc @@ -218,6 +218,4 @@ MatrixType ChebyshevApproximation::computeChebyshevApproximation( } template class ChebyshevApproximation>; -#ifdef HAVE_MAGMA template class ChebyshevApproximation; -#endif diff --git a/src/Control.cc b/src/Control.cc index b7d23203..94e2ff30 100644 --- a/src/Control.cc +++ b/src/Control.cc @@ -41,9 +41,11 @@ Control::Control() lrs_extrapolation = 1; // default lrs_compute = 0; system_charge_ = 0.; - poisson_pc_nu1 = 2; - poisson_pc_nu2 = 2; + poisson_pc_nu1 = 1; + poisson_pc_nu2 = 1; poisson_pc_nlev = 10; + poisson_conv_tol = 1.e-8; + poisson_pc_data_ = 32; coloring_algo_ = 0; maxDistanceAtomicInfo_ = 8.; spread_factor = 2.; @@ -262,7 +264,8 @@ void Control::print(std::ostream& os) os << " Localization radius = " << cut_radius << std::endl; os << std::endl; - os << " preconditioner factor:" << precond_factor << std::endl; + os << " preconditioner factor: " << precond_factor << std::endl; + os << " preconditioner precision: " << precond_precision_ << std::endl; if (precond_type_ == 10) { os << " Multigrid preconditioning for wave functions:" << std::endl; @@ -331,7 +334,7 @@ void Control::sync(void) if (onpe0 && verbose > 0) (*MPIdata::sout) << "Control::sync()" << std::endl; // pack - const short size_short_buffer = 91; + const short size_short_buffer = 93; short* short_buffer = new short[size_short_buffer]; if (mype_ == 0) { @@ -385,7 +388,7 @@ void Control::sync(void) short_buffer[47] = out_restart_file_naming_strategy; short_buffer[48] = enforceVmass0; short_buffer[49] = dm_inner_steps; - short_buffer[50] = -1; + short_buffer[50] = rmatrices; short_buffer[51] = fgmres_kim; short_buffer[52] = fgmres_maxits; short_buffer[53] = ilu_type; @@ -421,6 +424,8 @@ void Control::sync(void) short_buffer[88] = hartree_reset_; short_buffer[89] = MD_last_step_; short_buffer[90] = (short)static_cast(poisson_lap_type_); + short_buffer[91] = poisson_pc_data_; + short_buffer[92] = precond_precision_; } else { @@ -440,7 +445,7 
@@ void Control::sync(void) memset(&int_buffer[0], 0, size_int_buffer * sizeof(int)); } - const short size_float_buffer = 44; + const short size_float_buffer = 45; float* float_buffer = new float[size_float_buffer]; if (mype_ == 0) { @@ -487,6 +492,7 @@ void Control::sync(void) float_buffer[41] = pair_mlwf_distance_threshold_; float_buffer[42] = e0_; float_buffer[43] = dm_tol; + float_buffer[44] = poisson_conv_tol; } else { @@ -598,42 +604,44 @@ void Control::sync(void) out_restart_file_naming_strategy = short_buffer[47]; enforceVmass0 = short_buffer[48]; dm_inner_steps = short_buffer[49]; - //... = short_buffer[50]; - fgmres_kim = short_buffer[51]; - fgmres_maxits = short_buffer[52]; - ilu_type = short_buffer[53]; - ilu_lof = short_buffer[54]; - ilu_maxfil = short_buffer[55]; - coloring_algo_ = short_buffer[56]; - diel_flag_ = short_buffer[57]; - poisson_pc_nu1 = short_buffer[58]; - poisson_pc_nu2 = short_buffer[59]; - poisson_pc_nlev = short_buffer[60]; - system_charge_ = short_buffer[61]; - md_print_freq = short_buffer[62]; - use_kernel_functions = short_buffer[63]; - ngpts_[0] = short_buffer[64]; - ngpts_[1] = short_buffer[65]; - ngpts_[2] = short_buffer[66]; - computeCondGram_ = short_buffer[67]; - lrs_extrapolation = short_buffer[68]; - parallel_transport = (bool)short_buffer[69]; - with_spin_ = (bool)short_buffer[70]; - conv_criterion_ = short_buffer[71]; - load_balancing_max_iterations = short_buffer[72]; - load_balancing_modulo = short_buffer[73]; - write_clusters = short_buffer[74]; - DM_solver_ = short_buffer[75]; - dm_algo_ = short_buffer[80]; - dm_approx_order = short_buffer[81]; - dm_approx_ndigits = short_buffer[82]; - dm_approx_power_maxits = short_buffer[83]; - spread_penalty_type_ = short_buffer[84]; - dm_use_old_ = short_buffer[85]; - max_electronic_steps_tight_ = short_buffer[86]; - hartree_reset_ = short_buffer[88]; - MD_last_step_ = short_buffer[89]; - poisson_lap_type_ = static_cast(short_buffer[90]); + rmatrices = short_buffer[50]; + 
fgmres_kim = short_buffer[51]; + fgmres_maxits = short_buffer[52]; + ilu_type = short_buffer[53]; + ilu_lof = short_buffer[54]; + ilu_maxfil = short_buffer[55]; + coloring_algo_ = short_buffer[56]; + diel_flag_ = short_buffer[57]; + poisson_pc_nu1 = short_buffer[58]; + poisson_pc_nu2 = short_buffer[59]; + poisson_pc_nlev = short_buffer[60]; + system_charge_ = short_buffer[61]; + md_print_freq = short_buffer[62]; + use_kernel_functions = short_buffer[63]; + ngpts_[0] = short_buffer[64]; + ngpts_[1] = short_buffer[65]; + ngpts_[2] = short_buffer[66]; + computeCondGram_ = short_buffer[67]; + lrs_extrapolation = short_buffer[68]; + parallel_transport = (bool)short_buffer[69]; + with_spin_ = (bool)short_buffer[70]; + conv_criterion_ = short_buffer[71]; + load_balancing_max_iterations = short_buffer[72]; + load_balancing_modulo = short_buffer[73]; + write_clusters = short_buffer[74]; + DM_solver_ = short_buffer[75]; + dm_algo_ = short_buffer[80]; + dm_approx_order = short_buffer[81]; + dm_approx_ndigits = short_buffer[82]; + dm_approx_power_maxits = short_buffer[83]; + spread_penalty_type_ = short_buffer[84]; + dm_use_old_ = short_buffer[85]; + max_electronic_steps_tight_ = short_buffer[86]; + hartree_reset_ = short_buffer[88]; + MD_last_step_ = short_buffer[89]; + poisson_lap_type_ = static_cast(short_buffer[90]); + poisson_pc_data_ = short_buffer[91]; + precond_precision_ = short_buffer[92]; numst = int_buffer[0]; nel_ = int_buffer[1]; @@ -683,6 +691,7 @@ void Control::sync(void) pair_mlwf_distance_threshold_ = float_buffer[41]; e0_ = float_buffer[42]; dm_tol = float_buffer[43]; + poisson_conv_tol = float_buffer[44]; max_electronic_steps_loose_ = max_electronic_steps; delete[] short_buffer; @@ -829,6 +838,7 @@ int Control::checkState() assert(wannier_transform_type == 0 || wannier_transform_type == 1 || wannier_transform_type == 2); assert(tmatrices == 1 || tmatrices == 0); + assert(rmatrices == 1 || rmatrices == 0); assert(mg_levels_ >= -1); assert(rho0_ > 0.); 
assert(drho0_ > 0.); @@ -1395,7 +1405,7 @@ void Control::setOptions(const boost::program_options::variables_map& vm) if (str.compare("periodic") == 0) bcWF[2] = 1; str = vm["Poisson.solver"].as(); - if (str.compare("CG") == 0) diel_flag_ = 10; + if (str.compare("CG") == 0 || str.compare("PCG") == 0) diel_flag_ = 10; if (str.compare("MG") == 0) diel_flag_ = 0; str = vm["Poisson.diel"].as(); @@ -1405,20 +1415,23 @@ void Control::setOptions(const boost::program_options::variables_map& vm) bool poisson_reset = vm["Poisson.reset"].as(); hartree_reset_ = poisson_reset ? 1 : 0; - poisson_pc_nu1 = vm["Poisson.nu1"].as(); - poisson_pc_nu2 = vm["Poisson.nu2"].as(); - vh_init = vm["Poisson.max_steps_initial"].as(); - vh_its = vm["Poisson.max_steps"].as(); - poisson_pc_nlev = vm["Poisson.max_levels"].as(); - rho0_ = vm["Poisson.rho0"].as(); - drho0_ = vm["Poisson.beta"].as(); - e0_ = vm["Poisson.e0"].as(); + poisson_pc_nu1 = vm["Poisson.nu1"].as(); + poisson_pc_nu2 = vm["Poisson.nu2"].as(); + vh_init = vm["Poisson.max_steps_initial"].as(); + vh_its = vm["Poisson.max_steps"].as(); + poisson_pc_nlev = vm["Poisson.max_levels"].as(); + rho0_ = vm["Poisson.rho0"].as(); + drho0_ = vm["Poisson.beta"].as(); + e0_ = vm["Poisson.e0"].as(); + poisson_pc_data_ = vm["Poisson.precond_precision"].as(); + poisson_conv_tol = vm["Poisson.conv_tol"].as(); str = vm["ProjectedMatrices.solver"].as(); if (str.compare("short_sighted") == 0) short_sighted = 1; if (str.compare("exact") == 0) short_sighted = 0; tmatrices = vm["ProjectedMatrices.printMM"].as() ? 1 : 0; + rmatrices = vm["ProjectedMatrices.replicated"].as() ? 
1 : 0; if (short_sighted) { @@ -1473,8 +1486,9 @@ void Control::setOptions(const boost::program_options::variables_map& vm) std::cout << "Outer solver type: " << str << std::endl; assert(it_algo_type_ >= 0); - mg_levels_ = vm["Quench.preconditioner_num_levels"].as() - 1; - precond_factor = vm["Quench.step_length"].as(); + mg_levels_ = vm["Quench.preconditioner_num_levels"].as() - 1; + precond_precision_ = vm["Quench.preconditioner_precision"].as(); + precond_factor = vm["Quench.step_length"].as(); if (precond_factor < 0.) { switch (lap_type) diff --git a/src/Control.h b/src/Control.h index 7e27d1b8..0956eb07 100644 --- a/src/Control.h +++ b/src/Control.h @@ -375,7 +375,7 @@ class Control poisson_pc_nlev = nlev; } - // 10 or larger means CG, otherwise MG V-cycles + // 10 or larger means PCG, otherwise MG V-cycles bool MGPoissonSolver() { return (diel_flag_ / 10 == 0); } bool LangevinThermostat() { return (thermostat_type == 1); } @@ -406,16 +406,24 @@ class Control // dielectric model for solvation short diel; + // Parameters for MG solver/ preconditioner for Poisson problem short poisson_pc_nu1; short poisson_pc_nu2; short poisson_pc_nlev; + /*! + * Poisson preconditioner precision (32 or 64) + */ + short poisson_pc_data_; + PoissonFDtype poisson_lap_type_; short lap_type; short orthof; // orthogonalization frequency + short precond_precision_; + // screening constant for potential mixing float screening_const; @@ -472,6 +480,9 @@ class Control // Number of v-cycles for hartree solution short vh_its; + + // convergence tolerance for solving Poisson problem using PCG. 
+ float poisson_conv_tol; // Max number of changes of potential short max_changes_pot; @@ -484,6 +495,9 @@ class Control // transfer matrix flag short tmatrices; + // replicated matrices + short rmatrices; + // Initialization with localized orbitals (1) or not (0) short init_loc; diff --git a/src/DFTsolver.cc b/src/DFTsolver.cc index 206fba91..55e6df26 100644 --- a/src/DFTsolver.cc +++ b/src/DFTsolver.cc @@ -425,5 +425,5 @@ void DFTsolver::printTimers(std::ostream& os) solve_tm_.print(os); } -template class DFTsolver; -template class DFTsolver; +template class DFTsolver>; +template class DFTsolver>; diff --git a/src/DMStrategyFactory.cc b/src/DMStrategyFactory.cc index 6dae6870..115c8cbe 100644 --- a/src/DMStrategyFactory.cc +++ b/src/DMStrategyFactory.cc @@ -2,76 +2,101 @@ #include "ReplicatedMatrix.h" template <> -DMStrategy* DMStrategyFactory>* +DMStrategyFactory, dist_matrix::DistMatrix>::createHamiltonianMVP_DMStrategy(MPI_Comm comm, - std::ostream& os, Ions& ions, Rho* rho, - Energy* energy, Electrostatic* electrostat, - MGmol* mgmol_strategy, - ProjectedMatricesInterface* /*proj_matrices*/, LocGridOrbitals* orbitals, - const bool short_sighted) + std::ostream& os, Ions& ions, Rho>* rho, + Energy>* energy, Electrostatic* electrostat, + Hamiltonian>* hamiltonian, + MGmol>* mgmol_strategy, + ProjectedMatricesInterface* /*proj_matrices*/, + LocGridOrbitals* orbitals, const bool short_sighted) { if (short_sighted) { - DMStrategy* dm_strategy + DMStrategy>* dm_strategy = new HamiltonianMVP_DMStrategy, - ProjectedMatricesSparse, LocGridOrbitals>(comm, os, ions, rho, - energy, electrostat, mgmol_strategy, orbitals); + ProjectedMatricesSparse, LocGridOrbitals>(comm, os, + ions, rho, energy, electrostat, hamiltonian, mgmol_strategy, + orbitals); return dm_strategy; } else { - DMStrategy* dm_strategy + DMStrategy>* dm_strategy = new HamiltonianMVP_DMStrategy< dist_matrix::DistMatrix, ProjectedMatrices>, - LocGridOrbitals>(comm, os, ions, rho, energy, electrostat, - 
mgmol_strategy, orbitals); + LocGridOrbitals>(comm, os, ions, rho, energy, + electrostat, hamiltonian, mgmol_strategy, orbitals); return dm_strategy; } } template <> -DMStrategy* DMStrategyFactory>* +DMStrategyFactory, + ReplicatedMatrix>::createHamiltonianMVP_DMStrategy(MPI_Comm comm, + std::ostream& /*os*/, Ions& /*ions*/, + Rho>* /*rho*/, + Energy>* /*energy*/, + Electrostatic* /*electrostat*/, + Hamiltonian>* /*hamiltonian*/, + MGmol>* /*mgmol_strategy*/, + ProjectedMatricesInterface* /*proj_matrices*/, + LocGridOrbitals* /*orbitals*/, const bool /*short_sighted*/) +{ + + std::cerr << "DMStrategy not implemented" << std::endl; + MPI_Abort(comm, EXIT_FAILURE); + + return nullptr; +} + +template <> +DMStrategy>* +DMStrategyFactory, dist_matrix::DistMatrix>::createHamiltonianMVP_DMStrategy(MPI_Comm comm, - std::ostream& os, Ions& ions, Rho* rho, - Energy* energy, Electrostatic* electrostat, - MGmol* mgmol_strategy, + std::ostream& os, Ions& ions, Rho>* rho, + Energy>* energy, Electrostatic* electrostat, + Hamiltonian>* hamiltonian, + MGmol>* mgmol_strategy, ProjectedMatricesInterface* /*proj_matrices*/, - ExtendedGridOrbitals* orbitals, const bool short_sighted) + ExtendedGridOrbitals* orbitals, const bool short_sighted) { (void)short_sighted; - DMStrategy* dm_strategy + DMStrategy>* dm_strategy = new HamiltonianMVP_DMStrategy, ProjectedMatrices>, - ExtendedGridOrbitals>( - comm, os, ions, rho, energy, electrostat, mgmol_strategy, orbitals); + ExtendedGridOrbitals>(comm, os, ions, rho, energy, + electrostat, hamiltonian, mgmol_strategy, orbitals); return dm_strategy; } -#ifdef HAVE_MAGMA template <> -DMStrategy* DMStrategyFactory>* +DMStrategyFactory, ReplicatedMatrix>::createHamiltonianMVP_DMStrategy(MPI_Comm comm, - std::ostream& os, Ions& ions, Rho* rho, - Energy* energy, Electrostatic* electrostat, - MGmol* mgmol_strategy, - ProjectedMatricesInterface* /*proj_matrices*/, LocGridOrbitals* orbitals, - const bool short_sighted) + std::ostream& os, Ions& ions, 
Rho>* rho, + Energy>* energy, Electrostatic* electrostat, + Hamiltonian>* hamiltonian, + MGmol>* mgmol_strategy, + ProjectedMatricesInterface* /*proj_matrices*/, + ExtendedGridOrbitals* orbitals, const bool short_sighted) { (void)short_sighted; - DMStrategy* dm_strategy + DMStrategy>* dm_strategy = new HamiltonianMVP_DMStrategy, ExtendedGridOrbitals>(comm, os, - ions, rho, energy, electrostat, mgmol_strategy, - orbitals->getOverlappingGids()); + ProjectedMatrices, + ExtendedGridOrbitals>(comm, os, ions, rho, energy, + electrostat, hamiltonian, mgmol_strategy, orbitals); return dm_strategy; } -#endif diff --git a/src/DMStrategyFactory.h b/src/DMStrategyFactory.h index 24f4f272..11a42c3d 100644 --- a/src/DMStrategyFactory.h +++ b/src/DMStrategyFactory.h @@ -26,7 +26,8 @@ class DMStrategyFactory public: static DMStrategy* create(MPI_Comm comm, std::ostream& os, Ions& ions, Rho* rho, Energy* energy, - Electrostatic* electrostat, MGmol* mgmol_strategy, + Electrostatic* electrostat, Hamiltonian* hamiltonian, + MGmol* mgmol_strategy, ProjectedMatricesInterface* proj_matrices, OrbitalsType* orbitals) { Control& ct = *(Control::instance()); @@ -36,14 +37,14 @@ class DMStrategyFactory if (ct.DM_solver() == DMNonLinearSolverType::MVP) { dm_strategy = new MVP_DMStrategy(comm, os, - ions, rho, energy, electrostat, mgmol_strategy, + ions, rho, energy, electrostat, hamiltonian, mgmol_strategy, orbitals->getOverlappingGids(), proj_matrices, ct.use_old_dm()); } else if (ct.DM_solver() == DMNonLinearSolverType::HMVP) { dm_strategy = createHamiltonianMVP_DMStrategy(comm, os, ions, rho, - energy, electrostat, mgmol_strategy, proj_matrices, orbitals, - ct.short_sighted); + energy, electrostat, hamiltonian, mgmol_strategy, proj_matrices, + orbitals, ct.short_sighted); } else { @@ -84,6 +85,7 @@ class DMStrategyFactory static DMStrategy* createHamiltonianMVP_DMStrategy( MPI_Comm comm, std::ostream& os, Ions& ions, Rho* rho, Energy* energy, Electrostatic* electrostat, + Hamiltonian* 
hamiltonian, MGmol* mgmol_strategy, ProjectedMatricesInterface* proj_matrices, OrbitalsType* orbitals, const bool); diff --git a/src/DavidsonSolver.cc b/src/DavidsonSolver.cc index c34b6396..636de977 100644 --- a/src/DavidsonSolver.cc +++ b/src/DavidsonSolver.cc @@ -424,7 +424,7 @@ int DavidsonSolver::solve( os_ << "DavidsonSolver -> Iteration " << outer_it << std::endl; os_ << "###########################" << std::endl; } - OrbitalsType tmp_orbitals("Davidson_tmp", orbitals); + OrbitalsType hphi("Davidson_hphi", orbitals); MatrixType dm2Ninit("dm2N", 2 * numst_, 2 * numst_); std::vector eval(2 * numst_); MatrixType evect("EigVect", 2 * numst_, 2 * numst_); @@ -483,10 +483,11 @@ int DavidsonSolver::solve( ProjectedMatrices* projmatrices = dynamic_cast*>( orbitals.getProjMatrices()); + assert(projmatrices != nullptr); - // get H*psi stored in work_orbitals + // get H*phi stored in hphi // h11 computed at the same time - mgmol_strategy_->computePrecondResidual(orbitals, tmp_orbitals, + mgmol_strategy_->computePrecondResidual(orbitals, hphi, work_orbitals, ions_, &kbpsi_1, false, false); projmatrices->setHB2H(); @@ -520,19 +521,30 @@ int DavidsonSolver::solve( kbpsi_2.computeHvnlMatrix(&kbpsi_2, ions_, h22nl); kbpsi_1.computeHvnlMatrix(&kbpsi_2, ions_, h12nl); + + h12 = h12nl; + h22 = h22nl; + } + else + { + hamiltonian_->applyDeltaPot(orbitals, hphi); + orbitals.addDotWithNcol2Matrix(hphi, h11); + } + + if (inner_it == 0) + { + // compute H*P and store in hphi + hamiltonian_->applyLocal(numst_, work_orbitals, hphi); } else { - h11 = h11nl; - hamiltonian_->addHlocal2matrix(orbitals, orbitals, h11); + hamiltonian_->applyDeltaPot(work_orbitals, hphi); } // update h22, h12 and h21 - h22 = h22nl; - hamiltonian_->addHlocal2matrix(work_orbitals, work_orbitals, h22); + orbitals.addDotWithNcol2Matrix(hphi, h12); - h12 = h12nl; - hamiltonian_->addHlocal2matrix(orbitals, work_orbitals, h12); + work_orbitals.addDotWithNcol2Matrix(hphi, h22); h21.transpose(1., h12, 0.); @@ 
-604,18 +616,16 @@ int DavidsonSolver::solve( energy_->saveVofRho(); // update h11, h22, h12, and h21 - h11 = h11nl; - hamiltonian_->addHlocal2matrix(orbitals, orbitals, h11); + hamiltonian_->applyDeltaPot(orbitals, hphi); + orbitals.addDotWithNcol2Matrix(hphi, h11); - h22 = h22nl; - hamiltonian_->addHlocal2matrix( - work_orbitals, work_orbitals, h22); - - h12 = h12nl; - hamiltonian_->addHlocal2matrix(orbitals, work_orbitals, h12); + hamiltonian_->applyDeltaPot(work_orbitals, hphi); + work_orbitals.addDotWithNcol2Matrix(hphi, h22); + orbitals.addDotWithNcol2Matrix(hphi, h12); h21.transpose(1., h12, 0.); + // assemble 2N x 2N Hamiltonian proj_mat2N_->assignBlocksH(h11, h12, h21, h22); proj_mat2N_->setHB2H(); @@ -710,7 +720,7 @@ int DavidsonSolver::solve( // eigenvalues of DM orbitals.multiply_by_matrix(dm12); work_orbitals.multiply_by_matrix(dm22); - orbitals.axpy(1., work_orbitals); + orbitals.axpy((ORBDTYPE)1., work_orbitals); orbitals.incrementIterativeIndex(); orbitals.incrementIterativeIndex(); work_orbitals.incrementIterativeIndex(2); @@ -826,6 +836,7 @@ int DavidsonSolver::solve( assert(pmat); pmat->printOccupations(os_); + proj_mat2N_->printEigenvalues(os_); } if (mmpi.PE0() && ct.verbose > 1) @@ -848,8 +859,6 @@ void DavidsonSolver::printTimers(std::ostream& os) target_tm_.print(os); } -template class DavidsonSolver, dist_matrix::DistMatrix>; -#ifdef HAVE_MAGMA -template class DavidsonSolver; -#endif +template class DavidsonSolver, ReplicatedMatrix>; diff --git a/src/DensityMatrix.cc b/src/DensityMatrix.cc index 7be19aab..a82ec288 100644 --- a/src/DensityMatrix.cc +++ b/src/DensityMatrix.cc @@ -78,7 +78,7 @@ void DensityMatrix::build( #endif // diagonal matrix with occ values in diagonal - MatrixType gamma("Gamma", &occ[0], dim_, dim_); + MatrixType gamma("Gamma", &occ[0], dim_); gamma.scal(orbital_occupation_); // rescale for spin // work_ = zmat*gamma with gamma symmetric @@ -132,7 +132,7 @@ void DensityMatrix::build() std::cout << "Warning: occupations 
not up to date to build DM!!!" << std::endl; - MatrixType gamma("Gamma", &occupation_[0], dim_, dim_); + MatrixType gamma("Gamma", &occupation_[0], dim_); gamma.scal(orbital_occupation_); // rescale for spin *dm_ = gamma; @@ -478,6 +478,4 @@ int DensityMatrix::read(HDFrestart& h5f_file, std::string& name) } template class DensityMatrix>; -#ifdef HAVE_MAGMA template class DensityMatrix; -#endif diff --git a/src/DistMatrix/DistMatrix.cc b/src/DistMatrix/DistMatrix.cc index 743bd2a5..89f19400 100644 --- a/src/DistMatrix/DistMatrix.cc +++ b/src/DistMatrix/DistMatrix.cc @@ -78,6 +78,8 @@ DistMatrix::DistMatrix(const std::string& name, const int m, const int n) bc_(*default_bc_), comm_global_(default_bc_->comm_global()) { + assert(default_bc_ != nullptr); + resize(m, n, distmatrix_def_block_size_, distmatrix_def_block_size_); } @@ -87,6 +89,8 @@ DistMatrix::DistMatrix(const std::string& name, const int m) bc_(*default_bc_), comm_global_(default_bc_->comm_global()) { + assert(default_bc_ != nullptr); + resize(m, m, distmatrix_def_block_size_, distmatrix_def_block_size_); } @@ -2032,12 +2036,12 @@ DistMatrix::DistMatrix(const std::string& name, const BlacsContext& bc, template DistMatrix::DistMatrix( - const std::string& name, const T* const dmat, const int m, const int n) + const std::string& name, const T* const dmat, const int m) : object_name_(name), bc_(*default_bc_), comm_global_(default_bc_->comm_global()) { - resize(m, n, distmatrix_def_block_size_, distmatrix_def_block_size_); + resize(m, m, distmatrix_def_block_size_, distmatrix_def_block_size_); setDiagonalValues(dmat); } diff --git a/src/DistMatrix/DistMatrix.h b/src/DistMatrix/DistMatrix.h index 175fbd9b..e07c8d56 100644 --- a/src/DistMatrix/DistMatrix.h +++ b/src/DistMatrix/DistMatrix.h @@ -243,8 +243,7 @@ class DistMatrix // Construct a diagonal DistMatrix from a vector dmat of diagonal elements DistMatrix(const std::string& name, const BlacsContext&, const T* const dmat, const int m, const int n); - 
DistMatrix( - const std::string& name, const T* const dmat, const int m, const int n); + DistMatrix(const std::string& name, const T* const dmat, const int m); DistMatrix& operator=(const DistMatrix& a); DistMatrix& assign(const DistMatrix&, const int, const int); diff --git a/src/DistMatrix/DistMatrixTools.cc b/src/DistMatrix/DistMatrixTools.cc index 4c3ed2ce..a3262bc4 100644 --- a/src/DistMatrix/DistMatrixTools.cc +++ b/src/DistMatrix/DistMatrixTools.cc @@ -120,7 +120,7 @@ void sqrtDistMatrix(dist_matrix::DistMatrix& u) { eigenvalues[i] = 1. / sqrt(eigenvalues[i]); } - dist_matrix::DistMatrix g("g", &eigenvalues[0], nst, nst); + dist_matrix::DistMatrix g("g", &eigenvalues[0], nst); // u = z * g * z**T w.symm('r', 'l', 1., g, z, 0.); diff --git a/src/DotProductDiagonal.cc b/src/DotProductDiagonal.cc new file mode 100644 index 00000000..764e7446 --- /dev/null +++ b/src/DotProductDiagonal.cc @@ -0,0 +1,53 @@ +// Copyright (c) 2017, Lawrence Livermore National Security, LLC and +// UT-Battelle, LLC. +// Produced at the Lawrence Livermore National Laboratory and the Oak Ridge +// National Laboratory. +// LLNL-CODE-743438 +// All rights reserved. +// This file is part of MGmol. For details, see https://github.com/llnl/mgmol. 
+// Please also read this link https://github.com/llnl/mgmol/LICENSE +#include "DotProductDiagonal.h" + +#include "Control.h" +#include "ExtendedGridOrbitals.h" +#include "LocGridOrbitals.h" +#include "Mesh.h" +#include "ProjectedMatricesInterface.h" +#include "SquareLocalMatrices.h" + +template <> +double DotProductDiagonal>::dotProduct( + ExtendedGridOrbitals& phi0, + const ExtendedGridOrbitals& phi1) +{ + const int chromatic_number = phi0.chromatic_number(); + std::vector ss(chromatic_number); + phi0.computeDiagonalElementsDotProduct(phi1, ss); + + ProjectedMatricesInterface* proj_matrices = phi0.getProjMatrices(); + + return proj_matrices->getTraceDiagProductWithInvS(ss); +} + +template <> +double DotProductDiagonal>::dotProduct( + LocGridOrbitals& phi0, const LocGridOrbitals& phi1) +{ + const int numst = phi0.numst(); + ProjectedMatricesInterface* proj_matrices = phi0.getProjMatrices(); + assert(proj_matrices != nullptr); + + std::vector ss; + Control& ct = *(Control::instance()); + if (ct.short_sighted) + { + phi0.computeDiagonalElementsDotProductLocal(phi1, ss); + } + else + { + ss.resize(numst); + phi0.computeDiagonalElementsDotProduct(phi1, ss); + } + + return proj_matrices->getTraceDiagProductWithInvS(ss); +} diff --git a/src/DotProductDiagonal.h b/src/DotProductDiagonal.h new file mode 100644 index 00000000..a5d5dec2 --- /dev/null +++ b/src/DotProductDiagonal.h @@ -0,0 +1,23 @@ +// Copyright (c) 2017, Lawrence Livermore National Security, LLC and +// UT-Battelle, LLC. +// Produced at the Lawrence Livermore National Laboratory and the Oak Ridge +// National Laboratory. +// LLNL-CODE-743438 +// All rights reserved. +// This file is part of MGmol. For details, see https://github.com/llnl/mgmol. 
+// Please also read this link https://github.com/llnl/mgmol/LICENSE +#ifndef MGMOL_DotProductManagerDiagonal_H +#define MGMOL_DotProductManagerDiagonal_H + +#include "DotProductManager.h" + +template +class DotProductDiagonal : public DotProductManager +{ +public: + DotProductDiagonal(){}; + + double dotProduct(T& phi0, const T& phi1) override; +}; + +#endif diff --git a/src/DotProductManager.h b/src/DotProductManager.h new file mode 100644 index 00000000..6491d696 --- /dev/null +++ b/src/DotProductManager.h @@ -0,0 +1,23 @@ +// Copyright (c) 2017, Lawrence Livermore National Security, LLC and +// UT-Battelle, LLC. +// Produced at the Lawrence Livermore National Laboratory and the Oak Ridge +// National Laboratory. +// LLNL-CODE-743438 +// All rights reserved. +// This file is part of MGmol. For details, see https://github.com/llnl/mgmol. +// Please also read this link https://github.com/llnl/mgmol/LICENSE +#ifndef MGMOL_DotProductManager_H +#define MGMOL_DotProductManager_H + +template +class DotProductManager +{ +public: + DotProductManager(){}; + + virtual ~DotProductManager(){}; + + virtual double dotProduct(T& a, const T& b) = 0; +}; + +#endif diff --git a/src/DotProductManagerFactory.h b/src/DotProductManagerFactory.h new file mode 100644 index 00000000..ebd783a4 --- /dev/null +++ b/src/DotProductManagerFactory.h @@ -0,0 +1,46 @@ +// Copyright (c) 2017, Lawrence Livermore National Security, LLC and +// UT-Battelle, LLC. +// Produced at the Lawrence Livermore National Laboratory and the Oak Ridge +// National Laboratory. +// LLNL-CODE-743438 +// All rights reserved. +// This file is part of MGmol. For details, see https://github.com/llnl/mgmol. 
+// Please also read this link https://github.com/llnl/mgmol/LICENSE + +#ifndef MGMOL_DotProductManagerFACTORY_H +#define MGMOL_DotProductManagerFACTORY_H + +#include "DotProductDiagonal.h" +#include "DotProductSimple.h" +#include "DotProductWithDM.h" +#include "DotProductWithInvS.h" + +template +class DotProductManagerFactory +{ +public: + static DotProductManager* create(const short type) + { + DotProductManager* dot_product_manager = nullptr; + switch (type) + { + case 0: + dot_product_manager = new DotProductDiagonal(); + break; + case 1: + dot_product_manager = new DotProductWithInvS(); + break; + case 2: + dot_product_manager = new DotProductWithDM(); + break; + case 3: + dot_product_manager = new DotProductSimple(); + break; + default: + std::cerr << "DotProductManager* create() --- option invalid\n"; + } + return dot_product_manager; + } +}; + +#endif diff --git a/src/DotProductSimple.cc b/src/DotProductSimple.cc new file mode 100644 index 00000000..44e62b5a --- /dev/null +++ b/src/DotProductSimple.cc @@ -0,0 +1,35 @@ +// Copyright (c) 2017, Lawrence Livermore National Security, LLC and +// UT-Battelle, LLC. +// Produced at the Lawrence Livermore National Laboratory and the Oak Ridge +// National Laboratory. +// LLNL-CODE-743438 +// All rights reserved. +// This file is part of MGmol. For details, see https://github.com/llnl/mgmol. 
+// Please also read this link https://github.com/llnl/mgmol/LICENSE +#include "DotProductSimple.h" + +#include "ExtendedGridOrbitals.h" +#include "LocGridOrbitals.h" +#include "Mesh.h" +#include "ProjectedMatricesInterface.h" +#include "SquareLocalMatrices.h" + +template +double DotProductSimple::dotProduct(T& phi0, const T& phi1) +{ + Mesh* mymesh = Mesh::instance(); + const int subdivx = mymesh->subdivx(); + const int chromatic_number = phi0.chromatic_number(); + + SquareLocalMatrices ss( + subdivx, chromatic_number); + + phi0.computeLocalProduct(phi1, ss); + + ProjectedMatricesInterface* proj_matrices = phi0.getProjMatrices(); + + return proj_matrices->dotProductSimple(ss); +} + +template class DotProductSimple>; +template class DotProductSimple>; diff --git a/src/DotProductSimple.h b/src/DotProductSimple.h new file mode 100644 index 00000000..38aa18b5 --- /dev/null +++ b/src/DotProductSimple.h @@ -0,0 +1,21 @@ +// Copyright (c) 2017, Lawrence Livermore National Security, LLC and +// UT-Battelle, LLC. +// Produced at the Lawrence Livermore National Laboratory and the Oak Ridge +// National Laboratory. +// LLNL-CODE-743438 +// All rights reserved. +// This file is part of MGmol. For details, see https://github.com/llnl/mgmol. +// Please also read this link https://github.com/llnl/mgmol/LICENSE +#ifndef MGMOL_DotProductManagerSimple_H +#define MGMOL_DotProductManagerSimple_H + +#include "DotProductManager.h" + +template +class DotProductSimple : public DotProductManager +{ +public: + double dotProduct(T& phi0, const T& phi1) override; +}; + +#endif diff --git a/src/DotProductWithDM.cc b/src/DotProductWithDM.cc new file mode 100644 index 00000000..652a6683 --- /dev/null +++ b/src/DotProductWithDM.cc @@ -0,0 +1,35 @@ +// Copyright (c) 2017, Lawrence Livermore National Security, LLC and +// UT-Battelle, LLC. +// Produced at the Lawrence Livermore National Laboratory and the Oak Ridge +// National Laboratory. +// LLNL-CODE-743438 +// All rights reserved. 
+// This file is part of MGmol. For details, see https://github.com/llnl/mgmol. +// Please also read this link https://github.com/llnl/mgmol/LICENSE +#include "DotProductWithDM.h" + +#include "ExtendedGridOrbitals.h" +#include "LocGridOrbitals.h" +#include "Mesh.h" +#include "ProjectedMatricesInterface.h" +#include "SquareLocalMatrices.h" + +template +double DotProductWithDM::dotProduct(T& phi0, const T& phi1) +{ + Mesh* mymesh = Mesh::instance(); + const int subdivx = mymesh->subdivx(); + const int chromatic_number = phi0.chromatic_number(); + + SquareLocalMatrices ss( + subdivx, chromatic_number); + + phi0.computeLocalProduct(phi1, ss); + + ProjectedMatricesInterface* proj_matrices = phi0.getProjMatrices(); + + return proj_matrices->dotProductWithDM(ss); +} + +template class DotProductWithDM>; +template class DotProductWithDM>; diff --git a/src/DotProductWithDM.h b/src/DotProductWithDM.h new file mode 100644 index 00000000..96c713d6 --- /dev/null +++ b/src/DotProductWithDM.h @@ -0,0 +1,21 @@ +// Copyright (c) 2017, Lawrence Livermore National Security, LLC and +// UT-Battelle, LLC. +// Produced at the Lawrence Livermore National Laboratory and the Oak Ridge +// National Laboratory. +// LLNL-CODE-743438 +// All rights reserved. +// This file is part of MGmol. For details, see https://github.com/llnl/mgmol. +// Please also read this link https://github.com/llnl/mgmol/LICENSE +#ifndef MGMOL_DotProductManagerWithDM_H +#define MGMOL_DotProductManagerWithDM_H + +#include "DotProductManager.h" + +template +class DotProductWithDM : public DotProductManager +{ +public: + double dotProduct(T& phi0, const T& phi1) override; +}; + +#endif diff --git a/src/DotProductWithInvS.cc b/src/DotProductWithInvS.cc new file mode 100644 index 00000000..08b079ce --- /dev/null +++ b/src/DotProductWithInvS.cc @@ -0,0 +1,35 @@ +// Copyright (c) 2017, Lawrence Livermore National Security, LLC and +// UT-Battelle, LLC. 
+// Produced at the Lawrence Livermore National Laboratory and the Oak Ridge +// National Laboratory. +// LLNL-CODE-743438 +// All rights reserved. +// This file is part of MGmol. For details, see https://github.com/llnl/mgmol. +// Please also read this link https://github.com/llnl/mgmol/LICENSE +#include "DotProductWithInvS.h" + +#include "ExtendedGridOrbitals.h" +#include "LocGridOrbitals.h" +#include "Mesh.h" +#include "ProjectedMatricesInterface.h" +#include "SquareLocalMatrices.h" + +template +double DotProductWithInvS::dotProduct(T& phi0, const T& phi1) +{ + Mesh* mymesh = Mesh::instance(); + const int subdivx = mymesh->subdivx(); + const int chromatic_number = phi0.chromatic_number(); + + SquareLocalMatrices ss( + subdivx, chromatic_number); + + phi0.computeLocalProduct(phi1, ss); + + ProjectedMatricesInterface* proj_matrices = phi0.getProjMatrices(); + + return proj_matrices->dotProductWithInvS(ss); +} + +template class DotProductWithInvS>; +template class DotProductWithInvS>; diff --git a/src/DotProductWithInvS.h b/src/DotProductWithInvS.h new file mode 100644 index 00000000..868d8f9d --- /dev/null +++ b/src/DotProductWithInvS.h @@ -0,0 +1,21 @@ +// Copyright (c) 2017, Lawrence Livermore National Security, LLC and +// UT-Battelle, LLC. +// Produced at the Lawrence Livermore National Laboratory and the Oak Ridge +// National Laboratory. +// LLNL-CODE-743438 +// All rights reserved. +// This file is part of MGmol. For details, see https://github.com/llnl/mgmol. 
+// Please also read this link https://github.com/llnl/mgmol/LICENSE +#ifndef MGMOL_DotProductManagerWithInvS_H +#define MGMOL_DotProductManagerWithInvS_H + +#include "DotProductManager.h" + +template +class DotProductWithInvS : public DotProductManager +{ +public: + double dotProduct(T& phi0, const T& phi1) override; +}; + +#endif diff --git a/src/EigenDMStrategy.cc b/src/EigenDMStrategy.cc index 358ed5f9..cf803e0f 100644 --- a/src/EigenDMStrategy.cc +++ b/src/EigenDMStrategy.cc @@ -48,5 +48,5 @@ int EigenDMStrategy::update(OrbitalsType& orbitals) return 0; } -template class EigenDMStrategy; -template class EigenDMStrategy; +template class EigenDMStrategy>; +template class EigenDMStrategy>; diff --git a/src/Electrostatic.cc b/src/Electrostatic.cc index aba09d07..cbefab6f 100644 --- a/src/Electrostatic.cc +++ b/src/Electrostatic.cc @@ -254,9 +254,10 @@ void Electrostatic::setup(const short max_sweeps) { Control& ct = *(Control::instance()); const short nu1 = ct.poisson_pc_nu1; - const short nu2 = ct.poisson_pc_nu1; + const short nu2 = ct.poisson_pc_nu2; const short max_nlevs = ct.poisson_pc_nlev; - poisson_solver_->setup(nu1, nu2, max_sweeps, 1.e-16, max_nlevs); + const float conv_tol = ct.poisson_conv_tol; + poisson_solver_->setup(nu1, nu2, max_sweeps, conv_tol, max_nlevs); } template @@ -336,14 +337,16 @@ void Electrostatic::computeVh(const Ions& ions, Rho& rho, Potentials& pot) solve_tm_.stop(); } -template void Electrostatic::computeVhRho(Rho& rho); +template void Electrostatic::computeVhRho(Rho>& rho); template void Electrostatic::computeVh( - const Ions& ions, Rho& rho, Potentials& pot); + const Ions& ions, Rho>& rho, Potentials& pot); template void Electrostatic::computeVh(const pb::GridFunc& vhinit, - const Ions& ions, Rho& rho, Potentials& pot); + const Ions& ions, Rho>& rho, Potentials& pot); -template void Electrostatic::computeVhRho(Rho& rho); -template void Electrostatic::computeVh( - const Ions& ions, Rho& rho, Potentials& pot); +template void 
Electrostatic::computeVhRho( + Rho>& rho); +template void Electrostatic::computeVh(const Ions& ions, + Rho>& rho, Potentials& pot); template void Electrostatic::computeVh(const pb::GridFunc& vhinit, - const Ions& ions, Rho& rho, Potentials& pot); + const Ions& ions, Rho>& rho, + Potentials& pot); diff --git a/src/Energy.cc b/src/Energy.cc index f53dbc93..ad371afc 100644 --- a/src/Energy.cc +++ b/src/Energy.cc @@ -60,9 +60,9 @@ double Energy::getEVrhoRho() const template double Energy::evaluateEnergyIonsInVext(Ions& ions) { +#ifdef HAVE_TRICUBIC double energy = 0.; -#ifdef HAVE_TRICUBIC if (!pot_.withVext()) return energy; //(*MPIdata::sout)<<"Energy::evaluateEnergyIonsInVext()"<::evaluateEnergyIonsInVext(Ions& ions) std::vector val(nions); pot_.getValVext(positions, val); + double energy = 0.; + // loop over ions again ion = ions.local_ions().begin(); int ion_index = 0; @@ -105,8 +107,13 @@ double Energy::evaluateEnergyIonsInVext(Ions& ions) MGmol_MPI& mmpi = *(MGmol_MPI::instance()); mmpi.allreduce(&energy, &tmp, 1, MPI_SUM); energy = tmp; -#endif + return energy; +#else + (void)ions; + + return 0.; +#endif } template @@ -192,5 +199,5 @@ double Energy::evaluateTotal(const double ts, // in [Ha] return energy_sc; } -template class Energy; -template class Energy; +template class Energy>; +template class Energy>; diff --git a/src/EnergySpreadPenalty.cc b/src/EnergySpreadPenalty.cc index 0299fbe8..5d9a1a5d 100644 --- a/src/EnergySpreadPenalty.cc +++ b/src/EnergySpreadPenalty.cc @@ -231,5 +231,5 @@ double EnergySpreadPenalty::evaluateEnergy(const T& phi) return alpha_ * total_energy; } -template class EnergySpreadPenalty; -template class EnergySpreadPenalty; +template class EnergySpreadPenalty>; +template class EnergySpreadPenalty>; diff --git a/src/ExtendedGridOrbitals.cc b/src/ExtendedGridOrbitals.cc index 3e887dad..48eafce6 100644 --- a/src/ExtendedGridOrbitals.cc +++ b/src/ExtendedGridOrbitals.cc @@ -6,17 +6,13 @@ // All rights reserved. 
// This file is part of MGmol. For details, see https://github.com/llnl/mgmol. // Please also read this link https://github.com/llnl/mgmol/LICENSE +#include "ExtendedGridOrbitals.h" #include "global.h" -#include - #include "Control.h" -#include "DistMatrix.h" -#include "ExtendedGridOrbitals.h" +#include "DotProductManagerFactory.h" #include "GridFunc.h" -#include "HDFrestart.h" -#include "Laph2.h" #include "Laph4M.h" #include "LocalMatrices2DistMatrix.h" #include "LocalizationRegions.h" @@ -31,41 +27,77 @@ #include #include +#include #include +#ifdef MGMOL_HAS_LIBROM +#include "librom.h" +#endif + #define ORBITAL_OCCUPATION 2. std::string getDatasetName(const std::string& name, const int color); -short ExtendedGridOrbitals::subdivx_ = 0; -int ExtendedGridOrbitals::lda_ = 0; -int ExtendedGridOrbitals::numpt_ = 0; -int ExtendedGridOrbitals::loc_numpt_ = 0; -ExtendedGridOrbitalsPtrFunc ExtendedGridOrbitals::dotProduct_ - = &ExtendedGridOrbitals::dotProductDiagonal; -int ExtendedGridOrbitals::data_wghosts_index_ = -1; -int ExtendedGridOrbitals::numst_ = -1; -std::vector> ExtendedGridOrbitals::overlapping_gids_; - -Timer ExtendedGridOrbitals::matB_tm_("ExtendedGridOrbitals::matB"); -Timer ExtendedGridOrbitals::invBmat_tm_("ExtendedGridOrbitals::invBmat"); -Timer ExtendedGridOrbitals::overlap_tm_("ExtendedGridOrbitals::overlap"); -Timer ExtendedGridOrbitals::dot_product_tm_( - "ExtendedGridOrbitals::dot_product"); -Timer ExtendedGridOrbitals::addDot_tm_("ExtendedGridOrbitals::addDot"); -Timer ExtendedGridOrbitals::prod_matrix_tm_( - "ExtendedGridOrbitals::prod_matrix"); -Timer ExtendedGridOrbitals::assign_tm_("ExtendedGridOrbitals::assign"); -Timer ExtendedGridOrbitals::normalize_tm_("ExtendedGridOrbitals::normalize"); -Timer ExtendedGridOrbitals::axpy_tm_("ExtendedGridOrbitals::axpy"); - -ExtendedGridOrbitals::ExtendedGridOrbitals(std::string name, +template +DotProductManager>* + ExtendedGridOrbitals::dotProductManager_ + = nullptr; + +template +int 
ExtendedGridOrbitals::lda_ = 0; +template +int ExtendedGridOrbitals::numpt_ = 0; +template +int ExtendedGridOrbitals::data_wghosts_index_ = -1; +template +int ExtendedGridOrbitals::numst_ = -1; +template +std::vector> + ExtendedGridOrbitals::overlapping_gids_; + +template +Timer ExtendedGridOrbitals::matB_tm_( + "ExtendedGridOrbitals" + std::to_string(8 * sizeof(ScalarType)) + "::matB"); +template +Timer ExtendedGridOrbitals::invBmat_tm_( + "ExtendedGridOrbitals" + std::to_string(8 * sizeof(ScalarType)) + + "::invBmat"); +template +Timer ExtendedGridOrbitals::overlap_tm_( + "ExtendedGridOrbitals" + std::to_string(8 * sizeof(ScalarType)) + + "::overlap"); +template +Timer ExtendedGridOrbitals::dot_product_tm_( + "ExtendedGridOrbitals" + std::to_string(8 * sizeof(ScalarType)) + + "::dot_product"); +template +Timer ExtendedGridOrbitals::addDot_tm_( + "ExtendedGridOrbitals" + std::to_string(8 * sizeof(ScalarType)) + + "::addDot"); +template +Timer ExtendedGridOrbitals::prod_matrix_tm_( + "ExtendedGridOrbitals" + std::to_string(8 * sizeof(ScalarType)) + + "::prod_matrix"); +template +Timer ExtendedGridOrbitals::assign_tm_( + "ExtendedGridOrbitals" + std::to_string(8 * sizeof(ScalarType)) + + "::assign"); +template +Timer ExtendedGridOrbitals::normalize_tm_( + "ExtendedGridOrbitals" + std::to_string(8 * sizeof(ScalarType)) + + "::normalize"); +template +Timer ExtendedGridOrbitals::axpy_tm_( + "ExtendedGridOrbitals" + std::to_string(8 * sizeof(ScalarType)) + "::axpy"); + +template +ExtendedGridOrbitals::ExtendedGridOrbitals(std::string name, const pb::Grid& my_grid, const short subdivx, const int numst, const short bc[3], ProjectedMatricesInterface* proj_matrices, std::shared_ptr lrs, MasksSet* masks, MasksSet* corrmasks, ClusterOrbitals* local_cluster, const bool setup_flag) : name_(std::move(name)), proj_matrices_(proj_matrices), - block_vector_(my_grid, subdivx, bc), + block_vector_(my_grid, 1, bc), grid_(my_grid) { (void)lrs; @@ -74,46 +106,51 @@ 
ExtendedGridOrbitals::ExtendedGridOrbitals(std::string name, (void)local_cluster; // preconditions - assert(subdivx > 0); +#ifndef NDEBUG + assert(subdivx == 1); +#else + (void)subdivx; +#endif assert(proj_matrices != nullptr); for (short i = 0; i < 3; i++) assert(bc[i] == 0 || bc[i] == 1); assert(grid_.size() > 0); - subdivx_ = subdivx; - numst_ = numst; - numpt_ = grid_.size(); - lda_ = block_vector_.getld(); - loc_numpt_ = numpt_ / subdivx_; + numst_ = numst; + numpt_ = grid_.size(); + lda_ = block_vector_.getld(); assert(numst_ >= 0); if (setup_flag) setup(); } -ExtendedGridOrbitals::~ExtendedGridOrbitals() +template +ExtendedGridOrbitals::~ExtendedGridOrbitals() { assert(proj_matrices_ != nullptr); } -ExtendedGridOrbitals::ExtendedGridOrbitals(const std::string& name, - const ExtendedGridOrbitals& A, const bool copy_data) +template +ExtendedGridOrbitals::ExtendedGridOrbitals(const std::string& name, + const ExtendedGridOrbitals& A, const bool copy_data) : Orbitals(A, copy_data), name_(name), proj_matrices_(A.proj_matrices_), block_vector_(A.block_vector_, copy_data), grid_(A.grid_) { - // if(onpe0)cout<<"call ExtendedGridOrbitals(const ExtendedGridOrbitals &A, - // const bool copy_data)"< +ExtendedGridOrbitals::ExtendedGridOrbitals(const std::string& name, + const ExtendedGridOrbitals& A, + ProjectedMatricesInterface* proj_matrices, const bool copy_data) : Orbitals(A, copy_data), name_(name), proj_matrices_(proj_matrices), @@ -126,7 +163,9 @@ ExtendedGridOrbitals::ExtendedGridOrbitals(const std::string& name, proj_matrices_->setup(overlapping_gids_); } -void ExtendedGridOrbitals::copyDataFrom(const ExtendedGridOrbitals& src) +template +void ExtendedGridOrbitals::copyDataFrom( + const ExtendedGridOrbitals& src) { assert(proj_matrices_ != nullptr); @@ -135,19 +174,18 @@ void ExtendedGridOrbitals::copyDataFrom(const ExtendedGridOrbitals& src) setIterativeIndex(src); } -void ExtendedGridOrbitals::setDotProduct(const short dot_type) +template +void 
ExtendedGridOrbitals::setDotProduct(const short dot_type) { - if (dot_type == 0) - dotProduct_ = &ExtendedGridOrbitals::dotProductDiagonal; - else if (dot_type == 1) - dotProduct_ = &ExtendedGridOrbitals::dotProductWithInvS; - else if (dot_type == 2) - dotProduct_ = &ExtendedGridOrbitals::dotProductWithDM; - else if (dot_type == 3) - dotProduct_ = &ExtendedGridOrbitals::dotProductSimple; + DotProductManagerFactory factory; + + dotProductManager_ = factory.create(dot_type); + + assert(dotProductManager_ != nullptr); } -void ExtendedGridOrbitals::setup() +template +void ExtendedGridOrbitals::setup() { Control& ct = *(Control::instance()); @@ -171,8 +209,9 @@ void ExtendedGridOrbitals::setup() "ExtendedGridOrbitals::setup() done...", (*MPIdata::sout)); } -void ExtendedGridOrbitals::reset(MasksSet* masks, MasksSet* corrmasks, - std::shared_ptr lrs) +template +void ExtendedGridOrbitals::reset(MasksSet* masks, + MasksSet* corrmasks, std::shared_ptr lrs) { (void)masks; (void)corrmasks; @@ -186,7 +225,9 @@ void ExtendedGridOrbitals::reset(MasksSet* masks, MasksSet* corrmasks, setup(); } -void ExtendedGridOrbitals::assign(const ExtendedGridOrbitals& orbitals) +template +void ExtendedGridOrbitals::assign( + const ExtendedGridOrbitals& orbitals) { assert(proj_matrices_ != nullptr); @@ -199,8 +240,10 @@ void ExtendedGridOrbitals::assign(const ExtendedGridOrbitals& orbitals) assign_tm_.stop(); } -void ExtendedGridOrbitals::axpy( - const double alpha, const ExtendedGridOrbitals& orbitals) +template +template +void ExtendedGridOrbitals::axpy( + const CoeffType alpha, const ExtendedGridOrbitals& orbitals) { axpy_tm_.start(); @@ -211,11 +254,11 @@ void ExtendedGridOrbitals::axpy( axpy_tm_.stop(); } -void ExtendedGridOrbitals::initGauss( +template +void ExtendedGridOrbitals::initGauss( const double rc, const std::shared_ptr lrs) { assert(numst_ >= 0); - assert(subdivx_ > 0); MGmol_MPI& mmpi = *(MGmol_MPI::instance()); Control& ct = *(Control::instance()); @@ -229,7 +272,7 @@ void 
ExtendedGridOrbitals::initGauss( const double start1 = grid_.start(1); const double start2 = grid_.start(2); - const int dim0 = grid_.dim(0) / subdivx_; + const int dim0 = grid_.dim(0); const int dim1 = grid_.dim(1); const int dim2 = grid_.dim(2); @@ -245,54 +288,52 @@ void ExtendedGridOrbitals::initGauss( const double rmax = 6. * rc; for (int icolor = 0; icolor < numst_; icolor++) { - ORBDTYPE* ipsi = psi(icolor); + ScalarType* ipsi = psi(icolor); unsigned int const ipsi_size = numpt_; - ORBDTYPE* ipsi_host_view = MemorySpace::Memory::allocate_host_view(ipsi_size); - MemorySpace::Memory::copy_view_to_host( + MemorySpace::Memory::copy_view_to_host( ipsi, ipsi_size, ipsi_host_view); - MemorySpace::Memory::set( + MemorySpace::Memory::set( ipsi_host_view, ipsi_size, 0); - for (short iloc = 0; iloc < subdivx_; iloc++) + const Vector3D& center(lrs->getCenter(icolor)); + Vector3D xc; + + xc[0] = start0; + for (int ix = 0; ix < dim0; ix++) { - const Vector3D& center(lrs->getCenter(icolor)); - Vector3D xc; + xc[1] = start1; - xc[0] = start0 + iloc * dim0 * hgrid[0]; - for (int ix = iloc * dim0; ix < (iloc + 1) * dim0; ix++) + for (int iy = 0; iy < dim1; iy++) { - xc[1] = start1; - - for (int iy = 0; iy < dim1; iy++) + xc[2] = start2; + for (int iz = 0; iz < dim2; iz++) { - xc[2] = start2; - for (int iz = 0; iz < dim2; iz++) - { - const double r = xc.minimage(center, ll, ct.bcWF); - if (r < rmax) - ipsi_host_view[ix * incx + iy * incy + iz] - = static_cast(exp(-r * r * invrc2)); - else - ipsi_host_view[ix * incx + iy * incy + iz] = 0.; - - xc[2] += hgrid[2]; - } - xc[1] += hgrid[1]; + const double r = xc.minimage(center, ll, ct.bcWF); + if (r < rmax) + ipsi_host_view[ix * incx + iy * incy + iz] + = static_cast(exp(-r * r * invrc2)); + else + ipsi_host_view[ix * incx + iy * incy + iz] = 0.; + + xc[2] += hgrid[2]; } - xc[0] += hgrid[0]; + xc[1] += hgrid[1]; } + xc[0] += hgrid[0]; } - MemorySpace::Memory::copy_view_to_dev( + MemorySpace::Memory::copy_view_to_dev( 
ipsi_host_view, ipsi_size, ipsi); - MemorySpace::Memory::free_host_view( + MemorySpace::Memory::free_host_view( ipsi_host_view); } resetIterativeIndex(); } -void ExtendedGridOrbitals::initFourier() +template +void ExtendedGridOrbitals::initFourier() { Control& ct = *(Control::instance()); if (onpe0 && ct.verbose > 2) @@ -302,7 +343,7 @@ void ExtendedGridOrbitals::initFourier() const double start1 = grid_.start(1) - grid_.origin(1); const double start2 = grid_.start(2) - grid_.origin(2); - const int dim0 = grid_.dim(0) / subdivx_; + const int dim0 = grid_.dim(0); const int dim1 = grid_.dim(1); const int dim2 = grid_.dim(2); @@ -329,53 +370,51 @@ void ExtendedGridOrbitals::initFourier() const double kk[3] = { dk[0] * (double)kvector[0], dk[1] * (double)kvector[1], dk[2] * (double)kvector[2] }; - ORBDTYPE* ipsi = psi(icolor); + ScalarType* ipsi = psi(icolor); unsigned int const ipsi_size = numpt_; - ORBDTYPE* ipsi_host_view = MemorySpace::Memory::allocate_host_view(ipsi_size); - MemorySpace::Memory::copy_view_to_host( + MemorySpace::Memory::copy_view_to_host( ipsi, ipsi_size, ipsi_host_view); - MemorySpace::Memory::set( + MemorySpace::Memory::set( ipsi_host_view, numpt_, 0); // TODO this can be done on the GPU with OpenMP - for (short iloc = 0; iloc < subdivx_; iloc++) + double x = start0; + for (int ix = 0; ix < dim0; ix++) { - double x = start0 + iloc * dim0 * hgrid[0]; - for (int ix = iloc * dim0; ix < (iloc + 1) * dim0; ix++) - { - double y = start1; + double y = start1; - for (int iy = 0; iy < dim1; iy++) + for (int iy = 0; iy < dim1; iy++) + { + double z = start2; + for (int iz = 0; iz < dim2; iz++) { - double z = start2; - for (int iz = 0; iz < dim2; iz++) - { - ipsi_host_view[ix * incx + iy * incy + iz] - = 1. - - static_cast(std::cos(kk[0] * x) - * std::cos(kk[1] * y) - * std::cos(kk[2] * z)); - - z += hgrid[2]; - } - y += hgrid[1]; + ipsi_host_view[ix * incx + iy * incy + iz] + = 1. 
+ - static_cast(std::cos(kk[0] * x) + * std::cos(kk[1] * y) + * std::cos(kk[2] * z)); + + z += hgrid[2]; } - x += hgrid[0]; + y += hgrid[1]; } + x += hgrid[0]; } - MemorySpace::Memory::copy_view_to_dev( + MemorySpace::Memory::copy_view_to_dev( ipsi_host_view, ipsi_size, ipsi); - MemorySpace::Memory::free_host_view( + MemorySpace::Memory::free_host_view( ipsi_host_view); } resetIterativeIndex(); } -void ExtendedGridOrbitals::multiply_by_matrix( +template +void ExtendedGridOrbitals::multiply_by_matrix( const dist_matrix::DistMatrix& dmatrix, - ORBDTYPE* const product, const int ldp) + ScalarType* const product, const int ldp) { #if 0 (*MPIdata::sout)<<"self multiply_by_matrix"< +void ExtendedGridOrbitals::multiply_by_matrix( + const DISTMATDTYPE* const matrix, ScalarType* product, const int ldp) const { prod_matrix_tm_.start(); - assert(subdivx_ > 0); - unsigned int const product_size = numst_ * ldp; - ORBDTYPE* product_host_view - = MemorySpace::Memory::allocate_host_view( - product_size); - MemorySpace::Memory::copy_view_to_host( + ScalarType* product_host_view = MemorySpace::Memory::allocate_host_view(product_size); + MemorySpace::Memory::copy_view_to_host( product, product_size, product_host_view); - memset(product_host_view, 0, ldp * numst_ * sizeof(ORBDTYPE)); + memset(product_host_view, 0, ldp * numst_ * sizeof(ScalarType)); - // loop over subdomains - for (short iloc = 0; iloc < subdivx_; iloc++) - { - unsigned int const phi_size = loc_numpt_ * numst_; - ORBDTYPE* phi_host_view = MemorySpace::Memory::allocate_host_view(phi_size); - MemorySpace::Memory::copy_view_to_host( - getPsi(0, iloc), phi_size, phi_host_view); - - // TODO this can be done on the GPU - // Compute product for subdomain iloc - LinearAlgebraUtils::MPgemmNN(loc_numpt_, numst_, - numst_, 1., phi_host_view, lda_, matrix, numst_, 0., - product_host_view + iloc * loc_numpt_, ldp); - - MemorySpace::Memory::free_host_view( - phi_host_view); - } - MemorySpace::Memory::copy_view_to_dev( + unsigned 
int const phi_size = numpt_ * numst_; + ScalarType* phi_host_view = MemorySpace::Memory::allocate_host_view(phi_size); + MemorySpace::Memory::copy_view_to_host( + getPsi(0), phi_size, phi_host_view); + + // TODO this can be done on the GPU + LinearAlgebraUtils::MPgemmNN(numpt_, numst_, numst_, 1., + phi_host_view, lda_, matrix, numst_, 0., product_host_view, ldp); + + MemorySpace::Memory::free_host_view( + phi_host_view); + + MemorySpace::Memory::copy_view_to_dev( product_host_view, product_size, product); - MemorySpace::Memory::free_host_view( + MemorySpace::Memory::free_host_view( product_host_view); prod_matrix_tm_.stop(); } #ifdef HAVE_MAGMA -void ExtendedGridOrbitals::multiplyByMatrix( +template +void ExtendedGridOrbitals::multiplyByMatrix( const SquareLocalMatrices& matrix, - ORBDTYPE* product, const int ldp) const + ScalarType* product, const int ldp) const { - SquareLocalMatrices matdev( + SquareLocalMatrices matdev( matrix.nmat(), matrix.m()); matdev.assign(matrix); @@ -445,9 +478,10 @@ void ExtendedGridOrbitals::multiplyByMatrix( } #endif -void ExtendedGridOrbitals::multiplyByMatrix( +template +void ExtendedGridOrbitals::multiplyByMatrix( const SquareLocalMatrices& matrix, - ORBDTYPE* product, const int ldp) const + ScalarType* product, const int ldp) const { assert(matrix.nmat() == 1); @@ -464,10 +498,11 @@ void ExtendedGridOrbitals::multiplyByMatrix( // Here the result is stored in one of the matrices used in the multiplication, // so a temporary arry is necessary #ifdef HAVE_MAGMA -void ExtendedGridOrbitals::multiplyByMatrix( +template +void ExtendedGridOrbitals::multiplyByMatrix( const SquareLocalMatrices& matrix) { - SquareLocalMatrices matdev( + SquareLocalMatrices matdev( matrix.nmat(), matrix.m()); matdev.assign(matrix); @@ -475,42 +510,62 @@ void ExtendedGridOrbitals::multiplyByMatrix( } #endif -void ExtendedGridOrbitals::multiplyByMatrix( +template +void ExtendedGridOrbitals::multiplyByMatrix( const SquareLocalMatrices& matrix) { - ORBDTYPE* 
product - = MemorySpace::Memory::allocate( + ScalarType* product + = MemorySpace::Memory::allocate( numpt_ * numst_); multiplyByMatrix(matrix, product, numpt_); - MemorySpace::Memory::copy( + MemorySpace::Memory::copy( product, numpt_ * numst_, getPsi(0)); - MemorySpace::Memory::free(product); + MemorySpace::Memory::free(product); } -void ExtendedGridOrbitals::multiplyByMatrix( +template +void ExtendedGridOrbitals::multiplyByMatrix( const SquareLocalMatrices& matrix, - ExtendedGridOrbitals& product) const + ExtendedGridOrbitals& product) const { multiplyByMatrix(matrix, product.psi(0), product.lda_); } -void ExtendedGridOrbitals::multiply_by_matrix( - const DISTMATDTYPE* const matrix, ExtendedGridOrbitals& product) const +template +void ExtendedGridOrbitals::multiply_by_matrix( + const DISTMATDTYPE* const matrix, + ExtendedGridOrbitals& product) const { multiply_by_matrix(matrix, product.psi(0), product.lda_); } template <> -void ExtendedGridOrbitals::multiply_by_matrix( +template <> +void ExtendedGridOrbitals::multiply_by_matrix( + const dist_matrix::DistMatrix& matrix) +{ + multiply_by_DistMatrix(matrix); +} + +template <> +template <> +void ExtendedGridOrbitals::multiply_by_matrix( + const ReplicatedMatrix& matrix) +{ + multiply_by_ReplicatedMatrix(matrix); +} + +template +void ExtendedGridOrbitals::multiply_by_DistMatrix( const dist_matrix::DistMatrix& matrix) { prod_matrix_tm_.start(); - ORBDTYPE* product = new ORBDTYPE[loc_numpt_ * numst_]; - memset(product, 0, loc_numpt_ * numst_ * sizeof(ORBDTYPE)); + ScalarType* product = new ScalarType[numpt_ * numst_]; + memset(product, 0, numpt_ * numst_ * sizeof(ScalarType)); ReplicatedWorkSpace& wspace( ReplicatedWorkSpace::instance()); @@ -518,67 +573,72 @@ void ExtendedGridOrbitals::multiply_by_matrix( matrix.allgather(work_matrix, numst_); - const size_t slnumpt = loc_numpt_ * sizeof(ORBDTYPE); + const size_t slnumpt = numpt_ * sizeof(ScalarType); - // loop over subdomains - for (short iloc = 0; iloc < subdivx_; 
iloc++) - { - unsigned int const phi_size = loc_numpt_ * numst_; - ORBDTYPE* phi_host_view = MemorySpace::Memory::allocate_host_view(phi_size); - MemorySpace::Memory::copy_view_to_host( - getPsi(0, iloc), phi_size, phi_host_view); - - // TODO this can be done on the GPU - // Compute loc_numpt_ rows (for subdomain iloc) - LinearAlgebraUtils::MPgemmNN(loc_numpt_, numst_, - numst_, 1., phi_host_view, lda_, work_matrix, numst_, 0., product, - loc_numpt_); - - for (int color = 0; color < numst_; color++) - memcpy(phi_host_view + color * lda_, product + color * loc_numpt_, - slnumpt); - - MemorySpace::Memory::copy_view_to_dev( - phi_host_view, phi_size, getPsi(0, iloc)); - MemorySpace::Memory::free_host_view( - phi_host_view); - } + unsigned int const phi_size = numpt_ * numst_; + ScalarType* phi_host_view = MemorySpace::Memory::allocate_host_view(phi_size); + MemorySpace::Memory::copy_view_to_host( + getPsi(0), phi_size, phi_host_view); + + // TODO this can be done on the GPU + LinearAlgebraUtils::MPgemmNN(numpt_, numst_, numst_, 1., + phi_host_view, lda_, work_matrix, numst_, 0., product, numpt_); + + for (int color = 0; color < numst_; color++) + memcpy(phi_host_view + color * lda_, product + color * numpt_, slnumpt); + + MemorySpace::Memory::copy_view_to_dev( + phi_host_view, phi_size, getPsi(0)); + MemorySpace::Memory::free_host_view( + phi_host_view); delete[] product; prod_matrix_tm_.stop(); } -#ifdef HAVE_MAGMA -template <> -void ExtendedGridOrbitals::multiply_by_matrix(const ReplicatedMatrix& matrix) +template +void ExtendedGridOrbitals::multiply_by_ReplicatedMatrix( + const ReplicatedMatrix& matrix) { prod_matrix_tm_.start(); +#ifdef HAVE_MAGMA magma_trans_t magma_transa = magma_trans_const('n'); magma_trans_t magma_transb = magma_trans_const('n'); auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - ORBDTYPE* tmp - = MemorySpace::Memory::allocate( + ScalarType* tmp + = MemorySpace::Memory::allocate( numst_ * lda_); magmablas_dgemm(magma_transa, 
magma_transb, numpt_, numst_, numst_, 1., block_vector_.vect(0), lda_, matrix.data(), matrix.ld(), 0., tmp, lda_, magma_singleton.queue_); - MemorySpace::Memory::copy( + MemorySpace::Memory::copy( tmp, numst_ * lda_, block_vector_.vect(0)); - MemorySpace::Memory::free(tmp); + MemorySpace::Memory::free(tmp); +#else + ScalarType* tmp + = MemorySpace::Memory::allocate( + numst_ * lda_); + LinearAlgebraUtils::MPgemmNN(numpt_, numst_, numst_, 1., + block_vector_.vect(0), lda_, matrix.data(), matrix.ld(), 0., tmp, lda_); + + memcpy(block_vector_.vect(0), tmp, numst_ * lda_ * sizeof(ScalarType)); + + MemorySpace::Memory::free(tmp); +#endif prod_matrix_tm_.stop(); } -#endif -int ExtendedGridOrbitals::read_hdf5(HDFrestart& h5f_file) +template +int ExtendedGridOrbitals::read_hdf5(HDFrestart& h5f_file) { assert(proj_matrices_ != nullptr); @@ -588,9 +648,9 @@ int ExtendedGridOrbitals::read_hdf5(HDFrestart& h5f_file) int ierr = read_func_hdf5(h5f_file, name); if (ierr < 0) { - (*MPIdata::serr) - << "ExtendedGridOrbitals::read_hdf5(): error in reading " << name - << ", size=" << name.size() << std::endl; + (*MPIdata::serr) << "ExtendedGridOrbitals::read_hdf5(): " + "error in reading " + << name << ", size=" << name.size() << std::endl; return ierr; } else if (onpe0 && ct.verbose > 2) @@ -605,9 +665,9 @@ int ExtendedGridOrbitals::read_hdf5(HDFrestart& h5f_file) ierr = proj_matrices_->readDM(h5f_file); if (ierr < 0) { - (*MPIdata::serr) - << "ExtendedGridOrbitals::read_hdf5(): error in reading DM" - << std::endl; + (*MPIdata::serr) << "ExtendedGridOrbitals::read_hdf5():" + " error in reading DM" + << std::endl; return ierr; } } @@ -617,7 +677,9 @@ int ExtendedGridOrbitals::read_hdf5(HDFrestart& h5f_file) return ierr; } -int ExtendedGridOrbitals::write(HDFrestart& h5f_file, const std::string& name) +template +int ExtendedGridOrbitals::write( + HDFrestart& h5f_file, const std::string& name) { if (onpe0) (*MPIdata::sout) << "ExtendedGridOrbitals::write_func_hdf5()...\n"; @@ -710,9 
+772,9 @@ int ExtendedGridOrbitals::write(HDFrestart& h5f_file, const std::string& name) } // iwrite unsigned int const psi_size = numpt_; - ORBDTYPE* psi_host_view = MemorySpace::Memory::allocate_host_view(psi_size); - MemorySpace::Memory::copy_view_to_host( + MemorySpace::Memory::copy_view_to_host( psi(color), psi_size, psi_host_view); int ierr = h5f_file.writeData( @@ -725,9 +787,10 @@ int ExtendedGridOrbitals::write(HDFrestart& h5f_file, const std::string& name) herr_t status = H5Dclose(dset_id); if (status < 0) { - (*MPIdata::serr) << "ExtendedGridOrbitals::write_func_hdf5:" - "H5Dclose failed!!!" - << std::endl; + (*MPIdata::serr) + << "ExtendedGridOrbitals::write_func_hdf5:" + "H5Dclose failed!!!" + << std::endl; return -1; } } @@ -758,7 +821,8 @@ int ExtendedGridOrbitals::write(HDFrestart& h5f_file, const std::string& name) } // read all the data sets with names starting with "name" -int ExtendedGridOrbitals::read_func_hdf5( +template +int ExtendedGridOrbitals::read_func_hdf5( HDFrestart& h5f_file, const std::string& name) { assert(numst_ >= 0); @@ -779,18 +843,18 @@ int ExtendedGridOrbitals::read_func_hdf5( // memory dataspace identifier hid_t memspace = (h5f_file.active()) ? 
h5f_file.createMemspace() : 0; - ORBDTYPE* buffer = new ORBDTYPE[block[0] * block[1] * block[2]]; + ScalarType* buffer = new ScalarType[block[0] * block[1] * block[2]]; if (onpe0 && ct.verbose > 2) { if (h5f_file.gatherDataX()) { - (*MPIdata::sout) - << "ExtendedGridOrbitals::read_func_hdf5(): Read wave " - "functions from " - << grid_.mype_env().n_mpi_task(1) - * grid_.mype_env().n_mpi_task(2) - << " PEs" << std::endl; + (*MPIdata::sout) << "ExtendedGridOrbitals::read_func_" + "hdf5(): Read wave " + "functions from " + << grid_.mype_env().n_mpi_task(1) + * grid_.mype_env().n_mpi_task(2) + << " PEs" << std::endl; } else { @@ -819,18 +883,19 @@ int ExtendedGridOrbitals::read_func_hdf5( hid_t dset_id = h5f_file.open_dset(datasetname); if (dset_id < 0) { - (*MPIdata::serr) - << "ExtendedGridOrbitals::read_func_hdf5() --- cannot open " - << datasetname << std::endl; + (*MPIdata::serr) << "ExtendedGridOrbitals::read_func_" + "hdf5() --- cannot open " + << datasetname << std::endl; return dset_id; } herr_t status = h5f_file.readData(buffer, memspace, dset_id, precision); if (status < 0) { - (*MPIdata::serr) << "ExtendedGridOrbitals::read_func_hdf5() --- " - "H5Dread failed!!!" - << std::endl; + (*MPIdata::serr) + << "ExtendedGridOrbitals::read_func_hdf5() --- " + "H5Dread failed!!!" 
+ << std::endl; return -1; } @@ -841,20 +906,16 @@ int ExtendedGridOrbitals::read_func_hdf5( } #ifdef HAVE_MAGMA - ORBDTYPE* buffer_dev - = MemorySpace::Memory::allocate( + ScalarType* buffer_dev + = MemorySpace::Memory::allocate( numpt_); MemorySpace::copy_to_dev(buffer, numpt_, buffer_dev); #else - ORBDTYPE* buffer_dev = buffer; + ScalarType* buffer_dev = buffer; #endif - for (short iloc = 0; iloc < subdivx_; iloc++) - { - const int shift = iloc * loc_numpt_; - block_vector_.assignLocal(icolor, iloc, buffer_dev + shift); - } + block_vector_.assignLocal(icolor, 0, buffer_dev); #ifdef HAVE_MAGMA - MemorySpace::Memory::free(buffer_dev); + MemorySpace::Memory::free(buffer_dev); #endif } @@ -876,8 +937,10 @@ int ExtendedGridOrbitals::read_func_hdf5( // compute the matrix // output: matB -void ExtendedGridOrbitals::computeMatB( - const ExtendedGridOrbitals& orbitals, const pb::Lap& LapOper) +template +void ExtendedGridOrbitals::computeMatB( + const ExtendedGridOrbitals& orbitals, + const pb::Lap& LapOper) { if (numst_ == 0) return; @@ -891,19 +954,18 @@ void ExtendedGridOrbitals::computeMatB( const short bcolor = 32; - SquareLocalMatrices ss(subdivx_, numst_); + SquareLocalMatrices ss(1, numst_); - ORBDTYPE* work = new ORBDTYPE[lda_ * bcolor]; - memset(work, 0, lda_ * bcolor * sizeof(ORBDTYPE)); + ScalarType* work = new ScalarType[lda_ * bcolor]; + memset(work, 0, lda_ * bcolor * sizeof(ScalarType)); - ORBDTYPE* const orbitals_psi + ScalarType* const orbitals_psi = (numst_ > 0) ? 
orbitals.block_vector_.vect(0) : nullptr; const unsigned int orbitals_psi_size = orbitals.block_vector_.get_allocated_size_storage(); - ORBDTYPE* orbitals_psi_host_view - = MemorySpace::Memory::allocate_host_view( - orbitals_psi_size); - MemorySpace::Memory::copy_view_to_host( + ScalarType* orbitals_psi_host_view = MemorySpace::Memory::allocate_host_view(orbitals_psi_size); + MemorySpace::Memory::copy_view_to_host( orbitals_psi, orbitals_psi_size, orbitals_psi_host_view); setDataWithGhosts(); @@ -920,32 +982,27 @@ void ExtendedGridOrbitals::computeMatB( LapOper.rhs(getFuncWithGhosts(icolor + i), work + i * lda_); } - for (short iloc = 0; iloc < subdivx_; iloc++) - { - - MATDTYPE* ssiloc = ss.getRawPtr(iloc); + MATDTYPE* ss0 = ss.getRawPtr(0); - // calculate nf columns of ssiloc - LinearAlgebraUtils::MPgemmTN(numst_, nf, - loc_numpt_, 1., orbitals_psi_host_view + iloc * loc_numpt_, - lda_, work + iloc * loc_numpt_, lda_, 0., - ssiloc + icolor * numst_, numst_); - } + // calculate nf columns of ss0 + LinearAlgebraUtils::MPgemmTN(numst_, nf, numpt_, + grid_.vel(), orbitals_psi_host_view, lda_, work, lda_, 0., + ss0 + icolor * numst_, numst_); } - MemorySpace::Memory::free_host_view( + MemorySpace::Memory::free_host_view( orbitals_psi_host_view); delete[] work; - const double vel = grid_.vel(); - ss.scal(vel); proj_matrices_->initializeMatB(ss); matB_tm_.stop(); } // compute and its inverse -void ExtendedGridOrbitals::computeBAndInvB(const pb::Lap& LapOper) +template +void ExtendedGridOrbitals::computeBAndInvB( + const pb::Lap& LapOper) { assert(proj_matrices_ != nullptr); @@ -960,34 +1017,23 @@ void ExtendedGridOrbitals::computeBAndInvB(const pb::Lap& LapOper) invBmat_tm_.stop(); } -void ExtendedGridOrbitals::getLocalOverlap( +template +void ExtendedGridOrbitals::getLocalOverlap( SquareLocalMatrices& ss) { assert(numst_ >= 0); - assert(loc_numpt_ > 0); + assert(numpt_ > 0); assert(grid_.vel() > 1.e-8); - assert(subdivx_ > 0); if (numst_ != 0) { -#ifdef 
MGMOL_USE_MIXEDP getLocalOverlap(*this, ss); -#else - ORBDTYPE* psi = block_vector_.vect(0); - for (short iloc = 0; iloc < subdivx_; iloc++) - { - ss.syrk(iloc, loc_numpt_, psi + iloc * loc_numpt_, lda_); - } - - // We may need the full matrix - ss.fillUpperWithLower(); - - ss.scal(grid_.vel()); -#endif } } -void ExtendedGridOrbitals::getLocalOverlap(const ExtendedGridOrbitals& orbitals, +template +void ExtendedGridOrbitals::getLocalOverlap( + const ExtendedGridOrbitals& orbitals, SquareLocalMatrices& ss) { assert(numst_ >= 0); @@ -999,8 +1045,9 @@ void ExtendedGridOrbitals::getLocalOverlap(const ExtendedGridOrbitals& orbitals, } } -void ExtendedGridOrbitals::computeLocalProduct( - const ExtendedGridOrbitals& orbitals, +template +void ExtendedGridOrbitals::computeLocalProduct( + const ExtendedGridOrbitals& orbitals, LocalMatrices& ss, const bool transpose) { // assert( orbitals.numst_>=0 ); @@ -1011,11 +1058,12 @@ void ExtendedGridOrbitals::computeLocalProduct( } #ifdef HAVE_MAGMA -void ExtendedGridOrbitals::computeLocalProduct(const ORBDTYPE* const array, - const int ld, LocalMatrices& ss, - const bool transpose) +template +void ExtendedGridOrbitals::computeLocalProduct( + const ScalarType* const array, const int ld, + LocalMatrices& ss, const bool transpose) { - LocalMatrices sdev( + LocalMatrices sdev( ss.nmat(), ss.m(), ss.n()); computeLocalProduct(array, ld, sdev, transpose); @@ -1024,60 +1072,53 @@ void ExtendedGridOrbitals::computeLocalProduct(const ORBDTYPE* const array, } #endif -void ExtendedGridOrbitals::computeLocalProduct(const ORBDTYPE* const array, - const int ld, LocalMatrices& ss, - const bool transpose) +template +void ExtendedGridOrbitals::computeLocalProduct( + const ScalarType* const array, const int ld, + LocalMatrices& ss, const bool transpose) { - assert(loc_numpt_ > 0); - assert(loc_numpt_ <= ld); + assert(numpt_ > 0); + assert(numpt_ <= ld); assert(array != nullptr); assert(numst_ != 0); assert(grid_.vel() > 0.); - assert(subdivx_ > 0); - 
const ORBDTYPE* const a = transpose ? array : block_vector_.vect(0); - const ORBDTYPE* const b = transpose ? block_vector_.vect(0) : array; + const ScalarType* const a = transpose ? array : block_vector_.vect(0); + const ScalarType* const b = transpose ? block_vector_.vect(0) : array; const int lda = transpose ? ld : lda_; const int ldb = transpose ? lda_ : ld; - for (short iloc = 0; iloc < subdivx_; iloc++) - { - LinearAlgebraUtils::MPgemmTN(numst_, numst_, - loc_numpt_, 1., a + iloc * loc_numpt_, lda, b + +iloc * loc_numpt_, - ldb, 0., ss.getRawPtr(iloc), ss.m()); - } - - ss.scal(grid_.vel()); + LinearAlgebraUtils::MPgemmTN(numst_, numst_, numpt_, + grid_.vel(), a, lda, b, ldb, 0., ss.getRawPtr(0), ss.m()); } -void ExtendedGridOrbitals::computeDiagonalElementsDotProduct( - const ExtendedGridOrbitals& orbitals, std::vector& ss) const +template +void ExtendedGridOrbitals::computeDiagonalElementsDotProduct( + const ExtendedGridOrbitals& orbitals, + std::vector& ss) const { assert(numst_ > 0); assert(grid_.vel() > 0.); for (int icolor = 0; icolor < numst_; icolor++) { - ss[icolor] = 0.; - for (short iloc = 0; iloc < subdivx_; iloc++) - { - double alpha - = LinearAlgebraUtils::MPdot(loc_numpt_, - orbitals.getPsi(icolor, iloc), getPsi(icolor, iloc)); + ss[icolor] = 0.; + double alpha = LinearAlgebraUtils::MPdot( + numpt_, orbitals.getPsi(icolor), getPsi(icolor)); - ss[icolor] += (DISTMATDTYPE)(alpha * grid_.vel()); - } + ss[icolor] += (DISTMATDTYPE)(alpha * grid_.vel()); } std::vector tmp(ss); MGmol_MPI& mmpi = *(MGmol_MPI::instance()); mmpi.allreduce(&tmp[0], &ss[0], numst_, MPI_SUM); } -void ExtendedGridOrbitals::computeGram( +template +void ExtendedGridOrbitals::computeGram( dist_matrix::DistMatrix& gram_mat) { - SquareLocalMatrices ss(subdivx_, numst_); + SquareLocalMatrices ss(1, numst_); getLocalOverlap(ss); @@ -1088,10 +1129,12 @@ void ExtendedGridOrbitals::computeGram( sl2dm->accumulate(ss, gram_mat); } -void ExtendedGridOrbitals::computeGram(const 
ExtendedGridOrbitals& orbitals, +template +void ExtendedGridOrbitals::computeGram( + const ExtendedGridOrbitals& orbitals, dist_matrix::DistMatrix& gram_mat) { - SquareLocalMatrices ss(subdivx_, numst_); + SquareLocalMatrices ss(1, numst_); getLocalOverlap(orbitals, ss); @@ -1104,7 +1147,8 @@ void ExtendedGridOrbitals::computeGram(const ExtendedGridOrbitals& orbitals, } // compute the lower-triangular part of the overlap matrix -void ExtendedGridOrbitals::computeGram(const int verbosity) +template +void ExtendedGridOrbitals::computeGram(const int verbosity) { assert(proj_matrices_ != nullptr); @@ -1115,11 +1159,11 @@ void ExtendedGridOrbitals::computeGram(const int verbosity) (*MPIdata::sout) << "ExtendedGridOrbitals::computeGram()" << std::endl; #endif - assert(subdivx_ > 0); - assert(subdivx_ < 1000); + assert(1 > 0); + assert(1 < 1000); assert(numst_ >= 0); - SquareLocalMatrices ss(subdivx_, numst_); + SquareLocalMatrices ss(1, numst_); getLocalOverlap(ss); @@ -1130,7 +1174,8 @@ void ExtendedGridOrbitals::computeGram(const int verbosity) overlap_tm_.stop(); } -void ExtendedGridOrbitals::computeGramAndInvS(const int verbosity) +template +void ExtendedGridOrbitals::computeGramAndInvS(const int verbosity) { assert(proj_matrices_ != nullptr); @@ -1140,137 +1185,45 @@ void ExtendedGridOrbitals::computeGramAndInvS(const int verbosity) proj_matrices_->computeInvS(); } -void ExtendedGridOrbitals::checkCond(const double tol, const bool flag_stop) +template +void ExtendedGridOrbitals::checkCond( + const double tol, const bool flag_stop) { assert(proj_matrices_ != nullptr); proj_matrices_->checkCond(tol, flag_stop); } -double ExtendedGridOrbitals::dotProductWithDM( - const ExtendedGridOrbitals& orbitals) -{ - assert(proj_matrices_ != nullptr); - - SquareLocalMatrices ss(subdivx_, numst_); - - computeLocalProduct(orbitals, ss); - - return proj_matrices_->dotProductWithDM(ss); -} - -double ExtendedGridOrbitals::dotProductWithInvS( - const ExtendedGridOrbitals& orbitals) -{ - 
assert(proj_matrices_ != nullptr); - - SquareLocalMatrices ss(subdivx_, numst_); - - computeLocalProduct(orbitals, ss); - - return proj_matrices_->dotProductWithInvS(ss); -} - -double ExtendedGridOrbitals::dotProductDiagonal( - const ExtendedGridOrbitals& orbitals) -{ - assert(proj_matrices_ != nullptr); - - std::vector ss(numst_); - computeDiagonalElementsDotProduct(orbitals, ss); - return proj_matrices_->getTraceDiagProductWithInvS(ss); -} - -double ExtendedGridOrbitals::dotProductSimple( - const ExtendedGridOrbitals& orbitals) -{ - assert(proj_matrices_ != nullptr); - - SquareLocalMatrices ss(subdivx_, numst_); - - computeLocalProduct(orbitals, ss); - - return proj_matrices_->dotProductSimple(ss); -} - -double ExtendedGridOrbitals::dotProduct(const ExtendedGridOrbitals& orbitals) -{ - return (this->*dotProduct_)(orbitals); // call through pointer member -} - -double ExtendedGridOrbitals::dotProduct( - const ExtendedGridOrbitals& orbitals, const short dot_type) -{ - dot_product_tm_.start(); - - assert(numst_ >= 0); - assert(subdivx_ > 0); - assert(subdivx_ < 1000); - - double dot = 0.; - if (dot_type == 0) - { - dot = dotProductDiagonal(orbitals); - } - else if (dot_type == 1) - { - dot = dotProductWithInvS(orbitals); - } - else if (dot_type == 2) - { - dot = dotProductWithDM(orbitals); - } - else if (dot_type == 3) - { - dot = dotProductSimple(orbitals); - } - else - { - (*MPIdata::serr) << "ExtendedGridOrbitals::dot_product() --- unknown " - "dot product type" - << std::endl; - Control& ct = *(Control::instance()); - ct.global_exit(); - } - - dot_product_tm_.stop(); - - return dot; -} - -dist_matrix::DistMatrix ExtendedGridOrbitals::computeProduct( - const ExtendedGridOrbitals& orbitals, const bool transpose) +template +double ExtendedGridOrbitals::dotProduct( + const ExtendedGridOrbitals& orbitals) { - assert(numst_ > 0); - assert(subdivx_ > 0); - assert(subdivx_ < 1000); - - return computeProduct(orbitals.psi(0), numst_, orbitals.lda_, transpose); + 
assert(dotProductManager_ != nullptr); + return dotProductManager_->dotProduct(*this, orbitals); } -dist_matrix::DistMatrix ExtendedGridOrbitals::computeProduct( - const ORBDTYPE* const array, const int ncol, const int lda, - const bool transpose) +template +double ExtendedGridOrbitals::dotProduct( + const ExtendedGridOrbitals& orbitals, const short dot_type) { - assert(lda > 1); - dot_product_tm_.start(); - LocalMatrices ss(subdivx_, numst_, ncol); + DotProductManagerFactory factory; + DotProductManager* manager = factory.create(dot_type); + assert(manager != nullptr); - computeLocalProduct(array, lda, ss, transpose); + double dot = manager->dotProduct(*this, orbitals); - LocalMatrices2DistMatrix* sl2dm = LocalMatrices2DistMatrix::instance(); - - dist_matrix::DistMatrix tmp("tmp", numst_, numst_); - sl2dm->accumulate(ss, tmp); + delete manager; dot_product_tm_.stop(); - return tmp; + return dot; } -void ExtendedGridOrbitals::orthonormalizeLoewdin(const bool overlap_uptodate, +template +void ExtendedGridOrbitals::orthonormalizeLoewdin( + const bool overlap_uptodate, SquareLocalMatrices* matrixTransform, const bool update_matrices) { @@ -1283,13 +1236,12 @@ void ExtendedGridOrbitals::orthonormalizeLoewdin(const bool overlap_uptodate, SquareLocalMatrices* localP = matrixTransform; if (matrixTransform == nullptr) - localP = new SquareLocalMatrices( - subdivx_, numst_); + localP + = new SquareLocalMatrices(1, numst_); incrementIterativeIndex(); bool multbymat = false; -#ifdef HAVE_MAGMA // try with ReplicatedMatrix first { ProjectedMatrices* projmatrices @@ -1306,7 +1258,6 @@ void ExtendedGridOrbitals::orthonormalizeLoewdin(const bool overlap_uptodate, multbymat = true; } } -#endif if (!multbymat) { ProjectedMatrices>* projmatrices @@ -1326,7 +1277,8 @@ void ExtendedGridOrbitals::orthonormalizeLoewdin(const bool overlap_uptodate, if (matrixTransform == nullptr) delete localP; } -double ExtendedGridOrbitals::norm() const +template +double ExtendedGridOrbitals::norm() 
const { double norm = 0; @@ -1337,17 +1289,16 @@ double ExtendedGridOrbitals::norm() const return norm; } -double ExtendedGridOrbitals::normState(const int gid) const +template +double ExtendedGridOrbitals::normState(const int gid) const { assert(gid >= 0); double tmp = 0.; - for (short iloc = 0; iloc < subdivx_; iloc++) - { - // diagonal element - tmp += block_vector_.dot(gid, gid, iloc); - // cout<<"gid="<::multiplyByMatrix2states()"< +void ExtendedGridOrbitals::computeInvNorms2( std::vector>& inv_norms2) const { std::vector diagS(numst_); computeDiagonalElementsDotProduct(*this, diagS); - inv_norms2.resize(subdivx_); - for (short iloc = 0; iloc < subdivx_; iloc++) - inv_norms2[iloc].resize(numst_); + inv_norms2.resize(1); + inv_norms2[0].resize(numst_); for (short color = 0; color < numst_; color++) { double alpha = 1. / diagS[color]; - for (short iloc = 0; iloc < subdivx_; iloc++) - { - inv_norms2[iloc][color] = alpha; - } + inv_norms2[0][color] = alpha; } } -void ExtendedGridOrbitals::normalize() +template +void ExtendedGridOrbitals::normalize() { normalize_tm_.start(); @@ -1488,7 +1427,8 @@ void ExtendedGridOrbitals::normalize() assert(numst_ >= 0); // if( onpe0 && ct.verbose>2 ) - // (*MPIdata::sout)<<"Normalize ExtendedGridOrbitals"<"< diagS(numst_); @@ -1506,10 +1446,7 @@ void ExtendedGridOrbitals::normalize() assert(diagS[color] > 1.e-15); diagS[color] = 1. 
/ sqrt(diagS[color]); - for (short iloc = 0; iloc < subdivx_; iloc++) - { - block_vector_.scal(diagS[color], color, iloc); - } + block_vector_.scal(diagS[color], color, 0); } incrementIterativeIndex(); @@ -1518,11 +1455,12 @@ void ExtendedGridOrbitals::normalize() } // modify argument orbitals, by projecting out its component -// along ExtendedGridOrbitals -void ExtendedGridOrbitals::projectOut( - ExtendedGridOrbitals& orbitals, const double scale) +// along ExtendedGridOrbitals +template +void ExtendedGridOrbitals::projectOut( + ExtendedGridOrbitals& orbitals) { - projectOut(orbitals.psi(0), lda_, scale); + projectOut(orbitals.psi(0), lda_); #if 0 // test if projection is now 0 @@ -1535,15 +1473,16 @@ void ExtendedGridOrbitals::projectOut( orbitals.incrementIterativeIndex(); } -void ExtendedGridOrbitals::projectOut( - ORBDTYPE* const array, const int lda, const double scale) +template +void ExtendedGridOrbitals::projectOut( + ScalarType* const array, const int lda) { assert(lda > 1); - assert(loc_numpt_ > 0); + assert(numpt_ > 0); assert(numst_ >= 0); - assert(lda_ >= loc_numpt_); + assert(lda_ >= numpt_); - SquareLocalMatrices lmatrix(subdivx_, numst_); + SquareLocalMatrices lmatrix(1, numst_); if (numst_ != 0) computeLocalProduct(array, lda, lmatrix, false); @@ -1554,52 +1493,48 @@ void ExtendedGridOrbitals::projectOut( #endif proj_matrices_->applyInvS(lmatrix); - ORBDTYPE* tproduct = new ORBDTYPE[loc_numpt_ * numst_]; - memset(tproduct, 0, loc_numpt_ * numst_ * sizeof(ORBDTYPE)); + ScalarType* tproduct = new ScalarType[numpt_ * numst_]; + memset(tproduct, 0, numpt_ * numst_ * sizeof(ScalarType)); - // loop over subdomains - for (short iloc = 0; iloc < subdivx_; iloc++) - { - unsigned int const phi_size = loc_numpt_ * numst_; - ORBDTYPE* phi_host_view = MemorySpace::Memory::allocate_host_view(phi_size); - MemorySpace::Memory::copy_view_to_host( - getPsi(0, iloc), phi_size, phi_host_view); - - MATDTYPE* localMat_iloc = lmatrix.getRawPtr(iloc); - - // TODO this 
can be done on the GPU - // Compute loc_numpt_ rows (for subdomain iloc) - LinearAlgebraUtils::MPgemmNN(loc_numpt_, numst_, - numst_, 1., phi_host_view, lda_, localMat_iloc, numst_, 0., - tproduct, loc_numpt_); - - MemorySpace::Memory::free_host_view( - phi_host_view); - - ORBDTYPE* parray = array + iloc * loc_numpt_; - unsigned int const parray_size = numst_ * lda; - ORBDTYPE* parray_host_view = MemorySpace::Memory::allocate_host_view(parray_size); - MemorySpace::Memory::copy_view_to_host( - parray, parray_size, parray_host_view); - - double minus = -1. * scale; - for (int j = 0; j < numst_; j++) - LinearAlgebraUtils::MPaxpy(loc_numpt_, minus, - tproduct + j * loc_numpt_, parray_host_view + j * lda); - - MemorySpace::Memory::copy_view_to_dev( - parray_host_view, parray_size, parray); - - MemorySpace::Memory::free_host_view( - parray_host_view); - } + unsigned int const phi_size = numpt_ * numst_; + ScalarType* phi_host_view = MemorySpace::Memory::allocate_host_view(phi_size); + MemorySpace::Memory::copy_view_to_host( + getPsi(0), phi_size, phi_host_view); + + MATDTYPE* localMat = lmatrix.getRawPtr(); + + // TODO this can be done on the GPU + // Compute numpt_ rows (for subdomain 0) + LinearAlgebraUtils::MPgemmNN(numpt_, numst_, numst_, 1., + phi_host_view, lda_, localMat, numst_, 0., tproduct, numpt_); + + MemorySpace::Memory::free_host_view( + phi_host_view); + + ScalarType* parray = array + 0 * numpt_; + unsigned int const parray_size = numst_ * lda; + ScalarType* parray_host_view = MemorySpace::Memory::allocate_host_view(parray_size); + MemorySpace::Memory::copy_view_to_host( + parray, parray_size, parray_host_view); + + ScalarType minus = -1.; + for (int j = 0; j < numst_; j++) + LinearAlgebraUtils::MPaxpy( + numpt_, minus, tproduct + j * numpt_, parray_host_view + j * lda); + + MemorySpace::Memory::copy_view_to_dev( + parray_host_view, parray_size, parray); + + MemorySpace::Memory::free_host_view( + parray_host_view); delete[] tproduct; } -void 
ExtendedGridOrbitals::initRand() +template +void ExtendedGridOrbitals::initRand() { Control& ct = *(Control::instance()); @@ -1609,7 +1544,7 @@ void ExtendedGridOrbitals::initRand() std::vector yrand(grid_.gdim(1)); std::vector zrand(grid_.gdim(2)); - const int loc_length = dim[0] / subdivx_; + const int loc_length = dim[0] / 1; assert(loc_length > 0); assert(static_cast(loc_length) <= dim[0]); @@ -1638,31 +1573,28 @@ void ExtendedGridOrbitals::initRand() for (unsigned int idx = 0; idx < grid_.gdim(2); idx++) zrand[idx] = ran0() - 0.5; - unsigned int const size = loc_numpt_; - ORBDTYPE* psi_state_view = MemorySpace::Memory::allocate_host_view(size); - MemorySpace::Memory::copy_view_to_host( + MemorySpace::Memory::copy_view_to_host( psi(istate), size, psi_state_view); - for (short iloc = 0; iloc < subdivx_; iloc++) - { - for (int ix = loc_length * iloc; ix < loc_length * (iloc + 1); ix++) - for (unsigned int iy = 0; iy < dim[1]; iy++) - for (unsigned int iz = 0; iz < dim[2]; iz++) - { - const double alpha = xrand[xoff + ix] * yrand[yoff + iy] - * zrand[zoff + iz]; - - psi_state_view[ix * incx + iy * incy + iz] - = alpha * alpha; - - assert((ix * incx + iy * incy + iz) - < static_cast(lda_)); - } - } - MemorySpace::Memory::copy_view_to_dev( + for (int ix = loc_length * 0; ix < loc_length; ix++) + for (unsigned int iy = 0; iy < dim[1]; iy++) + for (unsigned int iz = 0; iz < dim[2]; iz++) + { + const double alpha = xrand[xoff + ix] * yrand[yoff + iy] + * zrand[zoff + iz]; + + psi_state_view[ix * incx + iy * incy + iz] = alpha * alpha; + + assert((ix * incx + iy * incy + iz) + < static_cast(lda_)); + } + + MemorySpace::Memory::copy_view_to_dev( psi_state_view, size, psi(istate)); - MemorySpace::Memory::free_host_view( + MemorySpace::Memory::free_host_view( psi_state_view); } @@ -1670,8 +1602,18 @@ void ExtendedGridOrbitals::initRand() } template <> -void ExtendedGridOrbitals::addDotWithNcol2Matrix( - ExtendedGridOrbitals& Apsi, dist_matrix::DistMatrix& matrix) const 
+template <> +void ExtendedGridOrbitals::addDotWithNcol2Matrix( + ExtendedGridOrbitals& Apsi, + dist_matrix::DistMatrix& matrix) const +{ + addDotWithNcol2DistMatrix(Apsi, matrix); +} + +template +void ExtendedGridOrbitals::addDotWithNcol2DistMatrix( + ExtendedGridOrbitals& Apsi, + dist_matrix::DistMatrix& matrix) const { addDot_tm_.start(); @@ -1685,29 +1627,26 @@ void ExtendedGridOrbitals::addDotWithNcol2Matrix( memset(work.data(), 0, size_work * sizeof(double)); unsigned int const block_vector_size = numpt_ * numst_; - ORBDTYPE* block_vector_host_view - = MemorySpace::Memory::allocate_host_view( - block_vector_size); - MemorySpace::Memory::copy_view_to_host( + ScalarType* block_vector_host_view = MemorySpace::Memory::allocate_host_view(block_vector_size); + MemorySpace::Memory::copy_view_to_host( block_vector_.vect(0), block_vector_size, block_vector_host_view); - for (short iloc = 0; iloc < subdivx_; iloc++) - { - unsigned int const phi_size = loc_numpt_ * numst_; - ORBDTYPE* phi_host_view = MemorySpace::Memory::allocate_host_view(phi_size); - MemorySpace::Memory::copy_view_to_host( - Apsi.getPsi(0, iloc), phi_size, phi_host_view); - - // TODO this can be done on the GPU - LinearAlgebraUtils::MPgemmTN(numst_, numst_, - loc_numpt_, vel, block_vector_host_view + iloc * loc_numpt_, lda_, - phi_host_view, lda_, 1., work.data(), numst_); - - MemorySpace::Memory::free_host_view( - phi_host_view); - } - MemorySpace::Memory::free_host_view( + unsigned int const phi_size = numpt_ * numst_; + ScalarType* phi_host_view = MemorySpace::Memory::allocate_host_view(phi_size); + MemorySpace::Memory::copy_view_to_host( + Apsi.getPsi(0), phi_size, phi_host_view); + + // TODO this can be done on the GPU + LinearAlgebraUtils::MPgemmTN(numst_, numst_, numpt_, vel, + block_vector_host_view + 0 * numpt_, lda_, phi_host_view, lda_, 1., + work.data(), numst_); + + MemorySpace::Memory::free_host_view( + phi_host_view); + + MemorySpace::Memory::free_host_view( block_vector_host_view); 
std::vector work2(size_work); @@ -1720,24 +1659,37 @@ void ExtendedGridOrbitals::addDotWithNcol2Matrix( addDot_tm_.stop(); } -#ifdef HAVE_MAGMA template <> -void ExtendedGridOrbitals::addDotWithNcol2Matrix( - ExtendedGridOrbitals& Apsi, ReplicatedMatrix& matrix) const +template <> +void ExtendedGridOrbitals::addDotWithNcol2Matrix( + ExtendedGridOrbitals& Apsi, ReplicatedMatrix& matrix) const +{ + addDotWithNcol2ReplicatedMatrix(Apsi, matrix); +} + +template +void ExtendedGridOrbitals::addDotWithNcol2ReplicatedMatrix( + ExtendedGridOrbitals& Apsi, ReplicatedMatrix& matrix) const { addDot_tm_.start(); + ReplicatedMatrix tmp("tmp", numst_, numst_); + const double vel = grid_.vel(); + +#ifdef HAVE_MAGMA magma_trans_t magma_transa = magma_trans_const('t'); magma_trans_t magma_transb = magma_trans_const('n'); auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - ReplicatedMatrix tmp("tmp", numst_, numst_); - const double vel = grid_.vel(); - magmablas_dgemm(magma_transa, magma_transb, numst_, numst_, numpt_, vel, block_vector_.vect(0), lda_, Apsi.getPsi(0), lda_, 0., tmp.data(), tmp.ld(), magma_singleton.queue_); +#else + LinearAlgebraUtils::MPgemmTN(numst_, numst_, numpt_, vel, + block_vector_.vect(0), lda_, Apsi.getPsi(0), lda_, 0., tmp.data(), + tmp.ld()); +#endif tmp.consolidate(); @@ -1745,23 +1697,21 @@ void ExtendedGridOrbitals::addDotWithNcol2Matrix( addDot_tm_.stop(); } -#endif -void ExtendedGridOrbitals::computeGlobalIndexes() +template +void ExtendedGridOrbitals::computeGlobalIndexes() { overlapping_gids_.clear(); - overlapping_gids_.resize(subdivx_); - for (short iloc = 0; iloc < subdivx_; iloc++) + overlapping_gids_.resize(1); + overlapping_gids_[0].resize(numst_, -1); + for (int gid = 0; gid < numst_; gid++) { - overlapping_gids_[iloc].resize(numst_, -1); - for (int gid = 0; gid < numst_; gid++) - { - overlapping_gids_[iloc][gid] = gid; - } + overlapping_gids_[0][gid] = gid; } } -void ExtendedGridOrbitals::printTimers(std::ostream& os) +template 
+void ExtendedGridOrbitals::printTimers(std::ostream& os) { matB_tm_.print(os); invBmat_tm_.print(os); @@ -1774,7 +1724,8 @@ void ExtendedGridOrbitals::printTimers(std::ostream& os) axpy_tm_.print(os); } -void ExtendedGridOrbitals::initWF( +template +void ExtendedGridOrbitals::initWF( const std::shared_ptr lrs) { Control& ct = *(Control::instance()); @@ -1809,10 +1760,10 @@ void ExtendedGridOrbitals::initWF( if (ct.globalColoring()) { // smooth out random functions - pb::Laph4M myoper(grid_); - pb::GridFunc gf_work( + pb::Laph4M myoper(grid_); + pb::GridFunc gf_work( grid_, ct.bcWF[0], ct.bcWF[1], ct.bcWF[2]); - pb::GridFunc gf_psi( + pb::GridFunc gf_psi( grid_, ct.bcWF[0], ct.bcWF[1], ct.bcWF[2]); if (onpe0 && ct.verbose > 2) @@ -1827,6 +1778,12 @@ void ExtendedGridOrbitals::initWF( } } + // needs to mask one layer of values when using 0 BC for + // wavefunctions the next two lines do that + setDataWithGhosts(); + trade_boundaries(); + setToDataWithGhosts(); + resetIterativeIndex(); if (onpe0 && ct.verbose > 2) @@ -1857,17 +1814,29 @@ void ExtendedGridOrbitals::initWF( #endif } -template void ExtendedGridOrbitals::setDataWithGhosts( - pb::GridFuncVector* data_wghosts); -template void ExtendedGridOrbitals::setDataWithGhosts( - pb::GridFuncVector* data_wghosts); +#ifdef MGMOL_HAS_LIBROM +template +void ExtendedGridOrbitals::set(std::string file_path, int rdim) +{ + const int dim = getLocNumpt(); + + CAROM::BasisReader reader(file_path); + CAROM::Matrix* orbital_basis = reader.getSpatialBasis(rdim); + + Control& ct = *(Control::instance()); + Mesh* mymesh = Mesh::instance(); + pb::GridFunc gf_psi(mymesh->grid(), ct.bcWF[0], ct.bcWF[1], ct.bcWF[2]); + CAROM::Vector psi; + for (int i = 0; i < rdim; ++i) + { + orbital_basis->getColumn(i, psi); + gf_psi.assign(psi.getData()); + setPsi(gf_psi, i); + } +} +#endif -template void ExtendedGridOrbitals::setPsi( - const pb::GridFunc& gf_work, const int ist); -template void ExtendedGridOrbitals::setPsi( - const pb::GridFunc& 
gf_work, const int ist); +template void ExtendedGridOrbitals::axpy( + const ORBDTYPE alpha, const ExtendedGridOrbitals&); -template void ExtendedGridOrbitals::setPsi( - const pb::GridFuncVector& gf_work); -template void ExtendedGridOrbitals::setPsi( - const pb::GridFuncVector& gf_work); +template class ExtendedGridOrbitals; diff --git a/src/ExtendedGridOrbitals.h b/src/ExtendedGridOrbitals.h index 74af1982..e1fb667c 100644 --- a/src/ExtendedGridOrbitals.h +++ b/src/ExtendedGridOrbitals.h @@ -12,15 +12,17 @@ #include "BlockVector.h" #include "DistMatrix.h" +#include "DotProductManager.h" #include "GridFunc.h" #include "HDFrestart.h" #include "Lap.h" #include "MPIdata.h" +#include "MasksSet.h" #include "Mesh.h" #include "Orbitals.h" +#include "ReplicatedMatrix.h" #include "SinCosOps.h" #include "SquareLocalMatrices.h" -#include "global.h" #include "hdf5.h" #include @@ -28,18 +30,11 @@ #include #include -class Potentials; -template -class ProjectedMatrices; class ProjectedMatricesInterface; class LocalizationRegions; -class ExtendedGridOrbitals; -class MasksSet; class ClusterOrbitals; -typedef double (ExtendedGridOrbitals::*ExtendedGridOrbitalsPtrFunc)( - const ExtendedGridOrbitals&); - +template class ExtendedGridOrbitals : public Orbitals { private: @@ -60,11 +55,9 @@ class ExtendedGridOrbitals : public Orbitals static int lda_; // leading dimension for storage static int numpt_; - static int loc_numpt_; - // static double (ExtendedGridOrbitals::*dotProduct_)(const - // ExtendedGridOrbitals&); - static ExtendedGridOrbitalsPtrFunc dotProduct_; + static DotProductManager>* + dotProductManager_; static int data_wghosts_index_; @@ -80,38 +73,37 @@ class ExtendedGridOrbitals : public Orbitals //////////////////////////////////////////////////////// // instance specific data //////////////////////////////////////////////////////// - BlockVector block_vector_; + BlockVector block_vector_; //////////////////////////////////////////////////////// // // private functions // - 
void projectOut(ORBDTYPE* const, const int, const double scale = 1.); + void projectOut(ScalarType* const, const int); + + void multiply_by_ReplicatedMatrix(const ReplicatedMatrix& matrix); + void multiply_by_DistMatrix( + const dist_matrix::DistMatrix& matrix); void multiply_by_matrix( - const DISTMATDTYPE* const, ORBDTYPE*, const int) const; + const DISTMATDTYPE* const, ScalarType*, const int) const; void multiply_by_matrix(const dist_matrix::DistMatrix& matrix, - ORBDTYPE* const product, const int ldp); + ScalarType* const product, const int ldp); void scal(const int i, const double alpha) { block_vector_.scal(i, alpha); } - virtual void assign(const int i, const ORBDTYPE* const v, const int n = 1) + virtual void assign(const int i, const ScalarType* const v, const int n = 1) { block_vector_.assign(i, v, n); } ExtendedGridOrbitals& operator=(const ExtendedGridOrbitals& orbitals); ExtendedGridOrbitals(); - void computeMatB(const ExtendedGridOrbitals&, const pb::Lap&); + void computeMatB(const ExtendedGridOrbitals&, const pb::Lap&); - double dotProductDiagonal(const ExtendedGridOrbitals& orbitals); - double dotProductWithDM(const ExtendedGridOrbitals& orbitals); - double dotProductWithInvS(const ExtendedGridOrbitals& orbitals); - double dotProductSimple(const ExtendedGridOrbitals& orbitals); - - void computeLocalProduct(const ORBDTYPE* const, const int, + void computeLocalProduct(const ScalarType* const, const int, LocalMatrices&, const bool transpose = false); #ifdef HAVE_MAGMA - void computeLocalProduct(const ORBDTYPE* const, const int, + void computeLocalProduct(const ScalarType* const, const int, LocalMatrices&, const bool transpose = false); #endif @@ -120,30 +112,34 @@ class ExtendedGridOrbitals : public Orbitals void computeInvNorms2(std::vector>& inv_norms2) const; void computeDiagonalGram(VariableSizeMatrix& diagS) const; + /*! 
+ * Specialized functions + */ + void addDotWithNcol2DistMatrix( + ExtendedGridOrbitals&, dist_matrix::DistMatrix&) const; + void addDotWithNcol2ReplicatedMatrix( + ExtendedGridOrbitals&, ReplicatedMatrix&) const; + void initFourier(); void initRand(); - dist_matrix::DistMatrix computeProduct(const ORBDTYPE* const, - const int, const int, const bool transpose = false); - ORBDTYPE* psi(const int i) const { return block_vector_.vect(i); } + ScalarType* psi(const int i) const { return block_vector_.vect(i); } - void app_mask(const int, ORBDTYPE*, const short) const {}; + void app_mask(const int, ScalarType*, const short) const {}; #ifdef HAVE_MAGMA void multiplyByMatrix( const SquareLocalMatrices& matrix, - ORBDTYPE* product, const int ldp) const; + ScalarType* product, const int ldp) const; #endif void multiplyByMatrix( const SquareLocalMatrices& matrix, - ORBDTYPE* product, const int ldp) const; + ScalarType* product, const int ldp) const; void setup(); protected: const pb::Grid& grid_; - static short subdivx_; - // indexes corresponding to valid function in each subdomain static std::vector> overlapping_gids_; @@ -197,7 +193,7 @@ class ExtendedGridOrbitals : public Orbitals int numst(void) const { return numst_; } int getLda() const { return lda_; } - int getLocNumpt() const { return loc_numpt_; } + int getLocNumpt() const { return numpt_; } int getNumpt() const { return numpt_; } bool isCompatibleWith(const ExtendedGridOrbitals&) const { return true; } @@ -224,7 +220,7 @@ class ExtendedGridOrbitals : public Orbitals block_vector_.setDataWithGhosts(data_wghosts); } - pb::GridFunc& getFuncWithGhosts(const int i) + pb::GridFunc& getFuncWithGhosts(const int i) { //(*MPIdata::sout)<<" data_wghosts_index_="< class FIRE : public IonicAlgorithm { diff --git a/src/Forces.cc b/src/Forces.cc index 04e6ffd7..ad6b3914 100644 --- a/src/Forces.cc +++ b/src/Forces.cc @@ -416,14 +416,12 @@ void Forces::lforce(Ions& ions, RHODTYPE* rho) template SquareLocalMatrices 
Forces::getReplicatedDM() { -#ifdef HAVE_MAGMA { ProjectedMatrices* projmatrices = dynamic_cast*>( proj_matrices_); if (projmatrices) return projmatrices->getReplicatedDM(); } -#endif { ProjectedMatrices>* projmatrices = dynamic_cast< @@ -674,5 +672,5 @@ void Forces::force(T& orbitals, Ions& ions) total_tm_.stop(); } -template class Forces; -template class Forces; +template class Forces>; +template class Forces>; diff --git a/src/FullyOccupiedNonOrthoDMStrategy.cc b/src/FullyOccupiedNonOrthoDMStrategy.cc index 8887bc69..fb63d61b 100644 --- a/src/FullyOccupiedNonOrthoDMStrategy.cc +++ b/src/FullyOccupiedNonOrthoDMStrategy.cc @@ -46,5 +46,5 @@ int FullyOccupiedNonOrthoDMStrategy::update( return 0; // success } -template class FullyOccupiedNonOrthoDMStrategy; -template class FullyOccupiedNonOrthoDMStrategy; +template class FullyOccupiedNonOrthoDMStrategy>; +template class FullyOccupiedNonOrthoDMStrategy>; diff --git a/src/GramMatrix.cc b/src/GramMatrix.cc index 647ed766..1443d450 100644 --- a/src/GramMatrix.cc +++ b/src/GramMatrix.cc @@ -136,14 +136,13 @@ double GramMatrix>::computeCond() return cond; } -#ifdef HAVE_MAGMA template <> double GramMatrix::computeCond() { const double cond = 1; + return cond; } -#endif // mat is overwritten by inv(ls)*mat*inv(ls**T) template @@ -329,7 +328,5 @@ void GramMatrix::applyInv(VectorType& mat) template class GramMatrix>; template void GramMatrix>::applyInv( dist_matrix::DistVector&); -#ifdef HAVE_MAGMA template class GramMatrix; template void GramMatrix::applyInv(ReplicatedVector&); -#endif diff --git a/src/GrassmanCG.cc b/src/GrassmanCG.cc index d963bb36..5ad18700 100644 --- a/src/GrassmanCG.cc +++ b/src/GrassmanCG.cc @@ -62,7 +62,7 @@ void GrassmanCG::conjugate() double tau = std::max(0., alpha); const double one = 1.; sdir->scal(tau); - sdir->axpy(one, *new_pcgrad); + sdir->axpy((ORBDTYPE)one, *new_pcgrad); } else { @@ -237,9 +237,6 @@ void GrassmanCG::parallelTransportUpdate(const double /*lambda*/, T& phi) { Control& ct = 
*(Control::instance()); - // const double fact = lambda; - const double fact = 1.; - // update history data T* gradptr; // update gradient information @@ -255,13 +252,13 @@ void GrassmanCG::parallelTransportUpdate(const double /*lambda*/, T& phi) if (ct.parallel_transport) { // compute G_old = G - lambda*(Phi*S^{-1}*Phi^T*G).*corrmasks - phi.projectOut(*GrassmanLineMinimization::grad_, fact); + phi.projectOut(*GrassmanLineMinimization::grad_); GrassmanLineMinimization::grad_->applyCorrMask(true); // compute MG_old = MG - lambda*(Phi*S^{-1}*Phi^T*MG).*masks - phi.projectOut(*GrassmanLineMinimization::pcgrad_, fact); + phi.projectOut(*GrassmanLineMinimization::pcgrad_); GrassmanLineMinimization::pcgrad_->applyMask(true); // update preconditioned search direction information - phi.projectOut(*GrassmanLineMinimization::sdir_, fact); + phi.projectOut(*GrassmanLineMinimization::sdir_); GrassmanLineMinimization::sdir_->applyMask(true); } @@ -284,5 +281,5 @@ void GrassmanCG::parallelTransportUpdate(const double /*lambda*/, T& phi) return; } -template class GrassmanCG; -template class GrassmanCG; +template class GrassmanCG>; +template class GrassmanCG>; diff --git a/src/GrassmanCGFactory.cc b/src/GrassmanCGFactory.cc index 49fcb22b..1d35cfc7 100644 --- a/src/GrassmanCGFactory.cc +++ b/src/GrassmanCGFactory.cc @@ -4,22 +4,23 @@ #include "LocGridOrbitals.h" template <> -OrbitalsStepper* GrassmanCGFactory::create( - Hamiltonian* hamiltonian, +OrbitalsStepper>* +GrassmanCGFactory>::create( + Hamiltonian>* hamiltonian, ProjectedMatricesInterface* proj_matrices, - MGmol* mgmol_strategy, Ions& ions, std::ostream& os, - const bool short_sighted) + MGmol>* mgmol_strategy, Ions& ions, + std::ostream& os, const bool short_sighted) { - OrbitalsStepper* stepper; + OrbitalsStepper>* stepper; if (short_sighted) { - stepper = new GrassmanCGSparse( + stepper = new GrassmanCGSparse>( hamiltonian, proj_matrices, mgmol_strategy, ions, os); } else { - stepper = new GrassmanCG( + stepper = new 
GrassmanCG>( hamiltonian, proj_matrices, mgmol_strategy, ions, os); } @@ -27,15 +28,15 @@ OrbitalsStepper* GrassmanCGFactory::create( } template <> -OrbitalsStepper* -GrassmanCGFactory::create( - Hamiltonian* hamiltonian, +OrbitalsStepper>* +GrassmanCGFactory>::create( + Hamiltonian>* hamiltonian, ProjectedMatricesInterface* proj_matrices, - MGmol* mgmol_strategy, Ions& ions, std::ostream& os, - const bool /*short_sighted*/) + MGmol>* mgmol_strategy, Ions& ions, + std::ostream& os, const bool /*short_sighted*/) { - OrbitalsStepper* stepper - = new GrassmanCG( + OrbitalsStepper>* stepper + = new GrassmanCG>( hamiltonian, proj_matrices, mgmol_strategy, ions, os); return stepper; diff --git a/src/GrassmanCGSparse.cc b/src/GrassmanCGSparse.cc index 00f6fd77..936b0850 100644 --- a/src/GrassmanCGSparse.cc +++ b/src/GrassmanCGSparse.cc @@ -328,4 +328,4 @@ void GrassmanCGSparse::parallelTransportUpdate( // proj_matrices_->applyInvS(ss); } -template class GrassmanCGSparse; +template class GrassmanCGSparse>; diff --git a/src/GrassmanLineMinimization.cc b/src/GrassmanLineMinimization.cc index f642e0f4..186e8c82 100644 --- a/src/GrassmanLineMinimization.cc +++ b/src/GrassmanLineMinimization.cc @@ -112,7 +112,7 @@ void GrassmanLineMinimization::update_states( // Grassman line minimization method double lambda = computeStepSize(orbitals); // orbitals.projectOut(*sdir_); - orbitals.axpy(lambda, *sdir_); + orbitals.axpy((ORBDTYPE)lambda, *sdir_); // recompute overlap and inverse for new wavefunctions orbitals.computeGramAndInvS(); if (onpe0 && ct.verbose > 1) @@ -124,7 +124,7 @@ void GrassmanLineMinimization::update_states( { // Preconditioned Power Method // orbitals.projectOut(*sdir_); - orbitals.axpy(alpha, *sdir_); + orbitals.axpy((ORBDTYPE)alpha, *sdir_); // if(onpe0)cout<<"alpha = "<::printTimers(std::ostream& os) update_states_tm_.print(os); } -template class GrassmanLineMinimization; -template class GrassmanLineMinimization; +template class GrassmanLineMinimization>; 
+template class GrassmanLineMinimization>; diff --git a/src/HDFrestart.cc b/src/HDFrestart.cc index 1b2c7fb7..2c9674d0 100644 --- a/src/HDFrestart.cc +++ b/src/HDFrestart.cc @@ -66,8 +66,6 @@ HDFrestart::~HDFrestart() { if (!closed_) close(); - closeWorkSpace(); - MGmol_MPI& mmpi = *(MGmol_MPI::instance()); mmpi.barrier(); @@ -1147,13 +1145,13 @@ int HDFrestart::getLRs(std::shared_ptr lrs, template <> void HDFrestart::getWorkspace(float*& work_space) { - work_space = work_space_float_; + work_space = work_space_float_.data(); } template <> void HDFrestart::getWorkspace(double*& work_space) { - work_space = work_space_double_; + work_space = work_space_double_.data(); } template <> @@ -1567,12 +1565,12 @@ int HDFrestart::readData( if (precision == 1) { status = H5Dread(dset_id, H5T_NATIVE_FLOAT, memspace, filespace, - plist_id, work_space_float_); + plist_id, work_space_float_.data()); } else { status = H5Dread(dset_id, H5T_NATIVE_DOUBLE, memspace, filespace, - plist_id, work_space_double_); + plist_id, work_space_double_.data()); } if (status < 0) @@ -1604,15 +1602,15 @@ int HDFrestart::readData( { //(*MPIdata::sout)<<"PE: "<= 0); if (precision == 1) status = H5Dwrite(dset_id, H5T_NATIVE_FLOAT, memspace, space_id, - plist_id, work_space_float_); + plist_id, work_space_float_.data()); else status = H5Dwrite(dset_id, H5T_NATIVE_DOUBLE, memspace, space_id, - plist_id, work_space_double_); + plist_id, work_space_double_.data()); if (status < 0) { MGMOL_HDFRESTART_FAIL("H5Dwrite failed!!!"); @@ -1851,21 +1851,12 @@ void HDFrestart::setupWorkSpace() { // if( active_ ) { - const int n = block_[0] * block_[1] * block_[2]; - work_space_double_ = new double[n]; - memset(work_space_double_, 0, n * sizeof(double)); - - work_space_float_ = new float[n]; - memset(work_space_float_, 0, n * sizeof(float)); - } -} + const int n = block_[0] * block_[1] * block_[2]; + work_space_double_.resize(n); + memset(work_space_double_.data(), 0, n * sizeof(double)); -void 
HDFrestart::closeWorkSpace() -{ - // if( active_ ) - { - delete[] work_space_double_; - delete[] work_space_float_; + work_space_float_.resize(n); + memset(work_space_float_.data(), 0, n * sizeof(float)); } } diff --git a/src/HDFrestart.h b/src/HDFrestart.h index 7bba69d0..bf49a0e8 100644 --- a/src/HDFrestart.h +++ b/src/HDFrestart.h @@ -60,8 +60,8 @@ class HDFrestart int bsize_; - double* work_space_double_; - float* work_space_float_; + std::vector work_space_double_; + std::vector work_space_float_; #ifdef MGMOL_USE_HDF5P bool use_hdf5p_; @@ -87,7 +87,6 @@ class HDFrestart template void gatherDataXdir(std::vector& data); - void closeWorkSpace(); void setupWorkSpace(); template diff --git a/src/Hamiltonian.cc b/src/Hamiltonian.cc index 5a5d2a53..601fdc24 100644 --- a/src/Hamiltonian.cc +++ b/src/Hamiltonian.cc @@ -86,11 +86,10 @@ const T& Hamiltonian::applyLocal(T& phi, const bool force) template void Hamiltonian::applyLocal(const int ncolors, T& phi, T& hphi) { - apply_Hloc_tm_.start(); #ifdef PRINT_OPERATIONS if (onpe0) (*MPIdata::sout) << "Hamiltonian::applyLocal() for " << ncolors - << " states" << endl; + << " states" << std::endl; #endif const Control& ct = *(Control::instance()); @@ -102,6 +101,9 @@ void Hamiltonian::applyLocal(const int ncolors, T& phi, T& hphi) phi.setDataWithGhosts(); phi.trade_boundaries(); + // start timer after filling ghost values + apply_Hloc_tm_.start(); + using memory_space_type = typename T::memory_space_type; if (ct.Mehrstellen()) @@ -127,7 +129,7 @@ void Hamiltonian::applyLocal(const int ncolors, T& phi, T& hphi) // gfvw1 = -Lap*phi gfv_phi->applyLap(0, gfvw1); // gfv_work1 = -Lap*phi + B*V*psi - gfv_work1.axpy(1., gfvw1); + gfv_work1.axpy((ORBDTYPE)1., gfvw1); // set hpsi data without ghosts hphi.setPsi(gfv_work1); } @@ -139,7 +141,7 @@ void Hamiltonian::applyLocal(const int ncolors, T& phi, T& hphi) for (int i = 0; i < ncolors; i++) { using memory_space_type = typename T::memory_space_type; - ORBDTYPE* ihphi = 
hphi.getPsi(i); + auto ihphi = hphi.getPsi(i); unsigned int const size = hphi.getNumpt(); ORBDTYPE* ihphi_host_view = MemorySpace::Memory::allocate_host_view(size); @@ -159,18 +161,27 @@ void Hamiltonian::applyLocal(const int ncolors, T& phi, T& hphi) apply_Hloc_tm_.stop(); } +template +void Hamiltonian::applyDeltaPot(const T& phi, T& hphi) +{ + const std::vector& dv(pot_->dv()); + + phi.applyDiagonalOp(dv, hphi); +} + // add to hij the elements // corresponding to the local part of the Hamiltonian template <> template <> -void Hamiltonian::addHlocal2matrix(LocGridOrbitals& phi1, - LocGridOrbitals& phi2, dist_matrix::DistMatrix& hij, - const bool force) +void Hamiltonian>::addHlocal2matrix( + LocGridOrbitals& phi1, LocGridOrbitals& phi2, + dist_matrix::DistMatrix& hij, const bool force) { applyLocal(phi2, force); #ifdef PRINT_OPERATIONS - if (onpe0) (*MPIdata::sout) << "Hamiltonian::addHlocal2matrix()" << endl; + if (onpe0) + (*MPIdata::sout) << "Hamiltonian::addHlocal2matrix()" << std::endl; #endif phi1.addDotWithNcol2Matrix(*hlphi_, hij); @@ -178,14 +189,15 @@ void Hamiltonian::addHlocal2matrix(LocGridOrbitals& phi1, template <> template <> -void Hamiltonian::addHlocal2matrix( - ExtendedGridOrbitals& phi1, ExtendedGridOrbitals& phi2, +void Hamiltonian>::addHlocal2matrix( + ExtendedGridOrbitals& phi1, ExtendedGridOrbitals& phi2, dist_matrix::DistMatrix& hij, const bool force) { applyLocal(phi2, force); #ifdef PRINT_OPERATIONS - if (onpe0) (*MPIdata::sout) << "Hamiltonian::addHlocal2matrix()" << endl; + if (onpe0) + (*MPIdata::sout) << "Hamiltonian::addHlocal2matrix()" << std::endl; #endif // hij.print(std::cout, 0, 0, 5, 5); @@ -195,22 +207,39 @@ void Hamiltonian::addHlocal2matrix( // hij.print(std::cout, 0, 0, 5, 5); } -#ifdef HAVE_MAGMA template <> template <> -void Hamiltonian::addHlocal2matrix( - ExtendedGridOrbitals& phi1, ExtendedGridOrbitals& phi2, +void Hamiltonian>::addHlocal2matrix( + ExtendedGridOrbitals& phi1, ExtendedGridOrbitals& phi2, 
ReplicatedMatrix& hij, const bool force) { applyLocal(phi2, force); #ifdef PRINT_OPERATIONS - if (onpe0) (*MPIdata::sout) << "Hamiltonian::addHlocal2matrix()" << endl; + if (onpe0) + (*MPIdata::sout) << "Hamiltonian::addHlocal2matrix() at line " + << __LINE__ << std::endl; #endif phi1.addDotWithNcol2Matrix(*hlphi_, hij); } -#endif + +template <> +template <> +void Hamiltonian>::addHlocal2matrix( + LocGridOrbitals& phi1, LocGridOrbitals& phi2, + ReplicatedMatrix& hij, const bool force) +{ + (void)phi1; + (void)phi2; + (void)hij; + + applyLocal(phi2, force); + + // phi1.addDotWithNcol2Matrix(*hlphi_, hij); + std::cerr << "Not implemented!" << std::endl; + MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); +} template void Hamiltonian::addHlocalij( @@ -219,7 +248,9 @@ void Hamiltonian::addHlocalij( applyLocal(phi2); #ifdef PRINT_OPERATIONS - if (onpe0) (*MPIdata::sout) << "Hamiltonian::addHLocalij()" << endl; + if (onpe0) + (*MPIdata::sout) << "Hamiltonian::addHLocalij() at line " << __LINE__ + << std::endl; #endif addHlocalij(phi1, proj_matrices); @@ -241,15 +272,18 @@ void Hamiltonian::addHlocalij( template <> template <> -void Hamiltonian::addHlocal2matrix(LocGridOrbitals& phi1, - LocGridOrbitals& phi2, VariableSizeMatrix& mat, const bool force) +void Hamiltonian>::addHlocal2matrix( + LocGridOrbitals& phi1, LocGridOrbitals& phi2, + VariableSizeMatrix& mat, const bool force) { Control& ct = *(Control::instance()); applyLocal(phi2, force); #ifdef PRINT_OPERATIONS - if (onpe0) (*MPIdata::sout) << "Hamiltonian::addHLocalij()" << endl; + if (onpe0) + (*MPIdata::sout) << "Hamiltonian::addHLocalij() at line " << __LINE__ + << std::endl; #endif SquareLocalMatrices ss( @@ -260,28 +294,5 @@ void Hamiltonian::addHlocal2matrix(LocGridOrbitals& phi1, mat.insertMatrixElements(ss, phi1.getOverlappingGids(), ct.numst); } -template Hamiltonian::Hamiltonian(); -template Hamiltonian::Hamiltonian(); - -template Hamiltonian::~Hamiltonian(); -template Hamiltonian::~Hamiltonian(); - -template void 
Hamiltonian::setup(pb::Grid const&, int); -template void Hamiltonian::setup(pb::Grid const&, int); - -template const LocGridOrbitals& Hamiltonian::applyLocal( - LocGridOrbitals&, const bool); -template const ExtendedGridOrbitals& -Hamiltonian::applyLocal( - ExtendedGridOrbitals&, const bool); -template void Hamiltonian::addHlocalij(LocGridOrbitals&, - LocGridOrbitals&, ProjectedMatricesInterface* proj_matrices); -template void Hamiltonian::addHlocalij( - ExtendedGridOrbitals&, ExtendedGridOrbitals&, - ProjectedMatricesInterface* proj_matrices); -template void Hamiltonian::addHlocalij( - LocGridOrbitals&, ProjectedMatricesInterface* proj_matrices); -template void Hamiltonian::addHlocalij( - ExtendedGridOrbitals&, ProjectedMatricesInterface* proj_matrices); -template void Hamiltonian::addHlocal2matrix(LocGridOrbitals&, - LocGridOrbitals&, VariableSizeMatrix& mat, const bool force); +template class Hamiltonian>; +template class Hamiltonian>; diff --git a/src/Hamiltonian.h b/src/Hamiltonian.h index 31b84a2e..7263dff6 100644 --- a/src/Hamiltonian.h +++ b/src/Hamiltonian.h @@ -27,8 +27,6 @@ class Hamiltonian static Timer apply_Hloc_tm_; - void applyLocal(const int nstates, OrbitalsType& phi, OrbitalsType& hphi); - public: static Timer apply_Hloc_tm() { return apply_Hloc_tm_; } @@ -42,10 +40,16 @@ class Hamiltonian pb::Lap* lapOper() { return lapOper_; } const OrbitalsType& applyLocal(OrbitalsType& phi, const bool force = false); + void applyLocal(const int nstates, OrbitalsType& phi, OrbitalsType& hphi); + + /*! 
+ * Apply potential difference dv to phi + */ + void applyDeltaPot(const OrbitalsType& phi, OrbitalsType& hphi); template void addHlocal2matrix(OrbitalsType& orbitals1, OrbitalsType& orbitals2, - MatrixType& mat, const bool force = false); + MatrixType& mat, const bool force); void addHlocalij(OrbitalsType& orbitals1, OrbitalsType& orbitals2, ProjectedMatricesInterface*); void addHlocalij(OrbitalsType& orbitals1, ProjectedMatricesInterface*); diff --git a/src/HamiltonianMVPSolver.cc b/src/HamiltonianMVPSolver.cc index 23ae9867..c697b5eb 100644 --- a/src/HamiltonianMVPSolver.cc +++ b/src/HamiltonianMVPSolver.cc @@ -37,8 +37,9 @@ template HamiltonianMVPSolver::HamiltonianMVPSolver(std::ostream& os, Ions& ions, Rho* rho, Energy* energy, - Electrostatic* electrostat, MGmol* mgmol_strategy, - const int numst, const short n_inner_steps, const MatrixType& hinit, + Electrostatic* electrostat, Hamiltonian* hamiltonian, + MGmol* mgmol_strategy, const int numst, + const short n_inner_steps, const MatrixType& hinit, const bool try_shorter_intervals) : os_(os), n_inner_steps_(n_inner_steps), @@ -50,6 +51,7 @@ HamiltonianMVPSolver::solve( // compute new h11 for the current potential by adding local part to // nonlocal components h11 = h11nl; - mgmol_strategy_->addHlocal2matrix(orbitals, orbitals, h11); + hamiltonian_->addHlocal2matrix(orbitals, orbitals, h11, false); projmatrices->assignH(h11); projmatrices->setHB2H(); @@ -177,7 +179,7 @@ int HamiltonianMVPSolver::solve( // update H and compute energy at midpoint h11 = h11nl; - mgmol_strategy_->addHlocal2matrix(orbitals, orbitals, h11); + hamiltonian_->addHlocal2matrix(orbitals, orbitals, h11, false); projmatrices->assignH(h11); projmatrices->setHB2H(); @@ -212,7 +214,7 @@ int HamiltonianMVPSolver::solve( // update H with new potential h11 = h11nl; - mgmol_strategy_->addHlocal2matrix(orbitals, orbitals, h11); + hamiltonian_->addHlocal2matrix(orbitals, orbitals, h11, false); projmatrices->assignH(h11); projmatrices->setHB2H(); @@ 
-268,7 +270,7 @@ int HamiltonianMVPSolver::solve( // update H h11 = h11nl; - mgmol_strategy_->addHlocal2matrix(orbitals, orbitals, h11); + hamiltonian_->addHlocal2matrix(orbitals, orbitals, h11, false); projmatrices->assignH(h11); projmatrices->setHB2H(); @@ -349,15 +351,14 @@ void HamiltonianMVPSolver, - ProjectedMatrices>, LocGridOrbitals>; + ProjectedMatrices>, + LocGridOrbitals>; template class HamiltonianMVPSolver, - ProjectedMatricesSparse, LocGridOrbitals>; + ProjectedMatricesSparse, LocGridOrbitals>; template class HamiltonianMVPSolver, ProjectedMatrices>, - ExtendedGridOrbitals>; -#ifdef HAVE_MAGMA + ExtendedGridOrbitals>; template class HamiltonianMVPSolver, ExtendedGridOrbitals>; -#endif + ProjectedMatrices, ExtendedGridOrbitals>; diff --git a/src/HamiltonianMVPSolver.h b/src/HamiltonianMVPSolver.h index 3d917b93..06388034 100644 --- a/src/HamiltonianMVPSolver.h +++ b/src/HamiltonianMVPSolver.h @@ -11,6 +11,7 @@ #define MGMOL_HAMILTONIANMVP_SOLVER_H_ #include "Energy.h" +#include "Hamiltonian.h" #include "MGmol.h" #include "Rho.h" #include "Timer.h" @@ -38,6 +39,7 @@ class HamiltonianMVPSolver Rho* rho_; Energy* energy_; Electrostatic* electrostat_; + Hamiltonian* hamiltonian_; MGmol* mgmol_strategy_; int numst_; @@ -66,6 +68,7 @@ class HamiltonianMVPSolver public: HamiltonianMVPSolver(std::ostream& os, Ions& ions, Rho* rho, Energy* energy, Electrostatic* electrostat, + Hamiltonian* hamiltonian, MGmol* mgmol_strategy, const int numst, const short n_inner_steps, const MatrixType& hinit, const bool try_shorter_intervals = false); diff --git a/src/HamiltonianMVP_DMStrategy.cc b/src/HamiltonianMVP_DMStrategy.cc index be62b894..1d03a9b3 100644 --- a/src/HamiltonianMVP_DMStrategy.cc +++ b/src/HamiltonianMVP_DMStrategy.cc @@ -22,14 +22,15 @@ template HamiltonianMVP_DMStrategy::HamiltonianMVP_DMStrategy(MPI_Comm comm, std::ostream& os, Ions& ions, Rho* rho, Energy* energy, - Electrostatic* electrostat, MGmol* mgmol_strategy, - OrbitalsType* orbitals) + 
Electrostatic* electrostat, Hamiltonian* hamiltonian, + MGmol* mgmol_strategy, OrbitalsType* orbitals) : comm_(comm), os_(os), ions_(ions), rho_(rho), energy_(energy), electrostat_(electrostat), + hamiltonian_(hamiltonian), global_indexes_(orbitals->getOverlappingGids()), mgmol_strategy_(mgmol_strategy) { @@ -44,8 +45,9 @@ HamiltonianMVP_DMStrategy( - os_, ions_, rho_, energy_, electrostat_, mgmol_strategy_, ct.numst, - ct.dm_inner_steps, projmatrices->getH(), true); + os_, ions_, rho_, energy_, electrostat_, hamiltonian_, + mgmol_strategy_, ct.numst, ct.dm_inner_steps, projmatrices->getH(), + true); } template @@ -97,15 +99,13 @@ void HamiltonianMVP_DMStrategy, - ProjectedMatrices>, LocGridOrbitals>; - + ProjectedMatrices>, + LocGridOrbitals>; template class HamiltonianMVP_DMStrategy, - ProjectedMatricesSparse, LocGridOrbitals>; + ProjectedMatricesSparse, LocGridOrbitals>; template class HamiltonianMVP_DMStrategy, ProjectedMatrices>, - ExtendedGridOrbitals>; -#ifdef HAVE_MAGMA + ExtendedGridOrbitals>; template class HamiltonianMVP_DMStrategy, ExtendedGridOrbitals>; -#endif + ProjectedMatrices, ExtendedGridOrbitals>; diff --git a/src/HamiltonianMVP_DMStrategy.h b/src/HamiltonianMVP_DMStrategy.h index da0e58b1..5f174903 100644 --- a/src/HamiltonianMVP_DMStrategy.h +++ b/src/HamiltonianMVP_DMStrategy.h @@ -11,13 +11,13 @@ #define MGMOL_HamiltonianMVP_DMStrategy_H #include "DMStrategy.h" +#include "Electrostatic.h" #include "Energy.h" #include "HamiltonianMVPSolver.h" +#include "Ions.h" #include "MGmol.h" #include "Rho.h" -class Ions; -class Electrostatic; template class MGmol; @@ -32,6 +32,7 @@ class HamiltonianMVP_DMStrategy : public DMStrategy Rho* rho_; Energy* energy_; Electrostatic* electrostat_; + Hamiltonian* hamiltonian_; const std::vector>& global_indexes_; MGmol* mgmol_strategy_; @@ -40,8 +41,8 @@ class HamiltonianMVP_DMStrategy : public DMStrategy public: HamiltonianMVP_DMStrategy(MPI_Comm comm, std::ostream& os, Ions& ions, Rho* rho, Energy* energy, - 
Electrostatic* electrostat, MGmol* mgmol_strategy, - OrbitalsType* orbitals); + Electrostatic* electrostat, Hamiltonian* hamiltonian, + MGmol* mgmol_strategy, OrbitalsType* orbitals); ~HamiltonianMVP_DMStrategy() override; diff --git a/src/Hartree_CG.cc b/src/Hartree_CG.cc index 375237f8..2418015c 100644 --- a/src/Hartree_CG.cc +++ b/src/Hartree_CG.cc @@ -6,13 +6,8 @@ // All rights reserved. // This file is part of MGmol. For details, see https://github.com/llnl/mgmol. // Please also read this link https://github.com/llnl/mgmol/LICENSE - -#include -#include -using namespace std; - -#include "Control.h" #include "Hartree_CG.h" +#include "Control.h" #include "MultipoleExpansion.h" #include "Laph2.h" @@ -22,15 +17,16 @@ using namespace std; #include "Laph6.h" #include "Laph8.h" -// Timer Poisson::poisson_tm_("Poisson::poisson"); +#include +#include -template -void Hartree_CG::solve( - const pb::GridFunc& rho, const pb::GridFunc& rhoc) +template +void Hartree_CG::solve( + const pb::GridFunc& rho, const pb::GridFunc& rhoc) { PoissonInterface::poisson_tm_.start(); - pb::GridFunc work_rho(rho); + pb::GridFunc work_rho(rho); Control& ct = *(Control::instance()); // Keep in memory vh*rho before updating vh @@ -48,7 +44,7 @@ void Hartree_CG::solve( if (Poisson::bc_[i] == 2) dim_mpol++; //(*MPIdata::sout)<<"dim_mpol="< bc_func( + pb::GridFunc bc_func( Poisson::grid_, Poisson::bc_[0], Poisson::bc_[1], Poisson::bc_[2]); if (dim_mpol > 0) { @@ -70,53 +66,40 @@ void Hartree_CG::solve( } } - /* Check for uniform precision before calling poisson_solver. - * Downgrade or upgrade rhs (work_rho) to have precision of solution (vh_). - * Note that this could be done at the beginning of this function, but - * several operations involving rho might be done in lower precision - * (depending on POTDTYPE), which could affect accuracy. For now, we delay - * the switch until just before the solve call. 
- */ - // if(sizeof(POTDTYPE) != sizeof(RHODTYPE)) - // { - /* solve with POTDTYPE precision */ - pb::GridFunc rhs(work_rho); + pb::GridFunc rhs(work_rho); rhs *= (4. * M_PI); poisson_solver_->solve(*Poisson::vh_, rhs); - // } - // else - // { - // poisson_solver_->solve(*Poisson::vh_, work_rho); - // } - - double residual_reduction = poisson_solver_->getResidualReduction(); - double final_residual = poisson_solver_->getFinalResidual(); + + const double residual_reduction = poisson_solver_->getResidualReduction(); + const double final_residual = poisson_solver_->getFinalResidual(); const bool large_residual = (residual_reduction > 1.e-3 || final_residual > 1.e-3); if (onpe0 && (large_residual || ct.verbose > 1)) - (*MPIdata::sout) << setprecision(2) << scientific + (*MPIdata::sout) << std::setprecision(2) << std::scientific << "Hartree_CG: residual reduction = " << residual_reduction - << ", final residual = " << final_residual << endl; + << ", final residual = " << final_residual + << std::endl; Poisson::Int_vhrho_ = vel * Poisson::vh_->gdot(rho); Poisson::Int_vhrhoc_ = vel * Poisson::vh_->gdot(rhoc); PoissonInterface::poisson_tm_.stop(); - assert(residual_reduction == residual_reduction); + assert(!std::isnan(residual_reduction)); } -template class Hartree_CG>; -// template class Hartree_CG >; -template class Hartree_CG>; -// template class Hartree_CG >; -template class Hartree_CG>; -// template class Hartree_CG >; -template class Hartree_CG>; -// template class Hartree_CG >; -template class Hartree_CG>; -// template class Hartree_CG >; -template class Hartree_CG>; -// template class Hartree_CG >; +template class Hartree_CG, double, float>; +template class Hartree_CG, double, float>; +template class Hartree_CG, double, float>; +template class Hartree_CG, double, float>; +template class Hartree_CG, double, float>; +template class Hartree_CG, double, float>; + +template class Hartree_CG, double, double>; +template class Hartree_CG, double, double>; +template class 
Hartree_CG, double, double>; +template class Hartree_CG, double, double>; +template class Hartree_CG, double, double>; +template class Hartree_CG, double, double>; diff --git a/src/Hartree_CG.h b/src/Hartree_CG.h index efa50919..e1a59657 100644 --- a/src/Hartree_CG.h +++ b/src/Hartree_CG.h @@ -7,28 +7,33 @@ // This file is part of MGmol. For details, see https://github.com/llnl/mgmol. // Please also read this link https://github.com/llnl/mgmol/LICENSE -#ifndef _HARTREE_CG_H_ -#define _HARTREE_CG_H_ +#ifndef MGMOL_HARTREE_CG_H +#define MGMOL_HARTREE_CG_H #include "PCGSolver.h" #include "Poisson.h" -template +#include + +template class Hartree_CG : public Poisson { private: - PCGSolver* poisson_solver_; + std::shared_ptr> + poisson_solver_; public: // Constructor Hartree_CG(const pb::Grid& grid, const short bc[3]) : Poisson(grid, bc) { - T oper(Poisson::grid_); - poisson_solver_ = new PCGSolver(oper, bc[0], bc[1], bc[2]); + OperatorType oper(Poisson::grid_); + poisson_solver_ + = std::make_shared>( + oper, bc[0], bc[1], bc[2]); }; // Destructor - ~Hartree_CG() override { delete poisson_solver_; } + ~Hartree_CG() override {} void setup(const short nu1, const short nu2, const short max_sweeps, const double tol, const short max_nlevels, @@ -38,15 +43,8 @@ class Hartree_CG : public Poisson poisson_solver_->setup(nu1, nu2, max_sweeps, tol, max_nlevels); } - void solve(const pb::GridFunc& rho, - const pb::GridFunc& rhoc) override; - - void applyOperator(pb::GridFunc &vh, - pb::GridFunc &lhs) override - { - T *oper = poisson_solver_->getOperator(); - oper->apply(vh, lhs); - } + void solve(const pb::GridFunc& rho, + const pb::GridFunc& rhoc) override; }; #endif diff --git a/src/IonicAlgorithm.cc b/src/IonicAlgorithm.cc index e804077a..fa8e1690 100644 --- a/src/IonicAlgorithm.cc +++ b/src/IonicAlgorithm.cc @@ -235,5 +235,5 @@ void IonicAlgorithm::updatePotAndMasks() mgmol_strategy_.move_orbitals(orbitals_); } -template class IonicAlgorithm; -template class IonicAlgorithm; 
+template class IonicAlgorithm>; +template class IonicAlgorithm>; diff --git a/src/Ions.cc b/src/Ions.cc index 1879e1ba..248323ae 100644 --- a/src/Ions.cc +++ b/src/Ions.cc @@ -2768,7 +2768,7 @@ void Ions::gatherForces(std::vector& forces, const int root) const const int index = ion->index(); // std::cout << "index = " << index << std::endl; assert(index < num_ions_); - assert(forces.size() >= 3 * index); + assert((int)forces.size() >= 3 * index); assert(index < num_ions_); ion->getForce(&forces[3 * index]); } diff --git a/src/KBPsiMatrixSparse.cc b/src/KBPsiMatrixSparse.cc index 8f5b3d49..b12604da 100644 --- a/src/KBPsiMatrixSparse.cc +++ b/src/KBPsiMatrixSparse.cc @@ -276,8 +276,9 @@ void KBPsiMatrixSparse::scaleWithKBcoeff(const Ions& ions) // potential, and add them into Aij. // Note: neglecting the small matrix elements reduces the size of hnlij and thus // reduces the size of communications later on. -void KBPsiMatrixSparse::computeHvnlMatrix(const KBPsiMatrixSparse* const kbpsi2, - const Ion& ion, SquareSubMatrix& hnlij) const +void KBPsiMatrixSparse::computeHvnlElementsIon( + const KBPsiMatrixSparse* const kbpsi2, const Ion& ion, + SquareSubMatrix& hnlij) const { assert(ion.here()); @@ -346,8 +347,9 @@ void KBPsiMatrixSparse::computeHvnlMatrix(const KBPsiMatrixSparse* const kbpsi2, } } -void KBPsiMatrixSparse::computeHvnlMatrix(const KBPsiMatrixSparse* const kbpsi2, - const Ion& ion, VariableSizeMatrix& mat) const +void KBPsiMatrixSparse::computeHvnlElementsIon( + const KBPsiMatrixSparse* const kbpsi2, const Ion& ion, + VariableSizeMatrix& mat) const { assert(ion.here()); @@ -424,8 +426,6 @@ void KBPsiMatrixSparse::computeHvnlMatrix( ss2dm->accumulate(submat, hij, 0.); } -#ifdef HAVE_MAGMA - template <> void KBPsiMatrixSparse::computeHvnlMatrix( const KBPsiMatrixInterface* const kbpsi2, const Ions& ions, @@ -438,8 +438,6 @@ void KBPsiMatrixSparse::computeHvnlMatrix( hij.consolidate(); } -#endif - // build elements, one atom at a time SquareSubMatrix 
KBPsiMatrixSparse::computeHvnlMatrix( const KBPsiMatrixInterface* const kbpsi2, const Ions& ions) const @@ -455,7 +453,7 @@ SquareSubMatrix KBPsiMatrixSparse::computeHvnlMatrix( // (distribution of work AND Hvnlij contributions) for (const auto& ion : ions.local_ions()) { - computeHvnlMatrix((KBPsiMatrixSparse*)kbpsi2, *ion, Aij); + computeHvnlElementsIon((KBPsiMatrixSparse*)kbpsi2, *ion, Aij); } computeHvnlMatrix_tm_.stop(); @@ -474,7 +472,7 @@ void KBPsiMatrixSparse::computeHvnlMatrix( // (distribution of work AND Hvnlij contributions) for (const auto& ion : ions.local_ions()) { - computeHvnlMatrix((KBPsiMatrixSparse*)kbpsi2, *ion, mat); + computeHvnlElementsIon((KBPsiMatrixSparse*)kbpsi2, *ion, mat); } computeHvnlMatrix_tm_.stop(); @@ -484,63 +482,8 @@ void KBPsiMatrixSparse::computeHvnlMatrix( const KBPsiMatrixInterface* const kbpsi2, const Ions& ions, ProjectedMatricesInterface* proj_matrices) const { - computeHvnlMatrix_tm_.start(); - SquareSubMatrix hnlij(computeHvnlMatrix(kbpsi2, ions)); proj_matrices->setLocalMatrixElementsHnl(hnlij); - - computeHvnlMatrix_tm_.stop(); -} - -// build elements of matrix (assumed to be symmetric) -// assemble resulting matrix in variable sparse matrix format -void KBPsiMatrixSparse::getPsiKBPsiSym( - const Ion& ion, VariableSizeMatrix& sm) -{ - std::vector gids; - ion.getGidsNLprojs(gids); - std::vector kbsigns; - ion.getKBsigns(kbsigns); - - const short nprojs = (short)gids.size(); - for (short i = 0; i < nprojs; i++) - { - const int gid = gids[i]; - const double coeff = (double)kbsigns[i]; - int* rindex = (int*)(kbpsimat_->getTableValue(gid)); - if (rindex == nullptr) continue; - const int lrindex = *rindex; - const int nnzrow1 = kbpsimat_->nnzrow(lrindex); - for (int p1 = 0; p1 < nnzrow1; p1++) - { - double kbpsielement1 = kbpsimat_->getRowEntry(lrindex, p1); - if (fabs(kbpsielement1) <= tolKBpsi) continue; - const int st1 = kbpsimat_->getColumnIndex(lrindex, p1); - for (int p2 = 0; p2 < nnzrow1; p2++) - { - double 
kbpsielement2 = kbpsimat_->getRowEntry(lrindex, p2); - if (fabs(kbpsielement2) <= tolKBpsi) continue; - const double alpha = coeff * kbpsielement1 * kbpsielement2; - /* set hnlij */ - if (fabs(alpha) > tolKBpsi) - { - const int st2 = kbpsimat_->getColumnIndex(lrindex, p2); - sm.insertMatrixElement(st1, st2, alpha, ADD, true); - } - } - } - } -} - -void KBPsiMatrixSparse::getPsiKBPsiSym( - const Ions& ions, VariableSizeMatrix& sm) -{ - // loop over all the ions - // parallelization over ions by including only those centered in subdomain - for (const auto& ion : ions.local_ions()) - { - getPsiKBPsiSym(*ion, sm); - } } template @@ -624,9 +567,9 @@ double KBPsiMatrixSparse::getEvnl( return evnl * Ry2Ha; } -template <> -double KBPsiMatrixSparse::getEvnl(const Ions& ions, - ProjectedMatrices>* proj_matrices) +template +double KBPsiMatrixSparse::getEvnl( + const Ions& ions, ProjectedMatrices* proj_matrices) { SquareLocalMatrices dm( proj_matrices->getReplicatedDM()); @@ -658,20 +601,26 @@ double KBPsiMatrixSparse::getEvnl(const Ions& ions, } double KBPsiMatrixSparse::getTraceDM( - const int gid, const DISTMATDTYPE* const mat_X, const int numst) const + const int gid, const double* const mat_X, const int numst) const { + trace_tm_.start(); + double trace = 0.; int* rindex = (int*)(*kbpsimat_).getTableValue(gid); - if (rindex == nullptr) return trace; + if (rindex == nullptr) + { + trace_tm_.stop(); + return trace; + } const int lrindex = *rindex; const int nnzrow1 = kbpsimat_->nnzrow(lrindex); for (int p1 = 0; p1 < nnzrow1; p1++) { - const int st1 = kbpsimat_->getColumnIndex(lrindex, p1); - const double t1 = (*kbpsimat_).getRowEntry(lrindex, p1); - const DISTMATDTYPE* const pmat = &mat_X[st1 * numst]; + const int st1 = kbpsimat_->getColumnIndex(lrindex, p1); + const double t1 = (*kbpsimat_).getRowEntry(lrindex, p1); + const double* const pmat = &mat_X[st1 * numst]; for (int p2 = 0; p2 < nnzrow1; p2++) { @@ -681,6 +630,8 @@ double KBPsiMatrixSparse::getTraceDM( } } + 
trace_tm_.stop(); + return trace; } @@ -734,11 +685,18 @@ double KBPsiMatrixSparse::getTraceDM( } template void KBPsiMatrixSparse::computeKBpsi(const Ions& ions, - LocGridOrbitals& orbitals, const int first_color, const int nb_colors, - const bool flag); -template void KBPsiMatrixSparse::computeAll(const Ions&, LocGridOrbitals&); + LocGridOrbitals& orbitals, const int first_color, + const int nb_colors, const bool flag); +template void KBPsiMatrixSparse::computeAll( + const Ions&, LocGridOrbitals&); template void KBPsiMatrixSparse::computeKBpsi(const Ions& ions, - ExtendedGridOrbitals& orbitals, const int first_color, const int nb_colors, - const bool flag); -template void KBPsiMatrixSparse::computeAll(const Ions&, ExtendedGridOrbitals&); + ExtendedGridOrbitals& orbitals, const int first_color, + const int nb_colors, const bool flag); +template void KBPsiMatrixSparse::computeAll( + const Ions&, ExtendedGridOrbitals&); + +template double KBPsiMatrixSparse::getEvnl(const Ions& ions, + ProjectedMatrices>* proj_matrices); +template double KBPsiMatrixSparse::getEvnl( + const Ions& ions, ProjectedMatrices* proj_matrices); diff --git a/src/KBPsiMatrixSparse.h b/src/KBPsiMatrixSparse.h index 073fbddc..82321d1d 100644 --- a/src/KBPsiMatrixSparse.h +++ b/src/KBPsiMatrixSparse.h @@ -66,15 +66,12 @@ class KBPsiMatrixSparse : public KBPsiMatrixInterface return (*kbBpsimat_).get_value(gid, st); } - void computeHvnlMatrix(const KBPsiMatrixSparse* const kbpsi, const Ion&, - SquareSubMatrix& mat) const; - void computeHvnlMatrix(const KBPsiMatrixSparse* const kbpsi2, + // private functions working with single Ion + void computeHvnlElementsIon(const KBPsiMatrixSparse* const kbpsi, + const Ion&, SquareSubMatrix& mat) const; + void computeHvnlElementsIon(const KBPsiMatrixSparse* const kbpsi2, const Ion& ion, VariableSizeMatrix& mat) const; - void computeHvnlMatrix(const KBPsiMatrixSparse* const kbpsi2, const Ion&, - ProjectedMatricesInterface*) const; - void getPsiKBPsiSym(const Ions& 
ions, VariableSizeMatrix& sm); - void getPsiKBPsiSym(const Ion& ion, VariableSizeMatrix& sm); template void computeKBpsi(const Ions& ions, OrbitalsType& orbitals, const int first_color, const int nb_colors, const bool flag); diff --git a/src/KBprojectorSparse.cc b/src/KBprojectorSparse.cc index 937b95d2..38b4c9bb 100644 --- a/src/KBprojectorSparse.cc +++ b/src/KBprojectorSparse.cc @@ -15,7 +15,7 @@ const double rthreshold = 1.e-5; -std::vector> KBprojectorSparse::work_nlindex_; +std::vector> KBprojectorSparse::work_nlindex_; std::vector> KBprojectorSparse::work_proj_; KBprojectorSparse::KBprojectorSparse(const Species& sp) : KBprojector(sp) @@ -133,11 +133,12 @@ void KBprojectorSparse::registerPsi(const short iloc, const ORBDTYPE* const psi) assert(work_nlindex_.size() == static_cast(omp_get_max_threads())); assert(static_cast(thread) < work_nlindex_.size()); - std::vector& work(work_nlindex_[thread]); + std::vector& work(work_nlindex_[thread]); for (int i = 0; i < sizenl; i++) { const int j = rnlindex[i]; - work[i] = psi[j]; + // convert to KBPROJDTYPE if necessary to enable optimized dot products + work[i] = (KBPROJDTYPE)psi[j]; } } diff --git a/src/KBprojectorSparse.h b/src/KBprojectorSparse.h index 552132b6..be3af5a8 100644 --- a/src/KBprojectorSparse.h +++ b/src/KBprojectorSparse.h @@ -30,7 +30,7 @@ class KBprojectorSparse : public KBprojector { // work arrays (1 for each thread) - static std::vector> work_nlindex_; + static std::vector> work_nlindex_; static std::vector> work_proj_; diff --git a/src/LBFGS.cc b/src/LBFGS.cc index 9154b7b3..e8064202 100644 --- a/src/LBFGS.cc +++ b/src/LBFGS.cc @@ -128,7 +128,7 @@ void LBFGS::updateRefs() } template <> -void LBFGS::updateRefMasks() +void LBFGS>::updateRefMasks() { Control& ct = *(Control::instance()); @@ -147,7 +147,7 @@ void LBFGS::updateRefMasks() } template <> -void LBFGS::updateRefMasks() +void LBFGS>::updateRefMasks() { } @@ -177,5 +177,5 @@ bool LBFGS::lbfgsLastStepNotAccepted() const return 
!stepper_->check_last_step_accepted(); } -template class LBFGS; -template class LBFGS; +template class LBFGS>; +template class LBFGS>; diff --git a/src/LDAonGrid.cc b/src/LDAonGrid.cc index 77a4bb30..ed659447 100644 --- a/src/LDAonGrid.cc +++ b/src/LDAonGrid.cc @@ -26,5 +26,5 @@ void LDAonGrid::update() get_xc_tm_.stop(); } -template class LDAonGrid; -template class LDAonGrid; +template class LDAonGrid>; +template class LDAonGrid>; diff --git a/src/LDAonGridSpin.cc b/src/LDAonGridSpin.cc index fa5841e3..2f9ca2d9 100644 --- a/src/LDAonGridSpin.cc +++ b/src/LDAonGridSpin.cc @@ -42,5 +42,5 @@ double LDAonGridSpin::getExc() const // in [Ha] return mygrid.vel() * lda_->computeRhoDotExc(); } -template class LDAonGridSpin; -template class LDAonGridSpin; +template class LDAonGridSpin>; +template class LDAonGridSpin>; diff --git a/src/LocGridOrbitals.cc b/src/LocGridOrbitals.cc index 3d14e051..92793a46 100644 --- a/src/LocGridOrbitals.cc +++ b/src/LocGridOrbitals.cc @@ -14,11 +14,11 @@ #include "ColoredRegions.h" #include "Control.h" #include "DistMatrix.h" +#include "DotProductManagerFactory.h" #include "FunctionsPacking.h" #include "GridFunc.h" #include "GridMask.h" #include "HDFrestart.h" -#include "Laph2.h" #include "Laph4M.h" #include "LocGridOrbitals.h" #include "LocalMatrices2DistMatrix.h" @@ -27,8 +27,6 @@ #include "Masks4Orbitals.h" #include "MasksSet.h" #include "Mesh.h" -#include "Potentials.h" -#include "Preconditioning.h" #include "ProjectedMatrices.h" #include "ReplicatedWorkSpace.h" #include "SquareLocalMatrices.h" @@ -46,28 +44,63 @@ #define ORBITAL_OCCUPATION 2. 
std::string getDatasetName(const std::string& name, const int color); -short LocGridOrbitals::subdivx_ = 0; -int LocGridOrbitals::lda_ = 0; -int LocGridOrbitals::numpt_ = 0; -int LocGridOrbitals::loc_numpt_ = 0; -PtrFunc LocGridOrbitals::dotProduct_ = &LocGridOrbitals::dotProductDiagonal; -int LocGridOrbitals::data_wghosts_index_ = -1; - -Timer LocGridOrbitals::get_dm_tm_("LocGridOrbitals::get_dm"); -Timer LocGridOrbitals::matB_tm_("LocGridOrbitals::matB"); -Timer LocGridOrbitals::invBmat_tm_("LocGridOrbitals::invBmat"); -Timer LocGridOrbitals::overlap_tm_("LocGridOrbitals::overlap"); -Timer LocGridOrbitals::dot_product_tm_("LocGridOrbitals::dot_product"); -Timer LocGridOrbitals::addDot_tm_("LocGridOrbitals::addDot"); -Timer LocGridOrbitals::mask_tm_("LocGridOrbitals::mask"); -Timer LocGridOrbitals::prod_matrix_tm_("LocGridOrbitals::prod_matrix"); -Timer LocGridOrbitals::assign_tm_("LocGridOrbitals::assign"); -Timer LocGridOrbitals::normalize_tm_("LocGridOrbitals::normalize"); -Timer LocGridOrbitals::axpy_tm_("LocGridOrbitals::axpy"); - -LocGridOrbitals::LocGridOrbitals(std::string name, const pb::Grid& my_grid, - const short subdivx, const int numst, const short bc[3], - ProjectedMatricesInterface* proj_matrices, +template +DotProductManager>* + LocGridOrbitals::dotProductManager_ + = nullptr; + +template +short LocGridOrbitals::subdivx_ = 0; +template +int LocGridOrbitals::lda_ = 0; +template +int LocGridOrbitals::numpt_ = 0; +template +int LocGridOrbitals::loc_numpt_ = 0; + +template +int LocGridOrbitals::data_wghosts_index_ = -1; + +template +Timer LocGridOrbitals::get_dm_tm_( + "LocGridOrbitals" + std::to_string(8 * sizeof(ScalarType)) + "::get_dm"); +template +Timer LocGridOrbitals::matB_tm_( + "LocGridOrbitals" + std::to_string(8 * sizeof(ScalarType)) + "::matB"); +template +Timer LocGridOrbitals::invBmat_tm_( + "LocGridOrbitals" + std::to_string(8 * sizeof(ScalarType)) + "::invBmat"); +template +Timer LocGridOrbitals::overlap_tm_( + "LocGridOrbitals" + 
std::to_string(8 * sizeof(ScalarType)) + "::overlap"); +template +Timer LocGridOrbitals::dot_product_tm_( + "LocGridOrbitals" + std::to_string(8 * sizeof(ScalarType)) + + "::dot_product"); +template +Timer LocGridOrbitals::addDot_tm_( + "LocGridOrbitals" + std::to_string(8 * sizeof(ScalarType)) + "::addDot"); +template +Timer LocGridOrbitals::mask_tm_( + "LocGridOrbitals" + std::to_string(8 * sizeof(ScalarType)) + "::mask"); +template +Timer LocGridOrbitals::prod_matrix_tm_( + "LocGridOrbitals" + std::to_string(8 * sizeof(ScalarType)) + + "::prod_matrix"); +template +Timer LocGridOrbitals::assign_tm_( + "LocGridOrbitals" + std::to_string(8 * sizeof(ScalarType)) + "::assign"); +template +Timer LocGridOrbitals::normalize_tm_( + "LocGridOrbitals" + std::to_string(8 * sizeof(ScalarType)) + "::normalize"); +template +Timer LocGridOrbitals::axpy_tm_( + "LocGridOrbitals" + std::to_string(8 * sizeof(ScalarType)) + "::axpy"); + +template +LocGridOrbitals::LocGridOrbitals(std::string name, + const pb::Grid& my_grid, const short subdivx, const int numst, + const short bc[3], ProjectedMatricesInterface* proj_matrices, std::shared_ptr lrs, MasksSet* masks, MasksSet* corrmasks, ClusterOrbitals* local_cluster, const bool setup_flag) : name_(std::move(name)), @@ -107,7 +140,8 @@ LocGridOrbitals::LocGridOrbitals(std::string name, const pb::Grid& my_grid, if (setup_flag) setup(lrs); } -LocGridOrbitals::~LocGridOrbitals() +template +LocGridOrbitals::~LocGridOrbitals() { assert(proj_matrices_ != nullptr); assert(pack_); @@ -119,8 +153,9 @@ LocGridOrbitals::~LocGridOrbitals() gidToStorage_ = nullptr; } -LocGridOrbitals::LocGridOrbitals( - const std::string& name, const LocGridOrbitals& A, const bool copy_data) +template +LocGridOrbitals::LocGridOrbitals(const std::string& name, + const LocGridOrbitals& A, const bool copy_data) : Orbitals(A, copy_data), name_(name), proj_matrices_(A.proj_matrices_), @@ -141,9 +176,11 @@ LocGridOrbitals::LocGridOrbitals( setGids2Storage(); } 
-LocGridOrbitals::LocGridOrbitals(const std::string& name, - const LocGridOrbitals& A, ProjectedMatricesInterface* proj_matrices, - MasksSet* masks, MasksSet* corrmasks, const bool copy_data) +template +LocGridOrbitals::LocGridOrbitals(const std::string& name, + const LocGridOrbitals& A, + ProjectedMatricesInterface* proj_matrices, MasksSet* masks, + MasksSet* corrmasks, const bool copy_data) : Orbitals(A, copy_data), name_(name), proj_matrices_(proj_matrices), @@ -170,7 +207,9 @@ LocGridOrbitals::LocGridOrbitals(const std::string& name, proj_matrices_->setup(overlapping_gids_); } -void LocGridOrbitals::copySharedData(const LocGridOrbitals& A) +template +void LocGridOrbitals::copySharedData( + const LocGridOrbitals& A) { assert(A.gidToStorage_ != nullptr); assert(A.pack_); @@ -191,7 +230,9 @@ void LocGridOrbitals::copySharedData(const LocGridOrbitals& A) distributor_normalize_ = A.distributor_normalize_; } -void LocGridOrbitals::copyDataFrom(const LocGridOrbitals& src) +template +void LocGridOrbitals::copyDataFrom( + const LocGridOrbitals& src) { assert(proj_matrices_ != nullptr); @@ -200,19 +241,18 @@ void LocGridOrbitals::copyDataFrom(const LocGridOrbitals& src) setIterativeIndex(src); } -void LocGridOrbitals::setDotProduct(const short dot_type) +template +void LocGridOrbitals::setDotProduct(const short dot_type) { - if (dot_type == 0) - dotProduct_ = &LocGridOrbitals::dotProductDiagonal; - else if (dot_type == 1) - dotProduct_ = &LocGridOrbitals::dotProductWithInvS; - else if (dot_type == 2) - dotProduct_ = &LocGridOrbitals::dotProductWithDM; - else if (dot_type == 3) - dotProduct_ = &LocGridOrbitals::dotProductSimple; + DotProductManagerFactory factory; + + dotProductManager_ = factory.create(dot_type); + + assert(dotProductManager_ != nullptr); } -void LocGridOrbitals::setGids2Storage() +template +void LocGridOrbitals::setGids2Storage() { assert(chromatic_number_ >= 0); assert(subdivx_ > 0); @@ -220,25 +260,26 @@ void LocGridOrbitals::setGids2Storage() if 
(gidToStorage_ != nullptr) gidToStorage_->clear(); else - gidToStorage_ = new std::vector>(); + gidToStorage_ = new std::vector>(); gidToStorage_->resize(subdivx_); for (short iloc = 0; iloc < subdivx_; iloc++) { - std::map& gid2st((*gidToStorage_)[iloc]); + std::map& gid2st((*gidToStorage_)[iloc]); for (int color = 0; color < chromatic_number_; color++) { const int gid = overlapping_gids_[iloc][color]; if (gid != -1) { gid2st.insert( - std::pair(gid, getPsi(color, iloc))); + std::pair(gid, getPsi(color, iloc))); } } } } // return pointer to const data -const ORBDTYPE* LocGridOrbitals::getGidStorage( +template +const ScalarType* LocGridOrbitals::getGidStorage( const int gid, const short iloc) const { assert(numst_ >= 0); @@ -247,15 +288,15 @@ const ORBDTYPE* LocGridOrbitals::getGidStorage( assert(gid < numst_); assert(iloc < (short)gidToStorage_->size()); - std::map::const_iterator p - = (*gidToStorage_)[iloc].find(gid); + auto p = (*gidToStorage_)[iloc].find(gid); if (p != (*gidToStorage_)[iloc].end()) return p->second; else return nullptr; } -void LocGridOrbitals::setup(MasksSet* masks, MasksSet* corrmasks, +template +void LocGridOrbitals::setup(MasksSet* masks, MasksSet* corrmasks, std::shared_ptr lrs) { assert(masks != nullptr); @@ -270,7 +311,9 @@ void LocGridOrbitals::setup(MasksSet* masks, MasksSet* corrmasks, setup(lrs); } -void LocGridOrbitals::setup(std::shared_ptr lrs) +template +void LocGridOrbitals::setup( + std::shared_ptr lrs) { Control& ct = *(Control::instance()); @@ -313,7 +356,8 @@ void LocGridOrbitals::setup(std::shared_ptr lrs) "LocGridOrbitals::setup() done...", (*MPIdata::sout)); } -void LocGridOrbitals::reset(MasksSet* masks, MasksSet* corrmasks, +template +void LocGridOrbitals::reset(MasksSet* masks, MasksSet* corrmasks, std::shared_ptr lrs) { // free some old data @@ -324,7 +368,9 @@ void LocGridOrbitals::reset(MasksSet* masks, MasksSet* corrmasks, setup(masks, corrmasks, lrs); } -void LocGridOrbitals::assign(const LocGridOrbitals& 
orbitals) +template +void LocGridOrbitals::assign( + const LocGridOrbitals& orbitals) { assign_tm_.start(); @@ -346,9 +392,9 @@ void LocGridOrbitals::assign(const LocGridOrbitals& orbitals) { Control& ct = *(Control::instance()); if (onpe0 && ct.verbose > 2) - (*MPIdata::sout) - << "LocGridOrbitals::Assign orbitals to different LR" - << std::endl; + (*MPIdata::sout) << "LocGridOrbitals::Assign orbitals " + "to different LR" + << std::endl; for (int color = 0; color < chromatic_number_; color++) { // assign state @@ -358,7 +404,7 @@ void LocGridOrbitals::assign(const LocGridOrbitals& orbitals) if (gid != -1) { // find storage location in orbitals - const ORBDTYPE* const val + const ScalarType* const val = orbitals.getGidStorage(gid, iloc); // copy into new psi_ if (val != nullptr) @@ -373,7 +419,9 @@ void LocGridOrbitals::assign(const LocGridOrbitals& orbitals) assign_tm_.stop(); } -void LocGridOrbitals::axpy(const double alpha, const LocGridOrbitals& orbitals) +template +void LocGridOrbitals::axpy( + const double alpha, const LocGridOrbitals& orbitals) { axpy_tm_.start(); @@ -399,7 +447,7 @@ void LocGridOrbitals::axpy(const double alpha, const LocGridOrbitals& orbitals) if (gid != -1) { // find orbital storage in orbitals - const ORBDTYPE* const val + const ScalarType* const val = orbitals.getGidStorage(gid, iloc); // copy into new psi_ if (val != nullptr) @@ -416,7 +464,8 @@ void LocGridOrbitals::axpy(const double alpha, const LocGridOrbitals& orbitals) axpy_tm_.stop(); } -short LocGridOrbitals::checkOverlap( +template +short LocGridOrbitals::checkOverlap( const int st1, const int st2, const short level) { assert(masks4orbitals_); @@ -424,7 +473,8 @@ short LocGridOrbitals::checkOverlap( return masks4orbitals_->checkOverlap(st1, st2, level); } -void LocGridOrbitals::applyMask(const bool first_time) +template +void LocGridOrbitals::applyMask(const bool first_time) { assert(chromatic_number_ >= 0); assert(subdivx_ > 0); @@ -451,16 +501,17 @@ void 
LocGridOrbitals::applyMask(const bool first_time) mask_tm_.stop(); } -void LocGridOrbitals::applyCorrMask(const bool first_time) +template +void LocGridOrbitals::applyCorrMask(const bool first_time) { mask_tm_.start(); for (int color = 0; color < chromatic_number_; color++) { - const unsigned int size = block_vector_.get_allocated_size_storage(); - ORBDTYPE* ipsi_host_view = MemorySpace::Memory::allocate_host_view(size); - MemorySpace::Memory::copy_view_to_host( + MemorySpace::Memory::copy_view_to_host( psi(color), size, ipsi_host_view); for (short iloc = 0; iloc < subdivx_; iloc++) @@ -474,9 +525,9 @@ void LocGridOrbitals::applyCorrMask(const bool first_time) else block_vector_.set_zero(color, iloc); } - MemorySpace::Memory::copy_view_to_dev( + MemorySpace::Memory::copy_view_to_dev( ipsi_host_view, size, psi(color)); - MemorySpace::Memory::free_host_view( + MemorySpace::Memory::free_host_view( ipsi_host_view); } incrementIterativeIndex(); @@ -484,8 +535,9 @@ void LocGridOrbitals::applyCorrMask(const bool first_time) mask_tm_.stop(); } -void LocGridOrbitals::app_mask( - const int color, ORBDTYPE* u, const short level) const +template +void LocGridOrbitals::app_mask( + const int color, ScalarType* u, const short level) const { mask_tm_.start(); assert(masks4orbitals_); @@ -503,13 +555,14 @@ void LocGridOrbitals::app_mask( (masks4orbitals_->getMask(gid)).apply(u, level, iloc); } else - memset(u + iloc * lnumpt, 0, lnumpt * sizeof(ORBDTYPE)); + memset(u + iloc * lnumpt, 0, lnumpt * sizeof(ScalarType)); } mask_tm_.stop(); } -void LocGridOrbitals::app_mask( - const int color, pb::GridFunc& gu, const short level) const +template +void LocGridOrbitals::app_mask( + const int color, pb::GridFunc& gu, const short level) const { mask_tm_.start(); @@ -534,23 +587,25 @@ void LocGridOrbitals::app_mask( { int offset = (shift + dim0 * iloc) * incx; assert(offset + lnumpt < static_cast(gu.grid().sizeg())); - ORBDTYPE* pu = gu.uu() + offset; - memset(pu, 0, lnumpt * 
sizeof(ORBDTYPE)); + ScalarType* pu = gu.uu() + offset; + memset(pu, 0, lnumpt * sizeof(ScalarType)); } } mask_tm_.stop(); } -void LocGridOrbitals::init2zero() +template +void LocGridOrbitals::init2zero() { for (int icolor = 0; icolor < chromatic_number_; icolor++) { - ORBDTYPE* ipsi = psi(icolor); - memset(ipsi, 0, numpt_ * sizeof(ORBDTYPE)); + ScalarType* ipsi = psi(icolor); + memset(ipsi, 0, numpt_ * sizeof(ScalarType)); } } -void LocGridOrbitals::initGauss( +template +void LocGridOrbitals::initGauss( const double rc, const std::shared_ptr lrs) { assert(chromatic_number_ >= 0); @@ -584,13 +639,13 @@ void LocGridOrbitals::initGauss( const double rmax = 6. * rc; for (int icolor = 0; icolor < chromatic_number_; icolor++) { - const unsigned int size = numpt_; - ORBDTYPE* ipsi_host_view = MemorySpace::Memory::allocate_host_view(size); - MemorySpace::Memory::copy_view_to_host( + MemorySpace::Memory::copy_view_to_host( psi(icolor), size, ipsi_host_view); - memset(ipsi_host_view, 0, numpt_ * sizeof(ORBDTYPE)); + memset(ipsi_host_view, 0, numpt_ * sizeof(ScalarType)); for (short iloc = 0; iloc < subdivx_; iloc++) { @@ -613,7 +668,7 @@ void LocGridOrbitals::initGauss( const double r = xc.minimage(center, ll, ct.bcWF); if (r < rmax) ipsi_host_view[ix * incx + iy * incy + iz] - = (ORBDTYPE)exp(-r * r * invrc2); + = (ScalarType)exp(-r * r * invrc2); else ipsi_host_view[ix * incx + iy * incy + iz] = 0.; @@ -625,15 +680,16 @@ void LocGridOrbitals::initGauss( } } } - MemorySpace::Memory::copy_view_to_dev( + MemorySpace::Memory::copy_view_to_dev( ipsi_host_view, size, psi(icolor)); - MemorySpace::Memory::free_host_view( + MemorySpace::Memory::free_host_view( ipsi_host_view); } resetIterativeIndex(); } -void LocGridOrbitals::initFourier() +template +void LocGridOrbitals::initFourier() { Control& ct = *(Control::instance()); if (onpe0 && ct.verbose > 2) @@ -669,8 +725,8 @@ void LocGridOrbitals::initFourier() const double kk[3] = { dk[0] * (double)kvector[0], dk[1] * 
(double)kvector[1], dk[2] * (double)kvector[2] }; - ORBDTYPE* ipsi = psi(icolor); - memset(ipsi, 0, numpt_ * sizeof(ORBDTYPE)); + ScalarType* ipsi = psi(icolor); + memset(ipsi, 0, numpt_ * sizeof(ScalarType)); for (short iloc = 0; iloc < subdivx_; iloc++) { @@ -690,8 +746,8 @@ void LocGridOrbitals::initFourier() { ipsi[ix * incx + iy * incy + iz] = 1. - - (ORBDTYPE)(cos(kk[0] * x) * cos(kk[1] * y) - * cos(kk[2] * z)); + - (ScalarType)(cos(kk[0] * x) * cos(kk[1] * y) + * cos(kk[2] * z)); z += hgrid[2]; } @@ -705,7 +761,9 @@ void LocGridOrbitals::initFourier() resetIterativeIndex(); } -int LocGridOrbitals::packStates(std::shared_ptr lrs) +template +int LocGridOrbitals::packStates( + std::shared_ptr lrs) { assert(lrs); @@ -732,11 +790,11 @@ int LocGridOrbitals::packStates(std::shared_ptr lrs) return pack_->chromatic_number(); } -void LocGridOrbitals::multiply_by_matrix( +template +void LocGridOrbitals::multiply_by_matrix( const dist_matrix::DistMatrix& dmatrix, - ORBDTYPE* const product, const int ldp) + ScalarType* const product, const int ldp) { - ReplicatedWorkSpace& wspace( ReplicatedWorkSpace::instance()); DISTMATDTYPE* work_matrix = wspace.square_matrix(); @@ -747,8 +805,9 @@ void LocGridOrbitals::multiply_by_matrix( multiply_by_matrix(0, chromatic_number_, work_matrix, product, ldp); } -void LocGridOrbitals::multiply_by_matrix(const int first_color, - const int ncolors, const DISTMATDTYPE* const matrix, ORBDTYPE* product, +template +void LocGridOrbitals::multiply_by_matrix(const int first_color, + const int ncolors, const DISTMATDTYPE* const matrix, ScalarType* product, const int ldp) const { prod_matrix_tm_.start(); @@ -757,7 +816,7 @@ void LocGridOrbitals::multiply_by_matrix(const int first_color, assert((first_color + ncolors) <= chromatic_number_); assert(subdivx_ > 0); - memset(product, 0, ldp * ncolors * sizeof(ORBDTYPE)); + memset(product, 0, ldp * ncolors * sizeof(ScalarType)); DISTMATDTYPE* matrix_local = new DISTMATDTYPE[chromatic_number_ * ncolors]; 
@@ -790,9 +849,10 @@ void LocGridOrbitals::multiply_by_matrix(const int first_color, prod_matrix_tm_.stop(); } -void LocGridOrbitals::multiplyByMatrix( +template +void LocGridOrbitals::multiplyByMatrix( const SquareLocalMatrices& matrix, - ORBDTYPE* product, const int ldp) const + ScalarType* product, const int ldp) const { prod_matrix_tm_.start(); @@ -829,7 +889,8 @@ void LocGridOrbitals::multiplyByMatrix( // Here the result is stored in one of the matrices used in the multiplication, // so a temporary arry is necessary -void LocGridOrbitals::multiplyByMatrix( +template +void LocGridOrbitals::multiplyByMatrix( const SquareLocalMatrices& matrix) { prod_matrix_tm_.start(); @@ -837,38 +898,38 @@ void LocGridOrbitals::multiplyByMatrix( if (chromatic_number_ > 0) { unsigned int const product_size = loc_numpt_ * chromatic_number_; - std::unique_ptr product( - MemorySpace::Memory::allocate( + std::unique_ptr product( + MemorySpace::Memory::allocate( product_size), - MemorySpace::Memory::free); + MemorySpace::Memory::free); // We want to to use: - // MemorySpace::Memory::set( + // MemorySpace::Memory::set( // product.get(), product_size, 0.); // but we get an error at linking time from nvptx-none-gcc #ifdef HAVE_MAGMA #ifdef HAVE_OPENMP_OFFLOAD - ORBDTYPE* tmp = product.get(); + ScalarType* tmp = product.get(); #pragma omp target teams distribute parallel for is_device_ptr(tmp) for (unsigned int i = 0; i < product_size; ++i) tmp[i] = 0; #else - ORBDTYPE* product_host - = MemorySpace::Memory::allocate( + ScalarType* product_host + = MemorySpace::Memory::allocate( product_size); - std::memset(product_host, 0, product_size * sizeof(ORBDTYPE)); + std::memset(product_host, 0, product_size * sizeof(ScalarType)); MemorySpace::copy_to_dev(product_host, product_size, product.get()); - MemorySpace::Memory::free(product_host); + MemorySpace::Memory::free(product_host); #endif #else - std::memset(product.get(), 0, product_size * sizeof(ORBDTYPE)); + std::memset(product.get(), 0, 
product_size * sizeof(ScalarType)); #endif - const size_t slnumpt = loc_numpt_ * sizeof(ORBDTYPE); + const size_t slnumpt = loc_numpt_ * sizeof(ScalarType); // loop over subdomains for (short iloc = 0; iloc < subdivx_; iloc++) { - ORBDTYPE* phi = getPsi(0, iloc); + ScalarType* phi = getPsi(0, iloc); const MATDTYPE* const mat = matrix.getSubMatrix(iloc); #ifdef HAVE_MAGMA int const mat_size = matrix.m() * matrix.n(); @@ -889,7 +950,7 @@ void LocGridOrbitals::multiplyByMatrix( chromatic_number_, 0., product.get(), loc_numpt_); for (int color = 0; color < chromatic_number_; color++) - MemorySpace::Memory::copy( + MemorySpace::Memory::copy( product.get() + color * loc_numpt_, slnumpt, phi + color); } } @@ -897,35 +958,40 @@ void LocGridOrbitals::multiplyByMatrix( prod_matrix_tm_.stop(); } -void LocGridOrbitals::multiplyByMatrix( +template +void LocGridOrbitals::multiplyByMatrix( const SquareLocalMatrices& matrix, - LocGridOrbitals& product) const + LocGridOrbitals& product) const { multiplyByMatrix(matrix, product.psi(0), product.lda_); } -void LocGridOrbitals::multiply_by_matrix(const int first_color, +template +void LocGridOrbitals::multiply_by_matrix(const int first_color, const int ncolors, const DISTMATDTYPE* const matrix, - LocGridOrbitals& product) const + LocGridOrbitals& product) const { multiply_by_matrix( first_color, ncolors, matrix, product.psi(0), product.lda_); } -void LocGridOrbitals::multiply_by_matrix( - const DISTMATDTYPE* const matrix, LocGridOrbitals& product) const +template +void LocGridOrbitals::multiply_by_matrix( + const DISTMATDTYPE* const matrix, + LocGridOrbitals& product) const { multiply_by_matrix( 0, chromatic_number_, matrix, product.psi(0), product.lda_); } -void LocGridOrbitals::multiply_by_matrix( +template +void LocGridOrbitals::multiply_by_matrix( const dist_matrix::DistMatrix& matrix) { prod_matrix_tm_.start(); - ORBDTYPE* product = new ORBDTYPE[loc_numpt_ * chromatic_number_]; - memset(product, 0, loc_numpt_ * chromatic_number_ * 
sizeof(ORBDTYPE)); + ScalarType* product = new ScalarType[loc_numpt_ * chromatic_number_]; + memset(product, 0, loc_numpt_ * chromatic_number_ * sizeof(ScalarType)); ReplicatedWorkSpace& wspace( ReplicatedWorkSpace::instance()); @@ -936,12 +1002,12 @@ void LocGridOrbitals::multiply_by_matrix( DISTMATDTYPE* matrix_local = new DISTMATDTYPE[chromatic_number_ * chromatic_number_]; - const size_t slnumpt = loc_numpt_ * sizeof(ORBDTYPE); + const size_t slnumpt = loc_numpt_ * sizeof(ScalarType); // loop over subdomains for (short iloc = 0; iloc < subdivx_; iloc++) { - ORBDTYPE* phi = getPsi(0, iloc); + ScalarType* phi = getPsi(0, iloc); matrixToLocalMatrix(iloc, work_matrix, matrix_local); @@ -960,7 +1026,8 @@ void LocGridOrbitals::multiply_by_matrix( prod_matrix_tm_.stop(); } -int LocGridOrbitals::read_hdf5(HDFrestart& h5f_file) +template +int LocGridOrbitals::read_hdf5(HDFrestart& h5f_file) { assert(proj_matrices_ != nullptr); @@ -986,9 +1053,9 @@ int LocGridOrbitals::read_hdf5(HDFrestart& h5f_file) ierr = proj_matrices_->readDM(h5f_file); if (ierr < 0) { - (*MPIdata::serr) - << "LocGridOrbitals::read_hdf5(): error in reading DM" - << std::endl; + (*MPIdata::serr) << "LocGridOrbitals::read_hdf5(): " + "error in reading DM" + << std::endl; return ierr; } } @@ -996,7 +1063,9 @@ int LocGridOrbitals::read_hdf5(HDFrestart& h5f_file) return ierr; } -int LocGridOrbitals::write(HDFrestart& h5f_file, const std::string& name) +template +int LocGridOrbitals::write( + HDFrestart& h5f_file, const std::string& name) { Control& ct = *(Control::instance()); hid_t file_id = h5f_file.file_id(); @@ -1023,7 +1092,7 @@ int LocGridOrbitals::write(HDFrestart& h5f_file, const std::string& name) const short precision = ct.out_restart_info > 3 ? 
2 : 1; if (onpe0 && ct.verbose > 2) - (*MPIdata::sout) << "Write LocGridOrbitals " << name + (*MPIdata::sout) << "Write LocGridOrbitals " << name << " with precision " << precision << std::endl; // loop over global (storage) functions for (int color = 0; color < chromatic_number_; color++) @@ -1126,9 +1195,9 @@ int LocGridOrbitals::write(HDFrestart& h5f_file, const std::string& name) herr_t status = H5Dclose(dset_id); if (status < 0) { - (*MPIdata::serr) - << "LocGridOrbitals::write_func_hdf5:H5Dclose failed!!!" - << std::endl; + (*MPIdata::serr) << "LocGridOrbitals::write_func_" + "hdf5:H5Dclose failed!!!" + << std::endl; return -1; } } @@ -1158,7 +1227,8 @@ int LocGridOrbitals::write(HDFrestart& h5f_file, const std::string& name) return 0; } -int LocGridOrbitals::read_func_hdf5( +template +int LocGridOrbitals::read_func_hdf5( HDFrestart& h5f_file, const std::string& name) { assert(chromatic_number_ >= 0); @@ -1184,7 +1254,7 @@ int LocGridOrbitals::read_func_hdf5( hid_t memspace = H5P_DEFAULT; if (h5f_file.active()) memspace = h5f_file.createMemspace(); - ORBDTYPE* buffer = new ORBDTYPE[block[0] * block[1] * block[2]]; + ScalarType* buffer = new ScalarType[block[0] * block[1] * block[2]]; if (onpe0 && ct.verbose > 2) { @@ -1198,9 +1268,9 @@ int LocGridOrbitals::read_func_hdf5( } else { - (*MPIdata::sout) - << "LocGridOrbitals::read_func_hdf5(): Read wave functions " - << name << " from all tasks..." << std::endl; + (*MPIdata::sout) << "LocGridOrbitals::read_func_hdf5():" + " Read wave functions " + << name << " from all tasks..." 
<< std::endl; } } @@ -1240,18 +1310,18 @@ int LocGridOrbitals::read_func_hdf5( hid_t dset_id = h5f_file.open_dset(key); if (dset_id < 0) { - (*MPIdata::serr) - << "LocGridOrbitals::read_func_hdf5() --- cannot open " << key - << std::endl; + (*MPIdata::serr) << "LocGridOrbitals::read_func_hdf5() " + "--- cannot open " + << key << std::endl; return dset_id; } herr_t status = h5f_file.readData(buffer, memspace, dset_id, precision); if (status < 0) { - (*MPIdata::serr) - << "LocGridOrbitals::read_func_hdf5() --- H5Dread failed!!!" - << std::endl; + (*MPIdata::serr) << "LocGridOrbitals::read_func_hdf5() " + "--- H5Dread failed!!!" + << std::endl; return -1; } @@ -1370,13 +1440,15 @@ int LocGridOrbitals::read_func_hdf5( // initialize matrix chromatic_number_ by ncolor (for columns first_color to // first_color+ncolor) -void LocGridOrbitals::matrixToLocalMatrix(const short iloc, +template +void LocGridOrbitals::matrixToLocalMatrix(const short iloc, const DISTMATDTYPE* const matrix, DISTMATDTYPE* const lmatrix) const { matrixToLocalMatrix(iloc, matrix, lmatrix, 0, chromatic_number_); } -void LocGridOrbitals::matrixToLocalMatrix(const short iloc, +template +void LocGridOrbitals::matrixToLocalMatrix(const short iloc, const DISTMATDTYPE* const matrix, DISTMATDTYPE* const lmatrix, const int first_color, const int ncolor) const { @@ -1404,8 +1476,10 @@ void LocGridOrbitals::matrixToLocalMatrix(const short iloc, // compute the matrix // output: matB -void LocGridOrbitals::computeMatB( - const LocGridOrbitals& orbitals, const pb::Lap& LapOper) +template +void LocGridOrbitals::computeMatB( + const LocGridOrbitals& orbitals, + const pb::Lap& LapOper) { if (numst_ == 0) return; @@ -1422,10 +1496,10 @@ void LocGridOrbitals::computeMatB( SquareLocalMatrices ss( subdivx_, chromatic_number_); - ORBDTYPE* work = new ORBDTYPE[lda_ * bcolor]; - memset(work, 0, lda_ * bcolor * sizeof(ORBDTYPE)); + ScalarType* work = new ScalarType[lda_ * bcolor]; + memset(work, 0, lda_ * bcolor * 
sizeof(ScalarType)); - const ORBDTYPE* const orbitals_psi + const ScalarType* const orbitals_psi = (chromatic_number_ > 0) ? orbitals.block_vector_.vect(0) : nullptr; setDataWithGhosts(); @@ -1465,7 +1539,9 @@ void LocGridOrbitals::computeMatB( } // compute and its inverse -void LocGridOrbitals::computeBAndInvB(const pb::Lap& LapOper) +template +void LocGridOrbitals::computeBAndInvB( + const pb::Lap& LapOper) { assert(proj_matrices_ != nullptr); @@ -1480,7 +1556,8 @@ void LocGridOrbitals::computeBAndInvB(const pb::Lap& LapOper) invBmat_tm_.stop(); } -void LocGridOrbitals::getLocalOverlap( +template +void LocGridOrbitals::getLocalOverlap( SquareLocalMatrices& ss) { assert(chromatic_number_ >= 0); @@ -1493,7 +1570,7 @@ void LocGridOrbitals::getLocalOverlap( #ifdef MGMOL_USE_MIXEDP getLocalOverlap(*this, ss); #else - const ORBDTYPE* const psi = block_vector_.vect(0); + const ScalarType* const psi = block_vector_.vect(0); for (short iloc = 0; iloc < subdivx_; iloc++) { @@ -1508,7 +1585,9 @@ void LocGridOrbitals::getLocalOverlap( } } -void LocGridOrbitals::getLocalOverlap(const LocGridOrbitals& orbitals, +template +void LocGridOrbitals::getLocalOverlap( + const LocGridOrbitals& orbitals, SquareLocalMatrices& ss) { assert(chromatic_number_ >= 0); @@ -1520,7 +1599,9 @@ void LocGridOrbitals::getLocalOverlap(const LocGridOrbitals& orbitals, } } -void LocGridOrbitals::computeLocalProduct(const LocGridOrbitals& orbitals, +template +void LocGridOrbitals::computeLocalProduct( + const LocGridOrbitals& orbitals, LocalMatrices& ss, const bool transpose) { // assert( orbitals.chromatic_number_>=0 ); @@ -1530,9 +1611,10 @@ void LocGridOrbitals::computeLocalProduct(const LocGridOrbitals& orbitals, computeLocalProduct(orbitals.psi(0), orbitals.lda_, ss, transpose); } -void LocGridOrbitals::computeLocalProduct(const ORBDTYPE* const array, - const int ld, LocalMatrices& ss, - const bool transpose) +template +void LocGridOrbitals::computeLocalProduct( + const ScalarType* const array, 
const int ld, + LocalMatrices& ss, const bool transpose) { assert(loc_numpt_ > 0); assert(loc_numpt_ <= ld); @@ -1541,39 +1623,37 @@ void LocGridOrbitals::computeLocalProduct(const ORBDTYPE* const array, assert(grid_.vel() > 0.); assert(subdivx_ > 0); - const ORBDTYPE* const a = transpose ? array : block_vector_.vect(0); - const ORBDTYPE* const b = transpose ? block_vector_.vect(0) : array; + const ScalarType* const a = transpose ? array : block_vector_.vect(0); + const ScalarType* const b = transpose ? block_vector_.vect(0) : array; const int lda = transpose ? ld : lda_; const int ldb = transpose ? lda_ : ld; unsigned int const a_size = numpt_ * ss.m(); - ORBDTYPE* a_host_view - = MemorySpace::Memory::allocate_host_view( - a_size); - MemorySpace::Memory::copy_view_to_host( - const_cast(a), a_size, a_host_view); + ScalarType* a_host_view = MemorySpace::Memory::allocate_host_view(a_size); + MemorySpace::Memory::copy_view_to_host( + const_cast(a), a_size, a_host_view); unsigned int const b_size = numpt_ * ss.n(); - ORBDTYPE* b_host_view - = MemorySpace::Memory::allocate_host_view( - b_size); - MemorySpace::Memory::copy_view_to_host( - const_cast(b), b_size, b_host_view); + ScalarType* b_host_view = MemorySpace::Memory::allocate_host_view(b_size); + MemorySpace::Memory::copy_view_to_host( + const_cast(b), b_size, b_host_view); #ifdef MGMOL_USE_MIXEDP // use temporary float data for matrix ss - LocalMatrices ssf(ss.nmat(), ss.m(), ss.n()); + LocalMatrices ssf(ss.nmat(), ss.m(), ss.n()); #else - LocalMatrices& ssf(ss); + LocalMatrices& ssf(ss); #endif for (short iloc = 0; iloc < subdivx_; iloc++) { ssf.gemm(iloc, loc_numpt_, a_host_view + iloc * loc_numpt_, lda, b_host_view + iloc * loc_numpt_, ldb); } - MemorySpace::Memory::free_host_view( + MemorySpace::Memory::free_host_view( a_host_view); - MemorySpace::Memory::free_host_view( + MemorySpace::Memory::free_host_view( b_host_view); #ifdef MGMOL_USE_MIXEDP ss.copy(ssf); @@ -1582,8 +1662,9 @@ void 
LocGridOrbitals::computeLocalProduct(const ORBDTYPE* const array, ss.scal(grid_.vel()); } -void LocGridOrbitals::computeDiagonalElementsDotProduct( - const LocGridOrbitals& orbitals, std::vector& ss) +template +void LocGridOrbitals::computeDiagonalElementsDotProduct( + const LocGridOrbitals& orbitals, std::vector& ss) { assert(numst_ > 0); assert(grid_.vel() > 0.); @@ -1608,8 +1689,9 @@ void LocGridOrbitals::computeDiagonalElementsDotProduct( mmpi.allreduce(&tmp[0], &ss[0], numst_, MPI_SUM); } -void LocGridOrbitals::computeDiagonalElementsDotProductLocal( - const LocGridOrbitals& orbitals, std::vector& ss) +template +void LocGridOrbitals::computeDiagonalElementsDotProductLocal( + const LocGridOrbitals& orbitals, std::vector& ss) { assert(grid_.vel() > 0.); @@ -1656,22 +1738,16 @@ void LocGridOrbitals::computeDiagonalElementsDotProductLocal( } } -void LocGridOrbitals::computeGram( +template +void LocGridOrbitals::computeGram( dist_matrix::DistMatrix& gram_mat) { - SquareLocalMatrices ss( - subdivx_, chromatic_number_); - - getLocalOverlap(ss); - - LocalMatrices2DistMatrix* sl2dm = LocalMatrices2DistMatrix::instance(); - - gram_mat.clear(); - - sl2dm->accumulate(ss, gram_mat); + computeGram(*this, gram_mat); } -void LocGridOrbitals::computeGram(const LocGridOrbitals& orbitals, +template +void LocGridOrbitals::computeGram( + const LocGridOrbitals& orbitals, dist_matrix::DistMatrix& gram_mat) { SquareLocalMatrices ss( @@ -1687,7 +1763,8 @@ void LocGridOrbitals::computeGram(const LocGridOrbitals& orbitals, } // compute the lower-triangular part of the overlap matrix -void LocGridOrbitals::computeGram(const int verbosity) +template +void LocGridOrbitals::computeGram(const int verbosity) { assert(proj_matrices_ != nullptr); @@ -1716,7 +1793,8 @@ void LocGridOrbitals::computeGram(const int verbosity) overlap_tm_.stop(); } -void LocGridOrbitals::computeGramAndInvS(const int verbosity) +template +void LocGridOrbitals::computeGramAndInvS(const int verbosity) { 
assert(proj_matrices_ != nullptr); @@ -1726,77 +1804,26 @@ void LocGridOrbitals::computeGramAndInvS(const int verbosity) proj_matrices_->computeInvS(); } -void LocGridOrbitals::checkCond(const double tol, const bool flag_stop) +template +void LocGridOrbitals::checkCond( + const double tol, const bool flag_stop) { assert(proj_matrices_ != nullptr); proj_matrices_->checkCond(tol, flag_stop); } -double LocGridOrbitals::dotProductWithDM(const LocGridOrbitals& orbitals) -{ - assert(proj_matrices_ != nullptr); - assert(chromatic_number_ == orbitals.chromatic_number_); - - SquareLocalMatrices ss( - subdivx_, chromatic_number_); - - computeLocalProduct(orbitals, ss); - - return proj_matrices_->dotProductWithDM(ss); -} - -double LocGridOrbitals::dotProductWithInvS(const LocGridOrbitals& orbitals) -{ - assert(proj_matrices_ != nullptr); - assert(chromatic_number_ == orbitals.chromatic_number_); - - SquareLocalMatrices ss( - subdivx_, chromatic_number_); - - computeLocalProduct(orbitals, ss); - - return proj_matrices_->dotProductWithInvS(ss); -} - -double LocGridOrbitals::dotProductDiagonal(const LocGridOrbitals& orbitals) -{ - assert(proj_matrices_ != nullptr); - - std::vector ss; - Control& ct = *(Control::instance()); - if (ct.short_sighted) - { - computeDiagonalElementsDotProductLocal(orbitals, ss); - } - else - { - ss.resize(numst_); - computeDiagonalElementsDotProduct(orbitals, ss); - } - return proj_matrices_->getTraceDiagProductWithInvS(ss); -} - -double LocGridOrbitals::dotProductSimple(const LocGridOrbitals& orbitals) -{ - assert(proj_matrices_ != nullptr); - assert(chromatic_number_ == orbitals.chromatic_number_); - - SquareLocalMatrices ss( - subdivx_, chromatic_number_); - - computeLocalProduct(orbitals, ss); - - return proj_matrices_->dotProductSimple(ss); -} - -double LocGridOrbitals::dotProduct(const LocGridOrbitals& orbitals) +template +double LocGridOrbitals::dotProduct( + const LocGridOrbitals& orbitals) { - return (this->*dotProduct_)(orbitals); // call 
through pointer member + assert(dotProductManager_ != nullptr); + return dotProductManager_->dotProduct(*this, orbitals); } -double LocGridOrbitals::dotProduct( - const LocGridOrbitals& orbitals, const short dot_type) +template +double LocGridOrbitals::dotProduct( + const LocGridOrbitals& orbitals, const short dot_type) { dot_product_tm_.start(); @@ -1804,72 +1831,22 @@ double LocGridOrbitals::dotProduct( assert(subdivx_ > 0); assert(subdivx_ < 1000); - double dot = 0.; - if (dot_type == 0) - { - dot = dotProductDiagonal(orbitals); - } - else if (dot_type == 1) - { - dot = dotProductWithInvS(orbitals); - } - else if (dot_type == 2) - { - dot = dotProductWithDM(orbitals); - } - else if (dot_type == 3) - { - dot = dotProductSimple(orbitals); - } - else - { - MGmol_MPI& mmpi = *(MGmol_MPI::instance()); - (*MPIdata::serr) - << "LocGridOrbitals::dot_product() --- unknown dot product type" - << std::endl; - mmpi.abort(); - } + DotProductManagerFactory factory; + DotProductManager* manager = factory.create(dot_type); + assert(manager != nullptr); - dot_product_tm_.stop(); + double dot = manager->dotProduct(*this, orbitals); - return dot; -} - -dist_matrix::DistMatrix LocGridOrbitals::product( - const LocGridOrbitals& orbitals, const bool transpose) -{ - assert(numst_ > 0); - assert(subdivx_ > 0); - assert(subdivx_ < 1000); - - return product( - orbitals.psi(0), orbitals.chromatic_number_, orbitals.lda_, transpose); -} - -dist_matrix::DistMatrix LocGridOrbitals::product( - const ORBDTYPE* const array, const int ncol, const int lda, - const bool transpose) -{ - assert(lda > 1); - - dot_product_tm_.start(); - - LocalMatrices ss( - subdivx_, chromatic_number_, ncol); - - if (chromatic_number_ != 0) computeLocalProduct(array, lda, ss, transpose); - - LocalMatrices2DistMatrix* sl2dm = LocalMatrices2DistMatrix::instance(); - - dist_matrix::DistMatrix tmp("tmp", numst_, numst_); - sl2dm->accumulate(ss, tmp); + delete manager; dot_product_tm_.stop(); - return tmp; + return dot; } 
-void LocGridOrbitals::orthonormalizeLoewdin(const bool overlap_uptodate, +template +void LocGridOrbitals::orthonormalizeLoewdin( + const bool overlap_uptodate, SquareLocalMatrices* matrixTransform, const bool update_matrices) { @@ -1908,7 +1885,8 @@ void LocGridOrbitals::orthonormalizeLoewdin(const bool overlap_uptodate, if (matrixTransform == nullptr) delete localP; } -double LocGridOrbitals::norm() const +template +double LocGridOrbitals::norm() const { Control& ct = *(Control::instance()); @@ -1921,7 +1899,8 @@ double LocGridOrbitals::norm() const return norm; } -double LocGridOrbitals::normState(const int gid) const +template +double LocGridOrbitals::normState(const int gid) const { assert(gid >= 0); @@ -1953,7 +1932,9 @@ double LocGridOrbitals::normState(const int gid) const return grid_.vel() * norm; } -void LocGridOrbitals::orthonormalize2states(const int st1, const int st2) +template +void LocGridOrbitals::orthonormalize2states( + const int st1, const int st2) { assert(st1 >= 0); assert(st2 >= 0); @@ -2094,8 +2075,9 @@ void LocGridOrbitals::orthonormalize2states(const int st1, const int st2) #endif } -void LocGridOrbitals::multiplyByMatrix2states( - const int st1, const int st2, const double* mat, LocGridOrbitals& product) +template +void LocGridOrbitals::multiplyByMatrix2states(const int st1, + const int st2, const double* mat, LocGridOrbitals& product) { assert(st1 >= 0); assert(st2 >= 0); @@ -2142,7 +2124,8 @@ void LocGridOrbitals::multiplyByMatrix2states( } } -void LocGridOrbitals::computeDiagonalGram( +template +void LocGridOrbitals::computeDiagonalGram( VariableSizeMatrix& diagS) const { const double vel = grid_.vel(); @@ -2174,7 +2157,8 @@ void LocGridOrbitals::computeDiagonalGram( #endif } -void LocGridOrbitals::computeInvNorms2( +template +void LocGridOrbitals::computeInvNorms2( std::vector>& inv_norms2) const { const int initTabSize = 4096; @@ -2206,7 +2190,8 @@ void LocGridOrbitals::computeInvNorms2( } } -void LocGridOrbitals::normalize() 
+template +void LocGridOrbitals::normalize() { normalize_tm_.start(); @@ -2295,8 +2280,10 @@ void LocGridOrbitals::normalize() } // modify argument orbitals, by projecting out its component -// along LocGridOrbitals -void LocGridOrbitals::projectOut(LocGridOrbitals& orbitals, const double scale) +// along LocGridOrbitals +template +void LocGridOrbitals::projectOut( + LocGridOrbitals& orbitals, const double scale) { projectOut(orbitals.psi(0), lda_, scale); @@ -2311,8 +2298,9 @@ void LocGridOrbitals::projectOut(LocGridOrbitals& orbitals, const double scale) orbitals.incrementIterativeIndex(); } -void LocGridOrbitals::projectOut( - ORBDTYPE* const array, const int lda, const double scale) +template +void LocGridOrbitals::projectOut( + ScalarType* const array, const int lda, const double scale) { assert(lda > 1); assert(loc_numpt_ > 0); @@ -2333,14 +2321,14 @@ void LocGridOrbitals::projectOut( #endif proj_matrices_->applyInvS(pmatrix); - ORBDTYPE* tproduct = new ORBDTYPE[loc_numpt_ * chromatic_number_]; - memset(tproduct, 0, loc_numpt_ * chromatic_number_ * sizeof(ORBDTYPE)); + ScalarType* tproduct = new ScalarType[loc_numpt_ * chromatic_number_]; + memset(tproduct, 0, loc_numpt_ * chromatic_number_ * sizeof(ScalarType)); // loop over subdomains for (short iloc = 0; iloc < subdivx_; iloc++) { - ORBDTYPE* phi = getPsi(0, iloc); - ORBDTYPE* parray = array + iloc * loc_numpt_; + ScalarType* phi = getPsi(0, iloc); + ScalarType* parray = array + iloc * loc_numpt_; MATDTYPE* localMat_iloc = pmatrix.getRawPtr(iloc); @@ -2358,7 +2346,8 @@ void LocGridOrbitals::projectOut( delete[] tproduct; } -void LocGridOrbitals::initRand() +template +void LocGridOrbitals::initRand() { Control& ct = *(Control::instance()); @@ -2464,8 +2453,10 @@ void LocGridOrbitals::initRand() } // Compute nstates column of Psi^T*A*Psi starting at column 0 -void LocGridOrbitals::addDotWithNcol2Matrix( - LocGridOrbitals& Apsi, dist_matrix::DistMatrix& matrix) const +template +void 
LocGridOrbitals::addDotWithNcol2Matrix( + LocGridOrbitals& Apsi, + dist_matrix::DistMatrix& matrix) const { addDot_tm_.start(); @@ -2508,7 +2499,8 @@ void LocGridOrbitals::addDotWithNcol2Matrix( addDot_tm_.stop(); } -void LocGridOrbitals::computeGlobalIndexes( +template +void LocGridOrbitals::computeGlobalIndexes( std::shared_ptr lrs) { all_overlapping_gids_ = lrs->getOverlapGids(); @@ -2537,7 +2529,8 @@ void LocGridOrbitals::computeGlobalIndexes( } } -void LocGridOrbitals::printTimers(std::ostream& os) +template +void LocGridOrbitals::printTimers(std::ostream& os) { matB_tm_.print(os); invBmat_tm_.print(os); @@ -2552,7 +2545,9 @@ void LocGridOrbitals::printTimers(std::ostream& os) axpy_tm_.print(os); } -void LocGridOrbitals::initWF(const std::shared_ptr lrs) +template +void LocGridOrbitals::initWF( + const std::shared_ptr lrs) { Control& ct = *(Control::instance()); @@ -2586,10 +2581,10 @@ void LocGridOrbitals::initWF(const std::shared_ptr lrs) if (ct.globalColoring()) { // smooth out random functions - pb::Laph4M myoper(grid_); - pb::GridFunc gf_work( + pb::Laph4M myoper(grid_); + pb::GridFunc gf_work( grid_, ct.bcWF[0], ct.bcWF[1], ct.bcWF[2]); - pb::GridFunc gf_psi( + pb::GridFunc gf_psi( grid_, ct.bcWF[0], ct.bcWF[1], ct.bcWF[2]); if (onpe0 && ct.verbose > 2) @@ -2633,17 +2628,4 @@ void LocGridOrbitals::initWF(const std::shared_ptr lrs) #endif } -template void LocGridOrbitals::setDataWithGhosts( - pb::GridFuncVector* data_wghosts); -template void LocGridOrbitals::setDataWithGhosts( - pb::GridFuncVector* data_wghosts); - -template void LocGridOrbitals::setPsi( - const pb::GridFunc& gf_work, const int ist); -template void LocGridOrbitals::setPsi( - const pb::GridFunc& gf_work, const int ist); - -template void LocGridOrbitals::setPsi( - const pb::GridFuncVector& gf_work); -template void LocGridOrbitals::setPsi( - const pb::GridFuncVector& gf_work); +template class LocGridOrbitals; diff --git a/src/LocGridOrbitals.h b/src/LocGridOrbitals.h index 951e418f..ee49497c 
100644 --- a/src/LocGridOrbitals.h +++ b/src/LocGridOrbitals.h @@ -13,6 +13,7 @@ #include "BlockVector.h" #include "ClusterOrbitals.h" #include "DataDistribution.h" +#include "DotProductManager.h" #include "FunctionsPacking.h" #include "GridFunc.h" #include "HDFrestart.h" @@ -23,7 +24,6 @@ #include "SaveData.h" #include "SinCosOps.h" #include "SquareLocalMatrices.h" -#include "global.h" #include "hdf5.h" #include @@ -31,17 +31,13 @@ #include #include -class Potentials; -template -class ProjectedMatrices; class ProjectedMatricesInterface; class LocalizationRegions; class MasksSet; -class LocGridOrbitals; class Masks4Orbitals; +class ReplicatedMatrix; -typedef double (LocGridOrbitals::*PtrFunc)(const LocGridOrbitals&); - +template class LocGridOrbitals : public Orbitals { private: @@ -66,8 +62,7 @@ class LocGridOrbitals : public Orbitals static int numpt_; static int loc_numpt_; - // static double (LocGridOrbitals::*dotProduct_)(const LocGridOrbitals&); - static PtrFunc dotProduct_; + static DotProductManager>* dotProductManager_; static int data_wghosts_index_; @@ -82,7 +77,7 @@ class LocGridOrbitals : public Orbitals int chromatic_number_; // map gid -> function storage (for each subdomain) - std::vector>* gidToStorage_; + std::vector>* gidToStorage_; // pointers to objects owned outside class ProjectedMatricesInterface* proj_matrices_; @@ -91,7 +86,7 @@ class LocGridOrbitals : public Orbitals //////////////////////////////////////////////////////// // instance specific data //////////////////////////////////////////////////////// - BlockVector block_vector_; + BlockVector block_vector_; //////////////////////////////////////////////////////// // @@ -99,19 +94,19 @@ class LocGridOrbitals : public Orbitals // void copySharedData(const LocGridOrbitals& A); - const ORBDTYPE* getGidStorage(const int st, const short iloc) const; + const ScalarType* getGidStorage(const int st, const short iloc) const; int packStates(std::shared_ptr lrs); void setAssignedIndexes(); - void 
projectOut(ORBDTYPE* const, const int, const double scale = 1.); + void projectOut(ScalarType* const, const int, const double scale = 1.); void multiply_by_matrix(const int first_color, const int ncolors, const DISTMATDTYPE* const matrix, LocGridOrbitals& product) const; void multiply_by_matrix(const int, const int, const DISTMATDTYPE* const, - ORBDTYPE*, const int) const; + ScalarType*, const int) const; void multiply_by_matrix(const dist_matrix::DistMatrix& matrix, - ORBDTYPE* const product, const int ldp); + ScalarType* const product, const int ldp); void scal(const int i, const double alpha) { block_vector_.scal(i, alpha); } - virtual void assign(const int i, const ORBDTYPE* const v, const int n = 1) + virtual void assign(const int i, const ScalarType* const v, const int n = 1) { block_vector_.assign(i, v, n); } @@ -120,18 +115,13 @@ class LocGridOrbitals : public Orbitals LocGridOrbitals& operator=(const LocGridOrbitals& orbitals); LocGridOrbitals(); - void computeMatB(const LocGridOrbitals&, const pb::Lap&); + void computeMatB(const LocGridOrbitals&, const pb::Lap&); void matrixToLocalMatrix(const short, const DISTMATDTYPE* const, DISTMATDTYPE* const, const int, const int) const; void matrixToLocalMatrix( const short, const DISTMATDTYPE* const, DISTMATDTYPE* const) const; - double dotProductDiagonal(const LocGridOrbitals& orbitals); - double dotProductWithDM(const LocGridOrbitals& orbitals); - double dotProductWithInvS(const LocGridOrbitals& orbitals); - double dotProductSimple(const LocGridOrbitals& orbitals); - - void computeLocalProduct(const ORBDTYPE* const, const int, + void computeLocalProduct(const ScalarType* const, const int, LocalMatrices&, const bool transpose = false); @@ -141,15 +131,13 @@ class LocGridOrbitals : public Orbitals void initFourier(); void initRand(); - dist_matrix::DistMatrix product(const ORBDTYPE* const, - const int, const int, const bool transpose = false); - ORBDTYPE* psi(const int i) const { return block_vector_.vect(i); } + 
ScalarType* psi(const int i) const { return block_vector_.vect(i); } - void app_mask(const int, ORBDTYPE*, const short level) const; + void app_mask(const int, ScalarType*, const short level) const; void multiplyByMatrix( const SquareLocalMatrices& matrix, - ORBDTYPE* product, const int ldp) const; + ScalarType* product, const int ldp) const; void setup(MasksSet* masks, MasksSet* corrmasks, std::shared_ptr lrs); @@ -255,7 +243,7 @@ class LocGridOrbitals : public Orbitals block_vector_.setDataWithGhosts(data_wghosts); } - pb::GridFunc& getFuncWithGhosts(const int i) + pb::GridFunc& getFuncWithGhosts(const int i) { //(*MPIdata::sout)<<" data_wghosts_index_="<& v, LocGridOrbitals& hphi) const + { + block_vector_.applyDiagonalOp(v, hphi.block_vector_); + } void scal(const double alpha) { @@ -394,7 +392,8 @@ class LocGridOrbitals : public Orbitals void initGauss(const double, const std::shared_ptr); virtual void axpy(const double alpha, const LocGridOrbitals&); - void app_mask(const int, pb::GridFunc&, const short level) const; + void app_mask( + const int, pb::GridFunc&, const short level) const; void applyMask(const bool first_time = false); void applyCorrMask(const bool first_time = false); diff --git a/src/LocalMatrices2ReplicatedMatrix.cc b/src/LocalMatrices2ReplicatedMatrix.cc new file mode 100644 index 00000000..1b0c94d3 --- /dev/null +++ b/src/LocalMatrices2ReplicatedMatrix.cc @@ -0,0 +1,88 @@ +// Copyright (c) 2017, Lawrence Livermore National Security, LLC and +// UT-Battelle, LLC. +// Produced at the Lawrence Livermore National Laboratory and the Oak Ridge +// National Laboratory. +// LLNL-CODE-743438 +// All rights reserved. +// This file is part of MGmol. For details, see https://github.com/llnl/mgmol. 
+// Please also read this link https://github.com/llnl/mgmol/LICENSE + +#include "LocalMatrices2ReplicatedMatrix.h" +#include "MGmol_MPI.h" + +LocalMatrices2ReplicatedMatrix* LocalMatrices2ReplicatedMatrix::pinstance_ + = nullptr; +std::vector> LocalMatrices2ReplicatedMatrix::global_indexes_; +double LocalMatrices2ReplicatedMatrix::tol_mat_elements = 1.e-14; + +Timer LocalMatrices2ReplicatedMatrix::convert_tm_( + "LocalMatrices2ReplicatedMatrix::convert"); + +void LocalMatrices2ReplicatedMatrix::convert( + const LocalMatrices& src, ReplicatedMatrix& dst, + const int numst, const double tol) const +{ + (void)tol; + + assert(!global_indexes_.empty()); + + convert_tm_.start(); + + const int subdiv = static_cast(global_indexes_.size()); + + std::vector val(subdiv); + + const short chromatic_number + = static_cast(global_indexes_[0].size()); + + std::vector data(numst * numst); + + // double loop over colors + for (short icolor = 0; icolor < chromatic_number; icolor++) + { + for (short jcolor = 0; jcolor < chromatic_number; jcolor++) + { + // loop over subdomains + for (short iloc = 0; iloc < subdiv; iloc++) + { + const int st1 = global_indexes_[iloc][icolor]; + //(*MPIdata::sout)<<"icolor="< + +// Add matrix elements corresponding to subdomains at their right place +// into a ReplicatedMatrix +// Important Note: Neglect contributions smaller than tol! 
+// (may lead to results dependent on number of CPUs) + +class LocalMatrices2ReplicatedMatrix +{ +private: + static LocalMatrices2ReplicatedMatrix* pinstance_; + + static Timer convert_tm_; + + static std::vector> global_indexes_; + + static double tol_mat_elements; + +public: + static LocalMatrices2ReplicatedMatrix* instance() + { + if (pinstance_ == nullptr) + { + pinstance_ = new LocalMatrices2ReplicatedMatrix(); + } + return pinstance_; + } + + LocalMatrices2ReplicatedMatrix() {} + + static void setup(const std::vector>& gids) + { + global_indexes_ = gids; + } + + void convert(const LocalMatrices& src, + ReplicatedMatrix& dst, const int numst, + const double tol = tol_mat_elements) const; + + void accumulate(const LocalMatrices& src, + ReplicatedMatrix& dst, const double tol = tol_mat_elements) const; + + static void printTimers(std::ostream& os) { convert_tm_.print(os); } +}; + +#endif diff --git a/src/LocalizationRegions.cc b/src/LocalizationRegions.cc index f0f96412..4f83fe01 100644 --- a/src/LocalizationRegions.cc +++ b/src/LocalizationRegions.cc @@ -1824,8 +1824,8 @@ double LocalizationRegions::computeMinDistBetweenLocalPairs( //} template float LocalizationRegions::move( - const SpreadsAndCenters& sc, const bool flag); + const SpreadsAndCenters>& sc, const bool flag); template float LocalizationRegions::updateRadiiConstVol( - const SpreadsAndCenters& sc); + const SpreadsAndCenters>& sc); template float LocalizationRegions::updateRadii( - const SpreadsAndCenters& sc, const float ratio); + const SpreadsAndCenters>& sc, const float ratio); diff --git a/src/MGOrbitalsPreconditioning.cc b/src/MGOrbitalsPreconditioning.cc new file mode 100644 index 00000000..957495d9 --- /dev/null +++ b/src/MGOrbitalsPreconditioning.cc @@ -0,0 +1,180 @@ +// Copyright (c) 2017, Lawrence Livermore National Security, LLC and +// UT-Battelle, LLC. +// Produced at the Lawrence Livermore National Laboratory and the Oak Ridge +// National Laboratory. 
+// LLNL-CODE-743438 +// All rights reserved. +// This file is part of MGmol. For details, see https://github.com/llnl/mgmol. +// Please also read this link https://github.com/llnl/mgmol/LICENSE + +#include "MGOrbitalsPreconditioning.h" + +#include "Control.h" +#include "ExtendedGridOrbitals.h" +#include "LocGridOrbitals.h" +#include "LocalizationRegions.h" +#include "MasksSet.h" +#include "Mesh.h" +#include "Potentials.h" +#include "Preconditioning.h" +#include "ProjectedMatricesInterface.h" + +template +MGOrbitalsPreconditioning::MGOrbitalsPreconditioning( + const short mg_levels, const short lap_type) + : mg_levels_(mg_levels), lap_type_(lap_type), is_set_(false){}; + +template +MGOrbitalsPreconditioning::~MGOrbitalsPreconditioning() +{ + assert(is_set_); + assert(precond_); +} + +template +void MGOrbitalsPreconditioning::setup( + OrbitalsType& orbitals, MasksSet* currentMasks, + const std::shared_ptr& lrs) +{ + assert(!is_set_); + + Control& ct(*(Control::instance())); + Mesh* mymesh = Mesh::instance(); + const pb::Grid& mygrid(mymesh->grid()); + + precond_ = std::make_shared>( + lap_type_, mg_levels_, mygrid, ct.bcWF); + + if (currentMasks != nullptr) + { + // set masks in GridFuncVector class + map2masks_ + = std::make_shared(currentMasks, lrs->getOverlapGids()); + pb::GridFuncVector::setMasks( + map2masks_.get()); + } + + precond_->setup(orbitals.getOverlappingGids()); + + assert(orbitals.chromatic_number() + == static_cast(orbitals.getOverlappingGids()[0].size())); + + gfv_work1_ + = std::make_shared>( + mygrid, ct.bcWF[0], ct.bcWF[1], ct.bcWF[2], + orbitals.getOverlappingGids()); + + gfv_work2_ + = std::make_shared>( + mygrid, ct.bcWF[0], ct.bcWF[1], ct.bcWF[2], + orbitals.getOverlappingGids()); + + if (sizeof(ORBDTYPE) != sizeof(PDataType)) + gfv_work3_ + = std::make_shared>( + mygrid, ct.bcWF[0], ct.bcWF[1], ct.bcWF[2], + orbitals.getOverlappingGids()); + + is_set_ = true; + + assert(gfv_work2_); +} + +template +void MGOrbitalsPreconditioning::precond( + 
OrbitalsType& orbitals) +{ + assert(is_set_); + assert(precond_); + assert(gamma_ > 0.); + assert(gfv_work1_); + +#ifdef PRINT_OPERATIONS + if (onpe0) (*MPIdata::sout) << "T::precond_mg()..." << endl; +#endif + precond_tm_.start(); + + // initialize gfv_work2_ with data from orbitals + if (sizeof(ORBDTYPE) == sizeof(PDataType)) + { + orbitals.setDataWithGhosts(gfv_work2_.get()); + } + else + { + // Convert to data with ghosts first, then convert to different + // precision. This is more efficient in practice than doing precision + // conversion in setDataWithGhosts + orbitals.setDataWithGhosts(gfv_work3_.get()); + + gfv_work2_->copyFrom(*gfv_work3_); + } + + gfv_work1_->resetData(); + gfv_work1_->axpy((PDataType)gamma_, *gfv_work2_); + + // block-implemented preconditioner + precond_->mg(*gfv_work1_, *gfv_work2_, lap_type_, 0); + + if (sizeof(ORBDTYPE) == sizeof(PDataType)) + { + orbitals.setPsi(*gfv_work1_); + } + else + { + // Convert to orbitals precision first + gfv_work3_->copyFrom(*gfv_work1_); + + // set orbitals to GridFuncVector second + orbitals.setPsi(*gfv_work3_); + } + +#ifdef PRINT_OPERATIONS + if (onpe0) + (*MPIdata::sout) + << "MGOrbitalsPreconditioning::" + "precond_mg() done" + << endl; +#endif + precond_tm_.stop(); +} + +template +void MGOrbitalsPreconditioning::setGamma( + const pb::Lap& lapOper, const Potentials& pot, + const short mg_levels, ProjectedMatricesInterface* proj_matrices) +{ + assert(precond_); + assert(is_set_); + + const double small_eig = proj_matrices->getLowestEigenvalue(); + double diag = lapOper.invDiagEl(); + double vmax = pot.max(); + + // diag * 4^{N_level+1} + // gamma = inverse of the largest eigenvalue for the low frequency error + gamma_ = diag; + for (short ln = 0; ln <= mg_levels; ln++) + { + gamma_ *= 4.; + } + gamma_ = 1.0 / (2.0 / gamma_ + fabs(vmax - small_eig)); +#ifdef DEBUG + Control& ct(*(Control::instance())); + if (onpe0 && ct.verbose > 2) + (*MPIdata::sout) << " time step for low frequencies corrections = 
" + << gamma_ << endl; +#endif +} + +template +void MGOrbitalsPreconditioning::printTimers( + std::ostream& os) +{ + precond_tm_.print(os); +} + +template class MGOrbitalsPreconditioning, float>; +template class MGOrbitalsPreconditioning, double>; +template class MGOrbitalsPreconditioning, float>; +template class MGOrbitalsPreconditioning, + double>; diff --git a/src/MGOrbitalsPreconditioning.h b/src/MGOrbitalsPreconditioning.h new file mode 100644 index 00000000..87ffa3a9 --- /dev/null +++ b/src/MGOrbitalsPreconditioning.h @@ -0,0 +1,79 @@ +// Copyright (c) 2017, Lawrence Livermore National Security, LLC and +// UT-Battelle, LLC. +// Produced at the Lawrence Livermore National Laboratory and the Oak Ridge +// National Laboratory. +// LLNL-CODE-743438 +// All rights reserved. +// This file is part of MGmol. For details, see https://github.com/llnl/mgmol. +// Please also read this link https://github.com/llnl/mgmol/LICENSE + +#ifndef MGMOL_MGOrbitalsPreconditioning_H +#define MGMOL_MGOrbitalsPreconditioning_H + +#include "GridFuncVector.h" +#include "Lap.h" +#include "Map2Masks.h" +#include "OrbitalsPreconditioning.h" +#include "Preconditioning.h" + +#include + +// class Masks4Orbitals; +// class MasksSet; +class ProjectedMatricesInterface; +class Potentials; +// class LocalizationRegions; + +template +class MGOrbitalsPreconditioning : public OrbitalsPreconditioning +{ +private: +#ifdef HAVE_MAGMA + using memory_space_type = MemorySpace::Device; +#else + using memory_space_type = MemorySpace::Host; +#endif + + std::shared_ptr> precond_; + + // work arrays with preconditioner precision + std::shared_ptr> + gfv_work1_; + std::shared_ptr> + gfv_work2_; + + // tmp work array for case ORBDTYPE!=PDataType + std::shared_ptr> gfv_work3_; + + short mg_levels_; + + short lap_type_; + + bool is_set_; + + // coefficient for preconditioning + double gamma_; + + // timers + static Timer precond_tm_; + + std::shared_ptr map2masks_; + +public: + MGOrbitalsPreconditioning(const 
short mg_levels, const short lap_type); + + ~MGOrbitalsPreconditioning(); + + void setup(OrbitalsType& orbitals, MasksSet*, + const std::shared_ptr&) override; + void precond(OrbitalsType& orbitals) override; + void setGamma(const pb::Lap& lapOper, const Potentials& pot, + const short mg_levels, ProjectedMatricesInterface* proj_matrices); + static void printTimers(std::ostream& os); +}; + +template +Timer MGOrbitalsPreconditioning::precond_tm_( + "MGOrbitalsPreconditioning::precond"); + +#endif diff --git a/src/MGmol.cc b/src/MGmol.cc index 5c590819..0dcd2092 100644 --- a/src/MGmol.cc +++ b/src/MGmol.cc @@ -39,8 +39,10 @@ #include "KBPsiMatrixSparse.h" #include "LBFGS.h" #include "LocGridOrbitals.h" +#include "LocalMatrices2ReplicatedMatrix.h" #include "LocalizationRegions.h" #include "MDfiles.h" +#include "MGOrbitalsPreconditioning.h" #include "MGkernels.h" #include "MGmol.h" #include "MLWFTransform.h" @@ -48,7 +50,6 @@ #include "MVPSolver.h" #include "MasksSet.h" #include "Mesh.h" -#include "OrbitalsPreconditioning.h" #include "PackedCommunicationBuffer.h" #include "PoissonInterface.h" #include "Potentials.h" @@ -58,6 +59,7 @@ #include "ProjectedMatricesMehrstellen.h" #include "ProjectedMatricesSparse.h" #include "ReplicatedMatrix.h" +#include "ReplicatedMatrix2SquareLocalMatrices.h" #include "ReplicatedVector.h" #include "Rho.h" #include "SP2.h" @@ -93,8 +95,11 @@ extern Timer dsyrk_tm; extern Timer ssyrk_tm; extern Timer mpsyrk_tm; extern Timer tttsyrk_tm; -extern Timer mpdot_tm; extern Timer ttdot_tm; +extern Timer loopdot_tm; +extern Timer loopaxpy_tm; +extern Timer loopscal_tm; +extern Timer loopcp_tm; extern Timer get_NOLMO_tm; extern Timer get_MLWF_tm; extern Timer md_iterations_tm; @@ -142,7 +147,7 @@ MGmol::~MGmol() } template <> -void MGmol::initialMasks() +void MGmol>::initialMasks() { assert(lrs_); @@ -160,7 +165,7 @@ void MGmol::initialMasks() } template <> -void MGmol::initialMasks() +void MGmol>::initialMasks() { } @@ -236,22 +241,15 @@ int 
MGmol::initial() // initialize data distribution objects bool with_spin = (mmpi.nspin() > 1); - // we support using ReplicatedMatrix on GPU only for - // a limited set of options -#ifdef HAVE_MAGMA - bool use_replicated_matrix - = !std::is_same::value; -#endif + bool use_replicated_matrix = ct.rmatrices; if (ct.Mehrstellen()) { -#ifdef HAVE_MAGMA if (use_replicated_matrix) proj_matrices_.reset( new ProjectedMatricesMehrstellen( ct.numst, with_spin, ct.occ_width)); else -#endif proj_matrices_.reset(new ProjectedMatricesMehrstellen< dist_matrix::DistMatrix>( ct.numst, with_spin, ct.occ_width)); @@ -259,13 +257,10 @@ int MGmol::initial() else if (ct.short_sighted) proj_matrices_.reset(new ProjectedMatricesSparse( ct.numst, ct.occ_width, lrs_, local_cluster_.get())); - else -#ifdef HAVE_MAGMA - if (use_replicated_matrix) + else if (use_replicated_matrix) proj_matrices_.reset(new ProjectedMatrices( ct.numst, with_spin, ct.occ_width)); else -#endif proj_matrices_.reset( new ProjectedMatrices>( ct.numst, with_spin, ct.occ_width)); @@ -289,7 +284,7 @@ int MGmol::initial() ct.numst, ct.bcWF, proj_matrices_.get(), lrs_, currentMasks_.get(), corrMasks_.get(), local_cluster_.get(), true); - increaseMemorySlotsForOrbitals(); + increaseMemorySlotsForOrbitals(); Potentials& pot = hamiltonian_->potential(); pb::Lap* lapOper = hamiltonian_->lapOper(); @@ -463,18 +458,17 @@ int MGmol::initial() updateHmatrix(*current_orbitals_, *ions_); // HMVP algorithm requires that H is initialized -#ifdef HAVE_MAGMA if (use_replicated_matrix) dm_strategy_.reset( DMStrategyFactory::create(comm_, os_, *ions_, rho_.get(), energy_.get(), electrostat_.get(), - this, proj_matrices_.get(), current_orbitals_)); + hamiltonian_.get(), this, proj_matrices_.get(), + current_orbitals_)); else -#endif dm_strategy_.reset(DMStrategyFactory>::create(comm_, os_, *ions_, - rho_.get(), energy_.get(), electrostat_.get(), this, - proj_matrices_.get(), current_orbitals_)); + rho_.get(), energy_.get(), 
electrostat_.get(), hamiltonian_.get(), + this, proj_matrices_.get(), current_orbitals_)); // theta = invB * Hij proj_matrices_->updateThetaAndHB(); @@ -529,8 +523,7 @@ void MGmol::run() constraints_->projectOutForces(20); - if ((ions_->getNumIons() <= 1024 || ct.verbose > 2) - && ct.verbose > 0) + if ((ions_->getNumIons() <= 1024 || ct.verbose > 1)) ions_->printForcesGlobal(os_); finalEnergy(); @@ -641,10 +634,12 @@ void MGmol::write_header() << (omp_get_max_threads() > 1 ? "s " : " "); os_ << "active" << std::endl << std::endl; #endif - - os_ << " ScaLapack block size: " - << dist_matrix::DistMatrix::getBlockSize() - << std::endl; + if (!ct.rmatrices) + { + os_ << " ScaLapack block size: " + << dist_matrix::DistMatrix::getBlockSize() + << std::endl; + } if (!ct.short_sighted) { @@ -752,7 +747,6 @@ void MGmol::printEigAndOcc() && onpe0) { bool printflag = false; -#ifdef HAVE_MAGMA // try with ReplicatedMatrix first { std::shared_ptr> projmatrices @@ -765,7 +759,6 @@ void MGmol::printEigAndOcc() printflag = true; } } -#endif if (!printflag) { std::shared_ptr< @@ -870,9 +863,13 @@ void MGmol::printTimers() dsyrk_tm.print(os_); mpsyrk_tm.print(os_); tttsyrk_tm.print(os_); - mpdot_tm.print(os_); ttdot_tm.print(os_); + loopcp_tm.print(os_); + loopaxpy_tm.print(os_); + loopscal_tm.print(os_); + loopdot_tm.print(os_); + dist_matrix::SubMatrices::printTimers(os_); DistMatrix2SquareLocalMatrices::printTimers(os_); @@ -881,6 +878,9 @@ void MGmol::printTimers() dist_matrix::DistMatrix::printTimers(os_); + ReplicatedMatrix2SquareLocalMatrices::printTimers(os_); + LocalMatrices2ReplicatedMatrix::printTimers(os_); + MGmol_MPI::printTimers(os_); g_kbpsi_->printTimers(os_); @@ -902,9 +902,13 @@ void MGmol::printTimers() AndersonMix::update_tm().print(os_); proj_matrices_->printTimers(os_); ShortSightedInverse::printTimers(os_); - if (std::is_same::value) - MVPSolver>::value) + { + MVPSolver, dist_matrix::DistMatrix>::printTimers(os_); + MVPSolver, + 
ReplicatedMatrix>::printTimers(os_); + } VariableSizeMatrixInterface::printTimers(os_); DataDistribution::printTimers(os_); PackedCommunicationBuffer::printTimers(os_); @@ -941,19 +945,30 @@ void MGmol::printTimers() setup_tm_.print(os_); HDFrestart::printTimers(os_); #ifdef HAVE_MAGMA - PowerGen::printTimers(os_); BlockVector::printTimers(os_); - DavidsonSolver::printTimers(os_); - ChebyshevApproximation::printTimers(os_); #endif + PowerGen::printTimers(os_); + DavidsonSolver, + ReplicatedMatrix>::printTimers(os_); + ChebyshevApproximation::printTimers(os_); PowerGen, dist_matrix::DistVector>::printTimers(os_); BlockVector::printTimers(os_); - DavidsonSolver>::printTimers(os_); - ChebyshevApproximation>::printTimers( - os_); - OrbitalsPreconditioning::printTimers(os_); + if (ct.rmatrices) + { + DavidsonSolver, + ReplicatedMatrix>::printTimers(os_); + ChebyshevApproximation::printTimers(os_); + } + else + { + DavidsonSolver, + dist_matrix::DistMatrix>::printTimers(os_); + ChebyshevApproximation< + dist_matrix::DistMatrix>::printTimers(os_); + } + MGOrbitalsPreconditioning::printTimers(os_); + MGOrbitalsPreconditioning::printTimers(os_); MDfiles::printTimers(os_); ChebyshevApproximationInterface::printTimers(os_); } @@ -1018,14 +1033,26 @@ double MGmol::get_evnl(const Ions& ions) } else { - std::shared_ptr< - ProjectedMatrices>> - projmatrices = std::dynamic_pointer_cast< - ProjectedMatrices>>( - proj_matrices_); - assert(projmatrices); + if (ct.rmatrices) + { + std::shared_ptr> projmatrices + = std::dynamic_pointer_cast< + ProjectedMatrices>(proj_matrices_); + assert(projmatrices); - val = g_kbpsi_->getEvnl(ions, projmatrices.get()); + val = g_kbpsi_->getEvnl(ions, projmatrices.get()); + } + else + { + std::shared_ptr< + ProjectedMatrices>> + projmatrices = std::dynamic_pointer_cast< + ProjectedMatrices>>( + proj_matrices_); + assert(projmatrices); + + val = g_kbpsi_->getEvnl(ions, projmatrices.get()); + } } evnl_tm_.stop(); @@ -1149,7 +1176,8 @@ void 
MGmol::cleanup() } template <> -void MGmol::projectOutKernel(LocGridOrbitals& phi) +void MGmol>::projectOutKernel( + LocGridOrbitals& phi) { assert(aomm_ != nullptr); aomm_->projectOut(phi); @@ -1164,33 +1192,53 @@ void MGmol::projectOutKernel(OrbitalsType& phi) } template -void MGmol::setGamma( - const pb::Lap& lapOper, const Potentials& pot) +void MGmol::precond_mg(OrbitalsType& phi) { assert(orbitals_precond_); Control& ct = *(Control::instance()); - orbitals_precond_->setGamma( - lapOper, pot, ct.getMGlevels(), proj_matrices_.get()); -} + Potentials& pot = hamiltonian_->potential(); + pb::Lap* lapOper = hamiltonian_->lapOper(); -template -void MGmol::precond_mg(OrbitalsType& phi) -{ - assert(orbitals_precond_); + const short precision = ct.precond_precision_; + if (precision == 32) + { + using OrbitalsPrecond = MGOrbitalsPreconditioning; + + std::shared_ptr orbitals_precond + = std::dynamic_pointer_cast(orbitals_precond_); + + orbitals_precond->setGamma( + *lapOper, pot, ct.getMGlevels(), proj_matrices_.get()); + } + else if (precision == 64) + { + using OrbitalsPrecond = MGOrbitalsPreconditioning; - orbitals_precond_->precond_mg(phi); + std::shared_ptr orbitals_precond + = std::dynamic_pointer_cast(orbitals_precond_); + + orbitals_precond->setGamma( + *lapOper, pot, ct.getMGlevels(), proj_matrices_.get()); + } + else + { + std::cerr << "Precision " << precision + << " not supported for orbitals preconditioner!!!" 
+ << std::endl; + } + + orbitals_precond_->precond(phi); } template -double MGmol::computeResidual(OrbitalsType& orbitals, - OrbitalsType& work_orbitals, Ions& ions, OrbitalsType& res, - const bool print_residual, const bool norm_res) +double MGmol::computeResidual(OrbitalsType& phi, + OrbitalsType& hphi, Ions& ions, OrbitalsType& res, + const KBPsiMatrixSparse* const kbpsi, const bool print_residual, + const bool norm_res) { - assert(orbitals.getIterativeIndex() >= 0); - comp_res_tm_.start(); // os_<<"computeResidual()"<::computeResidual(OrbitalsType& orbitals, proj_matrices_->computeInvB(); - Potentials& pot = hamiltonian_->potential(); - pb::Lap* lapop = hamiltonian_->lapOper(); - - setGamma(*lapop, pot); - - // get H*psi stored in work_orbitals.psi + // get H*phi stored in hphi // and psi^T H psi in Hij - getHpsiAndTheta(ions, orbitals, work_orbitals); + getHpsiAndTheta(ions, phi, hphi, kbpsi); - double norm2Res = computeConstraintResidual( - orbitals, work_orbitals, res, print_residual, norm_res); + double norm2Res + = computeConstraintResidual(phi, hphi, res, print_residual, norm_res); - if (ct.isSpreadFunctionalEnergy()) addResidualSpreadPenalty(orbitals, res); + if (ct.isSpreadFunctionalEnergy()) addResidualSpreadPenalty(phi, res); comp_res_tm_.stop(); @@ -1280,7 +1323,7 @@ void MGmol::computeResidualUsingHPhi(OrbitalsType& psi, } // res = (B*phi*theta - H*phi) in [Ry] - res.axpy(-1., hphi); + res.axpy((ORBDTYPE)(-1.), hphi); } get_res_tm_.stop(); @@ -1337,30 +1380,17 @@ double MGmol::computePrecondResidual(OrbitalsType& phi, { Control& ct = *(Control::instance()); - proj_matrices_->computeInvB(); - - Potentials& pot = hamiltonian_->potential(); - pb::Lap* lapop = hamiltonian_->lapOper(); - - setGamma(*lapop, pot); - - // get H*psi stored in hphi - // and psi^T H psi in Hij - getHpsiAndTheta(ions, phi, hphi, kbpsi); - - double norm2Res - = computeConstraintResidual(phi, hphi, res, print_residual, norm_res); + double norm2Res = computeResidual( + phi, hphi, 
ions, res, kbpsi, print_residual, norm_res); if (ct.withPreconditioner()) { // PRECONDITIONING // compute the preconditioned steepest descent direction // -> res - orbitals_precond_->precond_mg(res); + precond_mg(res); } - // if( ct.isSpreadFunctionalActive() )addResidualSpreadPenalty(phi,res); - return norm2Res; } @@ -1443,8 +1473,8 @@ void MGmol::updateDMandEnergy(OrbitalsType& orbitals, Ions& ions, std::shared_ptr> dm_strategy( DMStrategyFactory>::create(comm_, os_, ions, - rho_.get(), energy_.get(), electrostat_.get(), this, - proj_matrices_.get(), &orbitals)); + rho_.get(), energy_.get(), electrostat_.get(), + hamiltonian_.get(), this, proj_matrices_.get(), &orbitals)); dm_strategy->update(orbitals); @@ -1495,6 +1525,8 @@ double MGmol::evaluateDMandEnergyAndForces(Orbitals* orbitals, const std::vector& tau, const std::vector& atnumbers, std::vector& forces) { + Control& ct = *(Control::instance()); + OrbitalsType* dorbitals = dynamic_cast(orbitals); // create a new temporary Ions object to be used for @@ -1517,13 +1549,25 @@ double MGmol::evaluateDMandEnergyAndForces(Orbitals* orbitals, proj_matrices_->updateThetaAndHB(); // compute DM - std::shared_ptr> dm_strategy( - DMStrategyFactory>::create(comm_, os_, ions, - rho_.get(), energy_.get(), electrostat_.get(), this, - proj_matrices_.get(), dorbitals)); + if (ct.rmatrices) + { + std::shared_ptr> dm_strategy( + DMStrategyFactory::create(comm_, + os_, ions, rho_.get(), energy_.get(), electrostat_.get(), + hamiltonian_.get(), this, proj_matrices_.get(), dorbitals)); + + dm_strategy->update(*dorbitals); + } + else + { + std::shared_ptr> dm_strategy( + DMStrategyFactory>::create(comm_, os_, ions, + rho_.get(), energy_.get(), electrostat_.get(), + hamiltonian_.get(), this, proj_matrices_.get(), dorbitals)); - dm_strategy->update(*dorbitals); + dm_strategy->update(*dorbitals); + } // evaluate energy and forces double ts = 0.; @@ -1537,11 +1581,13 @@ double MGmol::evaluateDMandEnergyAndForces(Orbitals* orbitals, 
return eks; } -template class MGmol; -template class MGmol; -template int MGmol::initial(); -template int MGmol::initial(); +template class MGmol>; +template class MGmol>; +template int MGmol>::initial(); +template int +MGmol>::initial(); #ifdef HAVE_MAGMA -template int MGmol::initial(); -template int MGmol::initial(); +template int MGmol>::initial(); +template int +MGmol>::initial(); #endif diff --git a/src/MGmol.h b/src/MGmol.h index a561e900..6738c58f 100644 --- a/src/MGmol.h +++ b/src/MGmol.h @@ -49,9 +49,9 @@ class IonicAlgorithm; #include "Forces.h" #include "Ions.h" #include "LocGridOrbitals.h" +#include "MGOrbitalsPreconditioning.h" #include "MGmolInterface.h" #include "OrbitalsExtrapolation.h" -#include "OrbitalsPreconditioning.h" #include "Rho.h" #include "SpreadPenaltyInterface.h" #include "SpreadsAndCenters.h" @@ -125,12 +125,6 @@ class MGmol : public MGmolInterface void getKBPsiAndHij(OrbitalsType& orbitals_i, OrbitalsType& orbitals_j, Ions& ions, KBPsiMatrixSparse* kbpsi, ProjectedMatricesInterface* projmatrices); - void getKBPsiAndHij(OrbitalsType& orbitals_i, OrbitalsType& orbitals_j, - Ions& ions, KBPsiMatrixSparse* kbpsi, - ProjectedMatricesInterface* projmatrices, - dist_matrix::DistMatrix& hij); - void getKBPsiAndHij(OrbitalsType& orbitals, Ions& ions, - KBPsiMatrixSparse* kbpsi, dist_matrix::DistMatrix& hij); void computeHnlPhiAndAdd2HPhi(Ions& ions, OrbitalsType& phi, OrbitalsType& hphi, const KBPsiMatrixSparse* const kbpsi); int dumpMDrestartFile(OrbitalsType& orbitals, Ions& ions, @@ -264,12 +258,6 @@ class MGmol : public MGmolInterface const Ions& ions, const KBPsiMatrixSparse* const kbpsi, ProjectedMatricesInterface*); - template - void addHlocal2matrix( - OrbitalsType& orbitalsi, OrbitalsType& orbitalsj, MatrixType& mat); - void addHlocal2matrix(OrbitalsType& orbitalsi, OrbitalsType& orbitalsj, - VariableSizeMatrix& mat); - void update_pot(const pb::GridFunc& vh_init, const Ions& ions); void update_pot(const Ions& ions); int 
quench(OrbitalsType& orbitals, Ions& ions, const int max_steps, @@ -336,10 +324,17 @@ class MGmol : public MGmolInterface void projectOutKernel(OrbitalsType& phi); void precond_mg(OrbitalsType& orbitals); - void setGamma(const pb::Lap& lapOper, const Potentials& pot); + double computeResidual(OrbitalsType& orbitals, OrbitalsType& work_orbitals, + Ions& ions, OrbitalsType& res, const KBPsiMatrixSparse* const kbpsi, + const bool print_residual, const bool norm_res); double computeResidual(OrbitalsType& orbitals, OrbitalsType& work_orbitals, Ions& ions, OrbitalsType& res, const bool print_residual, - const bool norm_res); + const bool norm_res) + { + return computeResidual(orbitals, work_orbitals, ions, res, + g_kbpsi_.get(), print_residual, norm_res); + } + void applyAOMMprojection(OrbitalsType&); void force(OrbitalsType& orbitals, Ions& ions) { diff --git a/src/MVPSolver.cc b/src/MVPSolver.cc index 699a01f3..97bcc825 100644 --- a/src/MVPSolver.cc +++ b/src/MVPSolver.cc @@ -44,8 +44,8 @@ double evalEntropyMVP(ProjectedMatricesInterface* projmatrices, template MVPSolver::MVPSolver(MPI_Comm comm, std::ostream& os, Ions& ions, Rho* rho, Energy* energy, - Electrostatic* electrostat, MGmol* mgmol_strategy, - const int numst, const double kbT, + Electrostatic* electrostat, Hamiltonian* hamiltonian, + MGmol* mgmol_strategy, const int numst, const double kbT, const std::vector>& global_indexes, const short n_inner_steps, const double mixing, const double tol_de0, const bool use_old_dm) @@ -69,6 +69,7 @@ MVPSolver::MVPSolver(MPI_Comm comm, std::ostream& os, rho_ = rho; energy_ = energy; electrostat_ = electrostat; + hamiltonian_ = hamiltonian; mgmol_strategy_ = mgmol_strategy; work_ = new MatrixType("workMVP", numst_, numst_); @@ -210,6 +211,10 @@ int MVPSolver::solve(OrbitalsType& orbitals) kbpsi.computeHvnlMatrix(&kbpsi, ions_, h11_nl); + OrbitalsType hphi("MVP_hphi", orbitals); + + MatrixType h11(h11_nl); + for (int inner_it = 0; inner_it < n_inner_steps_; inner_it++) { if 
(onpe0 && ct.verbose > 1) @@ -237,8 +242,15 @@ int MVPSolver::solve(OrbitalsType& orbitals) // compute h11 for the current potential by adding local part to // nonlocal components - MatrixType h11(h11_nl); - mgmol_strategy_->addHlocal2matrix(orbitals, orbitals, h11); + if (inner_it == 0) + { + hamiltonian_->applyLocal(numst_, orbitals, hphi); + } + else + { + hamiltonian_->applyDeltaPot(orbitals, hphi); + } + orbitals.addDotWithNcol2Matrix(hphi, h11); current_proj_mat->assignH(h11); current_proj_mat->setHB2H(); @@ -316,11 +328,8 @@ int MVPSolver::solve(OrbitalsType& orbitals) energy_->saveVofRho(); // update h11 - { - h11 = h11_nl; - mgmol_strategy_->addHlocal2matrix( - orbitals, orbitals, h11); - } + hamiltonian_->applyDeltaPot(orbitals, hphi); + orbitals.addDotWithNcol2Matrix(hphi, h11); proj_mat_work_->assignH(h11); proj_mat_work_->setHB2H(); @@ -402,10 +411,10 @@ void MVPSolver::printTimers(std::ostream& os) target_tm_.print(os); } -template class MVPSolver, dist_matrix::DistMatrix>; -template class MVPSolver, ReplicatedMatrix>; + +template class MVPSolver, dist_matrix::DistMatrix>; -#ifdef HAVE_MAGMA -template class MVPSolver; -#endif +template class MVPSolver, ReplicatedMatrix>; diff --git a/src/MVPSolver.h b/src/MVPSolver.h index 2e0c36b6..ac5022cc 100644 --- a/src/MVPSolver.h +++ b/src/MVPSolver.h @@ -10,6 +10,7 @@ #define MGMOL_MVPSOLVER_H #include "Energy.h" +#include "Hamiltonian.h" #include "MGmol.h" #include "Rho.h" #include "Timer.h" @@ -42,6 +43,7 @@ class MVPSolver Rho* rho_; Energy* energy_; Electrostatic* electrostat_; + Hamiltonian* hamiltonian_; MGmol* mgmol_strategy_; @@ -56,10 +58,9 @@ class MVPSolver void buildTarget_MVP(MatrixType& h11, MatrixType& s11, MatrixType& target); public: - MVPSolver(MPI_Comm comm, std::ostream& os, Ions& ions, - Rho* rho, Energy* energy, - Electrostatic* electrostat, MGmol* mgmol_strategy, - const int numst, const double kbT, + MVPSolver(MPI_Comm comm, std::ostream& os, Ions& ions, Rho*, + Energy*, Electrostatic*, 
Hamiltonian*, + MGmol* mgmol_strategy, const int numst, const double kbT, const std::vector>& global_indexes, const short n_inner_steps, const double mixing, const double tol_de0, const bool use_old_dm); diff --git a/src/MVP_DMStrategy.cc b/src/MVP_DMStrategy.cc index 3fb53dee..09657755 100644 --- a/src/MVP_DMStrategy.cc +++ b/src/MVP_DMStrategy.cc @@ -24,7 +24,7 @@ template MVP_DMStrategy::MVP_DMStrategy(MPI_Comm comm, ostream& os, Ions& ions, Rho* rho, Energy* energy, Electrostatic* electrostat, - MGmol* mgmol_strategy, + Hamiltonian* hamiltonian, MGmol* mgmol_strategy, const std::vector>& overlappingGids, ProjectedMatricesInterface* proj_matrices, const bool use_old_dm) : proj_matrices_(proj_matrices), @@ -34,6 +34,7 @@ MVP_DMStrategy::MVP_DMStrategy(MPI_Comm comm, rho_(rho), energy_(energy), electrostat_(electrostat), + hamiltonian_(hamiltonian), global_indexes_(overlappingGids), mgmol_strategy_(mgmol_strategy), use_old_dm_(use_old_dm) @@ -53,8 +54,8 @@ int MVP_DMStrategy::update(OrbitalsType& orbitals) } MVPSolver solver(comm_, os_, ions_, rho_, energy_, - electrostat_, mgmol_strategy_, ct.numst, ct.occ_width, global_indexes_, - ct.dm_inner_steps, ct.dm_mix, ct.dm_tol, use_old_dm_); + electrostat_, hamiltonian_, mgmol_strategy_, ct.numst, ct.occ_width, + global_indexes_, ct.dm_inner_steps, ct.dm_mix, ct.dm_tol, use_old_dm_); return solver.solve(orbitals); } @@ -71,9 +72,10 @@ void MVP_DMStrategy::dressDM() if (use_old_dm_) proj_matrices_->dressupDM(); } -template class MVP_DMStrategy>; -template class MVP_DMStrategy, dist_matrix::DistMatrix>; -#ifdef HAVE_MAGMA -template class MVP_DMStrategy; -#endif +template class MVP_DMStrategy, ReplicatedMatrix>; + +template class MVP_DMStrategy, + dist_matrix::DistMatrix>; +template class MVP_DMStrategy, ReplicatedMatrix>; diff --git a/src/MVP_DMStrategy.h b/src/MVP_DMStrategy.h index 2c16fbcc..04dbcad8 100644 --- a/src/MVP_DMStrategy.h +++ b/src/MVP_DMStrategy.h @@ -11,18 +11,18 @@ #define MGMOL_MVP_DMStrategy_H #include 
"DMStrategy.h" +#include "Electrostatic.h" #include "Energy.h" +#include "Hamiltonian.h" +#include "Ions.h" #include "MGmol.h" +#include "ProjectedMatricesInterface.h" #include "Rho.h" #include #include #include -class ProjectedMatricesInterface; -class Ions; -class Electrostatic; - template class MVP_DMStrategy : public DMStrategy { @@ -36,6 +36,7 @@ class MVP_DMStrategy : public DMStrategy Rho* rho_; Energy* energy_; Electrostatic* electrostat_; + Hamiltonian* hamiltonian_; const std::vector>& global_indexes_; MGmol* mgmol_strategy_; @@ -44,7 +45,8 @@ class MVP_DMStrategy : public DMStrategy public: MVP_DMStrategy(MPI_Comm comm, std::ostream& os, Ions& ions, Rho* rho, Energy* energy, - Electrostatic* electrostat, MGmol* mgmol_strategy, + Electrostatic* electrostat, Hamiltonian*, + MGmol* mgmol_strategy, const std::vector>& overlappingGids, ProjectedMatricesInterface* proj_matrices, const bool use_old_dm); diff --git a/src/NonOrthoDMStrategy.cc b/src/NonOrthoDMStrategy.cc index 28a8f1d5..b377c364 100644 --- a/src/NonOrthoDMStrategy.cc +++ b/src/NonOrthoDMStrategy.cc @@ -21,7 +21,7 @@ NonOrthoDMStrategy::NonOrthoDMStrategy( } template -void NonOrthoDMStrategy::initialize(OrbitalsType& orbitals) +void NonOrthoDMStrategy::initialize(OrbitalsType& /*orbitals*/) { Control& ct = *(Control::instance()); MGmol_MPI& mmpi = *(MGmol_MPI::instance()); @@ -35,7 +35,7 @@ void NonOrthoDMStrategy::initialize(OrbitalsType& orbitals) } template -int NonOrthoDMStrategy::update(OrbitalsType& orbitals) +int NonOrthoDMStrategy::update(OrbitalsType& /*orbitals*/) { assert(proj_matrices_ != nullptr); @@ -91,5 +91,5 @@ void NonOrthoDMStrategy::dressDM() if (mix_ < 1.) 
proj_matrices_->dressupDM(); } -template class NonOrthoDMStrategy; -template class NonOrthoDMStrategy; +template class NonOrthoDMStrategy>; +template class NonOrthoDMStrategy>; diff --git a/src/OrbitalsExtrapolation.cc b/src/OrbitalsExtrapolation.cc index d4b96b85..f6ddaaf0 100644 --- a/src/OrbitalsExtrapolation.cc +++ b/src/OrbitalsExtrapolation.cc @@ -79,5 +79,5 @@ void OrbitalsExtrapolation::setupPreviousOrbitals( *orbitals = new_orbitals; } -template class OrbitalsExtrapolation; -template class OrbitalsExtrapolation; +template class OrbitalsExtrapolation>; +template class OrbitalsExtrapolation>; diff --git a/src/OrbitalsExtrapolationOrder2.cc b/src/OrbitalsExtrapolationOrder2.cc index 869dcd47..4fdea147 100644 --- a/src/OrbitalsExtrapolationOrder2.cc +++ b/src/OrbitalsExtrapolationOrder2.cc @@ -53,7 +53,7 @@ void OrbitalsExtrapolationOrder2::extrapolate_orbitals( getProcrustesTransform(matQ, yyt); orbitals_minus1->multiply_by_matrix(matQ); - orbitals_minus1->axpy(-1., *new_orbitals); + orbitals_minus1->axpy((ORBDTYPE)-1., *new_orbitals); orbitals_minus1->multiply_by_matrix(yyt); } else @@ -61,7 +61,7 @@ void OrbitalsExtrapolationOrder2::extrapolate_orbitals( new_orbitals->scal(2.); } - new_orbitals->axpy(-1., *orbitals_minus1); + new_orbitals->axpy((ORBDTYPE)-1., *orbitals_minus1); delete orbitals_minus1; } @@ -89,5 +89,5 @@ void OrbitalsExtrapolationOrder2::extrapolate_orbitals( } } -template class OrbitalsExtrapolationOrder2; -template class OrbitalsExtrapolationOrder2; +template class OrbitalsExtrapolationOrder2>; +template class OrbitalsExtrapolationOrder2>; diff --git a/src/OrbitalsExtrapolationOrder3.cc b/src/OrbitalsExtrapolationOrder3.cc index f9718b21..e0586bab 100644 --- a/src/OrbitalsExtrapolationOrder3.cc +++ b/src/OrbitalsExtrapolationOrder3.cc @@ -56,7 +56,7 @@ void OrbitalsExtrapolationOrder3::extrapolate_orbitals( // compute delta Phi tmp_orbitals_minus1.assign(*orbitals_minus1_); - tmp_orbitals_minus1.axpy(-1., *new_orbitals); + 
tmp_orbitals_minus1.axpy((ORBDTYPE)-1., *new_orbitals); tmp_orbitals_minus1.multiply_by_matrix(yyt); if (orbitals_minus2_ != nullptr) @@ -67,7 +67,7 @@ void OrbitalsExtrapolationOrder3::extrapolate_orbitals( orbitals_minus2_->multiply_by_matrix(matQ); // compute delta Phi - orbitals_minus2_->axpy(-1., *orbitals_minus1_); + orbitals_minus2_->axpy((ORBDTYPE)-1., *orbitals_minus1_); orbitals_minus2_->multiply_by_matrix(yyt); } } @@ -75,17 +75,17 @@ void OrbitalsExtrapolationOrder3::extrapolate_orbitals( { tmp_orbitals_minus1.assign(*orbitals_minus1_); if (orbitals_minus2_ != nullptr) - orbitals_minus2_->axpy(-1., *orbitals_minus1_); + orbitals_minus2_->axpy((ORBDTYPE)-1., *orbitals_minus1_); if (ct.verbose > 1 && onpe0) (*MPIdata::sout) << "Compute tmp_orbitals_minus1..." << std::endl; - tmp_orbitals_minus1.axpy(-1., *new_orbitals); + tmp_orbitals_minus1.axpy((ORBDTYPE)-1., *new_orbitals); } if (orbitals_minus2_ != nullptr) { - new_orbitals->axpy(-2., tmp_orbitals_minus1); - new_orbitals->axpy(1., *orbitals_minus2_); + new_orbitals->axpy((ORBDTYPE)-2., tmp_orbitals_minus1); + new_orbitals->axpy((ORBDTYPE)1., *orbitals_minus2_); delete orbitals_minus2_; } @@ -95,7 +95,7 @@ void OrbitalsExtrapolationOrder3::extrapolate_orbitals( (*MPIdata::sout) << "Extrapolate orbitals using 2nd order " "scheme only for this step..." 
<< std::endl; - new_orbitals->axpy(-1., tmp_orbitals_minus1); + new_orbitals->axpy((ORBDTYPE)-1., tmp_orbitals_minus1); } orbitals_minus2_ = orbitals_minus1_; @@ -122,5 +122,5 @@ void OrbitalsExtrapolationOrder3::extrapolate_orbitals( } } -template class OrbitalsExtrapolationOrder3; -template class OrbitalsExtrapolationOrder3; +template class OrbitalsExtrapolationOrder3>; +template class OrbitalsExtrapolationOrder3>; diff --git a/src/OrbitalsPreconditioning.cc b/src/OrbitalsPreconditioning.cc deleted file mode 100644 index 82754f60..00000000 --- a/src/OrbitalsPreconditioning.cc +++ /dev/null @@ -1,154 +0,0 @@ -// Copyright (c) 2017, Lawrence Livermore National Security, LLC and -// UT-Battelle, LLC. -// Produced at the Lawrence Livermore National Laboratory and the Oak Ridge -// National Laboratory. -// LLNL-CODE-743438 -// All rights reserved. -// This file is part of MGmol. For details, see https://github.com/llnl/mgmol. -// Please also read this link https://github.com/llnl/mgmol/LICENSE - -#include "OrbitalsPreconditioning.h" - -#include "Control.h" -#include "ExtendedGridOrbitals.h" -#include "LocGridOrbitals.h" -#include "LocalizationRegions.h" -#include "MasksSet.h" -#include "Mesh.h" -#include "Potentials.h" -#include "Preconditioning.h" -#include "ProjectedMatricesInterface.h" - -template -OrbitalsPreconditioning::~OrbitalsPreconditioning() -{ - assert(is_set_); - assert(precond_ != nullptr); - - delete precond_; - delete map2masks_; - - if (gfv_work_ != nullptr) - { - delete gfv_work_; - gfv_work_ = nullptr; - } - if (gfv_work2_ != nullptr) - { - delete gfv_work2_; - gfv_work2_ = nullptr; - } -} - -template -void OrbitalsPreconditioning::setup(T& orbitals, const short mg_levels, - const short lap_type, MasksSet* currentMasks, - const std::shared_ptr& lrs) -{ - assert(!is_set_); - - lap_type_ = lap_type; - - Control& ct(*(Control::instance())); - Mesh* mymesh = Mesh::instance(); - const pb::Grid& mygrid(mymesh->grid()); - - precond_ = new Preconditioning( 
- lap_type, mg_levels, mygrid, ct.bcWF); - - if (currentMasks != nullptr) - { - // set masks in GridFuncVector class - map2masks_ = new Map2Masks(currentMasks, lrs->getOverlapGids()); - pb::GridFuncVector::setMasks( - map2masks_); - } - else - map2masks_ = nullptr; - - precond_->setup(orbitals.getOverlappingGids()); - - assert(orbitals.chromatic_number() - == static_cast(orbitals.getOverlappingGids()[0].size())); - - gfv_work_ = new pb::GridFuncVector(mygrid, - ct.bcWF[0], ct.bcWF[1], ct.bcWF[2], orbitals.getOverlappingGids()); - - gfv_work2_ - = new pb::GridFuncVector(mygrid, - ct.bcWF[0], ct.bcWF[1], ct.bcWF[2], orbitals.getOverlappingGids()); - - is_set_ = true; - - assert(gfv_work2_); -} - -template -void OrbitalsPreconditioning::precond_mg(T& orbitals) -{ - assert(is_set_); - assert(precond_ != nullptr); - assert(gamma_ > 0.); - assert(gfv_work_ != nullptr); - -#ifdef PRINT_OPERATIONS - if (onpe0) (*MPIdata::sout) << "T::precond_mg()..." << endl; -#endif - precond_tm_.start(); - - gfv_work_->resetData(); - - // store residual in GridFuncVector container - // used for ghost values (no ghost values needed) - orbitals.setDataWithGhosts(gfv_work2_); - gfv_work_->axpy(gamma_, *gfv_work2_); - - // block-implemented preconditioner - precond_->mg(*gfv_work_, *gfv_work2_, lap_type_, 0); - - orbitals.setPsi(*gfv_work_); - -#ifdef PRINT_OPERATIONS - if (onpe0) - (*MPIdata::sout) << "OrbitalsPreconditioning::precond_mg() done" - << endl; -#endif - precond_tm_.stop(); -} - -template -void OrbitalsPreconditioning::setGamma(const pb::Lap& lapOper, - const Potentials& pot, const short mg_levels, - ProjectedMatricesInterface* proj_matrices) -{ - assert(precond_ != nullptr); - assert(is_set_); - - const double small_eig = proj_matrices->getLowestEigenvalue(); - double diag = lapOper.invDiagEl(); - double vmax = pot.max(); - - // diag * 4^{N_level+1} - // gamma = inverse of the largest eigenvalue for the low frequency error - gamma_ = diag; - for (short ln = 0; ln <= mg_levels; 
ln++) - { - gamma_ *= 4.; - } - gamma_ = 1.0 / (2.0 / gamma_ + fabs(vmax - small_eig)); -#ifdef DEBUG - Control& ct(*(Control::instance())); - if (onpe0 && ct.verbose > 2) - (*MPIdata::sout) << " time step for low frequencies corrections = " - << gamma_ << endl; -#endif -} - -template -void OrbitalsPreconditioning::printTimers(std::ostream& os) -{ - precond_tm_.print(os); -} - -template class OrbitalsPreconditioning; -template class OrbitalsPreconditioning; diff --git a/src/OrbitalsPreconditioning.h b/src/OrbitalsPreconditioning.h index 7a26261e..0501388c 100644 --- a/src/OrbitalsPreconditioning.h +++ b/src/OrbitalsPreconditioning.h @@ -10,67 +10,22 @@ #ifndef MGMOL_OrbitalsPreconditioning_H #define MGMOL_OrbitalsPreconditioning_H -#include "GridFuncVector.h" -#include "Lap.h" -#include "Map2Masks.h" -#include "Preconditioning.h" +#include "LocalizationRegions.h" +#include "MasksSet.h" -#include - -class Masks4Orbitals; -class MasksSet; -class ProjectedMatricesInterface; -class Potentials; -class LocalizationRegions; - -template +template class OrbitalsPreconditioning { -private: -#ifdef HAVE_MAGMA - using memory_space_type = MemorySpace::Device; -#else - using memory_space_type = MemorySpace::Host; -#endif - - Preconditioning* precond_; - pb::GridFuncVector* gfv_work_; - - pb::GridFuncVector* gfv_work2_; - - short lap_type_; - - // coefficient for preconditioning - double gamma_; - - bool is_set_; - - // timers - static Timer precond_tm_; - - Map2Masks* map2masks_; - public: - OrbitalsPreconditioning() - { - is_set_ = false; - precond_ = nullptr; - gfv_work_ = nullptr; - gfv_work2_ = nullptr; - }; + OrbitalsPreconditioning(){}; - ~OrbitalsPreconditioning(); + virtual ~OrbitalsPreconditioning(){}; - void setup(T& orbitals, const short mg_levels, const short lap_type, - MasksSet*, const std::shared_ptr&); - void precond_mg(T& orbitals); - void setGamma(const pb::Lap& lapOper, const Potentials& pot, - const short mg_levels, ProjectedMatricesInterface* proj_matrices); 
- static void printTimers(std::ostream& os); -}; + virtual void setup(OrbitalsType& orbitals, MasksSet*, + const std::shared_ptr&) + = 0; -template -Timer OrbitalsPreconditioning::precond_tm_( - "OrbitalsPreconditioning::precond"); + virtual void precond(OrbitalsType& orbitals) = 0; +}; #endif diff --git a/src/OrthoAndersonMix.cc b/src/OrthoAndersonMix.cc index 64fad2b3..8838cc8d 100644 --- a/src/OrthoAndersonMix.cc +++ b/src/OrthoAndersonMix.cc @@ -25,5 +25,5 @@ void OrthoAndersonMix::postprocessUpdate() } } -template class OrthoAndersonMix; -template class OrthoAndersonMix; +template class OrthoAndersonMix>; +template class OrthoAndersonMix>; diff --git a/src/PBEonGrid.cc b/src/PBEonGrid.cc index 2c0603e3..e2d98c39 100644 --- a/src/PBEonGrid.cc +++ b/src/PBEonGrid.cc @@ -133,5 +133,5 @@ double PBEonGrid::getExc() const return mygrid.vel() * pbe_->computeRhoDotExc(); } -template class PBEonGrid; -template class PBEonGrid; +template class PBEonGrid>; +template class PBEonGrid>; diff --git a/src/PBEonGridSpin.cc b/src/PBEonGridSpin.cc index 8fe58b90..aada0fae 100644 --- a/src/PBEonGridSpin.cc +++ b/src/PBEonGridSpin.cc @@ -185,5 +185,5 @@ double PBEonGridSpin::getExc() const return exc * mygrid.vel(); } -template class PBEonGridSpin; -template class PBEonGridSpin; +template class PBEonGridSpin>; +template class PBEonGridSpin>; diff --git a/src/PCGSolver.cc b/src/PCGSolver.cc index b3bd7ec3..254b1a23 100644 --- a/src/PCGSolver.cc +++ b/src/PCGSolver.cc @@ -12,8 +12,8 @@ #include #include -template -void PCGSolver::clear() +template +void PCGSolver::clear() { for (short i = 0; i < (short)precond_oper_.size(); i++) { @@ -34,7 +34,7 @@ void PCGSolver::clear() assert(gf_newv_[i] != nullptr); delete gf_newv_[i]; } - // delete grids after pb::GridFunc objects since those + // delete grids after pb::GridFunc objects since those // have data members references to grids for (short i = 0; i < (short)grid_.size(); i++) { @@ -47,10 +47,10 @@ void PCGSolver::clear() 
gf_newv_.clear(); } -template -void PCGSolver::setupPrecon() +template +void PCGSolver::setupPrecon() { - // check if precon is already setup + // check if preconditioner is already setup // Assumes operator does not change, hence // a single setup is sufficient if (is_precond_setup_) return; @@ -60,13 +60,12 @@ void PCGSolver::setupPrecon() grid_.push_back(mygrid); const short nghosts = mygrid->ghost_pt(); - pb::Lap* myoper - = LapFactory::createLap(*grid_[0], lap_type_); + pb::Lap* myoper + = LapFactory::createLap(*grid_[0], precond_lap_type_); precond_oper_.push_back(myoper); - pb::GridFunc* gf_work - = new pb::GridFunc( - *grid_[0], bc_[0], bc_[1], bc_[2]); + pb::GridFunc* gf_work + = new pb::GridFunc(*grid_[0], bc_[0], bc_[1], bc_[2]); gf_work_.push_back(gf_work); // coarse levels @@ -89,20 +88,20 @@ void PCGSolver::setupPrecon() pb::Grid* coarse_grid = new pb::Grid(mygrid->coarse_grid()); grid_.push_back(coarse_grid); - pb::Lap* myoper - = LapFactory::createLap(*coarse_grid, 1); + pb::Lap* myoper + = LapFactory::createLap(*coarse_grid, 1); precond_oper_.push_back(myoper); - gf_work = new pb::GridFunc( + gf_work = new pb::GridFunc( *coarse_grid, bc_[0], bc_[1], bc_[2]); gf_work_.push_back(gf_work); - pb::GridFunc* gf_rcoarse - = new pb::GridFunc( + pb::GridFunc* gf_rcoarse + = new pb::GridFunc( *coarse_grid, bc_[0], bc_[1], bc_[2]); gf_rcoarse_.push_back(gf_rcoarse); - pb::GridFunc* gf_newv - = new pb::GridFunc( + pb::GridFunc* gf_newv + = new pb::GridFunc( *coarse_grid, bc_[0], bc_[1], bc_[2]); gf_newv_.push_back(gf_newv); @@ -112,10 +111,10 @@ void PCGSolver::setupPrecon() } // MG V-cycle with no mask -template -void PCGSolver::preconSolve( - pb::GridFunc& gf_v, - const pb::GridFunc& gf_f, const short level) +template +void PCGSolver::preconSolve( + pb::GridFunc& gf_v, + const pb::GridFunc& gf_f, const short level) { //(*MPIdata::sout)<<"Preconditioning::mg() at level "<::preconSolve( ncycl = 4 > (nu1_ + nu2_) ? 
4 : (nu1_ + nu2_); } - pb::Lap* myoper = precond_oper_[level]; + pb::Lap* myoper = precond_oper_[level]; - // SMOOTHING + // pre-smoothing for (short it = 0; it < ncycl; it++) { myoper->jacobi(gf_v, gf_f, *gf_work_[level]); @@ -137,11 +136,11 @@ void PCGSolver::preconSolve( // COARSE GRID CORRECTION // restrictions - pb::GridFunc* rcoarse = gf_rcoarse_[level]; + pb::GridFunc* rcoarse = gf_rcoarse_[level]; gf_work_[level]->restrict3D(*rcoarse); // storage functions for coarse grid - pb::GridFunc* newv = gf_newv_[level]; + pb::GridFunc* newv = gf_newv_[level]; // call mgrid solver on a coarser level newv->resetData(); @@ -161,28 +160,36 @@ void PCGSolver::preconSolve( } // Left Preconditioned CG -template -bool PCGSolver::solve( - pb::GridFunc& gf_phi, const pb::GridFunc& gf_rhs) +template +bool PCGSolver::solve( + pb::GridFunc& gf_phi, + const pb::GridFunc& gf_rhs) { bool converged = false; const pb::Grid& finegrid = gf_phi.grid(); // initial data and residual - We assume a nonzero initial guess - pb::GridFunc lhs(finegrid, bc_[0], bc_[1], bc_[2]); + pb::GridFunc lhs(finegrid, bc_[0], bc_[1], bc_[2]); // scale initial guess with epsilon oper_.inv_transform(gf_phi); // compute initial residual: r := b - Ax /* compute Ax */ oper_.apply(gf_phi, lhs); - /* set r = b */ - pb::GridFunc res(gf_rhs); - oper_.transform(res); - /* compute r = r - Ax */ + // set r = b + pb::GridFunc rhs(gf_rhs); + + // transform r.h.s. to account for dielectric model + oper_.transform(rhs); + + // apply Mehrstelllen r.h.s. 
if appropriate + pb::GridFunc res(finegrid, bc_[0], bc_[1], bc_[2]); + oper_.rhs(rhs, res); + + // compute r = r - Ax res -= lhs; - double init_rnorm = res.norm2(); - assert(init_rnorm == init_rnorm); + const double init_rnorm = res.norm2(); + assert(!std::isnan(init_rnorm)); // cout<<"init_rnorm="<::solve( double rnorm = init_rnorm; - /* preconditioned residual as type POISSONPRECONDTYPE */ - pb::GridFunc prec_z(finegrid, bc_[0], bc_[1], bc_[2]); - pb::GridFunc prec_res(res); + /* preconditioned residual as type PrecondDataType */ + pb::GridFunc prec_z(finegrid, bc_[0], bc_[1], bc_[2]); + pb::GridFunc prec_res(res); /* preconditioning step */ prec_z.setValues(0.); preconSolve(prec_z, prec_res, 0); - pb::GridFunc z(prec_z); + pb::GridFunc z(prec_z); // conjugate vectors - pb::GridFunc p(prec_z); - pb::GridFunc ap(p.grid(), bc_[0], bc_[1], bc_[2]); + pb::GridFunc p(prec_z); + pb::GridFunc ap(p.grid(), bc_[0], bc_[1], bc_[2]); double rtz = res.gdot(z); @@ -213,7 +220,7 @@ bool PCGSolver::solve( double ptap = p.gdot(ap); double alp = rtz / ptap; - assert(alp == alp); + assert(!std::isnan(alp)); // update solution gf_phi.axpy(alp, p); @@ -248,15 +255,14 @@ bool PCGSolver::solve( return converged; } -// Left Preconditioned CG -template -bool PCGSolver::solve( - ScalarType* phi, ScalarType* rhs, const char dis) +template +bool PCGSolver::solve( + ScalarDataType* phi, ScalarDataType* rhs, const char dis) { - pb::GridFunc gf_phi(oper_.grid(), bc_[0], bc_[1], bc_[2]); + pb::GridFunc gf_phi(oper_.grid(), bc_[0], bc_[1], bc_[2]); gf_phi.assign(phi, dis); - pb::GridFunc gf_work(oper_.grid(), bc_[0], bc_[1], bc_[2]); + pb::GridFunc gf_work(oper_.grid(), bc_[0], bc_[1], bc_[2]); gf_work.assign(rhs, dis); bool converged = solve(gf_phi, gf_work); @@ -266,15 +272,21 @@ bool PCGSolver::solve( return converged; } -template class PCGSolver, double>; -template class PCGSolver, float>; -template class PCGSolver, double>; -template class PCGSolver, float>; -template class PCGSolver, 
double>; -template class PCGSolver, float>; -template class PCGSolver, double>; -template class PCGSolver, float>; -template class PCGSolver, double>; -template class PCGSolver, float>; -template class PCGSolver, double>; -template class PCGSolver, float>; +template class PCGSolver, double, double>; +template class PCGSolver, double, float>; +template class PCGSolver, float, float>; +template class PCGSolver, double, double>; +template class PCGSolver, double, float>; +template class PCGSolver, float, float>; +template class PCGSolver, double, double>; +template class PCGSolver, double, float>; +template class PCGSolver, float, float>; +template class PCGSolver, double, double>; +template class PCGSolver, double, float>; +template class PCGSolver, float, float>; +template class PCGSolver, double, double>; +template class PCGSolver, double, float>; +template class PCGSolver, float, float>; +template class PCGSolver, double, double>; +template class PCGSolver, double, float>; +template class PCGSolver, float, float>; diff --git a/src/PCGSolver.h b/src/PCGSolver.h index f6db7cc6..0782775a 100644 --- a/src/PCGSolver.h +++ b/src/PCGSolver.h @@ -17,23 +17,23 @@ #include -template +template class PCGSolver { private: std::vector grid_; - short lap_type_; + short precond_lap_type_; short bc_[3]; bool fully_periodic_; // operator to solve for - T oper_; + OperatorType oper_; // preconditioner operator for each MG level - std::vector*> precond_oper_; - std::vector*> gf_work_; - std::vector*> gf_rcoarse_; - std::vector*> gf_newv_; + std::vector*> precond_oper_; + std::vector*> gf_work_; + std::vector*> gf_rcoarse_; + std::vector*> gf_newv_; // solver parameters int maxiters_; @@ -48,23 +48,24 @@ class PCGSolver short nlevels_; bool is_precond_setup_; - void preconSolve(pb::GridFunc& gf_v, - const pb::GridFunc& gf_f, const short level = 0); + void preconSolve(pb::GridFunc& gf_v, + const pb::GridFunc& gf_f, const short level = 0); void setupPrecon(); void clear(); public: - 
PCGSolver(T& oper, const short px, const short py, const short pz) - : oper_(oper) + PCGSolver( + OperatorType& oper, const short px, const short py, const short pz) + : oper_(oper), + maxiters_(10), + tol_(1.e-16), + final_residual_(-1.), + residual_reduction_(-1.), + nu1_(2), + nu2_(2), + max_nlevels_(10), + is_precond_setup_(false) { - maxiters_ = 10; // default - nu1_ = 2; // default - nu2_ = 2; // default - tol_ = 1.e-16; - max_nlevels_ = 10; - final_residual_ = -1.; - residual_reduction_ = -1.; - // boundary conditions bc_[0] = px; bc_[1] = py; @@ -72,8 +73,7 @@ class PCGSolver fully_periodic_ = ((bc_[0] == 1) && (bc_[1] == 1) && (bc_[2] == 1)); Control& ct = *(Control::instance()); - lap_type_ = ct.lap_type; - is_precond_setup_ = false; + precond_lap_type_ = ct.lap_type; }; void setup(const short nu1, const short nu2, const short max_sweeps, @@ -87,16 +87,17 @@ class PCGSolver setupPrecon(); } - bool solve(pb::GridFunc& gf_phi, - const pb::GridFunc& gf_rhs); + bool solve(pb::GridFunc& gf_phi, + const pb::GridFunc& gf_rhs); - bool solve(ScalarType* phi, ScalarType* rhs, const char dis); + /*! + * Interface for raw pointers + */ + bool solve(ScalarDataType* phi, ScalarDataType* rhs, const char dis); double getFinalResidual() const { return final_residual_; } double getResidualReduction() const { return residual_reduction_; } - T* getOperator() { return &oper_; } - // Destructor ~PCGSolver() { clear(); } }; diff --git a/src/Poisson.h b/src/Poisson.h index 4bdfeeab..f7af6468 100644 --- a/src/Poisson.h +++ b/src/Poisson.h @@ -7,9 +7,8 @@ // This file is part of MGmol. For details, see https://github.com/llnl/mgmol. 
// Please also read this link https://github.com/llnl/mgmol/LICENSE -// $Id$ -#ifndef included_Poisson -#define included_Poisson +#ifndef MGMOL_included_Poisson +#define MGMOL_included_Poisson #include "PoissonInterface.h" @@ -24,8 +23,6 @@ class Poisson : public PoissonInterface { protected: - // static Timer poisson_tm_; - const pb::Grid& grid_; pb::GridFunc* vh_; @@ -50,7 +47,7 @@ class Poisson : public PoissonInterface }; // Destructor - ~Poisson() override { delete vh_; }; + virtual ~Poisson() override { delete vh_; }; virtual void setup(const short nu1, const short nu2, const short max_sweeps, const double tol, const short max_nlevels, diff --git a/src/PoissonSolverFactory.h b/src/PoissonSolverFactory.h index 91b99bf2..879dfc8a 100644 --- a/src/PoissonSolverFactory.h +++ b/src/PoissonSolverFactory.h @@ -12,24 +12,79 @@ #include "Control.h" #include "Hartree.h" #include "Hartree_CG.h" -#include "Mesh.h" -#include "PBdiel.h" -#include "PBdiel_CG.h" -#include "ShiftedHartree.h" -#include "mputils.h" - -#include "GridFunc.h" #include "Laph2.h" #include "Laph4.h" #include "Laph4M.h" #include "Laph4MP.h" #include "Laph6.h" #include "Laph8.h" +#include "MGmol_MPI.h" +#include "Mesh.h" +#include "PBdiel.h" +#include "PBdiel_CG.h" +#include "ShiftedHartree.h" #include "ShiftedLaph4M.h" -class PoissonSolverFactory +/*! 
+ * Create Hartree_CG solver templated on data type and preconditioner data type + */ +template +Poisson* createHartreeCG( + PoissonFDtype lap_type, const pb::Grid& myGrid, const short bc[3]) { + MGmol_MPI& mmpi = *(MGmol_MPI::instance()); + if (mmpi.instancePE0()) + { + std::cout << "create HartreeCG with precision " + << 8 * sizeof(ScalarType) << std::endl; + std::cout << "HartreeCG with preconditioner in precision " + << 8 * sizeof(PDataType) << std::endl; + } + Poisson* poisson_solver = nullptr; + + switch (lap_type) + { + case PoissonFDtype::h4M: + poisson_solver + = new Hartree_CG, ScalarType, PDataType>( + myGrid, bc); + break; + case PoissonFDtype::h2: + poisson_solver + = new Hartree_CG, ScalarType, PDataType>( + myGrid, bc); + break; + case PoissonFDtype::h4: + poisson_solver + = new Hartree_CG, ScalarType, PDataType>( + myGrid, bc); + break; + case PoissonFDtype::h6: + poisson_solver + = new Hartree_CG, ScalarType, PDataType>( + myGrid, bc); + break; + case PoissonFDtype::h8: + poisson_solver + = new Hartree_CG, ScalarType, PDataType>( + myGrid, bc); + break; + case PoissonFDtype::h4MP: + poisson_solver = new Hartree_CG, ScalarType, + PDataType>(myGrid, bc); + break; + default: + std::cerr << "createHartreeCG(), Undefined option: " + << static_cast(lap_type) << std::endl; + } + return poisson_solver; +} +/*! + * Main factory + */ +class PoissonSolverFactory +{ public: /*! 
* return specific Poisson solver needed to solve Hartree problem @@ -48,13 +103,13 @@ class PoissonSolverFactory { case PoissonFDtype::h4M: poisson_solver - = new ShiftedHartree>( + = new ShiftedHartree>( myGrid, bc, screening_const); break; default: - (*MPIdata::sout) - << "Electrostatic, shifted, Undefined option: " - << static_cast(lap_type) << std::endl; + std::cerr << "PoissonSolverFactory, shifted, Undefined " + "option: " + << static_cast(lap_type) << std::endl; } } else @@ -63,32 +118,31 @@ class PoissonSolverFactory { case PoissonFDtype::h4M: poisson_solver - = new Hartree>(myGrid, bc); + = new Hartree>(myGrid, bc); break; case PoissonFDtype::h2: poisson_solver - = new Hartree>(myGrid, bc); + = new Hartree>(myGrid, bc); break; case PoissonFDtype::h4: poisson_solver - = new Hartree>(myGrid, bc); + = new Hartree>(myGrid, bc); break; case PoissonFDtype::h6: poisson_solver - = new Hartree>(myGrid, bc); + = new Hartree>(myGrid, bc); break; case PoissonFDtype::h8: poisson_solver - = new Hartree>(myGrid, bc); + = new Hartree>(myGrid, bc); break; case PoissonFDtype::h4MP: poisson_solver - = new Hartree>(myGrid, bc); + = new Hartree>(myGrid, bc); break; default: - (*MPIdata::sout) - << "Electrostatic, Undefined option: " - << static_cast(lap_type) << std::endl; + std::cerr << "PoissonSolverFactory, Undefined option: " + << static_cast(lap_type) << std::endl; } } } @@ -100,47 +154,30 @@ class PoissonSolverFactory { case PoissonFDtype::h4M: poisson_solver - = new ShiftedHartree>( + = new ShiftedHartree>( myGrid, bc, screening_const); break; default: - (*MPIdata::sout) - << "PCG Electrostatic, shifted, Undefined option: " + std::cerr + << "PoissonSolverFactory, with screening_const, " + "Undefined option: " << static_cast(lap_type) << std::endl; } } else { - switch (lap_type) + const short precision = ct.poisson_pc_data_; + if (precision == 32) + poisson_solver + = createHartreeCG(lap_type, myGrid, bc); + else if (precision == 64) + poisson_solver + = 
createHartreeCG(lap_type, myGrid, bc); + else { - case PoissonFDtype::h4M: - poisson_solver - = new Hartree_CG>(myGrid, bc); - break; - case PoissonFDtype::h2: - poisson_solver - = new Hartree_CG>(myGrid, bc); - break; - case PoissonFDtype::h4: - poisson_solver - = new Hartree_CG>(myGrid, bc); - break; - case PoissonFDtype::h6: - poisson_solver - = new Hartree_CG>(myGrid, bc); - break; - case PoissonFDtype::h8: - poisson_solver - = new Hartree_CG>(myGrid, bc); - break; - case PoissonFDtype::h4MP: - poisson_solver - = new Hartree_CG>(myGrid, bc); - break; - default: - (*MPIdata::sout) - << "PCG Electrostatic, Undefined option: " - << static_cast(lap_type) << std::endl; + std::cerr + << "PoissonSolverFactory: Unknown precision option " + << precision << std::endl; } } } @@ -160,32 +197,31 @@ class PoissonSolverFactory switch (lap_type) { case PoissonFDtype::h4M: - poisson_solver = new PBdiel>( + poisson_solver = new PBdiel>( pbGrid, bc, e0, rho0, drho0); break; case PoissonFDtype::h2: - poisson_solver = new PBdiel>( + poisson_solver = new PBdiel>( pbGrid, bc, e0, rho0, drho0); break; case PoissonFDtype::h4: - poisson_solver = new PBdiel>( + poisson_solver = new PBdiel>( pbGrid, bc, e0, rho0, drho0); break; case PoissonFDtype::h6: - poisson_solver = new PBdiel>( + poisson_solver = new PBdiel>( pbGrid, bc, e0, rho0, drho0); break; case PoissonFDtype::h8: - poisson_solver = new PBdiel>( + poisson_solver = new PBdiel>( pbGrid, bc, e0, rho0, drho0); break; case PoissonFDtype::h4MP: - poisson_solver = new PBdiel>( + poisson_solver = new PBdiel>( pbGrid, bc, e0, rho0, drho0); break; default: - (*MPIdata::sout) - << "Electrostatic, Undefined option" << std::endl; + std::cerr << "createDiel(), Undefined option" << std::endl; } } else // use PCG for Poisson Solver @@ -193,32 +229,31 @@ class PoissonSolverFactory switch (lap_type) { case PoissonFDtype::h4M: - poisson_solver = new PBdiel_CG>( + poisson_solver = new PBdiel_CG>( pbGrid, bc, e0, rho0, drho0); break; case 
PoissonFDtype::h2: - poisson_solver = new PBdiel_CG>( + poisson_solver = new PBdiel_CG>( pbGrid, bc, e0, rho0, drho0); break; case PoissonFDtype::h4: - poisson_solver = new PBdiel_CG>( + poisson_solver = new PBdiel_CG>( pbGrid, bc, e0, rho0, drho0); break; case PoissonFDtype::h6: - poisson_solver = new PBdiel_CG>( + poisson_solver = new PBdiel_CG>( pbGrid, bc, e0, rho0, drho0); break; case PoissonFDtype::h8: - poisson_solver = new PBdiel_CG>( + poisson_solver = new PBdiel_CG>( pbGrid, bc, e0, rho0, drho0); break; case PoissonFDtype::h4MP: - poisson_solver = new PBdiel_CG>( + poisson_solver = new PBdiel_CG>( pbGrid, bc, e0, rho0, drho0); break; default: - (*MPIdata::sout) - << "Electrostatic, Undefined option" << std::endl; + std::cerr << "createDiel(), Undefined option" << std::endl; } } return poisson_solver; diff --git a/src/PolakRibiereSolver.cc b/src/PolakRibiereSolver.cc index 3f46a00e..ca25b1cc 100644 --- a/src/PolakRibiereSolver.cc +++ b/src/PolakRibiereSolver.cc @@ -262,7 +262,7 @@ double PolakRibiereSolver::computeBeta( OrbitalsType& work_orbitals) const { work_orbitals.assign(*r_k_); - work_orbitals.axpy(-1., *r_km1_); + work_orbitals.axpy((ORBDTYPE)(-1.), *r_km1_); double beta = z_k_->dotProduct(work_orbitals, 2); @@ -453,7 +453,7 @@ int PolakRibiereSolver::solve(OrbitalsType& orbitals, if (onpe0 && ct.verbose > 1) os_ << " PolakRibiereSolver: beta=" << beta << std::endl; p_k_->scal(beta); - p_k_->axpy(1., *z_k_); + p_k_->axpy((ORBDTYPE)1., *z_k_); if (beta > 0.1) p_k_->scal(1. / (1. + beta)); } @@ -462,7 +462,7 @@ int PolakRibiereSolver::solve(OrbitalsType& orbitals, alpha_k = alpha_; // make new trial step - orbitals.axpy(alpha_k, *p_k_); + orbitals.axpy((ORBDTYPE)alpha_k, *p_k_); // save current "k" vectors into "km1" vectors if (with_preconditioner_) @@ -480,7 +480,7 @@ int PolakRibiereSolver::solve(OrbitalsType& orbitals, << alpha_k << "..." << std::endl; // half step back - orbitals.axpy(-1. * alpha_k, *p_k_); + orbitals.axpy((ORBDTYPE)(-1. 
* alpha_k), *p_k_); // return DM to value before trial step dm_strategy_->reset(); @@ -577,5 +577,5 @@ int PolakRibiereSolver::solve(OrbitalsType& orbitals, return retval; } -template class PolakRibiereSolver; -template class PolakRibiereSolver; +template class PolakRibiereSolver>; +template class PolakRibiereSolver>; diff --git a/src/Potentials.cc b/src/Potentials.cc index a620cdeb..960fd74d 100644 --- a/src/Potentials.cc +++ b/src/Potentials.cc @@ -155,6 +155,7 @@ double Potentials::updateVtot(const std::vector>& rho) double minus = -1.; LinearAlgebraUtils::MPaxpy( size_, minus, &vtot_[0], &dv_[0]); + LinearAlgebraUtils::MPscal(size_, minus, &dv_[0]); evalNormDeltaVtotRho(rho); diff --git a/src/Potentials.h b/src/Potentials.h index 6803edc3..3eb41fd3 100644 --- a/src/Potentials.h +++ b/src/Potentials.h @@ -158,6 +158,8 @@ class Potentials POTDTYPE* vtot() { return vtot_.data(); } RHODTYPE* rho_comp() { return rho_comp_.data(); } + const std::vector& dv() { return dv_; } + const std::vector& vnuc() const { return v_nuc_; } const std::vector& vh_rho() const { return vh_rho_; } diff --git a/src/Power.cc b/src/Power.cc index 6a153fa9..58e268d0 100644 --- a/src/Power.cc +++ b/src/Power.cc @@ -9,6 +9,8 @@ #include "GramMatrix.h" #include "LocalVector.h" +#include "ReplicatedMatrix.h" +#include "ReplicatedVector.h" #include "SquareLocalMatrices.h" #include "mputils.h" #include "random.h" @@ -97,3 +99,4 @@ template class Power, SquareLocalMatrices>; template class Power, dist_matrix::DistMatrix>; +// template class Power; diff --git a/src/PowerGen.cc b/src/PowerGen.cc index 5305a8df..3bd9dc74 100644 --- a/src/PowerGen.cc +++ b/src/PowerGen.cc @@ -46,10 +46,10 @@ void PowerGen::computeGenEigenInterval(MatrixType& mat, // initialize solution data // initial guess - VectorType sol("sol", m); + VectorType sol(m); sol = vec1_; // initialize local solution data // new solution - VectorType new_sol("new_sol", m); + VectorType new_sol(m); // get norm of initial sol double alpha 
= sol.nrm2(); @@ -187,6 +187,4 @@ void PowerGen::computeGenEigenInterval(MatrixType& mat, template class PowerGen, dist_matrix::DistVector>; -#ifdef HAVE_MAGMA template class PowerGen; -#endif diff --git a/src/Preconditioning.cc b/src/Preconditioning.cc index e92b1e24..495d9703 100644 --- a/src/Preconditioning.cc +++ b/src/Preconditioning.cc @@ -216,3 +216,4 @@ void Preconditioning::mg(pb::GridFuncVector& gfv_v, } template class Preconditioning; +template class Preconditioning; diff --git a/src/ProjectedMatrices.cc b/src/ProjectedMatrices.cc index 87c9fd48..8a3b773a 100644 --- a/src/ProjectedMatrices.cc +++ b/src/ProjectedMatrices.cc @@ -15,11 +15,13 @@ #include "DistMatrixTools.h" #include "HDFrestart.h" #include "LocalMatrices2DistMatrix.h" +#include "LocalMatrices2ReplicatedMatrix.h" #include "MGmol_MPI.h" #include "Orbitals.h" #include "Power.h" #include "PowerGen.h" #include "ReplicatedMatrix.h" +#include "ReplicatedMatrix2SquareLocalMatrices.h" #include "ReplicatedVector.h" #include "ReplicatedWorkSpace.h" #include "SP2.h" @@ -60,6 +62,7 @@ std::string ProjectedMatrices>::getMatrixType() // // conversion functions from one matrix format into another // +#ifndef HAVE_MAGMA void convert_matrix(const dist_matrix::DistMatrix& src, SquareLocalMatrices& dst) { @@ -67,7 +70,7 @@ void convert_matrix(const dist_matrix::DistMatrix& src, = DistMatrix2SquareLocalMatrices::instance(); dm2sl->convert(src, dst); } -#ifdef HAVE_MAGMA +#else void convert_matrix(const dist_matrix::DistMatrix& src, SquareLocalMatrices& dst) { @@ -78,19 +81,24 @@ void convert_matrix(const dist_matrix::DistMatrix& src, dst.assign(tmp); } +#endif +#ifndef HAVE_MAGMA void convert_matrix(const ReplicatedMatrix& src, SquareLocalMatrices& dst) { - src.get(dst.getRawPtr(), dst.m()); -} + assert(dst.m() > 0); + ReplicatedMatrix2SquareLocalMatrices* r2l + = ReplicatedMatrix2SquareLocalMatrices::instance(); + r2l->convert(src, dst); +} +#else void convert_matrix(const ReplicatedMatrix& src, 
SquareLocalMatrices& dst) { dst.assign(src); } - #endif //=====================================================================// @@ -147,21 +155,20 @@ void ProjectedMatrices>::convert( sl2dm->accumulate(src, dst); } -#ifdef HAVE_MAGMA template <> void ProjectedMatrices::convert( const SquareLocalMatrices& src, ReplicatedMatrix& dst) { - dst.init(src.getSubMatrix(), dim_); + LocalMatrices2ReplicatedMatrix* sl2rm + = LocalMatrices2ReplicatedMatrix::instance(); - dst.consolidate(); + sl2rm->accumulate(src, dst); } -#endif template <> -void ProjectedMatrices>::setupMPI( - const std::vector>& global_indexes) +void ProjectedMatrices>:: + setupGlobalIndexes(const std::vector>& global_indexes) { MGmol_MPI& mmpi = *(MGmol_MPI::instance()); MPI_Comm comm = mmpi.commSpin(); @@ -171,13 +178,14 @@ void ProjectedMatrices>::setupMPI( LocalMatrices2DistMatrix::setup(comm, global_indexes); } -#ifdef HAVE_MAGMA template <> -void ProjectedMatrices::setupMPI( +void ProjectedMatrices::setupGlobalIndexes( const std::vector>& global_indexes) { + LocalMatrices2ReplicatedMatrix::setup(global_indexes); + + ReplicatedMatrix2SquareLocalMatrices::setup(global_indexes); } -#endif template void ProjectedMatrices::setup( @@ -189,7 +197,7 @@ void ProjectedMatrices::setup( global_indexes_ = global_indexes; - setupMPI(global_indexes); + setupGlobalIndexes(global_indexes); localX_.reset(new SquareLocalMatrices( subdiv_, chromatic_number_)); @@ -253,7 +261,7 @@ template void ProjectedMatrices::applyInvS( SquareLocalMatrices& mat) { - // build DistMatrix from SquareLocalMatrices + // build Matrix from SquareLocalMatrices convert(mat, *work_); gm_->applyInv(*work_); @@ -738,6 +746,7 @@ double ProjectedMatrices::checkCond( template int ProjectedMatrices::writeDM(HDFrestart& h5f_file) { + // std::cout << "ProjectedMatrices::writeDM()..." 
<< std::endl; std::string name("/Density_Matrix"); return dm_->write(h5f_file, name); } @@ -1193,7 +1202,6 @@ void ProjectedMatrices>:: power.computeGenEigenInterval(mat, *gm_, interval, maxits, pad); } -#ifdef HAVE_MAGMA template <> void ProjectedMatrices::computeGenEigenInterval( std::vector& interval, const int maxits, const double pad) @@ -1204,7 +1212,6 @@ void ProjectedMatrices::computeGenEigenInterval( power.computeGenEigenInterval(mat, *gm_, interval, maxits, pad); } -#endif template <> void ProjectedMatrices>::consolidateH() @@ -1228,12 +1235,12 @@ void ProjectedMatrices>::consolidateH() consolidate_H_tm_.stop(); } -#ifdef HAVE_MAGMA template <> void ProjectedMatrices::consolidateH() { consolidate_H_tm_.start(); + // assign SquareLocalMatrices to matH_ matH_->assign(*localHl_); matH_->add(*localHnl_); @@ -1242,7 +1249,6 @@ void ProjectedMatrices::consolidateH() consolidate_H_tm_.stop(); } -#endif template void ProjectedMatrices::updateSubMatX(const MatrixType& dm) @@ -1261,7 +1267,6 @@ ProjectedMatrices>::getReplicatedDM() return sldm; } -#ifdef HAVE_MAGMA template <> SquareLocalMatrices ProjectedMatrices::getReplicatedDM() @@ -1272,9 +1277,6 @@ ProjectedMatrices::getReplicatedDM() return sldm; } -#endif template class ProjectedMatrices>; -#ifdef HAVE_MAGMA template class ProjectedMatrices; -#endif diff --git a/src/ProjectedMatrices.h b/src/ProjectedMatrices.h index 142dbf70..fa5add85 100644 --- a/src/ProjectedMatrices.h +++ b/src/ProjectedMatrices.h @@ -132,7 +132,7 @@ class ProjectedMatrices : public ProjectedMatricesInterface void convert(const SquareLocalMatrices& src, MatrixType& dst); - void setupMPI(const std::vector>&); + void setupGlobalIndexes(const std::vector>&); std::string getMatrixType(); diff --git a/src/ProjectedMatrices2N.cc b/src/ProjectedMatrices2N.cc index 479ee4f0..d13196b2 100644 --- a/src/ProjectedMatrices2N.cc +++ b/src/ProjectedMatrices2N.cc @@ -68,6 +68,4 @@ void ProjectedMatrices2N::iterativeUpdateDMwithEigenstates( } template 
class ProjectedMatrices2N>; -#ifdef HAVE_MAGMA template class ProjectedMatrices2N; -#endif diff --git a/src/ProjectedMatricesMehrstellen.cc b/src/ProjectedMatricesMehrstellen.cc index 0d2893ba..b12aee94 100644 --- a/src/ProjectedMatricesMehrstellen.cc +++ b/src/ProjectedMatricesMehrstellen.cc @@ -99,6 +99,4 @@ void ProjectedMatricesMehrstellen::rotateAll( template class ProjectedMatricesMehrstellen< dist_matrix::DistMatrix>; -#ifdef HAVE_MAGMA template class ProjectedMatricesMehrstellen; -#endif diff --git a/src/ReplicatedMatrix.cc b/src/ReplicatedMatrix.cc index 27c7e31e..2f266ab2 100644 --- a/src/ReplicatedMatrix.cc +++ b/src/ReplicatedMatrix.cc @@ -6,25 +6,37 @@ // All rights reserved. // This file is part of MGmol. For details, see https://github.com/llnl/mgmol. // Please also read this link https://github.com/llnl/mgmol/LICENSE -#ifdef HAVE_MAGMA - #include "ReplicatedMatrix.h" - +#include "LocalMatrices2ReplicatedMatrix.h" #include "ReplicatedVector.h" #include "memory_space.h" #include "random.h" +#ifdef HAVE_MAGMA #include "magma_v2.h" +using Memory = MemorySpace::Memory; +constexpr double gpuroundup = 32; +#else +#include "blas3_c.h" +#include "fc_mangle.h" +#include "lapack_c.h" +using Memory = MemorySpace::Memory; +#endif #include -using MemoryDev = MemorySpace::Memory; - -constexpr double gpuroundup = 32; - MPI_Comm ReplicatedMatrix::comm_ = MPI_COMM_NULL; bool ReplicatedMatrix::onpe0_ = false; +static int roundup(const int n) +{ +#ifdef HAVE_MAGMA + return magma_roundup(n, gpuroundup); +#else + return n; +#endif +} + void rotateSym(ReplicatedMatrix& mat, const ReplicatedMatrix& rotation_matrix, ReplicatedMatrix& work) { @@ -35,8 +47,8 @@ void rotateSym(ReplicatedMatrix& mat, const ReplicatedMatrix& rotation_matrix, ReplicatedMatrix::ReplicatedMatrix( const std::string name, const int m, const int n) : dim_(m), - ld_(magma_roundup(dim_, gpuroundup)), - device_data_(MemoryDev::allocate(dim_ * ld_), MemoryDev::free), + ld_(roundup(dim_)), + 
data_(Memory::allocate(dim_ * ld_), Memory::free), name_(name) { assert(m == n); @@ -46,36 +58,47 @@ ReplicatedMatrix::ReplicatedMatrix( ReplicatedMatrix::ReplicatedMatrix(const std::string name, const int n) : dim_(n), - ld_(magma_roundup(dim_, gpuroundup)), - device_data_(MemoryDev::allocate(dim_ * ld_), MemoryDev::free), + ld_(roundup(n)), + data_(Memory::allocate(dim_ * ld_), Memory::free), name_(name) { clear(); } -ReplicatedMatrix::ReplicatedMatrix(const std::string name, - const double* const diagonal, const int m, const int n) +ReplicatedMatrix::ReplicatedMatrix( + const std::string name, const double* const diagonal, const int m) : dim_(m), - ld_(magma_roundup(dim_, gpuroundup)), - device_data_(MemoryDev::allocate(dim_ * ld_), MemoryDev::free) + ld_(roundup(dim_)), + data_(Memory::allocate(dim_ * ld_), Memory::free), + name_(name) { clear(); +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); magma_dsetvector( - dim_, diagonal, 1, device_data_.get(), ld_ + 1, magma_singleton.queue_); + dim_, diagonal, 1, data_.get(), ld_ + 1, magma_singleton.queue_); +#else + int ione = 1; + int incy = ld_ + 1; + DCOPY(&dim_, diagonal, &ione, data_.get(), &incy); +#endif } ReplicatedMatrix::ReplicatedMatrix(const ReplicatedMatrix& mat) : dim_(mat.dim_), ld_(mat.ld_), - device_data_(MemoryDev::allocate(dim_ * ld_), MemoryDev::free) + data_(Memory::allocate(dim_ * ld_), Memory::free) { +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - magma_dcopymatrix(dim_, dim_, mat.device_data_.get(), mat.ld_, - device_data_.get(), ld_, magma_singleton.queue_); + magma_dcopymatrix(dim_, dim_, mat.data_.get(), mat.ld_, data_.get(), ld_, + magma_singleton.queue_); +#else + memcpy(data_.get(), mat.data_.get(), ld_ * dim_ * sizeof(double)); +#endif } ReplicatedMatrix& ReplicatedMatrix::operator=(const ReplicatedMatrix& rhs) @@ -84,12 +107,16 @@ ReplicatedMatrix& ReplicatedMatrix::operator=(const ReplicatedMatrix& rhs) { ld_ = 
rhs.ld_; dim_ = rhs.dim_; - device_data_.reset(MemoryDev::allocate(dim_ * ld_)); + data_.reset(Memory::allocate(dim_ * ld_)); +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - magma_dcopymatrix(dim_, dim_, rhs.device_data_.get(), rhs.ld_, - device_data_.get(), ld_, magma_singleton.queue_); + magma_dcopymatrix(dim_, dim_, rhs.data_.get(), rhs.ld_, data_.get(), + ld_, magma_singleton.queue_); +#else + memcpy(data_.get(), rhs.data_.get(), ld_ * dim_ * sizeof(double)); +#endif } return *this; } @@ -99,62 +126,119 @@ ReplicatedMatrix::~ReplicatedMatrix() {} void ReplicatedMatrix::getsub( const ReplicatedMatrix& src, int m, int n, int ia, int ja) { +#ifdef HAVE_MAGMA + auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - magma_dcopymatrix(m, n, src.device_data_.get() + ja * src.ld_ + ia, src.ld_, - device_data_.get(), ld_, magma_singleton.queue_); + magma_dcopymatrix(m, n, src.data_.get() + ja * src.ld_ + ia, src.ld_, + data_.get(), ld_, magma_singleton.queue_); +#else + char uplo = 'a'; + int lda = src.ld_; + int ldb = ld_; + DLACPY(&uplo, &m, &n, src.data_.get() + ja * src.ld_ + ia, &lda, + data_.get(), &ldb); +#endif } void ReplicatedMatrix::consolidate() { assert(comm_ != MPI_COMM_NULL); - std::vector mat(dim_ * dim_); - std::vector mat_sum(dim_ * dim_); + std::vector mat(dim_ * ld_); +#ifdef HAVE_MAGMA + std::vector mat_sum(dim_ * ld_); + double* mat_sum_data = mat_sum.data(); auto& magma_singleton = MagmaSingleton::get_magma_singleton(); // copy from GPU to CPU - magma_dgetmatrix(dim_, dim_, device_data_.get(), ld_, mat.data(), dim_, - magma_singleton.queue_); - + magma_dgetmatrix( + dim_, dim_, data_.get(), ld_, mat.data(), ld_, magma_singleton.queue_); +#else + double* mat_sum_data = data_.get(); + memcpy(mat.data(), data_.get(), dim_ * ld_ * sizeof(double)); +#endif MPI_Allreduce( - mat.data(), mat_sum.data(), dim_ * dim_, MPI_DOUBLE, MPI_SUM, comm_); + mat.data(), mat_sum_data, dim_ * ld_, MPI_DOUBLE, MPI_SUM, 
comm_); +#ifdef HAVE_MAGMA // copy from CPU to GPU - magma_dsetmatrix(dim_, dim_, mat_sum.data(), dim_, device_data_.get(), ld_, + magma_dsetmatrix(dim_, dim_, mat_sum.data(), ld_, data_.get(), ld_, magma_singleton.queue_); +#endif } void ReplicatedMatrix::assign( const ReplicatedMatrix& src, const int ib, const int jb) { assert(this != &src); +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - magma_dcopymatrix(src.dim_, src.dim_, src.device_data_.get(), src.ld_, - device_data_.get() + jb * ld_ + ib, ld_, magma_singleton.queue_); + magma_dcopymatrix(src.dim_, src.dim_, src.data_.get(), src.ld_, + data_.get() + jb * ld_ + ib, ld_, magma_singleton.queue_); +#else + char uplo = 'a'; + int dim = src.dim_; + int lda = src.ld_; + int ldb = ld_; + DLACPY(&uplo, &dim, &dim, src.data_.get(), &lda, + data_.get() + jb * ld_ + ib, &ldb); +#endif } template <> void ReplicatedMatrix::assign( SquareLocalMatrices& src) { +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - magma_dsetmatrix(src.m(), src.n(), src.getSubMatrix(), src.n(), - device_data_.get(), ld_, magma_singleton.queue_); + magma_dsetmatrix(src.m(), src.n(), src.getSubMatrix(), src.n(), data_.get(), + ld_, magma_singleton.queue_); +#else + LocalMatrices2ReplicatedMatrix* l2r + = LocalMatrices2ReplicatedMatrix::instance(); + l2r->convert(src, *this, dim_, 0.); +#endif } template <> void ReplicatedMatrix::assign( SquareLocalMatrices& src) { + assert(src.n() == dim_); + + // current implementation restriction + assert(src.nmat() == 1); + +#ifdef HAVE_MAGMA + auto& magma_singleton = MagmaSingleton::get_magma_singleton(); + + magma_dcopymatrix(src.n(), src.n(), src.getRawPtr(), src.n(), data_.get(), + ld_, magma_singleton.queue_); +#else + // copy columns of matrix + for (int j = 0; j < dim_; j++) + memcpy(data_.get() + j * ld_, src.getRawPtr() + j * src.n(), + dim_ * sizeof(double)); +#endif +} + +void ReplicatedMatrix::assign(const double* const src, const 
int ld) +{ +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - magma_dcopymatrix(src.n(), src.n(), src.getRawPtr(), src.n(), - device_data_.get(), ld_, magma_singleton.queue_); + magma_dcopymatrix( + dim_, dim_, src, ld, data_.get(), ld_, magma_singleton.queue_); +#else + // copy columns of matrix + for (int j = 0; j < dim_; j++) + memcpy(data_.get() + j * ld_, src + j * ld, dim_ * sizeof(double)); +#endif } void ReplicatedMatrix::add(const SquareSubMatrix& mat) @@ -175,8 +259,9 @@ void ReplicatedMatrix::add(const SquareSubMatrix& mat) } } +#ifdef HAVE_MAGMA std::unique_ptr src_dev( - MemoryDev::allocate(dim_ * ld_), MemoryDev::free); + Memory::allocate(dim_ * ld_), Memory::free); auto& magma_singleton = MagmaSingleton::get_magma_singleton(); @@ -185,40 +270,69 @@ void ReplicatedMatrix::add(const SquareSubMatrix& mat) magma_singleton.queue_); // add to object data - magmablas_dgeadd(dim_, dim_, 1., src_dev.get(), ld_, device_data_.get(), - ld_, magma_singleton.queue_); + magmablas_dgeadd(dim_, dim_, 1., src_dev.get(), ld_, data_.get(), ld_, + magma_singleton.queue_); +#else + double* data = data_.get(); + for (int j = 0; j < dim_; j++) + for (int i = 0; i < dim_; i++) + data[i + j * ld_] += src[i + j * n]; +#endif } void ReplicatedMatrix::init(const double* const ha, const int lda) { +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); magma_dsetmatrix( - dim_, dim_, ha, lda, device_data_.get(), ld_, magma_singleton.queue_); + dim_, dim_, ha, lda, data_.get(), ld_, magma_singleton.queue_); +#else + for (int j = 0; j < dim_; j++) + memcpy(data_.get() + ld_ * j, ha + lda * j, dim_ * sizeof(double)); +#endif } void ReplicatedMatrix::get(double* ha, const int lda) const { + assert(ha != nullptr); +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); magma_dgetmatrix( - dim_, dim_, device_data_.get(), ld_, ha, lda, magma_singleton.queue_); + dim_, dim_, data_.get(), ld_, ha, lda, 
magma_singleton.queue_); +#else + for (int j = 0; j < dim_; j++) + memcpy(ha + lda * j, data_.get() + ld_ * j, dim_ * sizeof(double)); +#endif } void ReplicatedMatrix::getDiagonalValues(double* ha) { +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - magma_dgetvector( - dim_, device_data_.get(), ld_ + 1, ha, 1, magma_singleton.queue_); + magma_dgetvector(dim_, data_.get(), ld_ + 1, ha, 1, magma_singleton.queue_); +#else + int dim = dim_; + int incx = ld_ + 1; + int incy = 1; + DCOPY(&dim, data_.get(), &incx, ha, &incy); +#endif } void ReplicatedMatrix::axpy(const double alpha, const ReplicatedMatrix& a) { +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - magmablas_dgeadd(dim_, dim_, alpha, a.device_data_.get(), a.ld_, - device_data_.get(), ld_, magma_singleton.queue_); + magmablas_dgeadd(dim_, dim_, alpha, a.data_.get(), a.ld_, data_.get(), ld_, + magma_singleton.queue_); +#else + int dim = dim_ * ld_; + int ione = 1; + DAXPY(&dim, &alpha, a.data_.get(), &ione, data_.get(), &ione); +#endif } void ReplicatedMatrix::setRandom(const double minv, const double maxv) @@ -227,32 +341,52 @@ void ReplicatedMatrix::setRandom(const double minv, const double maxv) generateRandomData(mat, minv, maxv); +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - magma_dsetmatrix(dim_, dim_, mat.data(), dim_, device_data_.get(), ld_, - magma_singleton.queue_); + magma_dsetmatrix( + dim_, dim_, mat.data(), dim_, data_.get(), ld_, magma_singleton.queue_); +#else + double* data = data_.get(); + for (int j = 0; j < dim_; j++) + for (int i = 0; i < dim_ * (int)ld_; i++) + data[j * ld_ + i] = mat[j * dim_ + i]; +#endif } void ReplicatedMatrix::identity() { +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - magmablas_dlaset(MagmaFull, dim_, dim_, 0.0, 1.0, device_data_.get(), ld_, + magmablas_dlaset(MagmaFull, dim_, dim_, 0.0, 1.0, data_.get(), ld_, 
magma_singleton.queue_); +#else + double* data = data_.get(); + memset(data, 0, dim_ * ld_ * sizeof(double)); + for (int i = 0; i < dim_; i++) + data[i * ld_ + i] = 1.; +#endif } void ReplicatedMatrix::scal(const double alpha) { + int size = dim_ * ld_; +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - magma_dscal( - dim_ * ld_, alpha, device_data_.get(), 1, magma_singleton.queue_); + magma_dscal(size, alpha, data_.get(), 1, magma_singleton.queue_); +#else + int ione = 1; + DSCAL(&size, &alpha, data_.get(), &ione); +#endif } // this = alpha * transpose(A) + beta * this void ReplicatedMatrix::transpose( const double alpha, const ReplicatedMatrix& a, const double beta) { +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); double* dwork; @@ -262,54 +396,87 @@ void ReplicatedMatrix::transpose( std::cerr << "magma_dmalloc failed!" << std::endl; } - magmablas_dtranspose(dim_, dim_, a.device_data_.get(), a.ld_, dwork, ld_, - magma_singleton.queue_); + magmablas_dtranspose( + dim_, dim_, a.data_.get(), a.ld_, dwork, ld_, magma_singleton.queue_); - magmablas_dgeadd2(dim_, dim_, alpha, dwork, ld_, beta, device_data_.get(), - ld_, magma_singleton.queue_); + magmablas_dgeadd2(dim_, dim_, alpha, dwork, ld_, beta, data_.get(), ld_, + magma_singleton.queue_); magma_singleton.sync(); magma_free(dwork); +#else + double* data = data_.get(); + double* adata = a.data_.get(); + for (int i = 0; i < dim_; i++) + { + for (int j = 0; j < dim_; j++) + { + data[j * ld_ + i] + = beta * data[j * ld_ + i] + alpha * adata[i * ld_ + j]; + } + } +#endif } void ReplicatedMatrix::gemm(const char transa, const char transb, const double alpha, const ReplicatedMatrix& a, const ReplicatedMatrix& b, const double beta) { +#ifdef HAVE_MAGMA magma_trans_t magma_transa = magma_trans_const(transa); magma_trans_t magma_transb = magma_trans_const(transb); auto& magma_singleton = MagmaSingleton::get_magma_singleton(); magmablas_dgemm(magma_transa, 
magma_transb, dim_, dim_, dim_, alpha, - a.device_data_.get(), a.ld_, b.device_data_.get(), b.ld_, beta, - device_data_.get(), ld_, magma_singleton.queue_); + a.data_.get(), a.ld_, b.data_.get(), b.ld_, beta, data_.get(), ld_, + magma_singleton.queue_); +#else + int ld = ld_; + int ald = a.ld_; + int bld = b.ld_; + DGEMM(&transa, &transb, &dim_, &dim_, &dim_, &alpha, a.data_.get(), &ald, + b.data_.get(), &bld, &beta, data_.get(), &ld); +#endif } void ReplicatedMatrix::symm(const char side, const char uplo, const double alpha, const ReplicatedMatrix& a, const ReplicatedMatrix& b, const double beta) { +#ifdef HAVE_MAGMA magma_side_t magma_side = magma_side_const(side); magma_uplo_t magma_uplo = magma_uplo_const(uplo); auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - magma_dsymm(magma_side, magma_uplo, dim_, dim_, alpha, a.device_data_.get(), - a.ld_, b.device_data_.get(), b.ld_, beta, device_data_.get(), ld_, - magma_singleton.queue_); + magma_dsymm(magma_side, magma_uplo, dim_, dim_, alpha, a.data_.get(), a.ld_, + b.data_.get(), b.ld_, beta, data_.get(), ld_, magma_singleton.queue_); +#else + int ld = ld_; + int ald = a.ld_; + int bld = b.ld_; + DSYMM(&side, &uplo, &dim_, &dim_, &alpha, a.data_.get(), &ald, + b.data_.get(), &bld, &beta, data_.get(), &ld); +#endif } int ReplicatedMatrix::potrf(char uplo) { - assert(device_data_.get()); + assert(data_.get()); + int info; +#ifdef HAVE_MAGMA magma_uplo_t magma_uplo = magma_uplo_const(uplo); - int info; - magma_dpotrf_gpu(magma_uplo, dim_, device_data_.get(), ld_, &info); + magma_dpotrf_gpu(magma_uplo, dim_, data_.get(), ld_, &info); +#else + int ld = ld_; + DPOTRF(&uplo, &dim_, data_.get(), &ld, &info); +#endif if (info != 0) - std::cerr << "magma_dpotrf_gpu failed, info = " << info << std::endl; + std::cerr << "ReplicatedMatrix::potrf() failed, info = " << info + << std::endl; return info; } @@ -317,17 +484,27 @@ int ReplicatedMatrix::potrf(char uplo) void ReplicatedMatrix::getrf(std::vector& ipiv) { 
int info; - magma_dgetrf_gpu(dim_, dim_, device_data_.get(), ld_, ipiv.data(), &info); +#ifdef HAVE_MAGMA + magma_dgetrf_gpu(dim_, dim_, data_.get(), ld_, ipiv.data(), &info); +#else + int ld = ld_; + DGETRF(&dim_, &dim_, data_.get(), &ld, ipiv.data(), &info); +#endif if (info != 0) std::cerr << "magma_dgetrf_gpu failed, info = " << info << std::endl; } int ReplicatedMatrix::potri(char uplo) { + int info; +#ifdef HAVE_MAGMA magma_uplo_t magma_uplo = magma_uplo_const(uplo); - int info; - magma_dpotri_gpu(magma_uplo, dim_, device_data_.get(), ld_, &info); + magma_dpotri_gpu(magma_uplo, dim_, data_.get(), ld_, &info); +#else + int ld = ld_; + DPOTRI(&uplo, &dim_, data_.get(), &ld, &info); +#endif if (info != 0) std::cerr << "magma_dpotri_gpu failed, info = " << info << std::endl; @@ -339,34 +516,51 @@ int ReplicatedMatrix::potri(char uplo) // A = U**T*U or A = L*L**T computed by potrf void ReplicatedMatrix::potrs(char uplo, ReplicatedMatrix& b) { + int info; +#ifdef HAVE_MAGMA magma_uplo_t magma_uplo = magma_uplo_const(uplo); - int info; - magma_dpotrs_gpu(magma_uplo, dim_, dim_, device_data_.get(), ld_, - b.device_data_.get(), b.ld_, &info); - if (info != 0) - std::cerr << "magma_dpotrs_gpu failed, info = " << info << std::endl; + magma_dpotrs_gpu( + magma_uplo, dim_, 1, data_.get(), ld_, b.data(), dim_, &info); +#else + int ione = 1; + int ld = ld_; + DPOTRS(&uplo, &dim_, &ione, data_.get(), &ld, b.data(), &dim_, &info); +#endif + if (info != 0) std::cerr << "dpotrs failed, info = " << info << std::endl; } void ReplicatedMatrix::potrs(char uplo, ReplicatedVector& b) { + int info; +#ifdef HAVE_MAGMA magma_uplo_t magma_uplo = magma_uplo_const(uplo); - int info; magma_dpotrs_gpu( - magma_uplo, dim_, 1, device_data_.get(), ld_, b.data(), dim_, &info); - if (info != 0) - std::cerr << "magma_dpotrs_gpu failed, info = " << info << std::endl; + magma_uplo, dim_, 1, data_.get(), ld_, b.data(), dim_, &info); +#else + int ione = 1; + int ld = ld_; + DPOTRS(&uplo, &dim_, &ione, 
data_.get(), &ld, b.data(), &dim_, &info); +#endif + if (info != 0) std::cerr << "dpotrs failed, info = " << info << std::endl; } void ReplicatedMatrix::getrs( char trans, ReplicatedMatrix& b, std::vector& ipiv) { + int info; +#ifdef HAVE_MAGMA magma_trans_t magma_trans = magma_trans_const(trans); - int info; - magma_dgetrs_gpu(magma_trans, dim_, dim_, device_data_.get(), ld_, - ipiv.data(), b.device_data_.get(), b.ld_, &info); + magma_dgetrs_gpu(magma_trans, dim_, dim_, data_.get(), ld_, ipiv.data(), + b.data_.get(), b.ld_, &info); +#else + int ld = ld_; + int bld = b.ld_; + DGETRS(&trans, &dim_, &dim_, data_.get(), &ld, ipiv.data(), b.data_.get(), + &bld, &info); +#endif if (info != 0) std::cerr << "magma_dgetrs_gpu failed, info = " << info << std::endl; } @@ -374,27 +568,37 @@ void ReplicatedMatrix::getrs( void ReplicatedMatrix::syev( char jobz, char uplo, std::vector& evals, ReplicatedMatrix& z) { + int info; +#ifdef HAVE_MAGMA magma_vec_t magma_jobz = magma_vec_const(jobz); magma_uplo_t magma_uplo = magma_uplo_const(uplo); auto& magma_singleton = MagmaSingleton::get_magma_singleton(); // copy matrix into z - magmablas_dlacpy(MagmaFull, dim_, dim_, device_data_.get(), ld_, - z.device_data_.get(), z.ld_, magma_singleton.queue_); + magmablas_dlacpy(MagmaFull, dim_, dim_, data_.get(), ld_, z.data_.get(), + z.ld_, magma_singleton.queue_); magma_int_t nb = magma_get_ssytrd_nb(dim_); magma_int_t lwork = std::max(2 * dim_ + dim_ * nb, 1 + 6 * dim_ + 2 * dim_ * dim_); + int liwork = 3 + 5 * dim_; - int info; std::vector wa(dim_ * dim_); std::vector work(lwork); std::vector iwork(liwork); - magma_dsyevd_gpu(magma_jobz, magma_uplo, dim_, z.device_data_.get(), z.ld_, + magma_dsyevd_gpu(magma_jobz, magma_uplo, dim_, z.data_.get(), z.ld_, evals.data(), wa.data(), dim_, work.data(), lwork, iwork.data(), liwork, &info); +#else + memcpy(z.data_.get(), data_.get(), dim_ * ld_ * sizeof(double)); + int lwork = 3 * dim_ - 1; + std::vector work(lwork); + int zld = z.ld_; + 
DSYEV(&jobz, &uplo, &dim_, z.data_.get(), &zld, evals.data(), work.data(), + &lwork, &info); +#endif if (info != 0) std::cerr << "magma_dsyevd_gpu failed, info = " << info << std::endl; // for(auto& d : evals)std::cout<(itype); - int info; - magma_dsygst_gpu(magma_itype, magma_uplo, dim_, device_data_.get(), ld_, - b.device_data_.get(), b.ld_, &info); + magma_dsygst_gpu(magma_itype, magma_uplo, dim_, data_.get(), ld_, + b.data_.get(), b.ld_, &info); +#else + int ld = ld_; + int bld = b.ld_; + DSYGST(&itype, &uplo, &dim_, data_.get(), &ld, b.data_.get(), &bld, &info); +#endif if (info != 0) std::cerr << "magma_dsygst_gpu failed, info = " << info << std::endl; } @@ -415,6 +625,7 @@ void ReplicatedMatrix::sygst(int itype, char uplo, const ReplicatedMatrix& b) void ReplicatedMatrix::trmm(const char side, const char uplo, const char trans, const char diag, const double alpha, const ReplicatedMatrix& a) { +#ifdef HAVE_MAGMA magma_side_t magma_side = magma_side_const(side); magma_uplo_t magma_uplo = magma_uplo_const(uplo); magma_trans_t magma_trans = magma_trans_const(trans); @@ -423,13 +634,19 @@ void ReplicatedMatrix::trmm(const char side, const char uplo, const char trans, auto& magma_singleton = MagmaSingleton::get_magma_singleton(); magma_dtrmm(magma_side, magma_uplo, magma_trans, magma_diag, dim_, dim_, - alpha, a.device_data_.get(), a.ld_, device_data_.get(), ld_, - magma_singleton.queue_); + alpha, a.data_.get(), a.ld_, data_.get(), ld_, magma_singleton.queue_); +#else + int ld = ld_; + int ald = a.ld_; + DTRMM(&side, &uplo, &trans, &diag, &dim_, &dim_, &alpha, a.data_.get(), + &ald, data_.get(), &ld); +#endif } void ReplicatedMatrix::trtrs(const char uplo, const char trans, const char diag, ReplicatedMatrix& b) const { +#ifdef HAVE_MAGMA magma_uplo_t magma_uplo = magma_uplo_const(uplo); magma_trans_t magma_trans = magma_trans_const(trans); magma_diag_t magma_diag = magma_diag_const(diag); @@ -437,94 +654,142 @@ void ReplicatedMatrix::trtrs(const char uplo, const 
char trans, const char diag, auto& magma_singleton = MagmaSingleton::get_magma_singleton(); magma_dtrsm(MagmaLeft, magma_uplo, magma_trans, magma_diag, dim_, dim_, 1., - device_data_.get(), ld_, b.device_data_.get(), b.ld_, - magma_singleton.queue_); + data_.get(), ld_, b.data_.get(), b.ld_, magma_singleton.queue_); +#else + double one = 1.; + char side = 'L'; + int ld = ld_; + int bld = b.ld_; + DTRSM(&side, &uplo, &trans, &diag, &dim_, &dim_, &one, data_.get(), &ld, + b.data_.get(), &bld); +#endif } // get max in absolute value of column j int ReplicatedMatrix::iamax(const int j, double& val) { +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - int indx = magma_idamax(dim_, device_data_.get() + j * ld_, 1, - magma_singleton.queue_) - - 1; - magma_dgetvector(dim_, device_data_.get() + j * ld_ + indx, 1, &val, 1, - magma_singleton.queue_); + int indx + = magma_idamax(dim_, data_.get() + j * ld_, 1, magma_singleton.queue_) + - 1; + magma_dgetvector( + dim_, data_.get() + j * ld_ + indx, 1, &val, 1, magma_singleton.queue_); + return indx; +#else + int ione = 1; + int indx = IDAMAX(&dim_, data_.get() + j * ld_, &ione) - 1; + val = *(data_.get() + j * ld_ + indx); +#endif return indx; } void ReplicatedMatrix::setVal(const int i, const int j, const double val) { +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - magma_dsetvector(dim_, &val, 1, device_data_.get() + j * ld_ + i, 1, - magma_singleton.queue_); + // this call does not look correct... 
+ magma_dsetvector( + dim_, &val, 1, data_.get() + j * ld_ + i, 1, magma_singleton.queue_); +#else + *(data_.get() + j * ld_ + i) = val; +#endif } void ReplicatedMatrix::setDiagonal(const std::vector& diag_values) { +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - magma_dsetvector(dim_, diag_values.data(), 1, device_data_.get(), ld_ + 1, + magma_dsetvector(dim_, diag_values.data(), 1, data_.get(), ld_ + 1, magma_singleton.queue_); +#else + double* data = data_.get(); + for (int i = 0; i < dim_; i++) + data[i * (ld_ + 1)] = diag_values[i]; +#endif } double ReplicatedMatrix::trace() const { + const std::vector val(dim_, 1.); +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); // this is a little contorted, but it works for now... std::unique_ptr tmp_dev( - MemoryDev::allocate(dim_ * ld_), MemoryDev::free); - const std::vector val(dim_, 1.); + Memory::allocate(dim_ * ld_), Memory::free); magma_dsetvector( dim_, val.data(), 1, tmp_dev.get(), 1, magma_singleton.queue_); - return magma_ddot(dim_, device_data_.get(), ld_ + 1, tmp_dev.get(), 1, - magma_singleton.queue_); + return magma_ddot( + dim_, data_.get(), ld_ + 1, tmp_dev.get(), 1, magma_singleton.queue_); +#else + int ione = 1; + int ldp = ld_ + 1; + return DDOT(&dim_, data_.get(), &ldp, val.data(), &ione); +#endif } double ReplicatedMatrix::traceProduct(const ReplicatedMatrix& matrix) const { + double trace = 0.; + +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - double trace = 0.; for (int i = 0; i < dim_; i++) - trace += magma_ddot(dim_, device_data_.get() + i, ld_, - matrix.device_data_.get() + matrix.ld_ * i, 1, - magma_singleton.queue_); + trace += magma_ddot(dim_, data_.get() + i, ld_, + matrix.data_.get() + matrix.ld_ * i, 1, magma_singleton.queue_); +#else + int ione = 1; + int ld = ld_; + for (int i = 0; i < dim_; i++) + trace += DDOT(&dim_, data_.get() + i, &ld, + matrix.data_.get() + matrix.ld_ * i, 
&ione); +#endif return trace; } double ReplicatedMatrix::norm(char ty) { +#ifdef HAVE_MAGMA magma_norm_t magma_ty = magma_norm_const(ty); auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - int lwork = dim_; double* dwork; - magma_dmalloc(&dwork, lwork); - double norm_val = magmablas_dlange(magma_ty, dim_, dim_, device_data_.get(), - ld_, dwork, lwork, magma_singleton.queue_); + magma_dmalloc(&dwork, dim_); + double norm_val = magmablas_dlange(magma_ty, dim_, dim_, data_.get(), ld_, + dwork, lwork, magma_singleton.queue_); magma_singleton.sync(); magma_free(dwork); return norm_val; +#else + std::vector dwork(dim_); + int ld = ld_; + return DLANGE(&ty, &dim_, &dim_, data_.get(), &ld, dwork.data()); +#endif } void ReplicatedMatrix::trset(const char uplo) { +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); std::vector mat(dim_ * dim_); - magma_dgetmatrix(dim_, dim_, device_data_.get(), ld_, mat.data(), dim_, - magma_singleton.queue_); + magma_dgetmatrix( + dim_, dim_, data_.get(), ld_, mat.data(), dim_, magma_singleton.queue_); +#else + double* mat = data_.get(); +#endif if (uplo == 'l' || uplo == 'L') { @@ -539,16 +804,22 @@ void ReplicatedMatrix::trset(const char uplo) mat[i + j * dim_] = 0.; } - magma_dsetmatrix(dim_, dim_, mat.data(), dim_, device_data_.get(), ld_, - magma_singleton.queue_); +#ifdef HAVE_MAGMA + magma_dsetmatrix( + dim_, dim_, mat.data(), dim_, data_.get(), ld_, magma_singleton.queue_); +#endif } void ReplicatedMatrix::clear() { +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - magmablas_dlaset(MagmaFull, dim_, dim_, 0.0, 0.0, device_data_.get(), ld_, + magmablas_dlaset(MagmaFull, dim_, dim_, 0.0, 0.0, data_.get(), ld_, magma_singleton.queue_); +#else + memset(data_.get(), 0, dim_ * ld_ * sizeof(double)); +#endif } void ReplicatedMatrix::print(std::ostream& os, const int ia, const int ja, @@ -557,13 +828,16 @@ void ReplicatedMatrix::print(std::ostream& os, const int ia, 
const int ja, const int m = std::min(ma, std::max(dim_ - ia, 0)); const int n = std::min(na, std::max(dim_ - ja, 0)); +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); std::vector mat(dim_ * dim_); - magma_dgetmatrix(dim_, dim_, device_data_.get(), ld_, mat.data(), dim_, - magma_singleton.queue_); - + magma_dgetmatrix( + dim_, dim_, data_.get(), ld_, mat.data(), dim_, magma_singleton.queue_); +#else + const double* const mat = data_.get(); +#endif if (onpe0_) for (int i = ia; i < m; i++) { @@ -573,5 +847,17 @@ void ReplicatedMatrix::print(std::ostream& os, const int ia, const int ja, } } -void ReplicatedMatrix::printMM(std::ostream& os) const {} -#endif +// add shift to diagonal, to shift eigenvalues +void ReplicatedMatrix::shift(const double shift) +{ + double* mat = data_.get(); + for (int i = 0; i < dim_; i++) + mat[i + i * dim_] += shift; +} + +void ReplicatedMatrix::printMM(std::ostream& os) const +{ + (void)os; + std::cerr << "ReplicatedMatrix::printMM() not implemented" << std::endl; + MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); +} diff --git a/src/ReplicatedMatrix.h b/src/ReplicatedMatrix.h index 79c50599..ff72b296 100644 --- a/src/ReplicatedMatrix.h +++ b/src/ReplicatedMatrix.h @@ -9,8 +9,6 @@ #ifndef MGMOL_REPLICATEDMATRIX_H #define MGMOL_REPLICATEDMATRIX_H -#ifdef HAVE_MAGMA - class ReplicatedVector; #include "SquareLocalMatrices.h" #include "SquareSubMatrix.h" @@ -32,7 +30,7 @@ class ReplicatedMatrix size_t ld_; // matrix data - std::unique_ptr device_data_; + std::unique_ptr data_; std::string name_; @@ -54,8 +52,8 @@ class ReplicatedMatrix ReplicatedMatrix(const std::string name, const int n); // construct diagonal matrix from diagonal values - ReplicatedMatrix(const std::string name, const double* const diagonal, - const int m, const int n); + ReplicatedMatrix( + const std::string name, const double* const diagonal, const int m); ReplicatedMatrix(const ReplicatedMatrix&); @@ -63,7 +61,7 @@ class ReplicatedMatrix 
std::string name() { return name_; } - double* const data() const { return device_data_.get(); } + double* data() const { return data_.get(); } int m() const { return dim_; } @@ -78,6 +76,8 @@ class ReplicatedMatrix } ReplicatedMatrix& operator=(const ReplicatedMatrix& rhs); + void assign(const double* const src, const int ld); + void assign(const ReplicatedMatrix& src, const int ib, const int jb); template @@ -126,6 +126,7 @@ class ReplicatedMatrix int iamax(const int j, double& val); double norm(char ty); double traceProduct(const ReplicatedMatrix&) const; + void shift(const double); void print( std::ostream& os, const int, const int, const int, const int) const; @@ -138,5 +139,3 @@ class ReplicatedMatrix void rotateSym(ReplicatedMatrix&, const ReplicatedMatrix&, ReplicatedMatrix&); #endif - -#endif diff --git a/src/ReplicatedMatrix2SquareLocalMatrices.cc b/src/ReplicatedMatrix2SquareLocalMatrices.cc new file mode 100644 index 00000000..3a2b0855 --- /dev/null +++ b/src/ReplicatedMatrix2SquareLocalMatrices.cc @@ -0,0 +1,52 @@ +// Copyright (c) 2017, Lawrence Livermore National Security, LLC and +// UT-Battelle, LLC. +// Produced at the Lawrence Livermore National Laboratory and the Oak Ridge +// National Laboratory. +// LLNL-CODE-743438 +// All rights reserved. +// This file is part of MGmol. For details, see https://github.com/llnl/mgmol. 
+// Please also read this link https://github.com/llnl/mgmol/LICENSE + +#include "ReplicatedMatrix2SquareLocalMatrices.h" + +ReplicatedMatrix2SquareLocalMatrices* + ReplicatedMatrix2SquareLocalMatrices::pinstance_ + = nullptr; +std::vector> + ReplicatedMatrix2SquareLocalMatrices::global_indexes_; + +Timer ReplicatedMatrix2SquareLocalMatrices::convert_tm_( + "ReplicatedMatrix2SquareLocalMatrices::convert"); + +void ReplicatedMatrix2SquareLocalMatrices::convert(const ReplicatedMatrix& rmat, + SquareLocalMatrices& lmat) +{ + convert_tm_.start(); + + const short nd = lmat.nmat(); + const int dim = lmat.n(); + const int nst = rmat.ld(); + + for (short i = 0; i < nd; i++) + { + double* dst = lmat.getSubMatrix(i); + double* src = rmat.data(); + for (int jj = 0; jj < dim; jj++) + { + const int st2 = global_indexes_[i][jj]; + if (st2 != -1) + { + for (int ii = 0; ii < dim; ii++) + { + const int st1 = global_indexes_[i][ii]; + if (st1 != -1) + { + dst[ii + dim * jj] = src[st1 + nst * st2]; + } + } + } + } + } + + convert_tm_.stop(); +} diff --git a/src/ReplicatedMatrix2SquareLocalMatrices.h b/src/ReplicatedMatrix2SquareLocalMatrices.h new file mode 100644 index 00000000..b3e05005 --- /dev/null +++ b/src/ReplicatedMatrix2SquareLocalMatrices.h @@ -0,0 +1,53 @@ +// Copyright (c) 2017, Lawrence Livermore National Security, LLC and +// UT-Battelle, LLC. +// Produced at the Lawrence Livermore National Laboratory and the Oak Ridge +// National Laboratory. +// LLNL-CODE-743438 +// All rights reserved. +// This file is part of MGmol. For details, see https://github.com/llnl/mgmol. 
+// Please also read this link https://github.com/llnl/mgmol/LICENSE + +#ifndef MGMOL_ReplicatedMatrix2SquareLocalMatrices_H +#define MGMOL_ReplicatedMatrix2SquareLocalMatrices_H + +#include "ReplicatedMatrix.h" +#include "SquareLocalMatrices.h" +#include "Timer.h" + +#include +#include + +class ReplicatedMatrix2SquareLocalMatrices +{ + static ReplicatedMatrix2SquareLocalMatrices* pinstance_; + + static Timer convert_tm_; + + static std::vector> global_indexes_; + +public: + static ReplicatedMatrix2SquareLocalMatrices* instance() + { + if (pinstance_ == nullptr) + { + pinstance_ = new ReplicatedMatrix2SquareLocalMatrices(); + } + return pinstance_; + } + + ReplicatedMatrix2SquareLocalMatrices() {} + + static void setup(const std::vector>& gids) + { + global_indexes_ = gids; + } + + ~ReplicatedMatrix2SquareLocalMatrices() {} + + void convert(const ReplicatedMatrix& dmat, + SquareLocalMatrices& lmat); + + static void printTimers(std::ostream& os) { convert_tm_.print(os); } +}; + +#endif diff --git a/src/ReplicatedVector.cc b/src/ReplicatedVector.cc index 85fdb69e..f5e07ff0 100644 --- a/src/ReplicatedVector.cc +++ b/src/ReplicatedVector.cc @@ -6,92 +6,132 @@ // All rights reserved. // This file is part of MGmol. For details, see https://github.com/llnl/mgmol. 
// Please also read this link https://github.com/llnl/mgmol/LICENSE -#ifdef HAVE_MAGMA - #include "ReplicatedVector.h" #include "memory_space.h" +#ifdef HAVE_MAGMA #include "magma_v2.h" +#else +#include "blas2_c.h" +#endif +#ifdef HAVE_MAGMA using MemoryDev = MemorySpace::Memory; +#else +using MemoryDev = MemorySpace::Memory; +#endif -ReplicatedVector::ReplicatedVector(const std::string name, const int n) - : dim_(n), device_data_(MemoryDev::allocate(dim_), MemoryDev::free) +ReplicatedVector::ReplicatedVector(const int n) + : dim_(n), data_(MemoryDev::allocate(dim_), MemoryDev::free) { } ReplicatedVector::ReplicatedVector(const ReplicatedVector& v) - : dim_(v.dim_), device_data_(MemoryDev::allocate(dim_), MemoryDev::free) + : dim_(v.dim_), data_(MemoryDev::allocate(dim_), MemoryDev::free) { +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - magma_dcopy(dim_, v.device_data_.get(), 1, device_data_.get(), 1, - magma_singleton.queue_); + magma_dcopy(dim_, v.data_.get(), 1, data_.get(), 1, magma_singleton.queue_); +#else + memcpy(data_.get(), v.data_.get(), dim_ * sizeof(double)); +#endif } ReplicatedVector::ReplicatedVector(const std::vector& v) - : dim_(v.size()), device_data_(MemoryDev::allocate(dim_), MemoryDev::free) + : dim_(v.size()), data_(MemoryDev::allocate(dim_), MemoryDev::free) { +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - magma_dsetvector( - dim_, v.data(), 1, device_data_.get(), 1, magma_singleton.queue_); + magma_dsetvector(dim_, v.data(), 1, data_.get(), 1, magma_singleton.queue_); +#else + memcpy(data_.get(), v.data(), dim_ * sizeof(double)); +#endif } ReplicatedVector& ReplicatedVector::operator=(const ReplicatedVector& src) { +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - magma_dcopy(dim_, src.device_data_.get(), 1, device_data_.get(), 1, - magma_singleton.queue_); + magma_dcopy( + dim_, src.data_.get(), 1, data_.get(), 1, 
magma_singleton.queue_); +#else + memcpy(data_.get(), src.data_.get(), dim_ * sizeof(double)); +#endif return *this; } void ReplicatedVector::axpy(const double alpha, const ReplicatedVector& x) { +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - magma_daxpy(dim_, alpha, x.device_data_.get(), 1, device_data_.get(), 1, - magma_singleton.queue_); + magma_daxpy( + dim_, alpha, x.data_.get(), 1, data_.get(), 1, magma_singleton.queue_); +#else + int ione = 1; + double a = alpha; + DAXPY(&dim_, &a, x.data_.get(), &ione, data_.get(), &ione); +#endif } void ReplicatedVector::clear() { +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); std::vector zero(dim_, 0.); magma_dsetvector( - dim_, zero.data(), 1, device_data_.get(), 1, magma_singleton.queue_); + dim_, zero.data(), 1, data_.get(), 1, magma_singleton.queue_); +#else + memset(data_.get(), 0., dim_ * sizeof(double)); +#endif } double ReplicatedVector::nrm2() { +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - return magma_dnrm2(dim_, device_data_.get(), 1, magma_singleton.queue_); + return magma_dnrm2(dim_, data_.get(), 1, magma_singleton.queue_); +#else + int ione = 1; + return DNRM2(&dim_, data_.get(), &ione); +#endif } double ReplicatedVector::dot(const ReplicatedVector& v) { +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - return magma_ddot(dim_, device_data_.get(), 1, v.device_data_.get(), 1, - magma_singleton.queue_); + return magma_ddot( + dim_, data_.get(), 1, v.data_.get(), 1, magma_singleton.queue_); +#else + int ione = 1; + return DDOT(&dim_, data_.get(), &ione, v.data_.get(), &ione); +#endif } void ReplicatedVector::gemv(const char trans, const double alpha, const ReplicatedMatrix& a, const ReplicatedVector& b, const double beta) { +#ifdef HAVE_MAGMA magma_trans_t magma_trans = magma_trans_const(trans); auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - 
magmablas_dgemv(magma_trans, dim_, dim_, alpha, a.device_data_.get(), a.ld_, - b.device_data_.get(), 1, beta, device_data_.get(), 1, - magma_singleton.queue_); -} - + magmablas_dgemv(magma_trans, dim_, dim_, alpha, a.data_.get(), a.ld_, + b.data_.get(), 1, beta, data_.get(), 1, magma_singleton.queue_); +#else + int ione = 1; + int lda = a.ld_; + DGEMV(&trans, &dim_, &dim_, &alpha, a.data_.get(), &lda, b.data_.get(), + &ione, &beta, data_.get(), &ione); #endif +} diff --git a/src/ReplicatedVector.h b/src/ReplicatedVector.h index 4140a73b..a9c03e68 100644 --- a/src/ReplicatedVector.h +++ b/src/ReplicatedVector.h @@ -9,8 +9,6 @@ #ifndef MGMOL_REPLICATEDVECTOR_H #define MGMOL_REPLICATEDVECTOR_H -#ifdef HAVE_MAGMA - #include "ReplicatedMatrix.h" #include @@ -19,14 +17,14 @@ class ReplicatedVector { int dim_; - std::unique_ptr device_data_; + std::unique_ptr data_; public: - ReplicatedVector(const std::string name, const int n); + ReplicatedVector(const int n); ReplicatedVector(const ReplicatedVector&); ReplicatedVector(const std::vector&); ReplicatedVector& operator=(const ReplicatedVector&); - double* data() { return device_data_.get(); } + double* data() { return data_.get(); } void clear(); double dot(const ReplicatedVector& v); double nrm2(); @@ -38,6 +36,5 @@ class ReplicatedVector void gemm(const char transa, const char transb, const double alpha, const ReplicatedMatrix&, const ReplicatedVector&, const double beta); }; -#endif #endif diff --git a/src/ReplicatedWorkSpace.cc b/src/ReplicatedWorkSpace.cc index b6b5cb83..da76e2f7 100644 --- a/src/ReplicatedWorkSpace.cc +++ b/src/ReplicatedWorkSpace.cc @@ -12,6 +12,7 @@ #include "MGmol_MPI.h" #include "MGmol_blas1.h" +#include #include template @@ -54,13 +55,14 @@ void ReplicatedWorkSpace::initSquareMatrix( distmat.allgather(square_matrix_, ndim_); } -#ifdef HAVE_MAGMA template void ReplicatedWorkSpace::initSquareMatrix( const ReplicatedMatrix& mat) { - mat.get(square_matrix_, ndim_ * ndim_); + assert(square_matrix_ != 
nullptr); + assert(ndim_ > 0); + + mat.get(square_matrix_, ndim_); } -#endif template class ReplicatedWorkSpace; diff --git a/src/ReplicatedWorkSpace.h b/src/ReplicatedWorkSpace.h index 918481f9..c4d66f59 100644 --- a/src/ReplicatedWorkSpace.h +++ b/src/ReplicatedWorkSpace.h @@ -40,7 +40,11 @@ class ReplicatedWorkSpace } ReplicatedWorkSpace(const ReplicatedWorkSpace&); - ~ReplicatedWorkSpace() { delete[] square_matrix_; } + ~ReplicatedWorkSpace() + { + delete[] square_matrix_; + square_matrix_ = nullptr; + } public: Timer mpisum_tm() { return mpisum_tm_; } @@ -70,9 +74,7 @@ class ReplicatedWorkSpace void setUpperTriangularSquareMatrixToZero(); void initSquareMatrix(const dist_matrix::DistMatrix& tmat); -#ifdef HAVE_MAGMA void initSquareMatrix(const ReplicatedMatrix& mat); -#endif int getDim() { return ndim_; } }; diff --git a/src/Rho.cc b/src/Rho.cc index e80e74fe..7b104573 100644 --- a/src/Rho.cc +++ b/src/Rho.cc @@ -302,7 +302,7 @@ void Rho::computeRho( { proj_matrices.updateSubMatX(); - if (std::is_same::value) + if (std::is_same>::value) { SquareLocalMatrices& localX( (orbitals.projMatrices())->getLocalX()); @@ -569,33 +569,34 @@ void Rho::printTimers(std::ostream& os) compute_blas_tm_.print(os); } -template class Rho; -template class Rho; +template class Rho>; +template class Rho>; -template double Rho::dotWithRho( +template double Rho>::dotWithRho( const double* const func) const; -template double Rho::dotWithRho( +template double Rho>::dotWithRho( const double* const func) const; -template void -Rho::computeRho>( - ExtendedGridOrbitals&, ExtendedGridOrbitals&, - const dist_matrix::DistMatrix&, +template void Rho>::computeRho< + dist_matrix::DistMatrix>(ExtendedGridOrbitals&, + ExtendedGridOrbitals&, const dist_matrix::DistMatrix&, const dist_matrix::DistMatrix&, const dist_matrix::DistMatrix&, const dist_matrix::DistMatrix&); +template void Rho>::computeRho< + dist_matrix::DistMatrix>( + ExtendedGridOrbitals&, const dist_matrix::DistMatrix&); template void 
-Rho::computeRho>( - ExtendedGridOrbitals&, const dist_matrix::DistMatrix&); -template void Rho::computeRho>( - LocGridOrbitals&, const dist_matrix::DistMatrix&); +Rho>::computeRho>( + LocGridOrbitals&, const dist_matrix::DistMatrix&); #ifdef MGMOL_USE_MIXEDP -template double Rho::dotWithRho( +template double Rho>::dotWithRho( const float* const func) const; #endif -#ifdef HAVE_MAGMA -template void Rho::computeRho( - ExtendedGridOrbitals&, const ReplicatedMatrix&); -template void Rho::computeRho( - ExtendedGridOrbitals&, ExtendedGridOrbitals&, const ReplicatedMatrix&, - const ReplicatedMatrix&, const ReplicatedMatrix&, const ReplicatedMatrix&); -#endif +template void Rho>::computeRho( + ExtendedGridOrbitals&, const ReplicatedMatrix&); +template void Rho>::computeRho( + ExtendedGridOrbitals&, ExtendedGridOrbitals&, + const ReplicatedMatrix&, const ReplicatedMatrix&, const ReplicatedMatrix&, + const ReplicatedMatrix&); +template void Rho>::computeRho( + LocGridOrbitals&, const ReplicatedMatrix&); diff --git a/src/SP2.cc b/src/SP2.cc index fa007212..6ff7625a 100644 --- a/src/SP2.cc +++ b/src/SP2.cc @@ -231,7 +231,6 @@ void SP2::getDM(dist_matrix::DistMatrix& submatM, // output getdm_tm_.stop(); } -#ifdef HAVE_MAGMA template <> void SP2::getDM(ReplicatedMatrix& submatM, // output const ReplicatedMatrix& invS) @@ -247,4 +246,3 @@ void SP2::getDM(ReplicatedMatrix& submatM, // output getdm_tm_.stop(); } -#endif diff --git a/src/SinCosOps.cc b/src/SinCosOps.cc index 85f4ea79..cd3f7a8c 100644 --- a/src/SinCosOps.cc +++ b/src/SinCosOps.cc @@ -39,7 +39,7 @@ void SinCosOps::compute(const T& orbitals, vector>& a) int n2 = numst * numst; - int loc_length = dim0 / orbitals.subdivx_; + int loc_length = dim0 / orbitals.subdivx(); assert(loc_length > 0); assert(loc_length <= dim0); @@ -64,7 +64,7 @@ void SinCosOps::compute(const T& orbitals, vector>& a) MemorySpace::Memory::copy_view_to_host( orbitals.psi(0), size_psi, psi_view); - for (short iloc = 0; iloc < orbitals.subdivx_; 
iloc++) + for (short iloc = 0; iloc < orbitals.subdivx(); iloc++) { for (int icolor = 0; icolor < size; icolor++) @@ -153,7 +153,7 @@ void SinCosOps::computeSquare(const T& orbitals, vector>& a) const int dim1 = grid.dim(1); const int dim2 = grid.dim(2); - int loc_length = dim0 / orbitals.subdivx_; + int loc_length = dim0 / orbitals.subdivx(); assert(loc_length > 0); assert(loc_length <= dim0); @@ -200,7 +200,7 @@ void SinCosOps::computeSquare(const T& orbitals, vector>& a) } const int size = orbitals.chromatic_number(); - for (short iloc = 0; iloc < orbitals.subdivx_; iloc++) + for (short iloc = 0; iloc < orbitals.subdivx(); iloc++) { for (int icolor = 0; icolor < size; icolor++) @@ -274,7 +274,7 @@ void SinCosOps::computeSquare1D( int n2 = numst * numst; - int loc_length = dim0 / orbitals.subdivx_; + int loc_length = dim0 / orbitals.subdivx(); assert(loc_length > 0); assert(loc_length <= dim0); @@ -298,7 +298,7 @@ void SinCosOps::computeSquare1D( } const int size = orbitals.chromatic_number(); - for (short iloc = 0; iloc < orbitals.subdivx_; iloc++) + for (short iloc = 0; iloc < orbitals.subdivx(); iloc++) { for (int icolor = 0; icolor < size; icolor++) @@ -366,7 +366,7 @@ void SinCosOps::compute1D( int n2 = numst * numst; - int loc_length = dim0 / orbitals.subdivx_; + int loc_length = dim0 / orbitals.subdivx(); assert(loc_length > 0); assert(loc_length <= dim0); @@ -389,7 +389,7 @@ void SinCosOps::compute1D( const int size = orbitals.chromatic_number(); - for (short iloc = 0; iloc < orbitals.subdivx_; iloc++) + for (short iloc = 0; iloc < orbitals.subdivx(); iloc++) { for (int icolor = 0; icolor < size; icolor++) @@ -466,7 +466,7 @@ void SinCosOps::computeDiag2states( color_st[ic] = orbitals.getColor(st[ic]); } - int loc_length = dim0 / orbitals.subdivx_; + int loc_length = dim0 / orbitals.subdivx(); assert(loc_length > 0); assert(loc_length <= dim0); @@ -487,7 +487,7 @@ void SinCosOps::computeDiag2states( const short mycolor = color_st[ic]; if (mycolor >= 0) - 
for (short iloc = 0; iloc < orbitals.subdivx_; iloc++) + for (short iloc = 0; iloc < orbitals.subdivx(); iloc++) { if (orbitals.overlapping_gids_[iloc][mycolor] == st[ic]) @@ -558,7 +558,7 @@ void SinCosOps::compute2states( int n2 = 4; - int loc_length = dim0 / orbitals.subdivx_; + int loc_length = dim0 / orbitals.subdivx(); assert(loc_length > 0); assert(loc_length <= dim0); @@ -578,7 +578,7 @@ void SinCosOps::compute2states( const int mycolor = color_st[ic]; if (mycolor >= 0) - for (short iloc = 0; iloc < orbitals.subdivx_; iloc++) + for (short iloc = 0; iloc < orbitals.subdivx(); iloc++) { if (orbitals.overlapping_gids_[iloc][mycolor] == st[ic]) @@ -656,7 +656,7 @@ void SinCosOps::compute( const int dim1 = grid.dim(1); const int dim2 = grid.dim(2); - int loc_length = dim0 / orbitals1.subdivx_; + int loc_length = dim0 / orbitals1.subdivx(); assert(loc_length > 0); assert(loc_length <= dim0); @@ -671,9 +671,8 @@ void SinCosOps::compute( vector cosz; grid.getSinCosFunctions(sinx, siny, sinz, cosx, cosy, cosz); - for (short iloc = 0; iloc < orbitals1.subdivx_; iloc++) + for (short iloc = 0; iloc < orbitals1.subdivx(); iloc++) { - for (int color = 0; color < orbitals1.chromatic_number(); color++) { int i = orbitals1.overlapping_gids_[iloc][color]; @@ -741,7 +740,7 @@ void SinCosOps::computeDiag(const T& orbitals, const int dim1 = grid.dim(1); const int dim2 = grid.dim(2); - int loc_length = dim0 / orbitals.subdivx_; + int loc_length = dim0 / orbitals.subdivx(); assert(loc_length > 0); assert(loc_length <= dim0); @@ -768,7 +767,7 @@ void SinCosOps::computeDiag(const T& orbitals, const int size = orbitals.chromatic_number(); - for (short iloc = 0; iloc < orbitals.subdivx_; iloc++) + for (short iloc = 0; iloc < orbitals.subdivx(); iloc++) { for (short icolor = 0; icolor < size; icolor++) { @@ -830,5 +829,5 @@ void SinCosOps::computeDiag(const T& orbitals, compute_tm_.stop(); } -template class SinCosOps; -template class SinCosOps; +template class SinCosOps>; +template 
class SinCosOps>; diff --git a/src/SpreadPenalty.cc b/src/SpreadPenalty.cc index d3979f5c..3e13f31e 100644 --- a/src/SpreadPenalty.cc +++ b/src/SpreadPenalty.cc @@ -317,5 +317,5 @@ double SpreadPenalty::evaluateEnergy(const T& phi) return alpha_ * total_energy; } -template class SpreadPenalty; -template class SpreadPenalty; +template class SpreadPenalty>; +template class SpreadPenalty>; diff --git a/src/SpreadPenaltyVolume.cc b/src/SpreadPenaltyVolume.cc index 334c3455..24ad725f 100644 --- a/src/SpreadPenaltyVolume.cc +++ b/src/SpreadPenaltyVolume.cc @@ -307,5 +307,5 @@ double SpreadPenaltyVolume::evaluateEnergy(const T& phi) return 0.; } -template class SpreadPenaltyVolume; -template class SpreadPenaltyVolume; +template class SpreadPenaltyVolume>; +template class SpreadPenaltyVolume>; diff --git a/src/SpreadsAndCenters.cc b/src/SpreadsAndCenters.cc index d06c54e7..cfe5335c 100644 --- a/src/SpreadsAndCenters.cc +++ b/src/SpreadsAndCenters.cc @@ -545,5 +545,5 @@ void SpreadsAndCenters::computeSinCosDiag( mat, orbitals.getAllOverlappingGids(), orbitals.getLocalGids()); } -template class SpreadsAndCenters; -template class SpreadsAndCenters; +template class SpreadsAndCenters>; +template class SpreadsAndCenters>; diff --git a/src/SubspaceProjector.cc b/src/SubspaceProjector.cc index fe412085..67702cb4 100644 --- a/src/SubspaceProjector.cc +++ b/src/SubspaceProjector.cc @@ -82,5 +82,5 @@ void SubspaceProjector::projectOut( orbitals.incrementIterativeIndex(); } -template class SubspaceProjector; -template class SubspaceProjector; +template class SubspaceProjector>; +template class SubspaceProjector>; diff --git a/src/computeHij.cc b/src/computeHij.cc index 75b8ef86..5abd2a2c 100644 --- a/src/computeHij.cc +++ b/src/computeHij.cc @@ -29,53 +29,23 @@ #include "SquareSubMatrix2DistMatrix.h" template <> -void MGmol::addHlocal2matrix(LocGridOrbitals& orbitalsi, - LocGridOrbitals& orbitalsj, VariableSizeMatrix& mat) -{ - computeHij_tm_.start(); - -#if DEBUG - os_ << " 
addHlocal2matrix()" << endl; -#endif - - hamiltonian_->addHlocal2matrix(orbitalsi, orbitalsj, mat, true); - - computeHij_tm_.stop(); -} - template <> -template <> -void MGmol::addHlocal2matrix(LocGridOrbitals& orbitalsi, - LocGridOrbitals& orbitalsj, dist_matrix::DistMatrix& H) -{ - computeHij_tm_.start(); - -#if DEBUG - os_ << " addHlocal2matrix()" << endl; -#endif - - hamiltonian_->addHlocal2matrix(orbitalsi, orbitalsj, H); - - computeHij_tm_.stop(); -} - -template <> -template <> -void MGmol::computeHij(LocGridOrbitals& orbitals_i, - LocGridOrbitals& orbitals_j, const Ions& ions, +void MGmol>::computeHij( + LocGridOrbitals& orbitals_i, + LocGridOrbitals& orbitals_j, const Ions& ions, const KBPsiMatrixSparse* const kbpsi_i, const KBPsiMatrixSparse* const kbpsi_j, VariableSizeMatrix& mat, const bool consolidate) { #ifdef PRINT_OPERATIONS - if (onpe0) os_ << "computeHij()" << endl; + if (onpe0) os_ << "computeHij() at line " << __LINE__ << std::endl; #endif // compute phi_i^T*Hnl*Phi_j kbpsi_i->computeHvnlMatrix(kbpsi_j, ions, mat); // add local Hamiltonian part to phi_i^T*H*phi_j - addHlocal2matrix(orbitals_i, orbitals_j, mat); + hamiltonian_->addHlocal2matrix(orbitals_i, orbitals_j, mat, true); // sum matrix elements among processors if (consolidate) @@ -103,19 +73,20 @@ void MGmol::computeHij(LocGridOrbitals& orbitals_i, } template <> -void MGmol::computeHij(LocGridOrbitals& orbitals_i, - LocGridOrbitals& orbitals_j, const Ions& ions, +void MGmol>::computeHij( + LocGridOrbitals& orbitals_i, + LocGridOrbitals& orbitals_j, const Ions& ions, const KBPsiMatrixSparse* const kbpsi, VariableSizeMatrix& mat, const bool consolidate) { #ifdef PRINT_OPERATIONS - if (onpe0) os_ << "computeHij()" << endl; + if (onpe0) os_ << "computeHij() at line " << __LINE__ << std::endl; #endif kbpsi->computeHvnlMatrix(ions, mat); // add local Hamiltonian part to phi^T*H*phi - addHlocal2matrix(orbitals_i, orbitals_j, mat); + hamiltonian_->addHlocal2matrix(orbitals_i, orbitals_j, mat, 
true); // sum matrix elements among processors if (consolidate) @@ -162,7 +133,7 @@ void MGmol::computeHij_private(OrbitalsType& orbitals_i, dist_matrix::DistMatrix& hij) { #ifdef PRINT_OPERATIONS - if (onpe0) os_ << "computeHij()" << endl; + if (onpe0) os_ << "computeHij() at line " << __LINE__ << std::endl; #endif hij.clear(); @@ -173,13 +144,14 @@ void MGmol::computeHij_private(OrbitalsType& orbitals_i, ss2dm->accumulate(submat, hij, 0.); // add local Hamiltonian part to phi^T*H*phi - addHlocal2matrix(orbitals_i, orbitals_j, hij); + hamiltonian_->addHlocal2matrix(orbitals_i, orbitals_j, hij, true); } template <> template <> -void MGmol::computeHij(LocGridOrbitals& orbitals_i, - LocGridOrbitals& orbitals_j, const Ions& ions, +void MGmol>::computeHij( + LocGridOrbitals& orbitals_i, + LocGridOrbitals& orbitals_j, const Ions& ions, const KBPsiMatrixSparse* const kbpsi, const KBPsiMatrixSparse* const kbpsi_j, dist_matrix::DistMatrix& hij, const bool consolidate) @@ -191,8 +163,9 @@ void MGmol::computeHij(LocGridOrbitals& orbitals_i, template <> template <> -void MGmol::computeHij(ExtendedGridOrbitals& orbitals_i, - ExtendedGridOrbitals& orbitals_j, const Ions& ions, +void MGmol>::computeHij( + ExtendedGridOrbitals& orbitals_i, + ExtendedGridOrbitals& orbitals_j, const Ions& ions, const KBPsiMatrixSparse* const kbpsi, const KBPsiMatrixSparse* const kbpsi_j, dist_matrix::DistMatrix& hij, const bool consolidate) @@ -219,7 +192,7 @@ void MGmol::computeHij_private(OrbitalsType& orbitals_i, const KBPsiMatrixSparse* const kbpsi, dist_matrix::DistMatrix& hij) { #ifdef PRINT_OPERATIONS - if (onpe0) os_ << "computeHij()" << endl; + if (onpe0) os_ << "computeHij() at line" << __LINE__ << std::endl; #endif SquareSubMatrix submat(kbpsi->computeHvnlMatrix(ions)); @@ -228,7 +201,7 @@ void MGmol::computeHij_private(OrbitalsType& orbitals_i, ss2dm->accumulate(submat, hij, 0.); // add local Hamiltonian part to phi^T*H*phi - addHlocal2matrix(orbitals_i, orbitals_j, hij); + 
hamiltonian_->addHlocal2matrix(orbitals_i, orbitals_j, hij, false); } template @@ -238,7 +211,7 @@ void MGmol::computeHij(OrbitalsType& orbitals_i, ProjectedMatricesInterface* projmatrices) { #ifdef PRINT_OPERATIONS - if (onpe0) os_ << "computeHij()" << endl; + if (onpe0) os_ << "computeHij() at line " << __LINE__ << std::endl; #endif kbpsi->computeHvnlMatrix(ions, projmatrices); @@ -247,20 +220,6 @@ void MGmol::computeHij(OrbitalsType& orbitals_i, hamiltonian_->addHlocalij(orbitals_i, orbitals_j, projmatrices); } -template -void MGmol::getKBPsiAndHij(OrbitalsType& orbitals_i, - OrbitalsType& orbitals_j, Ions& ions, KBPsiMatrixSparse* kbpsi, - ProjectedMatricesInterface* projmatrices, - dist_matrix::DistMatrix& hij) -{ - kbpsi->computeAll(ions, orbitals_i); - - computeHij(orbitals_i, orbitals_j, ions, kbpsi, hij, true); - - projmatrices->setHiterativeIndex(orbitals_j.getIterativeIndex(), - hamiltonian_->potential().getIterativeIndex()); -} - template void MGmol::getKBPsiAndHij(OrbitalsType& orbitals_i, OrbitalsType& orbitals_j, Ions& ions, KBPsiMatrixSparse* kbpsi, @@ -276,13 +235,6 @@ void MGmol::getKBPsiAndHij(OrbitalsType& orbitals_i, hamiltonian_->potential().getIterativeIndex()); } -template -void MGmol::getKBPsiAndHij(OrbitalsType& orbitals, Ions& ions, - KBPsiMatrixSparse* kbpsi, dist_matrix::DistMatrix& hij) -{ - getKBPsiAndHij(orbitals, orbitals, ions, kbpsi, proj_matrices_.get(), hij); -} - template void MGmol::getKBPsiAndHij(OrbitalsType& orbitals, Ions& ions) { @@ -296,7 +248,7 @@ void MGmol::computeHnlPhiAndAdd2HPhi(Ions& ions, { // H_nl #ifdef PRINT_OPERATIONS - if (onpe0) os_ << "computeHnlPhiAndAdd2HPhi()" << endl; + if (onpe0) os_ << "computeHnlPhiAndAdd2HPhi()" << std::endl; #endif Control& ct = *(Control::instance()); @@ -334,8 +286,8 @@ void MGmol::computeHnlPhiAndAdd2HPhi(Ions& ions, memory_space_type>::copy_view_to_host(hpsi, numpt, hpsi_host_view); - LinearAlgebraUtils::MPaxpy( - numpt, 1., work.data() + numpt * icolor, hpsi_host_view); + 
LinearAlgebraUtils::MPaxpy(numpt, + (ORBDTYPE)1., work.data() + numpt * icolor, hpsi_host_view); MemorySpace::Memory::copy_view_to_dev(hpsi_host_view, numpt, @@ -360,7 +312,7 @@ void MGmol::computeHnlPhiAndAdd2HPhi(Ions& ions, hpsi_host_view); LinearAlgebraUtils::MPaxpy( - numpt, 1., hnl, hpsi_host_view); + numpt, (ORBDTYPE)1., hnl, hpsi_host_view); MemorySpace::Memory::copy_view_to_dev(hpsi_host_view, @@ -376,30 +328,6 @@ void MGmol::computeHnlPhiAndAdd2HPhi(Ions& ions, hphi.setIterativeIndex(phi.getIterativeIndex()); } -template -template -void MGmol::addHlocal2matrix( - OrbitalsType& orbitalsi, OrbitalsType& orbitalsj, MatrixType& mat) -{ - computeHij_tm_.start(); - -#if DEBUG - os_ << " addHlocal2matrix()" << endl; -#endif - - // add local H to mat - hamiltonian_->addHlocal2matrix(orbitalsi, orbitalsj, mat); - - computeHij_tm_.stop(); -} - -template -void MGmol::getHpsiAndTheta( - Ions& ions, OrbitalsType& phi, OrbitalsType& hphi) -{ - getHpsiAndTheta(ions, phi, hphi, g_kbpsi_.get()); -} - template void MGmol::getHpsiAndTheta(Ions& ions, OrbitalsType& phi, OrbitalsType& hphi, const KBPsiMatrixSparse* const kbpsi) @@ -410,11 +338,11 @@ void MGmol::getHpsiAndTheta(Ions& ions, OrbitalsType& phi, const int phi_it_index = phi.getIterativeIndex(); -#if DEBUG - os_ << " getHpsiAndTheta" << endl; +#ifdef PRINT_OPERATIONS + os_ << " getHpsiAndTheta" << std::endl; #endif - hphi.assign(hamiltonian_->applyLocal(phi)); + hamiltonian_->applyLocal(phi.chromatic_number(), phi, hphi); // Compute "nstates" columns of matrix // Hij = phi**T * H_loc * phi and save in sh @@ -423,13 +351,14 @@ void MGmol::getHpsiAndTheta(Ions& ions, OrbitalsType& phi, { #ifdef PRINT_OPERATIONS if (onpe0) - os_ << "Hij matrix up to date, no computation necessary" << endl; + os_ << "Hij matrix up to date, no computation necessary" + << std::endl; #endif } else { #ifdef PRINT_OPERATIONS - if (onpe0) os_ << "build matrix Hij = Phi**T * H * Phi" << endl; + if (onpe0) os_ << "build matrix Hij = Phi**T * H 
* Phi" << std::endl; #endif proj_matrices_->clearSparseH(); @@ -438,7 +367,13 @@ void MGmol::getHpsiAndTheta(Ions& ions, OrbitalsType& phi, kbpsi->computeHvnlMatrix(ions, proj_matrices_.get()); // add local part of H to sh - hamiltonian_->addHlocalij(phi, proj_matrices_.get()); + SquareLocalMatrices slm( + phi.subdivx(), phi.chromatic_number()); + + phi.computeLocalProduct(hphi, slm); + proj_matrices_->setLocalMatrixElementsHl(slm); + + proj_matrices_->consolidateH(); energy_->saveVofRho(); @@ -454,14 +389,5 @@ void MGmol::getHpsiAndTheta(Ions& ions, OrbitalsType& phi, get_Hpsi_and_Hij_tm_.stop(); } -template class MGmol; -template class MGmol; - -template void MGmol::addHlocal2matrix( - ExtendedGridOrbitals& orbitalsi, ExtendedGridOrbitals& orbitalsj, - dist_matrix::DistMatrix&); -#ifdef HAVE_MAGMA -template void MGmol::addHlocal2matrix( - ExtendedGridOrbitals& orbitalsi, ExtendedGridOrbitals& orbitalsj, - ReplicatedMatrix& mat); -#endif +template class MGmol>; +template class MGmol>; diff --git a/src/global.h b/src/global.h index a23e4e3b..a9176b60 100644 --- a/src/global.h +++ b/src/global.h @@ -23,20 +23,13 @@ typedef double ORBDTYPE; /* lmasktype sets the data type for the mask coeffs */ typedef ORBDTYPE lmasktype; -// typedef float lmasktype; typedef double RHODTYPE; -// typedef float RHODTYPE; typedef double MATDTYPE; -typedef float MGPRECONDTYPE; - typedef double POTDTYPE; -// typedef float POTDTYPE; - -typedef ORBDTYPE KBPROJDTYPE; -typedef float POISSONPRECONDTYPE; +typedef double KBPROJDTYPE; #endif diff --git a/src/lbfgsrlx.cc b/src/lbfgsrlx.cc index ce05c1a3..20326a94 100644 --- a/src/lbfgsrlx.cc +++ b/src/lbfgsrlx.cc @@ -138,7 +138,7 @@ void MGmol::lbfgsrlx(OrbitalsType** orbitals, Ions& ions) } } -template void MGmol::lbfgsrlx( - LocGridOrbitals** orbitals, Ions& ions); -template void MGmol::lbfgsrlx( - ExtendedGridOrbitals** orbitals, Ions& ions); +template void MGmol>::lbfgsrlx( + LocGridOrbitals** orbitals, Ions& ions); +template void 
MGmol>::lbfgsrlx( + ExtendedGridOrbitals** orbitals, Ions& ions); diff --git a/src/linear_algebra/lapack_c.h b/src/linear_algebra/lapack_c.h index 4a72f48f..3f275c84 100644 --- a/src/linear_algebra/lapack_c.h +++ b/src/linear_algebra/lapack_c.h @@ -13,6 +13,8 @@ #include "fc_mangle.h" typedef const char* const Pchar; +typedef const int* const Pint; +typedef const double* const Pdouble; #define dsygst DSYGST #define dtrtrs DTRTRS @@ -24,15 +26,15 @@ extern "C" { void DSYEV(Pchar, Pchar, const int* const, double*, const int* const, double*, double*, const int* const, int*); - void dsygv(const int* const, Pchar, Pchar, const int* const, double*, + void DSYGV(const int* const, Pchar, Pchar, const int* const, double*, const int* const, double*, const int* const, double*, double*, const int* const, int*); void DPOTRI(Pchar, const int* const, double*, const int* const, int*); void DPOTRF(Pchar, const int* const, double*, const int* const, int*); void DPOTRS(Pchar, const int* const, const int* const, double*, const int* const, double*, const int* const, int*); - void dgetrf(int*, int*, double*, int*, int*, int*); - void dgetrs(Pchar, int*, int*, double*, int*, int*, double*, int*, int*); + void DGETRF(int*, int*, double*, int*, int*, int*); + void DGETRS(Pchar, int*, int*, double*, int*, int*, double*, int*, int*); void dpocon(Pchar, const int* const, double*, const int* const, double*, double*, double*, const int* const, int*); void dtrtrs(Pchar, Pchar, Pchar, const int* const, const int* const, @@ -43,6 +45,7 @@ extern "C" void dgesvd(Pchar, Pchar, int*, int*, double*, int*, double*, double*, int*, double*, int*, double*, int*, int*); double dlange(Pchar, int*, int*, double*, int*, double*); + void DLACPY(Pchar, Pint, Pint, Pdouble, Pint, Pdouble, Pint); } #endif diff --git a/src/linear_algebra/mputils.cc b/src/linear_algebra/mputils.cc index 487d1367..b49ba366 100644 --- a/src/linear_algebra/mputils.cc +++ b/src/linear_algebra/mputils.cc @@ -38,9 +38,14 @@ Timer 
bligemm_tm("bligemm"); Timer dsyrk_tm("dsyrk"); Timer ssyrk_tm("ssyrk"); -Timer mpdot_tm("mpdot"); Timer ttdot_tm("ttdot"); +// Timers for hand written loops +Timer loopdot_tm("loopdot"); +Timer loopaxpy_tm("loopaxpy"); +Timer loopscal_tm("loopscal"); +Timer loopcp_tm("loopcp"); + /* Function definitions. See mputils.h for comments */ using LAU_H = LinearAlgebraUtils; @@ -69,6 +74,8 @@ void LAU_H::MPscal(const int len, const double scal, double* dptr) template <> void LAU_H::MPscal(const int len, const double scal, float* dptr) { + loopscal_tm.start(); + MemorySpace::assert_is_host_ptr(dptr); if (scal == 1.) @@ -85,6 +92,8 @@ void LAU_H::MPscal(const int len, const double scal, float* dptr) dptr[k] = static_cast(scal * val); } } + + loopscal_tm.stop(); } // MemorySpace::Device @@ -158,7 +167,7 @@ double LAU_H::MPdot( MemorySpace::assert_is_host_ptr(xptr); MemorySpace::assert_is_host_ptr(yptr); - mpdot_tm.start(); + loopdot_tm.start(); double dot = 0.; for (int k = 0; k < len; k++) @@ -168,7 +177,7 @@ double LAU_H::MPdot( dot += val1 * val2; } - mpdot_tm.stop(); + loopdot_tm.stop(); return dot; } @@ -227,6 +236,7 @@ double LAU_D::MPdot( /////////////////////////////// // MemorySpace::Host template <> +template <> void LAU_H::MPaxpy(const int len, double scal, const double* __restrict__ xptr, double* __restrict__ yptr) { @@ -238,10 +248,24 @@ void LAU_H::MPaxpy(const int len, double scal, const double* __restrict__ xptr, } template <> -template -void LAU_H::MPaxpy(const int len, double scal, const T1* __restrict__ xptr, - T2* __restrict__ yptr) +template <> +void LAU_H::MPaxpy(const int len, float scal, const float* __restrict__ xptr, + float* __restrict__ yptr) +{ + MemorySpace::assert_is_host_ptr(xptr); + MemorySpace::assert_is_host_ptr(yptr); + + const int one = 1; + SAXPY(&len, &scal, xptr, &one, yptr, &one); +} + +template <> +template +void LAU_H::MPaxpy( + const int len, T0 scal, const T1* __restrict__ xptr, T2* __restrict__ yptr) { + loopaxpy_tm.start(); + 
MemorySpace::assert_is_host_ptr(xptr); MemorySpace::assert_is_host_ptr(yptr); #pragma omp parallel for simd @@ -249,6 +273,8 @@ void LAU_H::MPaxpy(const int len, double scal, const T1* __restrict__ xptr, { yptr[k] += static_cast(scal * static_cast(xptr[k])); } + + loopaxpy_tm.stop(); } // MemorySpace::Device @@ -793,14 +819,22 @@ void MPcpy(float* const dest, const float* const src, const int n) void MPcpy( double* __restrict__ dest, const float* __restrict__ src, const int n) { + loopcp_tm.start(); + for (int i = 0; i < n; i++) dest[i] = src[i]; + + loopcp_tm.stop(); } void MPcpy( float* __restrict__ dest, const double* __restrict__ src, const int n) { + loopcp_tm.start(); + for (int i = 0; i < n; i++) dest[i] = src[i]; + + loopcp_tm.stop(); } template void LAU_H::MPgemm(const char transa, @@ -845,10 +879,15 @@ template double LAU_H::MPdot( const int len, const double* const xptr, const float* const yptr); template double LAU_H::MPdot( const int len, const float* const xptr, const double* const yptr); -template void LAU_H::MPaxpy(const int len, const double scal, - const float* __restrict__ xptr, double* __restrict__ yptr); -template void LAU_H::MPaxpy(const int len, const double scal, - const float* __restrict__ xptr, float* __restrict__ yptr); +template void LAU_H::MPaxpy(const int len, + const double scal, const float* __restrict__ xptr, + double* __restrict__ yptr); +template void LAU_H::MPaxpy(const int len, + const float scal, const float* __restrict__ xptr, + double* __restrict__ yptr); +template void LAU_H::MPaxpy(const int len, + const double scal, const float* __restrict__ xptr, + float* __restrict__ yptr); template void LAU_H::MPsyrk(const char uplo, const char trans, const int n, const int k, const double alpha, const double* const a, diff --git a/src/linear_algebra/mputils.h b/src/linear_algebra/mputils.h index fa838f17..d3f65afe 100644 --- a/src/linear_algebra/mputils.h +++ b/src/linear_algebra/mputils.h @@ -119,10 +119,8 @@ struct LinearAlgebraUtils 
/* mixed-precision vector times scalar plus vector. Accumulates results * in double precision and stores in single precision. */ - static void MPaxpy(const int len, double scal, - const double* __restrict__ xptr, double* __restrict__ yptr); - template - static void MPaxpy(const int len, double scal, const T1* __restrict__ xptr, + template + static void MPaxpy(const int len, T0 scal, const T1* __restrict__ xptr, T2* __restrict__ yptr); static void MPsyrk(const char uplo, const char trans, const int n, diff --git a/src/local_matrices/LocalMatrices.h b/src/local_matrices/LocalMatrices.h index d2aacf8f..615cfab1 100644 --- a/src/local_matrices/LocalMatrices.h +++ b/src/local_matrices/LocalMatrices.h @@ -71,7 +71,7 @@ class LocalMatrices int m() const { return m_; } - const DataType* getSubMatrix(const int iloc = 0) const + DataType* getSubMatrix(const int iloc = 0) const { assert(iloc < (int)ptr_matrices_.size()); assert(ptr_matrices_[iloc] != NULL); diff --git a/src/main.cc b/src/main.cc index 621b72de..7b69aeb3 100644 --- a/src/main.cc +++ b/src/main.cc @@ -96,11 +96,13 @@ int main(int argc, char** argv) { MGmolInterface* mgmol; if (ct.isLocMode()) - mgmol = new MGmol(global_comm, *MPIdata::sout, - coords_filename, lrs_filename, constraints_filename); + mgmol = new MGmol>(global_comm, + *MPIdata::sout, coords_filename, lrs_filename, + constraints_filename); else - mgmol = new MGmol(global_comm, *MPIdata::sout, - coords_filename, lrs_filename, constraints_filename); + mgmol = new MGmol>(global_comm, + *MPIdata::sout, coords_filename, lrs_filename, + constraints_filename); mgmol->setup(); diff --git a/src/manage_memory.cc b/src/manage_memory.cc index 94e0b278..f118fd6d 100644 --- a/src/manage_memory.cc +++ b/src/manage_memory.cc @@ -12,7 +12,7 @@ // Increase memory slots in BlockVector as needed based on runtime // options -template +template void increaseMemorySlotsForOrbitals() { Control& ct = *(Control::instance()); @@ -22,18 +22,18 @@ void 
increaseMemorySlotsForOrbitals() case OuterSolverType::ABPG: { // r_k-1, phi_k-1 - BlockVector::incMaxAllocInstances(2); + BlockVector::incMaxAllocInstances(2); break; } case OuterSolverType::PolakRibiere: { // r_k-1, z_k, z_k-1, p_k - BlockVector::incMaxAllocInstances(4); + BlockVector::incMaxAllocInstances(4); break; } case OuterSolverType::Davidson: { - BlockVector::incMaxAllocInstances(2); + BlockVector::incMaxAllocInstances(2); break; } default: @@ -44,17 +44,17 @@ void increaseMemorySlotsForOrbitals() { case WFExtrapolationType::Reversible: { - BlockVector::incMaxAllocInstances(2); + BlockVector::incMaxAllocInstances(2); break; } case WFExtrapolationType::Order2: { - BlockVector::incMaxAllocInstances(1); + BlockVector::incMaxAllocInstances(1); break; } case WFExtrapolationType::Order3: { - BlockVector::incMaxAllocInstances(2); + BlockVector::incMaxAllocInstances(2); break; } default: @@ -62,21 +62,21 @@ void increaseMemorySlotsForOrbitals() } for (short i = 1; i < ct.wf_m; i++) - BlockVector::incMaxAllocInstances(2); + BlockVector::incMaxAllocInstances(2); if (ct.use_kernel_functions) - BlockVector::incMaxAllocInstances(1); + BlockVector::incMaxAllocInstances(1); switch (ct.AtomsDynamic()) { case AtomsDynamicType::LBFGS: - BlockVector::incMaxAllocInstances(1); + BlockVector::incMaxAllocInstances(1); break; default: break; } } -template void increaseMemorySlotsForOrbitals(); +template void increaseMemorySlotsForOrbitals(); #ifdef HAVE_MAGMA -template void increaseMemorySlotsForOrbitals(); +template void increaseMemorySlotsForOrbitals(); #endif diff --git a/src/manage_memory.h b/src/manage_memory.h index 4e4c584c..864fed55 100644 --- a/src/manage_memory.h +++ b/src/manage_memory.h @@ -6,5 +6,5 @@ // All rights reserved. // This file is part of MGmol. For details, see https://github.com/llnl/mgmol. 
// Please also read this link https://github.com/llnl/mgmol/LICENSE -template +template void increaseMemorySlotsForOrbitals(); diff --git a/src/md.cc b/src/md.cc index 06a45e51..cc5d1d01 100644 --- a/src/md.cc +++ b/src/md.cc @@ -26,7 +26,6 @@ #include "Mesh.h" #include "OrbitalsExtrapolation.h" #include "OrbitalsExtrapolationFactory.h" -#include "OrbitalsPreconditioning.h" #include "Potentials.h" #include "ProjectedMatricesMehrstellen.h" #include "ProjectedMatricesSparse.h" @@ -412,7 +411,7 @@ void MGmol::md(OrbitalsType** orbitals, Ions& ions) if (ROM_MVP) { if (onpe0) os_ << "Setup ROM MVP solver..." << std::endl; - ExtendedGridOrbitals** extended_orbitals = reinterpret_cast(orbitals); + ExtendedGridOrbitals** extended_orbitals = reinterpret_cast**>(orbitals); (*extended_orbitals)->set(ct.getROMOptions().basis_file, ct.numst); (*extended_orbitals)->orthonormalizeLoewdin(); (*extended_orbitals)->setDataWithGhosts(true); @@ -856,5 +855,5 @@ void MGmol::loadRestartFile(const std::string filename) return; } -template class MGmol; -template class MGmol; +template class MGmol>; +template class MGmol>; diff --git a/src/mlwf.cc b/src/mlwf.cc index 3efdfeb2..dc920134 100644 --- a/src/mlwf.cc +++ b/src/mlwf.cc @@ -339,5 +339,5 @@ int MGmol::get_NOLMO(NOLMOTransform& noot, OrbitalsType& orbitals, return 0; } -template class MGmol; -template class MGmol; +template class MGmol>; +template class MGmol>; diff --git a/src/pb/GridFunc.cc b/src/pb/GridFunc.cc index c8bcf1df..f883d0d7 100644 --- a/src/pb/GridFunc.cc +++ b/src/pb/GridFunc.cc @@ -521,7 +521,7 @@ void GridFunc::scal(const double alpha) } template -void GridFunc::axpy(const double alpha, const GridFunc& vv) +void GridFunc::axpy(const T alpha, const GridFunc& vv) { assert(vv.grid_.sizeg() == grid_.sizeg()); diff --git a/src/pb/GridFunc.h b/src/pb/GridFunc.h index 699ed294..ab72c86d 100644 --- a/src/pb/GridFunc.h +++ b/src/pb/GridFunc.h @@ -192,7 +192,7 @@ class GridFunc : public GridFuncInterface GridFunc& 
operator/=(const GridFunc& B); - void axpy(const double alpha, const GridFunc& vv); + void axpy(const T alpha, const GridFunc& vv); void scal(const double alpha); void prod(const GridFunc& A, const GridFunc& B); void diff(const GridFunc& A, const GridFunc& B); diff --git a/src/pb/GridFuncVector.cc b/src/pb/GridFuncVector.cc index 64ffd867..5c3fbda5 100644 --- a/src/pb/GridFuncVector.cc +++ b/src/pb/GridFuncVector.cc @@ -1643,14 +1643,14 @@ void GridFuncVector::extend3D( template GridFuncVector& GridFuncVector::operator-=( - const GridFuncVector& func) + const GridFuncVector& func) { assert(func.grid_.sizeg() == grid_.sizeg()); assert(func.grid_.ghost_pt() == grid_.ghost_pt()); assert(this != &func); - LinearAlgebraUtils::MPaxpy( - nfunc_ * grid_.sizeg(), -1., func.memory_.get(), memory_.get()); + LinearAlgebraUtils::MPaxpy(nfunc_ * grid_.sizeg(), + (ScalarType)(-1.), func.memory_.get(), memory_.get()); updated_boundaries_ = (func.updated_boundaries_ && updated_boundaries_); @@ -1658,8 +1658,23 @@ GridFuncVector::operator-=( } template -void GridFuncVector::axpy( - const double alpha, const GridFuncVector& func) +template +void GridFuncVector::copyFrom( + const GridFuncVector& src) +{ + copy_tm_.start(); + + MPcpy(memory_.get(), src.getDataPtr(0), nfunc_ * grid_.sizeg()); + + updated_boundaries_ = src.getUpdatedBoundariesFlag(); + + copy_tm_.stop(); +} + +template +template +void GridFuncVector::axpy(const ScalarType2 alpha, + const GridFuncVector& func) { LinearAlgebraUtils::MPaxpy( nfunc_ * grid_.sizeg(), alpha, func.memory_.get(), memory_.get()); @@ -2419,7 +2434,7 @@ void GridFuncVector::jacobi(const int type, { applyLap(type, w); w -= B; - axpy(-1. * jacobiFactor, w); + axpy((ScalarType)(-1. 
* jacobiFactor), w); set_updated_boundaries(false); } @@ -2459,6 +2474,16 @@ template void GridFuncVector::pointwiseProduct( GridFuncVector& A, const GridFunc& B); template void GridFuncVector::pointwiseProduct( GridFuncVector& A, const GridFunc& B); + +template void GridFuncVector::axpy( + const float alpha, const GridFuncVector& func); +template void GridFuncVector::axpy( + const double alpha, const GridFuncVector& func); +template void GridFuncVector::copyFrom( + const GridFuncVector& src); +template void GridFuncVector::copyFrom( + const GridFuncVector& src); + #ifdef HAVE_MAGMA template class GridFuncVector; template class GridFuncVector; diff --git a/src/pb/GridFuncVector.h b/src/pb/GridFuncVector.h index 4798adea..88f115d8 100644 --- a/src/pb/GridFuncVector.h +++ b/src/pb/GridFuncVector.h @@ -38,6 +38,7 @@ class GridFuncVector static Timer wait_north_south_tm_; static Timer wait_up_down_tm_; static Timer wait_east_west_tm_; + static Timer copy_tm_; static Map2Masks* map2masks_; @@ -219,11 +220,13 @@ class GridFuncVector ScalarType* data() { return memory_.get(); } - ScalarType* getDataPtr(const int ifunc, const int index = 0) + ScalarType* getDataPtr(const int ifunc, const int index = 0) const { return memory_.get() + ifunc * grid_.sizeg() + index; } + bool getUpdatedBoundariesFlag() const { return updated_boundaries_; } + // assign values to one GridFunc from values in array src // (without ghosts) template @@ -462,9 +465,14 @@ class GridFuncVector void set_updated_boundaries(const bool flag) { updated_boundaries_ = flag; } GridFuncVector& operator-=( const GridFuncVector& func); - void axpy(const double alpha, + + template + void axpy(const ScalarType2 alpha, const GridFuncVector& func); + template + void copyFrom(const GridFuncVector& src); + template void getValues(const int k, InputScalarType* vv) const; @@ -489,12 +497,13 @@ class GridFuncVector finishExchangeNorthSouth_tm_.print(os); finishExchangeUpDown_tm_.print(os); 
finishExchangeEastWest_tm_.print(os); + copy_tm_.print(os); } }; template Timer GridFuncVector::trade_bc_tm_( - "GridFuncVector::trade_bc"); + "GridFuncVector::trade_bc_" + std::to_string(sizeof(ScalarType))); template Timer GridFuncVector::trade_bc_colors_tm_( "GridFuncVector::trade_bc_colors"); @@ -503,22 +512,25 @@ Timer GridFuncVector::prod_tm_( "GridFuncVector::prod"); template Timer GridFuncVector::finishExchangeNorthSouth_tm_( - "GridFuncVector::finishExNorthSouth"); + "GridFuncVector::finishExNorthSouth_" + std::to_string(sizeof(ScalarType))); template Timer GridFuncVector::finishExchangeUpDown_tm_( - "GridFuncVector::finishExUpDown"); + "GridFuncVector::finishExUpDown_" + std::to_string(sizeof(ScalarType))); template Timer GridFuncVector::finishExchangeEastWest_tm_( - "GridFuncVector::finishExEastWest"); + "GridFuncVector::finishExEastWest_" + std::to_string(sizeof(ScalarType))); template Timer GridFuncVector::wait_north_south_tm_( - "GridFuncVector::waitNS"); + "GridFuncVector::waitNS_" + std::to_string(sizeof(ScalarType))); template Timer GridFuncVector::wait_up_down_tm_( - "GridFuncVector::waitUD"); + "GridFuncVector::waitUD_" + std::to_string(sizeof(ScalarType))); template Timer GridFuncVector::wait_east_west_tm_( - "GridFuncVector::waitEW"); + "GridFuncVector::waitEW_" + std::to_string(sizeof(ScalarType))); +template +Timer GridFuncVector::copy_tm_( + "GridFuncVector::copy_" + std::to_string(sizeof(ScalarType))); template Map2Masks* GridFuncVector::map2masks_(nullptr); diff --git a/src/pb/Mgm.h b/src/pb/Mgm.h index 4b5ad68d..a821261a 100644 --- a/src/pb/Mgm.h +++ b/src/pb/Mgm.h @@ -35,12 +35,10 @@ bool Mgm(T1& A, T2& vh, const GridFunc& rho, const short cogr, // Compute r.h.s. 
from rho GridFunc res(rho); - A.transform(res); + A.transform(res); // to account for dielectric model GridFunc rhs(finegrid, bcx, bcy, bcz); - A.rhs(res, rhs); + A.rhs(res, rhs); // to account for possible Mehrstellen operator - // Hartree units - // work GridFunc GridFunc lhs(finegrid, bcx, bcy, bcz); short bcwork[3] = { bcx, bcy, bcz }; @@ -54,7 +52,6 @@ bool Mgm(T1& A, T2& vh, const GridFunc& rho, const short cogr, nb_sweeps = 0; for (short i = 0; i < max_sweeps; i++) { - A.apply(vh, lhs); // res=rhs-lhs; res.diff(rhs, lhs); diff --git a/src/quench.cc b/src/quench.cc index 6eed74bb..56305e59 100644 --- a/src/quench.cc +++ b/src/quench.cc @@ -28,11 +28,11 @@ #include "Ions.h" #include "KBPsiMatrixSparse.h" #include "LocalizationRegions.h" +#include "MGOrbitalsPreconditioning.h" #include "MGmol.h" #include "MPIdata.h" #include "MasksSet.h" #include "Mesh.h" -#include "OrbitalsPreconditioning.h" #include "OrbitalsTransform.h" #include "PolakRibiereSolver.h" #include "Potentials.h" @@ -52,8 +52,8 @@ Timer quench_evnl_tm("quench_evnl"); Timer updateCenters_tm("MGmol::updateCenters"); template <> -void MGmol::adaptLR( - const SpreadsAndCenters* /*spreadf*/, +void MGmol>::adaptLR( + const SpreadsAndCenters>* /*spreadf*/, const OrbitalsTransform* /*ot*/) { } @@ -397,7 +397,8 @@ void MGmol::disentangleOrbitals(OrbitalsType& orbitals, } template <> -void MGmol::applyAOMMprojection(LocGridOrbitals& orbitals) +void MGmol>::applyAOMMprojection( + LocGridOrbitals& orbitals) { aomm_.reset(new AOMMprojector(orbitals, lrs_)); aomm_->projectOut(orbitals); @@ -409,8 +410,9 @@ void MGmol::applyAOMMprojection(OrbitalsType&) } template <> -int MGmol::outerSolve(LocGridOrbitals& orbitals, - LocGridOrbitals& work_orbitals, Ions& ions, const int max_steps, +int MGmol>::outerSolve( + LocGridOrbitals& orbitals, + LocGridOrbitals& work_orbitals, Ions& ions, const int max_steps, const int iprint, double& last_eks) { int retval @@ -423,7 +425,7 @@ int MGmol::outerSolve(LocGridOrbitals& 
orbitals, case OuterSolverType::ABPG: case OuterSolverType::NLCG: { - DFTsolver solver(hamiltonian_.get(), + DFTsolver> solver(hamiltonian_.get(), proj_matrices_.get(), energy_.get(), electrostat_.get(), this, ions, rho_.get(), dm_strategy_.get(), os_); @@ -435,9 +437,10 @@ int MGmol::outerSolve(LocGridOrbitals& orbitals, case OuterSolverType::PolakRibiere: { - PolakRibiereSolver solver(hamiltonian_.get(), - proj_matrices_.get(), energy_.get(), electrostat_.get(), this, - ions, rho_.get(), dm_strategy_.get(), os_); + PolakRibiereSolver> solver( + hamiltonian_.get(), proj_matrices_.get(), energy_.get(), + electrostat_.get(), this, ions, rho_.get(), dm_strategy_.get(), + os_); retval = solver.solve( orbitals, work_orbitals, ions, max_steps, iprint, last_eks); @@ -497,16 +500,27 @@ int MGmol::outerSolve(OrbitalsType& orbitals, MGmol_MPI& mmpi = *(MGmol_MPI::instance()); const bool with_spin = (mmpi.nspin() > 1); -#ifdef HAVE_MAGMA - DavidsonSolver -#else - DavidsonSolver> -#endif - solver(os_, *ions_, hamiltonian_.get(), rho_.get(), - energy_.get(), electrostat_.get(), this, gids, ct.dm_mix, - with_spin); + if (ct.rmatrices) + { + DavidsonSolver + + solver(os_, *ions_, hamiltonian_.get(), rho_.get(), + energy_.get(), electrostat_.get(), this, gids, + ct.dm_mix, with_spin); + + retval = solver.solve(orbitals, work_orbitals); + } + else + { + DavidsonSolver> + + solver(os_, *ions_, hamiltonian_.get(), rho_.get(), + energy_.get(), electrostat_.get(), this, gids, + ct.dm_mix, with_spin); - retval = solver.solve(orbitals, work_orbitals); + retval = solver.solve(orbitals, work_orbitals); + } break; } @@ -561,9 +575,28 @@ int MGmol::quench(OrbitalsType& orbitals, Ions& ions, applyAOMMprojection(orbitals); } - orbitals_precond_.reset(new OrbitalsPreconditioning()); - orbitals_precond_->setup( - orbitals, ct.getMGlevels(), ct.lap_type, currentMasks_.get(), lrs_); + const short precision = ct.precond_precision_; + if (precision == 32) + { + orbitals_precond_.reset( + new 
MGOrbitalsPreconditioning( + ct.getMGlevels(), ct.lap_type)); + } + else if (precision == 64) + { + orbitals_precond_.reset( + new MGOrbitalsPreconditioning( + ct.getMGlevels(), ct.lap_type)); + } + else + { + std::cerr << "Unknown precision option for orbitals preconditioner!!!" + << std::endl; + MGmol_MPI& mmpi = *(MGmol_MPI::instance()); + mmpi.abort(); + } + + orbitals_precond_->setup(orbitals, currentMasks_.get(), lrs_); // solve electronic structure problem // (inner iterations) @@ -637,5 +670,5 @@ int MGmol::quench(OrbitalsType& orbitals, Ions& ions, return retval; } -template class MGmol; -template class MGmol; +template class MGmol>; +template class MGmol>; diff --git a/src/readInput.cc b/src/readInput.cc index bd6a7dd1..bcb823c4 100644 --- a/src/readInput.cc +++ b/src/readInput.cc @@ -201,5 +201,5 @@ int MGmol::readCoordinates( return 0; } -template class MGmol; -template class MGmol; +template class MGmol>; +template class MGmol>; diff --git a/src/read_config.cc b/src/read_config.cc index aefb59ae..e3272b3b 100644 --- a/src/read_config.cc +++ b/src/read_config.cc @@ -97,7 +97,9 @@ int read_config(int argc, char** argv, po::variables_map& vm, po::value()->default_value("periodic"), "boundary condition z")("Poisson.diel", po::value()->default_value("off"), - "continuum solvent: on/off")("Run.type", + "continuum solvent: on/off")("Poisson.precond_precision", + po::value()->default_value(32), + "Precision for Poisson Preconditioner")("Run.type", po::value()->default_value("QUENCH"), "Run type")( "Quench.solver", po::value()->default_value("ABPG"), "Iterative solver for quench")("Quench.max_steps", @@ -119,7 +121,9 @@ int read_config(int argc, char** argv, po::variables_map& vm, "Quench.preconditioner_num_levels", po::value()->default_value(2), "Number of levels for MG preconditioner")( - "Quench.spread_penalty_damping", + "Quench.preconditioner_precision", + po::value()->default_value(32), + "Precision for MG preconditioner")("Quench.spread_penalty_damping", 
po::value()->default_value(0.), "Spread penalty damping factor")("Quench.spread_penalty_target", po::value()->default_value(2.), @@ -177,7 +181,9 @@ int read_config(int argc, char** argv, po::variables_map& vm, "solver for projected matrices")("ProjectedMatrices.printMM", po::value()->default_value(false), "print projected matrices in MM format")( - "LocalizationRegions.radius", + "ProjectedMatrices.replicated", + po::value()->default_value(false), + "use replicated projected matrices")("LocalizationRegions.radius", po::value()->default_value(1000.), "Localization regions radius")("LocalizationRegions.adaptive", po::value()->default_value(true), @@ -236,7 +242,7 @@ int read_config(int argc, char** argv, po::variables_map& vm, "safety factor to use for static allocation of orbitals")( "Potentials.filterPseudo", po::value()->default_value('f'), "filter")("Poisson.solver", - po::value()->default_value("CG"), + po::value()->default_value("PCG"), "solver")("Poisson.e0", po::value()->default_value(78.36), "continuum solvent: epsilon0")("Poisson.rho0", po::value()->default_value(0.0004), @@ -244,14 +250,16 @@ int read_config(int argc, char** argv, po::variables_map& vm, po::value()->default_value(1.3), "continuum solvent: beta")("Poisson.FDtype", po::value()->default_value("Mehrstellen"), - "FDtype")("Poisson.nu1", po::value()->default_value(2), - "nu_1")("Poisson.nu2", po::value()->default_value(2), - "nu_2")("Poisson.max_steps", po::value()->default_value(20), + "FDtype")("Poisson.nu1", po::value()->default_value(1), + "MG pre-smoothing sweeps")("Poisson.nu2", po::value()->default_value(1), + "MG post-smoothing sweeps")("Poisson.max_steps", po::value()->default_value(20), "max. nb. steps Poisson solver")("Poisson.max_steps_initial", po::value()->default_value(20), "max. nb. steps Poisson solver in first solve")( "Poisson.max_levels", po::value()->default_value(10), - "max. nb. MG levels Poisson solver")("Poisson.reset", + "max. nb. 
levels for MG solver or Precon")("Poisson.conv_tol", + po::value()->default_value(1.e-8), + "Convergence tolerance for Poisson solver")("Poisson.reset", po::value()->default_value(false), "reset Hartree potential at each MD step")("ABPG.m", po::value()->default_value(1), diff --git a/src/restart.cc b/src/restart.cc index 532043af..b144c0ab 100644 --- a/src/restart.cc +++ b/src/restart.cc @@ -213,5 +213,5 @@ int MGmol::read_restart_data( return 0; } -template class MGmol; -template class MGmol; +template class MGmol>; +template class MGmol>; diff --git a/src/rom.cc b/src/rom.cc index 986e136f..863d76e9 100644 --- a/src/rom.cc +++ b/src/rom.cc @@ -92,26 +92,27 @@ void MGmol::project_orbital(std::string file_path, int rdim, Orbit } } -void ExtendedGridOrbitals::set(std::string file_path, int rdim) -{ - const int dim = getLocNumpt(); - - CAROM::BasisReader reader(file_path); - CAROM::Matrix* orbital_basis = reader.getSpatialBasis(rdim); - - Control& ct = *(Control::instance()); - Mesh* mymesh = Mesh::instance(); - pb::GridFunc gf_psi(mymesh->grid(), ct.bcWF[0], ct.bcWF[1], ct.bcWF[2]); - CAROM::Vector psi; - for (int i = 0; i < rdim; ++i) - { - orbital_basis->getColumn(i, psi); - gf_psi.assign(psi.getData()); - setPsi(gf_psi, i); - } -} - -template class MGmol; -template class MGmol; +//template +//void ExtendedGridOrbitals::set(std::string file_path, int rdim) +//{ +// const int dim = getLocNumpt(); + +// CAROM::BasisReader reader(file_path); +// CAROM::Matrix* orbital_basis = reader.getSpatialBasis(rdim); + +// Control& ct = *(Control::instance()); +// Mesh* mymesh = Mesh::instance(); +// pb::GridFunc gf_psi(mymesh->grid(), ct.bcWF[0], ct.bcWF[1], ct.bcWF[2]); +// CAROM::Vector psi; +// for (int i = 0; i < rdim; ++i) +// { +// orbital_basis->getColumn(i, psi); +// gf_psi.assign(psi.getData()); +// setPsi(gf_psi, i); +// } +//} + +template class MGmol>; +template class MGmol>; #endif // MGMOL_HAS_LIBROM diff --git a/src/rom_main.cc b/src/rom_main.cc index 
7eb323a7..d29419b9 100644 --- a/src/rom_main.cc +++ b/src/rom_main.cc @@ -94,10 +94,10 @@ int main(int argc, char** argv) { MGmolInterface* mgmol; if (ct.isLocMode()) - mgmol = new MGmol(global_comm, *MPIdata::sout, + mgmol = new MGmol>(global_comm, *MPIdata::sout, input_filename, lrs_filename, constraints_filename); else - mgmol = new MGmol(global_comm, *MPIdata::sout, + mgmol = new MGmol>(global_comm, *MPIdata::sout, input_filename, lrs_filename, constraints_filename); mgmol->setup(); @@ -106,43 +106,43 @@ int main(int argc, char** argv) { case (ROMStage::OFFLINE): if (ct.isLocMode()) - readRestartFiles(mgmol); + readRestartFiles>(mgmol); else - readRestartFiles(mgmol); + readRestartFiles>(mgmol); break; case (ROMStage::BUILD): if (ct.isLocMode()) - buildROMPoissonOperator(mgmol); + buildROMPoissonOperator>(mgmol); else - buildROMPoissonOperator(mgmol); + buildROMPoissonOperator>(mgmol); break; case (ROMStage::ONLINE_POISSON): if (ct.isLocMode()) - runPoissonROM(mgmol); + runPoissonROM>(mgmol); else - runPoissonROM(mgmol); + runPoissonROM>(mgmol); break; case (ROMStage::TEST_POISSON): if (ct.isLocMode()) - testROMPoissonOperator(mgmol); + testROMPoissonOperator>(mgmol); else - testROMPoissonOperator(mgmol); + testROMPoissonOperator>(mgmol); break; case (ROMStage::TEST_RHO): if (ct.isLocMode()) - testROMRhoOperator(mgmol); + testROMRhoOperator>(mgmol); else - testROMRhoOperator(mgmol); + testROMRhoOperator>(mgmol); case (ROMStage::TEST_ION): if (ct.isLocMode()) - testROMIonDensity(mgmol); + testROMIonDensity>(mgmol); else - testROMIonDensity(mgmol); + testROMIonDensity>(mgmol); break; diff --git a/src/rom_workflows.cc b/src/rom_workflows.cc index f8d70eec..00ffefa5 100644 --- a/src/rom_workflows.cc +++ b/src/rom_workflows.cc @@ -942,20 +942,20 @@ void testROMIonDensity(MGmolInterface *mgmol_) delete new_ions; } -template void readRestartFiles(MGmolInterface *mgmol_); -template void readRestartFiles(MGmolInterface *mgmol_); +template void 
readRestartFiles>(MGmolInterface *mgmol_); +template void readRestartFiles>(MGmolInterface *mgmol_); -template void buildROMPoissonOperator(MGmolInterface *mgmol_); -template void buildROMPoissonOperator(MGmolInterface *mgmol_); +template void buildROMPoissonOperator>(MGmolInterface *mgmol_); +template void buildROMPoissonOperator>(MGmolInterface *mgmol_); -template void runPoissonROM(MGmolInterface *mgmol_); -template void runPoissonROM(MGmolInterface *mgmol_); +template void runPoissonROM>(MGmolInterface *mgmol_); +template void runPoissonROM>(MGmolInterface *mgmol_); -template void testROMPoissonOperator(MGmolInterface *mgmol_); -template void testROMPoissonOperator(MGmolInterface *mgmol_); +template void testROMPoissonOperator>(MGmolInterface *mgmol_); +template void testROMPoissonOperator>(MGmolInterface *mgmol_); -template void testROMRhoOperator(MGmolInterface *mgmol_); -template void testROMRhoOperator(MGmolInterface *mgmol_); +template void testROMRhoOperator>(MGmolInterface *mgmol_); +template void testROMRhoOperator>(MGmolInterface *mgmol_); -template void testROMIonDensity(MGmolInterface *mgmol_); -template void testROMIonDensity(MGmolInterface *mgmol_); +template void testROMIonDensity>(MGmolInterface *mgmol_); +template void testROMIonDensity>(MGmolInterface *mgmol_); diff --git a/src/runfire.cc b/src/runfire.cc index 3239e1b1..544685a9 100644 --- a/src/runfire.cc +++ b/src/runfire.cc @@ -137,7 +137,7 @@ void MGmol::runfire(OrbitalsType** orbitals, Ions& ions) } } -template void MGmol::runfire( - LocGridOrbitals** orbitals, Ions& ions); -template void MGmol::runfire( - ExtendedGridOrbitals** orbitals, Ions& ions); +template void MGmol>::runfire( + LocGridOrbitals** orbitals, Ions& ions); +template void MGmol>::runfire( + ExtendedGridOrbitals** orbitals, Ions& ions); diff --git a/src/setup.cc b/src/setup.cc index 2ff6d79e..c7b67c60 100644 --- a/src/setup.cc +++ b/src/setup.cc @@ -82,27 +82,28 @@ int MGmol::setupFromInput(const std::string filename) // 
data if (!ct.short_sighted) { - MatricesBlacsContext::instance().setup(mmpi.commSpin(), ct.numst); + ReplicatedWorkSpace::instance().setup(ct.numst); - dist_matrix::DistMatrix::setBlockSize(64); + if (!ct.rmatrices) + { + MatricesBlacsContext::instance().setup(mmpi.commSpin(), ct.numst); - dist_matrix::DistMatrix::setDefaultBlacsContext( - MatricesBlacsContext::instance().bcxt()); + dist_matrix::DistMatrix::setBlockSize(64); - ReplicatedWorkSpace::instance().setup(ct.numst); + dist_matrix::DistMatrix::setDefaultBlacsContext( + MatricesBlacsContext::instance().bcxt()); - dist_matrix::SparseDistMatrix::setNumTasksPerPartitioning( - 128); + dist_matrix::SparseDistMatrix< + DISTMATDTYPE>::setNumTasksPerPartitioning(128); - int npes = mmpi.size(); - setSparseDistMatriConsolidationNumber(npes); + int npes = mmpi.size(); + setSparseDistMatriConsolidationNumber(npes); + } } -#ifdef HAVE_MAGMA - ReplicatedMatrix::setMPIcomm(mmpi.commSpin()); -#endif + if (ct.rmatrices) ReplicatedMatrix::setMPIcomm(mmpi.commSpin()); - LocGridOrbitals::setDotProduct(ct.dot_product_type); + OrbitalsType::setDotProduct(ct.dot_product_type); mgmol_check(); @@ -193,5 +194,5 @@ int MGmol::setupConstraintsFromInput(const std::string filename) return 0; } -template class MGmol; -template class MGmol; +template class MGmol>; +template class MGmol>; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 72e603b6..5779198d 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -242,6 +242,7 @@ add_executable(testGramMatrix ${CMAKE_SOURCE_DIR}/src/GramMatrix.cc ${CMAKE_SOURCE_DIR}/src/Power.cc ${CMAKE_SOURCE_DIR}/src/magma_singleton.cc + ${CMAKE_SOURCE_DIR}/src/LocalMatrices2ReplicatedMatrix.cc ${CMAKE_SOURCE_DIR}/src/local_matrices/LocalMatrices.cc ${CMAKE_SOURCE_DIR}/src/local_matrices/SquareLocalMatrices.cc ${CMAKE_SOURCE_DIR}/src/ReplicatedMatrix.cc @@ -265,6 +266,7 @@ add_executable(testDensityMatrix ${CMAKE_SOURCE_DIR}/src/magma_singleton.cc 
${CMAKE_SOURCE_DIR}/src/local_matrices/LocalMatrices.cc ${CMAKE_SOURCE_DIR}/src/local_matrices/SquareLocalMatrices.cc + ${CMAKE_SOURCE_DIR}/src/LocalMatrices2ReplicatedMatrix.cc ${CMAKE_SOURCE_DIR}/src/ReplicatedMatrix.cc ${CMAKE_SOURCE_DIR}/src/DistMatrix/DistMatrix.cc ${CMAKE_SOURCE_DIR}/src/DistMatrix/BlacsContext.cc @@ -498,6 +500,13 @@ add_test(NAME Davidson ${CMAKE_CURRENT_SOURCE_DIR}/Davidson/davidson.cfg ${CMAKE_CURRENT_SOURCE_DIR}/Davidson/coords.in ${CMAKE_CURRENT_SOURCE_DIR}/../potentials) +add_test(NAME DavidsonReplicated + COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/Davidson/test.py + ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 4 ${MPIEXEC_PREFLAGS} + ${CMAKE_CURRENT_BINARY_DIR}/../src/mgmol-opt + ${CMAKE_CURRENT_SOURCE_DIR}/DavidsonReplicated/davidson.cfg + ${CMAKE_CURRENT_SOURCE_DIR}/Davidson/coords.in + ${CMAKE_CURRENT_SOURCE_DIR}/../potentials) add_test(NAME testSpinO2 COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/SpinO2/test.py ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 4 ${MPIEXEC_PREFLAGS} @@ -511,7 +520,6 @@ add_test(NAME testSpinO2LDA ${CMAKE_CURRENT_BINARY_DIR}/../src/mgmol-opt ${CMAKE_CURRENT_SOURCE_DIR}/SpinO2LDA/mgmol.cfg ${CMAKE_CURRENT_SOURCE_DIR}/SpinO2LDA/coords.in - ${CMAKE_CURRENT_SOURCE_DIR}/SpinO2LDA/lrs.in ${CMAKE_CURRENT_SOURCE_DIR}/../potentials) add_test(NAME testMVP COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/MVP/test.py @@ -520,6 +528,13 @@ add_test(NAME testMVP ${CMAKE_CURRENT_SOURCE_DIR}/MVP/mvp.cfg ${CMAKE_CURRENT_SOURCE_DIR}/MVP/coords.in ${CMAKE_CURRENT_SOURCE_DIR}/../potentials) +add_test(NAME testMVPReplicated + COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/MVP/test.py + ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 4 ${MPIEXEC_PREFLAGS} + ${CMAKE_CURRENT_BINARY_DIR}/../src/mgmol-opt + ${CMAKE_CURRENT_SOURCE_DIR}/MVPReplicated/mvp.cfg + ${CMAKE_CURRENT_SOURCE_DIR}/MVP/coords.in + ${CMAKE_CURRENT_SOURCE_DIR}/../potentials) add_test(NAME testMVPmix COMMAND ${PYTHON_EXECUTABLE} 
${CMAKE_CURRENT_SOURCE_DIR}/MVPmix/test.py ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 4 ${MPIEXEC_PREFLAGS} @@ -675,7 +690,7 @@ if(${MAGMA_FOUND}) target_link_libraries(testConditionDistMatrix PRIVATE ${SCALAPACK_LIBRARIES} ${BLAS_LIBRARIES} MPI::MPI_CXX OpenMP::OpenMP_CXX PkgConfig::MAGMA) target_link_libraries(testConditionDistMatrixPower PRIVATE - ${SCALAPACK_LIBRARIES} ${BLAS_LIBRARIES} + ${SCALAPACK_LIBRARIES} ${BLAS_LIBRARIES} MPI::MPI_CXX OpenMP::OpenMP_CXX PkgConfig::MAGMA) target_link_libraries(testReplicatedMatrix PRIVATE MPI::MPI_CXX OpenMP::OpenMP_CXX PkgConfig::MAGMA) @@ -711,19 +726,20 @@ if(${MAGMA_FOUND}) endif() else() target_link_libraries(testDistVector PRIVATE ${SCALAPACK_LIBRARIES} - ${BLAS_LIBRARIES} MPI::MPI_CXX OpenMP::OpenMP_CXX) + ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES} MPI::MPI_CXX OpenMP::OpenMP_CXX) target_link_libraries(testReplicated2DistMatrix PRIVATE ${SCALAPACK_LIBRARIES} - ${BLAS_LIBRARIES} MPI::MPI_CXX OpenMP::OpenMP_CXX) + ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES} MPI::MPI_CXX OpenMP::OpenMP_CXX) target_link_libraries(testDistMatrix PRIVATE ${SCALAPACK_LIBRARIES} - ${BLAS_LIBRARIES} MPI::MPI_CXX OpenMP::OpenMP_CXX) + ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES} MPI::MPI_CXX OpenMP::OpenMP_CXX) target_link_libraries(testConditionDistMatrix PRIVATE ${SCALAPACK_LIBRARIES} - ${BLAS_LIBRARIES} MPI::MPI_CXX OpenMP::OpenMP_CXX) + ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES} MPI::MPI_CXX OpenMP::OpenMP_CXX) target_link_libraries(testConditionDistMatrixPower PRIVATE - ${SCALAPACK_LIBRARIES} ${BLAS_LIBRARIES} + ${SCALAPACK_LIBRARIES} ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES} MPI::MPI_CXX OpenMP::OpenMP_CXX) - target_link_libraries(testPower PRIVATE ${BLAS_LIBRARIES} ${SCALAPACK_LIBRARIES} + target_link_libraries(testPower PRIVATE ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES} + ${SCALAPACK_LIBRARIES} MPI::MPI_CXX OpenMP::OpenMP_CXX) - target_link_libraries(testPowerDistMatrix PRIVATE ${BLAS_LIBRARIES} + target_link_libraries(testPowerDistMatrix PRIVATE ${BLAS_LIBRARIES} 
${LAPACK_LIBRARIES} ${SCALAPACK_LIBRARIES} MPI::MPI_CXX OpenMP::OpenMP_CXX) target_link_libraries(testAndersonMix PRIVATE ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES} @@ -741,9 +757,9 @@ else() target_link_libraries(testMGkernels PRIVATE ${BLAS_LIBRARIES} MPI::MPI_CXX OpenMP::OpenMP_CXX) target_link_libraries(testGramMatrix PRIVATE ${SCALAPACK_LIBRARIES} - ${BLAS_LIBRARIES} MPI::MPI_CXX OpenMP::OpenMP_CXX) + ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES} MPI::MPI_CXX OpenMP::OpenMP_CXX) target_link_libraries(testDensityMatrix PRIVATE ${SCALAPACK_LIBRARIES} - ${BLAS_LIBRARIES} MPI::MPI_CXX OpenMP::OpenMP_CXX) + ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES} MPI::MPI_CXX OpenMP::OpenMP_CXX) endif() set_tests_properties(testSiH4 PROPERTIES REQUIRED_FILES diff --git a/tests/Chebyshev/cheb.cfg b/tests/Chebyshev/cheb.cfg index f6d67c02..43c95f50 100644 --- a/tests/Chebyshev/cheb.cfg +++ b/tests/Chebyshev/cheb.cfg @@ -15,7 +15,7 @@ lz=13.26 [Potentials] pseudopotential=pseudo.Li_ONCVPSP_LDA [Poisson] -solver=CG +solver=PCG [Run] type=QUENCH [Quench] diff --git a/tests/Chebyshev/test.py b/tests/Chebyshev/test.py index 5cb9a284..28718ee3 100644 --- a/tests/Chebyshev/test.py +++ b/tests/Chebyshev/test.py @@ -70,21 +70,24 @@ print("Chebyshev-MVP test FAILED for taking too many iterations") sys.exit(1) +energy_ref = -108.264614049136 +tol = 1.e-6 print("Check energy...") last_energy = eval(energies[-1]) print("Energy = {}".format(last_energy)) -if last_energy>-108.0868: +if abs(last_energy-energy_ref)>tol: print("Last energy = {}".format(last_energy)) sys.exit(1) tol = 1.e-5 +entropy_ref = 0.17653976 print("Check entropy...") for line in lines: if line.count(b'-TS'): words=line.split() entropy = eval(words[3]) print("Entropy = {}".format(entropy)) - entropy_diff = entropy+0.17819 + entropy_diff = entropy+entropy_ref if abs(entropy_diff)>tol: print("Check entropy test FAILED. 
Entropy difference = {}".format(abs(entropy_diff))) sys.exit(1) diff --git a/tests/Cl2_ONCVPSP_LDA/test.py b/tests/Cl2_ONCVPSP_LDA/test.py index e7ff345a..dcb9bff3 100755 --- a/tests/Cl2_ONCVPSP_LDA/test.py +++ b/tests/Cl2_ONCVPSP_LDA/test.py @@ -36,7 +36,7 @@ lines=output.split(b'\n') tol = 4.e-6 -Fz = 1.2e-3 +Fz = -7.33e-04 for line in lines: num_matches = line.count(b'%%') if num_matches: @@ -51,7 +51,7 @@ for i in range(5,7): force = eval(words[i]) if abs(force)>tol: - print("force = {}".format(force)) + print("Force larger than tol, force = {}".format(force)) sys.exit(1) #check value of force in z direction if abs(eval(words[7])-Fz)>2.e-5: diff --git a/tests/DMandEnergyAndForces/testDMandEnergyAndForces.cc b/tests/DMandEnergyAndForces/testDMandEnergyAndForces.cc index a5a086fe..72e5dc68 100644 --- a/tests/DMandEnergyAndForces/testDMandEnergyAndForces.cc +++ b/tests/DMandEnergyAndForces/testDMandEnergyAndForces.cc @@ -87,11 +87,13 @@ int main(int argc, char** argv) MGmolInterface* mgmol; if (ct.isLocMode()) - mgmol = new MGmol(global_comm, *MPIdata::sout, - input_filename, lrs_filename, constraints_filename); + mgmol = new MGmol>(global_comm, + *MPIdata::sout, input_filename, lrs_filename, + constraints_filename); else - mgmol = new MGmol(global_comm, *MPIdata::sout, - input_filename, lrs_filename, constraints_filename); + mgmol = new MGmol>(global_comm, + *MPIdata::sout, input_filename, lrs_filename, + constraints_filename); if (MPIdata::onpe0) { @@ -158,9 +160,9 @@ int main(int argc, char** argv) std::shared_ptr projmatrices = mgmol->getProjectedMatrices(); - ExtendedGridOrbitals orbitals("new_orbitals", mygrid, mymesh->subdivx(), - ct.numst, ct.bcWF, projmatrices.get(), nullptr, nullptr, nullptr, - nullptr); + ExtendedGridOrbitals orbitals("new_orbitals", mygrid, + mymesh->subdivx(), ct.numst, ct.bcWF, projmatrices.get(), nullptr, + nullptr, nullptr, nullptr); const pb::PEenv& myPEenv = mymesh->peenv(); HDFrestart h5file("WF", myPEenv, 
ct.out_restart_file_type); diff --git a/tests/Davidson/davidson.cfg b/tests/Davidson/davidson.cfg index ce864055..e34f407b 100644 --- a/tests/Davidson/davidson.cfg +++ b/tests/Davidson/davidson.cfg @@ -15,7 +15,7 @@ lz=15.3 [Potentials] pseudopotential=pseudo.Al_LDA_FHI [Poisson] -solver=CG +solver=PCG [Run] type=QUENCH [Quench] diff --git a/tests/DavidsonReplicated/davidson.cfg b/tests/DavidsonReplicated/davidson.cfg new file mode 100644 index 00000000..87f0ea1f --- /dev/null +++ b/tests/DavidsonReplicated/davidson.cfg @@ -0,0 +1,35 @@ +verbosity=2 +xcFunctional=LDA +FDtype=4th +[Mesh] +nx=32 +ny=32 +nz=32 +[Domain] +ox=0. +oy=0. +oz=0. +lx=15.3 +ly=15.3 +lz=15.3 +[Potentials] +pseudopotential=pseudo.Al_LDA_FHI +[Poisson] +solver=PCG +[Run] +type=QUENCH +[Quench] +solver=Davidson +max_steps=200 +atol=1.e-8 +[Orbitals] +nempty=10 +initial_type=random +temperature=300. +[ProjectedMatrices] +solver=exact +replicated=true +[DensityMatrix] +nb_inner_it=2 +[Restart] +output_level=0 diff --git a/tests/EnergyAndForces/testEnergyAndForces.cc b/tests/EnergyAndForces/testEnergyAndForces.cc index c8f7ab95..f8d1264b 100644 --- a/tests/EnergyAndForces/testEnergyAndForces.cc +++ b/tests/EnergyAndForces/testEnergyAndForces.cc @@ -87,11 +87,13 @@ int main(int argc, char** argv) MGmolInterface* mgmol; if (ct.isLocMode()) - mgmol = new MGmol(global_comm, *MPIdata::sout, - input_filename, lrs_filename, constraints_filename); + mgmol = new MGmol>(global_comm, + *MPIdata::sout, input_filename, lrs_filename, + constraints_filename); else - mgmol = new MGmol(global_comm, *MPIdata::sout, - input_filename, lrs_filename, constraints_filename); + mgmol = new MGmol>(global_comm, + *MPIdata::sout, input_filename, lrs_filename, + constraints_filename); if (MPIdata::onpe0) { diff --git a/tests/Fatom/test.py b/tests/Fatom/test.py index db960c3a..def0b3a9 100755 --- a/tests/Fatom/test.py +++ b/tests/Fatom/test.py @@ -65,7 +65,7 @@ print("ERROR Eigenvalue 0 = {}".format(eval(eigenvalues[0]))) 
sys.exit(1) for ii in range(3): - if abs(eval(eigenvalues[1+ii])+0.409)>tole: + if abs(eval(eigenvalues[1+ii])+0.410)>tole: print("ERROR Eigenvalue {} = {}".format(1+ii,eval(eigenvalues[1+ii]))) sys.exit(1) sys.exit(0) diff --git a/tests/MVP/mvp.cfg b/tests/MVP/mvp.cfg index 868e5703..28bca7e2 100644 --- a/tests/MVP/mvp.cfg +++ b/tests/MVP/mvp.cfg @@ -19,7 +19,7 @@ type=QUENCH [Quench] solver=PSD max_steps=300 -atol=1.e-7 +atol=2.e-7 ortho_freq=10 [Orbitals] nempty=10 @@ -27,6 +27,7 @@ initial_type=random temperature=300. [ProjectedMatrices] solver=exact +replicated=false [DensityMatrix] solver=MVP nb_inner_it=2 diff --git a/tests/MVP/test.py b/tests/MVP/test.py index 57d9522e..8a55cc5b 100644 --- a/tests/MVP/test.py +++ b/tests/MVP/test.py @@ -87,12 +87,13 @@ print(eigenvalues) tol = 1.e-4 -eigenvalue0 = -0.208 +eigenvalue0 = -0.210 if abs(eigenvalues[0]-eigenvalue0)>tol: print("Expected eigenvalue 0 to be {}".format(eigenvalue0)) sys.exit(1) -eigenvalue50 = 0.208 +eigenvalue50 = 0.205 if abs(eigenvalues[50]-eigenvalue50)>tol: + print("Eeigenvalue 50 = {}".format(eigenvalues[50])) print("Expected eigenvalue 50 to be {}".format(eigenvalue50)) sys.exit(1) diff --git a/tests/MVPReplicated/mvp.cfg b/tests/MVPReplicated/mvp.cfg new file mode 100644 index 00000000..9978efa2 --- /dev/null +++ b/tests/MVPReplicated/mvp.cfg @@ -0,0 +1,35 @@ +verbosity=2 +xcFunctional=LDA +FDtype=4th +[Mesh] +nx=32 +ny=32 +nz=32 +[Domain] +ox=0. +oy=0. +oz=0. +lx=15.3 +ly=15.3 +lz=15.3 +[Potentials] +pseudopotential=pseudo.Al_LDA_FHI +[Run] +type=QUENCH +[Quench] +solver=PSD +max_steps=300 +atol=2.e-7 +ortho_freq=10 +[Orbitals] +nempty=10 +initial_type=random +temperature=300. 
+[ProjectedMatrices] +solver=exact +replicated=true +[DensityMatrix] +solver=MVP +nb_inner_it=2 +[Restart] +output_level=2 diff --git a/tests/PinnedH2O_3DOF/testPinnedH2O_3DOF.cc b/tests/PinnedH2O_3DOF/testPinnedH2O_3DOF.cc index 9cb58f33..ad9ef743 100644 --- a/tests/PinnedH2O_3DOF/testPinnedH2O_3DOF.cc +++ b/tests/PinnedH2O_3DOF/testPinnedH2O_3DOF.cc @@ -92,7 +92,7 @@ int main(int argc, char** argv) std::cout << "-------------------------" << std::endl; } - MGmolInterface* mgmol = new MGmol(global_comm, + MGmolInterface* mgmol = new MGmol>(global_comm, *MPIdata::sout, input_filename, lrs_filename, constraints_filename); if (MPIdata::onpe0) @@ -152,7 +152,7 @@ int main(int argc, char** argv) std::shared_ptr projmatrices = mgmol->getProjectedMatrices(); - ExtendedGridOrbitals orbitals("new_orbitals", mygrid, mymesh->subdivx(), + ExtendedGridOrbitals orbitals("new_orbitals", mygrid, mymesh->subdivx(), ct.numst, ct.bcWF, projmatrices.get(), nullptr, nullptr, nullptr, nullptr); diff --git a/tests/ProjectedMatrices/quenchExact.cfg b/tests/ProjectedMatrices/quenchExact.cfg index 30c4d6a6..4e338690 100644 --- a/tests/ProjectedMatrices/quenchExact.cfg +++ b/tests/ProjectedMatrices/quenchExact.cfg @@ -15,7 +15,7 @@ lz=21.2406 [Potentials] pseudopotential=pseudo.D_ONCV_PBE_SG15 [Poisson] -solver=CG +solver=PCG [Run] type=QUENCH [Quench] diff --git a/tests/ProjectedMatrices/quenchShortSighted.cfg b/tests/ProjectedMatrices/quenchShortSighted.cfg index 47615ccd..15e5c903 100644 --- a/tests/ProjectedMatrices/quenchShortSighted.cfg +++ b/tests/ProjectedMatrices/quenchShortSighted.cfg @@ -15,7 +15,7 @@ lz=21.2406 [Potentials] pseudopotential=pseudo.D_ONCV_PBE_SG15 [Poisson] -solver=CG +solver=PCG [Run] type=QUENCH [Quench] diff --git a/tests/RestartEnergyAndForces/testRestartEnergyAndForces.cc b/tests/RestartEnergyAndForces/testRestartEnergyAndForces.cc index 058b0a8e..776e7983 100644 --- a/tests/RestartEnergyAndForces/testRestartEnergyAndForces.cc +++ 
b/tests/RestartEnergyAndForces/testRestartEnergyAndForces.cc @@ -85,8 +85,9 @@ int main(int argc, char** argv) std::cout << "-------------------------" << std::endl; } - MGmolInterface* mgmol = new MGmol(global_comm, - *MPIdata::sout, input_filename, lrs_filename, constraints_filename); + MGmolInterface* mgmol = new MGmol>( + global_comm, *MPIdata::sout, input_filename, lrs_filename, + constraints_filename); if (MPIdata::onpe0) { @@ -142,9 +143,9 @@ int main(int argc, char** argv) std::shared_ptr projmatrices = mgmol->getProjectedMatrices(); - ExtendedGridOrbitals orbitals("new_orbitals", mygrid, mymesh->subdivx(), - ct.numst, ct.bcWF, projmatrices.get(), nullptr, nullptr, nullptr, - nullptr); + ExtendedGridOrbitals orbitals("new_orbitals", mygrid, + mymesh->subdivx(), ct.numst, ct.bcWF, projmatrices.get(), nullptr, + nullptr, nullptr, nullptr); // read numst_ wavefunction int nread = orbitals.read_func_hdf5(h5file, name); diff --git a/tests/RhoVhRestart/md.cfg b/tests/RhoVhRestart/md.cfg index 2b8a378b..5fba5590 100644 --- a/tests/RhoVhRestart/md.cfg +++ b/tests/RhoVhRestart/md.cfg @@ -15,6 +15,11 @@ lz=9. [Potentials] pseudopotential=pseudo.O_ONCV_PBE_SG15 pseudopotential=pseudo.H_ONCV_PBE_SG15 +[Poisson] +solver=PCG +conv_tol=1.e-16 +nu1=2 +nu2=2 [Run] type=MD [MD] diff --git a/tests/RhoVhRestart/mgmol.cfg b/tests/RhoVhRestart/mgmol.cfg index eee7f11c..aff5795d 100644 --- a/tests/RhoVhRestart/mgmol.cfg +++ b/tests/RhoVhRestart/mgmol.cfg @@ -15,6 +15,11 @@ lz=9. [Potentials] pseudopotential=pseudo.O_ONCV_PBE_SG15 pseudopotential=pseudo.H_ONCV_PBE_SG15 +[Poisson] +solver=PCG +conv_tol=1.e-16 +nu1=2 +nu2=2 [Run] type=QUENCH [Quench] diff --git a/tests/RhoVhRestart/restart.cfg b/tests/RhoVhRestart/restart.cfg index 20f0293a..74b1abc5 100644 --- a/tests/RhoVhRestart/restart.cfg +++ b/tests/RhoVhRestart/restart.cfg @@ -15,6 +15,11 @@ lz=9. 
[Potentials] pseudopotential=pseudo.O_ONCV_PBE_SG15 pseudopotential=pseudo.H_ONCV_PBE_SG15 +[Poisson] +solver=PCG +conv_tol=1.e-16 +nu1=2 +nu2=2 [Run] type=QUENCH [Quench] diff --git a/tests/RhoVhRestart/testRhoVhRestart.cc b/tests/RhoVhRestart/testRhoVhRestart.cc index ce40272d..b2cd8294 100644 --- a/tests/RhoVhRestart/testRhoVhRestart.cc +++ b/tests/RhoVhRestart/testRhoVhRestart.cc @@ -178,26 +178,27 @@ int main(int argc, char** argv) // Enter main scope { - MGmolInterface* mgmol = new MGmol(global_comm, - *MPIdata::sout, input_filename, lrs_filename, constraints_filename); + MGmolInterface* mgmol = new MGmol>( + global_comm, *MPIdata::sout, input_filename, lrs_filename, + constraints_filename); mgmol->setup(); /* load a restart file */ - MGmol* mgmol_ext - = dynamic_cast*>(mgmol); + MGmol>* mgmol_ext + = dynamic_cast>*>(mgmol); mgmol_ext->loadRestartFile(ct.restart_file); if (MPIdata::onpe0) std::cout << "=============================" << std::endl; if (MPIdata::onpe0) std::cout << "testRhoRestart..." << std::endl; - status = testRhoRestart(mgmol); + status = testRhoRestart>(mgmol); if (status < 0) return status; if (MPIdata::onpe0) std::cout << "=============================" << std::endl; if (MPIdata::onpe0) std::cout << "testPotRestart..." 
<< std::endl; - status = testPotRestart(mgmol); + status = testPotRestart>(mgmol); if (status < 0) return status; delete mgmol; diff --git a/tests/ShortSighted/md.cfg b/tests/ShortSighted/md.cfg index 012deb56..5730d62e 100644 --- a/tests/ShortSighted/md.cfg +++ b/tests/ShortSighted/md.cfg @@ -15,7 +15,7 @@ lz=21.2406 [Potentials] pseudopotential=pseudo.D_ONCV_PBE_SG15 [Poisson] -solver=CG +solver=PCG [Run] type=MD [MD] diff --git a/tests/ShortSighted/quench.cfg b/tests/ShortSighted/quench.cfg index d6821bba..8b21d4cd 100644 --- a/tests/ShortSighted/quench.cfg +++ b/tests/ShortSighted/quench.cfg @@ -15,7 +15,7 @@ lz=21.2406 [Potentials] pseudopotential=pseudo.D_ONCV_PBE_SG15 [Poisson] -solver=CG +solver=PCG [Run] type=QUENCH [Quench] diff --git a/tests/ShortSighted/test.py b/tests/ShortSighted/test.py index 68a617aa..25f7290b 100755 --- a/tests/ShortSighted/test.py +++ b/tests/ShortSighted/test.py @@ -67,8 +67,8 @@ print("Check energies...") tol = 1.e-3 count = 0 -energy1_ref = -83.904 -energy4_ref = -83.871 +energy1_ref = -83.929 +energy4_ref = -83.896 for line in lines: num_matches1 = line.count(b'IONIC') @@ -93,7 +93,7 @@ tol = 1.e-1 count = 0 temperature1_ref = 948.253 -temperature4_ref = 916.029 +temperature4_ref = 916.336 for line in lines: num_matches1 = line.count(b'Kinetic') diff --git a/tests/SiH4/mgmol.cfg b/tests/SiH4/mgmol.cfg index c6a72cdb..aea52881 100644 --- a/tests/SiH4/mgmol.cfg +++ b/tests/SiH4/mgmol.cfg @@ -20,6 +20,7 @@ type=QUENCH max_steps=45 atol=1.e-9 num_lin_iterations=2 +preconditioner_precision=64 [Orbitals] initial_type=Gaussian initial_width=2. 
diff --git a/tests/SpinO2/test.py b/tests/SpinO2/test.py index 5d5e823c..8d08bfd9 100755 --- a/tests/SpinO2/test.py +++ b/tests/SpinO2/test.py @@ -43,15 +43,15 @@ words=line.split() energy = eval(words[5][:-1]) -ref_energy = -31.805 +ref_energy = -31.808 print("energy = {}".format(energy)) if abs(ref_energy-energy) > 1.e-3: - print("Incorrect energy!") + print("Expected energy = {}".format(ref_energy)) sys.exit(1) #make sure forces are below tolerance tol = 6.e-4 -Fz = -1.06e-2 +Fz = -0.96e-2 for line in lines: #find output lines with forces if line.count(b'##'): diff --git a/tests/SpinO2LDA/mgmol.cfg b/tests/SpinO2LDA/mgmol.cfg index ee2385e0..11326e2f 100644 --- a/tests/SpinO2LDA/mgmol.cfg +++ b/tests/SpinO2LDA/mgmol.cfg @@ -22,7 +22,7 @@ max_steps=100 atol=1.e-7 MLWC=true [Orbitals] -initial_type=Gaussian +initial_type=Random initial_width=1. bcx=0 bcy=0 diff --git a/tests/SpinO2LDA/test.py b/tests/SpinO2LDA/test.py index d6fa4242..26010c0a 100755 --- a/tests/SpinO2LDA/test.py +++ b/tests/SpinO2LDA/test.py @@ -13,10 +13,9 @@ mpicmd = mpicmd + " "+sys.argv[i] print("MPI run command: {}".format(mpicmd)) -exe = sys.argv[nargs-5] -inp = sys.argv[nargs-4] -coords = sys.argv[nargs-3] -lrs = sys.argv[nargs-2] +exe = sys.argv[nargs-4] +inp = sys.argv[nargs-3] +coords = sys.argv[nargs-2] print("coordinates file: %s"%coords) #create links to potentials files @@ -28,7 +27,7 @@ os.symlink(src1, dst1) #run mgmol -command = "{} {} -c {} -i {} -l {}".format(mpicmd,exe,inp,coords,lrs) +command = "{} {} -c {} -i {}".format(mpicmd,exe,inp,coords) print("Run command: {}".format(command)) output = subprocess.check_output(command,shell=True) @@ -44,15 +43,15 @@ words=line.split() energy = eval(words[5][:-1]) -ref_energy = -31.6105 +ref_energy = -31.6130 print("energy = {}".format(energy)) if abs(ref_energy-energy) > 1.e-3: - print("Incorrect energy!") + print("Incorrect energy, expected {}".format(ref_energy)) sys.exit(1) #make sure forces are below tolerance tol = 4.e-4 -Fz = 1.e-2 
+Fz = 1.06e-2 for line in lines: #find output lines with forces if line.count(b'##'): diff --git a/tests/SpreadPenalty/test.py b/tests/SpreadPenalty/test.py index ea70f074..059f3320 100755 --- a/tests/SpreadPenalty/test.py +++ b/tests/SpreadPenalty/test.py @@ -79,7 +79,7 @@ #we tolerate an energy difference since the initial wave functions #are very delocalized and the spread penalty remains active all along -energy_ref = -17.16448 +energy_ref = -17.1660 tol = 5.e-4 if abs(energy-energy_ref) > tol: print("Test failed: last energy value incorrect!") diff --git a/tests/WFEnergyAndForces/testWFEnergyAndForces.cc b/tests/WFEnergyAndForces/testWFEnergyAndForces.cc index f9039abe..ae07a3fa 100644 --- a/tests/WFEnergyAndForces/testWFEnergyAndForces.cc +++ b/tests/WFEnergyAndForces/testWFEnergyAndForces.cc @@ -87,11 +87,13 @@ int main(int argc, char** argv) MGmolInterface* mgmol; if (ct.isLocMode()) - mgmol = new MGmol(global_comm, *MPIdata::sout, - input_filename, lrs_filename, constraints_filename); + mgmol = new MGmol>(global_comm, + *MPIdata::sout, input_filename, lrs_filename, + constraints_filename); else - mgmol = new MGmol(global_comm, *MPIdata::sout, - input_filename, lrs_filename, constraints_filename); + mgmol = new MGmol>(global_comm, + *MPIdata::sout, input_filename, lrs_filename, + constraints_filename); if (MPIdata::onpe0) { @@ -158,9 +160,9 @@ int main(int argc, char** argv) std::shared_ptr projmatrices = mgmol->getProjectedMatrices(); - ExtendedGridOrbitals orbitals("new_orbitals", mygrid, mymesh->subdivx(), - ct.numst, ct.bcWF, projmatrices.get(), nullptr, nullptr, nullptr, - nullptr); + ExtendedGridOrbitals orbitals("new_orbitals", mygrid, + mymesh->subdivx(), ct.numst, ct.bcWF, projmatrices.get(), nullptr, + nullptr, nullptr, nullptr); const pb::PEenv& myPEenv = mymesh->peenv(); HDFrestart h5file("WF", myPEenv, ct.out_restart_file_type); diff --git a/util/compareForces.py b/util/compareForces.py index 829300a3..0297d5e8 100644 --- 
a/util/compareForces.py +++ b/util/compareForces.py @@ -11,6 +11,7 @@ #------------------------------------------------------------------------------- import sys, string from math import sqrt +import matplotlib.pyplot as plt input1=open(sys.argv[1],'r') input2=open(sys.argv[2],'r') @@ -20,28 +21,20 @@ frame=eval(sys.argv[3]) print( 'Input argument: Frame=',frame ) -L1=input1.readlines() -L2=input2.readlines() - -star='*' +lines1=input1.readlines() +lines2=input2.readlines() ############################################## # count number atoms def getNumAtoms(lines): - searchterm1='## ' - searchterm2='FORCES' - searchterm3='Forces' found_current_line=0 already_found_one=0 na=0 flag=0 for line in lines: ## loop over lines of file - num_matches1 = line.count(searchterm1) - num_matches2 = line.count(searchterm2) - num_matches3 = line.count(searchterm3) - if num_matches2 or num_matches3: + if line.count('FORCES') or line.count('Forces'): flag=1 - if num_matches1 & flag==1: + if line.count('## ') & flag==1: #print 'line=',line found_current_line=1 already_found_one =1 @@ -54,13 +47,12 @@ def getNumAtoms(lines): ############################################## -na1=getNumAtoms(L1) -na2=getNumAtoms(L2) +na1=getNumAtoms(lines1) +na2=getNumAtoms(lines2) print( 'N atoms in file1=', na1) print( 'N atoms in file2=', na2) - ############################################## def getForces(names,coords,forces,lines,fframe): @@ -113,25 +105,16 @@ def getForces(names,coords,forces,lines,fframe): ############################################## -forces1=[] -coords1=[] -names1=[] -for i in range(0,na1): - forces1.append(0) - coords1.append(0) - names1.append(0) - -forces2=[] -coords2=[] -names2=[] -for i in range(0,na2): - forces2.append(0) - coords2.append(0) - names2.append(0) - +forces1=[0]*na1 +coords1=[0]*na1 +names1=[0]*na1 + +forces2=[0]*na2 +coords2=[0]*na2 +names2=[0]*na2 -getForces(names1,coords1,forces1,L1,frame) -getForces(names2,coords2,forces2,L2,frame) 
+getForces(names1,coords1,forces1,lines1,frame) +getForces(names2,coords2,forces2,lines2,frame) mindf=100. maxdf=0. @@ -142,40 +125,7 @@ def getForces(names,coords,forces,lines,fframe): imax=0 jmax=0 dff=[] -bin=[] -for i in range(0,10): - bin.append(0) - -############################################## -def subtractAverageForce(forces): - avgx=0. - avgy=0. - avgz=0. - na=len(forces) - for i in range(na): - word=string.split(forces[i]) - fx=eval(word[0]) - fy=eval(word[1]) - fz=eval(word[2]) - avgx=avgx+fx - avgy=avgy+fy - avgz=avgz+fz - - avgx=avgx/na - avgy=avgy/na - avgz=avgz/na - - for i in range(na): - word=string.split(forces[i]) - fx=eval(word[0])-avgx - fy=eval(word[1])-avgy - fz=eval(word[2])-avgz - forces[i]=str(fx)+'\t'+str(fy)+'\t'+str(fz) - -############################################## - -#subtractAverageForce(forces1) -#subtractAverageForce(forces2) +bins=[0] * 10 na=0 for i in range(na1): @@ -209,16 +159,19 @@ def subtractAverageForce(forces): mindf=df na=na+1 print (names1[i],': delta f=',df) + print ('na=',na) avg=avg/na - -print ('N atoms =', na) -print ('Avg. df=',avgx,avgy,avgz) -print ('Avg. |df|=',avg) -print ('Min. df=',mindf) -print ('Max. df=',maxdf) -print ('df max for atom ',names1[imax],' and ',names2[jmax]) -print ('Forces atoms with largest force difference:') +avgx=avgx/na +avgy=avgz/na +avgz=avgz/na + +print ('N atoms = ', na) +print ('Avg. df = ',avgx,avgy,avgz) +print ('Avg. |df| = ',avg) +print ('Min. |df| = ',mindf) +print ('Max. 
|df| = ',maxdf) +print ('Atoms with largest force difference:') filename1=sys.argv[1] filename1=filename1.ljust(15) filename2=sys.argv[2] @@ -230,10 +183,15 @@ def subtractAverageForce(forces): for j in range(na): a=(dff[j]-mindf)/delf b=int(a) - bin[b]=bin[b]+1 + bins[b]=bins[b]+1 for i in range(0,10): - print (mindf+(i+0.5)*delf, bin[i]) - -#for j in range(na): -# print (dff[j]) + print (mindf+(i+0.5)*delf, bins[i]) + +plt.hist(dff, bins=10, edgecolor="black") +plt.ticklabel_format(axis='x', style='sci', scilimits=(0,0)) +plt.xlabel('force error magnitude [Ha/Bohr]',fontsize=12) +plt.ylabel('frequency',fontsize=12) +plt.xticks(fontsize=12) +plt.yticks(fontsize=12) +plt.savefig('errorForces.png', dpi=100) diff --git a/util/compareTimers.py b/util/compareTimers.py index e31b2df6..002ccf0b 100644 --- a/util/compareTimers.py +++ b/util/compareTimers.py @@ -54,7 +54,7 @@ print('-----------------------------------------------------------------------------') for timer in reversed(sorted_timers): key=timer[0] - print(key.ljust(50)), - print(str(eval(timers1[key])).ljust(10)), - print(str(eval(timers2[key])).ljust(10)), + print(key.ljust(50), end="") + print(str(eval(timers1[key])).ljust(10), end="") + print(str(eval(timers2[key])).ljust(10), end="") print(str(100.*timer[1]).ljust(20)) diff --git a/util/hdf5toMatrix.py b/util/hdf5toMatrix.py new file mode 100644 index 00000000..a11e169c --- /dev/null +++ b/util/hdf5toMatrix.py @@ -0,0 +1,158 @@ +# Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at +# the Lawrence Livermore National Laboratory. +# LLNL-CODE-743438 +# All rights reserved. +# This file is part of MGmol. For details, see https://github.com/llnl/mgmol. +# Please also read this link https://github.com/llnl/mgmol/LICENSE +# +import sys +import argparse +import h5py +import numpy as np + + +# Read dataset in hdf5 file. +# Returns an array containing the data. 
+def get_function(filename, datasetname, dims): + + # Check If File is in HDF5 Format + try: + ishdf = h5py.is_hdf5(filename) + except Exception: + print('\nh5py.is_hdf5 unsucessful') + return None + + # If File is not in HDF5 Format, Stop + if( not(ishdf) ): + print('\nInput File ' + filename + ' not in HDF5 Format. Stop.') + return None + + # If Everything Goes Fine, Proceed + else: + + # Open HDF5 File + try: + file_id = h5py.h5f.open(bytes(filename, encoding='utf-8'), + h5py.h5f.ACC_RDONLY, h5py.h5p.DEFAULT) + except Exception: + print('\nHDF5 File: ' + filename + ' Failed to Open') + return None + + # Open Dataset + try: + dset_id = h5py.h5d.open(file_id, bytes(datasetname, encoding='utf-8')) + except Exception: + print('\nHDF5 Dataset: ' + datasetname + ' Failed to Open') + return None + + # Copy of Dataspace for Dataset + try: + filespace = dset_id.get_space() + + except Exception: + print('\ndset_id.get_space() Failed.') + return None + + # Get Dataspace Dimension + ndims = filespace.get_simple_extent_ndims() + # If Dataspace Dimension is not 3, Stop. + if( not(ndims == 3) ): + print('\nProblem with Dataspace Dimension, ndims = ' + str(ndims)) + return None + + # Shape of Dataspace (dims) + dims = dims.tolist() + dims = filespace.get_simple_extent_dims() + + print('Dataspace: Dimensions ' + str( int(dims[0]) ) + ' x ' + + str( int(dims[1]) ) + ' x ' + + str( int(dims[2])) ) + + print('Size: ' + str( int(dims[0] * dims[1] * dims[2]) )) + + # If Size < 1, Stop. 
+ if( int( dims[0] * dims[1] * dims[2] ) < 1 ): + return None + + # Read data -> data + data = np.array(0.0, h5py.h5t.NATIVE_FLOAT) + data.resize( int(dims[0] * dims[1] * dims[2]) , refcheck = False) + + # Dump Data into Numpy Array (data) + try: + status = dset_id.read(h5py.h5s.ALL, h5py.h5s.ALL, data) + except Exception: + print('\ndataset_id.read Failed.') + return 0 + + return ( data, dims ) + + +''' MAIN ''' +# USAGE: +# python hdf5toMM.py file.hdf5 + +def main(): + + h5filename = sys.argv[1] + field = 'Function' + + # Remove File Extension ( .hdf5 ) + base_filename = h5filename.split('.')[0].strip() + base_filename = base_filename.split('/')[-1] + + # Use base_filename to make .dat filename + output_data_filename = base_filename + '.dat' + print('\noutput_data_filename = ' + output_data_filename) + + columns = [] + + i = 0 + while i<1000: + # Get data - Call get_function + number = str(i) + while len(number)<4: + number = '0'+number + + datasetname = field + number + print('\nDataset: ' + datasetname) + + dims = np.arange(0, dtype = h5py.h5t.NATIVE_INT32) # Turns Into a TUPLE + + try: + column, dims = get_function(h5filename, datasetname, dims) + except Exception: + print('\nRead Failed. \nEither the HDF5 File ' + + 'or the Dataset are not Present. Stop.\n') + break + + # If data Empty, Stop. 
+ if( column is None or dims is None ): + print('\nRead Failed.') + return -1 + + #add data just read as a column in list of columns + columns.append(column) + + dim = [ int(dims[0]), int(dims[1]), int(dims[2]) ] + + i = i+1 + + #build numpy 2d array from all the columns + matrix = columns[0] + for i in range(len(columns)-1): + matrix = np.column_stack((matrix,columns[i+1])) + + print('\nWrite data...\n') + + nrows = dim[0]*dim[1]*dim[2] + ncols = len(columns) + + np.savetxt('matrix.dat', matrix, delimiter='\t', fmt='%le', header=str(nrows) + '\t' + str(ncols)) + + return 0 + +# Executes Main Function +if __name__ == '__main__': + + main() diff --git a/util/plotConvergenceEnergy.py b/util/plotConvergenceEnergy.py index d4e93c50..b89f9a3f 100644 --- a/util/plotConvergenceEnergy.py +++ b/util/plotConvergenceEnergy.py @@ -9,35 +9,45 @@ import sys, string import matplotlib.pyplot as plt -energies=[] +conv_energy=10000. -inputfile=open(sys.argv[1],'r') -lines=inputfile.readlines() +markers=['r.--','b.--','g.--'] -flag=0 -nst=0 -conv_energy=10000. 
-for line in lines: - if line.count( 'Number of states'): - words=line.split() - nst=eval(words[4]) - num_matches1 = line.count('ENERGY') - num_matches2 = line.count('%%') - if num_matches1 & num_matches2: - words=line.split() - energy=eval(words[5][:-1]) - energies.append(energy) - conv_energy=energy - -deltaes=[] -for energy in energies: - deltaes.append((energy-conv_energy)/nst) - -plt.plot(deltaes,'r.--') -plt.ylabel('error Eks/orbital [Ry]') -plt.xlabel('outer iterations') -plt.axis([0.,len(deltaes),10.*deltaes[-2],deltaes[0]]) +i=0 +for filename in sys.argv[1:]: + energies=[] + + inputfile=open(filename,'r') + lines=inputfile.readlines() + + flag=0 + na=0 + for line in lines: + if line.count('Number of ions'): + words=line.split() + na=eval(words[4]) + print('na = {}'.format(na)) + if line.count('ENERGY') & line.count('%%'): + words=line.split() + energy=eval(words[5][:-1]) + energies.append(energy) + if conv_energy>energy: + conv_energy=energy + + print('Reference energy [Ha/atom] = {}'.format(conv_energy/na)) + deltaes=[] + for energy in energies: + deltaes.append((energy-conv_energy)/na) + + plt.plot(deltaes,markers[i]) + plt.axis([0.,len(deltaes),10.*deltaes[-2],deltaes[0]]) + i=i+1 + +plt.ylabel('error Eks/atom [Ha]', fontsize=12) +plt.xlabel('outer iterations', fontsize=12) plt.yscale('log') +plt.xticks(fontsize=12) +plt.yticks(fontsize=12) #plt.show() plt.savefig('errorEnergy.png', dpi=100) diff --git a/util/read_hdf5.py b/util/read_hdf5.py index b9bf42bd..2d2ce1a9 100644 --- a/util/read_hdf5.py +++ b/util/read_hdf5.py @@ -665,13 +665,10 @@ def writeAtomsXYZ(xyz_filename, filename, origin, lattice): ''' MAIN ''' -# *argv (sys.argv in Python) - Takes an Arbitrary Number of Paramters -# and Stores Them in a List - # USAGE: -# ssh -l user cab.llnl.gov (If Using Cab System) # python read_hdf5.py [ -bov ] file.hdf5 datasetName - +# The '-bov' option should be used to visulaize data with VisIt +# For a plain asci file with data in one column, no '-bov' 
option should be used def main(): ''' Variables ''' @@ -800,25 +797,30 @@ def main(): else: print('\nWrite data...\n') - + ndec = 4 with open(output_data_filename, 'w') as tfile: - tfile.write('\n' + str( origin[0] ) + '\t' - + str( origin[1] ) + '\t' - + str( origin[2] ) + '\t' - + str( origin[0] + lattice[0] ) + '\t' - + str( origin[1] + lattice[1] ) + '\t' - + str( origin[2] + lattice[2] ) - + ' // cell corners') - - tfile.write(str(dim[0]) + '\t' + str(dim[1]) + '\t' - + str(dim[2]) + ' // mesh') - + tfile.write( str(round(lattice[0],ndec)) + '\t' + + str(round(lattice[1],ndec)) + '\t' + + str(round(lattice[2],ndec)) + + ' // domain dimensions [Bohr]\n') + tfile.write( str(round(origin[0],ndec)) + '\t' + + str(round(origin[1],ndec)) + '\t' + + str(round(origin[2],ndec)) + + ' // lower left corner [Bohr]\n') + tfile.write( str(dim[0]) + '\t' + + str(dim[1]) + '\t' + + str(dim[2]) + ' // mesh') + + count=0 for i in range( dim[0] ): for j in range( dim[1] ): for k in range( dim[2] ): row = (i * incx) + (j * incy) + k tfile.write('\n' + str( data[row] )) + count = count + 1 + + print("Written {} values.".format(count)) # Release Data and Attributes del data diff --git a/util/replicateCoords.py b/util/replicateCoords.py index 37450511..8b95f865 100644 --- a/util/replicateCoords.py +++ b/util/replicateCoords.py @@ -53,11 +53,11 @@ vy=eval(word[7]) vz=eval(word[8]) for i in range(nx): - x=eval(word[2])+i*lx + x=round(eval(word[2])+i*lx,12) for j in range(ny): - y=eval(word[3])+j*ly + y=round(eval(word[3])+j*ly,12) for k in range(nz): - z=eval(word[4])+k*lz + z=round(eval(word[4])+k*lz,12) count_atom = count_atom + 1 name = myspecies + str(count_atom) sp=word[1]