diff --git a/source/Makefile.Objects b/source/Makefile.Objects index b5ceae2e68..53d06c8519 100644 --- a/source/Makefile.Objects +++ b/source/Makefile.Objects @@ -346,6 +346,7 @@ OBJS_HAMILT_LCAO=hamilt_lcao.o\ overlap.o\ td_ekinetic_lcao.o\ td_nonlocal_lcao.o\ + td_deepks_lcao.o\ td_pot_hybrid.o\ veff_lcao.o\ meta_lcao.o\ diff --git a/source/source_io/module_hs/cal_r_overlap_R.cpp b/source/source_io/module_hs/cal_r_overlap_R.cpp index 7a26e2d7fc..a4d96ba74c 100644 --- a/source/source_io/module_hs/cal_r_overlap_R.cpp +++ b/source/source_io/module_hs/cal_r_overlap_R.cpp @@ -16,7 +16,8 @@ cal_r_overlap_R::~cal_r_overlap_R() } void cal_r_overlap_R::initialize_orb_table(const UnitCell& ucell, - const LCAO_Orbitals& orb) + const LCAO_Orbitals& orb, + const int lmax_extra) { const int ntype = orb.get_ntype(); int lmax_orb = -1; @@ -24,14 +25,18 @@ void cal_r_overlap_R::initialize_orb_table(const UnitCell& ucell, { lmax_orb = std::max(lmax_orb, orb.Phi[it].getLmax()); } + // lmax_extra (default 0) covers integrals against a higher-L projector set + // (e.g. DeePKS alpha orbitals). lmax_eff == lmax_orb when lmax_extra <= lmax_orb, + // so the default reproduces the original orbital-only sizing exactly. + const int lmax_eff = std::max(lmax_orb, lmax_extra); const double dr = orb.get_dR(); const double dk = orb.get_dk(); const int kmesh = orb.get_kmesh() * 4 + 1; int Rmesh = static_cast(orb.get_Rmax() / dr) + 4; Rmesh += 1 - Rmesh % 2; - const int Lmax = lmax_orb + 1; - const int Lmax_used = 2 * lmax_orb + 1; + const int Lmax = lmax_eff + 1; + const int Lmax_used = lmax_orb + lmax_eff + 1; Center2_Orb::init_Table_Spherical_Bessel(Lmax_used, dr, dk, @@ -473,6 +478,201 @@ void cal_r_overlap_R::init_nonlocal(const UnitCell& ucell,const Parallel_Orbital return; } +void cal_r_overlap_R::init_alpha(const UnitCell& ucell,const Parallel_Orbitals& pv, const LCAO_Orbitals& orb) +{ + ModuleBase::TITLE("cal_r_overlap_R", "init_alpha"); + ModuleBase::timer::start("cal_r_overlap_R", "init_alpha"); + this->ParaV = &pv; + + // initialize_orb_table builds the spherical Bessel table (psb_) and Gaunt + // coefficients (MGT); construct_orbs_and_orb_r builds orbs[T][L][N] and orb_r. + // Both are reused as-is. We pass the alpha (descriptor) Lmax so the tables are + // sized for the orbital<->alpha integrals in one shot. + initialize_orb_table(ucell, orb, orb.Alpha[0].getLmax()); + construct_orbs_and_orb_r(ucell,orb); + construct_orbs_and_alpha_and_orb_r(ucell,orb); + + ModuleBase::timer::end("cal_r_overlap_R", "init_alpha"); + return; +} + +void cal_r_overlap_R::construct_orbs_and_alpha_and_orb_r(const UnitCell& ucell,const LCAO_Orbitals& orb) +{ + // orbs[T][L][N] and orb_r are already built by construct_orbs_and_orb_r (called + // before this in init_alpha). Here we only build the DeePKS projected orbitals + // orbs_alpha and the orbital<->alpha center-two-orbital tables. + // The spherical Bessel / Gaunt tables were already sized for the alpha Lmax by + // initialize_orb_table(ucell, orb, alpha_lmax) called from init_alpha. + const int alpha_lmax = orb.Alpha[0].getLmax(); + orbs_alpha.resize(alpha_lmax + 1); + for (int L = 0; L <= alpha_lmax; ++L) + { + orbs_alpha[L].resize(orb.Alpha[0].getNchi(L)); + for (int N = 0; N < orb.Alpha[0].getNchi(L); ++N) + { + // Alpha (descriptor) orbitals are ordinary numerical orbitals (Psi type), + // built the same way as orbs[T][L][N] (no Beta-style beta_r/r handling). + const auto& orb_origin = orb.Alpha[0].PhiLN(L, N); + orbs_alpha[L][N].set_orbital_info(orb_origin.getLabel(), + orb_origin.getType(), + orb_origin.getL(), + orb_origin.getChi(), + orb_origin.getNr(), + orb_origin.getRab(), + orb_origin.getRadial(), + Numerical_Orbital_Lm::Psi_Type::Psi, + orb_origin.getPsi(), + static_cast(orb_origin.getNk() * kmesh_times) | 1, + orb_origin.getDk(), + orb_origin.getDruniform(), + false, + true, + PARAM.inp.cal_force); + } + } + + for (int TA = 0; TA < orb.get_ntype(); ++TA) + { + for (int LA = 0; LA <= orb.Phi[TA].getLmax(); ++LA) + { + for (int NA = 0; NA < orb.Phi[TA].getNchi(LA); ++NA) + { + for (int LB = 0; LB <= alpha_lmax; ++LB) + { + for (int NB = 0; NB < orb.Alpha[0].getNchi(LB); ++NB) + { + center2_orb11_alpha[TA][LA][NA][LB].insert( + std::make_pair(NB, Center2_Orb::Orb11(orbs[TA][LA][NA], orbs_alpha[LB][NB], psb_, MGT))); + } + } + } + } + } + + for (int TA = 0; TA < orb.get_ntype(); ++TA) + { + for (int LA = 0; LA <= orb.Phi[TA].getLmax(); ++LA) + { + for (int NA = 0; NA < orb.Phi[TA].getNchi(LA); ++NA) + { + for (int LB = 0; LB <= alpha_lmax; ++LB) + { + for (int NB = 0; NB < orb.Alpha[0].getNchi(LB); ++NB) + { + center2_orb21_r_alpha[TA][LA][NA][LB].insert(std::make_pair( + NB, + Center2_Orb::Orb21(orbs[TA][LA][NA], orb_r, orbs_alpha[LB][NB], psb_, MGT))); + } + } + } + } + } + + for (auto& co1: center2_orb11_alpha) + { + for (auto& co2: co1.second) + { + for (auto& co3: co2.second) + { + for (auto& co4: co3.second) + { + for (auto& co5: co4.second) + { + co5.second.init_radial_table(); + } + } + } + } + } + + for (auto& co1: center2_orb21_r_alpha) + { + for (auto& co2: co1.second) + { + for (auto& co3: co2.second) + { + for (auto& co4: co3.second) + { + for (auto& co5: co4.second) + { + co5.second.init_radial_table(); + } + } + } + } + } +} + +void cal_r_overlap_R::get_psi_r_alpha(const LCAO_Orbitals& orb, + std::vector>& nlm, + const ModuleBase::Vector3& R1, + const int& T1, + const int& L1, + const int& m1, + const int& N1, + const ModuleBase::Vector3& R2) +{ + ModuleBase::Vector3 origin_point(0.0, 0.0, 0.0); + double factor = sqrt(ModuleBase::FOUR_PI / 3.0); + const ModuleBase::Vector3& distance = R2 - R1; + const int alpha_lmax = orb.Alpha[0].getLmax(); + + // total number of alpha basis functions (including all m) + int nw_alpha = 0; + for (int L = 0; L <= alpha_lmax; ++L) + { + nw_alpha += orb.Alpha[0].getNchi(L) * (2 * L + 1); + } + + nlm.resize(4); + for (int i = 0; i < 4; i++) + { + nlm[i].resize(nw_alpha); + } + + // Iterate L0 -> N0 -> running-m to match TwoCenterIntegrator::snap (and thus the + // column order of phialpha and the inl_index order of gedm). + int index = 0; + for (int L0 = 0; L0 <= alpha_lmax; ++L0) + { + for (int N0 = 0; N0 < orb.Alpha[0].getNchi(L0); ++N0) + { + for (int m0 = 0; m0 < 2 * L0 + 1; m0++) + { + double overlap_o + = center2_orb11_alpha[T1][L1][N1][L0].at(N0).cal_overlap(origin_point, distance, m1, m0); + + double overlap_x = -1 * factor + * center2_orb21_r_alpha[T1][L1][N1][L0].at(N0).cal_overlap(origin_point, + distance, + m1, + 1, + m0); // m = 1 + + double overlap_y = -1 * factor + * center2_orb21_r_alpha[T1][L1][N1][L0].at(N0).cal_overlap(origin_point, + distance, + m1, + 2, + m0); // m = -1 + + double overlap_z = factor + * center2_orb21_r_alpha[T1][L1][N1][L0].at(N0).cal_overlap(origin_point, + distance, + m1, + 0, + m0); // m = 0 + + nlm[0][index] = overlap_o; + nlm[1][index] = overlap_x + (R1 * overlap_o).x; + nlm[2][index] = overlap_y + (R1 * overlap_o).y; + nlm[3][index] = overlap_z + (R1 * overlap_o).z; + index++; + } + } + } +} + ModuleBase::Vector3 cal_r_overlap_R::get_psi_r_psi(const ModuleBase::Vector3& R1, const int& T1, const int& L1, diff --git a/source/source_io/module_hs/cal_r_overlap_R.h b/source/source_io/module_hs/cal_r_overlap_R.h index 2329cd4ea8..7d1d0683d4 100644 --- a/source/source_io/module_hs/cal_r_overlap_R.h +++ b/source/source_io/module_hs/cal_r_overlap_R.h @@ -33,6 +33,10 @@ class cal_r_overlap_R void init(const UnitCell& ucell,const Parallel_Orbitals& pv, const LCAO_Orbitals& orb); void init_nonlocal(const UnitCell& ucell,const Parallel_Orbitals& pv, const LCAO_Orbitals& orb); + // Initialize the r-weighted two-center integral tables between numerical atomic + // orbitals and DeePKS projected orbitals (orb.Alpha[0]). Used by the rt-TDDFT + // current operator to evaluate the commutator -i[V_delta, r]. Added for DeePKS. + void init_alpha(const UnitCell& ucell,const Parallel_Orbitals& pv, const LCAO_Orbitals& orb); ModuleBase::Vector3 get_psi_r_psi( const ModuleBase::Vector3& R1, const int& T1, @@ -56,13 +60,33 @@ class cal_r_overlap_R const ModuleBase::Vector3& R2, const int& T2 ); + // Compute and (4 components: nlm[0]=, + // nlm[1..3]=) between an orbital phi (T1,L1,m1,N1 at R1) and all + // DeePKS projected orbitals alpha (orb.Alpha[0]) centered at R2. The alpha index + // ordering (L0->N0->running-m) matches TwoCenterIntegrator::snap, hence phialpha + // and gedm. Added for DeePKS rt-TDDFT current. + void get_psi_r_alpha( + const LCAO_Orbitals& orb, + std::vector>& nlm, + const ModuleBase::Vector3& R1, + const int& T1, + const int& L1, + const int& m1, + const int& N1, + const ModuleBase::Vector3& R2 + ); void out_rR(const UnitCell& ucell, const Grid_Driver& gd, const int& istep); void out_rR_other(const UnitCell& ucell, const int& istep, const std::set>& output_R_coor); private: - void initialize_orb_table(const UnitCell& ucell, const LCAO_Orbitals& orb); + // lmax_extra (default 0) enlarges the spherical-Bessel / Gaunt tables to also + // cover integrals against a higher-L projector set (e.g. DeePKS alpha orbitals). + // With the default it reduces exactly to the orbital-only sizing, so existing + // callers (init / init_nonlocal) are byte-for-byte unaffected. + void initialize_orb_table(const UnitCell& ucell, const LCAO_Orbitals& orb, const int lmax_extra = 0); void construct_orbs_and_orb_r(const UnitCell& ucell,const LCAO_Orbitals& orb); void construct_orbs_and_nonlocal_and_orb_r(const UnitCell& ucell,const LCAO_Orbitals& orb); + void construct_orbs_and_alpha_and_orb_r(const UnitCell& ucell,const LCAO_Orbitals& orb); std::vector iw2ia; std::vector iw2iL; @@ -76,6 +100,8 @@ class cal_r_overlap_R Numerical_Orbital_Lm orb_r; std::vector>> orbs; std::vector> orbs_nonlocal; + // DeePKS projected orbitals (orb.Alpha[0]), indexed [L_alpha][N_alpha]. + std::vector> orbs_alpha; std::map< size_t, @@ -97,6 +123,19 @@ class cal_r_overlap_R std::map>>>> center2_orb21_r_nonlocal; + // r-weighted tables between orbitals and DeePKS projectors (orb.Alpha[0]). + // Keyed [T1][L1][N1][L_alpha] -> {N_alpha -> Orb}. Alpha is type-independent, + // so there is no ket-type (TB) index; the ket is indexed by (L_alpha, N_alpha). + std::map< + size_t, + std::map>>>> + center2_orb11_alpha; + + std::map< + size_t, + std::map>>>> + center2_orb21_r_alpha; + const Parallel_Orbitals* ParaV = nullptr; }; #endif diff --git a/source/source_lcao/CMakeLists.txt b/source/source_lcao/CMakeLists.txt index a793f5d5d0..e3ad7efa86 100644 --- a/source/source_lcao/CMakeLists.txt +++ b/source/source_lcao/CMakeLists.txt @@ -19,6 +19,7 @@ if(ENABLE_LCAO) module_operator_lcao/nonlocal.cpp module_operator_lcao/td_ekinetic_lcao.cpp module_operator_lcao/td_nonlocal_lcao.cpp + module_operator_lcao/td_deepks_lcao.cpp module_operator_lcao/td_pot_hybrid.cpp module_operator_lcao/dspin_lcao.cpp module_operator_lcao/dftu_lcao.cpp diff --git a/source/source_lcao/hamilt_lcao.cpp b/source/source_lcao/hamilt_lcao.cpp index f783155c0c..505ec4d03b 100644 --- a/source/source_lcao/hamilt_lcao.cpp +++ b/source/source_lcao/hamilt_lcao.cpp @@ -39,6 +39,7 @@ #include "module_operator_lcao/op_dftu_lcao.h" #include "module_operator_lcao/op_exx_lcao.h" #include "module_operator_lcao/overlap.h" +#include "module_operator_lcao/td_deepks_lcao.h" #include "module_operator_lcao/td_ekinetic_lcao.h" #include "module_operator_lcao/td_nonlocal_lcao.h" #include "module_operator_lcao/td_pot_hybrid.h" @@ -353,6 +354,25 @@ HamiltLCAO::HamiltLCAO(const UnitCell& ucell, two_center_bundle.overlap_orb.get()); this->getOperator()->add(td_ekinetic); +#ifdef __MLALGO + // Add the DeePKS commutator -i[V_delta, r] to the velocity-gauge + // current. Registered between TDEkinetic and TDNonlocal so it shares + // the same TD sub-chain (cal_type lcao_tddft_periodic): it runs after + // TDEkinetic (which creates/zeroes current_term and forwards hR_tmp) + // and before TDNonlocal (which resets TD_info::evolve_once). + if (PARAM.inp.deepks_scf) + { + Operator* td_deepks = new TDDeePKS>(this->hsk, + this->kv->kvec_d, + this->hR, + &ucell, + orb, + &grid_d, + &deepks.ld); + this->getOperator()->add(td_deepks); + } +#endif + Operator* td_nonlocal = new TDNonlocal>(this->hsk, this->kv->kvec_d, this->hR, diff --git a/source/source_lcao/module_operator_lcao/CMakeLists.txt b/source/source_lcao/module_operator_lcao/CMakeLists.txt index e9dbbe381b..e10583b25e 100644 --- a/source/source_lcao/module_operator_lcao/CMakeLists.txt +++ b/source/source_lcao/module_operator_lcao/CMakeLists.txt @@ -11,6 +11,7 @@ add_library( nonlocal.cpp td_ekinetic_lcao.cpp td_nonlocal_lcao.cpp + td_deepks_lcao.cpp td_pot_hybrid.cpp dspin_lcao.cpp dftu_lcao.cpp diff --git a/source/source_lcao/module_operator_lcao/td_deepks_lcao.cpp b/source/source_lcao/module_operator_lcao/td_deepks_lcao.cpp new file mode 100644 index 0000000000..08216f59c1 --- /dev/null +++ b/source/source_lcao/module_operator_lcao/td_deepks_lcao.cpp @@ -0,0 +1,403 @@ +#include "td_deepks_lcao.h" + +#include "source_base/timer.h" +#include "source_base/tool_title.h" +#include "source_cell/module_neighbor/sltk_grid_driver.h" +#include "source_estate/module_pot/H_TDDFT_pw.h" +#include "source_io/module_parameter/parameter.h" +#include "source_lcao/module_operator_lcao/operator_lcao.h" +#include "source_lcao/module_rt/td_info.h" + +#ifdef _OPENMP +#include +#include +#endif + +template +cal_r_overlap_R hamilt::TDDeePKS>::r_calculator; +template +bool hamilt::TDDeePKS>::init_done = false; + +template +hamilt::TDDeePKS>::TDDeePKS(HS_Matrix_K* hsk_in, + const std::vector>& kvec_d_in, + hamilt::HContainer* hR_in, + const UnitCell* ucell_in, + const LCAO_Orbitals& orb, + const Grid_Driver* GridD_in +#ifdef __MLALGO + , + LCAO_Deepks* ld_in +#endif + ) + : hamilt::OperatorLCAO(hsk_in, kvec_d_in, hR_in), orb_(orb) +{ + this->cal_type = calculation_type::lcao_tddft_periodic; + this->ucell = ucell_in; + this->Grid = GridD_in; +#ifdef __MLALGO + this->ld = ld_in; +#endif +#ifdef __DEBUG + assert(this->ucell != nullptr); +#endif + // only meaningful in velocity gauge (td_stype=1) + if (elecstate::H_TDDFT_pw::stype != 1) + { + return; + } + // initialize HR to get adjs info (same cutoff as DeePKS V_delta). + this->initialize_HR(this->Grid); + // initialize the r-weighted projection tables , once. + if (!init_done) + { + r_calculator.init_alpha(*this->ucell, *this->hsk->get_pv(), orb_); + init_done = true; + } +} + +template +hamilt::TDDeePKS>::~TDDeePKS() +{ +} + +// initialize_HR() +template +void hamilt::TDDeePKS>::initialize_HR(const Grid_Driver* GridD) +{ + if (elecstate::H_TDDFT_pw::stype != 1) + { + return; + } + ModuleBase::TITLE("TDDeePKS", "initialize_HR"); + ModuleBase::timer::start("TDDeePKS", "initialize_HR"); + + this->adjs_all.clear(); + this->adjs_all.reserve(this->ucell->nat); + for (int iat0 = 0; iat0 < ucell->nat; iat0++) + { + auto tau0 = ucell->get_tau(iat0); + int T0, I0; + ucell->iat2iait(iat0, &I0, &T0); + AdjacentAtomInfo adjs; + GridD->Find_atom(*ucell, tau0, T0, I0, &adjs); + std::vector is_adj(adjs.adj_num + 1, false); + for (int ad1 = 0; ad1 < adjs.adj_num + 1; ++ad1) + { + const int T1 = adjs.ntype[ad1]; + const int I1 = adjs.natom[ad1]; + const int iat1 = ucell->itia2iat(T1, I1); + const ModuleBase::Vector3& R_index1 = adjs.box[ad1]; + // choose the real adjacent atoms (same cutoff as DeePKS V_delta: + // Phi[T1].Rcut + Alpha[0].Rcut, centered on the descriptor atom iat0) + if (this->ucell->cal_dtau(iat0, iat1, R_index1).norm() * this->ucell->lat0 + < orb_.Phi[T1].getRcut() + orb_.Alpha[0].getRcut()) + { + is_adj[ad1] = true; + } + } + filter_adjs(is_adj, adjs); + this->adjs_all.push_back(adjs); + } + + ModuleBase::timer::end("TDDeePKS", "initialize_HR"); +} + +template +void hamilt::TDDeePKS>::calculate_current() +{ +#ifdef __MLALGO + ModuleBase::TITLE("TDDeePKS", "calculate_current"); + + // current_term is only allocated when out_current==1; nothing to do otherwise. + if (TD_info::out_current != 1) + { + return; + } + // DeePKS rt-TDDFT current is currently only implemented for nspin=1. + // nspin=2 needs the spin-resolved (gedm +/- gedm_mag) handling and nspin=4 + // needs the npol=2 spinor layout, neither of which is validated here. + if (PARAM.inp.nspin != 1) + { + ModuleBase::WARNING_QUIT("TDDeePKS::calculate_current", + "DeePKS rt-TDDFT current (out_current=1) only supports nspin=1 for now."); + } + + ModuleBase::timer::start("TDDeePKS", "calculate_current"); + + const Parallel_Orbitals* paraV = TD_info::td_vel_op->get_current_term_pointer(0)->get_atom_pair(0).get_paraV(); + const int npol = this->ucell->get_npol(); + + for (int iat0 = 0; iat0 < this->ucell->nat; iat0++) + { + auto tau0 = ucell->get_tau(iat0); + int T0 = 0; + int I0 = 0; + ucell->iat2iait(iat0, &I0, &T0); + AdjacentAtomInfo& adjs = this->adjs_all[iat0]; + + // -------------------------------------------------------------------- + // build the gedm triples (alpha_row, alpha_col, value) for this iat0. + // The alpha index is ib + m running over (L0 -> N0 -> m), matching the + // column order of phialpha and of get_psi_r_alpha's output (nlm). + // Replicated from DeePKS::calculate_HR (deepks_lcao.cpp). nspin=1: no mag. + // -------------------------------------------------------------------- + std::vector trace_alpha_row; + std::vector trace_alpha_col; + std::vector gedms; + if (!PARAM.inp.deepks_equiv) + { + int ib = 0; + for (int L0 = 0; L0 <= orb_.Alpha[0].getLmax(); ++L0) + { + for (int N0 = 0; N0 < orb_.Alpha[0].getNchi(L0); ++N0) + { + const int inl = this->ld->deepks_param.inl_index[T0](I0, L0, N0); + const double* pgedm = this->ld->gedm[inl]; + const int nm = 2 * L0 + 1; + for (int m1 = 0; m1 < nm; ++m1) + { + for (int m2 = 0; m2 < nm; ++m2) + { + trace_alpha_row.push_back(ib + m1); + trace_alpha_col.push_back(ib + m2); + gedms.push_back(pgedm[m1 * nm + m2]); + } + } + ib += nm; + } + } + } + else + { + const double* pgedm = this->ld->gedm[iat0]; + int nproj = 0; + for (int il = 0; il < this->ld->deepks_param.lmaxd + 1; il++) + { + nproj += (2 * il + 1) * orb_.Alpha[0].getNchi(il); + } + for (int iproj = 0; iproj < nproj; iproj++) + { + for (int jproj = 0; jproj < nproj; jproj++) + { + trace_alpha_row.push_back(iproj); + trace_alpha_col.push_back(jproj); + gedms.push_back(pgedm[iproj * nproj + jproj]); + } + } + } + + // -------------------------------------------------------------------- + // compute and (nlm[0..3]) for every orbital of + // every neighbor atom of iat0. + // -------------------------------------------------------------------- + std::vector>>> nlm_tot; + nlm_tot.resize(adjs.adj_num + 1); + for (int i = 0; i < adjs.adj_num + 1; i++) + { + nlm_tot[i].resize(4); + } + for (int ad = 0; ad < adjs.adj_num + 1; ++ad) + { + const int T1 = adjs.ntype[ad]; + const int I1 = adjs.natom[ad]; + const int iat1 = ucell->itia2iat(T1, I1); + const ModuleBase::Vector3& tau1 = adjs.adjacent_tau[ad]; + const Atom* atom1 = &ucell->atoms[T1]; + auto all_indexes = paraV->get_indexes_row(iat1); + auto col_indexes = paraV->get_indexes_col(iat1); + all_indexes.insert(all_indexes.end(), col_indexes.begin(), col_indexes.end()); + std::sort(all_indexes.begin(), all_indexes.end()); + all_indexes.erase(std::unique(all_indexes.begin(), all_indexes.end()), all_indexes.end()); + for (int iw1l = 0; iw1l < all_indexes.size(); iw1l += npol) + { + const int iw1 = all_indexes[iw1l] / npol; + std::vector> nlm; + // R1 = orbital position (tau1), R2 = descriptor center (tau0) + r_calculator.get_psi_r_alpha(orb_, + nlm, + tau1 * this->ucell->lat0, + T1, + atom1->iw2l[iw1], + atom1->iw2m[iw1], + atom1->iw2n[iw1], + tau0 * this->ucell->lat0); + for (int dir = 0; dir < 4; dir++) + { + nlm_tot[ad][dir].insert({all_indexes[iw1l], nlm[dir]}); + } + } + } + + // -------------------------------------------------------------------- + // contract over alpha and accumulate -i[V_delta,r] into current_term + // for each atom pair. + // -------------------------------------------------------------------- + for (int ad1 = 0; ad1 < adjs.adj_num + 1; ++ad1) + { + const int T1 = adjs.ntype[ad1]; + const int I1 = adjs.natom[ad1]; + const int iat1 = ucell->itia2iat(T1, I1); + const ModuleBase::Vector3& R_index1 = adjs.box[ad1]; + for (int ad2 = 0; ad2 < adjs.adj_num + 1; ++ad2) + { + const int T2 = adjs.ntype[ad2]; + const int I2 = adjs.natom[ad2]; + const int iat2 = ucell->itia2iat(T2, I2); + const ModuleBase::Vector3& R_index2 = adjs.box[ad2]; + const ModuleBase::Vector3 R_vector(R_index2[0] - R_index1[0], + R_index2[1] - R_index1[1], + R_index2[2] - R_index1[2]); + std::complex* tmp_c[3] = {nullptr, nullptr, nullptr}; + for (int i = 0; i < 3; i++) + { + hamilt::BaseMatrix>* matrix_ptr + = TD_info::td_vel_op->get_current_term_pointer(i)->find_matrix(iat1, + iat2, + R_vector[0], + R_vector[1], + R_vector[2]); + if (matrix_ptr != nullptr) + { + tmp_c[i] = matrix_ptr->get_pointer(); + } + } + // the pair must exist in current_term (it always does: DeePKS + // expands hR with V_delta_R, and current_term spans hR). Skip if + // not found, for safety. + if (tmp_c[0] != nullptr) + { + this->cal_current_IJR(iat1, + iat2, + paraV, + nlm_tot[ad1], + nlm_tot[ad2], + trace_alpha_row, + trace_alpha_col, + gedms, + tmp_c); + } + } + } + } + ModuleBase::timer::end("TDDeePKS", "calculate_current"); +#endif +} + +template +void hamilt::TDDeePKS>::cal_current_IJR( + const int& iat1, + const int& iat2, + const Parallel_Orbitals* paraV, + const std::vector>>& nlm1_all, + const std::vector>>& nlm2_all, + const std::vector& trace_alpha_row, + const std::vector& trace_alpha_col, + const std::vector& gedms, + std::complex** current_mat_p) +{ + const int npol = this->ucell->get_npol(); + auto row_indexes = paraV->get_indexes_row(iat1); + auto col_indexes = paraV->get_indexes_col(iat2); + // step_trace = 0 for NSPIN=1,2; ={0, 1, local_col, local_col+1} for NSPIN=4 + std::vector step_trace(npol * npol, 0); + for (int is = 0; is < npol; is++) + { + for (int is2 = 0; is2 < npol; is2++) + { + step_trace[is * npol + is2] = col_indexes.size() * is + is2; + } + } + const int trace_alpha_size = trace_alpha_row.size(); + for (int iw1l = 0; iw1l < row_indexes.size(); iw1l += npol) + { + std::vector*> nlm1; + for (int dir = 0; dir < 4; dir++) + { + nlm1.push_back(&(nlm1_all[dir].find(row_indexes[iw1l])->second)); + } + for (int iw2l = 0; iw2l < col_indexes.size(); iw2l += npol) + { + std::vector*> nlm2; + for (int dir = 0; dir < 4; dir++) + { + nlm2.push_back(&(nlm2_all[dir].find(col_indexes[iw2l])->second)); + } + for (int is = 0; is < npol * npol; ++is) + { + for (int dir = 0; dir < 3; dir++) + { + std::complex nlm_r_tmp = std::complex{0, 0}; + std::complex imag_unit = std::complex{0, 1}; + for (int t = 0; t < trace_alpha_size; t++) + { + const int a1 = trace_alpha_row[t]; + const int a2 = trace_alpha_col[t]; + // gedm - gedm + nlm_r_tmp += (nlm1[dir + 1]->at(a1) * nlm2[0]->at(a2) + - nlm1[0]->at(a1) * nlm2[dir + 1]->at(a2)) + * gedms[t]; + } + // -i[r,V_delta], factor 2.0 due to the unit transformation + // (identical convention to the Vnl term in cal_vcomm_r_IJR) + current_mat_p[dir][step_trace[is]] -= imag_unit * nlm_r_tmp / 2.0; + } + } + for (int dir = 0; dir < 3; dir++) + { + current_mat_p[dir] += npol; + } + } + for (int dir = 0; dir < 3; dir++) + { + current_mat_p[dir] += (npol - 1) * col_indexes.size(); + } + } +} + +template +void hamilt::TDDeePKS>::contributeHR() +{ + ModuleBase::TITLE("TDDeePKS", "contributeHR"); + + if (elecstate::H_TDDFT_pw::stype != 1) + { + return; + } + + ModuleBase::timer::start("TDDeePKS", "contributeHR"); + + // recompute whenever the TD operators recompute (same gating as TDNonlocal). + // NOTE: we do NOT reset TD_info::evolve_once here; TDNonlocal (which runs after + // this operator in the sub-chain) is responsible for resetting it. + if (!this->current_done || TD_info::evolve_once) + { + // forward the shared hR_tmp pointer to the next sub-operator (TDNonlocal), + // so it accumulates Vnl into the same buffer created by TDEkinetic. + // This operator itself does not write hR_tmp. + if (this->next_sub_op != nullptr) + { + static_cast*>(this->next_sub_op)->set_HR_fixed(this->hR_tmp); + } + this->calculate_current(); + this->current_done = true; + } + + ModuleBase::timer::end("TDDeePKS", "contributeHR"); + return; +} + +template +void hamilt::TDDeePKS>::contributeHk(int ik) +{ + return; +} + +template +void hamilt::TDDeePKS>::set_HR_fixed(void* hR_tmp_in) +{ + this->hR_tmp = static_cast>*>(hR_tmp_in); +} + +template class hamilt::TDDeePKS, double>>; +template class hamilt::TDDeePKS, std::complex>>; diff --git a/source/source_lcao/module_operator_lcao/td_deepks_lcao.h b/source/source_lcao/module_operator_lcao/td_deepks_lcao.h new file mode 100644 index 0000000000..a656bb8549 --- /dev/null +++ b/source/source_lcao/module_operator_lcao/td_deepks_lcao.h @@ -0,0 +1,116 @@ +#ifndef TDDEEPKS_H +#define TDDEEPKS_H +#include "source_basis/module_ao/parallel_orbitals.h" +#include "source_cell/module_neighbor/sltk_grid_driver.h" +#include "source_cell/unitcell.h" +#include "source_io/module_hs/cal_r_overlap_R.h" +#include "source_lcao/module_hcontainer/hcontainer.h" +#include "source_lcao/module_operator_lcao/operator_lcao.h" +#ifdef __MLALGO +#include "source_lcao/module_deepks/LCAO_deepks.h" +#endif + +#include +#include + +namespace hamilt +{ + +#ifndef __TD_DEEPKSTEMPLATE +#define __TD_DEEPKSTEMPLATE + +/// The TDDeePKS class template inherits from class T. +/// It supplies the missing commutator contribution -i[V_delta, r] of the DeePKS +/// non-local potential to the rt-TDDFT velocity-gauge current (out_current=1, +/// td_stype=1). V_delta is a cross-atom projected operator +/// (V_delta_{mu,nu} = sum_iat0 gedm ), so [V_delta,r] +/// is non-zero and contributes to the current operator. This term is otherwise +/// absent from current_term (only filled by TDEkinetic and TDNonlocal), which +/// produces a spurious DC offset in the DeePKS current/spectrum. +template +class TDDeePKS : public T +{ +}; + +#endif + +/// TDDeePKS class template specialization for OperatorLCAO base class. +/// Template parameters: +/// - TK: data type of k-space Hamiltonian +/// - TR: data type of real space Hamiltonian +template +class TDDeePKS> : public OperatorLCAO +{ + public: + TDDeePKS>(HS_Matrix_K* hsk_in, + const std::vector>& kvec_d_in, + hamilt::HContainer* hR_in, + const UnitCell* ucell_in, + const LCAO_Orbitals& orb, + const Grid_Driver* GridD_in +#ifdef __MLALGO + , + LCAO_Deepks* ld_in +#endif + ); + ~TDDeePKS>(); + + /** + * @brief contributeHR() adds -i[V_delta, r] to TD_info::td_vel_op->current_term. + * This operator does NOT modify hR (V_delta itself is added by the DeePKS + * operator); it only forwards the shared hR_tmp pointer to the next TD + * sub-operator (TDNonlocal) and writes the current term. + */ + virtual void contributeHR() override; + virtual void contributeHk(int ik) override; + + virtual void set_HR_fixed(void*) override; + + private: + const UnitCell* ucell = nullptr; + const LCAO_Orbitals& orb_; + const Grid_Driver* Grid = nullptr; + +#ifdef __MLALGO + /// @brief DeePKS data handle, provides gedm and inl_index (phialpha layout) + LCAO_Deepks* ld = nullptr; +#endif + + /// @brief shared TD real-space Hamiltonian buffer. Created by TDEkinetic and + /// forwarded down the sub-chain. Not written here, only forwarded to the next + /// sub-operator so that TDNonlocal accumulates Vnl into the same buffer. + HContainer>* hR_tmp = nullptr; + + /// @brief whether the current term has been computed at least once (per instance) + bool current_done = false; + + /// @brief search the nearest neighbor atoms (same cutoff as DeePKS V_delta: + /// Phi[T1].Rcut + Alpha[0].Rcut, centered on the descriptor atom iat0) + void initialize_HR(const Grid_Driver* GridD_in); + + /// @brief accumulate -i[V_delta, r] into current_term for all pairs + void calculate_current(); + + /// @brief calculate the current contribution of one atom pair + void cal_current_IJR(const int& iat1, + const int& iat2, + const Parallel_Orbitals* paraV, + const std::vector>>& nlm1_all, + const std::vector>>& nlm2_all, + const std::vector& trace_alpha_row, + const std::vector& trace_alpha_col, + const std::vector& gedms, + std::complex** current_mat_p); + + /// @brief nearest neighbor atoms of each descriptor atom iat0 + std::vector adjs_all; + + /// @brief r-weighted projection calculator and ; + /// the tables only depend on orbital shapes (not positions), so a single + /// static instance, initialized once, is reused across instances and MD steps. + static cal_r_overlap_R r_calculator; + static bool init_done; +}; + +} // namespace hamilt +#endif