diff --git a/source/Makefile.Objects b/source/Makefile.Objects
index b5ceae2e68..53d06c8519 100644
--- a/source/Makefile.Objects
+++ b/source/Makefile.Objects
@@ -346,6 +346,7 @@ OBJS_HAMILT_LCAO=hamilt_lcao.o\
     overlap.o\
     td_ekinetic_lcao.o\
     td_nonlocal_lcao.o\
+    td_deepks_lcao.o\
     td_pot_hybrid.o\
     veff_lcao.o\
     meta_lcao.o\
diff --git a/source/source_io/module_hs/cal_r_overlap_R.cpp b/source/source_io/module_hs/cal_r_overlap_R.cpp
index 7a26e2d7fc..a4d96ba74c 100644
--- a/source/source_io/module_hs/cal_r_overlap_R.cpp
+++ b/source/source_io/module_hs/cal_r_overlap_R.cpp
@@ -16,7 +16,8 @@ cal_r_overlap_R::~cal_r_overlap_R()
 }
 
 void cal_r_overlap_R::initialize_orb_table(const UnitCell& ucell,
-                                           const LCAO_Orbitals& orb)
+                                           const LCAO_Orbitals& orb,
+                                           const int lmax_extra)
 {
     const int ntype = orb.get_ntype();
     int lmax_orb = -1;
@@ -24,14 +25,18 @@ void cal_r_overlap_R::initialize_orb_table(const UnitCell& ucell,
     {
         lmax_orb = std::max(lmax_orb, orb.Phi[it].getLmax());
     }
+    // lmax_extra (default 0) covers integrals against a higher-L projector set
+    // (e.g. DeePKS alpha orbitals). lmax_eff == lmax_orb when lmax_extra <= lmax_orb,
+    // so the default reproduces the original orbital-only sizing exactly.
+    const int lmax_eff = std::max(lmax_orb, lmax_extra);
     const double dr = orb.get_dR();
     const double dk = orb.get_dk();
     const int kmesh = orb.get_kmesh() * 4 + 1;
     int Rmesh = static_cast<int>(orb.get_Rmax() / dr) + 4;
     Rmesh += 1 - Rmesh % 2;
 
-    const int Lmax = lmax_orb + 1;
-    const int Lmax_used = 2 * lmax_orb + 1;
+    const int Lmax = lmax_eff + 1;
+    const int Lmax_used = lmax_orb + lmax_eff + 1;
     Center2_Orb::init_Table_Spherical_Bessel(Lmax_used,
                                              dr,
                                              dk,
@@ -473,6 +478,201 @@ void cal_r_overlap_R::init_nonlocal(const UnitCell& ucell,const Parallel_Orbital
     return;
 }
 
+void cal_r_overlap_R::init_alpha(const UnitCell& ucell,const Parallel_Orbitals& pv, const LCAO_Orbitals& orb)
+{
+    ModuleBase::TITLE("cal_r_overlap_R", "init_alpha");
+    ModuleBase::timer::start("cal_r_overlap_R", "init_alpha");
+    this->ParaV = &pv;
+
+    // initialize_orb_table builds the spherical Bessel table (psb_) and Gaunt
+    // coefficients (MGT); construct_orbs_and_orb_r builds orbs[T][L][N] and orb_r.
+    // Both are reused as-is. We pass the alpha (descriptor) Lmax so the tables are
+    // sized for the orbital<->alpha integrals in one shot.
+    initialize_orb_table(ucell, orb, orb.Alpha[0].getLmax());
+    construct_orbs_and_orb_r(ucell,orb);
+    construct_orbs_and_alpha_and_orb_r(ucell,orb);
+
+    ModuleBase::timer::end("cal_r_overlap_R", "init_alpha");
+    return;
+}
+
+void cal_r_overlap_R::construct_orbs_and_alpha_and_orb_r(const UnitCell& ucell,const LCAO_Orbitals& orb)
+{
+    // orbs[T][L][N] and orb_r are already built by construct_orbs_and_orb_r (called
+    // before this in init_alpha). Here we only build the DeePKS projected orbitals
+    // orbs_alpha and the orbital<->alpha center-two-orbital tables.
+    // The spherical Bessel / Gaunt tables were already sized for the alpha Lmax by
+    // initialize_orb_table(ucell, orb, alpha_lmax) called from init_alpha.
+    const int alpha_lmax = orb.Alpha[0].getLmax();
+    orbs_alpha.resize(alpha_lmax + 1);
+    for (int L = 0; L <= alpha_lmax; ++L)
+    {
+        orbs_alpha[L].resize(orb.Alpha[0].getNchi(L));
+        for (int N = 0; N < orb.Alpha[0].getNchi(L); ++N)
+        {
+            // Alpha (descriptor) orbitals are ordinary numerical orbitals (Psi type),
+            // built the same way as orbs[T][L][N] (no Beta-style beta_r/r handling).
+            const auto& orb_origin = orb.Alpha[0].PhiLN(L, N);
+            orbs_alpha[L][N].set_orbital_info(orb_origin.getLabel(),
+                                              orb_origin.getType(),
+                                              orb_origin.getL(),
+                                              orb_origin.getChi(),
+                                              orb_origin.getNr(),
+                                              orb_origin.getRab(),
+                                              orb_origin.getRadial(),
+                                              Numerical_Orbital_Lm::Psi_Type::Psi,
+                                              orb_origin.getPsi(),
+                                              static_cast<int>(orb_origin.getNk() * kmesh_times) | 1,
+                                              orb_origin.getDk(),
+                                              orb_origin.getDruniform(),
+                                              false,
+                                              true,
+                                              PARAM.inp.cal_force);
+        }
+    }
+
+    for (int TA = 0; TA < orb.get_ntype(); ++TA)
+    {
+        for (int LA = 0; LA <= orb.Phi[TA].getLmax(); ++LA)
+        {
+            for (int NA = 0; NA < orb.Phi[TA].getNchi(LA); ++NA)
+            {
+                for (int LB = 0; LB <= alpha_lmax; ++LB)
+                {
+                    for (int NB = 0; NB < orb.Alpha[0].getNchi(LB); ++NB)
+                    {
+                        center2_orb11_alpha[TA][LA][NA][LB].insert(
+                            std::make_pair(NB, Center2_Orb::Orb11(orbs[TA][LA][NA], orbs_alpha[LB][NB], psb_, MGT)));
+                    }
+                }
+            }
+        }
+    }
+
+    for (int TA = 0; TA < orb.get_ntype(); ++TA)
+    {
+        for (int LA = 0; LA <= orb.Phi[TA].getLmax(); ++LA)
+        {
+            for (int NA = 0; NA < orb.Phi[TA].getNchi(LA); ++NA)
+            {
+                for (int LB = 0; LB <= alpha_lmax; ++LB)
+                {
+                    for (int NB = 0; NB < orb.Alpha[0].getNchi(LB); ++NB)
+                    {
+                        center2_orb21_r_alpha[TA][LA][NA][LB].insert(std::make_pair(
+                            NB,
+                            Center2_Orb::Orb21(orbs[TA][LA][NA], orb_r, orbs_alpha[LB][NB], psb_, MGT)));
+                    }
+                }
+            }
+        }
+    }
+
+    for (auto& co1: center2_orb11_alpha)
+    {
+        for (auto& co2: co1.second)
+        {
+            for (auto& co3: co2.second)
+            {
+                for (auto& co4: co3.second)
+                {
+                    for (auto& co5: co4.second)
+                    {
+                        co5.second.init_radial_table();
+                    }
+                }
+            }
+        }
+    }
+
+    for (auto& co1: center2_orb21_r_alpha)
+    {
+        for (auto& co2: co1.second)
+        {
+            for (auto& co3: co2.second)
+            {
+                for (auto& co4: co3.second)
+                {
+                    for (auto& co5: co4.second)
+                    {
+                        co5.second.init_radial_table();
+                    }
+                }
+            }
+        }
+    }
+}
+
+void cal_r_overlap_R::get_psi_r_alpha(const LCAO_Orbitals& orb,
+                                      std::vector<std::vector<double>>& nlm,
+                                      const ModuleBase::Vector3<double>& R1,
+                                      const int& T1,
+                                      const int& L1,
+                                      const int& m1,
+                                      const int& N1,
+                                      const ModuleBase::Vector3<double>& R2)
+{
+    ModuleBase::Vector3<double> origin_point(0.0, 0.0, 0.0);
+    double factor = sqrt(ModuleBase::FOUR_PI / 3.0);
+    const ModuleBase::Vector3<double>& distance = R2 - R1;
+    const int alpha_lmax = orb.Alpha[0].getLmax();
+
+    // total number of alpha basis functions (including all m)
+    int nw_alpha = 0;
+    for (int L = 0; L <= alpha_lmax; ++L)
+    {
+        nw_alpha += orb.Alpha[0].getNchi(L) * (2 * L + 1);
+    }
+
+    nlm.resize(4);
+    for (int i = 0; i < 4; i++)
+    {
+        nlm[i].resize(nw_alpha);
+    }
+
+    // Iterate L0 -> N0 -> running-m to match TwoCenterIntegrator::snap (and thus the
+    // column order of phialpha and the inl_index order of gedm).
+    int index = 0;
+    for (int L0 = 0; L0 <= alpha_lmax; ++L0)
+    {
+        for (int N0 = 0; N0 < orb.Alpha[0].getNchi(L0); ++N0)
+        {
+            for (int m0 = 0; m0 < 2 * L0 + 1; m0++)
+            {
+                double overlap_o
+                    = center2_orb11_alpha[T1][L1][N1][L0].at(N0).cal_overlap(origin_point, distance, m1, m0);
+
+                double overlap_x = -1 * factor
+                                   * center2_orb21_r_alpha[T1][L1][N1][L0].at(N0).cal_overlap(origin_point,
+                                                                                              distance,
+                                                                                              m1,
+                                                                                              1,
+                                                                                              m0); // m =  1
+
+                double overlap_y = -1 * factor
+                                   * center2_orb21_r_alpha[T1][L1][N1][L0].at(N0).cal_overlap(origin_point,
+                                                                                              distance,
+                                                                                              m1,
+                                                                                              2,
+                                                                                              m0); // m = -1
+
+                double overlap_z = factor
+                                   * center2_orb21_r_alpha[T1][L1][N1][L0].at(N0).cal_overlap(origin_point,
+                                                                                              distance,
+                                                                                              m1,
+                                                                                              0,
+                                                                                              m0); // m =  0
+
+                nlm[0][index] = overlap_o;
+                nlm[1][index] = overlap_x + (R1 * overlap_o).x;
+                nlm[2][index] = overlap_y + (R1 * overlap_o).y;
+                nlm[3][index] = overlap_z + (R1 * overlap_o).z;
+                index++;
+            }
+        }
+    }
+}
+
 ModuleBase::Vector3<double> cal_r_overlap_R::get_psi_r_psi(const ModuleBase::Vector3<double>& R1,
                                                            const int& T1,
                                                            const int& L1,
diff --git a/source/source_io/module_hs/cal_r_overlap_R.h b/source/source_io/module_hs/cal_r_overlap_R.h
index 2329cd4ea8..7d1d0683d4 100644
--- a/source/source_io/module_hs/cal_r_overlap_R.h
+++ b/source/source_io/module_hs/cal_r_overlap_R.h
@@ -33,6 +33,10 @@ class cal_r_overlap_R
 
     void init(const UnitCell& ucell,const Parallel_Orbitals& pv, const LCAO_Orbitals& orb);
     void init_nonlocal(const UnitCell& ucell,const Parallel_Orbitals& pv, const LCAO_Orbitals& orb);
+    // Initialize the r-weighted two-center integral tables between numerical atomic
+    // orbitals and DeePKS projected orbitals (orb.Alpha[0]). Used by the rt-TDDFT
+    // current operator to evaluate the commutator -i[V_delta, r]. Added for DeePKS.
+    void init_alpha(const UnitCell& ucell,const Parallel_Orbitals& pv, const LCAO_Orbitals& orb);
     ModuleBase::Vector3<double> get_psi_r_psi(
       const ModuleBase::Vector3<double>& R1,
       const int& T1,
@@ -56,13 +60,33 @@ class cal_r_overlap_R
       const ModuleBase::Vector3<double>& R2,
       const int& T2
     );
+    // Compute <phi|alpha> and <phi|r|alpha> (4 components: nlm[0]=<phi|alpha>,
+    // nlm[1..3]=<phi|x,y,z|alpha>) between an orbital phi (T1,L1,m1,N1 at R1) and all
+    // DeePKS projected orbitals alpha (orb.Alpha[0]) centered at R2. The alpha index
+    // ordering (L0->N0->running-m) matches TwoCenterIntegrator::snap, hence phialpha
+    // and gedm. Added for DeePKS rt-TDDFT current.
+    void get_psi_r_alpha(
+      const LCAO_Orbitals& orb,
+      std::vector<std::vector<double>>& nlm,
+      const ModuleBase::Vector3<double>& R1,
+      const int& T1,
+      const int& L1,
+      const int& m1,
+      const int& N1,
+      const ModuleBase::Vector3<double>& R2
+    );
     void out_rR(const UnitCell& ucell, const Grid_Driver& gd, const int& istep);
     void out_rR_other(const UnitCell& ucell, const int& istep, const std::set<Abfs::Vector3_Order<int>>& output_R_coor);
 
   private:
-    void initialize_orb_table(const UnitCell& ucell, const LCAO_Orbitals& orb);
+    // lmax_extra (default 0) enlarges the spherical-Bessel / Gaunt tables to also
+    // cover integrals against a higher-L projector set (e.g. DeePKS alpha orbitals).
+    // With the default it reduces exactly to the orbital-only sizing, so existing
+    // callers (init / init_nonlocal) are byte-for-byte unaffected.
+    void initialize_orb_table(const UnitCell& ucell, const LCAO_Orbitals& orb, const int lmax_extra = 0);
     void construct_orbs_and_orb_r(const UnitCell& ucell,const LCAO_Orbitals& orb);
     void construct_orbs_and_nonlocal_and_orb_r(const UnitCell& ucell,const LCAO_Orbitals& orb);
+    void construct_orbs_and_alpha_and_orb_r(const UnitCell& ucell,const LCAO_Orbitals& orb);
 
     std::vector<int> iw2ia;
     std::vector<int> iw2iL;
@@ -76,6 +100,8 @@ class cal_r_overlap_R
     Numerical_Orbital_Lm orb_r;
     std::vector<std::vector<std::vector<Numerical_Orbital_Lm>>> orbs;
     std::vector<std::vector<Numerical_Orbital_Lm>> orbs_nonlocal;
+    // DeePKS projected orbitals (orb.Alpha[0]), indexed [L_alpha][N_alpha].
+    std::vector<std::vector<Numerical_Orbital_Lm>> orbs_alpha;
 
     std::map<
         size_t,
@@ -97,6 +123,19 @@ class cal_r_overlap_R
         std::map<size_t, std::map<size_t, std::map<size_t, std::map<size_t, Center2_Orb::Orb21>>>>>
         center2_orb21_r_nonlocal;
 
+    // r-weighted tables between orbitals and DeePKS projectors (orb.Alpha[0]).
+    // Keyed [T1][L1][N1][L_alpha] -> {N_alpha -> Orb}. Alpha is type-independent,
+    // so there is no ket-type (TB) index; the ket is indexed by (L_alpha, N_alpha).
+    std::map<
+        size_t,
+        std::map<size_t, std::map<size_t, std::map<size_t, std::map<size_t, Center2_Orb::Orb11>>>>>
+        center2_orb11_alpha;
+
+    std::map<
+        size_t,
+        std::map<size_t, std::map<size_t, std::map<size_t, std::map<size_t, Center2_Orb::Orb21>>>>>
+        center2_orb21_r_alpha;
+
     const Parallel_Orbitals* ParaV = nullptr;
 };
 #endif
diff --git a/source/source_lcao/CMakeLists.txt b/source/source_lcao/CMakeLists.txt
index a793f5d5d0..e3ad7efa86 100644
--- a/source/source_lcao/CMakeLists.txt
+++ b/source/source_lcao/CMakeLists.txt
@@ -19,6 +19,7 @@ if(ENABLE_LCAO)
         module_operator_lcao/nonlocal.cpp
         module_operator_lcao/td_ekinetic_lcao.cpp
         module_operator_lcao/td_nonlocal_lcao.cpp
+        module_operator_lcao/td_deepks_lcao.cpp
         module_operator_lcao/td_pot_hybrid.cpp
         module_operator_lcao/dspin_lcao.cpp
         module_operator_lcao/dftu_lcao.cpp
diff --git a/source/source_lcao/hamilt_lcao.cpp b/source/source_lcao/hamilt_lcao.cpp
index f783155c0c..505ec4d03b 100644
--- a/source/source_lcao/hamilt_lcao.cpp
+++ b/source/source_lcao/hamilt_lcao.cpp
@@ -39,6 +39,7 @@
 #include "module_operator_lcao/op_dftu_lcao.h"
 #include "module_operator_lcao/op_exx_lcao.h"
 #include "module_operator_lcao/overlap.h"
+#include "module_operator_lcao/td_deepks_lcao.h"
 #include "module_operator_lcao/td_ekinetic_lcao.h"
 #include "module_operator_lcao/td_nonlocal_lcao.h"
 #include "module_operator_lcao/td_pot_hybrid.h"
@@ -353,6 +354,25 @@ HamiltLCAO<TK, TR>::HamiltLCAO(const UnitCell& ucell,
                                                                              two_center_bundle.overlap_orb.get());
             this->getOperator()->add(td_ekinetic);
 
+#ifdef __MLALGO
+            // Add the DeePKS commutator -i[V_delta, r] to the velocity-gauge
+            // current. Registered between TDEkinetic and TDNonlocal so it shares
+            // the same TD sub-chain (cal_type lcao_tddft_periodic): it runs after
+            // TDEkinetic (which creates/zeroes current_term and forwards hR_tmp)
+            // and before TDNonlocal (which resets TD_info::evolve_once).
+            if (PARAM.inp.deepks_scf)
+            {
+                Operator<TK>* td_deepks = new TDDeePKS<OperatorLCAO<TK, TR>>(this->hsk,
+                                                                            this->kv->kvec_d,
+                                                                            this->hR,
+                                                                            &ucell,
+                                                                            orb,
+                                                                            &grid_d,
+                                                                            &deepks.ld);
+                this->getOperator()->add(td_deepks);
+            }
+#endif
+
             Operator<TK>* td_nonlocal = new TDNonlocal<OperatorLCAO<TK, TR>>(this->hsk,
                                                                              this->kv->kvec_d,
                                                                              this->hR,
diff --git a/source/source_lcao/module_operator_lcao/CMakeLists.txt b/source/source_lcao/module_operator_lcao/CMakeLists.txt
index e9dbbe381b..e10583b25e 100644
--- a/source/source_lcao/module_operator_lcao/CMakeLists.txt
+++ b/source/source_lcao/module_operator_lcao/CMakeLists.txt
@@ -11,6 +11,7 @@ add_library(
     nonlocal.cpp
     td_ekinetic_lcao.cpp
     td_nonlocal_lcao.cpp
+    td_deepks_lcao.cpp
     td_pot_hybrid.cpp
     dspin_lcao.cpp
     dftu_lcao.cpp
diff --git a/source/source_lcao/module_operator_lcao/td_deepks_lcao.cpp b/source/source_lcao/module_operator_lcao/td_deepks_lcao.cpp
new file mode 100644
index 0000000000..08216f59c1
--- /dev/null
+++ b/source/source_lcao/module_operator_lcao/td_deepks_lcao.cpp
@@ -0,0 +1,403 @@
+#include "td_deepks_lcao.h"
+
+#include "source_base/timer.h"
+#include "source_base/tool_title.h"
+#include "source_cell/module_neighbor/sltk_grid_driver.h"
+#include "source_estate/module_pot/H_TDDFT_pw.h"
+#include "source_io/module_parameter/parameter.h"
+#include "source_lcao/module_operator_lcao/operator_lcao.h"
+#include "source_lcao/module_rt/td_info.h"
+
+#ifdef _OPENMP
+#include <omp.h>
+#include <unordered_set>
+#endif
+
+template <typename TK, typename TR>
+cal_r_overlap_R hamilt::TDDeePKS<hamilt::OperatorLCAO<TK, TR>>::r_calculator;
+template <typename TK, typename TR>
+bool hamilt::TDDeePKS<hamilt::OperatorLCAO<TK, TR>>::init_done = false;
+
+template <typename TK, typename TR>
+hamilt::TDDeePKS<hamilt::OperatorLCAO<TK, TR>>::TDDeePKS(HS_Matrix_K<TK>* hsk_in,
+                                                        const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
+                                                        hamilt::HContainer<TR>* hR_in,
+                                                        const UnitCell* ucell_in,
+                                                        const LCAO_Orbitals& orb,
+                                                        const Grid_Driver* GridD_in
+#ifdef __MLALGO
+                                                        ,
+                                                        LCAO_Deepks<TK>* ld_in
+#endif
+                                                        )
+    : hamilt::OperatorLCAO<TK, TR>(hsk_in, kvec_d_in, hR_in), orb_(orb)
+{
+    this->cal_type = calculation_type::lcao_tddft_periodic;
+    this->ucell = ucell_in;
+    this->Grid = GridD_in;
+#ifdef __MLALGO
+    this->ld = ld_in;
+#endif
+#ifdef __DEBUG
+    assert(this->ucell != nullptr);
+#endif
+    // only meaningful in velocity gauge (td_stype=1)
+    if (elecstate::H_TDDFT_pw::stype != 1)
+    {
+        return;
+    }
+    // initialize HR to get adjs info (same cutoff as DeePKS V_delta).
+    this->initialize_HR(this->Grid);
+    // initialize the r-weighted projection tables <phi|alpha>, <phi|r|alpha> once.
+    if (!init_done)
+    {
+        r_calculator.init_alpha(*this->ucell, *this->hsk->get_pv(), orb_);
+        init_done = true;
+    }
+}
+
+template <typename TK, typename TR>
+hamilt::TDDeePKS<hamilt::OperatorLCAO<TK, TR>>::~TDDeePKS()
+{
+}
+
+// initialize_HR()
+template <typename TK, typename TR>
+void hamilt::TDDeePKS<hamilt::OperatorLCAO<TK, TR>>::initialize_HR(const Grid_Driver* GridD)
+{
+    if (elecstate::H_TDDFT_pw::stype != 1)
+    {
+        return;
+    }
+    ModuleBase::TITLE("TDDeePKS", "initialize_HR");
+    ModuleBase::timer::start("TDDeePKS", "initialize_HR");
+
+    this->adjs_all.clear();
+    this->adjs_all.reserve(this->ucell->nat);
+    for (int iat0 = 0; iat0 < ucell->nat; iat0++)
+    {
+        auto tau0 = ucell->get_tau(iat0);
+        int T0, I0;
+        ucell->iat2iait(iat0, &I0, &T0);
+        AdjacentAtomInfo adjs;
+        GridD->Find_atom(*ucell, tau0, T0, I0, &adjs);
+        std::vector<bool> is_adj(adjs.adj_num + 1, false);
+        for (int ad1 = 0; ad1 < adjs.adj_num + 1; ++ad1)
+        {
+            const int T1 = adjs.ntype[ad1];
+            const int I1 = adjs.natom[ad1];
+            const int iat1 = ucell->itia2iat(T1, I1);
+            const ModuleBase::Vector3<int>& R_index1 = adjs.box[ad1];
+            // choose the real adjacent atoms (same cutoff as DeePKS V_delta:
+            // Phi[T1].Rcut + Alpha[0].Rcut, centered on the descriptor atom iat0)
+            if (this->ucell->cal_dtau(iat0, iat1, R_index1).norm() * this->ucell->lat0
+                < orb_.Phi[T1].getRcut() + orb_.Alpha[0].getRcut())
+            {
+                is_adj[ad1] = true;
+            }
+        }
+        filter_adjs(is_adj, adjs);
+        this->adjs_all.push_back(adjs);
+    }
+
+    ModuleBase::timer::end("TDDeePKS", "initialize_HR");
+}
+
+template <typename TK, typename TR>
+void hamilt::TDDeePKS<hamilt::OperatorLCAO<TK, TR>>::calculate_current()
+{
+#ifdef __MLALGO
+    ModuleBase::TITLE("TDDeePKS", "calculate_current");
+
+    // current_term is only allocated when out_current==1; nothing to do otherwise.
+    if (TD_info::out_current != 1)
+    {
+        return;
+    }
+    // DeePKS rt-TDDFT current is currently only implemented for nspin=1.
+    // nspin=2 needs the spin-resolved (gedm +/- gedm_mag) handling and nspin=4
+    // needs the npol=2 spinor layout, neither of which is validated here.
+    if (PARAM.inp.nspin != 1)
+    {
+        ModuleBase::WARNING_QUIT("TDDeePKS::calculate_current",
+                                 "DeePKS rt-TDDFT current (out_current=1) only supports nspin=1 for now.");
+    }
+
+    ModuleBase::timer::start("TDDeePKS", "calculate_current");
+
+    const Parallel_Orbitals* paraV = TD_info::td_vel_op->get_current_term_pointer(0)->get_atom_pair(0).get_paraV();
+    const int npol = this->ucell->get_npol();
+
+    for (int iat0 = 0; iat0 < this->ucell->nat; iat0++)
+    {
+        auto tau0 = ucell->get_tau(iat0);
+        int T0 = 0;
+        int I0 = 0;
+        ucell->iat2iait(iat0, &I0, &T0);
+        AdjacentAtomInfo& adjs = this->adjs_all[iat0];
+
+        // --------------------------------------------------------------------
+        // build the gedm triples (alpha_row, alpha_col, value) for this iat0.
+        // The alpha index is ib + m running over (L0 -> N0 -> m), matching the
+        // column order of phialpha and of get_psi_r_alpha's output (nlm).
+        // Replicated from DeePKS::calculate_HR (deepks_lcao.cpp). nspin=1: no mag.
+        // --------------------------------------------------------------------
+        std::vector<int> trace_alpha_row;
+        std::vector<int> trace_alpha_col;
+        std::vector<double> gedms;
+        if (!PARAM.inp.deepks_equiv)
+        {
+            int ib = 0;
+            for (int L0 = 0; L0 <= orb_.Alpha[0].getLmax(); ++L0)
+            {
+                for (int N0 = 0; N0 < orb_.Alpha[0].getNchi(L0); ++N0)
+                {
+                    const int inl = this->ld->deepks_param.inl_index[T0](I0, L0, N0);
+                    const double* pgedm = this->ld->gedm[inl];
+                    const int nm = 2 * L0 + 1;
+                    for (int m1 = 0; m1 < nm; ++m1)
+                    {
+                        for (int m2 = 0; m2 < nm; ++m2)
+                        {
+                            trace_alpha_row.push_back(ib + m1);
+                            trace_alpha_col.push_back(ib + m2);
+                            gedms.push_back(pgedm[m1 * nm + m2]);
+                        }
+                    }
+                    ib += nm;
+                }
+            }
+        }
+        else
+        {
+            const double* pgedm = this->ld->gedm[iat0];
+            int nproj = 0;
+            for (int il = 0; il < this->ld->deepks_param.lmaxd + 1; il++)
+            {
+                nproj += (2 * il + 1) * orb_.Alpha[0].getNchi(il);
+            }
+            for (int iproj = 0; iproj < nproj; iproj++)
+            {
+                for (int jproj = 0; jproj < nproj; jproj++)
+                {
+                    trace_alpha_row.push_back(iproj);
+                    trace_alpha_col.push_back(jproj);
+                    gedms.push_back(pgedm[iproj * nproj + jproj]);
+                }
+            }
+        }
+
+        // --------------------------------------------------------------------
+        // compute <phi|alpha> and <phi|r|alpha> (nlm[0..3]) for every orbital of
+        // every neighbor atom of iat0.
+        // --------------------------------------------------------------------
+        std::vector<std::vector<std::unordered_map<int, std::vector<double>>>> nlm_tot;
+        nlm_tot.resize(adjs.adj_num + 1);
+        for (int i = 0; i < adjs.adj_num + 1; i++)
+        {
+            nlm_tot[i].resize(4);
+        }
+        for (int ad = 0; ad < adjs.adj_num + 1; ++ad)
+        {
+            const int T1 = adjs.ntype[ad];
+            const int I1 = adjs.natom[ad];
+            const int iat1 = ucell->itia2iat(T1, I1);
+            const ModuleBase::Vector3<double>& tau1 = adjs.adjacent_tau[ad];
+            const Atom* atom1 = &ucell->atoms[T1];
+            auto all_indexes = paraV->get_indexes_row(iat1);
+            auto col_indexes = paraV->get_indexes_col(iat1);
+            all_indexes.insert(all_indexes.end(), col_indexes.begin(), col_indexes.end());
+            std::sort(all_indexes.begin(), all_indexes.end());
+            all_indexes.erase(std::unique(all_indexes.begin(), all_indexes.end()), all_indexes.end());
+            for (int iw1l = 0; iw1l < all_indexes.size(); iw1l += npol)
+            {
+                const int iw1 = all_indexes[iw1l] / npol;
+                std::vector<std::vector<double>> nlm;
+                // R1 = orbital position (tau1), R2 = descriptor center (tau0)
+                r_calculator.get_psi_r_alpha(orb_,
+                                             nlm,
+                                             tau1 * this->ucell->lat0,
+                                             T1,
+                                             atom1->iw2l[iw1],
+                                             atom1->iw2m[iw1],
+                                             atom1->iw2n[iw1],
+                                             tau0 * this->ucell->lat0);
+                for (int dir = 0; dir < 4; dir++)
+                {
+                    nlm_tot[ad][dir].insert({all_indexes[iw1l], nlm[dir]});
+                }
+            }
+        }
+
+        // --------------------------------------------------------------------
+        // contract over alpha and accumulate -i[V_delta,r] into current_term
+        // for each <iat1, iat2, R> atom pair.
+        // --------------------------------------------------------------------
+        for (int ad1 = 0; ad1 < adjs.adj_num + 1; ++ad1)
+        {
+            const int T1 = adjs.ntype[ad1];
+            const int I1 = adjs.natom[ad1];
+            const int iat1 = ucell->itia2iat(T1, I1);
+            const ModuleBase::Vector3<int>& R_index1 = adjs.box[ad1];
+            for (int ad2 = 0; ad2 < adjs.adj_num + 1; ++ad2)
+            {
+                const int T2 = adjs.ntype[ad2];
+                const int I2 = adjs.natom[ad2];
+                const int iat2 = ucell->itia2iat(T2, I2);
+                const ModuleBase::Vector3<int>& R_index2 = adjs.box[ad2];
+                const ModuleBase::Vector3<int> R_vector(R_index2[0] - R_index1[0],
+                                                        R_index2[1] - R_index1[1],
+                                                        R_index2[2] - R_index1[2]);
+                std::complex<double>* tmp_c[3] = {nullptr, nullptr, nullptr};
+                for (int i = 0; i < 3; i++)
+                {
+                    hamilt::BaseMatrix<std::complex<double>>* matrix_ptr
+                        = TD_info::td_vel_op->get_current_term_pointer(i)->find_matrix(iat1,
+                                                                                       iat2,
+                                                                                       R_vector[0],
+                                                                                       R_vector[1],
+                                                                                       R_vector[2]);
+                    if (matrix_ptr != nullptr)
+                    {
+                        tmp_c[i] = matrix_ptr->get_pointer();
+                    }
+                }
+                // the pair must exist in current_term (it always does: DeePKS
+                // expands hR with V_delta_R, and current_term spans hR). Skip if
+                // not found, for safety.
+                if (tmp_c[0] != nullptr)
+                {
+                    this->cal_current_IJR(iat1,
+                                          iat2,
+                                          paraV,
+                                          nlm_tot[ad1],
+                                          nlm_tot[ad2],
+                                          trace_alpha_row,
+                                          trace_alpha_col,
+                                          gedms,
+                                          tmp_c);
+                }
+            }
+        }
+    }
+    ModuleBase::timer::end("TDDeePKS", "calculate_current");
+#endif
+}
+
+template <typename TK, typename TR>
+void hamilt::TDDeePKS<hamilt::OperatorLCAO<TK, TR>>::cal_current_IJR(
+    const int& iat1,
+    const int& iat2,
+    const Parallel_Orbitals* paraV,
+    const std::vector<std::unordered_map<int, std::vector<double>>>& nlm1_all,
+    const std::vector<std::unordered_map<int, std::vector<double>>>& nlm2_all,
+    const std::vector<int>& trace_alpha_row,
+    const std::vector<int>& trace_alpha_col,
+    const std::vector<double>& gedms,
+    std::complex<double>** current_mat_p)
+{
+    const int npol = this->ucell->get_npol();
+    auto row_indexes = paraV->get_indexes_row(iat1);
+    auto col_indexes = paraV->get_indexes_col(iat2);
+    // step_trace = 0 for NSPIN=1,2; ={0, 1, local_col, local_col+1} for NSPIN=4
+    std::vector<int> step_trace(npol * npol, 0);
+    for (int is = 0; is < npol; is++)
+    {
+        for (int is2 = 0; is2 < npol; is2++)
+        {
+            step_trace[is * npol + is2] = col_indexes.size() * is + is2;
+        }
+    }
+    const int trace_alpha_size = trace_alpha_row.size();
+    for (int iw1l = 0; iw1l < row_indexes.size(); iw1l += npol)
+    {
+        std::vector<const std::vector<double>*> nlm1;
+        for (int dir = 0; dir < 4; dir++)
+        {
+            nlm1.push_back(&(nlm1_all[dir].find(row_indexes[iw1l])->second));
+        }
+        for (int iw2l = 0; iw2l < col_indexes.size(); iw2l += npol)
+        {
+            std::vector<const std::vector<double>*> nlm2;
+            for (int dir = 0; dir < 4; dir++)
+            {
+                nlm2.push_back(&(nlm2_all[dir].find(col_indexes[iw2l])->second));
+            }
+            for (int is = 0; is < npol * npol; ++is)
+            {
+                for (int dir = 0; dir < 3; dir++)
+                {
+                    std::complex<double> nlm_r_tmp = std::complex<double>{0, 0};
+                    std::complex<double> imag_unit = std::complex<double>{0, 1};
+                    for (int t = 0; t < trace_alpha_size; t++)
+                    {
+                        const int a1 = trace_alpha_row[t];
+                        const int a2 = trace_alpha_col[t];
+                        // <phi_i|r|alpha> gedm <alpha|phi_j> - <phi_i|alpha> gedm <alpha|r|phi_j>
+                        nlm_r_tmp += (nlm1[dir + 1]->at(a1) * nlm2[0]->at(a2)
+                                      - nlm1[0]->at(a1) * nlm2[dir + 1]->at(a2))
+                                     * gedms[t];
+                    }
+                    // -i[r,V_delta], factor 2.0 due to the unit transformation
+                    // (identical convention to the Vnl term in cal_vcomm_r_IJR)
+                    current_mat_p[dir][step_trace[is]] -= imag_unit * nlm_r_tmp / 2.0;
+                }
+            }
+            for (int dir = 0; dir < 3; dir++)
+            {
+                current_mat_p[dir] += npol;
+            }
+        }
+        for (int dir = 0; dir < 3; dir++)
+        {
+            current_mat_p[dir] += (npol - 1) * col_indexes.size();
+        }
+    }
+}
+
+template <typename TK, typename TR>
+void hamilt::TDDeePKS<hamilt::OperatorLCAO<TK, TR>>::contributeHR()
+{
+    ModuleBase::TITLE("TDDeePKS", "contributeHR");
+
+    if (elecstate::H_TDDFT_pw::stype != 1)
+    {
+        return;
+    }
+
+    ModuleBase::timer::start("TDDeePKS", "contributeHR");
+
+    // recompute whenever the TD operators recompute (same gating as TDNonlocal).
+    // NOTE: we do NOT reset TD_info::evolve_once here; TDNonlocal (which runs after
+    // this operator in the sub-chain) is responsible for resetting it.
+    if (!this->current_done || TD_info::evolve_once)
+    {
+        // forward the shared hR_tmp pointer to the next sub-operator (TDNonlocal),
+        // so it accumulates Vnl into the same buffer created by TDEkinetic.
+        // This operator itself does not write hR_tmp.
+        if (this->next_sub_op != nullptr)
+        {
+            static_cast<OperatorLCAO<TK, TR>*>(this->next_sub_op)->set_HR_fixed(this->hR_tmp);
+        }
+        this->calculate_current();
+        this->current_done = true;
+    }
+
+    ModuleBase::timer::end("TDDeePKS", "contributeHR");
+    return;
+}
+
+template <typename TK, typename TR>
+void hamilt::TDDeePKS<hamilt::OperatorLCAO<TK, TR>>::contributeHk(int ik)
+{
+    return;
+}
+
+template <typename TK, typename TR>
+void hamilt::TDDeePKS<hamilt::OperatorLCAO<TK, TR>>::set_HR_fixed(void* hR_tmp_in)
+{
+    this->hR_tmp = static_cast<hamilt::HContainer<std::complex<double>>*>(hR_tmp_in);
+}
+
+template class hamilt::TDDeePKS<hamilt::OperatorLCAO<std::complex<double>, double>>;
+template class hamilt::TDDeePKS<hamilt::OperatorLCAO<std::complex<double>, std::complex<double>>>;
diff --git a/source/source_lcao/module_operator_lcao/td_deepks_lcao.h b/source/source_lcao/module_operator_lcao/td_deepks_lcao.h
new file mode 100644
index 0000000000..a656bb8549
--- /dev/null
+++ b/source/source_lcao/module_operator_lcao/td_deepks_lcao.h
@@ -0,0 +1,116 @@
+#ifndef TDDEEPKS_H
+#define TDDEEPKS_H
+#include "source_basis/module_ao/parallel_orbitals.h"
+#include "source_cell/module_neighbor/sltk_grid_driver.h"
+#include "source_cell/unitcell.h"
+#include "source_io/module_hs/cal_r_overlap_R.h"
+#include "source_lcao/module_hcontainer/hcontainer.h"
+#include "source_lcao/module_operator_lcao/operator_lcao.h"
+#ifdef __MLALGO
+#include "source_lcao/module_deepks/LCAO_deepks.h"
+#endif
+
+#include <unordered_map>
+#include <vector>
+
+namespace hamilt
+{
+
+#ifndef __TD_DEEPKSTEMPLATE
+#define __TD_DEEPKSTEMPLATE
+
+/// The TDDeePKS class template inherits from class T.
+/// It supplies the missing commutator contribution -i[V_delta, r] of the DeePKS
+/// non-local potential to the rt-TDDFT velocity-gauge current (out_current=1,
+/// td_stype=1). V_delta is a cross-atom projected operator
+/// (V_delta_{mu,nu} = sum_iat0 <phi_mu|alpha> gedm <alpha|phi_nu>), so [V_delta,r]
+/// is non-zero and contributes to the current operator. This term is otherwise
+/// absent from current_term (only filled by TDEkinetic and TDNonlocal), which
+/// produces a spurious DC offset in the DeePKS current/spectrum.
+template <class T>
+class TDDeePKS : public T
+{
+};
+
+#endif
+
+/// TDDeePKS class template specialization for OperatorLCAO<TK> base class.
+/// Template parameters:
+/// - TK: data type of k-space Hamiltonian
+/// - TR: data type of real space Hamiltonian
+template <typename TK, typename TR>
+class TDDeePKS<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
+{
+  public:
+    TDDeePKS<OperatorLCAO<TK, TR>>(HS_Matrix_K<TK>* hsk_in,
+                                   const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
+                                   hamilt::HContainer<TR>* hR_in,
+                                   const UnitCell* ucell_in,
+                                   const LCAO_Orbitals& orb,
+                                   const Grid_Driver* GridD_in
+#ifdef __MLALGO
+                                   ,
+                                   LCAO_Deepks<TK>* ld_in
+#endif
+    );
+    ~TDDeePKS<OperatorLCAO<TK, TR>>();
+
+    /**
+     * @brief contributeHR() adds -i[V_delta, r] to TD_info::td_vel_op->current_term.
+     * This operator does NOT modify hR (V_delta itself is added by the DeePKS
+     * operator); it only forwards the shared hR_tmp pointer to the next TD
+     * sub-operator (TDNonlocal) and writes the current term.
+     */
+    virtual void contributeHR() override;
+    virtual void contributeHk(int ik) override;
+
+    virtual void set_HR_fixed(void*) override;
+
+  private:
+    const UnitCell* ucell = nullptr;
+    const LCAO_Orbitals& orb_;
+    const Grid_Driver* Grid = nullptr;
+
+#ifdef __MLALGO
+    /// @brief DeePKS data handle, provides gedm and inl_index (phialpha layout)
+    LCAO_Deepks<TK>* ld = nullptr;
+#endif
+
+    /// @brief shared TD real-space Hamiltonian buffer. Created by TDEkinetic and
+    /// forwarded down the sub-chain. Not written here, only forwarded to the next
+    /// sub-operator so that TDNonlocal accumulates Vnl into the same buffer.
+    HContainer<std::complex<double>>* hR_tmp = nullptr;
+
+    /// @brief whether the current term has been computed at least once (per instance)
+    bool current_done = false;
+
+    /// @brief search the nearest neighbor atoms (same cutoff as DeePKS V_delta:
+    /// Phi[T1].Rcut + Alpha[0].Rcut, centered on the descriptor atom iat0)
+    void initialize_HR(const Grid_Driver* GridD_in);
+
+    /// @brief accumulate -i[V_delta, r] into current_term for all <I,J,R> pairs
+    void calculate_current();
+
+    /// @brief calculate the current contribution of one <I,J,R> atom pair
+    void cal_current_IJR(const int& iat1,
+                         const int& iat2,
+                         const Parallel_Orbitals* paraV,
+                         const std::vector<std::unordered_map<int, std::vector<double>>>& nlm1_all,
+                         const std::vector<std::unordered_map<int, std::vector<double>>>& nlm2_all,
+                         const std::vector<int>& trace_alpha_row,
+                         const std::vector<int>& trace_alpha_col,
+                         const std::vector<double>& gedms,
+                         std::complex<double>** current_mat_p);
+
+    /// @brief nearest neighbor atoms of each descriptor atom iat0
+    std::vector<AdjacentAtomInfo> adjs_all;
+
+    /// @brief r-weighted projection calculator <phi|alpha> and <phi|r|alpha>;
+    /// the tables only depend on orbital shapes (not positions), so a single
+    /// static instance, initialized once, is reused across instances and MD steps.
+    static cal_r_overlap_R r_calculator;
+    static bool init_done;
+};
+
+} // namespace hamilt
+#endif