@@ -32,6 +32,14 @@ Driver::~Driver() = default;
3232
3333namespace {
3434
/// Returns the current reading of std::chrono::high_resolution_clock as whole
/// microseconds since that clock's epoch.
///
/// Used for driver-level lifecycle timing to match BlockingState::sinceUs_.
///
/// NOTE(review): the standard does not require high_resolution_clock to be
/// steady, so callers that subtract two readings should confirm the difference
/// cannot go negative before storing it in an unsigned type.
inline uint64_t currentTimeMicrosHires() {
  const auto sinceEpoch =
      std::chrono::high_resolution_clock::now().time_since_epoch();
  // count() yields a signed representation; convert explicitly to the unsigned
  // return type instead of relying on an implicit conversion.
  return static_cast<uint64_t>(
      std::chrono::duration_cast<std::chrono::microseconds>(sinceEpoch)
          .count());
}
42+
3543// Checks if output channel is produced using identity projection and returns
3644// input channel if so.
3745std::optional<column_index_t > getIdentityProjection (
@@ -228,6 +236,10 @@ void BlockingState::setResume(std::shared_ptr<BlockingState> state) {
228236 std::lock_guard<std::timed_mutex> l (task->mutex ());
229237 if (!driver->state ().isTerminated ) {
230238 state->operator_ ->recordBlockingTime (state->sinceUs_ , state->reason_ );
239+ // Accumulate driver-level blocked time using high_resolution_clock,
240+ // matching sinceUs_ and all other driver lifecycle timing.
241+ driver->addDriverBlockedTime (
242+ (currentTimeMicrosHires () - state->sinceUs_ ) * 1'000 );
231243 }
232244 VELOX_CHECK (!driver->state ().suspended ());
233245 VELOX_CHECK (driver->state ().hasBlockingFuture );
@@ -358,7 +370,7 @@ void Driver::enqueueInternal() {
358370 VELOX_CHECK (!state_.isEnqueued );
359371 state_.isEnqueued = true ;
360372 // When enqueuing, start timing the queue time.
361- queueTimeStartUs_ = getCurrentTimeMicro ();
373+ queueTimeStartUs_ = currentTimeMicrosHires ();
362374}
363375
364376// Call an Operator method. record silenced throws, but not a query
@@ -504,9 +516,24 @@ StopReason Driver::runInternal(
504516 std::shared_ptr<Driver>& self,
505517 std::shared_ptr<BlockingState>& blockingState,
506518 RowVectorPtr& result) {
507- const auto now = getCurrentTimeMicro ();
519+ // All driver timing uses high_resolution_clock consistently
520+ // (matching BlockingState::sinceUs_ used for blocked time).
521+ const auto now = currentTimeMicrosHires ();
508522 const auto queuedTimeUs = now - queueTimeStartUs_;
509523
524+ totalDriverQueuedNanos_ += queuedTimeUs * 1'000 ;
525+ onThreadStartUs_ = now;
526+ // For the normal close path, closeOperators() finalizes and clears
527+ // onThreadStartUs_ before reporting. This guard handles early returns
528+ // (e.g. Task::enter() failure) and non-close exit paths.
529+ auto onThreadTimeGuard = folly::makeGuard ([this ]() {
530+ if (onThreadStartUs_ > 0 ) {
531+ totalDriverOnThreadNanos_ +=
532+ (currentTimeMicrosHires () - onThreadStartUs_) * 1'000 ;
533+ onThreadStartUs_ = 0 ;
534+ }
535+ });
536+
510537 // Update the next operator's queueTime.
511538 StopReason stop =
512539 closed_ ? StopReason::kTerminate : task ()->enter (state_, now);
@@ -870,6 +897,23 @@ void Driver::closeOperators() {
870897 op->close ();
871898 }
872899
900+ // Report driver-level lifecycle timing to the Task accumulator.
901+ // Use partitionId (0..numDrivers-1) so same-index drivers across split
902+ // groups in grouped execution are summed together.
903+ // Finalize on-thread time here (the onThreadTimeGuard in runInternal
904+ // hasn't fired yet since CancelGuard destructs before it).
905+ if (onThreadStartUs_ > 0 ) {
906+ totalDriverOnThreadNanos_ +=
907+ (currentTimeMicrosHires () - onThreadStartUs_) * 1'000 ;
908+ onThreadStartUs_ = 0 ; // Prevent double-counting in the guard.
909+ }
910+ task ()->addDriverLifecycleStats (
911+ static_cast <uint32_t >(ctx_->pipelineId ),
912+ ctx_->partitionId ,
913+ totalDriverQueuedNanos_,
914+ totalDriverOnThreadNanos_,
915+ totalDriverBlockedNanos_);
916+
873917 // Add operator stats to the task.
874918 for (auto & op : operators_) {
875919 auto stats = op->stats (true );
@@ -904,15 +948,22 @@ void Driver::updateStats() {
904948 1'000'000 * state_.totalOffThreadTimeMs ,
905949 RuntimeCounter::Unit::kNanos );
906950 }
951+
907952 task ()->addDriverStats (ctx_->pipelineId , std::move (stats));
908953}
909954
910955void Driver::updateOperatorBlockingStats () {
911956 // Record blocked time if the driver was blocked when terminated, so that
912957 // blocked-time metrics are not lost when a query is aborted.
913- if (state_.hasBlockingFuture && blockedOperatorId_ < operators_.size ()) {
914- operators_[blockedOperatorId_]->recordBlockingTime (
915- state_.blockingStartUs , blockingReason_);
958+ if (state_.hasBlockingFuture ) {
959+ // Accumulate driver-level blocked time whenever a blocking future is
     // pending, even if blockedOperatorId_ is not a valid operator index.
     // The elapsed microseconds are scaled by 1'000 to nanoseconds.
     // NOTE(review): this subtraction is done on unsigned values; it assumes
     // state_.blockingStartUs was read from the same clock as
     // currentTimeMicrosHires() and is not later than "now", otherwise the
     // result wraps to a huge value. TODO confirm against where
     // blockingStartUs is set.
960+ totalDriverBlockedNanos_ +=
961+ (currentTimeMicrosHires () - state_.blockingStartUs ) * 1'000 ;
962+ // Record per-operator blocked time only when blockedOperatorId_ indexes
     // an existing entry in operators_.
963+ if (blockedOperatorId_ < operators_.size ()) {
964+ operators_[blockedOperatorId_]->recordBlockingTime (
965+ state_.blockingStartUs , blockingReason_);
966+ }
916967 }
917968}
918969
0 commit comments