Skip to content

Commit 465e9fb

Browse files
committed
scheduler: requeue unschedulable bindings on cluster status changes
When a cluster's status changes (conditions, API enablements, or resource summary), previously unschedulable bindings may now be schedulable. This commit adds event handlers so such changes flush unschedulable bindings directly to activeQ via MoveAllToActive(), avoiding a full scan of all ResourceBindings. - addCluster: gate on HasUnschedulableBindings() to skip unnecessary work during informer cache population at scheduler startup - updateCluster: replace clusterReconcileWorker.Add with MoveAllToActive() for status-change cases; the direct flush is cheaper than enqueueing a full binding scan via reconcileWorker - ResourceSummary changes retain their existing guard so a requeue only fires when there are actually unschedulable bindings Signed-off-by: Tejashwar Reddy Katika <tejashwar1029@gmail.com>
1 parent 773dcf0 commit 465e9fb

4 files changed

Lines changed: 247 additions & 25 deletions

File tree

pkg/scheduler/event_handler.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,9 @@ func (s *Scheduler) addCluster(obj any) {
277277
if s.enableSchedulerEstimator {
278278
s.schedulerEstimatorWorker.Add(cluster.Name)
279279
}
280+
if s.priorityQueue != nil && s.priorityQueue.HasUnschedulableBindings() {
281+
s.priorityQueue.MoveAllToActive()
282+
}
280283
}
281284

282285
func (s *Scheduler) updateCluster(oldObj, newObj any) {
@@ -306,6 +309,15 @@ func (s *Scheduler) updateCluster(oldObj, newObj any) {
306309
// to the worker. Therefore, call Add func instead of Enqueue func.
307310
s.clusterReconcileWorker.Add(oldCluster)
308311
s.clusterReconcileWorker.Add(newCluster)
312+
case !equality.Semantic.DeepEqual(oldCluster.Status.Conditions, newCluster.Status.Conditions) ||
313+
!equality.Semantic.DeepEqual(oldCluster.Status.APIEnablements, newCluster.Status.APIEnablements):
314+
if s.priorityQueue != nil && s.priorityQueue.HasUnschedulableBindings() {
315+
s.priorityQueue.MoveAllToActive()
316+
}
317+
case !equality.Semantic.DeepEqual(oldCluster.Status.ResourceSummary, newCluster.Status.ResourceSummary):
318+
if s.priorityQueue != nil && s.priorityQueue.HasUnschedulableBindings() {
319+
s.priorityQueue.MoveAllToActive()
320+
}
309321
}
310322
}
311323

pkg/scheduler/event_handler_test.go

Lines changed: 210 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ import (
3030
clusterv1alpha1 "github.com/karmada-io/karmada/pkg/apis/cluster/v1alpha1"
3131
policyv1alpha1 "github.com/karmada-io/karmada/pkg/apis/policy/v1alpha1"
3232
workv1alpha2 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2"
33+
internalqueue "github.com/karmada-io/karmada/pkg/scheduler/internal/queue"
3334
)
3435

3536
func TestResourceBindingEventFilter(t *testing.T) {
@@ -146,55 +147,84 @@ func TestResourceBindingEventFilter(t *testing.T) {
146147

147148
func TestAddCluster(t *testing.T) {
148149
tests := []struct {
149-
name string
150-
enableSchedulerEstimator bool
151-
obj any
152-
expectedAdded bool
153-
expectedClusterName string
150+
name string
151+
enableSchedulerEstimator bool
152+
obj any
153+
priorityQueue internalqueue.SchedulingQueue
154+
expectedEstimatorAdded bool
155+
expectedEstimatorClusterName string
156+
expectedMoveAllToActive bool
154157
}{
155158
{
156-
name: "valid cluster object with estimator enabled",
157-
enableSchedulerEstimator: true,
158-
obj: createCluster("test-cluster", 0, nil),
159-
expectedAdded: true,
160-
expectedClusterName: "test-cluster",
159+
name: "valid cluster, estimator enabled, no priority queue",
160+
enableSchedulerEstimator: true,
161+
obj: createCluster("test-cluster", 0, nil),
162+
priorityQueue: nil,
163+
expectedEstimatorAdded: true,
164+
expectedEstimatorClusterName: "test-cluster",
165+
expectedMoveAllToActive: false,
161166
},
162167
{
163-
name: "valid cluster object with estimator disabled",
168+
name: "valid cluster, estimator enabled, queue with no unschedulable bindings",
169+
enableSchedulerEstimator: true,
170+
obj: createCluster("test-cluster", 0, nil),
171+
priorityQueue: &mockSchedulingQueue{hasUnschedulable: false},
172+
expectedEstimatorAdded: true,
173+
expectedEstimatorClusterName: "test-cluster",
174+
expectedMoveAllToActive: false,
175+
},
176+
{
177+
name: "valid cluster, estimator enabled, queue with unschedulable bindings",
178+
enableSchedulerEstimator: true,
179+
obj: createCluster("test-cluster", 0, nil),
180+
priorityQueue: &mockSchedulingQueue{hasUnschedulable: true},
181+
expectedEstimatorAdded: true,
182+
expectedEstimatorClusterName: "test-cluster",
183+
expectedMoveAllToActive: true,
184+
},
185+
{
186+
name: "valid cluster, estimator disabled, queue with unschedulable bindings",
164187
enableSchedulerEstimator: false,
165-
obj: createCluster("test-cluster-2", 0, nil),
166-
expectedAdded: false,
167-
expectedClusterName: "",
188+
obj: createCluster("test-cluster-2", 0, nil),
189+
priorityQueue: &mockSchedulingQueue{hasUnschedulable: true},
190+
expectedEstimatorAdded: false,
191+
expectedMoveAllToActive: true,
168192
},
169193
{
170194
name: "invalid object type",
171195
enableSchedulerEstimator: true,
172196
obj: &corev1.Pod{},
173-
expectedAdded: false,
174-
expectedClusterName: "",
197+
expectedEstimatorAdded: false,
198+
expectedMoveAllToActive: false,
175199
},
176200
}
177201

178202
for _, tt := range tests {
179203
t.Run(tt.name, func(t *testing.T) {
180-
mockWorker := &mockAsyncWorker{}
204+
estimatorWorker := &mockAsyncWorker{}
181205
s := &Scheduler{
182206
enableSchedulerEstimator: tt.enableSchedulerEstimator,
183-
schedulerEstimatorWorker: mockWorker,
207+
schedulerEstimatorWorker: estimatorWorker,
208+
priorityQueue: tt.priorityQueue,
184209
}
185210

186211
s.addCluster(tt.obj)
187212

188-
if tt.expectedAdded {
189-
assert.Equal(t, 1, mockWorker.addCount, "Worker Add should have been called once")
190-
assert.Equal(t, tt.expectedClusterName, mockWorker.lastAdded, "Incorrect cluster name added")
213+
if tt.expectedEstimatorAdded {
214+
assert.Equal(t, 1, estimatorWorker.addCount, "Estimator worker Add should have been called once")
215+
assert.Equal(t, tt.expectedEstimatorClusterName, estimatorWorker.lastAdded, "Incorrect cluster name added to estimator worker")
191216
} else {
192-
assert.Equal(t, 0, mockWorker.addCount, "Worker Add should not have been called")
193-
assert.Nil(t, mockWorker.lastAdded, "No cluster name should have been added")
217+
assert.Equal(t, 0, estimatorWorker.addCount, "Estimator worker Add should not have been called")
218+
assert.Nil(t, estimatorWorker.lastAdded, "No cluster name should have been added to estimator worker")
219+
}
220+
221+
if tt.priorityQueue != nil {
222+
mock := tt.priorityQueue.(*mockSchedulingQueue)
223+
assert.Equal(t, tt.expectedMoveAllToActive, mock.moveAllToActiveCalled, "MoveAllToActive called state mismatch")
194224
}
195225

196-
assert.Equal(t, 0, mockWorker.enqueueCount, "Worker Enqueue should not have been called")
197-
assert.Nil(t, mockWorker.lastEnqueued, "No item should have been enqueued")
226+
assert.Equal(t, 0, estimatorWorker.enqueueCount, "Estimator worker Enqueue should not have been called")
227+
assert.Nil(t, estimatorWorker.lastEnqueued, "No item should have been enqueued")
198228
})
199229
}
200230
}
@@ -205,8 +235,10 @@ func TestUpdateCluster(t *testing.T) {
205235
enableSchedulerEstimator bool
206236
oldObj any
207237
newObj any
238+
priorityQueue internalqueue.SchedulingQueue
208239
expectedEstimatorAdded bool
209240
expectedReconcileAdded int
241+
expectedMoveAllToActive bool
210242
}{
211243
{
212244
name: "valid cluster update with generation change",
@@ -256,6 +288,141 @@ func TestUpdateCluster(t *testing.T) {
256288
expectedEstimatorAdded: false,
257289
expectedReconcileAdded: 0,
258290
},
291+
{
292+
name: "cluster update with ResourceSummary change, priorityQueue nil",
293+
enableSchedulerEstimator: true,
294+
oldObj: createCluster("test-cluster", 0, nil),
295+
newObj: createClusterWithStatus("test-cluster", 0, nil, clusterv1alpha1.ClusterStatus{
296+
ResourceSummary: &clusterv1alpha1.ResourceSummary{},
297+
}),
298+
expectedEstimatorAdded: true,
299+
expectedReconcileAdded: 0,
300+
},
301+
{
302+
name: "cluster update with ResourceSummary change, no unschedulable bindings",
303+
enableSchedulerEstimator: true,
304+
oldObj: createCluster("test-cluster", 0, nil),
305+
newObj: createClusterWithStatus("test-cluster", 0, nil, clusterv1alpha1.ClusterStatus{
306+
ResourceSummary: &clusterv1alpha1.ResourceSummary{},
307+
}),
308+
priorityQueue: &mockSchedulingQueue{hasUnschedulable: false},
309+
expectedEstimatorAdded: true,
310+
expectedReconcileAdded: 0,
311+
},
312+
{
313+
name: "cluster update with ResourceSummary change, has unschedulable bindings",
314+
enableSchedulerEstimator: true,
315+
oldObj: createCluster("test-cluster", 0, nil),
316+
newObj: createClusterWithStatus("test-cluster", 0, nil, clusterv1alpha1.ClusterStatus{
317+
ResourceSummary: &clusterv1alpha1.ResourceSummary{},
318+
}),
319+
priorityQueue: &mockSchedulingQueue{hasUnschedulable: true},
320+
expectedEstimatorAdded: true,
321+
expectedReconcileAdded: 0,
322+
expectedMoveAllToActive: true,
323+
},
324+
{
325+
name: "ResourceSummary and Conditions change simultaneously — Conditions case takes precedence",
326+
enableSchedulerEstimator: false,
327+
oldObj: createCluster("test-cluster", 0, nil),
328+
newObj: createClusterWithStatus("test-cluster", 0, nil, clusterv1alpha1.ClusterStatus{
329+
ResourceSummary: &clusterv1alpha1.ResourceSummary{},
330+
Conditions: []metav1.Condition{
331+
{Type: clusterv1alpha1.ClusterConditionReady, Status: metav1.ConditionTrue},
332+
},
333+
}),
334+
priorityQueue: &mockSchedulingQueue{hasUnschedulable: true},
335+
expectedEstimatorAdded: false,
336+
// Conditions case fires first via MoveAllToActive; ResourceSummary case is skipped
337+
expectedReconcileAdded: 0,
338+
expectedMoveAllToActive: true,
339+
},
340+
{
341+
name: "generation and status change simultaneously — generation case takes precedence",
342+
enableSchedulerEstimator: false,
343+
oldObj: createCluster("test-cluster", 1, nil),
344+
newObj: createClusterWithStatus("test-cluster", 2, nil, clusterv1alpha1.ClusterStatus{
345+
Conditions: []metav1.Condition{
346+
{Type: clusterv1alpha1.ClusterConditionReady, Status: metav1.ConditionTrue},
347+
},
348+
}),
349+
expectedEstimatorAdded: false,
350+
// Generation case fires and adds both old+new → 2, not 3
351+
expectedReconcileAdded: 2,
352+
},
353+
{
354+
name: "DeletionTimestamp and status change simultaneously — deletion case takes precedence",
355+
enableSchedulerEstimator: false,
356+
oldObj: createCluster("test-cluster", 0, nil),
357+
newObj: func() *clusterv1alpha1.Cluster {
358+
c := createClusterWithStatus("test-cluster", 0, nil, clusterv1alpha1.ClusterStatus{
359+
Conditions: []metav1.Condition{
360+
{Type: clusterv1alpha1.ClusterConditionReady, Status: metav1.ConditionTrue},
361+
},
362+
})
363+
now := metav1.Now()
364+
c.DeletionTimestamp = &now
365+
return c
366+
}(),
367+
expectedEstimatorAdded: false,
368+
// Deletion case fires → only 1 add, status case is skipped
369+
expectedReconcileAdded: 1,
370+
},
371+
{
372+
name: "identical non-nil ResourceSummary — DeepEqual true, no reconcile triggered",
373+
enableSchedulerEstimator: false,
374+
oldObj: createClusterWithStatus("test-cluster", 0, nil, clusterv1alpha1.ClusterStatus{
375+
ResourceSummary: &clusterv1alpha1.ResourceSummary{},
376+
}),
377+
newObj: createClusterWithStatus("test-cluster", 0, nil, clusterv1alpha1.ClusterStatus{
378+
ResourceSummary: &clusterv1alpha1.ResourceSummary{},
379+
}),
380+
priorityQueue: &mockSchedulingQueue{hasUnschedulable: true},
381+
expectedEstimatorAdded: false,
382+
expectedReconcileAdded: 0,
383+
},
384+
{
385+
name: "cluster update with Conditions change, has unschedulable bindings",
386+
enableSchedulerEstimator: true,
387+
oldObj: createCluster("test-cluster", 0, nil),
388+
newObj: createClusterWithStatus("test-cluster", 0, nil, clusterv1alpha1.ClusterStatus{
389+
Conditions: []metav1.Condition{
390+
{Type: clusterv1alpha1.ClusterConditionReady, Status: metav1.ConditionTrue},
391+
},
392+
}),
393+
priorityQueue: &mockSchedulingQueue{hasUnschedulable: true},
394+
expectedEstimatorAdded: true,
395+
expectedReconcileAdded: 0,
396+
expectedMoveAllToActive: true,
397+
},
398+
{
399+
name: "cluster update with Conditions change, no unschedulable bindings",
400+
enableSchedulerEstimator: false,
401+
oldObj: createCluster("test-cluster", 0, nil),
402+
newObj: createClusterWithStatus("test-cluster", 0, nil, clusterv1alpha1.ClusterStatus{
403+
Conditions: []metav1.Condition{
404+
{Type: clusterv1alpha1.ClusterConditionReady, Status: metav1.ConditionTrue},
405+
},
406+
}),
407+
priorityQueue: &mockSchedulingQueue{hasUnschedulable: false},
408+
expectedEstimatorAdded: false,
409+
expectedReconcileAdded: 0,
410+
expectedMoveAllToActive: false,
411+
},
412+
{
413+
name: "cluster update with APIEnablements change, has unschedulable bindings",
414+
enableSchedulerEstimator: true,
415+
oldObj: createCluster("test-cluster", 0, nil),
416+
newObj: createClusterWithStatus("test-cluster", 0, nil, clusterv1alpha1.ClusterStatus{
417+
APIEnablements: []clusterv1alpha1.APIEnablement{
418+
{GroupVersion: "apps/v1"},
419+
},
420+
}),
421+
priorityQueue: &mockSchedulingQueue{hasUnschedulable: true},
422+
expectedEstimatorAdded: true,
423+
expectedReconcileAdded: 0,
424+
expectedMoveAllToActive: true,
425+
},
259426
}
260427

261428
for _, tt := range tests {
@@ -266,6 +433,7 @@ func TestUpdateCluster(t *testing.T) {
266433
enableSchedulerEstimator: tt.enableSchedulerEstimator,
267434
schedulerEstimatorWorker: estimatorWorker,
268435
clusterReconcileWorker: reconcileWorker,
436+
priorityQueue: tt.priorityQueue,
269437
}
270438

271439
s.updateCluster(tt.oldObj, tt.newObj)
@@ -299,6 +467,12 @@ func TestUpdateCluster(t *testing.T) {
299467
} else {
300468
assert.Nil(t, reconcileWorker.lastAdded, "No cluster should have been added to reconcile worker")
301469
}
470+
471+
// Check MoveAllToActive
472+
if tt.priorityQueue != nil {
473+
mock := tt.priorityQueue.(*mockSchedulingQueue)
474+
assert.Equal(t, tt.expectedMoveAllToActive, mock.moveAllToActiveCalled, "MoveAllToActive called state mismatch")
475+
}
302476
})
303477
}
304478
}
@@ -422,6 +596,17 @@ func createCluster(name string, generation int64, labels map[string]string) *clu
422596
}
423597
}
424598

599+
func createClusterWithStatus(name string, generation int64, labels map[string]string, status clusterv1alpha1.ClusterStatus) *clusterv1alpha1.Cluster {
600+
return &clusterv1alpha1.Cluster{
601+
ObjectMeta: metav1.ObjectMeta{
602+
Name: name,
603+
Generation: generation,
604+
Labels: labels,
605+
},
606+
Status: status,
607+
}
608+
}
609+
425610
func createResourceBinding(name, schedulerName string, labels map[string]string, suspension *workv1alpha2.Suspension) *workv1alpha2.ResourceBinding {
426611
return &workv1alpha2.ResourceBinding{
427612
ObjectMeta: metav1.ObjectMeta{

pkg/scheduler/internal/queue/scheduling_queue.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,12 @@ type SchedulingQueue interface {
7676
// Len returns the length of activeQ.
7777
Len() int
7878

79+
// HasUnschedulableBindings reports whether the unschedulableBindings map is non-empty.
80+
HasUnschedulableBindings() bool
81+
82+
// MoveAllToActive moves all bindings from unschedulableBindings directly to activeQ.
83+
MoveAllToActive()
84+
7985
// Forget indicates that an item is finished being retried. Doesn't matter whether it's for perm failing
8086
// or for success, we'll remove it from backoffQ, but you still have to call `Done` on the queue.
8187
Forget(bindingInfo *QueuedBindingInfo)
@@ -332,6 +338,21 @@ func (bq *prioritySchedulingQueue) Len() int {
332338
return bq.activeQ.Len()
333339
}
334340

341+
func (bq *prioritySchedulingQueue) HasUnschedulableBindings() bool {
342+
bq.lock.RLock()
343+
defer bq.lock.RUnlock()
344+
return len(bq.unschedulableBindings.bindingInfoMap) > 0
345+
}
346+
347+
func (bq *prioritySchedulingQueue) MoveAllToActive() {
348+
bq.lock.Lock()
349+
defer bq.lock.Unlock()
350+
351+
for _, bInfo := range bq.unschedulableBindings.bindingInfoMap {
352+
bq.moveToActiveQ(bInfo)
353+
}
354+
}
355+
335356
func (bq *prioritySchedulingQueue) Forget(bindingInfo *QueuedBindingInfo) {
336357
bq.lock.Lock()
337358
defer bq.lock.Unlock()

pkg/scheduler/scheduler_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2097,12 +2097,16 @@ type mockSchedulingQueue struct {
20972097
pushUnschedulableCalled bool
20982098
pushBackoffCalled bool
20992099
forgetCalled bool
2100+
moveAllToActiveCalled bool
2101+
hasUnschedulable bool
21002102
}
21012103

21022104
func (m *mockSchedulingQueue) Push(_ *internalqueue.QueuedBindingInfo) {}
21032105
func (m *mockSchedulingQueue) Pop() (*internalqueue.QueuedBindingInfo, bool) { return nil, false }
21042106
func (m *mockSchedulingQueue) Done(_ *internalqueue.QueuedBindingInfo) {}
21052107
func (m *mockSchedulingQueue) Len() int { return 0 }
2108+
func (m *mockSchedulingQueue) HasUnschedulableBindings() bool { return m.hasUnschedulable }
2109+
func (m *mockSchedulingQueue) MoveAllToActive() { m.moveAllToActiveCalled = true }
21062110
func (m *mockSchedulingQueue) Run() {}
21072111
func (m *mockSchedulingQueue) Close() {}
21082112

0 commit comments

Comments
 (0)