Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 32 additions & 1 deletion pkg/util/cluster/aad.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ package cluster

import (
"context"
"errors"
"fmt"
"net/http"
"time"

"k8s.io/apimachinery/pkg/util/wait"
Expand All @@ -15,6 +17,11 @@ import (
msgraph_errors "github.com/Azure/ARO-RP/pkg/util/graph/graphsdk/models/odataerrors"
)

const (
GraphApiMaxRetries int = 5
GraphApiRetryInterval time.Duration = 5 * time.Second
)

func (c *Cluster) createApplication(ctx context.Context, displayName string) (string, string, error) {
appBody := msgraph_models.NewApplication()
appBody.SetDisplayName(&displayName)
Expand All @@ -35,7 +42,31 @@ func (c *Cluster) createApplication(ctx context.Context, displayName string) (st
// ByApplicationId is confusingly named, but it refers to
// the application's Object ID, not to the Application ID.
// https://learn.microsoft.com/en-us/graph/api/application-addpassword?view=graph-rest-1.0&tabs=http#http-request
pwResult, err := c.spGraphClient.Applications().ByApplicationId(id).AddPassword().Post(ctx, pwCredentialRequestBody, nil)

// retry loop, due to eventual consistency, the application we just created might not be found when queried immediately, only after a retry
// check if returned error is 404 not found, if so, retry the operation
numRetries := 0
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

var pwResult msgraph_models.PasswordCredentialable
err = wait.PollUntilWithContext(ctx, GraphApiRetryInterval, func(ctx context.Context) (done bool, err error) {
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggestion: use wait.PollImmediateUntilWithContext (or do one direct call before polling) so retries still happen, but success path stays fast.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we add a focused unit test around this retry loop?
Useful cases: retries on 404, no retry on non-404, and stops after max retries.
That would make this eventual-consistency handling safer to refactor later.

pwResult, err = c.spGraphClient.Applications().ByApplicationId(id).AddPassword().Post(ctx, pwCredentialRequestBody, nil)
if err == nil {
return true, nil
}

var mainErr *msgraph_errors.ODataError
// some unknown error, bubble it up to caller
if ok := errors.As(err, &mainErr); !ok || mainErr.GetStatusCode() != http.StatusNotFound {
return false, err
}

// Check if we already tried too often
numRetries++
if numRetries > GraphApiMaxRetries {
return false, mainErr
}

return false, nil
})
if err != nil {
return "", "", err
}
Expand Down
Loading