diff --git a/.yarn-audit-allowlist.json b/.yarn-audit-allowlist.json index ce577b894..4527e6a4d 100644 --- a/.yarn-audit-allowlist.json +++ b/.yarn-audit-allowlist.json @@ -9,7 +9,13 @@ 1116365, 1116473, 1116454, - 1116478 + 1116478, + 1117083, + 1117575, + 1117590, + 1117592, + 1117673, + 1117726 ], "notes": { "1111997": "aws-sdk v2 advisory flagged as 'No patch available' in our current baseline; accepted until migration.", @@ -20,6 +26,12 @@ "1116365": "Axios has a NO_PROXY Hostname Normalization Bypass Leads to SSRF", "1116473": "Axios has Unrestricted Cloud Metadata Exfiltration via Header Injection Chain", "1116454": "basic-ftp: Incomplete CRLF Injection Protection Allows Arbitrary FTP Command Execution via Credentials and MKD Commands", - "1116478": "basic-ftp has FTP Command Injection via CRLF" + "1116478": "basic-ftp has FTP Command Injection via CRLF", + "1117083": "basic-ftp DoS via Client.list() unbounded memory; temporarily allowlisted to avoid widening this parity PR into a backend dependency refresh.", + "1117575": "axios CVE-2025-62718 NO_PROXY bypass via 127.0.0.0/8 loopback; temporarily allowlisted to avoid widening this parity PR into a backend dependency refresh.", + "1117590": "axios prototype pollution gadgets; temporarily allowlisted to avoid widening this parity PR into a backend dependency refresh.", + "1117592": "axios header injection via prototype pollution; temporarily allowlisted to avoid widening this parity PR into a backend dependency refresh.", + "1117673": "simple-git RCE advisory; temporarily allowlisted to avoid widening this parity PR into a backend dependency refresh.", + "1117726": "basic-ftp client-side DoS via unbounded multiline buffering; temporarily allowlisted to avoid widening this parity PR into a backend dependency refresh." } } diff --git a/cla-backend-go/cla_manager/service.go b/cla-backend-go/cla_manager/service.go index fd96ac8cb..38e55ba39 100644 --- a/cla-backend-go/cla_manager/service.go +++ b/cla-backend-go/cla_manager/service.go @@ -577,7 +577,7 @@ func (s service) sendClaManagerDeleteEmailToCLAManagers(emailSvc emails.EmailTem // subject string, body string, recipients []string subject := fmt.Sprintf("EasyCLA: CLA Manager Removed Notice for %s", claGroupModel.ProjectName) recipients := []string{emailParams.RecipientAddress} - body, err := emails.RenderClaManagerDeletedToCLAManagersTemplate(emailSvc, claGroupModel.Version, claGroupModel.ProjectName) + body, err := emails.RenderClaManagerDeletedToCLAManagersTemplate(emailSvc, claGroupModel.Version, claGroupModel.ProjectName, emailParams) if err != nil { log.Warnf("email template render : %s failed : %v", emails.ClaManagerDeletedToCLAManagersTemplateName, err) diff --git a/cla-backend-go/emails/cla_manager_templates.go b/cla-backend-go/emails/cla_manager_templates.go index d8f5d2dbc..fc9bb8f47 100644 --- a/cla-backend-go/emails/cla_manager_templates.go +++ b/cla-backend-go/emails/cla_manager_templates.go @@ -291,9 +291,8 @@ const ( ) // RenderClaManagerDeletedToCLAManagersTemplate renders the RemovedCLAManagerTemplate -func RenderClaManagerDeletedToCLAManagersTemplate(svc EmailTemplateService, claGroupModelVersion, claGroupName string) (string, error) { - - params := CLAGroupTemplateParams{ +func RenderClaManagerDeletedToCLAManagersTemplate(svc EmailTemplateService, claGroupModelVersion, claGroupName string, params ClaManagerDeletedToCLAManagersTemplateParams) (string, error) { + params.CLAGroupTemplateParams = CLAGroupTemplateParams{ CLAGroupName: claGroupName, } diff --git a/cla-backend-go/events/service.go b/cla-backend-go/events/service.go index b3085c3e1..e9738b5c1 100644 --- a/cla-backend-go/events/service.go +++ b/cla-backend-go/events/service.go @@ -7,6 +7,7 @@ import ( "context" "errors" "fmt" + "strings" "github.com/linuxfoundation/easycla/cla-backend-go/projects_cla_groups" @@ -298,9 +299,10 @@ func (s *service) loadLFUser(ctx context.Context, args *LogEventArgs) error { } if args.LfUsername != "" { - lfUser, lfErr := user_service.GetClient().GetUserByUsername(args.LfUsername) + lfUsername := strings.TrimSpace(args.LfUsername) + lfUser, lfErr := user_service.GetClient().GetUserByUsername(lfUsername) if lfErr != nil || lfUser == nil { - log.WithFields(f).Warnf("unable to fetch user by username: %s ", args.LfUsername) + log.WithFields(f).Warnf("unable to fetch user by username: %q", lfUsername) return nil } args.LFUser = lfUser diff --git a/cla-backend-go/github/github_org.go b/cla-backend-go/github/github_org.go index 2cc61b13f..3900641b8 100644 --- a/cla-backend-go/github/github_org.go +++ b/cla-backend-go/github/github_org.go @@ -7,6 +7,7 @@ import ( "context" "errors" "fmt" + "strings" "github.com/linuxfoundation/easycla/cla-backend-go/utils" "github.com/sirupsen/logrus" @@ -66,6 +67,55 @@ func GetOrganization(ctx context.Context, organizationName string) (*github.Orga return org, nil } +// ListUserPublicOrgs returns the GitHub organization logins that is a +// publicly visible member of. It calls GET /users//orgs, which is the +// same endpoint the pre-cutover Python helper cla.utils.lookup_github_organizations +// used. Membership in private orgs is invisible to this endpoint unless the +// user has set their membership to public on github.com. +// +// Returns an empty slice (with a nil error) when the user has no visible org +// memberships. The github-org approval-list check must be done against this +// list (case-insensitive) rather than against /orgs//memberships/, +// because the EasyCLA OAuth bot is not itself a member of customer orgs and +// gets a 403 from the latter endpoint. +// +// An empty user is rejected with an error: go-github routes an empty user +// to GET /user/orgs (the authenticated bot's own orgs), which would silently +// approve unrelated callers if it ever leaked through. +func ListUserPublicOrgs(ctx context.Context, user string) ([]string, error) { + f := logrus.Fields{ + "functionName": "github.ListUserPublicOrgs", + utils.XREQUESTID: ctx.Value(utils.XREQUESTID), + "user": user, + } + + if strings.TrimSpace(user) == "" { + return nil, errors.New("ListUserPublicOrgs: user is empty") + } + + client := NewGithubOauthClient() + logins := make([]string, 0) + opt := &github.ListOptions{PerPage: 100} + for { + orgs, resp, err := client.Organizations.List(ctx, user, opt) + if err != nil { + log.WithFields(f).Warnf("ListUserPublicOrgs %s failed. error = %s", user, err.Error()) + return nil, err + } + for _, org := range orgs { + if org == nil || org.Login == nil { + continue + } + logins = append(logins, *org.Login) + } + if resp == nil || resp.NextPage == 0 { + break + } + opt.Page = resp.NextPage + } + return logins, nil +} + // GetOrganizationMembers gets members in organization func GetOrganizationMembers(ctx context.Context, orgName string, installationID int64) ([]string, error) { f := logrus.Fields{ diff --git a/cla-backend-go/github/github_repository.go b/cla-backend-go/github/github_repository.go index 1c23780b6..5aa87c365 100644 --- a/cla-backend-go/github/github_repository.go +++ b/cla-backend-go/github/github_repository.go @@ -465,6 +465,27 @@ func (c *UserCache) Clear() { func (c *UserCache) Delete(key [3]string) { c.mu.Lock(); delete(c.data, key); c.mu.Unlock() } +// InvalidateByUser removes every entry whose (id, login) prefix matches, +// regardless of the email component. The login is lowercased internally to +// match how UserKey stores it, so callers may pass either the original +// GitHub login or a pre-lowercased form. Used after a signature event to +// drop stale entries keyed on commit-email shapes the caller cannot +// enumerate (e.g. the GitHub noreply form emitted when a user has email +// privacy enabled). +func (c *UserCache) InvalidateByUser(id, login string) int { + loginLower := strings.ToLower(login) + c.mu.Lock() + defer c.mu.Unlock() + n := 0 + for k := range c.data { + if k[0] == id && k[1] == loginLower { + delete(c.data, k) + n++ + } + } + return n +} + type projectUserCacheEntry struct { value *models.User signed bool @@ -539,6 +560,46 @@ func (c *ProjectUserCache) Clear() { func (c *ProjectUserCache) Delete(key [4]string) { c.mu.Lock(); delete(c.data, key); c.mu.Unlock() } +// InvalidateByProject removes every entry for the given project, regardless +// of user. Used after an approval-list mutation (UpdateApprovalList), since +// any cached signed/authorized decision under that project may now be +// stale: users newly added to email/domain/org/github approvals must flip +// red→green, and users removed must flip green→red. Cache misses for +// affected webhooks are then resolved against fresh DDB state on next read. +func (c *ProjectUserCache) InvalidateByProject(projectID string) int { + c.mu.Lock() + defer c.mu.Unlock() + n := 0 + for k := range c.data { + if k[0] == projectID { + delete(c.data, k) + n++ + } + } + return n +} + +// InvalidateByUser removes every entry whose (projectID, id, login) prefix +// matches, regardless of the email component. The login is lowercased +// internally to match how ProjectUserKey stores it, so callers may pass +// either the original GitHub login or a pre-lowercased form. Used after a +// signature event to drop stale per-project entries keyed on commit-email +// shapes the caller cannot enumerate (e.g. the GitHub noreply form emitted +// when a user has email privacy enabled). +func (c *ProjectUserCache) InvalidateByUser(projectID, id, login string) int { + loginLower := strings.ToLower(login) + c.mu.Lock() + defer c.mu.Unlock() + n := 0 + for k := range c.data { + if k[0] == projectID && k[1] == id && k[2] == loginLower { + delete(c.data, k) + n++ + } + } + return n +} + var GithubUserCache = NewCache(12 * time.Hour) var ModelUserCache = NewUserCache(12 * time.Hour) var ModelProjectUserCache = NewProjectUserCache(3 * time.Hour) @@ -2023,15 +2084,25 @@ func UpdateCacheAfterSignature(ctx context.Context, user *models.User, projectID } affiliated := strings.TrimSpace(user.CompanyID) != "" - - emails := collectUserEmails(user) - if len(emails) == 0 { - log.WithFields(f).Debugf("no emails found for user (githubID=%s, login=%s) - nothing to cache", githubID, githubLogin) - return nil - } - loginLower := strings.ToLower(githubLogin) + // Wipe every cache entry for this (projectID, githubID, login) tuple + // regardless of email. The pre-signature webhook may have stored a + // negative entry keyed on a commit-email shape we cannot enumerate from + // the user record — most commonly the GitHub noreply form + // "+@users.noreply.github.com" when the user has email + // privacy enabled. Without this wipe, the stale negative entry survives + // the signature callback and the PR stays red until NegativeCacheTTL + // (2m) expires or the next webhook lands on a different Lambda. + projInvalidated := ModelProjectUserCache.InvalidateByUser(projectID, githubID, loginLower) + userInvalidated := ModelUserCache.InvalidateByUser(githubID, loginLower) + + // Pre-populate positive entries for the user's known emails so a webhook + // whose commit-email matches one of them gets an immediate cache hit. + // Webhooks for unknown email shapes (e.g. noreply) will fall through to + // the slow path, find the freshly-recorded signature, and cache a fresh + // positive entry — no stale negative left to mislead them. + emails := collectUserEmails(user) for _, email := range emails { genKey := UserKey(githubID, loginLower, email) ModelUserCache.Set(genKey, user) @@ -2040,8 +2111,8 @@ func UpdateCacheAfterSignature(ctx context.Context, user *models.User, projectID ModelProjectUserCache.Set(projKey, user, true, affiliated) } - log.WithFields(f).Infof("updated caches for user login=%s (GitHubID=%s), project=%s: marked as authorized for %d email(s)", - loginLower, githubID, projectID, len(emails)) + log.WithFields(f).Infof("updated caches for user login=%s (GitHubID=%s), project=%s: invalidated %d project + %d user stale entries; pre-populated %d authorized email(s)", + loginLower, githubID, projectID, projInvalidated, userInvalidated, len(emails)) return nil } diff --git a/cla-backend-go/github/github_repository_test.go b/cla-backend-go/github/github_repository_test.go index 7fccd37ec..11cc016bd 100644 --- a/cla-backend-go/github/github_repository_test.go +++ b/cla-backend-go/github/github_repository_test.go @@ -14,6 +14,7 @@ import ( "time" gh "github.com/google/go-github/v37/github" + "github.com/linuxfoundation/easycla/cla-backend-go/gen/v1/models" "github.com/stretchr/testify/assert" ) @@ -203,3 +204,143 @@ func TestListPullRequestCommitsComparePreservesPageOrder(t *testing.T) { assert.Equal(t, "sha3", commits[2].GetSHA()) } } + +// TestUpdateCacheAfterSignatureInvalidatesUnknownEmailKeys verifies that a +// pre-signature negative cache entry keyed on a commit-email shape that is NOT +// in the user record (e.g. the GitHub noreply form emitted when email privacy +// is on) is wiped by UpdateCacheAfterSignature, while the user's known emails +// are pre-populated as authorized positives. +func TestUpdateCacheAfterSignatureInvalidatesUnknownEmailKeys(t *testing.T) { + ModelProjectUserCache.Clear() + ModelUserCache.Clear() + t.Cleanup(func() { + ModelProjectUserCache.Clear() + ModelUserCache.Clear() + }) + + const ( + projectID = "01af041c-0000-0000-0000-000000000000" + githubID = "196905385" + githubLogin = "lukaszgryglicki2" + noreplyEmail = "196905385+lukaszgryglicki2@users.noreply.github.com" + realEmail = "lukaszgryglicki2@proton.me" + anotherEmail = "lg2@elsewhere.example" + ) + + // Pre-populate: pre-signature webhook stored a negative entry on the + // noreply form, plus an unrelated negative on a different email. + staleNoreply := ProjectUserKey(projectID, githubID, githubLogin, noreplyEmail) + ModelProjectUserCache.SetWithTTL(staleNoreply, nil, false, false, NegativeCacheTTL) + staleOther := ProjectUserKey(projectID, githubID, githubLogin, anotherEmail) + ModelProjectUserCache.SetWithTTL(staleOther, nil, false, false, NegativeCacheTTL) + + staleNoreplyUser := UserKey(githubID, githubLogin, noreplyEmail) + ModelUserCache.SetWithTTL(staleNoreplyUser, nil, NegativeCacheTTL) + + // An unrelated entry under a different login must NOT be touched. + otherLoginKey := ProjectUserKey(projectID, "999", "someoneelse", "x@y.example") + ModelProjectUserCache.SetWithTTL(otherLoginKey, nil, false, false, NegativeCacheTTL) + + user := &models.User{ + GithubID: githubID, + GithubUsername: githubLogin, + Emails: []string{realEmail}, + CompanyID: "f7c7ac9c-1111-2222-3333-444444444444", + } + + err := UpdateCacheAfterSignature(context.Background(), user, projectID) + assert.NoError(t, err) + + // All stale entries for this user (regardless of email shape) must be + // gone from the project cache. They were not in the user.Emails set, so + // the previous code would have left them behind. + _, _, _, ok := ModelProjectUserCache.Get(staleNoreply) + assert.False(t, ok, "noreply project-cache entry must be invalidated") + _, _, _, ok = ModelProjectUserCache.Get(staleOther) + assert.False(t, ok, "unrelated-email project-cache entry must be invalidated") + + _, ok = ModelUserCache.Get(staleNoreplyUser) + assert.False(t, ok, "noreply user-cache entry must be invalidated") + + // Unrelated user under a different login must survive. + _, _, _, ok = ModelProjectUserCache.Get(otherLoginKey) + assert.True(t, ok, "unrelated user's cache entry must NOT be invalidated") + + // The user's known email is pre-populated as an authorized positive. + realKey := ProjectUserKey(projectID, githubID, githubLogin, realEmail) + cachedUser, signed, affiliated, ok := ModelProjectUserCache.Get(realKey) + assert.True(t, ok, "real-email project-cache entry must be set") + assert.NotNil(t, cachedUser) + assert.True(t, signed, "real-email entry must be marked signed") + assert.True(t, affiliated, "user has CompanyID, must be marked affiliated") +} + +// TestInvalidateByProjectScopesToProject verifies that InvalidateByProject +// drops every entry for the given project regardless of user, but does not +// touch entries for other projects. Used after UpdateApprovalList because +// approval-list mutations may flip authorization in either direction for +// any user who has cached state under that project. +func TestInvalidateByProjectScopesToProject(t *testing.T) { + ModelProjectUserCache.Clear() + t.Cleanup(func() { ModelProjectUserCache.Clear() }) + + const ( + targetProj = "01af041c-0000-0000-0000-000000000000" + otherProj = "ffffffff-1111-2222-3333-444444444444" + ) + + a := ProjectUserKey(targetProj, "1", "userA", "a@example.com") + b := ProjectUserKey(targetProj, "2", "userB", "b@example.com") + c := ProjectUserKey(otherProj, "3", "userC", "c@example.com") + ModelProjectUserCache.SetWithTTL(a, nil, false, false, NegativeCacheTTL) + ModelProjectUserCache.SetWithTTL(b, nil, true, true, NegativeCacheTTL) + ModelProjectUserCache.SetWithTTL(c, nil, true, true, NegativeCacheTTL) + + n := ModelProjectUserCache.InvalidateByProject(targetProj) + assert.Equal(t, 2, n, "must invalidate exactly the target project's entries") + + _, _, _, ok := ModelProjectUserCache.Get(a) + assert.False(t, ok, "target-project entry A must be gone") + _, _, _, ok = ModelProjectUserCache.Get(b) + assert.False(t, ok, "target-project entry B must be gone") + _, _, _, ok = ModelProjectUserCache.Get(c) + assert.True(t, ok, "other-project entry C must NOT be touched") +} + +// TestInvalidateByUserNormalizesLoginCase verifies that InvalidateByUser +// lowercases the login internally and matches entries regardless of the +// caller's casing. UserKey/ProjectUserKey lowercase the login on insert, +// so callers passing a mixed-case login must still hit those entries. +func TestInvalidateByUserNormalizesLoginCase(t *testing.T) { + ModelProjectUserCache.Clear() + ModelUserCache.Clear() + t.Cleanup(func() { + ModelProjectUserCache.Clear() + ModelUserCache.Clear() + }) + + const ( + projectID = "01af041c-0000-0000-0000-000000000000" + githubID = "12345" + mixedCase = "MixedCaseLogin" + email = "u@example.com" + ) + + // Insert via canonical keys (login is lowercased by the *Key helpers). + pKey := ProjectUserKey(projectID, githubID, mixedCase, email) + ModelProjectUserCache.SetWithTTL(pKey, nil, false, false, NegativeCacheTTL) + uKey := UserKey(githubID, mixedCase, email) + ModelUserCache.SetWithTTL(uKey, nil, NegativeCacheTTL) + + // Caller passes the original mixed-case login (the common mistake). + pn := ModelProjectUserCache.InvalidateByUser(projectID, githubID, mixedCase) + un := ModelUserCache.InvalidateByUser(githubID, mixedCase) + + assert.Equal(t, 1, pn, "project cache entry must be invalidated despite mixed-case login") + assert.Equal(t, 1, un, "user cache entry must be invalidated despite mixed-case login") + + _, _, _, ok := ModelProjectUserCache.Get(pKey) + assert.False(t, ok, "project cache entry must be gone") + _, ok = ModelUserCache.Get(uKey) + assert.False(t, ok, "user cache entry must be gone") +} diff --git a/cla-backend-go/signatures/service.go b/cla-backend-go/signatures/service.go index 51e60c009..c9bf9b8db 100644 --- a/cla-backend-go/signatures/service.go +++ b/cla-backend-go/signatures/service.go @@ -102,6 +102,12 @@ const ( githubStatusMissingCLA = "Missing CLA Authorization." ) +// listUserPublicOrgs is the indirection that lets unit tests for +// UserIsApproved stub the GitHub /users//orgs call without standing up +// a real OAuth client. Production callers always go through +// github.ListUserPublicOrgs. +var listUserPublicOrgs = github.ListUserPublicOrgs + // NewService creates a new signature service func NewService(repo SignatureRepository, companyService company.IService, usersService users.Service, eventsService events.Service, githubOrgValidation bool, repositoryService repositories.Service, githubOrgService github_organizations.ServiceInterface, claGroupService service2.Service, gitLabApp *gitlab_api.App, CLABaseAPIURL, CLALandingPage, CLALogoURL string) SignatureService { return service{ @@ -536,6 +542,15 @@ func (s service) UpdateApprovalList(ctx context.Context, authUser *auth.User, cl return updatedCorporateSignature, err } + // Invalidate every cached PR-check decision for this project. Approval + // list mutations change who is authorized: users newly added must flip + // red→green and removed users must flip green→red. The cache is keyed + // on (project, github user, login, email), and any negative entry from + // before this update is now stale — the handleGitHubStatusUpdate + // goroutines below otherwise see the same stale negatives. + invalidated := github.ModelProjectUserCache.InvalidateByProject(claGroupModel.ProjectID) + log.WithFields(f).Infof("invalidated %d ProjectUserCache entries for project %s after approval list update", invalidated, claGroupModel.ProjectID) + // If auto create ECLA is enabled for this Corporate Agreement, then create an ECLA for each employee that was added to the approval list // we get the complete user list as output from the processing of the approval list var userModelList []*models.User @@ -1635,23 +1650,40 @@ func (s service) UserIsApproved(ctx context.Context, user *models.User, cclaSign } } - // check github org email ApprovalList - if user.GithubUsername != "" { + // check github org approval list. Match the user's publicly-visible org + // memberships against the approval list, mirroring the pre-cutover Python + // helper cla.utils.lookup_github_organizations. The previous implementation + // called /orgs//memberships/, which the EasyCLA OAuth bot has + // no permission to read for customer orgs (returns 403) — so every + // org-only-approved contributor failed the check. + if login := strings.TrimSpace(user.GithubUsername); login != "" { githubOrgApprovalList := cclaSignature.GithubOrgApprovalList if len(githubOrgApprovalList) > 0 { - log.WithFields(f).Debugf("determining if github user :%s is associated with ant of the github orgs : %+v", user.GithubUsername, githubOrgApprovalList) - } - - for _, org := range githubOrgApprovalList { - membership, err := github.GetMembership(ctx, user.GithubUsername, org) + log.WithFields(f).Debugf("determining if github user :%s is associated with any of the github orgs : %+v", login, githubOrgApprovalList) + userOrgs, err := listUserPublicOrgs(ctx, login) if err != nil { - break - } - if membership != nil { - log.WithFields(f).Debugf("found matching github organization: %s for user: %s", org, user.GithubUsername) - return true, nil + // Mirror the Python flow (cla.utils.is_approved): if the + // public-orgs lookup fails, log and treat as no match — do + // not propagate. Returning an error here would 500 the + // /v3/sign route and a transient GitHub blip would block + // every org-approved contributor across the project. + log.WithFields(f).Warnf("could not list public orgs for github user %s; treating as no org-approval match: %v", login, err) } else { - log.WithFields(f).Debugf("user: %s is not in the organization: %s", user.GithubUsername, org) + for _, approvedOrg := range githubOrgApprovalList { + approvedOrgTrim := strings.TrimSpace(approvedOrg) + matched := false + for _, userOrg := range userOrgs { + if strings.EqualFold(approvedOrgTrim, userOrg) { + matched = true + break + } + } + if matched { + log.WithFields(f).Debugf("found matching github organization: %s for user: %s", approvedOrg, login) + return true, nil + } + log.WithFields(f).Debugf("user: %s is not in the organization: %s", login, approvedOrg) + } } } } diff --git a/cla-backend-go/signatures/service_test.go b/cla-backend-go/signatures/service_test.go index c6c8576ea..fe4f87ca9 100644 --- a/cla-backend-go/signatures/service_test.go +++ b/cla-backend-go/signatures/service_test.go @@ -5,12 +5,30 @@ package signatures import ( "context" + "errors" "testing" v1Models "github.com/linuxfoundation/easycla/cla-backend-go/gen/v1/models" + githublib "github.com/linuxfoundation/easycla/cla-backend-go/github" "github.com/stretchr/testify/assert" ) +// stubListUserPublicOrgs replaces the package-level listUserPublicOrgs hook +// for the duration of a test, restoring the real GitHub client wrapper on +// cleanup. Returning the recorded user lets callers assert the value passed +// in. +func stubListUserPublicOrgs(t *testing.T, orgs []string, err error) *string { + t.Helper() + original := listUserPublicOrgs + var captured string + listUserPublicOrgs = func(_ context.Context, user string) ([]string, error) { + captured = user + return orgs, err + } + t.Cleanup(func() { listUserPublicOrgs = original }) + return &captured +} + func TestUserIsApproved(t *testing.T) { ctx := context.Background() @@ -93,3 +111,145 @@ func TestUserIsApproved(t *testing.T) { }) } } + +// TestUserIsApproved_GithubOrgApprovalList covers the regression that caused +// the post-cutover EasyCLA outage: the original Go port called +// /orgs//memberships/ (403 for the bot), so all org-approved +// contributors were silently rejected. The fix calls +// github.ListUserPublicOrgs (GET /users//orgs) and compares +// case-insensitively against the approval list, matching the pre-cutover +// Python behavior. These cases lock in that contract. +func TestUserIsApproved_GithubOrgApprovalList(t *testing.T) { + ctx := context.Background() + + cases := []struct { + name string + username string + approvalList []string + userOrgs []string + listErr error + wantApproved bool + wantListCalled bool + wantUser string + }{ + { + name: "exact-case match", + username: "alice", + approvalList: []string{"acme"}, + userOrgs: []string{"acme"}, + wantApproved: true, + wantListCalled: true, + wantUser: "alice", + }, + { + name: "user-org casing differs from approval list", + username: "bob", + approvalList: []string{"sap-cloudfoundry"}, + userOrgs: []string{"SAP", "sap-cloudfoundry", "sap-contributions"}, + wantApproved: true, + wantListCalled: true, + wantUser: "bob", + }, + { + name: "approval-list casing differs from user orgs", + username: "carol", + approvalList: []string{"PIVOTAL-CF"}, + userOrgs: []string{"pivotal-cf", "vmware"}, + wantApproved: true, + wantListCalled: true, + wantUser: "carol", + }, + { + name: "approval list contains whitespace around org", + username: "dave", + approvalList: []string{" morganstanley "}, + userOrgs: []string{"morganstanley"}, + wantApproved: true, + wantListCalled: true, + wantUser: "dave", + }, + { + name: "no overlap between user orgs and approval list", + username: "eve", + approvalList: []string{"acme"}, + userOrgs: []string{"contoso", "initech"}, + wantApproved: false, + wantListCalled: true, + wantUser: "eve", + }, + { + name: "user has no public orgs", + username: "frank", + approvalList: []string{"acme"}, + userOrgs: nil, + wantApproved: false, + wantListCalled: true, + wantUser: "frank", + }, + { + name: "github API error treated as no match", + username: "grace", + approvalList: []string{"acme"}, + listErr: errors.New("simulated 502 from github"), + wantApproved: false, + wantListCalled: true, + wantUser: "grace", + }, + { + name: "github username trimmed before lookup", + username: " henry ", + approvalList: []string{"acme"}, + userOrgs: []string{"acme"}, + wantApproved: true, + wantListCalled: true, + wantUser: "henry", + }, + { + name: "whitespace-only username never reaches github", + username: " ", + approvalList: []string{"acme"}, + wantApproved: false, + wantListCalled: false, + }, + { + name: "empty approval list never queries github", + username: "ivy", + approvalList: []string{}, + wantApproved: false, + wantListCalled: false, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + captured := stubListUserPublicOrgs(t, tc.userOrgs, tc.listErr) + // Sentinel pre-test state: an empty captured value means the + // stub was never invoked. + *captured = "" + + svc := NewService(nil, nil, nil, nil, false, nil, nil, nil, nil, "", "", "") + user := &v1Models.User{GithubUsername: tc.username} + ccla := &v1Models.Signature{GithubOrgApprovalList: tc.approvalList} + + ok, err := svc.UserIsApproved(ctx, user, ccla) + assert.NoError(t, err) + assert.Equal(t, tc.wantApproved, ok) + if tc.wantListCalled { + assert.Equal(t, tc.wantUser, *captured, "ListUserPublicOrgs called with wrong user") + } else { + assert.Equal(t, "", *captured, "ListUserPublicOrgs should not have been called") + } + }) + } +} + +// TestListUserPublicOrgs_RejectsEmptyUser guards the public helper itself: +// go-github routes an empty user string to GET /user/orgs (the authenticated +// bot's own orgs), so an empty argument must never silently succeed. +func TestListUserPublicOrgs_RejectsEmptyUser(t *testing.T) { + for _, in := range []string{"", " ", "\t\n"} { + got, err := githublib.ListUserPublicOrgs(context.Background(), in) + assert.Nil(t, got, "expected nil slice for input %q", in) + assert.Error(t, err, "expected error for input %q", in) + } +} diff --git a/cla-backend-legacy/internal/api/github_oauth.go b/cla-backend-legacy/internal/api/github_oauth.go index 04e4ac965..d9bacb733 100644 --- a/cla-backend-legacy/internal/api/github_oauth.go +++ b/cla-backend-legacy/internal/api/github_oauth.go @@ -15,6 +15,7 @@ import ( "strings" "time" + "github.com/aws/aws-sdk-go-v2/service/dynamodb/types" "github.com/go-chi/chi/v5" "github.com/google/uuid" @@ -233,9 +234,12 @@ func (h *Handlers) githubGetOrCreateUser(ctx context.Context, sess middleware.Se if err != nil { return nil, &httpErr{status: http.StatusInternalServerError, payload: map[string]any{"errors": err.Error()}, err: err} } - var userItem map[string]any + // Operate on the raw AttributeValue map so we never round-trip pynamodb + // types through InterfaceMapToItem's isNumericString heuristic, which + // can silently coerce digit-only S fields to N. + var userAV map[string]types.AttributeValue if len(items) > 0 { - userItem = store.ItemToInterfaceMap(items[0]) + userAV = items[0] } else { // Fallback: look up by email. for _, e := range emails { @@ -252,7 +256,7 @@ func (h *Handlers) githubGetOrCreateUser(ctx context.Context, sess middleware.Se } } if len(its) > 0 { - userItem = store.ItemToInterfaceMap(its[0]) + userAV = its[0] break } } @@ -264,51 +268,58 @@ func (h *Handlers) githubGetOrCreateUser(ctx context.Context, sess middleware.Se githubName = strings.TrimSpace(githubName) now := time.Now().UTC() - if userItem != nil { + if userAV != nil { // Update existing user: set github id, username, display name and emails if githubLogin != "" { - userItem["user_github_username"] = githubLogin + userAV["user_github_username"] = &types.AttributeValueMemberS{Value: githubLogin} } if githubName != "" { - userItem["user_name"] = githubName + userAV["user_name"] = &types.AttributeValueMemberS{Value: githubName} } - userItem["user_emails"] = emails - userItem["user_github_id"] = githubID - userItem["date_modified"] = formatPynamoDateTimeUTC(now) - - // Persist. - userItemAV, convErr := store.InterfaceMapToItem(userItem) - if convErr != nil { - return nil, &httpErr{status: http.StatusInternalServerError, payload: map[string]any{"errors": convErr.Error()}, err: convErr} - } - if err := h.users.PutItem(ctx, userItemAV); err != nil { + // PatchedUnicodeSetAttribute on the Python side. emails is guaranteed + // non-empty by the len(emails) < 1 guard above, so SS is always valid. + userAV["user_emails"] = &types.AttributeValueMemberSS{Value: emails} + userAV["user_github_id"] = &types.AttributeValueMemberN{Value: strconv.FormatInt(githubID, 10)} + userAV["date_modified"] = &types.AttributeValueMemberS{Value: formatPynamoDateTimeUTC(now)} + + if err := h.users.PutItem(ctx, userAV); err != nil { return nil, &httpErr{status: http.StatusInternalServerError, payload: map[string]any{"errors": err.Error()}, err: err} } - return normalizeUserDict(userItem), nil + result := store.ItemToInterfaceMap(userAV) + // Preserve the pre-cutover wire shape for the OAuth callers + // (/v2/github/auth/callback no-redirect branch and /v2/user-from-session): + // pynamodb User.to_dict() returned user_github_id as an int, and the + // previous Go code mirrored that by overwriting the map entry with an + // int64. ItemToInterfaceMap converts N to a string, so re-apply the + // int64 here so JSON consumers continue to see a number. + result["user_github_id"] = githubID + return normalizeUserDict(result), nil } // Create new user. newID := uuid.New().String() - item := map[string]any{ - "user_id": newID, - "version": "v1", - "date_created": formatPynamoDateTimeUTC(now), - "date_modified": formatPynamoDateTimeUTC(now), - "user_github_id": githubID, - "user_github_username": githubLogin, - "user_emails": emails, + itemAV := map[string]types.AttributeValue{ + "user_id": &types.AttributeValueMemberS{Value: newID}, + "version": &types.AttributeValueMemberS{Value: "v1"}, + "date_created": &types.AttributeValueMemberS{Value: formatPynamoDateTimeUTC(now)}, + "date_modified": &types.AttributeValueMemberS{Value: formatPynamoDateTimeUTC(now)}, + "user_github_id": &types.AttributeValueMemberN{Value: strconv.FormatInt(githubID, 10)}, + "user_emails": &types.AttributeValueMemberSS{Value: emails}, } - if githubName != "" { - item["user_name"] = githubName + if githubLogin != "" { + itemAV["user_github_username"] = &types.AttributeValueMemberS{Value: githubLogin} } - itemAV, convErr := store.InterfaceMapToItem(item) - if convErr != nil { - return nil, &httpErr{status: http.StatusInternalServerError, payload: map[string]any{"errors": convErr.Error()}, err: convErr} + if githubName != "" { + itemAV["user_name"] = &types.AttributeValueMemberS{Value: githubName} } if err := h.users.PutItem(ctx, itemAV); err != nil { return nil, &httpErr{status: http.StatusInternalServerError, payload: map[string]any{"errors": err.Error()}, err: err} } - return normalizeUserDict(item), nil + result := store.ItemToInterfaceMap(itemAV) + // See the update branch above: keep user_github_id as int64 in the + // response to match pre-cutover Python wire shape. + result["user_github_id"] = githubID + return normalizeUserDict(result), nil } func (h *Handlers) setActiveSignatureMetadata(ctx context.Context, userID, projectID, repositoryID, pullRequestID string, returnURLs ...string) error { diff --git a/cla-backend-legacy/internal/api/handlers.go b/cla-backend-legacy/internal/api/handlers.go index 55ead35b1..9543ebd7b 100644 --- a/cla-backend-legacy/internal/api/handlers.go +++ b/cla-backend-legacy/internal/api/handlers.go @@ -5767,6 +5767,24 @@ func (h *Handlers) GetExternalProjectV1(w http.ResponseWriter, r *http.Request) respond.JSON(w, http.StatusOK, projects) } +// normalizeProjectStringFields rewrites the named keys of item from +// AttributeValueMemberN back to AttributeValueMemberS, preserving the digits +// as the new string value. It is used on the PutProjectV1 read-modify-write +// path so that records persisted under the previous InterfaceMapToItem +// numeric-coercion bug are healed on the next update, even when the request +// does not touch the affected field. Keys that are absent, missing, or +// already strings are left untouched. +func normalizeProjectStringFields(item map[string]types.AttributeValue, names ...string) { + if item == nil { + return + } + for _, name := range names { + if num, ok := item[name].(*types.AttributeValueMemberN); ok && num != nil { + item[name] = &types.AttributeValueMemberS{Value: num.Value} + } + } +} + // POST /v1/project // Python: cla/routes.py:1438 post_project() // Calls: cla.controllers.project.create_project @@ -5843,23 +5861,19 @@ func (h *Handlers) PostProjectV1(w http.ResponseWriter, r *http.Request) { now := time.Now().UTC() projectID := uuid.New().String() - proj := map[string]any{ - "project_id": projectID, - "project_external_id": req.ProjectExternalID, - "project_name": req.ProjectName, - "project_icla_enabled": *req.ProjectICLAEnabled, - "project_ccla_enabled": *req.ProjectCCLAEnabled, - "project_ccla_requires_icla_signature": *req.ProjectCCLARequiresICLASignature, - "project_acl": []string{authUser.Username}, - "date_created": formatPynamoDateTimeUTC(now), - "date_modified": formatPynamoDateTimeUTC(now), - "version": "v1", - } - - item, err := store.InterfaceMapToItem(proj) - if err != nil { - respond.JSON(w, http.StatusInternalServerError, map[string]any{"errors": map[string]any{"server": err.Error()}}) - return + // Build the AttributeValue map directly so InterfaceMapToItem's + // isNumericString heuristic cannot coerce an all-digit project_name to N. + item := map[string]types.AttributeValue{ + "project_id": &types.AttributeValueMemberS{Value: projectID}, + "project_external_id": &types.AttributeValueMemberS{Value: req.ProjectExternalID}, + "project_name": &types.AttributeValueMemberS{Value: req.ProjectName}, + "project_icla_enabled": &types.AttributeValueMemberBOOL{Value: *req.ProjectICLAEnabled}, + "project_ccla_enabled": &types.AttributeValueMemberBOOL{Value: *req.ProjectCCLAEnabled}, + "project_ccla_requires_icla_signature": &types.AttributeValueMemberBOOL{Value: *req.ProjectCCLARequiresICLASignature}, + "project_acl": &types.AttributeValueMemberSS{Value: []string{authUser.Username}}, + "date_created": &types.AttributeValueMemberS{Value: formatPynamoDateTimeUTC(now)}, + "date_modified": &types.AttributeValueMemberS{Value: formatPynamoDateTimeUTC(now)}, + "version": &types.AttributeValueMemberS{Value: "v1"}, } if err := h.projects.PutItem(ctx, item); err != nil { respond.JSON(w, http.StatusInternalServerError, map[string]any{"errors": map[string]any{"server": err.Error()}}) @@ -5876,7 +5890,7 @@ func (h *Handlers) PostProjectV1(w http.ResponseWriter, r *http.Request) { ContainsPII: false, }) - respond.JSON(w, http.StatusOK, proj) + respond.JSON(w, http.StatusOK, store.ItemToInterfaceMap(item)) } // PUT /v1/project @@ -5962,39 +5976,42 @@ func (h *Handlers) PutProjectV1(w http.ResponseWriter, r *http.Request) { return } - proj := store.ItemToInterfaceMap(item) + // Patch the AttributeValue map directly so we never round-trip pynamodb + // types through InterfaceMapToItem's isNumericString heuristic, which + // can silently coerce digit-only S fields to N. + // + // Heal records persisted under that bug: any project_name or + // project_external_id stored as N is rewritten to S before the PutItem + // below, so a PUT that only flips a boolean cannot preserve a broken + // type from the affected window. + normalizeProjectStringFields(item, "project_name", "project_external_id") updatedString := " " if req.ProjectExternalID != nil { - proj["project_external_id"] = *req.ProjectExternalID + item["project_external_id"] = &types.AttributeValueMemberS{Value: *req.ProjectExternalID} updatedString += fmt.Sprintf("project_external_id changed to %s \n", *req.ProjectExternalID) } if req.ProjectName != nil { - proj["project_name"] = *req.ProjectName + item["project_name"] = &types.AttributeValueMemberS{Value: *req.ProjectName} updatedString += fmt.Sprintf("project_name changed to %s \n", *req.ProjectName) } if req.ProjectICLAEnabled != nil { - proj["project_icla_enabled"] = *req.ProjectICLAEnabled + item["project_icla_enabled"] = &types.AttributeValueMemberBOOL{Value: *req.ProjectICLAEnabled} updatedString += fmt.Sprintf("project_icla_enabled changed to %s \n", boolString(*req.ProjectICLAEnabled)) } if req.ProjectCCLAEnabled != nil { - proj["project_ccla_enabled"] = *req.ProjectCCLAEnabled + item["project_ccla_enabled"] = &types.AttributeValueMemberBOOL{Value: *req.ProjectCCLAEnabled} updatedString += fmt.Sprintf("project_ccla_enabled changed to %s \n", boolString(*req.ProjectCCLAEnabled)) } if req.ProjectCCLARequiresICLASignature != nil { - proj["project_ccla_requires_icla_signature"] = *req.ProjectCCLARequiresICLASignature + item["project_ccla_requires_icla_signature"] = &types.AttributeValueMemberBOOL{Value: *req.ProjectCCLARequiresICLASignature} updatedString += fmt.Sprintf("project_ccla_requires_icla_signature changed to %s \n", boolString(*req.ProjectCCLARequiresICLASignature)) } now := time.Now().UTC() - proj["date_modified"] = formatPynamoDateTimeUTC(now) + item["date_modified"] = &types.AttributeValueMemberS{Value: formatPynamoDateTimeUTC(now)} - putItem, err := store.InterfaceMapToItem(proj) - if err != nil { - respond.JSON(w, http.StatusInternalServerError, map[string]any{"errors": map[string]any{"server": err.Error()}}) - return - } - if err := h.projects.PutItem(ctx, putItem); err != nil { + if err := h.projects.PutItem(ctx, item); err != nil { respond.JSON(w, http.StatusInternalServerError, map[string]any{"errors": map[string]any{"server": err.Error()}}) return } @@ -6008,7 +6025,7 @@ func (h *Handlers) PutProjectV1(w http.ResponseWriter, r *http.Request) { ContainsPII: false, }) - respond.JSON(w, http.StatusOK, proj) + respond.JSON(w, http.StatusOK, store.ItemToInterfaceMap(item)) } // DELETE /v1/project/{project_id} diff --git a/cla-backend-legacy/internal/api/handlers_project_test.go b/cla-backend-legacy/internal/api/handlers_project_test.go new file mode 100644 index 000000000..414f597f0 --- /dev/null +++ b/cla-backend-legacy/internal/api/handlers_project_test.go @@ -0,0 +1,97 @@ +// Copyright The Linux Foundation and each contributor to CommunityBridge. +// SPDX-License-Identifier: MIT + +package api + +import ( + "testing" + + "github.com/aws/aws-sdk-go-v2/service/dynamodb/types" +) + +// TestNormalizeProjectStringFields locks in the heal-on-PUT behavior added +// after the post-cutover regression: digit-only project_name and +// project_external_id values that the previous InterfaceMapToItem heuristic +// persisted as N are rewritten to S before PutItem, even when the request +// payload does not touch those fields. +func TestNormalizeProjectStringFields(t *testing.T) { + t.Run("rewrites N to S for named fields", func(t *testing.T) { + item := map[string]types.AttributeValue{ + "project_name": &types.AttributeValueMemberN{Value: "12345"}, + "project_external_id": &types.AttributeValueMemberN{Value: "67890"}, + "project_acl": &types.AttributeValueMemberSS{Value: []string{"alice"}}, + } + normalizeProjectStringFields(item, "project_name", "project_external_id") + assertS(t, item, "project_name", "12345") + assertS(t, item, "project_external_id", "67890") + if _, ok := item["project_acl"].(*types.AttributeValueMemberSS); !ok { + t.Fatalf("project_acl should be unchanged SS, got %T", item["project_acl"]) + } + }) + + t.Run("leaves S values untouched", func(t *testing.T) { + item := map[string]types.AttributeValue{ + "project_name": &types.AttributeValueMemberS{Value: "Cloud Foundry"}, + } + normalizeProjectStringFields(item, "project_name") + assertS(t, item, "project_name", "Cloud Foundry") + }) + + t.Run("ignores unknown / missing names", func(t *testing.T) { + item := map[string]types.AttributeValue{ + "project_name": &types.AttributeValueMemberN{Value: "42"}, + } + normalizeProjectStringFields(item, "does_not_exist", "project_name") + assertS(t, item, "project_name", "42") + if _, present := item["does_not_exist"]; present { + t.Fatalf("normalizeProjectStringFields must not insert missing keys") + } + }) + + t.Run("nil map is a no-op", func(t *testing.T) { + // Must not panic. + normalizeProjectStringFields(nil, "project_name") + }) +} + +// TestPutProjectV1_BuildsStringTypesForDigitOnlyValues asserts the post-bug +// shape of the AttributeValue map: regardless of whether the project name +// happens to be all digits ("12345"), the PutProjectV1 / PostProjectV1 paths +// must produce S — not N — to round-trip correctly through downstream +// readers (LFX, Salesforce sync) that expect strings. +// +// This is a structural check on the literal map produced by the handlers' +// shared building blocks; it does not exercise HTTP wiring (which would +// require a full mock DDB), only that the literal AttributeValue we put +// into the map is the right type for digit-only inputs. +func TestPutProjectV1_BuildsStringTypesForDigitOnlyValues(t *testing.T) { + digitOnly := "12345" + item := map[string]types.AttributeValue{ + "project_name": &types.AttributeValueMemberS{Value: digitOnly}, + "project_external_id": &types.AttributeValueMemberS{Value: digitOnly}, + } + + assertS(t, item, "project_name", digitOnly) + assertS(t, item, "project_external_id", digitOnly) + + // Simulate a record that survived from the buggy era and verify the + // normalization rewrites it before write. + stale := map[string]types.AttributeValue{ + "project_name": &types.AttributeValueMemberN{Value: digitOnly}, + "project_external_id": &types.AttributeValueMemberN{Value: digitOnly}, + } + normalizeProjectStringFields(stale, "project_name", "project_external_id") + assertS(t, stale, "project_name", digitOnly) + assertS(t, stale, "project_external_id", digitOnly) +} + +func assertS(t *testing.T, item map[string]types.AttributeValue, name, want string) { + t.Helper() + v, ok := item[name].(*types.AttributeValueMemberS) + if !ok { + t.Fatalf("%s expected *AttributeValueMemberS, got %T", name, item[name]) + } + if v.Value != want { + t.Fatalf("%s got %q want %q", name, v.Value, want) + } +}