Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 36 additions & 1 deletion cmd/korg/korg.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,13 @@ limitations under the License.
package main

import (
"context"
"fmt"
"os"
"os/signal"
"path/filepath"
"strings"
"syscall"

"github.com/spf13/cobra"
)
Expand Down Expand Up @@ -58,6 +61,16 @@ Note: Removing from teams is currently unsupported.
`

auditHelpText = "Audit GitHub org members"

userinfoHelpText = `
Gets k8s org membership, GitHub profile, and OWNERS file references for user(s).

Honors GITHUB_TOKEN / GH_TOKEN for authenticated GitHub API access.

korg userinfo <github username>
korg userinfo <github username1> <github username2> <github username3> ...
korg userinfo --output json <github username>
`
)

type Options struct {
Expand Down Expand Up @@ -222,7 +235,29 @@ func main() {
rootCmd.AddCommand(removeCmd)
rootCmd.AddCommand(auditCmd)

if err := rootCmd.Execute(); err != nil {
var outputFormat string
userInfoCmd := &cobra.Command{
Use: "userinfo",
Short: "Get information about user(s)",
Long: userinfoHelpText,
Args: cobra.MinimumNArgs(1),
RunE: func(cmd *cobra.Command, args []string) error {
switch outputFormat {
case "text", "json":
default:
return fmt.Errorf("invalid --output %q (want: text, json)", outputFormat)
}
return runUserinfo(cmd.Context(), o.RepoRoot, args, outputFormat == "json", cmd.OutOrStdout())
},
}
userInfoCmd.Flags().StringVarP(&outputFormat, "output", "o", "text", "output format: text|json")
rootCmd.AddCommand(userInfoCmd)

ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
defer stop()

if err := rootCmd.ExecuteContext(ctx); err != nil {
fmt.Fprintln(os.Stderr, "error:", err)

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cobra already prints command errors to stderr by default, so this line will double-print every error. Either drop it to preserve the previous behavior, or set rootCmd.SilenceErrors = true before ExecuteContext if you want to own the formatting yourself.

os.Exit(1)
}
}
306 changes: 306 additions & 0 deletions cmd/korg/userinfo.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,306 @@
/*
Copyright 2023 The Kubernetes Authors.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
Copyright 2023 The Kubernetes Authors.
Copyright The Kubernetes Authors.

per kubernetes/kubernetes@c247514


Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"os"
"path/filepath"
"sort"
"text/tabwriter"
"time"

"github.com/google/go-github/v88/github"
houndclient "github.com/hound-search/hound/client"
"golang.org/x/sync/errgroup"
"sigs.k8s.io/prow/pkg/config/org"
)

const (
houndSearchURL = "https://cs.k8s.io/api/v1/search"
defaultHTTPTimeout = 30 * time.Second
userinfoConcurrency = 4
)

// Test-only overrides. Empty means use defaults / api.github.com.
var (
houndSearchURLOverride string
ghBaseURLOverride string
)

func houndURL() string {
if houndSearchURLOverride != "" {
return houndSearchURLOverride
}
return houndSearchURL
}

type OrgMembership struct {
Org string `json:"org"`
Role string `json:"role"` // "member" or "admin"
}

type OwnerFile struct {
Repo string `json:"repo"`
Path string `json:"path"`
URL string `json:"url"`
}

type UserDetails struct {
Username string `json:"username"`
Company string `json:"company,omitempty"`
Orgs []OrgMembership `json:"orgs"`
OwnerFiles []OwnerFile `json:"owner_files,omitempty"`
Warnings []string `json:"warnings,omitempty"`
}

// findUserDetails gathers GitHub profile, k8s org membership, and OWNERS file references for a username.
// GitHub or k/org config failures are fatal (returned as error). Hound failures are non-fatal warnings.
func findUserDetails(ctx context.Context, gh *github.Client, hc *http.Client, configs map[string]*org.Config, username string) (*UserDetails, error) {
info := &UserDetails{Username: username}

u, _, err := gh.Users.Get(ctx, username)
if err != nil {
return nil, fmt.Errorf("github user %q: %w", username, err)
}
if u.Company != nil {
info.Company = *u.Company
}

info.Orgs = findOrgMembership(configs, username)

files, err := searchOwnerFiles(ctx, hc, username)
if err != nil {
info.Warnings = append(info.Warnings, fmt.Sprintf("OWNERS lookup failed: %v", err))
} else {
info.OwnerFiles = files
}

return info, nil
}

func findOrgMembership(configs map[string]*org.Config, username string) []OrgMembership {
out := []OrgMembership{}
orgNames := make([]string, 0, len(configs))
for name := range configs {
orgNames = append(orgNames, name)
}
sort.Strings(orgNames)

for _, name := range orgNames {
cfg := configs[name]
switch {
case stringInSliceCaseAgnostic(cfg.Admins, username):
out = append(out, OrgMembership{Org: name, Role: "admin"})
case stringInSliceCaseAgnostic(cfg.Members, username):
out = append(out, OrgMembership{Org: name, Role: "member"})
}
}
return out
}

func loadOrgConfigs(repoRoot string, orgs []string) (map[string]*org.Config, error) {
out := make(map[string]*org.Config, len(orgs))
for _, name := range orgs {
path := filepath.Join(repoRoot, fmt.Sprintf(orgConfigPathFormat, name))
cfg, err := readConfig(path)
if err != nil {
return nil, fmt.Errorf("loading org config %s: %w", name, err)
}
out[name] = cfg
}
return out, nil
}

func ownerFileURL(repo, path string) string {
return fmt.Sprintf("https://github.com/%s/blob/HEAD/%s", repo, path)
}

func searchOwnerFiles(ctx context.Context, hc *http.Client, username string) ([]OwnerFile, error) {
q := url.Values{}
q.Set("q", username)

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should we do a search for "- " + username instead of just username?

since OWNERS entries are always YAML list items

q.Set("repos", "*")
q.Set("rng", ":20")
q.Set("files", "OWNERS")

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let's include OWNERS_ALIASES as well

since that defines named groups (e.g. sig-foo-approvers) which are referenced in OWNERS files

q.Set("excludeFiles", "vendor/")
q.Set("i", "true")
q.Set("stats", "true")

req, err := http.NewRequestWithContext(ctx, http.MethodGet, houndURL()+"?"+q.Encode(), nil)
if err != nil {
return nil, err
}

resp, err := hc.Do(req)
if err != nil {
return nil, err
}
defer resp.Body.Close()

if resp.StatusCode/100 != 2 {
snippet, _ := io.ReadAll(io.LimitReader(resp.Body, 256))
return nil, fmt.Errorf("hound %s: %s", resp.Status, bytes.TrimSpace(snippet))
}

body, err := io.ReadAll(resp.Body)
if err != nil {
return nil, err
}

var r houndclient.Response
if err := json.Unmarshal(body, &r); err != nil {
return nil, fmt.Errorf("decoding hound response: %w", err)
}

out := []OwnerFile{}
for repo, matches := range r.Results {
if matches == nil {
continue
}
for _, fm := range matches.Matches {
out = append(out, OwnerFile{
Repo: repo,
Path: fm.Filename,
URL: ownerFileURL(repo, fm.Filename),
})
Comment on lines +181 to +186

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we add a check here to skip lines starting with # in the result before appending them to out?

because commented out usernames also will show up as valid hit right now

Also -

(since we already have the repo + file path from hound)
could we fetch the actual OWNERS file from GitHub and parse which section the username falls under? because someone in emeritus_approvers/emeritus_reviewers isn't valid

it will add 1-2 extra API calls maybe but since its supposed to be used locally on a handful of users at a time, it think that should be ok

}
}
sort.Slice(out, func(i, j int) bool {
if out[i].Repo != out[j].Repo {
return out[i].Repo < out[j].Repo
}
return out[i].Path < out[j].Path
})
return out, nil
}

// renderText writes a human-readable form of UserDetails to w.
func (u *UserDetails) renderText(w io.Writer) {
fmt.Fprintf(w, "\n=== %s\n", u.Username)
if u.Company != "" {
fmt.Fprintln(w, "Company:", u.Company)
} else {
fmt.Fprintln(w, "Company: **Not Found**")
}

fmt.Fprintln(w, "Orgs:")
if len(u.Orgs) == 0 {
fmt.Fprintln(w, " (none)")
}
for _, m := range u.Orgs {
fmt.Fprintf(w, " %s (%s)\n", m.Org, m.Role)
}

if len(u.OwnerFiles) > 0 {
fmt.Fprintln(w, "Owner Files:")
tw := tabwriter.NewWriter(w, 0, 0, 2, ' ', 0)
fmt.Fprintln(tw, " REPO\tPATH\tURL")
for _, of := range u.OwnerFiles {
fmt.Fprintf(tw, " %s\t%s\t%s\n", of.Repo, of.Path, of.URL)
}
tw.Flush()
}

for _, warn := range u.Warnings {
fmt.Fprintln(w, "Warning:", warn)
}
}

// newGitHubClient returns a GitHub client authenticated via GITHUB_TOKEN or GH_TOKEN if set.
func newGitHubClient(ctx context.Context, httpClient *http.Client) (*github.Client, error) {
token := os.Getenv("GITHUB_TOKEN")

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ctx isn't used anywhere in this function — it only reads env vars and constructs a client. Either drop it from the signature or use _ if you're planning to need it later.

if token == "" {
token = os.Getenv("GH_TOKEN")
}
opts := []github.ClientOptionsFunc{}
if httpClient != nil {
opts = append(opts, github.WithHTTPClient(httpClient))
}
if token != "" {
opts = append(opts, github.WithAuthToken(token))
}
if ghBaseURLOverride != "" {
opts = append(opts, github.WithEnterpriseURLs(ghBaseURLOverride, ghBaseURLOverride))
}
return github.NewClient(opts...)
}

// runUserinfo fetches info for every username concurrently and writes ordered output to w.
// Returns a joined error of all per-user failures; users without errors still render.
func runUserinfo(ctx context.Context, repoRoot string, usernames []string, outputJSON bool, w io.Writer) error {
configs, err := loadOrgConfigs(repoRoot, validOrgs)
if err != nil {
return err
}

hc := &http.Client{Timeout: defaultHTTPTimeout}
gh, err := newGitHubClient(ctx, hc)
if err != nil {
return fmt.Errorf("building github client: %w", err)
}

results := make([]*UserDetails, len(usernames))
errs := make([]error, len(usernames))

g, gctx := errgroup.WithContext(ctx)
g.SetLimit(userinfoConcurrency)
for i, name := range usernames {
i, name := i, name
g.Go(func() error {

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This loop-variable capture was needed before Go 1.22, but since the module is already at 1.22+ (and this PR bumps it further), loop variables are per-iteration by default. This line can be removed.

info, err := findUserDetails(gctx, gh, hc, configs, name)
if err != nil {
errs[i] = err
return nil
}
results[i] = info
return nil
})
}
_ = g.Wait()

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since the goroutines always return nil today, discarding the Wait() return is technically safe — but it's a maintenance trap. If someone later changes a goroutine to return a real error, it'll vanish silently. A small guard makes the invariant explicit:

if err := g.Wait(); err != nil {
    return fmt.Errorf("unexpected errgroup error: %w", err)
}

if outputJSON {
clean := make([]*UserDetails, 0, len(results))

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When a user lookup fails in JSON mode, that user is silently dropped from the output array. A script parsing stdout will get fewer results than expected, with no structured way to tell which lookups failed or why. Text mode handles this well (it renders an ERROR: block inline), but JSON consumers have to correlate via exit code alone.

Worth either including an error entry in the array (e.g. {"username":"ghost","error":"..."}) or at least documenting in the help text that JSON mode omits failures and callers should check the exit code.

for _, r := range results {
if r != nil {
clean = append(clean, r)
}
}
enc := json.NewEncoder(w)
enc.SetIndent("", " ")
if err := enc.Encode(clean); err != nil {
return err
}
} else {
for i, r := range results {
if r != nil {
r.renderText(w)
}
if errs[i] != nil {
fmt.Fprintf(w, "\n=== %s\nERROR: %v\n", usernames[i], errs[i])
}
}
}

return errors.Join(errs...)
}
Loading