-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Expand file tree
/
Copy pathgenerate-contributor-list.mjs
More file actions
336 lines (310 loc) · 12.3 KB
/
generate-contributor-list.mjs
File metadata and controls
336 lines (310 loc) · 12.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
import { execFileSync } from "node:child_process";
import https from "node:https";
// ---------- flags / utils
// Everything on the command line after the node binary and the script path.
const RAW_ARGS = process.argv.slice(2);

// True when --debug was passed anywhere on the command line.
const DEBUG = RAW_ARGS.some((arg) => arg === "--debug");

/** Write diagnostics to stderr, but only when running with --debug. */
function logd(...xs) {
  if (!DEBUG) return;
  console.error(...xs);
}

// English collator with "base" sensitivity, used for sorting the output rows.
const collator = new Intl.Collator("en", { sensitivity: "base" });

/** Comparator over two strings that delegates to the shared collator. */
function cmp(a, b) {
  return collator.compare(a, b);
}

/** Resolve after `ms` milliseconds. */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}

/** Lower-case `s`; any falsy input (null, undefined, "") becomes "". */
function toLower(s) {
  return s ? s.toLowerCase() : "";
}
/**
 * Print a message to stderr and terminate the process.
 * @param {string} msg - Text written to stderr before exiting.
 * @param {number} [code=1] - Exit status passed to process.exit.
 */
function die(msg, code = 1) {
  console.error(msg);
  process.exit(code);
}
/**
 * Reduce a display name to a comparison key.
 * Steps: NFKD-decompose, drop diacritic marks, collapse whitespace runs to a
 * single space, trim, lower-case. Falsy input yields "".
 * @param {string} s - Name to normalize.
 * @returns {string} The normalized key.
 */
function normalizeName(s) {
  const decomposed = (s || "").normalize("NFKD");
  const unaccented = decomposed.replace(/\p{Diacritic}/gu, "");
  const singleSpaced = unaccented.replace(/\s+/g, " ");
  return singleSpaced.trim().toLowerCase();
}
/**
 * Choose the more informative of two name hints for the same person.
 * The candidate wins when there is no usable current name, when it contains
 * whitespace and the current name does not, or when it is strictly longer;
 * otherwise the (trimmed) current name is kept.
 * @param {string} current - Name hint held so far (may be empty).
 * @param {string} candidate - Newly seen name hint (may be empty/null).
 * @returns {string} The trimmed winner.
 */
function pickBetterName(current, candidate) {
  const incoming = (candidate || "").trim();
  if (!current) return incoming;

  const existing = current.trim();
  if (existing === "" && incoming !== "") return incoming;

  const hasSpace = (text) => /\s/.test(text);
  const gainsSpace = hasSpace(incoming) && !hasSpace(existing);
  const isLonger = incoming.length > existing.length;
  return gainsSpace || isLonger ? incoming : existing;
}
/**
 * Return a trimmed display name, or `fallback` when the name is unusable.
 * A name is unusable when it is empty after trimming, contains "@" (including
 * "moved to @..." notices), or consists only of dash characters.
 * @param {string} raw - Proposed display name.
 * @param {string} fallback - Value returned when `raw` is rejected.
 * @returns {string} The trimmed name or the fallback.
 */
function sanitizeDisplayName(raw, fallback) {
  const trimmed = (raw || "").trim();
  const rejectPatterns = [
    /moved\s+to\s+@/i,
    /@/,
    /^\s*[-–—]+\s*$/,
  ];
  const unusable = trimmed === "" || rejectPatterns.some((re) => re.test(trimmed));
  return unusable ? fallback : trimmed;
}
/**
 * Interpret the command line (with --debug removed).
 * The first remaining argument is the git range; anything after a literal
 * "--" is treated as a path filter. Exits with a usage message when no
 * arguments remain.
 * @returns {{ range: string, paths: string[] }}
 */
function parseArgs() {
  const positional = RAW_ARGS.filter((arg) => arg !== "--debug");
  if (positional.length === 0) {
    die("Usage: node scripts/generate-contributor-list.mjs <git-range> [-- <paths...>] [--debug]");
  }
  const [range] = positional;
  const separatorAt = positional.indexOf("--");
  const paths = separatorAt === -1 ? [] : positional.slice(separatorAt + 1);
  return { range, paths };
}
/**
 * Run `git` synchronously and return its stdout as a UTF-8 string.
 * On any failure the process is terminated through die(), so this function
 * never throws to its caller.
 * @param {string[]} argv - Arguments passed to git.
 * @param {object} [opts={}] - Extra execFileSync options; override the defaults.
 * @returns {string} Captured stdout.
 */
function execGit(argv, opts = {}) {
  const spawnOptions = {
    encoding: "utf8",
    maxBuffer: 1024 * 1024 * 400,
    ...opts,
  };
  try {
    return execFileSync("git", argv, spawnOptions);
  } catch (err) {
    die(`git ${argv.join(" ")} failed: ${err.message}`);
  }
}
/**
 * Derive the "owner/repo" slug from the URL of the 'origin' remote.
 * Accepts both `https://github.com/owner/repo(.git)` and
 * `git@github.com:owner/repo(.git)` forms. Exits through die() when the
 * remote is missing or is not a github.com URL.
 * @returns {string} Slug in "owner/repo" form.
 */
function repoNameWithOwner() {
  let url = "";
  try {
    // Call git directly rather than through execGit(): execGit() terminates
    // the process on failure, which made this catch (and the explicit error
    // below) unreachable when the remote is missing.
    url = execFileSync("git", ["remote", "get-url", "origin"], { encoding: "utf8" }).trim();
  } catch {
    // No usable 'origin' remote (or git unavailable): keep url empty so the
    // dedicated error message below is reported.
  }
  const m = url.match(/github\.com[:/](?<owner>[^/]+)\/(?<repo>[^/]+?)(?:\.git)?$/i);
  if (m?.groups) return `${m.groups.owner}/${m.groups.repo}`;
  die("Could not determine GitHub repo from 'origin' remote.");
}
/**
 * List commit SHAs in `range` via `git rev-list`, optionally limited to paths.
 * @param {string} range - Revision range handed to git as-is.
 * @param {string[]} paths - Path filters; appended after "--" when non-empty.
 * @returns {string[]} One SHA per element, blank lines removed.
 */
function revList(range, paths) {
  const pathFilter = paths.length ? ["--", ...paths] : [];
  const stdout = execGit(["rev-list", range, ...pathFilter]);
  return stdout.split(/\r?\n/).filter((line) => line !== "");
}
/**
 * Extract every "Co-authored-by: Name <email>" trailer from a commit message.
 * Matching is case-insensitive and tolerates leading spaces/tabs on the line.
 * @param {string} message - Full commit message.
 * @returns {{ name: string, email: string }[]} Trailers in order of appearance.
 */
function parseCoAuthorLines(message) {
  const trailerPattern = /^[ \t]*Co-authored-by:\s*(.+?)\s*<([^>]+)>/gim;
  // String() mirrors the implicit string coercion RegExp#exec applies to its argument.
  return Array.from(String(message).matchAll(trailerPattern), ([, who, address]) => ({
    name: who.trim(),
    email: address.trim(),
  }));
}
/**
 * Recover a GitHub login from a `users.noreply.github.com` address.
 * Handles both `login@...` and `12345+login@...` forms; the address is
 * lower-cased first, so the returned login is lower-case.
 * @param {string} email - Commit email address.
 * @returns {string} The login, or "" when the address is not a noreply one.
 */
function loginFromNoreply(email) {
  const noreplyPattern = /^(?:\d+\+)?([a-z0-9-]+)@users\.noreply\.github\.com$/i;
  const hit = noreplyPattern.exec(email.toLowerCase());
  return hit === null ? "" : hit[1];
}
/**
 * Guess plausible GitHub handles from an email address and a display name.
 * Guesses are produced in a fixed order (email-derived first, then
 * name-derived, then quoted / parenthesized nicknames), de-duplicated keeping
 * the first occurrence, and finally restricted to 2-39 characters of
 * letters, digits and hyphens.
 * @param {string} email - Contributor email address.
 * @param {string} name - Contributor display name.
 * @returns {string[]} Candidate handles in generation order.
 */
function candidateHandlesFromEmailAndName(email, name) {
  const handles = [];

  // Guesses from the email local part: separators removed, then trailing digits removed too.
  const [localPart] = email.split("@");
  const squashed = localPart.replace(/[._]/g, "");
  handles.push(squashed, squashed.replace(/\d+$/, ""));

  // Guesses from the first and last token of the local part.
  const emailTokens = localPart.split(/[._-]+/).filter(Boolean);
  if (emailTokens.length >= 2) {
    const head = emailTokens[0];
    const tail = emailTokens[emailTokens.length - 1];
    handles.push(
      `${head}${tail}`,
      `${head}-${tail}`,
      `${head}_${tail}`,
      `${head[0]}${tail}`,
    );
    if (tail.length >= 3) handles.push(`${head}${tail.slice(0, 3)}`);
  }

  // Guesses from the first and last word of the display name.
  const words = name.split(/\s+/).filter(Boolean);
  if (words.length >= 2) {
    const strip = (word) => word.replace(/[^A-Za-z0-9-]/g, "");
    const given = strip(words[0]);
    const family = strip(words[words.length - 1]);
    if (given && family) {
      handles.push(`${given}${family}`, `${given}-${family}`, `${given[0]}${family}`);
    }
  }

  // Nicknames written as 'nick' or (nick) inside the display name.
  const quoted = name.match(/'([^']{1,39})'/);
  if (quoted) handles.push(quoted[1]);
  const parenthesized = name.match(/\(([^) ]{1,39})\)/);
  if (parenthesized) handles.push(parenthesized[1]);

  const validHandle = /^[A-Za-z0-9-]{2,39}$/;
  return [...new Set(handles)].filter((handle) => validHandle.test(handle));
}
// ---------- GraphQL
// "owner/repo" slug for the 'origin' remote, plus its two halves for GraphQL variables.
const REPO = repoNameWithOwner();
const REPO_PARTS = REPO.split("/");
const OWNER = REPO_PARTS[0];
const NAME = REPO_PARTS[1];
/**
 * Locate a GitHub API token.
 * Order of preference: GITHUB_TOKEN, then GH_TOKEN, then the output of
 * `gh auth token`. Returns "" when none is available or the gh call fails.
 * @returns {string} The token, or "".
 */
function getToken() {
  const { GITHUB_TOKEN, GH_TOKEN } = process.env;
  const fromEnv = GITHUB_TOKEN || GH_TOKEN || "";
  if (fromEnv) return fromEnv;

  try {
    const cliOutput = execFileSync("gh", ["auth", "token"], { encoding: "utf8" });
    return cliOutput.trim();
  } catch {
    return "";
  }
}
// API token used for every GraphQL request; warn once on stderr when none was found.
const TOKEN = getToken();
if (!TOKEN) {
  console.error("Warning: no GITHUB_TOKEN/GH_TOKEN (or gh auth token). Resolution will be limited.");
}
/**
 * POST a query to the GitHub GraphQL endpoint and return the decoded JSON.
 * Best-effort by design: a transport error or an unparseable body resolves
 * to `{}` instead of rejecting. GraphQL-level errors are only logged (in
 * --debug mode) and the response is still returned.
 * @param {string} query - GraphQL document.
 * @param {object} [variables] - Variables object; omitted from the payload when falsy.
 * @returns {Promise<object>} Parsed response body, or `{}` on failure.
 */
async function graphql(query, variables) {
  const payload = { query };
  if (variables) payload.variables = variables;
  const body = JSON.stringify(payload);

  const headers = {
    "User-Agent": "contributors-table-graphql",
    "Authorization": `Bearer ${TOKEN}`,
    "Content-Type": "application/json",
    "Content-Length": Buffer.byteLength(body),
  };
  const options = {
    hostname: "api.github.com",
    path: "/graphql",
    method: "POST",
    headers,
  };

  return await new Promise((resolve) => {
    const onResponse = (res) => {
      const chunks = [];
      res.setEncoding("utf8");
      res.on("data", (chunk) => {
        chunks.push(chunk);
      });
      res.on("end", () => {
        let json;
        try {
          json = JSON.parse(chunks.join("") || "{}");
          if (json.errors && DEBUG) console.error("GraphQL errors:", JSON.stringify(json.errors, null, 2));
        } catch {
          resolve({});
          return;
        }
        resolve(json);
      });
    };

    const req = https.request(options, onResponse);
    req.on("error", () => resolve({}));
    req.write(body);
    req.end();
  });
}
/**
 * Batch-fetch author and message data for commits, 40 SHAs per GraphQL request.
 * Also counts, per login, how many of the fetched commits that login is the
 * primary author of. SHAs missing from a response are simply skipped.
 * @param {string[]} oids - Commit SHAs to look up.
 * @returns {Promise<{ commitInfo: Map<string, {login: string, name: string, email: string, message: string}>, authorCount: Map<string, number> }>}
 */
async function fetchCommitsByOidBatch(oids) {
  const commitInfo = new Map(); // oid -> { login | "", name, email, message }
  const authorCount = new Map(); // login -> # of primary authored commits in range
  const BATCH_SIZE = 40;

  // One aliased `object(oid: ...)` selection per commit; alias cN maps back to batch[N].
  const selectionFor = (oid, idx) => [
    "",
    `c${idx}: object(oid: "${oid}") {`,
    "... on Commit {",
    "message",
    "author { user { login } name email }",
    "}",
    "}",
  ].join("\n");

  for (let start = 0; start < oids.length; start += BATCH_SIZE) {
    const batch = oids.slice(start, start + BATCH_SIZE);
    const fields = batch.map(selectionFor).join("\n");
    const q = `query($owner:String!, $name:String!) { repository(owner:$owner, name:$name) { ${fields} } }`;
    const res = await graphql(q, { owner: OWNER, name: NAME });
    const repo = res?.data?.repository || {};

    batch.forEach((oid, idx) => {
      const node = repo[`c${idx}`];
      if (!node) return;
      const login = node?.author?.user?.login || "";
      commitInfo.set(oid, {
        login,
        name: node?.author?.name || "",
        email: node?.author?.email || "",
        message: node?.message || "",
      });
      if (login) authorCount.set(login, (authorCount.get(login) || 0) + 1);
    });
  }

  return { commitInfo, authorCount };
}
// GraphQL user search helpers (users only)
/**
 * Find a GitHub login whose profile name equals `name` after normalization.
 * Searches users with `"<name>" in:name` (first 25 hits) and returns the
 * first hit whose normalized profile name matches.
 * @param {string} name - Contributor display name taken from a commit.
 * @returns {Promise<string>} Matching login, or "" when there is no token,
 *   the name is empty after normalization, or nothing matches.
 */
async function searchUsersByNameExact(name) {
  if (!TOKEN) return "";
  const target = normalizeName(name);
  // An empty name identifies nobody. Without this guard "" would equal the
  // normalized name of any hit that has no profile name, attributing the
  // contributor to an unrelated account.
  if (!target) return "";
  const queryStr = `"${name.replace(/"/g, '\\"')}" in:name type:user`;
  const q = `query($q:String!){ search(type: USER, query: $q, first: 25) { nodes { ... on User { login name } } } }`;
  const r = await graphql(q, { q: queryStr });
  const nodes = r?.data?.search?.nodes ?? [];
  for (const it of nodes) {
    if (!it?.login) continue;
    if (normalizeName(it.name || "") === target) return it.login;
  }
  return "";
}
/**
 * Resolve a guessed handle through a login search.
 * Only an unambiguous result counts: the login is returned when the search
 * (limited to 5 hits) yields exactly one user, otherwise "".
 * @param {string} token - Candidate handle to search for.
 * @returns {Promise<string>} The single matching login, or "".
 */
async function searchUsersByLoginToken(token) {
  if (!TOKEN) return "";
  const searchQuery = `query($q:String!){ search(type: USER, query: $q, first: 5) { nodes { ... on User { login name } } } }`;
  const response = await graphql(searchQuery, { q: `${token} in:login type:user` });
  const hits = response?.data?.search?.nodes ?? [];
  if (hits.length !== 1) return "";
  return hits[0]?.login || "";
}
/**
 * Look up the profile display name for each login, 40 logins per request.
 * Every requested login gets an entry; logins with no profile name (or no
 * data in the response) map to "".
 * @param {string[]} logins - GitHub logins to look up.
 * @returns {Promise<Map<string, string>>} login -> trimmed profile name.
 */
async function fetchProfileNames(logins) {
  const names = new Map();
  const BATCH_SIZE = 40;

  for (let start = 0; start < logins.length; start += BATCH_SIZE) {
    const batch = logins.slice(start, start + BATCH_SIZE);
    // Alias uN maps back to batch[N].
    const selections = batch.map((login, idx) => `u${idx}: user(login: "${login}") { login name }`);
    const response = await graphql(`query { ${selections.join("\n")} }`);
    const data = response?.data || {};

    batch.forEach((login, idx) => {
      const user = data[`u${idx}`];
      names.set(login, (user?.name || "").trim());
    });
  }

  return names;
}
// ---------- main
/**
 * Build and print a Markdown table of contributors for a git range.
 *
 * Pipeline: list commits -> fetch author/message data -> gather primary
 * authors and co-authors -> resolve the ones without a known login through
 * GitHub user search -> fetch profile names -> collapse duplicates -> print
 * rows sorted by name. Contributors that could not be resolved are listed by
 * name only, with an empty Github cell.
 */
async function main() {
  const { range, paths } = parseArgs();
  const shas = revList(range, paths);
  if (!shas.length) die("No commits in the specified range/path.");

  // 1) Commit info + primary author counts
  const { commitInfo, authorCount } = await fetchCommitsByOidBatch(shas);

  // 2) Collect authors and co-authors
  const loginBestName = new Map(); // login -> name hint
  const pool = []; // [{ name, email }] to resolve (co-authors + primaries with missing login)
  for (const sha of shas) {
    const info = commitInfo.get(sha);
    if (!info) continue;
    const { login, name, email, message } = info;
    if (login) {
      loginBestName.set(login, pickBetterName(loginBestName.get(login) || "", name));
    } else {
      const guess = loginFromNoreply(email);
      if (guess) loginBestName.set(guess, pickBetterName(loginBestName.get(guess) || "", name));
      else pool.push({ name, email });
    }
    for (const ca of parseCoAuthorLines(message)) pool.push(ca);
  }

  // 3) Resolve pool (GraphQL users search only)
  const emailToLogin = new Map(); // emailLower -> login
  const concurrency = 8;
  let idx = 0; // shared cursor: each worker claims the next unprocessed pool entry
  async function worker() {
    while (idx < pool.length) {
      const i = idx++;
      const { name, email } = pool[i];
      const ekey = toLower(email);
      if (emailToLogin.has(ekey)) continue;
      let login = loginFromNoreply(email);
      if (!login) login = await searchUsersByNameExact(name);
      if (!login) {
        const cands = candidateHandlesFromEmailAndName(email, name);
        for (const cand of cands) {
          const solo = await searchUsersByLoginToken(cand);
          if (solo) { login = solo; break; }
        }
      }
      if (!login && DEBUG) logd(`Unresolved: "${name}" <${email}>`);
      emailToLogin.set(ekey, login || "");
      if (login) loginBestName.set(login, pickBetterName(loginBestName.get(login) || "", name));
      // Brief pause on every tenth entry to space out API requests.
      if (i % 10 === 0) await sleep(60);
    }
  }
  await Promise.all(Array.from({ length: concurrency }, worker));

  // 4) Build candidate rows (resolved only), fetch profile names
  const resolvedLogins = Array.from(loginBestName.keys());
  const profileNames = await fetchProfileNames(resolvedLogins);
  const candidates = resolvedLogins.map(login => {
    const prof = (profileNames.get(login) || "").trim();
    const hint = (loginBestName.get(login) || "").trim();
    // Prefer the profile name, then the commit-name hint, then the bare login.
    // Each tier is sanitized separately so that a rejected profile name
    // (e.g. "moved to @x") falls through to the next tier; previously the
    // profile name was passed as its own fallback and was returned regardless.
    const display = sanitizeDisplayName(prof, sanitizeDisplayName(hint, login));
    return { login, display, authorCommits: authorCount.get(login) || 0 };
  });

  // 5) Collapse duplicate people with the same display name
  const byDisplay = new Map(); // normName -> best candidate
  // Higher is better: primary authorship outweighs having a display name distinct from the login.
  const score = (x) => (x.authorCommits > 0 ? 2 : 0) + (x.display.toLowerCase() !== x.login.toLowerCase() ? 1 : 0);
  for (const c of candidates) {
    const key = normalizeName(c.display);
    if (!byDisplay.has(key)) { byDisplay.set(key, c); continue; }
    const cur = byDisplay.get(key);
    // Ties are broken by the alphabetically smaller lower-cased login.
    if (score(c) > score(cur) || (score(c) === score(cur) && c.login.toLowerCase() < cur.login.toLowerCase())) {
      if (DEBUG) logd(`Collapsed duplicate "${c.display}": keeping ${c.login} over ${cur.login}`);
      byDisplay.set(key, c);
    }
  }
  const resolvedRows = Array.from(byDisplay.values())
    // Keep only the first row per login, compared case-insensitively.
    .filter((v, i, arr) => arr.findIndex(x => x.login.toLowerCase() === v.login.toLowerCase()) === i)
    .map(({ display, login }) => ({ name: display, gh: `[@${login}](https://github.com/${login})`, login }));

  // 6) Unmatched → name-only rows with an empty Github cell (dedupe by name+email)
  const unmatched = [];
  const seenUnk = new Set();
  for (const { name, email } of pool) {
    const login = emailToLogin.get(toLower(email));
    if (login) continue;
    // NOTE(review): raw and fallback are the same value here, so a rejected
    // name is still shown as-is — confirm that is intended.
    const nm = sanitizeDisplayName(name || "(Unknown)", name || "(Unknown)");
    const key = normalizeName(nm) + "|" + email.toLowerCase();
    if (seenUnk.has(key)) continue;
    seenUnk.add(key);
    unmatched.push({ name: nm, gh: "", login: "" });
  }

  // 7) Merge, sort, output
  const allRows = [...resolvedRows, ...unmatched];
  allRows.sort((a, b) => cmp(a.name, b.name));
  console.log("| Author | Github");
  console.log("| ----------------------------- | ---------------------------------");
  for (const r of allRows) {
    console.log(`| ${r.name} | ${r.gh}`);
  }
}
// Run the script; a rejection from main() is stringified and reported through die().
main().catch((err) => {
  die(String(err));
});