From 976f14189ef4baa391e665d59987e90f49f5ca90 Mon Sep 17 00:00:00 2001 From: wowawiwa Date: Sat, 6 Oct 2018 17:20:13 +0200 Subject: [PATCH 1/8] Comment --- fetchRepos.js | 2 ++ impl/githubV4.js | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/fetchRepos.js b/fetchRepos.js index cf9acb26b..c13b84211 100755 --- a/fetchRepos.js +++ b/fetchRepos.js @@ -109,9 +109,11 @@ optional arguments: } spinner.succeed(`Found ${Object.keys(repoPaths).length} repos in DB`); + // We iterate over referencedRepos and not repoPath because new repositories might have been created. for (const repoFullName of referencedRepos) { await fetchRepo(repoFullName, firsttime); } + stripUnreferencedRepos(); for (const repoFullName in repoPaths) { diff --git a/impl/githubV4.js b/impl/githubV4.js index b73cccf9e..d264c82c9 100644 --- a/impl/githubV4.js +++ b/impl/githubV4.js @@ -54,7 +54,8 @@ query($owner: String!, $name: String!) { } ` const repo = async (oraSpinner, errCodes, repoFullName, repoFetchedAtDate) => { - // TODO use repoFetchedAtDate + // TODO: Use repoFetchedAtDate + // TODO: This doesn't fetch "organization" as v3 does. Check if it's a problem. const dataJson = await gh.fetchGHJson('https://api.github.com/graphql', oraSpinner, errCodes, null, { query: repoQuery, From 28fa0fc17b2b34b56f2edfab445a3f7a4681ff2a Mon Sep 17 00:00:00 2001 From: wowawiwa Date: Sat, 6 Oct 2018 20:09:43 +0200 Subject: [PATCH 2/8] Refactor- Call stripUnreferencedRepos() sooner --- fetchRepos.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fetchRepos.js b/fetchRepos.js index c13b84211..95b36f845 100755 --- a/fetchRepos.js +++ b/fetchRepos.js @@ -109,13 +109,13 @@ optional arguments: } spinner.succeed(`Found ${Object.keys(repoPaths).length} repos in DB`); + stripUnreferencedRepos(); + // We iterate over referencedRepos and not repoPath because new repositories might have been created. for (const repoFullName of referencedRepos) { await fetchRepo(repoFullName, firsttime); } - stripUnreferencedRepos(); - for (const repoFullName in repoPaths) { const repo = new DbFile(repoPaths[repoFullName].repo); if (!repo.removed_from_github && !repo.ghuser_insignificant) { From 30a75aeafcc51471a6af059aa9fe3e40fa641310 Mon Sep 17 00:00:00 2001 From: wowawiwa Date: Mon, 8 Oct 2018 23:50:23 +0200 Subject: [PATCH 3/8] // --- fetchRepos.js | 175 ++++++++++++++++++++++------------------ impl/github.js | 10 +-- impl/githubV3.js | 25 +++--- impl/githubV4.js | 205 +++++++++++++++++++++++++++-------------------- 4 files changed, 237 insertions(+), 178 deletions(-) diff --git a/fetchRepos.js b/fetchRepos.js index 95b36f845..82a1c9a33 100755 --- a/fetchRepos.js +++ b/fetchRepos.js @@ -13,9 +13,8 @@ const DbFile = require('./impl/dbFile'); const fetchJson = require('./impl/fetchJson'); - const ghAPIVersion = process.env.GHUSER_GHV4 === "true" ? 4 : 3 - console.log(`Using GitHub APIv${ghAPIVersion}`) - const ghcl = require(`./impl/githubV${ghAPIVersion}`); + const ghclV3 = require(`./impl/githubV3`); + const ghclV4 = require(`./impl/githubV4`); const githubColors = require('github-colors'); const scriptUtils = require('./impl/scriptUtils'); @@ -47,10 +46,7 @@ optional arguments: return; async function fetchRepos(firsttime) { - let spinner; - - let spinnerText = 'Reading users from DB...'; - spinner = ora(spinnerText).start(); + console.log('Reading users from DB...') const users = []; for (const file of fs.readdirSync(data.users)) { await sleep(0); // make loop interruptible @@ -59,14 +55,12 @@ optional arguments: const user = new DbFile(path.join(data.users, file)); if (!user.ghuser_deleted_because && !user.removed_from_github) { users.push(user); - spinner.text = `${spinnerText} [${users.length}]`; } } } - spinner.succeed(`Found ${users.length} users in DB`); + console.log(`Found ${users.length} users in DB`); - spinnerText = 'Searching repos referenced by users...'; - spinner = ora(spinnerText).start(); + console.log('Searching repos referenced by users...'); const referencedRepos = new Set([]); for (const user of users) { for (const repo in (user.contribs && user.contribs.repos || [])) { @@ -77,8 +71,6 @@ optional arguments: throw `user.contribs.repos[${repo}] is undefined`; } referencedRepos.add(full_name); - spinner.text = `${spinnerText} [${referencedRepos.size}]`; - // Make sure the corresponding repo files exist: for (const pathToFolder of [data.repos, data.repoCommits]) { const filePath = path.join(pathToFolder, `${full_name}.json`); @@ -86,10 +78,9 @@ optional arguments: } } } - spinner.succeed(`Found ${referencedRepos.size} repos referenced by users`); + console.log(`Found ${referencedRepos.size} repos referenced by users`); - spinnerText = 'Reading repos from DB...'; - spinner = ora(spinnerText).start(); + console.log('Reading repos from DB...'); const repoPaths = {}; for (const ownerDir of fs.readdirSync(data.repos)) { const pathToOwner = path.join(data.repos, ownerDir); @@ -103,79 +94,88 @@ optional arguments: repo: path.join(pathToOwner, file), repoCommits: path.join(data.repoCommits, ownerDir, file) }; - spinner.text = `${spinnerText} [${Object.keys(repoPaths).length}]`; } } } - spinner.succeed(`Found ${Object.keys(repoPaths).length} repos in DB`); + console.log(`Found ${Object.keys(repoPaths).length} repos in DB`); stripUnreferencedRepos(); - // We iterate over referencedRepos and not repoPath because new repositories might have been created. - for (const repoFullName of referencedRepos) { - await fetchRepo(repoFullName, firsttime); - } + let full_names; - for (const repoFullName in repoPaths) { - const repo = new DbFile(repoPaths[repoFullName].repo); + full_names = [...referencedRepos]; + await Promise.all([ + loopDo(full_names, async (full_name) => { await fetchRepo(ghclV3, full_name, firsttime)}), + loopDo(full_names, async (full_name) => { await fetchRepo(ghclV4, full_name, firsttime)}), + ]); + + full_names = Object.keys(repoPaths); + await Promise.all([ + loopDo(full_names, async (full_name) => { await fetchRepoDetails(ghclV3, repoPaths[full_name])}), + loopDo(full_names, async (full_name) => { await fetchRepoDetails(ghclV4, repoPaths[full_name])}), + ]); + + createRenamedRepos(); + + return; + + async function fetchRepoDetails(ghcl, paths) { + const repo = new DbFile(paths.repo); if (!repo.removed_from_github && !repo.ghuser_insignificant) { - const repoCommits = new DbFile(repoPaths[repoFullName].repoCommits); - await fetchRepoCommitsAndContributors(repo, repoCommits); - await fetchRepoPullRequests(repo); - if (ghAPIVersion === 3) { - await fetchRepoLanguages(repo); - } + const repoCommits = new DbFile(paths.repoCommits); + await fetchRepoCommitsAndContributors(ghcl, repo, repoCommits); + await fetchRepoPullRequests(ghcl, repo); + await fetchRepoLanguages(ghcl, repo); await fetchRepoSettings(repo); markRepoAsFullyFetched(repo); } - } + }; - createRenamedRepos(); - - return; + async function fetchRepo(ghcl, repoFullName, firsttime) { + const tag = `[${ghcl.version}] Fetch Repo - ${repoFullName} -`; - async function fetchRepo(repoFullName, firsttime) { - spinner = ora(`Fetching ${repoFullName}...`).start(); + console.log(`${tag} starting`); const repo = new DbFile(repoPaths[repoFullName].repo); + // repo.gh_api_version = ghcl.version; const now = new Date; const maxAgeHours = firsttime && (24 * 365) || 12; if (repo.fetching_since || repo.fetched_at && now - Date.parse(repo.fetched_at) < maxAgeHours * 60 * 60 * 1000) { - spinner.succeed(`${repoFullName} is still fresh`); + console.log(`${tag} is still fresh`); return; } if (repo.removed_from_github) { // For now ok, but maybe some day we'll have to deal with resurrected repos. - spinner.succeed(`${repoFullName} was removed from GitHub in the past`); + console.log(`${tag} was removed from GitHub in the past`); return; } - const ghDataJson = await ghcl.repo(spinner, [304, 404, 451], repoFullName, new Date(repo.fetched_at)) + const ghDataJson = await ghcl.repo([304, 404, 451], repoFullName, new Date(repo.fetched_at)); switch (ghDataJson) { case 304: - repo.fetched_at = now.toISOString();; - spinner.succeed(`${repoFullName} didn't change`); + repo.fetched_at = now.toISOString(); + console.log(`${tag} didn't change`); repo.write(); return; case 404: repo.removed_from_github = true; - spinner.succeed(`${repoFullName} was removed from GitHub`); + console.log(`${tag} was removed from GitHub`); repo.write(); return; case 451: // Unavailable for legal reasons // Probably a DCMA takedown, like https://github.com/worktips/worktips repo.removed_from_github = true; - spinner.succeed(`${repoFullName} is blocked for legal reasons`); + console.log(`${tag} is blocked for legal reasons`); repo.write(); return; } - repo.fetching_since = now.toISOString();; + repo.fetching_since = now.toISOString(); - spinner.succeed(`Fetched ${repoFullName}`); + console.log(`${tag} finished`); ghDataJson.owner = ghDataJson.owner.login; Object.assign(repo, ghDataJson); @@ -225,20 +225,23 @@ optional arguments: } } - async function fetchRepoCommitsAndContributors(repo, repoCommits) { + async function fetchRepoCommitsAndContributors(ghcl, repo, repoCommits) { + const tag = `[${ghcl.version}] Fetch Commits & Contribs - ${repo.full_name} -`; + repo.contributors = repo.contributors || {}; + // repo.gh_api_version_constributors = ghcl.version; + repoCommits.version_constributors = ghcl.version; repoCommits.contributors = repoCommits.contributors || {}; repoCommits.last_fetched_commit = repoCommits.last_fetched_commit || { sha: null, date: '2000-01-01T00:00:00Z' }; - const spinnerText = `Fetching ${repo.full_name}'s commits...`; - spinner = ora(spinnerText).start(); + console.log(`${tag} starting`); if (!repo.fetching_since || repo.fetched_at && new Date(repo.fetched_at) > new Date(repo.pushed_at)) { - spinner.succeed(`${repo.full_name} hasn't changed`); + console.log(`${tag} hasn't changed`); return; } @@ -248,15 +251,15 @@ optional arguments: const perPage = 100; pages: for (let page = 1;; ++page) { - spinner.start(`${spinnerText} [page ${page}]`); - const ghDataJson = await ghcl.commits(spinner, [404, 500], repo.full_name, repoCommits.last_fetched_commit.date, page, perPage, ghAPIV4Cursor); - ghAPIV4Cursor = ghDataJson[0] ? ghDataJson[0].cursor : undefined + console.log(`${tag} [page ${page}]`); + const ghDataJson = await ghcl.commits([404, 500], repo.full_name, repoCommits.last_fetched_commit.date, page, perPage, ghAPIV4Cursor); + ghAPIV4Cursor = ghDataJson[0] ? ghDataJson[0].cursor : undefined; switch (ghDataJson) { case 404: // The repo has been removed during the current run. It will be marked as removed in the // next run. For now just don't crash. - spinner.succeed(`${repo.full_name} was just removed from GitHub`); + console.log(`${tag} was just removed from GitHub`); return; case 500: // Workaround for #8 if (page > 1000) { @@ -264,7 +267,7 @@ optional arguments: repoCommits.ghuser_truncated = true; break pages; } - spinner.fail(); + console.log(`${tag} failed.`); return; } @@ -326,24 +329,27 @@ optional arguments: } if (page >= 10000) { - spinner.fail(); + console.log(`${tag} failed`); throw 'fetchRepoCommitsAndContributors(): Infinite loop?'; } } - spinner.succeed(`Fetched ${repo.full_name}'s commits`); + console.log(`${tag} finished`); repo.write(); repoCommits.last_fetched_commit = mostRecentCommit || repoCommits.last_fetched_commit; repoCommits.write(); } - async function fetchRepoPullRequests(repo) { - spinner = ora(`Fetching ${repo.full_name}'s pull requests...`).start(); + async function fetchRepoPullRequests(ghcl, repo) { + const tag = `[${ghcl.version}] Fetch PRs - ${repo.full_name} -`; + + // repo.gh_api_version_pulls = ghcl.version; + console.log(`${tag} starting`); if (!repo.fetching_since || repo.fetched_at && new Date(repo.fetched_at) > new Date(repo.pushed_at)) { - spinner.succeed(`${repo.full_name} hasn't changed`); + console.log(`${tag} hasn't changed`); return; } @@ -352,17 +358,17 @@ optional arguments: let ghAPIV4Cursor; const perPage = 100; for (let page = 1;; ++page) { - const ghDataJson = await ghcl.pullRequests(spinner, [404, 500], repo.full_name, page, perPage, ghAPIV4Cursor); - ghAPIV4Cursor = ghDataJson[0] ? ghDataJson[0].cursor : undefined + const ghDataJson = await ghcl.pullRequests([404, 500], repo.full_name, page, perPage, ghAPIV4Cursor); + ghAPIV4Cursor = ghDataJson[0] ? ghDataJson[0].cursor : undefined; switch (ghDataJson) { case 404: // The repo has been removed during the current run. It will be marked as removed in the // next run. For now just don't crash. - spinner.succeed(`${repo.full_name} was just removed from GitHub`); + console.log(`${tag} was just removed from GitHub`); return; case 500: // Workaround for #8 - spinner.fail(); + console.log(`${tag} failed`); return; } @@ -382,35 +388,38 @@ optional arguments: } if (page >= 10000) { - spinner.fail(); + console.log(`${tag} failed`); throw 'fetchRepoPullRequests(): Infinite loop?'; } } - spinner.succeed(`Fetched ${repo.full_name}'s pull requests`); + console.log(`${tag} finished`); repo.pulls_authors = [...authors]; repo.write(); } - async function fetchRepoLanguages(repo) { - spinner = ora(`Fetching ${repo.full_name}'s languages...`).start(); + async function fetchRepoLanguages(ghcl, repo) { + const tag = `[${ghcl.version}] Fetch Languages - ${repo.full_name} -`; + + // repo.gh_api_version_languages = ghcl.version; + console.log(`${tag} starting`); if (!repo.fetching_since || repo.fetched_at && new Date(repo.fetched_at) > new Date(repo.pushed_at)) { - spinner.succeed(`${repo.full_name} hasn't changed`); + console.log(`${tag} hasn't changed`); return; } - const ghDataJson = await ghcl.repoLanguages(spinner, [404], repo.full_name); + const ghDataJson = await ghcl.repoLanguages([404], repo.full_name); if (ghDataJson === 404) { // The repo has been removed during the current run. It will be marked as removed in the // next run. For now just don't crash. - spinner.succeed(`${repo.full_name} was just removed from GitHub`); + console.log(`${tag} was just removed from GitHub`); return; } - spinner.succeed(`Fetched ${repo.full_name}'s languages`); + console.log(`${tag} finished`); for (let language in ghDataJson) { ghDataJson[language] = { @@ -424,28 +433,30 @@ optional arguments: } async function fetchRepoSettings(repo) { - spinner = ora(`Fetching ${repo.full_name}'s settings...`).start(); + const tag = `Fetch Settings - ${repo.full_name} -`; + + console.log(`${tag} starting`); if (!repo.fetching_since || repo.fetched_at && new Date(repo.fetched_at) > new Date(repo.pushed_at)) { - spinner.succeed(`${repo.full_name} hasn't changed`); + console.log(`${tag} hasn't changed`); return; } for (const fileName of ['.ghuser.io.json', '.github/ghuser.io.json']) { const url = `https://raw.githubusercontent.com/${repo.full_name}/master/${fileName}`; - const dataJson = await fetchJson(url, spinner, [404]); + const dataJson = await fetchJson(url, null, [404]); if (dataJson === 404) { continue; } - spinner.succeed(`Fetched ${repo.full_name}'s settings`); + console.log(`${tag} finished`); repo.settings = dataJson; repo.write(); return; } - spinner.succeed(`${repo.full_name} has no settings`); + console.log(`${tag} has no settings`); } function markRepoAsFullyFetched(repo) { @@ -489,3 +500,15 @@ optional arguments: } })(); + +const loopDo = async (jobs, jobFn) => { + while(true) { + const job = jobs.pop(); + + if (!job) { + break + } + + await jobFn(job); + } +}; diff --git a/impl/github.js b/impl/github.js index a07fa626a..030873e03 100755 --- a/impl/github.js +++ b/impl/github.js @@ -57,12 +57,12 @@ // Returns GitHub's rate limit object for reference. async function waitForRateLimit(oraSpinner, isGraphQL) { const oldSpinnerText = oraSpinner && oraSpinner.text; + const key = isGraphQL ? "graphql" : "core"; let rateLimit = await fetchGHRateLimit(oraSpinner); - const lim = rateLimit[isGraphQL ? "graphql" : "core"] - if (lim.remaining <= 10) { + if (rateLimit[key].remaining <= 10) { const now = (new Date).getTime() / 1000; - const secondsToSleep = Math.ceil(lim.reset - now) + 1; + const secondsToSleep = Math.ceil(rateLimit[key].reset - now) + 1; if (secondsToSleep >= 0) { if (oraSpinner) { oraSpinner.text += ` (waiting ${secondsToSleep} second(s) for API rate limit)`; @@ -70,10 +70,10 @@ await sleep(secondsToSleep * 1000); rateLimit = await fetchGHRateLimit(oraSpinner); - if (lim.remaining <= 10) { + if (rateLimit[key].remaining <= 10) { console.error('\nAPI rate limit is still low:'); console.error(rateLimit); - console.error(`next reset in ${Math.ceil(lim.reset - ((new Date).getTime() / 1000))} seconds(s)`); + console.error(`next reset in ${Math.ceil(rateLimit[key].reset - ((new Date).getTime() / 1000))} seconds(s)`); throw 'API rate limit is still low after waiting'; } diff --git a/impl/githubV3.js b/impl/githubV3.js index 869145dd3..6980700c7 100644 --- a/impl/githubV3.js +++ b/impl/githubV3.js @@ -5,27 +5,30 @@ const gh = require('./github'); - const repo = async (oraSpinner, errCodes, repoFullName, repoFetchedAtDate) => { + const version = "V3"; + + const repo = async (errCodes, repoFullName, repoFetchedAtDate) => { const ghRepoUrl = `https://api.github.com/repos/${repoFullName}`; - return await gh.fetchGHJson(ghRepoUrl, oraSpinner, errCodes, repoFetchedAtDate); + return await gh.fetchGHJson(ghRepoUrl, null, errCodes, repoFetchedAtDate); }; - const commits = async (oraSpinner, errCodes, repoFullName, lastFetchedCommitDateStr, page, perPage) => { + const commits = async (errCodes, repoFullName, lastFetchedCommitDateStr, page, perPage) => { const ghUrl = `https://api.github.com/repos/${repoFullName}/commits?since=${lastFetchedCommitDateStr}&page=${page}&per_page=${perPage}`; - return await gh.fetchGHJson(ghUrl, oraSpinner, errCodes); - } + return await gh.fetchGHJson(ghUrl, null, errCodes); + }; - const pullRequests = async (oraSpinner, errCodes, repoFullName, page, perPage) => { + const pullRequests = async (errCodes, repoFullName, page, perPage) => { const ghUrl = `https://api.github.com/repos/${repoFullName}/pulls?state=all&page=${page}&per_page=${perPage}`; - return await gh.fetchGHJson(ghUrl, oraSpinner, errCodes); - } + return await gh.fetchGHJson(ghUrl, null, errCodes); + }; - const repoLanguages = async (oraSpinner, errCodes, repoFullName) => { + const repoLanguages = async (errCodes, repoFullName) => { const ghUrl = `https://api.github.com/repos/${repoFullName}/languages`; - return await gh.fetchGHJson(ghUrl, oraSpinner, errCodes); - } + return await gh.fetchGHJson(ghUrl, null, errCodes); + }; module.exports = { + version, repo, commits, pullRequests, diff --git a/impl/githubV4.js b/impl/githubV4.js index d264c82c9..dc34caa5f 100644 --- a/impl/githubV4.js +++ b/impl/githubV4.js @@ -6,6 +6,8 @@ const gh = require('./github'); + const version = "V4"; + const repoQuery = ` query($owner: String!, $name: String!) { repository(owner: $owner, name: $name) { @@ -14,6 +16,7 @@ query($owner: String!, $name: String!) { isPrivate owner { login + __typename } url description @@ -41,79 +44,62 @@ query($owner: String!, $name: String!) { defaultBranchRef { name } - languages(first: 100) { - edges { - size - node { - color - name - } - } - } } } -` - const repo = async (oraSpinner, errCodes, repoFullName, repoFetchedAtDate) => { +`; + // TODO: This sets "organization" field if appropriate, but as an empty object. + const repo = async (errCodes, repoFullName, repoFetchedAtDate) => { // TODO: Use repoFetchedAtDate - // TODO: This doesn't fetch "organization" as v3 does. Check if it's a problem. - const dataJson = await gh.fetchGHJson('https://api.github.com/graphql', oraSpinner, errCodes, null, { + const dataJson = await gh.fetchGHJson('https://api.github.com/graphql', null, errCodes, null, { query: repoQuery, variables: buildCommonRepoVariables(repoFullName), - }) + }); - if (!(dataJson instanceof Object)) { - return dataJson - } - if (dataJson.errors) { - switch (dataJson.errors[0].type) { - case "NOT_FOUND": - return 404 - } + const err = checkResponse(dataJson); + if (err != null) { + return err } - const r = dataJson.data.repository + const r = dataJson.data.repository; - let res = {} - res.name = r.name - res.full_name = r.nameWithOwner - res.private = r.isPrivate - res.owner = r.owner.login - res.html_url = r.url - res.description = r.description - res.fork = r.isFork - res.url = "https://api.github.com/repos/" + r.nameWithOwner - res.languages_url = "https://api.github.com/repos/" + r.nameWithOwner + "/languages" - res.pulls_url = "https://api.github.com/repos/" + r.nameWithOwner + "/pulls{/number}" + let res = {}; + res.name = r.name; + res.full_name = r.nameWithOwner; + res.private = r.isPrivate; + res.owner = r.owner.login; + res.html_url = r.url; + res.description = r.description; + res.fork = r.isFork; + res.url = "https://api.github.com/repos/" + r.nameWithOwner; + res.languages_url = "https://api.github.com/repos/" + r.nameWithOwner + "/languages"; + res.pulls_url = "https://api.github.com/repos/" + r.nameWithOwner + "/pulls{/number}"; // format: "2015-09-10T02:15:47Z" - res.created_at = coerceDate(r.createdAt) - res.updated_at = coerceDate(r.updatedAt) - res.pusher_at = coerceDate(r.pushedAt) - - res.homepage = r.homepageUrl // TODO verify expected URL - res.size = r.diskUsage - res.stargazers_count = r.stargazers.totalCount - res.language = r.primaryLanguage.name - res.mirror_url = r.mirrorUrl - res.archived = r.isArchived - res.default_branch = r.defaultBranchRef.name + res.created_at = coerceDate(r.createdAt); + res.updated_at = coerceDate(r.updatedAt); + res.pusher_at = coerceDate(r.pushedAt); + + res.homepage = r.homepageUrl; // TODO verify expected URL + res.size = r.diskUsage; + res.stargazers_count = r.stargazers.totalCount; + res.language = r.primaryLanguage.name; + res.mirror_url = r.mirrorUrl; + res.archived = r.isArchived; if (r.licenseInfo) { - res.license = {} - res.license.key = r.licenseInfo.key - res.license.name = r.licenseInfo.name - res.license.spdx_id = r.licenseInfo.spdxId - res.license.url = r.licenseInfo.url - res.license.node_id = r.licenseInfo.id + res.license = {}; + res.license.key = r.licenseInfo.key; + res.license.name = r.licenseInfo.name; + res.license.spdx_id = r.licenseInfo.spdxId; + res.license.url = r.licenseInfo.url; + res.license.node_id = r.licenseInfo.id; } - res.languages = {} - for (let it of r.languages.edges) { - res.languages[it.node.name] = { - bytes: it.size, - color: it.node.color, - } + res.default_branch = r.defaultBranchRef.name; + + if (r.owner.__typename === "Organization") { + res.organization = {}; } return res @@ -150,26 +136,26 @@ query($owner: String!, $name: String!, $cursor: String, $since: GitTimestamp) { } } } -` - const commits = async (oraSpinner, errCodes, repoFullName, lastFetchedCommitDateStr, page, perPage, v4cursor = null) => { +`; + const commits = async (errCodes, repoFullName, lastFetchedCommitDateStr, page, perPage, v4cursor = null) => { - let variables = buildCommonRepoVariables(repoFullName, page, v4cursor) - variables.since = lastFetchedCommitDateStr - const dataJson = await gh.fetchGHJson('https://api.github.com/graphql', oraSpinner, errCodes, null, { + let variables = buildCommonRepoVariables(repoFullName, page, v4cursor); + variables.since = lastFetchedCommitDateStr; + const dataJson = await gh.fetchGHJson('https://api.github.com/graphql', null, errCodes, null, { query: commitsQuery, variables: variables, - }) + }); if (!(dataJson instanceof Object)) { return dataJson } - let edges = dataJson.data.repository.ref.target.history.edges + let edges = dataJson.data.repository.ref.target.history.edges; - let res = [] + let res = []; for (let e of edges) { - const author = e.node.author - const committer = e.node.committer + const author = e.node.author; + const committer = e.node.committer; res.push({ sha: e.node.oid, @@ -190,7 +176,7 @@ query($owner: String!, $name: String!, $cursor: String, $since: GitTimestamp) { }) } - insertCursor(res, edges) + insertCursor(res, edges); return res }; @@ -211,21 +197,21 @@ query($owner: String!, $name: String!, $cursor: String) { } } } -` - const pullRequests = async (oraSpinner, errCodes, repoFullName, page, perPage, v4cursor = null) => { +`; + const pullRequests = async (errCodes, repoFullName, page, perPage, v4cursor = null) => { - const dataJson = await gh.fetchGHJson('https://api.github.com/graphql', oraSpinner, errCodes, null, { + const dataJson = await gh.fetchGHJson('https://api.github.com/graphql', null, errCodes, null, { query: pullRequestsQuery, variables: buildCommonRepoVariables(repoFullName, page, v4cursor), - }) + }); if (!(dataJson instanceof Object)) { return dataJson } - let edges = dataJson.data.repository.pullRequests.edges + let edges = dataJson.data.repository.pullRequests.edges; - let res = [] + let res = []; for (let e of edges) { res.push({ user: { @@ -234,31 +220,64 @@ query($owner: String!, $name: String!, $cursor: String) { }) } - insertCursor(res, edges) + insertCursor(res, edges); return res - } + }; - const repoLanguages = async () => { - throw "unexpected call to repoLanguages" - return + const repoLanguagesQuery = ` +query($owner: String!, $name: String!) { + repository(owner: $owner, name: $name) { + languages(first: 100) { + edges { + size + node { + name + } + } + } } +} +`; + const repoLanguages = async (errCodes, repoFullName) => { + + const dataJson = await gh.fetchGHJson('https://api.github.com/graphql', null, errCodes, null, { + query: repoLanguagesQuery, + variables: buildCommonRepoVariables(repoFullName), + }); + + const err = checkResponse(dataJson); + if (err != null) { + return err + } + + const r = dataJson.data.repository; + + let res = {}; + + for (let it of r.languages.edges) { + res[it.node.name] = it.size + } + + return res + }; module.exports = { + version, repo, commits, pullRequests, repoLanguages, - } + }; function buildCommonRepoVariables(repoFullName, page, cursor) { - let owner, repoName - [owner, repoName] = repoFullName.split("/") + let owner, repoName; + [owner, repoName] = repoFullName.split("/"); let variables = { owner: owner, name: repoName, - } + }; if (isNaN(page) || page === 1) { return variables @@ -268,7 +287,7 @@ query($owner: String!, $name: String!, $cursor: String) { throw "expected cursor not null" } - variables.cursor = cursor + variables.cursor = cursor; return variables } @@ -277,8 +296,7 @@ query($owner: String!, $name: String!, $cursor: String) { // Pass the cursor in the first element of the response. function insertCursor(resultArray, edgesArray) { if (resultArray.length > 0 && edgesArray.length > 0) { - const cursor = edgesArray.slice(-1)[0].cursor - console.log(cursor) + const cursor = edgesArray.slice(-1)[0].cursor; resultArray[0].cursor = cursor } } @@ -290,4 +308,19 @@ query($owner: String!, $name: String!, $cursor: String) { return (new Date(dateStr)).toISOString() } + function checkResponse(dataJson) { + if (!(dataJson instanceof Object)) { + return 500 + } + if (dataJson.errors) { + switch (dataJson.errors[0].type) { + case "NOT_FOUND": + return 404; + default: + return 500; + } + } + return null; + } + })(); From 72b00ab697b8e5618623b98eebf62ecaea42c9e6 Mon Sep 17 00:00:00 2001 From: wowawiwa Date: Thu, 11 Oct 2018 22:11:42 +0200 Subject: [PATCH 4/8] Handle V4 error on no language --- impl/githubV4.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/impl/githubV4.js b/impl/githubV4.js index dc34caa5f..3bf8f7333 100644 --- a/impl/githubV4.js +++ b/impl/githubV4.js @@ -83,7 +83,7 @@ query($owner: String!, $name: String!) { res.homepage = r.homepageUrl; // TODO verify expected URL res.size = r.diskUsage; res.stargazers_count = r.stargazers.totalCount; - res.language = r.primaryLanguage.name; + res.language = r.primaryLanguage ? r.primaryLanguage.name : null; res.mirror_url = r.mirrorUrl; res.archived = r.isArchived; From 1956c4aabb67c08d0a72cb97c2727787ab2c8ad9 Mon Sep 17 00:00:00 2001 From: wowawiwa Date: Thu, 11 Oct 2018 22:18:31 +0200 Subject: [PATCH 5/8] Fix v4 bugs --- impl/githubV4.js | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/impl/githubV4.js b/impl/githubV4.js index 3bf8f7333..cd997bf86 100644 --- a/impl/githubV4.js +++ b/impl/githubV4.js @@ -67,7 +67,9 @@ query($owner: String!, $name: String!) { res.name = r.name; res.full_name = r.nameWithOwner; res.private = r.isPrivate; - res.owner = r.owner.login; + res.owner = { + login: r.owner.login + }; res.html_url = r.url; res.description = r.description; res.fork = r.isFork; @@ -78,7 +80,7 @@ query($owner: String!, $name: String!) { // format: "2015-09-10T02:15:47Z" res.created_at = coerceDate(r.createdAt); res.updated_at = coerceDate(r.updatedAt); - res.pusher_at = coerceDate(r.pushedAt); + res.pushed_at = coerceDate(r.pushedAt); res.homepage = r.homepageUrl; // TODO verify expected URL res.size = r.diskUsage; From 4dc5df919da71f8ff2b9a0a7eec1987cad721ad7 Mon Sep 17 00:00:00 2001 From: Aurelien Lourot Date: Sat, 20 Oct 2018 15:55:21 +0200 Subject: [PATCH 6/8] Removed API version from DB. --- fetchRepos.js | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fetchRepos.js b/fetchRepos.js index 82a1c9a33..c0c1b72a3 100755 --- a/fetchRepos.js +++ b/fetchRepos.js @@ -136,7 +136,6 @@ optional arguments: console.log(`${tag} starting`); const repo = new DbFile(repoPaths[repoFullName].repo); - // repo.gh_api_version = ghcl.version; const now = new Date; const maxAgeHours = firsttime && (24 * 365) || 12; @@ -229,8 +228,6 @@ optional arguments: const tag = `[${ghcl.version}] Fetch Commits & Contribs - ${repo.full_name} -`; repo.contributors = repo.contributors || {}; - // repo.gh_api_version_constributors = ghcl.version; - repoCommits.version_constributors = ghcl.version; repoCommits.contributors = repoCommits.contributors || {}; repoCommits.last_fetched_commit = repoCommits.last_fetched_commit || { sha: null, @@ -344,7 +341,6 @@ optional arguments: async function fetchRepoPullRequests(ghcl, repo) { const tag = `[${ghcl.version}] Fetch PRs - ${repo.full_name} -`; - // repo.gh_api_version_pulls = ghcl.version; console.log(`${tag} starting`); if (!repo.fetching_since || repo.fetched_at && @@ -402,7 +398,6 @@ optional arguments: async function fetchRepoLanguages(ghcl, repo) { const tag = `[${ghcl.version}] Fetch Languages - ${repo.full_name} -`; - // repo.gh_api_version_languages = ghcl.version; console.log(`${tag} starting`); if (!repo.fetching_since || repo.fetched_at && From 33296bf507123cfc8f6d3d39d7c4c8b990073bb8 Mon Sep 17 00:00:00 2001 From: Aurelien Lourot Date: Sat, 20 Oct 2018 19:05:45 +0200 Subject: [PATCH 7/8] Harmonized traces. --- fetchRepos.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fetchRepos.js b/fetchRepos.js index c0c1b72a3..096fb7831 100755 --- a/fetchRepos.js +++ b/fetchRepos.js @@ -264,7 +264,7 @@ optional arguments: repoCommits.ghuser_truncated = true; break pages; } - console.log(`${tag} failed.`); + console.log(`${tag} failed`); return; } From 413033e0c1199fc979a2645f94a3d9bfd3e271fa Mon Sep 17 00:00:00 2001 From: Aurelien Lourot Date: Thu, 15 Nov 2018 15:13:59 +0100 Subject: [PATCH 8/8] Made linter happy. --- impl/data.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/impl/data.js b/impl/data.js index 36f1c7999..7614bfd04 100755 --- a/impl/data.js +++ b/impl/data.js @@ -8,7 +8,7 @@ const dbPath = process.env.GHUSER_DBDIR || path.join(os.homedir(), 'data'); if (!fs.existsSync(dbPath)) { - throw `${dbPath} directory doesn't exist` + throw `${dbPath} directory doesn't exist`; } module.exports = {