From de9c3d74300147d4d601d0668c9e2d7da6c7953e Mon Sep 17 00:00:00 2001 From: tdurieux Date: Mon, 6 Sep 2021 16:20:59 +0200 Subject: [PATCH] fix: fix migration script --- migrateDB.ts | 345 +++++++++++++++++++++++++-------------------------- 1 file changed, 167 insertions(+), 178 deletions(-) diff --git a/migrateDB.ts b/migrateDB.ts index aaab23f..2734724 100644 --- a/migrateDB.ts +++ b/migrateDB.ts @@ -31,198 +31,187 @@ async function connect(db) { const oldDB = await connect("anonymous_github"); console.log("Import Users"); - await new Promise(async (resolve) => { - const promises = []; - await oldDB - .collection("users") - .find() - .batchSize(1) - .forEach(async (r) => { - let localResolve = null; - const p = new Promise((r) => (localResolve = r)); - promises.push(p); - console.log("Import User: " + r.username); + let index = 0; + const userQuery = oldDB.collection("users").find(); + const totalUser = await userQuery.count(); - const newRepos = []; - const allRepoIds = []; - if (r.repositories) { - const finds = await RepositoryModel.find({ - externalId: { - $in: r.repositories.map((repo) => "gh_" + repo.id), - }, - }).select("externalId"); - finds.forEach((f) => allRepoIds.push(f.id)); + while (await userQuery.hasNext()) { + const r = await userQuery.next(); + index++; + console.log(`Import User [${index}/${totalUser}]: ${r.username}`); - const repoIds = new Set(); - const toInsert = r.repositories.filter((f) => { - if (repoIds.has(f.id)) return false; - repoIds.add(f.id); - const externalId = "gh_" + f.id; - return finds.filter((f) => f.externalId == externalId).length == 0; - }); + const newRepos = []; + const allRepoIds = []; + if (r.repositories) { + const finds = await RepositoryModel.find({ + externalId: { + $in: r.repositories.map((repo) => "gh_" + repo.id), + }, + }).select("externalId"); + finds.forEach((f) => allRepoIds.push(f.id)); - for (const repo of toInsert) { - newRepos.push( - new RepositoryModel({ - externalId: "gh_" + repo.id, - name: repo.full_name, - url: repo.html_url, - size: repo.size, - defaultBranch: repo.default_branch, - }) - ); - } - if (newRepos.length > 0) { - await RepositoryModel.insertMany(newRepos); - } - newRepos.forEach((f) => allRepoIds.push(f.id)); - } - const user = new UserModel({ - accessTokens: { - github: r.accessToken, - }, - externalIDs: { - github: r.profile.id, - }, - username: r.username, - emails: r.profile.emails?.map((email) => { - return { email: email.value, default: false }; - }), - photo: r.profile.photos[0]?.value, - repositories: allRepoIds, - default: { - terms: r.default?.terms, - options: r.default?.options, - }, - }); - if (user.emails?.length) user.emails[0].default = true; - - await user.save(); - - localResolve(user); + const repoIds = new Set(); + const toInsert = r.repositories.filter((f) => { + if (repoIds.has(f.id)) return false; + repoIds.add(f.id); + const externalId = "gh_" + f.id; + return finds.filter((f) => f.externalId == externalId).length == 0; }); - Promise.all(promises).then(resolve); - }); + + for (const repo of toInsert) { + newRepos.push( + new RepositoryModel({ + externalId: "gh_" + repo.id, + name: repo.full_name, + url: repo.html_url, + size: repo.size, + defaultBranch: repo.default_branch, + }) + ); + } + if (newRepos.length > 0) { + await RepositoryModel.insertMany(newRepos); + } + newRepos.forEach((f) => allRepoIds.push(f.id)); + } + const user = new UserModel({ + accessTokens: { + github: r.accessToken, + }, + externalIDs: { + github: r.profile.id, + }, + username: r.username, + emails: r.profile.emails?.map((email) => { + return { email: email.value, default: false }; + }), + photo: r.profile.photos[0]?.value, + repositories: allRepoIds, + default: { + terms: r.default?.terms, + options: r.default?.options, + }, + }); + if (user.emails?.length) user.emails[0].default = true; + + await user.save(); + } console.log("Import Repositories"); - let promises = []; - await oldDB - .collection("repositories") - .find({}) - .batchSize(1) - .forEach(async (r) => { - if (!r.id) return; - let localResolve = null; - const p = new Promise((r) => (localResolve = r)); - promises.push(p); + const repoQuery = oldDB.collection("repositories").find(); + const totalRepository = await repoQuery.count(); + index = 0; + while (await repoQuery.hasNext()) { + const r = await repoQuery.next(); + if (!r.id) continue; + index++; + console.log( + `Import Repository [${index}/${totalRepository}]: ${r.fullName}` + ); - let find = await RepositoryModel.findOne({ + let find = await RepositoryModel.findOne({ + externalId: "gh_" + r.id, + }); + + if (find == null) { + find = new RepositoryModel({ externalId: "gh_" + r.id, + name: r.fullName, + url: r.html_url, + size: r.size, + defaultBranch: r.default_branch, }); - // console.log("gh_" + r.id, find != null); - if (find == null) { - find = new RepositoryModel({ - externalId: "gh_" + r.id, - name: r.fullName, - url: r.html_url, - size: r.size, - defaultBranch: r.default_branch, - }); - } - if (r.branches) { - const branches = [...Object.values(r.branches)].map((b: any) => { - const o: any = { name: b.name, commit: b.commit.sha }; - if (b.name == find.defaultBranch) { - o.readme = r.readme; - } - return o; - }); - find.branches = branches; - } - await find.save(); - localResolve(); - }); - await Promise.all(promises); + } + if (r.branches) { + const branches = [...Object.values(r.branches)].map((b: any) => { + const o: any = { name: b.name, commit: b.commit.sha }; + if (b.name == find.defaultBranch) { + o.readme = r.readme; + } + return o; + }); + find.branches = branches; + } + await find.save(); + } + console.log("Import Anonymized Repositories"); - promises = []; - await oldDB - .collection("anonymized_repositories") - .find({}) - .forEach(async (r) => { - let localResolve = null; - const p = new Promise((r) => (localResolve = r)); - promises.push(p); + const anoQuery = oldDB.collection("anonymized_repositories").find(); + const totalAno = await anoQuery.count(); + index = 0; + while (await anoQuery.hasNext()) { + const r = await anoQuery.next(); - let repo = await RepositoryModel.findOne({ name: r.fullName }); - if (repo == null) { - const tmp = await oldDB - .collection("repositories") - .findOne({ fullName: r.fullName }); - if (tmp) { - repo = await RepositoryModel.findOne({ externalId: "gh_" + tmp.id }); - } else { - console.error(`Repository ${r.fullName} is not found (renamed)`); + index++; + console.log( + `Import Anonymized Repository [${index}/${totalAno}]: ${r.repoId}` + ); + + let repo = await RepositoryModel.findOne({ name: r.fullName }); + if (repo == null) { + const tmp = await oldDB + .collection("repositories") + .findOne({ fullName: r.fullName }); + if (tmp) { + repo = await RepositoryModel.findOne({ externalId: "gh_" + tmp.id }); + } else { + console.error(`Repository ${r.fullName} is not found (renamed)`); + } + } + let size = { storage: 0, file: 0 }; + function recursiveCount(files) { + const out = { storage: 0, file: 0 }; + for (const name in files) { + const file = files[name]; + if (file.size && file.sha && parseInt(file.size) == file.size) { + out.storage += file.size as number; + out.file++; + } else if (typeof file == "object") { + const r = recursiveCount(file); + out.storage += r.storage; + out.file += r.file; } } - let size = { storage: 0, file: 0 }; - function recursiveCount(files) { - const out = { storage: 0, file: 0 }; - for (const name in files) { - const file = files[name]; - if (file.size && file.sha && parseInt(file.size) == file.size) { - out.storage += file.size as number; - out.file++; - } else if (typeof file == "object") { - const r = recursiveCount(file); - out.storage += r.storage; - out.file += r.file; - } - } - return out; - } + return out; + } - if (r.originalFiles) { - size = recursiveCount(r.originalFiles); - } - const owner = await UserModel.findOne({ username: r.owner }).select( - "_id" - ); - await new AnonymizedRepositoryModel({ - repoId: r.repoId, - status: r.status, - anonymizeDate: r.anonymizeDate, - lastView: r.lastView, - pageView: r.pageView, - owner: owner?.id, - size, - source: { - accessToken: r.token, - type: - r.options.mode == "download" ? "GitHubDownload" : "GitHubStream", - branch: r.branch, - commit: r.commit, - repositoryId: repo?.id, - repositoryName: r.fullName, - }, - options: { - terms: r.terms, - expirationMode: r.options.expirationMode, - expirationDate: r.options.expirationDate - ? new Date(r.options.expirationDate) - : null, - update: r.options.update, - image: r.options.image, - pdf: r.options.pdf, - notebook: r.options.notebook, - loc: r.options.loc, - link: r.options.link, - page: r.options.page, - pageSource: r.options.pageSource, - }, - }).save(); - localResolve(); - }); - await Promise.all(promises); + if (r.originalFiles) { + size = recursiveCount(r.originalFiles); + } + const owner = await UserModel.findOne({ username: r.owner }).select("_id"); + await new AnonymizedRepositoryModel({ + repoId: r.repoId, + status: r.status, + anonymizeDate: r.anonymizeDate, + lastView: r.lastView, + pageView: r.pageView, + owner: owner?.id, + size, + source: { + accessToken: r.token, + type: r.options.mode == "download" ? "GitHubDownload" : "GitHubStream", + branch: r.branch, + commit: r.commit, + repositoryId: repo?.id, + repositoryName: r.fullName, + }, + options: { + terms: r.terms, + expirationMode: r.options.expirationMode, + expirationDate: r.options.expirationDate + ? new Date(r.options.expirationDate) + : null, + update: r.options.update, + image: r.options.image, + pdf: r.options.pdf, + notebook: r.options.notebook, + loc: r.options.loc, + link: r.options.link, + page: r.options.page, + pageSource: r.options.pageSource, + }, + }).save(); + } console.log("Import finished!"); setTimeout(() => process.exit(), 5000); })();