From 1ffbf2fedd18175c5ed78bd4f4983dcf890c1e49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=93lafur=20P=C3=A1ll=20Geirsson?= Date: Mon, 17 Oct 2022 22:17:05 +0200 Subject: [PATCH 1/3] Optimization: cache results between projects Towards https://github.com/sourcegraph/scip-typescript/issues/175 Previously, scip-typescript didn't cache anything at all between TypeScript projects. This commit implements an optimization so that we now cache the results of loading source files and parsing options. Benchmarks against the sourcegraph/sourcegraph repo indicate this optimization consistently speeds up the `index` command in all three multi-project repositories that I tested it with. - sourcegraph/sourcegraph: ~30% from ~100s to ~70s - nextautjs/next-auth: ~40% from 6.5s to 3.9 - xtermjs/xterm.js: ~45% from 7.3s to 4.1s For every repo, I additionally validated that the resulting index.scip has identical checksum before and after applying this optimization. Given these promising results, this new optimization is enabled by default, but can be disabled with the option `--no-global-cache`. *Test plan* Manually tested by running `scip-typescript index tsconfig.all.json` in the sourcegraph/sourcegraph repository. To benchmark the difference for this PR: - Checkout the code - Run `yarn tsc -b` - Go to the directory of your project - Run `node PATH_TO_SCIP_TYPESCRIPT/dist/src/main.js` - Copy the "optimized" index.scip with `cp index.scip index-withcache.scip` - Run `node PATH_TO_SCIP_TYPESCRIPT/dist/src/main.js --no-global-caches` - Validate the checksum is identical from the optimized output `shasum -a 256 *.scip` --- src/CommandLineOptions.ts | 12 +++++- src/ProjectIndexer.ts | 86 ++++++++++++++++++++++++++------------- src/main.test.ts | 1 + src/main.ts | 39 ++++++++++++------ 4 files changed, 96 insertions(+), 42 deletions(-) diff --git a/src/CommandLineOptions.ts b/src/CommandLineOptions.ts index d57e2db6..0c9e6700 100644 --- a/src/CommandLineOptions.ts +++ b/src/CommandLineOptions.ts @@ -1,4 +1,6 @@ import { Command } from 'commander' +// eslint-disable-next-line id-length +import ts from 'typescript' import packageJson from '../package.json' @@ -10,6 +12,7 @@ export interface MultiProjectOptions { progressBar: boolean yarnWorkspaces: boolean yarnBerryWorkspaces: boolean + globalCaches: boolean cwd: string output: string indexedProjects: Set @@ -22,6 +25,12 @@ export interface ProjectOptions extends MultiProjectOptions { writeIndex: (index: lsif.lib.codeintel.lsiftyped.Index) => void } +/** Cached values */ +export interface GlobalCache { + sources: Map + parsedCommandLines: Map +} + export function mainCommand( indexAction: (projects: string[], otpions: MultiProjectOptions) => void ): Command { @@ -47,7 +56,8 @@ export function mainCommand( false ) .option('--output ', 'path to the output file', 'index.scip') - .option('--no-progress-bar', 'whether to disable the progress bar') + .option('--progress-bar', 'whether to enable a rich progress bar') + .option('--no-global-caches', 'whether to disable global caches between TypeScript projects') .argument('[projects...]') .action((parsedProjects, parsedOptions) => { indexAction( diff --git a/src/ProjectIndexer.ts b/src/ProjectIndexer.ts index 3fe9c023..33cc8b99 100644 --- a/src/ProjectIndexer.ts +++ b/src/ProjectIndexer.ts @@ -1,17 +1,53 @@ import * as path from 'path' -import { Writable as WritableStream } from 'stream' import prettyMilliseconds from 'pretty-ms' import ProgressBar from 'progress' import * as ts from 'typescript' -import { ProjectOptions } from './CommandLineOptions' +import { GlobalCache, ProjectOptions } from './CommandLineOptions' import { FileIndexer } from './FileIndexer' import { Input } from './Input' import * as lsif from './lsif' import { LsifSymbol } from './LsifSymbol' import { Packages } from './Packages' +function createCompilerHost( + cache: GlobalCache, + compilerOptions: ts.CompilerOptions, + projectOptions: ProjectOptions +): ts.CompilerHost { + const host = ts.createCompilerHost(compilerOptions) + if (!projectOptions.globalCaches) { + return host + } + const hostCopy = { ...host } + host.getParsedCommandLine = (fileName: string) => { + if (!hostCopy.getParsedCommandLine) { + return undefined + } + if (cache.parsedCommandLines.has(fileName)) { + return cache.parsedCommandLines.get(fileName) + } + const result = hostCopy.getParsedCommandLine(fileName) + if (result !== undefined) { + cache.parsedCommandLines.set(fileName, result) + } + return result + } + host.getSourceFile = (fileName, languageVersion) => { + const fromCache = cache.sources.get(fileName) + if (fromCache !== undefined) { + return fromCache + } + const result = hostCopy.getSourceFile(fileName, languageVersion) + if (result !== undefined) { + cache.sources.set(fileName, result) + } + return result + } + return host +} + export class ProjectIndexer { private options: ProjectOptions private program: ts.Program @@ -20,10 +56,12 @@ export class ProjectIndexer { private packages: Packages constructor( public readonly config: ts.ParsedCommandLine, - options: ProjectOptions + options: ProjectOptions, + cache: GlobalCache ) { this.options = options - this.program = ts.createProgram(config.fileNames, config.options) + const host = createCompilerHost(cache, config.options, options) + this.program = ts.createProgram(config.fileNames, config.options, host) this.checker = this.program.getTypeChecker() this.packages = new Packages(options.projectRoot) } @@ -47,24 +85,24 @@ export class ProjectIndexer { ) } - const jobs = new ProgressBar( - ` ${this.options.projectDisplayName} [:bar] :current/:total :title`, - { - total: filesToIndex.length, - renderThrottle: 100, - incomplete: '_', - complete: '#', - width: 20, - clear: true, - stream: this.options.progressBar - ? process.stderr - : writableNoopStream(), - } - ) + const jobs: ProgressBar | undefined = !this.options.progressBar + ? undefined + : new ProgressBar( + ` ${this.options.projectDisplayName} [:bar] :current/:total :title`, + { + total: filesToIndex.length, + renderThrottle: 100, + incomplete: '_', + complete: '#', + width: 20, + clear: true, + stream: process.stderr, + } + ) let lastWrite = startTimestamp for (const [index, sourceFile] of filesToIndex.entries()) { const title = path.relative(this.options.cwd, sourceFile.fileName) - jobs.tick({ title }) + jobs?.tick({ title }) if (!this.options.progressBar) { const now = Date.now() const elapsed = now - lastWrite @@ -102,7 +140,7 @@ export class ProjectIndexer { ) } } - jobs.terminate() + jobs?.terminate() const elapsed = Date.now() - startTimestamp if (!this.options.progressBar && lastWrite > startTimestamp) { process.stdout.write('\n') @@ -112,11 +150,3 @@ export class ProjectIndexer { ) } } - -function writableNoopStream(): WritableStream { - return new WritableStream({ - write(_unused1, _unused2, callback) { - setImmediate(callback) - }, - }) -} diff --git a/src/main.test.ts b/src/main.test.ts index c0a2a528..41c42b60 100644 --- a/src/main.test.ts +++ b/src/main.test.ts @@ -54,6 +54,7 @@ for (const snapshotDirectory of snapshotDirectories) { yarnBerryWorkspaces: false, progressBar: false, indexedProjects: new Set(), + globalCaches: true }) if (inferTsconfig) { fs.rmSync(tsconfigJsonPath) diff --git a/src/main.ts b/src/main.ts index 64edd440..baca11ba 100644 --- a/src/main.ts +++ b/src/main.ts @@ -9,6 +9,7 @@ import * as ts from 'typescript' import packageJson from '../package.json' import { + GlobalCache, mainCommand, MultiProjectOptions, ProjectOptions, @@ -48,6 +49,11 @@ export function indexCommand( documentCount += index.documents.length fs.writeSync(output, index.serializeBinary()) } + + const cache: GlobalCache = { + sources: new Map(), + parsedCommandLines: new Map(), + } try { writeIndex( new lsiftyped.Index({ @@ -67,12 +73,15 @@ export function indexCommand( // they can have dependencies. for (const projectRoot of projects) { const projectDisplayName = projectRoot === '.' ? options.cwd : projectRoot - indexSingleProject({ - ...options, - projectRoot, - projectDisplayName, - writeIndex, - }) + indexSingleProject( + { + ...options, + projectRoot, + projectDisplayName, + writeIndex, + }, + cache + ) } } finally { fs.close(output) @@ -96,10 +105,11 @@ function makeAbsolutePath(cwd: string, relativeOrAbsolutePath: string): string { return path.resolve(cwd, relativeOrAbsolutePath) } -function indexSingleProject(options: ProjectOptions): void { +function indexSingleProject(options: ProjectOptions, cache: GlobalCache): void { if (options.indexedProjects.has(options.projectRoot)) { return } + options.indexedProjects.add(options.projectRoot) let config = ts.parseCommandLine( ['-p', options.projectRoot], @@ -125,15 +135,18 @@ function indexSingleProject(options: ProjectOptions): void { } for (const projectReference of config.projectReferences || []) { - indexSingleProject({ - ...options, - projectRoot: projectReference.path, - projectDisplayName: projectReference.path, - }) + indexSingleProject( + { + ...options, + projectRoot: projectReference.path, + projectDisplayName: projectReference.path, + }, + cache + ) } if (config.fileNames.length > 0) { - new ProjectIndexer(config, options).index() + new ProjectIndexer(config, options, cache).index() } } From d7329f6a5d813058c126b526e115508699750186 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=93lafur=20P=C3=A1ll=20Geirsson?= Date: Mon, 17 Oct 2022 22:48:02 +0200 Subject: [PATCH 2/3] Fix failing tests --- src/CommandLineOptions.test.ts | 1 - src/CommandLineOptions.ts | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/CommandLineOptions.test.ts b/src/CommandLineOptions.test.ts index 30af8db5..496399f6 100644 --- a/src/CommandLineOptions.test.ts +++ b/src/CommandLineOptions.test.ts @@ -24,7 +24,6 @@ function checkIndexParser( // defaults checkIndexParser([], { - progressBar: true, cwd: process.cwd(), inferTsconfig: false, output: 'index.scip', diff --git a/src/CommandLineOptions.ts b/src/CommandLineOptions.ts index 0c9e6700..8751daff 100644 --- a/src/CommandLineOptions.ts +++ b/src/CommandLineOptions.ts @@ -57,6 +57,8 @@ export function mainCommand( ) .option('--output ', 'path to the output file', 'index.scip') .option('--progress-bar', 'whether to enable a rich progress bar') + .option('--no-progress-bar', 'whether to disable the rich progress bar') + .option('--global-caches', 'whether to disable global caches between TypeScript projects') .option('--no-global-caches', 'whether to disable global caches between TypeScript projects') .argument('[projects...]') .action((parsedProjects, parsedOptions) => { From e732a03b44821ec3a265126a1bb454b50daaa38a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=93lafur=20P=C3=A1ll=20Geirsson?= Date: Tue, 18 Oct 2022 08:42:53 +0200 Subject: [PATCH 3/3] Address review comments --- src/CommandLineOptions.test.ts | 2 ++ src/CommandLineOptions.ts | 11 +++++-- src/ProjectIndexer.ts | 54 ++++++++++++++++++++++++++++++---- src/main.test.ts | 2 +- 4 files changed, 59 insertions(+), 10 deletions(-) diff --git a/src/CommandLineOptions.test.ts b/src/CommandLineOptions.test.ts index 496399f6..ccb018cc 100644 --- a/src/CommandLineOptions.test.ts +++ b/src/CommandLineOptions.test.ts @@ -34,3 +34,5 @@ checkIndexParser(['--cwd', 'qux'], { cwd: 'qux' }) checkIndexParser(['--yarn-workspaces'], { yarnWorkspaces: true }) checkIndexParser(['--infer-tsconfig'], { inferTsconfig: true }) checkIndexParser(['--no-progress-bar'], { progressBar: false }) +checkIndexParser(['--progress-bar'], { progressBar: true }) +checkIndexParser(['--no-global-caches'], { globalCaches: false }) diff --git a/src/CommandLineOptions.ts b/src/CommandLineOptions.ts index 8751daff..2ddc0662 100644 --- a/src/CommandLineOptions.ts +++ b/src/CommandLineOptions.ts @@ -27,7 +27,10 @@ export interface ProjectOptions extends MultiProjectOptions { /** Cached values */ export interface GlobalCache { - sources: Map + sources: Map< + string, + [ts.SourceFile | undefined, ts.ScriptTarget | ts.CreateSourceFileOptions] + > parsedCommandLines: Map } @@ -58,8 +61,10 @@ export function mainCommand( .option('--output ', 'path to the output file', 'index.scip') .option('--progress-bar', 'whether to enable a rich progress bar') .option('--no-progress-bar', 'whether to disable the rich progress bar') - .option('--global-caches', 'whether to disable global caches between TypeScript projects') - .option('--no-global-caches', 'whether to disable global caches between TypeScript projects') + .option( + '--no-global-caches', + 'whether to disable global caches between TypeScript projects' + ) .argument('[projects...]') .action((parsedProjects, parsedOptions) => { indexAction( diff --git a/src/ProjectIndexer.ts b/src/ProjectIndexer.ts index 33cc8b99..3ce158e0 100644 --- a/src/ProjectIndexer.ts +++ b/src/ProjectIndexer.ts @@ -25,23 +25,43 @@ function createCompilerHost( if (!hostCopy.getParsedCommandLine) { return undefined } - if (cache.parsedCommandLines.has(fileName)) { - return cache.parsedCommandLines.get(fileName) + const fromCache = cache.parsedCommandLines.get(fileName) + if (fromCache !== undefined) { + return fromCache } const result = hostCopy.getParsedCommandLine(fileName) if (result !== undefined) { + // Don't cache undefined results even if they could be cached + // theoretically. The big performance gains from this cache come from + // caching non-undefined results. cache.parsedCommandLines.set(fileName, result) } return result } - host.getSourceFile = (fileName, languageVersion) => { + host.getSourceFile = ( + fileName, + languageVersion, + onError, + shouldCreateNewSourceFile + ) => { const fromCache = cache.sources.get(fileName) if (fromCache !== undefined) { - return fromCache + const [sourceFile, cachedLanguageVersion] = fromCache + if (isSameLanguageVersion(languageVersion, cachedLanguageVersion)) { + return sourceFile + } } - const result = hostCopy.getSourceFile(fileName, languageVersion) + const result = hostCopy.getSourceFile( + fileName, + languageVersion, + onError, + shouldCreateNewSourceFile + ) if (result !== undefined) { - cache.sources.set(fileName, result) + // Don't cache undefined results even if they could be cached + // theoretically. The big performance gains from this cache come from + // caching non-undefined results. + cache.sources.set(fileName, [result, languageVersion]) } return result } @@ -150,3 +170,25 @@ export class ProjectIndexer { ) } } + +function isSameLanguageVersion( + a: ts.ScriptTarget | ts.CreateSourceFileOptions, + b: ts.ScriptTarget | ts.CreateSourceFileOptions +): boolean { + if (typeof a === 'number' && typeof b === 'number') { + return a === b + } + if (typeof a === 'number' || typeof b === 'number') { + // Different shape: one is ts.ScriptTarget, the other is + // ts.CreateSourceFileOptions + return false + } + return ( + a.languageVersion === b.languageVersion && + a.impliedNodeFormat === b.impliedNodeFormat + // Ignore setExternalModuleIndicator even if that increases the risk of a + // false positive. A local experiment revealed that we never get a cache hit + // if we compare setExternalModuleIndicator since it's function with a + // unique reference on every `CompilerHost.getSourceFile` callback. + ) +} diff --git a/src/main.test.ts b/src/main.test.ts index 41c42b60..e6b081b9 100644 --- a/src/main.test.ts +++ b/src/main.test.ts @@ -54,7 +54,7 @@ for (const snapshotDirectory of snapshotDirectories) { yarnBerryWorkspaces: false, progressBar: false, indexedProjects: new Set(), - globalCaches: true + globalCaches: true, }) if (inferTsconfig) { fs.rmSync(tsconfigJsonPath)