ripgrepTextSearch.ts 11.7 KB
Newer Older
R
Rob Lourens 已提交
1 2 3 4 5 6
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/
'use strict';

R
💄  
Rob Lourens 已提交
7
import { EventEmitter } from 'events';
8
import * as path from 'path';
R
💄  
Rob Lourens 已提交
9

R
Rob Lourens 已提交
10 11 12
import * as cp from 'child_process';
import { rgPath } from 'vscode-ripgrep';

13
import * as strings from 'vs/base/common/strings';
14
import * as extfs from 'vs/base/node/extfs';
15
import * as encoding from 'vs/base/node/encoding';
R
Rob Lourens 已提交
16
import * as glob from 'vs/base/common/glob';
17
import { ILineMatch, ISearchLog } from 'vs/platform/search/common/search';
18
import { TPromise } from 'vs/base/common/winjs.base';
R
Rob Lourens 已提交
19

20
import { ISerializedFileMatch, ISerializedSearchComplete, IRawSearch } from './search';
R
Rob Lourens 已提交
21

22
/**
 * Runs a text search by spawning ripgrep as a child process, feeding its stdout to a
 * RipgrepParser, and relaying the parsed file matches through the caller's callbacks.
 */
export class RipgrepEngine {
	private isDone = false;
	private rgProc: cp.ChildProcess;
	private postProcessExclusions: glob.ParsedExpression;

	private ripgrepParser: RipgrepParser;

	// Joined with every pending async exclusion check, so 'close' can wait for all of them
	private resultsHandledP: TPromise<any> = TPromise.wrap(null);

	constructor(private config: IRawSearch) {
	}

	/**
	 * Stops the search: marks the engine done, stops the parser and kills the ripgrep process.
	 * Safe to call before a search has started or after the process has already closed.
	 */
	cancel(): void {
		this.isDone = true;

		// Neither exists until searchFolder() has run, and rgProc is reset to null on 'close'
		if (this.ripgrepParser) {
			this.ripgrepParser.cancel();
		}

		if (this.rgProc) {
			this.rgProc.kill();
		}
	}

	// TODO@Rob - make promise-based once the old search is gone, and I don't need them to have matching interfaces anymore
	/**
	 * Starts the search. Only the first entry of `config.rootFolders` is searched; with no
	 * root folders, `done` is called immediately with an empty completion.
	 *
	 * @param onResult Called once per file that has matches.
	 * @param onMessage Called with the ripgrep command line and with anything ripgrep writes to stderr.
	 * @param done Called once when the search finishes, hits the result limit, or fails.
	 */
	search(onResult: (match: ISerializedFileMatch) => void, onMessage: (message: ISearchLog) => void, done: (error: Error, complete: ISerializedSearchComplete) => void): void {
		if (this.config.rootFolders.length) {
			this.searchFolder(this.config.rootFolders[0], onResult, onMessage, done);
		} else {
			done(null, {
				limitHit: false,
				stats: null
			});
		}
	}

	/**
	 * Spawns ripgrep with `rootFolder` as its working directory and wires up all event handlers.
	 */
	private searchFolder(rootFolder: string, onResult: (match: ISerializedFileMatch) => void, onMessage: (message: ISearchLog) => void, done: (error: Error, complete: ISerializedSearchComplete) => void): void {
		const rgArgs = getRgArgs(this.config);
		if (rgArgs.siblingClauses) {
			// Exclude patterns with a `when` clause are not passed to ripgrep; they are applied to each result below
			this.postProcessExclusions = glob.parseToAsync(rgArgs.siblingClauses, { trimForExclusions: true });
		}

		process.nextTick(() => {
			// Allow caller to register progress callback
			const rgCmd = `rg ${rgArgs.args.join(' ')}\ncwd: ${rootFolder}\n`;
			onMessage({ message: rgCmd });
		});
		this.rgProc = cp.spawn(rgPath, rgArgs.args, { cwd: rootFolder });

		this.ripgrepParser = new RipgrepParser(this.config.maxResults, rootFolder);
		this.ripgrepParser.on('result', (match: ISerializedFileMatch) => {
			if (this.postProcessExclusions) {
				const relativePath = path.relative(rootFolder, match.path);
				const handleResultP = (<TPromise<string>>this.postProcessExclusions(relativePath, undefined, () => getSiblings(match.path)))
					.then(globMatch => {
						// Only report the file if no sibling-based exclusion matched it
						if (!globMatch) {
							onResult(match);
						}
					});

				this.resultsHandledP = TPromise.join([this.resultsHandledP, handleResultP]);
			} else {
				onResult(match);
			}
		});
		this.ripgrepParser.on('hitLimit', () => {
			this.cancel();
			done(null, {
				limitHit: true,
				stats: null
			});
		});

		// Without an 'error' listener, a failed spawn is an unhandled 'error' event
		// and `done` would never be called
		this.rgProc.on('error', (error: Error) => {
			if (!this.isDone) {
				this.isDone = true;
				this.ripgrepParser.cancel();
				done(error, {
					limitHit: false,
					stats: null
				});
			}
		});

		this.rgProc.stdout.on('data', data => {
			this.ripgrepParser.handleData(data);
		});

		let gotData = false;
		this.rgProc.stdout.once('data', () => gotData = true);

		let stderr = '';
		this.rgProc.stderr.on('data', data => {
			const message = data.toString();
			onMessage({ message });
			stderr += message;
		});

		this.rgProc.on('close', code => {
			// Trigger last result, then wait on async result handling
			this.ripgrepParser.flush();
			this.resultsHandledP.then(() => {
				this.rgProc = null;
				if (!this.isDone) {
					this.isDone = true;
					// stderr only fails the search when it is a recognized error and no stdout data arrived
					if (stderr && this.shouldReturnErrorMsg(stderr) && !gotData) {
						done(new Error(stderr), {
							limitHit: false,
							stats: null
						});
					} else {
						done(null, {
							limitHit: false,
							stats: null
						});
					}
				}
			});
		});
	}

	/**
	 * Whether a ripgrep stderr message should be surfaced as a search error.
	 * Only messages starting with 'Error parsing regex' qualify.
	 */
	private shouldReturnErrorMsg(msg: string): boolean {
		return strings.startsWith(msg, 'Error parsing regex');
	}
}

/**
 * Incrementally parses ripgrep's colorized `--heading` output.
 *
 * Emits 'result' with a serialized file match each time a file's matches are complete,
 * and 'hitLimit' once the number of matches reaches `maxResults`.
 */
export class RipgrepParser extends EventEmitter {
	// "<reset><line number><reset>:<line text>"
	private static RESULT_REGEX = /^\u001b\[m(\d+)\u001b\[m:(.*)$/;
	// "<reset><file path><reset>"
	private static FILE_REGEX = /^\u001b\[m(.+)\u001b\[m$/;

	public static MATCH_START_MARKER = '\u001b[m\u001b[31m';
	public static MATCH_END_MARKER = '\u001b[m';

	private fileMatch: FileMatch;
	// Undecoded bytes of a trailing partial line, kept until the rest of the line arrives
	private remainder: Buffer;
	private isDone = false;

	private numResults = 0;

	constructor(private maxResults: number, private rootFolder: string) {
		super();
	}

	/** Stops parsing; lines received after this are ignored. */
	public cancel(): void {
		this.isDone = true;
	}

	/** Emits the matches collected for the current file, if any. */
	public flush(): void {
		if (this.fileMatch) {
			this.onResult();
		}
	}

	/**
	 * Consumes one chunk of ripgrep stdout. Only complete lines are parsed; a trailing
	 * partial line is held back and prepended to the next chunk.
	 */
	public handleData(data: string | Buffer): void {
		// Buffer at the byte level so that a multi-byte UTF-8 character split across two
		// chunks is decoded as a whole rather than as two replacement characters
		const incoming = typeof data === 'string' ? Buffer.from(data) : data;
		const pending = this.remainder ? Buffer.concat([this.remainder, incoming]) : incoming;

		// 0x0a ('\n') never occurs inside a multi-byte UTF-8 sequence, so cutting there is safe
		const lastNewline = pending.lastIndexOf(0x0a);
		if (lastNewline === -1) {
			this.remainder = pending;
			return;
		}

		this.remainder = lastNewline + 1 < pending.length ? pending.slice(lastNewline + 1) : null;

		const dataLines: string[] = pending.slice(0, lastNewline).toString().split(/\r\n|\n/);
		for (let l = 0; l < dataLines.length; l++) {
			if (this.isDone) {
				break;
			}

			const outputLine = dataLines[l].trim();

			const resultParts = outputLine.match(RipgrepParser.RESULT_REGEX);
			if (resultParts) {
				// Line is a result - add to collected results for the current file path.
				// A result with no preceding file path line is malformed and is skipped.
				if (this.fileMatch) {
					this.handleMatchLine(outputLine, parseInt(resultParts[1], 10) - 1, resultParts[2]);
				}

				continue;
			}

			const fileParts = outputLine.match(RipgrepParser.FILE_REGEX);
			if (fileParts) {
				// Line is a file path - send all collected results for the previous file path
				if (this.fileMatch) {
					this.onResult();
				}

				this.fileMatch = new FileMatch(path.join(this.rootFolder, fileParts[1]));
			}

			// Anything else is empty (or malformed) and is ignored
		}
	}

	/**
	 * Records one matching line on the current file: strips the color markers from `text`
	 * to build the preview and stores the [offset, length] of each match within that preview.
	 *
	 * @param lineNum Zero-based line number.
	 * @param text Line text, still containing the match start/end markers.
	 */
	private handleMatchLine(outputLine: string, lineNum: number, text: string): void {
		const lineMatch = new LineMatch(text, lineNum);
		this.fileMatch.addMatch(lineMatch);

		const startMarker = RipgrepParser.MATCH_START_MARKER;
		const endMarker = RipgrepParser.MATCH_END_MARKER;

		let lastMatchEndPos = 0;
		let matchTextStartPos = -1;

		// Track positions with color codes subtracted - offsets in the final text preview result
		let matchTextStartRealIdx = -1;
		let textRealIdx = 0;
		let hitLimit = false;

		const realTextParts: string[] = [];

		// Stop where a full end marker can no longer fit
		const scanEnd = text.length - (endMarker.length - 1);
		for (let i = 0; i < scanEnd;) {
			// The start marker begins with the end marker's characters, so it must be tested first
			if (text.substr(i, startMarker.length) === startMarker) {
				// Match start
				realTextParts.push(text.slice(lastMatchEndPos, i));
				i += startMarker.length;
				matchTextStartPos = i;
				matchTextStartRealIdx = textRealIdx;
			} else if (text.substr(i, endMarker.length) === endMarker) {
				// Match end
				realTextParts.push(text.slice(matchTextStartPos, i));
				if (!hitLimit) {
					lineMatch.addMatch(matchTextStartRealIdx, textRealIdx - matchTextStartRealIdx);
				}

				matchTextStartPos = -1;
				matchTextStartRealIdx = -1;
				i += endMarker.length;
				lastMatchEndPos = i;
				this.numResults++;

				// Check hit maxResults limit
				if (this.numResults >= this.maxResults) {
					// Finish the line, then report the result below
					hitLimit = true;
				}
			} else {
				i++;
				textRealIdx++;
			}
		}

		realTextParts.push(text.slice(lastMatchEndPos));

		// Replace preview with version without color codes
		lineMatch.preview = realTextParts.join('');

		if (hitLimit) {
			this.cancel();
			this.onResult();
			this.emit('hitLimit');
		}
	}

	/** Emits the current file's matches as a 'result' event and clears the current file. */
	private onResult(): void {
		this.emit('result', this.fileMatch.serialize());
		this.fileMatch = null;
	}
}

/**
 * Collects the matching lines found in a single file.
 */
export class FileMatch implements ISerializedFileMatch {
	path: string;
	lineMatches: LineMatch[];

	constructor(path: string) {
		this.path = path;
		this.lineMatches = [];
	}

	/** Appends one matching line to this file. */
	addMatch(lineMatch: LineMatch): void {
		this.lineMatches.push(lineMatch);
	}

	/** True while no matching line has been added. */
	isEmpty(): boolean {
		return !this.lineMatches.length;
	}

	/**
	 * Produces the plain-object form of this file match. `numMatches` is the total
	 * number of [offset, length] pairs across all lines.
	 */
	serialize(): ISerializedFileMatch {
		let numMatches = 0;
		const lineMatches: ILineMatch[] = this.lineMatches.map(line => {
			numMatches += line.offsetAndLengths.length;
			return line.serialize();
		});

		return { path: this.path, lineMatches, numMatches };
	}
}

/**
 * One matching line of a file: its preview text, its line number, and the
 * [offset, length] pair of every match within the preview.
 */
export class LineMatch implements ILineMatch {
	preview: string;
	lineNumber: number;
	offsetAndLengths: number[][];

	/**
	 * @param preview Line text; any trailing carriage returns / newlines are stripped.
	 * @param lineNumber Line number of the match, stored as given.
	 */
	constructor(preview: string, lineNumber: number) {
		this.preview = preview.replace(/(\r|\n)*$/, '');
		this.lineNumber = lineNumber;
		this.offsetAndLengths = [];
	}

	getText(): string {
		return this.preview;
	}

	getLineNumber(): number {
		return this.lineNumber;
	}

	/** Records a match starting at `offset` in the preview and spanning `length` characters. */
	addMatch(offset: number, length: number): void {
		this.offsetAndLengths.push([offset, length]);
	}

	/** Returns the plain-object form of this line match. */
	serialize(): ILineMatch {
		const result = {
			preview: this.preview,
			lineNumber: this.lineNumber,
			offsetAndLengths: this.offsetAndLengths
		};

		return result;
	}
}

325
/**
 * Splits a glob expression into the parts ripgrep can apply itself and the parts that
 * must be evaluated afterwards.
 *
 * @param patterns Glob expression; each value is either a boolean or an object with a `when` clause.
 * @returns `globArgs`: the keys whose value is `true`, prefixed with '/' unless they start with '*'.
 *          `siblingClauses`: the entries that carry a `when` clause, or null if there are none.
 */
function globExprsToRgGlobs(patterns: glob.IExpression): { globArgs: string[], siblingClauses: glob.IExpression } {
	const globArgs: string[] = [];
	let siblingClauses: glob.IExpression = null;

	Object.keys(patterns).forEach(key => {
		const value = patterns[key];
		if (typeof value === 'boolean' && value) {
			// globs added to ripgrep don't match from the root by default, so add a /
			globArgs.push(key.charAt(0) !== '*' ? '/' + key : key);
		} else if (value && value.when) {
			if (!siblingClauses) {
				siblingClauses = {};
			}

			siblingClauses[key] = value;
		}
	});

	return { globArgs, siblingClauses };
}

350
/**
 * Builds the ripgrep command line for a search.
 *
 * @param config The raw search configuration.
 * @returns `args`: the argument list for ripgrep, which searches './' relative to its working directory.
 *          `siblingClauses`: exclude entries with a `when` clause, which are not passed to ripgrep;
 *          undefined when there is no exclude pattern, null when it has no such entries.
 */
function getRgArgs(config: IRawSearch): { args: string[], siblingClauses: glob.IExpression } {
	// The color settings here produce the escape sequences that RipgrepParser looks for
	const args = ['--hidden', '--heading', '--line-number', '--color', 'ansi', '--colors', 'path:none', '--colors', 'line:none', '--colors', 'match:fg:red', '--colors', 'match:style:nobold'];
	args.push(config.contentPattern.isCaseSensitive ? '--case-sensitive' : '--ignore-case');

	if (config.includePattern) {
		// I don't think includePattern can have siblingClauses
		globExprsToRgGlobs(config.includePattern).globArgs.forEach(globArg => {
			args.push('-g', globArg);
		});
	}

	let siblingClauses: glob.IExpression;
	if (config.excludePattern) {
		const rgGlobs = globExprsToRgGlobs(config.excludePattern);
		rgGlobs.globArgs
			.forEach(rgGlob => args.push('-g', `!${rgGlob}`));
		siblingClauses = rgGlobs.siblingClauses;
	}

	if (config.maxFilesize) {
		args.push('--max-filesize', config.maxFilesize + '');
	}

	if (config.disregardIgnoreFiles) {
		// Don't use .gitignore or .ignore
		args.push('--no-ignore');
	}

	// Follow symlinks
	args.push('--follow');

	// Set default encoding
	if (config.fileEncoding && config.fileEncoding !== 'utf8') {
		args.push('--encoding', encoding.toCanonicalName(config.fileEncoding));
	}

	let searchPatternAfterDoubleDashes: string;
	if (config.contentPattern.isWordMatch) {
		const regexp = strings.createRegExp(config.contentPattern.pattern, config.contentPattern.isRegExp, { wholeWord: config.contentPattern.isWordMatch });
		args.push('--regexp', regexp.source);
	} else if (config.contentPattern.isRegExp) {
		args.push('--regexp', config.contentPattern.pattern);
	} else {
		searchPatternAfterDoubleDashes = config.contentPattern.pattern;
		args.push('--fixed-strings');
	}

	// End of options - everything after this is positional
	args.push('--');

	// Checked by type rather than truthiness: an empty fixed-string pattern must still be
	// passed, otherwise './' below would be taken as the pattern
	if (typeof searchPatternAfterDoubleDashes === 'string') {
		// Put the query after --, in case the query starts with a dash
		args.push(searchPatternAfterDoubleDashes);
	}

	// Folder to search
	args.push('./');

	return { args, siblingClauses };
}
409 410 411 412 413 414 415 416 417 418 419 420

/**
 * Lists the entries of the directory that contains `file`.
 *
 * @param file Path of the file whose parent directory is read.
 * @returns A promise for the names returned by `extfs.readdir`, rejected with the error if reading fails.
 */
function getSiblings(file: string): TPromise<string[]> {
	return new TPromise((resolve, reject) => {
		extfs.readdir(path.dirname(file), (error: Error, files: string[]) => {
			if (error) {
				// Stop here so the promise is not also resolved on the error path
				reject(error);
				return;
			}

			resolve(files);
		});
	});
}