ripgrepTextSearch.ts 16.3 KB
Newer Older
R
Rob Lourens 已提交
1 2 3 4 5 6
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/
'use strict';

R
💄  
Rob Lourens 已提交
7
import { EventEmitter } from 'events';
8
import * as path from 'path';
9
import { StringDecoder, NodeStringDecoder } from 'string_decoder';
R
💄  
Rob Lourens 已提交
10

R
Rob Lourens 已提交
11 12 13
import * as cp from 'child_process';
import { rgPath } from 'vscode-ripgrep';

R
Rob Lourens 已提交
14
import objects = require('vs/base/common/objects');
15
import platform = require('vs/base/common/platform');
16
import * as strings from 'vs/base/common/strings';
17
import * as paths from 'vs/base/common/paths';
18
import * as extfs from 'vs/base/node/extfs';
19
import * as encoding from 'vs/base/node/encoding';
R
Rob Lourens 已提交
20
import * as glob from 'vs/base/common/glob';
21
import { ILineMatch, ISearchLog } from 'vs/platform/search/common/search';
22
import { TPromise } from 'vs/base/common/winjs.base';
R
Rob Lourens 已提交
23

R
Rob Lourens 已提交
24
import { ISerializedFileMatch, ISerializedSearchComplete, IRawSearch, IFolderSearch } from './search';
R
Rob Lourens 已提交
25

26
/**
 * Text search engine that spawns ripgrep, feeds its stdout through a RipgrepParser and reports
 * file matches, log messages and completion to the callbacks given to `search`.
 */
export class RipgrepEngine {
	private isDone = false;
	private rgProc: cp.ChildProcess;
	private postProcessExclusions: glob.ParsedExpression;

	private ripgrepParser: RipgrepParser;

	// Joined promise of all in-flight sibling-clause checks; completion waits on it.
	private resultsHandledP: TPromise<any> = TPromise.wrap(null);

	constructor(private config: IRawSearch) {
	}

	/**
	 * Marks the search as finished, stops the parser and kills the ripgrep process.
	 * Safe to call before `search` has started or after the process has already closed.
	 */
	cancel(): void {
		this.isDone = true;

		// Both fields are unset until search() runs, and rgProc is cleared again on 'close'.
		if (this.ripgrepParser) {
			this.ripgrepParser.cancel();
		}

		if (this.rgProc) {
			this.rgProc.kill();
		}
	}

	// TODO@Rob - make promise-based once the old search is gone, and I don't need them to have matching interfaces anymore
	/**
	 * Starts the search.
	 *
	 * @param onResult Called once per file with the matches found in it.
	 * @param onMessage Called with log output: the ripgrep command line and anything written to stderr.
	 * @param done Called once when the search ends, with an error for fatal ripgrep failures.
	 */
	search(onResult: (match: ISerializedFileMatch) => void, onMessage: (message: ISearchLog) => void, done: (error: Error, complete: ISerializedSearchComplete) => void): void {
		if (!this.config.folderQueries.length && !this.config.extraFiles.length) {
			// Nothing to search
			done(null, {
				limitHit: false,
				stats: null
			});
			return;
		}

		const rgArgs = getRgArgs(this.config);
		if (rgArgs.siblingClauses) {
			this.postProcessExclusions = glob.parseToAsync(rgArgs.siblingClauses, { trimForExclusions: true });
		}

		const cwd = platform.isWindows ? 'c:/' : '/';
		process.nextTick(() => { // Allow caller to register progress callback
			const escapedArgs = rgArgs.globArgs
				.map(arg => arg.match(/^-/) ? arg : `'${arg}'`)
				.join(' ');

			const rgCmd = `rg ${escapedArgs}\n - cwd: ${cwd}\n`;
			onMessage({ message: rgCmd });
			if (rgArgs.siblingClauses) {
				onMessage({ message: ` - Sibling clauses: ${JSON.stringify(rgArgs.siblingClauses)}\n` });
			}
		});
		this.rgProc = cp.spawn(rgPath, rgArgs.globArgs, { cwd });

		// Without a listener, a failure to spawn ripgrep surfaces as an unhandled 'error' event.
		this.rgProc.on('error', (err: Error) => {
			if (!this.isDone) {
				this.isDone = true;
				done(err, {
					limitHit: false,
					stats: null
				});
			}
		});

		this.ripgrepParser = new RipgrepParser(this.config.maxResults, cwd);
		this.ripgrepParser.on('result', (match: ISerializedFileMatch) => {
			if (this.postProcessExclusions) {
				// Sibling clauses can't be expressed as rg globs, so evaluate them here and drop excluded files
				const handleResultP = (<TPromise<string>>this.postProcessExclusions(match.path, undefined, () => getSiblings(match.path)))
					.then(globMatch => {
						if (!globMatch) {
							onResult(match);
						}
					});

				this.resultsHandledP = TPromise.join([this.resultsHandledP, handleResultP]);
			} else {
				onResult(match);
			}
		});
		this.ripgrepParser.on('hitLimit', () => {
			this.cancel();
			done(null, {
				limitHit: true,
				stats: null
			});
		});

		this.rgProc.stdout.on('data', data => {
			this.ripgrepParser.handleData(data);
		});

		let gotData = false;
		this.rgProc.stdout.once('data', () => gotData = true);

		let stderr = '';
		this.rgProc.stderr.on('data', data => {
			const message = data.toString();
			onMessage({ message });
			stderr += message;
		});

		this.rgProc.on('close', code => {
			// Trigger last result, then wait on async result handling
			this.ripgrepParser.flush();

			const finish = () => {
				this.rgProc = null;
				if (!this.isDone) {
					this.isDone = true;
					let displayMsg: string;
					if (stderr && !gotData && (displayMsg = this.rgErrorMsgForDisplay(stderr))) {
						done(new Error(displayMsg), {
							limitHit: false,
							stats: null
						});
					} else {
						done(null, {
							limitHit: false,
							stats: null
						});
					}
				}
			};

			// Finish on rejection too, so a failed sibling check can't leave the search pending forever.
			this.resultsHandledP.then(finish, finish);
		});
	}

	/**
	 * Read the first line of stderr and return an error for display or undefined, based on a whitelist.
	 * Ripgrep produces stderr output which is not from a fatal error, and we only want the search to be
	 * "failed" when a fatal error was produced.
	 */
	private rgErrorMsgForDisplay(msg: string): string | undefined {
		const firstLine = msg.split('\n')[0];

		if (strings.startsWith(firstLine, 'Error parsing regex')) {
			return firstLine;
		}

		if (strings.startsWith(firstLine, 'error parsing glob')) {
			return firstLine;
		}

		return undefined;
	}
}

/**
 * Incremental parser for ripgrep's colored `--heading` output.
 *
 * Emits 'result' with a serialized file match each time all collected lines for a file are ready,
 * and 'hitLimit' once `maxResults` matches have been counted.
 */
export class RipgrepParser extends EventEmitter {
	// "<reset>LINE<reset>:TEXT", with an optional trailing \r captured separately
	private static RESULT_REGEX = /^\u001b\[m(\d+)\u001b\[m:(.*)(\r?)/;
	// "<reset>PATH<reset>"
	private static FILE_REGEX = /^\u001b\[m(.+)\u001b\[m$/;

	public static MATCH_START_MARKER = '\u001b[m\u001b[31m';
	public static MATCH_END_MARKER = '\u001b[m';

	private fileMatch: FileMatch;
	private remainder: string;
	private isDone = false;
	private stringDecoder: NodeStringDecoder;

	private numResults = 0;

	constructor(private maxResults: number, private rootFolder: string) {
		super();
		this.stringDecoder = new StringDecoder();
	}

	/** Stops processing: any lines handled after this call are ignored. */
	public cancel(): void {
		this.isDone = true;
	}

	/** Drains the decoder and emits the result for the file currently being collected, if any. */
	public flush(): void {
		this.handleDecodedData(this.stringDecoder.end());

		if (this.fileMatch) {
			this.onResult();
		}
	}

	/** Feeds a chunk of ripgrep stdout to the parser; buffers are decoded through the string decoder. */
	public handleData(data: Buffer | string): void {
		const dataStr = typeof data === 'string' ? data : this.stringDecoder.write(data);
		this.handleDecodedData(dataStr);
	}

	private handleDecodedData(decodedData: string): void {
		// If the previous data chunk didn't end in a newline, prepend it to this chunk
		const dataStr = this.remainder ?
			this.remainder + decodedData :
			decodedData;

		const dataLines: string[] = dataStr.split(/\r\n|\n/);
		// A non-empty last element is an incomplete line - hold it back for the next chunk
		this.remainder = dataLines[dataLines.length - 1] ? dataLines.pop() : null;

		for (let l = 0; l < dataLines.length; l++) {
			const outputLine = dataLines[l].trim();
			if (this.isDone) {
				break;
			}

			let r: RegExpMatchArray;
			if (r = outputLine.match(RipgrepParser.RESULT_REGEX)) {
				// ripgrep line numbers are 1-based, results are 0-based
				const lineNum = parseInt(r[1], 10) - 1;
				let matchText = r[2];

				// workaround https://github.com/BurntSushi/ripgrep/issues/416
				// If the match line ended with \r, append a match end marker so the match isn't lost
				if (r[3]) {
					matchText += RipgrepParser.MATCH_END_MARKER;
				}

				// Line is a result - add to collected results for the current file path
				this.handleMatchLine(outputLine, lineNum, matchText);
			} else if (r = outputLine.match(RipgrepParser.FILE_REGEX)) {
				// Line is a file path - send all collected results for the previous file path
				if (this.fileMatch) {
					this.onResult();
				}

				this.fileMatch = new FileMatch(path.isAbsolute(r[1]) ? r[1] : path.join(this.rootFolder, r[1]));
			} else {
				// Line is empty (or malformed)
			}
		}
	}

	/**
	 * Adds one matched line to the current file: strips the color markers from `text` to build the
	 * preview, and records each match as an [offset, length] pair relative to that stripped preview.
	 */
	private handleMatchLine(outputLine: string, lineNum: number, text: string): void {
		const lineMatch = new LineMatch(text, lineNum);
		this.fileMatch.addMatch(lineMatch);

		let lastMatchEndPos = 0;
		let matchTextStartPos = -1;

		// Track positions with color codes subtracted - offsets in the final text preview result
		let matchTextStartRealIdx = -1;
		let textRealIdx = 0;
		let hitLimit = false;

		const realTextParts: string[] = [];

		for (let i = 0; i < text.length - (RipgrepParser.MATCH_END_MARKER.length - 1);) {
			if (text.substr(i, RipgrepParser.MATCH_START_MARKER.length) === RipgrepParser.MATCH_START_MARKER) {
				// Match start
				const chunk = text.slice(lastMatchEndPos, i);
				realTextParts.push(chunk);
				i += RipgrepParser.MATCH_START_MARKER.length;
				matchTextStartPos = i;
				matchTextStartRealIdx = textRealIdx;
			} else if (text.substr(i, RipgrepParser.MATCH_END_MARKER.length) === RipgrepParser.MATCH_END_MARKER) {
				// Match end
				const chunk = text.slice(matchTextStartPos, i);
				realTextParts.push(chunk);
				if (!hitLimit) {
					lineMatch.addMatch(matchTextStartRealIdx, textRealIdx - matchTextStartRealIdx);
				}

				matchTextStartPos = -1;
				matchTextStartRealIdx = -1;
				i += RipgrepParser.MATCH_END_MARKER.length;
				lastMatchEndPos = i;
				this.numResults++;

				// Check hit maxResults limit
				if (this.numResults >= this.maxResults) {
					// Finish the line, then report the result below
					hitLimit = true;
				}
			} else {
				i++;
				textRealIdx++;
			}
		}

		const chunk = text.slice(lastMatchEndPos);
		realTextParts.push(chunk);

		// Replace preview with version without color codes
		const preview = realTextParts.join('');
		lineMatch.preview = preview;

		if (hitLimit) {
			this.cancel();
			this.onResult();
			this.emit('hitLimit');
		}
	}

	/** Emits the current file's collected matches and resets the collection state. */
	private onResult(): void {
		this.emit('result', this.fileMatch.serialize());
		this.fileMatch = null;
	}
}

/**
 * Collects the line matches found in a single file and converts them to the serialized form.
 */
export class FileMatch implements ISerializedFileMatch {
	path: string;
	lineMatches: LineMatch[];

	constructor(path: string) {
		this.path = path;
		this.lineMatches = [];
	}

	/** Appends a matched line to this file. */
	addMatch(lineMatch: LineMatch): void {
		this.lineMatches.push(lineMatch);
	}

	/** True while no line has been added. */
	isEmpty(): boolean {
		return !this.lineMatches.length;
	}

	/**
	 * Returns the path, every line in serialized form, and the total number of individual matches
	 * (the sum of each line's offset/length pairs).
	 */
	serialize(): ISerializedFileMatch {
		const serializedLines: ILineMatch[] = [];
		let total = 0;

		this.lineMatches.forEach(line => {
			total += line.offsetAndLengths.length;
			serializedLines.push(line.serialize());
		});

		return {
			path: this.path,
			lineMatches: serializedLines,
			numMatches: total
		};
	}
}

/**
 * A single matched line: its preview text, line number, and the [offset, length] pairs of the
 * matches inside it.
 */
export class LineMatch implements ILineMatch {
	preview: string;
	lineNumber: number;
	offsetAndLengths: number[][];

	/**
	 * @param preview Text of the line; trailing \r and \n characters are stripped.
	 * @param lineNumber Line number to report for this line.
	 */
	constructor(preview: string, lineNumber: number) {
		this.preview = preview.replace(/(\r|\n)*$/, '');
		this.lineNumber = lineNumber;
		this.offsetAndLengths = [];
	}

	getText(): string {
		return this.preview;
	}

	getLineNumber(): number {
		return this.lineNumber;
	}

	/** Records one match as an [offset, length] pair. */
	addMatch(offset: number, length: number): void {
		this.offsetAndLengths.push([offset, length]);
	}

	/** Returns a plain object with the same fields; `offsetAndLengths` is shared, not copied. */
	serialize(): ILineMatch {
		const result = {
			preview: this.preview,
			lineNumber: this.lineNumber,
			offsetAndLengths: this.offsetAndLengths
		};

		return result;
	}
}

366
/**
 * Result of converting glob expressions into ripgrep arguments.
 */
export interface IRgGlobResult {
	/** Strings destined for the ripgrep command line (glob patterns, or a complete argument list). */
	globArgs: string[];
	/** Expressions carrying a `when` sibling condition, which can't be passed to ripgrep as a glob. */
	siblingClauses: glob.IExpression;
}

371
/**
 * Builds the exclude globs for all folder queries.
 *
 * @param folderQueries Folders being searched, each with an optional exclude pattern of its own.
 * @param globalExclude Exclude pattern applied to every folder; wins over a folder's own entry for the same key.
 * @param excludesToSkip Pattern keys that should not be converted here.
 * @param absoluteGlobs When true, relative patterns are resolved against each folder's path.
 * @returns The glob strings plus the merged sibling clauses of all folders.
 */
export function foldersToRgExcludeGlobs(folderQueries: IFolderSearch[], globalExclude: glob.IExpression, excludesToSkip?: Set<string>, absoluteGlobs = true): IRgGlobResult {
	const globArgs: string[] = [];
	let siblingClauses: glob.IExpression = {};
	folderQueries.forEach(folderQuery => {
		const totalExcludePattern = objects.assign({}, folderQuery.excludePattern || {}, globalExclude || {});
		const result = globExprsToRgGlobs(totalExcludePattern, absoluteGlobs && folderQuery.folder, excludesToSkip);
		globArgs.push(...result.globArgs);
		if (result.siblingClauses) {
			siblingClauses = objects.assign(siblingClauses, result.siblingClauses);
		}
	});

	return { globArgs, siblingClauses };
}

386
/**
 * Builds the include globs for all folder queries.
 *
 * @param folderQueries Folders being searched, each with an optional include pattern of its own.
 * @param globalInclude Include pattern applied to every folder; a folder's own entry wins for the same key.
 * @param absoluteGlobs When true, relative patterns are resolved against each folder's path.
 * @returns The glob strings. Sibling clauses are discarded.
 */
export function foldersToIncludeGlobs(folderQueries: IFolderSearch[], globalInclude: glob.IExpression, absoluteGlobs = true): string[] {
	const globArgs: string[] = [];
	folderQueries.forEach(folderQuery => {
		const totalIncludePattern = objects.assign({}, globalInclude || {}, folderQuery.includePattern || {});
		const result = globExprsToRgGlobs(totalIncludePattern, absoluteGlobs && folderQuery.folder);
		globArgs.push(...result.globArgs);
	});

	return globArgs;
}

397
/**
 * Splits a glob expression into plain globs for ripgrep and sibling clauses for post-processing.
 *
 * @param patterns Expression whose keys are glob patterns.
 * @param folder When given, relative keys are resolved against this folder.
 * @param excludesToSkip Keys to leave out of the result entirely.
 * @returns `globArgs` for keys whose value is `true`; `siblingClauses` for keys whose value has a
 *  `when` condition, or null if there are none. Other entries are dropped.
 */
function globExprsToRgGlobs(patterns: glob.IExpression, folder?: string, excludesToSkip?: Set<string>): IRgGlobResult {
	const globArgs: string[] = [];
	let siblingClauses: glob.IExpression = null;
	Object.keys(patterns)
		.forEach(key => {
			if (excludesToSkip && excludesToSkip.has(key)) {
				return;
			}

			if (!key) {
				return;
			}

			const value = patterns[key];
			const resolvedKey = trimTrailingSlash(folder ? getAbsoluteGlob(folder, key) : key);

			if (typeof value === 'boolean' && value) {
				globArgs.push(fixDriveC(resolvedKey));
			} else if (value && value.when) {
				if (!siblingClauses) {
					siblingClauses = {};
				}

				siblingClauses[resolvedKey] = value;
			}
		});

	return { globArgs, siblingClauses };
}

427 428
/**
 * Resolves a glob like "node_modules/**" in "/foo/bar" to "/foo/bar/node_modules/**".
 * A key that is already absolute is returned unchanged.
 *
 * Exported for testing
 */
export function getAbsoluteGlob(folder: string, key: string): string {
	return paths.isAbsolute(key) ?
		key :
		path.join(folder, key);
}
R
Rob Lourens 已提交
438

439 440 441
/**
 * Strips trailing backslashes from `str`, then trailing forward slashes from what remains.
 */
function trimTrailingSlash(str: string): string {
	str = strings.rtrim(str, '\\');
	return strings.rtrim(str, '/');
}
443

444 445
/**
 * Rewrites a path rooted at the C: drive ("c:/foo", any case) to a drive-less form ("/foo").
 * Paths with any other root are returned unchanged.
 */
export function fixDriveC(path: string): string {
	const root = paths.getRoot(path);
	return root.toLowerCase() === 'c:/' ?
		path.replace(/^c:[/\\]/i, '/') :
		path;
}

R
Rob Lourens 已提交
451
/**
 * Builds the ripgrep command line for a search.
 *
 * @param config The search to run. It is only read, never modified.
 * @returns `globArgs` holding the complete argument list for ripgrep, and `siblingClauses` holding the
 *  exclude expressions that have to be evaluated after ripgrep has produced its results.
 */
function getRgArgs(config: IRawSearch): IRgGlobResult {
	const args = ['--hidden', '--heading', '--line-number', '--color', 'ansi', '--colors', 'path:none', '--colors', 'line:none', '--colors', 'match:fg:red', '--colors', 'match:style:nobold'];
	args.push(config.contentPattern.isCaseSensitive ? '--case-sensitive' : '--ignore-case');

	// includePattern can't have siblingClauses
	foldersToIncludeGlobs(config.folderQueries, config.includePattern).forEach(globArg => {
		args.push('-g', globArg);
	});

	// Find excludes that are exactly the same in all folderQueries - e.g. from user settings, and that start with `**`.
	// To make the command shorter, don't resolve these against every folderQuery path - see #33189.
	const universalExcludes = findUniversalExcludes(config.folderQueries);
	const rgGlobs = foldersToRgExcludeGlobs(config.folderQueries, config.excludePattern, universalExcludes);
	rgGlobs.globArgs
		.forEach(rgGlob => args.push('-g', `!${rgGlob}`));
	if (universalExcludes) {
		universalExcludes
			.forEach(exclude => args.push('-g', `!${trimTrailingSlash(exclude)}`));
	}
	const siblingClauses = rgGlobs.siblingClauses;

	if (config.maxFilesize) {
		args.push('--max-filesize', config.maxFilesize + '');
	}

	if (config.disregardIgnoreFiles) {
		// Don't use .gitignore or .ignore
		args.push('--no-ignore');
	}

	// Follow symlinks
	args.push('--follow');

	// Set default encoding if only one folder is opened
	if (config.folderQueries.length === 1 && config.folderQueries[0].fileEncoding && config.folderQueries[0].fileEncoding !== 'utf8') {
		args.push('--encoding', encoding.toCanonicalName(config.folderQueries[0].fileEncoding));
	}

	// Work on local copies so the caller's contentPattern is left untouched
	let pattern = config.contentPattern.pattern;
	let isRegExp = config.contentPattern.isRegExp;

	// Ripgrep handles -- as a -- arg separator. Only --.
	// - is ok, --- is ok, --some-flag is handled as query text. Need to special case.
	if (pattern === '--') {
		isRegExp = true;
		pattern = '\\-\\-';
	}

	let searchPatternAfterDoubleDashes: string;
	if (config.contentPattern.isWordMatch) {
		const regexp = strings.createRegExp(pattern, isRegExp, { wholeWord: config.contentPattern.isWordMatch });
		const regexpStr = regexp.source.replace(/\\\//g, '/'); // RegExp.source arbitrarily returns escaped slashes. Search and destroy.
		args.push('--regexp', regexpStr);
	} else if (isRegExp) {
		args.push('--regexp', pattern);
	} else {
		searchPatternAfterDoubleDashes = pattern;
		args.push('--fixed-strings');
	}

	// Everything after this separator is positional: the query (if not given via --regexp), then the paths
	args.push('--');

	if (searchPatternAfterDoubleDashes) {
		// Put the query after --, in case the query starts with a dash
		args.push(searchPatternAfterDoubleDashes);
	}

	// Folders and extra files to search
	args.push(...config.folderQueries.map(q => q.folder));
	args.push(...config.extraFiles);

	return { globArgs: args, siblingClauses };
}
523 524

/**
 * Lists the entries of the directory containing `file`.
 *
 * @returns A promise for the entry names; it is rejected with the readdir error if listing fails.
 */
function getSiblings(file: string): TPromise<string[]> {
	return new TPromise<string[]>((resolve, reject) => {
		extfs.readdir(path.dirname(file), (error: Error, files: string[]) => {
			if (error) {
				reject(error);
				// Don't fall through and also resolve the promise
				return;
			}

			resolve(files);
		});
	});
}
535 536 537 538 539 540 541 542

/**
 * Finds the exclude patterns that start with `**` and are set to exactly `true` in every folder query.
 *
 * @returns The set of such pattern keys (possibly empty), or null when there are fewer than two
 *  folder queries or the first one has no exclude pattern.
 */
function findUniversalExcludes(folderQueries: IFolderSearch[]): Set<string> {
	if (folderQueries.length < 2) {
		// Nothing to simplify
		return null;
	}

	const firstFolder = folderQueries[0];
	if (!firstFolder.excludePattern) {
		return null;
	}

	const universalExcludes = new Set<string>();
	// A universal key must be present in the first folder, so its keys are the only candidates
	Object.keys(firstFolder.excludePattern).forEach(key => {
		if (strings.startsWith(key, '**') && folderQueries.every(q => q.excludePattern && q.excludePattern[key] === true)) {
			universalExcludes.add(key);
		}
	});

	return universalExcludes;
}