/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/
'use strict';

import { EventEmitter } from 'events';
import * as path from 'path';
import { StringDecoder, NodeStringDecoder } from 'string_decoder';

import * as cp from 'child_process';
import { rgPath } from 'vscode-ripgrep';

import arrays = require('vs/base/common/arrays');
import objects = require('vs/base/common/objects');
import platform = require('vs/base/common/platform');
import * as strings from 'vs/base/common/strings';
import * as paths from 'vs/base/common/paths';
import * as extfs from 'vs/base/node/extfs';
import * as encoding from 'vs/base/node/encoding';
import * as glob from 'vs/base/common/glob';
import { ILineMatch, ISearchLog } from 'vs/platform/search/common/search';
import { TPromise } from 'vs/base/common/winjs.base';

import { ISerializedFileMatch, ISerializedSearchComplete, IRawSearch, IFolderSearch } from './search';

/**
 * Runs a text search by spawning the ripgrep binary and feeding its stdout through a
 * RipgrepParser. Matches are reported per file through the `onResult` callback passed to
 * `search`, and completion (or a fatal error) is reported through `done`.
 */
export class RipgrepEngine {
	private isDone = false;
	private rgProc: cp.ChildProcess;
	private postProcessExclusions: glob.ParsedExpression;

	private ripgrepParser: RipgrepParser;

	// Chain of pending async exclusion checks; `done` waits on it when the process closes
	private resultsHandledP: TPromise<any> = TPromise.wrap(null);

	constructor(private config: IRawSearch) {
	}

	/**
	 * Stops the search. Safe to call before `search()` has started a process and after the
	 * process has already closed - in both cases there is nothing to kill.
	 */
	cancel(): void {
		this.isDone = true;

		if (this.ripgrepParser) {
			this.ripgrepParser.cancel();
		}

		if (this.rgProc) {
			this.rgProc.kill();
		}
	}

	// TODO@Rob - make promise-based once the old search is gone, and I don't need them to have matching interfaces anymore
	/**
	 * Starts the search.
	 *
	 * @param onResult Called with the serialized matches of one file at a time.
	 * @param onMessage Called with log output: the rg command line, sibling clauses and rg's stderr.
	 * @param done Called once, when the search finished, failed or hit the result limit.
	 */
	search(onResult: (match: ISerializedFileMatch) => void, onMessage: (message: ISearchLog) => void, done: (error: Error, complete: ISerializedSearchComplete) => void): void {
		if (!this.config.folderQueries.length) {
			done(null, {
				limitHit: false,
				stats: null
			});
			return;
		}

		const rgArgs = getRgArgs(this.config);
		if (rgArgs.siblingClauses) {
			this.postProcessExclusions = glob.parseToAsync(rgArgs.siblingClauses, { trimForExclusions: true });
		}

		const cwd = platform.isWindows ? 'c:/' : '/';

		// Logged on the next tick to allow the caller to register its progress callback first
		process.nextTick(() => {
			const escapedArgs = rgArgs.globArgs
				.map(arg => arg.match(/^-/) ? arg : `'${arg}'`)
				.join(' ');

			const rgCmd = `rg ${escapedArgs}\n - cwd: ${cwd}\n`;
			onMessage({ message: rgCmd });
			if (rgArgs.siblingClauses) {
				onMessage({ message: ` - Sibling clauses: ${JSON.stringify(rgArgs.siblingClauses)}\n` });
			}
		});
		this.rgProc = cp.spawn(rgPath, rgArgs.globArgs, { cwd });

		this.ripgrepParser = new RipgrepParser(this.config.maxResults, cwd);
		this.ripgrepParser.on('result', (match: ISerializedFileMatch) => {
			if (this.postProcessExclusions) {
				// Only report the file if no sibling-dependent exclude pattern matches it
				const handleResultP = (<TPromise<string>>this.postProcessExclusions(match.path, undefined, () => getSiblings(match.path)))
					.then(globMatch => {
						if (!globMatch) {
							onResult(match);
						}
					});

				this.resultsHandledP = TPromise.join([this.resultsHandledP, handleResultP]);
			} else {
				onResult(match);
			}
		});
		this.ripgrepParser.on('hitLimit', () => {
			this.cancel();
			done(null, {
				limitHit: true,
				stats: null
			});
		});

		this.rgProc.stdout.on('data', data => {
			this.ripgrepParser.handleData(data);
		});

		let gotData = false;
		this.rgProc.stdout.once('data', () => gotData = true);

		let stderr = '';
		this.rgProc.stderr.on('data', data => {
			const message = data.toString();
			onMessage({ message });
			stderr += message;
		});

		// Without a listener, a ChildProcess 'error' event (e.g. the binary could not be spawned)
		// is thrown as an unhandled EventEmitter error. Report it through `done` instead.
		this.rgProc.on('error', (error: Error) => {
			if (!this.isDone) {
				this.isDone = true;
				done(error, {
					limitHit: false,
					stats: null
				});
			}
		});

		this.rgProc.on('close', () => {
			// Reports `done` at most once; a no-op if the search was cancelled or already finished
			const finish = (error: Error) => {
				this.rgProc = null;
				if (!this.isDone) {
					this.isDone = true;
					done(error, {
						limitHit: false,
						stats: null
					});
				}
			};

			// Trigger last result, then wait on async result handling
			this.ripgrepParser.flush();
			this.resultsHandledP.then(() => {
				let displayMsg: string;
				if (stderr && !gotData && (displayMsg = this.rgErrorMsgForDisplay(stderr))) {
					finish(new Error(displayMsg));
				} else {
					finish(null);
				}
			}, (error: Error) => {
				// A failed exclusion check must not leave the caller waiting forever
				finish(error);
			});
		});
	}

	/**
	 * Read the first line of stderr and return an error for display or undefined, based on a whitelist.
	 * Ripgrep produces stderr output which is not from a fatal error, and we only want the search to be
	 * "failed" when a fatal error was produced.
	 */
	private rgErrorMsgForDisplay(msg: string): string | undefined {
		const firstLine = msg.split('\n')[0];

		// The error "No such file or directory" is returned for broken symlinks and also for bad search paths.
		// Only show it if it's from a search path.
		const reg = /^\.\/(.*): No such file or directory \(os error 2\)/;
		const noSuchFileMatch = firstLine.match(reg);
		if (noSuchFileMatch) {
			const errorPath = noSuchFileMatch[1];

			// Plain substring tests: the path may contain regex metacharacters, and the folder
			// may use either path separator.
			const matchesFolderQuery = this.config.folderQueries
				.map(q => q.folder)
				.some(folder => folder.indexOf('/' + errorPath) >= 0 || folder.indexOf('\\' + errorPath) >= 0);

			return matchesFolderQuery ?
				firstLine :
				undefined;
		}

		if (strings.startsWith(firstLine, 'Error parsing regex')) {
			return firstLine;
		}

		if (strings.startsWith(firstLine, 'error parsing glob')) {
			return firstLine;
		}

		return undefined;
	}
}

/**
 * Incrementally parses ripgrep's colored `--heading` output.
 *
 * Emits 'result' with a serialized file match each time all lines of one file have been
 * collected, and 'hitLimit' once `maxResults` matches have been seen.
 */
export class RipgrepParser extends EventEmitter {
	// "<ESC>[m<line number><ESC>[m:<line text>"
	private static RESULT_REGEX = /^\u001b\[m(\d+)\u001b\[m:(.*)(\r?)/;
	// "<ESC>[m<file path><ESC>[m"
	private static FILE_REGEX = /^\u001b\[m(.+)\u001b\[m$/;

	public static MATCH_START_MARKER = '\u001b[m\u001b[31m';
	public static MATCH_END_MARKER = '\u001b[m';

	// Matches collected so far for the file whose heading was seen last
	private fileMatch: FileMatch;
	// Trailing partial line of the previous chunk, if it did not end in a newline
	private remainder: string;
	private isDone: boolean;
	private stringDecoder: NodeStringDecoder;

	private numResults = 0;

	/**
	 * @param maxResults Number of matches after which parsing stops and 'hitLimit' is emitted.
	 * @param rootFolder Folder that relative file paths in the output are joined onto.
	 */
	constructor(private maxResults: number, private rootFolder: string) {
		super();
		this.stringDecoder = new StringDecoder();
	}

	/** Stops parsing; lines handled after this call are ignored. */
	public cancel(): void {
		this.isDone = true;
	}

	/**
	 * Call once the output has ended: handles any bytes still buffered in the decoder, the last
	 * line if it was not newline-terminated, and emits the result for the last file.
	 */
	public flush(): void {
		this.handleDecodedData(this.stringDecoder.end());

		// A last line without a trailing newline is still parked in `remainder`
		if (this.remainder) {
			const lastLine = this.remainder;
			this.remainder = null;
			if (!this.isDone) {
				this.handleLine(lastLine);
			}
		}

		if (this.fileMatch) {
			this.onResult();
		}
	}

	/** Feeds one chunk of output. Buffers are decoded statefully so split characters survive. */
	public handleData(data: Buffer | string): void {
		const dataStr = typeof data === 'string' ? data : this.stringDecoder.write(data);
		this.handleDecodedData(dataStr);
	}

	private handleDecodedData(decodedData: string): void {
		// If the previous data chunk didn't end in a newline, prepend it to this chunk
		const dataStr = this.remainder ?
			this.remainder + decodedData :
			decodedData;

		const dataLines: string[] = dataStr.split(/\r\n|\n/);
		this.remainder = dataLines[dataLines.length - 1] ? dataLines.pop() : null;

		for (let l = 0; l < dataLines.length; l++) {
			if (this.isDone) {
				break;
			}

			this.handleLine(dataLines[l]);
		}
	}

	/** Classifies a single output line as a match line, a file heading, or something to ignore. */
	private handleLine(line: string): void {
		const outputLine = line.trim();

		let r: RegExpMatchArray;
		if (r = outputLine.match(RipgrepParser.RESULT_REGEX)) {
			const lineNum = parseInt(r[1], 10) - 1;
			let matchText = r[2];

			// workaround https://github.com/BurntSushi/ripgrep/issues/416
			// If the match line ended with \r, append a match end marker so the match isn't lost
			if (r[3]) {
				matchText += RipgrepParser.MATCH_END_MARKER;
			}

			// Line is a result - add to collected results for the current file path
			this.handleMatchLine(lineNum, matchText);
		} else if (r = outputLine.match(RipgrepParser.FILE_REGEX)) {
			// Line is a file path - send all collected results for the previous file path
			if (this.fileMatch) {
				this.onResult();
			}

			this.fileMatch = new FileMatch(path.isAbsolute(r[1]) ? r[1] : path.join(this.rootFolder, r[1]));
		} else {
			// Line is empty (or malformed)
		}
	}

	/**
	 * Records one matching line for the current file: strips the color markers from `text`
	 * to build the preview and records [offset, length] of each highlighted match.
	 */
	private handleMatchLine(lineNum: number, text: string): void {
		const lineMatch = new LineMatch(text, lineNum);
		this.fileMatch.addMatch(lineMatch);

		let lastMatchEndPos = 0;
		let matchTextStartPos = -1;

		// Track positions with color codes subtracted - offsets in the final text preview result
		let matchTextStartRealIdx = -1;
		let textRealIdx = 0;
		let hitLimit = false;

		const realTextParts: string[] = [];

		for (let i = 0; i < text.length - (RipgrepParser.MATCH_END_MARKER.length - 1);) {
			if (text.substr(i, RipgrepParser.MATCH_START_MARKER.length) === RipgrepParser.MATCH_START_MARKER) {
				// Match start
				const chunk = text.slice(lastMatchEndPos, i);
				realTextParts.push(chunk);
				i += RipgrepParser.MATCH_START_MARKER.length;
				matchTextStartPos = i;
				matchTextStartRealIdx = textRealIdx;
			} else if (text.substr(i, RipgrepParser.MATCH_END_MARKER.length) === RipgrepParser.MATCH_END_MARKER) {
				// Match end
				const chunk = text.slice(matchTextStartPos, i);
				realTextParts.push(chunk);
				if (!hitLimit) {
					lineMatch.addMatch(matchTextStartRealIdx, textRealIdx - matchTextStartRealIdx);
				}

				matchTextStartPos = -1;
				matchTextStartRealIdx = -1;
				i += RipgrepParser.MATCH_END_MARKER.length;
				lastMatchEndPos = i;
				this.numResults++;

				// Check hit maxResults limit
				if (this.numResults >= this.maxResults) {
					// Finish the line, then report the result below
					hitLimit = true;
				}
			} else {
				i++;
				textRealIdx++;
			}
		}

		const chunk = text.slice(lastMatchEndPos);
		realTextParts.push(chunk);

		// Replace preview with version without color codes
		const preview = realTextParts.join('');
		lineMatch.preview = preview;

		if (hitLimit) {
			this.cancel();
			this.onResult();
			this.emit('hitLimit');
		}
	}

	/** Emits the matches collected for the current file and resets the collector. */
	private onResult(): void {
		this.emit('result', this.fileMatch.serialize());
		this.fileMatch = null;
	}
}

/**
 * Collects the matching lines found in a single file.
 */
export class FileMatch implements ISerializedFileMatch {
	path: string;
	lineMatches: LineMatch[];

	constructor(path: string) {
		this.lineMatches = [];
		this.path = path;
	}

	/** Appends one matching line to this file's results. */
	addMatch(lineMatch: LineMatch): void {
		this.lineMatches.push(lineMatch);
	}

	/** True while no line has been added. */
	isEmpty(): boolean {
		return !this.lineMatches.length;
	}

	/**
	 * Produces the plain-object form of this file match. `numMatches` is the total number of
	 * [offset, length] pairs over all lines.
	 */
	serialize(): ISerializedFileMatch {
		const serializedLines: ILineMatch[] = [];
		let total = 0;

		this.lineMatches.forEach(line => {
			total += line.offsetAndLengths.length;
			serializedLines.push(line.serialize());
		});

		return {
			path: this.path,
			lineMatches: serializedLines,
			numMatches: total
		};
	}
}

/**
 * One matching line of a file: its preview text, line number and the position of every match
 * within the preview.
 */
export class LineMatch implements ILineMatch {
	preview: string;
	lineNumber: number;
	// One [offset, length] pair per match
	offsetAndLengths: number[][];

	/**
	 * @param preview Text of the line; trailing \r and \n characters are stripped.
	 * @param lineNumber Line number, stored as given.
	 */
	constructor(preview: string, lineNumber: number) {
		this.preview = preview.replace(/(\r|\n)*$/, '');
		this.lineNumber = lineNumber;
		this.offsetAndLengths = [];
	}

	getText(): string {
		return this.preview;
	}

	getLineNumber(): number {
		return this.lineNumber;
	}

	/** Records a match starting at `offset` and spanning `length` characters. */
	addMatch(offset: number, length: number): void {
		this.offsetAndLengths.push([offset, length]);
	}

	/** Returns the plain-object form of this line match. */
	serialize(): ILineMatch {
		const result = {
			preview: this.preview,
			lineNumber: this.lineNumber,
			offsetAndLengths: this.offsetAndLengths
		};

		return result;
	}
}

/**
 * Result of translating glob expressions for ripgrep.
 */
interface IRgGlobResult {
	// Glob strings (or, as returned by getRgArgs, the complete rg argument list)
	globArgs: string[];
	// Patterns with a `when` clause; they are not passed to rg but evaluated on its results
	siblingClauses?: glob.IExpression;
}

/**
 * Translates the exclude pattern of every folder query into ripgrep globs.
 */
function foldersToRgExcludeGlobs(folderQueries: IFolderSearch[]): IRgGlobResult {
	const excludeOf = (folderQuery: IFolderSearch) => folderQuery.excludePattern;
	return foldersToRgGlobs(folderQueries, excludeOf);
}

/**
 * Translates include patterns into ripgrep globs. For each folder, its own include pattern is
 * merged with the global one; on key clashes the global entry wins.
 */
function foldersToRgIncludeGlobs(folderQueries: IFolderSearch[], globalInclude: glob.IExpression): IRgGlobResult {
	const mergedIncludeOf = (folderQuery: IFolderSearch) => {
		return objects.assign({}, folderQuery.includePattern || {}, globalInclude || {});
	};

	return foldersToRgGlobs(folderQueries, mergedIncludeOf);
}

/**
 * Translates the global exclude pattern into ripgrep globs, once per folder query.
 */
function foldersToRgGlobalExcludeGlobs(folderQueries: IFolderSearch[], globalExclude: glob.IExpression): IRgGlobResult {
	const sameForEveryFolder = () => globalExclude;
	return foldersToRgGlobs(folderQueries, sameForEveryFolder);
}

/**
 * Collects the ripgrep globs and sibling clauses of all folder queries.
 *
 * @param folderQueries Folders being searched.
 * @param patternProvider Picks the glob expression to translate for a given folder query.
 * @returns All glob args in folder order. `siblingClauses` is null when no pattern had a
 * `when` clause, so that callers can skip post-processing of results entirely.
 */
function foldersToRgGlobs(folderQueries: IFolderSearch[], patternProvider: (fs: IFolderSearch) => glob.IExpression): IRgGlobResult {
	const globArgs: string[] = [];
	let siblingClauses: glob.IExpression = null;
	folderQueries.forEach(folderQuery => {
		const pattern = patternProvider(folderQuery) || {};
		const result = globExprsToRgGlobs(pattern, folderQuery.folder);
		globArgs.push(...result.globArgs);
		if (result.siblingClauses) {
			// Created lazily: an always-present empty object would read as "has sibling clauses"
			siblingClauses = objects.assign(siblingClauses || {}, result.siblingClauses);
		}
	});

	return { globArgs, siblingClauses };
}

/**
 * Splits a glob expression into globs that can be handed to ripgrep and sibling clauses.
 *
 * Every key is first made absolute against `folder`. Keys whose value is `true` become glob
 * args; keys whose value has a `when` clause are returned as sibling clauses under their
 * absolute key. All other entries are dropped.
 *
 * @returns `siblingClauses` is null when no entry had a `when` clause.
 */
function globExprsToRgGlobs(patterns: glob.IExpression, folder: string): IRgGlobResult {
	const globArgs: string[] = [];
	let siblingClauses: glob.IExpression = null;
	Object.keys(patterns)
		.forEach(key => {
			const value = patterns[key];
			key = getAbsoluteGlob(folder, key);

			if (typeof value === 'boolean' && value) {
				globArgs.push(fixDriveC(key));
			} else if (value && value.when) {
				if (!siblingClauses) {
					siblingClauses = {};
				}

				siblingClauses[key] = value;
			}
		});

	return { globArgs, siblingClauses };
}

/**
 * Resolves a glob like "node_modules/**" in "/foo/bar" to "/foo/bar/node_modules/**".
 * A glob that is already absolute is returned unchanged.
 *
 * Exported for testing
 */
export function getAbsoluteGlob(folder: string, key: string): string {
	return paths.isAbsolute(key) ?
		key :
		path.join(folder, key);
}
450 451
/**
 * Special cases C:/foo paths to write the glob like /foo instead - see https://github.com/BurntSushi/ripgrep/issues/530.
 * Paths whose root is not "c:/" (compared case-insensitively) are returned unchanged.
 */
export function fixDriveC(path: string): string {
	const root = paths.getRoot(path);
	return root.toLowerCase() === 'c:/' ?
		path.replace(/^c:[/\\]/i, '/') :
		path;
}

/**
 * Builds the complete ripgrep command line for a search.
 *
 * The given `config` is only read, never modified.
 *
 * @returns `globArgs` holds the full argument list (flags, globs, pattern, folders and extra
 * files). `siblingClauses` holds the exclude patterns with a `when` clause, or null if none.
 */
function getRgArgs(config: IRawSearch): IRgGlobResult {
	const args = ['--hidden', '--heading', '--line-number', '--color', 'ansi', '--colors', 'path:none', '--colors', 'line:none', '--colors', 'match:fg:red', '--colors', 'match:style:nobold'];
	args.push(config.contentPattern.isCaseSensitive ? '--case-sensitive' : '--ignore-case');

	const globsToGlobArgs = (globArgs: string[]) => arrays.flatten(globArgs.map(arg => ['-g', arg]));
	const globToNotGlob = (glob: string) => '!' + glob;

	// Include/exclude precedence:
	// settings exclude < global include < global exclude
	const excludeResult = foldersToRgExcludeGlobs(config.folderQueries);
	args.push(...globsToGlobArgs(excludeResult.globArgs.map(globToNotGlob)));

	const includeResult = foldersToRgIncludeGlobs(config.folderQueries, config.includePattern);
	args.push(...globsToGlobArgs(includeResult.globArgs));

	const globalExcludeResult = foldersToRgGlobalExcludeGlobs(config.folderQueries, config.excludePattern);
	args.push(...globsToGlobArgs(globalExcludeResult.globArgs.map(globToNotGlob)));

	// includePattern can't have siblingClauses
	const siblingClauses = (excludeResult.siblingClauses || globalExcludeResult.siblingClauses) ?
		objects.assign({}, excludeResult.siblingClauses || {}, globalExcludeResult.siblingClauses || {}) :
		null;

	if (config.maxFilesize) {
		args.push('--max-filesize', config.maxFilesize + '');
	}

	if (config.disregardIgnoreFiles) {
		// Don't use .gitignore or .ignore
		args.push('--no-ignore');
	}

	// Follow symlinks
	args.push('--follow');

	// Set default encoding if only one folder is opened
	if (config.folderQueries.length === 1 && config.folderQueries[0].fileEncoding && config.folderQueries[0].fileEncoding !== 'utf8') {
		args.push('--encoding', encoding.toCanonicalName(config.folderQueries[0].fileEncoding));
	}

	// Work on local copies so that the special case below does not leak into the caller's query
	let pattern = config.contentPattern.pattern;
	let isRegExp = config.contentPattern.isRegExp;

	// Ripgrep handles -- as a -- arg separator. Only --.
	// - is ok, --- is ok, --some-flag is handled as query text. Need to special case.
	if (pattern === '--') {
		isRegExp = true;
		pattern = '\\-\\-';
	}

	let searchPatternAfterDoubleDashes: string;
	if (config.contentPattern.isWordMatch) {
		const regexp = strings.createRegExp(pattern, isRegExp, { wholeWord: config.contentPattern.isWordMatch });
		const regexpStr = regexp.source.replace(/\\\//g, '/'); // RegExp.source arbitrarily returns escaped slashes. Search and destroy.
		args.push('--regexp', regexpStr);
	} else if (isRegExp) {
		args.push('--regexp', pattern);
	} else {
		searchPatternAfterDoubleDashes = pattern;
		args.push('--fixed-strings');
	}

	// Everything after -- is positional: [pattern] followed by the paths to search
	args.push('--');

	if (searchPatternAfterDoubleDashes) {
		// Put the query after --, in case the query starts with a dash
		args.push(searchPatternAfterDoubleDashes);
	}

	// Folders to search, then any extra files
	args.push(...config.folderQueries.map(q => q.folder));
	if (config.extraFiles) {
		args.push(...config.extraFiles);
	}

	return { globArgs: args, siblingClauses };
}
/**
 * Lists the entries of the directory that contains `file`.
 *
 * @returns A promise for the names returned by readdir; it is rejected if readdir fails.
 */
function getSiblings(file: string): TPromise<string[]> {
	return new TPromise<string[]>((resolve, reject) => {
		extfs.readdir(path.dirname(file), (error: Error, files: string[]) => {
			if (error) {
				// Settle exactly once - do not fall through to resolve
				reject(error);
				return;
			}

			resolve(files);
		});
	});
}