ripgrepTextSearch.ts 11.6 KB
Newer Older
R
Rob Lourens 已提交
1 2 3 4 5 6
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/
'use strict';

R
💄  
Rob Lourens 已提交
7
import { EventEmitter } from 'events';
8
import * as path from 'path';
R
💄  
Rob Lourens 已提交
9

R
Rob Lourens 已提交
10 11 12
import * as cp from 'child_process';
import { rgPath } from 'vscode-ripgrep';

13
import * as strings from 'vs/base/common/strings';
14
import * as extfs from 'vs/base/node/extfs';
15
import * as encoding from 'vs/base/node/encoding';
R
Rob Lourens 已提交
16
import * as glob from 'vs/base/common/glob';
17
import { ILineMatch, ISearchLog } from 'vs/platform/search/common/search';
18
import { TPromise } from 'vs/base/common/winjs.base';
R
Rob Lourens 已提交
19

20
import { ISerializedFileMatch, ISerializedSearchComplete, IRawSearch } from './search';
R
Rob Lourens 已提交
21

22
/**
 * Runs a text search by spawning ripgrep in a root folder and translating its
 * output (via RipgrepParser) into ISerializedFileMatch results.
 *
 * Exclude patterns that carry a `when` sibling clause are not passed to ripgrep;
 * they are evaluated here, asynchronously, for every file match that ripgrep reports.
 */
export class RipgrepEngine {
	private isDone = false;
	private rgProc: cp.ChildProcess;
	private postProcessExclusions: glob.ParsedExpression;

	private ripgrepParser: RipgrepParser;

	// Tail of the chain of pending asynchronous sibling-exclusion checks, so results are delivered in order
	private handleResultP: TPromise<any> = TPromise.wrap(null);

	constructor(private config: IRawSearch) {
	}

	/**
	 * Stops parsing and kills the ripgrep process, if one is running.
	 */
	cancel(): void {
		this.isDone = true;

		// cancel() may be called before search() created the parser/process, or after
		// the 'close' handler cleared rgProc - neither case may throw.
		if (this.ripgrepParser) {
			this.ripgrepParser.cancel();
		}

		if (this.rgProc) {
			this.rgProc.kill();
		}
	}

	// TODO@Rob - make promise-based once the old search is gone, and I don't need them to have matching interfaces anymore
	/**
	 * Starts the search. Only the first entry of `config.rootFolders` is searched;
	 * with no root folders `done` is invoked immediately with an empty, non-error completion.
	 *
	 * @param onResult called once per file that has matches
	 * @param onMessage called with log output (the rg command line, rg's stderr)
	 * @param done called once when the search finished, failed or hit the result limit
	 */
	search(onResult: (match: ISerializedFileMatch) => void, onMessage: (message: ISearchLog) => void, done: (error: Error, complete: ISerializedSearchComplete) => void): void {
		if (this.config.rootFolders.length) {
			this.searchFolder(this.config.rootFolders[0], onResult, onMessage, done);
		} else {
			done(null, {
				limitHit: false,
				stats: null
			});
		}
	}

	/**
	 * Spawns ripgrep with `rootFolder` as its cwd and wires the process and parser events to the callbacks.
	 */
	private searchFolder(rootFolder: string, onResult: (match: ISerializedFileMatch) => void, onMessage: (message: ISearchLog) => void, done: (error: Error, complete: ISerializedSearchComplete) => void): void {
		const rgArgs = getRgArgs(this.config);
		if (rgArgs.siblingClauses) {
			this.postProcessExclusions = glob.parseToAsync(rgArgs.siblingClauses, { trimForExclusions: true });
		}

		process.nextTick(() => {
			// Allow caller to register progress callback
			const rgCmd = `\nrg ${rgArgs.args.join(' ')}\ncwd: ${rootFolder}\n`;
			onMessage({ message: rgCmd });
		});
		this.rgProc = cp.spawn(rgPath, rgArgs.args, { cwd: rootFolder });

		// Without a listener, a failure to spawn/kill the process surfaces as an
		// unhandled 'error' event and `done` would never be called.
		this.rgProc.on('error', (err: Error) => {
			if (!this.isDone) {
				this.isDone = true;
				done(err, {
					limitHit: false,
					stats: null
				});
			}
		});

		this.ripgrepParser = new RipgrepParser(this.config.maxResults, rootFolder);
		this.ripgrepParser.on('result', (match: ISerializedFileMatch) => {
			if (this.postProcessExclusions) {
				const relativePath = path.relative(rootFolder, match.path);
				this.handleResultP = this.handleResultP
					.then(() => (<TPromise<string>>this.postProcessExclusions(relativePath, undefined, () => getSiblings(match.path))))
					.then(globMatch => {
						if (!globMatch) {
							onResult(match);
						}
					}, (err: Error) => {
						// The exclusion could not be evaluated (e.g. the siblings could not be listed).
						// Keep the chain alive so later results and `done` are still delivered, and
						// report the match since it was not shown to be excluded.
						onMessage({ message: `Failed to evaluate sibling exclusions for ${match.path}: ${err && err.message}` });
						onResult(match);
					});
			} else {
				onResult(match);
			}
		});
		this.ripgrepParser.on('hitLimit', () => {
			this.cancel();

			// Results that are still waiting on their exclusion check must reach the caller before `done`
			const reportLimitHit = () => done(null, {
				limitHit: true,
				stats: null
			});
			this.handleResultP.then(reportLimitHit, reportLimitHit);
		});

		this.rgProc.stdout.on('data', data => {
			this.ripgrepParser.handleData(data);
		});

		let gotData = false;
		this.rgProc.stdout.once('data', () => gotData = true);

		let stderr = '';
		this.rgProc.stderr.on('data', data => {
			const message = data.toString();
			onMessage({ message });
			stderr += message;
		});

		this.rgProc.on('close', code => {
			// Trigger last result, then wait on async result handling
			this.ripgrepParser.flush();

			const finish = () => {
				this.rgProc = null;
				if (!this.isDone) {
					this.isDone = true;
					if (stderr && this.shouldReturnErrorMsg(stderr) && !gotData) {
						done(new Error(stderr), {
							limitHit: false,
							stats: null
						});
					} else {
						done(null, {
							limitHit: false,
							stats: null
						});
					}
				}
			};

			// Complete even if the pending result handling failed, so the caller is never left waiting
			this.handleResultP.then(finish, finish);
		});
	}

	/**
	 * Only stderr output that starts with 'Error parsing regex' is surfaced to the caller as an error.
	 */
	private shouldReturnErrorMsg(msg: string): boolean {
		return strings.startsWith(msg, 'Error parsing regex');
	}
}

/**
 * Incrementally parses ripgrep's colored `--heading` output.
 *
 * Emits:
 *  - 'result' with an ISerializedFileMatch once all collected lines of a file are known
 *  - 'hitLimit' once the number of matches reached `maxResults`
 */
export class RipgrepParser extends EventEmitter {
	// "<ESC>[m<line number><ESC>[m:<line text>"
	private static RESULT_REGEX = /^\u001b\[m(\d+)\u001b\[m:(.*)$/;
	// "<ESC>[m<file path><ESC>[m"
	private static FILE_REGEX = /^\u001b\[m(.+)\u001b\[m$/;

	public static MATCH_START_MARKER = '\u001b[m\u001b[31m';
	public static MATCH_END_MARKER = '\u001b[m';

	// File whose result lines are currently being collected
	private fileMatch: FileMatch;
	// Incomplete last line of the previous chunk, if any
	private remainder: string;
	private isDone: boolean;

	private numResults = 0;

	/**
	 * @param maxResults number of matches after which parsing stops and 'hitLimit' is emitted
	 * @param rootFolder folder that the file paths in the output are joined onto
	 */
	constructor(private maxResults: number, private rootFolder: string) {
		super();
	}

	/**
	 * Stops processing of any further output lines.
	 */
	public cancel(): void {
		this.isDone = true;
	}

	/**
	 * Emits the result for the file that is currently being collected, if there is one.
	 */
	public flush(): void {
		if (this.fileMatch) {
			this.onResult();
		}
	}

	/**
	 * Consumes one chunk of ripgrep's stdout. A chunk may end in the middle of a line;
	 * that partial line is kept and prepended to the next chunk.
	 */
	public handleData(data: string | Buffer): void {
		// If the previous data chunk didn't end in a newline, prepend it to this chunk
		const dataStr = this.remainder ?
			this.remainder + data.toString() :
			data.toString();

		const dataLines: string[] = dataStr.split(/\r\n|\n/);
		this.remainder = dataLines[dataLines.length - 1] ? dataLines.pop() : null;

		for (let l = 0; l < dataLines.length; l++) {
			const outputLine = dataLines[l].trim();
			if (this.isDone) {
				break;
			}

			let r: RegExpMatchArray;
			if (r = outputLine.match(RipgrepParser.RESULT_REGEX)) {
				// Line is a result - add to collected results for the current file path
				// (ripgrep line numbers are 1-based, results are 0-based)
				this.handleMatchLine(outputLine, parseInt(r[1], 10) - 1, r[2]);
			} else if (r = outputLine.match(RipgrepParser.FILE_REGEX)) {
				// Line is a file path - send all collected results for the previous file path
				if (this.fileMatch) {
					this.onResult();
				}

				this.fileMatch = new FileMatch(path.join(this.rootFolder, r[1]));
			} else {
				// Line is empty (or malformed)
			}
		}
	}

	/**
	 * Records one result line for the current file: strips the color markers from `text`
	 * to build the preview and registers the offset/length of every highlighted match.
	 */
	private handleMatchLine(outputLine: string, lineNum: number, text: string): void {
		if (!this.fileMatch) {
			// A result line without a preceding file path line cannot be attributed to any file
			return;
		}

		const lineMatch = new LineMatch(text, lineNum);
		this.fileMatch.addMatch(lineMatch);

		let lastMatchEndPos = 0;
		let matchTextStartPos = -1;

		// Track positions with color codes subtracted - offsets in the final text preview result
		let matchTextStartRealIdx = -1;
		let textRealIdx = 0;
		let hitLimit = false;

		const realTextParts: string[] = [];

		for (let i = 0; i < text.length - (RipgrepParser.MATCH_END_MARKER.length - 1);) {
			// The end marker is a prefix of the start marker, so the start marker has to be tested first
			if (text.substr(i, RipgrepParser.MATCH_START_MARKER.length) === RipgrepParser.MATCH_START_MARKER) {
				// Match start
				const chunk = text.slice(lastMatchEndPos, i);
				realTextParts.push(chunk);
				i += RipgrepParser.MATCH_START_MARKER.length;
				matchTextStartPos = i;
				matchTextStartRealIdx = textRealIdx;
			} else if (text.substr(i, RipgrepParser.MATCH_END_MARKER.length) === RipgrepParser.MATCH_END_MARKER) {
				// Match end
				const chunk = text.slice(matchTextStartPos, i);
				realTextParts.push(chunk);
				if (!hitLimit) {
					lineMatch.addMatch(matchTextStartRealIdx, textRealIdx - matchTextStartRealIdx);
				}

				matchTextStartPos = -1;
				matchTextStartRealIdx = -1;
				i += RipgrepParser.MATCH_END_MARKER.length;
				lastMatchEndPos = i;
				this.numResults++;

				// Check hit maxResults limit
				if (this.numResults >= this.maxResults) {
					// Finish the line, then report the result below
					hitLimit = true;
				}
			} else {
				i++;
				textRealIdx++;
			}
		}

		const chunk = text.slice(lastMatchEndPos);
		realTextParts.push(chunk);

		// Replace preview with version without color codes
		const preview = realTextParts.join('');
		lineMatch.preview = preview;

		if (hitLimit) {
			this.cancel();
			this.onResult();
			this.emit('hitLimit');
		}
	}

	/**
	 * Emits the serialized matches of the current file and resets the collection state.
	 */
	private onResult(): void {
		this.emit('result', this.fileMatch.serialize());
		this.fileMatch = null;
	}
}

/**
 * Collects the matching lines of one file and converts them into the
 * serializable ISerializedFileMatch shape.
 */
export class FileMatch implements ISerializedFileMatch {
	path: string;
	lineMatches: LineMatch[];

	/**
	 * @param path path of the file the matches belong to
	 */
	constructor(path: string) {
		this.path = path;
		this.lineMatches = [];
	}

	/** Appends a matching line to this file. */
	addMatch(lineMatch: LineMatch): void {
		this.lineMatches.push(lineMatch);
	}

	/** True as long as no matching line was added. */
	isEmpty(): boolean {
		return !this.lineMatches.length;
	}

	/**
	 * Serializes every collected line; `numMatches` is the total number of
	 * offset/length pairs over all lines.
	 */
	serialize(): ISerializedFileMatch {
		const serializedLines: ILineMatch[] = [];
		let matchCount = 0;

		this.lineMatches.forEach(line => {
			matchCount += line.offsetAndLengths.length;
			serializedLines.push(line.serialize());
		});

		return {
			path: this.path,
			lineMatches: serializedLines,
			numMatches: matchCount
		};
	}
}

/**
 * One matching line of a file: its preview text, its line number and the
 * [offset, length] pairs of the matches inside the preview.
 */
export class LineMatch implements ILineMatch {
	preview: string;
	lineNumber: number;
	offsetAndLengths: number[][];

	/**
	 * @param preview text of the line; trailing line break characters are removed
	 * @param lineNumber line number of the match
	 */
	constructor(preview: string, lineNumber: number) {
		const trailingLineBreaks = /(\r|\n)*$/;

		this.preview = preview.replace(trailingLineBreaks, '');
		this.lineNumber = lineNumber;
		this.offsetAndLengths = [];
	}

	/** Returns the preview text of the line. */
	getText(): string {
		return this.preview;
	}

	/** Returns the line number of the match. */
	getLineNumber(): number {
		return this.lineNumber;
	}

	/** Registers a match that starts at `offset` and spans `length` characters. */
	addMatch(offset: number, length: number): void {
		const range = [offset, length];
		this.offsetAndLengths.push(range);
	}

	/** Returns a plain object with the data of this line (the offsets array is shared, not copied). */
	serialize(): ILineMatch {
		const { preview, lineNumber, offsetAndLengths } = this;
		return { preview, lineNumber, offsetAndLengths };
	}
}

324
/**
 * Splits a glob expression into the patterns that can be handed to ripgrep as
 * glob arguments and the patterns with a `when` clause, which are returned
 * separately as `siblingClauses` (null when there are none).
 * Patterns whose value is `false` are ignored.
 */
function globExprsToRgGlobs(patterns: glob.IExpression): { globArgs: string[], siblingClauses: glob.IExpression } {
	const globArgs: string[] = [];
	let siblingClauses: glob.IExpression = null;

	for (const pattern of Object.keys(patterns)) {
		const value = patterns[pattern];

		if (value === true) {
			// globs added to ripgrep don't match from the root by default, so add a /
			globArgs.push(pattern.charAt(0) === '*' ? pattern : '/' + pattern);
			continue;
		}

		if (value && value.when) {
			siblingClauses = siblingClauses || {};
			siblingClauses[pattern] = value;
		}
	}

	return { globArgs, siblingClauses };
}

349
/**
 * Builds the ripgrep command line for a search.
 *
 * @returns `args` - the arguments to spawn ripgrep with (the search root is always './'),
 *          `siblingClauses` - the exclude patterns with a `when` clause, which are not part of `args`
 */
function getRgArgs(config: IRawSearch): { args: string[], siblingClauses: glob.IExpression } {
	const query = config.contentPattern;

	// Output format: colored, grouped by file, with line numbers
	const args = ['--hidden', '--heading', '--line-number', '--color', 'ansi', '--colors', 'path:none', '--colors', 'line:none', '--colors', 'match:fg:red', '--colors', 'match:style:nobold'];

	if (query.isCaseSensitive) {
		args.push('--case-sensitive');
	} else {
		args.push('--ignore-case');
	}

	if (config.includePattern) {
		// I don't think includePattern can have siblingClauses
		const includes = globExprsToRgGlobs(config.includePattern);
		for (const includeGlob of includes.globArgs) {
			args.push('-g', includeGlob);
		}
	}

	let siblingClauses: glob.IExpression;
	if (config.excludePattern) {
		const excludes = globExprsToRgGlobs(config.excludePattern);
		for (const excludeGlob of excludes.globArgs) {
			args.push('-g', `!${excludeGlob}`);
		}

		siblingClauses = excludes.siblingClauses;
	}

	if (config.maxFilesize) {
		args.push('--max-filesize', config.maxFilesize + '');
	}

	// Don't use .gitignore or .ignore
	if (config.disregardIgnoreFiles) {
		args.push('--no-ignore');
	}

	// Follow symlinks
	args.push('--follow');

	// Set default encoding
	const fileEncoding = config.fileEncoding;
	if (fileEncoding && fileEncoding !== 'utf8') {
		args.push('--encoding', encoding.toCanonicalName(fileEncoding));
	}

	// A plain-text query is passed as a positional argument after '--'; regex queries go through --regexp
	let fixedStringQuery: string;
	if (query.isWordMatch) {
		const wordRegExp = strings.createRegExp(query.pattern, query.isRegExp, { wholeWord: query.isWordMatch });
		args.push('--regexp', wordRegExp.source);
	} else if (query.isRegExp) {
		args.push('--regexp', query.pattern);
	} else {
		fixedStringQuery = query.pattern;
		args.push('--fixed-strings');
	}

	args.push('--');

	// Put the query after --, in case the query starts with a dash
	if (fixedStringQuery) {
		args.push(fixedStringQuery);
	}

	// Folder to search
	args.push('./');

	return { args, siblingClauses };
}
408 409 410 411 412 413 414 415 416 417 418 419

/**
 * Reads the names of the entries in the directory that contains `file`.
 *
 * @param file path of the file whose containing directory is listed
 * @returns a promise of the directory's entry names; it fails with the readdir error
 */
function getSiblings(file: string): TPromise<string[]> {
	return new TPromise((resolve, reject) => {
		extfs.readdir(path.dirname(file), (error: Error, files: string[]) => {
			if (error) {
				// Stop here - the promise must not additionally be completed after it failed
				reject(error);
				return;
			}

			resolve(files);
		});
	});
}