fileSearch.ts 17.5 KB
Newer Older
E
Erich Gamma 已提交
1 2 3 4 5 6 7
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

'use strict';

C
Christof Marti 已提交
8 9
import * as childProcess from 'child_process';
import {StringDecoder} from 'string_decoder';
E
Erich Gamma 已提交
10 11
import fs = require('fs');
import paths = require('path');
C
Christof Marti 已提交
12
import {Readable} from "stream";
E
Erich Gamma 已提交
13

14
import scorer = require('vs/base/common/scorer');
E
Erich Gamma 已提交
15
import arrays = require('vs/base/common/arrays');
C
Christof Marti 已提交
16
import platform = require('vs/base/common/platform');
E
Erich Gamma 已提交
17
import strings = require('vs/base/common/strings');
18
import types = require('vs/base/common/types');
E
Erich Gamma 已提交
19
import glob = require('vs/base/common/glob');
20
import {IProgress, IUncachedSearchStats} from 'vs/platform/search/common/search';
E
Erich Gamma 已提交
21 22 23

import extfs = require('vs/base/node/extfs');
import flow = require('vs/base/node/flow');
24
import {IRawFileMatch, ISerializedSearchComplete, IRawSearch, ISearchEngine} from './search';
E
Erich Gamma 已提交
25

C
Christof Marti 已提交
26 27 28
enum Traversal {
	Node = 1,
	MacFind,
C
Christof Marti 已提交
29 30
	WindowsDir,
	LinuxFind
C
Christof Marti 已提交
31 32 33 34 35 36 37 38 39 40 41 42
}

interface IDirectoryEntry {
	relativePath: string;
	basename: string;
}

interface IDirectoryTree {
	rootEntries: IDirectoryEntry[];
	pathToEntries: { [relativePath: string]: IDirectoryEntry[] };
}

E
Erich Gamma 已提交
43 44
export class FileWalker {
	private config: IRawSearch;
45
	private filePattern: string;
46
	private normalizedFilePatternLowercase: string;
C
Christof Marti 已提交
47 48
	private excludePattern: glob.ParsedExpression;
	private includePattern: glob.ParsedExpression;
E
Erich Gamma 已提交
49
	private maxResults: number;
50
	private maxFilesize: number;
E
Erich Gamma 已提交
51 52 53
	private isLimitHit: boolean;
	private resultCount: number;
	private isCanceled: boolean;
C
chrmarti 已提交
54 55 56
	private fileWalkStartTime: number;
	private directoriesWalked: number;
	private filesWalked: number;
C
Christof Marti 已提交
57 58 59 60 61
	private traversal: Traversal;
	private errors: string[];
	private cmdForkStartTime: number;
	private cmdForkResultTime: number;
	private cmdResultCount: number;
E
Erich Gamma 已提交
62 63 64 65 66

	private walkedPaths: { [path: string]: boolean; };

	constructor(config: IRawSearch) {
		this.config = config;
67
		this.filePattern = config.filePattern;
C
Christof Marti 已提交
68 69
		this.excludePattern = glob.parse(config.excludePattern);
		this.includePattern = config.includePattern && glob.parse(config.includePattern);
E
Erich Gamma 已提交
70
		this.maxResults = config.maxResults || null;
71
		this.maxFilesize = config.maxFilesize || null;
E
Erich Gamma 已提交
72
		this.walkedPaths = Object.create(null);
73 74
		this.resultCount = 0;
		this.isLimitHit = false;
C
chrmarti 已提交
75 76
		this.directoriesWalked = 0;
		this.filesWalked = 0;
C
Christof Marti 已提交
77 78
		this.traversal = Traversal.Node;
		this.errors = [];
B
Benjamin Pasero 已提交
79

80 81
		if (this.filePattern) {
			this.normalizedFilePatternLowercase = strings.stripWildcards(this.filePattern).toLowerCase();
B
Benjamin Pasero 已提交
82
		}
E
Erich Gamma 已提交
83 84 85 86 87 88
	}

	public cancel(): void {
		this.isCanceled = true;
	}

89
	public walk(rootFolders: string[], extraFiles: string[], onResult: (result: IRawFileMatch) => void, done: (error: Error, isLimitHit: boolean) => void): void {
C
chrmarti 已提交
90
		this.fileWalkStartTime = Date.now();
E
Erich Gamma 已提交
91

92
		// Support that the file pattern is a full path to a file that exists
93
		this.checkFilePatternAbsoluteMatch((exists, size) => {
94 95 96
			if (this.isCanceled) {
				return done(null, this.isLimitHit);
			}
E
Erich Gamma 已提交
97

98 99
			// Report result from file pattern if matching
			if (exists) {
100 101
				this.resultCount++;
				onResult({
C
Christof Marti 已提交
102
					path: this.filePattern,
103 104
					size
				});
105 106 107 108

				// Optimization: a match on an absolute path is a good result and we do not
				// continue walking the entire root paths array for other matches because
				// it is very unlikely that another file would match on the full absolute path
109
				return done(null, this.isLimitHit);
110
			}
E
Erich Gamma 已提交
111

112 113 114
			// For each extra file
			if (extraFiles) {
				extraFiles.forEach(extraFilePath => {
C
Christof Marti 已提交
115
					if (this.excludePattern(extraFilePath)) {
116
						return; // excluded
E
Erich Gamma 已提交
117 118
					}

119
					// File: Check for match on file pattern and include pattern
C
Christof Marti 已提交
120
					this.matchFile(onResult, null, extraFilePath /* no workspace relative path */);
121 122
				});
			}
123

C
Christof Marti 已提交
124
			let traverse = this.nodeJSTraversal;
C
Christof Marti 已提交
125 126 127 128 129 130 131
			if (!this.maxFilesize) {
				if (platform.isMacintosh) {
					this.traversal = Traversal.MacFind;
					traverse = this.macFindTraversal;
				} else if (platform.isWindows) {
					this.traversal = Traversal.WindowsDir;
					traverse = this.windowsDirTraversal;
C
Christof Marti 已提交
132 133 134
				} else if (platform.isLinux) {
					this.traversal = Traversal.LinuxFind;
					traverse = this.linuxFindTraversal;
C
Christof Marti 已提交
135 136 137 138 139 140
				}
			}

			const isNodeTraversal = traverse === this.nodeJSTraversal;
			if (!isNodeTraversal) {
				this.cmdForkStartTime = Date.now();
C
Christof Marti 已提交
141 142
			}

143
			// For each root folder
C
Christof Marti 已提交
144 145 146
			flow.parallel(rootFolders, (rootFolder, rootFolderDone: (err?: Error) => void) => {
				traverse.call(this, rootFolder, onResult, err => {
					if (err) {
C
Christof Marti 已提交
147
						if (isNodeTraversal) {
C
Christof Marti 已提交
148 149 150 151 152 153 154 155
							rootFolderDone(err);
						} else {
							// fallback
							this.errors.push(String(err));
							this.nodeJSTraversal(rootFolder, onResult, rootFolderDone);
						}
					} else {
						rootFolderDone();
156
					}
C
Christof Marti 已提交
157 158 159 160 161 162
				});
			}, (err, result) => {
				done(err ? err[0] : null, this.isLimitHit);
			});
		});
	}
163

C
Christof Marti 已提交
164 165
	private macFindTraversal(rootFolder: string, onResult: (result: IRawFileMatch) => void, done: (err?: Error) => void): void {
		const cmd = childProcess.spawn('find', ['-L', '.', '-type', 'f'], { cwd: rootFolder });
C
Christof Marti 已提交
166
		this.readStdout(cmd, 'utf8', (err: Error, stdout?: string) => {
C
Christof Marti 已提交
167 168 169 170
			if (err) {
				done(err);
				return;
			}
E
Erich Gamma 已提交
171

C
Christof Marti 已提交
172 173 174 175 176 177 178 179 180
			// Mac: uses NFD unicode form on disk, but we want NFC
			const relativeFiles = strings.normalizeNFC(stdout).split('\n./');
			relativeFiles[0] = relativeFiles[0].trim().substr(2);
			const n = relativeFiles.length;
			relativeFiles[n - 1] = relativeFiles[n - 1].trim();
			if (!relativeFiles[n - 1]) {
				relativeFiles.pop();
			}

C
Christof Marti 已提交
181
			this.matchFiles(rootFolder, relativeFiles, onResult);
C
Christof Marti 已提交
182

C
Christof Marti 已提交
183 184 185 186 187 188 189 190 191 192
			done();
		});
	}

	private windowsDirTraversal(rootFolder: string, onResult: (result: IRawFileMatch) => void, done: (err?: Error) => void): void {
		const cmd = childProcess.spawn('cmd', ['/U', '/c', 'dir', '/s', '/b', '/a-d'], { cwd: rootFolder });
		this.readStdout(cmd, 'ucs2', (err: Error, stdout?: string) => {
			if (err) {
				done(err);
				return;
C
Christof Marti 已提交
193 194
			}

C
Christof Marti 已提交
195 196 197 198 199 200 201 202 203
			const relativeFiles = stdout.split(`\r\n${rootFolder}\\`);
			relativeFiles[0] = relativeFiles[0].trim().substr(rootFolder.length + 1);
			const n = relativeFiles.length;
			relativeFiles[n - 1] = relativeFiles[n - 1].trim();
			if (!relativeFiles[n - 1]) {
				relativeFiles.pop();
			}

			this.matchFiles(rootFolder, relativeFiles, onResult);
C
Christof Marti 已提交
204 205 206 207 208

			done();
		});
	}

C
Christof Marti 已提交
209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230
	private linuxFindTraversal(rootFolder: string, onResult: (result: IRawFileMatch) => void, done: (err?: Error) => void): void {
		const cmd = childProcess.spawn('find', ['-L', '.', '-type', 'f'], { cwd: rootFolder });
		this.readStdout(cmd, 'utf8', (err: Error, stdout?: string) => {
			if (err) {
				done(err);
				return;
			}

			const relativeFiles = stdout.split('\n./');
			relativeFiles[0] = relativeFiles[0].trim().substr(2);
			const n = relativeFiles.length;
			relativeFiles[n - 1] = relativeFiles[n - 1].trim();
			if (!relativeFiles[n - 1]) {
				relativeFiles.pop();
			}

			this.matchFiles(rootFolder, relativeFiles, onResult);

			done();
		});
	}

C
Christof Marti 已提交
231
	private readStdout(cmd: childProcess.ChildProcess, encoding: string, cb: (err: Error, stdout?: string) => void): void {
C
Christof Marti 已提交
232 233
		let done = (err: Error, stdout?: string) => {
			done = () => {};
C
Christof Marti 已提交
234
			this.cmdForkResultTime = Date.now();
C
Christof Marti 已提交
235 236 237 238 239 240 241 242 243 244 245 246
			cb(err, stdout);
		};

		const stdout = this.collectData(cmd.stdout);
		const stderr = this.collectData(cmd.stderr);

		cmd.on('error', err => {
			done(err);
		});

		cmd.on('close', code => {
			if (code !== 0) {
C
Christof Marti 已提交
247
				done(new Error(`find failed with error code ${code}: ${this.decodeData(stderr, encoding)}`));
C
Christof Marti 已提交
248
			} else {
C
Christof Marti 已提交
249
				done(null, this.decodeData(stdout, encoding));
C
Christof Marti 已提交
250 251 252 253 254 255 256 257 258 259 260 261
			}
		});
	}

	private collectData(stream: Readable): Buffer[] {
		const buffers = [];
		stream.on('data', data => {
			buffers.push(data);
		});
		return buffers;
	}

C
Christof Marti 已提交
262 263 264 265 266 267 268 269 270 271 272 273 274 275 276
	private decodeData(buffers: Buffer[], encoding: string): string {
		const decoder = new StringDecoder(encoding);
		return buffers.map(buffer => decoder.write(buffer)).join('');
	}

	private matchFiles(rootFolder: string, relativeFiles: string[], onResult: (result: IRawFileMatch) => void) {
		this.cmdResultCount = relativeFiles.length;

		// Support relative paths to files from a root resource (ignores excludes)
		if (relativeFiles.indexOf(this.filePattern) !== -1) {
			this.matchFile(onResult, rootFolder, this.filePattern);
		}

		const tree = this.buildDirectoryTree(relativeFiles);
		this.matchDirectoryTree(rootFolder, tree, onResult);
C
Christof Marti 已提交
277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309
	}

	private buildDirectoryTree(relativeFilePaths: string[]): IDirectoryTree {
		const tree: IDirectoryTree = {
			rootEntries: [],
			pathToEntries: Object.create(null)
		};
		const {pathToEntries} = tree;
		pathToEntries['.'] = tree.rootEntries;
		relativeFilePaths.forEach(function add(relativePath: string) {
			const basename = paths.basename(relativePath);
			const dirname = paths.dirname(relativePath);
			let entries = pathToEntries[dirname];
			if (!entries) {
				entries = pathToEntries[dirname] = [];
				add(dirname);
			}
			entries.push({
				relativePath,
				basename
			});
		});
		return tree;
	}

	private matchDirectoryTree(rootFolder: string, { rootEntries, pathToEntries }: IDirectoryTree, onResult: (result: IRawFileMatch) => void) {
		const self = this;
		const excludePattern = this.excludePattern;
		const filePattern = this.filePattern;
		function matchDirectory(entries: IDirectoryEntry[]) {
			self.directoriesWalked++;
			for (let i = 0, n = entries.length; i < n; i++) {
				const entry = entries[i];
C
Christof Marti 已提交
310
				const relativePath = entry.relativePath;
C
Christof Marti 已提交
311 312 313 314 315 316 317 318

				// Check exclude pattern
				// If the user searches for the exact file name, we adjust the glob matching
				// to ignore filtering by siblings because the user seems to know what she
				// is searching for and we want to include the result in that case anyway
				if (excludePattern(relativePath, () => filePattern !== entry.basename ? entries.map(entry => entry.basename) : [])) {
					continue;
				}
E
Erich Gamma 已提交
319

C
Christof Marti 已提交
320 321 322 323 324 325 326 327 328
				const sub = pathToEntries[relativePath];
				if (sub) {
					matchDirectory(sub);
				} else {
					self.filesWalked++;
					if (relativePath === filePattern) {
						continue; // ignore file if its path matches with the file pattern because that is already matched above
					}

C
Christof Marti 已提交
329
					self.matchFile(onResult, rootFolder, relativePath);
C
Christof Marti 已提交
330 331 332 333 334 335
				}
			};
		}
		matchDirectory(rootEntries);
	}

C
Christof Marti 已提交
336
	private nodeJSTraversal(rootFolder: string, onResult: (result: IRawFileMatch) => void, done: (err?: Error) => void): void {
C
Christof Marti 已提交
337
		this.directoriesWalked++;
C
Christof Marti 已提交
338
		extfs.readdir(rootFolder, (error: Error, files: string[]) => {
C
Christof Marti 已提交
339 340 341 342 343
			if (error || this.isCanceled || this.isLimitHit) {
				return done();
			}

			// Support relative paths to files from a root resource (ignores excludes)
C
Christof Marti 已提交
344
			return this.checkFilePatternRelativeMatch(rootFolder, (match, size) => {
C
Christof Marti 已提交
345 346 347 348 349 350 351 352
				if (this.isCanceled || this.isLimitHit) {
					return done();
				}

				// Report result from file pattern if matching
				if (match) {
					this.resultCount++;
					onResult({
C
Christof Marti 已提交
353 354
						base: rootFolder,
						path: this.filePattern,
C
Christof Marti 已提交
355
						size
356
					});
C
Christof Marti 已提交
357 358
				}

C
Christof Marti 已提交
359
				return this.doWalk(rootFolder, '', files, onResult, done);
E
Erich Gamma 已提交
360
			});
361 362 363
		});
	}

364
	public getStats(): IUncachedSearchStats {
C
chrmarti 已提交
365
		return {
366
			fromCache: false,
C
Christof Marti 已提交
367 368
			traversal: Traversal[this.traversal],
			errors: this.errors,
C
chrmarti 已提交
369 370 371
			fileWalkStartTime: this.fileWalkStartTime,
			fileWalkResultTime: Date.now(),
			directoriesWalked: this.directoriesWalked,
372
			filesWalked: this.filesWalked,
C
Christof Marti 已提交
373 374 375 376
			resultCount: this.resultCount,
			cmdForkStartTime: this.cmdForkStartTime,
			cmdForkResultTime: this.cmdForkResultTime,
			cmdResultCount: this.cmdResultCount
C
chrmarti 已提交
377 378 379
		};
	}

380
	private checkFilePatternAbsoluteMatch(clb: (exists: boolean, size?: number) => void): void {
381 382 383 384
		if (!this.filePattern || !paths.isAbsolute(this.filePattern)) {
			return clb(false);
		}

385
		return fs.stat(this.filePattern, (error, stat) => {
386
			return clb(!error && !stat.isDirectory(), stat && stat.size); // only existing files
E
Erich Gamma 已提交
387 388 389
		});
	}

390
	private checkFilePatternRelativeMatch(basePath: string, clb: (matchPath: string, size?: number) => void): void {
B
polish  
Benjamin Pasero 已提交
391
		if (!this.filePattern || paths.isAbsolute(this.filePattern)) {
392 393 394 395 396
			return clb(null);
		}

		const absolutePath = paths.join(basePath, this.filePattern);

397
		return fs.stat(absolutePath, (error, stat) => {
398
			return clb(!error && !stat.isDirectory() ? absolutePath : null, stat && stat.size); // only existing files
399 400 401
		});
	}

C
Christof Marti 已提交
402
	private doWalk(rootFolder: string, relativeParentPath: string, files: string[], onResult: (result: IRawFileMatch) => void, done: (error: Error) => void): void {
E
Erich Gamma 已提交
403 404 405 406 407 408 409 410 411 412 413 414 415

		// Execute tasks on each file in parallel to optimize throughput
		flow.parallel(files, (file: string, clb: (error: Error) => void): void => {

			// Check canceled
			if (this.isCanceled || this.isLimitHit) {
				return clb(null);
			}

			// If the user searches for the exact file name, we adjust the glob matching
			// to ignore filtering by siblings because the user seems to know what she
			// is searching for and we want to include the result in that case anyway
			let siblings = files;
416
			if (this.config.filePattern === file) {
E
Erich Gamma 已提交
417 418 419 420
				siblings = [];
			}

			// Check exclude pattern
C
Christof Marti 已提交
421 422
			let currentRelativePath = relativeParentPath ? [relativeParentPath, file].join(paths.sep) : file;
			if (this.excludePattern(currentRelativePath, () => siblings)) {
E
Erich Gamma 已提交
423 424 425
				return clb(null);
			}

426
			// Use lstat to detect links
C
Christof Marti 已提交
427
			let currentAbsolutePath = [rootFolder, currentRelativePath].join(paths.sep);
428
			fs.lstat(currentAbsolutePath, (error, lstat) => {
429 430 431
				if (error || this.isCanceled || this.isLimitHit) {
					return clb(null);
				}
E
Erich Gamma 已提交
432

433
				// If the path is a link, we must instead use fs.stat() to find out if the
434 435 436 437 438 439
				// link is a directory or not because lstat will always return the stat of
				// the link which is always a file.
				this.statLinkIfNeeded(currentAbsolutePath, lstat, (error, stat) => {
					if (error || this.isCanceled || this.isLimitHit) {
						return clb(null);
					}
E
Erich Gamma 已提交
440

441 442
					// Directory: Follow directories
					if (stat.isDirectory()) {
C
chrmarti 已提交
443
						this.directoriesWalked++;
444

445 446
						// to really prevent loops with links we need to resolve the real path of them
						return this.realPathIfNeeded(currentAbsolutePath, lstat, (error, realpath) => {
447 448 449 450
							if (error || this.isCanceled || this.isLimitHit) {
								return clb(null);
							}

451 452 453
							if (this.walkedPaths[realpath]) {
								return clb(null); // escape when there are cycles (can happen with symlinks)
							}
E
Erich Gamma 已提交
454

455 456 457 458 459 460 461 462
							this.walkedPaths[realpath] = true; // remember as walked

							// Continue walking
							return extfs.readdir(currentAbsolutePath, (error: Error, children: string[]): void => {
								if (error || this.isCanceled || this.isLimitHit) {
									return clb(null);
								}

C
Christof Marti 已提交
463
								this.doWalk(rootFolder, currentRelativePath, children, onResult, clb);
464 465
							});
						});
466
					}
E
Erich Gamma 已提交
467

468 469
					// File: Check for match on file pattern and include pattern
					else {
C
chrmarti 已提交
470
						this.filesWalked++;
C
Christof Marti 已提交
471
						if (currentRelativePath === this.filePattern) {
472 473
							return clb(null); // ignore file if its path matches with the file pattern because checkFilePatternRelativeMatch() takes care of those
						}
E
Erich Gamma 已提交
474

475 476 477 478
						if (this.maxFilesize && types.isNumber(stat.size) && stat.size > this.maxFilesize) {
							return clb(null); // ignore file if max file size is hit
						}

C
Christof Marti 已提交
479
						this.matchFile(onResult, rootFolder, currentRelativePath, stat.size);
480 481 482 483 484
					}

					// Unwind
					return clb(null);
				});
E
Erich Gamma 已提交
485 486 487 488 489 490
			});
		}, (error: Error[]): void => {
			if (error) {
				error = arrays.coalesce(error); // find any error by removing null values first
			}

C
Christof Marti 已提交
491
			return done(error && error.length > 0 ? error[0] : null);
E
Erich Gamma 已提交
492 493 494
		});
	}

C
Christof Marti 已提交
495 496
	private matchFile(onResult: (result: IRawFileMatch) => void, base: string, path: string, size?: number): void {
		if (this.isFilePatternMatch(path) && (!this.includePattern || this.includePattern(path))) {
497 498 499 500 501 502 503 504
			this.resultCount++;

			if (this.maxResults && this.resultCount > this.maxResults) {
				this.isLimitHit = true;
			}

			if (!this.isLimitHit) {
				onResult({
C
Christof Marti 已提交
505 506
					base,
					path,
507 508
					size
				});
509 510 511 512
			}
		}
	}

B
polish  
Benjamin Pasero 已提交
513
	private isFilePatternMatch(path: string): boolean {
514 515 516

		// Check for search pattern
		if (this.filePattern) {
517 518 519 520
			if (this.filePattern === '*') {
				return true; // support the all-matching wildcard
			}

521
			return scorer.matches(path, this.normalizedFilePatternLowercase);
522 523 524 525 526 527
		}

		// No patterns means we match all
		return true;
	}

528
	private statLinkIfNeeded(path: string, lstat: fs.Stats, clb: (error: Error, stat: fs.Stats) => void): void {
529
		if (lstat.isSymbolicLink()) {
530
			return fs.stat(path, clb); // stat the target the link points to
531 532 533 534 535
		}

		return clb(null, lstat); // not a link, so the stat is already ok for us
	}

536
	private realPathIfNeeded(path: string, lstat: fs.Stats, clb: (error: Error, realpath?: string) => void): void {
537 538 539 540 541
		if (lstat.isSymbolicLink()) {
			return fs.realpath(path, (error, realpath) => {
				if (error) {
					return clb(error);
				}
E
Erich Gamma 已提交
542

543 544 545
				return clb(null, realpath);
			});
		}
E
Erich Gamma 已提交
546

547
		return clb(null, path);
E
Erich Gamma 已提交
548 549 550
	}
}

551
export class Engine implements ISearchEngine<IRawFileMatch> {
552 553
	private rootFolders: string[];
	private extraFiles: string[];
E
Erich Gamma 已提交
554 555 556
	private walker: FileWalker;

	constructor(config: IRawSearch) {
557 558 559
		this.rootFolders = config.rootFolders;
		this.extraFiles = config.extraFiles;

E
Erich Gamma 已提交
560 561 562
		this.walker = new FileWalker(config);
	}

563
	public search(onResult: (result: IRawFileMatch) => void, onProgress: (progress: IProgress) => void, done: (error: Error, complete: ISerializedSearchComplete) => void): void {
C
chrmarti 已提交
564 565 566 567 568 569
		this.walker.walk(this.rootFolders, this.extraFiles, onResult, (err: Error, isLimitHit: boolean) => {
			done(err, {
				limitHit: isLimitHit,
				stats: this.walker.getStats()
			});
		});
E
Erich Gamma 已提交
570 571 572 573 574
	}

	public cancel(): void {
		this.walker.cancel();
	}
575
}