fileSearch.ts 16.8 KB
Newer Older
E
Erich Gamma 已提交
1 2 3 4 5 6 7
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

'use strict';

C
Christof Marti 已提交
8 9
import * as childProcess from 'child_process';
import {StringDecoder} from 'string_decoder';
E
Erich Gamma 已提交
10 11
import fs = require('fs');
import paths = require('path');
C
Christof Marti 已提交
12
import {Readable} from "stream";
E
Erich Gamma 已提交
13

14
import scorer = require('vs/base/common/scorer');
E
Erich Gamma 已提交
15
import arrays = require('vs/base/common/arrays');
C
Christof Marti 已提交
16
import platform = require('vs/base/common/platform');
E
Erich Gamma 已提交
17
import strings = require('vs/base/common/strings');
18
import types = require('vs/base/common/types');
E
Erich Gamma 已提交
19
import glob = require('vs/base/common/glob');
20
import {IProgress, IUncachedSearchStats} from 'vs/platform/search/common/search';
E
Erich Gamma 已提交
21 22 23

import extfs = require('vs/base/node/extfs');
import flow = require('vs/base/node/flow');
24
import {IRawFileMatch, ISerializedSearchComplete, IRawSearch, ISearchEngine} from './search';
E
Erich Gamma 已提交
25

C
Christof Marti 已提交
26 27 28
enum Traversal {
	Node = 1,
	MacFind,
C
Christof Marti 已提交
29
	WindowsDir
C
Christof Marti 已提交
30 31 32 33 34 35 36 37 38 39 40 41
}

interface IDirectoryEntry {
	relativePath: string;
	basename: string;
}

interface IDirectoryTree {
	rootEntries: IDirectoryEntry[];
	pathToEntries: { [relativePath: string]: IDirectoryEntry[] };
}

E
Erich Gamma 已提交
42 43
export class FileWalker {
	private config: IRawSearch;
44
	private filePattern: string;
45
	private normalizedFilePatternLowercase: string;
C
Christof Marti 已提交
46 47
	private excludePattern: glob.ParsedExpression;
	private includePattern: glob.ParsedExpression;
E
Erich Gamma 已提交
48
	private maxResults: number;
49
	private maxFilesize: number;
E
Erich Gamma 已提交
50 51 52
	private isLimitHit: boolean;
	private resultCount: number;
	private isCanceled: boolean;
C
chrmarti 已提交
53 54 55
	private fileWalkStartTime: number;
	private directoriesWalked: number;
	private filesWalked: number;
C
Christof Marti 已提交
56 57 58 59 60
	private traversal: Traversal;
	private errors: string[];
	private cmdForkStartTime: number;
	private cmdForkResultTime: number;
	private cmdResultCount: number;
E
Erich Gamma 已提交
61 62 63 64 65

	private walkedPaths: { [path: string]: boolean; };

	constructor(config: IRawSearch) {
		this.config = config;
66
		this.filePattern = config.filePattern;
C
Christof Marti 已提交
67 68
		this.excludePattern = glob.parse(config.excludePattern);
		this.includePattern = config.includePattern && glob.parse(config.includePattern);
E
Erich Gamma 已提交
69
		this.maxResults = config.maxResults || null;
70
		this.maxFilesize = config.maxFilesize || null;
E
Erich Gamma 已提交
71
		this.walkedPaths = Object.create(null);
72 73
		this.resultCount = 0;
		this.isLimitHit = false;
C
chrmarti 已提交
74 75
		this.directoriesWalked = 0;
		this.filesWalked = 0;
C
Christof Marti 已提交
76 77
		this.traversal = Traversal.Node;
		this.errors = [];
B
Benjamin Pasero 已提交
78

79 80
		if (this.filePattern) {
			this.normalizedFilePatternLowercase = strings.stripWildcards(this.filePattern).toLowerCase();
B
Benjamin Pasero 已提交
81
		}
E
Erich Gamma 已提交
82 83 84 85 86 87
	}

	public cancel(): void {
		this.isCanceled = true;
	}

88
	public walk(rootFolders: string[], extraFiles: string[], onResult: (result: IRawFileMatch) => void, done: (error: Error, isLimitHit: boolean) => void): void {
C
chrmarti 已提交
89
		this.fileWalkStartTime = Date.now();
E
Erich Gamma 已提交
90

91
		// Support that the file pattern is a full path to a file that exists
92
		this.checkFilePatternAbsoluteMatch((exists, size) => {
93 94 95
			if (this.isCanceled) {
				return done(null, this.isLimitHit);
			}
E
Erich Gamma 已提交
96

97 98
			// Report result from file pattern if matching
			if (exists) {
99 100
				this.resultCount++;
				onResult({
C
Christof Marti 已提交
101
					path: this.filePattern,
102 103
					size
				});
104 105 106 107

				// Optimization: a match on an absolute path is a good result and we do not
				// continue walking the entire root paths array for other matches because
				// it is very unlikely that another file would match on the full absolute path
108
				return done(null, this.isLimitHit);
109
			}
E
Erich Gamma 已提交
110

111 112 113
			// For each extra file
			if (extraFiles) {
				extraFiles.forEach(extraFilePath => {
C
Christof Marti 已提交
114
					if (this.excludePattern(extraFilePath)) {
115
						return; // excluded
E
Erich Gamma 已提交
116 117
					}

118
					// File: Check for match on file pattern and include pattern
C
Christof Marti 已提交
119
					this.matchFile(onResult, null, extraFilePath /* no workspace relative path */);
120 121
				});
			}
122

C
Christof Marti 已提交
123
			let traverse = this.nodeJSTraversal;
C
Christof Marti 已提交
124 125 126 127 128 129 130 131 132 133 134 135 136
			if (!this.maxFilesize) {
				if (platform.isMacintosh) {
					this.traversal = Traversal.MacFind;
					traverse = this.macFindTraversal;
				} else if (platform.isWindows) {
					this.traversal = Traversal.WindowsDir;
					traverse = this.windowsDirTraversal;
				}
			}

			const isNodeTraversal = traverse === this.nodeJSTraversal;
			if (!isNodeTraversal) {
				this.cmdForkStartTime = Date.now();
C
Christof Marti 已提交
137 138
			}

139
			// For each root folder
C
Christof Marti 已提交
140 141 142
			flow.parallel(rootFolders, (rootFolder, rootFolderDone: (err?: Error) => void) => {
				traverse.call(this, rootFolder, onResult, err => {
					if (err) {
C
Christof Marti 已提交
143
						if (isNodeTraversal) {
C
Christof Marti 已提交
144 145 146 147 148 149 150 151
							rootFolderDone(err);
						} else {
							// fallback
							this.errors.push(String(err));
							this.nodeJSTraversal(rootFolder, onResult, rootFolderDone);
						}
					} else {
						rootFolderDone();
152
					}
C
Christof Marti 已提交
153 154 155 156 157 158
				});
			}, (err, result) => {
				done(err ? err[0] : null, this.isLimitHit);
			});
		});
	}
159

C
Christof Marti 已提交
160 161
	private macFindTraversal(rootFolder: string, onResult: (result: IRawFileMatch) => void, done: (err?: Error) => void): void {
		const cmd = childProcess.spawn('find', ['-L', '.', '-type', 'f'], { cwd: rootFolder });
C
Christof Marti 已提交
162
		this.readStdout(cmd, 'utf8', (err: Error, stdout?: string) => {
C
Christof Marti 已提交
163 164 165 166
			if (err) {
				done(err);
				return;
			}
E
Erich Gamma 已提交
167

C
Christof Marti 已提交
168 169 170 171 172 173 174 175 176
			// Mac: uses NFD unicode form on disk, but we want NFC
			const relativeFiles = strings.normalizeNFC(stdout).split('\n./');
			relativeFiles[0] = relativeFiles[0].trim().substr(2);
			const n = relativeFiles.length;
			relativeFiles[n - 1] = relativeFiles[n - 1].trim();
			if (!relativeFiles[n - 1]) {
				relativeFiles.pop();
			}

C
Christof Marti 已提交
177
			this.matchFiles(rootFolder, relativeFiles, onResult);
C
Christof Marti 已提交
178

C
Christof Marti 已提交
179 180 181 182 183 184 185 186 187 188
			done();
		});
	}

	private windowsDirTraversal(rootFolder: string, onResult: (result: IRawFileMatch) => void, done: (err?: Error) => void): void {
		const cmd = childProcess.spawn('cmd', ['/U', '/c', 'dir', '/s', '/b', '/a-d'], { cwd: rootFolder });
		this.readStdout(cmd, 'ucs2', (err: Error, stdout?: string) => {
			if (err) {
				done(err);
				return;
C
Christof Marti 已提交
189 190
			}

C
Christof Marti 已提交
191 192 193 194 195 196 197 198 199
			const relativeFiles = stdout.split(`\r\n${rootFolder}\\`);
			relativeFiles[0] = relativeFiles[0].trim().substr(rootFolder.length + 1);
			const n = relativeFiles.length;
			relativeFiles[n - 1] = relativeFiles[n - 1].trim();
			if (!relativeFiles[n - 1]) {
				relativeFiles.pop();
			}

			this.matchFiles(rootFolder, relativeFiles, onResult);
C
Christof Marti 已提交
200 201 202 203 204

			done();
		});
	}

C
Christof Marti 已提交
205
	private readStdout(cmd: childProcess.ChildProcess, encoding: string, cb: (err: Error, stdout?: string) => void): void {
C
Christof Marti 已提交
206 207
		let done = (err: Error, stdout?: string) => {
			done = () => {};
C
Christof Marti 已提交
208
			this.cmdForkResultTime = Date.now();
C
Christof Marti 已提交
209 210 211 212 213 214 215 216 217 218 219 220
			cb(err, stdout);
		};

		const stdout = this.collectData(cmd.stdout);
		const stderr = this.collectData(cmd.stderr);

		cmd.on('error', err => {
			done(err);
		});

		cmd.on('close', code => {
			if (code !== 0) {
C
Christof Marti 已提交
221
				done(new Error(`find failed with error code ${code}: ${this.decodeData(stderr, encoding)}`));
C
Christof Marti 已提交
222
			} else {
C
Christof Marti 已提交
223
				done(null, this.decodeData(stdout, encoding));
C
Christof Marti 已提交
224 225 226 227 228 229 230 231 232 233 234 235
			}
		});
	}

	private collectData(stream: Readable): Buffer[] {
		const buffers = [];
		stream.on('data', data => {
			buffers.push(data);
		});
		return buffers;
	}

C
Christof Marti 已提交
236 237 238 239 240 241 242 243 244 245 246 247 248 249 250
	private decodeData(buffers: Buffer[], encoding: string): string {
		const decoder = new StringDecoder(encoding);
		return buffers.map(buffer => decoder.write(buffer)).join('');
	}

	private matchFiles(rootFolder: string, relativeFiles: string[], onResult: (result: IRawFileMatch) => void) {
		this.cmdResultCount = relativeFiles.length;

		// Support relative paths to files from a root resource (ignores excludes)
		if (relativeFiles.indexOf(this.filePattern) !== -1) {
			this.matchFile(onResult, rootFolder, this.filePattern);
		}

		const tree = this.buildDirectoryTree(relativeFiles);
		this.matchDirectoryTree(rootFolder, tree, onResult);
C
Christof Marti 已提交
251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283
	}

	private buildDirectoryTree(relativeFilePaths: string[]): IDirectoryTree {
		const tree: IDirectoryTree = {
			rootEntries: [],
			pathToEntries: Object.create(null)
		};
		const {pathToEntries} = tree;
		pathToEntries['.'] = tree.rootEntries;
		relativeFilePaths.forEach(function add(relativePath: string) {
			const basename = paths.basename(relativePath);
			const dirname = paths.dirname(relativePath);
			let entries = pathToEntries[dirname];
			if (!entries) {
				entries = pathToEntries[dirname] = [];
				add(dirname);
			}
			entries.push({
				relativePath,
				basename
			});
		});
		return tree;
	}

	private matchDirectoryTree(rootFolder: string, { rootEntries, pathToEntries }: IDirectoryTree, onResult: (result: IRawFileMatch) => void) {
		const self = this;
		const excludePattern = this.excludePattern;
		const filePattern = this.filePattern;
		function matchDirectory(entries: IDirectoryEntry[]) {
			self.directoriesWalked++;
			for (let i = 0, n = entries.length; i < n; i++) {
				const entry = entries[i];
C
Christof Marti 已提交
284
				const relativePath = entry.relativePath;
C
Christof Marti 已提交
285 286 287 288 289 290 291 292

				// Check exclude pattern
				// If the user searches for the exact file name, we adjust the glob matching
				// to ignore filtering by siblings because the user seems to know what she
				// is searching for and we want to include the result in that case anyway
				if (excludePattern(relativePath, () => filePattern !== entry.basename ? entries.map(entry => entry.basename) : [])) {
					continue;
				}
E
Erich Gamma 已提交
293

C
Christof Marti 已提交
294 295 296 297 298 299 300 301 302
				const sub = pathToEntries[relativePath];
				if (sub) {
					matchDirectory(sub);
				} else {
					self.filesWalked++;
					if (relativePath === filePattern) {
						continue; // ignore file if its path matches with the file pattern because that is already matched above
					}

C
Christof Marti 已提交
303
					self.matchFile(onResult, rootFolder, relativePath);
C
Christof Marti 已提交
304 305 306 307 308 309
				}
			};
		}
		matchDirectory(rootEntries);
	}

C
Christof Marti 已提交
310
	private nodeJSTraversal(rootFolder: string, onResult: (result: IRawFileMatch) => void, done: (err?: Error) => void): void {
C
Christof Marti 已提交
311
		this.directoriesWalked++;
C
Christof Marti 已提交
312
		extfs.readdir(rootFolder, (error: Error, files: string[]) => {
C
Christof Marti 已提交
313 314 315 316 317
			if (error || this.isCanceled || this.isLimitHit) {
				return done();
			}

			// Support relative paths to files from a root resource (ignores excludes)
C
Christof Marti 已提交
318
			return this.checkFilePatternRelativeMatch(rootFolder, (match, size) => {
C
Christof Marti 已提交
319 320 321 322 323 324 325 326
				if (this.isCanceled || this.isLimitHit) {
					return done();
				}

				// Report result from file pattern if matching
				if (match) {
					this.resultCount++;
					onResult({
C
Christof Marti 已提交
327 328
						base: rootFolder,
						path: this.filePattern,
C
Christof Marti 已提交
329
						size
330
					});
C
Christof Marti 已提交
331 332
				}

C
Christof Marti 已提交
333
				return this.doWalk(rootFolder, '', files, onResult, done);
E
Erich Gamma 已提交
334
			});
335 336 337
		});
	}

338
	public getStats(): IUncachedSearchStats {
C
chrmarti 已提交
339
		return {
340
			fromCache: false,
C
Christof Marti 已提交
341 342
			traversal: Traversal[this.traversal],
			errors: this.errors,
C
chrmarti 已提交
343 344 345
			fileWalkStartTime: this.fileWalkStartTime,
			fileWalkResultTime: Date.now(),
			directoriesWalked: this.directoriesWalked,
346
			filesWalked: this.filesWalked,
C
Christof Marti 已提交
347 348 349 350
			resultCount: this.resultCount,
			cmdForkStartTime: this.cmdForkStartTime,
			cmdForkResultTime: this.cmdForkResultTime,
			cmdResultCount: this.cmdResultCount
C
chrmarti 已提交
351 352 353
		};
	}

354
	private checkFilePatternAbsoluteMatch(clb: (exists: boolean, size?: number) => void): void {
355 356 357 358
		if (!this.filePattern || !paths.isAbsolute(this.filePattern)) {
			return clb(false);
		}

359
		return fs.stat(this.filePattern, (error, stat) => {
360
			return clb(!error && !stat.isDirectory(), stat && stat.size); // only existing files
E
Erich Gamma 已提交
361 362 363
		});
	}

364
	private checkFilePatternRelativeMatch(basePath: string, clb: (matchPath: string, size?: number) => void): void {
B
polish  
Benjamin Pasero 已提交
365
		if (!this.filePattern || paths.isAbsolute(this.filePattern)) {
366 367 368 369 370
			return clb(null);
		}

		const absolutePath = paths.join(basePath, this.filePattern);

371
		return fs.stat(absolutePath, (error, stat) => {
372
			return clb(!error && !stat.isDirectory() ? absolutePath : null, stat && stat.size); // only existing files
373 374 375
		});
	}

C
Christof Marti 已提交
376
	private doWalk(rootFolder: string, relativeParentPath: string, files: string[], onResult: (result: IRawFileMatch) => void, done: (error: Error) => void): void {
E
Erich Gamma 已提交
377 378 379 380 381 382 383 384 385 386 387 388 389

		// Execute tasks on each file in parallel to optimize throughput
		flow.parallel(files, (file: string, clb: (error: Error) => void): void => {

			// Check canceled
			if (this.isCanceled || this.isLimitHit) {
				return clb(null);
			}

			// If the user searches for the exact file name, we adjust the glob matching
			// to ignore filtering by siblings because the user seems to know what she
			// is searching for and we want to include the result in that case anyway
			let siblings = files;
390
			if (this.config.filePattern === file) {
E
Erich Gamma 已提交
391 392 393 394
				siblings = [];
			}

			// Check exclude pattern
C
Christof Marti 已提交
395 396
			let currentRelativePath = relativeParentPath ? [relativeParentPath, file].join(paths.sep) : file;
			if (this.excludePattern(currentRelativePath, () => siblings)) {
E
Erich Gamma 已提交
397 398 399
				return clb(null);
			}

400
			// Use lstat to detect links
C
Christof Marti 已提交
401
			let currentAbsolutePath = [rootFolder, currentRelativePath].join(paths.sep);
402
			fs.lstat(currentAbsolutePath, (error, lstat) => {
403 404 405
				if (error || this.isCanceled || this.isLimitHit) {
					return clb(null);
				}
E
Erich Gamma 已提交
406

407
				// If the path is a link, we must instead use fs.stat() to find out if the
408 409 410 411 412 413
				// link is a directory or not because lstat will always return the stat of
				// the link which is always a file.
				this.statLinkIfNeeded(currentAbsolutePath, lstat, (error, stat) => {
					if (error || this.isCanceled || this.isLimitHit) {
						return clb(null);
					}
E
Erich Gamma 已提交
414

415 416
					// Directory: Follow directories
					if (stat.isDirectory()) {
C
chrmarti 已提交
417
						this.directoriesWalked++;
418

419 420
						// to really prevent loops with links we need to resolve the real path of them
						return this.realPathIfNeeded(currentAbsolutePath, lstat, (error, realpath) => {
421 422 423 424
							if (error || this.isCanceled || this.isLimitHit) {
								return clb(null);
							}

425 426 427
							if (this.walkedPaths[realpath]) {
								return clb(null); // escape when there are cycles (can happen with symlinks)
							}
E
Erich Gamma 已提交
428

429 430 431 432 433 434 435 436
							this.walkedPaths[realpath] = true; // remember as walked

							// Continue walking
							return extfs.readdir(currentAbsolutePath, (error: Error, children: string[]): void => {
								if (error || this.isCanceled || this.isLimitHit) {
									return clb(null);
								}

C
Christof Marti 已提交
437
								this.doWalk(rootFolder, currentRelativePath, children, onResult, clb);
438 439
							});
						});
440
					}
E
Erich Gamma 已提交
441

442 443
					// File: Check for match on file pattern and include pattern
					else {
C
chrmarti 已提交
444
						this.filesWalked++;
C
Christof Marti 已提交
445
						if (currentRelativePath === this.filePattern) {
446 447
							return clb(null); // ignore file if its path matches with the file pattern because checkFilePatternRelativeMatch() takes care of those
						}
E
Erich Gamma 已提交
448

449 450 451 452
						if (this.maxFilesize && types.isNumber(stat.size) && stat.size > this.maxFilesize) {
							return clb(null); // ignore file if max file size is hit
						}

C
Christof Marti 已提交
453
						this.matchFile(onResult, rootFolder, currentRelativePath, stat.size);
454 455 456 457 458
					}

					// Unwind
					return clb(null);
				});
E
Erich Gamma 已提交
459 460 461 462 463 464
			});
		}, (error: Error[]): void => {
			if (error) {
				error = arrays.coalesce(error); // find any error by removing null values first
			}

C
Christof Marti 已提交
465
			return done(error && error.length > 0 ? error[0] : null);
E
Erich Gamma 已提交
466 467 468
		});
	}

C
Christof Marti 已提交
469 470
	private matchFile(onResult: (result: IRawFileMatch) => void, base: string, path: string, size?: number): void {
		if (this.isFilePatternMatch(path) && (!this.includePattern || this.includePattern(path))) {
471 472 473 474 475 476 477 478
			this.resultCount++;

			if (this.maxResults && this.resultCount > this.maxResults) {
				this.isLimitHit = true;
			}

			if (!this.isLimitHit) {
				onResult({
C
Christof Marti 已提交
479 480
					base,
					path,
481 482
					size
				});
483 484 485 486
			}
		}
	}

B
polish  
Benjamin Pasero 已提交
487
	private isFilePatternMatch(path: string): boolean {
488 489 490

		// Check for search pattern
		if (this.filePattern) {
491 492 493 494
			if (this.filePattern === '*') {
				return true; // support the all-matching wildcard
			}

495
			return scorer.matches(path, this.normalizedFilePatternLowercase);
496 497 498 499 500 501
		}

		// No patterns means we match all
		return true;
	}

502
	private statLinkIfNeeded(path: string, lstat: fs.Stats, clb: (error: Error, stat: fs.Stats) => void): void {
503
		if (lstat.isSymbolicLink()) {
504
			return fs.stat(path, clb); // stat the target the link points to
505 506 507 508 509
		}

		return clb(null, lstat); // not a link, so the stat is already ok for us
	}

510
	private realPathIfNeeded(path: string, lstat: fs.Stats, clb: (error: Error, realpath?: string) => void): void {
511 512 513 514 515
		if (lstat.isSymbolicLink()) {
			return fs.realpath(path, (error, realpath) => {
				if (error) {
					return clb(error);
				}
E
Erich Gamma 已提交
516

517 518 519
				return clb(null, realpath);
			});
		}
E
Erich Gamma 已提交
520

521
		return clb(null, path);
E
Erich Gamma 已提交
522 523 524
	}
}

525
export class Engine implements ISearchEngine<IRawFileMatch> {
526 527
	private rootFolders: string[];
	private extraFiles: string[];
E
Erich Gamma 已提交
528 529 530
	private walker: FileWalker;

	constructor(config: IRawSearch) {
531 532 533
		this.rootFolders = config.rootFolders;
		this.extraFiles = config.extraFiles;

E
Erich Gamma 已提交
534 535 536
		this.walker = new FileWalker(config);
	}

537
	public search(onResult: (result: IRawFileMatch) => void, onProgress: (progress: IProgress) => void, done: (error: Error, complete: ISerializedSearchComplete) => void): void {
C
chrmarti 已提交
538 539 540 541 542 543
		this.walker.walk(this.rootFolders, this.extraFiles, onResult, (err: Error, isLimitHit: boolean) => {
			done(err, {
				limitHit: isLimitHit,
				stats: this.walker.getStats()
			});
		});
E
Erich Gamma 已提交
544 545 546 547 548
	}

	public cancel(): void {
		this.walker.cancel();
	}
549
}