textSearch.ts 9.2 KB
Newer Older
E
Erich Gamma 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

'use strict';

import strings = require('vs/base/common/strings');

import fs = require('fs');

import baseMime = require('vs/base/common/mime');
import {ILineMatch, IProgress} from 'vs/platform/search/common/search';
B
Benjamin Pasero 已提交
14
import {detectMimeAndEncodingFromBuffer} from 'vs/base/node/mime';
E
Erich Gamma 已提交
15
import {FileWalker} from 'vs/workbench/services/search/node/fileSearch';
B
wip  
Benjamin Pasero 已提交
16
import {UTF16le, UTF16be, UTF8, UTF8_with_bom, encodingExists, decode} from 'vs/base/node/encoding';
E
Erich Gamma 已提交
17 18
import {ISerializedFileMatch, IRawSearch, ISearchEngine} from 'vs/workbench/services/search/node/rawSearchService';

19 20 21 22 23
/**
 * Options controlling how a file is read line by line.
 */
interface ReadLinesOptions {
	/** Size, in bytes, of the buffer allocated for reading chunks of the file. */
	bufferLength: number;
	/**
	 * Encoding used to decode the bytes that were read. The reader may overwrite this
	 * value with the encoding it detects from the first chunk of the file.
	 */
	encoding: string;
}

E
Erich Gamma 已提交
24
/**
 * Text search engine: lets a `FileWalker` enumerate files, reads every reported file
 * line by line and reports the lines that match the configured content pattern.
 */
export class Engine implements ISearchEngine {

	private static PROGRESS_FLUSH_CHUNK_SIZE = 50; // optimization: number of files to process before emitting progress event

	private rootFolders: string[];
	private extraFiles: string[];
	private maxResults: number;
	private walker: FileWalker;
	private contentPattern: RegExp;
	private isCanceled: boolean;
	private isDone: boolean;
	private total: number;
	private worked: number;
	private progressed: number;
	private walkerError: Error;
	private walkerIsDone: boolean;
	private fileEncoding: string;
	private limitReached: boolean;

	/**
	 * @param config raw search request (folders, extra files, content pattern, result limit, file encoding).
	 * @param walker file walker used to enumerate the files to search in.
	 */
	constructor(config: IRawSearch, walker: FileWalker) {
		this.rootFolders = config.rootFolders;
		this.extraFiles = config.extraFiles;
		this.walker = walker;
		// NOTE(review): the trailing `true` is presumably the "global" flag, which the repeated exec() calls in search() rely on - confirm against strings.createRegExp
		this.contentPattern = strings.createRegExp(config.contentPattern.pattern, config.contentPattern.isRegExp, config.contentPattern.isCaseSensitive, config.contentPattern.isWordMatch, true);
		this.isCanceled = false;
		this.isDone = false;
		this.walkerIsDone = false;
		this.limitReached = false;
		this.maxResults = config.maxResults;
		this.worked = 0;
		this.progressed = 0;
		this.total = 0;
		// Fall back to UTF8 when the configured encoding is not known
		this.fileEncoding = encodingExists(config.fileEncoding) ? config.fileEncoding : UTF8;
	}

	/**
	 * Cancels the search: marks the engine as canceled and cancels the file walker.
	 */
	public cancel(): void {
		this.isCanceled = true;
		this.walker.cancel();
	}

	/**
	 * Runs the search.
	 *
	 * @param onResult called with the serialized matches of a file that has at least one matching line.
	 * @param onProgress called with the current `total` / `worked` counters, once every
	 * `PROGRESS_FLUSH_CHUNK_SIZE` progress ticks.
	 * @param done called once, after the walker has finished and everything it reported has been
	 * processed. Receives the walker error (if any) and whether the result limit was reached.
	 */
	public search(onResult: (match: ISerializedFileMatch) => void, onProgress: (progress: IProgress) => void, done: (error: Error, isLimitHit: boolean) => void): void {
		let resultCounter = 0;

		let progress = () => {
			this.progressed++;
			if (this.progressed % Engine.PROGRESS_FLUSH_CHUNK_SIZE === 0) {
				onProgress({ total: this.total, worked: this.worked }); // buffer progress in chunks to reduce pressure
			}
		};

		let unwind = (processed: number) => {
			this.worked += processed;

			// Emit progress() unless we got canceled or hit the limit
			if (processed && !this.isDone && !this.isCanceled && !this.limitReached) {
				progress();
			}

			// Emit done()
			if (this.worked === this.total && this.walkerIsDone && !this.isDone) {
				this.isDone = true;
				done(this.walkerError, this.limitReached);
			}
		};

		// Walk over the file system
		this.walker.walk(this.rootFolders, this.extraFiles, (result, size) => {
			size = size ||  1; // every reported file accounts for at least one unit of work
			this.total += size;

			// If the result is empty or we have reached the limit or we are canceled, ignore it
			if (this.limitReached || this.isCanceled) {
				return unwind(size);
			}

			// Indicate progress to the outside
			progress();

			let fileMatch: FileMatch = null; // created lazily on the first match in this file

			let doneCallback = (error?: Error) => {
				// Read errors are not propagated: a file that failed to read simply yields no result
				if (!error && !this.isCanceled && fileMatch && !fileMatch.isEmpty()) {
					onResult(fileMatch.serialize());
				}

				return unwind(size);
			};

			let perLineCallback = (line: string, lineNumber: number) => {
				if (this.limitReached || this.isCanceled) {
					return; // return early if canceled or limit reached
				}

				let lineMatch: LineMatch = null; // created lazily on the first match in this line
				let match = this.contentPattern.exec(line);

				// Record all matches into file result
				// (the loop also stops on an empty match)
				while (match !== null && match[0].length > 0 && !this.limitReached && !this.isCanceled) {
					resultCounter++;
					if (this.maxResults && resultCounter >= this.maxResults) {
						this.limitReached = true;
					}

					if (fileMatch === null) {
						fileMatch = new FileMatch(result.path);
					}

					if (lineMatch === null) {
						lineMatch = new LineMatch(line, lineNumber);
						fileMatch.addMatch(lineMatch);
					}

					lineMatch.addMatch(match.index, match[0].length);

					match = this.contentPattern.exec(line);
				}
			};

			// Read lines buffered to support large files
			this.readlinesAsync(result.path, perLineCallback, { bufferLength: 8096, encoding: this.fileEncoding }, doneCallback);
		}, (error, isLimitHit) => {
			this.walkerIsDone = true;
			this.walkerError = error;
			unwind(0 /* walker is done, indicate this back to our handler to be able to unwind */);
		});
	}

	/**
	 * Reads a file in chunks of `options.bufferLength` bytes and invokes `perLineCallback` for
	 * every line, together with its zero-based line number. Lines are split on LF, CR and CRLF
	 * and are handed over including their line terminator. Files that do not look like text
	 * (judged from the first chunk) are skipped. Reading stops early once the search is canceled
	 * or the result limit is reached.
	 *
	 * @param filename path of the file to read.
	 * @param perLineCallback invoked synchronously for each line that was read.
	 * @param options buffer size and encoding; `encoding` is overwritten when an encoding is detected
	 * from the first chunk.
	 * @param callback invoked once when reading has finished. Receives the open/read error, or
	 * otherwise the result of closing the file.
	 */
	private readlinesAsync(filename: string, perLineCallback: (line: string, lineNumber: number) => void, options: ReadLinesOptions, callback: (error: Error) => void): void {
		fs.open(filename, 'r', null, (error: Error, fd: number) => {
			if (error) {
				return callback(error);
			}

			let buffer = new Buffer(options.bufferLength);
			let pos: number; // start of the not yet consumed bytes within the current chunk
			let i: number; // scan position within the current chunk
			let line = ''; // decoded text of the line currently being assembled
			let lineNumber = 0;
			let lastBufferHadTraillingCR = false;

			const outer = this;

			function decodeBuffer(buffer: NodeBuffer): string {
				if (options.encoding === UTF8 || options.encoding === UTF8_with_bom) {
					return buffer.toString(); // much faster to use built in toString() when encoding is default
				}

				return decode(buffer, options.encoding);
			}

			// Completes the current line with the bytes up to (excluding) i + offset and reports it
			function lineFinished(offset: number): void {
				line += decodeBuffer(buffer.slice(pos, i + offset));
				perLineCallback(line, lineNumber);
				line = '';
				lineNumber++;
				pos = i + offset;
			}

			function readFile(isFirstRead: boolean, clb: (error: Error) => void): void {
				if (outer.limitReached || outer.isCanceled) {
					return clb(null); // return early if canceled or limit reached
				}

				fs.read(fd, buffer, 0, buffer.length, null, (error: Error, bytesRead: number, buffer: NodeBuffer) => {
					if (error || bytesRead === 0 || outer.limitReached || outer.isCanceled) {
						return clb(error); // return early if canceled or limit reached or no more bytes to read
					}

					pos = 0;
					i = 0;

					// Detect encoding and mime when this is the beginning of the file
					if (isFirstRead) {
						let mimeAndEncoding = detectMimeAndEncodingFromBuffer(buffer, bytesRead);
						if (mimeAndEncoding.mimes[mimeAndEncoding.mimes.length - 1] !== baseMime.MIME_TEXT) {
							return clb(null); // skip files that seem binary
						}

						// Check for BOM offset
						switch (mimeAndEncoding.encoding) {
							case UTF8:
								pos = i = 3;
								options.encoding = UTF8;
								break;
							case UTF16be:
								pos = i = 2;
								options.encoding = UTF16be;
								break;
							case UTF16le:
								pos = i = 2;
								options.encoding = UTF16le;
								break;
						}
					}

					// The previous chunk ended with a CR: only now can we tell whether it was a lone CR or the first half of a CRLF
					if (lastBufferHadTraillingCR) {
						if (buffer[i] === 0x0a) { // LF (Line Feed)
							lineFinished(1);
							i++;
						} else {
							lineFinished(0);
						}

						lastBufferHadTraillingCR = false;
					}

					for (; i < bytesRead; ++i) {
						if (buffer[i] === 0x0a) { // LF (Line Feed)
							lineFinished(1);
						} else if (buffer[i] === 0x0d) { // CR (Carriage Return)
							if (i + 1 === bytesRead) {
								lastBufferHadTraillingCR = true;
							} else if (buffer[i + 1] === 0x0a) { // LF (Line Feed)
								lineFinished(2);
								i++;
							} else {
								lineFinished(1);
							}
						}
					}

					// Keep the unfinished rest of the chunk for the next read.
					// NOTE(review): the rest is decoded on its own, so a multi-byte character that
					// straddles a chunk boundary is decoded in two pieces.
					line += decodeBuffer(buffer.slice(pos, bytesRead));

					readFile(false /* isFirstRead */, clb); // Continue reading
				});
			}

			readFile(true /* isFirstRead */, (error: Error) => {
				if (error) {
					// Release the file descriptor before reporting the read error; the read error
					// takes precedence over a potential error from closing
					return fs.close(fd, () => {
						callback(error);
					});
				}

				if (line.length) {
					perLineCallback(line, lineNumber); // handle last line
				}

				fs.close(fd, (error: Error) => {
					callback(error);
				});
			});
		});
	}
}

/**
 * Collects the matching lines found in a single file.
 */
class FileMatch implements ISerializedFileMatch {
	public path: string;
	public lineMatches: LineMatch[];

	/**
	 * @param path path of the file the matches belong to.
	 */
	constructor(path: string) {
		this.lineMatches = [];
		this.path = path;
	}

	/**
	 * Appends the matches of one line to this file match.
	 */
	public addMatch(lineMatch: LineMatch): void {
		this.lineMatches.push(lineMatch);
	}

	/**
	 * @returns `true` while no line match has been added.
	 */
	public isEmpty(): boolean {
		return !(this.lineMatches.length > 0);
	}

	/**
	 * @returns a plain object holding the path and the serialized form of every line match.
	 */
	public serialize(): ISerializedFileMatch {
		const serializedLines: ILineMatch[] = this.lineMatches.map(entry => entry.serialize());
		const filePath = this.path;

		return { path: filePath, lineMatches: serializedLines };
	}
}

/**
 * Holds the matches found within a single line of a file.
 */
class LineMatch implements ILineMatch {
	public preview: string;
	public lineNumber: number;
	public offsetAndLengths: number[][];

	/**
	 * @param preview text of the line; trailing CR / LF characters are stripped.
	 * @param lineNumber number of the line as provided by the caller.
	 */
	constructor(preview: string, lineNumber: number) {
		const withoutTrailingEol = preview.replace(/(\r|\n)*$/, '');

		this.preview = withoutTrailingEol;
		this.lineNumber = lineNumber;
		this.offsetAndLengths = [];
	}

	/**
	 * @returns the line text without its trailing line terminator.
	 */
	public getText(): string {
		const text = this.preview;
		return text;
	}

	/**
	 * @returns the line number this match was created with.
	 */
	public getLineNumber(): number {
		const line = this.lineNumber;
		return line;
	}

	/**
	 * Records one match as an `[offset, length]` pair.
	 */
	public addMatch(offset: number, length: number): void {
		const range = [offset, length];
		this.offsetAndLengths.push(range);
	}

	/**
	 * @returns a plain object with the preview text, the line number and the recorded
	 * `[offset, length]` pairs (the array is shared with this instance, not copied).
	 */
	public serialize(): ILineMatch {
		const { preview, lineNumber, offsetAndLengths } = this;

		return { preview, lineNumber, offsetAndLengths };
	}
}