filters.ts 18.7 KB
Newer Older
E
Erich Gamma 已提交
1 2 3 4 5 6
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/
'use strict';

B
Benjamin Pasero 已提交
7
import strings = require('vs/base/common/strings');
8
import { BoundedMap } from 'vs/base/common/map';
J
Johannes Rieken 已提交
9
import { CharCode } from 'vs/base/common/charCode';
E
Erich Gamma 已提交
10 11 12

export interface IFilter {
	// Returns null if word doesn't match.
B
Benjamin Pasero 已提交
13
	(word: string, wordToMatchAgainst: string): IMatch[];
E
Erich Gamma 已提交
14 15 16
}

export interface IMatch {
B
Benjamin Pasero 已提交
17 18
	start: number;
	end: number;
E
Erich Gamma 已提交
19 20 21 22 23
}

// Combined filters

/**
24
 * @returns A filter which combines the provided set
E
Erich Gamma 已提交
25 26 27 28
 * of filters with an or. The *first* filters that
 * matches defined the return value of the returned
 * filter.
 */
B
Benjamin Pasero 已提交
29
export function or(...filter: IFilter[]): IFilter {
30
	return function (word: string, wordToMatchAgainst: string): IMatch[] {
B
Benjamin Pasero 已提交
31 32 33
		for (let i = 0, len = filter.length; i < len; i++) {
			let match = filter[i](word, wordToMatchAgainst);
			if (match) {
E
Erich Gamma 已提交
34 35 36 37 38 39 40 41
				return match;
			}
		}
		return null;
	};
}

/**
42
 * @returns A filter which combines the provided set
E
Erich Gamma 已提交
43 44 45
 * of filters with an and. The combines matches are
 * returned if *all* filters match.
 */
B
Benjamin Pasero 已提交
46
export function and(...filter: IFilter[]): IFilter {
47
	return function (word: string, wordToMatchAgainst: string): IMatch[] {
B
Benjamin Pasero 已提交
48 49 50 51
		let result: IMatch[] = [];
		for (let i = 0, len = filter.length; i < len; i++) {
			let match = filter[i](word, wordToMatchAgainst);
			if (!match) {
E
Erich Gamma 已提交
52 53 54 55 56 57 58 59 60 61
				return null;
			}
			result = result.concat(match);
		}
		return result;
	};
}

// Prefix

62 63
export const matchesStrictPrefix: IFilter = _matchesPrefix.bind(undefined, false);
export const matchesPrefix: IFilter = _matchesPrefix.bind(undefined, true);
E
Erich Gamma 已提交
64

B
Benjamin Pasero 已提交
65
function _matchesPrefix(ignoreCase: boolean, word: string, wordToMatchAgainst: string): IMatch[] {
66
	if (!wordToMatchAgainst || wordToMatchAgainst.length < word.length) {
E
Erich Gamma 已提交
67 68
		return null;
	}
69

70 71 72 73 74 75
	let matches: boolean;
	if (ignoreCase) {
		matches = strings.beginsWithIgnoreCase(wordToMatchAgainst, word);
	} else {
		matches = wordToMatchAgainst.indexOf(word) === 0;
	}
76

77
	if (!matches) {
78
		return null;
E
Erich Gamma 已提交
79
	}
80

E
Erich Gamma 已提交
81 82 83 84 85
	return word.length > 0 ? [{ start: 0, end: word.length }] : [];
}

// Contiguous Substring

B
Benjamin Pasero 已提交
86 87
export function matchesContiguousSubString(word: string, wordToMatchAgainst: string): IMatch[] {
	let index = wordToMatchAgainst.toLowerCase().indexOf(word.toLowerCase());
E
Erich Gamma 已提交
88 89 90 91 92 93 94 95 96
	if (index === -1) {
		return null;
	}

	return [{ start: index, end: index + word.length }];
}

// Substring

B
Benjamin Pasero 已提交
97
export function matchesSubString(word: string, wordToMatchAgainst: string): IMatch[] {
E
Erich Gamma 已提交
98 99 100
	return _matchesSubString(word.toLowerCase(), wordToMatchAgainst.toLowerCase(), 0, 0);
}

B
Benjamin Pasero 已提交
101
function _matchesSubString(word: string, wordToMatchAgainst: string, i: number, j: number): IMatch[] {
E
Erich Gamma 已提交
102 103 104 105 106 107
	if (i === word.length) {
		return [];
	} else if (j === wordToMatchAgainst.length) {
		return null;
	} else {
		if (word[i] === wordToMatchAgainst[j]) {
B
Benjamin Pasero 已提交
108
			let result: IMatch[] = null;
E
Erich Gamma 已提交
109 110 111 112 113 114 115 116 117 118 119
			if (result = _matchesSubString(word, wordToMatchAgainst, i + 1, j + 1)) {
				return join({ start: j, end: j + 1 }, result);
			}
		}

		return _matchesSubString(word, wordToMatchAgainst, i, j + 1);
	}
}

// CamelCase

B
Benjamin Pasero 已提交
120
function isLower(code: number): boolean {
121
	return CharCode.a <= code && code <= CharCode.z;
E
Erich Gamma 已提交
122 123
}

B
Benjamin Pasero 已提交
124
function isUpper(code: number): boolean {
125
	return CharCode.A <= code && code <= CharCode.Z;
E
Erich Gamma 已提交
126 127
}

B
Benjamin Pasero 已提交
128
function isNumber(code: number): boolean {
129
	return CharCode.Digit0 <= code && code <= CharCode.Digit9;
E
Erich Gamma 已提交
130 131
}

B
Benjamin Pasero 已提交
132
function isWhitespace(code: number): boolean {
133 134 135 136 137 138
	return (
		code === CharCode.Space
		|| code === CharCode.Tab
		|| code === CharCode.LineFeed
		|| code === CharCode.CarriageReturn
	);
E
Erich Gamma 已提交
139 140
}

B
Benjamin Pasero 已提交
141
function isAlphanumeric(code: number): boolean {
E
Erich Gamma 已提交
142 143 144
	return isLower(code) || isUpper(code) || isNumber(code);
}

B
Benjamin Pasero 已提交
145
function join(head: IMatch, tail: IMatch[]): IMatch[] {
E
Erich Gamma 已提交
146 147 148 149 150 151 152 153 154 155 156
	if (tail.length === 0) {
		tail = [head];
	} else if (head.end === tail[0].start) {
		tail[0].start = head.start;
	} else {
		tail.unshift(head);
	}
	return tail;
}

function nextAnchor(camelCaseWord: string, start: number): number {
B
Benjamin Pasero 已提交
157 158 159
	for (let i = start; i < camelCaseWord.length; i++) {
		let c = camelCaseWord.charCodeAt(i);
		if (isUpper(c) || isNumber(c) || (i > 0 && !isAlphanumeric(camelCaseWord.charCodeAt(i - 1)))) {
E
Erich Gamma 已提交
160 161 162 163 164 165
			return i;
		}
	}
	return camelCaseWord.length;
}

B
Benjamin Pasero 已提交
166
function _matchesCamelCase(word: string, camelCaseWord: string, i: number, j: number): IMatch[] {
E
Erich Gamma 已提交
167 168 169 170 171 172 173
	if (i === word.length) {
		return [];
	} else if (j === camelCaseWord.length) {
		return null;
	} else if (word[i] !== camelCaseWord[j].toLowerCase()) {
		return null;
	} else {
B
Benjamin Pasero 已提交
174
		let result: IMatch[] = null;
B
Benjamin Pasero 已提交
175
		let nextUpperIndex = j + 1;
E
Erich Gamma 已提交
176 177 178 179 180 181 182 183 184
		result = _matchesCamelCase(word, camelCaseWord, i + 1, j + 1);
		while (!result && (nextUpperIndex = nextAnchor(camelCaseWord, nextUpperIndex)) < camelCaseWord.length) {
			result = _matchesCamelCase(word, camelCaseWord, i + 1, nextUpperIndex);
			nextUpperIndex++;
		}
		return result === null ? null : join({ start: j, end: j + 1 }, result);
	}
}

J
Joao Moreno 已提交
185 186 187 188 189 190 191
interface ICamelCaseAnalysis {
	upperPercent: number;
	lowerPercent: number;
	alphaPercent: number;
	numericPercent: number;
}

E
Erich Gamma 已提交
192 193
// Heuristic to avoid computing camel case matcher for words that don't
// look like camelCaseWords.
J
Joao Moreno 已提交
194
function analyzeCamelCaseWord(word: string): ICamelCaseAnalysis {
195
	let upper = 0, lower = 0, alpha = 0, numeric = 0, code = 0;
E
Erich Gamma 已提交
196

B
Benjamin Pasero 已提交
197
	for (let i = 0; i < word.length; i++) {
E
Erich Gamma 已提交
198 199
		code = word.charCodeAt(i);

200 201 202 203
		if (isUpper(code)) { upper++; }
		if (isLower(code)) { lower++; }
		if (isAlphanumeric(code)) { alpha++; }
		if (isNumber(code)) { numeric++; }
E
Erich Gamma 已提交
204 205
	}

B
Benjamin Pasero 已提交
206 207 208
	let upperPercent = upper / word.length;
	let lowerPercent = lower / word.length;
	let alphaPercent = alpha / word.length;
209
	let numericPercent = numeric / word.length;
E
Erich Gamma 已提交
210

J
Joao Moreno 已提交
211 212 213 214
	return { upperPercent, lowerPercent, alphaPercent, numericPercent };
}

function isUpperCaseWord(analysis: ICamelCaseAnalysis): boolean {
B
Benjamin Pasero 已提交
215
	const { upperPercent, lowerPercent } = analysis;
J
Joao Moreno 已提交
216 217 218 219 220
	return lowerPercent === 0 && upperPercent > 0.6;
}

function isCamelCaseWord(analysis: ICamelCaseAnalysis): boolean {
	const { upperPercent, lowerPercent, alphaPercent, numericPercent } = analysis;
221
	return lowerPercent > 0.2 && upperPercent < 0.8 && alphaPercent > 0.6 && numericPercent < 0.2;
E
Erich Gamma 已提交
222 223 224 225 226
}

// Heuristic to avoid computing camel case matcher for words that don't
// look like camel case patterns.
function isCamelCasePattern(word: string): boolean {
B
Benjamin Pasero 已提交
227
	let upper = 0, lower = 0, code = 0, whitespace = 0;
E
Erich Gamma 已提交
228

B
Benjamin Pasero 已提交
229
	for (let i = 0; i < word.length; i++) {
E
Erich Gamma 已提交
230 231
		code = word.charCodeAt(i);

232 233 234
		if (isUpper(code)) { upper++; }
		if (isLower(code)) { lower++; }
		if (isWhitespace(code)) { whitespace++; }
E
Erich Gamma 已提交
235 236 237 238 239 240 241 242 243
	}

	if ((upper === 0 || lower === 0) && whitespace === 0) {
		return word.length <= 30;
	} else {
		return upper <= 5;
	}
}

B
Benjamin Pasero 已提交
244
export function matchesCamelCase(word: string, camelCaseWord: string): IMatch[] {
245
	if (!camelCaseWord || camelCaseWord.length === 0) {
E
Erich Gamma 已提交
246 247 248 249 250 251 252
		return null;
	}

	if (!isCamelCasePattern(word)) {
		return null;
	}

J
Joao Moreno 已提交
253
	if (camelCaseWord.length > 60) {
E
Erich Gamma 已提交
254 255 256
		return null;
	}

J
Joao Moreno 已提交
257 258 259 260 261 262 263 264 265 266
	const analysis = analyzeCamelCaseWord(camelCaseWord);

	if (!isCamelCaseWord(analysis)) {
		if (!isUpperCaseWord(analysis)) {
			return null;
		}

		camelCaseWord = camelCaseWord.toLowerCase();
	}

B
Benjamin Pasero 已提交
267 268
	let result: IMatch[] = null;
	let i = 0;
E
Erich Gamma 已提交
269 270 271 272 273 274 275 276

	while (i < camelCaseWord.length && (result = _matchesCamelCase(word.toLowerCase(), camelCaseWord, 0, i)) === null) {
		i = nextAnchor(camelCaseWord, i + 1);
	}

	return result;
}

277
// Matches beginning of words supporting non-ASCII languages
S
#17646:  
Sandeep Somavarapu 已提交
278 279
// If `contiguous` is true then matches word with beginnings of the words in the target. E.g. "pul" will match "Git: Pull"
// Otherwise also matches sub string of the word with beginnings of the words in the target. E.g. "gp" or "g p" will match "Git: Pull"
280 281
// Useful in cases where the target is words (e.g. command labels)

S
#17646:  
Sandeep Somavarapu 已提交
282
export function matchesWords(word: string, target: string, contiguous: boolean = false): IMatch[] {
283
	if (!target || target.length === 0) {
284 285 286 287 288 289
		return null;
	}

	let result: IMatch[] = null;
	let i = 0;

S
#17646:  
Sandeep Somavarapu 已提交
290
	while (i < target.length && (result = _matchesWords(word.toLowerCase(), target, 0, i, contiguous)) === null) {
291 292 293 294 295 296
		i = nextWord(target, i + 1);
	}

	return result;
}

S
#17646:  
Sandeep Somavarapu 已提交
297
function _matchesWords(word: string, target: string, i: number, j: number, contiguous: boolean): IMatch[] {
298 299 300 301 302 303 304
	if (i === word.length) {
		return [];
	} else if (j === target.length) {
		return null;
	} else if (word[i] !== target[j].toLowerCase()) {
		return null;
	} else {
B
Benjamin Pasero 已提交
305
		let result: IMatch[] = null;
306
		let nextWordIndex = j + 1;
S
#17646:  
Sandeep Somavarapu 已提交
307 308 309 310 311 312
		result = _matchesWords(word, target, i + 1, j + 1, contiguous);
		if (!contiguous) {
			while (!result && (nextWordIndex = nextWord(target, nextWordIndex)) < target.length) {
				result = _matchesWords(word, target, i + 1, nextWordIndex, contiguous);
				nextWordIndex++;
			}
313 314 315 316 317 318 319 320 321 322 323 324 325 326 327
		}
		return result === null ? null : join({ start: j, end: j + 1 }, result);
	}
}

function nextWord(word: string, start: number): number {
	for (let i = start; i < word.length; i++) {
		let c = word.charCodeAt(i);
		if (isWhitespace(c) || (i > 0 && isWhitespace(word.charCodeAt(i - 1)))) {
			return i;
		}
	}
	return word.length;
}

E
Erich Gamma 已提交
328 329
// Fuzzy

B
Benjamin Pasero 已提交
330 331 332 333 334
export enum SubstringMatching {
	Contiguous,
	Separate
}

335
export const fuzzyContiguousFilter = or(matchesPrefix, matchesCamelCase, matchesContiguousSubString);
B
Benjamin Pasero 已提交
336
const fuzzySeparateFilter = or(matchesPrefix, matchesCamelCase, matchesSubString);
337
const fuzzyRegExpCache = new BoundedMap<RegExp>(10000); // bounded to 10000 elements
B
Benjamin Pasero 已提交
338

339
export function matchesFuzzy(word: string, wordToMatchAgainst: string, enableSeparateSubstringMatching = false): IMatch[] {
340 341 342
	if (typeof word !== 'string' || typeof wordToMatchAgainst !== 'string') {
		return null; // return early for invalid input
	}
E
Erich Gamma 已提交
343 344

	// Form RegExp for wildcard matches
B
Benjamin Pasero 已提交
345
	let regexp = fuzzyRegExpCache.get(word);
E
Erich Gamma 已提交
346
	if (!regexp) {
B
Benjamin Pasero 已提交
347
		regexp = new RegExp(strings.convertSimple2RegExpPattern(word), 'i');
B
Benjamin Pasero 已提交
348
		fuzzyRegExpCache.set(word, regexp);
E
Erich Gamma 已提交
349 350 351
	}

	// RegExp Filter
B
Benjamin Pasero 已提交
352
	let match: RegExpExecArray = regexp.exec(wordToMatchAgainst);
E
Erich Gamma 已提交
353
	if (match) {
B
Benjamin Pasero 已提交
354
		return [{ start: match.index, end: match.index + match[0].length }];
E
Erich Gamma 已提交
355 356 357
	}

	// Default Filter
358
	return enableSeparateSubstringMatching ? fuzzySeparateFilter(word, wordToMatchAgainst) : fuzzyContiguousFilter(word, wordToMatchAgainst);
B
Benjamin Pasero 已提交
359
}
J
Johannes Rieken 已提交
360

361 362
export function createMatches(position: number[]): IMatch[] {
	let ret: IMatch[] = [];
363 364 365
	if (!position) {
		return ret;
	}
366 367 368 369 370 371 372 373 374 375 376 377
	let last: IMatch;
	for (const pos of position) {
		if (last && last.end === pos) {
			last.end += 1;
		} else {
			last = { start: pos, end: pos + 1 };
			ret.push(last);
		}
	}
	return ret;
}

378 379 380 381 382 383 384 385 386 387 388 389 390
function initTable() {
	const table: number[][] = [];
	const row: number[] = [0];
	for (let i = 1; i <= 100; i++) {
		row.push(-i);
	}
	for (let i = 0; i < 100; i++) {
		let thisRow = row.slice(0);
		thisRow[0] = -i;
		table.push(thisRow);
	}
	return table;
}
391

392
const _table = initTable();
J
Johannes Rieken 已提交
393
const _scores = initTable();
J
Johannes Rieken 已提交
394
const _arrows = <Arrow[][]>initTable();
395
const _debug = false;
396

397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416
function printTable(table: number[][], pattern: string, patternLen: number, word: string, wordLen: number): string {
	function pad(s: string, n: number, pad = ' ') {
		while (s.length < n) {
			s = pad + s;
		}
		return s;
	}
	let ret = ` |   |${word.split('').map(c => pad(c, 3)).join('|')}\n`;

	for (let i = 0; i <= patternLen; i++) {
		if (i === 0) {
			ret += ' |';
		} else {
			ret += `${pattern[i - 1]}|`;
		}
		ret += table[i].slice(0, wordLen + 1).map(n => pad(n.toString(), 3)).join('|') + '\n';
	}
	return ret;
}

417 418
const _seps: { [ch: string]: boolean } = Object.create(null);
_seps['_'] = true;
419
_seps['-'] = true;
420 421 422 423
_seps['.'] = true;
_seps[' '] = true;
_seps['/'] = true;
_seps['\\'] = true;
424 425
_seps['\''] = true;
_seps['"'] = true;
J
Johannes Rieken 已提交
426
_seps[':'] = true;
427

428 429 430 431
const _ws: { [ch: string]: boolean } = Object.create(null);
_ws[' '] = true;
_ws['\t'] = true;

J
Johannes Rieken 已提交
432 433
const enum Arrow { Top = 0b1, Diag = 0b10, Left = 0b100 }

434
export function fuzzyScore(pattern: string, word: string): [number, number[]] {
435

436
	const patternLen = pattern.length > 100 ? 100 : pattern.length;
437 438
	const wordLen = word.length > 100 ? 100 : word.length;

439 440 441 442 443 444 445 446 447 448 449 450 451 452
	// Check for leading whitespace in the pattern and
	// start matching just after that position. This is
	// like `pattern = pattern.rtrim()` but doesn't create
	// a new string
	let patternStartPos = 0;
	for (const ch of pattern) {
		if (_ws[ch]) {
			patternStartPos += 1;
		} else {
			break;
		}
	}

	if (patternLen === patternStartPos) {
453
		return [-100, []];
454 455
	}

456 457 458 459 460 461
	if (patternLen > wordLen) {
		return undefined;
	}

	const lowPattern = pattern.toLowerCase();
	const lowWord = word.toLowerCase();
462

463 464 465
	let patternPos = patternStartPos;
	let wordPos = 0;

J
Johannes Rieken 已提交
466 467 468
	// Run a simple check if the characters of pattern occur
	// (in order) at all in word. If that isn't the case we
	// stop because no match will be possible
469 470 471
	while (patternPos < patternLen && wordPos < wordLen) {
		if (lowPattern[patternPos] === lowWord[wordPos]) {
			patternPos += 1;
472
		}
473
		wordPos += 1;
474
	}
475
	if (patternPos !== patternLen) {
476 477
		return undefined;
	}
478

J
Johannes Rieken 已提交
479
	// There will be a mach, fill in tables
480
	for (patternPos = patternStartPos + 1; patternPos <= patternLen; patternPos++) {
481 482 483

		let lastLowWordChar = '';

484
		for (wordPos = 1; wordPos <= wordLen; wordPos++) {
485 486

			let score = -1;
487 488
			let lowWordChar = lowWord[wordPos - 1];
			if (lowPattern[patternPos - 1] === lowWordChar) {
J
Johannes Rieken 已提交
489 490
				if (wordPos === (patternPos - patternStartPos)) {
					// common prefix: `foobar <-> foobaz`
491
					if (pattern[patternPos - 1] === word[wordPos - 1]) {
492 493 494 495
						score = 7;
					} else {
						score = 5;
					}
496
				} else if (lowWordChar !== word[wordPos - 1]) {
J
Johannes Rieken 已提交
497
					// hitting upper-case: `foo <-> forOthers`
498
					if (pattern[patternPos - 1] === word[wordPos - 1]) {
J
Johannes Rieken 已提交
499 500 501 502
						score = 7;
					} else {
						score = 5;
					}
503
				} else if (_seps[lastLowWordChar]) {
J
Johannes Rieken 已提交
504
					// post separator: `foo <-> bar_foo`
J
Johannes Rieken 已提交
505 506
					score = 5;

507 508 509 510 511
				} else {
					score = 1;
				}
			}

512
			_scores[patternPos][wordPos] = score;
J
Johannes Rieken 已提交
513

514 515 516
			let diag = _table[patternPos - 1][wordPos - 1] + (score > 1 ? 1 : score);
			let top = _table[patternPos - 1][wordPos] + -1;
			let left = _table[patternPos][wordPos - 1] + -1;
517 518 519

			if (left >= top) {
				// left or diag
J
Johannes Rieken 已提交
520
				if (left > diag) {
521 522
					_table[patternPos][wordPos] = left;
					_arrows[patternPos][wordPos] = Arrow.Left;
J
Johannes Rieken 已提交
523
				} else if (left === diag) {
524 525
					_table[patternPos][wordPos] = left;
					_arrows[patternPos][wordPos] = Arrow.Left | Arrow.Diag;
526
				} else {
527 528
					_table[patternPos][wordPos] = diag;
					_arrows[patternPos][wordPos] = Arrow.Diag;
529 530 531
				}
			} else {
				// top or diag
J
Johannes Rieken 已提交
532
				if (top > diag) {
533 534
					_table[patternPos][wordPos] = top;
					_arrows[patternPos][wordPos] = Arrow.Top;
J
Johannes Rieken 已提交
535
				} else if (top === diag) {
536 537
					_table[patternPos][wordPos] = top;
					_arrows[patternPos][wordPos] = Arrow.Top | Arrow.Diag;
J
Johannes Rieken 已提交
538
				} else {
539 540
					_table[patternPos][wordPos] = diag;
					_arrows[patternPos][wordPos] = Arrow.Diag;
541 542 543 544 545 546 547 548 549 550
				}
			}

			lastLowWordChar = lowWordChar;
		}
	}

	if (_debug) {
		console.log(printTable(_table, pattern, patternLen, word, wordLen));
		console.log(printTable(_arrows, pattern, patternLen, word, wordLen));
J
Johannes Rieken 已提交
551
		console.log(printTable(_scores, pattern, patternLen, word, wordLen));
552 553
	}

554 555 556
	// _bucket is an array of [PrefixArray] we use to keep
	// track of scores and matches. After calling `_findAllMatches`
	// the best match (if available) is the first item in the array
557
	_bucket.length = 0;
558
	_topScore = -100;
559
	_patternStartPos = patternStartPos;
560
	_findAllMatches(patternLen, wordLen, 0, new LazyArray(), false);
J
Johannes Rieken 已提交
561

562
	if (_bucket.length === 0) {
J
Johannes Rieken 已提交
563 564 565
		return undefined;
	}

566
	return [_topScore, _bucket[0].toArray()];
J
Johannes Rieken 已提交
567 568
}

569 570
let _bucket: LazyArray[] = [];
let _topScore: number = 0;
571
let _patternStartPos: number = 0;
572

573
function _findAllMatches(patternPos: number, wordPos: number, total: number, matches: LazyArray, lastMatched: boolean): void {
574 575

	if (_bucket.length >= 10 || total < -25) {
576 577
		// stop when having already 10 results, or
		// when a potential alignment as already 5 gaps
578 579 580
		return;
	}

581 582
	let simpleMatchCount = 0;

583
	while (patternPos > _patternStartPos && wordPos > 0) {
584

J
Johannes Rieken 已提交
585 586 587
		let score = _scores[patternPos][wordPos];
		let arrow = _arrows[patternPos][wordPos];

588
		if (arrow === Arrow.Left) {
J
Johannes Rieken 已提交
589 590
			// left
			wordPos -= 1;
591
			if (lastMatched) {
592
				total -= 5; // new gap penalty
593
			} else if (!matches.isEmpty()) {
594
				total -= 1; // gap penalty after first match
595 596
			}
			lastMatched = false;
597
			simpleMatchCount = 0;
J
Johannes Rieken 已提交
598 599 600 601 602

		} else if (arrow & Arrow.Diag) {

			if (arrow & Arrow.Left) {
				// left
603 604
				_findAllMatches(
					patternPos,
605
					wordPos - 1,
606 607
					!matches.isEmpty() ? total - 1 : total, // gap penalty after first match
					matches.slice(),
608
					lastMatched
609
				);
610
			}
J
Johannes Rieken 已提交
611 612

			// diag
613
			total += score;
J
Johannes Rieken 已提交
614 615 616
			patternPos -= 1;
			wordPos -= 1;
			matches.unshift(wordPos);
617
			lastMatched = true;
J
Johannes Rieken 已提交
618

J
Johannes Rieken 已提交
619 620 621
			// count simple matches and boost a row of
			// simple matches when they yield in a
			// strong match.
622 623
			if (score === 1) {
				simpleMatchCount += 1;
J
Johannes Rieken 已提交
624 625 626 627 628 629 630

				if (patternPos === _patternStartPos) {
					// when the first match is a weak
					// match we discard it
					return undefined;
				}

631
			} else {
J
Johannes Rieken 已提交
632
				// boost
633
				total += 1 + (simpleMatchCount * (score - 1));
634 635 636
				simpleMatchCount = 0;
			}

J
Johannes Rieken 已提交
637
		} else {
638
			return undefined;
639 640 641
		}
	}

J
Johannes Rieken 已提交
642 643
	total -= wordPos >= 3 ? 9 : wordPos * 3; // late start penalty

644 645 646 647 648 649 650 651
	// dynamically keep track of the current top score
	// and insert the current best score at head, the rest at tail
	if (total > _topScore) {
		_topScore = total;
		_bucket.unshift(matches);
	} else {
		_bucket.push(matches);
	}
652
}
653

654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693
class LazyArray {

	private _parent: LazyArray;
	private _parentLen: number;
	private _data: number[];

	isEmpty(): boolean {
		return !this._data && (!this._parent || this._parent.isEmpty());
	}

	unshift(n: number) {
		if (!this._data) {
			this._data = [n];
		} else {
			this._data.unshift(n);
		}
	}

	slice(): LazyArray {
		const ret = new LazyArray();
		ret._parent = this;
		ret._parentLen = this._data ? this._data.length : 0;
		return ret;
	}

	toArray(): number[] {
		if (!this._data) {
			return this._parent.toArray();
		}
		const bucket: number[][] = [];
		let element = <LazyArray>this;
		while (element) {
			if (element._parent && element._parent._data) {
				bucket.push(element._parent._data.slice(element._parent._data.length - element._parentLen));
			}
			element = element._parent;
		}
		return Array.prototype.concat.apply(this._data, bucket);
	}
}
694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714

export function nextTypoPermutation(pattern: string, patternPos: number) {

	if (patternPos + 1 >= pattern.length) {
		return undefined;
	}

	return pattern.slice(0, patternPos)
		+ pattern[patternPos + 1]
		+ pattern[patternPos]
		+ pattern.slice(patternPos + 2);
}

export function fuzzyScoreGraceful(pattern: string, word: string): [number, number[]] {
	let ret = fuzzyScore(pattern, word);
	for (let patternPos = 1; patternPos < pattern.length - 1 && !ret; patternPos++) {
		let pattern2 = nextTypoPermutation(pattern, patternPos);
		ret = fuzzyScore(pattern2, word);
	}
	return ret;
}