filters.ts 21.7 KB
Newer Older
E
Erich Gamma 已提交
1 2 3 4 5
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

J
Johannes Rieken 已提交
6
import { CharCode } from 'vs/base/common/charCode';
7 8
import { LRUCache } from 'vs/base/common/map';
import * as strings from 'vs/base/common/strings';
E
Erich Gamma 已提交
9 10 11

export interface IFilter {
	// Returns null if word doesn't match.
12
	(word: string, wordToMatchAgainst: string): IMatch[] | null;
E
Erich Gamma 已提交
13 14 15
}

export interface IMatch {
B
Benjamin Pasero 已提交
16 17
	start: number;
	end: number;
E
Erich Gamma 已提交
18 19 20 21 22
}

// Combined filters

/**
23
 * @returns A filter which combines the provided set
E
Erich Gamma 已提交
24 25 26 27
 * of filters with an or. The *first* filters that
 * matches defined the return value of the returned
 * filter.
 */
B
Benjamin Pasero 已提交
28
export function or(...filter: IFilter[]): IFilter {
29
	return function (word: string, wordToMatchAgainst: string): IMatch[] | null {
B
Benjamin Pasero 已提交
30 31 32
		for (let i = 0, len = filter.length; i < len; i++) {
			let match = filter[i](word, wordToMatchAgainst);
			if (match) {
E
Erich Gamma 已提交
33 34 35 36 37 38 39 40 41
				return match;
			}
		}
		return null;
	};
}

// Prefix

42 43
export const matchesStrictPrefix: IFilter = _matchesPrefix.bind(undefined, false);
export const matchesPrefix: IFilter = _matchesPrefix.bind(undefined, true);
E
Erich Gamma 已提交
44

45
function _matchesPrefix(ignoreCase: boolean, word: string, wordToMatchAgainst: string): IMatch[] | null {
46
	if (!wordToMatchAgainst || wordToMatchAgainst.length < word.length) {
E
Erich Gamma 已提交
47 48
		return null;
	}
49

50 51
	let matches: boolean;
	if (ignoreCase) {
52
		matches = strings.startsWithIgnoreCase(wordToMatchAgainst, word);
53 54 55
	} else {
		matches = wordToMatchAgainst.indexOf(word) === 0;
	}
56

57
	if (!matches) {
58
		return null;
E
Erich Gamma 已提交
59
	}
60

E
Erich Gamma 已提交
61 62 63 64 65
	return word.length > 0 ? [{ start: 0, end: word.length }] : [];
}

// Contiguous Substring

66
export function matchesContiguousSubString(word: string, wordToMatchAgainst: string): IMatch[] | null {
B
Benjamin Pasero 已提交
67
	let index = wordToMatchAgainst.toLowerCase().indexOf(word.toLowerCase());
E
Erich Gamma 已提交
68 69 70 71 72 73 74 75 76
	if (index === -1) {
		return null;
	}

	return [{ start: index, end: index + word.length }];
}

// Substring

77
export function matchesSubString(word: string, wordToMatchAgainst: string): IMatch[] | null {
E
Erich Gamma 已提交
78 79 80
	return _matchesSubString(word.toLowerCase(), wordToMatchAgainst.toLowerCase(), 0, 0);
}

81
function _matchesSubString(word: string, wordToMatchAgainst: string, i: number, j: number): IMatch[] | null {
E
Erich Gamma 已提交
82 83 84 85 86 87
	if (i === word.length) {
		return [];
	} else if (j === wordToMatchAgainst.length) {
		return null;
	} else {
		if (word[i] === wordToMatchAgainst[j]) {
88
			let result: IMatch[] | null = null;
E
Erich Gamma 已提交
89 90 91
			if (result = _matchesSubString(word, wordToMatchAgainst, i + 1, j + 1)) {
				return join({ start: j, end: j + 1 }, result);
			}
92
			return null;
E
Erich Gamma 已提交
93 94 95 96 97 98 99 100
		}

		return _matchesSubString(word, wordToMatchAgainst, i, j + 1);
	}
}

// CamelCase

B
Benjamin Pasero 已提交
101
function isLower(code: number): boolean {
102
	return CharCode.a <= code && code <= CharCode.z;
E
Erich Gamma 已提交
103 104
}

105
export function isUpper(code: number): boolean {
106
	return CharCode.A <= code && code <= CharCode.Z;
E
Erich Gamma 已提交
107 108
}

B
Benjamin Pasero 已提交
109
function isNumber(code: number): boolean {
110
	return CharCode.Digit0 <= code && code <= CharCode.Digit9;
E
Erich Gamma 已提交
111 112
}

B
Benjamin Pasero 已提交
113
function isWhitespace(code: number): boolean {
114 115 116 117 118 119
	return (
		code === CharCode.Space
		|| code === CharCode.Tab
		|| code === CharCode.LineFeed
		|| code === CharCode.CarriageReturn
	);
E
Erich Gamma 已提交
120 121
}

B
Benjamin Pasero 已提交
122
function isAlphanumeric(code: number): boolean {
E
Erich Gamma 已提交
123 124 125
	return isLower(code) || isUpper(code) || isNumber(code);
}

B
Benjamin Pasero 已提交
126
function join(head: IMatch, tail: IMatch[]): IMatch[] {
E
Erich Gamma 已提交
127 128 129 130 131 132 133 134 135 136 137
	if (tail.length === 0) {
		tail = [head];
	} else if (head.end === tail[0].start) {
		tail[0].start = head.start;
	} else {
		tail.unshift(head);
	}
	return tail;
}

function nextAnchor(camelCaseWord: string, start: number): number {
B
Benjamin Pasero 已提交
138 139 140
	for (let i = start; i < camelCaseWord.length; i++) {
		let c = camelCaseWord.charCodeAt(i);
		if (isUpper(c) || isNumber(c) || (i > 0 && !isAlphanumeric(camelCaseWord.charCodeAt(i - 1)))) {
E
Erich Gamma 已提交
141 142 143 144 145 146
			return i;
		}
	}
	return camelCaseWord.length;
}

147
function _matchesCamelCase(word: string, camelCaseWord: string, i: number, j: number): IMatch[] | null {
E
Erich Gamma 已提交
148 149 150 151 152 153 154
	if (i === word.length) {
		return [];
	} else if (j === camelCaseWord.length) {
		return null;
	} else if (word[i] !== camelCaseWord[j].toLowerCase()) {
		return null;
	} else {
155
		let result: IMatch[] | null = null;
B
Benjamin Pasero 已提交
156
		let nextUpperIndex = j + 1;
E
Erich Gamma 已提交
157 158 159 160 161 162 163 164 165
		result = _matchesCamelCase(word, camelCaseWord, i + 1, j + 1);
		while (!result && (nextUpperIndex = nextAnchor(camelCaseWord, nextUpperIndex)) < camelCaseWord.length) {
			result = _matchesCamelCase(word, camelCaseWord, i + 1, nextUpperIndex);
			nextUpperIndex++;
		}
		return result === null ? null : join({ start: j, end: j + 1 }, result);
	}
}

J
Joao Moreno 已提交
166 167 168 169 170 171 172
interface ICamelCaseAnalysis {
	upperPercent: number;
	lowerPercent: number;
	alphaPercent: number;
	numericPercent: number;
}

E
Erich Gamma 已提交
173 174
// Heuristic to avoid computing camel case matcher for words that don't
// look like camelCaseWords.
J
Joao Moreno 已提交
175
function analyzeCamelCaseWord(word: string): ICamelCaseAnalysis {
176
	let upper = 0, lower = 0, alpha = 0, numeric = 0, code = 0;
E
Erich Gamma 已提交
177

B
Benjamin Pasero 已提交
178
	for (let i = 0; i < word.length; i++) {
E
Erich Gamma 已提交
179 180
		code = word.charCodeAt(i);

181 182 183 184
		if (isUpper(code)) { upper++; }
		if (isLower(code)) { lower++; }
		if (isAlphanumeric(code)) { alpha++; }
		if (isNumber(code)) { numeric++; }
E
Erich Gamma 已提交
185 186
	}

B
Benjamin Pasero 已提交
187 188 189
	let upperPercent = upper / word.length;
	let lowerPercent = lower / word.length;
	let alphaPercent = alpha / word.length;
190
	let numericPercent = numeric / word.length;
E
Erich Gamma 已提交
191

J
Joao Moreno 已提交
192 193 194 195
	return { upperPercent, lowerPercent, alphaPercent, numericPercent };
}

function isUpperCaseWord(analysis: ICamelCaseAnalysis): boolean {
B
Benjamin Pasero 已提交
196
	const { upperPercent, lowerPercent } = analysis;
J
Joao Moreno 已提交
197 198 199 200 201
	return lowerPercent === 0 && upperPercent > 0.6;
}

function isCamelCaseWord(analysis: ICamelCaseAnalysis): boolean {
	const { upperPercent, lowerPercent, alphaPercent, numericPercent } = analysis;
202
	return lowerPercent > 0.2 && upperPercent < 0.8 && alphaPercent > 0.6 && numericPercent < 0.2;
E
Erich Gamma 已提交
203 204 205 206 207
}

// Heuristic to avoid computing camel case matcher for words that don't
// look like camel case patterns.
function isCamelCasePattern(word: string): boolean {
B
Benjamin Pasero 已提交
208
	let upper = 0, lower = 0, code = 0, whitespace = 0;
E
Erich Gamma 已提交
209

B
Benjamin Pasero 已提交
210
	for (let i = 0; i < word.length; i++) {
E
Erich Gamma 已提交
211 212
		code = word.charCodeAt(i);

213 214 215
		if (isUpper(code)) { upper++; }
		if (isLower(code)) { lower++; }
		if (isWhitespace(code)) { whitespace++; }
E
Erich Gamma 已提交
216 217 218 219 220 221 222 223 224
	}

	if ((upper === 0 || lower === 0) && whitespace === 0) {
		return word.length <= 30;
	} else {
		return upper <= 5;
	}
}

225
export function matchesCamelCase(word: string, camelCaseWord: string): IMatch[] | null {
226 227 228 229 230 231 232
	if (!camelCaseWord) {
		return null;
	}

	camelCaseWord = camelCaseWord.trim();

	if (camelCaseWord.length === 0) {
E
Erich Gamma 已提交
233 234 235 236 237 238 239
		return null;
	}

	if (!isCamelCasePattern(word)) {
		return null;
	}

J
Joao Moreno 已提交
240
	if (camelCaseWord.length > 60) {
E
Erich Gamma 已提交
241 242 243
		return null;
	}

J
Joao Moreno 已提交
244 245 246 247 248 249 250 251 252 253
	const analysis = analyzeCamelCaseWord(camelCaseWord);

	if (!isCamelCaseWord(analysis)) {
		if (!isUpperCaseWord(analysis)) {
			return null;
		}

		camelCaseWord = camelCaseWord.toLowerCase();
	}

254
	let result: IMatch[] | null = null;
B
Benjamin Pasero 已提交
255
	let i = 0;
E
Erich Gamma 已提交
256

257 258
	word = word.toLowerCase();
	while (i < camelCaseWord.length && (result = _matchesCamelCase(word, camelCaseWord, 0, i)) === null) {
E
Erich Gamma 已提交
259 260 261 262 263 264
		i = nextAnchor(camelCaseWord, i + 1);
	}

	return result;
}

265
// Matches beginning of words supporting non-ASCII languages
S
#17646:  
Sandeep Somavarapu 已提交
266 267
// If `contiguous` is true then matches word with beginnings of the words in the target. E.g. "pul" will match "Git: Pull"
// Otherwise also matches sub string of the word with beginnings of the words in the target. E.g. "gp" or "g p" will match "Git: Pull"
268 269
// Useful in cases where the target is words (e.g. command labels)

270
export function matchesWords(word: string, target: string, contiguous: boolean = false): IMatch[] | null {
271
	if (!target || target.length === 0) {
272 273 274
		return null;
	}

275
	let result: IMatch[] | null = null;
276 277
	let i = 0;

278 279 280
	word = word.toLowerCase();
	target = target.toLowerCase();
	while (i < target.length && (result = _matchesWords(word, target, 0, i, contiguous)) === null) {
281 282 283 284 285 286
		i = nextWord(target, i + 1);
	}

	return result;
}

287
function _matchesWords(word: string, target: string, i: number, j: number, contiguous: boolean): IMatch[] | null {
288 289 290 291
	if (i === word.length) {
		return [];
	} else if (j === target.length) {
		return null;
292
	} else if (word[i] !== target[j]) {
293 294
		return null;
	} else {
295
		let result: IMatch[] | null = null;
296
		let nextWordIndex = j + 1;
S
#17646:  
Sandeep Somavarapu 已提交
297 298 299 300 301 302
		result = _matchesWords(word, target, i + 1, j + 1, contiguous);
		if (!contiguous) {
			while (!result && (nextWordIndex = nextWord(target, nextWordIndex)) < target.length) {
				result = _matchesWords(word, target, i + 1, nextWordIndex, contiguous);
				nextWordIndex++;
			}
303 304 305 306 307 308 309 310 311 312 313 314 315 316 317
		}
		return result === null ? null : join({ start: j, end: j + 1 }, result);
	}
}

function nextWord(word: string, start: number): number {
	for (let i = start; i < word.length; i++) {
		let c = word.charCodeAt(i);
		if (isWhitespace(c) || (i > 0 && isWhitespace(word.charCodeAt(i - 1)))) {
			return i;
		}
	}
	return word.length;
}

E
Erich Gamma 已提交
318 319
// Fuzzy

J
Johannes Rieken 已提交
320
const fuzzyContiguousFilter = or(matchesPrefix, matchesCamelCase, matchesContiguousSubString);
B
Benjamin Pasero 已提交
321
const fuzzySeparateFilter = or(matchesPrefix, matchesCamelCase, matchesSubString);
D
Dirk Baeumer 已提交
322
const fuzzyRegExpCache = new LRUCache<string, RegExp>(10000); // bounded to 10000 elements
B
Benjamin Pasero 已提交
323

324
export function matchesFuzzy(word: string, wordToMatchAgainst: string, enableSeparateSubstringMatching = false): IMatch[] | null {
325 326 327
	if (typeof word !== 'string' || typeof wordToMatchAgainst !== 'string') {
		return null; // return early for invalid input
	}
E
Erich Gamma 已提交
328 329

	// Form RegExp for wildcard matches
B
Benjamin Pasero 已提交
330
	let regexp = fuzzyRegExpCache.get(word);
E
Erich Gamma 已提交
331
	if (!regexp) {
B
Benjamin Pasero 已提交
332
		regexp = new RegExp(strings.convertSimple2RegExpPattern(word), 'i');
B
Benjamin Pasero 已提交
333
		fuzzyRegExpCache.set(word, regexp);
E
Erich Gamma 已提交
334 335 336
	}

	// RegExp Filter
337
	let match = regexp.exec(wordToMatchAgainst);
E
Erich Gamma 已提交
338
	if (match) {
B
Benjamin Pasero 已提交
339
		return [{ start: match.index, end: match.index + match[0].length }];
E
Erich Gamma 已提交
340 341 342
	}

	// Default Filter
343
	return enableSeparateSubstringMatching ? fuzzySeparateFilter(word, wordToMatchAgainst) : fuzzyContiguousFilter(word, wordToMatchAgainst);
B
Benjamin Pasero 已提交
344
}
J
Johannes Rieken 已提交
345

346 347 348 349 350
/**
 * Match pattern againt word in a fuzzy way. As in IntelliSense and faster and more
 * powerfull than `matchesFuzzy`
 */
export function matchesFuzzy2(pattern: string, word: string): IMatch[] | null {
351
	let score = fuzzyScore(pattern, pattern.toLowerCase(), 0, word, word.toLowerCase(), 0, true);
352 353
	return score ? createMatches(score) : null;
}
354

355 356 357 358 359
export function anyScore(pattern: string, lowPattern: string, _patternPos: number, word: string, lowWord: string, _wordPos: number): FuzzyScore {
	const result = fuzzyScore(pattern, lowPattern, 0, word, lowWord, 0, true);
	if (result) {
		return result;
	}
360 361 362 363 364 365 366 367 368
	let matches = 0;
	let score = 0;
	let idx = _wordPos;
	for (let patternPos = 0; patternPos < lowPattern.length && patternPos < _maxLen; ++patternPos) {
		const wordPos = lowWord.indexOf(lowPattern.charAt(patternPos), idx);
		if (wordPos >= 0) {
			score += 1;
			matches += 2 ** wordPos;
			idx = wordPos + 1;
369 370
		}
	}
371
	return [score, matches, _wordPos];
372 373
}

374 375
//#region --- fuzzyScore ---

376 377 378
export function createMatches(score: undefined | FuzzyScore): IMatch[] {
	if (typeof score === 'undefined') {
		return [];
379
	}
380 381 382 383 384 385 386 387 388 389

	const [, matches, wordStart] = score;
	const res: IMatch[] = [];

	for (let pos = wordStart; pos < _masks.length; pos++) {
		const mask = _masks[pos];
		if (mask > matches) {
			break;
		} else if (matches & mask) {
			res.push({ start: pos, end: pos + 1 });
390 391
		}
	}
392
	return res;
393 394
}

395 396 397 398 399 400 401 402 403
const _maxLen = 53;

const _masks = (function () {
	const result: number[] = [];
	for (let pos = 0; pos < _maxLen; pos++) {
		result.push(2 ** pos);
	}
	return result;
}());
J
Johannes Rieken 已提交
404

405 406 407
function initTable() {
	const table: number[][] = [];
	const row: number[] = [0];
J
Johannes Rieken 已提交
408
	for (let i = 1; i <= _maxLen; i++) {
409 410
		row.push(-i);
	}
J
Johannes Rieken 已提交
411
	for (let i = 0; i <= _maxLen; i++) {
412 413 414 415 416 417
		let thisRow = row.slice(0);
		thisRow[0] = -i;
		table.push(thisRow);
	}
	return table;
}
418

419
const _table = initTable();
J
Johannes Rieken 已提交
420
const _scores = initTable();
J
Johannes Rieken 已提交
421
const _arrows = <Arrow[][]>initTable();
422
const _debug = false;
423

424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443
function printTable(table: number[][], pattern: string, patternLen: number, word: string, wordLen: number): string {
	function pad(s: string, n: number, pad = ' ') {
		while (s.length < n) {
			s = pad + s;
		}
		return s;
	}
	let ret = ` |   |${word.split('').map(c => pad(c, 3)).join('|')}\n`;

	for (let i = 0; i <= patternLen; i++) {
		if (i === 0) {
			ret += ' |';
		} else {
			ret += `${pattern[i - 1]}|`;
		}
		ret += table[i].slice(0, wordLen + 1).map(n => pad(n.toString(), 3)).join('|') + '\n';
	}
	return ret;
}

B
Benjamin Pasero 已提交
444
function isSeparatorAtPos(value: string, index: number): boolean {
445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477
	if (index < 0 || index >= value.length) {
		return false;
	}
	const code = value.charCodeAt(index);
	switch (code) {
		case CharCode.Underline:
		case CharCode.Dash:
		case CharCode.Period:
		case CharCode.Space:
		case CharCode.Slash:
		case CharCode.Backslash:
		case CharCode.SingleQuote:
		case CharCode.DoubleQuote:
		case CharCode.Colon:
			return true;
		default:
			return false;
	}
}

function isWhitespaceAtPos(value: string, index: number): boolean {
	if (index < 0 || index >= value.length) {
		return false;
	}
	const code = value.charCodeAt(index);
	switch (code) {
		case CharCode.Space:
		case CharCode.Tab:
			return true;
		default:
			return false;
	}
}
478

J
Johannes Rieken 已提交
479 480 481 482
function isUpperCaseAtPos(pos: number, word: string, wordLow: string): boolean {
	return word[pos] !== wordLow[pos];
}

483 484 485 486 487 488 489 490 491 492
function isPatternInWord(patternLow: string, patternPos: number, patternLen: number, wordLow: string, wordPos: number, wordLen: number): boolean {
	while (patternPos < patternLen && wordPos < wordLen) {
		if (patternLow[patternPos] === wordLow[wordPos]) {
			patternPos += 1;
		}
		wordPos += 1;
	}
	return patternPos === patternLen; // pattern must be exhausted
}

J
Johannes Rieken 已提交
493 494
const enum Arrow { Top = 0b1, Diag = 0b10, Left = 0b100 }

495 496 497 498 499 500 501 502 503 504 505 506 507 508
/**
 * A tuple of three values.
 * 0. the score
 * 1. the matches encoded as bitmask (2^53)
 * 2. the offset at which matching started
 */
export type FuzzyScore = [number, number, number];

export namespace FuzzyScore {
	/**
	 * No matches and value `-100`
	 */
	export const Default: [-100, 0, 0] = [-100, 0, 0];
}
J
Johannes Rieken 已提交
509

510
export interface FuzzyScorer {
J
Johannes Rieken 已提交
511
	(pattern: string, lowPattern: string, patternPos: number, word: string, lowWord: string, wordPos: number, firstMatchCanBeWeak: boolean): FuzzyScore | undefined;
512 513
}

514
export function fuzzyScore(pattern: string, patternLow: string, patternPos: number, word: string, wordLow: string, wordPos: number, firstMatchCanBeWeak: boolean): FuzzyScore | undefined {
515

J
Johannes Rieken 已提交
516 517
	const patternLen = pattern.length > _maxLen ? _maxLen : pattern.length;
	const wordLen = word.length > _maxLen ? _maxLen : word.length;
518

519
	if (patternPos >= patternLen || wordPos >= wordLen || patternLen > wordLen) {
520 521 522
		return undefined;
	}

J
Johannes Rieken 已提交
523 524 525
	// Run a simple check if the characters of pattern occur
	// (in order) at all in word. If that isn't the case we
	// stop because no match will be possible
526
	if (!isPatternInWord(patternLow, patternPos, patternLen, wordLow, wordPos, wordLen)) {
527 528
		return undefined;
	}
529

530
	const patternStartPos = patternPos;
531
	const wordStartPos = wordPos;
532

J
Johannes Rieken 已提交
533
	// There will be a mach, fill in tables
534
	for (patternPos = patternStartPos + 1; patternPos <= patternLen; patternPos++) {
535

536
		for (wordPos = 1; wordPos <= wordLen; wordPos++) {
537 538

			let score = -1;
J
Johannes Rieken 已提交
539
			if (patternLow[patternPos - 1] === wordLow[wordPos - 1]) {
J
Johannes Rieken 已提交
540

J
Johannes Rieken 已提交
541 542
				if (wordPos === (patternPos - patternStartPos)) {
					// common prefix: `foobar <-> foobaz`
J
Johannes Rieken 已提交
543
					//                            ^^^^^
544
					if (pattern[patternPos - 1] === word[wordPos - 1]) {
545 546 547 548
						score = 7;
					} else {
						score = 5;
					}
J
Johannes Rieken 已提交
549
				} else if (isUpperCaseAtPos(wordPos - 1, word, wordLow) && (wordPos === 1 || !isUpperCaseAtPos(wordPos - 2, word, wordLow))) {
J
Johannes Rieken 已提交
550
					// hitting upper-case: `foo <-> forOthers`
J
Johannes Rieken 已提交
551
					//                              ^^ ^
552
					if (pattern[patternPos - 1] === word[wordPos - 1]) {
J
Johannes Rieken 已提交
553 554 555 556
						score = 7;
					} else {
						score = 5;
					}
557
				} else if (isSeparatorAtPos(wordLow, wordPos - 2) || isWhitespaceAtPos(wordLow, wordPos - 2)) {
J
Johannes Rieken 已提交
558
					// post separator: `foo <-> bar_foo`
J
Johannes Rieken 已提交
559
					//                              ^^^
J
Johannes Rieken 已提交
560 561
					score = 5;

562 563 564 565 566
				} else {
					score = 1;
				}
			}

567
			_scores[patternPos][wordPos] = score;
J
Johannes Rieken 已提交
568

569 570 571
			let diag = _table[patternPos - 1][wordPos - 1] + (score > 1 ? 1 : score);
			let top = _table[patternPos - 1][wordPos] + -1;
			let left = _table[patternPos][wordPos - 1] + -1;
572 573 574

			if (left >= top) {
				// left or diag
J
Johannes Rieken 已提交
575
				if (left > diag) {
576 577
					_table[patternPos][wordPos] = left;
					_arrows[patternPos][wordPos] = Arrow.Left;
J
Johannes Rieken 已提交
578
				} else if (left === diag) {
579 580
					_table[patternPos][wordPos] = left;
					_arrows[patternPos][wordPos] = Arrow.Left | Arrow.Diag;
581
				} else {
582 583
					_table[patternPos][wordPos] = diag;
					_arrows[patternPos][wordPos] = Arrow.Diag;
584 585 586
				}
			} else {
				// top or diag
J
Johannes Rieken 已提交
587
				if (top > diag) {
588 589
					_table[patternPos][wordPos] = top;
					_arrows[patternPos][wordPos] = Arrow.Top;
J
Johannes Rieken 已提交
590
				} else if (top === diag) {
591 592
					_table[patternPos][wordPos] = top;
					_arrows[patternPos][wordPos] = Arrow.Top | Arrow.Diag;
J
Johannes Rieken 已提交
593
				} else {
594 595
					_table[patternPos][wordPos] = diag;
					_arrows[patternPos][wordPos] = Arrow.Diag;
596 597 598 599 600 601 602 603
				}
			}
		}
	}

	if (_debug) {
		console.log(printTable(_table, pattern, patternLen, word, wordLen));
		console.log(printTable(_arrows, pattern, patternLen, word, wordLen));
J
Johannes Rieken 已提交
604
		console.log(printTable(_scores, pattern, patternLen, word, wordLen));
605 606
	}

607
	_matchesCount = 0;
608
	_topScore = -100;
609
	_patternStartPos = patternStartPos;
610
	_firstMatchCanBeWeak = firstMatchCanBeWeak;
611
	_findAllMatches2(patternLen, wordLen, patternLen === wordLen ? 1 : 0, 0, false);
612
	if (_matchesCount === 0) {
J
Johannes Rieken 已提交
613 614 615
		return undefined;
	}

616
	return [_topScore, _topMatch2, wordStartPos];
J
Johannes Rieken 已提交
617 618
}

619

620
let _matchesCount: number = 0;
621
let _topMatch2: number = 0;
622
let _topScore: number = 0;
623
let _patternStartPos: number = 0;
624
let _firstMatchCanBeWeak: boolean = false;
625

626
function _findAllMatches2(patternPos: number, wordPos: number, total: number, matches: number, lastMatched: boolean): void {
627

628
	if (_matchesCount >= 10 || total < -25) {
629 630
		// stop when having already 10 results, or
		// when a potential alignment as already 5 gaps
631 632 633
		return;
	}

634 635
	let simpleMatchCount = 0;

636
	while (patternPos > _patternStartPos && wordPos > 0) {
637

J
Johannes Rieken 已提交
638 639 640
		let score = _scores[patternPos][wordPos];
		let arrow = _arrows[patternPos][wordPos];

641
		if (arrow === Arrow.Left) {
642
			// left -> no match, skip a word character
J
Johannes Rieken 已提交
643
			wordPos -= 1;
644
			if (lastMatched) {
645
				total -= 5; // new gap penalty
646
			} else if (matches !== 0) {
647
				total -= 1; // gap penalty after first match
648 649
			}
			lastMatched = false;
650
			simpleMatchCount = 0;
J
Johannes Rieken 已提交
651 652 653 654 655

		} else if (arrow & Arrow.Diag) {

			if (arrow & Arrow.Left) {
				// left
656
				_findAllMatches2(
657
					patternPos,
658
					wordPos - 1,
659 660
					matches !== 0 ? total - 1 : total, // gap penalty after first match
					matches,
661
					lastMatched
662
				);
663
			}
J
Johannes Rieken 已提交
664 665

			// diag
666
			total += score;
J
Johannes Rieken 已提交
667 668
			patternPos -= 1;
			wordPos -= 1;
669
			lastMatched = true;
J
Johannes Rieken 已提交
670

671 672 673
			// match -> set a 1 at the word pos
			matches += 2 ** wordPos;

J
Johannes Rieken 已提交
674 675 676
			// count simple matches and boost a row of
			// simple matches when they yield in a
			// strong match.
677 678
			if (score === 1) {
				simpleMatchCount += 1;
J
Johannes Rieken 已提交
679

680
				if (patternPos === _patternStartPos && !_firstMatchCanBeWeak) {
J
Johannes Rieken 已提交
681 682 683 684 685
					// when the first match is a weak
					// match we discard it
					return undefined;
				}

686
			} else {
J
Johannes Rieken 已提交
687
				// boost
688
				total += 1 + (simpleMatchCount * (score - 1));
689 690 691
				simpleMatchCount = 0;
			}

J
Johannes Rieken 已提交
692
		} else {
693
			return undefined;
694 695 696
		}
	}

J
Johannes Rieken 已提交
697 698
	total -= wordPos >= 3 ? 9 : wordPos * 3; // late start penalty

699 700
	// dynamically keep track of the current top score
	// and insert the current best score at head, the rest at tail
701
	_matchesCount += 1;
702 703
	if (total > _topScore) {
		_topScore = total;
704
		_topMatch2 = matches;
705
	}
706 707 708 709 710 711 712
}

//#endregion


//#region --- graceful ---

713
export function fuzzyScoreGracefulAggressive(pattern: string, lowPattern: string, patternPos: number, word: string, lowWord: string, wordPos: number, firstMatchCanBeWeak: boolean): FuzzyScore | undefined {
714
	return fuzzyScoreWithPermutations(pattern, lowPattern, patternPos, word, lowWord, wordPos, true, firstMatchCanBeWeak);
715 716
}

717
export function fuzzyScoreGraceful(pattern: string, lowPattern: string, patternPos: number, word: string, lowWord: string, wordPos: number, firstMatchCanBeWeak: boolean): FuzzyScore | undefined {
718
	return fuzzyScoreWithPermutations(pattern, lowPattern, patternPos, word, lowWord, wordPos, false, firstMatchCanBeWeak);
719 720
}

721 722
function fuzzyScoreWithPermutations(pattern: string, lowPattern: string, patternPos: number, word: string, lowWord: string, wordPos: number, aggressive: boolean, firstMatchCanBeWeak: boolean): FuzzyScore | undefined {
	let top = fuzzyScore(pattern, lowPattern, patternPos, word, lowWord, wordPos, firstMatchCanBeWeak);
723 724 725 726 727 728 729 730 731

	if (top && !aggressive) {
		// when using the original pattern yield a result we`
		// return it unless we are aggressive and try to find
		// a better alignment, e.g. `cno` -> `^co^ns^ole` or `^c^o^nsole`.
		return top;
	}

	if (pattern.length >= 3) {
J
Johannes Rieken 已提交
732 733
		// When the pattern is long enough then try a few (max 7)
		// permutations of the pattern to find a better match. The
734 735 736
		// permutations only swap neighbouring characters, e.g
		// `cnoso` becomes `conso`, `cnsoo`, `cnoos`.
		let tries = Math.min(7, pattern.length - 1);
737 738
		for (let movingPatternPos = patternPos + 1; movingPatternPos < tries; movingPatternPos++) {
			let newPattern = nextTypoPermutation(pattern, movingPatternPos);
739
			if (newPattern) {
740
				let candidate = fuzzyScore(newPattern, newPattern.toLowerCase(), patternPos, word, lowWord, wordPos, firstMatchCanBeWeak);
J
Johannes Rieken 已提交
741 742 743 744 745
				if (candidate) {
					candidate[0] -= 3; // permutation penalty
					if (!top || candidate[0] > top[0]) {
						top = candidate;
					}
746 747 748 749 750 751 752 753
				}
			}
		}
	}

	return top;
}

754
function nextTypoPermutation(pattern: string, patternPos: number): string | undefined {
755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773

	if (patternPos + 1 >= pattern.length) {
		return undefined;
	}

	let swap1 = pattern[patternPos];
	let swap2 = pattern[patternPos + 1];

	if (swap1 === swap2) {
		return undefined;
	}

	return pattern.slice(0, patternPos)
		+ swap2
		+ swap1
		+ pattern.slice(patternPos + 2);
}

//#endregion