filters.ts 21.6 KB
Newer Older
E
Erich Gamma 已提交
1 2 3 4 5
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

J
Johannes Rieken 已提交
6
import { CharCode } from 'vs/base/common/charCode';
7 8
import { LRUCache } from 'vs/base/common/map';
import * as strings from 'vs/base/common/strings';
E
Erich Gamma 已提交
9 10 11

export interface IFilter {
	// Returns null if word doesn't match.
12
	(word: string, wordToMatchAgainst: string): IMatch[] | null;
E
Erich Gamma 已提交
13 14 15
}

export interface IMatch {
B
Benjamin Pasero 已提交
16 17
	start: number;
	end: number;
E
Erich Gamma 已提交
18 19 20 21 22
}

// Combined filters

/**
23
 * @returns A filter which combines the provided set
E
Erich Gamma 已提交
24 25 26 27
 * of filters with an or. The *first* filters that
 * matches defined the return value of the returned
 * filter.
 */
B
Benjamin Pasero 已提交
28
export function or(...filter: IFilter[]): IFilter {
29
	return function (word: string, wordToMatchAgainst: string): IMatch[] | null {
B
Benjamin Pasero 已提交
30
		for (let i = 0, len = filter.length; i < len; i++) {
31
			const match = filter[i](word, wordToMatchAgainst);
B
Benjamin Pasero 已提交
32
			if (match) {
E
Erich Gamma 已提交
33 34 35 36 37 38 39 40 41
				return match;
			}
		}
		return null;
	};
}

// Prefix

42 43
export const matchesStrictPrefix: IFilter = _matchesPrefix.bind(undefined, false);
export const matchesPrefix: IFilter = _matchesPrefix.bind(undefined, true);
E
Erich Gamma 已提交
44

45
function _matchesPrefix(ignoreCase: boolean, word: string, wordToMatchAgainst: string): IMatch[] | null {
46
	if (!wordToMatchAgainst || wordToMatchAgainst.length < word.length) {
E
Erich Gamma 已提交
47 48
		return null;
	}
49

50 51
	let matches: boolean;
	if (ignoreCase) {
52
		matches = strings.startsWithIgnoreCase(wordToMatchAgainst, word);
53 54 55
	} else {
		matches = wordToMatchAgainst.indexOf(word) === 0;
	}
56

57
	if (!matches) {
58
		return null;
E
Erich Gamma 已提交
59
	}
60

E
Erich Gamma 已提交
61 62 63 64 65
	return word.length > 0 ? [{ start: 0, end: word.length }] : [];
}

// Contiguous Substring

66
export function matchesContiguousSubString(word: string, wordToMatchAgainst: string): IMatch[] | null {
67
	const index = wordToMatchAgainst.toLowerCase().indexOf(word.toLowerCase());
E
Erich Gamma 已提交
68 69 70 71 72 73 74 75 76
	if (index === -1) {
		return null;
	}

	return [{ start: index, end: index + word.length }];
}

// Substring

77
export function matchesSubString(word: string, wordToMatchAgainst: string): IMatch[] | null {
E
Erich Gamma 已提交
78 79 80
	return _matchesSubString(word.toLowerCase(), wordToMatchAgainst.toLowerCase(), 0, 0);
}

81
function _matchesSubString(word: string, wordToMatchAgainst: string, i: number, j: number): IMatch[] | null {
E
Erich Gamma 已提交
82 83 84 85 86 87
	if (i === word.length) {
		return [];
	} else if (j === wordToMatchAgainst.length) {
		return null;
	} else {
		if (word[i] === wordToMatchAgainst[j]) {
88
			let result: IMatch[] | null = null;
E
Erich Gamma 已提交
89 90 91
			if (result = _matchesSubString(word, wordToMatchAgainst, i + 1, j + 1)) {
				return join({ start: j, end: j + 1 }, result);
			}
92
			return null;
E
Erich Gamma 已提交
93 94 95 96 97 98 99 100
		}

		return _matchesSubString(word, wordToMatchAgainst, i, j + 1);
	}
}

// CamelCase

B
Benjamin Pasero 已提交
101
function isLower(code: number): boolean {
102
	return CharCode.a <= code && code <= CharCode.z;
E
Erich Gamma 已提交
103 104
}

105
export function isUpper(code: number): boolean {
106
	return CharCode.A <= code && code <= CharCode.Z;
E
Erich Gamma 已提交
107 108
}

B
Benjamin Pasero 已提交
109
function isNumber(code: number): boolean {
110
	return CharCode.Digit0 <= code && code <= CharCode.Digit9;
E
Erich Gamma 已提交
111 112
}

B
Benjamin Pasero 已提交
113
function isWhitespace(code: number): boolean {
114 115 116 117 118 119
	return (
		code === CharCode.Space
		|| code === CharCode.Tab
		|| code === CharCode.LineFeed
		|| code === CharCode.CarriageReturn
	);
E
Erich Gamma 已提交
120 121
}

122 123 124 125 126 127 128 129 130
const wordSeparators = new Set<number>();
'`~!@#$%^&*()-=+[{]}\\|;:\'",.<>/?'
	.split('')
	.forEach(s => wordSeparators.add(s.charCodeAt(0)));

function isWordSeparator(code: number): boolean {
	return wordSeparators.has(code);
}

B
Benjamin Pasero 已提交
131
function isAlphanumeric(code: number): boolean {
E
Erich Gamma 已提交
132 133 134
	return isLower(code) || isUpper(code) || isNumber(code);
}

B
Benjamin Pasero 已提交
135
function join(head: IMatch, tail: IMatch[]): IMatch[] {
E
Erich Gamma 已提交
136 137 138 139 140 141 142 143 144 145 146
	if (tail.length === 0) {
		tail = [head];
	} else if (head.end === tail[0].start) {
		tail[0].start = head.start;
	} else {
		tail.unshift(head);
	}
	return tail;
}

function nextAnchor(camelCaseWord: string, start: number): number {
B
Benjamin Pasero 已提交
147
	for (let i = start; i < camelCaseWord.length; i++) {
148
		const c = camelCaseWord.charCodeAt(i);
B
Benjamin Pasero 已提交
149
		if (isUpper(c) || isNumber(c) || (i > 0 && !isAlphanumeric(camelCaseWord.charCodeAt(i - 1)))) {
E
Erich Gamma 已提交
150 151 152 153 154 155
			return i;
		}
	}
	return camelCaseWord.length;
}

156
function _matchesCamelCase(word: string, camelCaseWord: string, i: number, j: number): IMatch[] | null {
E
Erich Gamma 已提交
157 158 159 160 161 162 163
	if (i === word.length) {
		return [];
	} else if (j === camelCaseWord.length) {
		return null;
	} else if (word[i] !== camelCaseWord[j].toLowerCase()) {
		return null;
	} else {
164
		let result: IMatch[] | null = null;
B
Benjamin Pasero 已提交
165
		let nextUpperIndex = j + 1;
E
Erich Gamma 已提交
166 167 168 169 170 171 172 173 174
		result = _matchesCamelCase(word, camelCaseWord, i + 1, j + 1);
		while (!result && (nextUpperIndex = nextAnchor(camelCaseWord, nextUpperIndex)) < camelCaseWord.length) {
			result = _matchesCamelCase(word, camelCaseWord, i + 1, nextUpperIndex);
			nextUpperIndex++;
		}
		return result === null ? null : join({ start: j, end: j + 1 }, result);
	}
}

J
Joao Moreno 已提交
175 176 177 178 179 180 181
interface ICamelCaseAnalysis {
	upperPercent: number;
	lowerPercent: number;
	alphaPercent: number;
	numericPercent: number;
}

E
Erich Gamma 已提交
182 183
// Heuristic to avoid computing camel case matcher for words that don't
// look like camelCaseWords.
J
Joao Moreno 已提交
184
function analyzeCamelCaseWord(word: string): ICamelCaseAnalysis {
185
	let upper = 0, lower = 0, alpha = 0, numeric = 0, code = 0;
E
Erich Gamma 已提交
186

B
Benjamin Pasero 已提交
187
	for (let i = 0; i < word.length; i++) {
E
Erich Gamma 已提交
188 189
		code = word.charCodeAt(i);

190 191 192 193
		if (isUpper(code)) { upper++; }
		if (isLower(code)) { lower++; }
		if (isAlphanumeric(code)) { alpha++; }
		if (isNumber(code)) { numeric++; }
E
Erich Gamma 已提交
194 195
	}

196 197 198 199
	const upperPercent = upper / word.length;
	const lowerPercent = lower / word.length;
	const alphaPercent = alpha / word.length;
	const numericPercent = numeric / word.length;
E
Erich Gamma 已提交
200

J
Joao Moreno 已提交
201 202 203 204
	return { upperPercent, lowerPercent, alphaPercent, numericPercent };
}

function isUpperCaseWord(analysis: ICamelCaseAnalysis): boolean {
B
Benjamin Pasero 已提交
205
	const { upperPercent, lowerPercent } = analysis;
J
Joao Moreno 已提交
206 207 208 209 210
	return lowerPercent === 0 && upperPercent > 0.6;
}

function isCamelCaseWord(analysis: ICamelCaseAnalysis): boolean {
	const { upperPercent, lowerPercent, alphaPercent, numericPercent } = analysis;
211
	return lowerPercent > 0.2 && upperPercent < 0.8 && alphaPercent > 0.6 && numericPercent < 0.2;
E
Erich Gamma 已提交
212 213 214 215 216
}

// Heuristic to avoid computing camel case matcher for words that don't
// look like camel case patterns.
function isCamelCasePattern(word: string): boolean {
B
Benjamin Pasero 已提交
217
	let upper = 0, lower = 0, code = 0, whitespace = 0;
E
Erich Gamma 已提交
218

B
Benjamin Pasero 已提交
219
	for (let i = 0; i < word.length; i++) {
E
Erich Gamma 已提交
220 221
		code = word.charCodeAt(i);

222 223 224
		if (isUpper(code)) { upper++; }
		if (isLower(code)) { lower++; }
		if (isWhitespace(code)) { whitespace++; }
E
Erich Gamma 已提交
225 226 227 228 229 230 231 232 233
	}

	if ((upper === 0 || lower === 0) && whitespace === 0) {
		return word.length <= 30;
	} else {
		return upper <= 5;
	}
}

234
export function matchesCamelCase(word: string, camelCaseWord: string): IMatch[] | null {
235 236 237 238 239 240 241
	if (!camelCaseWord) {
		return null;
	}

	camelCaseWord = camelCaseWord.trim();

	if (camelCaseWord.length === 0) {
E
Erich Gamma 已提交
242 243 244 245 246 247 248
		return null;
	}

	if (!isCamelCasePattern(word)) {
		return null;
	}

J
Joao Moreno 已提交
249
	if (camelCaseWord.length > 60) {
E
Erich Gamma 已提交
250 251 252
		return null;
	}

J
Joao Moreno 已提交
253 254 255 256 257 258 259 260 261 262
	const analysis = analyzeCamelCaseWord(camelCaseWord);

	if (!isCamelCaseWord(analysis)) {
		if (!isUpperCaseWord(analysis)) {
			return null;
		}

		camelCaseWord = camelCaseWord.toLowerCase();
	}

263
	let result: IMatch[] | null = null;
B
Benjamin Pasero 已提交
264
	let i = 0;
E
Erich Gamma 已提交
265

266 267
	word = word.toLowerCase();
	while (i < camelCaseWord.length && (result = _matchesCamelCase(word, camelCaseWord, 0, i)) === null) {
E
Erich Gamma 已提交
268 269 270 271 272 273
		i = nextAnchor(camelCaseWord, i + 1);
	}

	return result;
}

274
// Matches beginning of words supporting non-ASCII languages
S
#17646:  
Sandeep Somavarapu 已提交
275 276
// If `contiguous` is true then matches word with beginnings of the words in the target. E.g. "pul" will match "Git: Pull"
// Otherwise also matches sub string of the word with beginnings of the words in the target. E.g. "gp" or "g p" will match "Git: Pull"
277 278
// Useful in cases where the target is words (e.g. command labels)

279
export function matchesWords(word: string, target: string, contiguous: boolean = false): IMatch[] | null {
280
	if (!target || target.length === 0) {
281 282 283
		return null;
	}

284
	let result: IMatch[] | null = null;
285 286
	let i = 0;

287 288 289
	word = word.toLowerCase();
	target = target.toLowerCase();
	while (i < target.length && (result = _matchesWords(word, target, 0, i, contiguous)) === null) {
290 291 292 293 294 295
		i = nextWord(target, i + 1);
	}

	return result;
}

296
function _matchesWords(word: string, target: string, i: number, j: number, contiguous: boolean): IMatch[] | null {
297 298 299 300
	if (i === word.length) {
		return [];
	} else if (j === target.length) {
		return null;
301
	} else if (word[i] !== target[j]) {
302 303
		return null;
	} else {
304
		let result: IMatch[] | null = null;
305
		let nextWordIndex = j + 1;
S
#17646:  
Sandeep Somavarapu 已提交
306 307 308 309 310 311
		result = _matchesWords(word, target, i + 1, j + 1, contiguous);
		if (!contiguous) {
			while (!result && (nextWordIndex = nextWord(target, nextWordIndex)) < target.length) {
				result = _matchesWords(word, target, i + 1, nextWordIndex, contiguous);
				nextWordIndex++;
			}
312 313 314 315 316 317 318
		}
		return result === null ? null : join({ start: j, end: j + 1 }, result);
	}
}

function nextWord(word: string, start: number): number {
	for (let i = start; i < word.length; i++) {
319
		const c = word.charCodeAt(i);
320 321
		if (isWhitespace(c) || (i > 0 && isWhitespace(word.charCodeAt(i - 1))) ||
			isWordSeparator(c) || (i > 0 && isWordSeparator(word.charCodeAt(i - 1)))) {
322 323 324 325 326 327
			return i;
		}
	}
	return word.length;
}

E
Erich Gamma 已提交
328 329
// Fuzzy

J
Johannes Rieken 已提交
330
const fuzzyContiguousFilter = or(matchesPrefix, matchesCamelCase, matchesContiguousSubString);
B
Benjamin Pasero 已提交
331
const fuzzySeparateFilter = or(matchesPrefix, matchesCamelCase, matchesSubString);
D
Dirk Baeumer 已提交
332
const fuzzyRegExpCache = new LRUCache<string, RegExp>(10000); // bounded to 10000 elements
B
Benjamin Pasero 已提交
333

334
export function matchesFuzzy(word: string, wordToMatchAgainst: string, enableSeparateSubstringMatching = false): IMatch[] | null {
335 336 337
	if (typeof word !== 'string' || typeof wordToMatchAgainst !== 'string') {
		return null; // return early for invalid input
	}
E
Erich Gamma 已提交
338 339

	// Form RegExp for wildcard matches
B
Benjamin Pasero 已提交
340
	let regexp = fuzzyRegExpCache.get(word);
E
Erich Gamma 已提交
341
	if (!regexp) {
B
Benjamin Pasero 已提交
342
		regexp = new RegExp(strings.convertSimple2RegExpPattern(word), 'i');
B
Benjamin Pasero 已提交
343
		fuzzyRegExpCache.set(word, regexp);
E
Erich Gamma 已提交
344 345 346
	}

	// RegExp Filter
347
	const match = regexp.exec(wordToMatchAgainst);
E
Erich Gamma 已提交
348
	if (match) {
B
Benjamin Pasero 已提交
349
		return [{ start: match.index, end: match.index + match[0].length }];
E
Erich Gamma 已提交
350 351 352
	}

	// Default Filter
353
	return enableSeparateSubstringMatching ? fuzzySeparateFilter(word, wordToMatchAgainst) : fuzzyContiguousFilter(word, wordToMatchAgainst);
B
Benjamin Pasero 已提交
354
}
J
Johannes Rieken 已提交
355

356 357 358 359 360
/**
 * Match pattern againt word in a fuzzy way. As in IntelliSense and faster and more
 * powerfull than `matchesFuzzy`
 */
export function matchesFuzzy2(pattern: string, word: string): IMatch[] | null {
361
	const score = fuzzyScore(pattern, pattern.toLowerCase(), 0, word, word.toLowerCase(), 0, true);
362 363
	return score ? createMatches(score) : null;
}
364

365 366
//#region --- fuzzyScore ---

367 368 369
export function createMatches(score: undefined | FuzzyScore): IMatch[] {
	if (typeof score === 'undefined') {
		return [];
370
	}
371

372 373
	const matches = score[1].toString(2);
	const wordStart = score[2];
374 375
	const res: IMatch[] = [];

376 377
	for (let pos = wordStart; pos < _maxLen; pos++) {
		if (matches[matches.length - (pos + 1)] === '1') {
J
Joao Moreno 已提交
378 379 380 381 382 383
			const last = res[res.length - 1];
			if (last && last.end === pos) {
				last.end = pos + 1;
			} else {
				res.push({ start: pos, end: pos + 1 });
			}
384 385
		}
	}
386
	return res;
387 388
}

389 390
const _maxLen = 53;

391 392 393
function initTable() {
	const table: number[][] = [];
	const row: number[] = [0];
J
Johannes Rieken 已提交
394
	for (let i = 1; i <= _maxLen; i++) {
395 396
		row.push(-i);
	}
J
Johannes Rieken 已提交
397
	for (let i = 0; i <= _maxLen; i++) {
398
		const thisRow = row.slice(0);
399 400 401 402 403
		thisRow[0] = -i;
		table.push(thisRow);
	}
	return table;
}
404

405
const _table = initTable();
J
Johannes Rieken 已提交
406
const _scores = initTable();
J
Johannes Rieken 已提交
407
const _arrows = <Arrow[][]>initTable();
408
const _debug = false;
409

410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429
function printTable(table: number[][], pattern: string, patternLen: number, word: string, wordLen: number): string {
	function pad(s: string, n: number, pad = ' ') {
		while (s.length < n) {
			s = pad + s;
		}
		return s;
	}
	let ret = ` |   |${word.split('').map(c => pad(c, 3)).join('|')}\n`;

	for (let i = 0; i <= patternLen; i++) {
		if (i === 0) {
			ret += ' |';
		} else {
			ret += `${pattern[i - 1]}|`;
		}
		ret += table[i].slice(0, wordLen + 1).map(n => pad(n.toString(), 3)).join('|') + '\n';
	}
	return ret;
}

B
Benjamin Pasero 已提交
430
function isSeparatorAtPos(value: string, index: number): boolean {
431 432 433 434 435 436 437 438 439 440 441 442 443 444
	if (index < 0 || index >= value.length) {
		return false;
	}
	const code = value.charCodeAt(index);
	switch (code) {
		case CharCode.Underline:
		case CharCode.Dash:
		case CharCode.Period:
		case CharCode.Space:
		case CharCode.Slash:
		case CharCode.Backslash:
		case CharCode.SingleQuote:
		case CharCode.DoubleQuote:
		case CharCode.Colon:
445
		case CharCode.DollarSign:
446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464
			return true;
		default:
			return false;
	}
}

function isWhitespaceAtPos(value: string, index: number): boolean {
	if (index < 0 || index >= value.length) {
		return false;
	}
	const code = value.charCodeAt(index);
	switch (code) {
		case CharCode.Space:
		case CharCode.Tab:
			return true;
		default:
			return false;
	}
}
465

J
Johannes Rieken 已提交
466 467 468 469
function isUpperCaseAtPos(pos: number, word: string, wordLow: string): boolean {
	return word[pos] !== wordLow[pos];
}

470 471 472 473 474 475 476 477 478 479
function isPatternInWord(patternLow: string, patternPos: number, patternLen: number, wordLow: string, wordPos: number, wordLen: number): boolean {
	while (patternPos < patternLen && wordPos < wordLen) {
		if (patternLow[patternPos] === wordLow[wordPos]) {
			patternPos += 1;
		}
		wordPos += 1;
	}
	return patternPos === patternLen; // pattern must be exhausted
}

J
Johannes Rieken 已提交
480 481
const enum Arrow { Top = 0b1, Diag = 0b10, Left = 0b100 }

482 483 484 485 486 487 488 489 490 491 492 493
/**
 * A tuple of three values.
 * 0. the score
 * 1. the matches encoded as bitmask (2^53)
 * 2. the offset at which matching started
 */
export type FuzzyScore = [number, number, number];

export namespace FuzzyScore {
	/**
	 * No matches and value `-100`
	 */
494
	export const Default: [-100, 0, 0] = <[-100, 0, 0]>Object.freeze([-100, 0, 0]);
J
Joao Moreno 已提交
495

J
Joao Moreno 已提交
496 497
	export function isDefault(score?: FuzzyScore): score is [-100, 0, 0] {
		return !score || (score[0] === -100 && score[1] === 0 && score[2] === 0);
J
Joao Moreno 已提交
498
	}
499
}
J
Johannes Rieken 已提交
500

501
export interface FuzzyScorer {
J
Johannes Rieken 已提交
502
	(pattern: string, lowPattern: string, patternPos: number, word: string, lowWord: string, wordPos: number, firstMatchCanBeWeak: boolean): FuzzyScore | undefined;
503 504
}

505
export function fuzzyScore(pattern: string, patternLow: string, patternPos: number, word: string, wordLow: string, wordPos: number, firstMatchCanBeWeak: boolean): FuzzyScore | undefined {
506

J
Johannes Rieken 已提交
507 508
	const patternLen = pattern.length > _maxLen ? _maxLen : pattern.length;
	const wordLen = word.length > _maxLen ? _maxLen : word.length;
509

510
	if (patternPos >= patternLen || wordPos >= wordLen || patternLen > wordLen) {
511 512 513
		return undefined;
	}

J
Johannes Rieken 已提交
514 515 516
	// Run a simple check if the characters of pattern occur
	// (in order) at all in word. If that isn't the case we
	// stop because no match will be possible
517
	if (!isPatternInWord(patternLow, patternPos, patternLen, wordLow, wordPos, wordLen)) {
518 519
		return undefined;
	}
520

521
	const patternStartPos = patternPos;
522
	const wordStartPos = wordPos;
523

J
Johannes Rieken 已提交
524
	// There will be a mach, fill in tables
525
	for (patternPos = patternStartPos + 1; patternPos <= patternLen; patternPos++) {
526

527
		for (wordPos = 1; wordPos <= wordLen; wordPos++) {
528 529

			let score = -1;
J
Johannes Rieken 已提交
530
			if (patternLow[patternPos - 1] === wordLow[wordPos - 1]) {
J
Johannes Rieken 已提交
531

J
Johannes Rieken 已提交
532 533
				if (wordPos === (patternPos - patternStartPos)) {
					// common prefix: `foobar <-> foobaz`
J
Johannes Rieken 已提交
534
					//                            ^^^^^
535
					if (pattern[patternPos - 1] === word[wordPos - 1]) {
536 537 538 539
						score = 7;
					} else {
						score = 5;
					}
J
Johannes Rieken 已提交
540
				} else if (isUpperCaseAtPos(wordPos - 1, word, wordLow) && (wordPos === 1 || !isUpperCaseAtPos(wordPos - 2, word, wordLow))) {
J
Johannes Rieken 已提交
541
					// hitting upper-case: `foo <-> forOthers`
J
Johannes Rieken 已提交
542
					//                              ^^ ^
543
					if (pattern[patternPos - 1] === word[wordPos - 1]) {
J
Johannes Rieken 已提交
544 545 546 547
						score = 7;
					} else {
						score = 5;
					}
548
				} else if (isSeparatorAtPos(wordLow, wordPos - 2) || isWhitespaceAtPos(wordLow, wordPos - 2)) {
J
Johannes Rieken 已提交
549
					// post separator: `foo <-> bar_foo`
J
Johannes Rieken 已提交
550
					//                              ^^^
J
Johannes Rieken 已提交
551 552
					score = 5;

553 554 555 556 557
				} else {
					score = 1;
				}
			}

558
			_scores[patternPos][wordPos] = score;
J
Johannes Rieken 已提交
559

560 561 562
			const diag = _table[patternPos - 1][wordPos - 1] + (score > 1 ? 1 : score);
			const top = _table[patternPos - 1][wordPos] + -1;
			const left = _table[patternPos][wordPos - 1] + -1;
563 564 565

			if (left >= top) {
				// left or diag
J
Johannes Rieken 已提交
566
				if (left > diag) {
567 568
					_table[patternPos][wordPos] = left;
					_arrows[patternPos][wordPos] = Arrow.Left;
J
Johannes Rieken 已提交
569
				} else if (left === diag) {
570 571
					_table[patternPos][wordPos] = left;
					_arrows[patternPos][wordPos] = Arrow.Left | Arrow.Diag;
572
				} else {
573 574
					_table[patternPos][wordPos] = diag;
					_arrows[patternPos][wordPos] = Arrow.Diag;
575 576 577
				}
			} else {
				// top or diag
J
Johannes Rieken 已提交
578
				if (top > diag) {
579 580
					_table[patternPos][wordPos] = top;
					_arrows[patternPos][wordPos] = Arrow.Top;
J
Johannes Rieken 已提交
581
				} else if (top === diag) {
582 583
					_table[patternPos][wordPos] = top;
					_arrows[patternPos][wordPos] = Arrow.Top | Arrow.Diag;
J
Johannes Rieken 已提交
584
				} else {
585 586
					_table[patternPos][wordPos] = diag;
					_arrows[patternPos][wordPos] = Arrow.Diag;
587 588 589 590 591 592 593 594
				}
			}
		}
	}

	if (_debug) {
		console.log(printTable(_table, pattern, patternLen, word, wordLen));
		console.log(printTable(_arrows, pattern, patternLen, word, wordLen));
J
Johannes Rieken 已提交
595
		console.log(printTable(_scores, pattern, patternLen, word, wordLen));
596 597
	}

598
	_matchesCount = 0;
599
	_topScore = -100;
600
	_patternStartPos = patternStartPos;
601
	_firstMatchCanBeWeak = firstMatchCanBeWeak;
602
	_findAllMatches2(patternLen, wordLen, patternLen === wordLen ? 1 : 0, 0, false);
603
	if (_matchesCount === 0) {
J
Johannes Rieken 已提交
604 605 606
		return undefined;
	}

607
	return [_topScore, _topMatch2, wordStartPos];
J
Johannes Rieken 已提交
608 609
}

610

611
let _matchesCount: number = 0;
612
let _topMatch2: number = 0;
613
let _topScore: number = 0;
614
let _patternStartPos: number = 0;
615
let _firstMatchCanBeWeak: boolean = false;
616

617
function _findAllMatches2(patternPos: number, wordPos: number, total: number, matches: number, lastMatched: boolean): void {
618

619
	if (_matchesCount >= 10 || total < -25) {
620 621
		// stop when having already 10 results, or
		// when a potential alignment as already 5 gaps
622 623 624
		return;
	}

625 626
	let simpleMatchCount = 0;

627
	while (patternPos > _patternStartPos && wordPos > 0) {
628

629 630
		const score = _scores[patternPos][wordPos];
		const arrow = _arrows[patternPos][wordPos];
J
Johannes Rieken 已提交
631

632
		if (arrow === Arrow.Left) {
633
			// left -> no match, skip a word character
J
Johannes Rieken 已提交
634
			wordPos -= 1;
635
			if (lastMatched) {
636
				total -= 5; // new gap penalty
637
			} else if (matches !== 0) {
638
				total -= 1; // gap penalty after first match
639 640
			}
			lastMatched = false;
641
			simpleMatchCount = 0;
J
Johannes Rieken 已提交
642 643 644 645 646

		} else if (arrow & Arrow.Diag) {

			if (arrow & Arrow.Left) {
				// left
647
				_findAllMatches2(
648
					patternPos,
649
					wordPos - 1,
650 651
					matches !== 0 ? total - 1 : total, // gap penalty after first match
					matches,
652
					lastMatched
653
				);
654
			}
J
Johannes Rieken 已提交
655 656

			// diag
657
			total += score;
J
Johannes Rieken 已提交
658 659
			patternPos -= 1;
			wordPos -= 1;
660
			lastMatched = true;
J
Johannes Rieken 已提交
661

662 663 664
			// match -> set a 1 at the word pos
			matches += 2 ** wordPos;

J
Johannes Rieken 已提交
665 666 667
			// count simple matches and boost a row of
			// simple matches when they yield in a
			// strong match.
668 669
			if (score === 1) {
				simpleMatchCount += 1;
J
Johannes Rieken 已提交
670

671
				if (patternPos === _patternStartPos && !_firstMatchCanBeWeak) {
J
Johannes Rieken 已提交
672 673 674 675 676
					// when the first match is a weak
					// match we discard it
					return undefined;
				}

677
			} else {
J
Johannes Rieken 已提交
678
				// boost
679
				total += 1 + (simpleMatchCount * (score - 1));
680 681 682
				simpleMatchCount = 0;
			}

J
Johannes Rieken 已提交
683
		} else {
684
			return undefined;
685 686 687
		}
	}

J
Johannes Rieken 已提交
688 689
	total -= wordPos >= 3 ? 9 : wordPos * 3; // late start penalty

690 691
	// dynamically keep track of the current top score
	// and insert the current best score at head, the rest at tail
692
	_matchesCount += 1;
693 694
	if (total > _topScore) {
		_topScore = total;
695
		_topMatch2 = matches;
696
	}
697 698 699 700 701 702 703
}

//#endregion


//#region --- graceful ---

704
export function fuzzyScoreGracefulAggressive(pattern: string, lowPattern: string, patternPos: number, word: string, lowWord: string, wordPos: number, firstMatchCanBeWeak: boolean): FuzzyScore | undefined {
705
	return fuzzyScoreWithPermutations(pattern, lowPattern, patternPos, word, lowWord, wordPos, true, firstMatchCanBeWeak);
706 707
}

708
export function fuzzyScoreGraceful(pattern: string, lowPattern: string, patternPos: number, word: string, lowWord: string, wordPos: number, firstMatchCanBeWeak: boolean): FuzzyScore | undefined {
709
	return fuzzyScoreWithPermutations(pattern, lowPattern, patternPos, word, lowWord, wordPos, false, firstMatchCanBeWeak);
710 711
}

712 713
function fuzzyScoreWithPermutations(pattern: string, lowPattern: string, patternPos: number, word: string, lowWord: string, wordPos: number, aggressive: boolean, firstMatchCanBeWeak: boolean): FuzzyScore | undefined {
	let top = fuzzyScore(pattern, lowPattern, patternPos, word, lowWord, wordPos, firstMatchCanBeWeak);
714 715 716 717 718 719 720 721 722

	if (top && !aggressive) {
		// when using the original pattern yield a result we`
		// return it unless we are aggressive and try to find
		// a better alignment, e.g. `cno` -> `^co^ns^ole` or `^c^o^nsole`.
		return top;
	}

	if (pattern.length >= 3) {
J
Johannes Rieken 已提交
723 724
		// When the pattern is long enough then try a few (max 7)
		// permutations of the pattern to find a better match. The
725 726
		// permutations only swap neighbouring characters, e.g
		// `cnoso` becomes `conso`, `cnsoo`, `cnoos`.
727
		const tries = Math.min(7, pattern.length - 1);
728
		for (let movingPatternPos = patternPos + 1; movingPatternPos < tries; movingPatternPos++) {
729
			const newPattern = nextTypoPermutation(pattern, movingPatternPos);
730
			if (newPattern) {
731
				const candidate = fuzzyScore(newPattern, newPattern.toLowerCase(), patternPos, word, lowWord, wordPos, firstMatchCanBeWeak);
J
Johannes Rieken 已提交
732 733 734 735 736
				if (candidate) {
					candidate[0] -= 3; // permutation penalty
					if (!top || candidate[0] > top[0]) {
						top = candidate;
					}
737 738 739 740 741 742 743 744
				}
			}
		}
	}

	return top;
}

745
function nextTypoPermutation(pattern: string, patternPos: number): string | undefined {
746 747 748 749 750

	if (patternPos + 1 >= pattern.length) {
		return undefined;
	}

751 752
	const swap1 = pattern[patternPos];
	const swap2 = pattern[patternPos + 1];
753 754 755 756 757 758 759 760 761 762 763 764

	if (swap1 === swap2) {
		return undefined;
	}

	return pattern.slice(0, patternPos)
		+ swap2
		+ swap1
		+ pattern.slice(patternPos + 2);
}

//#endregion