filters.ts 22.5 KB
Newer Older
E
Erich Gamma 已提交
1 2 3 4 5
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

J
Johannes Rieken 已提交
6
import { CharCode } from 'vs/base/common/charCode';
7 8
import { LRUCache } from 'vs/base/common/map';
import * as strings from 'vs/base/common/strings';
E
Erich Gamma 已提交
9 10 11

export interface IFilter {
	// Returns null if word doesn't match.
12
	(word: string, wordToMatchAgainst: string): IMatch[] | null;
E
Erich Gamma 已提交
13 14 15
}

export interface IMatch {
B
Benjamin Pasero 已提交
16 17
	start: number;
	end: number;
E
Erich Gamma 已提交
18 19 20 21 22
}

// Combined filters

/**
23
 * @returns A filter which combines the provided set
E
Erich Gamma 已提交
24 25 26 27
 * of filters with an or. The *first* filters that
 * matches defined the return value of the returned
 * filter.
 */
B
Benjamin Pasero 已提交
28
export function or(...filter: IFilter[]): IFilter {
29
	return function (word: string, wordToMatchAgainst: string): IMatch[] | null {
B
Benjamin Pasero 已提交
30
		for (let i = 0, len = filter.length; i < len; i++) {
31
			const match = filter[i](word, wordToMatchAgainst);
B
Benjamin Pasero 已提交
32
			if (match) {
E
Erich Gamma 已提交
33 34 35 36 37 38 39 40 41
				return match;
			}
		}
		return null;
	};
}

// Prefix

42 43
export const matchesStrictPrefix: IFilter = _matchesPrefix.bind(undefined, false);
export const matchesPrefix: IFilter = _matchesPrefix.bind(undefined, true);
E
Erich Gamma 已提交
44

45
function _matchesPrefix(ignoreCase: boolean, word: string, wordToMatchAgainst: string): IMatch[] | null {
46
	if (!wordToMatchAgainst || wordToMatchAgainst.length < word.length) {
E
Erich Gamma 已提交
47 48
		return null;
	}
49

50 51
	let matches: boolean;
	if (ignoreCase) {
52
		matches = strings.startsWithIgnoreCase(wordToMatchAgainst, word);
53 54 55
	} else {
		matches = wordToMatchAgainst.indexOf(word) === 0;
	}
56

57
	if (!matches) {
58
		return null;
E
Erich Gamma 已提交
59
	}
60

E
Erich Gamma 已提交
61 62 63 64 65
	return word.length > 0 ? [{ start: 0, end: word.length }] : [];
}

// Contiguous Substring

66
export function matchesContiguousSubString(word: string, wordToMatchAgainst: string): IMatch[] | null {
67
	const index = wordToMatchAgainst.toLowerCase().indexOf(word.toLowerCase());
E
Erich Gamma 已提交
68 69 70 71 72 73 74 75 76
	if (index === -1) {
		return null;
	}

	return [{ start: index, end: index + word.length }];
}

// Substring

77
export function matchesSubString(word: string, wordToMatchAgainst: string): IMatch[] | null {
E
Erich Gamma 已提交
78 79 80
	return _matchesSubString(word.toLowerCase(), wordToMatchAgainst.toLowerCase(), 0, 0);
}

81
function _matchesSubString(word: string, wordToMatchAgainst: string, i: number, j: number): IMatch[] | null {
E
Erich Gamma 已提交
82 83 84 85 86 87
	if (i === word.length) {
		return [];
	} else if (j === wordToMatchAgainst.length) {
		return null;
	} else {
		if (word[i] === wordToMatchAgainst[j]) {
88
			let result: IMatch[] | null = null;
E
Erich Gamma 已提交
89 90 91
			if (result = _matchesSubString(word, wordToMatchAgainst, i + 1, j + 1)) {
				return join({ start: j, end: j + 1 }, result);
			}
92
			return null;
E
Erich Gamma 已提交
93 94 95 96 97 98 99 100
		}

		return _matchesSubString(word, wordToMatchAgainst, i, j + 1);
	}
}

// CamelCase

B
Benjamin Pasero 已提交
101
function isLower(code: number): boolean {
102
	return CharCode.a <= code && code <= CharCode.z;
E
Erich Gamma 已提交
103 104
}

105
export function isUpper(code: number): boolean {
106
	return CharCode.A <= code && code <= CharCode.Z;
E
Erich Gamma 已提交
107 108
}

B
Benjamin Pasero 已提交
109
function isNumber(code: number): boolean {
110
	return CharCode.Digit0 <= code && code <= CharCode.Digit9;
E
Erich Gamma 已提交
111 112
}

B
Benjamin Pasero 已提交
113
function isWhitespace(code: number): boolean {
114 115 116 117 118 119
	return (
		code === CharCode.Space
		|| code === CharCode.Tab
		|| code === CharCode.LineFeed
		|| code === CharCode.CarriageReturn
	);
E
Erich Gamma 已提交
120 121
}

122 123 124 125 126 127
const wordSeparators = new Set<number>();
'`~!@#$%^&*()-=+[{]}\\|;:\'",.<>/?'
	.split('')
	.forEach(s => wordSeparators.add(s.charCodeAt(0)));

function isWordSeparator(code: number): boolean {
128 129 130 131 132
	return isWhitespace(code) || wordSeparators.has(code);
}

function charactersMatch(codeA: number, codeB: number): boolean {
	return (codeA === codeB) || (isWordSeparator(codeA) && isWordSeparator(codeB));
133 134
}

B
Benjamin Pasero 已提交
135
function isAlphanumeric(code: number): boolean {
E
Erich Gamma 已提交
136 137 138
	return isLower(code) || isUpper(code) || isNumber(code);
}

B
Benjamin Pasero 已提交
139
function join(head: IMatch, tail: IMatch[]): IMatch[] {
E
Erich Gamma 已提交
140 141 142 143 144 145 146 147 148 149 150
	if (tail.length === 0) {
		tail = [head];
	} else if (head.end === tail[0].start) {
		tail[0].start = head.start;
	} else {
		tail.unshift(head);
	}
	return tail;
}

function nextAnchor(camelCaseWord: string, start: number): number {
B
Benjamin Pasero 已提交
151
	for (let i = start; i < camelCaseWord.length; i++) {
152
		const c = camelCaseWord.charCodeAt(i);
B
Benjamin Pasero 已提交
153
		if (isUpper(c) || isNumber(c) || (i > 0 && !isAlphanumeric(camelCaseWord.charCodeAt(i - 1)))) {
E
Erich Gamma 已提交
154 155 156 157 158 159
			return i;
		}
	}
	return camelCaseWord.length;
}

160
function _matchesCamelCase(word: string, camelCaseWord: string, i: number, j: number): IMatch[] | null {
E
Erich Gamma 已提交
161 162 163 164 165 166 167
	if (i === word.length) {
		return [];
	} else if (j === camelCaseWord.length) {
		return null;
	} else if (word[i] !== camelCaseWord[j].toLowerCase()) {
		return null;
	} else {
168
		let result: IMatch[] | null = null;
B
Benjamin Pasero 已提交
169
		let nextUpperIndex = j + 1;
E
Erich Gamma 已提交
170 171 172 173 174 175 176 177 178
		result = _matchesCamelCase(word, camelCaseWord, i + 1, j + 1);
		while (!result && (nextUpperIndex = nextAnchor(camelCaseWord, nextUpperIndex)) < camelCaseWord.length) {
			result = _matchesCamelCase(word, camelCaseWord, i + 1, nextUpperIndex);
			nextUpperIndex++;
		}
		return result === null ? null : join({ start: j, end: j + 1 }, result);
	}
}

J
Joao Moreno 已提交
179 180 181 182 183 184 185
interface ICamelCaseAnalysis {
	upperPercent: number;
	lowerPercent: number;
	alphaPercent: number;
	numericPercent: number;
}

E
Erich Gamma 已提交
186 187
// Heuristic to avoid computing camel case matcher for words that don't
// look like camelCaseWords.
J
Joao Moreno 已提交
188
function analyzeCamelCaseWord(word: string): ICamelCaseAnalysis {
189
	let upper = 0, lower = 0, alpha = 0, numeric = 0, code = 0;
E
Erich Gamma 已提交
190

B
Benjamin Pasero 已提交
191
	for (let i = 0; i < word.length; i++) {
E
Erich Gamma 已提交
192 193
		code = word.charCodeAt(i);

194 195 196 197
		if (isUpper(code)) { upper++; }
		if (isLower(code)) { lower++; }
		if (isAlphanumeric(code)) { alpha++; }
		if (isNumber(code)) { numeric++; }
E
Erich Gamma 已提交
198 199
	}

200 201 202 203
	const upperPercent = upper / word.length;
	const lowerPercent = lower / word.length;
	const alphaPercent = alpha / word.length;
	const numericPercent = numeric / word.length;
E
Erich Gamma 已提交
204

J
Joao Moreno 已提交
205 206 207 208
	return { upperPercent, lowerPercent, alphaPercent, numericPercent };
}

function isUpperCaseWord(analysis: ICamelCaseAnalysis): boolean {
B
Benjamin Pasero 已提交
209
	const { upperPercent, lowerPercent } = analysis;
J
Joao Moreno 已提交
210 211 212 213 214
	return lowerPercent === 0 && upperPercent > 0.6;
}

function isCamelCaseWord(analysis: ICamelCaseAnalysis): boolean {
	const { upperPercent, lowerPercent, alphaPercent, numericPercent } = analysis;
215
	return lowerPercent > 0.2 && upperPercent < 0.8 && alphaPercent > 0.6 && numericPercent < 0.2;
E
Erich Gamma 已提交
216 217 218 219 220
}

// Heuristic to avoid computing camel case matcher for words that don't
// look like camel case patterns.
function isCamelCasePattern(word: string): boolean {
B
Benjamin Pasero 已提交
221
	let upper = 0, lower = 0, code = 0, whitespace = 0;
E
Erich Gamma 已提交
222

B
Benjamin Pasero 已提交
223
	for (let i = 0; i < word.length; i++) {
E
Erich Gamma 已提交
224 225
		code = word.charCodeAt(i);

226 227 228
		if (isUpper(code)) { upper++; }
		if (isLower(code)) { lower++; }
		if (isWhitespace(code)) { whitespace++; }
E
Erich Gamma 已提交
229 230 231 232 233 234 235 236 237
	}

	if ((upper === 0 || lower === 0) && whitespace === 0) {
		return word.length <= 30;
	} else {
		return upper <= 5;
	}
}

238
export function matchesCamelCase(word: string, camelCaseWord: string): IMatch[] | null {
239 240 241 242 243 244 245
	if (!camelCaseWord) {
		return null;
	}

	camelCaseWord = camelCaseWord.trim();

	if (camelCaseWord.length === 0) {
E
Erich Gamma 已提交
246 247 248 249 250 251 252
		return null;
	}

	if (!isCamelCasePattern(word)) {
		return null;
	}

J
Joao Moreno 已提交
253
	if (camelCaseWord.length > 60) {
E
Erich Gamma 已提交
254 255 256
		return null;
	}

J
Joao Moreno 已提交
257 258 259 260 261 262 263 264 265 266
	const analysis = analyzeCamelCaseWord(camelCaseWord);

	if (!isCamelCaseWord(analysis)) {
		if (!isUpperCaseWord(analysis)) {
			return null;
		}

		camelCaseWord = camelCaseWord.toLowerCase();
	}

267
	let result: IMatch[] | null = null;
B
Benjamin Pasero 已提交
268
	let i = 0;
E
Erich Gamma 已提交
269

270 271
	word = word.toLowerCase();
	while (i < camelCaseWord.length && (result = _matchesCamelCase(word, camelCaseWord, 0, i)) === null) {
E
Erich Gamma 已提交
272 273 274 275 276 277
		i = nextAnchor(camelCaseWord, i + 1);
	}

	return result;
}

278
// Matches beginning of words supporting non-ASCII languages
S
#17646:  
Sandeep Somavarapu 已提交
279 280
// If `contiguous` is true then matches word with beginnings of the words in the target. E.g. "pul" will match "Git: Pull"
// Otherwise also matches sub string of the word with beginnings of the words in the target. E.g. "gp" or "g p" will match "Git: Pull"
281 282
// Useful in cases where the target is words (e.g. command labels)

283
export function matchesWords(word: string, target: string, contiguous: boolean = false): IMatch[] | null {
284
	if (!target || target.length === 0) {
285 286 287
		return null;
	}

288
	let result: IMatch[] | null = null;
289 290
	let i = 0;

291 292 293
	word = word.toLowerCase();
	target = target.toLowerCase();
	while (i < target.length && (result = _matchesWords(word, target, 0, i, contiguous)) === null) {
294 295 296 297 298 299
		i = nextWord(target, i + 1);
	}

	return result;
}

300
function _matchesWords(word: string, target: string, i: number, j: number, contiguous: boolean): IMatch[] | null {
301 302 303 304
	if (i === word.length) {
		return [];
	} else if (j === target.length) {
		return null;
305
	} else if (!charactersMatch(word.charCodeAt(i), target.charCodeAt(j))) {
306 307
		return null;
	} else {
308
		let result: IMatch[] | null = null;
309
		let nextWordIndex = j + 1;
S
#17646:  
Sandeep Somavarapu 已提交
310 311 312 313 314 315
		result = _matchesWords(word, target, i + 1, j + 1, contiguous);
		if (!contiguous) {
			while (!result && (nextWordIndex = nextWord(target, nextWordIndex)) < target.length) {
				result = _matchesWords(word, target, i + 1, nextWordIndex, contiguous);
				nextWordIndex++;
			}
316 317 318 319 320 321 322
		}
		return result === null ? null : join({ start: j, end: j + 1 }, result);
	}
}

function nextWord(word: string, start: number): number {
	for (let i = start; i < word.length; i++) {
323 324
		if (isWordSeparator(word.charCodeAt(i)) ||
			(i > 0 && isWordSeparator(word.charCodeAt(i - 1)))) {
325 326 327 328 329 330
			return i;
		}
	}
	return word.length;
}

E
Erich Gamma 已提交
331 332
// Fuzzy

J
Johannes Rieken 已提交
333
const fuzzyContiguousFilter = or(matchesPrefix, matchesCamelCase, matchesContiguousSubString);
B
Benjamin Pasero 已提交
334
const fuzzySeparateFilter = or(matchesPrefix, matchesCamelCase, matchesSubString);
D
Dirk Baeumer 已提交
335
const fuzzyRegExpCache = new LRUCache<string, RegExp>(10000); // bounded to 10000 elements
B
Benjamin Pasero 已提交
336

337
export function matchesFuzzy(word: string, wordToMatchAgainst: string, enableSeparateSubstringMatching = false): IMatch[] | null {
338 339 340
	if (typeof word !== 'string' || typeof wordToMatchAgainst !== 'string') {
		return null; // return early for invalid input
	}
E
Erich Gamma 已提交
341 342

	// Form RegExp for wildcard matches
B
Benjamin Pasero 已提交
343
	let regexp = fuzzyRegExpCache.get(word);
E
Erich Gamma 已提交
344
	if (!regexp) {
B
Benjamin Pasero 已提交
345
		regexp = new RegExp(strings.convertSimple2RegExpPattern(word), 'i');
B
Benjamin Pasero 已提交
346
		fuzzyRegExpCache.set(word, regexp);
E
Erich Gamma 已提交
347 348 349
	}

	// RegExp Filter
350
	const match = regexp.exec(wordToMatchAgainst);
E
Erich Gamma 已提交
351
	if (match) {
B
Benjamin Pasero 已提交
352
		return [{ start: match.index, end: match.index + match[0].length }];
E
Erich Gamma 已提交
353 354 355
	}

	// Default Filter
356
	return enableSeparateSubstringMatching ? fuzzySeparateFilter(word, wordToMatchAgainst) : fuzzyContiguousFilter(word, wordToMatchAgainst);
B
Benjamin Pasero 已提交
357
}
J
Johannes Rieken 已提交
358

359 360 361 362 363
/**
 * Match pattern againt word in a fuzzy way. As in IntelliSense and faster and more
 * powerfull than `matchesFuzzy`
 */
export function matchesFuzzy2(pattern: string, word: string): IMatch[] | null {
364
	const score = fuzzyScore(pattern, pattern.toLowerCase(), 0, word, word.toLowerCase(), 0, true);
365 366
	return score ? createMatches(score) : null;
}
367

368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392
export function anyScore(pattern: string, lowPattern: string, _patternPos: number, word: string, lowWord: string, _wordPos: number): FuzzyScore {
	const result = fuzzyScore(pattern, lowPattern, 0, word, lowWord, 0, true);
	if (result) {
		return result;
	}
	let matches = 0;
	let score = 0;
	let idx = _wordPos;
	for (let patternPos = 0; patternPos < lowPattern.length && patternPos < _maxLen; ++patternPos) {
		const wordPos = lowWord.indexOf(lowPattern.charAt(patternPos), idx);
		if (wordPos >= 0) {
			score += 1;
			matches += 2 ** wordPos;
			idx = wordPos + 1;

		} else if (matches !== 0) {
			// once we have started matching things
			// we need to match the remaining pattern
			// characters
			break;
		}
	}
	return [score, matches, _wordPos];
}

393 394
//#region --- fuzzyScore ---

395 396 397
export function createMatches(score: undefined | FuzzyScore): IMatch[] {
	if (typeof score === 'undefined') {
		return [];
398
	}
399

400 401
	const matches = score[1].toString(2);
	const wordStart = score[2];
402 403
	const res: IMatch[] = [];

404 405
	for (let pos = wordStart; pos < _maxLen; pos++) {
		if (matches[matches.length - (pos + 1)] === '1') {
J
Joao Moreno 已提交
406 407 408 409 410 411
			const last = res[res.length - 1];
			if (last && last.end === pos) {
				last.end = pos + 1;
			} else {
				res.push({ start: pos, end: pos + 1 });
			}
412 413
		}
	}
414
	return res;
415 416
}

417 418
const _maxLen = 53;

419 420 421
function initTable() {
	const table: number[][] = [];
	const row: number[] = [0];
J
Johannes Rieken 已提交
422
	for (let i = 1; i <= _maxLen; i++) {
423 424
		row.push(-i);
	}
J
Johannes Rieken 已提交
425
	for (let i = 0; i <= _maxLen; i++) {
426
		const thisRow = row.slice(0);
427 428 429 430 431
		thisRow[0] = -i;
		table.push(thisRow);
	}
	return table;
}
432

433
const _table = initTable();
J
Johannes Rieken 已提交
434
const _scores = initTable();
J
Johannes Rieken 已提交
435
const _arrows = <Arrow[][]>initTable();
436
const _debug = false;
437

438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457
function printTable(table: number[][], pattern: string, patternLen: number, word: string, wordLen: number): string {
	function pad(s: string, n: number, pad = ' ') {
		while (s.length < n) {
			s = pad + s;
		}
		return s;
	}
	let ret = ` |   |${word.split('').map(c => pad(c, 3)).join('|')}\n`;

	for (let i = 0; i <= patternLen; i++) {
		if (i === 0) {
			ret += ' |';
		} else {
			ret += `${pattern[i - 1]}|`;
		}
		ret += table[i].slice(0, wordLen + 1).map(n => pad(n.toString(), 3)).join('|') + '\n';
	}
	return ret;
}

B
Benjamin Pasero 已提交
458
function isSeparatorAtPos(value: string, index: number): boolean {
459 460 461 462 463 464 465 466 467 468 469 470 471 472
	if (index < 0 || index >= value.length) {
		return false;
	}
	const code = value.charCodeAt(index);
	switch (code) {
		case CharCode.Underline:
		case CharCode.Dash:
		case CharCode.Period:
		case CharCode.Space:
		case CharCode.Slash:
		case CharCode.Backslash:
		case CharCode.SingleQuote:
		case CharCode.DoubleQuote:
		case CharCode.Colon:
473
		case CharCode.DollarSign:
474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492
			return true;
		default:
			return false;
	}
}

function isWhitespaceAtPos(value: string, index: number): boolean {
	if (index < 0 || index >= value.length) {
		return false;
	}
	const code = value.charCodeAt(index);
	switch (code) {
		case CharCode.Space:
		case CharCode.Tab:
			return true;
		default:
			return false;
	}
}
493

J
Johannes Rieken 已提交
494 495 496 497
function isUpperCaseAtPos(pos: number, word: string, wordLow: string): boolean {
	return word[pos] !== wordLow[pos];
}

498 499 500 501 502 503 504 505 506 507
function isPatternInWord(patternLow: string, patternPos: number, patternLen: number, wordLow: string, wordPos: number, wordLen: number): boolean {
	while (patternPos < patternLen && wordPos < wordLen) {
		if (patternLow[patternPos] === wordLow[wordPos]) {
			patternPos += 1;
		}
		wordPos += 1;
	}
	return patternPos === patternLen; // pattern must be exhausted
}

J
Johannes Rieken 已提交
508 509
const enum Arrow { Top = 0b1, Diag = 0b10, Left = 0b100 }

510 511 512 513 514 515 516 517 518 519 520 521
/**
 * A tuple of three values.
 * 0. the score
 * 1. the matches encoded as bitmask (2^53)
 * 2. the offset at which matching started
 */
export type FuzzyScore = [number, number, number];

export namespace FuzzyScore {
	/**
	 * No matches and value `-100`
	 */
522
	export const Default: [-100, 0, 0] = <[-100, 0, 0]>Object.freeze([-100, 0, 0]);
J
Joao Moreno 已提交
523

J
Joao Moreno 已提交
524 525
	export function isDefault(score?: FuzzyScore): score is [-100, 0, 0] {
		return !score || (score[0] === -100 && score[1] === 0 && score[2] === 0);
J
Joao Moreno 已提交
526
	}
527
}
J
Johannes Rieken 已提交
528

529
export interface FuzzyScorer {
J
Johannes Rieken 已提交
530
	(pattern: string, lowPattern: string, patternPos: number, word: string, lowWord: string, wordPos: number, firstMatchCanBeWeak: boolean): FuzzyScore | undefined;
531 532
}

533
export function fuzzyScore(pattern: string, patternLow: string, patternPos: number, word: string, wordLow: string, wordPos: number, firstMatchCanBeWeak: boolean): FuzzyScore | undefined {
534

J
Johannes Rieken 已提交
535 536
	const patternLen = pattern.length > _maxLen ? _maxLen : pattern.length;
	const wordLen = word.length > _maxLen ? _maxLen : word.length;
537

538
	if (patternPos >= patternLen || wordPos >= wordLen || patternLen > wordLen) {
539 540 541
		return undefined;
	}

J
Johannes Rieken 已提交
542 543 544
	// Run a simple check if the characters of pattern occur
	// (in order) at all in word. If that isn't the case we
	// stop because no match will be possible
545
	if (!isPatternInWord(patternLow, patternPos, patternLen, wordLow, wordPos, wordLen)) {
546 547
		return undefined;
	}
548

549
	const patternStartPos = patternPos;
550
	const wordStartPos = wordPos;
551

J
Johannes Rieken 已提交
552
	// There will be a mach, fill in tables
553
	for (patternPos = patternStartPos + 1; patternPos <= patternLen; patternPos++) {
554

555
		for (wordPos = 1; wordPos <= wordLen; wordPos++) {
556 557

			let score = -1;
J
Johannes Rieken 已提交
558
			if (patternLow[patternPos - 1] === wordLow[wordPos - 1]) {
J
Johannes Rieken 已提交
559

J
Johannes Rieken 已提交
560 561
				if (wordPos === (patternPos - patternStartPos)) {
					// common prefix: `foobar <-> foobaz`
J
Johannes Rieken 已提交
562
					//                            ^^^^^
563
					if (pattern[patternPos - 1] === word[wordPos - 1]) {
564 565 566 567
						score = 7;
					} else {
						score = 5;
					}
J
Johannes Rieken 已提交
568
				} else if (isUpperCaseAtPos(wordPos - 1, word, wordLow) && (wordPos === 1 || !isUpperCaseAtPos(wordPos - 2, word, wordLow))) {
J
Johannes Rieken 已提交
569
					// hitting upper-case: `foo <-> forOthers`
J
Johannes Rieken 已提交
570
					//                              ^^ ^
571
					if (pattern[patternPos - 1] === word[wordPos - 1]) {
J
Johannes Rieken 已提交
572 573 574 575
						score = 7;
					} else {
						score = 5;
					}
576
				} else if (isSeparatorAtPos(wordLow, wordPos - 2) || isWhitespaceAtPos(wordLow, wordPos - 2)) {
J
Johannes Rieken 已提交
577
					// post separator: `foo <-> bar_foo`
J
Johannes Rieken 已提交
578
					//                              ^^^
J
Johannes Rieken 已提交
579 580
					score = 5;

581 582 583 584 585
				} else {
					score = 1;
				}
			}

586
			_scores[patternPos][wordPos] = score;
J
Johannes Rieken 已提交
587

588 589 590
			const diag = _table[patternPos - 1][wordPos - 1] + (score > 1 ? 1 : score);
			const top = _table[patternPos - 1][wordPos] + -1;
			const left = _table[patternPos][wordPos - 1] + -1;
591 592 593

			if (left >= top) {
				// left or diag
J
Johannes Rieken 已提交
594
				if (left > diag) {
595 596
					_table[patternPos][wordPos] = left;
					_arrows[patternPos][wordPos] = Arrow.Left;
J
Johannes Rieken 已提交
597
				} else if (left === diag) {
598 599
					_table[patternPos][wordPos] = left;
					_arrows[patternPos][wordPos] = Arrow.Left | Arrow.Diag;
600
				} else {
601 602
					_table[patternPos][wordPos] = diag;
					_arrows[patternPos][wordPos] = Arrow.Diag;
603 604 605
				}
			} else {
				// top or diag
J
Johannes Rieken 已提交
606
				if (top > diag) {
607 608
					_table[patternPos][wordPos] = top;
					_arrows[patternPos][wordPos] = Arrow.Top;
J
Johannes Rieken 已提交
609
				} else if (top === diag) {
610 611
					_table[patternPos][wordPos] = top;
					_arrows[patternPos][wordPos] = Arrow.Top | Arrow.Diag;
J
Johannes Rieken 已提交
612
				} else {
613 614
					_table[patternPos][wordPos] = diag;
					_arrows[patternPos][wordPos] = Arrow.Diag;
615 616 617 618 619 620 621 622
				}
			}
		}
	}

	if (_debug) {
		console.log(printTable(_table, pattern, patternLen, word, wordLen));
		console.log(printTable(_arrows, pattern, patternLen, word, wordLen));
J
Johannes Rieken 已提交
623
		console.log(printTable(_scores, pattern, patternLen, word, wordLen));
624 625
	}

626
	_matchesCount = 0;
627
	_topScore = -100;
628
	_patternStartPos = patternStartPos;
629
	_firstMatchCanBeWeak = firstMatchCanBeWeak;
630
	_findAllMatches2(patternLen, wordLen, patternLen === wordLen ? 1 : 0, 0, false);
631
	if (_matchesCount === 0) {
J
Johannes Rieken 已提交
632 633 634
		return undefined;
	}

635
	return [_topScore, _topMatch2, wordStartPos];
J
Johannes Rieken 已提交
636 637
}

638

639
let _matchesCount: number = 0;
640
let _topMatch2: number = 0;
641
let _topScore: number = 0;
642
let _patternStartPos: number = 0;
643
let _firstMatchCanBeWeak: boolean = false;
644

645
function _findAllMatches2(patternPos: number, wordPos: number, total: number, matches: number, lastMatched: boolean): void {
646

647
	if (_matchesCount >= 10 || total < -25) {
648 649
		// stop when having already 10 results, or
		// when a potential alignment as already 5 gaps
650 651 652
		return;
	}

653 654
	let simpleMatchCount = 0;

655
	while (patternPos > _patternStartPos && wordPos > 0) {
656

657 658
		const score = _scores[patternPos][wordPos];
		const arrow = _arrows[patternPos][wordPos];
J
Johannes Rieken 已提交
659

660
		if (arrow === Arrow.Left) {
661
			// left -> no match, skip a word character
J
Johannes Rieken 已提交
662
			wordPos -= 1;
663
			if (lastMatched) {
664
				total -= 5; // new gap penalty
665
			} else if (matches !== 0) {
666
				total -= 1; // gap penalty after first match
667 668
			}
			lastMatched = false;
669
			simpleMatchCount = 0;
J
Johannes Rieken 已提交
670 671 672 673 674

		} else if (arrow & Arrow.Diag) {

			if (arrow & Arrow.Left) {
				// left
675
				_findAllMatches2(
676
					patternPos,
677
					wordPos - 1,
678 679
					matches !== 0 ? total - 1 : total, // gap penalty after first match
					matches,
680
					lastMatched
681
				);
682
			}
J
Johannes Rieken 已提交
683 684

			// diag
685
			total += score;
J
Johannes Rieken 已提交
686 687
			patternPos -= 1;
			wordPos -= 1;
688
			lastMatched = true;
J
Johannes Rieken 已提交
689

690 691 692
			// match -> set a 1 at the word pos
			matches += 2 ** wordPos;

J
Johannes Rieken 已提交
693 694 695
			// count simple matches and boost a row of
			// simple matches when they yield in a
			// strong match.
696 697
			if (score === 1) {
				simpleMatchCount += 1;
J
Johannes Rieken 已提交
698

699
				if (patternPos === _patternStartPos && !_firstMatchCanBeWeak) {
J
Johannes Rieken 已提交
700 701 702 703 704
					// when the first match is a weak
					// match we discard it
					return undefined;
				}

705
			} else {
J
Johannes Rieken 已提交
706
				// boost
707
				total += 1 + (simpleMatchCount * (score - 1));
708 709 710
				simpleMatchCount = 0;
			}

J
Johannes Rieken 已提交
711
		} else {
712
			return undefined;
713 714 715
		}
	}

J
Johannes Rieken 已提交
716 717
	total -= wordPos >= 3 ? 9 : wordPos * 3; // late start penalty

718 719
	// dynamically keep track of the current top score
	// and insert the current best score at head, the rest at tail
720
	_matchesCount += 1;
721 722
	if (total > _topScore) {
		_topScore = total;
723
		_topMatch2 = matches;
724
	}
725 726 727 728 729 730 731
}

//#endregion


//#region --- graceful ---

732
export function fuzzyScoreGracefulAggressive(pattern: string, lowPattern: string, patternPos: number, word: string, lowWord: string, wordPos: number, firstMatchCanBeWeak: boolean): FuzzyScore | undefined {
733
	return fuzzyScoreWithPermutations(pattern, lowPattern, patternPos, word, lowWord, wordPos, true, firstMatchCanBeWeak);
734 735
}

736
export function fuzzyScoreGraceful(pattern: string, lowPattern: string, patternPos: number, word: string, lowWord: string, wordPos: number, firstMatchCanBeWeak: boolean): FuzzyScore | undefined {
737
	return fuzzyScoreWithPermutations(pattern, lowPattern, patternPos, word, lowWord, wordPos, false, firstMatchCanBeWeak);
738 739
}

740 741
function fuzzyScoreWithPermutations(pattern: string, lowPattern: string, patternPos: number, word: string, lowWord: string, wordPos: number, aggressive: boolean, firstMatchCanBeWeak: boolean): FuzzyScore | undefined {
	let top = fuzzyScore(pattern, lowPattern, patternPos, word, lowWord, wordPos, firstMatchCanBeWeak);
742 743 744 745 746 747 748 749 750

	if (top && !aggressive) {
		// when using the original pattern yield a result we`
		// return it unless we are aggressive and try to find
		// a better alignment, e.g. `cno` -> `^co^ns^ole` or `^c^o^nsole`.
		return top;
	}

	if (pattern.length >= 3) {
J
Johannes Rieken 已提交
751 752
		// When the pattern is long enough then try a few (max 7)
		// permutations of the pattern to find a better match. The
753 754
		// permutations only swap neighbouring characters, e.g
		// `cnoso` becomes `conso`, `cnsoo`, `cnoos`.
755
		const tries = Math.min(7, pattern.length - 1);
756
		for (let movingPatternPos = patternPos + 1; movingPatternPos < tries; movingPatternPos++) {
757
			const newPattern = nextTypoPermutation(pattern, movingPatternPos);
758
			if (newPattern) {
759
				const candidate = fuzzyScore(newPattern, newPattern.toLowerCase(), patternPos, word, lowWord, wordPos, firstMatchCanBeWeak);
J
Johannes Rieken 已提交
760 761 762 763 764
				if (candidate) {
					candidate[0] -= 3; // permutation penalty
					if (!top || candidate[0] > top[0]) {
						top = candidate;
					}
765 766 767 768 769 770 771 772
				}
			}
		}
	}

	return top;
}

773
function nextTypoPermutation(pattern: string, patternPos: number): string | undefined {
774 775 776 777 778

	if (patternPos + 1 >= pattern.length) {
		return undefined;
	}

779 780
	const swap1 = pattern[patternPos];
	const swap2 = pattern[patternPos + 1];
781 782 783 784 785 786 787 788 789 790 791 792

	if (swap1 === swap2) {
		return undefined;
	}

	return pattern.slice(0, patternPos)
		+ swap2
		+ swap1
		+ pattern.slice(patternPos + 2);
}

//#endregion