filters.ts 22.0 KB
Newer Older
E
Erich Gamma 已提交
1 2 3 4 5
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

J
Johannes Rieken 已提交
6
import { CharCode } from 'vs/base/common/charCode';
7 8
import { LRUCache } from 'vs/base/common/map';
import * as strings from 'vs/base/common/strings';
E
Erich Gamma 已提交
9 10 11

export interface IFilter {
	// Returns null if word doesn't match.
12
	(word: string, wordToMatchAgainst: string): IMatch[] | null;
E
Erich Gamma 已提交
13 14 15
}

export interface IMatch {
B
Benjamin Pasero 已提交
16 17
	start: number;
	end: number;
E
Erich Gamma 已提交
18 19 20 21 22
}

// Combined filters

/**
23
 * @returns A filter which combines the provided set
E
Erich Gamma 已提交
24 25 26 27
 * of filters with an or. The *first* filters that
 * matches defined the return value of the returned
 * filter.
 */
B
Benjamin Pasero 已提交
28
export function or(...filter: IFilter[]): IFilter {
29
	return function (word: string, wordToMatchAgainst: string): IMatch[] | null {
B
Benjamin Pasero 已提交
30 31 32
		for (let i = 0, len = filter.length; i < len; i++) {
			let match = filter[i](word, wordToMatchAgainst);
			if (match) {
E
Erich Gamma 已提交
33 34 35 36 37 38 39 40 41
				return match;
			}
		}
		return null;
	};
}

// Prefix

42 43
export const matchesStrictPrefix: IFilter = _matchesPrefix.bind(undefined, false);
export const matchesPrefix: IFilter = _matchesPrefix.bind(undefined, true);
E
Erich Gamma 已提交
44

45
function _matchesPrefix(ignoreCase: boolean, word: string, wordToMatchAgainst: string): IMatch[] | null {
46
	if (!wordToMatchAgainst || wordToMatchAgainst.length < word.length) {
E
Erich Gamma 已提交
47 48
		return null;
	}
49

50 51
	let matches: boolean;
	if (ignoreCase) {
52
		matches = strings.startsWithIgnoreCase(wordToMatchAgainst, word);
53 54 55
	} else {
		matches = wordToMatchAgainst.indexOf(word) === 0;
	}
56

57
	if (!matches) {
58
		return null;
E
Erich Gamma 已提交
59
	}
60

E
Erich Gamma 已提交
61 62 63 64 65
	return word.length > 0 ? [{ start: 0, end: word.length }] : [];
}

// Contiguous Substring

66
export function matchesContiguousSubString(word: string, wordToMatchAgainst: string): IMatch[] | null {
B
Benjamin Pasero 已提交
67
	let index = wordToMatchAgainst.toLowerCase().indexOf(word.toLowerCase());
E
Erich Gamma 已提交
68 69 70 71 72 73 74 75 76
	if (index === -1) {
		return null;
	}

	return [{ start: index, end: index + word.length }];
}

// Substring

77
export function matchesSubString(word: string, wordToMatchAgainst: string): IMatch[] | null {
E
Erich Gamma 已提交
78 79 80
	return _matchesSubString(word.toLowerCase(), wordToMatchAgainst.toLowerCase(), 0, 0);
}

81
function _matchesSubString(word: string, wordToMatchAgainst: string, i: number, j: number): IMatch[] | null {
E
Erich Gamma 已提交
82 83 84 85 86 87
	if (i === word.length) {
		return [];
	} else if (j === wordToMatchAgainst.length) {
		return null;
	} else {
		if (word[i] === wordToMatchAgainst[j]) {
88
			let result: IMatch[] | null = null;
E
Erich Gamma 已提交
89 90 91
			if (result = _matchesSubString(word, wordToMatchAgainst, i + 1, j + 1)) {
				return join({ start: j, end: j + 1 }, result);
			}
92
			return null;
E
Erich Gamma 已提交
93 94 95 96 97 98 99 100
		}

		return _matchesSubString(word, wordToMatchAgainst, i, j + 1);
	}
}

// CamelCase

B
Benjamin Pasero 已提交
101
function isLower(code: number): boolean {
102
	return CharCode.a <= code && code <= CharCode.z;
E
Erich Gamma 已提交
103 104
}

105
export function isUpper(code: number): boolean {
106
	return CharCode.A <= code && code <= CharCode.Z;
E
Erich Gamma 已提交
107 108
}

B
Benjamin Pasero 已提交
109
function isNumber(code: number): boolean {
110
	return CharCode.Digit0 <= code && code <= CharCode.Digit9;
E
Erich Gamma 已提交
111 112
}

B
Benjamin Pasero 已提交
113
function isWhitespace(code: number): boolean {
114 115 116 117 118 119
	return (
		code === CharCode.Space
		|| code === CharCode.Tab
		|| code === CharCode.LineFeed
		|| code === CharCode.CarriageReturn
	);
E
Erich Gamma 已提交
120 121
}

B
Benjamin Pasero 已提交
122
function isAlphanumeric(code: number): boolean {
E
Erich Gamma 已提交
123 124 125
	return isLower(code) || isUpper(code) || isNumber(code);
}

B
Benjamin Pasero 已提交
126
function join(head: IMatch, tail: IMatch[]): IMatch[] {
E
Erich Gamma 已提交
127 128 129 130 131 132 133 134 135 136 137
	if (tail.length === 0) {
		tail = [head];
	} else if (head.end === tail[0].start) {
		tail[0].start = head.start;
	} else {
		tail.unshift(head);
	}
	return tail;
}

function nextAnchor(camelCaseWord: string, start: number): number {
B
Benjamin Pasero 已提交
138 139 140
	for (let i = start; i < camelCaseWord.length; i++) {
		let c = camelCaseWord.charCodeAt(i);
		if (isUpper(c) || isNumber(c) || (i > 0 && !isAlphanumeric(camelCaseWord.charCodeAt(i - 1)))) {
E
Erich Gamma 已提交
141 142 143 144 145 146
			return i;
		}
	}
	return camelCaseWord.length;
}

147
function _matchesCamelCase(word: string, camelCaseWord: string, i: number, j: number): IMatch[] | null {
E
Erich Gamma 已提交
148 149 150 151 152 153 154
	if (i === word.length) {
		return [];
	} else if (j === camelCaseWord.length) {
		return null;
	} else if (word[i] !== camelCaseWord[j].toLowerCase()) {
		return null;
	} else {
155
		let result: IMatch[] | null = null;
B
Benjamin Pasero 已提交
156
		let nextUpperIndex = j + 1;
E
Erich Gamma 已提交
157 158 159 160 161 162 163 164 165
		result = _matchesCamelCase(word, camelCaseWord, i + 1, j + 1);
		while (!result && (nextUpperIndex = nextAnchor(camelCaseWord, nextUpperIndex)) < camelCaseWord.length) {
			result = _matchesCamelCase(word, camelCaseWord, i + 1, nextUpperIndex);
			nextUpperIndex++;
		}
		return result === null ? null : join({ start: j, end: j + 1 }, result);
	}
}

J
Joao Moreno 已提交
166 167 168 169 170 171 172
interface ICamelCaseAnalysis {
	upperPercent: number;
	lowerPercent: number;
	alphaPercent: number;
	numericPercent: number;
}

E
Erich Gamma 已提交
173 174
// Heuristic to avoid computing camel case matcher for words that don't
// look like camelCaseWords.
J
Joao Moreno 已提交
175
function analyzeCamelCaseWord(word: string): ICamelCaseAnalysis {
176
	let upper = 0, lower = 0, alpha = 0, numeric = 0, code = 0;
E
Erich Gamma 已提交
177

B
Benjamin Pasero 已提交
178
	for (let i = 0; i < word.length; i++) {
E
Erich Gamma 已提交
179 180
		code = word.charCodeAt(i);

181 182 183 184
		if (isUpper(code)) { upper++; }
		if (isLower(code)) { lower++; }
		if (isAlphanumeric(code)) { alpha++; }
		if (isNumber(code)) { numeric++; }
E
Erich Gamma 已提交
185 186
	}

B
Benjamin Pasero 已提交
187 188 189
	let upperPercent = upper / word.length;
	let lowerPercent = lower / word.length;
	let alphaPercent = alpha / word.length;
190
	let numericPercent = numeric / word.length;
E
Erich Gamma 已提交
191

J
Joao Moreno 已提交
192 193 194 195
	return { upperPercent, lowerPercent, alphaPercent, numericPercent };
}

function isUpperCaseWord(analysis: ICamelCaseAnalysis): boolean {
B
Benjamin Pasero 已提交
196
	const { upperPercent, lowerPercent } = analysis;
J
Joao Moreno 已提交
197 198 199 200 201
	return lowerPercent === 0 && upperPercent > 0.6;
}

function isCamelCaseWord(analysis: ICamelCaseAnalysis): boolean {
	const { upperPercent, lowerPercent, alphaPercent, numericPercent } = analysis;
202
	return lowerPercent > 0.2 && upperPercent < 0.8 && alphaPercent > 0.6 && numericPercent < 0.2;
E
Erich Gamma 已提交
203 204 205 206 207
}

// Heuristic to avoid computing camel case matcher for words that don't
// look like camel case patterns.
function isCamelCasePattern(word: string): boolean {
B
Benjamin Pasero 已提交
208
	let upper = 0, lower = 0, code = 0, whitespace = 0;
E
Erich Gamma 已提交
209

B
Benjamin Pasero 已提交
210
	for (let i = 0; i < word.length; i++) {
E
Erich Gamma 已提交
211 212
		code = word.charCodeAt(i);

213 214 215
		if (isUpper(code)) { upper++; }
		if (isLower(code)) { lower++; }
		if (isWhitespace(code)) { whitespace++; }
E
Erich Gamma 已提交
216 217 218 219 220 221 222 223 224
	}

	if ((upper === 0 || lower === 0) && whitespace === 0) {
		return word.length <= 30;
	} else {
		return upper <= 5;
	}
}

225
export function matchesCamelCase(word: string, camelCaseWord: string): IMatch[] | null {
226 227 228 229 230 231 232
	if (!camelCaseWord) {
		return null;
	}

	camelCaseWord = camelCaseWord.trim();

	if (camelCaseWord.length === 0) {
E
Erich Gamma 已提交
233 234 235 236 237 238 239
		return null;
	}

	if (!isCamelCasePattern(word)) {
		return null;
	}

J
Joao Moreno 已提交
240
	if (camelCaseWord.length > 60) {
E
Erich Gamma 已提交
241 242 243
		return null;
	}

J
Joao Moreno 已提交
244 245 246 247 248 249 250 251 252 253
	const analysis = analyzeCamelCaseWord(camelCaseWord);

	if (!isCamelCaseWord(analysis)) {
		if (!isUpperCaseWord(analysis)) {
			return null;
		}

		camelCaseWord = camelCaseWord.toLowerCase();
	}

254
	let result: IMatch[] | null = null;
B
Benjamin Pasero 已提交
255
	let i = 0;
E
Erich Gamma 已提交
256

257 258
	word = word.toLowerCase();
	while (i < camelCaseWord.length && (result = _matchesCamelCase(word, camelCaseWord, 0, i)) === null) {
E
Erich Gamma 已提交
259 260 261 262 263 264
		i = nextAnchor(camelCaseWord, i + 1);
	}

	return result;
}

265
// Matches beginning of words supporting non-ASCII languages
S
#17646:  
Sandeep Somavarapu 已提交
266 267
// If `contiguous` is true then matches word with beginnings of the words in the target. E.g. "pul" will match "Git: Pull"
// Otherwise also matches sub string of the word with beginnings of the words in the target. E.g. "gp" or "g p" will match "Git: Pull"
268 269
// Useful in cases where the target is words (e.g. command labels)

270
export function matchesWords(word: string, target: string, contiguous: boolean = false): IMatch[] | null {
271
	if (!target || target.length === 0) {
272 273 274
		return null;
	}

275
	let result: IMatch[] | null = null;
276 277
	let i = 0;

278 279 280
	word = word.toLowerCase();
	target = target.toLowerCase();
	while (i < target.length && (result = _matchesWords(word, target, 0, i, contiguous)) === null) {
281 282 283 284 285 286
		i = nextWord(target, i + 1);
	}

	return result;
}

287
function _matchesWords(word: string, target: string, i: number, j: number, contiguous: boolean): IMatch[] | null {
288 289 290 291
	if (i === word.length) {
		return [];
	} else if (j === target.length) {
		return null;
292
	} else if (word[i] !== target[j]) {
293 294
		return null;
	} else {
295
		let result: IMatch[] | null = null;
296
		let nextWordIndex = j + 1;
S
#17646:  
Sandeep Somavarapu 已提交
297 298 299 300 301 302
		result = _matchesWords(word, target, i + 1, j + 1, contiguous);
		if (!contiguous) {
			while (!result && (nextWordIndex = nextWord(target, nextWordIndex)) < target.length) {
				result = _matchesWords(word, target, i + 1, nextWordIndex, contiguous);
				nextWordIndex++;
			}
303 304 305 306 307 308 309 310 311 312 313 314 315 316 317
		}
		return result === null ? null : join({ start: j, end: j + 1 }, result);
	}
}

function nextWord(word: string, start: number): number {
	for (let i = start; i < word.length; i++) {
		let c = word.charCodeAt(i);
		if (isWhitespace(c) || (i > 0 && isWhitespace(word.charCodeAt(i - 1)))) {
			return i;
		}
	}
	return word.length;
}

E
Erich Gamma 已提交
318 319
// Fuzzy

J
Johannes Rieken 已提交
320
const fuzzyContiguousFilter = or(matchesPrefix, matchesCamelCase, matchesContiguousSubString);
B
Benjamin Pasero 已提交
321
const fuzzySeparateFilter = or(matchesPrefix, matchesCamelCase, matchesSubString);
D
Dirk Baeumer 已提交
322
const fuzzyRegExpCache = new LRUCache<string, RegExp>(10000); // bounded to 10000 elements
B
Benjamin Pasero 已提交
323

324
export function matchesFuzzy(word: string, wordToMatchAgainst: string, enableSeparateSubstringMatching = false): IMatch[] | null {
325 326 327
	if (typeof word !== 'string' || typeof wordToMatchAgainst !== 'string') {
		return null; // return early for invalid input
	}
E
Erich Gamma 已提交
328 329

	// Form RegExp for wildcard matches
B
Benjamin Pasero 已提交
330
	let regexp = fuzzyRegExpCache.get(word);
E
Erich Gamma 已提交
331
	if (!regexp) {
B
Benjamin Pasero 已提交
332
		regexp = new RegExp(strings.convertSimple2RegExpPattern(word), 'i');
B
Benjamin Pasero 已提交
333
		fuzzyRegExpCache.set(word, regexp);
E
Erich Gamma 已提交
334 335 336
	}

	// RegExp Filter
337
	let match = regexp.exec(wordToMatchAgainst);
E
Erich Gamma 已提交
338
	if (match) {
B
Benjamin Pasero 已提交
339
		return [{ start: match.index, end: match.index + match[0].length }];
E
Erich Gamma 已提交
340 341 342
	}

	// Default Filter
343
	return enableSeparateSubstringMatching ? fuzzySeparateFilter(word, wordToMatchAgainst) : fuzzyContiguousFilter(word, wordToMatchAgainst);
B
Benjamin Pasero 已提交
344
}
J
Johannes Rieken 已提交
345

346 347 348 349
export function matchesFuzzy2(word: string, wordToMatchAgainst: string): IMatch[] | null {
	let score = fuzzyScore(word, word.toLowerCase(), 0, wordToMatchAgainst, wordToMatchAgainst.toLowerCase(), 0, false);
	return score ? createMatches(score) : null;
}
350

351 352 353 354 355
export function anyScore(pattern: string, lowPattern: string, _patternPos: number, word: string, lowWord: string, _wordPos: number): FuzzyScore {
	const result = fuzzyScore(pattern, lowPattern, 0, word, lowWord, 0, true);
	if (result) {
		return result;
	}
356 357
	const matches: number[] = [];
	let idx = 0;
358 359
	for (let pos = 0; pos < lowPattern.length; ++pos) {
		const thisIdx = lowWord.indexOf(lowPattern.charAt(pos), idx);
360 361 362 363 364 365 366 367
		if (thisIdx >= 0) {
			matches.push(thisIdx);
			idx = thisIdx + 1;
		}
	}
	return [matches.length, matches];
}

368 369
//#region --- fuzzyScore ---

370
export function createMatches(offsetOrScore: undefined | number[] | FuzzyScore): IMatch[] {
371
	let ret: IMatch[] = [];
372
	if (!offsetOrScore) {
373 374
		return ret;
	}
375 376 377 378 379 380
	let offsets: number[];
	if (Array.isArray(offsetOrScore[1])) {
		offsets = (offsetOrScore as FuzzyScore)[1];
	} else {
		offsets = offsetOrScore as number[];
	}
381
	let last: IMatch | undefined;
382
	for (const pos of offsets) {
383 384 385 386 387 388 389 390 391 392
		if (last && last.end === pos) {
			last.end += 1;
		} else {
			last = { start: pos, end: pos + 1 };
			ret.push(last);
		}
	}
	return ret;
}

J
Johannes Rieken 已提交
393 394
const _maxLen = 100;

395 396 397
function initTable() {
	const table: number[][] = [];
	const row: number[] = [0];
J
Johannes Rieken 已提交
398
	for (let i = 1; i <= _maxLen; i++) {
399 400
		row.push(-i);
	}
J
Johannes Rieken 已提交
401
	for (let i = 0; i <= _maxLen; i++) {
402 403 404 405 406 407
		let thisRow = row.slice(0);
		thisRow[0] = -i;
		table.push(thisRow);
	}
	return table;
}
408

409
const _table = initTable();
J
Johannes Rieken 已提交
410
const _scores = initTable();
J
Johannes Rieken 已提交
411
const _arrows = <Arrow[][]>initTable();
412
const _debug = false;
413

414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433
function printTable(table: number[][], pattern: string, patternLen: number, word: string, wordLen: number): string {
	function pad(s: string, n: number, pad = ' ') {
		while (s.length < n) {
			s = pad + s;
		}
		return s;
	}
	let ret = ` |   |${word.split('').map(c => pad(c, 3)).join('|')}\n`;

	for (let i = 0; i <= patternLen; i++) {
		if (i === 0) {
			ret += ' |';
		} else {
			ret += `${pattern[i - 1]}|`;
		}
		ret += table[i].slice(0, wordLen + 1).map(n => pad(n.toString(), 3)).join('|') + '\n';
	}
	return ret;
}

B
Benjamin Pasero 已提交
434
function isSeparatorAtPos(value: string, index: number): boolean {
435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467
	if (index < 0 || index >= value.length) {
		return false;
	}
	const code = value.charCodeAt(index);
	switch (code) {
		case CharCode.Underline:
		case CharCode.Dash:
		case CharCode.Period:
		case CharCode.Space:
		case CharCode.Slash:
		case CharCode.Backslash:
		case CharCode.SingleQuote:
		case CharCode.DoubleQuote:
		case CharCode.Colon:
			return true;
		default:
			return false;
	}
}

function isWhitespaceAtPos(value: string, index: number): boolean {
	if (index < 0 || index >= value.length) {
		return false;
	}
	const code = value.charCodeAt(index);
	switch (code) {
		case CharCode.Space:
		case CharCode.Tab:
			return true;
		default:
			return false;
	}
}
468

469 470 471 472 473 474 475 476 477 478
function isPatternInWord(patternLow: string, patternPos: number, patternLen: number, wordLow: string, wordPos: number, wordLen: number): boolean {
	while (patternPos < patternLen && wordPos < wordLen) {
		if (patternLow[patternPos] === wordLow[wordPos]) {
			patternPos += 1;
		}
		wordPos += 1;
	}
	return patternPos === patternLen; // pattern must be exhausted
}

J
Johannes Rieken 已提交
479 480
const enum Arrow { Top = 0b1, Diag = 0b10, Left = 0b100 }

J
Johannes Rieken 已提交
481 482
export type FuzzyScore = [number, number[]];

483
export interface FuzzyScorer {
J
Johannes Rieken 已提交
484
	(pattern: string, lowPattern: string, patternPos: number, word: string, lowWord: string, wordPos: number, firstMatchCanBeWeak: boolean): FuzzyScore | undefined;
485 486
}

487
export function fuzzyScore(pattern: string, patternLow: string, patternPos: number, word: string, wordLow: string, wordPos: number, firstMatchCanBeWeak: boolean): FuzzyScore | undefined {
488

J
Johannes Rieken 已提交
489 490
	const patternLen = pattern.length > _maxLen ? _maxLen : pattern.length;
	const wordLen = word.length > _maxLen ? _maxLen : word.length;
491

492
	if (patternPos >= patternLen || wordPos >= wordLen || patternLen > wordLen) {
493 494 495
		return undefined;
	}

J
Johannes Rieken 已提交
496 497 498
	// Run a simple check if the characters of pattern occur
	// (in order) at all in word. If that isn't the case we
	// stop because no match will be possible
499
	if (!isPatternInWord(patternLow, patternPos, patternLen, wordLow, wordPos, wordLen)) {
500 501
		return undefined;
	}
502

503
	const patternStartPos = patternPos;
504

J
Johannes Rieken 已提交
505
	// There will be a mach, fill in tables
506
	for (patternPos = patternStartPos + 1; patternPos <= patternLen; patternPos++) {
507

508
		for (wordPos = 1; wordPos <= wordLen; wordPos++) {
509 510

			let score = -1;
511 512
			let lowWordChar = wordLow[wordPos - 1];
			if (patternLow[patternPos - 1] === lowWordChar) {
J
Johannes Rieken 已提交
513

J
Johannes Rieken 已提交
514 515
				if (wordPos === (patternPos - patternStartPos)) {
					// common prefix: `foobar <-> foobaz`
516
					if (pattern[patternPos - 1] === word[wordPos - 1]) {
517 518 519 520
						score = 7;
					} else {
						score = 5;
					}
521
				} else if (lowWordChar !== word[wordPos - 1] && (wordPos === 1 || wordLow[wordPos - 2] === word[wordPos - 2])) {
J
Johannes Rieken 已提交
522
					// hitting upper-case: `foo <-> forOthers`
523
					if (pattern[patternPos - 1] === word[wordPos - 1]) {
J
Johannes Rieken 已提交
524 525 526 527
						score = 7;
					} else {
						score = 5;
					}
528
				} else if (isSeparatorAtPos(wordLow, wordPos - 2) || isWhitespaceAtPos(wordLow, wordPos - 2)) {
J
Johannes Rieken 已提交
529
					// post separator: `foo <-> bar_foo`
J
Johannes Rieken 已提交
530 531
					score = 5;

532 533 534 535 536
				} else {
					score = 1;
				}
			}

537
			_scores[patternPos][wordPos] = score;
J
Johannes Rieken 已提交
538

539 540 541
			let diag = _table[patternPos - 1][wordPos - 1] + (score > 1 ? 1 : score);
			let top = _table[patternPos - 1][wordPos] + -1;
			let left = _table[patternPos][wordPos - 1] + -1;
542 543 544

			if (left >= top) {
				// left or diag
J
Johannes Rieken 已提交
545
				if (left > diag) {
546 547
					_table[patternPos][wordPos] = left;
					_arrows[patternPos][wordPos] = Arrow.Left;
J
Johannes Rieken 已提交
548
				} else if (left === diag) {
549 550
					_table[patternPos][wordPos] = left;
					_arrows[patternPos][wordPos] = Arrow.Left | Arrow.Diag;
551
				} else {
552 553
					_table[patternPos][wordPos] = diag;
					_arrows[patternPos][wordPos] = Arrow.Diag;
554 555 556
				}
			} else {
				// top or diag
J
Johannes Rieken 已提交
557
				if (top > diag) {
558 559
					_table[patternPos][wordPos] = top;
					_arrows[patternPos][wordPos] = Arrow.Top;
J
Johannes Rieken 已提交
560
				} else if (top === diag) {
561 562
					_table[patternPos][wordPos] = top;
					_arrows[patternPos][wordPos] = Arrow.Top | Arrow.Diag;
J
Johannes Rieken 已提交
563
				} else {
564 565
					_table[patternPos][wordPos] = diag;
					_arrows[patternPos][wordPos] = Arrow.Diag;
566 567 568 569 570 571 572 573
				}
			}
		}
	}

	if (_debug) {
		console.log(printTable(_table, pattern, patternLen, word, wordLen));
		console.log(printTable(_arrows, pattern, patternLen, word, wordLen));
J
Johannes Rieken 已提交
574
		console.log(printTable(_scores, pattern, patternLen, word, wordLen));
575 576
	}

577 578 579
	// _bucket is an array of [PrefixArray] we use to keep
	// track of scores and matches. After calling `_findAllMatches`
	// the best match (if available) is the first item in the array
580
	_matchesCount = 0;
581
	_topScore = -100;
582
	_patternStartPos = patternStartPos;
583
	_firstMatchCanBeWeak = firstMatchCanBeWeak;
584
	_findAllMatches(patternLen, wordLen, patternLen === wordLen ? 1 : 0, new LazyArray(), false);
J
Johannes Rieken 已提交
585

586
	if (_matchesCount === 0) {
J
Johannes Rieken 已提交
587 588 589
		return undefined;
	}

590
	return [_topScore, _topMatch.toArray()];
J
Johannes Rieken 已提交
591 592
}

593 594
let _matchesCount: number = 0;
let _topMatch: LazyArray;
595
let _topScore: number = 0;
596
let _patternStartPos: number = 0;
597
let _firstMatchCanBeWeak: boolean = false;
598

599
function _findAllMatches(patternPos: number, wordPos: number, total: number, matches: LazyArray, lastMatched: boolean): void {
600

601
	if (_matchesCount >= 10 || total < -25) {
602 603
		// stop when having already 10 results, or
		// when a potential alignment as already 5 gaps
604 605 606
		return;
	}

607 608
	let simpleMatchCount = 0;

609
	while (patternPos > _patternStartPos && wordPos > 0) {
610

J
Johannes Rieken 已提交
611 612 613
		let score = _scores[patternPos][wordPos];
		let arrow = _arrows[patternPos][wordPos];

614
		if (arrow === Arrow.Left) {
J
Johannes Rieken 已提交
615 616
			// left
			wordPos -= 1;
617
			if (lastMatched) {
618
				total -= 5; // new gap penalty
619
			} else if (!matches.isEmpty()) {
620
				total -= 1; // gap penalty after first match
621 622
			}
			lastMatched = false;
623
			simpleMatchCount = 0;
J
Johannes Rieken 已提交
624 625 626 627 628

		} else if (arrow & Arrow.Diag) {

			if (arrow & Arrow.Left) {
				// left
629 630
				_findAllMatches(
					patternPos,
631
					wordPos - 1,
632 633
					!matches.isEmpty() ? total - 1 : total, // gap penalty after first match
					matches.slice(),
634
					lastMatched
635
				);
636
			}
J
Johannes Rieken 已提交
637 638

			// diag
639
			total += score;
J
Johannes Rieken 已提交
640 641 642
			patternPos -= 1;
			wordPos -= 1;
			matches.unshift(wordPos);
643
			lastMatched = true;
J
Johannes Rieken 已提交
644

J
Johannes Rieken 已提交
645 646 647
			// count simple matches and boost a row of
			// simple matches when they yield in a
			// strong match.
648 649
			if (score === 1) {
				simpleMatchCount += 1;
J
Johannes Rieken 已提交
650

651
				if (patternPos === _patternStartPos && !_firstMatchCanBeWeak) {
J
Johannes Rieken 已提交
652 653 654 655 656
					// when the first match is a weak
					// match we discard it
					return undefined;
				}

657
			} else {
J
Johannes Rieken 已提交
658
				// boost
659
				total += 1 + (simpleMatchCount * (score - 1));
660 661 662
				simpleMatchCount = 0;
			}

J
Johannes Rieken 已提交
663
		} else {
664
			return undefined;
665 666 667
		}
	}

J
Johannes Rieken 已提交
668 669
	total -= wordPos >= 3 ? 9 : wordPos * 3; // late start penalty

670 671
	// dynamically keep track of the current top score
	// and insert the current best score at head, the rest at tail
672
	_matchesCount += 1;
673 674
	if (total > _topScore) {
		_topScore = total;
675
		_topMatch = matches;
676
	}
677
}
678

679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699
class LazyArray {

	private _parent: LazyArray;
	private _parentLen: number;
	private _data: number[];

	isEmpty(): boolean {
		return !this._data && (!this._parent || this._parent.isEmpty());
	}

	unshift(n: number) {
		if (!this._data) {
			this._data = [n];
		} else {
			this._data.unshift(n);
		}
	}

	slice(): LazyArray {
		const ret = new LazyArray();
		ret._parent = this;
700
		ret._parentLen = this._data ? this._data.length : 0; return ret;
701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716
	}

	toArray(): number[] {
		if (!this._data) {
			return this._parent.toArray();
		}
		const bucket: number[][] = [];
		let element = <LazyArray>this;
		while (element) {
			if (element._parent && element._parent._data) {
				bucket.push(element._parent._data.slice(element._parent._data.length - element._parentLen));
			}
			element = element._parent;
		}
		return Array.prototype.concat.apply(this._data, bucket);
	}
717 718 719 720 721 722 723
}

//#endregion


//#region --- graceful ---

724
export function fuzzyScoreGracefulAggressive(pattern: string, lowPattern: string, patternPos: number, word: string, lowWord: string, wordPos: number, firstMatchCanBeWeak: boolean): FuzzyScore | undefined {
725
	return fuzzyScoreWithPermutations(pattern, lowPattern, patternPos, word, lowWord, wordPos, true, firstMatchCanBeWeak);
726 727
}

728
export function fuzzyScoreGraceful(pattern: string, lowPattern: string, patternPos: number, word: string, lowWord: string, wordPos: number, firstMatchCanBeWeak: boolean): FuzzyScore | undefined {
729
	return fuzzyScoreWithPermutations(pattern, lowPattern, patternPos, word, lowWord, wordPos, false, firstMatchCanBeWeak);
730 731
}

732 733
function fuzzyScoreWithPermutations(pattern: string, lowPattern: string, patternPos: number, word: string, lowWord: string, wordPos: number, aggressive: boolean, firstMatchCanBeWeak: boolean): FuzzyScore | undefined {
	let top = fuzzyScore(pattern, lowPattern, patternPos, word, lowWord, wordPos, firstMatchCanBeWeak);
734 735 736 737 738 739 740 741 742

	if (top && !aggressive) {
		// when using the original pattern yield a result we`
		// return it unless we are aggressive and try to find
		// a better alignment, e.g. `cno` -> `^co^ns^ole` or `^c^o^nsole`.
		return top;
	}

	if (pattern.length >= 3) {
J
Johannes Rieken 已提交
743 744
		// When the pattern is long enough then try a few (max 7)
		// permutations of the pattern to find a better match. The
745 746 747
		// permutations only swap neighbouring characters, e.g
		// `cnoso` becomes `conso`, `cnsoo`, `cnoos`.
		let tries = Math.min(7, pattern.length - 1);
748 749
		for (let movingPatternPos = patternPos + 1; movingPatternPos < tries; movingPatternPos++) {
			let newPattern = nextTypoPermutation(pattern, movingPatternPos);
750
			if (newPattern) {
751
				let candidate = fuzzyScore(newPattern, newPattern.toLowerCase(), patternPos, word, lowWord, wordPos, firstMatchCanBeWeak);
J
Johannes Rieken 已提交
752 753 754 755 756
				if (candidate) {
					candidate[0] -= 3; // permutation penalty
					if (!top || candidate[0] > top[0]) {
						top = candidate;
					}
757 758 759 760 761 762 763 764
				}
			}
		}
	}

	return top;
}

765
function nextTypoPermutation(pattern: string, patternPos: number): string | undefined {
766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784

	if (patternPos + 1 >= pattern.length) {
		return undefined;
	}

	let swap1 = pattern[patternPos];
	let swap2 = pattern[patternPos + 1];

	if (swap1 === swap2) {
		return undefined;
	}

	return pattern.slice(0, patternPos)
		+ swap2
		+ swap1
		+ pattern.slice(patternPos + 2);
}

//#endregion