filters.ts 10.1 KB
Newer Older
E
Erich Gamma 已提交
1 2 3 4 5 6
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/
'use strict';

B
Benjamin Pasero 已提交
7
import strings = require('vs/base/common/strings');
J
Johannes Rieken 已提交
8 9
import { BoundedLinkedMap } from 'vs/base/common/map';
import { CharCode } from 'vs/base/common/charCode';
E
Erich Gamma 已提交
10 11 12

export interface IFilter {
	// Returns null if word doesn't match.
B
Benjamin Pasero 已提交
13
	(word: string, wordToMatchAgainst: string): IMatch[];
E
Erich Gamma 已提交
14 15 16
}

export interface IMatch {
B
Benjamin Pasero 已提交
17 18
	start: number;
	end: number;
E
Erich Gamma 已提交
19 20 21 22 23
}

// Combined filters

/**
24
 * @returns A filter which combines the provided set
E
Erich Gamma 已提交
25 26 27 28
 * of filters with an or. The *first* filters that
 * matches defined the return value of the returned
 * filter.
 */
B
Benjamin Pasero 已提交
29
export function or(...filter: IFilter[]): IFilter {
30
	return function (word: string, wordToMatchAgainst: string): IMatch[] {
B
Benjamin Pasero 已提交
31 32 33
		for (let i = 0, len = filter.length; i < len; i++) {
			let match = filter[i](word, wordToMatchAgainst);
			if (match) {
E
Erich Gamma 已提交
34 35 36 37 38 39 40 41
				return match;
			}
		}
		return null;
	};
}

/**
42
 * @returns A filter which combines the provided set
E
Erich Gamma 已提交
43 44 45
 * of filters with an and. The combines matches are
 * returned if *all* filters match.
 */
B
Benjamin Pasero 已提交
46
export function and(...filter: IFilter[]): IFilter {
47
	return function (word: string, wordToMatchAgainst: string): IMatch[] {
B
Benjamin Pasero 已提交
48 49 50 51
		let result: IMatch[] = [];
		for (let i = 0, len = filter.length; i < len; i++) {
			let match = filter[i](word, wordToMatchAgainst);
			if (!match) {
E
Erich Gamma 已提交
52 53 54 55 56 57 58 59 60 61
				return null;
			}
			result = result.concat(match);
		}
		return result;
	};
}

// Prefix

B
Benjamin Pasero 已提交
62 63
export let matchesStrictPrefix: IFilter = (word: string, wordToMatchAgainst: string): IMatch[] => { return _matchesPrefix(false, word, wordToMatchAgainst); };
export let matchesPrefix: IFilter = (word: string, wordToMatchAgainst: string): IMatch[] => { return _matchesPrefix(true, word, wordToMatchAgainst); };
E
Erich Gamma 已提交
64

B
Benjamin Pasero 已提交
65
function _matchesPrefix(ignoreCase: boolean, word: string, wordToMatchAgainst: string): IMatch[] {
66
	if (!wordToMatchAgainst || wordToMatchAgainst.length === 0 || wordToMatchAgainst.length < word.length) {
E
Erich Gamma 已提交
67 68 69 70 71 72
		return null;
	}
	if (ignoreCase) {
		word = word.toLowerCase();
		wordToMatchAgainst = wordToMatchAgainst.toLowerCase();
	}
B
Benjamin Pasero 已提交
73
	for (let i = 0; i < word.length; i++) {
E
Erich Gamma 已提交
74 75 76 77 78 79 80 81 82
		if (word[i] !== wordToMatchAgainst[i]) {
			return null;
		}
	}
	return word.length > 0 ? [{ start: 0, end: word.length }] : [];
}

// Contiguous Substring

B
Benjamin Pasero 已提交
83 84
export function matchesContiguousSubString(word: string, wordToMatchAgainst: string): IMatch[] {
	let index = wordToMatchAgainst.toLowerCase().indexOf(word.toLowerCase());
E
Erich Gamma 已提交
85 86 87 88 89 90 91 92 93
	if (index === -1) {
		return null;
	}

	return [{ start: index, end: index + word.length }];
}

// Substring

B
Benjamin Pasero 已提交
94
export function matchesSubString(word: string, wordToMatchAgainst: string): IMatch[] {
E
Erich Gamma 已提交
95 96 97
	return _matchesSubString(word.toLowerCase(), wordToMatchAgainst.toLowerCase(), 0, 0);
}

B
Benjamin Pasero 已提交
98
function _matchesSubString(word: string, wordToMatchAgainst: string, i: number, j: number): IMatch[] {
E
Erich Gamma 已提交
99 100 101 102 103 104
	if (i === word.length) {
		return [];
	} else if (j === wordToMatchAgainst.length) {
		return null;
	} else {
		if (word[i] === wordToMatchAgainst[j]) {
B
Benjamin Pasero 已提交
105
			let result: IMatch[] = null;
E
Erich Gamma 已提交
106 107 108 109 110 111 112 113 114 115 116
			if (result = _matchesSubString(word, wordToMatchAgainst, i + 1, j + 1)) {
				return join({ start: j, end: j + 1 }, result);
			}
		}

		return _matchesSubString(word, wordToMatchAgainst, i, j + 1);
	}
}

// CamelCase

B
Benjamin Pasero 已提交
117
function isLower(code: number): boolean {
118
	return CharCode.a <= code && code <= CharCode.z;
E
Erich Gamma 已提交
119 120
}

B
Benjamin Pasero 已提交
121
function isUpper(code: number): boolean {
122
	return CharCode.A <= code && code <= CharCode.Z;
E
Erich Gamma 已提交
123 124
}

B
Benjamin Pasero 已提交
125
function isNumber(code: number): boolean {
126
	return CharCode.Digit0 <= code && code <= CharCode.Digit9;
E
Erich Gamma 已提交
127 128
}

B
Benjamin Pasero 已提交
129
function isWhitespace(code: number): boolean {
130 131 132 133 134 135
	return (
		code === CharCode.Space
		|| code === CharCode.Tab
		|| code === CharCode.LineFeed
		|| code === CharCode.CarriageReturn
	);
E
Erich Gamma 已提交
136 137
}

B
Benjamin Pasero 已提交
138
function isAlphanumeric(code: number): boolean {
E
Erich Gamma 已提交
139 140 141
	return isLower(code) || isUpper(code) || isNumber(code);
}

B
Benjamin Pasero 已提交
142
function join(head: IMatch, tail: IMatch[]): IMatch[] {
E
Erich Gamma 已提交
143 144 145 146 147 148 149 150 151 152 153
	if (tail.length === 0) {
		tail = [head];
	} else if (head.end === tail[0].start) {
		tail[0].start = head.start;
	} else {
		tail.unshift(head);
	}
	return tail;
}

function nextAnchor(camelCaseWord: string, start: number): number {
B
Benjamin Pasero 已提交
154 155 156
	for (let i = start; i < camelCaseWord.length; i++) {
		let c = camelCaseWord.charCodeAt(i);
		if (isUpper(c) || isNumber(c) || (i > 0 && !isAlphanumeric(camelCaseWord.charCodeAt(i - 1)))) {
E
Erich Gamma 已提交
157 158 159 160 161 162
			return i;
		}
	}
	return camelCaseWord.length;
}

B
Benjamin Pasero 已提交
163
function _matchesCamelCase(word: string, camelCaseWord: string, i: number, j: number): IMatch[] {
E
Erich Gamma 已提交
164 165 166 167 168 169 170
	if (i === word.length) {
		return [];
	} else if (j === camelCaseWord.length) {
		return null;
	} else if (word[i] !== camelCaseWord[j].toLowerCase()) {
		return null;
	} else {
B
Benjamin Pasero 已提交
171
		let result: IMatch[] = null;
B
Benjamin Pasero 已提交
172
		let nextUpperIndex = j + 1;
E
Erich Gamma 已提交
173 174 175 176 177 178 179 180 181
		result = _matchesCamelCase(word, camelCaseWord, i + 1, j + 1);
		while (!result && (nextUpperIndex = nextAnchor(camelCaseWord, nextUpperIndex)) < camelCaseWord.length) {
			result = _matchesCamelCase(word, camelCaseWord, i + 1, nextUpperIndex);
			nextUpperIndex++;
		}
		return result === null ? null : join({ start: j, end: j + 1 }, result);
	}
}

J
Joao Moreno 已提交
182 183 184 185 186 187 188
interface ICamelCaseAnalysis {
	upperPercent: number;
	lowerPercent: number;
	alphaPercent: number;
	numericPercent: number;
}

E
Erich Gamma 已提交
189 190
// Heuristic to avoid computing camel case matcher for words that don't
// look like camelCaseWords.
J
Joao Moreno 已提交
191
function analyzeCamelCaseWord(word: string): ICamelCaseAnalysis {
192
	let upper = 0, lower = 0, alpha = 0, numeric = 0, code = 0;
E
Erich Gamma 已提交
193

B
Benjamin Pasero 已提交
194
	for (let i = 0; i < word.length; i++) {
E
Erich Gamma 已提交
195 196
		code = word.charCodeAt(i);

197 198 199 200
		if (isUpper(code)) { upper++; }
		if (isLower(code)) { lower++; }
		if (isAlphanumeric(code)) { alpha++; }
		if (isNumber(code)) { numeric++; }
E
Erich Gamma 已提交
201 202
	}

B
Benjamin Pasero 已提交
203 204 205
	let upperPercent = upper / word.length;
	let lowerPercent = lower / word.length;
	let alphaPercent = alpha / word.length;
206
	let numericPercent = numeric / word.length;
E
Erich Gamma 已提交
207

J
Joao Moreno 已提交
208 209 210 211
	return { upperPercent, lowerPercent, alphaPercent, numericPercent };
}

function isUpperCaseWord(analysis: ICamelCaseAnalysis): boolean {
B
Benjamin Pasero 已提交
212
	const { upperPercent, lowerPercent } = analysis;
J
Joao Moreno 已提交
213 214 215 216 217
	return lowerPercent === 0 && upperPercent > 0.6;
}

function isCamelCaseWord(analysis: ICamelCaseAnalysis): boolean {
	const { upperPercent, lowerPercent, alphaPercent, numericPercent } = analysis;
218
	return lowerPercent > 0.2 && upperPercent < 0.8 && alphaPercent > 0.6 && numericPercent < 0.2;
E
Erich Gamma 已提交
219 220 221 222 223
}

// Heuristic to avoid computing camel case matcher for words that don't
// look like camel case patterns.
function isCamelCasePattern(word: string): boolean {
B
Benjamin Pasero 已提交
224
	let upper = 0, lower = 0, code = 0, whitespace = 0;
E
Erich Gamma 已提交
225

B
Benjamin Pasero 已提交
226
	for (let i = 0; i < word.length; i++) {
E
Erich Gamma 已提交
227 228
		code = word.charCodeAt(i);

229 230 231
		if (isUpper(code)) { upper++; }
		if (isLower(code)) { lower++; }
		if (isWhitespace(code)) { whitespace++; }
E
Erich Gamma 已提交
232 233 234 235 236 237 238 239 240
	}

	if ((upper === 0 || lower === 0) && whitespace === 0) {
		return word.length <= 30;
	} else {
		return upper <= 5;
	}
}

B
Benjamin Pasero 已提交
241
export function matchesCamelCase(word: string, camelCaseWord: string): IMatch[] {
242
	if (!camelCaseWord || camelCaseWord.length === 0) {
E
Erich Gamma 已提交
243 244 245 246 247 248 249
		return null;
	}

	if (!isCamelCasePattern(word)) {
		return null;
	}

J
Joao Moreno 已提交
250
	if (camelCaseWord.length > 60) {
E
Erich Gamma 已提交
251 252 253
		return null;
	}

J
Joao Moreno 已提交
254 255 256 257 258 259 260 261 262 263
	const analysis = analyzeCamelCaseWord(camelCaseWord);

	if (!isCamelCaseWord(analysis)) {
		if (!isUpperCaseWord(analysis)) {
			return null;
		}

		camelCaseWord = camelCaseWord.toLowerCase();
	}

B
Benjamin Pasero 已提交
264 265
	let result: IMatch[] = null;
	let i = 0;
E
Erich Gamma 已提交
266 267 268 269 270 271 272 273

	while (i < camelCaseWord.length && (result = _matchesCamelCase(word.toLowerCase(), camelCaseWord, 0, i)) === null) {
		i = nextAnchor(camelCaseWord, i + 1);
	}

	return result;
}

274 275 276 277 278
// Matches beginning of words supporting non-ASCII languages
// E.g. "gp" or "g p" will match "Git: Pull"
// Useful in cases where the target is words (e.g. command labels)

export function matchesWords(word: string, target: string): IMatch[] {
279
	if (!target || target.length === 0) {
280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300
		return null;
	}

	let result: IMatch[] = null;
	let i = 0;

	while (i < target.length && (result = _matchesWords(word.toLowerCase(), target, 0, i)) === null) {
		i = nextWord(target, i + 1);
	}

	return result;
}

function _matchesWords(word: string, target: string, i: number, j: number): IMatch[] {
	if (i === word.length) {
		return [];
	} else if (j === target.length) {
		return null;
	} else if (word[i] !== target[j].toLowerCase()) {
		return null;
	} else {
B
Benjamin Pasero 已提交
301
		let result: IMatch[] = null;
302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321
		let nextWordIndex = j + 1;
		result = _matchesWords(word, target, i + 1, j + 1);
		while (!result && (nextWordIndex = nextWord(target, nextWordIndex)) < target.length) {
			result = _matchesWords(word, target, i + 1, nextWordIndex);
			nextWordIndex++;
		}
		return result === null ? null : join({ start: j, end: j + 1 }, result);
	}
}

function nextWord(word: string, start: number): number {
	for (let i = start; i < word.length; i++) {
		let c = word.charCodeAt(i);
		if (isWhitespace(c) || (i > 0 && isWhitespace(word.charCodeAt(i - 1)))) {
			return i;
		}
	}
	return word.length;
}

E
Erich Gamma 已提交
322 323
// Fuzzy

B
Benjamin Pasero 已提交
324 325 326 327 328
export enum SubstringMatching {
	Contiguous,
	Separate
}

329
export const fuzzyContiguousFilter = or(matchesPrefix, matchesCamelCase, matchesContiguousSubString);
B
Benjamin Pasero 已提交
330
const fuzzySeparateFilter = or(matchesPrefix, matchesCamelCase, matchesSubString);
B
Benjamin Pasero 已提交
331
const fuzzyRegExpCache = new BoundedLinkedMap<RegExp>(10000); // bounded to 10000 elements
B
Benjamin Pasero 已提交
332

333
export function matchesFuzzy(word: string, wordToMatchAgainst: string, enableSeparateSubstringMatching = false): IMatch[] {
334 335 336
	if (typeof word !== 'string' || typeof wordToMatchAgainst !== 'string') {
		return null; // return early for invalid input
	}
E
Erich Gamma 已提交
337 338

	// Form RegExp for wildcard matches
B
Benjamin Pasero 已提交
339
	let regexp = fuzzyRegExpCache.get(word);
E
Erich Gamma 已提交
340
	if (!regexp) {
B
Benjamin Pasero 已提交
341
		regexp = new RegExp(strings.convertSimple2RegExpPattern(word), 'i');
B
Benjamin Pasero 已提交
342
		fuzzyRegExpCache.set(word, regexp);
E
Erich Gamma 已提交
343 344 345
	}

	// RegExp Filter
B
Benjamin Pasero 已提交
346
	let match: RegExpExecArray = regexp.exec(wordToMatchAgainst);
E
Erich Gamma 已提交
347
	if (match) {
B
Benjamin Pasero 已提交
348
		return [{ start: match.index, end: match.index + match[0].length }];
E
Erich Gamma 已提交
349 350 351
	}

	// Default Filter
352
	return enableSeparateSubstringMatching ? fuzzySeparateFilter(word, wordToMatchAgainst) : fuzzyContiguousFilter(word, wordToMatchAgainst);
B
Benjamin Pasero 已提交
353
}