glob.ts 7.4 KB
Newer Older
E
Erich Gamma 已提交
1 2 3 4 5 6 7 8 9
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/
'use strict';

import strings = require('vs/base/common/strings');
import paths = require('vs/base/common/paths');

B
Benjamin Pasero 已提交
10
// Compiled regular expressions keyed by (trimmed) glob pattern; filled by globToRegExp.
// Created via Object.create(null) so pattern keys never resolve to inherited Object.prototype members.
const CACHE: { [glob: string]: RegExp } = Object.create(null);
E
Erich Gamma 已提交
11 12

/**
 * A set of glob patterns, keyed by the pattern text.
 *
 * The value decides how the pattern is treated by matchExpression:
 * - `false`: the pattern is disabled and skipped
 * - `true`: the pattern applies unconditionally
 * - a SiblingClause: the pattern only applies when the clause's sibling exists
 * - anything else: treated like an unconditionally enabled pattern
 */
export interface IExpression {
	[pattern: string]: boolean | SiblingClause | any;
}

/**
 * Condition attached to a pattern of an IExpression: the pattern only counts as a match
 * when one of the siblings passed to match() is equal to `when`.
 */
export interface SiblingClause {
	// Sibling name to look for; the first `$(basename)` occurrence is substituted by matchExpression before comparing.
	when: string;
}

B
Benjamin Pasero 已提交
20 21
// Regular expression source fragments (character classes) used when translating globs.
const PATH_REGEX = '[/\\\\]';		// any slash or backslash
const NO_PATH_REGEX = '[^/\\\\]';	// any non-slash and non-backslash
E
Erich Gamma 已提交
22 23 24 25 26 27 28 29 30

/**
 * Returns the regular expression source that stands for a run of stars in a glob.
 *
 * @param starCount number of consecutive stars: 0 yields an empty string, 1 yields the
 * single-star form, any other value yields the globstar form.
 * @returns regular expression source text (not anchored, not compiled).
 */
function starsToRegExp(starCount: number): string {
	switch (starCount) {
		case 0:
			return '';
		case 1:
			return NO_PATH_REGEX + '*?'; // 1 star matches any number of characters except path separator (/ and \) - non greedy (?)
		default:
			// Matches:  (Path Sep    OR     Path Val followed by Path Sep     OR    Path Sep followed by Path Val) 0-many times
			// Group is non capturing because we don't need to capture at all (?:...)
			// Overall we use non-greedy matching because it could be that we match too much
			return '(?:' + PATH_REGEX + '|' + NO_PATH_REGEX + '+' + PATH_REGEX + '|' + PATH_REGEX + NO_PATH_REGEX + '+)*?';
	}
}

/**
 * Splits a glob pattern at every occurrence of `splitChar` that is not enclosed
 * in braces ({...}) or brackets ([...]).
 *
 * Notes on the result:
 * - an empty/missing pattern yields an empty array
 * - empty segments at the start or in the middle are kept ('/a' => ['', 'a'])
 * - an empty segment at the very end is dropped ('a/' => ['a'])
 * - brace/bracket tracking is a simple on/off flag, nesting is not counted
 *
 * @param pattern the glob pattern to split
 * @param splitChar the single character to split on (e.g. '/' or ',')
 * @returns the segments in order of appearance
 */
export function splitGlobAware(pattern: string, splitChar: string): string[] {
	if (!pattern) {
		return [];
	}

	let segments: string[] = [];

	let inBraces = false;
	let inBrackets = false;

	let curVal = '';
	for (let i = 0; i < pattern.length; i++) {
		let char = pattern[i];

		// A split character outside of any group closes the current segment and is not part of it
		if (char === splitChar && !inBraces && !inBrackets) {
			segments.push(curVal);
			curVal = '';
			continue;
		}

		// Track whether we are inside a group (the delimiters themselves stay in the segment).
		// The split character is checked first so that it wins if it happens to be a delimiter too.
		if (char !== splitChar) {
			switch (char) {
				case '{':
					inBraces = true;
					break;
				case '}':
					inBraces = false;
					break;
				case '[':
					inBrackets = true;
					break;
				case ']':
					inBrackets = false;
					break;
			}
		}

		curVal += char;
	}

	// Tail: only a non-empty remainder becomes a segment
	if (curVal) {
		segments.push(curVal);
	}

	return segments;
}

/**
 * Translates a glob pattern into regular expression source text.
 *
 * The pattern is split on '/' (outside of braces/brackets) and each segment is translated:
 * - a '**' segment becomes the globstar expression (consecutive globstars collapse into one)
 * - '*' and '?' match within a path segment only
 * - {a,b} becomes a non-capturing alternation, each choice being translated recursively
 * - [...] becomes a character class; '-' and '^' are passed through, everything else is escaped
 *
 * Content of a brace or bracket group that is never closed within its segment is not emitted.
 *
 * @param pattern the glob pattern; empty/missing yields ''
 * @returns regular expression source without '^'/'$' anchors
 */
function parseRegExp(pattern: string): string {
	if (!pattern) {
		return '';
	}

	let regEx = '';

	// Split up into segments for each slash found
	let segments = splitGlobAware(pattern, '/');

	// Special case where we only have globstars
	if (segments.every(s => s === '**')) {
		regEx = '.*';
	}

	// Build regex over segments
	else {
		let previousSegmentWasGlobStar = false;
		segments.forEach((segment, index) => {

			// Globstar is special
			if (segment === '**') {

				// if we have more than one globstar after another, just ignore it
				if (!previousSegmentWasGlobStar) {
					regEx += starsToRegExp(2);
					previousSegmentWasGlobStar = true;
				}

				return;
			}

			// States
			let inBraces = false;
			let braceVal = '';

			let inBrackets = false;
			let bracketVal = '';

			let char: string;
			for (let i = 0; i < segment.length; i++) {
				char = segment[i];

				// Support brace expansion: collect everything up to the closing brace
				if (char !== '}' && inBraces) {
					braceVal += char;
					continue;
				}

				// Support brackets: collect everything up to the closing bracket
				if (char !== ']' && inBrackets) {
					let res: string;
					switch (char) {
						case '-':		// allow the range operator
							res = char;
							break;
						case '^':		// allow the negate operator
							res = char;
							break;
						default:
							res = strings.escapeRegExpCharacters(char);
					}

					bracketVal += res;
					continue;
				}

				switch (char) {
					case '{':
						inBraces = true;
						continue;

					case '[':
						inBrackets = true;
						continue;

					case '}': {
						const choices = splitGlobAware(braceVal, ',');

						// Converts {foo,bar} => (?:foo|bar)
						// Each choice is translated exactly once; no choices at all yields an empty group
						regEx += '(?:' + choices.map(choice => parseRegExp(choice)).join('|') + ')';

						inBraces = false;
						braceVal = '';

						break;
					}

					case ']':
						regEx += ('[' + bracketVal + ']');

						inBrackets = false;
						bracketVal = '';

						break;

					case '?':
						regEx += NO_PATH_REGEX; // 1 ? matches any single character except path separator (/ and \)
						continue;

					case '*':
						regEx += starsToRegExp(1);
						continue;

					default:
						regEx += strings.escapeRegExpCharacters(char);
				}
			}

			// Tail: Add the slash we had split on if there is more to come and the next one is not a globstar
			if (index < segments.length - 1 && segments[index + 1] !== '**') {
				regEx += PATH_REGEX;
			}

			// reset state
			previousSegmentWasGlobStar = false;
		});
	}

	return regEx;
}

/**
 * Compiles a glob pattern into an anchored regular expression, reusing previously
 * compiled results from CACHE.
 *
 * @param pattern the glob pattern; surrounding whitespace is trimmed before use
 * @returns the compiled expression, or null when `pattern` is empty/missing. A pattern whose
 * translation is not a valid regular expression yields an expression that never matches.
 */
function globToRegExp(pattern: string): RegExp {
	if (!pattern) {
		return null;
	}

	// Whitespace trimming
	pattern = pattern.trim();

	// Check cache
	if (CACHE[pattern]) {
		let cached = CACHE[pattern];
		cached.lastIndex = 0; // reset RegExp to its initial state to reuse it!

		return cached;
	}

	let regEx = parseRegExp(pattern);

	// Wrap it so that the whole path has to match
	regEx = '^' + regEx + '$';

	// Convert to regexp and be ready for errors
	let result: RegExp;
	try {
		result = new RegExp(regEx);
	} catch (error) {
		result = /.^/; // create a regex that matches nothing if we cannot parse the pattern
	}

	// Make sure to cache (the never-matching fallback is cached as well)
	CACHE[pattern] = result;

	return result;
}

/**
 * Simplified glob matching. Supports a subset of glob patterns:
 * - * matches anything inside a path segment
 * - ? matches 1 character inside a path segment
 * - ** matches anything including an empty path segment
 * - simple brace expansion ({js,ts} => js or ts)
 * - character ranges (using [...])
 *
 * When called with a string pattern, the result tells whether `path` matches it.
 * When called with an expression, the result is the first pattern of the expression that
 * matches (taking sibling clauses into account) or null if there is none.
 * In both cases `false` is returned when the first argument or `path` is empty/missing.
 *
 * @param arg1 a glob pattern or an expression of glob patterns
 * @param path the path to test
 * @param siblings names checked against sibling clauses of an expression
 */
export function match(pattern: string, path: string): boolean;
export function match(expression: IExpression, path: string, siblings?: string[]): string /* the matching pattern */;
export function match(arg1: string | IExpression, path: string, siblings?: string[]): any {
	if (!arg1 || !path) {
		return false;
	}

	// Glob with String
	if (typeof arg1 === 'string') {
		const regExp = globToRegExp(arg1);
		return regExp && regExp.test(path);
	}

	// Glob with Expression
	return matchExpression(<IExpression>arg1, path, siblings);
}

/**
 * Finds the first pattern of an expression that applies to `path`.
 *
 * Patterns are visited in the order reported by Object.getOwnPropertyNames. A pattern whose
 * value is `false` is skipped. A pattern with a sibling clause only applies when, after the
 * first `$(basename)` in its `when` text is substituted, the text equals one of `siblings`.
 * The substituted value is paths.basename(path) with paths.extname(path) trimmed from its end.
 *
 * @param expression the patterns to test
 * @param path the path to test
 * @param siblings names to satisfy sibling clauses; without them such patterns never apply
 * @returns the matching pattern, or null when no pattern applies
 */
function matchExpression(expression: IExpression, path: string, siblings?: string[]): string /* the matching pattern */ {
	let patterns = Object.getOwnPropertyNames(expression);
	let basename: string | undefined; // computed lazily, at most once per call
	for (let i = 0; i < patterns.length; i++) {
		let pattern = patterns[i];

		let value = expression[pattern];
		if (value === false) {
			continue; // pattern is disabled
		}

		// Pattern matches path
		if (match(pattern, path)) {

			// Expression Pattern is <boolean>
			if (typeof value === 'boolean') {
				return pattern;
			}

			// Expression Pattern is <SiblingClause>
			if (value && typeof (<SiblingClause>value).when === 'string') {
				if (!siblings || !siblings.length) {
					continue; // pattern is malformed or we don't have siblings
				}

				// Compare against undefined so that an empty basename is not recomputed every time
				if (basename === undefined) {
					basename = strings.rtrim(paths.basename(path), paths.extname(path));
				}

				let clause = <SiblingClause>value;
				const substitution = basename;

				// Use a replacer function: a plain replacement string would interpret
				// sequences like "$&" or "$$" that may occur in file names
				let clausePattern = clause.when.replace('$(basename)', () => substitution);
				if (siblings.some((sibling) => sibling === clausePattern)) {
					return pattern;
				} else {
					continue; // pattern does not match in the end because the when clause is not satisfied
				}
			}

			// Expression is Anything
			return pattern;
		}
	}

	return null;
}