strings.ts 19.6 KB
Newer Older
E
Erich Gamma 已提交
1 2 3 4 5
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

J
Johannes Rieken 已提交
6
import { CharCode } from 'vs/base/common/charCode';
7

E
Erich Gamma 已提交
8 9 10
/**
 * The empty string, exported as a named constant.
 */
export const empty = '';
E
Erich Gamma 已提交
12

M
Matt Bierner 已提交
13
/**
 * Tells whether `str` is missing, is not a string, or consists solely of whitespace.
 *
 * @param str the value to inspect
 * @returns `true` when `str` is falsy, is not of type `string`, or trims to the empty string
 */
export function isFalsyOrWhitespace(str: string | undefined): boolean {
	if (!str || typeof str !== 'string') {
		return true;
	}
	return str.trim().length === 0;
}

E
Erich Gamma 已提交
20
/**
 * Left-pads the string form of `n` until it is at least `l` characters long.
 *
 * @param n the number to render
 * @param l the minimum length of the result
 * @param char the filler that is prepended once per missing position (defaults to `'0'`)
 * @returns the padded string; the plain string form of `n` when it already has `l` or more characters
 */
export function pad(n: number, l: number, char: string = '0'): string {
	const str = '' + n;

	// One filler per position that is missing to reach the requested length.
	let prefix = '';
	for (let i = str.length; i < l; i++) {
		prefix += char;
	}

	return prefix + str;
}

B
Benjamin Pasero 已提交
34
const _formatRegexp = /{(\d+)}/g;

/**
 * Helper to produce a string with a variable number of arguments. Insert variable segments
 * into the string using the {n} notation where n is the zero-based index of the argument
 * following the string. Placeholders whose index has no matching argument are left untouched.
 * @param value string to which formatting is applied
 * @param args replacements for {n}-entries
 * @returns `value` with every resolvable {n} placeholder substituted
 */
export function format(value: string, ...args: any[]): string {
	if (args.length === 0) {
		return value;
	}
	return value.replace(_formatRegexp, function (match, group) {
		const idx = parseInt(group, 10);
		// Keep the placeholder text when the index does not address an argument.
		return isNaN(idx) || idx < 0 || idx >= args.length ?
			match :
			args[idx];
	});
}

/**
 * Converts HTML characters inside the string to use entities instead. Makes the string safe from
 * being used e.g. in HTMLElement.innerHTML.
 *
 * Only `<`, `>` and `&` are replaced; quote characters are left as they are.
 * @param html the text to escape
 * @returns `html` with `<`, `>` and `&` replaced by `&lt;`, `&gt;` and `&amp;`
 */
export function escape(html: string): string {
	return html.replace(/[<>&]/g, function (match) {
		switch (match) {
			case '<': return '&lt;';
			case '>': return '&gt;';
			case '&': return '&amp;';
			default: return match;
		}
	});
}

/**
 * Escapes regular expression characters in a given string
 * @param value the text to escape
 * @returns `value` with a backslash placed in front of each of `\ { } * + ? | ^ $ . [ ] ( ) #`
 */
export function escapeRegExpCharacters(value: string): string {
	return value.replace(/[\\\{\}\*\+\?\|\^\$\.\[\]\(\)\#]/g, '\\$&');
}

/**
 * Removes all occurrences of needle from the beginning and end of haystack.
 * @param haystack string to trim
 * @param needle the thing to trim (default is a blank)
 * @returns haystack without leading and trailing repetitions of needle
 */
export function trim(haystack: string, needle: string = ' '): string {
	const trimmed = ltrim(haystack, needle);
	return rtrim(trimmed, needle);
}

/**
 * Removes all occurrences of needle from the beginning of haystack.
 * @param haystack string to trim
 * @param needle the thing to trim
 * @returns haystack without leading repetitions of needle; haystack itself when either argument is falsy
 */
export function ltrim(haystack: string, needle: string): string {
	if (!haystack || !needle) {
		return haystack;
	}

	const needleLen = needle.length;
	if (needleLen === 0 || haystack.length === 0) {
		return haystack;
	}

	let offset = 0;

	// Skip one needle at a time for as long as the needle sits exactly at the current offset.
	while (haystack.indexOf(needle, offset) === offset) {
		offset = offset + needleLen;
	}
	return haystack.substring(offset);
}

/**
 * Removes all occurrences of needle from the end of haystack.
 * @param haystack string to trim
 * @param needle the thing to trim
 * @returns haystack without trailing repetitions of needle; haystack itself when either argument is falsy
 */
export function rtrim(haystack: string, needle: string): string {
	if (!haystack || !needle) {
		return haystack;
	}

	const needleLen = needle.length,
		haystackLen = haystack.length;

	if (needleLen === 0 || haystackLen === 0) {
		return haystack;
	}

	let offset = haystackLen,
		idx = -1;

	while (true) {
		// Look for a needle that ends exactly where the kept part currently ends.
		idx = haystack.lastIndexOf(needle, offset - 1);
		if (idx === -1 || idx + needleLen !== offset) {
			break;
		}
		if (idx === 0) {
			// The whole haystack consisted of needles.
			return '';
		}
		offset = idx;
	}

	return haystack.substring(0, offset);
}

/**
 * Turns a simple wildcard pattern into regular expression source text: characters that
 * are special in a regular expression (as well as `-`, `,`, `#` and whitespace) are
 * backslash-escaped, after which every `*` is expanded to `.*`.
 * @param pattern the simple pattern, where `*` stands for any run of characters
 * @returns the regular expression source for `pattern`
 */
export function convertSimple2RegExpPattern(pattern: string): string {
	const escaped = pattern.replace(/[\-\\\{\}\+\?\|\^\$\.\,\[\]\(\)\#\s]/g, '\\$&');
	const expanded = escaped.replace(/[\*]/g, '.*');
	return expanded;
}

/**
 * Removes every `*` character from the given pattern.
 * @param pattern the pattern to strip
 * @returns `pattern` without any `*`
 */
export function stripWildcards(pattern: string): string {
	return pattern.replace(/\*/g, '');
}

/**
 * Determines if haystack starts with needle.
 * @param haystack the string to look into
 * @param needle the expected prefix
 * @returns `true` when the first `needle.length` characters of haystack equal needle
 */
export function startsWith(haystack: string, needle: string): boolean {
	if (haystack.length < needle.length) {
		return false;
	}

	if (haystack === needle) {
		return true;
	}

	for (let i = 0; i < needle.length; i++) {
		if (haystack[i] !== needle[i]) {
			return false;
		}
	}

	return true;
}

/**
 * Determines if haystack ends with needle.
 * @param haystack the string to look into
 * @param needle the expected suffix
 * @returns `true` when the last `needle.length` characters of haystack equal needle
 */
export function endsWith(haystack: string, needle: string): boolean {
	// Position at which needle has to start in order to be a suffix.
	const diff = haystack.length - needle.length;
	if (diff > 0) {
		return haystack.indexOf(needle, diff) === diff;
	} else if (diff === 0) {
		return haystack === needle;
	} else {
		return false;
	}
}

S
Sandeep Somavarapu 已提交
186 187 188 189 190
/**
 * Options understood by `createRegExp`.
 */
export interface RegExpOptions {
	/** When not set, the `i` (ignore case) flag is added. */
	matchCase?: boolean;
	/** When set, `\b` is added around the search string where its first/last character allows it. */
	wholeWord?: boolean;
	/** When set, the `m` flag is added. */
	multiline?: boolean;
	/** When set, the `g` flag is added. */
	global?: boolean;
	/** When set, the `u` flag is added. */
	unicode?: boolean;
}

/**
 * Builds a `RegExp` from a search string.
 *
 * @param searchString the text or regular expression source to search for; must not be empty
 * @param isRegex when `false`, `searchString` is escaped so that it is matched literally
 * @param options flags and whole-word handling, see `RegExpOptions`
 * @returns the regular expression for the search
 * @throws Error when `searchString` is empty
 */
export function createRegExp(searchString: string, isRegex: boolean, options: RegExpOptions = {}): RegExp {
	if (!searchString) {
		throw new Error('Cannot create regex from empty string');
	}
	if (!isRegex) {
		searchString = escapeRegExpCharacters(searchString);
	}
	if (options.wholeWord) {
		// `\B` finds no match in a one-character string only when that character is a word
		// character, so `\b` is added only next to a word character.
		if (!/\B/.test(searchString.charAt(0))) {
			searchString = '\\b' + searchString;
		}
		if (!/\B/.test(searchString.charAt(searchString.length - 1))) {
			searchString = searchString + '\\b';
		}
	}
	let modifiers = '';
	if (options.global) {
		modifiers += 'g';
	}
	if (!options.matchCase) {
		modifiers += 'i';
	}
	if (options.multiline) {
		modifiers += 'm';
	}
	if (options.unicode) {
		modifiers += 'u';
	}

	return new RegExp(searchString, modifiers);
}

/**
 * Detects regular expressions that match the empty string without advancing.
 *
 * @param regexp the regular expression to probe; `exec` is called on it
 * @returns `true` when `regexp` matches `''` and its `lastIndex` is `0` afterwards, except
 * for the sources `^`, `^$`, `$` and `^\s*$`, which always yield `false`
 */
export function regExpLeadsToEndlessLoop(regexp: RegExp): boolean {
	// Exit early if it's one of these special cases which are meant to match
	// against an empty string
	if (regexp.source === '^' || regexp.source === '^$' || regexp.source === '$' || regexp.source === '^\\s*$') {
		return false;
	}

	// We check against an empty string. If the regular expression doesn't advance
	// (e.g. ends in an endless loop) it will match an empty string.
	const match = regexp.exec('');
	return !!(match && regexp.lastIndex === 0);
}

239 240 241 242
/**
 * Checks regular expression source text for a numeric backreference such as `\1`.
 * A backslash that is itself escaped (`\\1`) does not count.
 * @param regexpValue the regular expression source to inspect
 * @returns `true` when an unescaped backslash followed by digits is present
 */
export function regExpContainsBackreference(regexpValue: string): boolean {
	const found = regexpValue.match(/([^\\]|^)(\\\\)*\\\d+/);
	return found !== null;
}

243 244 245 246 247 248 249
/**
 * Rebuilds the flag string of a regular expression from its `global`, `ignoreCase`,
 * `multiline` and `unicode` properties.
 * @param regexp the regular expression to read the flags from
 * @returns the flags in the order `g`, `i`, `m`, `u`
 */
export function regExpFlags(regexp: RegExp): string {
	let flags = '';
	if (regexp.global) {
		flags += 'g';
	}
	if (regexp.ignoreCase) {
		flags += 'i';
	}
	if (regexp.multiline) {
		flags += 'm';
	}
	if ((regexp as any).unicode) {
		flags += 'u';
	}
	return flags;
}

E
Erich Gamma 已提交
250 251 252 253 254
/**
 * Returns first index of the string that is not whitespace.
 * If string is empty or contains only whitespaces, returns -1
 *
 * Only the space and the tab character are treated as whitespace here.
 * @param str the string to scan
 */
export function firstNonWhitespaceIndex(str: string): number {
	for (let i = 0, len = str.length; i < len; i++) {
		const chCode = str.charCodeAt(i);
		if (chCode !== CharCode.Space && chCode !== CharCode.Tab) {
			return i;
		}
	}
	return -1;
}

/**
 * Returns the leading whitespace of the string.
 * If the string contains only whitespaces, returns entire string
 *
 * Only the space and the tab character are treated as whitespace here.
 * @param str the string to scan
 * @param start index at which scanning begins (defaults to 0)
 * @param end index at which scanning stops, exclusive (defaults to `str.length`)
 * @returns the run of spaces and tabs that begins at `start`, limited by `end`
 */
export function getLeadingWhitespace(str: string, start: number = 0, end: number = str.length): string {
	for (let i = start; i < end; i++) {
		const chCode = str.charCodeAt(i);
		if (chCode !== CharCode.Space && chCode !== CharCode.Tab) {
			return str.substring(start, i);
		}
	}
	return str.substring(start, end);
}

/**
 * Returns last index of the string that is not whitespace.
 * If string is empty or contains only whitespaces, returns -1
 *
 * Only the space and the tab character are treated as whitespace here.
 * @param str the string to scan
 * @param startIndex index at which the backwards scan begins (defaults to the last index)
 */
export function lastNonWhitespaceIndex(str: string, startIndex: number = str.length - 1): number {
	for (let i = startIndex; i >= 0; i--) {
		const chCode = str.charCodeAt(i);
		if (chCode !== CharCode.Space && chCode !== CharCode.Tab) {
			return i;
		}
	}
	return -1;
}

J
Johannes Rieken 已提交
292
/**
 * Compares two strings with the `<` and `>` operators.
 * @returns `-1` when `a < b`, `1` when `a > b`, and `0` otherwise
 */
export function compare(a: string, b: string): number {
	if (a < b) {
		return -1;
	} else if (a > b) {
		return 1;
	} else {
		return 0;
	}
}

302 303 304
/**
 * Compares two strings while ignoring case differences.
 *
 * Pairs of ASCII letters are compared by char code after lower-casing; as soon as a
 * differing pair involves a character that is not an ASCII letter, the result is
 * `compare` applied to the lower-cased strings.
 * @returns a negative number when `a` sorts before `b`, a positive number when it sorts
 * after `b`, and `0` when both are equal ignoring case
 */
export function compareIgnoreCase(a: string, b: string): number {
	const len = Math.min(a.length, b.length);
	for (let i = 0; i < len; i++) {
		let codeA = a.charCodeAt(i);
		let codeB = b.charCodeAt(i);

		if (codeA === codeB) {
			// equal
			continue;
		}

		// Map 'A'-'Z' onto 'a'-'z'.
		if (isUpperAsciiLetter(codeA)) {
			codeA += 32;
		}

		if (isUpperAsciiLetter(codeB)) {
			codeB += 32;
		}

		const diff = codeA - codeB;

		if (diff === 0) {
			// equal -> ignoreCase
			continue;

		} else if (isLowerAsciiLetter(codeA) && isLowerAsciiLetter(codeB)) {
			// both are ASCII letters: the char code difference decides
			return diff;

		} else {
			return compare(a.toLowerCase(), b.toLowerCase());
		}
	}

	// One string is a prefix of the other (ignoring case): the shorter one sorts first.
	if (a.length < b.length) {
		return -1;
	} else if (a.length > b.length) {
		return 1;
	} else {
		return 0;
	}
}

A
Alex Dima 已提交
345
/**
 * @param code a char code
 * @returns `true` when `code` lies between `CharCode.a` and `CharCode.z`, both included
 */
export function isLowerAsciiLetter(code: number): boolean {
	return code >= CharCode.a && code <= CharCode.z;
}

A
Alex Dima 已提交
349
/**
 * @param code a char code
 * @returns `true` when `code` lies between `CharCode.A` and `CharCode.Z`, both included
 */
export function isUpperAsciiLetter(code: number): boolean {
	return code >= CharCode.A && code <= CharCode.Z;
}

353
/**
 * @param code a char code
 * @returns `true` when `code` is a lower-case or an upper-case ASCII letter
 */
function isAsciiLetter(code: number): boolean {
	return isLowerAsciiLetter(code) || isUpperAsciiLetter(code);
}

/**
 * Tells whether two strings are equal when case differences are ignored.
 * A falsy argument is treated as having length 0 for the initial length comparison.
 * @returns `false` when the lengths differ, otherwise the result of the character-wise comparison
 */
export function equalsIgnoreCase(a: string, b: string): boolean {
	const len1 = a ? a.length : 0;
	const len2 = b ? b.length : 0;

	if (len1 !== len2) {
		return false;
	}

	return doEqualsIgnoreCase(a, b);
}

368 369 370 371 372
/**
 * Compares the first `stopAt` characters of `a` and `b` while ignoring case.
 * @param a first string
 * @param b second string
 * @param stopAt number of leading characters to compare (defaults to `a.length`)
 * @returns `false` when either argument is not a string or a character pair differs in
 * more than case, `true` otherwise
 */
function doEqualsIgnoreCase(a: string, b: string, stopAt = a.length): boolean {
	if (typeof a !== 'string' || typeof b !== 'string') {
		return false;
	}

	for (let i = 0; i < stopAt; i++) {
		const codeA = a.charCodeAt(i);
		const codeB = b.charCodeAt(i);

		if (codeA === codeB) {
			continue;
		}

		// a-z A-Z
		if (isAsciiLetter(codeA) && isAsciiLetter(codeB)) {
			// The upper-case and lower-case form of an ASCII letter are 32 char codes apart.
			const diff = Math.abs(codeA - codeB);
			if (diff !== 0 && diff !== 32) {
				return false;
			}
		}

		// Any other charcode
		else {
			if (String.fromCharCode(codeA).toLowerCase() !== String.fromCharCode(codeB).toLowerCase()) {
				return false;
			}
		}
	}

	return true;
}

400
/**
 * Determines if `str` starts with `candidate` when case differences are ignored.
 * @param str the string to look into
 * @param candidate the expected prefix
 * @returns `false` when `candidate` is longer than `str`, otherwise the result of comparing
 * the first `candidate.length` characters
 */
export function startsWithIgnoreCase(str: string, candidate: string): boolean {
	const candidateLength = candidate.length;
	if (candidate.length > str.length) {
		return false;
	}

	return doEqualsIgnoreCase(str, candidate, candidateLength);
}

E
Erich Gamma 已提交
409
/**
 * @param a first string
 * @param b second string
 * @returns the length of the common prefix of the two strings.
 */
export function commonPrefixLength(a: string, b: string): number {

	let i: number,
		len = Math.min(a.length, b.length);

	for (i = 0; i < len; i++) {
		if (a.charCodeAt(i) !== b.charCodeAt(i)) {
			return i;
		}
	}

	return len;
}

/**
 * @param a first string
 * @param b second string
 * @returns the length of the common suffix of the two strings.
 */
export function commonSuffixLength(a: string, b: string): number {

	let i: number,
		len = Math.min(a.length, b.length);

	const aLastIndex = a.length - 1;
	const bLastIndex = b.length - 1;

	// Walk both strings backwards from their last character.
	for (i = 0; i < len; i++) {
		if (a.charCodeAt(aLastIndex - i) !== b.charCodeAt(bLastIndex - i)) {
			return i;
		}
	}

	return len;
}

J
Johannes Rieken 已提交
446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461
/**
 * Compares `a` from `aStart` and `b` from `bStart` character by character until either
 * range is exhausted.
 * @returns `false` as soon as a character pair differs, `true` otherwise
 */
function substrEquals(a: string, aStart: number, aEnd: number, b: string, bStart: number, bEnd: number): boolean {
	while (aStart < aEnd && bStart < bEnd) {
		if (a[aStart] !== b[bStart]) {
			return false;
		}
		aStart += 1;
		bStart += 1;
	}
	return true;
}

/**
 * Return the overlap between the suffix of `a` and the prefix of `b`.
 * For instance `overlap("foobar", "arr, I'm a pirate") === 2`.
 * @returns the length of the longest suffix of `a` that is also a prefix of `b`, or 0
 */
export function overlap(a: string, b: string): number {
	const aEnd = a.length;
	let bEnd = b.length;
	let aStart = aEnd - bEnd;

	if (a === b) {
		// Identical strings overlap completely.
		return aEnd;
	}

	// Different strings of equal length can still overlap partially (e.g. "abc" and
	// "cde"), so they go through the search below like every other pair.
	if (aStart < 0) {
		// `b` is longer than `a`: only its first `a.length` characters can take part.
		bEnd += aStart;
		aStart = 0;
	}

	// Try the longest candidate first and shrink it by one character per round.
	while (aStart < aEnd && bEnd > 0) {
		if (substrEquals(a, aStart, aEnd, b, 0, bEnd)) {
			return bEnd;
		}
		bEnd -= 1;
		aStart += 1;
	}
	return 0;
}

E
Erich Gamma 已提交
483 484 485 486 487 488
// --- unicode
// http://en.wikipedia.org/wiki/Surrogate_pair
// Returns the code point starting at a specified index in a string
// Code points U+0000 to U+D7FF and U+E000 to U+FFFF are represented on a single character
// Code points U+10000 to U+10FFFF are represented on two consecutive characters
//export function getUnicodePoint(str:string, index:number, len:number):number {
//	const chrCode = str.charCodeAt(index);
//	if (0xD800 <= chrCode && chrCode <= 0xDBFF && index + 1 < len) {
//		const nextChrCode = str.charCodeAt(index + 1);
//		if (0xDC00 <= nextChrCode && nextChrCode <= 0xDFFF) {
//			return (chrCode - 0xD800) << 10 + (nextChrCode - 0xDC00) + 0x10000;
//		}
//	}
//	return chrCode;
//}
J
Johannes Rieken 已提交
498
/**
 * @param charCode a char code
 * @returns `true` when `charCode` lies in the range 0xD800 - 0xDBFF, both included
 */
export function isHighSurrogate(charCode: number): boolean {
	return (0xD800 <= charCode && charCode <= 0xDBFF);
}

J
Johannes Rieken 已提交
502
/**
 * @param charCode a char code
 * @returns `true` when `charCode` lies in the range 0xDC00 - 0xDFFF, both included
 */
export function isLowSurrogate(charCode: number): boolean {
	return (0xDC00 <= charCode && charCode <= 0xDFFF);
}
E
Erich Gamma 已提交
505

A
Alex Dima 已提交
506 507 508 509 510 511 512 513 514 515 516 517
/**
 * Matches a single right-to-left character.
 * Generated using https://github.com/alexandrudima/unicode-utils/blob/master/generate-rtl-test.js
 */
const CONTAINS_RTL = /(?:[\u05BE\u05C0\u05C3\u05C6\u05D0-\u05F4\u0608\u060B\u060D\u061B-\u064A\u066D-\u066F\u0671-\u06D5\u06E5\u06E6\u06EE\u06EF\u06FA-\u0710\u0712-\u072F\u074D-\u07A5\u07B1-\u07EA\u07F4\u07F5\u07FA-\u0815\u081A\u0824\u0828\u0830-\u0858\u085E-\u08BD\u200F\uFB1D\uFB1F-\uFB28\uFB2A-\uFD3D\uFD50-\uFDFC\uFE70-\uFEFC]|\uD802[\uDC00-\uDD1B\uDD20-\uDE00\uDE10-\uDE33\uDE40-\uDEE4\uDEEB-\uDF35\uDF40-\uDFFF]|\uD803[\uDC00-\uDCFF]|\uD83A[\uDC00-\uDCCF\uDD00-\uDD43\uDD50-\uDFFF]|\uD83B[\uDC00-\uDEBB])/;

/**
 * Tells whether `str` holds at least one Unicode character classified as "R" or "AL".
 * @param str the text to inspect
 */
export function containsRTL(str: string): boolean {
	const firstHit = CONTAINS_RTL.exec(str);
	return firstHit !== null;
}

518 519 520 521 522 523 524 525 526
/**
 * Matches a single emoji character.
 * Generated using https://github.com/alexandrudima/unicode-utils/blob/master/generate-emoji-test.js
 */
const CONTAINS_EMOJI = /(?:[\u231A\u231B\u23F0\u23F3\u2600-\u27BF\u2B50\u2B55]|\uD83C[\uDDE6-\uDDFF\uDF00-\uDFFF]|\uD83D[\uDC00-\uDE4F\uDE80-\uDEF8]|\uD83E[\uDD00-\uDDE6])/;

/**
 * Tells whether `str` holds at least one character matched by `CONTAINS_EMOJI`.
 * @param str the text to inspect
 */
export function containsEmoji(str: string): boolean {
	const firstHit = CONTAINS_EMOJI.exec(str);
	return firstHit !== null;
}

527 528 529 530 531 532 533 534
/** Matches strings made up exclusively of \t, \n, \r and the characters 0x20 - 0x7E. */
const IS_BASIC_ASCII = /^[\t\n\r\x20-\x7E]*$/;

/**
 * Tells whether every character of `str` is a basic ASCII character, i.e. lies in the
 * range 32 - 126 (both included) or is one of \n, \r, \t. The empty string qualifies.
 * @param str the text to inspect
 */
export function isBasicASCII(str: string): boolean {
	const wholeMatch = IS_BASIC_ASCII.exec(str);
	return wholeMatch !== null;
}

A
Alex Dima 已提交
535 536 537 538 539 540 541 542 543
/**
 * Tells whether at least one char code of `str` is accepted by `isFullWidthCharacter`.
 * @param str the text to inspect
 */
export function containsFullWidthCharacter(str: string): boolean {
	const length = str.length;
	let pos = 0;
	while (pos < length) {
		if (isFullWidthCharacter(str.charCodeAt(pos))) {
			return true;
		}
		pos++;
	}
	return false;
}

J
Johannes Rieken 已提交
544
/**
 * Tells whether a char code belongs to one of the blocks that are treated as full width
 * (two columns wide), see the block list inside the function.
 * @param charCode the char code to classify
 * @returns `true` for 0x2E80 - 0xD7AF, 0xF900 - 0xFAFF and 0xFF01 - 0xFF5E (bounds included)
 */
export function isFullWidthCharacter(charCode: number): boolean {
	// Do a cheap trick to better support wrapping of wide characters, treat them as 2 columns
	// http://jrgraphix.net/research/unicode_blocks.php
	//          2E80 — 2EFF   CJK Radicals Supplement
	//          2F00 — 2FDF   Kangxi Radicals
	//          2FF0 — 2FFF   Ideographic Description Characters
	//          3000 — 303F   CJK Symbols and Punctuation
	//          3040 — 309F   Hiragana
	//          30A0 — 30FF   Katakana
	//          3100 — 312F   Bopomofo
	//          3130 — 318F   Hangul Compatibility Jamo
	//          3190 — 319F   Kanbun
	//          31A0 — 31BF   Bopomofo Extended
	//          31F0 — 31FF   Katakana Phonetic Extensions
	//          3200 — 32FF   Enclosed CJK Letters and Months
	//          3300 — 33FF   CJK Compatibility
	//          3400 — 4DBF   CJK Unified Ideographs Extension A
	//          4DC0 — 4DFF   Yijing Hexagram Symbols
	//          4E00 — 9FFF   CJK Unified Ideographs
	//          A000 — A48F   Yi Syllables
	//          A490 — A4CF   Yi Radicals
	//          AC00 — D7AF   Hangul Syllables
	// [IGNORE] D800 — DB7F   High Surrogates
	// [IGNORE] DB80 — DBFF   High Private Use Surrogates
	// [IGNORE] DC00 — DFFF   Low Surrogates
	// [IGNORE] E000 — F8FF   Private Use Area
	//          F900 — FAFF   CJK Compatibility Ideographs
	// [IGNORE] FB00 — FB4F   Alphabetic Presentation Forms
	// [IGNORE] FB50 — FDFF   Arabic Presentation Forms-A
	// [IGNORE] FE00 — FE0F   Variation Selectors
	// [IGNORE] FE20 — FE2F   Combining Half Marks
	// [IGNORE] FE30 — FE4F   CJK Compatibility Forms
	// [IGNORE] FE50 — FE6F   Small Form Variants
	// [IGNORE] FE70 — FEFF   Arabic Presentation Forms-B
	//          FF00 — FFEF   Halfwidth and Fullwidth Forms
	//               [https://en.wikipedia.org/wiki/Halfwidth_and_fullwidth_forms]
	//               of which FF01 - FF5E fullwidth ASCII of 21 to 7E
	// [IGNORE]    and FF65 - FFDC halfwidth of Katakana and Hangul
	// [IGNORE] FFF0 — FFFF   Specials
	charCode = +charCode; // @perf
	return (
		(charCode >= 0x2E80 && charCode <= 0xD7AF)
		|| (charCode >= 0xF900 && charCode <= 0xFAFF)
		|| (charCode >= 0xFF01 && charCode <= 0xFF5E)
	);
}

E
Erich Gamma 已提交
591 592 593 594
/**
 * Given a string and a max length returns a shortened version. Shortening
 * happens at favorable positions - such as whitespace or punctuation characters.
 *
 * The text is cut from the left: the result is the tail of `text` that starts at the last
 * word boundary which still leaves `n` or more characters, without one leading whitespace
 * character.
 * @param text the text to shorten
 * @param n the length below which `text` is returned as is
 * @returns `text` when it is shorter than `n`, otherwise the tail described above
 */
export function lcut(text: string, n: number): string {
	if (text.length < n) {
		return text;
	}

	const re = /\b/g;
	let i = 0;
	while (re.test(text)) {
		// Stop at the first word boundary that leaves fewer than `n` characters.
		if (text.length - re.lastIndex < n) {
			break;
		}

		i = re.lastIndex;
		// `\b` matches are empty, so move on by hand to reach the next boundary.
		re.lastIndex += 1;
	}

	return text.substring(i).replace(/^\s/, empty);
}

E
Erich Gamma 已提交
614 615
// Escape codes
// http://en.wikipedia.org/wiki/ANSI_escape_code
const EL = /\x1B\x5B[12]?K/g; // Erase in line
const COLOR_START = /\x1b\[\d+m/g; // Color
const COLOR_END = /\x1b\[0?m/g; // Color

/**
 * Strips the ANSI escape sequences matched by `EL`, `COLOR_START` and `COLOR_END`:
 * "erase in line" (`ESC[K`, `ESC[1K`, `ESC[2K`) and color codes with at most one numeric
 * parameter (`ESC[m`, `ESC[31m`, ...).
 * @param str the text to clean; a falsy value is returned unchanged
 * @returns `str` without the escape sequences listed above
 */
export function removeAnsiEscapeCodes(str: string): string {
	if (str) {
		str = str.replace(EL, '');
		str = str.replace(COLOR_START, '');
		str = str.replace(COLOR_END, '');
	}

	return str;
}

630 631 632 633 634 635 636 637 638 639 640 641 642 643 644
/**
 * Strips accents from a string: the text is brought into NFD form and the characters in
 * the range U+0300 - U+036F are dropped. When `String.prototype.normalize` is not
 * available, the text is returned unchanged.
 * see: https://stackoverflow.com/questions/990904/remove-accents-diacritics-in-a-string-in-javascript/37511463#37511463
 */
export const removeAccents: (str: string) => string = (function () {
	const canNormalize = typeof (String.prototype as any).normalize === 'function';

	if (canNormalize) {
		const combiningMarks = /[\u0300-\u036f]/g;
		return function (str: string) {
			const decomposed = (str as any).normalize('NFD');
			return decomposed.replace(combiningMarks, empty);
		};
	}

	// No ES6 string features in this runtime: hand the text back as it is.
	return function (str: string) {
		return str;
	};
})();


E
Erich Gamma 已提交
645 646
// -- UTF-8 BOM

A
Alex Dima 已提交
647
/** The one-character string built from the char code `CharCode.UTF8_BOM`. */
export const UTF8_BOM_CHARACTER = String.fromCharCode(CharCode.UTF8_BOM);
E
Erich Gamma 已提交
648 649

/**
 * @param str the text to inspect
 * @returns `true` when `str` is non-empty and its first char code is `CharCode.UTF8_BOM`
 */
export function startsWithUTF8BOM(str: string): boolean {
	return !!(str && str.length > 0 && str.charCodeAt(0) === CharCode.UTF8_BOM);
}

653 654 655 656
/**
 * Drops the first character of `str` when `startsWithUTF8BOM` reports a leading BOM.
 * @param str the text to clean
 * @returns `str` without its first character in that case, `str` itself otherwise
 */
export function stripUTF8BOM(str: string): string {
	if (startsWithUTF8BOM(str)) {
		return str.substr(1);
	}
	return str;
}

657 658
/**
 * Base64-encodes a string that may contain characters outside of Latin 1.
 *
 * The text is percent-encoded with `encodeURIComponent` first, so the result is the
 * base64 form of the percent-encoded text, not of `str` itself.
 * @param str the text to encode
 */
export function safeBtoa(str: string): string {
	return btoa(encodeURIComponent(str)); // we use encodeURIComponent because btoa fails for non Latin 1 values
}

J
Johannes Rieken 已提交
661
/**
 * Concatenates `count` copies of `s`.
 * @param s the string to repeat
 * @param count the number of copies; values of 0 or less yield the empty string
 */
export function repeat(s: string, count: number): string {
	let result = '';
	for (let i = 0; i < count; i++) {
		result += s;
	}
	return result;
}
668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687

/**
 * Checks if the characters of the provided query string are included in the
 * target string. The characters do not have to be contiguous within the string.
 *
 * The target is lower-cased before the search while the query is used as given, so an
 * upper-case character in the query never matches.
 * @param target the string to search in
 * @param query the characters to find, in this order
 * @returns `true` when every character of query occurs in target in the given order
 */
export function fuzzyContains(target: string, query: string): boolean {
	if (!target || !query) {
		return false; // return early if target or query are undefined
	}

	if (target.length < query.length) {
		return false; // impossible for query to be contained in target
	}

	const queryLen = query.length;
	const targetLower = target.toLowerCase();

	let index = 0;
	let lastIndexOf = -1;
	while (index < queryLen) {
		// Each query character has to occur behind the match of the previous one.
		const indexOf = targetLower.indexOf(query[index], lastIndexOf + 1);
		if (indexOf < 0) {
			return false;
		}

		lastIndexOf = indexOf;

		index++;
	}

	return true;
}
700 701 702 703 704 705 706 707 708 709 710 711

/**
 * Tells whether lower-casing `target` would change it, i.e. whether it holds an
 * upper-case character.
 * @param target the text to inspect; a falsy value yields `false`
 * @param ignoreEscapedChars when `true`, every backslash together with the character
 * that follows it is removed before the check
 */
export function containsUppercaseCharacter(target: string, ignoreEscapedChars = false): boolean {
	if (!target) {
		return false;
	}

	const candidate = ignoreEscapedChars ? target.replace(/\\./g, '') : target;
	return candidate !== candidate.toLowerCase();
}
712 713 714

/**
 * Upper-cases the first character of a string.
 * @param str the text to change
 * @returns `str` with its first character upper-cased; the empty string stays empty
 */
export function uppercaseFirstLetter(str: string): string {
	return str.charAt(0).toUpperCase() + str.slice(1);
}

/**
 * Returns the first `n` lines of `str`, without the `\n` that ends the last of them.
 * @param str the text to cut
 * @param n the number of lines to keep (defaults to 1); 0 yields the empty string
 * @returns the text in front of the n-th `\n`, or `str` itself when it has fewer than `n`
 * line breaks
 */
export function getNLines(str: string, n = 1): string {
	if (n === 0) {
		return '';
	}

	let remaining = n;
	let newlineAt = -1;
	for (; ;) {
		newlineAt = str.indexOf('\n', newlineAt + 1);
		remaining--;
		if (!(remaining > 0 && newlineAt >= 0)) {
			break;
		}
	}

	if (newlineAt >= 0) {
		return str.substr(0, newlineAt);
	}
	return str;
}
A
Alex Dima 已提交
732 733 734 735 736 737 738 739 740 741 742 743 744 745 746

/**
 * Maps a number onto a single letter: 0 - 25 give 'a'-'z', 26 - 51 give 'A'-'Z', and the
 * sequence starts over at 52.
 * @param n the number to map
 */
export function singleLetterHash(n: number): string {
	const alphabetSize = (CharCode.Z - CharCode.A + 1);
	const slot = n % (2 * alphabetSize);

	return slot < alphabetSize
		? String.fromCharCode(CharCode.a + slot)
		: String.fromCharCode(CharCode.A + slot - alphabetSize);
}