uri.ts 21.1 KB
Newer Older
E
Erich Gamma 已提交
1 2 3 4 5
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

6
import { isWindows } from 'vs/base/common/platform';
7
import { CharCode } from 'vs/base/common/charCode';
8
import { isHighSurrogate, isLowSurrogate, isLowerAsciiHex, isAsciiLetter } from 'vs/base/common/strings';
9

10
// Pattern a scheme must match in `_validateUri`: one word character followed
// by any number of word characters, digits, '+', '.' or '-'.
// NOTE(review): `\w` also admits digits and '_' in first position, which is more
// lenient than the RFC 3986 rule (ALPHA first) quoted in `_validateUri` — confirm intent.
const _schemeRegExp = /^\w[\w\d+.-]*$/;
11

12
/**
 * Validates the components of the given URI and throws when they violate
 * the checks below. Nothing is returned; a valid URI passes silently.
 *
 * @param ret The URI whose `scheme`, `authority` and `path` are checked.
 * @throws Error (message prefixed with `[UriError]`) when the scheme is missing,
 * the scheme does not match `_schemeRegExp`, or the path/authority combination is invalid.
 */
function _validateUri(ret: URI): void {

	// scheme, must be set
	if (!ret.scheme) {
		throw new Error(`[UriError]: Scheme is missing: {scheme: "", authority: "${ret.authority}", path: "${ret.path}", query: "${ret.query}", fragment: "${ret.fragment}"}`);
	}

	// scheme, https://tools.ietf.org/html/rfc3986#section-3.1
	// ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
	// (the scheme is known to be non-empty at this point)
	if (!_schemeRegExp.test(ret.scheme)) {
		throw new Error('[UriError]: Scheme contains illegal characters.');
	}

	// path, http://tools.ietf.org/html/rfc3986#section-3.3
	// If a URI contains an authority component, then the path component
	// must either be empty or begin with a slash ("/") character.  If a URI
	// does not contain an authority component, then the path cannot begin
	// with two slash characters ("//").
	if (ret.path) {
		if (ret.authority) {
			if (ret.path.charCodeAt(0) !== CharCode.Slash) {
				throw new Error('[UriError]: If a URI contains an authority component, then the path component must either be empty or begin with a slash ("/") character');
			}
		} else {
			if (ret.path.charCodeAt(0) === CharCode.Slash && ret.path.charCodeAt(1) === CharCode.Slash) {
				throw new Error('[UriError]: If a URI does not contain an authority component, then the path cannot begin with two slash characters ("//")');
			}
		}
	}
}

43
/**
 * Graceful behaviour when the scheme is missing: falls back to the `file` scheme
 * unless `strict` is set.
 *
 * @param scheme The scheme as given by the caller; may be empty.
 * @param strict When truthy, a missing scheme is an error instead of being replaced.
 * @returns `scheme` unchanged when it is non-empty, otherwise `'file'`.
 * @throws Error when `scheme` is empty and `strict` is truthy.
 */
function _schemeFix(scheme: string, strict?: boolean): string {
	if (scheme) {
		return scheme;
	}
	if (strict) {
		throw new Error('[UriError]: A scheme must be provided');
	}
	// console.trace('BAD uri lacks scheme, falling back to file-scheme.');
	return 'file';
}

56 57 58 59 60 61 62 63 64 65 66 67
/**
 * Implements a bit of https://tools.ietf.org/html/rfc3986#section-5
 *
 * The slash-character is our 'default base' as we don't support constructing
 * URIs relative to other URIs. This also means that we alter and potentially
 * break paths. See https://tools.ietf.org/html/rfc3986#section-5.1.4
 *
 * @param scheme The scheme of the URI; only `https`, `http` and `file` are affected.
 * @param path The path component, possibly empty.
 * @returns For the schemes above: `/` when `path` is empty, or `path` with a
 * leading slash added when it has none. For all other schemes `path` unchanged.
 */
function _referenceResolution(scheme: string, path: string): string {
	switch (scheme) {
		case 'https':
		case 'http':
		case 'file':
			if (!path) {
				return '/';
			}
			if (path.charCodeAt(0) !== CharCode.Slash) {
				return '/' + path;
			}
			break;
	}
	return path;
}

77
// Splits a URI string into its components. Every part is optional, capture groups are:
//   1: scheme including ':'   2: scheme
//   3: '//' plus authority    4: authority
//   5: path
//   6: '?' plus query         7: query
//   8: '#' plus fragment      9: fragment
// The groups of interest are named by `MatchIndex`.
const _uriRegExp = /^(([^:/?#]+?):)?(\/\/([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?/;
78

79 80 81 82 83 84 85 86
// Capture-group indices of `_uriRegExp` that hold the bare component values
// (without the ':', '//', '?' and '#' delimiters).
const enum MatchIndex {
	scheme = 2,
	authority = 4,
	path = 5,
	query = 7,
	fragment = 9
}

87 88 89 90 91
// Global single-character patterns, used with `String#replace` to rewrite every
// occurrence: `%`, `#` and `\` in `URI.file`, `/` in `_toFsPath`.
const _percentRegExp = /%/g;
const _hashRegExp = /#/g;
const _backslashRegExp = /\\/g;
const _slashRegExp = /\//g;

E
Erich Gamma 已提交
92 93
/**
 * Uniform Resource Identifier (URI) http://tools.ietf.org/html/rfc3986.
 * This class is a simple parser which creates the basic component parts
 * (http://tools.ietf.org/html/rfc3986#section-3) with minimal validation
 * and encoding.
 *
 *       foo://example.com:8042/over/there?name=ferret#nose
 *       \_/   \______________/\_________/ \_________/ \__/
 *        |           |            |            |        |
 *     scheme     authority       path        query   fragment
 *        |   _____________________|__
 *       / \ /                        \
 *       urn:example:animal:ferret:nose
 */
export class URI implements UriComponents {

	/**
	 * Type guard for URIs. Accepts real `URI` instances as well as objects that
	 * structurally look like one: string-valued `authority`, `fragment`, `path`,
	 * `query`, `scheme` and `fsPath`, plus `with` and `toString` functions.
	 *
	 * @param thing Any value.
	 * @returns `true` when `thing` is, or is shaped like, a `URI`.
	 */
	static isUri(thing: any): thing is URI {
		if (thing instanceof URI) {
			return true;
		}
		if (!thing) {
			return false;
		}
		return typeof (<URI>thing).authority === 'string'
			&& typeof (<URI>thing).fragment === 'string'
			&& typeof (<URI>thing).path === 'string'
			&& typeof (<URI>thing).query === 'string'
			&& typeof (<URI>thing).scheme === 'string'
			// `fsPath` is an accessor that yields a string, not a method
			&& typeof (<URI>thing).fsPath === 'string'
			&& typeof (<URI>thing).with === 'function'
			&& typeof (<URI>thing).toString === 'function';
	}

	/**
	 * scheme is the 'http' part of 'http://www.msft.com/some/path?query#fragment'.
	 * The part before the first colon.
	 */
	readonly scheme: string;

	/**
	 * authority is the 'www.msft.com' part of 'http://www.msft.com/some/path?query#fragment'.
	 * The part between the first double slashes and the next slash.
	 */
	readonly authority: string;

	/**
	 * path is the '/some/path' part of 'http://www.msft.com/some/path?query#fragment'.
	 */
	readonly path: string;

	/**
	 * query is the 'query' part of 'http://www.msft.com/some/path?query#fragment'.
	 */
	readonly query: string;

	/**
	 * fragment is the 'fragment' part of 'http://www.msft.com/some/path?query#fragment'.
	 */
	readonly fragment: string;

	/**
	 * @internal
	 */
	protected constructor(scheme: string, authority?: string, path?: string, query?: string, fragment?: string);

	/**
	 * @internal
	 */
	protected constructor(components: UriComponents);

	/**
	 * @internal
	 */
	protected constructor(schemeOrData: string | UriComponents, authority?: string, path?: string, query?: string, fragment?: string) {

		if (typeof schemeOrData === 'object') {
			this.scheme = schemeOrData.scheme || '';
			this.authority = schemeOrData.authority || '';
			this.path = schemeOrData.path || '';
			this.query = schemeOrData.query || '';
			this.fragment = schemeOrData.fragment || '';
			// no validation because it's this URI
			// that creates uri components.
			// _validateUri(this);
		} else {
			this.scheme = _schemeFix(schemeOrData);
			this.authority = authority || '';
			this.path = _referenceResolution(this.scheme, path || '');
			this.query = query || '';
			this.fragment = fragment || '';

			_validateUri(this);
		}
	}

	// ---- filesystem path -----------------------

	/**
	 * Returns a string representing the corresponding file system path of this URI.
	 * Will handle UNC paths, normalizes windows drive letters to lower-case, and uses the
	 * platform specific path separator.
	 *
	 * * Will *not* validate the path for invalid characters and semantics.
	 * * Will *not* look at the scheme of this URI.
	 * * The result shall *not* be used for display purposes but for accessing a file on disk.
	 *
	 *
	 * The *difference* to `URI#path` is the use of the platform specific separator and the handling
	 * of UNC paths. See the below sample of a file-uri with an authority (UNC path).
	 *
	 * ```ts
	 * const u = URI.parse('file://server/c$/folder/file.txt')
	 * u.authority === 'server'
	 * u.path === '/c$/folder/file.txt'
	 * u.fsPath === '\\server\c$\folder\file.txt'
	 * ```
	 *
	 * Using `URI#path` to read a file (using fs-apis) would not be enough because parts of the path,
	 * namely the server name, would be missing. Therefore `URI#fsPath` exists - it's sugar to ease working
	 * with URIs that represent files on disk (`file` scheme).
	 */
	get fsPath(): string {
		return _toFsPath(this.scheme, this.authority, this.path);
	}

	// ---- modify to new -------------------------

	/**
	 * Derives a URI from this one by replacing some of its components.
	 *
	 * @param change The components to replace. A component that is `undefined`
	 * (or absent) keeps its current value, `null` resets it to the empty string.
	 * @returns `this` when `change` is falsy or when no component actually changes,
	 * otherwise a new URI with the resulting components.
	 */
	with(change: { scheme?: string; authority?: string | null; path?: string | null; query?: string | null; fragment?: string | null; }): URI {

		if (!change) {
			return this;
		}

		let { scheme, authority, path, query, fragment } = change;
		if (scheme === undefined) {
			scheme = this.scheme;
		} else if (scheme === null) {
			scheme = '';
		}
		if (authority === undefined) {
			authority = this.authority;
		} else if (authority === null) {
			authority = '';
		}
		if (path === undefined) {
			path = this.path;
		} else if (path === null) {
			path = '';
		}
		if (query === undefined) {
			query = this.query;
		} else if (query === null) {
			query = '';
		}
		if (fragment === undefined) {
			fragment = this.fragment;
		} else if (fragment === null) {
			fragment = '';
		}

		if (scheme === this.scheme
			&& authority === this.authority
			&& path === this.path
			&& query === this.query
			&& fragment === this.fragment) {

			return this;
		}

		return new _URI(scheme, authority, path, query, fragment);
	}

	// ---- parse & validate ------------------------

	/**
	 * Creates a new URI from a string, e.g. `http://www.msft.com/some/path`,
	 * `file:///usr/home`, or `scheme:with/path`.
	 *
	 * *Note:* When the input lacks a scheme then `file` is used, unless `strict`
	 * is set, in which case an error is thrown.
	 *
	 * @param value A string which represents an URI (see `URI#toString`).
	 * @param strict When truthy, a missing scheme is an error instead of falling back to `file`.
	 * @throws Error when `value` cannot be matched, when `strict` is set and no
	 * scheme is present, or when the resulting components fail validation.
	 */
	static parse(value: string, strict?: boolean): URI {
		const match = _uriRegExp.exec(value);
		if (!match) {
			throw new Error(`[UriError]: Invalid input: ${value}`);
		}

		// `_schemeFix` either returns a non-empty scheme or throws
		const scheme = _schemeFix(match[MatchIndex.scheme], strict);
		const authority = match[MatchIndex.authority] || '';
		const path = _referenceResolution(scheme, match[MatchIndex.path] || '');
		const query = match[MatchIndex.query] || '';
		const fragment = match[MatchIndex.fragment] || '';

		const result = new _URI(
			scheme,
			percentDecode(authority),
			percentDecode(path),
			percentDecode(query),
			percentDecode(fragment),
		);
		// seed the string-form cache from the components as they appeared in the input (not decoded)
		result._external = _toExternal(_normalEncoder, scheme, authority, path, query, fragment);
		return result;
	}

	/**
	 * Creates a new URI from a file system path, e.g. `c:\my\files`,
	 * `/usr/home`, or `\\server\share\some\path`.
	 *
	 * The *difference* between `URI#parse` and `URI#file` is that the latter treats the argument
	 * as path, not as stringified-uri. E.g. `URI.file(path)` is **not the same as**
	 * `URI.parse('file://' + path)` because the path might contain characters that are
	 * interpreted (# and ?). See the following sample:
	 * ```ts
	 * const good = URI.file('/coding/c#/project1');
	 * good.scheme === 'file';
	 * good.path === '/coding/c#/project1';
	 * good.fragment === '';
	 * const bad = URI.parse('file://' + '/coding/c#/project1');
	 * bad.scheme === 'file';
	 * bad.path === '/coding/c'; // path is now broken
	 * bad.fragment === '/project1';
	 * ```
	 *
	 * @param path A file system path (see `URI#fsPath`)
	 */
	static file(path: string): URI {

		let authority = '';

		// normalize to fwd-slashes on windows,
		// on other systems bwd-slashes are valid
		// filename character, eg /f\oo/ba\r.txt
		if (isWindows) {
			path = path.replace(_backslashRegExp, '/');
		}

		// check for authority as used in UNC shares
		// or use the path as given
		if (path.charCodeAt(0) === CharCode.Slash && path.charCodeAt(1) === CharCode.Slash) {
			const idx = path.indexOf('/', 2);
			if (idx === -1) {
				authority = path.substring(2);
				path = '/';
			} else {
				authority = path.substring(2, idx);
				path = path.substring(idx) || '/';
			}
		}

		// ensures that path starts with /
		path = _referenceResolution('file', path);

		// escape some vital characters
		authority = authority.replace(_percentRegExp, '%25');
		path = path.replace(_percentRegExp, '%25');
		path = path.replace(_hashRegExp, '%23');

		if (!isWindows) {
			path = path.replace(_backslashRegExp, '%5C');
		}

		return URI.parse('file://' + authority + path);
	}

	/**
	 * Creates a new URI from its components. Missing components default to the
	 * empty string and the result is validated by the constructor.
	 *
	 * @param components The parts of the URI; only `scheme` is required.
	 */
	static from(components: { scheme: string; authority?: string; path?: string; query?: string; fragment?: string; }): URI {
		return new _URI(
			components.scheme,
			components.authority,
			components.path,
			components.query,
			components.fragment,
		);
	}

	// ---- printing/externalize ---------------------------

	/**
	 * Creates a string representation for this URI. It's guaranteed that calling
	 * `URI.parse` with the result of this function creates an URI which is equal
	 * to this URI.
	 *
	 * * The result shall *not* be used for display purposes but for externalization or transport.
	 * * The result will be encoded using percent encoding; the encoding mostly
	 * ignores scheme-specific encoding rules.
	 *
	 * @param skipEncoding Do not encode the result, default is `false`
	 */
	toString(skipEncoding: boolean = false): string {
		return _toExternal(skipEncoding ? _minimalEncoder : _normalEncoder, this.scheme, this.authority, this.path, this.query, this.fragment);
	}

	/**
	 * Returns the object used for JSON serialization; this base implementation
	 * returns the URI itself.
	 */
	toJSON(): UriComponents {
		return this;
	}

	/**
	 * Turns URI components (e.g. the result of `toJSON` after a JSON round-trip)
	 * back into a `URI`. Falsy input and `URI` instances are returned as they are.
	 * No validation is performed on revived components.
	 *
	 * @param data Components, a `URI`, `undefined` or `null`.
	 */
	static revive(data: UriComponents | URI): URI;
	static revive(data: UriComponents | URI | undefined): URI | undefined;
	static revive(data: UriComponents | URI | null): URI | null;
	static revive(data: UriComponents | URI | undefined | null): URI | undefined | null;
	static revive(data: UriComponents | URI | undefined | null): URI | undefined | null {
		if (!data) {
			return data;
		} else if (data instanceof URI) {
			return data;
		} else {
			const result = new _URI(data);
			result._external = (<UriState>data).external;
			// a cached fsPath is only reused when the stored separator marker matches `_pathSepMarker`
			result._fsPath = (<UriState>data)._sep === _pathSepMarker ? (<UriState>data).fsPath : null;
			return result;
		}
	}
}

J
Johannes Rieken 已提交
406
/**
 * The five component parts of a URI as plain strings,
 * see http://tools.ietf.org/html/rfc3986#section-3
 */
export interface UriComponents {
	scheme: string;
	authority: string;
	path: string;
	query: string;
	fragment: string;
}

/**
 * Shape of the JSON produced by `_URI#toJSON`: the URI components plus
 * cached values that `URI.revive` can reuse.
 */
interface UriState extends UriComponents {
	/** Marker written by `_URI#toJSON` (always `1` there). */
	$mid: number;
	/** Cached encoded string form of the URI. */
	external: string;
	/** Cached file system path. */
	fsPath: string;
	/** Value of `_pathSepMarker` at the time `fsPath` was stored. */
	_sep: 1 | undefined;
}

421
// Marker stored as `_sep` next to a cached `fsPath` in `_URI#toJSON` and compared
// in `URI.revive`: the cached `fsPath` is only reused when the marker matches.
// It is `1` when `isWindows` is set and `undefined` otherwise.
const _pathSepMarker = isWindows ? 1 : undefined;
422

423 424
/**
 * Concrete URI implementation that lazily computes and caches the results
 * of `fsPath` and `toString()` and includes those caches in its JSON form.
 */
// tslint:disable-next-line:class-name
class _URI extends URI {

	/** Cached result of `toString()` with encoding, `null` until computed. */
	_external: string | null = null;
	/** Cached result of `fsPath`, `null` until computed. */
	_fsPath: string | null = null;

	get fsPath(): string {
		if (!this._fsPath) {
			this._fsPath = _toFsPath(this.scheme, this.authority, this.path);
		}
		return this._fsPath;
	}

	toString(skipEncoding: boolean = false): string {
		if (skipEncoding) {
			// we don't cache that
			return _toExternal(_minimalEncoder, this.scheme, this.authority, this.path, this.query, this.fragment);
		}
		if (!this._external) {
			this._external = _toExternal(_normalEncoder, this.scheme, this.authority, this.path, this.query, this.fragment);
		}
		return this._external;
	}

	/**
	 * Creates the JSON form of this URI: the `$mid` marker, whatever caches
	 * have been computed so far, and every non-empty component.
	 */
	toJSON(): UriComponents {
		const res = <UriState>{
			$mid: 1
		};
		// cached state
		if (this._fsPath) {
			res.fsPath = this._fsPath;
			// remember which separator flavour the cached path was computed with
			res._sep = _pathSepMarker;
		}
		if (this._external) {
			res.external = this._external;
		}
		// uri components
		if (this.path) {
			res.path = this.path;
		}
		if (this.scheme) {
			res.scheme = this.scheme;
		}
		if (this.authority) {
			res.authority = this.authority;
		}
		if (this.query) {
			res.query = this.query;
		}
		if (this.fragment) {
			res.fragment = this.fragment;
		}
		return res;
	}
}

/**
 * Compute `fsPath` for the given uri
 *
 * @param scheme The URI scheme; only consulted for the UNC case.
 * @param authority The URI authority, used as server name of a UNC path.
 * @param path The URI path.
 * @returns The file system path; when `isWindows` is set every `/` is
 * replaced with `\`.
 */
function _toFsPath(scheme: string, authority: string, path: string): string {

	let value: string;
	if (authority && path.length > 1 && scheme === 'file') {
		// unc path: file://shares/c$/far/boo
		value = `//${authority}${path}`;
	} else if (
		path.charCodeAt(0) === CharCode.Slash
		&& isAsciiLetter(path.charCodeAt(1))
		&& path.charCodeAt(2) === CharCode.Colon
	) {
		// windows drive letter: file:///c:/far/boo
		// drop the leading slash and lower-case the drive letter
		value = path[1].toLowerCase() + path.substr(2);
	} else {
		// other path
		value = path;
	}
	if (isWindows) {
		value = value.replace(_slashRegExp, '\\');
	}
	return value;
}
504

J
Johannes Rieken 已提交
505 506 507 508 509 510 511 512 513 514 515 516 517 518 519

//#region ---- decode

/**
 * Decodes as much of `str` as `decodeURIComponent` accepts. Whenever decoding
 * the remaining text fails, its first three characters are kept verbatim and
 * decoding is retried on what follows them.
 *
 * @param str The text to decode.
 * @returns The decoded text, with undecodable three-character chunks left as they were.
 */
function decodeURIComponentGraceful(str: string): string {
	let kept = '';
	let remaining = str;
	for (; ;) {
		try {
			return kept + decodeURIComponent(remaining);
		} catch {
			if (remaining.length <= 3) {
				// nothing left to skip over, give up on the rest
				return kept + remaining;
			}
			kept += remaining.substr(0, 3);
			remaining = remaining.substr(3);
		}
	}
}

520 521
// Matches runs of one or more `%XX` triplets where each X is an ASCII letter or digit.
// Note that this is wider than hex digits (e.g. `%zz` matches); `percentDecode` hands
// each run to `decodeURIComponentGraceful`.
const _encodedAsHexRegExp = /(%[0-9A-Za-z][0-9A-Za-z])+/g;

J
Johannes Rieken 已提交
522
/**
 * Decodes the percent-encoded sequences of a string and leaves everything else as is.
 *
 * @param str The text to decode.
 * @returns `str` itself when it contains no run matching `_encodedAsHexRegExp`,
 * otherwise a copy in which each such run is replaced by the result of
 * `decodeURIComponentGraceful`.
 */
function percentDecode(str: string): string {
	if (!str.match(_encodedAsHexRegExp)) {
		return str;
	}
	return str.replace(_encodedAsHexRegExp, (match) => decodeURIComponentGraceful(match));
}

//#endregion

//#region ---- encode

// https://url.spec.whatwg.org/#percent-encoded-bytes

J
polish  
Johannes Rieken 已提交
535
// "The C0 control percent-encode set are the C0 controls and all code points greater than U+007E (~)."
J
Johannes Rieken 已提交
536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562
function isC0ControlPercentEncodeSet(code: number): boolean {
	// everything above `~` (U+007E) ...
	if (code > 0x7E) {
		return true;
	}
	// ... and the C0 controls U+0000 to U+001F
	return code <= 0x1F;
}
// "The fragment percent-encode set is the C0 control percent-encode set and U+0020 SPACE, U+0022 ("), U+003C (<), U+003E (>), and U+0060 (`)."
function isFragmentPercentEncodeSet(code: number): boolean {
	if (isC0ControlPercentEncodeSet(code)) {
		return true;
	}
	switch (code) {
		case 0x20: // SPACE
		case 0x22: // "
		case 0x3C: // <
		case 0x3E: // >
		case 0x60: // `
			return true;
		default:
			return false;
	}
}
// "The path percent-encode set is the fragment percent-encode set and U+0023 (#), U+003F (?), U+007B ({), and U+007D (})."
function isPathPercentEncodeSet(code: number): boolean {
	if (isFragmentPercentEncodeSet(code)) {
		return true;
	}
	switch (code) {
		case 0x23: // #
		case 0x3F: // ?
		case 0x7B: // {
		case 0x7D: // }
			return true;
		default:
			return false;
	}
}
// "The userinfo percent-encode set is the path percent-encode set and U+002F (/), U+003A (:), U+003B (;), U+003D (=), U+0040 (@), U+005B ([), U+005C (\), U+005D (]), U+005E (^), and U+007C (|)."
function isUserInfoPercentEncodeSet(code: number): boolean {
	if (isPathPercentEncodeSet(code)) {
		return true;
	}
	switch (code) {
		case 0x2F: // /
		case 0x3A: // :
		case 0x3B: // ;
		case 0x3D: // =
		case 0x40: // @
		case 0x5B: // [
		case 0x5C: // \
		case 0x5D: // ]
		case 0x5E: // ^
		case 0x7C: // |
			return true;
		default:
			return false;
	}
}

// Query percent-encode set, see https://url.spec.whatwg.org/#query-state
function isQueryPrecentEncodeSet(code: number): boolean {
	// everything up to and including SPACE, and everything above `~`
	if (code < 0x21 || code > 0x7E) {
		return true;
	}
	switch (code) {
		case 0x22: // "
		case 0x23: // #
		case 0x3C: // <
		case 0x3E: // >
		case 0x27: // ' <- todo@joh https://url.spec.whatwg.org/#is-special
			return true;
		default:
			return false;
	}
}

563 564 565 566 567
// Non-standard predicate that backs `_minimalEncoder`, i.e. `URI.toString(true)`:
// only `#` and `?` are reported as needing encoding.
function isHashOrQuestionMark(code: number): boolean {
	switch (code) {
		case CharCode.Hash:
		case CharCode.QuestionMark:
			return true;
		default:
			return false;
	}
}

J
polish  
Johannes Rieken 已提交
568 569 570
// Lookup table with the percent-encoded form (`%XX`, upper-case hex, zero-padded)
// of the char codes 0 to 127, indexed by char code.
const _encodeTable: string[] = (function () {
	const table: string[] = [];
	for (let code = 0; code < 128; code++) {
		const hex = code.toString(16).toUpperCase();
		// codes below 16 have a single hex digit and need a leading zero
		table[code] = code < 16 ? `%0${hex}` : `%${hex}`;
	}
	return table;
})();

J
Johannes Rieken 已提交
580 581
/**
 * Percent-encodes the characters of `str` selected by `mustEncode` and
 * upper-cases existing percent-encoded sequences that contain lower-case hex digits.
 *
 * The output is built lazily: as long as nothing needs to change no new string
 * is created and `str` itself is returned.
 *
 * @param str The text to encode.
 * @param mustEncode Predicate called with each UTF-16 char code; `true` means
 * the character must be encoded.
 * @returns The encoded text, or `str` when no change was needed.
 */
function percentEncode(str: string, mustEncode: (code: number) => boolean): string {
	let lazyOutStr: string | undefined;
	for (let pos = 0; pos < str.length; pos++) {
		const code = str.charCodeAt(pos);

		// invoke encodeURIComponent when needed
		if (mustEncode(code)) {
			if (!lazyOutStr) {
				lazyOutStr = str.substr(0, pos);
			}
			if (isHighSurrogate(code)) {
				// Append encoded version of this surrogate pair (2 characters)
				if (pos + 1 < str.length && isLowSurrogate(str.charCodeAt(pos + 1))) {
					lazyOutStr += encodeURIComponent(str.substr(pos, 2));
					pos += 1;
				} else {
					// broken surrogate pair
					lazyOutStr += str.charAt(pos);
				}
			} else if (isLowSurrogate(code)) {
				// low surrogate without a preceding high surrogate: `encodeURIComponent`
				// throws a URIError for it, so keep it as is, like a broken pair above
				lazyOutStr += str.charAt(pos);
			} else {
				// Append encoded version of the current character, use lookup table
				// to speed up repeated encoding of the same characters.
				if (code < _encodeTable.length) {
					lazyOutStr += _encodeTable[code];
				} else {
					lazyOutStr += encodeURIComponent(str.charAt(pos));
				}
			}
			continue;
		}

		// normalize percent encoded sequences to upper case
		// todo@joh also changes invalid sequences
		if (code === CharCode.PercentSign
			&& pos + 2 < str.length
			&& (isLowerAsciiHex(str.charCodeAt(pos + 1)) || isLowerAsciiHex(str.charCodeAt(pos + 2)))
		) {
			if (!lazyOutStr) {
				lazyOutStr = str.substr(0, pos);
			}
			lazyOutStr += '%' + str.substr(pos + 1, 2).toUpperCase();
			// skip the two characters that were just consumed
			pos += 2;
			continue;
		}

		// once started, continue to build up lazy output
		if (lazyOutStr) {
			lazyOutStr += str.charAt(pos);
		}
	}
	return lazyOutStr || str;
}

633 634 635
// Indices into an encoder table (`_normalEncoder`, `_minimalEncoder`) as used by
// `_toExternal`. The member order must match the order of the predicates in those arrays.
const enum EncodePart {
	user, authority, path, query, fragment
}
636
// Encoder table used for the default (encoded) string form: one must-encode
// predicate per `EncodePart`, in the order user, authority, path, query, fragment.
const _normalEncoder: { (code: number): boolean }[] = [isUserInfoPercentEncodeSet, isC0ControlPercentEncodeSet, isPathPercentEncodeSet, isQueryPrecentEncodeSet, isFragmentPercentEncodeSet];
637 638
// Encoder table used when encoding is skipped (`toString(true)`): user, authority, path
// and query only report `#` and `?` as must-encode, the fragment predicate reports nothing.
const _minimalEncoder: { (code: number): boolean }[] = [isHashOrQuestionMark, isHashOrQuestionMark, isHashOrQuestionMark, isHashOrQuestionMark, () => false];

J
Johannes Rieken 已提交
639
// Matches, case-insensitively, a path that starts with an optional slash, one ASCII
// letter and a colon given either literally (`:`) or percent-encoded (`%3a`).
// Group 1 is the optional slash plus the letter; `_toExternal` uses it to
// lower-case the drive letter.
const _driveLetterRegExp = /^(\/?[a-z])(:|%3a)/i;
640

641 642 643
/**
 * Create the external version of a uri
 *
 * @param encoder Table of must-encode predicates, indexed by `EncodePart`.
 * @param scheme The scheme, written as is and followed by `:` when non-empty.
 * @param authority The authority; user info, host and port are encoded
 * separately and the part after the user info is lower-cased.
 * @param path The path; a leading windows drive letter is lower-cased and its colon written as `%3A`.
 * @param query The query, prefixed with `?` when non-empty.
 * @param fragment The fragment, prefixed with `#` when non-empty.
 * @returns The assembled string.
 */
function _toExternal(encoder: { (code: number): boolean }[], scheme: string, authority: string, path: string, query: string, fragment: string): string {

	let res = '';
	if (scheme) {
		res += scheme;
		res += ':';
	}
	// the `file` scheme always gets the double slash, even without an authority
	if (authority || scheme === 'file') {
		res += '//';
	}
	if (authority) {
		const idxUserInfo = authority.indexOf('@');
		if (idxUserInfo !== -1) {
			// <user:token>
			const userInfo = authority.substr(0, idxUserInfo);
			const idxPasswordOrToken = userInfo.indexOf(':');
			if (idxPasswordOrToken !== -1) {
				res += percentEncode(userInfo.substr(0, idxPasswordOrToken), encoder[EncodePart.user]);
				res += ':';
				res += percentEncode(userInfo.substr(idxPasswordOrToken + 1), encoder[EncodePart.user]);
			} else {
				res += percentEncode(userInfo, encoder[EncodePart.user]);
			}
			res += '@';
		}
		// without user info `idxUserInfo + 1` is 0 and the whole authority is kept
		authority = authority.substr(idxUserInfo + 1).toLowerCase();
		const idxPort = authority.indexOf(':');
		if (idxPort !== -1) {
			// <authority>:<port>
			res += percentEncode(authority.substr(0, idxPort), encoder[EncodePart.authority]);
			res += ':';
		}
		// the port, or the whole authority when there is no `:` (`idxPort + 1` is 0 then)
		res += percentEncode(authority.substr(idxPort + 1), encoder[EncodePart.authority]);
	}
	if (path) {
		// encode the path
		let pathEncoded = percentEncode(path, encoder[EncodePart.path]);

		// lower-case windows drive letters in /C:/fff or C:/fff and escape `:`
		const match = _driveLetterRegExp.exec(pathEncoded);
		if (match) {
			pathEncoded = match[1].toLowerCase() + '%3A' + pathEncoded.substr(match[0].length);
		}
		res += pathEncoded;
	}
	if (query) {
		res += '?';
		res += percentEncode(query, encoder[EncodePart.query]);
	}
	if (fragment) {
		res += '#';
		res += percentEncode(fragment, encoder[EncodePart.fragment]);
	}
	return res;
}
J
Johannes Rieken 已提交
699 700

//#endregion