uri.ts 20.9 KB
Newer Older
E
Erich Gamma 已提交
1 2 3 4 5
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

6
import { isWindows } from 'vs/base/common/platform';
7
import { CharCode } from 'vs/base/common/charCode';
J
Johannes Rieken 已提交
8
import { isHighSurrogate, isLowSurrogate } from 'vs/base/common/strings';
9

10 11 12 13
// Pattern a scheme must match to pass validation. Because `\w` also admits
// digits and `_`, this is more permissive than the RFC 3986 scheme grammar.
const _schemePattern = /^\w[\w\d+.-]*$/;
// Matches a path that begins with a slash.
const _singleSlashStart = /^\//;
// Matches a path that begins with two slashes.
const _doubleSlashStart = /^\/\//;

14
/**
 * Validates the components of a URI and throws when they violate the
 * structural rules checked below.
 *
 * @param ret The URI whose `scheme`, `authority` and `path` are checked.
 * @throws Error (message prefixed with `[UriError]`) when the scheme is
 * missing or does not match `_schemePattern`, when a path that accompanies an
 * authority does not start with a slash, or when a path without an authority
 * starts with two slashes.
 */
function _validateUri(ret: URI): void {

	// scheme, must be set
	if (!ret.scheme) {
		throw new Error(`[UriError]: Scheme is missing: {scheme: "", authority: "${ret.authority}", path: "${ret.path}", query: "${ret.query}", fragment: "${ret.fragment}"}`);
	}

	// scheme, https://tools.ietf.org/html/rfc3986#section-3.1
	// ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
	// (the scheme is known to be non-empty at this point)
	if (!_schemePattern.test(ret.scheme)) {
		throw new Error('[UriError]: Scheme contains illegal characters.');
	}

	// path, http://tools.ietf.org/html/rfc3986#section-3.3
	// If a URI contains an authority component, then the path component
	// must either be empty or begin with a slash ("/") character.  If a URI
	// does not contain an authority component, then the path cannot begin
	// with two slash characters ("//").
	if (ret.path) {
		if (ret.authority) {
			if (!_singleSlashStart.test(ret.path)) {
				throw new Error('[UriError]: If a URI contains an authority component, then the path component must either be empty or begin with a slash ("/") character');
			}
		} else {
			if (_doubleSlashStart.test(ret.path)) {
				throw new Error('[UriError]: If a URI does not contain an authority component, then the path cannot begin with two slash characters ("//")');
			}
		}
	}
}

45 46
/**
 * Graceful behaviour when the scheme is missing: falls back to the
 * `file` scheme and leaves a stack trace on the console so that the
 * offending caller can be found.
 *
 * @param scheme The scheme as given by the caller, possibly empty.
 * @returns `scheme` unchanged when it is truthy, `'file'` otherwise.
 */
function _schemeFix(scheme: string): string {
	if (!scheme) {
		console.trace('BAD uri lacks scheme, falling back to file-scheme.');
		return 'file';
	}
	return scheme;
}

54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74
/**
 * Implements a small part of https://tools.ietf.org/html/rfc3986#section-5.
 *
 * URIs cannot be constructed relative to other URIs here, so the slash
 * character acts as the 'default base' (see
 * https://tools.ietf.org/html/rfc3986#section-5.1.4). As a consequence
 * paths are altered, and potentially broken, for the schemes listed below.
 *
 * @param scheme The scheme of the URI being built.
 * @param path The path of the URI being built.
 * @returns For `https`, `http` and `file`: `path` with a guaranteed leading
 * slash (a lone slash when `path` is empty). For every other scheme: `path`
 * as given.
 */
function _referenceResolution(scheme: string, path: string): string {
	const rootedScheme = scheme === 'https' || scheme === 'http' || scheme === 'file';
	if (!rootedScheme) {
		return path;
	}
	if (!path) {
		return _slash;
	}
	return path[0] === _slash ? path : _slash + path;
}

75 76
// Shared string constants: the value used for absent URI components
// and the slash used as path separator.
const _empty = '';
const _slash = '/';
77

78 79
// Splits a stringified URI into its components. Every group is optional, the
// groups of interest are: 2 = scheme, 4 = authority, 5 = path, 7 = query and
// 9 = fragment (see `MatchIndex`).
const _regexp = /^(([^:/?#]+?):)?(\/\/([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?/;

80 81 82 83 84 85 86 87
/**
 * Indices of the capture groups of `_regexp` that hold the individual
 * URI components. The skipped indices are the enclosing groups that
 * also contain the delimiters (`:`, `//`, `?`, `#`).
 */
const enum MatchIndex {
	scheme = 2,
	authority = 4,
	path = 5,
	query = 7,
	fragment = 9
}

E
Erich Gamma 已提交
88 89
/**
 * Uniform Resource Identifier (URI) http://tools.ietf.org/html/rfc3986.
90
 * This class is a simple parser which creates the basic component parts
E
Erich Gamma 已提交
91 92 93
 * (http://tools.ietf.org/html/rfc3986#section-3) with minimal validation
 * and encoding.
 *
94
 *       foo://example.com:8042/over/there?name=ferret#nose
E
Erich Gamma 已提交
95 96 97 98 99 100 101
 *       \_/   \______________/\_________/ \_________/ \__/
 *        |           |            |            |        |
 *     scheme     authority       path        query   fragment
 *        |   _____________________|__
 *       / \ /                        \
 *       urn:example:animal:ferret:nose
 */
102
export class URI implements UriComponents {

	/**
	 * Tests whether `thing` is a URI. Besides real instances of this class,
	 * objects with the same shape are accepted: string-valued `scheme`,
	 * `authority`, `path`, `query`, `fragment` and `fsPath` properties plus
	 * `with` and `toString` functions.
	 */
	static isUri(thing: any): thing is URI {
		if (thing instanceof URI) {
			return true;
		}
		if (!thing) {
			return false;
		}
		return typeof (<URI>thing).authority === 'string'
			&& typeof (<URI>thing).fragment === 'string'
			&& typeof (<URI>thing).path === 'string'
			&& typeof (<URI>thing).query === 'string'
			&& typeof (<URI>thing).scheme === 'string'
			// `fsPath` is an accessor that yields a string, not a method
			&& typeof (<URI>thing).fsPath === 'string'
			&& typeof (<URI>thing).with === 'function'
			&& typeof (<URI>thing).toString === 'function';
	}

	/**
	 * scheme is the 'http' part of 'http://www.msft.com/some/path?query#fragment'.
	 * The part before the first colon.
	 */
	readonly scheme: string;

	/**
	 * authority is the 'www.msft.com' part of 'http://www.msft.com/some/path?query#fragment'.
	 * The part between the first double slashes and the next slash.
	 */
	readonly authority: string;

	/**
	 * path is the '/some/path' part of 'http://www.msft.com/some/path?query#fragment'.
	 */
	readonly path: string;

	/**
	 * query is the 'query' part of 'http://www.msft.com/some/path?query#fragment'.
	 */
	readonly query: string;

	/**
	 * fragment is the 'fragment' part of 'http://www.msft.com/some/path?query#fragment'.
	 */
	readonly fragment: string;

	/**
	 * @internal
	 */
	protected constructor(scheme: string, authority?: string, path?: string, query?: string, fragment?: string);

	/**
	 * @internal
	 */
	protected constructor(components: UriComponents);

	/**
	 * @internal
	 */
	protected constructor(schemeOrData: string | UriComponents, authority?: string, path?: string, query?: string, fragment?: string) {

		if (typeof schemeOrData === 'object') {
			this.scheme = schemeOrData.scheme || _empty;
			this.authority = schemeOrData.authority || _empty;
			this.path = schemeOrData.path || _empty;
			this.query = schemeOrData.query || _empty;
			this.fragment = schemeOrData.fragment || _empty;
			// no validation because it's this URI
			// that creates uri components.
			// _validateUri(this);
		} else {
			this.scheme = _schemeFix(schemeOrData);
			this.authority = authority || _empty;
			this.path = _referenceResolution(this.scheme, path || _empty);
			this.query = query || _empty;
			this.fragment = fragment || _empty;

			_validateUri(this);
		}
	}

	// ---- filesystem path -----------------------

	/**
	 * Returns a string representing the corresponding file system path of this URI.
	 * Will handle UNC paths, normalizes windows drive letters to lower-case, and uses the
	 * platform specific path separator.
	 *
	 * * Will *not* validate the path for invalid characters and semantics.
	 * * Will *not* look at the scheme of this URI.
	 * * The result shall *not* be used for display purposes but for accessing a file on disk.
	 *
	 *
	 * The *difference* to `URI#path` is the use of the platform specific separator and the handling
	 * of UNC paths. See the below sample of a file-uri with an authority (UNC path).
	 *
	 * ```ts
	 * const u = URI.parse('file://server/c$/folder/file.txt')
	 * u.authority === 'server'
	 * u.path === '/c$/folder/file.txt'
	 * u.fsPath === '\\server\c$\folder\file.txt'
	 * ```
	 *
	 * Using `URI#path` to read a file (using fs-apis) would not be enough because parts of the path,
	 * namely the server name, would be missing. Therefore `URI#fsPath` exists - it's sugar to ease working
	 * with URIs that represent files on disk (`file` scheme).
	 */
	get fsPath(): string {
		// if (this.scheme !== 'file') {
		// 	console.warn(`[UriError] calling fsPath with scheme ${this.scheme}`);
		// }
		return _makeFsPath(this);
	}

	// ---- modify to new -------------------------

	/**
	 * Derives a URI from this one. For every component of `change`:
	 * `undefined` keeps the current value, `null` resets it to the empty
	 * string, and any other value replaces it.
	 *
	 * @param change The components to replace.
	 * @returns This very instance when `change` is falsy or does not alter
	 * any component, a new URI otherwise.
	 */
	with(change: { scheme?: string; authority?: string | null; path?: string | null; query?: string | null; fragment?: string | null; }): URI {

		if (!change) {
			return this;
		}

		let { scheme, authority, path, query, fragment } = change;
		if (scheme === undefined) {
			scheme = this.scheme;
		} else if (scheme === null) {
			scheme = _empty;
		}
		if (authority === undefined) {
			authority = this.authority;
		} else if (authority === null) {
			authority = _empty;
		}
		if (path === undefined) {
			path = this.path;
		} else if (path === null) {
			path = _empty;
		}
		if (query === undefined) {
			query = this.query;
		} else if (query === null) {
			query = _empty;
		}
		if (fragment === undefined) {
			fragment = this.fragment;
		} else if (fragment === null) {
			fragment = _empty;
		}

		if (scheme === this.scheme
			&& authority === this.authority
			&& path === this.path
			&& query === this.query
			&& fragment === this.fragment) {

			return this;
		}

		return new _URI(scheme, authority, path, query, fragment);
	}

	// ---- parse & validate ------------------------

	/**
	 * Creates a new URI from a string, e.g. `http://www.msft.com/some/path`,
	 * `file:///usr/home`, or `scheme:with/path`.
	 *
	 * The components are percent-decoded, and the string representation
	 * computed from the still-encoded input is cached on the result.
	 *
	 * @param value A string which represents an URI (see `URI#toString`).
	 */
	static parse(value: string): URI {
		const match = _regexp.exec(value);
		if (!match) {
			throw new Error(`[UriError]: Invalid input: ${value}`);
		}

		const scheme = _schemeFix(match[MatchIndex.scheme]) || _empty;
		const authority = match[MatchIndex.authority] || _empty;
		const path = _referenceResolution(scheme, match[MatchIndex.path] || _empty);
		const query = match[MatchIndex.query] || _empty;
		const fragment = match[MatchIndex.fragment] || _empty;

		const result = new _URI(
			scheme,
			percentDecode(authority),
			percentDecode(path),
			percentDecode(query),
			percentDecode(fragment),
		);
		result._formatted = _toString(normalEncode, scheme, authority, path, query, fragment);
		return result;
	}

	/**
	 * Creates a new URI from a file system path, e.g. `c:\my\files`,
	 * `/usr/home`, or `\\server\share\some\path`.
	 *
	 * The *difference* between `URI#parse` and `URI#file` is that the latter treats the argument
	 * as path, not as stringified-uri. E.g. `URI.file(path)` is **not the same as**
	 * `URI.parse('file://' + path)` because the path might contain characters that are
	 * interpreted (# and ?). See the following sample:
	 * ```ts
	 * const good = URI.file('/coding/c#/project1');
	 * good.scheme === 'file';
	 * good.path === '/coding/c#/project1';
	 * good.fragment === '';
	 * const bad = URI.parse('file://' + '/coding/c#/project1');
	 * bad.scheme === 'file';
	 * bad.path === '/coding/c'; // path is now broken
	 * bad.fragment === '/project1';
	 * ```
	 *
	 * @param path A file system path (see `URI#fsPath`)
	 */
	static file(path: string): URI {

		let authority = _empty;

		// normalize to fwd-slashes on windows,
		// on other systems bwd-slashes are valid
		// filename character, eg /f\oo/ba\r.txt
		if (isWindows) {
			path = path.replace(/\\/g, _slash);
		}

		// check for authority as used in UNC shares
		// or use the path as given
		if (path[0] === _slash && path[1] === _slash) {
			const idx = path.indexOf(_slash, 2);
			if (idx === -1) {
				authority = path.substring(2);
				path = _slash;
			} else {
				authority = path.substring(2, idx);
				path = path.substring(idx) || _slash;
			}
		}

		if (path.charAt(0) !== _slash) {
			path = _slash + path;
		}

		// escape some vital characters, `%` must go first so that the
		// escapes added afterwards are not escaped again
		authority = authority.replace(/%/g, '%25');
		path = path.replace(/%/g, '%25');
		path = path.replace(/#/g, '%23');
		// without this a `?` would start the query when the string is parsed below
		path = path.replace(/\?/g, '%3F');

		if (!isWindows) {
			path = path.replace(/\\/g, '%5C');
		}

		return URI.parse('file://' + authority + path);
	}

	/**
	 * Creates a new URI from the given components. Missing components
	 * default to the empty string and the result is validated.
	 */
	static from(components: { scheme: string; authority?: string; path?: string; query?: string; fragment?: string; }): URI {
		return new _URI(
			components.scheme,
			components.authority,
			components.path,
			components.query,
			components.fragment,
		);
	}

	// ---- printing/externalize ---------------------------

	/**
	 * Creates a string representation for this URI. It's guaranteed that calling
	 * `URI.parse` with the result of this function creates an URI which is equal
	 * to this URI.
	 *
	 * * The result shall *not* be used for display purposes but for externalization or transport.
	 * * The result will be encoded using the percentage encoding and encoding happens mostly
	 * ignore the scheme-specific encoding rules.
	 *
	 * @param skipEncoding Do not encode the result, default is `false`
	 */
	toString(skipEncoding: boolean = false): string {
		return _toString(skipEncoding ? minimalEncode : normalEncode, this.scheme, this.authority, this.path, this.query, this.fragment);
	}

	/**
	 * Returns the object that represents this URI when it is serialized
	 * with `JSON.stringify`; the base implementation returns the URI itself.
	 */
	toJSON(): UriComponents {
		return this;
	}

	/**
	 * Turns serialized uri components (see `toJSON`) back into a URI.
	 * Falsy input and URI instances are returned as they are. The cached
	 * string representation is taken over from the data, the cached `fsPath`
	 * only when the data carries the path separator marker of this platform.
	 */
	static revive(data: UriComponents | URI): URI;
	static revive(data: UriComponents | URI | undefined): URI | undefined;
	static revive(data: UriComponents | URI | null): URI | null;
	static revive(data: UriComponents | URI | undefined | null): URI | undefined | null;
	static revive(data: UriComponents | URI | undefined | null): URI | undefined | null {
		if (!data) {
			return data;
		} else if (data instanceof URI) {
			return data;
		} else {
			const result = new _URI(data);
			result._formatted = (<UriState>data).external;
			result._fsPath = (<UriState>data)._sep === _pathSepMarker ? (<UriState>data).fsPath : null;
			return result;
		}
	}
}

J
Johannes Rieken 已提交
404
/**
 * The five components of a URI, e.g. for
 * `http://www.msft.com/some/path?query#fragment`: scheme `http`,
 * authority `www.msft.com`, path `/some/path`, query `query` and
 * fragment `fragment`.
 */
export interface UriComponents {
	scheme: string;
	authority: string;
	path: string;
	query: string;
	fragment: string;
}

/**
 * Shape of the object produced by `_URI#toJSON` and read by `URI.revive`:
 * the uri components plus cached state.
 */
interface UriState extends UriComponents {
	/** Written as `1` by `_URI#toJSON`. */
	$mid: number;
	/** Cached string representation; only written when it was already computed. */
	external: string;
	/** Cached file system path; only written when it was already computed. */
	fsPath: string;
	/** Path separator marker (`_pathSepMarker`) of the platform that computed `fsPath`. */
	_sep: 1 | undefined;
}

419
// Marker that is serialized alongside a cached `fsPath` (as `_sep`): `1` on
// Windows, `undefined` elsewhere. `URI.revive` only reuses a serialized
// `fsPath` when its marker equals the marker of the current platform.
const _pathSepMarker = isWindows ? 1 : undefined;
420

421 422
/**
 * URI implementation that is actually instantiated. It adds caching of the
 * file system path and of the encoded string representation, and serializes
 * that cached state together with the uri components.
 */
// tslint:disable-next-line:class-name
class _URI extends URI {

	/** Cached result of `toString()` (encoded form only), `null` until computed. */
	_formatted: string | null = null;
	/** Cached result of `fsPath`, `null` until computed. */
	_fsPath: string | null = null;

	get fsPath(): string {
		if (!this._fsPath) {
			this._fsPath = _makeFsPath(this);
		}
		return this._fsPath;
	}

	toString(skipEncoding: boolean = false): string {
		if (skipEncoding) {
			// we don't cache that
			return _toString(minimalEncode, this.scheme, this.authority, this.path, this.query, this.fragment);
		}
		if (!this._formatted) {
			this._formatted = _toString(normalEncode, this.scheme, this.authority, this.path, this.query, this.fragment);
		}
		return this._formatted;
	}

	/**
	 * Serializes this URI. Empty components and caches that have not been
	 * computed yet are left out of the result.
	 */
	toJSON(): UriComponents {
		const res = <UriState>{
			$mid: 1
		};
		// cached state
		if (this._fsPath) {
			res.fsPath = this._fsPath;
			// remember for which path separator the fsPath was computed
			res._sep = _pathSepMarker;
		}
		if (this._formatted) {
			res.external = this._formatted;
		}
		// uri components
		if (this.path) {
			res.path = this.path;
		}
		if (this.scheme) {
			res.scheme = this.scheme;
		}
		if (this.authority) {
			res.authority = this.authority;
		}
		if (this.query) {
			res.query = this.query;
		}
		if (this.fragment) {
			res.fragment = this.fragment;
		}
		return res;
	}
}

/**
 * Compute `fsPath` for the given uri.
 *
 * * A `file` uri with an authority and a path longer than one character
 *   becomes a UNC path (`//authority/path`).
 * * A path that starts with `/<letter>:` loses its leading slash and gets
 *   its drive letter lower-cased.
 * * Any other path is used as is.
 *
 * On Windows all slashes of the result are turned into backslashes.
 *
 * @param uri The uri to compute the file system path for.
 * @returns The file system path.
 */
function _makeFsPath(uri: URI): string {

	let value: string;
	if (uri.authority && uri.path.length > 1 && uri.scheme === 'file') {
		// unc path: file://shares/c$/far/boo
		value = `//${uri.authority}${uri.path}`;
	} else if (
		uri.path.charCodeAt(0) === CharCode.Slash
		&& (uri.path.charCodeAt(1) >= CharCode.A && uri.path.charCodeAt(1) <= CharCode.Z || uri.path.charCodeAt(1) >= CharCode.a && uri.path.charCodeAt(1) <= CharCode.z)
		&& uri.path.charCodeAt(2) === CharCode.Colon
	) {
		// windows drive letter: file:///c:/far/boo
		value = uri.path[1].toLowerCase() + uri.path.substr(2);
	} else {
		// other path
		value = uri.path;
	}
	if (isWindows) {
		value = value.replace(/\//g, '\\');
	}
	return value;
}
502

J
Johannes Rieken 已提交
503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531

//#region ---- decode

/**
 * Decodes `str` with `decodeURIComponent` without ever throwing. Whenever
 * decoding fails, the first three characters (one escape sequence) are kept
 * verbatim and decoding is retried on the remainder; a failing remainder of
 * three or fewer characters is kept verbatim as well.
 */
function decodeURIComponentGraceful(str: string): string {
	let kept = '';
	let pending = str;
	for (; ;) {
		try {
			return kept + decodeURIComponent(pending);
		} catch {
			if (pending.length <= 3) {
				return kept + pending;
			}
			kept += pending.substr(0, 3);
			pending = pending.substr(3);
		}
	}
}

// One or more consecutive `%xx` triplets, `x` being an ASCII letter or digit.
const _hex2 = /(%[0-9A-Za-z][0-9A-Za-z])+/g;

/**
 * Decodes the percent-encoded sequences of `str`. Each run of consecutive
 * `%xx` triplets is decoded as one unit; runs that cannot be decoded are
 * handled gracefully instead of raising an error.
 */
function percentDecode(str: string): string {
	const hasEscapes = str.search(_hex2) !== -1;
	return hasEscapes
		? str.replace(_hex2, run => decodeURIComponentGraceful(run))
		: str;
}

//#endregion

//#region ---- encode

// https://url.spec.whatwg.org/#percent-encoded-bytes

J
polish  
Johannes Rieken 已提交
532
// "The C0 control percent-encode set are the C0 controls and all code points greater than U+007E (~)."
J
Johannes Rieken 已提交
533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559
function isC0ControlPercentEncodeSet(code: number): boolean {
	// C0 controls (U+0000 to U+001F) ...
	if (code <= 0x1F) {
		return true;
	}
	// ... and everything beyond U+007E (~)
	return code > 0x7E;
}
// "The fragment percent-encode set is the C0 control percent-encode set and U+0020 SPACE, U+0022 ("), U+003C (<), U+003E (>), and U+0060 (`)."
function isFragmentPercentEncodeSet(code: number): boolean {
	if (isC0ControlPercentEncodeSet(code)) {
		return true;
	}
	switch (code) {
		case 0x20: // space
		case 0x22: // "
		case 0x3C: // <
		case 0x3E: // >
		case 0x60: // `
			return true;
		default:
			return false;
	}
}
// "The path percent-encode set is the fragment percent-encode set and U+0023 (#), U+003F (?), U+007B ({), and U+007D (})."
function isPathPercentEncodeSet(code: number): boolean {
	if (isFragmentPercentEncodeSet(code)) {
		return true;
	}
	switch (code) {
		case 0x23: // #
		case 0x3F: // ?
		case 0x7B: // {
		case 0x7D: // }
			return true;
		default:
			return false;
	}
}
// "The userinfo percent-encode set is the path percent-encode set and U+002F (/), U+003A (:), U+003B (;), U+003D (=), U+0040 (@), U+005B ([), U+005C (\), U+005D (]), U+005E (^), and U+007C (|)."
function isUserInfoPercentEncodeSet(code: number): boolean {
	if (isPathPercentEncodeSet(code)) {
		return true;
	}
	switch (code) {
		case 0x2F: // /
		case 0x3A: // :
		case 0x3B: // ;
		case 0x3D: // =
		case 0x40: // @
		case 0x5B: // [
		case 0x5C: // \
		case 0x5D: // ]
		case 0x5E: // ^
		case 0x7C: // |
			return true;
		default:
			return false;
	}
}

// https://url.spec.whatwg.org/#query-state
function isQueryPrecentEncodeSet(code: number): boolean {
	// controls and space, and everything beyond U+007E (~)
	if (code < 0x21 || code > 0x7E) {
		return true;
	}
	switch (code) {
		case 0x22: // "
		case 0x23: // #
		case 0x3C: // <
		case 0x3E: // >
		case 0x27: // ' <- todo@joh https://url.spec.whatwg.org/#is-special
			return true;
		default:
			return false;
	}
}

560 561 562 563 564
// This set is non-standard, it is used for `URI.toString(true)`
function isHashOrQuestionMark(code: number): boolean {
	switch (code) {
		case CharCode.Hash:
		case CharCode.QuestionMark:
			return true;
		default:
			return false;
	}
}

J
Johannes Rieken 已提交
565 566 567 568 569
/**
 * Tests whether `code` is a character that occurs in lower-case
 * hexadecimal notation, i.e. a digit (`0`-`9`) or a letter from `a` to `f`.
 */
function isLowerAsciiHex(code: number): boolean {
	return code >= CharCode.Digit0 && code <= CharCode.Digit9
		|| code >= CharCode.a && code <= CharCode.f;
}

J
polish  
Johannes Rieken 已提交
570 571 572 573 574 575 576 577
/**
 * Lookup table with the percent-encoded form of every ASCII character,
 * indexed by character code. Each entry is `%` followed by exactly two
 * upper-case hex digits, e.g. `%0A` for the line feed and `%3F` for `?`.
 */
const _encodeTable: string[] = (function () {
	const table: string[] = [];
	for (let code = 0; code < 128; code++) {
		const hex = code.toString(16).toUpperCase();
		// codes below 0x10 have a single hex digit and need a leading zero
		table[code] = (hex.length < 2 ? '%0' : '%') + hex;
	}
	return table;
})();

J
Johannes Rieken 已提交
578 579
/**
 * Percent-encodes the characters of `str` that `mustEncode` selects and
 * upper-cases percent-encoded sequences that are already present.
 *
 * The output string is built lazily: as long as nothing had to be changed
 * no copy is made and `str` itself is returned.
 *
 * Surrogates that are not part of a valid pair cannot be encoded and are
 * copied to the output as they are.
 *
 * @param str The string to encode.
 * @param mustEncode Decides, per UTF-16 code unit, whether it must be encoded.
 * @returns The encoded string.
 */
function percentEncode(str: string, mustEncode: (code: number) => boolean): string {
	let lazyOutStr: string | undefined;
	for (let pos = 0; pos < str.length; pos++) {
		const code = str.charCodeAt(pos);

		// invoke encodeURIComponent when needed
		if (mustEncode(code)) {
			if (lazyOutStr === undefined) {
				lazyOutStr = str.substr(0, pos);
			}
			if (isHighSurrogate(code)) {
				// Append encoded version of this surrogate pair (2 characters)
				if (pos + 1 < str.length && isLowSurrogate(str.charCodeAt(pos + 1))) {
					lazyOutStr += encodeURIComponent(str.substr(pos, 2));
					pos += 1;
				} else {
					// broken surrogate pair
					lazyOutStr += str.charAt(pos);
				}
			} else if (isLowSurrogate(code)) {
				// low surrogate without a preceding high surrogate: this is a
				// broken pair too and `encodeURIComponent` would throw on it
				lazyOutStr += str.charAt(pos);
			} else if (code < _encodeTable.length) {
				// Append encoded version of the current character, use lookup table
				// to speed up repeated encoding of the same characters.
				lazyOutStr += _encodeTable[code];
			} else {
				lazyOutStr += encodeURIComponent(str.charAt(pos));
			}
			continue;
		}

		// normalize percent encoded sequences to upper case
		// todo@joh also changes invalid sequences
		if (code === CharCode.PercentSign
			&& pos + 2 < str.length
			&& (isLowerAsciiHex(str.charCodeAt(pos + 1)) || isLowerAsciiHex(str.charCodeAt(pos + 2)))
		) {
			if (lazyOutStr === undefined) {
				lazyOutStr = str.substr(0, pos);
			}
			lazyOutStr += '%' + str.substr(pos + 1, 2).toUpperCase();
			pos += 2;
			continue;
		}

		// once started, continue to build up lazy output
		if (lazyOutStr !== undefined) {
			lazyOutStr += str.charAt(pos);
		}
	}
	return lazyOutStr !== undefined ? lazyOutStr : str;
}

631 632 633 634
/**
 * Positions of the per-component predicates inside an encoder table
 * (see `normalEncode` and `minimalEncode`).
 */
const enum EncodePart {
	user, authority, path, query, fragment
}

// Encoder tables: one "must this character be encoded" predicate per URI
// part. The order of the entries MUST match the order of `EncodePart`.

// Default encoding: the query uses the query set (which includes `#`) and
// the fragment uses the fragment set.
const normalEncode: { (code: number): boolean }[] = [isUserInfoPercentEncodeSet, isC0ControlPercentEncodeSet, isPathPercentEncodeSet, isQueryPrecentEncodeSet, isFragmentPercentEncodeSet];
// Used for `URI.toString(true)`: only `#` and `?` are encoded, and the
// fragment is not encoded at all.
const minimalEncode: { (code: number): boolean }[] = [isHashOrQuestionMark, isHashOrQuestionMark, isHashOrQuestionMark, isHashOrQuestionMark, () => false];
636

637 638 639
/**
 * Create the external version of a uri
 *
 * @param encoder Encoder table that provides, per `EncodePart`, the
 * predicate which decides what characters are percent-encoded.
 * @param scheme The scheme; followed by `:` when not empty.
 * @param authority The authority, optionally with user info (`user:token@`)
 * and port. Host and port are lower-cased; `//` is emitted when an authority
 * is present or when the scheme is `file`.
 * @param path The path. A leading windows drive letter (`/C:/fff` or
 * `C:/fff`) is lower-cased and its colon is escaped as `%3A`.
 * @param query The query; prefixed with `?` when not empty.
 * @param fragment The fragment; prefixed with `#` when not empty.
 * @returns The string representation of the uri.
 */
function _toString(encoder: { (code: number): boolean }[], scheme: string, authority: string, path: string, query: string, fragment: string): string {

	let res = '';
	if (scheme) {
		res += scheme;
		res += ':';
	}
	if (authority || scheme === 'file') {
		res += _slash;
		res += _slash;
	}
	if (authority) {
		const idxUserInfo = authority.indexOf('@');
		if (idxUserInfo !== -1) {
			// <user:token>
			const userInfo = authority.substr(0, idxUserInfo);
			const idxPasswordOrToken = userInfo.indexOf(':');
			if (idxPasswordOrToken !== -1) {
				res += percentEncode(userInfo.substr(0, idxPasswordOrToken), encoder[EncodePart.user]);
				res += ':';
				res += percentEncode(userInfo.substr(idxPasswordOrToken + 1), encoder[EncodePart.user]);
			} else {
				res += percentEncode(userInfo, encoder[EncodePart.user]);
			}
			res += '@';
		}
		// when there is no user info `idxUserInfo + 1` is 0 and the whole authority is kept
		authority = authority.substr(idxUserInfo + 1).toLowerCase();
		const idxPort = authority.indexOf(':');
		if (idxPort !== -1) {
			// <authority>:<port>
			res += percentEncode(authority.substr(0, idxPort), encoder[EncodePart.authority]);
			res += ':';
		}
		// the port, or the whole authority when there is no port
		res += percentEncode(authority.substr(idxPort + 1), encoder[EncodePart.authority]);
	}
	if (path) {
		// encode the path
		let pathEncoded = percentEncode(path, encoder[EncodePart.path]);

		// lower-case windows drive letters in /C:/fff or C:/fff and escape `:`
		// The pattern is anchored: a drive letter only exists at the very
		// start of the path, and the replacement below drops everything
		// in front of the match.
		const match = /^(\/?[a-z])(:|%3a)/i.exec(pathEncoded); // <- todo@joh make fast!
		if (match) {
			pathEncoded = match[1].toLowerCase() + '%3A' + pathEncoded.substr(match[0].length);
		}
		res += pathEncoded;
	}
	if (query) {
		res += '?';
		res += percentEncode(query, encoder[EncodePart.query]);
	}
	if (fragment) {
		res += '#';
		res += percentEncode(fragment, encoder[EncodePart.fragment]);
	}
	return res;
}
J
Johannes Rieken 已提交
697 698

//#endregion