uri.ts 17.4 KB
Newer Older
E
Erich Gamma 已提交
1 2 3 4 5 6
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/
'use strict';

7
import { isWindows } from 'vs/base/common/platform';
8
import { CharCode } from 'vs/base/common/charCode';
9

10 11 12 13 14
// Patterns used by `_validateUri`.
const _schemePattern = /^\w[\w\d+.-]*$/;
const _singleSlashStart = /^\//;
const _doubleSlashStart = /^\/\//;

/**
 * Performs minimal structural validation of the given URI and throws when
 * a rule is violated.
 *
 * Only `scheme`, `authority` and `path` are inspected. An empty scheme is
 * accepted; the scheme is only checked when it is non-empty.
 *
 * @param ret The URI whose components are checked.
 * @throws Error with a `[UriError]` prefixed message when the scheme contains
 * characters rejected by `_schemePattern` or when the path is inconsistent
 * with the presence/absence of an authority.
 */
function _validateUri(ret: URI): void {

	// scheme, https://tools.ietf.org/html/rfc3986#section-3.1
	// ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
	if (ret.scheme && !_schemePattern.test(ret.scheme)) {
		throw new Error('[UriError]: Scheme contains illegal characters.');
	}

	// path, http://tools.ietf.org/html/rfc3986#section-3.3
	// If a URI contains an authority component, then the path component
	// must either be empty or begin with a slash ("/") character.  If a URI
	// does not contain an authority component, then the path cannot begin
	// with two slash characters ("//").
	if (ret.path) {
		if (ret.authority) {
			if (!_singleSlashStart.test(ret.path)) {
				throw new Error('[UriError]: If a URI contains an authority component, then the path component must either be empty or begin with a slash ("/") character');
			}
		} else {
			if (_doubleSlashStart.test(ret.path)) {
				throw new Error('[UriError]: If a URI does not contain an authority component, then the path cannot begin with two slash characters ("//")');
			}
		}
	}
}

45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65
/**
 * Implements a small part of https://tools.ietf.org/html/rfc3986#section-5.
 *
 * URIs cannot be constructed relative to other URIs here, so the slash
 * character acts as the 'default base' (see
 * https://tools.ietf.org/html/rfc3986#section-5.1.4). As a consequence paths
 * are altered - and potentially broken - for the `https`, `http` and `file`
 * schemes: an empty path becomes the slash and any other path is made to
 * start with a slash. Paths of all other schemes are returned unchanged.
 *
 * @param scheme The scheme of the URI being constructed.
 * @param path The path to resolve.
 * @returns The (possibly slash-prefixed) path.
 */
function _referenceResolution(scheme: string, path: string): string {
	const rootedScheme = scheme === 'https' || scheme === 'http' || scheme === 'file';
	if (!rootedScheme) {
		return path;
	}
	if (!path) {
		return _slash;
	}
	return path[0] === _slash ? path : _slash + path;
}

66 67 68 69
// Shared string constants: the default for absent URI components and the path separator.
const _empty = '';
const _slash = '/';
// Splits a stringified URI into its components. `URI.parse` reads the capture
// groups as: [2] scheme, [4] authority, [5] path, [7] query, [9] fragment.
// Every part is optional.
const _regexp = /^(([^:/?#]+?):)?(\/\/([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?/;

E
Erich Gamma 已提交
70 71
// Matches a maximal run of consecutive percent-encoded octets, e.g. `%E2%82%AC`.
const _percentEncodedRun = /(?:%[0-9A-Fa-f]{2})+/g;

/**
 * Decodes one run of percent-encoded octets without ever throwing. When the
 * run is not valid UTF-8 its leading `%XX` triplets are kept verbatim, one at
 * a time, until the remainder can be decoded.
 */
function _decodeRunGraceful(run: string): string {
	let kept = '';
	let rest = run;
	while (rest.length > 0) {
		try {
			return kept + decodeURIComponent(rest);
		} catch (e) {
			// "%XX".length === 3
			kept += rest.substr(0, 3);
			rest = rest.substr(3);
		}
	}
	return kept;
}

/**
 * Like `decodeURIComponent` but tolerant of malformed input: a `%` that does not
 * start a valid escape sequence and octet runs that are not valid UTF-8 are
 * left as-is instead of raising an `URIError`. For well-formed input the result
 * is the same as that of `decodeURIComponent`.
 *
 * @param value The (possibly percent-encoded) component.
 * @returns The decoded component.
 */
function _decodeURIComponentGraceful(value: string): string {
	return value.replace(_percentEncodedRun, run => _decodeRunGraceful(run));
}

/**
 * Uniform Resource Identifier (URI) http://tools.ietf.org/html/rfc3986.
 * This class is a simple parser which creates the basic component parts
 * (http://tools.ietf.org/html/rfc3986#section-3) with minimal validation
 * and encoding.
 *
 *       foo://example.com:8042/over/there?name=ferret#nose
 *       \_/   \______________/\_________/ \_________/ \__/
 *        |           |            |            |        |
 *     scheme     authority       path        query   fragment
 *        |   _____________________|__
 *       / \ /                        \
 *       urn:example:animal:ferret:nose
 */
export default class URI implements UriComponents {

	/**
	 * Checks whether the given value is an URI: either an instance of this class
	 * or any object whose `authority`, `fragment`, `path`, `query`, and `scheme`
	 * properties are all strings.
	 */
	static isUri(thing: any): thing is URI {
		if (thing instanceof URI) {
			return true;
		}
		if (!thing) {
			return false;
		}
		return typeof (<URI>thing).authority === 'string'
			&& typeof (<URI>thing).fragment === 'string'
			&& typeof (<URI>thing).path === 'string'
			&& typeof (<URI>thing).query === 'string'
			&& typeof (<URI>thing).scheme === 'string';
	}

	/**
	 * scheme is the 'http' part of 'http://www.msft.com/some/path?query#fragment'.
	 * The part before the first colon.
	 */
	readonly scheme: string;

	/**
	 * authority is the 'www.msft.com' part of 'http://www.msft.com/some/path?query#fragment'.
	 * The part between the first double slashes and the next slash.
	 */
	readonly authority: string;

	/**
	 * path is the '/some/path' part of 'http://www.msft.com/some/path?query#fragment'.
	 */
	readonly path: string;

	/**
	 * query is the 'query' part of 'http://www.msft.com/some/path?query#fragment'.
	 */
	readonly query: string;

	/**
	 * fragment is the 'fragment' part of 'http://www.msft.com/some/path?query#fragment'.
	 */
	readonly fragment: string;

	/**
	 * @internal
	 */
	protected constructor(scheme: string, authority: string, path: string, query: string, fragment: string);

	/**
	 * @internal
	 */
	protected constructor(components: UriComponents);

	/**
	 * @internal
	 */
	protected constructor(schemeOrData: string | UriComponents, authority?: string, path?: string, query?: string, fragment?: string) {

		if (typeof schemeOrData === 'object') {
			this.scheme = schemeOrData.scheme || _empty;
			this.authority = schemeOrData.authority || _empty;
			this.path = schemeOrData.path || _empty;
			this.query = schemeOrData.query || _empty;
			this.fragment = schemeOrData.fragment || _empty;
			// no validation because it's this URI
			// that creates uri components.
			// _validateUri(this);
		} else {
			this.scheme = schemeOrData || _empty;
			this.authority = authority || _empty;
			this.path = _referenceResolution(this.scheme, path || _empty);
			this.query = query || _empty;
			this.fragment = fragment || _empty;

			_validateUri(this);
		}
	}

	// ---- filesystem path -----------------------

	/**
	 * Returns a string representing the corresponding file system path of this URI.
	 * Will handle UNC paths, normalizes windows drive letters to lower-case, and uses the
	 * platform specific path separator.
	 *
	 * * Will *not* validate the path for invalid characters and semantics.
	 * * Will *not* look at the scheme of this URI.
	 * * The result shall *not* be used for display purposes but for accessing a file on disk.
	 *
	 *
	 * The *difference* to `URI#path` is the use of the platform specific separator and the handling
	 * of UNC paths. See the below sample of a file-uri with an authority (UNC path).
	 *
	 * ```ts
		const u = URI.parse('file://server/c$/folder/file.txt')
		u.authority === 'server'
		u.path === '/c$/folder/file.txt'
		u.fsPath === '\\server\c$\folder\file.txt'
	```
	 *
	 * Using `URI#path` to read a file (using fs-apis) would not be enough because parts of the path,
	 * namely the server name, would be missing. Therefore `URI#fsPath` exists - it's sugar to ease working
	 * with URIs that represent files on disk (`file` scheme).
	 */
	get fsPath(): string {
		return _makeFsPath(this);
	}

	// ---- modify to new -------------------------

	/**
	 * Derives a new URI from this URI by replacing some of its components.
	 *
	 * For each component, `undefined` (or omitting it) keeps the value of this
	 * URI and `null` resets the component to the empty string.
	 *
	 * @param change The components to replace.
	 * @returns A new URI, or this URI when `change` is falsy or when all
	 * resulting components equal those of this URI.
	 */
	public with(change: { scheme?: string; authority?: string | null; path?: string | null; query?: string | null; fragment?: string | null }): URI {

		if (!change) {
			return this;
		}

		let { scheme, authority, path, query, fragment } = change;
		if (scheme === void 0) {
			scheme = this.scheme;
		} else if (scheme === null) {
			scheme = _empty;
		}
		if (authority === void 0) {
			authority = this.authority;
		} else if (authority === null) {
			authority = _empty;
		}
		if (path === void 0) {
			path = this.path;
		} else if (path === null) {
			path = _empty;
		}
		if (query === void 0) {
			query = this.query;
		} else if (query === null) {
			query = _empty;
		}
		if (fragment === void 0) {
			fragment = this.fragment;
		} else if (fragment === null) {
			fragment = _empty;
		}

		if (scheme === this.scheme
			&& authority === this.authority
			&& path === this.path
			&& query === this.query
			&& fragment === this.fragment) {

			return this;
		}

		return new _URI(scheme, authority, path, query, fragment);
	}

	// ---- parse & validate ------------------------

	/**
	 * Creates a new URI from a string, e.g. `http://www.msft.com/some/path`,
	 * `file:///usr/home`, or `scheme:with/path`.
	 *
	 * Authority, path, query, and fragment are percent-decoded. Malformed escape
	 * sequences (e.g. a stray `%`) are kept verbatim instead of raising an
	 * `URIError`.
	 *
	 * @param value A string which represents an URI (see `URI#toString`).
	 */
	public static parse(value: string): URI {
		const match = _regexp.exec(value);
		if (!match) {
			return new _URI(_empty, _empty, _empty, _empty, _empty);
		}
		return new _URI(
			match[2] || _empty,
			_decodeURIComponentGraceful(match[4] || _empty),
			_decodeURIComponentGraceful(match[5] || _empty),
			_decodeURIComponentGraceful(match[7] || _empty),
			_decodeURIComponentGraceful(match[9] || _empty)
		);
	}

	/**
	 * Creates a new URI from a file system path, e.g. `c:\my\files`,
	 * `/usr/home`, or `\\server\share\some\path`.
	 *
	 * The *difference* between `URI#parse` and `URI#file` is that the latter treats the argument
	 * as path, not as stringified-uri. E.g. `URI.file(path)` is **not the same as**
	 * `URI.parse('file://' + path)` because the path might contain characters that are
	 * interpreted (# and ?). See the following sample:
	 * ```ts
	const good = URI.file('/coding/c#/project1');
	good.scheme === 'file';
	good.path === '/coding/c#/project1';
	good.fragment === '';

	const bad = URI.parse('file://' + '/coding/c#/project1');
	bad.scheme === 'file';
	bad.path === '/coding/c'; // path is now broken
	bad.fragment === '/project1';
	```
	 *
	 * @param path A file system path (see `URI#fsPath`)
	 */
	public static file(path: string): URI {

		let authority = _empty;

		// normalize to fwd-slashes on windows,
		// on other systems bwd-slashes are valid
		// filename character, eg /f\oo/ba\r.txt
		if (isWindows) {
			path = path.replace(/\\/g, _slash);
		}

		// check for authority as used in UNC shares
		// or use the path as given
		if (path[0] === _slash && path[1] === _slash) {
			let idx = path.indexOf(_slash, 2);
			if (idx === -1) {
				authority = path.substring(2);
				path = _slash;
			} else {
				authority = path.substring(2, idx);
				path = path.substring(idx) || _slash;
			}
		}

		return new _URI('file', authority, path, _empty, _empty);
	}

	/**
	 * Creates a new URI from its components. Components that are omitted
	 * default to the empty string; the result is validated.
	 *
	 * @param components The parts of the URI.
	 */
	public static from(components: { scheme: string; authority?: string; path?: string; query?: string; fragment?: string }): URI {
		return new _URI(
			components.scheme,
			components.authority,
			components.path,
			components.query,
			components.fragment
		);
	}

	// ---- printing/externalize ---------------------------

	/**
	 * Creates a string presentation for this URI. It's guaranteed that calling
	 * `URI.parse` with the result of this function creates an URI which is equal
	 * to this URI.
	 *
	 * * The result shall *not* be used for display purposes but for externalization or transport.
	 * * The result will be encoded using the percentage encoding and encoding mostly
	 * ignores the scheme-specific encoding rules.
	 *
	 * @param skipEncoding Do not encode the result, default is `false`
	 */
	public toString(skipEncoding: boolean = false): string {
		return _asFormatted(this, skipEncoding);
	}

	/**
	 * Returns the object to be serialized by `JSON.stringify`, which is this URI itself.
	 */
	public toJSON(): object {
		return this;
	}

	/**
	 * Turns data, e.g. the result of `URI#toJSON`, back into an URI.
	 *
	 * @param data Uri components, optionally carrying the cached `fsPath` and
	 * `external` values.
	 * @returns `data` itself when it is falsy or already an URI, otherwise a new
	 * URI created - without validation - from the components of `data`.
	 */
	static revive(data: UriComponents | any): URI {
		if (!data) {
			return data;
		} else if (data instanceof URI) {
			return data;
		} else {
			let result = new _URI(data);
			// restore the cached values, if any
			result._fsPath = (<UriState>data).fsPath;
			result._formatted = (<UriState>data).external;
			return result;
		}
	}
}

J
Johannes Rieken 已提交
355
/**
 * The five component parts of an URI, see
 * http://tools.ietf.org/html/rfc3986#section-3.
 */
export interface UriComponents {
	scheme: string;
	authority: string;
	path: string;
	query: string;
	fragment: string;
}

/**
 * The shape produced by `_URI#toJSON` and consumed by `URI.revive`: the uri
 * components plus the values cached on the instance.
 */
interface UriState extends UriComponents {
	/** Cached result of `toString()` (with encoding), if it was computed. */
	external: string;
	/** Cached file system path, if it was computed. */
	fsPath: string;
	/** Set to `1` by `toJSON` - presumably a marshalling type tag; verify against consumers. */
	$mid: number;
}

369

370 371
/**
 * The concrete URI implementation that is instantiated by the static factory
 * functions of `URI`. It caches the computed file system path and the encoded
 * string form and includes those cached values when being serialized.
 */
// tslint:disable-next-line:class-name
class _URI extends URI {

	/** Cache for `toString()` with encoding enabled. */
	_formatted: string = null;
	/** Cache for `fsPath`. */
	_fsPath: string = null;

	/**
	 * Same as `URI#fsPath`, computed on first access and cached afterwards.
	 */
	get fsPath(): string {
		if (!this._fsPath) {
			this._fsPath = _makeFsPath(this);
		}
		return this._fsPath;
	}

	/**
	 * Same as `URI#toString`. Only the encoded form (`skipEncoding` is `false`)
	 * is cached.
	 */
	public toString(skipEncoding: boolean = false): string {
		if (!skipEncoding) {
			if (!this._formatted) {
				this._formatted = _asFormatted(this, false);
			}
			return this._formatted;
		} else {
			// we don't cache that
			return _asFormatted(this, true);
		}
	}

	/**
	 * Creates a plain object with `$mid` set to `1`, the cached values that
	 * have been computed so far, and all non-empty uri components.
	 */
	toJSON(): object {
		const res = <UriState>{
			$mid: 1
		};
		// cached state
		if (this._fsPath) {
			res.fsPath = this._fsPath;
		}
		if (this._formatted) {
			res.external = this._formatted;
		}
		// uri components
		if (this.path) {
			res.path = this.path;
		}
		if (this.scheme) {
			res.scheme = this.scheme;
		}
		if (this.authority) {
			res.authority = this.authority;
		}
		if (this.query) {
			res.query = this.query;
		}
		if (this.fragment) {
			res.fragment = this.fragment;
		}
		return res;
	}
}

426
// reserved characters: https://tools.ietf.org/html/rfc3986#section-2.2
// Maps the char code of each reserved character (and of the space character)
// to its percent-encoded form.
const encodeTable: { [ch: number]: string } = {
	[CharCode.Colon]: '%3A', // gen-delims
	[CharCode.Slash]: '%2F',
	[CharCode.QuestionMark]: '%3F',
	[CharCode.Hash]: '%23',
	[CharCode.OpenSquareBracket]: '%5B',
	[CharCode.CloseSquareBracket]: '%5D',
	[CharCode.AtSign]: '%40',

	[CharCode.ExclamationMark]: '%21', // sub-delims
	[CharCode.DollarSign]: '%24',
	[CharCode.Ampersand]: '%26',
	[CharCode.SingleQuote]: '%27',
	[CharCode.OpenParen]: '%28',
	[CharCode.CloseParen]: '%29',
	[CharCode.Asterisk]: '%2A',
	[CharCode.Plus]: '%2B',
	[CharCode.Comma]: '%2C',
	[CharCode.Semicolon]: '%3B',
	[CharCode.Equals]: '%3D',

	[CharCode.Space]: '%20',
};

/**
 * Percent-encodes a uri component. Unreserved characters are kept, characters
 * listed in `encodeTable` are replaced by their table entry, and runs of all
 * other characters are handed to the native `encodeURIComponent`.
 *
 * No new string is built as long as nothing needs encoding; in that case the
 * argument itself is returned.
 *
 * @param uriComponent The component to encode.
 * @param allowSlash When `true` the slash character is kept as-is.
 * @returns The encoded component.
 */
function encodeURIComponentFast(uriComponent: string, allowSlash: boolean): string {
	// stays `undefined` until the first character that requires encoding
	let encoded: string = undefined;
	// start of a pending run that goes through the native encoder, -1 when none
	let nativeRunStart = -1;

	for (let i = 0; i < uriComponent.length; i++) {
		const ch = uriComponent.charCodeAt(i);

		// unreserved characters: https://tools.ietf.org/html/rfc3986#section-2.3
		const isLetter = (ch >= CharCode.a && ch <= CharCode.z) || (ch >= CharCode.A && ch <= CharCode.Z);
		const isDigit = ch >= CharCode.Digit0 && ch <= CharCode.Digit9;
		const isMark = ch === CharCode.Dash || ch === CharCode.Period || ch === CharCode.Underline || ch === CharCode.Tilde;
		const keepAsIs = isLetter || isDigit || isMark || (allowSlash && ch === CharCode.Slash);

		if (keepAsIs) {
			// flush the pending native run, if any
			if (nativeRunStart !== -1) {
				encoded += encodeURIComponent(uriComponent.substring(nativeRunStart, i));
				nativeRunStart = -1;
			}
			// only copy when a new string is already being built
			if (encoded !== undefined) {
				encoded += uriComponent.charAt(i);
			}
			continue;
		}

		// encoding is needed, start the new string with everything seen so far
		if (encoded === undefined) {
			encoded = uriComponent.substr(0, i);
		}

		const fromTable = encodeTable[ch];
		if (fromTable === undefined) {
			// not in the table: open a native run unless one is open already
			if (nativeRunStart === -1) {
				nativeRunStart = i;
			}
			continue;
		}

		// flush the pending native run, if any, then append the table entry
		if (nativeRunStart !== -1) {
			encoded += encodeURIComponent(uriComponent.substring(nativeRunStart, i));
			nativeRunStart = -1;
		}
		encoded += fromTable;
	}

	// a native run that extends to the end of the input
	if (nativeRunStart !== -1) {
		encoded += encodeURIComponent(uriComponent.substring(nativeRunStart));
	}

	return encoded !== undefined ? encoded : uriComponent;
}

/**
 * Encodes only the hash and question mark characters of the given string,
 * using their `encodeTable` entries, and leaves everything else untouched.
 * The argument itself is returned when it contains neither character.
 *
 * @param path The string to encode.
 * @returns The minimally encoded string.
 */
function encodeURIComponentMinimal(path: string): string {
	// stays `undefined` until the first character that requires encoding
	let encoded: string = undefined;
	for (let i = 0; i < path.length; i++) {
		const ch = path.charCodeAt(i);
		const mustEscape = ch === CharCode.Hash || ch === CharCode.QuestionMark;
		if (mustEscape) {
			if (encoded === undefined) {
				encoded = path.substr(0, i);
			}
			encoded += encodeTable[ch];
		} else if (encoded !== undefined) {
			encoded += path[i];
		}
	}
	return encoded !== undefined ? encoded : path;
}
J
Johannes Rieken 已提交
529

530 531 532 533 534 535 536
/**
 * Compute `fsPath` for the given uri.
 *
 * * A `file`-uri with an authority and a path longer than one character
 *   yields a UNC path: `//<authority><path>`.
 * * A path that starts with `/<letter>:` yields the path without the leading
 *   slash and with a lower-cased drive letter.
 * * Any other path is used as-is.
 *
 * When `isWindows` is set, all forward slashes of the result are replaced
 * with backslashes.
 *
 * @param uri The uri to compute the file system path for.
 * @returns The file system path.
 */
function _makeFsPath(uri: URI): string {

	let value: string;
	if (uri.authority && uri.path.length > 1 && uri.scheme === 'file') {
		// unc path: file://shares/c$/far/boo
		value = `//${uri.authority}${uri.path}`;
	} else if (
		uri.path.charCodeAt(0) === CharCode.Slash
		&& (uri.path.charCodeAt(1) >= CharCode.A && uri.path.charCodeAt(1) <= CharCode.Z || uri.path.charCodeAt(1) >= CharCode.a && uri.path.charCodeAt(1) <= CharCode.z)
		&& uri.path.charCodeAt(2) === CharCode.Colon
	) {
		// windows drive letter: file:///c:/far/boo
		value = uri.path[1].toLowerCase() + uri.path.substr(2);
	} else {
		// other path
		value = uri.path;
	}
	if (isWindows) {
		value = value.replace(/\//g, '\\');
	}
	return value;
}
556

557 558 559 560
/**
 * Create the external version of a uri, i.e. its string form
 * `scheme://authority/path?query#fragment`.
 *
 * * The `//` is emitted when there is an authority or when the scheme is `file`.
 * * The authority is lower-cased (the user-info in front of `@` is not) and
 *   the part starting at a `:` (the port) is appended without encoding.
 * * Upper-case windows drive letters in the path are lower-cased.
 *
 * @param uri The uri to format.
 * @param skipEncoding When `false` all components are percent-encoded with
 * `encodeURIComponentFast`. When `true` only `#` and `?` are encoded
 * (`encodeURIComponentMinimal`) and the fragment is not encoded at all.
 * @returns The string form of the uri.
 */
function _asFormatted(uri: URI, skipEncoding: boolean): string {

	const encoder = !skipEncoding
		? encodeURIComponentFast
		: encodeURIComponentMinimal;

	let res = '';
	let { scheme, authority, path, query, fragment } = uri;
	if (scheme) {
		res += scheme;
		res += ':';
	}
	if (authority || scheme === 'file') {
		res += _slash;
		res += _slash;
	}
	if (authority) {
		let idx = authority.indexOf('@');
		if (idx !== -1) {
			// <user>@<auth>
			const userinfo = authority.substr(0, idx);
			authority = authority.substr(idx + 1);
			idx = userinfo.indexOf(':');
			if (idx === -1) {
				res += encoder(userinfo, false);
			} else {
				// <user>:<pass>@<auth>
				res += encoder(userinfo.substr(0, idx), false);
				res += ':';
				res += encoder(userinfo.substr(idx + 1), false);
			}
			res += '@';
		}
		authority = authority.toLowerCase();
		idx = authority.indexOf(':');
		if (idx === -1) {
			res += encoder(authority, false);
		} else {
			// <auth>:<port>
			res += encoder(authority.substr(0, idx), false);
			res += authority.substr(idx);
		}
	}
	if (path) {
		// lower-case windows drive letters in /C:/fff or C:/fff
		if (path.length >= 3 && path.charCodeAt(0) === CharCode.Slash && path.charCodeAt(2) === CharCode.Colon) {
			let code = path.charCodeAt(1);
			if (code >= CharCode.A && code <= CharCode.Z) {
				// adding 32 turns an upper-case ASCII letter into its lower-case form
				path = `/${String.fromCharCode(code + 32)}:${path.substr(3)}`; // "/c:".length === 3
			}
		} else if (path.length >= 2 && path.charCodeAt(1) === CharCode.Colon) {
			let code = path.charCodeAt(0);
			if (code >= CharCode.A && code <= CharCode.Z) {
				path = `${String.fromCharCode(code + 32)}:${path.substr(2)}`; // "c:".length === 2
			}
		}
		// encode the rest of the path
		res += encoder(path, true);
	}
	if (query) {
		res += '?';
		res += encoder(query, false);
	}
	if (fragment) {
		res += '#';
		res += !skipEncoding ? encodeURIComponentFast(fragment, false) : fragment;
	}
	return res;
}