uri.ts 15.1 KB
Newer Older
E
Erich Gamma 已提交
1 2 3 4 5 6
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/
'use strict';

7
import { isWindows } from 'vs/base/common/platform';
8
import { CharCode } from 'vs/base/common/charCode';
9

10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38
// Accepted scheme shape. NOTE: `\w` also admits digits and `_` as the first
// character, which is more lenient than the RFC grammar quoted below.
const _schemePattern = /^\w[\w\d+.-]*$/;
// Path starts with at least one slash.
const _singleSlashStart = /^\//;
// Path starts with two slashes.
const _doubleSlashStart = /^\/\//;

/**
 * Checks the scheme and path of `ret` against a small subset of RFC 3986.
 *
 * - scheme (https://tools.ietf.org/html/rfc3986#section-3.1):
 *   `ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )`; an empty scheme is accepted.
 * - path (http://tools.ietf.org/html/rfc3986#section-3.3): with an authority
 *   the path must be empty or start with "/"; without an authority the path
 *   must not start with "//".
 *
 * @param ret the uri whose `scheme`, `authority` and `path` are inspected
 * @throws Error prefixed with `[UriError]` on the first violated rule
 */
function _validateUri(ret: URI): void {
	const { scheme, authority, path } = ret;

	if (scheme && !_schemePattern.test(scheme)) {
		throw new Error('[UriError]: Scheme contains illegal characters.');
	}

	// an empty path is valid with and without an authority
	if (!path) {
		return;
	}

	if (authority && !_singleSlashStart.test(path)) {
		throw new Error('[UriError]: If a URI contains an authority component, then the path component must either be empty or begin with a slash ("/") character');
	}

	if (!authority && _doubleSlashStart.test(path)) {
		throw new Error('[UriError]: If a URI does not contain an authority component, then the path cannot begin with two slash characters ("//")');
	}
}

39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59
/**
 * Implements a small part of https://tools.ietf.org/html/rfc3986#section-5.
 *
 * Constructing URIs relative to other URIs is not supported, so the slash
 * character acts as the 'default base' (see
 * https://tools.ietf.org/html/rfc3986#section-5.1.4). As a consequence paths
 * are altered, and potentially broken, for the schemes handled here.
 *
 * @param scheme scheme of the uri being constructed
 * @param path path as given by the caller
 * @returns for `https`, `http` and `file`: the path with a leading slash
 * (a falsy path becomes just the slash); for any other scheme the path as given
 */
function _referenceResolution(scheme: string, path: string): string {
	const isRooted = scheme === 'https' || scheme === 'http' || scheme === 'file';
	if (!isRooted) {
		return path;
	}
	if (!path) {
		return _slash;
	}
	return path[0] === _slash ? path : _slash + path;
}

60 61 62 63
// Shared literals for the empty component value and the path separator.
const _empty = '';
const _slash = '/';
// Splits a URI reference into its components, following the shape of the
// regular expression in RFC 3986, appendix B. Capture groups as consumed by
// `URI.parse`: 2 = scheme, 4 = authority, 5 = path, 7 = query, 9 = fragment.
// Every part of the pattern is optional, so it appears to match any input
// string (possibly with all groups empty or undefined).
const _regexp = /^(([^:/?#]+?):)?(\/\/([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?/;

E
Erich Gamma 已提交
64 65 66 67 68 69
/**
 * Uniform Resource Identifier (URI) http://tools.ietf.org/html/rfc3986.
 * This class is a simple parser which creates the basic component parts
 * (http://tools.ietf.org/html/rfc3986#section-3) with minimal validation
 * and encoding.
 *
 *       foo://example.com:8042/over/there?name=ferret#nose
 *       \_/   \______________/\_________/ \_________/ \__/
 *        |           |            |            |        |
 *     scheme     authority       path        query   fragment
 *        |   _____________________|__
 *       / \ /                        \
 *       urn:example:animal:ferret:nose
 *
 * Instances are created via the static factories (`parse`, `file`, `from`,
 * `revive`) or derived from an existing instance via `with`; the constructor
 * is protected.
 */
export default class URI implements UriComponents {

	/**
	 * Type guard: `true` for instances of this class and for any other truthy
	 * value whose five component properties are all strings.
	 */
	static isUri(thing: any): thing is URI {
		if (thing instanceof URI) {
			return true;
		}
		if (!thing) {
			return false;
		}
		return typeof (<URI>thing).authority === 'string'
			&& typeof (<URI>thing).fragment === 'string'
			&& typeof (<URI>thing).path === 'string'
			&& typeof (<URI>thing).query === 'string'
			&& typeof (<URI>thing).scheme === 'string';
	}

	/**
	 * scheme is the 'http' part of 'http://www.msft.com/some/path?query#fragment'.
	 * The part before the first colon.
	 */
	readonly scheme: string;

	/**
	 * authority is the 'www.msft.com' part of 'http://www.msft.com/some/path?query#fragment'.
	 * The part between the first double slashes and the next slash.
	 */
	readonly authority: string;

	/**
	 * path is the '/some/path' part of 'http://www.msft.com/some/path?query#fragment'.
	 */
	readonly path: string;

	/**
	 * query is the 'query' part of 'http://www.msft.com/some/path?query#fragment'.
	 */
	readonly query: string;

	/**
	 * fragment is the 'fragment' part of 'http://www.msft.com/some/path?query#fragment'.
	 */
	readonly fragment: string;

	/**
	 * @internal
	 */
	protected constructor(scheme: string, authority: string, path: string, query: string, fragment: string);

	/**
	 * @internal
	 */
	protected constructor(components: UriComponents);

	/**
	 * Either copies the components of an object as they are (no validation,
	 * no path adjustment) or builds the uri from individual strings, in which
	 * case the path is run through `_referenceResolution` and the result is
	 * checked by `_validateUri`. Missing components become the empty string.
	 *
	 * @internal
	 */
	protected constructor(schemeOrData: string | UriComponents, authority?: string, path?: string, query?: string, fragment?: string) {

		if (typeof schemeOrData === 'object') {
			this.scheme = schemeOrData.scheme || _empty;
			this.authority = schemeOrData.authority || _empty;
			this.path = schemeOrData.path || _empty;
			this.query = schemeOrData.query || _empty;
			this.fragment = schemeOrData.fragment || _empty;
			// no validation because it's this URI
			// that creates uri components.
			// _validateUri(this);
		} else {
			this.scheme = schemeOrData || _empty;
			this.authority = authority || _empty;
			this.path = _referenceResolution(this.scheme, path || _empty);
			this.query = query || _empty;
			this.fragment = fragment || _empty;

			_validateUri(this);
		}
	}

	// ---- filesystem path -----------------------

	/**
	 * Returns a string representing the corresponding file system path of this URI.
	 * Will handle UNC paths and normalize windows drive letters to lower-case. Also
	 * uses the platform specific path separator. Will *not* validate the path for
	 * invalid characters and semantics. The scheme is only consulted to recognise
	 * UNC paths: the authority is used as the share host for `file` uris only.
	 */
	get fsPath(): string {
		return _makeFsPath(this);
	}

	// ---- modify to new -------------------------

	/**
	 * Derives a uri from this one with some components replaced.
	 *
	 * @param change components to replace. A component that is `undefined`
	 * (or absent) keeps its current value, `null` resets it to the empty string.
	 * @returns `this` when `change` is falsy or when no component would differ,
	 * otherwise a new uri (validated like any uri built from strings).
	 */
	public with(change: { scheme?: string; authority?: string; path?: string; query?: string; fragment?: string }): URI {

		if (!change) {
			return this;
		}

		let { scheme, authority, path, query, fragment } = change;
		if (scheme === void 0) {
			scheme = this.scheme;
		} else if (scheme === null) {
			scheme = _empty;
		}
		if (authority === void 0) {
			authority = this.authority;
		} else if (authority === null) {
			authority = _empty;
		}
		if (path === void 0) {
			path = this.path;
		} else if (path === null) {
			path = _empty;
		}
		if (query === void 0) {
			query = this.query;
		} else if (query === null) {
			query = _empty;
		}
		if (fragment === void 0) {
			fragment = this.fragment;
		} else if (fragment === null) {
			fragment = _empty;
		}

		if (scheme === this.scheme
			&& authority === this.authority
			&& path === this.path
			&& query === this.query
			&& fragment === this.fragment) {

			return this;
		}

		return new _URI(scheme, authority, path, query, fragment);
	}

	// ---- parse & validate ------------------------

	/**
	 * Creates a uri from its string form. The string is split with `_regexp`;
	 * authority, path, query and fragment are percent-decoded, the scheme is
	 * taken as is.
	 *
	 * Malformed percent-escapes (a lone `%`, `%zz`, byte sequences that are
	 * not valid UTF-8) do not make parsing fail: what cannot be decoded is
	 * kept verbatim. Strings without malformed escapes decode exactly as with
	 * `decodeURIComponent`.
	 *
	 * @param value the string to parse
	 * @throws Error prefixed with `[UriError]` when the components violate
	 * the rules checked by `_validateUri`
	 */
	public static parse(value: string): URI {
		const match = _regexp.exec(value);
		if (!match) {
			return new _URI(_empty, _empty, _empty, _empty, _empty);
		}
		return new _URI(
			match[2] || _empty,
			_decodeURIComponentGraceful(match[4] || _empty),
			_decodeURIComponentGraceful(match[5] || _empty),
			_decodeURIComponentGraceful(match[7] || _empty),
			_decodeURIComponentGraceful(match[9] || _empty)
		);
	}

	/**
	 * Creates a `file` uri from a file system path.
	 *
	 * @param path file system path. On windows back-slashes are converted to
	 * forward slashes first. A path starting with two slashes is treated as a
	 * UNC path: the segment after them becomes the authority and the rest
	 * (or `/` when there is none) the path.
	 */
	public static file(path: string): URI {

		let authority = _empty;

		// normalize to fwd-slashes on windows,
		// on other systems bwd-slashes are valid
		// filename character, eg /f\oo/ba\r.txt
		if (isWindows) {
			path = path.replace(/\\/g, _slash);
		}

		// check for authority as used in UNC shares
		// or use the path as given
		if (path[0] === _slash && path[1] === _slash) {
			let idx = path.indexOf(_slash, 2);
			if (idx === -1) {
				authority = path.substring(2);
				path = _slash;
			} else {
				authority = path.substring(2, idx);
				path = path.substring(idx) || _slash;
			}
		}

		return new _URI('file', authority, path, _empty, _empty);
	}

	/**
	 * Creates a uri from individual components; missing ones default to the
	 * empty string. The result is validated like any uri built from strings.
	 */
	public static from(components: { scheme?: string; authority?: string; path?: string; query?: string; fragment?: string }): URI {
		return new _URI(
			components.scheme,
			components.authority,
			components.path,
			components.query,
			components.fragment
		);
	}

	// ---- printing/externalize ---------------------------

	/**
	 * Returns the string form of this uri as produced by `_asFormatted`.
	 *
	 * @param skipEncoding Do not encode the result, default is `false`
	 */
	public toString(skipEncoding: boolean = false): string {
		return _asFormatted(this, skipEncoding);
	}

	/**
	 * Returns the object that `JSON.stringify` serializes; here the instance itself.
	 */
	public toJSON(): object {
		return this;
	}

	/**
	 * Turns plain uri data (e.g. the result of a JSON round-trip) back into a uri.
	 *
	 * @param data falsy values and `URI` instances are returned unchanged.
	 * Anything else is read as uri components without validation; its
	 * `fsPath` and `external` properties, when present, pre-populate the
	 * caches of the created instance.
	 */
	static revive(data: UriComponents | any): URI {
		if (!data) {
			return data;
		} else if (data instanceof URI) {
			return data;
		} else {
			let result = new _URI(data);
			result._fsPath = (<UriState>data).fsPath;
			result._formatted = (<UriState>data).external;
			return result;
		}
	}
}

/**
 * Decodes one run of consecutive `%XX` escapes. When the run as a whole is
 * not valid, its first escape is kept verbatim and decoding continues with
 * the remainder.
 */
function _decodeEscapeRun(run: string): string {
	try {
		return decodeURIComponent(run);
	} catch (_err) {
		return run.length > 3
			? run.substr(0, 3) + _decodeEscapeRun(run.substr(3))
			: run;
	}
}

/**
 * `decodeURIComponent` that never throws. Well-formed input is decoded by
 * the native function; only when that fails is each run of `%XX` escapes
 * decoded on its own, leaving undecodable text untouched.
 */
function _decodeURIComponentGraceful(value: string): string {
	try {
		return decodeURIComponent(value);
	} catch (_err) {
		return value.replace(/(?:%[0-9A-Fa-f]{2})+/g, run => _decodeEscapeRun(run));
	}
}

J
Johannes Rieken 已提交
298
/**
 * The five components of a uri as plain strings. `URI` implements this shape
 * and its constructor accepts it as an alternative to individual arguments.
 */
export interface UriComponents {
	scheme: string;
	authority: string;
	path: string;
	query: string;
	fragment: string;
}

/**
 * Serialized form of a uri: the object built by `_URI.toJSON` and read back
 * by `URI.revive`. On top of the uri components it carries the cached values
 * of the serialized instance.
 */
interface UriState extends UriComponents {
	// written as `1` by `_URI.toJSON`; presumably a type marker for the
	// serialization layer - TODO confirm against the consumer
	$mid: number;
	// cached file system path; `_URI.toJSON` only sets it when it was computed
	fsPath: string;
	// cached encoded string form; `_URI.toJSON` only sets it when it was computed
	external: string;
}

312

313 314
/**
 * The concrete uri class instantiated by the static factories of `URI`.
 * It adds caching of the two derived strings (`fsPath` and the encoded
 * string form) and a compact JSON representation that carries those caches.
 */
// tslint:disable-next-line:class-name
class _URI extends URI {

	// cached result of `toString()` with encoding, `null` until computed
	_formatted: string = null;
	// cached result of `fsPath`, `null` until computed
	_fsPath: string = null;

	/**
	 * Same value as `URI#fsPath`, computed on first access and cached.
	 */
	get fsPath(): string {
		if (!this._fsPath) {
			this._fsPath = _makeFsPath(this);
		}
		return this._fsPath;
	}

	/**
	 * Same value as `URI#toString`. Only the encoded form
	 * (`skipEncoding === false`) is cached.
	 */
	public toString(skipEncoding: boolean = false): string {
		if (!skipEncoding) {
			if (!this._formatted) {
				this._formatted = _asFormatted(this, false);
			}
			return this._formatted;
		} else {
			// we don't cache that
			return _asFormatted(this, true);
		}
	}

	/**
	 * Builds the serialized form of this uri: the `$mid` marker, the cached
	 * strings that have been computed so far and every non-empty component.
	 * Empty components and unset caches are left out.
	 */
	toJSON(): object {
		const res = <UriState>{
			$mid: 1
		};
		// cached state
		if (this._fsPath) {
			res.fsPath = this._fsPath;
		}
		if (this._formatted) {
			res.external = this._formatted;
		}
		// uri components
		if (this.path) {
			res.path = this.path;
		}
		if (this.scheme) {
			res.scheme = this.scheme;
		}
		if (this.authority) {
			res.authority = this.authority;
		}
		if (this.query) {
			res.query = this.query;
		}
		if (this.fragment) {
			res.fragment = this.fragment;
		}
		return res;
	}
}

369
// Percent-encoded form per character code, consulted by the encoders below
// before falling back to the native `encodeURIComponent`.
// Covers the reserved characters (https://tools.ietf.org/html/rfc3986#section-2.2),
// i.e. gen-delims and sub-delims, plus the space character.
const encodeTable: { [ch: number]: string } = {
	[CharCode.Colon]: '%3A', // gen-delims
	[CharCode.Slash]: '%2F',
	[CharCode.QuestionMark]: '%3F',
	[CharCode.Hash]: '%23',
	[CharCode.OpenSquareBracket]: '%5B',
	[CharCode.CloseSquareBracket]: '%5D',
	[CharCode.AtSign]: '%40',

	[CharCode.ExclamationMark]: '%21', // sub-delims
	[CharCode.DollarSign]: '%24',
	[CharCode.Ampersand]: '%26',
	[CharCode.SingleQuote]: '%27',
	[CharCode.OpenParen]: '%28',
	[CharCode.CloseParen]: '%29',
	[CharCode.Asterisk]: '%2A',
	[CharCode.Plus]: '%2B',
	[CharCode.Comma]: '%2C',
	[CharCode.Semicolon]: '%3B',
	[CharCode.Equals]: '%3D',

	[CharCode.Space]: '%20',
};

/**
 * Percent-encodes a uri component.
 *
 * Unreserved characters (https://tools.ietf.org/html/rfc3986#section-2.3)
 * are copied as they are, characters listed in `encodeTable` are replaced by
 * their table entry, and every other stretch of characters is handed to the
 * native `encodeURIComponent` in one piece. When nothing needs encoding the
 * input string itself is returned.
 *
 * @param uriComponent the text to encode
 * @param allowSlash when `true`, `/` is copied instead of being encoded
 */
function encodeURIComponentFast(uriComponent: string, allowSlash: boolean): string {
	// stays undefined for as long as the input can be returned unchanged
	let out: string = undefined;
	// start of a stretch that is waiting for the native encoder, -1 if none
	let pendingStart = -1;

	// hands the waiting stretch, if any, up to (excluding) `end` to the native encoder
	const flushPending = (end: number): void => {
		if (pendingStart === -1) {
			return;
		}
		out += encodeURIComponent(uriComponent.substring(pendingStart, end));
		pendingStart = -1;
	};

	for (let i = 0; i < uriComponent.length; i++) {
		const ch = uriComponent.charCodeAt(i);

		const isLetter = (ch >= CharCode.a && ch <= CharCode.z) || (ch >= CharCode.A && ch <= CharCode.Z);
		const isDigit = ch >= CharCode.Digit0 && ch <= CharCode.Digit9;
		const isMark = ch === CharCode.Dash
			|| ch === CharCode.Period
			|| ch === CharCode.Underline
			|| ch === CharCode.Tilde;

		if (isLetter || isDigit || isMark || (allowSlash && ch === CharCode.Slash)) {
			flushPending(i);
			// copy only once a result string had to be allocated
			if (out !== undefined) {
				out += uriComponent.charAt(i);
			}
			continue;
		}

		// from here on the result differs from the input
		if (out === undefined) {
			out = uriComponent.substr(0, i);
		}

		const fromTable = encodeTable[ch];
		if (fromTable !== undefined) {
			flushPending(i);
			out += fromTable;
		} else if (pendingStart === -1) {
			// remember where the stretch for the native encoder begins
			pendingStart = i;
		}
	}

	flushPending(uriComponent.length);

	return out === undefined ? uriComponent : out;
}

/**
 * Encodes only `#` and `?` (using their `encodeTable` entries) and copies
 * every other character. When neither character occurs the input string
 * itself is returned.
 *
 * @param path the text to encode
 */
function encodeURIComponentMinimal(path: string): string {
	// stays undefined until the first character that needs encoding
	let encoded: string = undefined;
	let i = 0;
	while (i < path.length) {
		const ch = path.charCodeAt(i);
		const mustEscape = ch === CharCode.Hash || ch === CharCode.QuestionMark;
		if (mustEscape) {
			if (encoded === undefined) {
				encoded = path.substr(0, i);
			}
			encoded += encodeTable[ch];
		} else if (encoded !== undefined) {
			encoded += path[i];
		}
		i++;
	}
	return encoded === undefined ? path : encoded;
}
J
Johannes Rieken 已提交
472

473 474 475 476 477 478 479
/**
 * Compute `fsPath` for the given uri.
 *
 * - `file` uris with an authority and a path longer than one character
 *   become UNC paths (`//authority/path`),
 * - paths of the form `/X:...` (X being an ASCII letter) lose the leading
 *   slash and get a lower-case drive letter,
 * - any other path is used as is.
 *
 * On windows all forward slashes of the result are turned into back-slashes.
 *
 * @param uri the uri whose `scheme`, `authority` and `path` are read
 * @returns the file system path
 */
function _makeFsPath(uri: URI): string {

	const path = uri.path;
	const driveCode = path.charCodeAt(1);

	let value: string;
	if (uri.authority && path.length > 1 && uri.scheme === 'file') {
		// unc path: file://shares/c$/far/boo
		value = `//${uri.authority}${path}`;
	} else if (
		path.charCodeAt(0) === CharCode.Slash
		&& (driveCode >= CharCode.A && driveCode <= CharCode.Z || driveCode >= CharCode.a && driveCode <= CharCode.z)
		&& path.charCodeAt(2) === CharCode.Colon
	) {
		// windows drive letter: file:///c:/far/boo
		value = path[1].toLowerCase() + path.substr(2);
	} else {
		// other path
		value = path;
	}
	if (isWindows) {
		value = value.replace(/\//g, '\\');
	}
	return value;
}
499

500 501 502 503
/**
 * Create the external version of a uri
 */
function _asFormatted(uri: URI, skipEncoding: boolean): string {
504

505
	const encoder = !skipEncoding
506 507
		? encodeURIComponentFast
		: encodeURIComponentMinimal;
E
Erich Gamma 已提交
508

509
	let res = '';
510 511
	let { scheme, authority, path, query, fragment } = uri;
	if (scheme) {
512 513
		res += scheme;
		res += ':';
514 515
	}
	if (authority || scheme === 'file') {
516 517
		res += _slash;
		res += _slash;
518 519 520 521
	}
	if (authority) {
		let idx = authority.indexOf('@');
		if (idx !== -1) {
522
			// <user>@<auth>
523 524 525
			const userinfo = authority.substr(0, idx);
			authority = authority.substr(idx + 1);
			idx = userinfo.indexOf(':');
J
Johannes Rieken 已提交
526
			if (idx === -1) {
527
				res += encoder(userinfo, false);
J
Johannes Rieken 已提交
528
			} else {
529
				// <user>:<pass>@<auth>
530
				res += encoder(userinfo.substr(0, idx), false);
531
				res += ':';
532
				res += encoder(userinfo.substr(idx + 1), false);
E
Erich Gamma 已提交
533
			}
534
			res += '@';
J
Johannes Rieken 已提交
535
		}
536 537 538
		authority = authority.toLowerCase();
		idx = authority.indexOf(':');
		if (idx === -1) {
539
			res += encoder(authority, false);
540
		} else {
541
			// <auth>:<port>
542
			res += encoder(authority.substr(0, idx), false);
543
			res += authority.substr(idx);
J
Johannes Rieken 已提交
544
		}
545 546 547
	}
	if (path) {
		// lower-case windows drive letters in /C:/fff or C:/fff
548 549 550 551
		if (path.length >= 3 && path.charCodeAt(0) === CharCode.Slash && path.charCodeAt(2) === CharCode.Colon) {
			let code = path.charCodeAt(1);
			if (code >= CharCode.A && code <= CharCode.Z) {
				path = `/${String.fromCharCode(code + 32)}:${path.substr(3)}`; // "/c:".length === 3
552
			}
553 554 555 556
		} else if (path.length >= 2 && path.charCodeAt(1) === CharCode.Colon) {
			let code = path.charCodeAt(0);
			if (code >= CharCode.A && code <= CharCode.Z) {
				path = `${String.fromCharCode(code + 32)}:${path.substr(2)}`; // "/c:".length === 3
557
			}
558
		}
559 560
		// encode the rest of the path
		res += encoder(path, true);
561 562
	}
	if (query) {
563
		res += '?';
564
		res += encoder(query, false);
565 566
	}
	if (fragment) {
567
		res += '#';
568
		res += encoder(fragment, false);
E
Erich Gamma 已提交
569
	}
570
	return res;
E
Erich Gamma 已提交
571
}