uri.ts 14.9 KB
Newer Older
E
Erich Gamma 已提交
1 2 3 4 5 6
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/
'use strict';

7
import * as platform from 'vs/base/common/platform';
8
import { CharCode } from 'vs/base/common/charCode';
9

10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38
const _schemePattern = /^\w[\w\d+.-]*$/;
const _singleSlashStart = /^\//;
const _doubleSlashStart = /^\/\//;

/**
 * Checks the scheme and path of the given uri and throws an
 * `Error` whose message starts with `[UriError]` when a check fails.
 *
 * @param ret The uri whose `scheme`, `authority` and `path` are inspected.
 */
function _validateUri(ret: URI): void {
	const { scheme, authority, path } = ret;

	// scheme, https://tools.ietf.org/html/rfc3986#section-3.1
	// ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
	// (an empty scheme is not checked)
	if (scheme && !_schemePattern.test(scheme)) {
		throw new Error('[UriError]: Scheme contains illegal characters.');
	}

	// path, http://tools.ietf.org/html/rfc3986#section-3.3
	// an empty path is valid with and without an authority
	if (!path) {
		return;
	}

	// with an authority the path must begin with a slash
	if (authority && !_singleSlashStart.test(path)) {
		throw new Error('[UriError]: If a URI contains an authority component, then the path component must either be empty or begin with a slash ("/") character');
	}

	// without an authority the path must not begin with two slashes
	if (!authority && _doubleSlashStart.test(path)) {
		throw new Error('[UriError]: If a URI does not contain an authority component, then the path cannot begin with two slash characters ("//")');
	}
}

39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59
/**
 * Implements a bit of https://tools.ietf.org/html/rfc3986#section-5
 *
 * The slash-character is our 'default base' as we don't support
 * constructing URIs relative to other URIs. This also means that we
 * alter and potentially break paths,
 * see https://tools.ietf.org/html/rfc3986#section-5.1.4
 *
 * @param scheme The scheme of the uri being created.
 * @param path The path of the uri being created.
 * @returns For the `https`, `http` and `file` schemes a path that starts
 * with a slash; for every other scheme the given path, unchanged.
 */
function _referenceResolution(scheme: string, path: string): string {
	const wantsRootedPath = scheme === 'https' || scheme === 'http' || scheme === 'file';
	if (!wantsRootedPath) {
		return path;
	}
	if (!path) {
		// no path at all becomes the root
		return _slash;
	}
	return path[0] === _slash ? path : _slash + path;
}

60 61 62 63 64
// shared string constants used throughout this file
const _empty = '';
const _slash = '/';
// Splits a uri string into its components. The capture groups read by
// `URI.parse` are: 2 = scheme, 4 = authority, 5 = path, 7 = query, 9 = fragment.
const _regexp = /^(([^:/?#]+?):)?(\/\/([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?/;
// Matches a path that starts with a slash, a drive letter and a colon, e.g. `/c:`.
const _driveLetterPath = /^\/[a-zA-Z]:/;

E
Erich Gamma 已提交
65 66 67 68 69 70
/**
 * Uniform Resource Identifier (URI) http://tools.ietf.org/html/rfc3986.
 * This class is a simple parser which creates the basic component paths
 * (http://tools.ietf.org/html/rfc3986#section-3) with minimal validation
 * and encoding.
 *
 *       foo://example.com:8042/over/there?name=ferret#nose
 *       \_/   \______________/\_________/ \_________/ \__/
 *        |           |            |            |        |
 *     scheme     authority       path        query   fragment
 *        |   _____________________|__
 *       / \ /                        \
 *       urn:example:animal:ferret:nose
 *
 *
 */
export default class URI implements UriComponents {

	/**
	 * Type guard for uris. Accepts real `URI` instances and, structurally,
	 * any truthy value whose `authority`, `fragment`, `path`, `query` and
	 * `scheme` properties are all strings.
	 */
	static isUri(thing: any): thing is URI {
		if (thing instanceof URI) {
			return true;
		}
		if (!thing) {
			return false;
		}
		return typeof (<URI>thing).authority === 'string'
			&& typeof (<URI>thing).fragment === 'string'
			&& typeof (<URI>thing).path === 'string'
			&& typeof (<URI>thing).query === 'string'
			&& typeof (<URI>thing).scheme === 'string';
	}

	/**
	 * scheme is the 'http' part of 'http://www.msft.com/some/path?query#fragment'.
	 * The part before the first colon.
	 */
	readonly scheme: string;

	/**
	 * authority is the 'www.msft.com' part of 'http://www.msft.com/some/path?query#fragment'.
	 * The part between the first double slashes and the next slash.
	 */
	readonly authority: string;

	/**
	 * path is the '/some/path' part of 'http://www.msft.com/some/path?query#fragment'.
	 */
	readonly path: string;

	/**
	 * query is the 'query' part of 'http://www.msft.com/some/path?query#fragment'.
	 */
	readonly query: string;

	/**
	 * fragment is the 'fragment' part of 'http://www.msft.com/some/path?query#fragment'.
	 */
	readonly fragment: string;

	/**
	 * @internal
	 */
	protected constructor(scheme: string, authority: string, path: string, query: string, fragment: string);

	/**
	 * @internal
	 */
	protected constructor(components: UriComponents);

	/**
	 * Missing (falsy) components are stored as the empty string. When
	 * created from individual strings the path is passed through
	 * `_referenceResolution` and the result is validated; when created
	 * from a components object neither happens.
	 *
	 * @internal
	 */
	protected constructor(schemeOrData: string | UriComponents, authority?: string, path?: string, query?: string, fragment?: string) {

		if (typeof schemeOrData === 'object') {
			this.scheme = schemeOrData.scheme || _empty;
			this.authority = schemeOrData.authority || _empty;
			this.path = schemeOrData.path || _empty;
			this.query = schemeOrData.query || _empty;
			this.fragment = schemeOrData.fragment || _empty;
			// no validation because it's this URI
			// that creates uri components.
			// _validateUri(this);
		} else {
			this.scheme = schemeOrData || _empty;
			this.authority = authority || _empty;
			this.path = _referenceResolution(this.scheme, path || _empty);
			this.query = query || _empty;
			this.fragment = fragment || _empty;

			_validateUri(this);
		}
	}

	// ---- filesystem path -----------------------

	/**
	 * Returns a string representing the corresponding file system path of this URI.
	 * Will handle UNC paths and normalize windows drive letters to lower-case. Also
	 * uses the platform specific path separator. Will *not* validate the path for
	 * invalid characters and semantics. Will *not* look at the scheme of this URI.
	 */
	get fsPath(): string {
		return _makeFsPath(this);
	}

	// ---- modify to new -------------------------

	/**
	 * Derives a uri from this one by replacing some of its components.
	 *
	 * @param change The components to replace. A component that is
	 * `undefined` (or absent) is taken from this uri, a component that is
	 * `null` is reset to the empty string.
	 * @returns This very instance when `change` is falsy or when no
	 * component ends up different, a new uri otherwise.
	 */
	public with(change: { scheme?: string; authority?: string; path?: string; query?: string; fragment?: string }): URI {

		if (!change) {
			return this;
		}

		let { scheme, authority, path, query, fragment } = change;
		if (scheme === void 0) {
			scheme = this.scheme;
		} else if (scheme === null) {
			scheme = _empty;
		}
		if (authority === void 0) {
			authority = this.authority;
		} else if (authority === null) {
			authority = _empty;
		}
		if (path === void 0) {
			path = this.path;
		} else if (path === null) {
			path = _empty;
		}
		if (query === void 0) {
			query = this.query;
		} else if (query === null) {
			query = _empty;
		}
		if (fragment === void 0) {
			fragment = this.fragment;
		} else if (fragment === null) {
			fragment = _empty;
		}

		if (scheme === this.scheme
			&& authority === this.authority
			&& path === this.path
			&& query === this.query
			&& fragment === this.fragment) {

			return this;
		}

		return new _URI(scheme, authority, path, query, fragment);
	}

	// ---- parse & validate ------------------------

	/**
	 * Creates a uri from its string representation.
	 *
	 * Authority, path, query and fragment are percent-decoded; the scheme
	 * is taken as is. Malformed percent-encoding (e.g. a stray `%`) does
	 * not make this function throw: whatever cannot be decoded is kept
	 * verbatim. The resulting components are still validated and an
	 * `Error` is thrown when that validation fails.
	 *
	 * @param value The string to parse.
	 */
	public static parse(value: string): URI {
		const match = _regexp.exec(value);
		if (!match) {
			return new _URI(_empty, _empty, _empty, _empty, _empty);
		}
		return new _URI(
			match[2] || _empty,
			_decodeURIComponentGraceful(match[4] || _empty),
			_decodeURIComponentGraceful(match[5] || _empty),
			_decodeURIComponentGraceful(match[7] || _empty),
			_decodeURIComponentGraceful(match[9] || _empty),
		);
	}

	/**
	 * Creates a `file`-uri from a file system path.
	 *
	 * On windows back-slashes are turned into forward-slashes first. A
	 * path that starts with two slashes is treated as UNC share: the
	 * segment after the two slashes becomes the authority and the
	 * remainder (or `/` when there is none) becomes the path.
	 *
	 * @param path A file system path.
	 */
	public static file(path: string): URI {

		let authority = _empty;

		// normalize to fwd-slashes on windows,
		// on other systems bwd-slashes are valid
		// filename character, eg /f\oo/ba\r.txt
		if (platform.isWindows) {
			path = path.replace(/\\/g, _slash);
		}

		// check for authority as used in UNC shares
		// or use the path as given
		if (path[0] === _slash && path[1] === _slash) {
			let idx = path.indexOf(_slash, 2);
			if (idx === -1) {
				authority = path.substring(2);
				path = _slash;
			} else {
				authority = path.substring(2, idx);
				path = path.substring(idx) || _slash;
			}
		}

		return new _URI('file', authority, path, _empty, _empty);
	}

	/**
	 * Creates a uri from the given components. Missing components
	 * become the empty string and the result is validated.
	 */
	public static from(components: { scheme?: string; authority?: string; path?: string; query?: string; fragment?: string }): URI {
		return new _URI(
			components.scheme,
			components.authority,
			components.path,
			components.query,
			components.fragment,
		);
	}

	// ---- printing/externalize ---------------------------

	/**
	 *
	 * @param skipEncoding Do not encode the result, default is `false`
	 */
	public toString(skipEncoding: boolean = false): string {
		return _asFormatted(this, skipEncoding);
	}

	/**
	 * Returns this instance itself as the value to be serialized.
	 */
	public toJSON(): object {
		return this;
	}

	/**
	 * Turns data, e.g. the result of `toJSON`, back into a uri.
	 *
	 * @param data Uri components, optionally with the cached `fsPath`
	 * and `external` values.
	 * @returns `data` itself when it is falsy or already a `URI`,
	 * otherwise a new uri that is created *without* validation and that
	 * takes over the `fsPath` and `external` values of `data`.
	 */
	static revive(data: UriComponents | any): URI {
		if (!data) {
			return data;
		} else if (data instanceof URI) {
			return data;
		} else {
			let result = new _URI(data);
			result._fsPath = (<UriState>data).fsPath;
			result._formatted = (<UriState>data).external;
			return result;
		}
	}
}

/**
 * Percent-decodes the given string like `decodeURIComponent` does, but
 * never throws. When the string as a whole cannot be decoded, each run
 * of `%XX` escapes is decoded on its own and every escape that still
 * cannot be decoded is kept verbatim.
 *
 * @param value The percent-encoded string.
 * @returns The decoded string.
 */
function _decodeURIComponentGraceful(value: string): string {
	try {
		return decodeURIComponent(value);
	} catch (e) {
		return value.replace(/(?:%[0-9a-fA-F]{2})+/g, run => {
			let decoded = '';
			let rest = run;
			while (rest.length > 0) {
				try {
					decoded += decodeURIComponent(rest);
					break;
				} catch (e2) {
					// keep the leading escape as is and retry with the remainder
					decoded += rest.substr(0, 3);
					rest = rest.substr(3);
				}
			}
			return decoded;
		});
	}
}

J
Johannes Rieken 已提交
299
/**
 * The five components a uri is made of.
 */
export interface UriComponents {
	scheme: string;
	authority: string;
	path: string;
	query: string;
	fragment: string;
}

/**
 * The serialized form of a uri: its components plus a marker
 * and the values that had been cached when it was serialized.
 */
interface UriState extends UriComponents {
	// marker property, written as `1` by `_URI#toJSON`
	$mid: number;
	// the cached file system path, see `_URI#fsPath`
	fsPath: string;
	// the cached, encoded string form, see `_URI#toString`
	external: string;
}

313

314 315
// tslint:disable-next-line:class-name
class _URI extends URI {

	// caches, filled on first use or when reviving serialized state
	_formatted: string = null;
	_fsPath: string = null;

	/**
	 * Same as `URI#fsPath` but computed once and cached afterwards.
	 */
	get fsPath(): string {
		return this._fsPath || (this._fsPath = _makeFsPath(this));
	}

	/**
	 * Same as `URI#toString`. The encoded form is computed once and
	 * cached, the form that skips encoding is computed on each call.
	 */
	public toString(skipEncoding: boolean = false): string {
		if (skipEncoding) {
			// we don't cache that
			return _asFormatted(this, true);
		}
		return this._formatted || (this._formatted = _asFormatted(this, false));
	}

	/**
	 * Creates the serializable state of this uri: the `$mid` marker,
	 * then the cached values and the components. Empty values are left out.
	 */
	toJSON(): object {
		const { _fsPath, _formatted, path, scheme, authority, query, fragment } = this;
		const state = <UriState>{ $mid: 1 };

		// cached state
		if (_fsPath) { state.fsPath = _fsPath; }
		if (_formatted) { state.external = _formatted; }

		// uri components
		if (path) { state.path = path; }
		if (scheme) { state.scheme = scheme; }
		if (authority) { state.authority = authority; }
		if (query) { state.query = query; }
		if (fragment) { state.fragment = fragment; }

		return state;
	}
}

370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472
// Maps character codes to their percent-encoded form. Used by
// `encodeURIComponentFast` and `encodeURIComponentMinimal` in this file.
// reserved characters: https://tools.ietf.org/html/rfc3986#section-2.2
const encodeTable = {
	[CharCode.Colon]: '%3A', // gen-delims
	[CharCode.Slash]: '%2F',
	[CharCode.QuestionMark]: '%3F',
	[CharCode.Hash]: '%23',
	[CharCode.OpenSquareBracket]: '%5B',
	[CharCode.CloseSquareBracket]: '%5D',
	[CharCode.AtSign]: '%40',

	[CharCode.ExclamationMark]: '%21', // sub-delims
	[CharCode.DollarSign]: '%24',
	[CharCode.Ampersand]: '%26',
	[CharCode.SingleQuote]: '%27',
	[CharCode.OpenParen]: '%28',
	[CharCode.CloseParen]: '%29',
	[CharCode.Asterisk]: '%2A',
	[CharCode.Plus]: '%2B',
	[CharCode.Comma]: '%2C',
	[CharCode.Semicolon]: '%3B',
	[CharCode.Equals]: '%3D',

	// not a reserved character, but listed here as well
	[CharCode.Space]: '%20',
};

/**
 * Percent-encodes a uri component. Unreserved characters are kept,
 * characters listed in `encodeTable` are replaced with their table
 * entry, and all other characters are handed to the native
 * `encodeURIComponent`, in runs that are as long as possible.
 *
 * @param uriComponent The string to encode.
 * @param allowSlash When `true` the slash-character is kept as is.
 * @returns The encoded string, or `uriComponent` itself when nothing
 * needed encoding.
 */
function encodeURIComponentFast(uriComponent: string, allowSlash: boolean): string {
	// stays undefined for as long as the input can be returned as is
	let encoded: string = undefined;
	// start of the run that waits for native encoding, -1 when there is none
	let pendingFrom = -1;

	// natively encodes the waiting run, which ends before `upTo`
	const flushPending = (upTo: number): void => {
		if (pendingFrom !== -1) {
			encoded += encodeURIComponent(uriComponent.substring(pendingFrom, upTo));
			pendingFrom = -1;
		}
	};

	const length = uriComponent.length;
	for (let i = 0; i < length; i++) {
		const ch = uriComponent.charCodeAt(i);

		// unreserved characters: https://tools.ietf.org/html/rfc3986#section-2.3
		const keepAsIs = (ch >= CharCode.a && ch <= CharCode.z)
			|| (ch >= CharCode.A && ch <= CharCode.Z)
			|| (ch >= CharCode.Digit0 && ch <= CharCode.Digit9)
			|| ch === CharCode.Dash
			|| ch === CharCode.Period
			|| ch === CharCode.Underline
			|| ch === CharCode.Tilde
			|| (allowSlash && ch === CharCode.Slash);

		if (keepAsIs) {
			flushPending(i);
			// only copy once we have started to build a new string
			if (encoded !== undefined) {
				encoded += uriComponent.charAt(i);
			}
			continue;
		}

		// encoding needed, from here on a new string is built
		if (encoded === undefined) {
			encoded = uriComponent.substr(0, i);
		}

		const fromTable = encodeTable[ch];
		if (fromTable !== undefined) {
			flushPending(i);
			encoded += fromTable;
		} else if (pendingFrom === -1) {
			// not in the table: begin a run for native encoding
			pendingFrom = i;
		}
	}

	flushPending(length);

	return encoded === undefined ? uriComponent : encoded;
}

/**
 * Encodes only the hash- and the question-mark-character, using
 * their entries in `encodeTable`. Everything else is kept as is.
 *
 * @param path The string to encode.
 * @returns The encoded string, or `path` itself when it contains
 * neither of the two characters.
 */
function encodeURIComponentMinimal(path: string): string {
	// stays undefined for as long as the input can be returned as is
	let encoded: string = undefined;
	for (let i = 0; i < path.length; i++) {
		const ch = path.charCodeAt(i);
		const mustEscape = ch === CharCode.Hash || ch === CharCode.QuestionMark;
		if (mustEscape) {
			if (encoded === undefined) {
				encoded = path.substr(0, i);
			}
			encoded += encodeTable[ch];
		} else if (encoded !== undefined) {
			encoded += path[i];
		}
	}
	return encoded === undefined ? path : encoded;
}
J
Johannes Rieken 已提交
473

474 475 476 477 478 479 480
/**
 * Compute `fsPath` for the given uri
 * @param uri The uri to compute the file system path for.
 * @returns The path, with back-slashes instead of forward-slashes
 * when running on windows.
 */
function _makeFsPath(uri: URI): string {
	const { authority, path, scheme } = uri;

	let fsPath: string;
	if (authority && path.length > 1 && scheme === 'file') {
		// unc path: file://shares/c$/far/boo
		fsPath = `//${authority}${path}`;
	} else if (_driveLetterPath.test(path)) {
		// windows drive letter: file:///c:/far/boo
		// drop the leading slash and lower-case the drive letter
		fsPath = path[1].toLowerCase() + path.substr(2);
	} else {
		// other path
		fsPath = path;
	}

	return platform.isWindows
		? fsPath.replace(/\//g, '\\')
		: fsPath;
}
496

497 498 499 500
/**
 * Create the external version of a uri
 *
 * @param uri The uri to format.
 * @param skipEncoding When `true` only the hash- and the
 * question-mark-character are encoded, otherwise the components are
 * fully percent-encoded.
 * @returns The string form of the uri.
 */
function _asFormatted(uri: URI, skipEncoding: boolean): string {

	const encode = skipEncoding
		? encodeURIComponentMinimal
		: encodeURIComponentFast;

	let { scheme, authority, path, query, fragment } = uri;

	let formatted = scheme ? scheme + ':' : '';

	// file-uris get the double slash even without an authority
	if (authority || scheme === 'file') {
		formatted += _slash + _slash;
	}

	if (authority) {
		const atIdx = authority.indexOf('@');
		if (atIdx !== -1) {
			// <user>@<auth>
			const userinfo = authority.substr(0, atIdx);
			authority = authority.substr(atIdx + 1);
			const passIdx = userinfo.indexOf(':');
			if (passIdx === -1) {
				formatted += encode(userinfo, false);
			} else {
				// <user>:<pass>@<auth>
				formatted += encode(userinfo.substr(0, passIdx), false)
					+ ':'
					+ encode(userinfo.substr(passIdx + 1), false);
			}
			formatted += '@';
		}

		authority = authority.toLowerCase();
		const portIdx = authority.indexOf(':');
		if (portIdx === -1) {
			formatted += encode(authority, false);
		} else {
			// <auth>:<port>, the colon and what follows are appended as is
			formatted += encode(authority.substr(0, portIdx), false) + authority.substr(portIdx);
		}
	}

	if (path) {
		// lower-case windows drive letters in /C:/fff or C:/fff
		let driveIdx = -1;
		if (path.length >= 3 && path.charCodeAt(0) === CharCode.Slash && path.charCodeAt(2) === CharCode.Colon) {
			driveIdx = 1; // "/C:"
		} else if (path.length >= 2 && path.charCodeAt(1) === CharCode.Colon) {
			driveIdx = 0; // "C:"
		}
		if (driveIdx !== -1) {
			const letter = path.charCodeAt(driveIdx);
			if (letter >= CharCode.A && letter <= CharCode.Z) {
				// +32 turns an upper-case ASCII letter into its lower-case form
				path = path.substr(0, driveIdx) + String.fromCharCode(letter + 32) + path.substr(driveIdx + 1);
			}
		}
		// encode the rest of the path
		formatted += encode(path, true);
	}

	if (query) {
		formatted += '?' + encode(query, false);
	}

	if (fragment) {
		formatted += '#' + encode(fragment, false);
	}

	return formatted;
}