uri.ts 12.2 KB
Newer Older
E
Erich Gamma 已提交
1 2 3 4 5 6
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/
'use strict';

7
import * as platform from 'vs/base/common/platform';
E
Erich Gamma 已提交
8

9 10 11 12 13

/**
 * Percent-encodes a single character: a `%` followed by the upper-case
 * hexadecimal value of the first UTF-16 code unit of `ch`.
 */
function _encode(ch: string): string {
	const hex = ch.charCodeAt(0).toString(16);
	return `%${hex.toUpperCase()}`;
}

E
Erich Gamma 已提交
14
/**
 * Like the global `encodeURIComponent` but additionally percent-encodes
 * the characters `!`, `'`, `(`, `)` and `*`.
 *
 * see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/encodeURIComponent
 */
function encodeURIComponent2(str: string): string {
	const encoded = encodeURIComponent(str);
	return encoded.replace(/[!'()*]/g, _encode);
}

19
/**
 * Minimal encoder used when the caller asked to skip encoding. Everything
 * is left untouched except `#` and `?`, which are always percent-encoded
 * because a formatted URI containing them verbatim inside a component
 * could not be parsed back into the same components.
 *
 * @param str The component (or path segment) to encode.
 * @returns `str` with every `#` and `?` replaced by `%23` and `%3F`.
 */
function encodeNoop(str: string): string {
	// The global flag is essential: without it only the first occurrence
	// is replaced and any further `#` or `?` would leak through unencoded.
	return str.replace(/[#?]/g, _encode);
}


24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52
const _schemePattern = /^\w[\w\d+.-]*$/;
const _singleSlashStart = /^\//;
const _doubleSlashStart = /^\/\//;

/**
 * Checks the scheme and path of the given uri and throws an `Error`
 * when one of them is malformed. Empty schemes and empty paths are
 * not checked.
 */
function _validateUri(ret: URI): void {
	const { scheme, authority, path } = ret;

	// scheme, https://tools.ietf.org/html/rfc3986#section-3.1
	// ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
	const schemeIsIllegal = Boolean(scheme) && !_schemePattern.test(scheme);
	if (schemeIsIllegal) {
		throw new Error('[UriError]: Scheme contains illegal characters.');
	}

	// nothing more to check when there is no path
	if (!path) {
		return;
	}

	// path, http://tools.ietf.org/html/rfc3986#section-3.3
	// With an authority the path must start with a slash, without
	// an authority the path must not start with two slashes.
	if (authority && !_singleSlashStart.test(path)) {
		throw new Error('[UriError]: If a URI contains an authority component, then the path component must either be empty or begin with a slash ("/") character');
	}
	if (!authority && _doubleSlashStart.test(path)) {
		throw new Error('[UriError]: If a URI does not contain an authority component, then the path cannot begin with two slash characters ("//")');
	}
}

53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73
/**
 * Implements a bit of https://tools.ietf.org/html/rfc3986#section-5
 *
 * For the schemes `https`, `http` and `file` the returned path always
 * starts with a slash: an empty path becomes `/` and a path without a
 * leading slash gets one prepended. For all other schemes the path is
 * returned unchanged.
 */
function _referenceResolution(scheme: string, path: string): string {

	// The slash-character is our 'default base' as we don't
	// support constructing URIs relative to other URIs. This
	// also means that we alter and potentially break paths.
	// see https://tools.ietf.org/html/rfc3986#section-5.1.4
	const wantsLeadingSlash = scheme === 'https' || scheme === 'http' || scheme === 'file';
	if (!wantsLeadingSlash) {
		return path;
	}
	if (!path) {
		return _slash;
	}
	return path[0] === _slash ? path : _slash + path;
}

74 75 76 77 78 79
// the empty string, used as default for missing uri components
const _empty = '';
// the path separator used inside uris
const _slash = '/';
// splits a uri string into its components, the capture groups used by
// `URI.parse` are: 2 = scheme, 4 = authority, 5 = path, 7 = query, 9 = fragment
const _regexp = /^(([^:/?#]+?):)?(\/\/([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?/;
// matches paths that start with a slash and a drive letter, e.g. `/c:` or `/C:`
const _driveLetterPath = /^\/[a-zA-Z]:/;
// matches an upper-case drive letter at the start of a path, with an
// optional leading slash (group 1) and the drive incl. colon (group 2)
const _upperCaseDrive = /^(\/)?([A-Z]:)/;

E
Erich Gamma 已提交
80 81 82 83 84 85
/**
 * Percent-decodes `value` like `decodeURIComponent` does but never throws.
 *
 * Well-formed input yields exactly what `decodeURIComponent` returns. When
 * the input contains a malformed escape (a lone `%`, `%zz`, an invalid UTF-8
 * sequence, ...) every run of well-formed `%XX` escapes is decoded on its own
 * and everything that cannot be decoded is kept verbatim.
 */
function _decodeGracefully(value: string): string {
	try {
		return decodeURIComponent(value);
	} catch (e) {
		return value.replace(/(?:%[0-9a-fA-F]{2})+/g, run => {
			try {
				return decodeURIComponent(run);
			} catch (e2) {
				// e.g. bytes that don't form valid UTF-8, keep them as they are
				return run;
			}
		});
	}
}

/**
 * Uniform Resource Identifier (URI) http://tools.ietf.org/html/rfc3986.
 * This class is a simple parser which creates the basic component paths
 * (http://tools.ietf.org/html/rfc3986#section-3) with minimal validation
 * and encoding.
 *
 *       foo://example.com:8042/over/there?name=ferret#nose
 *       \_/   \______________/\_________/ \_________/ \__/
 *        |           |            |            |        |
 *     scheme     authority       path        query   fragment
 *        |   _____________________|__
 *       / \ /                        \
 *       urn:example:animal:ferret:nose
 *
 *
 */
export default class URI implements UriComponents {

	/**
	 * Type guard for URIs. Accepts instances of `URI` as well as any
	 * object whose `authority`, `fragment`, `path`, `query` and `scheme`
	 * properties are all strings.
	 */
	static isUri(thing: any): thing is URI {
		if (thing instanceof URI) {
			return true;
		}
		if (!thing) {
			return false;
		}
		return typeof (<URI>thing).authority === 'string'
			&& typeof (<URI>thing).fragment === 'string'
			&& typeof (<URI>thing).path === 'string'
			&& typeof (<URI>thing).query === 'string'
			&& typeof (<URI>thing).scheme === 'string';
	}

	/**
	 * scheme is the 'http' part of 'http://www.msft.com/some/path?query#fragment'.
	 * The part before the first colon.
	 */
	readonly scheme: string;

	/**
	 * authority is the 'www.msft.com' part of 'http://www.msft.com/some/path?query#fragment'.
	 * The part between the first double slashes and the next slash.
	 */
	readonly authority: string;

	/**
	 * path is the '/some/path' part of 'http://www.msft.com/some/path?query#fragment'.
	 */
	readonly path: string;

	/**
	 * query is the 'query' part of 'http://www.msft.com/some/path?query#fragment'.
	 */
	readonly query: string;

	/**
	 * fragment is the 'fragment' part of 'http://www.msft.com/some/path?query#fragment'.
	 */
	readonly fragment: string;

	/**
	 * @internal
	 */
	protected constructor(scheme: string, authority: string, path: string, query: string, fragment: string);

	/**
	 * @internal
	 */
	protected constructor(components: UriComponents);

	/**
	 * Creates a uri either from individual components or from a components
	 * object. Missing components default to the empty string. Individual
	 * components are normalized (see `_referenceResolution`) and validated
	 * (see `_validateUri`), a components object is taken as is.
	 *
	 * @internal
	 */
	protected constructor(schemeOrData: string | UriComponents, authority?: string, path?: string, query?: string, fragment?: string) {

		if (typeof schemeOrData === 'object') {
			this.scheme = schemeOrData.scheme || _empty;
			this.authority = schemeOrData.authority || _empty;
			this.path = schemeOrData.path || _empty;
			this.query = schemeOrData.query || _empty;
			this.fragment = schemeOrData.fragment || _empty;
			// no validation because it's this URI
			// that creates uri components.
			// _validateUri(this);
		} else {
			this.scheme = schemeOrData || _empty;
			this.authority = authority || _empty;
			this.path = _referenceResolution(this.scheme, path || _empty);
			this.query = query || _empty;
			this.fragment = fragment || _empty;

			_validateUri(this);
		}
	}

	// ---- filesystem path -----------------------

	/**
	 * Returns a string representing the corresponding file system path of this URI.
	 * Will handle UNC paths and normalize windows drive letters to lower-case. Also
	 * uses the platform specific path separator. Will *not* validate the path for
	 * invalid characters and semantics. Will *not* look at the scheme of this URI.
	 */
	get fsPath(): string {
		return _makeFsPath(this);
	}

	// ---- modify to new -------------------------

	/**
	 * Derives a uri from this uri by replacing some of its components.
	 *
	 * @param change The components to replace. A component that is
	 * `undefined` is taken from this uri, a component that is `null` is
	 * reset to the empty string.
	 * @returns A new uri, or this uri when `change` is falsy or when no
	 * component ends up being different.
	 */
	public with(change: { scheme?: string; authority?: string; path?: string; query?: string; fragment?: string }): URI {

		if (!change) {
			return this;
		}

		let { scheme, authority, path, query, fragment } = change;
		if (scheme === void 0) {
			scheme = this.scheme;
		} else if (scheme === null) {
			scheme = _empty;
		}
		if (authority === void 0) {
			authority = this.authority;
		} else if (authority === null) {
			authority = _empty;
		}
		if (path === void 0) {
			path = this.path;
		} else if (path === null) {
			path = _empty;
		}
		if (query === void 0) {
			query = this.query;
		} else if (query === null) {
			query = _empty;
		}
		if (fragment === void 0) {
			fragment = this.fragment;
		} else if (fragment === null) {
			fragment = _empty;
		}

		if (scheme === this.scheme
			&& authority === this.authority
			&& path === this.path
			&& query === this.query
			&& fragment === this.fragment) {

			return this;
		}

		return new _URI(scheme, authority, path, query, fragment);
	}

	// ---- parse & validate ------------------------

	/**
	 * Creates a uri from its string representation, e.g.
	 * `http://www.msft.com/some/path?query#fragment`. Authority, path,
	 * query and fragment are percent-decoded, the scheme is taken as is.
	 *
	 * Malformed percent-escapes (e.g. a lone `%`) do not make parsing
	 * fail, they are kept verbatim in the resulting component.
	 *
	 * @param value The string to parse.
	 * @throws Error when the parsed components don't pass `_validateUri`.
	 */
	public static parse(value: string): URI {
		const match = _regexp.exec(value);
		if (!match) {
			return new _URI(_empty, _empty, _empty, _empty, _empty);
		}
		return new _URI(
			match[2] || _empty,
			_decodeGracefully(match[4] || _empty),
			_decodeGracefully(match[5] || _empty),
			_decodeGracefully(match[7] || _empty),
			_decodeGracefully(match[9] || _empty),
		);
	}

	/**
	 * Creates a uri with the `file` scheme from a file system path. A path
	 * that starts with two slashes is treated as UNC path: the segment
	 * after the two slashes becomes the authority and the remainder (or
	 * `/` when there is none) becomes the path.
	 *
	 * @param path A file system path.
	 */
	public static file(path: string): URI {

		let authority = _empty;

		// normalize to fwd-slashes on windows,
		// on other systems bwd-slashes are valid
		// filename character, eg /f\oo/ba\r.txt
		if (platform.isWindows) {
			path = path.replace(/\\/g, _slash);
		}

		// check for authority as used in UNC shares
		// or use the path as given
		if (path[0] === _slash && path[1] === _slash) {
			let idx = path.indexOf(_slash, 2);
			if (idx === -1) {
				authority = path.substring(2);
				path = _slash;
			} else {
				authority = path.substring(2, idx);
				path = path.substring(idx) || _slash;
			}
		}

		return new _URI('file', authority, path, _empty, _empty);
	}

	/**
	 * Creates a uri from the given components. Missing components default
	 * to the empty string, the result is normalized and validated like
	 * any uri that is created from individual components.
	 *
	 * @throws Error when the components don't pass `_validateUri`.
	 */
	public static from(components: { scheme?: string; authority?: string; path?: string; query?: string; fragment?: string }): URI {
		return new _URI(
			components.scheme,
			components.authority,
			components.path,
			components.query,
			components.fragment,
		);
	}

	// ---- printing/externalize ---------------------------

	/**
	 * Creates the string representation of this uri.
	 *
	 * @param skipEncoding Do not encode the result, default is `false`
	 */
	public toString(skipEncoding: boolean = false): string {
		return _asFormatted(this, skipEncoding);
	}

	/**
	 * Creates the serializable state of this uri: the marker `$mid: 1`,
	 * the `fsPath`, the encoded string representation as `external`, and
	 * every component that is not empty.
	 */
	public toJSON(): object {
		const res = <UriState>{
			$mid: 1,
			fsPath: this.fsPath,
			external: this.toString(),
		};

		if (this.path) {
			res.path = this.path;
		}

		if (this.scheme) {
			res.scheme = this.scheme;
		}

		if (this.authority) {
			res.authority = this.authority;
		}

		if (this.query) {
			res.query = this.query;
		}

		if (this.fragment) {
			res.fragment = this.fragment;
		}

		return res;
	}

	/**
	 * Turns the data that `toJSON` produced back into a uri. Falsy values
	 * and `URI` instances are returned as they are. The components are
	 * *not* validated and the `fsPath` and `external` values found on
	 * `data` are taken over as cached values.
	 */
	static revive(data: UriComponents | any): URI {
		if (!data) {
			return data;
		} else if (data instanceof URI) {
			return data;
		} else {
			let result = new _URI(data);
			result._fsPath = (<UriState>data).fsPath;
			result._formatted = (<UriState>data).external;
			return result;
		}
	}
}

J
Johannes Rieken 已提交
340
/**
 * The five components a uri is made of, see
 * http://tools.ietf.org/html/rfc3986#section-3
 */
export interface UriComponents {
	/** the part before the first colon */
	scheme: string;
	/** the part between the first double slashes and the next slash */
	authority: string;
	/** the path part */
	path: string;
	/** the part after the `?` */
	query: string;
	/** the part after the `#` */
	fragment: string;
}

/**
 * The shape `URI#toJSON` produces and `URI.revive` consumes: the uri
 * components plus a marker and the precomputed string representations.
 */
interface UriState extends UriComponents {
	/** marker, `toJSON` always writes `1` */
	$mid: number;
	/** the value of `URI#fsPath` */
	fsPath: string;
	/** the value of `URI#toString()`, i.e. the encoded form */
	external: string;
}

354

355 356
/**
 * The uri implementation that is actually instantiated. It adds caching
 * of the file system path and of the encoded string representation.
 */
// tslint:disable-next-line:class-name
class _URI extends URI {

	// cached result of `toString(false)`, computed on first use
	_formatted: string = null;
	// cached result of `fsPath`, computed on first use
	_fsPath: string = null;

	get fsPath(): string {
		if (this._fsPath) {
			return this._fsPath;
		}
		this._fsPath = _makeFsPath(this);
		return this._fsPath;
	}

	public toString(skipEncoding: boolean = false): string {
		if (skipEncoding) {
			// the unencoded form is not cached
			return _asFormatted(this, true);
		}
		if (this._formatted) {
			return this._formatted;
		}
		this._formatted = _asFormatted(this, false);
		return this._formatted;
	}
}

J
Johannes Rieken 已提交
381

382 383 384 385 386 387 388
/**
 * Compute `fsPath` for the given uri: UNC paths get the authority
 * prepended, windows drive letters are lower-cased and lose their
 * leading slash, and on windows all slashes become backslashes.
 * @param uri
 */
function _makeFsPath(uri: URI): string {
	const { authority, path } = uri;

	let fsPath: string;
	if (authority && path.length > 1 && uri.scheme === 'file') {
		// unc path: file://shares/c$/far/boo
		fsPath = `//${authority}${path}`;
	} else if (_driveLetterPath.test(path)) {
		// windows drive letter: file:///c:/far/boo
		fsPath = path.charAt(1).toLowerCase() + path.slice(2);
	} else {
		// other path
		fsPath = path;
	}

	return platform.isWindows
		? fsPath.replace(/\//g, '\\')
		: fsPath;
}
404

405 406 407 408
/**
 * Create the external version of a uri, i.e. its string representation.
 * With `skipEncoding` only `#` and `?` are encoded (see `encodeNoop`),
 * otherwise each component is encoded with `encodeURIComponent2`.
 */
function _asFormatted(uri: URI, skipEncoding: boolean): string {

	const encode = skipEncoding ? encodeNoop : encodeURIComponent2;

	const { scheme, query, fragment } = uri;
	let { authority, path } = uri;
	let out = _empty;

	if (scheme) {
		out += scheme + ':';
	}
	if (authority || scheme === 'file') {
		out += '//';
	}

	if (authority) {
		const at = authority.indexOf('@');
		if (at !== -1) {
			// <userinfo>@<host>, where userinfo is <user> or <user>:<password>
			const userinfo = authority.slice(0, at);
			authority = authority.slice(at + 1);
			const colon = userinfo.indexOf(':');
			out += colon === -1
				? encode(userinfo)
				: encode(userinfo.slice(0, colon)) + ':' + encode(userinfo.slice(colon + 1));
			out += '@';
		}
		authority = authority.toLowerCase();
		const portStart = authority.indexOf(':');
		// everything from the colon on (the port) is appended as is
		out += portStart === -1
			? encode(authority)
			: encode(authority.slice(0, portStart)) + authority.slice(portStart);
	}

	if (path) {
		// lower-case windows drive letters in /C:/fff or C:/fff
		const drive = _upperCaseDrive.exec(path);
		if (drive) {
			path = drive[1]
				? '/' + drive[2].toLowerCase() + path.slice(3) // "/c:".length === 3
				: drive[2].toLowerCase() + path.slice(2); // "c:".length === 2
		}

		// encode every segment but not the slashes between them,
		// # and ? are always encoded when occurring in paths because
		// otherwise the result cannot be parsed back again
		out += path.split(_slash).map(segment => encode(segment)).join(_slash);
	}

	if (query) {
		out += '?' + encode(query);
	}
	if (fragment) {
		out += '#' + encode(fragment);
	}

	return out;
}