/*--------------------------------------------------------------------------------------------- * Copyright (c) Microsoft Corporation. All rights reserved. * Licensed under the MIT License. See License.txt in the project root for license information. *--------------------------------------------------------------------------------------------*/ 'use strict'; import { isWindows } from 'vs/base/common/platform'; import { CharCode } from 'vs/base/common/charCode'; const _schemePattern = /^\w[\w\d+.-]*$/; const _singleSlashStart = /^\//; const _doubleSlashStart = /^\/\//; function _validateUri(ret: URI): void { // // scheme, must be set // if (!ret.scheme) { // throw new Error('[UriError]: Scheme is missing.'); // } // scheme, https://tools.ietf.org/html/rfc3986#section-3.1 // ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) if (ret.scheme && !_schemePattern.test(ret.scheme)) { throw new Error('[UriError]: Scheme contains illegal characters.'); } // path, http://tools.ietf.org/html/rfc3986#section-3.3 // If a URI contains an authority component, then the path component // must either be empty or begin with a slash ("/") character. If a URI // does not contain an authority component, then the path cannot begin // with two slash characters ("//"). if (ret.path) { if (ret.authority) { if (!_singleSlashStart.test(ret.path)) { throw new Error('[UriError]: If a URI contains an authority component, then the path component must either be empty or begin with a slash ("/") character'); } } else { if (_doubleSlashStart.test(ret.path)) { throw new Error('[UriError]: If a URI does not contain an authority component, then the path cannot begin with two slash characters ("//")'); } } } } // implements a bit of https://tools.ietf.org/html/rfc3986#section-5 function _referenceResolution(scheme: string, path: string): string { // the slash-character is our 'default base' as we don't // support constructing URIs relative to other URIs. This // also means that we alter and potentially break paths. // see https://tools.ietf.org/html/rfc3986#section-5.1.4 switch (scheme) { case 'https': case 'http': case 'file': if (!path) { path = _slash; } else if (path[0] !== _slash) { path = _slash + path; } break; } return path; } const _empty = ''; const _slash = '/'; const _regexp = /^(([^:/?#]+?):)?(\/\/([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?/; /** * Uniform Resource Identifier (URI) http://tools.ietf.org/html/rfc3986. * This class is a simple parser which creates the basic component parts * (http://tools.ietf.org/html/rfc3986#section-3) with minimal validation * and encoding. * * foo://example.com:8042/over/there?name=ferret#nose * \_/ \______________/\_________/ \_________/ \__/ * | | | | | * scheme authority path query fragment * | _____________________|__ * / \ / \ * urn:example:animal:ferret:nose */ export default class URI implements UriComponents { static isUri(thing: any): thing is URI { if (thing instanceof URI) { return true; } if (!thing) { return false; } return typeof (thing).authority === 'string' && typeof (thing).fragment === 'string' && typeof (thing).path === 'string' && typeof (thing).query === 'string' && typeof (thing).scheme === 'string'; } /** * scheme is the 'http' part of 'http://www.msft.com/some/path?query#fragment'. * The part before the first colon. */ readonly scheme: string; /** * authority is the 'www.msft.com' part of 'http://www.msft.com/some/path?query#fragment'. * The part between the first double slashes and the next slash. */ readonly authority: string; /** * path is the '/some/path' part of 'http://www.msft.com/some/path?query#fragment'. */ readonly path: string; /** * query is the 'query' part of 'http://www.msft.com/some/path?query#fragment'. */ readonly query: string; /** * fragment is the 'fragment' part of 'http://www.msft.com/some/path?query#fragment'. */ readonly fragment: string; /** * @internal */ protected constructor(scheme: string, authority: string, path: string, query: string, fragment: string); /** * @internal */ protected constructor(components: UriComponents); /** * @internal */ protected constructor(schemeOrData: string | UriComponents, authority?: string, path?: string, query?: string, fragment?: string) { if (typeof schemeOrData === 'object') { this.scheme = schemeOrData.scheme || _empty; this.authority = schemeOrData.authority || _empty; this.path = schemeOrData.path || _empty; this.query = schemeOrData.query || _empty; this.fragment = schemeOrData.fragment || _empty; // no validation because it's this URI // that creates uri components. // _validateUri(this); } else { this.scheme = schemeOrData || _empty; this.authority = authority || _empty; this.path = _referenceResolution(this.scheme, path || _empty); this.query = query || _empty; this.fragment = fragment || _empty; _validateUri(this); } } // ---- filesystem path ----------------------- /** * Returns a string representing the corresponding file system path of this URI. * Will handle UNC paths, normalizes windows drive letters to lower-case, and uses the * platform specific path separator. * * * Will *not* validate the path for invalid characters and semantics. * * Will *not* look at the scheme of this URI. * * The result shall *not* be used for display purposes but for accessing a file on disk. * * * The *difference* to `URI#path` is the use of the platform specific separator and the handling * of UNC paths. See the below sample of a file-uri with an authority (UNC path). * * ```ts const u = URI.parse('file://server/c$/folder/file.txt') u.authority === 'server' u.path === '/shares/c$/file.txt' u.fsPath === '\\server\c$\folder\file.txt' ``` * * Using `URI#path` to read a file (using fs-apis) would not be enough because parts of the path, * namely the server name, would be missing. Therefore `URI#fsPath` exists - it's sugar to ease working * with URIs that represent files on disk (`file` scheme). */ get fsPath(): string { return _makeFsPath(this); } // ---- modify to new ------------------------- public with(change: { scheme?: string; authority?: string | null; path?: string | null; query?: string | null; fragment?: string | null }): URI { if (!change) { return this; } let { scheme, authority, path, query, fragment } = change; if (scheme === void 0) { scheme = this.scheme; } else if (scheme === null) { scheme = _empty; } if (authority === void 0) { authority = this.authority; } else if (authority === null) { authority = _empty; } if (path === void 0) { path = this.path; } else if (path === null) { path = _empty; } if (query === void 0) { query = this.query; } else if (query === null) { query = _empty; } if (fragment === void 0) { fragment = this.fragment; } else if (fragment === null) { fragment = _empty; } if (scheme === this.scheme && authority === this.authority && path === this.path && query === this.query && fragment === this.fragment) { return this; } return new _URI(scheme, authority, path, query, fragment); } // ---- parse & validate ------------------------ /** * Creates a new URI from a string, e.g. `http://www.msft.com/some/path`, * `file:///usr/home`, or `scheme:with/path`. * * @param value A string which represents an URI (see `URI#toString`). */ public static parse(value: string): URI { const match = _regexp.exec(value); if (!match) { return new _URI(_empty, _empty, _empty, _empty, _empty); } return new _URI( match[2] || _empty, decodeURIComponent(match[4] || _empty), decodeURIComponent(match[5] || _empty), decodeURIComponent(match[7] || _empty), decodeURIComponent(match[9] || _empty), ); } /** * Creates a new URI from a file system path, e.g. `c:\my\files`, * `/usr/home`, or `\\server\share\some\path`. * * The *difference* between `URI#parse` and `URI#file` is that the latter treats the argument * as path, not as stringified-uri. E.g. `URI.file(path)` is **not the same as** * `URI.parse('file://' + path)` because the path might contain characters that are * interpreted (# and ?). See the following sample: * ```ts const good = URI.file('/coding/c#/project1'); good.scheme === 'file'; good.path === '/coding/c#/project1'; good.fragment === ''; const bad = URI.parse('file://' + '/coding/c#/project1'); bad.scheme === 'file'; bad.path === '/coding/c'; // path is now broken bad.fragment === '/project1'; ``` * * @param path A file system path (see `URI#fsPath`) */ public static file(path: string): URI { let authority = _empty; // normalize to fwd-slashes on windows, // on other systems bwd-slashes are valid // filename character, eg /f\oo/ba\r.txt if (isWindows) { path = path.replace(/\\/g, _slash); } // check for authority as used in UNC shares // or use the path as given if (path[0] === _slash && path[1] === _slash) { let idx = path.indexOf(_slash, 2); if (idx === -1) { authority = path.substring(2); path = _slash; } else { authority = path.substring(2, idx); path = path.substring(idx) || _slash; } } return new _URI('file', authority, path, _empty, _empty); } public static from(components: { scheme: string; authority?: string; path?: string; query?: string; fragment?: string }): URI { return new _URI( components.scheme, components.authority, components.path, components.query, components.fragment, ); } // ---- printing/externalize --------------------------- /** * Creates a string presentation for this URI. It's guardeed that calling * `URI.parse` with the result of this function creates an URI which is equal * to this URI. * * * The result shall *not* be used for display purposes but for externalization or transport. * * The result will be encoded using the percentage encoding and encoding happens mostly * ignore the scheme-specific encoding rules. * * @param skipEncoding Do not encode the result, default is `false` */ public toString(skipEncoding: boolean = false): string { return _asFormatted(this, skipEncoding); } public toJSON(): object { return this; } static revive(data: UriComponents | any): URI { if (!data) { return data; } else if (data instanceof URI) { return data; } else { let result = new _URI(data); result._fsPath = (data).fsPath; result._formatted = (data).external; return result; } } } export interface UriComponents { scheme: string; authority: string; path: string; query: string; fragment: string; } interface UriState extends UriComponents { $mid: number; fsPath: string; external: string; } // tslint:disable-next-line:class-name class _URI extends URI { _formatted: string = null; _fsPath: string = null; get fsPath(): string { if (!this._fsPath) { this._fsPath = _makeFsPath(this); } return this._fsPath; } public toString(skipEncoding: boolean = false): string { if (!skipEncoding) { if (!this._formatted) { this._formatted = _asFormatted(this, false); } return this._formatted; } else { // we don't cache that return _asFormatted(this, true); } } toJSON(): object { const res = { $mid: 1 }; // cached state if (this._fsPath) { res.fsPath = this._fsPath; } if (this._formatted) { res.external = this._formatted; } // uri components if (this.path) { res.path = this.path; } if (this.scheme) { res.scheme = this.scheme; } if (this.authority) { res.authority = this.authority; } if (this.query) { res.query = this.query; } if (this.fragment) { res.fragment = this.fragment; } return res; } } // reserved characters: https://tools.ietf.org/html/rfc3986#section-2.2 const encodeTable: { [ch: number]: string } = { [CharCode.Colon]: '%3A', // gen-delims [CharCode.Slash]: '%2F', [CharCode.QuestionMark]: '%3F', [CharCode.Hash]: '%23', [CharCode.OpenSquareBracket]: '%5B', [CharCode.CloseSquareBracket]: '%5D', [CharCode.AtSign]: '%40', [CharCode.ExclamationMark]: '%21', // sub-delims [CharCode.DollarSign]: '%24', [CharCode.Ampersand]: '%26', [CharCode.SingleQuote]: '%27', [CharCode.OpenParen]: '%28', [CharCode.CloseParen]: '%29', [CharCode.Asterisk]: '%2A', [CharCode.Plus]: '%2B', [CharCode.Comma]: '%2C', [CharCode.Semicolon]: '%3B', [CharCode.Equals]: '%3D', [CharCode.Space]: '%20', }; function encodeURIComponentFast(uriComponent: string, allowSlash: boolean): string { let res: string = undefined; let nativeEncodePos = -1; for (let pos = 0; pos < uriComponent.length; pos++) { let code = uriComponent.charCodeAt(pos); // unreserved characters: https://tools.ietf.org/html/rfc3986#section-2.3 if ( (code >= CharCode.a && code <= CharCode.z) || (code >= CharCode.A && code <= CharCode.Z) || (code >= CharCode.Digit0 && code <= CharCode.Digit9) || code === CharCode.Dash || code === CharCode.Period || code === CharCode.Underline || code === CharCode.Tilde || (allowSlash && code === CharCode.Slash) ) { // check if we are delaying native encode if (nativeEncodePos !== -1) { res += encodeURIComponent(uriComponent.substring(nativeEncodePos, pos)); nativeEncodePos = -1; } // check if we write into a new string (by default we try to return the param) if (res !== undefined) { res += uriComponent.charAt(pos); } } else { // encoding needed, we need to allocate a new string if (res === undefined) { res = uriComponent.substr(0, pos); } // check with default table first let escaped = encodeTable[code]; if (escaped !== undefined) { // check if we are delaying native encode if (nativeEncodePos !== -1) { res += encodeURIComponent(uriComponent.substring(nativeEncodePos, pos)); nativeEncodePos = -1; } // append escaped variant to result res += escaped; } else if (nativeEncodePos === -1) { // use native encode only when needed nativeEncodePos = pos; } } } if (nativeEncodePos !== -1) { res += encodeURIComponent(uriComponent.substring(nativeEncodePos)); } return res !== undefined ? res : uriComponent; } function encodeURIComponentMinimal(path: string): string { let res: string = undefined; for (let pos = 0; pos < path.length; pos++) { let code = path.charCodeAt(pos); if (code === CharCode.Hash || code === CharCode.QuestionMark) { if (res === undefined) { res = path.substr(0, pos); } res += encodeTable[code]; } else { if (res !== undefined) { res += path[pos]; } } } return res !== undefined ? res : path; } /** * Compute `fsPath` for the given uri * @param uri */ function _makeFsPath(uri: URI): string { let value: string; if (uri.authority && uri.path.length > 1 && uri.scheme === 'file') { // unc path: file://shares/c$/far/boo value = `//${uri.authority}${uri.path}`; } else if ( uri.path.charCodeAt(0) === CharCode.Slash && (uri.path.charCodeAt(1) >= CharCode.A && uri.path.charCodeAt(1) <= CharCode.Z || uri.path.charCodeAt(1) >= CharCode.a && uri.path.charCodeAt(1) <= CharCode.z) && uri.path.charCodeAt(2) === CharCode.Colon ) { // windows drive letter: file:///c:/far/boo value = uri.path[1].toLowerCase() + uri.path.substr(2); } else { // other path value = uri.path; } if (isWindows) { value = value.replace(/\//g, '\\'); } return value; } /** * Create the external version of a uri */ function _asFormatted(uri: URI, skipEncoding: boolean): string { const encoder = !skipEncoding ? encodeURIComponentFast : encodeURIComponentMinimal; let res = ''; let { scheme, authority, path, query, fragment } = uri; if (scheme) { res += scheme; res += ':'; } if (authority || scheme === 'file') { res += _slash; res += _slash; } if (authority) { let idx = authority.indexOf('@'); if (idx !== -1) { // @ const userinfo = authority.substr(0, idx); authority = authority.substr(idx + 1); idx = userinfo.indexOf(':'); if (idx === -1) { res += encoder(userinfo, false); } else { // :@ res += encoder(userinfo.substr(0, idx), false); res += ':'; res += encoder(userinfo.substr(idx + 1), false); } res += '@'; } authority = authority.toLowerCase(); idx = authority.indexOf(':'); if (idx === -1) { res += encoder(authority, false); } else { // : res += encoder(authority.substr(0, idx), false); res += authority.substr(idx); } } if (path) { // lower-case windows drive letters in /C:/fff or C:/fff if (path.length >= 3 && path.charCodeAt(0) === CharCode.Slash && path.charCodeAt(2) === CharCode.Colon) { let code = path.charCodeAt(1); if (code >= CharCode.A && code <= CharCode.Z) { path = `/${String.fromCharCode(code + 32)}:${path.substr(3)}`; // "/c:".length === 3 } } else if (path.length >= 2 && path.charCodeAt(1) === CharCode.Colon) { let code = path.charCodeAt(0); if (code >= CharCode.A && code <= CharCode.Z) { path = `${String.fromCharCode(code + 32)}:${path.substr(2)}`; // "/c:".length === 3 } } // encode the rest of the path res += encoder(path, true); } if (query) { res += '?'; res += encoder(query, false); } if (fragment) { res += '#'; res += !skipEncoding ? encodeURIComponentFast(fragment, false) : fragment; } return res; }