  /*---------------------------------------------------------------------------------------------
   *  Copyright (c) Microsoft Corporation. All rights reserved.
   *  Licensed under the MIT License. See License.txt in the project root for license information.
   *--------------------------------------------------------------------------------------------*/
  5. import * as paths from './path.js';
  6. import { isWindows } from './platform.js';
  // Accepted shape of a scheme. NOTE: `\w` also admits digits and `_` as the
  // first character, so this is more lenient than RFC 3986 section 3.1.
  const _schemePattern = /^\w[\w\d+.-]*$/;
  // Matches a path that starts with at least one slash.
  const _singleSlashStart = /^\//;
  // Matches a path that starts with two slashes.
  const _doubleSlashStart = /^\/\//;
  /**
   * Checks the components of a URI-like object and throws on the first violation.
   *
   * @param ret An object with `scheme`, `authority`, `path`, `query` and `fragment`.
   * @param _strict When truthy, a missing scheme is an error.
   * @throws {Error} `[UriError]` when the scheme is missing (strict mode only), when
   * the scheme contains illegal characters, or when the path does not fit the
   * authority rules of RFC 3986 section 3.3.
   */
  function _validateUri(ret, _strict) {
      // scheme, must be set
      if (!ret.scheme && _strict) {
          throw new Error(`[UriError]: Scheme is missing: {scheme: "", authority: "${ret.authority}", path: "${ret.path}", query: "${ret.query}", fragment: "${ret.fragment}"}`);
      }
      // scheme, https://tools.ietf.org/html/rfc3986#section-3.1
      // ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
      if (ret.scheme && !_schemePattern.test(ret.scheme)) {
          throw new Error('[UriError]: Scheme contains illegal characters.');
      }
      // An empty path satisfies both of the rules below.
      if (!ret.path) {
          return;
      }
      // path, http://tools.ietf.org/html/rfc3986#section-3.3
      // If a URI contains an authority component, then the path component
      // must either be empty or begin with a slash ("/") character. If a URI
      // does not contain an authority component, then the path cannot begin
      // with two slash characters ("//").
      if (ret.authority) {
          if (!_singleSlashStart.test(ret.path)) {
              throw new Error('[UriError]: If a URI contains an authority component, then the path component must either be empty or begin with a slash ("/") character');
          }
          return;
      }
      if (_doubleSlashStart.test(ret.path)) {
          throw new Error('[UriError]: If a URI does not contain an authority component, then the path cannot begin with two slash characters ("//")');
      }
  }
  // For a while we allowed URIs *without* schemes and this is the migration
  // for them: a URI without a scheme that is not created in strict mode falls
  // back to the file-scheme. That should cause the least carnage.
  /**
   * @param scheme The scheme as given by the caller; may be empty or missing.
   * @param _strict When truthy, no fallback is applied.
   * @returns `'file'` for a falsy scheme in non-strict mode, otherwise `scheme` unchanged.
   */
  function _schemeFix(scheme, _strict) {
      const needsFallback = !scheme && !_strict;
      return needsFallback ? 'file' : scheme;
  }
  /**
   * Implements a bit of https://tools.ietf.org/html/rfc3986#section-5
   *
   * The slash-character is our 'default base' as we don't support constructing
   * URIs relative to other URIs. This also means that we alter and potentially
   * break paths, see https://tools.ietf.org/html/rfc3986#section-5.1.4
   *
   * @param scheme The (already fixed) scheme of the URI.
   * @param path The path as given by the caller.
   * @returns For `https`, `http` and `file`: the path with a leading slash
   * (just `/` for an empty path). For every other scheme: `path` unchanged.
   */
  function _referenceResolution(scheme, path) {
      switch (scheme) {
          case 'https':
          case 'http':
          case 'file':
              if (!path) {
                  return _slash;
              }
              return path[0] === _slash ? path : _slash + path;
          default:
              return path;
      }
  }
  const _empty = '';
  const _slash = '/';
  // Splits a URI string into its components. Capture groups:
  // [2] scheme, [4] authority, [5] path, [7] query, [9] fragment.
  const _regexp = /^(([^:/?#]+?):)?(\/\/([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?/;
  /**
   * Uniform Resource Identifier (URI) http://tools.ietf.org/html/rfc3986.
   * This class is a simple parser which creates the basic component parts
   * (http://tools.ietf.org/html/rfc3986#section-3) with minimal validation
   * and encoding.
   *
   * ```txt
   *       foo://example.com:8042/over/there?name=ferret#nose
   *       \_/   \______________/\_________/ \_________/ \__/
   *        |           |            |            |        |
   *     scheme     authority       path        query   fragment
   *        |   _____________________|__
   *       / \ /                        \
   *       urn:example:animal:ferret:nose
   * ```
   */
  export class URI {
      /**
       * Either called with a single components object (no validation is done)
       * or with the individual components (scheme fallback, reference
       * resolution and validation are applied).
       *
       * @internal
       */
      constructor(schemeOrData, authority, path, query, fragment, _strict = false) {
          // `typeof null` is 'object' as well; a null scheme must take the
          // same route as an undefined one instead of crashing on `null.scheme`.
          if (schemeOrData !== null && typeof schemeOrData === 'object') {
              this.scheme = schemeOrData.scheme || _empty;
              this.authority = schemeOrData.authority || _empty;
              this.path = schemeOrData.path || _empty;
              this.query = schemeOrData.query || _empty;
              this.fragment = schemeOrData.fragment || _empty;
              // no validation because it's this URI
              // that creates uri components.
          }
          else {
              this.scheme = _schemeFix(schemeOrData, _strict);
              this.authority = authority || _empty;
              this.path = _referenceResolution(this.scheme, path || _empty);
              this.query = query || _empty;
              this.fragment = fragment || _empty;
              _validateUri(this, _strict);
          }
      }
      /**
       * Tells whether `thing` is a URI: either an instance of this class or an
       * object that has string-valued components and `fsPath`, plus `with` and
       * `toString` functions.
       */
      static isUri(thing) {
          if (thing instanceof URI) {
              return true;
          }
          if (!thing) {
              return false;
          }
          return typeof thing.authority === 'string'
              && typeof thing.fragment === 'string'
              && typeof thing.path === 'string'
              && typeof thing.query === 'string'
              && typeof thing.scheme === 'string'
              && typeof thing.fsPath === 'string'
              && typeof thing.with === 'function'
              && typeof thing.toString === 'function';
      }
      // ---- filesystem path -----------------------
      /**
       * Returns a string representing the corresponding file system path of this URI.
       * Will handle UNC paths, normalizes windows drive letters to lower-case, and uses the
       * platform specific path separator.
       *
       * * Will *not* validate the path for invalid characters and semantics.
       * * Will *not* look at the scheme of this URI.
       * * The result shall *not* be used for display purposes but for accessing a file on disk.
       *
       *
       * The *difference* to `URI#path` is the use of the platform specific separator and the handling
       * of UNC paths. See the below sample of a file-uri with an authority (UNC path).
       *
       * ```ts
          const u = URI.parse('file://server/c$/folder/file.txt')
          u.authority === 'server'
          u.path === '/c$/folder/file.txt'
          u.fsPath === '\\server\c$\folder\file.txt'
      ```
       *
       * Using `URI#path` to read a file (using fs-apis) would not be enough because parts of the path,
       * namely the server name, would be missing. Therefore `URI#fsPath` exists - it's sugar to ease working
       * with URIs that represent files on disk (`file` scheme).
       */
      get fsPath() {
          return uriToFsPath(this, false);
      }
      // ---- modify to new -------------------------
      /**
       * Derives a URI from this one. In `change`, a component that is
       * `undefined` keeps its current value and a component that is `null`
       * is reset to the empty string.
       *
       * @param change The components to replace.
       * @returns `this` when `change` is falsy or changes nothing, otherwise a new URI.
       */
      with(change) {
          if (!change) {
              return this;
          }
          const resolve = (next, current) => {
              if (next === undefined) {
                  return current;
              }
              return next === null ? _empty : next;
          };
          const scheme = resolve(change.scheme, this.scheme);
          const authority = resolve(change.authority, this.authority);
          const path = resolve(change.path, this.path);
          const query = resolve(change.query, this.query);
          const fragment = resolve(change.fragment, this.fragment);
          const unchanged = scheme === this.scheme
              && authority === this.authority
              && path === this.path
              && query === this.query
              && fragment === this.fragment;
          if (unchanged) {
              return this;
          }
          return new Uri(scheme, authority, path, query, fragment);
      }
      // ---- parse & validate ------------------------
      /**
       * Creates a new URI from a string, e.g. `http://www.msft.com/some/path`,
       * `file:///usr/home`, or `scheme:with/path`.
       *
       * @param value A string which represents an URI (see `URI#toString`).
       */
      static parse(value, _strict = false) {
          const match = _regexp.exec(value);
          if (!match) {
              return new Uri(_empty, _empty, _empty, _empty, _empty);
          }
          return new Uri(match[2] || _empty, percentDecode(match[4] || _empty), percentDecode(match[5] || _empty), percentDecode(match[7] || _empty), percentDecode(match[9] || _empty), _strict);
      }
      /**
       * Creates a new URI from a file system path, e.g. `c:\my\files`,
       * `/usr/home`, or `\\server\share\some\path`.
       *
       * The *difference* between `URI#parse` and `URI#file` is that the latter treats the argument
       * as path, not as stringified-uri. E.g. `URI.file(path)` is **not the same as**
       * `URI.parse('file://' + path)` because the path might contain characters that are
       * interpreted (# and ?). See the following sample:
       * ```ts
      const good = URI.file('/coding/c#/project1');
      good.scheme === 'file';
      good.path === '/coding/c#/project1';
      good.fragment === '';
      const bad = URI.parse('file://' + '/coding/c#/project1');
      bad.scheme === 'file';
      bad.path === '/coding/c'; // path is now broken
      bad.fragment === '/project1';
      ```
       *
       * @param path A file system path (see `URI#fsPath`)
       */
      static file(path) {
          let authority = _empty;
          // normalize to fwd-slashes on windows,
          // on other systems bwd-slashes are valid
          // filename character, eg /f\oo/ba\r.txt
          if (isWindows) {
              path = path.replace(/\\/g, _slash);
          }
          // check for authority as used in UNC shares
          // or use the path as given
          if (path[0] === _slash && path[1] === _slash) {
              const idx = path.indexOf(_slash, 2);
              if (idx === -1) {
                  authority = path.substring(2);
                  path = _slash;
              }
              else {
                  authority = path.substring(2, idx);
                  path = path.substring(idx) || _slash;
              }
          }
          return new Uri('file', authority, path, _empty, _empty);
      }
      /**
       * Creates a URI from its components and validates the result strictly.
       *
       * @param components An object with `scheme`, `authority`, `path`, `query` and `fragment`.
       */
      static from(components) {
          const result = new Uri(components.scheme, components.authority, components.path, components.query, components.fragment);
          _validateUri(result, true);
          return result;
      }
      /**
       * Join a URI path with path fragments and normalizes the resulting path.
       *
       * @param uri The input URI.
       * @param pathFragment The path fragment to add to the URI path.
       * @returns The resulting URI.
       * @throws {Error} `[UriError]` when `uri` has no path.
       */
      static joinPath(uri, ...pathFragment) {
          if (!uri.path) {
              throw new Error(`[UriError]: cannot call joinPath on URI without path`);
          }
          let newPath;
          if (isWindows && uri.scheme === 'file') {
              newPath = URI.file(paths.win32.join(uriToFsPath(uri, true), ...pathFragment)).path;
          }
          else {
              newPath = paths.posix.join(uri.path, ...pathFragment);
          }
          return uri.with({ path: newPath });
      }
      // ---- printing/externalize ---------------------------
      /**
       * Creates a string representation for this URI. It's guaranteed that calling
       * `URI.parse` with the result of this function creates an URI which is equal
       * to this URI.
       *
       * * The result shall *not* be used for display purposes but for externalization or transport.
       * * The result will be encoded using the percentage encoding and encoding happens mostly
       * ignore the scheme-specific encoding rules.
       *
       * @param skipEncoding Do not encode the result, default is `false`
       */
      toString(skipEncoding = false) {
          return _asFormatted(this, skipEncoding);
      }
      /**
       * @returns This instance itself.
       */
      toJSON() {
          return this;
      }
      /**
       * Turns serialized URI data back into a URI. Falsy input and existing
       * `URI` instances are returned as they are. Otherwise a new instance is
       * built from `data`; its cached string form is taken from `data.external`
       * and its cached `fsPath` from `data.fsPath`, the latter only when
       * `data._sep` equals this platform's separator marker.
       */
      static revive(data) {
          if (!data) {
              return data;
          }
          if (data instanceof URI) {
              return data;
          }
          const result = new Uri(data);
          result._formatted = data.external;
          result._fsPath = data._sep === _pathSepMarker ? data.fsPath : null;
          return result;
      }
  }
  // Written next to a cached fsPath on serialization and compared on revival:
  // `1` on Windows, `undefined` otherwise.
  const _pathSepMarker = isWindows ? 1 : undefined;
  // This class exists so that URI is compatible with vscode.Uri (API).
  /**
   * URI implementation that caches its encoded string form and its fsPath.
   */
  class Uri extends URI {
      constructor(...args) {
          super(...args);
          this._formatted = null;
          this._fsPath = null;
      }
      /**
       * Same value as `URI#fsPath`, computed on first access and then cached.
       */
      get fsPath() {
          if (!this._fsPath) {
              this._fsPath = uriToFsPath(this, false);
          }
          return this._fsPath;
      }
      /**
       * @param skipEncoding Do not encode the result, default is `false`.
       * Only the encoded form is cached.
       */
      toString(skipEncoding = false) {
          if (skipEncoding) {
              // we don't cache that
              return _asFormatted(this, true);
          }
          if (!this._formatted) {
              this._formatted = _asFormatted(this, false);
          }
          return this._formatted;
      }
      /**
       * @returns A plain object with `$mid: 1`, the cached state that has been
       * computed so far, and every non-empty URI component.
       */
      toJSON() {
          const res = {
              $mid: 1 /* Uri */
          };
          // cached state
          if (this._fsPath) {
              res.fsPath = this._fsPath;
              res._sep = _pathSepMarker;
          }
          if (this._formatted) {
              res.external = this._formatted;
          }
          // uri components
          if (this.path) {
              res.path = this.path;
          }
          if (this.scheme) {
              res.scheme = this.scheme;
          }
          if (this.authority) {
              res.authority = this.authority;
          }
          if (this.query) {
              res.query = this.query;
          }
          if (this.fragment) {
              res.fragment = this.fragment;
          }
          return res;
      }
  }
  // reserved characters: https://tools.ietf.org/html/rfc3986#section-2.2
  // Maps a character code to its percent-encoded form (space is included too).
  const encodeTable = {
      [58 /* Colon */]: '%3A',
      [47 /* Slash */]: '%2F',
      [63 /* QuestionMark */]: '%3F',
      [35 /* Hash */]: '%23',
      [91 /* OpenSquareBracket */]: '%5B',
      [93 /* CloseSquareBracket */]: '%5D',
      [64 /* AtSign */]: '%40',
      [33 /* ExclamationMark */]: '%21',
      [36 /* DollarSign */]: '%24',
      [38 /* Ampersand */]: '%26',
      [39 /* SingleQuote */]: '%27',
      [40 /* OpenParen */]: '%28',
      [41 /* CloseParen */]: '%29',
      [42 /* Asterisk */]: '%2A',
      [43 /* Plus */]: '%2B',
      [44 /* Comma */]: '%2C',
      [59 /* Semicolon */]: '%3B',
      [61 /* Equals */]: '%3D',
      [32 /* Space */]: '%20',
  };
  /**
   * Percent-encodes a URI component. Unreserved characters (RFC 3986 section
   * 2.3) are kept, characters listed in `encodeTable` use their table entry,
   * and everything else is handed to the native `encodeURIComponent` in runs.
   * The input string itself is returned when nothing needs encoding.
   *
   * @param uriComponent The component to encode.
   * @param allowSlash When truthy, `/` is kept as is.
   * @returns The encoded component.
   */
  function encodeURIComponentFast(uriComponent, allowSlash) {
      let res = undefined;
      let nativeEncodePos = -1;
      // Encodes the pending run [nativeEncodePos, end) natively, if there is one.
      // A run only starts after `res` has been allocated.
      const flushNative = (end) => {
          if (nativeEncodePos !== -1) {
              res += encodeURIComponent(uriComponent.substring(nativeEncodePos, end));
              nativeEncodePos = -1;
          }
      };
      for (let pos = 0; pos < uriComponent.length; pos++) {
          const code = uriComponent.charCodeAt(pos);
          // unreserved characters: https://tools.ietf.org/html/rfc3986#section-2.3
          const keep = (code >= 97 /* a */ && code <= 122 /* z */)
              || (code >= 65 /* A */ && code <= 90 /* Z */)
              || (code >= 48 /* Digit0 */ && code <= 57 /* Digit9 */)
              || code === 45 /* Dash */
              || code === 46 /* Period */
              || code === 95 /* Underline */
              || code === 126 /* Tilde */
              || (allowSlash && code === 47 /* Slash */);
          if (keep) {
              flushNative(pos);
              // only write when a new string is being built (by default we try to return the param)
              if (res !== undefined) {
                  res += uriComponent.charAt(pos);
              }
              continue;
          }
          // encoding needed, we need to allocate a new string
          if (res === undefined) {
              res = uriComponent.slice(0, pos);
          }
          // check with default table first
          const escaped = encodeTable[code];
          if (escaped !== undefined) {
              flushNative(pos);
              res += escaped;
          }
          else if (nativeEncodePos === -1) {
              // use native encode only when needed
              nativeEncodePos = pos;
          }
      }
      flushNative(uriComponent.length);
      return res !== undefined ? res : uriComponent;
  }
  /**
   * Encodes only `#` and `?` (using `encodeTable`) and leaves every other
   * character untouched.
   *
   * @param path The string to encode.
   * @returns The encoded string.
   */
  function encodeURIComponentMinimal(path) {
      return path.replace(/[#?]/g, (ch) => encodeTable[ch.charCodeAt(0)]);
  }
  /**
   * Compute `fsPath` for the given uri
   *
   * @param uri The URI; `scheme`, `authority` and `path` are read.
   * @param keepDriveLetterCasing When falsy, a windows drive letter is lower-cased.
   * @returns The path; on Windows every `/` is replaced with `\`.
   */
  export function uriToFsPath(uri, keepDriveLetterCasing) {
      const { path } = uri;
      const driveCode = path.charCodeAt(1);
      const hasDriveLetter = path.charCodeAt(0) === 47 /* Slash */
          && (driveCode >= 65 /* A */ && driveCode <= 90 /* Z */ || driveCode >= 97 /* a */ && driveCode <= 122 /* z */)
          && path.charCodeAt(2) === 58 /* Colon */;
      let value;
      if (uri.authority && path.length > 1 && uri.scheme === 'file') {
          // unc path: file://shares/c$/far/boo
          value = `//${uri.authority}${path}`;
      }
      else if (hasDriveLetter) {
          // windows drive letter: file:///c:/far/boo
          value = keepDriveLetterCasing
              ? path.slice(1)
              : path[1].toLowerCase() + path.slice(2);
      }
      else {
          // other path
          value = path;
      }
      if (isWindows) {
          value = value.replace(/\//g, '\\');
      }
      return value;
  }
  /**
   * Create the external version of a uri
   *
   * @param uri The URI; all five components are read.
   * @param skipEncoding When truthy, only `#` and `?` are encoded (and the
   * fragment is written as is); otherwise full percent-encoding is applied.
   * @returns The string form of `uri`.
   */
  function _asFormatted(uri, skipEncoding) {
      const encoder = !skipEncoding
          ? encodeURIComponentFast
          : encodeURIComponentMinimal;
      let res = '';
      let { scheme, authority, path, query, fragment } = uri;
      if (scheme) {
          res += scheme;
          res += ':';
      }
      // file-URIs always get the double slash, even without an authority
      if (authority || scheme === 'file') {
          res += _slash;
          res += _slash;
      }
      if (authority) {
          let idx = authority.indexOf('@');
          if (idx !== -1) {
              // <user>@<auth>
              const userinfo = authority.slice(0, idx);
              authority = authority.slice(idx + 1);
              idx = userinfo.indexOf(':');
              if (idx === -1) {
                  res += encoder(userinfo, false);
              }
              else {
                  // <user>:<pass>@<auth>
                  res += encoder(userinfo.slice(0, idx), false);
                  res += ':';
                  res += encoder(userinfo.slice(idx + 1), false);
              }
              res += '@';
          }
          authority = authority.toLowerCase();
          idx = authority.indexOf(':');
          if (idx === -1) {
              res += encoder(authority, false);
          }
          else {
              // <auth>:<port> - everything from the colon on is written unencoded
              res += encoder(authority.slice(0, idx), false);
              res += authority.slice(idx);
          }
      }
      if (path) {
          // lower-case windows drive letters in /C:/fff or C:/fff
          if (path.length >= 3 && path.charCodeAt(0) === 47 /* Slash */ && path.charCodeAt(2) === 58 /* Colon */) {
              const code = path.charCodeAt(1);
              if (code >= 65 /* A */ && code <= 90 /* Z */) {
                  path = `/${String.fromCharCode(code + 32)}:${path.slice(3)}`; // "/c:".length === 3
              }
          }
          else if (path.length >= 2 && path.charCodeAt(1) === 58 /* Colon */) {
              const code = path.charCodeAt(0);
              if (code >= 65 /* A */ && code <= 90 /* Z */) {
                  path = `${String.fromCharCode(code + 32)}:${path.slice(2)}`; // "c:".length === 2
              }
          }
          // encode the rest of the path
          res += encoder(path, true);
      }
      if (query) {
          res += '?';
          res += encoder(query, false);
      }
      if (fragment) {
          res += '#';
          res += !skipEncoding ? encodeURIComponentFast(fragment, false) : fragment;
      }
      return res;
  }
  // --- decode
  /**
   * Decodes `str` with `decodeURIComponent`. Whenever decoding fails, the
   * first three characters are kept verbatim and decoding is retried on the
   * remainder, so malformed sequences survive unchanged instead of throwing.
   *
   * Written as a loop instead of recursion so that long runs of malformed
   * sequences cannot exhaust the call stack.
   *
   * @param str The string to decode.
   * @returns The decoded string.
   */
  function decodeURIComponentGraceful(str) {
      let kept = '';
      let rest = str;
      for (;;) {
          try {
              return kept + decodeURIComponent(rest);
          }
          catch (_a) {
              if (rest.length <= 3) {
                  return kept + rest;
              }
              kept += rest.slice(0, 3);
              rest = rest.slice(3);
          }
      }
  }
  // One or more consecutive `%XX` triples. NOTE: `XX` is matched as any two
  // ASCII letters or digits, not only hex digits; triples that fail to decode
  // are left as they are by `decodeURIComponentGraceful`.
  const _rEncodedAsHex = /(%[0-9A-Za-z][0-9A-Za-z])+/g;
  /**
   * Decodes every run of percent-encoded triples in `str`.
   *
   * @param str The string to decode.
   * @returns The decoded string; the input string itself when it contains no run.
   */
  function percentDecode(str) {
      // `replace` returns the input unchanged when nothing matches, so no
      // separate pre-check is needed.
      return str.replace(_rEncodedAsHex, (match) => decodeURIComponentGraceful(match));
  }