linkComputer.js 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265
  1. /*---------------------------------------------------------------------------------------------
  2. * Copyright (c) Microsoft Corporation. All rights reserved.
  3. * Licensed under the MIT License. See License.txt in the project root for license information.
  4. *--------------------------------------------------------------------------------------------*/
  5. import { CharacterClassifier } from '../core/characterClassifier.js';
  6. export class Uint8Matrix {
  7. constructor(rows, cols, defaultValue) {
  8. const data = new Uint8Array(rows * cols);
  9. for (let i = 0, len = rows * cols; i < len; i++) {
  10. data[i] = defaultValue;
  11. }
  12. this._data = data;
  13. this.rows = rows;
  14. this.cols = cols;
  15. }
  16. get(row, col) {
  17. return this._data[row * this.cols + col];
  18. }
  19. set(row, col, value) {
  20. this._data[row * this.cols + col] = value;
  21. }
  22. }
  23. export class StateMachine {
  24. constructor(edges) {
  25. let maxCharCode = 0;
  26. let maxState = 0 /* Invalid */;
  27. for (let i = 0, len = edges.length; i < len; i++) {
  28. let [from, chCode, to] = edges[i];
  29. if (chCode > maxCharCode) {
  30. maxCharCode = chCode;
  31. }
  32. if (from > maxState) {
  33. maxState = from;
  34. }
  35. if (to > maxState) {
  36. maxState = to;
  37. }
  38. }
  39. maxCharCode++;
  40. maxState++;
  41. let states = new Uint8Matrix(maxState, maxCharCode, 0 /* Invalid */);
  42. for (let i = 0, len = edges.length; i < len; i++) {
  43. let [from, chCode, to] = edges[i];
  44. states.set(from, chCode, to);
  45. }
  46. this._states = states;
  47. this._maxCharCode = maxCharCode;
  48. }
  49. nextState(currentState, chCode) {
  50. if (chCode < 0 || chCode >= this._maxCharCode) {
  51. return 0 /* Invalid */;
  52. }
  53. return this._states.get(currentState, chCode);
  54. }
  55. }
  56. // State machine for http:// or https:// or file://
  57. let _stateMachine = null;
  58. function getStateMachine() {
  59. if (_stateMachine === null) {
  60. _stateMachine = new StateMachine([
  61. [1 /* Start */, 104 /* h */, 2 /* H */],
  62. [1 /* Start */, 72 /* H */, 2 /* H */],
  63. [1 /* Start */, 102 /* f */, 6 /* F */],
  64. [1 /* Start */, 70 /* F */, 6 /* F */],
  65. [2 /* H */, 116 /* t */, 3 /* HT */],
  66. [2 /* H */, 84 /* T */, 3 /* HT */],
  67. [3 /* HT */, 116 /* t */, 4 /* HTT */],
  68. [3 /* HT */, 84 /* T */, 4 /* HTT */],
  69. [4 /* HTT */, 112 /* p */, 5 /* HTTP */],
  70. [4 /* HTT */, 80 /* P */, 5 /* HTTP */],
  71. [5 /* HTTP */, 115 /* s */, 9 /* BeforeColon */],
  72. [5 /* HTTP */, 83 /* S */, 9 /* BeforeColon */],
  73. [5 /* HTTP */, 58 /* Colon */, 10 /* AfterColon */],
  74. [6 /* F */, 105 /* i */, 7 /* FI */],
  75. [6 /* F */, 73 /* I */, 7 /* FI */],
  76. [7 /* FI */, 108 /* l */, 8 /* FIL */],
  77. [7 /* FI */, 76 /* L */, 8 /* FIL */],
  78. [8 /* FIL */, 101 /* e */, 9 /* BeforeColon */],
  79. [8 /* FIL */, 69 /* E */, 9 /* BeforeColon */],
  80. [9 /* BeforeColon */, 58 /* Colon */, 10 /* AfterColon */],
  81. [10 /* AfterColon */, 47 /* Slash */, 11 /* AlmostThere */],
  82. [11 /* AlmostThere */, 47 /* Slash */, 12 /* End */],
  83. ]);
  84. }
  85. return _stateMachine;
  86. }
  87. let _classifier = null;
  88. function getClassifier() {
  89. if (_classifier === null) {
  90. _classifier = new CharacterClassifier(0 /* None */);
  91. // allow-any-unicode-next-line
  92. const FORCE_TERMINATION_CHARACTERS = ' \t<>\'\"、。。、,.:;‘〈「『〔([{「」}])〕』」〉’`~…';
  93. for (let i = 0; i < FORCE_TERMINATION_CHARACTERS.length; i++) {
  94. _classifier.set(FORCE_TERMINATION_CHARACTERS.charCodeAt(i), 1 /* ForceTermination */);
  95. }
  96. const CANNOT_END_WITH_CHARACTERS = '.,;';
  97. for (let i = 0; i < CANNOT_END_WITH_CHARACTERS.length; i++) {
  98. _classifier.set(CANNOT_END_WITH_CHARACTERS.charCodeAt(i), 2 /* CannotEndIn */);
  99. }
  100. }
  101. return _classifier;
  102. }
  103. export class LinkComputer {
  104. static _createLink(classifier, line, lineNumber, linkBeginIndex, linkEndIndex) {
  105. // Do not allow to end link in certain characters...
  106. let lastIncludedCharIndex = linkEndIndex - 1;
  107. do {
  108. const chCode = line.charCodeAt(lastIncludedCharIndex);
  109. const chClass = classifier.get(chCode);
  110. if (chClass !== 2 /* CannotEndIn */) {
  111. break;
  112. }
  113. lastIncludedCharIndex--;
  114. } while (lastIncludedCharIndex > linkBeginIndex);
  115. // Handle links enclosed in parens, square brackets and curlys.
  116. if (linkBeginIndex > 0) {
  117. const charCodeBeforeLink = line.charCodeAt(linkBeginIndex - 1);
  118. const lastCharCodeInLink = line.charCodeAt(lastIncludedCharIndex);
  119. if ((charCodeBeforeLink === 40 /* OpenParen */ && lastCharCodeInLink === 41 /* CloseParen */)
  120. || (charCodeBeforeLink === 91 /* OpenSquareBracket */ && lastCharCodeInLink === 93 /* CloseSquareBracket */)
  121. || (charCodeBeforeLink === 123 /* OpenCurlyBrace */ && lastCharCodeInLink === 125 /* CloseCurlyBrace */)) {
  122. // Do not end in ) if ( is before the link start
  123. // Do not end in ] if [ is before the link start
  124. // Do not end in } if { is before the link start
  125. lastIncludedCharIndex--;
  126. }
  127. }
  128. return {
  129. range: {
  130. startLineNumber: lineNumber,
  131. startColumn: linkBeginIndex + 1,
  132. endLineNumber: lineNumber,
  133. endColumn: lastIncludedCharIndex + 2
  134. },
  135. url: line.substring(linkBeginIndex, lastIncludedCharIndex + 1)
  136. };
  137. }
  138. static computeLinks(model, stateMachine = getStateMachine()) {
  139. const classifier = getClassifier();
  140. let result = [];
  141. for (let i = 1, lineCount = model.getLineCount(); i <= lineCount; i++) {
  142. const line = model.getLineContent(i);
  143. const len = line.length;
  144. let j = 0;
  145. let linkBeginIndex = 0;
  146. let linkBeginChCode = 0;
  147. let state = 1 /* Start */;
  148. let hasOpenParens = false;
  149. let hasOpenSquareBracket = false;
  150. let inSquareBrackets = false;
  151. let hasOpenCurlyBracket = false;
  152. while (j < len) {
  153. let resetStateMachine = false;
  154. const chCode = line.charCodeAt(j);
  155. if (state === 13 /* Accept */) {
  156. let chClass;
  157. switch (chCode) {
  158. case 40 /* OpenParen */:
  159. hasOpenParens = true;
  160. chClass = 0 /* None */;
  161. break;
  162. case 41 /* CloseParen */:
  163. chClass = (hasOpenParens ? 0 /* None */ : 1 /* ForceTermination */);
  164. break;
  165. case 91 /* OpenSquareBracket */:
  166. inSquareBrackets = true;
  167. hasOpenSquareBracket = true;
  168. chClass = 0 /* None */;
  169. break;
  170. case 93 /* CloseSquareBracket */:
  171. inSquareBrackets = false;
  172. chClass = (hasOpenSquareBracket ? 0 /* None */ : 1 /* ForceTermination */);
  173. break;
  174. case 123 /* OpenCurlyBrace */:
  175. hasOpenCurlyBracket = true;
  176. chClass = 0 /* None */;
  177. break;
  178. case 125 /* CloseCurlyBrace */:
  179. chClass = (hasOpenCurlyBracket ? 0 /* None */ : 1 /* ForceTermination */);
  180. break;
  181. /* The following three rules make it that ' or " or ` are allowed inside links if the link began with a different one */
  182. case 39 /* SingleQuote */:
  183. chClass = (linkBeginChCode === 34 /* DoubleQuote */ || linkBeginChCode === 96 /* BackTick */) ? 0 /* None */ : 1 /* ForceTermination */;
  184. break;
  185. case 34 /* DoubleQuote */:
  186. chClass = (linkBeginChCode === 39 /* SingleQuote */ || linkBeginChCode === 96 /* BackTick */) ? 0 /* None */ : 1 /* ForceTermination */;
  187. break;
  188. case 96 /* BackTick */:
  189. chClass = (linkBeginChCode === 39 /* SingleQuote */ || linkBeginChCode === 34 /* DoubleQuote */) ? 0 /* None */ : 1 /* ForceTermination */;
  190. break;
  191. case 42 /* Asterisk */:
  192. // `*` terminates a link if the link began with `*`
  193. chClass = (linkBeginChCode === 42 /* Asterisk */) ? 1 /* ForceTermination */ : 0 /* None */;
  194. break;
  195. case 124 /* Pipe */:
  196. // `|` terminates a link if the link began with `|`
  197. chClass = (linkBeginChCode === 124 /* Pipe */) ? 1 /* ForceTermination */ : 0 /* None */;
  198. break;
  199. case 32 /* Space */:
  200. // ` ` allow space in between [ and ]
  201. chClass = (inSquareBrackets ? 0 /* None */ : 1 /* ForceTermination */);
  202. break;
  203. default:
  204. chClass = classifier.get(chCode);
  205. }
  206. // Check if character terminates link
  207. if (chClass === 1 /* ForceTermination */) {
  208. result.push(LinkComputer._createLink(classifier, line, i, linkBeginIndex, j));
  209. resetStateMachine = true;
  210. }
  211. }
  212. else if (state === 12 /* End */) {
  213. let chClass;
  214. if (chCode === 91 /* OpenSquareBracket */) {
  215. // Allow for the authority part to contain ipv6 addresses which contain [ and ]
  216. hasOpenSquareBracket = true;
  217. chClass = 0 /* None */;
  218. }
  219. else {
  220. chClass = classifier.get(chCode);
  221. }
  222. // Check if character terminates link
  223. if (chClass === 1 /* ForceTermination */) {
  224. resetStateMachine = true;
  225. }
  226. else {
  227. state = 13 /* Accept */;
  228. }
  229. }
  230. else {
  231. state = stateMachine.nextState(state, chCode);
  232. if (state === 0 /* Invalid */) {
  233. resetStateMachine = true;
  234. }
  235. }
  236. if (resetStateMachine) {
  237. state = 1 /* Start */;
  238. hasOpenParens = false;
  239. hasOpenSquareBracket = false;
  240. hasOpenCurlyBracket = false;
  241. // Record where the link started
  242. linkBeginIndex = j + 1;
  243. linkBeginChCode = chCode;
  244. }
  245. j++;
  246. }
  247. if (state === 13 /* Accept */) {
  248. result.push(LinkComputer._createLink(classifier, line, i, linkBeginIndex, len));
  249. }
  250. }
  251. return result;
  252. }
  253. }
  254. /**
  255. * Returns an array of all links contains in the provided
  256. * document. *Note* that this operation is computational
  257. * expensive and should not run in the UI thread.
  258. */
  259. export function computeLinks(model) {
  260. if (!model || typeof model.getLineCount !== 'function' || typeof model.getLineContent !== 'function') {
  261. // Unknown caller!
  262. return [];
  263. }
  264. return LinkComputer.computeLinks(model);
  265. }