123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265 |
- /*---------------------------------------------------------------------------------------------
- * Copyright (c) Microsoft Corporation. All rights reserved.
- * Licensed under the MIT License. See License.txt in the project root for license information.
- *--------------------------------------------------------------------------------------------*/
- import { CharacterClassifier } from '../core/characterClassifier.js';
/**
 * A fixed-size matrix of unsigned 8-bit integers stored row by row in a
 * single `Uint8Array`.
 *
 * Indices are not bounds-checked: reading outside the backing array yields
 * `undefined`, and writing outside it is silently ignored.
 */
export class Uint8Matrix {
    /**
     * @param rows Number of rows.
     * @param cols Number of columns.
     * @param defaultValue Value every cell is initialised with.
     */
    constructor(rows, cols, defaultValue) {
        // `fill` replaces the manual initialisation loop.
        this._data = new Uint8Array(rows * cols).fill(defaultValue);
        this.rows = rows;
        this.cols = cols;
    }
    /**
     * Returns the value stored at (`row`, `col`).
     */
    get(row, col) {
        return this._data[row * this.cols + col];
    }
    /**
     * Stores `value` at (`row`, `col`); the backing `Uint8Array` keeps only
     * the low 8 bits of the value.
     */
    set(row, col, value) {
        this._data[row * this.cols + col] = value;
    }
}
/**
 * A table-driven state machine built from a list of `[from, chCode, to]` edges.
 *
 * State 0 is the Invalid state: every transition that is not described by an
 * edge leads to it.
 */
export class StateMachine {
    /**
     * @param edges Array of `[fromState, charCode, toState]` triples.
     */
    constructor(edges) {
        let maxCharCode = 0;
        let maxState = 0 /* Invalid */;
        for (const [from, chCode, to] of edges) {
            if (chCode > maxCharCode) {
                maxCharCode = chCode;
            }
            if (from > maxState) {
                maxState = from;
            }
            if (to > maxState) {
                maxState = to;
            }
        }
        // Turn the largest seen values into exclusive upper bounds (table sizes).
        maxCharCode++;
        maxState++;
        const states = new Uint8Matrix(maxState, maxCharCode, 0 /* Invalid */);
        for (const [from, chCode, to] of edges) {
            states.set(from, chCode, to);
        }
        this._states = states;
        this._maxCharCode = maxCharCode;
        this._stateCount = maxState;
    }
    /**
     * Returns the state reached from `currentState` on `chCode`, or
     * 0 (Invalid) when there is no such transition.
     */
    nextState(currentState, chCode) {
        // The negated comparisons also reject NaN and undefined.
        if (!(chCode >= 0 && chCode < this._maxCharCode)) {
            return 0 /* Invalid */;
        }
        // A state outside the table would otherwise read past the matrix and
        // yield `undefined` instead of Invalid.
        if (!(currentState >= 0 && currentState < this._stateCount)) {
            return 0 /* Invalid */;
        }
        return this._states.get(currentState, chCode);
    }
}
// Lazily built state machine for http:// or https:// or file://
let _stateMachine = null;
/**
 * Returns the shared state machine, creating it on the first call.
 *
 * Scheme letters are accepted in both lower and upper case. The machine
 * reaches state 12 (End) once the `//` following the scheme's colon has
 * been consumed.
 */
function getStateMachine() {
    if (_stateMachine !== null) {
        return _stateMachine;
    }
    const State = {
        Start: 1,
        H: 2,
        HT: 3,
        HTT: 4,
        HTTP: 5,
        F: 6,
        FI: 7,
        FIL: 8,
        BeforeColon: 9,
        AfterColon: 10,
        AlmostThere: 11,
        End: 12,
    };
    const colon = 58;
    const slash = 47;
    const edges = [];
    // Adds the lower-case edge followed by its upper-case twin.
    const onLetter = (from, lowerCaseLetter, to) => {
        edges.push([from, lowerCaseLetter.charCodeAt(0), to]);
        edges.push([from, lowerCaseLetter.toUpperCase().charCodeAt(0), to]);
    };
    const onCharCode = (from, chCode, to) => {
        edges.push([from, chCode, to]);
    };
    onLetter(State.Start, 'h', State.H);
    onLetter(State.Start, 'f', State.F);
    onLetter(State.H, 't', State.HT);
    onLetter(State.HT, 't', State.HTT);
    onLetter(State.HTT, 'p', State.HTTP);
    onLetter(State.HTTP, 's', State.BeforeColon);
    onCharCode(State.HTTP, colon, State.AfterColon);
    onLetter(State.F, 'i', State.FI);
    onLetter(State.FI, 'l', State.FIL);
    onLetter(State.FIL, 'e', State.BeforeColon);
    onCharCode(State.BeforeColon, colon, State.AfterColon);
    onCharCode(State.AfterColon, slash, State.AlmostThere);
    onCharCode(State.AlmostThere, slash, State.End);
    _stateMachine = new StateMachine(edges);
    return _stateMachine;
}
// Lazily built character classifier, created on the first getClassifier() call.
let _classifier = null;
/**
 * Returns the shared character classifier, creating it on the first call.
 *
 * Characters default to class 0 (None). Characters that always end a link get
 * class 1 (ForceTermination); characters a link may contain but not end with
 * get class 2 (CannotEndIn).
 */
function getClassifier() {
    if (_classifier !== null) {
        return _classifier;
    }
    _classifier = new CharacterClassifier(0 /* None */);
    // Assigns `charClass` to every UTF-16 code unit of `characters`.
    const classify = (characters, charClass) => {
        for (let index = 0; index < characters.length; index++) {
            _classifier.set(characters.charCodeAt(index), charClass);
        }
    };
    // allow-any-unicode-next-line
    classify(' \t<>\'\"、。。、,.:;‘〈「『〔([{「」}])〕』」〉’`~…', 1 /* ForceTermination */);
    classify('.,;', 2 /* CannotEndIn */);
    return _classifier;
}
/**
 * Scans the lines of a text model for links, driven by a character-level
 * state machine (by default the one returned by `getStateMachine()`).
 */
export class LinkComputer {
    /**
     * Builds the link for the candidate text
     * `line.substring(linkBeginIndex, linkEndIndex)`.
     *
     * Trailing characters classified as CannotEndIn (2) are dropped, and a
     * closing `)`, `]` or `}` is dropped when the matching opening character
     * sits immediately before the link.
     *
     * @param classifier Object whose `get(charCode)` returns the character class.
     * @param line Content of the line that contains the link.
     * @param lineNumber Line number reported in the returned range.
     * @param linkBeginIndex 0-based index of the first character of the link.
     * @param linkEndIndex 0-based index just past the last candidate character.
     * @returns `{ range, url }`; range columns are 0-based index + 1, and
     * `endColumn` points just past the last included character.
     */
    static _createLink(classifier, line, lineNumber, linkBeginIndex, linkEndIndex) {
        // Do not allow to end link in certain characters...
        let lastIncludedCharIndex = linkEndIndex - 1;
        do {
            const chCode = line.charCodeAt(lastIncludedCharIndex);
            const chClass = classifier.get(chCode);
            if (chClass !== 2 /* CannotEndIn */) {
                break;
            }
            lastIncludedCharIndex--;
        } while (lastIncludedCharIndex > linkBeginIndex);
        // Handle links enclosed in parens, square brackets and curlys.
        if (linkBeginIndex > 0) {
            const charCodeBeforeLink = line.charCodeAt(linkBeginIndex - 1);
            const lastCharCodeInLink = line.charCodeAt(lastIncludedCharIndex);
            if ((charCodeBeforeLink === 40 /* OpenParen */ && lastCharCodeInLink === 41 /* CloseParen */)
                || (charCodeBeforeLink === 91 /* OpenSquareBracket */ && lastCharCodeInLink === 93 /* CloseSquareBracket */)
                || (charCodeBeforeLink === 123 /* OpenCurlyBrace */ && lastCharCodeInLink === 125 /* CloseCurlyBrace */)) {
                // Do not end in ) if ( is before the link start
                // Do not end in ] if [ is before the link start
                // Do not end in } if { is before the link start
                lastIncludedCharIndex--;
            }
        }
        return {
            range: {
                startLineNumber: lineNumber,
                startColumn: linkBeginIndex + 1,
                endLineNumber: lineNumber,
                endColumn: lastIncludedCharIndex + 2
            },
            url: line.substring(linkBeginIndex, lastIncludedCharIndex + 1)
        };
    }
    /**
     * Computes all links found in `model`, line by line.
     *
     * @param model Object exposing `getLineCount()` and `getLineContent(lineNumber)`
     * with 1-based line numbers.
     * @param stateMachine Object whose `nextState(state, charCode)` recognises the
     * link prefix; it must reach state 12 (End) when the prefix is complete and
     * return 0 (Invalid) on a mismatch.
     * @returns Array of `{ range, url }` objects in document order.
     */
    static computeLinks(model, stateMachine = getStateMachine()) {
        const classifier = getClassifier();
        const result = [];
        for (let i = 1, lineCount = model.getLineCount(); i <= lineCount; i++) {
            const line = model.getLineContent(i);
            const len = line.length;
            let j = 0;
            let linkBeginIndex = 0;
            let linkBeginChCode = 0;
            let state = 1 /* Start */;
            let hasOpenParens = false;
            let hasOpenSquareBracket = false;
            let inSquareBrackets = false;
            let hasOpenCurlyBracket = false;
            while (j < len) {
                let resetStateMachine = false;
                const chCode = line.charCodeAt(j);
                if (state === 13 /* Accept */) {
                    // Inside the body of a link: decide whether this character ends it.
                    let chClass;
                    switch (chCode) {
                        case 40 /* OpenParen */:
                            hasOpenParens = true;
                            chClass = 0 /* None */;
                            break;
                        case 41 /* CloseParen */:
                            chClass = (hasOpenParens ? 0 /* None */ : 1 /* ForceTermination */);
                            break;
                        case 91 /* OpenSquareBracket */:
                            inSquareBrackets = true;
                            hasOpenSquareBracket = true;
                            chClass = 0 /* None */;
                            break;
                        case 93 /* CloseSquareBracket */:
                            inSquareBrackets = false;
                            chClass = (hasOpenSquareBracket ? 0 /* None */ : 1 /* ForceTermination */);
                            break;
                        case 123 /* OpenCurlyBrace */:
                            hasOpenCurlyBracket = true;
                            chClass = 0 /* None */;
                            break;
                        case 125 /* CloseCurlyBrace */:
                            chClass = (hasOpenCurlyBracket ? 0 /* None */ : 1 /* ForceTermination */);
                            break;
                        /* The following three rules make it that ' or " or ` are allowed inside links if the link began with a different one */
                        case 39 /* SingleQuote */:
                            chClass = (linkBeginChCode === 34 /* DoubleQuote */ || linkBeginChCode === 96 /* BackTick */) ? 0 /* None */ : 1 /* ForceTermination */;
                            break;
                        case 34 /* DoubleQuote */:
                            chClass = (linkBeginChCode === 39 /* SingleQuote */ || linkBeginChCode === 96 /* BackTick */) ? 0 /* None */ : 1 /* ForceTermination */;
                            break;
                        case 96 /* BackTick */:
                            chClass = (linkBeginChCode === 39 /* SingleQuote */ || linkBeginChCode === 34 /* DoubleQuote */) ? 0 /* None */ : 1 /* ForceTermination */;
                            break;
                        case 42 /* Asterisk */:
                            // `*` terminates a link if the link began with `*`
                            chClass = (linkBeginChCode === 42 /* Asterisk */) ? 1 /* ForceTermination */ : 0 /* None */;
                            break;
                        case 124 /* Pipe */:
                            // `|` terminates a link if the link began with `|`
                            chClass = (linkBeginChCode === 124 /* Pipe */) ? 1 /* ForceTermination */ : 0 /* None */;
                            break;
                        case 32 /* Space */:
                            // ` ` allow space in between [ and ]
                            chClass = (inSquareBrackets ? 0 /* None */ : 1 /* ForceTermination */);
                            break;
                        default:
                            chClass = classifier.get(chCode);
                    }
                    // Check if character terminates link
                    if (chClass === 1 /* ForceTermination */) {
                        result.push(LinkComputer._createLink(classifier, line, i, linkBeginIndex, j));
                        resetStateMachine = true;
                    }
                }
                else if (state === 12 /* End */) {
                    // The prefix just completed: the next character decides
                    // whether a link body actually follows.
                    let chClass;
                    if (chCode === 91 /* OpenSquareBracket */) {
                        // Allow for the authority part to contain ipv6 addresses which contain [ and ]
                        hasOpenSquareBracket = true;
                        chClass = 0 /* None */;
                    }
                    else {
                        chClass = classifier.get(chCode);
                    }
                    // Check if character terminates link
                    if (chClass === 1 /* ForceTermination */) {
                        resetStateMachine = true;
                    }
                    else {
                        state = 13 /* Accept */;
                    }
                }
                else {
                    state = stateMachine.nextState(state, chCode);
                    if (state === 0 /* Invalid */) {
                        resetStateMachine = true;
                    }
                }
                if (resetStateMachine) {
                    state = 1 /* Start */;
                    hasOpenParens = false;
                    hasOpenSquareBracket = false;
                    // Clear the "inside [ ]" flag too; otherwise a link that ended
                    // inside an unclosed `[` makes later links on the same line
                    // swallow spaces.
                    inSquareBrackets = false;
                    hasOpenCurlyBracket = false;
                    // Record where the link started
                    linkBeginIndex = j + 1;
                    linkBeginChCode = chCode;
                }
                j++;
            }
            // A link that runs up to the end of the line is still pending here.
            if (state === 13 /* Accept */) {
                result.push(LinkComputer._createLink(classifier, line, i, linkBeginIndex, len));
            }
        }
        return result;
    }
}
/**
 * Returns an array of all links contained in the provided
 * document. *Note* that this operation is computationally
 * expensive and should not run in the UI thread.
 *
 * An empty array is returned when `model` is missing or does not expose
 * `getLineCount` and `getLineContent` as functions.
 */
export function computeLinks(model) {
    const isUsableModel = Boolean(model)
        && typeof model.getLineCount === 'function'
        && typeof model.getLineContent === 'function';
    if (isUsableModel) {
        return LinkComputer.computeLinks(model);
    }
    // Unknown caller!
    return [];
}
|