textModelTokens.js

/*---------------------------------------------------------------------------------------------
 * Copyright (c) Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/
import * as arrays from '../../../base/common/arrays.js';
import { onUnexpectedError } from '../../../base/common/errors.js';
import { LineTokens } from '../core/lineTokens.js';
import { Position } from '../core/position.js';
import { TokenizationRegistry } from '../modes.js';
import { nullTokenize2 } from '../modes/nullMode.js';
import { Disposable } from '../../../base/common/lifecycle.js';
import { StopWatch } from '../../../base/common/stopwatch.js';
import { MultilineTokensBuilder, countEOL } from './tokensStore.js';
import { setImmediate } from '../../../base/common/platform.js';
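/**
 * Stores, for each line, the tokenizer state at the start of the line ("begin state")
 * and whether the line's tokens are still valid, and tracks the index of the first
 * line that needs to be (re)tokenized. All indices are zero-based line indices.
 */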
export class TokenizationStateStore {
    constructor() {
        this._beginState = [];
        this._valid = [];
        this._len = 0;
        this._invalidLineStartIndex = 0;
    }
    _reset(initialState) {
        this._beginState = [];
        this._valid = [];
        this._len = 0;
        this._invalidLineStartIndex = 0;
        if (initialState) {
            this._setBeginState(0, initialState);
        }
    }
    flush(initialState) {
        this._reset(initialState);
    }
    get invalidLineStartIndex() {
        return this._invalidLineStartIndex;
    }
    _invalidateLine(lineIndex) {
        if (lineIndex < this._len) {
            this._valid[lineIndex] = false;
        }
        if (lineIndex < this._invalidLineStartIndex) {
            this._invalidLineStartIndex = lineIndex;
        }
    }
    _isValid(lineIndex) {
        if (lineIndex < this._len) {
            return this._valid[lineIndex];
        }
        return false;
    }
    getBeginState(lineIndex) {
        if (lineIndex < this._len) {
            return this._beginState[lineIndex];
        }
        return null;
    }
    _ensureLine(lineIndex) {
        while (lineIndex >= this._len) {
            this._beginState[this._len] = null;
            this._valid[this._len] = false;
            this._len++;
        }
    }
    _deleteLines(start, deleteCount) {
        if (deleteCount === 0) {
            return;
        }
        if (start + deleteCount > this._len) {
            deleteCount = this._len - start;
        }
        this._beginState.splice(start, deleteCount);
        this._valid.splice(start, deleteCount);
        this._len -= deleteCount;
    }
    _insertLines(insertIndex, insertCount) {
        if (insertCount === 0) {
            return;
        }
        const beginState = [];
        const valid = [];
        for (let i = 0; i < insertCount; i++) {
            beginState[i] = null;
            valid[i] = false;
        }
        this._beginState = arrays.arrayInsert(this._beginState, insertIndex, beginState);
        this._valid = arrays.arrayInsert(this._valid, insertIndex, valid);
        this._len += insertCount;
    }
    _setValid(lineIndex, valid) {
        this._ensureLine(lineIndex);
        this._valid[lineIndex] = valid;
    }
    _setBeginState(lineIndex, beginState) {
        this._ensureLine(lineIndex);
        this._beginState[lineIndex] = beginState;
    }
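    /**
     * Record the end state produced by tokenizing `lineIndex` and mark the line valid.
     * If the next line already begins with an equal state, following lines that are
     * still marked valid can be skipped by advancing `invalidLineStartIndex` past them.
     */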
    setEndState(linesLength, lineIndex, endState) {
        this._setValid(lineIndex, true);
        this._invalidLineStartIndex = lineIndex + 1;
        // Check if this was the last line
        if (lineIndex === linesLength - 1) {
            return;
        }
        // Check if the end state has changed
        const previousEndState = this.getBeginState(lineIndex + 1);
        if (previousEndState === null || !endState.equals(previousEndState)) {
            this._setBeginState(lineIndex + 1, endState);
            this._invalidateLine(lineIndex + 1);
            return;
        }
        // Perhaps we can skip tokenizing some lines...
        let i = lineIndex + 1;
        while (i < linesLength) {
            if (!this._isValid(i)) {
                break;
            }
            i++;
        }
        this._invalidLineStartIndex = i;
    }
    setFakeTokens(lineIndex) {
        this._setValid(lineIndex, false);
    }
    //#region Editing
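    /**
     * Keep the per-line state arrays in sync with a text edit: invalidate the edited
     * lines, then delete and insert line entries to mirror the change in line count.
     */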
    applyEdits(range, eolCount) {
        const deletingLinesCnt = range.endLineNumber - range.startLineNumber;
        const insertingLinesCnt = eolCount;
        const editingLinesCnt = Math.min(deletingLinesCnt, insertingLinesCnt);
        for (let j = editingLinesCnt; j >= 0; j--) {
            this._invalidateLine(range.startLineNumber + j - 1);
        }
        this._acceptDeleteRange(range);
        this._acceptInsertText(new Position(range.startLineNumber, range.startColumn), eolCount);
    }
    _acceptDeleteRange(range) {
        const firstLineIndex = range.startLineNumber - 1;
        if (firstLineIndex >= this._len) {
            return;
        }
        this._deleteLines(range.startLineNumber, range.endLineNumber - range.startLineNumber);
    }
    _acceptInsertText(position, eolCount) {
        const lineIndex = position.lineNumber - 1;
        if (lineIndex >= this._len) {
            return;
        }
        this._insertLines(position.lineNumber, eolCount);
    }
}
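/**
 * Drives background tokenization for a text model: it reacts to content, language,
 * and TokenizationRegistry changes, keeps a TokenizationStateStore up to date, and
 * pushes the resulting tokens back into the model.
 */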
export class TextModelTokenization extends Disposable {
    constructor(_textModel, _languageIdCodec) {
        super();
        this._textModel = _textModel;
        this._languageIdCodec = _languageIdCodec;
        this._isScheduled = false;
        this._isDisposed = false;
        this._tokenizationStateStore = new TokenizationStateStore();
        this._tokenizationSupport = null;
        this._register(TokenizationRegistry.onDidChange((e) => {
            const languageId = this._textModel.getLanguageId();
            if (e.changedLanguages.indexOf(languageId) === -1) {
                return;
            }
            this._resetTokenizationState();
            this._textModel.clearTokens();
        }));
        this._register(this._textModel.onDidChangeContentFast((e) => {
            if (e.isFlush) {
                this._resetTokenizationState();
                return;
            }
            for (let i = 0, len = e.changes.length; i < len; i++) {
                const change = e.changes[i];
                const [eolCount] = countEOL(change.text);
                this._tokenizationStateStore.applyEdits(change.range, eolCount);
            }
            this._beginBackgroundTokenization();
        }));
        this._register(this._textModel.onDidChangeAttached(() => {
            this._beginBackgroundTokenization();
        }));
        this._register(this._textModel.onDidChangeLanguage(() => {
            this._resetTokenizationState();
            this._textModel.clearTokens();
        }));
        this._resetTokenizationState();
    }
    dispose() {
        this._isDisposed = true;
        super.dispose();
    }
    _resetTokenizationState() {
        const [tokenizationSupport, initialState] = initializeTokenization(this._textModel);
        this._tokenizationSupport = tokenizationSupport;
        this._tokenizationStateStore.flush(initialState);
        this._beginBackgroundTokenization();
    }
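    /**
     * Schedule a single background tokenization pass via setImmediate, but only
     * while the model is attached to an editor and there are still invalid lines.
     */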
    _beginBackgroundTokenization() {
        if (this._isScheduled || !this._textModel.isAttachedToEditor() || !this._hasLinesToTokenize()) {
            return;
        }
        this._isScheduled = true;
        setImmediate(() => {
            this._isScheduled = false;
            if (this._isDisposed) {
                // disposed in the meantime
                return;
            }
            this._revalidateTokensNow();
        });
    }
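    /**
     * Tokenize invalid lines for at most MAX_ALLOWED_TIME milliseconds per pass,
     * reschedule another pass if lines remain invalid, and publish the collected
     * tokens to the model.
     */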
    _revalidateTokensNow() {
        const textModelLastLineNumber = this._textModel.getLineCount();
        const MAX_ALLOWED_TIME = 1;
        const builder = new MultilineTokensBuilder();
        const sw = StopWatch.create(false);
        let tokenizedLineNumber = -1;
        do {
            if (sw.elapsed() > MAX_ALLOWED_TIME) {
                // Stop if MAX_ALLOWED_TIME is reached
                break;
            }
            tokenizedLineNumber = this._tokenizeOneInvalidLine(builder);
            if (tokenizedLineNumber >= textModelLastLineNumber) {
                break;
            }
        } while (this._hasLinesToTokenize());
        this._beginBackgroundTokenization();
        this._textModel.setTokens(builder.tokens, !this._hasLinesToTokenize());
    }
    tokenizeViewport(startLineNumber, endLineNumber) {
        const builder = new MultilineTokensBuilder();
        this._tokenizeViewport(builder, startLineNumber, endLineNumber);
        this._textModel.setTokens(builder.tokens, !this._hasLinesToTokenize());
    }
    reset() {
        this._resetTokenizationState();
        this._textModel.clearTokens();
    }
    forceTokenization(lineNumber) {
        const builder = new MultilineTokensBuilder();
        this._updateTokensUntilLine(builder, lineNumber);
        this._textModel.setTokens(builder.tokens, !this._hasLinesToTokenize());
    }
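    /**
     * Determine the standard token type that `character` would have if it were typed
     * at `position`, by tokenizing the line text with the character already inserted.
     */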
    getTokenTypeIfInsertingCharacter(position, character) {
        if (!this._tokenizationSupport) {
            return 0 /* Other */;
        }
        this.forceTokenization(position.lineNumber);
        const lineStartState = this._tokenizationStateStore.getBeginState(position.lineNumber - 1);
        if (!lineStartState) {
            return 0 /* Other */;
        }
        const languageId = this._textModel.getLanguageId();
        const lineContent = this._textModel.getLineContent(position.lineNumber);
        // Create the text as if `character` was inserted
        const text = (lineContent.substring(0, position.column - 1)
            + character
            + lineContent.substring(position.column - 1));
        const r = safeTokenize(this._languageIdCodec, languageId, this._tokenizationSupport, text, true, lineStartState);
        const lineTokens = new LineTokens(r.tokens, text, this._languageIdCodec);
        if (lineTokens.getCount() === 0) {
            return 0 /* Other */;
        }
        const tokenIndex = lineTokens.findTokenIndexAtOffset(position.column - 1);
        return lineTokens.getStandardTokenType(tokenIndex);
    }
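    /**
     * A line is cheap to tokenize when every line before it already has a valid
     * state, or when it is exactly the first invalid line and is shorter than the
     * cheap-tokenization length limit.
     */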
    isCheapToTokenize(lineNumber) {
        if (!this._tokenizationSupport) {
            return true;
        }
        const firstInvalidLineNumber = this._tokenizationStateStore.invalidLineStartIndex + 1;
        if (lineNumber > firstInvalidLineNumber) {
            return false;
        }
        if (lineNumber < firstInvalidLineNumber) {
            return true;
        }
        if (this._textModel.getLineLength(lineNumber) < 2048 /* CHEAP_TOKENIZATION_LENGTH_LIMIT */) {
            return true;
        }
        return false;
    }
    _hasLinesToTokenize() {
        if (!this._tokenizationSupport) {
            return false;
        }
        return (this._tokenizationStateStore.invalidLineStartIndex < this._textModel.getLineCount());
    }
    _tokenizeOneInvalidLine(builder) {
        if (!this._hasLinesToTokenize()) {
            return this._textModel.getLineCount() + 1;
        }
        const lineNumber = this._tokenizationStateStore.invalidLineStartIndex + 1;
        this._updateTokensUntilLine(builder, lineNumber);
        return lineNumber;
    }
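    /**
     * Tokenize every invalid line from invalidLineStartIndex up to and including
     * `lineNumber`, feeding each line's end state into the next line. The loop
     * re-reads invalidLineStartIndex after each iteration, so lines that
     * setEndState already revalidated are skipped.
     */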
    _updateTokensUntilLine(builder, lineNumber) {
        if (!this._tokenizationSupport) {
            return;
        }
        const languageId = this._textModel.getLanguageId();
        const linesLength = this._textModel.getLineCount();
        const endLineIndex = lineNumber - 1;
        // Validate all states up to and including endLineIndex
        for (let lineIndex = this._tokenizationStateStore.invalidLineStartIndex; lineIndex <= endLineIndex; lineIndex++) {
            const text = this._textModel.getLineContent(lineIndex + 1);
            const lineStartState = this._tokenizationStateStore.getBeginState(lineIndex);
            const r = safeTokenize(this._languageIdCodec, languageId, this._tokenizationSupport, text, true, lineStartState);
            builder.add(lineIndex + 1, r.tokens);
            this._tokenizationStateStore.setEndState(linesLength, lineIndex, r.endState);
            lineIndex = this._tokenizationStateStore.invalidLineStartIndex - 1; // -1 because the outer loop increments it
        }
    }
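    /**
     * Tokenize only the requested viewport without waiting for background
     * tokenization to reach it. When the viewport lies beyond the first invalid
     * line, an approximate start state is guessed by walking upwards to lines with
     * smaller indentation and tokenizing those, and the viewport's tokens are
     * stored as "fake" so they will be recomputed properly later.
     */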
    _tokenizeViewport(builder, startLineNumber, endLineNumber) {
        if (!this._tokenizationSupport) {
            // nothing to do
            return;
        }
        if (endLineNumber <= this._tokenizationStateStore.invalidLineStartIndex) {
            // nothing to do
            return;
        }
        if (startLineNumber <= this._tokenizationStateStore.invalidLineStartIndex) {
            // tokenization has reached the viewport start...
            this._updateTokensUntilLine(builder, endLineNumber);
            return;
        }
        let nonWhitespaceColumn = this._textModel.getLineFirstNonWhitespaceColumn(startLineNumber);
        const fakeLines = [];
        let initialState = null;
        for (let i = startLineNumber - 1; nonWhitespaceColumn > 1 && i >= 1; i--) {
            const newNonWhitespaceIndex = this._textModel.getLineFirstNonWhitespaceColumn(i);
            if (newNonWhitespaceIndex === 0) {
                continue;
            }
            if (newNonWhitespaceIndex < nonWhitespaceColumn) {
                initialState = this._tokenizationStateStore.getBeginState(i - 1);
                if (initialState) {
                    break;
                }
                fakeLines.push(this._textModel.getLineContent(i));
                nonWhitespaceColumn = newNonWhitespaceIndex;
            }
        }
        if (!initialState) {
            initialState = this._tokenizationSupport.getInitialState();
        }
        const languageId = this._textModel.getLanguageId();
        let state = initialState;
        for (let i = fakeLines.length - 1; i >= 0; i--) {
            const r = safeTokenize(this._languageIdCodec, languageId, this._tokenizationSupport, fakeLines[i], false, state);
            state = r.endState;
        }
        for (let lineNumber = startLineNumber; lineNumber <= endLineNumber; lineNumber++) {
            const text = this._textModel.getLineContent(lineNumber);
            const r = safeTokenize(this._languageIdCodec, languageId, this._tokenizationSupport, text, true, state);
            builder.add(lineNumber, r.tokens);
            this._tokenizationStateStore.setFakeTokens(lineNumber - 1);
            state = r.endState;
        }
    }
}
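/**
 * Look up tokenization support for the model's language and return a
 * [tokenizationSupport, initialState] pair. Both are null when the model is too
 * large to tokenize, no support is registered, or getInitialState throws.
 */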
function initializeTokenization(textModel) {
    const languageId = textModel.getLanguageId();
    let tokenizationSupport = (textModel.isTooLargeForTokenization()
        ? null
        : TokenizationRegistry.get(languageId));
    let initialState = null;
    if (tokenizationSupport) {
        try {
            initialState = tokenizationSupport.getInitialState();
        }
        catch (e) {
            onUnexpectedError(e);
            tokenizationSupport = null;
        }
    }
    return [tokenizationSupport, initialState];
}
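/**
 * Tokenize a single line, falling back to nullTokenize2 when there is no
 * tokenization support or the tokenizer throws, and convert the token start
 * offsets to end offsets via LineTokens.convertToEndOffset.
 */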
function safeTokenize(languageIdCodec, languageId, tokenizationSupport, text, hasEOL, state) {
    let r = null;
    if (tokenizationSupport) {
        try {
            r = tokenizationSupport.tokenize2(text, hasEOL, state.clone(), 0);
        }
        catch (e) {
            onUnexpectedError(e);
        }
    }
    if (!r) {
        r = nullTokenize2(languageIdCodec.encodeLanguageId(languageId), text, state, 0);
    }
    LineTokens.convertToEndOffset(r.tokens, text.length);
    return r;
}