unicodeTextModelHighlighter.js 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152
  1. /*---------------------------------------------------------------------------------------------
  2. * Copyright (c) Microsoft Corporation. All rights reserved.
  3. * Licensed under the MIT License. See License.txt in the project root for license information.
  4. *--------------------------------------------------------------------------------------------*/
  5. import { Range } from '../core/range.js';
  6. import { Searcher } from '../model/textModelSearch.js';
  7. import * as strings from '../../../base/common/strings.js';
  8. import { assertNever } from '../../../base/common/types.js';
  /**
   * Finds the characters of a text model that should be highlighted according
   * to the given unicode-highlight options, and explains why a single
   * character is highlighted.
   */
  export class UnicodeTextModelHighlighter {
    /**
     * Scans whole lines of `model` and collects a range for every character
     * that `CodePointHighlighter` decides to highlight.
     *
     * @param model Object exposing `getLineCount()` and `getLineContent(lineNumber)`.
     * @param options Highlight options; forwarded unchanged to `CodePointHighlighter`.
     * @param [range] Optional. Only `startLineNumber` and `endLineNumber` are
     *   read, so complete lines are scanned. Without it, lines 1..getLineCount()
     *   are scanned.
     * @returns `{ ranges, hasMore, ambiguousCharacterCount, invisibleCharacterCount,
     *   nonBasicAsciiCharacterCount }`. `ranges` holds at most 1000 entries;
     *   `hasMore` is true when the scan stopped because that limit was reached.
     */
    static computeUnicodeHighlights(model, options, range) {
      const MAX_RESULT_LENGTH = 1000;
      const startLine = range ? range.startLineNumber : 1;
      const endLine = range ? range.endLineNumber : model.getLineCount();
      const codePointHighlighter = new CodePointHighlighter(options);
      const candidates = codePointHighlighter.getCandidateCodePoints();
      // Neither regex uses the `u` flag, so a match is always a single UTF-16
      // code unit and has to be widened to the full code point below.
      const regex = candidates === 'allNonBasicAscii'
        ? new RegExp('[^\\t\\n\\r\\x20-\\x7E]', 'g')
        : new RegExp(`${buildRegExpCharClassExpr(Array.from(candidates))}`, 'g');
      const searcher = new Searcher(null, regex);
      const ranges = [];
      let hasMore = false;
      let ambiguousCharacterCount = 0;
      let invisibleCharacterCount = 0;
      let nonBasicAsciiCharacterCount = 0;

      forLoop: for (let lineNumber = startLine; lineNumber <= endLine; lineNumber++) {
        const lineContent = model.getLineContent(lineNumber);
        const lineLength = lineContent.length;
        // Offset just past the last range already evaluated on this line.
        let consumedUntil = 0;
        // Reset regex to search from the beginning
        searcher.reset(0);

        let m;
        while ((m = searcher.next(lineContent))) {
          if (m.index < consumedUntil) {
            // This match is the second half of a surrogate pair whose first
            // half was already matched and widened; evaluating it again would
            // report and count the same character twice.
            continue;
          }
          let startIndex = m.index;
          let endIndex = m.index + m[0].length;

          // Extend range to entire code point
          if (startIndex > 0 && strings.isHighSurrogate(lineContent.charCodeAt(startIndex - 1))) {
            startIndex--;
          }
          // `endIndex < lineLength` (not `endIndex + 1 < lineLength`): the low
          // surrogate may be the very last code unit of the line.
          if (endIndex < lineLength && strings.isHighSurrogate(lineContent.charCodeAt(endIndex - 1))) {
            endIndex++;
          }
          consumedUntil = endIndex;

          const str = lineContent.substring(startIndex, endIndex);
          const highlightReason = codePointHighlighter.shouldHighlightNonBasicASCII(str);
          if (highlightReason === 0 /* None */) {
            continue;
          }

          if (highlightReason === 3 /* Ambiguous */) {
            ambiguousCharacterCount++;
          }
          else if (highlightReason === 2 /* Invisible */) {
            invisibleCharacterCount++;
          }
          else if (highlightReason === 1 /* NonBasicASCII */) {
            nonBasicAsciiCharacterCount++;
          }
          else {
            assertNever(highlightReason);
          }

          // The counter above is updated before the limit check, so the
          // character that triggers `hasMore` is counted but gets no range.
          if (ranges.length >= MAX_RESULT_LENGTH) {
            hasMore = true;
            break forLoop;
          }
          // Columns are 1-based, string offsets are 0-based.
          ranges.push(new Range(lineNumber, startIndex + 1, lineNumber, endIndex + 1));
        }
      }

      return {
        ranges,
        hasMore,
        ambiguousCharacterCount,
        invisibleCharacterCount,
        nonBasicAsciiCharacterCount
      };
    }

    /**
     * Classifies a single character.
     *
     * @param char String whose first code point is classified.
     * @param options Highlight options; forwarded unchanged to `CodePointHighlighter`.
     * @returns `null` when the character is not highlighted, otherwise an
     *   object with a numeric `kind` (0 ambiguous, 1 invisible, 2 non-basic
     *   ASCII). Ambiguous results also carry `confusableWith`, the character
     *   returned by `strings.AmbiguousCharacters.getPrimaryConfusable`.
     */
    static computeUnicodeHighlightReason(char, options) {
      const codePointHighlighter = new CodePointHighlighter(options);
      const reason = codePointHighlighter.shouldHighlightNonBasicASCII(char);
      switch (reason) {
        case 0 /* None */:
          return null;
        case 2 /* Invisible */:
          return { kind: 1 /* Invisible */ };
        case 3 /* Ambiguous */: {
          // Braces give the declaration its own scope instead of leaking it
          // into the other case clauses.
          const primaryConfusable = strings.AmbiguousCharacters.getPrimaryConfusable(char.codePointAt(0));
          return { kind: 0 /* Ambiguous */, confusableWith: String.fromCodePoint(primaryConfusable) };
        }
        case 1 /* NonBasicASCII */:
          return { kind: 2 /* NonBasicAscii */ };
      }
    }
  }
  /**
   * Builds the source text of a regular-expression character class that
   * matches any of the given code points.
   *
   * @param codePoints Array of numeric code points.
   * @param flags Unused; kept so the signature stays unchanged.
   * @returns The class source including the surrounding brackets, e.g. `[ab]`.
   *   An empty array yields `[]`.
   */
  function buildRegExpCharClassExpr(codePoints, flags) {
    const HYPHEN_MINUS = 0x2D;
    let containsHyphen = false;
    const characters = [];
    for (const codePoint of codePoints) {
      if (codePoint === HYPHEN_MINUS) {
        containsHyphen = true;
      }
      else {
        characters.push(String.fromCodePoint(codePoint));
      }
    }
    // A bare `-` between two class members would be read as a range (or throw
    // "Range out of order"), so it is emitted separately as an explicit `\-`.
    // Member order inside a character class does not affect what it matches.
    const escapedHyphen = containsHyphen ? '\\-' : '';
    return `[${strings.escapeRegExpCharacters(characters.join(''))}${escapedHyphen}]`;
  }
  /**
   * Decides, for one set of highlight options, which code points are worth
   * searching for and whether a given character should be highlighted.
   */
  class CodePointHighlighter {
    /**
     * @param options Highlight options. The fields read by this class are
     *   `nonBasicASCII`, `invisibleCharacters`, `ambiguousCharacters` and
     *   `allowedCodePoints` (an iterable of code points that are never highlighted).
     */
    constructor(options) {
      this.options = options;
      this.allowedCodePoints = new Set(options.allowedCodePoints);
    }

    /**
     * Space, newline and tab are never reported as invisible characters.
     *
     * @param character Single-character string.
     * @returns True for `' '`, `'\n'` and `'\t'`.
     */
    static isAllowedInvisibleCharacter(character) {
      return character === ' ' || character === '\n' || character === '\t';
    }

    /**
     * Computes what a search has to look for.
     *
     * @returns The string `'allNonBasicAscii'` when `options.nonBasicASCII` is
     *   set; otherwise a `Set` of code points that may be highlighted, with the
     *   allowed code points removed.
     */
    getCandidateCodePoints() {
      if (this.options.nonBasicASCII) {
        return 'allNonBasicAscii';
      }
      const set = new Set();
      if (this.options.invisibleCharacters) {
        for (const cp of strings.InvisibleCharacters.codePoints) {
          // Skip the characters `shouldHighlightNonBasicASCII` never reports
          // as invisible; otherwise the search would stop at every space and
          // tab only to discard the match again.
          if (!CodePointHighlighter.isAllowedInvisibleCharacter(String.fromCodePoint(cp))) {
            set.add(cp);
          }
        }
      }
      if (this.options.ambiguousCharacters) {
        for (const cp of strings.AmbiguousCharacters.getPrimaryConfusableCodePoints()) {
          set.add(cp);
        }
      }
      for (const cp of this.allowedCodePoints) {
        set.delete(cp);
      }
      return set;
    }

    /**
     * Classifies the first code point of `character`.
     *
     * @param character String to classify.
     * @returns 0 (None), 1 (NonBasicASCII), 2 (Invisible) or 3 (Ambiguous).
     *   Allowed code points always yield 0. With `options.nonBasicASCII` set,
     *   every other input yields 1 without further checks.
     */
    shouldHighlightNonBasicASCII(character) {
      const codePoint = character.codePointAt(0);
      if (this.allowedCodePoints.has(codePoint)) {
        return 0 /* None */;
      }
      if (this.options.nonBasicASCII) {
        return 1 /* NonBasicASCII */;
      }
      if (this.options.invisibleCharacters) {
        // TODO check for emojis
        if (!CodePointHighlighter.isAllowedInvisibleCharacter(character) && strings.InvisibleCharacters.isInvisibleCharacter(codePoint)) {
          return 2 /* Invisible */;
        }
      }
      if (this.options.ambiguousCharacters) {
        if (strings.AmbiguousCharacters.isAmbiguous(codePoint)) {
          return 3 /* Ambiguous */;
        }
      }
      return 0 /* None */;
    }
  }