123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152 |
- /*---------------------------------------------------------------------------------------------
- * Copyright (c) Microsoft Corporation. All rights reserved.
- * Licensed under the MIT License. See License.txt in the project root for license information.
- *--------------------------------------------------------------------------------------------*/
- import { Range } from '../core/range.js';
- import { Searcher } from '../model/textModelSearch.js';
- import * as strings from '../../../base/common/strings.js';
- import { assertNever } from '../../../base/common/types.js';
/**
 * Scans the lines of a text model for characters that the given options mark
 * as worth highlighting (non-basic-ASCII, invisible, or ambiguous characters).
 */
export class UnicodeTextModelHighlighter {
    /**
     * Collects the ranges of all highlightable characters in `model`.
     *
     * @param model Object providing `getLineCount()` and `getLineContent(lineNumber)`.
     * @param options Highlighting options; handed unchanged to `CodePointHighlighter`.
     * @param range Optional. When given, only the lines from `range.startLineNumber`
     *   to `range.endLineNumber` (inclusive) are scanned; its columns are not consulted.
     * @returns An object with:
     *   - `ranges`: at most 1000 `Range`s, each confined to a single line;
     *   - `hasMore`: `true` when the scan stopped early because the limit was hit;
     *   - `ambiguousCharacterCount`, `invisibleCharacterCount`,
     *     `nonBasicAsciiCharacterCount`: per-reason tallies of highlighted hits.
     */
    static computeUnicodeHighlights(model, options, range) {
        // Upper bound on the number of ranges reported to the caller.
        const MAX_RESULT_LENGTH = 1000;
        const startLine = range ? range.startLineNumber : 1;
        const endLine = range ? range.endLineNumber : model.getLineCount();
        const codePointHighlighter = new CodePointHighlighter(options);
        const candidates = codePointHighlighter.getCandidateCodePoints();
        // Either "anything outside tab/LF/CR/printable ASCII" or an explicit
        // character class built from the candidate code points.
        const regex = candidates === 'allNonBasicAscii'
            ? new RegExp('[^\\t\\n\\r\\x20-\\x7E]', 'g')
            : new RegExp(`${buildRegExpCharClassExpr(Array.from(candidates))}`, 'g');
        const searcher = new Searcher(null, regex);
        const ranges = [];
        let hasMore = false;
        let ambiguousCharacterCount = 0;
        let invisibleCharacterCount = 0;
        let nonBasicAsciiCharacterCount = 0;
        forLoop: for (let lineNumber = startLine; lineNumber <= endLine; lineNumber++) {
            const lineContent = model.getLineContent(lineNumber);
            const lineLength = lineContent.length;
            // Reset regex to search from the beginning
            searcher.reset(0);
            let m;
            while ((m = searcher.next(lineContent))) {
                let startIndex = m.index;
                let endIndex = m.index + m[0].length;
                // Extend range to entire code point: the regex has no `u` flag, so it
                // can match one half of a surrogate pair on its own.
                if (startIndex > 0 && strings.isHighSurrogate(lineContent.charCodeAt(startIndex - 1))) {
                    startIndex--;
                }
                // Only requires that one more UTF-16 unit exists after the match, so a
                // surrogate pair that ends the line is extended as well.
                if (endIndex < lineLength && strings.isHighSurrogate(lineContent.charCodeAt(endIndex - 1))) {
                    endIndex++;
                }
                const str = lineContent.substring(startIndex, endIndex);
                const highlightReason = codePointHighlighter.shouldHighlightNonBasicASCII(str);
                if (highlightReason === 0 /* None */) {
                    continue;
                }
                if (highlightReason === 3 /* Ambiguous */) {
                    ambiguousCharacterCount++;
                }
                else if (highlightReason === 2 /* Invisible */) {
                    invisibleCharacterCount++;
                }
                else if (highlightReason === 1 /* NonBasicASCII */) {
                    nonBasicAsciiCharacterCount++;
                }
                else {
                    assertNever(highlightReason);
                }
                // The hit that trips the limit has already been counted above but is
                // not added to `ranges`.
                if (ranges.length >= MAX_RESULT_LENGTH) {
                    hasMore = true;
                    break forLoop;
                }
                // Range columns are the string offsets plus one.
                ranges.push(new Range(lineNumber, startIndex + 1, lineNumber, endIndex + 1));
            }
        }
        return {
            ranges,
            hasMore,
            ambiguousCharacterCount,
            invisibleCharacterCount,
            nonBasicAsciiCharacterCount
        };
    }
    /**
     * Explains why a single character would be highlighted under `options`.
     *
     * @param char String whose first code point is classified.
     * @param options Highlighting options; handed unchanged to `CodePointHighlighter`.
     * @returns `null` when the character is not highlighted; otherwise an object whose
     *   `kind` is 0 (ambiguous, with `confusableWith` set to the primary confusable
     *   character), 1 (invisible) or 2 (non-basic-ASCII). Note that these `kind`
     *   values are numbered differently from the internal reason codes.
     */
    static computeUnicodeHighlightReason(char, options) {
        const codePointHighlighter = new CodePointHighlighter(options);
        const reason = codePointHighlighter.shouldHighlightNonBasicASCII(char);
        switch (reason) {
            case 0 /* None */:
                return null;
            case 2 /* Invisible */:
                return { kind: 1 /* Invisible */ };
            case 3 /* Ambiguous */: {
                // Braces keep the lexical declaration local to this case.
                const primaryConfusable = strings.AmbiguousCharacters.getPrimaryConfusable(char.codePointAt(0));
                return { kind: 0 /* Ambiguous */, confusableWith: String.fromCodePoint(primaryConfusable) };
            }
            case 1 /* NonBasicASCII */:
                return { kind: 2 /* NonBasicAscii */ };
        }
    }
}
/**
 * Builds the source text of a regular-expression character class (`[...]`)
 * containing one character per given code point.
 *
 * @param codePoints Array of numeric code points.
 * @param flags Not used by this function.
 * @returns The character-class source, with the joined characters passed through
 *   `strings.escapeRegExpCharacters` before being wrapped in brackets.
 */
function buildRegExpCharClassExpr(codePoints, flags) {
    const members = codePoints.reduce((text, codePoint) => text + String.fromCodePoint(codePoint), '');
    const escapedMembers = strings.escapeRegExpCharacters(members);
    return '[' + escapedMembers + ']';
}
/**
 * Classifies characters according to a set of highlighting options.
 *
 * Reason codes returned by `shouldHighlightNonBasicASCII`:
 * 0 = None, 1 = NonBasicASCII, 2 = Invisible, 3 = Ambiguous.
 */
class CodePointHighlighter {
    /**
     * @param options Object read for `nonBasicASCII`, `invisibleCharacters`,
     *   `ambiguousCharacters` and `allowedCodePoints` (an iterable of code points
     *   that are never highlighted).
     */
    constructor(options) {
        this.options = options;
        this.allowedCodePoints = new Set(options.allowedCodePoints);
    }
    /**
     * Determines which code points a scan has to look for.
     *
     * @returns The string `'allNonBasicAscii'` when `options.nonBasicASCII` is set;
     *   otherwise a `Set` of the invisible and/or ambiguous code points enabled by
     *   the options, with the allowed code points removed.
     */
    getCandidateCodePoints() {
        if (this.options.nonBasicASCII) {
            return 'allNonBasicAscii';
        }
        const candidates = new Set();
        const addAll = (codePoints) => {
            for (const codePoint of codePoints) {
                candidates.add(codePoint);
            }
        };
        if (this.options.invisibleCharacters) {
            addAll(strings.InvisibleCharacters.codePoints);
        }
        if (this.options.ambiguousCharacters) {
            addAll(strings.AmbiguousCharacters.getPrimaryConfusableCodePoints());
        }
        this.allowedCodePoints.forEach((codePoint) => candidates.delete(codePoint));
        return candidates;
    }
    /**
     * Decides whether (and why) a character is highlighted, based on its first code point.
     *
     * @param character String to classify.
     * @returns A reason code: 0 (None), 1 (NonBasicASCII), 2 (Invisible) or 3 (Ambiguous).
     *   Allowed code points always yield 0; otherwise the checks are tried in the
     *   order non-basic-ASCII, invisible, ambiguous.
     */
    shouldHighlightNonBasicASCII(character) {
        const codePoint = character.codePointAt(0);
        if (this.allowedCodePoints.has(codePoint)) {
            return 0 /* None */;
        }
        if (this.options.nonBasicASCII) {
            return 1 /* NonBasicASCII */;
        }
        // Space, line feed and tab are never reported as invisible.
        // TODO check for emojis
        if (this.options.invisibleCharacters
            && character !== ' '
            && character !== '\n'
            && character !== '\t'
            && strings.InvisibleCharacters.isInvisibleCharacter(codePoint)) {
            return 2 /* Invisible */;
        }
        if (this.options.ambiguousCharacters && strings.AmbiguousCharacters.isAmbiguous(codePoint)) {
            return 3 /* Ambiguous */;
        }
        return 0 /* None */;
    }
}
|