textModelSearch.js 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456
  1. /*---------------------------------------------------------------------------------------------
  2. * Copyright (c) Microsoft Corporation. All rights reserved.
  3. * Licensed under the MIT License. See License.txt in the project root for license information.
  4. *--------------------------------------------------------------------------------------------*/
  5. import * as strings from '../../../base/common/strings.js';
  6. import { getMapForWordSeparators } from '../controller/wordCharacterClassifier.js';
  7. import { Position } from '../core/position.js';
  8. import { Range } from '../core/range.js';
  9. import { FindMatch } from '../model.js';
  10. const LIMIT_FIND_COUNT = 999;
  11. export class SearchParams {
  12. constructor(searchString, isRegex, matchCase, wordSeparators) {
  13. this.searchString = searchString;
  14. this.isRegex = isRegex;
  15. this.matchCase = matchCase;
  16. this.wordSeparators = wordSeparators;
  17. }
  18. parseSearchRequest() {
  19. if (this.searchString === '') {
  20. return null;
  21. }
  22. // Try to create a RegExp out of the params
  23. let multiline;
  24. if (this.isRegex) {
  25. multiline = isMultilineRegexSource(this.searchString);
  26. }
  27. else {
  28. multiline = (this.searchString.indexOf('\n') >= 0);
  29. }
  30. let regex = null;
  31. try {
  32. regex = strings.createRegExp(this.searchString, this.isRegex, {
  33. matchCase: this.matchCase,
  34. wholeWord: false,
  35. multiline: multiline,
  36. global: true,
  37. unicode: true
  38. });
  39. }
  40. catch (err) {
  41. return null;
  42. }
  43. if (!regex) {
  44. return null;
  45. }
  46. let canUseSimpleSearch = (!this.isRegex && !multiline);
  47. if (canUseSimpleSearch && this.searchString.toLowerCase() !== this.searchString.toUpperCase()) {
  48. // casing might make a difference
  49. canUseSimpleSearch = this.matchCase;
  50. }
  51. return new SearchData(regex, this.wordSeparators ? getMapForWordSeparators(this.wordSeparators) : null, canUseSimpleSearch ? this.searchString : null);
  52. }
  53. }
  54. export function isMultilineRegexSource(searchString) {
  55. if (!searchString || searchString.length === 0) {
  56. return false;
  57. }
  58. for (let i = 0, len = searchString.length; i < len; i++) {
  59. const chCode = searchString.charCodeAt(i);
  60. if (chCode === 92 /* Backslash */) {
  61. // move to next char
  62. i++;
  63. if (i >= len) {
  64. // string ends with a \
  65. break;
  66. }
  67. const nextChCode = searchString.charCodeAt(i);
  68. if (nextChCode === 110 /* n */ || nextChCode === 114 /* r */ || nextChCode === 87 /* W */) {
  69. return true;
  70. }
  71. }
  72. }
  73. return false;
  74. }
  75. export class SearchData {
  76. constructor(regex, wordSeparators, simpleSearch) {
  77. this.regex = regex;
  78. this.wordSeparators = wordSeparators;
  79. this.simpleSearch = simpleSearch;
  80. }
  81. }
  82. export function createFindMatch(range, rawMatches, captureMatches) {
  83. if (!captureMatches) {
  84. return new FindMatch(range, null);
  85. }
  86. let matches = [];
  87. for (let i = 0, len = rawMatches.length; i < len; i++) {
  88. matches[i] = rawMatches[i];
  89. }
  90. return new FindMatch(range, matches);
  91. }
  92. class LineFeedCounter {
  93. constructor(text) {
  94. let lineFeedsOffsets = [];
  95. let lineFeedsOffsetsLen = 0;
  96. for (let i = 0, textLen = text.length; i < textLen; i++) {
  97. if (text.charCodeAt(i) === 10 /* LineFeed */) {
  98. lineFeedsOffsets[lineFeedsOffsetsLen++] = i;
  99. }
  100. }
  101. this._lineFeedsOffsets = lineFeedsOffsets;
  102. }
  103. findLineFeedCountBeforeOffset(offset) {
  104. const lineFeedsOffsets = this._lineFeedsOffsets;
  105. let min = 0;
  106. let max = lineFeedsOffsets.length - 1;
  107. if (max === -1) {
  108. // no line feeds
  109. return 0;
  110. }
  111. if (offset <= lineFeedsOffsets[0]) {
  112. // before first line feed
  113. return 0;
  114. }
  115. while (min < max) {
  116. const mid = min + ((max - min) / 2 >> 0);
  117. if (lineFeedsOffsets[mid] >= offset) {
  118. max = mid - 1;
  119. }
  120. else {
  121. if (lineFeedsOffsets[mid + 1] >= offset) {
  122. // bingo!
  123. min = mid;
  124. max = mid;
  125. }
  126. else {
  127. min = mid + 1;
  128. }
  129. }
  130. }
  131. return min + 1;
  132. }
  133. }
  134. export class TextModelSearch {
  135. static findMatches(model, searchParams, searchRange, captureMatches, limitResultCount) {
  136. const searchData = searchParams.parseSearchRequest();
  137. if (!searchData) {
  138. return [];
  139. }
  140. if (searchData.regex.multiline) {
  141. return this._doFindMatchesMultiline(model, searchRange, new Searcher(searchData.wordSeparators, searchData.regex), captureMatches, limitResultCount);
  142. }
  143. return this._doFindMatchesLineByLine(model, searchRange, searchData, captureMatches, limitResultCount);
  144. }
  145. /**
  146. * Multiline search always executes on the lines concatenated with \n.
  147. * We must therefore compensate for the count of \n in case the model is CRLF
  148. */
  149. static _getMultilineMatchRange(model, deltaOffset, text, lfCounter, matchIndex, match0) {
  150. let startOffset;
  151. let lineFeedCountBeforeMatch = 0;
  152. if (lfCounter) {
  153. lineFeedCountBeforeMatch = lfCounter.findLineFeedCountBeforeOffset(matchIndex);
  154. startOffset = deltaOffset + matchIndex + lineFeedCountBeforeMatch /* add as many \r as there were \n */;
  155. }
  156. else {
  157. startOffset = deltaOffset + matchIndex;
  158. }
  159. let endOffset;
  160. if (lfCounter) {
  161. let lineFeedCountBeforeEndOfMatch = lfCounter.findLineFeedCountBeforeOffset(matchIndex + match0.length);
  162. let lineFeedCountInMatch = lineFeedCountBeforeEndOfMatch - lineFeedCountBeforeMatch;
  163. endOffset = startOffset + match0.length + lineFeedCountInMatch /* add as many \r as there were \n */;
  164. }
  165. else {
  166. endOffset = startOffset + match0.length;
  167. }
  168. const startPosition = model.getPositionAt(startOffset);
  169. const endPosition = model.getPositionAt(endOffset);
  170. return new Range(startPosition.lineNumber, startPosition.column, endPosition.lineNumber, endPosition.column);
  171. }
  172. static _doFindMatchesMultiline(model, searchRange, searcher, captureMatches, limitResultCount) {
  173. const deltaOffset = model.getOffsetAt(searchRange.getStartPosition());
  174. // We always execute multiline search over the lines joined with \n
  175. // This makes it that \n will match the EOL for both CRLF and LF models
  176. // We compensate for offset errors in `_getMultilineMatchRange`
  177. const text = model.getValueInRange(searchRange, 1 /* LF */);
  178. const lfCounter = (model.getEOL() === '\r\n' ? new LineFeedCounter(text) : null);
  179. const result = [];
  180. let counter = 0;
  181. let m;
  182. searcher.reset(0);
  183. while ((m = searcher.next(text))) {
  184. result[counter++] = createFindMatch(this._getMultilineMatchRange(model, deltaOffset, text, lfCounter, m.index, m[0]), m, captureMatches);
  185. if (counter >= limitResultCount) {
  186. return result;
  187. }
  188. }
  189. return result;
  190. }
  191. static _doFindMatchesLineByLine(model, searchRange, searchData, captureMatches, limitResultCount) {
  192. const result = [];
  193. let resultLen = 0;
  194. // Early case for a search range that starts & stops on the same line number
  195. if (searchRange.startLineNumber === searchRange.endLineNumber) {
  196. const text = model.getLineContent(searchRange.startLineNumber).substring(searchRange.startColumn - 1, searchRange.endColumn - 1);
  197. resultLen = this._findMatchesInLine(searchData, text, searchRange.startLineNumber, searchRange.startColumn - 1, resultLen, result, captureMatches, limitResultCount);
  198. return result;
  199. }
  200. // Collect results from first line
  201. const text = model.getLineContent(searchRange.startLineNumber).substring(searchRange.startColumn - 1);
  202. resultLen = this._findMatchesInLine(searchData, text, searchRange.startLineNumber, searchRange.startColumn - 1, resultLen, result, captureMatches, limitResultCount);
  203. // Collect results from middle lines
  204. for (let lineNumber = searchRange.startLineNumber + 1; lineNumber < searchRange.endLineNumber && resultLen < limitResultCount; lineNumber++) {
  205. resultLen = this._findMatchesInLine(searchData, model.getLineContent(lineNumber), lineNumber, 0, resultLen, result, captureMatches, limitResultCount);
  206. }
  207. // Collect results from last line
  208. if (resultLen < limitResultCount) {
  209. const text = model.getLineContent(searchRange.endLineNumber).substring(0, searchRange.endColumn - 1);
  210. resultLen = this._findMatchesInLine(searchData, text, searchRange.endLineNumber, 0, resultLen, result, captureMatches, limitResultCount);
  211. }
  212. return result;
  213. }
  214. static _findMatchesInLine(searchData, text, lineNumber, deltaOffset, resultLen, result, captureMatches, limitResultCount) {
  215. const wordSeparators = searchData.wordSeparators;
  216. if (!captureMatches && searchData.simpleSearch) {
  217. const searchString = searchData.simpleSearch;
  218. const searchStringLen = searchString.length;
  219. const textLength = text.length;
  220. let lastMatchIndex = -searchStringLen;
  221. while ((lastMatchIndex = text.indexOf(searchString, lastMatchIndex + searchStringLen)) !== -1) {
  222. if (!wordSeparators || isValidMatch(wordSeparators, text, textLength, lastMatchIndex, searchStringLen)) {
  223. result[resultLen++] = new FindMatch(new Range(lineNumber, lastMatchIndex + 1 + deltaOffset, lineNumber, lastMatchIndex + 1 + searchStringLen + deltaOffset), null);
  224. if (resultLen >= limitResultCount) {
  225. return resultLen;
  226. }
  227. }
  228. }
  229. return resultLen;
  230. }
  231. const searcher = new Searcher(searchData.wordSeparators, searchData.regex);
  232. let m;
  233. // Reset regex to search from the beginning
  234. searcher.reset(0);
  235. do {
  236. m = searcher.next(text);
  237. if (m) {
  238. result[resultLen++] = createFindMatch(new Range(lineNumber, m.index + 1 + deltaOffset, lineNumber, m.index + 1 + m[0].length + deltaOffset), m, captureMatches);
  239. if (resultLen >= limitResultCount) {
  240. return resultLen;
  241. }
  242. }
  243. } while (m);
  244. return resultLen;
  245. }
  246. static findNextMatch(model, searchParams, searchStart, captureMatches) {
  247. const searchData = searchParams.parseSearchRequest();
  248. if (!searchData) {
  249. return null;
  250. }
  251. const searcher = new Searcher(searchData.wordSeparators, searchData.regex);
  252. if (searchData.regex.multiline) {
  253. return this._doFindNextMatchMultiline(model, searchStart, searcher, captureMatches);
  254. }
  255. return this._doFindNextMatchLineByLine(model, searchStart, searcher, captureMatches);
  256. }
  257. static _doFindNextMatchMultiline(model, searchStart, searcher, captureMatches) {
  258. const searchTextStart = new Position(searchStart.lineNumber, 1);
  259. const deltaOffset = model.getOffsetAt(searchTextStart);
  260. const lineCount = model.getLineCount();
  261. // We always execute multiline search over the lines joined with \n
  262. // This makes it that \n will match the EOL for both CRLF and LF models
  263. // We compensate for offset errors in `_getMultilineMatchRange`
  264. const text = model.getValueInRange(new Range(searchTextStart.lineNumber, searchTextStart.column, lineCount, model.getLineMaxColumn(lineCount)), 1 /* LF */);
  265. const lfCounter = (model.getEOL() === '\r\n' ? new LineFeedCounter(text) : null);
  266. searcher.reset(searchStart.column - 1);
  267. let m = searcher.next(text);
  268. if (m) {
  269. return createFindMatch(this._getMultilineMatchRange(model, deltaOffset, text, lfCounter, m.index, m[0]), m, captureMatches);
  270. }
  271. if (searchStart.lineNumber !== 1 || searchStart.column !== 1) {
  272. // Try again from the top
  273. return this._doFindNextMatchMultiline(model, new Position(1, 1), searcher, captureMatches);
  274. }
  275. return null;
  276. }
  277. static _doFindNextMatchLineByLine(model, searchStart, searcher, captureMatches) {
  278. const lineCount = model.getLineCount();
  279. const startLineNumber = searchStart.lineNumber;
  280. // Look in first line
  281. const text = model.getLineContent(startLineNumber);
  282. const r = this._findFirstMatchInLine(searcher, text, startLineNumber, searchStart.column, captureMatches);
  283. if (r) {
  284. return r;
  285. }
  286. for (let i = 1; i <= lineCount; i++) {
  287. const lineIndex = (startLineNumber + i - 1) % lineCount;
  288. const text = model.getLineContent(lineIndex + 1);
  289. const r = this._findFirstMatchInLine(searcher, text, lineIndex + 1, 1, captureMatches);
  290. if (r) {
  291. return r;
  292. }
  293. }
  294. return null;
  295. }
  296. static _findFirstMatchInLine(searcher, text, lineNumber, fromColumn, captureMatches) {
  297. // Set regex to search from column
  298. searcher.reset(fromColumn - 1);
  299. const m = searcher.next(text);
  300. if (m) {
  301. return createFindMatch(new Range(lineNumber, m.index + 1, lineNumber, m.index + 1 + m[0].length), m, captureMatches);
  302. }
  303. return null;
  304. }
  305. static findPreviousMatch(model, searchParams, searchStart, captureMatches) {
  306. const searchData = searchParams.parseSearchRequest();
  307. if (!searchData) {
  308. return null;
  309. }
  310. const searcher = new Searcher(searchData.wordSeparators, searchData.regex);
  311. if (searchData.regex.multiline) {
  312. return this._doFindPreviousMatchMultiline(model, searchStart, searcher, captureMatches);
  313. }
  314. return this._doFindPreviousMatchLineByLine(model, searchStart, searcher, captureMatches);
  315. }
  316. static _doFindPreviousMatchMultiline(model, searchStart, searcher, captureMatches) {
  317. const matches = this._doFindMatchesMultiline(model, new Range(1, 1, searchStart.lineNumber, searchStart.column), searcher, captureMatches, 10 * LIMIT_FIND_COUNT);
  318. if (matches.length > 0) {
  319. return matches[matches.length - 1];
  320. }
  321. const lineCount = model.getLineCount();
  322. if (searchStart.lineNumber !== lineCount || searchStart.column !== model.getLineMaxColumn(lineCount)) {
  323. // Try again with all content
  324. return this._doFindPreviousMatchMultiline(model, new Position(lineCount, model.getLineMaxColumn(lineCount)), searcher, captureMatches);
  325. }
  326. return null;
  327. }
  328. static _doFindPreviousMatchLineByLine(model, searchStart, searcher, captureMatches) {
  329. const lineCount = model.getLineCount();
  330. const startLineNumber = searchStart.lineNumber;
  331. // Look in first line
  332. const text = model.getLineContent(startLineNumber).substring(0, searchStart.column - 1);
  333. const r = this._findLastMatchInLine(searcher, text, startLineNumber, captureMatches);
  334. if (r) {
  335. return r;
  336. }
  337. for (let i = 1; i <= lineCount; i++) {
  338. const lineIndex = (lineCount + startLineNumber - i - 1) % lineCount;
  339. const text = model.getLineContent(lineIndex + 1);
  340. const r = this._findLastMatchInLine(searcher, text, lineIndex + 1, captureMatches);
  341. if (r) {
  342. return r;
  343. }
  344. }
  345. return null;
  346. }
  347. static _findLastMatchInLine(searcher, text, lineNumber, captureMatches) {
  348. let bestResult = null;
  349. let m;
  350. searcher.reset(0);
  351. while ((m = searcher.next(text))) {
  352. bestResult = createFindMatch(new Range(lineNumber, m.index + 1, lineNumber, m.index + 1 + m[0].length), m, captureMatches);
  353. }
  354. return bestResult;
  355. }
  356. }
  357. function leftIsWordBounday(wordSeparators, text, textLength, matchStartIndex, matchLength) {
  358. if (matchStartIndex === 0) {
  359. // Match starts at start of string
  360. return true;
  361. }
  362. const charBefore = text.charCodeAt(matchStartIndex - 1);
  363. if (wordSeparators.get(charBefore) !== 0 /* Regular */) {
  364. // The character before the match is a word separator
  365. return true;
  366. }
  367. if (charBefore === 13 /* CarriageReturn */ || charBefore === 10 /* LineFeed */) {
  368. // The character before the match is line break or carriage return.
  369. return true;
  370. }
  371. if (matchLength > 0) {
  372. const firstCharInMatch = text.charCodeAt(matchStartIndex);
  373. if (wordSeparators.get(firstCharInMatch) !== 0 /* Regular */) {
  374. // The first character inside the match is a word separator
  375. return true;
  376. }
  377. }
  378. return false;
  379. }
  380. function rightIsWordBounday(wordSeparators, text, textLength, matchStartIndex, matchLength) {
  381. if (matchStartIndex + matchLength === textLength) {
  382. // Match ends at end of string
  383. return true;
  384. }
  385. const charAfter = text.charCodeAt(matchStartIndex + matchLength);
  386. if (wordSeparators.get(charAfter) !== 0 /* Regular */) {
  387. // The character after the match is a word separator
  388. return true;
  389. }
  390. if (charAfter === 13 /* CarriageReturn */ || charAfter === 10 /* LineFeed */) {
  391. // The character after the match is line break or carriage return.
  392. return true;
  393. }
  394. if (matchLength > 0) {
  395. const lastCharInMatch = text.charCodeAt(matchStartIndex + matchLength - 1);
  396. if (wordSeparators.get(lastCharInMatch) !== 0 /* Regular */) {
  397. // The last character in the match is a word separator
  398. return true;
  399. }
  400. }
  401. return false;
  402. }
  403. export function isValidMatch(wordSeparators, text, textLength, matchStartIndex, matchLength) {
  404. return (leftIsWordBounday(wordSeparators, text, textLength, matchStartIndex, matchLength)
  405. && rightIsWordBounday(wordSeparators, text, textLength, matchStartIndex, matchLength));
  406. }
  407. export class Searcher {
  408. constructor(wordSeparators, searchRegex) {
  409. this._wordSeparators = wordSeparators;
  410. this._searchRegex = searchRegex;
  411. this._prevMatchStartIndex = -1;
  412. this._prevMatchLength = 0;
  413. }
  414. reset(lastIndex) {
  415. this._searchRegex.lastIndex = lastIndex;
  416. this._prevMatchStartIndex = -1;
  417. this._prevMatchLength = 0;
  418. }
  419. next(text) {
  420. const textLength = text.length;
  421. let m;
  422. do {
  423. if (this._prevMatchStartIndex + this._prevMatchLength === textLength) {
  424. // Reached the end of the line
  425. return null;
  426. }
  427. m = this._searchRegex.exec(text);
  428. if (!m) {
  429. return null;
  430. }
  431. const matchStartIndex = m.index;
  432. const matchLength = m[0].length;
  433. if (matchStartIndex === this._prevMatchStartIndex && matchLength === this._prevMatchLength) {
  434. if (matchLength === 0) {
  435. // the search result is an empty string and won't advance `regex.lastIndex`, so `regex.exec` will stuck here
  436. // we attempt to recover from that by advancing by two if surrogate pair found and by one otherwise
  437. if (strings.getNextCodePoint(text, textLength, this._searchRegex.lastIndex) > 0xFFFF) {
  438. this._searchRegex.lastIndex += 2;
  439. }
  440. else {
  441. this._searchRegex.lastIndex += 1;
  442. }
  443. continue;
  444. }
  445. // Exit early if the regex matches the same range twice
  446. return null;
  447. }
  448. this._prevMatchStartIndex = matchStartIndex;
  449. this._prevMatchLength = matchLength;
  450. if (!this._wordSeparators || isValidMatch(this._wordSeparators, text, textLength, matchStartIndex, matchLength)) {
  451. return m;
  452. }
  453. } while (m);
  454. return null;
  455. }
  456. }