indentationGuesser.js 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176
  1. /*---------------------------------------------------------------------------------------------
  2. * Copyright (c) Microsoft Corporation. All rights reserved.
  3. * Licensed under the MIT License. See License.txt in the project root for license information.
  4. *--------------------------------------------------------------------------------------------*/
  /**
   * Mutable record that receives the outcome of an indentation comparison.
   * Both fields start out in their "nothing found" state.
   */
  class SpacesDiffResult {
    /** Difference in spaces between two indentations (0 by default). */
    spacesDiff = 0;

    /** Flag for a difference that looks like alignment (false by default). */
    looksLikeAlignment = false;
  }
  /**
   * Tally the characters of `text` in the half-open range [from, to).
   * A character with code 32 counts as a space; anything else counts as a tab.
   */
  function tallyIndentChars(text, from, to) {
    let spaces = 0;
    let tabs = 0;
    for (let k = from; k < to; k++) {
      if (text.charCodeAt(k) === 32 /* Space */) {
        spaces += 1;
      } else {
        tabs += 1;
      }
    }
    return { spaces, tabs };
  }

  /**
   * Compute the diff in spaces between two lines' indentation and store it in `result`.
   *
   * `result.spacesDiff` and `result.looksLikeAlignment` are always reset first; they are
   * left at 0 / false when no conclusion can be drawn.
   *
   * @param {string} a - text of the first line
   * @param {number} aLength - number of leading characters of `a` treated as indentation
   * @param {string} b - text of the second line
   * @param {number} bLength - number of leading characters of `b` treated as indentation
   * @param {{spacesDiff: number, looksLikeAlignment: boolean}} result - written in place
   */
  function spacesDiff(a, aLength, b, bLength, result) {
    result.spacesDiff = 0;
    result.looksLikeAlignment = false;

    // The comparison works in both directions, e.g.:
    //  - a: "\t"
    //  - b: "\t    "
    //  => after the shared "\t", `b` has 4 extra spaces
    let shared = 0;
    while (shared < aLength && shared < bLength && a.charCodeAt(shared) === b.charCodeAt(shared)) {
      shared++;
    }

    // Only what follows the shared prefix is compared.
    const restOfA = tallyIndentChars(a, shared, aLength);
    const restOfB = tallyIndentChars(b, shared, bLength);

    // A remainder that mixes spaces and tabs gives no usable hint.
    const aIsMixed = restOfA.spaces > 0 && restOfA.tabs > 0;
    const bIsMixed = restOfB.spaces > 0 && restOfB.tabs > 0;
    if (aIsMixed || bIsMixed) {
      return;
    }

    const tabDelta = Math.abs(restOfA.tabs - restOfB.tabs);
    const spaceDelta = Math.abs(restOfA.spaces - restOfB.spaces);

    if (tabDelta !== 0) {
      // Tabs on one side, spaces on the other: report spaces-per-tab when it divides evenly.
      if (spaceDelta % tabDelta === 0) {
        result.spacesDiff = spaceDelta / tabDelta;
      }
      return;
    }

    result.spacesDiff = spaceDelta;

    // Check whether the difference might be caused by alignment rather than indentation;
    // aligned code should not be used as a hint.
    const bSpaces = restOfB.spaces;
    const probeInRange = spaceDelta > 0 && bSpaces >= 1 && bSpaces - 1 < a.length && bSpaces < b.length;
    if (
      probeInRange &&
      b.charCodeAt(bSpaces) !== 32 /* Space */ &&
      a.charCodeAt(bSpaces - 1) === 32 /* Space */ &&
      a.charCodeAt(a.length - 1) === 44 /* Comma */
    ) {
      // This looks like an alignment desire, e.g.:
      //   const a = b + c,
      //         d = b - c;
      result.looksLikeAlignment = true;
    }
  }
  /**
   * Guess whether a text buffer is indented with tabs or spaces and, when spaces
   * are used, how many spaces make up one indentation level.
   *
   * At most the first 10000 lines are inspected. Lines that are empty or contain
   * only tabs/spaces are ignored.
   *
   * @param source - buffer-like object exposing `getLineCount()`,
   *   `getLineLength(lineNumber)`, `getLineContent(lineNumber)` and
   *   `getLineCharCode(lineNumber, index)`; lines are requested starting at number 1.
   * @param {number} defaultTabSize - tab size returned when no better guess is found
   * @param {boolean} defaultInsertSpaces - used when tab-indented and space-indented
   *   line counts are equal
   * @returns {{insertSpaces: boolean, tabSize: number}} the guessed settings
   */
  export function guessIndentation(source, defaultTabSize, defaultInsertSpaces) {
    // Look at most at the first 10k lines
    const linesCount = Math.min(source.getLineCount(), 10000);

    let linesIndentedWithTabsCount = 0; // lines with at least one tab in their indentation
    let linesIndentedWithSpacesCount = 0; // lines with no tab and more than one space in their indentation

    let previousLineText = ''; // content of the latest line that contained non-whitespace chars
    let previousLineIndentation = 0; // index at which that line had its first non-whitespace char

    const ALLOWED_TAB_SIZE_GUESSES = [2, 4, 6, 8, 3, 5, 7]; // prefer even guesses for `tabSize`, limit to [2, 8]
    const MAX_ALLOWED_TAB_SIZE_GUESS = 8; // max(ALLOWED_TAB_SIZE_GUESSES) = 8

    const spacesDiffCount = [0, 0, 0, 0, 0, 0, 0, 0, 0]; // `tabSize` scores, indexed by spaces difference
    const tmp = new SpacesDiffResult(); // reused for every comparison

    for (let lineNumber = 1; lineNumber <= linesCount; lineNumber++) {
      const currentLineLength = source.getLineLength(lineNumber);
      const currentLineText = source.getLineContent(lineNumber);

      // If the text buffer is chunk based, long lines are cons-strings and v8 will flatten
      // the string when we check a charCode; checking the charCode on chunks directly is cheaper.
      const useCurrentLineText = (currentLineLength <= 65536);

      let currentLineHasContent = false; // does `currentLineText` contain non-whitespace chars
      let currentLineIndentation = 0; // index of the first non-whitespace char in `currentLineText`
      let currentLineSpacesCount = 0; // spaces found in the indentation of `currentLineText`
      let currentLineTabsCount = 0; // tabs found in the indentation of `currentLineText`

      for (let j = 0; j < currentLineLength; j++) {
        const charCode = (useCurrentLineText ? currentLineText.charCodeAt(j) : source.getLineCharCode(lineNumber, j));

        if (charCode === 9 /* Tab */) {
          currentLineTabsCount++;
        } else if (charCode === 32 /* Space */) {
          currentLineSpacesCount++;
        } else {
          // Hit a non-whitespace character on this line
          currentLineHasContent = true;
          currentLineIndentation = j;
          break;
        }
      }

      // Ignore empty or whitespace-only lines
      if (!currentLineHasContent) {
        continue;
      }

      if (currentLineTabsCount > 0) {
        linesIndentedWithTabsCount++;
      } else if (currentLineSpacesCount > 1) {
        // A single leading space is not counted as space indentation.
        linesIndentedWithSpacesCount++;
      }

      spacesDiff(previousLineText, previousLineIndentation, currentLineText, currentLineIndentation, tmp);

      if (tmp.looksLikeAlignment) {
        // If defaultInsertSpaces === true && the spaces count === tabSize, we may want to
        // count it as valid indentation:
        //
        //   - item1
        //     - item2
        //
        // otherwise skip this line entirely (the previous line is kept for the next comparison):
        //
        //   const a = 1,
        //         b = 2;
        if (!(defaultInsertSpaces && defaultTabSize === tmp.spacesDiff)) {
          continue;
        }
      }

      const currentSpacesDiff = tmp.spacesDiff;
      if (currentSpacesDiff <= MAX_ALLOWED_TAB_SIZE_GUESS) {
        spacesDiffCount[currentSpacesDiff]++;
      }

      previousLineText = currentLineText;
      previousLineIndentation = currentLineIndentation;
    }

    // The majority wins; on a tie the caller's default is kept.
    let insertSpaces = defaultInsertSpaces;
    if (linesIndentedWithTabsCount !== linesIndentedWithSpacesCount) {
      insertSpaces = (linesIndentedWithTabsCount < linesIndentedWithSpacesCount);
    }

    let tabSize = defaultTabSize;

    // Guess tabSize only if inserting spaces...
    if (insertSpaces) {
      // A candidate needs a strictly positive score to replace the default.
      let tabSizeScore = 0;

      for (const possibleTabSize of ALLOWED_TAB_SIZE_GUESSES) {
        const possibleTabSizeScore = spacesDiffCount[possibleTabSize];
        // Strict comparison: on equal scores the earlier (preferred) candidate is kept.
        if (possibleTabSizeScore > tabSizeScore) {
          tabSizeScore = possibleTabSizeScore;
          tabSize = possibleTabSize;
        }
      }

      // Let a tabSize of 2 win even if it is not the maximum
      // (only in case 4 was guessed)
      if (tabSize === 4 && spacesDiffCount[4] > 0 && spacesDiffCount[2] > 0 && spacesDiffCount[2] >= spacesDiffCount[4] / 2) {
        tabSize = 2;
      }
    }

    return {
      insertSpaces,
      tabSize,
    };
  }