glob.js 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499
  1. /*---------------------------------------------------------------------------------------------
  2. * Copyright (c) Microsoft Corporation. All rights reserved.
  3. * Licensed under the MIT License. See License.txt in the project root for license information.
  4. *--------------------------------------------------------------------------------------------*/
  5. import { isThenable } from './async.js';
  6. import * as extpath from './extpath.js';
  7. import { LRUCache } from './map.js';
  8. import * as paths from './path.js';
  9. import * as strings from './strings.js';
  10. const GLOBSTAR = '**';
  11. const GLOB_SPLIT = '/';
  12. const PATH_REGEX = '[/\\\\]'; // any slash or backslash
  13. const NO_PATH_REGEX = '[^/\\\\]'; // any non-slash and non-backslash
  14. const ALL_FORWARD_SLASHES = /\//g;
  15. function starsToRegExp(starCount) {
  16. switch (starCount) {
  17. case 0:
  18. return '';
  19. case 1:
  20. return `${NO_PATH_REGEX}*?`; // 1 star matches any number of characters except path separator (/ and \) - non greedy (?)
  21. default:
  22. // Matches: (Path Sep OR Path Val followed by Path Sep OR Path Sep followed by Path Val) 0-many times
  23. // Group is non capturing because we don't need to capture at all (?:...)
  24. // Overall we use non-greedy matching because it could be that we match too much
  25. return `(?:${PATH_REGEX}|${NO_PATH_REGEX}+${PATH_REGEX}|${PATH_REGEX}${NO_PATH_REGEX}+)*?`;
  26. }
  27. }
  28. export function splitGlobAware(pattern, splitChar) {
  29. if (!pattern) {
  30. return [];
  31. }
  32. const segments = [];
  33. let inBraces = false;
  34. let inBrackets = false;
  35. let curVal = '';
  36. for (const char of pattern) {
  37. switch (char) {
  38. case splitChar:
  39. if (!inBraces && !inBrackets) {
  40. segments.push(curVal);
  41. curVal = '';
  42. continue;
  43. }
  44. break;
  45. case '{':
  46. inBraces = true;
  47. break;
  48. case '}':
  49. inBraces = false;
  50. break;
  51. case '[':
  52. inBrackets = true;
  53. break;
  54. case ']':
  55. inBrackets = false;
  56. break;
  57. }
  58. curVal += char;
  59. }
  60. // Tail
  61. if (curVal) {
  62. segments.push(curVal);
  63. }
  64. return segments;
  65. }
  66. function parseRegExp(pattern) {
  67. if (!pattern) {
  68. return '';
  69. }
  70. let regEx = '';
  71. // Split up into segments for each slash found
  72. const segments = splitGlobAware(pattern, GLOB_SPLIT);
  73. // Special case where we only have globstars
  74. if (segments.every(s => s === GLOBSTAR)) {
  75. regEx = '.*';
  76. }
  77. // Build regex over segments
  78. else {
  79. let previousSegmentWasGlobStar = false;
  80. segments.forEach((segment, index) => {
  81. // Globstar is special
  82. if (segment === GLOBSTAR) {
  83. // if we have more than one globstar after another, just ignore it
  84. if (!previousSegmentWasGlobStar) {
  85. regEx += starsToRegExp(2);
  86. previousSegmentWasGlobStar = true;
  87. }
  88. return;
  89. }
  90. // States
  91. let inBraces = false;
  92. let braceVal = '';
  93. let inBrackets = false;
  94. let bracketVal = '';
  95. for (const char of segment) {
  96. // Support brace expansion
  97. if (char !== '}' && inBraces) {
  98. braceVal += char;
  99. continue;
  100. }
  101. // Support brackets
  102. if (inBrackets && (char !== ']' || !bracketVal) /* ] is literally only allowed as first character in brackets to match it */) {
  103. let res;
  104. // range operator
  105. if (char === '-') {
  106. res = char;
  107. }
  108. // negation operator (only valid on first index in bracket)
  109. else if ((char === '^' || char === '!') && !bracketVal) {
  110. res = '^';
  111. }
  112. // glob split matching is not allowed within character ranges
  113. // see http://man7.org/linux/man-pages/man7/glob.7.html
  114. else if (char === GLOB_SPLIT) {
  115. res = '';
  116. }
  117. // anything else gets escaped
  118. else {
  119. res = strings.escapeRegExpCharacters(char);
  120. }
  121. bracketVal += res;
  122. continue;
  123. }
  124. switch (char) {
  125. case '{':
  126. inBraces = true;
  127. continue;
  128. case '[':
  129. inBrackets = true;
  130. continue;
  131. case '}':
  132. const choices = splitGlobAware(braceVal, ',');
  133. // Converts {foo,bar} => [foo|bar]
  134. const braceRegExp = `(?:${choices.map(c => parseRegExp(c)).join('|')})`;
  135. regEx += braceRegExp;
  136. inBraces = false;
  137. braceVal = '';
  138. break;
  139. case ']':
  140. regEx += ('[' + bracketVal + ']');
  141. inBrackets = false;
  142. bracketVal = '';
  143. break;
  144. case '?':
  145. regEx += NO_PATH_REGEX; // 1 ? matches any single character except path separator (/ and \)
  146. continue;
  147. case '*':
  148. regEx += starsToRegExp(1);
  149. continue;
  150. default:
  151. regEx += strings.escapeRegExpCharacters(char);
  152. }
  153. }
  154. // Tail: Add the slash we had split on if there is more to come and the remaining pattern is not a globstar
  155. // For example if pattern: some/**/*.js we want the "/" after some to be included in the RegEx to prevent
  156. // a folder called "something" to match as well.
  157. // However, if pattern: some/**, we tolerate that we also match on "something" because our globstar behaviour
  158. // is to match 0-N segments.
  159. if (index < segments.length - 1 && (segments[index + 1] !== GLOBSTAR || index + 2 < segments.length)) {
  160. regEx += PATH_REGEX;
  161. }
  162. // reset state
  163. previousSegmentWasGlobStar = false;
  164. });
  165. }
  166. return regEx;
  167. }
  168. // regexes to check for trivial glob patterns that just check for String#endsWith
  169. const T1 = /^\*\*\/\*\.[\w\.-]+$/; // **/*.something
  170. const T2 = /^\*\*\/([\w\.-]+)\/?$/; // **/something
  171. const T3 = /^{\*\*\/[\*\.]?[\w\.-]+\/?(,\*\*\/[\*\.]?[\w\.-]+\/?)*}$/; // {**/*.something,**/*.else} or {**/package.json,**/project.json}
  172. const T3_2 = /^{\*\*\/[\*\.]?[\w\.-]+(\/(\*\*)?)?(,\*\*\/[\*\.]?[\w\.-]+(\/(\*\*)?)?)*}$/; // Like T3, with optional trailing /**
  173. const T4 = /^\*\*((\/[\w\.-]+)+)\/?$/; // **/something/else
  174. const T5 = /^([\w\.-]+(\/[\w\.-]+)*)\/?$/; // something/else
  175. const CACHE = new LRUCache(10000); // bounded to 10000 elements
  176. const FALSE = function () {
  177. return false;
  178. };
  179. const NULL = function () {
  180. return null;
  181. };
  182. function parsePattern(arg1, options) {
  183. if (!arg1) {
  184. return NULL;
  185. }
  186. // Handle IRelativePattern
  187. let pattern;
  188. if (typeof arg1 !== 'string') {
  189. pattern = arg1.pattern;
  190. }
  191. else {
  192. pattern = arg1;
  193. }
  194. // Whitespace trimming
  195. pattern = pattern.trim();
  196. // Check cache
  197. const patternKey = `${pattern}_${!!options.trimForExclusions}`;
  198. let parsedPattern = CACHE.get(patternKey);
  199. if (parsedPattern) {
  200. return wrapRelativePattern(parsedPattern, arg1);
  201. }
  202. // Check for Trivials
  203. let match;
  204. if (T1.test(pattern)) { // common pattern: **/*.txt just need endsWith check
  205. const base = pattern.substr(4); // '**/*'.length === 4
  206. parsedPattern = function (path, basename) {
  207. return typeof path === 'string' && path.endsWith(base) ? pattern : null;
  208. };
  209. }
  210. else if (match = T2.exec(trimForExclusions(pattern, options))) { // common pattern: **/some.txt just need basename check
  211. parsedPattern = trivia2(match[1], pattern);
  212. }
  213. else if ((options.trimForExclusions ? T3_2 : T3).test(pattern)) { // repetition of common patterns (see above) {**/*.txt,**/*.png}
  214. parsedPattern = trivia3(pattern, options);
  215. }
  216. else if (match = T4.exec(trimForExclusions(pattern, options))) { // common pattern: **/something/else just need endsWith check
  217. parsedPattern = trivia4and5(match[1].substr(1), pattern, true);
  218. }
  219. else if (match = T5.exec(trimForExclusions(pattern, options))) { // common pattern: something/else just need equals check
  220. parsedPattern = trivia4and5(match[1], pattern, false);
  221. }
  222. // Otherwise convert to pattern
  223. else {
  224. parsedPattern = toRegExp(pattern);
  225. }
  226. // Cache
  227. CACHE.set(patternKey, parsedPattern);
  228. return wrapRelativePattern(parsedPattern, arg1);
  229. }
  230. function wrapRelativePattern(parsedPattern, arg2) {
  231. if (typeof arg2 === 'string') {
  232. return parsedPattern;
  233. }
  234. return function (path, basename) {
  235. if (!extpath.isEqualOrParent(path, arg2.base)) {
  236. return null;
  237. }
  238. return parsedPattern(paths.relative(arg2.base, path), basename);
  239. };
  240. }
  241. function trimForExclusions(pattern, options) {
  242. return options.trimForExclusions && pattern.endsWith('/**') ? pattern.substr(0, pattern.length - 2) : pattern; // dropping **, tailing / is dropped later
  243. }
  244. // common pattern: **/some.txt just need basename check
  245. function trivia2(base, originalPattern) {
  246. const slashBase = `/${base}`;
  247. const backslashBase = `\\${base}`;
  248. const parsedPattern = function (path, basename) {
  249. if (typeof path !== 'string') {
  250. return null;
  251. }
  252. if (basename) {
  253. return basename === base ? originalPattern : null;
  254. }
  255. return path === base || path.endsWith(slashBase) || path.endsWith(backslashBase) ? originalPattern : null;
  256. };
  257. const basenames = [base];
  258. parsedPattern.basenames = basenames;
  259. parsedPattern.patterns = [originalPattern];
  260. parsedPattern.allBasenames = basenames;
  261. return parsedPattern;
  262. }
  263. // repetition of common patterns (see above) {**/*.txt,**/*.png}
  264. function trivia3(pattern, options) {
  265. const parsedPatterns = aggregateBasenameMatches(pattern.slice(1, -1).split(',')
  266. .map(pattern => parsePattern(pattern, options))
  267. .filter(pattern => pattern !== NULL), pattern);
  268. const n = parsedPatterns.length;
  269. if (!n) {
  270. return NULL;
  271. }
  272. if (n === 1) {
  273. return parsedPatterns[0];
  274. }
  275. const parsedPattern = function (path, basename) {
  276. for (let i = 0, n = parsedPatterns.length; i < n; i++) {
  277. if (parsedPatterns[i](path, basename)) {
  278. return pattern;
  279. }
  280. }
  281. return null;
  282. };
  283. const withBasenames = parsedPatterns.find(pattern => !!pattern.allBasenames);
  284. if (withBasenames) {
  285. parsedPattern.allBasenames = withBasenames.allBasenames;
  286. }
  287. const allPaths = parsedPatterns.reduce((all, current) => current.allPaths ? all.concat(current.allPaths) : all, []);
  288. if (allPaths.length) {
  289. parsedPattern.allPaths = allPaths;
  290. }
  291. return parsedPattern;
  292. }
  293. // common patterns: **/something/else just need endsWith check, something/else just needs and equals check
  294. function trivia4and5(targetPath, pattern, matchPathEnds) {
  295. const usingPosixSep = paths.sep === paths.posix.sep;
  296. const nativePath = usingPosixSep ? targetPath : targetPath.replace(ALL_FORWARD_SLASHES, paths.sep);
  297. const nativePathEnd = paths.sep + nativePath;
  298. const targetPathEnd = paths.posix.sep + targetPath;
  299. const parsedPattern = matchPathEnds ? function (testPath, basename) {
  300. return typeof testPath === 'string' &&
  301. ((testPath === nativePath || testPath.endsWith(nativePathEnd))
  302. || !usingPosixSep && (testPath === targetPath || testPath.endsWith(targetPathEnd)))
  303. ? pattern : null;
  304. } : function (testPath, basename) {
  305. return typeof testPath === 'string' &&
  306. (testPath === nativePath
  307. || (!usingPosixSep && testPath === targetPath))
  308. ? pattern : null;
  309. };
  310. parsedPattern.allPaths = [(matchPathEnds ? '*/' : './') + targetPath];
  311. return parsedPattern;
  312. }
  313. function toRegExp(pattern) {
  314. try {
  315. const regExp = new RegExp(`^${parseRegExp(pattern)}$`);
  316. return function (path) {
  317. regExp.lastIndex = 0; // reset RegExp to its initial state to reuse it!
  318. return typeof path === 'string' && regExp.test(path) ? pattern : null;
  319. };
  320. }
  321. catch (error) {
  322. return NULL;
  323. }
  324. }
  325. export function match(arg1, path, hasSibling) {
  326. if (!arg1 || typeof path !== 'string') {
  327. return false;
  328. }
  329. return parse(arg1)(path, undefined, hasSibling);
  330. }
  331. export function parse(arg1, options = {}) {
  332. if (!arg1) {
  333. return FALSE;
  334. }
  335. // Glob with String
  336. if (typeof arg1 === 'string' || isRelativePattern(arg1)) {
  337. const parsedPattern = parsePattern(arg1, options);
  338. if (parsedPattern === NULL) {
  339. return FALSE;
  340. }
  341. const resultPattern = function (path, basename) {
  342. return !!parsedPattern(path, basename);
  343. };
  344. if (parsedPattern.allBasenames) {
  345. resultPattern.allBasenames = parsedPattern.allBasenames;
  346. }
  347. if (parsedPattern.allPaths) {
  348. resultPattern.allPaths = parsedPattern.allPaths;
  349. }
  350. return resultPattern;
  351. }
  352. // Glob with Expression
  353. return parsedExpression(arg1, options);
  354. }
  355. export function isRelativePattern(obj) {
  356. const rp = obj;
  357. return rp && typeof rp.base === 'string' && typeof rp.pattern === 'string';
  358. }
  359. function parsedExpression(expression, options) {
  360. const parsedPatterns = aggregateBasenameMatches(Object.getOwnPropertyNames(expression)
  361. .map(pattern => parseExpressionPattern(pattern, expression[pattern], options))
  362. .filter(pattern => pattern !== NULL));
  363. const n = parsedPatterns.length;
  364. if (!n) {
  365. return NULL;
  366. }
  367. if (!parsedPatterns.some(parsedPattern => !!parsedPattern.requiresSiblings)) {
  368. if (n === 1) {
  369. return parsedPatterns[0];
  370. }
  371. const resultExpression = function (path, basename) {
  372. for (let i = 0, n = parsedPatterns.length; i < n; i++) {
  373. // Pattern matches path
  374. const result = parsedPatterns[i](path, basename);
  375. if (result) {
  376. return result;
  377. }
  378. }
  379. return null;
  380. };
  381. const withBasenames = parsedPatterns.find(pattern => !!pattern.allBasenames);
  382. if (withBasenames) {
  383. resultExpression.allBasenames = withBasenames.allBasenames;
  384. }
  385. const allPaths = parsedPatterns.reduce((all, current) => current.allPaths ? all.concat(current.allPaths) : all, []);
  386. if (allPaths.length) {
  387. resultExpression.allPaths = allPaths;
  388. }
  389. return resultExpression;
  390. }
  391. const resultExpression = function (path, basename, hasSibling) {
  392. let name = undefined;
  393. for (let i = 0, n = parsedPatterns.length; i < n; i++) {
  394. // Pattern matches path
  395. const parsedPattern = parsedPatterns[i];
  396. if (parsedPattern.requiresSiblings && hasSibling) {
  397. if (!basename) {
  398. basename = paths.basename(path);
  399. }
  400. if (!name) {
  401. name = basename.substr(0, basename.length - paths.extname(path).length);
  402. }
  403. }
  404. const result = parsedPattern(path, basename, name, hasSibling);
  405. if (result) {
  406. return result;
  407. }
  408. }
  409. return null;
  410. };
  411. const withBasenames = parsedPatterns.find(pattern => !!pattern.allBasenames);
  412. if (withBasenames) {
  413. resultExpression.allBasenames = withBasenames.allBasenames;
  414. }
  415. const allPaths = parsedPatterns.reduce((all, current) => current.allPaths ? all.concat(current.allPaths) : all, []);
  416. if (allPaths.length) {
  417. resultExpression.allPaths = allPaths;
  418. }
  419. return resultExpression;
  420. }
  421. function parseExpressionPattern(pattern, value, options) {
  422. if (value === false) {
  423. return NULL; // pattern is disabled
  424. }
  425. const parsedPattern = parsePattern(pattern, options);
  426. if (parsedPattern === NULL) {
  427. return NULL;
  428. }
  429. // Expression Pattern is <boolean>
  430. if (typeof value === 'boolean') {
  431. return parsedPattern;
  432. }
  433. // Expression Pattern is <SiblingClause>
  434. if (value) {
  435. const when = value.when;
  436. if (typeof when === 'string') {
  437. const result = (path, basename, name, hasSibling) => {
  438. if (!hasSibling || !parsedPattern(path, basename)) {
  439. return null;
  440. }
  441. const clausePattern = when.replace('$(basename)', name);
  442. const matched = hasSibling(clausePattern);
  443. return isThenable(matched) ?
  444. matched.then(m => m ? pattern : null) :
  445. matched ? pattern : null;
  446. };
  447. result.requiresSiblings = true;
  448. return result;
  449. }
  450. }
  451. // Expression is Anything
  452. return parsedPattern;
  453. }
  454. function aggregateBasenameMatches(parsedPatterns, result) {
  455. const basenamePatterns = parsedPatterns.filter(parsedPattern => !!parsedPattern.basenames);
  456. if (basenamePatterns.length < 2) {
  457. return parsedPatterns;
  458. }
  459. const basenames = basenamePatterns.reduce((all, current) => {
  460. const basenames = current.basenames;
  461. return basenames ? all.concat(basenames) : all;
  462. }, []);
  463. let patterns;
  464. if (result) {
  465. patterns = [];
  466. for (let i = 0, n = basenames.length; i < n; i++) {
  467. patterns.push(result);
  468. }
  469. }
  470. else {
  471. patterns = basenamePatterns.reduce((all, current) => {
  472. const patterns = current.patterns;
  473. return patterns ? all.concat(patterns) : all;
  474. }, []);
  475. }
  476. const aggregate = function (path, basename) {
  477. if (typeof path !== 'string') {
  478. return null;
  479. }
  480. if (!basename) {
  481. let i;
  482. for (i = path.length; i > 0; i--) {
  483. const ch = path.charCodeAt(i - 1);
  484. if (ch === 47 /* Slash */ || ch === 92 /* Backslash */) {
  485. break;
  486. }
  487. }
  488. basename = path.substr(i);
  489. }
  490. const index = basenames.indexOf(basename);
  491. return index !== -1 ? patterns[index] : null;
  492. };
  493. aggregate.basenames = basenames;
  494. aggregate.patterns = patterns;
  495. aggregate.allBasenames = basenames;
  496. const aggregatedPatterns = parsedPatterns.filter(parsedPattern => !parsedPattern.basenames);
  497. aggregatedPatterns.push(aggregate);
  498. return aggregatedPatterns;
  499. }