From 50bb7aad57cbf1924e28472de52734be49645558 Mon Sep 17 00:00:00 2001
From: Christof Marti
Date: Tue, 13 Sep 2016 21:54:52 -0700
Subject: [PATCH] Prune find traversal with basename patterns

---
Review notes (text between the '---' marker and the first 'diff --git' line
is not part of the commit):

 * What the patch does: glob.ts records the plain basename terms of
   '**/name' style patterns in a new 'allBasenames' property and exposes them
   through glob.getBasenameTerms(). fileSearch.ts passes those terms to
   'find' as '-not ( ( -name a -o -name b ) -prune )', so entries with an
   excluded basename are neither reported nor descended into.
 * Changed in review (hunk line counts are unchanged):
   - search.test.ts: the callbacks of the second 'find' run asserted on
     stdout1 for file0; they now assert on stdout2, the output under test.
   - search.test.ts: the Windows early exit now calls done(), otherwise the
     callback-style test never completes on that platform.
   - fileSearch.ts: escapeGlobSpecials() has explicit parameter/return types.
 * NOTE(review): angle-bracket type assertions appear to have been lost when
   this patch was extracted (e.g. '(resultPattern).allBasenames'), and leading
   whitespace was reconstructed as tabs. Confirm both against the original
   commit before applying.

 src/vs/base/common/glob.ts                    | 40 ++++++++++--
 src/vs/base/test/common/glob.test.ts          | 20 +++++-
 .../services/search/node/fileSearch.ts        | 35 +++++++++-
 .../search/test/node/fixtures/more/file.txt   |  0
 .../services/search/test/node/search.test.ts  | 65 +++++++++++++++++--
 5 files changed, 146 insertions(+), 14 deletions(-)
 create mode 100644 src/vs/workbench/services/search/test/node/fixtures/more/file.txt

diff --git a/src/vs/base/common/glob.ts b/src/vs/base/common/glob.ts
index dfca5f23039..5562380d1aa 100644
--- a/src/vs/base/common/glob.ts
+++ b/src/vs/base/common/glob.ts
@@ -4,6 +4,7 @@
  *--------------------------------------------------------------------------------------------*/
 'use strict';
 
+import arrays = require('vs/base/common/arrays');
 import strings = require('vs/base/common/strings');
 import paths = require('vs/base/common/paths');
 import {BoundedLinkedMap} from 'vs/base/common/map';
@@ -219,11 +220,13 @@ interface ParsedStringPattern {
 	(path: string, basename: string): string /* the matching pattern */;
 	basenames?: string[];
 	patterns?: string[];
+	allBasenames?: string[];
 }
 type SiblingsPattern = { siblings: string[], name: string };
 interface ParsedExpressionPattern {
 	(path: string, basename: string, siblingsPatternFn: () => SiblingsPattern): string /* the matching pattern */;
 	requiresSiblings?: boolean;
+	allBasenames?: string[];
 }
 
 const CACHE = new BoundedLinkedMap(10000); // bounded to 10000 elements
@@ -269,8 +272,10 @@ function parsePattern(pattern: string): ParsedStringPattern {
 			}
 			return path === base || strings.endsWith(path, slashBase) || strings.endsWith(path, backslashBase) ? pattern : null;
 		};
-		parsedPattern.basenames = [base];
+		const basenames = [base];
+		parsedPattern.basenames = basenames;
 		parsedPattern.patterns = [pattern];
+		parsedPattern.allBasenames = basenames;
 	} else if (T3.test(pattern)) { // repetition of common patterns (see above) {**/*.txt,**/*.png}
 		const parsedPatterns = aggregateBasenameMatches(pattern.slice(1, -1).split(',')
 			.map(pattern => parsePattern(pattern))
@@ -290,6 +295,10 @@ function parsePattern(pattern: string): ParsedStringPattern {
 			}
 			return null;
 		};
+		const withBasenames = arrays.first(parsedPatterns, pattern => !!(pattern).allBasenames);
+		if (withBasenames) {
+			parsedPattern.allBasenames = (withBasenames).allBasenames;
+		}
 	}
 
 	// Otherwise convert to pattern
@@ -354,15 +363,23 @@ export function parse(arg1: string | IExpression): any {
 		if (parsedPattern === NULL) {
 			return FALSE;
 		}
-		return function (path: string, basename: string) {
+		const resultPattern = function (path: string, basename: string) {
 			return !!parsedPattern(path, basename);
 		};
+		if (parsedPattern.allBasenames) {
+			(resultPattern).allBasenames = parsedPattern.allBasenames;
+		}
+		return resultPattern;
 	}
 
 	// Glob with Expression
 	return parsedExpression(arg1);
 }
 
+export function getBasenameTerms(patternOrExpression: ParsedPattern | ParsedExpression): string[] {
+	return (patternOrExpression).allBasenames || [];
+}
+
 function parsedExpression(expression: IExpression): ParsedExpression {
 	const parsedPatterns = aggregateBasenameMatches(Object.getOwnPropertyNames(expression)
 		.map(pattern => parseExpressionPattern(pattern, expression[pattern]))
@@ -378,7 +395,7 @@ function parsedExpression(expression: IExpression): ParsedExpression {
 			return parsedPatterns[0];
 		}
 
-		return function (path: string, basename: string, siblingsFn?: () => string[]) {
+		const resultExpression: ParsedStringPattern = function (path: string, basename: string, siblingsFn?: () => string[]) {
 			for (let i = 0, n = parsedPatterns.length; i < n; i++) {
 				// Pattern matches path
 				const result = (parsedPatterns[i])(path, basename);
@@ -389,9 +406,16 @@ function parsedExpression(expression: IExpression): ParsedExpression {
 
 			return null;
 		};
+
+		const withBasenames = arrays.first(parsedPatterns, pattern => !!(pattern).allBasenames);
+		if (withBasenames) {
+			resultExpression.allBasenames = (withBasenames).allBasenames;
+		}
+
+		return resultExpression;
 	}
 
-	return function (path: string, basename: string, siblingsFn?: () => string[]) {
+	const resultExpression: ParsedStringPattern = function (path: string, basename: string, siblingsFn?: () => string[]) {
 		let siblingsPattern: SiblingsPattern;
 		let siblingsResolved = !siblingsFn;
 
@@ -421,6 +445,13 @@ function parsedExpression(expression: IExpression): ParsedExpression {
 
 		return null;
 	};
+
+	const withBasenames = arrays.first(parsedPatterns, pattern => !!(pattern).allBasenames);
+	if (withBasenames) {
+		resultExpression.allBasenames = (withBasenames).allBasenames;
+	}
+
+	return resultExpression;
 }
 
 function parseExpressionPattern(pattern: string, value: any): (ParsedStringPattern | ParsedExpressionPattern) {
@@ -503,6 +534,7 @@ function aggregateBasenameMatches(parsedPatterns: (ParsedStringPattern | ParsedE
 	};
 	aggregate.basenames = basenames;
 	aggregate.patterns = patterns;
+	aggregate.allBasenames = basenames;
 
 	const aggregatedPatterns = parsedPatterns.filter(parsedPattern => !(parsedPattern).basenames);
 	aggregatedPatterns.push(aggregate);
diff --git a/src/vs/base/test/common/glob.test.ts b/src/vs/base/test/common/glob.test.ts
index e63dfde4ae7..242fec0dcfe 100644
--- a/src/vs/base/test/common/glob.test.ts
+++ b/src/vs/base/test/common/glob.test.ts
@@ -668,7 +668,7 @@ suite('Glob', () => {
 		assert.strictEqual(glob.parse('')('foo'), false);
 	});
 
-	test('expression falsy path', function () {
+	test('falsy path', function () {
 		assert.strictEqual(glob.parse('foo')(null), false);
 		assert.strictEqual(glob.parse('foo')(''), false);
 		assert.strictEqual(glob.parse('**/*.j?')(null), false);
@@ -683,7 +683,7 @@ suite('Glob', () => {
 		assert.strictEqual(glob.parse('{**/*.baz,**/*.foo}')(''), false);
 	});
 
-	test('expression basename', function () {
+	test('expression/pattern basename', function () {
 		assert.strictEqual(glob.parse('**/foo')('bar/baz', 'baz'), false);
 		assert.strictEqual(glob.parse('**/foo')('bar/foo', 'foo'), true);
 
@@ -696,4 +696,20 @@ suite('Glob', () => {
 		assert.strictEqual(glob.parse(expr)('bar/baz.js', 'baz.js', sibilings), null);
 		assert.strictEqual(glob.parse(expr)('bar/foo.js', 'foo.js', sibilings), '**/*.js');
 	});
+
+	test('expression/pattern basename terms', function () {
+		assert.deepStrictEqual(glob.getBasenameTerms(glob.parse('**/*.foo')), []);
+		assert.deepStrictEqual(glob.getBasenameTerms(glob.parse('**/foo')), ['foo']);
+		assert.deepStrictEqual(glob.getBasenameTerms(glob.parse('{**/baz,**/foo}')), ['baz', 'foo']);
+
+		assert.deepStrictEqual(glob.getBasenameTerms(glob.parse({
+			'**/foo': true,
+			'{**/bar,**/baz}': true,
+			'**/bulb': false
+		})), ['foo', 'bar', 'baz']);
+		assert.deepStrictEqual(glob.getBasenameTerms(glob.parse({
+			'**/foo': { when: '$(basename).zip' },
+			'**/bar': true
+		})), ['bar']);
+	});
 });
\ No newline at end of file
diff --git a/src/vs/workbench/services/search/node/fileSearch.ts b/src/vs/workbench/services/search/node/fileSearch.ts
index 521199149ba..f46e6d16eb9 100644
--- a/src/vs/workbench/services/search/node/fileSearch.ts
+++ b/src/vs/workbench/services/search/node/fileSearch.ts
@@ -169,7 +169,7 @@ export class FileWalker {
 	}
 
 	private macFindTraversal(rootFolder: string, onResult: (result: IRawFileMatch) => void, done: (err?: Error) => void): void {
-		const cmd = childProcess.spawn('find', ['-L', '.', '-type', 'f'], { cwd: rootFolder });
+		const cmd = this.spawnFindCmd(rootFolder, this.excludePattern);
 		this.readStdout(cmd, 'utf8', (err: Error, stdout?: string) => {
 			if (err) {
 				done(err);
@@ -224,7 +224,7 @@ export class FileWalker {
 	}
 
 	private linuxFindTraversal(rootFolder: string, onResult: (result: IRawFileMatch) => void, done: (err?: Error) => void): void {
-		const cmd = childProcess.spawn('find', ['-L', '.', '-type', 'f'], { cwd: rootFolder });
+		const cmd = this.spawnFindCmd(rootFolder, this.excludePattern);
 		this.readStdout(cmd, 'utf8', (err: Error, stdout?: string) => {
 			if (err) {
 				done(err);
@@ -250,7 +250,36 @@ export class FileWalker {
 		});
 	}
 
-	private readStdout(cmd: childProcess.ChildProcess, encoding: string, cb: (err: Error, stdout?: string) => void): void {
+	/**
+	 * Spawns 'find' in rootFolder, pruning entries whose basename is an exclude term. Public for testing.
+	 */
+	public spawnFindCmd(rootFolder: string, excludePattern: glob.ParsedExpression) {
+		const basenames = glob.getBasenameTerms(excludePattern);
+		let args = ['-L', '.'];
+		if (basenames.length) {
+			args.push('-not', '(', '(');
+			for (let i = 0, n = basenames.length; i < n; i++) {
+				if (i) {
+					args.push('-o');
+				}
+				args.push('-name', FileWalker.escapeGlobSpecials(basenames[i]));
+			}
+			args.push(')', '-prune', ')');
+		}
+		args.push('-type', 'f');
+		return childProcess.spawn('find', args, { cwd: rootFolder });
+	}
+
+	private static GLOB_SPECIALS = /[*?\[\]\\]/g;
+	private static ESCAPE_CHAR = '\\$&';
+	private static escapeGlobSpecials(string: string): string {
+		return string.replace(this.GLOB_SPECIALS, this.ESCAPE_CHAR);
+	}
+
+	/**
+	 * Public for testing.
+	 */
+	public readStdout(cmd: childProcess.ChildProcess, encoding: string, cb: (err: Error, stdout?: string) => void): void {
 		let done = (err: Error, stdout?: string) => {
 			done = () => {};
 			this.cmdForkResultTime = Date.now();
diff --git a/src/vs/workbench/services/search/test/node/fixtures/more/file.txt b/src/vs/workbench/services/search/test/node/fixtures/more/file.txt
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/src/vs/workbench/services/search/test/node/search.test.ts b/src/vs/workbench/services/search/test/node/search.test.ts
index de3ada7f6a3..5ae1f019d0c 100644
--- a/src/vs/workbench/services/search/test/node/search.test.ts
+++ b/src/vs/workbench/services/search/test/node/search.test.ts
@@ -8,7 +8,9 @@
 import path = require('path');
 import assert = require('assert');
 
+import * as glob from 'vs/base/common/glob';
 import {join, normalize} from 'vs/base/common/paths';
+import * as platform from 'vs/base/common/platform';
 import {LineMatch} from 'vs/platform/search/common/search';
 
 import {FileWalker, Engine as FileSearchEngine} from 'vs/workbench/services/search/node/fileSearch';
@@ -119,7 +121,7 @@ suite('Search', () => {
 			}
 		}, () => { }, (error) => {
 			assert.ok(!error);
-			assert.equal(count, 12);
+			assert.equal(count, 13);
 			done();
 		});
 	});
@@ -178,7 +180,7 @@ suite('Search', () => {
 			}
 		}, () => { }, (error) => {
 			assert.ok(!error);
-			assert.equal(count, 7);
+			assert.equal(count, 8);
 			done();
 		});
 	});
@@ -197,7 +199,7 @@ suite('Search', () => {
 			}
 		}, () => { }, (error) => {
 			assert.ok(!error);
-			assert.equal(count, 7);
+			assert.equal(count, 8);
 			done();
 		});
 	});
@@ -216,7 +218,7 @@ suite('Search', () => {
 			}
 		}, () => { }, (error) => {
 			assert.ok(!error);
-			assert.equal(count, 7);
+			assert.equal(count, 8);
 			done();
 		});
 	});
@@ -235,7 +237,7 @@ suite('Search', () => {
 			}
 		}, () => { }, (error) => {
 			assert.ok(!error);
-			assert.equal(count, 11);
+			assert.equal(count, 12);
 			done();
 		});
 	});
@@ -421,6 +423,59 @@ suite('Search', () => {
 		});
 	});
 
+	test('Find: exclude subfolder', function (done: () => void) {
+		if (platform.isWindows) {
+			return done();
+		}
+
+		const walker = new FileWalker({ rootFolders: rootfolders() });
+		const file0 = './more/file.txt';
+		const file1 = './examples/subfolder/subfile.txt';
+
+		const cmd1 = walker.spawnFindCmd(rootfolders()[0], glob.parse({ '**/something': true }));
+		walker.readStdout(cmd1, 'utf8', (err1, stdout1) => {
+			assert.equal(err1, null);
+			assert.notStrictEqual(stdout1.split('\n').indexOf(file0), -1, stdout1);
+			assert.notStrictEqual(stdout1.split('\n').indexOf(file1), -1, stdout1);
+
+			const cmd2 = walker.spawnFindCmd(rootfolders()[0], glob.parse({ '**/subfolder': true }));
+			walker.readStdout(cmd2, 'utf8', (err2, stdout2) => {
+				assert.equal(err2, null);
+				assert.notStrictEqual(stdout2.split('\n').indexOf(file0), -1, stdout2);
+				assert.strictEqual(stdout2.split('\n').indexOf(file1), -1, stdout2);
+				done();
+			});
+		});
+	});
+
+	test('Find: exclude multiple folders', function (done: () => void) {
+		if (platform.isWindows) {
+			return done();
+		}
+
+		const walker = new FileWalker({ rootFolders: rootfolders() });
+		const file0 = './index.html';
+		const file1 = './examples/small.js';
+		const file2 = './more/file.txt';
+
+		const cmd1 = walker.spawnFindCmd(rootfolders()[0], glob.parse({ '**/something': true }));
+		walker.readStdout(cmd1, 'utf8', (err1, stdout1) => {
+			assert.equal(err1, null);
+			assert.notStrictEqual(stdout1.split('\n').indexOf(file0), -1, stdout1);
+			assert.notStrictEqual(stdout1.split('\n').indexOf(file1), -1, stdout1);
+			assert.notStrictEqual(stdout1.split('\n').indexOf(file2), -1, stdout1);
+
+			const cmd2 = walker.spawnFindCmd(rootfolders()[0], glob.parse({ '{**/examples,**/more}': true }));
+			walker.readStdout(cmd2, 'utf8', (err2, stdout2) => {
+				assert.equal(err2, null);
+				assert.notStrictEqual(stdout2.split('\n').indexOf(file0), -1, stdout2);
+				assert.strictEqual(stdout2.split('\n').indexOf(file1), -1, stdout2);
+				assert.strictEqual(stdout2.split('\n').indexOf(file2), -1, stdout2);
+				done();
+			});
+		});
+	});
+
 	test('Text: GameOfLife', function (done: () => void) {
 		let c = 0;
 		let config = {
-- 
GitLab