/**
 * Escapes regular expression characters in a given string.
 *
 * Only ECMAScript syntax characters are escaped. `-` and `#` are deliberately
 * left alone: `\-` (outside a character class) and `\#` are not valid escapes
 * when the resulting pattern is compiled with the `u` (unicode) flag and would
 * make the `RegExp` constructor throw.
 *
 * Note: because `-` is not escaped, the result is meant to be used as a
 * pattern (or part of one), not spliced inside a `[...]` character class.
 *
 * @param value The literal text to escape.
 * @returns `value` with every regex syntax character prefixed by a backslash.
 */
export function escapeRegExpCharacters(value: string): string {
	return value.replace(/[\\\{\}\*\+\?\|\^\$\.\[\]\(\)]/g, '\\$&');
}
test('parseSearchRequest non regex', () => { - assertParseSearchResult('foo', false, false, null, new SearchData(/foo/gi, null, null)); - assertParseSearchResult('foo', false, false, USUAL_WORD_SEPARATORS, new SearchData(/foo/gi, usualWordSeparators, null)); - assertParseSearchResult('foo', false, true, null, new SearchData(/foo/g, null, 'foo')); - assertParseSearchResult('foo', false, true, USUAL_WORD_SEPARATORS, new SearchData(/foo/g, usualWordSeparators, 'foo')); - assertParseSearchResult('foo\\n', false, false, null, new SearchData(/foo\\n/gi, null, null)); - assertParseSearchResult('foo\\\\n', false, false, null, new SearchData(/foo\\\\n/gi, null, null)); - assertParseSearchResult('foo\\r', false, false, null, new SearchData(/foo\\r/gi, null, null)); - assertParseSearchResult('foo\\\\r', false, false, null, new SearchData(/foo\\\\r/gi, null, null)); + assertParseSearchResult('foo', false, false, null, new SearchData(/foo/giu, null, null)); + assertParseSearchResult('foo', false, false, USUAL_WORD_SEPARATORS, new SearchData(/foo/giu, usualWordSeparators, null)); + assertParseSearchResult('foo', false, true, null, new SearchData(/foo/gu, null, 'foo')); + assertParseSearchResult('foo', false, true, USUAL_WORD_SEPARATORS, new SearchData(/foo/gu, usualWordSeparators, 'foo')); + assertParseSearchResult('foo\\n', false, false, null, new SearchData(/foo\\n/giu, null, null)); + assertParseSearchResult('foo\\\\n', false, false, null, new SearchData(/foo\\\\n/giu, null, null)); + assertParseSearchResult('foo\\r', false, false, null, new SearchData(/foo\\r/giu, null, null)); + assertParseSearchResult('foo\\\\r', false, false, null, new SearchData(/foo\\\\r/giu, null, null)); }); test('parseSearchRequest regex', () => { - assertParseSearchResult('foo', true, false, null, new SearchData(/foo/gi, null, null)); - assertParseSearchResult('foo', true, false, USUAL_WORD_SEPARATORS, new SearchData(/foo/gi, usualWordSeparators, null)); - assertParseSearchResult('foo', true, true, 
null, new SearchData(/foo/g, null, null)); - assertParseSearchResult('foo', true, true, USUAL_WORD_SEPARATORS, new SearchData(/foo/g, usualWordSeparators, null)); - assertParseSearchResult('foo\\n', true, false, null, new SearchData(/foo\n/gim, null, null)); - assertParseSearchResult('foo\\\\n', true, false, null, new SearchData(/foo\\n/gi, null, null)); - assertParseSearchResult('foo\\r', true, false, null, new SearchData(/foo\r/gim, null, null)); - assertParseSearchResult('foo\\\\r', true, false, null, new SearchData(/foo\\r/gi, null, null)); + assertParseSearchResult('foo', true, false, null, new SearchData(/foo/giu, null, null)); + assertParseSearchResult('foo', true, false, USUAL_WORD_SEPARATORS, new SearchData(/foo/giu, usualWordSeparators, null)); + assertParseSearchResult('foo', true, true, null, new SearchData(/foo/gu, null, null)); + assertParseSearchResult('foo', true, true, USUAL_WORD_SEPARATORS, new SearchData(/foo/gu, usualWordSeparators, null)); + assertParseSearchResult('foo\\n', true, false, null, new SearchData(/foo\n/gimu, null, null)); + assertParseSearchResult('foo\\\\n', true, false, null, new SearchData(/foo\\n/giu, null, null)); + assertParseSearchResult('foo\\r', true, false, null, new SearchData(/foo\r/gimu, null, null)); + assertParseSearchResult('foo\\\\r', true, false, null, new SearchData(/foo\\r/giu, null, null)); }); test('issue #53415. 
\W should match line break.', () => { @@ -721,6 +721,20 @@ suite('TextModelSearch', () => { ); }); + test('Simple find using unicode escape sequences', () => { + assertFindMatches( + regularText.join('\n'), + '\\u{0066}\\u006f\\u006F', true, false, null, + [ + [1, 14, 1, 17], + [1, 44, 1, 47], + [2, 22, 2, 25], + [2, 48, 2, 51], + [4, 59, 4, 62] + ] + ); + }); + test('isMultilineRegexSource', () => { assert(!isMultilineRegexSource('foo')); assert(!isMultilineRegexSource('')); diff --git a/src/vs/workbench/services/search/common/replace.ts b/src/vs/workbench/services/search/common/replace.ts index 99f610bbcf5ce6fc0ab162aa7e005959b7e5eed1..764873d247ab72e930e697d22433337f0e7fb1fb 100644 --- a/src/vs/workbench/services/search/common/replace.ts +++ b/src/vs/workbench/services/search/common/replace.ts @@ -27,7 +27,7 @@ export class ReplacePattern { } else { searchPatternInfo = arg2; parseParameters = !!searchPatternInfo.isRegExp; - this._regExp = strings.createRegExp(searchPatternInfo.pattern, !!searchPatternInfo.isRegExp, { matchCase: searchPatternInfo.isCaseSensitive, wholeWord: searchPatternInfo.isWordMatch, multiline: searchPatternInfo.isMultiline, global: false }); + this._regExp = strings.createRegExp(searchPatternInfo.pattern, !!searchPatternInfo.isRegExp, { matchCase: searchPatternInfo.isCaseSensitive, wholeWord: searchPatternInfo.isWordMatch, multiline: searchPatternInfo.isMultiline, global: false, unicode: true }); } if (parseParameters) { diff --git a/src/vs/workbench/services/search/common/search.ts b/src/vs/workbench/services/search/common/search.ts index bb47b117377e38ca23720eab1acf4dce15fe3b57..30e38c2c7686d8bd428ae371ce04e5e2dc056427 100644 --- a/src/vs/workbench/services/search/common/search.ts +++ b/src/vs/workbench/services/search/common/search.ts @@ -132,6 +132,7 @@ export interface IPatternInfo { isWordMatch?: boolean; wordSeparators?: string; isMultiline?: boolean; + isUnicode?: boolean; isCaseSensitive?: boolean; } diff --git 
a/src/vs/workbench/services/search/node/ripgrepTextSearchEngine.ts b/src/vs/workbench/services/search/node/ripgrepTextSearchEngine.ts index ee39b55b8e8579eaa67cfb86ad4e63f6d026e9a3..84d73b811cc522c082fb49766ae256cb1e44f73a 100644 --- a/src/vs/workbench/services/search/node/ripgrepTextSearchEngine.ts +++ b/src/vs/workbench/services/search/node/ripgrepTextSearchEngine.ts @@ -394,13 +394,11 @@ function getRgArgs(query: TextSearchQuery, options: TextSearchOptions): string[] args.push('--encoding', options.encoding); } - let pattern = query.pattern; - // Ripgrep handles -- as a -- arg separator. Only --. // - is ok, --- is ok, --some-flag is also ok. Need to special case. - if (pattern === '--') { + if (query.pattern === '--') { query.isRegExp = true; - pattern = '\\-\\-'; + query.pattern = '\\-\\-'; } if (query.isMultiline && !query.isRegExp) { @@ -413,7 +411,7 @@ function getRgArgs(query: TextSearchQuery, options: TextSearchOptions): string[] } if (query.isRegExp) { - pattern = unicodeEscapesToPCRE2(pattern); + query.pattern = unicodeEscapesToPCRE2(query.pattern); } // Allow $ to match /r/n @@ -421,7 +419,7 @@ function getRgArgs(query: TextSearchQuery, options: TextSearchOptions): string[] let searchPatternAfterDoubleDashes: Maybe; if (query.isWordMatch) { - const regexp = createRegExp(pattern, !!query.isRegExp, { wholeWord: query.isWordMatch }); + const regexp = createRegExp(query.pattern, !!query.isRegExp, { wholeWord: query.isWordMatch }); const regexpStr = regexp.source.replace(/\\\//g, '/'); // RegExp.source arbitrarily returns escaped slashes. Search and destroy. 
/**
 * Rewrites JavaScript-style unicode escapes in a regex source into the
 * `\x{hhhh}` form.
 *
 * Both `\uhhhh` and `\u{hhhh}` (exactly four hex digits, either case) are
 * converted. An escape is only recognised when the backslash introducing it
 * is itself unescaped, i.e. preceded by an even number of backslashes.
 * Anything else is returned untouched: fewer than four digits, braces holding
 * more or fewer than four digits, or non-hex characters.
 *
 * @param pattern The regular expression source to rewrite.
 * @returns The pattern with every recognised escape replaced by `\x{hhhh}`.
 */
export function unicodeEscapesToPCRE2(pattern: string): string {
	// Group 1: the preceding character (or start of input) plus any run of
	//          escaped backslashes, kept verbatim in the output.
	// Group 2: digits of the bare form `\uhhhh`.
	// Group 3: digits of the braced form `\u{hhhh}`.
	// The `u` is matched case-sensitively so `\U...` is never touched.
	const unicodeEscape = /((?:[^\\]|^)(?:\\\\)*)\\u(?:([0-9a-fA-F]{4})|\{([0-9a-fA-F]{4})\})/g;

	// A single global pass can skip an escape that immediately follows another
	// one, because the character before it was consumed as part of the previous
	// match. Repeat until a pass changes nothing.
	let previous: string;
	do {
		previous = pattern;
		pattern = pattern.replace(
			unicodeEscape,
			(_match: string, prefix: string, bare: string | undefined, braced: string | undefined) => `${prefix}\\x{${bare || braced}}`
		);
	} while (pattern !== previous);

	return pattern;
}
sequences)', () => { + const config: ITextQuery = { + type: QueryType.Text, + folderQueries: ROOT_FOLDER_QUERY, + contentPattern: { pattern: 'G\\u{0061}m\\u0065OfLife', isRegExp: true } + }; + + return doSearchTest(config, 4); + }); + + test('Text: GameOfLife (unicode escape sequences, force PCRE2)', () => { + const config: ITextQuery = { + type: QueryType.Text, + folderQueries: ROOT_FOLDER_QUERY, + contentPattern: { pattern: '(? { const config: ITextQuery = { type: QueryType.Text,