提交 250c58e6 编写于 作者: R Rob Lourens

Enable unicode mode for regex search in the editor, and remap \u{1234} to \x{1234} for ripgrep (rg)
Fix #62416, fix #61746
上级 1c623c16
......@@ -70,7 +70,7 @@ export function escape(html: string): string {
* Escapes regular expression characters in a given string
*/
export function escapeRegExpCharacters(value: string): string {
	// Escape only the characters that are regular expression syntax
	// characters. `-` and `#` are deliberately NOT escaped: outside a character
	// class they are plain literals, and `\-` / `\#` are rejected as invalid
	// escapes when the resulting pattern is compiled with the `u` (unicode)
	// flag, which would make a literal search for such text throw.
	return value.replace(/[\\\{\}\*\+\?\|\^\$\.\[\]\(\)]/g, '\\$&');
}
/**
......
......@@ -45,7 +45,8 @@ export class SearchParams {
matchCase: this.matchCase,
wholeWord: false,
multiline: multiline,
global: true
global: true,
unicode: true
});
} catch (err) {
return null;
......
......@@ -467,7 +467,7 @@ export class FindModelBoundToEditorModel {
let searchRegex = searchData.regex;
if (!searchRegex.multiline) {
let mod = 'm';
let mod = 'mu';
if (searchRegex.ignoreCase) {
mod += 'i';
}
......
......@@ -611,25 +611,25 @@ suite('TextModelSearch', () => {
});
test('parseSearchRequest non regex', () => {
	// Search regexes are created with `unicode: true`, so every expected
	// RegExp carries the `u` flag. The earlier expectations without `u`
	// contradicted these for the very same inputs and have been dropped.
	assertParseSearchResult('foo', false, false, null, new SearchData(/foo/giu, null, null));
	assertParseSearchResult('foo', false, false, USUAL_WORD_SEPARATORS, new SearchData(/foo/giu, usualWordSeparators, null));
	assertParseSearchResult('foo', false, true, null, new SearchData(/foo/gu, null, 'foo'));
	assertParseSearchResult('foo', false, true, USUAL_WORD_SEPARATORS, new SearchData(/foo/gu, usualWordSeparators, 'foo'));
	// In non-regex mode, backslash sequences are searched for literally.
	assertParseSearchResult('foo\\n', false, false, null, new SearchData(/foo\\n/giu, null, null));
	assertParseSearchResult('foo\\\\n', false, false, null, new SearchData(/foo\\\\n/giu, null, null));
	assertParseSearchResult('foo\\r', false, false, null, new SearchData(/foo\\r/giu, null, null));
	assertParseSearchResult('foo\\\\r', false, false, null, new SearchData(/foo\\\\r/giu, null, null));
});
test('parseSearchRequest regex', () => {
	// Search regexes are created with `unicode: true`, so every expected
	// RegExp carries the `u` flag. The earlier expectations without `u`
	// contradicted these for the very same inputs and have been dropped.
	assertParseSearchResult('foo', true, false, null, new SearchData(/foo/giu, null, null));
	assertParseSearchResult('foo', true, false, USUAL_WORD_SEPARATORS, new SearchData(/foo/giu, usualWordSeparators, null));
	assertParseSearchResult('foo', true, true, null, new SearchData(/foo/gu, null, null));
	assertParseSearchResult('foo', true, true, USUAL_WORD_SEPARATORS, new SearchData(/foo/gu, usualWordSeparators, null));
	// An unescaped `\n` / `\r` in the pattern yields a multiline (`m`) regex;
	// an escaped backslash followed by `n` / `r` does not.
	assertParseSearchResult('foo\\n', true, false, null, new SearchData(/foo\n/gimu, null, null));
	assertParseSearchResult('foo\\\\n', true, false, null, new SearchData(/foo\\n/giu, null, null));
	assertParseSearchResult('foo\\r', true, false, null, new SearchData(/foo\r/gimu, null, null));
	assertParseSearchResult('foo\\\\r', true, false, null, new SearchData(/foo\\r/giu, null, null));
});
test('issue #53415. \W should match line break.', () => {
......@@ -721,6 +721,20 @@ suite('TextModelSearch', () => {
);
});
test('Simple find using unicode escape sequences', () => {
	const haystack = regularText.join('\n');
	// Spells "foo" with escapes only: the braced form \u{0066} ('f'), then the
	// four-digit form with lower-case (\u006f) and upper-case (\u006F) hex
	// digits, both 'o'.
	const escapedFoo = '\\u{0066}\\u006f\\u006F';

	assertFindMatches(haystack, escapedFoo, true, false, null, [
		[1, 14, 1, 17],
		[1, 44, 1, 47],
		[2, 22, 2, 25],
		[2, 48, 2, 51],
		[4, 59, 4, 62]
	]);
});
test('isMultilineRegexSource', () => {
assert(!isMultilineRegexSource('foo'));
assert(!isMultilineRegexSource(''));
......
......@@ -27,7 +27,7 @@ export class ReplacePattern {
} else {
searchPatternInfo = arg2;
parseParameters = !!searchPatternInfo.isRegExp;
this._regExp = strings.createRegExp(searchPatternInfo.pattern, !!searchPatternInfo.isRegExp, { matchCase: searchPatternInfo.isCaseSensitive, wholeWord: searchPatternInfo.isWordMatch, multiline: searchPatternInfo.isMultiline, global: false });
this._regExp = strings.createRegExp(searchPatternInfo.pattern, !!searchPatternInfo.isRegExp, { matchCase: searchPatternInfo.isCaseSensitive, wholeWord: searchPatternInfo.isWordMatch, multiline: searchPatternInfo.isMultiline, global: false, unicode: true });
}
if (parseParameters) {
......
......@@ -132,6 +132,7 @@ export interface IPatternInfo {
isWordMatch?: boolean;
wordSeparators?: string;
isMultiline?: boolean;
isUnicode?: boolean;
isCaseSensitive?: boolean;
}
......
......@@ -394,13 +394,11 @@ function getRgArgs(query: TextSearchQuery, options: TextSearchOptions): string[]
args.push('--encoding', options.encoding);
}
let pattern = query.pattern;
// Ripgrep handles -- as a -- arg separator. Only --.
// - is ok, --- is ok, --some-flag is also ok. Need to special case.
if (pattern === '--') {
if (query.pattern === '--') {
query.isRegExp = true;
pattern = '\\-\\-';
query.pattern = '\\-\\-';
}
if (query.isMultiline && !query.isRegExp) {
......@@ -413,7 +411,7 @@ function getRgArgs(query: TextSearchQuery, options: TextSearchOptions): string[]
}
if (query.isRegExp) {
pattern = unicodeEscapesToPCRE2(pattern);
query.pattern = unicodeEscapesToPCRE2(query.pattern);
}
// Allow $ to match /r/n
......@@ -421,7 +419,7 @@ function getRgArgs(query: TextSearchQuery, options: TextSearchOptions): string[]
let searchPatternAfterDoubleDashes: Maybe<string>;
if (query.isWordMatch) {
const regexp = createRegExp(pattern, !!query.isRegExp, { wholeWord: query.isWordMatch });
const regexp = createRegExp(query.pattern, !!query.isRegExp, { wholeWord: query.isWordMatch });
const regexpStr = regexp.source.replace(/\\\//g, '/'); // RegExp.source arbitrarily returns escaped slashes. Search and destroy.
args.push('--regexp', regexpStr);
} else if (query.isRegExp) {
......@@ -430,7 +428,7 @@ function getRgArgs(query: TextSearchQuery, options: TextSearchOptions): string[]
args.push('--regexp', fixedRegexpQuery);
args.push('--auto-hybrid-regex');
} else {
searchPatternAfterDoubleDashes = pattern;
searchPatternAfterDoubleDashes = query.pattern;
args.push('--fixed-strings');
}
......@@ -479,11 +477,18 @@ export function spreadGlobComponents(globArg: string): string[] {
}
export function unicodeEscapesToPCRE2(pattern: string): string {
const reg = /((?:[^\\]|^)(?:\\\\)*)\\u([a-z0-9]{4})(?!\d)/g;
// Replace an unescaped $ at the end of the pattern with \r?$
// Match $ preceded by zero or an even number of literal \
while (pattern.match(reg)) {
pattern = pattern.replace(reg, `$1\\x{$2}`);
// Match \u1234
const unicodePattern = /((?:[^\\]|^)(?:\\\\)*)\\u([a-z0-9]{4})/g;
while (pattern.match(unicodePattern)) {
pattern = pattern.replace(unicodePattern, `$1\\x{$2}`);
}
// Match \u{1234}
// \u with 5-6 characters will be left alone because \x only takes 4 characters.
const unicodePatternWithBraces = /((?:[^\\]|^)(?:\\\\)*)\\u\{([a-z0-9]{4})\}/g;
while (pattern.match(unicodePatternWithBraces)) {
pattern = pattern.replace(unicodePatternWithBraces, `$1\\x{$2}`);
}
return pattern;
......
......@@ -17,9 +17,12 @@ suite('RipgrepTextSearchEngine', () => {
assert.equal(unicodeEscapesToPCRE2('\\\\\\u1234'), '\\\\\\x{1234}');
assert.equal(unicodeEscapesToPCRE2('foo\\\\\\u1234'), 'foo\\\\\\x{1234}');
assert.equal(unicodeEscapesToPCRE2('\\u{1234}'), '\\x{1234}');
assert.equal(unicodeEscapesToPCRE2('\\u{1234}\\u{0001}'), '\\x{1234}\\x{0001}');
assert.equal(unicodeEscapesToPCRE2('foo\\u{1234}bar'), 'foo\\x{1234}bar');
assert.equal(unicodeEscapesToPCRE2('foo\\u{123456}7bar'), 'foo\\u{123456}7bar');
assert.equal(unicodeEscapesToPCRE2('\\u123'), '\\u123');
assert.equal(unicodeEscapesToPCRE2('\\u12345'), '\\u12345');
assert.equal(unicodeEscapesToPCRE2('\\\\u12345'), '\\\\u12345');
assert.equal(unicodeEscapesToPCRE2('foo'), 'foo');
assert.equal(unicodeEscapesToPCRE2(''), '');
});
......
......@@ -69,6 +69,26 @@ suite('Search-integration', function () {
return doSearchTest(config, 4);
});
test('Text: GameOfLife (unicode escape sequences)', () => {
	// \u{0061} is 'a' and \u0065 is 'e', so the pattern spells "GameOfLife"
	// while exercising both the braced and the four-digit escape forms.
	const contentPattern = { pattern: 'G\\u{0061}m\\u0065OfLife', isRegExp: true };
	const config: ITextQuery = {
		type: QueryType.Text,
		folderQueries: ROOT_FOLDER_QUERY,
		contentPattern
	};

	return doSearchTest(config, 4);
});
test('Text: GameOfLife (unicode escape sequences, force PCRE2)', () => {
	// Same "GameOfLife" pattern as the plain unicode-escape test, prefixed with
	// a negative lookbehind. Per the test name, the lookbehind is there to push
	// the search onto the PCRE2 engine (presumably because the default engine
	// does not support lookbehind; confirm against ripgrep's behaviour).
	const contentPattern = { pattern: '(?<!a)G\\u{0061}m\\u0065OfLife', isRegExp: true };
	const config: ITextQuery = {
		type: QueryType.Text,
		folderQueries: ROOT_FOLDER_QUERY,
		contentPattern
	};

	return doSearchTest(config, 4);
});
test('Text: GameOfLife (PCRE2 RegExp)', () => {
const config: ITextQuery = {
type: QueryType.Text,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册