Fix #36309 - remap \W, \s, and \n in regex to match editor regex search

c7a24d09 · Rob Lourens · 0f4fab15 · c7a24d09 · c7a24d09
2 changed file
--- a/src/vs/workbench/services/search/node/ripgrepTextSearchEngine.ts
+++ b/src/vs/workbench/services/search/node/ripgrepTextSearchEngine.ts
@@ -357,7 +357,11 @@ function getRgArgs(query: vscode.TextSearchQuery, options: vscode.TextSearchOpti
 		const regexpStr = regexp.source.replace(/\\\//g, '/'); // RegExp.source arbitrarily returns escaped slashes. Search and destroy.
 		args.push('--regexp', regexpStr);
 	} else if (query.isRegExp) {
-		args.push('--regexp', fixRegexEndingPattern(query.pattern));
+		let fixedRegexpQuery = fixRegexEndingPattern(query.pattern);
+		fixedRegexpQuery = fixRegexNewline(fixedRegexpQuery);
+		fixedRegexpQuery = fixRegexCRMatchingNonWordClass(fixedRegexpQuery, query.isMultiline);
+		fixedRegexpQuery = fixRegexCRMatchingWhitespaceClass(fixedRegexpQuery, query.isMultiline);
+		args.push('--regexp', fixedRegexpQuery);
 	} else {
 		searchPatternAfterDoubleDashes = pattern;
 		args.push('--fixed-strings');
@@ -435,22 +439,20 @@ export function fixRegexEndingPattern(pattern: string): string {
 		pattern;
 }

+export function fixRegexNewline(pattern: string): string {
+	// Replace an unescaped $ at the end of the pattern with \r?$
+	// Match $ preceeded by none or even number of literal \
+	return pattern.replace(/([^\\]|^)(\\\\)*\\n/g, '$1$2\\r?\\n');
+}
+
 export function fixRegexCRMatchingWhitespaceClass(pattern: string, isMultiline: boolean): string {
 	return isMultiline ?
-		pattern.replace(/([^\\]|^)((?:\\\\)*)\\s/, (prefix1, prefix2) => {
-			return prefix1 + prefix2 + '(\\r?\\n|[^\\S\\r])';
-		}) :
-		pattern.replace(/([^\\]|^)((?:\\\\)*)\\s/, (prefix1, prefix2) => {
-			return prefix1 + prefix2 + '[ \\t\\f]';
-		});
+		pattern.replace(/([^\\]|^)((?:\\\\)*)\\s/g, '$1$2(\\r?\\n|[^\\S\\r])') :
+		pattern.replace(/([^\\]|^)((?:\\\\)*)\\s/g, '$1$2[ \\t\\f]');
 }

 export function fixRegexCRMatchingNonWordClass(pattern: string, isMultiline: boolean): string {
 	return isMultiline ?
-		pattern.replace(/([^\\]|^)((?:\\\\)*)\\W/, (prefix1, prefix2) => {
-			return prefix1 + prefix2 + '(\\r?\\n|[^\\w\\r])';
-		}) :
-		pattern.replace(/([^\\]|^)((?:\\\\)*)\\W/, (prefix1, prefix2) => {
-			return prefix1 + prefix2 + '[^\\w\\r]';
-		});
+		pattern.replace(/([^\\]|^)((?:\\\\)*)\\W/g, '$1$2(\\r?\\n|[^\\w\\r])') :
+		pattern.replace(/([^\\]|^)((?:\\\\)*)\\W/g, '$1$2[^\\w\\r]');
 }
--- a/src/vs/workbench/services/search/test/node/ripgrepTextSearchEngine.test.ts
+++ b/src/vs/workbench/services/search/test/node/ripgrepTextSearchEngine.test.ts
@@ -4,7 +4,7 @@
 *--------------------------------------------------------------------------------------------*/

 import * as assert from 'assert';
-import { unicodeEscapesToPCRE2, fixRegexEndingPattern } from 'vs/workbench/services/search/node/ripgrepTextSearchEngine';
+import { unicodeEscapesToPCRE2, fixRegexEndingPattern, fixRegexCRMatchingWhitespaceClass, fixRegexCRMatchingNonWordClass, fixRegexNewline } from 'vs/workbench/services/search/node/ripgrepTextSearchEngine';

 suite('RipgrepTextSearchEngine', () => {
 	test('unicodeEscapesToPCRE2', async () => {
@@ -37,19 +37,72 @@ suite('RipgrepTextSearchEngine', () => {
 		].forEach(testFixRegexEndingPattern);
 	});

-	test('fixRegexEndingPattern', () => {
-		function testFixRegexEndingPattern([input, expectedResult]: string[]): void {
-			assert.equal(fixRegexEndingPattern(input), expectedResult);
+	test('fixRegexCRMatchingWhitespaceClass', () => {
+		function testFixRegexCRMatchingWhitespaceClass([inputReg, isMultiline, testStr, shouldMatch]): void {
+			const fixed = fixRegexCRMatchingWhitespaceClass(inputReg, isMultiline);
+			const reg = new RegExp(fixed);
+			assert.equal(reg.test(testStr), shouldMatch, `${inputReg} => ${reg}, ${testStr}, ${shouldMatch}`);
 		}

 		[
-			['foo', 'foo'],
-			['', ''],
-			['^foo.*bar\\s+', '^foo.*bar\\s+'],
-			['foo$', 'foo\\r?$'],
-			['$', '\\r?$'],
-			['foo\\$', 'foo\\$'],
-			['foo\\\\$', 'foo\\\\\\r?$'],
-		].forEach(testFixRegexEndingPattern);
+			['foo', false, 'foo', true],
+
+			['foo\\s', false, 'foo\r\n', false],
+			['foo\\sabc', true, 'foo\r\nabc', true],
+
+			['foo\\s', false, 'foo\n', false],
+			['foo\\s', true, 'foo\n', true],
+
+			['foo\\s\\n', true, 'foo\r\n', false],
+			['foo\\r\\s', true, 'foo\r\n', true],
+
+			['foo\\s+abc', true, 'foo   \r\nabc', true],
+			['foo\\s+abc', false, 'foo   \t   abc', true],
+		].forEach(testFixRegexCRMatchingWhitespaceClass);
+	});
+
+	test('fixRegexCRMatchingNonWordClass', () => {
+		function testRegexCRMatchingNonWordClass([inputReg, isMultiline, testStr, shouldMatch]): void {
+			const fixed = fixRegexCRMatchingNonWordClass(inputReg, isMultiline);
+			const reg = new RegExp(fixed);
+			assert.equal(reg.test(testStr), shouldMatch, `${inputReg} => ${reg}, ${testStr}, ${shouldMatch}`);
+		}
+
+		[
+			['foo', false, 'foo', true],
+
+			['foo\\W', false, 'foo\r\n', false],
+			['foo\\Wabc', true, 'foo\r\nabc', true],
+
+			['foo\\W', false, 'foo\n', true],
+			['foo\\W', true, 'foo\n', true],
+
+			['foo\\W\\n', true, 'foo\r\n', false],
+			['foo\\r\\W', true, 'foo\r\n', true],
+
+			['foo\\W+abc', true, 'foo   \r\nabc', true],
+			['foo\\W+abc', false, 'foo .-\t   abc', true],
+		].forEach(testRegexCRMatchingNonWordClass);
+	});
+
+	test('fixRegexNewline', () => {
+		function testFixRegexNewline([inputReg, testStr, shouldMatch]): void {
+			const fixed = fixRegexNewline(inputReg);
+			const reg = new RegExp(fixed);
+			assert.equal(reg.test(testStr), shouldMatch, `${inputReg} => ${reg}, ${testStr}, ${shouldMatch}`);
+		}
+
+		[
+			['foo', 'foo', true],
+
+			['foo\\n', 'foo\r\n', true],
+			['foo\\n', 'foo\n', true],
+			['foo\\nabc', 'foo\r\nabc', true],
+			['foo\\nabc', 'foo\nabc', true],
+			['foo\\r\\n', 'foo\r\n', true],
+
+			['foo\\n+abc', 'foo\r\nabc', true],
+			['foo\\n+abc', 'foo\n\n\nabc', true],
+		].forEach(testFixRegexNewline);
 	});
 });