提交 042217fc 编写于 作者: R Rob Lourens

Fix #26708 - use StringDecoder to handle data chunks that split multibyte characters

上级 fe69f9ac
......@@ -6,6 +6,7 @@
import { EventEmitter } from 'events';
import * as path from 'path';
import { StringDecoder, NodeStringDecoder } from 'string_decoder';
import * as cp from 'child_process';
import { rgPath } from 'vscode-ripgrep';
......@@ -174,11 +175,13 @@ export class RipgrepParser extends EventEmitter {
private fileMatch: FileMatch;
private remainder: string;
private isDone: boolean;
private stringDecoder: NodeStringDecoder;
private numResults = 0;
/**
 * Creates a parser instance.
 *
 * Both arguments are captured as private members through parameter
 * properties. A dedicated StringDecoder is created per parser so that bytes
 * of a multibyte character split across two data chunks can be held back
 * and joined with the next chunk instead of being decoded in isolation.
 *
 * @param maxResults stored as `this.maxResults` (presumably the cap on reported results — confirm against its use elsewhere in the class)
 * @param rootFolder stored as `this.rootFolder`
 */
constructor(private maxResults: number, private rootFolder: string) {
	super();

	// 'utf8' is StringDecoder's default encoding; spelled out for clarity.
	this.stringDecoder = new StringDecoder('utf8');
}
public cancel(): void {
......@@ -186,16 +189,23 @@ export class RipgrepParser extends EventEmitter {
}
/**
 * Finishes parsing once no more input is expected.
 *
 * Drains whatever the StringDecoder is still buffering (for example the
 * leading bytes of an incomplete multibyte character), feeds that text
 * through the normal line handling, and then calls `onResult()` if a file
 * match is still pending.
 */
public flush(): void {
	const trailingText = this.stringDecoder.end();
	this.handleDecodedData(trailingText);

	if (!this.fileMatch) {
		return;
	}

	this.onResult();
}
public handleData(data: string | Buffer): void {
/**
 * Accepts one chunk of output and forwards it, as text, to the line parser.
 *
 * @param data A chunk of output. Strings are forwarded unchanged. Buffers
 * go through the shared StringDecoder, which holds back a trailing
 * incomplete multibyte sequence until the next chunk arrives, so
 * characters split across chunk boundaries are decoded correctly.
 */
public handleData(data: Buffer | string): void {
	if (typeof data === 'string') {
		this.handleDecodedData(data);
		return;
	}

	this.handleDecodedData(this.stringDecoder.write(data));
}
private handleDecodedData(decodedData: string): void {
// If the previous data chunk didn't end in a newline, prepend it to this chunk
const dataStr = this.remainder ?
this.remainder + data.toString() :
data.toString();
this.remainder + decodedData :
decodedData;
const dataLines: string[] = dataStr.split(/\r\n|\n/);
this.remainder = dataLines[dataLines.length - 1] ? dataLines.pop() : null;
......
......@@ -33,7 +33,11 @@ suite('RipgrepParser', () => {
return matchLine;
}
function parseInput(inputChunks: string[]): ISerializedFileMatch[] {
/**
 * Convenience wrapper for tests whose input is plain strings: encodes each
 * chunk as a Buffer and delegates to `parseInput`.
 *
 * @param inputChunks String chunks to feed to the parser, in order.
 * @returns Whatever `parseInput` returns for the encoded chunks.
 */
function parseInputStrings(inputChunks: string[]): ISerializedFileMatch[] {
	// Buffer.from replaces the deprecated `new Buffer(string)` constructor;
	// for string input the resulting bytes are the same (UTF-8 by default).
	return parseInput(inputChunks.map(chunk => Buffer.from(chunk)));
}
function parseInput(inputChunks: Buffer[]): ISerializedFileMatch[] {
const matches: ISerializedFileMatch[] = [];
const rgp = new RipgrepParser(1e6, rootFolder);
rgp.on('result', (match: ISerializedFileMatch) => {
......@@ -65,7 +69,7 @@ suite('RipgrepParser', () => {
[getFileLine('a.txt'), getMatchLine(1, ['before', 'match', 'after']), getMatchLine(2, ['before', 'match', 'after']), fileSectionEnd].join('\n')
];
const results = parseInput(input);
const results = parseInputStrings(input);
assert.equal(results.length, 1);
assert.deepEqual(results[0],
<ISerializedFileMatch>{
......@@ -93,7 +97,7 @@ suite('RipgrepParser', () => {
[getFileLine('c.txt'), getMatchLine(1, ['before', 'match', 'after']), getMatchLine(2, ['before', 'match', 'after']), fileSectionEnd].join('\n')
];
const results = parseInput(input);
const results = parseInputStrings(input);
assert.equal(results.length, 3);
results.forEach(fileResult => assert.equal(fileResult.numMatches, 2));
});
......@@ -116,7 +120,7 @@ suite('RipgrepParser', () => {
test('Parses multiple chunks broken at each line', () => {
const input = singleLineChunks.map(chunk => chunk + '\n');
const results = parseInput(input);
const results = parseInputStrings(input);
assert.equal(results.length, 3);
results.forEach(fileResult => assert.equal(fileResult.numMatches, 2));
});
......@@ -126,7 +130,7 @@ suite('RipgrepParser', () => {
.map(chunk => chunk + '\n')
.map(halve));
const results = parseInput(input);
const results = parseInputStrings(input);
assert.equal(results.length, 3);
results.forEach(fileResult => assert.equal(fileResult.numMatches, 2));
});
......@@ -136,7 +140,7 @@ suite('RipgrepParser', () => {
.map(chunk => chunk + '\n')
.map(arrayOfChars));
const results = parseInput(input);
const results = parseInputStrings(input);
assert.equal(results.length, 3);
results.forEach(fileResult => assert.equal(fileResult.numMatches, 2));
});
......@@ -145,8 +149,26 @@ suite('RipgrepParser', () => {
const input = singleLineChunks
.map(chunk => '\n' + chunk);
const results = parseInput(input);
const results = parseInputStrings(input);
assert.equal(results.length, 3);
results.forEach(fileResult => assert.equal(fileResult.numMatches, 2));
});
// Regression test for chunk boundaries that fall inside a multibyte character:
// the parser must reassemble the character rather than decode each half alone.
test('Parses chunks broken in the middle of a multibyte character', () => {
	// U+6F22 is a single UTF-16 code unit that encodes to three UTF-8 bytes.
	// It is written as an escape so the literal cannot be lost or mangled when
	// the file is re-encoded. With an empty string here the test would pass
	// without ever splitting a character.
	const multibyteStr = '\u6F22';
	const multibyteBuf = Buffer.from(multibyteStr);

	// Guard the premise of the test: the 2-byte / 1-byte split below only
	// cuts a character in half if the character really is three bytes long.
	assert.equal(multibyteBuf.length, 3);

	const text = getFileLine('foo/bar') + '\n' + getMatchLine(0, ['before', 'match', 'after']) + '\n';

	// Split the multibyte char into two pieces and divide between the two buffers
	// NOTE(review): index 24 presumably falls inside the text preceding the match
	// (the expected match offset below is 7) — confirm against getFileLine/getMatchLine.
	const beforeIndex = 24;
	const inputBufs = [
		Buffer.concat([Buffer.from(text.substr(0, beforeIndex)), multibyteBuf.slice(0, 2)]),
		Buffer.concat([multibyteBuf.slice(2), Buffer.from(text.substr(beforeIndex))])
	];

	const results = parseInput(inputBufs);
	assert.equal(results.length, 1);
	assert.equal(results[0].lineMatches.length, 1);
	assert.deepEqual(results[0].lineMatches[0].offsetAndLengths, [[7, 5]]);
});
});
\ No newline at end of file
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册