提交 36708c0f 编写于 作者: J Johannes Rieken

add `toDecodeStream` to enable seamless encoding handling, #41985

上级 82137280
......@@ -10,12 +10,92 @@ import * as iconv from 'iconv-lite';
import { TPromise } from 'vs/base/common/winjs.base';
import { isLinux, isMacintosh } from 'vs/base/common/platform';
import { exec } from 'child_process';
import { Readable, Writable, WritableOptions } from 'stream';
// String identifiers for the encodings this module refers to by name.
export const UTF8 = 'utf8';
export const UTF8_with_bom = 'utf8bom';
export const UTF16be = 'utf16be';
export const UTF16le = 'utf16le';
/**
 * Options accepted by `toDecodeStream`.
 */
export interface IDecodeStreamOptions {
	/**
	 * Number of bytes to buffer before encoding detection is started. If the
	 * source ends earlier, detection runs on whatever has been buffered.
	 */
	minBytesRequiredForDetection: number;
	/** Forwarded as the second argument to `detectEncodingFromBuffer`. */
	guessEncoding: boolean;
	/**
	 * Receives the detected encoding and returns the encoding that is actually
	 * used to create the decode stream.
	 */
	overwriteEncoding(detected: string): string;
}
/**
 * Turns a stream of raw bytes into a stream of decoded strings.
 *
 * The bytes of `readable` are buffered until at least
 * `opts.minBytesRequiredForDetection` bytes are available (or the source has
 * ended). The buffered bytes are then handed to `detectEncodingFromBuffer`,
 * the result is passed through `opts.overwriteEncoding`, and a decode stream
 * for the resulting encoding is created. Everything buffered so far is
 * replayed into that stream and all later chunks are forwarded to it.
 *
 * @param readable The source of raw bytes. Chunks must be `Buffer`s.
 * @param opts Controls when detection starts and which encoding is used.
 * @returns A promise that resolves with the detection result and the decoding
 * stream once the decoder exists. It is rejected when the source or the
 * internal writer reports an error, or when detection fails, before that point.
 */
export function toDecodeStream(readable: Readable, opts: IDecodeStreamOptions): TPromise<{ detected: IDetectedEncodingResult, stream: NodeJS.ReadableStream }> {
	return new TPromise<{ detected: IDetectedEncodingResult, stream: NodeJS.ReadableStream }>((resolve, reject) => {

		const writer = new class extends Writable {

			private _decodeStream: NodeJS.ReadWriteStream;
			private _decodeStreamConstruction: Thenable<any>;
			private _buffer: Buffer[] = [];
			private _bytesBuffered = 0;

			constructor(writableOpts?: WritableOptions) {
				super(writableOpts);
				this.once('finish', () => this._finish());
			}

			_write(chunk: any, encoding: string, callback: (error?: Error | null) => void): void {
				if (!Buffer.isBuffer(chunk)) {
					// Stop here: the callback must be invoked exactly once per chunk.
					callback(new Error('data must be a buffer'));
					return;
				}

				if (this._decodeStream) {
					// just a forwarder now
					this._decodeStream.write(chunk, callback);
					return;
				}

				this._buffer.push(chunk);
				this._bytesBuffered += chunk.length;

				if (this._decodeStreamConstruction) {
					// waiting for the decoder to be ready
					this._decodeStreamConstruction.then(_ => callback(), err => callback(err));
				} else if (this._bytesBuffered >= opts.minBytesRequiredForDetection) {
					// buffered enough data, create stream and forward data
					this._startDecodeStream(callback);
				} else {
					// only buffering
					callback();
				}
			}

			/**
			 * Detects the encoding of the buffered bytes, creates the decode stream,
			 * replays the buffered chunks into it and resolves the outer promise.
			 * `callback` is invoked without arguments on success and with the error
			 * when detection or stream creation fails.
			 */
			_startDecodeStream(callback: (error?: Error | null) => void): void {
				this._decodeStreamConstruction = TPromise.as(detectEncodingFromBuffer({
					buffer: Buffer.concat(this._buffer), bytesRead: this._bytesBuffered
				}, opts.guessEncoding)).then(detected => {
					detected.encoding = opts.overwriteEncoding(detected.encoding); // default encoding
					this._decodeStream = decodeStream(detected.encoding);
					for (const buffer of this._buffer) {
						this._decodeStream.write(buffer);
					}
					// The chunks now live in the decode stream; do not keep a second copy.
					this._buffer.length = 0;
					callback();
					resolve({ detected, stream: this._decodeStream });
				}, err => {
					// Without this the caller would wait on the promise forever.
					reject(err);
					callback(err);
				});
			}

			/** Runs once the source has been fully written to this writer. */
			_finish(): void {
				if (this._decodeStream) {
					// normal finish
					this._decodeStream.end();
					return;
				}

				// we were still waiting for data...
				this._startDecodeStream(err => {
					// On failure no decode stream was created, so there is nothing to end.
					if (!err) {
						this._decodeStream.end();
					}
				});
			}
		};

		// `pipe` does not forward errors. Surface failures of the source and of the
		// writer through the promise; once the promise has settled these are no-ops.
		readable.on('error', reject);
		writer.on('error', reject);

		readable.pipe(writer);
	});
}
export function bomLength(encoding: string): number {
switch (encoding) {
case UTF8:
......@@ -350,4 +430,4 @@ export function resolveTerminalEncoding(verbose?: boolean): TPromise<string> {
return UTF8;
});
}
\ No newline at end of file
}
......@@ -6,9 +6,10 @@
'use strict';
import * as assert from 'assert';
import * as fs from 'fs';
import * as encoding from 'vs/base/node/encoding';
import { readExactlyByFile } from 'vs/base/node/stream';
import { Readable } from 'stream';
suite('Encoding', () => {
test('detectBOM UTF-8', () => {
......@@ -150,4 +151,115 @@ suite('Encoding', () => {
});
});
});
/**
 * Reads the whole file at `path` and resolves with its contents decoded via
 * `encoding.decode` using `_encoding`. Rejects with the read error, if any.
 */
async function readAndDecodeFromDisk(path, _encoding) {
	return new Promise<string>((done, fail) => {
		fs.readFile(path, (readError, contents) => {
			if (!readError) {
				done(encoding.decode(contents, _encoding));
				return;
			}
			fail(readError);
		});
	});
}
/**
 * Collects everything `stream` emits into a single string. Each chunk is
 * asserted to be a string. Resolves on 'end' and rejects on 'error'.
 */
async function readAllAsString(stream: NodeJS.ReadableStream) {
	return new Promise<string>((resolve, reject) => {
		let collected = '';

		const onData = (piece: any): void => {
			collected += piece;
			assert.equal(typeof piece, 'string');
		};
		const onEnd = (): void => resolve(collected);

		stream.on('data', onData);
		stream.on('end', onEnd);
		stream.on('error', reject);
	});
}
// Nine bytes arrive in three chunks; the 4-byte detection threshold is crossed
// while the source is still delivering data.
test('toDecodeStream - some stream', async function () {
	const source = new Readable({
		read() {
			for (let i = 0; i < 3; i++) {
				this.push(Buffer.from([65, 66, 67]));
			}
			this.push(null);
		}
	});

	const options = {
		minBytesRequiredForDetection: 4,
		guessEncoding: true,
		overwriteEncoding() { return encoding.UTF8; }
	};
	const result = await encoding.toDecodeStream(source, options);

	assert.ok(result.detected);
	assert.ok(result.stream);

	const content = await readAllAsString(result.stream);
	assert.equal(content, 'ABCABCABC');
});
// The detection threshold (64 bytes) is larger than the nine bytes the source
// provides, so the decoder is only created once the source has ended.
test('toDecodeStream - some stream, expect too much data', async function () {
	const source = new Readable({
		read() {
			for (let i = 0; i < 3; i++) {
				this.push(Buffer.from([65, 66, 67]));
			}
			this.push(null);
		}
	});

	const options = {
		minBytesRequiredForDetection: 64,
		guessEncoding: true,
		overwriteEncoding() { return encoding.UTF8; }
	};
	const result = await encoding.toDecodeStream(source, options);

	assert.ok(result.detected);
	assert.ok(result.stream);

	const content = await readAllAsString(result.stream);
	assert.equal(content, 'ABCABCABC');
});
// A source that ends immediately must still produce a (empty) decoded stream.
test('toDecodeStream - some stream, no data', async function () {
	const source = new Readable({
		read() {
			this.push(null); // empty
		}
	});

	const options = {
		minBytesRequiredForDetection: 512,
		guessEncoding: true,
		overwriteEncoding() { return encoding.UTF8; }
	};
	const result = await encoding.toDecodeStream(source, options);

	assert.ok(result.detected);
	assert.ok(result.stream);

	const content = await readAllAsString(result.stream);
	assert.equal(content, '');
});
// Streams a fixture file, keeps whatever encoding was detected, and compares
// the streamed text with the result of reading and decoding the file in one go.
test('toDecodeStream - encoding, utf16be', async function () {
	const fixture = require.toUrl('./fixtures/some_utf16be.css');
	const options = {
		minBytesRequiredForDetection: 64,
		guessEncoding: true,
		overwriteEncoding(detected) { return detected; }
	};

	const result = await encoding.toDecodeStream(fs.createReadStream(fixture), options);

	assert.equal(result.detected.encoding, 'utf16be');
	assert.equal(result.detected.seemsBinary, false);

	const expected = await readAndDecodeFromDisk(fixture, result.detected.encoding);
	const actual = await readAllAsString(result.stream);
	assert.equal(actual, expected);
});
// Streaming an empty fixture file must yield the same text as reading and
// decoding the file in one go.
test('toDecodeStream - empty file', async function () {
	const fixture = require.toUrl('./fixtures/empty.txt');
	const options = {
		minBytesRequiredForDetection: 64,
		guessEncoding: true,
		overwriteEncoding() { return encoding.UTF8; }
	};

	const result = await encoding.toDecodeStream(fs.createReadStream(fixture), options);

	const expected = await readAndDecodeFromDisk(fixture, result.detected.encoding);
	const actual = await readAllAsString(result.stream);
	assert.equal(actual, expected);
});
});
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册