提交 9c882c7c 编写于 作者: B Benjamin Pasero

fix #36951

上级 f1962fe5
......@@ -19,6 +19,7 @@ export const UTF16le = 'utf16le';
/**
 * Options accepted by `toDecodeStream` that control how the encoding of the
 * buffered bytes is determined before the decode stream is created.
 */
export interface IDecodeStreamOptions {
/** Forwarded to `detectEncodingFromBuffer` as its `autoGuessEncoding` argument. */
guessEncoding?: boolean;
/**
 * Forwarded to `detectEncodingFromBuffer`. When the list is non-empty, a guessed
 * encoding that is not contained in it is ignored.
 */
restrictGuessedEncodings?: string[];
/** NOTE(review): presumably the number of bytes to buffer before detection runs; its use is not visible here, so confirm. */
minBytesRequiredForDetection?: number;
/**
 * Receives the detected encoding; the returned value replaces it before the
 * decode stream is created.
 * NOTE(review): declared optional, but the visible call site invokes it unconditionally.
 */
overwriteEncoding?(detectedEncoding: string): string;
}
......@@ -78,7 +79,7 @@ export function toDecodeStream(readable: Readable, options: IDecodeStreamOptions
this._decodeStreamConstruction = TPromise.as(detectEncodingFromBuffer({
buffer: Buffer.concat(this._buffer), bytesRead: this._bytesBuffered
}, options.guessEncoding)).then(detected => {
}, options.guessEncoding, options.restrictGuessedEncodings)).then(detected => {
detected.encoding = options.overwriteEncoding(detected.encoding);
this._decodeStream = decodeStream(detected.encoding);
for (const buffer of this._buffer) {
......@@ -271,13 +272,9 @@ export interface IDetectedEncodingResult {
seemsBinary: boolean;
}
/**
 * Option bag carrying a single optional `autoGuessEncoding` flag.
 * NOTE(review): nothing in the visible code references this interface; confirm it is still needed.
 */
export interface DetectEncodingOption {
autoGuessEncoding?: boolean;
}
export function detectEncodingFromBuffer(readResult: stream.ReadResult, autoGuessEncoding?: false): IDetectedEncodingResult;
export function detectEncodingFromBuffer(readResult: stream.ReadResult, autoGuessEncoding?: boolean): TPromise<IDetectedEncodingResult>;
export function detectEncodingFromBuffer({ buffer, bytesRead }: stream.ReadResult, autoGuessEncoding?: boolean): TPromise<IDetectedEncodingResult> | IDetectedEncodingResult {
export function detectEncodingFromBuffer(readResult: stream.ReadResult, autoGuessEncoding?: boolean, restrictGuessedEncodings?: string[]): TPromise<IDetectedEncodingResult>;
export function detectEncodingFromBuffer({ buffer, bytesRead }: stream.ReadResult, autoGuessEncoding?: boolean, restrictGuessedEncodings?: string[]): TPromise<IDetectedEncodingResult> | IDetectedEncodingResult {
// Always first check for BOM to find out about encoding
let encoding = detectEncodingByBOMFromBuffer(buffer, bytesRead);
......@@ -334,10 +331,17 @@ export function detectEncodingFromBuffer({ buffer, bytesRead }: stream.ReadResul
// Auto guess encoding if configured
if (autoGuessEncoding && !seemsBinary && !encoding) {
return guessEncodingByBuffer(buffer.slice(0, bytesRead)).then(encoding => {
return guessEncodingByBuffer(buffer.slice(0, bytesRead)).then(guessedEncoding => {
// Ignore encoding if we have a list of encodings to use for guessing
if (guessedEncoding && restrictGuessedEncodings && restrictGuessedEncodings.length > 0 && restrictGuessedEncodings.indexOf(guessedEncoding) === -1) {
return { seemsBinary, encoding };
}
// Proceed with guessed encoding
return {
seemsBinary: false,
encoding
encoding: guessedEncoding
};
});
}
......
......@@ -153,6 +153,42 @@ suite('Encoding', () => {
});
});
test('autoGuessEncoding, restrictGuessedEncodings empty (ShiftJIS)', async function () {
	// An empty restriction list does not limit guessing, so the guessed encoding is kept.
	const fixturePath = getPathFromAmdModule(require, './fixtures/some.shiftjis.txt');
	const readResult = await readExactlyByFile(fixturePath, 512 * 8);
	const detected = await encoding.detectEncodingFromBuffer(readResult, true, []);
	assert.equal(detected.encoding, 'shiftjis');
});
test('autoGuessEncoding, restrictGuessedEncodings (ShiftJIS)', async function () {
	// The restriction list names only 'windows1252', so a differing guess is dropped
	// and no encoding is reported.
	const fixturePath = getPathFromAmdModule(require, './fixtures/some.shiftjis.txt');
	const readResult = await readExactlyByFile(fixturePath, 512 * 8);
	const detected = await encoding.detectEncodingFromBuffer(readResult, true, ['windows1252']);
	assert.ok(!detected.encoding);
});
// Title made distinct from the preceding ShiftJIS test (which restricts to
// 'windows1252' only) so that a failure report or a title filter identifies
// exactly one test.
test('autoGuessEncoding, restrictGuessedEncodings includes guessed encoding (ShiftJIS)', function () {
	const file = getPathFromAmdModule(require, './fixtures/some.shiftjis.txt');
	return readExactlyByFile(file, 512 * 8).then(buffer => {
		// 'shiftjis' is part of the restriction list, so the guess must be accepted.
		return encoding.detectEncodingFromBuffer(buffer, true, ['windows1252', 'shiftjis']).then(mimes => {
			assert.equal(mimes.encoding, 'shiftjis');
		});
	});
});
test('autoGuessEncoding, restrictGuessedEncodings (CP1252)', async function () {
	// The expected guess 'windows1252' is in the restriction list, so it is reported.
	const fixturePath = getPathFromAmdModule(require, './fixtures/some.cp1252.txt');
	const readResult = await readExactlyByFile(fixturePath, 512 * 8);
	const detected = await encoding.detectEncodingFromBuffer(readResult, true, ['windows1252']);
	assert.equal(detected.encoding, 'windows1252');
});
async function readAndDecodeFromDisk(path, _encoding) {
return new Promise<string>((resolve, reject) => {
fs.readFile(path, (err, data) => {
......
......@@ -552,6 +552,12 @@ export interface IResolveContentOptions {
*/
autoGuessEncoding?: boolean;
/**
* The optional list of encodings that can be used when guessing. If not provided, all encodings
* will be supported for guessing.
*/
restrictGuessedEncodings?: string[];
/**
* Is an integer specifying where to begin reading from in the file. If position is null,
* data will be read from the current file position.
......@@ -661,6 +667,7 @@ export interface IFilesConfiguration {
watcherExclude: { [filepattern: string]: boolean };
encoding: string;
autoGuessEncoding: boolean;
restrictGuessedEncodings: string[];
defaultLanguage: string;
trimTrailingWhitespace: boolean;
autoSave: string;
......
......@@ -163,6 +163,7 @@ const configurationValueWhitelist = [
'extensions.autoUpdate',
'files.associations',
'files.autoGuessEncoding',
'files.restrictGuessedEncodings',
'files.autoSave',
'files.autoSaveDelay',
'files.encoding',
......
......@@ -234,6 +234,17 @@ configurationRegistry.registerConfiguration({
'description': nls.localize('autoGuessEncoding', "When enabled, the editor will attempt to guess the character set encoding when opening files. This setting can also be configured per language."),
'scope': ConfigurationScope.RESOURCE
},
'files.restrictGuessedEncodings': {
'type': 'array',
'overridable': true,
'default': [],
'items': {
'type': 'string',
'enum': Object.keys(SUPPORTED_ENCODINGS)
},
'scope': ConfigurationScope.RESOURCE,
'description': nls.localize('restrictGuessedEncodings', "If provided, will restrict the list of encodings that can be used when guessing. If the guessed file encoding is not in the list, the default encoding will be used.")
},
'files.eol': {
'type': 'string',
'enum': [
......
......@@ -529,10 +529,10 @@ export class FileService extends Disposable implements IFileService {
} else {
// when receiving the first chunk of data we need to create the
// decoding stream which is then used to drive the string stream.
const autoGuessEncoding = (options && options.autoGuessEncoding) || this.textResourceConfigurationService.getValue(resource, 'files.autoGuessEncoding');
TPromise.as(encoding.detectEncodingFromBuffer(
{ buffer: chunkBuffer, bytesRead },
autoGuessEncoding
(options && options.autoGuessEncoding) || this.textResourceConfigurationService.getValue(resource, 'files.autoGuessEncoding'),
(options && options.restrictGuessedEncodings) || this.textResourceConfigurationService.getValue(resource, 'files.restrictGuessedEncodings')
)).then(detected => {
if (options && options.acceptTextOnly && detected.seemsBinary) {
......
......@@ -374,6 +374,7 @@ export class RemoteFileService extends FileService {
const decodeStreamOpts: IDecodeStreamOptions = {
guessEncoding: options.autoGuessEncoding,
restrictGuessedEncodings: options.restrictGuessedEncodings,
overwriteEncoding: detected => {
return this.encoding.getReadEncoding(resource, options, { encoding: detected, seemsBinary: false });
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册