diff --git a/src/vs/base/node/encoding.ts b/src/vs/base/node/encoding.ts index 9a23a47d9c6a4cb9b74cf935f02d811cbf300fc4..d1ed967b225acdc4da34b5d2a01b51bfcb02b90e 100644 --- a/src/vs/base/node/encoding.ts +++ b/src/vs/base/node/encoding.ts @@ -19,6 +19,7 @@ export const UTF16le = 'utf16le'; export interface IDecodeStreamOptions { guessEncoding?: boolean; + restrictGuessedEncodings?: string[]; minBytesRequiredForDetection?: number; overwriteEncoding?(detectedEncoding: string): string; } @@ -78,7 +79,7 @@ export function toDecodeStream(readable: Readable, options: IDecodeStreamOptions this._decodeStreamConstruction = TPromise.as(detectEncodingFromBuffer({ buffer: Buffer.concat(this._buffer), bytesRead: this._bytesBuffered - }, options.guessEncoding)).then(detected => { + }, options.guessEncoding, options.restrictGuessedEncodings)).then(detected => { detected.encoding = options.overwriteEncoding(detected.encoding); this._decodeStream = decodeStream(detected.encoding); for (const buffer of this._buffer) { @@ -271,13 +272,9 @@ export interface IDetectedEncodingResult { seemsBinary: boolean; } -export interface DetectEncodingOption { - autoGuessEncoding?: boolean; -} - export function detectEncodingFromBuffer(readResult: stream.ReadResult, autoGuessEncoding?: false): IDetectedEncodingResult; -export function detectEncodingFromBuffer(readResult: stream.ReadResult, autoGuessEncoding?: boolean): TPromise; -export function detectEncodingFromBuffer({ buffer, bytesRead }: stream.ReadResult, autoGuessEncoding?: boolean): TPromise | IDetectedEncodingResult { +export function detectEncodingFromBuffer(readResult: stream.ReadResult, autoGuessEncoding?: boolean, restrictGuessedEncodings?: string[]): TPromise; +export function detectEncodingFromBuffer({ buffer, bytesRead }: stream.ReadResult, autoGuessEncoding?: boolean, restrictGuessedEncodings?: string[]): TPromise | IDetectedEncodingResult { // Always first check for BOM to find out about encoding let encoding = 
detectEncodingByBOMFromBuffer(buffer, bytesRead); @@ -334,10 +331,17 @@ export function detectEncodingFromBuffer({ buffer, bytesRead }: stream.ReadResul // Auto guess encoding if configured if (autoGuessEncoding && !seemsBinary && !encoding) { - return guessEncodingByBuffer(buffer.slice(0, bytesRead)).then(encoding => { + return guessEncodingByBuffer(buffer.slice(0, bytesRead)).then(guessedEncoding => { + + // Ignore the guessed encoding if a non-empty restriction list is given and does not contain it + if (guessedEncoding && restrictGuessedEncodings && restrictGuessedEncodings.length > 0 && restrictGuessedEncodings.indexOf(guessedEncoding) === -1) { + return { seemsBinary, encoding }; + } + + // Proceed with guessed encoding return { seemsBinary: false, - encoding + encoding: guessedEncoding }; }); } diff --git a/src/vs/base/test/node/encoding/encoding.test.ts b/src/vs/base/test/node/encoding/encoding.test.ts index 99d05d437a0f3189a369b1d3ffcf249300ab52f7..f6702f4db70c7549527a9f5b19883046dcaa5de3 100644 --- a/src/vs/base/test/node/encoding/encoding.test.ts +++ b/src/vs/base/test/node/encoding/encoding.test.ts @@ -153,6 +153,42 @@ suite('Encoding', () => { }); }); + test('autoGuessEncoding, restrictGuessedEncodings empty (ShiftJIS)', function () { + const file = getPathFromAmdModule(require, './fixtures/some.shiftjis.txt'); + return readExactlyByFile(file, 512 * 8).then(buffer => { + return encoding.detectEncodingFromBuffer(buffer, true, []).then(mimes => { + assert.equal(mimes.encoding, 'shiftjis'); + }); + }); + }); + + test('autoGuessEncoding, restrictGuessedEncodings excludes guess (ShiftJIS)', function () { + const file = getPathFromAmdModule(require, './fixtures/some.shiftjis.txt'); + return readExactlyByFile(file, 512 * 8).then(buffer => { + return encoding.detectEncodingFromBuffer(buffer, true, ['windows1252']).then(mimes => { + assert.ok(!mimes.encoding); + }); + }); + }); + + test('autoGuessEncoding, restrictGuessedEncodings includes guess (ShiftJIS)', function () { + const file = getPathFromAmdModule(require, 
'./fixtures/some.shiftjis.txt'); + return readExactlyByFile(file, 512 * 8).then(buffer => { + return encoding.detectEncodingFromBuffer(buffer, true, ['windows1252', 'shiftjis']).then(mimes => { + assert.equal(mimes.encoding, 'shiftjis'); + }); + }); + }); + + test('autoGuessEncoding, restrictGuessedEncodings (CP1252)', function () { + const file = getPathFromAmdModule(require, './fixtures/some.cp1252.txt'); + return readExactlyByFile(file, 512 * 8).then(buffer => { + return encoding.detectEncodingFromBuffer(buffer, true, ['windows1252']).then(mimes => { + assert.equal(mimes.encoding, 'windows1252'); + }); + }); + }); + async function readAndDecodeFromDisk(path, _encoding) { return new Promise((resolve, reject) => { fs.readFile(path, (err, data) => { diff --git a/src/vs/platform/files/common/files.ts b/src/vs/platform/files/common/files.ts index abe84da07b4da9f6402b3a804631c2a09874bad2..e2d951ec24feefa02b66b23e7e38293449a28847 100644 --- a/src/vs/platform/files/common/files.ts +++ b/src/vs/platform/files/common/files.ts @@ -552,6 +552,12 @@ export interface IResolveContentOptions { */ autoGuessEncoding?: boolean; + /** + * The optional list of encodings that can be used when guessing. If not provided or empty, all + * encodings will be supported for guessing. + */ + restrictGuessedEncodings?: string[]; + /** * Is an integer specifying where to begin reading from in the file. If position is null, * data will be read from the current file position. 
@@ -661,6 +667,7 @@ export interface IFilesConfiguration { watcherExclude: { [filepattern: string]: boolean }; encoding: string; autoGuessEncoding: boolean; + restrictGuessedEncodings: string[]; defaultLanguage: string; trimTrailingWhitespace: boolean; autoSave: string; diff --git a/src/vs/platform/telemetry/common/telemetryUtils.ts b/src/vs/platform/telemetry/common/telemetryUtils.ts index a098c7723c15f311db44882d3ac848acd35c8756..dce206b3b4128b772fb4021892a5346217f36b21 100644 --- a/src/vs/platform/telemetry/common/telemetryUtils.ts +++ b/src/vs/platform/telemetry/common/telemetryUtils.ts @@ -163,6 +163,7 @@ const configurationValueWhitelist = [ 'extensions.autoUpdate', 'files.associations', 'files.autoGuessEncoding', + 'files.restrictGuessedEncodings', 'files.autoSave', 'files.autoSaveDelay', 'files.encoding', diff --git a/src/vs/workbench/parts/files/electron-browser/files.contribution.ts b/src/vs/workbench/parts/files/electron-browser/files.contribution.ts index fc3c262ef518b7b8452b890e3c7dd4eaed2d68e4..7c2c6b7b7b8de4fdd2dc531ca3a637e65fa4aca5 100644 --- a/src/vs/workbench/parts/files/electron-browser/files.contribution.ts +++ b/src/vs/workbench/parts/files/electron-browser/files.contribution.ts @@ -234,6 +234,17 @@ configurationRegistry.registerConfiguration({ 'description': nls.localize('autoGuessEncoding', "When enabled, the editor will attempt to guess the character set encoding when opening files. This setting can also be configured per language."), 'scope': ConfigurationScope.RESOURCE }, + 'files.restrictGuessedEncodings': { + 'type': 'array', + 'overridable': true, + 'default': [], + 'items': { + 'type': 'string', + 'enum': Object.keys(SUPPORTED_ENCODINGS) + }, + 'scope': ConfigurationScope.RESOURCE, + 'description': nls.localize('restrictGuessedEncodings', "If provided, will restrict the list of encodings that can be used when guessing. 
If the guessed file encoding is not in the list, the default encoding will be used.") + }, 'files.eol': { 'type': 'string', 'enum': [ diff --git a/src/vs/workbench/services/files/electron-browser/fileService.ts b/src/vs/workbench/services/files/electron-browser/fileService.ts index 2c834e3e1ac992e4fb719ef2db0a24ee36d3b503..4e45241c0b44527b31685e24e5bea5571bed18b8 100644 --- a/src/vs/workbench/services/files/electron-browser/fileService.ts +++ b/src/vs/workbench/services/files/electron-browser/fileService.ts @@ -529,10 +529,10 @@ export class FileService extends Disposable implements IFileService { } else { // when receiving the first chunk of data we need to create the // decoding stream which is then used to drive the string stream. - const autoGuessEncoding = (options && options.autoGuessEncoding) || this.textResourceConfigurationService.getValue(resource, 'files.autoGuessEncoding'); TPromise.as(encoding.detectEncodingFromBuffer( { buffer: chunkBuffer, bytesRead }, - autoGuessEncoding + (options && options.autoGuessEncoding) || this.textResourceConfigurationService.getValue(resource, 'files.autoGuessEncoding'), + (options && options.restrictGuessedEncodings) || this.textResourceConfigurationService.getValue(resource, 'files.restrictGuessedEncodings') )).then(detected => { if (options && options.acceptTextOnly && detected.seemsBinary) { diff --git a/src/vs/workbench/services/files/electron-browser/remoteFileService.ts b/src/vs/workbench/services/files/electron-browser/remoteFileService.ts index 99a00594348f7a42152577cbc10db6fd7adefbff..ef76ccf008dd744f33234919fb93f772947fb083 100644 --- a/src/vs/workbench/services/files/electron-browser/remoteFileService.ts +++ b/src/vs/workbench/services/files/electron-browser/remoteFileService.ts @@ -374,6 +374,7 @@ export class RemoteFileService extends FileService { const decodeStreamOpts: IDecodeStreamOptions = { guessEncoding: options.autoGuessEncoding, + restrictGuessedEncodings: options.restrictGuessedEncodings, 
overwriteEncoding: detected => { return this.encoding.getReadEncoding(resource, options, { encoding: detected, seemsBinary: false }); }