提交 519daf64 编写于 作者: T tomoki1207 提交者: katainaka0503

detect encoding

上级 6b4e8566
/**
 * Ambient typings for the untyped 'jschardet' package, limited to the
 * surface this code base uses.
 */
declare module 'jschardet' {
	/** Shape of the value returned by `detect`. */
	export interface IDetectedMap {
		/** Name of the detected encoding, as reported by the library. */
		encoding: string;
		/** How sure the detector is about `encoding` (presumably 0..1 — confirm against the jschardet docs). */
		confidence: number;
	}

	/**
	 * Analyses the bytes in `buffer` and reports the detected encoding.
	 *
	 * @param buffer Raw bytes to inspect.
	 * @returns The detected encoding together with its confidence.
	 */
	export function detect(buffer: NodeBuffer): IDetectedMap;
}
\ No newline at end of file
......@@ -8,6 +8,7 @@
import stream = require('vs/base/node/stream');
import iconv = require('iconv-lite');
import { TPromise } from 'vs/base/common/winjs.base';
import jschardet = require('jschardet');
// Encoding identifier string for UTF-8.
export const UTF8 = 'utf8';
// Encoding identifier string for UTF-8 with a byte order mark (inferred from the name — confirm against callers).
export const UTF8_with_bom = 'utf8bom';
......@@ -94,6 +95,25 @@ export function detectEncodingByBOM(file: string): TPromise<string> {
return stream.readExactlyByFile(file, 3).then(({buffer, bytesRead}) => detectEncodingByBOMFromBuffer(buffer, bytesRead));
}
/**
 * Lower-cased encoding names that the detector's result is discarded for,
 * because the detection library cannot identify them reliably.
 * Compared against the lower-cased name reported by the detector.
 */
const IGNORE_ENCODINGS = ['ascii', 'utf-8', 'utf-16', 'utf-32'];
/**
 * Guesses the character encoding of a buffer by running it through jschardet.
 *
 * @param buffer Raw bytes to analyse.
 * @returns The encoding name exactly as jschardet reported it (original casing),
 * or `null` when nothing was detected or when the lower-cased name is listed in
 * `IGNORE_ENCODINGS`.
 */
export function detectEncodingByBuffer(buffer: NodeBuffer): string {
	const guess = jschardet.detect(buffer);
	const name = guess ? guess.encoding : null;
	if (!name) {
		return null;
	}

	// Discard results for encodings the detector cannot identify reliably
	// (http://chardet.readthedocs.io/en/latest/supported-encodings.html)
	const ignored = IGNORE_ENCODINGS.indexOf(name.toLowerCase()) !== -1;
	return ignored ? null : name;
}
/**
* The encodings that are allowed in a settings file don't match the canonical encoding labels specified by WHATWG.
* See https://encoding.spec.whatwg.org/#names-and-labels
......
......@@ -79,6 +79,9 @@ export function detectMimeAndEncodingFromBuffer({buffer, bytesRead}: stream.Read
}
}
}
if (isText && !enc) {
enc = encoding.detectEncodingByBuffer(buffer);
}
return {
mimes: isText ? [mime.MIME_TEXT] : [mime.MIME_BINARY],
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册