提交 596469e2 编写于 作者: J Johannes Rieken

move `removeAccents` to normalization

上级 91ee177f
......@@ -46,3 +46,17 @@ function normalize(str: string, form: string, normalizedCache: LRUCache<string,
return res;
}
/**
 * Returns `str` with every code unit in the range U+0300–U+036F removed
 * after the string has been passed through `normalizeNFD`.
 *
 * Which implementation is used is decided once, when this module is
 * evaluated: if `canNormalize` is falsy, the returned function hands its
 * input back untouched.
 */
export const removeAccents: (str: string) => string = (() => {
	if (canNormalize) {
		// transform into NFD form so accents can be matched on their own, then drop them
		// see: https://stackoverflow.com/questions/990904/remove-accents-diacritics-in-a-string-in-javascript/37511463#37511463
		const combiningMarks = /[\u0300-\u036f]/g;
		return (str: string) => normalizeNFD(str).replace(combiningMarks, '');
	}

	// no ES6 features... fall back to the identity function
	return (str: string) => str;
})();
......@@ -5,7 +5,6 @@
import { CharCode } from 'vs/base/common/charCode';
import { Constants } from 'vs/base/common/uint';
import { canNormalize, normalizeNFD } from 'vs/base/common/normalization';
export function isFalsyOrWhitespace(str: string | undefined): boolean {
if (!str || typeof str !== 'string') {
......@@ -853,21 +852,6 @@ export function removeAnsiEscapeCodes(str: string): string {
return str;
}
/**
 * Removes accents from `str`: the string is run through `normalizeNFD` and
 * all code units between U+0300 and U+036F are then deleted.
 *
 * When `canNormalize` is falsy the string is returned as-is.
 */
export const removeAccents: (str: string) => string = (function () {
	// no ES6 features... nothing to strip, so pass the input straight through
	if (!canNormalize) {
		return (input: string): string => input;
	}

	// see: https://stackoverflow.com/questions/990904/remove-accents-diacritics-in-a-string-in-javascript/37511463#37511463
	const accentPattern = /[\u0300-\u036f]/g;

	return (input: string): string => {
		// transform into NFD form first, then remove the accents
		const decomposed = normalizeNFD(input);
		return decomposed.replace(accentPattern, '');
	};
})();
// -- UTF-8 BOM
/** One-code-unit string created by passing `CharCode.UTF8_BOM` to `String.fromCharCode`. */
export const UTF8_BOM_CHARACTER = String.fromCharCode(CharCode.UTF8_BOM);
......
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import * as assert from 'assert';
import { removeAccents } from 'vs/base/common/normalization';
suite('Normalization', () => {

	/**
	 * Checks that `removeAccents` maps each accented input to its plain
	 * counterpart. Cases are listed as `[input, expected]` pairs and are
	 * compared with `assert.strictEqual` (the non-deprecated, non-coercing
	 * equality assertion).
	 */
	test('removeAccents', function () {
		const cases: [string, string][] = [
			['joào', 'joao'],
			['joáo', 'joao'],
			['joâo', 'joao'],
			['joäo', 'joao'],
			// ['joæo', 'joao'], // not an accent
			['joão', 'joao'],
			['joåo', 'joao'],
			['joåo', 'joao'],
			['joāo', 'joao'],

			['fôo', 'foo'],
			['föo', 'foo'],
			['fòo', 'foo'],
			['fóo', 'foo'],
			// ['fœo', 'foo'], // disabled in the original test
			// ['føo', 'foo'], // disabled in the original test
			['fōo', 'foo'],
			['fõo', 'foo'],

			['andrè', 'andre'],
			['andré', 'andre'],
			['andrê', 'andre'],
			['andrë', 'andre'],
			['andrē', 'andre'],
			['andrė', 'andre'],
			['andrę', 'andre'],

			['hvîc', 'hvic'],
			['hvïc', 'hvic'],
			['hvíc', 'hvic'],
			['hvīc', 'hvic'],
			['hvįc', 'hvic'],
			['hvìc', 'hvic'],

			['ûdo', 'udo'],
			['üdo', 'udo'],
			['ùdo', 'udo'],
			['údo', 'udo'],
			['ūdo', 'udo'],

			['heÿ', 'hey'],

			// ['gruß', 'grus'], // disabled in the original test
			['gruś', 'grus'],
			['gruš', 'grus'],

			['çool', 'cool'],
			['ćool', 'cool'],
			['čool', 'cool'],

			['ñice', 'nice'],
			['ńice', 'nice'],
		];

		// Same order as before; the first mismatch fails the test.
		for (const [input, expected] of cases) {
			assert.strictEqual(removeAccents(input), expected);
		}
	});
});
......@@ -404,61 +404,6 @@ suite('Strings', () => {
assert.equal(strings.getNLines('foo', 0), '');
});
// Verifies `strings.removeAccents` against a table of `[input, expected]` pairs.
test('removeAccents', function () {
	const expectations: [string, string][] = [
		['joào', 'joao'],
		['joáo', 'joao'],
		['joâo', 'joao'],
		['joäo', 'joao'],
		// ['joæo', 'joao'], // not an accent
		['joão', 'joao'],
		['joåo', 'joao'],
		['joåo', 'joao'],
		['joāo', 'joao'],

		['fôo', 'foo'],
		['föo', 'foo'],
		['fòo', 'foo'],
		['fóo', 'foo'],
		// ['fœo', 'foo'],
		// ['føo', 'foo'],
		['fōo', 'foo'],
		['fõo', 'foo'],

		['andrè', 'andre'],
		['andré', 'andre'],
		['andrê', 'andre'],
		['andrë', 'andre'],
		['andrē', 'andre'],
		['andrė', 'andre'],
		['andrę', 'andre'],

		['hvîc', 'hvic'],
		['hvïc', 'hvic'],
		['hvíc', 'hvic'],
		['hvīc', 'hvic'],
		['hvįc', 'hvic'],
		['hvìc', 'hvic'],

		['ûdo', 'udo'],
		['üdo', 'udo'],
		['ùdo', 'udo'],
		['údo', 'udo'],
		['ūdo', 'udo'],

		['heÿ', 'hey'],

		// ['gruß', 'grus'],
		['gruś', 'grus'],
		['gruš', 'grus'],

		['çool', 'cool'],
		['ćool', 'cool'],
		['čool', 'cool'],

		['ñice', 'nice'],
		['ńice', 'nice'],
	];

	// Walk the table in its original order; the first failing pair aborts the test.
	expectations.forEach(pair => {
		assert.equal(strings.removeAccents(pair[0]), pair[1]);
	});
});
test('encodeUTF8', function () {
function assertEncodeUTF8(str: string, expected: number[]): void {
const actual = strings.encodeUTF8(str);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册