Merge pull request #130211 from microsoft/dev/mjbvz/always-text-encoder

Remove strings.encodeUtf8

Merge pull request #130211 from microsoft/dev/mjbvz/always-text-encoder
Remove strings.encodeUtf8
0dbbb581 · Alexandru Dima · GitHub · b794b922 · bfe6fda2 · 0dbbb581
3 changed files
--- a/src/vs/base/common/buffer.ts
+++ b/src/vs/base/common/buffer.ts
@@ -4,13 +4,10 @@
 *--------------------------------------------------------------------------------------------*/

 import * as streams from 'vs/base/common/stream';
-import * as strings from 'vs/base/common/strings';

 declare const Buffer: any;

 const hasBuffer = (typeof Buffer !== 'undefined');
-const hasTextEncoder = (typeof TextEncoder !== 'undefined');
-const hasTextDecoder = (typeof TextDecoder !== 'undefined');

 let textEncoder: TextEncoder | null;
 let textDecoder: TextDecoder | null;
@@ -38,13 +35,11 @@ export class VSBuffer {
 		const dontUseNodeBuffer = options?.dontUseNodeBuffer || false;
 		if (!dontUseNodeBuffer && hasBuffer) {
 			return new VSBuffer(Buffer.from(source));
-		} else if (hasTextEncoder) {
+		} else {
 			if (!textEncoder) {
 				textEncoder = new TextEncoder();
 			}
 			return new VSBuffer(textEncoder.encode(source));
-		} else {
-			return new VSBuffer(strings.encodeUTF8(source));
 		}
 	}

@@ -78,13 +73,11 @@ export class VSBuffer {
 	toString(): string {
 		if (hasBuffer) {
 			return this.buffer.toString();
-		} else if (hasTextDecoder) {
+		} else {
 			if (!textDecoder) {
 				textDecoder = new TextDecoder();
 			}
 			return textDecoder.decode(this.buffer);
-		} else {
-			return strings.decodeUTF8(this.buffer);
 		}
 	}


--- a/src/vs/base/common/strings.ts
+++ b/src/vs/base/common/strings.ts
@@ -573,119 +573,6 @@ export function getCharContainingOffset(str: string, offset: number): [number, n
 	return _getCharContainingOffset(str, offset);
 }

-/**
- * A manual encoding of `str` to UTF8.
- * Use only in environments which do not offer native conversion methods!
- */
-export function encodeUTF8(str: string): Uint8Array {
-	const strLen = str.length;
-
-	// See https://en.wikipedia.org/wiki/UTF-8
-
-	// first loop to establish needed buffer size
-	let neededSize = 0;
-	let strOffset = 0;
-	while (strOffset < strLen) {
-		const codePoint = getNextCodePoint(str, strLen, strOffset);
-		strOffset += (codePoint >= Constants.UNICODE_SUPPLEMENTARY_PLANE_BEGIN ? 2 : 1);
-
-		if (codePoint < 0x0080) {
-			neededSize += 1;
-		} else if (codePoint < 0x0800) {
-			neededSize += 2;
-		} else if (codePoint < 0x10000) {
-			neededSize += 3;
-		} else {
-			neededSize += 4;
-		}
-	}
-
-	// second loop to actually encode
-	const arr = new Uint8Array(neededSize);
-	strOffset = 0;
-	let arrOffset = 0;
-	while (strOffset < strLen) {
-		const codePoint = getNextCodePoint(str, strLen, strOffset);
-		strOffset += (codePoint >= Constants.UNICODE_SUPPLEMENTARY_PLANE_BEGIN ? 2 : 1);
-
-		if (codePoint < 0x0080) {
-			arr[arrOffset++] = codePoint;
-		} else if (codePoint < 0x0800) {
-			arr[arrOffset++] = 0b11000000 | ((codePoint & 0b00000000000000000000011111000000) >>> 6);
-			arr[arrOffset++] = 0b10000000 | ((codePoint & 0b00000000000000000000000000111111) >>> 0);
-		} else if (codePoint < 0x10000) {
-			arr[arrOffset++] = 0b11100000 | ((codePoint & 0b00000000000000001111000000000000) >>> 12);
-			arr[arrOffset++] = 0b10000000 | ((codePoint & 0b00000000000000000000111111000000) >>> 6);
-			arr[arrOffset++] = 0b10000000 | ((codePoint & 0b00000000000000000000000000111111) >>> 0);
-		} else {
-			arr[arrOffset++] = 0b11110000 | ((codePoint & 0b00000000000111000000000000000000) >>> 18);
-			arr[arrOffset++] = 0b10000000 | ((codePoint & 0b00000000000000111111000000000000) >>> 12);
-			arr[arrOffset++] = 0b10000000 | ((codePoint & 0b00000000000000000000111111000000) >>> 6);
-			arr[arrOffset++] = 0b10000000 | ((codePoint & 0b00000000000000000000000000111111) >>> 0);
-		}
-	}
-
-	return arr;
-}
-
-/**
- * A manual decoding of a UTF8 string.
- * Use only in environments which do not offer native conversion methods!
- */
-export function decodeUTF8(buffer: Uint8Array): string {
-	// https://en.wikipedia.org/wiki/UTF-8
-
-	const len = buffer.byteLength;
-	const result: string[] = [];
-	let offset = 0;
-	while (offset < len) {
-		const v0 = buffer[offset];
-		let codePoint: number;
-		if (v0 >= 0b11110000 && offset + 3 < len) {
-			// 4 bytes
-			codePoint = (
-				(((buffer[offset++] & 0b00000111) << 18) >>> 0)
-				| (((buffer[offset++] & 0b00111111) << 12) >>> 0)
-				| (((buffer[offset++] & 0b00111111) << 6) >>> 0)
-				| (((buffer[offset++] & 0b00111111) << 0) >>> 0)
-			);
-		} else if (v0 >= 0b11100000 && offset + 2 < len) {
-			// 3 bytes
-			codePoint = (
-				(((buffer[offset++] & 0b00001111) << 12) >>> 0)
-				| (((buffer[offset++] & 0b00111111) << 6) >>> 0)
-				| (((buffer[offset++] & 0b00111111) << 0) >>> 0)
-			);
-		} else if (v0 >= 0b11000000 && offset + 1 < len) {
-			// 2 bytes
-			codePoint = (
-				(((buffer[offset++] & 0b00011111) << 6) >>> 0)
-				| (((buffer[offset++] & 0b00111111) << 0) >>> 0)
-			);
-		} else {
-			// 1 byte
-			codePoint = buffer[offset++];
-		}
-
-		if ((codePoint >= 0 && codePoint <= 0xD7FF) || (codePoint >= 0xE000 && codePoint <= 0xFFFF)) {
-			// Basic Multilingual Plane
-			result.push(String.fromCharCode(codePoint));
-		} else if (codePoint >= 0x010000 && codePoint <= 0x10FFFF) {
-			// Supplementary Planes
-			const uPrime = codePoint - 0x10000;
-			const w1 = 0xD800 + ((uPrime & 0b11111111110000000000) >>> 10);
-			const w2 = 0xDC00 + ((uPrime & 0b00000000001111111111) >>> 0);
-			result.push(String.fromCharCode(w1));
-			result.push(String.fromCharCode(w2));
-		} else {
-			// illegal code point
-			result.push(String.fromCharCode(0xFFFD));
-		}
-	}
-
-	return result.join('');
-}
-
 /**
 * Generated using https://github.com/alexdima/unicode-utils/blob/master/generate-rtl-test.js
 */

--- a/src/vs/base/test/common/strings.test.ts
+++ b/src/vs/base/test/common/strings.test.ts
@@ -396,40 +396,6 @@ suite('Strings', () => {
 		assert.strictEqual(strings.getNLines('foo', 0), '');
 	});

-	test('encodeUTF8', function () {
-		function assertEncodeUTF8(str: string, expected: number[]): void {
-			const actual = strings.encodeUTF8(str);
-			const actualArr: number[] = [];
-			for (let offset = 0; offset < actual.byteLength; offset++) {
-				actualArr[offset] = actual[offset];
-			}
-			assert.deepStrictEqual(actualArr, expected);
-		}
-
-		function assertDecodeUTF8(data: number[], expected: string): void {
-			const actual = strings.decodeUTF8(new Uint8Array(data));
-			assert.deepStrictEqual(actual, expected);
-		}
-
-		function assertEncodeDecodeUTF8(str: string, buff: number[]): void {
-			assertEncodeUTF8(str, buff);
-			assertDecodeUTF8(buff, str);
-		}
-
-		assertEncodeDecodeUTF8('\u0000', [0]);
-		assertEncodeDecodeUTF8('!', [33]);
-		assertEncodeDecodeUTF8('\u007F', [127]);
-		assertEncodeDecodeUTF8('\u0080', [194, 128]);
-		assertEncodeDecodeUTF8('Ɲ', [198, 157]);
-		assertEncodeDecodeUTF8('\u07FF', [223, 191]);
-		assertEncodeDecodeUTF8('\u0800', [224, 160, 128]);
-		assertEncodeDecodeUTF8('ஂ', [224, 174, 130]);
-		assertEncodeDecodeUTF8('\uffff', [239, 191, 191]);
-		assertEncodeDecodeUTF8('\u10000', [225, 128, 128, 48]);
-		assertEncodeDecodeUTF8('🧝', [240, 159, 167, 157]);
-
-	});
-
 	test('getGraphemeBreakType', () => {
 		assert.strictEqual(strings.getGraphemeBreakType(0xBC1), strings.GraphemeBreakType.SpacingMark);
 	});