diff --git a/src/Compilers/Core/CodeAnalysisTest/ObjectSerializationTests.cs b/src/Compilers/Core/CodeAnalysisTest/ObjectSerializationTests.cs index 3baceed5c2a7f41a98c703bd631fb4f0089d35f8..5c640655fb8f618497b7da7b769cf8a1c84625f6 100644 --- a/src/Compilers/Core/CodeAnalysisTest/ObjectSerializationTests.cs +++ b/src/Compilers/Core/CodeAnalysisTest/ObjectSerializationTests.cs @@ -1131,6 +1131,64 @@ private void TestRoundTripArray(T[] values) TestRoundTripValue(values); } + [Theory] + [CombinatorialData] + public void Encoding_UTF8(bool byteOrderMark) + { + TestRoundtripEncoding(new UTF8Encoding(byteOrderMark)); + } + + [Theory] + [CombinatorialData] + public void Encoding_UTF32(bool bigEndian, bool byteOrderMark) + { + TestRoundtripEncoding(new UTF32Encoding(bigEndian, byteOrderMark)); + } + + [Theory] + [CombinatorialData] + public void Encoding_Unicode(bool bigEndian, bool byteOrderMark) + { + TestRoundtripEncoding(new UnicodeEncoding(bigEndian, byteOrderMark)); + } + + [Fact] + public void Encoding_AllAvailable() + { + foreach (var info in Encoding.GetEncodings()) + { + TestRoundtripEncoding(Encoding.GetEncoding(info.Name)); + } + } + + private static void TestRoundtripEncoding(Encoding encoding) + { + using var stream = new MemoryStream(); + + using (var writer = new ObjectWriter(stream, leaveOpen: true)) + { + writer.WriteEncoding(encoding); + } + + stream.Position = 0; + + using var reader = ObjectReader.TryGetReader(stream); + Assert.NotNull(reader); + var actualEncoding = (Encoding)((Encoding)reader.ReadValue()).Clone(); + var expectedEncoding = (Encoding)encoding.Clone(); + + // set the fallbacks to the same instance so that equality comparison does not take them into account: + actualEncoding.EncoderFallback = EncoderFallback.ExceptionFallback; + actualEncoding.DecoderFallback = DecoderFallback.ExceptionFallback; + expectedEncoding.EncoderFallback = EncoderFallback.ExceptionFallback; + expectedEncoding.DecoderFallback = DecoderFallback.ExceptionFallback; 
+ + Assert.Equal(expectedEncoding.GetPreamble(), actualEncoding.GetPreamble()); + Assert.Equal(expectedEncoding.CodePage, actualEncoding.CodePage); + Assert.Equal(expectedEncoding.WebName, actualEncoding.WebName); + Assert.Equal(expectedEncoding, actualEncoding); + } + [Fact] public void TestObjectMapLimits() { diff --git a/src/Compilers/Core/Portable/Serialization/ObjectReader.cs b/src/Compilers/Core/Portable/Serialization/ObjectReader.cs index 33e2b79aa2f97cea8a912acb4e31d6600a54f4cb..dad5d93b28d250cb42412f357a062efc58e1c0aa 100644 --- a/src/Compilers/Core/Portable/Serialization/ObjectReader.cs +++ b/src/Compilers/Core/Portable/Serialization/ObjectReader.cs @@ -35,7 +35,7 @@ internal sealed partial class ObjectReader : IDisposable /// this version, just change VersionByte2. /// internal const byte VersionByte1 = 0b10101010; - internal const byte VersionByte2 = 0b00001010; + internal const byte VersionByte2 = 0b00001011; private readonly BinaryReader _reader; private readonly CancellationToken _cancellationToken; @@ -243,11 +243,31 @@ private object ReadValueWorker() case EncodingKind.Array_2: case EncodingKind.Array_3: return ReadArray(kind); + + case EncodingKind.EncodingName: return Encoding.GetEncoding(ReadString()); + case EncodingKind.EncodingUTF8: return s_encodingUTF8; + case EncodingKind.EncodingUTF8_BOM: return Encoding.UTF8; + case EncodingKind.EncodingUTF32_BE: return s_encodingUTF32_BE; + case EncodingKind.EncodingUTF32_BE_BOM: return s_encodingUTF32_BE_BOM; + case EncodingKind.EncodingUTF32_LE: return s_encodingUTF32_LE; + case EncodingKind.EncodingUTF32_LE_BOM: return Encoding.UTF32; + case EncodingKind.EncodingUnicode_BE: return s_encodingUnicode_BE; + case EncodingKind.EncodingUnicode_BE_BOM: return Encoding.BigEndianUnicode; + case EncodingKind.EncodingUnicode_LE: return s_encodingUnicode_LE; + case EncodingKind.EncodingUnicode_LE_BOM: return Encoding.Unicode; + default: throw ExceptionUtilities.UnexpectedValue(kind); } } + private static 
readonly Encoding s_encodingUTF8 = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false); + private static readonly Encoding s_encodingUTF32_BE = new UTF32Encoding(bigEndian: true, byteOrderMark: false); + private static readonly Encoding s_encodingUTF32_BE_BOM = new UTF32Encoding(bigEndian: true, byteOrderMark: true); + private static readonly Encoding s_encodingUTF32_LE = new UTF32Encoding(bigEndian: false, byteOrderMark: false); + private static readonly Encoding s_encodingUnicode_BE = new UnicodeEncoding(bigEndian: true, byteOrderMark: false); + private static readonly Encoding s_encodingUnicode_LE = new UnicodeEncoding(bigEndian: false, byteOrderMark: false); + /// <summary> /// A reference-id to object map, that can share base data efficiently. /// </summary> diff --git a/src/Compilers/Core/Portable/Serialization/ObjectWriter.cs b/src/Compilers/Core/Portable/Serialization/ObjectWriter.cs index aabf218b17efc636bedf4f56f84227b32db29370..457a8bab9b4b58275891fcc1fbffb76658ff6368 100644 --- a/src/Compilers/Core/Portable/Serialization/ObjectWriter.cs +++ b/src/Compilers/Core/Portable/Serialization/ObjectWriter.cs @@ -262,6 +262,10 @@ public void WriteValue(object? value) WriteArray(instance); } + else if (value is Encoding encoding) + { + WriteEncoding(encoding); + } else { WriteObject(instance: value, instanceAsWritable: null); @@ -787,6 +791,58 @@ private void WriteKnownType(Type type) this.WriteInt32(_binderSnapshot.GetTypeId(type)); } + public void WriteEncoding(Encoding? encoding) + { + var kind = GetEncodingKind(encoding); + WriteByte((byte)kind); + + if (kind == EncodingKind.EncodingName) + { + WriteString(encoding!.WebName); + } + } + + private static EncodingKind GetEncodingKind(Encoding? encoding) + { + if (encoding is null) + { + return EncodingKind.Null; + } + + switch (encoding.CodePage) + { + case 1200: + Debug.Assert(HasPreamble(Encoding.Unicode)); + return (encoding.Equals(Encoding.Unicode) || HasPreamble(encoding)) ? 
EncodingKind.EncodingUnicode_LE_BOM : EncodingKind.EncodingUnicode_LE; + + case 1201: + Debug.Assert(HasPreamble(Encoding.BigEndianUnicode)); + return (encoding.Equals(Encoding.BigEndianUnicode) || HasPreamble(encoding)) ? EncodingKind.EncodingUnicode_BE_BOM : EncodingKind.EncodingUnicode_BE; + + case 12000: + Debug.Assert(HasPreamble(Encoding.UTF32)); + return (encoding.Equals(Encoding.UTF32) || HasPreamble(encoding)) ? EncodingKind.EncodingUTF32_LE_BOM : EncodingKind.EncodingUTF32_LE; + + case 12001: + Debug.Assert(HasPreamble(Encoding.UTF32)); + return (encoding.Equals(Encoding.UTF32) || HasPreamble(encoding)) ? EncodingKind.EncodingUTF32_BE_BOM : EncodingKind.EncodingUTF32_BE; + + case 65001: + Debug.Assert(HasPreamble(Encoding.UTF8)); + return (encoding.Equals(Encoding.UTF8) || HasPreamble(encoding)) ? EncodingKind.EncodingUTF8_BOM : EncodingKind.EncodingUTF8; + + default: + return EncodingKind.EncodingName; + } + + static bool HasPreamble(Encoding encoding) +#if NETCOREAPP + => !encoding.Preamble.IsEmpty; +#else + => !encoding.GetPreamble().IsEmpty(); +#endif + } + private void WriteObject(object instance, IObjectWritable? instanceAsWritable) { RoslynDebug.Assert(instance != null); @@ -1240,8 +1296,24 @@ internal enum EncodingKind : byte /// StringType, - - Last = StringType + 1, + /// <summary> + /// Encoding serialized as <see cref="Encoding.WebName"/>. 
+ /// </summary> + EncodingName, + + // well-known encodings (parameterized by BOM) + EncodingUTF8, + EncodingUTF8_BOM, + EncodingUTF32_BE, + EncodingUTF32_BE_BOM, + EncodingUTF32_LE, + EncodingUTF32_LE_BOM, + EncodingUnicode_BE, + EncodingUnicode_BE_BOM, + EncodingUnicode_LE, + EncodingUnicode_LE_BOM, + + Last, } } } diff --git a/src/Workspaces/Core/Portable/Execution/SerializerService_Asset.cs b/src/Workspaces/Core/Portable/Execution/SerializerService_Asset.cs index 2007d41fcb8646460bab6e1ae728cd6513f204b5..3f53ca3f995f9ceeaa230e97b05c36b28cc899aa 100644 --- a/src/Workspaces/Core/Portable/Execution/SerializerService_Asset.cs +++ b/src/Workspaces/Core/Portable/Execution/SerializerService_Asset.cs @@ -5,6 +5,7 @@ #nullable enable using System.Linq; +using System.Text; using System.Threading; using Microsoft.CodeAnalysis.Diagnostics; using Microsoft.CodeAnalysis.Execution; @@ -26,7 +27,7 @@ public void SerializeSourceText(ITemporaryStorageWithName? storage, SourceText t cancellationToken.ThrowIfCancellationRequested(); writer.WriteInt32((int)text.ChecksumAlgorithm); - WriteTo(text.Encoding, writer, cancellationToken); + writer.WriteEncoding(text.Encoding); // TODO: refactor this part in its own abstraction (Bits) that has multiple sub types // rather than using enums @@ -49,7 +50,7 @@ private SourceText DeserializeSourceText(ObjectReader reader, CancellationToken // REVIEW: why IDE services doesnt care about checksumAlgorithm? 
_ = (SourceHashAlgorithm)reader.ReadInt32(); - var encoding = ReadEncodingFrom(reader, cancellationToken); + var encoding = (Encoding)reader.ReadValue(); var kind = (SerializationKinds)reader.ReadInt32(); if (kind == SerializationKinds.MemoryMapFile) diff --git a/src/Workspaces/Core/Portable/Execution/SerializerService_Encoding.cs b/src/Workspaces/Core/Portable/Execution/SerializerService_Encoding.cs deleted file mode 100644 index 34eaf5cf88ab39e2bce5e5329390d6a7268d101c..0000000000000000000000000000000000000000 --- a/src/Workspaces/Core/Portable/Execution/SerializerService_Encoding.cs +++ /dev/null @@ -1,115 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -#nullable enable - -using System.Diagnostics; -using System.Text; -using System.Threading; -using Roslyn.Utilities; - -namespace Microsoft.CodeAnalysis.Serialization -{ - internal partial class SerializerService - { - private enum EncodingId : byte - { - None = 0, - Named = 1, - - // well-known encodings (parameterized by BOM) - UTF8 = 2, - UTF8_BOM = 3, - UTF32_BE = 4, - UTF32_BE_BOM = 5, - UTF32_LE = 6, - UTF32_LE_BOM = 7, - Unicode_BE = 8, - Unicode_BE_BOM = 9, - Unicode_LE = 10, - Unicode_LE_BOM = 11, - - Count - } - - private static readonly Encoding?[] _cachedEncodings = new Encoding[(int)EncodingId.Count]; - - public static void WriteTo(Encoding? encoding, ObjectWriter writer, CancellationToken cancellationToken) - { - cancellationToken.ThrowIfCancellationRequested(); - - var kind = GetEncodingKind(encoding); - writer.WriteByte((byte)kind); - - if (kind == EncodingId.Named) - { - writer.WriteString(encoding!.WebName); - } - } - - private static EncodingId GetEncodingKind(Encoding? 
encoding) - { - if (encoding == null) - { - return EncodingId.None; - } - - switch (encoding.CodePage) - { - case 1200: - Debug.Assert(HasPreamble(Encoding.Unicode)); - return (encoding.Equals(Encoding.Unicode) || HasPreamble(encoding)) ? EncodingId.Unicode_LE_BOM : EncodingId.Unicode_LE; - - case 1201: - Debug.Assert(HasPreamble(Encoding.BigEndianUnicode)); - return (encoding.Equals(Encoding.BigEndianUnicode) || HasPreamble(encoding)) ? EncodingId.Unicode_BE_BOM : EncodingId.Unicode_BE; - - case 12000: - Debug.Assert(HasPreamble(Encoding.UTF32)); - return (encoding.Equals(Encoding.UTF32) || HasPreamble(encoding)) ? EncodingId.UTF32_LE_BOM : EncodingId.UTF32_LE; - - case 12001: - Debug.Assert(HasPreamble(Encoding.UTF32)); - return (encoding.Equals(Encoding.UTF32) || HasPreamble(encoding)) ? EncodingId.UTF32_BE_BOM : EncodingId.UTF32_BE; - - case 65001: - Debug.Assert(HasPreamble(Encoding.UTF8)); - return (encoding.Equals(Encoding.UTF8) || HasPreamble(encoding)) ? EncodingId.UTF8_BOM : EncodingId.UTF8; - - default: - return EncodingId.Named; - } - } - - private static bool HasPreamble(Encoding encoding) -#if NETCOREAPP - => !encoding.Preamble.IsEmpty; -#else - => !encoding.GetPreamble().IsEmpty(); -#endif - - public static Encoding? 
ReadEncodingFrom(ObjectReader reader, CancellationToken cancellationToken) - { - cancellationToken.ThrowIfCancellationRequested(); - - var kind = reader.ReadByte(); - return ((EncodingId)kind) switch - { - EncodingId.None => null, - EncodingId.Named => Encoding.GetEncoding(reader.ReadString()), - EncodingId.UTF8 => _cachedEncodings[kind] ??= new UTF8Encoding(encoderShouldEmitUTF8Identifier: false), - EncodingId.UTF8_BOM => Encoding.UTF8, - EncodingId.UTF32_BE => _cachedEncodings[kind] ??= new UTF32Encoding(bigEndian: true, byteOrderMark: false), - EncodingId.UTF32_BE_BOM => _cachedEncodings[kind] ??= new UTF32Encoding(bigEndian: true, byteOrderMark: true), - EncodingId.UTF32_LE => _cachedEncodings[kind] ??= new UTF32Encoding(bigEndian: false, byteOrderMark: false), - EncodingId.UTF32_LE_BOM => Encoding.UTF32, - EncodingId.Unicode_BE => _cachedEncodings[kind] ??= new UnicodeEncoding(bigEndian: true, byteOrderMark: false), - EncodingId.Unicode_BE_BOM => Encoding.BigEndianUnicode, - EncodingId.Unicode_LE => _cachedEncodings[kind] ??= new UnicodeEncoding(bigEndian: false, byteOrderMark: false), - EncodingId.Unicode_LE_BOM => Encoding.Unicode, - _ => throw ExceptionUtilities.UnexpectedValue(kind), - }; - } - } -} diff --git a/src/Workspaces/CoreTest/SerializationTests.cs b/src/Workspaces/CoreTest/SerializationTests.cs index f13839206a0de465f5db0153a719b8a17dda052c..047707ce84b1b617e6834a98a41b1bfd06060872 100644 --- a/src/Workspaces/CoreTest/SerializationTests.cs +++ b/src/Workspaces/CoreTest/SerializationTests.cs @@ -89,63 +89,5 @@ private static void TestSymbolSerialization(Document document, string symbolName Assert.True(id.Equals(did)); } - - private static void TextEncodingRoundrip(Encoding encoding) - { - using var stream = new MemoryStream(); - - using (var writer = new ObjectWriter(stream, leaveOpen: true)) - { - SerializerService.WriteTo(encoding, writer, CancellationToken.None); - } - - stream.Position = 0; - - using var reader = 
ObjectReader.TryGetReader(stream); - Assert.NotNull(reader); - var actualEncoding = (Encoding)SerializerService.ReadEncodingFrom(reader, CancellationToken.None).Clone(); - var expectedEncoding = (Encoding)encoding.Clone(); - - // set the fallbacks to the same instance so that equality comparison does not take them into account: - actualEncoding.EncoderFallback = EncoderFallback.ExceptionFallback; - actualEncoding.DecoderFallback = DecoderFallback.ExceptionFallback; - expectedEncoding.EncoderFallback = EncoderFallback.ExceptionFallback; - expectedEncoding.DecoderFallback = DecoderFallback.ExceptionFallback; - - Assert.Equal(expectedEncoding.GetPreamble(), actualEncoding.GetPreamble()); - Assert.Equal(expectedEncoding.CodePage, actualEncoding.CodePage); - Assert.Equal(expectedEncoding.WebName, actualEncoding.WebName); - Assert.Equal(expectedEncoding, actualEncoding); - } - - [Theory] - [CombinatorialData] - public void EncodingSerialization_UTF8(bool byteOrderMark) - { - TextEncodingRoundrip(new UTF8Encoding(byteOrderMark)); - } - - [Theory] - [CombinatorialData] - public void EncodingSerialization_UTF32(bool bigEndian, bool byteOrderMark) - { - TextEncodingRoundrip(new UTF32Encoding(bigEndian, byteOrderMark)); - } - - [Theory] - [CombinatorialData] - public void EncodingSerialization_Unicode(bool bigEndian, bool byteOrderMark) - { - TextEncodingRoundrip(new UnicodeEncoding(bigEndian, byteOrderMark)); - } - - [Fact] - public void EncodingSerialization_AllAvailable() - { - foreach (var info in Encoding.GetEncodings()) - { - TextEncodingRoundrip(Encoding.GetEncoding(info.Name)); - } - } } }