未验证 提交 5de6e1bf 编写于 作者: T Tomáš Matoušek 提交者: GitHub

Move Encoding serialization to the compiler layer (#45552)

上级 49e57ff6
...@@ -1131,6 +1131,64 @@ private void TestRoundTripArray<T>(T[] values) ...@@ -1131,6 +1131,64 @@ private void TestRoundTripArray<T>(T[] values)
TestRoundTripValue(values); TestRoundTripValue(values);
} }
/// <summary>
/// Round-trips a <see cref="UTF8Encoding"/> for each <paramref name="byteOrderMark"/> value
/// supplied by the theory data.
/// </summary>
[Theory]
[CombinatorialData]
public void Encoding_UTF8(bool byteOrderMark)
    => TestRoundtripEncoding(new UTF8Encoding(encoderShouldEmitUTF8Identifier: byteOrderMark));
/// <summary>
/// Round-trips a <see cref="UTF32Encoding"/> for each combination of endianness and
/// byte order mark supplied by the theory data.
/// </summary>
[Theory]
[CombinatorialData]
public void Encoding_UTF32(bool bigEndian, bool byteOrderMark)
    => TestRoundtripEncoding(new UTF32Encoding(bigEndian: bigEndian, byteOrderMark: byteOrderMark));
/// <summary>
/// Round-trips a <see cref="UnicodeEncoding"/> for each combination of endianness and
/// byte order mark supplied by the theory data.
/// </summary>
[Theory]
[CombinatorialData]
public void Encoding_Unicode(bool bigEndian, bool byteOrderMark)
    => TestRoundtripEncoding(new UnicodeEncoding(bigEndian: bigEndian, byteOrderMark: byteOrderMark));
/// <summary>
/// Round-trips every encoding reported by <see cref="Encoding.GetEncodings"/>,
/// resolving each one by name.
/// </summary>
[Fact]
public void Encoding_AllAvailable()
{
    foreach (var encodingInfo in Encoding.GetEncodings())
    {
        var encoding = Encoding.GetEncoding(encodingInfo.Name);
        TestRoundtripEncoding(encoding);
    }
}
/// <summary>
/// Writes <paramref name="encoding"/> with <c>ObjectWriter.WriteEncoding</c>, reads it back with
/// <c>ObjectReader.ReadValue</c>, and asserts that the result matches the original in preamble,
/// code page, web name and equality (ignoring fallbacks).
/// </summary>
private static void TestRoundtripEncoding(Encoding encoding)
{
    using var stream = new MemoryStream();

    // Dispose the writer before reading; the stream itself is left open for the reader.
    using (var writer = new ObjectWriter(stream, leaveOpen: true))
    {
        writer.WriteEncoding(encoding);
    }

    stream.Position = 0;

    using var reader = ObjectReader.TryGetReader(stream);
    Assert.NotNull(reader);

    var actualEncoding = CloneWithExceptionFallbacks((Encoding)reader.ReadValue());
    var expectedEncoding = CloneWithExceptionFallbacks(encoding);

    Assert.Equal(expectedEncoding.GetPreamble(), actualEncoding.GetPreamble());
    Assert.Equal(expectedEncoding.CodePage, actualEncoding.CodePage);
    Assert.Equal(expectedEncoding.WebName, actualEncoding.WebName);
    Assert.Equal(expectedEncoding, actualEncoding);

    // Works on a clone and sets the fallbacks to the same instances so that the
    // equality comparison does not take them into account.
    static Encoding CloneWithExceptionFallbacks(Encoding source)
    {
        var clone = (Encoding)source.Clone();
        clone.EncoderFallback = EncoderFallback.ExceptionFallback;
        clone.DecoderFallback = DecoderFallback.ExceptionFallback;
        return clone;
    }
}
[Fact] [Fact]
public void TestObjectMapLimits() public void TestObjectMapLimits()
{ {
......
...@@ -35,7 +35,7 @@ internal sealed partial class ObjectReader : IDisposable ...@@ -35,7 +35,7 @@ internal sealed partial class ObjectReader : IDisposable
/// this version, just change VersionByte2. /// this version, just change VersionByte2.
/// </summary> /// </summary>
internal const byte VersionByte1 = 0b10101010; internal const byte VersionByte1 = 0b10101010;
internal const byte VersionByte2 = 0b00001010; internal const byte VersionByte2 = 0b00001011;
private readonly BinaryReader _reader; private readonly BinaryReader _reader;
private readonly CancellationToken _cancellationToken; private readonly CancellationToken _cancellationToken;
...@@ -243,11 +243,31 @@ private object ReadValueWorker() ...@@ -243,11 +243,31 @@ private object ReadValueWorker()
case EncodingKind.Array_2: case EncodingKind.Array_2:
case EncodingKind.Array_3: case EncodingKind.Array_3:
return ReadArray(kind); return ReadArray(kind);
case EncodingKind.EncodingName: return Encoding.GetEncoding(ReadString());
case EncodingKind.EncodingUTF8: return s_encodingUTF8;
case EncodingKind.EncodingUTF8_BOM: return Encoding.UTF8;
case EncodingKind.EncodingUTF32_BE: return s_encodingUTF32_BE;
case EncodingKind.EncodingUTF32_BE_BOM: return s_encodingUTF32_BE_BOM;
case EncodingKind.EncodingUTF32_LE: return s_encodingUTF32_LE;
case EncodingKind.EncodingUTF32_LE_BOM: return Encoding.UTF32;
case EncodingKind.EncodingUnicode_BE: return s_encodingUnicode_BE;
case EncodingKind.EncodingUnicode_BE_BOM: return Encoding.BigEndianUnicode;
case EncodingKind.EncodingUnicode_LE: return s_encodingUnicode_LE;
case EncodingKind.EncodingUnicode_LE_BOM: return Encoding.Unicode;
default: default:
throw ExceptionUtilities.UnexpectedValue(kind); throw ExceptionUtilities.UnexpectedValue(kind);
} }
} }
// Shared instances of the well-known encodings that ReadValueWorker returns for the
// EncodingKind values it does not map to one of the static Encoding properties.
private static readonly Encoding s_encodingUTF8 = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false);
private static readonly Encoding s_encodingUTF32_BE = new UTF32Encoding(bigEndian: true, byteOrderMark: false);
private static readonly Encoding s_encodingUTF32_BE_BOM = new UTF32Encoding(bigEndian: true, byteOrderMark: true);
private static readonly Encoding s_encodingUTF32_LE = new UTF32Encoding(bigEndian: false, byteOrderMark: false);
private static readonly Encoding s_encodingUnicode_BE = new UnicodeEncoding(bigEndian: true, byteOrderMark: false);
private static readonly Encoding s_encodingUnicode_LE = new UnicodeEncoding(bigEndian: false, byteOrderMark: false);
/// <summary> /// <summary>
/// A reference-id to object map, that can share base data efficiently. /// A reference-id to object map, that can share base data efficiently.
/// </summary> /// </summary>
......
...@@ -262,6 +262,10 @@ public void WriteValue(object? value) ...@@ -262,6 +262,10 @@ public void WriteValue(object? value)
WriteArray(instance); WriteArray(instance);
} }
else if (value is Encoding encoding)
{
WriteEncoding(encoding);
}
else else
{ {
WriteObject(instance: value, instanceAsWritable: null); WriteObject(instance: value, instanceAsWritable: null);
...@@ -787,6 +791,58 @@ private void WriteKnownType(Type type) ...@@ -787,6 +791,58 @@ private void WriteKnownType(Type type)
this.WriteInt32(_binderSnapshot.GetTypeId(type)); this.WriteInt32(_binderSnapshot.GetTypeId(type));
} }
/// <summary>
/// Writes <paramref name="encoding"/> as a single <see cref="EncodingKind"/> byte, followed by the
/// encoding's <see cref="Encoding.WebName"/> when the kind is <see cref="EncodingKind.EncodingName"/>.
/// </summary>
/// <param name="encoding">The encoding to write; <see langword="null"/> is written as <see cref="EncodingKind.Null"/>.</param>
public void WriteEncoding(Encoding? encoding)
{
    var encodingKind = GetEncodingKind(encoding);
    WriteByte((byte)encodingKind);

    if (encodingKind != EncodingKind.EncodingName)
    {
        // Null and the well-known encodings are fully described by the kind byte.
        return;
    }

    // GetEncodingKind maps a null encoding to EncodingKind.Null, so it is non-null here.
    WriteString(encoding!.WebName);
}
/// <summary>
/// Maps <paramref name="encoding"/> to the <see cref="EncodingKind"/> used to serialize it.
/// </summary>
/// <param name="encoding">The encoding to classify; may be <see langword="null"/>.</param>
/// <returns>
/// <see cref="EncodingKind.Null"/> for <see langword="null"/>; one of the well-known UTF-8/UTF-16/UTF-32
/// kinds (selected by code page and by whether the encoding has a preamble); otherwise
/// <see cref="EncodingKind.EncodingName"/>.
/// </returns>
private static EncodingKind GetEncodingKind(Encoding? encoding)
{
    if (encoding is null)
    {
        return EncodingKind.Null;
    }

    switch (encoding.CodePage)
    {
        case 1200:
            // UTF-16 little-endian; the equality check short-circuits the preamble lookup for the shared instance.
            Debug.Assert(HasPreamble(Encoding.Unicode));
            return (encoding.Equals(Encoding.Unicode) || HasPreamble(encoding)) ? EncodingKind.EncodingUnicode_LE_BOM : EncodingKind.EncodingUnicode_LE;

        case 1201:
            // UTF-16 big-endian.
            Debug.Assert(HasPreamble(Encoding.BigEndianUnicode));
            return (encoding.Equals(Encoding.BigEndianUnicode) || HasPreamble(encoding)) ? EncodingKind.EncodingUnicode_BE_BOM : EncodingKind.EncodingUnicode_BE;

        case 12000:
            // UTF-32 little-endian.
            Debug.Assert(HasPreamble(Encoding.UTF32));
            return (encoding.Equals(Encoding.UTF32) || HasPreamble(encoding)) ? EncodingKind.EncodingUTF32_LE_BOM : EncodingKind.EncodingUTF32_LE;

        case 12001:
            // UTF-32 big-endian. Encoding.UTF32 is the little-endian encoding (handled by case 12000),
            // so comparing against it here was a dead check; the preamble alone decides the kind.
            return HasPreamble(encoding) ? EncodingKind.EncodingUTF32_BE_BOM : EncodingKind.EncodingUTF32_BE;

        case 65001:
            // UTF-8.
            Debug.Assert(HasPreamble(Encoding.UTF8));
            return (encoding.Equals(Encoding.UTF8) || HasPreamble(encoding)) ? EncodingKind.EncodingUTF8_BOM : EncodingKind.EncodingUTF8;

        default:
            // Anything else is serialized by name.
            return EncodingKind.EncodingName;
    }

    // True when the encoding reports a non-empty preamble (byte order mark).
    static bool HasPreamble(Encoding encoding)
#if NETCOREAPP
        => !encoding.Preamble.IsEmpty;
#else
        => !encoding.GetPreamble().IsEmpty();
#endif
}
private void WriteObject(object instance, IObjectWritable? instanceAsWritable) private void WriteObject(object instance, IObjectWritable? instanceAsWritable)
{ {
RoslynDebug.Assert(instance != null); RoslynDebug.Assert(instance != null);
...@@ -1240,8 +1296,24 @@ internal enum EncodingKind : byte ...@@ -1240,8 +1296,24 @@ internal enum EncodingKind : byte
/// </summary> /// </summary>
StringType, StringType,
/// <summary>
Last = StringType + 1, /// Encoding serialized as <see cref="Encoding.WebName"/>.
/// </summary>
EncodingName,
// well-known encodings (parameterized by BOM)
EncodingUTF8,
EncodingUTF8_BOM,
EncodingUTF32_BE,
EncodingUTF32_BE_BOM,
EncodingUTF32_LE,
EncodingUTF32_LE_BOM,
EncodingUnicode_BE,
EncodingUnicode_BE_BOM,
EncodingUnicode_LE,
EncodingUnicode_LE_BOM,
Last,
} }
} }
} }
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
#nullable enable #nullable enable
using System.Linq; using System.Linq;
using System.Text;
using System.Threading; using System.Threading;
using Microsoft.CodeAnalysis.Diagnostics; using Microsoft.CodeAnalysis.Diagnostics;
using Microsoft.CodeAnalysis.Execution; using Microsoft.CodeAnalysis.Execution;
...@@ -26,7 +27,7 @@ public void SerializeSourceText(ITemporaryStorageWithName? storage, SourceText t ...@@ -26,7 +27,7 @@ public void SerializeSourceText(ITemporaryStorageWithName? storage, SourceText t
cancellationToken.ThrowIfCancellationRequested(); cancellationToken.ThrowIfCancellationRequested();
writer.WriteInt32((int)text.ChecksumAlgorithm); writer.WriteInt32((int)text.ChecksumAlgorithm);
WriteTo(text.Encoding, writer, cancellationToken); writer.WriteEncoding(text.Encoding);
// TODO: refactor this part in its own abstraction (Bits) that has multiple sub types // TODO: refactor this part in its own abstraction (Bits) that has multiple sub types
// rather than using enums // rather than using enums
...@@ -49,7 +50,7 @@ private SourceText DeserializeSourceText(ObjectReader reader, CancellationToken ...@@ -49,7 +50,7 @@ private SourceText DeserializeSourceText(ObjectReader reader, CancellationToken
// REVIEW: why IDE services doesnt care about checksumAlgorithm? // REVIEW: why IDE services doesnt care about checksumAlgorithm?
_ = (SourceHashAlgorithm)reader.ReadInt32(); _ = (SourceHashAlgorithm)reader.ReadInt32();
var encoding = ReadEncodingFrom(reader, cancellationToken); var encoding = (Encoding)reader.ReadValue();
var kind = (SerializationKinds)reader.ReadInt32(); var kind = (SerializationKinds)reader.ReadInt32();
if (kind == SerializationKinds.MemoryMapFile) if (kind == SerializationKinds.MemoryMapFile)
......
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
#nullable enable
using System.Diagnostics;
using System.Text;
using System.Threading;
using Roslyn.Utilities;
namespace Microsoft.CodeAnalysis.Serialization
{
    /// <summary>
    /// Part of <see cref="SerializerService"/> that writes and reads <see cref="Encoding"/> values
    /// as a one-byte <see cref="EncodingId"/>, followed by the web name for encodings that are not well-known.
    /// </summary>
    internal partial class SerializerService
    {
        /// <summary>
        /// Tag byte written ahead of an encoding. The numeric values are part of the serialized format.
        /// </summary>
        private enum EncodingId : byte
        {
            None = 0,
            Named = 1,

            // well-known encodings (parameterized by BOM)
            UTF8 = 2,
            UTF8_BOM = 3,
            UTF32_BE = 4,
            UTF32_BE_BOM = 5,
            UTF32_LE = 6,
            UTF32_LE_BOM = 7,
            Unicode_BE = 8,
            Unicode_BE_BOM = 9,
            Unicode_LE = 10,
            Unicode_LE_BOM = 11,

            Count
        }

        // Lazily populated instances of well-known encodings, indexed by EncodingId.
        // NOTE(review): population via `??=` in ReadEncodingFrom is not synchronized; presumably benign
        // because racing callers would create equivalent instances - confirm.
        private static readonly Encoding?[] _cachedEncodings = new Encoding[(int)EncodingId.Count];

        /// <summary>
        /// Writes <paramref name="encoding"/> to <paramref name="writer"/> as an <see cref="EncodingId"/> byte,
        /// followed by <see cref="Encoding.WebName"/> when the id is <see cref="EncodingId.Named"/>.
        /// </summary>
        /// <param name="encoding">The encoding to write; <see langword="null"/> is written as <see cref="EncodingId.None"/>.</param>
        /// <param name="writer">The writer that receives the data.</param>
        /// <param name="cancellationToken">Checked once before anything is written.</param>
        public static void WriteTo(Encoding? encoding, ObjectWriter writer, CancellationToken cancellationToken)
        {
            cancellationToken.ThrowIfCancellationRequested();

            var kind = GetEncodingKind(encoding);
            writer.WriteByte((byte)kind);

            if (kind == EncodingId.Named)
            {
                // GetEncodingKind maps null to EncodingId.None, so the encoding is non-null here.
                writer.WriteString(encoding!.WebName);
            }
        }

        /// <summary>
        /// Maps <paramref name="encoding"/> to the <see cref="EncodingId"/> used to serialize it, based on its
        /// code page and on whether it has a preamble.
        /// </summary>
        private static EncodingId GetEncodingKind(Encoding? encoding)
        {
            if (encoding == null)
            {
                return EncodingId.None;
            }

            switch (encoding.CodePage)
            {
                case 1200:
                    // UTF-16 little-endian.
                    Debug.Assert(HasPreamble(Encoding.Unicode));
                    return (encoding.Equals(Encoding.Unicode) || HasPreamble(encoding)) ? EncodingId.Unicode_LE_BOM : EncodingId.Unicode_LE;

                case 1201:
                    // UTF-16 big-endian.
                    Debug.Assert(HasPreamble(Encoding.BigEndianUnicode));
                    return (encoding.Equals(Encoding.BigEndianUnicode) || HasPreamble(encoding)) ? EncodingId.Unicode_BE_BOM : EncodingId.Unicode_BE;

                case 12000:
                    // UTF-32 little-endian.
                    Debug.Assert(HasPreamble(Encoding.UTF32));
                    return (encoding.Equals(Encoding.UTF32) || HasPreamble(encoding)) ? EncodingId.UTF32_LE_BOM : EncodingId.UTF32_LE;

                case 12001:
                    // UTF-32 big-endian. Encoding.UTF32 is the little-endian encoding (handled by case 12000),
                    // so comparing against it here was a dead check; the preamble alone decides the id.
                    return HasPreamble(encoding) ? EncodingId.UTF32_BE_BOM : EncodingId.UTF32_BE;

                case 65001:
                    // UTF-8.
                    Debug.Assert(HasPreamble(Encoding.UTF8));
                    return (encoding.Equals(Encoding.UTF8) || HasPreamble(encoding)) ? EncodingId.UTF8_BOM : EncodingId.UTF8;

                default:
                    // Anything else is serialized by name.
                    return EncodingId.Named;
            }
        }

        /// <summary>
        /// Returns <see langword="true"/> when <paramref name="encoding"/> reports a non-empty preamble.
        /// </summary>
        private static bool HasPreamble(Encoding encoding)
#if NETCOREAPP
            => !encoding.Preamble.IsEmpty;
#else
            => !encoding.GetPreamble().IsEmpty();
#endif

        /// <summary>
        /// Reads an encoding previously written by <see cref="WriteTo"/>.
        /// </summary>
        /// <param name="reader">The reader positioned at the <see cref="EncodingId"/> byte.</param>
        /// <param name="cancellationToken">Checked once before anything is read.</param>
        /// <returns>
        /// <see langword="null"/> for <see cref="EncodingId.None"/>; the encoding looked up by name for
        /// <see cref="EncodingId.Named"/>; otherwise a static <see cref="Encoding"/> property or a cached instance.
        /// </returns>
        public static Encoding? ReadEncodingFrom(ObjectReader reader, CancellationToken cancellationToken)
        {
            cancellationToken.ThrowIfCancellationRequested();

            var kind = reader.ReadByte();
            return ((EncodingId)kind) switch
            {
                EncodingId.None => null,
                EncodingId.Named => Encoding.GetEncoding(reader.ReadString()),
                EncodingId.UTF8 => _cachedEncodings[kind] ??= new UTF8Encoding(encoderShouldEmitUTF8Identifier: false),
                EncodingId.UTF8_BOM => Encoding.UTF8,
                EncodingId.UTF32_BE => _cachedEncodings[kind] ??= new UTF32Encoding(bigEndian: true, byteOrderMark: false),
                EncodingId.UTF32_BE_BOM => _cachedEncodings[kind] ??= new UTF32Encoding(bigEndian: true, byteOrderMark: true),
                EncodingId.UTF32_LE => _cachedEncodings[kind] ??= new UTF32Encoding(bigEndian: false, byteOrderMark: false),
                EncodingId.UTF32_LE_BOM => Encoding.UTF32,
                EncodingId.Unicode_BE => _cachedEncodings[kind] ??= new UnicodeEncoding(bigEndian: true, byteOrderMark: false),
                EncodingId.Unicode_BE_BOM => Encoding.BigEndianUnicode,
                EncodingId.Unicode_LE => _cachedEncodings[kind] ??= new UnicodeEncoding(bigEndian: false, byteOrderMark: false),
                EncodingId.Unicode_LE_BOM => Encoding.Unicode,
                _ => throw ExceptionUtilities.UnexpectedValue(kind),
            };
        }
    }
}
...@@ -89,63 +89,5 @@ private static void TestSymbolSerialization(Document document, string symbolName ...@@ -89,63 +89,5 @@ private static void TestSymbolSerialization(Document document, string symbolName
Assert.True(id.Equals(did)); Assert.True(id.Equals(did));
} }
/// <summary>
/// Writes <paramref name="encoding"/> with <c>SerializerService.WriteTo</c>, reads it back with
/// <c>SerializerService.ReadEncodingFrom</c>, and asserts that the result matches the original in
/// preamble, code page, web name and equality (ignoring fallbacks).
/// </summary>
private static void TextEncodingRoundrip(Encoding encoding)
{
    using var stream = new MemoryStream();

    // Dispose the writer before reading; the stream itself is left open for the reader.
    using (var writer = new ObjectWriter(stream, leaveOpen: true))
    {
        SerializerService.WriteTo(encoding, writer, CancellationToken.None);
    }

    stream.Position = 0;

    using var reader = ObjectReader.TryGetReader(stream);
    Assert.NotNull(reader);

    var deserialized = SerializerService.ReadEncodingFrom(reader, CancellationToken.None);
    var actualEncoding = CloneWithExceptionFallbacks(deserialized);
    var expectedEncoding = CloneWithExceptionFallbacks(encoding);

    Assert.Equal(expectedEncoding.GetPreamble(), actualEncoding.GetPreamble());
    Assert.Equal(expectedEncoding.CodePage, actualEncoding.CodePage);
    Assert.Equal(expectedEncoding.WebName, actualEncoding.WebName);
    Assert.Equal(expectedEncoding, actualEncoding);

    // Works on a clone and sets the fallbacks to the same instances so that the
    // equality comparison does not take them into account.
    static Encoding CloneWithExceptionFallbacks(Encoding source)
    {
        var clone = (Encoding)source.Clone();
        clone.EncoderFallback = EncoderFallback.ExceptionFallback;
        clone.DecoderFallback = DecoderFallback.ExceptionFallback;
        return clone;
    }
}
/// <summary>
/// Round-trips a <see cref="UTF8Encoding"/> for each <paramref name="byteOrderMark"/> value
/// supplied by the theory data.
/// </summary>
[Theory]
[CombinatorialData]
public void EncodingSerialization_UTF8(bool byteOrderMark)
    => TextEncodingRoundrip(new UTF8Encoding(encoderShouldEmitUTF8Identifier: byteOrderMark));
/// <summary>
/// Round-trips a <see cref="UTF32Encoding"/> for each combination of endianness and
/// byte order mark supplied by the theory data.
/// </summary>
[Theory]
[CombinatorialData]
public void EncodingSerialization_UTF32(bool bigEndian, bool byteOrderMark)
    => TextEncodingRoundrip(new UTF32Encoding(bigEndian: bigEndian, byteOrderMark: byteOrderMark));
/// <summary>
/// Round-trips a <see cref="UnicodeEncoding"/> for each combination of endianness and
/// byte order mark supplied by the theory data.
/// </summary>
[Theory]
[CombinatorialData]
public void EncodingSerialization_Unicode(bool bigEndian, bool byteOrderMark)
    => TextEncodingRoundrip(new UnicodeEncoding(bigEndian: bigEndian, byteOrderMark: byteOrderMark));
/// <summary>
/// Round-trips every encoding reported by <see cref="Encoding.GetEncodings"/>,
/// resolving each one by name.
/// </summary>
[Fact]
public void EncodingSerialization_AllAvailable()
{
    foreach (var encodingInfo in Encoding.GetEncodings())
    {
        var encoding = Encoding.GetEncoding(encodingInfo.Name);
        TextEncodingRoundrip(encoding);
    }
}
} }
} }
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册