提交 9b5083df 编写于 作者: P Paul Harrington

Remove CodePage1252Encoding and try to use the built-in Encoding. If not...

Remove CodePage1252Encoding and try to use the built-in Encoding. If it is not available, fall back to Latin1.
上级 ae843a15
......@@ -57,7 +57,6 @@
<DesignTime>True</DesignTime>
<DependentUpon>CodeAnalysisDesktopResources.resx</DependentUpon>
</Compile>
<Compile Include="CodePage1252Encoding.cs" />
<Compile Include="CommandLine\CommandLineReference.cs" />
<Compile Include="CommandLine\CommandLineSourceFile.cs" />
<Compile Include="CommandLine\CommonCommandLineArguments.cs" />
......
using System;
using System.Text;
namespace Microsoft.CodeAnalysis.Text
{
    /// <summary>
    /// CodePage 1252 decoder. This is a single byte character set encoding that is very close
    /// to Latin1 except some of the code points in the control set 1 range (0x80 to 0x9F) are
    /// replaced with typographic characters.
    /// </summary>
    /// <remarks>
    /// Only decoding (bytes to chars) is implemented. Every encoding member
    /// (<see cref="GetByteCount(char[], int, int)"/>, <see cref="GetBytes(char[], int, int, byte[], int)"/>
    /// and <see cref="GetMaxByteCount(int)"/>) throws <see cref="NotSupportedException"/>.
    /// </remarks>
    internal sealed class CodePage1252Encoding : Encoding
    {
        /// <summary>
        /// The shared instance of this encoding.
        /// </summary>
        public static readonly CodePage1252Encoding Instance = new CodePage1252Encoding();

        /// <summary>
        /// Always 1252.
        /// </summary>
        public override int CodePage => 1252;

        /// <summary>
        /// Not supported: this type only decodes.
        /// </summary>
        /// <exception cref="NotSupportedException">Always thrown.</exception>
        public override int GetByteCount(char[] chars, int index, int count)
        {
            // This is a decoder only
            throw new NotSupportedException();
        }

        /// <summary>
        /// Not supported: this type only decodes.
        /// </summary>
        /// <exception cref="NotSupportedException">Always thrown.</exception>
        public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
        {
            // This is a decoder only
            throw new NotSupportedException();
        }

        /// <summary>
        /// Returns the number of characters produced by decoding <paramref name="count"/> bytes.
        /// </summary>
        /// <returns><paramref name="count"/>, unchanged.</returns>
        // This is SBCS, so the number of characters is the same as the number of bytes.
        public override int GetCharCount(byte[] bytes, int index, int count) => count;

        /// <summary>
        /// Returns the number of characters produced by decoding <paramref name="count"/> bytes.
        /// </summary>
        /// <returns><paramref name="count"/>, unchanged.</returns>
        // This is SBCS, so the number of characters is the same as the number of bytes.
        public override unsafe int GetCharCount(byte* bytes, int count) => count;

        /// <summary>
        /// Decodes <paramref name="byteCount"/> bytes of <paramref name="bytes"/>, starting at
        /// <paramref name="byteIndex"/>, into <paramref name="chars"/> starting at <paramref name="charIndex"/>.
        /// </summary>
        /// <returns>The number of characters written, which equals <paramref name="byteCount"/>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="bytes"/> or <paramref name="chars"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">An index or count is negative or lies outside its array.</exception>
        /// <exception cref="ArgumentException"><paramref name="chars"/> has too little room after <paramref name="charIndex"/>.</exception>
        public override int GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex)
        {
            if (bytes == null)
            {
                throw new ArgumentNullException(nameof(bytes));
            }

            if (chars == null)
            {
                throw new ArgumentNullException(nameof(chars));
            }

            if (byteIndex < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(byteIndex));
            }

            if (byteCount < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(byteCount));
            }

            // Written as a subtraction so that byteIndex + byteCount cannot overflow.
            if (bytes.Length - byteIndex < byteCount)
            {
                throw new ArgumentOutOfRangeException(nameof(bytes));
            }

            if (charIndex < 0 || charIndex > chars.Length)
            {
                throw new ArgumentOutOfRangeException(nameof(charIndex));
            }

            // Check capacity up front so the destination is never left partially written.
            if (chars.Length - charIndex < byteCount)
            {
                throw new ArgumentException("The output char buffer is too small to contain the decoded characters.", nameof(chars));
            }

            int byteEndIndex = byteIndex + byteCount;
            while (byteIndex < byteEndIndex)
            {
                chars[charIndex++] = Decode(bytes[byteIndex++]);
            }

            return byteCount;
        }

        /// <summary>
        /// Decodes <paramref name="byteCount"/> bytes starting at <paramref name="bytes"/> into the
        /// buffer starting at <paramref name="chars"/>.
        /// </summary>
        /// <returns>The number of characters written, which equals <paramref name="byteCount"/>.</returns>
        /// <exception cref="ArgumentException"><paramref name="charCount"/> is smaller than <paramref name="byteCount"/>.</exception>
        public override unsafe int GetChars(byte* bytes, int byteCount, char* chars, int charCount)
        {
            if (charCount < byteCount)
            {
                // Pass the explanation as the message and the parameter as paramName;
                // the single-argument constructor would treat the name as the message.
                throw new ArgumentException("The output char buffer is too small to contain the decoded characters.", nameof(charCount));
            }

            byte* end = bytes + byteCount;
            while (bytes < end)
            {
                *chars++ = Decode(*bytes++);
            }

            return byteCount;
        }

        /// <summary>
        /// Characters for bytes 0x80 through 0x9F, indexed by (byte - 0x80).
        /// Entries commented as control characters map to the code point equal to the byte value.
        /// </summary>
        private static readonly char[] s_c1ControlChars =
        {
            /* x80 */ '\u20ac' /* Euro Sign */,
            /* x81 */ '\u0081' /* <control> */,
            /* x82 */ '\u201a' /* Single Low-9 Quotation Mark */,
            /* x83 */ '\u0192' /* Latin Small Letter F With Hook */,
            /* x84 */ '\u201e' /* Double Low-9 Quotation Mark */,
            /* x85 */ '\u2026' /* Horizontal Ellipsis */,
            /* x86 */ '\u2020' /* Dagger */,
            /* x87 */ '\u2021' /* Double Dagger */,
            /* x88 */ '\u02c6' /* Modifier Letter Circumflex Accent */,
            /* x89 */ '\u2030' /* Per Mille Sign */,
            /* x8a */ '\u0160' /* Latin Capital Letter S With Caron */,
            /* x8b */ '\u2039' /* Single Left-Pointing Angle Quotation Mark */,
            /* x8c */ '\u0152' /* Latin Capital Ligature Oe */,
            /* x8d */ '\u008d' /* Reverse Line Feed */,
            /* x8e */ '\u017d' /* Latin Capital Letter Z With Caron */,
            /* x8f */ '\u008f' /* Single Shift Three */,
            /* x90 */ '\u0090' /* Device Control String */,
            /* x91 */ '\u2018' /* Left Single Quotation Mark */,
            /* x92 */ '\u2019' /* Right Single Quotation Mark */,
            /* x93 */ '\u201c' /* Left Double Quotation Mark */,
            /* x94 */ '\u201d' /* Right Double Quotation Mark */,
            /* x95 */ '\u2022' /* Bullet */,
            /* x96 */ '\u2013' /* En Dash */,
            /* x97 */ '\u2014' /* Em Dash */,
            /* x98 */ '\u02dc' /* Small Tilde */,
            /* x99 */ '\u2122' /* Trade Mark Sign */,
            /* x9a */ '\u0161' /* Latin Small Letter S With Caron */,
            /* x9b */ '\u203a' /* Single Right-Pointing Angle Quotation Mark */,
            /* x9c */ '\u0153' /* Latin Small Ligature Oe */,
            /* x9d */ '\u009d' /* Operating System Command */,
            /* x9e */ '\u017e' /* Latin Small Letter Z With Caron */,
            /* x9f */ '\u0178' /* Latin Capital Letter Y With Diaeresis */,
        };

        /// <summary>
        /// Maps a single byte to its character.
        /// </summary>
        private static char Decode(byte b)
        {
            // For characters in the C1 Control set (0x80 to 0x9F), use the table.
            // Otherwise, the mapping is 1 to 1.
            // The unsigned subtraction wraps for b < 0x80, so one comparison covers both bounds.
            uint c1 = unchecked(b - 0x80u);
            return c1 < 0x20u ? s_c1ControlChars[c1] : (char)b;
        }

        /// <summary>
        /// Not supported: this type only decodes.
        /// </summary>
        /// <exception cref="NotSupportedException">Always thrown.</exception>
        public override int GetMaxByteCount(int charCount)
        {
            // This is a decoder only
            throw new NotSupportedException();
        }

        /// <summary>
        /// Returns the maximum number of characters produced by decoding <paramref name="byteCount"/> bytes.
        /// </summary>
        /// <returns><paramref name="byteCount"/>, unchanged.</returns>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="byteCount"/> is negative.</exception>
        public override int GetMaxCharCount(int byteCount)
        {
            if (byteCount < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(byteCount));
            }

            // This is SBCS, so everything is 1:1
            return byteCount;
        }
    }
}
......@@ -18,12 +18,29 @@ internal static class EncodedStringText
/// </summary>
private static readonly Encoding s_fallbackEncoding = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: true);
/// <summary>
/// Encoding to use when UTF-8 fails. If available, we use CodePage 1252. If not, we use Latin1.
/// </summary>
private static readonly Encoding s_defaultEncoding = GetDefaultEncoding();
/// <summary>
/// Resolves the encoding to use when UTF-8 decoding fails: CodePage 1252 when the platform
/// provides it, otherwise Latin1.
/// </summary>
/// <returns>The CodePage 1252 encoding, or the Latin1 encoding as a fallback.</returns>
private static Encoding GetDefaultEncoding()
{
    try
    {
        return Encoding.GetEncoding(codepage: 1252);
    }
    catch (Exception e) when (e is NotSupportedException || e is ArgumentException)
    {
        // Encoding.GetEncoding(int) is documented to report a code page that the underlying
        // platform does not support with either exception type, so both lead to the fallback.
        return Encoding.GetEncoding(name: "Latin1");
    }
}
/// <summary>
/// Initializes an instance of <see cref="SourceText"/> from the provided stream. This version differs
/// from <see cref="SourceText.From(Stream, Encoding, SourceHashAlgorithm, bool)"/> in two ways:
/// 1. It attempts to minimize allocations by trying to read the stream into a byte array.
/// 2. If <paramref name="defaultEncoding"/> is null, it will first try UTF8 and, if that fails, it will
/// try CodePage 1252.
/// try CodePage 1252. If CodePage 1252 is not available on the system, then it will try Latin1.
/// </summary>
/// <param name="stream">The stream containing encoded text.</param>
/// <param name="defaultEncoding">
......@@ -57,7 +74,7 @@ internal static SourceText Create(Stream stream, Encoding defaultEncoding = null
try
{
return Decode(stream, defaultEncoding ?? CodePage1252Encoding.Instance, checksumAlgorithm, throwIfBinaryDetected: detectEncoding);
return Decode(stream, defaultEncoding ?? s_defaultEncoding, checksumAlgorithm, throwIfBinaryDetected: detectEncoding);
}
catch (DecoderFallbackException e)
{
......
......@@ -62,9 +62,6 @@
<Compile Include="..\..\..\Compilers\Core\Desktop\AssemblyReferenceResolver.cs">
<Link>InternalUtilities\AssemblyReferenceResolver.cs</Link>
</Compile>
<Compile Include="..\..\..\Compilers\Core\Desktop\CodePage1252Encoding.cs">
<Link>InternalUtilities\CodePage1252Encoding.cs</Link>
</Compile>
<Compile Include="..\..\..\Compilers\Core\Desktop\MetadataFileReferenceResolver.cs">
<Link>InternalUtilities\MetadataFileReferenceResolver.cs</Link>
</Compile>
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册