提交 82855de9 编写于 作者: C Charles Stoner

Merge pull request #7665 from cston/7225

Include byte order marks from explicit Encoding in checksum
......@@ -81,6 +81,102 @@ public void ChecksumAlgorithm1()
Assert.Equal(SourceHashAlgorithm.Sha256, SourceText.From(stream, checksumAlgorithm: SourceHashAlgorithm.Sha256).ChecksumAlgorithm);
}
/// <summary>
/// Verifies when a UTF-8 byte order mark (BOM) contributes to the checksum
/// of a <c>SourceText</c>, depending on how the text was created (string,
/// byte array, stream, LargeText, LargeTextWriter, or an edited text) and
/// on the explicit encoding supplied by the caller.
/// </summary>
[WorkItem(7225)]
[Fact]
public void ChecksumAndBOM()
{
    const string source = "Hello, World!";
    var checksumAlgorithm = SourceHashAlgorithm.Sha1;

    var utf8NoBOM = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false);
    var utf8BOM = new UTF8Encoding(encoderShouldEmitUTF8Identifier: true);

    // Expected checksums for the content without and with a leading BOM.
    var checksumNoBOM = ImmutableArray.Create<byte>(0xa, 0xa, 0x9f, 0x2a, 0x67, 0x72, 0x94, 0x25, 0x57, 0xab, 0x53, 0x55, 0xd7, 0x6a, 0xf4, 0x42, 0xf8, 0xf6, 0x5e, 0x1);
    var checksumBOM = ImmutableArray.Create<byte>(0xb2, 0x19, 0x0, 0x9b, 0x61, 0xce, 0xcd, 0x50, 0x7b, 0x2e, 0x56, 0x3c, 0xc0, 0xeb, 0x96, 0xe2, 0xa1, 0xd9, 0x3f, 0xfc);

    // Created from a string: the BOM of the explicit encoding is part of the checksum.
    VerifyChecksum(SourceText.From(source, utf8NoBOM, checksumAlgorithm), checksumNoBOM);
    VerifyChecksum(SourceText.From(source, utf8BOM, checksumAlgorithm), checksumBOM);

    var bytesNoBOM = new byte[] { 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x2c, 0x20, 0x57, 0x6f, 0x72, 0x6c, 0x64, 0x21 };
    var bytesBOM = new byte[] { 0xef, 0xbb, 0xbf, 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x2c, 0x20, 0x57, 0x6f, 0x72, 0x6c, 0x64, 0x21 };
    var streamNoBOM = new MemoryStream(bytesNoBOM);
    var streamBOM = new MemoryStream(bytesBOM);

    // Explicit encodings tried against raw bytes and streams: none, UTF-8 without BOM, UTF-8 with BOM.
    var explicitEncodings = new Encoding[] { null, utf8NoBOM, utf8BOM };

    // Created from bytes that have no BOM: the explicit encoding does not affect the checksum.
    foreach (var explicitEncoding in explicitEncodings)
    {
        VerifyChecksum(SourceText.From(bytesNoBOM, bytesNoBOM.Length, explicitEncoding, checksumAlgorithm), checksumNoBOM);
    }

    // Created from bytes that start with a BOM: the BOM is part of the checksum.
    foreach (var explicitEncoding in explicitEncodings)
    {
        VerifyChecksum(SourceText.From(bytesBOM, bytesBOM.Length, explicitEncoding, checksumAlgorithm), checksumBOM);
    }

    // Created from a stream that has no BOM: the explicit encoding does not affect the checksum.
    foreach (var explicitEncoding in explicitEncodings)
    {
        VerifyChecksum(SourceText.From(streamNoBOM, explicitEncoding, checksumAlgorithm), checksumNoBOM);
    }

    // Created from a stream that starts with a BOM: the BOM is part of the checksum.
    foreach (var explicitEncoding in explicitEncodings)
    {
        VerifyChecksum(SourceText.From(streamBOM, explicitEncoding, checksumAlgorithm), checksumBOM);
    }

    // LargeText decoded from a stream that has no BOM: the explicit encoding does not affect the checksum.
    VerifyChecksum(LargeText.Decode(streamNoBOM, utf8NoBOM, checksumAlgorithm, throwIfBinaryDetected: false), checksumNoBOM);
    VerifyChecksum(LargeText.Decode(streamNoBOM, utf8BOM, checksumAlgorithm, throwIfBinaryDetected: false), checksumNoBOM);

    // LargeText decoded from a stream that starts with a BOM: the BOM is part of the checksum.
    VerifyChecksum(LargeText.Decode(streamBOM, utf8NoBOM, checksumAlgorithm, throwIfBinaryDetected: false), checksumBOM);
    VerifyChecksum(LargeText.Decode(streamBOM, utf8BOM, checksumAlgorithm, throwIfBinaryDetected: false), checksumBOM);

    // LargeText produced by LargeTextWriter: the checksum includes the BOM of
    // the explicit encoding. That differs from the LargeText.Decode cases
    // above, but LargeTextWriter is only used for unsaved edits, where the
    // checksum is ignored.
    VerifyChecksum(FromLargeTextWriter(source, utf8NoBOM, checksumAlgorithm), checksumNoBOM);
    VerifyChecksum(FromLargeTextWriter(source, utf8BOM, checksumAlgorithm), checksumBOM);

    // Edited text originally created from a string: the BOM of the explicit encoding is part of the checksum.
    VerifyChecksum(FromChanges(SourceText.From(source, utf8NoBOM, checksumAlgorithm)), checksumNoBOM);
    VerifyChecksum(FromChanges(SourceText.From(source, utf8BOM, checksumAlgorithm)), checksumBOM);

    // Edited text originally created from a stream that has no BOM: the
    // checksum includes the BOM of the explicit encoding. That differs from
    // the unedited stream cases above, but edited texts only represent
    // unsaved edits, where the checksum is ignored.
    VerifyChecksum(FromChanges(SourceText.From(streamNoBOM, utf8NoBOM, checksumAlgorithm)), checksumNoBOM);
    VerifyChecksum(FromChanges(SourceText.From(streamNoBOM, utf8BOM, checksumAlgorithm)), checksumBOM);

    // Edited text originally created from a stream that starts with a BOM: the BOM is part of the checksum.
    VerifyChecksum(FromChanges(SourceText.From(streamBOM, utf8NoBOM, checksumAlgorithm)), checksumBOM);
    VerifyChecksum(FromChanges(SourceText.From(streamBOM, utf8BOM, checksumAlgorithm)), checksumBOM);
}
/// <summary>
/// Creates a <c>SourceText</c> by writing <paramref name="source"/> through a
/// <c>LargeTextWriter</c> constructed with the given encoding, checksum
/// algorithm and the length of the source string.
/// </summary>
/// <param name="source">The text to write.</param>
/// <param name="encoding">The encoding handed to the writer.</param>
/// <param name="checksumAlgorithm">The checksum algorithm handed to the writer.</param>
/// <returns>The text produced by the writer's <c>ToSourceText</c>.</returns>
private static SourceText FromLargeTextWriter(string source, Encoding encoding, SourceHashAlgorithm checksumAlgorithm)
{
    SourceText written;
    using (var largeTextWriter = new LargeTextWriter(encoding, checksumAlgorithm, source.Length))
    {
        largeTextWriter.Write(source);
        written = largeTextWriter.ToSourceText();
    }

    return written;
}
/// <summary>
/// Returns the result of applying a single change to <paramref name="text"/>
/// that replaces the span (0, 1) with the text already in that span, and
/// asserts that the result is not equal to the original.
/// </summary>
/// <param name="text">The text to apply the change to.</param>
/// <returns>The text returned by <c>WithChanges</c>.</returns>
private static SourceText FromChanges(SourceText text)
{
    var leadingSpan = new TextSpan(0, 1);

    // Replace the leading span with its own content.
    var edited = text.WithChanges(new TextChange(leadingSpan, text.ToString(leadingSpan)));

    Assert.NotEqual(text, edited);
    return edited;
}
/// <summary>
/// Asserts that the checksum reported by <paramref name="text"/> matches
/// <paramref name="expectedChecksum"/> byte for byte.
/// </summary>
/// <param name="text">The text whose checksum is checked.</param>
/// <param name="expectedChecksum">The expected checksum bytes.</param>
private static void VerifyChecksum(SourceText text, ImmutableArray<byte> expectedChecksum)
{
    Assert.Equal<byte>(expectedChecksum, text.GetChecksum());
}
[Fact]
public void ContentEquals()
{
......
......@@ -269,7 +269,7 @@ private static int GetSegmentCountIfCombined(ArrayBuilder<SourceText> segments,
}
/// <summary>
/// Combines continguous segments with lengths that are each less than or equal to the specified segment size.
/// Combines contiguous segments with lengths that are each less than or equal to the specified segment size.
/// </summary>
private static void CombineSegments(ArrayBuilder<SourceText> segments, int segmentSize)
{
......
......@@ -77,7 +77,8 @@ internal static SourceText Decode(Stream stream, Encoding encoding, SourceHashAl
chunks.Add(chunk);
}
return new LargeText(chunks.ToImmutableAndFree(), reader.CurrentEncoding, default(ImmutableArray<byte>), checksumAlgorithm);
var checksum = CalculateChecksum(stream, checksumAlgorithm);
return new LargeText(chunks.ToImmutableAndFree(), reader.CurrentEncoding, checksum, checksumAlgorithm);
}
}
......
// Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading;
using Roslyn.Utilities;
namespace Microsoft.CodeAnalysis.Text
{
internal class LargeTextWriter : SourceTextWriter
internal sealed class LargeTextWriter : SourceTextWriter
{
private readonly Encoding _encoding;
private readonly SourceHashAlgorithm _checksumAlgorithm;
......
// Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading;
using Roslyn.Utilities;
namespace Microsoft.CodeAnalysis.Text
{
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册