未验证 提交 ad41de40 编写于 作者: L Levi Broderick 提交者: GitHub

Add string.ReplaceLineEndings and MemoryExtensions.EnumerateLines (#53115)

上级 8f18c5da
......@@ -49,6 +49,8 @@ public static partial class MemoryExtensions
public static bool EndsWith(this System.ReadOnlySpan<char> span, System.ReadOnlySpan<char> value, System.StringComparison comparisonType) { throw null; }
public static bool EndsWith<T>(this System.ReadOnlySpan<T> span, System.ReadOnlySpan<T> value) where T : System.IEquatable<T> { throw null; }
public static bool EndsWith<T>(this System.Span<T> span, System.ReadOnlySpan<T> value) where T : System.IEquatable<T> { throw null; }
public static System.Text.SpanLineEnumerator EnumerateLines(this System.ReadOnlySpan<char> span) { throw null; }
public static System.Text.SpanLineEnumerator EnumerateLines(this System.Span<char> span) { throw null; }
public static System.Text.SpanRuneEnumerator EnumerateRunes(this System.ReadOnlySpan<char> span) { throw null; }
public static System.Text.SpanRuneEnumerator EnumerateRunes(this System.Span<char> span) { throw null; }
public static bool Equals(this System.ReadOnlySpan<char> span, System.ReadOnlySpan<char> other, System.StringComparison comparisonType) { throw null; }
......@@ -543,6 +545,14 @@ public static partial class EncodingExtensions
public static long GetChars(this System.Text.Encoding encoding, System.ReadOnlySpan<byte> bytes, System.Buffers.IBufferWriter<char> writer) { throw null; }
public static string GetString(this System.Text.Encoding encoding, in System.Buffers.ReadOnlySequence<byte> bytes) { throw null; }
}
// Reference-surface declaration of SpanLineEnumerator: every member body is `throw null`,
// so this block describes the API shape only and carries no behavior of its own.
public ref partial struct SpanLineEnumerator
{
// Placeholder fields.
// NOTE(review): presumably present so the stub has the same kind of layout (a reference plus a
// primitive) as the real type - confirm against the reference-assembly tooling.
private object _dummy;
private int _dummyPrimitive;
// The line at the enumerator's current position.
public System.ReadOnlySpan<char> Current { get { throw null; } }
// Returns an enumerator of this same type, which is what allows the struct to be used in `foreach`.
public System.Text.SpanLineEnumerator GetEnumerator() { throw null; }
// Advances the enumerator to the next line.
public bool MoveNext() { throw null; }
}
public ref partial struct SpanRuneEnumerator
{
private object _dummy;
......
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
using System.Buffers;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using Xunit;
namespace System.SpanTests
{
public static partial class SpanTests
{
/// <summary>
/// Theory data: one single-element row per newline character, as listed by the
/// Unicode Standard, Sec. 5.8, Recommendation R4 and Table 5-2.
/// </summary>
public static IEnumerable<object[]> NewLineChars
{
    get
    {
        char[] newlineChars = { '\r', '\n', '\f', '\u0085', '\u2028', '\u2029' };

        // Each char becomes its own argument row for [MemberData].
        return Array.ConvertAll(newlineChars, newlineChar => new object[] { newlineChar });
    }
}
[Fact]
public static void EnumerateLines_Empty()
{
    // An empty input still counts as one (empty) line, so both the Span<char> and the
    // ReadOnlySpan<char> entry points must yield exactly one empty element and then stop.
    AssertYieldsSingleEmptyLine(Span<char>.Empty.EnumerateLines().GetEnumerator());
    AssertYieldsSingleEmptyLine(ReadOnlySpan<char>.Empty.EnumerateLines().GetEnumerator());

    static void AssertYieldsSingleEmptyLine(System.Text.SpanLineEnumerator lines)
    {
        Assert.True(lines.MoveNext());
        Assert.Equal("", lines.Current.ToString());
        Assert.False(lines.MoveNext());
    }
}
// Data-driven check of MemoryExtensions.EnumerateLines. 'input' uses <XYZ> markers for control
// characters (expanded by FixupSequences); 'expectedRanges' lists, in order, the range of the
// input that each enumerated line is expected to cover.
[Theory]
[InlineData(null, new[] { ".." })]
[InlineData("", new[] { ".." })]
[InlineData("abc", new[] { ".." })]
[InlineData("<CR>", new[] { "..0", "1.." })] // empty sequences before and after the CR
[InlineData("<CR><CR>", new[] { "0..0", "1..1", "2..2" })] // empty sequences before and after the CR (CR doesn't consume CR)
[InlineData("<CR><LF>", new[] { "..0", "^0.." })] // CR should swallow any LF which follows
[InlineData("a<CR><LF><LF>z", new[] { "..1", "3..3", "4.." })] // CR should swallow only a single LF which follows
[InlineData("a<CR>b<LF>c", new[] { "..1", "2..3", "4.." })] // CR shouldn't swallow anything other than LF
[InlineData("aa<CR>bb<LF><CR>cc", new[] { "..2", "3..5", "6..6", "7.." })] // LF shouldn't swallow CR which follows
[InlineData("a<CR>b<VT>c<LF>d<NEL>e<FF>f<PS>g<LS>h", new[] { "..1", "2..5", "6..7", "8..9", "10..11", "12..13", "14.." })] // VT not recognized as NLF
[InlineData("xyz<NEL>", new[] { "..3", "^0.." })] // sequence at end produces empty string
[InlineData("<NEL>xyz", new[] { "..0", "^3.." })] // sequence at beginning produces empty string
[InlineData("abc<NAK>%def", new[] { ".." })] // we don't recognize EBCDIC encodings for LF (see Unicode Standard, Sec. 5.8, Table 5-1)
public static void EnumerateLines_Battery(string input, string[] expectedRanges)
{
// This test is similar to the string.ReplaceLineEndings test, but it checks ranges instead of substrings,
// as we want to ensure that the method under test points to very specific slices within the original input string.
input = FixupSequences(input);
// Resolve every expected range string (open-ended or '^' from-end forms included) into absolute
// start..end offsets against the fixed-up input, so it can be compared directly with the
// ranges computed from the spans the enumerator returns. A null input is treated as length 0.
Range[] expectedRangesNormalized = expectedRanges.Select(element =>
{
Range parsed = ParseRange(element);
(int actualOffset, int actualLength) = parsed.GetOffsetAndLength(input?.Length ?? 0);
return actualOffset..(actualOffset + actualLength);
}).ToArray();
// Enumerate the lines and record where each returned span sits inside the input.
List<Range> actualRangesNormalized = new List<Range>();
foreach (ReadOnlySpan<char> line in input.AsSpan().EnumerateLines())
{
actualRangesNormalized.Add(GetNormalizedRangeFromSubspan(input, line));
}
Assert.Equal(expectedRangesNormalized, actualRangesNormalized);
// Computes the element range that 'inner' occupies within 'outer' by comparing the addresses
// of the two spans. Asserts that 'inner' starts and ends inside 'outer' and that it is
// offset from the start of 'outer' by a whole number of elements.
static unsafe Range GetNormalizedRangeFromSubspan<T>(ReadOnlySpan<T> outer, ReadOnlySpan<T> inner)
{
// We can't use MemoryExtensions.Overlaps because it doesn't handle empty spans in the way we need.
ref T refOuter = ref MemoryMarshal.GetReference(outer);
ref T refInner = ref MemoryMarshal.GetReference(inner);
// Pin both spans so the raw addresses stay comparable for the duration of the computation.
fixed (byte* pOuterStart = &Unsafe.As<T, byte>(ref refOuter))
fixed (byte* pInnerStart = &Unsafe.As<T, byte>(ref refInner))
{
byte* pOuterEnd = pOuterStart + (uint)outer.Length * (nuint)Unsafe.SizeOf<T>();
byte* pInnerEnd = pInnerStart + (uint)inner.Length * (nuint)Unsafe.SizeOf<T>();
Assert.True(pOuterStart <= pInnerStart && pInnerStart <= pOuterEnd, "Inner span begins outside outer span.");
Assert.True(pOuterStart <= pInnerEnd && pInnerEnd <= pOuterEnd, "Inner span ends outside outer span.");
nuint byteOffset = (nuint)(pInnerStart - pOuterStart);
Assert.Equal((nuint)0, byteOffset % (nuint)Unsafe.SizeOf<T>()); // Unaligned elements; cannot compute offset
nuint elementOffset = byteOffset / (nuint)Unsafe.SizeOf<T>();
return checked((int)elementOffset)..checked((int)elementOffset + inner.Length);
}
}
// Expands the <XYZ> markers in a test input into the control characters they stand for.
// A null input is passed through unchanged.
static string FixupSequences(string input)
{
// We use <XYZ> markers so that the original strings show up better in the xunit test runner
// <VT> is included as a negative test; we *do not* want EnumerateLines to honor it
if (input is null) { return null; }
return input.Replace("<CR>", "\r")
.Replace("<LF>", "\n")
.Replace("<VT>", "\v")
.Replace("<FF>", "\f")
.Replace("<NAK>", "\u0015")
.Replace("<NEL>", "\u0085")
.Replace("<LS>", "\u2028")
.Replace("<PS>", "\u2029");
}
// Parses a range written as "begin..end". Either side may be omitted (meaning the start or the
// end of the sequence respectively). Throws ArgumentException when the ".." separator is missing.
static Range ParseRange(string input)
{
var idxOfDots = input.IndexOf("..", StringComparison.Ordinal);
if (idxOfDots < 0) { throw new ArgumentException(); }
ReadOnlySpan<char> begin = input.AsSpan(0, idxOfDots).Trim();
Index beginIdx = (begin.IsEmpty) ? Index.Start : ParseIndex(begin);
ReadOnlySpan<char> end = input.AsSpan(idxOfDots + 2).Trim();
Index endIdx = (end.IsEmpty) ? Index.End : ParseIndex(end);
return beginIdx..endIdx;
// Parses a single index; a leading '^' marks it as counted from the end.
static Index ParseIndex(ReadOnlySpan<char> input)
{
bool fromEnd = false;
if (!input.IsEmpty && input[0] == '^') { fromEnd = true; input = input.Slice(1); }
return new Index(int.Parse(input, NumberStyles.Integer, CultureInfo.InvariantCulture), fromEnd);
}
}
}
[Theory]
[MemberData(nameof(NewLineChars))]
public static void EnumerateLines_EnumerationIsNotPolynomialComplexity(char newlineChar)
{
    // Guards the complexity contract of MoveNext: a call must cost O(i), where i is the index
    // of the first NLF in the span, and not O(n), where n is the length of the whole span.
    // See comments in SpanLineEnumerator.MoveNext and string.IndexOfNewlineChar for background.
    //
    // The check works by construction rather than by timing. BoundedMemory gives us a scratch
    // buffer followed by a poison page; MemoryMarshal is then used to fabricate a span that
    // claims to extend into that poison page. If the runtime reads past the first newline char
    // and keeps scanning toward the end of the span, the test process hits an AV.
    const int AccessibleLength = 4096;
    const int NewlineIndex = 512;

    using var boundedMem = BoundedMemory.Allocate<char>(AccessibleLength, PoisonPagePlacement.After);
    Span<char> accessible = boundedMem.Span;
    accessible.Fill('a');
    accessible[NewlineIndex] = newlineChar;
    boundedMem.MakeReadonly();

    // Deliberately over-long view: the tail of this span lies in the poison page.
    Span<char> overlong = MemoryMarshal.CreateSpan(
        ref MemoryMarshal.GetReference(accessible),
        accessible.Length + 4096);

    // Span<char> entry point.
    var linesFromSpan = overlong.EnumerateLines().GetEnumerator();
    Assert.True(linesFromSpan.MoveNext());
    Assert.Equal(NewlineIndex, linesFromSpan.Current.Length);

    // ReadOnlySpan<char> entry point.
    var linesFromReadOnlySpan = ((ReadOnlySpan<char>)overlong).EnumerateLines().GetEnumerator();
    Assert.True(linesFromReadOnlySpan.MoveNext());
    Assert.Equal(NewlineIndex, linesFromReadOnlySpan.Current.Length);
}
}
}
......@@ -64,6 +64,7 @@
<Compile Include="Span\EndsWith.char.cs" />
<Compile Include="Span\EndsWith.long.cs" />
<Compile Include="Span\EndsWith.T.cs" />
<Compile Include="Span\EnumerateLines.cs" />
<Compile Include="Span\EnumerateRunes.cs" />
<Compile Include="Span\Equality.cs" />
<Compile Include="Span\GcReporting.cs" />
......
......@@ -14,22 +14,8 @@ internal static class FileUtil
/// <summary>
/// Reads the text of <paramref name="source"/> and writes it to <paramref name="normalizedDest"/>
/// with every newline sequence replaced by <see cref="Environment.NewLine"/>.
/// </summary>
/// <param name="source">Path of the file to read.</param>
/// <param name="normalizedDest">Path of the file that receives the normalized text.</param>
public static void NormalizeLineEndings(string source, string normalizedDest)
{
    string contents = File.ReadAllText(source);

    // string.ReplaceLineEndings() canonicalizes every newline sequence in a single pass, so the
    // hand-rolled "\n" <-> "\r\n" replacement and the extra intermediate write are unnecessary:
    // the destination ends up with the same content either way.
    string normalizedContents = contents.ReplaceLineEndings();
    File.WriteAllText(normalizedDest, normalizedContents);
}
}
}
......@@ -967,6 +967,7 @@
<Compile Include="$(MSBuildThisFileDirectory)System\Text\Latin1Utility.Helpers.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\NormalizationForm.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\Rune.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\SpanLineEnumerator.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\SpanRuneEnumerator.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\StringBuilder.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\StringBuilder.Debug.cs" Condition="'$(Configuration)' == 'Debug'" />
......
......@@ -380,5 +380,29 @@ public static SpanRuneEnumerator EnumerateRunes(this Span<char> span)
{
return new SpanRuneEnumerator(span);
}
/// <summary>
/// Creates an enumerator that walks the provided span one line at a time.
/// </summary>
/// <param name="span">The read-only span of characters to split into lines.</param>
/// <returns>A <see cref="SpanLineEnumerator"/> over the lines of <paramref name="span"/>.</returns>
/// <remarks>
/// Newline sequences are detected in the same way as by <see cref="string.ReplaceLineEndings(string)"/>;
/// see the documentation of that method for more information.
/// </remarks>
public static SpanLineEnumerator EnumerateLines(this ReadOnlySpan<char> span)
    => new SpanLineEnumerator(span);
/// <summary>
/// Creates an enumerator that walks the provided span one line at a time.
/// </summary>
/// <param name="span">The span of characters to split into lines.</param>
/// <returns>A <see cref="SpanLineEnumerator"/> over the lines of <paramref name="span"/>.</returns>
/// <remarks>
/// Newline sequences are detected in the same way as by <see cref="string.ReplaceLineEndings(string)"/>;
/// see the documentation of that method for more information.
/// </remarks>
public static SpanLineEnumerator EnumerateLines(this Span<char> span)
    => new SpanLineEnumerator(span);
}
}
......@@ -93,7 +93,10 @@ public static Exception SetCurrentStackTrace(Exception source)
/// This method populates the <see cref="Exception.StackTrace"/> property from an arbitrary string value.
/// The typical use case is the transmission of <see cref="Exception"/> objects across processes with high fidelity,
/// allowing preservation of the exception object's stack trace information. .NET does not attempt to parse the
/// provided string value.
///
/// The caller is responsible for canonicalizing line endings if required. <see cref="string.ReplaceLineEndings"/>
/// can be used to canonicalize line endings.
/// </remarks>
public static Exception SetRemoteStackTrace(Exception source, string stackTrace)
{
......
......@@ -1152,6 +1152,139 @@ private string ReplaceHelper(int oldValueLength, string newValue, ReadOnlySpan<i
return dst;
}
/// <summary>
/// Replaces all newline sequences in the current string with <see cref="Environment.NewLine"/>.
/// </summary>
/// <returns>
/// A string whose contents match the current string, but with all newline sequences replaced
/// with <see cref="Environment.NewLine"/>.
/// </returns>
/// <remarks>
/// This method searches for all newline sequences within the string and canonicalizes them to match
/// the newline sequence for the current environment. For example, when running on Windows, all
/// occurrences of non-Windows newline sequences will be replaced with the sequence CRLF. When
/// running on Unix, all occurrences of non-Unix newline sequences will be replaced with
/// a single LF character.
///
/// It is not recommended that protocol parsers utilize this API. Protocol specifications often
/// mandate specific newline sequences. For example, HTTP/1.1 (RFC 9112, formerly RFC 7230) mandates
/// that the request line, status line, and header lines end with CRLF. Since this API operates over
/// a wide range of newline sequences, a protocol parser utilizing this API could exhibit behaviors
/// unintended by the protocol's authors.
///
/// This overload is equivalent to calling <see cref="ReplaceLineEndings(string)"/>, passing
/// <see cref="Environment.NewLine"/> as the <em>replacementText</em> parameter.
///
/// This method is guaranteed O(n) complexity, where <em>n</em> is the length of the input string.
/// </remarks>
public string ReplaceLineEndings() => ReplaceLineEndings(Environment.NewLineConst);
/// <summary>
/// Replaces all newline sequences in the current string with <paramref name="replacementText"/>.
/// </summary>
/// <param name="replacementText">The text that takes the place of each newline sequence.</param>
/// <returns>
/// A string whose contents match the current string, but with all newline sequences replaced
/// with <paramref name="replacementText"/>. If the current string contains no newline sequence,
/// the current instance itself is returned.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="replacementText"/> is null.</exception>
/// <remarks>
/// This method searches for all newline sequences within the string and canonicalizes them to the
/// newline sequence provided by <paramref name="replacementText"/>. If <paramref name="replacementText"/>
/// is <see cref="string.Empty"/>, all newline sequences within the string will be removed.
///
/// It is not recommended that protocol parsers utilize this API. Protocol specifications often
/// mandate specific newline sequences. For example, HTTP/1.1 (RFC 9112, formerly RFC 7230) mandates
/// that the request line, status line, and header lines end with CRLF. Since this API operates over
/// a wide range of newline sequences, a protocol parser utilizing this API could exhibit behaviors
/// unintended by the protocol's authors.
///
/// The list of recognized newline sequences is CR (U+000D), LF (U+000A), CRLF (U+000D U+000A),
/// NEL (U+0085), LS (U+2028), FF (U+000C), and PS (U+2029). This list is given by the Unicode
/// Standard, Sec. 5.8, Recommendation R4 and Table 5-2.
///
/// This method is guaranteed O(n * r) complexity, where <em>n</em> is the length of the input string,
/// and where <em>r</em> is the length of <paramref name="replacementText"/>.
/// </remarks>
public string ReplaceLineEndings(string replacementText)
{
if (replacementText is null)
{
throw new ArgumentNullException(nameof(replacementText));
}
// Early-exit: do we need to do anything at all?
// If not, return this string as-is.
int idxOfFirstNewlineChar = IndexOfNewlineChar(this, out int stride);
if (idxOfFirstNewlineChar < 0)
{
return this;
}
// While writing to the builder, we don't bother memcpying the first
// or the last segment into the builder. We'll use the builder only
// for the intermediate segments, then we'll sandwich everything together
// with one final string.Concat call.
ReadOnlySpan<char> firstSegment = this.AsSpan(0, idxOfFirstNewlineChar);
ReadOnlySpan<char> remaining = this.AsSpan(idxOfFirstNewlineChar + stride);
ValueStringBuilder builder = new ValueStringBuilder(stackalloc char[256]);
// Each iteration emits the replacement for the previously found newline followed by the
// segment that leads up to the next one. The replacement for the last newline found is
// not written here; it is supplied by the Concat call after the loop.
while (true)
{
int idx = IndexOfNewlineChar(remaining, out stride);
if (idx < 0) { break; } // no more newline chars
builder.Append(replacementText);
builder.Append(remaining.Slice(0, idx));
remaining = remaining.Slice(idx + stride);
}
// first segment + intermediate segments + replacement for the final newline + last segment
string retVal = Concat(firstSegment, builder.AsSpan(), replacementText, remaining);
builder.Dispose();
return retVal;
}
// Locates the first newline function in 'text' and returns its index; when there is none, the
// (negative) result of the underlying search is returned. 'stride' receives the number of chars
// the match occupies: 2 for CRLF, 1 for any other newline char, 0 when nothing was found.
// Newline chars are given by the Unicode Standard, Sec. 5.8.
internal static int IndexOfNewlineChar(ReadOnlySpan<char> text, out int stride)
{
    // !! IMPORTANT !!
    //
    // This method may be handed untrusted input, so its worst-case runtime has to be bounded.
    // We depend on MemoryExtensions.IndexOfAny costing O(i) in the worst case, where i is the
    // index of the first needle match within the haystack (or O(n) when no needle is found).
    // That is what keeps the usual "call this in a loop" pattern at O(n) overall rather than
    // O(n^2), where n is the length of the input text.
    //
    // The Unicode Standard, Sec. 5.8, Recommendation R4 and Table 5-2 list CR, LF, CRLF, NEL,
    // LS, FF, and PS as newline functions. The same section explicitly leaves VT out of that
    // list, so VT is not one of the needles.
    const string needles = "\r\n\f\u0085\u2028\u2029";

    int matchIdx = text.IndexOfAny(needles);
    if ((uint)matchIdx >= (uint)text.Length)
    {
        stride = 0; // no needle found
        return matchIdx;
    }

    // A CR that is immediately followed by LF is consumed as one two-char newline function.
    // Every other match, a lone CR included, is a single char wide.
    int followingIdx = matchIdx + 1;
    bool isCrLf = text[matchIdx] == '\r'
        && (uint)followingIdx < (uint)text.Length
        && text[followingIdx] == '\n';

    stride = isCrLf ? 2 : 1;
    return matchIdx;
}
public string[] Split(char separator, StringSplitOptions options = StringSplitOptions.None)
{
return SplitInternal(new ReadOnlySpan<char>(ref separator, 1), int.MaxValue, options);
......
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
namespace System.Text
{
/// <summary>
/// Provides line-by-line enumeration over a <see cref="ReadOnlySpan{Char}"/>.
/// </summary>
/// <remarks>
/// Instances of this type are obtained by calling <see cref="MemoryExtensions.EnumerateLines(ReadOnlySpan{char})"/>.
/// </remarks>
public ref struct SpanLineEnumerator
{
    // Text that has not been handed out yet.
    private ReadOnlySpan<char> _remaining;
    // Line most recently produced by MoveNext.
    private ReadOnlySpan<char> _current;
    // False for a default-initialized instance and once the final line has been produced.
    private bool _isEnumeratorActive;

    internal SpanLineEnumerator(ReadOnlySpan<char> buffer)
    {
        _isEnumeratorActive = true;
        _current = default;
        _remaining = buffer;
    }

    /// <summary>
    /// Gets the line the enumerator is currently positioned on.
    /// </summary>
    public ReadOnlySpan<char> Current => _current;

    /// <summary>
    /// Returns this instance, allowing it to be consumed directly by <c>foreach</c>.
    /// </summary>
    public SpanLineEnumerator GetEnumerator() => this;

    /// <summary>
    /// Moves the enumerator to the next line of the span.
    /// </summary>
    /// <returns>
    /// True if the enumerator moved to another line; false if the enumerator
    /// has already advanced past the end of the span.
    /// </returns>
    public bool MoveNext()
    {
        if (!_isEnumeratorActive)
        {
            return false; // EOF previously reached or enumerator was never initialized
        }

        ReadOnlySpan<char> unread = _remaining;
        int newlineIdx = string.IndexOfNewlineChar(unread, out int stride);

        if (newlineIdx < 0)
        {
            // No further newline: everything that is left forms the final line. We still
            // report 'true' for this iteration so the caller can read Current one last time;
            // the next call will report 'false'.
            _current = unread;
            _remaining = default;
            _isEnumeratorActive = false;
            return true;
        }

        // Hand out the text in front of the newline and skip over the newline itself.
        _current = unread.Slice(0, newlineIdx);
        _remaining = unread.Slice(newlineIdx + stride);
        return true;
    }
}
}
......@@ -256,11 +256,6 @@ private static string StripOffXmlDeclaration(string s)
return s;
}
// Strips every newline sequence from 's' so that strings can be compared without regard to
// line breaks. Only one definition is kept: the earlier hand-rolled variant, which removed
// "\n" and "\r" individually, duplicated this member's signature.
private static string NormalizeNewLines(string s) => s.ReplaceLineEndings("");
}
}
......@@ -72,11 +72,6 @@ protected static void CompareOutput(string expected, Stream actualStream)
}
}
// Rewrites CRLF and lone-CR line endings in 's' as LF.
private static string NormalizeLineEndings(string s)
{
    // CRLF has to be collapsed first; otherwise its CR would be rewritten on its own
    // and each CRLF would turn into two LFs.
    string withoutCrLf = s.Replace("\r\n", "\n");
    return withoutCrLf.Replace("\r", "\n");
}
protected static void CompareOutput(Stream expectedStream, Stream actualStream, int count = 0)
{
actualStream.Seek(0, SeekOrigin.Begin);
......@@ -90,8 +85,8 @@ protected static void CompareOutput(Stream expectedStream, Stream actualStream,
expectedReader.ReadLine();
}
string actual = NormalizeLineEndings(actualReader.ReadToEnd());
string expected = NormalizeLineEndings(expectedReader.ReadToEnd());
string actual = actualReader.ReadToEnd().ReplaceLineEndings();
string expected = expectedReader.ReadToEnd().ReplaceLineEndings();
if (actual.Equals(expected))
{
......
......@@ -3717,6 +3717,8 @@ public sealed partial class String : System.Collections.Generic.IEnumerable<char
public System.String Replace(System.String oldValue, System.String? newValue) { throw null; }
public System.String Replace(System.String oldValue, System.String? newValue, bool ignoreCase, System.Globalization.CultureInfo? culture) { throw null; }
public System.String Replace(System.String oldValue, System.String? newValue, System.StringComparison comparisonType) { throw null; }
public System.String ReplaceLineEndings() { throw null; }
public System.String ReplaceLineEndings(System.String replacementText) { throw null; }
public string[] Split(char separator, int count, System.StringSplitOptions options = System.StringSplitOptions.None) { throw null; }
public string[] Split(char separator, System.StringSplitOptions options = System.StringSplitOptions.None) { throw null; }
public string[] Split(params char[]? separator) { throw null; }
......
......@@ -10,7 +10,6 @@
using System.Runtime.InteropServices;
using System.Text;
using System.Threading.Tasks;
using Microsoft.DotNet.RemoteExecutor;
using Xunit;
namespace System.Tests
......@@ -508,6 +507,69 @@ public static void EnumerateRunes(char[] chars, int[] expected)
Assert.Equal(expected, enumeratedValues);
}
[Fact]
public static void ReplaceLineEndings_NullReplacementText_Throws()
{
    // A null replacement must be rejected, and the exception must name the offending parameter.
    const string input = "Hello!";

    Assert.Throws<ArgumentNullException>("replacementText", () => input.ReplaceLineEndings(null));
}
[Theory]
[InlineData("", new[] { "" })]
[InlineData("abc", new[] { "abc" })]
[InlineData("<CR>", new[] { "", "" })] // empty sequences before and after the CR
[InlineData("<CR><CR>", new[] { "", "", "" })] // empty sequences before and after the CR (CR doesn't consume CR)
[InlineData("<CR><LF>", new[] { "", "" })] // CR should swallow any LF which follows
[InlineData("a<CR><LF><LF>z", new[] { "a", "", "z" })] // CR should swallow only a single LF which follows
[InlineData("a<CR>b<LF>c", new[] { "a", "b", "c" })] // CR shouldn't swallow anything other than LF
[InlineData("aa<CR>bb<LF><CR>cc", new[] { "aa", "bb", "", "cc" })] // LF shouldn't swallow CR which follows
[InlineData("a<CR>b<VT>c<LF>d<NEL>e<FF>f<PS>g<LS>h", new[] { "a", "b<VT>c", "d", "e", "f", "g", "h" })] // VT not recognized as NLF
[InlineData("xyz<NEL>", new[] { "xyz", "" })] // sequence at end produces empty string
[InlineData("<NEL>xyz", new[] { "", "xyz" })] // sequence at beginning produces empty string
[InlineData("abc<NAK>%def", new[] { "abc<NAK>%def" })] // we don't recognize EBCDIC encodings for LF (see Unicode Standard, Sec. 5.8, Table 5-1)
public static void ReplaceLineEndings(string input, string[] expectedSegments)
{
    input = FixupSequences(input);
    expectedSegments = Array.ConvertAll(expectedSegments, FixupSequences);

    // With a single expected segment the input contains no newline sequence at all, in which
    // case the original string instance should be returned as an optimization.
    bool inputHasNoNewlines = expectedSegments.Length == 1;

    // Parameterless overload: behaves as if Environment.NewLine had been passed.
    Assert.Equal(string.Join(Environment.NewLine, expectedSegments), input.ReplaceLineEndings());
    if (inputHasNoNewlines)
    {
        Assert.Same(input, input.ReplaceLineEndings());
    }

    // Explicit replacement text: the environment newline, removal of newlines entirely
    // (joining with "" is plain concatenation), and a custom separator.
    foreach (string replacement in new[] { Environment.NewLine, "", "<SEPARATOR>" })
    {
        Assert.Equal(string.Join(replacement, expectedSegments), input.ReplaceLineEndings(replacement));
        if (inputHasNoNewlines)
        {
            Assert.Same(input, input.ReplaceLineEndings(replacement));
        }
    }

    static string FixupSequences(string input)
    {
        // We use <XYZ> markers so that the original strings show up better in the xunit test runner
        // <VT> is included as a negative test; we *do not* want ReplaceLineEndings to honor it
        (string Marker, string Replacement)[] markers =
        {
            ("<CR>", "\r"),
            ("<LF>", "\n"),
            ("<VT>", "\v"),
            ("<FF>", "\f"),
            ("<NAK>", "\u0015"),
            ("<NEL>", "\u0085"),
            ("<LS>", "\u2028"),
            ("<PS>", "\u2029"),
        };

        foreach ((string marker, string replacement) in markers)
        {
            input = input.Replace(marker, replacement);
        }

        return input;
    }
}
[Theory]
[InlineData("Hello", 'H', true)]
[InlineData("Hello", 'h', false)]
......
......@@ -25,12 +25,6 @@ internal static partial class JsonTestHelper
public const string SingleFormatString = "G9";
#endif
private const string CompiledNewline = @"
";
private static readonly bool s_replaceNewlines =
!StringComparer.Ordinal.Equals(CompiledNewline, Environment.NewLine);
public static string NewtonsoftReturnStringHelper(TextReader reader)
{
var sb = new StringBuilder();
......@@ -812,11 +806,22 @@ public static void AssertContentsNotEqualAgainstJsonNet(string expectedValue, st
// Returns 'value' with every match of s_stripWhitespace replaced by the empty string.
// NOTE(review): s_stripWhitespace is declared outside this view - presumably a Regex matching
// whitespace; confirm against its declaration.
public static string StripWhitespace(this string value)
=> s_stripWhitespace.Replace(value, string.Empty);
#if NET6_0_OR_GREATER
// Rewrites every newline sequence in 'value' to Environment.NewLine via string.ReplaceLineEndings().
// This is needed due to the fact that git might normalize line endings when checking-out files
public static string NormalizeLineEndings(this string value) => value.ReplaceLineEndings();
#else
// Fallback for targets where NET6_0_OR_GREATER is not defined.
// The newline sequence this source file itself contains, captured through a verbatim string
// literal that spans a line break. Do not reformat the two lines of the literal.
private const string CompiledNewline = @"
";
// True when the newline captured at compile time differs from the running environment's newline.
private static readonly bool s_replaceNewlines =
!StringComparer.Ordinal.Equals(CompiledNewline, Environment.NewLine);
// Should be called only on compile-time strings
// This is needed due to the fact that git might normalize line endings when checking-out files
// Unlike the branch above, only occurrences of CompiledNewline are rewritten here; other
// newline sequences in 'value' are left untouched.
public static string NormalizeLineEndings(this string value)
=> s_replaceNewlines ?
value.Replace(CompiledNewline, Environment.NewLine) :
value;
#endif
}
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册