diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs index 2c546be14525c17224ca9a1ab8e999bbe6744fd4..0d42d4c357c44e88be11a4c163476c91053603e8 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs @@ -4006,8 +4006,8 @@ private static string MatchCharacterClass(bool hasTextInfo, RegexOptions options // character class were [A-Za-z0-9], so since the ch is now known to be >= 128, we // can just fail the comparison. return negate ? - $"((ch = {chExpr}) >= 128 || ({Literal(bitVectorString)}[ch >> 4] & (1 << (ch & 0xF))) == 0)" : - $"((ch = {chExpr}) < 128 && ({Literal(bitVectorString)}[ch >> 4] & (1 << (ch & 0xF))) != 0)"; + $"((ch = {chExpr}) >= {Literal((char)analysis.UpperBoundExclusiveIfContainsOnlyAscii)} || ({Literal(bitVectorString)}[ch >> 4] & (1 << (ch & 0xF))) == 0)" : + $"((ch = {chExpr}) < {Literal((char)analysis.UpperBoundExclusiveIfContainsOnlyAscii)} && ({Literal(bitVectorString)}[ch >> 4] & (1 << (ch & 0xF))) != 0)"; } if (analysis.AllNonAsciiContained) diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs index a85ecef23031737673ae16d855bcc9771fb59f3b..398f882c79c7b9f1e865639cde09d0cd3ec2d67e 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs @@ -921,6 +921,8 @@ internal struct CharClassAnalysisResults public bool AllAsciiContained; /// true if we know for sure that all non-ASCII values are in the set; otherwise, false. public bool AllNonAsciiContained; + /// The exclusive upper bound. Only valid if ContainsOnlyAscii is true. 
+ public int UpperBoundExclusiveIfContainsOnlyAscii; } /// Analyzes the set to determine some basic properties that can be used to optimize usage. @@ -962,6 +964,7 @@ internal static CharClassAnalysisResults Analyze(string set) AllAsciiContained = false, ContainsOnlyAscii = set[set.Length - 1] <= 128, ContainsNoAscii = set[SetStartIndex] >= 128, + UpperBoundExclusiveIfContainsOnlyAscii = set[set.Length - 1], }; } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs index b3dfa8d120c271c165440007d5923e39924141b2..a64f098a6610d027b8dfb6e494a59c72c4ae9101 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs @@ -5020,7 +5020,7 @@ void EmitCharInClass() // ch < 128 ? (bitVectorString[ch >> 4] & (1 << (ch & 0xF))) != 0 : Ldloc(tempLocal); - Ldc(128); + Ldc(analysis.ContainsOnlyAscii ? analysis.UpperBoundExclusiveIfContainsOnlyAscii : 128); Bge(comparisonLabel); Ldstr(bitVectorString); Ldloc(tempLocal);