Unify char.IsControl and Rune.IsControl (#1988)

Not perf-critical; just some cleanup of low-hanging fruit.

Unify char.IsControl and Rune.IsControl (#1988)
Not perf-critical; just some cleanup of low-hanging fruit.
9b4eece6 · Levi Broderick · GitHub · 9c82a36c · 9b4eece6 · 9b4eece6
2 changed files
--- a/src/libraries/System.Private.CoreLib/src/System/Char.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Char.cs
@@ -472,11 +472,10 @@ object IConvertible.ToType(Type type, IFormatProvider? provider)

        public static bool IsControl(char c)
        {
-            if (IsLatin1(c))
-            {
-                return GetLatin1UnicodeCategory(c) == UnicodeCategory.Control;
-            }
-            return CharUnicodeInfo.GetUnicodeCategory(c) == UnicodeCategory.Control;
+            // This works because 'c' can never be -1.
+            // See comments in Rune.IsControl for more information.
+
+            return (((uint)c + 1) & ~0x80u) <= 0x20u;
        }

        public static bool IsControl(string s, int index)
@@ -487,12 +486,9 @@ public static bool IsControl(string s, int index)
            {
                throw new ArgumentOutOfRangeException(nameof(index));
            }
-            char c = s[index];
-            if (IsLatin1(c))
-            {
-                return GetLatin1UnicodeCategory(c) == UnicodeCategory.Control;
-            }
-            return CharUnicodeInfo.GetUnicodeCategory(s, index) == UnicodeCategory.Control;
+
+            // Control chars are always in the BMP, so don't need to worry about surrogate handling.
+            return IsControl(s[index]);
        }

        public static bool IsDigit(string s, int index)

--- a/src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs
@@ -1133,8 +1133,8 @@ public static bool IsControl(Rune value)
        {
            // Per the Unicode stability policy, the set of control characters
            // is forever fixed at [ U+0000..U+001F ], [ U+007F..U+009F ]. No
-            // characters will ever be added to the "control characters" group.
-            // See http://www.unicode.org/policies/stability_policy.html.
+            // characters will ever be added to or removed from the "control characters"
+            // group. See https://www.unicode.org/policies/stability_policy.html.

            // Logic below depends on Rune.Value never being -1 (since Rune is a validating type)
            // 00..1F (+1) => 01..20 (&~80) => 01..20