From 67b1ede8cb7758b9c54bfce050c9c4b15715c6bc Mon Sep 17 00:00:00 2001 From: Meri Khamoyan <96171496+mkhamoyan@users.noreply.github.com> Date: Thu, 29 Jun 2023 12:23:59 +0400 Subject: [PATCH] [OSX] HybridGlobalization Implement casing functions (#87919) Implement GlobalizationNative_ChangeCaseNative , GlobalizationNative_ChangeCaseInvariantNative for OSX --- .../features/globalization-hybrid-mode.md | 32 +++++- .../Common/src/Interop/Interop.Casing.OSX.cs | 16 +++ .../Common/src/Interop/Interop.ResultCode.cs | 3 +- .../System.Globalization.IOS.Tests.csproj | 1 + .../System/Globalization/TextInfoTests.cs | 44 ++++---- .../System.Private.CoreLib.Shared.projitems | 4 + .../src/System/Globalization/TextInfo.OSX.cs | 27 +++++ .../src/System/Globalization/TextInfo.cs | 6 + src/mono/mono/mini/CMakeLists.txt | 3 +- .../CMakeLists.txt | 2 +- .../System.Globalization.Native/entrypoints.c | 8 +- .../System.Globalization.Native/pal_casing.h | 16 +++ .../System.Globalization.Native/pal_casing.m | 103 ++++++++++++++++++ .../System.Globalization.Native/pal_errors.h | 3 +- .../System.Globalization.Native/pal_locale.m | 2 +- 15 files changed, 242 insertions(+), 28 deletions(-) create mode 100644 src/libraries/Common/src/Interop/Interop.Casing.OSX.cs create mode 100644 src/libraries/System.Private.CoreLib/src/System/Globalization/TextInfo.OSX.cs create mode 100644 src/native/libs/System.Globalization.Native/pal_casing.m diff --git a/docs/design/features/globalization-hybrid-mode.md b/docs/design/features/globalization-hybrid-mode.md index 8b0be8251e5..920f808b4e6 100644 --- a/docs/design/features/globalization-hybrid-mode.md +++ b/docs/design/features/globalization-hybrid-mode.md @@ -408,4 +408,34 @@ Affected public APIs: - CompareInfo.GetSortKeyLength - CompareInfo.GetHashCode -Apple Native API does not have an equivalent, so they throw `PlatformNotSupportedException`. 
\ No newline at end of file +Apple Native API does not have an equivalent, so they throw `PlatformNotSupportedException`. + + +## Case change + +Affected public APIs: +- TextInfo.ToLower +- TextInfo.ToUpper + +The following Apple native functions are used: +- [uppercaseString](https://developer.apple.com/documentation/foundation/nsstring/1409855-uppercasestring) +- [lowercaseString](https://developer.apple.com/documentation/foundation/nsstring/1408467-lowercasestring) +- [uppercaseStringWithLocale](https://developer.apple.com/documentation/foundation/nsstring/1413316-uppercasestringwithlocale?language=objc) +- [lowercaseStringWithLocale](https://developer.apple.com/documentation/foundation/nsstring/1417298-lowercasestringwithlocale?language=objc) + +Behavioural changes compared to ICU: + + - Final sigma behavior correction: + + ICU-based case change does not respect the final-sigma rule, but hybrid mode does, so "ΒΌΛΟΣ" -> "βόλος", not "βόλοσ". + + - The cases below throw an exception because the destination buffer is insufficiently sized: + + - Capitalizing the German letter ß (sharp S) gives SS when using Apple native functions. + + - Capitalizing ligatures gives a different result on Apple platforms, e.g. "\uFB00" (ff) is uppercased to (FF) + + - Capitalizing "\u0149" (ʼn) on Apple platforms returns a combination of "\u02BC" (ʼ) and N -> (ʼN) + + + diff --git a/src/libraries/Common/src/Interop/Interop.Casing.OSX.cs b/src/libraries/Common/src/Interop/Interop.Casing.OSX.cs new file mode 100644 index 00000000000..5e0140faa4c --- /dev/null +++ b/src/libraries/Common/src/Interop/Interop.Casing.OSX.cs @@ -0,0 +1,16 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+
+using System.Runtime.InteropServices;
+
+internal static partial class Interop
+{
+    internal static partial class Globalization
+    {
+        // Binds the native export GlobalizationNative_ChangeCaseNative.
+        // localeName/lNameLen: culture name and its length in UTF-16 code units (the native side
+        // declares it as const uint16_t*, hence StringMarshalling.Utf16).
+        // src/srcLen: input characters; dstBuffer/dstBufferCapacity: output buffer and its capacity.
+        // bToUpper: true for upper casing, false for lower casing.
+        // Returns an Interop.Globalization.ResultCode value as int (Success == 0).
+        [LibraryImport(Libraries.GlobalizationNative, EntryPoint = "GlobalizationNative_ChangeCaseNative", StringMarshalling = StringMarshalling.Utf16)]
+        internal static unsafe partial int ChangeCaseNative(string localeName, int lNameLen, char* src, int srcLen, char* dstBuffer, int dstBufferCapacity, [MarshalAs(UnmanagedType.Bool)] bool bToUpper);
+
+        // Binds the native export GlobalizationNative_ChangeCaseInvariantNative (no locale argument).
+        // The signature has no string parameter, so StringMarshalling does not affect marshalling here;
+        // Utf16 is declared to match ChangeCaseNative and the UTF-16 char* buffers.
+        // Returns an Interop.Globalization.ResultCode value as int (Success == 0).
+        [LibraryImport(Libraries.GlobalizationNative, EntryPoint = "GlobalizationNative_ChangeCaseInvariantNative", StringMarshalling = StringMarshalling.Utf16)]
+        internal static unsafe partial int ChangeCaseInvariantNative(char* src, int srcLen, char* dstBuffer, int dstBufferCapacity, [MarshalAs(UnmanagedType.Bool)] bool bToUpper);
+    }
+}
diff --git a/src/libraries/Common/src/Interop/Interop.ResultCode.cs b/src/libraries/Common/src/Interop/Interop.ResultCode.cs
index b99787f061f..c55bba661fc 100644
--- a/src/libraries/Common/src/Interop/Interop.ResultCode.cs
+++ b/src/libraries/Common/src/Interop/Interop.ResultCode.cs
@@ -11,7 +11,8 @@ internal enum ResultCode
             Success = 0,
             UnknownError = 1,
             InsufficientBuffer = 2,
-            OutOfMemory = 3
+            OutOfMemory = 3,
+            InvalidCodePoint = 4,
         }
     }
 }
diff --git a/src/libraries/System.Globalization/tests/Hybrid/System.Globalization.IOS.Tests.csproj b/src/libraries/System.Globalization/tests/Hybrid/System.Globalization.IOS.Tests.csproj
index 1c680de3e24..da9a1a79c18 100644
--- a/src/libraries/System.Globalization/tests/Hybrid/System.Globalization.IOS.Tests.csproj
+++ b/src/libraries/System.Globalization/tests/Hybrid/System.Globalization.IOS.Tests.csproj
@@ -38,5 +38,6 @@
+
diff --git a/src/libraries/System.Globalization/tests/System/Globalization/TextInfoTests.cs b/src/libraries/System.Globalization/tests/System/Globalization/TextInfoTests.cs
index fcc4c53f5e4..6426a2a427b 100644
--- a/src/libraries/System.Globalization/tests/System/Globalization/TextInfoTests.cs
+++
b/src/libraries/System.Globalization/tests/System/Globalization/TextInfoTests.cs @@ -274,9 +274,9 @@ public static IEnumerable ToLower_TestData() // we also don't preform. // Greek Capital Letter Sigma (does not case to U+03C2 with "final sigma" rule). yield return new object[] { cultureName, "\u03A3", "\u03C3" }; - if (PlatformDetection.IsHybridGlobalizationOnBrowser) + if (PlatformDetection.IsHybridGlobalizationOnBrowser || PlatformDetection.IsHybridGlobalizationOnOSX) { - // JS is using "final sigma" rule correctly - it's costly to unify it with ICU's behavior + // JS and Apple platforms are using "final sigma" rule correctly - it's costly to unify it with ICU's behavior yield return new object[] { cultureName, "O\u03A3", "o\u03C2" }; } else @@ -396,23 +396,29 @@ public static IEnumerable ToUpper_TestData() // RAINBOW (outside the BMP and does not case) yield return new object[] { cultureName, "\U0001F308", "\U0001F308" }; - // Unicode defines some codepoints which expand into multiple codepoints - // when cased (see SpecialCasing.txt from UNIDATA for some examples). We have never done - // these sorts of expansions, since it would cause string lengths to change when cased, - // which is non-intuitive. In addition, there are some context sensitive mappings which - // we also don't preform. - // es-zed does not case to SS when uppercased. - yield return new object[] { cultureName, "\u00DF", "\u00DF" }; - yield return new object[] { cultureName, "stra\u00DFe", "STRA\u00DFE" }; - if (!PlatformDetection.IsNlsGlobalization) - yield return new object[] { cultureName, "st\uD801\uDC37ra\u00DFe", "ST\uD801\uDC0FRA\u00DFE" }; - - // Ligatures do not expand when cased. - yield return new object[] { cultureName, "\uFB00", "\uFB00" }; - - // Precomposed character with no uppercase variant, we don't want to "decompose" this - // as part of casing. 
- yield return new object[] { cultureName, "\u0149", "\u0149" }; + if (!PlatformDetection.IsHybridGlobalizationOnOSX) + { + // Unicode defines some codepoints which expand into multiple codepoints + // when cased (see SpecialCasing.txt from UNIDATA for some examples). We have never done + // these sorts of expansions, since it would cause string lengths to change when cased, + // which is non-intuitive. In addition, there are some context sensitive mappings which + // we also don't preform. + // es-zed does not case to SS when uppercased. + // on OSX, capitalizing the German letter ß (sharp S) gives SS + yield return new object[] { cultureName, "\u00DF", "\u00DF" }; + yield return new object[] { cultureName, "stra\u00DFe", "STRA\u00DFE" }; + if (!PlatformDetection.IsNlsGlobalization) + yield return new object[] { cultureName, "st\uD801\uDC37ra\u00DFe", "ST\uD801\uDC0FRA\u00DFE" }; + + // Ligatures do not expand when cased. + // on OSX, this is uppercase to "FF" + yield return new object[] { cultureName, "\uFB00", "\uFB00" }; + + // Precomposed character with no uppercase variant, we don't want to "decompose" this + // as part of casing. 
+                // on OSX, this is uppercased to "ʼN"
+                yield return new object[] { cultureName, "\u0149", "\u0149" };
+            }
         }
         // Turkish i
diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems
index 0912d9b4598..5cc1e4823e5 100644
--- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems
+++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems
@@ -389,6 +389,7 @@
+
@@ -1273,6 +1274,9 @@
       Common\Interop\Interop.Casing.cs
+
+      Common\Interop\Interop.Casing.OSX.cs
+
       Common\Interop\Interop.Collation.cs
diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/TextInfo.OSX.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/TextInfo.OSX.cs
new file mode 100644
index 00000000000..4bfd34e3e58
--- /dev/null
+++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/TextInfo.OSX.cs
@@ -0,0 +1,27 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+
+namespace System.Globalization
+{
+    public partial class TextInfo
+    {
+        // Cases srcLen chars from src into dstBuffer through the native interop layer.
+        // Uses the invariant entry point when this TextInfo has an empty culture name,
+        // otherwise passes _cultureName to the locale-aware entry point.
+        // Throws Exception when the native call returns anything other than ResultCode.Success.
+        internal unsafe void ChangeCaseNative(char* src, int srcLen, char* dstBuffer, int dstBufferCapacity, bool toUpper)
+        {
+            Debug.Assert(!GlobalizationMode.Invariant);
+            Debug.Assert(!GlobalizationMode.UseNls);
+            Debug.Assert(GlobalizationMode.Hybrid);
+
+            int status = HasEmptyCultureName
+                ? Interop.Globalization.ChangeCaseInvariantNative(src, srcLen, dstBuffer, dstBufferCapacity, toUpper)
+                : Interop.Globalization.ChangeCaseNative(_cultureName, _cultureName.Length, src, srcLen, dstBuffer, dstBufferCapacity, toUpper);
+
+            if (status == (int)Interop.Globalization.ResultCode.Success)
+                return;
+
+            // Map the native result code to the message carried by the exception.
+            string message;
+            if (status == (int)Interop.Globalization.ResultCode.InvalidCodePoint)
+                message = "Invalid code point while case changing";
+            else if (status == (int)Interop.Globalization.ResultCode.InsufficientBuffer)
+                message = "Insufficiently sized destination buffer";
+            else
+                message = "Exception occurred while case changing";
+
+            throw new Exception(message);
+        }
+    }
+}
diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/TextInfo.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/TextInfo.cs
index 018724ada78..d868e0dfca8 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Globalization/TextInfo.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/TextInfo.cs
@@ -692,6 +692,12 @@ private unsafe void ChangeCaseCore(char* src, int srcLen, char* dstBuffer, int d
                 JsChangeCase(src, srcLen, dstBuffer, dstBufferCapacity, bToUpper);
                 return;
             }
+#elif TARGET_OSX || TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS
+            if (GlobalizationMode.Hybrid)
+            {
+                ChangeCaseNative(src, srcLen, dstBuffer, dstBufferCapacity, bToUpper);
+                return;
+            }
 #endif
             IcuChangeCase(src, srcLen, dstBuffer, dstBufferCapacity, bToUpper);
         }
diff --git a/src/mono/mono/mini/CMakeLists.txt b/src/mono/mono/mini/CMakeLists.txt
index 6f5e8507315..54f20529261 100644
--- a/src/mono/mono/mini/CMakeLists.txt
+++ b/src/mono/mono/mini/CMakeLists.txt
@@ -71,7 +71,8 @@ if(HAVE_SYS_ICU)
     set(icu_shim_sources_base
       ${icu_shim_sources_base}
       pal_locale.m
-      pal_collation.m)
+      pal_collation.m
+      pal_casing.m)
   endif()
 addprefix(icu_shim_sources "${ICU_SHIM_PATH}" "${icu_shim_sources_base}")
diff --git a/src/native/libs/System.Globalization.Native/CMakeLists.txt b/src/native/libs/System.Globalization.Native/CMakeLists.txt
index 8f528be93dc..1c9a25f5d5d 100644
--- a/src/native/libs/System.Globalization.Native/CMakeLists.txt
+++ b/src/native/libs/System.Globalization.Native/CMakeLists.txt
@@ -93,7 +93,7 @@ else()
 endif()
 if (CLR_CMAKE_TARGET_APPLE)
-    set(NATIVEGLOBALIZATION_SOURCES ${NATIVEGLOBALIZATION_SOURCES} pal_locale.m pal_collation.m)
+    set(NATIVEGLOBALIZATION_SOURCES ${NATIVEGLOBALIZATION_SOURCES} pal_locale.m
pal_collation.m pal_casing.m) endif() # time zone names are filtered out of icu data for the browser and associated functionality is disabled diff --git a/src/native/libs/System.Globalization.Native/entrypoints.c b/src/native/libs/System.Globalization.Native/entrypoints.c index 9e0e4f42769..37246e1bfbd 100644 --- a/src/native/libs/System.Globalization.Native/entrypoints.c +++ b/src/native/libs/System.Globalization.Native/entrypoints.c @@ -59,16 +59,18 @@ static const Entry s_globalizationNative[] = DllImportEntry(GlobalizationNative_ToUnicode) DllImportEntry(GlobalizationNative_WindowsIdToIanaId) #ifdef __APPLE__ + DllImportEntry(GlobalizationNative_ChangeCaseInvariantNative) + DllImportEntry(GlobalizationNative_ChangeCaseNative) DllImportEntry(GlobalizationNative_CompareStringNative) - DllImportEntry(GlobalizationNative_GetLocaleNameNative) - DllImportEntry(GlobalizationNative_GetLocaleInfoStringNative) + DllImportEntry(GlobalizationNative_EndsWithNative) DllImportEntry(GlobalizationNative_GetLocaleInfoIntNative) DllImportEntry(GlobalizationNative_GetLocaleInfoPrimaryGroupingSizeNative) DllImportEntry(GlobalizationNative_GetLocaleInfoSecondaryGroupingSizeNative) + DllImportEntry(GlobalizationNative_GetLocaleInfoStringNative) + DllImportEntry(GlobalizationNative_GetLocaleNameNative) DllImportEntry(GlobalizationNative_GetLocaleTimeFormatNative) DllImportEntry(GlobalizationNative_IndexOfNative) DllImportEntry(GlobalizationNative_StartsWithNative) - DllImportEntry(GlobalizationNative_EndsWithNative) #endif }; diff --git a/src/native/libs/System.Globalization.Native/pal_casing.h b/src/native/libs/System.Globalization.Native/pal_casing.h index b49a77593e3..e39d38ecb1c 100644 --- a/src/native/libs/System.Globalization.Native/pal_casing.h +++ b/src/native/libs/System.Globalization.Native/pal_casing.h @@ -23,3 +23,19 @@ PALEXPORT void GlobalizationNative_ChangeCaseTurkish(const UChar* lpSrc, int32_t bToUpper); PALEXPORT void 
GlobalizationNative_InitOrdinalCasingPage(int32_t pageNumber, UChar* pTarget);
+
+#ifdef __APPLE__
+// Performs upper or lower casing of lpSrc (cwSrcLength UTF-16 code units) into lpDst
+// (capacity cwDstLength code units), taking into account the locale named by localeName.
+// bToUpper selects upper casing when non-zero, lower casing when zero.
+// Returns 0 (Success) or a non-zero ResultCode from pal_errors.h on failure.
+PALEXPORT int32_t GlobalizationNative_ChangeCaseNative(const uint16_t* localeName,
+                                                       int32_t lNameLength,
+                                                       const uint16_t* lpSrc,
+                                                       int32_t cwSrcLength,
+                                                       uint16_t* lpDst,
+                                                       int32_t cwDstLength,
+                                                       int32_t bToUpper);
+
+// Same as GlobalizationNative_ChangeCaseNative but without a locale argument.
+// Returns 0 (Success) or a non-zero ResultCode from pal_errors.h on failure.
+PALEXPORT int32_t GlobalizationNative_ChangeCaseInvariantNative(const uint16_t* lpSrc,
+                                                                int32_t cwSrcLength,
+                                                                uint16_t* lpDst,
+                                                                int32_t cwDstLength,
+                                                                int32_t bToUpper);
+#endif
diff --git a/src/native/libs/System.Globalization.Native/pal_casing.m b/src/native/libs/System.Globalization.Native/pal_casing.m
new file mode 100644
index 00000000000..7aeacb54cf4
--- /dev/null
+++ b/src/native/libs/System.Globalization.Native/pal_casing.m
@@ -0,0 +1,103 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#include "pal_icushim_internal.h"
+#include "pal_casing.h"
+#include "pal_errors.h"
+
+// NSString and NSLocale, used by the casing functions in this file, come from Foundation.
+#import <Foundation/Foundation.h>
+
+#if defined(TARGET_OSX) || defined(TARGET_MACCATALYST) || defined(TARGET_IOS) || defined(TARGET_TVOS)
+
+
+/**
+ * Append a code point to a string, overwriting 1 or 2 code units.
+ * The offset points to the current end of the string contents
+ * and is advanced (post-increment).
+ * "Safe" macro, checks for a valid code point.
+ * Converts code points outside of Basic Multilingual Plane into
+ * corresponding surrogate pairs if sufficient space in the string.
+ * High surrogate range: 0xD800 - 0xDBFF
+ * Low surrogate range: 0xDC00 - 0xDFFF
+ * If the code point is not valid or a trail surrogate does not fit,
+ * then isError is set to true.
+ *
+ * @param buffer const uint16_t * string buffer
+ * @param offset string offset, must be offset<capacity
+ * @param capacity number of uint16_t code units the buffer can hold
+ * @param codePoint code point to append
+ * @param isError set to InsufficientBuffer or InvalidCodePoint on failure, otherwise not modified
+ */
+#define Append(buffer, offset, capacity, codePoint, isError) { \
+    if ((offset) >= (capacity)) /* insufficiently sized destination buffer */ { \
+        (isError) = InsufficientBuffer; \
+    } else if ((uint32_t)(codePoint) > 0x10ffff) /* invalid code point */ { \
+        (isError) = InvalidCodePoint; \
+    } else if ((uint32_t)(codePoint) <= 0xffff) { \
+        (buffer)[(offset)++] = (uint16_t)(codePoint); \
+    } else if ((offset) + 1 >= (capacity)) /* trail surrogate does not fit */ { \
+        (isError) = InsufficientBuffer; \
+    } else { \
+        (buffer)[(offset)++] = (uint16_t)(((codePoint) >> 10) + 0xd7c0); \
+        (buffer)[(offset)++] = (uint16_t)(((codePoint)&0x3ff) | 0xdc00); \
+    } \
+}
+
+/*
+Function:
+ChangeCaseNative
+
+Performs upper or lower casing of a string into a new buffer, taking into account the specified locale.
+Returns 0 for success, non-zero on failure see ErrorCodes.
+
+localeName/lNameLength: locale identifier as UTF-16 code units; NULL or zero length selects [NSLocale systemLocale].
+lpSrc/cwSrcLength: input as UTF-16 code units.
+lpDst/cwDstLength: output buffer and its capacity in code units; the cased text may be longer than the input.
+bToUpper: non-zero for upper casing, zero for lower casing.
+*/
+int32_t GlobalizationNative_ChangeCaseNative(const uint16_t* localeName, int32_t lNameLength,
+    const uint16_t* lpSrc, int32_t cwSrcLength, uint16_t* lpDst, int32_t cwDstLength, int32_t bToUpper)
+{
+    // The convenience constructors below return autoreleased objects. This entry point is
+    // exported for P/Invoke, so no enclosing pool can be assumed; drain a local one on exit.
+    @autoreleasepool
+    {
+        NSLocale *currentLocale;
+        if (localeName == NULL || lNameLength == 0)
+        {
+            currentLocale = [NSLocale systemLocale];
+        }
+        else
+        {
+            NSString *locName = [NSString stringWithCharacters:localeName length:(NSUInteger)lNameLength];
+            currentLocale = [NSLocale localeWithLocaleIdentifier:locName];
+        }
+        NSString *source = [NSString stringWithCharacters:lpSrc length:(NSUInteger)cwSrcLength];
+        NSString *result = bToUpper ? [source uppercaseStringWithLocale:currentLocale] : [source lowercaseStringWithLocale:currentLocale];
+
+        int32_t dstIdx = 0, isError = 0;
+        // Read the length once and index with NSUInteger to avoid a signed/unsigned comparison.
+        const NSUInteger resultLength = result.length;
+        for (NSUInteger srcIdx = 0; srcIdx < resultLength; srcIdx++)
+        {
+            // characterAtIndex: yields one UTF-16 code unit, so surrogate pairs are copied unit by unit.
+            uint16_t dstCodepoint = [result characterAtIndex:srcIdx];
+            Append(lpDst, dstIdx, cwDstLength, dstCodepoint, isError);
+            if (isError)
+                return isError;
+        }
+        return Success;
+    }
+}
+
+/*
+Function:
+ChangeCaseInvariantNative
+
+Performs upper or lower casing of a string into a new buffer.
+Returns 0 for success, non-zero on failure see ErrorCodes.
+*/
+// lpSrc/cwSrcLength: input as UTF-16 code units.
+// lpDst/cwDstLength: output buffer and its capacity in code units.
+// bToUpper: non-zero for upper casing, zero for lower casing.
+int32_t GlobalizationNative_ChangeCaseInvariantNative(const uint16_t* lpSrc, int32_t cwSrcLength, uint16_t* lpDst, int32_t cwDstLength, int32_t bToUpper)
+{
+    // The NSString objects created below are autoreleased. This entry point is exported
+    // for P/Invoke, so no enclosing pool can be assumed; drain a local one on exit.
+    @autoreleasepool
+    {
+        NSString *source = [NSString stringWithCharacters:lpSrc length:(NSUInteger)cwSrcLength];
+        NSString *result = bToUpper ? source.uppercaseString : source.lowercaseString;
+
+        int32_t dstIdx = 0, isError = 0;
+        // Read the length once and index with NSUInteger to avoid a signed/unsigned comparison.
+        const NSUInteger resultLength = result.length;
+        for (NSUInteger srcIdx = 0; srcIdx < resultLength; srcIdx++)
+        {
+            // characterAtIndex: yields one UTF-16 code unit at a time.
+            uint16_t dstCodepoint = [result characterAtIndex:srcIdx];
+            Append(lpDst, dstIdx, cwDstLength, dstCodepoint, isError);
+            if (isError)
+                return isError;
+        }
+        return Success;
+    }
+}
+
+#endif
diff --git a/src/native/libs/System.Globalization.Native/pal_errors.h b/src/native/libs/System.Globalization.Native/pal_errors.h
index 535092a8d51..84fe004902e 100644
--- a/src/native/libs/System.Globalization.Native/pal_errors.h
+++ b/src/native/libs/System.Globalization.Native/pal_errors.h
@@ -12,5 +12,6 @@ typedef enum
     Success = 0,
     UnknownError = 1,
     InsufficientBuffer = 2,
-    OutOfMemory = 3
+    OutOfMemory = 3,
+    InvalidCodePoint = 4
 } ResultCode;
diff --git a/src/native/libs/System.Globalization.Native/pal_locale.m b/src/native/libs/System.Globalization.Native/pal_locale.m
index c26e89b6018..7856a8cb672 100644
--- a/src/native/libs/System.Globalization.Native/pal_locale.m
+++ b/src/native/libs/System.Globalization.Native/pal_locale.m
@@ -459,7 +459,7 @@ int32_t GlobalizationNative_GetLocaleInfoIntNative(const char* localeName, Local
         }
         case LocaleNumber_ReadingLayout:
         {
-            NSLocaleLanguageDirection langDir = [NSLocale characterDirectionForLanguage:[[NSLocale currentLocale] objectForKey:NSLocaleLanguageCode]];
+            NSLocaleLanguageDirection langDir = [NSLocale characterDirectionForLanguage:[currentLocale objectForKey:NSLocaleLanguageCode]];
             // 0 - Left to right (such as en-US)
             // 1 - Right to left (such as arabic locales)
             value = NSLocaleLanguageDirectionRightToLeft == langDir ? 1 : 0;
-- 
GitLab