未验证 提交 67b1ede8 编写于 作者: M Meri Khamoyan 提交者: GitHub

[OSX] HybridGlobalization Implement casing functions (#87919)


Implement GlobalizationNative_ChangeCaseNative , GlobalizationNative_ChangeCaseInvariantNative for OSX
上级 c88b3776
...@@ -408,4 +408,34 @@ Affected public APIs: ...@@ -408,4 +408,34 @@ Affected public APIs:
- CompareInfo.GetSortKeyLength - CompareInfo.GetSortKeyLength
- CompareInfo.GetHashCode - CompareInfo.GetHashCode
Apple Native API does not have an equivalent, so they throw `PlatformNotSupportedException`. Apple Native API does not have an equivalent, so they throw `PlatformNotSupportedException`.
\ No newline at end of file
## Case change
Affected public APIs:
- TextInfo.ToLower
- TextInfo.ToUpper
The following Apple native functions are used:
- [uppercaseString](https://developer.apple.com/documentation/foundation/nsstring/1409855-uppercasestring)
- [lowercaseString](https://developer.apple.com/documentation/foundation/nsstring/1408467-lowercasestring)
- [uppercaseStringWithLocale](https://developer.apple.com/documentation/foundation/nsstring/1413316-uppercasestringwithlocale?language=objc)
- [lowercaseStringWithLocale](https://developer.apple.com/documentation/foundation/nsstring/1417298-lowercasestringwithlocale?language=objc)
Behavioural changes compared to ICU:
- Final sigma behavior correction:
ICU-based case change does not respect the final-sigma rule, but hybrid mode does, so "ΒΌΛΟΣ" -> "βόλος", not "βόλοσ".
- The cases below throw an exception because the destination buffer is too small (the cased result is longer than the source string):
- Capitalizing the German letter ß (sharp S) gives SS when using Apple native functions.
- Capitalizing ligatures gives a different result on Apple platforms, e.g. "\uFB00" (ff) is uppercased to "FF".
- Capitalizing "\u0149" (ʼn) on Apple platforms returns combination of "\u02BC" (ʼ) and N -> (ʼN)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
using System.Runtime.InteropServices;
internal static partial class Interop
{
    internal static partial class Globalization
    {
        // Culture-aware case change implemented by the Apple native globalization library.
        // localeName/lNameLen identify the culture; src/srcLen is the UTF-16 input;
        // dstBuffer/dstBufferCapacity is the caller-allocated UTF-16 output buffer.
        // Returns a ResultCode value (0 == Success).
        [LibraryImport(Libraries.GlobalizationNative, EntryPoint = "GlobalizationNative_ChangeCaseNative", StringMarshalling = StringMarshalling.Utf16)]
        internal static unsafe partial int ChangeCaseNative(string localeName, int lNameLen, char* src, int srcLen, char* dstBuffer, int dstBufferCapacity, [MarshalAs(UnmanagedType.Bool)] bool bToUpper);

        // Locale-independent case change. The signature has no string parameters, so the
        // StringMarshalling setting does not affect marshalling; it is declared as Utf16 to
        // match the UTF-16 (uint16_t*) buffers the native function works on and to stay
        // consistent with ChangeCaseNative above.
        // Returns a ResultCode value (0 == Success).
        [LibraryImport(Libraries.GlobalizationNative, EntryPoint = "GlobalizationNative_ChangeCaseInvariantNative", StringMarshalling = StringMarshalling.Utf16)]
        internal static unsafe partial int ChangeCaseInvariantNative(char* src, int srcLen, char* dstBuffer, int dstBufferCapacity, [MarshalAs(UnmanagedType.Bool)] bool bToUpper);
    }
}
...@@ -11,7 +11,8 @@ internal enum ResultCode ...@@ -11,7 +11,8 @@ internal enum ResultCode
Success = 0, Success = 0,
UnknownError = 1, UnknownError = 1,
InsufficientBuffer = 2, InsufficientBuffer = 2,
OutOfMemory = 3 OutOfMemory = 3,
InvalidCodePoint = 4,
} }
} }
} }
...@@ -38,5 +38,6 @@ ...@@ -38,5 +38,6 @@
<Compile Include="..\CompareInfo\CompareInfoTests.LastIndexOf.cs" /> <Compile Include="..\CompareInfo\CompareInfoTests.LastIndexOf.cs" />
<Compile Include="..\CompareInfo\CompareInfoTests.IsPrefix.cs" /> <Compile Include="..\CompareInfo\CompareInfoTests.IsPrefix.cs" />
<Compile Include="..\CompareInfo\CompareInfoTests.IsSuffix.cs" /> <Compile Include="..\CompareInfo\CompareInfoTests.IsSuffix.cs" />
<Compile Include="..\System\Globalization\TextInfoTests.cs" />
</ItemGroup> </ItemGroup>
</Project> </Project>
...@@ -274,9 +274,9 @@ public static IEnumerable<object[]> ToLower_TestData() ...@@ -274,9 +274,9 @@ public static IEnumerable<object[]> ToLower_TestData()
// we also don't preform. // we also don't preform.
// Greek Capital Letter Sigma (does not case to U+03C2 with "final sigma" rule). // Greek Capital Letter Sigma (does not case to U+03C2 with "final sigma" rule).
yield return new object[] { cultureName, "\u03A3", "\u03C3" }; yield return new object[] { cultureName, "\u03A3", "\u03C3" };
if (PlatformDetection.IsHybridGlobalizationOnBrowser) if (PlatformDetection.IsHybridGlobalizationOnBrowser || PlatformDetection.IsHybridGlobalizationOnOSX)
{ {
// JS is using "final sigma" rule correctly - it's costly to unify it with ICU's behavior // JS and Apple platforms are using "final sigma" rule correctly - it's costly to unify it with ICU's behavior
yield return new object[] { cultureName, "O\u03A3", "o\u03C2" }; yield return new object[] { cultureName, "O\u03A3", "o\u03C2" };
} }
else else
...@@ -396,23 +396,29 @@ public static IEnumerable<object[]> ToUpper_TestData() ...@@ -396,23 +396,29 @@ public static IEnumerable<object[]> ToUpper_TestData()
// RAINBOW (outside the BMP and does not case) // RAINBOW (outside the BMP and does not case)
yield return new object[] { cultureName, "\U0001F308", "\U0001F308" }; yield return new object[] { cultureName, "\U0001F308", "\U0001F308" };
// Unicode defines some codepoints which expand into multiple codepoints if (!PlatformDetection.IsHybridGlobalizationOnOSX)
// when cased (see SpecialCasing.txt from UNIDATA for some examples). We have never done {
// these sorts of expansions, since it would cause string lengths to change when cased, // Unicode defines some codepoints which expand into multiple codepoints
// which is non-intuitive. In addition, there are some context sensitive mappings which // when cased (see SpecialCasing.txt from UNIDATA for some examples). We have never done
// we also don't preform. // these sorts of expansions, since it would cause string lengths to change when cased,
// es-zed does not case to SS when uppercased. // which is non-intuitive. In addition, there are some context sensitive mappings which
yield return new object[] { cultureName, "\u00DF", "\u00DF" }; // we also don't preform.
yield return new object[] { cultureName, "stra\u00DFe", "STRA\u00DFE" }; // es-zed does not case to SS when uppercased.
if (!PlatformDetection.IsNlsGlobalization) // on OSX, capitalizing the German letter ß (sharp S) gives SS
yield return new object[] { cultureName, "st\uD801\uDC37ra\u00DFe", "ST\uD801\uDC0FRA\u00DFE" }; yield return new object[] { cultureName, "\u00DF", "\u00DF" };
yield return new object[] { cultureName, "stra\u00DFe", "STRA\u00DFE" };
// Ligatures do not expand when cased. if (!PlatformDetection.IsNlsGlobalization)
yield return new object[] { cultureName, "\uFB00", "\uFB00" }; yield return new object[] { cultureName, "st\uD801\uDC37ra\u00DFe", "ST\uD801\uDC0FRA\u00DFE" };
// Precomposed character with no uppercase variant, we don't want to "decompose" this // Ligatures do not expand when cased.
// as part of casing. // on OSX, this is uppercase to "FF"
yield return new object[] { cultureName, "\u0149", "\u0149" }; yield return new object[] { cultureName, "\uFB00", "\uFB00" };
// Precomposed character with no uppercase variant, we don't want to "decompose" this
// as part of casing.
// on OSX, this is uppercased to "ʼN"
yield return new object[] { cultureName, "\u0149", "\u0149" };
}
} }
// Turkish i // Turkish i
......
...@@ -389,6 +389,7 @@ ...@@ -389,6 +389,7 @@
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\TextInfo.cs" /> <Compile Include="$(MSBuildThisFileDirectory)System\Globalization\TextInfo.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\TextInfo.Icu.cs" /> <Compile Include="$(MSBuildThisFileDirectory)System\Globalization\TextInfo.Icu.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\TextInfo.Nls.cs" /> <Compile Include="$(MSBuildThisFileDirectory)System\Globalization\TextInfo.Nls.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\TextInfo.OSX.cs" Condition="'$(IsOSXLike)' == 'true'" />
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\TextInfo.WebAssembly.cs" Condition="'$(TargetsBrowser)' == 'true'" /> <Compile Include="$(MSBuildThisFileDirectory)System\Globalization\TextInfo.WebAssembly.cs" Condition="'$(TargetsBrowser)' == 'true'" />
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\ThaiBuddhistCalendar.cs" /> <Compile Include="$(MSBuildThisFileDirectory)System\Globalization\ThaiBuddhistCalendar.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\TimeSpanFormat.cs" /> <Compile Include="$(MSBuildThisFileDirectory)System\Globalization\TimeSpanFormat.cs" />
...@@ -1273,6 +1274,9 @@ ...@@ -1273,6 +1274,9 @@
<Compile Include="$(CommonPath)Interop\Interop.Casing.cs"> <Compile Include="$(CommonPath)Interop\Interop.Casing.cs">
<Link>Common\Interop\Interop.Casing.cs</Link> <Link>Common\Interop\Interop.Casing.cs</Link>
</Compile> </Compile>
<Compile Include="$(CommonPath)Interop\Interop.Casing.OSX.cs" Condition="'$(IsOSXLike)' == 'true'">
<Link>Common\Interop\Interop.Casing.OSX.cs</Link>
</Compile>
<Compile Include="$(CommonPath)Interop\Interop.Collation.cs"> <Compile Include="$(CommonPath)Interop\Interop.Collation.cs">
<Link>Common\Interop\Interop.Collation.cs</Link> <Link>Common\Interop\Interop.Collation.cs</Link>
</Compile> </Compile>
......
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
using System.Diagnostics;
namespace System.Globalization
{
    public partial class TextInfo
    {
        // Changes the case of the UTF-16 text in [src, src + srcLen) and writes the result to
        // dstBuffer (capacity dstBufferCapacity, in chars) using the Apple native casing
        // functions. The invariant native entry point is used when the culture name is empty;
        // otherwise the culture name is forwarded to the locale-aware entry point.
        // Throws Exception when the native call reports anything other than Success.
        internal unsafe void ChangeCaseNative(char* src, int srcLen, char* dstBuffer, int dstBufferCapacity, bool toUpper)
        {
            // This path is only valid in hybrid globalization mode.
            Debug.Assert(!GlobalizationMode.Invariant);
            Debug.Assert(!GlobalizationMode.UseNls);
            Debug.Assert(GlobalizationMode.Hybrid);

            int status = HasEmptyCultureName
                ? Interop.Globalization.ChangeCaseInvariantNative(src, srcLen, dstBuffer, dstBufferCapacity, toUpper)
                : Interop.Globalization.ChangeCaseNative(_cultureName, _cultureName.Length, src, srcLen, dstBuffer, dstBufferCapacity, toUpper);

            // Translate the native result code into the matching exception message.
            switch ((Interop.Globalization.ResultCode)status)
            {
                case Interop.Globalization.ResultCode.Success:
                    return;
                case Interop.Globalization.ResultCode.InvalidCodePoint:
                    throw new Exception("Invalid code point while case changing");
                case Interop.Globalization.ResultCode.InsufficientBuffer:
                    throw new Exception("Insufficiently sized destination buffer");
                default:
                    throw new Exception("Exception occurred while case changing");
            }
        }
    }
}
...@@ -692,6 +692,12 @@ private unsafe void ChangeCaseCore(char* src, int srcLen, char* dstBuffer, int d ...@@ -692,6 +692,12 @@ private unsafe void ChangeCaseCore(char* src, int srcLen, char* dstBuffer, int d
JsChangeCase(src, srcLen, dstBuffer, dstBufferCapacity, bToUpper); JsChangeCase(src, srcLen, dstBuffer, dstBufferCapacity, bToUpper);
return; return;
} }
#elif TARGET_OSX || TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS
if (GlobalizationMode.Hybrid)
{
ChangeCaseNative(src, srcLen, dstBuffer, dstBufferCapacity, bToUpper);
return;
}
#endif #endif
IcuChangeCase(src, srcLen, dstBuffer, dstBufferCapacity, bToUpper); IcuChangeCase(src, srcLen, dstBuffer, dstBufferCapacity, bToUpper);
} }
......
...@@ -71,7 +71,8 @@ if(HAVE_SYS_ICU) ...@@ -71,7 +71,8 @@ if(HAVE_SYS_ICU)
set(icu_shim_sources_base set(icu_shim_sources_base
${icu_shim_sources_base} ${icu_shim_sources_base}
pal_locale.m pal_locale.m
pal_collation.m) pal_collation.m
pal_casing.m)
endif() endif()
addprefix(icu_shim_sources "${ICU_SHIM_PATH}" "${icu_shim_sources_base}") addprefix(icu_shim_sources "${ICU_SHIM_PATH}" "${icu_shim_sources_base}")
......
...@@ -93,7 +93,7 @@ else() ...@@ -93,7 +93,7 @@ else()
endif() endif()
if (CLR_CMAKE_TARGET_APPLE) if (CLR_CMAKE_TARGET_APPLE)
set(NATIVEGLOBALIZATION_SOURCES ${NATIVEGLOBALIZATION_SOURCES} pal_locale.m pal_collation.m) set(NATIVEGLOBALIZATION_SOURCES ${NATIVEGLOBALIZATION_SOURCES} pal_locale.m pal_collation.m pal_casing.m)
endif() endif()
# time zone names are filtered out of icu data for the browser and associated functionality is disabled # time zone names are filtered out of icu data for the browser and associated functionality is disabled
......
...@@ -59,16 +59,18 @@ static const Entry s_globalizationNative[] = ...@@ -59,16 +59,18 @@ static const Entry s_globalizationNative[] =
DllImportEntry(GlobalizationNative_ToUnicode) DllImportEntry(GlobalizationNative_ToUnicode)
DllImportEntry(GlobalizationNative_WindowsIdToIanaId) DllImportEntry(GlobalizationNative_WindowsIdToIanaId)
#ifdef __APPLE__ #ifdef __APPLE__
DllImportEntry(GlobalizationNative_ChangeCaseInvariantNative)
DllImportEntry(GlobalizationNative_ChangeCaseNative)
DllImportEntry(GlobalizationNative_CompareStringNative) DllImportEntry(GlobalizationNative_CompareStringNative)
DllImportEntry(GlobalizationNative_GetLocaleNameNative) DllImportEntry(GlobalizationNative_EndsWithNative)
DllImportEntry(GlobalizationNative_GetLocaleInfoStringNative)
DllImportEntry(GlobalizationNative_GetLocaleInfoIntNative) DllImportEntry(GlobalizationNative_GetLocaleInfoIntNative)
DllImportEntry(GlobalizationNative_GetLocaleInfoPrimaryGroupingSizeNative) DllImportEntry(GlobalizationNative_GetLocaleInfoPrimaryGroupingSizeNative)
DllImportEntry(GlobalizationNative_GetLocaleInfoSecondaryGroupingSizeNative) DllImportEntry(GlobalizationNative_GetLocaleInfoSecondaryGroupingSizeNative)
DllImportEntry(GlobalizationNative_GetLocaleInfoStringNative)
DllImportEntry(GlobalizationNative_GetLocaleNameNative)
DllImportEntry(GlobalizationNative_GetLocaleTimeFormatNative) DllImportEntry(GlobalizationNative_GetLocaleTimeFormatNative)
DllImportEntry(GlobalizationNative_IndexOfNative) DllImportEntry(GlobalizationNative_IndexOfNative)
DllImportEntry(GlobalizationNative_StartsWithNative) DllImportEntry(GlobalizationNative_StartsWithNative)
DllImportEntry(GlobalizationNative_EndsWithNative)
#endif #endif
}; };
......
...@@ -23,3 +23,19 @@ PALEXPORT void GlobalizationNative_ChangeCaseTurkish(const UChar* lpSrc, ...@@ -23,3 +23,19 @@ PALEXPORT void GlobalizationNative_ChangeCaseTurkish(const UChar* lpSrc,
int32_t bToUpper); int32_t bToUpper);
PALEXPORT void GlobalizationNative_InitOrdinalCasingPage(int32_t pageNumber, UChar* pTarget); PALEXPORT void GlobalizationNative_InitOrdinalCasingPage(int32_t pageNumber, UChar* pTarget);
#ifdef __APPLE__
/*
 * Apple-only casing entry points (implemented in pal_casing.m).
 *
 * GlobalizationNative_ChangeCaseNative:
 *   Upper- or lower-cases cwSrcLength UTF-16 code units from lpSrc using the locale named by
 *   localeName (lNameLength UTF-16 code units) and writes the result to lpDst, which can hold
 *   cwDstLength code units. bToUpper != 0 selects upper-casing, 0 selects lower-casing.
 *   Returns a ResultCode: Success (0) on success, non-zero on failure
 *   (e.g. InsufficientBuffer when the cased text does not fit in lpDst).
 */
PALEXPORT int32_t GlobalizationNative_ChangeCaseNative(const uint16_t* localeName,
int32_t lNameLength,
const uint16_t* lpSrc,
int32_t cwSrcLength,
uint16_t* lpDst,
int32_t cwDstLength,
int32_t bToUpper);
/*
 * GlobalizationNative_ChangeCaseInvariantNative:
 *   Same as GlobalizationNative_ChangeCaseNative but takes no locale name; casing is performed
 *   with the locale-independent NSString case-mapping properties.
 *   Returns a ResultCode: Success (0) on success, non-zero on failure.
 */
PALEXPORT int32_t GlobalizationNative_ChangeCaseInvariantNative(const uint16_t* lpSrc,
int32_t cwSrcLength,
uint16_t* lpDst,
int32_t cwDstLength,
int32_t bToUpper);
#endif
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
#include "pal_icushim_internal.h"
#include "pal_casing.h"
#include "pal_errors.h"
#import <Foundation/Foundation.h>
#if defined(TARGET_OSX) || defined(TARGET_MACCATALYST) || defined(TARGET_IOS) || defined(TARGET_TVOS)
/**
 * Append a code point to a UTF-16 buffer, writing 1 or 2 code units.
 * The offset points to the current end of the buffer contents
 * and is advanced (post-increment) by the number of code units written.
 * "Safe" macro: checks both the code point and the remaining capacity.
 * Code points outside of the Basic Multilingual Plane are converted into
 * the corresponding surrogate pair, but only if BOTH code units fit.
 * High surrogate range: 0xD800 - 0xDBFF
 * Low surrogate range: 0xDC00 - 0xDFFF
 *
 * On failure nothing is written, offset is left unchanged and isError is set to
 *   - InsufficientBuffer when the code unit(s) do not fit into the buffer,
 *   - InvalidCodePoint   when codePoint is greater than 0x10FFFF.
 * On success isError is not modified.
 *
 * Note: the arguments are evaluated more than once, so they must not have side effects.
 *
 * @param buffer uint16_t * destination buffer (written to)
 * @param offset index of the next free code unit in buffer; advanced on success
 * @param capacity size of the buffer in code units
 * @param codePoint code point to append
 * @param isError output variable that receives a ResultCode on failure, otherwise not modified
 */
#define Append(buffer, offset, capacity, codePoint, isError) do { \
    if ((offset) >= (capacity)) /* insufficiently sized destination buffer */ { \
        (isError) = InsufficientBuffer; \
    } else if ((uint32_t)(codePoint) > 0x10ffff) /* invalid code point */ { \
        (isError) = InvalidCodePoint; \
    } else if ((uint32_t)(codePoint) <= 0xffff) { \
        (buffer)[(offset)++] = (uint16_t)(codePoint); \
    } else if ((offset) + 1 >= (capacity)) /* lead surrogate fits, trail surrogate does not */ { \
        (isError) = InsufficientBuffer; \
    } else { \
        (buffer)[(offset)++] = (uint16_t)(((codePoint) >> 10) + 0xd7c0); \
        (buffer)[(offset)++] = (uint16_t)(((codePoint)&0x3ff) | 0xdc00); \
    } \
} while (0)
/*
Function:
ChangeCaseNative

Performs upper or lower casing of a string into a new buffer, taking into account the specified locale.

localeName / lNameLength: UTF-16 locale identifier; when NULL or empty the system locale is used.
lpSrc / cwSrcLength:      UTF-16 source text and its length in code units.
lpDst / cwDstLength:      destination buffer and its capacity in code units.
bToUpper:                 non-zero to upper-case, zero to lower-case.

Returns 0 (Success) on success, non-zero on failure, see ResultCode:
InsufficientBuffer when the cased text does not fit into lpDst (the result can be longer
than the source, e.g. when a single character upper-cases to two), UnknownError when
cwSrcLength is negative.
*/
int32_t GlobalizationNative_ChangeCaseNative(const uint16_t* localeName, int32_t lNameLength,
                                             const uint16_t* lpSrc, int32_t cwSrcLength, uint16_t* lpDst, int32_t cwDstLength, int32_t bToUpper)
{
    // A negative length would be implicitly converted to a huge NSUInteger below.
    if (cwSrcLength < 0)
        return UnknownError;

    // The Foundation convenience constructors used below return autoreleased objects.
    // Callers may be threads that have no enclosing autorelease pool, so drain one here
    // to avoid leaking the temporaries on every call.
    @autoreleasepool
    {
        NSLocale *currentLocale;
        if (localeName == NULL || lNameLength <= 0)
        {
            currentLocale = [NSLocale systemLocale];
        }
        else
        {
            NSString *locName = [NSString stringWithCharacters: localeName length: (NSUInteger)lNameLength];
            currentLocale = [NSLocale localeWithLocaleIdentifier:locName];
        }

        NSString *source = [NSString stringWithCharacters: lpSrc length: (NSUInteger)cwSrcLength];
        NSString *result = bToUpper ? [source uppercaseStringWithLocale:currentLocale] : [source lowercaseStringWithLocale:currentLocale];

        // Copy the cased text code unit by code unit; Append reports when lpDst is too small.
        NSUInteger resultLength = result.length;
        int32_t dstIdx = 0, isError = 0;
        for (NSUInteger srcIdx = 0; srcIdx < resultLength; srcIdx++)
        {
            uint16_t dstCodepoint = [result characterAtIndex:srcIdx];
            Append(lpDst, dstIdx, cwDstLength, dstCodepoint, isError);
            if (isError)
                return isError;
        }
        return Success;
    }
}
/*
Function:
ChangeCaseInvariantNative

Performs upper or lower casing of a string into a new buffer, without a locale.

lpSrc / cwSrcLength: UTF-16 source text and its length in code units.
lpDst / cwDstLength: destination buffer and its capacity in code units.
bToUpper:            non-zero to upper-case, zero to lower-case.

Returns 0 (Success) on success, non-zero on failure, see ResultCode:
InsufficientBuffer when the cased text does not fit into lpDst (the result can be longer
than the source, e.g. when a single character upper-cases to two), UnknownError when
cwSrcLength is negative.
*/
int32_t GlobalizationNative_ChangeCaseInvariantNative(const uint16_t* lpSrc, int32_t cwSrcLength, uint16_t* lpDst, int32_t cwDstLength, int32_t bToUpper)
{
    // A negative length would be implicitly converted to a huge NSUInteger below.
    if (cwSrcLength < 0)
        return UnknownError;

    // The Foundation calls below return autoreleased objects. Callers may be threads that
    // have no enclosing autorelease pool, so drain one here to avoid leaking them.
    @autoreleasepool
    {
        NSString *source = [NSString stringWithCharacters: lpSrc length: (NSUInteger)cwSrcLength];
        NSString *result = bToUpper ? source.uppercaseString : source.lowercaseString;

        // Copy the cased text code unit by code unit; Append reports when lpDst is too small.
        NSUInteger resultLength = result.length;
        int32_t dstIdx = 0, isError = 0;
        for (NSUInteger srcIdx = 0; srcIdx < resultLength; srcIdx++)
        {
            uint16_t dstCodepoint = [result characterAtIndex:srcIdx];
            Append(lpDst, dstIdx, cwDstLength, dstCodepoint, isError);
            if (isError)
                return isError;
        }
        return Success;
    }
}
#endif
...@@ -12,5 +12,6 @@ typedef enum ...@@ -12,5 +12,6 @@ typedef enum
Success = 0, Success = 0,
UnknownError = 1, UnknownError = 1,
InsufficientBuffer = 2, InsufficientBuffer = 2,
OutOfMemory = 3 OutOfMemory = 3,
InvalidCodePoint = 4
} ResultCode; } ResultCode;
...@@ -459,7 +459,7 @@ int32_t GlobalizationNative_GetLocaleInfoIntNative(const char* localeName, Local ...@@ -459,7 +459,7 @@ int32_t GlobalizationNative_GetLocaleInfoIntNative(const char* localeName, Local
} }
case LocaleNumber_ReadingLayout: case LocaleNumber_ReadingLayout:
{ {
NSLocaleLanguageDirection langDir = [NSLocale characterDirectionForLanguage:[[NSLocale currentLocale] objectForKey:NSLocaleLanguageCode]]; NSLocaleLanguageDirection langDir = [NSLocale characterDirectionForLanguage:[currentLocale objectForKey:NSLocaleLanguageCode]];
// 0 - Left to right (such as en-US) // 0 - Left to right (such as en-US)
// 1 - Right to left (such as arabic locales) // 1 - Right to left (such as arabic locales)
value = NSLocaleLanguageDirectionRightToLeft == langDir ? 1 : 0; value = NSLocaleLanguageDirectionRightToLeft == langDir ? 1 : 0;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册