From 1d27c3da294df541478a78ae977d7629b17ae710 Mon Sep 17 00:00:00 2001 From: Vladimir Sadov Date: Mon, 29 Aug 2022 11:09:35 -0700 Subject: [PATCH] [NativeAOT] enable background GC on Unix (#74735) * enable for unix+arm64 * include softwarewritewatch.cpp * make the unix crst recursive * enable ww on Unix x64 * enabled card bundles * comment --- src/coreclr/nativeaot/Runtime/CMakeLists.txt | 7 ++ .../nativeaot/Runtime/amd64/WriteBarriers.S | 83 ++++++++++++++----- .../nativeaot/Runtime/arm64/WriteBarriers.S | 32 ++++--- src/coreclr/nativeaot/Runtime/gcrhenv.cpp | 4 +- .../nativeaot/Runtime/unix/PalRedhawkUnix.cpp | 17 +++- 5 files changed, 102 insertions(+), 41 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/CMakeLists.txt b/src/coreclr/nativeaot/Runtime/CMakeLists.txt index 4ccefca81f5..2e37b1708d1 100644 --- a/src/coreclr/nativeaot/Runtime/CMakeLists.txt +++ b/src/coreclr/nativeaot/Runtime/CMakeLists.txt @@ -50,6 +50,7 @@ set(COMMON_RUNTIME_SOURCES ${GC_DIR}/handletablecore.cpp ${GC_DIR}/handletablescan.cpp ${GC_DIR}/objecthandle.cpp + ${GC_DIR}/softwarewritewatch.cpp ) set(SERVER_GC_SOURCES @@ -206,6 +207,12 @@ include_directories(${ARCH_SOURCES_DIR}) add_definitions(-DFEATURE_BASICFREEZE) add_definitions(-DFEATURE_CONSERVATIVE_GC) + +if(CLR_CMAKE_TARGET_UNIX) + add_definitions(-DFEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP) + add_definitions(-DFEATURE_MANUALLY_MANAGED_CARD_BUNDLES) +endif() + add_definitions(-DFEATURE_CUSTOM_IMPORTS) add_definitions(-DFEATURE_DYNAMIC_CODE) add_compile_definitions($<$,$>:FEATURE_GC_STRESS>) diff --git a/src/coreclr/nativeaot/Runtime/amd64/WriteBarriers.S b/src/coreclr/nativeaot/Runtime/amd64/WriteBarriers.S index 4ec18c7d886..c31a95c9bec 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/WriteBarriers.S +++ b/src/coreclr/nativeaot/Runtime/amd64/WriteBarriers.S @@ -84,6 +84,21 @@ LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_Done_\REFREG): // we're in a debug build and write barrier checking has been enabled). UPDATE_GC_SHADOW \BASENAME, \REFREG, rdi +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + mov r11, [C_VAR(g_write_watch_table)] + cmp r11, 0x0 + je LOCAL_LABEL(\BASENAME\()_CheckCardTable_\REFREG) + + mov r10, rdi + shr r10, 0xC // SoftwareWriteWatch::AddressToTableByteIndexShift + add r10, r11 + cmp byte ptr [r10], 0x0 + jne LOCAL_LABEL(\BASENAME\()_CheckCardTable_\REFREG) + mov byte ptr [r10], 0xFF +#endif + +LOCAL_LABEL(\BASENAME\()_CheckCardTable_\REFREG): + // If the reference is to an object that's not in an ephemeral generation we have no need to track it // (since the object won't be collected or moved by an ephemeral collection). cmp \REFREG, [C_VAR(g_ephemeral_low)] @@ -95,17 +110,25 @@ LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_Done_\REFREG): // track this write. The location address is translated into an offset in the card table bitmap. We set // an entire byte in the card table since it's quicker than messing around with bitmasks and we only write // the byte if it hasn't already been done since writes are expensive and impact scaling. - shr rdi, 11 - add rdi, [C_VAR(g_card_table)] - cmp byte ptr [rdi], 0x0FF - jne LOCAL_LABEL(\BASENAME\()_UpdateCardTable_\REFREG) - -LOCAL_LABEL(\BASENAME\()_NoBarrierRequired_\REFREG): - ret + shr rdi, 0x0B + mov r10, [C_VAR(g_card_table)] + cmp byte ptr [rdi + r10], 0x0FF + je LOCAL_LABEL(\BASENAME\()_NoBarrierRequired_\REFREG) // We get here if it's necessary to update the card table. -LOCAL_LABEL(\BASENAME\()_UpdateCardTable_\REFREG): - mov byte ptr [rdi], 0x0FF + mov byte ptr [rdi + r10], 0xFF + +#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES + // Shift rdi by 0x0A more to get the card bundle byte (we shifted by 0x0B already) + shr rdi, 0x0A + add rdi, [C_VAR(g_card_bundle_table)] + cmp byte ptr [rdi], 0xFF + je LOCAL_LABEL(\BASENAME\()_NoBarrierRequired_\REFREG) + + mov byte ptr [rdi], 0xFF +#endif + +LOCAL_LABEL(\BASENAME\()_NoBarrierRequired_\REFREG): ret .endm @@ -252,6 +275,21 @@ LEAF_ENTRY RhpByRefAssignRef, _TEXT // we're in a debug build and write barrier checking has been enabled). UPDATE_GC_SHADOW BASENAME, rcx, rdi +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + mov r11, [C_VAR(g_write_watch_table)] + cmp r11, 0x0 + je LOCAL_LABEL(RhpByRefAssignRef_CheckCardTable) + + mov r10, rdi + shr r10, 0xC // SoftwareWriteWatch::AddressToTableByteIndexShift + add r10, r11 + cmp byte ptr [r10], 0x0 + jne LOCAL_LABEL(RhpByRefAssignRef_CheckCardTable) + mov byte ptr [r10], 0xFF +#endif + +LOCAL_LABEL(RhpByRefAssignRef_CheckCardTable): + // If the reference is to an object that's not in an ephemeral generation we have no need to track it // (since the object won't be collected or moved by an ephemeral collection). cmp rcx, [C_VAR(g_ephemeral_low)] @@ -259,25 +297,30 @@ LEAF_ENTRY RhpByRefAssignRef, _TEXT cmp rcx, [C_VAR(g_ephemeral_high)] jae LOCAL_LABEL(RhpByRefAssignRef_NotInHeap) - // move current rdi value into rcx and then increment the pointers + // move current rdi value into rcx, we need to keep rdi and eventually increment by 8 mov rcx, rdi - add rsi, 0x8 - add rdi, 0x8 // We have a location on the GC heap being updated with a reference to an ephemeral object so we must // track this write. The location address is translated into an offset in the card table bitmap. We set // an entire byte in the card table since it's quicker than messing around with bitmasks and we only write // the byte if it hasn't already been done since writes are expensive and impact scaling. - shr rcx, 11 - add rcx, [C_VAR(g_card_table)] - cmp byte ptr [rcx], 0x0FF - jne LOCAL_LABEL(RhpByRefAssignRef_UpdateCardTable) - ret + shr rcx, 0x0B + mov r10, [C_VAR(g_card_table)] + cmp byte ptr [rcx + r10], 0x0FF + je LOCAL_LABEL(RhpByRefAssignRef_NotInHeap) // We get here if it's necessary to update the card table. -LOCAL_LABEL(RhpByRefAssignRef_UpdateCardTable): - mov byte ptr [rcx], 0x0FF - ret + mov byte ptr [rcx + r10], 0xFF + +#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES + // Shift rcx by 0x0A more to get the card bundle byte (we shifted by 0x0B already) + shr rcx, 0x0A + add rcx, [C_VAR(g_card_bundle_table)] + cmp byte ptr [rcx], 0xFF + je LOCAL_LABEL(RhpByRefAssignRef_NotInHeap) + + mov byte ptr [rcx], 0xFF +#endif LOCAL_LABEL(RhpByRefAssignRef_NotInHeap): // Increment the pointers before leaving diff --git a/src/coreclr/nativeaot/Runtime/arm64/WriteBarriers.S b/src/coreclr/nativeaot/Runtime/arm64/WriteBarriers.S index 4a3c3edf1e2..8d908d993ae 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/WriteBarriers.S +++ b/src/coreclr/nativeaot/Runtime/arm64/WriteBarriers.S @@ -92,12 +92,11 @@ // destReg: location to be updated // refReg: objectref to be stored // trash: register nr than can be trashed - // trash2: register than can be trashed // // On exit: // destReg: trashed // - .macro INSERT_UNCHECKED_WRITE_BARRIER_CORE destReg, refReg, trash, trash2 + .macro INSERT_UNCHECKED_WRITE_BARRIER_CORE destReg, refReg, trash // Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless // we are in a debug build and write barrier checking has been enabled). @@ -129,27 +128,27 @@ // Set this objects card, if it has not already been set. PREPARE_EXTERNAL_VAR_INDIRECT g_card_table, x\trash - add \trash2, x\trash, \destReg, lsr #11 + add x17, x\trash, \destReg, lsr #11 // Check that this card has not already been written. Avoiding useless writes is a big win on // multi-proc systems since it avoids cache thrashing. - ldrb w\trash, [\trash2] + ldrb w\trash, [x17] cmp x\trash, 0xFF beq 0f mov x\trash, 0xFF - strb w\trash, [\trash2] + strb w\trash, [x17] #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES // Check if we need to update the card bundle table PREPARE_EXTERNAL_VAR_INDIRECT g_card_bundle_table, x\trash - add \trash2, x\trash, \destReg, lsr #21 - ldrb w\trash, [\trash2] + add x17, x\trash, \destReg, lsr #21 + ldrb w\trash, [x17] cmp x\trash, 0xFF beq 0f mov x\trash, 0xFF - strb w\trash, [\trash2] + strb w\trash, [x17] #endif 0: @@ -160,12 +159,11 @@ // destReg: location to be updated // refReg: objectref to be stored // trash: register nr than can be trashed - // trash2: register than can be trashed // // On exit: // destReg: trashed // - .macro INSERT_CHECKED_WRITE_BARRIER_CORE destReg, refReg, trash, trash2 + .macro INSERT_CHECKED_WRITE_BARRIER_CORE destReg, refReg, trash // The "check" of this checked write barrier - is destReg // within the heap? if no, early out. @@ -180,7 +178,7 @@ ccmp \destReg, x\trash, #0x2, hs bhs 0f - INSERT_UNCHECKED_WRITE_BARRIER_CORE \destReg, \refReg, \trash, \trash2 + INSERT_UNCHECKED_WRITE_BARRIER_CORE \destReg, \refReg, \trash 0: // Exit label @@ -265,7 +263,7 @@ CmpXchgRetry: // The following barrier code takes the destination in x0 and the value in x1 so the arguments are // already correctly set up. - INSERT_CHECKED_WRITE_BARRIER_CORE x0, x1, 9, x0 + INSERT_CHECKED_WRITE_BARRIER_CORE x0, x1, 9 CmpXchgNoUpdate: // x10 still contains the original value. @@ -307,7 +305,7 @@ ExchangeRetry: // The following barrier code takes the destination in x0 and the value in x1 so the arguments are // already correctly set up. - INSERT_CHECKED_WRITE_BARRIER_CORE x0, x1, 9, x0 + INSERT_CHECKED_WRITE_BARRIER_CORE x0, x1, 9 // x10 still contains the original value. mov x0, x10 @@ -321,7 +319,7 @@ LEAF_ENTRY RhpAssignRefArm64, _TEXT ALTERNATE_ENTRY RhpAssignRefX1AVLocation stlr x15, [x14] - INSERT_UNCHECKED_WRITE_BARRIER_CORE x14, x15, 12, X14 + INSERT_UNCHECKED_WRITE_BARRIER_CORE x14, x15, 12 ret LEAF_END RhpAssignRefArm64, _TEXT @@ -343,9 +341,7 @@ LEAF_ENTRY RhpCheckedAssignRefArm64, _TEXT stlr x15, [x14] - INSERT_CHECKED_WRITE_BARRIER_CORE x14, x15, 12, X15 - - add x14, x14, #8 + INSERT_CHECKED_WRITE_BARRIER_CORE x14, x15, 12 ret LEAF_END RhpCheckedAssignRefArm64, _TEXT @@ -366,7 +362,7 @@ LEAF_ENTRY RhpByRefAssignRefArm64, _TEXT ldr x15, [x13] stlr x15, [x14] - INSERT_CHECKED_WRITE_BARRIER_CORE x14, x15, 12, X15 + INSERT_CHECKED_WRITE_BARRIER_CORE x14, x15, 12 add X13, x13, #8 add x14, x14, #8 diff --git a/src/coreclr/nativeaot/Runtime/gcrhenv.cpp b/src/coreclr/nativeaot/Runtime/gcrhenv.cpp index f3d3010ff5a..b81422dc0d0 100644 --- a/src/coreclr/nativeaot/Runtime/gcrhenv.cpp +++ b/src/coreclr/nativeaot/Runtime/gcrhenv.cpp @@ -1018,8 +1018,8 @@ void GCToEEInterface::DiagWalkBGCSurvivors(void* gcContext) #endif // FEATURE_EVENT_TRACE } -#if defined(FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP) && (!defined(TARGET_ARM64) || !defined(TARGET_UNIX)) -#error FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP is only implemented for ARM64 and UNIX +#if defined(FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP) && !defined(TARGET_UNIX) +#error FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP is only implemented for UNIX #endif void GCToEEInterface::StompWriteBarrier(WriteBarrierParameters* args) diff --git a/src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp b/src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp index 526810177a5..7ef38f6d464 100644 --- a/src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp @@ -869,7 +869,22 @@ extern "C" UInt32_BOOL DuplicateHandle( extern "C" UInt32_BOOL InitializeCriticalSection(CRITICAL_SECTION * lpCriticalSection) { - return pthread_mutex_init(&lpCriticalSection->mutex, NULL) == 0; + pthread_mutexattr_t mutexAttributes; + int st = pthread_mutexattr_init(&mutexAttributes); + if (st != 0) + { + return false; + } + + st = pthread_mutexattr_settype(&mutexAttributes, PTHREAD_MUTEX_RECURSIVE); + if (st == 0) + { + st = pthread_mutex_init(&lpCriticalSection->mutex, &mutexAttributes); + } + + pthread_mutexattr_destroy(&mutexAttributes); + + return (st == 0); } extern "C" UInt32_BOOL InitializeCriticalSectionEx(CRITICAL_SECTION * lpCriticalSection, uint32_t arg2, uint32_t arg3) -- GitLab