From b4c57fe2a44265674c8c965a5255709968ff7c61 Mon Sep 17 00:00:00 2001 From: tschatzl Date: Mon, 24 Mar 2014 15:30:30 +0100 Subject: [PATCH] 8035815: Cache-align and pad the from card cache Summary: The from card cache is a very frequently accessed data structure. It is essentially a 2d array of per-region values, one row of values for every GC thread. Pad and align the data structure to avoid false sharing. Reviewed-by: stefank --- .../gc_implementation/g1/heapRegionRemSet.cpp | 41 ++++++++++--------- .../gc_implementation/g1/heapRegionRemSet.hpp | 12 +++--- src/share/vm/memory/padded.hpp | 13 +++++- src/share/vm/memory/padded.inline.hpp | 31 +++++++++++++- 4 files changed, 70 insertions(+), 27 deletions(-) diff --git a/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp b/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp index 51a699a96..ef2ed91ff 100644 --- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp +++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp @@ -29,6 +29,7 @@ #include "gc_implementation/g1/heapRegionRemSet.hpp" #include "gc_implementation/g1/heapRegionSeq.inline.hpp" #include "memory/allocation.hpp" +#include "memory/padded.inline.hpp" #include "memory/space.inline.hpp" #include "oops/oop.inline.hpp" #include "utilities/bitMap.inline.hpp" @@ -358,27 +359,29 @@ void OtherRegionsTable::unlink_from_all(PerRegionTable* prt) { } int** OtherRegionsTable::_from_card_cache = NULL; -size_t OtherRegionsTable::_from_card_cache_max_regions = 0; +uint OtherRegionsTable::_from_card_cache_max_regions = 0; size_t OtherRegionsTable::_from_card_cache_mem_size = 0; -void OtherRegionsTable::init_from_card_cache(size_t max_regions) { +void OtherRegionsTable::init_from_card_cache(uint max_regions) { + guarantee(_from_card_cache == NULL, "Should not call this multiple times"); + uint n_par_rs = HeapRegionRemSet::num_par_rem_sets(); + _from_card_cache_max_regions = max_regions; + _from_card_cache = Padded2DArray::create_unfreeable(n_par_rs, + _from_card_cache_max_regions, + &_from_card_cache_mem_size); - int n_par_rs = HeapRegionRemSet::num_par_rem_sets(); - _from_card_cache = NEW_C_HEAP_ARRAY(int*, n_par_rs, mtGC); - for (int i = 0; i < n_par_rs; i++) { - _from_card_cache[i] = NEW_C_HEAP_ARRAY(int, max_regions, mtGC); - for (size_t j = 0; j < max_regions; j++) { + for (uint i = 0; i < n_par_rs; i++) { + for (uint j = 0; j < _from_card_cache_max_regions; j++) { _from_card_cache[i][j] = -1; // An invalid value. } } - _from_card_cache_mem_size = n_par_rs * max_regions * sizeof(int); } -void OtherRegionsTable::shrink_from_card_cache(size_t new_n_regs) { - for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets(); i++) { +void OtherRegionsTable::shrink_from_card_cache(uint new_n_regs) { + for (uint i = 0; i < HeapRegionRemSet::num_par_rem_sets(); i++) { assert(new_n_regs <= _from_card_cache_max_regions, "Must be within max."); - for (size_t j = new_n_regs; j < _from_card_cache_max_regions; j++) { + for (uint j = new_n_regs; j < _from_card_cache_max_regions; j++) { _from_card_cache[i][j] = -1; // An invalid value. } } @@ -386,8 +389,8 @@ void OtherRegionsTable::shrink_from_card_cache(size_t new_n_regs) { #ifndef PRODUCT void OtherRegionsTable::print_from_card_cache() { - for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets(); i++) { - for (size_t j = 0; j < _from_card_cache_max_regions; j++) { + for (uint i = 0; i < HeapRegionRemSet::num_par_rem_sets(); i++) { + for (uint j = 0; j < _from_card_cache_max_regions; j++) { gclog_or_tty->print_cr("_from_card_cache[%d][%d] = %d.", i, j, _from_card_cache[i][j]); } @@ -727,8 +730,8 @@ size_t OtherRegionsTable::fl_mem_size() { } void OtherRegionsTable::clear_fcc() { - size_t hrs_idx = hr()->hrs_index(); - for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets(); i++) { + uint hrs_idx = hr()->hrs_index(); + for (uint i = 0; i < HeapRegionRemSet::num_par_rem_sets(); i++) { _from_card_cache[i][hrs_idx] = -1; } } @@ -762,8 +765,8 @@ void OtherRegionsTable::clear_incoming_entry(HeapRegion* from_hr) { _coarse_map.par_at_put(hrs_ind, 0); } // Check to see if any of the fcc entries come from here. - size_t hr_ind = (size_t) hr()->hrs_index(); - for (int tid = 0; tid < HeapRegionRemSet::num_par_rem_sets(); tid++) { + uint hr_ind = hr()->hrs_index(); + for (uint tid = 0; tid < HeapRegionRemSet::num_par_rem_sets(); tid++) { int fcc_ent = _from_card_cache[tid][hr_ind]; if (fcc_ent != -1) { HeapWord* card_addr = (HeapWord*) @@ -838,8 +841,8 @@ OtherRegionsTable::do_cleanup_work(HRRSCleanupTask* hrrs_cleanup_task) { // Determines how many threads can add records to an rset in parallel. // This can be done by either mutator threads together with the // concurrent refinement threads or GC threads. -int HeapRegionRemSet::num_par_rem_sets() { - return (int)MAX2(DirtyCardQueueSet::num_par_ids() + ConcurrentG1Refine::thread_num(), ParallelGCThreads); +uint HeapRegionRemSet::num_par_rem_sets() { + return (uint)MAX2(DirtyCardQueueSet::num_par_ids() + ConcurrentG1Refine::thread_num(), ParallelGCThreads); } HeapRegionRemSet::HeapRegionRemSet(G1BlockOffsetSharedArray* bosa, diff --git a/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp b/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp index f0b315d49..e83855089 100644 --- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp +++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp @@ -121,7 +121,7 @@ class OtherRegionsTable VALUE_OBJ_CLASS_SPEC { // Indexed by thread X heap region, to minimize thread contention. static int** _from_card_cache; - static size_t _from_card_cache_max_regions; + static uint _from_card_cache_max_regions; static size_t _from_card_cache_mem_size; // link/add the given fine grain remembered set into the "all" list @@ -170,11 +170,11 @@ public: // Declare the heap size (in # of regions) to the OtherRegionsTable. // (Uses it to initialize from_card_cache). - static void init_from_card_cache(size_t max_regions); + static void init_from_card_cache(uint max_regions); // Declares that only regions i s.t. 0 <= i < new_n_regs are in use. // Make sure any entries for higher regions are invalid. - static void shrink_from_card_cache(size_t new_n_regs); + static void shrink_from_card_cache(uint new_n_regs); static void print_from_card_cache(); }; @@ -222,7 +222,7 @@ private: public: HeapRegionRemSet(G1BlockOffsetSharedArray* bosa, HeapRegion* hr); - static int num_par_rem_sets(); + static uint num_par_rem_sets(); static void setup_remset_size(); HeapRegion* hr() const { @@ -358,12 +358,12 @@ public: // (Uses it to initialize from_card_cache). static void init_heap(uint max_regions) { G1CodeRootSet::initialize(); - OtherRegionsTable::init_from_card_cache((size_t) max_regions); + OtherRegionsTable::init_from_card_cache(max_regions); } // Declares that only regions i s.t. 0 <= i < new_n_regs are in use. static void shrink_heap(uint new_n_regs) { - OtherRegionsTable::shrink_from_card_cache((size_t) new_n_regs); + OtherRegionsTable::shrink_from_card_cache(new_n_regs); } #ifndef PRODUCT diff --git a/src/share/vm/memory/padded.hpp b/src/share/vm/memory/padded.hpp index 4c50b3996..a03c2ba56 100644 --- a/src/share/vm/memory/padded.hpp +++ b/src/share/vm/memory/padded.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -90,4 +90,15 @@ class PaddedArray { static PaddedEnd* create_unfreeable(uint length); }; +// Helper class to create an array of references to arrays of primitive types +// Both the array of references and the data arrays are aligned to the given +// alignment. The allocated memory is zero-filled. +template +class Padded2DArray { + public: + // Creates an aligned padded 2D array. + // The memory cannot be deleted since the raw memory chunk is not returned. + static T** create_unfreeable(uint rows, uint columns, size_t* allocation_size = NULL); +}; + #endif // SHARE_VM_MEMORY_PADDED_HPP diff --git a/src/share/vm/memory/padded.inline.hpp b/src/share/vm/memory/padded.inline.hpp index 1e9994ab6..e773c4075 100644 --- a/src/share/vm/memory/padded.inline.hpp +++ b/src/share/vm/memory/padded.inline.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -47,3 +47,32 @@ PaddedEnd* PaddedArray::create_unfreeable(uint length) { return aligned_padded_array; } + +template +T** Padded2DArray::create_unfreeable(uint rows, uint columns, size_t* allocation_size) { + // Calculate and align the size of the first dimension's table. + size_t table_size = align_size_up_(rows * sizeof(T*), alignment); + // The size of the separate rows. + size_t row_size = align_size_up_(columns * sizeof(T), alignment); + // Total size consists of the indirection table plus the rows. + size_t total_size = table_size + rows * row_size + alignment; + + // Allocate a chunk of memory large enough to allow alignment of the chunk. + void* chunk = AllocateHeap(total_size, flags); + // Clear the allocated memory. + memset(chunk, 0, total_size); + // Align the chunk of memory. + T** result = (T**)align_pointer_up(chunk, alignment); + void* data_start = (void*)((uintptr_t)result + table_size); + + // Fill in the row table. + for (size_t i = 0; i < rows; i++) { + result[i] = (T*)((uintptr_t)data_start + i * row_size); + } + + if (allocation_size != NULL) { + *allocation_size = total_size; + } + + return result; +} -- GitLab