提交 3890d5c1 编写于 作者: J jcoomes

6725714: par compact - add a table to speed up bitmap searches

Reviewed-by: jmasa, tschatzl
上级 36894560
......@@ -116,7 +116,7 @@ class ParallelScavengeHeap : public CollectedHeap {
// The alignment used for eden and survivors within the young gen
// and for boundary between young gen and old gen.
size_t intra_heap_alignment() const { return 64 * K; }
// NOTE(review): the value is 64K scaled by HeapWordSize, so it is presumably
// a byte count covering 64K heap words -- confirm that callers expect bytes.
size_t intra_heap_alignment() const { return 64 * K * HeapWordSize; }
size_t capacity() const;
size_t used() const;
......
......@@ -59,13 +59,25 @@
#include <math.h>
// All sizes are in HeapWords.
const size_t ParallelCompactData::Log2RegionSize = 9; // 512 words
const size_t ParallelCompactData::Log2RegionSize = 16; // 64K words
const size_t ParallelCompactData::RegionSize = (size_t)1 << Log2RegionSize;
const size_t ParallelCompactData::RegionSizeBytes =
RegionSize << LogHeapWordSize;
const size_t ParallelCompactData::RegionSizeOffsetMask = RegionSize - 1;
const size_t ParallelCompactData::RegionAddrOffsetMask = RegionSizeBytes - 1;
const size_t ParallelCompactData::RegionAddrMask = ~RegionAddrOffsetMask;
const size_t ParallelCompactData::RegionAddrMask = ~RegionAddrOffsetMask;
// Block geometry.  A block is 2^Log2BlockSize heap words; every other block
// constant is derived from that exponent.
const size_t ParallelCompactData::Log2BlockSize = 7; // 128 words
const size_t ParallelCompactData::BlockSize = (size_t)1 << Log2BlockSize;
// Block size in bytes: the word count shifted left by LogHeapWordSize.
const size_t ParallelCompactData::BlockSizeBytes =
BlockSize << LogHeapWordSize;
// Masks that select the within-block part of a word count and of an address.
const size_t ParallelCompactData::BlockSizeOffsetMask = BlockSize - 1;
const size_t ParallelCompactData::BlockAddrOffsetMask = BlockSizeBytes - 1;
// Mask that clears the within-block bits of an address.
const size_t ParallelCompactData::BlockAddrMask = ~BlockAddrOffsetMask;
// Number of blocks in one region, and the base-2 logarithm of that number.
const size_t ParallelCompactData::BlocksPerRegion = RegionSize / BlockSize;
const size_t ParallelCompactData::Log2BlocksPerRegion =
Log2RegionSize - Log2BlockSize;
const ParallelCompactData::RegionData::region_sz_t
ParallelCompactData::RegionData::dc_shift = 27;
......@@ -359,6 +371,10 @@ ParallelCompactData::ParallelCompactData()
_reserved_byte_size = 0;
_region_data = 0;
_region_count = 0;
_block_vspace = 0;
_block_data = 0;
_block_count = 0;
}
bool ParallelCompactData::initialize(MemRegion covered_region)
......@@ -372,8 +388,7 @@ bool ParallelCompactData::initialize(MemRegion covered_region)
assert((region_size & RegionSizeOffsetMask) == 0,
"region size not a multiple of RegionSize");
bool result = initialize_region_data(region_size);
bool result = initialize_region_data(region_size) && initialize_block_data();
return result;
}
......@@ -418,17 +433,36 @@ bool ParallelCompactData::initialize_region_data(size_t region_size)
return false;
}
bool ParallelCompactData::initialize_block_data()
{
assert(_region_count != 0, "region data must be initialized first");
const size_t count = _region_count << Log2BlocksPerRegion;
_block_vspace = create_vspace(count, sizeof(BlockData));
if (_block_vspace != 0) {
_block_data = (BlockData*)_block_vspace->reserved_low_addr();
_block_count = count;
return true;
}
return false;
}
// Zero the committed part of the region table and of the block table.
void ParallelCompactData::clear()
{
  const size_t region_bytes = _region_vspace->committed_size();
  memset(_region_data, 0, region_bytes);

  const size_t block_bytes = _block_vspace->committed_size();
  memset(_block_data, 0, block_bytes);
}
void ParallelCompactData::clear_range(size_t beg_region, size_t end_region) {
assert(beg_region <= _region_count, "beg_region out of range");
assert(end_region <= _region_count, "end_region out of range");
assert(RegionSize % BlockSize == 0, "RegionSize not a multiple of BlockSize");
const size_t region_cnt = end_region - beg_region;
memset(_region_data + beg_region, 0, region_cnt * sizeof(RegionData));
const size_t beg_block = beg_region * BlocksPerRegion;
const size_t block_cnt = region_cnt * BlocksPerRegion;
memset(_block_data + beg_block, 0, block_cnt * sizeof(BlockData));
}
HeapWord* ParallelCompactData::partial_obj_end(size_t region_idx) const
......@@ -707,49 +741,48 @@ bool ParallelCompactData::summarize(SplitInfo& split_info,
// Return the location the object at addr will have after compaction.  addr
// must be non-NULL, inside the heap and marked in the mark bitmap (all three
// are asserted below).
HeapWord* ParallelCompactData::calc_new_pointer(HeapWord* addr) {
assert(addr != NULL, "Should detect NULL oop earlier");
assert(PSParallelCompact::gc_heap()->is_in(addr), "addr not in heap");
#ifdef ASSERT
if (PSParallelCompact::mark_bitmap()->is_unmarked(addr)) {
gclog_or_tty->print_cr("calc_new_pointer:: addr " PTR_FORMAT, addr);
}
#endif
assert(PSParallelCompact::mark_bitmap()->is_marked(addr), "obj not marked");
assert(PSParallelCompact::gc_heap()->is_in(addr), "not in heap");
assert(PSParallelCompact::mark_bitmap()->is_marked(addr), "not marked");
// Region covering the object.
size_t region_index = addr_to_region_idx(addr);
const RegionData* const region_ptr = region(region_index);
HeapWord* const region_addr = region_align_down(addr);
assert(addr < region_addr + RegionSize, "Region does not cover object");
assert(addr_to_region_ptr(region_addr) == region_ptr, "sanity check");
RegionData* const region_ptr = addr_to_region_ptr(addr);
HeapWord* result = region_ptr->destination();
// If all the data in the region is live, then the new location of the object
// can be calculated from the destination of the region plus the offset of the
// object in the region.
// If the entire Region is live, the new location is region->destination + the
// offset of the object within the Region.
// Run some performance tests to determine if this special case pays off. It
// is worth it for pointers into the dense prefix. If the optimization to
// avoid pointer updates in regions that only point to the dense prefix is
// ever implemented, this should be revisited.
if (region_ptr->data_size() == RegionSize) {
result += pointer_delta(addr, region_addr);
DEBUG_ONLY(PSParallelCompact::check_new_location(addr, result);)
result += region_offset(addr);
return result;
}
// The new location of the object is
// region destination +
// size of the partial object extending onto the region +
// sizes of the live objects in the Region that are to the left of addr
const size_t partial_obj_size = region_ptr->partial_obj_size();
HeapWord* const search_start = region_addr + partial_obj_size;
// Otherwise, the new location is region->destination + block offset + the
// number of live words in the Block that are (a) to the left of addr and (b)
// due to objects that start in the Block.
const ParMarkBitMap* bitmap = PSParallelCompact::mark_bitmap();
size_t live_to_left = bitmap->live_words_in_range(search_start, oop(addr));
// Fill in the block table if necessary. This is unsynchronized, so multiple
// threads may fill the block table for a region (harmless, since it is
// idempotent).
// NOTE(review): the filled flag is set with a plain store after the block
// offsets are written with plain stores.  Nothing visible here orders the two,
// so a thread on weakly ordered hardware could presumably see the flag set
// before the offsets -- confirm whether release/acquire ordering is required.
if (!region_ptr->blocks_filled()) {
PSParallelCompact::fill_blocks(addr_to_region_idx(addr));
region_ptr->set_blocks_filled();
}
// Only the bitmap range from the start of addr's block up to addr is scanned;
// the live words before that block come from the block table entry.
HeapWord* const search_start = block_align_down(addr);
const size_t block_offset = addr_to_block_ptr(addr)->offset();
result += partial_obj_size + live_to_left;
DEBUG_ONLY(PSParallelCompact::check_new_location(addr, result);)
const ParMarkBitMap* bitmap = PSParallelCompact::mark_bitmap();
const size_t live = bitmap->live_words_in_range(search_start, oop(addr));
result += block_offset + live;
DEBUG_ONLY(PSParallelCompact::check_new_location(addr, result));
return result;
}
#ifdef ASSERT
#ifdef ASSERT
void ParallelCompactData::verify_clear(const PSVirtualSpace* vspace)
{
const size_t* const beg = (const size_t*)vspace->committed_low_addr();
......@@ -762,16 +795,10 @@ void ParallelCompactData::verify_clear(const PSVirtualSpace* vspace)
void ParallelCompactData::verify_clear()
{
verify_clear(_region_vspace);
verify_clear(_block_vspace);
}
#endif // #ifdef ASSERT
#ifdef NOT_PRODUCT
// Look up the RegionData at region_index in the collector's summary data.
ParallelCompactData::RegionData* debug_region(size_t region_index) {
  return PSParallelCompact::summary_data().region(region_index);
}
#endif
elapsedTimer PSParallelCompact::_accumulated_time;
unsigned int PSParallelCompact::_total_invocations = 0;
unsigned int PSParallelCompact::_maximum_compaction_gc_num = 0;
......@@ -1961,11 +1988,6 @@ void PSParallelCompact::invoke(bool maximum_heap_compaction) {
maximum_heap_compaction);
}
bool ParallelCompactData::region_contains(size_t region_index, HeapWord* addr) {
size_t addr_region_index = addr_to_region_idx(addr);
return region_index == addr_region_index;
}
// This method contains no policy. You should probably
// be calling invoke() instead.
bool PSParallelCompact::invoke_no_policy(bool maximum_heap_compaction) {
......@@ -2627,6 +2649,41 @@ void PSParallelCompact::enqueue_region_stealing_tasks(
}
}
#ifdef ASSERT
// Print, for each space from old_space_id up to (but not including)
// last_space_id whose bottom differs from its top, one line holding the space
// id and name, the number of regions between bottom and the region-aligned
// top, and a five-bucket histogram of how many times each region's block
// table was filled.  The last bucket also collects every larger count.  Does
// nothing unless TraceParallelOldGCCompactionPhase is set.
void PSParallelCompact::write_block_fill_histogram(outputStream* const out)
{
  if (!TraceParallelOldGCCompactionPhase) {
    return;
  }

  typedef ParallelCompactData::RegionData rd_t;
  ParallelCompactData& sd = summary_data();

  for (unsigned int id = old_space_id; id < last_space_id; ++id) {
    MutableSpace* const spc = _space_info[id].space();
    if (spc->bottom() == spc->top()) {
      continue;  // Nothing in this space to report.
    }

    const rd_t* const first = sd.addr_to_region_ptr(spc->bottom());
    HeapWord* const aligned_top = sd.region_align_up(spc->top());
    const rd_t* const limit = sd.addr_to_region_ptr(aligned_top);

    size_t histo[5] = { 0, 0, 0, 0, 0 };
    const size_t histo_len = sizeof(histo) / sizeof(size_t);
    const size_t region_cnt = pointer_delta(limit, first, sizeof(rd_t));

    // Bucket each region by its fill count, clamping to the last bucket.
    const rd_t* cur = first;
    while (cur < limit) {
      const size_t bucket = MIN2(cur->blocks_filled_count(), histo_len - 1);
      ++histo[bucket];
      ++cur;
    }

    out->print("%u %-4s" SIZE_FORMAT_W(5), id, space_names[id], region_cnt);
    for (size_t i = 0; i < histo_len; ++i) {
      const double percent = 100.0 * histo[i] / region_cnt;
      out->print(" " SIZE_FORMAT_W(5) " %5.1f%%", histo[i], percent);
    }
    out->cr();
  }
}
#endif // #ifdef ASSERT
void PSParallelCompact::compact() {
// trace("5");
TraceTime tm("compaction phase", print_phases(), true, gclog_or_tty);
......@@ -2666,6 +2723,8 @@ void PSParallelCompact::compact() {
update_deferred_objects(cm, SpaceId(id));
}
}
DEBUG_ONLY(write_block_fill_histogram(gclog_or_tty));
}
#ifdef ASSERT
......@@ -3130,6 +3189,57 @@ void PSParallelCompact::fill_region(ParCompactionManager* cm, size_t region_idx)
} while (true);
}
void PSParallelCompact::fill_blocks(size_t region_idx)
{
// Fill in the block table elements for the specified region. Each block
// table element holds the number of live words in the region that are to the
// left of the first object that starts in the block. Thus only blocks in
// which an object starts need to be filled.
//
// The algorithm scans the section of the bitmap that corresponds to the
// region, keeping a running total of the live words. When an object start is
// found, if it's the first to start in the block that contains it, the
// current total is written to the block table element.
const size_t Log2BlockSize = ParallelCompactData::Log2BlockSize;
const size_t Log2RegionSize = ParallelCompactData::Log2RegionSize;
const size_t RegionSize = ParallelCompactData::RegionSize;
ParallelCompactData& sd = summary_data();
const size_t partial_obj_size = sd.region(region_idx)->partial_obj_size();
if (partial_obj_size >= RegionSize) {
return; // No objects start in this region.
}
// Ensure the first loop iteration decides that the block has changed.
// block_count() is one past the largest index that block() accepts, so it
// can never equal a real block index.
size_t cur_block = sd.block_count();
const ParMarkBitMap* const bitmap = mark_bitmap();
// Shift that converts a bitmap bit index into a block index.
const size_t Log2BitsPerBlock = Log2BlockSize - LogMinObjAlignment;
assert((size_t)1 << Log2BitsPerBlock ==
bitmap->words_to_bits(ParallelCompactData::BlockSize), "sanity");
// Bit range [beg_bit, range_end) covers this region in the bitmap.
size_t beg_bit = bitmap->words_to_bits(region_idx << Log2RegionSize);
const size_t range_end = beg_bit + bitmap->words_to_bits(RegionSize);
// The running total starts with the partial object that extends onto the
// region; the search for object starts begins just past it.
size_t live_bits = bitmap->words_to_bits(partial_obj_size);
beg_bit = bitmap->find_obj_beg(beg_bit + live_bits, range_end);
while (beg_bit < range_end) {
const size_t new_block = beg_bit >> Log2BitsPerBlock;
if (new_block != cur_block) {
// First object start seen in this block: record the live words so far.
cur_block = new_block;
sd.block(cur_block)->set_offset(bitmap->bits_to_words(live_bits));
}
const size_t end_bit = bitmap->find_obj_end(beg_bit, range_end);
if (end_bit < range_end - 1) {
// The object ends before the last bit of the range: add its size (both end
// bits inclusive) and look for the next object start.
live_bits += end_bit - beg_bit + 1;
beg_bit = bitmap->find_obj_beg(end_bit + 1, range_end);
} else {
// Presumably the object reaches or passes the last bit of the region, so no
// further object can start here -- confirm against find_obj_end().
return;
}
}
}
void
PSParallelCompact::move_and_update(ParCompactionManager* cm, SpaceId space_id) {
const MutableSpace* sp = space(space_id);
......
......@@ -220,6 +220,17 @@ public:
// Mask for the bits in a pointer to get the address of the start of a region.
static const size_t RegionAddrMask;
// Block analogues of the region constants above: log2 of the block size in
// words, the block size in words and in bytes, the masks that extract the
// within-block part of a word count and of an address, and the mask that
// yields the address of the start of a block.
static const size_t Log2BlockSize;
static const size_t BlockSize;
static const size_t BlockSizeBytes;
static const size_t BlockSizeOffsetMask;
static const size_t BlockAddrOffsetMask;
static const size_t BlockAddrMask;
// Number of blocks per region and its base-2 logarithm.
static const size_t BlocksPerRegion;
static const size_t Log2BlocksPerRegion;
class RegionData
{
public:
......@@ -272,6 +283,12 @@ public:
inline uint destination_count() const;
inline uint destination_count_raw() const;
// Whether the block table for this region has been filled.
inline bool blocks_filled() const;
// Number of times the block table was filled.
DEBUG_ONLY(inline size_t blocks_filled_count() const;)
// The location of the java heap data that corresponds to this region.
inline HeapWord* data_location() const;
......@@ -296,6 +313,7 @@ public:
void set_partial_obj_size(size_t words) {
_partial_obj_size = (region_sz_t) words;
}
inline void set_blocks_filled();
inline void set_destination_count(uint count);
inline void set_live_obj_size(size_t words);
......@@ -328,7 +346,11 @@ public:
HeapWord* _partial_obj_addr;
region_sz_t _partial_obj_size;
region_sz_t volatile _dc_and_los;
bool _blocks_filled;
#ifdef ASSERT
size_t _blocks_filled_count; // Number of block table fills.
// These enable optimizations that are only partially implemented. Use
// debug builds to prevent the code fragments from breaking.
HeapWord* _data_location;
......@@ -337,11 +359,26 @@ public:
#ifdef ASSERT
public:
uint _pushed; // 0 until region is pushed onto a worker's stack
uint _pushed; // 0 until region is pushed onto a stack
private:
#endif
};
// "Blocks" allow shorter sections of the bitmap to be searched. Each Block
// holds an offset, which is the amount of live data in the Region to the left
// of the first live object that starts in the Block.
class BlockData
{
public:
  // Storage type of a block table entry.
  typedef unsigned short int blk_ofs_t;

  // The offset currently stored in this entry.
  blk_ofs_t offset() const { return _offset; }

  // Store val narrowed to blk_ofs_t; bits that do not fit are discarded.
  void set_offset(size_t val) { _offset = static_cast<blk_ofs_t>(val); }

private:
  blk_ofs_t _offset;
};
public:
ParallelCompactData();
bool initialize(MemRegion covered_region);
......@@ -353,8 +390,9 @@ public:
inline RegionData* region(size_t region_idx) const;
inline size_t region(const RegionData* const region_ptr) const;
// Returns true if the given address is contained within the region
bool region_contains(size_t region_index, HeapWord* addr);
// Number of elements in the block table.
size_t block_count() const { return _block_count; }
// Block table element at index block_idx.
inline BlockData* block(size_t block_idx) const;
// Presumably the index of the element block_ptr points to (the inverse of the
// accessor above) -- the definition is not visible here, confirm.
inline size_t block(const BlockData* block_ptr) const;
void add_obj(HeapWord* addr, size_t len);
void add_obj(oop p, size_t len) { add_obj((HeapWord*)p, len); }
......@@ -394,11 +432,24 @@ public:
inline HeapWord* region_align_up(HeapWord* addr) const;
inline bool is_region_aligned(HeapWord* addr) const;
// Analogous to region_offset() for blocks: the offset of addr, in words,
// within the BlockSizeBytes-aligned span that contains it.
size_t block_offset(const HeapWord* addr) const;
// Index of the block containing addr, counted in blocks from _region_start.
size_t addr_to_block_idx(const HeapWord* addr) const;
size_t addr_to_block_idx(const oop obj) const {
return addr_to_block_idx((HeapWord*) obj);
}
// Block table element for the block containing addr.
inline BlockData* addr_to_block_ptr(const HeapWord* addr) const;
// Address of the first word of the block with the given index.
inline HeapWord* block_to_addr(size_t block) const;
// Index of the first block of the region with the given index.
inline size_t region_to_block_idx(size_t region) const;
// Round addr down or up to a block boundary, and test whether addr lies on
// one.
inline HeapWord* block_align_down(HeapWord* addr) const;
inline HeapWord* block_align_up(HeapWord* addr) const;
inline bool is_block_aligned(HeapWord* addr) const;
// Return the address one past the end of the partial object.
HeapWord* partial_obj_end(size_t region_idx) const;
// Return the new location of the object p after
// the compaction.
// Return the location of the object after compaction.
HeapWord* calc_new_pointer(HeapWord* addr);
HeapWord* calc_new_pointer(oop p) {
......@@ -411,6 +462,7 @@ public:
#endif // #ifdef ASSERT
private:
bool initialize_block_data();
bool initialize_region_data(size_t region_size);
PSVirtualSpace* create_vspace(size_t count, size_t element_size);
......@@ -424,6 +476,10 @@ private:
size_t _reserved_byte_size;
RegionData* _region_data;
size_t _region_count;
// Block table state, set by initialize_block_data(): the virtual space that
// backs the table, a pointer to its first element, and the element count.
PSVirtualSpace* _block_vspace;
BlockData* _block_data;
size_t _block_count;
};
inline uint
......@@ -438,6 +494,28 @@ ParallelCompactData::RegionData::destination_count() const
return destination_count_raw() >> dc_shift;
}
// Whether set_blocks_filled() has been called for this region since the flag
// was last cleared.
// NOTE(review): this is a plain, unordered read of a non-volatile bool that
// other GC threads may write concurrently -- confirm whether acquire ordering
// is needed before the block table is read.
inline bool
ParallelCompactData::RegionData::blocks_filled() const
{
return _blocks_filled;
}
#ifdef ASSERT
// Debug builds only: the number of times set_blocks_filled() has incremented
// the fill counter for this region.
inline size_t
ParallelCompactData::RegionData::blocks_filled_count() const
{
return _blocks_filled_count;
}
#endif // #ifdef ASSERT
// Mark this region's block table as filled.
// NOTE(review): the flag is published with a plain store and nothing here
// orders it after the block table writes that precede the call.  On weakly
// ordered hardware another thread could presumably observe the flag before the
// table contents -- confirm whether a release barrier is required before the
// store.
inline void
ParallelCompactData::RegionData::set_blocks_filled()
{
_blocks_filled = true;
// Debug builds count the number of times the table was filled.
DEBUG_ONLY(Atomic::inc_ptr(&_blocks_filled_count));
}
inline void
ParallelCompactData::RegionData::set_destination_count(uint count)
{
......@@ -532,6 +610,12 @@ ParallelCompactData::region(const RegionData* const region_ptr) const
return pointer_delta(region_ptr, _region_data, sizeof(RegionData));
}
// Block table element at index n; n must be less than block_count().
inline ParallelCompactData::BlockData*
ParallelCompactData::block(size_t n) const {
  assert(n < block_count(), "bad arg");
  return &_block_data[n];
}
inline size_t
ParallelCompactData::region_offset(const HeapWord* addr) const
{
......@@ -598,6 +682,63 @@ ParallelCompactData::is_region_aligned(HeapWord* addr) const
return region_offset(addr) == 0;
}
// Offset of addr, in words, within the BlockSizeBytes-aligned span containing
// it.  addr must lie in [_region_start, _region_end].
inline size_t
ParallelCompactData::block_offset(const HeapWord* addr) const
{
  assert(addr >= _region_start, "bad addr");
  assert(addr <= _region_end, "bad addr");
  const size_t byte_offset = size_t(addr) & BlockAddrOffsetMask;
  return byte_offset >> LogHeapWordSize;
}
// Index of the block that contains addr, counted in blocks from
// _region_start.  addr must lie in [_region_start, _region_end].
inline size_t
ParallelCompactData::addr_to_block_idx(const HeapWord* addr) const
{
  assert(addr >= _region_start, "bad addr");
  assert(addr <= _region_end, "bad addr");
  const size_t word_offset = pointer_delta(addr, _region_start);
  return word_offset >> Log2BlockSize;
}
// Block table element for the block that contains addr.
inline ParallelCompactData::BlockData*
ParallelCompactData::addr_to_block_ptr(const HeapWord* addr) const
{
  const size_t idx = addr_to_block_idx(addr);
  return block(idx);
}
// Address of the first word of the block with index block; the index must be
// less than _block_count.
inline HeapWord*
ParallelCompactData::block_to_addr(size_t block) const
{
  assert(block < _block_count, "block out of range");
  const size_t word_offset = block << Log2BlockSize;
  return _region_start + word_offset;
}
// Index of the first block of the region with index region: each region
// spans 2^Log2BlocksPerRegion consecutive blocks.
inline size_t
ParallelCompactData::region_to_block_idx(size_t region) const
{
return region << Log2BlocksPerRegion;
}
// Round addr down to a block boundary by clearing its within-block address
// bits.  addr may exceed _region_end by less than RegionSize words.
inline HeapWord*
ParallelCompactData::block_align_down(HeapWord* addr) const
{
  assert(addr >= _region_start, "bad addr");
  assert(addr < _region_end + RegionSize, "bad addr");
  const size_t aligned = size_t(addr) & BlockAddrMask;
  return (HeapWord*)aligned;
}
// Round addr up to a block boundary: advance by BlockSize - 1 words, then
// round down.  addr must lie in [_region_start, _region_end].
inline HeapWord*
ParallelCompactData::block_align_up(HeapWord* addr) const
{
  assert(addr >= _region_start, "bad addr");
  assert(addr <= _region_end, "bad addr");
  HeapWord* const bumped = addr + BlockSizeOffsetMask;
  return block_align_down(bumped);
}
// True when addr lies on a block boundary, i.e. its block_offset() is zero.
inline bool
ParallelCompactData::is_block_aligned(HeapWord* addr) const
{
return block_offset(addr) == 0;
}
// Abstract closure for use with ParMarkBitMap::iterate(), which will invoke the
// do_addr() method.
//
......@@ -775,6 +916,7 @@ class PSParallelCompact : AllStatic {
// Convenient access to type names.
typedef ParMarkBitMap::idx_t idx_t;
typedef ParallelCompactData::RegionData RegionData;
typedef ParallelCompactData::BlockData BlockData;
typedef enum {
old_space_id, eden_space_id,
......@@ -962,6 +1104,8 @@ class PSParallelCompact : AllStatic {
// Adjust addresses in roots. Does not adjust addresses in heap.
static void adjust_roots();
DEBUG_ONLY(static void write_block_fill_histogram(outputStream* const out);)
// Move objects to new locations.
static void compact_perm(ParCompactionManager* cm);
static void compact();
......@@ -1128,6 +1272,9 @@ class PSParallelCompact : AllStatic {
fill_region(cm, region);
}
// Fill in the block table for the specified region.
static void fill_blocks(size_t region_idx);
// Update the deferred objects in the space.
static void update_deferred_objects(ParCompactionManager* cm, SpaceId id);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册