Commit f9d9730e authored by acorn

Merge

......@@ -249,8 +249,6 @@ void AbstractAssembler::block_comment(const char* comment) {
bool MacroAssembler::needs_explicit_null_check(intptr_t offset) {
// Exception handler checks the nmethod's implicit null checks table
// only when this method returns false.
#ifndef SPARC
// Sparc does not have based addressing
if (UseCompressedOops) {
// The first page after heap_base is unmapped and
// the 'offset' is equal to [heap_base + offset] for
......@@ -261,7 +259,6 @@ bool MacroAssembler::needs_explicit_null_check(intptr_t offset) {
offset = (intptr_t)(pointer_delta((void*)offset, (void*)heap_base, 1));
}
}
#endif // SPARC
return offset < 0 || os::vm_page_size() <= offset;
}
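
The check above decides whether an access at a given offset can rely on the hardware trap taken in the unmapped page at the base of the address space: with compressed oops, a fault at heap_base + offset is first folded back into the plain field offset. A stand-alone sketch of that decision follows; it is not HotSpot code, and kPageSize and kHeapBase are invented stand-ins for os::vm_page_size() and the narrow-oop heap base.

#include <cstdint>
#include <cstdio>

static const int64_t kPageSize = 4096;                 // stand-in for os::vm_page_size()
static const int64_t kHeapBase = int64_t(1) << 32;     // hypothetical narrow-oop heap base

static bool needs_explicit_null_check(int64_t offset, bool compressed_oops) {
  if (compressed_oops && offset >= kHeapBase && offset < kHeapBase + kPageSize) {
    // The faulting address was really heap_base + field_offset; recover the field offset.
    offset -= kHeapBase;
  }
  // Only faults inside the first (unmapped) page can serve as implicit null checks.
  return offset < 0 || offset >= kPageSize;
}

int main() {
  printf("%d\n", (int)needs_explicit_null_check(8, false));              // 0: implicit check is enough
  printf("%d\n", (int)needs_explicit_null_check(kHeapBase + 8, true));   // 0: folded back into page zero
  printf("%d\n", (int)needs_explicit_null_check(kPageSize + 8, false));  // 1: explicit check required
  return 0;
}
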
......
......@@ -49,7 +49,7 @@ bool ciMethodBlocks::is_block_start(int bci) {
// first half. Returns the range beginning at bci.
ciBlock *ciMethodBlocks::split_block_at(int bci) {
ciBlock *former_block = block_containing(bci);
ciBlock *new_block = new(_arena) ciBlock(_method, _num_blocks++, this, former_block->start_bci());
ciBlock *new_block = new(_arena) ciBlock(_method, _num_blocks++, former_block->start_bci());
_blocks->append(new_block);
assert(former_block != NULL, "must not be NULL");
new_block->set_limit_bci(bci);
......@@ -83,7 +83,7 @@ ciBlock *ciMethodBlocks::make_block_at(int bci) {
if (cb == NULL ) {
// This is our first time visiting this bytecode. Create
// a fresh block and assign it this starting point.
ciBlock *nb = new(_arena) ciBlock(_method, _num_blocks++, this, bci);
ciBlock *nb = new(_arena) ciBlock(_method, _num_blocks++, bci);
_blocks->append(nb);
_bci_to_block[bci] = nb;
return nb;
......@@ -98,6 +98,11 @@ ciBlock *ciMethodBlocks::make_block_at(int bci) {
}
}
ciBlock *ciMethodBlocks::make_dummy_block() {
ciBlock *dum = new(_arena) ciBlock(_method, -1, 0);
return dum;
}
void ciMethodBlocks::do_analysis() {
ciBytecodeStream s(_method);
ciBlock *cur_block = block_containing(0);
......@@ -253,7 +258,7 @@ ciMethodBlocks::ciMethodBlocks(Arena *arena, ciMethod *meth): _method(meth),
Copy::zero_to_words((HeapWord*) _bci_to_block, b2bsize / sizeof(HeapWord));
// create initial block covering the entire method
ciBlock *b = new(arena) ciBlock(_method, _num_blocks++, this, 0);
ciBlock *b = new(arena) ciBlock(_method, _num_blocks++, 0);
_blocks->append(b);
_bci_to_block[0] = b;
......@@ -334,7 +339,7 @@ void ciMethodBlocks::dump() {
#endif
ciBlock::ciBlock(ciMethod *method, int index, ciMethodBlocks *mb, int start_bci) :
ciBlock::ciBlock(ciMethod *method, int index, int start_bci) :
#ifndef PRODUCT
_method(method),
#endif
......
......@@ -48,6 +48,8 @@ public:
int num_blocks() { return _num_blocks;}
void clear_processed();
ciBlock *make_dummy_block(); // a block not associated with a bci
#ifndef PRODUCT
void dump();
#endif
......@@ -81,7 +83,7 @@ public:
fall_through_bci = -1
};
ciBlock(ciMethod *method, int index, ciMethodBlocks *mb, int start_bci);
ciBlock(ciMethod *method, int index, int start_bci);
int start_bci() const { return _start_bci; }
int limit_bci() const { return _limit_bci; }
int control_bci() const { return _control_bci; }
......@@ -94,7 +96,6 @@ public:
int ex_limit_bci() const { return _ex_limit_bci; }
bool contains(int bci) const { return start_bci() <= bci && bci < limit_bci(); }
// flag handling
bool processed() const { return (_flags & Processed) != 0; }
bool is_handler() const { return (_flags & Handler) != 0; }
......
......@@ -338,8 +338,10 @@ ciTypeFlow::StateVector::StateVector(ciTypeFlow* analyzer) {
}
_trap_bci = -1;
_trap_index = 0;
_def_locals.clear();
}
// ------------------------------------------------------------------
// ciTypeFlow::get_start_state
//
......@@ -735,7 +737,7 @@ void ciTypeFlow::StateVector::do_multianewarray(ciBytecodeStream* str) {
void ciTypeFlow::StateVector::do_new(ciBytecodeStream* str) {
bool will_link;
ciKlass* klass = str->get_klass(will_link);
if (!will_link) {
if (!will_link || str->is_unresolved_klass()) {
trap(str, klass, str->get_klass_index());
} else {
push_object(klass);
......@@ -1268,7 +1270,9 @@ bool ciTypeFlow::StateVector::apply_one_bytecode(ciBytecodeStream* str) {
}
case Bytecodes::_iinc:
{
check_int(local(str->get_index()));
int lnum = str->get_index();
check_int(local(lnum));
store_to_local(lnum);
break;
}
case Bytecodes::_iload: load_local_int(str->get_index()); break;
......@@ -1506,6 +1510,46 @@ void ciTypeFlow::StateVector::print_on(outputStream* st) const {
}
#endif
// ------------------------------------------------------------------
// ciTypeFlow::SuccIter::next
//
void ciTypeFlow::SuccIter::next() {
int succ_ct = _pred->successors()->length();
int next = _index + 1;
if (next < succ_ct) {
_index = next;
_succ = _pred->successors()->at(next);
return;
}
for (int i = next - succ_ct; i < _pred->exceptions()->length(); i++) {
// Do not compile any code for unloaded exception types.
// Following compiler passes are responsible for doing this also.
ciInstanceKlass* exception_klass = _pred->exc_klasses()->at(i);
if (exception_klass->is_loaded()) {
_index = next;
_succ = _pred->exceptions()->at(i);
return;
}
next++;
}
_index = -1;
_succ = NULL;
}
// ------------------------------------------------------------------
// ciTypeFlow::SuccIter::set_succ
//
void ciTypeFlow::SuccIter::set_succ(Block* succ) {
int succ_ct = _pred->successors()->length();
if (_index < succ_ct) {
_pred->successors()->at_put(_index, succ);
} else {
int idx = _index - succ_ct;
_pred->exceptions()->at_put(idx, succ);
}
}
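
SuccIter::next above walks the block's normal successors first and then its exception successors, skipping any exception whose klass is not loaded, while keeping a single combined index so that set_succ and is_normal_ctrl can tell the two regions apart. The sketch below mirrors that walk with toy types; Succ and SuccIterSketch are assumptions, not the ci* classes.

#include <cstdio>
#include <vector>

struct Succ { int id; bool loaded; };

struct SuccIterSketch {
  const std::vector<Succ>& normals;       // plays the role of successors()
  const std::vector<Succ>& exceptions;    // plays the role of exceptions()/exc_klasses()
  int index;                              // combined index, -1 when done
  const Succ* succ;

  SuccIterSketch(const std::vector<Succ>& n, const std::vector<Succ>& e)
      : normals(n), exceptions(e), index(-1), succ(nullptr) { next(); }

  bool done() const { return index < 0; }
  bool is_normal_ctrl() const { return index < (int)normals.size(); }

  void next() {
    int n = (int)normals.size();
    int i = index + 1;
    if (i < n) { index = i; succ = &normals[i]; return; }
    for (int k = i - n; k < (int)exceptions.size(); k++) {
      if (exceptions[k].loaded) {          // skip successors whose exception type is unloaded
        index = i; succ = &exceptions[k]; return;
      }
      i++;
    }
    index = -1; succ = nullptr;            // exhausted both lists
  }
};

int main() {
  std::vector<Succ> normals = { {1, true}, {2, true} };
  std::vector<Succ> excs    = { {3, false}, {4, true} };   // #3 is unloaded and therefore skipped
  for (SuccIterSketch it(normals, excs); !it.done(); it.next())
    printf("succ %d%s\n", it.succ->id, it.is_normal_ctrl() ? "" : " (exception)");
  return 0;
}
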
// ciTypeFlow::Block
//
// A basic block.
......@@ -1526,10 +1570,11 @@ ciTypeFlow::Block::Block(ciTypeFlow* outer,
_jsrs = new_jsrs;
_next = NULL;
_on_work_list = false;
_pre_order = -1; assert(!has_pre_order(), "");
_private_copy = false;
_backedge_copy = false;
_exception_entry = false;
_trap_bci = -1;
_trap_index = 0;
df_init();
if (CITraceTypeFlow) {
tty->print_cr(">> Created new block");
......@@ -1541,55 +1586,13 @@ ciTypeFlow::Block::Block(ciTypeFlow* outer,
}
// ------------------------------------------------------------------
// ciTypeFlow::Block::clone_loop_head
//
ciTypeFlow::Block*
ciTypeFlow::Block::clone_loop_head(ciTypeFlow* analyzer,
int branch_bci,
ciTypeFlow::Block* target,
ciTypeFlow::JsrSet* jsrs) {
// Loop optimizations are not performed on Tier1 compiles. Do nothing.
if (analyzer->env()->comp_level() < CompLevel_full_optimization) {
return target;
}
// The current block ends with a branch.
//
// If the target block appears to be the test-clause of a for loop, and
// it is not too large, and it has not yet been cloned, clone it.
// The pre-existing copy becomes the private clone used only by
// the initial iteration of the loop. (We know we are simulating
// the initial iteration right now, since we have never calculated
// successors before for this block.)
if (branch_bci <= start()
&& (target->limit() - target->start()) <= CICloneLoopTestLimit
&& target->private_copy_count() == 0) {
// Setting the private_copy bit ensures that the target block cannot be
// reached by any other paths, such as fall-in from the loop body.
// The private copy will be accessible only on successor lists
// created up to this point.
target->set_private_copy(true);
if (CITraceTypeFlow) {
tty->print(">> Cloning a test-clause block ");
print_value_on(tty);
tty->cr();
}
// If the target is the current block, then later on a new copy of the
// target block will be created when its bytecodes are reached by
// an alternate path. (This is the case for loops with the loop
// head at the bci-wise bottom of the loop, as with pre-1.4.2 javac.)
//
// Otherwise, duplicate the target block now and use it immediately.
// (The case for loops with the loop head at the bci-wise top of the
// loop, as with 1.4.2 javac.)
//
// In either case, the new copy of the block will remain public.
if (target != this) {
target = analyzer->block_at(branch_bci, jsrs);
}
}
return target;
// ciTypeFlow::Block::df_init
void ciTypeFlow::Block::df_init() {
_pre_order = -1; assert(!has_pre_order(), "");
_post_order = -1; assert(!has_post_order(), "");
_loop = NULL;
_irreducible_entry = false;
_rpo_next = NULL;
}
// ------------------------------------------------------------------
......@@ -1644,7 +1647,6 @@ ciTypeFlow::Block::successors(ciBytecodeStream* str,
case Bytecodes::_ifnull: case Bytecodes::_ifnonnull:
// Our successors are the branch target and the next bci.
branch_bci = str->get_dest();
clone_loop_head(analyzer, branch_bci, this, jsrs);
_successors =
new (arena) GrowableArray<Block*>(arena, 2, 0, NULL);
assert(_successors->length() == IF_NOT_TAKEN, "");
......@@ -1658,14 +1660,7 @@ ciTypeFlow::Block::successors(ciBytecodeStream* str,
_successors =
new (arena) GrowableArray<Block*>(arena, 1, 0, NULL);
assert(_successors->length() == GOTO_TARGET, "");
target = analyzer->block_at(branch_bci, jsrs);
// If the target block has not been visited yet, and looks like
// a two-way branch, attempt to clone it if it is a loop head.
if (target->_successors != NULL
&& target->_successors->length() == (IF_TAKEN + 1)) {
target = clone_loop_head(analyzer, branch_bci, target, jsrs);
}
_successors->append(target);
_successors->append(analyzer->block_at(branch_bci, jsrs));
break;
case Bytecodes::_jsr:
......@@ -1801,65 +1796,60 @@ void ciTypeFlow::Block::compute_exceptions() {
}
// ------------------------------------------------------------------
// ciTypeFlow::Block::is_simpler_than
//
// A relation used to order our work list. We work on a block earlier
// if it has a smaller jsr stack or it occurs earlier in the program
// text.
// ciTypeFlow::Block::set_backedge_copy
// Use this only to make a pre-existing public block into a backedge copy.
void ciTypeFlow::Block::set_backedge_copy(bool z) {
assert(z || (z == is_backedge_copy()), "cannot make a backedge copy public");
_backedge_copy = z;
}
// ------------------------------------------------------------------
// ciTypeFlow::Block::is_clonable_exit
//
// Note: maybe we should redo this functionality to make blocks
// which correspond to exceptions lower priority.
bool ciTypeFlow::Block::is_simpler_than(ciTypeFlow::Block* other) {
if (other == NULL) {
return true;
} else {
int size1 = _jsrs->size();
int size2 = other->_jsrs->size();
if (size1 < size2) {
return true;
} else if (size2 < size1) {
return false;
} else {
#if 0
if (size1 > 0) {
int r1 = _jsrs->record_at(0)->return_address();
int r2 = _jsrs->record_at(0)->return_address();
if (r1 < r2) {
return true;
} else if (r2 < r1) {
return false;
} else {
int e1 = _jsrs->record_at(0)->return_address();
int e2 = _jsrs->record_at(0)->return_address();
if (e1 < e2) {
return true;
} else if (e2 < e1) {
return false;
}
}
// At most 2 normal successors, one of which continues looping,
// and all exceptional successors must exit.
bool ciTypeFlow::Block::is_clonable_exit(ciTypeFlow::Loop* lp) {
int normal_cnt = 0;
int in_loop_cnt = 0;
for (SuccIter iter(this); !iter.done(); iter.next()) {
Block* succ = iter.succ();
if (iter.is_normal_ctrl()) {
if (++normal_cnt > 2) return false;
if (lp->contains(succ->loop())) {
if (++in_loop_cnt > 1) return false;
}
#endif
return (start() <= other->start());
} else {
if (lp->contains(succ->loop())) return false;
}
}
return in_loop_cnt == 1;
}
// ------------------------------------------------------------------
// ciTypeFlow::Block::set_private_copy
// Use this only to make a pre-existing public block into a private copy.
void ciTypeFlow::Block::set_private_copy(bool z) {
assert(z || (z == is_private_copy()), "cannot make a private copy public");
_private_copy = z;
// ciTypeFlow::Block::looping_succ
//
ciTypeFlow::Block* ciTypeFlow::Block::looping_succ(ciTypeFlow::Loop* lp) {
assert(successors()->length() <= 2, "at most 2 normal successors");
for (SuccIter iter(this); !iter.done(); iter.next()) {
Block* succ = iter.succ();
if (lp->contains(succ->loop())) {
return succ;
}
}
return NULL;
}
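
Taken together, is_clonable_exit and looping_succ above accept a loop head only if it has at most two normal successors, exactly one of which stays in the loop, and no exceptional successor stays in the loop. A stand-alone model of that test, using a hypothetical Edge record in place of the real Block/Loop types:

#include <cstdio>
#include <vector>

struct Edge { bool normal; bool stays_in_loop; };

static bool is_clonable_exit(const std::vector<Edge>& succs) {
  int normal_cnt = 0, in_loop_cnt = 0;
  for (const Edge& e : succs) {
    if (e.normal) {
      if (++normal_cnt > 2) return false;               // more than two normal successors
      if (e.stays_in_loop && ++in_loop_cnt > 1) return false;
    } else if (e.stays_in_loop) {
      return false;                                     // exceptional successors must exit
    }
  }
  return in_loop_cnt == 1;                              // exactly one way to keep looping
}

int main() {
  std::vector<Edge> while_head = { {true, true}, {true, false}, {false, false} };
  std::vector<Edge> three_way  = { {true, true}, {true, false}, {true, false} };
  printf("%d %d\n", is_clonable_exit(while_head), is_clonable_exit(three_way));  // prints: 1 0
  return 0;
}
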
#ifndef PRODUCT
// ------------------------------------------------------------------
// ciTypeFlow::Block::print_value_on
void ciTypeFlow::Block::print_value_on(outputStream* st) const {
if (has_pre_order()) st->print("#%-2d ", pre_order());
if (has_rpo()) st->print("rpo#%-2d ", rpo());
st->print("[%d - %d)", start(), limit());
if (is_loop_head()) st->print(" lphd");
if (is_irreducible_entry()) st->print(" irred");
if (_jsrs->size() > 0) { st->print("/"); _jsrs->print_on(st); }
if (is_private_copy()) st->print("/private_copy");
if (is_backedge_copy()) st->print("/backedge_copy");
}
// ------------------------------------------------------------------
......@@ -1871,6 +1861,16 @@ void ciTypeFlow::Block::print_on(outputStream* st) const {
st->print_cr(" ==================================================== ");
st->print (" ");
print_value_on(st);
st->print(" Stored locals: "); def_locals()->print_on(st, outer()->method()->max_locals()); tty->cr();
if (loop() && loop()->parent() != NULL) {
st->print(" loops:");
Loop* lp = loop();
do {
st->print(" %d<-%d", lp->head()->pre_order(),lp->tail()->pre_order());
if (lp->is_irreducible()) st->print("(ir)");
lp = lp->parent();
} while (lp->parent() != NULL);
}
st->cr();
_state->print_on(st);
if (_successors == NULL) {
......@@ -1907,6 +1907,21 @@ void ciTypeFlow::Block::print_on(outputStream* st) const {
}
#endif
#ifndef PRODUCT
// ------------------------------------------------------------------
// ciTypeFlow::LocalSet::print_on
void ciTypeFlow::LocalSet::print_on(outputStream* st, int limit) const {
st->print("{");
for (int i = 0; i < max; i++) {
if (test(i)) st->print(" %d", i);
}
if (limit > max) {
st->print(" %d..%d ", max, limit);
}
st->print(" }");
}
#endif
// ciTypeFlow
//
// This is a pass over the bytecodes which computes the following:
......@@ -1922,12 +1937,11 @@ ciTypeFlow::ciTypeFlow(ciEnv* env, ciMethod* method, int osr_bci) {
_max_locals = method->max_locals();
_max_stack = method->max_stack();
_code_size = method->code_size();
_has_irreducible_entry = false;
_osr_bci = osr_bci;
_failure_reason = NULL;
assert(start_bci() >= 0 && start_bci() < code_size() , "correct osr_bci argument");
_work_list = NULL;
_next_pre_order = 0;
_ciblock_count = _methodBlocks->num_blocks();
_idx_to_blocklist = NEW_ARENA_ARRAY(arena(), GrowableArray<Block*>*, _ciblock_count);
......@@ -1949,12 +1963,6 @@ ciTypeFlow::Block* ciTypeFlow::work_list_next() {
_work_list = next_block->next();
next_block->set_next(NULL);
next_block->set_on_work_list(false);
if (!next_block->has_pre_order()) {
// Assign "pre_order" as each new block is taken from the work list.
// This number may be used by following phases to order block visits.
assert(!have_block_count(), "must not have mapped blocks yet");
next_block->set_pre_order(_next_pre_order++);
}
return next_block;
}
......@@ -1962,30 +1970,37 @@ ciTypeFlow::Block* ciTypeFlow::work_list_next() {
// ciTypeFlow::add_to_work_list
//
// Add a basic block to our work list.
// List is kept sorted by decreasing post-order (same as increasing RPO)
void ciTypeFlow::add_to_work_list(ciTypeFlow::Block* block) {
assert(!block->is_on_work_list(), "must not already be on work list");
if (CITraceTypeFlow) {
tty->print(">> Adding block%s ", block->has_pre_order() ? " (again)" : "");
tty->print(">> Adding block ");
block->print_value_on(tty);
tty->print_cr(" to the work list : ");
}
block->set_on_work_list(true);
if (block->is_simpler_than(_work_list)) {
// decreasing post order sort
Block* prev = NULL;
Block* current = _work_list;
int po = block->post_order();
while (current != NULL) {
if (!current->has_post_order() || po > current->post_order())
break;
prev = current;
current = current->next();
}
if (prev == NULL) {
block->set_next(_work_list);
_work_list = block;
} else {
Block *temp = _work_list;
while (!block->is_simpler_than(temp->next())) {
if (CITraceTypeFlow) {
tty->print(".");
}
temp = temp->next();
}
block->set_next(temp->next());
temp->set_next(block);
block->set_next(current);
prev->set_next(block);
}
if (CITraceTypeFlow) {
tty->cr();
}
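
The insertion above keeps the work list sorted by decreasing post-order, i.e. increasing RPO, and a block with no post-order yet is treated as smallest so new entries go ahead of it. A minimal stand-alone model with a hypothetical Node type (post_order < 0 standing in for "not assigned"):

#include <cstdio>

struct Node { int post_order; Node* next; };   // post_order < 0: not assigned yet

static Node* insert_sorted(Node* head, Node* blk) {
  Node* prev = nullptr;
  Node* cur  = head;
  int po = blk->post_order;
  while (cur != nullptr) {
    // Insert before the first entry with a smaller post-order (or none assigned).
    if (cur->post_order < 0 || po > cur->post_order) break;
    prev = cur;
    cur  = cur->next;
  }
  blk->next = cur;
  if (prev == nullptr) return blk;             // new head of the work list
  prev->next = blk;
  return head;
}

int main() {
  Node a{5, nullptr}, b{3, nullptr}, c{7, nullptr};
  Node* list = nullptr;
  list = insert_sorted(list, &a);
  list = insert_sorted(list, &b);
  list = insert_sorted(list, &c);
  for (Node* n = list; n != nullptr; n = n->next) printf("%d ", n->post_order);  // 7 5 3
  printf("\n");
  return 0;
}
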
......@@ -2008,7 +2023,7 @@ ciTypeFlow::Block* ciTypeFlow::block_at(int bci, ciTypeFlow::JsrSet* jsrs, Creat
assert(ciblk->start_bci() == bci, "bad ciBlock boundaries");
Block* block = get_block_for(ciblk->index(), jsrs, option);
assert(block == NULL? (option == no_create): block->is_private_copy() == (option == create_private_copy), "create option consistent with result");
assert(block == NULL? (option == no_create): block->is_backedge_copy() == (option == create_backedge_copy), "create option consistent with result");
if (CITraceTypeFlow) {
if (block != NULL) {
......@@ -2072,8 +2087,9 @@ void ciTypeFlow::flow_exceptions(GrowableArray<ciTypeFlow::Block*>* exceptions,
}
if (block->meet_exception(exception_klass, state)) {
// Block was modified. Add it to the work list.
if (!block->is_on_work_list()) {
// Block was modified and has PO. Add it to the work list.
if (block->has_post_order() &&
!block->is_on_work_list()) {
add_to_work_list(block);
}
}
......@@ -2091,8 +2107,9 @@ void ciTypeFlow::flow_successors(GrowableArray<ciTypeFlow::Block*>* successors,
for (int i = 0; i < len; i++) {
Block* block = successors->at(i);
if (block->meet(state)) {
// Block was modified. Add it to the work list.
if (!block->is_on_work_list()) {
// Block was modified and has PO. Add it to the work list.
if (block->has_post_order() &&
!block->is_on_work_list()) {
add_to_work_list(block);
}
}
......@@ -2133,6 +2150,111 @@ bool ciTypeFlow::can_trap(ciBytecodeStream& str) {
return true;
}
// ------------------------------------------------------------------
// ciTypeFlow::clone_loop_heads
//
// Clone the loop heads
bool ciTypeFlow::clone_loop_heads(Loop* lp, StateVector* temp_vector, JsrSet* temp_set) {
bool rslt = false;
for (PreorderLoops iter(loop_tree_root()); !iter.done(); iter.next()) {
lp = iter.current();
Block* head = lp->head();
if (lp == loop_tree_root() ||
lp->is_irreducible() ||
!head->is_clonable_exit(lp))
continue;
// check not already cloned
if (head->backedge_copy_count() != 0)
continue;
// check _no_ shared head below us
Loop* ch;
for (ch = lp->child(); ch != NULL && ch->head() != head; ch = ch->sibling());
if (ch != NULL)
continue;
// Clone head
Block* new_head = head->looping_succ(lp);
Block* clone = clone_loop_head(lp, temp_vector, temp_set);
// Update lp's info
clone->set_loop(lp);
lp->set_head(new_head);
lp->set_tail(clone);
// And move original head into outer loop
head->set_loop(lp->parent());
rslt = true;
}
return rslt;
}
// ------------------------------------------------------------------
// ciTypeFlow::clone_loop_head
//
// Clone lp's head and replace tail's successors with clone.
//
// |
// v
// head <-> body
// |
// v
// exit
//
// new_head
//
// |
// v
// head ----------\
// | |
// | v
// | clone <-> body
// | |
// | /--/
// | |
// v v
// exit
//
ciTypeFlow::Block* ciTypeFlow::clone_loop_head(Loop* lp, StateVector* temp_vector, JsrSet* temp_set) {
Block* head = lp->head();
Block* tail = lp->tail();
if (CITraceTypeFlow) {
tty->print(">> Requesting clone of loop head "); head->print_value_on(tty);
tty->print(" for predecessor "); tail->print_value_on(tty);
tty->cr();
}
Block* clone = block_at(head->start(), head->jsrs(), create_backedge_copy);
assert(clone->backedge_copy_count() == 1, "one backedge copy for all back edges");
assert(!clone->has_pre_order(), "just created");
clone->set_next_pre_order();
// Insert clone after (orig) tail in reverse post order
clone->set_rpo_next(tail->rpo_next());
tail->set_rpo_next(clone);
// tail->head becomes tail->clone
for (SuccIter iter(tail); !iter.done(); iter.next()) {
if (iter.succ() == head) {
iter.set_succ(clone);
break;
}
}
flow_block(tail, temp_vector, temp_set);
if (head == tail) {
// For self-loops, clone->head becomes clone->clone
flow_block(clone, temp_vector, temp_set);
for (SuccIter iter(clone); !iter.done(); iter.next()) {
if (iter.succ() == head) {
iter.set_succ(clone);
break;
}
}
}
flow_block(clone, temp_vector, temp_set);
return clone;
}
// ------------------------------------------------------------------
// ciTypeFlow::flow_block
......@@ -2159,11 +2281,14 @@ void ciTypeFlow::flow_block(ciTypeFlow::Block* block,
// Grab the state from the current block.
block->copy_state_into(state);
state->def_locals()->clear();
GrowableArray<Block*>* exceptions = block->exceptions();
GrowableArray<ciInstanceKlass*>* exc_klasses = block->exc_klasses();
bool has_exceptions = exceptions->length() > 0;
bool exceptions_used = false;
ciBytecodeStream str(method());
str.reset_to_bci(start);
Bytecodes::Code code;
......@@ -2172,6 +2297,7 @@ void ciTypeFlow::flow_block(ciTypeFlow::Block* block,
// Check for exceptional control flow from this point.
if (has_exceptions && can_trap(str)) {
flow_exceptions(exceptions, exc_klasses, state);
exceptions_used = true;
}
// Apply the effects of the current bytecode to our state.
bool res = state->apply_one_bytecode(&str);
......@@ -2189,9 +2315,14 @@ void ciTypeFlow::flow_block(ciTypeFlow::Block* block,
block->print_on(tty);
}
// Save set of locals defined in this block
block->def_locals()->add(state->def_locals());
// Record (no) successors.
block->successors(&str, state, jsrs);
assert(!has_exceptions || exceptions_used, "Not removing exceptions");
// Discontinue interpretation of this Block.
return;
}
......@@ -2202,6 +2333,7 @@ void ciTypeFlow::flow_block(ciTypeFlow::Block* block,
// Check for exceptional control flow from this point.
if (has_exceptions && can_trap(str)) {
flow_exceptions(exceptions, exc_klasses, state);
exceptions_used = true;
}
// Fix the JsrSet to reflect effect of the bytecode.
......@@ -2218,10 +2350,305 @@ void ciTypeFlow::flow_block(ciTypeFlow::Block* block,
successors = block->successors(&str, NULL, NULL);
}
// Save set of locals defined in this block
block->def_locals()->add(state->def_locals());
// Remove untaken exception paths
if (!exceptions_used)
exceptions->clear();
// Pass our state to successors.
flow_successors(successors, state);
}
// ------------------------------------------------------------------
// ciTypeFlow::PostorderLoops::next
//
// Advance to the next loop in the tree using a postorder, left-to-right traversal.
void ciTypeFlow::PostorderLoops::next() {
assert(!done(), "must not be done.");
if (_current->sibling() != NULL) {
_current = _current->sibling();
while (_current->child() != NULL) {
_current = _current->child();
}
} else {
_current = _current->parent();
}
}
// ------------------------------------------------------------------
// ciTypeFlow::PreorderLoops::next
//
// Advance to the next loop in the tree using a preorder, left-to-right traversal.
void ciTypeFlow::PreorderLoops::next() {
assert(!done(), "must not be done.");
if (_current->child() != NULL) {
_current = _current->child();
} else if (_current->sibling() != NULL) {
_current = _current->sibling();
} else {
while (_current != _root && _current->sibling() == NULL) {
_current = _current->parent();
}
if (_current == _root) {
_current = NULL;
assert(done(), "must be done.");
} else {
assert(_current->sibling() != NULL, "must be more to do");
_current = _current->sibling();
}
}
}
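
Both iterators above walk a loop tree threaded through child/sibling/parent pointers: a parent points at its first child, and children of one parent are chained through their sibling links. The sketch below builds such a tree with an invented LoopNode type and reproduces the postorder walk; it is an illustration, not the ci* code.

#include <cstdio>

struct LoopNode {
  int id;
  LoopNode* parent;
  LoopNode* child;      // first child
  LoopNode* sibling;    // next child of the same parent
  explicit LoopNode(int i) : id(i), parent(nullptr), child(nullptr), sibling(nullptr) {}
};

static void add_child(LoopNode* p, LoopNode* c) {
  c->parent  = p;
  c->sibling = p->child;   // push onto the parent's child list
  p->child   = c;
}

// Postorder: start at the leftmost leaf, then visit sibling subtrees, then the parent.
static void postorder(LoopNode* root) {
  LoopNode* cur = root;
  while (cur->child != nullptr) cur = cur->child;
  while (cur != nullptr) {
    printf("%d ", cur->id);
    if (cur == root) break;
    if (cur->sibling != nullptr) {
      cur = cur->sibling;
      while (cur->child != nullptr) cur = cur->child;   // descend to that subtree's leftmost leaf
    } else {
      cur = cur->parent;
    }
  }
  printf("\n");
}

int main() {
  LoopNode root(0), a(1), b(2), c(3);
  add_child(&root, &a);
  add_child(&root, &b);   // b becomes the first child, a its sibling
  add_child(&b, &c);
  postorder(&root);       // prints: 3 2 1 0
  return 0;
}
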
// ------------------------------------------------------------------
// ciTypeFlow::Loop::sorted_merge
//
// Merge the branch lp into this branch, sorting on the loop head
// pre_orders. Returns the leaf of the merged branch.
// Child and sibling pointers will be setup later.
// Sort is (looking from leaf towards the root)
// descending on primary key: loop head's pre_order, and
// ascending on secondary key: loop tail's pre_order.
ciTypeFlow::Loop* ciTypeFlow::Loop::sorted_merge(Loop* lp) {
Loop* leaf = this;
Loop* prev = NULL;
Loop* current = leaf;
while (lp != NULL) {
int lp_pre_order = lp->head()->pre_order();
// Find insertion point for "lp"
while (current != NULL) {
if (current == lp)
return leaf; // Already in list
if (current->head()->pre_order() < lp_pre_order)
break;
if (current->head()->pre_order() == lp_pre_order &&
current->tail()->pre_order() > lp->tail()->pre_order()) {
break;
}
prev = current;
current = current->parent();
}
Loop* next_lp = lp->parent(); // Save future list of items to insert
// Insert lp before current
lp->set_parent(current);
if (prev != NULL) {
prev->set_parent(lp);
} else {
leaf = lp;
}
prev = lp; // Inserted item is new prev[ious]
lp = next_lp; // Next item to insert
}
return leaf;
}
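
sorted_merge above splices one parent-linked chain of loops into another, keeping the result sorted descending on the head's pre_order and skipping entries that are already present; the insertion scan is never restarted because both inputs are already sorted. The simplified stand-alone model below uses a single integer key per node (ignoring the tail pre_order tie-break) and a hypothetical Chain type:

#include <cstdio>

struct Chain { int key; Chain* parent; };   // "parent" plays the role of Loop::parent()

static Chain* sorted_merge(Chain* leaf, Chain* lp) {
  Chain* prev = nullptr;
  Chain* cur  = leaf;
  while (lp != nullptr) {
    // Find the insertion point for lp (chains are sorted descending on key).
    while (cur != nullptr) {
      if (cur == lp) return leaf;            // already present in the merged chain
      if (cur->key < lp->key) break;         // insert lp just before cur
      prev = cur;
      cur  = cur->parent;
    }
    Chain* next_lp = lp->parent;             // remember the rest of lp's chain
    lp->parent = cur;
    if (prev != nullptr) prev->parent = lp; else leaf = lp;
    prev = lp;                               // continue scanning from here
    lp   = next_lp;
  }
  return leaf;
}

int main() {
  // Chain A: 9 -> 4 -> 1 and chain B: 7 -> 3 -> 1, sharing the final node.
  Chain one{1, nullptr}, four{4, &one}, nine{9, &four};
  Chain three{3, &one}, seven{7, &three};
  Chain* merged = sorted_merge(&nine, &seven);
  for (Chain* c = merged; c != nullptr; c = c->parent) printf("%d ", c->key);  // 9 7 4 3 1
  printf("\n");
  return 0;
}
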
// ------------------------------------------------------------------
// ciTypeFlow::build_loop_tree
//
// Incrementally build loop tree.
void ciTypeFlow::build_loop_tree(Block* blk) {
assert(!blk->is_post_visited(), "precondition");
Loop* innermost = NULL; // merge of loop tree branches over all successors
for (SuccIter iter(blk); !iter.done(); iter.next()) {
Loop* lp = NULL;
Block* succ = iter.succ();
if (!succ->is_post_visited()) {
// Found a backedge: this block is being post-visited, but the successor is not
// yet post-visited, so the successor is still on the DFS path
assert(succ->pre_order() <= blk->pre_order(), "should be backedge");
// Create a LoopNode to mark this loop.
lp = new (arena()) Loop(succ, blk);
if (succ->loop() == NULL)
succ->set_loop(lp);
// succ->loop will be updated to innermost loop on a later call, when blk==succ
} else { // Nested loop
lp = succ->loop();
// If succ is loop head, find outer loop.
while (lp != NULL && lp->head() == succ) {
lp = lp->parent();
}
if (lp == NULL) {
// Infinite loop; its parent is the root
lp = loop_tree_root();
}
}
// Check for irreducible loop.
// Successor has already been visited. If the successor's loop head
// has already been post-visited, then this is another entry into the loop.
while (lp->head()->is_post_visited() && lp != loop_tree_root()) {
_has_irreducible_entry = true;
lp->set_irreducible(succ);
if (!succ->is_on_work_list()) {
// Assume irreducible entries need more data flow
add_to_work_list(succ);
}
lp = lp->parent();
assert(lp != NULL, "nested loop must have parent by now");
}
// Merge loop tree branch for all successors.
innermost = innermost == NULL ? lp : innermost->sorted_merge(lp);
} // end loop
if (innermost == NULL) {
assert(blk->successors()->length() == 0, "CFG exit");
blk->set_loop(loop_tree_root());
} else if (innermost->head() == blk) {
// If loop header, complete the tree pointers
if (blk->loop() != innermost) {
#ifdef ASSERT
assert(blk->loop()->head() == innermost->head(), "same head");
Loop* dl;
for (dl = innermost; dl != NULL && dl != blk->loop(); dl = dl->parent());
assert(dl == blk->loop(), "blk->loop() already in innermost list");
#endif
blk->set_loop(innermost);
}
innermost->def_locals()->add(blk->def_locals());
Loop* l = innermost;
Loop* p = l->parent();
while (p && l->head() == blk) {
l->set_sibling(p->child()); // Put self on parents 'next child'
p->set_child(l); // Make self the first child of parent
p->def_locals()->add(l->def_locals());
l = p; // Walk up the parent chain
p = l->parent();
}
} else {
blk->set_loop(innermost);
innermost->def_locals()->add(blk->def_locals());
}
}
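
The backedge test in build_loop_tree above is the standard one: an edge whose target has been visited but not yet post-visited must point back into the current DFS path, so the target is a loop head. The recursive stand-alone sketch below applies the same visited-but-not-post-visited test while scanning edges (the real code applies it when the predecessor is post-visited); the toy CFG and node ids are made up.

#include <cstdio>
#include <vector>

static void dfs(int blk, const std::vector<std::vector<int>>& succ,
                std::vector<bool>& visited, std::vector<bool>& post_visited) {
  visited[blk] = true;
  for (int s : succ[blk]) {
    if (!visited[s]) {
      dfs(s, succ, visited, post_visited);
    } else if (!post_visited[s]) {
      // Target is still on the DFS path, so this edge closes a loop headed by s.
      printf("backedge %d -> %d (loop head %d)\n", blk, s, s);
    }
  }
  post_visited[blk] = true;
}

int main() {
  // Toy CFG: 0 -> 1, 1 -> 2, 2 -> {1, 3}; the edge 2 -> 1 is the backedge.
  std::vector<std::vector<int>> succ = { {1}, {2}, {1, 3}, {} };
  std::vector<bool> visited(4, false), post_visited(4, false);
  dfs(0, succ, visited, post_visited);   // prints: backedge 2 -> 1 (loop head 1)
  return 0;
}
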
// ------------------------------------------------------------------
// ciTypeFlow::Loop::contains
//
// Returns true if lp is a nested loop or this loop itself.
bool ciTypeFlow::Loop::contains(ciTypeFlow::Loop* lp) const {
assert(lp != NULL, "");
if (this == lp || head() == lp->head()) return true;
int depth1 = depth();
int depth2 = lp->depth();
if (depth1 > depth2)
return false;
while (depth1 < depth2) {
depth2--;
lp = lp->parent();
}
return this == lp;
}
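
contains() and depth() above implement the usual ancestor test on a parent-linked tree: compute both depths, raise the deeper node until the depths match, then compare pointers. A tiny stand-alone sketch with a hypothetical Node type:

#include <cstdio>

struct Node { Node* parent; };

static int depth(const Node* n) {
  int d = 0;
  for (const Node* p = n->parent; p != nullptr; p = p->parent) d++;
  return d;
}

static bool contains(const Node* outer, const Node* inner) {
  if (outer == inner) return true;
  int d1 = depth(outer), d2 = depth(inner);
  if (d1 > d2) return false;                     // an ancestor can never be deeper
  while (d1 < d2) { inner = inner->parent; d2--; }
  return outer == inner;
}

int main() {
  Node root{nullptr}, a{&root}, b{&a}, c{&root};
  printf("%d %d\n", contains(&a, &b), contains(&c, &b));  // prints: 1 0
  return 0;
}
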
// ------------------------------------------------------------------
// ciTypeFlow::Loop::depth
//
// Loop depth
int ciTypeFlow::Loop::depth() const {
int dp = 0;
for (Loop* lp = this->parent(); lp != NULL; lp = lp->parent())
dp++;
return dp;
}
#ifndef PRODUCT
// ------------------------------------------------------------------
// ciTypeFlow::Loop::print
void ciTypeFlow::Loop::print(outputStream* st, int indent) const {
for (int i = 0; i < indent; i++) st->print(" ");
st->print("%d<-%d %s",
is_root() ? 0 : this->head()->pre_order(),
is_root() ? 0 : this->tail()->pre_order(),
is_irreducible()?" irr":"");
st->print(" defs: ");
def_locals()->print_on(st, _head->outer()->method()->max_locals());
st->cr();
for (Loop* ch = child(); ch != NULL; ch = ch->sibling())
ch->print(st, indent+2);
}
#endif
// ------------------------------------------------------------------
// ciTypeFlow::df_flow_types
//
// Perform the depth first type flow analysis. Helper for flow_types.
void ciTypeFlow::df_flow_types(Block* start,
bool do_flow,
StateVector* temp_vector,
JsrSet* temp_set) {
int dft_len = 100;
GrowableArray<Block*> stk(arena(), dft_len, 0, NULL);
ciBlock* dummy = _methodBlocks->make_dummy_block();
JsrSet* root_set = new JsrSet(NULL, 0);
Block* root_head = new (arena()) Block(this, dummy, root_set);
Block* root_tail = new (arena()) Block(this, dummy, root_set);
root_head->set_pre_order(0);
root_head->set_post_order(0);
root_tail->set_pre_order(max_jint);
root_tail->set_post_order(max_jint);
set_loop_tree_root(new (arena()) Loop(root_head, root_tail));
stk.push(start);
_next_pre_order = 0; // initialize pre_order counter
_rpo_list = NULL;
int next_po = 0; // initialize post_order counter
// Compute RPO and the control flow graph
int size;
while ((size = stk.length()) > 0) {
Block* blk = stk.top(); // Leave node on stack
if (!blk->is_visited()) {
// forward arc in graph
assert (!blk->has_pre_order(), "");
blk->set_next_pre_order();
if (_next_pre_order >= MaxNodeLimit / 2) {
// Too many basic blocks. Bail out.
// This can happen when try/finally constructs are nested to depth N,
// and there is O(2**N) cloning of jsr bodies. See bug 4697245!
// "MaxNodeLimit / 2" is used because probably the parser will
// generate at least twice that many nodes and bail out.
record_failure("too many basic blocks");
return;
}
if (do_flow) {
flow_block(blk, temp_vector, temp_set);
if (failing()) return; // Watch for bailouts.
}
} else if (!blk->is_post_visited()) {
// cross or back arc
for (SuccIter iter(blk); !iter.done(); iter.next()) {
Block* succ = iter.succ();
if (!succ->is_visited()) {
stk.push(succ);
}
}
if (stk.length() == size) {
// There were no additional children, post visit node now
stk.pop(); // Remove node from stack
build_loop_tree(blk);
blk->set_post_order(next_po++); // Assign post order
prepend_to_rpo_list(blk);
assert(blk->is_post_visited(), "");
if (blk->is_loop_head() && !blk->is_on_work_list()) {
// Assume loop heads need more data flow
add_to_work_list(blk);
}
}
} else {
stk.pop(); // Remove post-visited node from stack
}
}
}
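
df_flow_types above runs the depth-first walk iteratively with an explicit stack: a block left on the stack is pre-visited first (pre_order assigned, optionally flowed), on a later pass its unvisited successors are pushed, and once no new successors appear it is post-visited and prepended to the RPO list. The stand-alone sketch below reproduces that control structure on a toy four-node CFG; every name in it is an assumption.

#include <cstdio>
#include <vector>

int main() {
  // Toy CFG: 0 -> {1,2}, 1 -> {3}, 2 -> {3}, 3 -> {}
  std::vector<std::vector<int>> succ = { {1, 2}, {3}, {3}, {} };
  int n = (int)succ.size();
  std::vector<int> pre(n, -1), post(n, -1), rpo;
  int next_pre = 0, next_post = 0;
  std::vector<int> stk = {0};

  while (!stk.empty()) {
    int blk = stk.back();                     // leave the node on the stack
    if (pre[blk] < 0) {
      pre[blk] = next_pre++;                  // forward arc: first visit (flow would happen here)
    } else if (post[blk] < 0) {
      size_t size = stk.size();
      for (int s : succ[blk])
        if (pre[s] < 0) stk.push_back(s);     // push unvisited successors
      if (stk.size() == size) {               // no new children: post-visit now
        stk.pop_back();
        post[blk] = next_post++;
        rpo.insert(rpo.begin(), blk);         // prepend, yielding reverse post-order
      }
    } else {
      stk.pop_back();                         // already post-visited (duplicate entry)
    }
  }
  for (int b : rpo) printf("%d ", b);         // prints: 0 1 2 3
  printf("\n");
  return 0;
}
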
// ------------------------------------------------------------------
// ciTypeFlow::flow_types
//
......@@ -2233,91 +2660,93 @@ void ciTypeFlow::flow_types() {
JsrSet* temp_set = new JsrSet(NULL, 16);
// Create the method entry block.
Block* block = block_at(start_bci(), temp_set);
block->set_pre_order(_next_pre_order++);
assert(block->is_start(), "start block must have order #0");
Block* start = block_at(start_bci(), temp_set);
// Load the initial state into it.
const StateVector* start_state = get_start_state();
if (failing()) return;
block->meet(start_state);
add_to_work_list(block);
start->meet(start_state);
// Trickle away.
while (!work_list_empty()) {
Block* block = work_list_next();
flow_block(block, temp_vector, temp_set);
// Depth first visit
df_flow_types(start, true /*do flow*/, temp_vector, temp_set);
if (failing()) return;
assert(_rpo_list == start, "must be start");
// Any loops found?
if (loop_tree_root()->child() != NULL &&
env()->comp_level() >= CompLevel_full_optimization) {
// Loop optimizations are not performed on Tier1 compiles.
bool changed = clone_loop_heads(loop_tree_root(), temp_vector, temp_set);
// If some loop heads were cloned, recompute postorder and loop tree
if (changed) {
loop_tree_root()->set_child(NULL);
for (Block* blk = _rpo_list; blk != NULL;) {
Block* next = blk->rpo_next();
blk->df_init();
blk = next;
}
df_flow_types(start, false /*no flow*/, temp_vector, temp_set);
}
}
// NodeCountCutoff is the number of nodes at which the parser
// will bail out. Probably if we already have lots of BBs,
// the parser will generate at least twice that many nodes and bail out.
// Therefore, this is a conservatively large limit at which to
// bail out in the pre-parse typeflow pass.
int block_limit = MaxNodeLimit / 2;
if (CITraceTypeFlow) {
tty->print_cr("\nLoop tree");
loop_tree_root()->print();
}
if (_next_pre_order >= block_limit) {
// Too many basic blocks. Bail out.
//
// This can happen when try/finally constructs are nested to depth N,
// and there is O(2**N) cloning of jsr bodies. See bug 4697245!
record_failure("too many basic blocks");
return;
}
// Continue flow analysis until fixed point reached
// Watch for bailouts.
if (failing()) return;
debug_only(int max_block = _next_pre_order;)
while (!work_list_empty()) {
Block* blk = work_list_next();
assert (blk->has_post_order(), "post order assigned above");
flow_block(blk, temp_vector, temp_set);
assert (max_block == _next_pre_order, "no new blocks");
assert (!failing(), "no more bailouts");
}
}
// ------------------------------------------------------------------
// ciTypeFlow::map_blocks
//
// Create the block map, which indexes blocks in pre_order.
// Create the block map, which indexes blocks in reverse post-order.
void ciTypeFlow::map_blocks() {
assert(_block_map == NULL, "single initialization");
int pre_order_limit = _next_pre_order;
_block_map = NEW_ARENA_ARRAY(arena(), Block*, pre_order_limit);
assert(pre_order_limit == block_count(), "");
int po;
for (po = 0; po < pre_order_limit; po++) {
debug_only(_block_map[po] = NULL);
}
ciMethodBlocks *mblks = _methodBlocks;
ciBlock* current = NULL;
int limit_bci = code_size();
for (int bci = 0; bci < limit_bci; bci++) {
ciBlock* ciblk = mblks->block_containing(bci);
if (ciblk != NULL && ciblk != current) {
current = ciblk;
int curidx = ciblk->index();
int block_count = (_idx_to_blocklist[curidx] == NULL) ? 0 : _idx_to_blocklist[curidx]->length();
for (int i = 0; i < block_count; i++) {
Block* block = _idx_to_blocklist[curidx]->at(i);
if (!block->has_pre_order()) continue;
int po = block->pre_order();
assert(_block_map[po] == NULL, "unique ref to block");
assert(0 <= po && po < pre_order_limit, "");
_block_map[po] = block;
}
}
}
for (po = 0; po < pre_order_limit; po++) {
assert(_block_map[po] != NULL, "must not drop any blocks");
Block* block = _block_map[po];
int block_ct = _next_pre_order;
_block_map = NEW_ARENA_ARRAY(arena(), Block*, block_ct);
assert(block_ct == block_count(), "");
Block* blk = _rpo_list;
for (int m = 0; m < block_ct; m++) {
int rpo = blk->rpo();
assert(rpo == m, "should be sequential");
_block_map[rpo] = blk;
blk = blk->rpo_next();
}
assert(blk == NULL, "should be done");
for (int j = 0; j < block_ct; j++) {
assert(_block_map[j] != NULL, "must not drop any blocks");
Block* block = _block_map[j];
// Remove dead blocks from successor lists:
for (int e = 0; e <= 1; e++) {
GrowableArray<Block*>* l = e? block->exceptions(): block->successors();
for (int i = 0; i < l->length(); i++) {
Block* s = l->at(i);
if (!s->has_pre_order()) {
for (int k = 0; k < l->length(); k++) {
Block* s = l->at(k);
if (!s->has_post_order()) {
if (CITraceTypeFlow) {
tty->print("Removing dead %s successor of #%d: ", (e? "exceptional": "normal"), block->pre_order());
s->print_value_on(tty);
tty->cr();
}
l->remove(s);
--i;
--k;
}
}
}
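
map_blocks above simply walks the RPO-linked list to fill the block map, relying on Block::rpo() being block_count - post_order - 1, so the entry block (post-visited last) lands at index 0. A tiny stand-alone illustration with a toy Blk type:

#include <cstdio>
#include <vector>

struct Blk { int post_order; Blk* rpo_next; };

int main() {
  const int block_count = 3;
  // The rpo list starts with the entry block, which has the highest post-order.
  Blk b2{0, nullptr}, b1{1, &b2}, b0{2, &b1};
  std::vector<Blk*> block_map(block_count, nullptr);
  int m = 0;
  for (Blk* blk = &b0; blk != nullptr; blk = blk->rpo_next, m++) {
    int rpo = block_count - blk->post_order - 1;   // same formula as Block::rpo()
    block_map[rpo] = blk;                          // rpo comes out equal to m
  }
  for (int j = 0; j < block_count; j++)
    printf("rpo %d -> post_order %d\n", j, block_map[j]->post_order);
  return 0;
}
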
......@@ -2329,7 +2758,7 @@ void ciTypeFlow::map_blocks() {
//
// Find a block with this ciBlock which has a compatible JsrSet.
// If no such block exists, create it, unless the option is no_create.
// If the option is create_private_copy, always create a fresh private copy.
// If the option is create_backedge_copy, always create a fresh backedge copy.
ciTypeFlow::Block* ciTypeFlow::get_block_for(int ciBlockIndex, ciTypeFlow::JsrSet* jsrs, CreateOption option) {
Arena* a = arena();
GrowableArray<Block*>* blocks = _idx_to_blocklist[ciBlockIndex];
......@@ -2342,11 +2771,11 @@ ciTypeFlow::Block* ciTypeFlow::get_block_for(int ciBlockIndex, ciTypeFlow::JsrSe
_idx_to_blocklist[ciBlockIndex] = blocks;
}
if (option != create_private_copy) {
if (option != create_backedge_copy) {
int len = blocks->length();
for (int i = 0; i < len; i++) {
Block* block = blocks->at(i);
if (!block->is_private_copy() && block->is_compatible_with(jsrs)) {
if (!block->is_backedge_copy() && block->is_compatible_with(jsrs)) {
return block;
}
}
......@@ -2357,15 +2786,15 @@ ciTypeFlow::Block* ciTypeFlow::get_block_for(int ciBlockIndex, ciTypeFlow::JsrSe
// We did not find a compatible block. Create one.
Block* new_block = new (a) Block(this, _methodBlocks->block(ciBlockIndex), jsrs);
if (option == create_private_copy) new_block->set_private_copy(true);
if (option == create_backedge_copy) new_block->set_backedge_copy(true);
blocks->append(new_block);
return new_block;
}
// ------------------------------------------------------------------
// ciTypeFlow::private_copy_count
// ciTypeFlow::backedge_copy_count
//
int ciTypeFlow::private_copy_count(int ciBlockIndex, ciTypeFlow::JsrSet* jsrs) const {
int ciTypeFlow::backedge_copy_count(int ciBlockIndex, ciTypeFlow::JsrSet* jsrs) const {
GrowableArray<Block*>* blocks = _idx_to_blocklist[ciBlockIndex];
if (blocks == NULL) {
......@@ -2376,7 +2805,7 @@ int ciTypeFlow::private_copy_count(int ciBlockIndex, ciTypeFlow::JsrSet* jsrs) c
int len = blocks->length();
for (int i = 0; i < len; i++) {
Block* block = blocks->at(i);
if (block->is_private_copy() && block->is_compatible_with(jsrs)) {
if (block->is_backedge_copy() && block->is_compatible_with(jsrs)) {
count++;
}
}
......@@ -2405,10 +2834,12 @@ void ciTypeFlow::do_flow() {
if (failing()) {
return;
}
map_blocks();
if (CIPrintTypeFlow || CITraceTypeFlow) {
print_on(tty);
rpo_print_on(tty);
}
map_blocks();
}
// ------------------------------------------------------------------
......@@ -2466,4 +2897,19 @@ void ciTypeFlow::print_on(outputStream* st) const {
st->print_cr("********************************************************");
st->cr();
}
void ciTypeFlow::rpo_print_on(outputStream* st) const {
st->print_cr("********************************************************");
st->print ("TypeFlow for ");
method()->name()->print_symbol_on(st);
int limit_bci = code_size();
st->print_cr(" %d bytes", limit_bci);
for (Block* blk = _rpo_list; blk != NULL; blk = blk->rpo_next()) {
blk->print_on(st);
st->print_cr("--------------------------------------------------------");
st->cr();
}
st->print_cr("********************************************************");
st->cr();
}
#endif
......@@ -34,11 +34,13 @@ private:
int _max_locals;
int _max_stack;
int _code_size;
bool _has_irreducible_entry;
const char* _failure_reason;
public:
class StateVector;
class Loop;
class Block;
// Build a type flow analyzer
......@@ -55,6 +57,7 @@ public:
int max_stack() const { return _max_stack; }
int max_cells() const { return _max_locals + _max_stack; }
int code_size() const { return _code_size; }
bool has_irreducible_entry() const { return _has_irreducible_entry; }
// Represents information about an "active" jsr call. This
// class represents a call to the routine at some entry address
......@@ -125,6 +128,19 @@ public:
void print_on(outputStream* st) const PRODUCT_RETURN;
};
class LocalSet VALUE_OBJ_CLASS_SPEC {
private:
enum Constants { max = 63 };
uint64_t _bits;
public:
LocalSet() : _bits(0) {}
void add(uint32_t i) { if (i < (uint32_t)max) _bits |= (1LL << i); }
void add(LocalSet* ls) { _bits |= ls->_bits; }
bool test(uint32_t i) const { return i < (uint32_t)max ? (_bits>>i)&1U : true; }
void clear() { _bits = 0; }
void print_on(outputStream* st, int limit) const PRODUCT_RETURN;
};
// Used as a combined index for locals and temps
enum Cell {
Cell_0, Cell_max = INT_MAX
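
The LocalSet declared above is a 64-bit bitmap over the first 63 local slots: add() records a store, add(LocalSet*) unions per-block sets into enclosing loops, and test() answers conservatively true for any slot the bitmap cannot represent. A stand-alone copy of that idea follows; the class name and the driver are illustrative additions, only the semantics follow the declaration above.

#include <cstdint>
#include <cstdio>

class LocalSetSketch {
 private:
  enum Constants { max = 63 };
  uint64_t _bits;
 public:
  LocalSetSketch() : _bits(0) {}
  void add(uint32_t i)         { if (i < (uint32_t)max) _bits |= (uint64_t(1) << i); }
  void add(LocalSetSketch* ls) { _bits |= ls->_bits; }
  bool test(uint32_t i) const  { return i < (uint32_t)max ? ((_bits >> i) & 1) != 0 : true; }
  void clear()                 { _bits = 0; }
};

int main() {
  LocalSetSketch block_defs, loop_defs;
  block_defs.add(2);            // local #2 is stored somewhere in this block
  loop_defs.add(&block_defs);   // union the per-block set into the enclosing loop's set
  printf("%d %d %d\n", loop_defs.test(2), loop_defs.test(5), loop_defs.test(70));
  // prints: 1 0 1  (slot 70 is beyond the bitmap, so it is conservatively "defined")
  return 0;
}
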
......@@ -142,6 +158,8 @@ public:
int _trap_bci;
int _trap_index;
LocalSet _def_locals; // For entire block
static ciType* type_meet_internal(ciType* t1, ciType* t2, ciTypeFlow* analyzer);
public:
......@@ -181,6 +199,9 @@ public:
int monitor_count() const { return _monitor_count; }
void set_monitor_count(int mc) { _monitor_count = mc; }
LocalSet* def_locals() { return &_def_locals; }
const LocalSet* def_locals() const { return &_def_locals; }
static Cell start_cell() { return (Cell)0; }
static Cell next_cell(Cell c) { return (Cell)(((int)c) + 1); }
Cell limit_cell() const {
......@@ -250,6 +271,10 @@ public:
return type->basic_type() == T_DOUBLE;
}
void store_to_local(int lnum) {
_def_locals.add((uint) lnum);
}
void push_translate(ciType* type);
void push_int() {
......@@ -358,6 +383,7 @@ public:
"must be reference type or return address");
overwrite_local_double_long(index);
set_type_at(local(index), type);
store_to_local(index);
}
void load_local_double(int index) {
......@@ -376,6 +402,8 @@ public:
overwrite_local_double_long(index);
set_type_at(local(index), type);
set_type_at(local(index+1), type2);
store_to_local(index);
store_to_local(index+1);
}
void load_local_float(int index) {
......@@ -388,6 +416,7 @@ public:
assert(is_float(type), "must be float type");
overwrite_local_double_long(index);
set_type_at(local(index), type);
store_to_local(index);
}
void load_local_int(int index) {
......@@ -400,6 +429,7 @@ public:
assert(is_int(type), "must be int type");
overwrite_local_double_long(index);
set_type_at(local(index), type);
store_to_local(index);
}
void load_local_long(int index) {
......@@ -418,6 +448,8 @@ public:
overwrite_local_double_long(index);
set_type_at(local(index), type);
set_type_at(local(index+1), type2);
store_to_local(index);
store_to_local(index+1);
}
// Stop interpretation of this path with a trap.
......@@ -450,13 +482,31 @@ public:
};
// Parameter for "find_block" calls:
// Describes the difference between a public and private copy.
// Describes the difference between a public and backedge copy.
enum CreateOption {
create_public_copy,
create_private_copy,
create_backedge_copy,
no_create
};
// Successor iterator
class SuccIter : public StackObj {
private:
Block* _pred;
int _index;
Block* _succ;
public:
SuccIter() : _pred(NULL), _index(-1), _succ(NULL) {}
SuccIter(Block* pred) : _pred(pred), _index(-1), _succ(NULL) { next(); }
int index() { return _index; }
Block* pred() { return _pred; } // Return predecessor
bool done() { return _index < 0; } // Finished?
Block* succ() { return _succ; } // Return current successor
void next(); // Advance
void set_succ(Block* succ); // Update current successor
bool is_normal_ctrl() { return index() < _pred->successors()->length(); }
};
// A basic block
class Block : public ResourceObj {
private:
......@@ -470,15 +520,24 @@ public:
int _trap_bci;
int _trap_index;
// A reasonable approximation to pre-order, provided to the client.
// pre_order, assigned at first visit. Used as block ID and "visited" tag
int _pre_order;
// Has this block been cloned for some special purpose?
bool _private_copy;
// A post-order, used to compute the reverse post order (RPO) provided to the client
int _post_order; // used to compute rpo
// Has this block been cloned for a loop backedge?
bool _backedge_copy;
// A pointer used for our internal work list
Block* _next;
bool _on_work_list;
Block* _next;
bool _on_work_list; // on the work list
Block* _rpo_next; // Reverse post order list
// Loop info
Loop* _loop; // nearest loop
bool _irreducible_entry; // entry to irreducible loop
bool _exception_entry; // entry to exception handler
ciBlock* ciblock() const { return _ciblock; }
StateVector* state() const { return _state; }
......@@ -504,10 +563,11 @@ public:
int start() const { return _ciblock->start_bci(); }
int limit() const { return _ciblock->limit_bci(); }
int control() const { return _ciblock->control_bci(); }
JsrSet* jsrs() const { return _jsrs; }
bool is_private_copy() const { return _private_copy; }
void set_private_copy(bool z);
int private_copy_count() const { return outer()->private_copy_count(ciblock()->index(), _jsrs); }
bool is_backedge_copy() const { return _backedge_copy; }
void set_backedge_copy(bool z);
int backedge_copy_count() const { return outer()->backedge_copy_count(ciblock()->index(), _jsrs); }
// access to entry state
int stack_size() const { return _state->stack_size(); }
......@@ -515,6 +575,20 @@ public:
ciType* local_type_at(int i) const { return _state->local_type_at(i); }
ciType* stack_type_at(int i) const { return _state->stack_type_at(i); }
// Data flow on locals
bool is_invariant_local(uint v) const {
assert(is_loop_head(), "only loop heads");
// Find outermost loop with same loop head
Loop* lp = loop();
while (lp->parent() != NULL) {
if (lp->parent()->head() != lp->head()) break;
lp = lp->parent();
}
return !lp->def_locals()->test(v);
}
LocalSet* def_locals() { return _state->def_locals(); }
const LocalSet* def_locals() const { return _state->def_locals(); }
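
is_invariant_local above first climbs to the outermost loop that shares the same head (nested loops can reuse one head block) and then asks whether that loop, including its children, ever stores the local. A small stand-alone model with a hypothetical LoopInfo node and a plain 64-bit def set:

#include <cstdint>
#include <cstdio>

struct LoopInfo {
  LoopInfo* parent;
  int       head_id;      // id of the loop's head block
  uint64_t  def_locals;   // bit i set: local i is stored somewhere in the loop
};

static bool is_invariant_local(const LoopInfo* lp, uint32_t v) {
  while (lp->parent != nullptr && lp->parent->head_id == lp->head_id)
    lp = lp->parent;                              // outermost loop with the same head
  return ((lp->def_locals >> v) & 1) == 0;        // never stored => invariant
}

int main() {
  LoopInfo outer = { nullptr, 1, uint64_t(1) << 3 };   // outer loop stores local 3
  LoopInfo inner = { &outer, 1, 0 };                   // same head block
  printf("%d %d\n", is_invariant_local(&inner, 3), is_invariant_local(&inner, 4));  // prints: 0 1
  return 0;
}
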
// Get the successors for this Block.
GrowableArray<Block*>* successors(ciBytecodeStream* str,
StateVector* state,
......@@ -524,13 +598,6 @@ public:
return _successors;
}
// Helper function for "successors" when making private copies of
// loop heads for C2.
Block * clone_loop_head(ciTypeFlow* analyzer,
int branch_bci,
Block* target,
JsrSet* jsrs);
// Get the exceptional successors for this Block.
GrowableArray<Block*>* exceptions() {
if (_exceptions == NULL) {
......@@ -584,17 +651,126 @@ public:
bool is_on_work_list() const { return _on_work_list; }
bool has_pre_order() const { return _pre_order >= 0; }
void set_pre_order(int po) { assert(!has_pre_order() && po >= 0, ""); _pre_order = po; }
void set_pre_order(int po) { assert(!has_pre_order(), ""); _pre_order = po; }
int pre_order() const { assert(has_pre_order(), ""); return _pre_order; }
void set_next_pre_order() { set_pre_order(outer()->inc_next_pre_order()); }
bool is_start() const { return _pre_order == outer()->start_block_num(); }
// A ranking used in determining order within the work list.
bool is_simpler_than(Block* other);
// Reverse post order
void df_init();
bool has_post_order() const { return _post_order >= 0; }
void set_post_order(int po) { assert(!has_post_order() && po >= 0, ""); _post_order = po; }
void reset_post_order(int o){ _post_order = o; }
int post_order() const { assert(has_post_order(), ""); return _post_order; }
bool has_rpo() const { return has_post_order() && outer()->have_block_count(); }
int rpo() const { assert(has_rpo(), ""); return outer()->block_count() - post_order() - 1; }
void set_rpo_next(Block* b) { _rpo_next = b; }
Block* rpo_next() { return _rpo_next; }
// Loops
Loop* loop() const { return _loop; }
void set_loop(Loop* lp) { _loop = lp; }
bool is_loop_head() const { return _loop && _loop->head() == this; }
void set_irreducible_entry(bool c) { _irreducible_entry = c; }
bool is_irreducible_entry() const { return _irreducible_entry; }
bool is_visited() const { return has_pre_order(); }
bool is_post_visited() const { return has_post_order(); }
bool is_clonable_exit(Loop* lp);
Block* looping_succ(Loop* lp); // Successor inside of loop
bool is_single_entry_loop_head() const {
if (!is_loop_head()) return false;
for (Loop* lp = loop(); lp != NULL && lp->head() == this; lp = lp->parent())
if (lp->is_irreducible()) return false;
return true;
}
void print_value_on(outputStream* st) const PRODUCT_RETURN;
void print_on(outputStream* st) const PRODUCT_RETURN;
};
// Loop
class Loop : public ResourceObj {
private:
Loop* _parent;
Loop* _sibling; // List of siblings, null terminated
Loop* _child; // Head of child list threaded thru sibling pointer
Block* _head; // Head of loop
Block* _tail; // Tail of loop
bool _irreducible;
LocalSet _def_locals;
public:
Loop(Block* head, Block* tail) :
_head(head), _tail(tail),
_parent(NULL), _sibling(NULL), _child(NULL),
_irreducible(false), _def_locals() {}
Loop* parent() const { return _parent; }
Loop* sibling() const { return _sibling; }
Loop* child() const { return _child; }
Block* head() const { return _head; }
Block* tail() const { return _tail; }
void set_parent(Loop* p) { _parent = p; }
void set_sibling(Loop* s) { _sibling = s; }
void set_child(Loop* c) { _child = c; }
void set_head(Block* hd) { _head = hd; }
void set_tail(Block* tl) { _tail = tl; }
int depth() const; // nesting depth
// Returns true if lp is a nested loop or us.
bool contains(Loop* lp) const;
bool contains(Block* blk) const { return contains(blk->loop()); }
// Data flow on locals
LocalSet* def_locals() { return &_def_locals; }
const LocalSet* def_locals() const { return &_def_locals; }
// Merge the branch lp into this branch, sorting on the loop head
// pre_orders. Returns the new branch.
Loop* sorted_merge(Loop* lp);
// Mark non-single entry to loop
void set_irreducible(Block* entry) {
_irreducible = true;
entry->set_irreducible_entry(true);
}
bool is_irreducible() const { return _irreducible; }
bool is_root() const { return _tail->pre_order() == max_jint; }
void print(outputStream* st = tty, int indent = 0) const PRODUCT_RETURN;
};
// Postorder iteration over the loop tree.
class PostorderLoops : public StackObj {
private:
Loop* _root;
Loop* _current;
public:
PostorderLoops(Loop* root) : _root(root), _current(root) {
while (_current->child() != NULL) {
_current = _current->child();
}
}
bool done() { return _current == NULL; } // Finished iterating?
void next(); // Advance to next loop
Loop* current() { return _current; } // Return current loop.
};
// Preorder iteration over the loop tree.
class PreorderLoops : public StackObj {
private:
Loop* _root;
Loop* _current;
public:
PreorderLoops(Loop* root) : _root(root), _current(root) {}
bool done() { return _current == NULL; } // Finished iterating?
void next(); // Advance to next loop
Loop* current() { return _current; } // Return current loop.
};
// Standard indexes of successors, for various bytecodes.
enum {
FALL_THROUGH = 0, // normal control
......@@ -619,6 +795,12 @@ private:
// Tells if a given instruction is able to generate an exception edge.
bool can_trap(ciBytecodeStream& str);
// Clone the loop heads. Returns true if any cloning occurred.
bool clone_loop_heads(Loop* lp, StateVector* temp_vector, JsrSet* temp_set);
// Clone lp's head and replace tail's successors with clone.
Block* clone_loop_head(Loop* lp, StateVector* temp_vector, JsrSet* temp_set);
public:
// Return the block beginning at bci which has a JsrSet compatible
// with jsrs.
......@@ -627,8 +809,8 @@ public:
// block factory
Block* get_block_for(int ciBlockIndex, JsrSet* jsrs, CreateOption option = create_public_copy);
// How many of the blocks have the private_copy bit set?
int private_copy_count(int ciBlockIndex, JsrSet* jsrs) const;
// How many of the blocks have the backedge_copy bit set?
int backedge_copy_count(int ciBlockIndex, JsrSet* jsrs) const;
// Return an existing block containing bci which has a JsrSet compatible
// with jsrs, or NULL if there is none.
......@@ -651,11 +833,18 @@ public:
return _block_map[po]; }
Block* start_block() const { return pre_order_at(start_block_num()); }
int start_block_num() const { return 0; }
Block* rpo_at(int rpo) const { assert(0 <= rpo && rpo < block_count(), "out of bounds");
return _block_map[rpo]; }
int next_pre_order() { return _next_pre_order; }
int inc_next_pre_order() { return _next_pre_order++; }
private:
// A work list used during flow analysis.
Block* _work_list;
// List of blocks in reverse post order
Block* _rpo_list;
// Next Block::_pre_order. After mapping, doubles as block_count.
int _next_pre_order;
......@@ -668,6 +857,15 @@ private:
// Add a basic block to our work list.
void add_to_work_list(Block* block);
// Prepend a basic block to rpo list.
void prepend_to_rpo_list(Block* blk) {
blk->set_rpo_next(_rpo_list);
_rpo_list = blk;
}
// Root of the loop tree
Loop* _loop_tree_root;
// State used for make_jsr_record
int _jsr_count;
GrowableArray<JsrRecord*>* _jsr_records;
......@@ -677,6 +875,9 @@ public:
// does not already exist.
JsrRecord* make_jsr_record(int entry_address, int return_address);
void set_loop_tree_root(Loop* ltr) { _loop_tree_root = ltr; }
Loop* loop_tree_root() { return _loop_tree_root; }
private:
// Get the initial state for start_bci:
const StateVector* get_start_state();
......@@ -703,6 +904,15 @@ private:
// necessary.
void flow_types();
// Perform the depth first type flow analysis. Helper for flow_types.
void df_flow_types(Block* start,
bool do_flow,
StateVector* temp_vector,
JsrSet* temp_set);
// Incrementally build loop tree.
void build_loop_tree(Block* blk);
// Create the block map, which indexes blocks in pre_order.
void map_blocks();
......@@ -711,4 +921,6 @@ public:
void do_flow();
void print_on(outputStream* st) const PRODUCT_RETURN;
void rpo_print_on(outputStream* st) const PRODUCT_RETURN;
};
......@@ -1350,11 +1350,7 @@ bool nmethod::can_unload(BoolObjectClosure* is_alive,
return false;
}
}
if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) {
// Cannot do this test if verification of the UseParallelOldGC
// code using the PSMarkSweep code is being done.
assert(unloading_occurred, "Inconsistency in unloading");
}
assert(unloading_occurred, "Inconsistency in unloading");
make_unloaded(is_alive, obj);
return true;
}
......
......@@ -210,10 +210,6 @@ void ParallelScavengeHeap::post_initialize() {
PSScavenge::initialize();
if (UseParallelOldGC) {
PSParallelCompact::post_initialize();
if (VerifyParallelOldWithMarkSweep) {
// Will be used for verification of par old.
PSMarkSweep::initialize();
}
} else {
PSMarkSweep::initialize();
}
......@@ -402,7 +398,7 @@ HeapWord* ParallelScavengeHeap::mem_allocate(
return result;
}
if (!is_tlab &&
size >= (young_gen()->eden_space()->capacity_in_words() / 2)) {
size >= (young_gen()->eden_space()->capacity_in_words(Thread::current()) / 2)) {
result = old_gen()->allocate(size, is_tlab);
if (result != NULL) {
return result;
......
......@@ -146,7 +146,7 @@ void RefProcTaskExecutor::execute(ProcessTask& task)
{
ParallelScavengeHeap* heap = PSParallelCompact::gc_heap();
uint parallel_gc_threads = heap->gc_task_manager()->workers();
ChunkTaskQueueSet* qset = ParCompactionManager::chunk_array();
RegionTaskQueueSet* qset = ParCompactionManager::region_array();
ParallelTaskTerminator terminator(parallel_gc_threads, qset);
GCTaskQueue* q = GCTaskQueue::create();
for(uint i=0; i<parallel_gc_threads; i++) {
......@@ -205,38 +205,38 @@ void StealMarkingTask::do_it(GCTaskManager* manager, uint which) {
}
//
// StealChunkCompactionTask
// StealRegionCompactionTask
//
StealChunkCompactionTask::StealChunkCompactionTask(ParallelTaskTerminator* t) :
_terminator(t) {};
StealRegionCompactionTask::StealRegionCompactionTask(ParallelTaskTerminator* t):
_terminator(t) {}
void StealChunkCompactionTask::do_it(GCTaskManager* manager, uint which) {
void StealRegionCompactionTask::do_it(GCTaskManager* manager, uint which) {
assert(Universe::heap()->is_gc_active(), "called outside gc");
NOT_PRODUCT(TraceTime tm("StealChunkCompactionTask",
NOT_PRODUCT(TraceTime tm("StealRegionCompactionTask",
PrintGCDetails && TraceParallelOldGCTasks, true, gclog_or_tty));
ParCompactionManager* cm =
ParCompactionManager::gc_thread_compaction_manager(which);
// Has to drain stacks first because there may be chunks on
// Has to drain stacks first because there may be regions
// preloaded onto the stack and this thread may never have
// done a draining task. Are the draining tasks needed?
cm->drain_chunk_stacks();
cm->drain_region_stacks();
size_t chunk_index = 0;
size_t region_index = 0;
int random_seed = 17;
// If we're the termination task, try 10 rounds of stealing before
// setting the termination flag
while(true) {
if (ParCompactionManager::steal(which, &random_seed, chunk_index)) {
PSParallelCompact::fill_and_update_chunk(cm, chunk_index);
cm->drain_chunk_stacks();
if (ParCompactionManager::steal(which, &random_seed, region_index)) {
PSParallelCompact::fill_and_update_region(cm, region_index);
cm->drain_region_stacks();
} else {
if (terminator()->offer_termination()) {
break;
......@@ -249,11 +249,10 @@ void StealChunkCompactionTask::do_it(GCTaskManager* manager, uint which) {
UpdateDensePrefixTask::UpdateDensePrefixTask(
PSParallelCompact::SpaceId space_id,
size_t chunk_index_start,
size_t chunk_index_end) :
_space_id(space_id), _chunk_index_start(chunk_index_start),
_chunk_index_end(chunk_index_end)
{}
size_t region_index_start,
size_t region_index_end) :
_space_id(space_id), _region_index_start(region_index_start),
_region_index_end(region_index_end) {}
void UpdateDensePrefixTask::do_it(GCTaskManager* manager, uint which) {
......@@ -265,8 +264,8 @@ void UpdateDensePrefixTask::do_it(GCTaskManager* manager, uint which) {
PSParallelCompact::update_and_deadwood_in_dense_prefix(cm,
_space_id,
_chunk_index_start,
_chunk_index_end);
_region_index_start,
_region_index_end);
}
void DrainStacksCompactionTask::do_it(GCTaskManager* manager, uint which) {
......@@ -278,6 +277,6 @@ void DrainStacksCompactionTask::do_it(GCTaskManager* manager, uint which) {
ParCompactionManager* cm =
ParCompactionManager::gc_thread_compaction_manager(which);
// Process any chunks already in the compaction managers stacks.
cm->drain_chunk_stacks();
// Process any regions already in the compaction managers stacks.
cm->drain_region_stacks();
}
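The two do_it bodies above share one pattern: drain the thread-local region work first, then repeatedly steal a region index from another thread's queue, and only give up (offer termination) once stealing fails. A minimal single-threaded sketch of that loop, using std::deque in place of the HotSpot task queues; every name below is an illustrative stand-in, not the real API:
#include <cstddef>
#include <cstdio>
#include <deque>
#include <vector>
// Illustrative stand-in for a per-thread region stack.
typedef std::deque<size_t> RegionStack;
// Placeholder for PSParallelCompact::fill_and_update_region.
static void process_region(size_t region_index) {
  std::printf("processing region %zu\n", region_index);
}
// Drain this thread's own stack completely.
static void drain_region_stack(RegionStack& mine) {
  while (!mine.empty()) {
    size_t region_index = mine.back();
    mine.pop_back();
    process_region(region_index);
  }
}
// Try to take one region index from any other thread's stack.
static bool steal_region(std::vector<RegionStack>& all, size_t self,
                         size_t& region_index) {
  for (size_t i = 0; i < all.size(); ++i) {
    if (i != self && !all[i].empty()) {
      region_index = all[i].front();    // steal from the opposite end
      all[i].pop_front();
      return true;
    }
  }
  return false;                         // nothing left anywhere
}
int main() {
  std::vector<RegionStack> stacks(2);
  stacks[0].push_back(1);               // preloaded regions
  stacks[1].push_back(2);
  stacks[1].push_back(3);
  const size_t self = 0;
  drain_region_stack(stacks[self]);     // drain first, as the task does
  size_t region_index = 0;
  while (steal_region(stacks, self, region_index)) {
    process_region(region_index);
    drain_region_stack(stacks[self]);   // stolen work may push more regions
  }
  return 0;                             // stands in for offer_termination()
}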
......@@ -188,18 +188,18 @@ class StealMarkingTask : public GCTask {
};
//
// StealChunkCompactionTask
// StealRegionCompactionTask
//
// This task is used to distribute work to idle threads.
//
class StealChunkCompactionTask : public GCTask {
class StealRegionCompactionTask : public GCTask {
private:
ParallelTaskTerminator* const _terminator;
public:
StealChunkCompactionTask(ParallelTaskTerminator* t);
StealRegionCompactionTask(ParallelTaskTerminator* t);
char* name() { return (char *)"steal-chunk-task"; }
char* name() { return (char *)"steal-region-task"; }
ParallelTaskTerminator* terminator() { return _terminator; }
virtual void do_it(GCTaskManager* manager, uint which);
......@@ -215,15 +215,15 @@ class StealChunkCompactionTask : public GCTask {
class UpdateDensePrefixTask : public GCTask {
private:
PSParallelCompact::SpaceId _space_id;
size_t _chunk_index_start;
size_t _chunk_index_end;
size_t _region_index_start;
size_t _region_index_end;
public:
char* name() { return (char *)"update-dense_prefix-task"; }
UpdateDensePrefixTask(PSParallelCompact::SpaceId space_id,
size_t chunk_index_start,
size_t chunk_index_end);
size_t region_index_start,
size_t region_index_end);
virtual void do_it(GCTaskManager* manager, uint which);
};
......@@ -231,17 +231,17 @@ class UpdateDensePrefixTask : public GCTask {
//
// DrainStacksCompactionTask
//
// This task processes chunks that have been added to the stacks of each
// This task processes regions that have been added to the stacks of each
// compaction manager.
//
// Trying to use one draining thread does not work because there are no
// guarantees about which task will be picked up by which thread. For example,
// if thread A gets all the preloaded chunks, thread A may not get a draining
// if thread A gets all the preloaded regions, thread A may not get a draining
// task (they may all be done by other threads).
//
class DrainStacksCompactionTask : public GCTask {
public:
char* name() { return (char *)"drain-chunk-task"; }
char* name() { return (char *)"drain-region-task"; }
virtual void do_it(GCTaskManager* manager, uint which);
};
......@@ -30,7 +30,7 @@ ParCompactionManager** ParCompactionManager::_manager_array = NULL;
OopTaskQueueSet* ParCompactionManager::_stack_array = NULL;
ObjectStartArray* ParCompactionManager::_start_array = NULL;
ParMarkBitMap* ParCompactionManager::_mark_bitmap = NULL;
ChunkTaskQueueSet* ParCompactionManager::_chunk_array = NULL;
RegionTaskQueueSet* ParCompactionManager::_region_array = NULL;
ParCompactionManager::ParCompactionManager() :
_action(CopyAndUpdate) {
......@@ -46,13 +46,13 @@ ParCompactionManager::ParCompactionManager() :
// We want the overflow stack to be permanent
_overflow_stack = new (ResourceObj::C_HEAP) GrowableArray<oop>(10, true);
#ifdef USE_ChunkTaskQueueWithOverflow
chunk_stack()->initialize();
#ifdef USE_RegionTaskQueueWithOverflow
region_stack()->initialize();
#else
chunk_stack()->initialize();
region_stack()->initialize();
// We want the overflow stack to be permanent
_chunk_overflow_stack =
_region_overflow_stack =
new (ResourceObj::C_HEAP) GrowableArray<size_t>(10, true);
#endif
......@@ -86,18 +86,18 @@ void ParCompactionManager::initialize(ParMarkBitMap* mbm) {
_stack_array = new OopTaskQueueSet(parallel_gc_threads);
guarantee(_stack_array != NULL, "Could not initialize promotion manager");
_chunk_array = new ChunkTaskQueueSet(parallel_gc_threads);
guarantee(_chunk_array != NULL, "Could not initialize promotion manager");
_region_array = new RegionTaskQueueSet(parallel_gc_threads);
guarantee(_region_array != NULL, "Could not initialize promotion manager");
// Create and register the ParCompactionManager(s) for the worker threads.
for(uint i=0; i<parallel_gc_threads; i++) {
_manager_array[i] = new ParCompactionManager();
guarantee(_manager_array[i] != NULL, "Could not create ParCompactionManager");
stack_array()->register_queue(i, _manager_array[i]->marking_stack());
#ifdef USE_ChunkTaskQueueWithOverflow
chunk_array()->register_queue(i, _manager_array[i]->chunk_stack()->task_queue());
#ifdef USE_RegionTaskQueueWithOverflow
region_array()->register_queue(i, _manager_array[i]->region_stack()->task_queue());
#else
chunk_array()->register_queue(i, _manager_array[i]->chunk_stack());
region_array()->register_queue(i, _manager_array[i]->region_stack());
#endif
}
......@@ -153,31 +153,31 @@ oop ParCompactionManager::retrieve_for_scanning() {
return NULL;
}
// Save chunk on a stack
void ParCompactionManager::save_for_processing(size_t chunk_index) {
// Save region on a stack
void ParCompactionManager::save_for_processing(size_t region_index) {
#ifdef ASSERT
const ParallelCompactData& sd = PSParallelCompact::summary_data();
ParallelCompactData::ChunkData* const chunk_ptr = sd.chunk(chunk_index);
assert(chunk_ptr->claimed(), "must be claimed");
assert(chunk_ptr->_pushed++ == 0, "should only be pushed once");
ParallelCompactData::RegionData* const region_ptr = sd.region(region_index);
assert(region_ptr->claimed(), "must be claimed");
assert(region_ptr->_pushed++ == 0, "should only be pushed once");
#endif
chunk_stack_push(chunk_index);
region_stack_push(region_index);
}
void ParCompactionManager::chunk_stack_push(size_t chunk_index) {
void ParCompactionManager::region_stack_push(size_t region_index) {
#ifdef USE_ChunkTaskQueueWithOverflow
chunk_stack()->save(chunk_index);
#ifdef USE_RegionTaskQueueWithOverflow
region_stack()->save(region_index);
#else
if(!chunk_stack()->push(chunk_index)) {
chunk_overflow_stack()->push(chunk_index);
if(!region_stack()->push(region_index)) {
region_overflow_stack()->push(region_index);
}
#endif
}
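region_stack_push above falls back to a growable overflow stack whenever the fixed-size task queue rejects the push, which is what lets save_for_processing promise that it never fails. A small sketch of the same fallback, with a bounded vector standing in for RegionTaskQueue (illustrative names only):
#include <cassert>
#include <cstddef>
#include <vector>
// Bounded queue stand-in: push() fails when the queue is full.
class BoundedQueue {
 public:
  explicit BoundedQueue(size_t capacity) : _capacity(capacity) {}
  bool push(size_t v) {
    if (_data.size() >= _capacity) return false;
    _data.push_back(v);
    return true;
  }
 private:
  size_t _capacity;
  std::vector<size_t> _data;
};
int main() {
  BoundedQueue region_stack(2);
  std::vector<size_t> region_overflow_stack;        // unbounded fallback
  for (size_t region_index = 0; region_index < 5; ++region_index) {
    if (!region_stack.push(region_index)) {
      region_overflow_stack.push_back(region_index); // never lose a region
    }
  }
  assert(region_overflow_stack.size() == 3);         // regions 2..4 overflowed
  return 0;
}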
bool ParCompactionManager::retrieve_for_processing(size_t& chunk_index) {
#ifdef USE_ChunkTaskQueueWithOverflow
return chunk_stack()->retrieve(chunk_index);
bool ParCompactionManager::retrieve_for_processing(size_t& region_index) {
#ifdef USE_RegionTaskQueueWithOverflow
return region_stack()->retrieve(region_index);
#else
// Should not be used in the parallel case
ShouldNotReachHere();
......@@ -230,14 +230,14 @@ void ParCompactionManager::drain_marking_stacks(OopClosure* blk) {
assert(overflow_stack()->length() == 0, "Sanity");
}
void ParCompactionManager::drain_chunk_overflow_stack() {
size_t chunk_index = (size_t) -1;
while(chunk_stack()->retrieve_from_overflow(chunk_index)) {
PSParallelCompact::fill_and_update_chunk(this, chunk_index);
void ParCompactionManager::drain_region_overflow_stack() {
size_t region_index = (size_t) -1;
while(region_stack()->retrieve_from_overflow(region_index)) {
PSParallelCompact::fill_and_update_region(this, region_index);
}
}
void ParCompactionManager::drain_chunk_stacks() {
void ParCompactionManager::drain_region_stacks() {
#ifdef ASSERT
ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap();
assert(heap->kind() == CollectedHeap::ParallelScavengeHeap, "Sanity");
......@@ -249,42 +249,42 @@ void ParCompactionManager::drain_chunk_stacks() {
#if 1 // def DO_PARALLEL - the serial code hasn't been updated
do {
#ifdef USE_ChunkTaskQueueWithOverflow
#ifdef USE_RegionTaskQueueWithOverflow
// Drain overflow stack first, so other threads can steal from
// claimed stack while we work.
size_t chunk_index = (size_t) -1;
while(chunk_stack()->retrieve_from_overflow(chunk_index)) {
PSParallelCompact::fill_and_update_chunk(this, chunk_index);
size_t region_index = (size_t) -1;
while(region_stack()->retrieve_from_overflow(region_index)) {
PSParallelCompact::fill_and_update_region(this, region_index);
}
while (chunk_stack()->retrieve_from_stealable_queue(chunk_index)) {
PSParallelCompact::fill_and_update_chunk(this, chunk_index);
while (region_stack()->retrieve_from_stealable_queue(region_index)) {
PSParallelCompact::fill_and_update_region(this, region_index);
}
} while (!chunk_stack()->is_empty());
} while (!region_stack()->is_empty());
#else
// Drain overflow stack first, so other threads can steal from
// claimed stack while we work.
while(!chunk_overflow_stack()->is_empty()) {
size_t chunk_index = chunk_overflow_stack()->pop();
PSParallelCompact::fill_and_update_chunk(this, chunk_index);
while(!region_overflow_stack()->is_empty()) {
size_t region_index = region_overflow_stack()->pop();
PSParallelCompact::fill_and_update_region(this, region_index);
}
size_t chunk_index = -1;
size_t region_index = -1;
// region_index is an out parameter (passed by reference).
while (chunk_stack()->pop_local(chunk_index)) {
while (region_stack()->pop_local(region_index)) {
// It would be nice to assert about the type of objects we might
// pop, but they can come from anywhere, unfortunately.
PSParallelCompact::fill_and_update_chunk(this, chunk_index);
PSParallelCompact::fill_and_update_region(this, region_index);
}
} while((chunk_stack()->size() != 0) ||
(chunk_overflow_stack()->length() != 0));
} while((region_stack()->size() != 0) ||
(region_overflow_stack()->length() != 0));
#endif
#ifdef USE_ChunkTaskQueueWithOverflow
assert(chunk_stack()->is_empty(), "Sanity");
#ifdef USE_RegionTaskQueueWithOverflow
assert(region_stack()->is_empty(), "Sanity");
#else
assert(chunk_stack()->size() == 0, "Sanity");
assert(chunk_overflow_stack()->length() == 0, "Sanity");
assert(region_stack()->size() == 0, "Sanity");
assert(region_overflow_stack()->length() == 0, "Sanity");
#endif
#else
oop obj;
......
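In both variants of drain_region_stacks the private overflow storage is emptied before the local end of the stealable queue, so other threads can keep stealing while this thread works, and the outer do/while repeats because filling one region can push more. A condensed single-threaded sketch of that ordering; the names are placeholders for the HotSpot types:
#include <cstddef>
#include <deque>
#include <vector>
static std::vector<size_t> region_overflow_stack;  // private to this thread
static std::deque<size_t>  region_stack;           // stealable by other threads
// Placeholder for PSParallelCompact::fill_and_update_region; filling a
// region may discover and push further regions.
static void fill_and_update_region(size_t /*region_index*/) {}
static void drain_region_stacks() {
  do {
    // Drain the private overflow stack first, so other threads can keep
    // stealing from the shared queue while we work.
    while (!region_overflow_stack.empty()) {
      size_t region_index = region_overflow_stack.back();
      region_overflow_stack.pop_back();
      fill_and_update_region(region_index);
    }
    // Then drain the local end of the stealable queue.
    while (!region_stack.empty()) {
      size_t region_index = region_stack.back();
      region_stack.pop_back();
      fill_and_update_region(region_index);
    }
  } while (!region_stack.empty() || !region_overflow_stack.empty());
}
int main() {
  region_stack.push_back(0);
  region_overflow_stack.push_back(1);
  drain_region_stacks();
  return 0;
}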
......@@ -52,7 +52,7 @@ class ParCompactionManager : public CHeapObj {
friend class ParallelTaskTerminator;
friend class ParMarkBitMap;
friend class PSParallelCompact;
friend class StealChunkCompactionTask;
friend class StealRegionCompactionTask;
friend class UpdateAndFillClosure;
friend class RefProcTaskExecutor;
......@@ -72,27 +72,27 @@ class ParCompactionManager : public CHeapObj {
// ------------------------ End don't putback if not needed
private:
static ParCompactionManager** _manager_array;
static OopTaskQueueSet* _stack_array;
static ObjectStartArray* _start_array;
static ChunkTaskQueueSet* _chunk_array;
static PSOldGen* _old_gen;
OopTaskQueue _marking_stack;
GrowableArray<oop>* _overflow_stack;
static ParCompactionManager** _manager_array;
static OopTaskQueueSet* _stack_array;
static ObjectStartArray* _start_array;
static RegionTaskQueueSet* _region_array;
static PSOldGen* _old_gen;
OopTaskQueue _marking_stack;
GrowableArray<oop>* _overflow_stack;
// Is there a way to reuse the _marking_stack for the
// saving empty chunks? For now just create a different
// saving empty regions? For now just create a different
// type of TaskQueue.
#ifdef USE_ChunkTaskQueueWithOverflow
ChunkTaskQueueWithOverflow _chunk_stack;
#ifdef USE_RegionTaskQueueWithOverflow
RegionTaskQueueWithOverflow _region_stack;
#else
ChunkTaskQueue _chunk_stack;
GrowableArray<size_t>* _chunk_overflow_stack;
RegionTaskQueue _region_stack;
GrowableArray<size_t>* _region_overflow_stack;
#endif
#if 1 // does this happen enough to need a per thread stack?
GrowableArray<Klass*>* _revisit_klass_stack;
GrowableArray<Klass*>* _revisit_klass_stack;
#endif
static ParMarkBitMap* _mark_bitmap;
......@@ -100,21 +100,22 @@ class ParCompactionManager : public CHeapObj {
static PSOldGen* old_gen() { return _old_gen; }
static ObjectStartArray* start_array() { return _start_array; }
static OopTaskQueueSet* stack_array() { return _stack_array; }
static OopTaskQueueSet* stack_array() { return _stack_array; }
static void initialize(ParMarkBitMap* mbm);
protected:
// Array of tasks. Needed by the ParallelTaskTerminator.
static ChunkTaskQueueSet* chunk_array() { return _chunk_array; }
OopTaskQueue* marking_stack() { return &_marking_stack; }
GrowableArray<oop>* overflow_stack() { return _overflow_stack; }
#ifdef USE_ChunkTaskQueueWithOverflow
ChunkTaskQueueWithOverflow* chunk_stack() { return &_chunk_stack; }
static RegionTaskQueueSet* region_array() { return _region_array; }
OopTaskQueue* marking_stack() { return &_marking_stack; }
GrowableArray<oop>* overflow_stack() { return _overflow_stack; }
#ifdef USE_RegionTaskQueueWithOverflow
RegionTaskQueueWithOverflow* region_stack() { return &_region_stack; }
#else
ChunkTaskQueue* chunk_stack() { return &_chunk_stack; }
GrowableArray<size_t>* chunk_overflow_stack() { return _chunk_overflow_stack; }
RegionTaskQueue* region_stack() { return &_region_stack; }
GrowableArray<size_t>* region_overflow_stack() {
return _region_overflow_stack;
}
#endif
// Pushes onto the marking stack. If the marking stack is full,
......@@ -123,9 +124,9 @@ class ParCompactionManager : public CHeapObj {
// Do not implement an equivalent stack_pop. Deal with the
// marking stack and overflow stack directly.
// Pushes onto the chunk stack. If the chunk stack is full,
// pushes onto the chunk overflow stack.
void chunk_stack_push(size_t chunk_index);
// Pushes onto the region stack. If the region stack is full,
// pushes onto the region overflow stack.
void region_stack_push(size_t region_index);
public:
Action action() { return _action; }
......@@ -160,10 +161,10 @@ class ParCompactionManager : public CHeapObj {
// Get an oop for scanning. Returns NULL if no oops were found.
oop retrieve_for_scanning();
// Save chunk for later processing. Must not fail.
void save_for_processing(size_t chunk_index);
// Get a chunk for processing. Returns false if no chunks were found.
bool retrieve_for_processing(size_t& chunk_index);
// Save region for later processing. Must not fail.
void save_for_processing(size_t region_index);
// Get a region for processing. Returns false if no regions were found.
bool retrieve_for_processing(size_t& region_index);
// Access function for compaction managers
static ParCompactionManager* gc_thread_compaction_manager(int index);
......@@ -172,18 +173,18 @@ class ParCompactionManager : public CHeapObj {
return stack_array()->steal(queue_num, seed, t);
}
static bool steal(int queue_num, int* seed, ChunkTask& t) {
return chunk_array()->steal(queue_num, seed, t);
static bool steal(int queue_num, int* seed, RegionTask& t) {
return region_array()->steal(queue_num, seed, t);
}
// Process tasks remaining on any stack
void drain_marking_stacks(OopClosure *blk);
// Process tasks remaining on any stack
void drain_chunk_stacks();
void drain_region_stacks();
// Process tasks remaining on any stack
void drain_chunk_overflow_stack();
void drain_region_overflow_stack();
// Debugging support
#ifdef ASSERT
......
......@@ -35,9 +35,7 @@ void PSMarkSweep::initialize() {
_ref_processor = new ReferenceProcessor(mr,
true, // atomic_discovery
false); // mt_discovery
if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) {
_counters = new CollectorCounters("PSMarkSweep", 1);
}
_counters = new CollectorCounters("PSMarkSweep", 1);
}
// This method contains all heap specific policy for invoking mark sweep.
......@@ -518,9 +516,6 @@ void PSMarkSweep::mark_sweep_phase1(bool clear_all_softrefs) {
follow_stack();
// Process reference objects found during marking
// Skipping the reference processing for VerifyParallelOldWithMarkSweep
// affects the marking (makes it different).
{
ReferencePolicy *soft_ref_policy;
if (clear_all_softrefs) {
......
......@@ -152,20 +152,15 @@ void PSMarkSweepDecorator::precompact() {
oop(q)->forward_to(oop(compact_top));
assert(oop(q)->is_gc_marked(), "encoding the pointer should preserve the mark");
} else {
// Don't clear the mark since it confuses parallel old
// verification.
if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) {
// if the object isn't moving we can just set the mark to the default
// mark and handle it specially later on.
oop(q)->init_mark();
}
// if the object isn't moving we can just set the mark to the default
// mark and handle it specially later on.
oop(q)->init_mark();
assert(oop(q)->forwardee() == NULL, "should be forwarded to NULL");
}
// Update object start array
if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) {
if (start_array)
start_array->allocate_block(compact_top);
if (start_array) {
start_array->allocate_block(compact_top);
}
VALIDATE_MARK_SWEEP_ONLY(MarkSweep::register_live_oop(oop(q), size));
......@@ -219,19 +214,14 @@ void PSMarkSweepDecorator::precompact() {
assert(oop(q)->is_gc_marked(), "encoding the pointer should preserve the mark");
} else {
// if the object isn't moving we can just set the mark to the default
// Don't clear the mark since it confuses parallel old
// verification.
if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) {
// mark and handle it specially later on.
oop(q)->init_mark();
}
// mark and handle it specially later on.
oop(q)->init_mark();
assert(oop(q)->forwardee() == NULL, "should be forwarded to NULL");
}
if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) {
// Update object start array
if (start_array)
start_array->allocate_block(compact_top);
// Update object start array
if (start_array) {
start_array->allocate_block(compact_top);
}
VALIDATE_MARK_SWEEP_ONLY(MarkSweep::register_live_oop(oop(q), sz));
......
......@@ -152,9 +152,7 @@ void PSOldGen::precompact() {
assert(heap->kind() == CollectedHeap::ParallelScavengeHeap, "Sanity");
// Reset start array first.
debug_only(if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) {)
start_array()->reset();
debug_only(})
object_mark_sweep()->precompact();
......
......@@ -28,43 +28,31 @@
#include <math.h>
// All sizes are in HeapWords.
const size_t ParallelCompactData::Log2ChunkSize = 9; // 512 words
const size_t ParallelCompactData::ChunkSize = (size_t)1 << Log2ChunkSize;
const size_t ParallelCompactData::ChunkSizeBytes = ChunkSize << LogHeapWordSize;
const size_t ParallelCompactData::ChunkSizeOffsetMask = ChunkSize - 1;
const size_t ParallelCompactData::ChunkAddrOffsetMask = ChunkSizeBytes - 1;
const size_t ParallelCompactData::ChunkAddrMask = ~ChunkAddrOffsetMask;
const size_t ParallelCompactData::Log2RegionSize = 9; // 512 words
const size_t ParallelCompactData::RegionSize = (size_t)1 << Log2RegionSize;
const size_t ParallelCompactData::RegionSizeBytes =
RegionSize << LogHeapWordSize;
const size_t ParallelCompactData::RegionSizeOffsetMask = RegionSize - 1;
const size_t ParallelCompactData::RegionAddrOffsetMask = RegionSizeBytes - 1;
const size_t ParallelCompactData::RegionAddrMask = ~RegionAddrOffsetMask;
// 32-bit: 128 words covers 4 bitmap words
// 64-bit: 128 words covers 2 bitmap words
const size_t ParallelCompactData::Log2BlockSize = 7; // 128 words
const size_t ParallelCompactData::BlockSize = (size_t)1 << Log2BlockSize;
const size_t ParallelCompactData::BlockOffsetMask = BlockSize - 1;
const size_t ParallelCompactData::BlockMask = ~BlockOffsetMask;
const ParallelCompactData::RegionData::region_sz_t
ParallelCompactData::RegionData::dc_shift = 27;
const size_t ParallelCompactData::BlocksPerChunk = ChunkSize / BlockSize;
const ParallelCompactData::RegionData::region_sz_t
ParallelCompactData::RegionData::dc_mask = ~0U << dc_shift;
const ParallelCompactData::ChunkData::chunk_sz_t
ParallelCompactData::ChunkData::dc_shift = 27;
const ParallelCompactData::RegionData::region_sz_t
ParallelCompactData::RegionData::dc_one = 0x1U << dc_shift;
const ParallelCompactData::ChunkData::chunk_sz_t
ParallelCompactData::ChunkData::dc_mask = ~0U << dc_shift;
const ParallelCompactData::RegionData::region_sz_t
ParallelCompactData::RegionData::los_mask = ~dc_mask;
const ParallelCompactData::ChunkData::chunk_sz_t
ParallelCompactData::ChunkData::dc_one = 0x1U << dc_shift;
const ParallelCompactData::RegionData::region_sz_t
ParallelCompactData::RegionData::dc_claimed = 0x8U << dc_shift;
const ParallelCompactData::ChunkData::chunk_sz_t
ParallelCompactData::ChunkData::los_mask = ~dc_mask;
const ParallelCompactData::ChunkData::chunk_sz_t
ParallelCompactData::ChunkData::dc_claimed = 0x8U << dc_shift;
const ParallelCompactData::ChunkData::chunk_sz_t
ParallelCompactData::ChunkData::dc_completed = 0xcU << dc_shift;
#ifdef ASSERT
short ParallelCompactData::BlockData::_cur_phase = 0;
#endif
const ParallelCompactData::RegionData::region_sz_t
ParallelCompactData::RegionData::dc_completed = 0xcU << dc_shift;
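With Log2RegionSize = 9 each region covers 512 heap words, and the masks defined above let the summary code convert between addresses, region indices and in-region offsets with shifts instead of division. A self-contained sketch of that arithmetic over plain word offsets; the constants mirror the ones above, the helper names are illustrative:
#include <cassert>
#include <cstddef>
const size_t Log2RegionSize       = 9;                     // 512 words
const size_t RegionSize           = (size_t)1 << Log2RegionSize;
const size_t RegionSizeOffsetMask = RegionSize - 1;
// Word offset from the start of the covered heap range -> region index.
static size_t addr_to_region_idx(size_t word_offset) {
  return word_offset >> Log2RegionSize;
}
// Word offset within the region that contains word_offset.
static size_t region_offset(size_t word_offset) {
  return word_offset & RegionSizeOffsetMask;
}
// Round a word offset down/up to a region boundary.
static size_t region_align_down(size_t word_offset) {
  return word_offset & ~RegionSizeOffsetMask;
}
static size_t region_align_up(size_t word_offset) {
  return (word_offset + RegionSizeOffsetMask) & ~RegionSizeOffsetMask;
}
int main() {
  const size_t w = 3 * RegionSize + 17;      // a word in the fourth region
  assert(addr_to_region_idx(w) == 3);
  assert(region_offset(w) == 17);
  assert(region_align_down(w) == 3 * RegionSize);
  assert(region_align_up(w) == 4 * RegionSize);
  return 0;
}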
SpaceInfo PSParallelCompact::_space_info[PSParallelCompact::last_space_id];
bool PSParallelCompact::_print_phases = false;
......@@ -100,99 +88,12 @@ GrowableArray<HeapWord*>* PSParallelCompact::_last_gc_live_oops_moved_to = NULL;
GrowableArray<size_t> * PSParallelCompact::_last_gc_live_oops_size = NULL;
#endif
// XXX beg - verification code; only works while we also mark in object headers
static void
verify_mark_bitmap(ParMarkBitMap& _mark_bitmap)
{
ParallelScavengeHeap* heap = PSParallelCompact::gc_heap();
PSPermGen* perm_gen = heap->perm_gen();
PSOldGen* old_gen = heap->old_gen();
PSYoungGen* young_gen = heap->young_gen();
MutableSpace* perm_space = perm_gen->object_space();
MutableSpace* old_space = old_gen->object_space();
MutableSpace* eden_space = young_gen->eden_space();
MutableSpace* from_space = young_gen->from_space();
MutableSpace* to_space = young_gen->to_space();
// 'from_space' here is the survivor space at the lower address.
if (to_space->bottom() < from_space->bottom()) {
from_space = to_space;
to_space = young_gen->from_space();
}
HeapWord* boundaries[12];
unsigned int bidx = 0;
const unsigned int bidx_max = sizeof(boundaries) / sizeof(boundaries[0]);
boundaries[0] = perm_space->bottom();
boundaries[1] = perm_space->top();
boundaries[2] = old_space->bottom();
boundaries[3] = old_space->top();
boundaries[4] = eden_space->bottom();
boundaries[5] = eden_space->top();
boundaries[6] = from_space->bottom();
boundaries[7] = from_space->top();
boundaries[8] = to_space->bottom();
boundaries[9] = to_space->top();
boundaries[10] = to_space->end();
boundaries[11] = to_space->end();
BitMap::idx_t beg_bit = 0;
BitMap::idx_t end_bit;
BitMap::idx_t tmp_bit;
const BitMap::idx_t last_bit = _mark_bitmap.size();
do {
HeapWord* addr = _mark_bitmap.bit_to_addr(beg_bit);
if (_mark_bitmap.is_marked(beg_bit)) {
oop obj = (oop)addr;
assert(obj->is_gc_marked(), "obj header is not marked");
end_bit = _mark_bitmap.find_obj_end(beg_bit, last_bit);
const size_t size = _mark_bitmap.obj_size(beg_bit, end_bit);
assert(size == (size_t)obj->size(), "end bit wrong?");
beg_bit = _mark_bitmap.find_obj_beg(beg_bit + 1, last_bit);
assert(beg_bit > end_bit, "bit set in middle of an obj");
} else {
if (addr >= boundaries[bidx] && addr < boundaries[bidx + 1]) {
// a dead object in the current space.
oop obj = (oop)addr;
end_bit = _mark_bitmap.addr_to_bit(addr + obj->size());
assert(!obj->is_gc_marked(), "obj marked in header, not in bitmap");
tmp_bit = beg_bit + 1;
beg_bit = _mark_bitmap.find_obj_beg(tmp_bit, end_bit);
assert(beg_bit == end_bit, "beg bit set in unmarked obj");
beg_bit = _mark_bitmap.find_obj_end(tmp_bit, end_bit);
assert(beg_bit == end_bit, "end bit set in unmarked obj");
} else if (addr < boundaries[bidx + 2]) {
// addr is between top in the current space and bottom in the next.
end_bit = beg_bit + pointer_delta(boundaries[bidx + 2], addr);
tmp_bit = beg_bit;
beg_bit = _mark_bitmap.find_obj_beg(tmp_bit, end_bit);
assert(beg_bit == end_bit, "beg bit set above top");
beg_bit = _mark_bitmap.find_obj_end(tmp_bit, end_bit);
assert(beg_bit == end_bit, "end bit set above top");
bidx += 2;
} else if (bidx < bidx_max - 2) {
bidx += 2; // ???
} else {
tmp_bit = beg_bit;
beg_bit = _mark_bitmap.find_obj_beg(tmp_bit, last_bit);
assert(beg_bit == last_bit, "beg bit set outside heap");
beg_bit = _mark_bitmap.find_obj_end(tmp_bit, last_bit);
assert(beg_bit == last_bit, "end bit set outside heap");
}
}
} while (beg_bit < last_bit);
}
// XXX end - verification code; only works while we also mark in object headers
#ifndef PRODUCT
const char* PSParallelCompact::space_names[] = {
"perm", "old ", "eden", "from", "to "
};
void PSParallelCompact::print_chunk_ranges()
void PSParallelCompact::print_region_ranges()
{
tty->print_cr("space bottom top end new_top");
tty->print_cr("------ ---------- ---------- ---------- ----------");
......@@ -203,31 +104,31 @@ void PSParallelCompact::print_chunk_ranges()
SIZE_FORMAT_W(10) " " SIZE_FORMAT_W(10) " "
SIZE_FORMAT_W(10) " " SIZE_FORMAT_W(10) " ",
id, space_names[id],
summary_data().addr_to_chunk_idx(space->bottom()),
summary_data().addr_to_chunk_idx(space->top()),
summary_data().addr_to_chunk_idx(space->end()),
summary_data().addr_to_chunk_idx(_space_info[id].new_top()));
summary_data().addr_to_region_idx(space->bottom()),
summary_data().addr_to_region_idx(space->top()),
summary_data().addr_to_region_idx(space->end()),
summary_data().addr_to_region_idx(_space_info[id].new_top()));
}
}
void
print_generic_summary_chunk(size_t i, const ParallelCompactData::ChunkData* c)
print_generic_summary_region(size_t i, const ParallelCompactData::RegionData* c)
{
#define CHUNK_IDX_FORMAT SIZE_FORMAT_W(7)
#define CHUNK_DATA_FORMAT SIZE_FORMAT_W(5)
#define REGION_IDX_FORMAT SIZE_FORMAT_W(7)
#define REGION_DATA_FORMAT SIZE_FORMAT_W(5)
ParallelCompactData& sd = PSParallelCompact::summary_data();
size_t dci = c->destination() ? sd.addr_to_chunk_idx(c->destination()) : 0;
tty->print_cr(CHUNK_IDX_FORMAT " " PTR_FORMAT " "
CHUNK_IDX_FORMAT " " PTR_FORMAT " "
CHUNK_DATA_FORMAT " " CHUNK_DATA_FORMAT " "
CHUNK_DATA_FORMAT " " CHUNK_IDX_FORMAT " %d",
size_t dci = c->destination() ? sd.addr_to_region_idx(c->destination()) : 0;
tty->print_cr(REGION_IDX_FORMAT " " PTR_FORMAT " "
REGION_IDX_FORMAT " " PTR_FORMAT " "
REGION_DATA_FORMAT " " REGION_DATA_FORMAT " "
REGION_DATA_FORMAT " " REGION_IDX_FORMAT " %d",
i, c->data_location(), dci, c->destination(),
c->partial_obj_size(), c->live_obj_size(),
c->data_size(), c->source_chunk(), c->destination_count());
c->data_size(), c->source_region(), c->destination_count());
#undef CHUNK_IDX_FORMAT
#undef CHUNK_DATA_FORMAT
#undef REGION_IDX_FORMAT
#undef REGION_DATA_FORMAT
}
void
......@@ -236,14 +137,14 @@ print_generic_summary_data(ParallelCompactData& summary_data,
HeapWord* const end_addr)
{
size_t total_words = 0;
size_t i = summary_data.addr_to_chunk_idx(beg_addr);
const size_t last = summary_data.addr_to_chunk_idx(end_addr);
size_t i = summary_data.addr_to_region_idx(beg_addr);
const size_t last = summary_data.addr_to_region_idx(end_addr);
HeapWord* pdest = 0;
while (i <= last) {
ParallelCompactData::ChunkData* c = summary_data.chunk(i);
ParallelCompactData::RegionData* c = summary_data.region(i);
if (c->data_size() != 0 || c->destination() != pdest) {
print_generic_summary_chunk(i, c);
print_generic_summary_region(i, c);
total_words += c->data_size();
pdest = c->destination();
}
......@@ -265,16 +166,16 @@ print_generic_summary_data(ParallelCompactData& summary_data,
}
void
print_initial_summary_chunk(size_t i,
const ParallelCompactData::ChunkData* c,
bool newline = true)
print_initial_summary_region(size_t i,
const ParallelCompactData::RegionData* c,
bool newline = true)
{
tty->print(SIZE_FORMAT_W(5) " " PTR_FORMAT " "
SIZE_FORMAT_W(5) " " SIZE_FORMAT_W(5) " "
SIZE_FORMAT_W(5) " " SIZE_FORMAT_W(5) " %d",
i, c->destination(),
c->partial_obj_size(), c->live_obj_size(),
c->data_size(), c->source_chunk(), c->destination_count());
c->data_size(), c->source_region(), c->destination_count());
if (newline) tty->cr();
}
......@@ -285,47 +186,48 @@ print_initial_summary_data(ParallelCompactData& summary_data,
return;
}
const size_t chunk_size = ParallelCompactData::ChunkSize;
HeapWord* const top_aligned_up = summary_data.chunk_align_up(space->top());
const size_t end_chunk = summary_data.addr_to_chunk_idx(top_aligned_up);
const ParallelCompactData::ChunkData* c = summary_data.chunk(end_chunk - 1);
const size_t region_size = ParallelCompactData::RegionSize;
typedef ParallelCompactData::RegionData RegionData;
HeapWord* const top_aligned_up = summary_data.region_align_up(space->top());
const size_t end_region = summary_data.addr_to_region_idx(top_aligned_up);
const RegionData* c = summary_data.region(end_region - 1);
HeapWord* end_addr = c->destination() + c->data_size();
const size_t live_in_space = pointer_delta(end_addr, space->bottom());
// Print (and count) the full chunks at the beginning of the space.
size_t full_chunk_count = 0;
size_t i = summary_data.addr_to_chunk_idx(space->bottom());
while (i < end_chunk && summary_data.chunk(i)->data_size() == chunk_size) {
print_initial_summary_chunk(i, summary_data.chunk(i));
++full_chunk_count;
// Print (and count) the full regions at the beginning of the space.
size_t full_region_count = 0;
size_t i = summary_data.addr_to_region_idx(space->bottom());
while (i < end_region && summary_data.region(i)->data_size() == region_size) {
print_initial_summary_region(i, summary_data.region(i));
++full_region_count;
++i;
}
size_t live_to_right = live_in_space - full_chunk_count * chunk_size;
size_t live_to_right = live_in_space - full_region_count * region_size;
double max_reclaimed_ratio = 0.0;
size_t max_reclaimed_ratio_chunk = 0;
size_t max_reclaimed_ratio_region = 0;
size_t max_dead_to_right = 0;
size_t max_live_to_right = 0;
// Print the 'reclaimed ratio' for chunks while there is something live in the
// chunk or to the right of it. The remaining chunks are empty (and
// Print the 'reclaimed ratio' for regions while there is something live in
// the region or to the right of it. The remaining regions are empty (and
// uninteresting), and computing the ratio will result in division by 0.
while (i < end_chunk && live_to_right > 0) {
c = summary_data.chunk(i);
HeapWord* const chunk_addr = summary_data.chunk_to_addr(i);
const size_t used_to_right = pointer_delta(space->top(), chunk_addr);
while (i < end_region && live_to_right > 0) {
c = summary_data.region(i);
HeapWord* const region_addr = summary_data.region_to_addr(i);
const size_t used_to_right = pointer_delta(space->top(), region_addr);
const size_t dead_to_right = used_to_right - live_to_right;
const double reclaimed_ratio = double(dead_to_right) / live_to_right;
if (reclaimed_ratio > max_reclaimed_ratio) {
max_reclaimed_ratio = reclaimed_ratio;
max_reclaimed_ratio_chunk = i;
max_reclaimed_ratio_region = i;
max_dead_to_right = dead_to_right;
max_live_to_right = live_to_right;
}
print_initial_summary_chunk(i, c, false);
print_initial_summary_region(i, c, false);
tty->print_cr(" %12.10f " SIZE_FORMAT_W(10) " " SIZE_FORMAT_W(10),
reclaimed_ratio, dead_to_right, live_to_right);
......@@ -333,14 +235,14 @@ print_initial_summary_data(ParallelCompactData& summary_data,
++i;
}
// Any remaining chunks are empty. Print one more if there is one.
if (i < end_chunk) {
print_initial_summary_chunk(i, summary_data.chunk(i));
// Any remaining regions are empty. Print one more if there is one.
if (i < end_region) {
print_initial_summary_region(i, summary_data.region(i));
}
tty->print_cr("max: " SIZE_FORMAT_W(4) " d2r=" SIZE_FORMAT_W(10) " "
"l2r=" SIZE_FORMAT_W(10) " max_ratio=%14.12f",
max_reclaimed_ratio_chunk, max_dead_to_right,
max_reclaimed_ratio_region, max_dead_to_right,
max_live_to_right, max_reclaimed_ratio);
}
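The "reclaimed ratio" printed per region above is dead_to_right / live_to_right: how many dead words would be squeezed out for each live word that still has to move if the dense prefix ended at this region. A tiny worked example of the computation; the numbers are made up for illustration:
#include <cassert>
#include <cstddef>
#include <cstdio>
int main() {
  // Suppose the space's top is 10,000 words past this region's start and
  // 6,000 live words remain at or to the right of the region.
  const size_t used_to_right = 10000;
  const size_t live_to_right = 6000;
  const size_t dead_to_right = used_to_right - live_to_right;   // 4,000 words
  // Ratio of reclaimable dead space to live data that must still move.
  const double reclaimed_ratio = double(dead_to_right) / live_to_right;
  std::printf("reclaimed ratio = %.4f\n", reclaimed_ratio);     // 0.6667
  assert(dead_to_right == 4000);
  return 0;
}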
......@@ -372,13 +274,9 @@ ParallelCompactData::ParallelCompactData()
{
_region_start = 0;
_chunk_vspace = 0;
_chunk_data = 0;
_chunk_count = 0;
_block_vspace = 0;
_block_data = 0;
_block_count = 0;
_region_vspace = 0;
_region_data = 0;
_region_count = 0;
}
bool ParallelCompactData::initialize(MemRegion covered_region)
......@@ -387,18 +285,12 @@ bool ParallelCompactData::initialize(MemRegion covered_region)
const size_t region_size = covered_region.word_size();
DEBUG_ONLY(_region_end = _region_start + region_size;)
assert(chunk_align_down(_region_start) == _region_start,
assert(region_align_down(_region_start) == _region_start,
"region start not aligned");
assert((region_size & ChunkSizeOffsetMask) == 0,
"region size not a multiple of ChunkSize");
bool result = initialize_chunk_data(region_size);
assert((region_size & RegionSizeOffsetMask) == 0,
"region size not a multiple of RegionSize");
// Initialize the block data if it will be used for updating pointers, or if
// this is a debug build.
if (!UseParallelOldGCChunkPointerCalc || trueInDebug) {
result = result && initialize_block_data(region_size);
}
bool result = initialize_region_data(region_size);
return result;
}
......@@ -429,25 +321,13 @@ ParallelCompactData::create_vspace(size_t count, size_t element_size)
return 0;
}
bool ParallelCompactData::initialize_chunk_data(size_t region_size)
{
const size_t count = (region_size + ChunkSizeOffsetMask) >> Log2ChunkSize;
_chunk_vspace = create_vspace(count, sizeof(ChunkData));
if (_chunk_vspace != 0) {
_chunk_data = (ChunkData*)_chunk_vspace->reserved_low_addr();
_chunk_count = count;
return true;
}
return false;
}
bool ParallelCompactData::initialize_block_data(size_t region_size)
bool ParallelCompactData::initialize_region_data(size_t region_size)
{
const size_t count = (region_size + BlockOffsetMask) >> Log2BlockSize;
_block_vspace = create_vspace(count, sizeof(BlockData));
if (_block_vspace != 0) {
_block_data = (BlockData*)_block_vspace->reserved_low_addr();
_block_count = count;
const size_t count = (region_size + RegionSizeOffsetMask) >> Log2RegionSize;
_region_vspace = create_vspace(count, sizeof(RegionData));
if (_region_vspace != 0) {
_region_data = (RegionData*)_region_vspace->reserved_low_addr();
_region_count = count;
return true;
}
return false;
......@@ -455,38 +335,27 @@ bool ParallelCompactData::initialize_block_data(size_t region_size)
void ParallelCompactData::clear()
{
if (_block_data) {
memset(_block_data, 0, _block_vspace->committed_size());
}
memset(_chunk_data, 0, _chunk_vspace->committed_size());
memset(_region_data, 0, _region_vspace->committed_size());
}
void ParallelCompactData::clear_range(size_t beg_chunk, size_t end_chunk) {
assert(beg_chunk <= _chunk_count, "beg_chunk out of range");
assert(end_chunk <= _chunk_count, "end_chunk out of range");
assert(ChunkSize % BlockSize == 0, "ChunkSize not a multiple of BlockSize");
void ParallelCompactData::clear_range(size_t beg_region, size_t end_region) {
assert(beg_region <= _region_count, "beg_region out of range");
assert(end_region <= _region_count, "end_region out of range");
const size_t chunk_cnt = end_chunk - beg_chunk;
if (_block_data) {
const size_t blocks_per_chunk = ChunkSize / BlockSize;
const size_t beg_block = beg_chunk * blocks_per_chunk;
const size_t block_cnt = chunk_cnt * blocks_per_chunk;
memset(_block_data + beg_block, 0, block_cnt * sizeof(BlockData));
}
memset(_chunk_data + beg_chunk, 0, chunk_cnt * sizeof(ChunkData));
const size_t region_cnt = end_region - beg_region;
memset(_region_data + beg_region, 0, region_cnt * sizeof(RegionData));
}
HeapWord* ParallelCompactData::partial_obj_end(size_t chunk_idx) const
HeapWord* ParallelCompactData::partial_obj_end(size_t region_idx) const
{
const ChunkData* cur_cp = chunk(chunk_idx);
const ChunkData* const end_cp = chunk(chunk_count() - 1);
const RegionData* cur_cp = region(region_idx);
const RegionData* const end_cp = region(region_count() - 1);
HeapWord* result = chunk_to_addr(chunk_idx);
HeapWord* result = region_to_addr(region_idx);
if (cur_cp < end_cp) {
do {
result += cur_cp->partial_obj_size();
} while (cur_cp->partial_obj_size() == ChunkSize && ++cur_cp < end_cp);
} while (cur_cp->partial_obj_size() == RegionSize && ++cur_cp < end_cp);
}
return result;
}
......@@ -494,56 +363,56 @@ HeapWord* ParallelCompactData::partial_obj_end(size_t chunk_idx) const
void ParallelCompactData::add_obj(HeapWord* addr, size_t len)
{
const size_t obj_ofs = pointer_delta(addr, _region_start);
const size_t beg_chunk = obj_ofs >> Log2ChunkSize;
const size_t end_chunk = (obj_ofs + len - 1) >> Log2ChunkSize;
const size_t beg_region = obj_ofs >> Log2RegionSize;
const size_t end_region = (obj_ofs + len - 1) >> Log2RegionSize;
DEBUG_ONLY(Atomic::inc_ptr(&add_obj_count);)
DEBUG_ONLY(Atomic::add_ptr(len, &add_obj_size);)
if (beg_chunk == end_chunk) {
// All in one chunk.
_chunk_data[beg_chunk].add_live_obj(len);
if (beg_region == end_region) {
// All in one region.
_region_data[beg_region].add_live_obj(len);
return;
}
// First chunk.
const size_t beg_ofs = chunk_offset(addr);
_chunk_data[beg_chunk].add_live_obj(ChunkSize - beg_ofs);
// First region.
const size_t beg_ofs = region_offset(addr);
_region_data[beg_region].add_live_obj(RegionSize - beg_ofs);
klassOop klass = ((oop)addr)->klass();
// Middle chunks--completely spanned by this object.
for (size_t chunk = beg_chunk + 1; chunk < end_chunk; ++chunk) {
_chunk_data[chunk].set_partial_obj_size(ChunkSize);
_chunk_data[chunk].set_partial_obj_addr(addr);
// Middle regions--completely spanned by this object.
for (size_t region = beg_region + 1; region < end_region; ++region) {
_region_data[region].set_partial_obj_size(RegionSize);
_region_data[region].set_partial_obj_addr(addr);
}
// Last chunk.
const size_t end_ofs = chunk_offset(addr + len - 1);
_chunk_data[end_chunk].set_partial_obj_size(end_ofs + 1);
_chunk_data[end_chunk].set_partial_obj_addr(addr);
// Last region.
const size_t end_ofs = region_offset(addr + len - 1);
_region_data[end_region].set_partial_obj_size(end_ofs + 1);
_region_data[end_region].set_partial_obj_addr(addr);
}
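add_obj splits an object's length across every region it touches: the first region is credited with the words up to its boundary, each fully spanned middle region records partial_obj_size == RegionSize, and the last region records the trailing end_ofs + 1 words. A worked sketch of that split with RegionSize = 512 words; the vector of counters is purely illustrative:
#include <cassert>
#include <cstddef>
#include <vector>
const size_t Log2RegionSize = 9;
const size_t RegionSize     = (size_t)1 << Log2RegionSize;   // 512 words
// live[i] accumulates the live words attributed to region i.
static void add_obj(std::vector<size_t>& live, size_t obj_ofs, size_t len) {
  const size_t beg_region = obj_ofs >> Log2RegionSize;
  const size_t end_region = (obj_ofs + len - 1) >> Log2RegionSize;
  if (beg_region == end_region) {           // all in one region
    live[beg_region] += len;
    return;
  }
  const size_t beg_ofs = obj_ofs & (RegionSize - 1);
  live[beg_region] += RegionSize - beg_ofs; // first region: up to the boundary
  for (size_t r = beg_region + 1; r < end_region; ++r) {
    live[r] += RegionSize;                  // middle regions: completely spanned
  }
  const size_t end_ofs = (obj_ofs + len - 1) & (RegionSize - 1);
  live[end_region] += end_ofs + 1;          // last region: trailing words
}
int main() {
  std::vector<size_t> live(4, 0);
  // Object starting 100 words into region 0, 1000 words long:
  // 412 words in region 0, 512 in region 1, 76 in region 2.
  add_obj(live, 100, 1000);
  assert(live[0] == 412 && live[1] == 512 && live[2] == 76);
  assert(live[0] + live[1] + live[2] == 1000);
  return 0;
}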
void
ParallelCompactData::summarize_dense_prefix(HeapWord* beg, HeapWord* end)
{
assert(chunk_offset(beg) == 0, "not ChunkSize aligned");
assert(chunk_offset(end) == 0, "not ChunkSize aligned");
assert(region_offset(beg) == 0, "not RegionSize aligned");
assert(region_offset(end) == 0, "not RegionSize aligned");
size_t cur_chunk = addr_to_chunk_idx(beg);
const size_t end_chunk = addr_to_chunk_idx(end);
size_t cur_region = addr_to_region_idx(beg);
const size_t end_region = addr_to_region_idx(end);
HeapWord* addr = beg;
while (cur_chunk < end_chunk) {
_chunk_data[cur_chunk].set_destination(addr);
_chunk_data[cur_chunk].set_destination_count(0);
_chunk_data[cur_chunk].set_source_chunk(cur_chunk);
_chunk_data[cur_chunk].set_data_location(addr);
while (cur_region < end_region) {
_region_data[cur_region].set_destination(addr);
_region_data[cur_region].set_destination_count(0);
_region_data[cur_region].set_source_region(cur_region);
_region_data[cur_region].set_data_location(addr);
// Update live_obj_size so the chunk appears completely full.
size_t live_size = ChunkSize - _chunk_data[cur_chunk].partial_obj_size();
_chunk_data[cur_chunk].set_live_obj_size(live_size);
// Update live_obj_size so the region appears completely full.
size_t live_size = RegionSize - _region_data[cur_region].partial_obj_size();
_region_data[cur_region].set_live_obj_size(live_size);
++cur_chunk;
addr += ChunkSize;
++cur_region;
addr += RegionSize;
}
}
......@@ -552,7 +421,7 @@ bool ParallelCompactData::summarize(HeapWord* target_beg, HeapWord* target_end,
HeapWord** target_next,
HeapWord** source_next) {
// This is too strict.
// assert(chunk_offset(source_beg) == 0, "not ChunkSize aligned");
// assert(region_offset(source_beg) == 0, "not RegionSize aligned");
if (TraceParallelOldGCSummaryPhase) {
tty->print_cr("tb=" PTR_FORMAT " te=" PTR_FORMAT " "
......@@ -564,125 +433,93 @@ bool ParallelCompactData::summarize(HeapWord* target_beg, HeapWord* target_end,
source_next != 0 ? *source_next : (HeapWord*) 0);
}
size_t cur_chunk = addr_to_chunk_idx(source_beg);
const size_t end_chunk = addr_to_chunk_idx(chunk_align_up(source_end));
size_t cur_region = addr_to_region_idx(source_beg);
const size_t end_region = addr_to_region_idx(region_align_up(source_end));
HeapWord *dest_addr = target_beg;
while (cur_chunk < end_chunk) {
size_t words = _chunk_data[cur_chunk].data_size();
while (cur_region < end_region) {
size_t words = _region_data[cur_region].data_size();
#if 1
assert(pointer_delta(target_end, dest_addr) >= words,
"source region does not fit into target region");
#else
// XXX - need some work on the corner cases here. If the chunk does not
// fit, then must either make sure any partial_obj from the chunk fits, or
// 'undo' the initial part of the partial_obj that is in the previous chunk.
// XXX - need some work on the corner cases here. If the region does not
// fit, then must either make sure any partial_obj from the region fits, or
// "undo" the initial part of the partial_obj that is in the previous
// region.
if (dest_addr + words >= target_end) {
// Let the caller know where to continue.
*target_next = dest_addr;
*source_next = chunk_to_addr(cur_chunk);
*source_next = region_to_addr(cur_region);
return false;
}
#endif // #if 1
_chunk_data[cur_chunk].set_destination(dest_addr);
_region_data[cur_region].set_destination(dest_addr);
// Set the destination_count for cur_chunk, and if necessary, update
// source_chunk for a destination chunk. The source_chunk field is updated
// if cur_chunk is the first (left-most) chunk to be copied to a destination
// chunk.
// Set the destination_count for cur_region, and if necessary, update
// source_region for a destination region. The source_region field is
// updated if cur_region is the first (left-most) region to be copied to a
// destination region.
//
// The destination_count calculation is a bit subtle. A chunk that has data
// that compacts into itself does not count itself as a destination. This
// maintains the invariant that a zero count means the chunk is available
// and can be claimed and then filled.
// The destination_count calculation is a bit subtle. A region that has
// data that compacts into itself does not count itself as a destination.
// This maintains the invariant that a zero count means the region is
// available and can be claimed and then filled.
if (words > 0) {
HeapWord* const last_addr = dest_addr + words - 1;
const size_t dest_chunk_1 = addr_to_chunk_idx(dest_addr);
const size_t dest_chunk_2 = addr_to_chunk_idx(last_addr);
const size_t dest_region_1 = addr_to_region_idx(dest_addr);
const size_t dest_region_2 = addr_to_region_idx(last_addr);
#if 0
// Initially assume that the destination chunks will be the same and
// Initially assume that the destination regions will be the same and
// adjust the value below if necessary. Under this assumption, if
// cur_chunk == dest_chunk_2, then cur_chunk will be compacted completely
// into itself.
uint destination_count = cur_chunk == dest_chunk_2 ? 0 : 1;
if (dest_chunk_1 != dest_chunk_2) {
// Destination chunks differ; adjust destination_count.
// cur_region == dest_region_2, then cur_region will be compacted
// completely into itself.
uint destination_count = cur_region == dest_region_2 ? 0 : 1;
if (dest_region_1 != dest_region_2) {
// Destination regions differ; adjust destination_count.
destination_count += 1;
// Data from cur_chunk will be copied to the start of dest_chunk_2.
_chunk_data[dest_chunk_2].set_source_chunk(cur_chunk);
} else if (chunk_offset(dest_addr) == 0) {
// Data from cur_chunk will be copied to the start of the destination
// chunk.
_chunk_data[dest_chunk_1].set_source_chunk(cur_chunk);
// Data from cur_region will be copied to the start of dest_region_2.
_region_data[dest_region_2].set_source_region(cur_region);
} else if (region_offset(dest_addr) == 0) {
// Data from cur_region will be copied to the start of the destination
// region.
_region_data[dest_region_1].set_source_region(cur_region);
}
#else
// Initially assume that the destination chunks will be different and
// Initially assume that the destination regions will be different and
// adjust the value below if necessary. Under this assumption, if
// cur_chunk == dest_chunk2, then cur_chunk will be compacted partially
// into dest_chunk_1 and partially into itself.
uint destination_count = cur_chunk == dest_chunk_2 ? 1 : 2;
if (dest_chunk_1 != dest_chunk_2) {
// Data from cur_chunk will be copied to the start of dest_chunk_2.
_chunk_data[dest_chunk_2].set_source_chunk(cur_chunk);
// cur_region == dest_region2, then cur_region will be compacted partially
// into dest_region_1 and partially into itself.
uint destination_count = cur_region == dest_region_2 ? 1 : 2;
if (dest_region_1 != dest_region_2) {
// Data from cur_region will be copied to the start of dest_region_2.
_region_data[dest_region_2].set_source_region(cur_region);
} else {
// Destination chunks are the same; adjust destination_count.
// Destination regions are the same; adjust destination_count.
destination_count -= 1;
if (chunk_offset(dest_addr) == 0) {
// Data from cur_chunk will be copied to the start of the destination
// chunk.
_chunk_data[dest_chunk_1].set_source_chunk(cur_chunk);
if (region_offset(dest_addr) == 0) {
// Data from cur_region will be copied to the start of the destination
// region.
_region_data[dest_region_1].set_source_region(cur_region);
}
}
#endif // #if 0
_chunk_data[cur_chunk].set_destination_count(destination_count);
_chunk_data[cur_chunk].set_data_location(chunk_to_addr(cur_chunk));
_region_data[cur_region].set_destination_count(destination_count);
_region_data[cur_region].set_data_location(region_to_addr(cur_region));
dest_addr += words;
}
++cur_chunk;
++cur_region;
}
*target_next = dest_addr;
return true;
}
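The destination_count branches above boil down to a small rule: assume the region's data spills across two destination regions, then subtract when both destinations coincide, so that a final count of zero still means the region compacts entirely into itself and can be claimed and filled at once. A compact sketch of just that rule over region indices; the helper and the example indices are illustrative:
#include <cassert>
#include <cstddef>
// Destination count for one source region, following the branch structure
// above: assume two distinct destination regions, then correct downwards.
// All three arguments are region indices (no heap state).
static unsigned destination_count(size_t cur_region,
                                  size_t dest_region_1,
                                  size_t dest_region_2) {
  unsigned count = (cur_region == dest_region_2) ? 1 : 2;
  if (dest_region_1 == dest_region_2) {
    count -= 1;                 // data lands in a single destination region
  }
  return count;
}
int main() {
  // Region 7 compacts entirely into itself: available immediately (count 0).
  assert(destination_count(7, 7, 7) == 0);
  // Region 7 compacts entirely into region 3: one destination.
  assert(destination_count(7, 3, 3) == 1);
  // Region 7 spills from region 6 into itself: only region 6 counts.
  assert(destination_count(7, 6, 7) == 1);
  // Region 7 spills across regions 3 and 4: two destinations.
  assert(destination_count(7, 3, 4) == 2);
  return 0;
}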
bool ParallelCompactData::partial_obj_ends_in_block(size_t block_index) {
HeapWord* block_addr = block_to_addr(block_index);
HeapWord* block_end_addr = block_addr + BlockSize;
size_t chunk_index = addr_to_chunk_idx(block_addr);
HeapWord* partial_obj_end_addr = partial_obj_end(chunk_index);
// An object that ends at the end of the block, ends
// in the block (the last word of the object is to
// the left of the end).
if ((block_addr < partial_obj_end_addr) &&
(partial_obj_end_addr <= block_end_addr)) {
return true;
}
return false;
}
HeapWord* ParallelCompactData::calc_new_pointer(HeapWord* addr) {
HeapWord* result = NULL;
if (UseParallelOldGCChunkPointerCalc) {
result = chunk_calc_new_pointer(addr);
} else {
result = block_calc_new_pointer(addr);
}
return result;
}
// This method is too complicated (and expensive) to be called
// for every reference.
// Try to restructure this so that a NULL is returned if
// the object is dead.  But don't waste the cycles to explicitly check
// that it is dead since only live objects should be passed in.
HeapWord* ParallelCompactData::chunk_calc_new_pointer(HeapWord* addr) {
assert(addr != NULL, "Should detect NULL oop earlier");
assert(PSParallelCompact::gc_heap()->is_in(addr), "addr not in heap");
#ifdef ASSERT
......@@ -692,30 +529,30 @@ HeapWord* ParallelCompactData::chunk_calc_new_pointer(HeapWord* addr) {
#endif
assert(PSParallelCompact::mark_bitmap()->is_marked(addr), "obj not marked");
// Chunk covering the object.
size_t chunk_index = addr_to_chunk_idx(addr);
const ChunkData* const chunk_ptr = chunk(chunk_index);
HeapWord* const chunk_addr = chunk_align_down(addr);
// Region covering the object.
size_t region_index = addr_to_region_idx(addr);
const RegionData* const region_ptr = region(region_index);
HeapWord* const region_addr = region_align_down(addr);
assert(addr < chunk_addr + ChunkSize, "Chunk does not cover object");
assert(addr_to_chunk_ptr(chunk_addr) == chunk_ptr, "sanity check");
assert(addr < region_addr + RegionSize, "Region does not cover object");
assert(addr_to_region_ptr(region_addr) == region_ptr, "sanity check");
HeapWord* result = chunk_ptr->destination();
HeapWord* result = region_ptr->destination();
// If all the data in the chunk is live, then the new location of the object
// can be calculated from the destination of the chunk plus the offset of the
// object in the chunk.
if (chunk_ptr->data_size() == ChunkSize) {
result += pointer_delta(addr, chunk_addr);
// If all the data in the region is live, then the new location of the object
// can be calculated from the destination of the region plus the offset of the
// object in the region.
if (region_ptr->data_size() == RegionSize) {
result += pointer_delta(addr, region_addr);
return result;
}
// The new location of the object is
// chunk destination +
// size of the partial object extending onto the chunk +
// sizes of the live objects in the Chunk that are to the left of addr
const size_t partial_obj_size = chunk_ptr->partial_obj_size();
HeapWord* const search_start = chunk_addr + partial_obj_size;
// region destination +
// size of the partial object extending onto the region +
// sizes of the live objects in the Region that are to the left of addr
const size_t partial_obj_size = region_ptr->partial_obj_size();
HeapWord* const search_start = region_addr + partial_obj_size;
const ParMarkBitMap* bitmap = PSParallelCompact::mark_bitmap();
size_t live_to_left = bitmap->live_words_in_range(search_start, oop(addr));
......@@ -725,50 +562,6 @@ HeapWord* ParallelCompactData::chunk_calc_new_pointer(HeapWord* addr) {
return result;
}
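For a region that is not completely live, the forwarding address computed above is: region destination + the partial object spilling in from the left + the live words between that point and the object. A sketch of the same formula over word offsets, with a toy bitmap standing in for ParMarkBitMap::live_words_in_range; all names and numbers are illustrative:
#include <cassert>
#include <cstddef>
#include <vector>
// Toy model: live[w] is true if heap word w is live.
static size_t live_words_in_range(const std::vector<bool>& live,
                                  size_t beg, size_t end) {
  size_t n = 0;
  for (size_t w = beg; w < end; ++w) {
    if (live[w]) ++n;
  }
  return n;
}
// New location of the object starting at word 'addr' inside a region that
// starts at 'region_addr', given the region's summary data.
static size_t calc_new_pointer(const std::vector<bool>& live,
                               size_t addr,
                               size_t region_addr,
                               size_t region_destination,
                               size_t partial_obj_size) {
  const size_t search_start = region_addr + partial_obj_size;
  const size_t live_to_left = live_words_in_range(live, search_start, addr);
  return region_destination + partial_obj_size + live_to_left;
}
int main() {
  // Region starts at word 512 and compacts to word 100. Four words of a
  // partial object spill in from the left; two more live words precede
  // the object at word 530.
  std::vector<bool> live(1024, false);
  live[520] = live[521] = true;           // two live words left of the object
  const size_t new_addr = calc_new_pointer(live, 530, 512, 100, 4);
  assert(new_addr == 100 + 4 + 2);
  return 0;
}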
HeapWord* ParallelCompactData::block_calc_new_pointer(HeapWord* addr) {
assert(addr != NULL, "Should detect NULL oop earlier");
assert(PSParallelCompact::gc_heap()->is_in(addr), "addr not in heap");
#ifdef ASSERT
if (PSParallelCompact::mark_bitmap()->is_unmarked(addr)) {
gclog_or_tty->print_cr("calc_new_pointer:: addr " PTR_FORMAT, addr);
}
#endif
assert(PSParallelCompact::mark_bitmap()->is_marked(addr), "obj not marked");
// Chunk covering the object.
size_t chunk_index = addr_to_chunk_idx(addr);
const ChunkData* const chunk_ptr = chunk(chunk_index);
HeapWord* const chunk_addr = chunk_align_down(addr);
assert(addr < chunk_addr + ChunkSize, "Chunk does not cover object");
assert(addr_to_chunk_ptr(chunk_addr) == chunk_ptr, "sanity check");
HeapWord* result = chunk_ptr->destination();
// If all the data in the chunk is live, then the new location of the object
// can be calculated from the destination of the chunk plus the offset of the
// object in the chunk.
if (chunk_ptr->data_size() == ChunkSize) {
result += pointer_delta(addr, chunk_addr);
return result;
}
// The new location of the object is
// chunk destination +
// block offset +
// sizes of the live objects in the Block that are to the left of addr
const size_t block_offset = addr_to_block_ptr(addr)->offset();
HeapWord* const search_start = chunk_addr + block_offset;
const ParMarkBitMap* bitmap = PSParallelCompact::mark_bitmap();
size_t live_to_left = bitmap->live_words_in_range(search_start, oop(addr));
result += block_offset + live_to_left;
assert(result <= addr, "object cannot move to the right");
assert(result == chunk_calc_new_pointer(addr), "Should match");
return result;
}
klassOop ParallelCompactData::calc_new_klass(klassOop old_klass) {
klassOop updated_klass;
if (PSParallelCompact::should_update_klass(old_klass)) {
......@@ -792,15 +585,14 @@ void ParallelCompactData::verify_clear(const PSVirtualSpace* vspace)
void ParallelCompactData::verify_clear()
{
verify_clear(_chunk_vspace);
verify_clear(_block_vspace);
verify_clear(_region_vspace);
}
#endif // #ifdef ASSERT
#ifdef NOT_PRODUCT
ParallelCompactData::ChunkData* debug_chunk(size_t chunk_index) {
ParallelCompactData::RegionData* debug_region(size_t region_index) {
ParallelCompactData& sd = PSParallelCompact::summary_data();
return sd.chunk(chunk_index);
return sd.region(region_index);
}
#endif
......@@ -953,10 +745,10 @@ PSParallelCompact::clear_data_covering_space(SpaceId id)
const idx_t end_bit = BitMap::word_align_up(_mark_bitmap.addr_to_bit(top));
_mark_bitmap.clear_range(beg_bit, end_bit);
const size_t beg_chunk = _summary_data.addr_to_chunk_idx(bot);
const size_t end_chunk =
_summary_data.addr_to_chunk_idx(_summary_data.chunk_align_up(max_top));
_summary_data.clear_range(beg_chunk, end_chunk);
const size_t beg_region = _summary_data.addr_to_region_idx(bot);
const size_t end_region =
_summary_data.addr_to_region_idx(_summary_data.region_align_up(max_top));
_summary_data.clear_range(beg_region, end_region);
}
void PSParallelCompact::pre_compact(PreGCValues* pre_gc_values)
......@@ -1072,19 +864,19 @@ HeapWord*
PSParallelCompact::compute_dense_prefix_via_density(const SpaceId id,
bool maximum_compaction)
{
const size_t chunk_size = ParallelCompactData::ChunkSize;
const size_t region_size = ParallelCompactData::RegionSize;
const ParallelCompactData& sd = summary_data();
const MutableSpace* const space = _space_info[id].space();
HeapWord* const top_aligned_up = sd.chunk_align_up(space->top());
const ChunkData* const beg_cp = sd.addr_to_chunk_ptr(space->bottom());
const ChunkData* const end_cp = sd.addr_to_chunk_ptr(top_aligned_up);
HeapWord* const top_aligned_up = sd.region_align_up(space->top());
const RegionData* const beg_cp = sd.addr_to_region_ptr(space->bottom());
const RegionData* const end_cp = sd.addr_to_region_ptr(top_aligned_up);
// Skip full chunks at the beginning of the space--they are necessarily part
// Skip full regions at the beginning of the space--they are necessarily part
// of the dense prefix.
size_t full_count = 0;
const ChunkData* cp;
for (cp = beg_cp; cp < end_cp && cp->data_size() == chunk_size; ++cp) {
const RegionData* cp;
for (cp = beg_cp; cp < end_cp && cp->data_size() == region_size; ++cp) {
++full_count;
}
......@@ -1093,7 +885,7 @@ PSParallelCompact::compute_dense_prefix_via_density(const SpaceId id,
const bool interval_ended = gcs_since_max > HeapMaximumCompactionInterval;
if (maximum_compaction || cp == end_cp || interval_ended) {
_maximum_compaction_gc_num = total_invocations();
return sd.chunk_to_addr(cp);
return sd.region_to_addr(cp);
}
HeapWord* const new_top = _space_info[id].new_top();
......@@ -1116,52 +908,53 @@ PSParallelCompact::compute_dense_prefix_via_density(const SpaceId id,
}
// XXX - Use binary search?
HeapWord* dense_prefix = sd.chunk_to_addr(cp);
const ChunkData* full_cp = cp;
const ChunkData* const top_cp = sd.addr_to_chunk_ptr(space->top() - 1);
HeapWord* dense_prefix = sd.region_to_addr(cp);
const RegionData* full_cp = cp;
const RegionData* const top_cp = sd.addr_to_region_ptr(space->top() - 1);
while (cp < end_cp) {
HeapWord* chunk_destination = cp->destination();
const size_t cur_deadwood = pointer_delta(dense_prefix, chunk_destination);
HeapWord* region_destination = cp->destination();
const size_t cur_deadwood = pointer_delta(dense_prefix, region_destination);
if (TraceParallelOldGCDensePrefix && Verbose) {
tty->print_cr("c#=" SIZE_FORMAT_W(4) " dst=" PTR_FORMAT " "
"dp=" SIZE_FORMAT_W(8) " " "cdw=" SIZE_FORMAT_W(8),
sd.chunk(cp), chunk_destination,
sd.region(cp), region_destination,
dense_prefix, cur_deadwood);
}
if (cur_deadwood >= deadwood_goal) {
// Found the chunk that has the correct amount of deadwood to the left.
// This typically occurs after crossing a fairly sparse set of chunks, so
// iterate backwards over those sparse chunks, looking for the chunk that
// has the lowest density of live objects 'to the right.'
size_t space_to_left = sd.chunk(cp) * chunk_size;
// Found the region that has the correct amount of deadwood to the left.
// This typically occurs after crossing a fairly sparse set of regions, so
// iterate backwards over those sparse regions, looking for the region
// that has the lowest density of live objects 'to the right.'
size_t space_to_left = sd.region(cp) * region_size;
size_t live_to_left = space_to_left - cur_deadwood;
size_t space_to_right = space_capacity - space_to_left;
size_t live_to_right = space_live - live_to_left;
double density_to_right = double(live_to_right) / space_to_right;
while (cp > full_cp) {
--cp;
const size_t prev_chunk_live_to_right = live_to_right - cp->data_size();
const size_t prev_chunk_space_to_right = space_to_right + chunk_size;
double prev_chunk_density_to_right =
double(prev_chunk_live_to_right) / prev_chunk_space_to_right;
if (density_to_right <= prev_chunk_density_to_right) {
const size_t prev_region_live_to_right = live_to_right -
cp->data_size();
const size_t prev_region_space_to_right = space_to_right + region_size;
double prev_region_density_to_right =
double(prev_region_live_to_right) / prev_region_space_to_right;
if (density_to_right <= prev_region_density_to_right) {
return dense_prefix;
}
if (TraceParallelOldGCDensePrefix && Verbose) {
tty->print_cr("backing up from c=" SIZE_FORMAT_W(4) " d2r=%10.8f "
"pc_d2r=%10.8f", sd.chunk(cp), density_to_right,
prev_chunk_density_to_right);
"pc_d2r=%10.8f", sd.region(cp), density_to_right,
prev_region_density_to_right);
}
dense_prefix -= chunk_size;
live_to_right = prev_chunk_live_to_right;
space_to_right = prev_chunk_space_to_right;
density_to_right = prev_chunk_density_to_right;
dense_prefix -= region_size;
live_to_right = prev_region_live_to_right;
space_to_right = prev_region_space_to_right;
density_to_right = prev_region_density_to_right;
}
return dense_prefix;
}
dense_prefix += chunk_size;
dense_prefix += region_size;
++cp;
}
......@@ -1174,8 +967,8 @@ void PSParallelCompact::print_dense_prefix_stats(const char* const algorithm,
const bool maximum_compaction,
HeapWord* const addr)
{
const size_t chunk_idx = summary_data().addr_to_chunk_idx(addr);
ChunkData* const cp = summary_data().chunk(chunk_idx);
const size_t region_idx = summary_data().addr_to_region_idx(addr);
RegionData* const cp = summary_data().region(region_idx);
const MutableSpace* const space = _space_info[id].space();
HeapWord* const new_top = _space_info[id].new_top();
......@@ -1191,7 +984,7 @@ void PSParallelCompact::print_dense_prefix_stats(const char* const algorithm,
"d2l=" SIZE_FORMAT " d2l%%=%6.4f "
"d2r=" SIZE_FORMAT " l2r=" SIZE_FORMAT
" ratio=%10.8f",
algorithm, addr, chunk_idx,
algorithm, addr, region_idx,
space_live,
dead_to_left, dead_to_left_pct,
dead_to_right, live_to_right,
......@@ -1253,52 +1046,52 @@ double PSParallelCompact::dead_wood_limiter(double density, size_t min_percent)
return MAX2(limit, 0.0);
}
ParallelCompactData::ChunkData*
PSParallelCompact::first_dead_space_chunk(const ChunkData* beg,
const ChunkData* end)
ParallelCompactData::RegionData*
PSParallelCompact::first_dead_space_region(const RegionData* beg,
const RegionData* end)
{
const size_t chunk_size = ParallelCompactData::ChunkSize;
const size_t region_size = ParallelCompactData::RegionSize;
ParallelCompactData& sd = summary_data();
size_t left = sd.chunk(beg);
size_t right = end > beg ? sd.chunk(end) - 1 : left;
size_t left = sd.region(beg);
size_t right = end > beg ? sd.region(end) - 1 : left;
// Binary search.
while (left < right) {
// Equivalent to (left + right) / 2, but does not overflow.
const size_t middle = left + (right - left) / 2;
ChunkData* const middle_ptr = sd.chunk(middle);
RegionData* const middle_ptr = sd.region(middle);
HeapWord* const dest = middle_ptr->destination();
HeapWord* const addr = sd.chunk_to_addr(middle);
HeapWord* const addr = sd.region_to_addr(middle);
assert(dest != NULL, "sanity");
assert(dest <= addr, "must move left");
if (middle > left && dest < addr) {
right = middle - 1;
} else if (middle < right && middle_ptr->data_size() == chunk_size) {
} else if (middle < right && middle_ptr->data_size() == region_size) {
left = middle + 1;
} else {
return middle_ptr;
}
}
return sd.chunk(left);
return sd.region(left);
}
ParallelCompactData::ChunkData*
PSParallelCompact::dead_wood_limit_chunk(const ChunkData* beg,
const ChunkData* end,
size_t dead_words)
ParallelCompactData::RegionData*
PSParallelCompact::dead_wood_limit_region(const RegionData* beg,
const RegionData* end,
size_t dead_words)
{
ParallelCompactData& sd = summary_data();
size_t left = sd.chunk(beg);
size_t right = end > beg ? sd.chunk(end) - 1 : left;
size_t left = sd.region(beg);
size_t right = end > beg ? sd.region(end) - 1 : left;
// Binary search.
while (left < right) {
// Equivalent to (left + right) / 2, but does not overflow.
const size_t middle = left + (right - left) / 2;
ChunkData* const middle_ptr = sd.chunk(middle);
RegionData* const middle_ptr = sd.region(middle);
HeapWord* const dest = middle_ptr->destination();
HeapWord* const addr = sd.chunk_to_addr(middle);
HeapWord* const addr = sd.region_to_addr(middle);
assert(dest != NULL, "sanity");
assert(dest <= addr, "must move left");
......@@ -1311,13 +1104,13 @@ PSParallelCompact::dead_wood_limit_chunk(const ChunkData* beg,
return middle_ptr;
}
}
return sd.chunk(left);
return sd.region(left);
}
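// Side note on the midpoint computation used by both binary searches above: a
// small standalone sketch of why 'left + (right - left) / 2' is preferred over
// '(left + right) / 2', which can wrap around for very large indices.
#include <cstddef>

static size_t midpoint(size_t left, size_t right) {
  // Never overflows when left <= right, since (right - left) <= right.
  return left + (right - left) / 2;
}
// With left and right both near SIZE_MAX, (left + right) / 2 would wrap to a
// small value, while midpoint(left, right) still lies between the two bounds.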
// The result is valid during the summary phase, after the initial summarization
// of each space into itself, and before final summarization.
inline double
PSParallelCompact::reclaimed_ratio(const ChunkData* const cp,
PSParallelCompact::reclaimed_ratio(const RegionData* const cp,
HeapWord* const bottom,
HeapWord* const top,
HeapWord* const new_top)
......@@ -1331,12 +1124,13 @@ PSParallelCompact::reclaimed_ratio(const ChunkData* const cp,
assert(top >= new_top, "summary data problem?");
assert(new_top > bottom, "space is empty; should not be here");
assert(new_top >= cp->destination(), "sanity");
assert(top >= sd.chunk_to_addr(cp), "sanity");
assert(top >= sd.region_to_addr(cp), "sanity");
HeapWord* const destination = cp->destination();
const size_t dense_prefix_live = pointer_delta(destination, bottom);
const size_t compacted_region_live = pointer_delta(new_top, destination);
const size_t compacted_region_used = pointer_delta(top, sd.chunk_to_addr(cp));
const size_t compacted_region_used = pointer_delta(top,
sd.region_to_addr(cp));
const size_t reclaimable = compacted_region_used - compacted_region_live;
const double divisor = dense_prefix_live + 1.25 * compacted_region_live;
......@@ -1344,39 +1138,40 @@ PSParallelCompact::reclaimed_ratio(const ChunkData* const cp,
}
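// A standalone sketch (illustrative only) of the reclaimed_ratio() heuristic
// above, using plain word offsets instead of HeapWord* and RegionData. The
// quantities mirror the ones computed above; the final division is an assumed
// reading of the return statement, which falls outside this excerpt.
#include <cstddef>

static double reclaimed_ratio_sketch(size_t bottom,        // space bottom (word offset)
                                     size_t top,           // current space top
                                     size_t new_top,       // top after compaction
                                     size_t region_addr,   // candidate region start
                                     size_t destination) { // candidate region destination
  // Live words left of the candidate boundary: kept in place, but still
  // scanned to update interior pointers.
  const size_t dense_prefix_live     = destination - bottom;
  // Live words right of the boundary: these must actually be copied.
  const size_t compacted_region_live = new_top - destination;
  // Words currently occupied from the candidate region's start up to top.
  const size_t compacted_region_used = top - region_addr;
  // Dead words that compacting the right-hand side would squeeze out.
  const size_t reclaimable           = compacted_region_used - compacted_region_live;

  // Cost estimate: copying live data is weighted more heavily (1.25x) than
  // updating the dense prefix in place, as in the code above.
  const double divisor = dense_prefix_live + 1.25 * compacted_region_live;
  return double(reclaimable) / divisor;
}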
// Return the address of the end of the dense prefix, a.k.a. the start of the
// compacted region. The address is always on a chunk boundary.
// compacted region. The address is always on a region boundary.
//
// Completely full chunks at the left are skipped, since no compaction can occur
// in those chunks. Then the maximum amount of dead wood to allow is computed,
// based on the density (amount live / capacity) of the generation; the chunk
// with approximately that amount of dead space to the left is identified as the
// limit chunk. Chunks between the last completely full chunk and the limit
// chunk are scanned and the one that has the best (maximum) reclaimed_ratio()
// is selected.
// Completely full regions at the left are skipped, since no compaction can
// occur in those regions. Then the maximum amount of dead wood to allow is
// computed, based on the density (amount live / capacity) of the generation;
// the region with approximately that amount of dead space to the left is
// identified as the limit region. Regions between the last completely full
// region and the limit region are scanned and the one that has the best
// (maximum) reclaimed_ratio() is selected.
HeapWord*
PSParallelCompact::compute_dense_prefix(const SpaceId id,
bool maximum_compaction)
{
const size_t chunk_size = ParallelCompactData::ChunkSize;
const size_t region_size = ParallelCompactData::RegionSize;
const ParallelCompactData& sd = summary_data();
const MutableSpace* const space = _space_info[id].space();
HeapWord* const top = space->top();
HeapWord* const top_aligned_up = sd.chunk_align_up(top);
HeapWord* const top_aligned_up = sd.region_align_up(top);
HeapWord* const new_top = _space_info[id].new_top();
HeapWord* const new_top_aligned_up = sd.chunk_align_up(new_top);
HeapWord* const new_top_aligned_up = sd.region_align_up(new_top);
HeapWord* const bottom = space->bottom();
const ChunkData* const beg_cp = sd.addr_to_chunk_ptr(bottom);
const ChunkData* const top_cp = sd.addr_to_chunk_ptr(top_aligned_up);
const ChunkData* const new_top_cp = sd.addr_to_chunk_ptr(new_top_aligned_up);
const RegionData* const beg_cp = sd.addr_to_region_ptr(bottom);
const RegionData* const top_cp = sd.addr_to_region_ptr(top_aligned_up);
const RegionData* const new_top_cp =
sd.addr_to_region_ptr(new_top_aligned_up);
// Skip full chunks at the beginning of the space--they are necessarily part
// Skip full regions at the beginning of the space--they are necessarily part
// of the dense prefix.
const ChunkData* const full_cp = first_dead_space_chunk(beg_cp, new_top_cp);
assert(full_cp->destination() == sd.chunk_to_addr(full_cp) ||
const RegionData* const full_cp = first_dead_space_region(beg_cp, new_top_cp);
assert(full_cp->destination() == sd.region_to_addr(full_cp) ||
space->is_empty(), "no dead space allowed to the left");
assert(full_cp->data_size() < chunk_size || full_cp == new_top_cp - 1,
"chunk must have dead space");
assert(full_cp->data_size() < region_size || full_cp == new_top_cp - 1,
"region must have dead space");
// The gc number is saved whenever a maximum compaction is done, and used to
// determine when the maximum compaction interval has expired. This avoids
......@@ -1387,7 +1182,7 @@ PSParallelCompact::compute_dense_prefix(const SpaceId id,
total_invocations() == HeapFirstMaximumCompactionCount;
if (maximum_compaction || full_cp == top_cp || interval_ended) {
_maximum_compaction_gc_num = total_invocations();
return sd.chunk_to_addr(full_cp);
return sd.region_to_addr(full_cp);
}
const size_t space_live = pointer_delta(new_top, bottom);
......@@ -1413,15 +1208,15 @@ PSParallelCompact::compute_dense_prefix(const SpaceId id,
dead_wood_max, dead_wood_limit);
}
// Locate the chunk with the desired amount of dead space to the left.
const ChunkData* const limit_cp =
dead_wood_limit_chunk(full_cp, top_cp, dead_wood_limit);
// Locate the region with the desired amount of dead space to the left.
const RegionData* const limit_cp =
dead_wood_limit_region(full_cp, top_cp, dead_wood_limit);
// Scan from the first chunk with dead space to the limit chunk and find the
// Scan from the first region with dead space to the limit region and find the
// one with the best (largest) reclaimed ratio.
double best_ratio = 0.0;
const ChunkData* best_cp = full_cp;
for (const ChunkData* cp = full_cp; cp < limit_cp; ++cp) {
const RegionData* best_cp = full_cp;
for (const RegionData* cp = full_cp; cp < limit_cp; ++cp) {
double tmp_ratio = reclaimed_ratio(cp, bottom, top, new_top);
if (tmp_ratio > best_ratio) {
best_cp = cp;
......@@ -1430,18 +1225,18 @@ PSParallelCompact::compute_dense_prefix(const SpaceId id,
}
#if 0
// Something to consider: if the chunk with the best ratio is 'close to' the
// first chunk w/free space, choose the first chunk with free space
// ("first-free"). The first-free chunk is usually near the start of the
// Something to consider: if the region with the best ratio is 'close to' the
// first region w/free space, choose the first region with free space
// ("first-free"). The first-free region is usually near the start of the
// heap, which means we are copying most of the heap already, so copy a bit
// more to get complete compaction.
if (pointer_delta(best_cp, full_cp, sizeof(ChunkData)) < 4) {
if (pointer_delta(best_cp, full_cp, sizeof(RegionData)) < 4) {
_maximum_compaction_gc_num = total_invocations();
best_cp = full_cp;
}
#endif // #if 0
return sd.chunk_to_addr(best_cp);
return sd.region_to_addr(best_cp);
}
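// The helpers used throughout compute_dense_prefix() -- region_align_up(),
// addr_to_region_idx(), region_to_addr() -- are plain power-of-two address
// arithmetic. A minimal sketch in word offsets; the region size below is an
// assumed power of two for illustration, not ParallelCompactData::RegionSize.
#include <cstddef>

static const size_t kRegionSizeWords = 512;  // assumption: any power of two

static size_t region_align_up_sketch(size_t word_off) {
  return (word_off + kRegionSizeWords - 1) & ~(kRegionSizeWords - 1);
}
static size_t addr_to_region_idx_sketch(size_t word_off) {
  return word_off / kRegionSizeWords;        // equivalently, a right shift
}
static size_t region_to_addr_sketch(size_t region_idx) {
  return region_idx * kRegionSizeWords;
}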
void PSParallelCompact::summarize_spaces_quick()
......@@ -1459,9 +1254,9 @@ void PSParallelCompact::summarize_spaces_quick()
void PSParallelCompact::fill_dense_prefix_end(SpaceId id)
{
HeapWord* const dense_prefix_end = dense_prefix(id);
const ChunkData* chunk = _summary_data.addr_to_chunk_ptr(dense_prefix_end);
const RegionData* region = _summary_data.addr_to_region_ptr(dense_prefix_end);
const idx_t dense_prefix_bit = _mark_bitmap.addr_to_bit(dense_prefix_end);
if (dead_space_crosses_boundary(chunk, dense_prefix_bit)) {
if (dead_space_crosses_boundary(region, dense_prefix_bit)) {
// Only enough dead space is filled so that any remaining dead space to the
// left is larger than the minimum filler object. (The remainder is filled
// during the copy/update phase.)
......@@ -1552,7 +1347,7 @@ PSParallelCompact::summarize_space(SpaceId id, bool maximum_compaction)
fill_dense_prefix_end(id);
}
// Compute the destination of each Chunk, and thus each object.
// Compute the destination of each Region, and thus each object.
_summary_data.summarize_dense_prefix(space->bottom(), dense_prefix_end);
_summary_data.summarize(dense_prefix_end, space->end(),
dense_prefix_end, space->top(),
......@@ -1560,19 +1355,19 @@ PSParallelCompact::summarize_space(SpaceId id, bool maximum_compaction)
}
if (TraceParallelOldGCSummaryPhase) {
const size_t chunk_size = ParallelCompactData::ChunkSize;
const size_t region_size = ParallelCompactData::RegionSize;
HeapWord* const dense_prefix_end = _space_info[id].dense_prefix();
const size_t dp_chunk = _summary_data.addr_to_chunk_idx(dense_prefix_end);
const size_t dp_region = _summary_data.addr_to_region_idx(dense_prefix_end);
const size_t dp_words = pointer_delta(dense_prefix_end, space->bottom());
HeapWord* const new_top = _space_info[id].new_top();
const HeapWord* nt_aligned_up = _summary_data.chunk_align_up(new_top);
const HeapWord* nt_aligned_up = _summary_data.region_align_up(new_top);
const size_t cr_words = pointer_delta(nt_aligned_up, dense_prefix_end);
tty->print_cr("id=%d cap=" SIZE_FORMAT " dp=" PTR_FORMAT " "
"dp_chunk=" SIZE_FORMAT " " "dp_count=" SIZE_FORMAT " "
"dp_region=" SIZE_FORMAT " " "dp_count=" SIZE_FORMAT " "
"cr_count=" SIZE_FORMAT " " "nt=" PTR_FORMAT,
id, space->capacity_in_words(), dense_prefix_end,
dp_chunk, dp_words / chunk_size,
cr_words / chunk_size, new_top);
dp_region, dp_words / region_size,
cr_words / region_size, new_top);
}
}
......@@ -1584,11 +1379,6 @@ void PSParallelCompact::summary_phase(ParCompactionManager* cm,
// trace("2");
#ifdef ASSERT
if (VerifyParallelOldWithMarkSweep &&
(PSParallelCompact::total_invocations() %
VerifyParallelOldWithMarkSweepInterval) == 0) {
verify_mark_bitmap(_mark_bitmap);
}
if (TraceParallelOldGCMarkingPhase) {
tty->print_cr("add_obj_count=" SIZE_FORMAT " "
"add_obj_bytes=" SIZE_FORMAT,
......@@ -1605,7 +1395,7 @@ void PSParallelCompact::summary_phase(ParCompactionManager* cm,
if (TraceParallelOldGCSummaryPhase) {
tty->print_cr("summary_phase: after summarizing each space to self");
Universe::print();
NOT_PRODUCT(print_chunk_ranges());
NOT_PRODUCT(print_region_ranges());
if (Verbose) {
NOT_PRODUCT(print_initial_summary_data(_summary_data, _space_info));
}
......@@ -1651,14 +1441,15 @@ void PSParallelCompact::summary_phase(ParCompactionManager* cm,
space->bottom(), space->top(),
new_top_addr);
// Clear the source_chunk field for each chunk in the space.
// Clear the source_region field for each region in the space.
HeapWord* const new_top = _space_info[id].new_top();
HeapWord* const clear_end = _summary_data.chunk_align_up(new_top);
ChunkData* beg_chunk = _summary_data.addr_to_chunk_ptr(space->bottom());
ChunkData* end_chunk = _summary_data.addr_to_chunk_ptr(clear_end);
while (beg_chunk < end_chunk) {
beg_chunk->set_source_chunk(0);
++beg_chunk;
HeapWord* const clear_end = _summary_data.region_align_up(new_top);
RegionData* beg_region =
_summary_data.addr_to_region_ptr(space->bottom());
RegionData* end_region = _summary_data.addr_to_region_ptr(clear_end);
while (beg_region < end_region) {
beg_region->set_source_region(0);
++beg_region;
}
// Reset the new_top value for the space.
......@@ -1666,243 +1457,16 @@ void PSParallelCompact::summary_phase(ParCompactionManager* cm,
}
}
// Fill in the block data after any changes to the chunks have
// been made.
#ifdef ASSERT
summarize_blocks(cm, perm_space_id);
summarize_blocks(cm, old_space_id);
#else
if (!UseParallelOldGCChunkPointerCalc) {
summarize_blocks(cm, perm_space_id);
summarize_blocks(cm, old_space_id);
}
#endif
if (TraceParallelOldGCSummaryPhase) {
tty->print_cr("summary_phase: after final summarization");
Universe::print();
NOT_PRODUCT(print_chunk_ranges());
NOT_PRODUCT(print_region_ranges());
if (Verbose) {
NOT_PRODUCT(print_generic_summary_data(_summary_data, _space_info));
}
}
}
// Fill in the BlockData.
// Iterate over the spaces and within each space iterate over
// the chunks and fill in the BlockData for each chunk.
void PSParallelCompact::summarize_blocks(ParCompactionManager* cm,
SpaceId first_compaction_space_id) {
#if 0
DEBUG_ONLY(ParallelCompactData::BlockData::set_cur_phase(1);)
for (SpaceId cur_space_id = first_compaction_space_id;
cur_space_id != last_space_id;
cur_space_id = next_compaction_space_id(cur_space_id)) {
// Iterate over the chunks in the space
size_t start_chunk_index =
_summary_data.addr_to_chunk_idx(space(cur_space_id)->bottom());
BitBlockUpdateClosure bbu(mark_bitmap(),
cm,
start_chunk_index);
// Iterate over blocks.
for (size_t chunk_index = start_chunk_index;
chunk_index < _summary_data.chunk_count() &&
_summary_data.chunk_to_addr(chunk_index) < space(cur_space_id)->top();
chunk_index++) {
// Reset the closure for the new chunk. Note that the closure
// maintains some data that does not get reset for each chunk
// so a new instance of the closure is not appropriate.
bbu.reset_chunk(chunk_index);
// Start the iteration with the first live object. This
// may return the end of the chunk. That is acceptable since
// it will properly limit the iterations.
ParMarkBitMap::idx_t left_offset = mark_bitmap()->addr_to_bit(
_summary_data.first_live_or_end_in_chunk(chunk_index));
// End the iteration at the end of the chunk.
HeapWord* chunk_addr = _summary_data.chunk_to_addr(chunk_index);
HeapWord* chunk_end = chunk_addr + ParallelCompactData::ChunkSize;
ParMarkBitMap::idx_t right_offset =
mark_bitmap()->addr_to_bit(chunk_end);
// Blocks that have no objects starting in them can be
// skipped because their data will never be used.
if (left_offset < right_offset) {
// Iterate through the objects in the chunk.
ParMarkBitMap::idx_t last_offset =
mark_bitmap()->pair_iterate(&bbu, left_offset, right_offset);
// If last_offset is less than right_offset, then the iterations
// terminated while it was looking for an end bit. "last_offset"
// is then the offset for the last start bit. In this situation
// the "offset" field for the next block to the right (_cur_block + 1)
// will not have been updated although there may be live data
// to the left of the chunk.
size_t cur_block_plus_1 = bbu.cur_block() + 1;
HeapWord* cur_block_plus_1_addr =
_summary_data.block_to_addr(bbu.cur_block()) +
ParallelCompactData::BlockSize;
HeapWord* last_offset_addr = mark_bitmap()->bit_to_addr(last_offset);
#if 1 // This code works. The else doesn't but should. Why does it?
// The current block (cur_block()) has already been updated.
// The last block that may need to be updated is either the
// next block (current block + 1) or the block where the
// last object starts (which can be greater than the
// next block if there were no objects found in intervening
// blocks).
size_t last_block =
MAX2(bbu.cur_block() + 1,
_summary_data.addr_to_block_idx(last_offset_addr));
#else
// The current block has already been updated. The only block
// that remains to be updated is the block where the last
// object in the chunk starts.
size_t last_block = _summary_data.addr_to_block_idx(last_offset_addr);
#endif
assert_bit_is_start(last_offset);
assert((last_block == _summary_data.block_count()) ||
(_summary_data.block(last_block)->raw_offset() == 0),
"Should not have been set");
// Is the last block still in the current chunk? If still
// in this chunk, update the last block (the counting that
// included the current block is meant for the offset of the last
// block). If not in this chunk, do nothing. Should not
// update a block in the next chunk.
if (ParallelCompactData::chunk_contains_block(bbu.chunk_index(),
last_block)) {
if (last_offset < right_offset) {
// The last object started in this chunk but ends beyond
// this chunk. Update the block for this last object.
assert(mark_bitmap()->is_marked(last_offset), "Should be marked");
// No end bit was found. The closure takes care of
// the cases where
// an object crosses over into the next block
// an object starts and ends in the next block
// It does not handle the case where an object is
// the first object in a later block and extends
// past the end of the chunk (i.e., the closure
// only handles complete objects that are in the range
// it is given). That object is handed back here
// for any special consideration necessary.
//
// Is the first bit in the last block a start or end bit?
//
// If the partial object ends in the last block L,
// then the 1st bit in L may be an end bit.
//
// Else does the last object start in a block after the current
// block? A block AA will already have been updated if an
// object ends in the next block AA+1. An object found to end in
// the AA+1 is the trigger that updates AA. Objects are being
// counted in the current block for updating a following
// block. An object may start in a later block
// but may extend beyond the last block in the chunk.
// Updates are only done when the end of an object has been
// found. If the last object (covered by block L) starts
// beyond the current block, then no object ends in L (otherwise
// L would be the current block). So the first bit in L is
// a start bit.
//
// Else the last object starts in the current block and ends
// beyond the chunk. The current block has already been
// updated and there is no later block (with an object
// starting in it) that needs to be updated.
//
if (_summary_data.partial_obj_ends_in_block(last_block)) {
_summary_data.block(last_block)->set_end_bit_offset(
bbu.live_data_left());
} else if (last_offset_addr >= cur_block_plus_1_addr) {
// The start of the object is in a later block
// (to the right of the current block and there are no
// complete live objects to the left of this last object
// within the chunk).
// The first bit in the block is for the start of the
// last object.
_summary_data.block(last_block)->set_start_bit_offset(
bbu.live_data_left());
} else {
// The start of the last object was found in
// the current chunk (which has already
// been updated).
assert(bbu.cur_block() ==
_summary_data.addr_to_block_idx(last_offset_addr),
"Should be a block already processed");
}
#ifdef ASSERT
// Is there enough block information to find this object?
// The destination of the chunk has not been set so the
// values returned by calc_new_pointer() and
// block_calc_new_pointer() will only be
// offsets. But they should agree.
HeapWord* moved_obj_with_chunks =
_summary_data.chunk_calc_new_pointer(last_offset_addr);
HeapWord* moved_obj_with_blocks =
_summary_data.calc_new_pointer(last_offset_addr);
assert(moved_obj_with_chunks == moved_obj_with_blocks,
"Block calculation is wrong");
#endif
} else if (last_block < _summary_data.block_count()) {
// Iterations ended looking for a start bit (but
// did not run off the end of the block table).
_summary_data.block(last_block)->set_start_bit_offset(
bbu.live_data_left());
}
}
#ifdef ASSERT
// Is there enough block information to find this object?
HeapWord* left_offset_addr = mark_bitmap()->bit_to_addr(left_offset);
HeapWord* moved_obj_with_chunks =
_summary_data.calc_new_pointer(left_offset_addr);
HeapWord* moved_obj_with_blocks =
_summary_data.calc_new_pointer(left_offset_addr);
assert(moved_obj_with_chunks == moved_obj_with_blocks,
"Block calculation is wrong");
#endif
// Is there another block after the end of this chunk?
#ifdef ASSERT
if (last_block < _summary_data.block_count()) {
// No object may have been found in a block. If that
// block is at the end of the chunk, the iteration will
// terminate without incrementing the current block so
// that the current block is not the last block in the
// chunk. That situation precludes asserting that the
// current block is the last block in the chunk. Assert
// the lesser condition that the current block does not
// exceed the chunk.
assert(_summary_data.block_to_addr(last_block) <=
(_summary_data.chunk_to_addr(chunk_index) +
ParallelCompactData::ChunkSize),
"Chunk and block inconsistency");
assert(last_offset <= right_offset, "Iteration over ran end");
}
#endif
}
#ifdef ASSERT
if (PrintGCDetails && Verbose) {
if (_summary_data.chunk(chunk_index)->partial_obj_size() == 1) {
size_t first_block =
chunk_index / ParallelCompactData::BlocksPerChunk;
gclog_or_tty->print_cr("first_block " PTR_FORMAT
" _offset " PTR_FORMAT
" _first_is_start_bit %d",
first_block,
_summary_data.block(first_block)->raw_offset(),
_summary_data.block(first_block)->first_is_start_bit());
}
}
#endif
}
}
DEBUG_ONLY(ParallelCompactData::BlockData::set_cur_phase(16);)
#endif // #if 0
}
// This method should contain all heap-specific policy for invoking a full
// collection. invoke_no_policy() will only attempt to compact the heap; it
// will do nothing further. If we need to bail out for policy reasons, scavenge
......@@ -1937,18 +1501,9 @@ void PSParallelCompact::invoke(bool maximum_heap_compaction) {
}
}
bool ParallelCompactData::chunk_contains(size_t chunk_index, HeapWord* addr) {
size_t addr_chunk_index = addr_to_chunk_idx(addr);
return chunk_index == addr_chunk_index;
}
bool ParallelCompactData::chunk_contains_block(size_t chunk_index,
size_t block_index) {
size_t first_block_in_chunk = chunk_index * BlocksPerChunk;
size_t last_block_in_chunk = (chunk_index + 1) * BlocksPerChunk - 1;
return (first_block_in_chunk <= block_index) &&
(block_index <= last_block_in_chunk);
bool ParallelCompactData::region_contains(size_t region_index, HeapWord* addr) {
size_t addr_region_index = addr_to_region_idx(addr);
return region_index == addr_region_index;
}
// This method contains no policy. You should probably
......@@ -2038,39 +1593,9 @@ void PSParallelCompact::invoke_no_policy(bool maximum_heap_compaction) {
}
#endif // #ifndef PRODUCT
#ifdef ASSERT
if (VerifyParallelOldWithMarkSweep &&
(PSParallelCompact::total_invocations() %
VerifyParallelOldWithMarkSweepInterval) == 0) {
gclog_or_tty->print_cr("Verify marking with mark_sweep_phase1()");
if (PrintGCDetails && Verbose) {
gclog_or_tty->print_cr("mark_sweep_phase1:");
}
// Clear the discovered lists so that discovered objects
// don't look like they have been discovered twice.
ref_processor()->clear_discovered_references();
PSMarkSweep::allocate_stacks();
MemRegion mr = Universe::heap()->reserved_region();
PSMarkSweep::ref_processor()->enable_discovery();
PSMarkSweep::mark_sweep_phase1(maximum_heap_compaction);
}
#endif
bool max_on_system_gc = UseMaximumCompactionOnSystemGC && is_system_gc;
summary_phase(vmthread_cm, maximum_heap_compaction || max_on_system_gc);
#ifdef ASSERT
if (VerifyParallelOldWithMarkSweep &&
(PSParallelCompact::total_invocations() %
VerifyParallelOldWithMarkSweepInterval) == 0) {
if (PrintGCDetails && Verbose) {
gclog_or_tty->print_cr("mark_sweep_phase2:");
}
PSMarkSweep::mark_sweep_phase2();
}
#endif
COMPILER2_PRESENT(assert(DerivedPointerTable::is_active(), "Sanity"));
COMPILER2_PRESENT(DerivedPointerTable::set_active(false));
......@@ -2078,28 +1603,6 @@ void PSParallelCompact::invoke_no_policy(bool maximum_heap_compaction) {
// needed by the compaction for filling holes in the dense prefix.
adjust_roots();
#ifdef ASSERT
if (VerifyParallelOldWithMarkSweep &&
(PSParallelCompact::total_invocations() %
VerifyParallelOldWithMarkSweepInterval) == 0) {
// Do a separate verify phase so that the verify
// code can use the forwarding pointers to
// check the new pointer calculation. The restore_marks()
// has to be done before the real compact.
vmthread_cm->set_action(ParCompactionManager::VerifyUpdate);
compact_perm(vmthread_cm);
compact_serial(vmthread_cm);
vmthread_cm->set_action(ParCompactionManager::ResetObjects);
compact_perm(vmthread_cm);
compact_serial(vmthread_cm);
vmthread_cm->set_action(ParCompactionManager::UpdateAndCopy);
// For debugging only
PSMarkSweep::restore_marks();
PSMarkSweep::deallocate_stacks();
}
#endif
compaction_start.update();
// Does the perm gen always have to be done serially because
// klasses are used in the update of an object?
......@@ -2349,7 +1852,7 @@ void PSParallelCompact::marking_phase(ParCompactionManager* cm,
ParallelScavengeHeap* heap = gc_heap();
uint parallel_gc_threads = heap->gc_task_manager()->workers();
TaskQueueSetSuper* qset = ParCompactionManager::chunk_array();
TaskQueueSetSuper* qset = ParCompactionManager::region_array();
ParallelTaskTerminator terminator(parallel_gc_threads, qset);
PSParallelCompact::MarkAndPushClosure mark_and_push_closure(cm);
......@@ -2487,8 +1990,9 @@ void PSParallelCompact::compact_perm(ParCompactionManager* cm) {
move_and_update(cm, perm_space_id);
}
void PSParallelCompact::enqueue_chunk_draining_tasks(GCTaskQueue* q,
uint parallel_gc_threads) {
void PSParallelCompact::enqueue_region_draining_tasks(GCTaskQueue* q,
uint parallel_gc_threads)
{
TraceTime tm("drain task setup", print_phases(), true, gclog_or_tty);
const unsigned int task_count = MAX2(parallel_gc_threads, 1U);
......@@ -2496,13 +2000,13 @@ void PSParallelCompact::enqueue_chunk_draining_tasks(GCTaskQueue* q,
q->enqueue(new DrainStacksCompactionTask());
}
// Find all chunks that are available (can be filled immediately) and
// Find all regions that are available (can be filled immediately) and
// distribute them to the thread stacks. The iteration is done in reverse
// order (high to low) so the chunks will be removed in ascending order.
// order (high to low) so the regions will be removed in ascending order.
const ParallelCompactData& sd = PSParallelCompact::summary_data();
size_t fillable_chunks = 0; // A count for diagnostic purposes.
size_t fillable_regions = 0; // A count for diagnostic purposes.
unsigned int which = 0; // The worker thread number.
for (unsigned int id = to_space_id; id > perm_space_id; --id) {
......@@ -2510,25 +2014,26 @@ void PSParallelCompact::enqueue_chunk_draining_tasks(GCTaskQueue* q,
MutableSpace* const space = space_info->space();
HeapWord* const new_top = space_info->new_top();
const size_t beg_chunk = sd.addr_to_chunk_idx(space_info->dense_prefix());
const size_t end_chunk = sd.addr_to_chunk_idx(sd.chunk_align_up(new_top));
assert(end_chunk > 0, "perm gen cannot be empty");
const size_t beg_region = sd.addr_to_region_idx(space_info->dense_prefix());
const size_t end_region =
sd.addr_to_region_idx(sd.region_align_up(new_top));
assert(end_region > 0, "perm gen cannot be empty");
for (size_t cur = end_chunk - 1; cur >= beg_chunk; --cur) {
if (sd.chunk(cur)->claim_unsafe()) {
for (size_t cur = end_region - 1; cur >= beg_region; --cur) {
if (sd.region(cur)->claim_unsafe()) {
ParCompactionManager* cm = ParCompactionManager::manager_array(which);
cm->save_for_processing(cur);
if (TraceParallelOldGCCompactionPhase && Verbose) {
const size_t count_mod_8 = fillable_chunks & 7;
const size_t count_mod_8 = fillable_regions & 7;
if (count_mod_8 == 0) gclog_or_tty->print("fillable: ");
gclog_or_tty->print(" " SIZE_FORMAT_W(7), cur);
if (count_mod_8 == 7) gclog_or_tty->cr();
}
NOT_PRODUCT(++fillable_chunks;)
NOT_PRODUCT(++fillable_regions;)
// Assign chunks to threads in round-robin fashion.
// Assign regions to threads in round-robin fashion.
if (++which == task_count) {
which = 0;
}
......@@ -2537,8 +2042,8 @@ void PSParallelCompact::enqueue_chunk_draining_tasks(GCTaskQueue* q,
}
if (TraceParallelOldGCCompactionPhase) {
if (Verbose && (fillable_chunks & 7) != 0) gclog_or_tty->cr();
gclog_or_tty->print_cr("%u initially fillable chunks", fillable_chunks);
if (Verbose && (fillable_regions & 7) != 0) gclog_or_tty->cr();
gclog_or_tty->print_cr("%u initially fillable regions", fillable_regions);
}
}
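// A minimal sketch (illustrative only, not part of this changeset) of the
// round-robin handout above: claimed region indices are dealt to the worker
// stacks one at a time, wrapping the worker number when it reaches the task
// count. The container types and names are assumptions.
#include <cstddef>
#include <vector>

static void deal_round_robin(const std::vector<size_t>& claimed_regions,
                             std::vector<std::vector<size_t> >& worker_stacks) {
  const size_t task_count = worker_stacks.size();
  size_t which = 0;                       // the worker thread number
  for (size_t i = 0; i < claimed_regions.size(); ++i) {
    worker_stacks[which].push_back(claimed_regions[i]);
    if (++which == task_count) {          // wrap around, as in the loop above
      which = 0;
    }
  }
}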
......@@ -2551,7 +2056,7 @@ void PSParallelCompact::enqueue_dense_prefix_tasks(GCTaskQueue* q,
ParallelCompactData& sd = PSParallelCompact::summary_data();
// Iterate over all the spaces adding tasks for updating
// chunks in the dense prefix. Assume that 1 gc thread
// regions in the dense prefix. Assume that 1 gc thread
// will work on opening the gaps and the remaining gc threads
// will work on the dense prefix.
SpaceId space_id = old_space_id;
......@@ -2565,30 +2070,31 @@ void PSParallelCompact::enqueue_dense_prefix_tasks(GCTaskQueue* q,
continue;
}
// The dense prefix is before this chunk.
size_t chunk_index_end_dense_prefix =
sd.addr_to_chunk_idx(dense_prefix_end);
ChunkData* const dense_prefix_cp = sd.chunk(chunk_index_end_dense_prefix);
// The dense prefix is before this region.
size_t region_index_end_dense_prefix =
sd.addr_to_region_idx(dense_prefix_end);
RegionData* const dense_prefix_cp =
sd.region(region_index_end_dense_prefix);
assert(dense_prefix_end == space->end() ||
dense_prefix_cp->available() ||
dense_prefix_cp->claimed(),
"The chunk after the dense prefix should always be ready to fill");
"The region after the dense prefix should always be ready to fill");
size_t chunk_index_start = sd.addr_to_chunk_idx(space->bottom());
size_t region_index_start = sd.addr_to_region_idx(space->bottom());
// Is there dense prefix work?
size_t total_dense_prefix_chunks =
chunk_index_end_dense_prefix - chunk_index_start;
// How many chunks of the dense prefix should be given to
size_t total_dense_prefix_regions =
region_index_end_dense_prefix - region_index_start;
// How many regions of the dense prefix should be given to
// each thread?
if (total_dense_prefix_chunks > 0) {
if (total_dense_prefix_regions > 0) {
uint tasks_for_dense_prefix = 1;
if (UseParallelDensePrefixUpdate) {
if (total_dense_prefix_chunks <=
if (total_dense_prefix_regions <=
(parallel_gc_threads * PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING)) {
// Don't over partition. This assumes that
// PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING is a small integer value
// so there are not many chunks to process.
// so there are not many regions to process.
tasks_for_dense_prefix = parallel_gc_threads;
} else {
// Over partition
......@@ -2596,50 +2102,50 @@ void PSParallelCompact::enqueue_dense_prefix_tasks(GCTaskQueue* q,
PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING;
}
}
size_t chunks_per_thread = total_dense_prefix_chunks /
size_t regions_per_thread = total_dense_prefix_regions /
tasks_for_dense_prefix;
// Give each thread at least 1 chunk.
if (chunks_per_thread == 0) {
chunks_per_thread = 1;
// Give each thread at least 1 region.
if (regions_per_thread == 0) {
regions_per_thread = 1;
}
for (uint k = 0; k < tasks_for_dense_prefix; k++) {
if (chunk_index_start >= chunk_index_end_dense_prefix) {
if (region_index_start >= region_index_end_dense_prefix) {
break;
}
// chunk_index_end is not processed
size_t chunk_index_end = MIN2(chunk_index_start + chunks_per_thread,
chunk_index_end_dense_prefix);
// region_index_end is not processed
size_t region_index_end = MIN2(region_index_start + regions_per_thread,
region_index_end_dense_prefix);
q->enqueue(new UpdateDensePrefixTask(
space_id,
chunk_index_start,
chunk_index_end));
chunk_index_start = chunk_index_end;
region_index_start,
region_index_end));
region_index_start = region_index_end;
}
}
// This gets any part of the dense prefix that did not
// fit evenly.
if (chunk_index_start < chunk_index_end_dense_prefix) {
if (region_index_start < region_index_end_dense_prefix) {
q->enqueue(new UpdateDensePrefixTask(
space_id,
chunk_index_start,
chunk_index_end_dense_prefix));
region_index_start,
region_index_end_dense_prefix));
}
space_id = next_compaction_space_id(space_id);
} // End tasks for dense prefix
}
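// A standalone sketch (illustrative only) of how the dense-prefix regions are
// carved into per-task [start, end) slices above: divide evenly, give each
// task at least one region, and hand any part that does not fit evenly to one
// final task. The vector-of-pairs representation is an assumption.
#include <cstddef>
#include <utility>
#include <vector>

static std::vector<std::pair<size_t, size_t> >
slice_dense_prefix(size_t region_index_start,
                   size_t region_index_end,     // one past the last dense-prefix region
                   size_t tasks_for_dense_prefix) {
  std::vector<std::pair<size_t, size_t> > slices;
  const size_t total = region_index_end - region_index_start;
  if (total == 0 || tasks_for_dense_prefix == 0) return slices;

  size_t regions_per_thread = total / tasks_for_dense_prefix;
  if (regions_per_thread == 0) regions_per_thread = 1;   // at least one region each

  size_t start = region_index_start;
  for (size_t k = 0; k < tasks_for_dense_prefix && start < region_index_end; ++k) {
    const size_t end = (region_index_end - start > regions_per_thread)
                           ? start + regions_per_thread
                           : region_index_end;           // end is exclusive
    slices.push_back(std::make_pair(start, end));
    start = end;
  }
  if (start < region_index_end) {                        // leftover regions
    slices.push_back(std::make_pair(start, region_index_end));
  }
  return slices;
}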
void PSParallelCompact::enqueue_chunk_stealing_tasks(
void PSParallelCompact::enqueue_region_stealing_tasks(
GCTaskQueue* q,
ParallelTaskTerminator* terminator_ptr,
uint parallel_gc_threads) {
TraceTime tm("steal task setup", print_phases(), true, gclog_or_tty);
// Once a thread has drained its stack, it should try to steal regions from
// Once a thread has drained it's stack, it should try to steal regions from
// other threads.
if (parallel_gc_threads > 1) {
for (uint j = 0; j < parallel_gc_threads; j++) {
q->enqueue(new StealChunkCompactionTask(terminator_ptr));
q->enqueue(new StealRegionCompactionTask(terminator_ptr));
}
}
}
......@@ -2654,13 +2160,13 @@ void PSParallelCompact::compact() {
PSOldGen* old_gen = heap->old_gen();
old_gen->start_array()->reset();
uint parallel_gc_threads = heap->gc_task_manager()->workers();
TaskQueueSetSuper* qset = ParCompactionManager::chunk_array();
TaskQueueSetSuper* qset = ParCompactionManager::region_array();
ParallelTaskTerminator terminator(parallel_gc_threads, qset);
GCTaskQueue* q = GCTaskQueue::create();
enqueue_chunk_draining_tasks(q, parallel_gc_threads);
enqueue_region_draining_tasks(q, parallel_gc_threads);
enqueue_dense_prefix_tasks(q, parallel_gc_threads);
enqueue_chunk_stealing_tasks(q, &terminator, parallel_gc_threads);
enqueue_region_stealing_tasks(q, &terminator, parallel_gc_threads);
{
TraceTime tm_pc("par compact", print_phases(), true, gclog_or_tty);
......@@ -2676,9 +2182,9 @@ void PSParallelCompact::compact() {
WaitForBarrierGCTask::destroy(fin);
#ifdef ASSERT
// Verify that all chunks have been processed before the deferred updates.
// Verify that all regions have been processed before the deferred updates.
// Note that perm_space_id is skipped; this type of verification is not
// valid until the perm gen is compacted by chunks.
// valid until the perm gen is compacted by regions.
for (unsigned int id = old_space_id; id < last_space_id; ++id) {
verify_complete(SpaceId(id));
}
......@@ -2697,42 +2203,42 @@ void PSParallelCompact::compact() {
#ifdef ASSERT
void PSParallelCompact::verify_complete(SpaceId space_id) {
// All Chunks between space bottom() to new_top() should be marked as filled
// and all Chunks between new_top() and top() should be available (i.e.,
// All Regions between space bottom() to new_top() should be marked as filled
// and all Regions between new_top() and top() should be available (i.e.,
// should have been emptied).
ParallelCompactData& sd = summary_data();
SpaceInfo si = _space_info[space_id];
HeapWord* new_top_addr = sd.chunk_align_up(si.new_top());
HeapWord* old_top_addr = sd.chunk_align_up(si.space()->top());
const size_t beg_chunk = sd.addr_to_chunk_idx(si.space()->bottom());
const size_t new_top_chunk = sd.addr_to_chunk_idx(new_top_addr);
const size_t old_top_chunk = sd.addr_to_chunk_idx(old_top_addr);
HeapWord* new_top_addr = sd.region_align_up(si.new_top());
HeapWord* old_top_addr = sd.region_align_up(si.space()->top());
const size_t beg_region = sd.addr_to_region_idx(si.space()->bottom());
const size_t new_top_region = sd.addr_to_region_idx(new_top_addr);
const size_t old_top_region = sd.addr_to_region_idx(old_top_addr);
bool issued_a_warning = false;
size_t cur_chunk;
for (cur_chunk = beg_chunk; cur_chunk < new_top_chunk; ++cur_chunk) {
const ChunkData* const c = sd.chunk(cur_chunk);
size_t cur_region;
for (cur_region = beg_region; cur_region < new_top_region; ++cur_region) {
const RegionData* const c = sd.region(cur_region);
if (!c->completed()) {
warning("chunk " SIZE_FORMAT " not filled: "
warning("region " SIZE_FORMAT " not filled: "
"destination_count=" SIZE_FORMAT,
cur_chunk, c->destination_count());
cur_region, c->destination_count());
issued_a_warning = true;
}
}
for (cur_chunk = new_top_chunk; cur_chunk < old_top_chunk; ++cur_chunk) {
const ChunkData* const c = sd.chunk(cur_chunk);
for (cur_region = new_top_region; cur_region < old_top_region; ++cur_region) {
const RegionData* const c = sd.region(cur_region);
if (!c->available()) {
warning("chunk " SIZE_FORMAT " not empty: "
warning("region " SIZE_FORMAT " not empty: "
"destination_count=" SIZE_FORMAT,
cur_chunk, c->destination_count());
cur_region, c->destination_count());
issued_a_warning = true;
}
}
if (issued_a_warning) {
print_chunk_ranges();
print_region_ranges();
}
}
#endif // #ifdef ASSERT
......@@ -2933,46 +2439,47 @@ void PSParallelCompact::print_new_location_of_heap_address(HeapWord* q) {
}
#endif //VALIDATE_MARK_SWEEP
// Update interior oops in the ranges of chunks [beg_chunk, end_chunk).
// Update interior oops in the ranges of regions [beg_region, end_region).
void
PSParallelCompact::update_and_deadwood_in_dense_prefix(ParCompactionManager* cm,
SpaceId space_id,
size_t beg_chunk,
size_t end_chunk) {
size_t beg_region,
size_t end_region) {
ParallelCompactData& sd = summary_data();
ParMarkBitMap* const mbm = mark_bitmap();
HeapWord* beg_addr = sd.chunk_to_addr(beg_chunk);
HeapWord* const end_addr = sd.chunk_to_addr(end_chunk);
assert(beg_chunk <= end_chunk, "bad chunk range");
HeapWord* beg_addr = sd.region_to_addr(beg_region);
HeapWord* const end_addr = sd.region_to_addr(end_region);
assert(beg_region <= end_region, "bad region range");
assert(end_addr <= dense_prefix(space_id), "not in the dense prefix");
#ifdef ASSERT
// Claim the chunks to avoid triggering an assert when they are marked as
// Claim the regions to avoid triggering an assert when they are marked as
// filled.
for (size_t claim_chunk = beg_chunk; claim_chunk < end_chunk; ++claim_chunk) {
assert(sd.chunk(claim_chunk)->claim_unsafe(), "claim() failed");
for (size_t claim_region = beg_region; claim_region < end_region; ++claim_region) {
assert(sd.region(claim_region)->claim_unsafe(), "claim() failed");
}
#endif // #ifdef ASSERT
if (beg_addr != space(space_id)->bottom()) {
// Find the first live object or block of dead space that *starts* in this
// range of chunks. If a partial object crosses onto the chunk, skip it; it
// will be marked for 'deferred update' when the object head is processed.
// If dead space crosses onto the chunk, it is also skipped; it will be
// filled when the prior chunk is processed. If neither of those apply, the
// first word in the chunk is the start of a live object or dead space.
// range of regions. If a partial object crosses onto the region, skip it;
// it will be marked for 'deferred update' when the object head is
// processed. If dead space crosses onto the region, it is also skipped; it
// will be filled when the prior region is processed. If neither of those
// apply, the first word in the region is the start of a live object or dead
// space.
assert(beg_addr > space(space_id)->bottom(), "sanity");
const ChunkData* const cp = sd.chunk(beg_chunk);
const RegionData* const cp = sd.region(beg_region);
if (cp->partial_obj_size() != 0) {
beg_addr = sd.partial_obj_end(beg_chunk);
beg_addr = sd.partial_obj_end(beg_region);
} else if (dead_space_crosses_boundary(cp, mbm->addr_to_bit(beg_addr))) {
beg_addr = mbm->find_obj_beg(beg_addr, end_addr);
}
}
if (beg_addr < end_addr) {
// A live object or block of dead space starts in this range of Chunks.
// A live object or block of dead space starts in this range of Regions.
HeapWord* const dense_prefix_end = dense_prefix(space_id);
// Create closures and iterate.
......@@ -2986,10 +2493,10 @@ PSParallelCompact::update_and_deadwood_in_dense_prefix(ParCompactionManager* cm,
}
}
// Mark the chunks as filled.
ChunkData* const beg_cp = sd.chunk(beg_chunk);
ChunkData* const end_cp = sd.chunk(end_chunk);
for (ChunkData* cp = beg_cp; cp < end_cp; ++cp) {
// Mark the regions as filled.
RegionData* const beg_cp = sd.region(beg_region);
RegionData* const end_cp = sd.region(end_region);
for (RegionData* cp = beg_cp; cp < end_cp; ++cp) {
cp->set_completed();
}
}
......@@ -3021,13 +2528,13 @@ void PSParallelCompact::update_deferred_objects(ParCompactionManager* cm,
const MutableSpace* const space = space_info->space();
assert(space_info->dense_prefix() >= space->bottom(), "dense_prefix not set");
HeapWord* const beg_addr = space_info->dense_prefix();
HeapWord* const end_addr = sd.chunk_align_up(space_info->new_top());
HeapWord* const end_addr = sd.region_align_up(space_info->new_top());
const ChunkData* const beg_chunk = sd.addr_to_chunk_ptr(beg_addr);
const ChunkData* const end_chunk = sd.addr_to_chunk_ptr(end_addr);
const ChunkData* cur_chunk;
for (cur_chunk = beg_chunk; cur_chunk < end_chunk; ++cur_chunk) {
HeapWord* const addr = cur_chunk->deferred_obj_addr();
const RegionData* const beg_region = sd.addr_to_region_ptr(beg_addr);
const RegionData* const end_region = sd.addr_to_region_ptr(end_addr);
const RegionData* cur_region;
for (cur_region = beg_region; cur_region < end_region; ++cur_region) {
HeapWord* const addr = cur_region->deferred_obj_addr();
if (addr != NULL) {
if (start_array != NULL) {
start_array->allocate_block(addr);
......@@ -3073,45 +2580,45 @@ PSParallelCompact::skip_live_words(HeapWord* beg, HeapWord* end, size_t count)
HeapWord*
PSParallelCompact::first_src_addr(HeapWord* const dest_addr,
size_t src_chunk_idx)
size_t src_region_idx)
{
ParMarkBitMap* const bitmap = mark_bitmap();
const ParallelCompactData& sd = summary_data();
const size_t ChunkSize = ParallelCompactData::ChunkSize;
const size_t RegionSize = ParallelCompactData::RegionSize;
assert(sd.is_chunk_aligned(dest_addr), "not aligned");
assert(sd.is_region_aligned(dest_addr), "not aligned");
const ChunkData* const src_chunk_ptr = sd.chunk(src_chunk_idx);
const size_t partial_obj_size = src_chunk_ptr->partial_obj_size();
HeapWord* const src_chunk_destination = src_chunk_ptr->destination();
const RegionData* const src_region_ptr = sd.region(src_region_idx);
const size_t partial_obj_size = src_region_ptr->partial_obj_size();
HeapWord* const src_region_destination = src_region_ptr->destination();
assert(dest_addr >= src_chunk_destination, "wrong src chunk");
assert(src_chunk_ptr->data_size() > 0, "src chunk cannot be empty");
assert(dest_addr >= src_region_destination, "wrong src region");
assert(src_region_ptr->data_size() > 0, "src region cannot be empty");
HeapWord* const src_chunk_beg = sd.chunk_to_addr(src_chunk_idx);
HeapWord* const src_chunk_end = src_chunk_beg + ChunkSize;
HeapWord* const src_region_beg = sd.region_to_addr(src_region_idx);
HeapWord* const src_region_end = src_region_beg + RegionSize;
HeapWord* addr = src_chunk_beg;
if (dest_addr == src_chunk_destination) {
// Return the first live word in the source chunk.
HeapWord* addr = src_region_beg;
if (dest_addr == src_region_destination) {
// Return the first live word in the source region.
if (partial_obj_size == 0) {
addr = bitmap->find_obj_beg(addr, src_chunk_end);
assert(addr < src_chunk_end, "no objects start in src chunk");
addr = bitmap->find_obj_beg(addr, src_region_end);
assert(addr < src_region_end, "no objects start in src region");
}
return addr;
}
// Must skip some live data.
size_t words_to_skip = dest_addr - src_chunk_destination;
assert(src_chunk_ptr->data_size() > words_to_skip, "wrong src chunk");
size_t words_to_skip = dest_addr - src_region_destination;
assert(src_region_ptr->data_size() > words_to_skip, "wrong src region");
if (partial_obj_size >= words_to_skip) {
// All the live words to skip are part of the partial object.
addr += words_to_skip;
if (partial_obj_size == words_to_skip) {
// Find the first live word past the partial object.
addr = bitmap->find_obj_beg(addr, src_chunk_end);
assert(addr < src_chunk_end, "wrong src chunk");
addr = bitmap->find_obj_beg(addr, src_region_end);
assert(addr < src_region_end, "wrong src region");
}
return addr;
}
......@@ -3122,63 +2629,64 @@ PSParallelCompact::first_src_addr(HeapWord* const dest_addr,
addr += partial_obj_size;
}
// Skip over live words due to objects that start in the chunk.
addr = skip_live_words(addr, src_chunk_end, words_to_skip);
assert(addr < src_chunk_end, "wrong src chunk");
// Skip over live words due to objects that start in the region.
addr = skip_live_words(addr, src_region_end, words_to_skip);
assert(addr < src_region_end, "wrong src region");
return addr;
}
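// A minimal sketch (illustrative only) of the two-step skip performed by
// first_src_addr() above, in plain word counts: live words covered by the
// partial object are skipped with simple arithmetic, and any remainder is
// skipped object by object via the mark bitmap, modeled here as a callback.
// The template/callback shape is an assumption, not the HotSpot interface.
#include <cstddef>

// Returns the word offset, within the source region, of the first live word
// still to be copied, given how many live words must be skipped.
template <typename SkipLiveWordsFn>
static size_t first_src_offset_sketch(size_t partial_obj_size,
                                      size_t words_to_skip,
                                      SkipLiveWordsFn skip_live_words) {
  if (partial_obj_size >= words_to_skip) {
    // The skip ends inside (or exactly at the end of) the partial object; the
    // code above additionally advances to the next object start when the two
    // are equal, a bitmap lookup elided from this sketch.
    return words_to_skip;
  }
  // Skip the whole partial object, then let the bitmap-based helper skip the
  // remaining live words; it returns the resulting word offset.
  return skip_live_words(partial_obj_size, words_to_skip - partial_obj_size);
}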
void PSParallelCompact::decrement_destination_counts(ParCompactionManager* cm,
size_t beg_chunk,
size_t beg_region,
HeapWord* end_addr)
{
ParallelCompactData& sd = summary_data();
ChunkData* const beg = sd.chunk(beg_chunk);
HeapWord* const end_addr_aligned_up = sd.chunk_align_up(end_addr);
ChunkData* const end = sd.addr_to_chunk_ptr(end_addr_aligned_up);
size_t cur_idx = beg_chunk;
for (ChunkData* cur = beg; cur < end; ++cur, ++cur_idx) {
assert(cur->data_size() > 0, "chunk must have live data");
RegionData* const beg = sd.region(beg_region);
HeapWord* const end_addr_aligned_up = sd.region_align_up(end_addr);
RegionData* const end = sd.addr_to_region_ptr(end_addr_aligned_up);
size_t cur_idx = beg_region;
for (RegionData* cur = beg; cur < end; ++cur, ++cur_idx) {
assert(cur->data_size() > 0, "region must have live data");
cur->decrement_destination_count();
if (cur_idx <= cur->source_chunk() && cur->available() && cur->claim()) {
if (cur_idx <= cur->source_region() && cur->available() && cur->claim()) {
cm->save_for_processing(cur_idx);
}
}
}
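// A simplified model (not the HotSpot implementation) of the handshake above:
// each source region keeps a count of destination regions still expecting
// data from it; the worker that drops the count to zero may claim the region
// and push it onto its own work stack for filling. std::atomic and the queue
// type are used purely for illustration, and the 'cur_idx <= source_region()'
// refinement from the code above is omitted.
#include <atomic>
#include <cstddef>
#include <vector>

struct RegionStateSketch {
  std::atomic<int>  destination_count;   // copies still to be taken from this region
  std::atomic<bool> claimed;             // has some worker already queued it?
};

static void note_destination_filled(RegionStateSketch& r,
                                    std::vector<size_t>& my_work_stack,
                                    size_t region_idx) {
  // fetch_sub returns the previous value, so '== 1' means it just hit zero,
  // i.e. the region has become available.
  const bool available = r.destination_count.fetch_sub(1) == 1;
  bool expected = false;
  if (available && r.claimed.compare_exchange_strong(expected, true)) {
    my_work_stack.push_back(region_idx); // exactly one worker enqueues it
  }
}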
size_t PSParallelCompact::next_src_chunk(MoveAndUpdateClosure& closure,
SpaceId& src_space_id,
HeapWord*& src_space_top,
HeapWord* end_addr)
size_t PSParallelCompact::next_src_region(MoveAndUpdateClosure& closure,
SpaceId& src_space_id,
HeapWord*& src_space_top,
HeapWord* end_addr)
{
typedef ParallelCompactData::ChunkData ChunkData;
typedef ParallelCompactData::RegionData RegionData;
ParallelCompactData& sd = PSParallelCompact::summary_data();
const size_t chunk_size = ParallelCompactData::ChunkSize;
size_t src_chunk_idx = 0;
// Skip empty chunks (if any) up to the top of the space.
HeapWord* const src_aligned_up = sd.chunk_align_up(end_addr);
ChunkData* src_chunk_ptr = sd.addr_to_chunk_ptr(src_aligned_up);
HeapWord* const top_aligned_up = sd.chunk_align_up(src_space_top);
const ChunkData* const top_chunk_ptr = sd.addr_to_chunk_ptr(top_aligned_up);
while (src_chunk_ptr < top_chunk_ptr && src_chunk_ptr->data_size() == 0) {
++src_chunk_ptr;
}
if (src_chunk_ptr < top_chunk_ptr) {
// The next source chunk is in the current space. Update src_chunk_idx and
// the source address to match src_chunk_ptr.
src_chunk_idx = sd.chunk(src_chunk_ptr);
HeapWord* const src_chunk_addr = sd.chunk_to_addr(src_chunk_idx);
if (src_chunk_addr > closure.source()) {
closure.set_source(src_chunk_addr);
const size_t region_size = ParallelCompactData::RegionSize;
size_t src_region_idx = 0;
// Skip empty regions (if any) up to the top of the space.
HeapWord* const src_aligned_up = sd.region_align_up(end_addr);
RegionData* src_region_ptr = sd.addr_to_region_ptr(src_aligned_up);
HeapWord* const top_aligned_up = sd.region_align_up(src_space_top);
const RegionData* const top_region_ptr =
sd.addr_to_region_ptr(top_aligned_up);
while (src_region_ptr < top_region_ptr && src_region_ptr->data_size() == 0) {
++src_region_ptr;
}
if (src_region_ptr < top_region_ptr) {
// The next source region is in the current space. Update src_region_idx
// and the source address to match src_region_ptr.
src_region_idx = sd.region(src_region_ptr);
HeapWord* const src_region_addr = sd.region_to_addr(src_region_idx);
if (src_region_addr > closure.source()) {
closure.set_source(src_region_addr);
}
return src_chunk_idx;
return src_region_idx;
}
// Switch to a new source space and find the first non-empty chunk.
// Switch to a new source space and find the first non-empty region.
unsigned int space_id = src_space_id + 1;
assert(space_id < last_space_id, "not enough spaces");
......@@ -3187,14 +2695,14 @@ size_t PSParallelCompact::next_src_chunk(MoveAndUpdateClosure& closure,
do {
MutableSpace* space = _space_info[space_id].space();
HeapWord* const bottom = space->bottom();
const ChunkData* const bottom_cp = sd.addr_to_chunk_ptr(bottom);
const RegionData* const bottom_cp = sd.addr_to_region_ptr(bottom);
// Iterate over the spaces that do not compact into themselves.
if (bottom_cp->destination() != bottom) {
HeapWord* const top_aligned_up = sd.chunk_align_up(space->top());
const ChunkData* const top_cp = sd.addr_to_chunk_ptr(top_aligned_up);
HeapWord* const top_aligned_up = sd.region_align_up(space->top());
const RegionData* const top_cp = sd.addr_to_region_ptr(top_aligned_up);
for (const ChunkData* src_cp = bottom_cp; src_cp < top_cp; ++src_cp) {
for (const RegionData* src_cp = bottom_cp; src_cp < top_cp; ++src_cp) {
if (src_cp->live_obj_size() > 0) {
// Found it.
assert(src_cp->destination() == destination,
......@@ -3204,9 +2712,9 @@ size_t PSParallelCompact::next_src_chunk(MoveAndUpdateClosure& closure,
src_space_id = SpaceId(space_id);
src_space_top = space->top();
const size_t src_chunk_idx = sd.chunk(src_cp);
closure.set_source(sd.chunk_to_addr(src_chunk_idx));
return src_chunk_idx;
const size_t src_region_idx = sd.region(src_cp);
closure.set_source(sd.region_to_addr(src_region_idx));
return src_region_idx;
} else {
assert(src_cp->data_size() == 0, "sanity");
}
......@@ -3214,38 +2722,38 @@ size_t PSParallelCompact::next_src_chunk(MoveAndUpdateClosure& closure,
}
} while (++space_id < last_space_id);
assert(false, "no source chunk was found");
assert(false, "no source region was found");
return 0;
}
void PSParallelCompact::fill_chunk(ParCompactionManager* cm, size_t chunk_idx)
void PSParallelCompact::fill_region(ParCompactionManager* cm, size_t region_idx)
{
typedef ParMarkBitMap::IterationStatus IterationStatus;
const size_t ChunkSize = ParallelCompactData::ChunkSize;
const size_t RegionSize = ParallelCompactData::RegionSize;
ParMarkBitMap* const bitmap = mark_bitmap();
ParallelCompactData& sd = summary_data();
ChunkData* const chunk_ptr = sd.chunk(chunk_idx);
RegionData* const region_ptr = sd.region(region_idx);
// Get the items needed to construct the closure.
HeapWord* dest_addr = sd.chunk_to_addr(chunk_idx);
HeapWord* dest_addr = sd.region_to_addr(region_idx);
SpaceId dest_space_id = space_id(dest_addr);
ObjectStartArray* start_array = _space_info[dest_space_id].start_array();
HeapWord* new_top = _space_info[dest_space_id].new_top();
assert(dest_addr < new_top, "sanity");
const size_t words = MIN2(pointer_delta(new_top, dest_addr), ChunkSize);
const size_t words = MIN2(pointer_delta(new_top, dest_addr), RegionSize);
// Get the source chunk and related info.
size_t src_chunk_idx = chunk_ptr->source_chunk();
SpaceId src_space_id = space_id(sd.chunk_to_addr(src_chunk_idx));
// Get the source region and related info.
size_t src_region_idx = region_ptr->source_region();
SpaceId src_space_id = space_id(sd.region_to_addr(src_region_idx));
HeapWord* src_space_top = _space_info[src_space_id].space()->top();
MoveAndUpdateClosure closure(bitmap, cm, start_array, dest_addr, words);
closure.set_source(first_src_addr(dest_addr, src_chunk_idx));
closure.set_source(first_src_addr(dest_addr, src_region_idx));
// Adjust src_chunk_idx to prepare for decrementing destination counts (the
// destination count is not decremented when a chunk is copied to itself).
if (src_chunk_idx == chunk_idx) {
src_chunk_idx += 1;
// Adjust src_region_idx to prepare for decrementing destination counts (the
// destination count is not decremented when a region is copied to itself).
if (src_region_idx == region_idx) {
src_region_idx += 1;
}
if (bitmap->is_unmarked(closure.source())) {
......@@ -3255,32 +2763,33 @@ void PSParallelCompact::fill_chunk(ParCompactionManager* cm, size_t chunk_idx)
HeapWord* const old_src_addr = closure.source();
closure.copy_partial_obj();
if (closure.is_full()) {
decrement_destination_counts(cm, src_chunk_idx, closure.source());
chunk_ptr->set_deferred_obj_addr(NULL);
chunk_ptr->set_completed();
decrement_destination_counts(cm, src_region_idx, closure.source());
region_ptr->set_deferred_obj_addr(NULL);
region_ptr->set_completed();
return;
}
HeapWord* const end_addr = sd.chunk_align_down(closure.source());
if (sd.chunk_align_down(old_src_addr) != end_addr) {
// The partial object was copied from more than one source chunk.
decrement_destination_counts(cm, src_chunk_idx, end_addr);
HeapWord* const end_addr = sd.region_align_down(closure.source());
if (sd.region_align_down(old_src_addr) != end_addr) {
// The partial object was copied from more than one source region.
decrement_destination_counts(cm, src_region_idx, end_addr);
// Move to the next source chunk, possibly switching spaces as well. All
// Move to the next source region, possibly switching spaces as well. All
// args except end_addr may be modified.
src_chunk_idx = next_src_chunk(closure, src_space_id, src_space_top,
end_addr);
src_region_idx = next_src_region(closure, src_space_id, src_space_top,
end_addr);
}
}
do {
HeapWord* const cur_addr = closure.source();
HeapWord* const end_addr = MIN2(sd.chunk_align_up(cur_addr + 1),
HeapWord* const end_addr = MIN2(sd.region_align_up(cur_addr + 1),
src_space_top);
IterationStatus status = bitmap->iterate(&closure, cur_addr, end_addr);
if (status == ParMarkBitMap::incomplete) {
// The last obj that starts in the source chunk does not end in the chunk.
// The last obj that starts in the source region does not end in the
// region.
assert(closure.source() < end_addr, "sanity");
HeapWord* const obj_beg = closure.source();
HeapWord* const range_end = MIN2(obj_beg + closure.words_remaining(),
......@@ -3299,28 +2808,28 @@ void PSParallelCompact::fill_chunk(ParCompactionManager* cm, size_t chunk_idx)
if (status == ParMarkBitMap::would_overflow) {
// The last object did not fit. Note that interior oop updates were
// deferred, then copy enough of the object to fill the chunk.
chunk_ptr->set_deferred_obj_addr(closure.destination());
// deferred, then copy enough of the object to fill the region.
region_ptr->set_deferred_obj_addr(closure.destination());
status = closure.copy_until_full(); // copies from closure.source()
decrement_destination_counts(cm, src_chunk_idx, closure.source());
chunk_ptr->set_completed();
decrement_destination_counts(cm, src_region_idx, closure.source());
region_ptr->set_completed();
return;
}
if (status == ParMarkBitMap::full) {
decrement_destination_counts(cm, src_chunk_idx, closure.source());
chunk_ptr->set_deferred_obj_addr(NULL);
chunk_ptr->set_completed();
decrement_destination_counts(cm, src_region_idx, closure.source());
region_ptr->set_deferred_obj_addr(NULL);
region_ptr->set_completed();
return;
}
decrement_destination_counts(cm, src_chunk_idx, end_addr);
decrement_destination_counts(cm, src_region_idx, end_addr);
// Move to the next source chunk, possibly switching spaces as well. All
// Move to the next source region, possibly switching spaces as well. All
// args except end_addr may be modified.
src_chunk_idx = next_src_chunk(closure, src_space_id, src_space_top,
end_addr);
src_region_idx = next_src_region(closure, src_space_id, src_space_top,
end_addr);
} while (true);
}
......@@ -3352,15 +2861,15 @@ PSParallelCompact::move_and_update(ParCompactionManager* cm, SpaceId space_id) {
}
#endif
const size_t beg_chunk = sd.addr_to_chunk_idx(beg_addr);
const size_t dp_chunk = sd.addr_to_chunk_idx(dp_addr);
if (beg_chunk < dp_chunk) {
update_and_deadwood_in_dense_prefix(cm, space_id, beg_chunk, dp_chunk);
const size_t beg_region = sd.addr_to_region_idx(beg_addr);
const size_t dp_region = sd.addr_to_region_idx(dp_addr);
if (beg_region < dp_region) {
update_and_deadwood_in_dense_prefix(cm, space_id, beg_region, dp_region);
}
// The destination of the first live object that starts in the chunk is one
// past the end of the partial object entering the chunk (if any).
HeapWord* const dest_addr = sd.partial_obj_end(dp_chunk);
// The destination of the first live object that starts in the region is one
// past the end of the partial object entering the region (if any).
HeapWord* const dest_addr = sd.partial_obj_end(dp_region);
HeapWord* const new_top = _space_info[space_id].new_top();
assert(new_top >= dest_addr, "bad new_top value");
const size_t words = pointer_delta(new_top, dest_addr);
......@@ -3469,172 +2978,6 @@ UpdateOnlyClosure::do_addr(HeapWord* addr, size_t words) {
return ParMarkBitMap::incomplete;
}
BitBlockUpdateClosure::BitBlockUpdateClosure(ParMarkBitMap* mbm,
ParCompactionManager* cm,
size_t chunk_index) :
ParMarkBitMapClosure(mbm, cm),
_live_data_left(0),
_cur_block(0) {
_chunk_start =
PSParallelCompact::summary_data().chunk_to_addr(chunk_index);
_chunk_end =
PSParallelCompact::summary_data().chunk_to_addr(chunk_index) +
ParallelCompactData::ChunkSize;
_chunk_index = chunk_index;
_cur_block =
PSParallelCompact::summary_data().addr_to_block_idx(_chunk_start);
}
bool BitBlockUpdateClosure::chunk_contains_cur_block() {
return ParallelCompactData::chunk_contains_block(_chunk_index, _cur_block);
}
void BitBlockUpdateClosure::reset_chunk(size_t chunk_index) {
DEBUG_ONLY(ParallelCompactData::BlockData::set_cur_phase(7);)
ParallelCompactData& sd = PSParallelCompact::summary_data();
_chunk_index = chunk_index;
_live_data_left = 0;
_chunk_start = sd.chunk_to_addr(chunk_index);
_chunk_end = sd.chunk_to_addr(chunk_index) + ParallelCompactData::ChunkSize;
// The first block in this chunk
size_t first_block = sd.addr_to_block_idx(_chunk_start);
size_t partial_live_size = sd.chunk(chunk_index)->partial_obj_size();
// Set the offset to 0. By definition it should have that value
// but it may have been written while processing an earlier chunk.
if (partial_live_size == 0) {
// No live object extends onto the chunk. The first bit
// in the bit map for the first chunk must be a start bit.
// Although there may not be any marked bits, it is safe
// to set it as a start bit.
sd.block(first_block)->set_start_bit_offset(0);
sd.block(first_block)->set_first_is_start_bit(true);
} else if (sd.partial_obj_ends_in_block(first_block)) {
sd.block(first_block)->set_end_bit_offset(0);
sd.block(first_block)->set_first_is_start_bit(false);
} else {
// The partial object extends beyond the first block.
// There is no object starting in the first block
// so the offset and bit parity are not needed.
// Set the bit parity to start bit so assertions
// work when no bit is found.
sd.block(first_block)->set_end_bit_offset(0);
sd.block(first_block)->set_first_is_start_bit(false);
}
_cur_block = first_block;
#ifdef ASSERT
if (sd.block(first_block)->first_is_start_bit()) {
assert(!sd.partial_obj_ends_in_block(first_block),
"Partial object cannot end in first block");
}
if (PrintGCDetails && Verbose) {
if (partial_live_size == 1) {
gclog_or_tty->print_cr("first_block " PTR_FORMAT
" _offset " PTR_FORMAT
" _first_is_start_bit %d",
first_block,
sd.block(first_block)->raw_offset(),
sd.block(first_block)->first_is_start_bit());
}
}
#endif
DEBUG_ONLY(ParallelCompactData::BlockData::set_cur_phase(17);)
}
// This method is called when an object has been found (both beginning
// and end of the object) in the range of iteration. This method is
// calculating the words of live data to the left of a block. That live
// data includes any object starting to the left of the block (i.e.,
// the live-data-to-the-left of block AAA will include the full size
// of any object entering AAA).
ParMarkBitMapClosure::IterationStatus
BitBlockUpdateClosure::do_addr(HeapWord* addr, size_t words) {
// add the size to the block data.
HeapWord* obj = addr;
ParallelCompactData& sd = PSParallelCompact::summary_data();
assert(bitmap()->obj_size(obj) == words, "bad size");
assert(_chunk_start <= obj, "object is not in chunk");
assert(obj + words <= _chunk_end, "object is not in chunk");
// Update the live data to the left
size_t prev_live_data_left = _live_data_left;
_live_data_left = _live_data_left + words;
// Is this object in the current block.
size_t block_of_obj = sd.addr_to_block_idx(obj);
size_t block_of_obj_last = sd.addr_to_block_idx(obj + words - 1);
HeapWord* block_of_obj_last_addr = sd.block_to_addr(block_of_obj_last);
if (_cur_block < block_of_obj) {
//
// No object crossed the block boundary and this object was found
// on the other side of the block boundary. Update the offset for
// the new block with the data size that does not include this object.
//
// The first bit in block_of_obj is a start bit except in the
// case where the partial object for the chunk extends into
// this block.
if (sd.partial_obj_ends_in_block(block_of_obj)) {
sd.block(block_of_obj)->set_end_bit_offset(prev_live_data_left);
} else {
sd.block(block_of_obj)->set_start_bit_offset(prev_live_data_left);
}
// Does this object pass beyond its block?
if (block_of_obj < block_of_obj_last) {
// Object crosses block boundary. Two blocks need to be updated:
// the current block where the object started
// the block where the object ends
//
// The offset for blocks with no objects starting in them
// (e.g., blocks between _cur_block and block_of_obj_last)
// should not be needed.
// Note that block_of_obj_last may be in another chunk. If so,
// it should be overwritten later. This is a problem (writing
// into a block in a later chunk) for parallel execution.
assert(obj < block_of_obj_last_addr,
"Object should start in previous block");
// obj is crossing into block_of_obj_last so the first bit
// is an end bit.
sd.block(block_of_obj_last)->set_end_bit_offset(_live_data_left);
_cur_block = block_of_obj_last;
} else {
// _first_is_start_bit has already been set correctly
// in the if-then-else above so don't reset it here.
_cur_block = block_of_obj;
}
} else {
// The current block only changes if the object extends beyond
// the block it starts in.
//
// The object starts in the current block.
// Does this object pass beyond the end of it?
if (block_of_obj < block_of_obj_last) {
// Object crosses block boundary.
// See note above on possible blocks between block_of_obj and
// block_of_obj_last
assert(obj < block_of_obj_last_addr,
"Object should start in previous block");
sd.block(block_of_obj_last)->set_end_bit_offset(_live_data_left);
_cur_block = block_of_obj_last;
}
}
// Return incomplete if there are more blocks to be done.
if (chunk_contains_cur_block()) {
return ParMarkBitMap::incomplete;
}
return ParMarkBitMap::complete;
}
// Verify the new location using the forwarding pointer
// from MarkSweep::mark_sweep_phase2(). Set the mark_word
// to the initial value.
......@@ -3707,12 +3050,3 @@ PSParallelCompact::next_compaction_space_id(SpaceId id) {
return last_space_id;
}
}
// Here temporarily for debugging
#ifdef ASSERT
size_t ParallelCompactData::block_idx(BlockData* block) {
size_t index = pointer_delta(block,
PSParallelCompact::summary_data()._block_data, sizeof(BlockData));
return index;
}
#endif
......@@ -76,87 +76,80 @@ class ParallelCompactData
{
public:
// Sizes are in HeapWords, unless indicated otherwise.
static const size_t Log2ChunkSize;
static const size_t ChunkSize;
static const size_t ChunkSizeBytes;
// Mask for the bits in a size_t to get an offset within a chunk.
static const size_t ChunkSizeOffsetMask;
// Mask for the bits in a pointer to get an offset within a chunk.
static const size_t ChunkAddrOffsetMask;
// Mask for the bits in a pointer to get the address of the start of a chunk.
static const size_t ChunkAddrMask;
static const size_t Log2BlockSize;
static const size_t BlockSize;
static const size_t BlockOffsetMask;
static const size_t BlockMask;
static const size_t BlocksPerChunk;
class ChunkData
static const size_t Log2RegionSize;
static const size_t RegionSize;
static const size_t RegionSizeBytes;
// Mask for the bits in a size_t to get an offset within a region.
static const size_t RegionSizeOffsetMask;
// Mask for the bits in a pointer to get an offset within a region.
static const size_t RegionAddrOffsetMask;
// Mask for the bits in a pointer to get the address of the start of a region.
static const size_t RegionAddrMask;
class RegionData
{
public:
// Destination address of the chunk.
// Destination address of the region.
HeapWord* destination() const { return _destination; }
// The first chunk containing data destined for this chunk.
size_t source_chunk() const { return _source_chunk; }
// The first region containing data destined for this region.
size_t source_region() const { return _source_region; }
// The object (if any) starting in this chunk and ending in a different
// chunk that could not be updated during the main (parallel) compaction
// The object (if any) starting in this region and ending in a different
// region that could not be updated during the main (parallel) compaction
// phase. This is different from _partial_obj_addr, which is an object that
// extends onto a source chunk. However, the two uses do not overlap in
// extends onto a source region. However, the two uses do not overlap in
// time, so the same field is used to save space.
HeapWord* deferred_obj_addr() const { return _partial_obj_addr; }
// The starting address of the partial object extending onto the chunk.
// The starting address of the partial object extending onto the region.
HeapWord* partial_obj_addr() const { return _partial_obj_addr; }
// Size of the partial object extending onto the chunk (words).
// Size of the partial object extending onto the region (words).
size_t partial_obj_size() const { return _partial_obj_size; }
// Size of live data that lies within this chunk due to objects that start
// in this chunk (words). This does not include the partial object
// extending onto the chunk (if any), or the part of an object that extends
// onto the next chunk (if any).
// Size of live data that lies within this region due to objects that start
// in this region (words). This does not include the partial object
// extending onto the region (if any), or the part of an object that extends
// onto the next region (if any).
size_t live_obj_size() const { return _dc_and_los & los_mask; }
// Total live data that lies within the chunk (words).
// Total live data that lies within the region (words).
size_t data_size() const { return partial_obj_size() + live_obj_size(); }
// The destination_count is the number of other chunks to which data from
// this chunk will be copied. At the end of the summary phase, the valid
// The destination_count is the number of other regions to which data from
// this region will be copied. At the end of the summary phase, the valid
// values of destination_count are
//
// 0 - data from the chunk will be compacted completely into itself, or the
// chunk is empty. The chunk can be claimed and then filled.
// 1 - data from the chunk will be compacted into 1 other chunk; some
// data from the chunk may also be compacted into the chunk itself.
// 2 - data from the chunk will be copied to 2 other chunks.
// 0 - data from the region will be compacted completely into itself, or the
// region is empty. The region can be claimed and then filled.
// 1 - data from the region will be compacted into 1 other region; some
// data from the region may also be compacted into the region itself.
// 2 - data from the region will be copied to 2 other regions.
//
// During compaction as chunks are emptied, the destination_count is
// During compaction as regions are emptied, the destination_count is
// decremented (atomically) and when it reaches 0, it can be claimed and
// then filled.
//
// A chunk is claimed for processing by atomically changing the
// destination_count to the claimed value (dc_claimed). After a chunk has
// A region is claimed for processing by atomically changing the
// destination_count to the claimed value (dc_claimed). After a region has
// been filled, the destination_count should be set to the completed value
// (dc_completed).
inline uint destination_count() const;
inline uint destination_count_raw() const;
// The location of the java heap data that corresponds to this chunk.
// The location of the java heap data that corresponds to this region.
inline HeapWord* data_location() const;
// The highest address referenced by objects in this chunk.
// The highest address referenced by objects in this region.
inline HeapWord* highest_ref() const;
// Whether this chunk is available to be claimed, has been claimed, or has
// Whether this region is available to be claimed, has been claimed, or has
// been completed.
//
// Minor subtlety: claimed() returns true if the chunk is marked
// completed(), which is desirable since a chunk must be claimed before it
// Minor subtlety: claimed() returns true if the region is marked
// completed(), which is desirable since a region must be claimed before it
// can be completed.
bool available() const { return _dc_and_los < dc_one; }
bool claimed() const { return _dc_and_los >= dc_claimed; }
......@@ -164,11 +157,11 @@ public:
// These are not atomic.
void set_destination(HeapWord* addr) { _destination = addr; }
void set_source_chunk(size_t chunk) { _source_chunk = chunk; }
void set_source_region(size_t region) { _source_region = region; }
void set_deferred_obj_addr(HeapWord* addr) { _partial_obj_addr = addr; }
void set_partial_obj_addr(HeapWord* addr) { _partial_obj_addr = addr; }
void set_partial_obj_size(size_t words) {
_partial_obj_size = (chunk_sz_t) words;
_partial_obj_size = (region_sz_t) words;
}
inline void set_destination_count(uint count);
......@@ -184,101 +177,35 @@ public:
inline bool claim();
private:
// The type used to represent object sizes within a chunk.
typedef uint chunk_sz_t;
// The type used to represent object sizes within a region.
typedef uint region_sz_t;
// Constants for manipulating the _dc_and_los field, which holds both the
// destination count and live obj size. The live obj size lives at the
// least significant end so no masking is necessary when adding.
static const chunk_sz_t dc_shift; // Shift amount.
static const chunk_sz_t dc_mask; // Mask for destination count.
static const chunk_sz_t dc_one; // 1, shifted appropriately.
static const chunk_sz_t dc_claimed; // Chunk has been claimed.
static const chunk_sz_t dc_completed; // Chunk has been completed.
static const chunk_sz_t los_mask; // Mask for live obj size.
HeapWord* _destination;
size_t _source_chunk;
HeapWord* _partial_obj_addr;
chunk_sz_t _partial_obj_size;
chunk_sz_t volatile _dc_and_los;
static const region_sz_t dc_shift; // Shift amount.
static const region_sz_t dc_mask; // Mask for destination count.
static const region_sz_t dc_one; // 1, shifted appropriately.
static const region_sz_t dc_claimed; // Region has been claimed.
static const region_sz_t dc_completed; // Region has been completed.
static const region_sz_t los_mask; // Mask for live obj size.
HeapWord* _destination;
size_t _source_region;
HeapWord* _partial_obj_addr;
region_sz_t _partial_obj_size;
region_sz_t volatile _dc_and_los;
#ifdef ASSERT
// These enable optimizations that are only partially implemented. Use
// debug builds to prevent the code fragments from breaking.
HeapWord* _data_location;
HeapWord* _highest_ref;
HeapWord* _data_location;
HeapWord* _highest_ref;
#endif // #ifdef ASSERT
#ifdef ASSERT
public:
uint _pushed; // 0 until chunk is pushed onto a worker's stack
private:
#endif
};
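// --- Standalone sketch (not part of the HotSpot sources) ---------------------
// A minimal illustration of the packed _dc_and_los encoding declared above:
// the live-object size sits in the low bits (so additions need no masking) and
// the destination count sits in the topmost bits, so adding dc_mask is the same
// as subtracting dc_one. The value chosen for dc_shift here is an assumption
// for illustration; the real constants are defined in psParallelCompact.cpp.
#include <cassert>
#include <cstdint>
#include <cstdio>

typedef uint32_t region_sz_t;
const region_sz_t dc_shift = 27;                             // assumed value
const region_sz_t dc_mask  = ~region_sz_t(0) << dc_shift;    // count bits
const region_sz_t dc_one   = region_sz_t(1) << dc_shift;
const region_sz_t los_mask = ~dc_mask;                       // live-obj-size bits

region_sz_t pack(region_sz_t count, region_sz_t live_words) {
  assert(live_words <= los_mask && "live size would overflow its field");
  return (count << dc_shift) | live_words;
}

int main() {
  region_sz_t dc_and_los = pack(2, 1000);   // 2 destination regions, 1000 live words

  // Adding a live object touches only the low bits; no masking needed.
  dc_and_los += 24;
  std::printf("live size = %u\n", dc_and_los & los_mask);                       // 1024

  // Decrement the destination count by adding dc_mask (wraps to -dc_one).
  dc_and_los += dc_mask;
  assert((dc_and_los & dc_mask) == dc_one);
  std::printf("destination count = %u\n", (dc_and_los & dc_mask) >> dc_shift);  // 1
  return 0;
}
// -----------------------------------------------------------------------------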
// 'Blocks' allow shorter sections of the bitmap to be searched. Each Block
// holds an offset, which is the amount of live data in the Chunk to the left
// of the first live object in the Block. This amount of live data will
// include any object extending into the block. The first block in
// a chunk does not include any partial object extending into
// the chunk.
//
// The offset also encodes the
// 'parity' of the first 1 bit in the Block: a positive offset means the
// first 1 bit marks the start of an object, a negative offset means the first
// 1 bit marks the end of an object.
class BlockData
{
public:
typedef short int blk_ofs_t;
blk_ofs_t offset() const { return _offset >= 0 ? _offset : -_offset; }
blk_ofs_t raw_offset() const { return _offset; }
void set_first_is_start_bit(bool v) { _first_is_start_bit = v; }
#if 0
// The need for this method was anticipated but it is
// never actually used. Do not include it for now. If
// it is needed, consider the problem of what is passed
// as "v". To avoid warning errors the method set_start_bit_offset()
// was changed to take a size_t as the parameter and to do the
// check for the possible overflow. Doing the cast in these
// methods better confines the potential problems caused by
// the size of the field to this class.
void set_raw_offset(blk_ofs_t v) { _offset = v; }
#endif
void set_start_bit_offset(size_t val) {
assert(val >= 0, "sanity");
_offset = (blk_ofs_t) val;
assert(val == (size_t) _offset, "Value is too large");
_first_is_start_bit = true;
}
void set_end_bit_offset(size_t val) {
assert(val >= 0, "sanity");
_offset = (blk_ofs_t) val;
assert(val == (size_t) _offset, "Value is too large");
_offset = - _offset;
_first_is_start_bit = false;
}
bool first_is_start_bit() {
assert(_set_phase > 0, "Not initialized");
return _first_is_start_bit;
}
bool first_is_end_bit() {
assert(_set_phase > 0, "Not initialized");
return !_first_is_start_bit;
}
uint _pushed; // 0 until region is pushed onto a worker's stack
private:
blk_ofs_t _offset;
// This is temporary until the mark_bitmap is separated into
// a start bit array and an end bit array.
bool _first_is_start_bit;
#ifdef ASSERT
short _set_phase;
static short _cur_phase;
public:
static void set_cur_phase(short v) { _cur_phase = v; }
#endif
};
......@@ -286,27 +213,21 @@ public:
ParallelCompactData();
bool initialize(MemRegion covered_region);
size_t chunk_count() const { return _chunk_count; }
size_t region_count() const { return _region_count; }
// Convert chunk indices to/from ChunkData pointers.
inline ChunkData* chunk(size_t chunk_idx) const;
inline size_t chunk(const ChunkData* const chunk_ptr) const;
// Convert region indices to/from RegionData pointers.
inline RegionData* region(size_t region_idx) const;
inline size_t region(const RegionData* const region_ptr) const;
// Returns true if the given address is contained within the chunk
bool chunk_contains(size_t chunk_index, HeapWord* addr);
size_t block_count() const { return _block_count; }
inline BlockData* block(size_t n) const;
// Returns true if the given block is in the given chunk.
static bool chunk_contains_block(size_t chunk_index, size_t block_index);
// Returns true if the given address is contained within the region
bool region_contains(size_t region_index, HeapWord* addr);
void add_obj(HeapWord* addr, size_t len);
void add_obj(oop p, size_t len) { add_obj((HeapWord*)p, len); }
// Fill in the chunks covering [beg, end) so that no data moves; i.e., the
// destination of chunk n is simply the start of chunk n. The argument beg
// must be chunk-aligned; end need not be.
// Fill in the regions covering [beg, end) so that no data moves; i.e., the
// destination of region n is simply the start of region n. The argument beg
// must be region-aligned; end need not be.
void summarize_dense_prefix(HeapWord* beg, HeapWord* end);
bool summarize(HeapWord* target_beg, HeapWord* target_end,
......@@ -314,48 +235,33 @@ public:
HeapWord** target_next, HeapWord** source_next = 0);
void clear();
void clear_range(size_t beg_chunk, size_t end_chunk);
void clear_range(size_t beg_region, size_t end_region);
void clear_range(HeapWord* beg, HeapWord* end) {
clear_range(addr_to_chunk_idx(beg), addr_to_chunk_idx(end));
clear_range(addr_to_region_idx(beg), addr_to_region_idx(end));
}
// Return the number of words between addr and the start of the chunk
// Return the number of words between addr and the start of the region
// containing addr.
inline size_t chunk_offset(const HeapWord* addr) const;
// Convert addresses to/from a chunk index or chunk pointer.
inline size_t addr_to_chunk_idx(const HeapWord* addr) const;
inline ChunkData* addr_to_chunk_ptr(const HeapWord* addr) const;
inline HeapWord* chunk_to_addr(size_t chunk) const;
inline HeapWord* chunk_to_addr(size_t chunk, size_t offset) const;
inline HeapWord* chunk_to_addr(const ChunkData* chunk) const;
inline HeapWord* chunk_align_down(HeapWord* addr) const;
inline HeapWord* chunk_align_up(HeapWord* addr) const;
inline bool is_chunk_aligned(HeapWord* addr) const;
// Analogous to chunk_offset() for blocks.
size_t block_offset(const HeapWord* addr) const;
size_t addr_to_block_idx(const HeapWord* addr) const;
size_t addr_to_block_idx(const oop obj) const {
return addr_to_block_idx((HeapWord*) obj);
}
inline BlockData* addr_to_block_ptr(const HeapWord* addr) const;
inline HeapWord* block_to_addr(size_t block) const;
inline size_t region_offset(const HeapWord* addr) const;
// Convert addresses to/from a region index or region pointer.
inline size_t addr_to_region_idx(const HeapWord* addr) const;
inline RegionData* addr_to_region_ptr(const HeapWord* addr) const;
inline HeapWord* region_to_addr(size_t region) const;
inline HeapWord* region_to_addr(size_t region, size_t offset) const;
inline HeapWord* region_to_addr(const RegionData* region) const;
inline HeapWord* region_align_down(HeapWord* addr) const;
inline HeapWord* region_align_up(HeapWord* addr) const;
inline bool is_region_aligned(HeapWord* addr) const;
// Return the address one past the end of the partial object.
HeapWord* partial_obj_end(size_t chunk_idx) const;
HeapWord* partial_obj_end(size_t region_idx) const;
// Return the new location of the object p after
// the compaction.
HeapWord* calc_new_pointer(HeapWord* addr);
// Same as calc_new_pointer() using blocks.
HeapWord* block_calc_new_pointer(HeapWord* addr);
// Same as calc_new_pointer() using chunks.
HeapWord* chunk_calc_new_pointer(HeapWord* addr);
HeapWord* calc_new_pointer(oop p) {
return calc_new_pointer((HeapWord*) p);
}
......@@ -363,22 +269,13 @@ public:
// Return the updated address for the given klass
klassOop calc_new_klass(klassOop);
// Given a block returns true if the partial object for the
// corresponding chunk ends in the block. Returns false otherwise.
// If there is no partial object, returns false.
bool partial_obj_ends_in_block(size_t block_index);
// Returns the block index for the block
static size_t block_idx(BlockData* block);
#ifdef ASSERT
void verify_clear(const PSVirtualSpace* vspace);
void verify_clear();
#endif // #ifdef ASSERT
private:
bool initialize_block_data(size_t region_size);
bool initialize_chunk_data(size_t region_size);
bool initialize_region_data(size_t region_size);
PSVirtualSpace* create_vspace(size_t count, size_t element_size);
private:
......@@ -387,74 +284,70 @@ private:
HeapWord* _region_end;
#endif // #ifdef ASSERT
PSVirtualSpace* _chunk_vspace;
ChunkData* _chunk_data;
size_t _chunk_count;
PSVirtualSpace* _block_vspace;
BlockData* _block_data;
size_t _block_count;
PSVirtualSpace* _region_vspace;
RegionData* _region_data;
size_t _region_count;
};
inline uint
ParallelCompactData::ChunkData::destination_count_raw() const
ParallelCompactData::RegionData::destination_count_raw() const
{
return _dc_and_los & dc_mask;
}
inline uint
ParallelCompactData::ChunkData::destination_count() const
ParallelCompactData::RegionData::destination_count() const
{
return destination_count_raw() >> dc_shift;
}
inline void
ParallelCompactData::ChunkData::set_destination_count(uint count)
ParallelCompactData::RegionData::set_destination_count(uint count)
{
assert(count <= (dc_completed >> dc_shift), "count too large");
const chunk_sz_t live_sz = (chunk_sz_t) live_obj_size();
const region_sz_t live_sz = (region_sz_t) live_obj_size();
_dc_and_los = (count << dc_shift) | live_sz;
}
inline void ParallelCompactData::ChunkData::set_live_obj_size(size_t words)
inline void ParallelCompactData::RegionData::set_live_obj_size(size_t words)
{
assert(words <= los_mask, "would overflow");
_dc_and_los = destination_count_raw() | (chunk_sz_t)words;
_dc_and_los = destination_count_raw() | (region_sz_t)words;
}
inline void ParallelCompactData::ChunkData::decrement_destination_count()
inline void ParallelCompactData::RegionData::decrement_destination_count()
{
assert(_dc_and_los < dc_claimed, "already claimed");
assert(_dc_and_los >= dc_one, "count would go negative");
Atomic::add((int)dc_mask, (volatile int*)&_dc_and_los);
}
inline HeapWord* ParallelCompactData::ChunkData::data_location() const
inline HeapWord* ParallelCompactData::RegionData::data_location() const
{
DEBUG_ONLY(return _data_location;)
NOT_DEBUG(return NULL;)
}
inline HeapWord* ParallelCompactData::ChunkData::highest_ref() const
inline HeapWord* ParallelCompactData::RegionData::highest_ref() const
{
DEBUG_ONLY(return _highest_ref;)
NOT_DEBUG(return NULL;)
}
inline void ParallelCompactData::ChunkData::set_data_location(HeapWord* addr)
inline void ParallelCompactData::RegionData::set_data_location(HeapWord* addr)
{
DEBUG_ONLY(_data_location = addr;)
}
inline void ParallelCompactData::ChunkData::set_completed()
inline void ParallelCompactData::RegionData::set_completed()
{
assert(claimed(), "must be claimed first");
_dc_and_los = dc_completed | (chunk_sz_t) live_obj_size();
_dc_and_los = dc_completed | (region_sz_t) live_obj_size();
}
// MT-unsafe claiming of a chunk. Should only be used during single threaded
// MT-unsafe claiming of a region. Should only be used during single threaded
// execution.
inline bool ParallelCompactData::ChunkData::claim_unsafe()
inline bool ParallelCompactData::RegionData::claim_unsafe()
{
if (available()) {
_dc_and_los |= dc_claimed;
......@@ -463,13 +356,13 @@ inline bool ParallelCompactData::ChunkData::claim_unsafe()
return false;
}
inline void ParallelCompactData::ChunkData::add_live_obj(size_t words)
inline void ParallelCompactData::RegionData::add_live_obj(size_t words)
{
assert(words <= (size_t)los_mask - live_obj_size(), "overflow");
Atomic::add((int) words, (volatile int*) &_dc_and_los);
}
inline void ParallelCompactData::ChunkData::set_highest_ref(HeapWord* addr)
inline void ParallelCompactData::RegionData::set_highest_ref(HeapWord* addr)
{
#ifdef ASSERT
HeapWord* tmp = _highest_ref;
......@@ -479,7 +372,7 @@ inline void ParallelCompactData::ChunkData::set_highest_ref(HeapWord* addr)
#endif // #ifdef ASSERT
}
inline bool ParallelCompactData::ChunkData::claim()
inline bool ParallelCompactData::RegionData::claim()
{
const int los = (int) live_obj_size();
const int old = Atomic::cmpxchg(dc_claimed | los,
......@@ -487,119 +380,85 @@ inline bool ParallelCompactData::ChunkData::claim()
return old == los;
}
inline ParallelCompactData::ChunkData*
ParallelCompactData::chunk(size_t chunk_idx) const
inline ParallelCompactData::RegionData*
ParallelCompactData::region(size_t region_idx) const
{
assert(chunk_idx <= chunk_count(), "bad arg");
return _chunk_data + chunk_idx;
assert(region_idx <= region_count(), "bad arg");
return _region_data + region_idx;
}
inline size_t
ParallelCompactData::chunk(const ChunkData* const chunk_ptr) const
ParallelCompactData::region(const RegionData* const region_ptr) const
{
assert(chunk_ptr >= _chunk_data, "bad arg");
assert(chunk_ptr <= _chunk_data + chunk_count(), "bad arg");
return pointer_delta(chunk_ptr, _chunk_data, sizeof(ChunkData));
}
inline ParallelCompactData::BlockData*
ParallelCompactData::block(size_t n) const {
assert(n < block_count(), "bad arg");
return _block_data + n;
assert(region_ptr >= _region_data, "bad arg");
assert(region_ptr <= _region_data + region_count(), "bad arg");
return pointer_delta(region_ptr, _region_data, sizeof(RegionData));
}
inline size_t
ParallelCompactData::chunk_offset(const HeapWord* addr) const
ParallelCompactData::region_offset(const HeapWord* addr) const
{
assert(addr >= _region_start, "bad addr");
assert(addr <= _region_end, "bad addr");
return (size_t(addr) & ChunkAddrOffsetMask) >> LogHeapWordSize;
return (size_t(addr) & RegionAddrOffsetMask) >> LogHeapWordSize;
}
inline size_t
ParallelCompactData::addr_to_chunk_idx(const HeapWord* addr) const
ParallelCompactData::addr_to_region_idx(const HeapWord* addr) const
{
assert(addr >= _region_start, "bad addr");
assert(addr <= _region_end, "bad addr");
return pointer_delta(addr, _region_start) >> Log2ChunkSize;
return pointer_delta(addr, _region_start) >> Log2RegionSize;
}
inline ParallelCompactData::ChunkData*
ParallelCompactData::addr_to_chunk_ptr(const HeapWord* addr) const
inline ParallelCompactData::RegionData*
ParallelCompactData::addr_to_region_ptr(const HeapWord* addr) const
{
return chunk(addr_to_chunk_idx(addr));
return region(addr_to_region_idx(addr));
}
inline HeapWord*
ParallelCompactData::chunk_to_addr(size_t chunk) const
ParallelCompactData::region_to_addr(size_t region) const
{
assert(chunk <= _chunk_count, "chunk out of range");
return _region_start + (chunk << Log2ChunkSize);
assert(region <= _region_count, "region out of range");
return _region_start + (region << Log2RegionSize);
}
inline HeapWord*
ParallelCompactData::chunk_to_addr(const ChunkData* chunk) const
ParallelCompactData::region_to_addr(const RegionData* region) const
{
return chunk_to_addr(pointer_delta(chunk, _chunk_data, sizeof(ChunkData)));
return region_to_addr(pointer_delta(region, _region_data,
sizeof(RegionData)));
}
inline HeapWord*
ParallelCompactData::chunk_to_addr(size_t chunk, size_t offset) const
ParallelCompactData::region_to_addr(size_t region, size_t offset) const
{
assert(chunk <= _chunk_count, "chunk out of range");
assert(offset < ChunkSize, "offset too big"); // This may be too strict.
return chunk_to_addr(chunk) + offset;
assert(region <= _region_count, "region out of range");
assert(offset < RegionSize, "offset too big"); // This may be too strict.
return region_to_addr(region) + offset;
}
inline HeapWord*
ParallelCompactData::chunk_align_down(HeapWord* addr) const
ParallelCompactData::region_align_down(HeapWord* addr) const
{
assert(addr >= _region_start, "bad addr");
assert(addr < _region_end + ChunkSize, "bad addr");
return (HeapWord*)(size_t(addr) & ChunkAddrMask);
assert(addr < _region_end + RegionSize, "bad addr");
return (HeapWord*)(size_t(addr) & RegionAddrMask);
}
inline HeapWord*
ParallelCompactData::chunk_align_up(HeapWord* addr) const
ParallelCompactData::region_align_up(HeapWord* addr) const
{
assert(addr >= _region_start, "bad addr");
assert(addr <= _region_end, "bad addr");
return chunk_align_down(addr + ChunkSizeOffsetMask);
return region_align_down(addr + RegionSizeOffsetMask);
}
inline bool
ParallelCompactData::is_chunk_aligned(HeapWord* addr) const
{
return chunk_offset(addr) == 0;
}
inline size_t
ParallelCompactData::block_offset(const HeapWord* addr) const
{
assert(addr >= _region_start, "bad addr");
assert(addr <= _region_end, "bad addr");
return pointer_delta(addr, _region_start) & BlockOffsetMask;
}
inline size_t
ParallelCompactData::addr_to_block_idx(const HeapWord* addr) const
{
assert(addr >= _region_start, "bad addr");
assert(addr <= _region_end, "bad addr");
return pointer_delta(addr, _region_start) >> Log2BlockSize;
}
inline ParallelCompactData::BlockData*
ParallelCompactData::addr_to_block_ptr(const HeapWord* addr) const
{
return block(addr_to_block_idx(addr));
}
inline HeapWord*
ParallelCompactData::block_to_addr(size_t block) const
ParallelCompactData::is_region_aligned(HeapWord* addr) const
{
assert(block < _block_count, "block out of range");
return _region_start + (block << Log2BlockSize);
return region_offset(addr) == 0;
}
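// --- Standalone sketch (not part of the HotSpot sources) ---------------------
// A minimal illustration of the address arithmetic used by the conversions
// above: region index, word offset within a region, and align-down/align-up
// are all shifts and masks on the raw address. The 8-byte HeapWord and the
// 512-word region size below are assumptions for illustration; the real
// Log2RegionSize is defined in psParallelCompact.cpp.
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdio>

const size_t LogHeapWordSize      = 3;                          // 8-byte words assumed
const size_t Log2RegionSize       = 9;                          // 512 words per region
const size_t RegionSize           = size_t(1) << Log2RegionSize;
const size_t RegionSizeBytes      = RegionSize << LogHeapWordSize;
const size_t RegionSizeOffsetMask = RegionSize - 1;             // offset in words
const size_t RegionAddrOffsetMask = RegionSizeBytes - 1;        // offset in bytes
const size_t RegionAddrMask       = ~RegionAddrOffsetMask;      // region start bits

int main() {
  const uintptr_t region_start = 0x100000;                      // plays the role of _region_start
  const uintptr_t addr         = region_start + 5 * RegionSizeBytes + 40;

  // addr_to_region_idx: word distance from the start of the space, shifted.
  size_t region_idx  = ((addr - region_start) >> LogHeapWordSize) >> Log2RegionSize;
  // region_offset: byte offset within the region, converted to words.
  size_t word_offset = (addr & RegionAddrOffsetMask) >> LogHeapWordSize;
  // region_align_down / region_align_up.
  uintptr_t down = addr & RegionAddrMask;
  uintptr_t up   = (addr + (RegionSizeOffsetMask << LogHeapWordSize)) & RegionAddrMask;

  assert(region_idx == 5 && word_offset == 5);
  std::printf("idx=%zu offset=%zu down=%#lx up=%#lx\n",
              region_idx, word_offset, (unsigned long)down, (unsigned long)up);
  return 0;
}
// -----------------------------------------------------------------------------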
// Abstract closure for use with ParMarkBitMap::iterate(), which will invoke the
......@@ -687,45 +546,15 @@ inline void ParMarkBitMapClosure::decrement_words_remaining(size_t words) {
_words_remaining -= words;
}
// Closure for updating the block data during the summary phase.
class BitBlockUpdateClosure: public ParMarkBitMapClosure {
// ParallelCompactData::BlockData::blk_ofs_t _live_data_left;
size_t _live_data_left;
size_t _cur_block;
HeapWord* _chunk_start;
HeapWord* _chunk_end;
size_t _chunk_index;
public:
BitBlockUpdateClosure(ParMarkBitMap* mbm,
ParCompactionManager* cm,
size_t chunk_index);
size_t cur_block() { return _cur_block; }
size_t chunk_index() { return _chunk_index; }
size_t live_data_left() { return _live_data_left; }
// Returns true if the first bit in the current block (cur_block) is
// a start bit.
// Returns true if the current block is within the chunk for the closure;
bool chunk_contains_cur_block();
// Set the chunk index and related chunk values for
// a new chunk.
void reset_chunk(size_t chunk_index);
virtual IterationStatus do_addr(HeapWord* addr, size_t words);
};
// The UseParallelOldGC collector is a stop-the-world garbage
// collector that does parts of the collection using parallel threads.
// The collection includes the tenured generation and the young
// generation. The permanent generation is collected at the same
// time as the other two generations but the permanent generation
// is collect by a single GC thread. The permanent generation is
// collected serially because of the requirement that during the
// processing of a klass AAA, any objects reference by AAA must
// already have been processed. This requirement is enforced by
// a left (lower address) to right (higher address) sliding compaction.
// The UseParallelOldGC collector is a stop-the-world garbage collector that
// does parts of the collection using parallel threads. The collection includes
// the tenured generation and the young generation. The permanent generation is
// collected at the same time as the other two generations but the permanent
// generation is collected by a single GC thread. The permanent generation is
// collected serially because of the requirement that during the processing of a
// klass AAA, any objects referenced by AAA must already have been processed.
// This requirement is enforced by a left (lower address) to right (higher
// address) sliding compaction.
//
// There are four phases of the collection.
//
......@@ -740,81 +569,75 @@ class BitBlockUpdateClosure: public ParMarkBitMapClosure {
// - move the objects to their destination
// - update some references and reinitialize some variables
//
// These three phases are invoked in PSParallelCompact::invoke_no_policy().
// The marking phase is implemented in PSParallelCompact::marking_phase()
// and does a complete marking of the heap.
// The summary phase is implemented in PSParallelCompact::summary_phase().
// The move and update phase is implemented in PSParallelCompact::compact().
// These three phases are invoked in PSParallelCompact::invoke_no_policy(). The
// marking phase is implemented in PSParallelCompact::marking_phase() and does a
// complete marking of the heap. The summary phase is implemented in
// PSParallelCompact::summary_phase(). The move and update phase is implemented
// in PSParallelCompact::compact().
//
// A space that is being collected is divided into chunks and with
// each chunk is associated an object of type ParallelCompactData.
// Each chunk is of a fixed size and typically will contain more than
// 1 object and may have parts of objects at the front and back of the
// chunk.
// A space that is being collected is divided into regions and with each region
// is associated an object of type ParallelCompactData. Each region is of a
// fixed size and typically will contain more than 1 object and may have parts
// of objects at the front and back of the region.
//
// chunk -----+---------------------+----------
// region -----+---------------------+----------
// objects covered [ AAA )[ BBB )[ CCC )[ DDD )
//
// The marking phase does a complete marking of all live objects in the
// heap. The marking also compiles the size of the data for
// all live objects covered by the chunk. This size includes the
// part of any live object spanning onto the chunk (part of AAA
// if it is live) from the front, all live objects contained in the chunk
// (BBB and/or CCC if they are live), and the part of any live objects
// covered by the chunk that extends off the chunk (part of DDD if it is
// live). The marking phase uses multiple GC threads and marking is
// done in a bit array of type ParMarkBitMap. The marking of the
// bit map is done atomically as is the accumulation of the size of the
// live objects covered by a chunk.
// The marking phase does a complete marking of all live objects in the heap.
// The marking also compiles the size of the data for all live objects covered
// by the region. This size includes the part of any live object spanning onto
// the region (part of AAA if it is live) from the front, all live objects
// contained in the region (BBB and/or CCC if they are live), and the part of
// any live objects covered by the region that extends off the region (part of
// DDD if it is live). The marking phase uses multiple GC threads and marking
// is done in a bit array of type ParMarkBitMap. The marking of the bit map is
// done atomically as is the accumulation of the size of the live objects
// covered by a region.
//
// The summary phase calculates the total live data to the left of
// each chunk XXX. Based on that total and the bottom of the space,
// it can calculate the starting location of the live data in XXX.
// The summary phase calculates for each chunk XXX quantities such as
// The summary phase calculates the total live data to the left of each region
// XXX. Based on that total and the bottom of the space, it can calculate the
// starting location of the live data in XXX. The summary phase calculates for
// each region XXX quantities such as
//
// - the amount of live data at the beginning of a chunk from an object
// entering the chunk.
// - the location of the first live data on the chunk
// - a count of the number of chunks receiving live data from XXX.
// - the amount of live data at the beginning of a region from an object
// entering the region.
// - the location of the first live data on the region
// - a count of the number of regions receiving live data from XXX.
//
// See ParallelCompactData for precise details. The summary phase also
// calculates the dense prefix for the compaction. The dense prefix
// is a portion at the beginning of the space that is not moved. The
// objects in the dense prefix do need to have their object references
// updated. See method summarize_dense_prefix().
// calculates the dense prefix for the compaction. The dense prefix is a
// portion at the beginning of the space that is not moved. The objects in the
// dense prefix do need to have their object references updated. See method
// summarize_dense_prefix().
//
// The summary phase is done using 1 GC thread.
//
// The compaction phase moves objects to their new location and updates
// all references in the object.
//
// A current exception is that objects that cross a chunk boundary
// are moved but do not have their references updated. References are
// not updated because it cannot easily be determined if the klass
// pointer KKK for the object AAA has been updated. KKK likely resides
// in a chunk to the left of the chunk containing AAA. These AAA's
// have their references updated at the end in a clean up phase.
// See the method PSParallelCompact::update_deferred_objects(). An
// alternate strategy is being investigated for this deferral of updating.
// The compaction phase moves objects to their new location and updates all
// references in the object.
//
// Compaction is done on a chunk basis. A chunk that is ready to be
// filled is put on a ready list and GC threads take chunks off the list
// and fill them. A chunk is ready to be filled if it is
// empty of live objects. Such a chunk may have been initially
// empty (only contained
// dead objects) or may have had all its live objects copied out already.
// A chunk that compacts into itself is also ready for filling. The
// ready list is initially filled with empty chunks and chunks compacting
// into themselves. There is always at least 1 chunk that can be put on
// the ready list. The chunks are atomically added and removed from
// the ready list.
// A current exception is that objects that cross a region boundary are moved
// but do not have their references updated. References are not updated because
// it cannot easily be determined if the klass pointer KKK for the object AAA
// has been updated. KKK likely resides in a region to the left of the region
// containing AAA. These AAA's have their references updated at the end in a
// clean up phase. See the method PSParallelCompact::update_deferred_objects().
// An alternate strategy is being investigated for this deferral of updating.
//
// Compaction is done on a region basis. A region that is ready to be filled is
// put on a ready list and GC threads take regions off the list and fill them. A
// region is ready to be filled if it is empty of live objects. Such a region may
// have been initially empty (only contained dead objects) or may have had all
// its live objects copied out already. A region that compacts into itself is
// also ready for filling. The ready list is initially filled with empty
// regions and regions compacting into themselves. There is always at least 1
// region that can be put on the ready list. The regions are atomically added
// and removed from the ready list.
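// --- Standalone sketch (not part of the HotSpot sources) ---------------------
// The summary calculation described above can be pictured as a prefix sum over
// per-region live sizes: everything live to the left of region i is compacted
// down toward the bottom of the space, and that total is the destination of
// region i's first live word. This sketch ignores the dense prefix, partial
// objects and multiple spaces; the region size and live counts are made-up
// values for illustration.
#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
  const std::size_t region_size = 512;                 // words, assumed
  const std::size_t bottom      = 0;                   // word index of the space bottom
  // live_words[i] plays the role of partial_obj_size() + live_obj_size().
  std::vector<std::size_t> live_words = {512, 300, 0, 128, 512};

  std::size_t live_to_left = 0;
  for (std::size_t i = 0; i < live_words.size(); ++i) {
    std::size_t destination = bottom + live_to_left;   // where region i's data goes
    std::printf("region %zu: live=%zu source word=%zu dest word=%zu\n",
                i, live_words[i], bottom + i * region_size, destination);
    live_to_left += live_words[i];
  }
  return 0;
}
// In the real data structure this destination, together with the number of
// regions it spills into (destination_count), is what lets each region be
// claimed and filled independently during the parallel compaction phase.
// -----------------------------------------------------------------------------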
class PSParallelCompact : AllStatic {
public:
// Convenient access to type names.
typedef ParMarkBitMap::idx_t idx_t;
typedef ParallelCompactData::ChunkData ChunkData;
typedef ParallelCompactData::BlockData BlockData;
typedef ParallelCompactData::RegionData RegionData;
typedef enum {
perm_space_id, old_space_id, eden_space_id,
......@@ -977,26 +800,26 @@ class PSParallelCompact : AllStatic {
// not reclaimed).
static double dead_wood_limiter(double density, size_t min_percent);
// Find the first (left-most) chunk in the range [beg, end) that has at least
// Find the first (left-most) region in the range [beg, end) that has at least
// dead_words of dead space to the left. The argument beg must be the first
// chunk in the space that is not completely live.
static ChunkData* dead_wood_limit_chunk(const ChunkData* beg,
const ChunkData* end,
size_t dead_words);
// region in the space that is not completely live.
static RegionData* dead_wood_limit_region(const RegionData* beg,
const RegionData* end,
size_t dead_words);
// Return a pointer to the first chunk in the range [beg, end) that is not
// Return a pointer to the first region in the range [beg, end) that is not
// completely full.
static ChunkData* first_dead_space_chunk(const ChunkData* beg,
const ChunkData* end);
static RegionData* first_dead_space_region(const RegionData* beg,
const RegionData* end);
// Return a value indicating the benefit or 'yield' if the compacted region
// were to start (or equivalently if the dense prefix were to end) at the
// candidate chunk. Higher values are better.
// candidate region. Higher values are better.
//
// The value is based on the amount of space reclaimed vs. the costs of (a)
// updating references in the dense prefix plus (b) copying objects and
// updating references in the compacted region.
static inline double reclaimed_ratio(const ChunkData* const candidate,
static inline double reclaimed_ratio(const RegionData* const candidate,
HeapWord* const bottom,
HeapWord* const top,
HeapWord* const new_top);
......@@ -1005,9 +828,9 @@ class PSParallelCompact : AllStatic {
static HeapWord* compute_dense_prefix(const SpaceId id,
bool maximum_compaction);
// Return true if dead space crosses onto the specified Chunk; bit must be the
// bit index corresponding to the first word of the Chunk.
static inline bool dead_space_crosses_boundary(const ChunkData* chunk,
// Return true if dead space crosses onto the specified Region; bit must be
// the bit index corresponding to the first word of the Region.
static inline bool dead_space_crosses_boundary(const RegionData* region,
idx_t bit);
// Summary phase utility routine to fill dead space (if any) at the dense
......@@ -1019,12 +842,6 @@ class PSParallelCompact : AllStatic {
static void summarize_space(SpaceId id, bool maximum_compaction);
static void summary_phase(ParCompactionManager* cm, bool maximum_compaction);
static bool block_first_offset(size_t block_index, idx_t* block_offset_ptr);
// Fill in the BlockData
static void summarize_blocks(ParCompactionManager* cm,
SpaceId first_compaction_space_id);
// The space that is compacted after space_id.
static SpaceId next_compaction_space_id(SpaceId space_id);
......@@ -1038,16 +855,16 @@ class PSParallelCompact : AllStatic {
static void compact_perm(ParCompactionManager* cm);
static void compact();
// Add available chunks to the stack and draining tasks to the task queue.
static void enqueue_chunk_draining_tasks(GCTaskQueue* q,
uint parallel_gc_threads);
// Add available regions to the stack and draining tasks to the task queue.
static void enqueue_region_draining_tasks(GCTaskQueue* q,
uint parallel_gc_threads);
// Add dense prefix update tasks to the task queue.
static void enqueue_dense_prefix_tasks(GCTaskQueue* q,
uint parallel_gc_threads);
// Add chunk stealing tasks to the task queue.
static void enqueue_chunk_stealing_tasks(
// Add region stealing tasks to the task queue.
static void enqueue_region_stealing_tasks(
GCTaskQueue* q,
ParallelTaskTerminator* terminator_ptr,
uint parallel_gc_threads);
......@@ -1154,56 +971,56 @@ class PSParallelCompact : AllStatic {
// Move and update the live objects in the specified space.
static void move_and_update(ParCompactionManager* cm, SpaceId space_id);
// Process the end of the given chunk range in the dense prefix.
// Process the end of the given region range in the dense prefix.
// This includes saving any object not updated.
static void dense_prefix_chunks_epilogue(ParCompactionManager* cm,
size_t chunk_start_index,
size_t chunk_end_index,
idx_t exiting_object_offset,
idx_t chunk_offset_start,
idx_t chunk_offset_end);
// Update a chunk in the dense prefix. For each live object
// in the chunk, update its interior references. For each
static void dense_prefix_regions_epilogue(ParCompactionManager* cm,
size_t region_start_index,
size_t region_end_index,
idx_t exiting_object_offset,
idx_t region_offset_start,
idx_t region_offset_end);
// Update a region in the dense prefix. For each live object
// in the region, update its interior references. For each
// dead object, fill it with deadwood. Dead space at the end
// of a chunk range will be filled to the start of the next
// live object regardless of the chunk_index_end. None of the
// of a region range will be filled to the start of the next
// live object regardless of the region_index_end. None of the
// objects in the dense prefix move and dead space is dead
// (holds only dead objects that don't need any processing), so
// dead space can be filled in any order.
static void update_and_deadwood_in_dense_prefix(ParCompactionManager* cm,
SpaceId space_id,
size_t chunk_index_start,
size_t chunk_index_end);
size_t region_index_start,
size_t region_index_end);
// Return the address of the count + 1st live word in the range [beg, end).
static HeapWord* skip_live_words(HeapWord* beg, HeapWord* end, size_t count);
// Return the address of the word to be copied to dest_addr, which must be
// aligned to a chunk boundary.
// aligned to a region boundary.
static HeapWord* first_src_addr(HeapWord* const dest_addr,
size_t src_chunk_idx);
size_t src_region_idx);
// Determine the next source chunk, set closure.source() to the start of the
// new chunk and return the chunk index. Parameter end_addr is the address one
// Determine the next source region, set closure.source() to the start of the
// new region and return the region index. Parameter end_addr is the address one
// beyond the end of source range just processed. If necessary, switch to a
// new source space and set src_space_id (in-out parameter) and src_space_top
// (out parameter) accordingly.
static size_t next_src_chunk(MoveAndUpdateClosure& closure,
SpaceId& src_space_id,
HeapWord*& src_space_top,
HeapWord* end_addr);
static size_t next_src_region(MoveAndUpdateClosure& closure,
SpaceId& src_space_id,
HeapWord*& src_space_top,
HeapWord* end_addr);
// Decrement the destination count for each non-empty source chunk in the
// range [beg_chunk, chunk(chunk_align_up(end_addr))).
// Decrement the destination count for each non-empty source region in the
// range [beg_region, region(region_align_up(end_addr))).
static void decrement_destination_counts(ParCompactionManager* cm,
size_t beg_chunk,
size_t beg_region,
HeapWord* end_addr);
// Fill a chunk, copying objects from one or more source chunks.
static void fill_chunk(ParCompactionManager* cm, size_t chunk_idx);
static void fill_and_update_chunk(ParCompactionManager* cm, size_t chunk) {
fill_chunk(cm, chunk);
// Fill a region, copying objects from one or more source regions.
static void fill_region(ParCompactionManager* cm, size_t region_idx);
static void fill_and_update_region(ParCompactionManager* cm, size_t region) {
fill_region(cm, region);
}
// Update the deferred objects in the space.
......@@ -1259,7 +1076,7 @@ class PSParallelCompact : AllStatic {
#ifndef PRODUCT
// Debugging support.
static const char* space_names[last_space_id];
static void print_chunk_ranges();
static void print_region_ranges();
static void print_dense_prefix_stats(const char* const algorithm,
const SpaceId id,
const bool maximum_compaction,
......@@ -1267,7 +1084,7 @@ class PSParallelCompact : AllStatic {
#endif // #ifndef PRODUCT
#ifdef ASSERT
// Verify that all the chunks have been emptied.
// Verify that all the regions have been emptied.
static void verify_complete(SpaceId space_id);
#endif // #ifdef ASSERT
};
......@@ -1376,17 +1193,17 @@ inline double PSParallelCompact::normal_distribution(double density) {
}
inline bool
PSParallelCompact::dead_space_crosses_boundary(const ChunkData* chunk,
PSParallelCompact::dead_space_crosses_boundary(const RegionData* region,
idx_t bit)
{
assert(bit > 0, "cannot call this for the first bit/chunk");
assert(_summary_data.chunk_to_addr(chunk) == _mark_bitmap.bit_to_addr(bit),
assert(bit > 0, "cannot call this for the first bit/region");
assert(_summary_data.region_to_addr(region) == _mark_bitmap.bit_to_addr(bit),
"sanity check");
// Dead space crosses the boundary if (1) a partial object does not extend
// onto the chunk, (2) an object does not start at the beginning of the chunk,
// and (3) an object does not end at the end of the prior chunk.
return chunk->partial_obj_size() == 0 &&
// onto the region, (2) an object does not start at the beginning of the
// region, and (3) an object does not end at the end of the prior region.
return region->partial_obj_size() == 0 &&
!_mark_bitmap.is_obj_beg(bit) &&
!_mark_bitmap.is_obj_end(bit - 1);
}
......
......@@ -123,8 +123,6 @@ void PSPermGen::move_and_update(ParCompactionManager* cm) {
void PSPermGen::precompact() {
// Reset start array first.
debug_only(if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) {)
_start_array.reset();
debug_only(})
object_mark_sweep()->precompact();
}
......@@ -50,7 +50,8 @@ class ImmutableSpace: public CHeapObj {
size_t capacity_in_bytes() const { return capacity_in_words() * HeapWordSize; }
// Size computations. Sizes are in heapwords.
size_t capacity_in_words() const { return pointer_delta(end(), bottom()); }
size_t capacity_in_words() const { return pointer_delta(end(), bottom()); }
virtual size_t capacity_in_words(Thread*) const { return capacity_in_words(); }
// Iteration.
virtual void oop_iterate(OopClosure* cl);
......
......@@ -23,13 +23,6 @@
*/
inline void MarkSweep::mark_object(oop obj) {
#ifndef SERIALGC
if (UseParallelOldGC && VerifyParallelOldWithMarkSweep) {
assert(PSParallelCompact::mark_bitmap()->is_marked(obj),
"Should be marked in the marking bitmap");
}
#endif // SERIALGC
// some marks may contain information we need to preserve so we store them away
// and overwrite the mark. We'll restore it at the end of markSweep.
markOop mark = obj->mark();
......
......@@ -181,6 +181,25 @@ size_t MutableNUMASpace::unsafe_max_tlab_alloc(Thread *thr) const {
return lgrp_spaces()->at(i)->space()->free_in_bytes();
}
size_t MutableNUMASpace::capacity_in_words(Thread* thr) const {
guarantee(thr != NULL, "No thread");
int lgrp_id = thr->lgrp_id();
if (lgrp_id == -1) {
if (lgrp_spaces()->length() > 0) {
return capacity_in_words() / lgrp_spaces()->length();
} else {
assert(false, "There should be at least one locality group");
return 0;
}
}
int i = lgrp_spaces()->find(&lgrp_id, LGRPSpace::equals);
if (i == -1) {
return 0;
}
return lgrp_spaces()->at(i)->space()->capacity_in_words();
}
// Check if the NUMA topology has changed. Add and remove spaces if needed.
// The update can be forced by setting the force parameter equal to true.
bool MutableNUMASpace::update_layout(bool force) {
......@@ -722,7 +741,8 @@ HeapWord* MutableNUMASpace::allocate(size_t size) {
i = os::random() % lgrp_spaces()->length();
}
MutableSpace *s = lgrp_spaces()->at(i)->space();
LGRPSpace* ls = lgrp_spaces()->at(i);
MutableSpace *s = ls->space();
HeapWord *p = s->allocate(size);
if (p != NULL) {
......@@ -743,6 +763,9 @@ HeapWord* MutableNUMASpace::allocate(size_t size) {
*(int*)i = 0;
}
}
if (p == NULL) {
ls->set_allocation_failed();
}
return p;
}
......@@ -761,7 +784,8 @@ HeapWord* MutableNUMASpace::cas_allocate(size_t size) {
if (i == -1) {
i = os::random() % lgrp_spaces()->length();
}
MutableSpace *s = lgrp_spaces()->at(i)->space();
LGRPSpace *ls = lgrp_spaces()->at(i);
MutableSpace *s = ls->space();
HeapWord *p = s->cas_allocate(size);
if (p != NULL) {
size_t remainder = pointer_delta(s->end(), p + size);
......@@ -790,6 +814,9 @@ HeapWord* MutableNUMASpace::cas_allocate(size_t size) {
*(int*)i = 0;
}
}
if (p == NULL) {
ls->set_allocation_failed();
}
return p;
}
......
......@@ -60,6 +60,7 @@ class MutableNUMASpace : public MutableSpace {
MutableSpace* _space;
MemRegion _invalid_region;
AdaptiveWeightedAverage *_alloc_rate;
bool _allocation_failed;
struct SpaceStats {
size_t _local_space, _remote_space, _unbiased_space, _uncommited_space;
......@@ -81,7 +82,7 @@ class MutableNUMASpace : public MutableSpace {
char* last_page_scanned() { return _last_page_scanned; }
void set_last_page_scanned(char* p) { _last_page_scanned = p; }
public:
LGRPSpace(int l) : _lgrp_id(l), _last_page_scanned(NULL) {
LGRPSpace(int l) : _lgrp_id(l), _last_page_scanned(NULL), _allocation_failed(false) {
_space = new MutableSpace();
_alloc_rate = new AdaptiveWeightedAverage(NUMAChunkResizeWeight);
}
......@@ -103,8 +104,21 @@ class MutableNUMASpace : public MutableSpace {
return *(int*)lgrp_id_value == p->lgrp_id();
}
// Report a failed allocation.
void set_allocation_failed() { _allocation_failed = true; }
void sample() {
alloc_rate()->sample(space()->used_in_bytes());
// If there was a failed allocation, make the allocation rate equal
// to the size of the whole chunk. This ensures the progress of
// the adaptation process.
size_t alloc_rate_sample;
if (_allocation_failed) {
alloc_rate_sample = space()->capacity_in_bytes();
_allocation_failed = false;
} else {
alloc_rate_sample = space()->used_in_bytes();
}
alloc_rate()->sample(alloc_rate_sample);
}
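// --- Standalone sketch (not part of the HotSpot sources) ---------------------
// A rough illustration of why sampling the whole chunk after a failed
// allocation keeps the adaptation moving. The simple exponentially weighted
// average below stands in for AdaptiveWeightedAverage, and the weight and byte
// values are assumptions for illustration only.
#include <cstdio>

struct Ewa {                                   // new_avg = avg + w * (sample - avg)
  double avg;
  double weight;
  explicit Ewa(double w) : avg(0.0), weight(w) {}
  void sample(double v) { avg += weight * (v - avg); }
};

int main() {
  const double capacity = 8.0 * 1024 * 1024;   // this lgroup chunk's capacity (assumed)
  const double used     = 1.0 * 1024 * 1024;   // whatever used_in_bytes() reports (assumed)

  Ewa old_behaviour(0.2), new_behaviour(0.2);
  for (int epoch = 0; epoch < 5; ++epoch) {
    old_behaviour.sample(used);                // sample what happens to be used
    new_behaviour.sample(capacity);            // failed allocation: sample the whole chunk
  }
  std::printf("average without the fix: %.0f bytes\n", old_behaviour.avg);
  std::printf("average with the fix:    %.0f bytes\n", new_behaviour.avg);
  return 0;
}
// -----------------------------------------------------------------------------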
MemRegion invalid_region() const { return _invalid_region; }
......@@ -190,6 +204,9 @@ class MutableNUMASpace : public MutableSpace {
virtual void ensure_parsability();
virtual size_t used_in_words() const;
virtual size_t free_in_words() const;
using MutableSpace::capacity_in_words;
virtual size_t capacity_in_words(Thread* thr) const;
virtual size_t tlab_capacity(Thread* thr) const;
virtual size_t unsafe_max_tlab_alloc(Thread* thr) const;
......
......@@ -586,6 +586,7 @@ locknode.hpp subnode.hpp
loopTransform.cpp addnode.hpp
loopTransform.cpp allocation.inline.hpp
loopTransform.cpp connode.hpp
loopTransform.cpp compileLog.hpp
loopTransform.cpp divnode.hpp
loopTransform.cpp loopnode.hpp
loopTransform.cpp mulnode.hpp
......@@ -601,6 +602,7 @@ loopnode.cpp addnode.hpp
loopnode.cpp allocation.inline.hpp
loopnode.cpp callnode.hpp
loopnode.cpp ciMethodData.hpp
loopnode.cpp compileLog.hpp
loopnode.cpp connode.hpp
loopnode.cpp divnode.hpp
loopnode.cpp loopnode.hpp
......
......@@ -25,19 +25,6 @@
#include "incls/_precompiled.incl"
#include "incls/_bytecodeInfo.cpp.incl"
// These variables are declared in parse1.cpp
extern int explicit_null_checks_inserted;
extern int explicit_null_checks_elided;
extern int explicit_null_checks_inserted_old;
extern int explicit_null_checks_elided_old;
extern int nodes_created_old;
extern int nodes_created;
extern int methods_parsed_old;
extern int methods_parsed;
extern int methods_seen;
extern int methods_seen_old;
//=============================================================================
//------------------------------InlineTree-------------------------------------
InlineTree::InlineTree( Compile* c, const InlineTree *caller_tree, ciMethod* callee, JVMState* caller_jvms, int caller_bci, float site_invoke_ratio )
......@@ -517,27 +504,3 @@ InlineTree* InlineTree::find_subtree_from_root(InlineTree* root, JVMState* jvms,
}
return iltp;
}
// ----------------------------------------------------------------------------
#ifndef PRODUCT
static void per_method_stats() {
// Compute difference between this method's cumulative totals and old totals
int explicit_null_checks_cur = explicit_null_checks_inserted - explicit_null_checks_inserted_old;
int elided_null_checks_cur = explicit_null_checks_elided - explicit_null_checks_elided_old;
// Print differences
if( explicit_null_checks_cur )
tty->print_cr("XXX Explicit NULL checks inserted: %d", explicit_null_checks_cur);
if( elided_null_checks_cur )
tty->print_cr("XXX Explicit NULL checks removed at parse time: %d", elided_null_checks_cur);
// Store the current cumulative totals
nodes_created_old = nodes_created;
methods_parsed_old = methods_parsed;
methods_seen_old = methods_seen;
explicit_null_checks_inserted_old = explicit_null_checks_inserted;
explicit_null_checks_elided_old = explicit_null_checks_elided;
}
#endif
......@@ -1034,6 +1034,39 @@ AllocateNode::AllocateNode(Compile* C, const TypeFunc *atype,
//=============================================================================
uint AllocateArrayNode::size_of() const { return sizeof(*this); }
// Retrieve the length from the AllocateArrayNode. Narrow the type with a
// CastII, if appropriate. If we are not allowed to create new nodes, and
// a CastII is appropriate, return NULL.
Node *AllocateArrayNode::make_ideal_length(const TypeOopPtr* oop_type, PhaseTransform *phase, bool allow_new_nodes) {
Node *length = in(AllocateNode::ALength);
assert(length != NULL, "length is not null");
const TypeInt* length_type = phase->find_int_type(length);
const TypeAryPtr* ary_type = oop_type->isa_aryptr();
if (ary_type != NULL && length_type != NULL) {
const TypeInt* narrow_length_type = ary_type->narrow_size_type(length_type);
if (narrow_length_type != length_type) {
// Assert one of:
// - the narrow_length is 0
// - the narrow_length is not wider than length
assert(narrow_length_type == TypeInt::ZERO ||
(narrow_length_type->_hi <= length_type->_hi &&
narrow_length_type->_lo >= length_type->_lo),
"narrow type must be narrower than length type");
// Return NULL if new nodes are not allowed
if (!allow_new_nodes) return NULL;
// Create a cast which is control dependent on the initialization to
// propagate the fact that the array length must be positive.
length = new (phase->C, 2) CastIINode(length, narrow_length_type);
length->set_req(0, initialization()->proj_out(0));
}
}
return length;
}
//=============================================================================
uint LockNode::size_of() const { return sizeof(*this); }
......
......@@ -755,6 +755,15 @@ public:
virtual int Opcode() const;
virtual uint size_of() const; // Size is bigger
// Dig the length operand out of an array allocation site.
Node* Ideal_length() {
return in(AllocateNode::ALength);
}
// Dig the length operand out of an array allocation site and narrow the
// type with a CastII, if necessary.
Node* make_ideal_length(const TypeOopPtr* ary_type, PhaseTransform *phase, bool can_create = true);
// Pattern-match a possible usage of AllocateArrayNode.
// Return null if no allocation is recognized.
static AllocateArrayNode* Ideal_array_allocation(Node* ptr, PhaseTransform* phase) {
......@@ -762,12 +771,6 @@ public:
return (allo == NULL || !allo->is_AllocateArray())
? NULL : allo->as_AllocateArray();
}
// Dig the length operand out of a (possible) array allocation site.
static Node* Ideal_length(Node* ptr, PhaseTransform* phase) {
AllocateArrayNode* allo = Ideal_array_allocation(ptr, phase);
return (allo == NULL) ? NULL : allo->in(AllocateNode::ALength);
}
};
//------------------------------AbstractLockNode-----------------------------------
......
......@@ -1665,7 +1665,11 @@ Node *PhiNode::Ideal(PhaseGVN *phase, bool can_reshape) {
// compress paths and change unreachable cycles to TOP
// If not, we can update the input infinitely along a MergeMem cycle
// Equivalent code is in MemNode::Ideal_common
Node *m = phase->transform(n);
Node *m = phase->transform(n);
if (outcnt() == 0) { // Above transform() may kill us!
progress = phase->C->top();
break;
}
// If transformed to a MergeMem, get the desired slice
// Otherwise the returned node represents memory for every slice
Node *new_mem = (m->is_MergeMem()) ?
......@@ -1765,6 +1769,51 @@ Node *PhiNode::Ideal(PhaseGVN *phase, bool can_reshape) {
}
}
#ifdef _LP64
// Push DecodeN down through phi.
// The rest of the phi graph will be transformed by splitting EncodeP nodes up through the phis.
if (UseCompressedOops && can_reshape && progress == NULL) {
bool may_push = true;
bool has_decodeN = false;
Node* in_decodeN = NULL;
for (uint i=1; i<req(); ++i) {// For all paths in
Node *ii = in(i);
if (ii->is_DecodeN() && ii->bottom_type() == bottom_type()) {
has_decodeN = true;
in_decodeN = ii->in(1);
} else if (!ii->is_Phi()) {
may_push = false;
}
}
if (has_decodeN && may_push) {
PhaseIterGVN *igvn = phase->is_IterGVN();
// Note: in_decodeN is used only to define the type of new phi here.
PhiNode *new_phi = PhiNode::make_blank(in(0), in_decodeN);
uint orig_cnt = req();
for (uint i=1; i<req(); ++i) {// For all paths in
Node *ii = in(i);
Node* new_ii = NULL;
if (ii->is_DecodeN()) {
assert(ii->bottom_type() == bottom_type(), "sanity");
new_ii = ii->in(1);
} else {
assert(ii->is_Phi(), "sanity");
if (ii->as_Phi() == this) {
new_ii = new_phi;
} else {
new_ii = new (phase->C, 2) EncodePNode(ii, in_decodeN->bottom_type());
igvn->register_new_node_with_optimizer(new_ii);
}
}
new_phi->set_req(i, new_ii);
}
igvn->register_new_node_with_optimizer(new_phi, this);
progress = new (phase->C, 2) DecodeNNode(new_phi, bottom_type());
}
}
#endif
return progress; // Return any progress
}
......
......@@ -467,6 +467,7 @@ Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr
}
}
set_print_assembly(print_opto_assembly);
set_parsed_irreducible_loop(false);
#endif
if (ProfileTraps) {
......@@ -550,6 +551,8 @@ Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr
rethrow_exceptions(kit.transfer_exceptions_into_jvms());
}
print_method("Before RemoveUseless");
// Remove clutter produced by parsing.
if (!failing()) {
ResourceMark rm;
......@@ -615,8 +618,6 @@ Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr
if (failing()) return;
NOT_PRODUCT( verify_graph_edges(); )
print_method("Before Matching");
#ifndef PRODUCT
if (PrintIdeal) {
ttyLocker ttyl; // keep the following output all in one block
......@@ -720,6 +721,7 @@ Compile::Compile( ciEnv* ci_env,
TraceTime t1(NULL, &_t_totalCompilation, TimeCompiler, false);
TraceTime t2(NULL, &_t_stubCompilation, TimeCompiler, false);
set_print_assembly(PrintFrameConverterAssembly);
set_parsed_irreducible_loop(false);
#endif
CompileWrapper cw(this);
Init(/*AliasLevel=*/ 0);
......@@ -2073,6 +2075,44 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &fpu ) {
}
#ifdef _LP64
case Op_CastPP:
if (n->in(1)->is_DecodeN() && UseImplicitNullCheckForNarrowOop) {
Compile* C = Compile::current();
Node* in1 = n->in(1);
const Type* t = n->bottom_type();
Node* new_in1 = in1->clone();
new_in1->as_DecodeN()->set_type(t);
if (!Matcher::clone_shift_expressions) {
//
// x86, ARM and friends can handle 2 adds in addressing mode
// and Matcher can fold a DecodeN node into address by using
// a narrow oop directly and do implicit NULL check in address:
//
// [R12 + narrow_oop_reg<<3 + offset]
// NullCheck narrow_oop_reg
//
// On other platforms (Sparc) we have to keep new DecodeN node and
// use it to do implicit NULL check in address:
//
// decode_not_null narrow_oop_reg, base_reg
// [base_reg + offset]
// NullCheck base_reg
//
// Pin the new DecodeN node to the non-null path on these platforms (Sparc)
// to record which NULL check the new DecodeN node corresponds to,
// so that it can be used as the value in implicit_null_check().
//
new_in1->set_req(0, n->in(0));
}
n->subsume_by(new_in1);
if (in1->outcnt() == 0) {
in1->disconnect_inputs(NULL);
}
}
break;
case Op_CmpP:
// Do this transformation here to preserve CmpPNode::sub() and
// other TypePtr related Ideal optimizations (for example, ptr nullness).
......@@ -2092,24 +2132,44 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &fpu ) {
} else if (in2->Opcode() == Op_ConP) {
const Type* t = in2->bottom_type();
if (t == TypePtr::NULL_PTR && UseImplicitNullCheckForNarrowOop) {
if (Matcher::clone_shift_expressions) {
// x86, ARM and friends can handle 2 adds in addressing mode.
// Decode a narrow oop and do implicit NULL check in address
// [R12 + narrow_oop_reg<<3 + offset]
new_in2 = ConNode::make(C, TypeNarrowOop::NULL_PTR);
} else {
// Don't replace CmpP(o ,null) if 'o' is used in AddP
// to generate implicit NULL check on Sparc where
// narrow oops can't be used in address.
uint i = 0;
for (; i < in1->outcnt(); i++) {
if (in1->raw_out(i)->is_AddP())
break;
}
if (i >= in1->outcnt()) {
new_in2 = ConNode::make(C, TypeNarrowOop::NULL_PTR);
}
}
new_in2 = ConNode::make(C, TypeNarrowOop::NULL_PTR);
//
// This transformation together with CastPP transformation above
// will generate code for implicit NULL checks for compressed oops.
//
// The original code after Optimize()
//
// LoadN memory, narrow_oop_reg
// decode narrow_oop_reg, base_reg
// CmpP base_reg, NULL
// CastPP base_reg // NotNull
// Load [base_reg + offset], val_reg
//
// after these transformations will be
//
// LoadN memory, narrow_oop_reg
// CmpN narrow_oop_reg, NULL
// decode_not_null narrow_oop_reg, base_reg
// Load [base_reg + offset], val_reg
//
// and the uncommon path (== NULL) will use narrow_oop_reg directly
// since narrow oops can be used in debug info now (see the code in
// final_graph_reshaping_walk()).
//
// At the end the code will be matched to
// on x86:
//
// Load_narrow_oop memory, narrow_oop_reg
// Load [R12 + narrow_oop_reg<<3 + offset], val_reg
// NullCheck narrow_oop_reg
//
// and on sparc:
//
// Load_narrow_oop memory, narrow_oop_reg
// decode_not_null narrow_oop_reg, base_reg
// Load [base_reg + offset], val_reg
// NullCheck base_reg
//
} else if (t->isa_oopptr()) {
new_in2 = ConNode::make(C, t->make_narrowoop());
}
......@@ -2126,6 +2186,49 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &fpu ) {
}
}
break;
case Op_DecodeN:
assert(!n->in(1)->is_EncodeP(), "should be optimized out");
break;
case Op_EncodeP: {
Node* in1 = n->in(1);
if (in1->is_DecodeN()) {
n->subsume_by(in1->in(1));
} else if (in1->Opcode() == Op_ConP) {
Compile* C = Compile::current();
const Type* t = in1->bottom_type();
if (t == TypePtr::NULL_PTR) {
n->subsume_by(ConNode::make(C, TypeNarrowOop::NULL_PTR));
} else if (t->isa_oopptr()) {
n->subsume_by(ConNode::make(C, t->make_narrowoop()));
}
}
if (in1->outcnt() == 0) {
in1->disconnect_inputs(NULL);
}
break;
}
case Op_Phi:
if (n->as_Phi()->bottom_type()->isa_narrowoop()) {
// The EncodeP optimization may create a Phi with the same edges
// for all paths. It is not handled well by the Register Allocator.
Node* unique_in = n->in(1);
assert(unique_in != NULL, "");
uint cnt = n->req();
for (uint i = 2; i < cnt; i++) {
Node* m = n->in(i);
assert(m != NULL, "");
if (unique_in != m)
unique_in = NULL;
}
if (unique_in != NULL) {
n->subsume_by(unique_in);
}
}
break;
#endif
case Op_ModI:
......
......@@ -160,6 +160,7 @@ class Compile : public Phase {
bool _print_assembly; // True if we should dump assembly code for this compilation
#ifndef PRODUCT
bool _trace_opto_output;
bool _parsed_irreducible_loop; // True if ciTypeFlow detected irreducible loops during parsing
#endif
// Compilation environment.
......@@ -319,6 +320,8 @@ class Compile : public Phase {
}
#ifndef PRODUCT
bool trace_opto_output() const { return _trace_opto_output; }
bool parsed_irreducible_loop() const { return _parsed_irreducible_loop; }
void set_parsed_irreducible_loop(bool z) { _parsed_irreducible_loop = z; }
#endif
void begin_method() {
......
......@@ -433,8 +433,8 @@ Node *ConstraintCastNode::Ideal_DU_postCCP( PhaseCCP *ccp ) {
// If not converting int->oop, throw away cast after constant propagation
Node *CastPPNode::Ideal_DU_postCCP( PhaseCCP *ccp ) {
const Type *t = ccp->type(in(1));
if (!t->isa_oop_ptr()) {
return NULL; // do not transform raw pointers
if (!t->isa_oop_ptr() || in(1)->is_DecodeN()) {
return NULL; // do not transform raw pointers or narrow oops
}
return ConstraintCastNode::Ideal_DU_postCCP(ccp);
}
......
......@@ -795,7 +795,7 @@ ciMethod* Parse::optimize_inlining(ciMethod* caller, int bci, ciInstanceKlass* k
ciInstanceKlass *ikl = receiver_type->klass()->as_instance_klass();
if (ikl->is_loaded() && ikl->is_initialized() && !ikl->is_interface() &&
(ikl == actual_receiver || ikl->is_subclass_of(actual_receiver))) {
(ikl == actual_receiver || ikl->is_subtype_of(actual_receiver))) {
// ikl is a same or better type than the original actual_receiver,
// e.g. static receiver from bytecodes.
actual_receiver = ikl;
......
......@@ -587,7 +587,7 @@ PreserveJVMState::PreserveJVMState(GraphKit* kit, bool clone_map) {
#ifdef ASSERT
_bci = kit->bci();
Parse* parser = kit->is_Parse();
int block = (parser == NULL || parser->block() == NULL) ? -1 : parser->block()->pre_order();
int block = (parser == NULL || parser->block() == NULL) ? -1 : parser->block()->rpo();
_block = block;
#endif
}
......@@ -596,7 +596,7 @@ PreserveJVMState::~PreserveJVMState() {
#ifdef ASSERT
assert(kit->bci() == _bci, "bci must not shift");
Parse* parser = kit->is_Parse();
int block = (parser == NULL || parser->block() == NULL) ? -1 : parser->block()->pre_order();
int block = (parser == NULL || parser->block() == NULL) ? -1 : parser->block()->rpo();
assert(block == _block, "block must not shift");
#endif
kit->set_map(_map);
......@@ -1049,10 +1049,19 @@ Node* GraphKit::load_object_klass(Node* obj) {
//-------------------------load_array_length-----------------------------------
Node* GraphKit::load_array_length(Node* array) {
// Special-case a fresh allocation to avoid building nodes:
Node* alen = AllocateArrayNode::Ideal_length(array, &_gvn);
if (alen != NULL) return alen;
Node *r_adr = basic_plus_adr(array, arrayOopDesc::length_offset_in_bytes());
return _gvn.transform( new (C, 3) LoadRangeNode(0, immutable_memory(), r_adr, TypeInt::POS));
AllocateArrayNode* alloc = AllocateArrayNode::Ideal_array_allocation(array, &_gvn);
Node *alen;
if (alloc == NULL) {
Node *r_adr = basic_plus_adr(array, arrayOopDesc::length_offset_in_bytes());
alen = _gvn.transform( new (C, 3) LoadRangeNode(0, immutable_memory(), r_adr, TypeInt::POS));
} else {
alen = alloc->Ideal_length();
Node* ccast = alloc->make_ideal_length(_gvn.type(array)->is_aryptr(), &_gvn);
if (ccast != alen) {
alen = _gvn.transform(ccast);
}
}
return alen;
}
//------------------------------do_null_check----------------------------------
......@@ -2847,20 +2856,18 @@ Node* GraphKit::set_output_for_allocation(AllocateNode* alloc,
assert(just_allocated_object(control()) == javaoop, "just allocated");
#ifdef ASSERT
{ // Verify that the AllocateNode::Ideal_foo recognizers work:
Node* kn = alloc->in(AllocateNode::KlassNode);
Node* ln = alloc->in(AllocateNode::ALength);
assert(AllocateNode::Ideal_klass(rawoop, &_gvn) == kn,
"Ideal_klass works");
assert(AllocateNode::Ideal_klass(javaoop, &_gvn) == kn,
"Ideal_klass works");
{ // Verify that the AllocateNode::Ideal_allocation recognizers work:
assert(AllocateNode::Ideal_allocation(rawoop, &_gvn) == alloc,
"Ideal_allocation works");
assert(AllocateNode::Ideal_allocation(javaoop, &_gvn) == alloc,
"Ideal_allocation works");
if (alloc->is_AllocateArray()) {
assert(AllocateArrayNode::Ideal_length(rawoop, &_gvn) == ln,
"Ideal_length works");
assert(AllocateArrayNode::Ideal_length(javaoop, &_gvn) == ln,
"Ideal_length works");
assert(AllocateArrayNode::Ideal_array_allocation(rawoop, &_gvn) == alloc->as_AllocateArray(),
"Ideal_allocation works");
assert(AllocateArrayNode::Ideal_array_allocation(javaoop, &_gvn) == alloc->as_AllocateArray(),
"Ideal_allocation works");
} else {
assert(ln->is_top(), "no length, please");
assert(alloc->in(AllocateNode::ALength)->is_top(), "no length, please");
}
}
#endif //ASSERT
......@@ -3109,25 +3116,20 @@ Node* GraphKit::new_array(Node* klass_node, // array klass (maybe variable)
// (This happens via a non-constant argument to inline_native_newArray.)
// In any case, the value of klass_node provides the desired array type.
const TypeInt* length_type = _gvn.find_int_type(length);
const TypeInt* narrow_length_type = NULL;
const TypeOopPtr* ary_type = _gvn.type(klass_node)->is_klassptr()->as_instance_type();
if (ary_type->isa_aryptr() && length_type != NULL) {
// Try to get a better type than POS for the size
ary_type = ary_type->is_aryptr()->cast_to_size(length_type);
narrow_length_type = ary_type->is_aryptr()->size();
if (narrow_length_type == length_type)
narrow_length_type = NULL;
}
Node* javaoop = set_output_for_allocation(alloc, ary_type, raw_mem_only);
// Cast length on remaining path to be positive:
if (narrow_length_type != NULL) {
Node* ccast = new (C, 2) CastIINode(length, narrow_length_type);
ccast->set_req(0, control());
_gvn.set_type_bottom(ccast);
record_for_igvn(ccast);
if (map()->find_edge(length) >= 0) {
// Cast length on remaining path to be as narrow as possible
if (map()->find_edge(length) >= 0) {
Node* ccast = alloc->make_ideal_length(ary_type, &_gvn);
if (ccast != length) {
_gvn.set_type_bottom(ccast);
record_for_igvn(ccast);
replace_in_map(length, ccast);
}
}
......
......@@ -485,8 +485,9 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
// Liveout things are presumed live for the whole block. We accumulate
// 'area' accordingly. If they get killed in the block, we'll subtract
// the unused part of the block from the area.
double cost = b->_freq * double(last_inst-last_phi);
assert( cost >= 0, "negative spill cost" );
int inst_count = last_inst - last_phi;
double cost = (inst_count <= 0) ? 0.0 : b->_freq * double(inst_count);
assert(!(cost < 0.0), "negative spill cost" );
IndexSetIterator elements(&liveout);
uint lidx;
while ((lidx = elements.next()) != 0) {
......@@ -590,7 +591,7 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
} else { // Else it is live
// A DEF also ends 'area' partway through the block.
lrgs(r)._area -= cost;
assert( lrgs(r)._area >= 0, "negative spill area" );
assert(!(lrgs(r)._area < 0.0), "negative spill area" );
// Insure high score for immediate-use spill copies so they get a color
if( n->is_SpillCopy()
......@@ -703,8 +704,9 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
} // End of if normal register-allocated value
cost -= b->_freq; // Area remaining in the block
if( cost < 0.0 ) cost = 0.0; // Cost goes negative in the Phi area
// Area remaining in the block
inst_count--;
cost = (inst_count <= 0) ? 0.0 : b->_freq * double(inst_count);
// Make all inputs live
if( !n->is_Phi() ) { // Phi function uses come from prior block
......@@ -751,7 +753,7 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
assert( pressure[0] == count_int_pressure (&liveout), "" );
assert( pressure[1] == count_float_pressure(&liveout), "" );
}
assert( lrg._area >= 0, "negative spill area" );
assert(!(lrg._area < 0.0), "negative spill area" );
}
}
} // End of reverse pass over all instructions in block
......
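The change above replaces last_inst - last_phi, which can be zero for a block containing nothing but Phis, with an inst_count that is clamped before being multiplied by the block frequency, and the per-instruction update now recomputes the cost from the decremented count rather than subtracting b->_freq and clamping afterwards. A small standalone sketch of just that clamping (toy numbers, not the real liveness bookkeeping):

#include <cstdio>

// Remaining cost at a point in the block: freq * (instructions still to scan),
// clamped so it can never go negative (e.g. for blocks containing only Phis).
static double remaining_cost(double freq, int inst_count) {
  return (inst_count <= 0) ? 0.0 : freq * (double)inst_count;
}

int main() {
  double freq = 2.0;

  // A block whose only instructions are Phis: last_inst == last_phi.
  std::printf("phi-only block: %.1f\n", remaining_cost(freq, 0));   // 0.0, not negative

  // A block with 4 real instructions, scanned in reverse: the cost is
  // recomputed from the decremented count instead of subtracting freq
  // and clamping afterwards.
  for (int inst_count = 4; inst_count >= 0; inst_count--) {
    std::printf("inst_count=%d cost=%.1f\n", inst_count, remaining_cost(freq, inst_count));
  }
  return 0;
}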
......@@ -1012,6 +1012,8 @@ void PhaseIdealLoop::do_unroll( IdealLoopTree *loop, Node_List &old_new, bool ad
if (!has_ctrl(old))
set_loop(nnn, loop);
}
loop->record_for_igvn();
}
//------------------------------do_maximally_unroll----------------------------
......
......@@ -1279,7 +1279,7 @@ void IdealLoopTree::counted_loop( PhaseIdealLoop *phase ) {
// Visit all children, looking for Phis
for (DUIterator i = cl->outs(); cl->has_out(i); i++) {
Node *out = cl->out(i);
if (!out->is_Phi()) continue; // Looking for phis
if (!out->is_Phi() || out == phi) continue; // Looking for other phis
PhiNode* phi2 = out->as_Phi();
Node *incr2 = phi2->in( LoopNode::LoopBackControl );
// Look for induction variables of the form: X += constant
......@@ -1388,6 +1388,37 @@ void IdealLoopTree::dump( ) const {
#endif
static void log_loop_tree(IdealLoopTree* root, IdealLoopTree* loop, CompileLog* log) {
if (loop == root) {
if (loop->_child != NULL) {
log->begin_head("loop_tree");
log->end_head();
if( loop->_child ) log_loop_tree(root, loop->_child, log);
log->tail("loop_tree");
assert(loop->_next == NULL, "what?");
}
} else {
Node* head = loop->_head;
log->begin_head("loop");
log->print(" idx='%d' ", head->_idx);
if (loop->_irreducible) log->print("irreducible='1' ");
if (head->is_Loop()) {
if (head->as_Loop()->is_inner_loop()) log->print("inner_loop='1' ");
if (head->as_Loop()->is_partial_peel_loop()) log->print("partial_peel_loop='1' ");
}
if (head->is_CountedLoop()) {
CountedLoopNode* cl = head->as_CountedLoop();
if (cl->is_pre_loop()) log->print("pre_loop='%d' ", cl->main_idx());
if (cl->is_main_loop()) log->print("main_loop='%d' ", cl->_idx);
if (cl->is_post_loop()) log->print("post_loop='%d' ", cl->main_idx());
}
log->end_head();
if( loop->_child ) log_loop_tree(root, loop->_child, log);
log->tail("loop");
if( loop->_next ) log_loop_tree(root, loop->_next, log);
}
}
//=============================================================================
//------------------------------PhaseIdealLoop---------------------------------
// Create a PhaseLoop. Build the ideal Loop tree. Map each Ideal Node to
......@@ -1624,10 +1655,13 @@ PhaseIdealLoop::PhaseIdealLoop( PhaseIterGVN &igvn, const PhaseIdealLoop *verify
// Cleanup any modified bits
_igvn.optimize();
// Do not repeat loop optimizations if irreducible loops are present
// by claiming no-progress.
if( _has_irreducible_loops )
C->clear_major_progress();
// disable assert until issue with split_flow_path is resolved (6742111)
// assert(!_has_irreducible_loops || C->parsed_irreducible_loop() || C->is_osr_compilation(),
// "shouldn't introduce irreducible loops");
if (C->log() != NULL) {
log_loop_tree(_ltree_root, _ltree_root, C->log());
}
}
#ifndef PRODUCT
......@@ -2732,11 +2766,7 @@ void PhaseIdealLoop::dump( ) const {
}
void PhaseIdealLoop::dump( IdealLoopTree *loop, uint idx, Node_List &rpo_list ) const {
// Indent by loop nesting depth
for( uint x = 0; x < loop->_nest; x++ )
tty->print(" ");
tty->print_cr("---- Loop N%d-N%d ----", loop->_head->_idx,loop->_tail->_idx);
loop->dump_head();
// Now scan for CFG nodes in the same loop
for( uint j=idx; j > 0; j-- ) {
......
......@@ -192,6 +192,8 @@ public:
int is_main_no_pre_loop() const { return _loop_flags & Main_Has_No_Pre_Loop; }
void set_main_no_pre_loop() { _loop_flags |= Main_Has_No_Pre_Loop; }
int main_idx() const { return _main_idx; }
void set_pre_loop (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Pre ; _main_idx = main->_idx; }
void set_main_loop ( ) { assert(is_normal_loop(),""); _loop_flags |= Main; }
......
......@@ -2667,6 +2667,10 @@ void PhaseIdealLoop::reorg_offsets( IdealLoopTree *loop ) {
// Fix this by adjusting to use the post-increment trip counter.
Node *phi = cl->phi();
if( !phi ) return; // Dead infinite loop
// Shape messed up, probably by iteration_split_impl
if (phi->in(LoopNode::LoopBackControl) != cl->incr()) return;
bool progress = true;
while (progress) {
progress = false;
......
......@@ -273,7 +273,7 @@ void Matcher::match( ) {
find_shared( C->root() );
find_shared( C->top() );
C->print_method("Before Matching", 2);
C->print_method("Before Matching");
// Swap out to old-space; emptying new-space
Arena *old = C->node_arena()->move_contents(C->old_arena());
......@@ -840,7 +840,7 @@ Node *Matcher::xform( Node *n, int max_stack ) {
_new2old_map.map(m->_idx, n);
#endif
if (m->in(0) != NULL) // m might be top
collect_null_checks(m);
collect_null_checks(m, n);
} else { // Else just a regular 'ol guy
m = n->clone(); // So just clone into new-space
#ifdef ASSERT
......@@ -1478,12 +1478,19 @@ MachNode *Matcher::ReduceInst( State *s, int rule, Node *&mem ) {
m = _mem_node;
assert(m != NULL && m->is_Mem(), "expecting memory node");
}
if (m->adr_type() != mach->adr_type()) {
const Type* mach_at = mach->adr_type();
// A DecodeN node consumed by an address may have a different type
// than its input. Don't compare types in that case.
if (m->adr_type() != mach_at && m->in(MemNode::Address)->is_AddP() &&
m->in(MemNode::Address)->in(AddPNode::Address)->is_DecodeN()) {
mach_at = m->adr_type();
}
if (m->adr_type() != mach_at) {
m->dump();
tty->print_cr("mach:");
mach->dump(1);
}
assert(m->adr_type() == mach->adr_type(), "matcher should not change adr type");
assert(m->adr_type() == mach_at, "matcher should not change adr type");
}
#endif
}
......@@ -1995,7 +2002,7 @@ void Matcher::dump_old2new_map() {
// it. Used by later implicit-null-check handling. Actually collects
// either an IfTrue or IfFalse for the common NOT-null path, AND the ideal
// value being tested.
void Matcher::collect_null_checks( Node *proj ) {
void Matcher::collect_null_checks( Node *proj, Node *orig_proj ) {
Node *iff = proj->in(0);
if( iff->Opcode() == Op_If ) {
// During matching If's have Bool & Cmp side-by-side
......@@ -2008,20 +2015,47 @@ void Matcher::collect_null_checks( Node *proj ) {
if (ct == TypePtr::NULL_PTR ||
(opc == Op_CmpN && ct == TypeNarrowOop::NULL_PTR)) {
bool push_it = false;
if( proj->Opcode() == Op_IfTrue ) {
extern int all_null_checks_found;
all_null_checks_found++;
if( b->_test._test == BoolTest::ne ) {
_null_check_tests.push(proj);
_null_check_tests.push(cmp->in(1));
push_it = true;
}
} else {
assert( proj->Opcode() == Op_IfFalse, "" );
if( b->_test._test == BoolTest::eq ) {
_null_check_tests.push(proj);
_null_check_tests.push(cmp->in(1));
push_it = true;
}
}
if( push_it ) {
_null_check_tests.push(proj);
Node* val = cmp->in(1);
#ifdef _LP64
if (UseCompressedOops && !Matcher::clone_shift_expressions &&
val->bottom_type()->isa_narrowoop()) {
//
// Look for DecodeN node which should be pinned to orig_proj.
// On platforms (Sparc) which cannot handle 2 adds
// in addressing mode we have to keep a DecodeN node and
// use it to do implicit NULL check in address.
//
// DecodeN node was pinned to non-null path (orig_proj) during
// CastPP transformation in final_graph_reshaping_impl().
//
uint cnt = orig_proj->outcnt();
for (uint i = 0; i < orig_proj->outcnt(); i++) {
Node* d = orig_proj->raw_out(i);
if (d->is_DecodeN() && d->in(1) == val) {
val = d;
val->set_req(0, NULL); // Unpin now.
break;
}
}
}
#endif
_null_check_tests.push(val);
}
}
}
}
......
......@@ -166,7 +166,7 @@ public:
// List of IfFalse or IfTrue Nodes that indicate a taken null test.
// List is valid in the post-matching space.
Node_List _null_check_tests;
void collect_null_checks( Node *proj );
void collect_null_checks( Node *proj, Node *orig_proj );
void validate_null_checks( );
Matcher( Node_List &proj_list );
......
......@@ -1887,6 +1887,38 @@ const Type *LoadRangeNode::Value( PhaseTransform *phase ) const {
return tap->size();
}
//-------------------------------Ideal---------------------------------------
// Feed through the length in AllocateArray(...length...)._length.
Node *LoadRangeNode::Ideal(PhaseGVN *phase, bool can_reshape) {
Node* p = MemNode::Ideal_common(phase, can_reshape);
if (p) return (p == NodeSentinel) ? NULL : p;
// Take apart the address into an oop and an offset.
// Return 'this' if we cannot.
Node* adr = in(MemNode::Address);
intptr_t offset = 0;
Node* base = AddPNode::Ideal_base_and_offset(adr, phase, offset);
if (base == NULL) return NULL;
const TypeAryPtr* tary = phase->type(adr)->isa_aryptr();
if (tary == NULL) return NULL;
// We can fetch the length directly through an AllocateArrayNode.
// This works even if the length is not constant (clone or newArray).
if (offset == arrayOopDesc::length_offset_in_bytes()) {
AllocateArrayNode* alloc = AllocateArrayNode::Ideal_array_allocation(base, phase);
if (alloc != NULL) {
Node* allocated_length = alloc->Ideal_length();
Node* len = alloc->make_ideal_length(tary, phase);
if (allocated_length != len) {
// New CastII improves on this.
return len;
}
}
}
return NULL;
}
//------------------------------Identity---------------------------------------
// Feed through the length in AllocateArray(...length...)._length.
Node* LoadRangeNode::Identity( PhaseTransform *phase ) {
......@@ -1905,15 +1937,22 @@ Node* LoadRangeNode::Identity( PhaseTransform *phase ) {
// We can fetch the length directly through an AllocateArrayNode.
// This works even if the length is not constant (clone or newArray).
if (offset == arrayOopDesc::length_offset_in_bytes()) {
Node* allocated_length = AllocateArrayNode::Ideal_length(base, phase);
if (allocated_length != NULL) {
return allocated_length;
AllocateArrayNode* alloc = AllocateArrayNode::Ideal_array_allocation(base, phase);
if (alloc != NULL) {
Node* allocated_length = alloc->Ideal_length();
// Do not allow make_ideal_length to allocate a CastII node.
Node* len = alloc->make_ideal_length(tary, phase, false);
if (allocated_length == len) {
// Return allocated_length only if it would not be improved by a CastII.
return allocated_length;
}
}
}
return this;
}
//=============================================================================
//---------------------------StoreNode::make-----------------------------------
// Polymorphic factory method:
......
......@@ -241,6 +241,7 @@ public:
virtual int Opcode() const;
virtual const Type *Value( PhaseTransform *phase ) const;
virtual Node *Identity( PhaseTransform *phase );
virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
};
//------------------------------LoadLNode--------------------------------------
......
......@@ -167,9 +167,19 @@ class Parse : public GraphKit {
int start() const { return flow()->start(); }
int limit() const { return flow()->limit(); }
int pre_order() const { return flow()->pre_order(); }
int rpo() const { return flow()->rpo(); }
int start_sp() const { return flow()->stack_size(); }
bool is_loop_head() const { return flow()->is_loop_head(); }
bool is_SEL_head() const { return flow()->is_single_entry_loop_head(); }
bool is_SEL_backedge(Block* pred) const{ return is_SEL_head() && pred->rpo() >= rpo(); }
bool is_invariant_local(uint i) const {
const JVMState* jvms = start_map()->jvms();
if (!jvms->is_loc(i)) return false;
return flow()->is_invariant_local(i - jvms->locoff());
}
bool can_elide_SEL_phi(uint i) const { assert(is_SEL_head(),""); return is_invariant_local(i); }
const Type* peek(int off=0) const { return stack_type_at(start_sp() - (off+1)); }
const Type* stack_type_at(int i) const;
......@@ -305,7 +315,7 @@ class Parse : public GraphKit {
// entry_bci() -- see osr_bci, etc.
ciTypeFlow* flow() const { return _flow; }
// blocks() -- see pre_order_at, start_block, etc.
// blocks() -- see rpo_at, start_block, etc.
int block_count() const { return _block_count; }
GraphKit& exits() { return _exits; }
......@@ -330,12 +340,12 @@ class Parse : public GraphKit {
// Must this parse be aborted?
bool failing() { return C->failing(); }
Block* pre_order_at(int po) {
assert(0 <= po && po < _block_count, "oob");
return &_blocks[po];
Block* rpo_at(int rpo) {
assert(0 <= rpo && rpo < _block_count, "oob");
return &_blocks[rpo];
}
Block* start_block() {
return pre_order_at(flow()->start_block()->pre_order());
return rpo_at(flow()->start_block()->rpo());
}
// Can return NULL if the flow pass did not complete a block.
Block* successor_for_bci(int bci) {
......@@ -359,9 +369,6 @@ class Parse : public GraphKit {
// Parse all the basic blocks.
void do_all_blocks();
// Helper for do_all_blocks; makes one pass in pre-order.
void visit_blocks();
// Parse the current basic block
void do_one_block();
......
......@@ -29,17 +29,17 @@
// the most. Some of the non-static variables are needed in bytecodeInfo.cpp
// and eventually should be encapsulated in a proper class (gri 8/18/98).
int nodes_created = 0; int nodes_created_old = 0;
int methods_parsed = 0; int methods_parsed_old = 0;
int methods_seen = 0; int methods_seen_old = 0;
int explicit_null_checks_inserted = 0, explicit_null_checks_inserted_old = 0;
int explicit_null_checks_elided = 0, explicit_null_checks_elided_old = 0;
int nodes_created = 0;
int methods_parsed = 0;
int methods_seen = 0;
int blocks_parsed = 0;
int blocks_seen = 0;
int explicit_null_checks_inserted = 0;
int explicit_null_checks_elided = 0;
int all_null_checks_found = 0, implicit_null_checks = 0;
int implicit_null_throws = 0;
int parse_idx = 0;
size_t parse_arena = 0;
int reclaim_idx = 0;
int reclaim_in = 0;
int reclaim_node = 0;
......@@ -61,6 +61,7 @@ void Parse::print_statistics() {
tty->cr();
if (methods_seen != methods_parsed)
tty->print_cr("Reasons for parse failures (NOT cumulative):");
tty->print_cr("Blocks parsed: %d Blocks seen: %d", blocks_parsed, blocks_seen);
if( explicit_null_checks_inserted )
tty->print_cr("%d original NULL checks - %d elided (%2d%%); optimizer leaves %d,", explicit_null_checks_inserted, explicit_null_checks_elided, (100*explicit_null_checks_elided)/explicit_null_checks_inserted, all_null_checks_found);
......@@ -373,6 +374,12 @@ Parse::Parse(JVMState* caller, ciMethod* parse_method, float expected_uses)
C->record_method_not_compilable_all_tiers(_flow->failure_reason());
}
#ifndef PRODUCT
if (_flow->has_irreducible_entry()) {
C->set_parsed_irreducible_loop(true);
}
#endif
if (_expected_uses <= 0) {
_prof_factor = 1;
} else {
......@@ -556,118 +563,93 @@ Parse::Parse(JVMState* caller, ciMethod* parse_method, float expected_uses)
set_map(entry_map);
do_exits();
// Collect a few more statistics.
parse_idx += C->unique();
parse_arena += C->node_arena()->used();
if (log) log->done("parse nodes='%d' memory='%d'",
C->unique(), C->node_arena()->used());
}
//---------------------------do_all_blocks-------------------------------------
void Parse::do_all_blocks() {
_blocks_merged = 0;
_blocks_parsed = 0;
bool has_irreducible = flow()->has_irreducible_entry();
int old_blocks_merged = -1;
int old_blocks_parsed = -1;
// Walk over all blocks in Reverse Post-Order.
while (true) {
bool progress = false;
for (int rpo = 0; rpo < block_count(); rpo++) {
Block* block = rpo_at(rpo);
for (int tries = 0; ; tries++) {
visit_blocks();
if (failing()) return; // Check for bailout
if (block->is_parsed()) continue;
// No need for a work list. The outer loop is hardly ever repeated.
// The following loop traverses the blocks in a reasonable pre-order,
// as produced by the ciTypeFlow pass.
if (!block->is_merged()) {
// Dead block, no state reaches this block
continue;
}
// This loop can be taken more than once if there are two entries to
// a loop (irreducible CFG), and the edge which ciTypeFlow chose
// as the first predecessor to the loop goes dead in the parser,
// due to parse-time optimization. (Could happen with obfuscated code.)
// Prepare to parse this block.
load_state_from(block);
// Look for progress, or the lack of it:
if (_blocks_parsed == block_count()) {
// That's all, folks.
if (TraceOptoParse) {
tty->print_cr("All blocks parsed.");
if (stopped()) {
// Block is dead.
continue;
}
break;
}
// How much work was done this time around?
int new_blocks_merged = _blocks_merged - old_blocks_merged;
int new_blocks_parsed = _blocks_parsed - old_blocks_parsed;
if (new_blocks_merged == 0) {
if (TraceOptoParse) {
tty->print_cr("All live blocks parsed; %d dead blocks.", block_count() - _blocks_parsed);
blocks_parsed++;
progress = true;
if (block->is_loop_head() || block->is_handler() || has_irreducible && !block->is_ready()) {
// Not all preds have been parsed. We must build phis everywhere.
// (Note that dead locals do not get phis built, ever.)
ensure_phis_everywhere();
// Leave behind an undisturbed copy of the map, for future merges.
set_map(clone_map());
}
// No new blocks have become parseable. Some blocks are just dead.
break;
if (control()->is_Region() && !block->is_loop_head() && !has_irreducible && !block->is_handler()) {
// In the absence of irreducible loops, the Region and Phis
// associated with a merge that doesn't involve a backedge can
// be simplified now since the RPO parsing order guarantees
// that any path which was supposed to reach here has already
// been parsed or must be dead.
Node* c = control();
Node* result = _gvn.transform_no_reclaim(control());
if (c != result && TraceOptoParse) {
tty->print_cr("Block #%d replace %d with %d", block->rpo(), c->_idx, result->_idx);
}
if (result != top()) {
record_for_igvn(result);
}
}
// Parse the block.
do_one_block();
// Check for bailouts.
if (failing()) return;
}
assert(new_blocks_parsed > 0, "must make progress");
assert(tries < block_count(), "the pre-order cannot be this bad!");
old_blocks_merged = _blocks_merged;
old_blocks_parsed = _blocks_parsed;
// with irreducible loops multiple passes might be necessary to parse everything
if (!has_irreducible || !progress) {
break;
}
}
blocks_seen += block_count();
#ifndef PRODUCT
// Make sure there are no half-processed blocks remaining.
// Every remaining unprocessed block is dead and may be ignored now.
for (int po = 0; po < block_count(); po++) {
Block* block = pre_order_at(po);
for (int rpo = 0; rpo < block_count(); rpo++) {
Block* block = rpo_at(rpo);
if (!block->is_parsed()) {
if (TraceOptoParse) {
tty->print("Skipped dead block %d at bci:%d", po, block->start());
assert(!block->is_merged(), "no half-processed blocks");
tty->print_cr("Skipped dead block %d at bci:%d", rpo, block->start());
}
assert(!block->is_merged(), "no half-processed blocks");
}
}
#endif
}
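The rewritten do_all_blocks above replaces the old visit_blocks sweep with a reverse-post-order pass that repeats only while it makes progress, which matters exactly when the CFG has irreducible entries: a block may become reachable (merged) only on a later pass. A minimal standalone sketch of that control pattern with a toy Block type (none of these names are HotSpot API):

#include <vector>
#include <cstdio>

struct ToyBlock {
  bool merged = false;    // some predecessor has delivered state to this block
  bool parsed = false;
  std::vector<int> succs; // successor indices in RPO numbering
};

// Repeatedly sweep blocks in RPO; with a reducible CFG one pass is enough,
// but with irreducible entries a block may only become "merged" after a
// later block has been parsed, so we iterate until a pass makes no progress.
static void parse_all(std::vector<ToyBlock>& blocks, bool has_irreducible) {
  while (true) {
    bool progress = false;
    for (ToyBlock& b : blocks) {
      if (b.parsed || !b.merged) continue;
      b.parsed = true;
      progress = true;
      for (int s : b.succs) blocks[s].merged = true;  // "merge" state into successors
    }
    if (!has_irreducible || !progress) break;
  }
}

int main() {
  // Block 2 is only reachable through block 3, i.e. through a block that
  // comes later in RPO, mimicking a second entry into a loop.
  std::vector<ToyBlock> blocks(4);
  blocks[0].merged = true;
  blocks[0].succs = {1};
  blocks[1].succs = {3};
  blocks[3].succs = {2};
  parse_all(blocks, /*has_irreducible=*/true);
  for (size_t i = 0; i < blocks.size(); i++)
    std::printf("block %zu parsed=%d\n", i, (int)blocks[i].parsed);
  return 0;
}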
//---------------------------visit_blocks--------------------------------------
void Parse::visit_blocks() {
// Walk over all blocks, parsing every one that has been reached (merged).
for (int po = 0; po < block_count(); po++) {
Block* block = pre_order_at(po);
if (block->is_parsed()) {
// Do not parse twice.
continue;
}
if (!block->is_merged()) {
// No state on this block. It had not yet been reached.
// Delay reaching it until later.
continue;
}
// Prepare to parse this block.
load_state_from(block);
if (stopped()) {
// Block is dead.
continue;
}
if (!block->is_ready() || block->is_handler()) {
// Not all preds have been parsed. We must build phis everywhere.
// (Note that dead locals do not get phis built, ever.)
ensure_phis_everywhere();
// Leave behind an undisturbed copy of the map, for future merges.
set_map(clone_map());
}
// Ready or not, parse the block.
do_one_block();
// Check for bailouts.
if (failing()) return;
}
}
//-------------------------------build_exits----------------------------------
// Build normal and exceptional exit merge points.
void Parse::build_exits() {
......@@ -1134,24 +1116,24 @@ void Parse::init_blocks() {
_blocks = NEW_RESOURCE_ARRAY(Block, _block_count);
Copy::zero_to_bytes(_blocks, sizeof(Block)*_block_count);
int po;
int rpo;
// Initialize the structs.
for (po = 0; po < block_count(); po++) {
Block* block = pre_order_at(po);
block->init_node(this, po);
for (rpo = 0; rpo < block_count(); rpo++) {
Block* block = rpo_at(rpo);
block->init_node(this, rpo);
}
// Collect predecessor and successor information.
for (po = 0; po < block_count(); po++) {
Block* block = pre_order_at(po);
for (rpo = 0; rpo < block_count(); rpo++) {
Block* block = rpo_at(rpo);
block->init_graph(this);
}
}
//-------------------------------init_node-------------------------------------
void Parse::Block::init_node(Parse* outer, int po) {
_flow = outer->flow()->pre_order_at(po);
void Parse::Block::init_node(Parse* outer, int rpo) {
_flow = outer->flow()->rpo_at(rpo);
_pred_count = 0;
_preds_parsed = 0;
_count = 0;
......@@ -1177,7 +1159,7 @@ void Parse::Block::init_graph(Parse* outer) {
int p = 0;
for (int i = 0; i < ns+ne; i++) {
ciTypeFlow::Block* tf2 = (i < ns) ? tfs->at(i) : tfe->at(i-ns);
Block* block2 = outer->pre_order_at(tf2->pre_order());
Block* block2 = outer->rpo_at(tf2->rpo());
_successors[i] = block2;
// Accumulate pred info for the other block, too.
......@@ -1368,10 +1350,11 @@ void Parse::do_one_block() {
int nt = b->all_successors();
tty->print("Parsing block #%d at bci [%d,%d), successors: ",
block()->pre_order(), block()->start(), block()->limit());
block()->rpo(), block()->start(), block()->limit());
for (int i = 0; i < nt; i++) {
tty->print((( i < ns) ? " %d" : " %d(e)"), b->successor_at(i)->pre_order());
tty->print((( i < ns) ? " %d" : " %d(e)"), b->successor_at(i)->rpo());
}
if (b->is_loop_head()) tty->print(" lphd");
tty->print_cr("");
}
......@@ -1501,7 +1484,7 @@ void Parse::handle_missing_successor(int target_bci) {
#ifndef PRODUCT
Block* b = block();
int trap_bci = b->flow()->has_trap()? b->flow()->trap_bci(): -1;
tty->print_cr("### Missing successor at bci:%d for block #%d (trap_bci:%d)", target_bci, b->pre_order(), trap_bci);
tty->print_cr("### Missing successor at bci:%d for block #%d (trap_bci:%d)", target_bci, b->rpo(), trap_bci);
#endif
ShouldNotReachHere();
}
......@@ -1509,7 +1492,7 @@ void Parse::handle_missing_successor(int target_bci) {
//--------------------------merge_common---------------------------------------
void Parse::merge_common(Parse::Block* target, int pnum) {
if (TraceOptoParse) {
tty->print("Merging state at block #%d bci:%d", target->pre_order(), target->start());
tty->print("Merging state at block #%d bci:%d", target->rpo(), target->start());
}
// Zap extra stack slots to top
......@@ -1534,6 +1517,7 @@ void Parse::merge_common(Parse::Block* target, int pnum) {
// which must not be allowed into this block's map.)
if (pnum > PhiNode::Input // Known multiple inputs.
|| target->is_handler() // These have unpredictable inputs.
|| target->is_loop_head() // Known multiple inputs
|| control()->is_Region()) { // We must hide this guy.
// Add a Region to start the new basic block. Phis will be added
// later lazily.
......@@ -1575,15 +1559,21 @@ void Parse::merge_common(Parse::Block* target, int pnum) {
// Compute where to merge into
// Merge incoming control path
r->set_req(pnum, newin->control());
r->init_req(pnum, newin->control());
if (pnum == 1) { // Last merge for this Region?
_gvn.transform_no_reclaim(r);
if (!block()->flow()->is_irreducible_entry()) {
Node* result = _gvn.transform_no_reclaim(r);
if (r != result && TraceOptoParse) {
tty->print_cr("Block #%d replace %d with %d", block()->rpo(), r->_idx, result->_idx);
}
}
record_for_igvn(r);
}
// Update all the non-control inputs to map:
assert(TypeFunc::Parms == newin->jvms()->locoff(), "parser map should contain only youngest jvms");
bool check_elide_phi = target->is_SEL_backedge(save_block);
for (uint j = 1; j < newin->req(); j++) {
Node* m = map()->in(j); // Current state of target.
Node* n = newin->in(j); // Incoming change to target state.
......@@ -1603,7 +1593,11 @@ void Parse::merge_common(Parse::Block* target, int pnum) {
merge_memory_edges(n->as_MergeMem(), pnum, nophi);
continue;
default: // All normal stuff
if (phi == NULL) phi = ensure_phi(j, nophi);
if (phi == NULL) {
if (!check_elide_phi || !target->can_elide_SEL_phi(j)) {
phi = ensure_phi(j, nophi);
}
}
break;
}
}
......@@ -1736,9 +1730,13 @@ void Parse::ensure_phis_everywhere() {
uint nof_monitors = map()->jvms()->nof_monitors();
assert(TypeFunc::Parms == map()->jvms()->locoff(), "parser map should contain only youngest jvms");
bool check_elide_phi = block()->is_SEL_head();
for (uint i = TypeFunc::Parms; i < monoff; i++) {
ensure_phi(i);
if (!check_elide_phi || !block()->can_elide_SEL_phi(i)) {
ensure_phi(i);
}
}
// Even monitors need Phis, though they are well-structured.
// This is true for OSR methods, and also for the rare cases where
// a monitor object is the subject of a replace_in_map operation.
......
......@@ -100,16 +100,17 @@ Node* Parse::array_addressing(BasicType type, int vals, const Type* *result2) {
// Do the range check
if (GenerateRangeChecks && need_range_check) {
// Range is constant in array-oop, so we can use the original state of mem
Node* len = load_array_length(ary);
Node* tst;
if (sizetype->_hi <= 0) {
// If the greatest array bound is negative, we can conclude that we're
// The greatest array bound is negative, so we can conclude that we're
// compiling unreachable code, but the unsigned compare trick used below
// only works with non-negative lengths. Instead, hack "tst" to be zero so
// the uncommon_trap path will always be taken.
tst = _gvn.intcon(0);
} else {
// Range is constant in array-oop, so we can use the original state of mem
Node* len = load_array_length(ary);
// Test length vs index (standard trick using unsigned compare)
Node* chk = _gvn.transform( new (C, 3) CmpUNode(idx, len) );
BoolTest::mask btest = BoolTest::lt;
......@@ -137,9 +138,12 @@ Node* Parse::array_addressing(BasicType type, int vals, const Type* *result2) {
// Check for always knowing you are throwing a range-check exception
if (stopped()) return top();
Node* ptr = array_element_address( ary, idx, type, sizetype);
Node* ptr = array_element_address(ary, idx, type, sizetype);
if (result2 != NULL) *result2 = elemtype;
assert(ptr != top(), "top should go hand-in-hand with stopped");
return ptr;
}
......
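The "standard trick using unsigned compare" mentioned above folds the two bounds checks into one: a negative index converts to a huge unsigned value, so a single unsigned idx < len rejects both idx < 0 and idx >= len (assuming len itself is non-negative, as array lengths are). A tiny standalone illustration in plain C++ rather than IR nodes:

#include <cstdint>
#include <cstdio>

// One unsigned compare covers both "idx < 0" and "idx >= len",
// because a negative int converts to a very large unsigned value.
static bool in_range(int32_t idx, int32_t len) {
  return static_cast<uint32_t>(idx) < static_cast<uint32_t>(len);
}

int main() {
  std::printf("%d %d %d %d\n",
              in_range(0, 4),    // 1: valid
              in_range(3, 4),    // 1: valid
              in_range(4, 4),    // 0: too large
              in_range(-1, 4));  // 0: negative wraps to 0xFFFFFFFF
  return 0;
}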
......@@ -3157,17 +3157,18 @@ static jint max_array_length(BasicType etype) {
// Narrow the given size type to the index range for the given array base type.
// Return NULL if the resulting int type becomes empty.
const TypeInt* TypeAryPtr::narrow_size_type(const TypeInt* size, BasicType elem) {
const TypeInt* TypeAryPtr::narrow_size_type(const TypeInt* size) const {
jint hi = size->_hi;
jint lo = size->_lo;
jint min_lo = 0;
jint max_hi = max_array_length(elem);
jint max_hi = max_array_length(elem()->basic_type());
//if (index_not_size) --max_hi; // type of a valid array index, FTR
bool chg = false;
if (lo < min_lo) { lo = min_lo; chg = true; }
if (hi > max_hi) { hi = max_hi; chg = true; }
// Negative length arrays will produce weird intermediate dead fast-path code
if (lo > hi)
return NULL;
return TypeInt::ZERO;
if (!chg)
return size;
return TypeInt::make(lo, hi, Type::WidenMin);
......@@ -3176,9 +3177,7 @@ const TypeInt* TypeAryPtr::narrow_size_type(const TypeInt* size, BasicType elem)
//-------------------------------cast_to_size----------------------------------
const TypeAryPtr* TypeAryPtr::cast_to_size(const TypeInt* new_size) const {
assert(new_size != NULL, "");
new_size = narrow_size_type(new_size, elem()->basic_type());
if (new_size == NULL) // Negative length arrays will produce weird
new_size = TypeInt::ZERO; // intermediate dead fast-path goo
new_size = narrow_size_type(new_size);
if (new_size == size()) return this;
const TypeAry* new_ary = TypeAry::make(elem(), new_size);
return make(ptr(), const_oop(), new_ary, klass(), klass_is_exact(), _offset, _instance_id);
......
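narrow_size_type above clamps the size type's bounds to the legal array-length range and, after this change, returns TypeInt::ZERO instead of NULL when the clamped interval is empty, so cast_to_size no longer needs its own dead-path special case. A standalone sketch of the clamping arithmetic on plain integer intervals (the IntRange type and max_len value are illustrative):

#include <algorithm>
#include <cstdio>

struct IntRange { int lo, hi; };   // stand-in for TypeInt bounds

// Clamp a candidate array-size range to [0, max_len]; an empty result
// collapses to the single value 0 (the analogue of TypeInt::ZERO).
static IntRange narrow_size(IntRange size, int max_len) {
  int lo = std::max(size.lo, 0);
  int hi = std::min(size.hi, max_len);
  if (lo > hi) return IntRange{0, 0};   // dead fast-path code becomes length 0
  return IntRange{lo, hi};
}

int main() {
  IntRange a = narrow_size(IntRange{-5, 10}, 1000);  // -> [0, 10]
  IntRange b = narrow_size(IntRange{-8, -2}, 1000);  // empty -> [0, 0]
  std::printf("[%d,%d] [%d,%d]\n", a.lo, a.hi, b.lo, b.hi);
  return 0;
}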
......@@ -840,6 +840,7 @@ public:
virtual const TypeOopPtr *cast_to_instance_id(int instance_id) const;
virtual const TypeAryPtr* cast_to_size(const TypeInt* size) const;
virtual const TypeInt* narrow_size_type(const TypeInt* size) const;
virtual bool empty(void) const; // TRUE if type is vacuous
virtual const TypePtr *add_offset( intptr_t offset ) const;
......@@ -865,7 +866,6 @@ public:
}
static const TypeAryPtr *_array_body_type[T_CONFLICT+1];
// sharpen the type of an int which is used as an array size
static const TypeInt* narrow_size_type(const TypeInt* size, BasicType elem);
#ifndef PRODUCT
virtual void dump2( Dict &d, uint depth, outputStream *st ) const; // Specialized per-Type dumping
#endif
......
......@@ -121,7 +121,7 @@ JvmtiEnvBase::JvmtiEnvBase() : _env_event_enable() {
JvmtiEventController::env_initialize((JvmtiEnv*)this);
#ifdef JVMTI_TRACE
_jvmti_external.functions = strlen(TraceJVMTI)? &jvmtiTrace_Interface : &jvmti_Interface;
_jvmti_external.functions = TraceJVMTI != NULL ? &jvmtiTrace_Interface : &jvmti_Interface;
#else
_jvmti_external.functions = &jvmti_Interface;
#endif
......
......@@ -73,7 +73,7 @@ void JvmtiTrace::initialize() {
const char *very_end;
const char *curr;
if (strlen(TraceJVMTI)) {
if (TraceJVMTI != NULL) {
curr = TraceJVMTI;
} else {
curr = ""; // hack in fixed tracing here
......
......@@ -365,8 +365,11 @@ bool CommandLineFlags::ccstrAtPut(char* name, size_t len, ccstr* value, FlagValu
if (result == NULL) return false;
if (!result->is_ccstr()) return false;
ccstr old_value = result->get_ccstr();
char* new_value = NEW_C_HEAP_ARRAY(char, strlen(*value)+1);
strcpy(new_value, *value);
char* new_value = NULL;
if (*value != NULL) {
new_value = NEW_C_HEAP_ARRAY(char, strlen(*value)+1);
strcpy(new_value, *value);
}
result->set_ccstr(new_value);
if (result->origin == DEFAULT && old_value != NULL) {
// Prior value is NOT heap allocated, but was a literal constant.
......
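The ccstrAtPut change above exists because ccstr flags such as TraceJVMTI and SyncKnobs now default to NULL rather than "", so the write path has to tolerate a NULL incoming value instead of unconditionally calling strlen/strcpy on it. The pattern is just NULL-safe string duplication; a minimal sketch using plain malloc in place of NEW_C_HEAP_ARRAY:

#include <cstdlib>
#include <cstring>

// Duplicate a C string, propagating NULL instead of crashing on it.
// (Stand-in for the NEW_C_HEAP_ARRAY + strcpy sequence in ccstrAtPut.)
static char* dup_or_null(const char* value) {
  if (value == NULL) return NULL;
  char* copy = static_cast<char*>(std::malloc(std::strlen(value) + 1));
  if (copy != NULL) std::strcpy(copy, value);
  return copy;
}

int main() {
  char* a = dup_or_null("compileOnly=java/lang/Object");
  char* b = dup_or_null(NULL);       // the new default for TraceJVMTI
  std::free(a);
  std::free(b);                      // free(NULL) is a no-op
  return 0;
}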
......@@ -707,7 +707,7 @@ class CommandLineFlags {
diagnostic(bool, PrintAssembly, false, \
"Print assembly code (using external disassembler.so)") \
\
diagnostic(ccstr, PrintAssemblyOptions, false, \
diagnostic(ccstr, PrintAssemblyOptions, NULL, \
"Options string passed to disassembler.so") \
\
diagnostic(bool, PrintNMethods, false, \
......@@ -848,7 +848,7 @@ class CommandLineFlags {
"Use LWP-based instead of libthread-based synchronization " \
"(SPARC only)") \
\
product(ccstr, SyncKnobs, "", \
product(ccstr, SyncKnobs, NULL, \
"(Unstable) Various monitor synchronization tunables") \
\
product(intx, EmitSync, 0, \
......@@ -1032,7 +1032,7 @@ class CommandLineFlags {
notproduct(bool, TraceJVMCalls, false, \
"Trace JVM calls") \
\
product(ccstr, TraceJVMTI, "", \
product(ccstr, TraceJVMTI, NULL, \
"Trace flags for JVMTI functions and events") \
\
/* This option can change an EMCP method into an obsolete method. */ \
......@@ -1157,10 +1157,6 @@ class CommandLineFlags {
"In the Parallel Old garbage collector use parallel dense" \
" prefix update") \
\
develop(bool, UseParallelOldGCChunkPointerCalc, true, \
"In the Parallel Old garbage collector use chucks to calculate" \
" new object locations") \
\
product(uintx, HeapMaximumCompactionInterval, 20, \
"How often should we maximally compact the heap (not allowing " \
"any dead space)") \
......@@ -1189,21 +1185,14 @@ class CommandLineFlags {
product(uintx, ParallelCMSThreads, 0, \
"Max number of threads CMS will use for concurrent work") \
\
develop(bool, VerifyParallelOldWithMarkSweep, false, \
"Use the MarkSweep code to verify phases of Parallel Old") \
\
develop(uintx, VerifyParallelOldWithMarkSweepInterval, 1, \
"Interval at which the MarkSweep code is used to verify " \
"phases of Parallel Old") \
\
develop(bool, ParallelOldMTUnsafeMarkBitMap, false, \
"Use the Parallel Old MT unsafe in marking the bitmap") \
\
develop(bool, ParallelOldMTUnsafeUpdateLiveData, false, \
"Use the Parallel Old MT unsafe in update of live size") \
\
develop(bool, TraceChunkTasksQueuing, false, \
"Trace the queuing of the chunk tasks") \
develop(bool, TraceRegionTasksQueuing, false, \
"Trace the queuing of the region tasks") \
\
product(uintx, ParallelMarkingThreads, 0, \
"Number of marking threads concurrent gc will use") \
......
......@@ -109,72 +109,72 @@ void ParallelTaskTerminator::reset_for_reuse() {
}
}
bool ChunkTaskQueueWithOverflow::is_empty() {
return (_chunk_queue.size() == 0) &&
bool RegionTaskQueueWithOverflow::is_empty() {
return (_region_queue.size() == 0) &&
(_overflow_stack->length() == 0);
}
bool ChunkTaskQueueWithOverflow::stealable_is_empty() {
return _chunk_queue.size() == 0;
bool RegionTaskQueueWithOverflow::stealable_is_empty() {
return _region_queue.size() == 0;
}
bool ChunkTaskQueueWithOverflow::overflow_is_empty() {
bool RegionTaskQueueWithOverflow::overflow_is_empty() {
return _overflow_stack->length() == 0;
}
void ChunkTaskQueueWithOverflow::initialize() {
_chunk_queue.initialize();
void RegionTaskQueueWithOverflow::initialize() {
_region_queue.initialize();
assert(_overflow_stack == 0, "Creating memory leak");
_overflow_stack =
new (ResourceObj::C_HEAP) GrowableArray<ChunkTask>(10, true);
new (ResourceObj::C_HEAP) GrowableArray<RegionTask>(10, true);
}
void ChunkTaskQueueWithOverflow::save(ChunkTask t) {
if (TraceChunkTasksQueuing && Verbose) {
void RegionTaskQueueWithOverflow::save(RegionTask t) {
if (TraceRegionTasksQueuing && Verbose) {
gclog_or_tty->print_cr("CTQ: save " PTR_FORMAT, t);
}
if(!_chunk_queue.push(t)) {
if(!_region_queue.push(t)) {
_overflow_stack->push(t);
}
}
// Note that using this method will retrieve all chunks
// Note that using this method will retrieve all regions
// that have been saved but that it will always check
// the overflow stack. It may be more efficient to
// check the stealable queue and the overflow stack
// separately.
bool ChunkTaskQueueWithOverflow::retrieve(ChunkTask& chunk_task) {
bool result = retrieve_from_overflow(chunk_task);
bool RegionTaskQueueWithOverflow::retrieve(RegionTask& region_task) {
bool result = retrieve_from_overflow(region_task);
if (!result) {
result = retrieve_from_stealable_queue(chunk_task);
result = retrieve_from_stealable_queue(region_task);
}
if (TraceChunkTasksQueuing && Verbose && result) {
if (TraceRegionTasksQueuing && Verbose && result) {
gclog_or_tty->print_cr(" CTQ: retrieve " PTR_FORMAT, result);
}
return result;
}
bool ChunkTaskQueueWithOverflow::retrieve_from_stealable_queue(
ChunkTask& chunk_task) {
bool result = _chunk_queue.pop_local(chunk_task);
if (TraceChunkTasksQueuing && Verbose) {
gclog_or_tty->print_cr("CTQ: retrieve_stealable " PTR_FORMAT, chunk_task);
bool RegionTaskQueueWithOverflow::retrieve_from_stealable_queue(
RegionTask& region_task) {
bool result = _region_queue.pop_local(region_task);
if (TraceRegionTasksQueuing && Verbose) {
gclog_or_tty->print_cr("CTQ: retrieve_stealable " PTR_FORMAT, region_task);
}
return result;
}
bool ChunkTaskQueueWithOverflow::retrieve_from_overflow(
ChunkTask& chunk_task) {
bool
RegionTaskQueueWithOverflow::retrieve_from_overflow(RegionTask& region_task) {
bool result;
if (!_overflow_stack->is_empty()) {
chunk_task = _overflow_stack->pop();
region_task = _overflow_stack->pop();
result = true;
} else {
chunk_task = (ChunkTask) NULL;
region_task = (RegionTask) NULL;
result = false;
}
if (TraceChunkTasksQueuing && Verbose) {
gclog_or_tty->print_cr("CTQ: retrieve_stealable " PTR_FORMAT, chunk_task);
if (TraceRegionTasksQueuing && Verbose) {
gclog_or_tty->print_cr("CTQ: retrieve_stealable " PTR_FORMAT, region_task);
}
return result;
}
......@@ -557,32 +557,32 @@ class StarTask {
typedef GenericTaskQueue<StarTask> OopStarTaskQueue;
typedef GenericTaskQueueSet<StarTask> OopStarTaskQueueSet;
typedef size_t ChunkTask; // index for chunk
typedef GenericTaskQueue<ChunkTask> ChunkTaskQueue;
typedef GenericTaskQueueSet<ChunkTask> ChunkTaskQueueSet;
typedef size_t RegionTask; // index for region
typedef GenericTaskQueue<RegionTask> RegionTaskQueue;
typedef GenericTaskQueueSet<RegionTask> RegionTaskQueueSet;
class ChunkTaskQueueWithOverflow: public CHeapObj {
class RegionTaskQueueWithOverflow: public CHeapObj {
protected:
ChunkTaskQueue _chunk_queue;
GrowableArray<ChunkTask>* _overflow_stack;
RegionTaskQueue _region_queue;
GrowableArray<RegionTask>* _overflow_stack;
public:
ChunkTaskQueueWithOverflow() : _overflow_stack(NULL) {}
RegionTaskQueueWithOverflow() : _overflow_stack(NULL) {}
// Initialize both stealable queue and overflow
void initialize();
// Save first to stealable queue and then to overflow
void save(ChunkTask t);
void save(RegionTask t);
// Retrieve first from overflow and then from stealable queue
bool retrieve(ChunkTask& chunk_index);
bool retrieve(RegionTask& region_index);
// Retrieve from stealable queue
bool retrieve_from_stealable_queue(ChunkTask& chunk_index);
bool retrieve_from_stealable_queue(RegionTask& region_index);
// Retrieve from overflow
bool retrieve_from_overflow(ChunkTask& chunk_index);
bool retrieve_from_overflow(RegionTask& region_index);
bool is_empty();
bool stealable_is_empty();
bool overflow_is_empty();
juint stealable_size() { return _chunk_queue.size(); }
ChunkTaskQueue* task_queue() { return &_chunk_queue; }
juint stealable_size() { return _region_queue.size(); }
RegionTaskQueue* task_queue() { return &_region_queue; }
};
#define USE_ChunkTaskQueueWithOverflow
#define USE_RegionTaskQueueWithOverflow
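RegionTaskQueueWithOverflow above pairs a bounded, stealable queue with an unbounded private overflow stack: save() prefers the queue and spills to the overflow, retrieve() drains the overflow first. A minimal standalone model of that shape, with std::deque and std::vector standing in for GenericTaskQueue and GrowableArray (the capacity and names are illustrative):

#include <cstddef>
#include <cstdio>
#include <deque>
#include <vector>

typedef size_t RegionTask;   // index of a region, as in the real code

// Toy version: a capacity-limited "stealable" queue plus an overflow stack.
class ToyRegionQueueWithOverflow {
  std::deque<RegionTask> _queue;      // bounded; would be stealable in HotSpot
  std::vector<RegionTask> _overflow;  // private overflow stack
  size_t _capacity;
 public:
  explicit ToyRegionQueueWithOverflow(size_t capacity) : _capacity(capacity) {}

  // Save first to the stealable queue, then to overflow if the queue is full.
  void save(RegionTask t) {
    if (_queue.size() < _capacity) _queue.push_back(t);
    else                           _overflow.push_back(t);
  }

  // Retrieve first from overflow, then from the stealable queue.
  bool retrieve(RegionTask& t) {
    if (!_overflow.empty()) { t = _overflow.back(); _overflow.pop_back(); return true; }
    if (!_queue.empty())    { t = _queue.front();   _queue.pop_front();   return true; }
    return false;
  }

  bool is_empty() const { return _queue.empty() && _overflow.empty(); }
};

int main() {
  ToyRegionQueueWithOverflow q(2);
  for (RegionTask r = 0; r < 4; r++) q.save(r);          // 0,1 to queue; 2,3 overflow
  RegionTask t;
  while (q.retrieve(t)) std::printf("region %zu\n", t);  // 3,2 then 0,1
  return 0;
}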
/*
* Copyright 2008 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*/
/*
* @test
* @bug 6711100
* @summary 64bit fastdebug server vm crashes with assert(_base == Int,"Not an Int")
* @run main/othervm -Xcomp -XX:CompileOnly=Test.<init> Test
*/
public class Test {
static byte b;
// The server compiler chokes on compiling
// this method when f() is not inlined
public Test() {
b = (new byte[1])[(new byte[f()])[-1]];
}
protected static int f() {
return 1;
}
public static void main(String[] args) {
try {
Test t = new Test();
} catch (ArrayIndexOutOfBoundsException e) {
}
}
}