提交 86701242 编写于 作者: K kvn

7081933: Use zeroing elimination optimization for large array

Summary: Don't zero new typeArray during runtime call if the allocation is followed by arraycopy into it.
Reviewed-by: twisti
上级 0b2cedfe
......@@ -2359,10 +2359,10 @@ class StubGenerator: public StubCodeGenerator {
for (int off = 0; off < 64; off += 16) {
if (use_prefetch && (off & 31) == 0) {
if (ArraycopySrcPrefetchDistance > 0) {
__ prefetch(from, ArraycopySrcPrefetchDistance, Assembler::severalReads);
__ prefetch(from, ArraycopySrcPrefetchDistance+off, Assembler::severalReads);
}
if (ArraycopyDstPrefetchDistance > 0) {
__ prefetch(to, ArraycopyDstPrefetchDistance, Assembler::severalWritesAndPossiblyReads);
__ prefetch(to, ArraycopyDstPrefetchDistance+off, Assembler::severalWritesAndPossiblyReads);
}
}
__ ldx(from, off+0, O4);
......
......@@ -322,6 +322,7 @@ class CollectedHeap : public CHeapObj {
// General obj/array allocation facilities.
inline static oop obj_allocate(KlassHandle klass, int size, TRAPS);
inline static oop array_allocate(KlassHandle klass, int size, int length, TRAPS);
inline static oop array_allocate_nozero(KlassHandle klass, int size, int length, TRAPS);
// Special obj/array allocation facilities.
// Some heaps may want to manage "permanent" data uniquely. These default
......
......@@ -274,6 +274,23 @@ oop CollectedHeap::array_allocate(KlassHandle klass,
return (oop)obj;
}
// Allocates an array object of 'size' heap words and 'length' elements for
// 'klass' through the no-init allocation path, then installs the array header.
//
// klass  - handle to the array klass to install in the new object
// size   - total object size in heap words; must be non-negative
// length - array length passed on to post_allocation_setup_array()
//
// Returns the new array as an oop. CHECK_NULL is used on the allocation call,
// which by HotSpot convention returns NULL to the caller when an exception is
// pending.
// NOTE(review): judging by its name, common_mem_allocate_noinit() does not
// zero the element storage, so the caller is expected to initialize it --
// confirm against the helper's definition.
oop CollectedHeap::array_allocate_nozero(KlassHandle klass,
int size,
int length,
TRAPS) {
debug_only(check_for_valid_allocation_state());
assert(!Universe::heap()->is_gc_active(), "Allocation during gc not allowed");
assert(size >= 0, "int won't convert to size_t");
HeapWord* obj = common_mem_allocate_noinit(size, CHECK_NULL);
// Set the klass gap to 0 explicitly before the header is set up.
// Presumably required because the memory was not pre-cleared -- TODO confirm.
((oop)obj)->set_klass_gap(0);
post_allocation_setup_array(klass, obj, size, length);
#ifndef PRODUCT
// Non-product builds only: run the heap-word check over the words that start
// one word past oopDesc::header_size() and extend to the end of the object.
// NOTE(review): presumably verifies the payload is not still holding the
// debug "bad heap word" pattern -- confirm against the checker.
const size_t hs = oopDesc::header_size()+1;
Universe::heap()->check_for_non_bad_heap_word_value(obj+hs, size-hs);
#endif
return (oop)obj;
}
oop CollectedHeap::permanent_obj_allocate(KlassHandle klass, int size, TRAPS) {
oop obj = permanent_obj_allocate_no_klass_install(klass, size, CHECK_NULL);
post_allocation_install_obj_klass(klass, obj, size);
......
......@@ -77,7 +77,14 @@ typeArrayOop oopFactory::new_permanent_intArray(int length, TRAPS) {
// Allocates a primitive array of 'length' elements of the given BasicType,
// requesting zeroing of the element storage (do_zero == true).
//
// type   - element type; selects the typeArrayKlass via Universe::typeArrayKlassObj()
// length - number of elements, validated by typeArrayKlass::allocate_common()
//
// Returns whatever allocate_common() returns; any pending exception is left
// on THREAD for the caller to handle.
typeArrayOop oopFactory::new_typeArray(BasicType type, int length, TRAPS) {
  klassOop type_asKlassOop = Universe::typeArrayKlassObj(type);
  typeArrayKlass* type_asArrayKlass = typeArrayKlass::cast(type_asKlassOop);
  // Single declaration of 'result': the older allocate(length, THREAD) form is
  // just a forwarding wrapper for allocate_common(length, true, THREAD), so
  // only the explicit call is kept.
  typeArrayOop result = type_asArrayKlass->allocate_common(length, true, THREAD);
  return result;
}
// Variant of new_typeArray() that asks the klass to allocate without zeroing
// (do_zero == false). The element type picks the primitive-array klass; any
// pending exception is left on THREAD for the caller.
typeArrayOop oopFactory::new_typeArray_nozero(BasicType type, int length, TRAPS) {
  // Look up the klass for this element type and view it as a typeArrayKlass.
  typeArrayKlass* array_klass =
      typeArrayKlass::cast(Universe::typeArrayKlassObj(type));
  return array_klass->allocate_common(length, false, THREAD);
}
......
......@@ -63,6 +63,7 @@ class oopFactory: AllStatic {
static typeArrayOop new_permanent_intArray (int length, TRAPS); // used for class file structures
static typeArrayOop new_typeArray(BasicType type, int length, TRAPS);
static typeArrayOop new_typeArray_nozero(BasicType type, int length, TRAPS);
// Constant pools
static constantPoolOop new_constantPool (int length,
......
......@@ -76,7 +76,7 @@ klassOop typeArrayKlass::create_klass(BasicType type, int scale,
return k();
}
typeArrayOop typeArrayKlass::allocate(int length, TRAPS) {
typeArrayOop typeArrayKlass::allocate_common(int length, bool do_zero, TRAPS) {
assert(log2_element_size() >= 0, "bad scale");
if (length >= 0) {
if (length <= max_length()) {
......@@ -84,7 +84,11 @@ typeArrayOop typeArrayKlass::allocate(int length, TRAPS) {
KlassHandle h_k(THREAD, as_klassOop());
typeArrayOop t;
CollectedHeap* ch = Universe::heap();
t = (typeArrayOop)CollectedHeap::array_allocate(h_k, (int)size, length, CHECK_NULL);
if (do_zero) {
t = (typeArrayOop)CollectedHeap::array_allocate(h_k, (int)size, length, CHECK_NULL);
} else {
t = (typeArrayOop)CollectedHeap::array_allocate_nozero(h_k, (int)size, length, CHECK_NULL);
}
assert(t->is_parsable(), "Don't publish unless parsable");
return t;
} else {
......
......@@ -56,7 +56,8 @@ class typeArrayKlass : public arrayKlass {
bool compute_is_subtype_of(klassOop k);
// Allocation
typeArrayOop allocate(int length, TRAPS);
typeArrayOop allocate_common(int length, bool do_zero, TRAPS);
typeArrayOop allocate(int length, TRAPS) { return allocate_common(length, true, THREAD); }
typeArrayOop allocate_permanent(int length, TRAPS); // used for class file structures
oop multi_allocate(int rank, jint* sizes, TRAPS);
......
......@@ -4658,6 +4658,7 @@ LibraryCallKit::generate_arraycopy(const TypePtr* adr_type,
// "You break it, you buy it."
InitializeNode* init = alloc->initialization();
assert(init->is_complete(), "we just did this");
init->set_complete_with_arraycopy();
assert(dest->is_CheckCastPP(), "sanity");
assert(dest->in(0)->in(0) == init, "dest pinned");
adr_type = TypeRawPtr::BOTTOM; // all initializations are into raw memory
......
......@@ -1685,9 +1685,21 @@ void PhaseMacroExpand::expand_allocate(AllocateNode *alloc) {
// Expands an AllocateArrayNode, choosing which runtime stub the slow path
// calls.
//
// When the allocation's InitializeNode is marked complete-with-arraycopy and
// the klass is a type (primitive) array klass, the non-zeroing stub is used;
// otherwise the regular array allocation stub is used. The chosen address is
// handed to expand_allocate_common() together with the array length and the
// new_array call type.
void PhaseMacroExpand::expand_allocate_array(AllocateArrayNode *alloc) {
  Node* length = alloc->in(AllocateNode::ALength);
  InitializeNode* init = alloc->initialization();
  Node* klass_node = alloc->in(AllocateNode::KlassNode);
  ciKlass* k = _igvn.type(klass_node)->is_klassptr()->klass();
  address slow_call_address;  // Address of slow call
  if (init != NULL && init->is_complete_with_arraycopy() &&
      k->is_type_array_klass()) {
    // Don't zero type array during slow allocation in VM since
    // it will be initialized later by arraycopy in compiled code.
    slow_call_address = OptoRuntime::new_array_nozero_Java();
  } else {
    slow_call_address = OptoRuntime::new_array_Java();
  }
  // Pass the selected stub address. A hard-coded OptoRuntime::new_array_Java()
  // here would bypass the selection made above.
  expand_allocate_common(alloc, length,
                         OptoRuntime::new_array_Type(),
                         slow_call_address);
}
//-----------------------mark_eliminated_locking_nodes-----------------------
......
......@@ -2847,7 +2847,7 @@ Node *MemBarNode::match( const ProjNode *proj, const Matcher *m ) {
//---------------------------InitializeNode------------------------------------
InitializeNode::InitializeNode(Compile* C, int adr_type, Node* rawoop)
: _is_complete(false),
: _is_complete(Incomplete),
MemBarNode(C, adr_type, rawoop)
{
init_class_id(Class_Initialize);
......@@ -2885,7 +2885,7 @@ bool InitializeNode::is_non_zero() {
void InitializeNode::set_complete(PhaseGVN* phase) {
assert(!is_complete(), "caller responsibility");
_is_complete = true;
_is_complete = Complete;
// After this node is complete, it contains a bunch of
// raw-memory initializations. There is no need for
......
......@@ -942,7 +942,12 @@ public:
class InitializeNode: public MemBarNode {
friend class AllocateNode;
bool _is_complete;
enum {
Incomplete = 0,
Complete = 1,
WithArraycopy = 2
};
int _is_complete;
public:
enum {
......@@ -976,10 +981,12 @@ public:
// An InitializeNode must be completed before macro expansion is done.
// Completion requires that the AllocateNode must be followed by
// initialization of the new memory to zero, then to any initializers.
bool is_complete() { return _is_complete; }
bool is_complete() { return _is_complete != Incomplete; }
bool is_complete_with_arraycopy() { return (_is_complete & WithArraycopy) != 0; }
// Mark complete. (Must not yet be complete.)
void set_complete(PhaseGVN* phase);
void set_complete_with_arraycopy() { _is_complete = Complete | WithArraycopy; }
#ifdef ASSERT
// ensure all non-degenerate stores are ordered and non-overlapping
......
......@@ -102,6 +102,7 @@
// Compiled code entry points
address OptoRuntime::_new_instance_Java = NULL;
address OptoRuntime::_new_array_Java = NULL;
address OptoRuntime::_new_array_nozero_Java = NULL;
address OptoRuntime::_multianewarray2_Java = NULL;
address OptoRuntime::_multianewarray3_Java = NULL;
address OptoRuntime::_multianewarray4_Java = NULL;
......@@ -151,6 +152,7 @@ void OptoRuntime::generate(ciEnv* env) {
// -------------------------------------------------------------------------------------------------------------------------------
gen(env, _new_instance_Java , new_instance_Type , new_instance_C , 0 , true , false, false);
gen(env, _new_array_Java , new_array_Type , new_array_C , 0 , true , false, false);
gen(env, _new_array_nozero_Java , new_array_Type , new_array_nozero_C , 0 , true , false, false);
gen(env, _multianewarray2_Java , multianewarray2_Type , multianewarray2_C , 0 , true , false, false);
gen(env, _multianewarray3_Java , multianewarray3_Type , multianewarray3_C , 0 , true , false, false);
gen(env, _multianewarray4_Java , multianewarray4_Type , multianewarray4_C , 0 , true , false, false);
......@@ -308,6 +310,36 @@ JRT_BLOCK_ENTRY(void, OptoRuntime::new_array_C(klassOopDesc* array_type, int len
}
JRT_END
// array allocation without zeroing
//
// Runtime entry for allocating a primitive (type) array through
// oopFactory::new_typeArray_nozero(). Only type array klasses are accepted
// (asserted below).
//
// array_type - klass of the array to allocate; must be a typeArray klass
// len        - requested array length
// thread     - current JavaThread; receives the result via set_vm_result()
//
// Nothing is returned directly: the new array is published through the
// thread's vm_result slot (see the comment before set_vm_result below).
JRT_BLOCK_ENTRY(void, OptoRuntime::new_array_nozero_C(klassOopDesc* array_type, int len, JavaThread *thread))
JRT_BLOCK;
#ifndef PRODUCT
SharedRuntime::_new_array_ctr++; // new array requires GC
#endif
assert(check_compiled_frame(thread), "incorrect caller");
// Scavenge and allocate an instance.
oop result;
assert(Klass::cast(array_type)->oop_is_typeArray(), "should be called only for type array");
// The oopFactory likes to work with the element type.
BasicType elem_type = typeArrayKlass::cast(array_type)->element_type();
result = oopFactory::new_typeArray_nozero(elem_type, len, THREAD);
// Pass oops back through thread local storage. Our apparent type to Java
// is that we return an oop, but we can block on exit from this routine and
// a GC can trash the oop in C's return register. The generated stub will
// fetch the oop from TLS after any possible GC.
// NOTE(review): the second argument is the pending-exception state, so the
// caller frame is presumably deoptimized only when the allocation raised an
// exception -- confirm against deoptimize_caller_frame().
deoptimize_caller_frame(thread, HAS_PENDING_EXCEPTION);
thread->set_vm_result(result);
JRT_BLOCK_END;
// Runs after JRT_BLOCK_END, i.e. outside the JRT_BLOCK section above.
if (GraphKit::use_ReduceInitialCardMarks()) {
// inform GC that we won't do card marks for initializing writes.
new_store_pre_barrier(thread);
}
JRT_END
// Note: multianewarray for one dimension is handled inline by GraphKit::new_array.
// multianewarray for 2 dimensions
......
......@@ -114,6 +114,7 @@ class OptoRuntime : public AllStatic {
// References to generated stubs
static address _new_instance_Java;
static address _new_array_Java;
static address _new_array_nozero_Java;
static address _multianewarray2_Java;
static address _multianewarray3_Java;
static address _multianewarray4_Java;
......@@ -143,6 +144,7 @@ class OptoRuntime : public AllStatic {
// Allocate storage for a objArray or typeArray
static void new_array_C(klassOopDesc* array_klass, int len, JavaThread *thread);
static void new_array_nozero_C(klassOopDesc* array_klass, int len, JavaThread *thread);
// Post-slow-path-allocation, pre-initializing-stores step for
// implementing ReduceInitialCardMarks
......@@ -208,6 +210,7 @@ private:
// access to runtime stubs entry points for java code
static address new_instance_Java() { return _new_instance_Java; }
static address new_array_Java() { return _new_array_Java; }
static address new_array_nozero_Java() { return _new_array_nozero_Java; }
static address multianewarray2_Java() { return _multianewarray2_Java; }
static address multianewarray3_Java() { return _multianewarray3_Java; }
static address multianewarray4_Java() { return _multianewarray4_Java; }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册