diff --git a/.hgtags b/.hgtags index 9f74bce973e9c8a0989495e71ec9ff98d1c2fca9..0c59e415f6f9eb618ccf770cad155a33c32e196e 100644 --- a/.hgtags +++ b/.hgtags @@ -153,3 +153,4 @@ e9aa2ca89ad6c53420623d579765f9706ec523d7 jdk7-b130 e9aa2ca89ad6c53420623d579765f9706ec523d7 hs21-b02 0e531ab5ba04967a0e9aa6aef65e6eb3a0dcf632 jdk7-b132 a8d643a4db47c7b58e0bcb49c77b5c3610de86a8 hs21-b03 +1b3a350709e4325d759bb453ff3fb6a463270488 jdk7-b133 diff --git a/src/share/vm/code/nmethod.cpp b/src/share/vm/code/nmethod.cpp index b75569b54a677436bc501aad5ecaa45033d56ccc..4c1f6d4feb29b4d2cb0cd04eb5d2561662149831 100644 --- a/src/share/vm/code/nmethod.cpp +++ b/src/share/vm/code/nmethod.cpp @@ -170,7 +170,7 @@ struct nmethod_stats_struct { int pc_desc_resets; // number of resets (= number of caches) int pc_desc_queries; // queries to nmethod::find_pc_desc int pc_desc_approx; // number of those which have approximate true - int pc_desc_repeats; // number of _last_pc_desc hits + int pc_desc_repeats; // number of _pc_descs[0] hits int pc_desc_hits; // number of LRU cache hits int pc_desc_tests; // total number of PcDesc examinations int pc_desc_searches; // total number of quasi-binary search steps @@ -278,40 +278,44 @@ static inline bool match_desc(PcDesc* pc, int pc_offset, bool approximate) { void PcDescCache::reset_to(PcDesc* initial_pc_desc) { if (initial_pc_desc == NULL) { - _last_pc_desc = NULL; // native method + _pc_descs[0] = NULL; // native method; no PcDescs at all return; } NOT_PRODUCT(++nmethod_stats.pc_desc_resets); // reset the cache by filling it with benign (non-null) values assert(initial_pc_desc->pc_offset() < 0, "must be sentinel"); - _last_pc_desc = initial_pc_desc + 1; // first valid one is after sentinel for (int i = 0; i < cache_size; i++) _pc_descs[i] = initial_pc_desc; } PcDesc* PcDescCache::find_pc_desc(int pc_offset, bool approximate) { NOT_PRODUCT(++nmethod_stats.pc_desc_queries); - NOT_PRODUCT(if (approximate) ++nmethod_stats.pc_desc_approx); + NOT_PRODUCT(if (approximate) ++nmethod_stats.pc_desc_approx); + + // Note: one might think that caching the most recently + // read value separately would be a win, but one would be + // wrong. When many threads are updating it, the cache + // line it's in would bounce between caches, negating + // any benefit. // In order to prevent race conditions do not load cache elements // repeatedly, but use a local copy: PcDesc* res; - // Step one: Check the most recently returned value. - res = _last_pc_desc; - if (res == NULL) return NULL; // native method; no PcDescs at all + // Step one: Check the most recently added value. + res = _pc_descs[0]; + if (res == NULL) return NULL; // native method; no PcDescs at all if (match_desc(res, pc_offset, approximate)) { NOT_PRODUCT(++nmethod_stats.pc_desc_repeats); return res; } - // Step two: Check the LRU cache. - for (int i = 0; i < cache_size; i++) { + // Step two: Check the rest of the LRU cache. + for (int i = 1; i < cache_size; ++i) { res = _pc_descs[i]; - if (res->pc_offset() < 0) break; // optimization: skip empty cache + if (res->pc_offset() < 0) break; // optimization: skip empty cache if (match_desc(res, pc_offset, approximate)) { NOT_PRODUCT(++nmethod_stats.pc_desc_hits); - _last_pc_desc = res; // record this cache hit in case of repeat return res; } } @@ -322,24 +326,23 @@ PcDesc* PcDescCache::find_pc_desc(int pc_offset, bool approximate) { void PcDescCache::add_pc_desc(PcDesc* pc_desc) { NOT_PRODUCT(++nmethod_stats.pc_desc_adds); - // Update the LRU cache by shifting pc_desc forward: + // Update the LRU cache by shifting pc_desc forward. for (int i = 0; i < cache_size; i++) { PcDesc* next = _pc_descs[i]; _pc_descs[i] = pc_desc; pc_desc = next; } - // Note: Do not update _last_pc_desc. It fronts for the LRU cache. } // adjust pcs_size so that it is a multiple of both oopSize and // sizeof(PcDesc) (assumes that if sizeof(PcDesc) is not a multiple // of oopSize, then 2*sizeof(PcDesc) is) -static int adjust_pcs_size(int pcs_size) { +static int adjust_pcs_size(int pcs_size) { int nsize = round_to(pcs_size, oopSize); if ((nsize % sizeof(PcDesc)) != 0) { nsize = pcs_size + sizeof(PcDesc); } - assert((nsize % oopSize) == 0, "correct alignment"); + assert((nsize % oopSize) == 0, "correct alignment"); return nsize; } @@ -1180,14 +1183,17 @@ void nmethod::mark_as_seen_on_stack() { set_stack_traversal_mark(NMethodSweeper::traversal_count()); } -// Tell if a non-entrant method can be converted to a zombie (i.e., there is no activations on the stack) +// Tell if a non-entrant method can be converted to a zombie (i.e., +// there are no activations on the stack, not in use by the VM, +// and not in use by the ServiceThread) bool nmethod::can_not_entrant_be_converted() { assert(is_not_entrant(), "must be a non-entrant method"); // Since the nmethod sweeper only does partial sweep the sweeper's traversal // count can be greater than the stack traversal count before it hits the // nmethod for the second time. - return stack_traversal_mark()+1 < NMethodSweeper::traversal_count(); + return stack_traversal_mark()+1 < NMethodSweeper::traversal_count() && + !is_locked_by_vm(); } void nmethod::inc_decompile_count() { @@ -1294,6 +1300,7 @@ void nmethod::log_state_change() const { // Common functionality for both make_not_entrant and make_zombie bool nmethod::make_not_entrant_or_zombie(unsigned int state) { assert(state == zombie || state == not_entrant, "must be zombie or not_entrant"); + assert(!is_zombie(), "should not already be a zombie"); // Make sure neither the nmethod nor the method is flushed in case of a safepoint in code below. nmethodLocker nml(this); @@ -1301,11 +1308,6 @@ bool nmethod::make_not_entrant_or_zombie(unsigned int state) { No_Safepoint_Verifier nsv; { - // If the method is already zombie there is nothing to do - if (is_zombie()) { - return false; - } - // invalidate osr nmethod before acquiring the patching lock since // they both acquire leaf locks and we don't want a deadlock. // This logic is equivalent to the logic below for patching the @@ -1375,13 +1377,12 @@ bool nmethod::make_not_entrant_or_zombie(unsigned int state) { flush_dependencies(NULL); } - { - // zombie only - if a JVMTI agent has enabled the CompiledMethodUnload event - // and it hasn't already been reported for this nmethod then report it now. - // (the event may have been reported earilier if the GC marked it for unloading). - Pause_No_Safepoint_Verifier pnsv(&nsv); - post_compiled_method_unload(); - } + // zombie only - if a JVMTI agent has enabled the CompiledMethodUnload + // event and it hasn't already been reported for this nmethod then + // report it now. The event may have been reported earilier if the GC + // marked it for unloading). JvmtiDeferredEventQueue support means + // we no longer go to a safepoint here. + post_compiled_method_unload(); #ifdef ASSERT // It's no longer safe to access the oops section since zombie @@ -1566,7 +1567,7 @@ void nmethod::post_compiled_method_unload() { if (_jmethod_id != NULL && JvmtiExport::should_post_compiled_method_unload()) { assert(!unload_reported(), "already unloaded"); JvmtiDeferredEvent event = - JvmtiDeferredEvent::compiled_method_unload_event( + JvmtiDeferredEvent::compiled_method_unload_event(this, _jmethod_id, insts_begin()); if (SafepointSynchronize::is_at_safepoint()) { // Don't want to take the queueing lock. Add it as pending and @@ -2171,10 +2172,12 @@ nmethodLocker::nmethodLocker(address pc) { lock_nmethod(_nm); } -void nmethodLocker::lock_nmethod(nmethod* nm) { +// Only JvmtiDeferredEvent::compiled_method_unload_event() +// should pass zombie_ok == true. +void nmethodLocker::lock_nmethod(nmethod* nm, bool zombie_ok) { if (nm == NULL) return; Atomic::inc(&nm->_lock_count); - guarantee(!nm->is_zombie(), "cannot lock a zombie method"); + guarantee(zombie_ok || !nm->is_zombie(), "cannot lock a zombie method"); } void nmethodLocker::unlock_nmethod(nmethod* nm) { diff --git a/src/share/vm/code/nmethod.hpp b/src/share/vm/code/nmethod.hpp index 7e7025611f6fe43aaec414173f3e598f47b0ca14..60af7fb6f1eb74a9cd96d23961cc6f44011fe6e0 100644 --- a/src/share/vm/code/nmethod.hpp +++ b/src/share/vm/code/nmethod.hpp @@ -69,14 +69,13 @@ class PcDescCache VALUE_OBJ_CLASS_SPEC { friend class VMStructs; private: enum { cache_size = 4 }; - PcDesc* _last_pc_desc; // most recent pc_desc found PcDesc* _pc_descs[cache_size]; // last cache_size pc_descs found public: - PcDescCache() { debug_only(_last_pc_desc = NULL); } + PcDescCache() { debug_only(_pc_descs[0] = NULL); } void reset_to(PcDesc* initial_pc_desc); PcDesc* find_pc_desc(int pc_offset, bool approximate); void add_pc_desc(PcDesc* pc_desc); - PcDesc* last_pc_desc() { return _last_pc_desc; } + PcDesc* last_pc_desc() { return _pc_descs[0]; } }; @@ -178,7 +177,7 @@ class nmethod : public CodeBlob { unsigned int _has_method_handle_invokes:1; // Has this method MethodHandle invokes? // Protected by Patching_lock - unsigned char _state; // {alive, not_entrant, zombie, unloaded) + unsigned char _state; // {alive, not_entrant, zombie, unloaded} #ifdef ASSERT bool _oops_are_stale; // indicates that it's no longer safe to access oops section @@ -194,7 +193,10 @@ class nmethod : public CodeBlob { NOT_PRODUCT(bool _has_debug_info; ) - // Nmethod Flushing lock (if non-zero, then the nmethod is not removed) + // Nmethod Flushing lock. If non-zero, then the nmethod is not removed + // and is not made into a zombie. However, once the nmethod is made into + // a zombie, it will be locked one final time if CompiledMethodUnload + // event processing needs to be done. jint _lock_count; // not_entrant method removal. Each mark_sweep pass will update @@ -522,8 +524,9 @@ public: void flush(); public: - // If returning true, it is unsafe to remove this nmethod even though it is a zombie - // nmethod, since the VM might have a reference to it. Should only be called from a safepoint. + // When true is returned, it is unsafe to remove this nmethod even if + // it is a zombie, since the VM or the ServiceThread might still be + // using it. bool is_locked_by_vm() const { return _lock_count >0; } // See comment at definition of _last_seen_on_stack @@ -689,13 +692,20 @@ public: }; -// Locks an nmethod so its code will not get removed, even if it is a zombie/not_entrant method +// Locks an nmethod so its code will not get removed and it will not +// be made into a zombie, even if it is a not_entrant method. After the +// nmethod becomes a zombie, if CompiledMethodUnload event processing +// needs to be done, then lock_nmethod() is used directly to keep the +// generated code from being reused too early. class nmethodLocker : public StackObj { nmethod* _nm; public: - static void lock_nmethod(nmethod* nm); // note: nm can be NULL + // note: nm can be NULL + // Only JvmtiDeferredEvent::compiled_method_unload_event() + // should pass zombie_ok == true. + static void lock_nmethod(nmethod* nm, bool zombie_ok = false); static void unlock_nmethod(nmethod* nm); // (ditto) nmethodLocker(address pc); // derive nm from pc diff --git a/src/share/vm/prims/jvmtiImpl.cpp b/src/share/vm/prims/jvmtiImpl.cpp index c821c8ed37620164c28ca6d6cc0b69de345c5a80..5bb68f7ab887fcbdc52bf14fc8d7ad6b3f948973 100644 --- a/src/share/vm/prims/jvmtiImpl.cpp +++ b/src/share/vm/prims/jvmtiImpl.cpp @@ -919,15 +919,24 @@ JvmtiDeferredEvent JvmtiDeferredEvent::compiled_method_load_event( nmethod* nm) { JvmtiDeferredEvent event = JvmtiDeferredEvent(TYPE_COMPILED_METHOD_LOAD); event._event_data.compiled_method_load = nm; - nmethodLocker::lock_nmethod(nm); // will be unlocked when posted + // Keep the nmethod alive until the ServiceThread can process + // this deferred event. + nmethodLocker::lock_nmethod(nm); return event; } JvmtiDeferredEvent JvmtiDeferredEvent::compiled_method_unload_event( - jmethodID id, const void* code) { + nmethod* nm, jmethodID id, const void* code) { JvmtiDeferredEvent event = JvmtiDeferredEvent(TYPE_COMPILED_METHOD_UNLOAD); + event._event_data.compiled_method_unload.nm = nm; event._event_data.compiled_method_unload.method_id = id; event._event_data.compiled_method_unload.code_begin = code; + // Keep the nmethod alive until the ServiceThread can process + // this deferred event. This will keep the memory for the + // generated code from being reused too early. We pass + // zombie_ok == true here so that our nmethod that was just + // made into a zombie can be locked. + nmethodLocker::lock_nmethod(nm, true /* zombie_ok */); return event; } JvmtiDeferredEvent JvmtiDeferredEvent::dynamic_code_generated_event( @@ -946,14 +955,19 @@ void JvmtiDeferredEvent::post() { case TYPE_COMPILED_METHOD_LOAD: { nmethod* nm = _event_data.compiled_method_load; JvmtiExport::post_compiled_method_load(nm); + // done with the deferred event so unlock the nmethod nmethodLocker::unlock_nmethod(nm); break; } - case TYPE_COMPILED_METHOD_UNLOAD: + case TYPE_COMPILED_METHOD_UNLOAD: { + nmethod* nm = _event_data.compiled_method_unload.nm; JvmtiExport::post_compiled_method_unload( _event_data.compiled_method_unload.method_id, _event_data.compiled_method_unload.code_begin); + // done with the deferred event so unlock the nmethod + nmethodLocker::unlock_nmethod(nm); break; + } case TYPE_DYNAMIC_CODE_GENERATED: JvmtiExport::post_dynamic_code_generated_internal( _event_data.dynamic_code_generated.name, diff --git a/src/share/vm/prims/jvmtiImpl.hpp b/src/share/vm/prims/jvmtiImpl.hpp index b7bc0fd68a4a0b4099645755aa942ad6deca600b..704d287a8f30593d7c111094e402df25aec98874 100644 --- a/src/share/vm/prims/jvmtiImpl.hpp +++ b/src/share/vm/prims/jvmtiImpl.hpp @@ -458,6 +458,7 @@ class JvmtiDeferredEvent VALUE_OBJ_CLASS_SPEC { union { nmethod* compiled_method_load; struct { + nmethod* nm; jmethodID method_id; const void* code_begin; } compiled_method_unload; @@ -477,7 +478,7 @@ class JvmtiDeferredEvent VALUE_OBJ_CLASS_SPEC { // Factory methods static JvmtiDeferredEvent compiled_method_load_event(nmethod* nm) KERNEL_RETURN_(JvmtiDeferredEvent()); - static JvmtiDeferredEvent compiled_method_unload_event( + static JvmtiDeferredEvent compiled_method_unload_event(nmethod* nm, jmethodID id, const void* code) KERNEL_RETURN_(JvmtiDeferredEvent()); static JvmtiDeferredEvent dynamic_code_generated_event( const char* name, const void* begin, const void* end) diff --git a/src/share/vm/runtime/deoptimization.cpp b/src/share/vm/runtime/deoptimization.cpp index 36fd0be4bd37d852e2bab1a2d537354dc12895c7..3e03486d3ea50b1ca7045a8259e4dd130f1713bd 100644 --- a/src/share/vm/runtime/deoptimization.cpp +++ b/src/share/vm/runtime/deoptimization.cpp @@ -101,9 +101,9 @@ Deoptimization::UnrollBlock::UnrollBlock(int size_of_deoptimized_frame, _frame_pcs = frame_pcs; _register_block = NEW_C_HEAP_ARRAY(intptr_t, RegisterMap::reg_count * 2); _return_type = return_type; + _initial_fp = 0; // PD (x86 only) _counter_temp = 0; - _initial_fp = 0; _unpack_kind = 0; _sender_sp_temp = 0; @@ -459,18 +459,9 @@ Deoptimization::UnrollBlock* Deoptimization::fetch_unroll_info_helper(JavaThread frame_sizes, frame_pcs, return_type); -#if defined(IA32) || defined(AMD64) - // We need a way to pass fp to the unpacking code so the skeletal frames - // come out correct. This is only needed for x86 because of c2 using ebp - // as an allocatable register. So this update is useless (and harmless) - // on the other platforms. It would be nice to do this in a different - // way but even the old style deoptimization had a problem with deriving - // this value. NEEDS_CLEANUP - // Note: now that c1 is using c2's deopt blob we must do this on all - // x86 based platforms - intptr_t** fp_addr = (intptr_t**) (((address)info) + info->initial_fp_offset_in_bytes()); - *fp_addr = array->sender().fp(); // was adapter_caller -#endif /* IA32 || AMD64 */ + // On some platforms, we need a way to pass fp to the unpacking code + // so the skeletal frames come out correct. + info->set_initial_fp((intptr_t) array->sender().fp()); if (array->frames() > 1) { if (VerifyStack && TraceDeoptimization) { diff --git a/src/share/vm/runtime/deoptimization.hpp b/src/share/vm/runtime/deoptimization.hpp index e560fde5d81dd4ea9324f1fd77c8e96c7ac9d754..c62616fcb74666deecd03ac6ee625afa33f36592 100644 --- a/src/share/vm/runtime/deoptimization.hpp +++ b/src/share/vm/runtime/deoptimization.hpp @@ -136,12 +136,12 @@ class Deoptimization : AllStatic { address* _frame_pcs; // Array of frame pc's, in bytes, for unrolling the stack intptr_t* _register_block; // Block for storing callee-saved registers. BasicType _return_type; // Tells if we have to restore double or long return value + intptr_t _initial_fp; // FP of the sender frame // The following fields are used as temps during the unpacking phase // (which is tight on registers, especially on x86). They really ought // to be PD variables but that involves moving this class into its own // file to use the pd include mechanism. Maybe in a later cleanup ... intptr_t _counter_temp; // SHOULD BE PD VARIABLE (x86 frame count temp) - intptr_t _initial_fp; // SHOULD BE PD VARIABLE (x86/c2 initial ebp) intptr_t _unpack_kind; // SHOULD BE PD VARIABLE (x86 unpack kind) intptr_t _sender_sp_temp; // SHOULD BE PD VARIABLE (x86 sender_sp) public: @@ -165,6 +165,8 @@ class Deoptimization : AllStatic { // Returns the total size of frames int size_of_frames() const; + void set_initial_fp(intptr_t fp) { _initial_fp = fp; } + // Accessors used by the code generator for the unpack stub. static int size_of_deoptimized_frame_offset_in_bytes() { return offset_of(UnrollBlock, _size_of_deoptimized_frame); } static int caller_adjustment_offset_in_bytes() { return offset_of(UnrollBlock, _caller_adjustment); } diff --git a/src/share/vm/runtime/serviceThread.cpp b/src/share/vm/runtime/serviceThread.cpp index 600398d1c12859ba7b3cf509c4273d4bdf8b3193..e5c70f7b70c98a7eaf8cc020f25f5cc3c103e9d9 100644 --- a/src/share/vm/runtime/serviceThread.cpp +++ b/src/share/vm/runtime/serviceThread.cpp @@ -70,11 +70,10 @@ void ServiceThread::initialize() { java_lang_Thread::set_priority(thread_oop(), NearMaxPriority); java_lang_Thread::set_daemon(thread_oop()); thread->set_threadObj(thread_oop()); + _instance = thread; Threads::add(thread); Thread::start(thread); - - _instance = thread; } }