提交 c1bb7488 编写于 作者: A acorn

Merge

......@@ -170,7 +170,7 @@ struct nmethod_stats_struct {
int pc_desc_resets; // number of resets (= number of caches)
int pc_desc_queries; // queries to nmethod::find_pc_desc
int pc_desc_approx; // number of those which have approximate true
int pc_desc_repeats; // number of _last_pc_desc hits
int pc_desc_repeats; // number of _pc_descs[0] hits
int pc_desc_hits; // number of LRU cache hits
int pc_desc_tests; // total number of PcDesc examinations
int pc_desc_searches; // total number of quasi-binary search steps
......@@ -278,40 +278,44 @@ static inline bool match_desc(PcDesc* pc, int pc_offset, bool approximate) {
void PcDescCache::reset_to(PcDesc* initial_pc_desc) {
if (initial_pc_desc == NULL) {
_last_pc_desc = NULL; // native method
_pc_descs[0] = NULL; // native method; no PcDescs at all
return;
}
NOT_PRODUCT(++nmethod_stats.pc_desc_resets);
// reset the cache by filling it with benign (non-null) values
assert(initial_pc_desc->pc_offset() < 0, "must be sentinel");
_last_pc_desc = initial_pc_desc + 1; // first valid one is after sentinel
for (int i = 0; i < cache_size; i++)
_pc_descs[i] = initial_pc_desc;
}
PcDesc* PcDescCache::find_pc_desc(int pc_offset, bool approximate) {
NOT_PRODUCT(++nmethod_stats.pc_desc_queries);
NOT_PRODUCT(if (approximate) ++nmethod_stats.pc_desc_approx);
NOT_PRODUCT(if (approximate) ++nmethod_stats.pc_desc_approx);
// Note: one might think that caching the most recently
// read value separately would be a win, but one would be
// wrong. When many threads are updating it, the cache
// line it's in would bounce between caches, negating
// any benefit.
// In order to prevent race conditions do not load cache elements
// repeatedly, but use a local copy:
PcDesc* res;
// Step one: Check the most recently returned value.
res = _last_pc_desc;
if (res == NULL) return NULL; // native method; no PcDescs at all
// Step one: Check the most recently added value.
res = _pc_descs[0];
if (res == NULL) return NULL; // native method; no PcDescs at all
if (match_desc(res, pc_offset, approximate)) {
NOT_PRODUCT(++nmethod_stats.pc_desc_repeats);
return res;
}
// Step two: Check the LRU cache.
for (int i = 0; i < cache_size; i++) {
// Step two: Check the rest of the LRU cache.
for (int i = 1; i < cache_size; ++i) {
res = _pc_descs[i];
if (res->pc_offset() < 0) break; // optimization: skip empty cache
if (res->pc_offset() < 0) break; // optimization: skip empty cache
if (match_desc(res, pc_offset, approximate)) {
NOT_PRODUCT(++nmethod_stats.pc_desc_hits);
_last_pc_desc = res; // record this cache hit in case of repeat
return res;
}
}
......@@ -322,24 +326,23 @@ PcDesc* PcDescCache::find_pc_desc(int pc_offset, bool approximate) {
void PcDescCache::add_pc_desc(PcDesc* pc_desc) {
NOT_PRODUCT(++nmethod_stats.pc_desc_adds);
// Update the LRU cache by shifting pc_desc forward:
// Update the LRU cache by shifting pc_desc forward.
for (int i = 0; i < cache_size; i++) {
PcDesc* next = _pc_descs[i];
_pc_descs[i] = pc_desc;
pc_desc = next;
}
// Note: Do not update _last_pc_desc. It fronts for the LRU cache.
}
// adjust pcs_size so that it is a multiple of both oopSize and
// sizeof(PcDesc) (assumes that if sizeof(PcDesc) is not a multiple
// of oopSize, then 2*sizeof(PcDesc) is)
static int adjust_pcs_size(int pcs_size) {
static int adjust_pcs_size(int pcs_size) {
int nsize = round_to(pcs_size, oopSize);
if ((nsize % sizeof(PcDesc)) != 0) {
nsize = pcs_size + sizeof(PcDesc);
}
assert((nsize % oopSize) == 0, "correct alignment");
assert((nsize % oopSize) == 0, "correct alignment");
return nsize;
}
......
......@@ -69,14 +69,13 @@ class PcDescCache VALUE_OBJ_CLASS_SPEC {
friend class VMStructs;
private:
enum { cache_size = 4 };
PcDesc* _last_pc_desc; // most recent pc_desc found
PcDesc* _pc_descs[cache_size]; // last cache_size pc_descs found
public:
PcDescCache() { debug_only(_last_pc_desc = NULL); }
PcDescCache() { debug_only(_pc_descs[0] = NULL); }
void reset_to(PcDesc* initial_pc_desc);
PcDesc* find_pc_desc(int pc_offset, bool approximate);
void add_pc_desc(PcDesc* pc_desc);
PcDesc* last_pc_desc() { return _last_pc_desc; }
PcDesc* last_pc_desc() { return _pc_descs[0]; }
};
......@@ -178,7 +177,7 @@ class nmethod : public CodeBlob {
unsigned int _has_method_handle_invokes:1; // Has this method MethodHandle invokes?
// Protected by Patching_lock
unsigned char _state; // {alive, not_entrant, zombie, unloaded)
unsigned char _state; // {alive, not_entrant, zombie, unloaded}
#ifdef ASSERT
bool _oops_are_stale; // indicates that it's no longer safe to access oops section
......
......@@ -101,9 +101,9 @@ Deoptimization::UnrollBlock::UnrollBlock(int size_of_deoptimized_frame,
_frame_pcs = frame_pcs;
_register_block = NEW_C_HEAP_ARRAY(intptr_t, RegisterMap::reg_count * 2);
_return_type = return_type;
_initial_fp = 0;
// PD (x86 only)
_counter_temp = 0;
_initial_fp = 0;
_unpack_kind = 0;
_sender_sp_temp = 0;
......@@ -459,18 +459,9 @@ Deoptimization::UnrollBlock* Deoptimization::fetch_unroll_info_helper(JavaThread
frame_sizes,
frame_pcs,
return_type);
#if defined(IA32) || defined(AMD64)
// We need a way to pass fp to the unpacking code so the skeletal frames
// come out correct. This is only needed for x86 because of c2 using ebp
// as an allocatable register. So this update is useless (and harmless)
// on the other platforms. It would be nice to do this in a different
// way but even the old style deoptimization had a problem with deriving
// this value. NEEDS_CLEANUP
// Note: now that c1 is using c2's deopt blob we must do this on all
// x86 based platforms
intptr_t** fp_addr = (intptr_t**) (((address)info) + info->initial_fp_offset_in_bytes());
*fp_addr = array->sender().fp(); // was adapter_caller
#endif /* IA32 || AMD64 */
// On some platforms, we need a way to pass fp to the unpacking code
// so the skeletal frames come out correct.
info->set_initial_fp((intptr_t) array->sender().fp());
if (array->frames() > 1) {
if (VerifyStack && TraceDeoptimization) {
......
......@@ -136,12 +136,12 @@ class Deoptimization : AllStatic {
address* _frame_pcs; // Array of frame pc's, in bytes, for unrolling the stack
intptr_t* _register_block; // Block for storing callee-saved registers.
BasicType _return_type; // Tells if we have to restore double or long return value
intptr_t _initial_fp; // FP of the sender frame
// The following fields are used as temps during the unpacking phase
// (which is tight on registers, especially on x86). They really ought
// to be PD variables but that involves moving this class into its own
// file to use the pd include mechanism. Maybe in a later cleanup ...
intptr_t _counter_temp; // SHOULD BE PD VARIABLE (x86 frame count temp)
intptr_t _initial_fp; // SHOULD BE PD VARIABLE (x86/c2 initial ebp)
intptr_t _unpack_kind; // SHOULD BE PD VARIABLE (x86 unpack kind)
intptr_t _sender_sp_temp; // SHOULD BE PD VARIABLE (x86 sender_sp)
public:
......@@ -165,6 +165,8 @@ class Deoptimization : AllStatic {
// Returns the total size of frames
int size_of_frames() const;
void set_initial_fp(intptr_t fp) { _initial_fp = fp; }
// Accessors used by the code generator for the unpack stub.
static int size_of_deoptimized_frame_offset_in_bytes() { return offset_of(UnrollBlock, _size_of_deoptimized_frame); }
static int caller_adjustment_offset_in_bytes() { return offset_of(UnrollBlock, _caller_adjustment); }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册