Commit 21662839 authored by K kvn

Merge

@@ -2343,6 +2343,11 @@ void Assembler::vpermq(XMMRegister dst, XMMRegister src, int imm8, bool vector256) {
emit_int8(imm8);
}
void Assembler::pause() {
emit_int8((unsigned char)0xF3);
emit_int8((unsigned char)0x90);
}
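The two bytes above encode the PAUSE instruction (an F3 REP prefix on NOP, 0x90), the standard spin-wait hint; in this change it backs the RTMSpinLoopCount spin in rtm_retry_lock_on_busy. A minimal standalone sketch of that kind of bounded spin, using the _mm_pause() intrinsic instead of HotSpot's assembler (function name and shape are illustrative, not from this patch):

```cpp
#include <immintrin.h>
#include <atomic>

// Spin at most 'spin_count' times waiting for the lock word to clear.
bool spin_until_free(std::atomic<int>& lock_word, int spin_count) {
  for (int i = 0; i < spin_count; i++) {
    if (lock_word.load(std::memory_order_relaxed) == 0) {
      return true;     // looks free: caller may retry the transaction
    }
    _mm_pause();       // emits F3 90, the same bytes as Assembler::pause()
  }
  return false;        // still busy after spin_count iterations
}
```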
void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
assert(VM_Version::supports_sse4_2(), "");
InstructionMark im(this);
@@ -2667,6 +2672,11 @@ void Assembler::rcll(Register dst, int imm8) {
}
}
void Assembler::rdtsc() {
emit_int8((unsigned char)0x0F);
emit_int8((unsigned char)0x31);
}
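0F 31 is RDTSC, which loads the 64-bit time-stamp counter into EDX:EAX. The RTM code uses it in branch_on_random_using_rdtsc() as a cheap pseudo-random source for sampling counter updates. A standalone sketch of the idea, assuming the sampling rate is a power of 2 (names are illustrative):

```cpp
#include <x86intrin.h>
#include <cstdint>

// True roughly once every 'count' calls, judged by the low TSC bits.
bool take_branch_about_once_per(uint64_t count) {
  uint64_t tsc = __rdtsc();          // same RDTSC (0F 31) the emitter above produces
  return (tsc & (count - 1)) == 0;   // low bits as a cheap random source
}
```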
// copies data from [esi] to [edi] using rcx pointer sized words
// generic
void Assembler::rep_mov() {
@@ -2976,6 +2986,11 @@ void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE);
}
void Assembler::xabort(int8_t imm8) {
emit_int8((unsigned char)0xC6);
emit_int8((unsigned char)0xF8);
emit_int8((unsigned char)(imm8 & 0xFF));
}
void Assembler::xaddl(Address dst, Register src) {
InstructionMark im(this);
@@ -2985,6 +3000,24 @@ void Assembler::xaddl(Address dst, Register src) {
emit_operand(src, dst);
}
void Assembler::xbegin(Label& abort, relocInfo::relocType rtype) {
InstructionMark im(this);
relocate(rtype);
if (abort.is_bound()) {
address entry = target(abort);
assert(entry != NULL, "abort entry NULL");
intptr_t offset = entry - pc();
emit_int8((unsigned char)0xC7);
emit_int8((unsigned char)0xF8);
emit_int32(offset - 6); // 2 opcode + 4 address
} else {
abort.add_patch_at(code(), locator());
emit_int8((unsigned char)0xC7);
emit_int8((unsigned char)0xF8);
emit_int32(0);
}
}
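The `offset - 6` above reflects XBEGIN's encoding: two opcode bytes (C7 F8) plus a 4-byte rel32 that the CPU resolves relative to the end of the instruction, while `offset` was computed from pc() before any byte was emitted. A small worked example of the arithmetic (addresses are made up):

```cpp
#include <cstdint>
#include <cassert>

int main() {
  uintptr_t pc_before   = 0x1000;  // pc() when 'offset' is computed
  uintptr_t abort_entry = 0x0ff0;  // bound abort handler
  intptr_t offset = (intptr_t)(abort_entry - pc_before);  // -0x10, as in the code
  int32_t rel32 = (int32_t)(offset - 6);                  // -0x16 is what gets emitted
  // The CPU resolves the target as end-of-instruction + rel32:
  assert(pc_before + 6 + rel32 == abort_entry);
  return 0;
}
```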
void Assembler::xchgl(Register dst, Address src) { // xchg
InstructionMark im(this);
prefix(src, dst);
@@ -2998,6 +3031,12 @@ void Assembler::xchgl(Register dst, Register src) {
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::xend() {
emit_int8((unsigned char)0x0F);
emit_int8((unsigned char)0x01);
emit_int8((unsigned char)0xD5);
}
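xbegin/xabort/xend together emit the three RTM primitives: a transaction runs from XBEGIN to XEND, and XABORT rolls it back, delivering an abort status (including the 8-bit XABORT immediate in the explicit case) to XBEGIN's fall-back path. A standalone sketch of the protocol using the compiler intrinsics that map to the same instructions (build with -mrtm and run on RTM-capable hardware; not HotSpot code):

```cpp
#include <immintrin.h>

int transactional_increment(volatile int* p) {
  unsigned status = _xbegin();   // XBEGIN: _XBEGIN_STARTED or an abort status
  if (status == _XBEGIN_STARTED) {
    if (*p < 0) {
      _xabort(1);                // XABORT with an explicit 8-bit code
    }
    ++*p;                        // speculative update
    _xend();                     // XEND commits the transaction
    return 0;
  }
  return (int)status;            // aborted: caller falls back to a real lock
}
```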
void Assembler::xgetbv() {
emit_int8(0x0F);
emit_int8(0x01);
......
@@ -1451,6 +1451,8 @@ private:
// Permutation of 64bit words
void vpermq(XMMRegister dst, XMMRegister src, int imm8, bool vector256);
void pause();
// SSE4.2 string instructions
void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
void pcmpestri(XMMRegister xmm1, Address src, int imm8);
@@ -1535,6 +1537,8 @@ private:
void rclq(Register dst, int imm8);
void rdtsc();
void ret(int imm16);
void sahf();
@@ -1632,16 +1636,22 @@ private:
void ucomiss(XMMRegister dst, Address src);
void ucomiss(XMMRegister dst, XMMRegister src);
void xabort(int8_t imm8);
void xaddl(Address dst, Register src);
void xaddq(Address dst, Register src);
void xbegin(Label& abort, relocInfo::relocType rtype = relocInfo::none);
void xchgl(Register reg, Address adr);
void xchgl(Register dst, Register src);
void xchgq(Register reg, Address adr);
void xchgq(Register dst, Register src);
void xend();
// Get Value of Extended Control Register
void xgetbv();
......
@@ -128,6 +128,42 @@ define_pd_global(uintx, TypeProfileLevel, 111);
product(bool, UseFastStosb, false, \
"Use fast-string operation for zeroing: rep stosb") \
\
/* Use Restricted Transactional Memory for lock eliding */ \
experimental(bool, UseRTMLocking, false, \
"Enable RTM lock eliding for inflated locks in compiled code") \
\
experimental(bool, UseRTMForStackLocks, false, \
"Enable RTM lock eliding for stack locks in compiled code") \
\
experimental(bool, UseRTMDeopt, false, \
"Perform deopt and recompilation based on RTM abort ratio") \
\
experimental(uintx, RTMRetryCount, 5, \
"Number of RTM retries on lock abort or busy") \
\
experimental(intx, RTMSpinLoopCount, 100, \
"Spin count for lock to become free before RTM retry") \
\
experimental(intx, RTMAbortThreshold, 1000, \
"Calculate abort ratio after this number of aborts") \
\
experimental(intx, RTMLockingThreshold, 10000, \
"Lock count at which to do RTM lock eliding without " \
"abort ratio calculation") \
\
experimental(intx, RTMAbortRatio, 50, \
"Lock abort ratio at which to stop use RTM lock eliding") \
\
experimental(intx, RTMTotalCountIncrRate, 64, \
"Increment total RTM attempted lock count once every n times") \
\
experimental(intx, RTMLockingCalculationDelay, 0, \
"Number of milliseconds to wait before start calculating aborts " \
"for RTM locking") \
\
experimental(bool, UseRTMXendForLockBusy, false, \
"Use RTM Xend instead of Xabort when lock busy") \
\
/* assembler */ \
product(bool, Use486InstrsOnly, false, \
"Use 80486 Compliant instruction subset") \
......
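All the RTM flags above are declared experimental(), so they also require -XX:+UnlockExperimentalVMOptions on the command line. Their defaults describe a deoptimization policy: once RTMAbortThreshold aborts have accumulated, compare the abort ratio against RTMAbortRatio and stop eliding locks when aborts dominate. A rough restatement of that decision in plain C++ (the real check is emitted by MacroAssembler::rtm_abort_ratio_calculation and works on RTMLockingCounters; the function below is illustrative only):

```cpp
#include <cstdint>

// Flag defaults from the block above.
const uint64_t RTMAbortThreshold     = 1000; // recompute the ratio after this many aborts
const uint64_t RTMAbortRatio         = 50;   // percent of aborts at which eliding stops
const uint64_t RTMTotalCountIncrRate = 64;   // total count is sampled roughly 1-in-64

// Returns true if RTM lock eliding should be kept for this lock site.
bool keep_using_rtm(uint64_t abort_count, uint64_t sampled_total_count) {
  if (abort_count < RTMAbortThreshold) {
    return true;                                     // not enough data yet
  }
  uint64_t total = sampled_total_count * RTMTotalCountIncrRate;
  return abort_count * 100 < RTMAbortRatio * total;  // abort ratio still below the limit
}
```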
@@ -27,6 +27,7 @@
#include "asm/assembler.hpp"
#include "utilities/macros.hpp"
#include "runtime/rtmLocking.hpp"
// MacroAssembler extends Assembler by frequently used macros.
@@ -111,7 +112,8 @@ class MacroAssembler: public Assembler {
op == 0xE9 /* jmp */ ||
op == 0xEB /* short jmp */ ||
(op & 0xF0) == 0x70 /* short jcc */ ||
op == 0x0F && (branch[1] & 0xF0) == 0x80 /* jcc */ ||
op == 0xC7 && branch[1] == 0xF8 /* xbegin */,
"Invalid opcode at patch point"); "Invalid opcode at patch point");
if (op == 0xEB || (op & 0xF0) == 0x70) { if (op == 0xEB || (op & 0xF0) == 0x70) {
@@ -121,7 +123,7 @@ class MacroAssembler: public Assembler {
guarantee(this->is8bit(imm8), "Short forward jump exceeds 8-bit offset");
*disp = imm8;
} else {
int* disp = (int*) &branch[(op == 0x0F || op == 0xC7)? 2: 1];
int imm32 = target - (address) &disp[1];
*disp = imm32;
}
@@ -161,7 +163,6 @@ class MacroAssembler: public Assembler {
void incrementq(Register reg, int value = 1);
void incrementq(Address dst, int value = 1);
// Support optimal SSE move instructions.
void movflt(XMMRegister dst, XMMRegister src) {
if (UseXmmRegToRegMoveAll) { movaps(dst, src); return; }
@@ -187,6 +188,8 @@ class MacroAssembler: public Assembler {
void incrementl(AddressLiteral dst);
void incrementl(ArrayAddress dst);
void incrementq(AddressLiteral dst);
// Alignment
void align(int modulus);
@@ -654,8 +657,36 @@ class MacroAssembler: public Assembler {
#ifdef COMPILER2
// Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file.
// See full description in macroAssembler_x86.cpp.
void fast_lock(Register obj, Register box, Register tmp,
               Register scr, Register cx1, Register cx2,
BiasedLockingCounters* counters,
RTMLockingCounters* rtm_counters,
RTMLockingCounters* stack_rtm_counters,
Metadata* method_data,
bool use_rtm, bool profile_rtm);
void fast_unlock(Register obj, Register box, Register tmp, bool use_rtm);
#if INCLUDE_RTM_OPT
void rtm_counters_update(Register abort_status, Register rtm_counters);
void branch_on_random_using_rdtsc(Register tmp, Register scr, int count, Label& brLabel);
void rtm_abort_ratio_calculation(Register tmp, Register rtm_counters_reg,
RTMLockingCounters* rtm_counters,
Metadata* method_data);
void rtm_profiling(Register abort_status_Reg, Register rtm_counters_Reg,
RTMLockingCounters* rtm_counters, Metadata* method_data, bool profile_rtm);
void rtm_retry_lock_on_abort(Register retry_count, Register abort_status, Label& retryLabel);
void rtm_retry_lock_on_busy(Register retry_count, Register box, Register tmp, Register scr, Label& retryLabel);
void rtm_stack_locking(Register obj, Register tmp, Register scr,
Register retry_on_abort_count,
RTMLockingCounters* stack_rtm_counters,
Metadata* method_data, bool profile_rtm,
Label& DONE_LABEL, Label& IsInflated);
void rtm_inflated_locking(Register obj, Register box, Register tmp,
Register scr, Register retry_on_busy_count,
Register retry_on_abort_count,
RTMLockingCounters* rtm_counters,
Metadata* method_data, bool profile_rtm,
Label& DONE_LABEL);
#endif
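The lock path these declarations describe has, roughly, this shape: start a transaction with XBEGIN, inspect the lock word inside it, abort if the lock is already held, and retry up to RTMRetryCount times while the abort status carries the hardware's retry hint. A standalone analog using the RTM intrinsics (illustrative only; the real rtm_stack_locking/rtm_inflated_locking operate on object mark words and fall through into the CAS-based fast_lock path on failure; build with -mrtm):

```cpp
#include <immintrin.h>
#include <atomic>

// Try to elide 'lock_word' with RTM; on success the caller runs inside the
// transaction and the matching unlock path executes _xend() (XEND).
bool rtm_elide_lock(std::atomic<int>& lock_word, int retry_count) {
  for (int i = 0; i <= retry_count; i++) {
    unsigned status = _xbegin();                       // XBEGIN
    if (status == _XBEGIN_STARTED) {
      if (lock_word.load(std::memory_order_relaxed) != 0) {
        _xabort(0xff);                                 // lock held: XABORT to fall back
      }
      return true;                                     // lock elided
    }
    if (!(status & _XABORT_RETRY)) {
      break;                                           // hardware says retrying is pointless
    }
  }
  return false;                                        // caller takes the ordinary lock path
}
```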
#endif
Condition negate_condition(Condition cond);
@@ -721,6 +752,7 @@ class MacroAssembler: public Assembler {
void imulptr(Register dst, Register src) { LP64_ONLY(imulq(dst, src)) NOT_LP64(imull(dst, src)); }
void imulptr(Register dst, Register src, int imm32) { LP64_ONLY(imulq(dst, src, imm32)) NOT_LP64(imull(dst, src, imm32)); }
void negptr(Register dst) { LP64_ONLY(negq(dst)) NOT_LP64(negl(dst)); }
@@ -762,7 +794,14 @@ class MacroAssembler: public Assembler {
// Conditionally (atomically, on MPs) increments passed counter address, preserving condition codes.
void cond_inc32(Condition cond, AddressLiteral counter_addr);
// Unconditional atomic increment.
void atomic_incl(Address counter_addr);
void atomic_incl(AddressLiteral counter_addr, Register scr = rscratch1);
#ifdef _LP64
void atomic_incq(Address counter_addr);
void atomic_incq(AddressLiteral counter_addr, Register scr = rscratch1);
#endif
void atomic_incptr(AddressLiteral counter_addr, Register scr = rscratch1) { LP64_ONLY(atomic_incq(counter_addr, scr)) NOT_LP64(atomic_incl(counter_addr, scr)) ; }
void atomic_incptr(Address counter_addr) { LP64_ONLY(atomic_incq(counter_addr)) NOT_LP64(atomic_incl(counter_addr)) ; }
void lea(Register dst, AddressLiteral adr);
void lea(Address dst, AddressLiteral adr);
@@ -1074,7 +1113,11 @@ public:
void movptr(Register dst, Address src);
#ifdef _LP64
void movptr(Register dst, AddressLiteral src, Register scratch=rscratch1);
#else
void movptr(Register dst, AddressLiteral src, Register scratch=noreg); // Scratch reg is ignored in 32-bit
#endif
void movptr(Register dst, intptr_t src);
void movptr(Register dst, Register src);
......
/*
* Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "memory/allocation.inline.hpp"
#include "runtime/task.hpp"
#include "runtime/rtmLocking.hpp"
// One-shot PeriodicTask subclass for enabling RTM locking
uintx RTMLockingCounters::_calculation_flag = 0;
class RTMLockingCalculationTask : public PeriodicTask {
public:
RTMLockingCalculationTask(size_t interval_time) : PeriodicTask(interval_time){ }
virtual void task() {
RTMLockingCounters::_calculation_flag = 1;
// Reclaim our storage and disenroll ourself
delete this;
}
};
void RTMLockingCounters::init() {
if (UseRTMLocking && RTMLockingCalculationDelay > 0) {
RTMLockingCalculationTask* task = new RTMLockingCalculationTask(RTMLockingCalculationDelay);
task->enroll();
} else {
_calculation_flag = 1;
}
}
//------------------------------print_on-------------------------------
void RTMLockingCounters::print_on(outputStream* st) {
tty->print_cr("# rtm locks total (estimated): " UINTX_FORMAT, _total_count * RTMTotalCountIncrRate);
tty->print_cr("# rtm lock aborts : " UINTX_FORMAT, _abort_count);
for (int i = 0; i < ABORT_STATUS_LIMIT; i++) {
tty->print_cr("# rtm lock aborts %d: " UINTX_FORMAT, i, _abortX_count[i]);
}
}
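print_on() scales _total_count by RTMTotalCountIncrRate because the generated code bumps the total only about once every RTMTotalCountIncrRate lock attempts, gated on the low time-stamp-counter bits; that sampling is also why vm_version_x86.cpp insists the flag is a power of 2. A sketch of the sampled update (illustrative, not the emitted code):

```cpp
#include <x86intrin.h>
#include <cstdint>

// Bump 'total_count' roughly once per 'incr_rate' calls; incr_rate is
// assumed to be a power of 2 so the mask selects its low TSC bits.
void count_rtm_attempt(uint64_t& total_count, uint64_t incr_rate) {
  if ((__rdtsc() & (incr_rate - 1)) == 0) {
    total_count++;   // estimated attempts ~= total_count * incr_rate
  }
}
```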
@@ -1815,6 +1815,13 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
// Frame is now completed as far as size and linkage.
int frame_complete = ((intptr_t)__ pc()) - start;
if (UseRTMLocking) {
// Abort RTM transaction before calling JNI
// because critical section will be large and will be
// aborted anyway. Also nmethod could be deoptimized.
__ xabort(0);
}
// Calculate the difference between rsp and rbp. We need to know it
// after the native call because on windows Java Natives will pop
// the arguments and it is painful to do rsp relative addressing
@@ -3168,6 +3175,12 @@ void SharedRuntime::generate_uncommon_trap_blob() {
};
address start = __ pc();
if (UseRTMLocking) {
// Abort RTM transaction before possible nmethod deoptimization.
__ xabort(0);
}
// Push self-frame.
__ subptr(rsp, return_off*wordSize); // Epilog!
@@ -3353,6 +3366,14 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
address call_pc = NULL;
bool cause_return = (poll_type == POLL_AT_RETURN);
bool save_vectors = (poll_type == POLL_AT_VECTOR_LOOP);
if (UseRTMLocking) {
// Abort RTM transaction before calling runtime
// because critical section will be large and will be
// aborted anyway. Also nmethod could be deoptimized.
__ xabort(0);
}
// If cause_return is true we are at a poll_return and there is
// the return address on the stack to the caller on the nmethod
// that is safepoint. We can leave this return on the stack and
......
@@ -2010,6 +2010,13 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
// Frame is now completed as far as size and linkage.
int frame_complete = ((intptr_t)__ pc()) - start;
if (UseRTMLocking) {
// Abort RTM transaction before calling JNI
// because critical section will be large and will be
// aborted anyway. Also nmethod could be deoptimized.
__ xabort(0);
}
#ifdef ASSERT
{
Label L;
@@ -3610,6 +3617,11 @@ void SharedRuntime::generate_uncommon_trap_blob() {
address start = __ pc();
if (UseRTMLocking) {
// Abort RTM transaction before possible nmethod deoptimization.
__ xabort(0);
}
// Push self-frame. We get here with a return address on the
// stack, so rsp is 8-byte aligned until we allocate our frame.
__ subptr(rsp, SimpleRuntimeFrame::return_off << LogBytesPerInt); // Epilog!
@@ -3790,6 +3802,13 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
bool cause_return = (poll_type == POLL_AT_RETURN);
bool save_vectors = (poll_type == POLL_AT_VECTOR_LOOP);
if (UseRTMLocking) {
// Abort RTM transaction before calling runtime
// because critical section will be large and will be
// aborted anyway. Also nmethod could be deoptimized.
__ xabort(0);
}
// Make room for return address (or push it again)
if (!cause_return) {
__ push(rbx);
......
@@ -475,7 +475,7 @@ void VM_Version::get_processor_features() {
}
char buf[256];
jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
cores_per_cpu(), threads_per_core(),
cpu_family(), _model, _stepping,
(supports_cmov() ? ", cmov" : ""),
@@ -492,8 +492,9 @@ void VM_Version::get_processor_features() {
(supports_avx() ? ", avx" : ""),
(supports_avx2() ? ", avx2" : ""),
(supports_aes() ? ", aes" : ""),
(supports_clmul() ? ", clmul" : ""),
(supports_erms() ? ", erms" : ""),
(supports_rtm() ? ", rtm" : ""),
(supports_mmx_ext() ? ", mmxext" : ""),
(supports_3dnow_prefetch() ? ", 3dnowpref" : ""),
(supports_lzcnt() ? ", lzcnt": ""),
@@ -534,7 +535,7 @@ void VM_Version::get_processor_features() {
}
} else if (UseAES) {
if (!FLAG_IS_DEFAULT(UseAES))
warning("AES instructions not available on this CPU"); warning("AES instructions are not available on this CPU");
FLAG_SET_DEFAULT(UseAES, false);
}
@@ -567,10 +568,57 @@ void VM_Version::get_processor_features() {
}
} else if (UseAESIntrinsics) {
if (!FLAG_IS_DEFAULT(UseAESIntrinsics))
warning("AES intrinsics not available on this CPU"); warning("AES intrinsics are not available on this CPU");
FLAG_SET_DEFAULT(UseAESIntrinsics, false);
}
// Adjust RTM (Restricted Transactional Memory) flags
if (!supports_rtm() && UseRTMLocking) {
// Can't continue because UseRTMLocking affects UseBiasedLocking flag
// setting during arguments processing. See use_biased_locking().
// VM_Version_init() is executed after UseBiasedLocking is used
// in Thread::allocate().
vm_exit_during_initialization("RTM instructions are not available on this CPU");
}
#if INCLUDE_RTM_OPT
if (UseRTMLocking) {
if (!FLAG_IS_CMDLINE(UseRTMLocking)) {
// RTM locking should be used only for applications with
// high lock contention. For now we do not use it by default.
vm_exit_during_initialization("UseRTMLocking flag should be only set on command line");
}
if (!is_power_of_2(RTMTotalCountIncrRate)) {
warning("RTMTotalCountIncrRate must be a power of 2, resetting it to 64");
FLAG_SET_DEFAULT(RTMTotalCountIncrRate, 64);
}
if (RTMAbortRatio < 0 || RTMAbortRatio > 100) {
warning("RTMAbortRatio must be in the range 0 to 100, resetting it to 50");
FLAG_SET_DEFAULT(RTMAbortRatio, 50);
}
} else { // !UseRTMLocking
if (UseRTMForStackLocks) {
if (!FLAG_IS_DEFAULT(UseRTMForStackLocks)) {
warning("UseRTMForStackLocks flag should be off when UseRTMLocking flag is off");
}
FLAG_SET_DEFAULT(UseRTMForStackLocks, false);
}
if (UseRTMDeopt) {
FLAG_SET_DEFAULT(UseRTMDeopt, false);
}
if (PrintPreciseRTMLockingStatistics) {
FLAG_SET_DEFAULT(PrintPreciseRTMLockingStatistics, false);
}
}
#else
if (UseRTMLocking) {
// Only C2 does RTM locking optimization.
// Can't continue because UseRTMLocking affects UseBiasedLocking flag
// setting during arguments processing. See use_biased_locking().
vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
}
#endif
#ifdef COMPILER2
if (UseFPUForSpilling) {
if (UseSSE < 2) {
@@ -913,6 +961,27 @@ void VM_Version::get_processor_features() {
#endif // !PRODUCT
}
bool VM_Version::use_biased_locking() {
#if INCLUDE_RTM_OPT
// RTM locking is most useful when there is high lock contention and
// low data contention. With high lock contention the lock is usually
// inflated and biased locking is not suitable for that case.
// RTM locking code requires that biased locking is off.
// Note: we can't switch off UseBiasedLocking in get_processor_features()
// because it is used by Thread::allocate() which is called before
// VM_Version::initialize().
if (UseRTMLocking && UseBiasedLocking) {
if (FLAG_IS_DEFAULT(UseBiasedLocking)) {
FLAG_SET_DEFAULT(UseBiasedLocking, false);
} else {
warning("Biased locking is not supported with RTM locking; ignoring UseBiasedLocking flag." );
UseBiasedLocking = false;
}
}
#endif
return UseBiasedLocking;
}
void VM_Version::initialize() {
ResourceMark rm;
// Making this stub must be FIRST use of assembler
......
@@ -207,7 +207,9 @@ public:
: 2,
bmi2 : 1,
erms : 1,
: 1,
rtm : 1,
: 20;
} bits;
};
@@ -257,7 +259,8 @@ protected:
CPU_ERMS = (1 << 20), // enhanced 'rep movsb/stosb' instructions
CPU_CLMUL = (1 << 21), // carryless multiply for CRC
CPU_BMI1 = (1 << 22),
CPU_BMI2 = (1 << 23) CPU_BMI2 = (1 << 23),
CPU_RTM = (1 << 24) // Restricted Transactional Memory instructions
} cpuFeatureFlags;
enum {
@@ -444,6 +447,8 @@ protected:
result |= CPU_ERMS;
if (_cpuid_info.std_cpuid1_ecx.bits.clmul != 0)
result |= CPU_CLMUL;
if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
result |= CPU_RTM;
// AMD features.
if (is_amd()) {
@@ -514,6 +519,9 @@ public:
// Initialization
static void initialize();
// Override Abstract_VM_Version implementation
static bool use_biased_locking();
// Asserts
static void assert_is_initialized() {
assert(_cpuid_info.std_cpuid1_eax.bits.family != 0, "VM_Version not initialized");
@@ -606,6 +614,7 @@ public:
static bool supports_aes() { return (_cpuFeatures & CPU_AES) != 0; }
static bool supports_erms() { return (_cpuFeatures & CPU_ERMS) != 0; }
static bool supports_clmul() { return (_cpuFeatures & CPU_CLMUL) != 0; }
static bool supports_rtm() { return (_cpuFeatures & CPU_RTM) != 0; }
static bool supports_bmi1() { return (_cpuFeatures & CPU_BMI1) != 0; }
static bool supports_bmi2() { return (_cpuFeatures & CPU_BMI2) != 0; }
// Intel features
......
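The new `rtm` bitfield and CPU_RTM feature flag decode CPUID leaf 7 (subleaf 0), EBX bit 11, where Intel reports RTM support. The same check from ordinary user code, using GCC/Clang's <cpuid.h>:

```cpp
#include <cpuid.h>
#include <cstdio>

int main() {
  unsigned eax, ebx, ecx, edx;
  __cpuid_count(7, 0, eax, ebx, ecx, edx);  // structured extended feature leaf
  bool rtm = (ebx >> 11) & 1;               // CPUID.(EAX=7,ECX=0):EBX.RTM[bit 11]
  printf("rtm: %s\n", rtm ? "yes" : "no");
  return 0;
}
```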
@@ -12915,13 +12915,31 @@ instruct RethrowException()
// inlined locking and unlocking
instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
predicate(Compile::current()->use_rtm());
match(Set cr (FastLock object box));
effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
ins_cost(300);
format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
ins_encode %{
__ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
$scr$$Register, $cx1$$Register, $cx2$$Register,
_counters, _rtm_counters, _stack_rtm_counters,
((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
true, ra_->C->profile_rtm());
%}
ins_pipe(pipe_slow);
%}
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
predicate(!Compile::current()->use_rtm());
match(Set cr (FastLock object box));
effect(TEMP tmp, TEMP scr, USE_KILL box);
ins_cost(300);
format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
ins_encode %{
__ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
$scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
%}
ins_pipe(pipe_slow);
%}
@@ -12932,7 +12950,7 @@ instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
ins_cost(300);
format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
ins_encode %{
__ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
%}
ins_pipe(pipe_slow);
%}
......
@@ -11377,13 +11377,31 @@ instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
// ============================================================================
// inlined locking and unlocking
instruct cmpFastLockRTM(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, rdx_RegI scr, rRegI cx1, rRegI cx2) %{
predicate(Compile::current()->use_rtm());
match(Set cr (FastLock object box));
effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
ins_cost(300);
format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
ins_encode %{
__ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
$scr$$Register, $cx1$$Register, $cx2$$Register,
_counters, _rtm_counters, _stack_rtm_counters,
((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
true, ra_->C->profile_rtm());
%}
ins_pipe(pipe_slow);
%}
instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, rRegP scr) %{
predicate(!Compile::current()->use_rtm());
match(Set cr (FastLock object box));
effect(TEMP tmp, TEMP scr, USE_KILL box);
ins_cost(300);
format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr" %}
ins_encode %{
__ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
$scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
%}
ins_pipe(pipe_slow);
%}
@@ -11394,7 +11412,7 @@ instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP box, rRegP tmp) %{
ins_cost(300);
format %{ "fastunlock $object,$box\t! kills $box,$tmp" %}
ins_encode %{
__ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
%}
ins_pipe(pipe_slow);
%}
......
@@ -1592,6 +1592,8 @@ void ArchDesc::defineExpand(FILE *fp, InstructForm *node) {
if( node->is_ideal_fastlock() && new_inst->is_ideal_fastlock() ) {
fprintf(fp, " ((MachFastLockNode*)n%d)->_counters = _counters;\n",cnt);
fprintf(fp, " ((MachFastLockNode*)n%d)->_rtm_counters = _rtm_counters;\n",cnt);
fprintf(fp, " ((MachFastLockNode*)n%d)->_stack_rtm_counters = _stack_rtm_counters;\n",cnt);
}
// Fill in the bottom_type where requested
@@ -3828,6 +3830,8 @@ void ArchDesc::buildMachNode(FILE *fp_cpp, InstructForm *inst, const char *indent) {
}
if( inst->is_ideal_fastlock() ) {
fprintf(fp_cpp, "%s node->_counters = _leaf->as_FastLock()->counters();\n", indent);
fprintf(fp_cpp, "%s node->_rtm_counters = _leaf->as_FastLock()->rtm_counters();\n", indent);
fprintf(fp_cpp, "%s node->_stack_rtm_counters = _leaf->as_FastLock()->stack_rtm_counters();\n", indent);
}
}
......
@@ -926,7 +926,8 @@ void ciEnv::register_method(ciMethod* target,
AbstractCompiler* compiler,
int comp_level,
bool has_unsafe_access,
bool has_wide_vectors,
RTMState rtm_state) {
VM_ENTRY_MARK;
nmethod* nm = NULL;
{
@@ -973,6 +974,15 @@ void ciEnv::register_method(ciMethod* target,
methodHandle method(THREAD, target->get_Method());
#if INCLUDE_RTM_OPT
if (!failing() && (rtm_state != NoRTM) &&
(method()->method_data() != NULL) &&
(method()->method_data()->rtm_state() != rtm_state)) {
// Preemptive decompile if rtm state was changed.
record_failure("RTM state change invalidated rtm code");
}
#endif
if (failing()) {
// While not a true deoptimization, it is a preemptive decompile.
MethodData* mdo = method()->method_data();
@@ -999,13 +1009,15 @@ void ciEnv::register_method(ciMethod* target,
frame_words, oop_map_set,
handler_table, inc_table,
compiler, comp_level);
// Free codeBlobs
code_buffer->free_blob();
if (nm != NULL) {
nm->set_has_unsafe_access(has_unsafe_access);
nm->set_has_wide_vectors(has_wide_vectors);
#if INCLUDE_RTM_OPT
nm->set_rtm_state(rtm_state);
#endif
// Record successful registration.
// (Put nm into the task handle *before* publishing to the Java heap.)
......
@@ -363,7 +363,8 @@ public:
AbstractCompiler* compiler,
int comp_level,
bool has_unsafe_access,
bool has_wide_vectors,
RTMState rtm_state = NoRTM);
// Access to certain well known ciObjects.
......
@@ -478,6 +478,18 @@ public:
int invocation_count() { return _invocation_counter; }
int backedge_count() { return _backedge_counter; }
#if INCLUDE_RTM_OPT
// return cached value
int rtm_state() {
if (is_empty()) {
return NoRTM;
} else {
return get_MethodData()->rtm_state();
}
}
#endif
// Transfer information about the method to MethodData*.
// would_profile means we would like to profile this method,
// meaning it's not trivial.
......
@@ -481,7 +481,9 @@ void nmethod::init_defaults() {
_scavenge_root_link = NULL;
_scavenge_root_state = 0;
_compiler = NULL;
#if INCLUDE_RTM_OPT
_rtm_state = NoRTM;
#endif
#ifdef HAVE_DTRACE_H
_trap_offset = 0;
#endif // def HAVE_DTRACE_H
......
@@ -193,6 +193,12 @@ class nmethod : public CodeBlob {
jbyte _scavenge_root_state;
#if INCLUDE_RTM_OPT
// RTM state at compile time. Used during deoptimization to decide
// whether to restart collecting RTM locking abort statistic again.
RTMState _rtm_state;
#endif
// Nmethod Flushing lock. If non-zero, then the nmethod is not removed
// and is not made into a zombie. However, once the nmethod is made into
// a zombie, it will be locked one final time if CompiledMethodUnload
@@ -414,6 +420,12 @@ class nmethod : public CodeBlob {
bool is_zombie() const { return _state == zombie; }
bool is_unloaded() const { return _state == unloaded; }
#if INCLUDE_RTM_OPT
// rtm state accessing and manipulating
RTMState rtm_state() const { return _rtm_state; }
void set_rtm_state(RTMState state) { _rtm_state = state; }
#endif
// Make the nmethod non entrant. The nmethod will continue to be
// alive. It is used when an uncommon trap happens. Returns true
// if this thread changed the state of the nmethod or false if
......
@@ -273,7 +273,7 @@ int Method::validate_bci_from_bcx(intptr_t bcx) const {
}
address Method::bcp_from(int bci) const {
assert((is_native() && bci == 0) || (!is_native() && 0 <= bci && bci < code_size()), err_msg("illegal bci: %d", bci));
address bcp = code_base() + bci;
assert(is_native() && bcp == code_base() || contains(bcp), "bcp doesn't belong to this method");
return bcp;
......
@@ -24,6 +24,7 @@
#include "precompiled.hpp"
#include "classfile/systemDictionary.hpp"
#include "compiler/compilerOracle.hpp"
#include "interpreter/bytecode.hpp" #include "interpreter/bytecode.hpp"
#include "interpreter/bytecodeStream.hpp" #include "interpreter/bytecodeStream.hpp"
#include "interpreter/linkResolver.hpp" #include "interpreter/linkResolver.hpp"
@@ -1148,6 +1149,21 @@ void MethodData::init() {
_highest_osr_comp_level = 0;
_would_profile = true;
#if INCLUDE_RTM_OPT
_rtm_state = NoRTM; // No RTM lock eliding by default
if (UseRTMLocking &&
!CompilerOracle::has_option_string(_method, "NoRTMLockEliding")) {
if (CompilerOracle::has_option_string(_method, "UseRTMLockEliding") || !UseRTMDeopt) {
// Generate RTM lock eliding code without abort ratio calculation code.
_rtm_state = UseRTM;
} else if (UseRTMDeopt) {
// Generate RTM lock eliding code and include abort ratio calculation
// code if UseRTMDeopt is on.
_rtm_state = ProfileRTM;
}
}
#endif
// Initialize flags and trap history.
_nof_decompiles = 0;
_nof_overflow_recompiles = 0;
......
@@ -1861,7 +1861,7 @@ public:
// Whole-method sticky bits and flags
enum {
_trap_hist_limit = 19, // decoupled from Deoptimization::Reason_LIMIT
_trap_hist_mask = max_jubyte,
_extra_data_count = 4 // extra DataLayout headers, for trap history
}; // Public flag values
@@ -1892,6 +1892,12 @@ private:
// Counter values at the time profiling started.
int _invocation_counter_start;
int _backedge_counter_start;
#if INCLUDE_RTM_OPT
// State of RTM code generation during compilation of the method
int _rtm_state;
#endif
// Number of loops and blocks is computed when compiling the first
// time with C1. It is used to determine if method is trivial.
short _num_loops;
@@ -2055,6 +2061,22 @@ public:
InvocationCounter* invocation_counter() { return &_invocation_counter; }
InvocationCounter* backedge_counter() { return &_backedge_counter; }
#if INCLUDE_RTM_OPT
int rtm_state() const {
return _rtm_state;
}
void set_rtm_state(RTMState rstate) {
_rtm_state = (int)rstate;
}
void atomic_set_rtm_state(RTMState rstate) {
Atomic::store((int)rstate, &_rtm_state);
}
static int rtm_state_offset_in_bytes() {
return offset_of(MethodData, _rtm_state);
}
#endif
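rtm_state_offset_in_bytes() exists so generated profiling code can read and rewrite the MDO's _rtm_state as a raw int at a fixed byte offset (which is also why the field is declared int and updated through Atomic::store). A plain C++ analog of that offset-based access, with a hypothetical stand-in struct:

```cpp
#include <cstddef>
#include <cassert>

struct FakeMDO { int invocations; int rtm_state; };  // stand-in for MethodData

// Read the field through base + offset, the way emitted code would.
int read_rtm_state(const void* mdo) {
  const char* base = static_cast<const char*>(mdo);
  return *reinterpret_cast<const int*>(base + offsetof(FakeMDO, rtm_state));
}

int main() {
  FakeMDO m{0, 0x4 /* ProfileRTM, value assumed from rtmLocking.hpp */};
  assert(read_rtm_state(&m) == 0x4);
  return 0;
}
```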
void set_would_profile(bool p) { _would_profile = p; }
bool would_profile() const { return _would_profile; }
......
@@ -442,6 +442,9 @@
diagnostic(bool, PrintPreciseBiasedLockingStatistics, false, \
"Print per-lock-site statistics of biased locking in JVM") \
\
diagnostic(bool, PrintPreciseRTMLockingStatistics, false, \
"Print per-lock-site statistics of rtm locking in JVM") \
\
notproduct(bool, PrintEliminateLocks, false, \
"Print out when locks are eliminated") \
\
......
@@ -196,6 +196,7 @@ macro(NegF)
macro(NeverBranch)
macro(Opaque1)
macro(Opaque2)
macro(Opaque3)
macro(OrI)
macro(OrL)
macro(OverflowAddI)
......
@@ -690,9 +690,10 @@ Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr
set_print_inlining(PrintInlining || method()->has_option("PrintInlining") NOT_PRODUCT( || PrintOptoInlining));
set_print_intrinsics(PrintIntrinsics || method()->has_option("PrintIntrinsics"));
if (ProfileTraps RTM_OPT_ONLY( || UseRTMLocking )) {
// Make sure the method being compiled gets its own MDO,
// so we can at least track the decompile_count().
// Need MDO to record RTM code generation state.
method()->ensure_method_data(); method()->ensure_method_data();
} }
@@ -899,7 +900,8 @@ Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr
compiler,
env()->comp_level(),
has_unsafe_access(),
SharedRuntime::is_wide_vector(max_vector_size()),
rtm_state()
);
if (log() != NULL) // Print code cache state into compiler log
@@ -1063,7 +1065,23 @@ void Compile::Init(int aliaslevel) {
set_do_scheduling(OptoScheduling);
set_do_count_invocations(false);
set_do_method_data_update(false);
set_rtm_state(NoRTM); // No RTM lock eliding by default
#if INCLUDE_RTM_OPT
if (UseRTMLocking && has_method() && (method()->method_data_or_null() != NULL)) {
int rtm_state = method()->method_data()->rtm_state();
if (method_has_option("NoRTMLockEliding") || ((rtm_state & NoRTM) != 0)) {
// Don't generate RTM lock eliding code.
set_rtm_state(NoRTM);
} else if (method_has_option("UseRTMLockEliding") || ((rtm_state & UseRTM) != 0) || !UseRTMDeopt) {
// Generate RTM lock eliding code without abort ratio calculation code.
set_rtm_state(UseRTM);
} else if (UseRTMDeopt) {
// Generate RTM lock eliding code and include abort ratio calculation
// code if UseRTMDeopt is on.
set_rtm_state(ProfileRTM);
}
}
#endif
if (debug_info()->recording_non_safepoints()) { if (debug_info()->recording_non_safepoints()) {
set_node_note_array(new(comp_arena()) GrowableArray<Node_Notes*> set_node_note_array(new(comp_arena()) GrowableArray<Node_Notes*>
(comp_arena(), 8, 0, NULL)); (comp_arena(), 8, 0, NULL));
@@ -2565,6 +2583,7 @@ void Compile::final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc) {
break;
case Op_Opaque1: // Remove Opaque Nodes before matching
case Op_Opaque2: // Remove Opaque Nodes before matching
case Op_Opaque3:
n->subsume_by(n->in(1), this); n->subsume_by(n->in(1), this);
break; break;
case Op_CallStaticJava: case Op_CallStaticJava:
......
@@ -319,9 +319,9 @@ class Compile : public Phase {
bool _trace_opto_output;
bool _parsed_irreducible_loop; // True if ciTypeFlow detected irreducible loops during parsing
#endif
// JSR 292
bool _has_method_handle_invokes; // True if this method has MethodHandle invokes.
RTMState _rtm_state; // State of Restricted Transactional Memory usage
// Compilation environment.
Arena _comp_arena; // Arena with lifetime equivalent to Compile
@@ -591,6 +591,10 @@ class Compile : public Phase {
void set_print_inlining(bool z) { _print_inlining = z; }
bool print_intrinsics() const { return _print_intrinsics; }
void set_print_intrinsics(bool z) { _print_intrinsics = z; }
RTMState rtm_state() const { return _rtm_state; }
void set_rtm_state(RTMState s) { _rtm_state = s; }
bool use_rtm() const { return (_rtm_state & NoRTM) == 0; }
bool profile_rtm() const { return _rtm_state == ProfileRTM; }
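use_rtm() can test `(_rtm_state & NoRTM) == 0` because RTMState is a small bit set; assuming the values introduced by this change's rtmLocking.hpp (UseRTM = 0x1, NoRTM = 0x2, ProfileRTM = 0x4), both RTM-generating states are disjoint from the NoRTM bit:

```cpp
#include <cassert>

enum RTMState { UseRTM = 0x1, NoRTM = 0x2, ProfileRTM = 0x4 };  // values assumed

int main() {
  assert((UseRTM     & NoRTM) == 0);  // use_rtm() -> true
  assert((ProfileRTM & NoRTM) == 0);  // use_rtm() -> true, profile_rtm() -> true
  assert((NoRTM      & NoRTM) != 0);  // use_rtm() -> false
  return 0;
}
```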
// check the CompilerOracle for special behaviours for this compile
bool method_has_option(const char * option) {
return method() != NULL && method()->has_option(option);
......
@@ -642,6 +642,19 @@ public:
virtual const Type *bottom_type() const { return TypeInt::INT; }
};
//------------------------------Opaque3Node------------------------------------
// A node to prevent unwanted optimizations. Will be optimized only during
// macro nodes expansion.
class Opaque3Node : public Opaque2Node {
int _opt; // what optimization it was used for
public:
enum { RTM_OPT };
Opaque3Node(Compile* C, Node *n, int opt) : Opaque2Node(C, n), _opt(opt) {}
virtual int Opcode() const;
bool rtm_opt() const { return (_opt == RTM_OPT); }
};
//----------------------PartialSubtypeCheckNode--------------------------------
// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
// array for an instance of the superklass. Set a hidden internal cache on a
......
@@ -3157,10 +3157,14 @@ FastLockNode* GraphKit::shared_lock(Node* obj) {
Node* mem = reset_memory();
FastLockNode * flock = _gvn.transform(new (C) FastLockNode(0, obj, box) )->as_FastLock();
if (UseBiasedLocking && PrintPreciseBiasedLockingStatistics) {
// Create the counters for this fast lock.
flock->create_lock_counter(sync_jvms()); // sync_jvms used to get current bci
}
// Create the rtm counters for this fast lock if needed.
flock->create_rtm_lock_counter(sync_jvms()); // sync_jvms used to get current bci
// Add monitor to debug info for the slow path. If we block inside the
// slow path and de-opt, we need the monitor hanging around
map()->push_monitor( flock );
......
@@ -136,6 +136,8 @@ bool BoxLockNode::is_simple_lock_region(LockNode** unique_lock, Node* obj) {
//-----------------------------hash--------------------------------------------
uint FastLockNode::hash() const { return NO_HASH; }
uint FastLockNode::size_of() const { return sizeof(*this); }
//------------------------------cmp--------------------------------------------
uint FastLockNode::cmp( const Node &n ) const {
return (&n == this); // Always fail except on self
@@ -159,6 +161,22 @@ void FastLockNode::create_lock_counter(JVMState* state) {
_counters = blnc->counters();
}
void FastLockNode::create_rtm_lock_counter(JVMState* state) {
#if INCLUDE_RTM_OPT
Compile* C = Compile::current();
if (C->profile_rtm() || (PrintPreciseRTMLockingStatistics && C->use_rtm())) {
RTMLockingNamedCounter* rlnc = (RTMLockingNamedCounter*)
OptoRuntime::new_named_counter(state, NamedCounter::RTMLockingCounter);
_rtm_counters = rlnc->counters();
if (UseRTMForStackLocks) {
rlnc = (RTMLockingNamedCounter*)
OptoRuntime::new_named_counter(state, NamedCounter::RTMLockingCounter);
_stack_rtm_counters = rlnc->counters();
}
}
#endif
}
//=============================================================================
//------------------------------do_monitor_enter-------------------------------
void Parse::do_monitor_enter() {
......
@@ -89,13 +89,17 @@ public:
//------------------------------FastLockNode-----------------------------------
class FastLockNode: public CmpNode {
private:
BiasedLockingCounters* _counters;
RTMLockingCounters* _rtm_counters; // RTM lock counters for inflated locks
RTMLockingCounters* _stack_rtm_counters; // RTM lock counters for stack locks
public:
FastLockNode(Node *ctrl, Node *oop, Node *box) : CmpNode(oop,box) {
init_req(0,ctrl);
init_class_id(Class_FastLock);
_counters = NULL;
_rtm_counters = NULL;
_stack_rtm_counters = NULL;
}
Node* obj_node() const { return in(1); }
Node* box_node() const { return in(2); }
@@ -104,13 +108,17 @@ public:
// FastLock and FastUnlockNode do not hash, we need one for each corresponding
// LockNode/UnLockNode to avoid creating Phi's.
virtual uint hash() const ; // { return NO_HASH; }
virtual uint size_of() const;
virtual uint cmp( const Node &n ) const ; // Always fail, except on self
virtual int Opcode() const;
virtual const Type *Value( PhaseTransform *phase ) const { return TypeInt::CC; }
const Type *sub(const Type *t1, const Type *t2) const { return TypeInt::CC;}
void create_lock_counter(JVMState* s);
void create_rtm_lock_counter(JVMState* state);
BiasedLockingCounters* counters() const { return _counters; }
RTMLockingCounters* rtm_counters() const { return _rtm_counters; }
RTMLockingCounters* stack_rtm_counters() const { return _stack_rtm_counters; }
};
......
...@@ -617,6 +617,15 @@ bool IdealLoopTree::policy_maximally_unroll( PhaseIdealLoop *phase ) const {
      case Op_AryEq: {
        return false;
      }
#if INCLUDE_RTM_OPT
case Op_FastLock:
case Op_FastUnlock: {
// Don't unroll RTM locking code because it is large.
if (UseRTMLocking) {
return false;
}
}
#endif
    } // switch
  }
...@@ -722,6 +731,15 @@ bool IdealLoopTree::policy_unroll( PhaseIdealLoop *phase ) const {
        // String intrinsics are large and have loops.
        return false;
      }
#if INCLUDE_RTM_OPT
case Op_FastLock:
case Op_FastUnlock: {
// Don't unroll RTM locking code because it is large.
if (UseRTMLocking) {
return false;
}
}
#endif
    } // switch
  }
......
...@@ -52,6 +52,7 @@ class MachSpillCopyNode;
class Matcher;
class PhaseRegAlloc;
class RegMask;
class RTMLockingCounters;
class State;
//---------------------------MachOper------------------------------------------
...@@ -620,8 +621,9 @@ public:
class MachFastLockNode : public MachNode {
  virtual uint size_of() const { return sizeof(*this); } // Size is bigger
public:
  BiasedLockingCounters* _counters;
RTMLockingCounters* _rtm_counters; // RTM lock counters for inflated locks
RTMLockingCounters* _stack_rtm_counters; // RTM lock counters for stack locks
  MachFastLockNode() : MachNode() {}
};
......
...@@ -2437,6 +2437,7 @@ void PhaseMacroExpand::eliminate_macro_nodes() {
    }
  }
  // Next, attempt to eliminate allocations
_has_locks = false;
  progress = true;
  while (progress) {
    progress = false;
...@@ -2455,11 +2456,13 @@ void PhaseMacroExpand::eliminate_macro_nodes() {
      case Node::Class_Lock:
      case Node::Class_Unlock:
        assert(!n->as_AbstractLock()->is_eliminated(), "sanity");
_has_locks = true;
        break;
      default:
        assert(n->Opcode() == Op_LoopLimit ||
               n->Opcode() == Op_Opaque1   ||
               n->Opcode() == Op_Opaque2   ||
               n->Opcode() == Op_Opaque3, "unknown node type in macro list");
      }
      assert(success == (C->macro_count() < old_macro_count), "elimination reduces macro count");
      progress = progress || success;
...@@ -2500,6 +2503,30 @@ bool PhaseMacroExpand::expand_macro_nodes() {
      } else if (n->Opcode() == Op_Opaque1 || n->Opcode() == Op_Opaque2) {
        _igvn.replace_node(n, n->in(1));
        success = true;
#if INCLUDE_RTM_OPT
} else if ((n->Opcode() == Op_Opaque3) && ((Opaque3Node*)n)->rtm_opt()) {
assert(C->profile_rtm(), "should be used only in rtm deoptimization code");
assert((n->outcnt() == 1) && n->unique_out()->is_Cmp(), "");
Node* cmp = n->unique_out();
#ifdef ASSERT
// Validate graph.
assert((cmp->outcnt() == 1) && cmp->unique_out()->is_Bool(), "");
BoolNode* bol = cmp->unique_out()->as_Bool();
assert((bol->outcnt() == 1) && bol->unique_out()->is_If() &&
(bol->_test._test == BoolTest::ne), "");
IfNode* ifn = bol->unique_out()->as_If();
assert((ifn->outcnt() == 2) &&
ifn->proj_out(1)->is_uncommon_trap_proj(Deoptimization::Reason_rtm_state_change), "");
#endif
Node* repl = n->in(1);
if (!_has_locks) {
// Remove RTM state check if there are no locks in the code.
// Replace input to compare the same value.
repl = (cmp->in(1) == n) ? cmp->in(2) : cmp->in(1);
}
_igvn.replace_node(n, repl);
success = true;
#endif
      }
      assert(success == (C->macro_count() < old_macro_count), "elimination reduces macro count");
      progress = progress || success;
......
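To illustrate the two expansion outcomes above, here is a minimal standalone C++ model (not HotSpot code; the names are invented for the sketch). With locks present the Opaque3 node is replaced by its load input and the RTM state check survives; with no locks it is replaced by the compare's other input, so the compare sees the same value on both sides and constant-folds:

#include <cassert>

enum { ProfileRTM = 0 };  // mirrors the ProfileRTM state value

// _has_locks == true: Opaque3 is replaced by its input (the rtm_state load),
// so the check stays live and can still trigger the uncommon trap.
bool expanded_check_with_locks(int loaded_rtm_state) {
  int repl = loaded_rtm_state;          // _igvn.replace_node(n, n->in(1))
  return repl != ProfileRTM;            // true => deoptimize
}

// _has_locks == false: Opaque3 is replaced by the compare's *other* input,
// so both CmpI inputs become the ProfileRTM constant and the check folds away.
bool expanded_check_without_locks() {
  int repl = ProfileRTM;                // repl = (cmp->in(1) == n) ? cmp->in(2) : cmp->in(1)
  return repl != ProfileRTM;            // constant false: the trap branch is dead
}

int main() {
  assert(expanded_check_with_locks(1));     // a changed RTM state would deopt
  assert(!expanded_check_without_locks());  // folded check never fires
  return 0;
}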
...@@ -76,6 +76,8 @@ private:
  ProjNode *_memproj_catchall;
  ProjNode *_resproj;
// Additional data collected during macro expansion
bool _has_locks;
  void expand_allocate(AllocateNode *alloc);
  void expand_allocate_array(AllocateArrayNode *alloc);
...@@ -118,7 +120,7 @@ private:
                              Node* length);
public:
  PhaseMacroExpand(PhaseIterGVN &igvn) : Phase(Macro_Expand), _igvn(igvn), _has_locks(false) {
    _igvn.set_delay_transform(true);
  }
  void eliminate_macro_nodes();
......
...@@ -477,6 +477,8 @@ class Parse : public GraphKit {
  // Helper function to compute array addressing
  Node* array_addressing(BasicType type, int vals, const Type* *result2=NULL);
void rtm_deopt();
  // Pass current map to exits
  void return_current(Node* value);
......
...@@ -564,6 +564,10 @@ Parse::Parse(JVMState* caller, ciMethod* parse_method, float expected_uses, Pars
    set_map(entry_map);
    do_method_entry();
  }
if (depth() == 1) {
// Add check to deoptimize the nmethod if RTM state was changed
rtm_deopt();
}
  // Check for bailouts during method entry.
  if (failing()) {
...@@ -1975,6 +1979,42 @@ void Parse::call_register_finalizer() {
  set_control( _gvn.transform(result_rgn) );
}
// Add check to deoptimize if RTM state is not ProfileRTM
void Parse::rtm_deopt() {
#if INCLUDE_RTM_OPT
if (C->profile_rtm()) {
assert(C->method() != NULL, "only for normal compilations");
assert(!C->method()->method_data()->is_empty(), "MDO is needed to record RTM state");
assert(depth() == 1, "generate check only for main compiled method");
// Set starting bci for uncommon trap.
set_parse_bci(is_osr_parse() ? osr_bci() : 0);
// Load the rtm_state from the MethodData.
const TypePtr* adr_type = TypeMetadataPtr::make(C->method()->method_data());
Node* mdo = makecon(adr_type);
int offset = MethodData::rtm_state_offset_in_bytes();
Node* adr_node = basic_plus_adr(mdo, mdo, offset);
Node* rtm_state = make_load(control(), adr_node, TypeInt::INT, T_INT, adr_type);
// Separate Load from Cmp by Opaque.
// In expand_macro_nodes() it will be replaced either
// with this load when there are locks in the code
// or with ProfileRTM (cmp->in(2)) otherwise so that
// the check will fold.
Node* profile_state = makecon(TypeInt::make(ProfileRTM));
Node* opq = _gvn.transform( new (C) Opaque3Node(C, rtm_state, Opaque3Node::RTM_OPT) );
Node* chk = _gvn.transform( new (C) CmpINode(opq, profile_state) );
Node* tst = _gvn.transform( new (C) BoolNode(chk, BoolTest::eq) );
// Branch to failure if state was changed
{ BuildCutout unless(this, tst, PROB_ALWAYS);
uncommon_trap(Deoptimization::Reason_rtm_state_change,
Deoptimization::Action_make_not_entrant);
}
}
#endif
}
//------------------------------return_current---------------------------------
// Append current _map to _exit_return
void Parse::return_current(Node* value) {
......
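As a rough sketch of what the check built by rtm_deopt() does at runtime (plain C++ stand-ins, not HotSpot IR; MethodDataStub and deoptimize() are invented for the illustration):

#include <cstdio>

enum { ProfileRTM = 0 };                  // matches the RTMState value used above

struct MethodDataStub { int rtm_state; }; // stand-in for MethodData::_rtm_state

static void deoptimize() {                // stand-in for the uncommon trap
  std::printf("uncommon_trap(Reason_rtm_state_change, Action_make_not_entrant)\n");
}

// At method entry: load the rtm_state from the MDO and branch to the trap
// when the state is no longer ProfileRTM (the BuildCutout above).
void rtm_state_guard(MethodDataStub* mdo) {
  int rtm_state = mdo->rtm_state;         // make_load(control(), adr_node, ...)
  if (rtm_state != ProfileRTM) {          // CmpI + Bool behind the Opaque3
    deoptimize();
  }
}

int main() {
  MethodDataStub mdo = { 1 };             // state changed away from ProfileRTM
  rtm_state_guard(&mdo);
  return 0;
}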
...@@ -1299,6 +1299,14 @@ void OptoRuntime::print_named_counters() {
        tty->print_cr("%s", c->name());
        blc->print_on(tty);
      }
#if INCLUDE_RTM_OPT
} else if (c->tag() == NamedCounter::RTMLockingCounter) {
RTMLockingCounters* rlc = ((RTMLockingNamedCounter*)c)->counters();
if (rlc->nonzero()) {
tty->print_cr("%s", c->name());
rlc->print_on(tty);
}
#endif
    }
    c = c->next();
  }
...@@ -1338,6 +1346,8 @@ NamedCounter* OptoRuntime::new_named_counter(JVMState* youngest_jvms, NamedCount
  NamedCounter* c;
  if (tag == NamedCounter::BiasedLockingCounter) {
    c = new BiasedLockingNamedCounter(strdup(st.as_string()));
} else if (tag == NamedCounter::RTMLockingCounter) {
c = new RTMLockingNamedCounter(strdup(st.as_string()));
  } else {
    c = new NamedCounter(strdup(st.as_string()), tag);
  }
...@@ -1346,6 +1356,7 @@ NamedCounter* OptoRuntime::new_named_counter(JVMState* youngest_jvms, NamedCount
  // add counters so this is safe.
  NamedCounter* head;
  do {
c->set_next(NULL);
    head = _named_counters;
    c->set_next(head);
  } while (Atomic::cmpxchg_ptr(c, &_named_counters, head) != head);
......
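The do/while above is a standard lock-free prepend; the added c->set_next(NULL) resets the link before each retry so the relaxed assert in set_next() still holds. A portable C++ sketch of the same pattern, using std::atomic in place of Atomic::cmpxchg_ptr (Counter and prepend_counter are illustrative names, not HotSpot APIs):

#include <atomic>

struct Counter {
  const char* name;
  Counter*    next = nullptr;
};

static std::atomic<Counter*> counters_head{nullptr};

// Prepend c without a lock: safe here because counters are only ever added,
// never removed, so a stale head simply means one more CAS retry.
void prepend_counter(Counter* c) {
  Counter* head = counters_head.load();
  do {
    c->next = head;  // rewritten on every retry against the freshly loaded head
  } while (!counters_head.compare_exchange_weak(head, c));
}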
...@@ -29,6 +29,7 @@
#include "opto/machnode.hpp"
#include "opto/type.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/rtmLocking.hpp"
#include "runtime/deoptimization.hpp" #include "runtime/deoptimization.hpp"
#include "runtime/vframe.hpp" #include "runtime/vframe.hpp"
...@@ -61,7 +62,8 @@ public: ...@@ -61,7 +62,8 @@ public:
NoTag, NoTag,
LockCounter, LockCounter,
EliminatedLockCounter, EliminatedLockCounter,
BiasedLockingCounter BiasedLockingCounter,
RTMLockingCounter
  };
 private:
...@@ -85,7 +87,7 @@ private:
  NamedCounter* next() const { return _next; }
  void set_next(NamedCounter* next) {
    assert(_next == NULL || next == NULL, "already set");
    _next = next;
  }
...@@ -102,6 +104,18 @@ class BiasedLockingNamedCounter : public NamedCounter {
  BiasedLockingCounters* counters() { return &_counters; }
};
class RTMLockingNamedCounter : public NamedCounter {
private:
RTMLockingCounters _counters;
public:
RTMLockingNamedCounter(const char *n) :
NamedCounter(n, RTMLockingCounter), _counters() {}
RTMLockingCounters* counters() { return &_counters; }
};
typedef const TypeFunc*(*TypeFunc_generator)();
class OptoRuntime : public AllStatic {
......
...@@ -4374,7 +4374,7 @@ const Type *TypeMetadataPtr::xmeet( const Type *t ) const {
    // else fall through:
  case TopPTR:
  case AnyNull: {
    return make(ptr, _metadata, offset);
  }
  case BotPTR:
  case NotNull:
......
...@@ -3748,9 +3748,6 @@ jint Arguments::apply_ergo() {
#endif // CC_INTERP
#ifdef COMPILER2
if (!UseBiasedLocking || EmitSync != 0) {
UseOptoBiasInlining = false;
}
  if (!EliminateLocks) {
    EliminateNestedLocks = false;
  }
...@@ -3811,6 +3808,11 @@ jint Arguments::apply_ergo() {
      UseBiasedLocking = false;
    }
  }
#ifdef COMPILER2
if (!UseBiasedLocking || EmitSync != 0) {
UseOptoBiasInlining = false;
}
#endif
  // set PauseAtExit if the gamma launcher was used and a debugger is attached
  // but only if not already set on the commandline
......
...@@ -1285,7 +1285,8 @@ JRT_ENTRY(void, Deoptimization::uncommon_trap_inner(JavaThread* thread, jint tra
    gather_statistics(reason, action, trap_bc);
    // Ensure that we can record deopt. history:
    // Need MDO to record RTM code generation state.
    bool create_if_missing = ProfileTraps RTM_OPT_ONLY( || UseRTMLocking );
    MethodData* trap_mdo =
      get_method_data(thread, trap_method, create_if_missing);
...@@ -1566,6 +1567,17 @@ JRT_ENTRY(void, Deoptimization::uncommon_trap_inner(JavaThread* thread, jint tra
      if (tstate1 != tstate0)
        pdata->set_trap_state(tstate1);
      }
#if INCLUDE_RTM_OPT
// Restart collecting RTM locking abort statistic if the method
// is recompiled for a reason other than RTM state change.
// Assume that in new recompiled code the statistic could be different,
// for example, due to different inlining.
if ((reason != Reason_rtm_state_change) && (trap_mdo != NULL) &&
UseRTMDeopt && (nm->rtm_state() != ProfileRTM)) {
trap_mdo->atomic_set_rtm_state(ProfileRTM);
}
#endif
    }
    if (inc_recompile_count) {
...@@ -1823,7 +1835,8 @@ const char* Deoptimization::_trap_reason_name[Reason_LIMIT] = {
  "age",
  "predicate",
  "loop_limit_check",
  "speculate_class_check",
"rtm_state_change"
};
const char* Deoptimization::_trap_action_name[Action_LIMIT] = {
  // Note: Keep this in sync. with enum DeoptAction.
......
...@@ -60,6 +60,7 @@ class Deoptimization : AllStatic {
    Reason_predicate,             // compiler generated predicate failed
    Reason_loop_limit_check,      // compiler generated loop limits check failed
    Reason_speculate_class_check, // saw unexpected object class from type speculation
Reason_rtm_state_change, // rtm state change detected
    Reason_LIMIT,
    // Note: Keep this enum in sync. with _trap_reason_name.
    Reason_RECORDED_LIMIT = Reason_bimorphic // some are not recorded per bc
......
...@@ -268,7 +268,7 @@ void print_statistics() {
    os::print_statistics();
  }
  if (PrintLockStatistics || PrintPreciseBiasedLockingStatistics || PrintPreciseRTMLockingStatistics) {
    OptoRuntime::print_named_counters();
  }
...@@ -390,7 +390,7 @@ void print_statistics() {
  }
#ifdef COMPILER2
  if (PrintPreciseBiasedLockingStatistics || PrintPreciseRTMLockingStatistics) {
    OptoRuntime::print_named_counters();
  }
#endif
......
/*
* Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_VM_RUNTIME_RTMLOCKING_HPP
#define SHARE_VM_RUNTIME_RTMLOCKING_HPP
// Generate RTM (Restricted Transactional Memory) locking code for all inflated
// locks when the "UseRTMLocking" option is on, with the normal locking mechanism
// as the fallback handler.
//
// On abort or lock busy, the lock will be retried a fixed number of times under
// RTM, as specified by the "RTMRetryCount" option. Locks which abort too often
// can be auto-tuned or manually tuned.
//
// Auto-tuning is enabled by the "UseRTMDeopt" option and requires an abort
// ratio calculation for each lock. The abort ratio is calculated after
// "RTMAbortThreshold" aborts have been reached. The formulas are:
//
// Aborted transactions = abort_count * 100
// All transactions = total_count * RTMTotalCountIncrRate
//
// Aborted transactions >= All transactions * RTMAbortRatio
//
// If "UseRTMDeopt" is on and the aborts ratio reaches "RTMAbortRatio"
// the method containing the lock will be deoptimized and recompiled with
// all locks as normal locks. If the abort ratio continues to remain low after
// "RTMLockingThreshold" locks are attempted, then the method will be deoptimized
// and recompiled with all locks as RTM locks without abort ratio calculation code.
// The abort ratio calculation can be delayed by specifying flag
// -XX:RTMLockingCalculationDelay in millisecond.
//
// For manual tuning, the abort statistics for each lock need to be provided
// to the user via a JVM option such as "PrintPreciseRTMLockingStatistics".
// Based on these statistics, users can create a .hotspot_compiler file
// or use -XX:CompileCommand=option,class::method,NoRTMLockEliding
// to specify the methods for which RTM locking should be disabled.
//
// When the "UseRTMForStackLocks" option is enabled along with "UseRTMLocking",
// RTM locking code is generated for stack locks too.
// Retries, auto-tuning support, and RTM locking statistics are all
// supported for stack locks just as for inflated locks.
// RTM locking counters
class RTMLockingCounters VALUE_OBJ_CLASS_SPEC {
private:
uintx _total_count; // Total RTM locks count
uintx _abort_count; // Total aborts count
public:
enum { ABORT_STATUS_LIMIT = 6 };
// Counters per RTM Abort Status. Incremented with +PrintPreciseRTMLockingStatistics
// RTM uses the EAX register to communicate abort status to software.
// Following an RTM abort the EAX register has the following definition.
//
// EAX register bit position Meaning
// 0 Set if abort caused by XABORT instruction.
// 1 If set, the transaction may succeed on a retry. This bit is always clear if bit 0 is set.
// 2 Set if another logical processor conflicted with a memory address that was part of the transaction that aborted.
// 3 Set if an internal buffer overflowed.
// 4 Set if a debug breakpoint was hit.
// 5 Set if an abort occurred during execution of a nested transaction.
private:
uintx _abortX_count[ABORT_STATUS_LIMIT];
public:
static uintx _calculation_flag;
static uintx* rtm_calculation_flag_addr() { return &_calculation_flag; }
static void init();
RTMLockingCounters() : _total_count(0), _abort_count(0) {
for (int i = 0; i < ABORT_STATUS_LIMIT; i++) {
_abortX_count[i] = 0;
}
}
uintx* total_count_addr() { return &_total_count; }
uintx* abort_count_addr() { return &_abort_count; }
uintx* abortX_count_addr() { return &_abortX_count[0]; }
static int total_count_offset() { return (int)offset_of(RTMLockingCounters, _total_count); }
static int abort_count_offset() { return (int)offset_of(RTMLockingCounters, _abort_count); }
static int abortX_count_offset() { return (int)offset_of(RTMLockingCounters, _abortX_count[0]); }
bool nonzero() { return (_abort_count + _total_count) > 0; }
void print_on(outputStream* st);
void print() { print_on(tty); }
};
#endif // SHARE_VM_RUNTIME_RTMLOCKING_HPP
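To make the abort-ratio formulas in the header comment concrete, here is a standalone sketch; the flag values are illustrative stand-ins for the real -XX flags, not values taken from this change:

#include <cstdio>
#include <stdint.h>

const uint64_t RTMTotalCountIncrRate = 64;  // assumed: total counter bumped once per N locks
const uint64_t RTMAbortRatio         = 50;  // assumed threshold, in percent

// Aborted transactions = abort_count * 100
// All transactions     = total_count * RTMTotalCountIncrRate
// Deoptimize when Aborted transactions >= All transactions * RTMAbortRatio.
bool abort_ratio_reached(uint64_t total_count, uint64_t abort_count) {
  uint64_t aborted = abort_count * 100;
  uint64_t all     = total_count * RTMTotalCountIncrRate;
  return aborted >= all * RTMAbortRatio;
}

int main() {
  // 100 sampled totals ~ 6400 real transactions; 4000 aborts is ~62% >= 50%.
  std::printf("deoptimize: %d\n", abort_ratio_reached(100, 4000));
  return 0;
}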
...@@ -105,7 +105,6 @@ PeriodicTask::PeriodicTask(size_t interval_time) :
  _counter(0), _interval((int) interval_time) {
  // Sanity check the interval time
  assert(_interval >= PeriodicTask::min_interval &&
         _interval <= PeriodicTask::max_interval &&
         _interval % PeriodicTask::interval_gran == 0,
         "improper PeriodicTask interval time");
}
......
...@@ -107,6 +107,9 @@
#include "opto/c2compiler.hpp"
#include "opto/idealGraphPrinter.hpp"
#endif
#if INCLUDE_RTM_OPT
#include "runtime/rtmLocking.hpp"
#endif
#ifdef DTRACE_ENABLED
...@@ -3670,6 +3673,10 @@ jint Threads::create_vm(JavaVMInitArgs* args, bool* canTryAgain) {
  BiasedLocking::init();
#if INCLUDE_RTM_OPT
RTMLockingCounters::init();
#endif
  if (JDK_Version::current().post_vm_init_hook_enabled()) {
    call_postVMInitHook(THREAD);
    // The Java side of PostVMInitHook.run must deal with all
......
...@@ -370,6 +370,21 @@ const uint64_t KlassEncodingMetaspaceMax = (uint64_t(max_juint) + 1) << LogKlass
// Machine dependent stuff
#if defined(X86) && defined(COMPILER2) && !defined(JAVASE_EMBEDDED)
// Include Restricted Transactional Memory lock eliding optimization
#define INCLUDE_RTM_OPT 1
#define RTM_OPT_ONLY(code) code
#else
#define INCLUDE_RTM_OPT 0
#define RTM_OPT_ONLY(code)
#endif
// States of Restricted Transactional Memory usage.
enum RTMState {
NoRTM = 0x2, // Don't use RTM
UseRTM = 0x1, // Use RTM
ProfileRTM = 0x0 // Use RTM with abort ratio calculation
};
#ifdef TARGET_ARCH_x86
# include "globalDefinitions_x86.hpp"
#endif
......
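A small self-contained demo of how the RTM_OPT_ONLY macro above composes with a flag expression; the bool flags are local stand-ins for the real -XX globals, and the usage mirrors the deoptimization.cpp change earlier in this commit:

#include <cstdio>

#define INCLUDE_RTM_OPT 1                 // as on x86/COMPILER2 builds
#if INCLUDE_RTM_OPT
#define RTM_OPT_ONLY(code) code
#else
#define RTM_OPT_ONLY(code)
#endif

static bool ProfileTraps  = false;        // stand-in for the -XX flag
static bool UseRTMLocking = true;         // stand-in for the -XX flag

int main() {
  // With RTM compiled in, an MDO is created even when ProfileTraps is off;
  // in a non-RTM build the RTM_OPT_ONLY argument disappears entirely.
  bool create_if_missing = ProfileTraps RTM_OPT_ONLY( || UseRTMLocking );
  std::printf("create_if_missing = %d\n", create_if_missing);
  return 0;
}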