Commit cf124abe authored by R rasbold

6695049: (coll) Create an x86 intrinsic for Arrays.equals

Summary: Intrinsify java/util/Arrays.equals(char[], char[])
Reviewed-by: kvn, never
Parent 0bde47b5
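For context, the changes below intrinsify calls to java.util.Arrays.equals(char[], char[]) (the ([C[C)Z overload registered in vmSymbols further down). The Java-level contract the generated code must preserve is sketched here as a reference; the method name equalsSketch is illustrative only and does not appear in the commit.

```java
// Hedged reference sketch of the contract of Arrays.equals(char[], char[]):
// reference equality (including both null), null checks, length check, then
// an element-by-element comparison. Not JDK source; illustration only.
static boolean equalsSketch(char[] a, char[] b) {
    if (a == b) return true;                    // same array, or both null
    if (a == null || b == null) return false;   // exactly one side is null
    if (a.length != b.length) return false;     // different lengths
    for (int i = 0; i < a.length; i++) {
        if (a[i] != b[i]) return false;         // first mismatch decides
    }
    return true;
}
```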
@@ -3806,6 +3806,78 @@ encode %{
masm.bind(DONE_LABEL);
%}
enc_class enc_Array_Equals(eDIRegP ary1, eSIRegP ary2, eAXRegI tmp1, eBXRegI tmp2, eCXRegI result) %{
Label TRUE_LABEL, FALSE_LABEL, DONE_LABEL, COMPARE_LOOP_HDR, COMPARE_LOOP;
MacroAssembler masm(&cbuf);
Register ary1Reg = as_Register($ary1$$reg);
Register ary2Reg = as_Register($ary2$$reg);
Register tmp1Reg = as_Register($tmp1$$reg);
Register tmp2Reg = as_Register($tmp2$$reg);
Register resultReg = as_Register($result$$reg);
int length_offset = arrayOopDesc::length_offset_in_bytes();
int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR);
// Check the input args
masm.cmpl(ary1Reg, ary2Reg);
masm.jcc(Assembler::equal, TRUE_LABEL);
masm.testl(ary1Reg, ary1Reg);
masm.jcc(Assembler::zero, FALSE_LABEL);
masm.testl(ary2Reg, ary2Reg);
masm.jcc(Assembler::zero, FALSE_LABEL);
// Check the lengths
masm.movl(tmp2Reg, Address(ary1Reg, length_offset));
masm.movl(resultReg, Address(ary2Reg, length_offset));
masm.cmpl(tmp2Reg, resultReg);
masm.jcc(Assembler::notEqual, FALSE_LABEL);
masm.testl(resultReg, resultReg);
masm.jcc(Assembler::zero, TRUE_LABEL);
// Get the number of 4 byte vectors to compare
masm.shrl(resultReg, 1);
// Check for odd-length arrays
masm.andl(tmp2Reg, 1);
masm.testl(tmp2Reg, tmp2Reg);
masm.jcc(Assembler::zero, COMPARE_LOOP_HDR);
// Compare 2-byte "tail" at end of arrays
masm.load_unsigned_word(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset));
masm.load_unsigned_word(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset));
masm.cmpl(tmp1Reg, tmp2Reg);
masm.jcc(Assembler::notEqual, FALSE_LABEL);
masm.testl(resultReg, resultReg);
masm.jcc(Assembler::zero, TRUE_LABEL);
// Setup compare loop
masm.bind(COMPARE_LOOP_HDR);
// Shift tmp1Reg and tmp2Reg to the last 4-byte boundary of the arrays
masm.leal(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset));
masm.leal(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset));
masm.negl(resultReg);
// 4-byte-wide compare loop
masm.bind(COMPARE_LOOP);
masm.movl(ary1Reg, Address(tmp1Reg, resultReg, Address::times_4, 0));
masm.movl(ary2Reg, Address(tmp2Reg, resultReg, Address::times_4, 0));
masm.cmpl(ary1Reg, ary2Reg);
masm.jcc(Assembler::notEqual, FALSE_LABEL);
masm.increment(resultReg);
masm.jcc(Assembler::notZero, COMPARE_LOOP);
masm.bind(TRUE_LABEL);
masm.movl(resultReg, 1); // return true
masm.jmp(DONE_LABEL);
masm.bind(FALSE_LABEL);
masm.xorl(resultReg, resultReg); // return false
// That's it
masm.bind(DONE_LABEL);
%}
enc_class enc_pop_rdx() %{
emit_opcode(cbuf,0x5A);
%}
@@ -11565,6 +11637,17 @@ instruct string_compare(eDIRegP str1, eSIRegP str2, eAXRegI tmp1, eBXRegI tmp2,
ins_pipe( pipe_slow );
%}
// fast array equals
instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI tmp1, eBXRegI tmp2, eCXRegI result, eFlagsReg cr) %{
match(Set result (AryEq ary1 ary2));
effect(USE_KILL ary1, USE_KILL ary2, KILL tmp1, KILL tmp2, KILL cr);
//ins_cost(300);
format %{ "Array Equals $ary1,$ary2 -> $result // KILL EAX, EBX" %}
ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, result) );
ins_pipe( pipe_slow );
%}
//----------Control Flow Instructions------------------------------------------
// Signed compare Instructions
instruct compI_eReg(eFlagsReg cr, eRegI op1, eRegI op2) %{
......
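The enc_Array_Equals encoding above compares two chars (one 4-byte word) per loop iteration: after the pointer, null, and length checks it first compares an odd trailing char, then walks the remaining data with a negative index that increments toward zero. A hedged Java rendering of that traversal order follows; chunkedEqualsSketch and its local names are illustrative, not part of the commit.

```java
// Sketch of the comparison order used by enc_Array_Equals: odd tail first,
// then one 4-byte (two-char) chunk per iteration. Illustration only.
static boolean chunkedEqualsSketch(char[] a, char[] b) {
    if (a == b) return true;
    if (a == null || b == null || a.length != b.length) return false;
    int len = a.length;
    if (len == 0) return true;
    int chunks = len >>> 1;                      // number of 4-byte chunks
    if ((len & 1) != 0 && a[2 * chunks] != b[2 * chunks]) {
        return false;                            // odd-length "tail" char
    }
    for (int i = 0; i < chunks; i++) {           // 4-byte-wide compare loop
        if (a[2 * i] != b[2 * i] || a[2 * i + 1] != b[2 * i + 1]) {
            return false;
        }
    }
    return true;
}
```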
@@ -3808,6 +3808,78 @@ encode %{
masm.bind(DONE_LABEL);
%}
enc_class enc_Array_Equals(rdi_RegP ary1, rsi_RegP ary2, rax_RegI tmp1, rbx_RegI tmp2, rcx_RegI result) %{
Label TRUE_LABEL, FALSE_LABEL, DONE_LABEL, COMPARE_LOOP_HDR, COMPARE_LOOP;
MacroAssembler masm(&cbuf);
Register ary1Reg = as_Register($ary1$$reg);
Register ary2Reg = as_Register($ary2$$reg);
Register tmp1Reg = as_Register($tmp1$$reg);
Register tmp2Reg = as_Register($tmp2$$reg);
Register resultReg = as_Register($result$$reg);
int length_offset = arrayOopDesc::length_offset_in_bytes();
int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR);
// Check the input args
masm.cmpq(ary1Reg, ary2Reg);
masm.jcc(Assembler::equal, TRUE_LABEL);
masm.testq(ary1Reg, ary1Reg);
masm.jcc(Assembler::zero, FALSE_LABEL);
masm.testq(ary2Reg, ary2Reg);
masm.jcc(Assembler::zero, FALSE_LABEL);
// Check the lengths
masm.movl(tmp2Reg, Address(ary1Reg, length_offset));
masm.movl(resultReg, Address(ary2Reg, length_offset));
masm.cmpl(tmp2Reg, resultReg);
masm.jcc(Assembler::notEqual, FALSE_LABEL);
masm.testl(resultReg, resultReg);
masm.jcc(Assembler::zero, TRUE_LABEL);
// Get the number of 4 byte vectors to compare
masm.shrl(resultReg, 1);
// Check for odd-length arrays
masm.andl(tmp2Reg, 1);
masm.testl(tmp2Reg, tmp2Reg);
masm.jcc(Assembler::zero, COMPARE_LOOP_HDR);
// Compare 2-byte "tail" at end of arrays
masm.load_unsigned_word(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset));
masm.load_unsigned_word(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset));
masm.cmpl(tmp1Reg, tmp2Reg);
masm.jcc(Assembler::notEqual, FALSE_LABEL);
masm.testl(resultReg, resultReg);
masm.jcc(Assembler::zero, TRUE_LABEL);
// Setup compare loop
masm.bind(COMPARE_LOOP_HDR);
// Shift tmp1Reg and tmp2Reg to the last 4-byte boundary of the arrays
masm.leaq(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset));
masm.leaq(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset));
masm.negq(resultReg);
// 4-byte-wide compare loop
masm.bind(COMPARE_LOOP);
masm.movl(ary1Reg, Address(tmp1Reg, resultReg, Address::times_4, 0));
masm.movl(ary2Reg, Address(tmp2Reg, resultReg, Address::times_4, 0));
masm.cmpl(ary1Reg, ary2Reg);
masm.jcc(Assembler::notEqual, FALSE_LABEL);
masm.incrementq(resultReg);
masm.jcc(Assembler::notZero, COMPARE_LOOP);
masm.bind(TRUE_LABEL);
masm.movl(resultReg, 1); // return true
masm.jmp(DONE_LABEL);
masm.bind(FALSE_LABEL);
masm.xorl(resultReg, resultReg); // return false
// That's it
masm.bind(DONE_LABEL);
%}
enc_class enc_rethrow()
%{
cbuf.set_inst_mark();
@@ -10876,6 +10948,18 @@ instruct string_compare(rdi_RegP str1, rsi_RegP str2, rax_RegI tmp1,
ins_pipe( pipe_slow );
%}
// fast array equals
instruct array_equals(rdi_RegP ary1, rsi_RegP ary2, rax_RegI tmp1,
rbx_RegI tmp2, rcx_RegI result, rFlagsReg cr) %{
match(Set result (AryEq ary1 ary2));
effect(USE_KILL ary1, USE_KILL ary2, KILL tmp1, KILL tmp2, KILL cr);
//ins_cost(300);
format %{ "Array Equals $ary1,$ary2 -> $result // KILL RAX, RBX" %}
ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, result) );
ins_pipe( pipe_slow );
%}
//----------Control Flow Instructions------------------------------------------
// Signed compare Instructions
......
@@ -564,6 +564,10 @@
do_name( copyOfRange_name, "copyOfRange") \
do_signature(copyOfRange_signature, "([Ljava/lang/Object;IILjava/lang/Class;)[Ljava/lang/Object;") \
\
do_intrinsic(_equalsC, java_util_Arrays, equals_name, equalsC_signature, F_S) \
do_name( equals_name, "equals") \
do_signature(equalsC_signature, "([C[C)Z") \
\
do_intrinsic(_invoke, java_lang_reflect_Method, invoke_name, object_array_object_object_signature, F_R) \
/* (symbols invoke_name and invoke_signature defined above) */ \
\
......
@@ -37,6 +37,7 @@ macro(Allocate)
macro(AllocateArray)
macro(AndI)
macro(AndL)
macro(AryEq)
macro(AtanD)
macro(Binary)
macro(Bool)
......
@@ -134,6 +134,7 @@ void Block::implicit_null_check(PhaseCFG *cfg, Node *proj, Node *val, int allowe
if( mach->in(2) != val ) continue;
break; // Found a memory op?
case Op_StrComp:
case Op_AryEq:
// Not a legit memory op for implicit null check regardless of
// embedded loads
continue;
......
@@ -163,6 +163,7 @@ class LibraryCallKit : public GraphKit {
bool inline_native_newArray();
bool inline_native_getLength();
bool inline_array_copyOf(bool is_copyOfRange);
bool inline_array_equals();
bool inline_native_clone(bool is_virtual);
bool inline_native_Reflection_getCallerClass();
bool inline_native_AtomicLong_get();
@@ -259,6 +260,7 @@ CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) {
switch (id) {
case vmIntrinsics::_indexOf:
case vmIntrinsics::_compareTo:
case vmIntrinsics::_equalsC:
break; // InlineNatives does not control String.compareTo
default:
return NULL;
@@ -272,6 +274,9 @@ CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) {
case vmIntrinsics::_indexOf:
if (!SpecialStringIndexOf) return NULL;
break;
case vmIntrinsics::_equalsC:
if (!SpecialArraysEquals) return NULL;
break;
case vmIntrinsics::_arraycopy:
if (!InlineArrayCopy) return NULL;
break;
@@ -586,6 +591,8 @@ bool LibraryCallKit::try_to_inline() {
return inline_array_copyOf(false);
case vmIntrinsics::_copyOfRange:
return inline_array_copyOf(true);
case vmIntrinsics::_equalsC:
return inline_array_equals();
case vmIntrinsics::_clone:
return inline_native_clone(intrinsic()->is_virtual());
@@ -813,6 +820,22 @@ bool LibraryCallKit::inline_string_compareTo() {
return true;
}
//------------------------------inline_array_equals----------------------------
bool LibraryCallKit::inline_array_equals() {
_sp += 2;
Node *argument2 = pop();
Node *argument1 = pop();
Node* equals =
_gvn.transform(new (C, 3) AryEqNode(control(),
argument1,
argument2)
);
push(equals);
return true;
}
// Java version of String.indexOf(constant string)
// class StringDecl {
// StringDecl(char[] ca) {
......
@@ -2632,6 +2632,7 @@ void PhaseIdealLoop::build_loop_late_post( Node *n, const PhaseIdealLoop *verify
case Op_LoadD_unaligned:
case Op_LoadL_unaligned:
case Op_StrComp: // Does a bunch of load-like effects
case Op_AryEq:
pinned = false;
}
if( pinned ) {
......
@@ -744,6 +744,7 @@ static void match_alias_type(Compile* C, Node* n, Node* m) {
if (nidx == Compile::AliasIdxBot && midx == Compile::AliasIdxTop) {
switch (n->Opcode()) {
case Op_StrComp:
case Op_AryEq:
case Op_MemBarVolatile:
case Op_MemBarCPUOrder: // %%% these ideals should have narrower adr_type?
nidx = Compile::AliasIdxTop;
@@ -1717,6 +1718,7 @@ void Matcher::find_shared( Node *n ) {
mstack.push(n->in(0), Pre_Visit); // Visit Control input
continue; // while (mstack.is_nonempty())
case Op_StrComp:
case Op_AryEq:
set_shared(n); // Force result into register (it will be anyways)
break;
case Op_ConP: { // Convert pointers above the centerline to NUL
......
@@ -156,7 +156,7 @@ static Node *step_through_mergemem(PhaseGVN *phase, MergeMemNode *mmem, const T
phase->C->must_alias(adr_check, alias_idx );
// Sometimes dead array references collapse to a[-1], a[-2], or a[-3]
if( !consistent && adr_check != NULL && !adr_check->empty() &&
tp->isa_aryptr() && tp->offset() == Type::OffsetBot &&
tp->isa_aryptr() && tp->offset() == Type::OffsetBot &&
adr_check->isa_aryptr() && adr_check->offset() != Type::OffsetBot &&
( adr_check->offset() == arrayOopDesc::length_offset_in_bytes() ||
adr_check->offset() == oopDesc::klass_offset_in_bytes() ||
@@ -2394,6 +2394,13 @@ Node *StrCompNode::Ideal(PhaseGVN *phase, bool can_reshape){
return remove_dead_region(phase, can_reshape) ? this : NULL;
}
//------------------------------Ideal------------------------------------------
// Return a node which is more "ideal" than the current node. Strip out
// control copies
Node *AryEqNode::Ideal(PhaseGVN *phase, bool can_reshape){
return remove_dead_region(phase, can_reshape) ? this : NULL;
}
//=============================================================================
MemBarNode::MemBarNode(Compile* C, int alias_idx, Node* precedent)
......
@@ -725,6 +725,18 @@ public:
virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
};
//------------------------------AryEq---------------------------------------
class AryEqNode: public Node {
public:
AryEqNode(Node *control, Node* s1, Node* s2): Node(control, s1, s2) {};
virtual int Opcode() const;
virtual bool depends_only_on_test() const { return false; }
virtual const Type* bottom_type() const { return TypeInt::BOOL; }
virtual const TypePtr* adr_type() const { return TypeAryPtr::CHARS; }
virtual uint ideal_reg() const { return Op_RegI; }
virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
};
//------------------------------MemBar-----------------------------------------
// There are different flavors of Memory Barriers to match the Java Memory
// Model. Monitor-enter and volatile-load act as Aquires: no following ref
......
@@ -1312,6 +1312,9 @@ void Arguments::set_aggressive_opts_flags() {
if (AggressiveOpts && FLAG_IS_DEFAULT(DoEscapeAnalysis)) {
FLAG_SET_DEFAULT(DoEscapeAnalysis, true);
}
if (AggressiveOpts && FLAG_IS_DEFAULT(SpecialArraysEquals)) {
FLAG_SET_DEFAULT(SpecialArraysEquals, true);
}
#endif
if (AggressiveOpts) {
......
@@ -460,6 +460,9 @@ class CommandLineFlags {
develop(bool, SpecialStringIndexOf, true, \
"special version of string indexOf") \
\
product(bool, SpecialArraysEquals, true, \
"special version of Arrays.equals(char[],char[])") \
\
develop(bool, TraceCallFixup, false, \
"traces all call fixups") \
\
......
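The intrinsic is attempted only when the new SpecialArraysEquals product flag is enabled; it defaults to true, and the arguments.cpp hunk above also forces it on under -XX:+AggressiveOpts when left at its default. A minimal usage sketch, assuming a HotSpot build containing this change (the class name ArraysEqualsDemo is illustrative):

```java
import java.util.Arrays;

public class ArraysEqualsDemo {
    public static void main(String[] args) {
        char[] x = "hello".toCharArray();
        char[] y = "hello".toCharArray();
        // With this change, C2 may compile the call below to the AryEq-based
        // stub; run with -XX:-SpecialArraysEquals to keep the plain Java path.
        System.out.println(Arrays.equals(x, y));  // prints: true
    }
}
```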