提交 ccf9e3df 编写于 作者: C cfang

6761600: Use sse 4.2 in intrinsics

Summary: Use SSE 4.2 in intrinsics for String.{compareTo/equals/indexOf} and Arrays.equals.
Reviewed-by: kvn, never, jrose
上级 6507eb9c
......@@ -3003,6 +3003,202 @@ enc_class Fast_Unlock(iRegP oop, iRegP box, o7RegP scratch, iRegP scratch2) %{
__ bind(Ldone);
%}
// Encoding for the string_equals instruct: emits code that compares the
// character data of two String objects.
//   str1, str2 : String oops; both registers are overwritten (they end up
//                holding the counts / scratch characters).
//   tmp1, tmp2 : scratch; hold the address of the first compared character of
//                str1 and str2 respectively.
//   result     : set to 1 when the strings are equal, 0 otherwise.
// O7 is additionally used as a scratch register in the word-compare path.
// Note on delay slots: for the annulled conditional branches used below, the
// delay-slot instruction takes effect only when the branch is taken.
enc_class enc_String_Equals(o0RegP str1, o1RegP str2, g3RegP tmp1, g4RegP tmp2, notemp_iRegI result) %{
Label Lword, Lword_loop, Lpost_word, Lchar, Lchar_loop, Ldone;
MacroAssembler _masm(&cbuf);
Register str1_reg = reg_to_register_object($str1$$reg);
Register str2_reg = reg_to_register_object($str2$$reg);
Register tmp1_reg = reg_to_register_object($tmp1$$reg);
Register tmp2_reg = reg_to_register_object($tmp2$$reg);
Register result_reg = reg_to_register_object($result$$reg);
// Get the first character position in both strings
// [8] char array, [12] offset, [16] count
int value_offset = java_lang_String:: value_offset_in_bytes();
int offset_offset = java_lang_String::offset_offset_in_bytes();
int count_offset = java_lang_String:: count_offset_in_bytes();
// load str1 (jchar*) base address into tmp1_reg
// tmp1_reg = str1.value + array base offset + (str1.offset << log2(sizeof(jchar)))
__ load_heap_oop(Address(str1_reg, 0, value_offset), tmp1_reg);
__ ld(Address(str1_reg, 0, offset_offset), result_reg);
__ add(tmp1_reg, arrayOopDesc::base_offset_in_bytes(T_CHAR), tmp1_reg);
__ ld(Address(str1_reg, 0, count_offset), str1_reg); // hoisted; str1_reg now holds str1.count
__ sll(result_reg, exact_log2(sizeof(jchar)), result_reg);
__ load_heap_oop(Address(str2_reg, 0, value_offset), tmp2_reg); // hoisted
__ add(result_reg, tmp1_reg, tmp1_reg);
// load str2 (jchar*) base address into tmp2_reg
// __ ld_ptr(Address(str2_reg, 0, value_offset), tmp2_reg); // hoisted
__ ld(Address(str2_reg, 0, offset_offset), result_reg);
__ add(tmp2_reg, arrayOopDesc::base_offset_in_bytes(T_CHAR), tmp2_reg);
__ ld(Address(str2_reg, 0, count_offset), str2_reg); // hoisted; str2_reg now holds str2.count
__ sll(result_reg, exact_log2(sizeof(jchar)), result_reg);
__ cmp(str1_reg, str2_reg); // hoisted; compares the two counts
__ add(result_reg, tmp2_reg, tmp2_reg);
// convert the count in str1_reg to a byte length (sll does not touch the condition codes)
__ sll(str1_reg, exact_log2(sizeof(jchar)), str1_reg);
// different counts -> not equal
__ br(Assembler::notEqual, true, Assembler::pt, Ldone);
__ delayed()->mov(G0, result_reg); // not equal
// both strings empty -> equal
__ br_zero(Assembler::equal, true, Assembler::pn, str1_reg, Ldone);
__ delayed()->add(G0, 1, result_reg); // equal
__ cmp(tmp1_reg, tmp2_reg); // same character data (same start address and count)?
__ brx(Assembler::equal, true, Assembler::pn, Ldone);
__ delayed()->add(G0, 1, result_reg);
// rename registers
Register limit_reg = str1_reg;
Register chr2_reg = str2_reg;
Register chr1_reg = result_reg;
// tmp{12} are the base pointers
// check for alignment and position the pointers to the ends
__ or3(tmp1_reg, tmp2_reg, chr1_reg);
__ andcc(chr1_reg, 0x3, chr1_reg); // notZero means at least one not 4-byte aligned
__ br(Assembler::notZero, false, Assembler::pn, Lchar);
__ delayed()->nop();
// Both pointers are 4-byte aligned: compare two chars (one 32-bit word) at a time.
__ bind(Lword);
__ and3(limit_reg, 0x2, O7); // remember the remainder (either 0 or 2)
__ andn(limit_reg, 0x3, limit_reg); // round the byte length down to a multiple of 4
__ br_zero(Assembler::zero, false, Assembler::pn, limit_reg, Lpost_word);
__ delayed()->nop();
// Point both bases just past the word-compared region and index with a
// negative offset that counts up to zero.
__ add(tmp1_reg, limit_reg, tmp1_reg);
__ add(tmp2_reg, limit_reg, tmp2_reg);
__ neg(limit_reg);
__ lduw(tmp1_reg, limit_reg, chr1_reg);
__ bind(Lword_loop);
__ lduw(tmp2_reg, limit_reg, chr2_reg);
__ cmp(chr1_reg, chr2_reg);
__ br(Assembler::notEqual, true, Assembler::pt, Ldone);
__ delayed()->mov(G0, result_reg);
__ inccc(limit_reg, 2*sizeof(jchar));
// annul LDUW if branch is not taken to prevent access past end of string
__ br(Assembler::notZero, true, Assembler::pt, Lword_loop); // delay slot annulled on fall-through
__ delayed()->lduw(tmp1_reg, limit_reg, chr1_reg); // hoisted
__ bind(Lpost_word);
// No trailing char (remainder in O7 is 0) -> everything matched.
__ br_zero(Assembler::zero, true, Assembler::pt, O7, Ldone);
__ delayed()->add(G0, 1, result_reg);
// Compare the single trailing char; both bases point at it here.
__ lduh(tmp1_reg, 0, chr1_reg);
__ lduh(tmp2_reg, 0, chr2_reg);
__ cmp (chr1_reg, chr2_reg);
__ br(Assembler::notEqual, true, Assembler::pt, Ldone);
__ delayed()->mov(G0, result_reg);
__ ba(false,Ldone);
__ delayed()->add(G0, 1, result_reg);
// At least one pointer is not 4-byte aligned: compare one char at a time.
__ bind(Lchar);
__ add(tmp1_reg, limit_reg, tmp1_reg);
__ add(tmp2_reg, limit_reg, tmp2_reg);
__ neg(limit_reg); // negate the byte length
__ lduh(tmp1_reg, limit_reg, chr1_reg);
__ bind(Lchar_loop);
__ lduh(tmp2_reg, limit_reg, chr2_reg);
__ cmp(chr1_reg, chr2_reg);
__ br(Assembler::notEqual, true, Assembler::pt, Ldone);
__ delayed()->mov(G0, result_reg); // not equal
__ inccc(limit_reg, sizeof(jchar));
// annul LDUH if branch is not taken to prevent access past end of string
__ br(Assembler::notZero, true, Assembler::pt, Lchar_loop); // delay slot annulled on fall-through
__ delayed()->lduh(tmp1_reg, limit_reg, chr1_reg); // hoisted
__ add(G0, 1, result_reg); // equal
__ bind(Ldone);
%}
// Encoding for the array_equals instruct: emits code that compares two char
// arrays.
//   ary1, ary2 : array oops (either may be null); both registers are
//                overwritten with the address of the array data.
//   tmp1, tmp2 : scratch; tmp1 becomes the (negated) byte limit, tmp2 holds
//                characters loaded from ary1.
//   result     : set to 1 when the arrays are equal, 0 otherwise; it is also
//                used as the scratch register for characters loaded from ary2.
// Note on delay slots: for an annulled conditional branch the delay-slot
// instruction takes effect only when the branch is taken.
enc_class enc_Array_Equals(o0RegP ary1, o1RegP ary2, g3RegP tmp1, g4RegP tmp2, notemp_iRegI result) %{
Label Lvector, Ldone, Lloop;
MacroAssembler _masm(&cbuf);
Register ary1_reg = reg_to_register_object($ary1$$reg);
Register ary2_reg = reg_to_register_object($ary2$$reg);
Register tmp1_reg = reg_to_register_object($tmp1$$reg);
Register tmp2_reg = reg_to_register_object($tmp2$$reg);
Register result_reg = reg_to_register_object($result$$reg);
int length_offset = arrayOopDesc::length_offset_in_bytes();
int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR);
// return true if the same array
// Pointer comparison: branch with brx so the full pointer width is tested.
// A plain br tests only the 32-bit condition codes, so on 64-bit two distinct
// arrays whose addresses share the low 32 bits would be reported equal.
__ cmp(ary1_reg, ary2_reg);
__ brx(Assembler::equal, true, Assembler::pn, Ldone);
__ delayed()->add(G0, 1, result_reg); // equal
// exactly one of the arrays is null -> not equal
__ br_null(ary1_reg, true, Assembler::pn, Ldone);
__ delayed()->mov(G0, result_reg); // not equal
__ br_null(ary2_reg, true, Assembler::pn, Ldone);
__ delayed()->mov(G0, result_reg); // not equal
// load the lengths of arrays
__ ld(Address(ary1_reg, 0, length_offset), tmp1_reg);
__ ld(Address(ary2_reg, 0, length_offset), tmp2_reg);
// return false if the two arrays are not equal length
__ cmp(tmp1_reg, tmp2_reg);
__ br(Assembler::notEqual, true, Assembler::pn, Ldone);
__ delayed()->mov(G0, result_reg); // not equal
__ br_zero(Assembler::zero, true, Assembler::pn, tmp1_reg, Ldone);
__ delayed()->add(G0, 1, result_reg); // zero-length arrays are equal
// load array addresses
__ add(ary1_reg, base_offset, ary1_reg);
__ add(ary2_reg, base_offset, ary2_reg);
// renaming registers
Register chr1_reg = tmp2_reg; // for characters in ary1
Register chr2_reg = result_reg; // for characters in ary2
Register limit_reg = tmp1_reg; // length
// set byte count
__ sll(limit_reg, exact_log2(sizeof(jchar)), limit_reg);
__ andcc(limit_reg, 0x2, chr1_reg); // odd number of chars, i.e. a trailing character?
__ br(Assembler::zero, false, Assembler::pt, Lvector);
__ delayed()->nop();
// compare the trailing char
__ sub(limit_reg, sizeof(jchar), limit_reg);
__ lduh(ary1_reg, limit_reg, chr1_reg);
__ lduh(ary2_reg, limit_reg, chr2_reg);
__ cmp(chr1_reg, chr2_reg);
__ br(Assembler::notEqual, true, Assembler::pt, Ldone);
__ delayed()->mov(G0, result_reg); // not equal
// only one char ?
__ br_zero(Assembler::zero, true, Assembler::pn, limit_reg, Ldone);
__ delayed()->add(G0, 1, result_reg); // the single char matched -> equal
__ bind(Lvector);
// Shift ary1_reg and ary2_reg to the end of the arrays, negate limit
__ add(ary1_reg, limit_reg, ary1_reg);
__ add(ary2_reg, limit_reg, ary2_reg);
__ neg(limit_reg, limit_reg);
__ lduw(ary1_reg, limit_reg, chr1_reg);
// Compare two chars (one 32-bit word) per iteration.
__ bind(Lloop);
__ lduw(ary2_reg, limit_reg, chr2_reg);
__ cmp(chr1_reg, chr2_reg);
// This branch is not annulled, so the delay slot also executes on
// fall-through. That is harmless: result_reg doubles as chr2_reg, which is
// reloaded at the top of the loop and set to 1 after the loop.
__ br(Assembler::notEqual, false, Assembler::pt, Ldone);
__ delayed()->mov(G0, result_reg); // not equal
__ inccc(limit_reg, 2*sizeof(jchar));
// annul LDUW if branch is not taken to prevent access past end of array
__ br(Assembler::notZero, true, Assembler::pt, Lloop); // delay slot annulled on fall-through
__ delayed()->lduw(ary1_reg, limit_reg, chr1_reg); // hoisted
__ add(G0, 1, result_reg); // equals
__ bind(Ldone);
%}
enc_class enc_rethrow() %{
cbuf.set_inst_mark();
Register temp_reg = G3;
......@@ -9015,6 +9211,25 @@ instruct string_compare(o0RegP str1, o1RegP str2, g3RegP tmp1, g4RegP tmp2, note
ins_pipe(long_memory_op);
%}
// Matches the ideal StrEquals node and expands it with enc_String_Equals.
// Both string operands are consumed and destroyed (USE_KILL); tmp1, tmp2, the
// condition codes and tmp3 (an O7 operand) are declared killed as well.
// tmp3 is not passed to the encoding; enc_String_Equals writes O7 directly.
instruct string_equals(o0RegP str1, o1RegP str2, g3RegP tmp1, g4RegP tmp2, notemp_iRegI result,
o7RegI tmp3, flagsReg ccr) %{
match(Set result (StrEquals str1 str2));
effect(USE_KILL str1, USE_KILL str2, KILL tmp1, KILL tmp2, KILL ccr, KILL tmp3);
ins_cost(300);
format %{ "String Equals $str1,$str2 -> $result" %}
ins_encode( enc_String_Equals(str1, str2, tmp1, tmp2, result) );
ins_pipe(long_memory_op);
%}
// Matches the ideal AryEq node and expands it with enc_Array_Equals.
// Both array operands are consumed and destroyed (USE_KILL); tmp1, tmp2 and
// the condition codes are declared killed.
instruct array_equals(o0RegP ary1, o1RegP ary2, g3RegP tmp1, g4RegP tmp2, notemp_iRegI result,
flagsReg ccr) %{
match(Set result (AryEq ary1 ary2));
effect(USE_KILL ary1, USE_KILL ary2, KILL tmp1, KILL tmp2, KILL ccr);
ins_cost(300);
format %{ "Array Equals $ary1,$ary2 -> $result" %}
ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, result));
ins_pipe(long_memory_op);
%}
//---------- Population Count Instructions -------------------------------------
......
......@@ -2173,6 +2173,31 @@ void Assembler::orl(Register dst, Register src) {
emit_arith(0x0B, 0xC0, dst, src);
}
// PCMPESTRI with a memory source operand. Requires SSE4.2 (asserted).
// Byte sequence emitted: 0x66, the prefix chosen by prefix(src, dst),
// 0x0F 0x3A 0x61, the operand bytes for (dst, src), then imm8.
void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
assert(VM_Version::supports_sse4_2(), "");
InstructionMark im(this);
emit_byte(0x66);
prefix(src, dst);
emit_byte(0x0F);
emit_byte(0x3A);
emit_byte(0x61);
emit_operand(dst, src);
emit_byte(imm8); // control byte, emitted last
}
// PCMPESTRI with a register source operand. Requires SSE4.2 (asserted).
// Byte sequence emitted: 0x66, the prefix chosen by prefixq_and_encode,
// 0x0F 0x3A 0x61, a register-direct ModRM byte (0xC0 | encode), then imm8.
// NOTE(review): this overload uses prefixq_and_encode while the Address
// overload uses prefix(); presumably the "q" variant requests a 64-bit
// operand size -- confirm the difference between the two forms is intended.
void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
assert(VM_Version::supports_sse4_2(), "");
emit_byte(0x66);
int encode = prefixq_and_encode(dst->encoding(), src->encoding());
emit_byte(0x0F);
emit_byte(0x3A);
emit_byte(0x61);
emit_byte(0xC0 | encode);
emit_byte(imm8); // control byte, emitted last
}
// generic
void Assembler::pop(Register dst) {
int encode = prefix_and_encode(dst->encoding());
......@@ -2330,6 +2355,29 @@ void Assembler::psrlq(XMMRegister dst, int shift) {
emit_byte(shift);
}
// PTEST with a memory source operand. Requires SSE4.1 (asserted).
// Byte sequence emitted: 0x66, the prefix chosen by prefix(src, dst),
// 0x0F 0x38 0x17, then the operand bytes for (dst, src).
void Assembler::ptest(XMMRegister dst, Address src) {
assert(VM_Version::supports_sse4_1(), "");
InstructionMark im(this);
emit_byte(0x66);
prefix(src, dst);
emit_byte(0x0F);
emit_byte(0x38);
emit_byte(0x17);
emit_operand(dst, src);
}
// PTEST with a register source operand. Requires SSE4.1 (asserted).
// Byte sequence emitted: 0x66, the prefix chosen by prefixq_and_encode,
// 0x0F 0x38 0x17, then a register-direct ModRM byte (0xC0 | encode).
// NOTE(review): this overload uses prefixq_and_encode while the Address
// overload uses prefix(); confirm the differing prefix choice is intended.
void Assembler::ptest(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_sse4_1(), "");
emit_byte(0x66);
int encode = prefixq_and_encode(dst->encoding(), src->encoding());
emit_byte(0x0F);
emit_byte(0x38);
emit_byte(0x17);
emit_byte(0xC0 | encode);
}
void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
emit_byte(0x66);
......
......@@ -1226,6 +1226,10 @@ private:
void orq(Register dst, Address src);
void orq(Register dst, Register src);
// SSE4.2 string instructions
void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
void pcmpestri(XMMRegister xmm1, Address src, int imm8);
void popl(Address dst);
#ifdef _LP64
......@@ -1260,6 +1264,10 @@ private:
// Shift Right Logical Quadword Immediate
void psrlq(XMMRegister dst, int shift);
// Logical Compare Double Quadword
void ptest(XMMRegister dst, XMMRegister src);
void ptest(XMMRegister dst, Address src);
// Interleave Low Bytes
void punpcklbw(XMMRegister dst, XMMRegister src);
......
......@@ -408,6 +408,11 @@ void VM_Version::get_processor_features() {
UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
}
}
if( supports_sse4_2() && UseSSE >= 4 ) {
if( FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
UseSSE42Intrinsics = true;
}
}
}
}
......
此差异已折叠。
此差异已折叠。
......@@ -574,9 +574,13 @@ bool InstructForm::needs_anti_dependence_check(FormDict &globals) const {
// TEMPORARY
// if( is_simple_chain_rule(globals) ) return false;
// String-compare uses many memorys edges, but writes none
// String.(compareTo/equals/indexOf) and Arrays.equals use many memory edges,
// but write none
if( _matrule && _matrule->_rChild &&
strcmp(_matrule->_rChild->_opType,"StrComp")==0 )
( strcmp(_matrule->_rChild->_opType,"StrComp" )==0 ||
strcmp(_matrule->_rChild->_opType,"StrEquals" )==0 ||
strcmp(_matrule->_rChild->_opType,"StrIndexOf" )==0 ||
strcmp(_matrule->_rChild->_opType,"AryEq" )==0 ))
return true;
// Check if instruction has a USE of a memory operand class, but no defs
......@@ -815,8 +819,10 @@ uint InstructForm::oper_input_base(FormDict &globals) {
return AdlcVMDeps::Parms; // Skip the machine-state edges
if( _matrule->_rChild &&
strcmp(_matrule->_rChild->_opType,"StrComp")==0 ) {
// String compare takes 1 control and 4 memory edges.
( strcmp(_matrule->_rChild->_opType,"StrComp" )==0 ||
strcmp(_matrule->_rChild->_opType,"StrEquals" )==0 ||
strcmp(_matrule->_rChild->_opType,"StrIndexOf")==0 )) {
// String.(compareTo/equals/indexOf) take 1 control and 4 memory edges.
return 5;
}
......
......@@ -288,6 +288,7 @@
template(stringCacheEnabled_name, "stringCacheEnabled") \
template(bitCount_name, "bitCount") \
template(profile_name, "profile") \
template(equals_name, "equals") \
\
/* non-intrinsic name/signature pairs: */ \
template(register_method_name, "register") \
......@@ -579,7 +580,6 @@
do_signature(copyOfRange_signature, "([Ljava/lang/Object;IILjava/lang/Class;)[Ljava/lang/Object;") \
\
do_intrinsic(_equalsC, java_util_Arrays, equals_name, equalsC_signature, F_S) \
do_name( equals_name, "equals") \
do_signature(equalsC_signature, "([C[C)Z") \
\
do_intrinsic(_invoke, java_lang_reflect_Method, invoke_name, object_array_object_object_signature, F_R) \
......@@ -589,6 +589,7 @@
do_name( compareTo_name, "compareTo") \
do_intrinsic(_indexOf, java_lang_String, indexOf_name, string_int_signature, F_R) \
do_name( indexOf_name, "indexOf") \
do_intrinsic(_equals, java_lang_String, equals_name, object_boolean_signature, F_R) \
\
do_class(java_nio_Buffer, "java/nio/Buffer") \
do_intrinsic(_checkIndex, java_nio_Buffer, checkIndex_name, int_int_signature, F_R) \
......
......@@ -218,6 +218,8 @@ macro(StoreL)
macro(StoreP)
macro(StoreN)
macro(StrComp)
macro(StrEquals)
macro(StrIndexOf)
macro(SubD)
macro(SubF)
macro(SubI)
......
......@@ -438,6 +438,12 @@ Block* PhaseCFG::insert_anti_dependences(Block* LCA, Node* load, bool verify) {
#endif
assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_StrComp),
"String compare is only known 'load' that does not conflict with any stores");
assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_StrEquals),
"String equals is a 'load' that does not conflict with any stores");
assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_StrIndexOf),
"String indexOf is a 'load' that does not conflict with any stores");
assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_AryEq),
"Arrays equals is a 'load' that do not conflict with any stores");
if (!C->alias_type(load_alias_idx)->is_rewritable()) {
// It is impossible to spoil this load by putting stores before it,
......
......@@ -137,6 +137,8 @@ void Block::implicit_null_check(PhaseCFG *cfg, Node *proj, Node *val, int allowe
if( mach->in(2) != val ) continue;
break; // Found a memory op?
case Op_StrComp:
case Op_StrEquals:
case Op_StrIndexOf:
case Op_AryEq:
// Not a legit memory op for implicit null check regardless of
// embedded loads
......
......@@ -136,6 +136,7 @@ class LibraryCallKit : public GraphKit {
bool inline_string_compareTo();
bool inline_string_indexOf();
Node* string_indexOf(Node* string_object, ciTypeArray* target_array, jint offset, jint cache_i, jint md2_i);
bool inline_string_equals();
Node* pop_math_arg();
bool runtime_math(const TypeFunc* call_type, address funcAddr, const char* funcName);
bool inline_math_native(vmIntrinsics::ID id);
......@@ -261,6 +262,7 @@ CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) {
switch (id) {
case vmIntrinsics::_indexOf:
case vmIntrinsics::_compareTo:
case vmIntrinsics::_equals:
case vmIntrinsics::_equalsC:
break; // InlineNatives does not control String.compareTo
default:
......@@ -275,6 +277,9 @@ CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) {
case vmIntrinsics::_indexOf:
if (!SpecialStringIndexOf) return NULL;
break;
case vmIntrinsics::_equals:
if (!SpecialStringEquals) return NULL;
break;
case vmIntrinsics::_equalsC:
if (!SpecialArraysEquals) return NULL;
break;
......@@ -442,6 +447,8 @@ bool LibraryCallKit::try_to_inline() {
return inline_string_compareTo();
case vmIntrinsics::_indexOf:
return inline_string_indexOf();
case vmIntrinsics::_equals:
return inline_string_equals();
case vmIntrinsics::_getObject:
return inline_unsafe_access(!is_native_ptr, !is_store, T_OBJECT, false);
......@@ -793,6 +800,8 @@ Node* LibraryCallKit::generate_current_thread(Node* &tls_output) {
//------------------------------inline_string_compareTo------------------------
bool LibraryCallKit::inline_string_compareTo() {
if (!Matcher::has_match_rule(Op_StrComp)) return false;
const int value_offset = java_lang_String::value_offset_in_bytes();
const int count_offset = java_lang_String::count_offset_in_bytes();
const int offset_offset = java_lang_String::offset_offset_in_bytes();
......@@ -830,6 +839,82 @@ bool LibraryCallKit::inline_string_compareTo() {
return true;
}
//------------------------------inline_string_equals------------------------
bool LibraryCallKit::inline_string_equals() {
if (!Matcher::has_match_rule(Op_StrEquals)) return false;
const int value_offset = java_lang_String::value_offset_in_bytes();
const int count_offset = java_lang_String::count_offset_in_bytes();
const int offset_offset = java_lang_String::offset_offset_in_bytes();
_sp += 2;
Node* argument = pop(); // pop non-receiver first: it was pushed second
Node* receiver = pop();
// Null check on self without removing any arguments. The argument
// null check technically happens in the wrong place, which can lead to
// invalid stack traces when string compare is inlined into a method
// which handles NullPointerExceptions.
_sp += 2;
receiver = do_null_check(receiver, T_OBJECT);
//should not do null check for argument for String.equals(), because spec
//allows to specify NULL as argument.
_sp -= 2;
if (stopped()) {
return true;
}
// get String klass for instanceOf
ciInstanceKlass* klass = env()->String_klass();
// two paths (plus control) merge
RegionNode* region = new (C, 3) RegionNode(3);
Node* phi = new (C, 3) PhiNode(region, TypeInt::BOOL);
Node* inst = gen_instanceof(argument, makecon(TypeKlassPtr::make(klass)));
Node* cmp = _gvn.transform(new (C, 3) CmpINode(inst, intcon(1)));
Node* bol = _gvn.transform(new (C, 2) BoolNode(cmp, BoolTest::eq));
IfNode* iff = create_and_map_if(control(), bol, PROB_MAX, COUNT_UNKNOWN);
Node* if_true = _gvn.transform(new (C, 1) IfTrueNode(iff));
set_control(if_true);
const TypeInstPtr* string_type =
TypeInstPtr::make(TypePtr::BotPTR, klass, false, NULL, 0);
// instanceOf == true
Node* equals =
_gvn.transform(new (C, 7) StrEqualsNode(
control(),
memory(TypeAryPtr::CHARS),
memory(string_type->add_offset(value_offset)),
memory(string_type->add_offset(count_offset)),
memory(string_type->add_offset(offset_offset)),
receiver,
argument));
phi->init_req(1, _gvn.transform(equals));
region->init_req(1, if_true);
//instanceOf == false, fallthrough
Node* if_false = _gvn.transform(new (C, 1) IfFalseNode(iff));
set_control(if_false);
phi->init_req(2, _gvn.transform(intcon(0)));
region->init_req(2, if_false);
// post merge
set_control(_gvn.transform(region));
record_for_igvn(region);
push(_gvn.transform(phi));
return true;
}
//------------------------------inline_array_equals----------------------------
bool LibraryCallKit::inline_array_equals() {
......@@ -994,80 +1079,115 @@ Node* LibraryCallKit::string_indexOf(Node* string_object, ciTypeArray* target_ar
return result;
}
//------------------------------inline_string_indexOf------------------------
bool LibraryCallKit::inline_string_indexOf() {
const int value_offset = java_lang_String::value_offset_in_bytes();
const int count_offset = java_lang_String::count_offset_in_bytes();
const int offset_offset = java_lang_String::offset_offset_in_bytes();
_sp += 2;
Node *argument = pop(); // pop non-receiver first: it was pushed second
Node *receiver = pop();
// don't intrinsify if argument isn't a constant string.
if (!argument->is_Con()) {
return false;
}
const TypeOopPtr* str_type = _gvn.type(argument)->isa_oopptr();
if (str_type == NULL) {
return false;
}
ciInstanceKlass* klass = env()->String_klass();
ciObject* str_const = str_type->const_oop();
if (str_const == NULL || str_const->klass() != klass) {
return false;
}
ciInstance* str = str_const->as_instance();
assert(str != NULL, "must be instance");
Node* result;
if (Matcher::has_match_rule(Op_StrIndexOf) &&
UseSSE42Intrinsics) {
// Generate SSE4.2 version of indexOf
// We currently only have match rules that use SSE4.2
const int value_offset = java_lang_String::value_offset_in_bytes();
const int count_offset = java_lang_String::count_offset_in_bytes();
const int offset_offset = java_lang_String::offset_offset_in_bytes();
// Null check on self without removing any arguments. The argument
// null check technically happens in the wrong place, which can lead to
// invalid stack traces when string compare is inlined into a method
// which handles NullPointerExceptions.
_sp += 2;
receiver = do_null_check(receiver, T_OBJECT);
argument = do_null_check(argument, T_OBJECT);
_sp -= 2;
ciObject* v = str->field_value_by_offset(value_offset).as_object();
int o = str->field_value_by_offset(offset_offset).as_int();
int c = str->field_value_by_offset(count_offset).as_int();
ciTypeArray* pat = v->as_type_array(); // pattern (argument) character array
if (stopped()) {
return true;
}
// constant strings have no offset and count == length which
// simplifies the resulting code somewhat so lets optimize for that.
if (o != 0 || c != pat->length()) {
return false;
}
ciInstanceKlass* klass = env()->String_klass();
const TypeInstPtr* string_type =
TypeInstPtr::make(TypePtr::BotPTR, klass, false, NULL, 0);
result =
_gvn.transform(new (C, 7)
StrIndexOfNode(control(),
memory(TypeAryPtr::CHARS),
memory(string_type->add_offset(value_offset)),
memory(string_type->add_offset(count_offset)),
memory(string_type->add_offset(offset_offset)),
receiver,
argument));
} else { //Use LibraryCallKit::string_indexOf
// don't intrinsify if argument isn't a constant string.
if (!argument->is_Con()) {
return false;
}
const TypeOopPtr* str_type = _gvn.type(argument)->isa_oopptr();
if (str_type == NULL) {
return false;
}
ciInstanceKlass* klass = env()->String_klass();
ciObject* str_const = str_type->const_oop();
if (str_const == NULL || str_const->klass() != klass) {
return false;
}
ciInstance* str = str_const->as_instance();
assert(str != NULL, "must be instance");
ciObject* v = str->field_value_by_offset(value_offset).as_object();
int o = str->field_value_by_offset(offset_offset).as_int();
int c = str->field_value_by_offset(count_offset).as_int();
ciTypeArray* pat = v->as_type_array(); // pattern (argument) character array
// constant strings have no offset and count == length which
// simplifies the resulting code somewhat so lets optimize for that.
if (o != 0 || c != pat->length()) {
return false;
}
// Null check on self without removing any arguments. The argument
// null check technically happens in the wrong place, which can lead to
// invalid stack traces when string compare is inlined into a method
// which handles NullPointerExceptions.
_sp += 2;
receiver = do_null_check(receiver, T_OBJECT);
// No null check on the argument is needed since it's a constant String oop.
_sp -= 2;
if (stopped()) {
return true;
}
// Null check on self without removing any arguments. The argument
// null check technically happens in the wrong place, which can lead to
// invalid stack traces when string compare is inlined into a method
// which handles NullPointerExceptions.
_sp += 2;
receiver = do_null_check(receiver, T_OBJECT);
// No null check on the argument is needed since it's a constant String oop.
_sp -= 2;
if (stopped()) {
return true;
}
// The null string as a pattern always returns 0 (match at beginning of string)
if (c == 0) {
push(intcon(0));
return true;
}
// The null string as a pattern always returns 0 (match at beginning of string)
if (c == 0) {
push(intcon(0));
return true;
}
jchar lastChar = pat->char_at(o + (c - 1));
int cache = 0;
int i;
for (i = 0; i < c - 1; i++) {
assert(i < pat->length(), "out of range");
cache |= (1 << (pat->char_at(o + i) & (sizeof(cache) * BitsPerByte - 1)));
}
// Generate default indexOf
jchar lastChar = pat->char_at(o + (c - 1));
int cache = 0;
int i;
for (i = 0; i < c - 1; i++) {
assert(i < pat->length(), "out of range");
cache |= (1 << (pat->char_at(o + i) & (sizeof(cache) * BitsPerByte - 1)));
}
int md2 = c;
for (i = 0; i < c - 1; i++) {
assert(i < pat->length(), "out of range");
if (pat->char_at(o + i) == lastChar) {
md2 = (c - 1) - i;
int md2 = c;
for (i = 0; i < c - 1; i++) {
assert(i < pat->length(), "out of range");
if (pat->char_at(o + i) == lastChar) {
md2 = (c - 1) - i;
}
}
result = string_indexOf(receiver, pat, o, cache, md2);
}
Node* result = string_indexOf(receiver, pat, o, cache, md2);
push(result);
return true;
}
......
......@@ -2668,6 +2668,8 @@ void PhaseIdealLoop::build_loop_late_post( Node *n, const PhaseIdealLoop *verify
case Op_LoadD_unaligned:
case Op_LoadL_unaligned:
case Op_StrComp: // Does a bunch of load-like effects
case Op_StrEquals:
case Op_StrIndexOf:
case Op_AryEq:
pinned = false;
}
......
......@@ -746,6 +746,8 @@ static void match_alias_type(Compile* C, Node* n, Node* m) {
if (nidx == Compile::AliasIdxBot && midx == Compile::AliasIdxTop) {
switch (n->Opcode()) {
case Op_StrComp:
case Op_StrEquals:
case Op_StrIndexOf:
case Op_AryEq:
case Op_MemBarVolatile:
case Op_MemBarCPUOrder: // %%% these ideals should have narrower adr_type?
......@@ -1788,6 +1790,8 @@ void Matcher::find_shared( Node *n ) {
mstack.push(n->in(0), Pre_Visit); // Visit Control input
continue; // while (mstack.is_nonempty())
case Op_StrComp:
case Op_StrEquals:
case Op_StrIndexOf:
case Op_AryEq:
set_shared(n); // Force result into register (it will be anyways)
break;
......
......@@ -2481,13 +2481,37 @@ Node *StrCompNode::Ideal(PhaseGVN *phase, bool can_reshape){
return remove_dead_region(phase, can_reshape) ? this : NULL;
}
// Do we match on this edge? No memory edges
uint StrEqualsNode::match_edge(uint idx) const {
  // Only inputs 5 and 6 are matched; every other input index is rejected.
  const bool is_matched_input = (idx == 5) || (idx == 6);
  return is_matched_input;
}
//------------------------------Ideal------------------------------------------
// Return a node which is more "ideal" than the current node. Strip out
// control copies
Node *AryEqNode::Ideal(PhaseGVN *phase, bool can_reshape){
Node *StrEqualsNode::Ideal(PhaseGVN *phase, bool can_reshape){
  // Report progress (this) only when remove_dead_region changed something.
  if (remove_dead_region(phase, can_reshape)) {
    return this;
  }
  return NULL;
}
//=============================================================================
// Do we match on this edge? No memory edges
uint StrIndexOfNode::match_edge(uint idx) const {
  // Only inputs 5 and 6 are matched; every other input index is rejected.
  const bool is_matched_input = (idx == 5) || (idx == 6);
  return is_matched_input;
}
//------------------------------Ideal------------------------------------------
// Return a node which is more "ideal" than the current node. Strip out
// control copies
Node *StrIndexOfNode::Ideal(PhaseGVN *phase, bool can_reshape){
  // Report progress (this) only when remove_dead_region changed something.
  if (remove_dead_region(phase, can_reshape)) {
    return this;
  }
  return NULL;
}
//------------------------------Ideal------------------------------------------
// Return a node which is more "ideal" than the current node. Strip out
// control copies
Node *AryEqNode::Ideal(PhaseGVN *phase, bool can_reshape){
  // Report progress (this) only when remove_dead_region changed something.
  if (remove_dead_region(phase, can_reshape)) {
    return this;
  }
  return NULL;
}
//=============================================================================
MemBarNode::MemBarNode(Compile* C, int alias_idx, Node* precedent)
......
......@@ -765,6 +765,54 @@ public:
virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
};
//------------------------------StrEquals-------------------------------------
class StrEqualsNode: public Node {
public:
  // Inputs, in order: control, four memory states (char array, value,
  // count, offset) and the two strings s1 and s2.
  StrEqualsNode(Node *control,
                Node* char_array_mem,
                Node* value_mem,
                Node* count_mem,
                Node* offset_mem,
                Node* s1, Node* s2)
    : Node(control, char_array_mem, value_mem, count_mem, offset_mem, s1, s2) {
  }

  virtual int Opcode() const;

  virtual bool depends_only_on_test() const {
    return false;
  }

  // The node produces a boolean.
  virtual const Type* bottom_type() const {
    return TypeInt::BOOL;
  }

  // Conservatively treated as aliasing with all of memory.
  virtual const TypePtr* adr_type() const {
    return TypePtr::BOTTOM;
  }

  virtual uint match_edge(uint idx) const;

  // The result lives in an integer register.
  virtual uint ideal_reg() const {
    return Op_RegI;
  }

  virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
};
//------------------------------StrIndexOf-------------------------------------
class StrIndexOfNode: public Node {
public:
  // Inputs, in order: control, four memory states (char array, value,
  // count, offset) and the two strings s1 and s2.
  StrIndexOfNode(Node *control,
                 Node* char_array_mem,
                 Node* value_mem,
                 Node* count_mem,
                 Node* offset_mem,
                 Node* s1, Node* s2)
    : Node(control, char_array_mem, value_mem, count_mem, offset_mem, s1, s2) {
  }

  virtual int Opcode() const;

  virtual bool depends_only_on_test() const {
    return false;
  }

  // The node produces an int.
  virtual const Type* bottom_type() const {
    return TypeInt::INT;
  }

  // Conservatively treated as aliasing with all of memory.
  virtual const TypePtr* adr_type() const {
    return TypePtr::BOTTOM;
  }

  virtual uint match_edge(uint idx) const;

  // The result lives in an integer register.
  virtual uint ideal_reg() const {
    return Op_RegI;
  }

  virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
};
//------------------------------AryEq---------------------------------------
class AryEqNode: public Node {
public:
......
......@@ -1366,9 +1366,6 @@ void Arguments::set_aggressive_opts_flags() {
if (AggressiveOpts && FLAG_IS_DEFAULT(DoEscapeAnalysis)) {
FLAG_SET_DEFAULT(DoEscapeAnalysis, true);
}
if (AggressiveOpts && FLAG_IS_DEFAULT(SpecialArraysEquals)) {
FLAG_SET_DEFAULT(SpecialArraysEquals, true);
}
if (AggressiveOpts && FLAG_IS_DEFAULT(BiasedLockingStartupDelay)) {
FLAG_SET_DEFAULT(BiasedLockingStartupDelay, 500);
}
......
......@@ -491,9 +491,15 @@ class CommandLineFlags {
develop(bool, SpecialStringIndexOf, true, \
"special version of string indexOf") \
\
product(bool, SpecialArraysEquals, false, \
develop(bool, SpecialStringEquals, true, \
"special version of string equals") \
\
develop(bool, SpecialArraysEquals, true, \
"special version of Arrays.equals(char[],char[])") \
\
product(bool, UseSSE42Intrinsics, false, \
"SSE4.2 versions of intrinsics") \
\
develop(bool, TraceCallFixup, false, \
"traces all call fixups") \
\
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册