提交 062d1ab1 编写于 作者: K kvn

Merge

...@@ -1007,6 +1007,67 @@ void Assembler::addss(XMMRegister dst, Address src) { ...@@ -1007,6 +1007,67 @@ void Assembler::addss(XMMRegister dst, Address src) {
emit_simd_arith(0x58, dst, src, VEX_SIMD_F3); emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
} }
// AESDEC, memory-operand form.
// Emits the 66 / 0F 38 SIMD prefix, the opcode byte 0xDE and then the operand
// bytes for (dst, src). dst is passed to the prefix helper as both destination
// and first source. Asserts that the CPU reports AES support.
void Assembler::aesdec(XMMRegister dst, Address src) {
  const int opcode = 0xde;
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_byte(opcode);
  emit_operand(dst, src);
}
// AESDEC, register-register form.
// The prefix helper emits the 66 / 0F 38 prefix and returns the register
// encoding bits; these are OR-ed with 0xC0 to form the final byte that follows
// the opcode byte 0xDE.
void Assembler::aesdec(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  const int reg_bits = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_byte(0xde);
  emit_byte(0xC0 | reg_bits);
}
// AESDECLAST, memory-operand form.
// Same emission sequence as the other AES memory forms, with opcode byte 0xDF:
// 66 / 0F 38 prefix, opcode, then the (dst, src) operand bytes.
void Assembler::aesdeclast(XMMRegister dst, Address src) {
  const int opcode = 0xdf;
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_byte(opcode);
  emit_operand(dst, src);
}
// AESDECLAST, register-register form.
// Emits prefix, opcode byte 0xDF, then 0xC0 combined with the register
// encoding returned by the prefix helper.
void Assembler::aesdeclast(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  const int reg_bits = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_byte(0xdf);
  emit_byte(0xC0 | reg_bits);
}
// AESENC, memory-operand form.
// Emits the 66 / 0F 38 SIMD prefix, opcode byte 0xDC and the (dst, src)
// operand bytes. Asserts that the CPU reports AES support.
void Assembler::aesenc(XMMRegister dst, Address src) {
  const int opcode = 0xdc;
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_byte(opcode);
  emit_operand(dst, src);
}
// AESENC, register-register form.
// Emits prefix, opcode byte 0xDC, then 0xC0 combined with the register
// encoding returned by the prefix helper.
void Assembler::aesenc(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  const int reg_bits = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_byte(0xdc);
  emit_byte(0xC0 | reg_bits);
}
// AESENCLAST, memory-operand form.
// Emits the 66 / 0F 38 SIMD prefix, opcode byte 0xDD and the (dst, src)
// operand bytes. Asserts that the CPU reports AES support.
void Assembler::aesenclast(XMMRegister dst, Address src) {
  const int opcode = 0xdd;
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_byte(opcode);
  emit_operand(dst, src);
}
// AESENCLAST, register-register form.
// Emits prefix, opcode byte 0xDD, then 0xC0 combined with the register
// encoding returned by the prefix helper.
void Assembler::aesenclast(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  const int reg_bits = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_byte(0xdd);
  emit_byte(0xC0 | reg_bits);
}
void Assembler::andl(Address dst, int32_t imm32) { void Assembler::andl(Address dst, int32_t imm32) {
InstructionMark im(this); InstructionMark im(this);
prefix(dst); prefix(dst);
...@@ -2307,6 +2368,22 @@ void Assembler::prefix(Prefix p) { ...@@ -2307,6 +2368,22 @@ void Assembler::prefix(Prefix p) {
a_byte(p); a_byte(p);
} }
// PSHUFB (shuffle bytes), register-register form.
// Requires SSSE3. Emits the 66 / 0F 38 prefix, opcode byte 0x00, then 0xC0
// combined with the register encoding returned by the prefix helper.
void Assembler::pshufb(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_ssse3(), "");
  const int reg_bits = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_byte(0x00);
  emit_byte(0xC0 | reg_bits);
}
// PSHUFB (shuffle bytes), memory-operand form.
// Requires SSSE3. This overload additionally asserts UseAVX > 0: as the
// assertion text says, the SSE form requires a 16-byte aligned address, which
// cannot be checked for an arbitrary Address here.
void Assembler::pshufb(XMMRegister dst, Address src) {
  const int opcode = 0x00;
  assert(VM_Version::supports_ssse3(), "");
  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_byte(opcode);
  emit_operand(dst, src);
}
void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) { void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
assert(isByte(mode), "invalid value"); assert(isByte(mode), "invalid value");
NOT_LP64(assert(VM_Version::supports_sse2(), "")); NOT_LP64(assert(VM_Version::supports_sse2(), ""));
...@@ -8067,6 +8144,15 @@ void MacroAssembler::movptr(Address dst, Register src) { ...@@ -8067,6 +8144,15 @@ void MacroAssembler::movptr(Address dst, Register src) {
LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
} }
// movdqu from an AddressLiteral.
// If the literal is reachable it is used directly as the memory operand;
// otherwise its address is first loaded into rscratch1 (which is clobbered)
// and the load goes through [rscratch1 + 0].
void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src) {
  if (!reachable(src)) {
    // Out of direct reach: materialize the address in the scratch register.
    lea(rscratch1, src);
    Assembler::movdqu(dst, Address(rscratch1, 0));
    return;
  }
  Assembler::movdqu(dst, as_Address(src));
}
void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) { void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) {
if (reachable(src)) { if (reachable(src)) {
Assembler::movsd(dst, as_Address(src)); Assembler::movsd(dst, as_Address(src));
...@@ -8357,6 +8443,17 @@ void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) { ...@@ -8357,6 +8443,17 @@ void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
} }
} }
// pshufb with an AddressLiteral as the shuffle-mask operand.
// Unless AVX is enabled, the literal's target must be 16-byte aligned (checked
// by the assert). A reachable literal is used directly as the memory operand;
// otherwise its address is loaded into rscratch1 (clobbered) first.
void MacroAssembler::pshufb(XMMRegister dst, AddressLiteral src) {
  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
  if (!reachable(src)) {
    // Out of direct reach: go through the scratch register.
    lea(rscratch1, src);
    Assembler::pshufb(dst, Address(rscratch1, 0));
    return;
  }
  Assembler::pshufb(dst, as_Address(src));
}
// AVX 3-operands instructions // AVX 3-operands instructions
void MacroAssembler::vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { void MacroAssembler::vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
......
...@@ -875,6 +875,17 @@ private: ...@@ -875,6 +875,17 @@ private:
void addss(XMMRegister dst, Address src); void addss(XMMRegister dst, Address src);
void addss(XMMRegister dst, XMMRegister src); void addss(XMMRegister dst, XMMRegister src);
// AES instructions
// Each instruction has a memory-operand and a register-register overload.
// dst is used as both the first source and the destination; the
// implementations assert VM_Version::supports_aes().
// aesdec / aesdeclast: decryption round / final decryption round.
void aesdec(XMMRegister dst, Address src);
void aesdec(XMMRegister dst, XMMRegister src);
void aesdeclast(XMMRegister dst, Address src);
void aesdeclast(XMMRegister dst, XMMRegister src);
// aesenc / aesenclast: encryption round / final encryption round.
void aesenc(XMMRegister dst, Address src);
void aesenc(XMMRegister dst, XMMRegister src);
void aesenclast(XMMRegister dst, Address src);
void aesenclast(XMMRegister dst, XMMRegister src);
void andl(Address dst, int32_t imm32); void andl(Address dst, int32_t imm32);
void andl(Register dst, int32_t imm32); void andl(Register dst, int32_t imm32);
void andl(Register dst, Address src); void andl(Register dst, Address src);
...@@ -1424,6 +1435,10 @@ private: ...@@ -1424,6 +1435,10 @@ private:
void prefetcht2(Address src); void prefetcht2(Address src);
void prefetchw(Address src); void prefetchw(Address src);
// Shuffle Bytes
// Both overloads assert SSSE3 support; the Address overload additionally
// asserts UseAVX > 0 (the SSE form requires a 16-byte aligned address).
void pshufb(XMMRegister dst, XMMRegister src);
void pshufb(XMMRegister dst, Address src);
// Shuffle Packed Doublewords // Shuffle Packed Doublewords
void pshufd(XMMRegister dst, XMMRegister src, int mode); void pshufd(XMMRegister dst, XMMRegister src, int mode);
void pshufd(XMMRegister dst, Address src, int mode); void pshufd(XMMRegister dst, Address src, int mode);
...@@ -2611,6 +2626,12 @@ public: ...@@ -2611,6 +2626,12 @@ public:
void divss(XMMRegister dst, Address src) { Assembler::divss(dst, src); } void divss(XMMRegister dst, Address src) { Assembler::divss(dst, src); }
void divss(XMMRegister dst, AddressLiteral src); void divss(XMMRegister dst, AddressLiteral src);
// Move Unaligned Double Quadword
// The first three overloads forward unchanged to Assembler::movdqu. They are
// redeclared here alongside the AddressLiteral overload, which is defined out
// of line.
void movdqu(Address dst, XMMRegister src) { Assembler::movdqu(dst, src); }
void movdqu(XMMRegister dst, Address src) { Assembler::movdqu(dst, src); }
void movdqu(XMMRegister dst, XMMRegister src) { Assembler::movdqu(dst, src); }
void movdqu(XMMRegister dst, AddressLiteral src);
void movsd(XMMRegister dst, XMMRegister src) { Assembler::movsd(dst, src); } void movsd(XMMRegister dst, XMMRegister src) { Assembler::movsd(dst, src); }
void movsd(Address dst, XMMRegister src) { Assembler::movsd(dst, src); } void movsd(Address dst, XMMRegister src) { Assembler::movsd(dst, src); }
void movsd(XMMRegister dst, Address src) { Assembler::movsd(dst, src); } void movsd(XMMRegister dst, Address src) { Assembler::movsd(dst, src); }
...@@ -2658,6 +2679,10 @@ public: ...@@ -2658,6 +2679,10 @@ public:
void xorps(XMMRegister dst, Address src) { Assembler::xorps(dst, src); } void xorps(XMMRegister dst, Address src) { Assembler::xorps(dst, src); }
void xorps(XMMRegister dst, AddressLiteral src); void xorps(XMMRegister dst, AddressLiteral src);
// Shuffle Bytes
// The register and Address overloads forward unchanged to Assembler::pshufb;
// the AddressLiteral overload is defined out of line.
void pshufb(XMMRegister dst, XMMRegister src) { Assembler::pshufb(dst, src); }
void pshufb(XMMRegister dst, Address src) { Assembler::pshufb(dst, src); }
void pshufb(XMMRegister dst, AddressLiteral src);
// AVX 3-operands instructions // AVX 3-operands instructions
void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddsd(dst, nds, src); } void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddsd(dst, nds, src); }
......
...@@ -44,3 +44,4 @@ ...@@ -44,3 +44,4 @@
address StubRoutines::x86::_verify_mxcsr_entry = NULL; address StubRoutines::x86::_verify_mxcsr_entry = NULL;
address StubRoutines::x86::_verify_fpu_cntrl_wrd_entry = NULL; address StubRoutines::x86::_verify_fpu_cntrl_wrd_entry = NULL;
// Address of the key shuffle mask; starts out NULL (the code that assigns it
// is not visible in this hunk).
address StubRoutines::x86::_key_shuffle_mask_addr = NULL;
...@@ -41,10 +41,14 @@ class x86 { ...@@ -41,10 +41,14 @@ class x86 {
private: private:
static address _verify_mxcsr_entry; static address _verify_mxcsr_entry;
static address _verify_fpu_cntrl_wrd_entry; static address _verify_fpu_cntrl_wrd_entry;
// shuffle mask for fixing up 128-bit words consisting of big-endian 32-bit integers
static address _key_shuffle_mask_addr;
public: public:
static address verify_mxcsr_entry() { return _verify_mxcsr_entry; } static address verify_mxcsr_entry() { return _verify_mxcsr_entry; }
static address verify_fpu_cntrl_wrd_entry() { return _verify_fpu_cntrl_wrd_entry; } static address verify_fpu_cntrl_wrd_entry() { return _verify_fpu_cntrl_wrd_entry; }
// Returns the stored address of the key shuffle mask (NULL until it has been set).
static address key_shuffle_mask_addr() { return _key_shuffle_mask_addr; }
}; };
static bool returns_to_call_stub(address return_pc) { return return_pc == _call_stub_return_address; } static bool returns_to_call_stub(address return_pc) { return return_pc == _call_stub_return_address; }
......
...@@ -56,3 +56,4 @@ address StubRoutines::x86::_float_sign_flip = NULL; ...@@ -56,3 +56,4 @@ address StubRoutines::x86::_float_sign_flip = NULL;
address StubRoutines::x86::_double_sign_mask = NULL; address StubRoutines::x86::_double_sign_mask = NULL;
address StubRoutines::x86::_double_sign_flip = NULL; address StubRoutines::x86::_double_sign_flip = NULL;
address StubRoutines::x86::_mxcsr_std = NULL; address StubRoutines::x86::_mxcsr_std = NULL;
// Address of the key shuffle mask; starts out NULL (the code that assigns it
// is not visible in this hunk).
address StubRoutines::x86::_key_shuffle_mask_addr = NULL;
...@@ -54,6 +54,8 @@ class x86 { ...@@ -54,6 +54,8 @@ class x86 {
static address _double_sign_mask; static address _double_sign_mask;
static address _double_sign_flip; static address _double_sign_flip;
static address _mxcsr_std; static address _mxcsr_std;
// shuffle mask for fixing up 128-bit words consisting of big-endian 32-bit integers
static address _key_shuffle_mask_addr;
public: public:
...@@ -116,6 +118,9 @@ class x86 { ...@@ -116,6 +118,9 @@ class x86 {
{ {
return _mxcsr_std; return _mxcsr_std;
} }
// Returns the stored address of the key shuffle mask (NULL until it has been set).
static address key_shuffle_mask_addr() { return _key_shuffle_mask_addr; }
}; };
#endif // CPU_X86_VM_STUBROUTINES_X86_64_HPP #endif // CPU_X86_VM_STUBROUTINES_X86_64_HPP
...@@ -419,13 +419,16 @@ void VM_Version::get_processor_features() { ...@@ -419,13 +419,16 @@ void VM_Version::get_processor_features() {
if (UseAVX < 1) if (UseAVX < 1)
_cpuFeatures &= ~CPU_AVX; _cpuFeatures &= ~CPU_AVX;
if (!UseAES && !FLAG_IS_DEFAULT(UseAES))
_cpuFeatures &= ~CPU_AES;
if (logical_processors_per_package() == 1) { if (logical_processors_per_package() == 1) {
// HT processor could be installed on a system which doesn't support HT. // HT processor could be installed on a system which doesn't support HT.
_cpuFeatures &= ~CPU_HT; _cpuFeatures &= ~CPU_HT;
} }
char buf[256]; char buf[256];
jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
cores_per_cpu(), threads_per_core(), cores_per_cpu(), threads_per_core(),
cpu_family(), _model, _stepping, cpu_family(), _model, _stepping,
(supports_cmov() ? ", cmov" : ""), (supports_cmov() ? ", cmov" : ""),
...@@ -441,6 +444,7 @@ void VM_Version::get_processor_features() { ...@@ -441,6 +444,7 @@ void VM_Version::get_processor_features() {
(supports_popcnt() ? ", popcnt" : ""), (supports_popcnt() ? ", popcnt" : ""),
(supports_avx() ? ", avx" : ""), (supports_avx() ? ", avx" : ""),
(supports_avx2() ? ", avx2" : ""), (supports_avx2() ? ", avx2" : ""),
(supports_aes() ? ", aes" : ""),
(supports_mmx_ext() ? ", mmxext" : ""), (supports_mmx_ext() ? ", mmxext" : ""),
(supports_3dnow_prefetch() ? ", 3dnowpref" : ""), (supports_3dnow_prefetch() ? ", 3dnowpref" : ""),
(supports_lzcnt() ? ", lzcnt": ""), (supports_lzcnt() ? ", lzcnt": ""),
...@@ -472,6 +476,29 @@ void VM_Version::get_processor_features() { ...@@ -472,6 +476,29 @@ void VM_Version::get_processor_features() {
if (!supports_avx ()) // Drop to 0 if no AVX support if (!supports_avx ()) // Drop to 0 if no AVX support
UseAVX = 0; UseAVX = 0;
// Use AES instructions if available.
if (supports_aes()) {
if (FLAG_IS_DEFAULT(UseAES)) {
UseAES = true;
}
} else if (UseAES) {
if (!FLAG_IS_DEFAULT(UseAES))
warning("AES instructions not available on this CPU");
FLAG_SET_DEFAULT(UseAES, false);
}
// The AES intrinsic stubs require AES instruction support (of course)
// but also require AVX mode for misaligned SSE access
if (UseAES && (UseAVX > 0)) {
if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
UseAESIntrinsics = true;
}
} else if (UseAESIntrinsics) {
if (!FLAG_IS_DEFAULT(UseAESIntrinsics))
warning("AES intrinsics not available on this CPU");
FLAG_SET_DEFAULT(UseAESIntrinsics, false);
}
#ifdef COMPILER2 #ifdef COMPILER2
if (UseFPUForSpilling) { if (UseFPUForSpilling) {
if (UseSSE < 2) { if (UseSSE < 2) {
...@@ -714,6 +741,9 @@ void VM_Version::get_processor_features() { ...@@ -714,6 +741,9 @@ void VM_Version::get_processor_features() {
if (UseAVX > 0) { if (UseAVX > 0) {
tty->print(" UseAVX=%d",UseAVX); tty->print(" UseAVX=%d",UseAVX);
} }
if (UseAES) {
tty->print(" UseAES=1");
}
tty->cr(); tty->cr();
tty->print("Allocation"); tty->print("Allocation");
if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow_prefetch()) { if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow_prefetch()) {
......
...@@ -78,7 +78,9 @@ public: ...@@ -78,7 +78,9 @@ public:
sse4_2 : 1, sse4_2 : 1,
: 2, : 2,
popcnt : 1, popcnt : 1,
: 3, : 1,
aes : 1,
: 1,
osxsave : 1, osxsave : 1,
avx : 1, avx : 1,
: 3; : 3;
...@@ -244,7 +246,8 @@ protected: ...@@ -244,7 +246,8 @@ protected:
CPU_TSC = (1 << 15), CPU_TSC = (1 << 15),
CPU_TSCINV = (1 << 16), CPU_TSCINV = (1 << 16),
CPU_AVX = (1 << 17), CPU_AVX = (1 << 17),
CPU_AVX2 = (1 << 18) CPU_AVX2 = (1 << 18),
CPU_AES = (1 << 19)
} cpuFeatureFlags; } cpuFeatureFlags;
enum { enum {
...@@ -420,6 +423,8 @@ protected: ...@@ -420,6 +423,8 @@ protected:
result |= CPU_TSC; result |= CPU_TSC;
if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0) if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0)
result |= CPU_TSCINV; result |= CPU_TSCINV;
if (_cpuid_info.std_cpuid1_ecx.bits.aes != 0)
result |= CPU_AES;
// AMD features. // AMD features.
if (is_amd()) { if (is_amd()) {
...@@ -544,6 +549,7 @@ public: ...@@ -544,6 +549,7 @@ public:
static bool supports_avx() { return (_cpuFeatures & CPU_AVX) != 0; } static bool supports_avx() { return (_cpuFeatures & CPU_AVX) != 0; }
static bool supports_avx2() { return (_cpuFeatures & CPU_AVX2) != 0; } static bool supports_avx2() { return (_cpuFeatures & CPU_AVX2) != 0; }
static bool supports_tsc() { return (_cpuFeatures & CPU_TSC) != 0; } static bool supports_tsc() { return (_cpuFeatures & CPU_TSC) != 0; }
// True when the CPU_AES bit is set in _cpuFeatures.
static bool supports_aes() { return (_cpuFeatures & CPU_AES) != 0; }
// Intel features // Intel features
static bool is_intel_family_core() { return is_intel() && static bool is_intel_family_core() { return is_intel() &&
......
...@@ -4102,9 +4102,158 @@ instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ ...@@ -4102,9 +4102,158 @@ instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
// ----------------------- LogicalRightShift ----------------------------------- // ----------------------- LogicalRightShift -----------------------------------
// Shorts/Chars vector logical right shift produces incorrect Java result // Shorts vector logical right shift produces incorrect Java result
// for negative data because java code convert short value into int with // for negative data because java code convert short value into int with
// sign extension before a shift. // sign extension before a shift. But char vectors are fine since chars are
// unsigned values.
// Logical right shift of a 2-element short vector by a count held in a vector
// register. Two-operand form: $dst is both the input and the result.
instruct vsrl2S(vecS dst, vecS shift) %{
predicate(n->as_Vector()->length() == 2);
match(Set dst (URShiftVS dst shift));
format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
ins_encode %{
__ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
// Logical right shift of a 2-element short vector by an 8-bit immediate count.
// Two-operand form: $dst is both the input and the result.
instruct vsrl2S_imm(vecS dst, immI8 shift) %{
predicate(n->as_Vector()->length() == 2);
match(Set dst (URShiftVS dst shift));
format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
ins_encode %{
__ psrlw($dst$$XMMRegister, (int)$shift$$constant);
%}
ins_pipe( pipe_slow );
%}
// AVX three-operand logical right shift of a 2-element short vector:
// $dst = $src >>> $shift, with the count held in a vector register.
instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    // Last argument is the vector256 flag: false here.
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, false);
  %}
  ins_pipe( pipe_slow );
%}
// AVX three-operand logical right shift of a 2-element short vector by an
// 8-bit immediate count: $dst = $src >>> $shift.
instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    // Last argument is the vector256 flag: false here.
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, false);
  %}
  ins_pipe( pipe_slow );
%}
// Logical right shift of a 4-element short vector by a count held in a vector
// register. Two-operand form: $dst is both the input and the result.
instruct vsrl4S(vecD dst, vecS shift) %{
predicate(n->as_Vector()->length() == 4);
match(Set dst (URShiftVS dst shift));
format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
ins_encode %{
__ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
// Logical right shift of a 4-element short vector by an 8-bit immediate count.
// Two-operand form: $dst is both the input and the result.
instruct vsrl4S_imm(vecD dst, immI8 shift) %{
predicate(n->as_Vector()->length() == 4);
match(Set dst (URShiftVS dst shift));
format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
ins_encode %{
__ psrlw($dst$$XMMRegister, (int)$shift$$constant);
%}
ins_pipe( pipe_slow );
%}
// AVX three-operand logical right shift of a 4-element short vector:
// $dst = $src >>> $shift, with the count held in a vector register.
instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    // Last argument is the vector256 flag: false here.
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, false);
  %}
  ins_pipe( pipe_slow );
%}
// AVX three-operand logical right shift of a 4-element short vector by an
// 8-bit immediate count: $dst = $src >>> $shift.
instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    // Last argument is the vector256 flag: false here.
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, false);
  %}
  ins_pipe( pipe_slow );
%}
// Logical right shift of an 8-element short vector by a count held in a vector
// register. Two-operand form: $dst is both the input and the result.
instruct vsrl8S(vecX dst, vecS shift) %{
predicate(n->as_Vector()->length() == 8);
match(Set dst (URShiftVS dst shift));
format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
ins_encode %{
__ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
// Logical right shift of an 8-element short vector by an 8-bit immediate count.
// Two-operand form: $dst is both the input and the result.
instruct vsrl8S_imm(vecX dst, immI8 shift) %{
predicate(n->as_Vector()->length() == 8);
match(Set dst (URShiftVS dst shift));
format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
ins_encode %{
__ psrlw($dst$$XMMRegister, (int)$shift$$constant);
%}
ins_pipe( pipe_slow );
%}
// AVX three-operand logical right shift of an 8-element short vector:
// $dst = $src >>> $shift, with the count held in a vector register.
instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    // Last argument is the vector256 flag: false here.
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, false);
  %}
  ins_pipe( pipe_slow );
%}
// AVX three-operand logical right shift of an 8-element short vector by an
// 8-bit immediate count: $dst = $src >>> $shift.
instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    // Last argument is the vector256 flag: false here.
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, false);
  %}
  ins_pipe( pipe_slow );
%}
// Three-operand logical right shift of a 16-element short vector; only
// selected when UseAVX > 1. $dst = $src >>> $shift, count in a vector register.
instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    // Last argument is the vector256 flag: true for this 16-element form.
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, true);
  %}
  ins_pipe( pipe_slow );
%}
// Three-operand logical right shift of a 16-element short vector by an 8-bit
// immediate count; only selected when UseAVX > 1. $dst = $src >>> $shift.
instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    // Last argument is the vector256 flag: true for this 16-element form.
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, true);
  %}
  ins_pipe( pipe_slow );
%}
// Integers vector logical right shift // Integers vector logical right shift
instruct vsrl2I(vecD dst, vecS shift) %{ instruct vsrl2I(vecD dst, vecS shift) %{
......
...@@ -1844,17 +1844,12 @@ void GraphBuilder::invoke(Bytecodes::Code code) { ...@@ -1844,17 +1844,12 @@ void GraphBuilder::invoke(Bytecodes::Code code) {
code == Bytecodes::_invokevirtual && target->is_final_method() || code == Bytecodes::_invokevirtual && target->is_final_method() ||
code == Bytecodes::_invokedynamic) { code == Bytecodes::_invokedynamic) {
ciMethod* inline_target = (cha_monomorphic_target != NULL) ? cha_monomorphic_target : target; ciMethod* inline_target = (cha_monomorphic_target != NULL) ? cha_monomorphic_target : target;
bool success = false; // static binding => check if callee is ok
if (target->is_method_handle_intrinsic()) { bool success = try_inline(inline_target, (cha_monomorphic_target != NULL) || (exact_target != NULL), code, better_receiver);
// method handle invokes
success = try_method_handle_inline(target);
} else {
// static binding => check if callee is ok
success = try_inline(inline_target, (cha_monomorphic_target != NULL) || (exact_target != NULL), code, better_receiver);
}
CHECK_BAILOUT();
CHECK_BAILOUT();
clear_inline_bailout(); clear_inline_bailout();
if (success) { if (success) {
// Register dependence if JVMTI has either breakpoint // Register dependence if JVMTI has either breakpoint
// setting or hotswapping of methods capabilities since they may // setting or hotswapping of methods capabilities since they may
...@@ -3201,6 +3196,11 @@ bool GraphBuilder::try_inline(ciMethod* callee, bool holder_known, Bytecodes::Co ...@@ -3201,6 +3196,11 @@ bool GraphBuilder::try_inline(ciMethod* callee, bool holder_known, Bytecodes::Co
return false; return false;
} }
// method handle invokes
if (callee->is_method_handle_intrinsic()) {
return try_method_handle_inline(callee);
}
// handle intrinsics // handle intrinsics
if (callee->intrinsic_id() != vmIntrinsics::_none) { if (callee->intrinsic_id() != vmIntrinsics::_none) {
if (try_inline_intrinsics(callee)) { if (try_inline_intrinsics(callee)) {
...@@ -3885,10 +3885,14 @@ bool GraphBuilder::try_method_handle_inline(ciMethod* callee) { ...@@ -3885,10 +3885,14 @@ bool GraphBuilder::try_method_handle_inline(ciMethod* callee) {
ValueType* type = state()->stack_at(args_base)->type(); ValueType* type = state()->stack_at(args_base)->type();
if (type->is_constant()) { if (type->is_constant()) {
ciMethod* target = type->as_ObjectType()->constant_value()->as_method_handle()->get_vmtarget(); ciMethod* target = type->as_ObjectType()->constant_value()->as_method_handle()->get_vmtarget();
guarantee(!target->is_method_handle_intrinsic(), "should not happen"); // XXX remove // We don't do CHA here so only inline static and statically bindable methods.
Bytecodes::Code bc = target->is_static() ? Bytecodes::_invokestatic : Bytecodes::_invokevirtual; if (target->is_static() || target->can_be_statically_bound()) {
if (try_inline(target, /*holder_known*/ true, bc)) { Bytecodes::Code bc = target->is_static() ? Bytecodes::_invokestatic : Bytecodes::_invokevirtual;
return true; if (try_inline(target, /*holder_known*/ true, bc)) {
return true;
}
} else {
print_inlining(target, "not static or statically bindable", /*success*/ false);
} }
} else { } else {
print_inlining(callee, "receiver not constant", /*success*/ false); print_inlining(callee, "receiver not constant", /*success*/ false);
...@@ -3941,9 +3945,14 @@ bool GraphBuilder::try_method_handle_inline(ciMethod* callee) { ...@@ -3941,9 +3945,14 @@ bool GraphBuilder::try_method_handle_inline(ciMethod* callee) {
} }
j += t->size(); // long and double take two slots j += t->size(); // long and double take two slots
} }
Bytecodes::Code bc = target->is_static() ? Bytecodes::_invokestatic : Bytecodes::_invokevirtual; // We don't do CHA here so only inline static and statically bindable methods.
if (try_inline(target, /*holder_known*/ true, bc)) { if (target->is_static() || target->can_be_statically_bound()) {
return true; Bytecodes::Code bc = target->is_static() ? Bytecodes::_invokestatic : Bytecodes::_invokevirtual;
if (try_inline(target, /*holder_known*/ true, bc)) {
return true;
}
} else {
print_inlining(target, "not static or statically bindable", /*success*/ false);
} }
} }
} else { } else {
......
...@@ -110,6 +110,7 @@ ...@@ -110,6 +110,7 @@
template(sun_jkernel_DownloadManager, "sun/jkernel/DownloadManager") \ template(sun_jkernel_DownloadManager, "sun/jkernel/DownloadManager") \
template(getBootClassPathEntryForClass_name, "getBootClassPathEntryForClass") \ template(getBootClassPathEntryForClass_name, "getBootClassPathEntryForClass") \
template(sun_misc_PostVMInitHook, "sun/misc/PostVMInitHook") \ template(sun_misc_PostVMInitHook, "sun/misc/PostVMInitHook") \
template(sun_misc_Launcher_ExtClassLoader, "sun/misc/Launcher$ExtClassLoader") \
\ \
/* Java runtime version access */ \ /* Java runtime version access */ \
template(sun_misc_Version, "sun/misc/Version") \ template(sun_misc_Version, "sun/misc/Version") \
...@@ -723,6 +724,21 @@ ...@@ -723,6 +724,21 @@
/* java/lang/ref/Reference */ \ /* java/lang/ref/Reference */ \
do_intrinsic(_Reference_get, java_lang_ref_Reference, get_name, void_object_signature, F_R) \ do_intrinsic(_Reference_get, java_lang_ref_Reference, get_name, void_object_signature, F_R) \
\ \
/* support for com.sun.crypto.provider.AESCrypt and some of its callers */ \
do_class(com_sun_crypto_provider_aescrypt, "com/sun/crypto/provider/AESCrypt") \
do_intrinsic(_aescrypt_encryptBlock, com_sun_crypto_provider_aescrypt, encryptBlock_name, byteArray_int_byteArray_int_signature, F_R) \
do_intrinsic(_aescrypt_decryptBlock, com_sun_crypto_provider_aescrypt, decryptBlock_name, byteArray_int_byteArray_int_signature, F_R) \
do_name( encryptBlock_name, "encryptBlock") \
do_name( decryptBlock_name, "decryptBlock") \
do_signature(byteArray_int_byteArray_int_signature, "([BI[BI)V") \
\
do_class(com_sun_crypto_provider_cipherBlockChaining, "com/sun/crypto/provider/CipherBlockChaining") \
do_intrinsic(_cipherBlockChaining_encryptAESCrypt, com_sun_crypto_provider_cipherBlockChaining, encrypt_name, byteArray_int_int_byteArray_int_signature, F_R) \
do_intrinsic(_cipherBlockChaining_decryptAESCrypt, com_sun_crypto_provider_cipherBlockChaining, decrypt_name, byteArray_int_int_byteArray_int_signature, F_R) \
do_name( encrypt_name, "encrypt") \
do_name( decrypt_name, "decrypt") \
do_signature(byteArray_int_int_byteArray_int_signature, "([BII[BI)V") \
\
/* support for sun.misc.Unsafe */ \ /* support for sun.misc.Unsafe */ \
do_class(sun_misc_Unsafe, "sun/misc/Unsafe") \ do_class(sun_misc_Unsafe, "sun/misc/Unsafe") \
\ \
......
...@@ -1155,8 +1155,12 @@ methodHandle Method::clone_with_new_data(methodHandle m, u_char* new_code, int n ...@@ -1155,8 +1155,12 @@ methodHandle Method::clone_with_new_data(methodHandle m, u_char* new_code, int n
vmSymbols::SID Method::klass_id_for_intrinsics(Klass* holder) { vmSymbols::SID Method::klass_id_for_intrinsics(Klass* holder) {
// if loader is not the default loader (i.e., != NULL), we can't know the intrinsics // if loader is not the default loader (i.e., != NULL), we can't know the intrinsics
// because we are not loading from core libraries // because we are not loading from core libraries
if (InstanceKlass::cast(holder)->class_loader() != NULL) // exception: the AES intrinsics come from lib/ext/sunjce_provider.jar
// which does not use the class default class loader so we check for its loader here
if ((InstanceKlass::cast(holder)->class_loader() != NULL) &&
InstanceKlass::cast(holder)->class_loader()->klass()->name() != vmSymbols::sun_misc_Launcher_ExtClassLoader()) {
return vmSymbols::NO_SID; // regardless of name, no intrinsics here return vmSymbols::NO_SID; // regardless of name, no intrinsics here
}
// see if the klass name is well-known: // see if the klass name is well-known:
Symbol* klass_name = InstanceKlass::cast(holder)->name(); Symbol* klass_name = InstanceKlass::cast(holder)->name();
......
...@@ -439,6 +439,9 @@ ...@@ -439,6 +439,9 @@
product(bool, DoEscapeAnalysis, true, \ product(bool, DoEscapeAnalysis, true, \
"Perform escape analysis") \ "Perform escape analysis") \
\ \
develop(bool, ExitEscapeAnalysisOnTimeout, true, \
"Exit or throw assert in EA when it reaches time limit") \
\
notproduct(bool, PrintEscapeAnalysis, false, \ notproduct(bool, PrintEscapeAnalysis, false, \
"Print the results of escape analysis") \ "Print the results of escape analysis") \
\ \
......
...@@ -670,6 +670,129 @@ CallGenerator* CallGenerator::for_method_handle_inline(JVMState* jvms, ciMethod* ...@@ -670,6 +670,129 @@ CallGenerator* CallGenerator::for_method_handle_inline(JVMState* jvms, ciMethod*
} }
//------------------------PredictedIntrinsicGenerator------------------------------
// Internal class which handles all predicted Intrinsic calls.
// Call generator that pairs an intrinsic with a fallback generator.
// generate() asks the intrinsic for a predicate, emits the fallback call on
// the control path returned by that predicate, emits the intrinsic on the
// remaining path, and merges the two.
class PredictedIntrinsicGenerator : public CallGenerator {
  CallGenerator* _intrinsic;  // supplies generate_predicate() and the intrinsic expansion
  CallGenerator* _cg;         // fallback generator; also supplies method()

 public:
  PredictedIntrinsicGenerator(CallGenerator* intrinsic, CallGenerator* cg)
    : CallGenerator(cg->method()),
      _intrinsic(intrinsic),
      _cg(cg) {}

  virtual bool is_virtual()   const { return true; }
  virtual bool is_inlined()   const { return true; }
  virtual bool is_intrinsic() const { return true; }

  virtual JVMState* generate(JVMState* jvms);
};
// Factory: wraps `intrinsic` and the fallback generator `cg` in a new
// PredictedIntrinsicGenerator and returns it.
CallGenerator* CallGenerator::for_predicted_intrinsic(CallGenerator* intrinsic,
                                                      CallGenerator* cg) {
  CallGenerator* predicted = new PredictedIntrinsicGenerator(intrinsic, cg);
  return predicted;
}
// Generates a guarded intrinsic call.
//
// 1. Ask the intrinsic for a predicate; it returns the control path (slow_ctl)
//    on which the fallback generator _cg must be used, or NULL.
// 2. On slow_ctl, generate the fallback call and remember its map (slow_map).
// 3. On the remaining path, generate the intrinsic; if that fails, fall back
//    to the slow code or, when there is none, to a direct call.
// 4. If both paths are live, merge control, i/o, memory and every differing
//    map input with a Region and Phis.
//
// Returns the resulting JVMState, or NULL when compilation is failing.
JVMState* PredictedIntrinsicGenerator::generate(JVMState* jvms) {
  GraphKit kit(jvms);
  PhaseGVN& gvn = kit.gvn();

  CompileLog* log = kit.C->log();
  if (log != NULL) {
    log->elem("predicted_intrinsic bci='%d' method='%d'",
              jvms->bci(), log->identify(method()));
  }

  Node* slow_ctl = _intrinsic->generate_predicate(kit.sync_jvms());
  if (kit.failing())
    return NULL;  // might happen because of NodeCountInliningCutoff

  SafePointNode* slow_map = NULL;
  // Initialized so that the "predicate is always false" exit below never reads
  // an indeterminate pointer when the slow path was not generated.
  JVMState* slow_jvms = NULL;
  if (slow_ctl != NULL) {
    PreserveJVMState pjvms(&kit);
    kit.set_control(slow_ctl);
    if (!kit.stopped()) {
      slow_jvms = _cg->generate(kit.sync_jvms());
      if (kit.failing())
        return NULL;  // might happen because of NodeCountInliningCutoff
      assert(slow_jvms != NULL, "must be");
      kit.add_exception_states_from(slow_jvms);
      kit.set_map(slow_jvms->map());
      if (!kit.stopped())
        slow_map = kit.stop();
    }
  }

  if (kit.stopped()) {
    // Predicate is always false.
    kit.set_jvms(slow_jvms);
    return kit.transfer_exceptions_into_jvms();
  }

  // Generate intrinsic code:
  JVMState* new_jvms = _intrinsic->generate(kit.sync_jvms());
  if (new_jvms == NULL) {
    // Intrinsic failed, so use slow code or make a direct call.
    if (slow_map == NULL) {
      CallGenerator* cg = CallGenerator::for_direct_call(method());
      new_jvms = cg->generate(kit.sync_jvms());
    } else {
      kit.set_jvms(slow_jvms);
      return kit.transfer_exceptions_into_jvms();
    }
  }
  kit.add_exception_states_from(new_jvms);
  kit.set_jvms(new_jvms);

  // Need to merge slow and fast?
  if (slow_map == NULL) {
    // The fast path is the only path remaining.
    return kit.transfer_exceptions_into_jvms();
  }

  if (kit.stopped()) {
    // Intrinsic method threw an exception, so it's just the slow path after all.
    kit.set_jvms(slow_jvms);
    return kit.transfer_exceptions_into_jvms();
  }

  // Finish the diamond.
  kit.C->set_has_split_ifs(true); // Has chance for split-if optimization
  RegionNode* region = new (kit.C) RegionNode(3);
  region->init_req(1, kit.control());
  region->init_req(2, slow_map->control());
  kit.set_control(gvn.transform(region));
  Node* iophi = PhiNode::make(region, kit.i_o(), Type::ABIO);
  iophi->set_req(2, slow_map->i_o());
  kit.set_i_o(gvn.transform(iophi));
  kit.merge_memory(slow_map->merged_memory(), region, 2);

  // Merge the remaining map inputs; a Phi is only needed where the fast and
  // slow maps disagree.
  uint tos = kit.jvms()->stkoff() + kit.sp();
  uint limit = slow_map->req();
  for (uint i = TypeFunc::Parms; i < limit; i++) {
    // Skip unused stack slots; fast forward to monoff();
    if (i == tos) {
      i = kit.jvms()->monoff();
      if( i >= limit ) break;
    }
    Node* m = kit.map()->in(i);
    Node* n = slow_map->in(i);
    if (m != n) {
      const Type* t = gvn.type(m)->meet(gvn.type(n));
      Node* phi = PhiNode::make(region, m, t);
      phi->set_req(2, n);
      kit.map()->set_req(i, gvn.transform(phi));
    }
  }
  return kit.transfer_exceptions_into_jvms();
}
//-------------------------UncommonTrapCallGenerator----------------------------- //-------------------------UncommonTrapCallGenerator-----------------------------
// Internal class which handles all out-of-line calls checking receiver type. // Internal class which handles all out-of-line calls checking receiver type.
class UncommonTrapCallGenerator : public CallGenerator { class UncommonTrapCallGenerator : public CallGenerator {
......
...@@ -143,6 +143,9 @@ class CallGenerator : public ResourceObj { ...@@ -143,6 +143,9 @@ class CallGenerator : public ResourceObj {
// Registry for intrinsics: // Registry for intrinsics:
static CallGenerator* for_intrinsic(ciMethod* m); static CallGenerator* for_intrinsic(ciMethod* m);
static void register_intrinsic(ciMethod* m, CallGenerator* cg); static void register_intrinsic(ciMethod* m, CallGenerator* cg);
static CallGenerator* for_predicted_intrinsic(CallGenerator* intrinsic,
CallGenerator* cg);
virtual Node* generate_predicate(JVMState* jvms) { return NULL; };
static void print_inlining(ciMethod* callee, int inline_level, int bci, const char* msg) { static void print_inlining(ciMethod* callee, int inline_level, int bci, const char* msg) {
if (PrintInlining) if (PrintInlining)
......
...@@ -3047,9 +3047,9 @@ bool Compile::Constant::operator==(const Constant& other) { ...@@ -3047,9 +3047,9 @@ bool Compile::Constant::operator==(const Constant& other) {
case T_LONG: case T_LONG:
case T_DOUBLE: return (_v._value.j == other._v._value.j); case T_DOUBLE: return (_v._value.j == other._v._value.j);
case T_OBJECT: case T_OBJECT:
case T_METADATA: return (_v._metadata == other._v._metadata);
case T_ADDRESS: return (_v._value.l == other._v._value.l); case T_ADDRESS: return (_v._value.l == other._v._value.l);
case T_VOID: return (_v._value.l == other._v._value.l); // jump-table entries case T_VOID: return (_v._value.l == other._v._value.l); // jump-table entries
case T_METADATA: return (_v._metadata == other._v._metadata);
default: ShouldNotReachHere(); default: ShouldNotReachHere();
} }
return false; return false;
......
...@@ -149,7 +149,7 @@ class Compile : public Phase { ...@@ -149,7 +149,7 @@ class Compile : public Phase {
private: private:
BasicType _type; BasicType _type;
union { union {
jvalue _value; jvalue _value;
Metadata* _metadata; Metadata* _metadata;
} _v; } _v;
int _offset; // offset of this constant (in bytes) relative to the constant table base. int _offset; // offset of this constant (in bytes) relative to the constant table base.
......
...@@ -107,7 +107,17 @@ CallGenerator* Compile::call_generator(ciMethod* callee, int vtable_index, bool ...@@ -107,7 +107,17 @@ CallGenerator* Compile::call_generator(ciMethod* callee, int vtable_index, bool
// intrinsics handle strict f.p. correctly. // intrinsics handle strict f.p. correctly.
if (allow_inline && allow_intrinsics) { if (allow_inline && allow_intrinsics) {
CallGenerator* cg = find_intrinsic(callee, call_is_virtual); CallGenerator* cg = find_intrinsic(callee, call_is_virtual);
if (cg != NULL) return cg; if (cg != NULL) {
if (cg->is_predicted()) {
// Code without intrinsic but, hopefully, inlined.
CallGenerator* inline_cg = this->call_generator(callee,
vtable_index, call_is_virtual, jvms, allow_inline, prof_factor, false);
if (inline_cg != NULL) {
cg = CallGenerator::for_predicted_intrinsic(cg, inline_cg);
}
}
return cg;
}
} }
// Do method handle calls. // Do method handle calls.
......
...@@ -893,12 +893,16 @@ void ConnectionGraph::process_call_arguments(CallNode *call) { ...@@ -893,12 +893,16 @@ void ConnectionGraph::process_call_arguments(CallNode *call) {
arg_has_oops && (i > TypeFunc::Parms); arg_has_oops && (i > TypeFunc::Parms);
#ifdef ASSERT #ifdef ASSERT
if (!(is_arraycopy || if (!(is_arraycopy ||
call->as_CallLeaf()->_name != NULL && (call->as_CallLeaf()->_name != NULL &&
(strcmp(call->as_CallLeaf()->_name, "g1_wb_pre") == 0 || (strcmp(call->as_CallLeaf()->_name, "g1_wb_pre") == 0 ||
strcmp(call->as_CallLeaf()->_name, "g1_wb_post") == 0 )) strcmp(call->as_CallLeaf()->_name, "g1_wb_post") == 0 ||
) { strcmp(call->as_CallLeaf()->_name, "aescrypt_encryptBlock") == 0 ||
strcmp(call->as_CallLeaf()->_name, "aescrypt_decryptBlock") == 0 ||
strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_encryptAESCrypt") == 0 ||
strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_decryptAESCrypt") == 0)
))) {
call->dump(); call->dump();
assert(false, "EA: unexpected CallLeaf"); fatal(err_msg_res("EA unexpected CallLeaf %s", call->as_CallLeaf()->_name));
} }
#endif #endif
// Always process arraycopy's destination object since // Always process arraycopy's destination object since
...@@ -1080,7 +1084,7 @@ bool ConnectionGraph::complete_connection_graph( ...@@ -1080,7 +1084,7 @@ bool ConnectionGraph::complete_connection_graph(
C->log()->text("%s", (iterations >= CG_BUILD_ITER_LIMIT) ? "iterations" : "time"); C->log()->text("%s", (iterations >= CG_BUILD_ITER_LIMIT) ? "iterations" : "time");
C->log()->end_elem(" limit'"); C->log()->end_elem(" limit'");
} }
assert(false, err_msg_res("infinite EA connection graph build (%f sec, %d iterations) with %d nodes and worklist size %d", assert(ExitEscapeAnalysisOnTimeout, err_msg_res("infinite EA connection graph build (%f sec, %d iterations) with %d nodes and worklist size %d",
time.seconds(), iterations, nodes_size(), ptnodes_worklist.length())); time.seconds(), iterations, nodes_size(), ptnodes_worklist.length()));
// Possible infinite build_connection_graph loop, // Possible infinite build_connection_graph loop,
// bailout (no changes to ideal graph were made). // bailout (no changes to ideal graph were made).
......
...@@ -44,18 +44,22 @@ class LibraryIntrinsic : public InlineCallGenerator { ...@@ -44,18 +44,22 @@ class LibraryIntrinsic : public InlineCallGenerator {
public: public:
private: private:
bool _is_virtual; bool _is_virtual;
bool _is_predicted;
vmIntrinsics::ID _intrinsic_id; vmIntrinsics::ID _intrinsic_id;
public: public:
LibraryIntrinsic(ciMethod* m, bool is_virtual, vmIntrinsics::ID id) LibraryIntrinsic(ciMethod* m, bool is_virtual, bool is_predicted, vmIntrinsics::ID id)
: InlineCallGenerator(m), : InlineCallGenerator(m),
_is_virtual(is_virtual), _is_virtual(is_virtual),
_is_predicted(is_predicted),
_intrinsic_id(id) _intrinsic_id(id)
{ {
} }
virtual bool is_intrinsic() const { return true; } virtual bool is_intrinsic() const { return true; }
virtual bool is_virtual() const { return _is_virtual; } virtual bool is_virtual() const { return _is_virtual; }
virtual bool is_predicted() const { return _is_predicted; }
virtual JVMState* generate(JVMState* jvms); virtual JVMState* generate(JVMState* jvms);
virtual Node* generate_predicate(JVMState* jvms);
vmIntrinsics::ID intrinsic_id() const { return _intrinsic_id; } vmIntrinsics::ID intrinsic_id() const { return _intrinsic_id; }
}; };
...@@ -83,6 +87,7 @@ class LibraryCallKit : public GraphKit { ...@@ -83,6 +87,7 @@ class LibraryCallKit : public GraphKit {
int arg_size() const { return callee()->arg_size(); } int arg_size() const { return callee()->arg_size(); }
bool try_to_inline(); bool try_to_inline();
Node* try_to_predicate();
// Helper functions to inline natives // Helper functions to inline natives
void push_result(RegionNode* region, PhiNode* value); void push_result(RegionNode* region, PhiNode* value);
...@@ -148,6 +153,7 @@ class LibraryCallKit : public GraphKit { ...@@ -148,6 +153,7 @@ class LibraryCallKit : public GraphKit {
CallJavaNode* generate_method_call_virtual(vmIntrinsics::ID method_id) { CallJavaNode* generate_method_call_virtual(vmIntrinsics::ID method_id) {
return generate_method_call(method_id, true, false); return generate_method_call(method_id, true, false);
} }
Node * load_field_from_object(Node * fromObj, const char * fieldName, const char * fieldTypeString, bool is_exact, bool is_static);
Node* make_string_method_node(int opcode, Node* str1_start, Node* cnt1, Node* str2_start, Node* cnt2); Node* make_string_method_node(int opcode, Node* str1_start, Node* cnt1, Node* str2_start, Node* cnt2);
Node* make_string_method_node(int opcode, Node* str1, Node* str2); Node* make_string_method_node(int opcode, Node* str1, Node* str2);
...@@ -253,6 +259,10 @@ class LibraryCallKit : public GraphKit { ...@@ -253,6 +259,10 @@ class LibraryCallKit : public GraphKit {
bool inline_reverseBytes(vmIntrinsics::ID id); bool inline_reverseBytes(vmIntrinsics::ID id);
bool inline_reference_get(); bool inline_reference_get();
bool inline_aescrypt_Block(vmIntrinsics::ID id);
bool inline_cipherBlockChaining_AESCrypt(vmIntrinsics::ID id);
Node* inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting);
Node* get_key_start_from_aescrypt_object(Node* aescrypt_object);
}; };
...@@ -306,6 +316,8 @@ CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) { ...@@ -306,6 +316,8 @@ CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) {
} }
} }
bool is_predicted = false;
switch (id) { switch (id) {
case vmIntrinsics::_compareTo: case vmIntrinsics::_compareTo:
if (!SpecialStringCompareTo) return NULL; if (!SpecialStringCompareTo) return NULL;
...@@ -413,6 +425,18 @@ CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) { ...@@ -413,6 +425,18 @@ CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) {
break; break;
#endif #endif
case vmIntrinsics::_aescrypt_encryptBlock:
case vmIntrinsics::_aescrypt_decryptBlock:
if (!UseAESIntrinsics) return NULL;
break;
case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt:
case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
if (!UseAESIntrinsics) return NULL;
// these two require the predicated logic
is_predicted = true;
break;
default: default:
assert(id <= vmIntrinsics::LAST_COMPILER_INLINE, "caller responsibility"); assert(id <= vmIntrinsics::LAST_COMPILER_INLINE, "caller responsibility");
assert(id != vmIntrinsics::_Object_init && id != vmIntrinsics::_invoke, "enum out of order?"); assert(id != vmIntrinsics::_Object_init && id != vmIntrinsics::_invoke, "enum out of order?");
...@@ -444,7 +468,7 @@ CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) { ...@@ -444,7 +468,7 @@ CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) {
if (!InlineUnsafeOps) return NULL; if (!InlineUnsafeOps) return NULL;
} }
return new LibraryIntrinsic(m, is_virtual, (vmIntrinsics::ID) id); return new LibraryIntrinsic(m, is_virtual, is_predicted, (vmIntrinsics::ID) id);
} }
//----------------------register_library_intrinsics----------------------- //----------------------register_library_intrinsics-----------------------
...@@ -496,6 +520,47 @@ JVMState* LibraryIntrinsic::generate(JVMState* jvms) { ...@@ -496,6 +520,47 @@ JVMState* LibraryIntrinsic::generate(JVMState* jvms) {
return NULL; return NULL;
} }
// Builds the predicate for a predicted intrinsic by delegating to
// LibraryCallKit::try_to_predicate(). On success the slow-path control node is
// returned (it may be NULL); if the compilation is failing, the bail-out is
// reported and recorded in the intrinsic statistics and NULL is returned.
Node* LibraryIntrinsic::generate_predicate(JVMState* jvms) {
  LibraryCallKit kit(jvms, this);
  Compile* C = kit.C;
  int nodes = C->unique();
#ifndef PRODUCT
  assert(is_predicted(), "sanity");
  if ((PrintIntrinsics || PrintInlining NOT_PRODUCT( || PrintOptoInlining) ) && Verbose) {
    char buf[1000];
    const char* str = vmIntrinsics::short_name_as_C_string(intrinsic_id(), buf, sizeof(buf));
    tty->print_cr("Predicate for intrinsic %s", str);
  }
#endif

  Node* slow_path = kit.try_to_predicate();

  if (kit.failing()) {
    // The intrinsic bailed out
    if (PrintIntrinsics || PrintInlining NOT_PRODUCT( || PrintOptoInlining) ) {
      if (!jvms->has_method()) {
        // Root compile
        tty->print("Did not generate predicate for intrinsic %s%s at bci:%d in",
                   vmIntrinsics::name_at(intrinsic_id()),
                   (is_virtual() ? " (virtual)" : ""), kit.bci());
      } else {
        // Not a root compile.
        const char* msg = "failed to generate predicate for intrinsic";
        CompileTask::print_inlining(kit.callee(), jvms->depth() - 1, kit.bci(), msg);
      }
    }
    C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_failed);
    return NULL;
  }

  if (C->log() != NULL) {
    C->log()->elem("predicate_intrinsic id='%s'%s nodes='%d'",
                   vmIntrinsics::name_at(intrinsic_id()),
                   (is_virtual() ? " virtual='1'" : ""),
                   C->unique() - nodes);
  }
  return slow_path; // Could be NULL if the check folds.
}
bool LibraryCallKit::try_to_inline() { bool LibraryCallKit::try_to_inline() {
// Handle symbolic names for otherwise undistinguished boolean switches: // Handle symbolic names for otherwise undistinguished boolean switches:
const bool is_store = true; const bool is_store = true;
...@@ -767,6 +832,14 @@ bool LibraryCallKit::try_to_inline() { ...@@ -767,6 +832,14 @@ bool LibraryCallKit::try_to_inline() {
case vmIntrinsics::_Reference_get: case vmIntrinsics::_Reference_get:
return inline_reference_get(); return inline_reference_get();
case vmIntrinsics::_aescrypt_encryptBlock:
case vmIntrinsics::_aescrypt_decryptBlock:
return inline_aescrypt_Block(intrinsic_id());
case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt:
case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
return inline_cipherBlockChaining_AESCrypt(intrinsic_id());
default: default:
// If you get here, it may be that someone has added a new intrinsic // If you get here, it may be that someone has added a new intrinsic
// to the list in vmSymbols.hpp without implementing it here. // to the list in vmSymbols.hpp without implementing it here.
...@@ -780,6 +853,36 @@ bool LibraryCallKit::try_to_inline() { ...@@ -780,6 +853,36 @@ bool LibraryCallKit::try_to_inline() {
} }
} }
// Dispatches to the predicate builder of the current intrinsic and returns the
// slow-path control node. For an intrinsic without a predicate implementation
// the whole current control becomes the slow path and the fast path is killed.
Node* LibraryCallKit::try_to_predicate() {
  if (!jvms()->has_method()) {
    // Root JVMState has a null method.
    assert(map()->memory()->Opcode() == Op_Parm, "");
    // Insert the memory aliasing node
    set_all_memory(reset_memory());
  }
  assert(merged_memory(), "");

  const vmIntrinsics::ID id = intrinsic_id();
  if (id == vmIntrinsics::_cipherBlockChaining_encryptAESCrypt) {
    return inline_cipherBlockChaining_AESCrypt_predicate(false);
  }
  if (id == vmIntrinsics::_cipherBlockChaining_decryptAESCrypt) {
    return inline_cipherBlockChaining_AESCrypt_predicate(true);
  }

  // If you get here, it may be that someone has added a new intrinsic
  // to the list in vmSymbols.hpp without implementing it here.
#ifndef PRODUCT
  if ((PrintMiscellaneous && (Verbose || WizardMode)) || PrintOpto) {
    tty->print_cr("*** Warning: Unimplemented predicate for intrinsic %s(%d)",
                  vmIntrinsics::name_at(intrinsic_id()), intrinsic_id());
  }
#endif
  Node* everything_slow = control();
  set_control(top()); // No fast path intrinsic
  return everything_slow;
}
//------------------------------push_result------------------------------ //------------------------------push_result------------------------------
// Helper function for finishing intrinsics. // Helper function for finishing intrinsics.
void LibraryCallKit::push_result(RegionNode* region, PhiNode* value) { void LibraryCallKit::push_result(RegionNode* region, PhiNode* value) {
...@@ -3830,7 +3933,7 @@ Node* LibraryCallKit::generate_virtual_guard(Node* obj_klass, ...@@ -3830,7 +3933,7 @@ Node* LibraryCallKit::generate_virtual_guard(Node* obj_klass,
vtable_index*vtableEntry::size()) * wordSize + vtable_index*vtableEntry::size()) * wordSize +
vtableEntry::method_offset_in_bytes(); vtableEntry::method_offset_in_bytes();
Node* entry_addr = basic_plus_adr(obj_klass, entry_offset); Node* entry_addr = basic_plus_adr(obj_klass, entry_offset);
Node* target_call = make_load(NULL, entry_addr, TypeInstPtr::NOTNULL, T_OBJECT); Node* target_call = make_load(NULL, entry_addr, TypePtr::NOTNULL, T_ADDRESS);
// Compare the target method with the expected method (e.g., Object.hashCode). // Compare the target method with the expected method (e.g., Object.hashCode).
const TypePtr* native_call_addr = TypeMetadataPtr::make(method); const TypePtr* native_call_addr = TypeMetadataPtr::make(method);
...@@ -5613,3 +5716,265 @@ bool LibraryCallKit::inline_reference_get() { ...@@ -5613,3 +5716,265 @@ bool LibraryCallKit::inline_reference_get() {
push(result); push(result);
return true; return true;
} }
// Emits a load of the field 'fieldName' (with signature 'fieldTypeString') from
// the object 'fromObj' and returns the load node. Returns NULL when the field
// cannot be found in the object's instance klass. The result type is built
// with TypeOopPtr::make_from_klass from the field's declared type.
Node * LibraryCallKit::load_field_from_object(Node * fromObj, const char * fieldName, const char * fieldTypeString,
                                              bool is_exact=true, bool is_static=false) {
  const TypeInstPtr* obj_type = _gvn.type(fromObj)->isa_instptr();
  assert(obj_type != NULL, "obj is null");
  assert(obj_type->klass()->is_loaded(), "obj is not loaded");
  assert(!is_exact || obj_type->klass_is_exact(), "klass not exact");

  ciInstanceKlass* holder = obj_type->klass()->as_instance_klass();
  ciField* field = holder->get_field_by_name(ciSymbol::make(fieldName),
                                             ciSymbol::make(fieldTypeString),
                                             is_static);
  if (field == NULL) {
    return (Node *) NULL;
  }

  // Next code copied from Parse::do_get_xxx():

  // Compute address and memory type.
  const int field_offset = field->offset_in_bytes();
  const bool is_volatile_field = field->is_volatile();
  ciType* declared_type = field->type();
  assert(declared_type->is_loaded(), "should be loaded");
  const TypePtr* adr_type = C->alias_type(field)->adr_type();
  Node* field_adr = basic_plus_adr(fromObj, fromObj, field_offset);
  BasicType bt = field->layout_type();

  // Build the resultant type of the load
  const Type* load_type = TypeOopPtr::make_from_klass(declared_type->as_klass());

  // Build the load.
  return make_load(NULL, field_adr, load_type, bt, adr_type, is_volatile_field);
}
//------------------------------inline_aescrypt_Block-----------------------
// Replaces a call to the AESCrypt encryptBlock/decryptBlock intrinsic with a
// leaf call to the matching stub, passing the start addresses of the source
// bytes, the destination bytes and the expanded key array.
// Returns false (no intrinsic generated) when the id is not one of the two
// handled intrinsics, when the stub is not available, or when the key array
// field cannot be located.
bool LibraryCallKit::inline_aescrypt_Block(vmIntrinsics::ID id) {
  // Initialized so that an unexpected id falls into the NULL check below
  // instead of reading an indeterminate pointer.
  address stubAddr = NULL;
  const char *stubName = NULL;
  assert(UseAES, "need AES instruction support");

  switch(id) {
  case vmIntrinsics::_aescrypt_encryptBlock:
    stubAddr = StubRoutines::aescrypt_encryptBlock();
    stubName = "aescrypt_encryptBlock";
    break;
  case vmIntrinsics::_aescrypt_decryptBlock:
    stubAddr = StubRoutines::aescrypt_decryptBlock();
    stubName = "aescrypt_decryptBlock";
    break;
  default:
    break;  // not handled here; stubAddr stays NULL
  }
  if (stubAddr == NULL) return false;

  // Sanity-check the argument count: receiver plus two array/offset pairs.
  int nargs = 5;  // this + 2 oop/offset combos
  assert(callee()->signature()->size() == nargs-1, "encryptBlock has 4 arguments");

  Node *aescrypt_object  = argument(0);
  Node *src              = argument(1);
  Node *src_offset       = argument(2);
  Node *dest             = argument(3);
  Node *dest_offset      = argument(4);

  // (1) src and dest are arrays.
  const Type* src_type = src->Value(&_gvn);
  const Type* dest_type = dest->Value(&_gvn);
  const TypeAryPtr* top_src = src_type->isa_aryptr();
  const TypeAryPtr* top_dest = dest_type->isa_aryptr();
  assert (top_src  != NULL && top_src->klass()  != NULL &&  top_dest != NULL && top_dest->klass() != NULL, "args are strange");

  // for the quick and dirty code we will skip all the checks.
  // we are just trying to get the call to be generated.
  Node* src_start  = src;
  Node* dest_start = dest;
  if (src_offset != NULL || dest_offset != NULL) {
    assert(src_offset != NULL && dest_offset != NULL, "");
    src_start  = array_element_address(src,  src_offset,  T_BYTE);
    dest_start = array_element_address(dest, dest_offset, T_BYTE);
  }

  // now need to get the start of its expanded key array
  // this requires a newer class file that has this array as littleEndian ints, otherwise we revert to java
  Node* k_start = get_key_start_from_aescrypt_object(aescrypt_object);
  if (k_start == NULL) return false;

  // Call the stub.
  make_runtime_call(RC_LEAF|RC_NO_FP, OptoRuntime::aescrypt_block_Type(),
                    stubAddr, stubName, TypePtr::BOTTOM,
                    src_start, dest_start, k_start);

  return true;
}
//------------------------------inline_cipherBlockChaining_AESCrypt-----------------------
// Replaces a call to the CipherBlockChaining encrypt/decrypt intrinsic with a
// leaf call to the matching stub. The stub receives the start addresses of the
// source bytes, destination bytes, the embedded AESCrypt object's expanded key
// array and the 'r' byte array of the CBC object, plus the length.
// Returns false (no intrinsic generated) when the id is not handled, the stub
// is not available, a required field cannot be found, or the AESCrypt klass is
// not loaded.
bool LibraryCallKit::inline_cipherBlockChaining_AESCrypt(vmIntrinsics::ID id) {
  // Initialized so that an unexpected id falls into the NULL check below
  // instead of reading an indeterminate pointer.
  address stubAddr = NULL;
  const char *stubName = NULL;

  assert(UseAES, "need AES instruction support");

  switch(id) {
  case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt:
    stubAddr = StubRoutines::cipherBlockChaining_encryptAESCrypt();
    stubName = "cipherBlockChaining_encryptAESCrypt";
    break;
  case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
    stubAddr = StubRoutines::cipherBlockChaining_decryptAESCrypt();
    stubName = "cipherBlockChaining_decryptAESCrypt";
    break;
  default:
    break;  // not handled here; stubAddr stays NULL
  }
  if (stubAddr == NULL) return false;

  // Sanity-check the argument count.
  int nargs = 6;  // this + oop/offset + len + oop/offset
  assert(callee()->signature()->size() == nargs-1, "wrong number of arguments");

  Node *cipherBlockChaining_object = argument(0);
  Node *src                        = argument(1);
  Node *src_offset                 = argument(2);
  Node *len                        = argument(3);
  Node *dest                       = argument(4);
  Node *dest_offset                = argument(5);

  // (1) src and dest are arrays.
  const Type* src_type = src->Value(&_gvn);
  const Type* dest_type = dest->Value(&_gvn);
  const TypeAryPtr* top_src = src_type->isa_aryptr();
  const TypeAryPtr* top_dest = dest_type->isa_aryptr();
  assert (top_src  != NULL && top_src->klass()  != NULL
          &&  top_dest != NULL && top_dest->klass() != NULL, "args are strange");

  // checks are the responsibility of the caller
  Node* src_start  = src;
  Node* dest_start = dest;
  if (src_offset != NULL || dest_offset != NULL) {
    assert(src_offset != NULL && dest_offset != NULL, "");
    src_start  = array_element_address(src,  src_offset,  T_BYTE);
    dest_start = array_element_address(dest, dest_offset, T_BYTE);
  }

  // if we are in this set of code, we "know" the embeddedCipher is an AESCrypt object
  // (because of the predicated logic executed earlier).
  // so we cast it here safely.
  // this requires a newer class file that has this array as littleEndian ints, otherwise we revert to java

  Node* embeddedCipherObj = load_field_from_object(cipherBlockChaining_object, "embeddedCipher", "Lcom/sun/crypto/provider/SymmetricCipher;", /*is_exact*/ false);
  if (embeddedCipherObj == NULL) return false;

  // cast it to what we know it will be at runtime
  const TypeInstPtr* tinst = _gvn.type(cipherBlockChaining_object)->isa_instptr();
  assert(tinst != NULL, "CBC obj is null");
  assert(tinst->klass()->is_loaded(), "CBC obj is not loaded");
  ciKlass* klass_AESCrypt = tinst->klass()->as_instance_klass()->find_klass(ciSymbol::make("com/sun/crypto/provider/AESCrypt"));
  if (!klass_AESCrypt->is_loaded()) return false;

  ciInstanceKlass* instklass_AESCrypt = klass_AESCrypt->as_instance_klass();
  const TypeKlassPtr* aklass = TypeKlassPtr::make(instklass_AESCrypt);
  const TypeOopPtr* xtype = aklass->as_instance_type();
  Node* aescrypt_object = new(C) CheckCastPPNode(control(), embeddedCipherObj, xtype);
  aescrypt_object = _gvn.transform(aescrypt_object);

  // we need to get the start of the aescrypt_object's expanded key array
  Node* k_start = get_key_start_from_aescrypt_object(aescrypt_object);
  if (k_start == NULL) return false;

  // similarly, get the start address of the r vector
  Node* objRvec = load_field_from_object(cipherBlockChaining_object, "r", "[B", /*is_exact*/ false);
  if (objRvec == NULL) return false;
  Node* r_start = array_element_address(objRvec, intcon(0), T_BYTE);

  // Call the stub, passing src_start, dest_start, k_start, r_start and src_len
  make_runtime_call(RC_LEAF|RC_NO_FP,
                    OptoRuntime::cipherBlockChaining_aescrypt_Type(),
                    stubAddr, stubName, TypePtr::BOTTOM,
                    src_start, dest_start, k_start, r_start, len);

  // return is void so no result needs to be pushed

  return true;
}
//------------------------------get_key_start_from_aescrypt_object-----------------------
// Loads the int[] field "K" of the given AESCrypt object and returns the
// address of its element 0. Returns NULL when the field cannot be found
// (debug builds assert in that case).
Node * LibraryCallKit::get_key_start_from_aescrypt_object(Node *aescrypt_object) {
  Node* key_array = load_field_from_object(aescrypt_object, "K", "[I", /*is_exact*/ false);
  assert (key_array != NULL, "wrong version of com.sun.crypto.provider.AESCrypt");
  if (key_array != NULL) {
    // now have the array, need to get the start address of the K array
    return array_element_address(key_array, intcon(0), T_INT);
  }
  return (Node *) NULL;
}
//----------------------------inline_cipherBlockChaining_AESCrypt_predicate----------------------------
// Return node representing slow path of predicate check.
// the pseudo code we want to emulate with this predicate is:
// for encryption:
//    if (embeddedCipherObj instanceof AESCrypt) do_intrinsic, else do_javapath
// for decryption:
//    if ((embeddedCipherObj instanceof AESCrypt) && (cipher!=plain)) do_intrinsic, else do_javapath
//    note cipher==plain is more conservative than the original java code but that's OK
//
// The returned node may be NULL (no slow path). When the embeddedCipher field
// cannot be found or the AESCrypt klass is not loaded, the current control is
// returned as the slow path and the fast path is killed.
Node* LibraryCallKit::inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting) {
  // First, check receiver for NULL since it is virtual method.
  int nargs = arg_size();
  Node* objCBC = argument(0);
  _sp += nargs;
  objCBC = do_null_check(objCBC, T_OBJECT);
  _sp -= nargs;

  if (stopped()) return NULL; // Always NULL

  // Load embeddedCipher field of CipherBlockChaining object.
  Node* embeddedCipherObj = load_field_from_object(objCBC, "embeddedCipher", "Lcom/sun/crypto/provider/SymmetricCipher;", /*is_exact*/ false);
  if (embeddedCipherObj == NULL) {
    // load_field_from_object() returns NULL when the field is not found;
    // there is nothing to test then, so never take the intrinsic fast path.
    Node* ctrl = control();
    set_control(top()); // no regular fast path
    return ctrl;
  }

  // get AESCrypt klass for instanceOf check
  // AESCrypt might not be loaded yet if some other SymmetricCipher got us to this compile point
  // will have same classloader as CipherBlockChaining object
  const TypeInstPtr* tinst = _gvn.type(objCBC)->isa_instptr();
  assert(tinst != NULL, "CBCobj is null");
  assert(tinst->klass()->is_loaded(), "CBCobj is not loaded");

  // we want to do an instanceof comparison against the AESCrypt class
  ciKlass* klass_AESCrypt = tinst->klass()->as_instance_klass()->find_klass(ciSymbol::make("com/sun/crypto/provider/AESCrypt"));
  if (!klass_AESCrypt->is_loaded()) {
    // if AESCrypt is not even loaded, we never take the intrinsic fast path
    Node* ctrl = control();
    set_control(top()); // no regular fast path
    return ctrl;
  }
  ciInstanceKlass* instklass_AESCrypt = klass_AESCrypt->as_instance_klass();

  _sp += nargs;          // gen_instanceof might do an uncommon trap
  Node* instof = gen_instanceof(embeddedCipherObj, makecon(TypeKlassPtr::make(instklass_AESCrypt)));
  _sp -= nargs;
  Node* cmp_instof  = _gvn.transform(new (C) CmpINode(instof, intcon(1)));
  Node* bool_instof  = _gvn.transform(new (C) BoolNode(cmp_instof, BoolTest::ne));

  // Control path taken when the instanceof result is not 1.
  Node* instof_false = generate_guard(bool_instof, NULL, PROB_MIN);

  // for encryption, we are done
  if (!decrypting)
    return instof_false;  // even if it is NULL

  // for decryption, we need to add a further check to avoid
  // taking the intrinsic path when cipher and plain are the same
  // see the original java code for why.
  RegionNode* region = new(C) RegionNode(3);
  region->init_req(1, instof_false);
  Node* src = argument(1);
  Node *dest = argument(4);
  Node* cmp_src_dest = _gvn.transform(new (C) CmpPNode(src, dest));
  Node* bool_src_dest = _gvn.transform(new (C) BoolNode(cmp_src_dest, BoolTest::eq));
  Node* src_dest_conjoint = generate_guard(bool_src_dest, NULL, PROB_MIN);
  region->init_req(2, src_dest_conjoint);

  record_for_igvn(region);
  return _gvn.transform(region);
}
...@@ -479,24 +479,27 @@ Node *AndINode::Ideal(PhaseGVN *phase, bool can_reshape) { ...@@ -479,24 +479,27 @@ Node *AndINode::Ideal(PhaseGVN *phase, bool can_reshape) {
return new (phase->C) AndINode(load,phase->intcon(mask&0xFFFF)); return new (phase->C) AndINode(load,phase->intcon(mask&0xFFFF));
// Masking bits off of a Short? Loading a Character does some masking // Masking bits off of a Short? Loading a Character does some masking
if (lop == Op_LoadS && (mask & 0xFFFF0000) == 0 ) { if (can_reshape &&
Node *ldus = new (phase->C) LoadUSNode(load->in(MemNode::Control), load->outcnt() == 1 && load->unique_out() == this) {
load->in(MemNode::Memory), if (lop == Op_LoadS && (mask & 0xFFFF0000) == 0 ) {
load->in(MemNode::Address), Node *ldus = new (phase->C) LoadUSNode(load->in(MemNode::Control),
load->adr_type()); load->in(MemNode::Memory),
ldus = phase->transform(ldus); load->in(MemNode::Address),
return new (phase->C) AndINode(ldus, phase->intcon(mask & 0xFFFF)); load->adr_type());
} ldus = phase->transform(ldus);
return new (phase->C) AndINode(ldus, phase->intcon(mask & 0xFFFF));
}
// Masking sign bits off of a Byte? Do an unsigned byte load plus // Masking sign bits off of a Byte? Do an unsigned byte load plus
// an and. // an and.
if (lop == Op_LoadB && (mask & 0xFFFFFF00) == 0) { if (lop == Op_LoadB && (mask & 0xFFFFFF00) == 0) {
Node* ldub = new (phase->C) LoadUBNode(load->in(MemNode::Control), Node* ldub = new (phase->C) LoadUBNode(load->in(MemNode::Control),
load->in(MemNode::Memory), load->in(MemNode::Memory),
load->in(MemNode::Address), load->in(MemNode::Address),
load->adr_type()); load->adr_type());
ldub = phase->transform(ldub); ldub = phase->transform(ldub);
return new (phase->C) AndINode(ldub, phase->intcon(mask)); return new (phase->C) AndINode(ldub, phase->intcon(mask));
}
} }
// Masking off sign bits? Dont make them! // Masking off sign bits? Dont make them!
...@@ -923,7 +926,9 @@ Node *RShiftINode::Ideal(PhaseGVN *phase, bool can_reshape) { ...@@ -923,7 +926,9 @@ Node *RShiftINode::Ideal(PhaseGVN *phase, bool can_reshape) {
set_req(2, phase->intcon(0)); set_req(2, phase->intcon(0));
return this; return this;
} }
else if( ld->Opcode() == Op_LoadUS ) else if( can_reshape &&
ld->Opcode() == Op_LoadUS &&
ld->outcnt() == 1 && ld->unique_out() == shl)
// Replace zero-extension-load with sign-extension-load // Replace zero-extension-load with sign-extension-load
return new (phase->C) LoadSNode( ld->in(MemNode::Control), return new (phase->C) LoadSNode( ld->in(MemNode::Control),
ld->in(MemNode::Memory), ld->in(MemNode::Memory),
......
...@@ -811,6 +811,48 @@ const TypeFunc* OptoRuntime::array_fill_Type() { ...@@ -811,6 +811,48 @@ const TypeFunc* OptoRuntime::array_fill_Type() {
return TypeFunc::make(domain, range); return TypeFunc::make(domain, range);
} }
// Signature of the single-block AES stubs: three non-null pointers
// (src, dest, k array) in, void out. The block length is constant, so no
// length argument is passed.
const TypeFunc* OptoRuntime::aescrypt_block_Type() {
  // Input type (domain).
  const int num_ptr_args = 3;
  const Type** domain_fields = TypeTuple::fields(num_ptr_args);
  int slot = TypeFunc::Parms;
  for (int i = 0; i < num_ptr_args; i++) {
    domain_fields[slot++] = TypePtr::NOTNULL;   // src, dest, k array (in that order)
  }
  assert(slot == TypeFunc::Parms + num_ptr_args, "correct decoding");
  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms + num_ptr_args, domain_fields);

  // No result type needed.
  const Type** range_fields = TypeTuple::fields(1);
  range_fields[TypeFunc::Parms + 0] = NULL; // void
  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, range_fields);

  return TypeFunc::make(domain, range);
}
// Signature of the CBC AES stubs: four non-null pointers
// (src, dest, k array, r array) followed by an int length in, void out.
const TypeFunc* OptoRuntime::cipherBlockChaining_aescrypt_Type() {
  // Input type (domain).
  const int num_ptr_args = 4;
  const int total_args   = num_ptr_args + 1;
  const Type** domain_fields = TypeTuple::fields(total_args);
  int slot = TypeFunc::Parms;
  for (int i = 0; i < num_ptr_args; i++) {
    domain_fields[slot++] = TypePtr::NOTNULL;   // src, dest, k array, r array (in that order)
  }
  domain_fields[slot++] = TypeInt::INT;         // src len
  assert(slot == TypeFunc::Parms + total_args, "correct decoding");
  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms + total_args, domain_fields);

  // No result type needed.
  const Type** range_fields = TypeTuple::fields(1);
  range_fields[TypeFunc::Parms + 0] = NULL; // void
  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, range_fields);

  return TypeFunc::make(domain, range);
}
//------------- Interpreter state access for on stack replacement //------------- Interpreter state access for on stack replacement
const TypeFunc* OptoRuntime::osr_end_Type() { const TypeFunc* OptoRuntime::osr_end_Type() {
// create input type (domain) // create input type (domain)
......
...@@ -280,6 +280,9 @@ private: ...@@ -280,6 +280,9 @@ private:
static const TypeFunc* array_fill_Type(); static const TypeFunc* array_fill_Type();
static const TypeFunc* aescrypt_block_Type();
static const TypeFunc* cipherBlockChaining_aescrypt_Type();
// leaf on stack replacement interpreter accessor types // leaf on stack replacement interpreter accessor types
static const TypeFunc* osr_end_Type(); static const TypeFunc* osr_end_Type();
......
...@@ -1776,16 +1776,15 @@ void SuperWord::compute_vector_element_type() { ...@@ -1776,16 +1776,15 @@ void SuperWord::compute_vector_element_type() {
set_velt_type(n, container_type(n)); set_velt_type(n, container_type(n));
} }
// Propagate narrowed type backwards through operations // Propagate integer narrowed type backwards through operations
// that don't depend on higher order bits // that don't depend on higher order bits
for (int i = _block.length() - 1; i >= 0; i--) { for (int i = _block.length() - 1; i >= 0; i--) {
Node* n = _block.at(i); Node* n = _block.at(i);
// Only integer types need be examined // Only integer types need be examined
const Type* vt = velt_type(n); const Type* vtn = velt_type(n);
if (vt->basic_type() == T_INT) { if (vtn->basic_type() == T_INT) {
uint start, end; uint start, end;
VectorNode::vector_operands(n, &start, &end); VectorNode::vector_operands(n, &start, &end);
const Type* vt = velt_type(n);
for (uint j = start; j < end; j++) { for (uint j = start; j < end; j++) {
Node* in = n->in(j); Node* in = n->in(j);
...@@ -1801,6 +1800,24 @@ void SuperWord::compute_vector_element_type() { ...@@ -1801,6 +1800,24 @@ void SuperWord::compute_vector_element_type() {
} }
} }
if (same_type) { if (same_type) {
// For right shifts of small integer types (bool, byte, char, short)
// we need precise information about sign-ness. Only Load nodes have
// this information because Store nodes are the same for signed and
// unsigned values. And any arithmetic operation after a load may
// expand a value to signed Int so such right shifts can't be used
// because vector elements do not have upper bits of Int.
const Type* vt = vtn;
if (VectorNode::is_shift(in)) {
Node* load = in->in(1);
if (load->is_Load() && in_bb(load) && (velt_type(load)->basic_type() == T_INT)) {
vt = velt_type(load);
} else if (in->Opcode() != Op_LShiftI) {
// Widen type to Int to avoid creation of right shift vector
// (align + data_size(s1) check in stmts_can_pack() will fail).
// Note, left shifts work regardless of type.
vt = TypeInt::INT;
}
}
set_velt_type(in, vt); set_velt_type(in, vt);
} }
} }
...@@ -1841,7 +1858,20 @@ int SuperWord::memory_alignment(MemNode* s, int iv_adjust) { ...@@ -1841,7 +1858,20 @@ int SuperWord::memory_alignment(MemNode* s, int iv_adjust) {
// Smallest type containing range of values // Smallest type containing range of values
const Type* SuperWord::container_type(Node* n) { const Type* SuperWord::container_type(Node* n) {
if (n->is_Mem()) { if (n->is_Mem()) {
return Type::get_const_basic_type(n->as_Mem()->memory_type()); BasicType bt = n->as_Mem()->memory_type();
if (n->is_Store() && (bt == T_CHAR)) {
// Use T_SHORT type instead of T_CHAR for stored values because any
// preceding arithmetic operation extends values to signed Int.
bt = T_SHORT;
}
if (n->Opcode() == Op_LoadUB) {
// Adjust type for unsigned byte loads, it is important for right shifts.
// T_BOOLEAN is used because there is no basic type representing type
// TypeInt::UBYTE. Use of T_BOOLEAN for vectors is fine because only
// size (one byte) and sign is important.
bt = T_BOOLEAN;
}
return Type::get_const_basic_type(bt);
} }
const Type* t = _igvn.type(n); const Type* t = _igvn.type(n);
if (t->basic_type() == T_INT) { if (t->basic_type() == T_INT) {
......
...@@ -61,7 +61,7 @@ Type::TypeInfo Type::_type_info[Type::lastype] = { ...@@ -61,7 +61,7 @@ Type::TypeInfo Type::_type_info[Type::lastype] = {
{ Bad, T_ILLEGAL, "tuple:", false, Node::NotAMachineReg, relocInfo::none }, // Tuple { Bad, T_ILLEGAL, "tuple:", false, Node::NotAMachineReg, relocInfo::none }, // Tuple
{ Bad, T_ARRAY, "array:", false, Node::NotAMachineReg, relocInfo::none }, // Array { Bad, T_ARRAY, "array:", false, Node::NotAMachineReg, relocInfo::none }, // Array
#if defined(IA32) || defined(AMD64) #ifndef SPARC
{ Bad, T_ILLEGAL, "vectors:", false, Op_VecS, relocInfo::none }, // VectorS { Bad, T_ILLEGAL, "vectors:", false, Op_VecS, relocInfo::none }, // VectorS
{ Bad, T_ILLEGAL, "vectord:", false, Op_VecD, relocInfo::none }, // VectorD { Bad, T_ILLEGAL, "vectord:", false, Op_VecD, relocInfo::none }, // VectorD
{ Bad, T_ILLEGAL, "vectorx:", false, Op_VecX, relocInfo::none }, // VectorX { Bad, T_ILLEGAL, "vectorx:", false, Op_VecX, relocInfo::none }, // VectorX
......
...@@ -29,8 +29,7 @@ ...@@ -29,8 +29,7 @@
//------------------------------VectorNode-------------------------------------- //------------------------------VectorNode--------------------------------------
// Return the vector operator for the specified scalar operation // Return the vector operator for the specified scalar operation
// and vector length. Also used to check if the code generator // and vector length.
// supports the vector operation.
int VectorNode::opcode(int sopc, BasicType bt) { int VectorNode::opcode(int sopc, BasicType bt) {
switch (sopc) { switch (sopc) {
case Op_AddI: case Op_AddI:
...@@ -75,7 +74,7 @@ int VectorNode::opcode(int sopc, BasicType bt) { ...@@ -75,7 +74,7 @@ int VectorNode::opcode(int sopc, BasicType bt) {
case T_BYTE: return 0; // Unimplemented case T_BYTE: return 0; // Unimplemented
case T_CHAR: case T_CHAR:
case T_SHORT: return Op_MulVS; case T_SHORT: return Op_MulVS;
case T_INT: return Matcher::match_rule_supported(Op_MulVI) ? Op_MulVI : 0; // SSE4_1 case T_INT: return Op_MulVI;
} }
ShouldNotReachHere(); ShouldNotReachHere();
case Op_MulF: case Op_MulF:
...@@ -104,9 +103,9 @@ int VectorNode::opcode(int sopc, BasicType bt) { ...@@ -104,9 +103,9 @@ int VectorNode::opcode(int sopc, BasicType bt) {
return Op_LShiftVL; return Op_LShiftVL;
case Op_RShiftI: case Op_RShiftI:
switch (bt) { switch (bt) {
case T_BOOLEAN: case T_BOOLEAN:return Op_URShiftVB; // boolean is unsigned value
case T_CHAR: return Op_URShiftVS; // char is unsigned value
case T_BYTE: return Op_RShiftVB; case T_BYTE: return Op_RShiftVB;
case T_CHAR:
case T_SHORT: return Op_RShiftVS; case T_SHORT: return Op_RShiftVS;
case T_INT: return Op_RShiftVI; case T_INT: return Op_RShiftVI;
} }
...@@ -116,10 +115,14 @@ int VectorNode::opcode(int sopc, BasicType bt) { ...@@ -116,10 +115,14 @@ int VectorNode::opcode(int sopc, BasicType bt) {
return Op_RShiftVL; return Op_RShiftVL;
case Op_URShiftI: case Op_URShiftI:
switch (bt) { switch (bt) {
case T_BOOLEAN: case T_BOOLEAN:return Op_URShiftVB;
case T_BYTE: return Op_URShiftVB; case T_CHAR: return Op_URShiftVS;
case T_CHAR: case T_BYTE:
case T_SHORT: return Op_URShiftVS; case T_SHORT: return 0; // Vector logical right shift for signed short
// values produces incorrect Java result for
// negative data because java code should convert
// a short value into int value with sign
// extension before a shift.
case T_INT: return Op_URShiftVI; case T_INT: return Op_URShiftVI;
} }
ShouldNotReachHere(); ShouldNotReachHere();
...@@ -157,12 +160,14 @@ int VectorNode::opcode(int sopc, BasicType bt) { ...@@ -157,12 +160,14 @@ int VectorNode::opcode(int sopc, BasicType bt) {
return 0; // Unimplemented return 0; // Unimplemented
} }
// Also used to check if the code generator
// supports the vector operation.
bool VectorNode::implemented(int opc, uint vlen, BasicType bt) { bool VectorNode::implemented(int opc, uint vlen, BasicType bt) {
if (is_java_primitive(bt) && if (is_java_primitive(bt) &&
(vlen > 1) && is_power_of_2(vlen) && (vlen > 1) && is_power_of_2(vlen) &&
Matcher::vector_size_supported(bt, vlen)) { Matcher::vector_size_supported(bt, vlen)) {
int vopc = VectorNode::opcode(opc, bt); int vopc = VectorNode::opcode(opc, bt);
return vopc > 0 && Matcher::has_match_rule(vopc); return vopc > 0 && Matcher::match_rule_supported(vopc);
} }
return false; return false;
} }
......
...@@ -124,6 +124,8 @@ inline void* index_oop_from_field_offset_long(oop p, jlong field_offset) { ...@@ -124,6 +124,8 @@ inline void* index_oop_from_field_offset_long(oop p, jlong field_offset) {
assert((void*)p->obj_field_addr<oop>((jint)byte_offset) == ptr_plus_disp, assert((void*)p->obj_field_addr<oop>((jint)byte_offset) == ptr_plus_disp,
"raw [ptr+disp] must be consistent with oop::field_base"); "raw [ptr+disp] must be consistent with oop::field_base");
} }
jlong p_size = HeapWordSize * (jlong)(p->size());
assert(byte_offset < p_size, err_msg("Unsafe access: offset " INT64_FORMAT " > object's size " INT64_FORMAT, byte_offset, p_size));
} }
#endif #endif
if (sizeof(char*) == sizeof(jint)) // (this constant folds!) if (sizeof(char*) == sizeof(jint)) // (this constant folds!)
......
...@@ -533,6 +533,9 @@ class CommandLineFlags { ...@@ -533,6 +533,9 @@ class CommandLineFlags {
product(intx, UseSSE, 99, \ product(intx, UseSSE, 99, \
"Highest supported SSE instructions set on x86/x64") \ "Highest supported SSE instructions set on x86/x64") \
\ \
product(bool, UseAES, false, \
"Control whether AES instructions can be used on x86/x64") \
\
product(uintx, LargePageSizeInBytes, 0, \ product(uintx, LargePageSizeInBytes, 0, \
"Large page size (0 to let VM choose the page size") \ "Large page size (0 to let VM choose the page size") \
\ \
...@@ -635,6 +638,9 @@ class CommandLineFlags { ...@@ -635,6 +638,9 @@ class CommandLineFlags {
product(bool, UseSSE42Intrinsics, false, \ product(bool, UseSSE42Intrinsics, false, \
"SSE4.2 versions of intrinsics") \ "SSE4.2 versions of intrinsics") \
\ \
product(bool, UseAESIntrinsics, false, \
"use intrinsics for AES versions of crypto") \
\
develop(bool, TraceCallFixup, false, \ develop(bool, TraceCallFixup, false, \
"traces all call fixups") \ "traces all call fixups") \
\ \
......
...@@ -120,6 +120,10 @@ address StubRoutines::_arrayof_jbyte_fill; ...@@ -120,6 +120,10 @@ address StubRoutines::_arrayof_jbyte_fill;
address StubRoutines::_arrayof_jshort_fill; address StubRoutines::_arrayof_jshort_fill;
address StubRoutines::_arrayof_jint_fill; address StubRoutines::_arrayof_jint_fill;
// Storage for the AES stub entry addresses (single-block encrypt/decrypt and
// the cipherBlockChaining encrypt/decrypt variants). All four start out NULL.
// NOTE(review): presumably filled in by the platform stub generator when AES
// intrinsics are enabled -- confirm against the generator code.
address StubRoutines::_aescrypt_encryptBlock = NULL;
address StubRoutines::_aescrypt_decryptBlock = NULL;
address StubRoutines::_cipherBlockChaining_encryptAESCrypt = NULL;
address StubRoutines::_cipherBlockChaining_decryptAESCrypt = NULL;
double (* StubRoutines::_intrinsic_log )(double) = NULL; double (* StubRoutines::_intrinsic_log )(double) = NULL;
double (* StubRoutines::_intrinsic_log10 )(double) = NULL; double (* StubRoutines::_intrinsic_log10 )(double) = NULL;
......
...@@ -199,6 +199,11 @@ class StubRoutines: AllStatic { ...@@ -199,6 +199,11 @@ class StubRoutines: AllStatic {
// zero heap space aligned to jlong (8 bytes) // zero heap space aligned to jlong (8 bytes)
static address _zero_aligned_words; static address _zero_aligned_words;
// Entry addresses of the AES stubs: single-block encrypt/decrypt and the
// cipherBlockChaining encrypt/decrypt variants.
static address _aescrypt_encryptBlock;
static address _aescrypt_decryptBlock;
static address _cipherBlockChaining_encryptAESCrypt;
static address _cipherBlockChaining_decryptAESCrypt;
// These are versions of the java.lang.Math methods which perform // These are versions of the java.lang.Math methods which perform
// the same operations as the intrinsic version. They are used for // the same operations as the intrinsic version. They are used for
// constant folding in the compiler to ensure equivalence. If the // constant folding in the compiler to ensure equivalence. If the
...@@ -330,6 +335,11 @@ class StubRoutines: AllStatic { ...@@ -330,6 +335,11 @@ class StubRoutines: AllStatic {
static address arrayof_jshort_fill() { return _arrayof_jshort_fill; } static address arrayof_jshort_fill() { return _arrayof_jshort_fill; }
static address arrayof_jint_fill() { return _arrayof_jint_fill; } static address arrayof_jint_fill() { return _arrayof_jint_fill; }
// Accessors for the AES stub entry addresses. Each one returns the matching
// static field unchanged, with no check on the value.
// NOTE(review): the result may be NULL if no stub was installed -- confirm
// how callers guard against that.
static address aescrypt_encryptBlock() { return _aescrypt_encryptBlock; }
static address aescrypt_decryptBlock() { return _aescrypt_decryptBlock; }
static address cipherBlockChaining_encryptAESCrypt() { return _cipherBlockChaining_encryptAESCrypt; }
static address cipherBlockChaining_decryptAESCrypt() { return _cipherBlockChaining_decryptAESCrypt; }
static address select_fill_function(BasicType t, bool aligned, const char* &name); static address select_fill_function(BasicType t, bool aligned, const char* &name);
static address zero_aligned_words() { return _zero_aligned_words; } static address zero_aligned_words() { return _zero_aligned_words; }
......
...@@ -33,7 +33,7 @@ ...@@ -33,7 +33,7 @@
public class TestByteVect { public class TestByteVect {
private static final int ARRLEN = 997; private static final int ARRLEN = 997;
private static final int ITERS = 11000; private static final int ITERS = 11000;
private static final int ADD_INIT = 0; private static final int ADD_INIT = 63;
private static final int BIT_MASK = 0xB7; private static final int BIT_MASK = 0xB7;
private static final int VALUE = 3; private static final int VALUE = 3;
private static final int SHIFT = 8; private static final int SHIFT = 8;
...@@ -76,6 +76,7 @@ public class TestByteVect { ...@@ -76,6 +76,7 @@ public class TestByteVect {
test_subc(a0, a1); test_subc(a0, a1);
test_subv(a0, a1, (byte)VALUE); test_subv(a0, a1, (byte)VALUE);
test_suba(a0, a1, a2); test_suba(a0, a1, a2);
test_mulc(a0, a1); test_mulc(a0, a1);
test_mulv(a0, a1, (byte)VALUE); test_mulv(a0, a1, (byte)VALUE);
test_mula(a0, a1, a2); test_mula(a0, a1, a2);
...@@ -88,6 +89,7 @@ public class TestByteVect { ...@@ -88,6 +89,7 @@ public class TestByteVect {
test_divc_n(a0, a1); test_divc_n(a0, a1);
test_divv(a0, a1, (byte)-VALUE); test_divv(a0, a1, (byte)-VALUE);
test_diva(a0, a1, a3); test_diva(a0, a1, a3);
test_andc(a0, a1); test_andc(a0, a1);
test_andv(a0, a1, (byte)BIT_MASK); test_andv(a0, a1, (byte)BIT_MASK);
test_anda(a0, a1, a4); test_anda(a0, a1, a4);
...@@ -97,30 +99,49 @@ public class TestByteVect { ...@@ -97,30 +99,49 @@ public class TestByteVect {
test_xorc(a0, a1); test_xorc(a0, a1);
test_xorv(a0, a1, (byte)BIT_MASK); test_xorv(a0, a1, (byte)BIT_MASK);
test_xora(a0, a1, a4); test_xora(a0, a1, a4);
test_sllc(a0, a1); test_sllc(a0, a1);
test_sllv(a0, a1, VALUE); test_sllv(a0, a1, VALUE);
test_srlc(a0, a1); test_srlc(a0, a1);
test_srlv(a0, a1, VALUE); test_srlv(a0, a1, VALUE);
test_srac(a0, a1); test_srac(a0, a1);
test_srav(a0, a1, VALUE); test_srav(a0, a1, VALUE);
test_sllc_n(a0, a1); test_sllc_n(a0, a1);
test_sllv(a0, a1, -VALUE); test_sllv(a0, a1, -VALUE);
test_srlc_n(a0, a1); test_srlc_n(a0, a1);
test_srlv(a0, a1, -VALUE); test_srlv(a0, a1, -VALUE);
test_srac_n(a0, a1); test_srac_n(a0, a1);
test_srav(a0, a1, -VALUE); test_srav(a0, a1, -VALUE);
test_sllc_o(a0, a1); test_sllc_o(a0, a1);
test_sllv(a0, a1, SHIFT); test_sllv(a0, a1, SHIFT);
test_srlc_o(a0, a1); test_srlc_o(a0, a1);
test_srlv(a0, a1, SHIFT); test_srlv(a0, a1, SHIFT);
test_srac_o(a0, a1); test_srac_o(a0, a1);
test_srav(a0, a1, SHIFT); test_srav(a0, a1, SHIFT);
test_sllc_on(a0, a1); test_sllc_on(a0, a1);
test_sllv(a0, a1, -SHIFT); test_sllv(a0, a1, -SHIFT);
test_srlc_on(a0, a1); test_srlc_on(a0, a1);
test_srlv(a0, a1, -SHIFT); test_srlv(a0, a1, -SHIFT);
test_srac_on(a0, a1); test_srac_on(a0, a1);
test_srav(a0, a1, -SHIFT); test_srav(a0, a1, -SHIFT);
test_sllc_add(a0, a1);
test_sllv_add(a0, a1, ADD_INIT);
test_srlc_add(a0, a1);
test_srlv_add(a0, a1, ADD_INIT);
test_srac_add(a0, a1);
test_srav_add(a0, a1, ADD_INIT);
test_sllc_and(a0, a1);
test_sllv_and(a0, a1, BIT_MASK);
test_srlc_and(a0, a1);
test_srlv_and(a0, a1, BIT_MASK);
test_srac_and(a0, a1);
test_srav_and(a0, a1, BIT_MASK);
test_pack2(p2, a1); test_pack2(p2, a1);
test_unpack2(a0, p2); test_unpack2(a0, p2);
test_pack2_swap(p2, a1); test_pack2_swap(p2, a1);
...@@ -369,6 +390,60 @@ public class TestByteVect { ...@@ -369,6 +390,60 @@ public class TestByteVect {
errn += verify("test_srav_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>(-SHIFT))); errn += verify("test_srav_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>(-SHIFT)));
} }
test_sllc_add(a0, a1);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_sllc_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)<<VALUE));
}
test_sllv_add(a0, a1, ADD_INIT);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_sllv_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)<<VALUE));
}
test_srlc_add(a0, a1);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_srlc_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
}
test_srlv_add(a0, a1, ADD_INIT);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_srlv_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
}
test_srac_add(a0, a1);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_srac_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)>>VALUE));
}
test_srav_add(a0, a1, ADD_INIT);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_srav_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)>>VALUE));
}
test_sllc_and(a0, a1);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_sllc_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)<<VALUE));
}
test_sllv_and(a0, a1, BIT_MASK);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_sllv_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)<<VALUE));
}
test_srlc_and(a0, a1);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_srlc_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
}
test_srlv_and(a0, a1, BIT_MASK);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_srlv_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
}
test_srac_and(a0, a1);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_srac_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)>>VALUE));
}
test_srav_and(a0, a1, BIT_MASK);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_srav_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)>>VALUE));
}
test_pack2(p2, a1); test_pack2(p2, a1);
for (int i=0; i<ARRLEN/2; i++) { for (int i=0; i<ARRLEN/2; i++) {
errn += verify("test_pack2: ", i, p2[i], (short)(((short)(ADD_INIT+2*i) & 0xFF) | ((short)(ADD_INIT+2*i+1) << 8))); errn += verify("test_pack2: ", i, p2[i], (short)(((short)(ADD_INIT+2*i) & 0xFF) | ((short)(ADD_INIT+2*i+1) << 8)));
...@@ -803,6 +878,84 @@ public class TestByteVect { ...@@ -803,6 +878,84 @@ public class TestByteVect {
end = System.currentTimeMillis(); end = System.currentTimeMillis();
System.out.println("test_srav_on: " + (end - start)); System.out.println("test_srav_on: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_sllc_add(a0, a1);
}
end = System.currentTimeMillis();
System.out.println("test_sllc_add: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_sllv_add(a0, a1, ADD_INIT);
}
end = System.currentTimeMillis();
System.out.println("test_sllv_add: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_srlc_add(a0, a1);
}
end = System.currentTimeMillis();
System.out.println("test_srlc_add: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_srlv_add(a0, a1, ADD_INIT);
}
end = System.currentTimeMillis();
System.out.println("test_srlv_add: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_srac_add(a0, a1);
}
end = System.currentTimeMillis();
System.out.println("test_srac_add: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_srav_add(a0, a1, ADD_INIT);
}
end = System.currentTimeMillis();
System.out.println("test_srav_add: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_sllc_and(a0, a1);
}
end = System.currentTimeMillis();
System.out.println("test_sllc_and: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_sllv_and(a0, a1, BIT_MASK);
}
end = System.currentTimeMillis();
System.out.println("test_sllv_and: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_srlc_and(a0, a1);
}
end = System.currentTimeMillis();
System.out.println("test_srlc_and: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_srlv_and(a0, a1, BIT_MASK);
}
end = System.currentTimeMillis();
System.out.println("test_srlv_and: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_srac_and(a0, a1);
}
end = System.currentTimeMillis();
System.out.println("test_srac_and: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_srav_and(a0, a1, BIT_MASK);
}
end = System.currentTimeMillis();
System.out.println("test_srav_and: " + (end - start));
start = System.currentTimeMillis(); start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) { for (int i=0; i<ITERS; i++) {
test_pack2(p2, a1); test_pack2(p2, a1);
...@@ -1036,6 +1189,26 @@ public class TestByteVect { ...@@ -1036,6 +1189,26 @@ public class TestByteVect {
a0[i] = (byte)(a1[i]<<b); a0[i] = (byte)(a1[i]<<b);
} }
} }
// Kernel: add the constant ADD_INIT, then shift left by the constant VALUE.
// For every index of a0: a0[i] = (byte)((a1[i] + ADD_INIT) << VALUE).
// The add and shift are evaluated as int; the cast keeps only the low 8 bits.
// a1 is read at each index of a0, so it must be at least as long as a0.
static void test_sllc_add(byte[] a0, byte[] a1) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (byte)((a1[i] + ADD_INIT)<<VALUE);
}
}
// Kernel: add the loop-invariant argument b, then shift left by the constant VALUE.
// For every index of a0: a0[i] = (byte)((a1[i] + b) << VALUE).
// The add and shift are evaluated as int; the cast keeps only the low 8 bits.
// a1 is read at each index of a0, so it must be at least as long as a0.
static void test_sllv_add(byte[] a0, byte[] a1, int b) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (byte)((a1[i] + b)<<VALUE);
}
}
// Kernel: mask with the constant BIT_MASK, then shift left by the constant VALUE.
// For every index of a0: a0[i] = (byte)((a1[i] & BIT_MASK) << VALUE).
// The mask and shift are evaluated as int; the cast keeps only the low 8 bits.
// a1 is read at each index of a0, so it must be at least as long as a0.
static void test_sllc_and(byte[] a0, byte[] a1) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (byte)((a1[i] & BIT_MASK)<<VALUE);
}
}
// Kernel: mask with the loop-invariant argument b, then shift left by the constant VALUE.
// For every index of a0: a0[i] = (byte)((a1[i] & b) << VALUE).
// The mask and shift are evaluated as int; the cast keeps only the low 8 bits.
// a1 is read at each index of a0, so it must be at least as long as a0.
static void test_sllv_and(byte[] a0, byte[] a1, int b) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (byte)((a1[i] & b)<<VALUE);
}
}
static void test_srlc(byte[] a0, byte[] a1) { static void test_srlc(byte[] a0, byte[] a1) {
for (int i = 0; i < a0.length; i+=1) { for (int i = 0; i < a0.length; i+=1) {
...@@ -1062,6 +1235,26 @@ public class TestByteVect { ...@@ -1062,6 +1235,26 @@ public class TestByteVect {
a0[i] = (byte)(a1[i]>>>b); a0[i] = (byte)(a1[i]>>>b);
} }
} }
// Kernel: add the constant ADD_INIT, then logical (unsigned) right shift by VALUE.
// For every index of a0: a0[i] = (byte)((a1[i] + ADD_INIT) >>> VALUE).
// The byte is sign-extended to int before the add, so the shift operates on a
// full 32-bit value; the cast then keeps only the low 8 bits.
// a1 is read at each index of a0, so it must be at least as long as a0.
static void test_srlc_add(byte[] a0, byte[] a1) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (byte)((a1[i] + ADD_INIT)>>>VALUE);
}
}
// Kernel: add the loop-invariant argument b, then logical (unsigned) right shift by VALUE.
// For every index of a0: a0[i] = (byte)((a1[i] + b) >>> VALUE).
// The byte is sign-extended to int before the add, so the shift operates on a
// full 32-bit value; the cast then keeps only the low 8 bits.
// a1 is read at each index of a0, so it must be at least as long as a0.
static void test_srlv_add(byte[] a0, byte[] a1, int b) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (byte)((a1[i] + b)>>>VALUE);
}
}
// Kernel: mask with the constant BIT_MASK, then logical (unsigned) right shift by VALUE.
// For every index of a0: a0[i] = (byte)((a1[i] & BIT_MASK) >>> VALUE).
// The byte is sign-extended to int before the mask is applied; the cast keeps
// only the low 8 bits of the shifted result.
// a1 is read at each index of a0, so it must be at least as long as a0.
static void test_srlc_and(byte[] a0, byte[] a1) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (byte)((a1[i] & BIT_MASK)>>>VALUE);
}
}
// Kernel: mask with the loop-invariant argument b, then logical (unsigned) right shift by VALUE.
// For every index of a0: a0[i] = (byte)((a1[i] & b) >>> VALUE).
// The byte is sign-extended to int before the mask is applied; the cast keeps
// only the low 8 bits of the shifted result.
// a1 is read at each index of a0, so it must be at least as long as a0.
static void test_srlv_and(byte[] a0, byte[] a1, int b) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (byte)((a1[i] & b)>>>VALUE);
}
}
static void test_srac(byte[] a0, byte[] a1) { static void test_srac(byte[] a0, byte[] a1) {
for (int i = 0; i < a0.length; i+=1) { for (int i = 0; i < a0.length; i+=1) {
...@@ -1088,6 +1281,26 @@ public class TestByteVect { ...@@ -1088,6 +1281,26 @@ public class TestByteVect {
a0[i] = (byte)(a1[i]>>b); a0[i] = (byte)(a1[i]>>b);
} }
} }
// Kernel: add the constant ADD_INIT, then arithmetic (sign-preserving) right shift by VALUE.
// For every index of a0: a0[i] = (byte)((a1[i] + ADD_INIT) >> VALUE).
// The add and shift are evaluated as int; the cast keeps only the low 8 bits.
// a1 is read at each index of a0, so it must be at least as long as a0.
static void test_srac_add(byte[] a0, byte[] a1) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (byte)((a1[i] + ADD_INIT)>>VALUE);
}
}
// Kernel: add the loop-invariant argument b, then arithmetic (sign-preserving) right shift by VALUE.
// For every index of a0: a0[i] = (byte)((a1[i] + b) >> VALUE).
// The add and shift are evaluated as int; the cast keeps only the low 8 bits.
// a1 is read at each index of a0, so it must be at least as long as a0.
static void test_srav_add(byte[] a0, byte[] a1, int b) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (byte)((a1[i] + b)>>VALUE);
}
}
// Kernel: mask with the constant BIT_MASK, then arithmetic (sign-preserving) right shift by VALUE.
// For every index of a0: a0[i] = (byte)((a1[i] & BIT_MASK) >> VALUE).
// The mask and shift are evaluated as int; the cast keeps only the low 8 bits.
// a1 is read at each index of a0, so it must be at least as long as a0.
static void test_srac_and(byte[] a0, byte[] a1) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (byte)((a1[i] & BIT_MASK)>>VALUE);
}
}
// Kernel: mask with the loop-invariant argument b, then arithmetic (sign-preserving) right shift by VALUE.
// For every index of a0: a0[i] = (byte)((a1[i] & b) >> VALUE).
// The mask and shift are evaluated as int; the cast keeps only the low 8 bits.
// a1 is read at each index of a0, so it must be at least as long as a0.
static void test_srav_and(byte[] a0, byte[] a1, int b) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (byte)((a1[i] & b)>>VALUE);
}
}
static void test_pack2(short[] p2, byte[] a1) { static void test_pack2(short[] p2, byte[] a1) {
if (p2.length*2 > a1.length) return; if (p2.length*2 > a1.length) return;
......
...@@ -74,6 +74,7 @@ public class TestIntVect { ...@@ -74,6 +74,7 @@ public class TestIntVect {
test_subc(a0, a1); test_subc(a0, a1);
test_subv(a0, a1, (int)VALUE); test_subv(a0, a1, (int)VALUE);
test_suba(a0, a1, a2); test_suba(a0, a1, a2);
test_mulc(a0, a1); test_mulc(a0, a1);
test_mulv(a0, a1, (int)VALUE); test_mulv(a0, a1, (int)VALUE);
test_mula(a0, a1, a2); test_mula(a0, a1, a2);
...@@ -86,6 +87,7 @@ public class TestIntVect { ...@@ -86,6 +87,7 @@ public class TestIntVect {
test_divc_n(a0, a1); test_divc_n(a0, a1);
test_divv(a0, a1, (int)-VALUE); test_divv(a0, a1, (int)-VALUE);
test_diva(a0, a1, a3); test_diva(a0, a1, a3);
test_andc(a0, a1); test_andc(a0, a1);
test_andv(a0, a1, (int)BIT_MASK); test_andv(a0, a1, (int)BIT_MASK);
test_anda(a0, a1, a4); test_anda(a0, a1, a4);
...@@ -95,30 +97,49 @@ public class TestIntVect { ...@@ -95,30 +97,49 @@ public class TestIntVect {
test_xorc(a0, a1); test_xorc(a0, a1);
test_xorv(a0, a1, (int)BIT_MASK); test_xorv(a0, a1, (int)BIT_MASK);
test_xora(a0, a1, a4); test_xora(a0, a1, a4);
test_sllc(a0, a1); test_sllc(a0, a1);
test_sllv(a0, a1, VALUE); test_sllv(a0, a1, VALUE);
test_srlc(a0, a1); test_srlc(a0, a1);
test_srlv(a0, a1, VALUE); test_srlv(a0, a1, VALUE);
test_srac(a0, a1); test_srac(a0, a1);
test_srav(a0, a1, VALUE); test_srav(a0, a1, VALUE);
test_sllc_n(a0, a1); test_sllc_n(a0, a1);
test_sllv(a0, a1, -VALUE); test_sllv(a0, a1, -VALUE);
test_srlc_n(a0, a1); test_srlc_n(a0, a1);
test_srlv(a0, a1, -VALUE); test_srlv(a0, a1, -VALUE);
test_srac_n(a0, a1); test_srac_n(a0, a1);
test_srav(a0, a1, -VALUE); test_srav(a0, a1, -VALUE);
test_sllc_o(a0, a1); test_sllc_o(a0, a1);
test_sllv(a0, a1, SHIFT); test_sllv(a0, a1, SHIFT);
test_srlc_o(a0, a1); test_srlc_o(a0, a1);
test_srlv(a0, a1, SHIFT); test_srlv(a0, a1, SHIFT);
test_srac_o(a0, a1); test_srac_o(a0, a1);
test_srav(a0, a1, SHIFT); test_srav(a0, a1, SHIFT);
test_sllc_on(a0, a1); test_sllc_on(a0, a1);
test_sllv(a0, a1, -SHIFT); test_sllv(a0, a1, -SHIFT);
test_srlc_on(a0, a1); test_srlc_on(a0, a1);
test_srlv(a0, a1, -SHIFT); test_srlv(a0, a1, -SHIFT);
test_srac_on(a0, a1); test_srac_on(a0, a1);
test_srav(a0, a1, -SHIFT); test_srav(a0, a1, -SHIFT);
test_sllc_add(a0, a1);
test_sllv_add(a0, a1, ADD_INIT);
test_srlc_add(a0, a1);
test_srlv_add(a0, a1, ADD_INIT);
test_srac_add(a0, a1);
test_srav_add(a0, a1, ADD_INIT);
test_sllc_and(a0, a1);
test_sllv_and(a0, a1, BIT_MASK);
test_srlc_and(a0, a1);
test_srlv_and(a0, a1, BIT_MASK);
test_srac_and(a0, a1);
test_srav_and(a0, a1, BIT_MASK);
test_pack2(p2, a1); test_pack2(p2, a1);
test_unpack2(a0, p2); test_unpack2(a0, p2);
test_pack2_swap(p2, a1); test_pack2_swap(p2, a1);
...@@ -359,6 +380,60 @@ public class TestIntVect { ...@@ -359,6 +380,60 @@ public class TestIntVect {
errn += verify("test_srav_on: ", i, a0[i], (int)((int)(ADD_INIT+i)>>(-SHIFT))); errn += verify("test_srav_on: ", i, a0[i], (int)((int)(ADD_INIT+i)>>(-SHIFT)));
} }
test_sllc_add(a0, a1);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_sllc_add: ", i, a0[i], (int)(((int)(ADD_INIT+i) + ADD_INIT)<<VALUE));
}
test_sllv_add(a0, a1, ADD_INIT);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_sllv_add: ", i, a0[i], (int)(((int)(ADD_INIT+i) + ADD_INIT)<<VALUE));
}
test_srlc_add(a0, a1);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_srlc_add: ", i, a0[i], (int)(((int)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
}
test_srlv_add(a0, a1, ADD_INIT);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_srlv_add: ", i, a0[i], (int)(((int)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
}
test_srac_add(a0, a1);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_srac_add: ", i, a0[i], (int)(((int)(ADD_INIT+i) + ADD_INIT)>>VALUE));
}
test_srav_add(a0, a1, ADD_INIT);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_srav_add: ", i, a0[i], (int)(((int)(ADD_INIT+i) + ADD_INIT)>>VALUE));
}
test_sllc_and(a0, a1);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_sllc_and: ", i, a0[i], (int)(((int)(ADD_INIT+i) & BIT_MASK)<<VALUE));
}
test_sllv_and(a0, a1, BIT_MASK);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_sllv_and: ", i, a0[i], (int)(((int)(ADD_INIT+i) & BIT_MASK)<<VALUE));
}
test_srlc_and(a0, a1);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_srlc_and: ", i, a0[i], (int)(((int)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
}
test_srlv_and(a0, a1, BIT_MASK);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_srlv_and: ", i, a0[i], (int)(((int)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
}
test_srac_and(a0, a1);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_srac_and: ", i, a0[i], (int)(((int)(ADD_INIT+i) & BIT_MASK)>>VALUE));
}
test_srav_and(a0, a1, BIT_MASK);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_srav_and: ", i, a0[i], (int)(((int)(ADD_INIT+i) & BIT_MASK)>>VALUE));
}
test_pack2(p2, a1); test_pack2(p2, a1);
for (int i=0; i<ARRLEN/2; i++) { for (int i=0; i<ARRLEN/2; i++) {
errn += verify("test_pack2: ", i, p2[i], ((long)(ADD_INIT+2*i) & 0xFFFFFFFFl) | ((long)(ADD_INIT+2*i+1) << 32)); errn += verify("test_pack2: ", i, p2[i], ((long)(ADD_INIT+2*i) & 0xFFFFFFFFl) | ((long)(ADD_INIT+2*i+1) << 32));
...@@ -725,6 +800,84 @@ public class TestIntVect { ...@@ -725,6 +800,84 @@ public class TestIntVect {
end = System.currentTimeMillis(); end = System.currentTimeMillis();
System.out.println("test_srav_on: " + (end - start)); System.out.println("test_srav_on: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_sllc_add(a0, a1);
}
end = System.currentTimeMillis();
System.out.println("test_sllc_add: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_sllv_add(a0, a1, ADD_INIT);
}
end = System.currentTimeMillis();
System.out.println("test_sllv_add: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_srlc_add(a0, a1);
}
end = System.currentTimeMillis();
System.out.println("test_srlc_add: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_srlv_add(a0, a1, ADD_INIT);
}
end = System.currentTimeMillis();
System.out.println("test_srlv_add: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_srac_add(a0, a1);
}
end = System.currentTimeMillis();
System.out.println("test_srac_add: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_srav_add(a0, a1, ADD_INIT);
}
end = System.currentTimeMillis();
System.out.println("test_srav_add: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_sllc_and(a0, a1);
}
end = System.currentTimeMillis();
System.out.println("test_sllc_and: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_sllv_and(a0, a1, BIT_MASK);
}
end = System.currentTimeMillis();
System.out.println("test_sllv_and: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_srlc_and(a0, a1);
}
end = System.currentTimeMillis();
System.out.println("test_srlc_and: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_srlv_and(a0, a1, BIT_MASK);
}
end = System.currentTimeMillis();
System.out.println("test_srlv_and: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_srac_and(a0, a1);
}
end = System.currentTimeMillis();
System.out.println("test_srac_and: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_srav_and(a0, a1, BIT_MASK);
}
end = System.currentTimeMillis();
System.out.println("test_srav_and: " + (end - start));
start = System.currentTimeMillis(); start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) { for (int i=0; i<ITERS; i++) {
test_pack2(p2, a1); test_pack2(p2, a1);
...@@ -908,6 +1061,26 @@ public class TestIntVect { ...@@ -908,6 +1061,26 @@ public class TestIntVect {
a0[i] = (int)(a1[i]<<b); a0[i] = (int)(a1[i]<<b);
} }
} }
// Left shift of a sum, both operands taken from class-level names: for every
// index i of a0, stores (a1[i] + ADD_INIT) << VALUE into a0[i].
// a1 is only read; it must be at least as long as a0 because the loop is
// bounded by a0.length.
static void test_sllc_add(int[] a0, int[] a1) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (int)((a1[i] + ADD_INIT)<<VALUE);
}
}
// Left shift of a sum where the addend is a runtime argument: for every index
// i of a0, stores (a1[i] + b) << VALUE into a0[i].
// Note that b is the value added, not the shift distance; the distance is
// still the class-level VALUE. a1 must be at least as long as a0.
static void test_sllv_add(int[] a0, int[] a1, int b) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (int)((a1[i] + b)<<VALUE);
}
}
// Left shift of a masked value, mask taken from a class-level name: for every
// index i of a0, stores (a1[i] & BIT_MASK) << VALUE into a0[i].
// a1 is only read and must be at least as long as a0.
static void test_sllc_and(int[] a0, int[] a1) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (int)((a1[i] & BIT_MASK)<<VALUE);
}
}
// Left shift of a masked value where the mask is a runtime argument: for
// every index i of a0, stores (a1[i] & b) << VALUE into a0[i].
// b is the AND mask, not the shift distance (which is the class-level VALUE).
// a1 must be at least as long as a0.
static void test_sllv_and(int[] a0, int[] a1, int b) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (int)((a1[i] & b)<<VALUE);
}
}
static void test_srlc(int[] a0, int[] a1) { static void test_srlc(int[] a0, int[] a1) {
for (int i = 0; i < a0.length; i+=1) { for (int i = 0; i < a0.length; i+=1) {
...@@ -934,6 +1107,26 @@ public class TestIntVect { ...@@ -934,6 +1107,26 @@ public class TestIntVect {
a0[i] = (int)(a1[i]>>>b); a0[i] = (int)(a1[i]>>>b);
} }
} }
// Unsigned (logical) right shift of a sum: for every index i of a0, stores
// (a1[i] + ADD_INIT) >>> VALUE into a0[i].
// a1 is only read; it must be at least as long as a0 because the loop is
// bounded by a0.length.
static void test_srlc_add(int[] a0, int[] a1) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (int)((a1[i] + ADD_INIT)>>>VALUE);
}
}
// Unsigned (logical) right shift of a sum where the addend is a runtime
// argument: for every index i of a0, stores (a1[i] + b) >>> VALUE into a0[i].
// b is the value added; the shift distance is the class-level VALUE.
// a1 must be at least as long as a0.
static void test_srlv_add(int[] a0, int[] a1, int b) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (int)((a1[i] + b)>>>VALUE);
}
}
// Unsigned (logical) right shift of a masked value: for every index i of a0,
// stores (a1[i] & BIT_MASK) >>> VALUE into a0[i].
// a1 is only read and must be at least as long as a0.
static void test_srlc_and(int[] a0, int[] a1) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (int)((a1[i] & BIT_MASK)>>>VALUE);
}
}
// Unsigned (logical) right shift of a masked value where the mask is a
// runtime argument: for every index i of a0, stores (a1[i] & b) >>> VALUE
// into a0[i]. b is the AND mask; the shift distance is the class-level VALUE.
// a1 must be at least as long as a0.
static void test_srlv_and(int[] a0, int[] a1, int b) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (int)((a1[i] & b)>>>VALUE);
}
}
static void test_srac(int[] a0, int[] a1) { static void test_srac(int[] a0, int[] a1) {
for (int i = 0; i < a0.length; i+=1) { for (int i = 0; i < a0.length; i+=1) {
...@@ -960,6 +1153,26 @@ public class TestIntVect { ...@@ -960,6 +1153,26 @@ public class TestIntVect {
a0[i] = (int)(a1[i]>>b); a0[i] = (int)(a1[i]>>b);
} }
} }
// Signed (arithmetic) right shift of a sum: for every index i of a0, stores
// (a1[i] + ADD_INIT) >> VALUE into a0[i].
// a1 is only read; it must be at least as long as a0 because the loop is
// bounded by a0.length.
static void test_srac_add(int[] a0, int[] a1) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (int)((a1[i] + ADD_INIT)>>VALUE);
}
}
// Signed (arithmetic) right shift of a sum where the addend is a runtime
// argument: for every index i of a0, stores (a1[i] + b) >> VALUE into a0[i].
// b is the value added; the shift distance is the class-level VALUE.
// a1 must be at least as long as a0.
static void test_srav_add(int[] a0, int[] a1, int b) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (int)((a1[i] + b)>>VALUE);
}
}
// Signed (arithmetic) right shift of a masked value: for every index i of a0,
// stores (a1[i] & BIT_MASK) >> VALUE into a0[i].
// a1 is only read and must be at least as long as a0.
static void test_srac_and(int[] a0, int[] a1) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (int)((a1[i] & BIT_MASK)>>VALUE);
}
}
// Signed (arithmetic) right shift of a masked value where the mask is a
// runtime argument: for every index i of a0, stores (a1[i] & b) >> VALUE into
// a0[i]. b is the AND mask; the shift distance is the class-level VALUE.
// a1 must be at least as long as a0.
static void test_srav_and(int[] a0, int[] a1, int b) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (int)((a1[i] & b)>>VALUE);
}
}
static void test_pack2(long[] p2, int[] a1) { static void test_pack2(long[] p2, int[] a1) {
if (p2.length*2 > a1.length) return; if (p2.length*2 > a1.length) return;
......
...@@ -73,6 +73,7 @@ public class TestLongVect { ...@@ -73,6 +73,7 @@ public class TestLongVect {
test_subc(a0, a1); test_subc(a0, a1);
test_subv(a0, a1, (long)VALUE); test_subv(a0, a1, (long)VALUE);
test_suba(a0, a1, a2); test_suba(a0, a1, a2);
test_mulc(a0, a1); test_mulc(a0, a1);
test_mulv(a0, a1, (long)VALUE); test_mulv(a0, a1, (long)VALUE);
test_mula(a0, a1, a2); test_mula(a0, a1, a2);
...@@ -85,6 +86,7 @@ public class TestLongVect { ...@@ -85,6 +86,7 @@ public class TestLongVect {
test_divc_n(a0, a1); test_divc_n(a0, a1);
test_divv(a0, a1, (long)-VALUE); test_divv(a0, a1, (long)-VALUE);
test_diva(a0, a1, a3); test_diva(a0, a1, a3);
test_andc(a0, a1); test_andc(a0, a1);
test_andv(a0, a1, (long)BIT_MASK); test_andv(a0, a1, (long)BIT_MASK);
test_anda(a0, a1, a4); test_anda(a0, a1, a4);
...@@ -94,30 +96,48 @@ public class TestLongVect { ...@@ -94,30 +96,48 @@ public class TestLongVect {
test_xorc(a0, a1); test_xorc(a0, a1);
test_xorv(a0, a1, (long)BIT_MASK); test_xorv(a0, a1, (long)BIT_MASK);
test_xora(a0, a1, a4); test_xora(a0, a1, a4);
test_sllc(a0, a1); test_sllc(a0, a1);
test_sllv(a0, a1, VALUE); test_sllv(a0, a1, VALUE);
test_srlc(a0, a1); test_srlc(a0, a1);
test_srlv(a0, a1, VALUE); test_srlv(a0, a1, VALUE);
test_srac(a0, a1); test_srac(a0, a1);
test_srav(a0, a1, VALUE); test_srav(a0, a1, VALUE);
test_sllc_n(a0, a1); test_sllc_n(a0, a1);
test_sllv(a0, a1, -VALUE); test_sllv(a0, a1, -VALUE);
test_srlc_n(a0, a1); test_srlc_n(a0, a1);
test_srlv(a0, a1, -VALUE); test_srlv(a0, a1, -VALUE);
test_srac_n(a0, a1); test_srac_n(a0, a1);
test_srav(a0, a1, -VALUE); test_srav(a0, a1, -VALUE);
test_sllc_o(a0, a1); test_sllc_o(a0, a1);
test_sllv(a0, a1, SHIFT); test_sllv(a0, a1, SHIFT);
test_srlc_o(a0, a1); test_srlc_o(a0, a1);
test_srlv(a0, a1, SHIFT); test_srlv(a0, a1, SHIFT);
test_srac_o(a0, a1); test_srac_o(a0, a1);
test_srav(a0, a1, SHIFT); test_srav(a0, a1, SHIFT);
test_sllc_on(a0, a1); test_sllc_on(a0, a1);
test_sllv(a0, a1, -SHIFT); test_sllv(a0, a1, -SHIFT);
test_srlc_on(a0, a1); test_srlc_on(a0, a1);
test_srlv(a0, a1, -SHIFT); test_srlv(a0, a1, -SHIFT);
test_srac_on(a0, a1); test_srac_on(a0, a1);
test_srav(a0, a1, -SHIFT); test_srav(a0, a1, -SHIFT);
test_sllc_add(a0, a1);
test_sllv_add(a0, a1, ADD_INIT);
test_srlc_add(a0, a1);
test_srlv_add(a0, a1, ADD_INIT);
test_srac_add(a0, a1);
test_srav_add(a0, a1, ADD_INIT);
test_sllc_and(a0, a1);
test_sllv_and(a0, a1, BIT_MASK);
test_srlc_and(a0, a1);
test_srlv_and(a0, a1, BIT_MASK);
test_srac_and(a0, a1);
test_srav_and(a0, a1, BIT_MASK);
} }
// Test and verify results // Test and verify results
System.out.println("Verification"); System.out.println("Verification");
...@@ -354,6 +374,60 @@ public class TestLongVect { ...@@ -354,6 +374,60 @@ public class TestLongVect {
errn += verify("test_srav_on: ", i, a0[i], (long)((long)(ADD_INIT+i)>>(-SHIFT))); errn += verify("test_srav_on: ", i, a0[i], (long)((long)(ADD_INIT+i)>>(-SHIFT)));
} }
test_sllc_add(a0, a1);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_sllc_add: ", i, a0[i], (long)(((long)(ADD_INIT+i) + ADD_INIT)<<VALUE));
}
test_sllv_add(a0, a1, ADD_INIT);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_sllv_add: ", i, a0[i], (long)(((long)(ADD_INIT+i) + ADD_INIT)<<VALUE));
}
test_srlc_add(a0, a1);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_srlc_add: ", i, a0[i], (long)(((long)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
}
test_srlv_add(a0, a1, ADD_INIT);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_srlv_add: ", i, a0[i], (long)(((long)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
}
test_srac_add(a0, a1);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_srac_add: ", i, a0[i], (long)(((long)(ADD_INIT+i) + ADD_INIT)>>VALUE));
}
test_srav_add(a0, a1, ADD_INIT);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_srav_add: ", i, a0[i], (long)(((long)(ADD_INIT+i) + ADD_INIT)>>VALUE));
}
test_sllc_and(a0, a1);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_sllc_and: ", i, a0[i], (long)(((long)(ADD_INIT+i) & BIT_MASK)<<VALUE));
}
test_sllv_and(a0, a1, BIT_MASK);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_sllv_and: ", i, a0[i], (long)(((long)(ADD_INIT+i) & BIT_MASK)<<VALUE));
}
test_srlc_and(a0, a1);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_srlc_and: ", i, a0[i], (long)(((long)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
}
test_srlv_and(a0, a1, BIT_MASK);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_srlv_and: ", i, a0[i], (long)(((long)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
}
test_srac_and(a0, a1);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_srac_and: ", i, a0[i], (long)(((long)(ADD_INIT+i) & BIT_MASK)>>VALUE));
}
test_srav_and(a0, a1, BIT_MASK);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_srav_and: ", i, a0[i], (long)(((long)(ADD_INIT+i) & BIT_MASK)>>VALUE));
}
} }
if (errn > 0) if (errn > 0)
...@@ -696,6 +770,84 @@ public class TestLongVect { ...@@ -696,6 +770,84 @@ public class TestLongVect {
end = System.currentTimeMillis(); end = System.currentTimeMillis();
System.out.println("test_srav_on: " + (end - start)); System.out.println("test_srav_on: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_sllc_add(a0, a1);
}
end = System.currentTimeMillis();
System.out.println("test_sllc_add: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_sllv_add(a0, a1, ADD_INIT);
}
end = System.currentTimeMillis();
System.out.println("test_sllv_add: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_srlc_add(a0, a1);
}
end = System.currentTimeMillis();
System.out.println("test_srlc_add: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_srlv_add(a0, a1, ADD_INIT);
}
end = System.currentTimeMillis();
System.out.println("test_srlv_add: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_srac_add(a0, a1);
}
end = System.currentTimeMillis();
System.out.println("test_srac_add: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_srav_add(a0, a1, ADD_INIT);
}
end = System.currentTimeMillis();
System.out.println("test_srav_add: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_sllc_and(a0, a1);
}
end = System.currentTimeMillis();
System.out.println("test_sllc_and: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_sllv_and(a0, a1, BIT_MASK);
}
end = System.currentTimeMillis();
System.out.println("test_sllv_and: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_srlc_and(a0, a1);
}
end = System.currentTimeMillis();
System.out.println("test_srlc_and: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_srlv_and(a0, a1, BIT_MASK);
}
end = System.currentTimeMillis();
System.out.println("test_srlv_and: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_srac_and(a0, a1);
}
end = System.currentTimeMillis();
System.out.println("test_srac_and: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_srav_and(a0, a1, BIT_MASK);
}
end = System.currentTimeMillis();
System.out.println("test_srav_and: " + (end - start));
return errn; return errn;
} }
...@@ -854,6 +1006,26 @@ public class TestLongVect { ...@@ -854,6 +1006,26 @@ public class TestLongVect {
a0[i] = (long)(a1[i]<<b); a0[i] = (long)(a1[i]<<b);
} }
} }
// Left shift of a 64-bit sum: for every index i of a0, stores
// (a1[i] + ADD_INIT) << VALUE into a0[i]. The addition and shift are done in
// long arithmetic because a1[i] is a long.
// a1 is only read; it must be at least as long as a0 because the loop is
// bounded by a0.length.
static void test_sllc_add(long[] a0, long[] a1) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (long)((a1[i] + ADD_INIT)<<VALUE);
}
}
// Left shift of a 64-bit sum where the addend is a runtime argument: for
// every index i of a0, stores (a1[i] + b) << VALUE into a0[i].
// b is the value added, not the shift distance (which is the class-level
// VALUE). a1 must be at least as long as a0.
static void test_sllv_add(long[] a0, long[] a1, long b) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (long)((a1[i] + b)<<VALUE);
}
}
// Left shift of a masked 64-bit value: for every index i of a0, stores
// (a1[i] & BIT_MASK) << VALUE into a0[i].
// a1 is only read and must be at least as long as a0.
static void test_sllc_and(long[] a0, long[] a1) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (long)((a1[i] & BIT_MASK)<<VALUE);
}
}
// Left shift of a masked 64-bit value where the mask is a runtime argument:
// for every index i of a0, stores (a1[i] & b) << VALUE into a0[i].
// b is the AND mask; the shift distance is the class-level VALUE.
// a1 must be at least as long as a0.
static void test_sllv_and(long[] a0, long[] a1, long b) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (long)((a1[i] & b)<<VALUE);
}
}
static void test_srlc(long[] a0, long[] a1) { static void test_srlc(long[] a0, long[] a1) {
for (int i = 0; i < a0.length; i+=1) { for (int i = 0; i < a0.length; i+=1) {
...@@ -880,6 +1052,26 @@ public class TestLongVect { ...@@ -880,6 +1052,26 @@ public class TestLongVect {
a0[i] = (long)(a1[i]>>>b); a0[i] = (long)(a1[i]>>>b);
} }
} }
// Unsigned (logical) right shift of a 64-bit sum: for every index i of a0,
// stores (a1[i] + ADD_INIT) >>> VALUE into a0[i].
// a1 is only read; it must be at least as long as a0 because the loop is
// bounded by a0.length.
static void test_srlc_add(long[] a0, long[] a1) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (long)((a1[i] + ADD_INIT)>>>VALUE);
}
}
// Unsigned (logical) right shift of a 64-bit sum where the addend is a
// runtime argument: for every index i of a0, stores (a1[i] + b) >>> VALUE
// into a0[i]. b is the value added; the shift distance is the class-level
// VALUE. a1 must be at least as long as a0.
static void test_srlv_add(long[] a0, long[] a1, long b) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (long)((a1[i] + b)>>>VALUE);
}
}
// Unsigned (logical) right shift of a masked 64-bit value: for every index i
// of a0, stores (a1[i] & BIT_MASK) >>> VALUE into a0[i].
// a1 is only read and must be at least as long as a0.
static void test_srlc_and(long[] a0, long[] a1) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (long)((a1[i] & BIT_MASK)>>>VALUE);
}
}
// Unsigned (logical) right shift of a masked 64-bit value where the mask is a
// runtime argument: for every index i of a0, stores (a1[i] & b) >>> VALUE
// into a0[i]. b is the AND mask; the shift distance is the class-level VALUE.
// a1 must be at least as long as a0.
static void test_srlv_and(long[] a0, long[] a1, long b) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (long)((a1[i] & b)>>>VALUE);
}
}
static void test_srac(long[] a0, long[] a1) { static void test_srac(long[] a0, long[] a1) {
for (int i = 0; i < a0.length; i+=1) { for (int i = 0; i < a0.length; i+=1) {
...@@ -906,6 +1098,26 @@ public class TestLongVect { ...@@ -906,6 +1098,26 @@ public class TestLongVect {
a0[i] = (long)(a1[i]>>b); a0[i] = (long)(a1[i]>>b);
} }
} }
// Signed (arithmetic) right shift of a 64-bit sum: for every index i of a0,
// stores (a1[i] + ADD_INIT) >> VALUE into a0[i].
// a1 is only read; it must be at least as long as a0 because the loop is
// bounded by a0.length.
static void test_srac_add(long[] a0, long[] a1) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (long)((a1[i] + ADD_INIT)>>VALUE);
}
}
// Signed (arithmetic) right shift of a 64-bit sum where the addend is a
// runtime argument: for every index i of a0, stores (a1[i] + b) >> VALUE into
// a0[i]. b is the value added; the shift distance is the class-level VALUE.
// a1 must be at least as long as a0.
static void test_srav_add(long[] a0, long[] a1, long b) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (long)((a1[i] + b)>>VALUE);
}
}
// Signed (arithmetic) right shift of a masked 64-bit value: for every index i
// of a0, stores (a1[i] & BIT_MASK) >> VALUE into a0[i].
// a1 is only read and must be at least as long as a0.
static void test_srac_and(long[] a0, long[] a1) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (long)((a1[i] & BIT_MASK)>>VALUE);
}
}
// Signed (arithmetic) right shift of a masked 64-bit value where the mask is
// a runtime argument: for every index i of a0, stores (a1[i] & b) >> VALUE
// into a0[i]. b is the AND mask; the shift distance is the class-level VALUE.
// a1 must be at least as long as a0.
static void test_srav_and(long[] a0, long[] a1, long b) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (long)((a1[i] & b)>>VALUE);
}
}
static int verify(String text, int i, long elem, long val) { static int verify(String text, int i, long elem, long val) {
if (elem != val) { if (elem != val) {
......
...@@ -75,6 +75,7 @@ public class TestShortVect { ...@@ -75,6 +75,7 @@ public class TestShortVect {
test_subc(a0, a1); test_subc(a0, a1);
test_subv(a0, a1, (short)VALUE); test_subv(a0, a1, (short)VALUE);
test_suba(a0, a1, a2); test_suba(a0, a1, a2);
test_mulc(a0, a1); test_mulc(a0, a1);
test_mulv(a0, a1, (short)VALUE); test_mulv(a0, a1, (short)VALUE);
test_mula(a0, a1, a2); test_mula(a0, a1, a2);
...@@ -87,6 +88,7 @@ public class TestShortVect { ...@@ -87,6 +88,7 @@ public class TestShortVect {
test_divc_n(a0, a1); test_divc_n(a0, a1);
test_divv(a0, a1, (short)-VALUE); test_divv(a0, a1, (short)-VALUE);
test_diva(a0, a1, a3); test_diva(a0, a1, a3);
test_andc(a0, a1); test_andc(a0, a1);
test_andv(a0, a1, (short)BIT_MASK); test_andv(a0, a1, (short)BIT_MASK);
test_anda(a0, a1, a4); test_anda(a0, a1, a4);
...@@ -96,30 +98,49 @@ public class TestShortVect { ...@@ -96,30 +98,49 @@ public class TestShortVect {
test_xorc(a0, a1); test_xorc(a0, a1);
test_xorv(a0, a1, (short)BIT_MASK); test_xorv(a0, a1, (short)BIT_MASK);
test_xora(a0, a1, a4); test_xora(a0, a1, a4);
test_sllc(a0, a1); test_sllc(a0, a1);
test_sllv(a0, a1, VALUE); test_sllv(a0, a1, VALUE);
test_srlc(a0, a1); test_srlc(a0, a1);
test_srlv(a0, a1, VALUE); test_srlv(a0, a1, VALUE);
test_srac(a0, a1); test_srac(a0, a1);
test_srav(a0, a1, VALUE); test_srav(a0, a1, VALUE);
test_sllc_n(a0, a1); test_sllc_n(a0, a1);
test_sllv(a0, a1, -VALUE); test_sllv(a0, a1, -VALUE);
test_srlc_n(a0, a1); test_srlc_n(a0, a1);
test_srlv(a0, a1, -VALUE); test_srlv(a0, a1, -VALUE);
test_srac_n(a0, a1); test_srac_n(a0, a1);
test_srav(a0, a1, -VALUE); test_srav(a0, a1, -VALUE);
test_sllc_o(a0, a1); test_sllc_o(a0, a1);
test_sllv(a0, a1, SHIFT); test_sllv(a0, a1, SHIFT);
test_srlc_o(a0, a1); test_srlc_o(a0, a1);
test_srlv(a0, a1, SHIFT); test_srlv(a0, a1, SHIFT);
test_srac_o(a0, a1); test_srac_o(a0, a1);
test_srav(a0, a1, SHIFT); test_srav(a0, a1, SHIFT);
test_sllc_on(a0, a1); test_sllc_on(a0, a1);
test_sllv(a0, a1, -SHIFT); test_sllv(a0, a1, -SHIFT);
test_srlc_on(a0, a1); test_srlc_on(a0, a1);
test_srlv(a0, a1, -SHIFT); test_srlv(a0, a1, -SHIFT);
test_srac_on(a0, a1); test_srac_on(a0, a1);
test_srav(a0, a1, -SHIFT); test_srav(a0, a1, -SHIFT);
test_sllc_add(a0, a1);
test_sllv_add(a0, a1, ADD_INIT);
test_srlc_add(a0, a1);
test_srlv_add(a0, a1, ADD_INIT);
test_srac_add(a0, a1);
test_srav_add(a0, a1, ADD_INIT);
test_sllc_and(a0, a1);
test_sllv_and(a0, a1, BIT_MASK);
test_srlc_and(a0, a1);
test_srlv_and(a0, a1, BIT_MASK);
test_srac_and(a0, a1);
test_srav_and(a0, a1, BIT_MASK);
test_pack2(p2, a1); test_pack2(p2, a1);
test_unpack2(a0, p2); test_unpack2(a0, p2);
test_pack2_swap(p2, a1); test_pack2_swap(p2, a1);
...@@ -364,6 +385,60 @@ public class TestShortVect { ...@@ -364,6 +385,60 @@ public class TestShortVect {
errn += verify("test_srav_on: ", i, a0[i], (short)((short)(ADD_INIT+i)>>(-SHIFT))); errn += verify("test_srav_on: ", i, a0[i], (short)((short)(ADD_INIT+i)>>(-SHIFT)));
} }
test_sllc_add(a0, a1);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_sllc_add: ", i, a0[i], (short)(((short)(ADD_INIT+i) + ADD_INIT)<<VALUE));
}
test_sllv_add(a0, a1, ADD_INIT);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_sllv_add: ", i, a0[i], (short)(((short)(ADD_INIT+i) + ADD_INIT)<<VALUE));
}
test_srlc_add(a0, a1);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_srlc_add: ", i, a0[i], (short)(((short)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
}
test_srlv_add(a0, a1, ADD_INIT);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_srlv_add: ", i, a0[i], (short)(((short)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
}
test_srac_add(a0, a1);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_srac_add: ", i, a0[i], (short)(((short)(ADD_INIT+i) + ADD_INIT)>>VALUE));
}
test_srav_add(a0, a1, ADD_INIT);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_srav_add: ", i, a0[i], (short)(((short)(ADD_INIT+i) + ADD_INIT)>>VALUE));
}
test_sllc_and(a0, a1);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_sllc_and: ", i, a0[i], (short)(((short)(ADD_INIT+i) & BIT_MASK)<<VALUE));
}
test_sllv_and(a0, a1, BIT_MASK);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_sllv_and: ", i, a0[i], (short)(((short)(ADD_INIT+i) & BIT_MASK)<<VALUE));
}
test_srlc_and(a0, a1);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_srlc_and: ", i, a0[i], (short)(((short)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
}
test_srlv_and(a0, a1, BIT_MASK);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_srlv_and: ", i, a0[i], (short)(((short)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
}
test_srac_and(a0, a1);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_srac_and: ", i, a0[i], (short)(((short)(ADD_INIT+i) & BIT_MASK)>>VALUE));
}
test_srav_and(a0, a1, BIT_MASK);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_srav_and: ", i, a0[i], (short)(((short)(ADD_INIT+i) & BIT_MASK)>>VALUE));
}
test_pack2(p2, a1); test_pack2(p2, a1);
for (int i=0; i<ARRLEN/2; i++) { for (int i=0; i<ARRLEN/2; i++) {
errn += verify("test_pack2: ", i, p2[i], ((int)(ADD_INIT+2*i) & 0xFFFF) | ((int)(ADD_INIT+2*i+1) << 16)); errn += verify("test_pack2: ", i, p2[i], ((int)(ADD_INIT+2*i) & 0xFFFF) | ((int)(ADD_INIT+2*i+1) << 16));
...@@ -760,6 +835,84 @@ public class TestShortVect { ...@@ -760,6 +835,84 @@ public class TestShortVect {
end = System.currentTimeMillis(); end = System.currentTimeMillis();
System.out.println("test_srav_on: " + (end - start)); System.out.println("test_srav_on: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_sllc_add(a0, a1);
}
end = System.currentTimeMillis();
System.out.println("test_sllc_add: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_sllv_add(a0, a1, ADD_INIT);
}
end = System.currentTimeMillis();
System.out.println("test_sllv_add: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_srlc_add(a0, a1);
}
end = System.currentTimeMillis();
System.out.println("test_srlc_add: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_srlv_add(a0, a1, ADD_INIT);
}
end = System.currentTimeMillis();
System.out.println("test_srlv_add: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_srac_add(a0, a1);
}
end = System.currentTimeMillis();
System.out.println("test_srac_add: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_srav_add(a0, a1, ADD_INIT);
}
end = System.currentTimeMillis();
System.out.println("test_srav_add: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_sllc_and(a0, a1);
}
end = System.currentTimeMillis();
System.out.println("test_sllc_and: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_sllv_and(a0, a1, BIT_MASK);
}
end = System.currentTimeMillis();
System.out.println("test_sllv_and: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_srlc_and(a0, a1);
}
end = System.currentTimeMillis();
System.out.println("test_srlc_and: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_srlv_and(a0, a1, BIT_MASK);
}
end = System.currentTimeMillis();
System.out.println("test_srlv_and: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_srac_and(a0, a1);
}
end = System.currentTimeMillis();
System.out.println("test_srac_and: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_srav_and(a0, a1, BIT_MASK);
}
end = System.currentTimeMillis();
System.out.println("test_srav_and: " + (end - start));
start = System.currentTimeMillis(); start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) { for (int i=0; i<ITERS; i++) {
test_pack2(p2, a1); test_pack2(p2, a1);
...@@ -968,6 +1121,26 @@ public class TestShortVect { ...@@ -968,6 +1121,26 @@ public class TestShortVect {
a0[i] = (short)(a1[i]<<b); a0[i] = (short)(a1[i]<<b);
} }
} }
// Left shift of a sum on short data: for every index i of a0, stores
// (short)((a1[i] + ADD_INIT) << VALUE) into a0[i].
// Java promotes a1[i] to int (or wider) before the add and shift, so the
// (short) cast is what truncates the result back to 16 bits.
// a1 is only read and must be at least as long as a0.
static void test_sllc_add(short[] a0, short[] a1) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (short)((a1[i] + ADD_INIT)<<VALUE);
}
}
// Left shift of a sum on short data where the addend is a runtime argument:
// for every index i of a0, stores (short)((a1[i] + b) << VALUE) into a0[i].
// b is an int addend (not the shift distance, which is the class-level
// VALUE); the sum and shift are computed in int and then truncated to short.
// a1 must be at least as long as a0.
static void test_sllv_add(short[] a0, short[] a1, int b) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (short)((a1[i] + b)<<VALUE);
}
}
// Left shift of a masked value on short data: for every index i of a0, stores
// (short)((a1[i] & BIT_MASK) << VALUE) into a0[i].
// a1[i] is promoted to int (or wider) before the AND and shift; the (short)
// cast truncates the result. a1 must be at least as long as a0.
static void test_sllc_and(short[] a0, short[] a1) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (short)((a1[i] & BIT_MASK)<<VALUE);
}
}
// Left shift of a masked value on short data where the mask is a runtime
// argument: for every index i of a0, stores (short)((a1[i] & b) << VALUE)
// into a0[i]. b is an int AND mask; the shift distance is the class-level
// VALUE. The expression is evaluated in int and truncated to short.
// a1 must be at least as long as a0.
static void test_sllv_and(short[] a0, short[] a1, int b) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (short)((a1[i] & b)<<VALUE);
}
}
static void test_srlc(short[] a0, short[] a1) { static void test_srlc(short[] a0, short[] a1) {
for (int i = 0; i < a0.length; i+=1) { for (int i = 0; i < a0.length; i+=1) {
...@@ -994,6 +1167,26 @@ public class TestShortVect { ...@@ -994,6 +1167,26 @@ public class TestShortVect {
a0[i] = (short)(a1[i]>>>b); a0[i] = (short)(a1[i]>>>b);
} }
} }
// Unsigned (logical) right shift of a sum on short data: for every index i of
// a0, stores (short)((a1[i] + ADD_INIT) >>> VALUE) into a0[i].
// The >>> acts on the promoted (int or wider) sum, not on a 16-bit quantity;
// the (short) cast then truncates the result.
// a1 is only read and must be at least as long as a0.
static void test_srlc_add(short[] a0, short[] a1) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (short)((a1[i] + ADD_INIT)>>>VALUE);
}
}
// Unsigned (logical) right shift of a sum on short data where the addend is a
// runtime argument: for every index i of a0, stores
// (short)((a1[i] + b) >>> VALUE) into a0[i].
// b is an int addend; the shift distance is the class-level VALUE. The >>>
// acts on the 32-bit int sum, and the (short) cast truncates the result.
// a1 must be at least as long as a0.
static void test_srlv_add(short[] a0, short[] a1, int b) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (short)((a1[i] + b)>>>VALUE);
}
}
// Unsigned (logical) right shift of a masked value on short data: for every
// index i of a0, stores (short)((a1[i] & BIT_MASK) >>> VALUE) into a0[i].
// The >>> acts on the promoted (int or wider) value; the (short) cast
// truncates the result. a1 must be at least as long as a0.
static void test_srlc_and(short[] a0, short[] a1) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (short)((a1[i] & BIT_MASK)>>>VALUE);
}
}
// Unsigned (logical) right shift of a masked value on short data where the
// mask is a runtime argument: for every index i of a0, stores
// (short)((a1[i] & b) >>> VALUE) into a0[i].
// b is an int AND mask; the shift distance is the class-level VALUE. The >>>
// acts on the 32-bit int value, and the (short) cast truncates the result.
// a1 must be at least as long as a0.
static void test_srlv_and(short[] a0, short[] a1, int b) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (short)((a1[i] & b)>>>VALUE);
}
}
static void test_srac(short[] a0, short[] a1) { static void test_srac(short[] a0, short[] a1) {
for (int i = 0; i < a0.length; i+=1) { for (int i = 0; i < a0.length; i+=1) {
...@@ -1020,6 +1213,26 @@ public class TestShortVect { ...@@ -1020,6 +1213,26 @@ public class TestShortVect {
a0[i] = (short)(a1[i]>>b); a0[i] = (short)(a1[i]>>b);
} }
} }
// Signed (arithmetic) right shift of a sum on short data: for every index i
// of a0, stores (short)((a1[i] + ADD_INIT) >> VALUE) into a0[i].
// a1[i] is promoted to int (or wider) before the add and shift; the (short)
// cast truncates the result. a1 must be at least as long as a0.
static void test_srac_add(short[] a0, short[] a1) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (short)((a1[i] + ADD_INIT)>>VALUE);
}
}
// Signed (arithmetic) right shift of a sum on short data where the addend is
// a runtime argument: for every index i of a0, stores
// (short)((a1[i] + b) >> VALUE) into a0[i].
// b is an int addend; the shift distance is the class-level VALUE. The
// expression is evaluated in int and truncated to short.
// a1 must be at least as long as a0.
static void test_srav_add(short[] a0, short[] a1, int b) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (short)((a1[i] + b)>>VALUE);
}
}
// Signed (arithmetic) right shift of a masked value on short data: for every
// index i of a0, stores (short)((a1[i] & BIT_MASK) >> VALUE) into a0[i].
// a1[i] is promoted to int (or wider) before the AND and shift; the (short)
// cast truncates the result. a1 must be at least as long as a0.
static void test_srac_and(short[] a0, short[] a1) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (short)((a1[i] & BIT_MASK)>>VALUE);
}
}
// Signed (arithmetic) right shift of a masked value on short data where the
// mask is a runtime argument: for every index i of a0, stores
// (short)((a1[i] & b) >> VALUE) into a0[i].
// b is an int AND mask; the shift distance is the class-level VALUE. The
// expression is evaluated in int and truncated to short.
// a1 must be at least as long as a0.
static void test_srav_and(short[] a0, short[] a1, int b) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (short)((a1[i] & b)>>VALUE);
}
}
static void test_pack2(int[] p2, short[] a1) { static void test_pack2(int[] p2, short[] a1) {
if (p2.length*2 > a1.length) return; if (p2.length*2 > a1.length) return;
......
/*
* Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
/**
* @author Tom Deneau
*/
import javax.crypto.Cipher;
import javax.crypto.KeyGenerator;
import javax.crypto.SecretKey;
import javax.crypto.spec.IvParameterSpec;
import javax.crypto.spec.SecretKeySpec;
import java.security.AlgorithmParameters;
import java.util.Random;
import java.util.Arrays;
/**
 * Shared fixture for the AES encode/decode tests.
 *
 * Holds the key, the plaintext, the two Cipher instances (one initialised for
 * encryption, one for decryption) and the buffers that subclasses read and
 * write in their run() implementations. Configuration is taken from system
 * properties when the instance is created.
 */
abstract public class TestAESBase {
    // --- configuration, read from -D system properties (defaults shown) ---
    int msgSize = Integer.getInteger("msgSize", 646);        // plaintext length in bytes
    boolean checkOutput = Boolean.getBoolean("checkOutput"); // compare every result with the reference
    boolean noReinit = Boolean.getBoolean("noReinit");       // read by subclasses: skip per-run Cipher.init
    int keySize = Integer.getInteger("keySize", 128);        // key size in bits
    String algorithm = System.getProperty("algorithm", "AES");
    String mode = System.getProperty("mode", "CBC");

    // --- buffers ---
    byte[] input;           // plaintext, filled by prepare()
    byte[] encode;          // most recent ciphertext
    byte[] expectedEncode;  // reference ciphertext (only set when checkOutput)
    byte[] decode;          // most recent decrypted text
    byte[] expectedDecode;  // reference decrypted text (only set when checkOutput)

    // Fixed seed so that randomly generated keys are reproducible.
    Random random = new Random(0);
    Cipher cipher;          // initialised for ENCRYPT_MODE by prepare()
    Cipher dCipher;         // initialised for DECRYPT_MODE by prepare()
    String paddingStr = "PKCS5Padding";
    AlgorithmParameters algParams;
    SecretKey key;
    int ivLen;

    static int numThreads = 0;
    // Nothing in this class assigns threadId, so it keeps its default of 0 and
    // every instance prints the informational output guarded by threadId == 0.
    int threadId;

    /** Hands out consecutive ids starting at 0; synchronized so ids are unique. */
    static synchronized int getThreadId() {
        int id = numThreads;
        numThreads++;
        return id;
    }

    /** One measured iteration; implemented by the encode/decode subclasses. */
    abstract public void run();

    /**
     * Builds the key, plaintext and both ciphers, then performs one encryption
     * and one decryption so that {@code encode} and {@code decode} are
     * populated. When checkOutput is set, copies of those results are kept as
     * the expected values. Any exception is printed and terminates the JVM
     * with exit status 1.
     */
    public void prepare() {
        try {
            System.out.println("\nmsgSize=" + msgSize + ", key size=" + keySize + ", reInit=" + !noReinit + ", checkOutput=" + checkOutput);

            int keyLenBytes = (keySize == 0 ? 16 : keySize/8);
            byte keyBytes[] = new byte[keyLenBytes];
            if (keySize == 128) {
                // Fixed, well-known key for the default size.
                keyBytes = new byte[] {-8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7};
            } else {
                random.nextBytes(keyBytes);
            }

            key = new SecretKeySpec(keyBytes, algorithm);
            if (threadId == 0) {
                System.out.println("Algorithm: " + key.getAlgorithm() + "("
                                   + key.getEncoded().length * 8 + "bit)");
            }

            // Plaintext is the repeating byte pattern 0x00..0xff.
            input = new byte[msgSize];
            for (int i=0; i<input.length; i++) {
                input[i] = (byte) (i & 0xff);
            }

            cipher = Cipher.getInstance(algorithm + "/" + mode + "/" + paddingStr, "SunJCE");
            dCipher = Cipher.getInstance(algorithm + "/" + mode + "/" + paddingStr, "SunJCE");

            // All-zero IV whose length depends on the algorithm name.
            ivLen = (algorithm.equals("AES") ? 16 : algorithm.equals("DES") ? 8 : 0);
            IvParameterSpec initVector = new IvParameterSpec(new byte[ivLen]);

            cipher.init(Cipher.ENCRYPT_MODE, key, initVector);
            // Reuse the encryptor's parameters so the decryptor sees the same IV.
            algParams = cipher.getParameters();
            dCipher.init(Cipher.DECRYPT_MODE, key, algParams);
            if (threadId == 0) {
                childShowCipher();
            }

            // do one encode and decode in preparation
            // this will also create the encode buffer and decode buffer
            encode = cipher.doFinal(input);
            decode = dCipher.doFinal(encode);
            if (checkOutput) {
                expectedEncode = encode.clone();
                expectedDecode = decode.clone();
                showArray(key.getEncoded() , "key: ");
                showArray(input, "input: ");
                showArray(encode, "encode: ");
                showArray(decode, "decode: ");
            }
        }
        catch (Exception e) {
            e.printStackTrace();
            System.exit(1);
        }
    }

    /** Prints the array's name, its length and at most its first 32 bytes in hex. */
    void showArray(byte b[], String name) {
        System.out.format("%s [%d]: ", name, b.length);
        for (int i=0; i<Math.min(b.length, 32); i++) {
            System.out.format("%02x ", b[i] & 0xff);
        }
        System.out.println();
    }

    /**
     * Compares an actual result with the expected one. On a length mismatch or
     * the first differing byte, prints a diagnostic plus both arrays and
     * terminates the JVM with exit status 1.
     */
    void compareArrays(byte b[], byte exp[]) {
        if (b.length != exp.length) {
            System.out.format("different lengths for actual and expected output arrays\n");
            showArray(b, "test: ");
            showArray(exp, "exp : ");
            System.exit(1);
        }
        for (int i=0; i< exp.length; i++) {
            if (b[i] != exp[i]) {
                System.out.format("output error at index %d: got %02x, expected %02x\n", i, b[i] & 0xff, exp[i] & 0xff);
                showArray(b, "test: ");
                showArray(exp, "exp : ");
                System.exit(1);
            }
        }
    }

    /**
     * Prints the provider and algorithm of the given cipher.
     *
     * @param c    the cipher to describe
     * @param kind label prefixed to each line (e.g. "Encryption")
     */
    void showCipher(Cipher c, String kind) {
        // Report on the cipher that was passed in, not on the encryption field;
        // otherwise the decryption subclass describes the wrong object.
        System.out.println(kind + " cipher provider: " + c.getProvider());
        System.out.println(kind + " cipher algorithm: " + c.getAlgorithm());
    }

    /** Subclass hook: report the cipher that subclass exercises via showCipher. */
    abstract void childShowCipher();
}
/*
* Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
/**
* @author Tom Deneau
*/
import javax.crypto.Cipher;
/**
 * Decryption half of the AES test: each run() decrypts the ciphertext held in
 * {@code encode} with {@code dCipher}.
 */
public class TestAESDecode extends TestAESBase {
    /**
     * Performs one decryption. Unless noReinit is set the cipher is first
     * re-initialised for DECRYPT_MODE. Any exception is printed and terminates
     * the JVM with exit status 1.
     */
    @Override
    public void run() {
        try {
            if (!noReinit) {
                dCipher.init(Cipher.DECRYPT_MODE, key, algParams);
            }
            if (!checkOutput) {
                // Unchecked path: allocate an output buffer of the size the
                // cipher reports and decrypt straight into it.
                decode = new byte[dCipher.getOutputSize(encode.length)];
                dCipher.doFinal(encode, 0, encode.length, decode);
                return;
            }
            // Checked path: let doFinal return a fresh result array and
            // compare it against the reference produced by prepare().
            decode = dCipher.doFinal(encode, 0, encode.length);
            compareArrays(decode, expectedDecode);
        } catch (Exception failure) {
            failure.printStackTrace();
            System.exit(1);
        }
    }

    /** Reports the provider and algorithm of the decryption cipher. */
    @Override
    void childShowCipher() {
        showCipher(dCipher, "Decryption");
    }
}
/*
* Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
/**
* @author Tom Deneau
*/
import javax.crypto.Cipher;
/**
 * Encryption half of the AES test: each run() encrypts the first
 * {@code msgSize} bytes of {@code input} with {@code cipher}.
 */
public class TestAESEncode extends TestAESBase {
    /**
     * Performs one encryption. Unless noReinit is set the cipher is first
     * re-initialised for ENCRYPT_MODE. Any exception is printed and terminates
     * the JVM with exit status 1.
     */
    @Override
    public void run() {
        try {
            if (!noReinit) {
                cipher.init(Cipher.ENCRYPT_MODE, key, algParams);
            }
            if (!checkOutput) {
                // Unchecked path: allocate an output buffer of the size the
                // cipher reports and encrypt straight into it.
                encode = new byte[cipher.getOutputSize(msgSize)];
                cipher.doFinal(input, 0, msgSize, encode);
                return;
            }
            // Checked path: let doFinal return a fresh result array and
            // compare it against the reference produced by prepare().
            encode = cipher.doFinal(input, 0, msgSize);
            compareArrays(encode, expectedEncode);
        } catch (Exception failure) {
            failure.printStackTrace();
            System.exit(1);
        }
    }

    /** Reports the provider and algorithm of the encryption cipher. */
    @Override
    void childShowCipher() {
        showCipher(cipher, "Encryption");
    }
}
/*
* Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
/**
* @test
* @bug 7184394
* @summary add intrinsics to use AES instructions
*
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true TestAESMain
*
* @author Tom Deneau
*/
public class TestAESMain {

    /**
     * Runs the AES encode test and then the AES decode test in a loop and
     * prints the wall-clock time each loop took.
     *
     * @param args optional; when present, args[0] is the iteration count
     *             (defaults to 1000000)
     * @throws NumberFormatException if args[0] is not a valid integer
     */
    public static void main(String[] args) {
        int iters = (args.length > 0 ? Integer.parseInt(args[0]) : 1000000);
        System.out.println(iters + " iterations");

        TestAESEncode etest = new TestAESEncode();
        etest.prepare();
        long start = System.nanoTime();
        for (int i = 0; i < iters; i++) {
            etest.run();
        }
        long end = System.nanoTime();
        // nanoTime() differences are in nanoseconds; divide by 1e6 so the
        // printed number really is in the milliseconds the label promises.
        System.out.println("TestAESEncode runtime was " + (double)((end - start)/1000000.0) + " ms");

        TestAESDecode dtest = new TestAESDecode();
        dtest.prepare();
        start = System.nanoTime();
        for (int i = 0; i < iters; i++) {
            dtest.run();
        }
        end = System.nanoTime();
        System.out.println("TestAESDecode runtime was " + (double)((end - start)/1000000.0) + " ms");
    }
}
/*
* Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/**
* @test
* @bug 8000805
* @summary JMM issue: short loads are non-atomic
*
* @run main/othervm -server -XX:-TieredCompilation -Xcomp -XX:+PrintCompilation -XX:CompileOnly=Test8000805.loadS2LmaskFF,Test8000805.loadS2Lmask16,Test8000805.loadS2Lmask13,Test8000805.loadUS_signExt,Test8000805.loadB2L_mask8 Test8000805
*/
public class Test8000805 {

    // Each load-and-mask method comes in two copies with identical bodies.
    // Only the names without the "_1" suffix appear in the -XX:CompileOnly
    // list of the @run line, so the "_1" copies presumably execute without
    // that compilation and serve as the reference result.
    static long loadS2LmaskFF   (short[] sa) { return sa[0] & 0xFF; }
    static long loadS2LmaskFF_1 (short[] sa) { return sa[0] & 0xFF; }

    static long loadS2Lmask16   (short[] sa) { return sa[0] & 0xFFFE; }
    static long loadS2Lmask16_1 (short[] sa) { return sa[0] & 0xFFFE; }

    static long loadS2Lmask13   (short[] sa) { return sa[0] & 0x0FFF; }
    static long loadS2Lmask13_1 (short[] sa) { return sa[0] & 0x0FFF; }

    static int loadUS_signExt   (char[] ca) { return (ca[0] << 16) >> 16; }
    static int loadUS_signExt_1 (char[] ca) { return (ca[0] << 16) >> 16; }

    static long loadB2L_mask8   (byte[] ba) { return ba[0] & 0x55; }
    static long loadB2L_mask8_1 (byte[] ba) { return ba[0] & 0x55; }

    /** Throws InternalError naming the failed check when two long results differ. */
    private static void check(String name, long v1, long v2) {
        if (v1 != v2)
            throw new InternalError(String.format("%s failed: %x != %x", name, v1, v2));
    }

    /** Int overload, so a mismatch is formatted as an int (8 hex digits at most). */
    private static void check(String name, int v1, int v2) {
        if (v1 != v2)
            throw new InternalError(String.format("%s failed: %x != %x", name, v1, v2));
    }

    /**
     * Feeds every byte value and every short/char value through each method
     * pair and fails with InternalError on the first mismatch.
     */
    public static void main(String[] args) {
        // The counter is an int, so "<=" cannot overflow; it makes the loop
        // include Byte.MAX_VALUE, which a "<" bound would skip.
        for (int i = Byte.MIN_VALUE; i <= Byte.MAX_VALUE; i++) {
            byte[] ba = new byte[] { (byte) i };
            check("loadB2L_mask8", loadB2L_mask8(ba), loadB2L_mask8_1(ba));
        }

        // Likewise inclusive of Short.MAX_VALUE.
        for (int i = Short.MIN_VALUE; i <= Short.MAX_VALUE; i++) {
            short[] sa = new short[] { (short) i };
            char[] ca = new char[] { (char) i };
            check("loadS2LmaskFF", loadS2LmaskFF(sa), loadS2LmaskFF_1(sa));
            check("loadS2Lmask16", loadS2Lmask16(sa), loadS2Lmask16_1(sa));
            check("loadS2Lmask13", loadS2Lmask13(sa), loadS2Lmask13_1(sa));
            check("loadUS_signExt", loadUS_signExt(ca), loadUS_signExt_1(ca));
        }

        System.out.println("TEST PASSED.");
    }
}
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册