提交 de51fea1 编写于 作者: K kvn

8002074: Support for AES on SPARC

Summary: Add intrinsics/stub routines support for single-block and multi-block (as used by Cipher Block Chaining mode) AES encryption and decryption operations on the SPARC platform.
Reviewed-by: kvn, roland
Contributed-by: shrinivas.joshi@oracle.com
上级 84e57bf1
......@@ -88,6 +88,7 @@ class Assembler : public AbstractAssembler {
orncc_op3 = 0x16,
xnorcc_op3 = 0x17,
addccc_op3 = 0x18,
aes4_op3 = 0x19,
umulcc_op3 = 0x1a,
smulcc_op3 = 0x1b,
subccc_op3 = 0x1c,
......@@ -121,6 +122,8 @@ class Assembler : public AbstractAssembler {
fpop1_op3 = 0x34,
fpop2_op3 = 0x35,
impdep1_op3 = 0x36,
aes3_op3 = 0x36,
flog3_op3 = 0x36,
impdep2_op3 = 0x37,
jmpl_op3 = 0x38,
rett_op3 = 0x39,
......@@ -206,7 +209,22 @@ class Assembler : public AbstractAssembler {
mstouw_opf = 0x111,
mstosw_opf = 0x113,
mxtod_opf = 0x118,
mwtos_opf = 0x119
mwtos_opf = 0x119,
aes_kexpand0_opf = 0x130,
aes_kexpand2_opf = 0x131
};
// Values for the op5 instruction field of the four-operand AES instructions
// (the field is placed by op5() when the instruction word is assembled).
enum op5s {
  aes_eround01_op5   = 0x0,  // encryption round, columns 0 and 1
  aes_eround23_op5   = 0x1,  // encryption round, columns 2 and 3
  aes_dround01_op5   = 0x2,  // decryption round, columns 0 and 1
  aes_dround23_op5   = 0x3,  // decryption round, columns 2 and 3
  aes_eround01_l_op5 = 0x4,  // last encryption round, columns 0 and 1
  aes_eround23_l_op5 = 0x5,  // last encryption round, columns 2 and 3
  aes_dround01_l_op5 = 0x6,  // last decryption round, columns 0 and 1
  aes_dround23_l_op5 = 0x7,  // last decryption round, columns 2 and 3
  aes_kexpand1_op5   = 0x8   // key expansion step taking an immediate
};
// Register-condition codes; rc_last aliases the highest defined code.
enum RCondition {
  rc_z    = 1,
  rc_lez  = 2,
  rc_lz   = 3,
  rc_nz   = 5,
  rc_gz   = 6,
  rc_gez  = 7,
  rc_last = rc_gez
};
......@@ -427,6 +445,7 @@ class Assembler : public AbstractAssembler {
// Instruction-word field encoders. Each one hands its argument to u_field()
// together with the (high bit, low bit) pair of the field it fills.

// Immediate flag: a single bit at position 13.
static int immed( bool i) {
  const int flag = i ? 1 : 0;
  return u_field(flag, 13, 13);
}

// Bits 10..5.
static int opf_low6( int w) {
  return u_field(w, 10, 5);
}

// Bits 9..5.
static int opf_low5( int w) {
  return u_field(w, 9, 5);
}

// op5 field, bits 8..5 (see enum op5s).
static int op5( int x) {
  return u_field(x, 8, 5);
}

// Trap condition-code selector, bits 12..11.
static int trapcc( CC cc) {
  return u_field(cc, 12, 11);
}

// Bit 12; shift x=1 means 64-bit.
static int sx( int i) {
  return u_field(i, 12, 12);
}

// opf field, bits 13..5.
static int opf( int x) {
  return u_field(x, 13, 5);
}
......@@ -451,6 +470,7 @@ class Assembler : public AbstractAssembler {
// Floating-point register field encoders. The register number is first
// encoded for the requested width, then placed into the field's bit range.

// Destination register, bits 29..25.
static int fd( FloatRegister r, FloatRegisterImpl::Width fwa) {
  const int regnum = r->encoding(fwa);
  return u_field(regnum, 29, 25);
}

// First source register, bits 18..14.
static int fs1(FloatRegister r, FloatRegisterImpl::Width fwa) {
  const int regnum = r->encoding(fwa);
  return u_field(regnum, 18, 14);
}

// Second source register, bits 4..0.
static int fs2(FloatRegister r, FloatRegisterImpl::Width fwa) {
  const int regnum = r->encoding(fwa);
  return u_field(regnum, 4, 0);
}

// Third source register (four-operand instructions), bits 13..9.
static int fs3(FloatRegister r, FloatRegisterImpl::Width fwa) {
  const int regnum = r->encoding(fwa);
  return u_field(regnum, 13, 9);
}
// some float instructions use this encoding on the op3 field
static int alt_op3(int op, FloatRegisterImpl::Width w) {
......@@ -559,6 +579,12 @@ class Assembler : public AbstractAssembler {
return x & ((1 << 10) - 1);
}
// Feature guards: each asserts that the running CPU reports the capability
// required by the instruction about to be emitted.

// AES crypto instructions are supported only on certain processors.
static void aes_only() {
  assert( VM_Version::has_aes(), "This instruction only works on SPARC with AES instructions support");
}

// Instruction available only with VIS1.
static void vis1_only() {
  assert( VM_Version::has_vis1(), "This instruction only works on SPARC with VIS1");
}

// Instruction available only with VIS3.
static void vis3_only() {
  assert( VM_Version::has_vis3(), "This instruction only works on SPARC with VIS3");
}
......@@ -682,6 +708,24 @@ public:
// Emits the add-with-carry form that also sets the condition codes, with a
// 13-bit signed immediate as second operand: d = s1 + simm13a + carry.
void addccc( Register s1, int simm13a, Register d ) {
  const int insn = op(arith_op)
                 | rd(d)
                 | op3(addc_op3 | cc_bit_op3)
                 | rs1(s1)
                 | immed(true)
                 | simm(simm13a, 13);
  emit_int32( insn );
}
// 4-operand AES instructions
void aes_eround01( FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_eround01_op5) | fs2(s2, FloatRegisterImpl::D) ); }
void aes_eround23( FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_eround23_op5) | fs2(s2, FloatRegisterImpl::D) ); }
void aes_dround01( FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_dround01_op5) | fs2(s2, FloatRegisterImpl::D) ); }
void aes_dround23( FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_dround23_op5) | fs2(s2, FloatRegisterImpl::D) ); }
void aes_eround01_l( FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_eround01_l_op5) | fs2(s2, FloatRegisterImpl::D) ); }
void aes_eround23_l( FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_eround23_l_op5) | fs2(s2, FloatRegisterImpl::D) ); }
void aes_dround01_l( FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_dround01_l_op5) | fs2(s2, FloatRegisterImpl::D) ); }
void aes_dround23_l( FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_dround23_l_op5) | fs2(s2, FloatRegisterImpl::D) ); }
void aes_kexpand1( FloatRegister s1, FloatRegister s2, int imm5a, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | u_field(imm5a, 13, 9) | op5(aes_kexpand1_op5) | fs2(s2, FloatRegisterImpl::D) ); }
// 3-operand AES instructions
void aes_kexpand0( FloatRegister s1, FloatRegister s2, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes3_op3) | fs1(s1, FloatRegisterImpl::D) | opf(aes_kexpand0_opf) | fs2(s2, FloatRegisterImpl::D) ); }
void aes_kexpand2( FloatRegister s1, FloatRegister s2, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes3_op3) | fs1(s1, FloatRegisterImpl::D) | opf(aes_kexpand2_opf) | fs2(s2, FloatRegisterImpl::D) ); }
// pp 136
inline void bpr(RCondition c, bool a, Predict p, Register s1, address d, relocInfo::relocType rt = relocInfo::none);
......@@ -784,6 +828,10 @@ public:
// Floating-point multiply; sources are encoded with width sw, the destination
// with width dw, and both widths are folded into the opf value.
void fmul( FloatRegisterImpl::Width sw, FloatRegisterImpl::Width dw, FloatRegister s1, FloatRegister s2, FloatRegister d ) {
  const int opf_bits = 0x60 + sw + dw*4;
  emit_int32( op(arith_op) | fd(d, dw) | op3(fpop1_op3) | fs1(s1, sw) | opf(opf_bits) | fs2(s2, sw));
}

// Floating-point divide at width w.
void fdiv( FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister d ) {
  const int opf_bits = 0x4c + w;
  emit_int32( op(arith_op) | fd(d, w) | op3(fpop1_op3) | fs1(s1, w) | opf(opf_bits) | fs2(s2, w));
}

// FXORs/FXORd instructions (guarded by vis1_only()); the opf value is derived
// from the width as 0x6E - w.
void fxor( FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister d ) {
  vis1_only();
  const int opf_bits = 0x6E - w;
  emit_int32( op(arith_op) | fd(d, w) | op3(flog3_op3) | fs1(s1, w) | opf(opf_bits) | fs2(s2, w));
}

// pp 164
// Floating-point square root at width w (no fs1 operand is encoded).
void fsqrt( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d ) {
  const int opf_bits = 0x28 + w;
  emit_int32( op(arith_op) | fd(d, w) | op3(fpop1_op3) | opf(opf_bits) | fs2(s, w));
}
......
......@@ -1848,6 +1848,12 @@ const bool Matcher::misaligned_vectors_ok() {
return false;
}
// On current (2013) SPARC platforms the decryption key schedule is rebuilt
// from the original key, so the original key has to be handed to the AES
// stubs. Always answers true on this platform.
const bool Matcher::pass_original_key_for_aes() {
  const bool original_key_required = true;
  return original_key_required;
}
// USII supports fxtof through the whole range of number, USIII doesn't
const bool Matcher::convL2FSupported(void) {
return VM_Version::has_fast_fxtof();
......
......@@ -3304,6 +3304,775 @@ class StubGenerator: public StubCodeGenerator {
}
}
// Generates the stub that AES-encrypts one 16-byte block with the SPARC AES
// round instructions and returns the stub's entry address.
//
// Registers read by the generated code:
//   O0 (from) - address the 16 input bytes are loaded from
//   O1 (to)   - address the 16 output bytes are stored to
//   O2 (key)  - address the expanded key words are loaded from; the array
//               length is read from the slot at
//               (length_offset_in_bytes - base_offset_in_bytes(T_INT)) relative
//               to it, so this is presumably the address of element 0
// O4 is overwritten with the expanded key length. An expanded key length of 44
// ints selects the 128-bit path, 52 the 192-bit path, and everything else falls
// through to the 256-bit path.
// NOTE(review): input/output are accessed with 4-byte ldf/stf and the key with
// 8-byte ldf; the stub presumably relies on the arguments being aligned
// accordingly - confirm against the callers.
address generate_aescrypt_encryptBlock() {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "aesencryptBlock");
Label L_doLast128bit, L_storeOutput;
address start = __ pc();
Register from = O0; // source byte array
Register to = O1; // destination byte array
Register key = O2; // expanded key array
const Register keylen = O4; //reg for storing expanded key array length
// read expanded key length
__ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0);
// load input into F54-F56; F30-F31 used as temp
// (word-sized loads go through F30/F31, then each pair is copied as one double)
__ ldf(FloatRegisterImpl::S, from, 0, F30);
__ ldf(FloatRegisterImpl::S, from, 4, F31);
__ fmov(FloatRegisterImpl::D, F30, F54);
__ ldf(FloatRegisterImpl::S, from, 8, F30);
__ ldf(FloatRegisterImpl::S, from, 12, F31);
__ fmov(FloatRegisterImpl::D, F30, F56);
// load expanded key
// (the first 40 key words, bytes 0..159, into F0..F38; this also overwrites the
// F30/F31 temporaries, which are no longer needed)
for ( int i = 0; i <= 38; i += 2 ) {
__ ldf(FloatRegisterImpl::D, key, i*4, as_FloatRegister(i));
}
// perform cipher transformation
// (xor the input with key words 0..3 held in F0:F2)
__ fxor(FloatRegisterImpl::D, F0, F54, F54);
__ fxor(FloatRegisterImpl::D, F2, F56, F56);
// rounds 1 through 8
// (the state alternates between F54:F56 and F58:F60; each round uses the next
// two double key registers, F4..F34)
for ( int i = 4; i <= 28; i += 8 ) {
__ aes_eround01(as_FloatRegister(i), F54, F56, F58);
__ aes_eround23(as_FloatRegister(i+2), F54, F56, F60);
__ aes_eround01(as_FloatRegister(i+4), F58, F60, F54);
__ aes_eround23(as_FloatRegister(i+6), F58, F60, F56);
}
__ aes_eround01(F36, F54, F56, F58); //round 9
__ aes_eround23(F38, F54, F56, F60);
// 128-bit original key size
__ cmp_and_brx_short(keylen, 44, Assembler::equal, Assembler::pt, L_doLast128bit);
// longer keys: load key words 40..51 (bytes 160..207) into F40..F50
for ( int i = 40; i <= 50; i += 2 ) {
__ ldf(FloatRegisterImpl::D, key, i*4, as_FloatRegister(i) );
}
__ aes_eround01(F40, F58, F60, F54); //round 10
__ aes_eround23(F42, F58, F60, F56);
__ aes_eround01(F44, F54, F56, F58); //round 11
__ aes_eround23(F46, F54, F56, F60);
// 192-bit original key size
// (F48:F50 already hold key words 48..51, which the shared last round uses)
__ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pt, L_storeOutput);
// remaining case (256-bit path): key words 52..53 go to F52
__ ldf(FloatRegisterImpl::D, key, 208, F52);
__ aes_eround01(F48, F58, F60, F54); //round 12
__ aes_eround23(F50, F58, F60, F56);
// F46, F48 and F50 are reused for key words 54..59 so that the shared last
// round below finds its key in F48:F50
__ ldf(FloatRegisterImpl::D, key, 216, F46);
__ ldf(FloatRegisterImpl::D, key, 224, F48);
__ ldf(FloatRegisterImpl::D, key, 232, F50);
__ aes_eround01(F52, F54, F56, F58); //round 13
__ aes_eround23(F46, F54, F56, F60);
__ br(Assembler::always, false, Assembler::pt, L_storeOutput);
__ delayed()->nop();
__ BIND(L_doLast128bit);
// 128-bit path: key words 40..43 (bytes 160..175) go to F48:F50
__ ldf(FloatRegisterImpl::D, key, 160, F48);
__ ldf(FloatRegisterImpl::D, key, 168, F50);
__ BIND(L_storeOutput);
// perform last round of encryption common for all key sizes
// (state is in F58:F60 and the last key words are in F48:F50 on every path)
__ aes_eround01_l(F48, F58, F60, F54); //last round
__ aes_eround23_l(F50, F58, F60, F56);
// store output into the destination array, F0-F1 used as temp
__ fmov(FloatRegisterImpl::D, F54, F0);
__ stf(FloatRegisterImpl::S, F0, to, 0);
__ stf(FloatRegisterImpl::S, F1, to, 4);
__ fmov(FloatRegisterImpl::D, F56, F0);
__ stf(FloatRegisterImpl::S, F0, to, 8);
__ retl();
// the final word is stored from the delay slot of the return
__ delayed()->stf(FloatRegisterImpl::S, F1, to, 12);
return start;
}
// Generates the stub that AES-decrypts one 16-byte block with the SPARC AES
// instructions and returns the stub's entry address.
//
// Registers read by the generated code:
//   O0 (from)         - address the 16 input bytes are loaded from
//   O1 (to)           - address the 16 output bytes are stored to
//   O2 (key)          - expanded key array; only its length is read here
//   O3 (original_key) - address of the original (unexpanded) key bytes
// O4 is overwritten with the expanded key length (60 -> 256-bit path,
// 52 -> 192-bit path, otherwise the 128-bit path).
//
// The key schedule is recomputed from the original key with aes_kexpand*
// because the SunJCE decryption schedule is not usable by the SPARC
// instructions. The schedule occupies F0 upwards, two key words per double
// register, and the rounds consume it from the highest register down to F0.
//
// Input and output are only ever accessed with 4-byte ldf/stf. The 256-bit
// path used to re-read the input with 8-byte loads after the schedule had
// overwritten it, which demanded stricter alignment of `from` than the rest of
// the stub; the input is now parked in F60:F62 instead.
// NOTE(review): 4-byte alignment of from/to/original_key is still assumed -
// confirm against the callers.
address generate_aescrypt_decryptBlock() {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", "aesdecryptBlock");
  address start = __ pc();
  Label L_expand192bit, L_expand256bit, L_common_transform;
  Register from = O0; // source byte array
  Register to = O1; // destination byte array
  Register key = O2; // expanded key array
  Register original_key = O3; // original key array only required during decryption
  const Register keylen = O4; // reg for storing expanded key array length

  // read expanded key array length
  __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0);

  // load input into F52-F54; F30,F31 used as temp
  __ ldf(FloatRegisterImpl::S, from, 0, F30);
  __ ldf(FloatRegisterImpl::S, from, 4, F31);
  __ fmov(FloatRegisterImpl::D, F30, F52);
  __ ldf(FloatRegisterImpl::S, from, 8, F30);
  __ ldf(FloatRegisterImpl::S, from, 12, F31);
  __ fmov(FloatRegisterImpl::D, F30, F54);

  // load the first four words of the original key into F0-F3
  for ( int i = 0; i <= 3; i++ ) {
    __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i));
  }

  // 256-bit original key size
  __ cmp_and_brx_short(keylen, 60, Assembler::equal, Assembler::pn, L_expand256bit);

  // 192-bit original key size
  __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_expand192bit);

  // 128-bit original key size
  // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions
  // (fills F4..F42; the input in F52:F54 is left untouched)
  for ( int i = 0; i <= 36; i += 4 ) {
    __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+2), i/4, as_FloatRegister(i+4));
    __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+4), as_FloatRegister(i+6));
  }

  // perform 128-bit key specific inverse cipher transformation
  // (xor the input with the last four key words, F40:F42)
  __ fxor(FloatRegisterImpl::D, F42, F54, F54);
  __ fxor(FloatRegisterImpl::D, F40, F52, F52);
  __ br(Assembler::always, false, Assembler::pt, L_common_transform);
  __ delayed()->nop();

  __ BIND(L_expand192bit);

  // start loading rest of the 192-bit key
  __ ldf(FloatRegisterImpl::S, original_key, 16, F4);
  __ ldf(FloatRegisterImpl::S, original_key, 20, F5);

  // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions
  // (fills F6..F50; the input in F52:F54 is left untouched)
  for ( int i = 0; i <= 36; i += 6 ) {
    __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+4), i/6, as_FloatRegister(i+6));
    __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+6), as_FloatRegister(i+8));
    __ aes_kexpand2(as_FloatRegister(i+4), as_FloatRegister(i+8), as_FloatRegister(i+10));
  }
  __ aes_kexpand1(F42, F46, 7, F48);
  __ aes_kexpand2(F44, F48, F50);

  // perform 192-bit key specific inverse cipher transformation
  // (initial xor with F48:F50, then the two rounds that use F40..F46)
  __ fxor(FloatRegisterImpl::D, F50, F54, F54);
  __ fxor(FloatRegisterImpl::D, F48, F52, F52);
  __ aes_dround23(F46, F52, F54, F58);
  __ aes_dround01(F44, F52, F54, F56);
  __ aes_dround23(F42, F56, F58, F54);
  __ aes_dround01(F40, F56, F58, F52);
  __ br(Assembler::always, false, Assembler::pt, L_common_transform);
  __ delayed()->nop();

  __ BIND(L_expand256bit);

  // The 256-bit schedule fills F8..F58 and therefore overwrites the input held
  // in F52:F54. Park the input in F60:F62, which nothing below writes, instead
  // of re-reading it from memory later with wider (8-byte) loads.
  __ fmov(FloatRegisterImpl::D, F52, F60);
  __ fmov(FloatRegisterImpl::D, F54, F62);

  // load rest of the 256-bit key
  for ( int i = 4; i <= 7; i++ ) {
    __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i));
  }

  // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions
  for ( int i = 0; i <= 40; i += 8 ) {
    __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+6), i/8, as_FloatRegister(i+8));
    __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+8), as_FloatRegister(i+10));
    __ aes_kexpand0(as_FloatRegister(i+4), as_FloatRegister(i+10), as_FloatRegister(i+12));
    __ aes_kexpand2(as_FloatRegister(i+6), as_FloatRegister(i+12), as_FloatRegister(i+14));
  }
  __ aes_kexpand1(F48, F54, 6, F56);
  __ aes_kexpand2(F50, F56, F58);

  // F52..F58 are needed for the cipher state, so move the last eight key words
  // down into F0..F6 (F58->F0, F56->F2, F54->F4, F52->F6); the original key
  // words that lived there are reloaded further below
  for ( int i = 0; i <= 6; i += 2 ) {
    __ fmov(FloatRegisterImpl::D, as_FloatRegister(58-i), as_FloatRegister(i));
  }

  // perform 256-bit key specific inverse cipher transformation
  // (initial xor of the parked input with the last four key words, producing
  // the state in F52:F54, then the four rounds that use the key words now in
  // F4:F6 and in F40..F50)
  __ fxor(FloatRegisterImpl::D, F0, F62, F54);
  __ fxor(FloatRegisterImpl::D, F2, F60, F52);
  __ aes_dround23(F4, F52, F54, F58);
  __ aes_dround01(F6, F52, F54, F56);
  __ aes_dround23(F50, F56, F58, F54);
  __ aes_dround01(F48, F56, F58, F52);
  __ aes_dround23(F46, F52, F54, F58);
  __ aes_dround01(F44, F52, F54, F56);
  __ aes_dround23(F42, F56, F58, F54);
  __ aes_dround01(F40, F56, F58, F52);

  // restore the first eight key words (the original key) into F0-F7 for the
  // final rounds
  for ( int i = 0; i <= 7; i++ ) {
    __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i));
  }

  // perform inverse cipher transformations common for all key sizes
  // (state in F52:F54, key words in F0..F38; the last pair of instructions uses
  // the last-round forms)
  __ BIND(L_common_transform);
  for ( int i = 38; i >= 6; i -= 8 ) {
    __ aes_dround23(as_FloatRegister(i), F52, F54, F58);
    __ aes_dround01(as_FloatRegister(i-2), F52, F54, F56);
    if ( i != 6) {
      __ aes_dround23(as_FloatRegister(i-4), F56, F58, F54);
      __ aes_dround01(as_FloatRegister(i-6), F56, F58, F52);
    } else {
      __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F54);
      __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F52);
    }
  }

  // store output to destination array, F0-F1 used as temp
  __ fmov(FloatRegisterImpl::D, F52, F0);
  __ stf(FloatRegisterImpl::S, F0, to, 0);
  __ stf(FloatRegisterImpl::S, F1, to, 4);
  __ fmov(FloatRegisterImpl::D, F54, F0);
  __ stf(FloatRegisterImpl::S, F0, to, 8);
  __ retl();
  // the final word is stored from the delay slot of the return
  __ delayed()->stf(FloatRegisterImpl::S, F1, to, 12);

  return start;
}
// Generates the AES/CBC encryption stub and returns its entry address.
//
// Arguments as seen by the caller (O registers); inside the stub they are read
// from the corresponding I registers because a register window is pushed first:
//   O0/I0 (from)    - address of the input bytes
//   O1/I1 (to)      - address the output bytes are stored to
//   O2/I2 (key)     - expanded key array (length is read from its header slot)
//   O3/I3 (rvec)    - 16-byte chaining vector; read at entry, updated at exit
//   O4/I4 (len_reg) - number of bytes to process
// The caller receives the original byte count back in O0.
//
// The stub keeps the byte count in a local (L) register for the duration of
// the loop. Local registers belong to the current register window, so the stub
// must own a window before writing one; without save_frame() the write would
// land in the caller's L register and corrupt whatever the caller kept there.
//
// NOTE(review): each loop runs until len_reg reaches exactly zero, so the
// length is presumably a positive multiple of 16; from/to/rvec/key are accessed
// with 8-byte loads/stores and are presumably 8-byte aligned - confirm against
// the callers.
address generate_cipherBlockChaining_encryptAESCrypt() {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
  Label L_cbcenc128, L_cbcenc192, L_cbcenc256;
  address start = __ pc();
  // args are read from I* registers since we save the frame in the beginning
  Register from = I0; // source byte array
  Register to = I1; // destination byte array
  Register key = I2; // expanded key array
  Register rvec = I3; // init vector
  const Register len_reg = I4; // cipher length
  const Register keylen = I5; // reg for storing expanded key array length

  // get a register window of our own before touching any L register
  __ save_frame(0);

  // save cipher len to return in the end
  __ mov(len_reg, L1);

  // read expanded key length
  __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0);

  // load init vector
  __ ldf(FloatRegisterImpl::D, rvec, 0, F60);
  __ ldf(FloatRegisterImpl::D, rvec, 8, F62);

  // the first four key words stay in integer registers; they are xor-ed into
  // every input block before it is moved to the FP registers
  __ ldx(key,0,G1);
  __ ldx(key,8,G2);

  // start loading expanded key
  // (key words 4..43, bytes 16..175, into F0..F38)
  for ( int i = 0, j = 16; i <= 38; i += 2, j += 8 ) {
    __ ldf(FloatRegisterImpl::D, key, j, as_FloatRegister(i));
  }

  // 128-bit original key size
  __ cmp_and_brx_short(keylen, 44, Assembler::equal, Assembler::pt, L_cbcenc128);

  // key words 44..51 into F40..F46
  for ( int i = 40, j = 176; i <= 46; i += 2, j += 8 ) {
    __ ldf(FloatRegisterImpl::D, key, j, as_FloatRegister(i));
  }

  // 192-bit original key size
  __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pt, L_cbcenc192);

  // key words 52..59 into F48..F54
  for ( int i = 48, j = 208; i <= 54; i += 2, j += 8 ) {
    __ ldf(FloatRegisterImpl::D, key, j, as_FloatRegister(i));
  }

  // 256-bit original key size
  __ br(Assembler::always, false, Assembler::pt, L_cbcenc256);
  __ delayed()->nop();

  // ---- 128-bit key: one block per iteration, chaining value in F60:F62 ----
  __ align(OptoLoopAlignment);
  __ BIND(L_cbcenc128);
  __ ldx(from,0,G3);
  __ ldx(from,8,G4);
  __ xor3(G1,G3,G3);
  __ xor3(G2,G4,G4);
  __ movxtod(G3,F56);
  __ movxtod(G4,F58);
  // fold in the previous ciphertext block (or the init vector)
  __ fxor(FloatRegisterImpl::D, F60, F56, F60);
  __ fxor(FloatRegisterImpl::D, F62, F58, F62);

  // TEN_EROUNDS
  for ( int i = 0; i <= 32; i += 8 ) {
    __ aes_eround01(as_FloatRegister(i), F60, F62, F56);
    __ aes_eround23(as_FloatRegister(i+2), F60, F62, F58);
    if (i != 32 ) {
      __ aes_eround01(as_FloatRegister(i+4), F56, F58, F60);
      __ aes_eround23(as_FloatRegister(i+6), F56, F58, F62);
    } else {
      __ aes_eround01_l(as_FloatRegister(i+4), F56, F58, F60);
      __ aes_eround23_l(as_FloatRegister(i+6), F56, F58, F62);
    }
  }

  __ stf(FloatRegisterImpl::D, F60, to, 0);
  __ stf(FloatRegisterImpl::D, F62, to, 8);
  __ add(from, 16, from);
  __ add(to, 16, to);
  __ subcc(len_reg, 16, len_reg);
  __ br(Assembler::notEqual, false, Assembler::pt, L_cbcenc128);
  __ delayed()->nop();
  // write the last ciphertext block back as the new chaining vector
  __ stf(FloatRegisterImpl::D, F60, rvec, 0);
  __ stf(FloatRegisterImpl::D, F62, rvec, 8);
  // return the saved length: our I0 is the caller's O0 after the window is popped
  __ mov(L1, I0);
  __ ret();
  __ delayed()->restore();

  // ---- 192-bit key ----
  __ align(OptoLoopAlignment);
  __ BIND(L_cbcenc192);
  __ ldx(from,0,G3);
  __ ldx(from,8,G4);
  __ xor3(G1,G3,G3);
  __ xor3(G2,G4,G4);
  __ movxtod(G3,F56);
  __ movxtod(G4,F58);
  __ fxor(FloatRegisterImpl::D, F60, F56, F60);
  __ fxor(FloatRegisterImpl::D, F62, F58, F62);

  // TWELVE_EROUNDS
  for ( int i = 0; i <= 40; i += 8 ) {
    __ aes_eround01(as_FloatRegister(i), F60, F62, F56);
    __ aes_eround23(as_FloatRegister(i+2), F60, F62, F58);
    if (i != 40 ) {
      __ aes_eround01(as_FloatRegister(i+4), F56, F58, F60);
      __ aes_eround23(as_FloatRegister(i+6), F56, F58, F62);
    } else {
      __ aes_eround01_l(as_FloatRegister(i+4), F56, F58, F60);
      __ aes_eround23_l(as_FloatRegister(i+6), F56, F58, F62);
    }
  }

  __ stf(FloatRegisterImpl::D, F60, to, 0);
  __ stf(FloatRegisterImpl::D, F62, to, 8);
  __ add(from, 16, from);
  __ subcc(len_reg, 16, len_reg);
  __ add(to, 16, to);
  __ br(Assembler::notEqual, false, Assembler::pt, L_cbcenc192);
  __ delayed()->nop();
  __ stf(FloatRegisterImpl::D, F60, rvec, 0);
  __ stf(FloatRegisterImpl::D, F62, rvec, 8);
  __ mov(L1, I0);
  __ ret();
  __ delayed()->restore();

  // ---- 256-bit key ----
  __ align(OptoLoopAlignment);
  __ BIND(L_cbcenc256);
  __ ldx(from,0,G3);
  __ ldx(from,8,G4);
  __ xor3(G1,G3,G3);
  __ xor3(G2,G4,G4);
  __ movxtod(G3,F56);
  __ movxtod(G4,F58);
  __ fxor(FloatRegisterImpl::D, F60, F56, F60);
  __ fxor(FloatRegisterImpl::D, F62, F58, F62);

  // FOURTEEN_EROUNDS
  for ( int i = 0; i <= 48; i += 8 ) {
    __ aes_eround01(as_FloatRegister(i), F60, F62, F56);
    __ aes_eround23(as_FloatRegister(i+2), F60, F62, F58);
    if (i != 48 ) {
      __ aes_eround01(as_FloatRegister(i+4), F56, F58, F60);
      __ aes_eround23(as_FloatRegister(i+6), F56, F58, F62);
    } else {
      __ aes_eround01_l(as_FloatRegister(i+4), F56, F58, F60);
      __ aes_eround23_l(as_FloatRegister(i+6), F56, F58, F62);
    }
  }

  __ stf(FloatRegisterImpl::D, F60, to, 0);
  __ stf(FloatRegisterImpl::D, F62, to, 8);
  __ add(from, 16, from);
  __ subcc(len_reg, 16, len_reg);
  __ add(to, 16, to);
  __ br(Assembler::notEqual, false, Assembler::pt, L_cbcenc256);
  __ delayed()->nop();
  __ stf(FloatRegisterImpl::D, F60, rvec, 0);
  __ stf(FloatRegisterImpl::D, F62, rvec, 8);
  __ mov(L1, I0);
  __ ret();
  __ delayed()->restore();

  return start;
}
// Generates the AES/CBC decryption stub (two blocks per loop iteration) and
// returns its entry address.
//
// Arguments as seen by the caller (O registers); the stub pushes a register
// window first and reads them from the corresponding I registers:
//   O0/I0 (from)         - address of the input bytes
//   O1/I1 (to)           - address the output bytes are stored to
//   O2/I2 (key)          - expanded key array; only its length is read here
//   O3/I3 (rvec)         - 16-byte chaining vector; read at entry, updated at exit
//   O4/I4 (len_reg)      - number of bytes to process
//   O5/I5 (original_key) - address of the original (unexpanded) key bytes
// The caller receives the original byte count back in O0.
//
// Register use inside the stub:
//   L0:L1 - chaining value (init vector, then the previous ciphertext block)
//   L2:L3 - last four words of the key schedule (xor-ed into each input block)
//   L4:L5 - ciphertext of the block whose value becomes the next chaining value
//   L6    - expanded key length, L7 - saved byte count for the return value
//   G1-G6 - scratch
// The byte count is saved only after save_frame(): local registers belong to
// the current register window, so writing one before the window is pushed
// would overwrite the caller's register.
//
// If the block count is odd (len & 16), one block is decrypted on its own
// first; the remainder is processed two blocks at a time.
// NOTE(review): the loops terminate on len_reg == 0, so the length is
// presumably a positive multiple of 16; from/to/rvec/original_key are accessed
// with 8-byte loads/stores in places and are presumably aligned accordingly -
// confirm against the callers.
address generate_cipherBlockChaining_decryptAESCrypt_Parallel() {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
  Label L_cbcdec_end, L_expand192bit, L_expand256bit, L_dec_first_block_start;
  Label L_dec_first_block128, L_dec_first_block192, L_dec_next2_blocks128, L_dec_next2_blocks192, L_dec_next2_blocks256;
  address start = __ pc();
  Register from = I0; // source byte array
  Register to = I1; // destination byte array
  Register key = I2; // expanded key array
  Register rvec = I3; // init vector
  const Register len_reg = I4; // cipher length
  const Register original_key = I5; // original key array only required during decryption
  const Register keylen = L6; // reg for storing expanded key array length

  __ save_frame(0); //args are read from I* registers since we save the frame in the beginning

  // save cipher len to return in the end (L7 is not used for anything else)
  __ mov(len_reg, L7);

  // load the first four words of the original key into F0-F3
  for ( int i = 0; i <= 3; i++ ) {
    __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i));
  }

  // load initial vector
  __ ldx(rvec,0,L0);
  __ ldx(rvec,8,L1);

  // read expanded key array length
  __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0);

  // 256-bit original key size
  __ cmp_and_brx_short(keylen, 60, Assembler::equal, Assembler::pn, L_expand256bit);

  // 192-bit original key size
  __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_expand192bit);

  // 128-bit original key size
  // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions
  for ( int i = 0; i <= 36; i += 4 ) {
    __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+2), i/4, as_FloatRegister(i+4));
    __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+4), as_FloatRegister(i+6));
  }

  // load expanded key[last-1] and key[last] elements
  __ movdtox(F40,L2);
  __ movdtox(F42,L3);

  // even number of blocks: go straight to the two-block loop
  __ and3(len_reg, 16, L4);
  __ br_null(L4, false, Assembler::pt, L_dec_next2_blocks128);
  __ delayed()->nop();

  __ br(Assembler::always, false, Assembler::pt, L_dec_first_block_start);
  __ delayed()->nop();

  __ BIND(L_expand192bit);
  // load rest of the 192-bit key
  __ ldf(FloatRegisterImpl::S, original_key, 16, F4);
  __ ldf(FloatRegisterImpl::S, original_key, 20, F5);

  // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions
  for ( int i = 0; i <= 36; i += 6 ) {
    __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+4), i/6, as_FloatRegister(i+6));
    __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+6), as_FloatRegister(i+8));
    __ aes_kexpand2(as_FloatRegister(i+4), as_FloatRegister(i+8), as_FloatRegister(i+10));
  }
  __ aes_kexpand1(F42, F46, 7, F48);
  __ aes_kexpand2(F44, F48, F50);

  // load expanded key[last-1] and key[last] elements
  __ movdtox(F48,L2);
  __ movdtox(F50,L3);

  __ and3(len_reg, 16, L4);
  __ br_null(L4, false, Assembler::pt, L_dec_next2_blocks192);
  __ delayed()->nop();

  __ br(Assembler::always, false, Assembler::pt, L_dec_first_block_start);
  __ delayed()->nop();

  __ BIND(L_expand256bit);
  // load rest of the 256-bit key
  for ( int i = 4; i <= 7; i++ ) {
    __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i));
  }

  // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions
  for ( int i = 0; i <= 40; i += 8 ) {
    __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+6), i/8, as_FloatRegister(i+8));
    __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+8), as_FloatRegister(i+10));
    __ aes_kexpand0(as_FloatRegister(i+4), as_FloatRegister(i+10), as_FloatRegister(i+12));
    __ aes_kexpand2(as_FloatRegister(i+6), as_FloatRegister(i+12), as_FloatRegister(i+14));
  }
  __ aes_kexpand1(F48, F54, 6, F56);
  __ aes_kexpand2(F50, F56, F58);

  // load expanded key[last-1] and key[last] elements
  __ movdtox(F56,L2);
  __ movdtox(F58,L3);

  __ and3(len_reg, 16, L4);
  __ br_null(L4, false, Assembler::pt, L_dec_next2_blocks256);
  __ delayed()->nop();

  // ---- odd block count: decrypt the first block on its own ----
  __ BIND(L_dec_first_block_start);
  // ciphertext stays in L4:L5 so it can become the next chaining value
  __ ldx(from,0,L4);
  __ ldx(from,8,L5);
  __ xor3(L2,L4,G1);
  __ movxtod(G1,F60);
  __ xor3(L3,L5,G1);
  __ movxtod(G1,F62);

  // 128-bit original key size
  __ cmp_and_brx_short(keylen, 44, Assembler::equal, Assembler::pn, L_dec_first_block128);

  // 192-bit original key size
  __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_dec_first_block192);

  // 256-bit key only: the two extra rounds using F48..F54, then fall through
  __ aes_dround23(F54, F60, F62, F58);
  __ aes_dround01(F52, F60, F62, F56);
  __ aes_dround23(F50, F56, F58, F62);
  __ aes_dround01(F48, F56, F58, F60);

  __ BIND(L_dec_first_block192);
  // 192- and 256-bit keys: the two rounds using F40..F46, then fall through
  __ aes_dround23(F46, F60, F62, F58);
  __ aes_dround01(F44, F60, F62, F56);
  __ aes_dround23(F42, F56, F58, F62);
  __ aes_dround01(F40, F56, F58, F60);

  __ BIND(L_dec_first_block128);
  // rounds common to all key sizes, key words in F38 down to F0
  for ( int i = 38; i >= 6; i -= 8 ) {
    __ aes_dround23(as_FloatRegister(i), F60, F62, F58);
    __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56);
    if ( i != 6) {
      __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62);
      __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60);
    } else {
      __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F62);
      __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F60);
    }
  }

  // xor with the chaining value, then make this block's ciphertext the new one
  __ movxtod(L0,F56);
  __ movxtod(L1,F58);
  __ mov(L4,L0);
  __ mov(L5,L1);
  __ fxor(FloatRegisterImpl::D, F56, F60, F60);
  __ fxor(FloatRegisterImpl::D, F58, F62, F62);

  __ stf(FloatRegisterImpl::D, F60, to, 0);
  __ stf(FloatRegisterImpl::D, F62, to, 8);

  __ add(from, 16, from);
  __ add(to, 16, to);
  __ subcc(len_reg, 16, len_reg);
  __ br(Assembler::equal, false, Assembler::pt, L_cbcdec_end);
  __ delayed()->nop();

  // 256-bit original key size
  __ cmp_and_brx_short(keylen, 60, Assembler::equal, Assembler::pn, L_dec_next2_blocks256);

  // 192-bit original key size
  __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_dec_next2_blocks192);

  // ---- 128-bit key: two blocks per iteration ----
  __ align(OptoLoopAlignment);
  __ BIND(L_dec_next2_blocks128);
  __ nop();

  // F40:F42 used for first 16-bytes
  // (their key words are no longer needed in FP registers: copies are in L2:L3)
  __ ldx(from,0,G4);
  __ ldx(from,8,G5);
  __ xor3(L2,G4,G1);
  __ movxtod(G1,F40);
  __ xor3(L3,G5,G1);
  __ movxtod(G1,F42);

  // F60:F62 used for next 16-bytes
  __ ldx(from,16,L4);
  __ ldx(from,24,L5);
  __ xor3(L2,L4,G1);
  __ movxtod(G1,F60);
  __ xor3(L3,L5,G1);
  __ movxtod(G1,F62);

  for ( int i = 38; i >= 6; i -= 8 ) {
    __ aes_dround23(as_FloatRegister(i), F40, F42, F44);
    __ aes_dround01(as_FloatRegister(i-2), F40, F42, F46);
    __ aes_dround23(as_FloatRegister(i), F60, F62, F58);
    __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56);
    if (i != 6 ) {
      __ aes_dround23(as_FloatRegister(i-4), F46, F44, F42);
      __ aes_dround01(as_FloatRegister(i-6), F46, F44, F40);
      __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62);
      __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60);
    } else {
      __ aes_dround23_l(as_FloatRegister(i-4), F46, F44, F42);
      __ aes_dround01_l(as_FloatRegister(i-6), F46, F44, F40);
      __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F62);
      __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F60);
    }
  }

  // first block: xor with the chaining value
  __ movxtod(L0,F46);
  __ movxtod(L1,F44);
  __ fxor(FloatRegisterImpl::D, F46, F40, F40);
  __ fxor(FloatRegisterImpl::D, F44, F42, F42);

  __ stf(FloatRegisterImpl::D, F40, to, 0);
  __ stf(FloatRegisterImpl::D, F42, to, 8);

  // second block: xor with the first block's ciphertext (G4:G5); its own
  // ciphertext (L4:L5) becomes the chaining value for the next iteration
  __ movxtod(G4,F56);
  __ movxtod(G5,F58);
  __ mov(L4,L0);
  __ mov(L5,L1);
  __ fxor(FloatRegisterImpl::D, F56, F60, F60);
  __ fxor(FloatRegisterImpl::D, F58, F62, F62);

  __ stf(FloatRegisterImpl::D, F60, to, 16);
  __ stf(FloatRegisterImpl::D, F62, to, 24);

  __ add(from, 32, from);
  __ add(to, 32, to);
  __ subcc(len_reg, 32, len_reg);
  __ br(Assembler::notEqual, false, Assembler::pt, L_dec_next2_blocks128);
  __ delayed()->nop();
  __ br(Assembler::always, false, Assembler::pt, L_cbcdec_end);
  __ delayed()->nop();

  // ---- 192-bit key: two blocks per iteration ----
  __ align(OptoLoopAlignment);
  __ BIND(L_dec_next2_blocks192);
  __ nop();

  // F48:F50 used for first 16-bytes
  // (their key words are no longer needed in FP registers: copies are in L2:L3)
  __ ldx(from,0,G4);
  __ ldx(from,8,G5);
  __ xor3(L2,G4,G1);
  __ movxtod(G1,F48);
  __ xor3(L3,G5,G1);
  __ movxtod(G1,F50);

  // F60:F62 used for next 16-bytes
  __ ldx(from,16,L4);
  __ ldx(from,24,L5);
  __ xor3(L2,L4,G1);
  __ movxtod(G1,F60);
  __ xor3(L3,L5,G1);
  __ movxtod(G1,F62);

  for ( int i = 46; i >= 6; i -= 8 ) {
    __ aes_dround23(as_FloatRegister(i), F48, F50, F52);
    __ aes_dround01(as_FloatRegister(i-2), F48, F50, F54);
    __ aes_dround23(as_FloatRegister(i), F60, F62, F58);
    __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56);
    if (i != 6 ) {
      __ aes_dround23(as_FloatRegister(i-4), F54, F52, F50);
      __ aes_dround01(as_FloatRegister(i-6), F54, F52, F48);
      __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62);
      __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60);
    } else {
      __ aes_dround23_l(as_FloatRegister(i-4), F54, F52, F50);
      __ aes_dround01_l(as_FloatRegister(i-6), F54, F52, F48);
      __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F62);
      __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F60);
    }
  }

  __ movxtod(L0,F54);
  __ movxtod(L1,F52);
  __ fxor(FloatRegisterImpl::D, F54, F48, F48);
  __ fxor(FloatRegisterImpl::D, F52, F50, F50);

  __ stf(FloatRegisterImpl::D, F48, to, 0);
  __ stf(FloatRegisterImpl::D, F50, to, 8);

  __ movxtod(G4,F56);
  __ movxtod(G5,F58);
  __ mov(L4,L0);
  __ mov(L5,L1);
  __ fxor(FloatRegisterImpl::D, F56, F60, F60);
  __ fxor(FloatRegisterImpl::D, F58, F62, F62);

  __ stf(FloatRegisterImpl::D, F60, to, 16);
  __ stf(FloatRegisterImpl::D, F62, to, 24);

  __ add(from, 32, from);
  __ add(to, 32, to);
  __ subcc(len_reg, 32, len_reg);
  __ br(Assembler::notEqual, false, Assembler::pt, L_dec_next2_blocks192);
  __ delayed()->nop();
  __ br(Assembler::always, false, Assembler::pt, L_cbcdec_end);
  __ delayed()->nop();

  // ---- 256-bit key: two blocks per iteration ----
  __ align(OptoLoopAlignment);
  __ BIND(L_dec_next2_blocks256);
  __ nop();

  // F0:F2 used for first 16-bytes
  // (this overwrites the first key words; they are re-read from original_key
  // into F48:F54 for the final rounds below)
  __ ldx(from,0,G4);
  __ ldx(from,8,G5);
  __ xor3(L2,G4,G1);
  __ movxtod(G1,F0);
  __ xor3(L3,G5,G1);
  __ movxtod(G1,F2);

  // F60:F62 used for next 16-bytes
  __ ldx(from,16,L4);
  __ ldx(from,24,L5);
  __ xor3(L2,L4,G1);
  __ movxtod(G1,F60);
  __ xor3(L3,L5,G1);
  __ movxtod(G1,F62);

  __ aes_dround23(F54, F0, F2, F4);
  __ aes_dround01(F52, F0, F2, F6);
  __ aes_dround23(F54, F60, F62, F58);
  __ aes_dround01(F52, F60, F62, F56);
  __ aes_dround23(F50, F6, F4, F2);
  __ aes_dround01(F48, F6, F4, F0);
  __ aes_dround23(F50, F56, F58, F62);
  __ aes_dround01(F48, F56, F58, F60);

  // save F48:F54 in temp registers
  __ movdtox(F54,G2);
  __ movdtox(F52,G3);
  __ movdtox(F50,G6);
  __ movdtox(F48,G1);

  for ( int i = 46; i >= 14; i -= 8 ) {
    __ aes_dround23(as_FloatRegister(i), F0, F2, F4);
    __ aes_dround01(as_FloatRegister(i-2), F0, F2, F6);
    __ aes_dround23(as_FloatRegister(i), F60, F62, F58);
    __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56);
    __ aes_dround23(as_FloatRegister(i-4), F6, F4, F2);
    __ aes_dround01(as_FloatRegister(i-6), F6, F4, F0);
    __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62);
    __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60);
  }

  // init F48:F54 with F0:F6 values (original key)
  __ ldf(FloatRegisterImpl::D, original_key, 0, F48);
  __ ldf(FloatRegisterImpl::D, original_key, 8, F50);
  __ ldf(FloatRegisterImpl::D, original_key, 16, F52);
  __ ldf(FloatRegisterImpl::D, original_key, 24, F54);

  __ aes_dround23(F54, F0, F2, F4);
  __ aes_dround01(F52, F0, F2, F6);
  __ aes_dround23(F54, F60, F62, F58);
  __ aes_dround01(F52, F60, F62, F56);
  __ aes_dround23_l(F50, F6, F4, F2);
  __ aes_dround01_l(F48, F6, F4, F0);
  __ aes_dround23_l(F50, F56, F58, F62);
  __ aes_dround01_l(F48, F56, F58, F60);

  // re-init F48:F54 with their original values
  __ movxtod(G2,F54);
  __ movxtod(G3,F52);
  __ movxtod(G6,F50);
  __ movxtod(G1,F48);

  __ movxtod(L0,F6);
  __ movxtod(L1,F4);
  __ fxor(FloatRegisterImpl::D, F6, F0, F0);
  __ fxor(FloatRegisterImpl::D, F4, F2, F2);

  __ stf(FloatRegisterImpl::D, F0, to, 0);
  __ stf(FloatRegisterImpl::D, F2, to, 8);

  __ movxtod(G4,F56);
  __ movxtod(G5,F58);
  __ mov(L4,L0);
  __ mov(L5,L1);
  __ fxor(FloatRegisterImpl::D, F56, F60, F60);
  __ fxor(FloatRegisterImpl::D, F58, F62, F62);

  __ stf(FloatRegisterImpl::D, F60, to, 16);
  __ stf(FloatRegisterImpl::D, F62, to, 24);

  __ add(from, 32, from);
  __ add(to, 32, to);
  __ subcc(len_reg, 32, len_reg);
  __ br(Assembler::notEqual, false, Assembler::pt, L_dec_next2_blocks256);
  __ delayed()->nop();

  __ BIND(L_cbcdec_end);
  // write the last ciphertext block back as the new chaining vector
  __ stx(L0, rvec, 0);
  __ stx(L1, rvec, 8);
  // return the saved length: our I0 is the caller's O0 once the window is popped
  __ mov(L7, I0);
  __ ret();
  __ delayed()->restore();

  return start;
}
void generate_initial() {
// Generates all stubs and initializes the entry points
......@@ -3368,6 +4137,14 @@ class StubGenerator: public StubCodeGenerator {
generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
&StubRoutines::_safefetchN_fault_pc,
&StubRoutines::_safefetchN_continuation_pc);
// generate AES intrinsics code
if (UseAESIntrinsics) {
StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
}
}
......
......@@ -234,7 +234,7 @@ void VM_Version::initialize() {
assert((OptoLoopAlignment % relocInfo::addr_unit()) == 0, "alignment is not a multiple of NOP size");
char buf[512];
jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
(has_v9() ? ", v9" : (has_v8() ? ", v8" : "")),
(has_hardware_popc() ? ", popc" : ""),
(has_vis1() ? ", vis1" : ""),
......@@ -242,6 +242,7 @@ void VM_Version::initialize() {
(has_vis3() ? ", vis3" : ""),
(has_blk_init() ? ", blk_init" : ""),
(has_cbcond() ? ", cbcond" : ""),
(has_aes() ? ", aes" : ""),
(is_ultra3() ? ", ultra3" : ""),
(is_sun4v() ? ", sun4v" : ""),
(is_niagara_plus() ? ", niagara_plus" : (is_niagara() ? ", niagara" : "")),
......@@ -265,6 +266,41 @@ void VM_Version::initialize() {
if (!has_vis1()) // Drop to 0 if no VIS1 support
UseVIS = 0;
// SPARC T4 and above should have support for AES instructions
if (has_aes()) {
if (UseVIS > 0) { // AES intrinsics use FXOR instruction which is VIS1
if (FLAG_IS_DEFAULT(UseAES)) {
FLAG_SET_DEFAULT(UseAES, true);
}
if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
FLAG_SET_DEFAULT(UseAESIntrinsics, true);
}
// we disable both the AES flags if either of them is disabled on the command line
if (!UseAES || !UseAESIntrinsics) {
FLAG_SET_DEFAULT(UseAES, false);
FLAG_SET_DEFAULT(UseAESIntrinsics, false);
}
} else {
if (UseAES || UseAESIntrinsics) {
warning("SPARC AES intrinsics require VIS1 instruction support. Intrinsics will be disabled.");
if (UseAES) {
FLAG_SET_DEFAULT(UseAES, false);
}
if (UseAESIntrinsics) {
FLAG_SET_DEFAULT(UseAESIntrinsics, false);
}
}
}
} else if (UseAES || UseAESIntrinsics) {
warning("AES instructions are not available on this CPU");
if (UseAES) {
FLAG_SET_DEFAULT(UseAES, false);
}
if (UseAESIntrinsics) {
FLAG_SET_DEFAULT(UseAESIntrinsics, false);
}
}
if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
(cache_line_size > ContendedPaddingWidth))
ContendedPaddingWidth = cache_line_size;
......
......@@ -48,7 +48,8 @@ protected:
sparc64_family = 14,
M_family = 15,
T_family = 16,
T1_model = 17
T1_model = 17,
aes_instructions = 18
};
enum Feature_Flag_Set {
......@@ -73,6 +74,7 @@ protected:
M_family_m = 1 << M_family,
T_family_m = 1 << T_family,
T1_model_m = 1 << T1_model,
aes_instructions_m = 1 << aes_instructions,
generic_v8_m = v8_instructions_m | hardware_mul32_m | hardware_div32_m | hardware_fsmuld_m,
generic_v9_m = generic_v8_m | v9_instructions_m,
......@@ -123,6 +125,7 @@ public:
static bool has_vis3() { return (_features & vis3_instructions_m) != 0; }
static bool has_blk_init() { return (_features & blk_init_instructions_m) != 0; }
static bool has_cbcond() { return (_features & cbcond_instructions_m) != 0; }
static bool has_aes() { return (_features & aes_instructions_m) != 0; }
static bool supports_compare_and_exchange()
{ return has_v9(); }
......
......@@ -2403,6 +2403,9 @@ class StubGenerator: public StubCodeGenerator {
// c_rarg3 - r vector byte array address
// c_rarg4 - input length
//
// Output:
// rax - input length
//
address generate_cipherBlockChaining_encryptAESCrypt() {
assert(UseAES, "need AES instructions and misaligned SSE support");
__ align(CodeEntryAlignment);
......@@ -2483,7 +2486,7 @@ class StubGenerator: public StubCodeGenerator {
__ movdqu(Address(rvec, 0), xmm_result); // final value of r stored in rvec of CipherBlockChaining object
handleSOERegisters(false /*restoring*/);
__ movl(rax, 0); // return 0 (why?)
__ movptr(rax, len_param); // return length
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
......@@ -2557,6 +2560,9 @@ class StubGenerator: public StubCodeGenerator {
// c_rarg3 - r vector byte array address
// c_rarg4 - input length
//
// Output:
// rax - input length
//
address generate_cipherBlockChaining_decryptAESCrypt() {
assert(UseAES, "need AES instructions and misaligned SSE support");
......@@ -2650,7 +2656,7 @@ class StubGenerator: public StubCodeGenerator {
__ movptr(rvec , rvec_param); // restore this since used in loop
__ movdqu(Address(rvec, 0), xmm_temp); // final value of r stored in rvec of CipherBlockChaining object
handleSOERegisters(false /*restoring*/);
__ movl(rax, 0); // return 0 (why?)
__ movptr(rax, len_param); // return length
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
......
......@@ -3217,6 +3217,9 @@ class StubGenerator: public StubCodeGenerator {
// c_rarg3 - r vector byte array address
// c_rarg4 - input length
//
// Output:
// rax - input length
//
address generate_cipherBlockChaining_encryptAESCrypt() {
assert(UseAES, "need AES instructions and misaligned SSE support");
__ align(CodeEntryAlignment);
......@@ -3232,7 +3235,7 @@ class StubGenerator: public StubCodeGenerator {
#ifndef _WIN64
const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16)
#else
const Address len_mem(rsp, 6 * wordSize); // length is on stack on Win64
const Address len_mem(rbp, 6 * wordSize); // length is on stack on Win64
const Register len_reg = r10; // pick the first volatile windows register
#endif
const Register pos = rax;
......@@ -3259,6 +3262,8 @@ class StubGenerator: public StubCodeGenerator {
for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
__ movdqu(xmm_save(i), as_XMMRegister(i));
}
#else
__ push(len_reg); // Save
#endif
const XMMRegister xmm_key_shuf_mask = xmm_temp; // used temporarily to swap key bytes up front
......@@ -3301,8 +3306,10 @@ class StubGenerator: public StubCodeGenerator {
for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
__ movdqu(as_XMMRegister(i), xmm_save(i));
}
__ movl(rax, len_mem);
#else
__ pop(rax); // return length
#endif
__ movl(rax, 0); // return 0 (why?)
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
......@@ -3409,6 +3416,9 @@ class StubGenerator: public StubCodeGenerator {
// c_rarg3 - r vector byte array address
// c_rarg4 - input length
//
// Output:
// rax - input length
//
address generate_cipherBlockChaining_decryptAESCrypt_Parallel() {
assert(UseAES, "need AES instructions and misaligned SSE support");
......@@ -3427,7 +3437,7 @@ class StubGenerator: public StubCodeGenerator {
#ifndef _WIN64
const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16)
#else
const Address len_mem(rsp, 6 * wordSize); // length is on stack on Win64
const Address len_mem(rbp, 6 * wordSize); // length is on stack on Win64
const Register len_reg = r10; // pick the first volatile windows register
#endif
const Register pos = rax;
......@@ -3448,7 +3458,10 @@ class StubGenerator: public StubCodeGenerator {
for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
__ movdqu(xmm_save(i), as_XMMRegister(i));
}
#else
__ push(len_reg); // Save
#endif
// the java expanded key ordering is rotated one position from what we want
// so we start from 0x10 here and hit 0x00 last
const XMMRegister xmm_key_shuf_mask = xmm1; // used temporarily to swap key bytes up front
......@@ -3554,8 +3567,10 @@ class StubGenerator: public StubCodeGenerator {
for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
__ movdqu(as_XMMRegister(i), xmm_save(i));
}
__ movl(rax, len_mem);
#else
__ pop(rax); // return length
#endif
__ movl(rax, 0); // return 0 (why?)
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
......
......@@ -581,6 +581,12 @@ const bool Matcher::misaligned_vectors_ok() {
return !AlignVector; // can be changed by flag
}
// Tells the compiler whether AES stubs must also receive the original
// (unexpanded) key array in addition to the expanded key.
const bool Matcher::pass_original_key_for_aes() {
  // The expanded key produced by SunJCE is directly usable by the x86 AES
  // instructions, so the stubs never need the original key.
  return false;
}
// Helper methods for MachSpillCopyNode::implementation().
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
int src_hi, int dst_hi, uint ireg, outputStream* st) {
......
......@@ -119,6 +119,11 @@ int VM_Version::platform_features(int features) {
#endif
if (av & AV_SPARC_CBCOND) features |= cbcond_instructions_m;
#ifndef AV_SPARC_AES
#define AV_SPARC_AES 0x00020000 /* aes instrs supported */
#endif
if (av & AV_SPARC_AES) features |= aes_instructions_m;
} else {
// getisax(2) failed, use the old legacy code.
#ifndef PRODUCT
......
......@@ -787,7 +787,7 @@
do_intrinsic(_cipherBlockChaining_decryptAESCrypt, com_sun_crypto_provider_cipherBlockChaining, decrypt_name, byteArray_int_int_byteArray_int_signature, F_R) \
do_name( encrypt_name, "encrypt") \
do_name( decrypt_name, "decrypt") \
do_signature(byteArray_int_int_byteArray_int_signature, "([BII[BI)V") \
do_signature(byteArray_int_int_byteArray_int_signature, "([BII[BI)I") \
\
/* support for java.util.zip */ \
do_class(java_util_zip_CRC32, "java/util/zip/CRC32") \
......
......@@ -304,6 +304,7 @@ class LibraryCallKit : public GraphKit {
bool inline_cipherBlockChaining_AESCrypt(vmIntrinsics::ID id);
Node* inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting);
Node* get_key_start_from_aescrypt_object(Node* aescrypt_object);
Node* get_original_key_start_from_aescrypt_object(Node* aescrypt_object);
bool inline_encodeISOArray();
bool inline_updateCRC32();
bool inline_updateBytesCRC32();
......@@ -5936,10 +5937,22 @@ bool LibraryCallKit::inline_aescrypt_Block(vmIntrinsics::ID id) {
Node* k_start = get_key_start_from_aescrypt_object(aescrypt_object);
if (k_start == NULL) return false;
if (Matcher::pass_original_key_for_aes()) {
// on SPARC we need to pass the original key since key expansion needs to happen in intrinsics due to
// compatibility issues between Java key expansion and SPARC crypto instructions
Node* original_k_start = get_original_key_start_from_aescrypt_object(aescrypt_object);
if (original_k_start == NULL) return false;
// Call the stub.
make_runtime_call(RC_LEAF|RC_NO_FP, OptoRuntime::aescrypt_block_Type(),
stubAddr, stubName, TypePtr::BOTTOM,
src_start, dest_start, k_start, original_k_start);
} else {
// Call the stub.
make_runtime_call(RC_LEAF|RC_NO_FP, OptoRuntime::aescrypt_block_Type(),
stubAddr, stubName, TypePtr::BOTTOM,
src_start, dest_start, k_start);
}
return true;
}
......@@ -6017,14 +6030,29 @@ bool LibraryCallKit::inline_cipherBlockChaining_AESCrypt(vmIntrinsics::ID id) {
if (objRvec == NULL) return false;
Node* r_start = array_element_address(objRvec, intcon(0), T_BYTE);
Node* cbcCrypt;
if (Matcher::pass_original_key_for_aes()) {
// on SPARC we need to pass the original key since key expansion needs to happen in intrinsics due to
// compatibility issues between Java key expansion and SPARC crypto instructions
Node* original_k_start = get_original_key_start_from_aescrypt_object(aescrypt_object);
if (original_k_start == NULL) return false;
// Call the stub, passing src_start, dest_start, k_start, r_start, src_len and original_k_start
cbcCrypt = make_runtime_call(RC_LEAF|RC_NO_FP,
OptoRuntime::cipherBlockChaining_aescrypt_Type(),
stubAddr, stubName, TypePtr::BOTTOM,
src_start, dest_start, k_start, r_start, len, original_k_start);
} else {
// Call the stub, passing src_start, dest_start, k_start, r_start and src_len
make_runtime_call(RC_LEAF|RC_NO_FP,
cbcCrypt = make_runtime_call(RC_LEAF|RC_NO_FP,
OptoRuntime::cipherBlockChaining_aescrypt_Type(),
stubAddr, stubName, TypePtr::BOTTOM,
src_start, dest_start, k_start, r_start, len);
}
// return is void so no result needs to be pushed
// return cipher length (int)
Node* retvalue = _gvn.transform(new (C) ProjNode(cbcCrypt, TypeFunc::Parms));
set_result(retvalue);
return true;
}
......@@ -6039,6 +6067,17 @@ Node * LibraryCallKit::get_key_start_from_aescrypt_object(Node *aescrypt_object)
return k_start;
}
//------------------------------get_original_key_start_from_aescrypt_object-----------------------
Node * LibraryCallKit::get_original_key_start_from_aescrypt_object(Node *aescrypt_object) {
  // Load the byte[] field "lastKey" from the given AESCrypt object.
  Node* last_key_array = load_field_from_object(aescrypt_object, "lastKey", "[B", /*is_exact*/ false);
  assert (last_key_array != NULL, "wrong version of com.sun.crypto.provider.AESCrypt");
  if (last_key_array != NULL) {
    // Return the address of element 0 of the lastKey array.
    return array_element_address(last_key_array, intcon(0), T_BYTE);
  }
  // The field could not be loaded; a NULL result signals that to the caller.
  return (Node *) NULL;
}
//----------------------------inline_cipherBlockChaining_AESCrypt_predicate----------------------------
// Return node representing slow path of predicate check.
// the pseudo code we want to emulate with this predicate is:
......
......@@ -286,6 +286,9 @@ public:
// CPU supports misaligned vectors store/load.
static const bool misaligned_vectors_ok();
// Should original key array reference be passed to AES stubs
static const bool pass_original_key_for_aes();
// Used to determine a "low complexity" 64-bit constant. (Zero is simple.)
// The standard of comparison is one (StoreL ConL) vs. two (StoreI ConI).
// Depends on the details of 64-bit constant generation on the CPU.
......
......@@ -814,12 +814,18 @@ const TypeFunc* OptoRuntime::array_fill_Type() {
const TypeFunc* OptoRuntime::aescrypt_block_Type() {
// create input type (domain)
int num_args = 3;
if (Matcher::pass_original_key_for_aes()) {
num_args = 4;
}
int argcnt = num_args;
const Type** fields = TypeTuple::fields(argcnt);
int argp = TypeFunc::Parms;
fields[argp++] = TypePtr::NOTNULL; // src
fields[argp++] = TypePtr::NOTNULL; // dest
fields[argp++] = TypePtr::NOTNULL; // k array
if (Matcher::pass_original_key_for_aes()) {
fields[argp++] = TypePtr::NOTNULL; // original k array
}
assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
......@@ -856,6 +862,9 @@ const TypeFunc* OptoRuntime::updateBytesCRC32_Type() {
const TypeFunc* OptoRuntime::cipherBlockChaining_aescrypt_Type() {
// create input type (domain)
int num_args = 5;
if (Matcher::pass_original_key_for_aes()) {
num_args = 6;
}
int argcnt = num_args;
const Type** fields = TypeTuple::fields(argcnt);
int argp = TypeFunc::Parms;
......@@ -864,13 +873,16 @@ const TypeFunc* OptoRuntime::cipherBlockChaining_aescrypt_Type() {
fields[argp++] = TypePtr::NOTNULL; // k array
fields[argp++] = TypePtr::NOTNULL; // r array
fields[argp++] = TypeInt::INT; // src len
if (Matcher::pass_original_key_for_aes()) {
fields[argp++] = TypePtr::NOTNULL; // original k array
}
assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
// no result type needed
// returning cipher len (int)
fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = NULL; // void
const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields);
fields[TypeFunc::Parms+0] = TypeInt::INT;
const TypeTuple* range = TypeTuple::make(TypeFunc::Parms+1, fields);
return TypeFunc::make(domain, range);
}
......
/*
* Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2014 Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
......@@ -39,20 +39,32 @@ public class TestAESMain {
System.out.println(iters + " iterations");
TestAESEncode etest = new TestAESEncode();
etest.prepare();
// warm-up for 20K iterations
System.out.println("Starting encryption warm-up");
for (int i=0; i<20000; i++) {
etest.run();
}
System.out.println("Finished encryption warm-up");
long start = System.nanoTime();
for (int i=0; i<iters; i++) {
etest.run();
}
long end = System.nanoTime();
System.out.println("TestAESEncode runtime was " + (double)((end - start)/1000000000.0) + " ms");
System.out.println("TestAESEncode runtime was " + (double)((end - start)/1000000.0) + " ms");
TestAESDecode dtest = new TestAESDecode();
dtest.prepare();
// warm-up for 20K iterations
System.out.println("Starting decryption warm-up");
for (int i=0; i<20000; i++) {
dtest.run();
}
System.out.println("Finished decryption warm-up");
start = System.nanoTime();
for (int i=0; i<iters; i++) {
dtest.run();
}
end = System.nanoTime();
System.out.println("TestAESDecode runtime was " + (double)((end - start)/1000000000.0) + " ms");
System.out.println("TestAESDecode runtime was " + (double)((end - start)/1000000.0) + " ms");
}
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册