提交 67e07902 编写于 作者: M mdoerr

8188868: PPC64: Support AES intrinsics on Big Endian

Reviewed-by: goetz
上级 7970ee20
...@@ -2100,6 +2100,7 @@ class Assembler : public AbstractAssembler { ...@@ -2100,6 +2100,7 @@ class Assembler : public AbstractAssembler {
// Endianness specific concatenation of 2 loaded vectors. // Endianness specific concatenation of 2 loaded vectors.
inline void load_perm(VectorRegister perm, Register addr); inline void load_perm(VectorRegister perm, Register addr);
inline void vec_perm(VectorRegister first_dest, VectorRegister second, VectorRegister perm); inline void vec_perm(VectorRegister first_dest, VectorRegister second, VectorRegister perm);
inline void vec_perm(VectorRegister dest, VectorRegister first, VectorRegister second, VectorRegister perm);
// RegisterOrConstant versions. // RegisterOrConstant versions.
// These emitters choose between the versions using two registers and // These emitters choose between the versions using two registers and
......
...@@ -904,6 +904,14 @@ inline void Assembler::vec_perm(VectorRegister first_dest, VectorRegister second ...@@ -904,6 +904,14 @@ inline void Assembler::vec_perm(VectorRegister first_dest, VectorRegister second
#endif #endif
} }
// Four-operand variant of vec_perm: emits a single vperm that writes 'dest'.
// The two source vectors are handed to vperm as (first, second) when
// VM_LITTLE_ENDIAN is not defined, and in swapped order (second, first) when
// it is defined. 'perm' is passed through unchanged in both cases.
inline void Assembler::vec_perm(VectorRegister dest, VectorRegister first, VectorRegister second, VectorRegister perm) {
#if !defined(VM_LITTLE_ENDIAN)
  vperm(dest, first, second, perm);
#else
  vperm(dest, second, first, perm);
#endif
}
inline void Assembler::load_const(Register d, void* x, Register tmp) { inline void Assembler::load_const(Register d, void* x, Register tmp) {
load_const(d, (long)x, tmp); load_const(d, (long)x, tmp);
} }
......
...@@ -2224,7 +2224,7 @@ class StubGenerator: public StubCodeGenerator { ...@@ -2224,7 +2224,7 @@ class StubGenerator: public StubCodeGenerator {
return start; return start;
} }
// Arguments for generated stub (little endian only): // Arguments for generated stub:
// R3_ARG1 - source byte array address // R3_ARG1 - source byte array address
// R4_ARG2 - destination byte array address // R4_ARG2 - destination byte array address
// R5_ARG3 - round key array // R5_ARG3 - round key array
...@@ -2243,7 +2243,6 @@ class StubGenerator: public StubCodeGenerator { ...@@ -2243,7 +2243,6 @@ class StubGenerator: public StubCodeGenerator {
Register keylen = R8; Register keylen = R8;
Register temp = R9; Register temp = R9;
Register keypos = R10; Register keypos = R10;
Register hex = R11;
Register fifteen = R12; Register fifteen = R12;
VectorRegister vRet = VR0; VectorRegister vRet = VR0;
...@@ -2263,164 +2262,170 @@ class StubGenerator: public StubCodeGenerator { ...@@ -2263,164 +2262,170 @@ class StubGenerator: public StubCodeGenerator {
VectorRegister vTmp3 = VR11; VectorRegister vTmp3 = VR11;
VectorRegister vTmp4 = VR12; VectorRegister vTmp4 = VR12;
VectorRegister vLow = VR13;
VectorRegister vHigh = VR14;
__ li (hex, 16);
__ li (fifteen, 15); __ li (fifteen, 15);
__ vspltisb (fSplt, 0x0f);
// load unaligned from[0-15] to vsRet // load unaligned from[0-15] to vsRet
__ lvx (vRet, from); __ lvx (vRet, from);
__ lvx (vTmp1, fifteen, from); __ lvx (vTmp1, fifteen, from);
__ lvsl (fromPerm, from); __ lvsl (fromPerm, from);
#ifdef VM_LITTLE_ENDIAN
__ vspltisb (fSplt, 0x0f);
__ vxor (fromPerm, fromPerm, fSplt); __ vxor (fromPerm, fromPerm, fSplt);
#endif
__ vperm (vRet, vRet, vTmp1, fromPerm); __ vperm (vRet, vRet, vTmp1, fromPerm);
// load keylen (44 or 52 or 60) // load keylen (44 or 52 or 60)
__ lwz (keylen, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT), key); __ lwz (keylen, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT), key);
// to load keys // to load keys
__ lvsr (keyPerm, key); __ load_perm (keyPerm, key);
__ vxor (vTmp2, vTmp2, vTmp2); #ifdef VM_LITTLE_ENDIAN
__ vspltisb (vTmp2, -16); __ vspltisb (vTmp2, -16);
__ vrld (keyPerm, keyPerm, vTmp2); __ vrld (keyPerm, keyPerm, vTmp2);
__ vrld (keyPerm, keyPerm, vTmp2); __ vrld (keyPerm, keyPerm, vTmp2);
__ vsldoi (keyPerm, keyPerm, keyPerm, 8); __ vsldoi (keyPerm, keyPerm, keyPerm, 8);
#endif
// load the 1st round key to vKey1 // load the 1st round key to vTmp1
__ li (keypos, 0); __ lvx (vTmp1, key);
__ li (keypos, 16);
__ lvx (vKey1, keypos, key); __ lvx (vKey1, keypos, key);
__ addi (keypos, keypos, 16); __ vec_perm (vTmp1, vKey1, keyPerm);
__ lvx (vTmp1, keypos, key);
__ vperm (vKey1, vTmp1, vKey1, keyPerm);
// 1st round // 1st round
__ vxor (vRet, vRet, vKey1); __ vxor (vRet, vRet, vTmp1);
// load the 2nd round key to vKey1 // load the 2nd round key to vKey1
__ addi (keypos, keypos, 16); __ li (keypos, 32);
__ lvx (vTmp2, keypos, key); __ lvx (vKey2, keypos, key);
__ vperm (vKey1, vTmp2, vTmp1, keyPerm); __ vec_perm (vKey1, vKey2, keyPerm);
// load the 3rd round key to vKey2 // load the 3rd round key to vKey2
__ addi (keypos, keypos, 16); __ li (keypos, 48);
__ lvx (vTmp1, keypos, key); __ lvx (vKey3, keypos, key);
__ vperm (vKey2, vTmp1, vTmp2, keyPerm); __ vec_perm (vKey2, vKey3, keyPerm);
// load the 4th round key to vKey3 // load the 4th round key to vKey3
__ addi (keypos, keypos, 16); __ li (keypos, 64);
__ lvx (vTmp2, keypos, key); __ lvx (vKey4, keypos, key);
__ vperm (vKey3, vTmp2, vTmp1, keyPerm); __ vec_perm (vKey3, vKey4, keyPerm);
// load the 5th round key to vKey4 // load the 5th round key to vKey4
__ addi (keypos, keypos, 16); __ li (keypos, 80);
__ lvx (vTmp1, keypos, key); __ lvx (vTmp1, keypos, key);
__ vperm (vKey4, vTmp1, vTmp2, keyPerm); __ vec_perm (vKey4, vTmp1, keyPerm);
// 2nd - 5th rounds // 2nd - 5th rounds
__ vcipher (vRet, vRet, vKey1); __ vcipher (vRet, vRet, vKey1);
__ vcipher (vRet, vRet, vKey2); __ vcipher (vRet, vRet, vKey2);
__ vcipher (vRet, vRet, vKey3); __ vcipher (vRet, vRet, vKey3);
__ vcipher (vRet, vRet, vKey4); __ vcipher (vRet, vRet, vKey4);
// load the 6th round key to vKey1 // load the 6th round key to vKey1
__ addi (keypos, keypos, 16); __ li (keypos, 96);
__ lvx (vTmp2, keypos, key); __ lvx (vKey2, keypos, key);
__ vperm (vKey1, vTmp2, vTmp1, keyPerm); __ vec_perm (vKey1, vTmp1, vKey2, keyPerm);
// load the 7th round key to vKey2 // load the 7th round key to vKey2
__ addi (keypos, keypos, 16); __ li (keypos, 112);
__ lvx (vTmp1, keypos, key); __ lvx (vKey3, keypos, key);
__ vperm (vKey2, vTmp1, vTmp2, keyPerm); __ vec_perm (vKey2, vKey3, keyPerm);
// load the 8th round key to vKey3 // load the 8th round key to vKey3
__ addi (keypos, keypos, 16); __ li (keypos, 128);
__ lvx (vTmp2, keypos, key); __ lvx (vKey4, keypos, key);
__ vperm (vKey3, vTmp2, vTmp1, keyPerm); __ vec_perm (vKey3, vKey4, keyPerm);
// load the 9th round key to vKey4 // load the 9th round key to vKey4
__ addi (keypos, keypos, 16); __ li (keypos, 144);
__ lvx (vTmp1, keypos, key); __ lvx (vTmp1, keypos, key);
__ vperm (vKey4, vTmp1, vTmp2, keyPerm); __ vec_perm (vKey4, vTmp1, keyPerm);
// 6th - 9th rounds // 6th - 9th rounds
__ vcipher (vRet, vRet, vKey1); __ vcipher (vRet, vRet, vKey1);
__ vcipher (vRet, vRet, vKey2); __ vcipher (vRet, vRet, vKey2);
__ vcipher (vRet, vRet, vKey3); __ vcipher (vRet, vRet, vKey3);
__ vcipher (vRet, vRet, vKey4); __ vcipher (vRet, vRet, vKey4);
// load the 10th round key to vKey1 // load the 10th round key to vKey1
__ addi (keypos, keypos, 16); __ li (keypos, 160);
__ lvx (vTmp2, keypos, key); __ lvx (vKey2, keypos, key);
__ vperm (vKey1, vTmp2, vTmp1, keyPerm); __ vec_perm (vKey1, vTmp1, vKey2, keyPerm);
// load the 11th round key to vKey2 // load the 11th round key to vKey2
__ addi (keypos, keypos, 16); __ li (keypos, 176);
__ lvx (vTmp1, keypos, key); __ lvx (vTmp1, keypos, key);
__ vperm (vKey2, vTmp1, vTmp2, keyPerm); __ vec_perm (vKey2, vTmp1, keyPerm);
// if all round keys are loaded, skip next 4 rounds // if all round keys are loaded, skip next 4 rounds
__ cmpwi (CCR0, keylen, 44); __ cmpwi (CCR0, keylen, 44);
__ beq (CCR0, L_doLast); __ beq (CCR0, L_doLast);
// 10th - 11th rounds // 10th - 11th rounds
__ vcipher (vRet, vRet, vKey1); __ vcipher (vRet, vRet, vKey1);
__ vcipher (vRet, vRet, vKey2); __ vcipher (vRet, vRet, vKey2);
// load the 12th round key to vKey1 // load the 12th round key to vKey1
__ addi (keypos, keypos, 16); __ li (keypos, 192);
__ lvx (vTmp2, keypos, key); __ lvx (vKey2, keypos, key);
__ vperm (vKey1, vTmp2, vTmp1, keyPerm); __ vec_perm (vKey1, vTmp1, vKey2, keyPerm);
// load the 13th round key to vKey2 // load the 13th round key to vKey2
__ addi (keypos, keypos, 16); __ li (keypos, 208);
__ lvx (vTmp1, keypos, key); __ lvx (vTmp1, keypos, key);
__ vperm (vKey2, vTmp1, vTmp2, keyPerm); __ vec_perm (vKey2, vTmp1, keyPerm);
// if all round keys are loaded, skip next 2 rounds // if all round keys are loaded, skip next 2 rounds
__ cmpwi (CCR0, keylen, 52); __ cmpwi (CCR0, keylen, 52);
__ beq (CCR0, L_doLast); __ beq (CCR0, L_doLast);
// 12th - 13th rounds // 12th - 13th rounds
__ vcipher (vRet, vRet, vKey1); __ vcipher (vRet, vRet, vKey1);
__ vcipher (vRet, vRet, vKey2); __ vcipher (vRet, vRet, vKey2);
// load the 14th round key to vKey1 // load the 14th round key to vKey1
__ addi (keypos, keypos, 16); __ li (keypos, 224);
__ lvx (vTmp2, keypos, key); __ lvx (vKey2, keypos, key);
__ vperm (vKey1, vTmp2, vTmp1, keyPerm); __ vec_perm (vKey1, vTmp1, vKey2, keyPerm);
// load the 15th round key to vKey2 // load the 15th round key to vKey2
__ addi (keypos, keypos, 16); __ li (keypos, 240);
__ lvx (vTmp1, keypos, key); __ lvx (vTmp1, keypos, key);
__ vperm (vKey2, vTmp1, vTmp2, keyPerm); __ vec_perm (vKey2, vTmp1, keyPerm);
__ bind(L_doLast); __ bind(L_doLast);
// last two rounds // last two rounds
__ vcipher (vRet, vRet, vKey1); __ vcipher (vRet, vRet, vKey1);
__ vcipherlast (vRet, vRet, vKey2); __ vcipherlast (vRet, vRet, vKey2);
__ neg (temp, to); // store result (unaligned)
__ lvsr (toPerm, temp); #ifdef VM_LITTLE_ENDIAN
__ vspltisb (vTmp2, -1); __ lvsl (toPerm, to);
__ vxor (vTmp1, vTmp1, vTmp1); #else
__ vperm (vTmp2, vTmp2, vTmp1, toPerm); __ lvsr (toPerm, to);
__ vxor (toPerm, toPerm, fSplt); #endif
__ vspltisb (vTmp3, -1);
__ vspltisb (vTmp4, 0);
__ lvx (vTmp1, to); __ lvx (vTmp1, to);
__ vperm (vRet, vRet, vRet, toPerm); __ lvx (vTmp2, fifteen, to);
__ vsel (vTmp1, vTmp1, vRet, vTmp2); #ifdef VM_LITTLE_ENDIAN
__ lvx (vTmp4, fifteen, to); __ vperm (vTmp3, vTmp3, vTmp4, toPerm); // generate select mask
__ vxor (toPerm, toPerm, fSplt); // swap bytes
#else
__ vperm (vTmp3, vTmp4, vTmp3, toPerm); // generate select mask
#endif
__ vperm (vTmp4, vRet, vRet, toPerm); // rotate data
__ vsel (vTmp2, vTmp4, vTmp2, vTmp3);
__ vsel (vTmp1, vTmp1, vTmp4, vTmp3);
__ stvx (vTmp2, fifteen, to); // store this one first (may alias)
__ stvx (vTmp1, to); __ stvx (vTmp1, to);
__ vsel (vRet, vRet, vTmp4, vTmp2);
__ stvx (vRet, fifteen, to);
__ blr(); __ blr();
return start; return start;
} }
// Arguments for generated stub (little endian only): // Arguments for generated stub:
// R3_ARG1 - source byte array address // R3_ARG1 - source byte array address
// R4_ARG2 - destination byte array address // R4_ARG2 - destination byte array address
// R5_ARG3 - K (key) in little endian int array // R5_ARG3 - K (key) in little endian int array
...@@ -2442,7 +2447,6 @@ class StubGenerator: public StubCodeGenerator { ...@@ -2442,7 +2447,6 @@ class StubGenerator: public StubCodeGenerator {
Register keylen = R8; Register keylen = R8;
Register temp = R9; Register temp = R9;
Register keypos = R10; Register keypos = R10;
Register hex = R11;
Register fifteen = R12; Register fifteen = R12;
VectorRegister vRet = VR0; VectorRegister vRet = VR0;
...@@ -2463,30 +2467,30 @@ class StubGenerator: public StubCodeGenerator { ...@@ -2463,30 +2467,30 @@ class StubGenerator: public StubCodeGenerator {
VectorRegister vTmp3 = VR12; VectorRegister vTmp3 = VR12;
VectorRegister vTmp4 = VR13; VectorRegister vTmp4 = VR13;
VectorRegister vLow = VR14;
VectorRegister vHigh = VR15;
__ li (hex, 16);
__ li (fifteen, 15); __ li (fifteen, 15);
__ vspltisb (fSplt, 0x0f);
// load unaligned from[0-15] to vsRet // load unaligned from[0-15] to vsRet
__ lvx (vRet, from); __ lvx (vRet, from);
__ lvx (vTmp1, fifteen, from); __ lvx (vTmp1, fifteen, from);
__ lvsl (fromPerm, from); __ lvsl (fromPerm, from);
#ifdef VM_LITTLE_ENDIAN
__ vspltisb (fSplt, 0x0f);
__ vxor (fromPerm, fromPerm, fSplt); __ vxor (fromPerm, fromPerm, fSplt);
#endif
__ vperm (vRet, vRet, vTmp1, fromPerm); // align [and byte swap in LE] __ vperm (vRet, vRet, vTmp1, fromPerm); // align [and byte swap in LE]
// load keylen (44 or 52 or 60) // load keylen (44 or 52 or 60)
__ lwz (keylen, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT), key); __ lwz (keylen, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT), key);
// to load keys // to load keys
__ lvsr (keyPerm, key); __ load_perm (keyPerm, key);
#ifdef VM_LITTLE_ENDIAN
__ vxor (vTmp2, vTmp2, vTmp2); __ vxor (vTmp2, vTmp2, vTmp2);
__ vspltisb (vTmp2, -16); __ vspltisb (vTmp2, -16);
__ vrld (keyPerm, keyPerm, vTmp2); __ vrld (keyPerm, keyPerm, vTmp2);
__ vrld (keyPerm, keyPerm, vTmp2); __ vrld (keyPerm, keyPerm, vTmp2);
__ vsldoi (keyPerm, keyPerm, keyPerm, 8); __ vsldoi (keyPerm, keyPerm, keyPerm, 8);
#endif
__ cmpwi (CCR0, keylen, 44); __ cmpwi (CCR0, keylen, 44);
__ beq (CCR0, L_do44); __ beq (CCR0, L_do44);
...@@ -2494,32 +2498,32 @@ class StubGenerator: public StubCodeGenerator { ...@@ -2494,32 +2498,32 @@ class StubGenerator: public StubCodeGenerator {
__ cmpwi (CCR0, keylen, 52); __ cmpwi (CCR0, keylen, 52);
__ beq (CCR0, L_do52); __ beq (CCR0, L_do52);
// load the 15th round key to vKey11 // load the 15th round key to vKey1
__ li (keypos, 240); __ li (keypos, 240);
__ lvx (vTmp1, keypos, key); __ lvx (vKey1, keypos, key);
__ addi (keypos, keypos, -16); __ li (keypos, 224);
__ lvx (vTmp2, keypos, key); __ lvx (vKey2, keypos, key);
__ vperm (vKey1, vTmp1, vTmp2, keyPerm); __ vec_perm (vKey1, vKey2, vKey1, keyPerm);
// load the 14th round key to vKey10 // load the 14th round key to vKey2
__ addi (keypos, keypos, -16); __ li (keypos, 208);
__ lvx (vTmp1, keypos, key); __ lvx (vKey3, keypos, key);
__ vperm (vKey2, vTmp2, vTmp1, keyPerm); __ vec_perm (vKey2, vKey3, vKey2, keyPerm);
// load the 13th round key to vKey10 // load the 13th round key to vKey3
__ addi (keypos, keypos, -16); __ li (keypos, 192);
__ lvx (vTmp2, keypos, key); __ lvx (vKey4, keypos, key);
__ vperm (vKey3, vTmp1, vTmp2, keyPerm); __ vec_perm (vKey3, vKey4, vKey3, keyPerm);
// load the 12th round key to vKey10 // load the 12th round key to vKey4
__ addi (keypos, keypos, -16); __ li (keypos, 176);
__ lvx (vTmp1, keypos, key); __ lvx (vKey5, keypos, key);
__ vperm (vKey4, vTmp2, vTmp1, keyPerm); __ vec_perm (vKey4, vKey5, vKey4, keyPerm);
// load the 11th round key to vKey10 // load the 11th round key to vKey5
__ addi (keypos, keypos, -16); __ li (keypos, 160);
__ lvx (vTmp2, keypos, key); __ lvx (vTmp1, keypos, key);
__ vperm (vKey5, vTmp1, vTmp2, keyPerm); __ vec_perm (vKey5, vTmp1, vKey5, keyPerm);
// 1st - 5th rounds // 1st - 5th rounds
__ vxor (vRet, vRet, vKey1); __ vxor (vRet, vRet, vKey1);
...@@ -2532,22 +2536,22 @@ class StubGenerator: public StubCodeGenerator { ...@@ -2532,22 +2536,22 @@ class StubGenerator: public StubCodeGenerator {
__ bind (L_do52); __ bind (L_do52);
// load the 13th round key to vKey11 // load the 13th round key to vKey1
__ li (keypos, 208); __ li (keypos, 208);
__ lvx (vTmp1, keypos, key); __ lvx (vKey1, keypos, key);
__ addi (keypos, keypos, -16); __ li (keypos, 192);
__ lvx (vTmp2, keypos, key); __ lvx (vKey2, keypos, key);
__ vperm (vKey1, vTmp1, vTmp2, keyPerm); __ vec_perm (vKey1, vKey2, vKey1, keyPerm);
// load the 12th round key to vKey10 // load the 12th round key to vKey2
__ addi (keypos, keypos, -16); __ li (keypos, 176);
__ lvx (vTmp1, keypos, key); __ lvx (vKey3, keypos, key);
__ vperm (vKey2, vTmp2, vTmp1, keyPerm); __ vec_perm (vKey2, vKey3, vKey2, keyPerm);
// load the 11th round key to vKey10 // load the 11th round key to vKey3
__ addi (keypos, keypos, -16); __ li (keypos, 160);
__ lvx (vTmp2, keypos, key); __ lvx (vTmp1, keypos, key);
__ vperm (vKey3, vTmp1, vTmp2, keyPerm); __ vec_perm (vKey3, vTmp1, vKey3, keyPerm);
// 1st - 3rd rounds // 1st - 3rd rounds
__ vxor (vRet, vRet, vKey1); __ vxor (vRet, vRet, vKey1);
...@@ -2558,42 +2562,42 @@ class StubGenerator: public StubCodeGenerator { ...@@ -2558,42 +2562,42 @@ class StubGenerator: public StubCodeGenerator {
__ bind (L_do44); __ bind (L_do44);
// load the 11th round key to vKey11 // load the 11th round key to vKey1
__ li (keypos, 176); __ li (keypos, 176);
__ lvx (vKey1, keypos, key);
__ li (keypos, 160);
__ lvx (vTmp1, keypos, key); __ lvx (vTmp1, keypos, key);
__ addi (keypos, keypos, -16); __ vec_perm (vKey1, vTmp1, vKey1, keyPerm);
__ lvx (vTmp2, keypos, key);
__ vperm (vKey1, vTmp1, vTmp2, keyPerm);
// 1st round // 1st round
__ vxor (vRet, vRet, vKey1); __ vxor (vRet, vRet, vKey1);
__ bind (L_doLast); __ bind (L_doLast);
// load the 10th round key to vKey10 // load the 10th round key to vKey1
__ addi (keypos, keypos, -16); __ li (keypos, 144);
__ lvx (vTmp1, keypos, key); __ lvx (vKey2, keypos, key);
__ vperm (vKey1, vTmp2, vTmp1, keyPerm); __ vec_perm (vKey1, vKey2, vTmp1, keyPerm);
// load the 9th round key to vKey10 // load the 9th round key to vKey2
__ addi (keypos, keypos, -16); __ li (keypos, 128);
__ lvx (vTmp2, keypos, key); __ lvx (vKey3, keypos, key);
__ vperm (vKey2, vTmp1, vTmp2, keyPerm); __ vec_perm (vKey2, vKey3, vKey2, keyPerm);
// load the 8th round key to vKey10 // load the 8th round key to vKey3
__ addi (keypos, keypos, -16); __ li (keypos, 112);
__ lvx (vTmp1, keypos, key); __ lvx (vKey4, keypos, key);
__ vperm (vKey3, vTmp2, vTmp1, keyPerm); __ vec_perm (vKey3, vKey4, vKey3, keyPerm);
// load the 7th round key to vKey10 // load the 7th round key to vKey4
__ addi (keypos, keypos, -16); __ li (keypos, 96);
__ lvx (vTmp2, keypos, key); __ lvx (vKey5, keypos, key);
__ vperm (vKey4, vTmp1, vTmp2, keyPerm); __ vec_perm (vKey4, vKey5, vKey4, keyPerm);
// load the 6th round key to vKey10 // load the 6th round key to vKey5
__ addi (keypos, keypos, -16); __ li (keypos, 80);
__ lvx (vTmp1, keypos, key); __ lvx (vTmp1, keypos, key);
__ vperm (vKey5, vTmp2, vTmp1, keyPerm); __ vec_perm (vKey5, vTmp1, vKey5, keyPerm);
// last 10th - 6th rounds // last 10th - 6th rounds
__ vncipher (vRet, vRet, vKey1); __ vncipher (vRet, vRet, vKey1);
...@@ -2602,30 +2606,29 @@ class StubGenerator: public StubCodeGenerator { ...@@ -2602,30 +2606,29 @@ class StubGenerator: public StubCodeGenerator {
__ vncipher (vRet, vRet, vKey4); __ vncipher (vRet, vRet, vKey4);
__ vncipher (vRet, vRet, vKey5); __ vncipher (vRet, vRet, vKey5);
// load the 5th round key to vKey10 // load the 5th round key to vKey1
__ addi (keypos, keypos, -16); __ li (keypos, 64);
__ lvx (vTmp2, keypos, key); __ lvx (vKey2, keypos, key);
__ vperm (vKey1, vTmp1, vTmp2, keyPerm); __ vec_perm (vKey1, vKey2, vTmp1, keyPerm);
// load the 4th round key to vKey10 // load the 4th round key to vKey2
__ addi (keypos, keypos, -16); __ li (keypos, 48);
__ lvx (vTmp1, keypos, key); __ lvx (vKey3, keypos, key);
__ vperm (vKey2, vTmp2, vTmp1, keyPerm); __ vec_perm (vKey2, vKey3, vKey2, keyPerm);
// load the 3rd round key to vKey10 // load the 3rd round key to vKey3
__ addi (keypos, keypos, -16); __ li (keypos, 32);
__ lvx (vTmp2, keypos, key); __ lvx (vKey4, keypos, key);
__ vperm (vKey3, vTmp1, vTmp2, keyPerm); __ vec_perm (vKey3, vKey4, vKey3, keyPerm);
// load the 2nd round key to vKey10 // load the 2nd round key to vKey4
__ addi (keypos, keypos, -16); __ li (keypos, 16);
__ lvx (vTmp1, keypos, key); __ lvx (vKey5, keypos, key);
__ vperm (vKey4, vTmp2, vTmp1, keyPerm); __ vec_perm (vKey4, vKey5, vKey4, keyPerm);
// load the 1st round key to vKey10 // load the 1st round key to vKey5
__ addi (keypos, keypos, -16); __ lvx (vTmp1, key);
__ lvx (vTmp2, keypos, key); __ vec_perm (vKey5, vTmp1, vKey5, keyPerm);
__ vperm (vKey5, vTmp1, vTmp2, keyPerm);
// last 5th - 1st rounds // last 5th - 1st rounds
__ vncipher (vRet, vRet, vKey1); __ vncipher (vRet, vRet, vKey1);
...@@ -2634,19 +2637,27 @@ class StubGenerator: public StubCodeGenerator { ...@@ -2634,19 +2637,27 @@ class StubGenerator: public StubCodeGenerator {
__ vncipher (vRet, vRet, vKey4); __ vncipher (vRet, vRet, vKey4);
__ vncipherlast (vRet, vRet, vKey5); __ vncipherlast (vRet, vRet, vKey5);
__ neg (temp, to); // store result (unaligned)
__ lvsr (toPerm, temp); #ifdef VM_LITTLE_ENDIAN
__ vspltisb (vTmp2, -1); __ lvsl (toPerm, to);
__ vxor (vTmp1, vTmp1, vTmp1); #else
__ vperm (vTmp2, vTmp2, vTmp1, toPerm); __ lvsr (toPerm, to);
__ vxor (toPerm, toPerm, fSplt); #endif
__ vspltisb (vTmp3, -1);
__ vspltisb (vTmp4, 0);
__ lvx (vTmp1, to); __ lvx (vTmp1, to);
__ vperm (vRet, vRet, vRet, toPerm); __ lvx (vTmp2, fifteen, to);
__ vsel (vTmp1, vTmp1, vRet, vTmp2); #ifdef VM_LITTLE_ENDIAN
__ lvx (vTmp4, fifteen, to); __ vperm (vTmp3, vTmp3, vTmp4, toPerm); // generate select mask
__ vxor (toPerm, toPerm, fSplt); // swap bytes
#else
__ vperm (vTmp3, vTmp4, vTmp3, toPerm); // generate select mask
#endif
__ vperm (vTmp4, vRet, vRet, toPerm); // rotate data
__ vsel (vTmp2, vTmp4, vTmp2, vTmp3);
__ vsel (vTmp1, vTmp1, vTmp4, vTmp3);
__ stvx (vTmp2, fifteen, to); // store this one first (may alias)
__ stvx (vTmp1, to); __ stvx (vTmp1, to);
__ vsel (vRet, vRet, vTmp4, vTmp2);
__ stvx (vRet, fifteen, to);
__ blr(); __ blr();
return start; return start;
......
...@@ -34,7 +34,7 @@ static bool returns_to_call_stub(address return_pc) { return return_pc == _call_ ...@@ -34,7 +34,7 @@ static bool returns_to_call_stub(address return_pc) { return return_pc == _call_
enum platform_dependent_constants { enum platform_dependent_constants {
code_size1 = 20000, // simply increase if too small (assembler will crash if too small) code_size1 = 20000, // simply increase if too small (assembler will crash if too small)
code_size2 = 22000 // simply increase if too small (assembler will crash if too small) code_size2 = 24000 // simply increase if too small (assembler will crash if too small)
}; };
// CRC32 Intrinsics. // CRC32 Intrinsics.
......
...@@ -174,7 +174,6 @@ void VM_Version::initialize() { ...@@ -174,7 +174,6 @@ void VM_Version::initialize() {
} }
// The AES intrinsic stubs require AES instruction support. // The AES intrinsic stubs require AES instruction support.
#if defined(VM_LITTLE_ENDIAN)
if (has_vcipher()) { if (has_vcipher()) {
if (FLAG_IS_DEFAULT(UseAES)) { if (FLAG_IS_DEFAULT(UseAES)) {
UseAES = true; UseAES = true;
...@@ -195,18 +194,6 @@ void VM_Version::initialize() { ...@@ -195,18 +194,6 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseAESIntrinsics, false); FLAG_SET_DEFAULT(UseAESIntrinsics, false);
} }
#else
if (UseAES) {
warning("AES instructions are not available on this CPU");
FLAG_SET_DEFAULT(UseAES, false);
}
if (UseAESIntrinsics) {
if (!FLAG_IS_DEFAULT(UseAESIntrinsics))
warning("AES intrinsics are not available on this CPU");
FLAG_SET_DEFAULT(UseAESIntrinsics, false);
}
#endif
if (has_vshasig()) { if (has_vshasig()) {
if (FLAG_IS_DEFAULT(UseSHA)) { if (FLAG_IS_DEFAULT(UseSHA)) {
UseSHA = true; UseSHA = true;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册