diff --git a/src/cpu/ppc/vm/vm_version_ppc.cpp b/src/cpu/ppc/vm/vm_version_ppc.cpp index bbc52691b9cf4ce624c5f7f3a08d3db06b031124..3c59c96ca2452063467017dbf3784352a05d40fa 100644 --- a/src/cpu/ppc/vm/vm_version_ppc.cpp +++ b/src/cpu/ppc/vm/vm_version_ppc.cpp @@ -194,6 +194,11 @@ void VM_Version::initialize() { FLAG_SET_DEFAULT(UseAESIntrinsics, false); } + if (UseGHASHIntrinsics) { + warning("GHASH intrinsics are not available on this CPU"); + FLAG_SET_DEFAULT(UseGHASHIntrinsics, false); + } + if (has_vshasig()) { if (FLAG_IS_DEFAULT(UseSHA)) { UseSHA = true; diff --git a/src/cpu/sparc/vm/assembler_sparc.hpp b/src/cpu/sparc/vm/assembler_sparc.hpp index dd83b092f75da5b2fc5ea031512445bc7b48b7c4..55f338754dd3fdbf800acf5a57ae616a89193847 100644 --- a/src/cpu/sparc/vm/assembler_sparc.hpp +++ b/src/cpu/sparc/vm/assembler_sparc.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -129,6 +129,7 @@ class Assembler : public AbstractAssembler { flog3_op3 = 0x36, edge_op3 = 0x36, fsrc_op3 = 0x36, + xmulx_op3 = 0x36, impdep2_op3 = 0x37, stpartialf_op3 = 0x37, jmpl_op3 = 0x38, @@ -220,6 +221,8 @@ class Assembler : public AbstractAssembler { mdtox_opf = 0x110, mstouw_opf = 0x111, mstosw_opf = 0x113, + xmulx_opf = 0x115, + xmulxhi_opf = 0x116, mxtod_opf = 0x118, mwtos_opf = 0x119, @@ -1212,6 +1215,9 @@ public: void movwtos( Register s, FloatRegister d ) { vis3_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::S) | op3(mftoi_op3) | opf(mwtos_opf) | rs2(s)); } void movxtod( Register s, FloatRegister d ) { vis3_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(mftoi_op3) | opf(mxtod_opf) | rs2(s)); } + void xmulx(Register s1, Register s2, Register d) { vis3_only(); emit_int32( op(arith_op) | rd(d) | op3(xmulx_op3) | rs1(s1) | opf(xmulx_opf) | rs2(s2)); } + void xmulxhi(Register s1, Register s2, Register d) { vis3_only(); emit_int32( op(arith_op) | rd(d) | op3(xmulx_op3) | rs1(s1) | opf(xmulxhi_opf) | rs2(s2)); } + // Crypto SHA instructions void sha1() { sha1_only(); emit_int32( op(arith_op) | op3(sha_op3) | opf(sha1_opf)); } diff --git a/src/cpu/sparc/vm/stubGenerator_sparc.cpp b/src/cpu/sparc/vm/stubGenerator_sparc.cpp index 6ad0b1a41668368c0f893b1d833f09ce76d8f8cd..51064927eb01477d1b0a9fabc5e1034e510458ab 100644 --- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp +++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp @@ -4788,6 +4788,130 @@ class StubGenerator: public StubCodeGenerator { return start; } + /* Single and multi-block ghash operations */ + address generate_ghash_processBlocks() { + __ align(CodeEntryAlignment); + Label L_ghash_loop, L_aligned, L_main; + StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks"); + address start = __ pc(); + + Register state = I0; + Register subkeyH = I1; + Register data = I2; + Register len = I3; + + __ 
save_frame(0); + + __ ldx(state, 0, O0); + __ ldx(state, 8, O1); + + // Loop label for multiblock operations + __ BIND(L_ghash_loop); + + // Check if 'data' is unaligned + __ andcc(data, 7, G1); + __ br(Assembler::zero, false, Assembler::pt, L_aligned); + __ delayed()->nop(); + + Register left_shift = L1; + Register right_shift = L2; + Register data_ptr = L3; + + // Get left and right shift values in bits + __ sll(G1, LogBitsPerByte, left_shift); + __ mov(64, right_shift); + __ sub(right_shift, left_shift, right_shift); + + // Align to read 'data' + __ sub(data, G1, data_ptr); + + // Load first 8 bytes of 'data' + __ ldx(data_ptr, 0, O4); + __ sllx(O4, left_shift, O4); + __ ldx(data_ptr, 8, O5); + __ srlx(O5, right_shift, G4); + __ bset(G4, O4); + + // Load second 8 bytes of 'data' + __ sllx(O5, left_shift, O5); + __ ldx(data_ptr, 16, G4); + __ srlx(G4, right_shift, G4); + __ ba(L_main); + __ delayed()->bset(G4, O5); + + // If 'data' is aligned, load normally + __ BIND(L_aligned); + __ ldx(data, 0, O4); + __ ldx(data, 8, O5); + + __ BIND(L_main); + __ ldx(subkeyH, 0, O2); + __ ldx(subkeyH, 8, O3); + + __ xor3(O0, O4, O0); + __ xor3(O1, O5, O1); + + __ xmulxhi(O0, O3, G3); + __ xmulx(O0, O2, O5); + __ xmulxhi(O1, O2, G4); + __ xmulxhi(O1, O3, G5); + __ xmulx(O0, O3, G1); + __ xmulx(O1, O3, G2); + __ xmulx(O1, O2, O3); + __ xmulxhi(O0, O2, O4); + + __ mov(0xE1, O0); + __ sllx(O0, 56, O0); + + __ xor3(O5, G3, O5); + __ xor3(O5, G4, O5); + __ xor3(G5, G1, G1); + __ xor3(G1, O3, G1); + __ srlx(G2, 63, O1); + __ srlx(G1, 63, G3); + __ sllx(G2, 63, O3); + __ sllx(G2, 58, O2); + __ xor3(O3, O2, O2); + + __ sllx(G1, 1, G1); + __ or3(G1, O1, G1); + + __ xor3(G1, O2, G1); + + __ sllx(G2, 1, G2); + + __ xmulxhi(G1, O0, O1); + __ xmulx(G1, O0, O2); + __ xmulxhi(G2, O0, O3); + __ xmulx(G2, O0, G1); + + __ xor3(O4, O1, O4); + __ xor3(O5, O2, O5); + __ xor3(O5, O3, O5); + + __ sllx(O4, 1, O2); + __ srlx(O5, 63, O3); + + __ or3(O2, O3, O0); + + __ sllx(O5, 1, O1); + __ srlx(G1, 63, 
O2); + __ or3(O1, O2, O1); + __ xor3(O1, G3, O1); + + __ deccc(len); + __ br(Assembler::notZero, true, Assembler::pt, L_ghash_loop); + __ delayed()->add(data, 16, data); + + __ stx(O0, I0, 0); + __ stx(O1, I0, 8); + + __ ret(); + __ delayed()->restore(); + + return start; + } + void generate_initial() { // Generates all stubs and initializes the entry points @@ -4860,6 +4984,10 @@ class StubGenerator: public StubCodeGenerator { StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt(); StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel(); } + // generate GHASH intrinsics code + if (UseGHASHIntrinsics) { + StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks(); + } // generate SHA1/SHA256/SHA512 intrinsics code if (UseSHA1Intrinsics) { diff --git a/src/cpu/sparc/vm/vm_version_sparc.cpp b/src/cpu/sparc/vm/vm_version_sparc.cpp index c0cd16a1870cee9285cdd5cc4af3fe0a50e34642..793dc184a8234e0f6aca55e37aef73cc0b40a990 100644 --- a/src/cpu/sparc/vm/vm_version_sparc.cpp +++ b/src/cpu/sparc/vm/vm_version_sparc.cpp @@ -319,6 +319,17 @@ void VM_Version::initialize() { } } + // GHASH/GCM intrinsics + if (has_vis3() && (UseVIS > 2)) { + if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) { + UseGHASHIntrinsics = true; + } + } else if (UseGHASHIntrinsics) { + if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics)) + warning("GHASH intrinsics require VIS3 instruction support. 
Intrinsics will be disabled"); + FLAG_SET_DEFAULT(UseGHASHIntrinsics, false); + } + // SHA1, SHA256, and SHA512 instructions were added to SPARC T-series at different times if (has_sha1() || has_sha256() || has_sha512()) { if (UseVIS > 0) { // SHA intrinsics use VIS1 instructions diff --git a/src/cpu/x86/vm/assembler_x86.cpp b/src/cpu/x86/vm/assembler_x86.cpp index 7cbc47d6053063d63493ea3de87399c8c458f7c9..1759ecdfd2b9bc2d7f352c05438228f6a1f93621 100644 --- a/src/cpu/x86/vm/assembler_x86.cpp +++ b/src/cpu/x86/vm/assembler_x86.cpp @@ -2575,6 +2575,15 @@ void Assembler::psrldq(XMMRegister dst, int shift) { emit_int8(shift); } +void Assembler::pslldq(XMMRegister dst, int shift) { + // Shift left 128 bit value in xmm register by number of bytes. + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + int encode = simd_prefix_and_encode(xmm7, dst, dst, VEX_SIMD_66); + emit_int8(0x73); + emit_int8((unsigned char)(0xC0 | encode)); + emit_int8(shift); +} + void Assembler::ptest(XMMRegister dst, Address src) { assert(VM_Version::supports_sse4_1(), ""); assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); diff --git a/src/cpu/x86/vm/assembler_x86.hpp b/src/cpu/x86/vm/assembler_x86.hpp index 341d9e39bd65709bcca77e191c7e4c36e824bc56..5ea01311ed827a88d5cdd6ecea7ded2539bf7683 100644 --- a/src/cpu/x86/vm/assembler_x86.hpp +++ b/src/cpu/x86/vm/assembler_x86.hpp @@ -1527,6 +1527,8 @@ private: // Shift Right by bytes Logical DoubleQuadword Immediate void psrldq(XMMRegister dst, int shift); + // Shift Left by bytes Logical DoubleQuadword Immediate + void pslldq(XMMRegister dst, int shift); // Logical Compare 128bit void ptest(XMMRegister dst, XMMRegister src); diff --git a/src/cpu/x86/vm/stubGenerator_x86_32.cpp b/src/cpu/x86/vm/stubGenerator_x86_32.cpp index 50a06d7a5b594915344ebdaff6a992662b49b720..2e55998074d54142f85720e7c4ebbfd0379195ef 100644 --- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp +++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp @@ -2719,6 +2719,169 @@ class 
StubGenerator: public StubCodeGenerator { return start; } + // byte swap x86 long + address generate_ghash_long_swap_mask() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "ghash_long_swap_mask"); + address start = __ pc(); + __ emit_data(0x0b0a0908, relocInfo::none, 0); + __ emit_data(0x0f0e0d0c, relocInfo::none, 0); + __ emit_data(0x03020100, relocInfo::none, 0); + __ emit_data(0x07060504, relocInfo::none, 0); + + return start; + } + + // byte swap x86 byte array + address generate_ghash_byte_swap_mask() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "ghash_byte_swap_mask"); + address start = __ pc(); + __ emit_data(0x0c0d0e0f, relocInfo::none, 0); + __ emit_data(0x08090a0b, relocInfo::none, 0); + __ emit_data(0x04050607, relocInfo::none, 0); + __ emit_data(0x00010203, relocInfo::none, 0); + return start; + } + + /* Single and multi-block ghash operations */ + address generate_ghash_processBlocks() { + assert(UseGHASHIntrinsics, "need GHASH intrinsics and CLMUL support"); + __ align(CodeEntryAlignment); + Label L_ghash_loop, L_exit; + StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks"); + address start = __ pc(); + + const Register state = rdi; + const Register subkeyH = rsi; + const Register data = rdx; + const Register blocks = rcx; + + const Address state_param(rbp, 8+0); + const Address subkeyH_param(rbp, 8+4); + const Address data_param(rbp, 8+8); + const Address blocks_param(rbp, 8+12); + + const XMMRegister xmm_temp0 = xmm0; + const XMMRegister xmm_temp1 = xmm1; + const XMMRegister xmm_temp2 = xmm2; + const XMMRegister xmm_temp3 = xmm3; + const XMMRegister xmm_temp4 = xmm4; + const XMMRegister xmm_temp5 = xmm5; + const XMMRegister xmm_temp6 = xmm6; + const XMMRegister xmm_temp7 = xmm7; + + __ enter(); + handleSOERegisters(true); // Save registers + + __ movptr(state, state_param); + __ movptr(subkeyH, subkeyH_param); + __ movptr(data, data_param); + __ movptr(blocks, blocks_param); + + __ 
movdqu(xmm_temp0, Address(state, 0)); + __ pshufb(xmm_temp0, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr())); + + __ movdqu(xmm_temp1, Address(subkeyH, 0)); + __ pshufb(xmm_temp1, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr())); + + __ BIND(L_ghash_loop); + __ movdqu(xmm_temp2, Address(data, 0)); + __ pshufb(xmm_temp2, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr())); + + __ pxor(xmm_temp0, xmm_temp2); + + // + // Multiply with the hash key + // + __ movdqu(xmm_temp3, xmm_temp0); + __ pclmulqdq(xmm_temp3, xmm_temp1, 0); // xmm3 holds a0*b0 + __ movdqu(xmm_temp4, xmm_temp0); + __ pclmulqdq(xmm_temp4, xmm_temp1, 16); // xmm4 holds a0*b1 + + __ movdqu(xmm_temp5, xmm_temp0); + __ pclmulqdq(xmm_temp5, xmm_temp1, 1); // xmm5 holds a1*b0 + __ movdqu(xmm_temp6, xmm_temp0); + __ pclmulqdq(xmm_temp6, xmm_temp1, 17); // xmm6 holds a1*b1 + + __ pxor(xmm_temp4, xmm_temp5); // xmm4 holds a0*b1 + a1*b0 + + __ movdqu(xmm_temp5, xmm_temp4); // move the contents of xmm4 to xmm5 + __ psrldq(xmm_temp4, 8); // shift by xmm4 64 bits to the right + __ pslldq(xmm_temp5, 8); // shift by xmm5 64 bits to the left + __ pxor(xmm_temp3, xmm_temp5); + __ pxor(xmm_temp6, xmm_temp4); // Register pair holds the result + // of the carry-less multiplication of + // xmm0 by xmm1. + + // We shift the result of the multiplication by one bit position + // to the left to cope for the fact that the bits are reversed. + __ movdqu(xmm_temp7, xmm_temp3); + __ movdqu(xmm_temp4, xmm_temp6); + __ pslld (xmm_temp3, 1); + __ pslld(xmm_temp6, 1); + __ psrld(xmm_temp7, 31); + __ psrld(xmm_temp4, 31); + __ movdqu(xmm_temp5, xmm_temp7); + __ pslldq(xmm_temp4, 4); + __ pslldq(xmm_temp7, 4); + __ psrldq(xmm_temp5, 12); + __ por(xmm_temp3, xmm_temp7); + __ por(xmm_temp6, xmm_temp4); + __ por(xmm_temp6, xmm_temp5); + + // + // First phase of the reduction + // + // Move xmm3 into xmm4, xmm5, xmm7 in order to perform the shifts + // independently. 
+ __ movdqu(xmm_temp7, xmm_temp3); + __ movdqu(xmm_temp4, xmm_temp3); + __ movdqu(xmm_temp5, xmm_temp3); + __ pslld(xmm_temp7, 31); // packed left shift of each dword: << 31 + __ pslld(xmm_temp4, 30); // packed left shift of each dword: << 30 + __ pslld(xmm_temp5, 25); // packed left shift of each dword: << 25 + __ pxor(xmm_temp7, xmm_temp4); // xor the shifted versions + __ pxor(xmm_temp7, xmm_temp5); + __ movdqu(xmm_temp4, xmm_temp7); + __ pslldq(xmm_temp7, 12); + __ psrldq(xmm_temp4, 4); + __ pxor(xmm_temp3, xmm_temp7); // first phase of the reduction complete + + // + // Second phase of the reduction + // + // Make 3 copies of xmm3 in xmm2, xmm5, xmm7 for doing these + // shift operations. + __ movdqu(xmm_temp2, xmm_temp3); + __ movdqu(xmm_temp7, xmm_temp3); + __ movdqu(xmm_temp5, xmm_temp3); + __ psrld(xmm_temp2, 1); // packed right shift of each dword: >> 1 + __ psrld(xmm_temp7, 2); // packed right shift of each dword: >> 2 + __ psrld(xmm_temp5, 7); // packed right shift of each dword: >> 7 + __ pxor(xmm_temp2, xmm_temp7); // xor the shifted versions + __ pxor(xmm_temp2, xmm_temp5); + __ pxor(xmm_temp2, xmm_temp4); + __ pxor(xmm_temp3, xmm_temp2); + __ pxor(xmm_temp6, xmm_temp3); // the result is in xmm6 + + __ decrement(blocks); + __ jcc(Assembler::zero, L_exit); + __ movdqu(xmm_temp0, xmm_temp6); + __ addptr(data, 16); + __ jmp(L_ghash_loop); + + __ BIND(L_exit); + // Byte swap 16-byte result + __ pshufb(xmm_temp6, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr())); + __ movdqu(Address(state, 0), xmm_temp6); // store the result + + handleSOERegisters(false); // restore registers + __ leave(); + __ ret(0); + return start; + } + /** * Arguments: * @@ -3018,6 +3181,13 @@ class StubGenerator: public StubCodeGenerator { StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt(); } + // Generate GHASH intrinsics code + if (UseGHASHIntrinsics) { + StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask(); + 
StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask(); + StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks(); + } + // Safefetch stubs. generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, &StubRoutines::_safefetch32_fault_pc, diff --git a/src/cpu/x86/vm/stubGenerator_x86_64.cpp b/src/cpu/x86/vm/stubGenerator_x86_64.cpp index 1d38af79974082248eb3425c0ccc6bd695610235..c5811b28b094199b55c15f902974ce3e5dd194c3 100644 --- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp +++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp @@ -3639,6 +3639,175 @@ class StubGenerator: public StubCodeGenerator { return start; } + + // byte swap x86 long + address generate_ghash_long_swap_mask() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "ghash_long_swap_mask"); + address start = __ pc(); + __ emit_data64(0x0f0e0d0c0b0a0908, relocInfo::none ); + __ emit_data64(0x0706050403020100, relocInfo::none ); + return start; + } + + // byte swap x86 byte array + address generate_ghash_byte_swap_mask() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "ghash_byte_swap_mask"); + address start = __ pc(); + __ emit_data64(0x08090a0b0c0d0e0f, relocInfo::none ); + __ emit_data64(0x0001020304050607, relocInfo::none ); + return start; + } + + /* Single and multi-block ghash operations */ + address generate_ghash_processBlocks() { + __ align(CodeEntryAlignment); + Label L_ghash_loop, L_exit; + StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks"); + address start = __ pc(); + + const Register state = c_rarg0; + const Register subkeyH = c_rarg1; + const Register data = c_rarg2; + const Register blocks = c_rarg3; + +#ifdef _WIN64 + const int XMM_REG_LAST = 10; +#endif + + const XMMRegister xmm_temp0 = xmm0; + const XMMRegister xmm_temp1 = xmm1; + const XMMRegister xmm_temp2 = xmm2; + const XMMRegister xmm_temp3 = xmm3; + const XMMRegister xmm_temp4 = xmm4; + const XMMRegister xmm_temp5 = 
xmm5; + const XMMRegister xmm_temp6 = xmm6; + const XMMRegister xmm_temp7 = xmm7; + const XMMRegister xmm_temp8 = xmm8; + const XMMRegister xmm_temp9 = xmm9; + const XMMRegister xmm_temp10 = xmm10; + + __ enter(); + +#ifdef _WIN64 + // save the xmm registers which must be preserved 6-10 + __ subptr(rsp, -rsp_after_call_off * wordSize); + for (int i = 6; i <= XMM_REG_LAST; i++) { + __ movdqu(xmm_save(i), as_XMMRegister(i)); + } +#endif + + __ movdqu(xmm_temp10, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr())); + + __ movdqu(xmm_temp0, Address(state, 0)); + __ pshufb(xmm_temp0, xmm_temp10); + + + __ BIND(L_ghash_loop); + __ movdqu(xmm_temp2, Address(data, 0)); + __ pshufb(xmm_temp2, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr())); + + __ movdqu(xmm_temp1, Address(subkeyH, 0)); + __ pshufb(xmm_temp1, xmm_temp10); + + __ pxor(xmm_temp0, xmm_temp2); + + // + // Multiply with the hash key + // + __ movdqu(xmm_temp3, xmm_temp0); + __ pclmulqdq(xmm_temp3, xmm_temp1, 0); // xmm3 holds a0*b0 + __ movdqu(xmm_temp4, xmm_temp0); + __ pclmulqdq(xmm_temp4, xmm_temp1, 16); // xmm4 holds a0*b1 + + __ movdqu(xmm_temp5, xmm_temp0); + __ pclmulqdq(xmm_temp5, xmm_temp1, 1); // xmm5 holds a1*b0 + __ movdqu(xmm_temp6, xmm_temp0); + __ pclmulqdq(xmm_temp6, xmm_temp1, 17); // xmm6 holds a1*b1 + + __ pxor(xmm_temp4, xmm_temp5); // xmm4 holds a0*b1 + a1*b0 + + __ movdqu(xmm_temp5, xmm_temp4); // move the contents of xmm4 to xmm5 + __ psrldq(xmm_temp4, 8); // shift by xmm4 64 bits to the right + __ pslldq(xmm_temp5, 8); // shift by xmm5 64 bits to the left + __ pxor(xmm_temp3, xmm_temp5); + __ pxor(xmm_temp6, xmm_temp4); // Register pair holds the result + // of the carry-less multiplication of + // xmm0 by xmm1. + + // We shift the result of the multiplication by one bit position + // to the left to cope for the fact that the bits are reversed. 
+ __ movdqu(xmm_temp7, xmm_temp3); + __ movdqu(xmm_temp8, xmm_temp6); + __ pslld(xmm_temp3, 1); + __ pslld(xmm_temp6, 1); + __ psrld(xmm_temp7, 31); + __ psrld(xmm_temp8, 31); + __ movdqu(xmm_temp9, xmm_temp7); + __ pslldq(xmm_temp8, 4); + __ pslldq(xmm_temp7, 4); + __ psrldq(xmm_temp9, 12); + __ por(xmm_temp3, xmm_temp7); + __ por(xmm_temp6, xmm_temp8); + __ por(xmm_temp6, xmm_temp9); + + // + // First phase of the reduction + // + // Move xmm3 into xmm7, xmm8, xmm9 in order to perform the shifts + // independently. + __ movdqu(xmm_temp7, xmm_temp3); + __ movdqu(xmm_temp8, xmm_temp3); + __ movdqu(xmm_temp9, xmm_temp3); + __ pslld(xmm_temp7, 31); // packed left shift of each dword: << 31 + __ pslld(xmm_temp8, 30); // packed left shift of each dword: << 30 + __ pslld(xmm_temp9, 25); // packed left shift of each dword: << 25 + __ pxor(xmm_temp7, xmm_temp8); // xor the shifted versions + __ pxor(xmm_temp7, xmm_temp9); + __ movdqu(xmm_temp8, xmm_temp7); + __ pslldq(xmm_temp7, 12); + __ psrldq(xmm_temp8, 4); + __ pxor(xmm_temp3, xmm_temp7); // first phase of the reduction complete + + // + // Second phase of the reduction + // + // Make 3 copies of xmm3 in xmm2, xmm4, xmm5 for doing these + // shift operations. 
+ __ movdqu(xmm_temp2, xmm_temp3); + __ movdqu(xmm_temp4, xmm_temp3); + __ movdqu(xmm_temp5, xmm_temp3); + __ psrld(xmm_temp2, 1); // packed right shift of each dword: >> 1 + __ psrld(xmm_temp4, 2); // packed right shift of each dword: >> 2 + __ psrld(xmm_temp5, 7); // packed right shift of each dword: >> 7 + __ pxor(xmm_temp2, xmm_temp4); // xor the shifted versions + __ pxor(xmm_temp2, xmm_temp5); + __ pxor(xmm_temp2, xmm_temp8); + __ pxor(xmm_temp3, xmm_temp2); + __ pxor(xmm_temp6, xmm_temp3); // the result is in xmm6 + + __ decrement(blocks); + __ jcc(Assembler::zero, L_exit); + __ movdqu(xmm_temp0, xmm_temp6); + __ addptr(data, 16); + __ jmp(L_ghash_loop); + + __ BIND(L_exit); + __ pshufb(xmm_temp6, xmm_temp10); // Byte swap 16-byte result + __ movdqu(Address(state, 0), xmm_temp6); // store the result + +#ifdef _WIN64 + // restore xmm regs belonging to calling function + for (int i = 6; i <= XMM_REG_LAST; i++) { + __ movdqu(as_XMMRegister(i), xmm_save(i)); + } +#endif + __ leave(); + __ ret(0); + return start; + } + /** * Arguments: * @@ -4077,6 +4246,13 @@ class StubGenerator: public StubCodeGenerator { StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel(); } + // Generate GHASH intrinsics code + if (UseGHASHIntrinsics) { + StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask(); + StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask(); + StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks(); + } + // Safefetch stubs. generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, &StubRoutines::_safefetch32_fault_pc, diff --git a/src/cpu/x86/vm/stubRoutines_x86.cpp b/src/cpu/x86/vm/stubRoutines_x86.cpp index 200f2aff80d3cf652d65984b1e48a12277e4f9ba..9b0d8fc756f9f050b6d23e55eb28a882c08e435d 100644 --- a/src/cpu/x86/vm/stubRoutines_x86.cpp +++ b/src/cpu/x86/vm/stubRoutines_x86.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Oracle and/or its affiliates. 
All rights reserved. + * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -33,6 +33,8 @@ address StubRoutines::x86::_verify_mxcsr_entry = NULL; address StubRoutines::x86::_key_shuffle_mask_addr = NULL; +address StubRoutines::x86::_ghash_long_swap_mask_addr = NULL; +address StubRoutines::x86::_ghash_byte_swap_mask_addr = NULL; uint64_t StubRoutines::x86::_crc_by128_masks[] = { diff --git a/src/cpu/x86/vm/stubRoutines_x86.hpp b/src/cpu/x86/vm/stubRoutines_x86.hpp index d8e52ab3b118a4262a60c84e546a1da6d3eb88d7..bb160486cd1cdce62e06c3dea10994ccc880bf1b 100644 --- a/src/cpu/x86/vm/stubRoutines_x86.hpp +++ b/src/cpu/x86/vm/stubRoutines_x86.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -36,10 +36,15 @@ // masks and table for CRC32 static uint64_t _crc_by128_masks[]; static juint _crc_table[]; + // swap mask for ghash + static address _ghash_long_swap_mask_addr; + static address _ghash_byte_swap_mask_addr; public: static address verify_mxcsr_entry() { return _verify_mxcsr_entry; } static address key_shuffle_mask_addr() { return _key_shuffle_mask_addr; } static address crc_by128_masks_addr() { return (address)_crc_by128_masks; } + static address ghash_long_swap_mask_addr() { return _ghash_long_swap_mask_addr; } + static address ghash_byte_swap_mask_addr() { return _ghash_byte_swap_mask_addr; } #endif // CPU_X86_VM_STUBROUTINES_X86_32_HPP diff --git a/src/cpu/x86/vm/vm_version_x86.cpp b/src/cpu/x86/vm/vm_version_x86.cpp index fd0a68d10af047c5144fa56db6f9b21cd95c513e..1f5ae757a3d4423387f78b02b236585547c3b1f8 100644 --- a/src/cpu/x86/vm/vm_version_x86.cpp +++ b/src/cpu/x86/vm/vm_version_x86.cpp @@ -594,6 +594,17 @@ void VM_Version::get_processor_features() { FLAG_SET_DEFAULT(UseAESIntrinsics, false); } + // GHASH/GCM intrinsics + if (UseCLMUL && (UseSSE > 2)) { + if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) { + UseGHASHIntrinsics = true; + } + } else if (UseGHASHIntrinsics) { + if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics)) + warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU"); + FLAG_SET_DEFAULT(UseGHASHIntrinsics, false); + } + if (UseSHA) { warning("SHA instructions are not available on this CPU"); FLAG_SET_DEFAULT(UseSHA, false); diff --git a/src/share/vm/classfile/vmSymbols.hpp b/src/share/vm/classfile/vmSymbols.hpp index 7f0e8207194304d6ce659b8aec9819d59b275382..46f2e2d8bc84393c894917ba02ee5572f00228c7 100644 --- a/src/share/vm/classfile/vmSymbols.hpp +++ b/src/share/vm/classfile/vmSymbols.hpp @@ -863,6 +863,12 @@ do_name( implCompressMB_name, "implCompressMultiBlock0") \ do_signature(implCompressMB_signature, "([BII)I") \ \ + /* support for 
com.sun.crypto.provider.GHASH */ \ + do_class(com_sun_crypto_provider_ghash, "com/sun/crypto/provider/GHASH") \ + do_intrinsic(_ghash_processBlocks, com_sun_crypto_provider_ghash, processBlocks_name, ghash_processBlocks_signature, F_S) \ + do_name(processBlocks_name, "processBlocks") \ + do_signature(ghash_processBlocks_signature, "([BII[J[J)V") \ + \ /* support for java.util.zip */ \ do_class(java_util_zip_CRC32, "java/util/zip/CRC32") \ do_intrinsic(_updateCRC32, java_util_zip_CRC32, update_name, int2_int_signature, F_SN) \ diff --git a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp index aa6bbc81a44b0b5c0736d272eab618da1c26b093..4100b83ef3ef9b3d9448280f331f291819ae44e5 100644 --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp @@ -2520,6 +2520,12 @@ void G1CollectedHeap::collect(GCCause::Cause cause) { } } } + } else if (GC_locker::should_discard(cause, gc_count_before)) { + // Return to be consistent with VMOp failure due to another + // collection slipping in after our gc_count but before our + // request is processed. _gc_locker collections upgraded by + // GCLockerInvokesConcurrent are handled above and never discarded. 
+ return; } else { if (cause == GCCause::_gc_locker || cause == GCCause::_wb_young_gc DEBUG_ONLY(|| cause == GCCause::_scavenge_alot)) { diff --git a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp index f407f20b7cc5becbe9b4f40054eb4b4662e20f70..e13fefa2c374236ccd760b01630a8a5521b1c9d0 100644 --- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp +++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp @@ -530,6 +530,10 @@ void ParallelScavengeHeap::collect(GCCause::Cause cause) { full_gc_count = Universe::heap()->total_full_collections(); } + if (GC_locker::should_discard(cause, gc_count)) { + return; + } + VM_ParallelGCSystemGC op(gc_count, full_gc_count, cause); VMThread::execute(&op); } diff --git a/src/share/vm/gc_implementation/parallelScavenge/vmPSOperations.cpp b/src/share/vm/gc_implementation/parallelScavenge/vmPSOperations.cpp index 9148bb4ffc05b414e42aff62acef75eb16f753f2..2d554b3975d899ed2402c246aa19eff4bcd8eeda 100644 --- a/src/share/vm/gc_implementation/parallelScavenge/vmPSOperations.cpp +++ b/src/share/vm/gc_implementation/parallelScavenge/vmPSOperations.cpp @@ -52,11 +52,16 @@ void VM_ParallelGCFailedAllocation::doit() { } } +static bool is_cause_full(GCCause::Cause cause) { + return (cause != GCCause::_gc_locker) && (cause != GCCause::_wb_young_gc) + DEBUG_ONLY(&& (cause != GCCause::_scavenge_alot)); +} + // Only used for System.gc() calls VM_ParallelGCSystemGC::VM_ParallelGCSystemGC(uint gc_count, uint full_gc_count, GCCause::Cause gc_cause) : - VM_GC_Operation(gc_count, gc_cause, full_gc_count, true /* full */) + VM_GC_Operation(gc_count, gc_cause, full_gc_count, is_cause_full(gc_cause)) { } @@ -68,8 +73,7 @@ void VM_ParallelGCSystemGC::doit() { "must be a ParallelScavengeHeap"); GCCauseSetter gccs(heap, _gc_cause); - if (_gc_cause == GCCause::_gc_locker || _gc_cause == GCCause::_wb_young_gc - 
DEBUG_ONLY(|| _gc_cause == GCCause::_scavenge_alot)) { + if (!_full) { // If (and only if) the scavenge fails, this will invoke a full gc. heap->invoke_scavenge(); } else { diff --git a/src/share/vm/gc_implementation/shared/vmGCOperations.cpp b/src/share/vm/gc_implementation/shared/vmGCOperations.cpp index 972099b9cbe7fc4eb3352ae25cd8007f92ee186b..d60f751af9ce0e1315f01e08839309ad761e534c 100644 --- a/src/share/vm/gc_implementation/shared/vmGCOperations.cpp +++ b/src/share/vm/gc_implementation/shared/vmGCOperations.cpp @@ -201,6 +201,19 @@ void VM_GenCollectForAllocation::doit() { } } +static bool is_full_gc(int max_level) { + // Return true if max_level is all generations + return (max_level == (GenCollectedHeap::heap()->n_gens() - 1)); +} + +VM_GenCollectFull::VM_GenCollectFull(uint gc_count_before, + uint full_gc_count_before, + GCCause::Cause gc_cause, + int max_level) : + VM_GC_Operation(gc_count_before, gc_cause, full_gc_count_before, + is_full_gc(max_level) /* full */), + _max_level(max_level) { } + void VM_GenCollectFull::doit() { SvcGCMarker sgcm(SvcGCMarker::FULL); diff --git a/src/share/vm/gc_implementation/shared/vmGCOperations.hpp b/src/share/vm/gc_implementation/shared/vmGCOperations.hpp index b8027a97282f6e9e79e7a4bdeaca82bf079e1c99..cb070bd730eb5e3f8331f4a719f05bff839fbe08 100644 --- a/src/share/vm/gc_implementation/shared/vmGCOperations.hpp +++ b/src/share/vm/gc_implementation/shared/vmGCOperations.hpp @@ -201,9 +201,7 @@ class VM_GenCollectFull: public VM_GC_Operation { VM_GenCollectFull(uint gc_count_before, uint full_gc_count_before, GCCause::Cause gc_cause, - int max_level) - : VM_GC_Operation(gc_count_before, gc_cause, full_gc_count_before, true /* full */), - _max_level(max_level) { } + int max_level); ~VM_GenCollectFull() {} virtual VMOp_Type type() const { return VMOp_GenCollectFull; } virtual void doit(); diff --git a/src/share/vm/memory/gcLocker.cpp b/src/share/vm/memory/gcLocker.cpp index 
9674263793987321337ed76aa18f85bee7d754a2..df8914e50c914dae515321348849e1fdaf4ceede 100644 --- a/src/share/vm/memory/gcLocker.cpp +++ b/src/share/vm/memory/gcLocker.cpp @@ -31,6 +31,7 @@ volatile jint GC_locker::_jni_lock_count = 0; volatile bool GC_locker::_needs_gc = false; volatile bool GC_locker::_doing_gc = false; +unsigned int GC_locker::_total_collections = 0; #ifdef ASSERT volatile jint GC_locker::_debug_jni_lock_count = 0; @@ -94,6 +95,11 @@ void GC_locker::stall_until_clear() { } } +bool GC_locker::should_discard(GCCause::Cause cause, uint total_collections) { + return (cause == GCCause::_gc_locker) && + (_total_collections != total_collections); +} + void GC_locker::jni_lock(JavaThread* thread) { assert(!thread->in_critical(), "shouldn't currently be in a critical region"); MutexLocker mu(JNICritical_lock); @@ -117,7 +123,13 @@ void GC_locker::jni_unlock(JavaThread* thread) { decrement_debug_jni_lock_count(); thread->exit_critical(); if (needs_gc() && !is_active_internal()) { - // We're the last thread out. Cause a GC to occur. + // We're the last thread out. Request a GC. + // Capture the current total collections, to allow detection of + // other collections that make this one unnecessary. The value of + // total_collections() is only changed at a safepoint, so there + // must not be a safepoint between the lock becoming inactive and + // getting the count, else there may be unnecessary GCLocker GCs. 
+ _total_collections = Universe::heap()->total_collections(); _doing_gc = true; { // Must give up the lock while at a safepoint diff --git a/src/share/vm/memory/gcLocker.hpp b/src/share/vm/memory/gcLocker.hpp index f12aa6755525154004bea34b029f3c9448da88e8..068688e93a368cbf14c3da4d0eef59f22cc4a876 100644 --- a/src/share/vm/memory/gcLocker.hpp +++ b/src/share/vm/memory/gcLocker.hpp @@ -26,6 +26,7 @@ #define SHARE_VM_MEMORY_GCLOCKER_HPP #include "gc_interface/collectedHeap.hpp" +#include "gc_interface/gcCause.hpp" #include "memory/genCollectedHeap.hpp" #include "memory/universe.hpp" #include "oops/oop.hpp" @@ -57,6 +58,7 @@ class GC_locker: public AllStatic { static volatile bool _needs_gc; // heap is filling, we need a GC // note: bool is typedef'd as jint static volatile bool _doing_gc; // unlock_critical() is doing a GC + static uint _total_collections; // value for _gc_locker collection #ifdef ASSERT // This lock count is updated for all operations and is used to @@ -116,6 +118,12 @@ class GC_locker: public AllStatic { // Sets _needs_gc if is_active() is true. Returns is_active(). static bool check_active_before_gc(); + // Return true if the designated collection is a GCLocker request + // that should be discarded. Returns true if cause == GCCause::_gc_locker + // and the given total collection value indicates a collection has been + // done since the GCLocker request was made. + static bool should_discard(GCCause::Cause cause, uint total_collections); + // Stalls the caller (who should not be in a jni critical section) // until needs_gc() clears. 
Note however that needs_gc() may be // set at a subsequent safepoint and/or cleared under the diff --git a/src/share/vm/memory/genCollectedHeap.cpp b/src/share/vm/memory/genCollectedHeap.cpp index ddf0a860ce9db2c907387aa2371f23f6afb0d32a..daa0a06bff35eb0f8332e11ab53c2c5a36922f2c 100644 --- a/src/share/vm/memory/genCollectedHeap.cpp +++ b/src/share/vm/memory/genCollectedHeap.cpp @@ -796,8 +796,11 @@ void GenCollectedHeap::collect(GCCause::Cause cause) { #else // INCLUDE_ALL_GCS ShouldNotReachHere(); #endif // INCLUDE_ALL_GCS - } else if (cause == GCCause::_wb_young_gc) { - // minor collection for WhiteBox API + } else if ((cause == GCCause::_wb_young_gc) || + (cause == GCCause::_gc_locker)) { + // minor collection for WhiteBox or GCLocker. + // _gc_locker collections upgraded by GCLockerInvokesConcurrent + // are handled above and never discarded. collect(cause, 0); } else { #ifdef ASSERT @@ -835,6 +838,11 @@ void GenCollectedHeap::collect_locked(GCCause::Cause cause, int max_level) { // Read the GC count while holding the Heap_lock unsigned int gc_count_before = total_collections(); unsigned int full_gc_count_before = total_full_collections(); + + if (GC_locker::should_discard(cause, gc_count_before)) { + return; + } + { MutexUnlocker mu(Heap_lock); // give up heap lock, execute gets it back VM_GenCollectFull op(gc_count_before, full_gc_count_before, @@ -887,24 +895,16 @@ void GenCollectedHeap::do_full_collection(bool clear_all_soft_refs) { void GenCollectedHeap::do_full_collection(bool clear_all_soft_refs, int max_level) { - int local_max_level; - if (!incremental_collection_will_fail(false /* don't consult_young */) && - gc_cause() == GCCause::_gc_locker) { - local_max_level = 0; - } else { - local_max_level = max_level; - } do_collection(true /* full */, clear_all_soft_refs /* clear_all_soft_refs */, 0 /* size */, false /* is_tlab */, - local_max_level /* max_level */); + max_level /* max_level */); // Hack XXX FIX ME !!! 
// A scavenge may not have been attempted, or may have // been attempted and failed, because the old gen was too full - if (local_max_level == 0 && gc_cause() == GCCause::_gc_locker && - incremental_collection_will_fail(false /* don't consult_young */)) { + if (gc_cause() == GCCause::_gc_locker && incremental_collection_failed()) { if (PrintGCDetails) { gclog_or_tty->print_cr("GC locker: Trying a full collection " "because scavenge failed"); diff --git a/src/share/vm/opto/escape.cpp b/src/share/vm/opto/escape.cpp index ac8217382da2e3c1473c1708ce8f906fb708fb0e..66e5c3365ab961900f4ee6302766f8daa6460479 100644 --- a/src/share/vm/opto/escape.cpp +++ b/src/share/vm/opto/escape.cpp @@ -952,6 +952,7 @@ void ConnectionGraph::process_call_arguments(CallNode *call) { strcmp(call->as_CallLeaf()->_name, "aescrypt_decryptBlock") == 0 || strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_encryptAESCrypt") == 0 || strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_decryptAESCrypt") == 0 || + strcmp(call->as_CallLeaf()->_name, "ghash_processBlocks") == 0 || strcmp(call->as_CallLeaf()->_name, "sha1_implCompress") == 0 || strcmp(call->as_CallLeaf()->_name, "sha1_implCompressMB") == 0 || strcmp(call->as_CallLeaf()->_name, "sha256_implCompress") == 0 || diff --git a/src/share/vm/opto/library_call.cpp b/src/share/vm/opto/library_call.cpp index 4bb5ca886c94b969abef14bc7b1f2929b864b443..5c5fec586f3e9ca954947eb9c67091aa7a5a5c60 100644 --- a/src/share/vm/opto/library_call.cpp +++ b/src/share/vm/opto/library_call.cpp @@ -311,6 +311,7 @@ class LibraryCallKit : public GraphKit { Node* inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting); Node* get_key_start_from_aescrypt_object(Node* aescrypt_object); Node* get_original_key_start_from_aescrypt_object(Node* aescrypt_object); + bool inline_ghash_processBlocks(); bool inline_sha_implCompress(vmIntrinsics::ID id); bool inline_digestBase_implCompressMB(int predicate); bool inline_sha_implCompressMB(Node* digestBaseObj, 
ciInstanceKlass* instklass_SHA, @@ -570,6 +571,10 @@ CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) { predicates = 3; break; + case vmIntrinsics::_ghash_processBlocks: + if (!UseGHASHIntrinsics) return NULL; + break; + case vmIntrinsics::_updateCRC32: case vmIntrinsics::_updateBytesCRC32: case vmIntrinsics::_updateByteBufferCRC32: @@ -957,6 +962,9 @@ bool LibraryCallKit::try_to_inline(int predicate) { case vmIntrinsics::_montgomerySquare: return inline_montgomerySquare(); + case vmIntrinsics::_ghash_processBlocks: + return inline_ghash_processBlocks(); + case vmIntrinsics::_encodeISOArray: return inline_encodeISOArray(); @@ -6599,6 +6607,35 @@ Node* LibraryCallKit::inline_cipherBlockChaining_AESCrypt_predicate(bool decrypt return _gvn.transform(region); } +//------------------------------inline_ghash_processBlocks +bool LibraryCallKit::inline_ghash_processBlocks() { + address stubAddr; + const char *stubName; + assert(UseGHASHIntrinsics, "need GHASH intrinsics support"); + + stubAddr = StubRoutines::ghash_processBlocks(); + stubName = "ghash_processBlocks"; + + Node* data = argument(0); + Node* offset = argument(1); + Node* len = argument(2); + Node* state = argument(3); + Node* subkeyH = argument(4); + + Node* state_start = array_element_address(state, intcon(0), T_LONG); + assert(state_start, "state is NULL"); + Node* subkeyH_start = array_element_address(subkeyH, intcon(0), T_LONG); + assert(subkeyH_start, "subkeyH is NULL"); + Node* data_start = array_element_address(data, offset, T_BYTE); + assert(data_start, "data is NULL"); + + Node* ghash = make_runtime_call(RC_LEAF|RC_NO_FP, + OptoRuntime::ghash_processBlocks_Type(), + stubAddr, stubName, TypePtr::BOTTOM, + state_start, subkeyH_start, data_start, len); + return true; +} + //------------------------------inline_sha_implCompress----------------------- // // Calculate SHA (i.e., SHA-1) for single-block byte[] array. 
diff --git a/src/share/vm/opto/runtime.cpp b/src/share/vm/opto/runtime.cpp index 57d2f5764899d82406a72e623c99ce44c7d73983..4562dbcd5f678eb4103f1a634c8e70cfbf749aee 100644 --- a/src/share/vm/opto/runtime.cpp +++ b/src/share/vm/opto/runtime.cpp @@ -92,7 +92,25 @@ // At command line specify the parameters: -XX:+FullGCALot -XX:FullGCALotStart=100000000 - +// GHASH block processing +const TypeFunc* OptoRuntime::ghash_processBlocks_Type() { + int argcnt = 4; + + const Type** fields = TypeTuple::fields(argcnt); + int argp = TypeFunc::Parms; + fields[argp++] = TypePtr::NOTNULL; // state + fields[argp++] = TypePtr::NOTNULL; // subkeyH + fields[argp++] = TypePtr::NOTNULL; // data + fields[argp++] = TypeInt::INT; // blocks + assert(argp == TypeFunc::Parms+argcnt, "correct decoding"); + const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields); + + // result type needed + fields = TypeTuple::fields(1); + fields[TypeFunc::Parms+0] = NULL; // void + const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields); + return TypeFunc::make(domain, range); +} // Compiled code entry points address OptoRuntime::_new_instance_Java = NULL; diff --git a/src/share/vm/opto/runtime.hpp b/src/share/vm/opto/runtime.hpp index 99f03f408de46ed4ce9c76aa207c1ddf32a786a3..58c6bd5017805ce6f2a2454a229a586fb6e5968b 100644 --- a/src/share/vm/opto/runtime.hpp +++ b/src/share/vm/opto/runtime.hpp @@ -311,6 +311,8 @@ private: static const TypeFunc* montgomeryMultiply_Type(); static const TypeFunc* montgomerySquare_Type(); + static const TypeFunc* ghash_processBlocks_Type(); + static const TypeFunc* updateBytesCRC32_Type(); // leaf on stack replacement interpreter accessor types diff --git a/src/share/vm/runtime/globals.hpp b/src/share/vm/runtime/globals.hpp index 421a3ba13b383db2d6d98834419688f6c224c41a..0fdf47ca90fc3152131165b5dd8caff4b2c1e072 100644 --- a/src/share/vm/runtime/globals.hpp +++ b/src/share/vm/runtime/globals.hpp @@ -602,6 +602,9 @@ class CommandLineFlags { product(bool, 
UseSHA, false, \ "Control whether SHA instructions can be used on SPARC") \ \ + product(bool, UseGHASHIntrinsics, false, \ + "Use intrinsics for GHASH versions of crypto") \ + \ product(uintx, LargePageSizeInBytes, 0, \ "Large page size (0 to let VM choose the page size)") \ \ diff --git a/src/share/vm/runtime/stubRoutines.cpp b/src/share/vm/runtime/stubRoutines.cpp index b2b3a90d9bf3b54bac5af5704acdb9b9e88acf8d..d943248da0f5d33c141b10b70c317e1bbed26219 100644 --- a/src/share/vm/runtime/stubRoutines.cpp +++ b/src/share/vm/runtime/stubRoutines.cpp @@ -124,6 +124,7 @@ address StubRoutines::_aescrypt_encryptBlock = NULL; address StubRoutines::_aescrypt_decryptBlock = NULL; address StubRoutines::_cipherBlockChaining_encryptAESCrypt = NULL; address StubRoutines::_cipherBlockChaining_decryptAESCrypt = NULL; +address StubRoutines::_ghash_processBlocks = NULL; address StubRoutines::_sha1_implCompress = NULL; address StubRoutines::_sha1_implCompressMB = NULL; diff --git a/src/share/vm/runtime/stubRoutines.hpp b/src/share/vm/runtime/stubRoutines.hpp index 42808a4c63daa2a962e3f9c5de501620fce8cd62..0f6641c17a88bdf7103dfb228ac64f36b2c82184 100644 --- a/src/share/vm/runtime/stubRoutines.hpp +++ b/src/share/vm/runtime/stubRoutines.hpp @@ -197,6 +197,7 @@ class StubRoutines: AllStatic { static address _aescrypt_decryptBlock; static address _cipherBlockChaining_encryptAESCrypt; static address _cipherBlockChaining_decryptAESCrypt; + static address _ghash_processBlocks; static address _sha1_implCompress; static address _sha1_implCompressMB; @@ -359,6 +360,7 @@ class StubRoutines: AllStatic { static address aescrypt_decryptBlock() { return _aescrypt_decryptBlock; } static address cipherBlockChaining_encryptAESCrypt() { return _cipherBlockChaining_encryptAESCrypt; } static address cipherBlockChaining_decryptAESCrypt() { return _cipherBlockChaining_decryptAESCrypt; } + static address ghash_processBlocks() { return _ghash_processBlocks; } static address sha1_implCompress() { return 
_sha1_implCompress; } static address sha1_implCompressMB() { return _sha1_implCompressMB; } diff --git a/src/share/vm/runtime/vmStructs.cpp b/src/share/vm/runtime/vmStructs.cpp index 161a4c401db1bd430c89884818a446083bb3474c..7f6f84887790b7a4ab2cb668bd9f5b95bbd13b7e 100644 --- a/src/share/vm/runtime/vmStructs.cpp +++ b/src/share/vm/runtime/vmStructs.cpp @@ -810,6 +810,7 @@ typedef TwoOopHashtable SymbolTwoOopHashtable; static_field(StubRoutines, _aescrypt_decryptBlock, address) \ static_field(StubRoutines, _cipherBlockChaining_encryptAESCrypt, address) \ static_field(StubRoutines, _cipherBlockChaining_decryptAESCrypt, address) \ + static_field(StubRoutines, _ghash_processBlocks, address) \ static_field(StubRoutines, _updateBytesCRC32, address) \ static_field(StubRoutines, _crc_table_adr, address) \ static_field(StubRoutines, _multiplyToLen, address) \ diff --git a/test/compiler/7184394/TestAESBase.java b/test/compiler/7184394/TestAESBase.java index 4d3204880bc4ec0db47249c15ece366c61d835bf..5c3e6881e33898ee27a0d9c15f358981d7aa15b8 100644 --- a/test/compiler/7184394/TestAESBase.java +++ b/test/compiler/7184394/TestAESBase.java @@ -29,6 +29,7 @@ import javax.crypto.Cipher; import javax.crypto.KeyGenerator; import javax.crypto.SecretKey; +import javax.crypto.spec.GCMParameterSpec; import javax.crypto.spec.IvParameterSpec; import javax.crypto.spec.SecretKeySpec; import java.security.AlgorithmParameters; @@ -62,8 +63,12 @@ abstract public class TestAESBase { Random random = new Random(0); Cipher cipher; Cipher dCipher; - AlgorithmParameters algParams; + AlgorithmParameters algParams = null; SecretKey key; + GCMParameterSpec gcm_spec; + byte[] aad = { 0x11, 0x22, 0x33, 0x44, 0x55 }; + int tlen = 12; + byte[] iv = new byte[16]; static int numThreads = 0; int threadId; @@ -77,7 +82,10 @@ abstract public class TestAESBase { public void prepare() { try { - System.out.println("\nalgorithm=" + algorithm + ", mode=" + mode + ", paddingStr=" + paddingStr + ", msgSize=" + msgSize + 
", keySize=" + keySize + ", noReinit=" + noReinit + ", checkOutput=" + checkOutput + ", encInputOffset=" + encInputOffset + ", encOutputOffset=" + encOutputOffset + ", decOutputOffset=" + decOutputOffset + ", lastChunkSize=" +lastChunkSize ); + System.out.println("\nalgorithm=" + algorithm + ", mode=" + mode + ", paddingStr=" + paddingStr + + ", msgSize=" + msgSize + ", keySize=" + keySize + ", noReinit=" + noReinit + + ", checkOutput=" + checkOutput + ", encInputOffset=" + encInputOffset + ", encOutputOffset=" + + encOutputOffset + ", decOutputOffset=" + decOutputOffset + ", lastChunkSize=" +lastChunkSize ); if (encInputOffset % ALIGN != 0 || encOutputOffset % ALIGN != 0 || decOutputOffset % ALIGN !=0 ) testingMisalignment = true; @@ -98,16 +106,24 @@ abstract public class TestAESBase { cipher = Cipher.getInstance(algorithm + "/" + mode + "/" + paddingStr, "SunJCE"); dCipher = Cipher.getInstance(algorithm + "/" + mode + "/" + paddingStr, "SunJCE"); + // CBC init if (mode.equals("CBC")) { - int ivLen = (algorithm.equals("AES") ? 16 : algorithm.equals("DES") ? 
8 : 0); - IvParameterSpec initVector = new IvParameterSpec(new byte[ivLen]); + IvParameterSpec initVector = new IvParameterSpec(iv); cipher.init(Cipher.ENCRYPT_MODE, key, initVector); - } else { algParams = cipher.getParameters(); + dCipher.init(Cipher.DECRYPT_MODE, key, initVector); + + // GCM init + } else if (mode.equals("GCM")) { + gcm_init(true); + gcm_init(false); + + // ECB init + } else { cipher.init(Cipher.ENCRYPT_MODE, key, algParams); + dCipher.init(Cipher.DECRYPT_MODE, key, algParams); } - algParams = cipher.getParameters(); - dCipher.init(Cipher.DECRYPT_MODE, key, algParams); + if (threadId == 0) { childShowCipher(); } @@ -188,4 +204,19 @@ abstract public class TestAESBase { } abstract void childShowCipher(); + + void gcm_init(boolean encrypt) throws Exception { + gcm_spec = new GCMParameterSpec(tlen * 8, iv); + if (encrypt) { + // Get a new instance everytime because of reuse IV restrictions + cipher = Cipher.getInstance(algorithm + "/" + mode + "/" + paddingStr, "SunJCE"); + cipher.init(Cipher.ENCRYPT_MODE, key, gcm_spec); + cipher.updateAAD(aad); + } else { + dCipher.init(Cipher.DECRYPT_MODE, key, gcm_spec); + dCipher.updateAAD(aad); + + + } + } } diff --git a/test/compiler/7184394/TestAESDecode.java b/test/compiler/7184394/TestAESDecode.java index 21f1f55595a23b271e8d185929655a0750e68c67..e90ef767e7e9758f770b02a3c3699c2898031082 100644 --- a/test/compiler/7184394/TestAESDecode.java +++ b/test/compiler/7184394/TestAESDecode.java @@ -32,7 +32,11 @@ public class TestAESDecode extends TestAESBase { @Override public void run() { try { - if (!noReinit) dCipher.init(Cipher.DECRYPT_MODE, key, algParams); + if (mode.equals("GCM")) { + gcm_init(false); + } else if (!noReinit) { + dCipher.init(Cipher.DECRYPT_MODE, key, algParams); + } decode = new byte[decodeLength]; if (testingMisalignment) { int tempSize = dCipher.update(encode, encOutputOffset, (decodeMsgSize - lastChunkSize), decode, decOutputOffset); diff --git a/test/compiler/7184394/TestAESEncode.java 
b/test/compiler/7184394/TestAESEncode.java index f1a35bde0fbfb69e79336673c2669492c9a82118..cbfb81795c19e5c5548dbd4351ef3d147b76820c 100644 --- a/test/compiler/7184394/TestAESEncode.java +++ b/test/compiler/7184394/TestAESEncode.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -32,7 +32,11 @@ public class TestAESEncode extends TestAESBase { @Override public void run() { try { - if (!noReinit) cipher.init(Cipher.ENCRYPT_MODE, key, algParams); + if (mode.equals("GCM")) { + gcm_init(true); + } else if (!noReinit) { + cipher.init(Cipher.ENCRYPT_MODE, key, algParams); + } encode = new byte[encodeLength]; if (testingMisalignment) { int tempSize = cipher.update(input, encInputOffset, (msgSize - lastChunkSize), encode, encOutputOffset); diff --git a/test/compiler/7184394/TestAESMain.java b/test/compiler/7184394/TestAESMain.java index 20929e8ba684c13134fad84b08ebfe9a0a5a9e09..ddd8eeaefb7b850f1ffe1abf0dc3dbeec30f7086 100644 --- a/test/compiler/7184394/TestAESMain.java +++ b/test/compiler/7184394/TestAESMain.java @@ -41,6 +41,13 @@ * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=ECB -DencInputOffset=1 -DencOutputOffset=1 TestAESMain * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=ECB -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 TestAESMain * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=ECB -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 -DpaddingStr=NoPadding -DmsgSize=640 TestAESMain + * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM TestAESMain + * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 TestAESMain + * @run main/othervm/timeout=600 -Xbatch 
-DcheckOutput=true -Dmode=GCM -DencOutputOffset=1 TestAESMain + * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DdecOutputOffset=1 TestAESMain + * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 -DencOutputOffset=1 TestAESMain + * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 TestAESMain + * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 -DpaddingStr=NoPadding -DmsgSize=640 TestAESMain * * @author Tom Deneau */ diff --git a/test/gc/stress/gclocker/TestExcessGCLockerCollections.java b/test/gc/stress/gclocker/TestExcessGCLockerCollections.java new file mode 100644 index 0000000000000000000000000000000000000000..7495d5b9edf63ae9d31b182d5143c265f2006647 --- /dev/null +++ b/test/gc/stress/gclocker/TestExcessGCLockerCollections.java @@ -0,0 +1,285 @@ +/* + * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +package gc.stress.gclocker; + +// Based on Kim Barrett's test for JDK-8048556 + +/* + * @test TestExcessGCLockerCollections + * @key gc + * @bug 8048556 + * @summary Check for GC Locker initiated GCs that immediately follow another + * GC and so have very little needing to be collected. + * @library /testlibrary + * @run driver/timeout=1000 gc.stress.gclocker.TestExcessGCLockerCollections 300 4 2 + */ + +import java.util.HashMap; +import java.util.Map; + +import java.util.zip.Deflater; + +import java.util.ArrayList; +import java.util.Arrays; + +import javax.management.MBeanServer; +import javax.management.Notification; +import javax.management.NotificationListener; +import javax.management.openmbean.CompositeData; +import java.lang.management.ManagementFactory; +import java.lang.management.GarbageCollectorMXBean; +import java.lang.management.MemoryUsage; +import java.util.List; +import com.sun.management.GarbageCollectionNotificationInfo; +import com.sun.management.GcInfo; + +import com.oracle.java.testlibrary.Asserts; +import com.oracle.java.testlibrary.ProcessTools; +import com.oracle.java.testlibrary.OutputAnalyzer; + +class TestExcessGCLockerCollectionsStringConstants { + // Some constant strings used in both GC logging and error detection + static public final String GCLOCKER_CAUSE = "GCLocker Initiated GC"; + static public final String USED_TOO_LOW = "TOO LOW"; + static public final String USED_OK = "OK"; +} + +class TestExcessGCLockerCollectionsAux { + static private final int LARGE_MAP_SIZE = 64 * 1024; + + static private final int MAP_ARRAY_LENGTH = 4; + static private final int MAP_SIZE = 1024; + + static private final int BYTE_ARRAY_LENGTH = 128 * 1024; + + static private void println(String str) { System.out.println(str); } + static private void println() { System.out.println();
} + + static private volatile boolean keepRunning = true; + + static Map populateMap(int size) { + Map map = new HashMap(); + for (int i = 0; i < size; i += 1) { + Integer keyInt = Integer.valueOf(i); + String valStr = "value is [" + i + "]"; + map.put(keyInt,valStr); + } + return map; + } + + static private class AllocatingWorker implements Runnable { + private final Object[] array = new Object[MAP_ARRAY_LENGTH]; + private int arrayIndex = 0; + + private void doStep() { + Map map = populateMap(MAP_SIZE); + array[arrayIndex] = map; + arrayIndex = (arrayIndex + 1) % MAP_ARRAY_LENGTH; + } + + public void run() { + while (keepRunning) { + doStep(); + } + } + } + + static private class JNICriticalWorker implements Runnable { + private int count; + + private void doStep() { + byte[] inputArray = new byte[BYTE_ARRAY_LENGTH]; + for (int i = 0; i < inputArray.length; i += 1) { + inputArray[i] = (byte) (count + i); + } + + Deflater deflater = new Deflater(); + deflater.setInput(inputArray); + deflater.finish(); + + byte[] outputArray = new byte[2 * inputArray.length]; + deflater.deflate(outputArray); + + count += 1; + } + + public void run() { + while (keepRunning) { + doStep(); + } + } + } + + static class GCNotificationListener implements NotificationListener { + static private final double MIN_USED_PERCENT = 40.0; + + static private final List newGenPoolNames = Arrays.asList( + "G1 Eden Space", // OpenJDK G1GC: -XX:+UseG1GC + "PS Eden Space", // OpenJDK ParallelGC: -XX:+UseParallelGC + "Par Eden Space", // OpenJDK ConcMarkSweepGC: -XX:+UseConcMarkSweepGC + "Eden Space" // OpenJDK SerialGC: -XX:+UseSerialGC + // OpenJDK ConcMarkSweepGC: -XX:+UseConcMarkSweepGC -XX:-UseParNewGC + ); + + @Override + public void handleNotification(Notification notification, Object handback) { + try { + if (notification.getType().equals(GarbageCollectionNotificationInfo.GARBAGE_COLLECTION_NOTIFICATION)) { + GarbageCollectionNotificationInfo info = +
GarbageCollectionNotificationInfo.from((CompositeData) notification.getUserData()); + + String gc_cause = info.getGcCause(); + + if (gc_cause.equals(TestExcessGCLockerCollectionsStringConstants.GCLOCKER_CAUSE)) { + Map memory_before_gc = info.getGcInfo().getMemoryUsageBeforeGc(); + + for (String newGenPoolName : newGenPoolNames) { + MemoryUsage usage = memory_before_gc.get(newGenPoolName); + if (usage == null) continue; + + double startTime = ((double) info.getGcInfo().getStartTime()) / 1000.0; + long used = usage.getUsed(); + long committed = usage.getCommitted(); + long max = usage.getMax(); + double used_percent = (((double) used) / Math.max(committed, max)) * 100.0; + + System.out.printf("%6.3f: (%s) %d/%d/%d, %8.4f%% (%s)\n", + startTime, gc_cause, used, committed, max, used_percent, + ((used_percent < MIN_USED_PERCENT) ? TestExcessGCLockerCollectionsStringConstants.USED_TOO_LOW + : TestExcessGCLockerCollectionsStringConstants.USED_OK)); + } + } + } + } catch (RuntimeException ex) { + System.err.println("Exception during notification processing:" + ex); + ex.printStackTrace(); + } + } + + public static boolean register() { + try { + MBeanServer mbeanServer = ManagementFactory.getPlatformMBeanServer(); + + // Get the list of MX + List gc_mxbeans = ManagementFactory.getGarbageCollectorMXBeans(); + + // Create the notification listener + GCNotificationListener gcNotificationListener = new GCNotificationListener(); + + for (GarbageCollectorMXBean gcbean : gc_mxbeans) { + // Add notification listener for the MXBean + mbeanServer.addNotificationListener(gcbean.getObjectName(), gcNotificationListener, null, null); + } + } catch (Exception ex) { + System.err.println("Exception during mbean registration:" + ex); + ex.printStackTrace(); + // We've failed to set up, terminate + return false; + } + + return true; + } + } + + static public Map largeMap; + + static public void main(String args[]) { + long durationSec = Long.parseLong(args[0]); + int allocThreadNum = 
Integer.parseInt(args[1]); + int jniCriticalThreadNum = Integer.parseInt(args[2]); + + println("Running for " + durationSec + " secs"); + + if (!GCNotificationListener.register()) { + println("failed to register GC notification listener"); + System.exit(-1); + } + + largeMap = populateMap(LARGE_MAP_SIZE); + + println("Starting " + allocThreadNum + " allocating threads"); + for (int i = 0; i < allocThreadNum; i += 1) { + new Thread(new AllocatingWorker()).start(); + } + + println("Starting " + jniCriticalThreadNum + " jni critical threads"); + for (int i = 0; i < jniCriticalThreadNum; i += 1) { + new Thread(new JNICriticalWorker()).start(); + } + + long durationMS = (long) (1000 * durationSec); + long start = System.currentTimeMillis(); + long now = start; + long soFar = now - start; + while (soFar < durationMS) { + try { + Thread.sleep(durationMS - soFar); + } catch (Exception e) { + } + now = System.currentTimeMillis(); + soFar = now - start; + } + println("Done."); + keepRunning = false; + } +} + +public class TestExcessGCLockerCollections { + private static final String USED_OK_LINE = + "\\(" + TestExcessGCLockerCollectionsStringConstants.GCLOCKER_CAUSE + "\\)" + + " .* " + + "\\(" + TestExcessGCLockerCollectionsStringConstants.USED_OK + "\\)"; + private static final String USED_TOO_LOW_LINE = + "\\(" + TestExcessGCLockerCollectionsStringConstants.GCLOCKER_CAUSE + "\\)" + + " .* " + + "\\(" + TestExcessGCLockerCollectionsStringConstants.USED_TOO_LOW + "\\)"; + + private static final String[] COMMON_OPTIONS = new String[] { + "-Xmx1G", "-Xms1G", "-Xmn256M" }; + + public static void main(String args[]) throws Exception { + if (args.length < 3) { + System.out.println("usage: TestExcessGCLockerCollections" + + " " + + " "); + throw new RuntimeException("Invalid arguments"); + } + + ArrayList finalArgs = new ArrayList(); + finalArgs.addAll(Arrays.asList(COMMON_OPTIONS)); + finalArgs.add(TestExcessGCLockerCollectionsAux.class.getName()); + 
finalArgs.addAll(Arrays.asList(args)); + + // GC and other options obtained from test framework. + ProcessBuilder pb = ProcessTools.createJavaProcessBuilder( + true, finalArgs.toArray(new String[0])); + OutputAnalyzer output = new OutputAnalyzer(pb.start()); + output.shouldHaveExitValue(0); + //System.out.println("------------- begin stdout ----------------"); + //System.out.println(output.getStdout()); + //System.out.println("------------- end stdout ----------------"); + output.stdoutShouldMatch(USED_OK_LINE); + output.stdoutShouldNotMatch(USED_TOO_LOW_LINE); + } +}