提交 d28612f9 编写于 作者: A andrew


......@@ -194,6 +194,11 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseAESIntrinsics, false);
if (UseGHASHIntrinsics) {
warning("GHASH intrinsics are not available on this CPU");
FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
if (has_vshasig()) {
UseSHA = true;
* Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* This code is free software; you can redistribute it and/or modify it
......@@ -129,6 +129,7 @@ class Assembler : public AbstractAssembler {
flog3_op3 = 0x36,
edge_op3 = 0x36,
fsrc_op3 = 0x36,
xmulx_op3 = 0x36,
impdep2_op3 = 0x37,
stpartialf_op3 = 0x37,
jmpl_op3 = 0x38,
......@@ -220,6 +221,8 @@ class Assembler : public AbstractAssembler {
mdtox_opf = 0x110,
mstouw_opf = 0x111,
mstosw_opf = 0x113,
xmulx_opf = 0x115,
xmulxhi_opf = 0x116,
mxtod_opf = 0x118,
mwtos_opf = 0x119,
......@@ -1212,6 +1215,9 @@ public:
void movwtos( Register s, FloatRegister d ) { vis3_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::S) | op3(mftoi_op3) | opf(mwtos_opf) | rs2(s)); }
void movxtod( Register s, FloatRegister d ) { vis3_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(mftoi_op3) | opf(mxtod_opf) | rs2(s)); }
// XMULX (VIS3 only): assembles one arithmetic-format instruction word from
// the destination register, the shared xmulx op3 value, both source
// registers and the xmulx opf value, then emits it.
void xmulx(Register s1, Register s2, Register d) {
  vis3_only();
  emit_int32(op(arith_op)   |
             rd(d)          |
             op3(xmulx_op3) |
             rs1(s1)        |
             opf(xmulx_opf) |
             rs2(s2));
}
// XMULXHI (VIS3 only): same encoding as xmulx except for the opf field,
// which selects xmulxhi_opf.
void xmulxhi(Register s1, Register s2, Register d) {
  vis3_only();
  emit_int32(op(arith_op)     |
             rd(d)            |
             op3(xmulx_op3)   |
             rs1(s1)          |
             opf(xmulxhi_opf) |
             rs2(s2));
}
// Crypto SHA instructions
void sha1() { sha1_only(); emit_int32( op(arith_op) | op3(sha_op3) | opf(sha1_opf)); }
......@@ -4788,6 +4788,130 @@ class StubGenerator: public StubCodeGenerator {
return start;
/*
 * Single and multi-block ghash operations.
 *
 * Emits the "ghash_processBlocks" stub and returns its entry address.
 * Incoming arguments, as read from the I registers after save_frame():
 *   I0 - state:   two 64-bit words, loaded once at entry, stored back at exit
 *   I1 - subkeyH: two 64-bit words, reloaded on every iteration
 *   I2 - data:    input bytes, consumed 16 per iteration; may be unaligned
 *   I3 - len:     iteration count (one iteration per 16-byte block)
 *
 * NOTE(review): 'len' is only tested after a block has been processed, so
 * the stub appears to require len >= 1 on entry -- confirm with the caller.
 */
address generate_ghash_processBlocks() {
__ align(CodeEntryAlignment);
Label L_ghash_loop, L_aligned, L_main;
StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks");
address start = __ pc();
// Names for the incoming argument registers
Register state = I0;
Register subkeyH = I1;
Register data = I2;
Register len = I3;
__ save_frame(0);
// Running state lives in O0:O1 for the whole loop
__ ldx(state, 0, O0);
__ ldx(state, 8, O1);
// Loop label for multiblock operations
__ BIND(L_ghash_loop);
// Check if 'data' is unaligned
// G1 = data & 7 (byte offset within an 8-byte word); zero means aligned
__ andcc(data, 7, G1);
__ br(Assembler::zero, false, Assembler::pt, L_aligned);
__ delayed()->nop();
// Unaligned path: read three aligned 8-byte words and stitch the 16 bytes
// of the block together with shifts.
Register left_shift = L1;
Register right_shift = L2;
Register data_ptr = L3;
// Get left and right shift values in bits
// left_shift = offset * 8, right_shift = 64 - left_shift
__ sll(G1, LogBitsPerByte, left_shift);
__ mov(64, right_shift);
__ sub(right_shift, left_shift, right_shift);
// Align to read 'data'
__ sub(data, G1, data_ptr);
// Load first 8 bytes of 'data'
// O4 = (word0 << left_shift) | (word1 >> right_shift)
__ ldx(data_ptr, 0, O4);
__ sllx(O4, left_shift, O4);
__ ldx(data_ptr, 8, O5);
__ srlx(O5, right_shift, G4);
__ bset(G4, O4);
// Load second 8 bytes of 'data'
// O5 = (word1 << left_shift) | (word2 >> right_shift); the final OR sits
// in the delay slot of the branch to L_main
__ sllx(O5, left_shift, O5);
__ ldx(data_ptr, 16, G4);
__ srlx(G4, right_shift, G4);
__ ba(L_main);
__ delayed()->bset(G4, O5);
// If 'data' is aligned, load normally
__ BIND(L_aligned);
__ ldx(data, 0, O4);
__ ldx(data, 8, O5);
// Both paths arrive here with the 16-byte block in O4:O5
__ BIND(L_main);
__ ldx(subkeyH, 0, O2);
__ ldx(subkeyH, 8, O3);
// Fold the block into the running state: O0:O1 ^= O4:O5
__ xor3(O0, O4, O0);
__ xor3(O1, O5, O1);
// Eight VIS3 XOR-multiplies of the state words (O0, O1) by the subkey
// words (O2, O3); xmulx yields the low and xmulxhi the high 64 bits of
// each partial product. The order matters: O3, O4 and O5 are overwritten
// only after their last use as inputs.
__ xmulxhi(O0, O3, G3);
__ xmulx(O0, O2, O5);
__ xmulxhi(O1, O2, G4);
__ xmulxhi(O1, O3, G5);
__ xmulx(O0, O3, G1);
__ xmulx(O1, O3, G2);
__ xmulx(O1, O2, O3);
__ xmulxhi(O0, O2, O4);
// O0 = 0xE1 << 56 -- presumably the GHASH reduction constant; confirm
// against the GCM specification
__ mov(0xE1, O0);
__ sllx(O0, 56, O0);
// Combine the partial products and reduce using the constant in O0
__ xor3(O5, G3, O5);
__ xor3(O5, G4, O5);
__ xor3(G5, G1, G1);
__ xor3(G1, O3, G1);
__ srlx(G2, 63, O1);
__ srlx(G1, 63, G3);
__ sllx(G2, 63, O3);
__ sllx(G2, 58, O2);
__ xor3(O3, O2, O2);
__ sllx(G1, 1, G1);
__ or3(G1, O1, G1);
__ xor3(G1, O2, G1);
__ sllx(G2, 1, G2);
__ xmulxhi(G1, O0, O1);
__ xmulx(G1, O0, O2);
__ xmulxhi(G2, O0, O3);
__ xmulx(G2, O0, G1);
__ xor3(O4, O1, O4);
__ xor3(O5, O2, O5);
__ xor3(O5, O3, O5);
// New state: O0:O1 = (O4:O5:G1 shifted left by one bit), with G3 xor-ed
// into the low word
__ sllx(O4, 1, O2);
__ srlx(O5, 63, O3);
__ or3(O2, O3, O0);
__ sllx(O5, 1, O1);
__ srlx(G1, 63, O2);
__ or3(O1, O2, O1);
__ xor3(O1, G3, O1);
// Next block: the branch is annulled, so the delay-slot advance of 'data'
// by 16 bytes only takes effect when the loop is re-entered
__ deccc(len);
__ br(Assembler::notZero, true, Assembler::pt, L_ghash_loop);
__ delayed()->add(data, 16, data);
// Write the updated state back through the original state pointer (I0)
__ stx(O0, I0, 0);
__ stx(O1, I0, 8);
__ ret();
__ delayed()->restore();
return start;
void generate_initial() {
// Generates all stubs and initializes the entry points
......@@ -4860,6 +4984,10 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
// generate GHASH intrinsics code
if (UseGHASHIntrinsics) {
StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
// generate SHA1/SHA256/SHA512 intrinsics code
if (UseSHA1Intrinsics) {
......@@ -319,6 +319,17 @@ void VM_Version::initialize() {
// GHASH/GCM intrinsics
if (has_vis3() && (UseVIS > 2)) {
if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
UseGHASHIntrinsics = true;
} else if (UseGHASHIntrinsics) {
if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
warning("GHASH intrinsics require VIS3 insructions support. Intriniscs will be disabled");
FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
// SHA1, SHA256, and SHA512 instructions were added to SPARC T-series at different times
if (has_sha1() || has_sha256() || has_sha512()) {
if (UseVIS > 0) { // SHA intrinsics use VIS1 instructions
......@@ -2575,6 +2575,15 @@ void Assembler::psrldq(XMMRegister dst, int shift) {
// PSLLDQ xmm, imm8 (encoding: 66 0F 73 /7 ib).
//
// dst   - register shifted in place
// shift - number of BYTES (not bits) to shift left; emitted as the imm8
//
// Fix: the 0x73 opcode byte and the trailing immediate were not emitted, so
// 'shift' was ignored and the byte stream did not decode as PSLLDQ.
void Assembler::pslldq(XMMRegister dst, int shift) {
  // Shift left 128 bit value in xmm register by number of bytes.
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // xmm7 is passed in the first operand position to supply the /7 opcode
  // extension of the ModRM byte (presumably placed in the reg field by
  // simd_prefix_and_encode -- confirm against its definition).
  int encode = simd_prefix_and_encode(xmm7, dst, dst, VEX_SIMD_66);
  emit_int8(0x73);                            // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
  emit_int8(shift);                           // imm8: byte count
}
void Assembler::ptest(XMMRegister dst, Address src) {
assert(VM_Version::supports_sse4_1(), "");
assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
......@@ -1527,6 +1527,8 @@ private:
// Shift Right by bytes Logical DoubleQuadword Immediate
void psrldq(XMMRegister dst, int shift);
// Shift Left by bytes Logical DoubleQuadword Immediate
void pslldq(XMMRegister dst, int shift);
// Logical Compare 128bit
void ptest(XMMRegister dst, XMMRegister src);
......@@ -2719,6 +2719,169 @@ class StubGenerator: public StubCodeGenerator {
return start;
// Emits the 16-byte "ghash_long_swap_mask" constant and returns its address.
// In memory the bytes read 08..0f followed by 00..07; the GHASH stub uses
// this as a pshufb control, which exchanges the two 64-bit halves of a
// 128-bit value (the bytes inside each half keep their order).
address generate_ghash_long_swap_mask() {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", "ghash_long_swap_mask");
  address mask_base = __ pc();
  // 32-bit words in emission order (lowest address first)
  const int mask_words[] = { 0x0b0a0908, 0x0f0e0d0c, 0x03020100, 0x07060504 };
  const int word_count = sizeof(mask_words) / sizeof(mask_words[0]);
  for (int i = 0; i < word_count; i++) {
    __ emit_data(mask_words[i], relocInfo::none, 0);
  }
  return mask_base;
}
// Emits the 16-byte "ghash_byte_swap_mask" constant and returns its address.
// In memory the bytes read 0f, 0e, ... 01, 00; the GHASH stub uses this as
// a pshufb control, which reverses all 16 bytes of a 128-bit value.
address generate_ghash_byte_swap_mask() {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", "ghash_byte_swap_mask");
  address mask_base = __ pc();
  // 32-bit words in emission order (lowest address first)
  const int mask_words[] = { 0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203 };
  const int word_count = sizeof(mask_words) / sizeof(mask_words[0]);
  for (int i = 0; i < word_count; i++) {
    __ emit_data(mask_words[i], relocInfo::none, 0);
  }
  return mask_base;
}
/*
 * Single and multi-block ghash operations.
 *
 * Emits the "ghash_processBlocks" stub (32-bit x86) and returns its entry
 * address. The four arguments are read from the stack relative to rbp after
 * enter(), in 4-byte slots starting at rbp+8:
 *   state   - 16 bytes, loaded at entry and overwritten with the result
 *   subkeyH - 16 bytes, loaded once before the loop
 *   data    - input, consumed 16 bytes per iteration
 *   blocks  - iteration count
 *
 * NOTE(review): 'blocks' is only tested after a block has been processed,
 * so the stub appears to require blocks >= 1 -- confirm with the caller.
 */
address generate_ghash_processBlocks() {
assert(UseGHASHIntrinsics, "need GHASH intrinsics and CLMUL support");
__ align(CodeEntryAlignment);
Label L_ghash_loop, L_exit;
StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks");
address start = __ pc();
// General-purpose registers that receive the arguments
const Register state = rdi;
const Register subkeyH = rsi;
const Register data = rdx;
const Register blocks = rcx;
// Stack locations of the incoming arguments
const Address state_param(rbp, 8+0);
const Address subkeyH_param(rbp, 8+4);
const Address data_param(rbp, 8+8);
const Address blocks_param(rbp, 8+12);
// Only xmm0..xmm7 are used here, so temporaries are reused between phases
const XMMRegister xmm_temp0 = xmm0;
const XMMRegister xmm_temp1 = xmm1;
const XMMRegister xmm_temp2 = xmm2;
const XMMRegister xmm_temp3 = xmm3;
const XMMRegister xmm_temp4 = xmm4;
const XMMRegister xmm_temp5 = xmm5;
const XMMRegister xmm_temp6 = xmm6;
const XMMRegister xmm_temp7 = xmm7;
__ enter();
handleSOERegisters(true); // Save registers
__ movptr(state, state_param);
__ movptr(subkeyH, subkeyH_param);
__ movptr(data, data_param);
__ movptr(blocks, blocks_param);
// xmm0 = state and xmm1 = subkeyH, each shuffled with the long-swap mask
__ movdqu(xmm_temp0, Address(state, 0));
__ pshufb(xmm_temp0, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
__ movdqu(xmm_temp1, Address(subkeyH, 0));
__ pshufb(xmm_temp1, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
__ BIND(L_ghash_loop);
// xmm2 = next 16 bytes of data, shuffled with the byte-swap mask, then
// xor-ed into the running state in xmm0
__ movdqu(xmm_temp2, Address(data, 0));
__ pshufb(xmm_temp2, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr()));
__ pxor(xmm_temp0, xmm_temp2);
// Multiply with the hash key
__ movdqu(xmm_temp3, xmm_temp0);
__ pclmulqdq(xmm_temp3, xmm_temp1, 0); // xmm3 holds a0*b0
__ movdqu(xmm_temp4, xmm_temp0);
__ pclmulqdq(xmm_temp4, xmm_temp1, 16); // xmm4 holds a0*b1
__ movdqu(xmm_temp5, xmm_temp0);
__ pclmulqdq(xmm_temp5, xmm_temp1, 1); // xmm5 holds a1*b0
__ movdqu(xmm_temp6, xmm_temp0);
__ pclmulqdq(xmm_temp6, xmm_temp1, 17); // xmm6 holds a1*b1
__ pxor(xmm_temp4, xmm_temp5); // xmm4 holds a0*b1 + a1*b0
__ movdqu(xmm_temp5, xmm_temp4); // move the contents of xmm4 to xmm5
__ psrldq(xmm_temp4, 8); // shift xmm4 right by 8 bytes (64 bits)
__ pslldq(xmm_temp5, 8); // shift xmm5 left by 8 bytes (64 bits)
__ pxor(xmm_temp3, xmm_temp5);
__ pxor(xmm_temp6, xmm_temp4); // Register pair <xmm6:xmm3> holds the result
// of the carry-less multiplication of
// xmm0 by xmm1.
// We shift the result of the multiplication by one bit position
// to the left to cope for the fact that the bits are reversed.
// pslld/psrld work on 32-bit lanes, so the bit shifted out of each lane
// (obtained with psrld 31) is moved up one lane with byte shifts and
// or-ed back in; xmm5 carries the top bit of xmm3 across into xmm6.
__ movdqu(xmm_temp7, xmm_temp3);
__ movdqu(xmm_temp4, xmm_temp6);
__ pslld (xmm_temp3, 1);
__ pslld(xmm_temp6, 1);
__ psrld(xmm_temp7, 31);
__ psrld(xmm_temp4, 31);
__ movdqu(xmm_temp5, xmm_temp7);
__ pslldq(xmm_temp4, 4);
__ pslldq(xmm_temp7, 4);
__ psrldq(xmm_temp5, 12);
__ por(xmm_temp3, xmm_temp7);
__ por(xmm_temp6, xmm_temp4);
__ por(xmm_temp6, xmm_temp5);
// First phase of the reduction
// Move xmm3 into xmm4, xmm5, xmm7 in order to perform the shifts
// independently.
__ movdqu(xmm_temp7, xmm_temp3);
__ movdqu(xmm_temp4, xmm_temp3);
__ movdqu(xmm_temp5, xmm_temp3);
__ pslld(xmm_temp7, 31); // packed 32-bit left shift: << 31
__ pslld(xmm_temp4, 30); // packed 32-bit left shift: << 30
__ pslld(xmm_temp5, 25); // packed 32-bit left shift: << 25
__ pxor(xmm_temp7, xmm_temp4); // xor the shifted versions
__ pxor(xmm_temp7, xmm_temp5);
__ movdqu(xmm_temp4, xmm_temp7);
__ pslldq(xmm_temp7, 12);
__ psrldq(xmm_temp4, 4);
__ pxor(xmm_temp3, xmm_temp7); // first phase of the reduction complete
// Second phase of the reduction
// Make 3 copies of xmm3 in xmm2, xmm5, xmm7 for doing these
// shift operations.
__ movdqu(xmm_temp2, xmm_temp3);
__ movdqu(xmm_temp7, xmm_temp3);
__ movdqu(xmm_temp5, xmm_temp3);
__ psrld(xmm_temp2, 1); // packed 32-bit right shift: >> 1
__ psrld(xmm_temp7, 2); // packed 32-bit right shift: >> 2
__ psrld(xmm_temp5, 7); // packed 32-bit right shift: >> 7
__ pxor(xmm_temp2, xmm_temp7); // xor the shifted versions
__ pxor(xmm_temp2, xmm_temp5);
__ pxor(xmm_temp2, xmm_temp4);
__ pxor(xmm_temp3, xmm_temp2);
__ pxor(xmm_temp6, xmm_temp3); // the result is in xmm6
// Loop control: leave when the count reaches zero, otherwise the result
// becomes the running state and 'data' advances by one 16-byte block
__ decrement(blocks);
__ jcc(Assembler::zero, L_exit);
__ movdqu(xmm_temp0, xmm_temp6);
__ addptr(data, 16);
__ jmp(L_ghash_loop);
__ BIND(L_exit);
// Apply the long-swap mask to the 16-byte result (same shuffle that was
// applied to the state on entry)
__ pshufb(xmm_temp6, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
__ movdqu(Address(state, 0), xmm_temp6); // store the result
handleSOERegisters(false); // restore registers
__ leave();
__ ret(0);
return start;
* Arguments:
......@@ -3018,6 +3181,13 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt();
// Generate GHASH intrinsics code
if (UseGHASHIntrinsics) {
StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask();
StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
// Safefetch stubs.
generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
......@@ -3639,6 +3639,175 @@ class StubGenerator: public StubCodeGenerator {
return start;
// Emits the 16-byte "ghash_long_swap_mask" constant and returns its address.
// In memory the bytes read 08..0f followed by 00..07; the GHASH stub uses
// this as a pshufb control, which exchanges the two 64-bit halves of a
// 128-bit value (the bytes inside each half keep their order).
address generate_ghash_long_swap_mask() {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", "ghash_long_swap_mask");
  address mask_base = __ pc();
  // 64-bit halves in emission order (lowest address first)
  const uint64_t mask_halves[] = { 0x0f0e0d0c0b0a0908, 0x0706050403020100 };
  const int half_count = sizeof(mask_halves) / sizeof(mask_halves[0]);
  for (int i = 0; i < half_count; i++) {
    __ emit_data64(mask_halves[i], relocInfo::none);
  }
  return mask_base;
}
// Emits the 16-byte "ghash_byte_swap_mask" constant and returns its address.
// In memory the bytes read 0f, 0e, ... 01, 00; the GHASH stub uses this as
// a pshufb control, which reverses all 16 bytes of a 128-bit value.
address generate_ghash_byte_swap_mask() {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", "ghash_byte_swap_mask");
  address mask_base = __ pc();
  // 64-bit halves in emission order (lowest address first)
  const uint64_t mask_halves[] = { 0x08090a0b0c0d0e0f, 0x0001020304050607 };
  const int half_count = sizeof(mask_halves) / sizeof(mask_halves[0]);
  for (int i = 0; i < half_count; i++) {
    __ emit_data64(mask_halves[i], relocInfo::none);
  }
  return mask_base;
}
/*
 * Single and multi-block ghash operations.
 *
 * Emits the "ghash_processBlocks" stub (64-bit x86) and returns its entry
 * address. Arguments arrive in the C argument registers:
 *   c_rarg0 - state:   16 bytes, loaded at entry, overwritten with the result
 *   c_rarg1 - subkeyH: 16 bytes, reloaded and shuffled on every iteration
 *   c_rarg2 - data:    input, consumed 16 bytes per iteration
 *   c_rarg3 - blocks:  iteration count
 *
 * NOTE(review): 'blocks' is only tested after a block has been processed,
 * so the stub appears to require blocks >= 1 -- confirm with the caller.
 */
address generate_ghash_processBlocks() {
__ align(CodeEntryAlignment);
Label L_ghash_loop, L_exit;
StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks");
address start = __ pc();
const Register state = c_rarg0;
const Register subkeyH = c_rarg1;
const Register data = c_rarg2;
const Register blocks = c_rarg3;
// NOTE(review): no closing #endif is visible for any of the three
// #ifdef _WIN64 regions in this function -- confirm against the full file.
#ifdef _WIN64
// Highest xmm register that is saved/restored around the stub body
const int XMM_REG_LAST = 10;
const XMMRegister xmm_temp0 = xmm0;
const XMMRegister xmm_temp1 = xmm1;
const XMMRegister xmm_temp2 = xmm2;
const XMMRegister xmm_temp3 = xmm3;
const XMMRegister xmm_temp4 = xmm4;
const XMMRegister xmm_temp5 = xmm5;
const XMMRegister xmm_temp6 = xmm6;
const XMMRegister xmm_temp7 = xmm7;
const XMMRegister xmm_temp8 = xmm8;
const XMMRegister xmm_temp9 = xmm9;
const XMMRegister xmm_temp10 = xmm10;
__ enter();
#ifdef _WIN64
// save the xmm registers which must be preserved 6-10
__ subptr(rsp, -rsp_after_call_off * wordSize);
for (int i = 6; i <= XMM_REG_LAST; i++) {
__ movdqu(xmm_save(i), as_XMMRegister(i));
// xmm10 keeps the long-swap mask for the whole stub
__ movdqu(xmm_temp10, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
// xmm0 = state, shuffled with the long-swap mask
__ movdqu(xmm_temp0, Address(state, 0));
__ pshufb(xmm_temp0, xmm_temp10);
__ BIND(L_ghash_loop);
// xmm2 = next 16 bytes of data, shuffled with the byte-swap mask
__ movdqu(xmm_temp2, Address(data, 0));
__ pshufb(xmm_temp2, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr()));
// xmm1 = subkeyH, shuffled with the long-swap mask (done inside the loop,
// although neither input changes between iterations)
__ movdqu(xmm_temp1, Address(subkeyH, 0));
__ pshufb(xmm_temp1, xmm_temp10);
// Fold the data block into the running state
__ pxor(xmm_temp0, xmm_temp2);
// Multiply with the hash key
__ movdqu(xmm_temp3, xmm_temp0);
__ pclmulqdq(xmm_temp3, xmm_temp1, 0); // xmm3 holds a0*b0
__ movdqu(xmm_temp4, xmm_temp0);
__ pclmulqdq(xmm_temp4, xmm_temp1, 16); // xmm4 holds a0*b1
__ movdqu(xmm_temp5, xmm_temp0);
__ pclmulqdq(xmm_temp5, xmm_temp1, 1); // xmm5 holds a1*b0
__ movdqu(xmm_temp6, xmm_temp0);
__ pclmulqdq(xmm_temp6, xmm_temp1, 17); // xmm6 holds a1*b1
__ pxor(xmm_temp4, xmm_temp5); // xmm4 holds a0*b1 + a1*b0
__ movdqu(xmm_temp5, xmm_temp4); // move the contents of xmm4 to xmm5
__ psrldq(xmm_temp4, 8); // shift xmm4 right by 8 bytes (64 bits)
__ pslldq(xmm_temp5, 8); // shift xmm5 left by 8 bytes (64 bits)
__ pxor(xmm_temp3, xmm_temp5);
__ pxor(xmm_temp6, xmm_temp4); // Register pair <xmm6:xmm3> holds the result
// of the carry-less multiplication of
// xmm0 by xmm1.
// We shift the result of the multiplication by one bit position
// to the left to cope for the fact that the bits are reversed.
// pslld/psrld work on 32-bit lanes, so the bit shifted out of each lane
// (obtained with psrld 31) is moved up one lane with byte shifts and
// or-ed back in; xmm9 carries the top bit of xmm3 across into xmm6.
__ movdqu(xmm_temp7, xmm_temp3);
__ movdqu(xmm_temp8, xmm_temp6);
__ pslld(xmm_temp3, 1);
__ pslld(xmm_temp6, 1);
__ psrld(xmm_temp7, 31);
__ psrld(xmm_temp8, 31);
__ movdqu(xmm_temp9, xmm_temp7);
__ pslldq(xmm_temp8, 4);
__ pslldq(xmm_temp7, 4);
__ psrldq(xmm_temp9, 12);
__ por(xmm_temp3, xmm_temp7);
__ por(xmm_temp6, xmm_temp8);
__ por(xmm_temp6, xmm_temp9);
// First phase of the reduction
// Move xmm3 into xmm7, xmm8, xmm9 in order to perform the shifts
// independently.
__ movdqu(xmm_temp7, xmm_temp3);
__ movdqu(xmm_temp8, xmm_temp3);
__ movdqu(xmm_temp9, xmm_temp3);
__ pslld(xmm_temp7, 31); // packed 32-bit left shift: << 31
__ pslld(xmm_temp8, 30); // packed 32-bit left shift: << 30
__ pslld(xmm_temp9, 25); // packed 32-bit left shift: << 25
__ pxor(xmm_temp7, xmm_temp8); // xor the shifted versions
__ pxor(xmm_temp7, xmm_temp9);
__ movdqu(xmm_temp8, xmm_temp7);
__ pslldq(xmm_temp7, 12);
__ psrldq(xmm_temp8, 4);
__ pxor(xmm_temp3, xmm_temp7); // first phase of the reduction complete
// Second phase of the reduction
// Make 3 copies of xmm3 in xmm2, xmm4, xmm5 for doing these
// shift operations.
__ movdqu(xmm_temp2, xmm_temp3);
__ movdqu(xmm_temp4, xmm_temp3);
__ movdqu(xmm_temp5, xmm_temp3);
__ psrld(xmm_temp2, 1); // packed 32-bit right shift: >> 1
__ psrld(xmm_temp4, 2); // packed 32-bit right shift: >> 2
__ psrld(xmm_temp5, 7); // packed 32-bit right shift: >> 7
__ pxor(xmm_temp2, xmm_temp4); // xor the shifted versions
__ pxor(xmm_temp2, xmm_temp5);
__ pxor(xmm_temp2, xmm_temp8);
__ pxor(xmm_temp3, xmm_temp2);
__ pxor(xmm_temp6, xmm_temp3); // the result is in xmm6
// Loop control: leave when the count reaches zero, otherwise the result
// becomes the running state and 'data' advances by one 16-byte block
__ decrement(blocks);
__ jcc(Assembler::zero, L_exit);
__ movdqu(xmm_temp0, xmm_temp6);
__ addptr(data, 16);
__ jmp(L_ghash_loop);
__ BIND(L_exit);
__ pshufb(xmm_temp6, xmm_temp10); // apply the long-swap mask to the 16-byte result
__ movdqu(Address(state, 0), xmm_temp6); // store the result
#ifdef _WIN64
// restore xmm regs belonging to calling function
for (int i = 6; i <= XMM_REG_LAST; i++) {
__ movdqu(as_XMMRegister(i), xmm_save(i));
__ leave();
__ ret(0);
return start;
* Arguments:
......@@ -4077,6 +4246,13 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
// Generate GHASH intrinsics code
if (UseGHASHIntrinsics) {
StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask();
StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
// Safefetch stubs.
generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
* Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved.
* This code is free software; you can redistribute it and/or modify it
......@@ -33,6 +33,8 @@
address StubRoutines::x86::_verify_mxcsr_entry = NULL;
address StubRoutines::x86::_key_shuffle_mask_addr = NULL;
address StubRoutines::x86::_ghash_long_swap_mask_addr = NULL;
address StubRoutines::x86::_ghash_byte_swap_mask_addr = NULL;
uint64_t StubRoutines::x86::_crc_by128_masks[] =
* Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved.
* This code is free software; you can redistribute it and/or modify it
......@@ -36,10 +36,15 @@
// masks and table for CRC32
static uint64_t _crc_by128_masks[];
static juint _crc_table[];
// swap mask for ghash
static address _ghash_long_swap_mask_addr;
static address _ghash_byte_swap_mask_addr;
static address verify_mxcsr_entry() { return _verify_mxcsr_entry; }
static address key_shuffle_mask_addr() { return _key_shuffle_mask_addr; }
static address crc_by128_masks_addr() { return (address)_crc_by128_masks; }
static address ghash_long_swap_mask_addr() { return _ghash_long_swap_mask_addr; }
static address ghash_byte_swap_mask_addr() { return _ghash_byte_swap_mask_addr; }
#endif // CPU_X86_VM_STUBROUTINES_X86_32_HPP
......@@ -594,6 +594,17 @@ void VM_Version::get_processor_features() {
FLAG_SET_DEFAULT(UseAESIntrinsics, false);
// GHASH/GCM intrinsics
if (UseCLMUL && (UseSSE > 2)) {
if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
UseGHASHIntrinsics = true;
} else if (UseGHASHIntrinsics) {
if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
if (UseSHA) {
warning("SHA instructions are not available on this CPU");
......@@ -863,6 +863,12 @@
do_name( implCompressMB_name, "implCompressMultiBlock0") \
do_signature(implCompressMB_signature, "([BII)I") \
/* support for com.sun.crypto.provider.GHASH */ \
do_class(com_sun_crypto_provider_ghash, "com/sun/crypto/provider/GHASH") \
do_intrinsic(_ghash_processBlocks, com_sun_crypto_provider_ghash, processBlocks_name, ghash_processBlocks_signature, F_S) \
do_name(processBlocks_name, "processBlocks") \
do_signature(ghash_processBlocks_signature, "([BII[J[J)V") \
/* support for java.util.zip */ \
do_class(java_util_zip_CRC32, "java/util/zip/CRC32") \
do_intrinsic(_updateCRC32, java_util_zip_CRC32, update_name, int2_int_signature, F_SN) \
......@@ -2520,6 +2520,12 @@ void G1CollectedHeap::collect(GCCause::Cause cause) {
} else if (GC_locker::should_discard(cause, gc_count_before)) {
// Return to be consistent with VMOp failure due to another
// collection slipping in after our gc_count but before our
// request is processed. _gc_locker collections upgraded by
// GCLockerInvokesConcurrent are handled above and never discarded.
} else {
if (cause == GCCause::_gc_locker || cause == GCCause::_wb_young_gc
DEBUG_ONLY(|| cause == GCCause::_scavenge_alot)) {
......@@ -530,6 +530,10 @@ void ParallelScavengeHeap::collect(GCCause::Cause cause) {
full_gc_count = Universe::heap()->total_full_collections();
if (GC_locker::should_discard(cause, gc_count)) {
VM_ParallelGCSystemGC op(gc_count, full_gc_count, cause);
......@@ -52,11 +52,16 @@ void VM_ParallelGCFailedAllocation::doit() {
// Decides whether a collection requested with 'cause' is a full one.
// Returns false for _gc_locker and _wb_young_gc requests (and, in debug
// builds only, for _scavenge_alot); every other cause yields true.
static bool is_cause_full(GCCause::Cause cause) {
  if (cause == GCCause::_gc_locker) {
    return false;
  }
  if (cause == GCCause::_wb_young_gc) {
    return false;
  }
  DEBUG_ONLY(if (cause == GCCause::_scavenge_alot) { return false; })
  return true;
}
// Only used for System.gc() calls
VM_ParallelGCSystemGC::VM_ParallelGCSystemGC(uint gc_count,
uint full_gc_count,
GCCause::Cause gc_cause) :
VM_GC_Operation(gc_count, gc_cause, full_gc_count, true /* full */)
VM_GC_Operation(gc_count, gc_cause, full_gc_count, is_cause_full(gc_cause))
......@@ -68,8 +73,7 @@ void VM_ParallelGCSystemGC::doit() {
"must be a ParallelScavengeHeap");
GCCauseSetter gccs(heap, _gc_cause);
if (_gc_cause == GCCause::_gc_locker || _gc_cause == GCCause::_wb_young_gc
DEBUG_ONLY(|| _gc_cause == GCCause::_scavenge_alot)) {
if (!_full) {
// If (and only if) the scavenge fails, this will invoke a full gc.
} else {
......@@ -201,6 +201,19 @@ void VM_GenCollectForAllocation::doit() {
// True when 'max_level' names the last generation of the heap, i.e. the
// requested collection covers all generations.
static bool is_full_gc(int max_level) {
  const int last_level = GenCollectedHeap::heap()->n_gens() - 1;
  return max_level == last_level;
}
// Builds the VM operation for an explicitly requested collection up to
// 'max_level'. The 'full' flag handed to VM_GC_Operation is derived from
// max_level via is_full_gc() rather than being hard-wired.
VM_GenCollectFull::VM_GenCollectFull(uint gc_count_before,
                                     uint full_gc_count_before,
                                     GCCause::Cause gc_cause,
                                     int max_level)
  : VM_GC_Operation(gc_count_before,
                    gc_cause,
                    full_gc_count_before,
                    is_full_gc(max_level) /* full */)
  , _max_level(max_level)
{
}
void VM_GenCollectFull::doit() {
SvcGCMarker sgcm(SvcGCMarker::FULL);
......@@ -201,9 +201,7 @@ class VM_GenCollectFull: public VM_GC_Operation {
VM_GenCollectFull(uint gc_count_before,
uint full_gc_count_before,
GCCause::Cause gc_cause,
int max_level)
: VM_GC_Operation(gc_count_before, gc_cause, full_gc_count_before, true /* full */),
_max_level(max_level) { }
int max_level);
~VM_GenCollectFull() {}
virtual VMOp_Type type() const { return VMOp_GenCollectFull; }
virtual void doit();
......@@ -31,6 +31,7 @@
volatile jint GC_locker::_jni_lock_count = 0;
volatile bool GC_locker::_needs_gc = false;
volatile bool GC_locker::_doing_gc = false;
unsigned int GC_locker::_total_collections = 0;
#ifdef ASSERT
volatile jint GC_locker::_debug_jni_lock_count = 0;
......@@ -94,6 +95,11 @@ void GC_locker::stall_until_clear() {
// Tells the caller whether a pending collection request can be dropped.
// Only _gc_locker requests are ever discarded, and only when the supplied
// collection count differs from the count recorded in _total_collections.
bool GC_locker::should_discard(GCCause::Cause cause, uint total_collections) {
  if (cause != GCCause::_gc_locker) {
    return false;
  }
  return _total_collections != total_collections;
}
void GC_locker::jni_lock(JavaThread* thread) {
assert(!thread->in_critical(), "shouldn't currently be in a critical region");
MutexLocker mu(JNICritical_lock);
......@@ -117,7 +123,13 @@ void GC_locker::jni_unlock(JavaThread* thread) {
if (needs_gc() && !is_active_internal()) {
// We're the last thread out. Cause a GC to occur.
// We're the last thread out. Request a GC.
// Capture the current total collections, to allow detection of
// other collections that make this one unnecessary. The value of
// total_collections() is only changed at a safepoint, so there
// must not be a safepoint between the lock becoming inactive and
// getting the count, else there may be unnecessary GCLocker GCs.
_total_collections = Universe::heap()->total_collections();
_doing_gc = true;
// Must give up the lock while at a safepoint
......@@ -26,6 +26,7 @@
#include "gc_interface/collectedHeap.hpp"
#include "gc_interface/gcCause.hpp"
#include "memory/genCollectedHeap.hpp"
#include "memory/universe.hpp"
#include "oops/oop.hpp"
......@@ -57,6 +58,7 @@ class GC_locker: public AllStatic {
static volatile bool _needs_gc; // heap is filling, we need a GC
// note: bool is typedef'd as jint
static volatile bool _doing_gc; // unlock_critical() is doing a GC
static uint _total_collections; // value for _gc_locker collection
#ifdef ASSERT
// This lock count is updated for all operations and is used to
......@@ -116,6 +118,12 @@ class GC_locker: public AllStatic {
// Sets _needs_gc if is_active() is true. Returns is_active().
static bool check_active_before_gc();
// Return true if the designated collection is a GCLocker request
// that should be discarded. Returns true if cause == GCCause::_gc_locker
// and the given total collection value indicates a collection has been
// done since the GCLocker request was made.
static bool should_discard(GCCause::Cause cause, uint total_collections);
// Stalls the caller (who should not be in a jni critical section)
// until needs_gc() clears. Note however that needs_gc() may be
// set at a subsequent safepoint and/or cleared under the
......@@ -796,8 +796,11 @@ void GenCollectedHeap::collect(GCCause::Cause cause) {
} else if (cause == GCCause::_wb_young_gc) {
// minor collection for WhiteBox API
} else if ((cause == GCCause::_wb_young_gc) ||
(cause == GCCause::_gc_locker)) {
// minor collection for WhiteBox or GCLocker.
// _gc_locker collections upgraded by GCLockerInvokesConcurrent
// are handled above and never discarded.
collect(cause, 0);
} else {
#ifdef ASSERT
......@@ -835,6 +838,11 @@ void GenCollectedHeap::collect_locked(GCCause::Cause cause, int max_level) {
// Read the GC count while holding the Heap_lock
unsigned int gc_count_before = total_collections();
unsigned int full_gc_count_before = total_full_collections();
if (GC_locker::should_discard(cause, gc_count_before)) {
MutexUnlocker mu(Heap_lock); // give up heap lock, execute gets it back
VM_GenCollectFull op(gc_count_before, full_gc_count_before,
......@@ -887,24 +895,16 @@ void GenCollectedHeap::do_full_collection(bool clear_all_soft_refs) {
void GenCollectedHeap::do_full_collection(bool clear_all_soft_refs,
int max_level) {
int local_max_level;
if (!incremental_collection_will_fail(false /* don't consult_young */) &&
gc_cause() == GCCause::_gc_locker) {
local_max_level = 0;
} else {
local_max_level = max_level;
do_collection(true /* full */,
clear_all_soft_refs /* clear_all_soft_refs */,
0 /* size */,
false /* is_tlab */,
local_max_level /* max_level */);
max_level /* max_level */);
// Hack XXX FIX ME !!!
// A scavenge may not have been attempted, or may have
// been attempted and failed, because the old gen was too full
if (local_max_level == 0 && gc_cause() == GCCause::_gc_locker &&
incremental_collection_will_fail(false /* don't consult_young */)) {
if (gc_cause() == GCCause::_gc_locker && incremental_collection_failed()) {
if (PrintGCDetails) {
gclog_or_tty->print_cr("GC locker: Trying a full collection "
"because scavenge failed");
......@@ -952,6 +952,7 @@ void ConnectionGraph::process_call_arguments(CallNode *call) {
strcmp(call->as_CallLeaf()->_name, "aescrypt_decryptBlock") == 0 ||
strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_encryptAESCrypt") == 0 ||
strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_decryptAESCrypt") == 0 ||
strcmp(call->as_CallLeaf()->_name, "ghash_processBlocks") == 0 ||
strcmp(call->as_CallLeaf()->_name, "sha1_implCompress") == 0 ||
strcmp(call->as_CallLeaf()->_name, "sha1_implCompressMB") == 0 ||
strcmp(call->as_CallLeaf()->_name, "sha256_implCompress") == 0 ||
......@@ -311,6 +311,7 @@ class LibraryCallKit : public GraphKit {
Node* inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting);
Node* get_key_start_from_aescrypt_object(Node* aescrypt_object);
Node* get_original_key_start_from_aescrypt_object(Node* aescrypt_object);
bool inline_ghash_processBlocks();
bool inline_sha_implCompress(vmIntrinsics::ID id);
bool inline_digestBase_implCompressMB(int predicate);
bool inline_sha_implCompressMB(Node* digestBaseObj, ciInstanceKlass* instklass_SHA,
......@@ -570,6 +571,10 @@ CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) {
predicates = 3;
case vmIntrinsics::_ghash_processBlocks:
if (!UseGHASHIntrinsics) return NULL;
case vmIntrinsics::_updateCRC32:
case vmIntrinsics::_updateBytesCRC32:
case vmIntrinsics::_updateByteBufferCRC32:
......@@ -957,6 +962,9 @@ bool LibraryCallKit::try_to_inline(int predicate) {
case vmIntrinsics::_montgomerySquare:
return inline_montgomerySquare();
case vmIntrinsics::_ghash_processBlocks:
return inline_ghash_processBlocks();
case vmIntrinsics::_encodeISOArray:
return inline_encodeISOArray();
......@@ -6599,6 +6607,35 @@ Node* LibraryCallKit::inline_cipherBlockChaining_AESCrypt_predicate(bool decrypt
return _gvn.transform(region);
bool LibraryCallKit::inline_ghash_processBlocks() {
address stubAddr;
const char *stubName;
assert(UseGHASHIntrinsics, "need GHASH intrinsics support");
stubAddr = StubRoutines::ghash_processBlocks();
stubName = "ghash_processBlocks";
Node* data = argument(0);
Node* offset = argument(1);
Node* len = argument(2);
Node* state = argument(3);
Node* subkeyH = argument(4);
Node* state_start = array_element_address(state, intcon(0), T_LONG);
assert(state_start, "state is NULL");
Node* subkeyH_start = array_element_address(subkeyH, intcon(0), T_LONG);
assert(subkeyH_start, "subkeyH is NULL");
Node* data_start = array_element_address(data, offset, T_BYTE);
assert(data_start, "data is NULL");
Node* ghash = make_runtime_call(RC_LEAF|RC_NO_FP,
stubAddr, stubName, TypePtr::BOTTOM,
state_start, subkeyH_start, data_start, len);
return true;
// Calculate SHA (i.e., SHA-1) for single-block byte[] array.
......@@ -92,7 +92,25 @@
// At command line specify the parameters: -XX:+FullGCALot -XX:FullGCALotStart=100000000
// GHASH block processing
// Builds the call signature of the ghash_processBlocks stub: three non-null
// pointers (state, subkeyH, data) followed by one int (blocks), with a void
// result.
const TypeFunc* OptoRuntime::ghash_processBlocks_Type() {
  const int num_args = 4;

  // Argument tuple
  const Type** arg_fields = TypeTuple::fields(num_args);
  int slot = TypeFunc::Parms;
  arg_fields[slot++] = TypePtr::NOTNULL;  // state
  arg_fields[slot++] = TypePtr::NOTNULL;  // subkeyH
  arg_fields[slot++] = TypePtr::NOTNULL;  // data
  arg_fields[slot++] = TypeInt::INT;      // blocks
  assert(slot == TypeFunc::Parms+num_args, "correct decoding");
  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+num_args, arg_fields);

  // Result tuple: a field array is still required, its single slot is NULL
  const Type** result_fields = TypeTuple::fields(1);
  result_fields[TypeFunc::Parms+0] = NULL;  // void
  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, result_fields);

  return TypeFunc::make(domain, range);
}
// Compiled code entry points
address OptoRuntime::_new_instance_Java = NULL;
......@@ -311,6 +311,8 @@ private:
static const TypeFunc* montgomeryMultiply_Type();
static const TypeFunc* montgomerySquare_Type();
static const TypeFunc* ghash_processBlocks_Type();
static const TypeFunc* updateBytesCRC32_Type();
// leaf on stack replacement interpreter accessor types
......@@ -602,6 +602,9 @@ class CommandLineFlags {
product(bool, UseSHA, false, \
"Control whether SHA instructions can be used on SPARC") \
product(bool, UseGHASHIntrinsics, false, \
"Use intrinsics for GHASH versions of crypto") \
product(uintx, LargePageSizeInBytes, 0, \
"Large page size (0 to let VM choose the page size)") \
......@@ -124,6 +124,7 @@ address StubRoutines::_aescrypt_encryptBlock = NULL;
address StubRoutines::_aescrypt_decryptBlock = NULL;
address StubRoutines::_cipherBlockChaining_encryptAESCrypt = NULL;
address StubRoutines::_cipherBlockChaining_decryptAESCrypt = NULL;
address StubRoutines::_ghash_processBlocks = NULL;
address StubRoutines::_sha1_implCompress = NULL;
address StubRoutines::_sha1_implCompressMB = NULL;
......@@ -197,6 +197,7 @@ class StubRoutines: AllStatic {
static address _aescrypt_decryptBlock;
static address _cipherBlockChaining_encryptAESCrypt;
static address _cipherBlockChaining_decryptAESCrypt;
static address _ghash_processBlocks;
static address _sha1_implCompress;
static address _sha1_implCompressMB;
......@@ -359,6 +360,7 @@ class StubRoutines: AllStatic {
static address aescrypt_decryptBlock() { return _aescrypt_decryptBlock; }
static address cipherBlockChaining_encryptAESCrypt() { return _cipherBlockChaining_encryptAESCrypt; }
static address cipherBlockChaining_decryptAESCrypt() { return _cipherBlockChaining_decryptAESCrypt; }
static address ghash_processBlocks() { return _ghash_processBlocks; }
static address sha1_implCompress() { return _sha1_implCompress; }
static address sha1_implCompressMB() { return _sha1_implCompressMB; }
......@@ -810,6 +810,7 @@ typedef TwoOopHashtable<Symbol*, mtClass> SymbolTwoOopHashtable;
static_field(StubRoutines, _aescrypt_decryptBlock, address) \
static_field(StubRoutines, _cipherBlockChaining_encryptAESCrypt, address) \
static_field(StubRoutines, _cipherBlockChaining_decryptAESCrypt, address) \
static_field(StubRoutines, _ghash_processBlocks, address) \
static_field(StubRoutines, _updateBytesCRC32, address) \
static_field(StubRoutines, _crc_table_adr, address) \
static_field(StubRoutines, _multiplyToLen, address) \
......@@ -29,6 +29,7 @@
import javax.crypto.Cipher;
import javax.crypto.KeyGenerator;
import javax.crypto.SecretKey;
import javax.crypto.spec.GCMParameterSpec;
import javax.crypto.spec.IvParameterSpec;
import javax.crypto.spec.SecretKeySpec;
import java.security.AlgorithmParameters;
......@@ -62,8 +63,12 @@ abstract public class TestAESBase {
Random random = new Random(0);
Cipher cipher;
Cipher dCipher;
AlgorithmParameters algParams;
AlgorithmParameters algParams = null;
SecretKey key;
GCMParameterSpec gcm_spec;
byte[] aad = { 0x11, 0x22, 0x33, 0x44, 0x55 };
int tlen = 12;
byte[] iv = new byte[16];
static int numThreads = 0;
int threadId;
......@@ -77,7 +82,10 @@ abstract public class TestAESBase {
public void prepare() {
try {
System.out.println("\nalgorithm=" + algorithm + ", mode=" + mode + ", paddingStr=" + paddingStr + ", msgSize=" + msgSize + ", keySize=" + keySize + ", noReinit=" + noReinit + ", checkOutput=" + checkOutput + ", encInputOffset=" + encInputOffset + ", encOutputOffset=" + encOutputOffset + ", decOutputOffset=" + decOutputOffset + ", lastChunkSize=" +lastChunkSize );
System.out.println("\nalgorithm=" + algorithm + ", mode=" + mode + ", paddingStr=" + paddingStr +
", msgSize=" + msgSize + ", keySize=" + keySize + ", noReinit=" + noReinit +
", checkOutput=" + checkOutput + ", encInputOffset=" + encInputOffset + ", encOutputOffset=" +
encOutputOffset + ", decOutputOffset=" + decOutputOffset + ", lastChunkSize=" +lastChunkSize );
if (encInputOffset % ALIGN != 0 || encOutputOffset % ALIGN != 0 || decOutputOffset % ALIGN !=0 )
testingMisalignment = true;
......@@ -98,16 +106,24 @@ abstract public class TestAESBase {
cipher = Cipher.getInstance(algorithm + "/" + mode + "/" + paddingStr, "SunJCE");
dCipher = Cipher.getInstance(algorithm + "/" + mode + "/" + paddingStr, "SunJCE");
// CBC init
if (mode.equals("CBC")) {
int ivLen = (algorithm.equals("AES") ? 16 : algorithm.equals("DES") ? 8 : 0);
IvParameterSpec initVector = new IvParameterSpec(new byte[ivLen]);
IvParameterSpec initVector = new IvParameterSpec(iv);
cipher.init(Cipher.ENCRYPT_MODE, key, initVector);
} else {
algParams = cipher.getParameters();
dCipher.init(Cipher.DECRYPT_MODE, key, initVector);
// GCM init
} else if (mode.equals("GCM")) {
// ECB init
} else {
cipher.init(Cipher.ENCRYPT_MODE, key, algParams);
dCipher.init(Cipher.DECRYPT_MODE, key, algParams);
algParams = cipher.getParameters();
dCipher.init(Cipher.DECRYPT_MODE, key, algParams);
if (threadId == 0) {
......@@ -188,4 +204,19 @@ abstract public class TestAESBase {
abstract void childShowCipher();
/**
 * (Re)initializes the GCM cipher for one direction using a fresh
 * GCMParameterSpec built from {@code tlen} and {@code iv}.
 *
 * @param encrypt if true, replaces {@code cipher} with a new instance and
 *                initializes it for encryption; otherwise initializes the
 *                existing {@code dCipher} for decryption
 * @throws Exception if the cipher cannot be obtained or initialized
 */
void gcm_init(boolean encrypt) throws Exception {
// GCMParameterSpec takes the tag length in bits; tlen is presumably in bytes (hence * 8)
gcm_spec = new GCMParameterSpec(tlen * 8, iv);
if (encrypt) {
// Get a new instance every time because of the restrictions on reusing an IV
cipher = Cipher.getInstance(algorithm + "/" + mode + "/" + paddingStr, "SunJCE");
cipher.init(Cipher.ENCRYPT_MODE, key, gcm_spec);
} else {
dCipher.init(Cipher.DECRYPT_MODE, key, gcm_spec);
......@@ -32,7 +32,11 @@ public class TestAESDecode extends TestAESBase {
public void run() {
try {
if (!noReinit) dCipher.init(Cipher.DECRYPT_MODE, key, algParams);
if (mode.equals("GCM")) {
} else if (!noReinit) {
dCipher.init(Cipher.DECRYPT_MODE, key, algParams);
decode = new byte[decodeLength];
if (testingMisalignment) {
int tempSize = dCipher.update(encode, encOutputOffset, (decodeMsgSize - lastChunkSize), decode, decOutputOffset);
* Copyright (c) 2012, 2014, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2015, Oracle and/or its affiliates. All rights reserved.
* This code is free software; you can redistribute it and/or modify it
......@@ -32,7 +32,11 @@ public class TestAESEncode extends TestAESBase {
public void run() {
try {
if (!noReinit) cipher.init(Cipher.ENCRYPT_MODE, key, algParams);
if (mode.equals("GCM")) {
} else if (!noReinit) {
cipher.init(Cipher.ENCRYPT_MODE, key, algParams);
encode = new byte[encodeLength];
if (testingMisalignment) {
int tempSize = cipher.update(input, encInputOffset, (msgSize - lastChunkSize), encode, encOutputOffset);
......@@ -41,6 +41,13 @@
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=ECB -DencInputOffset=1 -DencOutputOffset=1 TestAESMain
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=ECB -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 TestAESMain
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=ECB -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 -DpaddingStr=NoPadding -DmsgSize=640 TestAESMain
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM TestAESMain
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 TestAESMain
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencOutputOffset=1 TestAESMain
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DdecOutputOffset=1 TestAESMain
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 -DencOutputOffset=1 TestAESMain
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 TestAESMain
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 -DpaddingStr=NoPadding -DmsgSize=640 TestAESMain
* @author Tom Deneau
* Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
package gc.stress.gclocker;
// Based on Kim Barrett's test for JDK-8048556
* @test TestExcessGCLockerCollections
* @key gc
* @bug 8048556
* @summary Check for GC Locker initiated GCs that immediately follow another
* GC and so have very little needing to be collected.
* @library /testlibrary
* @run driver/timeout=1000 gc.stress.gclocker.TestExcessGCLockerCollections 300 4 2
import java.util.HashMap;
import java.util.Map;
import java.util.zip.Deflater;
import java.util.ArrayList;
import java.util.Arrays;
import javax.management.MBeanServer;
import javax.management.Notification;
import javax.management.NotificationListener;
import javax.management.openmbean.CompositeData;
import java.lang.management.ManagementFactory;
import java.lang.management.GarbageCollectorMXBean;
import java.lang.management.MemoryUsage;
import java.util.List;
import com.sun.management.GarbageCollectionNotificationInfo;
import com.sun.management.GcInfo;
import com.oracle.java.testlibrary.Asserts;
import com.oracle.java.testlibrary.ProcessTools;
import com.oracle.java.testlibrary.OutputAnalyzer;
class TestExcessGCLockerCollectionsStringConstants {
// Some constant strings used in both GC logging and error detection
static public final String GCLOCKER_CAUSE = "GCLocker Initiated GC";
static public final String USED_TOO_LOW = "TOO LOW";
static public final String USED_OK = "OK";
class TestExcessGCLockerCollectionsAux {
static private final int LARGE_MAP_SIZE = 64 * 1024;
static private final int MAP_ARRAY_LENGTH = 4;
static private final int MAP_SIZE = 1024;
static private final int BYTE_ARRAY_LENGTH = 128 * 1024;
static private void println(String str) { System.out.println(str); }
static private void println() { System.out.println(); }
static private volatile boolean keepRunning = true;
static Map<Integer,String> populateMap(int size) {
Map<Integer,String> map = new HashMap<Integer,String>();
for (int i = 0; i < size; i += 1) {
Integer keyInt = Integer.valueOf(i);
String valStr = "value is [" + i + "]";
return map;
static private class AllocatingWorker implements Runnable {
private final Object[] array = new Object[MAP_ARRAY_LENGTH];
private int arrayIndex = 0;
private void doStep() {
Map<Integer,String> map = populateMap(MAP_SIZE);
array[arrayIndex] = map;
arrayIndex = (arrayIndex + 1) % MAP_ARRAY_LENGTH;
public void run() {
while (keepRunning) {
static private class JNICriticalWorker implements Runnable {
private int count;
private void doStep() {
byte[] inputArray = new byte[BYTE_ARRAY_LENGTH];
for (int i = 0; i < inputArray.length; i += 1) {
inputArray[i] = (byte) (count + i);
Deflater deflater = new Deflater();
byte[] outputArray = new byte[2 * inputArray.length];
count += 1;
public void run() {
while (keepRunning) {
static class GCNotificationListener implements NotificationListener {
static private final double MIN_USED_PERCENT = 40.0;
// Memory pool names looked up in the before-GC usage map; one entry per
// collector configuration listed in the trailing comments.
static private final List<String> newGenPoolNames = Arrays.asList(
"G1 Eden Space", // OpenJDK G1GC: -XX:+UseG1GC
"PS Eden Space", // OpenJDK ParallelGC: -XX:+UseParallelGC
"Par Eden Space", // OpenJDK ConcMarkSweepGC: -XX:+UseConcMarkSweepGC
"Eden Space" // OpenJDK SerialGC: -XX:+UseSerialGC
// OpenJDK ConcMarkSweepGC: -XX:+UseConcMarkSweepGC -XX:-UseParNewGC
public void handleNotification(Notification notification, Object handback) {
try {
if (notification.getType().equals(GarbageCollectionNotificationInfo.GARBAGE_COLLECTION_NOTIFICATION)) {
GarbageCollectionNotificationInfo info =
GarbageCollectionNotificationInfo.from((CompositeData) notification.getUserData());
String gc_cause = info.getGcCause();
if (gc_cause.equals(TestExcessGCLockerCollectionsStringConstants.GCLOCKER_CAUSE)) {
Map<String, MemoryUsage> memory_before_gc = info.getGcInfo().getMemoryUsageBeforeGc();
for (String newGenPoolName : newGenPoolNames) {
MemoryUsage usage = memory_before_gc.get(newGenPoolName);
if (usage == null) continue;
double startTime = ((double) info.getGcInfo().getStartTime()) / 1000.0;
long used = usage.getUsed();
long committed = usage.getCommitted();
long max = usage.getMax();
double used_percent = (((double) used) / Math.max(committed, max)) * 100.0;
System.out.printf("%6.3f: (%s) %d/%d/%d, %8.4f%% (%s)\n",
startTime, gc_cause, used, committed, max, used_percent,
((used_percent < MIN_USED_PERCENT) ? TestExcessGCLockerCollectionsStringConstants.USED_TOO_LOW
: TestExcessGCLockerCollectionsStringConstants.USED_OK));
} catch (RuntimeException ex) {
System.err.println("Exception during notification processing:" + ex);
public static boolean register() {
try {
MBeanServer mbeanServer = ManagementFactory.getPlatformMBeanServer();
// Get the list of garbage collector MXBeans
List<GarbageCollectorMXBean> gc_mxbeans = ManagementFactory.getGarbageCollectorMXBeans();
// Create the notification listener
GCNotificationListener gcNotificationListener = new GCNotificationListener();
for (GarbageCollectorMXBean gcbean : gc_mxbeans) {
// Add notification listener for the MXBean
mbeanServer.addNotificationListener(gcbean.getObjectName(), gcNotificationListener, null, null);
} catch (Exception ex) {
System.err.println("Exception during mbean registration:" + ex);
// We've failed to set up, terminate
return false;
return true;
static public Map<Integer,String> largeMap;
static public void main(String args[]) {
long durationSec = Long.parseLong(args[0]);
int allocThreadNum = Integer.parseInt(args[1]);
int jniCriticalThreadNum = Integer.parseInt(args[2]);
println("Running for " + durationSec + " secs");
if (!GCNotificationListener.register()) {
println("failed to register GC notification listener");
largeMap = populateMap(LARGE_MAP_SIZE);
println("Starting " + allocThreadNum + " allocating threads");
for (int i = 0; i < allocThreadNum; i += 1) {
new Thread(new AllocatingWorker()).start();
println("Starting " + jniCriticalThreadNum + " jni critical threads");
for (int i = 0; i < jniCriticalThreadNum; i += 1) {
new Thread(new JNICriticalWorker()).start();
long durationMS = (long) (1000 * durationSec);
long start = System.currentTimeMillis();
long now = start;
long soFar = now - start;
while (soFar < durationMS) {
try {
Thread.sleep(durationMS - soFar);
} catch (Exception e) {
now = System.currentTimeMillis();
soFar = now - start;
keepRunning = false;
public class TestExcessGCLockerCollections {
private static final String USED_OK_LINE =
"\\(" + TestExcessGCLockerCollectionsStringConstants.GCLOCKER_CAUSE + "\\)"
+ " .* " +
"\\(" + TestExcessGCLockerCollectionsStringConstants.USED_OK + "\\)";
private static final String USED_TOO_LOW_LINE =
"\\(" + TestExcessGCLockerCollectionsStringConstants.GCLOCKER_CAUSE + "\\)"
+ " .* " +
"\\(" + TestExcessGCLockerCollectionsStringConstants.USED_TOO_LOW + "\\)";
private static final String[] COMMON_OPTIONS = new String[] {
"-Xmx1G", "-Xms1G", "-Xmn256M" };
public static void main(String args[]) throws Exception {
if (args.length < 3) {
System.out.println("usage: TestExcessGCLockerCollections" +
" <duration sec> <alloc threads>" +
" <jni critical threads>");
throw new RuntimeException("Invalid arguments");
ArrayList<String> finalArgs = new ArrayList<String>();
// GC and other options obtained from test framework.
ProcessBuilder pb = ProcessTools.createJavaProcessBuilder(
true, finalArgs.toArray(new String[0]));
OutputAnalyzer output = new OutputAnalyzer(pb.start());
//System.out.println("------------- begin stdout ----------------");
//System.out.println("------------- end stdout ----------------");
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
想要评论请 注册