提交 95c29028 编写于 作者: G gromero

8154156: PPC64: improve array copy stubs by using vector instructions

Reviewed-by: goetz, mdoerr
Contributed-by: NKazunori Ogata <ogatak@jp.ibm.com>
上级 a44dcc97
......@@ -469,6 +469,8 @@ class Assembler : public AbstractAssembler {
LVSR_OPCODE = (31u << OPCODE_SHIFT | 38u << 1),
// Vector-Scalar (VSX) instruction support.
LXVD2X_OPCODE = (31u << OPCODE_SHIFT | 844u << 1),
STXVD2X_OPCODE = (31u << OPCODE_SHIFT | 972u << 1),
MTVSRD_OPCODE = (31u << OPCODE_SHIFT | 179u << 1),
MFVSRD_OPCODE = (31u << OPCODE_SHIFT | 51u << 1),
......@@ -670,8 +672,10 @@ class Assembler : public AbstractAssembler {
// Atomics.
LWARX_OPCODE = (31u << OPCODE_SHIFT | 20u << 1),
LDARX_OPCODE = (31u << OPCODE_SHIFT | 84u << 1),
LQARX_OPCODE = (31u << OPCODE_SHIFT | 276u << 1),
STWCX_OPCODE = (31u << OPCODE_SHIFT | 150u << 1),
STDCX_OPCODE = (31u << OPCODE_SHIFT | 214u << 1)
STDCX_OPCODE = (31u << OPCODE_SHIFT | 214u << 1),
STQCX_OPCODE = (31u << OPCODE_SHIFT | 182u << 1)
};
......@@ -1052,6 +1056,19 @@ class Assembler : public AbstractAssembler {
static int vrs( VectorRegister r) { return vrs(r->encoding());}
static int vrt( VectorRegister r) { return vrt(r->encoding());}
// Support Vector-Scalar (VSX) instructions.
static int vsra( int x) { return opp_u_field(x, 15, 11); }
static int vsrb( int x) { return opp_u_field(x, 20, 16); }
static int vsrc( int x) { return opp_u_field(x, 25, 21); }
static int vsrs( int x) { return opp_u_field(x, 10, 6); }
static int vsrt( int x) { return opp_u_field(x, 10, 6); }
static int vsra( VectorSRegister r) { return vsra(r->encoding());}
static int vsrb( VectorSRegister r) { return vsrb(r->encoding());}
static int vsrc( VectorSRegister r) { return vsrc(r->encoding());}
static int vsrs( VectorSRegister r) { return vsrs(r->encoding());}
static int vsrt( VectorSRegister r) { return vsrt(r->encoding());}
static int vsplt_uim( int x) { return opp_u_field(x, 15, 12); } // for vsplt* instructions
static int vsplti_sim(int x) { return opp_u_field(x, 15, 11); } // for vsplti* instructions
static int vsldoi_shb(int x) { return opp_u_field(x, 25, 22); } // for vsldoi instruction
......@@ -1663,11 +1680,14 @@ class Assembler : public AbstractAssembler {
// atomics
inline void lwarx_unchecked(Register d, Register a, Register b, int eh1 = 0);
inline void ldarx_unchecked(Register d, Register a, Register b, int eh1 = 0);
inline void lqarx_unchecked(Register d, Register a, Register b, int eh1 = 0);
inline bool lxarx_hint_exclusive_access();
inline void lwarx( Register d, Register a, Register b, bool hint_exclusive_access = false);
inline void ldarx( Register d, Register a, Register b, bool hint_exclusive_access = false);
inline void lqarx( Register d, Register a, Register b, bool hint_exclusive_access = false);
inline void stwcx_( Register s, Register a, Register b);
inline void stdcx_( Register s, Register a, Register b);
inline void stqcx_( Register s, Register a, Register b);
// Instructions for adjusting thread priority for simultaneous
// multithreading (SMT) on Power5.
......@@ -1943,6 +1963,8 @@ class Assembler : public AbstractAssembler {
inline void mfvscr( VectorRegister d);
// Vector-Scalar (VSX) instructions.
inline void lxvd2x( VectorSRegister d, Register a, Register b);
inline void stxvd2x( VectorSRegister d, Register a, Register b);
inline void mtvrd( VectorRegister d, Register a);
inline void mfvrd( Register a, VectorRegister d);
......@@ -2022,10 +2044,13 @@ class Assembler : public AbstractAssembler {
// Atomics: use ra0mem to disallow R0 as base.
inline void lwarx_unchecked(Register d, Register b, int eh1);
inline void ldarx_unchecked(Register d, Register b, int eh1);
inline void lqarx_unchecked(Register d, Register b, int eh1);
inline void lwarx( Register d, Register b, bool hint_exclusive_access);
inline void ldarx( Register d, Register b, bool hint_exclusive_access);
inline void lqarx( Register d, Register b, bool hint_exclusive_access);
inline void stwcx_(Register s, Register b);
inline void stdcx_(Register s, Register b);
inline void stqcx_(Register s, Register b);
inline void lfs( FloatRegister d, int si16);
inline void lfsx( FloatRegister d, Register b);
inline void lfd( FloatRegister d, int si16);
......
/*
* Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2018, SAP SE. All rights reserved.
* Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2019, SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
......@@ -504,11 +504,14 @@ inline void Assembler::elemental_membar(int e) { assert(0 < e && e < 16, "invali
// Use ra0mem to disallow R0 as base.
inline void Assembler::lwarx_unchecked(Register d, Register a, Register b, int eh1) { emit_int32( LWARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); }
inline void Assembler::ldarx_unchecked(Register d, Register a, Register b, int eh1) { emit_int32( LDARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); }
inline void Assembler::lqarx_unchecked(Register d, Register a, Register b, int eh1) { emit_int32( LQARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); }
inline bool Assembler::lxarx_hint_exclusive_access() { return VM_Version::has_lxarxeh(); }
inline void Assembler::lwarx( Register d, Register a, Register b, bool hint_exclusive_access) { lwarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
inline void Assembler::ldarx( Register d, Register a, Register b, bool hint_exclusive_access) { ldarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
inline void Assembler::lqarx( Register d, Register a, Register b, bool hint_exclusive_access) { lqarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
inline void Assembler::stwcx_(Register s, Register a, Register b) { emit_int32( STWCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); }
inline void Assembler::stdcx_(Register s, Register a, Register b) { emit_int32( STDCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); }
inline void Assembler::stqcx_(Register s, Register a, Register b) { emit_int32( STQCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); }
// Instructions for adjusting thread priority
// for simultaneous multithreading (SMT) on POWER5.
......@@ -624,6 +627,8 @@ inline void Assembler::lvsl( VectorRegister d, Register s1, Register s2) { emit
inline void Assembler::lvsr( VectorRegister d, Register s1, Register s2) { emit_int32( LVSR_OPCODE | vrt(d) | ra0mem(s1) | rb(s2)); }
// Vector-Scalar (VSX) instructions.
inline void Assembler::lxvd2x (VectorSRegister d, Register s1, Register s2) { emit_int32( LXVD2X_OPCODE | vsrt(d) | ra(s1) | rb(s2)); }
inline void Assembler::stxvd2x(VectorSRegister d, Register s1, Register s2) { emit_int32( STXVD2X_OPCODE | vsrt(d) | ra(s1) | rb(s2)); }
inline void Assembler::mtvrd( VectorRegister d, Register a) { emit_int32( MTVSRD_OPCODE | vrt(d) | ra(a) | 1u); } // 1u: d is treated as Vector (VMX/Altivec).
inline void Assembler::mfvrd( Register a, VectorRegister d) { emit_int32( MFVSRD_OPCODE | vrt(d) | ra(a) | 1u); } // 1u: d is treated as Vector (VMX/Altivec).
......@@ -833,10 +838,13 @@ inline void Assembler::dcbtstct(Register s2, int ct) { emit_int32( DCBTST_OPCOD
// ra0 version
inline void Assembler::lwarx_unchecked(Register d, Register b, int eh1) { emit_int32( LWARX_OPCODE | rt(d) | rb(b) | eh(eh1)); }
inline void Assembler::ldarx_unchecked(Register d, Register b, int eh1) { emit_int32( LDARX_OPCODE | rt(d) | rb(b) | eh(eh1)); }
inline void Assembler::lqarx_unchecked(Register d, Register b, int eh1) { emit_int32( LQARX_OPCODE | rt(d) | rb(b) | eh(eh1)); }
inline void Assembler::lwarx( Register d, Register b, bool hint_exclusive_access){ lwarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
inline void Assembler::ldarx( Register d, Register b, bool hint_exclusive_access){ ldarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
inline void Assembler::lqarx( Register d, Register b, bool hint_exclusive_access){ lqarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
inline void Assembler::stwcx_(Register s, Register b) { emit_int32( STWCX_OPCODE | rs(s) | rb(b) | rc(1)); }
inline void Assembler::stdcx_(Register s, Register b) { emit_int32( STDCX_OPCODE | rs(s) | rb(b) | rc(1)); }
inline void Assembler::stqcx_(Register s, Register b) { emit_int32( STQCX_OPCODE | rs(s) | rb(b) | rc(1)); }
// ra0 version
inline void Assembler::lfs( FloatRegister d, int si16) { emit_int32( LFS_OPCODE | frt(d) | simm(si16,16)); }
......
/*
* Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2013 SAP AG. All rights reserved.
* Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2018 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
......@@ -81,6 +81,19 @@ define_pd_global(uintx, TypeProfileLevel, 0);
product(bool, ReoptimizeCallSequences, true, \
"Reoptimize code-sequences of calls at runtime.") \
\
/* Power 8: Configure Data Stream Control Register. */ \
product(uint64_t,DSCR_PPC64, (uintx)-1, \
"Power8 or later: Specify encoded value for Data Stream Control " \
"Register") \
product(uint64_t,DSCR_DPFD_PPC64, 8, \
"Power8 or later: DPFD (default prefetch depth) value of the " \
"Data Stream Control Register." \
" 0: hardware default, 1: none, 2-7: min-max, 8: don't touch") \
product(uint64_t,DSCR_URG_PPC64, 8, \
"Power8 or later: URG (depth attainment urgency) value of the " \
"Data Stream Control Register." \
" 0: hardware default, 1: none, 2-7: min-max, 8: don't touch") \
\
product(bool, UseLoadInstructionsForStackBangingPPC64, false, \
"Use load instructions for stack banging.") \
\
......
/*
* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2013 SAP AG. All rights reserved.
* Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2018 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
......@@ -75,3 +75,14 @@ const char* VectorRegisterImpl::name() const {
};
return is_valid() ? names[encoding()] : "vnoreg";
}
const char* VectorSRegisterImpl::name() const {
const char* names[number_of_registers] = {
"VSR0", "VSR1", "VSR2", "VSR3", "VSR4", "VSR5", "VSR6", "VSR7",
"VSR8", "VSR9", "VSR10", "VSR11", "VSR12", "VSR13", "VSR14", "VSR15",
"VSR16", "VSR17", "VSR18", "VSR19", "VSR20", "VSR21", "VSR22", "VSR23",
"VSR24", "VSR25", "VSR26", "VSR27", "VSR28", "VSR29", "VSR30", "VSR31"
};
return is_valid() ? names[encoding()] : "vsnoreg";
}
/*
* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2014 SAP AG. All rights reserved.
* Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2018 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
......@@ -492,6 +492,106 @@ CONSTANT_REGISTER_DECLARATION(VectorRegister, VR31, (31));
#endif // DONT_USE_REGISTER_DEFINES
// Use VectorSRegister as a shortcut.
class VectorSRegisterImpl;
typedef VectorSRegisterImpl* VectorSRegister;
inline VectorSRegister as_VectorSRegister(int encoding) {
return (VectorSRegister)(intptr_t)encoding;
}
// The implementation of Vector-Scalar (VSX) registers on POWER architecture.
class VectorSRegisterImpl: public AbstractRegisterImpl {
public:
enum {
number_of_registers = 32
};
// construction
inline friend VectorSRegister as_VectorSRegister(int encoding);
// accessors
int encoding() const { assert(is_valid(), "invalid register"); return value(); }
// testers
bool is_valid() const { return 0 <= value() && value() < number_of_registers; }
const char* name() const;
};
// The Vector-Scalar (VSX) registers of the POWER architecture.
CONSTANT_REGISTER_DECLARATION(VectorSRegister, vsnoreg, (-1));
CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR0, ( 0));
CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR1, ( 1));
CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR2, ( 2));
CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR3, ( 3));
CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR4, ( 4));
CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR5, ( 5));
CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR6, ( 6));
CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR7, ( 7));
CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR8, ( 8));
CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR9, ( 9));
CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR10, (10));
CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR11, (11));
CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR12, (12));
CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR13, (13));
CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR14, (14));
CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR15, (15));
CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR16, (16));
CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR17, (17));
CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR18, (18));
CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR19, (19));
CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR20, (20));
CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR21, (21));
CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR22, (22));
CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR23, (23));
CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR24, (24));
CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR25, (25));
CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR26, (26));
CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR27, (27));
CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR28, (28));
CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR29, (29));
CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR30, (30));
CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR31, (31));
#ifndef DONT_USE_REGISTER_DEFINES
#define vsnoregi ((VectorSRegister)(vsnoreg_VectorSRegisterEnumValue))
#define VSR0 ((VectorSRegister)( VSR0_VectorSRegisterEnumValue))
#define VSR1 ((VectorSRegister)( VSR1_VectorSRegisterEnumValue))
#define VSR2 ((VectorSRegister)( VSR2_VectorSRegisterEnumValue))
#define VSR3 ((VectorSRegister)( VSR3_VectorSRegisterEnumValue))
#define VSR4 ((VectorSRegister)( VSR4_VectorSRegisterEnumValue))
#define VSR5 ((VectorSRegister)( VSR5_VectorSRegisterEnumValue))
#define VSR6 ((VectorSRegister)( VSR6_VectorSRegisterEnumValue))
#define VSR7 ((VectorSRegister)( VSR7_VectorSRegisterEnumValue))
#define VSR8 ((VectorSRegister)( VSR8_VectorSRegisterEnumValue))
#define VSR9 ((VectorSRegister)( VSR9_VectorSRegisterEnumValue))
#define VSR10 ((VectorSRegister)( VSR10_VectorSRegisterEnumValue))
#define VSR11 ((VectorSRegister)( VSR11_VectorSRegisterEnumValue))
#define VSR12 ((VectorSRegister)( VSR12_VectorSRegisterEnumValue))
#define VSR13 ((VectorSRegister)( VSR13_VectorSRegisterEnumValue))
#define VSR14 ((VectorSRegister)( VSR14_VectorSRegisterEnumValue))
#define VSR15 ((VectorSRegister)( VSR15_VectorSRegisterEnumValue))
#define VSR16 ((VectorSRegister)( VSR16_VectorSRegisterEnumValue))
#define VSR17 ((VectorSRegister)( VSR17_VectorSRegisterEnumValue))
#define VSR18 ((VectorSRegister)( VSR18_VectorSRegisterEnumValue))
#define VSR19 ((VectorSRegister)( VSR19_VectorSRegisterEnumValue))
#define VSR20 ((VectorSRegister)( VSR20_VectorSRegisterEnumValue))
#define VSR21 ((VectorSRegister)( VSR21_VectorSRegisterEnumValue))
#define VSR22 ((VectorSRegister)( VSR22_VectorSRegisterEnumValue))
#define VSR23 ((VectorSRegister)( VSR23_VectorSRegisterEnumValue))
#define VSR24 ((VectorSRegister)( VSR24_VectorSRegisterEnumValue))
#define VSR25 ((VectorSRegister)( VSR25_VectorSRegisterEnumValue))
#define VSR26 ((VectorSRegister)( VSR26_VectorSRegisterEnumValue))
#define VSR27 ((VectorSRegister)( VSR27_VectorSRegisterEnumValue))
#define VSR28 ((VectorSRegister)( VSR28_VectorSRegisterEnumValue))
#define VSR29 ((VectorSRegister)( VSR29_VectorSRegisterEnumValue))
#define VSR30 ((VectorSRegister)( VSR30_VectorSRegisterEnumValue))
#define VSR31 ((VectorSRegister)( VSR31_VectorSRegisterEnumValue))
#endif // DONT_USE_REGISTER_DEFINES
// Maximum number of incoming arguments that can be passed in i registers.
const int PPC_ARGS_IN_REGS_NUM = 8;
......
/*
* Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2018, SAP SE. All rights reserved.
* Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2019, SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
......@@ -1352,9 +1352,13 @@ class StubGenerator: public StubCodeGenerator {
Register tmp3 = R8_ARG6;
Register tmp4 = R9_ARG7;
VectorSRegister tmp_vsr1 = VSR1;
VectorSRegister tmp_vsr2 = VSR2;
address start = __ function_entry();
Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8;
Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9;
// don't try anything fancy if arrays don't have many elements
__ li(tmp3, 0);
__ cmpwi(CCR0, R5_ARG3, 9);
......@@ -1412,22 +1416,60 @@ class StubGenerator: public StubCodeGenerator {
__ andi_(R5_ARG3, R5_ARG3, 15);
__ mtctr(tmp1);
__ bind(l_8);
// Use unrolled version for mass copying (copy 16 elements a time).
// Load feeding store gets zero latency on Power6, however not on Power5.
// Therefore, the following sequence is made for the good of both.
__ ld(tmp1, 0, R3_ARG1);
__ ld(tmp2, 8, R3_ARG1);
__ ld(tmp3, 16, R3_ARG1);
__ ld(tmp4, 24, R3_ARG1);
__ std(tmp1, 0, R4_ARG2);
__ std(tmp2, 8, R4_ARG2);
__ std(tmp3, 16, R4_ARG2);
__ std(tmp4, 24, R4_ARG2);
__ addi(R3_ARG1, R3_ARG1, 32);
__ addi(R4_ARG2, R4_ARG2, 32);
__ bdnz(l_8);
}
if (!VM_Version::has_vsx()) {
__ bind(l_8);
// Use unrolled version for mass copying (copy 16 elements a time).
// Load feeding store gets zero latency on Power6, however not on Power5.
// Therefore, the following sequence is made for the good of both.
__ ld(tmp1, 0, R3_ARG1);
__ ld(tmp2, 8, R3_ARG1);
__ ld(tmp3, 16, R3_ARG1);
__ ld(tmp4, 24, R3_ARG1);
__ std(tmp1, 0, R4_ARG2);
__ std(tmp2, 8, R4_ARG2);
__ std(tmp3, 16, R4_ARG2);
__ std(tmp4, 24, R4_ARG2);
__ addi(R3_ARG1, R3_ARG1, 32);
__ addi(R4_ARG2, R4_ARG2, 32);
__ bdnz(l_8);
} else { // Processor supports VSX, so use it to mass copy.
// Prefetch src data into L2 cache.
__ dcbt(R3_ARG1, 0);
// If supported set DSCR pre-fetch to deepest.
if (VM_Version::has_mfdscr()) {
__ load_const_optimized(tmp2, VM_Version::_dscr_val | 7);
__ mtdscr(tmp2);
}
__ li(tmp1, 16);
// Backbranch target aligned to 32-byte. It's not aligned 16-byte
// as loop contains < 8 instructions that fit inside a single
// i-cache sector.
__ align(32);
__ bind(l_9);
// Use loop with VSX load/store instructions to
// copy 16 elements a time.
__ lxvd2x(tmp_vsr1, 0, R3_ARG1); // Load from src.
__ stxvd2x(tmp_vsr1, 0, R4_ARG2); // Store to dst.
__ lxvd2x(tmp_vsr2, R3_ARG1, tmp1); // Load from src + 16.
__ stxvd2x(tmp_vsr2, R4_ARG2, tmp1); // Store to dst + 16.
__ addi(R3_ARG1, R3_ARG1, 32); // Update src+=32.
__ addi(R4_ARG2, R4_ARG2, 32); // Update dsc+=32.
__ bdnz(l_9); // Dec CTR and loop if not zero.
// Restore DSCR pre-fetch value.
if (VM_Version::has_mfdscr()) {
__ load_const_optimized(tmp2, VM_Version::_dscr_val);
__ mtdscr(tmp2);
}
}
} // FasterArrayCopy
__ bind(l_6);
// copy 2 elements at a time
......
/*
* Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2014 SAP AG. All rights reserved.
* Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2018 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
......@@ -45,7 +45,7 @@ int VM_Version::_features = VM_Version::unknown_m;
int VM_Version::_measured_cache_line_size = 128; // default value
const char* VM_Version::_features_str = "";
bool VM_Version::_is_determine_features_test_running = false;
uint64_t VM_Version::_dscr_val = 0;
#define MSG(flag) \
if (flag && !FLAG_IS_DEFAULT(flag)) \
......@@ -60,7 +60,9 @@ void VM_Version::initialize() {
// If PowerArchitecturePPC64 hasn't been specified explicitly determine from features.
if (FLAG_IS_DEFAULT(PowerArchitecturePPC64)) {
if (VM_Version::has_popcntw()) {
if (VM_Version::has_lqarx()) {
FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 8);
} else if (VM_Version::has_popcntw()) {
FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 7);
} else if (VM_Version::has_cmpb()) {
FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 6);
......@@ -71,8 +73,14 @@ void VM_Version::initialize() {
}
}
guarantee(PowerArchitecturePPC64 == 0 || PowerArchitecturePPC64 == 5 ||
PowerArchitecturePPC64 == 6 || PowerArchitecturePPC64 == 7,
"PowerArchitecturePPC64 should be 0, 5, 6 or 7");
PowerArchitecturePPC64 == 6 || PowerArchitecturePPC64 == 7 ||
PowerArchitecturePPC64 == 8,
"PowerArchitecturePPC64 should be 0, 5, 6, 7, or 8");
// Power 8: Configure Data Stream Control Register.
if (PowerArchitecturePPC64 >= 8) {
config_dscr();
}
if (!UseSIGTRAP) {
MSG(TrapBasedICMissChecks);
......@@ -102,7 +110,7 @@ void VM_Version::initialize() {
// Create and print feature-string.
char buf[(num_features+1) * 16]; // Max 16 chars per feature.
jio_snprintf(buf, sizeof(buf),
"ppc64%s%s%s%s%s%s%s%s%s%s",
"ppc64%s%s%s%s%s%s%s%s%s%s%s%s%s",
(has_fsqrt() ? " fsqrt" : ""),
(has_isel() ? " isel" : ""),
(has_lxarxeh() ? " lxarxeh" : ""),
......@@ -112,12 +120,17 @@ void VM_Version::initialize() {
(has_popcntw() ? " popcntw" : ""),
(has_fcfids() ? " fcfids" : ""),
(has_vand() ? " vand" : ""),
(has_lqarx() ? " lqarx" : ""),
(has_vcipher() ? " aes" : ""),
(has_vpmsumb() ? " vpmsumb" : "")
(has_vpmsumb() ? " vpmsumb" : ""),
(has_mfdscr() ? " mfdscr" : ""),
(has_vsx() ? " vsx" : "")
// Make sure number of %s matches num_features!
);
_features_str = strdup(buf);
NOT_PRODUCT(if (Verbose) print_features(););
if (Verbose) {
print_features();
}
// PPC64 supports 8-byte compare-exchange operations (see
// Atomic::cmpxchg and StubGenerator::generate_atomic_cmpxchg_ptr)
......@@ -485,8 +498,11 @@ void VM_Version::determine_features() {
a->popcntw(R7, R5); // code[7] -> popcntw
a->fcfids(F3, F4); // code[8] -> fcfids
a->vand(VR0, VR0, VR0); // code[9] -> vand
a->vcipher(VR0, VR1, VR2); // code[10] -> vcipher
a->vpmsumb(VR0, VR1, VR2); // code[11] -> vpmsumb
a->lqarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[10] -> lqarx_m
a->vcipher(VR0, VR1, VR2); // code[11] -> vcipher
a->vpmsumb(VR0, VR1, VR2); // code[12] -> vpmsumb
a->mfdscr(R0); // code[13] -> mfdscr
a->lxvd2x(VSR0, 0, R3_ARG1); // code[14] -> vsx
a->blr();
// Emit function to set one cache line to zero. Emit function descriptor and get pointer to it.
......@@ -530,8 +546,11 @@ void VM_Version::determine_features() {
if (code[feature_cntr++]) features |= popcntw_m;
if (code[feature_cntr++]) features |= fcfids_m;
if (code[feature_cntr++]) features |= vand_m;
if (code[feature_cntr++]) features |= lqarx_m;
if (code[feature_cntr++]) features |= vcipher_m;
if (code[feature_cntr++]) features |= vpmsumb_m;
if (code[feature_cntr++]) features |= mfdscr_m;
if (code[feature_cntr++]) features |= vsx_m;
// Print the detection code.
if (PrintAssembly) {
......@@ -543,6 +562,69 @@ void VM_Version::determine_features() {
_features = features;
}
// Power 8: Configure Data Stream Control Register.
void VM_Version::config_dscr() {
assert(has_lqarx(), "Only execute on Power 8 or later!");
// 7 InstWords for each call (function descriptor + blr instruction).
const int code_size = (2+2*7)*BytesPerInstWord;
// Allocate space for the code.
ResourceMark rm;
CodeBuffer cb("config_dscr", code_size, 0);
MacroAssembler* a = new MacroAssembler(&cb);
// Emit code.
uint64_t (*get_dscr)() = (uint64_t(*)())(void *)a->function_entry();
uint32_t *code = (uint32_t *)a->pc();
a->mfdscr(R3);
a->blr();
void (*set_dscr)(long) = (void(*)(long))(void *)a->function_entry();
a->mtdscr(R3);
a->blr();
uint32_t *code_end = (uint32_t *)a->pc();
a->flush();
// Print the detection code.
if (PrintAssembly) {
ttyLocker ttyl;
tty->print_cr("Decoding dscr configuration stub at " INTPTR_FORMAT " before execution:", p2i(code));
Disassembler::decode((u_char*)code, (u_char*)code_end, tty);
}
// Apply the configuration if needed.
_dscr_val = (*get_dscr)();
if (Verbose) {
tty->print_cr("dscr value was 0x%lx" , _dscr_val);
}
bool change_requested = false;
if (DSCR_PPC64 != (uintx)-1) {
_dscr_val = DSCR_PPC64;
change_requested = true;
}
if (DSCR_DPFD_PPC64 <= 7) {
uint64_t mask = 0x7;
if ((_dscr_val & mask) != DSCR_DPFD_PPC64) {
_dscr_val = (_dscr_val & ~mask) | (DSCR_DPFD_PPC64);
change_requested = true;
}
}
if (DSCR_URG_PPC64 <= 7) {
uint64_t mask = 0x7 << 6;
if ((_dscr_val & mask) != DSCR_DPFD_PPC64 << 6) {
_dscr_val = (_dscr_val & ~mask) | (DSCR_URG_PPC64 << 6);
change_requested = true;
}
}
if (change_requested) {
(*set_dscr)(_dscr_val);
if (Verbose) {
tty->print_cr("dscr was set to 0x%lx" , (*get_dscr)());
}
}
}
static int saved_features = 0;
......
/*
* Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2014 SAP AG. All rights reserved.
* Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2018 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
......@@ -42,8 +42,11 @@ protected:
fcfids,
vand,
dcba,
lqarx,
vcipher,
vpmsumb,
mfdscr,
vsx,
num_features // last entry to count features
};
enum Feature_Flag_Set {
......@@ -58,8 +61,11 @@ protected:
fcfids_m = (1 << fcfids ),
vand_m = (1 << vand ),
dcba_m = (1 << dcba ),
lqarx_m = (1 << lqarx ),
vcipher_m = (1 << vcipher),
vpmsumb_m = (1 << vpmsumb),
mfdscr_m = (1 << mfdscr ),
vsx_m = (1 << vsx ),
all_features_m = -1
};
static int _features;
......@@ -69,6 +75,7 @@ protected:
static void print_features();
static void determine_features(); // also measures cache line size
static void config_dscr(); // Power 8: Configure Data Stream Control Register.
static void determine_section_size();
static void power6_micro_bench();
public:
......@@ -87,8 +94,11 @@ public:
static bool has_fcfids() { return (_features & fcfids_m) != 0; }
static bool has_vand() { return (_features & vand_m) != 0; }
static bool has_dcba() { return (_features & dcba_m) != 0; }
static bool has_lqarx() { return (_features & lqarx_m) != 0; }
static bool has_vcipher() { return (_features & vcipher_m) != 0; }
static bool has_vpmsumb() { return (_features & vpmsumb_m) != 0; }
static bool has_mfdscr() { return (_features & mfdscr_m) != 0; }
static bool has_vsx() { return (_features & vsx_m) != 0; }
static const char* cpu_features() { return _features_str; }
......@@ -97,6 +107,9 @@ public:
// Assembler testing
static void allow_all();
static void revert();
// POWER 8: DSCR current value.
static uint64_t _dscr_val;
};
#endif // CPU_PPC_VM_VM_VERSION_PPC_HPP
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册