提交 4321c49f 编写于 作者: K kvn

6964774: Adjust optimization flags setting

Summary: Adjust performance flags settings.
Reviewed-by: never, phh
上级 d0db2eff
/*
* Copyright (c) 1997, 2009, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2010, Oracle and/or its affiliates. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
......@@ -34,7 +34,7 @@ const char* VM_Version::_features_str = "";
VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };
static BufferBlob* stub_blob;
static const int stub_size = 300;
static const int stub_size = 400;
extern "C" {
typedef void (*getPsrInfo_stub_t)(void*);
......@@ -56,7 +56,7 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
Label detect_486, cpu486, detect_586, std_cpuid1;
Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
Label ext_cpuid1, ext_cpuid5, done;
StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub");
......@@ -131,13 +131,62 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
__ movl(Address(rsi, 8), rcx);
__ movl(Address(rsi,12), rdx);
__ cmpl(rax, 3); // Is cpuid(0x4) supported?
__ jccb(Assembler::belowEqual, std_cpuid1);
__ cmpl(rax, 0xa); // Is cpuid(0xB) supported?
__ jccb(Assembler::belowEqual, std_cpuid4);
//
// cpuid(0xB) Processor Topology
//
__ movl(rax, 0xb);
__ xorl(rcx, rcx); // Threads level
__ cpuid();
__ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
__ movl(Address(rsi, 0), rax);
__ movl(Address(rsi, 4), rbx);
__ movl(Address(rsi, 8), rcx);
__ movl(Address(rsi,12), rdx);
__ movl(rax, 0xb);
__ movl(rcx, 1); // Cores level
__ cpuid();
__ push(rax);
__ andl(rax, 0x1f); // Determine if valid topology level
__ orl(rax, rbx); // eax[4:0] | ebx[0:15] == 0 indicates invalid level
__ andl(rax, 0xffff);
__ pop(rax);
__ jccb(Assembler::equal, std_cpuid4);
__ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
__ movl(Address(rsi, 0), rax);
__ movl(Address(rsi, 4), rbx);
__ movl(Address(rsi, 8), rcx);
__ movl(Address(rsi,12), rdx);
__ movl(rax, 0xb);
__ movl(rcx, 2); // Packages level
__ cpuid();
__ push(rax);
__ andl(rax, 0x1f); // Determine if valid topology level
__ orl(rax, rbx); // eax[4:0] | ebx[0:15] == 0 indicates invalid level
__ andl(rax, 0xffff);
__ pop(rax);
__ jccb(Assembler::equal, std_cpuid4);
__ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
__ movl(Address(rsi, 0), rax);
__ movl(Address(rsi, 4), rbx);
__ movl(Address(rsi, 8), rcx);
__ movl(Address(rsi,12), rdx);
//
// cpuid(0x4) Deterministic cache params
//
__ bind(std_cpuid4);
__ movl(rax, 4);
__ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
__ jccb(Assembler::greater, std_cpuid1);
__ xorl(rcx, rcx); // L1 cache
__ cpuid();
__ push(rax);
......@@ -460,13 +509,18 @@ void VM_Version::get_processor_features() {
AllocatePrefetchDistance = allocate_prefetch_distance();
AllocatePrefetchStyle = allocate_prefetch_style();
if( AllocatePrefetchStyle == 2 && is_intel() &&
cpu_family() == 6 && supports_sse3() ) { // watermark prefetching on Core
if( is_intel() && cpu_family() == 6 && supports_sse3() ) {
if( AllocatePrefetchStyle == 2 ) { // watermark prefetching on Core
#ifdef _LP64
AllocatePrefetchDistance = 384;
AllocatePrefetchDistance = 384;
#else
AllocatePrefetchDistance = 320;
AllocatePrefetchDistance = 320;
#endif
}
if( supports_sse4_2() && supports_ht() ) { // Nehalem based cpus
AllocatePrefetchDistance = 192;
AllocatePrefetchLines = 4;
}
}
assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value");
......
/*
* Copyright (c) 1997, 2009, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2010, Oracle and/or its affiliates. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
......@@ -114,6 +114,14 @@ public:
} bits;
};
union TplCpuidBEbx {
uint32_t value;
struct {
uint32_t logical_cpus : 16,
: 16;
} bits;
};
union ExtCpuid1Ecx {
uint32_t value;
struct {
......@@ -211,6 +219,25 @@ protected:
uint32_t dcp_cpuid4_ecx; // unused currently
uint32_t dcp_cpuid4_edx; // unused currently
// cpuid function 0xB (processor topology)
// ecx = 0
uint32_t tpl_cpuidB0_eax;
TplCpuidBEbx tpl_cpuidB0_ebx;
uint32_t tpl_cpuidB0_ecx; // unused currently
uint32_t tpl_cpuidB0_edx; // unused currently
// ecx = 1
uint32_t tpl_cpuidB1_eax;
TplCpuidBEbx tpl_cpuidB1_ebx;
uint32_t tpl_cpuidB1_ecx; // unused currently
uint32_t tpl_cpuidB1_edx; // unused currently
// ecx = 2
uint32_t tpl_cpuidB2_eax;
TplCpuidBEbx tpl_cpuidB2_ebx;
uint32_t tpl_cpuidB2_ecx; // unused currently
uint32_t tpl_cpuidB2_edx; // unused currently
// cpuid function 0x80000000 // example, unused
uint32_t ext_max_function;
uint32_t ext_vendor_name_0;
......@@ -316,6 +343,9 @@ public:
static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); }
static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
static ByteSize tpl_cpuidB0_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
static ByteSize tpl_cpuidB1_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
static ByteSize tpl_cpuidB2_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
// Initialization
static void initialize();
......@@ -349,7 +379,12 @@ public:
static uint cores_per_cpu() {
uint result = 1;
if (is_intel()) {
result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
if (_cpuid_info.std_max_function >= 0xB) {
result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
_cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
} else {
result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
}
} else if (is_amd()) {
result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1);
}
......@@ -358,7 +393,9 @@ public:
static uint threads_per_core() {
uint result = 1;
if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
if (is_intel() && _cpuid_info.std_max_function >= 0xB) {
result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
} else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
cores_per_cpu();
}
......
......@@ -1513,6 +1513,9 @@ void Arguments::set_aggressive_opts_flags() {
if (AggressiveOpts && FLAG_IS_DEFAULT(BiasedLockingStartupDelay)) {
FLAG_SET_DEFAULT(BiasedLockingStartupDelay, 500);
}
if (AggressiveOpts && FLAG_IS_DEFAULT(OptimizeStringConcat)) {
FLAG_SET_DEFAULT(OptimizeStringConcat, true);
}
#endif
if (AggressiveOpts) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册