提交 4321c49f 编写于 作者: K kvn

6964774: Adjust optimization flags setting

Summary: Adjust performance flags settings.
Reviewed-by: never, phh
上级 d0db2eff
/* /*
* Copyright (c) 1997, 2009, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
...@@ -34,7 +34,7 @@ const char* VM_Version::_features_str = ""; ...@@ -34,7 +34,7 @@ const char* VM_Version::_features_str = "";
VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };
static BufferBlob* stub_blob; static BufferBlob* stub_blob;
static const int stub_size = 300; static const int stub_size = 400;
extern "C" { extern "C" {
typedef void (*getPsrInfo_stub_t)(void*); typedef void (*getPsrInfo_stub_t)(void*);
...@@ -56,7 +56,7 @@ class VM_Version_StubGenerator: public StubCodeGenerator { ...@@ -56,7 +56,7 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT); const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT); const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
Label detect_486, cpu486, detect_586, std_cpuid1; Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
Label ext_cpuid1, ext_cpuid5, done; Label ext_cpuid1, ext_cpuid5, done;
StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub"); StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub");
...@@ -131,13 +131,62 @@ class VM_Version_StubGenerator: public StubCodeGenerator { ...@@ -131,13 +131,62 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
__ movl(Address(rsi, 8), rcx); __ movl(Address(rsi, 8), rcx);
__ movl(Address(rsi,12), rdx); __ movl(Address(rsi,12), rdx);
__ cmpl(rax, 3); // Is cpuid(0x4) supported? __ cmpl(rax, 0xa); // Is cpuid(0xB) supported?
__ jccb(Assembler::belowEqual, std_cpuid1); __ jccb(Assembler::belowEqual, std_cpuid4);
//
// cpuid(0xB) Processor Topology
//
__ movl(rax, 0xb);
__ xorl(rcx, rcx); // Threads level
__ cpuid();
__ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
__ movl(Address(rsi, 0), rax);
__ movl(Address(rsi, 4), rbx);
__ movl(Address(rsi, 8), rcx);
__ movl(Address(rsi,12), rdx);
__ movl(rax, 0xb);
__ movl(rcx, 1); // Cores level
__ cpuid();
__ push(rax);
__ andl(rax, 0x1f); // Determine if valid topology level
__ orl(rax, rbx); // eax[4:0] | ebx[0:15] == 0 indicates invalid level
__ andl(rax, 0xffff);
__ pop(rax);
__ jccb(Assembler::equal, std_cpuid4);
__ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
__ movl(Address(rsi, 0), rax);
__ movl(Address(rsi, 4), rbx);
__ movl(Address(rsi, 8), rcx);
__ movl(Address(rsi,12), rdx);
__ movl(rax, 0xb);
__ movl(rcx, 2); // Packages level
__ cpuid();
__ push(rax);
__ andl(rax, 0x1f); // Determine if valid topology level
__ orl(rax, rbx); // eax[4:0] | ebx[0:15] == 0 indicates invalid level
__ andl(rax, 0xffff);
__ pop(rax);
__ jccb(Assembler::equal, std_cpuid4);
__ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
__ movl(Address(rsi, 0), rax);
__ movl(Address(rsi, 4), rbx);
__ movl(Address(rsi, 8), rcx);
__ movl(Address(rsi,12), rdx);
// //
// cpuid(0x4) Deterministic cache params // cpuid(0x4) Deterministic cache params
// //
__ bind(std_cpuid4);
__ movl(rax, 4); __ movl(rax, 4);
__ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
__ jccb(Assembler::greater, std_cpuid1);
__ xorl(rcx, rcx); // L1 cache __ xorl(rcx, rcx); // L1 cache
__ cpuid(); __ cpuid();
__ push(rax); __ push(rax);
...@@ -460,13 +509,18 @@ void VM_Version::get_processor_features() { ...@@ -460,13 +509,18 @@ void VM_Version::get_processor_features() {
AllocatePrefetchDistance = allocate_prefetch_distance(); AllocatePrefetchDistance = allocate_prefetch_distance();
AllocatePrefetchStyle = allocate_prefetch_style(); AllocatePrefetchStyle = allocate_prefetch_style();
if( AllocatePrefetchStyle == 2 && is_intel() && if( is_intel() && cpu_family() == 6 && supports_sse3() ) {
cpu_family() == 6 && supports_sse3() ) { // watermark prefetching on Core if( AllocatePrefetchStyle == 2 ) { // watermark prefetching on Core
#ifdef _LP64 #ifdef _LP64
AllocatePrefetchDistance = 384; AllocatePrefetchDistance = 384;
#else #else
AllocatePrefetchDistance = 320; AllocatePrefetchDistance = 320;
#endif #endif
}
if( supports_sse4_2() && supports_ht() ) { // Nehalem based cpus
AllocatePrefetchDistance = 192;
AllocatePrefetchLines = 4;
}
} }
assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value"); assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value");
......
/* /*
* Copyright (c) 1997, 2009, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
...@@ -114,6 +114,14 @@ public: ...@@ -114,6 +114,14 @@ public:
} bits; } bits;
}; };
// View of the EBX word saved from a cpuid function 0xB (processor topology)
// query. 'value' exposes the raw 32-bit register contents; 'bits' overlays
// named bit-fields on the same storage.
union TplCpuidBEbx {
  uint32_t value;               // raw register contents
  struct {
    uint32_t logical_cpus : 16; // first 16-bit field: logical processor count
    uint32_t              : 16; // unnamed filler covering the remaining 16 bits
  } bits;
};
union ExtCpuid1Ecx { union ExtCpuid1Ecx {
uint32_t value; uint32_t value;
struct { struct {
...@@ -211,6 +219,25 @@ protected: ...@@ -211,6 +219,25 @@ protected:
uint32_t dcp_cpuid4_ecx; // unused currently uint32_t dcp_cpuid4_ecx; // unused currently
uint32_t dcp_cpuid4_edx; // unused currently uint32_t dcp_cpuid4_edx; // unused currently
// cpuid function 0xB (processor topology)
// ecx = 0
uint32_t tpl_cpuidB0_eax;
TplCpuidBEbx tpl_cpuidB0_ebx;
uint32_t tpl_cpuidB0_ecx; // unused currently
uint32_t tpl_cpuidB0_edx; // unused currently
// ecx = 1
uint32_t tpl_cpuidB1_eax;
TplCpuidBEbx tpl_cpuidB1_ebx;
uint32_t tpl_cpuidB1_ecx; // unused currently
uint32_t tpl_cpuidB1_edx; // unused currently
// ecx = 2
uint32_t tpl_cpuidB2_eax;
TplCpuidBEbx tpl_cpuidB2_ebx;
uint32_t tpl_cpuidB2_ecx; // unused currently
uint32_t tpl_cpuidB2_edx; // unused currently
// cpuid function 0x80000000 // example, unused // cpuid function 0x80000000 // example, unused
uint32_t ext_max_function; uint32_t ext_max_function;
uint32_t ext_vendor_name_0; uint32_t ext_vendor_name_0;
...@@ -316,6 +343,9 @@ public: ...@@ -316,6 +343,9 @@ public:
static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); } static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); }
static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); } static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); } static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
// Byte offset of the tpl_cpuidB0_eax field inside CpuidInfo.
static ByteSize tpl_cpuidB0_offset() {
  return byte_offset_of(CpuidInfo, tpl_cpuidB0_eax);
}
// Byte offset of the tpl_cpuidB1_eax field inside CpuidInfo.
static ByteSize tpl_cpuidB1_offset() {
  return byte_offset_of(CpuidInfo, tpl_cpuidB1_eax);
}
// Byte offset of the tpl_cpuidB2_eax field inside CpuidInfo.
static ByteSize tpl_cpuidB2_offset() {
  return byte_offset_of(CpuidInfo, tpl_cpuidB2_eax);
}
// Initialization // Initialization
static void initialize(); static void initialize();
...@@ -349,7 +379,12 @@ public: ...@@ -349,7 +379,12 @@ public:
static uint cores_per_cpu() { static uint cores_per_cpu() {
uint result = 1; uint result = 1;
if (is_intel()) { if (is_intel()) {
result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); if (_cpuid_info.std_max_function >= 0xB) {
result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
_cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
} else {
result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
}
} else if (is_amd()) { } else if (is_amd()) {
result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1); result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1);
} }
...@@ -358,7 +393,9 @@ public: ...@@ -358,7 +393,9 @@ public:
static uint threads_per_core() { static uint threads_per_core() {
uint result = 1; uint result = 1;
if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) { if (is_intel() && _cpuid_info.std_max_function >= 0xB) {
result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
} else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu / result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
cores_per_cpu(); cores_per_cpu();
} }
......
...@@ -1513,6 +1513,9 @@ void Arguments::set_aggressive_opts_flags() { ...@@ -1513,6 +1513,9 @@ void Arguments::set_aggressive_opts_flags() {
if (AggressiveOpts && FLAG_IS_DEFAULT(BiasedLockingStartupDelay)) { if (AggressiveOpts && FLAG_IS_DEFAULT(BiasedLockingStartupDelay)) {
FLAG_SET_DEFAULT(BiasedLockingStartupDelay, 500); FLAG_SET_DEFAULT(BiasedLockingStartupDelay, 500);
} }
if (AggressiveOpts && FLAG_IS_DEFAULT(OptimizeStringConcat)) {
FLAG_SET_DEFAULT(OptimizeStringConcat, true);
}
#endif #endif
if (AggressiveOpts) { if (AggressiveOpts) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册