From 4321c49fb9851a99e76c5898f878994c94644b15 Mon Sep 17 00:00:00 2001 From: kvn Date: Tue, 29 Jun 2010 10:34:00 -0700 Subject: [PATCH] 6964774: Adjust optimization flags setting Summary: Adjust performance flags settings. Reviewed-by: never, phh --- src/cpu/x86/vm/vm_version_x86.cpp | 72 ++++++++++++++++++++++++++---- src/cpu/x86/vm/vm_version_x86.hpp | 43 ++++++++++++++++-- src/share/vm/runtime/arguments.cpp | 3 ++ 3 files changed, 106 insertions(+), 12 deletions(-) diff --git a/src/cpu/x86/vm/vm_version_x86.cpp b/src/cpu/x86/vm/vm_version_x86.cpp index a00c88c3c..d6968a4f8 100644 --- a/src/cpu/x86/vm/vm_version_x86.cpp +++ b/src/cpu/x86/vm/vm_version_x86.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2009, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -34,7 +34,7 @@ const char* VM_Version::_features_str = ""; VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; static BufferBlob* stub_blob; -static const int stub_size = 300; +static const int stub_size = 400; extern "C" { typedef void (*getPsrInfo_stub_t)(void*); @@ -56,7 +56,7 @@ class VM_Version_StubGenerator: public StubCodeGenerator { const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT); const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT); - Label detect_486, cpu486, detect_586, std_cpuid1; + Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4; Label ext_cpuid1, ext_cpuid5, done; StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub"); @@ -131,13 +131,62 @@ class VM_Version_StubGenerator: public StubCodeGenerator { __ movl(Address(rsi, 8), rcx); __ movl(Address(rsi,12), rdx); - __ cmpl(rax, 3); // Is cpuid(0x4) supported? - __ jccb(Assembler::belowEqual, std_cpuid1); + __ cmpl(rax, 0xa); // Is cpuid(0xB) supported? + __ jccb(Assembler::belowEqual, std_cpuid4); + + // + // cpuid(0xB) Processor Topology + // + __ movl(rax, 0xb); + __ xorl(rcx, rcx); // Threads level + __ cpuid(); + + __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset()))); + __ movl(Address(rsi, 0), rax); + __ movl(Address(rsi, 4), rbx); + __ movl(Address(rsi, 8), rcx); + __ movl(Address(rsi,12), rdx); + + __ movl(rax, 0xb); + __ movl(rcx, 1); // Cores level + __ cpuid(); + __ push(rax); + __ andl(rax, 0x1f); // Determine if valid topology level + __ orl(rax, rbx); // eax[4:0] | ebx[0:15] == 0 indicates invalid level + __ andl(rax, 0xffff); + __ pop(rax); + __ jccb(Assembler::equal, std_cpuid4); + + __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset()))); + __ movl(Address(rsi, 0), rax); + __ movl(Address(rsi, 4), rbx); + __ movl(Address(rsi, 8), rcx); + __ movl(Address(rsi,12), rdx); + + __ movl(rax, 0xb); + __ movl(rcx, 2); // Packages level + __ cpuid(); + __ push(rax); + __ andl(rax, 0x1f); // Determine if valid topology level + __ orl(rax, rbx); // eax[4:0] | ebx[0:15] == 0 indicates invalid level + __ andl(rax, 0xffff); + __ pop(rax); + __ jccb(Assembler::equal, std_cpuid4); + + __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset()))); + __ movl(Address(rsi, 0), rax); + __ movl(Address(rsi, 4), rbx); + __ movl(Address(rsi, 8), rcx); + __ movl(Address(rsi,12), rdx); // // cpuid(0x4) Deterministic cache params // + __ bind(std_cpuid4); __ movl(rax, 4); + __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported? + __ jccb(Assembler::greater, std_cpuid1); + __ xorl(rcx, rcx); // L1 cache __ cpuid(); __ push(rax); @@ -460,13 +509,18 @@ void VM_Version::get_processor_features() { AllocatePrefetchDistance = allocate_prefetch_distance(); AllocatePrefetchStyle = allocate_prefetch_style(); - if( AllocatePrefetchStyle == 2 && is_intel() && - cpu_family() == 6 && supports_sse3() ) { // watermark prefetching on Core + if( is_intel() && cpu_family() == 6 && supports_sse3() ) { + if( AllocatePrefetchStyle == 2 ) { // watermark prefetching on Core #ifdef _LP64 - AllocatePrefetchDistance = 384; + AllocatePrefetchDistance = 384; #else - AllocatePrefetchDistance = 320; + AllocatePrefetchDistance = 320; #endif + } + if( supports_sse4_2() && supports_ht() ) { // Nehalem based cpus + AllocatePrefetchDistance = 192; + AllocatePrefetchLines = 4; + } } assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value"); diff --git a/src/cpu/x86/vm/vm_version_x86.hpp b/src/cpu/x86/vm/vm_version_x86.hpp index bbb360f9c..3a76612ac 100644 --- a/src/cpu/x86/vm/vm_version_x86.hpp +++ b/src/cpu/x86/vm/vm_version_x86.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2009, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -114,6 +114,14 @@ public: } bits; }; + union TplCpuidBEbx { + uint32_t value; + struct { + uint32_t logical_cpus : 16, + : 16; + } bits; + }; + union ExtCpuid1Ecx { uint32_t value; struct { @@ -211,6 +219,25 @@ protected: uint32_t dcp_cpuid4_ecx; // unused currently uint32_t dcp_cpuid4_edx; // unused currently + // cpuid function 0xB (processor topology) + // ecx = 0 + uint32_t tpl_cpuidB0_eax; + TplCpuidBEbx tpl_cpuidB0_ebx; + uint32_t tpl_cpuidB0_ecx; // unused currently + uint32_t tpl_cpuidB0_edx; // unused currently + + // ecx = 1 + uint32_t tpl_cpuidB1_eax; + TplCpuidBEbx tpl_cpuidB1_ebx; + uint32_t tpl_cpuidB1_ecx; // unused currently + uint32_t tpl_cpuidB1_edx; // unused currently + + // ecx = 2 + uint32_t tpl_cpuidB2_eax; + TplCpuidBEbx tpl_cpuidB2_ebx; + uint32_t tpl_cpuidB2_ecx; // unused currently + uint32_t tpl_cpuidB2_edx; // unused currently + // cpuid function 0x80000000 // example, unused uint32_t ext_max_function; uint32_t ext_vendor_name_0; @@ -316,6 +343,9 @@ public: static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); } static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); } static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); } + static ByteSize tpl_cpuidB0_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); } + static ByteSize tpl_cpuidB1_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); } + static ByteSize tpl_cpuidB2_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); } // Initialization static void initialize(); @@ -349,7 +379,12 @@ public: static uint cores_per_cpu() { uint result = 1; if (is_intel()) { - result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); + if (_cpuid_info.std_max_function >= 0xB) { + result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus / + _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; + } else { + result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); + } } else if (is_amd()) { result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1); } @@ -358,7 +393,9 @@ public: static uint threads_per_core() { uint result = 1; - if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) { + if (is_intel() && _cpuid_info.std_max_function >= 0xB) { + result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; + } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) { result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu / cores_per_cpu(); } diff --git a/src/share/vm/runtime/arguments.cpp b/src/share/vm/runtime/arguments.cpp index 4d2f6762e..03ff472e3 100644 --- a/src/share/vm/runtime/arguments.cpp +++ b/src/share/vm/runtime/arguments.cpp @@ -1513,6 +1513,9 @@ void Arguments::set_aggressive_opts_flags() { if (AggressiveOpts && FLAG_IS_DEFAULT(BiasedLockingStartupDelay)) { FLAG_SET_DEFAULT(BiasedLockingStartupDelay, 500); } + if (AggressiveOpts && FLAG_IS_DEFAULT(OptimizeStringConcat)) { + FLAG_SET_DEFAULT(OptimizeStringConcat, true); + } #endif if (AggressiveOpts) { -- GitLab