提交 09d1c944 编写于 作者: K kevinw

8049717: expose L1_data_cache_line_size for diagnostic/sanity checks

Summary: Add support for VM_Version::L1_data_cache_line_size().
Reviewed-by: dsimms, kvn, dholmes
上级 87c03b2f
...@@ -259,6 +259,49 @@ void VM_Version::initialize() { ...@@ -259,6 +259,49 @@ void VM_Version::initialize() {
// buf is started with ", " or is empty // buf is started with ", " or is empty
_features_str = strdup(strlen(buf) > 2 ? buf + 2 : buf); _features_str = strdup(strlen(buf) > 2 ? buf + 2 : buf);
// There are three 64-bit SPARC families that do not overlap, e.g.,
// both is_ultra3() and is_sparc64() cannot be true at the same time.
// Within these families, there can be more than one chip, e.g.,
// is_T4() and is_T7() machines are also is_niagara().
if (is_ultra3()) {
assert(_L1_data_cache_line_size == 0, "overlap with Ultra3 family");
// Ref: UltraSPARC III Cu Processor
_L1_data_cache_line_size = 64;
}
if (is_niagara()) {
assert(_L1_data_cache_line_size == 0, "overlap with niagara family");
// All Niagaras are sun4v's, but not all sun4v's are Niagaras, e.g.,
// Fujitsu SPARC64 is sun4v, but we don't want it in this block.
//
// Ref: UltraSPARC T1 Supplement to the UltraSPARC Architecture 2005
// Appendix F.1.3.1 Cacheable Accesses
// -> 16-byte L1 cache line size
//
// Ref: UltraSPARC T2: A Highly-Threaded, Power-Efficient, SPARC SOC
// Section III: SPARC Processor Core
// -> 16-byte L1 cache line size
//
// Ref: Oracle's SPARC T4-1, SPARC T4-2, SPARC T4-4, and SPARC T4-1B Server Architecture
// Section SPARC T4 Processor Cache Architecture
// -> 32-byte L1 cache line size (no longer see that info on this ref)
//
// XXX - still need a T7 reference here
//
if (is_T7()) { // T7 or newer
_L1_data_cache_line_size = 64;
} else if (is_T4()) { // T4 or newer (until T7)
_L1_data_cache_line_size = 32;
} else { // T1 or newer (until T4)
_L1_data_cache_line_size = 16;
}
}
if (is_sparc64()) {
guarantee(_L1_data_cache_line_size == 0, "overlap with SPARC64 family");
// Ref: Fujitsu SPARC64 VII Processor
// Section 4 Cache System
_L1_data_cache_line_size = 64;
}
// UseVIS is set to the smallest of what hardware supports and what // UseVIS is set to the smallest of what hardware supports and what
// the command line requires. I.e., you cannot set UseVIS to 3 on // the command line requires. I.e., you cannot set UseVIS to 3 on
// older UltraSparc which do not support it. // older UltraSparc which do not support it.
...@@ -364,6 +407,7 @@ void VM_Version::initialize() { ...@@ -364,6 +407,7 @@ void VM_Version::initialize() {
#ifndef PRODUCT #ifndef PRODUCT
if (PrintMiscellaneous && Verbose) { if (PrintMiscellaneous && Verbose) {
tty->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
tty->print_cr("L2 data cache line size: %u", L2_data_cache_line_size()); tty->print_cr("L2 data cache line size: %u", L2_data_cache_line_size());
tty->print("Allocation"); tty->print("Allocation");
if (AllocatePrefetchStyle <= 0) { if (AllocatePrefetchStyle <= 0) {
......
...@@ -406,6 +406,8 @@ void VM_Version::get_processor_features() { ...@@ -406,6 +406,8 @@ void VM_Version::get_processor_features() {
_stepping = 0; _stepping = 0;
_cpuFeatures = 0; _cpuFeatures = 0;
_logical_processors_per_package = 1; _logical_processors_per_package = 1;
// i486 internal cache is both I&D and has a 16-byte line size
_L1_data_cache_line_size = 16;
if (!Use486InstrsOnly) { if (!Use486InstrsOnly) {
// Get raw processor info // Get raw processor info
...@@ -424,6 +426,7 @@ void VM_Version::get_processor_features() { ...@@ -424,6 +426,7 @@ void VM_Version::get_processor_features() {
// Logical processors are only available on P4s and above, // Logical processors are only available on P4s and above,
// and only if hyperthreading is available. // and only if hyperthreading is available.
_logical_processors_per_package = logical_processor_count(); _logical_processors_per_package = logical_processor_count();
_L1_data_cache_line_size = L1_line_size();
} }
} }
...@@ -1034,6 +1037,7 @@ void VM_Version::get_processor_features() { ...@@ -1034,6 +1037,7 @@ void VM_Version::get_processor_features() {
if (PrintMiscellaneous && Verbose) { if (PrintMiscellaneous && Verbose) {
tty->print_cr("Logical CPUs per core: %u", tty->print_cr("Logical CPUs per core: %u",
logical_processors_per_package()); logical_processors_per_package());
tty->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
tty->print("UseSSE=%d", (int) UseSSE); tty->print("UseSSE=%d", (int) UseSSE);
if (UseAVX > 0) { if (UseAVX > 0) {
tty->print(" UseAVX=%d", (int) UseAVX); tty->print(" UseAVX=%d", (int) UseAVX);
......
/* /*
* Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
...@@ -595,7 +595,7 @@ public: ...@@ -595,7 +595,7 @@ public:
return (result == 0 ? 1 : result); return (result == 0 ? 1 : result);
} }
static intx prefetch_data_size() { static intx L1_line_size() {
intx result = 0; intx result = 0;
if (is_intel()) { if (is_intel()) {
result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
...@@ -607,6 +607,10 @@ public: ...@@ -607,6 +607,10 @@ public:
return result; return result;
} }
// Returns whatever L1_line_size() reports; this function adds no logic of
// its own and simply forwards the call.
static intx prefetch_data_size() {
return L1_line_size();
}
// //
// Feature identification // Feature identification
// //
......
...@@ -5129,6 +5129,7 @@ void execute_internal_vm_tests() { ...@@ -5129,6 +5129,7 @@ void execute_internal_vm_tests() {
run_unit_test(TestKlass_test()); run_unit_test(TestKlass_test());
run_unit_test(Test_linked_list()); run_unit_test(Test_linked_list());
run_unit_test(TestChunkedList_test()); run_unit_test(TestChunkedList_test());
run_unit_test(ObjectMonitor::sanity_checks());
#if INCLUDE_VM_STRUCTS #if INCLUDE_VM_STRUCTS
run_unit_test(VMStructs::test()); run_unit_test(VMStructs::test());
#endif #endif
......
...@@ -2529,6 +2529,10 @@ void ObjectMonitor::DeferredInitialize () { ...@@ -2529,6 +2529,10 @@ void ObjectMonitor::DeferredInitialize () {
SETKNOB(FastHSSEC) ; SETKNOB(FastHSSEC) ;
#undef SETKNOB #undef SETKNOB
if (Knob_Verbose) {
sanity_checks();
}
if (os::is_MP()) { if (os::is_MP()) {
BackOffMask = (1 << Knob_SpinBackOff) - 1 ; BackOffMask = (1 << Knob_SpinBackOff) - 1 ;
if (Knob_ReportSettings) ::printf ("BackOffMask=%X\n", BackOffMask) ; if (Knob_ReportSettings) ::printf ("BackOffMask=%X\n", BackOffMask) ;
...@@ -2549,6 +2553,66 @@ void ObjectMonitor::DeferredInitialize () { ...@@ -2549,6 +2553,66 @@ void ObjectMonitor::DeferredInitialize () {
InitDone = 1 ; InitDone = 1 ;
} }
void ObjectMonitor::sanity_checks() {
int error_cnt = 0;
int warning_cnt = 0;
bool verbose = Knob_Verbose != 0 NOT_PRODUCT(|| VerboseInternalVMTests);
if (verbose) {
tty->print_cr("INFO: sizeof(ObjectMonitor)=" SIZE_FORMAT,
sizeof(ObjectMonitor));
}
uint cache_line_size = VM_Version::L1_data_cache_line_size();
if (verbose) {
tty->print_cr("INFO: L1_data_cache_line_size=%u", cache_line_size);
}
ObjectMonitor dummy;
u_char *addr_begin = (u_char*)&dummy;
u_char *addr_header = (u_char*)&dummy._header;
u_char *addr_owner = (u_char*)&dummy._owner;
uint offset_header = (uint)(addr_header - addr_begin);
if (verbose) tty->print_cr("INFO: offset(_header)=%u", offset_header);
uint offset_owner = (uint)(addr_owner - addr_begin);
if (verbose) tty->print_cr("INFO: offset(_owner)=%u", offset_owner);
if ((uint)(addr_header - addr_begin) != 0) {
tty->print_cr("ERROR: offset(_header) must be zero (0).");
error_cnt++;
}
if (cache_line_size != 0) {
// We were able to determine the L1 data cache line size so
// do some cache line specific sanity checks
if ((offset_owner - offset_header) < cache_line_size) {
tty->print_cr("WARNING: the _header and _owner fields are closer "
"than a cache line which permits false sharing.");
warning_cnt++;
}
if ((sizeof(ObjectMonitor) % cache_line_size) != 0) {
tty->print_cr("WARNING: ObjectMonitor size is not a multiple of "
"a cache line which permits false sharing.");
warning_cnt++;
}
}
ObjectSynchronizer::sanity_checks(verbose, cache_line_size, &error_cnt,
&warning_cnt);
if (verbose || error_cnt != 0 || warning_cnt != 0) {
tty->print_cr("INFO: error_cnt=%d", error_cnt);
tty->print_cr("INFO: warning_cnt=%d", warning_cnt);
}
guarantee(error_cnt == 0,
"Fatal error(s) found in ObjectMonitor::sanity_checks()");
}
#ifndef PRODUCT #ifndef PRODUCT
void ObjectMonitor::verify() { void ObjectMonitor::verify() {
} }
......
...@@ -189,6 +189,8 @@ public: ...@@ -189,6 +189,8 @@ public:
bool check(TRAPS); // true if the thread owns the monitor. bool check(TRAPS); // true if the thread owns the monitor.
void check_slow(TRAPS); void check_slow(TRAPS);
void clear(); void clear();
static void sanity_checks(); // public for -XX:+ExecuteInternalVMTests
// in PRODUCT for -XX:SyncKnobs=Verbose=1
#ifndef PRODUCT #ifndef PRODUCT
void verify(); void verify();
void print(); void print();
...@@ -234,8 +236,6 @@ public: ...@@ -234,8 +236,6 @@ public:
// WARNING: this must be the very first word of ObjectMonitor // WARNING: this must be the very first word of ObjectMonitor
// This means this class can't use any virtual member functions. // This means this class can't use any virtual member functions.
// TODO-FIXME: assert that offsetof(_header) is 0 or get rid of the
// implicit 0 offset in emitted code.
volatile markOop _header; // displaced object header word - mark volatile markOop _header; // displaced object header word - mark
void* volatile _object; // backward object pointer - strong root void* volatile _object; // backward object pointer - strong root
......
...@@ -437,19 +437,22 @@ void ObjectSynchronizer::notifyall(Handle obj, TRAPS) { ...@@ -437,19 +437,22 @@ void ObjectSynchronizer::notifyall(Handle obj, TRAPS) {
// Hash Code handling // Hash Code handling
// //
// Performance concern: // Performance concern:
// OrderAccess::storestore() calls release() which STs 0 into the global volatile // OrderAccess::storestore() calls release() which at one time stored 0
// OrderAccess::Dummy variable. This store is unnecessary for correctness. // into the global volatile OrderAccess::dummy variable. This store was
// Many threads STing into a common location causes considerable cache migration // unnecessary for correctness. Many threads storing into a common location
// or "sloshing" on large SMP system. As such, I avoid using OrderAccess::storestore() // causes considerable cache migration or "sloshing" on large SMP systems.
// until it's repaired. In some cases OrderAccess::fence() -- which incurs local // As such, I avoided using OrderAccess::storestore(). In some cases
// latency on the executing processor -- is a better choice as it scales on SMP // OrderAccess::fence() -- which incurs local latency on the executing
// systems. See http://blogs.sun.com/dave/entry/biased_locking_in_hotspot for a // processor -- is a better choice as it scales on SMP systems.
// discussion of coherency costs. Note that all our current reference platforms //
// provide strong ST-ST order, so the issue is moot on IA32, x64, and SPARC. // See http://blogs.oracle.com/dave/entry/biased_locking_in_hotspot for
// a discussion of coherency costs. Note that all our current reference
// platforms provide strong ST-ST order, so the issue is moot on IA32,
// x64, and SPARC.
// //
// As a general policy we use "volatile" to control compiler-based reordering // As a general policy we use "volatile" to control compiler-based reordering
// and explicit fences (barriers) to control for architectural reordering performed // and explicit fences (barriers) to control for architectural reordering
// by the CPU(s) or platform. // performed by the CPU(s) or platform.
struct SharedGlobals { struct SharedGlobals {
// These are highly shared mostly-read variables. // These are highly shared mostly-read variables.
...@@ -1636,7 +1639,55 @@ void ObjectSynchronizer::release_monitors_owned_by_thread(TRAPS) { ...@@ -1636,7 +1639,55 @@ void ObjectSynchronizer::release_monitors_owned_by_thread(TRAPS) {
} }
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Non-product code // Debugging code
// Diagnostic layout checks for the SharedGlobals instance (GVars).
//
// When 'verbose' is set, prints sizeof(SharedGlobals) and the offsets of
// its stwRandom and hcSequence fields. When 'cache_line_size' is non-zero,
// prints a WARNING and bumps *warning_cnt_ptr for each of:
//   - stwRandom closer than one cache line to the start of the struct,
//   - stwRandom and hcSequence closer than one cache line to each other,
//   - hcSequence closer than one cache line to the end of the struct.
//
//   verbose          - print INFO lines describing the layout
//   cache_line_size  - L1 data cache line size in bytes; 0 skips the
//                      cache line specific checks
//   error_cnt_ptr    - caller's error counter; none of the checks currently
//                      implemented here writes to it
//   warning_cnt_ptr  - caller's warning counter; incremented once per
//                      WARNING printed
void ObjectSynchronizer::sanity_checks(const bool verbose,
                                       const uint cache_line_size,
                                       int *error_cnt_ptr,
                                       int *warning_cnt_ptr) {
  u_char *addr_begin      = (u_char*)&GVars;
  u_char *addr_stwRandom  = (u_char*)&GVars.stwRandom;
  u_char *addr_hcSequence = (u_char*)&GVars.hcSequence;

  if (verbose) {
    tty->print_cr("INFO: sizeof(SharedGlobals)=" SIZE_FORMAT,
                  sizeof(SharedGlobals));
  }

  uint offset_stwRandom = (uint)(addr_stwRandom - addr_begin);
  if (verbose) tty->print_cr("INFO: offset(stwRandom)=%u", offset_stwRandom);

  uint offset_hcSequence = (uint)(addr_hcSequence - addr_begin);
  if (verbose) {
    // The label names the field as it is declared (hcSequence, no leading
    // underscore), matching the stwRandom label above.
    tty->print_cr("INFO: offset(hcSequence)=%u", offset_hcSequence);
  }

  if (cache_line_size != 0) {
    // We were able to determine the L1 data cache line size so
    // do some cache line specific sanity checks

    if (offset_stwRandom < cache_line_size) {
      tty->print_cr("WARNING: the SharedGlobals.stwRandom field is closer "
                    "to the struct beginning than a cache line which permits "
                    "false sharing.");
      (*warning_cnt_ptr)++;
    }

    if ((offset_hcSequence - offset_stwRandom) < cache_line_size) {
      tty->print_cr("WARNING: the SharedGlobals.stwRandom and "
                    "SharedGlobals.hcSequence fields are closer than a cache "
                    "line which permits false sharing.");
      (*warning_cnt_ptr)++;
    }

    if ((sizeof(SharedGlobals) - offset_hcSequence) < cache_line_size) {
      tty->print_cr("WARNING: the SharedGlobals.hcSequence field is closer "
                    "to the struct end than a cache line which permits false "
                    "sharing.");
      (*warning_cnt_ptr)++;
    }
  }
}
#ifndef PRODUCT #ifndef PRODUCT
......
...@@ -121,6 +121,9 @@ class ObjectSynchronizer : AllStatic { ...@@ -121,6 +121,9 @@ class ObjectSynchronizer : AllStatic {
static void oops_do(OopClosure* f); static void oops_do(OopClosure* f);
// debugging // debugging
static void sanity_checks(const bool verbose,
const unsigned int cache_line_size,
int *error_cnt_ptr, int *warning_cnt_ptr);
static void verify() PRODUCT_RETURN; static void verify() PRODUCT_RETURN;
static int verify_objmon_isinpool(ObjectMonitor *addr) PRODUCT_RETURN0; static int verify_objmon_isinpool(ObjectMonitor *addr) PRODUCT_RETURN0;
......
...@@ -50,6 +50,7 @@ bool Abstract_VM_Version::_supports_atomic_getset8 = false; ...@@ -50,6 +50,7 @@ bool Abstract_VM_Version::_supports_atomic_getset8 = false;
bool Abstract_VM_Version::_supports_atomic_getadd4 = false; bool Abstract_VM_Version::_supports_atomic_getadd4 = false;
bool Abstract_VM_Version::_supports_atomic_getadd8 = false; bool Abstract_VM_Version::_supports_atomic_getadd8 = false;
unsigned int Abstract_VM_Version::_logical_processors_per_package = 1U; unsigned int Abstract_VM_Version::_logical_processors_per_package = 1U;
unsigned int Abstract_VM_Version::_L1_data_cache_line_size = 0;
int Abstract_VM_Version::_reserve_for_allocation_prefetch = 0; int Abstract_VM_Version::_reserve_for_allocation_prefetch = 0;
#ifndef HOTSPOT_RELEASE_VERSION #ifndef HOTSPOT_RELEASE_VERSION
......
/* /*
* Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
...@@ -42,6 +42,7 @@ class Abstract_VM_Version: AllStatic { ...@@ -42,6 +42,7 @@ class Abstract_VM_Version: AllStatic {
static bool _supports_atomic_getadd4; static bool _supports_atomic_getadd4;
static bool _supports_atomic_getadd8; static bool _supports_atomic_getadd8;
static unsigned int _logical_processors_per_package; static unsigned int _logical_processors_per_package;
static unsigned int _L1_data_cache_line_size;
static int _vm_major_version; static int _vm_major_version;
static int _vm_minor_version; static int _vm_minor_version;
static int _vm_build_number; static int _vm_build_number;
...@@ -114,6 +115,10 @@ class Abstract_VM_Version: AllStatic { ...@@ -114,6 +115,10 @@ class Abstract_VM_Version: AllStatic {
return _logical_processors_per_package; return _logical_processors_per_package;
} }
// Returns the L1 data cache line size, in bytes, as stored in
// _L1_data_cache_line_size. The field's static initializer is 0, so 0 is
// returned when no platform-specific code has assigned a value; callers
// such as ObjectMonitor::sanity_checks() skip their cache line checks
// in that case.
static unsigned int L1_data_cache_line_size() {
return _L1_data_cache_line_size;
}
// Need a space at the end of TLAB for prefetch instructions // Need a space at the end of TLAB for prefetch instructions
// which may fault when accessing memory outside of heap. // which may fault when accessing memory outside of heap.
static int reserve_for_allocation_prefetch() { static int reserve_for_allocation_prefetch() {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册