提交 b0ad9b42 编写于 作者: I iveresov

6779436: NUMA allocator: libnuma expects certain size of the buffer in numa_node_to_cpus()

Summary: In os::Linux::rebuild_cpu_to_node_map(), fix the size of the CPU bitmap. Fix the arithmetic in MutableNUMASpace::adaptive_chunk_size() that could cause overflows and underflows of the chunk_size variable.
Reviewed-by: apetrusenko
上级 c2c7191b
...@@ -2272,7 +2272,9 @@ void os::free_memory(char *addr, size_t bytes) { ...@@ -2272,7 +2272,9 @@ void os::free_memory(char *addr, size_t bytes) {
uncommit_memory(addr, bytes); uncommit_memory(addr, bytes);
} }
void os::numa_make_global(char *addr, size_t bytes) { } void os::numa_make_global(char *addr, size_t bytes) {
Linux::numa_interleave_memory(addr, bytes);
}
void os::numa_make_local(char *addr, size_t bytes, int lgrp_hint) { void os::numa_make_local(char *addr, size_t bytes, int lgrp_hint) {
Linux::numa_tonode_memory(addr, bytes, lgrp_hint); Linux::numa_tonode_memory(addr, bytes, lgrp_hint);
...@@ -2314,7 +2316,7 @@ char *os::scan_pages(char *start, char* end, page_info* page_expected, page_info ...@@ -2314,7 +2316,7 @@ char *os::scan_pages(char *start, char* end, page_info* page_expected, page_info
extern "C" void numa_warn(int number, char *where, ...) { } extern "C" void numa_warn(int number, char *where, ...) { }
extern "C" void numa_error(char *where) { } extern "C" void numa_error(char *where) { }
void os::Linux::libnuma_init() { bool os::Linux::libnuma_init() {
// sched_getcpu() should be in libc. // sched_getcpu() should be in libc.
set_sched_getcpu(CAST_TO_FN_PTR(sched_getcpu_func_t, set_sched_getcpu(CAST_TO_FN_PTR(sched_getcpu_func_t,
dlsym(RTLD_DEFAULT, "sched_getcpu"))); dlsym(RTLD_DEFAULT, "sched_getcpu")));
...@@ -2330,31 +2332,51 @@ void os::Linux::libnuma_init() { ...@@ -2330,31 +2332,51 @@ void os::Linux::libnuma_init() {
dlsym(handle, "numa_available"))); dlsym(handle, "numa_available")));
set_numa_tonode_memory(CAST_TO_FN_PTR(numa_tonode_memory_func_t, set_numa_tonode_memory(CAST_TO_FN_PTR(numa_tonode_memory_func_t,
dlsym(handle, "numa_tonode_memory"))); dlsym(handle, "numa_tonode_memory")));
set_numa_interleave_memory(CAST_TO_FN_PTR(numa_interleave_memory_func_t,
dlsym(handle, "numa_interleave_memory")));
if (numa_available() != -1) { if (numa_available() != -1) {
set_numa_all_nodes((unsigned long*)dlsym(handle, "numa_all_nodes"));
// Create a cpu -> node mapping // Create a cpu -> node mapping
_cpu_to_node = new (ResourceObj::C_HEAP) GrowableArray<int>(0, true); _cpu_to_node = new (ResourceObj::C_HEAP) GrowableArray<int>(0, true);
rebuild_cpu_to_node_map(); rebuild_cpu_to_node_map();
return true;
} }
} }
} }
return false;
} }
// rebuild_cpu_to_node_map() constructs a table mapping cpu id to node id. // rebuild_cpu_to_node_map() constructs a table mapping cpu id to node id.
// The table is later used in get_node_by_cpu(). // The table is later used in get_node_by_cpu().
void os::Linux::rebuild_cpu_to_node_map() { void os::Linux::rebuild_cpu_to_node_map() {
int cpu_num = os::active_processor_count(); const size_t NCPUS = 32768; // Since the buffer size computation is very obscure
// in libnuma (possible values start at 16
// and continue up with every other power of 2, but less
// than the maximum number of CPUs supported by the kernel), and
// is subject to change (in libnuma version 2 the requirements
// are more reasonable), we'll just hardcode the number they use
// in the library.
const size_t BitsPerCLong = sizeof(long) * CHAR_BIT;
size_t cpu_num = os::active_processor_count();
size_t cpu_map_size = NCPUS / BitsPerCLong;
size_t cpu_map_valid_size =
MIN2((cpu_num + BitsPerCLong - 1) / BitsPerCLong, cpu_map_size);
cpu_to_node()->clear(); cpu_to_node()->clear();
cpu_to_node()->at_grow(cpu_num - 1); cpu_to_node()->at_grow(cpu_num - 1);
int node_num = numa_get_groups_num(); size_t node_num = numa_get_groups_num();
int cpu_map_size = (cpu_num + BitsPerLong - 1) / BitsPerLong;
unsigned long *cpu_map = NEW_C_HEAP_ARRAY(unsigned long, cpu_map_size); unsigned long *cpu_map = NEW_C_HEAP_ARRAY(unsigned long, cpu_map_size);
for (int i = 0; i < node_num; i++) { for (size_t i = 0; i < node_num; i++) {
if (numa_node_to_cpus(i, cpu_map, cpu_map_size * sizeof(unsigned long)) != -1) { if (numa_node_to_cpus(i, cpu_map, cpu_map_size * sizeof(unsigned long)) != -1) {
for (int j = 0; j < cpu_map_size; j++) { for (size_t j = 0; j < cpu_map_valid_size; j++) {
if (cpu_map[j] != 0) { if (cpu_map[j] != 0) {
for (int k = 0; k < BitsPerLong; k++) { for (size_t k = 0; k < BitsPerCLong; k++) {
if (cpu_map[j] & (1UL << k)) { if (cpu_map[j] & (1UL << k)) {
cpu_to_node()->at_put(j * BitsPerLong + k, i); cpu_to_node()->at_put(j * BitsPerCLong + k, i);
} }
} }
} }
...@@ -2377,7 +2399,8 @@ os::Linux::numa_node_to_cpus_func_t os::Linux::_numa_node_to_cpus; ...@@ -2377,7 +2399,8 @@ os::Linux::numa_node_to_cpus_func_t os::Linux::_numa_node_to_cpus;
os::Linux::numa_max_node_func_t os::Linux::_numa_max_node; os::Linux::numa_max_node_func_t os::Linux::_numa_max_node;
os::Linux::numa_available_func_t os::Linux::_numa_available; os::Linux::numa_available_func_t os::Linux::_numa_available;
os::Linux::numa_tonode_memory_func_t os::Linux::_numa_tonode_memory; os::Linux::numa_tonode_memory_func_t os::Linux::_numa_tonode_memory;
os::Linux::numa_interleave_memory_func_t os::Linux::_numa_interleave_memory;
unsigned long* os::Linux::_numa_all_nodes;
bool os::uncommit_memory(char* addr, size_t size) { bool os::uncommit_memory(char* addr, size_t size) {
return ::mmap(addr, size, return ::mmap(addr, size,
...@@ -3695,7 +3718,17 @@ jint os::init_2(void) ...@@ -3695,7 +3718,17 @@ jint os::init_2(void)
} }
if (UseNUMA) { if (UseNUMA) {
Linux::libnuma_init(); if (!Linux::libnuma_init()) {
UseNUMA = false;
} else {
if ((Linux::numa_max_node() < 1)) {
// There's only one node (they start from 0), disable NUMA.
UseNUMA = false;
}
}
if (!UseNUMA && ForceNUMA) {
UseNUMA = true;
}
} }
if (MaxFDLimit) { if (MaxFDLimit) {
......
...@@ -146,7 +146,7 @@ class Linux { ...@@ -146,7 +146,7 @@ class Linux {
static bool is_floating_stack() { return _is_floating_stack; } static bool is_floating_stack() { return _is_floating_stack; }
static void libpthread_init(); static void libpthread_init();
static void libnuma_init(); static bool libnuma_init();
// Minimum stack size a thread can be created with (allowing // Minimum stack size a thread can be created with (allowing
// the VM to completely create the thread and enter user code) // the VM to completely create the thread and enter user code)
...@@ -240,20 +240,23 @@ private: ...@@ -240,20 +240,23 @@ private:
typedef int (*numa_max_node_func_t)(void); typedef int (*numa_max_node_func_t)(void);
typedef int (*numa_available_func_t)(void); typedef int (*numa_available_func_t)(void);
typedef int (*numa_tonode_memory_func_t)(void *start, size_t size, int node); typedef int (*numa_tonode_memory_func_t)(void *start, size_t size, int node);
typedef void (*numa_interleave_memory_func_t)(void *start, size_t size, unsigned long *nodemask);
static sched_getcpu_func_t _sched_getcpu; static sched_getcpu_func_t _sched_getcpu;
static numa_node_to_cpus_func_t _numa_node_to_cpus; static numa_node_to_cpus_func_t _numa_node_to_cpus;
static numa_max_node_func_t _numa_max_node; static numa_max_node_func_t _numa_max_node;
static numa_available_func_t _numa_available; static numa_available_func_t _numa_available;
static numa_tonode_memory_func_t _numa_tonode_memory; static numa_tonode_memory_func_t _numa_tonode_memory;
static numa_interleave_memory_func_t _numa_interleave_memory;
static unsigned long* _numa_all_nodes;
static void set_sched_getcpu(sched_getcpu_func_t func) { _sched_getcpu = func; } static void set_sched_getcpu(sched_getcpu_func_t func) { _sched_getcpu = func; }
static void set_numa_node_to_cpus(numa_node_to_cpus_func_t func) { _numa_node_to_cpus = func; } static void set_numa_node_to_cpus(numa_node_to_cpus_func_t func) { _numa_node_to_cpus = func; }
static void set_numa_max_node(numa_max_node_func_t func) { _numa_max_node = func; } static void set_numa_max_node(numa_max_node_func_t func) { _numa_max_node = func; }
static void set_numa_available(numa_available_func_t func) { _numa_available = func; } static void set_numa_available(numa_available_func_t func) { _numa_available = func; }
static void set_numa_tonode_memory(numa_tonode_memory_func_t func) { _numa_tonode_memory = func; } static void set_numa_tonode_memory(numa_tonode_memory_func_t func) { _numa_tonode_memory = func; }
static void set_numa_interleave_memory(numa_interleave_memory_func_t func) { _numa_interleave_memory = func; }
static void set_numa_all_nodes(unsigned long* ptr) { _numa_all_nodes = ptr; }
public: public:
static int sched_getcpu() { return _sched_getcpu != NULL ? _sched_getcpu() : -1; } static int sched_getcpu() { return _sched_getcpu != NULL ? _sched_getcpu() : -1; }
static int numa_node_to_cpus(int node, unsigned long *buffer, int bufferlen) { static int numa_node_to_cpus(int node, unsigned long *buffer, int bufferlen) {
...@@ -264,6 +267,11 @@ public: ...@@ -264,6 +267,11 @@ public:
static int numa_tonode_memory(void *start, size_t size, int node) { static int numa_tonode_memory(void *start, size_t size, int node) {
return _numa_tonode_memory != NULL ? _numa_tonode_memory(start, size, node) : -1; return _numa_tonode_memory != NULL ? _numa_tonode_memory(start, size, node) : -1;
} }
static void numa_interleave_memory(void *start, size_t size) {
if (_numa_interleave_memory != NULL && _numa_all_nodes != NULL) {
_numa_interleave_memory(start, size, _numa_all_nodes);
}
}
static int get_node_by_cpu(int cpu_id); static int get_node_by_cpu(int cpu_id);
}; };
......
...@@ -4638,7 +4638,7 @@ void os::Solaris::synchronization_init() { ...@@ -4638,7 +4638,7 @@ void os::Solaris::synchronization_init() {
} }
} }
void os::Solaris::liblgrp_init() { bool os::Solaris::liblgrp_init() {
void *handle = dlopen("liblgrp.so.1", RTLD_LAZY); void *handle = dlopen("liblgrp.so.1", RTLD_LAZY);
if (handle != NULL) { if (handle != NULL) {
os::Solaris::set_lgrp_home(CAST_TO_FN_PTR(lgrp_home_func_t, dlsym(handle, "lgrp_home"))); os::Solaris::set_lgrp_home(CAST_TO_FN_PTR(lgrp_home_func_t, dlsym(handle, "lgrp_home")));
...@@ -4653,9 +4653,9 @@ void os::Solaris::liblgrp_init() { ...@@ -4653,9 +4653,9 @@ void os::Solaris::liblgrp_init() {
lgrp_cookie_t c = lgrp_init(LGRP_VIEW_CALLER); lgrp_cookie_t c = lgrp_init(LGRP_VIEW_CALLER);
set_lgrp_cookie(c); set_lgrp_cookie(c);
} else { return true;
warning("your OS does not support NUMA");
} }
return false;
} }
void os::Solaris::misc_sym_init() { void os::Solaris::misc_sym_init() {
...@@ -4824,9 +4824,25 @@ jint os::init_2(void) { ...@@ -4824,9 +4824,25 @@ jint os::init_2(void) {
vm_page_size())); vm_page_size()));
Solaris::libthread_init(); Solaris::libthread_init();
if (UseNUMA) { if (UseNUMA) {
Solaris::liblgrp_init(); if (!Solaris::liblgrp_init()) {
UseNUMA = false;
} else {
size_t lgrp_limit = os::numa_get_groups_num();
int *lgrp_ids = NEW_C_HEAP_ARRAY(int, lgrp_limit);
size_t lgrp_num = os::numa_get_leaf_groups(lgrp_ids, lgrp_limit);
FREE_C_HEAP_ARRAY(int, lgrp_ids);
if (lgrp_num < 2) {
// There's only one locality group, disable NUMA.
UseNUMA = false;
}
}
if (!UseNUMA && ForceNUMA) {
UseNUMA = true;
}
} }
Solaris::misc_sym_init(); Solaris::misc_sym_init();
Solaris::signal_sets_init(); Solaris::signal_sets_init();
Solaris::init_signal_mem(); Solaris::init_signal_mem();
......
...@@ -176,7 +176,7 @@ class Solaris { ...@@ -176,7 +176,7 @@ class Solaris {
public: public:
static void libthread_init(); static void libthread_init();
static void synchronization_init(); static void synchronization_init();
static void liblgrp_init(); static bool liblgrp_init();
// Load miscellaneous symbols. // Load miscellaneous symbols.
static void misc_sym_init(); static void misc_sym_init();
// This boolean allows users to forward their own non-matching signals // This boolean allows users to forward their own non-matching signals
......
...@@ -3353,6 +3353,10 @@ jint os::init_2(void) { ...@@ -3353,6 +3353,10 @@ jint os::init_2(void) {
// initialize thread priority policy // initialize thread priority policy
prio_init(); prio_init();
if (UseNUMA && !ForceNUMA) {
UseNUMA = false; // Currently unsupported.
}
return JNI_OK; return JNI_OK;
} }
......
...@@ -414,9 +414,20 @@ size_t MutableNUMASpace::adaptive_chunk_size(int i, size_t limit) { ...@@ -414,9 +414,20 @@ size_t MutableNUMASpace::adaptive_chunk_size(int i, size_t limit) {
if (limit > 0) { if (limit > 0) {
limit = round_down(limit, page_size()); limit = round_down(limit, page_size());
if (chunk_size > current_chunk_size(i)) { if (chunk_size > current_chunk_size(i)) {
chunk_size = MIN2((off_t)chunk_size, (off_t)current_chunk_size(i) + (off_t)limit); size_t upper_bound = pages_available * page_size();
if (upper_bound > limit &&
current_chunk_size(i) < upper_bound - limit) {
// The resulting upper bound should not exceed the available
// amount of memory (pages_available * page_size()).
upper_bound = current_chunk_size(i) + limit;
}
chunk_size = MIN2(chunk_size, upper_bound);
} else { } else {
chunk_size = MAX2((off_t)chunk_size, (off_t)current_chunk_size(i) - (off_t)limit); size_t lower_bound = page_size();
if (current_chunk_size(i) > limit) { // lower_bound shouldn't underflow.
lower_bound = current_chunk_size(i) - limit;
}
chunk_size = MAX2(chunk_size, lower_bound);
} }
} }
assert(chunk_size <= pages_available * page_size(), "Chunk size out of range"); assert(chunk_size <= pages_available * page_size(), "Chunk size out of range");
......
...@@ -342,6 +342,9 @@ class CommandLineFlags { ...@@ -342,6 +342,9 @@ class CommandLineFlags {
product(bool, UseNUMA, false, \ product(bool, UseNUMA, false, \
"Use NUMA if available") \ "Use NUMA if available") \
\ \
product(bool, ForceNUMA, false, \
"Force NUMA optimizations on single-node/UMA systems") \
\
product(intx, NUMAChunkResizeWeight, 20, \ product(intx, NUMAChunkResizeWeight, 20, \
"Percentage (0-100) used to weight the current sample when " \ "Percentage (0-100) used to weight the current sample when " \
"computing exponentially decaying average for " \ "computing exponentially decaying average for " \
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册