// Patch summary: unified diff touching src/os/linux/vm/os_linux.cpp and
// src/os/linux/vm/os_linux.hpp.
//  - os_linux.hpp: adds the numa_set_bind_policy_func_t typedef, the static
//    _numa_set_bind_policy pointer, its setter, and a public
//    numa_set_bind_policy(int) wrapper that only calls through when the
//    pointer is not NULL.
//  - os_linux.cpp: defines the static pointer, fills it in libnuma_init() via
//    libnuma_dlsym(handle, "numa_set_bind_policy"), and makes
//    os::numa_make_local() call Linux::numa_set_bind_policy(USE_MPOL_PREFERRED)
//    before Linux::numa_tonode_memory().
diff --git a/src/os/linux/vm/os_linux.cpp b/src/os/linux/vm/os_linux.cpp index 23d3457046d4615bf29c0bc00f071221a88b71e4..db82fb7e5aa4f83a3c9da67215ac29cfaf5c8435 100644 --- a/src/os/linux/vm/os_linux.cpp +++ b/src/os/linux/vm/os_linux.cpp @@ -2767,7 +2767,19 @@ void os::numa_make_global(char *addr, size_t bytes) { Linux::numa_interleave_memory(addr, bytes); } +// Define for numa_set_bind_policy(int). Setting the argument to 0 will set the +// bind policy to MPOL_PREFERRED for the current thread. +#define USE_MPOL_PREFERRED 0 + void os::numa_make_local(char *addr, size_t bytes, int lgrp_hint) { + // To make NUMA and large pages more robust when both are enabled, we need to ease + // the requirements on where the memory should be allocated. MPOL_BIND is the + // default policy and it will force memory to be allocated on the specified + // node. Changing this to MPOL_PREFERRED will prefer to allocate the memory on + // the specified node, but will not force it. Using this policy will prevent + // getting SIGBUS when trying to allocate large pages on NUMA nodes with no + // free large pages. 
+ Linux::numa_set_bind_policy(USE_MPOL_PREFERRED); Linux::numa_tonode_memory(addr, bytes, lgrp_hint); } @@ -2869,6 +2881,8 @@ bool os::Linux::libnuma_init() { libnuma_dlsym(handle, "numa_tonode_memory"))); set_numa_interleave_memory(CAST_TO_FN_PTR(numa_interleave_memory_func_t, libnuma_dlsym(handle, "numa_interleave_memory"))); + set_numa_set_bind_policy(CAST_TO_FN_PTR(numa_set_bind_policy_func_t, + libnuma_dlsym(handle, "numa_set_bind_policy"))); if (numa_available() != -1) { @@ -2935,6 +2949,7 @@ os::Linux::numa_max_node_func_t os::Linux::_numa_max_node; os::Linux::numa_available_func_t os::Linux::_numa_available; os::Linux::numa_tonode_memory_func_t os::Linux::_numa_tonode_memory; os::Linux::numa_interleave_memory_func_t os::Linux::_numa_interleave_memory; +os::Linux::numa_set_bind_policy_func_t os::Linux::_numa_set_bind_policy; unsigned long* os::Linux::_numa_all_nodes; bool os::pd_uncommit_memory(char* addr, size_t size) { diff --git a/src/os/linux/vm/os_linux.hpp b/src/os/linux/vm/os_linux.hpp index 5fa2449931947f2f6befec34bcd2bf0c16f159fe..c824e8ad3fef216f7335fbdba0ae6a3647e5867f 100644 --- a/src/os/linux/vm/os_linux.hpp +++ b/src/os/linux/vm/os_linux.hpp @@ -235,6 +235,7 @@ private: typedef int (*numa_available_func_t)(void); typedef int (*numa_tonode_memory_func_t)(void *start, size_t size, int node); typedef void (*numa_interleave_memory_func_t)(void *start, size_t size, unsigned long *nodemask); + typedef void (*numa_set_bind_policy_func_t)(int policy); static sched_getcpu_func_t _sched_getcpu; static numa_node_to_cpus_func_t _numa_node_to_cpus; @@ -242,6 +243,7 @@ private: static numa_available_func_t _numa_available; static numa_tonode_memory_func_t _numa_tonode_memory; static numa_interleave_memory_func_t _numa_interleave_memory; + static numa_set_bind_policy_func_t _numa_set_bind_policy; static unsigned long* _numa_all_nodes; static void set_sched_getcpu(sched_getcpu_func_t func) { _sched_getcpu = func; } @@ -250,6 +252,7 @@ private: static void 
set_numa_available(numa_available_func_t func) { _numa_available = func; } static void set_numa_tonode_memory(numa_tonode_memory_func_t func) { _numa_tonode_memory = func; } static void set_numa_interleave_memory(numa_interleave_memory_func_t func) { _numa_interleave_memory = func; } + static void set_numa_set_bind_policy(numa_set_bind_policy_func_t func) { _numa_set_bind_policy = func; } static void set_numa_all_nodes(unsigned long* ptr) { _numa_all_nodes = ptr; } static int sched_getcpu_syscall(void); public: @@ -267,6 +270,11 @@ public: _numa_interleave_memory(start, size, _numa_all_nodes); } } + static void numa_set_bind_policy(int policy) { + if (_numa_set_bind_policy != NULL) { + _numa_set_bind_policy(policy); + } + } static int get_node_by_cpu(int cpu_id); };