提交 9748092a 编写于 作者: E Eugene Brevdo 提交者: TensorFlower Gardener

[TF port] Add port::GetCurrentCPU and port::NumTotalCPUs.

GetCurrentCPU: returns the current CPU of the calling thread.
NumTotalCPUs: attempts to get the total number of physical cores on the system

When both return non-failing values, we expect 0 <= GetCurrentCPU < NumTotalCPUs.

PiperOrigin-RevId: 225088316
上级 2087bffc
......@@ -32,9 +32,22 @@ namespace port {
// Returns an estimate of the number of schedulable CPUs for this
// process. Usually, it's constant throughout the lifetime of a
// process, but it might change if the underlying cluster management
// software can change it dynamically. If the underlying call fails, a default
// value (e.g. `4`) may be returned.
int NumSchedulableCPUs();
// Sentinel returned by `NumTotalCPUs` and `GetCurrentCPU` when the requested
// value cannot be determined.
static constexpr int kUnknownCPU = -1;
// Returns the total number of CPUs on the system. This number should
// not change even if the underlying cluster management software may
// change the number of schedulable CPUs. Unlike `NumSchedulableCPUs`, if the
// underlying call fails, an invalid value of -1 (`kUnknownCPU`) will be
// returned; the user must check for validity.
int NumTotalCPUs();
// Returns the id of the current CPU. Returns -1 (`kUnknownCPU`) if the
// current CPU cannot be identified. If successful, the return value will be
// in [0, NumTotalCPUs()).
int GetCurrentCPU();
// Returns an estimate of the number of hyperthreads per physical core
// on the CPU.
int NumHyperthreadsPerCore();
......
......@@ -33,6 +33,12 @@ TEST(Port, AlignedMalloc) {
}
}
// Checks the documented contract of GetCurrentCPU(): when both it and
// NumTotalCPUs() report valid values, the current CPU id lies in
// [0, NumTotalCPUs()).
TEST(Port, GetCurrentCPU) {
  const int cpu = GetCurrentCPU();
  const int total_cpus = NumTotalCPUs();
#if defined(__linux__) && !defined(__ANDROID__)
  // The Linux implementation is backed by sched_getcpu(); a failure there
  // should still be reported rather than silently skipped.
  EXPECT_NE(cpu, kUnknownCPU);
#endif
  if (cpu == kUnknownCPU || total_cpus == kUnknownCPU) {
    // Both functions may legitimately return kUnknownCPU on platforms where
    // the value cannot be determined; there is no range to verify then.
    return;
  }
  EXPECT_GE(cpu, 0);
  EXPECT_LT(cpu, total_cpus);
}
TEST(ConditionVariable, WaitForMilliseconds_Timeout) {
mutex m;
mutex_lock l(m);
......@@ -78,3 +84,9 @@ TEST(TestCPUFeature, TestFeature) {
} // namespace port
} // namespace tensorflow
// Test binary entry point: hands the command line to googletest and then
// runs every registered test, using the result as the process exit code.
int main(int argc, char** argv) {
  // On Linux, add: FLAGS_logtostderr = true;
  ::testing::InitGoogleTest(&argc, argv);
  const int test_status = RUN_ALL_TESTS();
  return test_status;
}
......@@ -25,7 +25,14 @@ limitations under the License.
#if defined(__linux__) && !defined(__ANDROID__)
#include <sched.h>
#include <sys/sysinfo.h>
#else
#include <sys/syscall.h>
#endif
#if !defined(__APPLE__) && (__x86_64__ || __i386__)
#include <cpuid.h>
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
......@@ -69,6 +76,34 @@ int NumSchedulableCPUs() {
return kDefaultCores;
}
// Returns the CPU count reported by absl::base_internal::NumCPUs(). A
// reported count of zero is mapped to kUnknownCPU so that callers can detect
// the failure.
int NumTotalCPUs() {
  const int detected = absl::base_internal::NumCPUs();
  if (detected == 0) {
    return kUnknownCPU;
  }
  return detected;
}
// Returns the id of the CPU the calling thread is currently running on, or
// kUnknownCPU if it cannot be determined.
//
// Strategies, in order of preference:
//   1. Linux (non-Android): sched_getcpu().
//   2. Other platforms where <cpuid.h> provides __cpuid_count: the APIC ID
//      from CPUID leaf 1, if the CPU reports an on-chip APIC.
//   3. Other platforms where __NR_getcpu is defined: the getcpu syscall.
// Strategies 2 and 3 are tried in sequence, so the syscall is still attempted
// when the cpuid probe is compiled in but does not yield an id.
int GetCurrentCPU() {
#if defined(__linux__) && !defined(__ANDROID__)
  // sched_getcpu() reports failure as -1, the same value as kUnknownCPU.
  return sched_getcpu();
#else
#if defined(__cpuid_count)
  {
    uint32_t eax = 0;
    uint32_t ebx = 0;
    uint32_t ecx = 0;
    uint32_t edx = 0;
    __cpuid_count(/*leaf=*/1, /*subleaf=*/0, eax, ebx, ecx, edx);
    // EDX bit 9 signals that an on-chip APIC is present.
    if ((edx & (1u << 9)) != 0) {
      // EBX bits 24-31 are APIC ID.
      // NOTE(review): APIC IDs are presumably not guaranteed to be dense, so
      // this value may not always be < NumTotalCPUs() -- confirm.
      return static_cast<int>(ebx >> 24);
    }
  }
#endif  // defined(__cpuid_count)
#if defined(__NR_getcpu)
  {
    unsigned int cpu = 0;
    if (syscall(__NR_getcpu, &cpu, nullptr, nullptr) >= 0) {
      return static_cast<int>(cpu);
    }
  }
#endif  // defined(__NR_getcpu)
  return kUnknownCPU;
#endif
}
int NumHyperthreadsPerCore() {
static const int ht_per_core = tensorflow::port::CPUIDNumSMT();
return (ht_per_core > 0) ? ht_per_core : 1;
......@@ -83,9 +118,7 @@ int NUMANumNodes() { return 1; }
// No-op in this implementation: `node` is ignored and nothing is changed.
void NUMASetThreadNodeAffinity(int node) {}
// Always reports kNUMANoAffinity in this implementation.
int NUMAGetThreadNodeAffinity() { return kNUMANoAffinity; }
void* AlignedMalloc(size_t size, int minimum_alignment) {
#if defined(__ANDROID__)
......
......@@ -21,6 +21,7 @@ limitations under the License.
#endif
#include <Windows.h>
#include <processthreadsapi.h>
#include <shlwapi.h>
#include "tensorflow/core/platform/cpu_info.h"
......@@ -54,6 +55,30 @@ int NumSchedulableCPUs() {
return system_info.dwNumberOfProcessors;
}
int NumTotalCPUs() {
  // TODO(ebrevdo): Make this more accurate.
  //
  // The value used here only covers the processors in the current processor
  // group, so machines with more than 64 cores may be undercounted. An
  // accurate count requires calling
  // GetLogicalProcessorInformationEx(RelationProcessorCore, ...) and
  // accumulating the Size fields while iterating over the written-to buffer.
  // That has been deferred to someone who can easily test it on Windows.
  //
  // If you fix this, also consider updating GetCurrentCPU below.
  const int processors_in_current_group = NumSchedulableCPUs();
  return processors_in_current_group;
}
int GetCurrentCPU() {
// NOTE(ebrevdo): This returns the processor number within the processor
// group on systems with >64 processors. Therefore it doesn't necessarily map
// naturally to an index in NumSchedulableCPUs().
//
// On the plus side, this number is probably guaranteed to be within
// [0, NumTotalCPUs()) due to its incomplete implementation.
return GetCurrentProcessorNumber();
}
bool NUMAEnabled() {
// Not yet implemented: coming soon.
return false;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册