提交 911c3141 编写于 作者: A Alex Stark 提交者: TensorFlower Gardener

Ruy: Introduce CPU ID detection on x86.

This amounts to disabling Ruy paths for this cpuid instruction results lack selected features.

PiperOrigin-RevId: 264683681
上级 df22a29b
......@@ -204,17 +204,32 @@ cc_library(
)
cc_library(
name = "detect_dotprod",
name = "detect_arm",
srcs = [
"detect_dotprod.cc",
"detect_arm.cc",
],
hdrs = [
"detect_dotprod.h",
"detect_arm.h",
],
copts = RUY_COPTS,
visibility = ruy_visibility(),
)
cc_library(
name = "detect_x86",
srcs = [
"detect_x86.cc",
],
hdrs = [
"detect_x86.h",
],
copts = RUY_COPTS,
visibility = ruy_visibility(),
deps = [
":platform",
],
)
cc_library(
name = "path",
hdrs = ["path.h"],
......@@ -256,7 +271,8 @@ cc_library(
deps = [
":allocator",
":check_macros",
":detect_dotprod",
":detect_arm",
":detect_x86",
":path",
":thread_pool",
":trace",
......
......@@ -16,7 +16,8 @@ limitations under the License.
#include "tensorflow/lite/experimental/ruy/context.h"
#include "tensorflow/lite/experimental/ruy/check_macros.h"
#include "tensorflow/lite/experimental/ruy/detect_dotprod.h"
#include "tensorflow/lite/experimental/ruy/detect_arm.h"
#include "tensorflow/lite/experimental/ruy/detect_x86.h"
namespace ruy {
......@@ -41,13 +42,31 @@ Path Context::GetRuntimeEnabledPaths() {
// Now selectively disable paths that aren't supported on this machine.
if ((runtime_enabled_paths_ & Path::kNeonDotprod) != Path::kNone) {
if (!DetectDotprod()) {
runtime_enabled_paths_ = runtime_enabled_paths_ ^ Path::kNeonDotprod;
runtime_enabled_paths_ = runtime_enabled_paths_ & ~Path::kNeonDotprod;
// Sanity check.
RUY_DCHECK((runtime_enabled_paths_ & Path::kNeonDotprod) == Path::kNone);
}
}
#endif
#if RUY_PLATFORM(X86)
if ((runtime_enabled_paths_ & Path::kAvx2) != Path::kNone) {
if (!DetectCpuAvx2()) {
runtime_enabled_paths_ = runtime_enabled_paths_ & ~Path::kAvx2;
// Sanity check.
RUY_DCHECK((runtime_enabled_paths_ & Path::kAvx2) == Path::kNone);
}
}
if ((runtime_enabled_paths_ & Path::kAvx512) != Path::kNone) {
if (!DetectCpuAvx512()) {
runtime_enabled_paths_ = runtime_enabled_paths_ & ~Path::kAvx512;
// Sanity check.
RUY_DCHECK((runtime_enabled_paths_ & Path::kAvx512) == Path::kNone);
}
}
#endif
// Sanity check. We can't possibly have disabled all paths, as some paths
// are universally available (kReference, kStandardCpp).
RUY_DCHECK(runtime_enabled_paths_ != Path::kNone);
......
......@@ -70,6 +70,8 @@ bool try_asm_snippet(bool (*asm_snippet)()) {
```
*/
#include "tensorflow/lite/experimental/ruy/detect_arm.h"
#if defined __aarch64__ && defined __linux__
#define RUY_IMPLEMENT_DETECT_DOTPROD
#endif
......@@ -215,8 +217,8 @@ bool DetectDotprod() {
return DetectDotprodBySigIllMethod();
}
#else
#else // RUY_IMPLEMENT_DETECT_DOTPROD
bool DetectDotprod() { return false; }
#endif
#endif // RUY_IMPLEMENT_DETECT_DOTPROD
} // namespace ruy
......@@ -15,8 +15,8 @@ limitations under the License.
// Temporary dotprod-detection code until we can rely on getauxval.
#ifndef TENSORFLOW_LITE_EXPERIMENTAL_RUY_DETECT_DOTPROD_H_
#define TENSORFLOW_LITE_EXPERIMENTAL_RUY_DETECT_DOTPROD_H_
#ifndef TENSORFLOW_LITE_EXPERIMENTAL_RUY_DETECT_ARM_H_
#define TENSORFLOW_LITE_EXPERIMENTAL_RUY_DETECT_ARM_H_
namespace ruy {
......@@ -26,4 +26,4 @@ bool DetectDotprod();
} // namespace ruy
#endif // TENSORFLOW_LITE_EXPERIMENTAL_RUY_DETECT_DOTPROD_H_
#endif // TENSORFLOW_LITE_EXPERIMENTAL_RUY_DETECT_ARM_H_
/* Copyright 2019 Google LLC. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/experimental/ruy/detect_x86.h"
#include <cstdint>
#if RUY_PLATFORM(X86) && RUY_PLATFORM(X86_ENHANCEMENTS)
#include <immintrin.h> // IWYU pragma: keep
#endif
namespace ruy {
#if RUY_PLATFORM(X86) && RUY_PLATFORM(X86_ENHANCEMENTS)
namespace {
// See Intel docs, such as http://goo.gl/c6IkGX.
inline void RunCpuid(std::uint32_t eax, std::uint32_t ecx,
std::uint32_t abcd[4]) {
std::uint32_t ebx, edx;
#if defined(__i386__) && defined(__PIC__)
/* in case of PIC under 32-bit EBX cannot be clobbered */
asm volatile("movl %%ebx, %%edi \n\t cpuid \n\t xchgl %%ebx, %%edi"
: "=D"(ebx),
#else
asm volatile("cpuid"
: "+b"(ebx),
#endif
"+a"(eax), "+c"(ecx), "=d"(edx));
abcd[0] = eax;
abcd[1] = ebx;
abcd[2] = ecx;
abcd[3] = edx;
}
} // namespace
bool DetectCpuSse42() {
constexpr std::uint32_t kAvx512EcxSse42 = 1u << 20;
constexpr std::uint32_t kAvx512EcxAbm = 1u << 5;
std::uint32_t abcd[4];
RunCpuid(1, 0, abcd);
const bool has_sse4_2_base = (abcd[2] & kAvx512EcxSse42) == kAvx512EcxSse42;
RunCpuid(0x80000001, 0, abcd);
const bool has_abm = (abcd[2] & kAvx512EcxAbm) == kAvx512EcxAbm;
return has_sse4_2_base && has_abm;
}
bool DetectCpuAvx2() {
constexpr std::uint32_t kAvx2Ebx = 1u << 5;
std::uint32_t abcd[4];
RunCpuid(7, 0, abcd);
return (abcd[1] & kAvx2Ebx) == kAvx2Ebx;
}
bool DetectCpuAvx512() {
constexpr std::uint32_t kAvx512EbxF = 1u << 16;
constexpr std::uint32_t kAvx512EbxDq = 1u << 17;
constexpr std::uint32_t kAvx512EbxCd = 1u << 28;
constexpr std::uint32_t kAvx512EbxBw = 1u << 30;
constexpr std::uint32_t kAvx512EbxVl = 1u << 31;
constexpr std::uint32_t kAvx512EbxMask =
kAvx512EbxF | kAvx512EbxDq | kAvx512EbxCd | kAvx512EbxBw | kAvx512EbxVl;
std::uint32_t abcd[4];
RunCpuid(7, 0, abcd);
return (abcd[1] & kAvx512EbxMask) == kAvx512EbxMask;
}
#endif
} // namespace ruy
/* Copyright 2019 Google LLC. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_EXPERIMENTAL_RUY_DETECT_X86_H_
#define TENSORFLOW_LITE_EXPERIMENTAL_RUY_DETECT_X86_H_
#include "tensorflow/lite/experimental/ruy/platform.h"
namespace ruy {
#if RUY_PLATFORM(X86)
#if RUY_PLATFORM(X86_ENHANCEMENTS)
// This also checks ABM support, which implies LZCNT and POPCNT.
bool DetectCpuSse42();
bool DetectCpuAvx2();
bool DetectCpuAvx512();
#else // RUY_PLATFORM(X86_ENHANCEMENTS)
inline bool DetectCpuSse42() { return false; }
inline bool DetectCpuAvx2() { return false; }
inline bool DetectCpuAvx512() { return false; }
#endif // !RUY_PLATFORM(X86_ENHANCEMENTS)
#endif // RUY_PLATFORM(X86)
} // namespace ruy
#endif // TENSORFLOW_LITE_EXPERIMENTAL_RUY_DETECT_X86_H_
......@@ -80,9 +80,9 @@ limitations under the License.
// restriction.
#if defined(RUY_FORCE_ENABLE_X86_ENHANCEMENTS) || \
(defined(__clang__) && defined(__linux__))
#define RUY_USE_X86_ENHANCEMENTS 1
#define RUY_DONOTUSEDIRECTLY_X86_ENHANCEMENTS 1
#else
#define RUY_USE_X86_ENHANCEMENTS 0
#define RUY_DONOTUSEDIRECTLY_X86_ENHANCEMENTS 0
#endif
// These CPU capabilities will all be true when Skylake, etc, are enabled during
......@@ -91,15 +91,15 @@ limitations under the License.
// TODO(b/138433137) Select x86 enhancements at runtime rather than via compile
// options.
//
#if RUY_USE_X86_ENHANCEMENTS && RUY_PLATFORM(X86) && defined(__AVX512F__) && \
defined(__AVX512DQ__) && defined(__AVX512CD__) && defined(__AVX512BW__) && \
defined(__AVX512VL__)
#if RUY_PLATFORM(X86_ENHANCEMENTS) && RUY_PLATFORM(X86) && \
defined(__AVX512F__) && defined(__AVX512DQ__) && defined(__AVX512CD__) && \
defined(__AVX512BW__) && defined(__AVX512VL__)
#define RUY_DONOTUSEDIRECTLY_AVX512 1
#else
#define RUY_DONOTUSEDIRECTLY_AVX512 0
#endif
#if defined(RUY_ENABLE_AVX2_ENHANCEMENTS) && RUY_USE_X86_ENHANCEMENTS && \
#if defined(RUY_ENABLE_AVX2_ENHANCEMENTS) && RUY_PLATFORM(X86_ENHANCEMENTS) && \
RUY_PLATFORM(X86) && defined(__AVX2__)
#define RUY_DONOTUSEDIRECTLY_AVX2 1
#else
......@@ -107,7 +107,7 @@ limitations under the License.
#endif
// Note does not check for LZCNT or POPCNT.
#if RUY_USE_X86_ENHANCEMENTS && RUY_PLATFORM(X86) && defined(__SSE4_2__)
#if RUY_PLATFORM(X86_ENHANCEMENTS) && RUY_PLATFORM(X86) && defined(__SSE4_2__)
#define RUY_DONOTUSEDIRECTLY_SSE4_2 1
#else
#define RUY_DONOTUSEDIRECTLY_SSE4_2 0
......@@ -120,6 +120,4 @@ limitations under the License.
#define RUY_DONOTUSEDIRECTLY_APPLE 0
#endif
#undef RUY_USE_X86_ENHANCEMENTS
#endif // TENSORFLOW_LITE_EXPERIMENTAL_RUY_PLATFORM_H_
......@@ -535,7 +535,7 @@ cc_library(
":round",
":types",
"//tensorflow/lite/c:c_api_internal",
"//tensorflow/lite/experimental/ruy:detect_dotprod",
"//tensorflow/lite/experimental/ruy:detect_arm",
"//tensorflow/lite/kernels:activation_functor",
"//tensorflow/lite/kernels:cpu_backend_context",
"//tensorflow/lite/kernels:op_macros",
......
......@@ -22,7 +22,7 @@ limitations under the License.
#include <vector>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/experimental/ruy/detect_dotprod.h"
#include "tensorflow/lite/experimental/ruy/detect_arm.h"
#include "tensorflow/lite/kernels/activation_functor.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/optimized/cpu_check.h"
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册