未验证 提交 16b65c55 编写于 作者: T Tao Luo 提交者: GitHub

Merge pull request #12646 from tensor-tang/feature/jit/xbyak

introduce xbyak
...@@ -160,6 +160,7 @@ endif() ...@@ -160,6 +160,7 @@ endif()
######################################################################################## ########################################################################################
include(external/mklml) # download mklml package include(external/mklml) # download mklml package
include(external/xbyak) # download xbyak package
include(external/libxsmm) # download, build, install libxsmm include(external/libxsmm) # download, build, install libxsmm
include(external/zlib) # download, build, install zlib include(external/zlib) # download, build, install zlib
include(external/gflags) # download, build, install gflags include(external/gflags) # download, build, install gflags
......
# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set(WITH_XBYAK ON)
if(WIN32 OR APPLE)
SET(WITH_XBYAK OFF CACHE STRING "Disable XBYAK in Windows and MacOS" FORCE)
return()
endif()
include(ExternalProject)
set(XBYAK_PROJECT extern_xbyak)
set(XBYAK_PREFIX_DIR ${THIRD_PARTY_PATH}/xbyak)
set(XBYAK_INSTALL_ROOT ${THIRD_PARTY_PATH}/install/xbyak)
set(XBYAK_INC_DIR ${XBYAK_INSTALL_ROOT}/include)
include_directories(${XBYAK_INC_DIR})
include_directories(${XBYAK_INC_DIR}/xbyak)
add_definitions(-DPADDLE_WITH_XBYAK)
# xbyak options
add_definitions(-DXBYAK64)
add_definitions(-DXBYAK_NO_OP_NAMES)
ExternalProject_Add(
${XBYAK_PROJECT}
${EXTERNAL_PROJECT_LOG_ARGS}
DEPENDS ""
GIT_REPOSITORY "https://github.com/herumi/xbyak.git"
GIT_TAG "v5.661" # Jul 26th
PREFIX ${XBYAK_PREFIX_DIR}
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${XBYAK_INSTALL_ROOT}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${XBYAK_INSTALL_ROOT}
)
if (${CMAKE_VERSION} VERSION_LESS "3.3.0")
set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/xbyak_dummy.c)
file(WRITE ${dummyfile} "const char *dummy_xbyak = \"${dummyfile}\";")
add_library(xbyak STATIC ${dummyfile})
else()
add_library(xbyak INTERFACE)
endif()
add_dependencies(xbyak ${XBYAK_PROJECT})
list(APPEND external_project_dependencies xbyak)
...@@ -18,7 +18,11 @@ else() ...@@ -18,7 +18,11 @@ else()
endif() endif()
cc_test(enforce_test SRCS enforce_test.cc DEPS stringpiece enforce) cc_test(enforce_test SRCS enforce_test.cc DEPS stringpiece enforce)
cc_library(cpu_info SRCS cpu_info.cc DEPS gflags glog enforce) set(CPU_INFO_DEPS gflags glog enforce)
IF(WITH_XBYAK)
list(APPEND CPU_INFO_DEPS xbyak)
ENDIF()
cc_library(cpu_info SRCS cpu_info.cc DEPS ${CPU_INFO_DEPS})
cc_test(cpu_info_test SRCS cpu_info_test.cc DEPS cpu_info) cc_test(cpu_info_test SRCS cpu_info_test.cc DEPS cpu_info)
nv_library(gpu_info SRCS gpu_info.cc DEPS gflags glog enforce) nv_library(gpu_info SRCS gpu_info.cc DEPS gflags glog enforce)
......
...@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and ...@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/platform/cpu_info.h" #include "paddle/fluid/platform/cpu_info.h"
#include "xbyak/xbyak.h"
#include "xbyak/xbyak_util.h"
#ifdef __APPLE__ #ifdef __APPLE__
#include <sys/sysctl.h> #include <sys/sysctl.h>
...@@ -98,5 +100,39 @@ size_t CUDAPinnedMaxChunkSize() { ...@@ -98,5 +100,39 @@ size_t CUDAPinnedMaxChunkSize() {
return CUDAPinnedMaxAllocSize() / 256; return CUDAPinnedMaxAllocSize() / 256;
} }
#ifdef PADDLE_WITH_XBYAK
namespace jit {
static Xbyak::util::Cpu cpu;
bool MayIUse(const cpu_isa_t cpu_isa) {
using namespace Xbyak::util; // NOLINT
switch (cpu_isa) {
case sse42:
return cpu.has(Cpu::tSSE42);
case avx2:
return cpu.has(Cpu::tAVX2);
case avx512_common:
return cpu.has(Cpu::tAVX512F);
case avx512_core:
return true && cpu.has(Cpu::tAVX512F) && cpu.has(Cpu::tAVX512BW) &&
cpu.has(Cpu::tAVX512VL) && cpu.has(Cpu::tAVX512DQ);
case avx512_core_vnni:
return true && cpu.has(Cpu::tAVX512F) && cpu.has(Cpu::tAVX512BW) &&
cpu.has(Cpu::tAVX512VL) && cpu.has(Cpu::tAVX512DQ) &&
cpu.has(Cpu::tAVX512_VNNI);
case avx512_mic:
return true && cpu.has(Cpu::tAVX512F) && cpu.has(Cpu::tAVX512CD) &&
cpu.has(Cpu::tAVX512ER) && cpu.has(Cpu::tAVX512PF);
case avx512_mic_4ops:
return true && MayIUse(avx512_mic) && cpu.has(Cpu::tAVX512_4FMAPS) &&
cpu.has(Cpu::tAVX512_4VNNIW);
case isa_any:
return true;
}
return false;
}
} // namespace jit
#endif
} // namespace platform } // namespace platform
} // namespace paddle } // namespace paddle
...@@ -37,5 +37,25 @@ size_t CUDAPinnedMinChunkSize(); ...@@ -37,5 +37,25 @@ size_t CUDAPinnedMinChunkSize();
//! Get the maximum chunk size for buddy allocator. //! Get the maximum chunk size for buddy allocator.
size_t CUDAPinnedMaxChunkSize(); size_t CUDAPinnedMaxChunkSize();
#ifdef PADDLE_WITH_XBYAK
namespace jit {
typedef enum {
isa_any,
sse42,
avx2,
avx512_common,
avx512_core,
avx512_core_vnni,
avx512_mic,
avx512_mic_4ops,
} cpu_isa_t; // Instruction set architecture
// May I use some instruction
inline bool MayIUse(const cpu_isa_t cpu_isa);
} // namespace jit
#endif
} // namespace platform } // namespace platform
} // namespace paddle } // namespace paddle
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册