diff --git a/CMakeLists.txt b/CMakeLists.txt index f2514d44acf230762cf8a2d2d40e5d715c0ca9fb..f56c5d382af8cdfb5a941ee272a0f8d22ec04d67 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -160,6 +160,7 @@ endif() ######################################################################################## include(external/mklml) # download mklml package +include(external/xbyak) # download xbyak package include(external/libxsmm) # download, build, install libxsmm include(external/zlib) # download, build, install zlib include(external/gflags) # download, build, install gflags diff --git a/cmake/external/xbyak.cmake b/cmake/external/xbyak.cmake new file mode 100644 index 0000000000000000000000000000000000000000..384c2f9328296ce6a8a6293be6cc47e5063dd3c4 --- /dev/null +++ b/cmake/external/xbyak.cmake @@ -0,0 +1,58 @@ +# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+if(WIN32 OR APPLE)
+    SET(WITH_XBYAK OFF CACHE STRING "Disable XBYAK in Windows and MacOS" FORCE)
+    return()  # nothing below is configured, so PADDLE_WITH_XBYAK stays undefined
+endif()
+set(WITH_XBYAK ON)  # set after the guard so it can never shadow the cached OFF
+
+include(ExternalProject)
+
+set(XBYAK_PROJECT extern_xbyak)
+set(XBYAK_PREFIX_DIR ${THIRD_PARTY_PATH}/xbyak)
+set(XBYAK_INSTALL_ROOT ${THIRD_PARTY_PATH}/install/xbyak)
+set(XBYAK_INC_DIR ${XBYAK_INSTALL_ROOT}/include)
+
+include_directories(${XBYAK_INC_DIR})
+include_directories(${XBYAK_INC_DIR}/xbyak)
+
+add_definitions(-DPADDLE_WITH_XBYAK)  # enables the jit code guarded in cpu_info.h/.cc
+
+# xbyak options
+add_definitions(-DXBYAK64)
+add_definitions(-DXBYAK_NO_OP_NAMES)
+
+ExternalProject_Add(
+    ${XBYAK_PROJECT}
+    ${EXTERNAL_PROJECT_LOG_ARGS}
+    DEPENDS ""
+    GIT_REPOSITORY "https://github.com/herumi/xbyak.git"
+    GIT_TAG "v5.661" # Jul 26th
+    PREFIX ${XBYAK_PREFIX_DIR}
+    UPDATE_COMMAND ""
+    CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${XBYAK_INSTALL_ROOT}
+    CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${XBYAK_INSTALL_ROOT}
+)
+# add_dependencies() on an INTERFACE library needs CMake >= 3.3; older versions get a dummy STATIC target.
+if (${CMAKE_VERSION} VERSION_LESS "3.3.0")
+    set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/xbyak_dummy.c)
+    file(WRITE ${dummyfile} "const char *dummy_xbyak = \"${dummyfile}\";")
+    add_library(xbyak STATIC ${dummyfile})
+else()
+    add_library(xbyak INTERFACE)
+endif()
+
+add_dependencies(xbyak ${XBYAK_PROJECT})
+list(APPEND external_project_dependencies xbyak)
diff --git a/paddle/fluid/platform/CMakeLists.txt b/paddle/fluid/platform/CMakeLists.txt
index a6f68f8b0c0a9b07c326888e30c0c911e7861607..f08c0e8e345179bf198ca9d50278b7f65e03ca2c 100644
--- a/paddle/fluid/platform/CMakeLists.txt
+++ b/paddle/fluid/platform/CMakeLists.txt
@@ -18,7 +18,11 @@ else()
 endif()
 
 cc_test(enforce_test SRCS enforce_test.cc DEPS stringpiece enforce)
-cc_library(cpu_info SRCS cpu_info.cc DEPS gflags glog enforce)
+set(CPU_INFO_DEPS gflags glog enforce)
+IF(WITH_XBYAK)
+    list(APPEND CPU_INFO_DEPS xbyak)
+ENDIF()
+cc_library(cpu_info SRCS cpu_info.cc DEPS ${CPU_INFO_DEPS})
 cc_test(cpu_info_test SRCS cpu_info_test.cc DEPS cpu_info)
 
 nv_library(gpu_info SRCS gpu_info.cc DEPS gflags glog
enforce)
diff --git a/paddle/fluid/platform/cpu_info.cc b/paddle/fluid/platform/cpu_info.cc
index f832d72b53e8d06a32d5c0ac2ecf7130aa28a666..9280965af29d0f5635c015846ed65746ee3dc669 100644
--- a/paddle/fluid/platform/cpu_info.cc
+++ b/paddle/fluid/platform/cpu_info.cc
@@ -13,6 +13,11 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/platform/cpu_info.h"
+// xbyak.cmake skips the include path and the define on Windows/macOS.
+#ifdef PADDLE_WITH_XBYAK
+#include "xbyak/xbyak.h"
+#include "xbyak/xbyak_util.h"
+#endif
 
 #ifdef __APPLE__
 #include
@@ -98,5 +103,44 @@ size_t CUDAPinnedMaxChunkSize() {
   return CUDAPinnedMaxAllocSize() / 256;
 }
 
+#ifdef PADDLE_WITH_XBYAK
+namespace jit {
+
+// xbyak's CPU query object; MayIUse() reads feature flags through has().
+static Xbyak::util::Cpu cpu;
+
+// Reports whether xbyak sees every CPU feature flag that cpu_isa needs.
+// isa_any is always usable; a value with no case below yields false.
+bool MayIUse(const cpu_isa_t cpu_isa) {
+  using namespace Xbyak::util;  // NOLINT
+  switch (cpu_isa) {
+    case sse42:
+      return cpu.has(Cpu::tSSE42);
+    case avx2:
+      return cpu.has(Cpu::tAVX2);
+    case avx512_common:
+      return cpu.has(Cpu::tAVX512F);
+    case avx512_core:
+      return cpu.has(Cpu::tAVX512F) && cpu.has(Cpu::tAVX512BW) &&
+             cpu.has(Cpu::tAVX512VL) && cpu.has(Cpu::tAVX512DQ);
+    case avx512_core_vnni:
+      return cpu.has(Cpu::tAVX512F) && cpu.has(Cpu::tAVX512BW) &&
+             cpu.has(Cpu::tAVX512VL) && cpu.has(Cpu::tAVX512DQ) &&
+             cpu.has(Cpu::tAVX512_VNNI);
+    case avx512_mic:
+      return cpu.has(Cpu::tAVX512F) && cpu.has(Cpu::tAVX512CD) &&
+             cpu.has(Cpu::tAVX512ER) && cpu.has(Cpu::tAVX512PF);
+    case avx512_mic_4ops:
+      // Same requirements as avx512_mic, plus the 4FMAPS and 4VNNIW flags.
+      return MayIUse(avx512_mic) && cpu.has(Cpu::tAVX512_4FMAPS) &&
+             cpu.has(Cpu::tAVX512_4VNNIW);
+    case isa_any:
+      return true;
+  }
+  return false;
+}
+
+}  // namespace jit
+#endif
 }  // namespace platform
 }  // namespace paddle
diff --git a/paddle/fluid/platform/cpu_info.h b/paddle/fluid/platform/cpu_info.h
index f06c2b67fe4385f427322e9bb2f3080fdd3acc94..f5f67667594f1ab80058533e4c5d5b04c2592b60 100644
--- a/paddle/fluid/platform/cpu_info.h
+++ b/paddle/fluid/platform/cpu_info.h
@@ -37,5 +37,25 @@ size_t CUDAPinnedMinChunkSize();
 //!
Get the maximum chunk size for buddy allocator.
 size_t CUDAPinnedMaxChunkSize();
 
+#ifdef PADDLE_WITH_XBYAK
+namespace jit {
+
+typedef enum {
+  isa_any,           // no requirement; MayIUse() always returns true
+  sse42,             // needs SSE4.2
+  avx2,              // needs AVX2
+  avx512_common,     // needs AVX512F
+  avx512_core,       // needs AVX512F + BW + VL + DQ
+  avx512_core_vnni,  // avx512_core flags + AVX512_VNNI
+  avx512_mic,        // needs AVX512F + CD + ER + PF
+  avx512_mic_4ops,   // avx512_mic + AVX512_4FMAPS + AVX512_4VNNIW
+} cpu_isa_t;  // Instruction set architecture
+
+// True if the CPU supports cpu_isa. Not inline: it is defined in cpu_info.cc.
+bool MayIUse(const cpu_isa_t cpu_isa);
+
+}  // namespace jit
+#endif
+
 }  // namespace platform
 }  // namespace paddle