diff --git a/AUTHORS.md b/AUTHORS.md
index 71d028fac369150ad7e8c0e78b5099b47abb56ee..1eaaff297714364d14a5463fb730d84761c8d18f 100644
--- a/AUTHORS.md
+++ b/AUTHORS.md
@@ -1,6 +1,9 @@
| Github account | name |
|---|---|
| abhinavarora | Abhinav Arora |
+| andreazanetti | Andrea Zanetti |
+| arlesniak | Artur Lesniak |
+| arogowie-intel | Adam Osewski |
| backyes | Yan-Fei Wang |
| baiyfbupt | Yi-Fan Bai |
| beckett1124 | Bin Qi |
@@ -8,6 +11,7 @@
| chengxiaohua1105 | Xiao-Hua Cheng |
| cxwangyi, yiwangbaidu, wangkuiyi | Yi Wang |
| cxysteven | Xing-Yi Cheng |
+| ddokupil | Dariusz Dokupil |
| dzhwinter | Zhi-Hong Dong |
| dragonwarrior | Long Wang |
| dyning | Yuning Du |
@@ -21,6 +25,7 @@
| hedaoyuan | Dao-Yuan He |
| helinwang | He-Lin Wang |
| jacquesqiao | Long-Fei Qiao |
+| jakpiase | Jakub Piasecki |
| [jczaja](https://raw.githubusercontent.com/jczaja/Paddle/paddle-poland-team/doc/images/paddle_poland_team.jpg) | Jacek Czaja |
| JiayiFeng | Jia-Yi Feng |
| kbinias | Krzysztof Binias |
@@ -42,6 +47,7 @@
| pakchoi | Chuan-Jiang Song |
| panyx0718 | Xin Pan |
| pengli09 | Peng Li |
+| pmajchrzak | Piotr Majchrzak |
| pkuyym | Ya-Ming Yang |
| pzelazko-intel | Pawel Zelazko |
| [pawelpiotrowicz](https://raw.githubusercontent.com/jczaja/Paddle/paddle-poland-team/doc/images/paddle_poland_team.jpg) | Pawel Piotrowicz |
@@ -72,3 +78,6 @@
| zhaopu7 | Pu Zhao |
| zhouxiao-coder | Xiao Zhou |
| Zrachel | Rui-Qing Zhang |
+| jeng1220 | Bai-Cheng(Ryan) Jeng (NVIDIA) |
+| mingxu1067 | Ming Huang (NVIDIA) |
+| zlsh80826 | Reese Wang (NVIDIA) |
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d874b21b0873da47d5acd5ef6a78bfe7fd7ce2e1..50070c7fc05133da758650eb5ac50e32effe63c9 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -12,14 +12,27 @@
# See the License for the specific language governing permissions and
# limitations under the License
-cmake_minimum_required(VERSION 3.10)
-cmake_policy(VERSION 3.10)
+if(APPLE AND WITH_ARM)
+ # cmake 3.19.2 version starts to support M1
+ cmake_minimum_required(VERSION 3.19.2)
+ cmake_policy(VERSION 3.19.2)
+else(APPLE AND WITH_ARM)
+ cmake_minimum_required(VERSION 3.10)
+ cmake_policy(VERSION 3.10)
+endif(APPLE AND WITH_ARM)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
set(PADDLE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
set(PADDLE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
include(system)
+# Note(zhouwei): Ninja Generator will set CMAKE_BUILD_TYPE to Debug
+if(NOT CMAKE_BUILD_TYPE)
+ set(CMAKE_BUILD_TYPE "Release" CACHE STRING
+ "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel"
+ FORCE)
+endif()
+
project(paddle CXX C)
# enable language CUDA
@@ -66,6 +79,11 @@ if(WITH_MUSL)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=deprecated-declarations -Wno-deprecated-declarations -Wno-error=pessimizing-move -Wno-error=deprecated-copy")
endif()
+if(APPLE AND WITH_ARM)
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -target arm64-apple-darwin")
+ set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS} -target arm64-apple-darwin")
+endif()
+
if(WITH_ASCEND_CL AND NOT WITH_ASCEND_CXX11)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=0")
endif()
@@ -90,10 +108,6 @@ if(WIN32)
if (MSVC_STATIC_CRT)
message(STATUS "Use static C runtime time, refer to https://docs.microsoft.com/en-us/cpp/c-runtime-library/crt-library-features?view=vs-2019")
- set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /MTd")
- set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /MT")
- set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /MTd")
- set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /MT")
foreach(flag_var
CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO
@@ -105,9 +119,7 @@ if(WIN32)
endforeach(flag_var)
endif()
- # NOTE(zhouwei25): temporarily change MP to 1 for reducing CPU & memory utilization
- set(PROCESS_MAX 1)
- #math(EXPR PROCESS_MAX "${CPU_CORES} * 1 / 2")
+ math(EXPR PROCESS_MAX "${CPU_CORES} * 2 / 3")
# windows build turn off warnings, use parallel compiling.
foreach(flag_var
@@ -116,7 +128,10 @@ if(WIN32)
CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE
CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO)
string(REGEX REPLACE "/W[1-4]" " /W0 " ${flag_var} "${${flag_var}}")
- set(${flag_var} "${${flag_var}} /MP${PROCESS_MAX}")
+ # NOTE(zhouwei25): GPU compile have too high memory utilization when parallel compiling
+ if(NOT WITH_GPU)
+ set(${flag_var} "${${flag_var}} /MP${PROCESS_MAX}")
+ endif()
endforeach(flag_var)
foreach(flag_var CMAKE_CXX_FLAGS CMAKE_C_FLAGS)
set(${flag_var} "${${flag_var}} /w")
@@ -208,16 +223,10 @@ option(WITH_STRIP "Strip so files of Whl packages" OFF)
# PY_VERSION
if(NOT PY_VERSION)
- set(PY_VERSION 2.7)
+ set(PY_VERSION 3.6)
endif()
set(PYBIND11_PYTHON_VERSION ${PY_VERSION})
-# CMAKE_BUILD_TYPE
-if(NOT CMAKE_BUILD_TYPE)
- set(CMAKE_BUILD_TYPE "Release" CACHE STRING
- "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel"
- FORCE)
-endif()
# the type of sanitizer, options are: Address, Leak, Memory, Thread, Undefined. Default: OFF
if(SANITIZER_TYPE AND NOT "${SANITIZER_TYPE}" MATCHES "^(Address|Leak|Memory|Thread|Undefined)$")
@@ -282,6 +291,27 @@ if(WITH_GPU)
endif()
endif()
+if(WITH_ROCM)
+ include(hip)
+ include(miopen) # set miopen libraries, must before configure
+endif(WITH_ROCM)
+
+if (NOT WITH_ROCM AND WITH_RCCL)
+ MESSAGE(WARNING
+ "Disable RCCL when compiling without ROCM. Force WITH_RCCL=OFF.")
+ set(WITH_RCCL OFF CACHE STRING
+ "Disable RCCL when compiling without ROCM" FORCE)
+endif()
+
+if(WITH_RCCL)
+ add_definitions("-DPADDLE_WITH_RCCL")
+ include(rccl)
+else()
+ if(WITH_ROCM)
+ MESSAGE(WARNING "If the environment is multi-card, the WITH_RCCL option needs to be turned on, otherwise only a single card can be used.")
+ endif()
+endif()
+
include(third_party) # download, build, install third_party, Contains about 20+ dependencies
include(flags) # set paddle compile flags
@@ -306,26 +336,6 @@ include(configure) # add paddle env configuration
include_directories("${PADDLE_SOURCE_DIR}")
-if(WITH_ROCM)
- include(hip)
-endif(WITH_ROCM)
-
-if (NOT WITH_ROCM AND WITH_RCCL)
- MESSAGE(WARNING
- "Disable RCCL when compiling without ROCM. Force WITH_RCCL=OFF.")
- set(WITH_RCCL OFF CACHE STRING
- "Disable RCCL when compiling without ROCM" FORCE)
-endif()
-
-if(WITH_RCCL)
- add_definitions("-DPADDLE_WITH_RCCL")
- include(rccl)
-else()
- if(WITH_ROCM)
- MESSAGE(WARNING "If the environment is multi-card, the WITH_RCCL option needs to be turned on, otherwise only a single card can be used.")
- endif()
-endif()
-
if(WITH_NV_JETSON)
set(WITH_ARM ON CACHE STRING "Set WITH_ARM=ON when compiling WITH_NV_JETSON=ON." FORCE)
endif()
@@ -333,8 +343,9 @@ endif()
if(WITH_ARM)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
- set(WITH_XBYAK OFF CACHE STRING "Disable XBYAK when compiling WITH_ARM=ON" FORCE)
+ set(WITH_XBYAK OFF CACHE STRING "Disable XBYAK when compiling WITH_ARM=ON." FORCE)
set(WITH_MKL OFF CACHE STRING "Disable MKL when compiling WITH_ARM=ON." FORCE)
+ set(WITH_AVX OFF CACHE STRING "Disable AVX when compiling WITH_AVX=OFF." FORCE)
add_definitions(-DPADDLE_WITH_ARM)
endif()
@@ -352,6 +363,11 @@ if (WITH_MIPS)
add_definitions(-DPADDLE_WITH_MIPS)
endif()
+if (WITH_HETERPS)
+ if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0)
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -faligned-new")
+ endif()
+endif()
set(PADDLE_PYTHON_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/python/build")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")
diff --git a/README.md b/README.md
index e8a7013d0b4432bc871843b83cf19494ca870cbc..6b3f3ef86fe1bc38483789d85b101143fc723ded 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,3 @@
-
@@ -22,7 +21,7 @@ PaddlePaddle is originated from industrial practices with dedication and commitm
## Installation
-### Latest PaddlePaddle Release: [v2.0](https://github.com/PaddlePaddle/Paddle/tree/release/2.0)
+### Latest PaddlePaddle Release: [v2.1](https://github.com/PaddlePaddle/Paddle/tree/release/2.1)
Our vision is to enable deep learning for everyone via PaddlePaddle.
Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddle/releases) to track the latest features of PaddlePaddle.
@@ -34,9 +33,9 @@ pip install paddlepaddle
pip install paddlepaddle-gpu
```
-More infomation about installation, please view [Quick Install](https://www.paddlepaddle.org.cn/install/quick)
+For more information about installation, please view [Quick Install](https://www.paddlepaddle.org.cn/install/quick)
-Now our developers can acquire Tesla V100 online computing resources for free. If you create a program by AI Studio, you will obtain 10 hours to train models online per day. [Click here to start](https://aistudio.baidu.com/aistudio/index).
+Now our developers can acquire Tesla V100 online computing resources for free. If you create a program by AI Studio, you will obtain 8 hours to train models online per day. [Click here to start](https://aistudio.baidu.com/aistudio/index).
## FOUR LEADING TECHNOLOGIES
@@ -47,14 +46,13 @@ Now our developers can acquire Tesla V100 online computing resources for free. I
- **Support Ultra-Large-Scale Training of Deep Neural Networks**
- PaddlePaddle has made breakthroughs in ultra-large-scale deep neural networks training. It launched the world's first large-scale open-source training platform that supports the training of deep networks with 100 billions of features and trillions of parameters using data sources distributed over hundreds of nodes. PaddlePaddle overcomes the online deep learning challenges for ultra-large-scale deep learning models, and further achieved the real-time model updating with more than 1 trillion parameters.
+ PaddlePaddle has made breakthroughs in ultra-large-scale deep neural networks training. It launched the world's first large-scale open-source training platform that supports the training of deep networks with 100 billion features and trillions of parameters using data sources distributed over hundreds of nodes. PaddlePaddle overcomes the online deep learning challenges for ultra-large-scale deep learning models, and further achieved real-time model updating with more than 1 trillion parameters.
[Click here to learn more](https://github.com/PaddlePaddle/Fleet)
-- **Accelerated High-Performance Inference over Ubiquitous Deployments**
+- **High-Performance Inference Engines for Comprehensive Deployment Environments**
- PaddlePaddle is not only compatible with other open-source frameworks for models training, but also works well on the ubiquitous developments, varying from platforms to devices. More specifically, PaddlePaddle accelerates the inference procedure with the fastest speed-up. Note that, a recent breakthrough of inference speed has been made by PaddlePaddle on Huawei's Kirin NPU, through the hardware/software co-optimization.
- [Click here to learn more](https://github.com/PaddlePaddle/Paddle-Lite)
+ PaddlePaddle is not only compatible with models trained in 3rd party open-source frameworks, but also offers complete inference products for various production scenarios. Our inference product line includes [Paddle Inference](https://paddle-inference.readthedocs.io/en/latest/product_introduction/summary.html): Native inference library for high-performance server and cloud inference; [Paddle Serving](https://github.com/PaddlePaddle/Serving): A service-oriented framework suitable for distributed and pipeline productions; [Paddle Lite](https://github.com/PaddlePaddle/Paddle-Lite): Ultra-Lightweight inference engine for mobile and IoT environments; [Paddle.js](https://www.paddlepaddle.org.cn/paddle/paddlejs): A frontend inference engine for browser and mini-apps. Furthermore, by great amounts of optimization with leading hardware in each scenario, Paddle inference engines outperform most of the other mainstream frameworks.
- **Industry-Oriented Models and Libraries with Open Source Repositories**
@@ -87,8 +85,13 @@ We provide [English](https://www.paddlepaddle.org.cn/documentation/docs/en/guide
## Communication
- [Github Issues](https://github.com/PaddlePaddle/Paddle/issues): bug reports, feature requests, install issues, usage issues, etc.
-- QQ discussion group: 778260830 (PaddlePaddle).
+- QQ discussion group: 793866180 (PaddlePaddle).
- [Forums](https://ai.baidu.com/forum/topic/list/168?pageNo=1): discuss implementations, research, etc.
+
+## Courses
+
+- [Server Deployments](https://aistudio.baidu.com/aistudio/course/introduce/19084): Courses introducing high-performance server deployments via local and remote services.
+- [Edge Deployments](https://aistudio.baidu.com/aistudio/course/introduce/22690): Courses introducing edge deployments from mobile, IoT to web and applets.
## Copyright and License
PaddlePaddle is provided under the [Apache-2.0 license](LICENSE).
diff --git a/README_cn.md b/README_cn.md
index 7a10cba2845498d2299fc516f5804eb1a84e4ecc..cc8afde7dd266262c321c8277c88e6420716d7f6 100644
--- a/README_cn.md
+++ b/README_cn.md
@@ -1,4 +1,4 @@
-
+
@@ -19,7 +19,7 @@
## 安装
-### PaddlePaddle最新版本: [v2.0](https://github.com/PaddlePaddle/Paddle/tree/release/2.0)
+### PaddlePaddle最新版本: [v2.1](https://github.com/PaddlePaddle/Paddle/tree/release/2.1)
跟进PaddlePaddle最新特性请参考我们的[版本说明](https://github.com/PaddlePaddle/Paddle/releases)
@@ -32,7 +32,7 @@ pip install paddlepaddle-gpu
```
更多安装信息详见官网 [安装说明](https://www.paddlepaddle.org.cn/install/quick)
-PaddlePaddle用户可领取**免费Tesla V100在线算力资源**,训练模型更高效。**每日登陆即送10小时**,[前往使用免费算力](https://aistudio.baidu.com/aistudio/index)。
+PaddlePaddle用户可领取**免费Tesla V100在线算力资源**,训练模型更高效。**每日登陆即送8小时**,[前往使用免费算力](https://aistudio.baidu.com/aistudio/index)。
## 四大领先技术
@@ -47,10 +47,9 @@ PaddlePaddle用户可领取**免费Tesla V100在线算力资源**,训练模型
[查看详情](https://github.com/PaddlePaddle/Fleet)
-- **多端多平台部署的高性能推理引擎**
+- **支持多端多平台的高性能推理部署工具**
- 飞桨不仅兼容其他开源框架训练的模型,还可以轻松地部署到不同架构的平台设备上。同时,飞桨的推理速度也是全面领先的。尤其经过了跟华为麒麟NPU的软硬一体优化,使得飞桨在NPU上的推理速度进一步突破。
- [查看详情](https://github.com/PaddlePaddle/Paddle-Lite)
+ 飞桨不仅广泛兼容第三方开源框架训练的模型部署,并且为不同的场景的生产环境提供了完备的推理引擎,包括适用于高性能服务器及云端推理的原生推理库 [Paddle Inference](https://paddle-inference.readthedocs.io/en/latest/product_introduction/summary.html),面向分布式、流水线生产环境下自动上云、A/B测试等高阶功能的服务化推理框架 [Paddle Serving](https://github.com/PaddlePaddle/Serving),针对于移动端、物联网场景的轻量化推理引擎 [Paddle Lite](https://github.com/PaddlePaddle/Paddle-Lite),以及在浏览器、小程序等环境下使用的前端推理引擎 [Paddle.js](https://www.paddlepaddle.org.cn/paddle/paddlejs)。同时,透过与不同场景下的主流硬件高度适配优化及异构计算的支持, 飞桨的推理性能也领先绝大部分的主流实现。
- **面向产业应用,开源开放覆盖多领域的工业级模型库。**
@@ -83,8 +82,13 @@ PaddlePaddle用户可领取**免费Tesla V100在线算力资源**,训练模型
## 交流与反馈
- 欢迎您通过[Github Issues](https://github.com/PaddlePaddle/Paddle/issues)来提交问题、报告与建议
-- QQ群: 778260830 (PaddlePaddle)
+- QQ群: 793866180 (PaddlePaddle)
- [论坛](https://ai.baidu.com/forum/topic/list/168): 欢迎大家在PaddlePaddle论坛分享在使用PaddlePaddle中遇到的问题和经验, 营造良好的论坛氛围
+
+## 课程
+
+- [服务器部署](https://aistudio.baidu.com/aistudio/course/introduce/19084): 详细介绍高性能服务器端部署实操,包含本地端及服务化Serving部署等
+- [端侧部署](https://aistudio.baidu.com/aistudio/course/introduce/22690): 详细介绍端侧多场景部署实操,从移动端设备、IoT、网页到小程序部署
## 版权和许可证
PaddlePaddle由[Apache-2.0 license](LICENSE)提供
diff --git a/cmake/cblas.cmake b/cmake/cblas.cmake
index 6056b53bc2218fb24d2e97b281b9a0d68bc9a306..69e66407580b62d52c941fee522bae7dbca23796 100644
--- a/cmake/cblas.cmake
+++ b/cmake/cblas.cmake
@@ -69,15 +69,21 @@ if(NOT DEFINED CBLAS_PROVIDER)
PATHS ${OPENBLAS_LIB_SEARCH_PATHS})
if(OPENBLAS_LAPACKE_INC_DIR AND OPENBLAS_INC_DIR AND OPENBLAS_LIB)
- set(CBLAS_PROVIDER OPENBLAS)
- set(CBLAS_INC_DIR ${OPENBLAS_INC_DIR} ${OPENBLAS_LAPACKE_INC_DIR})
- set(CBLAS_LIBRARIES ${OPENBLAS_LIB})
-
- add_definitions(-DPADDLE_USE_OPENBLAS)
- add_definitions(-DLAPACK_FOUND)
-
- message(STATUS "Found OpenBLAS (include: ${OPENBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
- message(STATUS "Found lapack in OpenBLAS (include: ${OPENBLAS_LAPACKE_INC_DIR})")
+ file(READ "${OPENBLAS_INC_DIR}/openblas_config.h" config_file)
+ string(REGEX MATCH "OpenBLAS ([0-9]+\.[0-9]+\.[0-9]+)" tmp ${config_file})
+ string(REGEX MATCH "([0-9]+\.[0-9]+\.[0-9]+)" ver ${tmp})
+
+ if (${ver} VERSION_GREATER_EQUAL "0.3.7")
+ set(CBLAS_PROVIDER OPENBLAS)
+ set(CBLAS_INC_DIR ${OPENBLAS_INC_DIR} ${OPENBLAS_LAPACKE_INC_DIR})
+ set(CBLAS_LIBRARIES ${OPENBLAS_LIB})
+
+ add_definitions(-DPADDLE_USE_OPENBLAS)
+ add_definitions(-DLAPACK_FOUND)
+
+ message(STATUS "Found OpenBLAS (include: ${OPENBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
+ message(STATUS "Found lapack in OpenBLAS (include: ${OPENBLAS_LAPACKE_INC_DIR})")
+ endif()
endif()
endif()
diff --git a/cmake/ccache.cmake b/cmake/ccache.cmake
index 64f4f6c2a1c254d868b29bdcebf9840a54146d4a..25798758473af52dc66230ac70a7d750e78176de 100644
--- a/cmake/ccache.cmake
+++ b/cmake/ccache.cmake
@@ -1,14 +1,29 @@
# Use ccache if found ccache program
-find_program(CCACHE_PATH ccache)
+if(NOT WIN32)
+ find_program(CCACHE_PATH ccache)
+ if(CCACHE_PATH)
+ execute_process(COMMAND ccache -V OUTPUT_VARIABLE ccache_output)
+ execute_process(COMMAND ccache -s cache directory OUTPUT_VARIABLE cache_directory)
+ string(REGEX MATCH "[0-9]+.[0-9]+" ccache_version ${ccache_output})
+ message(STATUS "ccache is founded, use ccache to speed up compile on Unix.")
+ # show statistics summary of ccache
+ message("ccache version\t\t\t " ${ccache_version} "\n" ${cache_directory})
+ set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ${CCACHE_PATH})
+ set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ${CCACHE_PATH})
+ endif(CCACHE_PATH)
+elseif("${CMAKE_GENERATOR}" STREQUAL "Ninja")
+ # (Note:zhouwei25) Only Ninja Generator can support sccache now
+ find_program(SCCACHE_PATH sccache)
-if(CCACHE_PATH)
- execute_process(COMMAND ccache -V OUTPUT_VARIABLE ccache_output)
- execute_process(COMMAND ccache -s cache directory OUTPUT_VARIABLE cache_directory)
- string(REGEX MATCH "[0-9]+.[0-9]+" ccache_version ${ccache_output})
- message(STATUS "Ccache is founded, use ccache to speed up compile.")
- # show statistics summary of ccache
- message("ccache version\t\t\t " ${ccache_version} "\n" ${cache_directory})
- set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ${CCACHE_PATH})
- set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ${CCACHE_PATH})
-endif(CCACHE_PATH)
+ if(SCCACHE_PATH)
+ execute_process(COMMAND sccache -V OUTPUT_VARIABLE sccache_version)
+ message(STATUS "${sccache_version} is founded, use [${SCCACHE_PATH}] to speed up compile on Windows.")
+
+ set(CMAKE_C_COMPILER_LAUNCHER ${SCCACHE_PATH})
+ set(CMAKE_CXX_COMPILER_LAUNCHER ${SCCACHE_PATH})
+ # (Note:zhouwei25) sccache for cuda compiler has bug so that it can't be hit
+ # refer to https://github.com/mozilla/sccache/issues/1017, so we fix it
+ set(CMAKE_CUDA_COMPILER_LAUNCHER ${SCCACHE_PATH})
+ endif(SCCACHE_PATH)
+endif()
diff --git a/cmake/configure.cmake b/cmake/configure.cmake
index e7f125269be1f5e015c6cf015489c312538ca4ba..458ab992c25f3818ae53b28fab38d9f986a36265 100644
--- a/cmake/configure.cmake
+++ b/cmake/configure.cmake
@@ -143,6 +143,14 @@ elseif(WITH_ROCM)
add_definitions(-DPADDLE_WITH_HIP)
add_definitions(-DEIGEN_USE_GPU)
add_definitions(-DEIGEN_USE_HIP)
+
+ if(NOT MIOPEN_FOUND)
+ message(FATAL_ERROR "Paddle needs MIOpen to compile")
+ endif()
+
+ if(${MIOPEN_VERSION} VERSION_LESS 2090)
+ message(FATAL_ERROR "Paddle needs MIOPEN >= 2.9 to compile")
+ endif()
else()
add_definitions(-DHPPL_STUB_FUNC)
list(APPEND CMAKE_CXX_SOURCE_FILE_EXTENSIONS cu)
diff --git a/cmake/cuda.cmake b/cmake/cuda.cmake
index 7f2addb02d36ddf85cd08542cc5baab31d495bc5..e1a9324650ac9c2c595ea7727354069080df10c1 100644
--- a/cmake/cuda.cmake
+++ b/cmake/cuda.cmake
@@ -95,11 +95,23 @@ function(select_nvcc_arch_flags out_variable)
if(${CUDA_ARCH_NAME} STREQUAL "Kepler")
set(cuda_arch_bin "30 35")
elseif(${CUDA_ARCH_NAME} STREQUAL "Maxwell")
- set(cuda_arch_bin "50")
+ if (WITH_NV_JETSON)
+ set(cuda_arch_bin "53")
+ else()
+ set(cuda_arch_bin "50")
+ endif()
elseif(${CUDA_ARCH_NAME} STREQUAL "Pascal")
- set(cuda_arch_bin "60 61")
+ if (WITH_NV_JETSON)
+ set(cuda_arch_bin "62")
+ else()
+ set(cuda_arch_bin "60 61")
+ endif()
elseif(${CUDA_ARCH_NAME} STREQUAL "Volta")
- set(cuda_arch_bin "70")
+ if (WITH_NV_JETSON)
+ set(cuda_arch_bin "72")
+ else()
+ set(cuda_arch_bin "70")
+ endif()
elseif(${CUDA_ARCH_NAME} STREQUAL "Turing")
set(cuda_arch_bin "75")
elseif(${CUDA_ARCH_NAME} STREQUAL "Ampere")
@@ -205,26 +217,18 @@ set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-extended-lambda")
if(WIN32)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler \"/wd4244 /wd4267 /wd4819 \"")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler /bigobj")
- if(CMAKE_BUILD_TYPE STREQUAL "Debug")
- # match the cl's _ITERATOR_DEBUG_LEVEL
- set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler \"-g -G -D_DEBUG\"")
- if(MSVC_STATIC_CRT)
- set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler /MTd")
- else()
- set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler /MDd")
- endif()
- elseif(CMAKE_BUILD_TYPE STREQUAL "Release")
- set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler \"-DNDEBUG\"")
- if(MSVC_STATIC_CRT)
- set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler /MT")
- else()
- set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler /MD")
- endif()
- else()
- message(FATAL "Windows only support Release or Debug build now. Please set visual studio build type to Release/Debug, x64 build.")
+ if(MSVC_STATIC_CRT)
+ foreach(flag_var
+ CMAKE_CUDA_FLAGS CMAKE_CUDA_FLAGS_DEBUG CMAKE_CUDA_FLAGS_RELEASE
+ CMAKE_CUDA_FLAGS_MINSIZEREL CMAKE_CUDA_FLAGS_RELWITHDEBINFO)
+ if(${flag_var} MATCHES "-MD")
+ string(REGEX REPLACE "-MD" "-MT" ${flag_var} "${${flag_var}}")
+ endif()
+ endforeach(flag_var)
endif()
endif()
mark_as_advanced(CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_VERBOSE_BUILD)
mark_as_advanced(CUDA_SDK_ROOT_DIR CUDA_SEPARABLE_COMPILATION)
+include(thrust)
diff --git a/cmake/external/boost.cmake b/cmake/external/boost.cmake
index f14195480b7dc80df0566c9b09075797010fe289..d88d693d8286d1efab5242fb758331ef64663a4d 100644
--- a/cmake/external/boost.cmake
+++ b/cmake/external/boost.cmake
@@ -46,6 +46,7 @@ ExternalProject_Add(
${BOOST_PROJECT}
${EXTERNAL_PROJECT_LOG_ARGS}
"${BOOST_DOWNLOAD_CMD}"
+ URL_MD5 f891e8c2c9424f0565f0129ad9ab4aff
PREFIX ${BOOST_PREFIX_DIR}
DOWNLOAD_DIR ${BOOST_SOURCE_DIR}
SOURCE_DIR ${BOOST_SOURCE_DIR}
diff --git a/cmake/external/box_ps.cmake b/cmake/external/box_ps.cmake
index adfc6dba1f083e11446401e6b5d5623db080f912..85e1f94fd2c67f2526a5201045caac724fd2250f 100644
--- a/cmake/external/box_ps.cmake
+++ b/cmake/external/box_ps.cmake
@@ -49,7 +49,10 @@ ExternalProject_Add(
DOWNLOAD_NO_PROGRESS 1
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${BOX_PS_INSTALL_ROOT}
+ -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${BOX_PS_INSTALL_ROOT}
+ -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
+ BUILD_BYPRODUCTS ${BOX_PS_LIB}
)
ADD_LIBRARY(box_ps SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET box_ps PROPERTY IMPORTED_LOCATION ${BOX_PS_LIB})
diff --git a/cmake/external/brpc.cmake b/cmake/external/brpc.cmake
index 2d72b6eb56deaa2547051756afc075a100aeb251..1a45cfa0a1e514aae83808aebf401c38efd825fd 100644
--- a/cmake/external/brpc.cmake
+++ b/cmake/external/brpc.cmake
@@ -45,23 +45,24 @@ ExternalProject_Add(
PREFIX ${BRPC_SOURCES_DIR}
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
- -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
- -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
- -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
- -DCMAKE_INSTALL_PREFIX=${BRPC_INSTALL_DIR}
- -DCMAKE_INSTALL_LIBDIR=${BRPC_INSTALL_DIR}/lib
- -DCMAKE_POSITION_INDEPENDENT_CODE=ON
- -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
- -DCMAKE_PREFIX_PATH=${prefix_path}
- -DWITH_GLOG=ON
- -DIOBUF_WITH_HUGE_BLOCK=ON
- -DBRPC_WITH_RDMA=${WITH_BRPC_RDMA}
- ${EXTERNAL_OPTIONAL_ARGS}
- LIST_SEPARATOR |
+ -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
+ -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
+ -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
+ -DCMAKE_INSTALL_PREFIX=${BRPC_INSTALL_DIR}
+ -DCMAKE_INSTALL_LIBDIR=${BRPC_INSTALL_DIR}/lib
+ -DCMAKE_POSITION_INDEPENDENT_CODE=ON
+ -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
+ -DCMAKE_PREFIX_PATH=${prefix_path}
+ -DWITH_GLOG=ON
+ -DIOBUF_WITH_HUGE_BLOCK=ON
+ -DBRPC_WITH_RDMA=${WITH_BRPC_RDMA}
+ ${EXTERNAL_OPTIONAL_ARGS}
+ LIST_SEPARATOR |
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${BRPC_INSTALL_DIR}
- -DCMAKE_INSTALL_LIBDIR:PATH=${BRPC_INSTALL_DIR}/lib
- -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
- -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
+ -DCMAKE_INSTALL_LIBDIR:PATH=${BRPC_INSTALL_DIR}/lib
+ -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
+ -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
+ BUILD_BYPRODUCTS ${BRPC_LIBRARIES}
)
# ADD_DEPENDENCIES(extern_brpc protobuf ssl crypto leveldb gflags glog gtest snappy)
ADD_DEPENDENCIES(extern_brpc protobuf ssl crypto leveldb gflags glog snappy)
diff --git a/cmake/external/cryptopp.cmake b/cmake/external/cryptopp.cmake
index a30164ada2791bd90529a34e4103a358854ccec6..aedd40aec68481e1a92924bcd484384ecdd87d88 100644
--- a/cmake/external/cryptopp.cmake
+++ b/cmake/external/cryptopp.cmake
@@ -33,6 +33,10 @@ ELSE(WIN32)
SET(CRYPTOPP_LIBRARIES "${CRYPTOPP_INSTALL_DIR}/lib/libcryptopp.a" CACHE FILEPATH "cryptopp library." FORCE)
ENDIF(WIN32)
+IF(APPLE AND WITH_ARM)
+ SET(CMAKE_CXX_FLAGS "-DCRYPTOPP_ARM_CRC32_AVAILABLE=0")
+ENDIF()
+
set(CRYPTOPP_CMAKE_ARGS ${COMMON_CMAKE_ARGS}
-DBUILD_SHARED=ON
-DBUILD_STATIC=ON
@@ -72,6 +76,7 @@ ExternalProject_Add(
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${CRYPTOPP_INSTALL_DIR}
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
+ BUILD_BYPRODUCTS ${CRYPTOPP_LIBRARIES}
)
ADD_LIBRARY(cryptopp STATIC IMPORTED GLOBAL)
diff --git a/cmake/external/cub.cmake b/cmake/external/cub.cmake
index a26568860f42dae5cdcce0b1bf51d06b531608c6..f263086e8bef80864790e2c44474a45f072a3873 100644
--- a/cmake/external/cub.cmake
+++ b/cmake/external/cub.cmake
@@ -14,27 +14,27 @@
include(ExternalProject)
-set(CUB_PREFIX_DIR ${THIRD_PARTY_PATH}/cub)
-set(CUB_SOURCE_DIR ${THIRD_PARTY_PATH}/cub/src/extern_cub)
-set(CUB_REPOSITORY ${GIT_URL}/NVlabs/cub.git)
-set(CUB_TAG 1.8.0)
+# Note(zhouwei): extern_cub has code __FILE_, If the path of extern_cub is changed,
+# it will effect about 30+ cu files sccache hit and slow compile speed on windows.
+# Therefore, a fixed CUB_PATH will be input to increase the sccache hit rate.
+set(CUB_PATH "${THIRD_PARTY_PATH}/cub" CACHE STRING "A path setting for external_cub path.")
+set(CUB_PREFIX_DIR ${CUB_PATH})
-cache_third_party(extern_cub
- REPOSITORY ${CUB_REPOSITORY}
- TAG ${CUB_TAG}
- DIR CUB_SOURCE_DIR)
+set(CUB_REPOSITORY ${GIT_URL}/NVlabs/cub.git)
+set(CUB_TAG 1.8.0)
-SET(CUB_INCLUDE_DIR ${CUB_SOURCE_DIR})
+SET(CUB_INCLUDE_DIR ${CUB_PREFIX_DIR}/src/extern_cub)
+message("CUB_INCLUDE_DIR is ${CUB_INCLUDE_DIR}")
include_directories(${CUB_INCLUDE_DIR})
ExternalProject_Add(
extern_cub
${EXTERNAL_PROJECT_LOG_ARGS}
${SHALLOW_CLONE}
- "${CUB_DOWNLOAD_CMD}"
+ GIT_REPOSITORY ${CUB_REPOSITORY}
+ GIT_TAG ${CUB_TAG}
PREFIX ${CUB_PREFIX_DIR}
- SOURCE_DIR ${CUB_SOURCE_DIR}
- UPDATE_COMMAND ""
+ UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
diff --git a/cmake/external/dgc.cmake b/cmake/external/dgc.cmake
index bc8611f3862cd14c0de493564ea82a1c9ce66667..3c64e1ea11ecd65ab15e80147cd62b1cde371722 100644
--- a/cmake/external/dgc.cmake
+++ b/cmake/external/dgc.cmake
@@ -39,6 +39,7 @@ ExternalProject_Add(
&& cp ${DGC_SOURCES_DIR}/build/lib/libdgc.a ${DGC_LIBRARIES}
&& cp ${DGC_SOURCES_DIR}/build/include/dgc.h ${DGC_INCLUDE_DIR}/dgc/
BUILD_IN_SOURCE 1
+ BUILD_BYPRODUCTS ${DGC_LIBRARIES}
)
ADD_LIBRARY(dgc STATIC IMPORTED GLOBAL)
diff --git a/cmake/external/eigen.cmake b/cmake/external/eigen.cmake
index 4619f9f7b7e34c99f7fb3048a3eae9e9ffc0b5ac..aa471002eacb6a61a9cf835f293a86a75d87db8f 100644
--- a/cmake/external/eigen.cmake
+++ b/cmake/external/eigen.cmake
@@ -33,7 +33,9 @@ elseif(LINUX)
# which will cause compiler error of using __host__ funciont in __host__ __device__
file(TO_NATIVE_PATH ${PADDLE_SOURCE_DIR}/patches/eigen/Meta.h native_src)
file(TO_NATIVE_PATH ${EIGEN_SOURCE_DIR}/Eigen/src/Core/util/Meta.h native_dst)
- set(EIGEN_PATCH_COMMAND cp ${native_src} ${native_dst})
+ file(TO_NATIVE_PATH ${PADDLE_SOURCE_DIR}/patches/eigen/TensorReductionGpu.h native_src1)
+ file(TO_NATIVE_PATH ${EIGEN_SOURCE_DIR}/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h native_dst1)
+ set(EIGEN_PATCH_COMMAND cp ${native_src} ${native_dst} && cp ${native_src1} ${native_dst1})
endif()
endif()
diff --git a/cmake/external/gflags.cmake b/cmake/external/gflags.cmake
index 576598b4ac6e3bc085f75465456be580db159005..8360761de6fb9869fec42fa40e87fd29e595650f 100644
--- a/cmake/external/gflags.cmake
+++ b/cmake/external/gflags.cmake
@@ -61,6 +61,7 @@ ExternalProject_Add(
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GFLAGS_INSTALL_DIR}
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
+ BUILD_BYPRODUCTS ${GFLAGS_LIBRARIES}
)
ADD_LIBRARY(gflags STATIC IMPORTED GLOBAL)
diff --git a/cmake/external/glog.cmake b/cmake/external/glog.cmake
index 05b98e2b56a33a65315d1e4fb1c02c738f93b712..d2bb1e62e83de391272315d379619feca84c62bd 100644
--- a/cmake/external/glog.cmake
+++ b/cmake/external/glog.cmake
@@ -64,6 +64,7 @@ ExternalProject_Add(
-DCMAKE_INSTALL_LIBDIR:PATH=${GLOG_INSTALL_DIR}/lib
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
+ BUILD_BYPRODUCTS ${GLOG_LIBRARIES}
)
ADD_LIBRARY(glog STATIC IMPORTED GLOBAL)
diff --git a/cmake/external/gloo.cmake b/cmake/external/gloo.cmake
index e8db13a694f5578e314dc1a7c95ed24ad88bad02..03e45e3e5c67b0118727a616f8cd0c013c621fe6 100644
--- a/cmake/external/gloo.cmake
+++ b/cmake/external/gloo.cmake
@@ -32,7 +32,7 @@ cache_third_party(extern_gloo
TAG ${GLOO_TAG}
DIR GLOO_SOURCE_DIR)
- if(WITH_ASCEND OR WITH_ASCEND_CL)
+if(WITH_ASCEND OR WITH_ASCEND_CL)
ExternalProject_Add(
extern_gloo
${EXTERNAL_PROJECT_LOG_ARGS}
@@ -47,6 +47,7 @@ cache_third_party(extern_gloo
&& mkdir -p ${GLOO_LIBRARY_DIR} ${GLOO_INCLUDE_DIR}/gloo
INSTALL_COMMAND ${CMAKE_COMMAND} -E copy ${GLOO_SOURCE_DIR}/build/gloo/libgloo.a ${GLOO_LIBRARY_DIR}
COMMAND ${CMAKE_COMMAND} -E copy_directory "${GLOO_SOURCE_DIR}/gloo/" "${GLOO_INCLUDE_DIR}/gloo"
+ BUILD_BYPRODUCTS ${GLOO_LIBRARIES}
)
else()
ExternalProject_Add(
@@ -63,6 +64,7 @@ else()
&& mkdir -p ${GLOO_LIBRARY_DIR} ${GLOO_INCLUDE_DIR}/gloo
INSTALL_COMMAND ${CMAKE_COMMAND} -E copy ${GLOO_SOURCE_DIR}/build/gloo/libgloo.a ${GLOO_LIBRARY_DIR}
COMMAND ${CMAKE_COMMAND} -E copy_directory "${GLOO_SOURCE_DIR}/gloo/" "${GLOO_INCLUDE_DIR}/gloo"
+ BUILD_BYPRODUCTS ${GLOO_LIBRARIES}
)
endif()
diff --git a/cmake/external/gtest.cmake b/cmake/external/gtest.cmake
index 3db12f084eb5a3519e529afe90a151b33823fe82..e7d4783a9593a7bac474adc089eaca543db7a600 100644
--- a/cmake/external/gtest.cmake
+++ b/cmake/external/gtest.cmake
@@ -79,6 +79,8 @@ ExternalProject_Add(
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GTEST_INSTALL_DIR}
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
+ BUILD_BYPRODUCTS ${GTEST_LIBRARIES}
+ BUILD_BYPRODUCTS ${GTEST_MAIN_LIBRARIES}
)
ADD_LIBRARY(gtest STATIC IMPORTED GLOBAL)
diff --git a/cmake/external/leveldb.cmake b/cmake/external/leveldb.cmake
index 79dc403e67d5266fe618b997c08c75d4cc86b82b..c36f49d3bd354acabf3654b642fd24ba227470b8 100644
--- a/cmake/external/leveldb.cmake
+++ b/cmake/external/leveldb.cmake
@@ -33,6 +33,7 @@ ExternalProject_Add(
&& cp ${LEVELDB_SOURCES_DIR}/src/extern_leveldb/libleveldb.a ${LEVELDB_LIBRARIES}
&& cp -r ${LEVELDB_SOURCES_DIR}/src/extern_leveldb/include ${LEVELDB_INSTALL_DIR}/
BUILD_IN_SOURCE 1
+ BUILD_BYPRODUCTS ${LEVELDB_LIBRARIES}
)
ADD_DEPENDENCIES(extern_leveldb snappy)
diff --git a/cmake/external/libmct.cmake b/cmake/external/libmct.cmake
index c10a662485c2d172f408a7622e7f14d0b566f274..d318bc7d0f3c3fa99d68a502496423ffbc4c08a2 100644
--- a/cmake/external/libmct.cmake
+++ b/cmake/external/libmct.cmake
@@ -49,7 +49,9 @@ ExternalProject_Add(
DOWNLOAD_NO_PROGRESS 1
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBMCT_INSTALL_ROOT}
+ -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${LIBMCT_INSTALL_ROOT}
+ -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
)
add_library(libmct INTERFACE)
diff --git a/cmake/external/libxsmm.cmake b/cmake/external/libxsmm.cmake
index 0d09576286d907ec6964df69efb0efcf9885f57d..fae8154eb1cb0354683b8141eeb28a7bf5012cbe 100644
--- a/cmake/external/libxsmm.cmake
+++ b/cmake/external/libxsmm.cmake
@@ -18,8 +18,8 @@ SET(LIBXSMM_SOURCES_DIR ${THIRD_PARTY_PATH}/libxsmm)
SET(LIBXSMM_INSTALL_DIR ${THIRD_PARTY_PATH}/install/libxsmm)
SET(LIBXSMM_INCLUDE_DIR "${LIBXSMM_INSTALL_DIR}/include" CACHE PATH "LIBXSMM include directory." FORCE)
SET(LIBXSMM_LIBRARY_DIR "${LIBXSMM_INSTALL_DIR}/lib" CACHE PATH "LIBXSMM library directory." FORCE)
-SET(LIBXSMM_LIBS "${LIBXSMM_LIBRARY_DIR}/libxsmm.a"
- "${LIBXSMM_LIBRARY_DIR}/libxsmmnoblas.a")
+SET(LIBXSMM_LIB "${LIBXSMM_LIBRARY_DIR}/libxsmm.a")
+SET(LIBXSMMNOBLAS_LIB "${LIBXSMM_LIBRARY_DIR}/libxsmmnoblas.a")
ExternalProject_Add(
extern_libxsmm
@@ -32,10 +32,12 @@ ExternalProject_Add(
BUILD_IN_SOURCE 1
BUILD_COMMAND $(MAKE) --silent PREFIX=${LIBXSMM_INSTALL_DIR} CXX=g++ CC=gcc WARP=0 install
INSTALL_COMMAND ""
+ BUILD_BYPRODUCTS ${LIBXSMM_LIB}
+ BUILD_BYPRODUCTS ${LIBXSMMNOBLAS_LIB}
)
ADD_LIBRARY(libxsmm STATIC IMPORTED GLOBAL)
-SET_PROPERTY(TARGET libxsmm PROPERTY IMPORTED_LOCATION "${LIBXSMM_LIBRARY_DIR}/libxsmm.a")
-SET_PROPERTY(TARGET libxsmm PROPERTY IMPORTED_LOCATION "${LIBXSMM_LIBRARY_DIR}/libxsmmnoblas.a")
+SET_PROPERTY(TARGET libxsmm PROPERTY IMPORTED_LOCATION "${LIBXSMM_LIB}")
+SET_PROPERTY(TARGET libxsmm PROPERTY IMPORTED_LOCATION "${LIBXSMMNOBLAS_LIB}")
MESSAGE(STATUS "Libxsmm library: ${LIBXSMM_LIBS}")
include_directories(${LIBXSMM_INCLUDE_DIR})
diff --git a/cmake/external/lite.cmake b/cmake/external/lite.cmake
index 6e2157e30871678a5f78ac35726805fb1c1f0466..e213068377b1409595cac9b6169fe7605cff059c 100644
--- a/cmake/external/lite.cmake
+++ b/cmake/external/lite.cmake
@@ -18,13 +18,21 @@ if(NOT LINUX)
return()
endif()
-if(XPU_SDK_ROOT)
- set(LITE_WITH_XPU ON)
- include_directories("${XPU_SDK_ROOT}/XTDK/include")
- include_directories("${XPU_SDK_ROOT}/XTCL/include")
+if (LITE_WITH_XPU)
add_definitions(-DLITE_SUBGRAPH_WITH_XPU)
- LINK_DIRECTORIES("${XPU_SDK_ROOT}/XTDK/shlib/")
- LINK_DIRECTORIES("${XPU_SDK_ROOT}/XTDK/runtime/shlib/")
+ IF(WITH_AARCH64)
+ SET(XPU_SDK_ENV "kylin_aarch64")
+ ELSEIF(WITH_SUNWAY)
+ SET(XPU_SDK_ENV "deepin_sw6_64")
+ ELSEIF(WITH_BDCENTOS)
+ SET(XPU_SDK_ENV "bdcentos_x86_64")
+ ELSEIF(WITH_UBUNTU)
+ SET(XPU_SDK_ENV "ubuntu_x86_64")
+ ELSEIF(WITH_CENTOS)
+ SET(XPU_SDK_ENV "centos7_x86_64")
+ ELSE ()
+ SET(XPU_SDK_ENV "ubuntu_x86_64")
+ ENDIF()
endif()
if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
@@ -57,7 +65,8 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
-DWITH_TESTING=OFF
-DLITE_BUILD_EXTRA=ON
-DLITE_WITH_XPU=${LITE_WITH_XPU}
- -DXPU_SDK_ROOT=${XPU_SDK_ROOT}
+ -DXPU_SDK_URL=${XPU_BASE_URL}
+ -DXPU_SDK_ENV=${XPU_SDK_ENV}
-DLITE_WITH_CODE_META_INFO=OFF
-DLITE_WITH_ARM=ON)
ExternalProject_Add(
@@ -99,7 +108,8 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
-DLITE_WITH_STATIC_CUDA=OFF
-DCUDA_ARCH_NAME=${CUDA_ARCH_NAME}
-DLITE_WITH_XPU=${LITE_WITH_XPU}
- -DXPU_SDK_ROOT=${XPU_SDK_ROOT}
+ -DXPU_SDK_URL=${XPU_BASE_URL}
+ -DXPU_SDK_ENV=${XPU_SDK_ENV}
-DLITE_WITH_CODE_META_INFO=OFF
-DLITE_WITH_ARM=OFF)
@@ -147,6 +157,10 @@ message(STATUS "Paddle-lite BINARY_DIR: ${LITE_BINARY_DIR}")
message(STATUS "Paddle-lite SOURCE_DIR: ${LITE_SOURCE_DIR}")
include_directories(${LITE_SOURCE_DIR})
include_directories(${LITE_BINARY_DIR})
+if(LITE_WITH_XPU)
+ include_directories(${LITE_BINARY_DIR}/third_party/install/xpu/xdnn/include/)
+ include_directories(${LITE_BINARY_DIR}/third_party/install/xpu/xre/include/)
+endif()
function(external_lite_libs alias path)
add_library(${alias} SHARED IMPORTED GLOBAL)
diff --git a/cmake/external/mkldnn.cmake b/cmake/external/mkldnn.cmake
index fb1d4d9d56dcc6f38a86242b4d78b88ef31ddaa0..9963237ff188cfc736520588fc462a4a7c8a1700 100644
--- a/cmake/external/mkldnn.cmake
+++ b/cmake/external/mkldnn.cmake
@@ -20,7 +20,8 @@ SET(MKLDNN_SOURCE_DIR ${THIRD_PARTY_PATH}/mkldnn/src/extern_mkldnn)
SET(MKLDNN_INSTALL_DIR ${THIRD_PARTY_PATH}/install/mkldnn)
SET(MKLDNN_INC_DIR "${MKLDNN_INSTALL_DIR}/include" CACHE PATH "mkldnn include directory." FORCE)
SET(MKLDNN_REPOSITORY ${GIT_URL}/oneapi-src/oneDNN.git)
-SET(MKLDNN_TAG f58682cd8bd0615f41d879f8afc8f1511ab42d24)
+SET(MKLDNN_TAG 593e0de6267d2575f3e4c9e9818f0f11253d093a)
+
# Introduce variables:
# * CMAKE_INSTALL_LIBDIR
@@ -42,8 +43,10 @@ IF(NOT WIN32)
SET(MKLDNN_FLAG "${MKLDNN_FLAG} -Wno-unused-result -Wno-unused-value")
SET(MKLDNN_CFLAG "${CMAKE_C_FLAGS} ${MKLDNN_FLAG}")
SET(MKLDNN_CXXFLAG "${CMAKE_CXX_FLAGS} ${MKLDNN_FLAG}")
+ SET(MKLDNN_LIB "${MKLDNN_INSTALL_DIR}/${LIBDIR}/libdnnl.so" CACHE FILEPATH "mkldnn library." FORCE)
ELSE()
SET(MKLDNN_CXXFLAG "${CMAKE_CXX_FLAGS} /EHsc")
+ SET(MKLDNN_LIB "${MKLDNN_INSTALL_DIR}/bin/mkldnn.lib" CACHE FILEPATH "mkldnn library." FORCE)
ENDIF(NOT WIN32)
cache_third_party(${MKLDNN_PROJECT}
@@ -59,8 +62,8 @@ ExternalProject_Add(
DEPENDS ${MKLDNN_DEPENDS}
PREFIX ${MKLDNN_PREFIX_DIR}
SOURCE_DIR ${MKLDNN_SOURCE_DIR}
- BUILD_ALWAYS 1
- # UPDATE_COMMAND ""
+ UPDATE_COMMAND ""
+ #BUILD_ALWAYS 1
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
@@ -76,12 +79,8 @@ ExternalProject_Add(
-DCMAKE_CXX_FLAGS=${MKLDNN_CXXFLAG}
-DDNNL_BUILD_TESTS=OFF -DDNNL_BUILD_EXAMPLES=OFF
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${MKLDNN_INSTALL_DIR}
+ BUILD_BYPRODUCTS ${MKLDNN_LIB}
)
-if(WIN32)
- SET(MKLDNN_LIB "${MKLDNN_INSTALL_DIR}/bin/mkldnn.lib" CACHE FILEPATH "mkldnn library." FORCE)
-else(WIN32)
- SET(MKLDNN_LIB "${MKLDNN_INSTALL_DIR}/${LIBDIR}/libdnnl.so" CACHE FILEPATH "mkldnn library." FORCE)
-endif(WIN32)
ADD_LIBRARY(shared_mkldnn SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET shared_mkldnn PROPERTY IMPORTED_LOCATION ${MKLDNN_LIB})
@@ -100,8 +99,11 @@ ADD_DEPENDENCIES(mkldnn ${MKLDNN_PROJECT})
# it can be directly contained in wheel or capi
if(WIN32)
SET(MKLDNN_SHARED_LIB ${MKLDNN_INSTALL_DIR}/bin/mkldnn.dll)
+
+ file(TO_NATIVE_PATH ${MKLDNN_INSTALL_DIR} NATIVE_MKLDNN_INSTALL_DIR)
+ file(TO_NATIVE_PATH ${MKLDNN_SHARED_LIB} NATIVE_MKLDNN_SHARED_LIB)
ADD_CUSTOM_COMMAND(TARGET ${MKLDNN_PROJECT} POST_BUILD
- COMMAND ${CMAKE_COMMAND} -E copy ${MKLDNN_INSTALL_DIR}/bin/dnnl.dll ${MKLDNN_SHARED_LIB})
+ COMMAND (copy ${NATIVE_MKLDNN_INSTALL_DIR}\\bin\\dnnl.dll ${NATIVE_MKLDNN_SHARED_LIB} /Y))
add_custom_command(TARGET ${MKLDNN_PROJECT} POST_BUILD VERBATIM
COMMAND dumpbin /exports ${MKLDNN_INSTALL_DIR}/bin/mkldnn.dll > ${MKLDNN_INSTALL_DIR}/bin/exports.txt)
add_custom_command(TARGET ${MKLDNN_PROJECT} POST_BUILD VERBATIM
@@ -109,7 +111,7 @@ if(WIN32)
add_custom_command(TARGET ${MKLDNN_PROJECT} POST_BUILD VERBATIM
COMMAND echo EXPORTS >> ${MKLDNN_INSTALL_DIR}/bin/mkldnn.def)
add_custom_command(TARGET ${MKLDNN_PROJECT} POST_BUILD VERBATIM
- COMMAND for /f "skip=19 tokens=4" %A in (${MKLDNN_INSTALL_DIR}/bin/exports.txt) do echo %A >> ${MKLDNN_INSTALL_DIR}/bin/mkldnn.def)
+ COMMAND echo off && (for /f "skip=19 tokens=4" %A in (${MKLDNN_INSTALL_DIR}/bin/exports.txt) do echo %A >> ${MKLDNN_INSTALL_DIR}/bin/mkldnn.def) && echo on)
add_custom_command(TARGET ${MKLDNN_PROJECT} POST_BUILD VERBATIM
COMMAND lib /def:${MKLDNN_INSTALL_DIR}/bin/mkldnn.def /out:${MKLDNN_INSTALL_DIR}/bin/mkldnn.lib /machine:x64)
else(WIN32)
diff --git a/cmake/external/mklml.cmake b/cmake/external/mklml.cmake
index 4cf9b626d15472206f47cd604d0b5b87089c4476..a4df5756ce015d14e0a366643ed6e0c45385657c 100644
--- a/cmake/external/mklml.cmake
+++ b/cmake/external/mklml.cmake
@@ -24,6 +24,7 @@ SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${MKLML_ROOT}/lib")
IF(WIN32)
SET(MKLML_VER "mklml_win_2019.0.5.20190502" CACHE STRING "" FORCE)
SET(MKLML_URL "https://paddlepaddledeps.bj.bcebos.com/${MKLML_VER}.zip" CACHE STRING "" FORCE)
+ SET(MKLML_URL_MD5 ff8c5237570f03eea37377ccfc95a08a)
SET(MKLML_LIB ${MKLML_LIB_DIR}/mklml.lib)
SET(MKLML_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5md.lib)
SET(MKLML_SHARED_LIB ${MKLML_LIB_DIR}/mklml.dll)
@@ -33,6 +34,7 @@ ELSE()
# Now enable csrmm function in mklml library temporarily, it will be updated as offical version later.
SET(MKLML_VER "csrmm_mklml_lnx_2019.0.5" CACHE STRING "" FORCE)
SET(MKLML_URL "http://paddlepaddledeps.bj.bcebos.com/${MKLML_VER}.tgz" CACHE STRING "" FORCE)
+ SET(MKLML_URL_MD5 bc6a7faea6a2a9ad31752386f3ae87da)
SET(MKLML_LIB ${MKLML_LIB_DIR}/libmklml_intel.so)
SET(MKLML_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5.so)
SET(MKLML_SHARED_LIB ${MKLML_LIB_DIR}/libmklml_intel.so)
@@ -48,10 +50,15 @@ cache_third_party(${MKLML_PROJECT}
URL ${MKLML_URL}
DIR MKLML_SOURCE_DIR)
+# Ninja Generator can not establish the correct dependency relationship between the imported library with target,
+# the product file in the ExternalProject need to be specified manually, please refer to
+# https://stackoverflow.com/questions/54866067/cmake-and-ninja-missing-and-no-known-rule-to-make-it
+# It is the same to all other ExternalProject.
ExternalProject_Add(
${MKLML_PROJECT}
${EXTERNAL_PROJECT_LOG_ARGS}
"${MKLML_DOWNLOAD_CMD}"
+ URL_MD5 ${MKLML_URL_MD5}
PREFIX ${MKLML_PREFIX_DIR}
DOWNLOAD_DIR ${MKLML_SOURCE_DIR}
SOURCE_DIR ${MKLML_SOURCE_DIR}
@@ -60,7 +67,9 @@ ExternalProject_Add(
BUILD_COMMAND ""
UPDATE_COMMAND ""
INSTALL_COMMAND ${CMAKE_COMMAND} -E copy_directory ${MKLML_SOURCE_DIR}/include ${MKLML_INC_DIR} &&
- ${CMAKE_COMMAND} -E copy_directory ${MKLML_SOURCE_DIR}/lib ${MKLML_LIB_DIR}
+ ${CMAKE_COMMAND} -E copy_directory ${MKLML_SOURCE_DIR}/lib ${MKLML_LIB_DIR}
+ BUILD_BYPRODUCTS ${MKLML_LIB}
+ BUILD_BYPRODUCTS ${MKLML_IOMP_LIB}
)
INCLUDE_DIRECTORIES(${MKLML_INC_DIR})
diff --git a/cmake/external/openblas.cmake b/cmake/external/openblas.cmake
index 19ba6d15c59ea802cc94ea6138871c15cb49077b..a6033a20c6fb06c6e6b26100c1997b7881767e85 100644
--- a/cmake/external/openblas.cmake
+++ b/cmake/external/openblas.cmake
@@ -19,6 +19,10 @@ SET(CBLAS_SOURCE_DIR ${THIRD_PARTY_PATH}/openblas/src/extern_openblas)
SET(CBLAS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/openblas)
SET(CBLAS_REPOSITORY ${GIT_URL}/xianyi/OpenBLAS.git)
SET(CBLAS_TAG v0.3.7)
+if(APPLE AND WITH_ARM)
+ SET(CBLAS_TAG v0.3.13)
+endif()
+
if(WITH_MIPS)
SET(CBLAS_TAG v0.3.13)
endif()
diff --git a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake
index c108c05368c915f6d4998d46713cda315dfb93ff..a2b6ddadb625f67f119cc314970f1a654cf0c0ab 100644
--- a/cmake/external/protobuf.cmake
+++ b/cmake/external/protobuf.cmake
@@ -239,6 +239,10 @@ endif()
-DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
${OPTIONAL_CACHE_ARGS}
+ BUILD_BYPRODUCTS ${PROTOBUF_INSTALL_DIR}/lib/libprotobuf${CMAKE_STATIC_LIBRARY_SUFFIX}
+ BUILD_BYPRODUCTS ${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite${CMAKE_STATIC_LIBRARY_SUFFIX}
+ BUILD_BYPRODUCTS ${PROTOBUF_INSTALL_DIR}/lib/libprotoc${CMAKE_STATIC_LIBRARY_SUFFIX}
+ BUILD_BYPRODUCTS ${PROTOBUF_INSTALL_DIR}/bin/protoc${CMAKE_EXECUTABLE_SUFFIX}
)
ENDFUNCTION()
diff --git a/cmake/external/pslib.cmake b/cmake/external/pslib.cmake
index bdfd335172d877d7e294c898dad7e3a554f5531c..40d198b2958339d938961d7d75fe357826b4e227 100644
--- a/cmake/external/pslib.cmake
+++ b/cmake/external/pslib.cmake
@@ -53,7 +53,10 @@ ExternalProject_Add(
DOWNLOAD_NO_PROGRESS 1
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${PSLIB_INSTALL_ROOT}
+ -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${PSLIB_INSTALL_ROOT}
+ -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
+ BUILD_BYPRODUCTS ${PSLIB_LIB}
)
ADD_LIBRARY(pslib SHARED IMPORTED GLOBAL)
diff --git a/cmake/external/pslib_brpc.cmake b/cmake/external/pslib_brpc.cmake
index 7b00474a650706b6de6e549c56ca94485cfc2300..d69c27a197b25a7320e7755f26b7a151628e1c62 100644
--- a/cmake/external/pslib_brpc.cmake
+++ b/cmake/external/pslib_brpc.cmake
@@ -52,7 +52,10 @@ ExternalProject_Add(
DOWNLOAD_NO_PROGRESS 1
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${PSLIB_BRPC_INSTALL_ROOT}
+ -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${PSLIB_BRPC_INSTALL_ROOT}
+ -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
+ BUILD_BYPRODUCTS ${PSLIB_BRPC_LIB}
)
ADD_LIBRARY(pslib_brpc SHARED IMPORTED GLOBAL)
diff --git a/cmake/external/rocksdb.cmake b/cmake/external/rocksdb.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..f5b85cc71a25f12285bb02648df55c3d88ec8e53
--- /dev/null
+++ b/cmake/external/rocksdb.cmake
@@ -0,0 +1,51 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+INCLUDE(ExternalProject)
+
+SET(ROCKSDB_SOURCES_DIR ${THIRD_PARTY_PATH}/rocksdb)
+SET(ROCKSDB_INSTALL_DIR ${THIRD_PARTY_PATH}/install/rocksdb)
+SET(ROCKSDB_INCLUDE_DIR "${ROCKSDB_INSTALL_DIR}/include" CACHE PATH "rocksdb include directory." FORCE)
+SET(ROCKSDB_LIBRARIES "${ROCKSDB_INSTALL_DIR}/lib/librocksdb.a" CACHE FILEPATH "rocksdb library." FORCE)
+SET(ROCKSDB_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
+INCLUDE_DIRECTORIES(${ROCKSDB_INCLUDE_DIR})
+
+ExternalProject_Add(
+ extern_rocksdb
+ ${EXTERNAL_PROJECT_LOG_ARGS}
+ PREFIX ${ROCKSDB_SOURCES_DIR}
+ GIT_REPOSITORY "https://github.com/facebook/rocksdb"
+ GIT_TAG v6.10.1
+ UPDATE_COMMAND ""
+ CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
+ -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
+ -DWITH_BZ2=OFF
+ -DWITH_GFLAGS=OFF
+ -DCMAKE_CXX_FLAGS=${ROCKSDB_CMAKE_CXX_FLAGS}
+ -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
+# BUILD_BYPRODUCTS ${ROCKSDB_SOURCES_DIR}/src/extern_rocksdb/librocksdb.a
+ INSTALL_COMMAND mkdir -p ${ROCKSDB_INSTALL_DIR}/lib/
+ && cp ${ROCKSDB_SOURCES_DIR}/src/extern_rocksdb/librocksdb.a ${ROCKSDB_LIBRARIES}
+ && cp -r ${ROCKSDB_SOURCES_DIR}/src/extern_rocksdb/include ${ROCKSDB_INSTALL_DIR}/
+ BUILD_IN_SOURCE 1
+)
+
+ADD_DEPENDENCIES(extern_rocksdb snappy)
+
+ADD_LIBRARY(rocksdb STATIC IMPORTED GLOBAL)
+SET_PROPERTY(TARGET rocksdb PROPERTY IMPORTED_LOCATION ${ROCKSDB_LIBRARIES})
+ADD_DEPENDENCIES(rocksdb extern_rocksdb)
+
+LIST(APPEND external_project_dependencies rocksdb)
+
diff --git a/cmake/external/snappy.cmake b/cmake/external/snappy.cmake
index ab9cb02307c1f04384f8e12e843c121c01995d12..fb4c1c7cc8a3d57846648b5638f54adf40b50416 100644
--- a/cmake/external/snappy.cmake
+++ b/cmake/external/snappy.cmake
@@ -22,8 +22,15 @@ set(SNAPPY_INCLUDE_DIR "${SNAPPY_INSTALL_DIR}/include" CACHE PATH "snappy includ
if(WIN32)
SET(SNAPPY_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4244 /wd4267")
+ IF(NOT EXISTS "${SNAPPY_INSTALL_DIR}/lib/libsnappy.lib")
+ add_custom_command(TARGET extern_snappy POST_BUILD
+ COMMAND cmake -E copy ${SNAPPY_INSTALL_DIR}/lib/snappy.lib ${SNAPPY_INSTALL_DIR}/lib/libsnappy.lib
+ )
+ ENDIF()
+ set(SNAPPY_LIBRARIES "${SNAPPY_INSTALL_DIR}/lib/libsnappy.lib")
else()
SET(SNAPPY_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
+ set(SNAPPY_LIBRARIES "${SNAPPY_INSTALL_DIR}/lib/libsnappy.a")
endif()
ExternalProject_Add(
@@ -33,35 +40,26 @@ ExternalProject_Add(
PREFIX ${SNAPPY_SOURCES_DIR}
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
- -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
- -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
- -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}
- -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}
- -DCMAKE_CXX_FLAGS=${SNAPPY_CMAKE_CXX_FLAGS}
- -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
- -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
- -DCMAKE_INSTALL_PREFIX=${SNAPPY_INSTALL_DIR}
- -DCMAKE_INSTALL_LIBDIR=${SNAPPY_INSTALL_DIR}/lib
- -DCMAKE_POSITION_INDEPENDENT_CODE=ON
- -DBUILD_TESTING=OFF
- -DSNAPPY_BUILD_TESTS:BOOL=OFF
- -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
- ${EXTERNAL_OPTIONAL_ARGS}
+ -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
+ -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
+ -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}
+ -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}
+ -DCMAKE_CXX_FLAGS=${SNAPPY_CMAKE_CXX_FLAGS}
+ -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
+ -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
+ -DCMAKE_INSTALL_PREFIX=${SNAPPY_INSTALL_DIR}
+ -DCMAKE_INSTALL_LIBDIR=${SNAPPY_INSTALL_DIR}/lib
+ -DCMAKE_POSITION_INDEPENDENT_CODE=ON
+ -DBUILD_TESTING=OFF
+ -DSNAPPY_BUILD_TESTS:BOOL=OFF
+ -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
+ ${EXTERNAL_OPTIONAL_ARGS}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${SNAPPY_INSTALL_DIR}
- -DCMAKE_INSTALL_LIBDIR:PATH=${SNAPPY_INSTALL_DIR}/lib
- -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
- -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
+ -DCMAKE_INSTALL_LIBDIR:PATH=${SNAPPY_INSTALL_DIR}/lib
+ -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
+ -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
+ BUILD_BYPRODUCTS ${SNAPPY_LIBRARIES}
)
-IF(WIN32)
- IF(NOT EXISTS "${SNAPPY_INSTALL_DIR}/lib/libsnappy.lib")
- add_custom_command(TARGET extern_snappy POST_BUILD
- COMMAND cmake -E copy ${SNAPPY_INSTALL_DIR}/lib/snappy.lib ${SNAPPY_INSTALL_DIR}/lib/libsnappy.lib
- )
- ENDIF()
- set(SNAPPY_LIBRARIES "${SNAPPY_INSTALL_DIR}/lib/libsnappy.lib")
-else(WIN32)
- set(SNAPPY_LIBRARIES "${SNAPPY_INSTALL_DIR}/lib/libsnappy.a")
-endif (WIN32)
add_library(snappy STATIC IMPORTED GLOBAL)
set_property(TARGET snappy PROPERTY IMPORTED_LOCATION ${SNAPPY_LIBRARIES})
diff --git a/cmake/external/warpctc.cmake b/cmake/external/warpctc.cmake
index 100b9153394690f6d872a4f16fb0a1ee5827b89f..532ebaaf5c0643a86fcf24022d0084fb572877b5 100644
--- a/cmake/external/warpctc.cmake
+++ b/cmake/external/warpctc.cmake
@@ -24,7 +24,7 @@ SET(WARPCTC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/warpctc)
# in case of low internet speed
#set(WARPCTC_REPOSITORY https://gitee.com/tianjianhe/warp-ctc.git)
set(WARPCTC_REPOSITORY ${GIT_URL}/baidu-research/warp-ctc.git)
-set(WARPCTC_TAG c690fc5755abbdbdc98ef78d51ec10a6748a8cd1)
+set(WARPCTC_TAG 37ece0e1bbe8a0019a63ac7e6462c36591c66a5b)
SET(WARPCTC_INCLUDE_DIR "${WARPCTC_INSTALL_DIR}/include"
CACHE PATH "Warp-ctc Directory" FORCE)
@@ -32,6 +32,14 @@ SET(WARPCTC_INCLUDE_DIR "${WARPCTC_INSTALL_DIR}/include"
SET(WARPCTC_LIB_DIR "${WARPCTC_INSTALL_DIR}/lib"
CACHE PATH "Warp-ctc Library Directory" FORCE)
+IF(WIN32)
+ SET(WARPCTC_LIBRARIES "${WARPCTC_INSTALL_DIR}/bin/warpctc${CMAKE_SHARED_LIBRARY_SUFFIX}"
+ CACHE FILEPATH "Warp-ctc Library" FORCE)
+else(WIN32)
+ SET(WARPCTC_LIBRARIES "${WARPCTC_INSTALL_DIR}/lib/libwarpctc${CMAKE_SHARED_LIBRARY_SUFFIX}"
+ CACHE FILEPATH "Warp-ctc Library" FORCE)
+ENDIF(WIN32)
+
IF(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR WIN32)
SET(USE_OMP OFF)
ELSE()
@@ -59,7 +67,7 @@ if(WITH_ASCEND OR WITH_ASCEND_CL)
-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
-DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}
-DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}
- "-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}"
+ -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
-DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
-DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
-DCMAKE_INSTALL_PREFIX=${WARPCTC_INSTALL_DIR}
@@ -76,8 +84,24 @@ if(WITH_ASCEND OR WITH_ASCEND_CL)
CMAKE_CACHE_ARGS -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_INSTALL_PREFIX:PATH=${WARPCTC_INSTALL_DIR}
+ BUILD_BYPRODUCTS ${WARPCTC_LIBRARIES}
)
else()
+ if(WIN32)
+    set(WARPCTC_C_FLAGS $<FILTER:${CMAKE_C_FLAGS},EXCLUDE,/Zc:inline>)
+    set(WARPCTC_C_FLAGS_DEBUG $<FILTER:${CMAKE_C_FLAGS_DEBUG},EXCLUDE,/Zc:inline>)
+    set(WARPCTC_C_FLAGS_RELEASE $<FILTER:${CMAKE_C_FLAGS_RELEASE},EXCLUDE,/Zc:inline>)
+    set(WARPCTC_CXX_FLAGS $<FILTER:${CMAKE_CXX_FLAGS},EXCLUDE,/Zc:inline>)
+    set(WARPCTC_CXX_FLAGS_RELEASE $<FILTER:${CMAKE_CXX_FLAGS_RELEASE},EXCLUDE,/Zc:inline>)
+    set(WARPCTC_CXX_FLAGS_DEBUG $<FILTER:${CMAKE_CXX_FLAGS_DEBUG},EXCLUDE,/Zc:inline>)
+ else()
+ set(WARPCTC_C_FLAGS ${CMAKE_C_FLAGS})
+ set(WARPCTC_C_FLAGS_DEBUG ${CMAKE_C_FLAGS_DEBUG})
+ set(WARPCTC_C_FLAGS_RELEASE ${CMAKE_C_FLAGS_RELEASE})
+ set(WARPCTC_CXX_FLAGS ${CMAKE_CXX_FLAGS})
+ set(WARPCTC_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE})
+ set(WARPCTC_CXX_FLAGS_DEBUG ${CMAKE_CXX_FLAGS_DEBUG})
+ endif()
ExternalProject_Add(
extern_warpctc
${EXTERNAL_PROJECT_LOG_ARGS}
@@ -85,17 +109,17 @@ else()
"${WARPCTC_DOWNLOAD_CMD}"
PREFIX ${WARPCTC_PREFIX_DIR}
SOURCE_DIR ${WARPCTC_SOURCE_DIR}
- #UPDATE_COMMAND ""
+ UPDATE_COMMAND ""
PATCH_COMMAND ""
- BUILD_ALWAYS 1
+ #BUILD_ALWAYS 1
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-    -DCMAKE_C_FLAGS=$<FILTER:${CMAKE_C_FLAGS},EXCLUDE,/Zc:inline>
-    -DCMAKE_C_FLAGS_DEBUG=$<FILTER:${CMAKE_C_FLAGS_DEBUG},EXCLUDE,/Zc:inline>
-    -DCMAKE_C_FLAGS_RELEASE=$<FILTER:${CMAKE_C_FLAGS_RELEASE},EXCLUDE,/Zc:inline>
-    -DCMAKE_CXX_FLAGS=$<FILTER:${CMAKE_CXX_FLAGS},EXCLUDE,/Zc:inline>
-    -DCMAKE_CXX_FLAGS_RELEASE=$<FILTER:${CMAKE_CXX_FLAGS_RELEASE},EXCLUDE,/Zc:inline>
-    -DCMAKE_CXX_FLAGS_DEBUG=$<FILTER:${CMAKE_CXX_FLAGS_DEBUG},EXCLUDE,/Zc:inline>
+ -DCMAKE_C_FLAGS=${WARPCTC_C_FLAGS}
+ -DCMAKE_C_FLAGS_DEBUG=${WARPCTC_C_FLAGS_DEBUG}
+ -DCMAKE_C_FLAGS_RELEASE=${WARPCTC_C_FLAGS_RELEASE}
+ -DCMAKE_CXX_FLAGS=${WARPCTC_CXX_FLAGS}
+ -DCMAKE_CXX_FLAGS_RELEASE=${WARPCTC_CXX_FLAGS_RELEASE}
+ -DCMAKE_CXX_FLAGS_DEBUG=${WARPCTC_CXX_FLAGS_DEBUG}
-DCMAKE_INSTALL_PREFIX=${WARPCTC_INSTALL_DIR}
-DWITH_GPU=${WITH_GPU}
-DWITH_ROCM=${WITH_ROCM}
@@ -110,18 +134,10 @@ else()
CMAKE_CACHE_ARGS -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_INSTALL_PREFIX:PATH=${WARPCTC_INSTALL_DIR}
+ BUILD_BYPRODUCTS ${WARPCTC_LIBRARIES}
)
endif()
-
-IF(WIN32)
- SET(WARPCTC_LIBRARIES "${WARPCTC_INSTALL_DIR}/bin/warpctc${CMAKE_SHARED_LIBRARY_SUFFIX}"
- CACHE FILEPATH "Warp-ctc Library" FORCE)
-else(WIN32)
- SET(WARPCTC_LIBRARIES "${WARPCTC_INSTALL_DIR}/lib/libwarpctc${CMAKE_SHARED_LIBRARY_SUFFIX}"
- CACHE FILEPATH "Warp-ctc Library" FORCE)
-ENDIF(WIN32)
-
MESSAGE(STATUS "warp-ctc library: ${WARPCTC_LIBRARIES}")
get_filename_component(WARPCTC_LIBRARY_PATH ${WARPCTC_LIBRARIES} DIRECTORY)
INCLUDE_DIRECTORIES(${WARPCTC_INCLUDE_DIR}) # For warpctc code to include its headers.
diff --git a/cmake/external/xbyak.cmake b/cmake/external/xbyak.cmake
index 610a692ef12c6ae6f992fff8e4e65f48f3aeb01f..eabcabf7430633bd14bcf9814f112e7a4d043336 100644
--- a/cmake/external/xbyak.cmake
+++ b/cmake/external/xbyak.cmake
@@ -46,7 +46,9 @@ ExternalProject_Add(
SOURCE_DIR ${XBYAK_SOURCE_DIR}
# UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${XBYAK_INSTALL_ROOT}
+ -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${XBYAK_INSTALL_ROOT}
+ -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
)
add_library(xbyak INTERFACE)
diff --git a/cmake/external/xpu.cmake b/cmake/external/xpu.cmake
index f846623602ed79a5bd84268436a59ede1957364b..42de34fb52061af23eee28377659ed4cbbb4de0a 100644
--- a/cmake/external/xpu.cmake
+++ b/cmake/external/xpu.cmake
@@ -7,52 +7,74 @@ SET(XPU_PROJECT "extern_xpu")
SET(XPU_API_LIB_NAME "libxpuapi.so")
SET(XPU_RT_LIB_NAME "libxpurt.so")
-if(NOT XPU_SDK_ROOT)
- if (WITH_AARCH64)
- SET(XPU_URL "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/aarch64/xpu_2021_01_13.tar.gz" CACHE STRING "" FORCE)
- elseif(WITH_SUNWAY)
- SET(XPU_URL "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/sunway/xpu_2021_01_13.tar.gz" CACHE STRING "" FORCE)
- else()
- SET(XPU_URL "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/xpu_2021_04_09.tar.gz" CACHE STRING "" FORCE)
- endif()
-
- SET(XPU_SOURCE_DIR "${THIRD_PARTY_PATH}/xpu")
- SET(XPU_DOWNLOAD_DIR "${XPU_SOURCE_DIR}/src/${XPU_PROJECT}")
- SET(XPU_INSTALL_DIR "${THIRD_PARTY_PATH}/install/xpu")
- SET(XPU_API_INC_DIR "${THIRD_PARTY_PATH}/install/xpu/include")
- SET(XPU_LIB_DIR "${THIRD_PARTY_PATH}/install/xpu/lib")
-
- SET(XPU_API_LIB "${XPU_LIB_DIR}/${XPU_API_LIB_NAME}")
- SET(XPU_RT_LIB "${XPU_LIB_DIR}/${XPU_RT_LIB_NAME}")
-
- SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${XPU_INSTALL_DIR}/lib")
-
- FILE(WRITE ${XPU_DOWNLOAD_DIR}/CMakeLists.txt
- "PROJECT(XPU)\n"
- "cmake_minimum_required(VERSION 3.0)\n"
- "install(DIRECTORY xpu/include xpu/lib \n"
- " DESTINATION ${XPU_INSTALL_DIR})\n")
-
- ExternalProject_Add(
- ${XPU_PROJECT}
- ${EXTERNAL_PROJECT_LOG_ARGS}
- PREFIX ${XPU_SOURCE_DIR}
- DOWNLOAD_DIR ${XPU_DOWNLOAD_DIR}
- DOWNLOAD_COMMAND wget --no-check-certificate ${XPU_URL} -c -q -O xpu.tar.gz
- && tar xvf xpu.tar.gz
- DOWNLOAD_NO_PROGRESS 1
- UPDATE_COMMAND ""
- CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${XPU_INSTALL_ROOT}
- CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${XPU_INSTALL_ROOT}
- )
-else()
- SET(XPU_API_INC_DIR "${XPU_SDK_ROOT}/XTDK/include/")
- SET(XPU_API_LIB "${XPU_SDK_ROOT}/XTDK/shlib/libxpuapi.so")
- SET(XPU_RT_LIB "${XPU_SDK_ROOT}/XTDK/runtime/shlib/libxpurt.so")
- SET(XPU_LIB_DIR "${XPU_SDK_ROOT}/XTDK/shlib/")
-endif()
+IF(WITH_AARCH64)
+ SET(XPU_XRE_DIR_NAME "xre-kylin_aarch64")
+ SET(XPU_XDNN_DIR_NAME "xdnn-kylin_aarch64")
+ SET(XPU_XCCL_DIR_NAME "xccl-kylin_aarch64")
+ELSEIF(WITH_SUNWAY)
+ SET(XPU_XRE_DIR_NAME "xre-deepin_sw6_64")
+ SET(XPU_XDNN_DIR_NAME "xdnn-deepin_sw6_64")
+ SET(XPU_XCCL_DIR_NAME "xccl-deepin_sw6_64")
+ELSEIF(WITH_BDCENTOS)
+ SET(XPU_XRE_DIR_NAME "xre-bdcentos_x86_64")
+ SET(XPU_XDNN_DIR_NAME "xdnn-bdcentos_x86_64")
+ SET(XPU_XCCL_DIR_NAME "xccl-bdcentos_x86_64")
+ELSEIF(WITH_UBUNTU)
+ SET(XPU_XRE_DIR_NAME "xre-ubuntu_x86_64")
+ SET(XPU_XDNN_DIR_NAME "xdnn-ubuntu_x86_64")
+ SET(XPU_XCCL_DIR_NAME "xccl-bdcentos_x86_64")
+ELSEIF(WITH_CENTOS)
+ SET(XPU_XRE_DIR_NAME "xre-centos7_x86_64")
+ SET(XPU_XDNN_DIR_NAME "xdnn-centos7_x86_64")
+ SET(XPU_XCCL_DIR_NAME "xccl-bdcentos_x86_64")
+
+ELSE ()
+ SET(XPU_XRE_DIR_NAME "xre-ubuntu_x86_64")
+ SET(XPU_XDNN_DIR_NAME "xdnn-ubuntu_x86_64")
+ SET(XPU_XCCL_DIR_NAME "xccl-bdcentos_x86_64")
+ENDIF()
+
+SET(XPU_BASE_URL_WITHOUT_DATE "https://baidu-kunlun-product.cdn.bcebos.com/KL-SDK/klsdk-dev")
+SET(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20210701")
+SET(XPU_XRE_URL "${XPU_BASE_URL}/${XPU_XRE_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
+SET(XPU_XDNN_URL "${XPU_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
+SET(XPU_XCCL_URL "${XPU_BASE_URL_WITHOUT_DATE}/20210623/${XPU_XCCL_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
+SET(XPU_PACK_DEPENCE_URL "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/pack_paddle_depence.sh" CACHE STRING "" FORCE)
+
+SET(XPU_SOURCE_DIR "${THIRD_PARTY_PATH}/xpu")
+SET(XPU_DOWNLOAD_DIR "${XPU_SOURCE_DIR}/src/${XPU_PROJECT}")
+SET(XPU_INSTALL_DIR "${THIRD_PARTY_PATH}/install/xpu")
+SET(XPU_INC_DIR "${THIRD_PARTY_PATH}/install/xpu/include")
+SET(XPU_LIB_DIR "${THIRD_PARTY_PATH}/install/xpu/lib")
+
+SET(XPU_API_LIB "${XPU_LIB_DIR}/${XPU_API_LIB_NAME}")
+SET(XPU_RT_LIB "${XPU_LIB_DIR}/${XPU_RT_LIB_NAME}")
+
+SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${XPU_INSTALL_DIR}/lib")
-INCLUDE_DIRECTORIES(${XPU_API_INC_DIR})
+FILE(WRITE ${XPU_DOWNLOAD_DIR}/CMakeLists.txt
+ "PROJECT(XPU)\n"
+ "cmake_minimum_required(VERSION 3.0)\n"
+ "install(DIRECTORY xpu/include xpu/lib \n"
+ " DESTINATION ${XPU_INSTALL_DIR})\n")
+
+ExternalProject_Add(
+ ${XPU_PROJECT}
+ ${EXTERNAL_PROJECT_LOG_ARGS}
+ PREFIX ${XPU_SOURCE_DIR}
+ DOWNLOAD_DIR ${XPU_DOWNLOAD_DIR}
+ DOWNLOAD_COMMAND wget ${XPU_PACK_DEPENCE_URL}
+ && bash pack_paddle_depence.sh ${XPU_XRE_URL} ${XPU_XRE_DIR_NAME} ${XPU_XDNN_URL} ${XPU_XDNN_DIR_NAME} ${XPU_XCCL_URL} ${XPU_XCCL_DIR_NAME}
+
+ DOWNLOAD_NO_PROGRESS 1
+ UPDATE_COMMAND ""
+ CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${XPU_INSTALL_ROOT}
+ CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${XPU_INSTALL_ROOT}
+ BUILD_BYPRODUCTS ${XPU_API_LIB}
+ BUILD_BYPRODUCTS ${XPU_RT_LIB}
+)
+
+INCLUDE_DIRECTORIES(${XPU_INC_DIR})
ADD_LIBRARY(shared_xpuapi SHARED IMPORTED GLOBAL)
set_property(TARGET shared_xpuapi PROPERTY IMPORTED_LOCATION "${XPU_API_LIB}")
@@ -62,7 +84,7 @@ generate_dummy_static_lib(LIB_NAME "xpulib" GENERATOR "xpu.cmake")
TARGET_LINK_LIBRARIES(xpulib ${XPU_API_LIB} ${XPU_RT_LIB})
-if (WITH_XPU_BKCL)
+IF(WITH_XPU_BKCL)
MESSAGE(STATUS "Compile with XPU BKCL!")
ADD_DEFINITIONS(-DPADDLE_WITH_XPU_BKCL)
@@ -71,15 +93,11 @@ if (WITH_XPU_BKCL)
SET(XPU_BKCL_INC_DIR "${THIRD_PARTY_PATH}/install/xpu/include")
INCLUDE_DIRECTORIES(${XPU_BKCL_INC_DIR})
TARGET_LINK_LIBRARIES(xpulib ${XPU_API_LIB} ${XPU_RT_LIB} ${XPU_BKCL_LIB})
-else(WITH_XPU_BKCL)
- TARGET_LINK_LIBRARIES(xpulib ${XPU_API_LIB} ${XPU_RT_LIB} )
-endif(WITH_XPU_BKCL)
-
-if(NOT XPU_SDK_ROOT)
- ADD_DEPENDENCIES(xpulib ${XPU_PROJECT})
-else()
- ADD_CUSTOM_TARGET(extern_xpu DEPENDS xpulib)
-endif()
+ELSE(WITH_XPU_BKCL)
+ TARGET_LINK_LIBRARIES(xpulib ${XPU_API_LIB} ${XPU_RT_LIB})
+ENDIF(WITH_XPU_BKCL)
+
+ADD_DEPENDENCIES(xpulib ${XPU_PROJECT})
# Ensure that xpu/api.h can be included without dependency errors.
file(GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/.xpu_headers_dummy.cc CONTENT "")
diff --git a/cmake/external/xxhash.cmake b/cmake/external/xxhash.cmake
index bdd7df190ff106178266fbd47716e7d70fd229bd..0279d4e2a835c2c1fa2bf8e2f4cafd21391accfc 100644
--- a/cmake/external/xxhash.cmake
+++ b/cmake/external/xxhash.cmake
@@ -21,10 +21,7 @@ set(XXHASH_INCLUDE_DIR "${XXHASH_INSTALL_DIR}/include")
set(XXHASH_REPOSITORY ${GIT_URL}/Cyan4973/xxHash.git)
set(XXHASH_TAG v0.6.5)
-cache_third_party(extern_xxhash
- REPOSITORY ${XXHASH_REPOSITORY}
- TAG ${XXHASH_TAG}
- DIR XXHASH_SOURCE_DIR)
+INCLUDE_DIRECTORIES(${XXHASH_INCLUDE_DIR})
IF(APPLE)
SET(BUILD_CMD sed -i \"\" "s/-Wstrict-prototypes -Wundef/-Wstrict-prototypes -Wundef -fPIC/g" ${XXHASH_SOURCE_DIR}/Makefile && make lib)
@@ -32,6 +29,17 @@ ELSEIF(UNIX)
SET(BUILD_CMD sed -i "s/-Wstrict-prototypes -Wundef/-Wstrict-prototypes -Wundef -fPIC/g" ${XXHASH_SOURCE_DIR}/Makefile && make lib)
ENDIF()
+if (WIN32)
+ set(XXHASH_LIBRARIES "${XXHASH_INSTALL_DIR}/lib/xxhash.lib")
+else()
+ set(XXHASH_LIBRARIES "${XXHASH_INSTALL_DIR}/lib/libxxhash.a")
+endif ()
+
+cache_third_party(extern_xxhash
+ REPOSITORY ${XXHASH_REPOSITORY}
+ TAG ${XXHASH_TAG}
+ DIR XXHASH_SOURCE_DIR)
+
if(WIN32)
ExternalProject_Add(
extern_xxhash
@@ -54,6 +62,7 @@ if(WIN32)
-DBUILD_SHARED_LIBS=OFF
${OPTIONAL_CACHE_ARGS}
TEST_COMMAND ""
+ BUILD_BYPRODUCTS ${XXHASH_LIBRARIES}
)
else()
ExternalProject_Add(
@@ -68,16 +77,10 @@ else()
BUILD_COMMAND ${BUILD_CMD}
INSTALL_COMMAND make PREFIX=${XXHASH_INSTALL_DIR} install
TEST_COMMAND ""
+ BUILD_BYPRODUCTS ${XXHASH_LIBRARIES}
)
endif()
-if (WIN32)
- set(XXHASH_LIBRARIES "${XXHASH_INSTALL_DIR}/lib/xxhash.lib")
-else()
- set(XXHASH_LIBRARIES "${XXHASH_INSTALL_DIR}/lib/libxxhash.a")
-endif ()
-INCLUDE_DIRECTORIES(${XXHASH_INCLUDE_DIR})
-
add_library(xxhash STATIC IMPORTED GLOBAL)
set_property(TARGET xxhash PROPERTY IMPORTED_LOCATION ${XXHASH_LIBRARIES})
include_directories(${XXHASH_INCLUDE_DIR})
diff --git a/cmake/external/zlib.cmake b/cmake/external/zlib.cmake
index 4464787a0c2a64066585e8f308c68a62286478e9..f1a015f6304a386fcc4cb985e4d0523d0d8eabb6 100644
--- a/cmake/external/zlib.cmake
+++ b/cmake/external/zlib.cmake
@@ -25,6 +25,12 @@ set(ZLIB_TAG v1.2.8)
INCLUDE_DIRECTORIES(${ZLIB_INCLUDE_DIR}) # For zlib code to include its own headers.
INCLUDE_DIRECTORIES(${THIRD_PARTY_PATH}/install) # For Paddle code to include zlib.h.
+IF(WIN32)
+ SET(ZLIB_LIBRARIES "${ZLIB_INSTALL_DIR}/lib/zlibstatic.lib" CACHE FILEPATH "zlib library." FORCE)
+ELSE(WIN32)
+ SET(ZLIB_LIBRARIES "${ZLIB_INSTALL_DIR}/lib/libz.a" CACHE FILEPATH "zlib library." FORCE)
+ENDIF(WIN32)
+
cache_third_party(extern_zlib
REPOSITORY ${ZLIB_REPOSITORY}
TAG ${ZLIB_TAG}
@@ -51,12 +57,8 @@ ExternalProject_Add(
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${ZLIB_INSTALL_DIR}
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
+ BUILD_BYPRODUCTS ${ZLIB_LIBRARIES}
)
-IF(WIN32)
- SET(ZLIB_LIBRARIES "${ZLIB_INSTALL_DIR}/lib/zlibstatic.lib" CACHE FILEPATH "zlib library." FORCE)
-ELSE(WIN32)
- SET(ZLIB_LIBRARIES "${ZLIB_INSTALL_DIR}/lib/libz.a" CACHE FILEPATH "zlib library." FORCE)
-ENDIF(WIN32)
ADD_LIBRARY(zlib STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET zlib PROPERTY IMPORTED_LOCATION ${ZLIB_LIBRARIES})
diff --git a/cmake/flags.cmake b/cmake/flags.cmake
index a2ddad557c2956f7de21bceaf7a6699e8dfbed43..7afff25664bbbb6f8ac93392dc39ed621e57e849 100644
--- a/cmake/flags.cmake
+++ b/cmake/flags.cmake
@@ -28,7 +28,12 @@ function(CheckCompilerCXX14Flag)
endfunction()
CheckCompilerCXX14Flag()
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14")
+if(NOT WIN32)
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14")
+else()
+ set(CMAKE_CXX_STANDARD 14)
+endif()
+
# safe_set_flag
#
# Set a compile flag only if compiler is support
@@ -181,8 +186,11 @@ endif()
endif(NOT WIN32)
if (APPLE)
- # On Mac OS X build fat binaries with x86_64 architectures by default.
- set (CMAKE_OSX_ARCHITECTURES "x86_64" CACHE STRING "Build architectures for OSX" FORCE)
+ if(WITH_ARM)
+ set (CMAKE_OSX_ARCHITECTURES "arm64" CACHE STRING "Build architectures for OSX" FORCE)
+ else(WITH_ARM)
+ set (CMAKE_OSX_ARCHITECTURES "x86_64" CACHE STRING "Build architectures for OSX" FORCE)
+ endif(WITH_ARM)
# On Mac OS X register class specifier is deprecated and will cause warning error on latest clang 10.0
set (COMMON_FLAGS -Wno-deprecated-register)
endif(APPLE)
diff --git a/cmake/generic.cmake b/cmake/generic.cmake
index a5c74a46631e9d76fa78261f706a1853a80bab32..cea65f17fbe836ee5951805dfdf5d3078087ba44 100644
--- a/cmake/generic.cmake
+++ b/cmake/generic.cmake
@@ -92,7 +92,7 @@ include_directories(${CMAKE_CURRENT_BINARY_DIR})
# including io directory for inference lib paddle_api.h
include_directories("${PADDLE_SOURCE_DIR}/paddle/fluid/framework/io")
-if(NOT APPLE)
+if(NOT APPLE AND NOT WIN32)
find_package(Threads REQUIRED)
link_libraries(${CMAKE_THREAD_LIBS_INIT})
if(WITH_PSLIB OR WITH_DISTRIBUTE)
@@ -100,7 +100,7 @@ if(NOT APPLE)
else()
set(CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} -pthread -ldl -lrt")
endif()
-endif(NOT APPLE)
+endif()
set_property(GLOBAL PROPERTY FLUID_MODULES "")
# find all fluid modules is used for paddle fluid static library
@@ -391,7 +391,7 @@ function(cc_binary TARGET_NAME)
endfunction(cc_binary)
function(cc_test_build TARGET_NAME)
- if(WITH_TESTING)
+ if(WITH_TESTING AND NOT "$ENV{CI_SKIP_CPP_TEST}" STREQUAL "ON")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
@@ -409,14 +409,12 @@ function(cc_test_build TARGET_NAME)
if(WITH_ROCM)
target_link_libraries(${TARGET_NAME} ${ROCM_HIPRTC_LIB})
endif()
+ check_coverage_opt(${TARGET_NAME} ${cc_test_SRCS})
endif()
-
- check_coverage_opt(${TARGET_NAME} ${cc_test_SRCS})
-
endfunction()
function(cc_test_run TARGET_NAME)
- if(WITH_TESTING)
+ if(WITH_TESTING AND NOT "$ENV{CI_SKIP_CPP_TEST}" STREQUAL "ON")
set(oneValueArgs "")
set(multiValueArgs COMMAND ARGS)
cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
diff --git a/cmake/hip.cmake b/cmake/hip.cmake
index 4c492d7cc48f0657f5389e23ddccc4561708c4a8..514f5ea9deaa32e2c7a926dd38a2c2f8d80682d6 100644
--- a/cmake/hip.cmake
+++ b/cmake/hip.cmake
@@ -85,3 +85,5 @@ message(STATUS "HIP library name: ${hip_library_name}")
# set HIP link libs
find_library(ROCM_HIPRTC_LIB ${hip_library_name} HINTS ${HIP_PATH}/lib)
message(STATUS "ROCM_HIPRTC_LIB: ${ROCM_HIPRTC_LIB}")
+
+include(thrust)
diff --git a/cmake/inference_lib.cmake b/cmake/inference_lib.cmake
index 9694a7bc59c12a96e1c0c33488895ae94dbf2a03..3dcf0b74f7940f7a0d9c9b5242e7df96bf274cdc 100644
--- a/cmake/inference_lib.cmake
+++ b/cmake/inference_lib.cmake
@@ -146,12 +146,19 @@ copy(inference_lib_dist
SRCS ${THREADPOOL_INCLUDE_DIR}/ThreadPool.h
DSTS ${dst_dir})
-# Only GPU need cudaErrorMessage.pb
+# GPU must copy externalErrorMsg.pb
IF(WITH_GPU)
- set(dst_dir "${PADDLE_INFERENCE_INSTALL_DIR}/third_party/cudaerror/data")
- copy(inference_lib_dist
- SRCS ${cudaerror_INCLUDE_DIR}
- DSTS ${dst_dir})
+ set(dst_dir "${PADDLE_INFERENCE_INSTALL_DIR}/third_party/externalError/data")
+ copy(inference_lib_dist
+ SRCS ${externalError_INCLUDE_DIR}
+ DSTS ${dst_dir})
+ENDIF()
+
+IF(WITH_XPU)
+ set(dst_dir "${PADDLE_INFERENCE_INSTALL_DIR}/third_party/install/xpu")
+ copy(inference_lib_dist
+ SRCS ${XPU_INC_DIR} ${XPU_LIB_DIR}
+ DSTS ${dst_dir} ${dst_dir})
ENDIF()
# CMakeCache Info
@@ -193,10 +200,7 @@ copy(inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/fluid/extension/include/*
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/)
copy(inference_lib_dist
- SRCS ${PADDLE_SOURCE_DIR}/paddle/fluid/platform/complex64.h
- DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/)
-copy(inference_lib_dist
- SRCS ${PADDLE_SOURCE_DIR}/paddle/fluid/platform/complex128.h
+ SRCS ${PADDLE_SOURCE_DIR}/paddle/fluid/platform/complex.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/)
copy(inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/fluid/platform/float16.h
@@ -259,7 +263,7 @@ copy(fluid_lib_dist
set(module "platform")
set(platform_lib_deps profiler_proto error_codes_proto)
if(WITH_GPU)
- set(platform_lib_deps ${platform_lib_deps} cuda_error_proto)
+ set(platform_lib_deps ${platform_lib_deps} external_error_proto)
endif(WITH_GPU)
add_dependencies(fluid_lib_dist ${platform_lib_deps})
@@ -323,16 +327,22 @@ function(version version_file)
"GIT COMMIT ID: ${PADDLE_GIT_COMMIT}\n"
"WITH_MKL: ${WITH_MKL}\n"
"WITH_MKLDNN: ${WITH_MKLDNN}\n"
- "WITH_GPU: ${WITH_GPU}\n")
+ "WITH_GPU: ${WITH_GPU}\n"
+ "WITH_ROCM: ${WITH_ROCM}\n")
if(WITH_GPU)
file(APPEND ${version_file}
"CUDA version: ${CUDA_VERSION}\n"
"CUDNN version: v${CUDNN_MAJOR_VERSION}.${CUDNN_MINOR_VERSION}\n")
endif()
+ if(WITH_ROCM)
+ file(APPEND ${version_file}
+ "HIP version: ${HIP_VERSION}\n"
+ "MIOpen version: v${MIOPEN_MAJOR_VERSION}.${MIOPEN_MINOR_VERSION}\n")
+ endif()
file(APPEND ${version_file} "CXX compiler version: ${CMAKE_CXX_COMPILER_VERSION}\n")
if(TENSORRT_FOUND)
file(APPEND ${version_file}
- "WITH_TENSORRT: ${TENSORRT_FOUND}\n" "TensorRT version: v${TENSORRT_MAJOR_VERSION}\n")
+ "WITH_TENSORRT: ${TENSORRT_FOUND}\n" "TensorRT version: v${TENSORRT_MAJOR_VERSION}.${TENSORRT_MINOR_VERSION}.${TENSORRT_PATCH_VERSION}.${TENSORRT_BUILD_VERSION}\n")
endif()
if(WITH_LITE)
file(APPEND ${version_file} "WITH_LITE: ${WITH_LITE}\n" "LITE_GIT_TAG: ${LITE_GIT_TAG}\n")
diff --git a/cmake/init.cmake b/cmake/init.cmake
index b11156d2e9986f879dcf4dd63354edb81c493260..0ebcdc8ceeebcabc2c7c639076939cef5c0fe546 100644
--- a/cmake/init.cmake
+++ b/cmake/init.cmake
@@ -17,17 +17,34 @@ if(NOT WIN32)
set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g -DNDEBUG")
set(CMAKE_CXX_FLAGS_MINSIZEREL "-Os -DNDEBUG")
+
+ if(WITH_GPU)
+ set(CMAKE_CUDA_FLAGS_DEBUG "-g")
+ set(CMAKE_CUDA_FLAGS_RELEASE "-O3 -DNDEBUG")
+ set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-O2 -g -DNDEBUG")
+ set(CMAKE_CUDA_FLAGS_MINSIZEREL "-O1 -DNDEBUG")
+ endif()
else()
+ set(CMAKE_C_FLAGS_DEBUG "/MDd /Zi /Ob0 /Od /RTC1")
+ set(CMAKE_C_FLAGS_RELEASE "/MD /O2 /Ob2 /DNDEBUG")
+ set(CMAKE_C_FLAGS_RELWITHDEBINFO "/MD /Zi /O2 /Ob1 /DNDEBUG")
+ set(CMAKE_C_FLAGS_MINSIZEREL "/MD /O1 /Ob1 /DNDEBUG")
+
+ set(CMAKE_CXX_FLAGS_DEBUG "/MDd /Zi /Ob0 /Od /RTC1")
+ set(CMAKE_CXX_FLAGS_RELEASE "/MD /O2 /Ob2 /DNDEBUG")
+ set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "/MD /Zi /O2 /Ob1 /DNDEBUG")
+ set(CMAKE_CXX_FLAGS_MINSIZEREL "/MD /O1 /Ob1 /DNDEBUG")
+
+ if(WITH_GPU)
+ set(CMAKE_CUDA_FLAGS_DEBUG "-Xcompiler=\"-MDd -Zi -Ob0 -Od /RTC1\"")
+ set(CMAKE_CUDA_FLAGS_RELEASE "-Xcompiler=\"-MD -O2 -Ob2\" -DNDEBUG")
+ set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-Xcompiler=\"-MD -Zi -O2 -Ob1\" -DNDEBUG")
+ set(CMAKE_CUDA_FLAGS_MINSIZEREL "-Xcompiler=\"-MD -O1 -Ob1\" -DNDEBUG")
+ endif()
+
# It can specify CUDA compile flag manualy,
# its use is to remvoe /Zi to reduce GPU static library size. But it's dangerous
# because CUDA will update by nvidia, then error will occur.
# Now, it's only used in VS2015 + CUDA:[10.0, 10.2]
set(WIN_PROPS ${CMAKE_SOURCE_DIR}/cmake/paddle_win.props)
endif()
-
-if(WITH_GPU)
- set(CMAKE_CUDA_FLAGS_DEBUG "-g")
- set(CMAKE_CUDA_FLAGS_RELEASE "-O3 -DNDEBUG")
- set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-O2 -g -DNDEBUG")
- set(CMAKE_CUDA_FLAGS_MINSIZEREL "-O1 -DNDEBUG")
-endif()
diff --git a/cmake/miopen.cmake b/cmake/miopen.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..f482f423dc5c12c5c0d7d87401c5d4a1d85a218a
--- /dev/null
+++ b/cmake/miopen.cmake
@@ -0,0 +1,67 @@
+if(NOT WITH_ROCM)
+ return()
+endif()
+
+# Now we don't support ROCm on windows
+if(WIN32)
+ return()
+endif()
+
+set(MIOPEN_ROOT ${ROCM_PATH}/miopen CACHE PATH "MIOPEN ROOT")
+
+find_path(MIOPEN_INCLUDE_DIR "miopen/miopen.h"
+ PATHS ${MIOPEN_ROOT} ${MIOPEN_ROOT}/include ${MIOPEN_ROOT}/local/include
+ $ENV{MIOPEN_ROOT} $ENV{MIOPEN_ROOT}/include $ENV{MIOPEN_ROOT}/local/include
+ NO_DEFAULT_PATH
+)
+
+get_filename_component(__libpath_hist ${CUDA_CUDART_LIBRARY} PATH)
+
+find_library(MIOPEN_LIBRARY NAMES "libMIOpen.so"
+ PATHS ${MIOPEN_ROOT} ${MIOPEN_ROOT}/lib ${MIOPEN_ROOT}/lib64 ${__libpath_hist}
+ $ENV{MIOPEN_ROOT} $ENV{MIOPEN_ROOT}/lib $ENV{MIOPEN_ROOT}/lib64
+ NO_DEFAULT_PATH
+ DOC "Path to MIOpen library.")
+
+if(MIOPEN_INCLUDE_DIR AND MIOPEN_LIBRARY)
+ set(MIOPEN_FOUND ON)
+else()
+ set(MIOPEN_FOUND OFF)
+endif()
+
+macro(find_miopen_version miopen_header_file)
+ file(READ ${miopen_header_file} MIOPEN_VERSION_FILE_CONTENTS)
+ get_filename_component(MIOPEN_LIB_PATH ${MIOPEN_LIBRARY} DIRECTORY)
+
+ string(REGEX MATCH "define MIOPEN_VERSION_MAJOR +([0-9]+)" MIOPEN_MAJOR_VERSION
+ "${MIOPEN_VERSION_FILE_CONTENTS}")
+ string(REGEX REPLACE "define MIOPEN_VERSION_MAJOR +([0-9]+)" "\\1"
+ MIOPEN_MAJOR_VERSION "${MIOPEN_MAJOR_VERSION}")
+ string(REGEX MATCH "define MIOPEN_VERSION_MINOR +([0-9]+)" MIOPEN_MINOR_VERSION
+ "${MIOPEN_VERSION_FILE_CONTENTS}")
+ string(REGEX REPLACE "define MIOPEN_VERSION_MINOR +([0-9]+)" "\\1"
+ MIOPEN_MINOR_VERSION "${MIOPEN_MINOR_VERSION}")
+ string(REGEX MATCH "define MIOPEN_VERSION_PATCH +([0-9]+)" MIOPEN_PATCH_VERSION
+ "${MIOPEN_VERSION_FILE_CONTENTS}")
+ string(REGEX REPLACE "define MIOPEN_VERSION_PATCH +([0-9]+)" "\\1"
+ MIOPEN_PATCH_VERSION "${MIOPEN_PATCH_VERSION}")
+ string(REGEX MATCH "define MIOPEN_VERSION_TWEAK +([0-9]+)" MIOPEN_TWEAK_VERSION
+ "${MIOPEN_VERSION_FILE_CONTENTS}")
+ string(REGEX REPLACE "define MIOPEN_VERSION_TWEAK +([0-9]+)" "\\1"
+ MIOPEN_TWEAK_VERSION "${MIOPEN_TWEAK_VERSION}")
+
+ if(NOT MIOPEN_MAJOR_VERSION)
+ set(MIOPEN_VERSION "???")
+ else()
+ add_definitions("-DMIOPEN_MAJOR_VERSION=\"${MIOPEN_MAJOR_VERSION}\"")
+ math(EXPR MIOPEN_VERSION
+ "${MIOPEN_MAJOR_VERSION} * 1000 +
+ ${MIOPEN_MINOR_VERSION} * 10 + ${MIOPEN_PATCH_VERSION}")
+ message(STATUS "Current MIOpen header is ${MIOPEN_INCLUDE_DIR}/miopen/miopen.h "
+ "Current MIOpen version is v${MIOPEN_MAJOR_VERSION}.${MIOPEN_MINOR_VERSION}.${MIOPEN_PATCH_VERSION}. ")
+ endif()
+endmacro()
+
+if(MIOPEN_FOUND)
+ find_miopen_version(${MIOPEN_INCLUDE_DIR}/miopen/version.h)
+endif()
diff --git a/cmake/operators.cmake b/cmake/operators.cmake
index 7dac91e531e4cfd16fed211ef659350262dd3153..a200b948dea45dd0ee9e5ced5fbc38e1eb4349b7 100644
--- a/cmake/operators.cmake
+++ b/cmake/operators.cmake
@@ -44,6 +44,9 @@ function(op_library TARGET)
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu)
list(APPEND cu_srcs ${TARGET}.cu)
endif()
+ if (WITH_NV_JETSON)
+ list(REMOVE_ITEM cu_srcs "decode_jpeg_op.cu")
+ endif()
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu)
set(PART_CUDA_KERNEL_FILES ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu
${PART_CUDA_KERNEL_FILES} PARENT_SCOPE)
@@ -180,8 +183,8 @@ function(op_library TARGET)
list(REMOVE_ITEM miopen_cu_cc_srcs "affine_grid_cudnn_op.cu.cc")
list(REMOVE_ITEM miopen_cu_cc_srcs "grid_sampler_cudnn_op.cu.cc")
list(REMOVE_ITEM hip_srcs "cholesky_op.cu")
- list(REMOVE_ITEM hip_srcs "correlation_op.cu")
list(REMOVE_ITEM hip_srcs "multinomial_op.cu")
+ list(REMOVE_ITEM hip_srcs "decode_jpeg_op.cu")
hip_library(${TARGET} SRCS ${cc_srcs} ${hip_cc_srcs} ${miopen_cu_cc_srcs} ${miopen_cu_srcs} ${mkldnn_cc_srcs} ${hip_srcs} DEPS ${op_library_DEPS}
${op_common_deps})
else()
@@ -205,7 +208,7 @@ function(op_library TARGET)
endif()
# Define operators that don't need pybind here.
- foreach(manual_pybind_op "compare_all_op" "compare_op" "logical_op" "nccl_op"
+ foreach(manual_pybind_op "compare_all_op" "compare_op" "logical_op" "bitwise_op" "nccl_op"
"tensor_array_read_write_op" "tensorrt_engine_op" "conv_fusion_op"
"fusion_transpose_flatten_concat_op" "fusion_conv_inception_op"
"sync_batch_norm_op" "dgc_op" "fused_fc_elementwise_layernorm_op"
diff --git a/cmake/tensorrt.cmake b/cmake/tensorrt.cmake
index 889332fc55704f96f0afbd5815042ae8c0ba1035..e4b22befff8508f677288bba7b938556b796b68a 100644
--- a/cmake/tensorrt.cmake
+++ b/cmake/tensorrt.cmake
@@ -47,11 +47,23 @@ if(TENSORRT_FOUND)
file(READ ${TENSORRT_INCLUDE_DIR}/NvInfer.h TENSORRT_VERSION_FILE_CONTENTS)
string(REGEX MATCH "define NV_TENSORRT_MAJOR +([0-9]+)" TENSORRT_MAJOR_VERSION
"${TENSORRT_VERSION_FILE_CONTENTS}")
+ string(REGEX MATCH "define NV_TENSORRT_MINOR +([0-9]+)" TENSORRT_MINOR_VERSION
+ "${TENSORRT_VERSION_FILE_CONTENTS}")
+ string(REGEX MATCH "define NV_TENSORRT_PATCH +([0-9]+)" TENSORRT_PATCH_VERSION
+ "${TENSORRT_VERSION_FILE_CONTENTS}")
+ string(REGEX MATCH "define NV_TENSORRT_BUILD +([0-9]+)" TENSORRT_BUILD_VERSION
+ "${TENSORRT_VERSION_FILE_CONTENTS}")
if("${TENSORRT_MAJOR_VERSION}" STREQUAL "")
file(READ ${TENSORRT_INCLUDE_DIR}/NvInferVersion.h TENSORRT_VERSION_FILE_CONTENTS)
string(REGEX MATCH "define NV_TENSORRT_MAJOR +([0-9]+)" TENSORRT_MAJOR_VERSION
"${TENSORRT_VERSION_FILE_CONTENTS}")
+ string(REGEX MATCH "define NV_TENSORRT_MINOR +([0-9]+)" TENSORRT_MINOR_VERSION
+ "${TENSORRT_VERSION_FILE_CONTENTS}")
+ string(REGEX MATCH "define NV_TENSORRT_PATCH +([0-9]+)" TENSORRT_PATCH_VERSION
+ "${TENSORRT_VERSION_FILE_CONTENTS}")
+ string(REGEX MATCH "define NV_TENSORRT_BUILD +([0-9]+)" TENSORRT_BUILD_VERSION
+ "${TENSORRT_VERSION_FILE_CONTENTS}")
endif()
if("${TENSORRT_MAJOR_VERSION}" STREQUAL "")
@@ -60,9 +72,15 @@ if(TENSORRT_FOUND)
string(REGEX REPLACE "define NV_TENSORRT_MAJOR +([0-9]+)" "\\1"
TENSORRT_MAJOR_VERSION "${TENSORRT_MAJOR_VERSION}")
+ string(REGEX REPLACE "define NV_TENSORRT_MINOR +([0-9]+)" "\\1"
+ TENSORRT_MINOR_VERSION "${TENSORRT_MINOR_VERSION}")
+ string(REGEX REPLACE "define NV_TENSORRT_PATCH +([0-9]+)" "\\1"
+ TENSORRT_PATCH_VERSION "${TENSORRT_PATCH_VERSION}")
+ string(REGEX REPLACE "define NV_TENSORRT_BUILD +([0-9]+)" "\\1"
+ TENSORRT_BUILD_VERSION "${TENSORRT_BUILD_VERSION}")
message(STATUS "Current TensorRT header is ${TENSORRT_INCLUDE_DIR}/NvInfer.h. "
- "Current TensorRT version is v${TENSORRT_MAJOR_VERSION}. ")
+ "Current TensorRT version is v${TENSORRT_MAJOR_VERSION}.${TENSORRT_MINOR_VERSION}.${TENSORRT_PATCH_VERSION}.${TENSORRT_BUILD_VERSION} ")
include_directories(${TENSORRT_INCLUDE_DIR})
link_directories(${TENSORRT_LIBRARY})
add_definitions(-DPADDLE_WITH_TENSORRT)
diff --git a/cmake/third_party.cmake b/cmake/third_party.cmake
index f90fa3509d63d4765ef78638b9f4f28d0e22bed2..aa31745c21340c4bef521f9cbf44535a634c4eb7 100644
--- a/cmake/third_party.cmake
+++ b/cmake/third_party.cmake
@@ -108,13 +108,19 @@ ENDMACRO()
# 2. NAME: The name of file, that determin the dirname
#
FUNCTION(file_download_and_uncompress URL NAME)
- MESSAGE(STATUS "Download dependence[${NAME}] from ${URL}")
+ set(options "")
+ set(oneValueArgs MD5)
+ set(multiValueArgs "")
+ cmake_parse_arguments(URL "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+ MESSAGE(STATUS "Download dependence[${NAME}] from ${URL}, MD5: ${URL_MD5}")
SET(${NAME}_INCLUDE_DIR ${THIRD_PARTY_PATH}/${NAME}/data PARENT_SCOPE)
ExternalProject_Add(
- extern_download_${NAME}
+ download_${NAME}
${EXTERNAL_PROJECT_LOG_ARGS}
PREFIX ${THIRD_PARTY_PATH}/${NAME}
URL ${URL}
+ URL_MD5 ${URL_MD5}
+ TIMEOUT 120
DOWNLOAD_DIR ${THIRD_PARTY_PATH}/${NAME}/data/
SOURCE_DIR ${THIRD_PARTY_PATH}/${NAME}/data/
DOWNLOAD_NO_PROGRESS 1
@@ -123,7 +129,7 @@ FUNCTION(file_download_and_uncompress URL NAME)
UPDATE_COMMAND ""
INSTALL_COMMAND ""
)
- set(third_party_deps ${third_party_deps} extern_download_${NAME} PARENT_SCOPE)
+ set(third_party_deps ${third_party_deps} download_${NAME} PARENT_SCOPE)
ENDFUNCTION()
@@ -209,6 +215,8 @@ list(APPEND third_party_deps extern_eigen3 extern_gflags extern_glog extern_boos
list(APPEND third_party_deps extern_zlib extern_dlpack extern_warpctc extern_threadpool)
include(cblas) # find first, then download, build, install openblas
+
+message(STATUS "CBLAS_PROVIDER: ${CBLAS_PROVIDER}")
if(${CBLAS_PROVIDER} STREQUAL MKLML)
list(APPEND third_party_deps extern_mklml)
elseif(${CBLAS_PROVIDER} STREQUAL EXTERN_OPENBLAS)
@@ -242,8 +250,22 @@ if(WITH_GPU)
include(external/cub) # download cub
list(APPEND third_party_deps extern_cub)
endif()
- set(CUDAERROR_URL "http://paddlepaddledeps.bj.bcebos.com/cudaErrorMessage.tar.gz" CACHE STRING "" FORCE)
- file_download_and_uncompress(${CUDAERROR_URL} "cudaerror") # download file cudaErrorMessage
+ set(URL "https://paddlepaddledeps.bj.bcebos.com/externalErrorMsg.tar.gz" CACHE STRING "" FORCE)
+ file_download_and_uncompress(${URL} "externalError" MD5 c0749523ebb536eb7382487d645d9cd4) # download file externalErrorMsg.tar.gz
+ if(WITH_TESTING)
+ # copy externalErrorMsg.pb, just for unittest can get error message correctly.
+ set(SRC_DIR ${THIRD_PARTY_PATH}/externalError/data)
+ if(WIN32 AND (NOT "${CMAKE_GENERATOR}" STREQUAL "Ninja"))
+ set(DST_DIR1 ${CMAKE_BINARY_DIR}/paddle/fluid/third_party/externalError/data)
+ else()
+ set(DST_DIR1 ${CMAKE_BINARY_DIR}/paddle/third_party/externalError/data)
+ endif()
+ set(DST_DIR2 ${CMAKE_BINARY_DIR}/python/paddle/include/third_party/externalError/data)
+ add_custom_command(TARGET download_externalError POST_BUILD
+ COMMAND ${CMAKE_COMMAND} -E copy_directory ${SRC_DIR} ${DST_DIR1}
+ COMMAND ${CMAKE_COMMAND} -E copy_directory ${SRC_DIR} ${DST_DIR2}
+ COMMENT "copy_directory from ${SRC_DIR} to ${DST_DIR}")
+ endif()
endif(WITH_GPU)
if(WITH_XPU)
@@ -261,6 +283,14 @@ if(WITH_PSLIB)
if(WITH_PSLIB_BRPC)
include(external/pslib_brpc) # download, build, install pslib_brpc
list(APPEND third_party_deps extern_pslib_brpc)
+ else()
+ include(external/snappy)
+ list(APPEND third_party_deps extern_snappy)
+
+ include(external/leveldb)
+ list(APPEND third_party_deps extern_leveldb)
+ include(external/brpc)
+ list(APPEND third_party_deps extern_brpc)
endif()
endif(WITH_PSLIB)
@@ -296,6 +326,11 @@ if (WITH_PSCORE)
include(external/libmct) # download, build, install libmct
list(APPEND third_party_deps extern_libmct)
+
+ if (WITH_HETERPS)
+ include(external/rocksdb) # download, build, install libmct
+ list(APPEND third_party_deps extern_rocksdb)
+ endif()
endif()
if(WITH_XBYAK)
diff --git a/cmake/thrust.cmake b/cmake/thrust.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..ff415b1e3c4bf6ff190b2f8e97cfb9da52259435
--- /dev/null
+++ b/cmake/thrust.cmake
@@ -0,0 +1,24 @@
+function(add_thrust_patches_if_necessary)
+ set(thrust_detect_file ${PROJECT_BINARY_DIR}/detect_thrust.cu)
+ file(WRITE ${thrust_detect_file} ""
+ "#include \"thrust/version.h\"\n"
+ "#include \"thrust/shuffle.h\"\n"
+ "#include \"stdio.h\"\n"
+ "int main() {\n"
+ " int version = THRUST_VERSION;\n"
+ " printf(\"%d\", version);\n"
+ " return 0;\n"
+ "}\n")
+
+ execute_process(COMMAND "${CUDA_NVCC_EXECUTABLE}"
+ "--run" "${thrust_detect_file}"
+ WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/"
+ RESULT_VARIABLE nvcc_res ERROR_QUIET)
+ if(NOT nvcc_res EQUAL 0)
+ set(thrust_patches "${PADDLE_SOURCE_DIR}/patches/thrust")
+ message(STATUS "Add thrust patches: ${thrust_patches}")
+ include_directories(${thrust_patches})
+ endif()
+endfunction()
+
+add_thrust_patches_if_necessary()
diff --git a/go/README_cn.md b/go/README_cn.md
deleted file mode 100644
index 040540e939bc3a0993e7c963b281ad91fbfe1ffc..0000000000000000000000000000000000000000
--- a/go/README_cn.md
+++ /dev/null
@@ -1,56 +0,0 @@
-# Paddle 预测golang API
-
-## 安装
-首先cmake编译时打开`-DON_INFER=ON`,在编译目录下得到``paddle_inference_c_install_dir``,将该目录移动到当前目录中并重命名为`paddle_c`
-
-## 在Go中使用Paddle预测
-首先创建预测配置
-``` go
-config := paddle.NewAnalysisConfig()
-config.SetModel(model_file, params_file)
-config.SwitchUseFeedFetchOps(false)
-config.SwitchSpecifyInputNames(true)
-```
-
-创建predictor
-``` go
-predictor := paddle.NewPredictor(config)
-```
-
-获取输入Tensor和输出Tensor
-``` go
-inputs = predictor.GetInputTensors()
-```
-
-设置输入数据(假设只有一个输入)
-``` go
-input := inputs[0]
-input.SetValue(data)
-input.Reshape([]int32{1, 3, 300, 300})
-```
-
-运行预测
-``` go
-predictor.ZeroCopyRun()
-```
-
-获取输入Tensor的真实值
-``` go
-output := outputs[0]
-predictor.GetZeroCopyOutput(output)
-value := reflect.ValueOf(output.Value())
-shape, dtype := paddle.ShapeAndTypeOf(value)
-output_data := value.Interface().([][]float32)
-```
-
-## 示例
-源码见[mobilenet](./demo/mobilenet.go)
-
-下载[数据](https://paddle-inference-dist.cdn.bcebos.com/mobilenet-test-model-data.tar.gz)并解压到当前目录
-
-运行
-```bash
-go mod init github.com/paddlepaddle
-export LD_LIBRARY_PATH=`pwd`/paddle_c/paddle/lib:$LD_LIBRARY_PATH
-go run ./demo/mobilenet.go
-```
diff --git a/go/demo/mobilenet.go b/go/demo/mobilenet.go
deleted file mode 100644
index c1ca2e967f72dc6646a6785d86ba59c709bfe25c..0000000000000000000000000000000000000000
--- a/go/demo/mobilenet.go
+++ /dev/null
@@ -1,81 +0,0 @@
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-package main
-
-import "github.com/paddlepaddle/paddle"
-import "strings"
-import "io/ioutil"
-import "strconv"
-import "reflect"
-
-func main() {
- config := paddle.NewAnalysisConfig()
- config.SetModel("data/model/__model__", "data/model/__params__")
- config.DisableGlogInfo()
- config.SwitchUseFeedFetchOps(false)
- config.SwitchSpecifyInputNames(true)
-
- predictor := paddle.NewPredictor(config)
-
- println("============== paddle inference ==============")
- println("input num: ", predictor.GetInputNum())
- println("input name: ", predictor.GetInputNames()[0])
- println("output num: ", predictor.GetOutputNum())
- println("output name: ", predictor.GetInputNames()[0])
- println("============== run inference =================")
-
- input := predictor.GetInputTensors()[0]
- output := predictor.GetOutputTensors()[0]
-
- filename := "data/data.txt"
- data := ReadData(filename)
- input.SetValue(data[:1 * 3 * 300 * 300])
- input.Reshape([]int32{1, 3, 300, 300})
-
- predictor.SetZeroCopyInput(input)
- predictor.ZeroCopyRun()
- predictor.GetZeroCopyOutput(output)
-
- println("============= parse output ===================")
- output_val := output.Value()
- value := reflect.ValueOf(output_val)
- shape, dtype := paddle.ShapeAndTypeOf(value)
- switch dtype {
- case paddle.PaddleDType(paddle.FLOAT32):
- v := value.Interface().([][]float32)
- println("v: ", v[0][0], v[0][1], "...")
- case paddle.PaddleDType(paddle.UINT8):
- v := value.Interface().([][]uint8)
- println("v: ", v[0][0], v[0][1], "...")
- case paddle.PaddleDType(paddle.INT32):
- v := value.Interface().([][]int32)
- println("v: ", v[0][0], v[0][1], "...")
- case paddle.PaddleDType(paddle.INT64):
- v := value.Interface().([][]int64)
- println("v: ", v[0][0], v[0][1], "...")
- }
- println(shape[0], shape[1])
- println(output.Shape()[0])
-}
-
-func ReadData(filename string) []float32 {
- file_bytes, _ := ioutil.ReadFile(filename)
- data_slice := strings.Split(string(file_bytes), " ")
- var result []float32
- for _, n := range data_slice {
- r, _ := strconv.ParseFloat(n, 32)
- result = append(result, float32(r))
- }
- return result
-}
diff --git a/go/demo/mobilenet_c.cc b/go/demo/mobilenet_c.cc
deleted file mode 100644
index 6a5cc683c9f9a9c88f73a3ca5ebac274210f3b7a..0000000000000000000000000000000000000000
--- a/go/demo/mobilenet_c.cc
+++ /dev/null
@@ -1,74 +0,0 @@
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include
-#include
-#include
-
-void SetConfig(PD_AnalysisConfig *);
-void ReadData(float *data, int size);
-
-int main(int argc, char *argv[]) {
- PD_AnalysisConfig *config = PD_NewAnalysisConfig();
- SetConfig(config);
- PD_Predictor *predictor = PD_NewPredictor(config);
-
- int input_num = PD_GetInputNum(predictor);
- printf("Input num: %d\n", input_num);
- int output_num = PD_GetOutputNum(predictor);
- printf("Output num: %d\n", output_num);
-
- PD_ZeroCopyTensor input;
- PD_InitZeroCopyTensor(&input);
- input.name = const_cast(PD_GetInputName(predictor, 0)); // NOLINT
- input.data.capacity = sizeof(float) * 1 * 3 * 300 * 300;
- input.data.length = input.data.capacity;
- input.data.data = malloc(input.data.capacity);
- int shape[] = {1, 3, 300, 300};
- input.shape.data = static_cast(shape);
- input.shape.capacity = sizeof(shape);
- input.shape.length = sizeof(shape);
- input.dtype = PD_FLOAT32;
- ReadData((float *)input.data.data, 1 * 3 * 300 * 300); // NOLINT
- float *data = (float *)input.data.data; // NOLINT
- PD_SetZeroCopyInput(predictor, &input);
- int *shape_ptr = (int *)input.shape.data; // NOLINT
-
- PD_ZeroCopyRun(predictor);
- PD_ZeroCopyTensor output;
- PD_InitZeroCopyTensor(&output);
- output.name = const_cast(PD_GetOutputName(predictor, 0)); // NOLINT
- PD_GetZeroCopyOutput(predictor, &output);
-
- PD_DestroyZeroCopyTensor(&output);
-
- PD_DeleteAnalysisConfig(config);
- PD_DeletePredictor(predictor);
- return 0;
-}
-
-void SetConfig(PD_AnalysisConfig *config) {
- PD_SetModel(config, "data/model/__model__", "data/model/__params__");
- PD_SwitchUseFeedFetchOps(config, false);
- PD_SwitchSpecifyInputNames(config, true);
- PD_DisableGlogInfo(config);
- // PD_SwitchIrOptim(config, false);
-}
-
-void ReadData(float *data, int n) {
- FILE *fp = fopen("data/data.txt", "r");
- for (int i = 0; i < n; i++) {
- fscanf(fp, "%f", &data[i]);
- }
- fclose(fp);
-}
diff --git a/go/demo/mobilenet_c_exp.cc b/go/demo/mobilenet_c_exp.cc
deleted file mode 100644
index b4f42dab6790bfb6dd33860a8ada704166bb74ac..0000000000000000000000000000000000000000
--- a/go/demo/mobilenet_c_exp.cc
+++ /dev/null
@@ -1,84 +0,0 @@
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include
-#include
-#include
-
-void ReadData(float* data, int size);
-
-int main(int argc, char* argv[]) {
- PD_Config* config = PD_ConfigCreate();
- PD_ConfigSetModel(config, "data/model/__model__", "data/model/__params__");
- PD_ConfigDisableGlogInfo(config);
-
- PD_Predictor* predictor = PD_PredictorCreate(config);
- // config has destroyed in PD_PredictorCreate
- config = NULL;
-
- int input_num = PD_PredictorGetInputNum(predictor);
- printf("Input num: %d\n", input_num);
- int output_num = PD_PredictorGetOutputNum(predictor);
- printf("Output num: %d\n", output_num);
-
- PD_OneDimArrayCstr* input_names = PD_PredictorGetInputNames(predictor);
- PD_Tensor* input_tensor =
- PD_PredictorGetInputHandle(predictor, input_names->data[0]);
- PD_OneDimArrayCstrDestroy(input_names);
- input_names = NULL;
-
- int32_t shape[] = {1, 3, 300, 300};
- float* data = (float*)malloc(sizeof(float) * 1 * 3 * 300 * 300); // NOLINT
- ReadData(data, 1 * 3 * 300 * 300); // NOLINT
- PD_TensorReshape(input_tensor, 4, shape);
- PD_TensorCopyFromCpuFloat(input_tensor, data);
- free(data);
- data = NULL;
- PD_PredictorRun(predictor);
-
- PD_OneDimArrayCstr* output_names = PD_PredictorGetOutputNames(predictor);
- PD_Tensor* output_tensor =
- PD_PredictorGetOutputHandle(predictor, output_names->data[0]);
- PD_OneDimArrayCstrDestroy(output_names);
- output_names = nullptr;
-
- PD_OneDimArrayInt32* out_shape = PD_TensorGetShape(output_tensor);
- int32_t size = 1;
- for (size_t index = 0; index < out_shape->size; ++index) {
- size = size * out_shape->data[index];
- }
- PD_OneDimArrayInt32Destroy(out_shape);
- out_shape = NULL;
-
- data = (float*)malloc(sizeof(float) * size); // NOLINT
- PD_TensorCopyToCpuFloat(output_tensor, data);
- free(data);
- data = NULL;
-
- PD_TensorDestroy(output_tensor);
- output_tensor = NULL;
- PD_TensorDestroy(input_tensor);
- input_tensor = NULL;
- PD_PredictorDestroy(predictor);
- predictor = NULL;
-
- return 0;
-}
-
-void ReadData(float* data, int n) {
- FILE* fp = fopen("data/data.txt", "r");
- for (int i = 0; i < n; i++) {
- fscanf(fp, "%f", &data[i]);
- }
- fclose(fp);
-}
diff --git a/go/demo/mobilenet_cxx.cc b/go/demo/mobilenet_cxx.cc
deleted file mode 100644
index 7bdd6b2b03b24e2393e746edde754f763e9dd986..0000000000000000000000000000000000000000
--- a/go/demo/mobilenet_cxx.cc
+++ /dev/null
@@ -1,47 +0,0 @@
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include
-#include
-#include
-
-void SetConfig(paddle::AnalysisConfig *);
-
-int main(int argc, char *argv[]) {
- paddle::AnalysisConfig config;
- SetConfig(&config);
- auto predictor = paddle::CreatePaddlePredictor(config);
- auto input_name = predictor->GetInputNames()[0];
- auto input = predictor->GetInputTensor(input_name);
- std::cout << predictor->GetOutputNames()[0] << std::endl;
- std::vector shape{1, 3, 300, 300};
- input->Reshape(std::move(shape));
- std::vector data(1 * 300 * 300 * 3);
- std::ifstream fin("data/data.txt");
- for (int i = 0; i < data.size(); i++) {
- fin >> data[i];
- }
-
- input->copy_from_cpu(data.data());
- predictor->ZeroCopyRun();
- auto output_name = predictor->GetOutputNames()[0];
- auto output = predictor->GetOutputTensor(output_name);
- return 0;
-}
-
-void SetConfig(paddle::AnalysisConfig *config) {
- config->SetModel("data/model/__model__", "data/model/__params__");
- config->SwitchUseFeedFetchOps(false);
- config->SwitchSpecifyInputNames(true);
- config->SwitchIrOptim(false);
-}
diff --git a/go/paddle/config.go b/go/paddle/config.go
deleted file mode 100644
index 68a31230997bed73fbab1c1d1c7af123e353cf97..0000000000000000000000000000000000000000
--- a/go/paddle/config.go
+++ /dev/null
@@ -1,211 +0,0 @@
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package paddle
-
-// #cgo CFLAGS: -I${SRCDIR}/../paddle_c/paddle/include
-// #cgo LDFLAGS: -L${SRCDIR}/../paddle_c/paddle/lib -lpaddle_inference_c
-// #include
-// #include
-// #include
-import "C"
-
-import "runtime"
-import "unsafe"
-
-type Precision C.Precision
-
-const (
- Precision_FLOAT32 Precision = C.kFloat32
- Precision_INT8 Precision = C.kInt8
- Precision_HALF Precision = C.kHalf
-)
-
-type AnalysisConfig struct {
- c *C.PD_AnalysisConfig
-}
-
-func NewAnalysisConfig() *AnalysisConfig {
- c_config := C.PD_NewAnalysisConfig()
- config := &AnalysisConfig{c: c_config}
- runtime.SetFinalizer(config, (*AnalysisConfig).finalize)
- return config
-}
-
-func (config *AnalysisConfig) finalize() {
- C.PD_DeleteAnalysisConfig(config.c)
-}
-
-func (config *AnalysisConfig) SetModel(model, params string) {
- //C.printString((*C.char)(unsafe.Pointer(&s[0])))
- c_model := C.CString(model)
- defer C.free(unsafe.Pointer(c_model))
- var c_params *C.char
- if params == "" {
- c_params = nil
- } else {
- c_params = C.CString(params)
- defer C.free(unsafe.Pointer(c_params))
- }
-
- C.PD_SetModel(config.c, c_model, c_params)
-}
-
-func (config *AnalysisConfig) ModelDir() string {
- return C.GoString(C.PD_ModelDir(config.c))
-}
-
-func (config *AnalysisConfig) ProgFile() string {
- return C.GoString(C.PD_ProgFile(config.c))
-}
-
-func (config *AnalysisConfig) ParamsFile() string {
- return C.GoString(C.PD_ParamsFile(config.c))
-}
-
-func (config *AnalysisConfig) EnableUseGpu(memory_pool_init_size_mb int, device_id int) {
- C.PD_EnableUseGpu(config.c, C.int(memory_pool_init_size_mb), C.int(device_id))
-}
-
-func (config *AnalysisConfig) DisableGpu() {
- C.PD_DisableGpu(config.c)
-}
-
-func (config *AnalysisConfig) UseGpu() bool {
- return ConvertCBooleanToGo(C.PD_UseGpu(config.c))
-}
-
-func (config *AnalysisConfig) GpuDeviceId() int {
- return int(C.PD_GpuDeviceId(config.c))
-}
-
-func (config *AnalysisConfig) MemoryPoolInitSizeMb() int {
- return int(C.PD_MemoryPoolInitSizeMb(config.c))
-}
-
-func (config *AnalysisConfig) FractionOfGpuMemoryForPool() float32 {
- return float32(C.PD_FractionOfGpuMemoryForPool(config.c))
-}
-
-func (config *AnalysisConfig) EnableCudnn() {
- C.PD_EnableCUDNN(config.c)
-}
-
-func (config *AnalysisConfig) CudnnEnabled() bool {
- return ConvertCBooleanToGo(C.PD_CudnnEnabled(config.c))
-}
-
-func (config *AnalysisConfig) SwitchIrOptim(x bool) {
- C.PD_SwitchIrOptim(config.c, C.bool(x))
-}
-
-func (config *AnalysisConfig) IrOptim() bool {
- return ConvertCBooleanToGo(C.PD_IrOptim(config.c))
-}
-
-func (config *AnalysisConfig) SwitchUseFeedFetchOps(x bool) {
- C.PD_SwitchUseFeedFetchOps(config.c, C.bool(x))
-}
-
-func (config *AnalysisConfig) UseFeedFetchOpsEnabled() bool {
- return ConvertCBooleanToGo(C.PD_UseFeedFetchOpsEnabled(config.c))
-}
-
-func (config *AnalysisConfig) SwitchSpecifyInputNames(x bool) {
- C.PD_SwitchSpecifyInputNames(config.c, C.bool(x))
-}
-
-func (config *AnalysisConfig) SpecifyInputName() bool {
- return ConvertCBooleanToGo(C.PD_SpecifyInputName(config.c))
-}
-
-func (config *AnalysisConfig) EnableTensorRtEngine(workspace_size int, max_batch_size int, min_subgraph_size int, precision Precision, use_static bool, use_calib_mode bool) {
- C.PD_EnableTensorRtEngine(config.c, C.int(workspace_size), C.int(max_batch_size), C.int(min_subgraph_size), C.Precision(precision), C.bool(use_static), C.bool(use_calib_mode))
-}
-
-func (config *AnalysisConfig) TensorrtEngineEnabled() bool {
- return ConvertCBooleanToGo(C.PD_TensorrtEngineEnabled(config.c))
-}
-
-func (config *AnalysisConfig) SwitchIrDebug(x bool) {
- C.PD_SwitchIrDebug(config.c, C.bool(x))
-}
-
-func (config *AnalysisConfig) EnableMkldnn() {
- C.PD_EnableMKLDNN(config.c)
-}
-
-func (config *AnalysisConfig) MkldnnEnabled() bool {
- return ConvertCBooleanToGo(C.PD_MkldnnEnabled(config.c))
-}
-
-func (config *AnalysisConfig) SetCpuMathLibraryNumThreads(n int) {
- C.PD_SetCpuMathLibraryNumThreads(config.c, C.int(n))
-}
-
-func (config *AnalysisConfig) CpuMathLibraryNumThreads() int {
- return int(C.PD_CpuMathLibraryNumThreads(config.c))
-}
-
-func (config *AnalysisConfig) EnableMkldnnQuantizer() {
- C.PD_EnableMkldnnQuantizer(config.c)
-}
-
-func (config *AnalysisConfig) EnableMkldnnBfloat16() {
- C.PD_EnableMkldnnBfloat16(config.c)
-}
-
-func (config *AnalysisConfig) MkldnnQuantizerEnabled() bool {
- return ConvertCBooleanToGo(C.PD_MkldnnQuantizerEnabled(config.c))
-}
-
-func (config *AnalysisConfig) MkldnnBfloat16Enabled() bool {
- return ConvertCBooleanToGo(C.PD_MkldnnBfloat16Enabled(config.c))
-}
-// SetModelBuffer
-// ModelFromMemory
-
-func (config *AnalysisConfig) EnableMemoryOptim() {
- C.PD_EnableMemoryOptim(config.c)
-}
-
-func (config *AnalysisConfig) MemoryOptimEnabled() bool {
- return ConvertCBooleanToGo(C.PD_MemoryOptimEnabled(config.c))
-}
-
-func (config *AnalysisConfig) EnableProfile() {
- C.PD_EnableProfile(config.c)
-}
-
-func (config *AnalysisConfig) ProfileEnabled() bool {
- return ConvertCBooleanToGo(C.PD_ProfileEnabled(config.c))
-}
-
-func (config *AnalysisConfig) DisableGlogInfo() {
- C.PD_DisableGlogInfo(config.c)
-}
-
-func (config *AnalysisConfig) DeletePass(pass string) {
- c_pass := C.CString(pass)
- defer C.free(unsafe.Pointer(c_pass))
- C.PD_DeletePass(config.c, c_pass)
-}
-
-func (config *AnalysisConfig) SetInValid() {
- C.PD_SetInValid(config.c)
-}
-
-func (config *AnalysisConfig) IsValid() bool {
- return ConvertCBooleanToGo(C.PD_IsValid(config.c))
-}
diff --git a/go/paddle/predictor.go b/go/paddle/predictor.go
deleted file mode 100644
index 5f2b2c81a60549dfdbf22dd31a98560e7e3a8cee..0000000000000000000000000000000000000000
--- a/go/paddle/predictor.go
+++ /dev/null
@@ -1,115 +0,0 @@
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package paddle
-
-// #cgo CFLAGS: -I${SRCDIR}/../paddle_c/paddle/include
-// #cgo LDFLAGS: -L${SRCDIR}/../paddle_c/paddle/lib -lpaddle_inference_c
-// #include
-// #include "paddle_c_api.h"
-import "C"
-
-import "reflect"
-import "runtime"
-import "unsafe"
-
-type Predictor struct {
- c *C.PD_Predictor
-}
-
-func NewPredictor(config *AnalysisConfig) *Predictor {
- c_predictor := C.PD_NewPredictor((*config).c)
- predictor := &Predictor{c: c_predictor}
- runtime.SetFinalizer(predictor, (*Predictor).finalize)
- return predictor
-}
-
-func (predictor *Predictor) finalize() {
- C.PD_DeletePredictor(predictor.c)
-}
-
-func DeletePredictor(predictor *Predictor) {
- C.PD_DeletePredictor(predictor.c)
-}
-
-func (predictor *Predictor) GetInputNum() int {
- return int(C.PD_GetInputNum(predictor.c))
-}
-
-func (predictor *Predictor) GetOutputNum() int {
- return int(C.PD_GetOutputNum(predictor.c))
-}
-
-func (predictor *Predictor) GetInputName(n int) string {
- return C.GoString(C.PD_GetInputName(predictor.c, C.int(n)))
-}
-
-func (predictor *Predictor) GetOutputName(n int) string {
- return C.GoString(C.PD_GetOutputName(predictor.c, C.int(n)))
-}
-
-func (predictor *Predictor) GetInputTensors() [](*ZeroCopyTensor) {
- var result [](*ZeroCopyTensor)
- for i := 0; i < predictor.GetInputNum(); i++ {
- tensor := NewZeroCopyTensor()
- tensor.c.name = C.PD_GetInputName(predictor.c, C.int(i))
- result = append(result, tensor)
- }
- return result
-}
-
-func (predictor *Predictor) GetOutputTensors() [](*ZeroCopyTensor) {
- var result [](*ZeroCopyTensor)
- for i := 0; i < predictor.GetOutputNum(); i++ {
- tensor := NewZeroCopyTensor()
- tensor.c.name = C.PD_GetOutputName(predictor.c, C.int(i))
- result = append(result, tensor)
- }
- return result
-}
-
-func (predictor *Predictor) GetInputNames() []string {
- names := make([]string, predictor.GetInputNum())
- for i := 0; i < len(names); i++ {
- names[i] = predictor.GetInputName(i)
- }
- return names
-}
-
-func (predictor *Predictor) GetOutputNames() []string {
- names := make([]string, predictor.GetOutputNum())
- for i := 0; i < len(names); i++ {
- names[i] = predictor.GetOutputName(i)
- }
- return names
-}
-
-func (predictor *Predictor) SetZeroCopyInput(tensor *ZeroCopyTensor) {
- C.PD_SetZeroCopyInput(predictor.c, tensor.c)
-}
-
-func (predictor *Predictor) GetZeroCopyOutput(tensor *ZeroCopyTensor) {
- C.PD_GetZeroCopyOutput(predictor.c, tensor.c)
- tensor.name = C.GoString(tensor.c.name)
- var shape []int32
- shape_hdr := (*reflect.SliceHeader)(unsafe.Pointer(&shape))
- shape_hdr.Data = uintptr(unsafe.Pointer(tensor.c.shape.data))
- shape_hdr.Len = int(tensor.c.shape.length / C.sizeof_int)
- shape_hdr.Cap = int(tensor.c.shape.length / C.sizeof_int)
- tensor.Reshape(shape)
-}
-
-func (predictor *Predictor) ZeroCopyRun() {
- C.PD_ZeroCopyRun(predictor.c)
-}
diff --git a/go/paddle/tensor.go b/go/paddle/tensor.go
deleted file mode 100644
index 6fbcf039f88a7cc43a5d28f0433c9feb965566f0..0000000000000000000000000000000000000000
--- a/go/paddle/tensor.go
+++ /dev/null
@@ -1,255 +0,0 @@
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package paddle
-
-// #cgo CFLAGS: -I${SRCDIR}/../paddle_c/paddle/include
-// #cgo LDFLAGS: -L${SRCDIR}/../paddle_c/paddle/lib -lpaddle_inference_c
-// #include
-// #include
-// #include
-// #include
-import "C"
-
-import "runtime"
-import "reflect"
-import "unsafe"
-import (
- "bytes"
- "encoding/binary"
-)
-
-type PaddleDType C.PD_DataType
-
-const (
- FLOAT32 PaddleDType = C.PD_FLOAT32
- INT32 PaddleDType = C.PD_INT32
- INT64 PaddleDType = C.PD_INT64
- UINT8 PaddleDType = C.PD_UINT8
- UNKDTYPE PaddleDType = C.PD_UNKDTYPE
-)
-
-var types = []struct {
- gotype reflect.Type
- dtype PaddleDType
-}{
- {reflect.TypeOf(float32(0)), FLOAT32},
- {reflect.TypeOf(int32(0)), INT32},
- {reflect.TypeOf(int64(0)), INT64},
- {reflect.TypeOf(uint8(0)), UINT8},
-}
-
-func TypeOfShape(dtype PaddleDType, shape []int32) reflect.Type {
- var ret reflect.Type
- for _, t := range types {
- if dtype == PaddleDType(t.dtype) {
- ret = t.gotype
- break
- }
- }
-
- if ret == nil {
- panic(bug("Data %v type is not support", dtype))
- }
-
- for range shape {
- ret = reflect.SliceOf(ret)
- }
- return ret
-}
-
-type ZeroCopyTensor struct {
- c *C.PD_ZeroCopyTensor
- name string
- shape []int32
-}
-
-func NewZeroCopyTensor() *ZeroCopyTensor {
- c_tensor := C.PD_NewZeroCopyTensor()
-
- tensor := &ZeroCopyTensor{c: c_tensor}
- runtime.SetFinalizer(tensor, (*ZeroCopyTensor).finalize)
- return tensor
-}
-
-func (tensor *ZeroCopyTensor) finalize() {
- C.PD_DeleteZeroCopyTensor(tensor.c)
-}
-
-func (tensor *ZeroCopyTensor) Shape() []int32 {
- return tensor.shape
-}
-
-func (tensor *ZeroCopyTensor) Name() string {
- return C.GoString(tensor.c.name)
-}
-
-func (tensor *ZeroCopyTensor) Rename(name string) {
- tensor.name = name
- tensor.c.name = (*C.char)(unsafe.Pointer(tensor.c.name))
- //tensor.c.name = C.CString(tensor.name)
- //defer C.free(unsafe.Pointer(tensor.c.name))
-}
-
-func (tensor *ZeroCopyTensor) Reshape(shape []int32) {
- tensor.shape = make([]int32, len(shape))
- copy(tensor.shape, shape)
- length := C.sizeof_int * C.size_t(len(shape))
- if tensor.c.shape.capacity < C.size_t(length) {
- if tensor.c.shape.capacity != C.size_t(0) {
- C.free(tensor.c.shape.data)
- }
- tensor.c.shape.data = C.malloc(length)
- tensor.c.shape.capacity = length
- }
- tensor.c.shape.length = length
- C.memcpy(tensor.c.shape.data, unsafe.Pointer(&shape[0]), length)
-}
-
-func (tensor *ZeroCopyTensor) DataType() PaddleDType {
- return PaddleDType(tensor.c.dtype)
-}
-
-func (tensor *ZeroCopyTensor) SetValue(value interface{}) {
- val := reflect.ValueOf(value)
- shape, dtype := ShapeAndTypeOf(val)
- tensor.Reshape(shape)
- num := numel(shape)
- length := C.size_t(SizeofDataType(dtype) * num)
- if tensor.c.data.capacity < length {
- if tensor.c.data.capacity != C.size_t(0) {
- C.free(tensor.c.data.data)
- }
- tensor.c.data.data = C.malloc(length)
- tensor.c.data.capacity = length
- }
- tensor.c.data.length = length
-
- switch dtype {
- case PaddleDType(UINT8):
- data := val.Interface().([]uint8)
- C.memcpy(tensor.c.data.data, unsafe.Pointer(&data[0]), length)
- case PaddleDType(INT32):
- data := val.Interface().([]int32)
- C.memcpy(tensor.c.data.data, unsafe.Pointer(&data[0]), length)
- case PaddleDType(INT64):
- data := val.Interface().([]int64)
- C.memcpy(tensor.c.data.data, unsafe.Pointer(&data[0]), length)
- case PaddleDType(FLOAT32):
- data := val.Interface().([]float32)
- C.memcpy(tensor.c.data.data, unsafe.Pointer(&data[0]), length)
- }
- tensor.c.dtype = C.PD_DataType(dtype)
-}
-
-func TypeOf(dtype PaddleDType, shape []int32) reflect.Type {
- var ret reflect.Type
- for _, t := range types {
- if t.dtype == dtype {
- ret = t.gotype
- break
- }
- }
-
- for range shape {
- ret = reflect.SliceOf(ret)
- }
- return ret
-}
-
-func (tensor *ZeroCopyTensor) Value() interface{} {
- t := TypeOf(PaddleDType(tensor.c.dtype), tensor.shape)
- value := reflect.New(t)
- c_bytes := tensor.c.data.data
- length := tensor.c.data.length
- var slice []byte
- if unsafe.Sizeof(unsafe.Pointer(nil)) == 8 {
- slice = (*[1<<50 - 1]byte)(unsafe.Pointer(c_bytes))[:length:length]
- } else {
- slice = (*[1 << 30]byte)(unsafe.Pointer(c_bytes))[:length:length]
- }
- r := bytes.NewReader(slice)
- DecodeTensor(r, tensor.Shape(), t, value)
- return reflect.Indirect(value).Interface()
-}
-
-func Endian() binary.ByteOrder {
- buf := [2]byte{}
- *(*uint16)(unsafe.Pointer(&buf[0])) = uint16(0xABCD)
-
- var endian binary.ByteOrder
-
- switch buf {
- case [2]byte{0xCD, 0xAB}:
- endian = binary.LittleEndian
- case [2]byte{0xAB, 0xCD}:
- endian = binary.BigEndian
- default:
- panic("Could not determine native endianness.")
- }
- return endian
-}
-
-func DecodeTensor(r *bytes.Reader, shape []int32, t reflect.Type, ptr reflect.Value) {
- switch t.Kind() {
- case reflect.Uint8, reflect.Int32, reflect.Int64, reflect.Float32:
- binary.Read(r, Endian(), ptr.Interface())
- case reflect.Slice:
- value := reflect.Indirect(ptr)
- value.Set(reflect.MakeSlice(t, int(shape[0]), int(shape[0])))
- if len(shape) == 1 && value.Len() > 0 {
- switch value.Index(0).Kind() {
- case reflect.Uint8, reflect.Int32, reflect.Int64, reflect.Float32:
- binary.Read(r, Endian(), value.Interface())
- return
- }
- }
-
- for i := 0; i < value.Len(); i++ {
- DecodeTensor(r, shape[1:], t.Elem(), value.Index(i).Addr())
- }
- }
-}
-
-func SizeofDataType(dtype PaddleDType) int32 {
- switch dtype {
- case UINT8:
- return int32(C.sizeof_uchar)
- case INT32:
- return int32(C.sizeof_int)
- case INT64:
- return int32(C.sizeof_longlong)
- case FLOAT32:
- return int32(C.sizeof_float)
- }
- return -1
-}
-
-func ShapeAndTypeOf(val reflect.Value) (shape []int32, dt PaddleDType) {
- gotype := val.Type()
- for gotype.Kind() == reflect.Array || gotype.Kind() == reflect.Slice {
- shape = append(shape, int32(val.Len()))
- if val.Len() > 0 {
- val = val.Index(0)
- }
- gotype = gotype.Elem()
- }
-
- for _, t := range types {
- if gotype.Kind() == t.gotype.Kind() {
- return shape, PaddleDType(t.dtype)
- }
- }
- return shape, dt
-}
diff --git a/paddle/fluid/distributed/CMakeLists.txt b/paddle/fluid/distributed/CMakeLists.txt
index a2062d82c8130bbde5e59e6bd0ca3515c38537b1..905347d031b35b39b43879c7bd78ab39e933a5b3 100644
--- a/paddle/fluid/distributed/CMakeLists.txt
+++ b/paddle/fluid/distributed/CMakeLists.txt
@@ -11,8 +11,8 @@ if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0)
"${DISTRIBUTE_COMPILE_FLAGS} -faligned-new")
endif()
-add_subdirectory(table)
add_subdirectory(service)
+add_subdirectory(table)
add_subdirectory(test)
add_subdirectory(index_dataset)
diff --git a/paddle/fluid/distributed/common/sparse_sharding_merge.h b/paddle/fluid/distributed/common/sparse_sharding_merge.h
new file mode 100644
index 0000000000000000000000000000000000000000..3f84b5c4b212e2b261a4ef9b3f21163e5ef705b2
--- /dev/null
+++ b/paddle/fluid/distributed/common/sparse_sharding_merge.h
@@ -0,0 +1,311 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+#include
+
+#include
+#include
+#include
+#include // NOLINT
+#include
+
+#include
+#include "boost/lexical_cast.hpp"
+#include "glog/logging.h"
+#include "paddle/fluid/distributed/common/utils.h"
+#include "paddle/fluid/framework/blocking_queue.h"
+#include "paddle/fluid/framework/dim.h"
+#include "paddle/fluid/framework/framework.pb.h"
+#include "paddle/fluid/framework/tensor.h"
+#include "paddle/fluid/framework/tensor_util.h"
+#include "paddle/fluid/string/split.h"
+
+constexpr int FG = 256 * 1024 * 1024;
+constexpr int Q_SIZE = 10000;
+constexpr int BUCKET = 10;
+constexpr char XEOF[] = "EOF";
+
+using boost::lexical_cast;
+
+inline double GetCurrentUS() {
+ struct timeval time;
+ gettimeofday(&time, NULL);
+ return 1e+6 * time.tv_sec + time.tv_usec;
+}
+
+namespace paddle {
+namespace distributed {
+
+class ShardingMerge {
+ public:
+ ShardingMerge() {}
+ ~ShardingMerge() {}
+
+ void Merge(const std::vector &inputs,
+ const std::vector &feasigns, const std::string &output,
+ const int embedding_dim) {
+ pool_.reset(new ::ThreadPool(inputs.size()));
+
+ std::vector> tasks(inputs.size());
+ std::vector> rows;
+ rows.resize(inputs.size());
+
+ auto begin = GetCurrentUS();
+ for (int x = 0; x < inputs.size(); ++x) {
+ tasks[x] = pool_->enqueue([this, x, &rows, &inputs, &feasigns]() -> int {
+ DeserializeRowsFromFile(inputs[x], feasigns[x], &rows[x]);
+ return 0;
+ });
+ }
+
+ for (size_t x = 0; x < tasks.size(); ++x) {
+ tasks[x].wait();
+ }
+
+ int64_t total_rows = 0;
+ for (auto x = 0; x < rows.size(); x++) {
+ total_rows += rows[x].size();
+ }
+
+ auto end = GetCurrentUS();
+
+ VLOG(0) << "got " << total_rows
+ << " feasigin ids from sparse embedding using " << end - begin;
+
+ std::vector total_dims = {total_rows,
+ static_cast(embedding_dim)};
+
+ std::vector> batch_buckets;
+ batch_buckets.resize(inputs.size());
+
+ for (int x = 0; x < rows.size(); ++x) {
+ batch_buckets[x] = bucket(rows[x].size(), BUCKET);
+ }
+
+ std::ofstream out(output, std::ios::binary);
+
+ begin = GetCurrentUS();
+ SerializeRowsToStream(out, rows, batch_buckets, total_rows);
+ end = GetCurrentUS();
+ VLOG(0) << "write rows to oostrream using " << end - begin;
+
+ begin = GetCurrentUS();
+ SerializePreTensorToStream(out, total_dims);
+ end = GetCurrentUS();
+ VLOG(0) << "write pretensor to oostrream using " << end - begin;
+
+ begin = GetCurrentUS();
+ SerializeValueToStream(out, inputs, batch_buckets, embedding_dim);
+ end = GetCurrentUS();
+ VLOG(0) << "write values to oostrream using " << end - begin;
+ }
+
+ private:
+ void SerializeRowsToStream(std::ostream &os,
+ const std::vector> &rows,
+ const std::vector> &batch_buckets,
+ int64_t total_rows) {
+ { // the 1st field, uint32_t version
+ constexpr uint32_t version = 0;
+ os.write(reinterpret_cast(&version), sizeof(version));
+ }
+
+ {
+ // the 2st field, rows information
+ os.write(reinterpret_cast(&total_rows), sizeof(total_rows));
+
+ for (int b = 0; b < BUCKET; ++b) {
+ for (int x = 0; x < batch_buckets.size(); ++x) {
+ auto begin = batch_buckets[x][b];
+ auto end = batch_buckets[x][b + 1];
+
+ if (end - begin == 0) continue;
+
+ os.write(reinterpret_cast(rows[x].data() + begin),
+ sizeof(int64_t) * (end - begin));
+ }
+ }
+
+ // the 3st field, the height of SelectedRows
+ int64_t height = total_rows;
+ os.write(reinterpret_cast(&height), sizeof(height));
+ }
+ }
+
+ void SerializePreTensorToStream(std::ostream &os,
+ const std::vector &dims) {
+ { // the 1st field, uint32_t version
+ constexpr uint32_t version = 0;
+ os.write(reinterpret_cast(&version), sizeof(version));
+ }
+ { // the 2nd field, tensor description
+ // int32_t size
+ framework::proto::VarType::TensorDesc desc;
+ desc.set_data_type(framework::proto::VarType::FP32);
+ auto *pb_dims = desc.mutable_dims();
+ pb_dims->Resize(static_cast(dims.size()), 0);
+ std::copy(dims.begin(), dims.end(), pb_dims->begin());
+ int32_t size = desc.ByteSize();
+ os.write(reinterpret_cast(&size), sizeof(size));
+ auto out = desc.SerializeAsString();
+ os.write(out.data(), size);
+ }
+ }
+
+ void SerializeValueToVec(std::ifstream &in, const int batch,
+ const int embedding_dim, std::vector *out) {
+ auto queue =
+ std::make_shared>>();
+
+ auto read = [batch, &in, &queue]() {
+ std::string line;
+ std::vector columns;
+ std::vector values_str;
+
+ int count = 0;
+
+ while (std::getline(in, line)) {
+ ++count;
+ columns = string::Split(line, '\t');
+
+ if (columns.size() != 5) {
+ VLOG(0) << "unexpected line: " << line << ", skip it";
+ continue;
+ }
+
+ values_str = string::Split(columns[4], ',');
+ queue->Push(values_str);
+
+ if (count >= batch) {
+ break;
+ }
+ }
+ queue->Push({});
+ };
+
+ auto write = [embedding_dim, &out, &queue]() {
+ std::vector values_str;
+ std::string line;
+
+ while (true) {
+ queue->Pop(&values_str);
+
+ if (values_str.size() == 0) {
+ break;
+ }
+
+ for (int x = 0; x < embedding_dim; ++x) {
+ float v = 0.0;
+ try {
+ v = lexical_cast(values_str[x]);
+ } catch (boost::bad_lexical_cast &e) {
+ VLOG(0) << " get unexpected line: " << line;
+ }
+ out->push_back(v);
+ }
+ }
+ };
+
+ std::thread p_read(read);
+ std::thread p_write(write);
+ p_read.join();
+ p_write.join();
+ }
+
+ void SerializeVecToStream(std::ostream &out,
+ const std::vector &value) {
+ out.write(reinterpret_cast(value.data()),
+ static_cast(sizeof(float) * value.size()));
+ }
+
+ void SerializeValueToStream(
+ std::ostream &out, const std::vector &ins,
+ const std::vector> &batch_buckets,
+ const int embedding_dim) {
+ std::vector> in_streams;
+
+ for (int x = 0; x < ins.size(); ++x) {
+ in_streams.emplace_back(std::make_shared(ins[x]));
+ }
+
+ std::vector> tasks(ins.size());
+
+ for (int b = 0; b < BUCKET; ++b) {
+ std::vector> values;
+ values.resize(tasks.size());
+
+ auto begin = GetCurrentUS();
+
+ for (int x = 0; x < tasks.size(); ++x) {
+ auto batch = batch_buckets[x][b + 1] - batch_buckets[x][b];
+ values[x].clear();
+ values[x].reserve(batch * embedding_dim);
+ }
+
+ for (int x = 0; x < tasks.size(); ++x) {
+ tasks[x] =
+ pool_->enqueue([this, b, x, &out, &in_streams, &batch_buckets,
+ &values, embedding_dim]() -> int {
+ auto batch = batch_buckets[x][b + 1] - batch_buckets[x][b];
+ if (batch == 0) return 0;
+ SerializeValueToVec(*(in_streams[x].get()), batch, embedding_dim,
+ &values[x]);
+ return 0;
+ });
+ }
+
+ for (size_t x = 0; x < tasks.size(); ++x) {
+ tasks[x].wait();
+ }
+
+ auto end = GetCurrentUS();
+
+ auto begin1 = GetCurrentUS();
+ for (size_t x = 0; x < tasks.size(); ++x) {
+ SerializeVecToStream(out, values[x]);
+ }
+ auto end1 = GetCurrentUS();
+
+ VLOG(0) << "serialize buckets " << b << " read using " << end - begin
+ << ", to oostream using " << end1 - begin1;
+ }
+ }
+
+ void DeserializeRowsFromFile(const std::string &input_file,
+ const int64_t feasigns,
+ std::vector *rows) {
+ std::string line;
+ std::vector columns;
+ std::ifstream file(input_file);
+
+ rows->reserve(feasigns);
+
+ while (std::getline(file, line)) {
+ columns = string::Split(line, '\t');
+ if (columns.size() != 5) {
+ VLOG(0) << "unexpected line: " << line << ", skip it";
+ continue;
+ }
+ rows->push_back(std::stoull(columns[0]));
+ }
+
+ VLOG(0) << "parse " << rows->size() << " embedding rows from "
+ << input_file;
+ }
+
+ private:
+ std::unique_ptr<::ThreadPool> pool_;
+};
+} // namespace distributed
+} // namespace paddle
diff --git a/paddle/fluid/distributed/common/utils.h b/paddle/fluid/distributed/common/utils.h
index f81f84b1e117510443a5698a6ba1574262f640a5..2305001ad6f8f90eea49efa88b2a2615176f3ffb 100644
--- a/paddle/fluid/distributed/common/utils.h
+++ b/paddle/fluid/distributed/common/utils.h
@@ -14,6 +14,8 @@
#pragma once
+#include
+
#include
#include
#include
@@ -83,5 +85,11 @@ std::string to_string(const std::vector& vec) {
}
return ss.str();
}
+
+inline double GetCurrentUS() {
+ struct timeval time;
+ gettimeofday(&time, NULL);
+ return 1e+6 * time.tv_sec + time.tv_usec;
}
-}
+} // namespace distributed
+} // namespace paddle
diff --git a/paddle/fluid/distributed/fleet.cc b/paddle/fluid/distributed/fleet.cc
index dfd55f16e1a065e46b2186a6a589eabc1ac3b431..9e2a0b35224a4ea3a6198e20309d3a335999651e 100644
--- a/paddle/fluid/distributed/fleet.cc
+++ b/paddle/fluid/distributed/fleet.cc
@@ -417,8 +417,10 @@ void FleetWrapper::PushSparseFromTensorWithLabelAsync(
return;
}
-void FleetWrapper::LoadModel(const std::string& path, const int mode) {
- auto ret = pserver_ptr_->_worker_ptr->load(path, std::to_string(mode));
+void FleetWrapper::LoadModel(const std::string& path, const std::string& mode) {
+ auto* communicator = Communicator::GetInstance();
+ auto ret = communicator->_worker_ptr->load(path, mode);
+ // auto ret = pserver_ptr_->_worker_ptr->load(path, std::to_string(mode));
ret.wait();
if (ret.get() != 0) {
LOG(ERROR) << "load model from path:" << path << " failed";
@@ -429,8 +431,11 @@ void FleetWrapper::LoadModel(const std::string& path, const int mode) {
void FleetWrapper::LoadModelOneTable(const uint64_t table_id,
const std::string& path, const int mode) {
+ auto* communicator = Communicator::GetInstance();
auto ret =
- pserver_ptr_->_worker_ptr->load(table_id, path, std::to_string(mode));
+ communicator->_worker_ptr->load(table_id, path, std::to_string(mode));
+ // auto ret =
+ // pserver_ptr_->_worker_ptr->load(table_id, path, std::to_string(mode));
ret.wait();
if (ret.get() != 0) {
LOG(ERROR) << "load model of table id: " << table_id
diff --git a/paddle/fluid/distributed/fleet.h b/paddle/fluid/distributed/fleet.h
index 0da5d1e2bf987f38de3b9a03c659fc5e1841eca1..1b2bde85de04c2f0dc528700f10d087199c56c50 100644
--- a/paddle/fluid/distributed/fleet.h
+++ b/paddle/fluid/distributed/fleet.h
@@ -200,7 +200,7 @@ class FleetWrapper {
void PrintTableStat(const uint64_t table_id);
// mode = 0, load all feature
// mode = 1, load delta feature, which means load diff
- void LoadModel(const std::string& path, const int mode);
+ void LoadModel(const std::string& path, const std::string& mode);
// mode = 0, load all feature
// mode = 1, load delta feature, which means load diff
void LoadModelOneTable(const uint64_t table_id, const std::string& path,
diff --git a/paddle/fluid/distributed/index_dataset/index_sampler.cc b/paddle/fluid/distributed/index_dataset/index_sampler.cc
index 58f85d98fb09c6576daa0816be2d58c90c5a8a42..3e573bbdd2de97130a109ddb583a724cf363c6be 100644
--- a/paddle/fluid/distributed/index_dataset/index_sampler.cc
+++ b/paddle/fluid/distributed/index_dataset/index_sampler.cc
@@ -13,13 +13,10 @@
// limitations under the License.
#include "paddle/fluid/distributed/index_dataset/index_sampler.h"
-#include "paddle/fluid/operators/math/sampler.h"
namespace paddle {
namespace distributed {
-using Sampler = paddle::operators::math::Sampler;
-
std::vector> LayerWiseSampler::sample(
const std::vector>& user_inputs,
const std::vector& target_ids, bool with_hierarchy) {
@@ -30,22 +27,7 @@ std::vector> LayerWiseSampler::sample(
std::vector(user_feature_num + 2));
auto max_layer = tree_->Height();
- std::vector sampler_vec(max_layer - start_sample_layer_);
- std::vector> layer_ids(max_layer -
- start_sample_layer_);
-
- auto layer_index = max_layer - 1;
size_t idx = 0;
- while (layer_index >= start_sample_layer_) {
- auto layer_codes = tree_->GetLayerCodes(layer_index);
- layer_ids[idx] = tree_->GetNodes(layer_codes);
- sampler_vec[idx] = new paddle::operators::math::UniformSampler(
- layer_ids[idx].size() - 1, seed_);
- layer_index--;
- idx++;
- }
-
- idx = 0;
for (size_t i = 0; i < input_num; i++) {
auto travel_codes =
tree_->GetTravelCodes(target_ids[i], start_sample_layer_);
@@ -76,18 +58,15 @@ std::vector> LayerWiseSampler::sample(
for (int idx_offset = 0; idx_offset < layer_counts_[j]; idx_offset++) {
int sample_res = 0;
do {
- sample_res = sampler_vec[j]->Sample();
- } while (layer_ids[j][sample_res].id() == travel_path[j].id());
+ sample_res = sampler_vec_[j]->Sample();
+ } while (layer_ids_[j][sample_res].id() == travel_path[j].id());
outputs[idx + idx_offset][user_feature_num] =
- layer_ids[j][sample_res].id();
+ layer_ids_[j][sample_res].id();
outputs[idx + idx_offset][user_feature_num + 1] = 0;
}
idx += layer_counts_[j];
}
}
- for (size_t i = 0; i < sampler_vec.size(); i++) {
- delete sampler_vec[i];
- }
return outputs;
}
diff --git a/paddle/fluid/distributed/index_dataset/index_sampler.h b/paddle/fluid/distributed/index_dataset/index_sampler.h
index 66882bedc9b76593b9b28f184fc26ff4897494e6..8813421446a21c1379ca872952fe8b367d0724ca 100644
--- a/paddle/fluid/distributed/index_dataset/index_sampler.h
+++ b/paddle/fluid/distributed/index_dataset/index_sampler.h
@@ -16,6 +16,7 @@
#include
#include "paddle/fluid/distributed/index_dataset/index_wrapper.h"
#include "paddle/fluid/framework/program_desc.h"
+#include "paddle/fluid/operators/math/sampler.h"
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
@@ -83,6 +84,23 @@ class LayerWiseSampler : public IndexSampler {
}
reverse(layer_counts_.begin(), layer_counts_.end());
VLOG(3) << "sample counts sum: " << layer_counts_sum_;
+
+ auto max_layer = tree_->Height();
+ sampler_vec_.clear();
+ layer_ids_.clear();
+
+ auto layer_index = max_layer - 1;
+ size_t idx = 0;
+ while (layer_index >= start_sample_layer_) {
+ auto layer_codes = tree_->GetLayerCodes(layer_index);
+ layer_ids_.push_back(tree_->GetNodes(layer_codes));
+ auto sampler_temp =
+ std::make_shared(
+ layer_ids_[idx].size() - 1, seed_);
+ sampler_vec_.push_back(sampler_temp);
+ layer_index--;
+ idx++;
+ }
}
std::vector> sample(
const std::vector>& user_inputs,
@@ -94,6 +112,8 @@ class LayerWiseSampler : public IndexSampler {
std::shared_ptr tree_{nullptr};
int seed_{0};
int start_sample_layer_{1};
+ std::vector> sampler_vec_;
+ std::vector> layer_ids_;
};
} // end namespace distributed
diff --git a/paddle/fluid/distributed/service/brpc_ps_server.cc b/paddle/fluid/distributed/service/brpc_ps_server.cc
index a9370561a540bea3416508b45d8cbf8cb997ed33..a1440260bf2e77093bb937e62b13b54ad06a3e64 100644
--- a/paddle/fluid/distributed/service/brpc_ps_server.cc
+++ b/paddle/fluid/distributed/service/brpc_ps_server.cc
@@ -14,6 +14,7 @@
#include "paddle/fluid/distributed/service/brpc_ps_server.h"
#include // NOLINT
+#include "butil/object_pool.h"
#include "paddle/fluid/distributed/table/depends/sparse_utils.h"
#include "paddle/fluid/distributed/table/table.h"
#include "paddle/fluid/framework/archive.h"
@@ -196,12 +197,13 @@ int32_t BrpcPsService::pull_dense(Table *table, const PsRequestMessage &request,
return 0;
}
- std::vector res_data;
- res_data.resize(num * table->value_accesor()->select_size() / sizeof(float));
- table->pull_dense(res_data.data(), num);
+ auto res_data = butil::get_object>();
+ res_data->resize(num * table->value_accesor()->select_size() / sizeof(float));
+ table->pull_dense(res_data->data(), num);
- cntl->response_attachment().append((char *)res_data.data(),
- res_data.size() * sizeof(float));
+ cntl->response_attachment().append((char *)(res_data->data()),
+ res_data->size() * sizeof(float));
+ butil::return_object(res_data);
return 0;
}
@@ -367,12 +369,13 @@ int32_t BrpcPsService::pull_sparse(Table *table,
value.DeserializeFromBytes(const_cast(data));
- std::vector res_data;
- res_data.resize(num * dim);
- table->pull_sparse(res_data.data(), value);
+ auto res_data = butil::get_object>();
+ res_data->resize(num * dim);
+ table->pull_sparse(res_data->data(), value);
- cntl->response_attachment().append((char *)res_data.data(),
- res_data.size() * sizeof(float));
+ cntl->response_attachment().append((char *)(res_data->data()),
+ res_data->size() * sizeof(float));
+ butil::return_object(res_data);
return 0;
}
diff --git a/paddle/fluid/distributed/service/graph_brpc_client.cc b/paddle/fluid/distributed/service/graph_brpc_client.cc
index eafb4d596cc1671db26189b84ea9d0c0c31ea398..70f2da6d7252cee0268bdd35999926a232bc5b34 100644
--- a/paddle/fluid/distributed/service/graph_brpc_client.cc
+++ b/paddle/fluid/distributed/service/graph_brpc_client.cc
@@ -80,11 +80,11 @@ std::future GraphBrpcClient::get_node_feat(
[&, node_id_buckets, query_idx_buckets, request_call_num](void *done) {
int ret = 0;
auto *closure = (DownpourBrpcClosure *)done;
- int fail_num = 0;
+ size_t fail_num = 0;
for (int request_idx = 0; request_idx < request_call_num;
++request_idx) {
- if (closure->check_response(request_idx,
- PS_GRAPH_SAMPLE_NEIGHBOORS) != 0) {
+ if (closure->check_response(request_idx, PS_GRAPH_GET_NODE_FEAT) !=
+ 0) {
++fail_num;
} else {
auto &res_io_buffer =
@@ -144,6 +144,163 @@ std::future GraphBrpcClient::get_node_feat(
return fut;
}
+
+std::future GraphBrpcClient::clear_nodes(uint32_t table_id) {
+ DownpourBrpcClosure *closure = new DownpourBrpcClosure(
+ server_size, [&, server_size = this->server_size ](void *done) {
+ int ret = 0;
+ auto *closure = (DownpourBrpcClosure *)done;
+ size_t fail_num = 0;
+ for (size_t request_idx = 0; request_idx < server_size; ++request_idx) {
+ if (closure->check_response(request_idx, PS_GRAPH_CLEAR) != 0) {
+ ++fail_num;
+ break;
+ }
+ }
+ ret = fail_num == 0 ? 0 : -1;
+ closure->set_promise_value(ret);
+ });
+ auto promise = std::make_shared>();
+ closure->add_promise(promise);
+ std::future fut = promise->get_future();
+ for (size_t i = 0; i < server_size; i++) {
+ int server_index = i;
+ closure->request(server_index)->set_cmd_id(PS_GRAPH_CLEAR);
+ closure->request(server_index)->set_table_id(table_id);
+ closure->request(server_index)->set_client_id(_client_id);
+
+ GraphPsService_Stub rpc_stub =
+ getServiceStub(get_cmd_channel(server_index));
+ closure->cntl(server_index)->set_log_id(butil::gettimeofday_ms());
+ rpc_stub.service(closure->cntl(server_index),
+ closure->request(server_index),
+ closure->response(server_index), closure);
+ }
+ return fut;
+}
+std::future GraphBrpcClient::add_graph_node(
+ uint32_t table_id, std::vector &node_id_list,
+ std::vector &is_weighted_list) {
+ std::vector> request_bucket;
+ std::vector> is_weighted_bucket;
+ bool add_weight = is_weighted_list.size() > 0;
+ std::vector server_index_arr;
+ std::vector index_mapping(server_size, -1);
+ for (size_t query_idx = 0; query_idx < node_id_list.size(); ++query_idx) {
+ int server_index = get_server_index_by_id(node_id_list[query_idx]);
+ if (index_mapping[server_index] == -1) {
+ index_mapping[server_index] = request_bucket.size();
+ server_index_arr.push_back(server_index);
+ request_bucket.push_back(std::vector());
+ if (add_weight) is_weighted_bucket.push_back(std::vector());
+ }
+ request_bucket[index_mapping[server_index]].push_back(
+ node_id_list[query_idx]);
+ if (add_weight)
+ is_weighted_bucket[index_mapping[server_index]].push_back(
+ query_idx < is_weighted_list.size() ? is_weighted_list[query_idx]
+ : false);
+ }
+ size_t request_call_num = request_bucket.size();
+ DownpourBrpcClosure *closure = new DownpourBrpcClosure(
+ request_call_num, [&, request_call_num](void *done) {
+ int ret = 0;
+ auto *closure = (DownpourBrpcClosure *)done;
+ size_t fail_num = 0;
+ for (size_t request_idx = 0; request_idx < request_call_num;
+ ++request_idx) {
+ if (closure->check_response(request_idx, PS_GRAPH_ADD_GRAPH_NODE) !=
+ 0) {
+ ++fail_num;
+ }
+ }
+ ret = fail_num == request_call_num ? -1 : 0;
+ closure->set_promise_value(ret);
+ });
+ auto promise = std::make_shared>();
+ closure->add_promise(promise);
+ std::future fut = promise->get_future();
+
+ for (size_t request_idx = 0; request_idx < request_call_num; ++request_idx) {
+ int server_index = server_index_arr[request_idx];
+ closure->request(request_idx)->set_cmd_id(PS_GRAPH_ADD_GRAPH_NODE);
+ closure->request(request_idx)->set_table_id(table_id);
+ closure->request(request_idx)->set_client_id(_client_id);
+ size_t node_num = request_bucket[request_idx].size();
+ closure->request(request_idx)
+ ->add_params((char *)request_bucket[request_idx].data(),
+ sizeof(uint64_t) * node_num);
+ if (add_weight) {
+ bool weighted[is_weighted_bucket[request_idx].size() + 1];
+ for (size_t j = 0; j < is_weighted_bucket[request_idx].size(); j++)
+ weighted[j] = is_weighted_bucket[request_idx][j];
+ closure->request(request_idx)
+ ->add_params((char *)weighted,
+ sizeof(bool) * is_weighted_bucket[request_idx].size());
+ }
+ // PsService_Stub rpc_stub(get_cmd_channel(server_index));
+ GraphPsService_Stub rpc_stub =
+ getServiceStub(get_cmd_channel(server_index));
+ closure->cntl(request_idx)->set_log_id(butil::gettimeofday_ms());
+ rpc_stub.service(closure->cntl(request_idx), closure->request(request_idx),
+ closure->response(request_idx), closure);
+ }
+ return fut;
+}
+std::future GraphBrpcClient::remove_graph_node(
+ uint32_t table_id, std::vector &node_id_list) {
+ std::vector> request_bucket;
+ std::vector server_index_arr;
+ std::vector index_mapping(server_size, -1);
+ for (size_t query_idx = 0; query_idx < node_id_list.size(); ++query_idx) {
+ int server_index = get_server_index_by_id(node_id_list[query_idx]);
+ if (index_mapping[server_index] == -1) {
+ index_mapping[server_index] = request_bucket.size();
+ server_index_arr.push_back(server_index);
+ request_bucket.push_back(std::vector());
+ }
+ request_bucket[index_mapping[server_index]].push_back(
+ node_id_list[query_idx]);
+ }
+ size_t request_call_num = request_bucket.size();
+ DownpourBrpcClosure *closure = new DownpourBrpcClosure(
+ request_call_num, [&, request_call_num](void *done) {
+ int ret = 0;
+ auto *closure = (DownpourBrpcClosure *)done;
+ int fail_num = 0;
+ for (size_t request_idx = 0; request_idx < request_call_num;
+ ++request_idx) {
+ if (closure->check_response(request_idx,
+ PS_GRAPH_REMOVE_GRAPH_NODE) != 0) {
+ ++fail_num;
+ }
+ }
+ ret = fail_num == request_call_num ? -1 : 0;
+ closure->set_promise_value(ret);
+ });
+ auto promise = std::make_shared>();
+ closure->add_promise(promise);
+ std::future fut = promise->get_future();
+
+ for (size_t request_idx = 0; request_idx < request_call_num; ++request_idx) {
+ int server_index = server_index_arr[request_idx];
+ closure->request(request_idx)->set_cmd_id(PS_GRAPH_REMOVE_GRAPH_NODE);
+ closure->request(request_idx)->set_table_id(table_id);
+ closure->request(request_idx)->set_client_id(_client_id);
+ size_t node_num = request_bucket[request_idx].size();
+
+ closure->request(request_idx)
+ ->add_params((char *)request_bucket[request_idx].data(),
+ sizeof(uint64_t) * node_num);
+ // PsService_Stub rpc_stub(get_cmd_channel(server_index));
+ GraphPsService_Stub rpc_stub =
+ getServiceStub(get_cmd_channel(server_index));
+ closure->cntl(request_idx)->set_log_id(butil::gettimeofday_ms());
+ rpc_stub.service(closure->cntl(request_idx), closure->request(request_idx),
+ closure->response(request_idx), closure);
+ }
+ return fut;
+}
// char* &buffer,int &actual_size
std::future GraphBrpcClient::batch_sample_neighboors(
uint32_t table_id, std::vector node_ids, int sample_size,
@@ -174,8 +331,8 @@ std::future GraphBrpcClient::batch_sample_neighboors(
[&, node_id_buckets, query_idx_buckets, request_call_num](void *done) {
int ret = 0;
auto *closure = (DownpourBrpcClosure *)done;
- int fail_num = 0;
- for (int request_idx = 0; request_idx < request_call_num;
+ size_t fail_num = 0;
+ for (size_t request_idx = 0; request_idx < request_call_num;
++request_idx) {
if (closure->check_response(request_idx,
PS_GRAPH_SAMPLE_NEIGHBOORS) != 0) {
@@ -254,13 +411,14 @@ std::future GraphBrpcClient::random_sample_nodes(
auto &res_io_buffer = closure->cntl(0)->response_attachment();
butil::IOBufBytesIterator io_buffer_itr(res_io_buffer);
size_t bytes_size = io_buffer_itr.bytes_left();
- char buffer[bytes_size];
+ char *buffer = new char[bytes_size];
auto size = io_buffer_itr.copy_and_forward((void *)(buffer), bytes_size);
int index = 0;
while (index < bytes_size) {
ids.push_back(*(uint64_t *)(buffer + index));
index += GraphNode::id_size;
}
+ delete[] buffer;
}
closure->set_promise_value(ret);
});
@@ -292,7 +450,7 @@ std::future GraphBrpcClient::pull_graph_list(
auto &res_io_buffer = closure->cntl(0)->response_attachment();
butil::IOBufBytesIterator io_buffer_itr(res_io_buffer);
size_t bytes_size = io_buffer_itr.bytes_left();
- char buffer[bytes_size];
+ char *buffer = new char[bytes_size];
io_buffer_itr.copy_and_forward((void *)(buffer), bytes_size);
int index = 0;
while (index < bytes_size) {
@@ -301,6 +459,7 @@ std::future GraphBrpcClient::pull_graph_list(
index += node.get_size(false);
res.push_back(node);
}
+ delete buffer;
}
closure->set_promise_value(ret);
});
diff --git a/paddle/fluid/distributed/service/graph_brpc_client.h b/paddle/fluid/distributed/service/graph_brpc_client.h
index 4e6775a4bedaf1a4028fe483f58be818ef1e3581..5696e8b08037b7027939f472f58ec79925143e4f 100644
--- a/paddle/fluid/distributed/service/graph_brpc_client.h
+++ b/paddle/fluid/distributed/service/graph_brpc_client.h
@@ -78,6 +78,13 @@ class GraphBrpcClient : public BrpcPsClient {
const uint32_t& table_id, const std::vector& node_ids,
const std::vector& feature_names,
std::vector>& res);
+
+ virtual std::future clear_nodes(uint32_t table_id);
+ virtual std::future add_graph_node(
+ uint32_t table_id, std::vector& node_id_list,
+ std::vector& is_weighted_list);
+ virtual std::future remove_graph_node(
+ uint32_t table_id, std::vector& node_id_list);
virtual int32_t initialize();
int get_shard_num() { return shard_num; }
void set_shard_num(int shard_num) { this->shard_num = shard_num; }
diff --git a/paddle/fluid/distributed/service/graph_brpc_server.cc b/paddle/fluid/distributed/service/graph_brpc_server.cc
index bdd926278b624b9e9bfdf19a4f293784bef6e28f..52ac8c5d688a4ada72212923bdd478b788e422ee 100644
--- a/paddle/fluid/distributed/service/graph_brpc_server.cc
+++ b/paddle/fluid/distributed/service/graph_brpc_server.cc
@@ -24,6 +24,14 @@
namespace paddle {
namespace distributed {
+#define CHECK_TABLE_EXIST(table, request, response) \
+ if (table == NULL) { \
+ std::string err_msg("table not found with table_id:"); \
+ err_msg.append(std::to_string(request.table_id())); \
+ set_response_code(response, -1, err_msg.c_str()); \
+ return -1; \
+ }
+
int32_t GraphBrpcServer::initialize() {
auto &service_config = _config.downpour_server_param().service_param();
if (!service_config.has_service_class()) {
@@ -71,6 +79,58 @@ uint64_t GraphBrpcServer::start(const std::string &ip, uint32_t port) {
return 0;
}
+int32_t GraphBrpcService::clear_nodes(Table *table,
+ const PsRequestMessage &request,
+ PsResponseMessage &response,
+ brpc::Controller *cntl) {
+ ((GraphTable *)table)->clear_nodes();
+ return 0;
+}
+
+int32_t GraphBrpcService::add_graph_node(Table *table,
+ const PsRequestMessage &request,
+ PsResponseMessage &response,
+ brpc::Controller *cntl) {
+ CHECK_TABLE_EXIST(table, request, response)
+ if (request.params_size() < 1) {
+ set_response_code(
+ response, -1,
+ "graph_get_node_feat request requires at least 2 arguments");
+ return 0;
+ }
+
+ size_t node_num = request.params(0).size() / sizeof(uint64_t);
+ uint64_t *node_data = (uint64_t *)(request.params(0).c_str());
+ std::vector node_ids(node_data, node_data + node_num);
+ std::vector is_weighted_list;
+ if (request.params_size() == 2) {
+ size_t weight_list_size = request.params(1).size() / sizeof(bool);
+ bool *is_weighted_buffer = (bool *)(request.params(1).c_str());
+ is_weighted_list = std::vector(is_weighted_buffer,
+ is_weighted_buffer + weight_list_size);
+ }
+
+ ((GraphTable *)table)->add_graph_node(node_ids, is_weighted_list);
+ return 0;
+}
+int32_t GraphBrpcService::remove_graph_node(Table *table,
+ const PsRequestMessage &request,
+ PsResponseMessage &response,
+ brpc::Controller *cntl) {
+ CHECK_TABLE_EXIST(table, request, response)
+ if (request.params_size() < 1) {
+ set_response_code(
+ response, -1,
+ "graph_get_node_feat request requires at least 1 argument");
+ return 0;
+ }
+ size_t node_num = request.params(0).size() / sizeof(uint64_t);
+ uint64_t *node_data = (uint64_t *)(request.params(0).c_str());
+ std::vector node_ids(node_data, node_data + node_num);
+
+ ((GraphTable *)table)->remove_graph_node(node_ids);
+ return 0;
+}
int32_t GraphBrpcServer::port() { return _server.listen_address().port; }
int32_t GraphBrpcService::initialize() {
@@ -92,21 +152,17 @@ int32_t GraphBrpcService::initialize() {
&GraphBrpcService::graph_random_sample_nodes;
_service_handler_map[PS_GRAPH_GET_NODE_FEAT] =
&GraphBrpcService::graph_get_node_feat;
-
+ _service_handler_map[PS_GRAPH_CLEAR] = &GraphBrpcService::clear_nodes;
+ _service_handler_map[PS_GRAPH_ADD_GRAPH_NODE] =
+ &GraphBrpcService::add_graph_node;
+ _service_handler_map[PS_GRAPH_REMOVE_GRAPH_NODE] =
+ &GraphBrpcService::remove_graph_node;
// shard初始化,server启动后才可从env获取到server_list的shard信息
initialize_shard_info();
return 0;
}
-#define CHECK_TABLE_EXIST(table, request, response) \
- if (table == NULL) { \
- std::string err_msg("table not found with table_id:"); \
- err_msg.append(std::to_string(request.table_id())); \
- set_response_code(response, -1, err_msg.c_str()); \
- return -1; \
- }
-
int32_t GraphBrpcService::initialize_shard_info() {
if (!_is_initialize_shard_info) {
std::lock_guard guard(_initialize_shard_mutex);
diff --git a/paddle/fluid/distributed/service/graph_brpc_server.h b/paddle/fluid/distributed/service/graph_brpc_server.h
index 32c572f9e6c2bf759c59190679bcf7570a807f2d..47c370572826ac2807e4ea5cb36cf3a667dfed10 100644
--- a/paddle/fluid/distributed/service/graph_brpc_server.h
+++ b/paddle/fluid/distributed/service/graph_brpc_server.h
@@ -86,6 +86,13 @@ class GraphBrpcService : public PsBaseService {
int32_t graph_get_node_feat(Table *table, const PsRequestMessage &request,
PsResponseMessage &response,
brpc::Controller *cntl);
+ int32_t clear_nodes(Table *table, const PsRequestMessage &request,
+ PsResponseMessage &response, brpc::Controller *cntl);
+ int32_t add_graph_node(Table *table, const PsRequestMessage &request,
+ PsResponseMessage &response, brpc::Controller *cntl);
+ int32_t remove_graph_node(Table *table, const PsRequestMessage &request,
+ PsResponseMessage &response,
+ brpc::Controller *cntl);
int32_t barrier(Table *table, const PsRequestMessage &request,
PsResponseMessage &response, brpc::Controller *cntl);
int32_t load_one_table(Table *table, const PsRequestMessage &request,
diff --git a/paddle/fluid/distributed/service/graph_py_service.cc b/paddle/fluid/distributed/service/graph_py_service.cc
index 61e4e0cf7bb9155d25c630296c2b55a7d3400bfc..39befb1a112c854a183903d76a71d9e6c920b215 100644
--- a/paddle/fluid/distributed/service/graph_py_service.cc
+++ b/paddle/fluid/distributed/service/graph_py_service.cc
@@ -44,6 +44,9 @@ void GraphPyService::add_table_feat_conf(std::string table_name,
}
}
+void add_graph_node(std::vector node_ids,
+ std::vector weight_list) {}
+void remove_graph_node(std::vector node_ids) {}
void GraphPyService::set_up(std::string ips_str, int shard_num,
std::vector node_types,
std::vector edge_types) {
@@ -247,6 +250,34 @@ void GraphPyClient::load_edge_file(std::string name, std::string filepath,
}
}
+void GraphPyClient::clear_nodes(std::string name) {
+ if (this->table_id_map.count(name)) {
+ uint32_t table_id = this->table_id_map[name];
+ auto status = get_ps_client()->clear_nodes(table_id);
+ status.wait();
+ }
+}
+
+void GraphPyClient::add_graph_node(std::string name,
+ std::vector& node_ids,
+ std::vector& weight_list) {
+ if (this->table_id_map.count(name)) {
+ uint32_t table_id = this->table_id_map[name];
+ auto status =
+ get_ps_client()->add_graph_node(table_id, node_ids, weight_list);
+ status.wait();
+ }
+}
+
+void GraphPyClient::remove_graph_node(std::string name,
+ std::vector& node_ids) {
+ if (this->table_id_map.count(name)) {
+ uint32_t table_id = this->table_id_map[name];
+ auto status = get_ps_client()->remove_graph_node(table_id, node_ids);
+ status.wait();
+ }
+}
+
void GraphPyClient::load_node_file(std::string name, std::string filepath) {
// 'n' means load nodes and 'node_type' follows
std::string params = "n" + name;
diff --git a/paddle/fluid/distributed/service/graph_py_service.h b/paddle/fluid/distributed/service/graph_py_service.h
index c6657be96ba446d2f7538943aab43dd47e1868fb..da027fbae3e6f0ca1e902795b0640cee1e0b76cc 100644
--- a/paddle/fluid/distributed/service/graph_py_service.h
+++ b/paddle/fluid/distributed/service/graph_py_service.h
@@ -141,6 +141,10 @@ class GraphPyClient : public GraphPyService {
void finalize_worker();
void load_edge_file(std::string name, std::string filepath, bool reverse);
void load_node_file(std::string name, std::string filepath);
+ void clear_nodes(std::string name);
+ void add_graph_node(std::string name, std::vector& node_ids,
+ std::vector& weight_list);
+ void remove_graph_node(std::string name, std::vector& node_ids);
int get_client_id() { return client_id; }
void set_client_id(int client_id) { this->client_id = client_id; }
void start_client();
diff --git a/paddle/fluid/distributed/service/ps_local_client.cc b/paddle/fluid/distributed/service/ps_local_client.cc
index 2acc845a50890beb834676c3394f8dabc2a77e78..e949b21b02e6d9842ffae377a17610757a65ae75 100644
--- a/paddle/fluid/distributed/service/ps_local_client.cc
+++ b/paddle/fluid/distributed/service/ps_local_client.cc
@@ -42,17 +42,17 @@ int32_t PsLocalClient::initialize() {
::std::future PsLocalClient::load(const std::string& epoch,
const std::string& mode) {
// TODO
- // for (auto& it : _table_map) {
- // load(it.first, epoch, mode);
- //}
+ for (auto& it : _table_map) {
+ load(it.first, epoch, mode);
+ }
return done();
}
::std::future PsLocalClient::load(uint32_t table_id,
const std::string& epoch,
const std::string& mode) {
// TODO
- // auto* table_ptr = table(table_id);
- // table_ptr->load(epoch, mode);
+ auto* table_ptr = table(table_id);
+ table_ptr->load(epoch, mode);
return done();
}
@@ -245,7 +245,6 @@ int32_t PsLocalClient::initialize() {
::std::future PsLocalClient::push_sparse_raw_gradient(
size_t table_id, const uint64_t* keys, const float** update_values,
size_t num, void* callback) {
- VLOG(1) << "wxx push_sparse_raw_gradient";
PSClientClosure* closure = reinterpret_cast(callback);
auto* accessor = table_accessor(table_id);
auto* table_ptr = table(table_id);
diff --git a/paddle/fluid/distributed/service/ps_local_server.h b/paddle/fluid/distributed/service/ps_local_server.h
index dfbccc70900e3cf10fbb0852a114e400d738e2d6..33b0b5fa796d7571e16a0f79fc6ce4de21b1e7a8 100644
--- a/paddle/fluid/distributed/service/ps_local_server.h
+++ b/paddle/fluid/distributed/service/ps_local_server.h
@@ -26,9 +26,14 @@ class PsLocalServer : public PSServer {
PsLocalServer() {}
virtual ~PsLocalServer() {}
virtual uint64_t start() { return 0; }
- virtual uint64_t start(const std::string& ip, uint32_t port) { return 0; }
+ virtual uint64_t start(const std::string &ip, uint32_t port) { return 0; }
virtual int32_t stop() { return 0; }
virtual int32_t port() { return 0; }
+ virtual int32_t configure(
+ const PSParameter &config, PSEnvironment &env, size_t server_rank,
+ const std::vector &server_sub_program = {}) {
+ return 0;
+ }
private:
virtual int32_t initialize() { return 0; }
diff --git a/paddle/fluid/distributed/service/sendrecv.proto b/paddle/fluid/distributed/service/sendrecv.proto
index d908c26da9870a93d81c0242ac03e26cfebdb976..a4b811e950a3b56443261ceac37fa658007d519d 100644
--- a/paddle/fluid/distributed/service/sendrecv.proto
+++ b/paddle/fluid/distributed/service/sendrecv.proto
@@ -52,6 +52,9 @@ enum PsCmdID {
PS_GRAPH_SAMPLE_NEIGHBOORS = 31;
PS_GRAPH_SAMPLE_NODES = 32;
PS_GRAPH_GET_NODE_FEAT = 33;
+ PS_GRAPH_CLEAR = 34;
+ PS_GRAPH_ADD_GRAPH_NODE = 35;
+ PS_GRAPH_REMOVE_GRAPH_NODE = 36;
}
message PsRequestMessage {
diff --git a/paddle/fluid/distributed/service/server.h b/paddle/fluid/distributed/service/server.h
index 74a8cbe44b144b75f33a9c392ffdc80148a82011..89b089386f501835b7c384477b84f98f94c2a4a9 100644
--- a/paddle/fluid/distributed/service/server.h
+++ b/paddle/fluid/distributed/service/server.h
@@ -70,7 +70,7 @@ class PSServer {
virtual int32_t configure(
const PSParameter &config, PSEnvironment &env, size_t server_rank,
- const std::vector &server_sub_program = {}) final;
+ const std::vector &server_sub_program = {});
// return server_ip
virtual std::string ip() { return butil::my_ip_cstr(); }
diff --git a/paddle/fluid/distributed/table/CMakeLists.txt b/paddle/fluid/distributed/table/CMakeLists.txt
index dde1f5ae8ee3a1d683c805896a470612de6e2aba..c928ebe90ceb9e6a6c2cd7983d112c9a6f9af6b3 100644
--- a/paddle/fluid/distributed/table/CMakeLists.txt
+++ b/paddle/fluid/distributed/table/CMakeLists.txt
@@ -9,11 +9,24 @@ set_source_files_properties(${graphDir}/graph_node.cc PROPERTIES COMPILE_FLAGS $
cc_library(graph_node SRCS ${graphDir}/graph_node.cc DEPS WeightedSampler)
set_source_files_properties(common_dense_table.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
set_source_files_properties(common_sparse_table.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
+set_source_files_properties(ssd_sparse_table.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
set_source_files_properties(sparse_geo_table.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
set_source_files_properties(barrier_table.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
set_source_files_properties(common_graph_table.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
-cc_library(common_table SRCS common_sparse_table.cc common_dense_table.cc sparse_geo_table.cc barrier_table.cc common_graph_table.cc DEPS ${TABLE_DEPS} graph_edge graph_node device_context string_helper simple_threadpool xxhash generator)
+get_property(RPC_DEPS GLOBAL PROPERTY RPC_DEPS)
+
+set(EXTERN_DEP "")
+if(WITH_HETERPS)
+ set(TABLE_SRC common_sparse_table.cc ssd_sparse_table.cc common_dense_table.cc sparse_geo_table.cc barrier_table.cc common_graph_table.cc)
+ set(EXTERN_DEP rocksdb)
+else()
+ set(TABLE_SRC common_sparse_table.cc common_dense_table.cc sparse_geo_table.cc barrier_table.cc common_graph_table.cc)
+endif()
+
+cc_library(common_table SRCS ${TABLE_SRC} DEPS ${TABLE_DEPS}
+${RPC_DEPS} graph_edge graph_node device_context string_helper
+simple_threadpool xxhash generator ${EXTERN_DEP})
set_source_files_properties(tensor_accessor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
set_source_files_properties(tensor_table.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
diff --git a/paddle/fluid/distributed/table/common_graph_table.cc b/paddle/fluid/distributed/table/common_graph_table.cc
index 0dc99de1bfe82a691fdacb834acd1ad606dcb04b..29bcc04d9c1dfb3f3a5d32040162c4f5c6371672 100644
--- a/paddle/fluid/distributed/table/common_graph_table.cc
+++ b/paddle/fluid/distributed/table/common_graph_table.cc
@@ -15,12 +15,15 @@
#include "paddle/fluid/distributed/table/common_graph_table.h"
#include
#include
+#include
#include
#include
#include "paddle/fluid/distributed/common/utils.h"
#include "paddle/fluid/distributed/table/graph/graph_node.h"
+#include "paddle/fluid/framework/generator.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/fluid/string/string_helper.h"
+
namespace paddle {
namespace distributed {
@@ -35,6 +38,77 @@ std::vector GraphShard::get_batch(int start, int end, int step) {
size_t GraphShard::get_size() { return bucket.size(); }
+int32_t GraphTable::add_graph_node(std::vector &id_list,
+ std::vector &is_weight_list) {
+ size_t node_size = id_list.size();
+ std::vector>> batch(task_pool_size_);
+ for (size_t i = 0; i < node_size; i++) {
+ size_t shard_id = id_list[i] % shard_num;
+ if (shard_id >= shard_end || shard_id < shard_start) {
+ continue;
+ }
+ batch[get_thread_pool_index(id_list[i])].push_back(
+ {id_list[i], i < is_weight_list.size() ? is_weight_list[i] : false});
+ }
+ std::vector> tasks;
+ for (size_t i = 0; i < batch.size(); ++i) {
+ if (!batch[i].size()) continue;
+ tasks.push_back(_shards_task_pool[i]->enqueue([&batch, i, this]() -> int {
+ for (auto &p : batch[i]) {
+ size_t index = p.first % this->shard_num - this->shard_start;
+ this->shards[index].add_graph_node(p.first)->build_edges(p.second);
+ }
+ return 0;
+ }));
+ }
+ for (size_t i = 0; i < tasks.size(); i++) tasks[i].get();
+ return 0;
+}
+
+int32_t GraphTable::remove_graph_node(std::vector &id_list) {
+ size_t node_size = id_list.size();
+ std::vector> batch(task_pool_size_);
+ for (size_t i = 0; i < node_size; i++) {
+ size_t shard_id = id_list[i] % shard_num;
+ if (shard_id >= shard_end || shard_id < shard_start) continue;
+ batch[get_thread_pool_index(id_list[i])].push_back(id_list[i]);
+ }
+ std::vector> tasks;
+ for (size_t i = 0; i < batch.size(); ++i) {
+ if (!batch[i].size()) continue;
+ tasks.push_back(_shards_task_pool[i]->enqueue([&batch, i, this]() -> int {
+ for (auto &p : batch[i]) {
+ size_t index = p % this->shard_num - this->shard_start;
+ this->shards[index].delete_node(p);
+ }
+ return 0;
+ }));
+ }
+ for (size_t i = 0; i < tasks.size(); i++) tasks[i].get();
+ return 0;
+}
+
+void GraphShard::clear() {
+ for (size_t i = 0; i < bucket.size(); i++) {
+ delete bucket[i];
+ }
+ bucket.clear();
+ node_location.clear();
+}
+
+GraphShard::~GraphShard() { clear(); }
+void GraphShard::delete_node(uint64_t id) {
+ auto iter = node_location.find(id);
+ if (iter == node_location.end()) return;
+ int pos = iter->second;
+ delete bucket[pos];
+ if (pos != (int)bucket.size() - 1) {
+ bucket[pos] = bucket.back();
+ node_location[bucket.back()->get_id()] = pos;
+ }
+ node_location.erase(id);
+ bucket.pop_back();
+}
GraphNode *GraphShard::add_graph_node(uint64_t id) {
if (node_location.find(id) == node_location.end()) {
node_location[id] = bucket.size();
@@ -79,11 +153,7 @@ int32_t GraphTable::get_nodes_ids_by_ranges(
int start = 0, end, index = 0, total_size = 0;
res.clear();
std::vector>> tasks;
- // std::string temp = "";
- // for(int i = 0;i < shards.size();i++)
- // temp+= std::to_string((int)shards[i].get_size()) + " ";
- // VLOG(0)<<"range distribution "<enqueue(
[this, first, second, i]() -> std::vector {
return shards[i].get_ids_by_range(first, second);
@@ -106,7 +175,7 @@ int32_t GraphTable::get_nodes_ids_by_ranges(
}
total_size += shards[i].get_size();
}
- for (int i = 0; i < tasks.size(); i++) {
+ for (size_t i = 0; i < tasks.size(); i++) {
auto vec = tasks[i].get();
for (auto &id : vec) {
res.push_back(id);
@@ -219,7 +288,7 @@ int32_t GraphTable::load_edges(const std::string &path, bool reverse_edge) {
for (auto &shard : shards) {
auto bucket = shard.get_bucket();
- for (int i = 0; i < bucket.size(); i++) {
+ for (size_t i = 0; i < bucket.size(); i++) {
bucket[i]->build_sampler(sample_type);
}
}
@@ -238,10 +307,29 @@ Node *GraphTable::find_node(uint64_t id) {
uint32_t GraphTable::get_thread_pool_index(uint64_t node_id) {
return node_id % shard_num % shard_num_per_table % task_pool_size_;
}
+
+uint32_t GraphTable::get_thread_pool_index_by_shard_index(
+ uint64_t shard_index) {
+ return shard_index % shard_num_per_table % task_pool_size_;
+}
+
+int32_t GraphTable::clear_nodes() {
+ std::vector> tasks;
+ for (size_t i = 0; i < shards.size(); i++) {
+ tasks.push_back(
+ _shards_task_pool[get_thread_pool_index_by_shard_index(i)]->enqueue(
+ [this, i]() -> int {
+ this->shards[i].clear();
+ return 0;
+ }));
+ }
+ for (size_t i = 0; i < tasks.size(); i++) tasks[i].get();
+ return 0;
+}
+
int32_t GraphTable::random_sample_nodes(int sample_size,
std::unique_ptr &buffer,
int &actual_size) {
- bool need_feature = false;
int total_size = 0;
for (int i = 0; i < shards.size(); i++) {
total_size += shards[i].get_size();
@@ -281,7 +369,7 @@ int32_t GraphTable::random_sample_nodes(int sample_size,
}
std::vector> first_half, second_half;
int start_index = rand() % total_size;
- for (int i = 0; i < ranges_len.size() && i < ranges_pos.size(); i++) {
+ for (size_t i = 0; i < ranges_len.size() && i < ranges_pos.size(); i++) {
if (ranges_pos[i] + ranges_len[i] - 1 + start_index < total_size)
first_half.push_back({ranges_pos[i] + start_index,
ranges_pos[i] + ranges_len[i] + start_index});
@@ -314,31 +402,34 @@ int32_t GraphTable::random_sample_neighboors(
uint64_t &node_id = node_ids[idx];
std::unique_ptr &buffer = buffers[idx];
int &actual_size = actual_sizes[idx];
- tasks.push_back(_shards_task_pool[get_thread_pool_index(node_id)]->enqueue(
- [&]() -> int {
- Node *node = find_node(node_id);
- if (node == nullptr) {
- actual_size = 0;
- return 0;
- }
- std::vector res = node->sample_k(sample_size);
- actual_size = res.size() * (Node::id_size + Node::weight_size);
- int offset = 0;
- uint64_t id;
- float weight;
- char *buffer_addr = new char[actual_size];
- buffer.reset(buffer_addr);
- for (int &x : res) {
- id = node->get_neighbor_id(x);
- weight = node->get_neighbor_weight(x);
- memcpy(buffer_addr + offset, &id, Node::id_size);
- offset += Node::id_size;
- memcpy(buffer_addr + offset, &weight, Node::weight_size);
- offset += Node::weight_size;
- }
- return 0;
- }));
+ int thread_pool_index = get_thread_pool_index(node_id);
+ auto rng = _shards_task_rng_pool[thread_pool_index];
+
+ tasks.push_back(_shards_task_pool[thread_pool_index]->enqueue([&]() -> int {
+ Node *node = find_node(node_id);
+
+ if (node == nullptr) {
+ actual_size = 0;
+ return 0;
+ }
+ std::vector res = node->sample_k(sample_size, rng);
+ actual_size = res.size() * (Node::id_size + Node::weight_size);
+ int offset = 0;
+ uint64_t id;
+ float weight;
+ char *buffer_addr = new char[actual_size];
+ buffer.reset(buffer_addr);
+ for (int &x : res) {
+ id = node->get_neighbor_id(x);
+ weight = node->get_neighbor_weight(x);
+ memcpy(buffer_addr + offset, &id, Node::id_size);
+ offset += Node::id_size;
+ memcpy(buffer_addr + offset, &weight, Node::weight_size);
+ offset += Node::weight_size;
+ }
+ return 0;
+ }));
}
for (size_t idx = 0; idx < node_num; ++idx) {
tasks[idx].get();
@@ -386,7 +477,6 @@ std::pair GraphTable::parse_feature(
if (this->feat_id_map.count(fields[0])) {
int32_t id = this->feat_id_map[fields[0]];
std::string dtype = this->feat_dtype[id];
- int32_t shape = this->feat_shape[id];
std::vector values(fields.begin() + 1, fields.end());
if (dtype == "feasign") {
return std::make_pair(
@@ -428,7 +518,6 @@ int32_t GraphTable::pull_graph_list(int start, int total_size,
int end = start + (count - 1) * step + 1;
tasks.push_back(_shards_task_pool[i % task_pool_size_]->enqueue(
[this, i, start, end, step, size]() -> std::vector {
-
return this->shards[i].get_batch(start - size, end - size, step);
}));
start += count * step;
@@ -462,6 +551,7 @@ int32_t GraphTable::initialize() {
_shards_task_pool.resize(task_pool_size_);
for (size_t i = 0; i < _shards_task_pool.size(); ++i) {
_shards_task_pool[i].reset(new ::ThreadPool(1));
+ _shards_task_rng_pool.push_back(paddle::framework::GetCPURandomEngine(0));
}
server_num = _shard_num;
// VLOG(0) << "in init graph table server num = " << server_num;
@@ -502,5 +592,5 @@ int32_t GraphTable::initialize() {
shards = std::vector(shard_num_per_table, GraphShard(shard_num));
return 0;
}
-}
-};
+} // namespace distributed
+}; // namespace paddle
diff --git a/paddle/fluid/distributed/table/common_graph_table.h b/paddle/fluid/distributed/table/common_graph_table.h
index b18da82abe61c9695712f542e187ac48fd5edc9d..6ccce44c7ead6983efb57718999f1b36499b34e8 100644
--- a/paddle/fluid/distributed/table/common_graph_table.h
+++ b/paddle/fluid/distributed/table/common_graph_table.h
@@ -36,11 +36,12 @@ class GraphShard {
size_t get_size();
GraphShard() {}
GraphShard(int shard_num) { this->shard_num = shard_num; }
+ ~GraphShard();
std::vector &get_bucket() { return bucket; }
std::vector get_batch(int start, int end, int step);
std::vector get_ids_by_range(int start, int end) {
std::vector res;
- for (int i = start; i < end && i < bucket.size(); i++) {
+ for (int i = start; i < end && i < (int)bucket.size(); i++) {
res.push_back(bucket[i]->get_id());
}
return res;
@@ -48,6 +49,8 @@ class GraphShard {
GraphNode *add_graph_node(uint64_t id);
FeatureNode *add_feature_node(uint64_t id);
Node *find_node(uint64_t id);
+ void delete_node(uint64_t id);
+ void clear();
void add_neighboor(uint64_t id, uint64_t dst_id, float weight);
std::unordered_map get_node_location() {
return node_location;
@@ -85,6 +88,11 @@ class GraphTable : public SparseTable {
int32_t load_nodes(const std::string &path, std::string node_type);
+ int32_t add_graph_node(std::vector &id_list,
+ std::vector &is_weight_list);
+
+ int32_t remove_graph_node(std::vector &id_list);
+
Node *find_node(uint64_t id);
virtual int32_t pull_sparse(float *values,
@@ -97,6 +105,7 @@ class GraphTable : public SparseTable {
return 0;
}
+ virtual int32_t clear_nodes();
virtual void clear() {}
virtual int32_t flush() { return 0; }
virtual int32_t shrink(const std::string ¶m) { return 0; }
@@ -105,6 +114,7 @@ class GraphTable : public SparseTable {
return 0;
}
virtual int32_t initialize_shard() { return 0; }
+ virtual uint32_t get_thread_pool_index_by_shard_index(uint64_t shard_index);
virtual uint32_t get_thread_pool_index(uint64_t node_id);
virtual std::pair parse_feature(std::string feat_str);
@@ -126,6 +136,8 @@ class GraphTable : public SparseTable {
std::string table_type;
std::vector> _shards_task_pool;
+ std::vector> _shards_task_rng_pool;
};
} // namespace distributed
+
}; // namespace paddle
diff --git a/paddle/fluid/distributed/table/common_sparse_table.cc b/paddle/fluid/distributed/table/common_sparse_table.cc
index 1c315d34abcb6ef73d898da4f71e0659842e5588..e1223face0f54ac782fa41ff16a2db1b08aa413a 100644
--- a/paddle/fluid/distributed/table/common_sparse_table.cc
+++ b/paddle/fluid/distributed/table/common_sparse_table.cc
@@ -13,9 +13,9 @@
// limitations under the License.
#include "paddle/fluid/distributed/table/common_sparse_table.h"
-
#include
+#include "boost/lexical_cast.hpp"
#include "glog/logging.h"
#include "paddle/fluid/platform/enforce.h"
@@ -25,82 +25,12 @@ class ValueBlock;
} // namespace distributed
} // namespace paddle
-#define PSERVER_SAVE_SUFFIX "_txt"
-
namespace paddle {
namespace distributed {
-enum SaveMode { all, base, delta };
-
-struct Meta {
- std::string param;
- int shard_id;
- std::vector names;
- std::vector dims;
- uint64_t count;
- std::unordered_map dims_map;
-
- explicit Meta(const std::string& metapath) {
- std::ifstream file(metapath);
- std::string line;
- int num_lines = 0;
- while (std::getline(file, line)) {
- if (StartWith(line, "#")) {
- continue;
- }
- auto pairs = paddle::string::split_string(line, "=");
- PADDLE_ENFORCE_EQ(
- pairs.size(), 2,
- paddle::platform::errors::InvalidArgument(
- "info in %s except k=v, but got %s", metapath, line));
-
- if (pairs[0] == "param") {
- param = pairs[1];
- }
- if (pairs[0] == "shard_id") {
- shard_id = std::stoi(pairs[1]);
- }
- if (pairs[0] == "row_names") {
- names = paddle::string::split_string(pairs[1], ",");
- }
- if (pairs[0] == "row_dims") {
- auto dims_strs =
- paddle::string::split_string(pairs[1], ",");
- for (auto& str : dims_strs) {
- dims.push_back(std::stoi(str));
- }
- }
- if (pairs[0] == "count") {
- count = std::stoull(pairs[1]);
- }
- }
- for (int x = 0; x < names.size(); ++x) {
- dims_map[names[x]] = dims[x];
- }
- }
-
- Meta(std::string param, int shard_id, std::vector row_names,
- std::vector dims, uint64_t count) {
- this->param = param;
- this->shard_id = shard_id;
- this->names = row_names;
- this->dims = dims;
- this->count = count;
- }
-
- std::string ToString() {
- std::stringstream ss;
- ss << "param=" << param << "\n";
- ss << "shard_id=" << shard_id << "\n";
- ss << "row_names=" << paddle::string::join_strings(names, ',') << "\n";
- ss << "row_dims=" << paddle::string::join_strings(dims, ',') << "\n";
- ss << "count=" << count << "\n";
- return ss.str();
- }
-};
-
-void ProcessALine(const std::vector& columns, const Meta& meta,
- std::vector>* values) {
+void CommonSparseTable::ProcessALine(const std::vector& columns,
+ const Meta& meta, const int64_t id,
+ std::vector>* values) {
auto colunmn_size = columns.size();
auto load_values =
paddle::string::split_string(columns[colunmn_size - 1], ",");
@@ -116,49 +46,83 @@ void ProcessALine(const std::vector& columns, const Meta& meta,
"The data format in txt does not meet the field "
"requirements defined in meta"));
- std::transform(start, end, std::back_inserter(val),
- [](std::string va) { return std::stof(va); });
+ std::transform(start, end, std::back_inserter(val), [id](std::string va) {
+ float v = 0.0;
+
+ try {
+ v = lexical_cast(va);
+ } catch (boost::bad_lexical_cast& e) {
+ VLOG(0) << "id: " << id << " get unexpected value: " << va
+ << " and be reset to: 0.0";
+ }
+ return v;
+ });
+
values->push_back(val);
offset += meta.dims[x];
}
}
-int64_t SaveToText(std::ostream* os, std::shared_ptr block,
- const int mode) {
- int64_t not_save_num = 0;
- for (auto& value : block->values_) {
- if (mode == SaveMode::delta && !value.second.need_save_) {
- not_save_num++;
- continue;
- }
+void CommonSparseTable::SaveMetaToText(std::ostream* os,
+ const CommonAccessorParameter& common,
+ const size_t shard_idx,
+ const int64_t total) {
+ // save meta
+ std::stringstream stream;
+ stream << "param=" << common.table_name() << "\n";
+ stream << "shard_id=" << shard_idx << "\n";
+ stream << "row_names=" << paddle::string::join_strings(common.params(), ',')
+ << "\n";
+ stream << "row_dims=" << paddle::string::join_strings(common.dims(), ',')
+ << "\n";
+ stream << "count=" << total << "\n";
+ os->write(stream.str().c_str(), sizeof(char) * stream.str().size());
+}
- auto* vs = value.second.data_;
- std::stringstream ss;
- auto id = value.first;
- ss << id << "\t" << value.second.count_ << "\t" << value.second.unseen_days_
- << "\t" << value.second.is_entry_ << "\t";
+int64_t CommonSparseTable::SaveValueToText(std::ostream* os,
+ std::shared_ptr block,
+ std::shared_ptr<::ThreadPool> pool,
+ const int mode, int shard_id) {
+ int64_t save_num = 0;
+ for (auto& table : block->values_) {
+ for (auto& value : table) {
+ if (mode == SaveMode::delta && !value.second->need_save_) {
+ continue;
+ }
- for (int i = 0; i < block->value_length_; i++) {
- ss << vs[i];
- ss << ",";
- }
+ ++save_num;
+
+ std::stringstream ss;
+ auto* vs = value.second->data_.data();
- ss << "\n";
+ auto id = value.first;
- os->write(ss.str().c_str(), sizeof(char) * ss.str().size());
+ ss << id << "\t" << value.second->count_ << "\t"
+ << value.second->unseen_days_ << "\t" << value.second->is_entry_
+ << "\t";
+
+ for (int i = 0; i < block->value_length_ - 1; i++) {
+ ss << std::to_string(vs[i]) << ",";
+ }
- if (mode == SaveMode::base || mode == SaveMode::delta) {
- value.second.need_save_ = false;
+ ss << std::to_string(vs[block->value_length_ - 1]);
+ ss << "\n";
+
+ os->write(ss.str().c_str(), sizeof(char) * ss.str().size());
+
+ if (mode == SaveMode::base || mode == SaveMode::delta) {
+ value.second->need_save_ = false;
+ }
}
}
- return block->values_.size() - not_save_num;
+ return save_num;
}
-int64_t LoadFromText(const std::string& valuepath, const std::string& metapath,
- const int pserver_id, const int pserver_num,
- const int local_shard_num,
- std::vector>* blocks) {
+int64_t CommonSparseTable::LoadFromText(
+ const std::string& valuepath, const std::string& metapath,
+ const int pserver_id, const int pserver_num, const int local_shard_num,
+ std::vector>* blocks) {
Meta meta = Meta(metapath);
int num_lines = 0;
@@ -167,7 +131,7 @@ int64_t LoadFromText(const std::string& valuepath, const std::string& metapath,
while (std::getline(file, line)) {
auto values = paddle::string::split_string(line, "\t");
- auto id = std::stoull(values[0]);
+ auto id = lexical_cast(values[0]);
if (id % pserver_num != pserver_id) {
VLOG(3) << "will not load " << values[0] << " from " << valuepath
@@ -179,15 +143,17 @@ int64_t LoadFromText(const std::string& valuepath, const std::string& metapath,
auto block = blocks->at(shard_id);
std::vector> kvalues;
- ProcessALine(values, meta, &kvalues);
+ ProcessALine(values, meta, id, &kvalues);
block->Init(id, false);
- auto value_instant = block->GetValue(id);
+ VALUE* value_instant = block->GetValue(id);
+
if (values.size() == 5) {
- value_instant->count_ = std::stoi(values[1]);
- value_instant->unseen_days_ = std::stoi(values[2]);
- value_instant->is_entry_ = static_cast(std::stoi(values[3]));
+ value_instant->count_ = lexical_cast(values[1]);
+ value_instant->unseen_days_ = lexical_cast(values[2]);
+ value_instant->is_entry_ =
+ static_cast(lexical_cast(values[3]));
}
std::vector block_values = block->Get(id, meta.names, meta.dims);
@@ -314,16 +280,24 @@ int32_t CommonSparseTable::set_global_lr(float* lr) {
int32_t CommonSparseTable::load(const std::string& path,
const std::string& param) {
+ auto begin = GetCurrentUS();
rwlock_->WRLock();
- VLOG(3) << "sparse table load with " << path << " with meta " << param;
LoadFromText(path, param, _shard_idx, _shard_num, task_pool_size_,
&shard_values_);
rwlock_->UNLock();
+ auto end = GetCurrentUS();
+
+ auto varname = _config.common().table_name();
+ VLOG(0) << "load " << varname << " with value: " << path
+ << " , meta: " << param
+ << " using: " << std::to_string((end - begin) / 1e+6) << " seconds";
+
return 0;
}
int32_t CommonSparseTable::save(const std::string& dirname,
const std::string& param) {
+ auto begin = GetCurrentUS();
rwlock_->WRLock();
int mode = std::stoi(param);
VLOG(3) << "sparse table save: " << dirname << " mode: " << mode;
@@ -336,36 +310,34 @@ int32_t CommonSparseTable::save(const std::string& dirname,
VLOG(3) << "save " << varname << " in dir: " << var_store << " begin";
std::vector params(_config.common().params().begin(),
_config.common().params().end());
+
std::string shard_var_pre =
string::Sprintf("%s.block%d", varname, _shard_idx);
std::string value_ = string::Sprintf("%s/%s.txt", var_store, shard_var_pre);
- std::unique_ptr value_out(new std::ofstream(value_));
+ std::unique_ptr vs(new std::ofstream(value_));
int64_t total_ins = 0;
for (int shard_id = 0; shard_id < task_pool_size_; ++shard_id) {
// save values
- total_ins += SaveToText(value_out.get(), shard_values_[shard_id], mode);
+ auto shard_save_num =
+ SaveValueToText(vs.get(), shard_values_[shard_id],
+ _shards_task_pool[shard_id], mode, shard_id);
+ total_ins += shard_save_num;
}
- value_out->close();
+ vs->close();
- // save meta
- std::stringstream stream;
- stream << "param=" << _config.common().table_name() << "\n";
- stream << "shard_id=" << _shard_idx << "\n";
- stream << "row_names="
- << paddle::string::join_strings(_config.common().params(), ',')
- << "\n";
- stream << "row_dims="
- << paddle::string::join_strings(_config.common().dims(), ',') << "\n";
- stream << "count=" << total_ins << "\n";
std::string meta_ = string::Sprintf("%s/%s.meta", var_store, shard_var_pre);
- std::unique_ptr meta_out(new std::ofstream(meta_));
- meta_out->write(stream.str().c_str(), sizeof(char) * stream.str().size());
- meta_out->close();
- VLOG(3) << "save " << varname << " in dir: " << var_store << " done";
+ std::unique_ptr ms(new std::ofstream(meta_));
+ SaveMetaToText(ms.get(), _config.common(), _shard_idx, total_ins);
+ ms->close();
+
+ auto end = GetCurrentUS();
rwlock_->UNLock();
+ VLOG(0) << "save " << varname << " with path: " << value_
+ << " using: " << std::to_string((end - begin) / 1e+6) << " seconds";
+
return 0;
}
@@ -373,16 +345,16 @@ std::pair CommonSparseTable::print_table_stat() {
int64_t feasign_size = 0;
int64_t mf_size = 0;
- for (auto& value : shard_values_) {
- feasign_size += value->values_.size();
+ for (auto& shard : shard_values_) {
+ for (auto& table : shard->values_) {
+ feasign_size += table.size();
+ }
}
return {feasign_size, mf_size};
}
int32_t CommonSparseTable::pour() {
- rwlock_->RDLock();
-
std::vector values;
std::vector keys;
@@ -399,14 +371,11 @@ int32_t CommonSparseTable::pour() {
_push_sparse(keys.data(), values.data(), pull_reservoir_.size());
pull_reservoir_.clear();
- rwlock_->UNLock();
return 0;
}
int32_t CommonSparseTable::pull_sparse(float* pull_values,
const PullSparseValue& pull_value) {
- rwlock_->RDLock();
-
auto shard_num = task_pool_size_;
std::vector> tasks(shard_num);
@@ -442,7 +411,6 @@ int32_t CommonSparseTable::pull_sparse(float* pull_values,
for (size_t shard_id = 0; shard_id < tasks.size(); ++shard_id) {
tasks[shard_id].wait();
}
- rwlock_->UNLock();
return 0;
}
@@ -470,7 +438,7 @@ int32_t CommonSparseTable::pull_sparse_ptr(char** pull_values,
auto* value = block->InitGet(id);
// std::copy_n(value + param_offset_, param_dim_,
// pull_values + param_dim_ * offset);
- pull_values[offset] = (char*)value;
+ pull_values[offset] = reinterpret_cast(value);
}
return 0;
@@ -485,7 +453,6 @@ int32_t CommonSparseTable::pull_sparse_ptr(char** pull_values,
int32_t CommonSparseTable::_push_sparse(const uint64_t* keys,
const float* values, size_t num) {
- rwlock_->RDLock();
std::vector> offset_bucket;
offset_bucket.resize(task_pool_size_);
@@ -509,7 +476,6 @@ int32_t CommonSparseTable::_push_sparse(const uint64_t* keys,
for (size_t shard_id = 0; shard_id < tasks.size(); ++shard_id) {
tasks[shard_id].wait();
}
- rwlock_->UNLock();
return 0;
}
@@ -547,7 +513,6 @@ int32_t CommonSparseTable::push_sparse(const uint64_t* keys,
int32_t CommonSparseTable::_push_sparse(const uint64_t* keys,
const float** values, size_t num) {
- rwlock_->RDLock();
std::vector> offset_bucket;
offset_bucket.resize(task_pool_size_);
@@ -574,14 +539,11 @@ int32_t CommonSparseTable::_push_sparse(const uint64_t* keys,
for (size_t shard_id = 0; shard_id < tasks.size(); ++shard_id) {
tasks[shard_id].wait();
}
- rwlock_->UNLock();
return 0;
}
int32_t CommonSparseTable::push_sparse_param(const uint64_t* keys,
const float* values, size_t num) {
- rwlock_->RDLock();
-
std::vector> offset_bucket;
offset_bucket.resize(task_pool_size_);
@@ -613,14 +575,12 @@ int32_t CommonSparseTable::push_sparse_param(const uint64_t* keys,
for (size_t shard_id = 0; shard_id < tasks.size(); ++shard_id) {
tasks[shard_id].wait();
}
- rwlock_->UNLock();
return 0;
}
int32_t CommonSparseTable::flush() { return 0; }
int32_t CommonSparseTable::shrink(const std::string& param) {
- rwlock_->WRLock();
int threshold = std::stoi(param);
VLOG(3) << "sparse table shrink: " << threshold;
@@ -629,7 +589,6 @@ int32_t CommonSparseTable::shrink(const std::string& param) {
VLOG(4) << shard_id << " " << task_pool_size_ << " begin shrink";
shard_values_[shard_id]->Shrink(threshold);
}
- rwlock_->UNLock();
return 0;
}
diff --git a/paddle/fluid/distributed/table/common_sparse_table.h b/paddle/fluid/distributed/table/common_sparse_table.h
index 50c295da53464c8cc1589b27a6dbc233367991b4..ce3cc11686a4807e9de616e2de2dc1d9b1e7c3f9 100644
--- a/paddle/fluid/distributed/table/common_sparse_table.h
+++ b/paddle/fluid/distributed/table/common_sparse_table.h
@@ -32,11 +32,83 @@
#include "paddle/fluid/framework/rw_lock.h"
#include "paddle/fluid/string/string_helper.h"
+#define PSERVER_SAVE_SUFFIX ".shard"
+using boost::lexical_cast;
+
namespace paddle {
namespace distributed {
class SparseOptimizer;
+enum SaveMode { all, base, delta };
+
+struct Meta {
+ std::string param;
+ int shard_id;
+ std::vector names;
+ std::vector dims;
+ uint64_t count;
+ std::unordered_map dims_map;
+
+ explicit Meta(const std::string& metapath) {
+ std::ifstream file(metapath);
+ std::string line;
+ int num_lines = 0;
+ while (std::getline(file, line)) {
+ if (StartWith(line, "#")) {
+ continue;
+ }
+ auto pairs = paddle::string::split_string(line, "=");
+ PADDLE_ENFORCE_EQ(
+ pairs.size(), 2,
+ paddle::platform::errors::InvalidArgument(
+ "info in %s except k=v, but got %s", metapath, line));
+
+ if (pairs[0] == "param") {
+ param = pairs[1];
+ }
+ if (pairs[0] == "shard_id") {
+ shard_id = std::stoi(pairs[1]);
+ }
+ if (pairs[0] == "row_names") {
+ names = paddle::string::split_string(pairs[1], ",");
+ }
+ if (pairs[0] == "row_dims") {
+ auto dims_strs =
+ paddle::string::split_string(pairs[1], ",");
+ for (auto& str : dims_strs) {
+ dims.push_back(std::stoi(str));
+ }
+ }
+ if (pairs[0] == "count") {
+ count = std::stoull(pairs[1]);
+ }
+ }
+ for (int x = 0; x < names.size(); ++x) {
+ dims_map[names[x]] = dims[x];
+ }
+ }
+
+ Meta(std::string param, int shard_id, std::vector row_names,
+ std::vector dims, uint64_t count) {
+ this->param = param;
+ this->shard_id = shard_id;
+ this->names = row_names;
+ this->dims = dims;
+ this->count = count;
+ }
+
+ std::string ToString() {
+ std::stringstream ss;
+ ss << "param=" << param << "\n";
+ ss << "shard_id=" << shard_id << "\n";
+ ss << "row_names=" << paddle::string::join_strings(names, ',') << "\n";
+ ss << "row_dims=" << paddle::string::join_strings(dims, ',') << "\n";
+ ss << "count=" << count << "\n";
+ return ss.str();
+ }
+};
+
class CommonSparseTable : public SparseTable {
public:
CommonSparseTable() { rwlock_.reset(new framework::RWLock); }
@@ -56,9 +128,25 @@ class CommonSparseTable : public SparseTable {
virtual int32_t initialize_optimizer();
virtual int32_t initialize_recorder();
- int32_t load(const std::string& path, const std::string& param);
+ virtual int32_t load(const std::string& path, const std::string& param);
+
+ virtual int32_t save(const std::string& path, const std::string& param);
+
+ void SaveMetaToText(std::ostream* os, const CommonAccessorParameter& common,
+ const size_t shard_idx, const int64_t total);
- int32_t save(const std::string& path, const std::string& param);
+ int64_t SaveValueToText(std::ostream* os, std::shared_ptr block,
+ std::shared_ptr<::ThreadPool> pool, const int mode,
+ int shard_id);
+
+ virtual void ProcessALine(const std::vector